From e6440b93e28c77a2ae6de2980e3bbb5166133e0a Mon Sep 17 00:00:00 2001
From: nimrodkor
Date: Thu, 3 Jul 2025 10:48:16 +0300
Subject: [PATCH] Extract comments to separate file

---
 .../aws-sdk-js-accurate-type-signatures.json | 46 ++ .../aws-sdk-js-accurate-type-signatures.md | 48 -- ...mplete-configuration-type-definitions.json | 46 ++ ...complete-configuration-type-definitions.md | 48 -- ...sdk-js-content-integrity-verification.json | 160 ++++++ ...s-sdk-js-content-integrity-verification.md | 162 ------ .../aws-sdk-js-defensive-null-checking.json | 180 +++++++ .../aws-sdk-js-defensive-null-checking.md | 182 ------- .../aws-sdk-js-document-apis-completely.json | 68 +++ .../aws-sdk-js-document-apis-completely.md | 70 --- .../aws-sdk-js-document-apis-thoroughly.json | 134 +++++ .../aws-sdk-js-document-apis-thoroughly.md | 136 ----- ...-js-document-sdk-version-requirements.json | 46 ++ ...dk-js-document-sdk-version-requirements.md | 48 -- .../aws-sdk-js-early-return-after-errors.json | 194 +++++++ .../aws-sdk-js-early-return-after-errors.md | 196 ------- ...dk-js-example-documentation-standards.json | 46 ++ ...-sdk-js-example-documentation-standards.md | 48 -- ...k-js-explicit-verified-configurations.json | 92 ++++ ...sdk-js-explicit-verified-configurations.md | 94 ---- ...s-follow-established-testing-patterns.json | 68 +++ ...-js-follow-established-testing-patterns.md | 70 --- ...-sdk-js-isolate-sensitive-buffer-data.json | 36 ++ ...ws-sdk-js-isolate-sensitive-buffer-data.md | 38 -- _reviewers/aws-sdk-js-limit-cache-size.json | 80 +++ _reviewers/aws-sdk-js-limit-cache-size.md | 82 --- ...sdk-js-maintain-consistent-formatting.json | 90 ++++ ...s-sdk-js-maintain-consistent-formatting.md | 92 ---- ...aws-sdk-js-organize-type-declarations.json | 82 +++ .../aws-sdk-js-organize-type-declarations.md | 84 --- ...ws-sdk-js-semantic-naming-conventions.json | 194 +++++++ .../aws-sdk-js-semantic-naming-conventions.md | 196 ------- ...aws-sdk-js-semantic-type-organization.json | 46 ++ .../aws-sdk-js-semantic-type-organization.md | 48 -- ...k-js-standardize-api-promise-patterns.json | 106 ++++ ...sdk-js-standardize-api-promise-patterns.md | 108 ---- ...s-structured-test-resource-management.json | 172 +++++++ ...-js-structured-test-resource-management.md | 174 ------- ...-sdk-js-test-configuration-precedence.json | 92 ++++ ...ws-sdk-js-test-configuration-precedence.md | 94 ---- ...-validate-configurations-with-clarity.json | 68 +++ ...js-validate-configurations-with-clarity.md | 70 --- .../azure-sdk-for-net-address-not-mask.json | 58 +++ .../azure-sdk-for-net-address-not-mask.md | 60 --- ...approve-ai-dependencies-conditionally.json | 94 ++++ ...t-approve-ai-dependencies-conditionally.md | 96 ---- ...et-centralize-pipeline-configurations.json | 128 +++++ ...-net-centralize-pipeline-configurations.md | 130 ----- ...dk-for-net-check-before-dereferencing.json | 156 ++++++ ...-sdk-for-net-check-before-dereferencing.md | 158 ------ ...r-net-complete-authentication-testing.json | 24 + ...for-net-complete-authentication-testing.md | 26 - ...-net-complete-pipeline-configurations.json | 94 ++++ ...or-net-complete-pipeline-configurations.md | 96 ---- ...et-descriptive-consistent-identifiers.json | 68 +++ ...-net-descriptive-consistent-identifiers.md | 70 --- .../azure-sdk-for-net-design-stable-apis.json | 324 ++++++++++++ .../azure-sdk-for-net-design-stable-apis.md | 326 ------------ ...or-net-document-ai-changes-thoroughly.json | 82 +++ ...-for-net-document-ai-changes-thoroughly.md | 84 ---
...e-sdk-for-net-document-code-reasoning.json | 146 ++++++ ...ure-sdk-for-net-document-code-reasoning.md | 148 ------ ...sdk-for-net-document-non-obvious-code.json | 46 ++ ...e-sdk-for-net-document-non-obvious-code.md | 48 -- ...e-sdk-for-net-eliminate-repeated-code.json | 46 ++ ...ure-sdk-for-net-eliminate-repeated-code.md | 48 -- ...-net-externalize-configuration-values.json | 102 ++++ ...or-net-externalize-configuration-values.md | 104 ---- ...net-externalize-sensitive-credentials.json | 24 + ...r-net-externalize-sensitive-credentials.md | 26 - ...or-net-handle-external-process-errors.json | 118 +++++ ...-for-net-handle-external-process-errors.md | 120 ----- ...for-net-maintain-clean-code-structure.json | 112 ++++ ...k-for-net-maintain-clean-code-structure.md | 114 ----- ...sdk-for-net-match-ci-commands-locally.json | 70 +++ ...e-sdk-for-net-match-ci-commands-locally.md | 72 --- ...k-for-net-minimize-memory-allocations.json | 194 +++++++ ...sdk-for-net-minimize-memory-allocations.md | 196 ------- ...-prefer-identity-based-authentication.json | 68 +++ ...et-prefer-identity-based-authentication.md | 70 --- ...-net-preserve-protocol-data-integrity.json | 58 +++ ...or-net-preserve-protocol-data-integrity.md | 60 --- ...-for-net-redact-sensitive-information.json | 24 + ...dk-for-net-redact-sensitive-information.md | 26 - ...r-net-specific-exceptions-for-clarity.json | 148 ++++++ ...for-net-specific-exceptions-for-clarity.md | 150 ------ ...net-standardize-environment-variables.json | 102 ++++ ...r-net-standardize-environment-variables.md | 104 ---- ...-for-net-surface-errors-appropriately.json | 164 ++++++ ...dk-for-net-surface-errors-appropriately.md | 166 ------ ...or-net-use-domain-specific-type-names.json | 134 +++++ ...-for-net-use-domain-specific-type-names.md | 136 ----- ...dk-for-net-use-higher-level-telemetry.json | 70 +++ ...-sdk-for-net-use-higher-level-telemetry.md | 72 --- ...et-user-friendly-configuration-values.json | 150 ++++++ ...-net-user-friendly-configuration-values.md | 152 ------ ...sdk-for-net-write-deterministic-tests.json | 102 ++++ ...e-sdk-for-net-write-deterministic-tests.md | 104 ---- .../chef-choose-semantic-algorithms.json | 206 ++++++++ _reviewers/chef-choose-semantic-algorithms.md | 208 -------- .../chef-clear-abstraction-boundaries.json | 128 +++++ .../chef-clear-abstraction-boundaries.md | 130 ----- ...onsistent-descriptive-naming-patterns.json | 216 ++++++++ ...-consistent-descriptive-naming-patterns.md | 218 -------- ...hef-document-configuration-completely.json | 102 ++++ .../chef-document-configuration-completely.md | 104 ---- ...chef-document-configuration-decisions.json | 114 +++++ .../chef-document-configuration-decisions.md | 116 ----- _reviewers/chef-document-with-examples.json | 264 ++++++++++ _reviewers/chef-document-with-examples.md | 266 ---------- ...-explicit-configuration-over-implicit.json | 244 +++++++++ ...ef-explicit-configuration-over-implicit.md | 246 --------- ...chef-externalize-configuration-values.json | 158 ++++++ .../chef-externalize-configuration-values.md | 160 ------ .../chef-extract-and-organize-methods.json | 286 +++++++++++ .../chef-extract-and-organize-methods.md | 288 ----------- _reviewers/chef-fail-fast-principle.json | 116 +++++ _reviewers/chef-fail-fast-principle.md | 118 ----- _reviewers/chef-fail-gracefully-always.json | 392 ++++++++++++++ _reviewers/chef-fail-gracefully-always.md | 394 --------------- .../chef-keep-actions-versions-current.json | 158 ++++++ .../chef-keep-actions-versions-current.md | 160 ------ 
_reviewers/chef-limit-token-permissions.json | 266 ++++++++++ _reviewers/chef-limit-token-permissions.md | 268 ---------- _reviewers/chef-log-message-quality.json | 46 ++ _reviewers/chef-log-message-quality.md | 48 -- .../chef-memoize-expensive-operations.json | 254 ++++++++++ .../chef-memoize-expensive-operations.md | 256 ---------- _reviewers/chef-never-commit-secrets.json | 134 +++++ _reviewers/chef-never-commit-secrets.md | 136 ----- _reviewers/chef-prefer-guard-clauses.json | 172 +++++++ _reviewers/chef-prefer-guard-clauses.md | 174 ------- _reviewers/chef-remove-commented-code.json | 80 +++ _reviewers/chef-remove-commented-code.md | 82 --- .../chef-secure-credential-management.json | 256 ++++++++++ .../chef-secure-credential-management.md | 258 ---------- .../chef-secure-network-operations.json | 162 ++++++ _reviewers/chef-secure-network-operations.md | 164 ------ .../chef-standardize-bash-error-handling.json | 46 ++ .../chef-standardize-bash-error-handling.md | 48 -- ...ndardize-installation-directory-paths.json | 186 +++++++ ...tandardize-installation-directory-paths.md | 188 ------- _reviewers/chef-standardize-shell-flags.json | 68 +++ _reviewers/chef-standardize-shell-flags.md | 70 --- _reviewers/chef-structure-cicd-scripts.json | 46 ++ _reviewers/chef-structure-cicd-scripts.md | 48 -- ...update-security-critical-dependencies.json | 24 + ...f-update-security-critical-dependencies.md | 26 - _reviewers/chef-use-strict-test-doubles.json | 102 ++++ _reviewers/chef-use-strict-test-doubles.md | 104 ---- .../chef-verify-automated-documentation.json | 58 +++ .../chef-verify-automated-documentation.md | 60 --- .../elasticsearch-avoid-flaky-tests.json | 70 +++ _reviewers/elasticsearch-avoid-flaky-tests.md | 72 --- ...lasticsearch-clarity-over-uncertainty.json | 174 +++++++ .../elasticsearch-clarity-over-uncertainty.md | 176 ------- ...-complete-api-parameter-documentation.json | 46 ++ ...ch-complete-api-parameter-documentation.md | 48 -- ...icsearch-configure-type-serialization.json | 174 +++++++ ...sticsearch-configure-type-serialization.md | 176 ------- ...elasticsearch-defensive-null-handling.json | 150 ++++++ .../elasticsearch-defensive-null-handling.md | 152 ------ .../elasticsearch-design-for-evolution.json | 80 +++ .../elasticsearch-design-for-evolution.md | 82 --- ...search-document-performance-tradeoffs.json | 106 ++++ ...icsearch-document-performance-tradeoffs.md | 108 ---- ...ment-security-requirements-explicitly.json | 58 +++ ...cument-security-requirements-explicitly.md | 60 --- ...elasticsearch-enforce-least-privilege.json | 72 +++ .../elasticsearch-enforce-least-privilege.md | 74 --- ...search-exceptions-for-critical-errors.json | 162 ++++++ ...icsearch-exceptions-for-critical-errors.md | 164 ------ .../elasticsearch-extract-for-clarity.json | 426 ++++++++++++++++ .../elasticsearch-extract-for-clarity.md | 428 ---------------- ...measure-before-optimizing-performance.json | 314 ++++++++++++ ...h-measure-before-optimizing-performance.md | 316 ------------ .../elasticsearch-name-reflects-meaning.json | 296 +++++++++++ .../elasticsearch-name-reflects-meaning.md | 298 ----------- ...icsearch-optimize-before-implementing.json | 382 ++++++++++++++ ...sticsearch-optimize-before-implementing.md | 384 -------------- ...icsearch-parallel-branch-traceability.json | 114 +++++ ...sticsearch-parallel-branch-traceability.md | 116 ----- ...search-prefer-callbacks-over-blocking.json | 170 +++++++ ...icsearch-prefer-callbacks-over-blocking.md | 172 ------- 
...icsearch-prevent-redundant-operations.json | 316 ++++++++++++ ...sticsearch-prevent-redundant-operations.md | 318 ------------ .../elasticsearch-robust-test-assertions.json | 182 +++++++ .../elasticsearch-robust-test-assertions.md | 184 ------- ...rch-scope-and-document-configurations.json | 104 ++++ ...earch-scope-and-document-configurations.md | 106 ---- ...csearch-specify-explicit-rest-formats.json | 70 +++ ...ticsearch-specify-explicit-rest-formats.md | 72 --- ...-stage-intensive-operations-carefully.json | 92 ++++ ...ch-stage-intensive-operations-carefully.md | 94 ---- ...arch-use-configuration-access-methods.json | 170 +++++++ ...search-use-configuration-access-methods.md | 172 ------- ...ch-use-current-configuration-patterns.json | 82 +++ ...arch-use-current-configuration-patterns.md | 84 --- .../framework-cache-expensive-operations.json | 254 ++++++++++ .../framework-cache-expensive-operations.md | 256 ---------- ...mework-descriptive-configuration-keys.json | 186 +++++++ ...ramework-descriptive-configuration-keys.md | 188 ------- .../framework-design-flexible-apis.json | 196 +++++++ _reviewers/framework-design-flexible-apis.md | 198 -------- ...amework-disable-coverage-in-workflows.json | 90 ++++ ...framework-disable-coverage-in-workflows.md | 92 ---- ...ramework-escape-column-names-properly.json | 124 +++++ .../framework-escape-column-names-properly.md | 126 ----- .../framework-explicit-null-handling.json | 356 +++++++++++++ .../framework-explicit-null-handling.md | 358 ------------- ...mework-keep-ci-configurations-minimal.json | 310 ++++++++++++ ...ramework-keep-ci-configurations-minimal.md | 312 ------------ .../framework-manage-dependencies-wisely.json | 196 +++++++ .../framework-manage-dependencies-wisely.md | 198 -------- .../framework-mark-sensitive-parameters.json | 106 ++++ .../framework-mark-sensitive-parameters.md | 108 ---- ...ramework-name-indicates-clear-purpose.json | 218 ++++++++ .../framework-name-indicates-clear-purpose.md | 220 -------- ...amework-optimize-for-code-readability.json | 258 ++++++++++ ...framework-optimize-for-code-readability.md | 260 ---------- .../framework-optimize-loop-operations.json | 162 ++++++ .../framework-optimize-loop-operations.md | 164 ------ .../framework-optimize-migration-code.json | 102 ++++ .../framework-optimize-migration-code.md | 104 ---- ...precise-testing-dependency-versioning.json | 46 ++ ...k-precise-testing-dependency-versioning.md | 48 -- .../framework-precise-type-annotations.json | 226 +++++++++ .../framework-precise-type-annotations.md | 228 --------- ...amework-use-modern-phpunit-attributes.json | 240 +++++++++ ...framework-use-modern-phpunit-attributes.md | 242 --------- .../framework-use-semantic-exceptions.json | 310 ++++++++++++ .../framework-use-semantic-exceptions.md | 312 ------------ ...in-consider-operation-time-complexity.json | 196 +++++++ ...tlin-consider-operation-time-complexity.md | 198 -------- .../kotlin-copy-external-string-inputs.json | 70 +++ .../kotlin-copy-external-string-inputs.md | 72 --- ...dependency-verification-configuration.json | 70 +++ ...n-dependency-verification-configuration.md | 72 --- .../kotlin-design-extensible-stable-apis.json | 312 ++++++++++++ .../kotlin-design-extensible-stable-apis.md | 314 ------------ ...lin-document-security-implementations.json | 72 +++ ...otlin-document-security-implementations.md | 74 --- _reviewers/kotlin-effective-api-samples.json | 214 ++++++++ _reviewers/kotlin-effective-api-samples.md | 216 -------- .../kotlin-keep-code-clearly-organized.json 
| 168 ++++++ .../kotlin-keep-code-clearly-organized.md | 170 ------- .../kotlin-minimize-not-null-assertions.json | 70 +++ .../kotlin-minimize-not-null-assertions.md | 72 --- .../kotlin-minimize-unnecessary-work.json | 384 ++++++++++++++ .../kotlin-minimize-unnecessary-work.md | 386 -------------- .../kotlin-names-express-clear-intent.json | 400 +++++++++++++++ .../kotlin-names-express-clear-intent.md | 402 --------------- .../kotlin-prefer-descriptive-errors.json | 206 ++++++++ .../kotlin-prefer-descriptive-errors.md | 208 -------- ...tlin-standardize-build-configurations.json | 80 +++ ...kotlin-standardize-build-configurations.md | 82 --- _reviewers/kotlin-test-edge-cases.json | 264 ++++++++++ _reviewers/kotlin-test-edge-cases.md | 266 ---------- ...-use-configuration-property-providers.json | 204 ++++++++ ...in-use-configuration-property-providers.md | 206 -------- .../kotlin-use-defensive-null-checks.json | 174 +++++++ .../kotlin-use-defensive-null-checks.md | 176 ------- ...kotlin-use-modern-test-infrastructure.json | 58 +++ .../kotlin-use-modern-test-infrastructure.md | 60 --- .../kubeflow-api-structure-balance.json | 70 +++ _reviewers/kubeflow-api-structure-balance.md | 72 --- .../kubeflow-automate-style-enforcement.json | 58 +++ .../kubeflow-automate-style-enforcement.md | 60 --- ...ow-centralize-configuration-constants.json | 186 +++++++ ...flow-centralize-configuration-constants.md | 188 ------- ...eflow-centralize-configuration-values.json | 208 ++++++++ ...ubeflow-centralize-configuration-values.md | 210 -------- ...-centralize-dependency-configurations.json | 82 +++ ...ow-centralize-dependency-configurations.md | 84 --- _reviewers/kubeflow-check-before-use.json | 80 +++ _reviewers/kubeflow-check-before-use.md | 82 --- ...component-agnostic-styling-principles.json | 46 ++ ...w-component-agnostic-styling-principles.md | 48 -- ...w-configurable-security-with-defaults.json | 72 +++ ...low-configurable-security-with-defaults.md | 74 --- ...ubeflow-consistent-descriptive-naming.json | 46 ++ .../kubeflow-consistent-descriptive-naming.md | 48 -- ...flow-consistent-separator-conventions.json | 82 +++ ...beflow-consistent-separator-conventions.md | 84 --- ...low-contextualize-and-classify-errors.json | 102 ++++ ...eflow-contextualize-and-classify-errors.md | 104 ---- .../kubeflow-control-header-modification.json | 180 +++++++ .../kubeflow-control-header-modification.md | 182 ------- ...ubeflow-descriptive-consistent-naming.json | 70 +++ .../kubeflow-descriptive-consistent-naming.md | 72 --- .../kubeflow-document-code-thoroughly.json | 90 ++++ .../kubeflow-document-code-thoroughly.md | 92 ---- .../kubeflow-document-migration-paths.json | 70 +++ .../kubeflow-document-migration-paths.md | 72 --- ...eflow-document-networking-annotations.json | 94 ++++ ...ubeflow-document-networking-annotations.md | 96 ---- .../kubeflow-document-with-precision.json | 68 +++ .../kubeflow-document-with-precision.md | 70 --- .../kubeflow-enforce-https-protocol.json | 36 ++ _reviewers/kubeflow-enforce-https-protocol.md | 38 -- .../kubeflow-enforce-least-privilege.json | 130 +++++ .../kubeflow-enforce-least-privilege.md | 132 ----- ...nvironment-aware-configuration-design.json | 212 ++++++++ ...-environment-aware-configuration-design.md | 214 -------- ...eflow-environment-variable-management.json | 184 +++++++ ...ubeflow-environment-variable-management.md | 186 ------- ...-externalize-configuration-parameters.json | 150 ++++++ ...ow-externalize-configuration-parameters.md | 152 ------ 
.../kubeflow-follow-api-conventions.json | 258 ++++++++++ _reviewers/kubeflow-follow-api-conventions.md | 260 ---------- ...kubeflow-go-export-naming-conventions.json | 216 ++++++++ .../kubeflow-go-export-naming-conventions.md | 218 -------- .../kubeflow-harden-container-security.json | 36 ++ .../kubeflow-harden-container-security.md | 38 -- _reviewers/kubeflow-isolate-test-cases.json | 46 ++ _reviewers/kubeflow-isolate-test-cases.md | 48 -- ...eflow-load-configurations-efficiently.json | 174 +++++++ ...ubeflow-load-configurations-efficiently.md | 176 ------- ...kubeflow-manage-configuration-changes.json | 116 +++++ .../kubeflow-manage-configuration-changes.md | 118 ----- _reviewers/kubeflow-mark-ui-text-i18n.json | 104 ++++ _reviewers/kubeflow-mark-ui-text-i18n.md | 106 ---- .../kubeflow-match-algorithms-to-purpose.json | 46 ++ .../kubeflow-match-algorithms-to-purpose.md | 48 -- _reviewers/kubeflow-normalize-url-paths.json | 68 +++ _reviewers/kubeflow-normalize-url-paths.md | 70 --- ...timize-container-build-configurations.json | 118 +++++ ...optimize-container-build-configurations.md | 120 ----- .../kubeflow-pin-version-dependencies.json | 234 +++++++++ .../kubeflow-pin-version-dependencies.md | 236 --------- .../kubeflow-precise-workflow-triggers.json | 276 ++++++++++ .../kubeflow-precise-workflow-triggers.md | 278 ---------- ...ubeflow-prefer-external-configuration.json | 82 +++ .../kubeflow-prefer-external-configuration.md | 84 --- .../kubeflow-prevent-xss-vulnerabilities.json | 24 + .../kubeflow-prevent-xss-vulnerabilities.md | 26 - ...w-prioritize-readability-over-brevity.json | 162 ++++++ ...low-prioritize-readability-over-brevity.md | 164 ------ ...ow-private-variable-naming-convention.json | 58 +++ ...flow-private-variable-naming-convention.md | 60 --- ...eflow-private-vulnerability-reporting.json | 48 ++ ...ubeflow-private-vulnerability-reporting.md | 50 -- _reviewers/kubeflow-reduce-nesting-depth.json | 70 +++ _reviewers/kubeflow-reduce-nesting-depth.md | 72 --- _reviewers/kubeflow-safe-url-operations.json | 58 +++ _reviewers/kubeflow-safe-url-operations.md | 60 --- .../kubeflow-simplify-code-structure.json | 192 +++++++ .../kubeflow-simplify-code-structure.md | 194 ------- ...specific-network-access-documentation.json | 92 ++++ ...w-specific-network-access-documentation.md | 94 ---- ...eflow-stable-configuration-management.json | 70 +++ ...ubeflow-stable-configuration-management.md | 72 --- ...flow-standardize-build-configurations.json | 80 +++ ...beflow-standardize-build-configurations.md | 82 --- ...ubeflow-standardize-makefile-patterns.json | 172 +++++++ .../kubeflow-standardize-makefile-patterns.md | 174 ------- .../kubeflow-standardize-network-tools.json | 138 +++++ .../kubeflow-standardize-network-tools.md | 140 ----- .../kubeflow-standardize-style-scripts.json | 82 +++ .../kubeflow-standardize-style-scripts.md | 84 --- .../kubeflow-structure-for-navigation.json | 106 ++++ .../kubeflow-structure-for-navigation.md | 108 ---- ...tructured-documentation-with-examples.json | 292 +++++++++++ ...-structured-documentation-with-examples.md | 294 ----------- .../kubeflow-structured-owners-files.json | 128 +++++ .../kubeflow-structured-owners-files.md | 130 ----- ...eflow-type-appropriate-default-values.json | 46 ++ ...ubeflow-type-appropriate-default-values.md | 48 -- .../kubeflow-unique-workflow-step-names.json | 286 +++++++++++ .../kubeflow-unique-workflow-step-names.md | 288 ----------- .../kubeflow-use-appropriate-log-levels.json | 92 ++++ 
.../kubeflow-use-appropriate-log-levels.md | 94 ---- .../kubeflow-use-css-classes-properly.json | 46 ++ .../kubeflow-use-css-classes-properly.md | 48 -- _reviewers/kubeflow-use-enums-for-state.json | 94 ++++ _reviewers/kubeflow-use-enums-for-state.md | 96 ---- ...kubeflow-use-modern-javascript-idioms.json | 160 ++++++ .../kubeflow-use-modern-javascript-idioms.md | 162 ------ .../kubeflow-use-snake-case-in-python.json | 80 +++ .../kubeflow-use-snake-case-in-python.md | 82 --- .../kubeflow-use-table-driven-tests.json | 140 +++++ _reviewers/kubeflow-use-table-driven-tests.md | 142 ------ .../kubeflow-validate-inputs-explicitly.json | 130 +++++ .../kubeflow-validate-inputs-explicitly.md | 132 ----- .../kubeflow-validate-model-optimization.json | 190 +++++++ .../kubeflow-validate-model-optimization.md | 192 ------- .../langchainjs-ai-dependency-management.json | 104 ++++ .../langchainjs-ai-dependency-management.md | 106 ---- ...hainjs-avoid-hardcoded-configurations.json | 162 ++++++ ...gchainjs-avoid-hardcoded-configurations.md | 164 ------ .../langchainjs-chunked-data-processing.json | 104 ++++ .../langchainjs-chunked-data-processing.md | 106 ---- ...hainjs-comprehensive-ai-documentation.json | 102 ++++ ...gchainjs-comprehensive-ai-documentation.md | 104 ---- .../langchainjs-consistent-ai-naming.json | 68 +++ .../langchainjs-consistent-ai-naming.md | 70 --- ...chainjs-consistent-naming-conventions.json | 92 ++++ ...ngchainjs-consistent-naming-conventions.md | 94 ---- .../langchainjs-constructor-over-setter.json | 128 +++++ .../langchainjs-constructor-over-setter.md | 130 ----- ...s-dependency-classification-standards.json | 138 +++++ ...njs-dependency-classification-standards.md | 140 ----- ...injs-documentation-completeness-check.json | 80 +++ ...hainjs-documentation-completeness-check.md | 82 --- .../langchainjs-eliminate-redundant-code.json | 90 ++++ .../langchainjs-eliminate-redundant-code.md | 92 ---- ...hainjs-follow-documentation-standards.json | 148 ++++++ ...gchainjs-follow-documentation-standards.md | 150 ------ .../langchainjs-follow-standard-naming.json | 94 ++++ .../langchainjs-follow-standard-naming.md | 96 ---- ...njs-match-configuration-documentation.json | 58 +++ ...ainjs-match-configuration-documentation.md | 60 --- ...orm-appropriate-environment-variables.json | 58 +++ ...tform-appropriate-environment-variables.md | 60 --- ...langchainjs-prefer-nullish-coalescing.json | 194 +++++++ .../langchainjs-prefer-nullish-coalescing.md | 196 ------- ...s-preserve-api-backward-compatibility.json | 240 +++++++++ ...njs-preserve-api-backward-compatibility.md | 242 --------- ...angchainjs-simplify-code-organization.json | 82 +++ .../langchainjs-simplify-code-organization.md | 84 --- .../langchainjs-throw-meaningful-errors.json | 136 +++++ .../langchainjs-throw-meaningful-errors.md | 138 ----- ...ngchainjs-typescript-naming-standards.json | 270 ++++++++++ ...langchainjs-typescript-naming-standards.md | 272 ---------- ...js-update-documentation-configuration.json | 46 ++ ...injs-update-documentation-configuration.md | 48 -- .../langchainjs-use-comprehensive-jsdoc.json | 252 +++++++++ .../langchainjs-use-comprehensive-jsdoc.md | 254 ---------- ...langchainjs-use-database-native-types.json | 174 +++++++ .../langchainjs-use-database-native-types.md | 176 ------- .../langchainjs-validate-untrusted-input.json | 184 +++++++ .../langchainjs-validate-untrusted-input.md | 186 ------- _reviewers/mxnet-avoid-eval-function.json | 36 ++ _reviewers/mxnet-avoid-eval-function.md | 38 -- 
.../mxnet-avoid-redundant-calculations.json | 102 ++++ .../mxnet-avoid-redundant-calculations.md | 104 ---- ...t-centralize-configuration-parameters.json | 140 +++++ ...net-centralize-configuration-parameters.md | 142 ------ ...xnet-centralize-synchronization-logic.json | 92 ++++ .../mxnet-centralize-synchronization-logic.md | 94 ---- ...mxnet-comprehensive-api-documentation.json | 114 +++++ .../mxnet-comprehensive-api-documentation.md | 116 ----- .../mxnet-consistent-api-documentation.json | 46 ++ .../mxnet-consistent-api-documentation.md | 48 -- .../mxnet-consistent-logging-format.json | 104 ++++ _reviewers/mxnet-consistent-logging-format.md | 106 ---- .../mxnet-consistent-naming-patterns.json | 180 +++++++ .../mxnet-consistent-naming-patterns.md | 182 ------- _reviewers/mxnet-document-all-parameters.json | 124 +++++ _reviewers/mxnet-document-all-parameters.md | 126 ----- _reviewers/mxnet-document-api-completely.json | 68 +++ _reviewers/mxnet-document-api-completely.md | 70 --- .../mxnet-document-environment-variables.json | 80 +++ .../mxnet-document-environment-variables.md | 82 --- ...-documentation-clarity-and-formatting.json | 128 +++++ ...et-documentation-clarity-and-formatting.md | 130 ----- .../mxnet-eliminate-redundant-constructs.json | 290 +++++++++++ .../mxnet-eliminate-redundant-constructs.md | 292 ----------- ...mxnet-explain-optimization-mechanisms.json | 112 ++++ .../mxnet-explain-optimization-mechanisms.md | 114 ----- _reviewers/mxnet-explicit-null-checks.json | 138 +++++ _reviewers/mxnet-explicit-null-checks.md | 140 ----- .../mxnet-graph-traversal-optimization.json | 150 ++++++ .../mxnet-graph-traversal-optimization.md | 152 ------ ...t-hybridization-compatible-operations.json | 220 ++++++++ ...net-hybridization-compatible-operations.md | 222 -------- .../mxnet-optimize-iteration-patterns.json | 170 +++++++ .../mxnet-optimize-iteration-patterns.md | 172 ------- _reviewers/mxnet-pre-compute-reused-data.json | 46 ++ _reviewers/mxnet-pre-compute-reused-data.md | 48 -- .../mxnet-simplify-for-readability.json | 252 +++++++++ _reviewers/mxnet-simplify-for-readability.md | 254 ---------- .../mxnet-technical-precision-matters.json | 178 +++++++ .../mxnet-technical-precision-matters.md | 180 ------- .../mxnet-use-explicit-optional-types.json | 140 +++++ .../mxnet-use-explicit-optional-types.md | 142 ------ .../mxnet-use-intent-revealing-names.json | 160 ++++++ .../mxnet-use-intent-revealing-names.md | 162 ------ _reviewers/mxnet-use-named-constants.json | 58 +++ _reviewers/mxnet-use-named-constants.md | 60 --- .../mxnet-use-pytest-parameterization.json | 46 ++ .../mxnet-use-pytest-parameterization.md | 48 -- _reviewers/opencv-check-nulls-directly.json | 46 ++ _reviewers/opencv-check-nulls-directly.md | 48 -- _reviewers/opencv-cleanup-before-errors.json | 80 +++ _reviewers/opencv-cleanup-before-errors.md | 82 --- _reviewers/opencv-clear-api-contracts.json | 278 ++++++++++ _reviewers/opencv-clear-api-contracts.md | 280 ---------- _reviewers/opencv-code-for-readability.json | 360 +++++++++++++ _reviewers/opencv-code-for-readability.md | 362 ------------- .../opencv-consistent-descriptive-naming.json | 270 ++++++++++ .../opencv-consistent-descriptive-naming.md | 272 ---------- ...pencv-cross-platform-api-design-rules.json | 158 ++++++ .../opencv-cross-platform-api-design-rules.md | 160 ------ ...nt-configuration-version-requirements.json | 70 +++ ...ment-configuration-version-requirements.md | 72 --- ...ncv-document-properly-with-references.json | 114 +++++ 
...pencv-document-properly-with-references.md | 116 ----- .../opencv-feature-flag-convention.json | 128 +++++ _reviewers/opencv-feature-flag-convention.md | 130 ----- ...v-framework-synchronization-practices.json | 366 ++++++++++++++ ...ncv-framework-synchronization-practices.md | 368 -------------- .../opencv-guard-optional-dependencies.json | 124 +++++ .../opencv-guard-optional-dependencies.md | 126 ----- .../opencv-maintain-build-compatibility.json | 82 +++ .../opencv-maintain-build-compatibility.md | 84 --- .../opencv-maintain-code-consistency.json | 112 ++++ .../opencv-maintain-code-consistency.md | 114 ----- .../opencv-meaningful-semantic-naming.json | 92 ++++ .../opencv-meaningful-semantic-naming.md | 94 ---- .../opencv-optimize-container-access.json | 124 +++++ .../opencv-optimize-container-access.md | 126 ----- .../opencv-optimize-for-common-cases.json | 114 +++++ .../opencv-optimize-for-common-cases.md | 116 ----- ...v-optimize-memory-allocation-patterns.json | 124 +++++ ...ncv-optimize-memory-allocation-patterns.md | 126 ----- .../opencv-prevent-null-vulnerabilities.json | 206 ++++++++ .../opencv-prevent-null-vulnerabilities.md | 208 -------- .../opencv-thread-safe-resource-cleanup.json | 70 +++ .../opencv-thread-safe-resource-cleanup.md | 72 --- .../opencv-use-opencv-error-mechanisms.json | 182 +++++++ .../opencv-use-opencv-error-mechanisms.md | 184 ------- .../opencv-use-optimized-functions.json | 276 ++++++++++ _reviewers/opencv-use-optimized-functions.md | 278 ---------- _reviewers/opencv-use-proper-assertions.json | 192 +++++++ _reviewers/opencv-use-proper-assertions.md | 194 ------- .../opencv-validate-tensor-dimensions.json | 140 +++++ .../opencv-validate-tensor-dimensions.md | 142 ------ .../pytorch-avoid-logic-duplication.json | 104 ++++ _reviewers/pytorch-avoid-logic-duplication.md | 106 ---- _reviewers/pytorch-check-before-access.json | 288 +++++++++++ _reviewers/pytorch-check-before-access.md | 290 ----------- .../pytorch-choose-optimal-algorithms.json | 282 +++++++++++ .../pytorch-choose-optimal-algorithms.md | 284 ----------- .../pytorch-clean-configuration-files.json | 80 +++ .../pytorch-clean-configuration-files.md | 82 --- ...ytorch-complete-documented-references.json | 46 ++ .../pytorch-complete-documented-references.md | 48 -- .../pytorch-descriptive-meaningful-names.json | 148 ++++++ .../pytorch-descriptive-meaningful-names.md | 150 ------ _reviewers/pytorch-document-for-future.json | 196 +++++++ _reviewers/pytorch-document-for-future.md | 198 -------- ...-handle-non-deterministic-collections.json | 244 +++++++++ ...ch-handle-non-deterministic-collections.md | 246 --------- ...pytorch-keep-apis-user-centric-simple.json | 162 ++++++ .../pytorch-keep-apis-user-centric-simple.md | 164 ------ ...ch-meaningful-descriptive-identifiers.json | 202 ++++++++ ...orch-meaningful-descriptive-identifiers.md | 204 -------- .../pytorch-optimize-device-transfers.json | 172 +++++++ .../pytorch-optimize-device-transfers.md | 174 ------- .../pytorch-optimize-memory-operations.json | 126 +++++ .../pytorch-optimize-memory-operations.md | 128 ----- .../pytorch-optimize-workflow-resources.json | 164 ++++++ .../pytorch-optimize-workflow-resources.md | 166 ------ ...latform-specific-configuration-guards.json | 220 ++++++++ ...-platform-specific-configuration-guards.md | 222 -------- .../pytorch-preserve-api-compatibility.json | 150 ++++++ .../pytorch-preserve-api-compatibility.md | 152 ------ .../pytorch-preserve-error-context.json | 188 +++++++ 
_reviewers/pytorch-preserve-error-context.md | 190 ------- ...ytorch-prevent-memory-vulnerabilities.json | 58 +++ .../pytorch-prevent-memory-vulnerabilities.md | 60 --- _reviewers/pytorch-remove-code-clutter.json | 250 +++++++++ _reviewers/pytorch-remove-code-clutter.md | 252 --------- .../pytorch-specific-exception-handling.json | 102 ++++ .../pytorch-specific-exception-handling.md | 104 ---- ...st-comprehensively-and-systematically.json | 180 +++++++ ...test-comprehensively-and-systematically.md | 182 ------- ...pytorch-use-accelerator-agnostic-code.json | 374 ++++++++++++++ .../pytorch-use-accelerator-agnostic-code.md | 376 -------------- ...ytorch-validate-environment-variables.json | 160 ++++++ .../pytorch-validate-environment-variables.md | 162 ------ .../pytorch-validate-performance-metrics.json | 274 ++++++++++ .../pytorch-validate-performance-metrics.md | 276 ---------- ...pytorch-write-clean-maintainable-code.json | 146 ++++++ .../pytorch-write-clean-maintainable-code.md | 148 ------ .../rails-configure-at-proper-scope.json | 252 +++++++++ _reviewers/rails-configure-at-proper-scope.md | 254 ---------- .../rails-consistent-terminology-usage.json | 200 ++++++++ .../rails-consistent-terminology-usage.md | 202 -------- ...content-security-policy-configuration.json | 138 +++++ ...s-content-security-policy-configuration.md | 140 ----- .../rails-contextual-error-messages.json | 138 +++++ _reviewers/rails-contextual-error-messages.md | 140 ----- ...-database-specific-query-optimization.json | 408 +++++++++++++++ ...ls-database-specific-query-optimization.md | 410 --------------- _reviewers/rails-document-apis-clearly.json | 278 ++++++++++ _reviewers/rails-document-apis-clearly.md | 280 ---------- ...ils-document-performance-implications.json | 92 ++++ ...rails-document-performance-implications.md | 94 ---- .../rails-efficient-data-processing.json | 126 +++++ _reviewers/rails-efficient-data-processing.md | 128 ----- ...ronment-specific-logger-configuration.json | 116 +++++ ...vironment-specific-logger-configuration.md | 118 ----- ...ails-follow-documentation-conventions.json | 158 ++++++ .../rails-follow-documentation-conventions.md | 160 ------ ...s-human-readable-configuration-values.json | 114 +++++ ...ils-human-readable-configuration-values.md | 116 ----- .../rails-initialize-nil-prone-variables.json | 184 +++++++ .../rails-initialize-nil-prone-variables.md | 186 ------- _reviewers/rails-layer-security-defenses.json | 284 +++++++++++ _reviewers/rails-layer-security-defenses.md | 286 ----------- .../rails-minimize-public-api-surface.json | 338 +++++++++++++ .../rails-minimize-public-api-surface.md | 340 ------------- ...nimize-unnecessary-object-allocations.json | 194 +++++++ ...minimize-unnecessary-object-allocations.md | 196 ------- _reviewers/rails-optimize-cache-headers.json | 150 ++++++ _reviewers/rails-optimize-cache-headers.md | 152 ------ .../rails-organize-tests-for-clarity.json | 310 ++++++++++++ .../rails-organize-tests-for-clarity.md | 312 ------------ ...ls-place-configurations-appropriately.json | 244 +++++++++ ...ails-place-configurations-appropriately.md | 246 --------- .../rails-prefer-simpler-expressions.json | 182 +++++++ .../rails-prefer-simpler-expressions.md | 184 ------- ...ils-self-documenting-identifier-names.json | 332 ++++++++++++ ...rails-self-documenting-identifier-names.md | 334 ------------ _reviewers/rails-semantic-html-usage.json | 212 ++++++++ _reviewers/rails-semantic-html-usage.md | 214 -------- ...ails-test-helpers-for-maintainability.json | 154 
++++++ .../rails-test-helpers-for-maintainability.md | 156 ------ ...ails-understand-query-method-behavior.json | 102 ++++ .../rails-understand-query-method-behavior.md | 104 ---- .../rails-wrap-threaded-code-properly.json | 128 +++++ .../rails-wrap-threaded-code-properly.md | 130 ----- .../rails-write-complete-api-examples.json | 124 +++++ .../rails-write-complete-api-examples.md | 126 ----- .../runtime-abstract-traversal-patterns.json | 136 +++++ .../runtime-abstract-traversal-patterns.md | 138 ----- _reviewers/runtime-avoid-busy-waiting.json | 172 +++++++ _reviewers/runtime-avoid-busy-waiting.md | 174 ------- .../runtime-cache-expensive-computations.json | 124 +++++ .../runtime-cache-expensive-computations.md | 126 ----- ...me-centralize-platform-configurations.json | 142 ++++++ ...time-centralize-platform-configurations.md | 144 ------ ...e-choose-appropriate-error-mechanisms.json | 138 +++++ ...ime-choose-appropriate-error-mechanisms.md | 140 ----- .../runtime-choose-descriptive-names.json | 104 ++++ .../runtime-choose-descriptive-names.md | 106 ---- ...onstant-time-cryptographic-validation.json | 84 +++ ...-constant-time-cryptographic-validation.md | 86 ---- .../runtime-decompose-complex-algorithms.json | 114 +++++ .../runtime-decompose-complex-algorithms.md | 116 ----- .../runtime-document-code-meaningfully.json | 232 +++++++++ .../runtime-document-code-meaningfully.md | 234 --------- ...runtime-document-configuration-intent.json | 254 ++++++++++ .../runtime-document-configuration-intent.md | 256 ---------- .../runtime-document-function-contracts.json | 58 +++ .../runtime-document-function-contracts.md | 60 --- .../runtime-document-non-obvious-logic.json | 46 ++ .../runtime-document-non-obvious-logic.md | 48 -- ...e-enable-configurable-instrumentation.json | 58 +++ ...ime-enable-configurable-instrumentation.md | 60 --- .../runtime-explicit-api-versioning.json | 94 ++++ _reviewers/runtime-explicit-api-versioning.md | 96 ---- .../runtime-follow-naming-patterns.json | 284 +++++++++++ _reviewers/runtime-follow-naming-patterns.md | 286 ----------- _reviewers/runtime-honor-api-contracts.json | 118 +++++ _reviewers/runtime-honor-api-contracts.md | 120 ----- ...-maintain-configuration-compatibility.json | 118 +++++ ...me-maintain-configuration-compatibility.md | 120 ----- ...untime-maintain-consistent-formatting.json | 102 ++++ .../runtime-maintain-consistent-formatting.md | 104 ---- .../runtime-maintainable-test-structure.json | 172 +++++++ .../runtime-maintainable-test-structure.md | 174 ------- .../runtime-memory-barrier-pairing.json | 70 +++ _reviewers/runtime-memory-barrier-pairing.md | 72 --- .../runtime-memory-ordering-matters.json | 80 +++ _reviewers/runtime-memory-ordering-matters.md | 82 --- .../runtime-model-actual-hardware-costs.json | 254 ++++++++++ .../runtime-model-actual-hardware-costs.md | 256 ---------- .../runtime-names-reflect-actual-purpose.json | 190 +++++++ .../runtime-names-reflect-actual-purpose.md | 192 ------- ...time-optimize-aligned-simd-operations.json | 70 +++ ...untime-optimize-aligned-simd-operations.md | 72 --- ...time-optimize-build-dependency-chains.json | 116 +++++ ...untime-optimize-build-dependency-chains.md | 118 ----- _reviewers/runtime-optimize-common-paths.json | 242 +++++++++ _reviewers/runtime-optimize-common-paths.md | 244 --------- .../runtime-optimize-for-readability.json | 304 +++++++++++ .../runtime-optimize-for-readability.md | 306 ----------- .../runtime-optimize-memory-access.json | 476 +++++++++++++++++ 
_reviewers/runtime-optimize-memory-access.md | 478 ------------------ ...ime-parameterize-configuration-values.json | 68 +++ ...ntime-parameterize-configuration-values.md | 70 --- ...untime-platform-agnostic-network-apis.json | 104 ++++ .../runtime-platform-agnostic-network-apis.md | 106 ---- ...platform-aware-algorithm-optimization.json | 118 +++++ ...e-platform-aware-algorithm-optimization.md | 120 ----- ...ntime-preserve-pointer-authentication.json | 176 +++++++ ...runtime-preserve-pointer-authentication.md | 178 ------- .../runtime-prevent-null-references.json | 124 +++++ _reviewers/runtime-prevent-null-references.md | 126 ----- .../runtime-simplify-code-expressions.json | 168 ++++++ .../runtime-simplify-code-expressions.md | 170 ------- ...time-specific-exceptions-with-context.json | 308 +++++++++++ ...untime-specific-exceptions-with-context.md | 310 ------------ ...ntime-structure-configuration-options.json | 198 ++++++++ ...runtime-structure-configuration-options.md | 200 -------- _reviewers/runtime-use-utility-macros.json | 58 +++ _reviewers/runtime-use-utility-macros.md | 60 --- .../runtime-validate-before-access.json | 68 +++ _reviewers/runtime-validate-before-access.md | 70 --- _reviewers/swift-document-api-stability.json | 116 +++++ _reviewers/swift-document-api-stability.md | 118 ----- .../swift-document-environment-variables.json | 90 ++++ .../swift-document-environment-variables.md | 92 ---- .../swift-follow-swift-conventions.json | 46 ++ _reviewers/swift-follow-swift-conventions.md | 48 -- ...swift-follow-swift-naming-conventions.json | 68 +++ .../swift-follow-swift-naming-conventions.md | 70 --- _reviewers/swift-format-for-readability.json | 80 +++ _reviewers/swift-format-for-readability.md | 82 --- .../swift-generic-algorithm-design.json | 80 +++ _reviewers/swift-generic-algorithm-design.md | 82 --- .../swift-mathematical-precision-matters.json | 186 +++++++ .../swift-mathematical-precision-matters.md | 188 ------- ...swift-minimize-cross-device-transfers.json | 94 ++++ .../swift-minimize-cross-device-transfers.md | 96 ---- .../swift-provide-comprehensive-examples.json | 104 ++++ .../swift-provide-comprehensive-examples.md | 106 ---- _reviewers/swift-swift-idiomatic-naming.json | 104 ++++ _reviewers/swift-swift-idiomatic-naming.md | 106 ---- _reviewers/swift-swift-style-consistency.json | 58 +++ _reviewers/swift-swift-style-consistency.md | 60 --- _reviewers/swift-teach-by-example.json | 124 +++++ _reviewers/swift-teach-by-example.md | 126 ----- _reviewers/swift-training-aware-ml-apis.json | 92 ++++ _reviewers/swift-training-aware-ml-apis.md | 94 ---- migrate_json.py | 84 +++ 729 files changed, 52722 insertions(+), 53366 deletions(-) create mode 100644 _reviewers/aws-sdk-js-accurate-type-signatures.json create mode 100644 _reviewers/aws-sdk-js-complete-configuration-type-definitions.json create mode 100644 _reviewers/aws-sdk-js-content-integrity-verification.json create mode 100644 _reviewers/aws-sdk-js-defensive-null-checking.json create mode 100644 _reviewers/aws-sdk-js-document-apis-completely.json create mode 100644 _reviewers/aws-sdk-js-document-apis-thoroughly.json create mode 100644 _reviewers/aws-sdk-js-document-sdk-version-requirements.json create mode 100644 _reviewers/aws-sdk-js-early-return-after-errors.json create mode 100644 _reviewers/aws-sdk-js-example-documentation-standards.json create mode 100644 _reviewers/aws-sdk-js-explicit-verified-configurations.json create mode 100644 _reviewers/aws-sdk-js-follow-established-testing-patterns.json create mode 
100644 _reviewers/aws-sdk-js-isolate-sensitive-buffer-data.json create mode 100644 _reviewers/aws-sdk-js-limit-cache-size.json create mode 100644 _reviewers/aws-sdk-js-maintain-consistent-formatting.json create mode 100644 _reviewers/aws-sdk-js-organize-type-declarations.json create mode 100644 _reviewers/aws-sdk-js-semantic-naming-conventions.json create mode 100644 _reviewers/aws-sdk-js-semantic-type-organization.json create mode 100644 _reviewers/aws-sdk-js-standardize-api-promise-patterns.json create mode 100644 _reviewers/aws-sdk-js-structured-test-resource-management.json create mode 100644 _reviewers/aws-sdk-js-test-configuration-precedence.json create mode 100644 _reviewers/aws-sdk-js-validate-configurations-with-clarity.json create mode 100644 _reviewers/azure-sdk-for-net-address-not-mask.json create mode 100644 _reviewers/azure-sdk-for-net-approve-ai-dependencies-conditionally.json create mode 100644 _reviewers/azure-sdk-for-net-centralize-pipeline-configurations.json create mode 100644 _reviewers/azure-sdk-for-net-check-before-dereferencing.json create mode 100644 _reviewers/azure-sdk-for-net-complete-authentication-testing.json create mode 100644 _reviewers/azure-sdk-for-net-complete-pipeline-configurations.json create mode 100644 _reviewers/azure-sdk-for-net-descriptive-consistent-identifiers.json create mode 100644 _reviewers/azure-sdk-for-net-design-stable-apis.json create mode 100644 _reviewers/azure-sdk-for-net-document-ai-changes-thoroughly.json create mode 100644 _reviewers/azure-sdk-for-net-document-code-reasoning.json create mode 100644 _reviewers/azure-sdk-for-net-document-non-obvious-code.json create mode 100644 _reviewers/azure-sdk-for-net-eliminate-repeated-code.json create mode 100644 _reviewers/azure-sdk-for-net-externalize-configuration-values.json create mode 100644 _reviewers/azure-sdk-for-net-externalize-sensitive-credentials.json create mode 100644 _reviewers/azure-sdk-for-net-handle-external-process-errors.json create mode 100644 _reviewers/azure-sdk-for-net-maintain-clean-code-structure.json create mode 100644 _reviewers/azure-sdk-for-net-match-ci-commands-locally.json create mode 100644 _reviewers/azure-sdk-for-net-minimize-memory-allocations.json create mode 100644 _reviewers/azure-sdk-for-net-prefer-identity-based-authentication.json create mode 100644 _reviewers/azure-sdk-for-net-preserve-protocol-data-integrity.json create mode 100644 _reviewers/azure-sdk-for-net-redact-sensitive-information.json create mode 100644 _reviewers/azure-sdk-for-net-specific-exceptions-for-clarity.json create mode 100644 _reviewers/azure-sdk-for-net-standardize-environment-variables.json create mode 100644 _reviewers/azure-sdk-for-net-surface-errors-appropriately.json create mode 100644 _reviewers/azure-sdk-for-net-use-domain-specific-type-names.json create mode 100644 _reviewers/azure-sdk-for-net-use-higher-level-telemetry.json create mode 100644 _reviewers/azure-sdk-for-net-user-friendly-configuration-values.json create mode 100644 _reviewers/azure-sdk-for-net-write-deterministic-tests.json create mode 100644 _reviewers/chef-choose-semantic-algorithms.json create mode 100644 _reviewers/chef-clear-abstraction-boundaries.json create mode 100644 _reviewers/chef-consistent-descriptive-naming-patterns.json create mode 100644 _reviewers/chef-document-configuration-completely.json create mode 100644 _reviewers/chef-document-configuration-decisions.json create mode 100644 _reviewers/chef-document-with-examples.json create mode 100644 
_reviewers/chef-explicit-configuration-over-implicit.json create mode 100644 _reviewers/chef-externalize-configuration-values.json create mode 100644 _reviewers/chef-extract-and-organize-methods.json create mode 100644 _reviewers/chef-fail-fast-principle.json create mode 100644 _reviewers/chef-fail-gracefully-always.json create mode 100644 _reviewers/chef-keep-actions-versions-current.json create mode 100644 _reviewers/chef-limit-token-permissions.json create mode 100644 _reviewers/chef-log-message-quality.json create mode 100644 _reviewers/chef-memoize-expensive-operations.json create mode 100644 _reviewers/chef-never-commit-secrets.json create mode 100644 _reviewers/chef-prefer-guard-clauses.json create mode 100644 _reviewers/chef-remove-commented-code.json create mode 100644 _reviewers/chef-secure-credential-management.json create mode 100644 _reviewers/chef-secure-network-operations.json create mode 100644 _reviewers/chef-standardize-bash-error-handling.json create mode 100644 _reviewers/chef-standardize-installation-directory-paths.json create mode 100644 _reviewers/chef-standardize-shell-flags.json create mode 100644 _reviewers/chef-structure-cicd-scripts.json create mode 100644 _reviewers/chef-update-security-critical-dependencies.json create mode 100644 _reviewers/chef-use-strict-test-doubles.json create mode 100644 _reviewers/chef-verify-automated-documentation.json create mode 100644 _reviewers/elasticsearch-avoid-flaky-tests.json create mode 100644 _reviewers/elasticsearch-clarity-over-uncertainty.json create mode 100644 _reviewers/elasticsearch-complete-api-parameter-documentation.json create mode 100644 _reviewers/elasticsearch-configure-type-serialization.json create mode 100644 _reviewers/elasticsearch-defensive-null-handling.json create mode 100644 _reviewers/elasticsearch-design-for-evolution.json create mode 100644 _reviewers/elasticsearch-document-performance-tradeoffs.json create mode 100644 _reviewers/elasticsearch-document-security-requirements-explicitly.json create mode 100644 _reviewers/elasticsearch-enforce-least-privilege.json create mode 100644 _reviewers/elasticsearch-exceptions-for-critical-errors.json create mode 100644 _reviewers/elasticsearch-extract-for-clarity.json create mode 100644 _reviewers/elasticsearch-measure-before-optimizing-performance.json create mode 100644 _reviewers/elasticsearch-name-reflects-meaning.json create mode 100644 _reviewers/elasticsearch-optimize-before-implementing.json create mode 100644 _reviewers/elasticsearch-parallel-branch-traceability.json create mode 100644 _reviewers/elasticsearch-prefer-callbacks-over-blocking.json create mode 100644 _reviewers/elasticsearch-prevent-redundant-operations.json create mode 100644 _reviewers/elasticsearch-robust-test-assertions.json create mode 100644 _reviewers/elasticsearch-scope-and-document-configurations.json create mode 100644 _reviewers/elasticsearch-specify-explicit-rest-formats.json create mode 100644 _reviewers/elasticsearch-stage-intensive-operations-carefully.json create mode 100644 _reviewers/elasticsearch-use-configuration-access-methods.json create mode 100644 _reviewers/elasticsearch-use-current-configuration-patterns.json create mode 100644 _reviewers/framework-cache-expensive-operations.json create mode 100644 _reviewers/framework-descriptive-configuration-keys.json create mode 100644 _reviewers/framework-design-flexible-apis.json create mode 100644 _reviewers/framework-disable-coverage-in-workflows.json create mode 100644 
_reviewers/framework-escape-column-names-properly.json create mode 100644 _reviewers/framework-explicit-null-handling.json create mode 100644 _reviewers/framework-keep-ci-configurations-minimal.json create mode 100644 _reviewers/framework-manage-dependencies-wisely.json create mode 100644 _reviewers/framework-mark-sensitive-parameters.json create mode 100644 _reviewers/framework-name-indicates-clear-purpose.json create mode 100644 _reviewers/framework-optimize-for-code-readability.json create mode 100644 _reviewers/framework-optimize-loop-operations.json create mode 100644 _reviewers/framework-optimize-migration-code.json create mode 100644 _reviewers/framework-precise-testing-dependency-versioning.json create mode 100644 _reviewers/framework-precise-type-annotations.json create mode 100644 _reviewers/framework-use-modern-phpunit-attributes.json create mode 100644 _reviewers/framework-use-semantic-exceptions.json create mode 100644 _reviewers/kotlin-consider-operation-time-complexity.json create mode 100644 _reviewers/kotlin-copy-external-string-inputs.json create mode 100644 _reviewers/kotlin-dependency-verification-configuration.json create mode 100644 _reviewers/kotlin-design-extensible-stable-apis.json create mode 100644 _reviewers/kotlin-document-security-implementations.json create mode 100644 _reviewers/kotlin-effective-api-samples.json create mode 100644 _reviewers/kotlin-keep-code-clearly-organized.json create mode 100644 _reviewers/kotlin-minimize-not-null-assertions.json create mode 100644 _reviewers/kotlin-minimize-unnecessary-work.json create mode 100644 _reviewers/kotlin-names-express-clear-intent.json create mode 100644 _reviewers/kotlin-prefer-descriptive-errors.json create mode 100644 _reviewers/kotlin-standardize-build-configurations.json create mode 100644 _reviewers/kotlin-test-edge-cases.json create mode 100644 _reviewers/kotlin-use-configuration-property-providers.json create mode 100644 _reviewers/kotlin-use-defensive-null-checks.json create mode 100644 _reviewers/kotlin-use-modern-test-infrastructure.json create mode 100644 _reviewers/kubeflow-api-structure-balance.json create mode 100644 _reviewers/kubeflow-automate-style-enforcement.json create mode 100644 _reviewers/kubeflow-centralize-configuration-constants.json create mode 100644 _reviewers/kubeflow-centralize-configuration-values.json create mode 100644 _reviewers/kubeflow-centralize-dependency-configurations.json create mode 100644 _reviewers/kubeflow-check-before-use.json create mode 100644 _reviewers/kubeflow-component-agnostic-styling-principles.json create mode 100644 _reviewers/kubeflow-configurable-security-with-defaults.json create mode 100644 _reviewers/kubeflow-consistent-descriptive-naming.json create mode 100644 _reviewers/kubeflow-consistent-separator-conventions.json create mode 100644 _reviewers/kubeflow-contextualize-and-classify-errors.json create mode 100644 _reviewers/kubeflow-control-header-modification.json create mode 100644 _reviewers/kubeflow-descriptive-consistent-naming.json create mode 100644 _reviewers/kubeflow-document-code-thoroughly.json create mode 100644 _reviewers/kubeflow-document-migration-paths.json create mode 100644 _reviewers/kubeflow-document-networking-annotations.json create mode 100644 _reviewers/kubeflow-document-with-precision.json create mode 100644 _reviewers/kubeflow-enforce-https-protocol.json create mode 100644 _reviewers/kubeflow-enforce-least-privilege.json create mode 100644 _reviewers/kubeflow-environment-aware-configuration-design.json create mode 100644 
_reviewers/kubeflow-environment-variable-management.json create mode 100644 _reviewers/kubeflow-externalize-configuration-parameters.json create mode 100644 _reviewers/kubeflow-follow-api-conventions.json create mode 100644 _reviewers/kubeflow-go-export-naming-conventions.json create mode 100644 _reviewers/kubeflow-harden-container-security.json create mode 100644 _reviewers/kubeflow-isolate-test-cases.json create mode 100644 _reviewers/kubeflow-load-configurations-efficiently.json create mode 100644 _reviewers/kubeflow-manage-configuration-changes.json create mode 100644 _reviewers/kubeflow-mark-ui-text-i18n.json create mode 100644 _reviewers/kubeflow-match-algorithms-to-purpose.json create mode 100644 _reviewers/kubeflow-normalize-url-paths.json create mode 100644 _reviewers/kubeflow-optimize-container-build-configurations.json create mode 100644 _reviewers/kubeflow-pin-version-dependencies.json create mode 100644 _reviewers/kubeflow-precise-workflow-triggers.json create mode 100644 _reviewers/kubeflow-prefer-external-configuration.json create mode 100644 _reviewers/kubeflow-prevent-xss-vulnerabilities.json create mode 100644 _reviewers/kubeflow-prioritize-readability-over-brevity.json create mode 100644 _reviewers/kubeflow-private-variable-naming-convention.json create mode 100644 _reviewers/kubeflow-private-vulnerability-reporting.json create mode 100644 _reviewers/kubeflow-reduce-nesting-depth.json create mode 100644 _reviewers/kubeflow-safe-url-operations.json create mode 100644 _reviewers/kubeflow-simplify-code-structure.json create mode 100644 _reviewers/kubeflow-specific-network-access-documentation.json create mode 100644 _reviewers/kubeflow-stable-configuration-management.json create mode 100644 _reviewers/kubeflow-standardize-build-configurations.json create mode 100644 _reviewers/kubeflow-standardize-makefile-patterns.json create mode 100644 _reviewers/kubeflow-standardize-network-tools.json create mode 100644 _reviewers/kubeflow-standardize-style-scripts.json create mode 100644 _reviewers/kubeflow-structure-for-navigation.json create mode 100644 _reviewers/kubeflow-structured-documentation-with-examples.json create mode 100644 _reviewers/kubeflow-structured-owners-files.json create mode 100644 _reviewers/kubeflow-type-appropriate-default-values.json create mode 100644 _reviewers/kubeflow-unique-workflow-step-names.json create mode 100644 _reviewers/kubeflow-use-appropriate-log-levels.json create mode 100644 _reviewers/kubeflow-use-css-classes-properly.json create mode 100644 _reviewers/kubeflow-use-enums-for-state.json create mode 100644 _reviewers/kubeflow-use-modern-javascript-idioms.json create mode 100644 _reviewers/kubeflow-use-snake-case-in-python.json create mode 100644 _reviewers/kubeflow-use-table-driven-tests.json create mode 100644 _reviewers/kubeflow-validate-inputs-explicitly.json create mode 100644 _reviewers/kubeflow-validate-model-optimization.json create mode 100644 _reviewers/langchainjs-ai-dependency-management.json create mode 100644 _reviewers/langchainjs-avoid-hardcoded-configurations.json create mode 100644 _reviewers/langchainjs-chunked-data-processing.json create mode 100644 _reviewers/langchainjs-comprehensive-ai-documentation.json create mode 100644 _reviewers/langchainjs-consistent-ai-naming.json create mode 100644 _reviewers/langchainjs-consistent-naming-conventions.json create mode 100644 _reviewers/langchainjs-constructor-over-setter.json create mode 100644 _reviewers/langchainjs-dependency-classification-standards.json create mode 100644 
_reviewers/langchainjs-documentation-completeness-check.json create mode 100644 _reviewers/langchainjs-eliminate-redundant-code.json create mode 100644 _reviewers/langchainjs-follow-documentation-standards.json create mode 100644 _reviewers/langchainjs-follow-standard-naming.json create mode 100644 _reviewers/langchainjs-match-configuration-documentation.json create mode 100644 _reviewers/langchainjs-platform-appropriate-environment-variables.json create mode 100644 _reviewers/langchainjs-prefer-nullish-coalescing.json create mode 100644 _reviewers/langchainjs-preserve-api-backward-compatibility.json create mode 100644 _reviewers/langchainjs-simplify-code-organization.json create mode 100644 _reviewers/langchainjs-throw-meaningful-errors.json create mode 100644 _reviewers/langchainjs-typescript-naming-standards.json create mode 100644 _reviewers/langchainjs-update-documentation-configuration.json create mode 100644 _reviewers/langchainjs-use-comprehensive-jsdoc.json create mode 100644 _reviewers/langchainjs-use-database-native-types.json create mode 100644 _reviewers/langchainjs-validate-untrusted-input.json create mode 100644 _reviewers/mxnet-avoid-eval-function.json create mode 100644 _reviewers/mxnet-avoid-redundant-calculations.json create mode 100644 _reviewers/mxnet-centralize-configuration-parameters.json create mode 100644 _reviewers/mxnet-centralize-synchronization-logic.json create mode 100644 _reviewers/mxnet-comprehensive-api-documentation.json create mode 100644 _reviewers/mxnet-consistent-api-documentation.json create mode 100644 _reviewers/mxnet-consistent-logging-format.json create mode 100644 _reviewers/mxnet-consistent-naming-patterns.json create mode 100644 _reviewers/mxnet-document-all-parameters.json create mode 100644 _reviewers/mxnet-document-api-completely.json create mode 100644 _reviewers/mxnet-document-environment-variables.json create mode 100644 _reviewers/mxnet-documentation-clarity-and-formatting.json create mode 100644 _reviewers/mxnet-eliminate-redundant-constructs.json create mode 100644 _reviewers/mxnet-explain-optimization-mechanisms.json create mode 100644 _reviewers/mxnet-explicit-null-checks.json create mode 100644 _reviewers/mxnet-graph-traversal-optimization.json create mode 100644 _reviewers/mxnet-hybridization-compatible-operations.json create mode 100644 _reviewers/mxnet-optimize-iteration-patterns.json create mode 100644 _reviewers/mxnet-pre-compute-reused-data.json create mode 100644 _reviewers/mxnet-simplify-for-readability.json create mode 100644 _reviewers/mxnet-technical-precision-matters.json create mode 100644 _reviewers/mxnet-use-explicit-optional-types.json create mode 100644 _reviewers/mxnet-use-intent-revealing-names.json create mode 100644 _reviewers/mxnet-use-named-constants.json create mode 100644 _reviewers/mxnet-use-pytest-parameterization.json create mode 100644 _reviewers/opencv-check-nulls-directly.json create mode 100644 _reviewers/opencv-cleanup-before-errors.json create mode 100644 _reviewers/opencv-clear-api-contracts.json create mode 100644 _reviewers/opencv-code-for-readability.json create mode 100644 _reviewers/opencv-consistent-descriptive-naming.json create mode 100644 _reviewers/opencv-cross-platform-api-design-rules.json create mode 100644 _reviewers/opencv-document-configuration-version-requirements.json create mode 100644 _reviewers/opencv-document-properly-with-references.json create mode 100644 _reviewers/opencv-feature-flag-convention.json create mode 100644 
_reviewers/opencv-framework-synchronization-practices.json create mode 100644 _reviewers/opencv-guard-optional-dependencies.json create mode 100644 _reviewers/opencv-maintain-build-compatibility.json create mode 100644 _reviewers/opencv-maintain-code-consistency.json create mode 100644 _reviewers/opencv-meaningful-semantic-naming.json create mode 100644 _reviewers/opencv-optimize-container-access.json create mode 100644 _reviewers/opencv-optimize-for-common-cases.json create mode 100644 _reviewers/opencv-optimize-memory-allocation-patterns.json create mode 100644 _reviewers/opencv-prevent-null-vulnerabilities.json create mode 100644 _reviewers/opencv-thread-safe-resource-cleanup.json create mode 100644 _reviewers/opencv-use-opencv-error-mechanisms.json create mode 100644 _reviewers/opencv-use-optimized-functions.json create mode 100644 _reviewers/opencv-use-proper-assertions.json create mode 100644 _reviewers/opencv-validate-tensor-dimensions.json create mode 100644 _reviewers/pytorch-avoid-logic-duplication.json create mode 100644 _reviewers/pytorch-check-before-access.json create mode 100644 _reviewers/pytorch-choose-optimal-algorithms.json create mode 100644 _reviewers/pytorch-clean-configuration-files.json create mode 100644 _reviewers/pytorch-complete-documented-references.json create mode 100644 _reviewers/pytorch-descriptive-meaningful-names.json create mode 100644 _reviewers/pytorch-document-for-future.json create mode 100644 _reviewers/pytorch-handle-non-deterministic-collections.json create mode 100644 _reviewers/pytorch-keep-apis-user-centric-simple.json create mode 100644 _reviewers/pytorch-meaningful-descriptive-identifiers.json create mode 100644 _reviewers/pytorch-optimize-device-transfers.json create mode 100644 _reviewers/pytorch-optimize-memory-operations.json create mode 100644 _reviewers/pytorch-optimize-workflow-resources.json create mode 100644 _reviewers/pytorch-platform-specific-configuration-guards.json create mode 100644 _reviewers/pytorch-preserve-api-compatibility.json create mode 100644 _reviewers/pytorch-preserve-error-context.json create mode 100644 _reviewers/pytorch-prevent-memory-vulnerabilities.json create mode 100644 _reviewers/pytorch-remove-code-clutter.json create mode 100644 _reviewers/pytorch-specific-exception-handling.json create mode 100644 _reviewers/pytorch-test-comprehensively-and-systematically.json create mode 100644 _reviewers/pytorch-use-accelerator-agnostic-code.json create mode 100644 _reviewers/pytorch-validate-environment-variables.json create mode 100644 _reviewers/pytorch-validate-performance-metrics.json create mode 100644 _reviewers/pytorch-write-clean-maintainable-code.json create mode 100644 _reviewers/rails-configure-at-proper-scope.json create mode 100644 _reviewers/rails-consistent-terminology-usage.json create mode 100644 _reviewers/rails-content-security-policy-configuration.json create mode 100644 _reviewers/rails-contextual-error-messages.json create mode 100644 _reviewers/rails-database-specific-query-optimization.json create mode 100644 _reviewers/rails-document-apis-clearly.json create mode 100644 _reviewers/rails-document-performance-implications.json create mode 100644 _reviewers/rails-efficient-data-processing.json create mode 100644 _reviewers/rails-environment-specific-logger-configuration.json create mode 100644 _reviewers/rails-follow-documentation-conventions.json create mode 100644 _reviewers/rails-human-readable-configuration-values.json create mode 100644 _reviewers/rails-initialize-nil-prone-variables.json 
create mode 100644 _reviewers/rails-layer-security-defenses.json create mode 100644 _reviewers/rails-minimize-public-api-surface.json create mode 100644 _reviewers/rails-minimize-unnecessary-object-allocations.json create mode 100644 _reviewers/rails-optimize-cache-headers.json create mode 100644 _reviewers/rails-organize-tests-for-clarity.json create mode 100644 _reviewers/rails-place-configurations-appropriately.json create mode 100644 _reviewers/rails-prefer-simpler-expressions.json create mode 100644 _reviewers/rails-self-documenting-identifier-names.json create mode 100644 _reviewers/rails-semantic-html-usage.json create mode 100644 _reviewers/rails-test-helpers-for-maintainability.json create mode 100644 _reviewers/rails-understand-query-method-behavior.json create mode 100644 _reviewers/rails-wrap-threaded-code-properly.json create mode 100644 _reviewers/rails-write-complete-api-examples.json create mode 100644 _reviewers/runtime-abstract-traversal-patterns.json create mode 100644 _reviewers/runtime-avoid-busy-waiting.json create mode 100644 _reviewers/runtime-cache-expensive-computations.json create mode 100644 _reviewers/runtime-centralize-platform-configurations.json create mode 100644 _reviewers/runtime-choose-appropriate-error-mechanisms.json create mode 100644 _reviewers/runtime-choose-descriptive-names.json create mode 100644 _reviewers/runtime-constant-time-cryptographic-validation.json create mode 100644 _reviewers/runtime-decompose-complex-algorithms.json create mode 100644 _reviewers/runtime-document-code-meaningfully.json create mode 100644 _reviewers/runtime-document-configuration-intent.json create mode 100644 _reviewers/runtime-document-function-contracts.json create mode 100644 _reviewers/runtime-document-non-obvious-logic.json create mode 100644 _reviewers/runtime-enable-configurable-instrumentation.json create mode 100644 _reviewers/runtime-explicit-api-versioning.json create mode 100644 _reviewers/runtime-follow-naming-patterns.json create mode 100644 _reviewers/runtime-honor-api-contracts.json create mode 100644 _reviewers/runtime-maintain-configuration-compatibility.json create mode 100644 _reviewers/runtime-maintain-consistent-formatting.json create mode 100644 _reviewers/runtime-maintainable-test-structure.json create mode 100644 _reviewers/runtime-memory-barrier-pairing.json create mode 100644 _reviewers/runtime-memory-ordering-matters.json create mode 100644 _reviewers/runtime-model-actual-hardware-costs.json create mode 100644 _reviewers/runtime-names-reflect-actual-purpose.json create mode 100644 _reviewers/runtime-optimize-aligned-simd-operations.json create mode 100644 _reviewers/runtime-optimize-build-dependency-chains.json create mode 100644 _reviewers/runtime-optimize-common-paths.json create mode 100644 _reviewers/runtime-optimize-for-readability.json create mode 100644 _reviewers/runtime-optimize-memory-access.json create mode 100644 _reviewers/runtime-parameterize-configuration-values.json create mode 100644 _reviewers/runtime-platform-agnostic-network-apis.json create mode 100644 _reviewers/runtime-platform-aware-algorithm-optimization.json create mode 100644 _reviewers/runtime-preserve-pointer-authentication.json create mode 100644 _reviewers/runtime-prevent-null-references.json create mode 100644 _reviewers/runtime-simplify-code-expressions.json create mode 100644 _reviewers/runtime-specific-exceptions-with-context.json create mode 100644 _reviewers/runtime-structure-configuration-options.json create mode 100644 
_reviewers/runtime-use-utility-macros.json create mode 100644 _reviewers/runtime-validate-before-access.json create mode 100644 _reviewers/swift-document-api-stability.json create mode 100644 _reviewers/swift-document-environment-variables.json create mode 100644 _reviewers/swift-follow-swift-conventions.json create mode 100644 _reviewers/swift-follow-swift-naming-conventions.json create mode 100644 _reviewers/swift-format-for-readability.json create mode 100644 _reviewers/swift-generic-algorithm-design.json create mode 100644 _reviewers/swift-mathematical-precision-matters.json create mode 100644 _reviewers/swift-minimize-cross-device-transfers.json create mode 100644 _reviewers/swift-provide-comprehensive-examples.json create mode 100644 _reviewers/swift-swift-idiomatic-naming.json create mode 100644 _reviewers/swift-swift-style-consistency.json create mode 100644 _reviewers/swift-teach-by-example.json create mode 100644 _reviewers/swift-training-aware-ml-apis.json create mode 100644 migrate_json.py diff --git a/_reviewers/aws-sdk-js-accurate-type-signatures.json b/_reviewers/aws-sdk-js-accurate-type-signatures.json new file mode 100644 index 0000000..405891a --- /dev/null +++ b/_reviewers/aws-sdk-js-accurate-type-signatures.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "511436398", + "pr_number": 3511, + "pr_file": "lib/request.d.ts", + "created_at": "2020-10-24T12:46:35+00:00", + "commented_code": "* @param {function} listener - Callback to run when the request is being validated.\n * @param {boolean} prepend - If set, prepends listener instead of appending.\n */\n on(event: \"validate\", listener: (request: Request) => void, prepend?: boolean): Request;\n on(event: \"validate\", listener: (request: Request, doneCallback?: () => void) => void, prepend?: boolean): Request;", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "511436398", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 3511, + "pr_file": "lib/request.d.ts", + "discussion_id": "511436398", + "commented_code": "@@ -54,23 +54,23 @@ export class Request {\n * @param {function} listener - Callback to run when the request is being validated.\n * @param {boolean} prepend - If set, prepends listener instead of appending.\n */\n- on(event: \"validate\", listener: (request: Request) => void, prepend?: boolean): Request;\n+ on(event: \"validate\", listener: (request: Request, doneCallback?: () => void) => void, prepend?: boolean): Request;", + "comment_created_at": "2020-10-24T12:46:35+00:00", + "comment_author": "jeromecovington", + "comment_body": "I added `doneCallback?` as an optional second param, trying to be sensitive to not breaking anybody's current code that may not use the param. 
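To make the trade-off in this thread concrete, here is a minimal hypothetical TypeScript sketch (not the shipped aws-sdk-js typings); `Listener` and `StrictListener` are illustrative names only:

```typescript
// Sketch only: contrasts the optional vs. required doneCallback shapes discussed above.
type Listener = (request: unknown, doneCallback?: () => void) => void;

// With an optional doneCallback, listener bodies must guard before signalling completion.
const asyncListener: Listener = (_request, done) => {
  // ...asynchronous validation work would happen here...
  if (done) done();
};

// A required doneCallback makes the contract explicit: async listeners are expected to call done().
type StrictListener = (request: unknown, doneCallback: () => void) => void;
const strictListener: StrictListener = (_request, done) => {
  done();
};
```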
Although I'm divided on whether or not it should probably actually be a required argument, as I'm not sure anyone's code using `onAsync()` listeners should technically be correct _without_ calling `done()`.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "145172099", + "pr_number": 1711, + "pr_file": "lib/services/s3.d.ts", + "created_at": "2017-10-17T15:45:27+00:00", + "commented_code": "getSignedUrl(operation: string, params: any): string;\n\n /**\n * Returns a 'thenable' promise that will be resolved with a pre-signed URL for a given operation name.\n */\n getSignedUrlPromise(operation: string, params: any): Promise;", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "145172099", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1711, + "pr_file": "lib/services/s3.d.ts", + "discussion_id": "145172099", + "commented_code": "@@ -13,6 +13,11 @@ export class S3Customizations extends Service {\n getSignedUrl(operation: string, params: any): string;\n \n /**\n+ * Returns a 'thenable' promise that will be resolved with a pre-signed URL for a given operation name.\n+ */\n+ getSignedUrlPromise(operation: string, params: any): Promise;", + "comment_created_at": "2017-10-17T15:45:27+00:00", + "comment_author": "jeskew", + "comment_body": "Shouldn't the return value be `Promise`?", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/aws-sdk-js-accurate-type-signatures.md b/_reviewers/aws-sdk-js-accurate-type-signatures.md index 8bc4971..f970f37 100644 --- a/_reviewers/aws-sdk-js-accurate-type-signatures.md +++ b/_reviewers/aws-sdk-js-accurate-type-signatures.md @@ -35,51 +35,3 @@ on(event: "validate", listener: (request: Request, doneCallback: () => voi ``` Accurate type signatures improve API usability, enable better IDE support, and reduce runtime errors by catching incorrect usage patterns at compile time. - - -[ - { - "discussion_id": "511436398", - "pr_number": 3511, - "pr_file": "lib/request.d.ts", - "created_at": "2020-10-24T12:46:35+00:00", - "commented_code": "* @param {function} listener - Callback to run when the request is being validated.\n * @param {boolean} prepend - If set, prepends listener instead of appending.\n */\n on(event: \"validate\", listener: (request: Request) => void, prepend?: boolean): Request;\n on(event: \"validate\", listener: (request: Request, doneCallback?: () => void) => void, prepend?: boolean): Request;", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "511436398", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 3511, - "pr_file": "lib/request.d.ts", - "discussion_id": "511436398", - "commented_code": "@@ -54,23 +54,23 @@ export class Request {\n * @param {function} listener - Callback to run when the request is being validated.\n * @param {boolean} prepend - If set, prepends listener instead of appending.\n */\n- on(event: \"validate\", listener: (request: Request) => void, prepend?: boolean): Request;\n+ on(event: \"validate\", listener: (request: Request, doneCallback?: () => void) => void, prepend?: boolean): Request;", - "comment_created_at": "2020-10-24T12:46:35+00:00", - "comment_author": "jeromecovington", - "comment_body": "I added `doneCallback?` as an optional second param, trying to be sensitive to not breaking anybody's current code that may not use the param. 
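On the `getSignedUrlPromise` return-type question above, a small hypothetical sketch of the narrower signature (this is not the actual `lib/services/s3.d.ts`, and the bucket/key values are placeholders):

```typescript
// Sketch: a pre-signed URL resolves to a string, so Promise<string> is more precise than Promise<any>.
interface S3CustomizationsSketch {
  getSignedUrl(operation: string, params: any): string;
  getSignedUrlPromise(operation: string, params: any): Promise<string>;
}

declare const s3: S3CustomizationsSketch;
s3.getSignedUrlPromise('getObject', { Bucket: 'example-bucket', Key: 'example-key' })
  .then((url) => url.length); // url is inferred as string
```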
Although I'm divided on whether or not it should probably actually be a required argument, as I'm not sure anyone's code using `onAsync()` listeners should technically be correct _without_ calling `done()`.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "145172099", - "pr_number": 1711, - "pr_file": "lib/services/s3.d.ts", - "created_at": "2017-10-17T15:45:27+00:00", - "commented_code": "getSignedUrl(operation: string, params: any): string;\n\n /**\n * Returns a 'thenable' promise that will be resolved with a pre-signed URL for a given operation name.\n */\n getSignedUrlPromise(operation: string, params: any): Promise;", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "145172099", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1711, - "pr_file": "lib/services/s3.d.ts", - "discussion_id": "145172099", - "commented_code": "@@ -13,6 +13,11 @@ export class S3Customizations extends Service {\n getSignedUrl(operation: string, params: any): string;\n \n /**\n+ * Returns a 'thenable' promise that will be resolved with a pre-signed URL for a given operation name.\n+ */\n+ getSignedUrlPromise(operation: string, params: any): Promise;", - "comment_created_at": "2017-10-17T15:45:27+00:00", - "comment_author": "jeskew", - "comment_body": "Shouldn't the return value be `Promise`?", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/aws-sdk-js-complete-configuration-type-definitions.json b/_reviewers/aws-sdk-js-complete-configuration-type-definitions.json new file mode 100644 index 0000000..3ed5c3e --- /dev/null +++ b/_reviewers/aws-sdk-js-complete-configuration-type-definitions.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "85628177", + "pr_number": 1189, + "pr_file": "lib/config.d.ts", + "created_at": "2016-10-29T00:12:56+00:00", + "commented_code": "/// \n\nimport * as http from 'http';\nimport * as https from 'https';\nimport {AWSError} from './error';\nimport {Credentials} from './credentials';\nexport class Config {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "85628177", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1189, + "pr_file": "lib/config.d.ts", + "discussion_id": "85628177", + "commented_code": "@@ -0,0 +1,268 @@\n+/// \n+\n+import * as http from 'http';\n+import * as https from 'https';\n+import {AWSError} from './error';\n+import {Credentials} from './credentials';\n+export class Config {", + "comment_created_at": "2016-10-29T00:12:56+00:00", + "comment_author": "LiuJoyceC", + "comment_body": "The `Config` class also needs to have each service identifier as an optional instance variable. 
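A minimal hypothetical sketch of the suggestion above, assuming made-up option shapes rather than the real `lib/config.d.ts` declarations:

```typescript
// Sketch: exposing each service identifier as an optional member lets
// service-specific options be assigned without a compiler error.
interface S3OptionsSketch {
  params?: { Bucket?: string };
  useDualstack?: boolean;
}

class ConfigSketch {
  s3?: S3OptionsSketch;                // optional per-service slot
  dynamodb?: Record<string, unknown>;  // other services follow the same pattern
}

const config = new ConfigSketch();
config.s3 = { params: { Bucket: 'myBucket' }, useDualstack: true }; // now type-checks
```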
Otherwise the compiler will complain if a user tries to define service-specific parameters and options:\n\n```\nAWS.config.s3 = {params: {Bucket: 'myBucket'}, useDualstack: true};\n```\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "94810141", + "pr_number": 1285, + "pr_file": "lib/config.d.ts", + "created_at": "2017-01-05T17:18:54+00:00", + "commented_code": "* @param {ConfigurationOptions} options - a map of option keys and values.\n * @param {boolean} allowUnknownKeys - Whether unknown keys can be set on the configuration object.\n */\n update(options: ConfigurationOptions & ConfigurationServicePlaceholders & APIVersions & {[key: string]: any}, allowUnknownKeys: true): void;\n update(options: ConfigurationOptions & ConfigurationServicePlaceholders & APIVersions & CredentialsOptions & {[key: string]: any}, allowUnknownKeys: true): void;\n /**\n * Updates the current configuration object with new options.\n * \n * @param {ConfigurationOptions} options - a map of option keys and values.\n * @param {boolean} allowUnknownKeys - Defaults to false. Whether unknown keys can be set on the configuration object.\n */\n update(options: ConfigurationOptions & ConfigurationServicePlaceholders & APIVersions, allowUnknownKeys?: false): void;\n update(options: ConfigurationOptions & ConfigurationServicePlaceholders & APIVersions & CredentialsOptions, allowUnknownKeys?: false): void;", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "94810141", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1285, + "pr_file": "lib/config.d.ts", + "discussion_id": "94810141", + "commented_code": "@@ -63,14 +63,14 @@ export class Config extends ConfigBase {\n * @param {ConfigurationOptions} options - a map of option keys and values.\n * @param {boolean} allowUnknownKeys - Whether unknown keys can be set on the configuration object.\n */\n- update(options: ConfigurationOptions & ConfigurationServicePlaceholders & APIVersions & {[key: string]: any}, allowUnknownKeys: true): void;\n+ update(options: ConfigurationOptions & ConfigurationServicePlaceholders & APIVersions & CredentialsOptions & {[key: string]: any}, allowUnknownKeys: true): void;\n /**\n * Updates the current configuration object with new options.\n * \n * @param {ConfigurationOptions} options - a map of option keys and values.\n * @param {boolean} allowUnknownKeys - Defaults to false. Whether unknown keys can be set on the configuration object.\n */\n- update(options: ConfigurationOptions & ConfigurationServicePlaceholders & APIVersions, allowUnknownKeys?: false): void;\n+ update(options: ConfigurationOptions & ConfigurationServicePlaceholders & APIVersions & CredentialsOptions, allowUnknownKeys?: false): void;", + "comment_created_at": "2017-01-05T17:18:54+00:00", + "comment_author": "chrisradek", + "comment_body": "Can you add `CredentialsOptions` to the `update` method in `ConfigBase` as well? 
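To illustrate the shape being requested here, a hedged TypeScript sketch (the interfaces are stand-ins for, not copies of, the SDK's `ConfigurationOptions` and `CredentialsOptions`; the key values are placeholders):

```typescript
// Sketch: CredentialsOptions joins the intersection on both update() overloads,
// so a service-level config accepts credentials just like the global config.
interface CredentialsOptionsSketch {
  accessKeyId?: string;
  secretAccessKey?: string;
  sessionToken?: string;
}
interface ConfigurationOptionsSketch {
  region?: string;
}

interface ConfigBaseSketch {
  update(
    options: ConfigurationOptionsSketch & CredentialsOptionsSketch & { [key: string]: any },
    allowUnknownKeys: true
  ): void;
  update(
    options: ConfigurationOptionsSketch & CredentialsOptionsSketch,
    allowUnknownKeys?: false
  ): void;
}

declare const serviceConfig: ConfigBaseSketch;
serviceConfig.update({ region: 'us-east-1', accessKeyId: 'AKID', secretAccessKey: 'SECRET' });
```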
This should be allowed in service configuration in addition to the global config.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/aws-sdk-js-complete-configuration-type-definitions.md b/_reviewers/aws-sdk-js-complete-configuration-type-definitions.md index 1145e03..44b544f 100644 --- a/_reviewers/aws-sdk-js-complete-configuration-type-definitions.md +++ b/_reviewers/aws-sdk-js-complete-configuration-type-definitions.md @@ -35,51 +35,3 @@ AWS.config.s3 = {params: {Bucket: 'myBucket'}, useDualstack: true}; ``` This approach prevents TypeScript compiler errors when users set valid configurations and maintains consistency between global and service-specific configuration options. - - -[ - { - "discussion_id": "85628177", - "pr_number": 1189, - "pr_file": "lib/config.d.ts", - "created_at": "2016-10-29T00:12:56+00:00", - "commented_code": "/// \n\nimport * as http from 'http';\nimport * as https from 'https';\nimport {AWSError} from './error';\nimport {Credentials} from './credentials';\nexport class Config {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "85628177", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1189, - "pr_file": "lib/config.d.ts", - "discussion_id": "85628177", - "commented_code": "@@ -0,0 +1,268 @@\n+/// \n+\n+import * as http from 'http';\n+import * as https from 'https';\n+import {AWSError} from './error';\n+import {Credentials} from './credentials';\n+export class Config {", - "comment_created_at": "2016-10-29T00:12:56+00:00", - "comment_author": "LiuJoyceC", - "comment_body": "The `Config` class also needs to have each service identifier as an optional instance variable. Otherwise the compiler will complain if a user tries to define service-specific parameters and options:\n\n```\nAWS.config.s3 = {params: {Bucket: 'myBucket'}, useDualstack: true};\n```\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "94810141", - "pr_number": 1285, - "pr_file": "lib/config.d.ts", - "created_at": "2017-01-05T17:18:54+00:00", - "commented_code": "* @param {ConfigurationOptions} options - a map of option keys and values.\n * @param {boolean} allowUnknownKeys - Whether unknown keys can be set on the configuration object.\n */\n update(options: ConfigurationOptions & ConfigurationServicePlaceholders & APIVersions & {[key: string]: any}, allowUnknownKeys: true): void;\n update(options: ConfigurationOptions & ConfigurationServicePlaceholders & APIVersions & CredentialsOptions & {[key: string]: any}, allowUnknownKeys: true): void;\n /**\n * Updates the current configuration object with new options.\n * \n * @param {ConfigurationOptions} options - a map of option keys and values.\n * @param {boolean} allowUnknownKeys - Defaults to false. 
Whether unknown keys can be set on the configuration object.\n */\n update(options: ConfigurationOptions & ConfigurationServicePlaceholders & APIVersions, allowUnknownKeys?: false): void;\n update(options: ConfigurationOptions & ConfigurationServicePlaceholders & APIVersions & CredentialsOptions, allowUnknownKeys?: false): void;", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "94810141", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1285, - "pr_file": "lib/config.d.ts", - "discussion_id": "94810141", - "commented_code": "@@ -63,14 +63,14 @@ export class Config extends ConfigBase {\n * @param {ConfigurationOptions} options - a map of option keys and values.\n * @param {boolean} allowUnknownKeys - Whether unknown keys can be set on the configuration object.\n */\n- update(options: ConfigurationOptions & ConfigurationServicePlaceholders & APIVersions & {[key: string]: any}, allowUnknownKeys: true): void;\n+ update(options: ConfigurationOptions & ConfigurationServicePlaceholders & APIVersions & CredentialsOptions & {[key: string]: any}, allowUnknownKeys: true): void;\n /**\n * Updates the current configuration object with new options.\n * \n * @param {ConfigurationOptions} options - a map of option keys and values.\n * @param {boolean} allowUnknownKeys - Defaults to false. Whether unknown keys can be set on the configuration object.\n */\n- update(options: ConfigurationOptions & ConfigurationServicePlaceholders & APIVersions, allowUnknownKeys?: false): void;\n+ update(options: ConfigurationOptions & ConfigurationServicePlaceholders & APIVersions & CredentialsOptions, allowUnknownKeys?: false): void;", - "comment_created_at": "2017-01-05T17:18:54+00:00", - "comment_author": "chrisradek", - "comment_body": "Can you add `CredentialsOptions` to the `update` method in `ConfigBase` as well? This should be allowed in service configuration in addition to the global config.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/aws-sdk-js-content-integrity-verification.json b/_reviewers/aws-sdk-js-content-integrity-verification.json new file mode 100644 index 0000000..d168d38 --- /dev/null +++ b/_reviewers/aws-sdk-js-content-integrity-verification.json @@ -0,0 +1,160 @@ +[ + { + "discussion_id": "508372275", + "pr_number": 3505, + "pr_file": "lib/http/fetch.js", + "created_at": "2020-10-20T10:02:52+00:00", + "commented_code": "var AWS = require('../core');\nvar pipeThrough = AWS.util.pipeThrough;\nvar TransformStream = typeof TransformStream !== 'undefined' ? TransformStream :\n AWS.util.stream && AWS.util.stream.TransformStream ? AWS.util.stream.TransformStream :\n typeof WebStreamsPolyfill !== 'undefined' ? 
WebStreamsPolyfill.TransformStream :\n undefined;\nrequire('../http');\n\n/**\n * @api private\n */\nAWS.FetchClient = AWS.util.inherit({\n handleRequest: function handleRequest(httpRequest, httpOptions, callback, errCallback) {\n var self = this;\n var endpoint = httpRequest.endpoint;\n if (!httpOptions) httpOptions = {};\n\n var emitter = new EventEmitter();\n\n // Construct href\n var href = endpoint.protocol + '//' + endpoint.hostname;\n if (endpoint.port !== 80 && endpoint.port !== 443) {\n href += ':' + endpoint.port;\n }\n href += httpRequest.path;\n\n // Construct headers\n var headers = new Headers();\n AWS.util.each(httpRequest.headers, function (key, value) {\n // See: https://fetch.spec.whatwg.org/#forbidden-header-name\n if (key !== 'Content-Length' && key !== 'Expect' && key !== 'Host') {\n headers.set(key, value);\n }\n });\n\n // Send cookies?\n var credentials = 'omit';\n if (httpOptions.xhrWithCredentials) {\n credentials = 'include';\n }\n\n // Construct request\n // See: https://developer.mozilla.org/en-US/docs/Web/API/Request/Request\n var req = {\n method: httpRequest.method,\n headers: headers,\n credentials: credentials\n };\n if (httpRequest.body) req.body = httpRequest.body;\n var request = new Request(href, req);\n\n // Send request\n fetch(request).then(function(response) {\n if (!response.ok) throw Error(response.statusText);\n return response;\n }).then(function(response) {\n emitter.statusCode = response.status;\n emitter.headers = self.parseHeaders(response.headers);\n emitter.abort = function() { response.body.cancel(); };\n\n httpRequest.responseStream = self.writeBody(emitter, response); // attach responseStream to httpRequest\n httpRequest.stream = emitter; // attach emitter to httpRequest\n emitter.didCallback = false; // Required for httpDone event\n\n emitter.on('error', function() {\n if (emitter.didCallback) return; emitter.didCallback = true;\n errCallback.apply(emitter, arguments);\n });\n\n if (emitter.didCallback) return;\n callback(emitter);\n emitter.emit(\n 'headers',\n emitter.statusCode,\n emitter.headers,\n response.statusText\n );\n }, function(err) {\n if (emitter.didCallback) return; emitter.didCallback = true;\n errCallback(AWS.util.error(new Error('Network Failure'), {\n code: 'NetworkingError'\n }));\n }).catch(function(err) {\n if (emitter.didCallback) return; emitter.didCallback = true;\n errCallback(AWS.util.error(err, {\n code: 'BadReplyError'\n }));\n });\n\n return emitter;\n },\n\n writeBody: function writeBody(emitter, response) {\n var body = response.body;\n var totalBytes = parseInt(response.headers.get('Content-Length'), 10);", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "508372275", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 3505, + "pr_file": "lib/http/fetch.js", + "discussion_id": "508372275", + "commented_code": "@@ -0,0 +1,159 @@\n+var AWS = require('../core');\n+var pipeThrough = AWS.util.pipeThrough;\n+var TransformStream = typeof TransformStream !== 'undefined' ? TransformStream :\n+ AWS.util.stream && AWS.util.stream.TransformStream ? AWS.util.stream.TransformStream :\n+ typeof WebStreamsPolyfill !== 'undefined' ? 
WebStreamsPolyfill.TransformStream :\n+ undefined;\n+require('../http');\n+\n+/**\n+ * @api private\n+ */\n+AWS.FetchClient = AWS.util.inherit({\n+ handleRequest: function handleRequest(httpRequest, httpOptions, callback, errCallback) {\n+ var self = this;\n+ var endpoint = httpRequest.endpoint;\n+ if (!httpOptions) httpOptions = {};\n+\n+ var emitter = new EventEmitter();\n+\n+ // Construct href\n+ var href = endpoint.protocol + '//' + endpoint.hostname;\n+ if (endpoint.port !== 80 && endpoint.port !== 443) {\n+ href += ':' + endpoint.port;\n+ }\n+ href += httpRequest.path;\n+\n+ // Construct headers\n+ var headers = new Headers();\n+ AWS.util.each(httpRequest.headers, function (key, value) {\n+ // See: https://fetch.spec.whatwg.org/#forbidden-header-name\n+ if (key !== 'Content-Length' && key !== 'Expect' && key !== 'Host') {\n+ headers.set(key, value);\n+ }\n+ });\n+\n+ // Send cookies?\n+ var credentials = 'omit';\n+ if (httpOptions.xhrWithCredentials) {\n+ credentials = 'include';\n+ }\n+\n+ // Construct request\n+ // See: https://developer.mozilla.org/en-US/docs/Web/API/Request/Request\n+ var req = {\n+ method: httpRequest.method,\n+ headers: headers,\n+ credentials: credentials\n+ };\n+ if (httpRequest.body) req.body = httpRequest.body;\n+ var request = new Request(href, req);\n+\n+ // Send request\n+ fetch(request).then(function(response) {\n+ if (!response.ok) throw Error(response.statusText);\n+ return response;\n+ }).then(function(response) {\n+ emitter.statusCode = response.status;\n+ emitter.headers = self.parseHeaders(response.headers);\n+ emitter.abort = function() { response.body.cancel(); };\n+\n+ httpRequest.responseStream = self.writeBody(emitter, response); // attach responseStream to httpRequest\n+ httpRequest.stream = emitter; // attach emitter to httpRequest\n+ emitter.didCallback = false; // Required for httpDone event\n+\n+ emitter.on('error', function() {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback.apply(emitter, arguments);\n+ });\n+\n+ if (emitter.didCallback) return;\n+ callback(emitter);\n+ emitter.emit(\n+ 'headers',\n+ emitter.statusCode,\n+ emitter.headers,\n+ response.statusText\n+ );\n+ }, function(err) {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback(AWS.util.error(new Error('Network Failure'), {\n+ code: 'NetworkingError'\n+ }));\n+ }).catch(function(err) {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback(AWS.util.error(err, {\n+ code: 'BadReplyError'\n+ }));\n+ });\n+\n+ return emitter;\n+ },\n+\n+ writeBody: function writeBody(emitter, response) {\n+ var body = response.body;\n+ var totalBytes = parseInt(response.headers.get('Content-Length'), 10);", + "comment_created_at": "2020-10-20T10:02:52+00:00", + "comment_author": "jimmywarting", + "comment_body": "just a heads up: if the response is encoded with gzip, br, deflate or anything then that will be what the content-length will also be (not the actual file size) so when you read the all the chunks then you will only increase the loadedBytes with uncompressed data.\r\n\r\nwhich means loadedBytes might be higher than content-length in some scenarios", + "pr_file_module": null + }, + { + "comment_id": "508382288", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 3505, + "pr_file": "lib/http/fetch.js", + "discussion_id": "508372275", + "commented_code": "@@ -0,0 +1,159 @@\n+var AWS = require('../core');\n+var pipeThrough = AWS.util.pipeThrough;\n+var TransformStream = typeof TransformStream !== 
'undefined' ? TransformStream :\n+ AWS.util.stream && AWS.util.stream.TransformStream ? AWS.util.stream.TransformStream :\n+ typeof WebStreamsPolyfill !== 'undefined' ? WebStreamsPolyfill.TransformStream :\n+ undefined;\n+require('../http');\n+\n+/**\n+ * @api private\n+ */\n+AWS.FetchClient = AWS.util.inherit({\n+ handleRequest: function handleRequest(httpRequest, httpOptions, callback, errCallback) {\n+ var self = this;\n+ var endpoint = httpRequest.endpoint;\n+ if (!httpOptions) httpOptions = {};\n+\n+ var emitter = new EventEmitter();\n+\n+ // Construct href\n+ var href = endpoint.protocol + '//' + endpoint.hostname;\n+ if (endpoint.port !== 80 && endpoint.port !== 443) {\n+ href += ':' + endpoint.port;\n+ }\n+ href += httpRequest.path;\n+\n+ // Construct headers\n+ var headers = new Headers();\n+ AWS.util.each(httpRequest.headers, function (key, value) {\n+ // See: https://fetch.spec.whatwg.org/#forbidden-header-name\n+ if (key !== 'Content-Length' && key !== 'Expect' && key !== 'Host') {\n+ headers.set(key, value);\n+ }\n+ });\n+\n+ // Send cookies?\n+ var credentials = 'omit';\n+ if (httpOptions.xhrWithCredentials) {\n+ credentials = 'include';\n+ }\n+\n+ // Construct request\n+ // See: https://developer.mozilla.org/en-US/docs/Web/API/Request/Request\n+ var req = {\n+ method: httpRequest.method,\n+ headers: headers,\n+ credentials: credentials\n+ };\n+ if (httpRequest.body) req.body = httpRequest.body;\n+ var request = new Request(href, req);\n+\n+ // Send request\n+ fetch(request).then(function(response) {\n+ if (!response.ok) throw Error(response.statusText);\n+ return response;\n+ }).then(function(response) {\n+ emitter.statusCode = response.status;\n+ emitter.headers = self.parseHeaders(response.headers);\n+ emitter.abort = function() { response.body.cancel(); };\n+\n+ httpRequest.responseStream = self.writeBody(emitter, response); // attach responseStream to httpRequest\n+ httpRequest.stream = emitter; // attach emitter to httpRequest\n+ emitter.didCallback = false; // Required for httpDone event\n+\n+ emitter.on('error', function() {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback.apply(emitter, arguments);\n+ });\n+\n+ if (emitter.didCallback) return;\n+ callback(emitter);\n+ emitter.emit(\n+ 'headers',\n+ emitter.statusCode,\n+ emitter.headers,\n+ response.statusText\n+ );\n+ }, function(err) {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback(AWS.util.error(new Error('Network Failure'), {\n+ code: 'NetworkingError'\n+ }));\n+ }).catch(function(err) {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback(AWS.util.error(err, {\n+ code: 'BadReplyError'\n+ }));\n+ });\n+\n+ return emitter;\n+ },\n+\n+ writeBody: function writeBody(emitter, response) {\n+ var body = response.body;\n+ var totalBytes = parseInt(response.headers.get('Content-Length'), 10);", + "comment_created_at": "2020-10-20T10:19:05+00:00", + "comment_author": "heri16", + "comment_body": "This issue is very tricky indeed due to the fetch implementation, and I don't have a solution yet for `Content-Encoding` compression.\r\n\r\nSee discussion here:\r\nhttps://github.com/w3c/ServiceWorker/issues/339#issuecomment-372304884", + "pr_file_module": null + }, + { + "comment_id": "508384763", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 3505, + "pr_file": "lib/http/fetch.js", + "discussion_id": "508372275", + "commented_code": "@@ -0,0 +1,159 @@\n+var AWS = require('../core');\n+var pipeThrough = AWS.util.pipeThrough;\n+var 
TransformStream = typeof TransformStream !== 'undefined' ? TransformStream :\n+ AWS.util.stream && AWS.util.stream.TransformStream ? AWS.util.stream.TransformStream :\n+ typeof WebStreamsPolyfill !== 'undefined' ? WebStreamsPolyfill.TransformStream :\n+ undefined;\n+require('../http');\n+\n+/**\n+ * @api private\n+ */\n+AWS.FetchClient = AWS.util.inherit({\n+ handleRequest: function handleRequest(httpRequest, httpOptions, callback, errCallback) {\n+ var self = this;\n+ var endpoint = httpRequest.endpoint;\n+ if (!httpOptions) httpOptions = {};\n+\n+ var emitter = new EventEmitter();\n+\n+ // Construct href\n+ var href = endpoint.protocol + '//' + endpoint.hostname;\n+ if (endpoint.port !== 80 && endpoint.port !== 443) {\n+ href += ':' + endpoint.port;\n+ }\n+ href += httpRequest.path;\n+\n+ // Construct headers\n+ var headers = new Headers();\n+ AWS.util.each(httpRequest.headers, function (key, value) {\n+ // See: https://fetch.spec.whatwg.org/#forbidden-header-name\n+ if (key !== 'Content-Length' && key !== 'Expect' && key !== 'Host') {\n+ headers.set(key, value);\n+ }\n+ });\n+\n+ // Send cookies?\n+ var credentials = 'omit';\n+ if (httpOptions.xhrWithCredentials) {\n+ credentials = 'include';\n+ }\n+\n+ // Construct request\n+ // See: https://developer.mozilla.org/en-US/docs/Web/API/Request/Request\n+ var req = {\n+ method: httpRequest.method,\n+ headers: headers,\n+ credentials: credentials\n+ };\n+ if (httpRequest.body) req.body = httpRequest.body;\n+ var request = new Request(href, req);\n+\n+ // Send request\n+ fetch(request).then(function(response) {\n+ if (!response.ok) throw Error(response.statusText);\n+ return response;\n+ }).then(function(response) {\n+ emitter.statusCode = response.status;\n+ emitter.headers = self.parseHeaders(response.headers);\n+ emitter.abort = function() { response.body.cancel(); };\n+\n+ httpRequest.responseStream = self.writeBody(emitter, response); // attach responseStream to httpRequest\n+ httpRequest.stream = emitter; // attach emitter to httpRequest\n+ emitter.didCallback = false; // Required for httpDone event\n+\n+ emitter.on('error', function() {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback.apply(emitter, arguments);\n+ });\n+\n+ if (emitter.didCallback) return;\n+ callback(emitter);\n+ emitter.emit(\n+ 'headers',\n+ emitter.statusCode,\n+ emitter.headers,\n+ response.statusText\n+ );\n+ }, function(err) {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback(AWS.util.error(new Error('Network Failure'), {\n+ code: 'NetworkingError'\n+ }));\n+ }).catch(function(err) {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback(AWS.util.error(err, {\n+ code: 'BadReplyError'\n+ }));\n+ });\n+\n+ return emitter;\n+ },\n+\n+ writeBody: function writeBody(emitter, response) {\n+ var body = response.body;\n+ var totalBytes = parseInt(response.headers.get('Content-Length'), 10);", + "comment_created_at": "2020-10-20T10:23:04+00:00", + "comment_author": "heri16", + "comment_body": "Any suggestions are welcomed. :smiling_face_with_three_hearts: ", + "pr_file_module": null + }, + { + "comment_id": "508413111", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 3505, + "pr_file": "lib/http/fetch.js", + "discussion_id": "508372275", + "commented_code": "@@ -0,0 +1,159 @@\n+var AWS = require('../core');\n+var pipeThrough = AWS.util.pipeThrough;\n+var TransformStream = typeof TransformStream !== 'undefined' ? TransformStream :\n+ AWS.util.stream && AWS.util.stream.TransformStream ? 
AWS.util.stream.TransformStream :\n+ typeof WebStreamsPolyfill !== 'undefined' ? WebStreamsPolyfill.TransformStream :\n+ undefined;\n+require('../http');\n+\n+/**\n+ * @api private\n+ */\n+AWS.FetchClient = AWS.util.inherit({\n+ handleRequest: function handleRequest(httpRequest, httpOptions, callback, errCallback) {\n+ var self = this;\n+ var endpoint = httpRequest.endpoint;\n+ if (!httpOptions) httpOptions = {};\n+\n+ var emitter = new EventEmitter();\n+\n+ // Construct href\n+ var href = endpoint.protocol + '//' + endpoint.hostname;\n+ if (endpoint.port !== 80 && endpoint.port !== 443) {\n+ href += ':' + endpoint.port;\n+ }\n+ href += httpRequest.path;\n+\n+ // Construct headers\n+ var headers = new Headers();\n+ AWS.util.each(httpRequest.headers, function (key, value) {\n+ // See: https://fetch.spec.whatwg.org/#forbidden-header-name\n+ if (key !== 'Content-Length' && key !== 'Expect' && key !== 'Host') {\n+ headers.set(key, value);\n+ }\n+ });\n+\n+ // Send cookies?\n+ var credentials = 'omit';\n+ if (httpOptions.xhrWithCredentials) {\n+ credentials = 'include';\n+ }\n+\n+ // Construct request\n+ // See: https://developer.mozilla.org/en-US/docs/Web/API/Request/Request\n+ var req = {\n+ method: httpRequest.method,\n+ headers: headers,\n+ credentials: credentials\n+ };\n+ if (httpRequest.body) req.body = httpRequest.body;\n+ var request = new Request(href, req);\n+\n+ // Send request\n+ fetch(request).then(function(response) {\n+ if (!response.ok) throw Error(response.statusText);\n+ return response;\n+ }).then(function(response) {\n+ emitter.statusCode = response.status;\n+ emitter.headers = self.parseHeaders(response.headers);\n+ emitter.abort = function() { response.body.cancel(); };\n+\n+ httpRequest.responseStream = self.writeBody(emitter, response); // attach responseStream to httpRequest\n+ httpRequest.stream = emitter; // attach emitter to httpRequest\n+ emitter.didCallback = false; // Required for httpDone event\n+\n+ emitter.on('error', function() {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback.apply(emitter, arguments);\n+ });\n+\n+ if (emitter.didCallback) return;\n+ callback(emitter);\n+ emitter.emit(\n+ 'headers',\n+ emitter.statusCode,\n+ emitter.headers,\n+ response.statusText\n+ );\n+ }, function(err) {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback(AWS.util.error(new Error('Network Failure'), {\n+ code: 'NetworkingError'\n+ }));\n+ }).catch(function(err) {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback(AWS.util.error(err, {\n+ code: 'BadReplyError'\n+ }));\n+ });\n+\n+ return emitter;\n+ },\n+\n+ writeBody: function writeBody(emitter, response) {\n+ var body = response.body;\n+ var totalBytes = parseInt(response.headers.get('Content-Length'), 10);", + "comment_created_at": "2020-10-20T11:10:34+00:00", + "comment_author": "jimmywarting", + "comment_body": "There is also this observer issue: https://github.com/whatwg/fetch/issues/607 but nothing have happened on that front for a long time now.\r\n\r\nYou could read the content-encoding as well and if it's null or \"identity\" then the content-length is the equivalent to the actual file size and it can be ok to use the progress observer. it's also possible to send a `accept-encoding: identity` request header to ask for raw bytes. 
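A sketch of the header checks suggested above, using only the standard Fetch and Streams APIs; the helper name and callback shape are assumptions, and browsers may silently ignore an `accept-encoding` request header even though Node-based fetch implementations honor it:

```typescript
// Sketch: only treat Content-Length as a reliable progress total when no
// content coding (gzip, br, deflate, ...) was applied to the response body.
async function fetchWithProgress(
  url: string,
  onProgress: (loadedBytes: number, totalBytes?: number) => void
): Promise<Uint8Array[]> {
  const response = await fetch(url, { headers: { 'accept-encoding': 'identity' } });
  if (!response.body) throw new Error('Response has no body to read');

  const encoding = response.headers.get('content-encoding');
  const lengthHeader = response.headers.get('content-length');
  const totalBytes =
    (!encoding || encoding === 'identity') && lengthHeader
      ? parseInt(lengthHeader, 10)
      : undefined; // undefined when the header would be misleading

  const reader = response.body.getReader();
  const chunks: Uint8Array[] = [];
  let loadedBytes = 0;
  for (;;) {
    const { done, value } = await reader.read();
    if (done || !value) break;
    chunks.push(value);
    loadedBytes += value.byteLength;
    onProgress(loadedBytes, totalBytes);
  }
  return chunks;
}
```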
but i might best be done with a HEAD request\r\nI don't know exactly how the aws sdk/api looks like, but there looks to be a size metadata when you fetch the file list that may be accurate.\r\n\r\nI would probably just ditch the hole progress stuff and leave that up to the developer to handle b/c it can be unreliable. or give them the option to choose between XHR and Fetch (at least until https://github.com/whatwg/fetch/issues/607 have been implemented)\r\n\r\nAlso if you use the size option in streamsaver than you don't need to be subscribed to any progress event listener since browser native UI will show one for you. but it needs to be the uncompressed size as well - can't be the same content-length\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "61352499", + "pr_number": 945, + "pr_file": "lib/http/node.js", + "created_at": "2016-04-27T23:13:35+00:00", + "commented_code": "writeBody: function writeBody(stream, httpRequest) {\n var body = httpRequest.body;\n\n if (body && WritableStream && ReadableStream) { // progress support\n if (!(body instanceof Stream)) body = AWS.util.buffer.toStream(body);\n body.pipe(this.progressStream(stream, httpRequest));\n }\n var totalBytes = parseInt(httpRequest.headers['Content-Length'], 10);\n var loadedBytes = 0;\n\n if (body instanceof Stream) {\n // for progress support of streaming content\n // tap the data event of the stream in addition to piping\n body.on('data', function(chunk) {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "61352499", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 945, + "pr_file": "lib/http/node.js", + "discussion_id": "61352499", + "commented_code": "@@ -76,15 +75,40 @@ AWS.NodeHttpClient = AWS.util.inherit({\n \n writeBody: function writeBody(stream, httpRequest) {\n var body = httpRequest.body;\n-\n- if (body && WritableStream && ReadableStream) { // progress support\n- if (!(body instanceof Stream)) body = AWS.util.buffer.toStream(body);\n- body.pipe(this.progressStream(stream, httpRequest));\n- }\n+ var totalBytes = parseInt(httpRequest.headers['Content-Length'], 10);\n+ var loadedBytes = 0;\n \n if (body instanceof Stream) {\n+ // for progress support of streaming content\n+ // tap the data event of the stream in addition to piping\n+ body.on('data', function(chunk) {", + "comment_created_at": "2016-04-27T23:13:35+00:00", + "comment_author": "chrisradek", + "comment_body": "I think tapping into the `data` event on a stream could cause some unintended side-effects. In node.js 0.10.x, binding a `data` listener on a stream will cause the stream to emit 'data' events as fast as it can, ignoring the back-pressure that's automatically in place when using `pipe`. If the writable stream is slow, then this could cause loss of data.\n\nI don't think this is an issue in versions of node.js >= 0.12.x, and some simple testing seems to confirm that. However, we need to work with node.js 0.10.x as well.\n\nThe current method creates a new `writable` stream that also gets piped into in order to emit the 'sendProgress' events. 
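A minimal sketch of the piped-progress approach described in this comment (an assumed helper, not the SDK's actual `progressStream`); because the counter lives inside the pipe chain, destination back-pressure is preserved instead of being bypassed by a raw `'data'` listener:

```typescript
import { Transform, TransformCallback } from 'stream';

// Sketch: count bytes from inside the pipeline rather than tapping 'data' on the source.
function progressStream(onProgress: (loadedBytes: number) => void): Transform {
  let loadedBytes = 0;
  return new Transform({
    transform(chunk: Buffer, _encoding: BufferEncoding, callback: TransformCallback) {
      loadedBytes += chunk.length;
      onProgress(loadedBytes);
      callback(null, chunk); // pass the chunk through unchanged
    }
  });
}

// Usage: body.pipe(progressStream(report)).pipe(destination)
```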
I know it'll require refactoring your logic but that path seems safer across all our supported versions of node.\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "171932543", + "pr_number": 1956, + "pr_file": "lib/services/s3.js", + "created_at": "2018-03-02T18:56:47+00:00", + "commented_code": "}\n },\n\n /**\n * @api private\n */\n validateMd5TrailingChecksum: function validateMd5TrailingChecksum(resp) {\n var response = resp.httpResponse;\n var buffer = response.body;", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "171932543", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1956, + "pr_file": "lib/services/s3.js", + "discussion_id": "171932543", + "commented_code": "@@ -479,6 +499,49 @@ AWS.util.update(AWS.S3.prototype, {\n }\n },\n \n+ /**\n+ * @api private\n+ */\n+ validateMd5TrailingChecksum: function validateMd5TrailingChecksum(resp) {\n+ var response = resp.httpResponse;\n+ var buffer = response.body;", + "comment_created_at": "2018-03-02T18:56:47+00:00", + "comment_author": "chrisradek", + "comment_body": "In node.js, the response body is likely to be a stream. It might be easier in node to create a passthrough stream that generates the md5 hash and compares against the checksum. You'll know the expected content-length since it's passed in the response headers. Your solution here should still cover the browser use-case though.", + "pr_file_module": null + }, + { + "comment_id": "174218206", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1956, + "pr_file": "lib/services/s3.js", + "discussion_id": "171932543", + "commented_code": "@@ -479,6 +499,49 @@ AWS.util.update(AWS.S3.prototype, {\n }\n },\n \n+ /**\n+ * @api private\n+ */\n+ validateMd5TrailingChecksum: function validateMd5TrailingChecksum(resp) {\n+ var response = resp.httpResponse;\n+ var buffer = response.body;", + "comment_created_at": "2018-03-13T17:27:39+00:00", + "comment_author": "AllanZhengYP", + "comment_body": "Mark: This part will only be performed on Browser.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "173921060", + "pr_number": 1956, + "pr_file": "lib/http/node.js", + "created_at": "2018-03-12T19:34:31+00:00", + "commented_code": "var stream = http.request(options, function (httpResp) {\n if (stream.didCallback) return;\n var headers = httpResp.headers;\n var responseStream = httpResp;\n\n //check content length\n if (headers && headers['content-length'] && httpRequest.method !== 'HEAD') {\n var contentLengthCheckerStream =\n new ContentLengthCheckerStream(\n parseInt(headers['content-length'], 10)\n );\n responseStream.on('error', function(err) {\n contentLengthCheckerStream.emit('error', err);\n });\n responseStream = responseStream.pipe(contentLengthCheckerStream)\n }\n //if response contains checksum in payload, validate it and chop it off the resposne stream.\n if (headers && headers['x-amz-transfer-encoding'] && headers['content-length']) {\n var transferEncoding = headers['x-amz-transfer-encoding'];\n var contentLength = headers['content-length'];\n var integrityCheckerStream = new IntegrityCheckerStream(transferEncoding, contentLength);\n responseStream.on('error', function(err) {\n integrityCheckerStream.emit('error', err);", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "173921060", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1956, + "pr_file": "lib/http/node.js", + "discussion_id": "173921060", + "commented_code": "@@ -41,9 +43,40 @@ AWS.NodeHttpClient = 
AWS.util.inherit({\n \n var stream = http.request(options, function (httpResp) {\n if (stream.didCallback) return;\n+ var headers = httpResp.headers;\n+ var responseStream = httpResp;\n+\n+ //check content length\n+ if (headers && headers['content-length'] && httpRequest.method !== 'HEAD') {\n+ var contentLengthCheckerStream =\n+ new ContentLengthCheckerStream(\n+ parseInt(headers['content-length'], 10)\n+ );\n+ responseStream.on('error', function(err) {\n+ contentLengthCheckerStream.emit('error', err);\n+ });\n+ responseStream = responseStream.pipe(contentLengthCheckerStream)\n+ }\n+ //if response contains checksum in payload, validate it and chop it off the resposne stream.\n+ if (headers && headers['x-amz-transfer-encoding'] && headers['content-length']) {\n+ var transferEncoding = headers['x-amz-transfer-encoding'];\n+ var contentLength = headers['content-length'];\n+ var integrityCheckerStream = new IntegrityCheckerStream(transferEncoding, contentLength);\n+ responseStream.on('error', function(err) {\n+ integrityCheckerStream.emit('error', err);", + "comment_created_at": "2018-03-12T19:34:31+00:00", + "comment_author": "chrisradek", + "comment_body": "If `responseStream` is an `IncomingMessage`, you'll want to call `destroy` on it. As of node 8.0.0, readable streams also have the `destroy` method, so you can check if `destroy` is a function, then call it if it is.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "173931162", + "pr_number": 1956, + "pr_file": "lib/request.js", + "created_at": "2018-03-12T20:11:55+00:00", + "commented_code": "resp.error = error;\n resp.error.retryable = false;\n });\n\n var shouldCheckContentLength = false;\n var expectedLen;\n if (req.httpRequest.method !== 'HEAD') {\n expectedLen = parseInt(headers['content-length'], 10);\n }\n if (expectedLen !== undefined && !isNaN(expectedLen) && expectedLen >= 0) {\n shouldCheckContentLength = true;\n var receivedLen = 0;\n }\n\n var checkContentLengthAndEmit = function checkContentLengthAndEmit() {\n if (shouldCheckContentLength && receivedLen !== expectedLen) {\n stream.emit('error', AWS.util.error(\n new Error('Stream content length mismatch. 
Received ' +\n receivedLen + ' of ' + expectedLen + ' bytes.'),\n { code: 'StreamContentLengthMismatch' }\n ));\n } else if (AWS.HttpClient.streamsApiVersion === 2) {\n stream.end();\n } else {\n stream.emit('end');\n }\n };\n\n var httpStream = resp.httpResponse.createUnbufferedStream();\n\n if (AWS.HttpClient.streamsApiVersion === 2) {\n if (shouldCheckContentLength) {\n var lengthAccumulator = new streams.PassThrough();\n lengthAccumulator._write = function(chunk) {\n if (chunk && chunk.length) {\n receivedLen += chunk.length;\n }\n return streams.PassThrough.prototype._write.apply(this, arguments);\n };\n\n lengthAccumulator.on('end', checkContentLengthAndEmit);\n stream.on('error', function(err) {\n shouldCheckContentLength = false;\n httpStream.unpipe(lengthAccumulator);\n lengthAccumulator.emit('end');\n lengthAccumulator.end();\n });\n httpStream.pipe(lengthAccumulator).pipe(stream, { end: false });\n } else {\n httpStream.pipe(stream);\n }\n } else {\n\n if (shouldCheckContentLength) {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "173931162", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1956, + "pr_file": "lib/request.js", + "discussion_id": "173931162", + "commented_code": "@@ -595,77 +591,28 @@ AWS.Request = inherit({\n resp.error = error;\n resp.error.retryable = false;\n });\n-\n- var shouldCheckContentLength = false;\n- var expectedLen;\n- if (req.httpRequest.method !== 'HEAD') {\n- expectedLen = parseInt(headers['content-length'], 10);\n- }\n- if (expectedLen !== undefined && !isNaN(expectedLen) && expectedLen >= 0) {\n- shouldCheckContentLength = true;\n- var receivedLen = 0;\n- }\n-\n- var checkContentLengthAndEmit = function checkContentLengthAndEmit() {\n- if (shouldCheckContentLength && receivedLen !== expectedLen) {\n- stream.emit('error', AWS.util.error(\n- new Error('Stream content length mismatch. Received ' +\n- receivedLen + ' of ' + expectedLen + ' bytes.'),\n- { code: 'StreamContentLengthMismatch' }\n- ));\n- } else if (AWS.HttpClient.streamsApiVersion === 2) {\n- stream.end();\n- } else {\n- stream.emit('end');\n- }\n- };\n-\n var httpStream = resp.httpResponse.createUnbufferedStream();\n-\n if (AWS.HttpClient.streamsApiVersion === 2) {\n- if (shouldCheckContentLength) {\n- var lengthAccumulator = new streams.PassThrough();\n- lengthAccumulator._write = function(chunk) {\n- if (chunk && chunk.length) {\n- receivedLen += chunk.length;\n- }\n- return streams.PassThrough.prototype._write.apply(this, arguments);\n- };\n-\n- lengthAccumulator.on('end', checkContentLengthAndEmit);\n- stream.on('error', function(err) {\n- shouldCheckContentLength = false;\n- httpStream.unpipe(lengthAccumulator);\n- lengthAccumulator.emit('end');\n- lengthAccumulator.end();\n- });\n- httpStream.pipe(lengthAccumulator).pipe(stream, { end: false });\n- } else {\n httpStream.pipe(stream);\n- }\n } else {\n-\n- if (shouldCheckContentLength) {", + "comment_created_at": "2018-03-12T20:11:55+00:00", + "comment_author": "chrisradek", + "comment_body": "We'll still need to perform these checks for Node.js 0.8, especially if we start turning it on by default. Unfortunately 0.8.x doesn't include `Transform` in its `stream` package, so your implementation won't work for those cases. 
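Pulling this thread's suggestions together, here is a hedged sketch of a pass-through integrity checker for modern Node (where `Transform` exists; the Node 0.8 case raised above would need a `'data'`-listener fallback). The function name and the `BadDigest` code are illustrative choices, not the SDK's:

```typescript
import { createHash } from 'crypto';
import { Transform, TransformCallback } from 'stream';

// Sketch: verify byte count and MD5 digest as the body streams through.
function integrityCheckStream(expectedLength: number, expectedMd5Base64?: string): Transform {
  let receivedLength = 0;
  const hash = createHash('md5');
  return new Transform({
    transform(chunk: Buffer, _encoding: BufferEncoding, callback: TransformCallback) {
      receivedLength += chunk.length;
      hash.update(chunk);
      callback(null, chunk);
    },
    flush(callback: TransformCallback) {
      if (receivedLength !== expectedLength) {
        return callback(Object.assign(
          new Error(`Stream content length mismatch. Received ${receivedLength} of ${expectedLength} bytes.`),
          { code: 'StreamContentLengthMismatch' }
        ));
      }
      if (expectedMd5Base64 && hash.digest('base64') !== expectedMd5Base64) {
        return callback(Object.assign(
          new Error('Body checksum did not match the expected MD5 digest.'),
          { code: 'BadDigest' }
        ));
      }
      callback();
    }
  });
}
```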
You'll likely need to resort to `data` listeners to perform your calculations if `Transform` doesn't exist.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/aws-sdk-js-content-integrity-verification.md b/_reviewers/aws-sdk-js-content-integrity-verification.md index befe874..eeee48d 100644 --- a/_reviewers/aws-sdk-js-content-integrity-verification.md +++ b/_reviewers/aws-sdk-js-content-integrity-verification.md @@ -40,165 +40,3 @@ When handling HTTP responses, especially with streaming data, properly verify co - Emit appropriate errors with descriptive messages when content length or integrity checks fail Implementing these practices ensures data is received completely and correctly, preventing subtle bugs in network communication. - - -[ - { - "discussion_id": "508372275", - "pr_number": 3505, - "pr_file": "lib/http/fetch.js", - "created_at": "2020-10-20T10:02:52+00:00", - "commented_code": "var AWS = require('../core');\nvar pipeThrough = AWS.util.pipeThrough;\nvar TransformStream = typeof TransformStream !== 'undefined' ? TransformStream :\n AWS.util.stream && AWS.util.stream.TransformStream ? AWS.util.stream.TransformStream :\n typeof WebStreamsPolyfill !== 'undefined' ? WebStreamsPolyfill.TransformStream :\n undefined;\nrequire('../http');\n\n/**\n * @api private\n */\nAWS.FetchClient = AWS.util.inherit({\n handleRequest: function handleRequest(httpRequest, httpOptions, callback, errCallback) {\n var self = this;\n var endpoint = httpRequest.endpoint;\n if (!httpOptions) httpOptions = {};\n\n var emitter = new EventEmitter();\n\n // Construct href\n var href = endpoint.protocol + '//' + endpoint.hostname;\n if (endpoint.port !== 80 && endpoint.port !== 443) {\n href += ':' + endpoint.port;\n }\n href += httpRequest.path;\n\n // Construct headers\n var headers = new Headers();\n AWS.util.each(httpRequest.headers, function (key, value) {\n // See: https://fetch.spec.whatwg.org/#forbidden-header-name\n if (key !== 'Content-Length' && key !== 'Expect' && key !== 'Host') {\n headers.set(key, value);\n }\n });\n\n // Send cookies?\n var credentials = 'omit';\n if (httpOptions.xhrWithCredentials) {\n credentials = 'include';\n }\n\n // Construct request\n // See: https://developer.mozilla.org/en-US/docs/Web/API/Request/Request\n var req = {\n method: httpRequest.method,\n headers: headers,\n credentials: credentials\n };\n if (httpRequest.body) req.body = httpRequest.body;\n var request = new Request(href, req);\n\n // Send request\n fetch(request).then(function(response) {\n if (!response.ok) throw Error(response.statusText);\n return response;\n }).then(function(response) {\n emitter.statusCode = response.status;\n emitter.headers = self.parseHeaders(response.headers);\n emitter.abort = function() { response.body.cancel(); };\n\n httpRequest.responseStream = self.writeBody(emitter, response); // attach responseStream to httpRequest\n httpRequest.stream = emitter; // attach emitter to httpRequest\n emitter.didCallback = false; // Required for httpDone event\n\n emitter.on('error', function() {\n if (emitter.didCallback) return; emitter.didCallback = true;\n errCallback.apply(emitter, arguments);\n });\n\n if (emitter.didCallback) return;\n callback(emitter);\n emitter.emit(\n 'headers',\n emitter.statusCode,\n emitter.headers,\n response.statusText\n );\n }, function(err) {\n if (emitter.didCallback) return; emitter.didCallback = true;\n errCallback(AWS.util.error(new Error('Network Failure'), {\n code: 'NetworkingError'\n }));\n 
}).catch(function(err) {\n if (emitter.didCallback) return; emitter.didCallback = true;\n errCallback(AWS.util.error(err, {\n code: 'BadReplyError'\n }));\n });\n\n return emitter;\n },\n\n writeBody: function writeBody(emitter, response) {\n var body = response.body;\n var totalBytes = parseInt(response.headers.get('Content-Length'), 10);", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "508372275", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 3505, - "pr_file": "lib/http/fetch.js", - "discussion_id": "508372275", - "commented_code": "@@ -0,0 +1,159 @@\n+var AWS = require('../core');\n+var pipeThrough = AWS.util.pipeThrough;\n+var TransformStream = typeof TransformStream !== 'undefined' ? TransformStream :\n+ AWS.util.stream && AWS.util.stream.TransformStream ? AWS.util.stream.TransformStream :\n+ typeof WebStreamsPolyfill !== 'undefined' ? WebStreamsPolyfill.TransformStream :\n+ undefined;\n+require('../http');\n+\n+/**\n+ * @api private\n+ */\n+AWS.FetchClient = AWS.util.inherit({\n+ handleRequest: function handleRequest(httpRequest, httpOptions, callback, errCallback) {\n+ var self = this;\n+ var endpoint = httpRequest.endpoint;\n+ if (!httpOptions) httpOptions = {};\n+\n+ var emitter = new EventEmitter();\n+\n+ // Construct href\n+ var href = endpoint.protocol + '//' + endpoint.hostname;\n+ if (endpoint.port !== 80 && endpoint.port !== 443) {\n+ href += ':' + endpoint.port;\n+ }\n+ href += httpRequest.path;\n+\n+ // Construct headers\n+ var headers = new Headers();\n+ AWS.util.each(httpRequest.headers, function (key, value) {\n+ // See: https://fetch.spec.whatwg.org/#forbidden-header-name\n+ if (key !== 'Content-Length' && key !== 'Expect' && key !== 'Host') {\n+ headers.set(key, value);\n+ }\n+ });\n+\n+ // Send cookies?\n+ var credentials = 'omit';\n+ if (httpOptions.xhrWithCredentials) {\n+ credentials = 'include';\n+ }\n+\n+ // Construct request\n+ // See: https://developer.mozilla.org/en-US/docs/Web/API/Request/Request\n+ var req = {\n+ method: httpRequest.method,\n+ headers: headers,\n+ credentials: credentials\n+ };\n+ if (httpRequest.body) req.body = httpRequest.body;\n+ var request = new Request(href, req);\n+\n+ // Send request\n+ fetch(request).then(function(response) {\n+ if (!response.ok) throw Error(response.statusText);\n+ return response;\n+ }).then(function(response) {\n+ emitter.statusCode = response.status;\n+ emitter.headers = self.parseHeaders(response.headers);\n+ emitter.abort = function() { response.body.cancel(); };\n+\n+ httpRequest.responseStream = self.writeBody(emitter, response); // attach responseStream to httpRequest\n+ httpRequest.stream = emitter; // attach emitter to httpRequest\n+ emitter.didCallback = false; // Required for httpDone event\n+\n+ emitter.on('error', function() {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback.apply(emitter, arguments);\n+ });\n+\n+ if (emitter.didCallback) return;\n+ callback(emitter);\n+ emitter.emit(\n+ 'headers',\n+ emitter.statusCode,\n+ emitter.headers,\n+ response.statusText\n+ );\n+ }, function(err) {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback(AWS.util.error(new Error('Network Failure'), {\n+ code: 'NetworkingError'\n+ }));\n+ }).catch(function(err) {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback(AWS.util.error(err, {\n+ code: 'BadReplyError'\n+ }));\n+ });\n+\n+ return emitter;\n+ },\n+\n+ writeBody: function writeBody(emitter, response) {\n+ var body = 
response.body;\n+ var totalBytes = parseInt(response.headers.get('Content-Length'), 10);", - "comment_created_at": "2020-10-20T10:02:52+00:00", - "comment_author": "jimmywarting", - "comment_body": "just a heads up: if the response is encoded with gzip, br, deflate or anything then that will be what the content-length will also be (not the actual file size) so when you read the all the chunks then you will only increase the loadedBytes with uncompressed data.\r\n\r\nwhich means loadedBytes might be higher than content-length in some scenarios", - "pr_file_module": null - }, - { - "comment_id": "508382288", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 3505, - "pr_file": "lib/http/fetch.js", - "discussion_id": "508372275", - "commented_code": "@@ -0,0 +1,159 @@\n+var AWS = require('../core');\n+var pipeThrough = AWS.util.pipeThrough;\n+var TransformStream = typeof TransformStream !== 'undefined' ? TransformStream :\n+ AWS.util.stream && AWS.util.stream.TransformStream ? AWS.util.stream.TransformStream :\n+ typeof WebStreamsPolyfill !== 'undefined' ? WebStreamsPolyfill.TransformStream :\n+ undefined;\n+require('../http');\n+\n+/**\n+ * @api private\n+ */\n+AWS.FetchClient = AWS.util.inherit({\n+ handleRequest: function handleRequest(httpRequest, httpOptions, callback, errCallback) {\n+ var self = this;\n+ var endpoint = httpRequest.endpoint;\n+ if (!httpOptions) httpOptions = {};\n+\n+ var emitter = new EventEmitter();\n+\n+ // Construct href\n+ var href = endpoint.protocol + '//' + endpoint.hostname;\n+ if (endpoint.port !== 80 && endpoint.port !== 443) {\n+ href += ':' + endpoint.port;\n+ }\n+ href += httpRequest.path;\n+\n+ // Construct headers\n+ var headers = new Headers();\n+ AWS.util.each(httpRequest.headers, function (key, value) {\n+ // See: https://fetch.spec.whatwg.org/#forbidden-header-name\n+ if (key !== 'Content-Length' && key !== 'Expect' && key !== 'Host') {\n+ headers.set(key, value);\n+ }\n+ });\n+\n+ // Send cookies?\n+ var credentials = 'omit';\n+ if (httpOptions.xhrWithCredentials) {\n+ credentials = 'include';\n+ }\n+\n+ // Construct request\n+ // See: https://developer.mozilla.org/en-US/docs/Web/API/Request/Request\n+ var req = {\n+ method: httpRequest.method,\n+ headers: headers,\n+ credentials: credentials\n+ };\n+ if (httpRequest.body) req.body = httpRequest.body;\n+ var request = new Request(href, req);\n+\n+ // Send request\n+ fetch(request).then(function(response) {\n+ if (!response.ok) throw Error(response.statusText);\n+ return response;\n+ }).then(function(response) {\n+ emitter.statusCode = response.status;\n+ emitter.headers = self.parseHeaders(response.headers);\n+ emitter.abort = function() { response.body.cancel(); };\n+\n+ httpRequest.responseStream = self.writeBody(emitter, response); // attach responseStream to httpRequest\n+ httpRequest.stream = emitter; // attach emitter to httpRequest\n+ emitter.didCallback = false; // Required for httpDone event\n+\n+ emitter.on('error', function() {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback.apply(emitter, arguments);\n+ });\n+\n+ if (emitter.didCallback) return;\n+ callback(emitter);\n+ emitter.emit(\n+ 'headers',\n+ emitter.statusCode,\n+ emitter.headers,\n+ response.statusText\n+ );\n+ }, function(err) {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback(AWS.util.error(new Error('Network Failure'), {\n+ code: 'NetworkingError'\n+ }));\n+ }).catch(function(err) {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ 
errCallback(AWS.util.error(err, {\n+ code: 'BadReplyError'\n+ }));\n+ });\n+\n+ return emitter;\n+ },\n+\n+ writeBody: function writeBody(emitter, response) {\n+ var body = response.body;\n+ var totalBytes = parseInt(response.headers.get('Content-Length'), 10);", - "comment_created_at": "2020-10-20T10:19:05+00:00", - "comment_author": "heri16", - "comment_body": "This issue is very tricky indeed due to the fetch implementation, and I don't have a solution yet for `Content-Encoding` compression.\r\n\r\nSee discussion here:\r\nhttps://github.com/w3c/ServiceWorker/issues/339#issuecomment-372304884", - "pr_file_module": null - }, - { - "comment_id": "508384763", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 3505, - "pr_file": "lib/http/fetch.js", - "discussion_id": "508372275", - "commented_code": "@@ -0,0 +1,159 @@\n+var AWS = require('../core');\n+var pipeThrough = AWS.util.pipeThrough;\n+var TransformStream = typeof TransformStream !== 'undefined' ? TransformStream :\n+ AWS.util.stream && AWS.util.stream.TransformStream ? AWS.util.stream.TransformStream :\n+ typeof WebStreamsPolyfill !== 'undefined' ? WebStreamsPolyfill.TransformStream :\n+ undefined;\n+require('../http');\n+\n+/**\n+ * @api private\n+ */\n+AWS.FetchClient = AWS.util.inherit({\n+ handleRequest: function handleRequest(httpRequest, httpOptions, callback, errCallback) {\n+ var self = this;\n+ var endpoint = httpRequest.endpoint;\n+ if (!httpOptions) httpOptions = {};\n+\n+ var emitter = new EventEmitter();\n+\n+ // Construct href\n+ var href = endpoint.protocol + '//' + endpoint.hostname;\n+ if (endpoint.port !== 80 && endpoint.port !== 443) {\n+ href += ':' + endpoint.port;\n+ }\n+ href += httpRequest.path;\n+\n+ // Construct headers\n+ var headers = new Headers();\n+ AWS.util.each(httpRequest.headers, function (key, value) {\n+ // See: https://fetch.spec.whatwg.org/#forbidden-header-name\n+ if (key !== 'Content-Length' && key !== 'Expect' && key !== 'Host') {\n+ headers.set(key, value);\n+ }\n+ });\n+\n+ // Send cookies?\n+ var credentials = 'omit';\n+ if (httpOptions.xhrWithCredentials) {\n+ credentials = 'include';\n+ }\n+\n+ // Construct request\n+ // See: https://developer.mozilla.org/en-US/docs/Web/API/Request/Request\n+ var req = {\n+ method: httpRequest.method,\n+ headers: headers,\n+ credentials: credentials\n+ };\n+ if (httpRequest.body) req.body = httpRequest.body;\n+ var request = new Request(href, req);\n+\n+ // Send request\n+ fetch(request).then(function(response) {\n+ if (!response.ok) throw Error(response.statusText);\n+ return response;\n+ }).then(function(response) {\n+ emitter.statusCode = response.status;\n+ emitter.headers = self.parseHeaders(response.headers);\n+ emitter.abort = function() { response.body.cancel(); };\n+\n+ httpRequest.responseStream = self.writeBody(emitter, response); // attach responseStream to httpRequest\n+ httpRequest.stream = emitter; // attach emitter to httpRequest\n+ emitter.didCallback = false; // Required for httpDone event\n+\n+ emitter.on('error', function() {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback.apply(emitter, arguments);\n+ });\n+\n+ if (emitter.didCallback) return;\n+ callback(emitter);\n+ emitter.emit(\n+ 'headers',\n+ emitter.statusCode,\n+ emitter.headers,\n+ response.statusText\n+ );\n+ }, function(err) {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback(AWS.util.error(new Error('Network Failure'), {\n+ code: 'NetworkingError'\n+ }));\n+ }).catch(function(err) {\n+ if (emitter.didCallback) 
return; emitter.didCallback = true;\n+ errCallback(AWS.util.error(err, {\n+ code: 'BadReplyError'\n+ }));\n+ });\n+\n+ return emitter;\n+ },\n+\n+ writeBody: function writeBody(emitter, response) {\n+ var body = response.body;\n+ var totalBytes = parseInt(response.headers.get('Content-Length'), 10);", - "comment_created_at": "2020-10-20T10:23:04+00:00", - "comment_author": "heri16", - "comment_body": "Any suggestions are welcomed. :smiling_face_with_three_hearts: ", - "pr_file_module": null - }, - { - "comment_id": "508413111", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 3505, - "pr_file": "lib/http/fetch.js", - "discussion_id": "508372275", - "commented_code": "@@ -0,0 +1,159 @@\n+var AWS = require('../core');\n+var pipeThrough = AWS.util.pipeThrough;\n+var TransformStream = typeof TransformStream !== 'undefined' ? TransformStream :\n+ AWS.util.stream && AWS.util.stream.TransformStream ? AWS.util.stream.TransformStream :\n+ typeof WebStreamsPolyfill !== 'undefined' ? WebStreamsPolyfill.TransformStream :\n+ undefined;\n+require('../http');\n+\n+/**\n+ * @api private\n+ */\n+AWS.FetchClient = AWS.util.inherit({\n+ handleRequest: function handleRequest(httpRequest, httpOptions, callback, errCallback) {\n+ var self = this;\n+ var endpoint = httpRequest.endpoint;\n+ if (!httpOptions) httpOptions = {};\n+\n+ var emitter = new EventEmitter();\n+\n+ // Construct href\n+ var href = endpoint.protocol + '//' + endpoint.hostname;\n+ if (endpoint.port !== 80 && endpoint.port !== 443) {\n+ href += ':' + endpoint.port;\n+ }\n+ href += httpRequest.path;\n+\n+ // Construct headers\n+ var headers = new Headers();\n+ AWS.util.each(httpRequest.headers, function (key, value) {\n+ // See: https://fetch.spec.whatwg.org/#forbidden-header-name\n+ if (key !== 'Content-Length' && key !== 'Expect' && key !== 'Host') {\n+ headers.set(key, value);\n+ }\n+ });\n+\n+ // Send cookies?\n+ var credentials = 'omit';\n+ if (httpOptions.xhrWithCredentials) {\n+ credentials = 'include';\n+ }\n+\n+ // Construct request\n+ // See: https://developer.mozilla.org/en-US/docs/Web/API/Request/Request\n+ var req = {\n+ method: httpRequest.method,\n+ headers: headers,\n+ credentials: credentials\n+ };\n+ if (httpRequest.body) req.body = httpRequest.body;\n+ var request = new Request(href, req);\n+\n+ // Send request\n+ fetch(request).then(function(response) {\n+ if (!response.ok) throw Error(response.statusText);\n+ return response;\n+ }).then(function(response) {\n+ emitter.statusCode = response.status;\n+ emitter.headers = self.parseHeaders(response.headers);\n+ emitter.abort = function() { response.body.cancel(); };\n+\n+ httpRequest.responseStream = self.writeBody(emitter, response); // attach responseStream to httpRequest\n+ httpRequest.stream = emitter; // attach emitter to httpRequest\n+ emitter.didCallback = false; // Required for httpDone event\n+\n+ emitter.on('error', function() {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback.apply(emitter, arguments);\n+ });\n+\n+ if (emitter.didCallback) return;\n+ callback(emitter);\n+ emitter.emit(\n+ 'headers',\n+ emitter.statusCode,\n+ emitter.headers,\n+ response.statusText\n+ );\n+ }, function(err) {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback(AWS.util.error(new Error('Network Failure'), {\n+ code: 'NetworkingError'\n+ }));\n+ }).catch(function(err) {\n+ if (emitter.didCallback) return; emitter.didCallback = true;\n+ errCallback(AWS.util.error(err, {\n+ code: 'BadReplyError'\n+ }));\n+ });\n+\n+ return 
emitter;\n+ },\n+\n+ writeBody: function writeBody(emitter, response) {\n+ var body = response.body;\n+ var totalBytes = parseInt(response.headers.get('Content-Length'), 10);", - "comment_created_at": "2020-10-20T11:10:34+00:00", - "comment_author": "jimmywarting", - "comment_body": "There is also this observer issue: https://github.com/whatwg/fetch/issues/607 but nothing have happened on that front for a long time now.\r\n\r\nYou could read the content-encoding as well and if it's null or \"identity\" then the content-length is the equivalent to the actual file size and it can be ok to use the progress observer. it's also possible to send a `accept-encoding: identity` request header to ask for raw bytes. but i might best be done with a HEAD request\r\nI don't know exactly how the aws sdk/api looks like, but there looks to be a size metadata when you fetch the file list that may be accurate.\r\n\r\nI would probably just ditch the hole progress stuff and leave that up to the developer to handle b/c it can be unreliable. or give them the option to choose between XHR and Fetch (at least until https://github.com/whatwg/fetch/issues/607 have been implemented)\r\n\r\nAlso if you use the size option in streamsaver than you don't need to be subscribed to any progress event listener since browser native UI will show one for you. but it needs to be the uncompressed size as well - can't be the same content-length\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "61352499", - "pr_number": 945, - "pr_file": "lib/http/node.js", - "created_at": "2016-04-27T23:13:35+00:00", - "commented_code": "writeBody: function writeBody(stream, httpRequest) {\n var body = httpRequest.body;\n\n if (body && WritableStream && ReadableStream) { // progress support\n if (!(body instanceof Stream)) body = AWS.util.buffer.toStream(body);\n body.pipe(this.progressStream(stream, httpRequest));\n }\n var totalBytes = parseInt(httpRequest.headers['Content-Length'], 10);\n var loadedBytes = 0;\n\n if (body instanceof Stream) {\n // for progress support of streaming content\n // tap the data event of the stream in addition to piping\n body.on('data', function(chunk) {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "61352499", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 945, - "pr_file": "lib/http/node.js", - "discussion_id": "61352499", - "commented_code": "@@ -76,15 +75,40 @@ AWS.NodeHttpClient = AWS.util.inherit({\n \n writeBody: function writeBody(stream, httpRequest) {\n var body = httpRequest.body;\n-\n- if (body && WritableStream && ReadableStream) { // progress support\n- if (!(body instanceof Stream)) body = AWS.util.buffer.toStream(body);\n- body.pipe(this.progressStream(stream, httpRequest));\n- }\n+ var totalBytes = parseInt(httpRequest.headers['Content-Length'], 10);\n+ var loadedBytes = 0;\n \n if (body instanceof Stream) {\n+ // for progress support of streaming content\n+ // tap the data event of the stream in addition to piping\n+ body.on('data', function(chunk) {", - "comment_created_at": "2016-04-27T23:13:35+00:00", - "comment_author": "chrisradek", - "comment_body": "I think tapping into the `data` event on a stream could cause some unintended side-effects. In node.js 0.10.x, binding a `data` listener on a stream will cause the stream to emit 'data' events as fast as it can, ignoring the back-pressure that's automatically in place when using `pipe`. 
If the writable stream is slow, then this could cause loss of data.\n\nI don't think this is an issue in versions of node.js >= 0.12.x, and some simple testing seems to confirm that. However, we need to work with node.js 0.10.x as well.\n\nThe current method creates a new `writable` stream that also gets piped into in order to emit the 'sendProgress' events. I know it'll require refactoring your logic but that path seems safer across all our supported versions of node.\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "171932543", - "pr_number": 1956, - "pr_file": "lib/services/s3.js", - "created_at": "2018-03-02T18:56:47+00:00", - "commented_code": "}\n },\n\n /**\n * @api private\n */\n validateMd5TrailingChecksum: function validateMd5TrailingChecksum(resp) {\n var response = resp.httpResponse;\n var buffer = response.body;", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "171932543", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1956, - "pr_file": "lib/services/s3.js", - "discussion_id": "171932543", - "commented_code": "@@ -479,6 +499,49 @@ AWS.util.update(AWS.S3.prototype, {\n }\n },\n \n+ /**\n+ * @api private\n+ */\n+ validateMd5TrailingChecksum: function validateMd5TrailingChecksum(resp) {\n+ var response = resp.httpResponse;\n+ var buffer = response.body;", - "comment_created_at": "2018-03-02T18:56:47+00:00", - "comment_author": "chrisradek", - "comment_body": "In node.js, the response body is likely to be a stream. It might be easier in node to create a passthrough stream that generates the md5 hash and compares against the checksum. You'll know the expected content-length since it's passed in the response headers. Your solution here should still cover the browser use-case though.", - "pr_file_module": null - }, - { - "comment_id": "174218206", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1956, - "pr_file": "lib/services/s3.js", - "discussion_id": "171932543", - "commented_code": "@@ -479,6 +499,49 @@ AWS.util.update(AWS.S3.prototype, {\n }\n },\n \n+ /**\n+ * @api private\n+ */\n+ validateMd5TrailingChecksum: function validateMd5TrailingChecksum(resp) {\n+ var response = resp.httpResponse;\n+ var buffer = response.body;", - "comment_created_at": "2018-03-13T17:27:39+00:00", - "comment_author": "AllanZhengYP", - "comment_body": "Mark: This part will only be performed on Browser.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "173921060", - "pr_number": 1956, - "pr_file": "lib/http/node.js", - "created_at": "2018-03-12T19:34:31+00:00", - "commented_code": "var stream = http.request(options, function (httpResp) {\n if (stream.didCallback) return;\n var headers = httpResp.headers;\n var responseStream = httpResp;\n\n //check content length\n if (headers && headers['content-length'] && httpRequest.method !== 'HEAD') {\n var contentLengthCheckerStream =\n new ContentLengthCheckerStream(\n parseInt(headers['content-length'], 10)\n );\n responseStream.on('error', function(err) {\n contentLengthCheckerStream.emit('error', err);\n });\n responseStream = responseStream.pipe(contentLengthCheckerStream)\n }\n //if response contains checksum in payload, validate it and chop it off the resposne stream.\n if (headers && headers['x-amz-transfer-encoding'] && headers['content-length']) {\n var transferEncoding = headers['x-amz-transfer-encoding'];\n var contentLength = headers['content-length'];\n var integrityCheckerStream = new IntegrityCheckerStream(transferEncoding, contentLength);\n responseStream.on('error', 
function(err) {\n integrityCheckerStream.emit('error', err);", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "173921060", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1956, - "pr_file": "lib/http/node.js", - "discussion_id": "173921060", - "commented_code": "@@ -41,9 +43,40 @@ AWS.NodeHttpClient = AWS.util.inherit({\n \n var stream = http.request(options, function (httpResp) {\n if (stream.didCallback) return;\n+ var headers = httpResp.headers;\n+ var responseStream = httpResp;\n+\n+ //check content length\n+ if (headers && headers['content-length'] && httpRequest.method !== 'HEAD') {\n+ var contentLengthCheckerStream =\n+ new ContentLengthCheckerStream(\n+ parseInt(headers['content-length'], 10)\n+ );\n+ responseStream.on('error', function(err) {\n+ contentLengthCheckerStream.emit('error', err);\n+ });\n+ responseStream = responseStream.pipe(contentLengthCheckerStream)\n+ }\n+ //if response contains checksum in payload, validate it and chop it off the resposne stream.\n+ if (headers && headers['x-amz-transfer-encoding'] && headers['content-length']) {\n+ var transferEncoding = headers['x-amz-transfer-encoding'];\n+ var contentLength = headers['content-length'];\n+ var integrityCheckerStream = new IntegrityCheckerStream(transferEncoding, contentLength);\n+ responseStream.on('error', function(err) {\n+ integrityCheckerStream.emit('error', err);", - "comment_created_at": "2018-03-12T19:34:31+00:00", - "comment_author": "chrisradek", - "comment_body": "If `responseStream` is an `IncomingMessage`, you'll want to call `destroy` on it. As of node 8.0.0, readable streams also have the `destroy` method, so you can check if `destroy` is a function, then call it if it is.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "173931162", - "pr_number": 1956, - "pr_file": "lib/request.js", - "created_at": "2018-03-12T20:11:55+00:00", - "commented_code": "resp.error = error;\n resp.error.retryable = false;\n });\n\n var shouldCheckContentLength = false;\n var expectedLen;\n if (req.httpRequest.method !== 'HEAD') {\n expectedLen = parseInt(headers['content-length'], 10);\n }\n if (expectedLen !== undefined && !isNaN(expectedLen) && expectedLen >= 0) {\n shouldCheckContentLength = true;\n var receivedLen = 0;\n }\n\n var checkContentLengthAndEmit = function checkContentLengthAndEmit() {\n if (shouldCheckContentLength && receivedLen !== expectedLen) {\n stream.emit('error', AWS.util.error(\n new Error('Stream content length mismatch. 
Received ' +\n receivedLen + ' of ' + expectedLen + ' bytes.'),\n { code: 'StreamContentLengthMismatch' }\n ));\n } else if (AWS.HttpClient.streamsApiVersion === 2) {\n stream.end();\n } else {\n stream.emit('end');\n }\n };\n\n var httpStream = resp.httpResponse.createUnbufferedStream();\n\n if (AWS.HttpClient.streamsApiVersion === 2) {\n if (shouldCheckContentLength) {\n var lengthAccumulator = new streams.PassThrough();\n lengthAccumulator._write = function(chunk) {\n if (chunk && chunk.length) {\n receivedLen += chunk.length;\n }\n return streams.PassThrough.prototype._write.apply(this, arguments);\n };\n\n lengthAccumulator.on('end', checkContentLengthAndEmit);\n stream.on('error', function(err) {\n shouldCheckContentLength = false;\n httpStream.unpipe(lengthAccumulator);\n lengthAccumulator.emit('end');\n lengthAccumulator.end();\n });\n httpStream.pipe(lengthAccumulator).pipe(stream, { end: false });\n } else {\n httpStream.pipe(stream);\n }\n } else {\n\n if (shouldCheckContentLength) {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "173931162", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1956, - "pr_file": "lib/request.js", - "discussion_id": "173931162", - "commented_code": "@@ -595,77 +591,28 @@ AWS.Request = inherit({\n resp.error = error;\n resp.error.retryable = false;\n });\n-\n- var shouldCheckContentLength = false;\n- var expectedLen;\n- if (req.httpRequest.method !== 'HEAD') {\n- expectedLen = parseInt(headers['content-length'], 10);\n- }\n- if (expectedLen !== undefined && !isNaN(expectedLen) && expectedLen >= 0) {\n- shouldCheckContentLength = true;\n- var receivedLen = 0;\n- }\n-\n- var checkContentLengthAndEmit = function checkContentLengthAndEmit() {\n- if (shouldCheckContentLength && receivedLen !== expectedLen) {\n- stream.emit('error', AWS.util.error(\n- new Error('Stream content length mismatch. Received ' +\n- receivedLen + ' of ' + expectedLen + ' bytes.'),\n- { code: 'StreamContentLengthMismatch' }\n- ));\n- } else if (AWS.HttpClient.streamsApiVersion === 2) {\n- stream.end();\n- } else {\n- stream.emit('end');\n- }\n- };\n-\n var httpStream = resp.httpResponse.createUnbufferedStream();\n-\n if (AWS.HttpClient.streamsApiVersion === 2) {\n- if (shouldCheckContentLength) {\n- var lengthAccumulator = new streams.PassThrough();\n- lengthAccumulator._write = function(chunk) {\n- if (chunk && chunk.length) {\n- receivedLen += chunk.length;\n- }\n- return streams.PassThrough.prototype._write.apply(this, arguments);\n- };\n-\n- lengthAccumulator.on('end', checkContentLengthAndEmit);\n- stream.on('error', function(err) {\n- shouldCheckContentLength = false;\n- httpStream.unpipe(lengthAccumulator);\n- lengthAccumulator.emit('end');\n- lengthAccumulator.end();\n- });\n- httpStream.pipe(lengthAccumulator).pipe(stream, { end: false });\n- } else {\n httpStream.pipe(stream);\n- }\n } else {\n-\n- if (shouldCheckContentLength) {", - "comment_created_at": "2018-03-12T20:11:55+00:00", - "comment_author": "chrisradek", - "comment_body": "We'll still need to perform these checks for Node.js 0.8, especially if we start turning it on by default. Unfortunately 0.8.x doesn't include `Transform` in its `stream` package, so your implementation won't work for those cases. 
You'll likely need to resort to `data` listeners to perform your calculations if `Transform` doesn't exist.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/aws-sdk-js-defensive-null-checking.json b/_reviewers/aws-sdk-js-defensive-null-checking.json new file mode 100644 index 0000000..57e156f --- /dev/null +++ b/_reviewers/aws-sdk-js-defensive-null-checking.json @@ -0,0 +1,180 @@ +[ + { + "discussion_id": "1132762746", + "pr_number": 4365, + "pr_file": "lib/maintenance_mode_message.js", + "created_at": "2023-03-10T18:51:40+00:00", + "commented_code": "* require('aws-sdk/lib/maintenance_mode_message').suppress = true;\n */\nfunction emitWarning() {\n if (typeof process !== 'undefined')", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "1132762746", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 4365, + "pr_file": "lib/maintenance_mode_message.js", + "discussion_id": "1132762746", + "commented_code": "@@ -14,10 +14,19 @@ module.exports = {\n * require('aws-sdk/lib/maintenance_mode_message').suppress = true;\n */\n function emitWarning() {\n+ if (typeof process !== 'undefined')", + "comment_created_at": "2023-03-10T18:51:40+00:00", + "comment_author": "trivikr", + "comment_body": "Check for process being undefined\r\n```suggestion\r\n if (typeof process === 'undefined')\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "43063573", + "pr_number": 697, + "pr_file": "lib/services/s3.js", + "created_at": "2015-10-26T22:38:51+00:00", + "commented_code": "return true;\n } else if (error && error.code === 'RequestTimeout') {\n return true;\n } else if (error.code === 'AuthorizationHeaderMalformed' &&", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "43063573", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 697, + "pr_file": "lib/services/s3.js", + "discussion_id": "43063573", + "commented_code": "@@ -272,6 +272,10 @@ AWS.util.update(AWS.S3.prototype, {\n return true;\n } else if (error && error.code === 'RequestTimeout') {\n return true;\n+ } else if (error.code === 'AuthorizationHeaderMalformed' &&", + "comment_created_at": "2015-10-26T22:38:51+00:00", + "comment_author": "chrisradek", + "comment_body": "Just to be safe, can you also add a check that error exists, similar to the conditional before this one?\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "85010899", + "pr_number": 1189, + "pr_file": "scripts/lib/ts-generator.js", + "created_at": "2016-10-25T22:00:14+00:00", + "commented_code": "var fs = require('fs');\nvar path = require('path');\n\nvar CUSTOM_CONFIG_ENUMS = {\n DUALSTACK: {\n FILE_NAME: 'config_use_dualstack',\n INTERFACE: 'UseDualstackConfigOptions'\n }\n};\n\nfunction TSGenerator(options) {\n this._sdkRootDir = options.SdkRootDirectory || process.cwd();\n this._apiRootDir = path.join(this._sdkRootDir, 'apis');\n this._metadataPath = path.join(this._apiRootDir, 'metadata.json');\n this._clientsDir = path.join(this._sdkRootDir, 'clients');\n this.metadata = null;\n this.typings = {};\n this.fillApiModelFileNames(this._apiRootDir);\n}\n\n/**\n * Loads the AWS SDK metadata.json file.\n */\nTSGenerator.prototype.loadMetadata = function loadMetadata() {\n var metadataFile = fs.readFileSync(this._metadataPath);\n this.metadata = JSON.parse(metadataFile);\n return this.metadata;\n};\n\n/**\n * Modifies metadata to include api model filenames.\n */\nTSGenerator.prototype.fillApiModelFileNames = function fillApiModelFileNames(apisPath) {\n var 
modelPaths = fs.readdirSync(apisPath);\n if (!this.metadata) {\n this.loadMetadata();\n }\n var metadata = this.metadata;\n\n // sort paths so latest versions appear first\n modelPaths = modelPaths.sort(function sort(a, b) {\n if (a < b) {\n return 1;\n } else if (a > b) {\n return -1;\n } else {\n return 0;\n }\n });\n\n // Only get latest version of models\n var foundModels = Object.create(null);\n modelPaths.forEach(function(modelFileName) {\n var match = modelFileName.match(/^(.+)(-[\\d]{4}-[\\d]{2}-[\\d]{2})\\.normal\\.json$/i);\n if (match) {\n var model = match[1];\n if (!foundModels[model]) {\n foundModels[model] = modelFileName;\n }\n }\n });\n\n // now update the metadata\n var keys = Object.keys(metadata);\n keys.forEach(function(key) {\n var modelName = metadata[key].prefix || key;\n metadata[key].api_path = foundModels[modelName];\n // find waiters file\n var baseName = foundModels[modelName].split('.')[0];\n if (modelPaths.indexOf(baseName + '.waiters2.json') >= 0) {\n metadata[key].waiters_path = baseName + '.waiters2.json';\n }\n });\n};\n\n/**\n * Generates the file containing DocumentClient interfaces.\n */\nTSGenerator.prototype.generateDocumentClientInterfaces = function generateDocumentClientInterfaces() {\n var self = this;\n // get the dynamodb model\n var dynamodbModel = this.loadServiceApi('dynamodb');\n var code = '';\n // include node reference\n code += '///\\n';\n // generate shapes\n var modelShapes = dynamodbModel.shapes;\n // iterate over each shape\n var shapeKeys = Object.keys(modelShapes);\n shapeKeys.forEach(function (shapeKey) {\n var modelShape = modelShapes[shapeKey];\n // ignore exceptions\n if (modelShape.exception) {\n return;\n }\n // overwrite AttributeValue\n if (shapeKey === 'AttributeValue') {\n code += self.generateDocString('A JavaScript object or native type.');\n code += 'export type ' + shapeKey + ' = any;\\n';\n return;\n }\n code += self.generateTypingsFromShape(shapeKey, modelShape);\n });\n\n // write file:\n this.writeTypingsFile('document_client_interfaces', path.join(this._sdkRootDir, 'lib', 'dynamodb'), code);\n};\n\n/**\n * Returns a service model based on the serviceIdentifier.\n */\nTSGenerator.prototype.loadServiceApi = function loadServiceApi(serviceIdentifier) {\n // first, find the correct identifier\n var metadata = this.metadata;\n var serviceFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].api_path);\n var serviceModelFile = fs.readFileSync(serviceFilePath);\n var serviceModel = JSON.parse(serviceModelFile);\n // load waiters file if it exists\n var waiterFilePath;\n if (metadata[serviceIdentifier].waiters_path) {\n waiterFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].waiters_path);\n var waiterModelFile = fs.readFileSync(waiterFilePath);\n var waiterModel = JSON.parse(waiterModelFile);\n serviceModel.waiters = waiterModel.waiters;\n }\n\n return serviceModel;\n};\n\n/**\n * Determines if a member is required by checking for it in a list.\n */\nTSGenerator.prototype.checkRequired = function checkRequired(list, member) {\n if (list.indexOf(member)) {\n return true;\n }\n return false;\n};\n\n/**\n * Generates whitespace based on the count.\n */\nTSGenerator.prototype.tabs = function tabs(count) {\n var code = '';\n for (var i = 0; i < count; i++) {\n code += ' ';\n }\n return code;\n};\n\n/**\n * Transforms documentation string to a more readable format.\n */\nTSGenerator.prototype.transformDocumentation = function transformDocumentation(documentation) {\n if (!documentation) {\n 
return '';\n }\n documentation = documentation.replace(/<(?:.|\\n)*?>/gm, '');\n documentation = documentation.replace(/\\*\\//g, '*');\n return documentation;\n};\n\n/**\n * Returns a doc string based on the supplied documentation.\n * Also tabs the doc string if a count is provided.\n */\nTSGenerator.prototype.generateDocString = function generateDocString(documentation, tabCount) {\n tabCount = tabCount || 0;\n var code = '';\n code += this.tabs(tabCount) + '/**\\n';\n code += this.tabs(tabCount) + ' * ' + this.transformDocumentation(documentation) + '\\n';\n code += this.tabs(tabCount) + ' */\\n';\n return code;\n};\n\n/**\n * Returns an array of custom configuration options based on a service identiffier.\n * Custom configuration options are determined by checking the metadata.json file.\n */\nTSGenerator.prototype.generateCustomConfigFromMetadata = function generateCustomConfigFromMetadata(serviceIdentifier) {\n // some services have additional configuration options that are defined in the metadata.json file\n // i.e. dualstackAvailable = useDualstack\n // create reference to custom options\n var customConfigurations = [];\n var serviceMetadata = this.metadata[serviceIdentifier];\n // loop through metadata members\n for (var memberName in serviceMetadata) {\n if (!serviceMetadata.hasOwnProperty(memberName)) {\n continue;\n }\n var memberValue = serviceMetadata[memberName];\n // check configs\n switch (memberName) {\n case 'dualstackAvailable':\n customConfigurations.push(CUSTOM_CONFIG_ENUMS.DUALSTACK);\n break;\n }\n }\n\n return customConfigurations;\n};\n\n/**\n * Generates a type or interface based on the shape.\n */\nTSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromShape(shapeKey, shape, tabCount) {\n // some shapes shouldn't be generated if they are javascript primitives\n if (['string', 'boolean', 'number', 'Date', 'Blob'].indexOf(shapeKey) >= 0) {\n return '';\n }\n var self = this;\n var code = '';\n tabCount = tabCount || 0;\n var tabs = this.tabs;\n var type = shape.type;\n if (type === 'structure') {\n code += tabs(tabCount) + 'export interface ' + shapeKey + ' {\\n';\n var members = shape.members;\n // cycle through members\n var memberKeys = Object.keys(members);\n memberKeys.forEach(function(memberKey) {\n // docs\n var member = members[memberKey];\n if (member.documentation) {\n code += self.generateDocString(member.documentation, tabCount + 1);\n }\n var required = self.checkRequired(shape.required || [], memberKey) ? '?' 
: '';", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "85010899", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1189, + "pr_file": "scripts/lib/ts-generator.js", + "discussion_id": "85010899", + "commented_code": "@@ -0,0 +1,437 @@\n+var fs = require('fs');\n+var path = require('path');\n+\n+var CUSTOM_CONFIG_ENUMS = {\n+ DUALSTACK: {\n+ FILE_NAME: 'config_use_dualstack',\n+ INTERFACE: 'UseDualstackConfigOptions'\n+ }\n+};\n+\n+function TSGenerator(options) {\n+ this._sdkRootDir = options.SdkRootDirectory || process.cwd();\n+ this._apiRootDir = path.join(this._sdkRootDir, 'apis');\n+ this._metadataPath = path.join(this._apiRootDir, 'metadata.json');\n+ this._clientsDir = path.join(this._sdkRootDir, 'clients');\n+ this.metadata = null;\n+ this.typings = {};\n+ this.fillApiModelFileNames(this._apiRootDir);\n+}\n+\n+/**\n+ * Loads the AWS SDK metadata.json file.\n+ */\n+TSGenerator.prototype.loadMetadata = function loadMetadata() {\n+ var metadataFile = fs.readFileSync(this._metadataPath);\n+ this.metadata = JSON.parse(metadataFile);\n+ return this.metadata;\n+};\n+\n+/**\n+ * Modifies metadata to include api model filenames.\n+ */\n+TSGenerator.prototype.fillApiModelFileNames = function fillApiModelFileNames(apisPath) {\n+ var modelPaths = fs.readdirSync(apisPath);\n+ if (!this.metadata) {\n+ this.loadMetadata();\n+ }\n+ var metadata = this.metadata;\n+\n+ // sort paths so latest versions appear first\n+ modelPaths = modelPaths.sort(function sort(a, b) {\n+ if (a < b) {\n+ return 1;\n+ } else if (a > b) {\n+ return -1;\n+ } else {\n+ return 0;\n+ }\n+ });\n+\n+ // Only get latest version of models\n+ var foundModels = Object.create(null);\n+ modelPaths.forEach(function(modelFileName) {\n+ var match = modelFileName.match(/^(.+)(-[\\d]{4}-[\\d]{2}-[\\d]{2})\\.normal\\.json$/i);\n+ if (match) {\n+ var model = match[1];\n+ if (!foundModels[model]) {\n+ foundModels[model] = modelFileName;\n+ }\n+ }\n+ });\n+\n+ // now update the metadata\n+ var keys = Object.keys(metadata);\n+ keys.forEach(function(key) {\n+ var modelName = metadata[key].prefix || key;\n+ metadata[key].api_path = foundModels[modelName];\n+ // find waiters file\n+ var baseName = foundModels[modelName].split('.')[0];\n+ if (modelPaths.indexOf(baseName + '.waiters2.json') >= 0) {\n+ metadata[key].waiters_path = baseName + '.waiters2.json';\n+ }\n+ });\n+};\n+\n+/**\n+ * Generates the file containing DocumentClient interfaces.\n+ */\n+TSGenerator.prototype.generateDocumentClientInterfaces = function generateDocumentClientInterfaces() {\n+ var self = this;\n+ // get the dynamodb model\n+ var dynamodbModel = this.loadServiceApi('dynamodb');\n+ var code = '';\n+ // include node reference\n+ code += '///\\n';\n+ // generate shapes\n+ var modelShapes = dynamodbModel.shapes;\n+ // iterate over each shape\n+ var shapeKeys = Object.keys(modelShapes);\n+ shapeKeys.forEach(function (shapeKey) {\n+ var modelShape = modelShapes[shapeKey];\n+ // ignore exceptions\n+ if (modelShape.exception) {\n+ return;\n+ }\n+ // overwrite AttributeValue\n+ if (shapeKey === 'AttributeValue') {\n+ code += self.generateDocString('A JavaScript object or native type.');\n+ code += 'export type ' + shapeKey + ' = any;\\n';\n+ return;\n+ }\n+ code += self.generateTypingsFromShape(shapeKey, modelShape);\n+ });\n+\n+ // write file:\n+ this.writeTypingsFile('document_client_interfaces', path.join(this._sdkRootDir, 'lib', 'dynamodb'), code);\n+};\n+\n+/**\n+ * Returns a service model based on the serviceIdentifier.\n+ 
*/\n+TSGenerator.prototype.loadServiceApi = function loadServiceApi(serviceIdentifier) {\n+ // first, find the correct identifier\n+ var metadata = this.metadata;\n+ var serviceFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].api_path);\n+ var serviceModelFile = fs.readFileSync(serviceFilePath);\n+ var serviceModel = JSON.parse(serviceModelFile);\n+ // load waiters file if it exists\n+ var waiterFilePath;\n+ if (metadata[serviceIdentifier].waiters_path) {\n+ waiterFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].waiters_path);\n+ var waiterModelFile = fs.readFileSync(waiterFilePath);\n+ var waiterModel = JSON.parse(waiterModelFile);\n+ serviceModel.waiters = waiterModel.waiters;\n+ }\n+\n+ return serviceModel;\n+};\n+\n+/**\n+ * Determines if a member is required by checking for it in a list.\n+ */\n+TSGenerator.prototype.checkRequired = function checkRequired(list, member) {\n+ if (list.indexOf(member)) {\n+ return true;\n+ }\n+ return false;\n+};\n+\n+/**\n+ * Generates whitespace based on the count.\n+ */\n+TSGenerator.prototype.tabs = function tabs(count) {\n+ var code = '';\n+ for (var i = 0; i < count; i++) {\n+ code += ' ';\n+ }\n+ return code;\n+};\n+\n+/**\n+ * Transforms documentation string to a more readable format.\n+ */\n+TSGenerator.prototype.transformDocumentation = function transformDocumentation(documentation) {\n+ if (!documentation) {\n+ return '';\n+ }\n+ documentation = documentation.replace(/<(?:.|\\n)*?>/gm, '');\n+ documentation = documentation.replace(/\\*\\//g, '*');\n+ return documentation;\n+};\n+\n+/**\n+ * Returns a doc string based on the supplied documentation.\n+ * Also tabs the doc string if a count is provided.\n+ */\n+TSGenerator.prototype.generateDocString = function generateDocString(documentation, tabCount) {\n+ tabCount = tabCount || 0;\n+ var code = '';\n+ code += this.tabs(tabCount) + '/**\\n';\n+ code += this.tabs(tabCount) + ' * ' + this.transformDocumentation(documentation) + '\\n';\n+ code += this.tabs(tabCount) + ' */\\n';\n+ return code;\n+};\n+\n+/**\n+ * Returns an array of custom configuration options based on a service identiffier.\n+ * Custom configuration options are determined by checking the metadata.json file.\n+ */\n+TSGenerator.prototype.generateCustomConfigFromMetadata = function generateCustomConfigFromMetadata(serviceIdentifier) {\n+ // some services have additional configuration options that are defined in the metadata.json file\n+ // i.e. 
dualstackAvailable = useDualstack\n+ // create reference to custom options\n+ var customConfigurations = [];\n+ var serviceMetadata = this.metadata[serviceIdentifier];\n+ // loop through metadata members\n+ for (var memberName in serviceMetadata) {\n+ if (!serviceMetadata.hasOwnProperty(memberName)) {\n+ continue;\n+ }\n+ var memberValue = serviceMetadata[memberName];\n+ // check configs\n+ switch (memberName) {\n+ case 'dualstackAvailable':\n+ customConfigurations.push(CUSTOM_CONFIG_ENUMS.DUALSTACK);\n+ break;\n+ }\n+ }\n+\n+ return customConfigurations;\n+};\n+\n+/**\n+ * Generates a type or interface based on the shape.\n+ */\n+TSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromShape(shapeKey, shape, tabCount) {\n+ // some shapes shouldn't be generated if they are javascript primitives\n+ if (['string', 'boolean', 'number', 'Date', 'Blob'].indexOf(shapeKey) >= 0) {\n+ return '';\n+ }\n+ var self = this;\n+ var code = '';\n+ tabCount = tabCount || 0;\n+ var tabs = this.tabs;\n+ var type = shape.type;\n+ if (type === 'structure') {\n+ code += tabs(tabCount) + 'export interface ' + shapeKey + ' {\\n';\n+ var members = shape.members;\n+ // cycle through members\n+ var memberKeys = Object.keys(members);\n+ memberKeys.forEach(function(memberKey) {\n+ // docs\n+ var member = members[memberKey];\n+ if (member.documentation) {\n+ code += self.generateDocString(member.documentation, tabCount + 1);\n+ }\n+ var required = self.checkRequired(shape.required || [], memberKey) ? '?' : '';", + "comment_created_at": "2016-10-25T22:00:14+00:00", + "comment_author": "LiuJoyceC", + "comment_body": "If `checkRequired` is supposed to return true when the member is required, then this should be switched.\n`self.checkRequired(shape.required || [], memberKey) ? 
'' : '?'`\n", + "pr_file_module": null + }, + { + "comment_id": "85435739", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1189, + "pr_file": "scripts/lib/ts-generator.js", + "discussion_id": "85010899", + "commented_code": "@@ -0,0 +1,437 @@\n+var fs = require('fs');\n+var path = require('path');\n+\n+var CUSTOM_CONFIG_ENUMS = {\n+ DUALSTACK: {\n+ FILE_NAME: 'config_use_dualstack',\n+ INTERFACE: 'UseDualstackConfigOptions'\n+ }\n+};\n+\n+function TSGenerator(options) {\n+ this._sdkRootDir = options.SdkRootDirectory || process.cwd();\n+ this._apiRootDir = path.join(this._sdkRootDir, 'apis');\n+ this._metadataPath = path.join(this._apiRootDir, 'metadata.json');\n+ this._clientsDir = path.join(this._sdkRootDir, 'clients');\n+ this.metadata = null;\n+ this.typings = {};\n+ this.fillApiModelFileNames(this._apiRootDir);\n+}\n+\n+/**\n+ * Loads the AWS SDK metadata.json file.\n+ */\n+TSGenerator.prototype.loadMetadata = function loadMetadata() {\n+ var metadataFile = fs.readFileSync(this._metadataPath);\n+ this.metadata = JSON.parse(metadataFile);\n+ return this.metadata;\n+};\n+\n+/**\n+ * Modifies metadata to include api model filenames.\n+ */\n+TSGenerator.prototype.fillApiModelFileNames = function fillApiModelFileNames(apisPath) {\n+ var modelPaths = fs.readdirSync(apisPath);\n+ if (!this.metadata) {\n+ this.loadMetadata();\n+ }\n+ var metadata = this.metadata;\n+\n+ // sort paths so latest versions appear first\n+ modelPaths = modelPaths.sort(function sort(a, b) {\n+ if (a < b) {\n+ return 1;\n+ } else if (a > b) {\n+ return -1;\n+ } else {\n+ return 0;\n+ }\n+ });\n+\n+ // Only get latest version of models\n+ var foundModels = Object.create(null);\n+ modelPaths.forEach(function(modelFileName) {\n+ var match = modelFileName.match(/^(.+)(-[\\d]{4}-[\\d]{2}-[\\d]{2})\\.normal\\.json$/i);\n+ if (match) {\n+ var model = match[1];\n+ if (!foundModels[model]) {\n+ foundModels[model] = modelFileName;\n+ }\n+ }\n+ });\n+\n+ // now update the metadata\n+ var keys = Object.keys(metadata);\n+ keys.forEach(function(key) {\n+ var modelName = metadata[key].prefix || key;\n+ metadata[key].api_path = foundModels[modelName];\n+ // find waiters file\n+ var baseName = foundModels[modelName].split('.')[0];\n+ if (modelPaths.indexOf(baseName + '.waiters2.json') >= 0) {\n+ metadata[key].waiters_path = baseName + '.waiters2.json';\n+ }\n+ });\n+};\n+\n+/**\n+ * Generates the file containing DocumentClient interfaces.\n+ */\n+TSGenerator.prototype.generateDocumentClientInterfaces = function generateDocumentClientInterfaces() {\n+ var self = this;\n+ // get the dynamodb model\n+ var dynamodbModel = this.loadServiceApi('dynamodb');\n+ var code = '';\n+ // include node reference\n+ code += '///\\n';\n+ // generate shapes\n+ var modelShapes = dynamodbModel.shapes;\n+ // iterate over each shape\n+ var shapeKeys = Object.keys(modelShapes);\n+ shapeKeys.forEach(function (shapeKey) {\n+ var modelShape = modelShapes[shapeKey];\n+ // ignore exceptions\n+ if (modelShape.exception) {\n+ return;\n+ }\n+ // overwrite AttributeValue\n+ if (shapeKey === 'AttributeValue') {\n+ code += self.generateDocString('A JavaScript object or native type.');\n+ code += 'export type ' + shapeKey + ' = any;\\n';\n+ return;\n+ }\n+ code += self.generateTypingsFromShape(shapeKey, modelShape);\n+ });\n+\n+ // write file:\n+ this.writeTypingsFile('document_client_interfaces', path.join(this._sdkRootDir, 'lib', 'dynamodb'), code);\n+};\n+\n+/**\n+ * Returns a service model based on the serviceIdentifier.\n+ 
*/\n+TSGenerator.prototype.loadServiceApi = function loadServiceApi(serviceIdentifier) {\n+ // first, find the correct identifier\n+ var metadata = this.metadata;\n+ var serviceFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].api_path);\n+ var serviceModelFile = fs.readFileSync(serviceFilePath);\n+ var serviceModel = JSON.parse(serviceModelFile);\n+ // load waiters file if it exists\n+ var waiterFilePath;\n+ if (metadata[serviceIdentifier].waiters_path) {\n+ waiterFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].waiters_path);\n+ var waiterModelFile = fs.readFileSync(waiterFilePath);\n+ var waiterModel = JSON.parse(waiterModelFile);\n+ serviceModel.waiters = waiterModel.waiters;\n+ }\n+\n+ return serviceModel;\n+};\n+\n+/**\n+ * Determines if a member is required by checking for it in a list.\n+ */\n+TSGenerator.prototype.checkRequired = function checkRequired(list, member) {\n+ if (list.indexOf(member)) {\n+ return true;\n+ }\n+ return false;\n+};\n+\n+/**\n+ * Generates whitespace based on the count.\n+ */\n+TSGenerator.prototype.tabs = function tabs(count) {\n+ var code = '';\n+ for (var i = 0; i < count; i++) {\n+ code += ' ';\n+ }\n+ return code;\n+};\n+\n+/**\n+ * Transforms documentation string to a more readable format.\n+ */\n+TSGenerator.prototype.transformDocumentation = function transformDocumentation(documentation) {\n+ if (!documentation) {\n+ return '';\n+ }\n+ documentation = documentation.replace(/<(?:.|\\n)*?>/gm, '');\n+ documentation = documentation.replace(/\\*\\//g, '*');\n+ return documentation;\n+};\n+\n+/**\n+ * Returns a doc string based on the supplied documentation.\n+ * Also tabs the doc string if a count is provided.\n+ */\n+TSGenerator.prototype.generateDocString = function generateDocString(documentation, tabCount) {\n+ tabCount = tabCount || 0;\n+ var code = '';\n+ code += this.tabs(tabCount) + '/**\\n';\n+ code += this.tabs(tabCount) + ' * ' + this.transformDocumentation(documentation) + '\\n';\n+ code += this.tabs(tabCount) + ' */\\n';\n+ return code;\n+};\n+\n+/**\n+ * Returns an array of custom configuration options based on a service identiffier.\n+ * Custom configuration options are determined by checking the metadata.json file.\n+ */\n+TSGenerator.prototype.generateCustomConfigFromMetadata = function generateCustomConfigFromMetadata(serviceIdentifier) {\n+ // some services have additional configuration options that are defined in the metadata.json file\n+ // i.e. 
dualstackAvailable = useDualstack\n+ // create reference to custom options\n+ var customConfigurations = [];\n+ var serviceMetadata = this.metadata[serviceIdentifier];\n+ // loop through metadata members\n+ for (var memberName in serviceMetadata) {\n+ if (!serviceMetadata.hasOwnProperty(memberName)) {\n+ continue;\n+ }\n+ var memberValue = serviceMetadata[memberName];\n+ // check configs\n+ switch (memberName) {\n+ case 'dualstackAvailable':\n+ customConfigurations.push(CUSTOM_CONFIG_ENUMS.DUALSTACK);\n+ break;\n+ }\n+ }\n+\n+ return customConfigurations;\n+};\n+\n+/**\n+ * Generates a type or interface based on the shape.\n+ */\n+TSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromShape(shapeKey, shape, tabCount) {\n+ // some shapes shouldn't be generated if they are javascript primitives\n+ if (['string', 'boolean', 'number', 'Date', 'Blob'].indexOf(shapeKey) >= 0) {\n+ return '';\n+ }\n+ var self = this;\n+ var code = '';\n+ tabCount = tabCount || 0;\n+ var tabs = this.tabs;\n+ var type = shape.type;\n+ if (type === 'structure') {\n+ code += tabs(tabCount) + 'export interface ' + shapeKey + ' {\\n';\n+ var members = shape.members;\n+ // cycle through members\n+ var memberKeys = Object.keys(members);\n+ memberKeys.forEach(function(memberKey) {\n+ // docs\n+ var member = members[memberKey];\n+ if (member.documentation) {\n+ code += self.generateDocString(member.documentation, tabCount + 1);\n+ }\n+ var required = self.checkRequired(shape.required || [], memberKey) ? '?' : '';", + "comment_created_at": "2016-10-27T21:54:33+00:00", + "comment_author": "chrisradek", + "comment_body": "Thanks, didn't catch this due to the mistake above.\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "90351124", + "pr_number": 1245, + "pr_file": "lib/credentials/cognito_identity_credentials.js", + "created_at": "2016-11-30T23:15:44+00:00", + "commented_code": "*/\n storage: (function() {\n try {\n return AWS.util.isBrowser() && window.localStorage !== null && typeof window.localStorage === 'object' ?", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "90351124", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1245, + "pr_file": "lib/credentials/cognito_identity_credentials.js", + "discussion_id": "90351124", + "commented_code": "@@ -331,8 +331,10 @@ AWS.CognitoIdentityCredentials = AWS.util.inherit(AWS.Credentials, {\n */\n storage: (function() {\n try {\n- return AWS.util.isBrowser() && window.localStorage !== null && typeof window.localStorage === 'object' ?", + "comment_created_at": "2016-11-30T23:15:44+00:00", + "comment_author": "chrisradek", + "comment_body": "Can you keep the `AWS.util.isBrowser() && window.localStorage !== null` checks at the top of the try block? As it is, this will always throw an error in node.js (and possibly some 'browser' environments like electron). 
I know the catch block should handle this case, but I'd like to avoid throwing an error if we can predict it.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "51147005", + "pr_number": 888, + "pr_file": "lib/services/s3.js", + "created_at": "2016-01-28T16:34:54+00:00", + "commented_code": "// This chunk of code will set the location constraint param based\n // on the region (when possible), but it will not override a passed-in\n // location constraint.\n if (!params) params = {};\n if (typeof params === 'function') {\n callback = params;\n params = {};\n }", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "51147005", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 888, + "pr_file": "lib/services/s3.js", + "discussion_id": "51147005", + "commented_code": "@@ -411,7 +409,10 @@ AWS.util.update(AWS.S3.prototype, {\n // This chunk of code will set the location constraint param based\n // on the region (when possible), but it will not override a passed-in\n // location constraint.\n- if (!params) params = {};\n+ if (typeof params === 'function') {\n+ callback = params;\n+ params = {};\n+ }", + "comment_created_at": "2016-01-28T16:34:54+00:00", + "comment_author": "chrisradek", + "comment_body": "It might be useful to add one more check after the if block to set params equal to itself or an empty object to protect us in the unlikely event someone passes in a value like `null`. Imagine this would be a rare edge case but we do this already for other operations.\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "107256817", + "pr_number": 1405, + "pr_file": "lib/resource_waiter.js", + "created_at": "2017-03-21T19:46:42+00:00", + "commented_code": "callback = params; params = undefined;\n }\n\n if (params && params.$waiter) {\n if (params.$waiter.delay) {\n this.config.delay = params.$waiter.delay;", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "107256817", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1405, + "pr_file": "lib/resource_waiter.js", + "discussion_id": "107256817", + "commented_code": "@@ -136,6 +136,16 @@ AWS.ResourceWaiter = inherit({\n callback = params; params = undefined;\n }\n \n+ if (params && params.$waiter) {\n+ if (params.$waiter.delay) {\n+ this.config.delay = params.$waiter.delay;", + "comment_created_at": "2017-03-21T19:46:42+00:00", + "comment_author": "chrisradek", + "comment_body": "Just in case someone wants to enter `0`, maybe use a `typeof x === 'number'` check instead.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "147590167", + "pr_number": 1782, + "pr_file": "lib/event_listeners.js", + "created_at": "2017-10-29T18:37:50+00:00", + "commented_code": "var req = resp.request;\n var logger = req.service.config.logger;\n if (!logger) return;\n function filterSensitiveLog(inputShape, shape) {\n switch (inputShape.type) {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "147590167", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1782, + "pr_file": "lib/event_listeners.js", + "discussion_id": "147590167", + "commented_code": "@@ -469,14 +469,52 @@ AWS.EventListeners = {\n var req = resp.request;\n var logger = req.service.config.logger;\n if (!logger) return;\n+ function filterSensitiveLog(inputShape, shape) {\n+ switch (inputShape.type) {", + "comment_created_at": "2017-10-29T18:37:50+00:00", + "comment_author": "jeskew", + "comment_body": "I think you need to return early here if `shape` is 
undefined. There are recursive shapes defined in some services, and this function will need to check for some stopping condition.", + "pr_file_module": null + }, + { + "comment_id": "147770081", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1782, + "pr_file": "lib/event_listeners.js", + "discussion_id": "147590167", + "commented_code": "@@ -469,14 +469,52 @@ AWS.EventListeners = {\n var req = resp.request;\n var logger = req.service.config.logger;\n if (!logger) return;\n+ function filterSensitiveLog(inputShape, shape) {\n+ switch (inputShape.type) {", + "comment_created_at": "2017-10-30T17:08:54+00:00", + "comment_author": "AllanZhengYP", + "comment_body": "Oh right. Thank you for pointing out.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/aws-sdk-js-defensive-null-checking.md b/_reviewers/aws-sdk-js-defensive-null-checking.md index cd68dc6..19678dc 100644 --- a/_reviewers/aws-sdk-js-defensive-null-checking.md +++ b/_reviewers/aws-sdk-js-defensive-null-checking.md @@ -68,185 +68,3 @@ Common patterns to adopt: ``` Consistent null checking prevents the most common class of runtime errors and produces more predictable code. - - -[ - { - "discussion_id": "1132762746", - "pr_number": 4365, - "pr_file": "lib/maintenance_mode_message.js", - "created_at": "2023-03-10T18:51:40+00:00", - "commented_code": "* require('aws-sdk/lib/maintenance_mode_message').suppress = true;\n */\nfunction emitWarning() {\n if (typeof process !== 'undefined')", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "1132762746", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 4365, - "pr_file": "lib/maintenance_mode_message.js", - "discussion_id": "1132762746", - "commented_code": "@@ -14,10 +14,19 @@ module.exports = {\n * require('aws-sdk/lib/maintenance_mode_message').suppress = true;\n */\n function emitWarning() {\n+ if (typeof process !== 'undefined')", - "comment_created_at": "2023-03-10T18:51:40+00:00", - "comment_author": "trivikr", - "comment_body": "Check for process being undefined\r\n```suggestion\r\n if (typeof process === 'undefined')\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "43063573", - "pr_number": 697, - "pr_file": "lib/services/s3.js", - "created_at": "2015-10-26T22:38:51+00:00", - "commented_code": "return true;\n } else if (error && error.code === 'RequestTimeout') {\n return true;\n } else if (error.code === 'AuthorizationHeaderMalformed' &&", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "43063573", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 697, - "pr_file": "lib/services/s3.js", - "discussion_id": "43063573", - "commented_code": "@@ -272,6 +272,10 @@ AWS.util.update(AWS.S3.prototype, {\n return true;\n } else if (error && error.code === 'RequestTimeout') {\n return true;\n+ } else if (error.code === 'AuthorizationHeaderMalformed' &&", - "comment_created_at": "2015-10-26T22:38:51+00:00", - "comment_author": "chrisradek", - "comment_body": "Just to be safe, can you also add a check that error exists, similar to the conditional before this one?\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "85010899", - "pr_number": 1189, - "pr_file": "scripts/lib/ts-generator.js", - "created_at": "2016-10-25T22:00:14+00:00", - "commented_code": "var fs = require('fs');\nvar path = require('path');\n\nvar CUSTOM_CONFIG_ENUMS = {\n DUALSTACK: {\n FILE_NAME: 'config_use_dualstack',\n INTERFACE: 'UseDualstackConfigOptions'\n 
}\n};\n\nfunction TSGenerator(options) {\n this._sdkRootDir = options.SdkRootDirectory || process.cwd();\n this._apiRootDir = path.join(this._sdkRootDir, 'apis');\n this._metadataPath = path.join(this._apiRootDir, 'metadata.json');\n this._clientsDir = path.join(this._sdkRootDir, 'clients');\n this.metadata = null;\n this.typings = {};\n this.fillApiModelFileNames(this._apiRootDir);\n}\n\n/**\n * Loads the AWS SDK metadata.json file.\n */\nTSGenerator.prototype.loadMetadata = function loadMetadata() {\n var metadataFile = fs.readFileSync(this._metadataPath);\n this.metadata = JSON.parse(metadataFile);\n return this.metadata;\n};\n\n/**\n * Modifies metadata to include api model filenames.\n */\nTSGenerator.prototype.fillApiModelFileNames = function fillApiModelFileNames(apisPath) {\n var modelPaths = fs.readdirSync(apisPath);\n if (!this.metadata) {\n this.loadMetadata();\n }\n var metadata = this.metadata;\n\n // sort paths so latest versions appear first\n modelPaths = modelPaths.sort(function sort(a, b) {\n if (a < b) {\n return 1;\n } else if (a > b) {\n return -1;\n } else {\n return 0;\n }\n });\n\n // Only get latest version of models\n var foundModels = Object.create(null);\n modelPaths.forEach(function(modelFileName) {\n var match = modelFileName.match(/^(.+)(-[\\d]{4}-[\\d]{2}-[\\d]{2})\\.normal\\.json$/i);\n if (match) {\n var model = match[1];\n if (!foundModels[model]) {\n foundModels[model] = modelFileName;\n }\n }\n });\n\n // now update the metadata\n var keys = Object.keys(metadata);\n keys.forEach(function(key) {\n var modelName = metadata[key].prefix || key;\n metadata[key].api_path = foundModels[modelName];\n // find waiters file\n var baseName = foundModels[modelName].split('.')[0];\n if (modelPaths.indexOf(baseName + '.waiters2.json') >= 0) {\n metadata[key].waiters_path = baseName + '.waiters2.json';\n }\n });\n};\n\n/**\n * Generates the file containing DocumentClient interfaces.\n */\nTSGenerator.prototype.generateDocumentClientInterfaces = function generateDocumentClientInterfaces() {\n var self = this;\n // get the dynamodb model\n var dynamodbModel = this.loadServiceApi('dynamodb');\n var code = '';\n // include node reference\n code += '///\\n';\n // generate shapes\n var modelShapes = dynamodbModel.shapes;\n // iterate over each shape\n var shapeKeys = Object.keys(modelShapes);\n shapeKeys.forEach(function (shapeKey) {\n var modelShape = modelShapes[shapeKey];\n // ignore exceptions\n if (modelShape.exception) {\n return;\n }\n // overwrite AttributeValue\n if (shapeKey === 'AttributeValue') {\n code += self.generateDocString('A JavaScript object or native type.');\n code += 'export type ' + shapeKey + ' = any;\\n';\n return;\n }\n code += self.generateTypingsFromShape(shapeKey, modelShape);\n });\n\n // write file:\n this.writeTypingsFile('document_client_interfaces', path.join(this._sdkRootDir, 'lib', 'dynamodb'), code);\n};\n\n/**\n * Returns a service model based on the serviceIdentifier.\n */\nTSGenerator.prototype.loadServiceApi = function loadServiceApi(serviceIdentifier) {\n // first, find the correct identifier\n var metadata = this.metadata;\n var serviceFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].api_path);\n var serviceModelFile = fs.readFileSync(serviceFilePath);\n var serviceModel = JSON.parse(serviceModelFile);\n // load waiters file if it exists\n var waiterFilePath;\n if (metadata[serviceIdentifier].waiters_path) {\n waiterFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].waiters_path);\n var 
waiterModelFile = fs.readFileSync(waiterFilePath);\n var waiterModel = JSON.parse(waiterModelFile);\n serviceModel.waiters = waiterModel.waiters;\n }\n\n return serviceModel;\n};\n\n/**\n * Determines if a member is required by checking for it in a list.\n */\nTSGenerator.prototype.checkRequired = function checkRequired(list, member) {\n if (list.indexOf(member)) {\n return true;\n }\n return false;\n};\n\n/**\n * Generates whitespace based on the count.\n */\nTSGenerator.prototype.tabs = function tabs(count) {\n var code = '';\n for (var i = 0; i < count; i++) {\n code += ' ';\n }\n return code;\n};\n\n/**\n * Transforms documentation string to a more readable format.\n */\nTSGenerator.prototype.transformDocumentation = function transformDocumentation(documentation) {\n if (!documentation) {\n return '';\n }\n documentation = documentation.replace(/<(?:.|\\n)*?>/gm, '');\n documentation = documentation.replace(/\\*\\//g, '*');\n return documentation;\n};\n\n/**\n * Returns a doc string based on the supplied documentation.\n * Also tabs the doc string if a count is provided.\n */\nTSGenerator.prototype.generateDocString = function generateDocString(documentation, tabCount) {\n tabCount = tabCount || 0;\n var code = '';\n code += this.tabs(tabCount) + '/**\\n';\n code += this.tabs(tabCount) + ' * ' + this.transformDocumentation(documentation) + '\\n';\n code += this.tabs(tabCount) + ' */\\n';\n return code;\n};\n\n/**\n * Returns an array of custom configuration options based on a service identiffier.\n * Custom configuration options are determined by checking the metadata.json file.\n */\nTSGenerator.prototype.generateCustomConfigFromMetadata = function generateCustomConfigFromMetadata(serviceIdentifier) {\n // some services have additional configuration options that are defined in the metadata.json file\n // i.e. dualstackAvailable = useDualstack\n // create reference to custom options\n var customConfigurations = [];\n var serviceMetadata = this.metadata[serviceIdentifier];\n // loop through metadata members\n for (var memberName in serviceMetadata) {\n if (!serviceMetadata.hasOwnProperty(memberName)) {\n continue;\n }\n var memberValue = serviceMetadata[memberName];\n // check configs\n switch (memberName) {\n case 'dualstackAvailable':\n customConfigurations.push(CUSTOM_CONFIG_ENUMS.DUALSTACK);\n break;\n }\n }\n\n return customConfigurations;\n};\n\n/**\n * Generates a type or interface based on the shape.\n */\nTSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromShape(shapeKey, shape, tabCount) {\n // some shapes shouldn't be generated if they are javascript primitives\n if (['string', 'boolean', 'number', 'Date', 'Blob'].indexOf(shapeKey) >= 0) {\n return '';\n }\n var self = this;\n var code = '';\n tabCount = tabCount || 0;\n var tabs = this.tabs;\n var type = shape.type;\n if (type === 'structure') {\n code += tabs(tabCount) + 'export interface ' + shapeKey + ' {\\n';\n var members = shape.members;\n // cycle through members\n var memberKeys = Object.keys(members);\n memberKeys.forEach(function(memberKey) {\n // docs\n var member = members[memberKey];\n if (member.documentation) {\n code += self.generateDocString(member.documentation, tabCount + 1);\n }\n var required = self.checkRequired(shape.required || [], memberKey) ? '?' 
: '';", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "85010899", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1189, - "pr_file": "scripts/lib/ts-generator.js", - "discussion_id": "85010899", - "commented_code": "@@ -0,0 +1,437 @@\n+var fs = require('fs');\n+var path = require('path');\n+\n+var CUSTOM_CONFIG_ENUMS = {\n+ DUALSTACK: {\n+ FILE_NAME: 'config_use_dualstack',\n+ INTERFACE: 'UseDualstackConfigOptions'\n+ }\n+};\n+\n+function TSGenerator(options) {\n+ this._sdkRootDir = options.SdkRootDirectory || process.cwd();\n+ this._apiRootDir = path.join(this._sdkRootDir, 'apis');\n+ this._metadataPath = path.join(this._apiRootDir, 'metadata.json');\n+ this._clientsDir = path.join(this._sdkRootDir, 'clients');\n+ this.metadata = null;\n+ this.typings = {};\n+ this.fillApiModelFileNames(this._apiRootDir);\n+}\n+\n+/**\n+ * Loads the AWS SDK metadata.json file.\n+ */\n+TSGenerator.prototype.loadMetadata = function loadMetadata() {\n+ var metadataFile = fs.readFileSync(this._metadataPath);\n+ this.metadata = JSON.parse(metadataFile);\n+ return this.metadata;\n+};\n+\n+/**\n+ * Modifies metadata to include api model filenames.\n+ */\n+TSGenerator.prototype.fillApiModelFileNames = function fillApiModelFileNames(apisPath) {\n+ var modelPaths = fs.readdirSync(apisPath);\n+ if (!this.metadata) {\n+ this.loadMetadata();\n+ }\n+ var metadata = this.metadata;\n+\n+ // sort paths so latest versions appear first\n+ modelPaths = modelPaths.sort(function sort(a, b) {\n+ if (a < b) {\n+ return 1;\n+ } else if (a > b) {\n+ return -1;\n+ } else {\n+ return 0;\n+ }\n+ });\n+\n+ // Only get latest version of models\n+ var foundModels = Object.create(null);\n+ modelPaths.forEach(function(modelFileName) {\n+ var match = modelFileName.match(/^(.+)(-[\\d]{4}-[\\d]{2}-[\\d]{2})\\.normal\\.json$/i);\n+ if (match) {\n+ var model = match[1];\n+ if (!foundModels[model]) {\n+ foundModels[model] = modelFileName;\n+ }\n+ }\n+ });\n+\n+ // now update the metadata\n+ var keys = Object.keys(metadata);\n+ keys.forEach(function(key) {\n+ var modelName = metadata[key].prefix || key;\n+ metadata[key].api_path = foundModels[modelName];\n+ // find waiters file\n+ var baseName = foundModels[modelName].split('.')[0];\n+ if (modelPaths.indexOf(baseName + '.waiters2.json') >= 0) {\n+ metadata[key].waiters_path = baseName + '.waiters2.json';\n+ }\n+ });\n+};\n+\n+/**\n+ * Generates the file containing DocumentClient interfaces.\n+ */\n+TSGenerator.prototype.generateDocumentClientInterfaces = function generateDocumentClientInterfaces() {\n+ var self = this;\n+ // get the dynamodb model\n+ var dynamodbModel = this.loadServiceApi('dynamodb');\n+ var code = '';\n+ // include node reference\n+ code += '///\\n';\n+ // generate shapes\n+ var modelShapes = dynamodbModel.shapes;\n+ // iterate over each shape\n+ var shapeKeys = Object.keys(modelShapes);\n+ shapeKeys.forEach(function (shapeKey) {\n+ var modelShape = modelShapes[shapeKey];\n+ // ignore exceptions\n+ if (modelShape.exception) {\n+ return;\n+ }\n+ // overwrite AttributeValue\n+ if (shapeKey === 'AttributeValue') {\n+ code += self.generateDocString('A JavaScript object or native type.');\n+ code += 'export type ' + shapeKey + ' = any;\\n';\n+ return;\n+ }\n+ code += self.generateTypingsFromShape(shapeKey, modelShape);\n+ });\n+\n+ // write file:\n+ this.writeTypingsFile('document_client_interfaces', path.join(this._sdkRootDir, 'lib', 'dynamodb'), code);\n+};\n+\n+/**\n+ * Returns a service model based on the serviceIdentifier.\n+ 
*/\n+TSGenerator.prototype.loadServiceApi = function loadServiceApi(serviceIdentifier) {\n+ // first, find the correct identifier\n+ var metadata = this.metadata;\n+ var serviceFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].api_path);\n+ var serviceModelFile = fs.readFileSync(serviceFilePath);\n+ var serviceModel = JSON.parse(serviceModelFile);\n+ // load waiters file if it exists\n+ var waiterFilePath;\n+ if (metadata[serviceIdentifier].waiters_path) {\n+ waiterFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].waiters_path);\n+ var waiterModelFile = fs.readFileSync(waiterFilePath);\n+ var waiterModel = JSON.parse(waiterModelFile);\n+ serviceModel.waiters = waiterModel.waiters;\n+ }\n+\n+ return serviceModel;\n+};\n+\n+/**\n+ * Determines if a member is required by checking for it in a list.\n+ */\n+TSGenerator.prototype.checkRequired = function checkRequired(list, member) {\n+ if (list.indexOf(member)) {\n+ return true;\n+ }\n+ return false;\n+};\n+\n+/**\n+ * Generates whitespace based on the count.\n+ */\n+TSGenerator.prototype.tabs = function tabs(count) {\n+ var code = '';\n+ for (var i = 0; i < count; i++) {\n+ code += ' ';\n+ }\n+ return code;\n+};\n+\n+/**\n+ * Transforms documentation string to a more readable format.\n+ */\n+TSGenerator.prototype.transformDocumentation = function transformDocumentation(documentation) {\n+ if (!documentation) {\n+ return '';\n+ }\n+ documentation = documentation.replace(/<(?:.|\\n)*?>/gm, '');\n+ documentation = documentation.replace(/\\*\\//g, '*');\n+ return documentation;\n+};\n+\n+/**\n+ * Returns a doc string based on the supplied documentation.\n+ * Also tabs the doc string if a count is provided.\n+ */\n+TSGenerator.prototype.generateDocString = function generateDocString(documentation, tabCount) {\n+ tabCount = tabCount || 0;\n+ var code = '';\n+ code += this.tabs(tabCount) + '/**\\n';\n+ code += this.tabs(tabCount) + ' * ' + this.transformDocumentation(documentation) + '\\n';\n+ code += this.tabs(tabCount) + ' */\\n';\n+ return code;\n+};\n+\n+/**\n+ * Returns an array of custom configuration options based on a service identiffier.\n+ * Custom configuration options are determined by checking the metadata.json file.\n+ */\n+TSGenerator.prototype.generateCustomConfigFromMetadata = function generateCustomConfigFromMetadata(serviceIdentifier) {\n+ // some services have additional configuration options that are defined in the metadata.json file\n+ // i.e. 
dualstackAvailable = useDualstack\n+ // create reference to custom options\n+ var customConfigurations = [];\n+ var serviceMetadata = this.metadata[serviceIdentifier];\n+ // loop through metadata members\n+ for (var memberName in serviceMetadata) {\n+ if (!serviceMetadata.hasOwnProperty(memberName)) {\n+ continue;\n+ }\n+ var memberValue = serviceMetadata[memberName];\n+ // check configs\n+ switch (memberName) {\n+ case 'dualstackAvailable':\n+ customConfigurations.push(CUSTOM_CONFIG_ENUMS.DUALSTACK);\n+ break;\n+ }\n+ }\n+\n+ return customConfigurations;\n+};\n+\n+/**\n+ * Generates a type or interface based on the shape.\n+ */\n+TSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromShape(shapeKey, shape, tabCount) {\n+ // some shapes shouldn't be generated if they are javascript primitives\n+ if (['string', 'boolean', 'number', 'Date', 'Blob'].indexOf(shapeKey) >= 0) {\n+ return '';\n+ }\n+ var self = this;\n+ var code = '';\n+ tabCount = tabCount || 0;\n+ var tabs = this.tabs;\n+ var type = shape.type;\n+ if (type === 'structure') {\n+ code += tabs(tabCount) + 'export interface ' + shapeKey + ' {\\n';\n+ var members = shape.members;\n+ // cycle through members\n+ var memberKeys = Object.keys(members);\n+ memberKeys.forEach(function(memberKey) {\n+ // docs\n+ var member = members[memberKey];\n+ if (member.documentation) {\n+ code += self.generateDocString(member.documentation, tabCount + 1);\n+ }\n+ var required = self.checkRequired(shape.required || [], memberKey) ? '?' : '';", - "comment_created_at": "2016-10-25T22:00:14+00:00", - "comment_author": "LiuJoyceC", - "comment_body": "If `checkRequired` is supposed to return true when the member is required, then this should be switched.\n`self.checkRequired(shape.required || [], memberKey) ? 
'' : '?'`\n", - "pr_file_module": null - }, - { - "comment_id": "85435739", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1189, - "pr_file": "scripts/lib/ts-generator.js", - "discussion_id": "85010899", - "commented_code": "@@ -0,0 +1,437 @@\n+var fs = require('fs');\n+var path = require('path');\n+\n+var CUSTOM_CONFIG_ENUMS = {\n+ DUALSTACK: {\n+ FILE_NAME: 'config_use_dualstack',\n+ INTERFACE: 'UseDualstackConfigOptions'\n+ }\n+};\n+\n+function TSGenerator(options) {\n+ this._sdkRootDir = options.SdkRootDirectory || process.cwd();\n+ this._apiRootDir = path.join(this._sdkRootDir, 'apis');\n+ this._metadataPath = path.join(this._apiRootDir, 'metadata.json');\n+ this._clientsDir = path.join(this._sdkRootDir, 'clients');\n+ this.metadata = null;\n+ this.typings = {};\n+ this.fillApiModelFileNames(this._apiRootDir);\n+}\n+\n+/**\n+ * Loads the AWS SDK metadata.json file.\n+ */\n+TSGenerator.prototype.loadMetadata = function loadMetadata() {\n+ var metadataFile = fs.readFileSync(this._metadataPath);\n+ this.metadata = JSON.parse(metadataFile);\n+ return this.metadata;\n+};\n+\n+/**\n+ * Modifies metadata to include api model filenames.\n+ */\n+TSGenerator.prototype.fillApiModelFileNames = function fillApiModelFileNames(apisPath) {\n+ var modelPaths = fs.readdirSync(apisPath);\n+ if (!this.metadata) {\n+ this.loadMetadata();\n+ }\n+ var metadata = this.metadata;\n+\n+ // sort paths so latest versions appear first\n+ modelPaths = modelPaths.sort(function sort(a, b) {\n+ if (a < b) {\n+ return 1;\n+ } else if (a > b) {\n+ return -1;\n+ } else {\n+ return 0;\n+ }\n+ });\n+\n+ // Only get latest version of models\n+ var foundModels = Object.create(null);\n+ modelPaths.forEach(function(modelFileName) {\n+ var match = modelFileName.match(/^(.+)(-[\\d]{4}-[\\d]{2}-[\\d]{2})\\.normal\\.json$/i);\n+ if (match) {\n+ var model = match[1];\n+ if (!foundModels[model]) {\n+ foundModels[model] = modelFileName;\n+ }\n+ }\n+ });\n+\n+ // now update the metadata\n+ var keys = Object.keys(metadata);\n+ keys.forEach(function(key) {\n+ var modelName = metadata[key].prefix || key;\n+ metadata[key].api_path = foundModels[modelName];\n+ // find waiters file\n+ var baseName = foundModels[modelName].split('.')[0];\n+ if (modelPaths.indexOf(baseName + '.waiters2.json') >= 0) {\n+ metadata[key].waiters_path = baseName + '.waiters2.json';\n+ }\n+ });\n+};\n+\n+/**\n+ * Generates the file containing DocumentClient interfaces.\n+ */\n+TSGenerator.prototype.generateDocumentClientInterfaces = function generateDocumentClientInterfaces() {\n+ var self = this;\n+ // get the dynamodb model\n+ var dynamodbModel = this.loadServiceApi('dynamodb');\n+ var code = '';\n+ // include node reference\n+ code += '///\\n';\n+ // generate shapes\n+ var modelShapes = dynamodbModel.shapes;\n+ // iterate over each shape\n+ var shapeKeys = Object.keys(modelShapes);\n+ shapeKeys.forEach(function (shapeKey) {\n+ var modelShape = modelShapes[shapeKey];\n+ // ignore exceptions\n+ if (modelShape.exception) {\n+ return;\n+ }\n+ // overwrite AttributeValue\n+ if (shapeKey === 'AttributeValue') {\n+ code += self.generateDocString('A JavaScript object or native type.');\n+ code += 'export type ' + shapeKey + ' = any;\\n';\n+ return;\n+ }\n+ code += self.generateTypingsFromShape(shapeKey, modelShape);\n+ });\n+\n+ // write file:\n+ this.writeTypingsFile('document_client_interfaces', path.join(this._sdkRootDir, 'lib', 'dynamodb'), code);\n+};\n+\n+/**\n+ * Returns a service model based on the serviceIdentifier.\n+ 
*/\n+TSGenerator.prototype.loadServiceApi = function loadServiceApi(serviceIdentifier) {\n+ // first, find the correct identifier\n+ var metadata = this.metadata;\n+ var serviceFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].api_path);\n+ var serviceModelFile = fs.readFileSync(serviceFilePath);\n+ var serviceModel = JSON.parse(serviceModelFile);\n+ // load waiters file if it exists\n+ var waiterFilePath;\n+ if (metadata[serviceIdentifier].waiters_path) {\n+ waiterFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].waiters_path);\n+ var waiterModelFile = fs.readFileSync(waiterFilePath);\n+ var waiterModel = JSON.parse(waiterModelFile);\n+ serviceModel.waiters = waiterModel.waiters;\n+ }\n+\n+ return serviceModel;\n+};\n+\n+/**\n+ * Determines if a member is required by checking for it in a list.\n+ */\n+TSGenerator.prototype.checkRequired = function checkRequired(list, member) {\n+ if (list.indexOf(member)) {\n+ return true;\n+ }\n+ return false;\n+};\n+\n+/**\n+ * Generates whitespace based on the count.\n+ */\n+TSGenerator.prototype.tabs = function tabs(count) {\n+ var code = '';\n+ for (var i = 0; i < count; i++) {\n+ code += ' ';\n+ }\n+ return code;\n+};\n+\n+/**\n+ * Transforms documentation string to a more readable format.\n+ */\n+TSGenerator.prototype.transformDocumentation = function transformDocumentation(documentation) {\n+ if (!documentation) {\n+ return '';\n+ }\n+ documentation = documentation.replace(/<(?:.|\\n)*?>/gm, '');\n+ documentation = documentation.replace(/\\*\\//g, '*');\n+ return documentation;\n+};\n+\n+/**\n+ * Returns a doc string based on the supplied documentation.\n+ * Also tabs the doc string if a count is provided.\n+ */\n+TSGenerator.prototype.generateDocString = function generateDocString(documentation, tabCount) {\n+ tabCount = tabCount || 0;\n+ var code = '';\n+ code += this.tabs(tabCount) + '/**\\n';\n+ code += this.tabs(tabCount) + ' * ' + this.transformDocumentation(documentation) + '\\n';\n+ code += this.tabs(tabCount) + ' */\\n';\n+ return code;\n+};\n+\n+/**\n+ * Returns an array of custom configuration options based on a service identiffier.\n+ * Custom configuration options are determined by checking the metadata.json file.\n+ */\n+TSGenerator.prototype.generateCustomConfigFromMetadata = function generateCustomConfigFromMetadata(serviceIdentifier) {\n+ // some services have additional configuration options that are defined in the metadata.json file\n+ // i.e. 
dualstackAvailable = useDualstack\n+ // create reference to custom options\n+ var customConfigurations = [];\n+ var serviceMetadata = this.metadata[serviceIdentifier];\n+ // loop through metadata members\n+ for (var memberName in serviceMetadata) {\n+ if (!serviceMetadata.hasOwnProperty(memberName)) {\n+ continue;\n+ }\n+ var memberValue = serviceMetadata[memberName];\n+ // check configs\n+ switch (memberName) {\n+ case 'dualstackAvailable':\n+ customConfigurations.push(CUSTOM_CONFIG_ENUMS.DUALSTACK);\n+ break;\n+ }\n+ }\n+\n+ return customConfigurations;\n+};\n+\n+/**\n+ * Generates a type or interface based on the shape.\n+ */\n+TSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromShape(shapeKey, shape, tabCount) {\n+ // some shapes shouldn't be generated if they are javascript primitives\n+ if (['string', 'boolean', 'number', 'Date', 'Blob'].indexOf(shapeKey) >= 0) {\n+ return '';\n+ }\n+ var self = this;\n+ var code = '';\n+ tabCount = tabCount || 0;\n+ var tabs = this.tabs;\n+ var type = shape.type;\n+ if (type === 'structure') {\n+ code += tabs(tabCount) + 'export interface ' + shapeKey + ' {\\n';\n+ var members = shape.members;\n+ // cycle through members\n+ var memberKeys = Object.keys(members);\n+ memberKeys.forEach(function(memberKey) {\n+ // docs\n+ var member = members[memberKey];\n+ if (member.documentation) {\n+ code += self.generateDocString(member.documentation, tabCount + 1);\n+ }\n+ var required = self.checkRequired(shape.required || [], memberKey) ? '?' : '';", - "comment_created_at": "2016-10-27T21:54:33+00:00", - "comment_author": "chrisradek", - "comment_body": "Thanks, didn't catch this due to the mistake above.\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "90351124", - "pr_number": 1245, - "pr_file": "lib/credentials/cognito_identity_credentials.js", - "created_at": "2016-11-30T23:15:44+00:00", - "commented_code": "*/\n storage: (function() {\n try {\n return AWS.util.isBrowser() && window.localStorage !== null && typeof window.localStorage === 'object' ?", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "90351124", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1245, - "pr_file": "lib/credentials/cognito_identity_credentials.js", - "discussion_id": "90351124", - "commented_code": "@@ -331,8 +331,10 @@ AWS.CognitoIdentityCredentials = AWS.util.inherit(AWS.Credentials, {\n */\n storage: (function() {\n try {\n- return AWS.util.isBrowser() && window.localStorage !== null && typeof window.localStorage === 'object' ?", - "comment_created_at": "2016-11-30T23:15:44+00:00", - "comment_author": "chrisradek", - "comment_body": "Can you keep the `AWS.util.isBrowser() && window.localStorage !== null` checks at the top of the try block? As it is, this will always throw an error in node.js (and possibly some 'browser' environments like electron). 
I know the catch block should handle this case, but I'd like to avoid throwing an error if we can predict it.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "51147005", - "pr_number": 888, - "pr_file": "lib/services/s3.js", - "created_at": "2016-01-28T16:34:54+00:00", - "commented_code": "// This chunk of code will set the location constraint param based\n // on the region (when possible), but it will not override a passed-in\n // location constraint.\n if (!params) params = {};\n if (typeof params === 'function') {\n callback = params;\n params = {};\n }", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "51147005", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 888, - "pr_file": "lib/services/s3.js", - "discussion_id": "51147005", - "commented_code": "@@ -411,7 +409,10 @@ AWS.util.update(AWS.S3.prototype, {\n // This chunk of code will set the location constraint param based\n // on the region (when possible), but it will not override a passed-in\n // location constraint.\n- if (!params) params = {};\n+ if (typeof params === 'function') {\n+ callback = params;\n+ params = {};\n+ }", - "comment_created_at": "2016-01-28T16:34:54+00:00", - "comment_author": "chrisradek", - "comment_body": "It might be useful to add one more check after the if block to set params equal to itself or an empty object to protect us in the unlikely event someone passes in a value like `null`. Imagine this would be a rare edge case but we do this already for other operations.\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "107256817", - "pr_number": 1405, - "pr_file": "lib/resource_waiter.js", - "created_at": "2017-03-21T19:46:42+00:00", - "commented_code": "callback = params; params = undefined;\n }\n\n if (params && params.$waiter) {\n if (params.$waiter.delay) {\n this.config.delay = params.$waiter.delay;", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "107256817", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1405, - "pr_file": "lib/resource_waiter.js", - "discussion_id": "107256817", - "commented_code": "@@ -136,6 +136,16 @@ AWS.ResourceWaiter = inherit({\n callback = params; params = undefined;\n }\n \n+ if (params && params.$waiter) {\n+ if (params.$waiter.delay) {\n+ this.config.delay = params.$waiter.delay;", - "comment_created_at": "2017-03-21T19:46:42+00:00", - "comment_author": "chrisradek", - "comment_body": "Just in case someone wants to enter `0`, maybe use a `typeof x === 'number'` check instead.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "147590167", - "pr_number": 1782, - "pr_file": "lib/event_listeners.js", - "created_at": "2017-10-29T18:37:50+00:00", - "commented_code": "var req = resp.request;\n var logger = req.service.config.logger;\n if (!logger) return;\n function filterSensitiveLog(inputShape, shape) {\n switch (inputShape.type) {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "147590167", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1782, - "pr_file": "lib/event_listeners.js", - "discussion_id": "147590167", - "commented_code": "@@ -469,14 +469,52 @@ AWS.EventListeners = {\n var req = resp.request;\n var logger = req.service.config.logger;\n if (!logger) return;\n+ function filterSensitiveLog(inputShape, shape) {\n+ switch (inputShape.type) {", - "comment_created_at": "2017-10-29T18:37:50+00:00", - "comment_author": "jeskew", - "comment_body": "I think you need to return early here if `shape` is 
undefined. There are recursive shapes defined in some services, and this function will need to check for some stopping condition.", - "pr_file_module": null - }, - { - "comment_id": "147770081", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1782, - "pr_file": "lib/event_listeners.js", - "discussion_id": "147590167", - "commented_code": "@@ -469,14 +469,52 @@ AWS.EventListeners = {\n var req = resp.request;\n var logger = req.service.config.logger;\n if (!logger) return;\n+ function filterSensitiveLog(inputShape, shape) {\n+ switch (inputShape.type) {", - "comment_created_at": "2017-10-30T17:08:54+00:00", - "comment_author": "AllanZhengYP", - "comment_body": "Oh right. Thank you for pointing out.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/aws-sdk-js-document-apis-completely.json b/_reviewers/aws-sdk-js-document-apis-completely.json new file mode 100644 index 0000000..0188a2f --- /dev/null +++ b/_reviewers/aws-sdk-js-document-apis-completely.json @@ -0,0 +1,68 @@ +[ + { + "discussion_id": "574012172", + "pr_number": 3524, + "pr_file": "lib/config-base.d.ts", + "created_at": "2021-02-10T19:25:57+00:00", + "commented_code": "* Used in node.js environments only.\n */\n connectTimeout?: number;\n /**\n * Custom DNS lookup function.\n * Defaults to dns.lookup.", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "574012172", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 3524, + "pr_file": "lib/config-base.d.ts", + "discussion_id": "574012172", + "commented_code": "@@ -61,6 +61,11 @@ export interface HTTPOptions {\n * Used in node.js environments only.\n */\n connectTimeout?: number;\n+ /**\n+ * Custom DNS lookup function.\n+ * Defaults to dns.lookup.", + "comment_created_at": "2021-02-10T19:25:57+00:00", + "comment_author": "AllanZhengYP", + "comment_body": "Nit: can you specify that they are node.js environments only? 
And looks like it's only available in newer version: https://github.com/nodejs/node/commit/5bd6f516d82d069ff8710b86108dedc333b2b580\r\n\r\n```suggestion\r\n * Defaults to dns.lookup.\r\n * Used in Node.js (>= v12.x) environment only.\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "229453899", + "pr_number": 2253, + "pr_file": "lib/config.d.ts", + "created_at": "2018-10-30T19:30:50+00:00", + "commented_code": "* by DynamoDB.\n */\n dynamoDbCrc32?: boolean;\n /**\n * whether to request endpoint for operations that allow", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "229453899", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2253, + "pr_file": "lib/config.d.ts", + "discussion_id": "229453899", + "commented_code": "@@ -276,4 +276,9 @@ export abstract class ConfigurationOptions {\n * by DynamoDB.\n */\n dynamoDbCrc32?: boolean;\n+ /**\n+ * whether to request endpoint for operations that allow ", + "comment_created_at": "2018-10-30T19:30:50+00:00", + "comment_author": "chrisradek", + "comment_body": "Grammar: Maybe change to something like\r\n> Whether to enable endpoint discovery for operations that allow optionally using an endpoint returned by the service.\r\n\r\nI couldn't find an example of what other teams were using for their docs.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "225988591", + "pr_number": 2283, + "pr_file": "lib/shared-ini/ini-loader.d.ts", + "created_at": "2018-10-17T15:46:15+00:00", + "commented_code": "export interface LoadFileOptions {\n filename?: string,\n isConfig?: boolean,\n}\n\nexport interface IniFileContent {\n [key: string]: {[key: string]: string}\n}\n\nexport class IniLoader{\n \n/** Remove all cached files. Used after config files are updated. */\n clearCachedFiles():void;\n\n/**\n * Load configurations from config/credentials files and cache them \n * for later use. If no file is specified it will try to load default\n * files.\n * @returns {object} object of all profile information in the file\n */\n loadFrom(options: LoadFileOptions): IniFileContent;\n\n/**\n * Read specified file and return parsed config object. This method will always\n * read from disk and won't update cache. This is a lower level function of \n * loadFrom().\n * @param filename [string] valid readable file path containing aws credentials\n * or aws configs\n * @param isConfig [boolean] true if specified file is an aws config file; false\n * if the file is an aws credentials file\n */\n parseFile(filename: string, isConfig: boolean): IniFileContent;\n}\n\nexport function parseFile(filename: string, isConfig: boolean): IniFileContent;", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "225988591", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2283, + "pr_file": "lib/shared-ini/ini-loader.d.ts", + "discussion_id": "225988591", + "commented_code": "@@ -0,0 +1,35 @@\n+export interface LoadFileOptions {\n+ filename?: string,\n+ isConfig?: boolean,\n+}\n+\n+export interface IniFileContent {\n+ [key: string]: {[key: string]: string}\n+}\n+\n+export class IniLoader{\n+ \n+/** Remove all cached files. Used after config files are updated. */\n+ clearCachedFiles():void;\n+\n+/**\n+ * Load configurations from config/credentials files and cache them \n+ * for later use. 
If no file is specified it will try to load default\n+ * files.\n+ * @returns {object} object of all profile information in the file\n+ */\n+ loadFrom(options: LoadFileOptions): IniFileContent;\n+\n+/**\n+ * Read specified file and return parsed config object. This method will always\n+ * read from disk and won't update cache. This is a lower level function of \n+ * loadFrom().\n+ * @param filename [string] valid readable file path containing aws credentials\n+ * or aws configs\n+ * @param isConfig [boolean] true if specified file is an aws config file; false\n+ * if the file is an aws credentials file\n+ */\n+ parseFile(filename: string, isConfig: boolean): IniFileContent;\n+}\n+\n+export function parseFile(filename: string, isConfig: boolean): IniFileContent;", + "comment_created_at": "2018-10-17T15:46:15+00:00", + "comment_author": "chrisradek", + "comment_body": "Is `parseFile` exposed to consumers of the SDK? If so, it should probably have some documentation, otherwise we don't need typings for it.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/aws-sdk-js-document-apis-completely.md b/_reviewers/aws-sdk-js-document-apis-completely.md index 6db990b..cb4de5c 100644 --- a/_reviewers/aws-sdk-js-document-apis-completely.md +++ b/_reviewers/aws-sdk-js-document-apis-completely.md @@ -36,73 +36,3 @@ endpointDiscovery?: boolean; ``` Thorough documentation helps other developers understand how to use your code correctly and identify potential limitations or requirements. - - -[ - { - "discussion_id": "574012172", - "pr_number": 3524, - "pr_file": "lib/config-base.d.ts", - "created_at": "2021-02-10T19:25:57+00:00", - "commented_code": "* Used in node.js environments only.\n */\n connectTimeout?: number;\n /**\n * Custom DNS lookup function.\n * Defaults to dns.lookup.", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "574012172", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 3524, - "pr_file": "lib/config-base.d.ts", - "discussion_id": "574012172", - "commented_code": "@@ -61,6 +61,11 @@ export interface HTTPOptions {\n * Used in node.js environments only.\n */\n connectTimeout?: number;\n+ /**\n+ * Custom DNS lookup function.\n+ * Defaults to dns.lookup.", - "comment_created_at": "2021-02-10T19:25:57+00:00", - "comment_author": "AllanZhengYP", - "comment_body": "Nit: can you specify that they are node.js environments only? 
And looks like it's only available in newer version: https://github.com/nodejs/node/commit/5bd6f516d82d069ff8710b86108dedc333b2b580\r\n\r\n```suggestion\r\n * Defaults to dns.lookup.\r\n * Used in Node.js (>= v12.x) environment only.\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "229453899", - "pr_number": 2253, - "pr_file": "lib/config.d.ts", - "created_at": "2018-10-30T19:30:50+00:00", - "commented_code": "* by DynamoDB.\n */\n dynamoDbCrc32?: boolean;\n /**\n * whether to request endpoint for operations that allow", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "229453899", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2253, - "pr_file": "lib/config.d.ts", - "discussion_id": "229453899", - "commented_code": "@@ -276,4 +276,9 @@ export abstract class ConfigurationOptions {\n * by DynamoDB.\n */\n dynamoDbCrc32?: boolean;\n+ /**\n+ * whether to request endpoint for operations that allow ", - "comment_created_at": "2018-10-30T19:30:50+00:00", - "comment_author": "chrisradek", - "comment_body": "Grammar: Maybe change to something like\r\n> Whether to enable endpoint discovery for operations that allow optionally using an endpoint returned by the service.\r\n\r\nI couldn't find an example of what other teams were using for their docs.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "225988591", - "pr_number": 2283, - "pr_file": "lib/shared-ini/ini-loader.d.ts", - "created_at": "2018-10-17T15:46:15+00:00", - "commented_code": "export interface LoadFileOptions {\n filename?: string,\n isConfig?: boolean,\n}\n\nexport interface IniFileContent {\n [key: string]: {[key: string]: string}\n}\n\nexport class IniLoader{\n \n/** Remove all cached files. Used after config files are updated. */\n clearCachedFiles():void;\n\n/**\n * Load configurations from config/credentials files and cache them \n * for later use. If no file is specified it will try to load default\n * files.\n * @returns {object} object of all profile information in the file\n */\n loadFrom(options: LoadFileOptions): IniFileContent;\n\n/**\n * Read specified file and return parsed config object. This method will always\n * read from disk and won't update cache. This is a lower level function of \n * loadFrom().\n * @param filename [string] valid readable file path containing aws credentials\n * or aws configs\n * @param isConfig [boolean] true if specified file is an aws config file; false\n * if the file is an aws credentials file\n */\n parseFile(filename: string, isConfig: boolean): IniFileContent;\n}\n\nexport function parseFile(filename: string, isConfig: boolean): IniFileContent;", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "225988591", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2283, - "pr_file": "lib/shared-ini/ini-loader.d.ts", - "discussion_id": "225988591", - "commented_code": "@@ -0,0 +1,35 @@\n+export interface LoadFileOptions {\n+ filename?: string,\n+ isConfig?: boolean,\n+}\n+\n+export interface IniFileContent {\n+ [key: string]: {[key: string]: string}\n+}\n+\n+export class IniLoader{\n+ \n+/** Remove all cached files. Used after config files are updated. */\n+ clearCachedFiles():void;\n+\n+/**\n+ * Load configurations from config/credentials files and cache them \n+ * for later use. 
If no file is specified it will try to load default\n+ * files.\n+ * @returns {object} object of all profile information in the file\n+ */\n+ loadFrom(options: LoadFileOptions): IniFileContent;\n+\n+/**\n+ * Read specified file and return parsed config object. This method will always\n+ * read from disk and won't update cache. This is a lower level function of \n+ * loadFrom().\n+ * @param filename [string] valid readable file path containing aws credentials\n+ * or aws configs\n+ * @param isConfig [boolean] true if specified file is an aws config file; false\n+ * if the file is an aws credentials file\n+ */\n+ parseFile(filename: string, isConfig: boolean): IniFileContent;\n+}\n+\n+export function parseFile(filename: string, isConfig: boolean): IniFileContent;", - "comment_created_at": "2018-10-17T15:46:15+00:00", - "comment_author": "chrisradek", - "comment_body": "Is `parseFile` exposed to consumers of the SDK? If so, it should probably have some documentation, otherwise we don't need typings for it.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/aws-sdk-js-document-apis-thoroughly.json b/_reviewers/aws-sdk-js-document-apis-thoroughly.json new file mode 100644 index 0000000..3d75566 --- /dev/null +++ b/_reviewers/aws-sdk-js-document-apis-thoroughly.json @@ -0,0 +1,134 @@ +[ + { + "discussion_id": "42285305", + "pr_number": 707, + "pr_file": "lib/config.js", + "created_at": "2015-10-16T20:17:20+00:00", + "commented_code": "convertResponseTypes: true,\n dynamoDbCrc32: true,\n systemClockOffset: 0,\n signatureVersion: null\n signatureVersion: null,\n signatureCache: true", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "42285305", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 707, + "pr_file": "lib/config.js", + "discussion_id": "42285305", + "commented_code": "@@ -390,7 +390,8 @@ AWS.Config = AWS.util.inherit({\n convertResponseTypes: true,\n dynamoDbCrc32: true,\n systemClockOffset: 0,\n- signatureVersion: null\n+ signatureVersion: null,\n+ signatureCache: true", + "comment_created_at": "2015-10-16T20:17:20+00:00", + "comment_author": "chrisradek", + "comment_body": "Would you mind adding some docs around this new config parameter?\nSomething like this at line 98:\n\n```\n * @!attribute signatureCache\n * @return [Boolean] whether the signature to sign requests with (overriding\n * the API configuration) is cached. Only applies to the signature version 'v4'.\n * Defaults to `true`.\n```\n\nand something like this at line 185:\n\n```\n * @option options signatureCache [Boolean] whether the signature to sign\n * requests with (overriding the API configuration) is cached. Only applies\n * to the signature version 'v4'. Defaults to `true`.\n```\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "107561314", + "pr_number": 1425, + "pr_file": "lib/s3/managed_upload.js", + "created_at": "2017-03-22T23:56:11+00:00", + "commented_code": "* failures manually.\n * @option options service [AWS.S3] an optional S3 service object to use for\n * requests. 
This object might have bound parameters used by the uploader.\n * @option options tags [Array] The tags to apply to the uploaded object.\n * Each tag should have a `Key` and `Value` keys.", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "107561314", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1425, + "pr_file": "lib/s3/managed_upload.js", + "discussion_id": "107561314", + "commented_code": "@@ -60,6 +60,8 @@ AWS.S3.ManagedUpload = AWS.util.inherit({\n * failures manually.\n * @option options service [AWS.S3] an optional S3 service object to use for\n * requests. This object might have bound parameters used by the uploader.\n+ * @option options tags [Array] The tags to apply to the uploaded object.\n+ * Each tag should have a `Key` and `Value` keys.", + "comment_created_at": "2017-03-22T23:56:11+00:00", + "comment_author": "chrisradek", + "comment_body": "Minor: Might be worth adding an example that uses tags.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "229467137", + "pr_number": 2253, + "pr_file": "lib/discover_endpoint.js", + "created_at": "2018-10-30T20:10:18+00:00", + "commented_code": "var AWS = require('./core');\nvar util = require('./util');\nvar ENDPOINT_OPERATION_MAX_RETRIES = 60;\nvar endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n\n/**\n * Generate key to index the endpoints in the cache\n * @return [map] object with keys to index endpoints.\n * @api private\n */\nfunction getCacheKey(request) {\n var service = request.service;\n var api = service.api || {};\n var operations = api.operations;\n var identifiers = {};\n if (operations[request.operation] && operations[request.operation].name) {\n identifiers.operation = operations[request.operation].name;\n }\n if (service.config.region) {\n identifiers.region = service.config.region;\n }\n if (api.serviceId) {\n identifiers.serviceId = api.serviceId\n }\n if (service.config.credentials.accessKeyId) {\n identifiers.accessKeyId = service.config.credentials.accessKeyId\n }\n return identifiers;\n}\n\n/**\n * @api private", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "229467137", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2253, + "pr_file": "lib/discover_endpoint.js", + "discussion_id": "229467137", + "commented_code": "@@ -0,0 +1,340 @@\n+var AWS = require('./core');\n+var util = require('./util');\n+var ENDPOINT_OPERATION_MAX_RETRIES = 60;\n+var endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n+\n+/**\n+ * Generate key to index the endpoints in the cache\n+ * @return [map] object with keys to index endpoints.\n+ * @api private\n+ */\n+function getCacheKey(request) {\n+ var service = request.service;\n+ var api = service.api || {};\n+ var operations = api.operations;\n+ var identifiers = {};\n+ if (operations[request.operation] && operations[request.operation].name) {\n+ identifiers.operation = operations[request.operation].name;\n+ }\n+ if (service.config.region) {\n+ identifiers.region = service.config.region;\n+ }\n+ if (api.serviceId) {\n+ identifiers.serviceId = api.serviceId\n+ }\n+ if (service.config.credentials.accessKeyId) {\n+ identifiers.accessKeyId = service.config.credentials.accessKeyId\n+ }\n+ return identifiers;\n+}\n+\n+/**\n+ * @api private", + "comment_created_at": "2018-10-30T20:10:18+00:00", + "comment_author": "chrisradek", + "comment_body": "Can you add a comment that explains what this function is supposed to do? 
It looks like you're populating an object with identifiers and customer-provided values, but it took me a while to grok that and the function name isn't clear. ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "229470362", + "pr_number": 2253, + "pr_file": "lib/discover_endpoint.js", + "created_at": "2018-10-30T20:19:55+00:00", + "commented_code": "var AWS = require('./core');\nvar util = require('./util');\nvar ENDPOINT_OPERATION_MAX_RETRIES = 60;\nvar endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n\n/**\n * Generate key to index the endpoints in the cache\n * @return [map] object with keys to index endpoints.\n * @api private\n */\nfunction getCacheKey(request) {\n var service = request.service;\n var api = service.api || {};\n var operations = api.operations;\n var identifiers = {};\n if (operations[request.operation] && operations[request.operation].name) {\n identifiers.operation = operations[request.operation].name;\n }\n if (service.config.region) {\n identifiers.region = service.config.region;\n }\n if (api.serviceId) {\n identifiers.serviceId = api.serviceId\n }\n if (service.config.credentials.accessKeyId) {\n identifiers.accessKeyId = service.config.credentials.accessKeyId\n }\n return identifiers;\n}\n\n/**\n * @api private\n */\nfunction marshallCustomIdentifiersHelper(result, params, shape) {\n if (!shape || params === undefined || params === null) return;\n if (shape.type === 'structure' && shape.required && shape.required.length > 0) {\n util.arrayEach(shape.required, function(name) {\n var memberShape = shape.members[name];\n if (memberShape.endpointDiscoveryId === true) {\n var locationName = memberShape.isLocationName ? memberShape.name : name;\n result[locationName] = String(params[name]);\n } else {\n marshallCustomIdentifiersHelper(result, params[name], memberShape);\n }\n });\n }\n}\n\n/**\n * Get customized cache key according to the 'endpointDiscoveryId' trait.", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "229470362", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2253, + "pr_file": "lib/discover_endpoint.js", + "discussion_id": "229470362", + "commented_code": "@@ -0,0 +1,340 @@\n+var AWS = require('./core');\n+var util = require('./util');\n+var ENDPOINT_OPERATION_MAX_RETRIES = 60;\n+var endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n+\n+/**\n+ * Generate key to index the endpoints in the cache\n+ * @return [map] object with keys to index endpoints.\n+ * @api private\n+ */\n+function getCacheKey(request) {\n+ var service = request.service;\n+ var api = service.api || {};\n+ var operations = api.operations;\n+ var identifiers = {};\n+ if (operations[request.operation] && operations[request.operation].name) {\n+ identifiers.operation = operations[request.operation].name;\n+ }\n+ if (service.config.region) {\n+ identifiers.region = service.config.region;\n+ }\n+ if (api.serviceId) {\n+ identifiers.serviceId = api.serviceId\n+ }\n+ if (service.config.credentials.accessKeyId) {\n+ identifiers.accessKeyId = service.config.credentials.accessKeyId\n+ }\n+ return identifiers;\n+}\n+\n+/**\n+ * @api private\n+ */\n+function marshallCustomIdentifiersHelper(result, params, shape) {\n+ if (!shape || params === undefined || params === null) return;\n+ if (shape.type === 'structure' && shape.required && shape.required.length > 0) {\n+ util.arrayEach(shape.required, function(name) {\n+ var memberShape = shape.members[name];\n+ if (memberShape.endpointDiscoveryId === true) {\n+ var 
locationName = memberShape.isLocationName ? memberShape.name : name;\n+ result[locationName] = String(params[name]);\n+ } else {\n+ marshallCustomIdentifiersHelper(result, params[name], memberShape);\n+ }\n+ });\n+ }\n+}\n+\n+/**\n+ * Get customized cache key according to the 'endpointDiscoveryId' trait.", + "comment_created_at": "2018-10-30T20:19:55+00:00", + "comment_author": "chrisradek", + "comment_body": "I think this comment is a bit misleading, as is the one for `getCacheKey`. Both imply that you're going to get a single `key` (presumably a string), but you're returning a map. I think something like the following is more clear: \r\n```javascript\r\n/**\r\n * Get custom identifiers for cache key.\r\n * Identifies custom identifiers by checking each shape's `endpointDiscoveryId` trait.\r\n */\r\n```\r\nThis would at least help me, because I kept expecting `cacheKey` later on to be a string you pass to `endpointCache.get`, but it turns out you pass in a map of elements.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "223489992", + "pr_number": 2283, + "pr_file": "lib/shared-ini/ini-loader.js", + "created_at": "2018-10-08T20:37:17+00:00", + "commented_code": "var AWS = require('../core');\nvar os = require('os');\nvar path = require('path');\n\nmodule.exports = AWS.util.inherit({", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "223489992", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2283, + "pr_file": "lib/shared-ini/ini-loader.js", + "discussion_id": "223489992", + "commented_code": "@@ -0,0 +1,70 @@\n+var AWS = require('../core');\n+var os = require('os');\n+var path = require('path');\n+\n+module.exports = AWS.util.inherit({", + "comment_created_at": "2018-10-08T20:37:17+00:00", + "comment_author": "chrisradek", + "comment_body": "If you want this to appear in documentation, you also need to attach IniLoader to the AWS namespace:\r\n```javascript\r\nAWS.IniLoader = AWS.util.inherit/* ... 
*/\r\n\r\n// optionally also export it:\r\nmodule.exports = AWS.IniLoader;\r\n```\r\n\r\nYou'll also want to add doc strings to the public methods.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "138403541", + "pr_number": 1711, + "pr_file": "lib/services/s3.js", + "created_at": "2017-09-12T16:40:41+00:00", + "commented_code": "}\n },\n\n /**\n * @!method getSignedUrlPromise()\n * Returns a 'thenable' promise.", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "138403541", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1711, + "pr_file": "lib/services/s3.js", + "discussion_id": "138403541", + "commented_code": "@@ -782,6 +782,52 @@ AWS.util.update(AWS.S3.prototype, {\n }\n },\n \n+ /**\n+ * @!method getSignedUrlPromise()\n+ * Returns a 'thenable' promise.", + "comment_created_at": "2017-09-12T16:40:41+00:00", + "comment_author": "chrisradek", + "comment_body": "Can you amend this to state a URL will be returned?\r\n\r\nSomething like:\r\n\r\n> Returns a 'thenable' promise that will be resolved with a pre-signed URL.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/aws-sdk-js-document-apis-thoroughly.md b/_reviewers/aws-sdk-js-document-apis-thoroughly.md index 391888a..9102d53 100644 --- a/_reviewers/aws-sdk-js-document-apis-thoroughly.md +++ b/_reviewers/aws-sdk-js-document-apis-thoroughly.md @@ -46,139 +46,3 @@ For methods that return promises, clearly document what the promise resolves to: ``` Ensure that if you're making an API public, all necessary documentation is added and the component is properly exported so it appears in generated documentation. - - -[ - { - "discussion_id": "42285305", - "pr_number": 707, - "pr_file": "lib/config.js", - "created_at": "2015-10-16T20:17:20+00:00", - "commented_code": "convertResponseTypes: true,\n dynamoDbCrc32: true,\n systemClockOffset: 0,\n signatureVersion: null\n signatureVersion: null,\n signatureCache: true", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "42285305", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 707, - "pr_file": "lib/config.js", - "discussion_id": "42285305", - "commented_code": "@@ -390,7 +390,8 @@ AWS.Config = AWS.util.inherit({\n convertResponseTypes: true,\n dynamoDbCrc32: true,\n systemClockOffset: 0,\n- signatureVersion: null\n+ signatureVersion: null,\n+ signatureCache: true", - "comment_created_at": "2015-10-16T20:17:20+00:00", - "comment_author": "chrisradek", - "comment_body": "Would you mind adding some docs around this new config parameter?\nSomething like this at line 98:\n\n```\n * @!attribute signatureCache\n * @return [Boolean] whether the signature to sign requests with (overriding\n * the API configuration) is cached. Only applies to the signature version 'v4'.\n * Defaults to `true`.\n```\n\nand something like this at line 185:\n\n```\n * @option options signatureCache [Boolean] whether the signature to sign\n * requests with (overriding the API configuration) is cached. Only applies\n * to the signature version 'v4'. Defaults to `true`.\n```\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "107561314", - "pr_number": 1425, - "pr_file": "lib/s3/managed_upload.js", - "created_at": "2017-03-22T23:56:11+00:00", - "commented_code": "* failures manually.\n * @option options service [AWS.S3] an optional S3 service object to use for\n * requests. 
This object might have bound parameters used by the uploader.\n * @option options tags [Array] The tags to apply to the uploaded object.\n * Each tag should have a `Key` and `Value` keys.", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "107561314", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1425, - "pr_file": "lib/s3/managed_upload.js", - "discussion_id": "107561314", - "commented_code": "@@ -60,6 +60,8 @@ AWS.S3.ManagedUpload = AWS.util.inherit({\n * failures manually.\n * @option options service [AWS.S3] an optional S3 service object to use for\n * requests. This object might have bound parameters used by the uploader.\n+ * @option options tags [Array] The tags to apply to the uploaded object.\n+ * Each tag should have a `Key` and `Value` keys.", - "comment_created_at": "2017-03-22T23:56:11+00:00", - "comment_author": "chrisradek", - "comment_body": "Minor: Might be worth adding an example that uses tags.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "229467137", - "pr_number": 2253, - "pr_file": "lib/discover_endpoint.js", - "created_at": "2018-10-30T20:10:18+00:00", - "commented_code": "var AWS = require('./core');\nvar util = require('./util');\nvar ENDPOINT_OPERATION_MAX_RETRIES = 60;\nvar endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n\n/**\n * Generate key to index the endpoints in the cache\n * @return [map] object with keys to index endpoints.\n * @api private\n */\nfunction getCacheKey(request) {\n var service = request.service;\n var api = service.api || {};\n var operations = api.operations;\n var identifiers = {};\n if (operations[request.operation] && operations[request.operation].name) {\n identifiers.operation = operations[request.operation].name;\n }\n if (service.config.region) {\n identifiers.region = service.config.region;\n }\n if (api.serviceId) {\n identifiers.serviceId = api.serviceId\n }\n if (service.config.credentials.accessKeyId) {\n identifiers.accessKeyId = service.config.credentials.accessKeyId\n }\n return identifiers;\n}\n\n/**\n * @api private", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "229467137", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2253, - "pr_file": "lib/discover_endpoint.js", - "discussion_id": "229467137", - "commented_code": "@@ -0,0 +1,340 @@\n+var AWS = require('./core');\n+var util = require('./util');\n+var ENDPOINT_OPERATION_MAX_RETRIES = 60;\n+var endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n+\n+/**\n+ * Generate key to index the endpoints in the cache\n+ * @return [map] object with keys to index endpoints.\n+ * @api private\n+ */\n+function getCacheKey(request) {\n+ var service = request.service;\n+ var api = service.api || {};\n+ var operations = api.operations;\n+ var identifiers = {};\n+ if (operations[request.operation] && operations[request.operation].name) {\n+ identifiers.operation = operations[request.operation].name;\n+ }\n+ if (service.config.region) {\n+ identifiers.region = service.config.region;\n+ }\n+ if (api.serviceId) {\n+ identifiers.serviceId = api.serviceId\n+ }\n+ if (service.config.credentials.accessKeyId) {\n+ identifiers.accessKeyId = service.config.credentials.accessKeyId\n+ }\n+ return identifiers;\n+}\n+\n+/**\n+ * @api private", - "comment_created_at": "2018-10-30T20:10:18+00:00", - "comment_author": "chrisradek", - "comment_body": "Can you add a comment that explains what this function is supposed to do? 
It looks like you're populating an object with identifiers and customer-provided values, but it took me a while to grok that and the function name isn't clear. ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "229470362", - "pr_number": 2253, - "pr_file": "lib/discover_endpoint.js", - "created_at": "2018-10-30T20:19:55+00:00", - "commented_code": "var AWS = require('./core');\nvar util = require('./util');\nvar ENDPOINT_OPERATION_MAX_RETRIES = 60;\nvar endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n\n/**\n * Generate key to index the endpoints in the cache\n * @return [map] object with keys to index endpoints.\n * @api private\n */\nfunction getCacheKey(request) {\n var service = request.service;\n var api = service.api || {};\n var operations = api.operations;\n var identifiers = {};\n if (operations[request.operation] && operations[request.operation].name) {\n identifiers.operation = operations[request.operation].name;\n }\n if (service.config.region) {\n identifiers.region = service.config.region;\n }\n if (api.serviceId) {\n identifiers.serviceId = api.serviceId\n }\n if (service.config.credentials.accessKeyId) {\n identifiers.accessKeyId = service.config.credentials.accessKeyId\n }\n return identifiers;\n}\n\n/**\n * @api private\n */\nfunction marshallCustomIdentifiersHelper(result, params, shape) {\n if (!shape || params === undefined || params === null) return;\n if (shape.type === 'structure' && shape.required && shape.required.length > 0) {\n util.arrayEach(shape.required, function(name) {\n var memberShape = shape.members[name];\n if (memberShape.endpointDiscoveryId === true) {\n var locationName = memberShape.isLocationName ? memberShape.name : name;\n result[locationName] = String(params[name]);\n } else {\n marshallCustomIdentifiersHelper(result, params[name], memberShape);\n }\n });\n }\n}\n\n/**\n * Get customized cache key according to the 'endpointDiscoveryId' trait.", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "229470362", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2253, - "pr_file": "lib/discover_endpoint.js", - "discussion_id": "229470362", - "commented_code": "@@ -0,0 +1,340 @@\n+var AWS = require('./core');\n+var util = require('./util');\n+var ENDPOINT_OPERATION_MAX_RETRIES = 60;\n+var endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n+\n+/**\n+ * Generate key to index the endpoints in the cache\n+ * @return [map] object with keys to index endpoints.\n+ * @api private\n+ */\n+function getCacheKey(request) {\n+ var service = request.service;\n+ var api = service.api || {};\n+ var operations = api.operations;\n+ var identifiers = {};\n+ if (operations[request.operation] && operations[request.operation].name) {\n+ identifiers.operation = operations[request.operation].name;\n+ }\n+ if (service.config.region) {\n+ identifiers.region = service.config.region;\n+ }\n+ if (api.serviceId) {\n+ identifiers.serviceId = api.serviceId\n+ }\n+ if (service.config.credentials.accessKeyId) {\n+ identifiers.accessKeyId = service.config.credentials.accessKeyId\n+ }\n+ return identifiers;\n+}\n+\n+/**\n+ * @api private\n+ */\n+function marshallCustomIdentifiersHelper(result, params, shape) {\n+ if (!shape || params === undefined || params === null) return;\n+ if (shape.type === 'structure' && shape.required && shape.required.length > 0) {\n+ util.arrayEach(shape.required, function(name) {\n+ var memberShape = shape.members[name];\n+ if (memberShape.endpointDiscoveryId === true) {\n+ var 
locationName = memberShape.isLocationName ? memberShape.name : name;\n+ result[locationName] = String(params[name]);\n+ } else {\n+ marshallCustomIdentifiersHelper(result, params[name], memberShape);\n+ }\n+ });\n+ }\n+}\n+\n+/**\n+ * Get customized cache key according to the 'endpointDiscoveryId' trait.", - "comment_created_at": "2018-10-30T20:19:55+00:00", - "comment_author": "chrisradek", - "comment_body": "I think this comment is a bit misleading, as is the one for `getCacheKey`. Both imply that you're going to get a single `key` (presumably a string), but you're returning a map. I think something like the following is more clear: \r\n```javascript\r\n/**\r\n * Get custom identifiers for cache key.\r\n * Identifies custom identifiers by checking each shape's `endpointDiscoveryId` trait.\r\n */\r\n```\r\nThis would at least help me, because I kept expecting `cacheKey` later on to be a string you pass to `endpointCache.get`, but it turns out you pass in a map of elements.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "223489992", - "pr_number": 2283, - "pr_file": "lib/shared-ini/ini-loader.js", - "created_at": "2018-10-08T20:37:17+00:00", - "commented_code": "var AWS = require('../core');\nvar os = require('os');\nvar path = require('path');\n\nmodule.exports = AWS.util.inherit({", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "223489992", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2283, - "pr_file": "lib/shared-ini/ini-loader.js", - "discussion_id": "223489992", - "commented_code": "@@ -0,0 +1,70 @@\n+var AWS = require('../core');\n+var os = require('os');\n+var path = require('path');\n+\n+module.exports = AWS.util.inherit({", - "comment_created_at": "2018-10-08T20:37:17+00:00", - "comment_author": "chrisradek", - "comment_body": "If you want this to appear in documentation, you also need to attach IniLoader to the AWS namespace:\r\n```javascript\r\nAWS.IniLoader = AWS.util.inherit/* ... 
*/\r\n\r\n// optionally also export it:\r\nmodule.exports = AWS.IniLoader;\r\n```\r\n\r\nYou'll also want to add doc strings to the public methods.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "138403541", - "pr_number": 1711, - "pr_file": "lib/services/s3.js", - "created_at": "2017-09-12T16:40:41+00:00", - "commented_code": "}\n },\n\n /**\n * @!method getSignedUrlPromise()\n * Returns a 'thenable' promise.", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "138403541", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1711, - "pr_file": "lib/services/s3.js", - "discussion_id": "138403541", - "commented_code": "@@ -782,6 +782,52 @@ AWS.util.update(AWS.S3.prototype, {\n }\n },\n \n+ /**\n+ * @!method getSignedUrlPromise()\n+ * Returns a 'thenable' promise.", - "comment_created_at": "2017-09-12T16:40:41+00:00", - "comment_author": "chrisradek", - "comment_body": "Can you amend this to state a URL will be returned?\r\n\r\nSomething like:\r\n\r\n> Returns a 'thenable' promise that will be resolved with a pre-signed URL.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/aws-sdk-js-document-sdk-version-requirements.json b/_reviewers/aws-sdk-js-document-sdk-version-requirements.json new file mode 100644 index 0000000..98970e9 --- /dev/null +++ b/_reviewers/aws-sdk-js-document-sdk-version-requirements.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "396624797", + "pr_number": 3171, + "pr_file": ".github/ISSUE_TEMPLATE/---bug-report.md", + "created_at": "2020-03-23T17:25:09+00:00", + "commented_code": "* For browsers, the SDK version number is in the script tag
src=\".../aws-sdk-2.466.0.min.js\"
\n* For Node.js, get SDK version by\n * running command `npm list aws-sdk` from your root directory\n * printing the output of `AWS.VERSION` in your code where `AWS = require(\"aws-sdk\");`\n * printing the output of `console.log(AWS.VERSION)` in your code where `AWS = require(\"aws-sdk\");`\n * if running on Lambda, Lambda runtimes can be found [here](https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html)", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "396624797", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 3171, + "pr_file": ".github/ISSUE_TEMPLATE/---bug-report.md", + "discussion_id": "396624797", + "commented_code": "@@ -28,7 +28,8 @@ Example: v2.466.0\n * For browsers, the SDK version number is in the script tag
src=\".../aws-sdk-2.466.0.min.js\"
\n * For Node.js, get SDK version by\n * running command `npm list aws-sdk` from your root directory\n- * printing the output of `AWS.VERSION` in your code where `AWS = require(\"aws-sdk\");`\n+ * printing the output of `console.log(AWS.VERSION)` in your code where `AWS = require(\"aws-sdk\");`\n+ * if running on Lambda, Lambda runtimes can be found [here](https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html) ", + "comment_created_at": "2020-03-23T17:25:09+00:00", + "comment_author": "AllanZhengYP", + "comment_body": "People can bundle their own AWS to the Lambda. You can ask something like `If running on Lambda and using SDK provided by Lambda runtime, you can find the SDK versions here`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "297439672", + "pr_number": 2742, + "pr_file": ".github/ISSUE_TEMPLATE/---known-issues.md", + "created_at": "2019-06-26T00:00:43+00:00", + "commented_code": "---\nname: \"\\U0001F6E0 Know issues\"\nabout: Read me before opening an issue!\ntitle: ''\nassignees: ''\n\n---\n\nThis is a list of know issues and how can you work around them:\n\n1. **Exceptions contains `Unexpected key...` or `... is not a function`**\n\n AWS APIs are updating constantly, you might be calling a new API with an older version is SDK. In\n this case you should try installing the newest `aws-sdk`like: \n\n `npm install aws-sdk@latest`\n\n If you are using the SDK in AWS Lambda, you are likely using old SDK version too. AWS Lambda bundled\n SDK version is shown in [this document](https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html).\n You can import the latest version of SDK by following \n [these guidelines](https://docs.aws.amazon.com/lambda/latest/dg/nodejs-create-deployment-pkg.html#nodejs-package-dependencies).\n\n Go ahead and open an issue is you are still seeing this exception.", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "297439672", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2742, + "pr_file": ".github/ISSUE_TEMPLATE/---known-issues.md", + "discussion_id": "297439672", + "commented_code": "@@ -0,0 +1,23 @@\n+---\n+name: \"\\U0001F6E0 Know issues\"\n+about: Read me before opening an issue!\n+title: ''\n+assignees: ''\n+\n+---\n+\n+This is a list of know issues and how can you work around them:\n+\n+1. **Exceptions contains `Unexpected key...` or `... is not a function`**\n+\n+ AWS APIs are updating constantly, you might be calling a new API with an older version is SDK. In\n+ this case you should try installing the newest `aws-sdk`like: \n+\n+ `npm install aws-sdk@latest`\n+\n+ If you are using the SDK in AWS Lambda, you are likely using old SDK version too. 
AWS Lambda bundled\n+ SDK version is shown in [this document](https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html).\n+ You can import the latest version of SDK by following \n+ [these guidelines](https://docs.aws.amazon.com/lambda/latest/dg/nodejs-create-deployment-pkg.html#nodejs-package-dependencies).\n+\n+ Go ahead and open an issue is you are still seeing this exception.", + "comment_created_at": "2019-06-26T00:00:43+00:00", + "comment_author": "trivikr", + "comment_body": "```suggestion\r\nIf you still like to open an issue, change [ ] to [x] below:\r\n- [ ] I've confirmed that I'm using latest version of SDK, and issue is still reproducible\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/aws-sdk-js-document-sdk-version-requirements.md b/_reviewers/aws-sdk-js-document-sdk-version-requirements.md index c73424e..d445604 100644 --- a/_reviewers/aws-sdk-js-document-sdk-version-requirements.md +++ b/_reviewers/aws-sdk-js-document-sdk-version-requirements.md @@ -34,51 +34,3 @@ console.log("Using AWS SDK version:", AWS.VERSION); ``` This documentation helps prevent compatibility issues like "Unexpected key" errors and ensures all developers understand the environment configuration requirements. - - -[ - { - "discussion_id": "396624797", - "pr_number": 3171, - "pr_file": ".github/ISSUE_TEMPLATE/---bug-report.md", - "created_at": "2020-03-23T17:25:09+00:00", - "commented_code": "* For browsers, the SDK version number is in the script tag
src=\".../aws-sdk-2.466.0.min.js\"
\n* For Node.js, get SDK version by\n * running command `npm list aws-sdk` from your root directory\n * printing the output of `AWS.VERSION` in your code where `AWS = require(\"aws-sdk\");`\n * printing the output of `console.log(AWS.VERSION)` in your code where `AWS = require(\"aws-sdk\");`\n * if running on Lambda, Lambda runtimes can be found [here](https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html)", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "396624797", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 3171, - "pr_file": ".github/ISSUE_TEMPLATE/---bug-report.md", - "discussion_id": "396624797", - "commented_code": "@@ -28,7 +28,8 @@ Example: v2.466.0\n * For browsers, the SDK version number is in the script tag
src=\".../aws-sdk-2.466.0.min.js\"
\n * For Node.js, get SDK version by\n * running command `npm list aws-sdk` from your root directory\n- * printing the output of `AWS.VERSION` in your code where `AWS = require(\"aws-sdk\");`\n+ * printing the output of `console.log(AWS.VERSION)` in your code where `AWS = require(\"aws-sdk\");`\n+ * if running on Lambda, Lambda runtimes can be found [here](https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html) ", - "comment_created_at": "2020-03-23T17:25:09+00:00", - "comment_author": "AllanZhengYP", - "comment_body": "People can bundle their own AWS to the Lambda. You can ask something like `If running on Lambda and using SDK provided by Lambda runtime, you can find the SDK versions here`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "297439672", - "pr_number": 2742, - "pr_file": ".github/ISSUE_TEMPLATE/---known-issues.md", - "created_at": "2019-06-26T00:00:43+00:00", - "commented_code": "---\nname: \"\\U0001F6E0 Know issues\"\nabout: Read me before opening an issue!\ntitle: ''\nassignees: ''\n\n---\n\nThis is a list of know issues and how can you work around them:\n\n1. **Exceptions contains `Unexpected key...` or `... is not a function`**\n\n AWS APIs are updating constantly, you might be calling a new API with an older version is SDK. In\n this case you should try installing the newest `aws-sdk`like: \n\n `npm install aws-sdk@latest`\n\n If you are using the SDK in AWS Lambda, you are likely using old SDK version too. AWS Lambda bundled\n SDK version is shown in [this document](https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html).\n You can import the latest version of SDK by following \n [these guidelines](https://docs.aws.amazon.com/lambda/latest/dg/nodejs-create-deployment-pkg.html#nodejs-package-dependencies).\n\n Go ahead and open an issue is you are still seeing this exception.", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "297439672", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2742, - "pr_file": ".github/ISSUE_TEMPLATE/---known-issues.md", - "discussion_id": "297439672", - "commented_code": "@@ -0,0 +1,23 @@\n+---\n+name: \"\\U0001F6E0 Know issues\"\n+about: Read me before opening an issue!\n+title: ''\n+assignees: ''\n+\n+---\n+\n+This is a list of know issues and how can you work around them:\n+\n+1. **Exceptions contains `Unexpected key...` or `... is not a function`**\n+\n+ AWS APIs are updating constantly, you might be calling a new API with an older version is SDK. In\n+ this case you should try installing the newest `aws-sdk`like: \n+\n+ `npm install aws-sdk@latest`\n+\n+ If you are using the SDK in AWS Lambda, you are likely using old SDK version too. 
AWS Lambda bundled\n+ SDK version is shown in [this document](https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html).\n+ You can import the latest version of SDK by following \n+ [these guidelines](https://docs.aws.amazon.com/lambda/latest/dg/nodejs-create-deployment-pkg.html#nodejs-package-dependencies).\n+\n+ Go ahead and open an issue is you are still seeing this exception.", - "comment_created_at": "2019-06-26T00:00:43+00:00", - "comment_author": "trivikr", - "comment_body": "```suggestion\r\nIf you still like to open an issue, change [ ] to [x] below:\r\n- [ ] I've confirmed that I'm using latest version of SDK, and issue is still reproducible\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/aws-sdk-js-early-return-after-errors.json b/_reviewers/aws-sdk-js-early-return-after-errors.json new file mode 100644 index 0000000..46e8636 --- /dev/null +++ b/_reviewers/aws-sdk-js-early-return-after-errors.json @@ -0,0 +1,194 @@ +[ + { + "discussion_id": "127803453", + "pr_number": 1631, + "pr_file": "lib/request.js", + "created_at": "2017-07-17T19:34:30+00:00", + "commented_code": "* request.send();\n */\n send: function send(callback) {\n this.preventMultipleCalls();\n if (callback) {\n // append to user agent\n this.httpRequest.appendToUserAgent('callback');\n this.on('complete', function (resp) {\n callback.call(resp, resp.error, resp.data);\n });\n }\n this._didSend = true;\n this.runTo();\n\n return this.response;\n },\n\n /**\n * @api private\n */\n preventMultipleCalls: function preventMultipleCalls() {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "127803453", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1631, + "pr_file": "lib/request.js", + "discussion_id": "127803453", + "commented_code": "@@ -357,19 +357,34 @@ AWS.Request = inherit({\n * request.send();\n */\n send: function send(callback) {\n+ this.preventMultipleCalls();\n if (callback) {\n // append to user agent\n this.httpRequest.appendToUserAgent('callback');\n this.on('complete', function (resp) {\n callback.call(resp, resp.error, resp.data);\n });\n }\n+ this._didSend = true;\n this.runTo();\n \n return this.response;\n },\n \n /**\n+ * @api private\n+ */\n+ preventMultipleCalls: function preventMultipleCalls() {", + "comment_created_at": "2017-07-17T19:34:30+00:00", + "comment_author": "jeskew", + "comment_body": "Maybe instead of throwing, this method could take a callback? That would allow the RequestAlreadyTriggeredError to be handled by user callbacks when `send` is called or emitted onto streams when `createReadStream` is called.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "101647511", + "pr_number": 1350, + "pr_file": "lib/services/s3.js", + "created_at": "2017-02-16T22:51:15+00:00", + "commented_code": "return request.presign(expires, callback);\n },\n\n\n /**\n * Get a pre-signed POST policy for a given operation name.\n *\n * @param params [map]\n * @option params.Bucket [String]\n * @option params.Expires [Number]\n * @option params.Conditions [Array]\n * @option params.Fields [map]\n * @param callback [Function]\n *\n * @note All fields passed in when creating presigned post data will be signed\n * as exact match conditions. 
Any fields that will be interpolated by S3\n * must be added to the fields hash after signing, and an appropriate\n * condition for such fields must be explicitly added to the Conditions\n * array passed to this function before signing.\n *\n * @example Presiging post data with a known key\n * var params = {\n * Bucket: 'bucket',\n * Fields: {\n * key: 'key'\n * }\n * };\n * s3.createPresignedPost(params, function(err, data) {\n * if (err) {\n * console.error('Presigning post data encountered an error', err);\n * } else {\n * console.log('The post data is', data);\n * }\n * });\n *\n * @example Presigning post data with an interpolated key\n * var params = {\n * Bucket: 'bucket',\n * Conditions: [\n * ['starts-with', '$key', 'path/to/uploads/']\n * ]\n * };\n * s3.createPresignedPost(params, function(err, data) {\n * if (err) {\n * console.error('Presigning post data encountered an error', err);\n * } else {\n * data.Fields.key = 'path/to/uploads/${filename}';\n * console.log('The post data is', data);\n * }\n * });\n *\n * @note You must ensure that you have static or previously resolved\n * credentials if you call this method synchronously (with no callback),\n * otherwise it may not properly sign the request. If you cannot guarantee\n * this (you are using an asynchronous credential provider, i.e., EC2\n * IAM roles), you should always call this method with an asynchronous\n * callback.\n */\n createPresignedPost: function createPresignedPost(params, callback) {\n if (typeof params === 'function' && callback === undefined) {\n callback = params;\n params = null;\n }\n\n params = AWS.util.copy(params || {});\n var boundParams = this.config.params || {};\n var bucket = params.Bucket || boundParams.Bucket,\n self = this,\n config = this.config,\n endpoint = AWS.util.copy(this.endpoint);\n if (!config.s3BucketEndpoint) {\n endpoint.pathname = '/' + bucket;\n }\n\n function finalizePost() {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "101647511", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1350, + "pr_file": "lib/services/s3.js", + "discussion_id": "101647511", + "commented_code": "@@ -731,6 +731,173 @@ AWS.util.update(AWS.S3.prototype, {\n return request.presign(expires, callback);\n },\n \n+\n+ /**\n+ * Get a pre-signed POST policy for a given operation name.\n+ *\n+ * @param params [map]\n+ * @option params.Bucket [String]\n+ * @option params.Expires [Number]\n+ * @option params.Conditions [Array]\n+ * @option params.Fields [map]\n+ * @param callback [Function]\n+ *\n+ * @note All fields passed in when creating presigned post data will be signed\n+ * as exact match conditions. 
Any fields that will be interpolated by S3\n+ * must be added to the fields hash after signing, and an appropriate\n+ * condition for such fields must be explicitly added to the Conditions\n+ * array passed to this function before signing.\n+ *\n+ * @example Presiging post data with a known key\n+ * var params = {\n+ * Bucket: 'bucket',\n+ * Fields: {\n+ * key: 'key'\n+ * }\n+ * };\n+ * s3.createPresignedPost(params, function(err, data) {\n+ * if (err) {\n+ * console.error('Presigning post data encountered an error', err);\n+ * } else {\n+ * console.log('The post data is', data);\n+ * }\n+ * });\n+ *\n+ * @example Presigning post data with an interpolated key\n+ * var params = {\n+ * Bucket: 'bucket',\n+ * Conditions: [\n+ * ['starts-with', '$key', 'path/to/uploads/']\n+ * ]\n+ * };\n+ * s3.createPresignedPost(params, function(err, data) {\n+ * if (err) {\n+ * console.error('Presigning post data encountered an error', err);\n+ * } else {\n+ * data.Fields.key = 'path/to/uploads/${filename}';\n+ * console.log('The post data is', data);\n+ * }\n+ * });\n+ *\n+ * @note You must ensure that you have static or previously resolved\n+ * credentials if you call this method synchronously (with no callback),\n+ * otherwise it may not properly sign the request. If you cannot guarantee\n+ * this (you are using an asynchronous credential provider, i.e., EC2\n+ * IAM roles), you should always call this method with an asynchronous\n+ * callback.\n+ */\n+ createPresignedPost: function createPresignedPost(params, callback) {\n+ if (typeof params === 'function' && callback === undefined) {\n+ callback = params;\n+ params = null;\n+ }\n+\n+ params = AWS.util.copy(params || {});\n+ var boundParams = this.config.params || {};\n+ var bucket = params.Bucket || boundParams.Bucket,\n+ self = this,\n+ config = this.config,\n+ endpoint = AWS.util.copy(this.endpoint);\n+ if (!config.s3BucketEndpoint) {\n+ endpoint.pathname = '/' + bucket;\n+ }\n+\n+ function finalizePost() {", + "comment_created_at": "2017-02-16T22:51:15+00:00", + "comment_author": "chrisradek", + "comment_body": "It doesn't seem like there's a need for this function. Couldn't you store the return value directly in an object, then pass that to the callback/return it?", + "pr_file_module": null + }, + { + "comment_id": "101667755", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1350, + "pr_file": "lib/services/s3.js", + "discussion_id": "101647511", + "commented_code": "@@ -731,6 +731,173 @@ AWS.util.update(AWS.S3.prototype, {\n return request.presign(expires, callback);\n },\n \n+\n+ /**\n+ * Get a pre-signed POST policy for a given operation name.\n+ *\n+ * @param params [map]\n+ * @option params.Bucket [String]\n+ * @option params.Expires [Number]\n+ * @option params.Conditions [Array]\n+ * @option params.Fields [map]\n+ * @param callback [Function]\n+ *\n+ * @note All fields passed in when creating presigned post data will be signed\n+ * as exact match conditions. 
Any fields that will be interpolated by S3\n+ * must be added to the fields hash after signing, and an appropriate\n+ * condition for such fields must be explicitly added to the Conditions\n+ * array passed to this function before signing.\n+ *\n+ * @example Presiging post data with a known key\n+ * var params = {\n+ * Bucket: 'bucket',\n+ * Fields: {\n+ * key: 'key'\n+ * }\n+ * };\n+ * s3.createPresignedPost(params, function(err, data) {\n+ * if (err) {\n+ * console.error('Presigning post data encountered an error', err);\n+ * } else {\n+ * console.log('The post data is', data);\n+ * }\n+ * });\n+ *\n+ * @example Presigning post data with an interpolated key\n+ * var params = {\n+ * Bucket: 'bucket',\n+ * Conditions: [\n+ * ['starts-with', '$key', 'path/to/uploads/']\n+ * ]\n+ * };\n+ * s3.createPresignedPost(params, function(err, data) {\n+ * if (err) {\n+ * console.error('Presigning post data encountered an error', err);\n+ * } else {\n+ * data.Fields.key = 'path/to/uploads/${filename}';\n+ * console.log('The post data is', data);\n+ * }\n+ * });\n+ *\n+ * @note You must ensure that you have static or previously resolved\n+ * credentials if you call this method synchronously (with no callback),\n+ * otherwise it may not properly sign the request. If you cannot guarantee\n+ * this (you are using an asynchronous credential provider, i.e., EC2\n+ * IAM roles), you should always call this method with an asynchronous\n+ * callback.\n+ */\n+ createPresignedPost: function createPresignedPost(params, callback) {\n+ if (typeof params === 'function' && callback === undefined) {\n+ callback = params;\n+ params = null;\n+ }\n+\n+ params = AWS.util.copy(params || {});\n+ var boundParams = this.config.params || {};\n+ var bucket = params.Bucket || boundParams.Bucket,\n+ self = this,\n+ config = this.config,\n+ endpoint = AWS.util.copy(this.endpoint);\n+ if (!config.s3BucketEndpoint) {\n+ endpoint.pathname = '/' + bucket;\n+ }\n+\n+ function finalizePost() {", + "comment_created_at": "2017-02-17T01:17:01+00:00", + "comment_author": "jeskew", + "comment_body": "This function should only be invoked once credentials have been resolved, so it needs to be called from the callback.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "115840969", + "pr_number": 1507, + "pr_file": "test/request.spec.js", + "created_at": "2017-05-10T20:14:30+00:00", + "commented_code": "s = request.createReadStream();\n s.on('error', function(e) {\n error = e;\n expect(error).to.be[\"null\"];\n expect(error).to.not.be[\"null\"];\n });\n s.on('data', function(c) {\n return data += c.toString();\n });\n return s.on('end', function() {\n expect(error).to.be[\"null\"];\n expect(data).to.equal('FOOBARBAZQU');\n expect(error).to.not.be[\"null\"];", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "115840969", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1507, + "pr_file": "test/request.spec.js", + "discussion_id": "115840969", + "commented_code": "@@ -758,14 +758,13 @@ describe('AWS.Request', function() {\n s = request.createReadStream();\n s.on('error', function(e) {\n error = e;\n- expect(error).to.be[\"null\"];\n+ expect(error).to.not.be[\"null\"];\n });\n s.on('data', function(c) {\n return data += c.toString();\n });\n return s.on('end', function() {\n- expect(error).to.be[\"null\"];\n- expect(data).to.equal('FOOBARBAZQU');\n+ expect(error).to.not.be[\"null\"];", + "comment_created_at": "2017-05-10T20:14:30+00:00", + "comment_author": "jeskew", + "comment_body": "Is this a 
breaking change? Previously, users would get up to `content-length` bytes and no more if the returned stream exceeded that length; now, the stream will emit an error event in that case. Was there ever a case where `content-length` would be wrong but the user was OK with receiving a truncated stream?", + "pr_file_module": null + }, + { + "comment_id": "115843940", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1507, + "pr_file": "test/request.spec.js", + "discussion_id": "115840969", + "commented_code": "@@ -758,14 +758,13 @@ describe('AWS.Request', function() {\n s = request.createReadStream();\n s.on('error', function(e) {\n error = e;\n- expect(error).to.be[\"null\"];\n+ expect(error).to.not.be[\"null\"];\n });\n s.on('data', function(c) {\n return data += c.toString();\n });\n return s.on('end', function() {\n- expect(error).to.be[\"null\"];\n- expect(data).to.equal('FOOBARBAZQU');\n+ expect(error).to.not.be[\"null\"];", + "comment_created_at": "2017-05-10T20:27:58+00:00", + "comment_author": "chrisradek", + "comment_body": "I don't think giving the user a truncated stream is really ok. I think this test was checking the behavior the SDK exhibited, rather than what was intended.\r\n\r\n[Here](https://github.com/aws/aws-sdk-js/blob/master/lib/request.js#L606-L611), there's a check to see if the incoming data matched the content-length. It should be throwing an error when the content-length is less than the data received. When I was testing though, I discovered that when the body is larger than the content-length, node throws a `ParseError`. However, because we were swallowing these errors (due to already receiving response headers), and node.js still gave us access to the body up to the content-length, it appeared as though we could never detect when data streamed in exceeded the expected amount.", + "pr_file_module": null + }, + { + "comment_id": "115852151", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1507, + "pr_file": "test/request.spec.js", + "discussion_id": "115840969", + "commented_code": "@@ -758,14 +758,13 @@ describe('AWS.Request', function() {\n s = request.createReadStream();\n s.on('error', function(e) {\n error = e;\n- expect(error).to.be[\"null\"];\n+ expect(error).to.not.be[\"null\"];\n });\n s.on('data', function(c) {\n return data += c.toString();\n });\n return s.on('end', function() {\n- expect(error).to.be[\"null\"];\n- expect(data).to.equal('FOOBARBAZQU');\n+ expect(error).to.not.be[\"null\"];", + "comment_created_at": "2017-05-10T21:02:51+00:00", + "comment_author": "jeskew", + "comment_body": "I can't imagine that the data received up to `content-length` would be usable in most cases... you would end up with an unparsable partial XML or JSON document unless the service was returning a byte stream. 
I think the change in this PR is correct, but we should be on the lookout for issues where users were receiving partial byte streams and are now encountering stream errors.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "181819271", + "pr_number": 1956, + "pr_file": "lib/http/response-validator/integrityChecker.js", + "created_at": "2018-04-16T17:14:35+00:00", + "commented_code": "this.providedChecksum.length !== CHECKSUM_BYTE_LEN ||\n this.hash.digest('base64') !== this.providedChecksum.toString('base64')\n ) {\n this.emit('error', AWS.util.error(\n callback( AWS.util.error(\n new Error('Response fails integrity check.'),\n { code: 'ResponseChecksumMismatch' })\n );\n )", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "181819271", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1956, + "pr_file": "lib/http/response-validator/integrityChecker.js", + "discussion_id": "181819271", + "commented_code": "@@ -53,10 +48,10 @@ IntegrityCheckerStream.prototype._flush = function(callback) {\n this.providedChecksum.length !== CHECKSUM_BYTE_LEN ||\n this.hash.digest('base64') !== this.providedChecksum.toString('base64')\n ) {\n- this.emit('error', AWS.util.error(\n+ callback( AWS.util.error(\n new Error('Response fails integrity check.'),\n { code: 'ResponseChecksumMismatch' })\n- );\n+ )", + "comment_created_at": "2018-04-16T17:14:35+00:00", + "comment_author": "chrisradek", + "comment_body": "Call `return` either on the same line as `callback` or right under. Right now `callback` might be triggered twice: once with an error and then once without an error a couple lines below.", + "pr_file_module": null + }, + { + "comment_id": "181866700", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1956, + "pr_file": "lib/http/response-validator/integrityChecker.js", + "discussion_id": "181819271", + "commented_code": "@@ -53,10 +48,10 @@ IntegrityCheckerStream.prototype._flush = function(callback) {\n this.providedChecksum.length !== CHECKSUM_BYTE_LEN ||\n this.hash.digest('base64') !== this.providedChecksum.toString('base64')\n ) {\n- this.emit('error', AWS.util.error(\n+ callback( AWS.util.error(\n new Error('Response fails integrity check.'),\n { code: 'ResponseChecksumMismatch' })\n- );\n+ )", + "comment_created_at": "2018-04-16T20:00:48+00:00", + "comment_author": "AllanZhengYP", + "comment_body": "Here calling callback twice won't actually write anything to stream. 
I will fix it though", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "265312860", + "pr_number": 2559, + "pr_file": "lib/credentials/shared_ini_file_credentials.js", + "created_at": "2019-03-13T20:15:45+00:00", + "commented_code": "* @param profile [map] credentials profile\n * @throws SharedIniFileCredentialsProviderFailure\n */\n loadViaCredentialProcess: function loadCredentialProcess(profile, callback) {\n try {\n var output = proc.execSync(profile['credential_process']);\n var credData = JSON.parse(output);\n if (parseInt(credData.Version) !== 1) {\n throw AWS.util.error(\n Error('credential_process does not return Version == 1 for profile ' + this.profile),\n { code: 'SharedIniFileCredentialsProviderFailure'}\n loadViaCredentialProcess: function loadViaCredentialProcess(profile, callback) {\n proc.exec(profile['credential_process'], function(err,stdOut, stdErr) {\n if (err) {\n callback(err, null);\n } else if (stdErr) {\n callback(stdErr, null);", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "265312860", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2559, + "pr_file": "lib/credentials/shared_ini_file_credentials.js", + "discussion_id": "265312860", + "commented_code": "@@ -187,25 +200,33 @@ AWS.SharedIniFileCredentials = AWS.util.inherit(AWS.Credentials, {\n * @param profile [map] credentials profile\n * @throws SharedIniFileCredentialsProviderFailure\n */\n- loadViaCredentialProcess: function loadCredentialProcess(profile, callback) {\n- try {\n- var output = proc.execSync(profile['credential_process']);\n- var credData = JSON.parse(output);\n- if (parseInt(credData.Version) !== 1) {\n- throw AWS.util.error(\n- Error('credential_process does not return Version == 1 for profile ' + this.profile),\n- { code: 'SharedIniFileCredentialsProviderFailure'}\n+ loadViaCredentialProcess: function loadViaCredentialProcess(profile, callback) {\n+ proc.exec(profile['credential_process'], function(err,stdOut, stdErr) {\n+ if (err) {\n+ callback(err, null);\n+ } else if (stdErr) {\n+ callback(stdErr, null);", + "comment_created_at": "2019-03-13T20:15:45+00:00", + "comment_author": "jstewmon", + "comment_body": "stderr is often used for ancillary information (like a deprecation warning) even when the command succeeds, so I would advise against treating a truthy value as a failure.\r\n\r\nFWIW, the [botocore implementation] ignores stderr.\r\n\r\nIf this is retained, I suggest passing `AWS.util.error(new Error(stdErr), {code: 'SharedIniCredentialsProviderFailue'})`, so that the callback receives a normalized `Error`, not a `string`.\r\n\r\n[botocore implementation]: https://github.com/boto/botocore/blob/8d3ea0e61473fba43774eb3c74e1b22995ee7370/botocore/credentials.py#L804-L833", + "pr_file_module": null + }, + { + "comment_id": "265324515", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2559, + "pr_file": "lib/credentials/shared_ini_file_credentials.js", + "discussion_id": "265312860", + "commented_code": "@@ -187,25 +200,33 @@ AWS.SharedIniFileCredentials = AWS.util.inherit(AWS.Credentials, {\n * @param profile [map] credentials profile\n * @throws SharedIniFileCredentialsProviderFailure\n */\n- loadViaCredentialProcess: function loadCredentialProcess(profile, callback) {\n- try {\n- var output = proc.execSync(profile['credential_process']);\n- var credData = JSON.parse(output);\n- if (parseInt(credData.Version) !== 1) {\n- throw AWS.util.error(\n- Error('credential_process does not return Version == 1 for profile ' + 
this.profile),\n- { code: 'SharedIniFileCredentialsProviderFailure'}\n+ loadViaCredentialProcess: function loadViaCredentialProcess(profile, callback) {\n+ proc.exec(profile['credential_process'], function(err,stdOut, stdErr) {\n+ if (err) {\n+ callback(err, null);\n+ } else if (stdErr) {\n+ callback(stdErr, null);", + "comment_created_at": "2019-03-13T20:47:32+00:00", + "comment_author": "srchase", + "comment_body": "Followed the botocore implementation and ignored stderr. thanks!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "268204892", + "pr_number": 2559, + "pr_file": "lib/credentials/process_credentials.js", + "created_at": "2019-03-22T14:56:10+00:00", + "commented_code": "var AWS = require('../core');\nvar proc = require('child_process');\nvar iniLoader = AWS.util.iniLoader;\n\n/**\n * Represents credentials loaded from shared credentials file\n * (defaulting to ~/.aws/credentials or defined by the\n * `AWS_SHARED_CREDENTIALS_FILE` environment variable).\n *\n * ## Using process credentials\n *\n * The credentials file can specify a credential provider that executes\n * a given process and attempts to read its stdout to recieve a JSON payload\n * containing the credentials:\n *\n * [default]\n * credential_process = /usr/bin/credential_proc\n *\n * Automatically handles refreshing credentials if an Expiration time is\n * provided in the credentials payload. Credentials supplied in the same profile\n * will take precedence over the credential_process.\n *\n * Sourcing credentials from an external process can potentially be dangerous,\n * so proceed with caution. Other credential providers should be preferred if\n * at all possible. If using this option, you should make sure that the shared\n * credentials file is as locked down as possible using security best practices\n * for your operating system.\n *\n * ## Using custom profiles\n *\n * The SDK supports loading credentials for separate profiles. This can be done\n * in two ways:\n *\n * 1. Set the `AWS_PROFILE` environment variable in your process prior to\n * loading the SDK.\n * 2. Directly load the AWS.ProcessCredentials provider:\n *\n * ```javascript\n * var creds = new AWS.ProcessCredentials({profile: 'myprofile'});\n * AWS.config.credentials = creds;\n * ```\n *\n * @!macro nobrowser\n */\nAWS.ProcessCredentials = AWS.util.inherit(AWS.Credentials, {\n /**\n * Creates a new ProcessCredentials object.\n *\n * @param options [map] a set of options\n * @option options profile [String] (AWS_PROFILE env var or 'default')\n * the name of the profile to load.\n * @option options filename [String] ('~/.aws/credentials' or defined by\n * AWS_SHARED_CREDENTIALS_FILE process env var)\n * the filename to use when loading credentials.\n * @option options callback [Function] (err) Credentials are eagerly loaded\n * by the constructor. When the callback is called with no error, the\n * credentials have been loaded successfully.\n * @option options httpOptions [map] A set of options to pass to the low-level\n * HTTP request. Currently supported options are:\n * * **proxy** [String] — the URL to proxy requests through\n * * **agent** [http.Agent, https.Agent] — the Agent object to perform\n * HTTP requests with. Used for connection pooling. Defaults to the global\n * agent (`http.globalAgent`) for non-SSL connections. Note that for\n * SSL connections, a special Agent object is used in order to enable\n * peer certificate verification. 
This feature is only available in the\n * Node.js environment.\n * * **connectTimeout** [Integer] — Sets the socket to timeout after\n * failing to establish a connection with the server after\n * `connectTimeout` milliseconds. This timeout has no effect once a socket\n * connection has been established.\n * * **timeout** [Integer] — Sets the socket to timeout after timeout\n * milliseconds of inactivity on the socket. Defaults to two minutes\n * (120000).\n * * **xhrAsync** [Boolean] — Whether the SDK will send asynchronous\n * HTTP requests. Used in the browser environment only. Set to false to\n * send requests synchronously. Defaults to true (async on).\n * * **xhrWithCredentials** [Boolean] — Sets the \"withCredentials\"\n * property of an XMLHttpRequest object. Used in the browser environment\n * only. Defaults to false.\n */\n constructor: function ProcessCredentials(options) {\n AWS.Credentials.call(this);\n\n options = options || {};\n\n this.filename = options.filename;\n this.profile = options.profile || process.env.AWS_PROFILE || AWS.util.defaultProfile;\n this.httpOptions = options.httpOptions || null;\n this.get(options.callback || AWS.util.fn.noop);\n },\n\n /**\n * @api private\n */\n load: function load(callback) {\n var self = this;\n try {\n var profiles = {};\n var profilesFromConfig = {};\n if (process.env[AWS.util.configOptInEnv]) {\n var profilesFromConfig = iniLoader.loadFrom({\n isConfig: true,\n filename: process.env[AWS.util.sharedConfigFileEnv]\n });\n }\n var profilesFromCreds = iniLoader.loadFrom({\n filename: this.filename ||\n (process.env[AWS.util.configOptInEnv] && process.env[AWS.util.sharedCredentialsFileEnv])\n });\n for (var i = 0, profileNames = Object.keys(profilesFromCreds); i < profileNames.length; i++) {\n profiles[profileNames[i]] = profilesFromCreds[profileNames[i]];\n }\n // load after profilesFromCreds to prefer profilesFromConfig\n for (var i = 0, profileNames = Object.keys(profilesFromConfig); i < profileNames.length; i++) {\n profiles[profileNames[i]] = profilesFromConfig[profileNames[i]];\n }\n var profile = profiles[this.profile] || {};\n\n if (Object.keys(profile).length === 0) {\n throw AWS.util.error(\n new Error('Profile ' + this.profile + ' not found'),\n { code: 'ProcessCredentialsProviderFailure' }\n );\n }\n\n if (profile['credential_process']) {\n this.loadViaCredentialProcess(profile, function(err, data) {\n if (err) {\n callback(err, null);\n } else {\n self.expired = false;\n self.accessKeyId = data.AccessKeyId;\n self.secretAccessKey = data.SecretAccessKey;\n self.sessionToken = data.SessionToken;\n if (data.Expiration) {\n self.expireTime = new Date(data.Expiration);\n }\n callback(null);\n }\n });\n } else {\n throw AWS.util.error(\n new Error('Profile ' + this.profile + ' did not include credential process'),\n { code: 'ProcessCredentialsProviderFailure' }\n );\n }\n this.expired = false;\n if (this.accessKeyId && this.secretAccessKey) {\n callback(null);\n }\n } catch (err) {\n callback(err);\n }\n },\n\n /**\n * Executes the credential_process and retrieves\n * credentials from the output\n * @api private\n * @param profile [map] credentials profile\n * @throws SharedIniFileCredentialsProviderFailure\n */\n loadViaCredentialProcess: function loadViaCredentialProcess(profile, callback) {\n proc.exec(profile['credential_process'], function(err,stdOut, stdErr) {\n try {\n var credData = JSON.parse(stdOut);\n if (credData.Expiration) {\n var currentTime = AWS.util.date.getDate();\n var expireTime = new 
Date(credData.Expiration);\n if (expireTime < currentTime) {\n err = AWS.util.error(\n new Error('credential_process returned expired credentials'),\n { code: 'ProcessCredentialsProviderFailure' }\n );\n }\n } else if (credData.Version !== 1) {\n err = AWS.util.error(\n new Error('credential_process does not return Version == 1'),\n { code: 'ProcessCredentialsProviderFailure' }\n );\n }\n if (err) {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "268204892", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2559, + "pr_file": "lib/credentials/process_credentials.js", + "discussion_id": "268204892", + "commented_code": "@@ -0,0 +1,206 @@\n+var AWS = require('../core');\n+var proc = require('child_process');\n+var iniLoader = AWS.util.iniLoader;\n+\n+/**\n+ * Represents credentials loaded from shared credentials file\n+ * (defaulting to ~/.aws/credentials or defined by the\n+ * `AWS_SHARED_CREDENTIALS_FILE` environment variable).\n+ *\n+ * ## Using process credentials\n+ *\n+ * The credentials file can specify a credential provider that executes\n+ * a given process and attempts to read its stdout to recieve a JSON payload\n+ * containing the credentials:\n+ *\n+ * [default]\n+ * credential_process = /usr/bin/credential_proc\n+ *\n+ * Automatically handles refreshing credentials if an Expiration time is\n+ * provided in the credentials payload. Credentials supplied in the same profile\n+ * will take precedence over the credential_process.\n+ *\n+ * Sourcing credentials from an external process can potentially be dangerous,\n+ * so proceed with caution. Other credential providers should be preferred if\n+ * at all possible. If using this option, you should make sure that the shared\n+ * credentials file is as locked down as possible using security best practices\n+ * for your operating system.\n+ *\n+ * ## Using custom profiles\n+ *\n+ * The SDK supports loading credentials for separate profiles. This can be done\n+ * in two ways:\n+ *\n+ * 1. Set the `AWS_PROFILE` environment variable in your process prior to\n+ * loading the SDK.\n+ * 2. Directly load the AWS.ProcessCredentials provider:\n+ *\n+ * ```javascript\n+ * var creds = new AWS.ProcessCredentials({profile: 'myprofile'});\n+ * AWS.config.credentials = creds;\n+ * ```\n+ *\n+ * @!macro nobrowser\n+ */\n+AWS.ProcessCredentials = AWS.util.inherit(AWS.Credentials, {\n+ /**\n+ * Creates a new ProcessCredentials object.\n+ *\n+ * @param options [map] a set of options\n+ * @option options profile [String] (AWS_PROFILE env var or 'default')\n+ * the name of the profile to load.\n+ * @option options filename [String] ('~/.aws/credentials' or defined by\n+ * AWS_SHARED_CREDENTIALS_FILE process env var)\n+ * the filename to use when loading credentials.\n+ * @option options callback [Function] (err) Credentials are eagerly loaded\n+ * by the constructor. When the callback is called with no error, the\n+ * credentials have been loaded successfully.\n+ * @option options httpOptions [map] A set of options to pass to the low-level\n+ * HTTP request. Currently supported options are:\n+ * * **proxy** [String] — the URL to proxy requests through\n+ * * **agent** [http.Agent, https.Agent] — the Agent object to perform\n+ * HTTP requests with. Used for connection pooling. Defaults to the global\n+ * agent (`http.globalAgent`) for non-SSL connections. Note that for\n+ * SSL connections, a special Agent object is used in order to enable\n+ * peer certificate verification. 
This feature is only available in the\n+ * Node.js environment.\n+ * * **connectTimeout** [Integer] — Sets the socket to timeout after\n+ * failing to establish a connection with the server after\n+ * `connectTimeout` milliseconds. This timeout has no effect once a socket\n+ * connection has been established.\n+ * * **timeout** [Integer] — Sets the socket to timeout after timeout\n+ * milliseconds of inactivity on the socket. Defaults to two minutes\n+ * (120000).\n+ * * **xhrAsync** [Boolean] — Whether the SDK will send asynchronous\n+ * HTTP requests. Used in the browser environment only. Set to false to\n+ * send requests synchronously. Defaults to true (async on).\n+ * * **xhrWithCredentials** [Boolean] — Sets the \"withCredentials\"\n+ * property of an XMLHttpRequest object. Used in the browser environment\n+ * only. Defaults to false.\n+ */\n+ constructor: function ProcessCredentials(options) {\n+ AWS.Credentials.call(this);\n+\n+ options = options || {};\n+\n+ this.filename = options.filename;\n+ this.profile = options.profile || process.env.AWS_PROFILE || AWS.util.defaultProfile;\n+ this.httpOptions = options.httpOptions || null;\n+ this.get(options.callback || AWS.util.fn.noop);\n+ },\n+\n+ /**\n+ * @api private\n+ */\n+ load: function load(callback) {\n+ var self = this;\n+ try {\n+ var profiles = {};\n+ var profilesFromConfig = {};\n+ if (process.env[AWS.util.configOptInEnv]) {\n+ var profilesFromConfig = iniLoader.loadFrom({\n+ isConfig: true,\n+ filename: process.env[AWS.util.sharedConfigFileEnv]\n+ });\n+ }\n+ var profilesFromCreds = iniLoader.loadFrom({\n+ filename: this.filename ||\n+ (process.env[AWS.util.configOptInEnv] && process.env[AWS.util.sharedCredentialsFileEnv])\n+ });\n+ for (var i = 0, profileNames = Object.keys(profilesFromCreds); i < profileNames.length; i++) {\n+ profiles[profileNames[i]] = profilesFromCreds[profileNames[i]];\n+ }\n+ // load after profilesFromCreds to prefer profilesFromConfig\n+ for (var i = 0, profileNames = Object.keys(profilesFromConfig); i < profileNames.length; i++) {\n+ profiles[profileNames[i]] = profilesFromConfig[profileNames[i]];\n+ }\n+ var profile = profiles[this.profile] || {};\n+\n+ if (Object.keys(profile).length === 0) {\n+ throw AWS.util.error(\n+ new Error('Profile ' + this.profile + ' not found'),\n+ { code: 'ProcessCredentialsProviderFailure' }\n+ );\n+ }\n+\n+ if (profile['credential_process']) {\n+ this.loadViaCredentialProcess(profile, function(err, data) {\n+ if (err) {\n+ callback(err, null);\n+ } else {\n+ self.expired = false;\n+ self.accessKeyId = data.AccessKeyId;\n+ self.secretAccessKey = data.SecretAccessKey;\n+ self.sessionToken = data.SessionToken;\n+ if (data.Expiration) {\n+ self.expireTime = new Date(data.Expiration);\n+ }\n+ callback(null);\n+ }\n+ });\n+ } else {\n+ throw AWS.util.error(\n+ new Error('Profile ' + this.profile + ' did not include credential process'),\n+ { code: 'ProcessCredentialsProviderFailure' }\n+ );\n+ }\n+ this.expired = false;\n+ if (this.accessKeyId && this.secretAccessKey) {\n+ callback(null);\n+ }\n+ } catch (err) {\n+ callback(err);\n+ }\n+ },\n+\n+ /**\n+ * Executes the credential_process and retrieves\n+ * credentials from the output\n+ * @api private\n+ * @param profile [map] credentials profile\n+ * @throws SharedIniFileCredentialsProviderFailure\n+ */\n+ loadViaCredentialProcess: function loadViaCredentialProcess(profile, callback) {\n+ proc.exec(profile['credential_process'], function(err,stdOut, stdErr) {\n+ try {\n+ var credData = JSON.parse(stdOut);\n+ if 
(credData.Expiration) {\n+ var currentTime = AWS.util.date.getDate();\n+ var expireTime = new Date(credData.Expiration);\n+ if (expireTime < currentTime) {\n+ err = AWS.util.error(\n+ new Error('credential_process returned expired credentials'),\n+ { code: 'ProcessCredentialsProviderFailure' }\n+ );\n+ }\n+ } else if (credData.Version !== 1) {\n+ err = AWS.util.error(\n+ new Error('credential_process does not return Version == 1'),\n+ { code: 'ProcessCredentialsProviderFailure' }\n+ );\n+ }\n+ if (err) {", + "comment_created_at": "2019-03-22T14:56:10+00:00", + "comment_author": "jstewmon", + "comment_body": "I think `err` should be checked before trying to process `stdOut`, since the condition is always checked.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/aws-sdk-js-early-return-after-errors.md b/_reviewers/aws-sdk-js-early-return-after-errors.md index 12b0143..9aceac5 100644 --- a/_reviewers/aws-sdk-js-early-return-after-errors.md +++ b/_reviewers/aws-sdk-js-early-return-after-errors.md @@ -74,199 +74,3 @@ function createPresignedPost(params, callback) { ``` This pattern creates clear separation between error and success paths, making code more maintainable and preventing hard-to-debug issues caused by multiple callback invocations or unexpected execution after errors. - - -[ - { - "discussion_id": "127803453", - "pr_number": 1631, - "pr_file": "lib/request.js", - "created_at": "2017-07-17T19:34:30+00:00", - "commented_code": "* request.send();\n */\n send: function send(callback) {\n this.preventMultipleCalls();\n if (callback) {\n // append to user agent\n this.httpRequest.appendToUserAgent('callback');\n this.on('complete', function (resp) {\n callback.call(resp, resp.error, resp.data);\n });\n }\n this._didSend = true;\n this.runTo();\n\n return this.response;\n },\n\n /**\n * @api private\n */\n preventMultipleCalls: function preventMultipleCalls() {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "127803453", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1631, - "pr_file": "lib/request.js", - "discussion_id": "127803453", - "commented_code": "@@ -357,19 +357,34 @@ AWS.Request = inherit({\n * request.send();\n */\n send: function send(callback) {\n+ this.preventMultipleCalls();\n if (callback) {\n // append to user agent\n this.httpRequest.appendToUserAgent('callback');\n this.on('complete', function (resp) {\n callback.call(resp, resp.error, resp.data);\n });\n }\n+ this._didSend = true;\n this.runTo();\n \n return this.response;\n },\n \n /**\n+ * @api private\n+ */\n+ preventMultipleCalls: function preventMultipleCalls() {", - "comment_created_at": "2017-07-17T19:34:30+00:00", - "comment_author": "jeskew", - "comment_body": "Maybe instead of throwing, this method could take a callback? 
That would allow the RequestAlreadyTriggeredError to be handled by user callbacks when `send` is called or emitted onto streams when `createReadStream` is called.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "101647511", - "pr_number": 1350, - "pr_file": "lib/services/s3.js", - "created_at": "2017-02-16T22:51:15+00:00", - "commented_code": "return request.presign(expires, callback);\n },\n\n\n /**\n * Get a pre-signed POST policy for a given operation name.\n *\n * @param params [map]\n * @option params.Bucket [String]\n * @option params.Expires [Number]\n * @option params.Conditions [Array]\n * @option params.Fields [map]\n * @param callback [Function]\n *\n * @note All fields passed in when creating presigned post data will be signed\n * as exact match conditions. Any fields that will be interpolated by S3\n * must be added to the fields hash after signing, and an appropriate\n * condition for such fields must be explicitly added to the Conditions\n * array passed to this function before signing.\n *\n * @example Presiging post data with a known key\n * var params = {\n * Bucket: 'bucket',\n * Fields: {\n * key: 'key'\n * }\n * };\n * s3.createPresignedPost(params, function(err, data) {\n * if (err) {\n * console.error('Presigning post data encountered an error', err);\n * } else {\n * console.log('The post data is', data);\n * }\n * });\n *\n * @example Presigning post data with an interpolated key\n * var params = {\n * Bucket: 'bucket',\n * Conditions: [\n * ['starts-with', '$key', 'path/to/uploads/']\n * ]\n * };\n * s3.createPresignedPost(params, function(err, data) {\n * if (err) {\n * console.error('Presigning post data encountered an error', err);\n * } else {\n * data.Fields.key = 'path/to/uploads/${filename}';\n * console.log('The post data is', data);\n * }\n * });\n *\n * @note You must ensure that you have static or previously resolved\n * credentials if you call this method synchronously (with no callback),\n * otherwise it may not properly sign the request. If you cannot guarantee\n * this (you are using an asynchronous credential provider, i.e., EC2\n * IAM roles), you should always call this method with an asynchronous\n * callback.\n */\n createPresignedPost: function createPresignedPost(params, callback) {\n if (typeof params === 'function' && callback === undefined) {\n callback = params;\n params = null;\n }\n\n params = AWS.util.copy(params || {});\n var boundParams = this.config.params || {};\n var bucket = params.Bucket || boundParams.Bucket,\n self = this,\n config = this.config,\n endpoint = AWS.util.copy(this.endpoint);\n if (!config.s3BucketEndpoint) {\n endpoint.pathname = '/' + bucket;\n }\n\n function finalizePost() {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "101647511", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1350, - "pr_file": "lib/services/s3.js", - "discussion_id": "101647511", - "commented_code": "@@ -731,6 +731,173 @@ AWS.util.update(AWS.S3.prototype, {\n return request.presign(expires, callback);\n },\n \n+\n+ /**\n+ * Get a pre-signed POST policy for a given operation name.\n+ *\n+ * @param params [map]\n+ * @option params.Bucket [String]\n+ * @option params.Expires [Number]\n+ * @option params.Conditions [Array]\n+ * @option params.Fields [map]\n+ * @param callback [Function]\n+ *\n+ * @note All fields passed in when creating presigned post data will be signed\n+ * as exact match conditions. 
Any fields that will be interpolated by S3\n+ * must be added to the fields hash after signing, and an appropriate\n+ * condition for such fields must be explicitly added to the Conditions\n+ * array passed to this function before signing.\n+ *\n+ * @example Presiging post data with a known key\n+ * var params = {\n+ * Bucket: 'bucket',\n+ * Fields: {\n+ * key: 'key'\n+ * }\n+ * };\n+ * s3.createPresignedPost(params, function(err, data) {\n+ * if (err) {\n+ * console.error('Presigning post data encountered an error', err);\n+ * } else {\n+ * console.log('The post data is', data);\n+ * }\n+ * });\n+ *\n+ * @example Presigning post data with an interpolated key\n+ * var params = {\n+ * Bucket: 'bucket',\n+ * Conditions: [\n+ * ['starts-with', '$key', 'path/to/uploads/']\n+ * ]\n+ * };\n+ * s3.createPresignedPost(params, function(err, data) {\n+ * if (err) {\n+ * console.error('Presigning post data encountered an error', err);\n+ * } else {\n+ * data.Fields.key = 'path/to/uploads/${filename}';\n+ * console.log('The post data is', data);\n+ * }\n+ * });\n+ *\n+ * @note You must ensure that you have static or previously resolved\n+ * credentials if you call this method synchronously (with no callback),\n+ * otherwise it may not properly sign the request. If you cannot guarantee\n+ * this (you are using an asynchronous credential provider, i.e., EC2\n+ * IAM roles), you should always call this method with an asynchronous\n+ * callback.\n+ */\n+ createPresignedPost: function createPresignedPost(params, callback) {\n+ if (typeof params === 'function' && callback === undefined) {\n+ callback = params;\n+ params = null;\n+ }\n+\n+ params = AWS.util.copy(params || {});\n+ var boundParams = this.config.params || {};\n+ var bucket = params.Bucket || boundParams.Bucket,\n+ self = this,\n+ config = this.config,\n+ endpoint = AWS.util.copy(this.endpoint);\n+ if (!config.s3BucketEndpoint) {\n+ endpoint.pathname = '/' + bucket;\n+ }\n+\n+ function finalizePost() {", - "comment_created_at": "2017-02-16T22:51:15+00:00", - "comment_author": "chrisradek", - "comment_body": "It doesn't seem like there's a need for this function. Couldn't you store the return value directly in an object, then pass that to the callback/return it?", - "pr_file_module": null - }, - { - "comment_id": "101667755", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1350, - "pr_file": "lib/services/s3.js", - "discussion_id": "101647511", - "commented_code": "@@ -731,6 +731,173 @@ AWS.util.update(AWS.S3.prototype, {\n return request.presign(expires, callback);\n },\n \n+\n+ /**\n+ * Get a pre-signed POST policy for a given operation name.\n+ *\n+ * @param params [map]\n+ * @option params.Bucket [String]\n+ * @option params.Expires [Number]\n+ * @option params.Conditions [Array]\n+ * @option params.Fields [map]\n+ * @param callback [Function]\n+ *\n+ * @note All fields passed in when creating presigned post data will be signed\n+ * as exact match conditions. 
Any fields that will be interpolated by S3\n+ * must be added to the fields hash after signing, and an appropriate\n+ * condition for such fields must be explicitly added to the Conditions\n+ * array passed to this function before signing.\n+ *\n+ * @example Presiging post data with a known key\n+ * var params = {\n+ * Bucket: 'bucket',\n+ * Fields: {\n+ * key: 'key'\n+ * }\n+ * };\n+ * s3.createPresignedPost(params, function(err, data) {\n+ * if (err) {\n+ * console.error('Presigning post data encountered an error', err);\n+ * } else {\n+ * console.log('The post data is', data);\n+ * }\n+ * });\n+ *\n+ * @example Presigning post data with an interpolated key\n+ * var params = {\n+ * Bucket: 'bucket',\n+ * Conditions: [\n+ * ['starts-with', '$key', 'path/to/uploads/']\n+ * ]\n+ * };\n+ * s3.createPresignedPost(params, function(err, data) {\n+ * if (err) {\n+ * console.error('Presigning post data encountered an error', err);\n+ * } else {\n+ * data.Fields.key = 'path/to/uploads/${filename}';\n+ * console.log('The post data is', data);\n+ * }\n+ * });\n+ *\n+ * @note You must ensure that you have static or previously resolved\n+ * credentials if you call this method synchronously (with no callback),\n+ * otherwise it may not properly sign the request. If you cannot guarantee\n+ * this (you are using an asynchronous credential provider, i.e., EC2\n+ * IAM roles), you should always call this method with an asynchronous\n+ * callback.\n+ */\n+ createPresignedPost: function createPresignedPost(params, callback) {\n+ if (typeof params === 'function' && callback === undefined) {\n+ callback = params;\n+ params = null;\n+ }\n+\n+ params = AWS.util.copy(params || {});\n+ var boundParams = this.config.params || {};\n+ var bucket = params.Bucket || boundParams.Bucket,\n+ self = this,\n+ config = this.config,\n+ endpoint = AWS.util.copy(this.endpoint);\n+ if (!config.s3BucketEndpoint) {\n+ endpoint.pathname = '/' + bucket;\n+ }\n+\n+ function finalizePost() {", - "comment_created_at": "2017-02-17T01:17:01+00:00", - "comment_author": "jeskew", - "comment_body": "This function should only be invoked once credentials have been resolved, so it needs to be called from the callback.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "115840969", - "pr_number": 1507, - "pr_file": "test/request.spec.js", - "created_at": "2017-05-10T20:14:30+00:00", - "commented_code": "s = request.createReadStream();\n s.on('error', function(e) {\n error = e;\n expect(error).to.be[\"null\"];\n expect(error).to.not.be[\"null\"];\n });\n s.on('data', function(c) {\n return data += c.toString();\n });\n return s.on('end', function() {\n expect(error).to.be[\"null\"];\n expect(data).to.equal('FOOBARBAZQU');\n expect(error).to.not.be[\"null\"];", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "115840969", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1507, - "pr_file": "test/request.spec.js", - "discussion_id": "115840969", - "commented_code": "@@ -758,14 +758,13 @@ describe('AWS.Request', function() {\n s = request.createReadStream();\n s.on('error', function(e) {\n error = e;\n- expect(error).to.be[\"null\"];\n+ expect(error).to.not.be[\"null\"];\n });\n s.on('data', function(c) {\n return data += c.toString();\n });\n return s.on('end', function() {\n- expect(error).to.be[\"null\"];\n- expect(data).to.equal('FOOBARBAZQU');\n+ expect(error).to.not.be[\"null\"];", - "comment_created_at": "2017-05-10T20:14:30+00:00", - "comment_author": "jeskew", - "comment_body": "Is this a 
breaking change? Previously, users would get up to `content-length` bytes and no more if the returned stream exceeded that length; now, the stream will emit an error event in that case. Was there ever a case where `content-length` would be wrong but the user was OK with receiving a truncated stream?", - "pr_file_module": null - }, - { - "comment_id": "115843940", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1507, - "pr_file": "test/request.spec.js", - "discussion_id": "115840969", - "commented_code": "@@ -758,14 +758,13 @@ describe('AWS.Request', function() {\n s = request.createReadStream();\n s.on('error', function(e) {\n error = e;\n- expect(error).to.be[\"null\"];\n+ expect(error).to.not.be[\"null\"];\n });\n s.on('data', function(c) {\n return data += c.toString();\n });\n return s.on('end', function() {\n- expect(error).to.be[\"null\"];\n- expect(data).to.equal('FOOBARBAZQU');\n+ expect(error).to.not.be[\"null\"];", - "comment_created_at": "2017-05-10T20:27:58+00:00", - "comment_author": "chrisradek", - "comment_body": "I don't think giving the user a truncated stream is really ok. I think this test was checking the behavior the SDK exhibited, rather than what was intended.\r\n\r\n[Here](https://github.com/aws/aws-sdk-js/blob/master/lib/request.js#L606-L611), there's a check to see if the incoming data matched the content-length. It should be throwing an error when the content-length is less than the data received. When I was testing though, I discovered that when the body is larger than the content-length, node throws a `ParseError`. However, because we were swallowing these errors (due to already receiving response headers), and node.js still gave us access to the body up to the content-length, it appeared as though we could never detect when data streamed in exceeded the expected amount.", - "pr_file_module": null - }, - { - "comment_id": "115852151", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1507, - "pr_file": "test/request.spec.js", - "discussion_id": "115840969", - "commented_code": "@@ -758,14 +758,13 @@ describe('AWS.Request', function() {\n s = request.createReadStream();\n s.on('error', function(e) {\n error = e;\n- expect(error).to.be[\"null\"];\n+ expect(error).to.not.be[\"null\"];\n });\n s.on('data', function(c) {\n return data += c.toString();\n });\n return s.on('end', function() {\n- expect(error).to.be[\"null\"];\n- expect(data).to.equal('FOOBARBAZQU');\n+ expect(error).to.not.be[\"null\"];", - "comment_created_at": "2017-05-10T21:02:51+00:00", - "comment_author": "jeskew", - "comment_body": "I can't imagine that the data received up to `content-length` would be usable in most cases... you would end up with an unparsable partial XML or JSON document unless the service was returning a byte stream. 
I think the change in this PR is correct, but we should be on the lookout for issues where users were receiving partial byte streams and are now encountering stream errors.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "181819271", - "pr_number": 1956, - "pr_file": "lib/http/response-validator/integrityChecker.js", - "created_at": "2018-04-16T17:14:35+00:00", - "commented_code": "this.providedChecksum.length !== CHECKSUM_BYTE_LEN ||\n this.hash.digest('base64') !== this.providedChecksum.toString('base64')\n ) {\n this.emit('error', AWS.util.error(\n callback( AWS.util.error(\n new Error('Response fails integrity check.'),\n { code: 'ResponseChecksumMismatch' })\n );\n )", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "181819271", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1956, - "pr_file": "lib/http/response-validator/integrityChecker.js", - "discussion_id": "181819271", - "commented_code": "@@ -53,10 +48,10 @@ IntegrityCheckerStream.prototype._flush = function(callback) {\n this.providedChecksum.length !== CHECKSUM_BYTE_LEN ||\n this.hash.digest('base64') !== this.providedChecksum.toString('base64')\n ) {\n- this.emit('error', AWS.util.error(\n+ callback( AWS.util.error(\n new Error('Response fails integrity check.'),\n { code: 'ResponseChecksumMismatch' })\n- );\n+ )", - "comment_created_at": "2018-04-16T17:14:35+00:00", - "comment_author": "chrisradek", - "comment_body": "Call `return` either on the same line as `callback` or right under. Right now `callback` might be triggered twice: once with an error and then once without an error a couple lines below.", - "pr_file_module": null - }, - { - "comment_id": "181866700", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1956, - "pr_file": "lib/http/response-validator/integrityChecker.js", - "discussion_id": "181819271", - "commented_code": "@@ -53,10 +48,10 @@ IntegrityCheckerStream.prototype._flush = function(callback) {\n this.providedChecksum.length !== CHECKSUM_BYTE_LEN ||\n this.hash.digest('base64') !== this.providedChecksum.toString('base64')\n ) {\n- this.emit('error', AWS.util.error(\n+ callback( AWS.util.error(\n new Error('Response fails integrity check.'),\n { code: 'ResponseChecksumMismatch' })\n- );\n+ )", - "comment_created_at": "2018-04-16T20:00:48+00:00", - "comment_author": "AllanZhengYP", - "comment_body": "Here calling callback twice won't actually write anything to stream. 
I will fix it though", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "265312860", - "pr_number": 2559, - "pr_file": "lib/credentials/shared_ini_file_credentials.js", - "created_at": "2019-03-13T20:15:45+00:00", - "commented_code": "* @param profile [map] credentials profile\n * @throws SharedIniFileCredentialsProviderFailure\n */\n loadViaCredentialProcess: function loadCredentialProcess(profile, callback) {\n try {\n var output = proc.execSync(profile['credential_process']);\n var credData = JSON.parse(output);\n if (parseInt(credData.Version) !== 1) {\n throw AWS.util.error(\n Error('credential_process does not return Version == 1 for profile ' + this.profile),\n { code: 'SharedIniFileCredentialsProviderFailure'}\n loadViaCredentialProcess: function loadViaCredentialProcess(profile, callback) {\n proc.exec(profile['credential_process'], function(err,stdOut, stdErr) {\n if (err) {\n callback(err, null);\n } else if (stdErr) {\n callback(stdErr, null);", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "265312860", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2559, - "pr_file": "lib/credentials/shared_ini_file_credentials.js", - "discussion_id": "265312860", - "commented_code": "@@ -187,25 +200,33 @@ AWS.SharedIniFileCredentials = AWS.util.inherit(AWS.Credentials, {\n * @param profile [map] credentials profile\n * @throws SharedIniFileCredentialsProviderFailure\n */\n- loadViaCredentialProcess: function loadCredentialProcess(profile, callback) {\n- try {\n- var output = proc.execSync(profile['credential_process']);\n- var credData = JSON.parse(output);\n- if (parseInt(credData.Version) !== 1) {\n- throw AWS.util.error(\n- Error('credential_process does not return Version == 1 for profile ' + this.profile),\n- { code: 'SharedIniFileCredentialsProviderFailure'}\n+ loadViaCredentialProcess: function loadViaCredentialProcess(profile, callback) {\n+ proc.exec(profile['credential_process'], function(err,stdOut, stdErr) {\n+ if (err) {\n+ callback(err, null);\n+ } else if (stdErr) {\n+ callback(stdErr, null);", - "comment_created_at": "2019-03-13T20:15:45+00:00", - "comment_author": "jstewmon", - "comment_body": "stderr is often used for ancillary information (like a deprecation warning) even when the command succeeds, so I would advise against treating a truthy value as a failure.\r\n\r\nFWIW, the [botocore implementation] ignores stderr.\r\n\r\nIf this is retained, I suggest passing `AWS.util.error(new Error(stdErr), {code: 'SharedIniCredentialsProviderFailue'})`, so that the callback receives a normalized `Error`, not a `string`.\r\n\r\n[botocore implementation]: https://github.com/boto/botocore/blob/8d3ea0e61473fba43774eb3c74e1b22995ee7370/botocore/credentials.py#L804-L833", - "pr_file_module": null - }, - { - "comment_id": "265324515", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2559, - "pr_file": "lib/credentials/shared_ini_file_credentials.js", - "discussion_id": "265312860", - "commented_code": "@@ -187,25 +200,33 @@ AWS.SharedIniFileCredentials = AWS.util.inherit(AWS.Credentials, {\n * @param profile [map] credentials profile\n * @throws SharedIniFileCredentialsProviderFailure\n */\n- loadViaCredentialProcess: function loadCredentialProcess(profile, callback) {\n- try {\n- var output = proc.execSync(profile['credential_process']);\n- var credData = JSON.parse(output);\n- if (parseInt(credData.Version) !== 1) {\n- throw AWS.util.error(\n- Error('credential_process does not return Version == 1 for profile ' + 
this.profile),\n- { code: 'SharedIniFileCredentialsProviderFailure'}\n+ loadViaCredentialProcess: function loadViaCredentialProcess(profile, callback) {\n+ proc.exec(profile['credential_process'], function(err,stdOut, stdErr) {\n+ if (err) {\n+ callback(err, null);\n+ } else if (stdErr) {\n+ callback(stdErr, null);", - "comment_created_at": "2019-03-13T20:47:32+00:00", - "comment_author": "srchase", - "comment_body": "Followed the botocore implementation and ignored stderr. thanks!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "268204892", - "pr_number": 2559, - "pr_file": "lib/credentials/process_credentials.js", - "created_at": "2019-03-22T14:56:10+00:00", - "commented_code": "var AWS = require('../core');\nvar proc = require('child_process');\nvar iniLoader = AWS.util.iniLoader;\n\n/**\n * Represents credentials loaded from shared credentials file\n * (defaulting to ~/.aws/credentials or defined by the\n * `AWS_SHARED_CREDENTIALS_FILE` environment variable).\n *\n * ## Using process credentials\n *\n * The credentials file can specify a credential provider that executes\n * a given process and attempts to read its stdout to recieve a JSON payload\n * containing the credentials:\n *\n * [default]\n * credential_process = /usr/bin/credential_proc\n *\n * Automatically handles refreshing credentials if an Expiration time is\n * provided in the credentials payload. Credentials supplied in the same profile\n * will take precedence over the credential_process.\n *\n * Sourcing credentials from an external process can potentially be dangerous,\n * so proceed with caution. Other credential providers should be preferred if\n * at all possible. If using this option, you should make sure that the shared\n * credentials file is as locked down as possible using security best practices\n * for your operating system.\n *\n * ## Using custom profiles\n *\n * The SDK supports loading credentials for separate profiles. This can be done\n * in two ways:\n *\n * 1. Set the `AWS_PROFILE` environment variable in your process prior to\n * loading the SDK.\n * 2. Directly load the AWS.ProcessCredentials provider:\n *\n * ```javascript\n * var creds = new AWS.ProcessCredentials({profile: 'myprofile'});\n * AWS.config.credentials = creds;\n * ```\n *\n * @!macro nobrowser\n */\nAWS.ProcessCredentials = AWS.util.inherit(AWS.Credentials, {\n /**\n * Creates a new ProcessCredentials object.\n *\n * @param options [map] a set of options\n * @option options profile [String] (AWS_PROFILE env var or 'default')\n * the name of the profile to load.\n * @option options filename [String] ('~/.aws/credentials' or defined by\n * AWS_SHARED_CREDENTIALS_FILE process env var)\n * the filename to use when loading credentials.\n * @option options callback [Function] (err) Credentials are eagerly loaded\n * by the constructor. When the callback is called with no error, the\n * credentials have been loaded successfully.\n * @option options httpOptions [map] A set of options to pass to the low-level\n * HTTP request. Currently supported options are:\n * * **proxy** [String] — the URL to proxy requests through\n * * **agent** [http.Agent, https.Agent] — the Agent object to perform\n * HTTP requests with. Used for connection pooling. Defaults to the global\n * agent (`http.globalAgent`) for non-SSL connections. Note that for\n * SSL connections, a special Agent object is used in order to enable\n * peer certificate verification. 
This feature is only available in the\n * Node.js environment.\n * * **connectTimeout** [Integer] — Sets the socket to timeout after\n * failing to establish a connection with the server after\n * `connectTimeout` milliseconds. This timeout has no effect once a socket\n * connection has been established.\n * * **timeout** [Integer] — Sets the socket to timeout after timeout\n * milliseconds of inactivity on the socket. Defaults to two minutes\n * (120000).\n * * **xhrAsync** [Boolean] — Whether the SDK will send asynchronous\n * HTTP requests. Used in the browser environment only. Set to false to\n * send requests synchronously. Defaults to true (async on).\n * * **xhrWithCredentials** [Boolean] — Sets the \"withCredentials\"\n * property of an XMLHttpRequest object. Used in the browser environment\n * only. Defaults to false.\n */\n constructor: function ProcessCredentials(options) {\n AWS.Credentials.call(this);\n\n options = options || {};\n\n this.filename = options.filename;\n this.profile = options.profile || process.env.AWS_PROFILE || AWS.util.defaultProfile;\n this.httpOptions = options.httpOptions || null;\n this.get(options.callback || AWS.util.fn.noop);\n },\n\n /**\n * @api private\n */\n load: function load(callback) {\n var self = this;\n try {\n var profiles = {};\n var profilesFromConfig = {};\n if (process.env[AWS.util.configOptInEnv]) {\n var profilesFromConfig = iniLoader.loadFrom({\n isConfig: true,\n filename: process.env[AWS.util.sharedConfigFileEnv]\n });\n }\n var profilesFromCreds = iniLoader.loadFrom({\n filename: this.filename ||\n (process.env[AWS.util.configOptInEnv] && process.env[AWS.util.sharedCredentialsFileEnv])\n });\n for (var i = 0, profileNames = Object.keys(profilesFromCreds); i < profileNames.length; i++) {\n profiles[profileNames[i]] = profilesFromCreds[profileNames[i]];\n }\n // load after profilesFromCreds to prefer profilesFromConfig\n for (var i = 0, profileNames = Object.keys(profilesFromConfig); i < profileNames.length; i++) {\n profiles[profileNames[i]] = profilesFromConfig[profileNames[i]];\n }\n var profile = profiles[this.profile] || {};\n\n if (Object.keys(profile).length === 0) {\n throw AWS.util.error(\n new Error('Profile ' + this.profile + ' not found'),\n { code: 'ProcessCredentialsProviderFailure' }\n );\n }\n\n if (profile['credential_process']) {\n this.loadViaCredentialProcess(profile, function(err, data) {\n if (err) {\n callback(err, null);\n } else {\n self.expired = false;\n self.accessKeyId = data.AccessKeyId;\n self.secretAccessKey = data.SecretAccessKey;\n self.sessionToken = data.SessionToken;\n if (data.Expiration) {\n self.expireTime = new Date(data.Expiration);\n }\n callback(null);\n }\n });\n } else {\n throw AWS.util.error(\n new Error('Profile ' + this.profile + ' did not include credential process'),\n { code: 'ProcessCredentialsProviderFailure' }\n );\n }\n this.expired = false;\n if (this.accessKeyId && this.secretAccessKey) {\n callback(null);\n }\n } catch (err) {\n callback(err);\n }\n },\n\n /**\n * Executes the credential_process and retrieves\n * credentials from the output\n * @api private\n * @param profile [map] credentials profile\n * @throws SharedIniFileCredentialsProviderFailure\n */\n loadViaCredentialProcess: function loadViaCredentialProcess(profile, callback) {\n proc.exec(profile['credential_process'], function(err,stdOut, stdErr) {\n try {\n var credData = JSON.parse(stdOut);\n if (credData.Expiration) {\n var currentTime = AWS.util.date.getDate();\n var expireTime = new 
Date(credData.Expiration);\n if (expireTime < currentTime) {\n err = AWS.util.error(\n new Error('credential_process returned expired credentials'),\n { code: 'ProcessCredentialsProviderFailure' }\n );\n }\n } else if (credData.Version !== 1) {\n err = AWS.util.error(\n new Error('credential_process does not return Version == 1'),\n { code: 'ProcessCredentialsProviderFailure' }\n );\n }\n if (err) {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "268204892", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2559, - "pr_file": "lib/credentials/process_credentials.js", - "discussion_id": "268204892", - "commented_code": "@@ -0,0 +1,206 @@\n+var AWS = require('../core');\n+var proc = require('child_process');\n+var iniLoader = AWS.util.iniLoader;\n+\n+/**\n+ * Represents credentials loaded from shared credentials file\n+ * (defaulting to ~/.aws/credentials or defined by the\n+ * `AWS_SHARED_CREDENTIALS_FILE` environment variable).\n+ *\n+ * ## Using process credentials\n+ *\n+ * The credentials file can specify a credential provider that executes\n+ * a given process and attempts to read its stdout to recieve a JSON payload\n+ * containing the credentials:\n+ *\n+ * [default]\n+ * credential_process = /usr/bin/credential_proc\n+ *\n+ * Automatically handles refreshing credentials if an Expiration time is\n+ * provided in the credentials payload. Credentials supplied in the same profile\n+ * will take precedence over the credential_process.\n+ *\n+ * Sourcing credentials from an external process can potentially be dangerous,\n+ * so proceed with caution. Other credential providers should be preferred if\n+ * at all possible. If using this option, you should make sure that the shared\n+ * credentials file is as locked down as possible using security best practices\n+ * for your operating system.\n+ *\n+ * ## Using custom profiles\n+ *\n+ * The SDK supports loading credentials for separate profiles. This can be done\n+ * in two ways:\n+ *\n+ * 1. Set the `AWS_PROFILE` environment variable in your process prior to\n+ * loading the SDK.\n+ * 2. Directly load the AWS.ProcessCredentials provider:\n+ *\n+ * ```javascript\n+ * var creds = new AWS.ProcessCredentials({profile: 'myprofile'});\n+ * AWS.config.credentials = creds;\n+ * ```\n+ *\n+ * @!macro nobrowser\n+ */\n+AWS.ProcessCredentials = AWS.util.inherit(AWS.Credentials, {\n+ /**\n+ * Creates a new ProcessCredentials object.\n+ *\n+ * @param options [map] a set of options\n+ * @option options profile [String] (AWS_PROFILE env var or 'default')\n+ * the name of the profile to load.\n+ * @option options filename [String] ('~/.aws/credentials' or defined by\n+ * AWS_SHARED_CREDENTIALS_FILE process env var)\n+ * the filename to use when loading credentials.\n+ * @option options callback [Function] (err) Credentials are eagerly loaded\n+ * by the constructor. When the callback is called with no error, the\n+ * credentials have been loaded successfully.\n+ * @option options httpOptions [map] A set of options to pass to the low-level\n+ * HTTP request. Currently supported options are:\n+ * * **proxy** [String] — the URL to proxy requests through\n+ * * **agent** [http.Agent, https.Agent] — the Agent object to perform\n+ * HTTP requests with. Used for connection pooling. Defaults to the global\n+ * agent (`http.globalAgent`) for non-SSL connections. Note that for\n+ * SSL connections, a special Agent object is used in order to enable\n+ * peer certificate verification. 
This feature is only available in the\n+ * Node.js environment.\n+ * * **connectTimeout** [Integer] — Sets the socket to timeout after\n+ * failing to establish a connection with the server after\n+ * `connectTimeout` milliseconds. This timeout has no effect once a socket\n+ * connection has been established.\n+ * * **timeout** [Integer] — Sets the socket to timeout after timeout\n+ * milliseconds of inactivity on the socket. Defaults to two minutes\n+ * (120000).\n+ * * **xhrAsync** [Boolean] — Whether the SDK will send asynchronous\n+ * HTTP requests. Used in the browser environment only. Set to false to\n+ * send requests synchronously. Defaults to true (async on).\n+ * * **xhrWithCredentials** [Boolean] — Sets the \"withCredentials\"\n+ * property of an XMLHttpRequest object. Used in the browser environment\n+ * only. Defaults to false.\n+ */\n+ constructor: function ProcessCredentials(options) {\n+ AWS.Credentials.call(this);\n+\n+ options = options || {};\n+\n+ this.filename = options.filename;\n+ this.profile = options.profile || process.env.AWS_PROFILE || AWS.util.defaultProfile;\n+ this.httpOptions = options.httpOptions || null;\n+ this.get(options.callback || AWS.util.fn.noop);\n+ },\n+\n+ /**\n+ * @api private\n+ */\n+ load: function load(callback) {\n+ var self = this;\n+ try {\n+ var profiles = {};\n+ var profilesFromConfig = {};\n+ if (process.env[AWS.util.configOptInEnv]) {\n+ var profilesFromConfig = iniLoader.loadFrom({\n+ isConfig: true,\n+ filename: process.env[AWS.util.sharedConfigFileEnv]\n+ });\n+ }\n+ var profilesFromCreds = iniLoader.loadFrom({\n+ filename: this.filename ||\n+ (process.env[AWS.util.configOptInEnv] && process.env[AWS.util.sharedCredentialsFileEnv])\n+ });\n+ for (var i = 0, profileNames = Object.keys(profilesFromCreds); i < profileNames.length; i++) {\n+ profiles[profileNames[i]] = profilesFromCreds[profileNames[i]];\n+ }\n+ // load after profilesFromCreds to prefer profilesFromConfig\n+ for (var i = 0, profileNames = Object.keys(profilesFromConfig); i < profileNames.length; i++) {\n+ profiles[profileNames[i]] = profilesFromConfig[profileNames[i]];\n+ }\n+ var profile = profiles[this.profile] || {};\n+\n+ if (Object.keys(profile).length === 0) {\n+ throw AWS.util.error(\n+ new Error('Profile ' + this.profile + ' not found'),\n+ { code: 'ProcessCredentialsProviderFailure' }\n+ );\n+ }\n+\n+ if (profile['credential_process']) {\n+ this.loadViaCredentialProcess(profile, function(err, data) {\n+ if (err) {\n+ callback(err, null);\n+ } else {\n+ self.expired = false;\n+ self.accessKeyId = data.AccessKeyId;\n+ self.secretAccessKey = data.SecretAccessKey;\n+ self.sessionToken = data.SessionToken;\n+ if (data.Expiration) {\n+ self.expireTime = new Date(data.Expiration);\n+ }\n+ callback(null);\n+ }\n+ });\n+ } else {\n+ throw AWS.util.error(\n+ new Error('Profile ' + this.profile + ' did not include credential process'),\n+ { code: 'ProcessCredentialsProviderFailure' }\n+ );\n+ }\n+ this.expired = false;\n+ if (this.accessKeyId && this.secretAccessKey) {\n+ callback(null);\n+ }\n+ } catch (err) {\n+ callback(err);\n+ }\n+ },\n+\n+ /**\n+ * Executes the credential_process and retrieves\n+ * credentials from the output\n+ * @api private\n+ * @param profile [map] credentials profile\n+ * @throws SharedIniFileCredentialsProviderFailure\n+ */\n+ loadViaCredentialProcess: function loadViaCredentialProcess(profile, callback) {\n+ proc.exec(profile['credential_process'], function(err,stdOut, stdErr) {\n+ try {\n+ var credData = JSON.parse(stdOut);\n+ if 
(credData.Expiration) {\n+ var currentTime = AWS.util.date.getDate();\n+ var expireTime = new Date(credData.Expiration);\n+ if (expireTime < currentTime) {\n+ err = AWS.util.error(\n+ new Error('credential_process returned expired credentials'),\n+ { code: 'ProcessCredentialsProviderFailure' }\n+ );\n+ }\n+ } else if (credData.Version !== 1) {\n+ err = AWS.util.error(\n+ new Error('credential_process does not return Version == 1'),\n+ { code: 'ProcessCredentialsProviderFailure' }\n+ );\n+ }\n+ if (err) {", - "comment_created_at": "2019-03-22T14:56:10+00:00", - "comment_author": "jstewmon", - "comment_body": "I think `err` should be checked before trying to process `stdOut`, since the condition is always checked.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/aws-sdk-js-example-documentation-standards.json b/_reviewers/aws-sdk-js-example-documentation-standards.json new file mode 100644 index 0000000..2a1ea00 --- /dev/null +++ b/_reviewers/aws-sdk-js-example-documentation-standards.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "35271327", + "pr_number": 667, + "pr_file": "doc-src/templates/api-versions/model_documentor.rb", + "created_at": "2015-07-22T22:20:41+00:00", + "commented_code": "@lines += shapes(api, operation['input']).map {|line| \" \" + line }\n\n ## @example tag\n\n @lines << \"@example Calling the #{method_name(operation_name)} operation\"", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "35271327", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 667, + "pr_file": "doc-src/templates/api-versions/model_documentor.rb", + "discussion_id": "35271327", + "commented_code": "@@ -93,11 +93,19 @@ def initialize(operation_name, operation, api, klass)\n @lines += shapes(api, operation['input']).map {|line| \" \" + line }\n \n ## @example tag\n-\n @lines << \"@example Calling the #{method_name(operation_name)} operation\"", + "comment_created_at": "2015-07-22T22:20:41+00:00", + "comment_author": "lsegal", + "comment_body": "I would suggest moving any hand-written examples above the generated example, since they are likely to be more correct / useful.\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "35271330", + "pr_number": 667, + "pr_file": "doc-src/templates/api-versions/model_documentor.rb", + "created_at": "2015-07-22T22:20:42+00:00", + "commented_code": "@lines += shapes(api, operation['input']).map {|line| \" \" + line }\n\n ## @example tag\n\n @lines << \"@example Calling the #{method_name(operation_name)} operation\"\n @lines << generate_example(api, klass, method_name(operation_name),\n operation['input']).split(\"\\n\").map {|line| \" \" + line }\n @lines << \"\"\n if examples\n examples.each do |example|\n @lines << \"@example Example: #{example['title']}\"", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "35271330", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 667, + "pr_file": "doc-src/templates/api-versions/model_documentor.rb", + "discussion_id": "35271330", + "commented_code": "@@ -93,11 +93,19 @@ def initialize(operation_name, operation, api, klass)\n @lines += shapes(api, operation['input']).map {|line| \" \" + line }\n \n ## @example tag\n-\n @lines << \"@example Calling the #{method_name(operation_name)} operation\"\n @lines << generate_example(api, klass, method_name(operation_name),\n operation['input']).split(\"\\n\").map {|line| \" \" + line }\n- @lines << \"\"\n+ if examples\n+ examples.each do |example|\n+ @lines << \"@example Example: 
#{example['title']}\"", + "comment_created_at": "2015-07-22T22:20:42+00:00", + "comment_author": "lsegal", + "comment_body": "Example tags should not be prefixed with \"Example:\" text, let's just list the title.\n", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/aws-sdk-js-example-documentation-standards.md b/_reviewers/aws-sdk-js-example-documentation-standards.md index 16438ea..fdd62e3 100644 --- a/_reviewers/aws-sdk-js-example-documentation-standards.md +++ b/_reviewers/aws-sdk-js-example-documentation-standards.md @@ -27,51 +27,3 @@ For instance: ``` This approach improves readability and maintains consistent documentation style. - - -[ - { - "discussion_id": "35271327", - "pr_number": 667, - "pr_file": "doc-src/templates/api-versions/model_documentor.rb", - "created_at": "2015-07-22T22:20:41+00:00", - "commented_code": "@lines += shapes(api, operation['input']).map {|line| \" \" + line }\n\n ## @example tag\n\n @lines << \"@example Calling the #{method_name(operation_name)} operation\"", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "35271327", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 667, - "pr_file": "doc-src/templates/api-versions/model_documentor.rb", - "discussion_id": "35271327", - "commented_code": "@@ -93,11 +93,19 @@ def initialize(operation_name, operation, api, klass)\n @lines += shapes(api, operation['input']).map {|line| \" \" + line }\n \n ## @example tag\n-\n @lines << \"@example Calling the #{method_name(operation_name)} operation\"", - "comment_created_at": "2015-07-22T22:20:41+00:00", - "comment_author": "lsegal", - "comment_body": "I would suggest moving any hand-written examples above the generated example, since they are likely to be more correct / useful.\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "35271330", - "pr_number": 667, - "pr_file": "doc-src/templates/api-versions/model_documentor.rb", - "created_at": "2015-07-22T22:20:42+00:00", - "commented_code": "@lines += shapes(api, operation['input']).map {|line| \" \" + line }\n\n ## @example tag\n\n @lines << \"@example Calling the #{method_name(operation_name)} operation\"\n @lines << generate_example(api, klass, method_name(operation_name),\n operation['input']).split(\"\\n\").map {|line| \" \" + line }\n @lines << \"\"\n if examples\n examples.each do |example|\n @lines << \"@example Example: #{example['title']}\"", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "35271330", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 667, - "pr_file": "doc-src/templates/api-versions/model_documentor.rb", - "discussion_id": "35271330", - "commented_code": "@@ -93,11 +93,19 @@ def initialize(operation_name, operation, api, klass)\n @lines += shapes(api, operation['input']).map {|line| \" \" + line }\n \n ## @example tag\n-\n @lines << \"@example Calling the #{method_name(operation_name)} operation\"\n @lines << generate_example(api, klass, method_name(operation_name),\n operation['input']).split(\"\\n\").map {|line| \" \" + line }\n- @lines << \"\"\n+ if examples\n+ examples.each do |example|\n+ @lines << \"@example Example: #{example['title']}\"", - "comment_created_at": "2015-07-22T22:20:42+00:00", - "comment_author": "lsegal", - "comment_body": "Example tags should not be prefixed with \"Example:\" text, let's just list the title.\n", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/aws-sdk-js-explicit-verified-configurations.json 
b/_reviewers/aws-sdk-js-explicit-verified-configurations.json new file mode 100644 index 0000000..43c48bd --- /dev/null +++ b/_reviewers/aws-sdk-js-explicit-verified-configurations.json @@ -0,0 +1,92 @@ +[ + { + "discussion_id": "1196576262", + "pr_number": 4422, + "pr_file": "lib/region_config_data.json", + "created_at": "2023-05-17T14:03:21+00:00", + "commented_code": "\"globalEndpoint\": true,\n \"signingRegion\": \"cn-north-1\"\n },\n \"us-gov-*/iam\": \"globalGovCloud\",\n \"us-iso-*/iam\": {\n \"endpoint\": \"{service}.us-iso-east-1.c2s.ic.gov\",\n \"globalEndpoint\": true,\n \"signingRegion\": \"us-east-1\"", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "1196576262", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 4422, + "pr_file": "lib/region_config_data.json", + "discussion_id": "1196576262", + "commented_code": "@@ -43,8 +43,13 @@\n \"globalEndpoint\": true,\n \"signingRegion\": \"cn-north-1\"\n },\n- \"us-gov-*/iam\": \"globalGovCloud\",\n+ \"us-iso-*/iam\": {\n+ \"endpoint\": \"{service}.us-iso-east-1.c2s.ic.gov\",\n+ \"globalEndpoint\": true,\n+ \"signingRegion\": \"us-east-1\"", + "comment_created_at": "2023-05-17T14:03:21+00:00", + "comment_author": "trivikr", + "comment_body": "Should this be us-iso-east-1?\r\n```suggestion\r\n \"signingRegion\": \"us-iso-east-1\"\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1196584529", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 4422, + "pr_file": "lib/region_config_data.json", + "discussion_id": "1196576262", + "commented_code": "@@ -43,8 +43,13 @@\n \"globalEndpoint\": true,\n \"signingRegion\": \"cn-north-1\"\n },\n- \"us-gov-*/iam\": \"globalGovCloud\",\n+ \"us-iso-*/iam\": {\n+ \"endpoint\": \"{service}.us-iso-east-1.c2s.ic.gov\",\n+ \"globalEndpoint\": true,\n+ \"signingRegion\": \"us-east-1\"", + "comment_created_at": "2023-05-17T14:09:18+00:00", + "comment_author": "trivikr", + "comment_body": "Verified that it should be `us-iso-east-1` from Endpoints 2.0 tests https://github.com/aws/aws-sdk-js-v3/blob/7ed7101dcc4e81038b6c7f581162b959e6b33a04/codegen/sdk-codegen/aws-models/iam.json#L2270-L2291", + "pr_file_module": null + }, + { + "comment_id": "1198708906", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 4422, + "pr_file": "lib/region_config_data.json", + "discussion_id": "1196576262", + "commented_code": "@@ -43,8 +43,13 @@\n \"globalEndpoint\": true,\n \"signingRegion\": \"cn-north-1\"\n },\n- \"us-gov-*/iam\": \"globalGovCloud\",\n+ \"us-iso-*/iam\": {\n+ \"endpoint\": \"{service}.us-iso-east-1.c2s.ic.gov\",\n+ \"globalEndpoint\": true,\n+ \"signingRegion\": \"us-east-1\"", + "comment_created_at": "2023-05-19T08:46:01+00:00", + "comment_author": "pinak", + "comment_body": "Yes, thanks for catching that!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1034017482", + "pr_number": 4288, + "pr_file": ".changes/next-release/bugfix-region-config-31f590e0.json", + "created_at": "2022-11-28T20:28:38+00:00", + "commented_code": "{\n \"type\": \"bugfix\",\n \"category\": \"region_config\",\n \"description\": \"avoid mutation in global object signatureVersion\"", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "1034017482", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 4288, + "pr_file": ".changes/next-release/bugfix-region-config-31f590e0.json", + "discussion_id": "1034017482", + "commented_code": "@@ -0,0 +1,5 @@\n+{\n+ \"type\": \"bugfix\",\n+ \"category\": \"region_config\",\n+ \"description\": 
\"avoid mutation in global object signatureVersion\"", + "comment_created_at": "2022-11-28T20:28:38+00:00", + "comment_author": "trivikr", + "comment_body": "```suggestion\r\n \"description\": \"Set signatureVersion to bearer explcitly when defined in service API\"\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "608843822", + "pr_number": 3700, + "pr_file": ".changes/next-release/bugfix-ManagedUpload-2bd31158.json", + "created_at": "2021-04-07T17:14:07+00:00", + "commented_code": "{\n \"type\": \"bugfix\",\n \"category\": \"ManagedUpload\",\n \"description\": \"fix a bug that credentials refresh to frequently\"", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "608843822", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 3700, + "pr_file": ".changes/next-release/bugfix-ManagedUpload-2bd31158.json", + "discussion_id": "608843822", + "commented_code": "@@ -0,0 +1,5 @@\n+{\n+ \"type\": \"bugfix\",\n+ \"category\": \"ManagedUpload\",\n+ \"description\": \"fix a bug that credentials refresh to frequently\"", + "comment_created_at": "2021-04-07T17:14:07+00:00", + "comment_author": "trivikr", + "comment_body": "The type already has bugfix\r\n```suggestion\r\n \"description\": \"Use resolved credentials if customer supplies configured S3 Client\"\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/aws-sdk-js-explicit-verified-configurations.md b/_reviewers/aws-sdk-js-explicit-verified-configurations.md index 7ce1e43..089c756 100644 --- a/_reviewers/aws-sdk-js-explicit-verified-configurations.md +++ b/_reviewers/aws-sdk-js-explicit-verified-configurations.md @@ -31,97 +31,3 @@ Example: ``` For credential configurations, always use resolved credentials when available rather than relying on automatic refresh mechanisms that may cause performance issues. 
- - -[ - { - "discussion_id": "1196576262", - "pr_number": 4422, - "pr_file": "lib/region_config_data.json", - "created_at": "2023-05-17T14:03:21+00:00", - "commented_code": "\"globalEndpoint\": true,\n \"signingRegion\": \"cn-north-1\"\n },\n \"us-gov-*/iam\": \"globalGovCloud\",\n \"us-iso-*/iam\": {\n \"endpoint\": \"{service}.us-iso-east-1.c2s.ic.gov\",\n \"globalEndpoint\": true,\n \"signingRegion\": \"us-east-1\"", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "1196576262", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 4422, - "pr_file": "lib/region_config_data.json", - "discussion_id": "1196576262", - "commented_code": "@@ -43,8 +43,13 @@\n \"globalEndpoint\": true,\n \"signingRegion\": \"cn-north-1\"\n },\n- \"us-gov-*/iam\": \"globalGovCloud\",\n+ \"us-iso-*/iam\": {\n+ \"endpoint\": \"{service}.us-iso-east-1.c2s.ic.gov\",\n+ \"globalEndpoint\": true,\n+ \"signingRegion\": \"us-east-1\"", - "comment_created_at": "2023-05-17T14:03:21+00:00", - "comment_author": "trivikr", - "comment_body": "Should this be us-iso-east-1?\r\n```suggestion\r\n \"signingRegion\": \"us-iso-east-1\"\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1196584529", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 4422, - "pr_file": "lib/region_config_data.json", - "discussion_id": "1196576262", - "commented_code": "@@ -43,8 +43,13 @@\n \"globalEndpoint\": true,\n \"signingRegion\": \"cn-north-1\"\n },\n- \"us-gov-*/iam\": \"globalGovCloud\",\n+ \"us-iso-*/iam\": {\n+ \"endpoint\": \"{service}.us-iso-east-1.c2s.ic.gov\",\n+ \"globalEndpoint\": true,\n+ \"signingRegion\": \"us-east-1\"", - "comment_created_at": "2023-05-17T14:09:18+00:00", - "comment_author": "trivikr", - "comment_body": "Verified that it should be `us-iso-east-1` from Endpoints 2.0 tests https://github.com/aws/aws-sdk-js-v3/blob/7ed7101dcc4e81038b6c7f581162b959e6b33a04/codegen/sdk-codegen/aws-models/iam.json#L2270-L2291", - "pr_file_module": null - }, - { - "comment_id": "1198708906", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 4422, - "pr_file": "lib/region_config_data.json", - "discussion_id": "1196576262", - "commented_code": "@@ -43,8 +43,13 @@\n \"globalEndpoint\": true,\n \"signingRegion\": \"cn-north-1\"\n },\n- \"us-gov-*/iam\": \"globalGovCloud\",\n+ \"us-iso-*/iam\": {\n+ \"endpoint\": \"{service}.us-iso-east-1.c2s.ic.gov\",\n+ \"globalEndpoint\": true,\n+ \"signingRegion\": \"us-east-1\"", - "comment_created_at": "2023-05-19T08:46:01+00:00", - "comment_author": "pinak", - "comment_body": "Yes, thanks for catching that!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1034017482", - "pr_number": 4288, - "pr_file": ".changes/next-release/bugfix-region-config-31f590e0.json", - "created_at": "2022-11-28T20:28:38+00:00", - "commented_code": "{\n \"type\": \"bugfix\",\n \"category\": \"region_config\",\n \"description\": \"avoid mutation in global object signatureVersion\"", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "1034017482", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 4288, - "pr_file": ".changes/next-release/bugfix-region-config-31f590e0.json", - "discussion_id": "1034017482", - "commented_code": "@@ -0,0 +1,5 @@\n+{\n+ \"type\": \"bugfix\",\n+ \"category\": \"region_config\",\n+ \"description\": \"avoid mutation in global object signatureVersion\"", - "comment_created_at": "2022-11-28T20:28:38+00:00", - "comment_author": "trivikr", - "comment_body": "```suggestion\r\n \"description\": \"Set 
signatureVersion to bearer explcitly when defined in service API\"\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "608843822", - "pr_number": 3700, - "pr_file": ".changes/next-release/bugfix-ManagedUpload-2bd31158.json", - "created_at": "2021-04-07T17:14:07+00:00", - "commented_code": "{\n \"type\": \"bugfix\",\n \"category\": \"ManagedUpload\",\n \"description\": \"fix a bug that credentials refresh to frequently\"", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "608843822", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 3700, - "pr_file": ".changes/next-release/bugfix-ManagedUpload-2bd31158.json", - "discussion_id": "608843822", - "commented_code": "@@ -0,0 +1,5 @@\n+{\n+ \"type\": \"bugfix\",\n+ \"category\": \"ManagedUpload\",\n+ \"description\": \"fix a bug that credentials refresh to frequently\"", - "comment_created_at": "2021-04-07T17:14:07+00:00", - "comment_author": "trivikr", - "comment_body": "The type already has bugfix\r\n```suggestion\r\n \"description\": \"Use resolved credentials if customer supplies configured S3 Client\"\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/aws-sdk-js-follow-established-testing-patterns.json b/_reviewers/aws-sdk-js-follow-established-testing-patterns.json new file mode 100644 index 0000000..40780e9 --- /dev/null +++ b/_reviewers/aws-sdk-js-follow-established-testing-patterns.json @@ -0,0 +1,68 @@ +[ + { + "discussion_id": "61648554", + "pr_number": 976, + "pr_file": "test/signers/v4.spec.coffee", + "created_at": "2016-04-29T21:39:14+00:00", + "commented_code": "describe 'canonicalString', ->\n it 'sorts the search string', ->\n req = new AWS.CloudSearchDomain({endpoint: 'host.domain.com'}).search({query: 'foo', cursor: 'initial', queryOptions: '{}'}).build()\n req = new AWS.CloudSearchDomain({endpoint: 'host.domain.com'})\n .search({query: 'foo', cursor: 'initial', queryOptions: '{}'})\n .removeListener('build', AWS.CloudSearchDomain.prototype.convertGetToPost)", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "61648554", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 976, + "pr_file": "test/signers/v4.spec.coffee", + "discussion_id": "61648554", + "commented_code": "@@ -117,7 +117,10 @@ describe 'AWS.Signers.V4', ->\n \n describe 'canonicalString', ->\n it 'sorts the search string', ->\n- req = new AWS.CloudSearchDomain({endpoint: 'host.domain.com'}).search({query: 'foo', cursor: 'initial', queryOptions: '{}'}).build()\n+ req = new AWS.CloudSearchDomain({endpoint: 'host.domain.com'})\n+ .search({query: 'foo', cursor: 'initial', queryOptions: '{}'})\n+ .removeListener('build', AWS.CloudSearchDomain.prototype.convertGetToPost)", + "comment_created_at": "2016-04-29T21:39:14+00:00", + "comment_author": "LiuJoyceC", + "comment_body": "This test is testing whether the query string is alphabetically sorted by its field names. The listener that I'm removing here previously did not exist, as it was added as part of this PR, so I'm not removing something that used to be there. Since the listener I added in this PR changes the GET request to a POST, there is no longer a query string, and the test cannot check if it's alphabetically sorted. Therefore, I removed the listener so that just for the purpose of this test, the request is still a GET and has a query string. 
The other two options I have if I don't remove this listener is to switch the test to use a different operation (I could use `suggest` instead of `search`) or to remove this test altogether.\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "92718343", + "pr_number": 1268, + "pr_file": "test/dynamodb/document_client.coffee", + "created_at": "2016-12-15T22:53:42+00:00", + "commented_code": "foo: S: 'bar'\n expect(translateInput(input)).to.eql(params)\n\n it 'translates empty strings', ->", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "92718343", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1268, + "pr_file": "test/dynamodb/document_client.coffee", + "discussion_id": "92718343", + "commented_code": "@@ -52,6 +52,39 @@ describe 'AWS.DynamoDB.DocumentClient', ->\n foo: S: 'bar'\n expect(translateInput(input)).to.eql(params)\n \n+ it 'translates empty strings', ->", + "comment_created_at": "2016-12-15T22:53:42+00:00", + "comment_author": "chrisradek", + "comment_body": "For all these tests where you're making sure we remove empty inputs, can you also add tests to verify that we don't translate empty strings/sets/buffers if `convertEmptyValues` isn't set?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "145843268", + "pr_number": 1765, + "pr_file": "features/directconnect/directconnect.feature", + "created_at": "2017-10-19T22:48:11+00:00", + "commented_code": "I want to use AWS Direct Connect\n\n Scenario: Managing connections\n Given I create a Direct Connect connection with name prefix \"aws-sdk-js\"\n Then I should get a Direct Connect connection ID\n And I describe the connection\n Then the bandwidth should match the connection bandwidth\n And I delete the Direct Connect connection\n Scenario: describe connections\n When I describe the connection\n Then I should get response of type \"Array\"", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "145843268", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1765, + "pr_file": "features/directconnect/directconnect.feature", + "discussion_id": "145843268", + "commented_code": "@@ -4,12 +4,9 @@ Feature: AWS Direct Connect\n \n I want to use AWS Direct Connect\n \n- Scenario: Managing connections\n- Given I create a Direct Connect connection with name prefix \"aws-sdk-js\"\n- Then I should get a Direct Connect connection ID\n- And I describe the connection\n- Then the bandwidth should match the connection bandwidth\n- And I delete the Direct Connect connection\n+ Scenario: describe connections\n+ When I describe the connection\n+ Then I should get response of type \"Array\"", + "comment_created_at": "2017-10-19T22:48:11+00:00", + "comment_author": "chrisradek", + "comment_body": "We have a standard way of testing list/describe operations in most of our feature tests that look like this:\r\nhttps://github.com/aws/aws-sdk-js/blob/v2.135.0/features/acm/acm.feature#L7-L10\r\n\r\nIf you follow this patten, you don't have to create your own step definitions, since cucumber will use the ones defined here:\r\nhttps://github.com/aws/aws-sdk-js/blob/v2.135.0/features/extra/hooks.js#L56", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/aws-sdk-js-follow-established-testing-patterns.md b/_reviewers/aws-sdk-js-follow-established-testing-patterns.md index 18523d9..b13ca2e 100644 --- a/_reviewers/aws-sdk-js-follow-established-testing-patterns.md +++ 
b/_reviewers/aws-sdk-js-follow-established-testing-patterns.md @@ -38,73 +38,3 @@ Scenario: Managing connections it('translates empty strings when convertEmptyValues is true', -> ...) it('does not translate empty strings when convertEmptyValues is false', -> ...) ``` - - -[ - { - "discussion_id": "61648554", - "pr_number": 976, - "pr_file": "test/signers/v4.spec.coffee", - "created_at": "2016-04-29T21:39:14+00:00", - "commented_code": "describe 'canonicalString', ->\n it 'sorts the search string', ->\n req = new AWS.CloudSearchDomain({endpoint: 'host.domain.com'}).search({query: 'foo', cursor: 'initial', queryOptions: '{}'}).build()\n req = new AWS.CloudSearchDomain({endpoint: 'host.domain.com'})\n .search({query: 'foo', cursor: 'initial', queryOptions: '{}'})\n .removeListener('build', AWS.CloudSearchDomain.prototype.convertGetToPost)", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "61648554", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 976, - "pr_file": "test/signers/v4.spec.coffee", - "discussion_id": "61648554", - "commented_code": "@@ -117,7 +117,10 @@ describe 'AWS.Signers.V4', ->\n \n describe 'canonicalString', ->\n it 'sorts the search string', ->\n- req = new AWS.CloudSearchDomain({endpoint: 'host.domain.com'}).search({query: 'foo', cursor: 'initial', queryOptions: '{}'}).build()\n+ req = new AWS.CloudSearchDomain({endpoint: 'host.domain.com'})\n+ .search({query: 'foo', cursor: 'initial', queryOptions: '{}'})\n+ .removeListener('build', AWS.CloudSearchDomain.prototype.convertGetToPost)", - "comment_created_at": "2016-04-29T21:39:14+00:00", - "comment_author": "LiuJoyceC", - "comment_body": "This test is testing whether the query string is alphabetically sorted by its field names. The listener that I'm removing here previously did not exist, as it was added as part of this PR, so I'm not removing something that used to be there. Since the listener I added in this PR changes the GET request to a POST, there is no longer a query string, and the test cannot check if it's alphabetically sorted. Therefore, I removed the listener so that just for the purpose of this test, the request is still a GET and has a query string. 
The other two options I have if I don't remove this listener is to switch the test to use a different operation (I could use `suggest` instead of `search`) or to remove this test altogether.\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "92718343", - "pr_number": 1268, - "pr_file": "test/dynamodb/document_client.coffee", - "created_at": "2016-12-15T22:53:42+00:00", - "commented_code": "foo: S: 'bar'\n expect(translateInput(input)).to.eql(params)\n\n it 'translates empty strings', ->", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "92718343", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1268, - "pr_file": "test/dynamodb/document_client.coffee", - "discussion_id": "92718343", - "commented_code": "@@ -52,6 +52,39 @@ describe 'AWS.DynamoDB.DocumentClient', ->\n foo: S: 'bar'\n expect(translateInput(input)).to.eql(params)\n \n+ it 'translates empty strings', ->", - "comment_created_at": "2016-12-15T22:53:42+00:00", - "comment_author": "chrisradek", - "comment_body": "For all these tests where you're making sure we remove empty inputs, can you also add tests to verify that we don't translate empty strings/sets/buffers if `convertEmptyValues` isn't set?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "145843268", - "pr_number": 1765, - "pr_file": "features/directconnect/directconnect.feature", - "created_at": "2017-10-19T22:48:11+00:00", - "commented_code": "I want to use AWS Direct Connect\n\n Scenario: Managing connections\n Given I create a Direct Connect connection with name prefix \"aws-sdk-js\"\n Then I should get a Direct Connect connection ID\n And I describe the connection\n Then the bandwidth should match the connection bandwidth\n And I delete the Direct Connect connection\n Scenario: describe connections\n When I describe the connection\n Then I should get response of type \"Array\"", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "145843268", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1765, - "pr_file": "features/directconnect/directconnect.feature", - "discussion_id": "145843268", - "commented_code": "@@ -4,12 +4,9 @@ Feature: AWS Direct Connect\n \n I want to use AWS Direct Connect\n \n- Scenario: Managing connections\n- Given I create a Direct Connect connection with name prefix \"aws-sdk-js\"\n- Then I should get a Direct Connect connection ID\n- And I describe the connection\n- Then the bandwidth should match the connection bandwidth\n- And I delete the Direct Connect connection\n+ Scenario: describe connections\n+ When I describe the connection\n+ Then I should get response of type \"Array\"", - "comment_created_at": "2017-10-19T22:48:11+00:00", - "comment_author": "chrisradek", - "comment_body": "We have a standard way of testing list/describe operations in most of our feature tests that look like this:\r\nhttps://github.com/aws/aws-sdk-js/blob/v2.135.0/features/acm/acm.feature#L7-L10\r\n\r\nIf you follow this patten, you don't have to create your own step definitions, since cucumber will use the ones defined here:\r\nhttps://github.com/aws/aws-sdk-js/blob/v2.135.0/features/extra/hooks.js#L56", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/aws-sdk-js-isolate-sensitive-buffer-data.json b/_reviewers/aws-sdk-js-isolate-sensitive-buffer-data.json new file mode 100644 index 0000000..1baa101 --- /dev/null +++ b/_reviewers/aws-sdk-js-isolate-sensitive-buffer-data.json @@ -0,0 +1,36 @@ +[ + { + "discussion_id": "274981081", + "pr_number": 
2622, + "pr_file": "lib/model/shape.js", + "created_at": "2019-04-12T16:35:33+00:00", + "commented_code": "function BinaryShape() {\n Shape.apply(this, arguments);\n this.toType = util.base64.decode;\n this.toType = function(value) {\n var buf = util.base64.decode(value);\n if (this.isSensitive && util.isNode() && typeof util.Buffer.alloc === 'function') {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "274981081", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2622, + "pr_file": "lib/model/shape.js", + "discussion_id": "274981081", + "commented_code": "@@ -354,7 +354,16 @@ function IntegerShape() {\n \n function BinaryShape() {\n Shape.apply(this, arguments);\n- this.toType = util.base64.decode;\n+ this.toType = function(value) {\n+ var buf = util.base64.decode(value);\n+ if (this.isSensitive && util.isNode() && typeof util.Buffer.alloc === 'function') {", + "comment_created_at": "2019-04-12T16:35:33+00:00", + "comment_author": "seebees", + "comment_body": "Maybe a comment to explain why. Word smith to your liking :)\r\n\r\n```javascript\r\n /* Node.js can create a Buffer that is not isolated.\r\n * i.e. buf.byteLength !== buf.buffer.byteLength\r\n * This means that the sensitive data is accessible to anyone with access to buf.buffer.\r\n * If this is the node shared Buffer, then other code within this process _could_ find this secret.\r\n * Copy sensitive data to an isolated Buffer and zero the sensitive data.\r\n * While this is safe to do here, copying this code somewhere else may produce unexpected results.\r\n */\r\n```\r\n\r\nAdditionally, why not check `buf.byteLength !== buf.buffer.byteLength`? And then handle the solution in node or the browser?", + "pr_file_module": null + }, + { + "comment_id": "275458536", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2622, + "pr_file": "lib/model/shape.js", + "discussion_id": "274981081", + "commented_code": "@@ -354,7 +354,16 @@ function IntegerShape() {\n \n function BinaryShape() {\n Shape.apply(this, arguments);\n- this.toType = util.base64.decode;\n+ this.toType = function(value) {\n+ var buf = util.base64.decode(value);\n+ if (this.isSensitive && util.isNode() && typeof util.Buffer.alloc === 'function') {", + "comment_created_at": "2019-04-15T17:08:02+00:00", + "comment_author": "AllanZhengYP", + "comment_body": "Thanks for the information, I will update the comment. \r\n> why not check buf.byteLength !== buf.buffer.byteLength?\r\n\r\nI'm not sure whether `buf.buffer` would be undefined in browser polyfill, it's just safer this way. And it's not clear in Node doc on when the `buf.buffer` is added. In very old Node, it should be `buf.parent`. Checking availability of `alloc` seems clearer and safer because we will use this API to locate the buffer anyway.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/aws-sdk-js-isolate-sensitive-buffer-data.md b/_reviewers/aws-sdk-js-isolate-sensitive-buffer-data.md index 5ae535d..a91e71a 100644 --- a/_reviewers/aws-sdk-js-isolate-sensitive-buffer-data.md +++ b/_reviewers/aws-sdk-js-isolate-sensitive-buffer-data.md @@ -40,41 +40,3 @@ function handleSensitiveData(base64Value) { ``` Always consider whether your data is sensitive and might require this additional protection. This pattern is particularly important when handling authentication tokens, encryption keys, and other credentials. 
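
To make the buffer-isolation pattern from the review thread above concrete, here is a minimal sketch. It is not the SDK's internal implementation: the signature `handleSensitiveData(base64Value)` echoes the guideline text in this patch, but the body below is only an illustration, and it assumes a Node.js runtime where `Buffer.alloc` and `Buffer.from` are available.

```javascript
// Minimal sketch (assumed helper, not the SDK's code): copy a decoded
// sensitive value into freshly allocated, unpooled memory and zero the
// original, so the secret cannot be read back through the shared pool
// via buf.buffer.
function handleSensitiveData(base64Value) {
  var buf = Buffer.from(base64Value, 'base64'); // may live in the shared pool
  if (typeof Buffer.alloc !== 'function') {
    return buf; // very old Node: isolation API not available
  }
  var isolated = Buffer.alloc(buf.length); // zero-filled, never pooled
  buf.copy(isolated);
  buf.fill(0); // scrub the pooled copy
  return isolated;
}
```

Checking for `Buffer.alloc` rather than inspecting `buf.buffer` follows the reasoning in the comments above: the existence of the API is easier to verify across old Node versions and browser polyfills than the shape of the underlying ArrayBuffer.
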
- - -[ - { - "discussion_id": "274981081", - "pr_number": 2622, - "pr_file": "lib/model/shape.js", - "created_at": "2019-04-12T16:35:33+00:00", - "commented_code": "function BinaryShape() {\n Shape.apply(this, arguments);\n this.toType = util.base64.decode;\n this.toType = function(value) {\n var buf = util.base64.decode(value);\n if (this.isSensitive && util.isNode() && typeof util.Buffer.alloc === 'function') {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "274981081", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2622, - "pr_file": "lib/model/shape.js", - "discussion_id": "274981081", - "commented_code": "@@ -354,7 +354,16 @@ function IntegerShape() {\n \n function BinaryShape() {\n Shape.apply(this, arguments);\n- this.toType = util.base64.decode;\n+ this.toType = function(value) {\n+ var buf = util.base64.decode(value);\n+ if (this.isSensitive && util.isNode() && typeof util.Buffer.alloc === 'function') {", - "comment_created_at": "2019-04-12T16:35:33+00:00", - "comment_author": "seebees", - "comment_body": "Maybe a comment to explain why. Word smith to your liking :)\r\n\r\n```javascript\r\n /* Node.js can create a Buffer that is not isolated.\r\n * i.e. buf.byteLength !== buf.buffer.byteLength\r\n * This means that the sensitive data is accessible to anyone with access to buf.buffer.\r\n * If this is the node shared Buffer, then other code within this process _could_ find this secret.\r\n * Copy sensitive data to an isolated Buffer and zero the sensitive data.\r\n * While this is safe to do here, copying this code somewhere else may produce unexpected results.\r\n */\r\n```\r\n\r\nAdditionally, why not check `buf.byteLength !== buf.buffer.byteLength`? And then handle the solution in node or the browser?", - "pr_file_module": null - }, - { - "comment_id": "275458536", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2622, - "pr_file": "lib/model/shape.js", - "discussion_id": "274981081", - "commented_code": "@@ -354,7 +354,16 @@ function IntegerShape() {\n \n function BinaryShape() {\n Shape.apply(this, arguments);\n- this.toType = util.base64.decode;\n+ this.toType = function(value) {\n+ var buf = util.base64.decode(value);\n+ if (this.isSensitive && util.isNode() && typeof util.Buffer.alloc === 'function') {", - "comment_created_at": "2019-04-15T17:08:02+00:00", - "comment_author": "AllanZhengYP", - "comment_body": "Thanks for the information, I will update the comment. \r\n> why not check buf.byteLength !== buf.buffer.byteLength?\r\n\r\nI'm not sure whether `buf.buffer` would be undefined in browser polyfill, it's just safer this way. And it's not clear in Node doc on when the `buf.buffer` is added. In very old Node, it should be `buf.parent`. 
Checking availability of `alloc` seems clearer and safer because we will use this API to locate the buffer anyway.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/aws-sdk-js-limit-cache-size.json b/_reviewers/aws-sdk-js-limit-cache-size.json new file mode 100644 index 0000000..e63111e --- /dev/null +++ b/_reviewers/aws-sdk-js-limit-cache-size.json @@ -0,0 +1,80 @@ +[ + { + "discussion_id": "70883152", + "pr_number": 1054, + "pr_file": "lib/signers/v4.js", + "created_at": "2016-07-14T21:02:02+00:00", + "commented_code": "return AWS.util.crypto.hmac(kCredentials, this.stringToSign(datetime), 'hex');\n }\n\n cachedSecret[this.serviceName] = {\n cachedSecret[cacheIdentifier] = {\n region: this.request.region, date: date,\n key: kCredentials, akid: credentials.accessKeyId\n };", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "70883152", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1054, + "pr_file": "lib/signers/v4.js", + "discussion_id": "70883152", + "commented_code": "@@ -117,13 +118,13 @@ AWS.Signers.V4 = inherit(AWS.Signers.RequestSigner, {\n return AWS.util.crypto.hmac(kCredentials, this.stringToSign(datetime), 'hex');\n }\n \n- cachedSecret[this.serviceName] = {\n+ cachedSecret[cacheIdentifier] = {\n region: this.request.region, date: date,\n key: kCredentials, akid: credentials.accessKeyId\n };", + "comment_created_at": "2016-07-14T21:02:02+00:00", + "comment_author": "LiuJoyceC", + "comment_body": "We should put a maximum size constraint on `cachedSecret`. Before, there was only one cache per service (and only the last signature key for that service is kept), so the cache was necessarily bounded by the number of services. There is no upper bound on the number of service clients generated. Some applications could be generating a new service client for every request. The service client objects get garbage-collected eventually, but the cache doesn't and could grow to millions of objects fairly quickly and hog a lot of memory. 
Perhaps we could have a `cacheIdentifierQueue` of some sort so that when it's length reaches a certain number, we can get the oldest `cacheIdentifier` and delete it from `cachedSecret`.\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "101653214", + "pr_number": 1350, + "pr_file": "lib/signers/v4_credentials.js", + "created_at": "2017-02-16T23:25:57+00:00", + "commented_code": "var AWS = require('../core');\n\n/**\n * @api private\n */\nvar cachedSecret = {};\n\n/**\n * @api private\n */\nvar cacheQueue = [];\n\n/**\n * @api private\n */\nvar maxCacheEntries = 50;\n\n/**\n * @api private\n */\nvar v4Identifier = 'aws4_request';\n\nAWS.Signers.v4_credentials = {\n /**\n * @api private\n *\n * @param date [String]\n * @param region [String]\n * @param serviceName [String]\n * @returns {string}\n */\n createScope: function createScope(date, region, serviceName) {\n return [\n date.substr(0, 8),\n region,\n serviceName,\n v4Identifier\n ].join('/');\n },\n\n /**\n * @api private\n *\n * @param credentials [Credentials]\n * @param date [String]\n * @param region [String]\n * @param service [String]\n * @param shouldCache [Boolean]\n * @returns {String}\n */\n getSigningKey: function getSigningKey(\n credentials,\n date,\n region,\n service,\n shouldCache\n ) {\n var cacheKey = [credentials.accessKeyId, date, region, service].join('_');", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "101653214", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1350, + "pr_file": "lib/signers/v4_credentials.js", + "discussion_id": "101653214", + "commented_code": "@@ -0,0 +1,95 @@\n+var AWS = require('../core');\n+\n+/**\n+ * @api private\n+ */\n+var cachedSecret = {};\n+\n+/**\n+ * @api private\n+ */\n+var cacheQueue = [];\n+\n+/**\n+ * @api private\n+ */\n+var maxCacheEntries = 50;\n+\n+/**\n+ * @api private\n+ */\n+var v4Identifier = 'aws4_request';\n+\n+AWS.Signers.v4_credentials = {\n+ /**\n+ * @api private\n+ *\n+ * @param date [String]\n+ * @param region [String]\n+ * @param serviceName [String]\n+ * @returns {string}\n+ */\n+ createScope: function createScope(date, region, serviceName) {\n+ return [\n+ date.substr(0, 8),\n+ region,\n+ serviceName,\n+ v4Identifier\n+ ].join('/');\n+ },\n+\n+ /**\n+ * @api private\n+ *\n+ * @param credentials [Credentials]\n+ * @param date [String]\n+ * @param region [String]\n+ * @param service [String]\n+ * @param shouldCache [Boolean]\n+ * @returns {String}\n+ */\n+ getSigningKey: function getSigningKey(\n+ credentials,\n+ date,\n+ region,\n+ service,\n+ shouldCache\n+ ) {\n+ var cacheKey = [credentials.accessKeyId, date, region, service].join('_');", + "comment_created_at": "2017-02-16T23:25:57+00:00", + "comment_author": "chrisradek", + "comment_body": "We had 2 issues reported because this cache key wasn't unique enough:\r\nhttps://github.com/aws/aws-sdk-js/pull/1054\r\n\r\nYou may need to pass the serviceClientId as well.\r\n", + "pr_file_module": null + }, + { + "comment_id": "101667655", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1350, + "pr_file": "lib/signers/v4_credentials.js", + "discussion_id": "101653214", + "commented_code": "@@ -0,0 +1,95 @@\n+var AWS = require('../core');\n+\n+/**\n+ * @api private\n+ */\n+var cachedSecret = {};\n+\n+/**\n+ * @api private\n+ */\n+var cacheQueue = [];\n+\n+/**\n+ * @api private\n+ */\n+var maxCacheEntries = 50;\n+\n+/**\n+ * @api private\n+ */\n+var v4Identifier = 'aws4_request';\n+\n+AWS.Signers.v4_credentials = {\n+ /**\n+ * @api private\n+ *\n+ * 
@param date [String]\n+ * @param region [String]\n+ * @param serviceName [String]\n+ * @returns {string}\n+ */\n+ createScope: function createScope(date, region, serviceName) {\n+ return [\n+ date.substr(0, 8),\n+ region,\n+ serviceName,\n+ v4Identifier\n+ ].join('/');\n+ },\n+\n+ /**\n+ * @api private\n+ *\n+ * @param credentials [Credentials]\n+ * @param date [String]\n+ * @param region [String]\n+ * @param service [String]\n+ * @param shouldCache [Boolean]\n+ * @returns {String}\n+ */\n+ getSigningKey: function getSigningKey(\n+ credentials,\n+ date,\n+ region,\n+ service,\n+ shouldCache\n+ ) {\n+ var cacheKey = [credentials.accessKeyId, date, region, service].join('_');", + "comment_created_at": "2017-02-17T01:16:05+00:00", + "comment_author": "jeskew", + "comment_body": "The serviceClientId is orthogonal to the uniqueness of this cache key, but I see your point. I'll use an HMAC of the AKID and secret key.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "229471764", + "pr_number": 2253, + "pr_file": "lib/discover_endpoint.js", + "created_at": "2018-10-30T20:23:49+00:00", + "commented_code": "var AWS = require('./core');\nvar util = require('./util');\nvar ENDPOINT_OPERATION_MAX_RETRIES = 60;\nvar endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n\n/**\n * Generate key to index the endpoints in the cache\n * @return [map] object with keys to index endpoints.\n * @api private\n */\nfunction getCacheKey(request) {\n var service = request.service;\n var api = service.api || {};\n var operations = api.operations;\n var identifiers = {};\n if (operations[request.operation] && operations[request.operation].name) {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "229471764", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2253, + "pr_file": "lib/discover_endpoint.js", + "discussion_id": "229471764", + "commented_code": "@@ -0,0 +1,340 @@\n+var AWS = require('./core');\n+var util = require('./util');\n+var ENDPOINT_OPERATION_MAX_RETRIES = 60;\n+var endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n+\n+/**\n+ * Generate key to index the endpoints in the cache\n+ * @return [map] object with keys to index endpoints.\n+ * @api private\n+ */\n+function getCacheKey(request) {\n+ var service = request.service;\n+ var api = service.api || {};\n+ var operations = api.operations;\n+ var identifiers = {};\n+ if (operations[request.operation] && operations[request.operation].name) {", + "comment_created_at": "2018-10-30T20:23:49+00:00", + "comment_author": "chrisradek", + "comment_body": "I think operation is only required if custom identifiers are defined for an operation. Is that not the case? Might help to keep the size of your cache down if we omitted operation if it isn't needed.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/aws-sdk-js-limit-cache-size.md b/_reviewers/aws-sdk-js-limit-cache-size.md index b76c10d..2b7f9f8 100644 --- a/_reviewers/aws-sdk-js-limit-cache-size.md +++ b/_reviewers/aws-sdk-js-limit-cache-size.md @@ -49,85 +49,3 @@ function getCacheKey(request) { ``` This approach prevents memory issues in applications that might generate many cache entries over time, while maintaining the performance benefits of caching. 
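
As a rough illustration of the bounded-cache approach discussed in these comments, the sketch below keeps an insertion-order queue next to the cache and evicts the oldest entry once a fixed maximum is reached. `cachedSecret`, `cacheQueue`, and `maxCacheEntries` mirror names that appear in the patch; `putSigningKey` is a made-up helper, not an SDK API.

```javascript
// Illustrative sketch: bound the signing-key cache so that applications
// which create many service clients cannot grow it without limit.
var cachedSecret = {};
var cacheQueue = [];
var maxCacheEntries = 50;

function putSigningKey(cacheKey, signingKey) {
  if (cachedSecret[cacheKey] === undefined) {
    cacheQueue.push(cacheKey);
    if (cacheQueue.length > maxCacheEntries) {
      delete cachedSecret[cacheQueue.shift()]; // evict the oldest entry
    }
  }
  cachedSecret[cacheKey] = signingKey;
}
```

The queue gives simple first-in-first-out eviction; an LRU policy would be another option, but FIFO is enough to keep memory bounded while preserving most of the caching benefit.
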
- - -[ - { - "discussion_id": "70883152", - "pr_number": 1054, - "pr_file": "lib/signers/v4.js", - "created_at": "2016-07-14T21:02:02+00:00", - "commented_code": "return AWS.util.crypto.hmac(kCredentials, this.stringToSign(datetime), 'hex');\n }\n\n cachedSecret[this.serviceName] = {\n cachedSecret[cacheIdentifier] = {\n region: this.request.region, date: date,\n key: kCredentials, akid: credentials.accessKeyId\n };", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "70883152", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1054, - "pr_file": "lib/signers/v4.js", - "discussion_id": "70883152", - "commented_code": "@@ -117,13 +118,13 @@ AWS.Signers.V4 = inherit(AWS.Signers.RequestSigner, {\n return AWS.util.crypto.hmac(kCredentials, this.stringToSign(datetime), 'hex');\n }\n \n- cachedSecret[this.serviceName] = {\n+ cachedSecret[cacheIdentifier] = {\n region: this.request.region, date: date,\n key: kCredentials, akid: credentials.accessKeyId\n };", - "comment_created_at": "2016-07-14T21:02:02+00:00", - "comment_author": "LiuJoyceC", - "comment_body": "We should put a maximum size constraint on `cachedSecret`. Before, there was only one cache per service (and only the last signature key for that service is kept), so the cache was necessarily bounded by the number of services. There is no upper bound on the number of service clients generated. Some applications could be generating a new service client for every request. The service client objects get garbage-collected eventually, but the cache doesn't and could grow to millions of objects fairly quickly and hog a lot of memory. Perhaps we could have a `cacheIdentifierQueue` of some sort so that when it's length reaches a certain number, we can get the oldest `cacheIdentifier` and delete it from `cachedSecret`.\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "101653214", - "pr_number": 1350, - "pr_file": "lib/signers/v4_credentials.js", - "created_at": "2017-02-16T23:25:57+00:00", - "commented_code": "var AWS = require('../core');\n\n/**\n * @api private\n */\nvar cachedSecret = {};\n\n/**\n * @api private\n */\nvar cacheQueue = [];\n\n/**\n * @api private\n */\nvar maxCacheEntries = 50;\n\n/**\n * @api private\n */\nvar v4Identifier = 'aws4_request';\n\nAWS.Signers.v4_credentials = {\n /**\n * @api private\n *\n * @param date [String]\n * @param region [String]\n * @param serviceName [String]\n * @returns {string}\n */\n createScope: function createScope(date, region, serviceName) {\n return [\n date.substr(0, 8),\n region,\n serviceName,\n v4Identifier\n ].join('/');\n },\n\n /**\n * @api private\n *\n * @param credentials [Credentials]\n * @param date [String]\n * @param region [String]\n * @param service [String]\n * @param shouldCache [Boolean]\n * @returns {String}\n */\n getSigningKey: function getSigningKey(\n credentials,\n date,\n region,\n service,\n shouldCache\n ) {\n var cacheKey = [credentials.accessKeyId, date, region, service].join('_');", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "101653214", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1350, - "pr_file": "lib/signers/v4_credentials.js", - "discussion_id": "101653214", - "commented_code": "@@ -0,0 +1,95 @@\n+var AWS = require('../core');\n+\n+/**\n+ * @api private\n+ */\n+var cachedSecret = {};\n+\n+/**\n+ * @api private\n+ */\n+var cacheQueue = [];\n+\n+/**\n+ * @api private\n+ */\n+var maxCacheEntries = 50;\n+\n+/**\n+ * @api private\n+ */\n+var v4Identifier = 
'aws4_request';\n+\n+AWS.Signers.v4_credentials = {\n+ /**\n+ * @api private\n+ *\n+ * @param date [String]\n+ * @param region [String]\n+ * @param serviceName [String]\n+ * @returns {string}\n+ */\n+ createScope: function createScope(date, region, serviceName) {\n+ return [\n+ date.substr(0, 8),\n+ region,\n+ serviceName,\n+ v4Identifier\n+ ].join('/');\n+ },\n+\n+ /**\n+ * @api private\n+ *\n+ * @param credentials [Credentials]\n+ * @param date [String]\n+ * @param region [String]\n+ * @param service [String]\n+ * @param shouldCache [Boolean]\n+ * @returns {String}\n+ */\n+ getSigningKey: function getSigningKey(\n+ credentials,\n+ date,\n+ region,\n+ service,\n+ shouldCache\n+ ) {\n+ var cacheKey = [credentials.accessKeyId, date, region, service].join('_');", - "comment_created_at": "2017-02-16T23:25:57+00:00", - "comment_author": "chrisradek", - "comment_body": "We had 2 issues reported because this cache key wasn't unique enough:\r\nhttps://github.com/aws/aws-sdk-js/pull/1054\r\n\r\nYou may need to pass the serviceClientId as well.\r\n", - "pr_file_module": null - }, - { - "comment_id": "101667655", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1350, - "pr_file": "lib/signers/v4_credentials.js", - "discussion_id": "101653214", - "commented_code": "@@ -0,0 +1,95 @@\n+var AWS = require('../core');\n+\n+/**\n+ * @api private\n+ */\n+var cachedSecret = {};\n+\n+/**\n+ * @api private\n+ */\n+var cacheQueue = [];\n+\n+/**\n+ * @api private\n+ */\n+var maxCacheEntries = 50;\n+\n+/**\n+ * @api private\n+ */\n+var v4Identifier = 'aws4_request';\n+\n+AWS.Signers.v4_credentials = {\n+ /**\n+ * @api private\n+ *\n+ * @param date [String]\n+ * @param region [String]\n+ * @param serviceName [String]\n+ * @returns {string}\n+ */\n+ createScope: function createScope(date, region, serviceName) {\n+ return [\n+ date.substr(0, 8),\n+ region,\n+ serviceName,\n+ v4Identifier\n+ ].join('/');\n+ },\n+\n+ /**\n+ * @api private\n+ *\n+ * @param credentials [Credentials]\n+ * @param date [String]\n+ * @param region [String]\n+ * @param service [String]\n+ * @param shouldCache [Boolean]\n+ * @returns {String}\n+ */\n+ getSigningKey: function getSigningKey(\n+ credentials,\n+ date,\n+ region,\n+ service,\n+ shouldCache\n+ ) {\n+ var cacheKey = [credentials.accessKeyId, date, region, service].join('_');", - "comment_created_at": "2017-02-17T01:16:05+00:00", - "comment_author": "jeskew", - "comment_body": "The serviceClientId is orthogonal to the uniqueness of this cache key, but I see your point. 
I'll use an HMAC of the AKID and secret key.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "229471764", - "pr_number": 2253, - "pr_file": "lib/discover_endpoint.js", - "created_at": "2018-10-30T20:23:49+00:00", - "commented_code": "var AWS = require('./core');\nvar util = require('./util');\nvar ENDPOINT_OPERATION_MAX_RETRIES = 60;\nvar endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n\n/**\n * Generate key to index the endpoints in the cache\n * @return [map] object with keys to index endpoints.\n * @api private\n */\nfunction getCacheKey(request) {\n var service = request.service;\n var api = service.api || {};\n var operations = api.operations;\n var identifiers = {};\n if (operations[request.operation] && operations[request.operation].name) {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "229471764", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2253, - "pr_file": "lib/discover_endpoint.js", - "discussion_id": "229471764", - "commented_code": "@@ -0,0 +1,340 @@\n+var AWS = require('./core');\n+var util = require('./util');\n+var ENDPOINT_OPERATION_MAX_RETRIES = 60;\n+var endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n+\n+/**\n+ * Generate key to index the endpoints in the cache\n+ * @return [map] object with keys to index endpoints.\n+ * @api private\n+ */\n+function getCacheKey(request) {\n+ var service = request.service;\n+ var api = service.api || {};\n+ var operations = api.operations;\n+ var identifiers = {};\n+ if (operations[request.operation] && operations[request.operation].name) {", - "comment_created_at": "2018-10-30T20:23:49+00:00", - "comment_author": "chrisradek", - "comment_body": "I think operation is only required if custom identifiers are defined for an operation. Is that not the case? 
Might help to keep the size of your cache down if we omitted operation if it isn't needed.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/aws-sdk-js-maintain-consistent-formatting.json b/_reviewers/aws-sdk-js-maintain-consistent-formatting.json new file mode 100644 index 0000000..d9484a3 --- /dev/null +++ b/_reviewers/aws-sdk-js-maintain-consistent-formatting.json @@ -0,0 +1,90 @@ +[ + { + "discussion_id": "131797509", + "pr_number": 1662, + "pr_file": "scripts/lib/ts-generator.js", + "created_at": "2017-08-08T00:36:36+00:00", + "commented_code": "code += tabs(tabCount) + 'export type ' + shapeKey + ' = Date;\\n';\n } else if (type === 'boolean') {\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = boolean;\\n';\n } else if (type === 'blob' || type === 'binary') {\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = Buffer|Uint8Array|Blob|string;\\n';\n } else if (type === 'blob' || type === 'binary') { \n code += tabs(tabCount) + 'export type ' + shapeKey + ' = Buffer|Uint8Array|Blob|string'\n + self.addReadableType(shapeKey)", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "131797509", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1662, + "pr_file": "scripts/lib/ts-generator.js", + "discussion_id": "131797509", + "commented_code": "@@ -300,8 +333,10 @@ TSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromSha\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = Date;\\n';\n } else if (type === 'boolean') {\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = boolean;\\n';\n- } else if (type === 'blob' || type === 'binary') {\n- code += tabs(tabCount) + 'export type ' + shapeKey + ' = Buffer|Uint8Array|Blob|string;\\n';\n+ } else if (type === 'blob' || type === 'binary') { \n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = Buffer|Uint8Array|Blob|string'\n+ + self.addReadableType(shapeKey)", + "comment_created_at": "2017-08-08T00:36:36+00:00", + "comment_author": "jeskew", + "comment_body": "Style nit: continuation lines (lines that are part of the same statement as the preceding line) should be indented.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "138405077", + "pr_number": 1716, + "pr_file": "test/service.spec.js", + "created_at": "2017-09-12T16:46:44+00:00", + "commented_code": "});\n });\n });\n return describe('customizeRequests', function() {\n describe('customizeRequests', function() {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "138405077", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1716, + "pr_file": "test/service.spec.js", + "discussion_id": "138405077", + "commented_code": "@@ -718,7 +728,7 @@\n });\n });\n });\n- return describe('customizeRequests', function() {\n+ describe('customizeRequests', function() {", + "comment_created_at": "2017-09-12T16:46:44+00:00", + "comment_author": "jeskew", + "comment_body": "Ditto re: empty lines. 
One should be between each test suite, too.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "147299241", + "pr_number": 1782, + "pr_file": "test/event_listeners.spec.js", + "created_at": "2017-10-27T00:17:07+00:00", + "commented_code": "return expect(data).to.match(match);\n });\n });\n describe('logging sensitive information', function() {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "147299241", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1782, + "pr_file": "test/event_listeners.spec.js", + "discussion_id": "147299241", + "commented_code": "@@ -801,6 +802,156 @@\n return expect(data).to.match(match);\n });\n });\n+ describe('logging sensitive information', function() {", + "comment_created_at": "2017-10-27T00:17:07+00:00", + "comment_author": "jeskew", + "comment_body": "Make sure to keep tests and suites separated by empty lines.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "193825440", + "pr_number": 2074, + "pr_file": "lib/event_listeners.js", + "created_at": "2018-06-07T17:16:08+00:00", + "commented_code": "});\n\n httpResp.on('end', function onEnd() {\n if (!stream || !stream.didCallback) {\n if ((AWS.HttpClient.streamsApiVersion !== 2) ||\n // don't concat response chunks when using event streams with streams unless response is unsuccessful\n (!(operation.hasEventOutput && service.successfulResponse(resp)) &&\n (!stream || !stream.didCallback))", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "193825440", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2074, + "pr_file": "lib/event_listeners.js", + "discussion_id": "193825440", + "commented_code": "@@ -264,7 +277,11 @@ AWS.EventListeners = {\n });\n \n httpResp.on('end', function onEnd() {\n- if (!stream || !stream.didCallback) {\n+ if ((AWS.HttpClient.streamsApiVersion !== 2) ||\n+ // don't concat response chunks when using event streams with streams unless response is unsuccessful\n+ (!(operation.hasEventOutput && service.successfulResponse(resp)) &&\n+ (!stream || !stream.didCallback))", + "comment_created_at": "2018-06-07T17:16:08+00:00", + "comment_author": "AllanZhengYP", + "comment_body": "There's no issue here but a little hard to follow. Will using another if statement be more readable? like this:\r\n\r\n```javascript\r\nif (!stream || !stream.didCallback) {\r\n//don't concat response chunks when using event streams unless response is unsuccessful\r\n if ((AWS.HttpClient.streamsApiVersion === 2) && operation.hasEventOutput && service.successfulResponse(resp) ) {\r\n return\r\n }\r\n resp.request.emit('httpDone');\r\n done();\r\n}\r\n```\r\nI think it's more readable(?) It's your call.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/aws-sdk-js-maintain-consistent-formatting.md b/_reviewers/aws-sdk-js-maintain-consistent-formatting.md index 54585f1..9c7c769 100644 --- a/_reviewers/aws-sdk-js-maintain-consistent-formatting.md +++ b/_reviewers/aws-sdk-js-maintain-consistent-formatting.md @@ -38,95 +38,3 @@ if (!stream || !stream.didCallback) { ``` Consistent formatting makes code easier to scan, understand, and maintain, reducing the cognitive load for everyone working with the codebase. 
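
For readers skimming the review comments above, a small, entirely made-up example of the two formatting conventions they call out — indenting continuation lines and preferring an early return over a hard-to-follow compound condition — might look like this (the function and its names are illustrative only):

```javascript
// Illustrative only: continuation lines of a statement are indented,
// and an early return keeps the main path flat and readable.
function shapeTypeCode(shapeKey, isStreaming) {
  var code = 'export type ' + shapeKey +
      ' = Buffer|Uint8Array|Blob|string;'; // indented continuation line

  if (!isStreaming) {
    return code; // early return instead of nesting the common case
  }
  return code.replace(';', '|Readable;');
}
```
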
- - -[ - { - "discussion_id": "131797509", - "pr_number": 1662, - "pr_file": "scripts/lib/ts-generator.js", - "created_at": "2017-08-08T00:36:36+00:00", - "commented_code": "code += tabs(tabCount) + 'export type ' + shapeKey + ' = Date;\\n';\n } else if (type === 'boolean') {\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = boolean;\\n';\n } else if (type === 'blob' || type === 'binary') {\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = Buffer|Uint8Array|Blob|string;\\n';\n } else if (type === 'blob' || type === 'binary') { \n code += tabs(tabCount) + 'export type ' + shapeKey + ' = Buffer|Uint8Array|Blob|string'\n + self.addReadableType(shapeKey)", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "131797509", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1662, - "pr_file": "scripts/lib/ts-generator.js", - "discussion_id": "131797509", - "commented_code": "@@ -300,8 +333,10 @@ TSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromSha\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = Date;\\n';\n } else if (type === 'boolean') {\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = boolean;\\n';\n- } else if (type === 'blob' || type === 'binary') {\n- code += tabs(tabCount) + 'export type ' + shapeKey + ' = Buffer|Uint8Array|Blob|string;\\n';\n+ } else if (type === 'blob' || type === 'binary') { \n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = Buffer|Uint8Array|Blob|string'\n+ + self.addReadableType(shapeKey)", - "comment_created_at": "2017-08-08T00:36:36+00:00", - "comment_author": "jeskew", - "comment_body": "Style nit: continuation lines (lines that are part of the same statement as the preceding line) should be indented.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "138405077", - "pr_number": 1716, - "pr_file": "test/service.spec.js", - "created_at": "2017-09-12T16:46:44+00:00", - "commented_code": "});\n });\n });\n return describe('customizeRequests', function() {\n describe('customizeRequests', function() {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "138405077", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1716, - "pr_file": "test/service.spec.js", - "discussion_id": "138405077", - "commented_code": "@@ -718,7 +728,7 @@\n });\n });\n });\n- return describe('customizeRequests', function() {\n+ describe('customizeRequests', function() {", - "comment_created_at": "2017-09-12T16:46:44+00:00", - "comment_author": "jeskew", - "comment_body": "Ditto re: empty lines. 
One should be between each test suite, too.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "147299241", - "pr_number": 1782, - "pr_file": "test/event_listeners.spec.js", - "created_at": "2017-10-27T00:17:07+00:00", - "commented_code": "return expect(data).to.match(match);\n });\n });\n describe('logging sensitive information', function() {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "147299241", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1782, - "pr_file": "test/event_listeners.spec.js", - "discussion_id": "147299241", - "commented_code": "@@ -801,6 +802,156 @@\n return expect(data).to.match(match);\n });\n });\n+ describe('logging sensitive information', function() {", - "comment_created_at": "2017-10-27T00:17:07+00:00", - "comment_author": "jeskew", - "comment_body": "Make sure to keep tests and suites separated by empty lines.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "193825440", - "pr_number": 2074, - "pr_file": "lib/event_listeners.js", - "created_at": "2018-06-07T17:16:08+00:00", - "commented_code": "});\n\n httpResp.on('end', function onEnd() {\n if (!stream || !stream.didCallback) {\n if ((AWS.HttpClient.streamsApiVersion !== 2) ||\n // don't concat response chunks when using event streams with streams unless response is unsuccessful\n (!(operation.hasEventOutput && service.successfulResponse(resp)) &&\n (!stream || !stream.didCallback))", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "193825440", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2074, - "pr_file": "lib/event_listeners.js", - "discussion_id": "193825440", - "commented_code": "@@ -264,7 +277,11 @@ AWS.EventListeners = {\n });\n \n httpResp.on('end', function onEnd() {\n- if (!stream || !stream.didCallback) {\n+ if ((AWS.HttpClient.streamsApiVersion !== 2) ||\n+ // don't concat response chunks when using event streams with streams unless response is unsuccessful\n+ (!(operation.hasEventOutput && service.successfulResponse(resp)) &&\n+ (!stream || !stream.didCallback))", - "comment_created_at": "2018-06-07T17:16:08+00:00", - "comment_author": "AllanZhengYP", - "comment_body": "There's no issue here but a little hard to follow. Will using another if statement be more readable? like this:\r\n\r\n```javascript\r\nif (!stream || !stream.didCallback) {\r\n//don't concat response chunks when using event streams unless response is unsuccessful\r\n if ((AWS.HttpClient.streamsApiVersion === 2) && operation.hasEventOutput && service.successfulResponse(resp) ) {\r\n return\r\n }\r\n resp.request.emit('httpDone');\r\n done();\r\n}\r\n```\r\nI think it's more readable(?) It's your call.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/aws-sdk-js-organize-type-declarations.json b/_reviewers/aws-sdk-js-organize-type-declarations.json new file mode 100644 index 0000000..5ff93d9 --- /dev/null +++ b/_reviewers/aws-sdk-js-organize-type-declarations.json @@ -0,0 +1,82 @@ +[ + { + "discussion_id": "84805047", + "pr_number": 1189, + "pr_file": "clients/acm.d.ts", + "created_at": "2016-10-24T23:28:51+00:00", + "commented_code": "import {Request} from '../lib/request';\nimport {Response} from '../lib/response';\nimport {AWSError} from '../lib/error';\nimport {Service} from '../lib/service';\ndeclare class ACM extends Service {\n /**\n * Adds one or more tags to an ACM Certificate. Tags are labels that you can use to identify and organize your AWS resources. 
Each tag consists of a key and an optional value. You specify the certificate on input by its Amazon Resource Name (ARN). You specify the tag by using a key-value pair. You can apply a tag to just one certificate if you want to identify a specific characteristic of that certificate, or you can apply the same tag to multiple certificates if you want to filter for a common relationship among those certificates. Similarly, you can apply the same tag to multiple resources if you want to specify a relationship among those resources. For example, you can add the same tag to an ACM Certificate and an Elastic Load Balancing load balancer to indicate that they are both used by the same website. For more information, see Tagging ACM Certificates. To remove one or more tags, use the RemoveTagsFromCertificate action. To view all of the tags that have been applied to the certificate, use the ListTagsForCertificate action.\n */\n addTagsToCertificate(params: ACM.AddTagsToCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n /**\n * Adds one or more tags to an ACM Certificate. Tags are labels that you can use to identify and organize your AWS resources. Each tag consists of a key and an optional value. You specify the certificate on input by its Amazon Resource Name (ARN). You specify the tag by using a key-value pair. You can apply a tag to just one certificate if you want to identify a specific characteristic of that certificate, or you can apply the same tag to multiple certificates if you want to filter for a common relationship among those certificates. Similarly, you can apply the same tag to multiple resources if you want to specify a relationship among those resources. For example, you can add the same tag to an ACM Certificate and an Elastic Load Balancing load balancer to indicate that they are both used by the same website. For more information, see Tagging ACM Certificates. To remove one or more tags, use the RemoveTagsFromCertificate action. To view all of the tags that have been applied to the certificate, use the ListTagsForCertificate action.\n */\n addTagsToCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n /**\n * Deletes an ACM Certificate and its associated private key. If this action succeeds, the certificate no longer appears in the list of ACM Certificates that can be displayed by calling the ListCertificates action or be retrieved by calling the GetCertificate action. The certificate will not be available for use by other AWS services. You cannot delete an ACM Certificate that is being used by another AWS service. To delete a certificate that is in use, the certificate association must first be removed. \n */\n deleteCertificate(params: ACM.DeleteCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n /**\n * Deletes an ACM Certificate and its associated private key. If this action succeeds, the certificate no longer appears in the list of ACM Certificates that can be displayed by calling the ListCertificates action or be retrieved by calling the GetCertificate action. The certificate will not be available for use by other AWS services. You cannot delete an ACM Certificate that is being used by another AWS service. To delete a certificate that is in use, the certificate association must first be removed. 
\n */\n deleteCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n /**\n * Returns a list of the fields contained in the specified ACM Certificate. For example, this action returns the certificate status, a flag that indicates whether the certificate is associated with any other AWS service, and the date at which the certificate request was created. You specify the ACM Certificate on input by its Amazon Resource Name (ARN).\n */\n describeCertificate(params: ACM.DescribeCertificateRequest, callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void): Request;\n /**\n * Returns a list of the fields contained in the specified ACM Certificate. For example, this action returns the certificate status, a flag that indicates whether the certificate is associated with any other AWS service, and the date at which the certificate request was created. You specify the ACM Certificate on input by its Amazon Resource Name (ARN).\n */\n describeCertificate(callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void): Request;\n /**\n * Retrieves an ACM Certificate and certificate chain for the certificate specified by an ARN. The chain is an ordered list of certificates that contains the root certificate, intermediate certificates of subordinate CAs, and the ACM Certificate. The certificate and certificate chain are base64 encoded. If you want to decode the certificate chain to see the individual certificate fields, you can use OpenSSL. Currently, ACM Certificates can be used only with Elastic Load Balancing and Amazon CloudFront. \n */\n getCertificate(params: ACM.GetCertificateRequest, callback?: (err: AWSError, data: ACM.GetCertificateResponse) => void): Request;\n /**\n * Retrieves an ACM Certificate and certificate chain for the certificate specified by an ARN. The chain is an ordered list of certificates that contains the root certificate, intermediate certificates of subordinate CAs, and the ACM Certificate. The certificate and certificate chain are base64 encoded. If you want to decode the certificate chain to see the individual certificate fields, you can use OpenSSL. Currently, ACM Certificates can be used only with Elastic Load Balancing and Amazon CloudFront. \n */\n getCertificate(callback?: (err: AWSError, data: ACM.GetCertificateResponse) => void): Request;\n /**\n * Retrieves a list of ACM Certificates and the domain name for each. You can optionally filter the list to return only the certificates that match the specified status.\n */\n listCertificates(params: ACM.ListCertificatesRequest, callback?: (err: AWSError, data: ACM.ListCertificatesResponse) => void): Request;\n /**\n * Retrieves a list of ACM Certificates and the domain name for each. You can optionally filter the list to return only the certificates that match the specified status.\n */\n listCertificates(callback?: (err: AWSError, data: ACM.ListCertificatesResponse) => void): Request;\n /**\n * Lists the tags that have been applied to the ACM Certificate. Use the certificate ARN to specify the certificate. To add a tag to an ACM Certificate, use the AddTagsToCertificate action. To delete a tag, use the RemoveTagsFromCertificate action.\n */\n listTagsForCertificate(params: ACM.ListTagsForCertificateRequest, callback?: (err: AWSError, data: ACM.ListTagsForCertificateResponse) => void): Request;\n /**\n * Lists the tags that have been applied to the ACM Certificate. Use the certificate ARN to specify the certificate. 
To add a tag to an ACM Certificate, use the AddTagsToCertificate action. To delete a tag, use the RemoveTagsFromCertificate action.\n */\n listTagsForCertificate(callback?: (err: AWSError, data: ACM.ListTagsForCertificateResponse) => void): Request;\n /**\n * Remove one or more tags from an ACM Certificate. A tag consists of a key-value pair. If you do not specify the value portion of the tag when calling this function, the tag will be removed regardless of value. If you specify a value, the tag is removed only if it is associated with the specified value. To add tags to a certificate, use the AddTagsToCertificate action. To view all of the tags that have been applied to a specific ACM Certificate, use the ListTagsForCertificate action.\n */\n removeTagsFromCertificate(params: ACM.RemoveTagsFromCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n /**\n * Remove one or more tags from an ACM Certificate. A tag consists of a key-value pair. If you do not specify the value portion of the tag when calling this function, the tag will be removed regardless of value. If you specify a value, the tag is removed only if it is associated with the specified value. To add tags to a certificate, use the AddTagsToCertificate action. To view all of the tags that have been applied to a specific ACM Certificate, use the ListTagsForCertificate action.\n */\n removeTagsFromCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n /**\n * Requests an ACM Certificate for use with other AWS services. To request an ACM Certificate, you must specify the fully qualified domain name (FQDN) for your site. You can also specify additional FQDNs if users can reach your site by using other names. For each domain name you specify, email is sent to the domain owner to request approval to issue the certificate. After receiving approval from the domain owner, the ACM Certificate is issued. For more information, see the AWS Certificate Manager User Guide .\n */\n requestCertificate(params: ACM.RequestCertificateRequest, callback?: (err: AWSError, data: ACM.RequestCertificateResponse) => void): Request;\n /**\n * Requests an ACM Certificate for use with other AWS services. To request an ACM Certificate, you must specify the fully qualified domain name (FQDN) for your site. You can also specify additional FQDNs if users can reach your site by using other names. For each domain name you specify, email is sent to the domain owner to request approval to issue the certificate. After receiving approval from the domain owner, the ACM Certificate is issued. For more information, see the AWS Certificate Manager User Guide .\n */\n requestCertificate(callback?: (err: AWSError, data: ACM.RequestCertificateResponse) => void): Request;\n /**\n * Resends the email that requests domain ownership validation. The domain owner or an authorized representative must approve the ACM Certificate before it can be issued. The certificate can be approved by clicking a link in the mail to navigate to the Amazon certificate approval website and then clicking I Approve. However, the validation email can be blocked by spam filters. Therefore, if you do not receive the original mail, you can request that the mail be resent within 72 hours of requesting the ACM Certificate. 
If more than 72 hours have elapsed since your original request or since your last attempt to resend validation mail, you must request a new certificate.\n */\n resendValidationEmail(params: ACM.ResendValidationEmailRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n /**\n * Resends the email that requests domain ownership validation. The domain owner or an authorized representative must approve the ACM Certificate before it can be issued. The certificate can be approved by clicking a link in the mail to navigate to the Amazon certificate approval website and then clicking I Approve. However, the validation email can be blocked by spam filters. Therefore, if you do not receive the original mail, you can request that the mail be resent within 72 hours of requesting the ACM Certificate. If more than 72 hours have elapsed since your original request or since your last attempt to resend validation mail, you must request a new certificate.\n */\n resendValidationEmail(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n}\ndeclare namespace ACM {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "84805047", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1189, + "pr_file": "clients/acm.d.ts", + "discussion_id": "84805047", + "commented_code": "@@ -0,0 +1,350 @@\n+import {Request} from '../lib/request';\n+import {Response} from '../lib/response';\n+import {AWSError} from '../lib/error';\n+import {Service} from '../lib/service';\n+declare class ACM extends Service {\n+ /**\n+ * Adds one or more tags to an ACM Certificate. Tags are labels that you can use to identify and organize your AWS resources. Each tag consists of a key and an optional value. You specify the certificate on input by its Amazon Resource Name (ARN). You specify the tag by using a key-value pair. You can apply a tag to just one certificate if you want to identify a specific characteristic of that certificate, or you can apply the same tag to multiple certificates if you want to filter for a common relationship among those certificates. Similarly, you can apply the same tag to multiple resources if you want to specify a relationship among those resources. For example, you can add the same tag to an ACM Certificate and an Elastic Load Balancing load balancer to indicate that they are both used by the same website. For more information, see Tagging ACM Certificates. To remove one or more tags, use the RemoveTagsFromCertificate action. To view all of the tags that have been applied to the certificate, use the ListTagsForCertificate action.\n+ */\n+ addTagsToCertificate(params: ACM.AddTagsToCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Adds one or more tags to an ACM Certificate. Tags are labels that you can use to identify and organize your AWS resources. Each tag consists of a key and an optional value. You specify the certificate on input by its Amazon Resource Name (ARN). You specify the tag by using a key-value pair. You can apply a tag to just one certificate if you want to identify a specific characteristic of that certificate, or you can apply the same tag to multiple certificates if you want to filter for a common relationship among those certificates. Similarly, you can apply the same tag to multiple resources if you want to specify a relationship among those resources. 
For example, you can add the same tag to an ACM Certificate and an Elastic Load Balancing load balancer to indicate that they are both used by the same website. For more information, see Tagging ACM Certificates. To remove one or more tags, use the RemoveTagsFromCertificate action. To view all of the tags that have been applied to the certificate, use the ListTagsForCertificate action.\n+ */\n+ addTagsToCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Deletes an ACM Certificate and its associated private key. If this action succeeds, the certificate no longer appears in the list of ACM Certificates that can be displayed by calling the ListCertificates action or be retrieved by calling the GetCertificate action. The certificate will not be available for use by other AWS services. You cannot delete an ACM Certificate that is being used by another AWS service. To delete a certificate that is in use, the certificate association must first be removed. \n+ */\n+ deleteCertificate(params: ACM.DeleteCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Deletes an ACM Certificate and its associated private key. If this action succeeds, the certificate no longer appears in the list of ACM Certificates that can be displayed by calling the ListCertificates action or be retrieved by calling the GetCertificate action. The certificate will not be available for use by other AWS services. You cannot delete an ACM Certificate that is being used by another AWS service. To delete a certificate that is in use, the certificate association must first be removed. \n+ */\n+ deleteCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Returns a list of the fields contained in the specified ACM Certificate. For example, this action returns the certificate status, a flag that indicates whether the certificate is associated with any other AWS service, and the date at which the certificate request was created. You specify the ACM Certificate on input by its Amazon Resource Name (ARN).\n+ */\n+ describeCertificate(params: ACM.DescribeCertificateRequest, callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void): Request;\n+ /**\n+ * Returns a list of the fields contained in the specified ACM Certificate. For example, this action returns the certificate status, a flag that indicates whether the certificate is associated with any other AWS service, and the date at which the certificate request was created. You specify the ACM Certificate on input by its Amazon Resource Name (ARN).\n+ */\n+ describeCertificate(callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves an ACM Certificate and certificate chain for the certificate specified by an ARN. The chain is an ordered list of certificates that contains the root certificate, intermediate certificates of subordinate CAs, and the ACM Certificate. The certificate and certificate chain are base64 encoded. If you want to decode the certificate chain to see the individual certificate fields, you can use OpenSSL. Currently, ACM Certificates can be used only with Elastic Load Balancing and Amazon CloudFront. \n+ */\n+ getCertificate(params: ACM.GetCertificateRequest, callback?: (err: AWSError, data: ACM.GetCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves an ACM Certificate and certificate chain for the certificate specified by an ARN. 
The chain is an ordered list of certificates that contains the root certificate, intermediate certificates of subordinate CAs, and the ACM Certificate. The certificate and certificate chain are base64 encoded. If you want to decode the certificate chain to see the individual certificate fields, you can use OpenSSL. Currently, ACM Certificates can be used only with Elastic Load Balancing and Amazon CloudFront. \n+ */\n+ getCertificate(callback?: (err: AWSError, data: ACM.GetCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves a list of ACM Certificates and the domain name for each. You can optionally filter the list to return only the certificates that match the specified status.\n+ */\n+ listCertificates(params: ACM.ListCertificatesRequest, callback?: (err: AWSError, data: ACM.ListCertificatesResponse) => void): Request;\n+ /**\n+ * Retrieves a list of ACM Certificates and the domain name for each. You can optionally filter the list to return only the certificates that match the specified status.\n+ */\n+ listCertificates(callback?: (err: AWSError, data: ACM.ListCertificatesResponse) => void): Request;\n+ /**\n+ * Lists the tags that have been applied to the ACM Certificate. Use the certificate ARN to specify the certificate. To add a tag to an ACM Certificate, use the AddTagsToCertificate action. To delete a tag, use the RemoveTagsFromCertificate action.\n+ */\n+ listTagsForCertificate(params: ACM.ListTagsForCertificateRequest, callback?: (err: AWSError, data: ACM.ListTagsForCertificateResponse) => void): Request;\n+ /**\n+ * Lists the tags that have been applied to the ACM Certificate. Use the certificate ARN to specify the certificate. To add a tag to an ACM Certificate, use the AddTagsToCertificate action. To delete a tag, use the RemoveTagsFromCertificate action.\n+ */\n+ listTagsForCertificate(callback?: (err: AWSError, data: ACM.ListTagsForCertificateResponse) => void): Request;\n+ /**\n+ * Remove one or more tags from an ACM Certificate. A tag consists of a key-value pair. If you do not specify the value portion of the tag when calling this function, the tag will be removed regardless of value. If you specify a value, the tag is removed only if it is associated with the specified value. To add tags to a certificate, use the AddTagsToCertificate action. To view all of the tags that have been applied to a specific ACM Certificate, use the ListTagsForCertificate action.\n+ */\n+ removeTagsFromCertificate(params: ACM.RemoveTagsFromCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Remove one or more tags from an ACM Certificate. A tag consists of a key-value pair. If you do not specify the value portion of the tag when calling this function, the tag will be removed regardless of value. If you specify a value, the tag is removed only if it is associated with the specified value. To add tags to a certificate, use the AddTagsToCertificate action. To view all of the tags that have been applied to a specific ACM Certificate, use the ListTagsForCertificate action.\n+ */\n+ removeTagsFromCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Requests an ACM Certificate for use with other AWS services. To request an ACM Certificate, you must specify the fully qualified domain name (FQDN) for your site. You can also specify additional FQDNs if users can reach your site by using other names. 
For each domain name you specify, email is sent to the domain owner to request approval to issue the certificate. After receiving approval from the domain owner, the ACM Certificate is issued. For more information, see the AWS Certificate Manager User Guide .\n+ */\n+ requestCertificate(params: ACM.RequestCertificateRequest, callback?: (err: AWSError, data: ACM.RequestCertificateResponse) => void): Request;\n+ /**\n+ * Requests an ACM Certificate for use with other AWS services. To request an ACM Certificate, you must specify the fully qualified domain name (FQDN) for your site. You can also specify additional FQDNs if users can reach your site by using other names. For each domain name you specify, email is sent to the domain owner to request approval to issue the certificate. After receiving approval from the domain owner, the ACM Certificate is issued. For more information, see the AWS Certificate Manager User Guide .\n+ */\n+ requestCertificate(callback?: (err: AWSError, data: ACM.RequestCertificateResponse) => void): Request;\n+ /**\n+ * Resends the email that requests domain ownership validation. The domain owner or an authorized representative must approve the ACM Certificate before it can be issued. The certificate can be approved by clicking a link in the mail to navigate to the Amazon certificate approval website and then clicking I Approve. However, the validation email can be blocked by spam filters. Therefore, if you do not receive the original mail, you can request that the mail be resent within 72 hours of requesting the ACM Certificate. If more than 72 hours have elapsed since your original request or since your last attempt to resend validation mail, you must request a new certificate.\n+ */\n+ resendValidationEmail(params: ACM.ResendValidationEmailRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Resends the email that requests domain ownership validation. The domain owner or an authorized representative must approve the ACM Certificate before it can be issued. The certificate can be approved by clicking a link in the mail to navigate to the Amazon certificate approval website and then clicking I Approve. However, the validation email can be blocked by spam filters. Therefore, if you do not receive the original mail, you can request that the mail be resent within 72 hours of requesting the ACM Certificate. If more than 72 hours have elapsed since your original request or since your last attempt to resend validation mail, you must request a new certificate.\n+ */\n+ resendValidationEmail(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+}\n+declare namespace ACM {", + "comment_created_at": "2016-10-24T23:28:51+00:00", + "comment_author": "LiuJoyceC", + "comment_body": "Is there a reason the service model's shapes need to be on this namespace? When I type `AWS.ACM.`, I don't think my code completion tool should try to complete `AddTagsToCertificateRequest` or `CertificateBody`, since these aren't actually valid properties on `AWS.ACM`. Instead, this namespace should contain `apiVersions`, `serviceIdentifier`, and `services` (unless these are private, in which case just don't have an `ACM` namespace). The shapes can live in a separate namespace that isn't exported, and you just have to change the references above. 
I've tested this by changing this namespace name to be `Shapes` (and changing references above), and I still get the correct code completion suggestions when I'm creating my `params` in a request, and I no longer get shape name suggestions when I type `AWS.ACM.`.\n", + "pr_file_module": null + }, + { + "comment_id": "84809757", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1189, + "pr_file": "clients/acm.d.ts", + "discussion_id": "84805047", + "commented_code": "@@ -0,0 +1,350 @@\n+import {Request} from '../lib/request';\n+import {Response} from '../lib/response';\n+import {AWSError} from '../lib/error';\n+import {Service} from '../lib/service';\n+declare class ACM extends Service {\n+ /**\n+ * Adds one or more tags to an ACM Certificate. Tags are labels that you can use to identify and organize your AWS resources. Each tag consists of a key and an optional value. You specify the certificate on input by its Amazon Resource Name (ARN). You specify the tag by using a key-value pair. You can apply a tag to just one certificate if you want to identify a specific characteristic of that certificate, or you can apply the same tag to multiple certificates if you want to filter for a common relationship among those certificates. Similarly, you can apply the same tag to multiple resources if you want to specify a relationship among those resources. For example, you can add the same tag to an ACM Certificate and an Elastic Load Balancing load balancer to indicate that they are both used by the same website. For more information, see Tagging ACM Certificates. To remove one or more tags, use the RemoveTagsFromCertificate action. To view all of the tags that have been applied to the certificate, use the ListTagsForCertificate action.\n+ */\n+ addTagsToCertificate(params: ACM.AddTagsToCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Adds one or more tags to an ACM Certificate. Tags are labels that you can use to identify and organize your AWS resources. Each tag consists of a key and an optional value. You specify the certificate on input by its Amazon Resource Name (ARN). You specify the tag by using a key-value pair. You can apply a tag to just one certificate if you want to identify a specific characteristic of that certificate, or you can apply the same tag to multiple certificates if you want to filter for a common relationship among those certificates. Similarly, you can apply the same tag to multiple resources if you want to specify a relationship among those resources. For example, you can add the same tag to an ACM Certificate and an Elastic Load Balancing load balancer to indicate that they are both used by the same website. For more information, see Tagging ACM Certificates. To remove one or more tags, use the RemoveTagsFromCertificate action. To view all of the tags that have been applied to the certificate, use the ListTagsForCertificate action.\n+ */\n+ addTagsToCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Deletes an ACM Certificate and its associated private key. If this action succeeds, the certificate no longer appears in the list of ACM Certificates that can be displayed by calling the ListCertificates action or be retrieved by calling the GetCertificate action. The certificate will not be available for use by other AWS services. You cannot delete an ACM Certificate that is being used by another AWS service. 
To delete a certificate that is in use, the certificate association must first be removed. \n+ */\n+ deleteCertificate(params: ACM.DeleteCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Deletes an ACM Certificate and its associated private key. If this action succeeds, the certificate no longer appears in the list of ACM Certificates that can be displayed by calling the ListCertificates action or be retrieved by calling the GetCertificate action. The certificate will not be available for use by other AWS services. You cannot delete an ACM Certificate that is being used by another AWS service. To delete a certificate that is in use, the certificate association must first be removed. \n+ */\n+ deleteCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Returns a list of the fields contained in the specified ACM Certificate. For example, this action returns the certificate status, a flag that indicates whether the certificate is associated with any other AWS service, and the date at which the certificate request was created. You specify the ACM Certificate on input by its Amazon Resource Name (ARN).\n+ */\n+ describeCertificate(params: ACM.DescribeCertificateRequest, callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void): Request;\n+ /**\n+ * Returns a list of the fields contained in the specified ACM Certificate. For example, this action returns the certificate status, a flag that indicates whether the certificate is associated with any other AWS service, and the date at which the certificate request was created. You specify the ACM Certificate on input by its Amazon Resource Name (ARN).\n+ */\n+ describeCertificate(callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves an ACM Certificate and certificate chain for the certificate specified by an ARN. The chain is an ordered list of certificates that contains the root certificate, intermediate certificates of subordinate CAs, and the ACM Certificate. The certificate and certificate chain are base64 encoded. If you want to decode the certificate chain to see the individual certificate fields, you can use OpenSSL. Currently, ACM Certificates can be used only with Elastic Load Balancing and Amazon CloudFront. \n+ */\n+ getCertificate(params: ACM.GetCertificateRequest, callback?: (err: AWSError, data: ACM.GetCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves an ACM Certificate and certificate chain for the certificate specified by an ARN. The chain is an ordered list of certificates that contains the root certificate, intermediate certificates of subordinate CAs, and the ACM Certificate. The certificate and certificate chain are base64 encoded. If you want to decode the certificate chain to see the individual certificate fields, you can use OpenSSL. Currently, ACM Certificates can be used only with Elastic Load Balancing and Amazon CloudFront. \n+ */\n+ getCertificate(callback?: (err: AWSError, data: ACM.GetCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves a list of ACM Certificates and the domain name for each. You can optionally filter the list to return only the certificates that match the specified status.\n+ */\n+ listCertificates(params: ACM.ListCertificatesRequest, callback?: (err: AWSError, data: ACM.ListCertificatesResponse) => void): Request;\n+ /**\n+ * Retrieves a list of ACM Certificates and the domain name for each. 
You can optionally filter the list to return only the certificates that match the specified status.\n+ */\n+ listCertificates(callback?: (err: AWSError, data: ACM.ListCertificatesResponse) => void): Request;\n+ /**\n+ * Lists the tags that have been applied to the ACM Certificate. Use the certificate ARN to specify the certificate. To add a tag to an ACM Certificate, use the AddTagsToCertificate action. To delete a tag, use the RemoveTagsFromCertificate action.\n+ */\n+ listTagsForCertificate(params: ACM.ListTagsForCertificateRequest, callback?: (err: AWSError, data: ACM.ListTagsForCertificateResponse) => void): Request;\n+ /**\n+ * Lists the tags that have been applied to the ACM Certificate. Use the certificate ARN to specify the certificate. To add a tag to an ACM Certificate, use the AddTagsToCertificate action. To delete a tag, use the RemoveTagsFromCertificate action.\n+ */\n+ listTagsForCertificate(callback?: (err: AWSError, data: ACM.ListTagsForCertificateResponse) => void): Request;\n+ /**\n+ * Remove one or more tags from an ACM Certificate. A tag consists of a key-value pair. If you do not specify the value portion of the tag when calling this function, the tag will be removed regardless of value. If you specify a value, the tag is removed only if it is associated with the specified value. To add tags to a certificate, use the AddTagsToCertificate action. To view all of the tags that have been applied to a specific ACM Certificate, use the ListTagsForCertificate action.\n+ */\n+ removeTagsFromCertificate(params: ACM.RemoveTagsFromCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Remove one or more tags from an ACM Certificate. A tag consists of a key-value pair. If you do not specify the value portion of the tag when calling this function, the tag will be removed regardless of value. If you specify a value, the tag is removed only if it is associated with the specified value. To add tags to a certificate, use the AddTagsToCertificate action. To view all of the tags that have been applied to a specific ACM Certificate, use the ListTagsForCertificate action.\n+ */\n+ removeTagsFromCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Requests an ACM Certificate for use with other AWS services. To request an ACM Certificate, you must specify the fully qualified domain name (FQDN) for your site. You can also specify additional FQDNs if users can reach your site by using other names. For each domain name you specify, email is sent to the domain owner to request approval to issue the certificate. After receiving approval from the domain owner, the ACM Certificate is issued. For more information, see the AWS Certificate Manager User Guide .\n+ */\n+ requestCertificate(params: ACM.RequestCertificateRequest, callback?: (err: AWSError, data: ACM.RequestCertificateResponse) => void): Request;\n+ /**\n+ * Requests an ACM Certificate for use with other AWS services. To request an ACM Certificate, you must specify the fully qualified domain name (FQDN) for your site. You can also specify additional FQDNs if users can reach your site by using other names. For each domain name you specify, email is sent to the domain owner to request approval to issue the certificate. After receiving approval from the domain owner, the ACM Certificate is issued. 
For more information, see the AWS Certificate Manager User Guide .\n+ */\n+ requestCertificate(callback?: (err: AWSError, data: ACM.RequestCertificateResponse) => void): Request;\n+ /**\n+ * Resends the email that requests domain ownership validation. The domain owner or an authorized representative must approve the ACM Certificate before it can be issued. The certificate can be approved by clicking a link in the mail to navigate to the Amazon certificate approval website and then clicking I Approve. However, the validation email can be blocked by spam filters. Therefore, if you do not receive the original mail, you can request that the mail be resent within 72 hours of requesting the ACM Certificate. If more than 72 hours have elapsed since your original request or since your last attempt to resend validation mail, you must request a new certificate.\n+ */\n+ resendValidationEmail(params: ACM.ResendValidationEmailRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Resends the email that requests domain ownership validation. The domain owner or an authorized representative must approve the ACM Certificate before it can be issued. The certificate can be approved by clicking a link in the mail to navigate to the Amazon certificate approval website and then clicking I Approve. However, the validation email can be blocked by spam filters. Therefore, if you do not receive the original mail, you can request that the mail be resent within 72 hours of requesting the ACM Certificate. If more than 72 hours have elapsed since your original request or since your last attempt to resend validation mail, you must request a new certificate.\n+ */\n+ resendValidationEmail(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+}\n+declare namespace ACM {", + "comment_created_at": "2016-10-25T00:12:02+00:00", + "comment_author": "chrisradek", + "comment_body": "I wanted to export all the interfaces so that users could cast a result if they needed to, but I'm not sure if that's necessary. I'll have to take a look at some other TypeScript libraries to see if that's common practice.\n\nYou should only see these interfaces when looking at the service client constructors, but not on the service client instances.\n", + "pr_file_module": null + }, + { + "comment_id": "84816531", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1189, + "pr_file": "clients/acm.d.ts", + "discussion_id": "84805047", + "commented_code": "@@ -0,0 +1,350 @@\n+import {Request} from '../lib/request';\n+import {Response} from '../lib/response';\n+import {AWSError} from '../lib/error';\n+import {Service} from '../lib/service';\n+declare class ACM extends Service {\n+ /**\n+ * Adds one or more tags to an ACM Certificate. Tags are labels that you can use to identify and organize your AWS resources. Each tag consists of a key and an optional value. You specify the certificate on input by its Amazon Resource Name (ARN). You specify the tag by using a key-value pair. You can apply a tag to just one certificate if you want to identify a specific characteristic of that certificate, or you can apply the same tag to multiple certificates if you want to filter for a common relationship among those certificates. Similarly, you can apply the same tag to multiple resources if you want to specify a relationship among those resources. For example, you can add the same tag to an ACM Certificate and an Elastic Load Balancing load balancer to indicate that they are both used by the same website. 
For more information, see Tagging ACM Certificates. To remove one or more tags, use the RemoveTagsFromCertificate action. To view all of the tags that have been applied to the certificate, use the ListTagsForCertificate action.\n+ */\n+ addTagsToCertificate(params: ACM.AddTagsToCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Adds one or more tags to an ACM Certificate. Tags are labels that you can use to identify and organize your AWS resources. Each tag consists of a key and an optional value. You specify the certificate on input by its Amazon Resource Name (ARN). You specify the tag by using a key-value pair. You can apply a tag to just one certificate if you want to identify a specific characteristic of that certificate, or you can apply the same tag to multiple certificates if you want to filter for a common relationship among those certificates. Similarly, you can apply the same tag to multiple resources if you want to specify a relationship among those resources. For example, you can add the same tag to an ACM Certificate and an Elastic Load Balancing load balancer to indicate that they are both used by the same website. For more information, see Tagging ACM Certificates. To remove one or more tags, use the RemoveTagsFromCertificate action. To view all of the tags that have been applied to the certificate, use the ListTagsForCertificate action.\n+ */\n+ addTagsToCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Deletes an ACM Certificate and its associated private key. If this action succeeds, the certificate no longer appears in the list of ACM Certificates that can be displayed by calling the ListCertificates action or be retrieved by calling the GetCertificate action. The certificate will not be available for use by other AWS services. You cannot delete an ACM Certificate that is being used by another AWS service. To delete a certificate that is in use, the certificate association must first be removed. \n+ */\n+ deleteCertificate(params: ACM.DeleteCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Deletes an ACM Certificate and its associated private key. If this action succeeds, the certificate no longer appears in the list of ACM Certificates that can be displayed by calling the ListCertificates action or be retrieved by calling the GetCertificate action. The certificate will not be available for use by other AWS services. You cannot delete an ACM Certificate that is being used by another AWS service. To delete a certificate that is in use, the certificate association must first be removed. \n+ */\n+ deleteCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Returns a list of the fields contained in the specified ACM Certificate. For example, this action returns the certificate status, a flag that indicates whether the certificate is associated with any other AWS service, and the date at which the certificate request was created. You specify the ACM Certificate on input by its Amazon Resource Name (ARN).\n+ */\n+ describeCertificate(params: ACM.DescribeCertificateRequest, callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void): Request;\n+ /**\n+ * Returns a list of the fields contained in the specified ACM Certificate. 
For example, this action returns the certificate status, a flag that indicates whether the certificate is associated with any other AWS service, and the date at which the certificate request was created. You specify the ACM Certificate on input by its Amazon Resource Name (ARN).\n+ */\n+ describeCertificate(callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves an ACM Certificate and certificate chain for the certificate specified by an ARN. The chain is an ordered list of certificates that contains the root certificate, intermediate certificates of subordinate CAs, and the ACM Certificate. The certificate and certificate chain are base64 encoded. If you want to decode the certificate chain to see the individual certificate fields, you can use OpenSSL. Currently, ACM Certificates can be used only with Elastic Load Balancing and Amazon CloudFront. \n+ */\n+ getCertificate(params: ACM.GetCertificateRequest, callback?: (err: AWSError, data: ACM.GetCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves an ACM Certificate and certificate chain for the certificate specified by an ARN. The chain is an ordered list of certificates that contains the root certificate, intermediate certificates of subordinate CAs, and the ACM Certificate. The certificate and certificate chain are base64 encoded. If you want to decode the certificate chain to see the individual certificate fields, you can use OpenSSL. Currently, ACM Certificates can be used only with Elastic Load Balancing and Amazon CloudFront. \n+ */\n+ getCertificate(callback?: (err: AWSError, data: ACM.GetCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves a list of ACM Certificates and the domain name for each. You can optionally filter the list to return only the certificates that match the specified status.\n+ */\n+ listCertificates(params: ACM.ListCertificatesRequest, callback?: (err: AWSError, data: ACM.ListCertificatesResponse) => void): Request;\n+ /**\n+ * Retrieves a list of ACM Certificates and the domain name for each. You can optionally filter the list to return only the certificates that match the specified status.\n+ */\n+ listCertificates(callback?: (err: AWSError, data: ACM.ListCertificatesResponse) => void): Request;\n+ /**\n+ * Lists the tags that have been applied to the ACM Certificate. Use the certificate ARN to specify the certificate. To add a tag to an ACM Certificate, use the AddTagsToCertificate action. To delete a tag, use the RemoveTagsFromCertificate action.\n+ */\n+ listTagsForCertificate(params: ACM.ListTagsForCertificateRequest, callback?: (err: AWSError, data: ACM.ListTagsForCertificateResponse) => void): Request;\n+ /**\n+ * Lists the tags that have been applied to the ACM Certificate. Use the certificate ARN to specify the certificate. To add a tag to an ACM Certificate, use the AddTagsToCertificate action. To delete a tag, use the RemoveTagsFromCertificate action.\n+ */\n+ listTagsForCertificate(callback?: (err: AWSError, data: ACM.ListTagsForCertificateResponse) => void): Request;\n+ /**\n+ * Remove one or more tags from an ACM Certificate. A tag consists of a key-value pair. If you do not specify the value portion of the tag when calling this function, the tag will be removed regardless of value. If you specify a value, the tag is removed only if it is associated with the specified value. To add tags to a certificate, use the AddTagsToCertificate action. 
To view all of the tags that have been applied to a specific ACM Certificate, use the ListTagsForCertificate action.\n+ */\n+ removeTagsFromCertificate(params: ACM.RemoveTagsFromCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Remove one or more tags from an ACM Certificate. A tag consists of a key-value pair. If you do not specify the value portion of the tag when calling this function, the tag will be removed regardless of value. If you specify a value, the tag is removed only if it is associated with the specified value. To add tags to a certificate, use the AddTagsToCertificate action. To view all of the tags that have been applied to a specific ACM Certificate, use the ListTagsForCertificate action.\n+ */\n+ removeTagsFromCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Requests an ACM Certificate for use with other AWS services. To request an ACM Certificate, you must specify the fully qualified domain name (FQDN) for your site. You can also specify additional FQDNs if users can reach your site by using other names. For each domain name you specify, email is sent to the domain owner to request approval to issue the certificate. After receiving approval from the domain owner, the ACM Certificate is issued. For more information, see the AWS Certificate Manager User Guide .\n+ */\n+ requestCertificate(params: ACM.RequestCertificateRequest, callback?: (err: AWSError, data: ACM.RequestCertificateResponse) => void): Request;\n+ /**\n+ * Requests an ACM Certificate for use with other AWS services. To request an ACM Certificate, you must specify the fully qualified domain name (FQDN) for your site. You can also specify additional FQDNs if users can reach your site by using other names. For each domain name you specify, email is sent to the domain owner to request approval to issue the certificate. After receiving approval from the domain owner, the ACM Certificate is issued. For more information, see the AWS Certificate Manager User Guide .\n+ */\n+ requestCertificate(callback?: (err: AWSError, data: ACM.RequestCertificateResponse) => void): Request;\n+ /**\n+ * Resends the email that requests domain ownership validation. The domain owner or an authorized representative must approve the ACM Certificate before it can be issued. The certificate can be approved by clicking a link in the mail to navigate to the Amazon certificate approval website and then clicking I Approve. However, the validation email can be blocked by spam filters. Therefore, if you do not receive the original mail, you can request that the mail be resent within 72 hours of requesting the ACM Certificate. If more than 72 hours have elapsed since your original request or since your last attempt to resend validation mail, you must request a new certificate.\n+ */\n+ resendValidationEmail(params: ACM.ResendValidationEmailRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Resends the email that requests domain ownership validation. The domain owner or an authorized representative must approve the ACM Certificate before it can be issued. The certificate can be approved by clicking a link in the mail to navigate to the Amazon certificate approval website and then clicking I Approve. However, the validation email can be blocked by spam filters. Therefore, if you do not receive the original mail, you can request that the mail be resent within 72 hours of requesting the ACM Certificate. 
If more than 72 hours have elapsed since your original request or since your last attempt to resend validation mail, you must request a new certificate.\n+ */\n+ resendValidationEmail(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+}\n+declare namespace ACM {", + "comment_created_at": "2016-10-25T01:27:43+00:00", + "comment_author": "LiuJoyceC", + "comment_body": "Right, it would only be on the constructor, but there are valid reasons a customer may want to look at the properties of the constructor such as looking up what `apiVersions` are available for a service, or for the case of S3 and DynamoDB, they want to access the `ManagedUpload` or `DocumentClient`. When they try to access these, the constructors will autocomplete with a large number of properties that aren't real and make it difficult to find the actual property they are trying to access.\n", + "pr_file_module": null + }, + { + "comment_id": "85429596", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1189, + "pr_file": "clients/acm.d.ts", + "discussion_id": "84805047", + "commented_code": "@@ -0,0 +1,350 @@\n+import {Request} from '../lib/request';\n+import {Response} from '../lib/response';\n+import {AWSError} from '../lib/error';\n+import {Service} from '../lib/service';\n+declare class ACM extends Service {\n+ /**\n+ * Adds one or more tags to an ACM Certificate. Tags are labels that you can use to identify and organize your AWS resources. Each tag consists of a key and an optional value. You specify the certificate on input by its Amazon Resource Name (ARN). You specify the tag by using a key-value pair. You can apply a tag to just one certificate if you want to identify a specific characteristic of that certificate, or you can apply the same tag to multiple certificates if you want to filter for a common relationship among those certificates. Similarly, you can apply the same tag to multiple resources if you want to specify a relationship among those resources. For example, you can add the same tag to an ACM Certificate and an Elastic Load Balancing load balancer to indicate that they are both used by the same website. For more information, see Tagging ACM Certificates. To remove one or more tags, use the RemoveTagsFromCertificate action. To view all of the tags that have been applied to the certificate, use the ListTagsForCertificate action.\n+ */\n+ addTagsToCertificate(params: ACM.AddTagsToCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Adds one or more tags to an ACM Certificate. Tags are labels that you can use to identify and organize your AWS resources. Each tag consists of a key and an optional value. You specify the certificate on input by its Amazon Resource Name (ARN). You specify the tag by using a key-value pair. You can apply a tag to just one certificate if you want to identify a specific characteristic of that certificate, or you can apply the same tag to multiple certificates if you want to filter for a common relationship among those certificates. Similarly, you can apply the same tag to multiple resources if you want to specify a relationship among those resources. For example, you can add the same tag to an ACM Certificate and an Elastic Load Balancing load balancer to indicate that they are both used by the same website. For more information, see Tagging ACM Certificates. To remove one or more tags, use the RemoveTagsFromCertificate action. 
To view all of the tags that have been applied to the certificate, use the ListTagsForCertificate action.\n+ */\n+ addTagsToCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Deletes an ACM Certificate and its associated private key. If this action succeeds, the certificate no longer appears in the list of ACM Certificates that can be displayed by calling the ListCertificates action or be retrieved by calling the GetCertificate action. The certificate will not be available for use by other AWS services. You cannot delete an ACM Certificate that is being used by another AWS service. To delete a certificate that is in use, the certificate association must first be removed. \n+ */\n+ deleteCertificate(params: ACM.DeleteCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Deletes an ACM Certificate and its associated private key. If this action succeeds, the certificate no longer appears in the list of ACM Certificates that can be displayed by calling the ListCertificates action or be retrieved by calling the GetCertificate action. The certificate will not be available for use by other AWS services. You cannot delete an ACM Certificate that is being used by another AWS service. To delete a certificate that is in use, the certificate association must first be removed. \n+ */\n+ deleteCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Returns a list of the fields contained in the specified ACM Certificate. For example, this action returns the certificate status, a flag that indicates whether the certificate is associated with any other AWS service, and the date at which the certificate request was created. You specify the ACM Certificate on input by its Amazon Resource Name (ARN).\n+ */\n+ describeCertificate(params: ACM.DescribeCertificateRequest, callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void): Request;\n+ /**\n+ * Returns a list of the fields contained in the specified ACM Certificate. For example, this action returns the certificate status, a flag that indicates whether the certificate is associated with any other AWS service, and the date at which the certificate request was created. You specify the ACM Certificate on input by its Amazon Resource Name (ARN).\n+ */\n+ describeCertificate(callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves an ACM Certificate and certificate chain for the certificate specified by an ARN. The chain is an ordered list of certificates that contains the root certificate, intermediate certificates of subordinate CAs, and the ACM Certificate. The certificate and certificate chain are base64 encoded. If you want to decode the certificate chain to see the individual certificate fields, you can use OpenSSL. Currently, ACM Certificates can be used only with Elastic Load Balancing and Amazon CloudFront. \n+ */\n+ getCertificate(params: ACM.GetCertificateRequest, callback?: (err: AWSError, data: ACM.GetCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves an ACM Certificate and certificate chain for the certificate specified by an ARN. The chain is an ordered list of certificates that contains the root certificate, intermediate certificates of subordinate CAs, and the ACM Certificate. The certificate and certificate chain are base64 encoded. If you want to decode the certificate chain to see the individual certificate fields, you can use OpenSSL. 
Currently, ACM Certificates can be used only with Elastic Load Balancing and Amazon CloudFront. \n+ */\n+ getCertificate(callback?: (err: AWSError, data: ACM.GetCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves a list of ACM Certificates and the domain name for each. You can optionally filter the list to return only the certificates that match the specified status.\n+ */\n+ listCertificates(params: ACM.ListCertificatesRequest, callback?: (err: AWSError, data: ACM.ListCertificatesResponse) => void): Request;\n+ /**\n+ * Retrieves a list of ACM Certificates and the domain name for each. You can optionally filter the list to return only the certificates that match the specified status.\n+ */\n+ listCertificates(callback?: (err: AWSError, data: ACM.ListCertificatesResponse) => void): Request;\n+ /**\n+ * Lists the tags that have been applied to the ACM Certificate. Use the certificate ARN to specify the certificate. To add a tag to an ACM Certificate, use the AddTagsToCertificate action. To delete a tag, use the RemoveTagsFromCertificate action.\n+ */\n+ listTagsForCertificate(params: ACM.ListTagsForCertificateRequest, callback?: (err: AWSError, data: ACM.ListTagsForCertificateResponse) => void): Request;\n+ /**\n+ * Lists the tags that have been applied to the ACM Certificate. Use the certificate ARN to specify the certificate. To add a tag to an ACM Certificate, use the AddTagsToCertificate action. To delete a tag, use the RemoveTagsFromCertificate action.\n+ */\n+ listTagsForCertificate(callback?: (err: AWSError, data: ACM.ListTagsForCertificateResponse) => void): Request;\n+ /**\n+ * Remove one or more tags from an ACM Certificate. A tag consists of a key-value pair. If you do not specify the value portion of the tag when calling this function, the tag will be removed regardless of value. If you specify a value, the tag is removed only if it is associated with the specified value. To add tags to a certificate, use the AddTagsToCertificate action. To view all of the tags that have been applied to a specific ACM Certificate, use the ListTagsForCertificate action.\n+ */\n+ removeTagsFromCertificate(params: ACM.RemoveTagsFromCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Remove one or more tags from an ACM Certificate. A tag consists of a key-value pair. If you do not specify the value portion of the tag when calling this function, the tag will be removed regardless of value. If you specify a value, the tag is removed only if it is associated with the specified value. To add tags to a certificate, use the AddTagsToCertificate action. To view all of the tags that have been applied to a specific ACM Certificate, use the ListTagsForCertificate action.\n+ */\n+ removeTagsFromCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Requests an ACM Certificate for use with other AWS services. To request an ACM Certificate, you must specify the fully qualified domain name (FQDN) for your site. You can also specify additional FQDNs if users can reach your site by using other names. For each domain name you specify, email is sent to the domain owner to request approval to issue the certificate. After receiving approval from the domain owner, the ACM Certificate is issued. 
For more information, see the AWS Certificate Manager User Guide .\n+ */\n+ requestCertificate(params: ACM.RequestCertificateRequest, callback?: (err: AWSError, data: ACM.RequestCertificateResponse) => void): Request;\n+ /**\n+ * Requests an ACM Certificate for use with other AWS services. To request an ACM Certificate, you must specify the fully qualified domain name (FQDN) for your site. You can also specify additional FQDNs if users can reach your site by using other names. For each domain name you specify, email is sent to the domain owner to request approval to issue the certificate. After receiving approval from the domain owner, the ACM Certificate is issued. For more information, see the AWS Certificate Manager User Guide .\n+ */\n+ requestCertificate(callback?: (err: AWSError, data: ACM.RequestCertificateResponse) => void): Request;\n+ /**\n+ * Resends the email that requests domain ownership validation. The domain owner or an authorized representative must approve the ACM Certificate before it can be issued. The certificate can be approved by clicking a link in the mail to navigate to the Amazon certificate approval website and then clicking I Approve. However, the validation email can be blocked by spam filters. Therefore, if you do not receive the original mail, you can request that the mail be resent within 72 hours of requesting the ACM Certificate. If more than 72 hours have elapsed since your original request or since your last attempt to resend validation mail, you must request a new certificate.\n+ */\n+ resendValidationEmail(params: ACM.ResendValidationEmailRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Resends the email that requests domain ownership validation. The domain owner or an authorized representative must approve the ACM Certificate before it can be issued. The certificate can be approved by clicking a link in the mail to navigate to the Amazon certificate approval website and then clicking I Approve. However, the validation email can be blocked by spam filters. Therefore, if you do not receive the original mail, you can request that the mail be resent within 72 hours of requesting the ACM Certificate. If more than 72 hours have elapsed since your original request or since your last attempt to resend validation mail, you must request a new certificate.\n+ */\n+ resendValidationEmail(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+}\n+declare namespace ACM {", + "comment_created_at": "2016-10-27T21:17:07+00:00", + "comment_author": "chrisradek", + "comment_body": "So, I tried a few different things. Ultimately, I went with putting the exported types in a sub-namespace:\n`declare namespace SERVICE.Types {`\nWhen I put them on their own namespace, I had to explicitly import them into my app, otherwise the typescript compiler would complain:\nhttps://github.com/Microsoft/TypeScript/issues/9944\n\nI also wanted them to be exported so user's can specify a type for cases when the typescript compiler can't quite infer what a type should be. 
That might not be necessary once https://github.com/Microsoft/TypeScript/issues/6606 is addressed.\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "85630294", + "pr_number": 1189, + "pr_file": "lib/config.d.ts", + "created_at": "2016-10-29T01:03:41+00:00", + "commented_code": "/// \n\nimport * as http from 'http';\nimport * as https from 'https';\nimport {AWSError} from './error';\nimport {Credentials} from './credentials';\nexport class Config {\n /**\n * Creates a new configuration object.\n * This is the object that passes option data along to service requests, including credentials, security, region information, and some service specific settings.\n */\n constructor(options?: ConfigurationOptions);\n /**\n * Loads credentials from the configuration object.\n */\n getCredentials(callback: (err: AWSError) => void): void;\n /**\n * Loads configuration data from a JSON file into this config object.\n * Loading configuration willr eset all existing configuration on the object.\n * This feature is not supported in the browser environment of the SDK.\n * \n * @param {string} path - the path relative to your process's current working directory to load configuration from.\n */\n loadFromPath(path: string): Config;\n /**\n * Updates the current configuration object with new options.\n * \n * @param {ConfigurationOptions} options - a map of option keys and values.\n * @param {boolean} allowUnknownKeys - Defaults to false. Whether unknown keys can be set on the configuration object.\n */\n update(options: ConfigurationOptions, allowUnknownKeys?: boolean): void;\n /**\n * Sets the promise dependency the SDK will use wherever Promises are returned.\n * @param {function} dep - a reference to a Promise constructor\n */\n setPromisesDependency(dep: any): void;\n\n /**\n * Whether to compute checksums for payload bodies when the service accepts it.\n * Currently supported in S3 only.\n */\n computeChecksums?: boolean\n /**\n * Whether types are converted when parsing response data.\n */\n convertResponseTypes?: boolean\n /**\n * Whether to apply a clock skew correction and retry requests that fail because of an skewed client clock.\n */\n correctClockSkew?: boolean\n /**\n * The AWS credentials to sign requests with.\n */\n credentials: Credentials\n /**\n * A set of options to pass to the low-level HTTP request.\n */\n httpOptions?: HTTPOptions\n /**\n * An object that responds to .write() (like a stream) or .log() (like the console object) in order to log information about requests.\n */\n logger?: Logger\n /**\n * The maximum amount of redirects to follow for a service request.\n */\n maxRedirects?: number\n /**\n * The maximum amount of retries to perform for a service request.\n */\n maxRetries?: number\n /**\n * Returns whether input parameters should be validated against the operation description before sending the request. \n * Defaults to true. 
\n * Pass a map to enable any of the following specific validation features: min|max|pattern|enum\n */\n paramValidation?: ParamValidation | boolean\n /**\n * The region to send service requests to.\n */\n region?: string\n /**\n * Returns A set of options to configure the retry delay on retryable errors.\n */\n retryDelayOptions?: RetryDelayOptions\n /**\n * Whether the provided endpoint addresses an individual bucket.\n * false if it addresses the root API endpoint.\n */\n s3BucketEndpoint?: boolean\n /**\n * Whether to disable S3 body signing when using signature version v4.\n */\n s3DisableBodySigning?: boolean\n /**\n * Whether to force path style URLs for S3 objects.\n */\n s3ForcePathStyle?: boolean\n /**\n * Whether the signature to sign requests with (overriding the API configuration) is cached.\n */\n signatureCache?: boolean\n /**\n * The signature version to sign requests with (overriding the API configuration).\n * Possible values: 'v2'|'v3'|'v4'\n */\n signatureVersion?: \"v2\"|\"v3\"|\"v4\"|string\n /**\n * Whether SSL is enabled for requests.\n */\n sslEnabled?: boolean\n /**\n * An offset value in milliseconds to apply to all signing times.\n */\n systemClockOffset?: number\n /**\n * Whether to use the Accelerate endpoint with the S3 service.\n */\n useAccelerateEndpoint?: boolean\n}\n\ninterface HTTPOptions {\n /**\n * the URL to proxy requests through.\n */\n proxy?: string\n /**\n * the Agent object to perform HTTP requests with.\n * Used for connection pooling.\n * Defaults to the global agent (http.globalAgent) for non-SSL connections.\n */\n agent?: http.Agent | https.Agent\n /**\n * The number of milliseconds to wait before giving up on a connection attempt.\n * Defaults to two minutes (120000).\n */\n timeout?: number\n /**\n * Whether the SDK will send asynchronous HTTP requests.\n * Used in the browser environment only.\n * Set to false to send requests synchronously.\n * Defaults to true (async on).\n */\n xhrAsync?: boolean\n /**\n * Sets the 'withCredentials' property of an XMLHttpRequest object.\n * Used in the browser environment only.\n * Defaults to false.\n */\n xhrWithCredentials?: boolean\n}\ninterface Logger {\n write?: (chunk: any, encoding?: string, callback?: () => void) => void\n log?: (...messages: any[]) => void;\n}\ninterface ParamValidation {\n /**\n * Validates that a value meets the min constraint. \n * This is enabled by default when paramValidation is set to true.\n */\n min?: boolean\n /**\n * Validates that a value meets the max constraint.\n */\n max?: boolean\n /**\n * Validates that a string value matches a regular expression.\n */\n pattern?: boolean\n /**\n * Validates that a string value matches one of the allowable enum values.\n */\n enum?: boolean\n}\ninterface RetryDelayOptions {\n /**\n * The base number of milliseconds to use in the exponential backoff for operation retries. \n * Defaults to 100 ms.\n */\n base?: number\n /**\n * A custom function that accepts a retry count and returns the amount of time to delay in milliseconds. 
\n * The base option will be ignored if this option is supplied.\n */\n customBackoff?: (retryCount: number) => number\n}\ninterface ConfigurationOptions {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "85630294", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1189, + "pr_file": "lib/config.d.ts", + "discussion_id": "85630294", + "commented_code": "@@ -0,0 +1,268 @@\n+/// \n+\n+import * as http from 'http';\n+import * as https from 'https';\n+import {AWSError} from './error';\n+import {Credentials} from './credentials';\n+export class Config {\n+ /**\n+ * Creates a new configuration object.\n+ * This is the object that passes option data along to service requests, including credentials, security, region information, and some service specific settings.\n+ */\n+ constructor(options?: ConfigurationOptions);\n+ /**\n+ * Loads credentials from the configuration object.\n+ */\n+ getCredentials(callback: (err: AWSError) => void): void;\n+ /**\n+ * Loads configuration data from a JSON file into this config object.\n+ * Loading configuration willr eset all existing configuration on the object.\n+ * This feature is not supported in the browser environment of the SDK.\n+ * \n+ * @param {string} path - the path relative to your process's current working directory to load configuration from.\n+ */\n+ loadFromPath(path: string): Config;\n+ /**\n+ * Updates the current configuration object with new options.\n+ * \n+ * @param {ConfigurationOptions} options - a map of option keys and values.\n+ * @param {boolean} allowUnknownKeys - Defaults to false. Whether unknown keys can be set on the configuration object.\n+ */\n+ update(options: ConfigurationOptions, allowUnknownKeys?: boolean): void;\n+ /**\n+ * Sets the promise dependency the SDK will use wherever Promises are returned.\n+ * @param {function} dep - a reference to a Promise constructor\n+ */\n+ setPromisesDependency(dep: any): void;\n+\n+ /**\n+ * Whether to compute checksums for payload bodies when the service accepts it.\n+ * Currently supported in S3 only.\n+ */\n+ computeChecksums?: boolean\n+ /**\n+ * Whether types are converted when parsing response data.\n+ */\n+ convertResponseTypes?: boolean\n+ /**\n+ * Whether to apply a clock skew correction and retry requests that fail because of an skewed client clock.\n+ */\n+ correctClockSkew?: boolean\n+ /**\n+ * The AWS credentials to sign requests with.\n+ */\n+ credentials: Credentials\n+ /**\n+ * A set of options to pass to the low-level HTTP request.\n+ */\n+ httpOptions?: HTTPOptions\n+ /**\n+ * An object that responds to .write() (like a stream) or .log() (like the console object) in order to log information about requests.\n+ */\n+ logger?: Logger\n+ /**\n+ * The maximum amount of redirects to follow for a service request.\n+ */\n+ maxRedirects?: number\n+ /**\n+ * The maximum amount of retries to perform for a service request.\n+ */\n+ maxRetries?: number\n+ /**\n+ * Returns whether input parameters should be validated against the operation description before sending the request. \n+ * Defaults to true. 
\n+ * Pass a map to enable any of the following specific validation features: min|max|pattern|enum\n+ */\n+ paramValidation?: ParamValidation | boolean\n+ /**\n+ * The region to send service requests to.\n+ */\n+ region?: string\n+ /**\n+ * Returns A set of options to configure the retry delay on retryable errors.\n+ */\n+ retryDelayOptions?: RetryDelayOptions\n+ /**\n+ * Whether the provided endpoint addresses an individual bucket.\n+ * false if it addresses the root API endpoint.\n+ */\n+ s3BucketEndpoint?: boolean\n+ /**\n+ * Whether to disable S3 body signing when using signature version v4.\n+ */\n+ s3DisableBodySigning?: boolean\n+ /**\n+ * Whether to force path style URLs for S3 objects.\n+ */\n+ s3ForcePathStyle?: boolean\n+ /**\n+ * Whether the signature to sign requests with (overriding the API configuration) is cached.\n+ */\n+ signatureCache?: boolean\n+ /**\n+ * The signature version to sign requests with (overriding the API configuration).\n+ * Possible values: 'v2'|'v3'|'v4'\n+ */\n+ signatureVersion?: \"v2\"|\"v3\"|\"v4\"|string\n+ /**\n+ * Whether SSL is enabled for requests.\n+ */\n+ sslEnabled?: boolean\n+ /**\n+ * An offset value in milliseconds to apply to all signing times.\n+ */\n+ systemClockOffset?: number\n+ /**\n+ * Whether to use the Accelerate endpoint with the S3 service.\n+ */\n+ useAccelerateEndpoint?: boolean\n+}\n+\n+interface HTTPOptions {\n+ /**\n+ * the URL to proxy requests through.\n+ */\n+ proxy?: string\n+ /**\n+ * the Agent object to perform HTTP requests with.\n+ * Used for connection pooling.\n+ * Defaults to the global agent (http.globalAgent) for non-SSL connections.\n+ */\n+ agent?: http.Agent | https.Agent\n+ /**\n+ * The number of milliseconds to wait before giving up on a connection attempt.\n+ * Defaults to two minutes (120000).\n+ */\n+ timeout?: number\n+ /**\n+ * Whether the SDK will send asynchronous HTTP requests.\n+ * Used in the browser environment only.\n+ * Set to false to send requests synchronously.\n+ * Defaults to true (async on).\n+ */\n+ xhrAsync?: boolean\n+ /**\n+ * Sets the 'withCredentials' property of an XMLHttpRequest object.\n+ * Used in the browser environment only.\n+ * Defaults to false.\n+ */\n+ xhrWithCredentials?: boolean\n+}\n+interface Logger {\n+ write?: (chunk: any, encoding?: string, callback?: () => void) => void\n+ log?: (...messages: any[]) => void;\n+}\n+interface ParamValidation {\n+ /**\n+ * Validates that a value meets the min constraint. \n+ * This is enabled by default when paramValidation is set to true.\n+ */\n+ min?: boolean\n+ /**\n+ * Validates that a value meets the max constraint.\n+ */\n+ max?: boolean\n+ /**\n+ * Validates that a string value matches a regular expression.\n+ */\n+ pattern?: boolean\n+ /**\n+ * Validates that a string value matches one of the allowable enum values.\n+ */\n+ enum?: boolean\n+}\n+interface RetryDelayOptions {\n+ /**\n+ * The base number of milliseconds to use in the exponential backoff for operation retries. \n+ * Defaults to 100 ms.\n+ */\n+ base?: number\n+ /**\n+ * A custom function that accepts a retry count and returns the amount of time to delay in milliseconds. 
\n+ * The base option will be ignored if this option is supplied.\n+ */\n+ customBackoff?: (retryCount: number) => number\n+}\n+interface ConfigurationOptions {", + "comment_created_at": "2016-10-29T01:03:41+00:00", + "comment_author": "LiuJoyceC", + "comment_body": "Instead of maintaining two separate but identical lists of these config options plus comments (here and above in the `Config` class), perhaps we could define this as an abstract class rather than an interface, and then the `Config` class could extend this abstract class.\nAnother solution could be to generate this file (which may be a good idea anyway since we need to include all of the service identifiers in the `Config` class as mentioned above). However, we would then need to maintain the comments for each of these configuration options in another file, since I noticed these comments aren't exactly identical to the documentation in the `config.js` file.\n", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/aws-sdk-js-organize-type-declarations.md b/_reviewers/aws-sdk-js-organize-type-declarations.md index d9b1551..123585b 100644 --- a/_reviewers/aws-sdk-js-organize-type-declarations.md +++ b/_reviewers/aws-sdk-js-organize-type-declarations.md @@ -54,87 +54,3 @@ class Config extends BaseConfig { ``` This approach improves code completion accuracy, reduces maintenance overhead, and creates a more intuitive API structure. - - -[ - { - "discussion_id": "84805047", - "pr_number": 1189, - "pr_file": "clients/acm.d.ts", - "created_at": "2016-10-24T23:28:51+00:00", - "commented_code": "import {Request} from '../lib/request';\nimport {Response} from '../lib/response';\nimport {AWSError} from '../lib/error';\nimport {Service} from '../lib/service';\ndeclare class ACM extends Service {\n /**\n * Adds one or more tags to an ACM Certificate. Tags are labels that you can use to identify and organize your AWS resources. Each tag consists of a key and an optional value. You specify the certificate on input by its Amazon Resource Name (ARN). You specify the tag by using a key-value pair. You can apply a tag to just one certificate if you want to identify a specific characteristic of that certificate, or you can apply the same tag to multiple certificates if you want to filter for a common relationship among those certificates. Similarly, you can apply the same tag to multiple resources if you want to specify a relationship among those resources. For example, you can add the same tag to an ACM Certificate and an Elastic Load Balancing load balancer to indicate that they are both used by the same website. For more information, see Tagging ACM Certificates. To remove one or more tags, use the RemoveTagsFromCertificate action. To view all of the tags that have been applied to the certificate, use the ListTagsForCertificate action.\n */\n addTagsToCertificate(params: ACM.AddTagsToCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n /**\n * Adds one or more tags to an ACM Certificate. Tags are labels that you can use to identify and organize your AWS resources. Each tag consists of a key and an optional value. You specify the certificate on input by its Amazon Resource Name (ARN). You specify the tag by using a key-value pair. You can apply a tag to just one certificate if you want to identify a specific characteristic of that certificate, or you can apply the same tag to multiple certificates if you want to filter for a common relationship among those certificates. 
Similarly, you can apply the same tag to multiple resources if you want to specify a relationship among those resources. For example, you can add the same tag to an ACM Certificate and an Elastic Load Balancing load balancer to indicate that they are both used by the same website. For more information, see Tagging ACM Certificates. To remove one or more tags, use the RemoveTagsFromCertificate action. To view all of the tags that have been applied to the certificate, use the ListTagsForCertificate action.\n */\n addTagsToCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n /**\n * Deletes an ACM Certificate and its associated private key. If this action succeeds, the certificate no longer appears in the list of ACM Certificates that can be displayed by calling the ListCertificates action or be retrieved by calling the GetCertificate action. The certificate will not be available for use by other AWS services. You cannot delete an ACM Certificate that is being used by another AWS service. To delete a certificate that is in use, the certificate association must first be removed. \n */\n deleteCertificate(params: ACM.DeleteCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n /**\n * Deletes an ACM Certificate and its associated private key. If this action succeeds, the certificate no longer appears in the list of ACM Certificates that can be displayed by calling the ListCertificates action or be retrieved by calling the GetCertificate action. The certificate will not be available for use by other AWS services. You cannot delete an ACM Certificate that is being used by another AWS service. To delete a certificate that is in use, the certificate association must first be removed. \n */\n deleteCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n /**\n * Returns a list of the fields contained in the specified ACM Certificate. For example, this action returns the certificate status, a flag that indicates whether the certificate is associated with any other AWS service, and the date at which the certificate request was created. You specify the ACM Certificate on input by its Amazon Resource Name (ARN).\n */\n describeCertificate(params: ACM.DescribeCertificateRequest, callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void): Request;\n /**\n * Returns a list of the fields contained in the specified ACM Certificate. For example, this action returns the certificate status, a flag that indicates whether the certificate is associated with any other AWS service, and the date at which the certificate request was created. You specify the ACM Certificate on input by its Amazon Resource Name (ARN).\n */\n describeCertificate(callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void): Request;\n /**\n * Retrieves an ACM Certificate and certificate chain for the certificate specified by an ARN. The chain is an ordered list of certificates that contains the root certificate, intermediate certificates of subordinate CAs, and the ACM Certificate. The certificate and certificate chain are base64 encoded. If you want to decode the certificate chain to see the individual certificate fields, you can use OpenSSL. Currently, ACM Certificates can be used only with Elastic Load Balancing and Amazon CloudFront. 
\n */\n getCertificate(params: ACM.GetCertificateRequest, callback?: (err: AWSError, data: ACM.GetCertificateResponse) => void): Request;\n /**\n * Retrieves an ACM Certificate and certificate chain for the certificate specified by an ARN. The chain is an ordered list of certificates that contains the root certificate, intermediate certificates of subordinate CAs, and the ACM Certificate. The certificate and certificate chain are base64 encoded. If you want to decode the certificate chain to see the individual certificate fields, you can use OpenSSL. Currently, ACM Certificates can be used only with Elastic Load Balancing and Amazon CloudFront. \n */\n getCertificate(callback?: (err: AWSError, data: ACM.GetCertificateResponse) => void): Request;\n /**\n * Retrieves a list of ACM Certificates and the domain name for each. You can optionally filter the list to return only the certificates that match the specified status.\n */\n listCertificates(params: ACM.ListCertificatesRequest, callback?: (err: AWSError, data: ACM.ListCertificatesResponse) => void): Request;\n /**\n * Retrieves a list of ACM Certificates and the domain name for each. You can optionally filter the list to return only the certificates that match the specified status.\n */\n listCertificates(callback?: (err: AWSError, data: ACM.ListCertificatesResponse) => void): Request;\n /**\n * Lists the tags that have been applied to the ACM Certificate. Use the certificate ARN to specify the certificate. To add a tag to an ACM Certificate, use the AddTagsToCertificate action. To delete a tag, use the RemoveTagsFromCertificate action.\n */\n listTagsForCertificate(params: ACM.ListTagsForCertificateRequest, callback?: (err: AWSError, data: ACM.ListTagsForCertificateResponse) => void): Request;\n /**\n * Lists the tags that have been applied to the ACM Certificate. Use the certificate ARN to specify the certificate. To add a tag to an ACM Certificate, use the AddTagsToCertificate action. To delete a tag, use the RemoveTagsFromCertificate action.\n */\n listTagsForCertificate(callback?: (err: AWSError, data: ACM.ListTagsForCertificateResponse) => void): Request;\n /**\n * Remove one or more tags from an ACM Certificate. A tag consists of a key-value pair. If you do not specify the value portion of the tag when calling this function, the tag will be removed regardless of value. If you specify a value, the tag is removed only if it is associated with the specified value. To add tags to a certificate, use the AddTagsToCertificate action. To view all of the tags that have been applied to a specific ACM Certificate, use the ListTagsForCertificate action.\n */\n removeTagsFromCertificate(params: ACM.RemoveTagsFromCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n /**\n * Remove one or more tags from an ACM Certificate. A tag consists of a key-value pair. If you do not specify the value portion of the tag when calling this function, the tag will be removed regardless of value. If you specify a value, the tag is removed only if it is associated with the specified value. To add tags to a certificate, use the AddTagsToCertificate action. To view all of the tags that have been applied to a specific ACM Certificate, use the ListTagsForCertificate action.\n */\n removeTagsFromCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n /**\n * Requests an ACM Certificate for use with other AWS services. 
To request an ACM Certificate, you must specify the fully qualified domain name (FQDN) for your site. You can also specify additional FQDNs if users can reach your site by using other names. For each domain name you specify, email is sent to the domain owner to request approval to issue the certificate. After receiving approval from the domain owner, the ACM Certificate is issued. For more information, see the AWS Certificate Manager User Guide .\n */\n requestCertificate(params: ACM.RequestCertificateRequest, callback?: (err: AWSError, data: ACM.RequestCertificateResponse) => void): Request;\n /**\n * Requests an ACM Certificate for use with other AWS services. To request an ACM Certificate, you must specify the fully qualified domain name (FQDN) for your site. You can also specify additional FQDNs if users can reach your site by using other names. For each domain name you specify, email is sent to the domain owner to request approval to issue the certificate. After receiving approval from the domain owner, the ACM Certificate is issued. For more information, see the AWS Certificate Manager User Guide .\n */\n requestCertificate(callback?: (err: AWSError, data: ACM.RequestCertificateResponse) => void): Request;\n /**\n * Resends the email that requests domain ownership validation. The domain owner or an authorized representative must approve the ACM Certificate before it can be issued. The certificate can be approved by clicking a link in the mail to navigate to the Amazon certificate approval website and then clicking I Approve. However, the validation email can be blocked by spam filters. Therefore, if you do not receive the original mail, you can request that the mail be resent within 72 hours of requesting the ACM Certificate. If more than 72 hours have elapsed since your original request or since your last attempt to resend validation mail, you must request a new certificate.\n */\n resendValidationEmail(params: ACM.ResendValidationEmailRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n /**\n * Resends the email that requests domain ownership validation. The domain owner or an authorized representative must approve the ACM Certificate before it can be issued. The certificate can be approved by clicking a link in the mail to navigate to the Amazon certificate approval website and then clicking I Approve. However, the validation email can be blocked by spam filters. Therefore, if you do not receive the original mail, you can request that the mail be resent within 72 hours of requesting the ACM Certificate. If more than 72 hours have elapsed since your original request or since your last attempt to resend validation mail, you must request a new certificate.\n */\n resendValidationEmail(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n}\ndeclare namespace ACM {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "84805047", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1189, - "pr_file": "clients/acm.d.ts", - "discussion_id": "84805047", - "commented_code": "@@ -0,0 +1,350 @@\n+import {Request} from '../lib/request';\n+import {Response} from '../lib/response';\n+import {AWSError} from '../lib/error';\n+import {Service} from '../lib/service';\n+declare class ACM extends Service {\n+ /**\n+ * Adds one or more tags to an ACM Certificate. Tags are labels that you can use to identify and organize your AWS resources. Each tag consists of a key and an optional value. 
You specify the certificate on input by its Amazon Resource Name (ARN). You specify the tag by using a key-value pair. You can apply a tag to just one certificate if you want to identify a specific characteristic of that certificate, or you can apply the same tag to multiple certificates if you want to filter for a common relationship among those certificates. Similarly, you can apply the same tag to multiple resources if you want to specify a relationship among those resources. For example, you can add the same tag to an ACM Certificate and an Elastic Load Balancing load balancer to indicate that they are both used by the same website. For more information, see Tagging ACM Certificates. To remove one or more tags, use the RemoveTagsFromCertificate action. To view all of the tags that have been applied to the certificate, use the ListTagsForCertificate action.\n+ */\n+ addTagsToCertificate(params: ACM.AddTagsToCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Adds one or more tags to an ACM Certificate. Tags are labels that you can use to identify and organize your AWS resources. Each tag consists of a key and an optional value. You specify the certificate on input by its Amazon Resource Name (ARN). You specify the tag by using a key-value pair. You can apply a tag to just one certificate if you want to identify a specific characteristic of that certificate, or you can apply the same tag to multiple certificates if you want to filter for a common relationship among those certificates. Similarly, you can apply the same tag to multiple resources if you want to specify a relationship among those resources. For example, you can add the same tag to an ACM Certificate and an Elastic Load Balancing load balancer to indicate that they are both used by the same website. For more information, see Tagging ACM Certificates. To remove one or more tags, use the RemoveTagsFromCertificate action. To view all of the tags that have been applied to the certificate, use the ListTagsForCertificate action.\n+ */\n+ addTagsToCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Deletes an ACM Certificate and its associated private key. If this action succeeds, the certificate no longer appears in the list of ACM Certificates that can be displayed by calling the ListCertificates action or be retrieved by calling the GetCertificate action. The certificate will not be available for use by other AWS services. You cannot delete an ACM Certificate that is being used by another AWS service. To delete a certificate that is in use, the certificate association must first be removed. \n+ */\n+ deleteCertificate(params: ACM.DeleteCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Deletes an ACM Certificate and its associated private key. If this action succeeds, the certificate no longer appears in the list of ACM Certificates that can be displayed by calling the ListCertificates action or be retrieved by calling the GetCertificate action. The certificate will not be available for use by other AWS services. You cannot delete an ACM Certificate that is being used by another AWS service. To delete a certificate that is in use, the certificate association must first be removed. \n+ */\n+ deleteCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Returns a list of the fields contained in the specified ACM Certificate. 
For example, this action returns the certificate status, a flag that indicates whether the certificate is associated with any other AWS service, and the date at which the certificate request was created. You specify the ACM Certificate on input by its Amazon Resource Name (ARN).\n+ */\n+ describeCertificate(params: ACM.DescribeCertificateRequest, callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void): Request;\n+ /**\n+ * Returns a list of the fields contained in the specified ACM Certificate. For example, this action returns the certificate status, a flag that indicates whether the certificate is associated with any other AWS service, and the date at which the certificate request was created. You specify the ACM Certificate on input by its Amazon Resource Name (ARN).\n+ */\n+ describeCertificate(callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves an ACM Certificate and certificate chain for the certificate specified by an ARN. The chain is an ordered list of certificates that contains the root certificate, intermediate certificates of subordinate CAs, and the ACM Certificate. The certificate and certificate chain are base64 encoded. If you want to decode the certificate chain to see the individual certificate fields, you can use OpenSSL. Currently, ACM Certificates can be used only with Elastic Load Balancing and Amazon CloudFront. \n+ */\n+ getCertificate(params: ACM.GetCertificateRequest, callback?: (err: AWSError, data: ACM.GetCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves an ACM Certificate and certificate chain for the certificate specified by an ARN. The chain is an ordered list of certificates that contains the root certificate, intermediate certificates of subordinate CAs, and the ACM Certificate. The certificate and certificate chain are base64 encoded. If you want to decode the certificate chain to see the individual certificate fields, you can use OpenSSL. Currently, ACM Certificates can be used only with Elastic Load Balancing and Amazon CloudFront. \n+ */\n+ getCertificate(callback?: (err: AWSError, data: ACM.GetCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves a list of ACM Certificates and the domain name for each. You can optionally filter the list to return only the certificates that match the specified status.\n+ */\n+ listCertificates(params: ACM.ListCertificatesRequest, callback?: (err: AWSError, data: ACM.ListCertificatesResponse) => void): Request;\n+ /**\n+ * Retrieves a list of ACM Certificates and the domain name for each. You can optionally filter the list to return only the certificates that match the specified status.\n+ */\n+ listCertificates(callback?: (err: AWSError, data: ACM.ListCertificatesResponse) => void): Request;\n+ /**\n+ * Lists the tags that have been applied to the ACM Certificate. Use the certificate ARN to specify the certificate. To add a tag to an ACM Certificate, use the AddTagsToCertificate action. To delete a tag, use the RemoveTagsFromCertificate action.\n+ */\n+ listTagsForCertificate(params: ACM.ListTagsForCertificateRequest, callback?: (err: AWSError, data: ACM.ListTagsForCertificateResponse) => void): Request;\n+ /**\n+ * Lists the tags that have been applied to the ACM Certificate. Use the certificate ARN to specify the certificate. To add a tag to an ACM Certificate, use the AddTagsToCertificate action. 
To delete a tag, use the RemoveTagsFromCertificate action.\n+ */\n+ listTagsForCertificate(callback?: (err: AWSError, data: ACM.ListTagsForCertificateResponse) => void): Request;\n+ /**\n+ * Remove one or more tags from an ACM Certificate. A tag consists of a key-value pair. If you do not specify the value portion of the tag when calling this function, the tag will be removed regardless of value. If you specify a value, the tag is removed only if it is associated with the specified value. To add tags to a certificate, use the AddTagsToCertificate action. To view all of the tags that have been applied to a specific ACM Certificate, use the ListTagsForCertificate action.\n+ */\n+ removeTagsFromCertificate(params: ACM.RemoveTagsFromCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Remove one or more tags from an ACM Certificate. A tag consists of a key-value pair. If you do not specify the value portion of the tag when calling this function, the tag will be removed regardless of value. If you specify a value, the tag is removed only if it is associated with the specified value. To add tags to a certificate, use the AddTagsToCertificate action. To view all of the tags that have been applied to a specific ACM Certificate, use the ListTagsForCertificate action.\n+ */\n+ removeTagsFromCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Requests an ACM Certificate for use with other AWS services. To request an ACM Certificate, you must specify the fully qualified domain name (FQDN) for your site. You can also specify additional FQDNs if users can reach your site by using other names. For each domain name you specify, email is sent to the domain owner to request approval to issue the certificate. After receiving approval from the domain owner, the ACM Certificate is issued. For more information, see the AWS Certificate Manager User Guide .\n+ */\n+ requestCertificate(params: ACM.RequestCertificateRequest, callback?: (err: AWSError, data: ACM.RequestCertificateResponse) => void): Request;\n+ /**\n+ * Requests an ACM Certificate for use with other AWS services. To request an ACM Certificate, you must specify the fully qualified domain name (FQDN) for your site. You can also specify additional FQDNs if users can reach your site by using other names. For each domain name you specify, email is sent to the domain owner to request approval to issue the certificate. After receiving approval from the domain owner, the ACM Certificate is issued. For more information, see the AWS Certificate Manager User Guide .\n+ */\n+ requestCertificate(callback?: (err: AWSError, data: ACM.RequestCertificateResponse) => void): Request;\n+ /**\n+ * Resends the email that requests domain ownership validation. The domain owner or an authorized representative must approve the ACM Certificate before it can be issued. The certificate can be approved by clicking a link in the mail to navigate to the Amazon certificate approval website and then clicking I Approve. However, the validation email can be blocked by spam filters. Therefore, if you do not receive the original mail, you can request that the mail be resent within 72 hours of requesting the ACM Certificate. 
If more than 72 hours have elapsed since your original request or since your last attempt to resend validation mail, you must request a new certificate.\n+ */\n+ resendValidationEmail(params: ACM.ResendValidationEmailRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Resends the email that requests domain ownership validation. The domain owner or an authorized representative must approve the ACM Certificate before it can be issued. The certificate can be approved by clicking a link in the mail to navigate to the Amazon certificate approval website and then clicking I Approve. However, the validation email can be blocked by spam filters. Therefore, if you do not receive the original mail, you can request that the mail be resent within 72 hours of requesting the ACM Certificate. If more than 72 hours have elapsed since your original request or since your last attempt to resend validation mail, you must request a new certificate.\n+ */\n+ resendValidationEmail(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+}\n+declare namespace ACM {", - "comment_created_at": "2016-10-24T23:28:51+00:00", - "comment_author": "LiuJoyceC", - "comment_body": "Is there a reason the service model's shapes need to be on this namespace? When I type `AWS.ACM.`, I don't think my code completion tool should try to complete `AddTagsToCertificateRequest` or `CertificateBody`, since these aren't actually valid properties on `AWS.ACM`. Instead, this namespace should contain `apiVersions`, `serviceIdentifier`, and `services` (unless these are private, in which case just don't have an `ACM` namespace). The shapes can live in a separate namespace that isn't exported, and you just have to change the references above. I've tested this by changing this namespace name to be `Shapes` (and changing references above), and I still get the correct code completion suggestions when I'm creating my `params` in a request, and I no longer get shape name suggestions when I type `AWS.ACM.`.\n", - "pr_file_module": null - }, - { - "comment_id": "84809757", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1189, - "pr_file": "clients/acm.d.ts", - "discussion_id": "84805047", - "commented_code": "@@ -0,0 +1,350 @@\n+import {Request} from '../lib/request';\n+import {Response} from '../lib/response';\n+import {AWSError} from '../lib/error';\n+import {Service} from '../lib/service';\n+declare class ACM extends Service {\n+ /**\n+ * Adds one or more tags to an ACM Certificate. Tags are labels that you can use to identify and organize your AWS resources. Each tag consists of a key and an optional value. You specify the certificate on input by its Amazon Resource Name (ARN). You specify the tag by using a key-value pair. You can apply a tag to just one certificate if you want to identify a specific characteristic of that certificate, or you can apply the same tag to multiple certificates if you want to filter for a common relationship among those certificates. Similarly, you can apply the same tag to multiple resources if you want to specify a relationship among those resources. For example, you can add the same tag to an ACM Certificate and an Elastic Load Balancing load balancer to indicate that they are both used by the same website. For more information, see Tagging ACM Certificates. To remove one or more tags, use the RemoveTagsFromCertificate action. 
To view all of the tags that have been applied to the certificate, use the ListTagsForCertificate action.\n+ */\n+ addTagsToCertificate(params: ACM.AddTagsToCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Adds one or more tags to an ACM Certificate. Tags are labels that you can use to identify and organize your AWS resources. Each tag consists of a key and an optional value. You specify the certificate on input by its Amazon Resource Name (ARN). You specify the tag by using a key-value pair. You can apply a tag to just one certificate if you want to identify a specific characteristic of that certificate, or you can apply the same tag to multiple certificates if you want to filter for a common relationship among those certificates. Similarly, you can apply the same tag to multiple resources if you want to specify a relationship among those resources. For example, you can add the same tag to an ACM Certificate and an Elastic Load Balancing load balancer to indicate that they are both used by the same website. For more information, see Tagging ACM Certificates. To remove one or more tags, use the RemoveTagsFromCertificate action. To view all of the tags that have been applied to the certificate, use the ListTagsForCertificate action.\n+ */\n+ addTagsToCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Deletes an ACM Certificate and its associated private key. If this action succeeds, the certificate no longer appears in the list of ACM Certificates that can be displayed by calling the ListCertificates action or be retrieved by calling the GetCertificate action. The certificate will not be available for use by other AWS services. You cannot delete an ACM Certificate that is being used by another AWS service. To delete a certificate that is in use, the certificate association must first be removed. \n+ */\n+ deleteCertificate(params: ACM.DeleteCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Deletes an ACM Certificate and its associated private key. If this action succeeds, the certificate no longer appears in the list of ACM Certificates that can be displayed by calling the ListCertificates action or be retrieved by calling the GetCertificate action. The certificate will not be available for use by other AWS services. You cannot delete an ACM Certificate that is being used by another AWS service. To delete a certificate that is in use, the certificate association must first be removed. \n+ */\n+ deleteCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Returns a list of the fields contained in the specified ACM Certificate. For example, this action returns the certificate status, a flag that indicates whether the certificate is associated with any other AWS service, and the date at which the certificate request was created. You specify the ACM Certificate on input by its Amazon Resource Name (ARN).\n+ */\n+ describeCertificate(params: ACM.DescribeCertificateRequest, callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void): Request;\n+ /**\n+ * Returns a list of the fields contained in the specified ACM Certificate. For example, this action returns the certificate status, a flag that indicates whether the certificate is associated with any other AWS service, and the date at which the certificate request was created. 
You specify the ACM Certificate on input by its Amazon Resource Name (ARN).\n+ */\n+ describeCertificate(callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves an ACM Certificate and certificate chain for the certificate specified by an ARN. The chain is an ordered list of certificates that contains the root certificate, intermediate certificates of subordinate CAs, and the ACM Certificate. The certificate and certificate chain are base64 encoded. If you want to decode the certificate chain to see the individual certificate fields, you can use OpenSSL. Currently, ACM Certificates can be used only with Elastic Load Balancing and Amazon CloudFront. \n+ */\n+ getCertificate(params: ACM.GetCertificateRequest, callback?: (err: AWSError, data: ACM.GetCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves an ACM Certificate and certificate chain for the certificate specified by an ARN. The chain is an ordered list of certificates that contains the root certificate, intermediate certificates of subordinate CAs, and the ACM Certificate. The certificate and certificate chain are base64 encoded. If you want to decode the certificate chain to see the individual certificate fields, you can use OpenSSL. Currently, ACM Certificates can be used only with Elastic Load Balancing and Amazon CloudFront. \n+ */\n+ getCertificate(callback?: (err: AWSError, data: ACM.GetCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves a list of ACM Certificates and the domain name for each. You can optionally filter the list to return only the certificates that match the specified status.\n+ */\n+ listCertificates(params: ACM.ListCertificatesRequest, callback?: (err: AWSError, data: ACM.ListCertificatesResponse) => void): Request;\n+ /**\n+ * Retrieves a list of ACM Certificates and the domain name for each. You can optionally filter the list to return only the certificates that match the specified status.\n+ */\n+ listCertificates(callback?: (err: AWSError, data: ACM.ListCertificatesResponse) => void): Request;\n+ /**\n+ * Lists the tags that have been applied to the ACM Certificate. Use the certificate ARN to specify the certificate. To add a tag to an ACM Certificate, use the AddTagsToCertificate action. To delete a tag, use the RemoveTagsFromCertificate action.\n+ */\n+ listTagsForCertificate(params: ACM.ListTagsForCertificateRequest, callback?: (err: AWSError, data: ACM.ListTagsForCertificateResponse) => void): Request;\n+ /**\n+ * Lists the tags that have been applied to the ACM Certificate. Use the certificate ARN to specify the certificate. To add a tag to an ACM Certificate, use the AddTagsToCertificate action. To delete a tag, use the RemoveTagsFromCertificate action.\n+ */\n+ listTagsForCertificate(callback?: (err: AWSError, data: ACM.ListTagsForCertificateResponse) => void): Request;\n+ /**\n+ * Remove one or more tags from an ACM Certificate. A tag consists of a key-value pair. If you do not specify the value portion of the tag when calling this function, the tag will be removed regardless of value. If you specify a value, the tag is removed only if it is associated with the specified value. To add tags to a certificate, use the AddTagsToCertificate action. 
To view all of the tags that have been applied to a specific ACM Certificate, use the ListTagsForCertificate action.\n+ */\n+ removeTagsFromCertificate(params: ACM.RemoveTagsFromCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Remove one or more tags from an ACM Certificate. A tag consists of a key-value pair. If you do not specify the value portion of the tag when calling this function, the tag will be removed regardless of value. If you specify a value, the tag is removed only if it is associated with the specified value. To add tags to a certificate, use the AddTagsToCertificate action. To view all of the tags that have been applied to a specific ACM Certificate, use the ListTagsForCertificate action.\n+ */\n+ removeTagsFromCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Requests an ACM Certificate for use with other AWS services. To request an ACM Certificate, you must specify the fully qualified domain name (FQDN) for your site. You can also specify additional FQDNs if users can reach your site by using other names. For each domain name you specify, email is sent to the domain owner to request approval to issue the certificate. After receiving approval from the domain owner, the ACM Certificate is issued. For more information, see the AWS Certificate Manager User Guide .\n+ */\n+ requestCertificate(params: ACM.RequestCertificateRequest, callback?: (err: AWSError, data: ACM.RequestCertificateResponse) => void): Request;\n+ /**\n+ * Requests an ACM Certificate for use with other AWS services. To request an ACM Certificate, you must specify the fully qualified domain name (FQDN) for your site. You can also specify additional FQDNs if users can reach your site by using other names. For each domain name you specify, email is sent to the domain owner to request approval to issue the certificate. After receiving approval from the domain owner, the ACM Certificate is issued. For more information, see the AWS Certificate Manager User Guide .\n+ */\n+ requestCertificate(callback?: (err: AWSError, data: ACM.RequestCertificateResponse) => void): Request;\n+ /**\n+ * Resends the email that requests domain ownership validation. The domain owner or an authorized representative must approve the ACM Certificate before it can be issued. The certificate can be approved by clicking a link in the mail to navigate to the Amazon certificate approval website and then clicking I Approve. However, the validation email can be blocked by spam filters. Therefore, if you do not receive the original mail, you can request that the mail be resent within 72 hours of requesting the ACM Certificate. If more than 72 hours have elapsed since your original request or since your last attempt to resend validation mail, you must request a new certificate.\n+ */\n+ resendValidationEmail(params: ACM.ResendValidationEmailRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Resends the email that requests domain ownership validation. The domain owner or an authorized representative must approve the ACM Certificate before it can be issued. The certificate can be approved by clicking a link in the mail to navigate to the Amazon certificate approval website and then clicking I Approve. However, the validation email can be blocked by spam filters. Therefore, if you do not receive the original mail, you can request that the mail be resent within 72 hours of requesting the ACM Certificate. 
If more than 72 hours have elapsed since your original request or since your last attempt to resend validation mail, you must request a new certificate.\n+ */\n+ resendValidationEmail(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+}\n+declare namespace ACM {", - "comment_created_at": "2016-10-25T00:12:02+00:00", - "comment_author": "chrisradek", - "comment_body": "I wanted to export all the interfaces so that users could cast a result if they needed to, but I'm not sure if that's necessary. I'll have to take a look at some other TypeScript libraries to see if that's common practice.\n\nYou should only see these interfaces when looking at the service client constructors, but not on the service client instances.\n", - "pr_file_module": null - }, - { - "comment_id": "84816531", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1189, - "pr_file": "clients/acm.d.ts", - "discussion_id": "84805047", - "commented_code": "@@ -0,0 +1,350 @@\n+import {Request} from '../lib/request';\n+import {Response} from '../lib/response';\n+import {AWSError} from '../lib/error';\n+import {Service} from '../lib/service';\n+declare class ACM extends Service {\n+ /**\n+ * Adds one or more tags to an ACM Certificate. Tags are labels that you can use to identify and organize your AWS resources. Each tag consists of a key and an optional value. You specify the certificate on input by its Amazon Resource Name (ARN). You specify the tag by using a key-value pair. You can apply a tag to just one certificate if you want to identify a specific characteristic of that certificate, or you can apply the same tag to multiple certificates if you want to filter for a common relationship among those certificates. Similarly, you can apply the same tag to multiple resources if you want to specify a relationship among those resources. For example, you can add the same tag to an ACM Certificate and an Elastic Load Balancing load balancer to indicate that they are both used by the same website. For more information, see Tagging ACM Certificates. To remove one or more tags, use the RemoveTagsFromCertificate action. To view all of the tags that have been applied to the certificate, use the ListTagsForCertificate action.\n+ */\n+ addTagsToCertificate(params: ACM.AddTagsToCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Adds one or more tags to an ACM Certificate. Tags are labels that you can use to identify and organize your AWS resources. Each tag consists of a key and an optional value. You specify the certificate on input by its Amazon Resource Name (ARN). You specify the tag by using a key-value pair. You can apply a tag to just one certificate if you want to identify a specific characteristic of that certificate, or you can apply the same tag to multiple certificates if you want to filter for a common relationship among those certificates. Similarly, you can apply the same tag to multiple resources if you want to specify a relationship among those resources. For example, you can add the same tag to an ACM Certificate and an Elastic Load Balancing load balancer to indicate that they are both used by the same website. For more information, see Tagging ACM Certificates. To remove one or more tags, use the RemoveTagsFromCertificate action. 
To view all of the tags that have been applied to the certificate, use the ListTagsForCertificate action.\n+ */\n+ addTagsToCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Deletes an ACM Certificate and its associated private key. If this action succeeds, the certificate no longer appears in the list of ACM Certificates that can be displayed by calling the ListCertificates action or be retrieved by calling the GetCertificate action. The certificate will not be available for use by other AWS services. You cannot delete an ACM Certificate that is being used by another AWS service. To delete a certificate that is in use, the certificate association must first be removed. \n+ */\n+ deleteCertificate(params: ACM.DeleteCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Deletes an ACM Certificate and its associated private key. If this action succeeds, the certificate no longer appears in the list of ACM Certificates that can be displayed by calling the ListCertificates action or be retrieved by calling the GetCertificate action. The certificate will not be available for use by other AWS services. You cannot delete an ACM Certificate that is being used by another AWS service. To delete a certificate that is in use, the certificate association must first be removed. \n+ */\n+ deleteCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Returns a list of the fields contained in the specified ACM Certificate. For example, this action returns the certificate status, a flag that indicates whether the certificate is associated with any other AWS service, and the date at which the certificate request was created. You specify the ACM Certificate on input by its Amazon Resource Name (ARN).\n+ */\n+ describeCertificate(params: ACM.DescribeCertificateRequest, callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void): Request;\n+ /**\n+ * Returns a list of the fields contained in the specified ACM Certificate. For example, this action returns the certificate status, a flag that indicates whether the certificate is associated with any other AWS service, and the date at which the certificate request was created. You specify the ACM Certificate on input by its Amazon Resource Name (ARN).\n+ */\n+ describeCertificate(callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves an ACM Certificate and certificate chain for the certificate specified by an ARN. The chain is an ordered list of certificates that contains the root certificate, intermediate certificates of subordinate CAs, and the ACM Certificate. The certificate and certificate chain are base64 encoded. If you want to decode the certificate chain to see the individual certificate fields, you can use OpenSSL. Currently, ACM Certificates can be used only with Elastic Load Balancing and Amazon CloudFront. \n+ */\n+ getCertificate(params: ACM.GetCertificateRequest, callback?: (err: AWSError, data: ACM.GetCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves an ACM Certificate and certificate chain for the certificate specified by an ARN. The chain is an ordered list of certificates that contains the root certificate, intermediate certificates of subordinate CAs, and the ACM Certificate. The certificate and certificate chain are base64 encoded. If you want to decode the certificate chain to see the individual certificate fields, you can use OpenSSL. 
Currently, ACM Certificates can be used only with Elastic Load Balancing and Amazon CloudFront. \n+ */\n+ getCertificate(callback?: (err: AWSError, data: ACM.GetCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves a list of ACM Certificates and the domain name for each. You can optionally filter the list to return only the certificates that match the specified status.\n+ */\n+ listCertificates(params: ACM.ListCertificatesRequest, callback?: (err: AWSError, data: ACM.ListCertificatesResponse) => void): Request;\n+ /**\n+ * Retrieves a list of ACM Certificates and the domain name for each. You can optionally filter the list to return only the certificates that match the specified status.\n+ */\n+ listCertificates(callback?: (err: AWSError, data: ACM.ListCertificatesResponse) => void): Request;\n+ /**\n+ * Lists the tags that have been applied to the ACM Certificate. Use the certificate ARN to specify the certificate. To add a tag to an ACM Certificate, use the AddTagsToCertificate action. To delete a tag, use the RemoveTagsFromCertificate action.\n+ */\n+ listTagsForCertificate(params: ACM.ListTagsForCertificateRequest, callback?: (err: AWSError, data: ACM.ListTagsForCertificateResponse) => void): Request;\n+ /**\n+ * Lists the tags that have been applied to the ACM Certificate. Use the certificate ARN to specify the certificate. To add a tag to an ACM Certificate, use the AddTagsToCertificate action. To delete a tag, use the RemoveTagsFromCertificate action.\n+ */\n+ listTagsForCertificate(callback?: (err: AWSError, data: ACM.ListTagsForCertificateResponse) => void): Request;\n+ /**\n+ * Remove one or more tags from an ACM Certificate. A tag consists of a key-value pair. If you do not specify the value portion of the tag when calling this function, the tag will be removed regardless of value. If you specify a value, the tag is removed only if it is associated with the specified value. To add tags to a certificate, use the AddTagsToCertificate action. To view all of the tags that have been applied to a specific ACM Certificate, use the ListTagsForCertificate action.\n+ */\n+ removeTagsFromCertificate(params: ACM.RemoveTagsFromCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Remove one or more tags from an ACM Certificate. A tag consists of a key-value pair. If you do not specify the value portion of the tag when calling this function, the tag will be removed regardless of value. If you specify a value, the tag is removed only if it is associated with the specified value. To add tags to a certificate, use the AddTagsToCertificate action. To view all of the tags that have been applied to a specific ACM Certificate, use the ListTagsForCertificate action.\n+ */\n+ removeTagsFromCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Requests an ACM Certificate for use with other AWS services. To request an ACM Certificate, you must specify the fully qualified domain name (FQDN) for your site. You can also specify additional FQDNs if users can reach your site by using other names. For each domain name you specify, email is sent to the domain owner to request approval to issue the certificate. After receiving approval from the domain owner, the ACM Certificate is issued. 
For more information, see the AWS Certificate Manager User Guide .\n+ */\n+ requestCertificate(params: ACM.RequestCertificateRequest, callback?: (err: AWSError, data: ACM.RequestCertificateResponse) => void): Request;\n+ /**\n+ * Requests an ACM Certificate for use with other AWS services. To request an ACM Certificate, you must specify the fully qualified domain name (FQDN) for your site. You can also specify additional FQDNs if users can reach your site by using other names. For each domain name you specify, email is sent to the domain owner to request approval to issue the certificate. After receiving approval from the domain owner, the ACM Certificate is issued. For more information, see the AWS Certificate Manager User Guide .\n+ */\n+ requestCertificate(callback?: (err: AWSError, data: ACM.RequestCertificateResponse) => void): Request;\n+ /**\n+ * Resends the email that requests domain ownership validation. The domain owner or an authorized representative must approve the ACM Certificate before it can be issued. The certificate can be approved by clicking a link in the mail to navigate to the Amazon certificate approval website and then clicking I Approve. However, the validation email can be blocked by spam filters. Therefore, if you do not receive the original mail, you can request that the mail be resent within 72 hours of requesting the ACM Certificate. If more than 72 hours have elapsed since your original request or since your last attempt to resend validation mail, you must request a new certificate.\n+ */\n+ resendValidationEmail(params: ACM.ResendValidationEmailRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Resends the email that requests domain ownership validation. The domain owner or an authorized representative must approve the ACM Certificate before it can be issued. The certificate can be approved by clicking a link in the mail to navigate to the Amazon certificate approval website and then clicking I Approve. However, the validation email can be blocked by spam filters. Therefore, if you do not receive the original mail, you can request that the mail be resent within 72 hours of requesting the ACM Certificate. If more than 72 hours have elapsed since your original request or since your last attempt to resend validation mail, you must request a new certificate.\n+ */\n+ resendValidationEmail(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+}\n+declare namespace ACM {", - "comment_created_at": "2016-10-25T01:27:43+00:00", - "comment_author": "LiuJoyceC", - "comment_body": "Right, it would only be on the constructor, but there are valid reasons a customer may want to look at the properties of the constructor such as looking up what `apiVersions` are available for a service, or for the case of S3 and DynamoDB, they want to access the `ManagedUpload` or `DocumentClient`. 
When they try to access these, the constructors will autocomplete with a large number of properties that aren't real and make it difficult to find the actual property they are trying to access.\n", - "pr_file_module": null - }, - { - "comment_id": "85429596", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1189, - "pr_file": "clients/acm.d.ts", - "discussion_id": "84805047", - "commented_code": "@@ -0,0 +1,350 @@\n+import {Request} from '../lib/request';\n+import {Response} from '../lib/response';\n+import {AWSError} from '../lib/error';\n+import {Service} from '../lib/service';\n+declare class ACM extends Service {\n+ /**\n+ * Adds one or more tags to an ACM Certificate. Tags are labels that you can use to identify and organize your AWS resources. Each tag consists of a key and an optional value. You specify the certificate on input by its Amazon Resource Name (ARN). You specify the tag by using a key-value pair. You can apply a tag to just one certificate if you want to identify a specific characteristic of that certificate, or you can apply the same tag to multiple certificates if you want to filter for a common relationship among those certificates. Similarly, you can apply the same tag to multiple resources if you want to specify a relationship among those resources. For example, you can add the same tag to an ACM Certificate and an Elastic Load Balancing load balancer to indicate that they are both used by the same website. For more information, see Tagging ACM Certificates. To remove one or more tags, use the RemoveTagsFromCertificate action. To view all of the tags that have been applied to the certificate, use the ListTagsForCertificate action.\n+ */\n+ addTagsToCertificate(params: ACM.AddTagsToCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Adds one or more tags to an ACM Certificate. Tags are labels that you can use to identify and organize your AWS resources. Each tag consists of a key and an optional value. You specify the certificate on input by its Amazon Resource Name (ARN). You specify the tag by using a key-value pair. You can apply a tag to just one certificate if you want to identify a specific characteristic of that certificate, or you can apply the same tag to multiple certificates if you want to filter for a common relationship among those certificates. Similarly, you can apply the same tag to multiple resources if you want to specify a relationship among those resources. For example, you can add the same tag to an ACM Certificate and an Elastic Load Balancing load balancer to indicate that they are both used by the same website. For more information, see Tagging ACM Certificates. To remove one or more tags, use the RemoveTagsFromCertificate action. To view all of the tags that have been applied to the certificate, use the ListTagsForCertificate action.\n+ */\n+ addTagsToCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Deletes an ACM Certificate and its associated private key. If this action succeeds, the certificate no longer appears in the list of ACM Certificates that can be displayed by calling the ListCertificates action or be retrieved by calling the GetCertificate action. The certificate will not be available for use by other AWS services. You cannot delete an ACM Certificate that is being used by another AWS service. To delete a certificate that is in use, the certificate association must first be removed. 
\n+ */\n+ deleteCertificate(params: ACM.DeleteCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Deletes an ACM Certificate and its associated private key. If this action succeeds, the certificate no longer appears in the list of ACM Certificates that can be displayed by calling the ListCertificates action or be retrieved by calling the GetCertificate action. The certificate will not be available for use by other AWS services. You cannot delete an ACM Certificate that is being used by another AWS service. To delete a certificate that is in use, the certificate association must first be removed. \n+ */\n+ deleteCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Returns a list of the fields contained in the specified ACM Certificate. For example, this action returns the certificate status, a flag that indicates whether the certificate is associated with any other AWS service, and the date at which the certificate request was created. You specify the ACM Certificate on input by its Amazon Resource Name (ARN).\n+ */\n+ describeCertificate(params: ACM.DescribeCertificateRequest, callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void): Request;\n+ /**\n+ * Returns a list of the fields contained in the specified ACM Certificate. For example, this action returns the certificate status, a flag that indicates whether the certificate is associated with any other AWS service, and the date at which the certificate request was created. You specify the ACM Certificate on input by its Amazon Resource Name (ARN).\n+ */\n+ describeCertificate(callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves an ACM Certificate and certificate chain for the certificate specified by an ARN. The chain is an ordered list of certificates that contains the root certificate, intermediate certificates of subordinate CAs, and the ACM Certificate. The certificate and certificate chain are base64 encoded. If you want to decode the certificate chain to see the individual certificate fields, you can use OpenSSL. Currently, ACM Certificates can be used only with Elastic Load Balancing and Amazon CloudFront. \n+ */\n+ getCertificate(params: ACM.GetCertificateRequest, callback?: (err: AWSError, data: ACM.GetCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves an ACM Certificate and certificate chain for the certificate specified by an ARN. The chain is an ordered list of certificates that contains the root certificate, intermediate certificates of subordinate CAs, and the ACM Certificate. The certificate and certificate chain are base64 encoded. If you want to decode the certificate chain to see the individual certificate fields, you can use OpenSSL. Currently, ACM Certificates can be used only with Elastic Load Balancing and Amazon CloudFront. \n+ */\n+ getCertificate(callback?: (err: AWSError, data: ACM.GetCertificateResponse) => void): Request;\n+ /**\n+ * Retrieves a list of ACM Certificates and the domain name for each. You can optionally filter the list to return only the certificates that match the specified status.\n+ */\n+ listCertificates(params: ACM.ListCertificatesRequest, callback?: (err: AWSError, data: ACM.ListCertificatesResponse) => void): Request;\n+ /**\n+ * Retrieves a list of ACM Certificates and the domain name for each. 
You can optionally filter the list to return only the certificates that match the specified status.\n+ */\n+ listCertificates(callback?: (err: AWSError, data: ACM.ListCertificatesResponse) => void): Request;\n+ /**\n+ * Lists the tags that have been applied to the ACM Certificate. Use the certificate ARN to specify the certificate. To add a tag to an ACM Certificate, use the AddTagsToCertificate action. To delete a tag, use the RemoveTagsFromCertificate action.\n+ */\n+ listTagsForCertificate(params: ACM.ListTagsForCertificateRequest, callback?: (err: AWSError, data: ACM.ListTagsForCertificateResponse) => void): Request;\n+ /**\n+ * Lists the tags that have been applied to the ACM Certificate. Use the certificate ARN to specify the certificate. To add a tag to an ACM Certificate, use the AddTagsToCertificate action. To delete a tag, use the RemoveTagsFromCertificate action.\n+ */\n+ listTagsForCertificate(callback?: (err: AWSError, data: ACM.ListTagsForCertificateResponse) => void): Request;\n+ /**\n+ * Remove one or more tags from an ACM Certificate. A tag consists of a key-value pair. If you do not specify the value portion of the tag when calling this function, the tag will be removed regardless of value. If you specify a value, the tag is removed only if it is associated with the specified value. To add tags to a certificate, use the AddTagsToCertificate action. To view all of the tags that have been applied to a specific ACM Certificate, use the ListTagsForCertificate action.\n+ */\n+ removeTagsFromCertificate(params: ACM.RemoveTagsFromCertificateRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Remove one or more tags from an ACM Certificate. A tag consists of a key-value pair. If you do not specify the value portion of the tag when calling this function, the tag will be removed regardless of value. If you specify a value, the tag is removed only if it is associated with the specified value. To add tags to a certificate, use the AddTagsToCertificate action. To view all of the tags that have been applied to a specific ACM Certificate, use the ListTagsForCertificate action.\n+ */\n+ removeTagsFromCertificate(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Requests an ACM Certificate for use with other AWS services. To request an ACM Certificate, you must specify the fully qualified domain name (FQDN) for your site. You can also specify additional FQDNs if users can reach your site by using other names. For each domain name you specify, email is sent to the domain owner to request approval to issue the certificate. After receiving approval from the domain owner, the ACM Certificate is issued. For more information, see the AWS Certificate Manager User Guide .\n+ */\n+ requestCertificate(params: ACM.RequestCertificateRequest, callback?: (err: AWSError, data: ACM.RequestCertificateResponse) => void): Request;\n+ /**\n+ * Requests an ACM Certificate for use with other AWS services. To request an ACM Certificate, you must specify the fully qualified domain name (FQDN) for your site. You can also specify additional FQDNs if users can reach your site by using other names. For each domain name you specify, email is sent to the domain owner to request approval to issue the certificate. After receiving approval from the domain owner, the ACM Certificate is issued. 
For more information, see the AWS Certificate Manager User Guide .\n+ */\n+ requestCertificate(callback?: (err: AWSError, data: ACM.RequestCertificateResponse) => void): Request;\n+ /**\n+ * Resends the email that requests domain ownership validation. The domain owner or an authorized representative must approve the ACM Certificate before it can be issued. The certificate can be approved by clicking a link in the mail to navigate to the Amazon certificate approval website and then clicking I Approve. However, the validation email can be blocked by spam filters. Therefore, if you do not receive the original mail, you can request that the mail be resent within 72 hours of requesting the ACM Certificate. If more than 72 hours have elapsed since your original request or since your last attempt to resend validation mail, you must request a new certificate.\n+ */\n+ resendValidationEmail(params: ACM.ResendValidationEmailRequest, callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+ /**\n+ * Resends the email that requests domain ownership validation. The domain owner or an authorized representative must approve the ACM Certificate before it can be issued. The certificate can be approved by clicking a link in the mail to navigate to the Amazon certificate approval website and then clicking I Approve. However, the validation email can be blocked by spam filters. Therefore, if you do not receive the original mail, you can request that the mail be resent within 72 hours of requesting the ACM Certificate. If more than 72 hours have elapsed since your original request or since your last attempt to resend validation mail, you must request a new certificate.\n+ */\n+ resendValidationEmail(callback?: (err: AWSError, data: {}) => void): Request<{}, AWSError>;\n+}\n+declare namespace ACM {", - "comment_created_at": "2016-10-27T21:17:07+00:00", - "comment_author": "chrisradek", - "comment_body": "So, I tried a few different things. Ultimately, I went with putting the exported types in a sub-namespace:\n`declare namespace SERVICE.Types {`\nWhen I put them on their own namespace, I had to explicitly import them into my app, otherwise the typescript compiler would complain:\nhttps://github.com/Microsoft/TypeScript/issues/9944\n\nI also wanted them to be exported so user's can specify a type for cases when the typescript compiler can't quite infer what a type should be. 
That might not be necessary once https://github.com/Microsoft/TypeScript/issues/6606 is addressed.\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "85630294", - "pr_number": 1189, - "pr_file": "lib/config.d.ts", - "created_at": "2016-10-29T01:03:41+00:00", - "commented_code": "/// \n\nimport * as http from 'http';\nimport * as https from 'https';\nimport {AWSError} from './error';\nimport {Credentials} from './credentials';\nexport class Config {\n /**\n * Creates a new configuration object.\n * This is the object that passes option data along to service requests, including credentials, security, region information, and some service specific settings.\n */\n constructor(options?: ConfigurationOptions);\n /**\n * Loads credentials from the configuration object.\n */\n getCredentials(callback: (err: AWSError) => void): void;\n /**\n * Loads configuration data from a JSON file into this config object.\n * Loading configuration willr eset all existing configuration on the object.\n * This feature is not supported in the browser environment of the SDK.\n * \n * @param {string} path - the path relative to your process's current working directory to load configuration from.\n */\n loadFromPath(path: string): Config;\n /**\n * Updates the current configuration object with new options.\n * \n * @param {ConfigurationOptions} options - a map of option keys and values.\n * @param {boolean} allowUnknownKeys - Defaults to false. Whether unknown keys can be set on the configuration object.\n */\n update(options: ConfigurationOptions, allowUnknownKeys?: boolean): void;\n /**\n * Sets the promise dependency the SDK will use wherever Promises are returned.\n * @param {function} dep - a reference to a Promise constructor\n */\n setPromisesDependency(dep: any): void;\n\n /**\n * Whether to compute checksums for payload bodies when the service accepts it.\n * Currently supported in S3 only.\n */\n computeChecksums?: boolean\n /**\n * Whether types are converted when parsing response data.\n */\n convertResponseTypes?: boolean\n /**\n * Whether to apply a clock skew correction and retry requests that fail because of an skewed client clock.\n */\n correctClockSkew?: boolean\n /**\n * The AWS credentials to sign requests with.\n */\n credentials: Credentials\n /**\n * A set of options to pass to the low-level HTTP request.\n */\n httpOptions?: HTTPOptions\n /**\n * An object that responds to .write() (like a stream) or .log() (like the console object) in order to log information about requests.\n */\n logger?: Logger\n /**\n * The maximum amount of redirects to follow for a service request.\n */\n maxRedirects?: number\n /**\n * The maximum amount of retries to perform for a service request.\n */\n maxRetries?: number\n /**\n * Returns whether input parameters should be validated against the operation description before sending the request. \n * Defaults to true. 
\n * Pass a map to enable any of the following specific validation features: min|max|pattern|enum\n */\n paramValidation?: ParamValidation | boolean\n /**\n * The region to send service requests to.\n */\n region?: string\n /**\n * Returns A set of options to configure the retry delay on retryable errors.\n */\n retryDelayOptions?: RetryDelayOptions\n /**\n * Whether the provided endpoint addresses an individual bucket.\n * false if it addresses the root API endpoint.\n */\n s3BucketEndpoint?: boolean\n /**\n * Whether to disable S3 body signing when using signature version v4.\n */\n s3DisableBodySigning?: boolean\n /**\n * Whether to force path style URLs for S3 objects.\n */\n s3ForcePathStyle?: boolean\n /**\n * Whether the signature to sign requests with (overriding the API configuration) is cached.\n */\n signatureCache?: boolean\n /**\n * The signature version to sign requests with (overriding the API configuration).\n * Possible values: 'v2'|'v3'|'v4'\n */\n signatureVersion?: \"v2\"|\"v3\"|\"v4\"|string\n /**\n * Whether SSL is enabled for requests.\n */\n sslEnabled?: boolean\n /**\n * An offset value in milliseconds to apply to all signing times.\n */\n systemClockOffset?: number\n /**\n * Whether to use the Accelerate endpoint with the S3 service.\n */\n useAccelerateEndpoint?: boolean\n}\n\ninterface HTTPOptions {\n /**\n * the URL to proxy requests through.\n */\n proxy?: string\n /**\n * the Agent object to perform HTTP requests with.\n * Used for connection pooling.\n * Defaults to the global agent (http.globalAgent) for non-SSL connections.\n */\n agent?: http.Agent | https.Agent\n /**\n * The number of milliseconds to wait before giving up on a connection attempt.\n * Defaults to two minutes (120000).\n */\n timeout?: number\n /**\n * Whether the SDK will send asynchronous HTTP requests.\n * Used in the browser environment only.\n * Set to false to send requests synchronously.\n * Defaults to true (async on).\n */\n xhrAsync?: boolean\n /**\n * Sets the 'withCredentials' property of an XMLHttpRequest object.\n * Used in the browser environment only.\n * Defaults to false.\n */\n xhrWithCredentials?: boolean\n}\ninterface Logger {\n write?: (chunk: any, encoding?: string, callback?: () => void) => void\n log?: (...messages: any[]) => void;\n}\ninterface ParamValidation {\n /**\n * Validates that a value meets the min constraint. \n * This is enabled by default when paramValidation is set to true.\n */\n min?: boolean\n /**\n * Validates that a value meets the max constraint.\n */\n max?: boolean\n /**\n * Validates that a string value matches a regular expression.\n */\n pattern?: boolean\n /**\n * Validates that a string value matches one of the allowable enum values.\n */\n enum?: boolean\n}\ninterface RetryDelayOptions {\n /**\n * The base number of milliseconds to use in the exponential backoff for operation retries. \n * Defaults to 100 ms.\n */\n base?: number\n /**\n * A custom function that accepts a retry count and returns the amount of time to delay in milliseconds. 
\n * The base option will be ignored if this option is supplied.\n */\n customBackoff?: (retryCount: number) => number\n}\ninterface ConfigurationOptions {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "85630294", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1189, - "pr_file": "lib/config.d.ts", - "discussion_id": "85630294", - "commented_code": "@@ -0,0 +1,268 @@\n+/// \n+\n+import * as http from 'http';\n+import * as https from 'https';\n+import {AWSError} from './error';\n+import {Credentials} from './credentials';\n+export class Config {\n+ /**\n+ * Creates a new configuration object.\n+ * This is the object that passes option data along to service requests, including credentials, security, region information, and some service specific settings.\n+ */\n+ constructor(options?: ConfigurationOptions);\n+ /**\n+ * Loads credentials from the configuration object.\n+ */\n+ getCredentials(callback: (err: AWSError) => void): void;\n+ /**\n+ * Loads configuration data from a JSON file into this config object.\n+ * Loading configuration willr eset all existing configuration on the object.\n+ * This feature is not supported in the browser environment of the SDK.\n+ * \n+ * @param {string} path - the path relative to your process's current working directory to load configuration from.\n+ */\n+ loadFromPath(path: string): Config;\n+ /**\n+ * Updates the current configuration object with new options.\n+ * \n+ * @param {ConfigurationOptions} options - a map of option keys and values.\n+ * @param {boolean} allowUnknownKeys - Defaults to false. Whether unknown keys can be set on the configuration object.\n+ */\n+ update(options: ConfigurationOptions, allowUnknownKeys?: boolean): void;\n+ /**\n+ * Sets the promise dependency the SDK will use wherever Promises are returned.\n+ * @param {function} dep - a reference to a Promise constructor\n+ */\n+ setPromisesDependency(dep: any): void;\n+\n+ /**\n+ * Whether to compute checksums for payload bodies when the service accepts it.\n+ * Currently supported in S3 only.\n+ */\n+ computeChecksums?: boolean\n+ /**\n+ * Whether types are converted when parsing response data.\n+ */\n+ convertResponseTypes?: boolean\n+ /**\n+ * Whether to apply a clock skew correction and retry requests that fail because of an skewed client clock.\n+ */\n+ correctClockSkew?: boolean\n+ /**\n+ * The AWS credentials to sign requests with.\n+ */\n+ credentials: Credentials\n+ /**\n+ * A set of options to pass to the low-level HTTP request.\n+ */\n+ httpOptions?: HTTPOptions\n+ /**\n+ * An object that responds to .write() (like a stream) or .log() (like the console object) in order to log information about requests.\n+ */\n+ logger?: Logger\n+ /**\n+ * The maximum amount of redirects to follow for a service request.\n+ */\n+ maxRedirects?: number\n+ /**\n+ * The maximum amount of retries to perform for a service request.\n+ */\n+ maxRetries?: number\n+ /**\n+ * Returns whether input parameters should be validated against the operation description before sending the request. \n+ * Defaults to true. 
\n+ * Pass a map to enable any of the following specific validation features: min|max|pattern|enum\n+ */\n+ paramValidation?: ParamValidation | boolean\n+ /**\n+ * The region to send service requests to.\n+ */\n+ region?: string\n+ /**\n+ * Returns A set of options to configure the retry delay on retryable errors.\n+ */\n+ retryDelayOptions?: RetryDelayOptions\n+ /**\n+ * Whether the provided endpoint addresses an individual bucket.\n+ * false if it addresses the root API endpoint.\n+ */\n+ s3BucketEndpoint?: boolean\n+ /**\n+ * Whether to disable S3 body signing when using signature version v4.\n+ */\n+ s3DisableBodySigning?: boolean\n+ /**\n+ * Whether to force path style URLs for S3 objects.\n+ */\n+ s3ForcePathStyle?: boolean\n+ /**\n+ * Whether the signature to sign requests with (overriding the API configuration) is cached.\n+ */\n+ signatureCache?: boolean\n+ /**\n+ * The signature version to sign requests with (overriding the API configuration).\n+ * Possible values: 'v2'|'v3'|'v4'\n+ */\n+ signatureVersion?: \"v2\"|\"v3\"|\"v4\"|string\n+ /**\n+ * Whether SSL is enabled for requests.\n+ */\n+ sslEnabled?: boolean\n+ /**\n+ * An offset value in milliseconds to apply to all signing times.\n+ */\n+ systemClockOffset?: number\n+ /**\n+ * Whether to use the Accelerate endpoint with the S3 service.\n+ */\n+ useAccelerateEndpoint?: boolean\n+}\n+\n+interface HTTPOptions {\n+ /**\n+ * the URL to proxy requests through.\n+ */\n+ proxy?: string\n+ /**\n+ * the Agent object to perform HTTP requests with.\n+ * Used for connection pooling.\n+ * Defaults to the global agent (http.globalAgent) for non-SSL connections.\n+ */\n+ agent?: http.Agent | https.Agent\n+ /**\n+ * The number of milliseconds to wait before giving up on a connection attempt.\n+ * Defaults to two minutes (120000).\n+ */\n+ timeout?: number\n+ /**\n+ * Whether the SDK will send asynchronous HTTP requests.\n+ * Used in the browser environment only.\n+ * Set to false to send requests synchronously.\n+ * Defaults to true (async on).\n+ */\n+ xhrAsync?: boolean\n+ /**\n+ * Sets the 'withCredentials' property of an XMLHttpRequest object.\n+ * Used in the browser environment only.\n+ * Defaults to false.\n+ */\n+ xhrWithCredentials?: boolean\n+}\n+interface Logger {\n+ write?: (chunk: any, encoding?: string, callback?: () => void) => void\n+ log?: (...messages: any[]) => void;\n+}\n+interface ParamValidation {\n+ /**\n+ * Validates that a value meets the min constraint. \n+ * This is enabled by default when paramValidation is set to true.\n+ */\n+ min?: boolean\n+ /**\n+ * Validates that a value meets the max constraint.\n+ */\n+ max?: boolean\n+ /**\n+ * Validates that a string value matches a regular expression.\n+ */\n+ pattern?: boolean\n+ /**\n+ * Validates that a string value matches one of the allowable enum values.\n+ */\n+ enum?: boolean\n+}\n+interface RetryDelayOptions {\n+ /**\n+ * The base number of milliseconds to use in the exponential backoff for operation retries. \n+ * Defaults to 100 ms.\n+ */\n+ base?: number\n+ /**\n+ * A custom function that accepts a retry count and returns the amount of time to delay in milliseconds. 
\n+ * The base option will be ignored if this option is supplied.\n+ */\n+ customBackoff?: (retryCount: number) => number\n+}\n+interface ConfigurationOptions {", - "comment_created_at": "2016-10-29T01:03:41+00:00", - "comment_author": "LiuJoyceC", - "comment_body": "Instead of maintaining two separate but identical lists of these config options plus comments (here and above in the `Config` class), perhaps we could define this as an abstract class rather than an interface, and then the `Config` class could extend this abstract class.\nAnother solution could be to generate this file (which may be a good idea anyway since we need to include all of the service identifiers in the `Config` class as mentioned above). However, we would then need to maintain the comments for each of these configuration options in another file, since I noticed these comments aren't exactly identical to the documentation in the `config.js` file.\n", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/aws-sdk-js-semantic-naming-conventions.json b/_reviewers/aws-sdk-js-semantic-naming-conventions.json new file mode 100644 index 0000000..4b01de3 --- /dev/null +++ b/_reviewers/aws-sdk-js-semantic-naming-conventions.json @@ -0,0 +1,194 @@ +[ + { + "discussion_id": "1013368805", + "pr_number": 4261, + "pr_file": "lib/services/resourceexplorer2.js", + "created_at": "2022-11-03T20:29:07+00:00", + "commented_code": "var AWS = require('../core');\n\nAWS.util.update(AWS.ResourceExplorer2.prototype, {\n validateService: function () {\n var config = this.config;\n if (config.useDualstackEndpoint === false || config.useDualStack === false) {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "1013368805", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 4261, + "pr_file": "lib/services/resourceexplorer2.js", + "discussion_id": "1013368805", + "commented_code": "@@ -0,0 +1,12 @@\n+var AWS = require('../core');\n+\n+AWS.util.update(AWS.ResourceExplorer2.prototype, {\n+ validateService: function () {\n+ var config = this.config;\n+ if (config.useDualstackEndpoint === false || config.useDualStack === false) {", + "comment_created_at": "2022-11-03T20:29:07+00:00", + "comment_author": "kuhe", + "comment_body": "use this casing: `useDualstack`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "85186711", + "pr_number": 1189, + "pr_file": "scripts/lib/ts-generator.js", + "created_at": "2016-10-26T18:17:43+00:00", + "commented_code": "var fs = require('fs');\nvar path = require('path');\n\nvar CUSTOM_CONFIG_ENUMS = {\n DUALSTACK: {\n FILE_NAME: 'config_use_dualstack',\n INTERFACE: 'UseDualstackConfigOptions'\n }\n};\n\nfunction TSGenerator(options) {\n this._sdkRootDir = options.SdkRootDirectory || process.cwd();\n this._apiRootDir = path.join(this._sdkRootDir, 'apis');\n this._metadataPath = path.join(this._apiRootDir, 'metadata.json');\n this._clientsDir = path.join(this._sdkRootDir, 'clients');\n this.metadata = null;\n this.typings = {};\n this.fillApiModelFileNames(this._apiRootDir);\n}\n\n/**\n * Loads the AWS SDK metadata.json file.\n */\nTSGenerator.prototype.loadMetadata = function loadMetadata() {\n var metadataFile = fs.readFileSync(this._metadataPath);\n this.metadata = JSON.parse(metadataFile);\n return this.metadata;\n};\n\n/**\n * Modifies metadata to include api model filenames.\n */\nTSGenerator.prototype.fillApiModelFileNames = function fillApiModelFileNames(apisPath) {\n var modelPaths = fs.readdirSync(apisPath);\n if (!this.metadata) {\n 
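The abstract-class idea suggested above can be sketched as a declaration-file fragment like the one below. This is only an illustrative sketch of the reviewer's suggestion, not what the PR ultimately shipped: the option and method names are copied from the `lib/config.d.ts` typings quoted in this discussion, and the point is that each option (and its doc comment) is declared exactly once on the base class that `Config` then extends.

```typescript
import {AWSError} from './error';

/**
 * Sketch: every configuration option lives here, documented once.
 */
export abstract class ConfigurationOptions {
  /**
   * Whether to compute checksums for payload bodies when the service accepts it.
   */
  computeChecksums?: boolean;
  /**
   * The region to send service requests to.
   */
  region?: string;
  /**
   * The maximum amount of retries to perform for a service request.
   */
  maxRetries?: number;
  // ...the remaining options from the typings above, declared exactly once...
}

/**
 * Sketch: Config adds only its methods and inherits the options and their docs.
 */
export class Config extends ConfigurationOptions {
  constructor(options?: ConfigurationOptions);
  getCredentials(callback: (err: AWSError) => void): void;
  loadFromPath(path: string): Config;
  update(options: ConfigurationOptions, allowUnknownKeys?: boolean): void;
}
```

The generation alternative the reviewer mentions avoids the inheritance entirely but, as noted, would require keeping the per-option doc comments in a separate source of truth.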
this.loadMetadata();\n }\n var metadata = this.metadata;\n\n // sort paths so latest versions appear first\n modelPaths = modelPaths.sort(function sort(a, b) {\n if (a < b) {\n return 1;\n } else if (a > b) {\n return -1;\n } else {\n return 0;\n }\n });\n\n // Only get latest version of models\n var foundModels = Object.create(null);\n modelPaths.forEach(function(modelFileName) {\n var match = modelFileName.match(/^(.+)(-[\\d]{4}-[\\d]{2}-[\\d]{2})\\.normal\\.json$/i);\n if (match) {\n var model = match[1];\n if (!foundModels[model]) {\n foundModels[model] = modelFileName;\n }\n }\n });\n\n // now update the metadata\n var keys = Object.keys(metadata);\n keys.forEach(function(key) {\n var modelName = metadata[key].prefix || key;\n metadata[key].api_path = foundModels[modelName];\n // find waiters file\n var baseName = foundModels[modelName].split('.')[0];\n if (modelPaths.indexOf(baseName + '.waiters2.json') >= 0) {\n metadata[key].waiters_path = baseName + '.waiters2.json';\n }\n });\n};\n\n/**\n * Generates the file containing DocumentClient interfaces.\n */\nTSGenerator.prototype.generateDocumentClientInterfaces = function generateDocumentClientInterfaces() {\n var self = this;\n // get the dynamodb model\n var dynamodbModel = this.loadServiceApi('dynamodb');\n var code = '';\n // include node reference\n code += '///\\n';\n // generate shapes\n var modelShapes = dynamodbModel.shapes;\n // iterate over each shape\n var shapeKeys = Object.keys(modelShapes);\n shapeKeys.forEach(function (shapeKey) {\n var modelShape = modelShapes[shapeKey];\n // ignore exceptions\n if (modelShape.exception) {\n return;\n }\n // overwrite AttributeValue\n if (shapeKey === 'AttributeValue') {\n code += self.generateDocString('A JavaScript object or native type.');\n code += 'export type ' + shapeKey + ' = any;\\n';\n return;\n }\n code += self.generateTypingsFromShape(shapeKey, modelShape);\n });\n\n // write file:\n this.writeTypingsFile('document_client_interfaces', path.join(this._sdkRootDir, 'lib', 'dynamodb'), code);\n};\n\n/**\n * Returns a service model based on the serviceIdentifier.\n */\nTSGenerator.prototype.loadServiceApi = function loadServiceApi(serviceIdentifier) {\n // first, find the correct identifier\n var metadata = this.metadata;\n var serviceFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].api_path);\n var serviceModelFile = fs.readFileSync(serviceFilePath);\n var serviceModel = JSON.parse(serviceModelFile);\n // load waiters file if it exists\n var waiterFilePath;\n if (metadata[serviceIdentifier].waiters_path) {\n waiterFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].waiters_path);\n var waiterModelFile = fs.readFileSync(waiterFilePath);\n var waiterModel = JSON.parse(waiterModelFile);\n serviceModel.waiters = waiterModel.waiters;\n }\n\n return serviceModel;\n};\n\n/**\n * Determines if a member is required by checking for it in a list.\n */\nTSGenerator.prototype.checkRequired = function checkRequired(list, member) {\n if (list.indexOf(member)) {\n return true;\n }\n return false;\n};\n\n/**\n * Generates whitespace based on the count.\n */\nTSGenerator.prototype.tabs = function tabs(count) {\n var code = '';\n for (var i = 0; i < count; i++) {\n code += ' ';\n }\n return code;\n};\n\n/**\n * Transforms documentation string to a more readable format.\n */\nTSGenerator.prototype.transformDocumentation = function transformDocumentation(documentation) {\n if (!documentation) {\n return '';\n }\n documentation = 
documentation.replace(/<(?:.|\\n)*?>/gm, '');\n documentation = documentation.replace(/\\*\\//g, '*');\n return documentation;\n};\n\n/**\n * Returns a doc string based on the supplied documentation.\n * Also tabs the doc string if a count is provided.\n */\nTSGenerator.prototype.generateDocString = function generateDocString(documentation, tabCount) {\n tabCount = tabCount || 0;\n var code = '';\n code += this.tabs(tabCount) + '/**\\n';\n code += this.tabs(tabCount) + ' * ' + this.transformDocumentation(documentation) + '\\n';\n code += this.tabs(tabCount) + ' */\\n';\n return code;\n};\n\n/**\n * Returns an array of custom configuration options based on a service identiffier.\n * Custom configuration options are determined by checking the metadata.json file.\n */\nTSGenerator.prototype.generateCustomConfigFromMetadata = function generateCustomConfigFromMetadata(serviceIdentifier) {\n // some services have additional configuration options that are defined in the metadata.json file\n // i.e. dualstackAvailable = useDualstack\n // create reference to custom options\n var customConfigurations = [];\n var serviceMetadata = this.metadata[serviceIdentifier];\n // loop through metadata members\n for (var memberName in serviceMetadata) {\n if (!serviceMetadata.hasOwnProperty(memberName)) {\n continue;\n }\n var memberValue = serviceMetadata[memberName];\n // check configs\n switch (memberName) {\n case 'dualstackAvailable':\n customConfigurations.push(CUSTOM_CONFIG_ENUMS.DUALSTACK);\n break;\n }\n }\n\n return customConfigurations;\n};\n\n/**\n * Generates a type or interface based on the shape.\n */\nTSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromShape(shapeKey, shape, tabCount) {\n // some shapes shouldn't be generated if they are javascript primitives\n if (['string', 'boolean', 'number', 'Date', 'Blob'].indexOf(shapeKey) >= 0) {\n return '';", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "85186711", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1189, + "pr_file": "scripts/lib/ts-generator.js", + "discussion_id": "85186711", + "commented_code": "@@ -0,0 +1,437 @@\n+var fs = require('fs');\n+var path = require('path');\n+\n+var CUSTOM_CONFIG_ENUMS = {\n+ DUALSTACK: {\n+ FILE_NAME: 'config_use_dualstack',\n+ INTERFACE: 'UseDualstackConfigOptions'\n+ }\n+};\n+\n+function TSGenerator(options) {\n+ this._sdkRootDir = options.SdkRootDirectory || process.cwd();\n+ this._apiRootDir = path.join(this._sdkRootDir, 'apis');\n+ this._metadataPath = path.join(this._apiRootDir, 'metadata.json');\n+ this._clientsDir = path.join(this._sdkRootDir, 'clients');\n+ this.metadata = null;\n+ this.typings = {};\n+ this.fillApiModelFileNames(this._apiRootDir);\n+}\n+\n+/**\n+ * Loads the AWS SDK metadata.json file.\n+ */\n+TSGenerator.prototype.loadMetadata = function loadMetadata() {\n+ var metadataFile = fs.readFileSync(this._metadataPath);\n+ this.metadata = JSON.parse(metadataFile);\n+ return this.metadata;\n+};\n+\n+/**\n+ * Modifies metadata to include api model filenames.\n+ */\n+TSGenerator.prototype.fillApiModelFileNames = function fillApiModelFileNames(apisPath) {\n+ var modelPaths = fs.readdirSync(apisPath);\n+ if (!this.metadata) {\n+ this.loadMetadata();\n+ }\n+ var metadata = this.metadata;\n+\n+ // sort paths so latest versions appear first\n+ modelPaths = modelPaths.sort(function sort(a, b) {\n+ if (a < b) {\n+ return 1;\n+ } else if (a > b) {\n+ return -1;\n+ } else {\n+ return 0;\n+ }\n+ });\n+\n+ // Only get latest version 
of models\n+ var foundModels = Object.create(null);\n+ modelPaths.forEach(function(modelFileName) {\n+ var match = modelFileName.match(/^(.+)(-[\\d]{4}-[\\d]{2}-[\\d]{2})\\.normal\\.json$/i);\n+ if (match) {\n+ var model = match[1];\n+ if (!foundModels[model]) {\n+ foundModels[model] = modelFileName;\n+ }\n+ }\n+ });\n+\n+ // now update the metadata\n+ var keys = Object.keys(metadata);\n+ keys.forEach(function(key) {\n+ var modelName = metadata[key].prefix || key;\n+ metadata[key].api_path = foundModels[modelName];\n+ // find waiters file\n+ var baseName = foundModels[modelName].split('.')[0];\n+ if (modelPaths.indexOf(baseName + '.waiters2.json') >= 0) {\n+ metadata[key].waiters_path = baseName + '.waiters2.json';\n+ }\n+ });\n+};\n+\n+/**\n+ * Generates the file containing DocumentClient interfaces.\n+ */\n+TSGenerator.prototype.generateDocumentClientInterfaces = function generateDocumentClientInterfaces() {\n+ var self = this;\n+ // get the dynamodb model\n+ var dynamodbModel = this.loadServiceApi('dynamodb');\n+ var code = '';\n+ // include node reference\n+ code += '///\\n';\n+ // generate shapes\n+ var modelShapes = dynamodbModel.shapes;\n+ // iterate over each shape\n+ var shapeKeys = Object.keys(modelShapes);\n+ shapeKeys.forEach(function (shapeKey) {\n+ var modelShape = modelShapes[shapeKey];\n+ // ignore exceptions\n+ if (modelShape.exception) {\n+ return;\n+ }\n+ // overwrite AttributeValue\n+ if (shapeKey === 'AttributeValue') {\n+ code += self.generateDocString('A JavaScript object or native type.');\n+ code += 'export type ' + shapeKey + ' = any;\\n';\n+ return;\n+ }\n+ code += self.generateTypingsFromShape(shapeKey, modelShape);\n+ });\n+\n+ // write file:\n+ this.writeTypingsFile('document_client_interfaces', path.join(this._sdkRootDir, 'lib', 'dynamodb'), code);\n+};\n+\n+/**\n+ * Returns a service model based on the serviceIdentifier.\n+ */\n+TSGenerator.prototype.loadServiceApi = function loadServiceApi(serviceIdentifier) {\n+ // first, find the correct identifier\n+ var metadata = this.metadata;\n+ var serviceFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].api_path);\n+ var serviceModelFile = fs.readFileSync(serviceFilePath);\n+ var serviceModel = JSON.parse(serviceModelFile);\n+ // load waiters file if it exists\n+ var waiterFilePath;\n+ if (metadata[serviceIdentifier].waiters_path) {\n+ waiterFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].waiters_path);\n+ var waiterModelFile = fs.readFileSync(waiterFilePath);\n+ var waiterModel = JSON.parse(waiterModelFile);\n+ serviceModel.waiters = waiterModel.waiters;\n+ }\n+\n+ return serviceModel;\n+};\n+\n+/**\n+ * Determines if a member is required by checking for it in a list.\n+ */\n+TSGenerator.prototype.checkRequired = function checkRequired(list, member) {\n+ if (list.indexOf(member)) {\n+ return true;\n+ }\n+ return false;\n+};\n+\n+/**\n+ * Generates whitespace based on the count.\n+ */\n+TSGenerator.prototype.tabs = function tabs(count) {\n+ var code = '';\n+ for (var i = 0; i < count; i++) {\n+ code += ' ';\n+ }\n+ return code;\n+};\n+\n+/**\n+ * Transforms documentation string to a more readable format.\n+ */\n+TSGenerator.prototype.transformDocumentation = function transformDocumentation(documentation) {\n+ if (!documentation) {\n+ return '';\n+ }\n+ documentation = documentation.replace(/<(?:.|\\n)*?>/gm, '');\n+ documentation = documentation.replace(/\\*\\//g, '*');\n+ return documentation;\n+};\n+\n+/**\n+ * Returns a doc string based on the supplied documentation.\n+ * Also 
tabs the doc string if a count is provided.\n+ */\n+TSGenerator.prototype.generateDocString = function generateDocString(documentation, tabCount) {\n+ tabCount = tabCount || 0;\n+ var code = '';\n+ code += this.tabs(tabCount) + '/**\\n';\n+ code += this.tabs(tabCount) + ' * ' + this.transformDocumentation(documentation) + '\\n';\n+ code += this.tabs(tabCount) + ' */\\n';\n+ return code;\n+};\n+\n+/**\n+ * Returns an array of custom configuration options based on a service identiffier.\n+ * Custom configuration options are determined by checking the metadata.json file.\n+ */\n+TSGenerator.prototype.generateCustomConfigFromMetadata = function generateCustomConfigFromMetadata(serviceIdentifier) {\n+ // some services have additional configuration options that are defined in the metadata.json file\n+ // i.e. dualstackAvailable = useDualstack\n+ // create reference to custom options\n+ var customConfigurations = [];\n+ var serviceMetadata = this.metadata[serviceIdentifier];\n+ // loop through metadata members\n+ for (var memberName in serviceMetadata) {\n+ if (!serviceMetadata.hasOwnProperty(memberName)) {\n+ continue;\n+ }\n+ var memberValue = serviceMetadata[memberName];\n+ // check configs\n+ switch (memberName) {\n+ case 'dualstackAvailable':\n+ customConfigurations.push(CUSTOM_CONFIG_ENUMS.DUALSTACK);\n+ break;\n+ }\n+ }\n+\n+ return customConfigurations;\n+};\n+\n+/**\n+ * Generates a type or interface based on the shape.\n+ */\n+TSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromShape(shapeKey, shape, tabCount) {\n+ // some shapes shouldn't be generated if they are javascript primitives\n+ if (['string', 'boolean', 'number', 'Date', 'Blob'].indexOf(shapeKey) >= 0) {\n+ return '';", + "comment_created_at": "2016-10-26T18:17:43+00:00", + "comment_author": "LiuJoyceC", + "comment_body": "Instead of skipping shapes that happen to be named these, we could just add an underscore in front to differentiate the name of the shape from the javascript primitives.\nIt could still be useful to generate a typing for these shapes. For example, some services (like CodeCommit) have a shape named `\"Date\"` that is modeled as a string rather than a timestamp. 
If we rename the shape rather than skip it and have it be typed as a JavaScript `Date` object, then it could prevent errors before runtime, such as calling a `Date` method like `getMonth()` on the string.\n", + "pr_file_module": null + }, + { + "comment_id": "85187632", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1189, + "pr_file": "scripts/lib/ts-generator.js", + "discussion_id": "85186711", + "commented_code": "@@ -0,0 +1,437 @@\n+var fs = require('fs');\n+var path = require('path');\n+\n+var CUSTOM_CONFIG_ENUMS = {\n+ DUALSTACK: {\n+ FILE_NAME: 'config_use_dualstack',\n+ INTERFACE: 'UseDualstackConfigOptions'\n+ }\n+};\n+\n+function TSGenerator(options) {\n+ this._sdkRootDir = options.SdkRootDirectory || process.cwd();\n+ this._apiRootDir = path.join(this._sdkRootDir, 'apis');\n+ this._metadataPath = path.join(this._apiRootDir, 'metadata.json');\n+ this._clientsDir = path.join(this._sdkRootDir, 'clients');\n+ this.metadata = null;\n+ this.typings = {};\n+ this.fillApiModelFileNames(this._apiRootDir);\n+}\n+\n+/**\n+ * Loads the AWS SDK metadata.json file.\n+ */\n+TSGenerator.prototype.loadMetadata = function loadMetadata() {\n+ var metadataFile = fs.readFileSync(this._metadataPath);\n+ this.metadata = JSON.parse(metadataFile);\n+ return this.metadata;\n+};\n+\n+/**\n+ * Modifies metadata to include api model filenames.\n+ */\n+TSGenerator.prototype.fillApiModelFileNames = function fillApiModelFileNames(apisPath) {\n+ var modelPaths = fs.readdirSync(apisPath);\n+ if (!this.metadata) {\n+ this.loadMetadata();\n+ }\n+ var metadata = this.metadata;\n+\n+ // sort paths so latest versions appear first\n+ modelPaths = modelPaths.sort(function sort(a, b) {\n+ if (a < b) {\n+ return 1;\n+ } else if (a > b) {\n+ return -1;\n+ } else {\n+ return 0;\n+ }\n+ });\n+\n+ // Only get latest version of models\n+ var foundModels = Object.create(null);\n+ modelPaths.forEach(function(modelFileName) {\n+ var match = modelFileName.match(/^(.+)(-[\\d]{4}-[\\d]{2}-[\\d]{2})\\.normal\\.json$/i);\n+ if (match) {\n+ var model = match[1];\n+ if (!foundModels[model]) {\n+ foundModels[model] = modelFileName;\n+ }\n+ }\n+ });\n+\n+ // now update the metadata\n+ var keys = Object.keys(metadata);\n+ keys.forEach(function(key) {\n+ var modelName = metadata[key].prefix || key;\n+ metadata[key].api_path = foundModels[modelName];\n+ // find waiters file\n+ var baseName = foundModels[modelName].split('.')[0];\n+ if (modelPaths.indexOf(baseName + '.waiters2.json') >= 0) {\n+ metadata[key].waiters_path = baseName + '.waiters2.json';\n+ }\n+ });\n+};\n+\n+/**\n+ * Generates the file containing DocumentClient interfaces.\n+ */\n+TSGenerator.prototype.generateDocumentClientInterfaces = function generateDocumentClientInterfaces() {\n+ var self = this;\n+ // get the dynamodb model\n+ var dynamodbModel = this.loadServiceApi('dynamodb');\n+ var code = '';\n+ // include node reference\n+ code += '///\\n';\n+ // generate shapes\n+ var modelShapes = dynamodbModel.shapes;\n+ // iterate over each shape\n+ var shapeKeys = Object.keys(modelShapes);\n+ shapeKeys.forEach(function (shapeKey) {\n+ var modelShape = modelShapes[shapeKey];\n+ // ignore exceptions\n+ if (modelShape.exception) {\n+ return;\n+ }\n+ // overwrite AttributeValue\n+ if (shapeKey === 'AttributeValue') {\n+ code += self.generateDocString('A JavaScript object or native type.');\n+ code += 'export type ' + shapeKey + ' = any;\\n';\n+ return;\n+ }\n+ code += self.generateTypingsFromShape(shapeKey, modelShape);\n+ });\n+\n+ // write file:\n+ 
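A minimal sketch of the rename-instead-of-skip idea from the comment above is shown here; the helper name `escapeShapeName` is invented for illustration, while the reserved-name list is the one `generateTypingsFromShape` already checks, and the follow-up comment below shows the alias such a helper would let the generator emit.

```typescript
// Names that collide with JavaScript/TypeScript built-ins; today the generator
// skips shapes with these names instead of emitting a typing for them.
var JS_PRIMITIVES = ['string', 'boolean', 'number', 'Date', 'Blob'];

// Hypothetical helper: prefix colliding shape names with an underscore so the
// shape is still emitted. A string-typed "Date" shape (as in CodeCommit) would
// then come out as "export type _Date = string;" rather than silently being
// treated as the JavaScript Date object.
function escapeShapeName(shapeKey: string): string {
  return JS_PRIMITIVES.indexOf(shapeKey) >= 0 ? '_' + shapeKey : shapeKey;
}

// escapeShapeName('Date')        // '_Date'
// escapeShapeName('Certificate') // 'Certificate'
```

Any member that references such a shape would need the same mapping applied so the emitted typings stay consistent, which is what the later replies in this thread point out.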
this.writeTypingsFile('document_client_interfaces', path.join(this._sdkRootDir, 'lib', 'dynamodb'), code);\n+};\n+\n+/**\n+ * Returns a service model based on the serviceIdentifier.\n+ */\n+TSGenerator.prototype.loadServiceApi = function loadServiceApi(serviceIdentifier) {\n+ // first, find the correct identifier\n+ var metadata = this.metadata;\n+ var serviceFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].api_path);\n+ var serviceModelFile = fs.readFileSync(serviceFilePath);\n+ var serviceModel = JSON.parse(serviceModelFile);\n+ // load waiters file if it exists\n+ var waiterFilePath;\n+ if (metadata[serviceIdentifier].waiters_path) {\n+ waiterFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].waiters_path);\n+ var waiterModelFile = fs.readFileSync(waiterFilePath);\n+ var waiterModel = JSON.parse(waiterModelFile);\n+ serviceModel.waiters = waiterModel.waiters;\n+ }\n+\n+ return serviceModel;\n+};\n+\n+/**\n+ * Determines if a member is required by checking for it in a list.\n+ */\n+TSGenerator.prototype.checkRequired = function checkRequired(list, member) {\n+ if (list.indexOf(member)) {\n+ return true;\n+ }\n+ return false;\n+};\n+\n+/**\n+ * Generates whitespace based on the count.\n+ */\n+TSGenerator.prototype.tabs = function tabs(count) {\n+ var code = '';\n+ for (var i = 0; i < count; i++) {\n+ code += ' ';\n+ }\n+ return code;\n+};\n+\n+/**\n+ * Transforms documentation string to a more readable format.\n+ */\n+TSGenerator.prototype.transformDocumentation = function transformDocumentation(documentation) {\n+ if (!documentation) {\n+ return '';\n+ }\n+ documentation = documentation.replace(/<(?:.|\\n)*?>/gm, '');\n+ documentation = documentation.replace(/\\*\\//g, '*');\n+ return documentation;\n+};\n+\n+/**\n+ * Returns a doc string based on the supplied documentation.\n+ * Also tabs the doc string if a count is provided.\n+ */\n+TSGenerator.prototype.generateDocString = function generateDocString(documentation, tabCount) {\n+ tabCount = tabCount || 0;\n+ var code = '';\n+ code += this.tabs(tabCount) + '/**\\n';\n+ code += this.tabs(tabCount) + ' * ' + this.transformDocumentation(documentation) + '\\n';\n+ code += this.tabs(tabCount) + ' */\\n';\n+ return code;\n+};\n+\n+/**\n+ * Returns an array of custom configuration options based on a service identiffier.\n+ * Custom configuration options are determined by checking the metadata.json file.\n+ */\n+TSGenerator.prototype.generateCustomConfigFromMetadata = function generateCustomConfigFromMetadata(serviceIdentifier) {\n+ // some services have additional configuration options that are defined in the metadata.json file\n+ // i.e. 
dualstackAvailable = useDualstack\n+ // create reference to custom options\n+ var customConfigurations = [];\n+ var serviceMetadata = this.metadata[serviceIdentifier];\n+ // loop through metadata members\n+ for (var memberName in serviceMetadata) {\n+ if (!serviceMetadata.hasOwnProperty(memberName)) {\n+ continue;\n+ }\n+ var memberValue = serviceMetadata[memberName];\n+ // check configs\n+ switch (memberName) {\n+ case 'dualstackAvailable':\n+ customConfigurations.push(CUSTOM_CONFIG_ENUMS.DUALSTACK);\n+ break;\n+ }\n+ }\n+\n+ return customConfigurations;\n+};\n+\n+/**\n+ * Generates a type or interface based on the shape.\n+ */\n+TSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromShape(shapeKey, shape, tabCount) {\n+ // some shapes shouldn't be generated if they are javascript primitives\n+ if (['string', 'boolean', 'number', 'Date', 'Blob'].indexOf(shapeKey) >= 0) {\n+ return '';", + "comment_created_at": "2016-10-26T18:21:55+00:00", + "comment_author": "LiuJoyceC", + "comment_body": "So for example, the typing could look like:\n\n```\nexport type _Date = string;\n```\n\nAnd the references to this shape would have to have an underscore added in front as well.\n", + "pr_file_module": null + }, + { + "comment_id": "85431358", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1189, + "pr_file": "scripts/lib/ts-generator.js", + "discussion_id": "85186711", + "commented_code": "@@ -0,0 +1,437 @@\n+var fs = require('fs');\n+var path = require('path');\n+\n+var CUSTOM_CONFIG_ENUMS = {\n+ DUALSTACK: {\n+ FILE_NAME: 'config_use_dualstack',\n+ INTERFACE: 'UseDualstackConfigOptions'\n+ }\n+};\n+\n+function TSGenerator(options) {\n+ this._sdkRootDir = options.SdkRootDirectory || process.cwd();\n+ this._apiRootDir = path.join(this._sdkRootDir, 'apis');\n+ this._metadataPath = path.join(this._apiRootDir, 'metadata.json');\n+ this._clientsDir = path.join(this._sdkRootDir, 'clients');\n+ this.metadata = null;\n+ this.typings = {};\n+ this.fillApiModelFileNames(this._apiRootDir);\n+}\n+\n+/**\n+ * Loads the AWS SDK metadata.json file.\n+ */\n+TSGenerator.prototype.loadMetadata = function loadMetadata() {\n+ var metadataFile = fs.readFileSync(this._metadataPath);\n+ this.metadata = JSON.parse(metadataFile);\n+ return this.metadata;\n+};\n+\n+/**\n+ * Modifies metadata to include api model filenames.\n+ */\n+TSGenerator.prototype.fillApiModelFileNames = function fillApiModelFileNames(apisPath) {\n+ var modelPaths = fs.readdirSync(apisPath);\n+ if (!this.metadata) {\n+ this.loadMetadata();\n+ }\n+ var metadata = this.metadata;\n+\n+ // sort paths so latest versions appear first\n+ modelPaths = modelPaths.sort(function sort(a, b) {\n+ if (a < b) {\n+ return 1;\n+ } else if (a > b) {\n+ return -1;\n+ } else {\n+ return 0;\n+ }\n+ });\n+\n+ // Only get latest version of models\n+ var foundModels = Object.create(null);\n+ modelPaths.forEach(function(modelFileName) {\n+ var match = modelFileName.match(/^(.+)(-[\\d]{4}-[\\d]{2}-[\\d]{2})\\.normal\\.json$/i);\n+ if (match) {\n+ var model = match[1];\n+ if (!foundModels[model]) {\n+ foundModels[model] = modelFileName;\n+ }\n+ }\n+ });\n+\n+ // now update the metadata\n+ var keys = Object.keys(metadata);\n+ keys.forEach(function(key) {\n+ var modelName = metadata[key].prefix || key;\n+ metadata[key].api_path = foundModels[modelName];\n+ // find waiters file\n+ var baseName = foundModels[modelName].split('.')[0];\n+ if (modelPaths.indexOf(baseName + '.waiters2.json') >= 0) {\n+ metadata[key].waiters_path = baseName + '.waiters2.json';\n+ 
}\n+ });\n+};\n+\n+/**\n+ * Generates the file containing DocumentClient interfaces.\n+ */\n+TSGenerator.prototype.generateDocumentClientInterfaces = function generateDocumentClientInterfaces() {\n+ var self = this;\n+ // get the dynamodb model\n+ var dynamodbModel = this.loadServiceApi('dynamodb');\n+ var code = '';\n+ // include node reference\n+ code += '///\\n';\n+ // generate shapes\n+ var modelShapes = dynamodbModel.shapes;\n+ // iterate over each shape\n+ var shapeKeys = Object.keys(modelShapes);\n+ shapeKeys.forEach(function (shapeKey) {\n+ var modelShape = modelShapes[shapeKey];\n+ // ignore exceptions\n+ if (modelShape.exception) {\n+ return;\n+ }\n+ // overwrite AttributeValue\n+ if (shapeKey === 'AttributeValue') {\n+ code += self.generateDocString('A JavaScript object or native type.');\n+ code += 'export type ' + shapeKey + ' = any;\\n';\n+ return;\n+ }\n+ code += self.generateTypingsFromShape(shapeKey, modelShape);\n+ });\n+\n+ // write file:\n+ this.writeTypingsFile('document_client_interfaces', path.join(this._sdkRootDir, 'lib', 'dynamodb'), code);\n+};\n+\n+/**\n+ * Returns a service model based on the serviceIdentifier.\n+ */\n+TSGenerator.prototype.loadServiceApi = function loadServiceApi(serviceIdentifier) {\n+ // first, find the correct identifier\n+ var metadata = this.metadata;\n+ var serviceFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].api_path);\n+ var serviceModelFile = fs.readFileSync(serviceFilePath);\n+ var serviceModel = JSON.parse(serviceModelFile);\n+ // load waiters file if it exists\n+ var waiterFilePath;\n+ if (metadata[serviceIdentifier].waiters_path) {\n+ waiterFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].waiters_path);\n+ var waiterModelFile = fs.readFileSync(waiterFilePath);\n+ var waiterModel = JSON.parse(waiterModelFile);\n+ serviceModel.waiters = waiterModel.waiters;\n+ }\n+\n+ return serviceModel;\n+};\n+\n+/**\n+ * Determines if a member is required by checking for it in a list.\n+ */\n+TSGenerator.prototype.checkRequired = function checkRequired(list, member) {\n+ if (list.indexOf(member)) {\n+ return true;\n+ }\n+ return false;\n+};\n+\n+/**\n+ * Generates whitespace based on the count.\n+ */\n+TSGenerator.prototype.tabs = function tabs(count) {\n+ var code = '';\n+ for (var i = 0; i < count; i++) {\n+ code += ' ';\n+ }\n+ return code;\n+};\n+\n+/**\n+ * Transforms documentation string to a more readable format.\n+ */\n+TSGenerator.prototype.transformDocumentation = function transformDocumentation(documentation) {\n+ if (!documentation) {\n+ return '';\n+ }\n+ documentation = documentation.replace(/<(?:.|\\n)*?>/gm, '');\n+ documentation = documentation.replace(/\\*\\//g, '*');\n+ return documentation;\n+};\n+\n+/**\n+ * Returns a doc string based on the supplied documentation.\n+ * Also tabs the doc string if a count is provided.\n+ */\n+TSGenerator.prototype.generateDocString = function generateDocString(documentation, tabCount) {\n+ tabCount = tabCount || 0;\n+ var code = '';\n+ code += this.tabs(tabCount) + '/**\\n';\n+ code += this.tabs(tabCount) + ' * ' + this.transformDocumentation(documentation) + '\\n';\n+ code += this.tabs(tabCount) + ' */\\n';\n+ return code;\n+};\n+\n+/**\n+ * Returns an array of custom configuration options based on a service identiffier.\n+ * Custom configuration options are determined by checking the metadata.json file.\n+ */\n+TSGenerator.prototype.generateCustomConfigFromMetadata = function generateCustomConfigFromMetadata(serviceIdentifier) {\n+ // some services have 
additional configuration options that are defined in the metadata.json file\n+ // i.e. dualstackAvailable = useDualstack\n+ // create reference to custom options\n+ var customConfigurations = [];\n+ var serviceMetadata = this.metadata[serviceIdentifier];\n+ // loop through metadata members\n+ for (var memberName in serviceMetadata) {\n+ if (!serviceMetadata.hasOwnProperty(memberName)) {\n+ continue;\n+ }\n+ var memberValue = serviceMetadata[memberName];\n+ // check configs\n+ switch (memberName) {\n+ case 'dualstackAvailable':\n+ customConfigurations.push(CUSTOM_CONFIG_ENUMS.DUALSTACK);\n+ break;\n+ }\n+ }\n+\n+ return customConfigurations;\n+};\n+\n+/**\n+ * Generates a type or interface based on the shape.\n+ */\n+TSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromShape(shapeKey, shape, tabCount) {\n+ // some shapes shouldn't be generated if they are javascript primitives\n+ if (['string', 'boolean', 'number', 'Date', 'Blob'].indexOf(shapeKey) >= 0) {\n+ return '';", + "comment_created_at": "2016-10-27T21:27:10+00:00", + "comment_author": "chrisradek", + "comment_body": "Fair point, thanks for finding that.\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "85250379", + "pr_number": 1189, + "pr_file": "scripts/lib/ts-generator.js", + "created_at": "2016-10-27T01:01:32+00:00", + "commented_code": "var fs = require('fs');\nvar path = require('path');\n\nvar CUSTOM_CONFIG_ENUMS = {\n DUALSTACK: {\n FILE_NAME: 'config_use_dualstack',\n INTERFACE: 'UseDualstackConfigOptions'\n }\n};\n\nfunction TSGenerator(options) {\n this._sdkRootDir = options.SdkRootDirectory || process.cwd();\n this._apiRootDir = path.join(this._sdkRootDir, 'apis');\n this._metadataPath = path.join(this._apiRootDir, 'metadata.json');\n this._clientsDir = path.join(this._sdkRootDir, 'clients');\n this.metadata = null;\n this.typings = {};\n this.fillApiModelFileNames(this._apiRootDir);\n}\n\n/**\n * Loads the AWS SDK metadata.json file.\n */\nTSGenerator.prototype.loadMetadata = function loadMetadata() {\n var metadataFile = fs.readFileSync(this._metadataPath);\n this.metadata = JSON.parse(metadataFile);\n return this.metadata;\n};\n\n/**\n * Modifies metadata to include api model filenames.\n */\nTSGenerator.prototype.fillApiModelFileNames = function fillApiModelFileNames(apisPath) {\n var modelPaths = fs.readdirSync(apisPath);\n if (!this.metadata) {\n this.loadMetadata();\n }\n var metadata = this.metadata;\n\n // sort paths so latest versions appear first\n modelPaths = modelPaths.sort(function sort(a, b) {\n if (a < b) {\n return 1;\n } else if (a > b) {\n return -1;\n } else {\n return 0;\n }\n });\n\n // Only get latest version of models\n var foundModels = Object.create(null);\n modelPaths.forEach(function(modelFileName) {\n var match = modelFileName.match(/^(.+)(-[\\d]{4}-[\\d]{2}-[\\d]{2})\\.normal\\.json$/i);\n if (match) {\n var model = match[1];\n if (!foundModels[model]) {\n foundModels[model] = modelFileName;\n }\n }\n });\n\n // now update the metadata\n var keys = Object.keys(metadata);\n keys.forEach(function(key) {\n var modelName = metadata[key].prefix || key;\n metadata[key].api_path = foundModels[modelName];\n // find waiters file\n var baseName = foundModels[modelName].split('.')[0];\n if (modelPaths.indexOf(baseName + '.waiters2.json') >= 0) {\n metadata[key].waiters_path = baseName + '.waiters2.json';\n }\n });\n};\n\n/**\n * Generates the file containing DocumentClient interfaces.\n */\nTSGenerator.prototype.generateDocumentClientInterfaces = function 
generateDocumentClientInterfaces() {\n var self = this;\n // get the dynamodb model\n var dynamodbModel = this.loadServiceApi('dynamodb');\n var code = '';\n // include node reference\n code += '///\\n';\n // generate shapes\n var modelShapes = dynamodbModel.shapes;\n // iterate over each shape\n var shapeKeys = Object.keys(modelShapes);\n shapeKeys.forEach(function (shapeKey) {\n var modelShape = modelShapes[shapeKey];\n // ignore exceptions\n if (modelShape.exception) {\n return;\n }\n // overwrite AttributeValue\n if (shapeKey === 'AttributeValue') {\n code += self.generateDocString('A JavaScript object or native type.');\n code += 'export type ' + shapeKey + ' = any;\\n';\n return;\n }\n code += self.generateTypingsFromShape(shapeKey, modelShape);\n });\n\n // write file:\n this.writeTypingsFile('document_client_interfaces', path.join(this._sdkRootDir, 'lib', 'dynamodb'), code);\n};\n\n/**\n * Returns a service model based on the serviceIdentifier.\n */\nTSGenerator.prototype.loadServiceApi = function loadServiceApi(serviceIdentifier) {\n // first, find the correct identifier\n var metadata = this.metadata;\n var serviceFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].api_path);\n var serviceModelFile = fs.readFileSync(serviceFilePath);\n var serviceModel = JSON.parse(serviceModelFile);\n // load waiters file if it exists\n var waiterFilePath;\n if (metadata[serviceIdentifier].waiters_path) {\n waiterFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].waiters_path);\n var waiterModelFile = fs.readFileSync(waiterFilePath);\n var waiterModel = JSON.parse(waiterModelFile);\n serviceModel.waiters = waiterModel.waiters;\n }\n\n return serviceModel;\n};\n\n/**\n * Determines if a member is required by checking for it in a list.\n */\nTSGenerator.prototype.checkRequired = function checkRequired(list, member) {\n if (list.indexOf(member)) {\n return true;\n }\n return false;\n};\n\n/**\n * Generates whitespace based on the count.\n */\nTSGenerator.prototype.tabs = function tabs(count) {\n var code = '';\n for (var i = 0; i < count; i++) {\n code += ' ';\n }\n return code;\n};\n\n/**\n * Transforms documentation string to a more readable format.\n */\nTSGenerator.prototype.transformDocumentation = function transformDocumentation(documentation) {\n if (!documentation) {\n return '';\n }\n documentation = documentation.replace(/<(?:.|\\n)*?>/gm, '');\n documentation = documentation.replace(/\\*\\//g, '*');\n return documentation;\n};\n\n/**\n * Returns a doc string based on the supplied documentation.\n * Also tabs the doc string if a count is provided.\n */\nTSGenerator.prototype.generateDocString = function generateDocString(documentation, tabCount) {\n tabCount = tabCount || 0;\n var code = '';\n code += this.tabs(tabCount) + '/**\\n';\n code += this.tabs(tabCount) + ' * ' + this.transformDocumentation(documentation) + '\\n';\n code += this.tabs(tabCount) + ' */\\n';\n return code;\n};\n\n/**\n * Returns an array of custom configuration options based on a service identiffier.\n * Custom configuration options are determined by checking the metadata.json file.\n */\nTSGenerator.prototype.generateCustomConfigFromMetadata = function generateCustomConfigFromMetadata(serviceIdentifier) {\n // some services have additional configuration options that are defined in the metadata.json file\n // i.e. 
dualstackAvailable = useDualstack\n // create reference to custom options\n var customConfigurations = [];\n var serviceMetadata = this.metadata[serviceIdentifier];\n // loop through metadata members\n for (var memberName in serviceMetadata) {\n if (!serviceMetadata.hasOwnProperty(memberName)) {\n continue;\n }\n var memberValue = serviceMetadata[memberName];\n // check configs\n switch (memberName) {\n case 'dualstackAvailable':\n customConfigurations.push(CUSTOM_CONFIG_ENUMS.DUALSTACK);\n break;\n }\n }\n\n return customConfigurations;\n};\n\n/**\n * Generates a type or interface based on the shape.\n */\nTSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromShape(shapeKey, shape, tabCount) {\n // some shapes shouldn't be generated if they are javascript primitives\n if (['string', 'boolean', 'number', 'Date', 'Blob'].indexOf(shapeKey) >= 0) {\n return '';\n }\n var self = this;\n var code = '';\n tabCount = tabCount || 0;\n var tabs = this.tabs;\n var type = shape.type;\n if (type === 'structure') {\n code += tabs(tabCount) + 'export interface ' + shapeKey + ' {\\n';\n var members = shape.members;\n // cycle through members\n var memberKeys = Object.keys(members);\n memberKeys.forEach(function(memberKey) {\n // docs\n var member = members[memberKey];\n if (member.documentation) {\n code += self.generateDocString(member.documentation, tabCount + 1);\n }\n var required = self.checkRequired(shape.required || [], memberKey) ? '?' : '';\n code += tabs(tabCount + 1) + memberKey + required + ': ' + member.shape + ';\\n';\n });\n code += tabs(tabCount) + '}\\n';\n } else if (type === 'list') {\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = ' + shape.member.shape + '[];\\n';\n } else if (type === 'map') {\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = {[key: string]: ' + shape.value.shape + '};\\n';\n } else if (type === 'string' || type === 'character') {\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = string;\\n';\n } else if (['double', 'long', 'short', 'biginteger', 'bigdecimal', 'integer', 'float'].indexOf(type) >= 0) {\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = number;\\n';\n } else if (type === 'timestamp') {\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = Date;\\n';\n } else if (type === 'boolean') {\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = boolean;\\n';\n } else if (type === 'blob' || type === 'binary') {\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = Buffer|Uint8Array|Blob|string;\\n';\n }\n return code;\n};\n\n/**\n * Generates a class method type for an operation.\n */\nTSGenerator.prototype.generateTypingsFromOperations = function generateTypingsFromOperations(className, operation, tabCount) {\n var code = '';\n tabCount = tabCount || 0;\n var tabs = this.tabs;\n\n var input = operation.input;\n var output = operation.output;\n var operationName = operation.name.charAt(0).toLowerCase() + operation.name.substring(1);\n\n var inputShape = input ? className + '.' + input.shape : '{}';\n var outputShape = output ? className + '.' 
+ output.shape : '{}';\n\n code += this.generateDocString(operation.documentation, tabCount);\n code += tabs(tabCount) + operationName + '(params: ' + inputShape + ', callback?: (err: AWSError, data: ' + outputShape + ') => void): Request<' + outputShape + ', AWSError>;\\n';\n code += this.generateDocString(operation.documentation, tabCount);\n code += tabs(tabCount) + operationName + '(callback?: (err: AWSError, data: ' + outputShape + ') => void): Request<' + outputShape + ', AWSError>;\\n';\n\n return code;\n};\n\n/**\n * Generates class method types for a waiter.\n */\nTSGenerator.prototype.generateTypingsFromWaiters = function generateTypingsFromWaiters(className, waiterState, waiter, underlyingOperation, tabCount) {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "85250379", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1189, + "pr_file": "scripts/lib/ts-generator.js", + "discussion_id": "85250379", + "commented_code": "@@ -0,0 +1,437 @@\n+var fs = require('fs');\n+var path = require('path');\n+\n+var CUSTOM_CONFIG_ENUMS = {\n+ DUALSTACK: {\n+ FILE_NAME: 'config_use_dualstack',\n+ INTERFACE: 'UseDualstackConfigOptions'\n+ }\n+};\n+\n+function TSGenerator(options) {\n+ this._sdkRootDir = options.SdkRootDirectory || process.cwd();\n+ this._apiRootDir = path.join(this._sdkRootDir, 'apis');\n+ this._metadataPath = path.join(this._apiRootDir, 'metadata.json');\n+ this._clientsDir = path.join(this._sdkRootDir, 'clients');\n+ this.metadata = null;\n+ this.typings = {};\n+ this.fillApiModelFileNames(this._apiRootDir);\n+}\n+\n+/**\n+ * Loads the AWS SDK metadata.json file.\n+ */\n+TSGenerator.prototype.loadMetadata = function loadMetadata() {\n+ var metadataFile = fs.readFileSync(this._metadataPath);\n+ this.metadata = JSON.parse(metadataFile);\n+ return this.metadata;\n+};\n+\n+/**\n+ * Modifies metadata to include api model filenames.\n+ */\n+TSGenerator.prototype.fillApiModelFileNames = function fillApiModelFileNames(apisPath) {\n+ var modelPaths = fs.readdirSync(apisPath);\n+ if (!this.metadata) {\n+ this.loadMetadata();\n+ }\n+ var metadata = this.metadata;\n+\n+ // sort paths so latest versions appear first\n+ modelPaths = modelPaths.sort(function sort(a, b) {\n+ if (a < b) {\n+ return 1;\n+ } else if (a > b) {\n+ return -1;\n+ } else {\n+ return 0;\n+ }\n+ });\n+\n+ // Only get latest version of models\n+ var foundModels = Object.create(null);\n+ modelPaths.forEach(function(modelFileName) {\n+ var match = modelFileName.match(/^(.+)(-[\\d]{4}-[\\d]{2}-[\\d]{2})\\.normal\\.json$/i);\n+ if (match) {\n+ var model = match[1];\n+ if (!foundModels[model]) {\n+ foundModels[model] = modelFileName;\n+ }\n+ }\n+ });\n+\n+ // now update the metadata\n+ var keys = Object.keys(metadata);\n+ keys.forEach(function(key) {\n+ var modelName = metadata[key].prefix || key;\n+ metadata[key].api_path = foundModels[modelName];\n+ // find waiters file\n+ var baseName = foundModels[modelName].split('.')[0];\n+ if (modelPaths.indexOf(baseName + '.waiters2.json') >= 0) {\n+ metadata[key].waiters_path = baseName + '.waiters2.json';\n+ }\n+ });\n+};\n+\n+/**\n+ * Generates the file containing DocumentClient interfaces.\n+ */\n+TSGenerator.prototype.generateDocumentClientInterfaces = function generateDocumentClientInterfaces() {\n+ var self = this;\n+ // get the dynamodb model\n+ var dynamodbModel = this.loadServiceApi('dynamodb');\n+ var code = '';\n+ // include node reference\n+ code += '///\\n';\n+ // generate shapes\n+ var modelShapes = dynamodbModel.shapes;\n+ // 
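To make the overload pattern in `generateTypingsFromOperations` concrete, here is a small illustrative sketch. The operation object mirrors only the fields the generator reads (`name`, `documentation`, `input.shape`, `output.shape`); the commented output follows the template strings in the code above, which is also the pattern visible in the ACM typings quoted near the start of this file.

```typescript
// Illustrative operation model, shaped like an entry from a *.normal.json API file.
var operation = {
  name: 'DescribeCertificate',
  documentation: 'Returns a list of the fields contained in the specified ACM Certificate.',
  input: { shape: 'DescribeCertificateRequest' },
  output: { shape: 'DescribeCertificateResponse' }
};

// generateTypingsFromOperations('ACM', operation) would emit two overloads,
// each preceded by the operation's doc comment:
//
//   describeCertificate(params: ACM.DescribeCertificateRequest,
//     callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void
//   ): Request<ACM.DescribeCertificateResponse, AWSError>;
//   describeCertificate(
//     callback?: (err: AWSError, data: ACM.DescribeCertificateResponse) => void
//   ): Request<ACM.DescribeCertificateResponse, AWSError>;
```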
iterate over each shape\n+ var shapeKeys = Object.keys(modelShapes);\n+ shapeKeys.forEach(function (shapeKey) {\n+ var modelShape = modelShapes[shapeKey];\n+ // ignore exceptions\n+ if (modelShape.exception) {\n+ return;\n+ }\n+ // overwrite AttributeValue\n+ if (shapeKey === 'AttributeValue') {\n+ code += self.generateDocString('A JavaScript object or native type.');\n+ code += 'export type ' + shapeKey + ' = any;\\n';\n+ return;\n+ }\n+ code += self.generateTypingsFromShape(shapeKey, modelShape);\n+ });\n+\n+ // write file:\n+ this.writeTypingsFile('document_client_interfaces', path.join(this._sdkRootDir, 'lib', 'dynamodb'), code);\n+};\n+\n+/**\n+ * Returns a service model based on the serviceIdentifier.\n+ */\n+TSGenerator.prototype.loadServiceApi = function loadServiceApi(serviceIdentifier) {\n+ // first, find the correct identifier\n+ var metadata = this.metadata;\n+ var serviceFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].api_path);\n+ var serviceModelFile = fs.readFileSync(serviceFilePath);\n+ var serviceModel = JSON.parse(serviceModelFile);\n+ // load waiters file if it exists\n+ var waiterFilePath;\n+ if (metadata[serviceIdentifier].waiters_path) {\n+ waiterFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].waiters_path);\n+ var waiterModelFile = fs.readFileSync(waiterFilePath);\n+ var waiterModel = JSON.parse(waiterModelFile);\n+ serviceModel.waiters = waiterModel.waiters;\n+ }\n+\n+ return serviceModel;\n+};\n+\n+/**\n+ * Determines if a member is required by checking for it in a list.\n+ */\n+TSGenerator.prototype.checkRequired = function checkRequired(list, member) {\n+ if (list.indexOf(member)) {\n+ return true;\n+ }\n+ return false;\n+};\n+\n+/**\n+ * Generates whitespace based on the count.\n+ */\n+TSGenerator.prototype.tabs = function tabs(count) {\n+ var code = '';\n+ for (var i = 0; i < count; i++) {\n+ code += ' ';\n+ }\n+ return code;\n+};\n+\n+/**\n+ * Transforms documentation string to a more readable format.\n+ */\n+TSGenerator.prototype.transformDocumentation = function transformDocumentation(documentation) {\n+ if (!documentation) {\n+ return '';\n+ }\n+ documentation = documentation.replace(/<(?:.|\\n)*?>/gm, '');\n+ documentation = documentation.replace(/\\*\\//g, '*');\n+ return documentation;\n+};\n+\n+/**\n+ * Returns a doc string based on the supplied documentation.\n+ * Also tabs the doc string if a count is provided.\n+ */\n+TSGenerator.prototype.generateDocString = function generateDocString(documentation, tabCount) {\n+ tabCount = tabCount || 0;\n+ var code = '';\n+ code += this.tabs(tabCount) + '/**\\n';\n+ code += this.tabs(tabCount) + ' * ' + this.transformDocumentation(documentation) + '\\n';\n+ code += this.tabs(tabCount) + ' */\\n';\n+ return code;\n+};\n+\n+/**\n+ * Returns an array of custom configuration options based on a service identiffier.\n+ * Custom configuration options are determined by checking the metadata.json file.\n+ */\n+TSGenerator.prototype.generateCustomConfigFromMetadata = function generateCustomConfigFromMetadata(serviceIdentifier) {\n+ // some services have additional configuration options that are defined in the metadata.json file\n+ // i.e. 
dualstackAvailable = useDualstack\n+ // create reference to custom options\n+ var customConfigurations = [];\n+ var serviceMetadata = this.metadata[serviceIdentifier];\n+ // loop through metadata members\n+ for (var memberName in serviceMetadata) {\n+ if (!serviceMetadata.hasOwnProperty(memberName)) {\n+ continue;\n+ }\n+ var memberValue = serviceMetadata[memberName];\n+ // check configs\n+ switch (memberName) {\n+ case 'dualstackAvailable':\n+ customConfigurations.push(CUSTOM_CONFIG_ENUMS.DUALSTACK);\n+ break;\n+ }\n+ }\n+\n+ return customConfigurations;\n+};\n+\n+/**\n+ * Generates a type or interface based on the shape.\n+ */\n+TSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromShape(shapeKey, shape, tabCount) {\n+ // some shapes shouldn't be generated if they are javascript primitives\n+ if (['string', 'boolean', 'number', 'Date', 'Blob'].indexOf(shapeKey) >= 0) {\n+ return '';\n+ }\n+ var self = this;\n+ var code = '';\n+ tabCount = tabCount || 0;\n+ var tabs = this.tabs;\n+ var type = shape.type;\n+ if (type === 'structure') {\n+ code += tabs(tabCount) + 'export interface ' + shapeKey + ' {\\n';\n+ var members = shape.members;\n+ // cycle through members\n+ var memberKeys = Object.keys(members);\n+ memberKeys.forEach(function(memberKey) {\n+ // docs\n+ var member = members[memberKey];\n+ if (member.documentation) {\n+ code += self.generateDocString(member.documentation, tabCount + 1);\n+ }\n+ var required = self.checkRequired(shape.required || [], memberKey) ? '?' : '';\n+ code += tabs(tabCount + 1) + memberKey + required + ': ' + member.shape + ';\\n';\n+ });\n+ code += tabs(tabCount) + '}\\n';\n+ } else if (type === 'list') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = ' + shape.member.shape + '[];\\n';\n+ } else if (type === 'map') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = {[key: string]: ' + shape.value.shape + '};\\n';\n+ } else if (type === 'string' || type === 'character') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = string;\\n';\n+ } else if (['double', 'long', 'short', 'biginteger', 'bigdecimal', 'integer', 'float'].indexOf(type) >= 0) {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = number;\\n';\n+ } else if (type === 'timestamp') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = Date;\\n';\n+ } else if (type === 'boolean') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = boolean;\\n';\n+ } else if (type === 'blob' || type === 'binary') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = Buffer|Uint8Array|Blob|string;\\n';\n+ }\n+ return code;\n+};\n+\n+/**\n+ * Generates a class method type for an operation.\n+ */\n+TSGenerator.prototype.generateTypingsFromOperations = function generateTypingsFromOperations(className, operation, tabCount) {\n+ var code = '';\n+ tabCount = tabCount || 0;\n+ var tabs = this.tabs;\n+\n+ var input = operation.input;\n+ var output = operation.output;\n+ var operationName = operation.name.charAt(0).toLowerCase() + operation.name.substring(1);\n+\n+ var inputShape = input ? className + '.' + input.shape : '{}';\n+ var outputShape = output ? className + '.' 
+ output.shape : '{}';\n+\n+ code += this.generateDocString(operation.documentation, tabCount);\n+ code += tabs(tabCount) + operationName + '(params: ' + inputShape + ', callback?: (err: AWSError, data: ' + outputShape + ') => void): Request<' + outputShape + ', AWSError>;\\n';\n+ code += this.generateDocString(operation.documentation, tabCount);\n+ code += tabs(tabCount) + operationName + '(callback?: (err: AWSError, data: ' + outputShape + ') => void): Request<' + outputShape + ', AWSError>;\\n';\n+\n+ return code;\n+};\n+\n+/**\n+ * Generates class method types for a waiter.\n+ */\n+TSGenerator.prototype.generateTypingsFromWaiters = function generateTypingsFromWaiters(className, waiterState, waiter, underlyingOperation, tabCount) {", + "comment_created_at": "2016-10-27T01:01:32+00:00", + "comment_author": "LiuJoyceC", + "comment_body": "`waiterState` needs to have its first letter lowercased. For example, using the state `\"BucketExists\"` will result in an error, but `\"bucketExists\"` is correct.\n", + "pr_file_module": null + }, + { + "comment_id": "85430054", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1189, + "pr_file": "scripts/lib/ts-generator.js", + "discussion_id": "85250379", + "commented_code": "@@ -0,0 +1,437 @@\n+var fs = require('fs');\n+var path = require('path');\n+\n+var CUSTOM_CONFIG_ENUMS = {\n+ DUALSTACK: {\n+ FILE_NAME: 'config_use_dualstack',\n+ INTERFACE: 'UseDualstackConfigOptions'\n+ }\n+};\n+\n+function TSGenerator(options) {\n+ this._sdkRootDir = options.SdkRootDirectory || process.cwd();\n+ this._apiRootDir = path.join(this._sdkRootDir, 'apis');\n+ this._metadataPath = path.join(this._apiRootDir, 'metadata.json');\n+ this._clientsDir = path.join(this._sdkRootDir, 'clients');\n+ this.metadata = null;\n+ this.typings = {};\n+ this.fillApiModelFileNames(this._apiRootDir);\n+}\n+\n+/**\n+ * Loads the AWS SDK metadata.json file.\n+ */\n+TSGenerator.prototype.loadMetadata = function loadMetadata() {\n+ var metadataFile = fs.readFileSync(this._metadataPath);\n+ this.metadata = JSON.parse(metadataFile);\n+ return this.metadata;\n+};\n+\n+/**\n+ * Modifies metadata to include api model filenames.\n+ */\n+TSGenerator.prototype.fillApiModelFileNames = function fillApiModelFileNames(apisPath) {\n+ var modelPaths = fs.readdirSync(apisPath);\n+ if (!this.metadata) {\n+ this.loadMetadata();\n+ }\n+ var metadata = this.metadata;\n+\n+ // sort paths so latest versions appear first\n+ modelPaths = modelPaths.sort(function sort(a, b) {\n+ if (a < b) {\n+ return 1;\n+ } else if (a > b) {\n+ return -1;\n+ } else {\n+ return 0;\n+ }\n+ });\n+\n+ // Only get latest version of models\n+ var foundModels = Object.create(null);\n+ modelPaths.forEach(function(modelFileName) {\n+ var match = modelFileName.match(/^(.+)(-[\\d]{4}-[\\d]{2}-[\\d]{2})\\.normal\\.json$/i);\n+ if (match) {\n+ var model = match[1];\n+ if (!foundModels[model]) {\n+ foundModels[model] = modelFileName;\n+ }\n+ }\n+ });\n+\n+ // now update the metadata\n+ var keys = Object.keys(metadata);\n+ keys.forEach(function(key) {\n+ var modelName = metadata[key].prefix || key;\n+ metadata[key].api_path = foundModels[modelName];\n+ // find waiters file\n+ var baseName = foundModels[modelName].split('.')[0];\n+ if (modelPaths.indexOf(baseName + '.waiters2.json') >= 0) {\n+ metadata[key].waiters_path = baseName + '.waiters2.json';\n+ }\n+ });\n+};\n+\n+/**\n+ * Generates the file containing DocumentClient interfaces.\n+ */\n+TSGenerator.prototype.generateDocumentClientInterfaces = function 
generateDocumentClientInterfaces() {\n+ var self = this;\n+ // get the dynamodb model\n+ var dynamodbModel = this.loadServiceApi('dynamodb');\n+ var code = '';\n+ // include node reference\n+ code += '///\\n';\n+ // generate shapes\n+ var modelShapes = dynamodbModel.shapes;\n+ // iterate over each shape\n+ var shapeKeys = Object.keys(modelShapes);\n+ shapeKeys.forEach(function (shapeKey) {\n+ var modelShape = modelShapes[shapeKey];\n+ // ignore exceptions\n+ if (modelShape.exception) {\n+ return;\n+ }\n+ // overwrite AttributeValue\n+ if (shapeKey === 'AttributeValue') {\n+ code += self.generateDocString('A JavaScript object or native type.');\n+ code += 'export type ' + shapeKey + ' = any;\\n';\n+ return;\n+ }\n+ code += self.generateTypingsFromShape(shapeKey, modelShape);\n+ });\n+\n+ // write file:\n+ this.writeTypingsFile('document_client_interfaces', path.join(this._sdkRootDir, 'lib', 'dynamodb'), code);\n+};\n+\n+/**\n+ * Returns a service model based on the serviceIdentifier.\n+ */\n+TSGenerator.prototype.loadServiceApi = function loadServiceApi(serviceIdentifier) {\n+ // first, find the correct identifier\n+ var metadata = this.metadata;\n+ var serviceFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].api_path);\n+ var serviceModelFile = fs.readFileSync(serviceFilePath);\n+ var serviceModel = JSON.parse(serviceModelFile);\n+ // load waiters file if it exists\n+ var waiterFilePath;\n+ if (metadata[serviceIdentifier].waiters_path) {\n+ waiterFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].waiters_path);\n+ var waiterModelFile = fs.readFileSync(waiterFilePath);\n+ var waiterModel = JSON.parse(waiterModelFile);\n+ serviceModel.waiters = waiterModel.waiters;\n+ }\n+\n+ return serviceModel;\n+};\n+\n+/**\n+ * Determines if a member is required by checking for it in a list.\n+ */\n+TSGenerator.prototype.checkRequired = function checkRequired(list, member) {\n+ if (list.indexOf(member)) {\n+ return true;\n+ }\n+ return false;\n+};\n+\n+/**\n+ * Generates whitespace based on the count.\n+ */\n+TSGenerator.prototype.tabs = function tabs(count) {\n+ var code = '';\n+ for (var i = 0; i < count; i++) {\n+ code += ' ';\n+ }\n+ return code;\n+};\n+\n+/**\n+ * Transforms documentation string to a more readable format.\n+ */\n+TSGenerator.prototype.transformDocumentation = function transformDocumentation(documentation) {\n+ if (!documentation) {\n+ return '';\n+ }\n+ documentation = documentation.replace(/<(?:.|\\n)*?>/gm, '');\n+ documentation = documentation.replace(/\\*\\//g, '*');\n+ return documentation;\n+};\n+\n+/**\n+ * Returns a doc string based on the supplied documentation.\n+ * Also tabs the doc string if a count is provided.\n+ */\n+TSGenerator.prototype.generateDocString = function generateDocString(documentation, tabCount) {\n+ tabCount = tabCount || 0;\n+ var code = '';\n+ code += this.tabs(tabCount) + '/**\\n';\n+ code += this.tabs(tabCount) + ' * ' + this.transformDocumentation(documentation) + '\\n';\n+ code += this.tabs(tabCount) + ' */\\n';\n+ return code;\n+};\n+\n+/**\n+ * Returns an array of custom configuration options based on a service identiffier.\n+ * Custom configuration options are determined by checking the metadata.json file.\n+ */\n+TSGenerator.prototype.generateCustomConfigFromMetadata = function generateCustomConfigFromMetadata(serviceIdentifier) {\n+ // some services have additional configuration options that are defined in the metadata.json file\n+ // i.e. 
dualstackAvailable = useDualstack\n+ // create reference to custom options\n+ var customConfigurations = [];\n+ var serviceMetadata = this.metadata[serviceIdentifier];\n+ // loop through metadata members\n+ for (var memberName in serviceMetadata) {\n+ if (!serviceMetadata.hasOwnProperty(memberName)) {\n+ continue;\n+ }\n+ var memberValue = serviceMetadata[memberName];\n+ // check configs\n+ switch (memberName) {\n+ case 'dualstackAvailable':\n+ customConfigurations.push(CUSTOM_CONFIG_ENUMS.DUALSTACK);\n+ break;\n+ }\n+ }\n+\n+ return customConfigurations;\n+};\n+\n+/**\n+ * Generates a type or interface based on the shape.\n+ */\n+TSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromShape(shapeKey, shape, tabCount) {\n+ // some shapes shouldn't be generated if they are javascript primitives\n+ if (['string', 'boolean', 'number', 'Date', 'Blob'].indexOf(shapeKey) >= 0) {\n+ return '';\n+ }\n+ var self = this;\n+ var code = '';\n+ tabCount = tabCount || 0;\n+ var tabs = this.tabs;\n+ var type = shape.type;\n+ if (type === 'structure') {\n+ code += tabs(tabCount) + 'export interface ' + shapeKey + ' {\\n';\n+ var members = shape.members;\n+ // cycle through members\n+ var memberKeys = Object.keys(members);\n+ memberKeys.forEach(function(memberKey) {\n+ // docs\n+ var member = members[memberKey];\n+ if (member.documentation) {\n+ code += self.generateDocString(member.documentation, tabCount + 1);\n+ }\n+ var required = self.checkRequired(shape.required || [], memberKey) ? '?' : '';\n+ code += tabs(tabCount + 1) + memberKey + required + ': ' + member.shape + ';\\n';\n+ });\n+ code += tabs(tabCount) + '}\\n';\n+ } else if (type === 'list') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = ' + shape.member.shape + '[];\\n';\n+ } else if (type === 'map') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = {[key: string]: ' + shape.value.shape + '};\\n';\n+ } else if (type === 'string' || type === 'character') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = string;\\n';\n+ } else if (['double', 'long', 'short', 'biginteger', 'bigdecimal', 'integer', 'float'].indexOf(type) >= 0) {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = number;\\n';\n+ } else if (type === 'timestamp') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = Date;\\n';\n+ } else if (type === 'boolean') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = boolean;\\n';\n+ } else if (type === 'blob' || type === 'binary') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = Buffer|Uint8Array|Blob|string;\\n';\n+ }\n+ return code;\n+};\n+\n+/**\n+ * Generates a class method type for an operation.\n+ */\n+TSGenerator.prototype.generateTypingsFromOperations = function generateTypingsFromOperations(className, operation, tabCount) {\n+ var code = '';\n+ tabCount = tabCount || 0;\n+ var tabs = this.tabs;\n+\n+ var input = operation.input;\n+ var output = operation.output;\n+ var operationName = operation.name.charAt(0).toLowerCase() + operation.name.substring(1);\n+\n+ var inputShape = input ? className + '.' + input.shape : '{}';\n+ var outputShape = output ? className + '.' 
+ output.shape : '{}';\n+\n+ code += this.generateDocString(operation.documentation, tabCount);\n+ code += tabs(tabCount) + operationName + '(params: ' + inputShape + ', callback?: (err: AWSError, data: ' + outputShape + ') => void): Request<' + outputShape + ', AWSError>;\\n';\n+ code += this.generateDocString(operation.documentation, tabCount);\n+ code += tabs(tabCount) + operationName + '(callback?: (err: AWSError, data: ' + outputShape + ') => void): Request<' + outputShape + ', AWSError>;\\n';\n+\n+ return code;\n+};\n+\n+/**\n+ * Generates class method types for a waiter.\n+ */\n+TSGenerator.prototype.generateTypingsFromWaiters = function generateTypingsFromWaiters(className, waiterState, waiter, underlyingOperation, tabCount) {", + "comment_created_at": "2016-10-27T21:19:43+00:00", + "comment_author": "chrisradek", + "comment_body": "Good catch!\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "138404158", + "pr_number": 1716, + "pr_file": "lib/service.js", + "created_at": "2017-09-12T16:43:11+00:00", + "commented_code": "/**\n * @api private\n */\n getServiceClock: function getServiceClock() {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "138404158", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1716, + "pr_file": "lib/service.js", + "discussion_id": "138404158", + "commented_code": "@@ -403,6 +403,31 @@ AWS.Service = inherit({\n /**\n * @api private\n */\n+ getServiceClock: function getServiceClock() {", + "comment_created_at": "2017-09-12T16:43:11+00:00", + "comment_author": "jeskew", + "comment_body": "I'm not sure this method name reflects what the function does. How about 'getSkewCorrectedDate' or just 'getDate'?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "193597828", + "pr_number": 2074, + "pr_file": "lib/event-stream/format-message.js", + "created_at": "2018-06-07T00:29:04+00:00", + "commented_code": "var crypto = require('../../lib/util').crypto;\nvar Int64 = require('./int64').Int64;\nvar toBuffer = require('./to-buffer').toBuffer;\nvar allocBuffer = require('./alloc-buffer').allocBuffer;\n\nfunction formatMessage(message) {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "193597828", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2074, + "pr_file": "lib/event-stream/format-message.js", + "discussion_id": "193597828", + "commented_code": "@@ -0,0 +1,166 @@\n+var crypto = require('../../lib/util').crypto;\n+var Int64 = require('./int64').Int64;\n+var toBuffer = require('./to-buffer').toBuffer;\n+var allocBuffer = require('./alloc-buffer').allocBuffer;\n+\n+function formatMessage(message) {", + "comment_created_at": "2018-06-07T00:29:04+00:00", + "comment_author": "AllanZhengYP", + "comment_body": "This name is a bit confusing to me. `buildMessage` is probably a better name. 
Same for the `builderHeaders`", + "pr_file_module": null + }, + { + "comment_id": "193619601", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2074, + "pr_file": "lib/event-stream/format-message.js", + "discussion_id": "193597828", + "commented_code": "@@ -0,0 +1,166 @@\n+var crypto = require('../../lib/util').crypto;\n+var Int64 = require('./int64').Int64;\n+var toBuffer = require('./to-buffer').toBuffer;\n+var allocBuffer = require('./alloc-buffer').allocBuffer;\n+\n+function formatMessage(message) {", + "comment_created_at": "2018-06-07T03:55:00+00:00", + "comment_author": "chrisradek", + "comment_body": "Sure, I don't have a strong preference either way.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "227528680", + "pr_number": 2253, + "pr_file": "lib/discover_endpoint.js", + "created_at": "2018-10-23T19:15:16+00:00", + "commented_code": "var AWS = require('./core');\nvar util = require('./util');\nvar iniLoader = require('./shared-ini').iniLoader;\nvar ENDPOINT_OPERATION_MAX_RETRIES = 60;\nvar endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n\n/**\n * Generate key to index the endpoints in the cache\n * @return [map] object with keys to index endpoints.\n * @api private\n */\nfunction getCacheKey(request) {\n var service = request.service;\n var api = service.api || {};\n var operations = api.operations;\n var identifiers = {};\n if (operations[request.operation] && operations[request.operation].name) {\n identifiers.operation = operations[request.operation].name;\n }\n if (service.config.region) {\n identifiers.region = service.config.region;\n }\n if (api.serviceId) {\n identifiers.serviceId = api.serviceId\n }\n if (service.config.credentials.accessKeyId) {\n identifiers.accessKeyId = service.config.credentials.accessKeyId\n }\n return identifiers;\n}\n\n/**\n * @api private\n */\nfunction marshallCustomeIdentifiersHelper(result, params, shape) {\n if (!shape || params === undefined || params === null) return;\n if (shape.type === 'structure' && shape.required && shape.required.length > 0) {\n util.arrayEach(shape.required, function(name) {\n var memberShape = shape.members[name];\n if (memberShape.endpointDiscoveryId === true) {\n var locationName = memberShape.isLocationName ? memberShape.name : name;\n result[locationName] = String(params[name]);\n } else {\n marshallCustomeIdentifiersHelper(result, params[name], memberShape);\n }\n });\n }\n}\n\n/**\n * Get customized cache key according to the 'endpointDiscoveryId' trait.\n * @param [object] request object\n * @param [object] input shape of the given operation's api\n */\nfunction marshallCustomeIdentifiers(request, shape) {\n var identifiers = {};\n marshallCustomeIdentifiersHelper(identifiers, request.params, shape);\n return identifiers;\n}\n\n/**\n * Call endpoint discovery operation when it's optional.\n * When endpoint is available in cache then use the cached endpoints. If endpoints\n * are unavailable then use regional endpoints and call endpoint discovery operation\n * asynchronously. 
This is turned off by default.\n * @param [object] request object\n * @api private\n */\nfunction optionalDisverEndpoint(request) {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "227528680", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2253, + "pr_file": "lib/discover_endpoint.js", + "discussion_id": "227528680", + "commented_code": "@@ -0,0 +1,305 @@\n+var AWS = require('./core');\n+var util = require('./util');\n+var iniLoader = require('./shared-ini').iniLoader;\n+var ENDPOINT_OPERATION_MAX_RETRIES = 60;\n+var endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n+\n+/**\n+ * Generate key to index the endpoints in the cache\n+ * @return [map] object with keys to index endpoints.\n+ * @api private\n+ */\n+function getCacheKey(request) {\n+ var service = request.service;\n+ var api = service.api || {};\n+ var operations = api.operations;\n+ var identifiers = {};\n+ if (operations[request.operation] && operations[request.operation].name) {\n+ identifiers.operation = operations[request.operation].name;\n+ }\n+ if (service.config.region) {\n+ identifiers.region = service.config.region;\n+ }\n+ if (api.serviceId) {\n+ identifiers.serviceId = api.serviceId\n+ }\n+ if (service.config.credentials.accessKeyId) {\n+ identifiers.accessKeyId = service.config.credentials.accessKeyId\n+ }\n+ return identifiers;\n+}\n+\n+/**\n+ * @api private\n+ */\n+function marshallCustomeIdentifiersHelper(result, params, shape) {\n+ if (!shape || params === undefined || params === null) return;\n+ if (shape.type === 'structure' && shape.required && shape.required.length > 0) {\n+ util.arrayEach(shape.required, function(name) {\n+ var memberShape = shape.members[name];\n+ if (memberShape.endpointDiscoveryId === true) {\n+ var locationName = memberShape.isLocationName ? memberShape.name : name;\n+ result[locationName] = String(params[name]);\n+ } else {\n+ marshallCustomeIdentifiersHelper(result, params[name], memberShape);\n+ }\n+ });\n+ }\n+}\n+\n+/**\n+ * Get customized cache key according to the 'endpointDiscoveryId' trait.\n+ * @param [object] request object\n+ * @param [object] input shape of the given operation's api\n+ */\n+function marshallCustomeIdentifiers(request, shape) {\n+ var identifiers = {};\n+ marshallCustomeIdentifiersHelper(identifiers, request.params, shape);\n+ return identifiers;\n+}\n+\n+/**\n+ * Call endpoint discovery operation when it's optional.\n+ * When endpoint is available in cache then use the cached endpoints. If endpoints\n+ * are unavailable then use regional endpoints and call endpoint discovery operation\n+ * asynchronously. This is turned off by default.\n+ * @param [object] request object\n+ * @api private\n+ */\n+function optionalDisverEndpoint(request) {", + "comment_created_at": "2018-10-23T19:15:16+00:00", + "comment_author": "chrisradek", + "comment_body": "Can you spell out `optionalDiscoveryVersionEndpoint` (assuming that's what Disver means). 
`Disver` isn't a common abbreviation and coming back to this in the future it may be confusing what this means.", + "pr_file_module": null + }, + { + "comment_id": "228002596", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2253, + "pr_file": "lib/discover_endpoint.js", + "discussion_id": "227528680", + "commented_code": "@@ -0,0 +1,305 @@\n+var AWS = require('./core');\n+var util = require('./util');\n+var iniLoader = require('./shared-ini').iniLoader;\n+var ENDPOINT_OPERATION_MAX_RETRIES = 60;\n+var endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n+\n+/**\n+ * Generate key to index the endpoints in the cache\n+ * @return [map] object with keys to index endpoints.\n+ * @api private\n+ */\n+function getCacheKey(request) {\n+ var service = request.service;\n+ var api = service.api || {};\n+ var operations = api.operations;\n+ var identifiers = {};\n+ if (operations[request.operation] && operations[request.operation].name) {\n+ identifiers.operation = operations[request.operation].name;\n+ }\n+ if (service.config.region) {\n+ identifiers.region = service.config.region;\n+ }\n+ if (api.serviceId) {\n+ identifiers.serviceId = api.serviceId\n+ }\n+ if (service.config.credentials.accessKeyId) {\n+ identifiers.accessKeyId = service.config.credentials.accessKeyId\n+ }\n+ return identifiers;\n+}\n+\n+/**\n+ * @api private\n+ */\n+function marshallCustomeIdentifiersHelper(result, params, shape) {\n+ if (!shape || params === undefined || params === null) return;\n+ if (shape.type === 'structure' && shape.required && shape.required.length > 0) {\n+ util.arrayEach(shape.required, function(name) {\n+ var memberShape = shape.members[name];\n+ if (memberShape.endpointDiscoveryId === true) {\n+ var locationName = memberShape.isLocationName ? memberShape.name : name;\n+ result[locationName] = String(params[name]);\n+ } else {\n+ marshallCustomeIdentifiersHelper(result, params[name], memberShape);\n+ }\n+ });\n+ }\n+}\n+\n+/**\n+ * Get customized cache key according to the 'endpointDiscoveryId' trait.\n+ * @param [object] request object\n+ * @param [object] input shape of the given operation's api\n+ */\n+function marshallCustomeIdentifiers(request, shape) {\n+ var identifiers = {};\n+ marshallCustomeIdentifiersHelper(identifiers, request.params, shape);\n+ return identifiers;\n+}\n+\n+/**\n+ * Call endpoint discovery operation when it's optional.\n+ * When endpoint is available in cache then use the cached endpoints. If endpoints\n+ * are unavailable then use regional endpoints and call endpoint discovery operation\n+ * asynchronously. This is turned off by default.\n+ * @param [object] request object\n+ * @api private\n+ */\n+function optionalDisverEndpoint(request) {", + "comment_created_at": "2018-10-25T00:27:24+00:00", + "comment_author": "AllanZhengYP", + "comment_body": "It is a typo. And my auto-completion respects my typo of this name elsewhere. 
Thank you for pointing this out", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/aws-sdk-js-semantic-naming-conventions.md b/_reviewers/aws-sdk-js-semantic-naming-conventions.md index ce68408..24709b1 100644 --- a/_reviewers/aws-sdk-js-semantic-naming-conventions.md +++ b/_reviewers/aws-sdk-js-semantic-naming-conventions.md @@ -39,199 +39,3 @@ if (['string', 'boolean', 'number', 'Date', 'Blob'].indexOf(shapeKey) >= 0) { return code; } ``` - - -[ - { - "discussion_id": "1013368805", - "pr_number": 4261, - "pr_file": "lib/services/resourceexplorer2.js", - "created_at": "2022-11-03T20:29:07+00:00", - "commented_code": "var AWS = require('../core');\n\nAWS.util.update(AWS.ResourceExplorer2.prototype, {\n validateService: function () {\n var config = this.config;\n if (config.useDualstackEndpoint === false || config.useDualStack === false) {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "1013368805", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 4261, - "pr_file": "lib/services/resourceexplorer2.js", - "discussion_id": "1013368805", - "commented_code": "@@ -0,0 +1,12 @@\n+var AWS = require('../core');\n+\n+AWS.util.update(AWS.ResourceExplorer2.prototype, {\n+ validateService: function () {\n+ var config = this.config;\n+ if (config.useDualstackEndpoint === false || config.useDualStack === false) {", - "comment_created_at": "2022-11-03T20:29:07+00:00", - "comment_author": "kuhe", - "comment_body": "use this casing: `useDualstack`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "85186711", - "pr_number": 1189, - "pr_file": "scripts/lib/ts-generator.js", - "created_at": "2016-10-26T18:17:43+00:00", - "commented_code": "var fs = require('fs');\nvar path = require('path');\n\nvar CUSTOM_CONFIG_ENUMS = {\n DUALSTACK: {\n FILE_NAME: 'config_use_dualstack',\n INTERFACE: 'UseDualstackConfigOptions'\n }\n};\n\nfunction TSGenerator(options) {\n this._sdkRootDir = options.SdkRootDirectory || process.cwd();\n this._apiRootDir = path.join(this._sdkRootDir, 'apis');\n this._metadataPath = path.join(this._apiRootDir, 'metadata.json');\n this._clientsDir = path.join(this._sdkRootDir, 'clients');\n this.metadata = null;\n this.typings = {};\n this.fillApiModelFileNames(this._apiRootDir);\n}\n\n/**\n * Loads the AWS SDK metadata.json file.\n */\nTSGenerator.prototype.loadMetadata = function loadMetadata() {\n var metadataFile = fs.readFileSync(this._metadataPath);\n this.metadata = JSON.parse(metadataFile);\n return this.metadata;\n};\n\n/**\n * Modifies metadata to include api model filenames.\n */\nTSGenerator.prototype.fillApiModelFileNames = function fillApiModelFileNames(apisPath) {\n var modelPaths = fs.readdirSync(apisPath);\n if (!this.metadata) {\n this.loadMetadata();\n }\n var metadata = this.metadata;\n\n // sort paths so latest versions appear first\n modelPaths = modelPaths.sort(function sort(a, b) {\n if (a < b) {\n return 1;\n } else if (a > b) {\n return -1;\n } else {\n return 0;\n }\n });\n\n // Only get latest version of models\n var foundModels = Object.create(null);\n modelPaths.forEach(function(modelFileName) {\n var match = modelFileName.match(/^(.+)(-[\\d]{4}-[\\d]{2}-[\\d]{2})\\.normal\\.json$/i);\n if (match) {\n var model = match[1];\n if (!foundModels[model]) {\n foundModels[model] = modelFileName;\n }\n }\n });\n\n // now update the metadata\n var keys = Object.keys(metadata);\n keys.forEach(function(key) {\n var modelName = metadata[key].prefix || key;\n metadata[key].api_path 
= foundModels[modelName];\n // find waiters file\n var baseName = foundModels[modelName].split('.')[0];\n if (modelPaths.indexOf(baseName + '.waiters2.json') >= 0) {\n metadata[key].waiters_path = baseName + '.waiters2.json';\n }\n });\n};\n\n/**\n * Generates the file containing DocumentClient interfaces.\n */\nTSGenerator.prototype.generateDocumentClientInterfaces = function generateDocumentClientInterfaces() {\n var self = this;\n // get the dynamodb model\n var dynamodbModel = this.loadServiceApi('dynamodb');\n var code = '';\n // include node reference\n code += '///\\n';\n // generate shapes\n var modelShapes = dynamodbModel.shapes;\n // iterate over each shape\n var shapeKeys = Object.keys(modelShapes);\n shapeKeys.forEach(function (shapeKey) {\n var modelShape = modelShapes[shapeKey];\n // ignore exceptions\n if (modelShape.exception) {\n return;\n }\n // overwrite AttributeValue\n if (shapeKey === 'AttributeValue') {\n code += self.generateDocString('A JavaScript object or native type.');\n code += 'export type ' + shapeKey + ' = any;\\n';\n return;\n }\n code += self.generateTypingsFromShape(shapeKey, modelShape);\n });\n\n // write file:\n this.writeTypingsFile('document_client_interfaces', path.join(this._sdkRootDir, 'lib', 'dynamodb'), code);\n};\n\n/**\n * Returns a service model based on the serviceIdentifier.\n */\nTSGenerator.prototype.loadServiceApi = function loadServiceApi(serviceIdentifier) {\n // first, find the correct identifier\n var metadata = this.metadata;\n var serviceFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].api_path);\n var serviceModelFile = fs.readFileSync(serviceFilePath);\n var serviceModel = JSON.parse(serviceModelFile);\n // load waiters file if it exists\n var waiterFilePath;\n if (metadata[serviceIdentifier].waiters_path) {\n waiterFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].waiters_path);\n var waiterModelFile = fs.readFileSync(waiterFilePath);\n var waiterModel = JSON.parse(waiterModelFile);\n serviceModel.waiters = waiterModel.waiters;\n }\n\n return serviceModel;\n};\n\n/**\n * Determines if a member is required by checking for it in a list.\n */\nTSGenerator.prototype.checkRequired = function checkRequired(list, member) {\n if (list.indexOf(member)) {\n return true;\n }\n return false;\n};\n\n/**\n * Generates whitespace based on the count.\n */\nTSGenerator.prototype.tabs = function tabs(count) {\n var code = '';\n for (var i = 0; i < count; i++) {\n code += ' ';\n }\n return code;\n};\n\n/**\n * Transforms documentation string to a more readable format.\n */\nTSGenerator.prototype.transformDocumentation = function transformDocumentation(documentation) {\n if (!documentation) {\n return '';\n }\n documentation = documentation.replace(/<(?:.|\\n)*?>/gm, '');\n documentation = documentation.replace(/\\*\\//g, '*');\n return documentation;\n};\n\n/**\n * Returns a doc string based on the supplied documentation.\n * Also tabs the doc string if a count is provided.\n */\nTSGenerator.prototype.generateDocString = function generateDocString(documentation, tabCount) {\n tabCount = tabCount || 0;\n var code = '';\n code += this.tabs(tabCount) + '/**\\n';\n code += this.tabs(tabCount) + ' * ' + this.transformDocumentation(documentation) + '\\n';\n code += this.tabs(tabCount) + ' */\\n';\n return code;\n};\n\n/**\n * Returns an array of custom configuration options based on a service identiffier.\n * Custom configuration options are determined by checking the metadata.json file.\n 
*/\nTSGenerator.prototype.generateCustomConfigFromMetadata = function generateCustomConfigFromMetadata(serviceIdentifier) {\n // some services have additional configuration options that are defined in the metadata.json file\n // i.e. dualstackAvailable = useDualstack\n // create reference to custom options\n var customConfigurations = [];\n var serviceMetadata = this.metadata[serviceIdentifier];\n // loop through metadata members\n for (var memberName in serviceMetadata) {\n if (!serviceMetadata.hasOwnProperty(memberName)) {\n continue;\n }\n var memberValue = serviceMetadata[memberName];\n // check configs\n switch (memberName) {\n case 'dualstackAvailable':\n customConfigurations.push(CUSTOM_CONFIG_ENUMS.DUALSTACK);\n break;\n }\n }\n\n return customConfigurations;\n};\n\n/**\n * Generates a type or interface based on the shape.\n */\nTSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromShape(shapeKey, shape, tabCount) {\n // some shapes shouldn't be generated if they are javascript primitives\n if (['string', 'boolean', 'number', 'Date', 'Blob'].indexOf(shapeKey) >= 0) {\n return '';", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "85186711", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1189, - "pr_file": "scripts/lib/ts-generator.js", - "discussion_id": "85186711", - "commented_code": "@@ -0,0 +1,437 @@\n+var fs = require('fs');\n+var path = require('path');\n+\n+var CUSTOM_CONFIG_ENUMS = {\n+ DUALSTACK: {\n+ FILE_NAME: 'config_use_dualstack',\n+ INTERFACE: 'UseDualstackConfigOptions'\n+ }\n+};\n+\n+function TSGenerator(options) {\n+ this._sdkRootDir = options.SdkRootDirectory || process.cwd();\n+ this._apiRootDir = path.join(this._sdkRootDir, 'apis');\n+ this._metadataPath = path.join(this._apiRootDir, 'metadata.json');\n+ this._clientsDir = path.join(this._sdkRootDir, 'clients');\n+ this.metadata = null;\n+ this.typings = {};\n+ this.fillApiModelFileNames(this._apiRootDir);\n+}\n+\n+/**\n+ * Loads the AWS SDK metadata.json file.\n+ */\n+TSGenerator.prototype.loadMetadata = function loadMetadata() {\n+ var metadataFile = fs.readFileSync(this._metadataPath);\n+ this.metadata = JSON.parse(metadataFile);\n+ return this.metadata;\n+};\n+\n+/**\n+ * Modifies metadata to include api model filenames.\n+ */\n+TSGenerator.prototype.fillApiModelFileNames = function fillApiModelFileNames(apisPath) {\n+ var modelPaths = fs.readdirSync(apisPath);\n+ if (!this.metadata) {\n+ this.loadMetadata();\n+ }\n+ var metadata = this.metadata;\n+\n+ // sort paths so latest versions appear first\n+ modelPaths = modelPaths.sort(function sort(a, b) {\n+ if (a < b) {\n+ return 1;\n+ } else if (a > b) {\n+ return -1;\n+ } else {\n+ return 0;\n+ }\n+ });\n+\n+ // Only get latest version of models\n+ var foundModels = Object.create(null);\n+ modelPaths.forEach(function(modelFileName) {\n+ var match = modelFileName.match(/^(.+)(-[\\d]{4}-[\\d]{2}-[\\d]{2})\\.normal\\.json$/i);\n+ if (match) {\n+ var model = match[1];\n+ if (!foundModels[model]) {\n+ foundModels[model] = modelFileName;\n+ }\n+ }\n+ });\n+\n+ // now update the metadata\n+ var keys = Object.keys(metadata);\n+ keys.forEach(function(key) {\n+ var modelName = metadata[key].prefix || key;\n+ metadata[key].api_path = foundModels[modelName];\n+ // find waiters file\n+ var baseName = foundModels[modelName].split('.')[0];\n+ if (modelPaths.indexOf(baseName + '.waiters2.json') >= 0) {\n+ metadata[key].waiters_path = baseName + '.waiters2.json';\n+ }\n+ });\n+};\n+\n+/**\n+ * Generates the 
file containing DocumentClient interfaces.\n+ */\n+TSGenerator.prototype.generateDocumentClientInterfaces = function generateDocumentClientInterfaces() {\n+ var self = this;\n+ // get the dynamodb model\n+ var dynamodbModel = this.loadServiceApi('dynamodb');\n+ var code = '';\n+ // include node reference\n+ code += '///\\n';\n+ // generate shapes\n+ var modelShapes = dynamodbModel.shapes;\n+ // iterate over each shape\n+ var shapeKeys = Object.keys(modelShapes);\n+ shapeKeys.forEach(function (shapeKey) {\n+ var modelShape = modelShapes[shapeKey];\n+ // ignore exceptions\n+ if (modelShape.exception) {\n+ return;\n+ }\n+ // overwrite AttributeValue\n+ if (shapeKey === 'AttributeValue') {\n+ code += self.generateDocString('A JavaScript object or native type.');\n+ code += 'export type ' + shapeKey + ' = any;\\n';\n+ return;\n+ }\n+ code += self.generateTypingsFromShape(shapeKey, modelShape);\n+ });\n+\n+ // write file:\n+ this.writeTypingsFile('document_client_interfaces', path.join(this._sdkRootDir, 'lib', 'dynamodb'), code);\n+};\n+\n+/**\n+ * Returns a service model based on the serviceIdentifier.\n+ */\n+TSGenerator.prototype.loadServiceApi = function loadServiceApi(serviceIdentifier) {\n+ // first, find the correct identifier\n+ var metadata = this.metadata;\n+ var serviceFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].api_path);\n+ var serviceModelFile = fs.readFileSync(serviceFilePath);\n+ var serviceModel = JSON.parse(serviceModelFile);\n+ // load waiters file if it exists\n+ var waiterFilePath;\n+ if (metadata[serviceIdentifier].waiters_path) {\n+ waiterFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].waiters_path);\n+ var waiterModelFile = fs.readFileSync(waiterFilePath);\n+ var waiterModel = JSON.parse(waiterModelFile);\n+ serviceModel.waiters = waiterModel.waiters;\n+ }\n+\n+ return serviceModel;\n+};\n+\n+/**\n+ * Determines if a member is required by checking for it in a list.\n+ */\n+TSGenerator.prototype.checkRequired = function checkRequired(list, member) {\n+ if (list.indexOf(member)) {\n+ return true;\n+ }\n+ return false;\n+};\n+\n+/**\n+ * Generates whitespace based on the count.\n+ */\n+TSGenerator.prototype.tabs = function tabs(count) {\n+ var code = '';\n+ for (var i = 0; i < count; i++) {\n+ code += ' ';\n+ }\n+ return code;\n+};\n+\n+/**\n+ * Transforms documentation string to a more readable format.\n+ */\n+TSGenerator.prototype.transformDocumentation = function transformDocumentation(documentation) {\n+ if (!documentation) {\n+ return '';\n+ }\n+ documentation = documentation.replace(/<(?:.|\\n)*?>/gm, '');\n+ documentation = documentation.replace(/\\*\\//g, '*');\n+ return documentation;\n+};\n+\n+/**\n+ * Returns a doc string based on the supplied documentation.\n+ * Also tabs the doc string if a count is provided.\n+ */\n+TSGenerator.prototype.generateDocString = function generateDocString(documentation, tabCount) {\n+ tabCount = tabCount || 0;\n+ var code = '';\n+ code += this.tabs(tabCount) + '/**\\n';\n+ code += this.tabs(tabCount) + ' * ' + this.transformDocumentation(documentation) + '\\n';\n+ code += this.tabs(tabCount) + ' */\\n';\n+ return code;\n+};\n+\n+/**\n+ * Returns an array of custom configuration options based on a service identiffier.\n+ * Custom configuration options are determined by checking the metadata.json file.\n+ */\n+TSGenerator.prototype.generateCustomConfigFromMetadata = function generateCustomConfigFromMetadata(serviceIdentifier) {\n+ // some services have additional configuration options that are 
defined in the metadata.json file\n+ // i.e. dualstackAvailable = useDualstack\n+ // create reference to custom options\n+ var customConfigurations = [];\n+ var serviceMetadata = this.metadata[serviceIdentifier];\n+ // loop through metadata members\n+ for (var memberName in serviceMetadata) {\n+ if (!serviceMetadata.hasOwnProperty(memberName)) {\n+ continue;\n+ }\n+ var memberValue = serviceMetadata[memberName];\n+ // check configs\n+ switch (memberName) {\n+ case 'dualstackAvailable':\n+ customConfigurations.push(CUSTOM_CONFIG_ENUMS.DUALSTACK);\n+ break;\n+ }\n+ }\n+\n+ return customConfigurations;\n+};\n+\n+/**\n+ * Generates a type or interface based on the shape.\n+ */\n+TSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromShape(shapeKey, shape, tabCount) {\n+ // some shapes shouldn't be generated if they are javascript primitives\n+ if (['string', 'boolean', 'number', 'Date', 'Blob'].indexOf(shapeKey) >= 0) {\n+ return '';", - "comment_created_at": "2016-10-26T18:17:43+00:00", - "comment_author": "LiuJoyceC", - "comment_body": "Instead of skipping shapes that happen to be named these, we could just add an underscore in front to differentiate the name of the shape from the javascript primitives.\nIt could still be useful to generate a typing for these shapes. For example, some services (like CodeCommit) have a shape named `\"Date\"` that is modeled as a string rather than a timestamp. If we rename the shape rather than skip it and have it be typed as a JavaScript `Date` object, then it could prevent errors before runtime, such as calling a `Date` method like `getMonth()` on the string.\n", - "pr_file_module": null - }, - { - "comment_id": "85187632", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1189, - "pr_file": "scripts/lib/ts-generator.js", - "discussion_id": "85186711", - "commented_code": "@@ -0,0 +1,437 @@\n+var fs = require('fs');\n+var path = require('path');\n+\n+var CUSTOM_CONFIG_ENUMS = {\n+ DUALSTACK: {\n+ FILE_NAME: 'config_use_dualstack',\n+ INTERFACE: 'UseDualstackConfigOptions'\n+ }\n+};\n+\n+function TSGenerator(options) {\n+ this._sdkRootDir = options.SdkRootDirectory || process.cwd();\n+ this._apiRootDir = path.join(this._sdkRootDir, 'apis');\n+ this._metadataPath = path.join(this._apiRootDir, 'metadata.json');\n+ this._clientsDir = path.join(this._sdkRootDir, 'clients');\n+ this.metadata = null;\n+ this.typings = {};\n+ this.fillApiModelFileNames(this._apiRootDir);\n+}\n+\n+/**\n+ * Loads the AWS SDK metadata.json file.\n+ */\n+TSGenerator.prototype.loadMetadata = function loadMetadata() {\n+ var metadataFile = fs.readFileSync(this._metadataPath);\n+ this.metadata = JSON.parse(metadataFile);\n+ return this.metadata;\n+};\n+\n+/**\n+ * Modifies metadata to include api model filenames.\n+ */\n+TSGenerator.prototype.fillApiModelFileNames = function fillApiModelFileNames(apisPath) {\n+ var modelPaths = fs.readdirSync(apisPath);\n+ if (!this.metadata) {\n+ this.loadMetadata();\n+ }\n+ var metadata = this.metadata;\n+\n+ // sort paths so latest versions appear first\n+ modelPaths = modelPaths.sort(function sort(a, b) {\n+ if (a < b) {\n+ return 1;\n+ } else if (a > b) {\n+ return -1;\n+ } else {\n+ return 0;\n+ }\n+ });\n+\n+ // Only get latest version of models\n+ var foundModels = Object.create(null);\n+ modelPaths.forEach(function(modelFileName) {\n+ var match = modelFileName.match(/^(.+)(-[\\d]{4}-[\\d]{2}-[\\d]{2})\\.normal\\.json$/i);\n+ if (match) {\n+ var model = match[1];\n+ if (!foundModels[model]) {\n+ foundModels[model] = 
modelFileName;\n+ }\n+ }\n+ });\n+\n+ // now update the metadata\n+ var keys = Object.keys(metadata);\n+ keys.forEach(function(key) {\n+ var modelName = metadata[key].prefix || key;\n+ metadata[key].api_path = foundModels[modelName];\n+ // find waiters file\n+ var baseName = foundModels[modelName].split('.')[0];\n+ if (modelPaths.indexOf(baseName + '.waiters2.json') >= 0) {\n+ metadata[key].waiters_path = baseName + '.waiters2.json';\n+ }\n+ });\n+};\n+\n+/**\n+ * Generates the file containing DocumentClient interfaces.\n+ */\n+TSGenerator.prototype.generateDocumentClientInterfaces = function generateDocumentClientInterfaces() {\n+ var self = this;\n+ // get the dynamodb model\n+ var dynamodbModel = this.loadServiceApi('dynamodb');\n+ var code = '';\n+ // include node reference\n+ code += '///\\n';\n+ // generate shapes\n+ var modelShapes = dynamodbModel.shapes;\n+ // iterate over each shape\n+ var shapeKeys = Object.keys(modelShapes);\n+ shapeKeys.forEach(function (shapeKey) {\n+ var modelShape = modelShapes[shapeKey];\n+ // ignore exceptions\n+ if (modelShape.exception) {\n+ return;\n+ }\n+ // overwrite AttributeValue\n+ if (shapeKey === 'AttributeValue') {\n+ code += self.generateDocString('A JavaScript object or native type.');\n+ code += 'export type ' + shapeKey + ' = any;\\n';\n+ return;\n+ }\n+ code += self.generateTypingsFromShape(shapeKey, modelShape);\n+ });\n+\n+ // write file:\n+ this.writeTypingsFile('document_client_interfaces', path.join(this._sdkRootDir, 'lib', 'dynamodb'), code);\n+};\n+\n+/**\n+ * Returns a service model based on the serviceIdentifier.\n+ */\n+TSGenerator.prototype.loadServiceApi = function loadServiceApi(serviceIdentifier) {\n+ // first, find the correct identifier\n+ var metadata = this.metadata;\n+ var serviceFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].api_path);\n+ var serviceModelFile = fs.readFileSync(serviceFilePath);\n+ var serviceModel = JSON.parse(serviceModelFile);\n+ // load waiters file if it exists\n+ var waiterFilePath;\n+ if (metadata[serviceIdentifier].waiters_path) {\n+ waiterFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].waiters_path);\n+ var waiterModelFile = fs.readFileSync(waiterFilePath);\n+ var waiterModel = JSON.parse(waiterModelFile);\n+ serviceModel.waiters = waiterModel.waiters;\n+ }\n+\n+ return serviceModel;\n+};\n+\n+/**\n+ * Determines if a member is required by checking for it in a list.\n+ */\n+TSGenerator.prototype.checkRequired = function checkRequired(list, member) {\n+ if (list.indexOf(member)) {\n+ return true;\n+ }\n+ return false;\n+};\n+\n+/**\n+ * Generates whitespace based on the count.\n+ */\n+TSGenerator.prototype.tabs = function tabs(count) {\n+ var code = '';\n+ for (var i = 0; i < count; i++) {\n+ code += ' ';\n+ }\n+ return code;\n+};\n+\n+/**\n+ * Transforms documentation string to a more readable format.\n+ */\n+TSGenerator.prototype.transformDocumentation = function transformDocumentation(documentation) {\n+ if (!documentation) {\n+ return '';\n+ }\n+ documentation = documentation.replace(/<(?:.|\\n)*?>/gm, '');\n+ documentation = documentation.replace(/\\*\\//g, '*');\n+ return documentation;\n+};\n+\n+/**\n+ * Returns a doc string based on the supplied documentation.\n+ * Also tabs the doc string if a count is provided.\n+ */\n+TSGenerator.prototype.generateDocString = function generateDocString(documentation, tabCount) {\n+ tabCount = tabCount || 0;\n+ var code = '';\n+ code += this.tabs(tabCount) + '/**\\n';\n+ code += this.tabs(tabCount) + ' * ' + 
this.transformDocumentation(documentation) + '\\n';\n+ code += this.tabs(tabCount) + ' */\\n';\n+ return code;\n+};\n+\n+/**\n+ * Returns an array of custom configuration options based on a service identiffier.\n+ * Custom configuration options are determined by checking the metadata.json file.\n+ */\n+TSGenerator.prototype.generateCustomConfigFromMetadata = function generateCustomConfigFromMetadata(serviceIdentifier) {\n+ // some services have additional configuration options that are defined in the metadata.json file\n+ // i.e. dualstackAvailable = useDualstack\n+ // create reference to custom options\n+ var customConfigurations = [];\n+ var serviceMetadata = this.metadata[serviceIdentifier];\n+ // loop through metadata members\n+ for (var memberName in serviceMetadata) {\n+ if (!serviceMetadata.hasOwnProperty(memberName)) {\n+ continue;\n+ }\n+ var memberValue = serviceMetadata[memberName];\n+ // check configs\n+ switch (memberName) {\n+ case 'dualstackAvailable':\n+ customConfigurations.push(CUSTOM_CONFIG_ENUMS.DUALSTACK);\n+ break;\n+ }\n+ }\n+\n+ return customConfigurations;\n+};\n+\n+/**\n+ * Generates a type or interface based on the shape.\n+ */\n+TSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromShape(shapeKey, shape, tabCount) {\n+ // some shapes shouldn't be generated if they are javascript primitives\n+ if (['string', 'boolean', 'number', 'Date', 'Blob'].indexOf(shapeKey) >= 0) {\n+ return '';", - "comment_created_at": "2016-10-26T18:21:55+00:00", - "comment_author": "LiuJoyceC", - "comment_body": "So for example, the typing could look like:\n\n```\nexport type _Date = string;\n```\n\nAnd the references to this shape would have to have an underscore added in front as well.\n", - "pr_file_module": null - }, - { - "comment_id": "85431358", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1189, - "pr_file": "scripts/lib/ts-generator.js", - "discussion_id": "85186711", - "commented_code": "@@ -0,0 +1,437 @@\n+var fs = require('fs');\n+var path = require('path');\n+\n+var CUSTOM_CONFIG_ENUMS = {\n+ DUALSTACK: {\n+ FILE_NAME: 'config_use_dualstack',\n+ INTERFACE: 'UseDualstackConfigOptions'\n+ }\n+};\n+\n+function TSGenerator(options) {\n+ this._sdkRootDir = options.SdkRootDirectory || process.cwd();\n+ this._apiRootDir = path.join(this._sdkRootDir, 'apis');\n+ this._metadataPath = path.join(this._apiRootDir, 'metadata.json');\n+ this._clientsDir = path.join(this._sdkRootDir, 'clients');\n+ this.metadata = null;\n+ this.typings = {};\n+ this.fillApiModelFileNames(this._apiRootDir);\n+}\n+\n+/**\n+ * Loads the AWS SDK metadata.json file.\n+ */\n+TSGenerator.prototype.loadMetadata = function loadMetadata() {\n+ var metadataFile = fs.readFileSync(this._metadataPath);\n+ this.metadata = JSON.parse(metadataFile);\n+ return this.metadata;\n+};\n+\n+/**\n+ * Modifies metadata to include api model filenames.\n+ */\n+TSGenerator.prototype.fillApiModelFileNames = function fillApiModelFileNames(apisPath) {\n+ var modelPaths = fs.readdirSync(apisPath);\n+ if (!this.metadata) {\n+ this.loadMetadata();\n+ }\n+ var metadata = this.metadata;\n+\n+ // sort paths so latest versions appear first\n+ modelPaths = modelPaths.sort(function sort(a, b) {\n+ if (a < b) {\n+ return 1;\n+ } else if (a > b) {\n+ return -1;\n+ } else {\n+ return 0;\n+ }\n+ });\n+\n+ // Only get latest version of models\n+ var foundModels = Object.create(null);\n+ modelPaths.forEach(function(modelFileName) {\n+ var match = 
modelFileName.match(/^(.+)(-[\\d]{4}-[\\d]{2}-[\\d]{2})\\.normal\\.json$/i);\n+ if (match) {\n+ var model = match[1];\n+ if (!foundModels[model]) {\n+ foundModels[model] = modelFileName;\n+ }\n+ }\n+ });\n+\n+ // now update the metadata\n+ var keys = Object.keys(metadata);\n+ keys.forEach(function(key) {\n+ var modelName = metadata[key].prefix || key;\n+ metadata[key].api_path = foundModels[modelName];\n+ // find waiters file\n+ var baseName = foundModels[modelName].split('.')[0];\n+ if (modelPaths.indexOf(baseName + '.waiters2.json') >= 0) {\n+ metadata[key].waiters_path = baseName + '.waiters2.json';\n+ }\n+ });\n+};\n+\n+/**\n+ * Generates the file containing DocumentClient interfaces.\n+ */\n+TSGenerator.prototype.generateDocumentClientInterfaces = function generateDocumentClientInterfaces() {\n+ var self = this;\n+ // get the dynamodb model\n+ var dynamodbModel = this.loadServiceApi('dynamodb');\n+ var code = '';\n+ // include node reference\n+ code += '///\\n';\n+ // generate shapes\n+ var modelShapes = dynamodbModel.shapes;\n+ // iterate over each shape\n+ var shapeKeys = Object.keys(modelShapes);\n+ shapeKeys.forEach(function (shapeKey) {\n+ var modelShape = modelShapes[shapeKey];\n+ // ignore exceptions\n+ if (modelShape.exception) {\n+ return;\n+ }\n+ // overwrite AttributeValue\n+ if (shapeKey === 'AttributeValue') {\n+ code += self.generateDocString('A JavaScript object or native type.');\n+ code += 'export type ' + shapeKey + ' = any;\\n';\n+ return;\n+ }\n+ code += self.generateTypingsFromShape(shapeKey, modelShape);\n+ });\n+\n+ // write file:\n+ this.writeTypingsFile('document_client_interfaces', path.join(this._sdkRootDir, 'lib', 'dynamodb'), code);\n+};\n+\n+/**\n+ * Returns a service model based on the serviceIdentifier.\n+ */\n+TSGenerator.prototype.loadServiceApi = function loadServiceApi(serviceIdentifier) {\n+ // first, find the correct identifier\n+ var metadata = this.metadata;\n+ var serviceFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].api_path);\n+ var serviceModelFile = fs.readFileSync(serviceFilePath);\n+ var serviceModel = JSON.parse(serviceModelFile);\n+ // load waiters file if it exists\n+ var waiterFilePath;\n+ if (metadata[serviceIdentifier].waiters_path) {\n+ waiterFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].waiters_path);\n+ var waiterModelFile = fs.readFileSync(waiterFilePath);\n+ var waiterModel = JSON.parse(waiterModelFile);\n+ serviceModel.waiters = waiterModel.waiters;\n+ }\n+\n+ return serviceModel;\n+};\n+\n+/**\n+ * Determines if a member is required by checking for it in a list.\n+ */\n+TSGenerator.prototype.checkRequired = function checkRequired(list, member) {\n+ if (list.indexOf(member)) {\n+ return true;\n+ }\n+ return false;\n+};\n+\n+/**\n+ * Generates whitespace based on the count.\n+ */\n+TSGenerator.prototype.tabs = function tabs(count) {\n+ var code = '';\n+ for (var i = 0; i < count; i++) {\n+ code += ' ';\n+ }\n+ return code;\n+};\n+\n+/**\n+ * Transforms documentation string to a more readable format.\n+ */\n+TSGenerator.prototype.transformDocumentation = function transformDocumentation(documentation) {\n+ if (!documentation) {\n+ return '';\n+ }\n+ documentation = documentation.replace(/<(?:.|\\n)*?>/gm, '');\n+ documentation = documentation.replace(/\\*\\//g, '*');\n+ return documentation;\n+};\n+\n+/**\n+ * Returns a doc string based on the supplied documentation.\n+ * Also tabs the doc string if a count is provided.\n+ */\n+TSGenerator.prototype.generateDocString = function 
generateDocString(documentation, tabCount) {\n+ tabCount = tabCount || 0;\n+ var code = '';\n+ code += this.tabs(tabCount) + '/**\\n';\n+ code += this.tabs(tabCount) + ' * ' + this.transformDocumentation(documentation) + '\\n';\n+ code += this.tabs(tabCount) + ' */\\n';\n+ return code;\n+};\n+\n+/**\n+ * Returns an array of custom configuration options based on a service identiffier.\n+ * Custom configuration options are determined by checking the metadata.json file.\n+ */\n+TSGenerator.prototype.generateCustomConfigFromMetadata = function generateCustomConfigFromMetadata(serviceIdentifier) {\n+ // some services have additional configuration options that are defined in the metadata.json file\n+ // i.e. dualstackAvailable = useDualstack\n+ // create reference to custom options\n+ var customConfigurations = [];\n+ var serviceMetadata = this.metadata[serviceIdentifier];\n+ // loop through metadata members\n+ for (var memberName in serviceMetadata) {\n+ if (!serviceMetadata.hasOwnProperty(memberName)) {\n+ continue;\n+ }\n+ var memberValue = serviceMetadata[memberName];\n+ // check configs\n+ switch (memberName) {\n+ case 'dualstackAvailable':\n+ customConfigurations.push(CUSTOM_CONFIG_ENUMS.DUALSTACK);\n+ break;\n+ }\n+ }\n+\n+ return customConfigurations;\n+};\n+\n+/**\n+ * Generates a type or interface based on the shape.\n+ */\n+TSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromShape(shapeKey, shape, tabCount) {\n+ // some shapes shouldn't be generated if they are javascript primitives\n+ if (['string', 'boolean', 'number', 'Date', 'Blob'].indexOf(shapeKey) >= 0) {\n+ return '';", - "comment_created_at": "2016-10-27T21:27:10+00:00", - "comment_author": "chrisradek", - "comment_body": "Fair point, thanks for finding that.\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "85250379", - "pr_number": 1189, - "pr_file": "scripts/lib/ts-generator.js", - "created_at": "2016-10-27T01:01:32+00:00", - "commented_code": "var fs = require('fs');\nvar path = require('path');\n\nvar CUSTOM_CONFIG_ENUMS = {\n DUALSTACK: {\n FILE_NAME: 'config_use_dualstack',\n INTERFACE: 'UseDualstackConfigOptions'\n }\n};\n\nfunction TSGenerator(options) {\n this._sdkRootDir = options.SdkRootDirectory || process.cwd();\n this._apiRootDir = path.join(this._sdkRootDir, 'apis');\n this._metadataPath = path.join(this._apiRootDir, 'metadata.json');\n this._clientsDir = path.join(this._sdkRootDir, 'clients');\n this.metadata = null;\n this.typings = {};\n this.fillApiModelFileNames(this._apiRootDir);\n}\n\n/**\n * Loads the AWS SDK metadata.json file.\n */\nTSGenerator.prototype.loadMetadata = function loadMetadata() {\n var metadataFile = fs.readFileSync(this._metadataPath);\n this.metadata = JSON.parse(metadataFile);\n return this.metadata;\n};\n\n/**\n * Modifies metadata to include api model filenames.\n */\nTSGenerator.prototype.fillApiModelFileNames = function fillApiModelFileNames(apisPath) {\n var modelPaths = fs.readdirSync(apisPath);\n if (!this.metadata) {\n this.loadMetadata();\n }\n var metadata = this.metadata;\n\n // sort paths so latest versions appear first\n modelPaths = modelPaths.sort(function sort(a, b) {\n if (a < b) {\n return 1;\n } else if (a > b) {\n return -1;\n } else {\n return 0;\n }\n });\n\n // Only get latest version of models\n var foundModels = Object.create(null);\n modelPaths.forEach(function(modelFileName) {\n var match = modelFileName.match(/^(.+)(-[\\d]{4}-[\\d]{2}-[\\d]{2})\\.normal\\.json$/i);\n if (match) {\n var model = match[1];\n if 
(!foundModels[model]) {\n foundModels[model] = modelFileName;\n }\n }\n });\n\n // now update the metadata\n var keys = Object.keys(metadata);\n keys.forEach(function(key) {\n var modelName = metadata[key].prefix || key;\n metadata[key].api_path = foundModels[modelName];\n // find waiters file\n var baseName = foundModels[modelName].split('.')[0];\n if (modelPaths.indexOf(baseName + '.waiters2.json') >= 0) {\n metadata[key].waiters_path = baseName + '.waiters2.json';\n }\n });\n};\n\n/**\n * Generates the file containing DocumentClient interfaces.\n */\nTSGenerator.prototype.generateDocumentClientInterfaces = function generateDocumentClientInterfaces() {\n var self = this;\n // get the dynamodb model\n var dynamodbModel = this.loadServiceApi('dynamodb');\n var code = '';\n // include node reference\n code += '///\\n';\n // generate shapes\n var modelShapes = dynamodbModel.shapes;\n // iterate over each shape\n var shapeKeys = Object.keys(modelShapes);\n shapeKeys.forEach(function (shapeKey) {\n var modelShape = modelShapes[shapeKey];\n // ignore exceptions\n if (modelShape.exception) {\n return;\n }\n // overwrite AttributeValue\n if (shapeKey === 'AttributeValue') {\n code += self.generateDocString('A JavaScript object or native type.');\n code += 'export type ' + shapeKey + ' = any;\\n';\n return;\n }\n code += self.generateTypingsFromShape(shapeKey, modelShape);\n });\n\n // write file:\n this.writeTypingsFile('document_client_interfaces', path.join(this._sdkRootDir, 'lib', 'dynamodb'), code);\n};\n\n/**\n * Returns a service model based on the serviceIdentifier.\n */\nTSGenerator.prototype.loadServiceApi = function loadServiceApi(serviceIdentifier) {\n // first, find the correct identifier\n var metadata = this.metadata;\n var serviceFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].api_path);\n var serviceModelFile = fs.readFileSync(serviceFilePath);\n var serviceModel = JSON.parse(serviceModelFile);\n // load waiters file if it exists\n var waiterFilePath;\n if (metadata[serviceIdentifier].waiters_path) {\n waiterFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].waiters_path);\n var waiterModelFile = fs.readFileSync(waiterFilePath);\n var waiterModel = JSON.parse(waiterModelFile);\n serviceModel.waiters = waiterModel.waiters;\n }\n\n return serviceModel;\n};\n\n/**\n * Determines if a member is required by checking for it in a list.\n */\nTSGenerator.prototype.checkRequired = function checkRequired(list, member) {\n if (list.indexOf(member)) {\n return true;\n }\n return false;\n};\n\n/**\n * Generates whitespace based on the count.\n */\nTSGenerator.prototype.tabs = function tabs(count) {\n var code = '';\n for (var i = 0; i < count; i++) {\n code += ' ';\n }\n return code;\n};\n\n/**\n * Transforms documentation string to a more readable format.\n */\nTSGenerator.prototype.transformDocumentation = function transformDocumentation(documentation) {\n if (!documentation) {\n return '';\n }\n documentation = documentation.replace(/<(?:.|\\n)*?>/gm, '');\n documentation = documentation.replace(/\\*\\//g, '*');\n return documentation;\n};\n\n/**\n * Returns a doc string based on the supplied documentation.\n * Also tabs the doc string if a count is provided.\n */\nTSGenerator.prototype.generateDocString = function generateDocString(documentation, tabCount) {\n tabCount = tabCount || 0;\n var code = '';\n code += this.tabs(tabCount) + '/**\\n';\n code += this.tabs(tabCount) + ' * ' + this.transformDocumentation(documentation) + '\\n';\n code += 
this.tabs(tabCount) + ' */\\n';\n return code;\n};\n\n/**\n * Returns an array of custom configuration options based on a service identiffier.\n * Custom configuration options are determined by checking the metadata.json file.\n */\nTSGenerator.prototype.generateCustomConfigFromMetadata = function generateCustomConfigFromMetadata(serviceIdentifier) {\n // some services have additional configuration options that are defined in the metadata.json file\n // i.e. dualstackAvailable = useDualstack\n // create reference to custom options\n var customConfigurations = [];\n var serviceMetadata = this.metadata[serviceIdentifier];\n // loop through metadata members\n for (var memberName in serviceMetadata) {\n if (!serviceMetadata.hasOwnProperty(memberName)) {\n continue;\n }\n var memberValue = serviceMetadata[memberName];\n // check configs\n switch (memberName) {\n case 'dualstackAvailable':\n customConfigurations.push(CUSTOM_CONFIG_ENUMS.DUALSTACK);\n break;\n }\n }\n\n return customConfigurations;\n};\n\n/**\n * Generates a type or interface based on the shape.\n */\nTSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromShape(shapeKey, shape, tabCount) {\n // some shapes shouldn't be generated if they are javascript primitives\n if (['string', 'boolean', 'number', 'Date', 'Blob'].indexOf(shapeKey) >= 0) {\n return '';\n }\n var self = this;\n var code = '';\n tabCount = tabCount || 0;\n var tabs = this.tabs;\n var type = shape.type;\n if (type === 'structure') {\n code += tabs(tabCount) + 'export interface ' + shapeKey + ' {\\n';\n var members = shape.members;\n // cycle through members\n var memberKeys = Object.keys(members);\n memberKeys.forEach(function(memberKey) {\n // docs\n var member = members[memberKey];\n if (member.documentation) {\n code += self.generateDocString(member.documentation, tabCount + 1);\n }\n var required = self.checkRequired(shape.required || [], memberKey) ? '?' : '';\n code += tabs(tabCount + 1) + memberKey + required + ': ' + member.shape + ';\\n';\n });\n code += tabs(tabCount) + '}\\n';\n } else if (type === 'list') {\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = ' + shape.member.shape + '[];\\n';\n } else if (type === 'map') {\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = {[key: string]: ' + shape.value.shape + '};\\n';\n } else if (type === 'string' || type === 'character') {\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = string;\\n';\n } else if (['double', 'long', 'short', 'biginteger', 'bigdecimal', 'integer', 'float'].indexOf(type) >= 0) {\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = number;\\n';\n } else if (type === 'timestamp') {\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = Date;\\n';\n } else if (type === 'boolean') {\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = boolean;\\n';\n } else if (type === 'blob' || type === 'binary') {\n code += tabs(tabCount) + 'export type ' + shapeKey + ' = Buffer|Uint8Array|Blob|string;\\n';\n }\n return code;\n};\n\n/**\n * Generates a class method type for an operation.\n */\nTSGenerator.prototype.generateTypingsFromOperations = function generateTypingsFromOperations(className, operation, tabCount) {\n var code = '';\n tabCount = tabCount || 0;\n var tabs = this.tabs;\n\n var input = operation.input;\n var output = operation.output;\n var operationName = operation.name.charAt(0).toLowerCase() + operation.name.substring(1);\n\n var inputShape = input ? className + '.' + input.shape : '{}';\n var outputShape = output ? 
className + '.' + output.shape : '{}';\n\n code += this.generateDocString(operation.documentation, tabCount);\n code += tabs(tabCount) + operationName + '(params: ' + inputShape + ', callback?: (err: AWSError, data: ' + outputShape + ') => void): Request<' + outputShape + ', AWSError>;\\n';\n code += this.generateDocString(operation.documentation, tabCount);\n code += tabs(tabCount) + operationName + '(callback?: (err: AWSError, data: ' + outputShape + ') => void): Request<' + outputShape + ', AWSError>;\\n';\n\n return code;\n};\n\n/**\n * Generates class method types for a waiter.\n */\nTSGenerator.prototype.generateTypingsFromWaiters = function generateTypingsFromWaiters(className, waiterState, waiter, underlyingOperation, tabCount) {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "85250379", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1189, - "pr_file": "scripts/lib/ts-generator.js", - "discussion_id": "85250379", - "commented_code": "@@ -0,0 +1,437 @@\n+var fs = require('fs');\n+var path = require('path');\n+\n+var CUSTOM_CONFIG_ENUMS = {\n+ DUALSTACK: {\n+ FILE_NAME: 'config_use_dualstack',\n+ INTERFACE: 'UseDualstackConfigOptions'\n+ }\n+};\n+\n+function TSGenerator(options) {\n+ this._sdkRootDir = options.SdkRootDirectory || process.cwd();\n+ this._apiRootDir = path.join(this._sdkRootDir, 'apis');\n+ this._metadataPath = path.join(this._apiRootDir, 'metadata.json');\n+ this._clientsDir = path.join(this._sdkRootDir, 'clients');\n+ this.metadata = null;\n+ this.typings = {};\n+ this.fillApiModelFileNames(this._apiRootDir);\n+}\n+\n+/**\n+ * Loads the AWS SDK metadata.json file.\n+ */\n+TSGenerator.prototype.loadMetadata = function loadMetadata() {\n+ var metadataFile = fs.readFileSync(this._metadataPath);\n+ this.metadata = JSON.parse(metadataFile);\n+ return this.metadata;\n+};\n+\n+/**\n+ * Modifies metadata to include api model filenames.\n+ */\n+TSGenerator.prototype.fillApiModelFileNames = function fillApiModelFileNames(apisPath) {\n+ var modelPaths = fs.readdirSync(apisPath);\n+ if (!this.metadata) {\n+ this.loadMetadata();\n+ }\n+ var metadata = this.metadata;\n+\n+ // sort paths so latest versions appear first\n+ modelPaths = modelPaths.sort(function sort(a, b) {\n+ if (a < b) {\n+ return 1;\n+ } else if (a > b) {\n+ return -1;\n+ } else {\n+ return 0;\n+ }\n+ });\n+\n+ // Only get latest version of models\n+ var foundModels = Object.create(null);\n+ modelPaths.forEach(function(modelFileName) {\n+ var match = modelFileName.match(/^(.+)(-[\\d]{4}-[\\d]{2}-[\\d]{2})\\.normal\\.json$/i);\n+ if (match) {\n+ var model = match[1];\n+ if (!foundModels[model]) {\n+ foundModels[model] = modelFileName;\n+ }\n+ }\n+ });\n+\n+ // now update the metadata\n+ var keys = Object.keys(metadata);\n+ keys.forEach(function(key) {\n+ var modelName = metadata[key].prefix || key;\n+ metadata[key].api_path = foundModels[modelName];\n+ // find waiters file\n+ var baseName = foundModels[modelName].split('.')[0];\n+ if (modelPaths.indexOf(baseName + '.waiters2.json') >= 0) {\n+ metadata[key].waiters_path = baseName + '.waiters2.json';\n+ }\n+ });\n+};\n+\n+/**\n+ * Generates the file containing DocumentClient interfaces.\n+ */\n+TSGenerator.prototype.generateDocumentClientInterfaces = function generateDocumentClientInterfaces() {\n+ var self = this;\n+ // get the dynamodb model\n+ var dynamodbModel = this.loadServiceApi('dynamodb');\n+ var code = '';\n+ // include node reference\n+ code += '///\\n';\n+ // generate shapes\n+ var modelShapes = 
dynamodbModel.shapes;\n+ // iterate over each shape\n+ var shapeKeys = Object.keys(modelShapes);\n+ shapeKeys.forEach(function (shapeKey) {\n+ var modelShape = modelShapes[shapeKey];\n+ // ignore exceptions\n+ if (modelShape.exception) {\n+ return;\n+ }\n+ // overwrite AttributeValue\n+ if (shapeKey === 'AttributeValue') {\n+ code += self.generateDocString('A JavaScript object or native type.');\n+ code += 'export type ' + shapeKey + ' = any;\\n';\n+ return;\n+ }\n+ code += self.generateTypingsFromShape(shapeKey, modelShape);\n+ });\n+\n+ // write file:\n+ this.writeTypingsFile('document_client_interfaces', path.join(this._sdkRootDir, 'lib', 'dynamodb'), code);\n+};\n+\n+/**\n+ * Returns a service model based on the serviceIdentifier.\n+ */\n+TSGenerator.prototype.loadServiceApi = function loadServiceApi(serviceIdentifier) {\n+ // first, find the correct identifier\n+ var metadata = this.metadata;\n+ var serviceFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].api_path);\n+ var serviceModelFile = fs.readFileSync(serviceFilePath);\n+ var serviceModel = JSON.parse(serviceModelFile);\n+ // load waiters file if it exists\n+ var waiterFilePath;\n+ if (metadata[serviceIdentifier].waiters_path) {\n+ waiterFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].waiters_path);\n+ var waiterModelFile = fs.readFileSync(waiterFilePath);\n+ var waiterModel = JSON.parse(waiterModelFile);\n+ serviceModel.waiters = waiterModel.waiters;\n+ }\n+\n+ return serviceModel;\n+};\n+\n+/**\n+ * Determines if a member is required by checking for it in a list.\n+ */\n+TSGenerator.prototype.checkRequired = function checkRequired(list, member) {\n+ if (list.indexOf(member)) {\n+ return true;\n+ }\n+ return false;\n+};\n+\n+/**\n+ * Generates whitespace based on the count.\n+ */\n+TSGenerator.prototype.tabs = function tabs(count) {\n+ var code = '';\n+ for (var i = 0; i < count; i++) {\n+ code += ' ';\n+ }\n+ return code;\n+};\n+\n+/**\n+ * Transforms documentation string to a more readable format.\n+ */\n+TSGenerator.prototype.transformDocumentation = function transformDocumentation(documentation) {\n+ if (!documentation) {\n+ return '';\n+ }\n+ documentation = documentation.replace(/<(?:.|\\n)*?>/gm, '');\n+ documentation = documentation.replace(/\\*\\//g, '*');\n+ return documentation;\n+};\n+\n+/**\n+ * Returns a doc string based on the supplied documentation.\n+ * Also tabs the doc string if a count is provided.\n+ */\n+TSGenerator.prototype.generateDocString = function generateDocString(documentation, tabCount) {\n+ tabCount = tabCount || 0;\n+ var code = '';\n+ code += this.tabs(tabCount) + '/**\\n';\n+ code += this.tabs(tabCount) + ' * ' + this.transformDocumentation(documentation) + '\\n';\n+ code += this.tabs(tabCount) + ' */\\n';\n+ return code;\n+};\n+\n+/**\n+ * Returns an array of custom configuration options based on a service identiffier.\n+ * Custom configuration options are determined by checking the metadata.json file.\n+ */\n+TSGenerator.prototype.generateCustomConfigFromMetadata = function generateCustomConfigFromMetadata(serviceIdentifier) {\n+ // some services have additional configuration options that are defined in the metadata.json file\n+ // i.e. 
dualstackAvailable = useDualstack\n+ // create reference to custom options\n+ var customConfigurations = [];\n+ var serviceMetadata = this.metadata[serviceIdentifier];\n+ // loop through metadata members\n+ for (var memberName in serviceMetadata) {\n+ if (!serviceMetadata.hasOwnProperty(memberName)) {\n+ continue;\n+ }\n+ var memberValue = serviceMetadata[memberName];\n+ // check configs\n+ switch (memberName) {\n+ case 'dualstackAvailable':\n+ customConfigurations.push(CUSTOM_CONFIG_ENUMS.DUALSTACK);\n+ break;\n+ }\n+ }\n+\n+ return customConfigurations;\n+};\n+\n+/**\n+ * Generates a type or interface based on the shape.\n+ */\n+TSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromShape(shapeKey, shape, tabCount) {\n+ // some shapes shouldn't be generated if they are javascript primitives\n+ if (['string', 'boolean', 'number', 'Date', 'Blob'].indexOf(shapeKey) >= 0) {\n+ return '';\n+ }\n+ var self = this;\n+ var code = '';\n+ tabCount = tabCount || 0;\n+ var tabs = this.tabs;\n+ var type = shape.type;\n+ if (type === 'structure') {\n+ code += tabs(tabCount) + 'export interface ' + shapeKey + ' {\\n';\n+ var members = shape.members;\n+ // cycle through members\n+ var memberKeys = Object.keys(members);\n+ memberKeys.forEach(function(memberKey) {\n+ // docs\n+ var member = members[memberKey];\n+ if (member.documentation) {\n+ code += self.generateDocString(member.documentation, tabCount + 1);\n+ }\n+ var required = self.checkRequired(shape.required || [], memberKey) ? '?' : '';\n+ code += tabs(tabCount + 1) + memberKey + required + ': ' + member.shape + ';\\n';\n+ });\n+ code += tabs(tabCount) + '}\\n';\n+ } else if (type === 'list') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = ' + shape.member.shape + '[];\\n';\n+ } else if (type === 'map') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = {[key: string]: ' + shape.value.shape + '};\\n';\n+ } else if (type === 'string' || type === 'character') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = string;\\n';\n+ } else if (['double', 'long', 'short', 'biginteger', 'bigdecimal', 'integer', 'float'].indexOf(type) >= 0) {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = number;\\n';\n+ } else if (type === 'timestamp') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = Date;\\n';\n+ } else if (type === 'boolean') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = boolean;\\n';\n+ } else if (type === 'blob' || type === 'binary') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = Buffer|Uint8Array|Blob|string;\\n';\n+ }\n+ return code;\n+};\n+\n+/**\n+ * Generates a class method type for an operation.\n+ */\n+TSGenerator.prototype.generateTypingsFromOperations = function generateTypingsFromOperations(className, operation, tabCount) {\n+ var code = '';\n+ tabCount = tabCount || 0;\n+ var tabs = this.tabs;\n+\n+ var input = operation.input;\n+ var output = operation.output;\n+ var operationName = operation.name.charAt(0).toLowerCase() + operation.name.substring(1);\n+\n+ var inputShape = input ? className + '.' + input.shape : '{}';\n+ var outputShape = output ? className + '.' 
+ output.shape : '{}';\n+\n+ code += this.generateDocString(operation.documentation, tabCount);\n+ code += tabs(tabCount) + operationName + '(params: ' + inputShape + ', callback?: (err: AWSError, data: ' + outputShape + ') => void): Request<' + outputShape + ', AWSError>;\\n';\n+ code += this.generateDocString(operation.documentation, tabCount);\n+ code += tabs(tabCount) + operationName + '(callback?: (err: AWSError, data: ' + outputShape + ') => void): Request<' + outputShape + ', AWSError>;\\n';\n+\n+ return code;\n+};\n+\n+/**\n+ * Generates class method types for a waiter.\n+ */\n+TSGenerator.prototype.generateTypingsFromWaiters = function generateTypingsFromWaiters(className, waiterState, waiter, underlyingOperation, tabCount) {", - "comment_created_at": "2016-10-27T01:01:32+00:00", - "comment_author": "LiuJoyceC", - "comment_body": "`waiterState` needs to have its first letter lowercased. For example, using the state `\"BucketExists\"` will result in an error, but `\"bucketExists\"` is correct.\n", - "pr_file_module": null - }, - { - "comment_id": "85430054", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1189, - "pr_file": "scripts/lib/ts-generator.js", - "discussion_id": "85250379", - "commented_code": "@@ -0,0 +1,437 @@\n+var fs = require('fs');\n+var path = require('path');\n+\n+var CUSTOM_CONFIG_ENUMS = {\n+ DUALSTACK: {\n+ FILE_NAME: 'config_use_dualstack',\n+ INTERFACE: 'UseDualstackConfigOptions'\n+ }\n+};\n+\n+function TSGenerator(options) {\n+ this._sdkRootDir = options.SdkRootDirectory || process.cwd();\n+ this._apiRootDir = path.join(this._sdkRootDir, 'apis');\n+ this._metadataPath = path.join(this._apiRootDir, 'metadata.json');\n+ this._clientsDir = path.join(this._sdkRootDir, 'clients');\n+ this.metadata = null;\n+ this.typings = {};\n+ this.fillApiModelFileNames(this._apiRootDir);\n+}\n+\n+/**\n+ * Loads the AWS SDK metadata.json file.\n+ */\n+TSGenerator.prototype.loadMetadata = function loadMetadata() {\n+ var metadataFile = fs.readFileSync(this._metadataPath);\n+ this.metadata = JSON.parse(metadataFile);\n+ return this.metadata;\n+};\n+\n+/**\n+ * Modifies metadata to include api model filenames.\n+ */\n+TSGenerator.prototype.fillApiModelFileNames = function fillApiModelFileNames(apisPath) {\n+ var modelPaths = fs.readdirSync(apisPath);\n+ if (!this.metadata) {\n+ this.loadMetadata();\n+ }\n+ var metadata = this.metadata;\n+\n+ // sort paths so latest versions appear first\n+ modelPaths = modelPaths.sort(function sort(a, b) {\n+ if (a < b) {\n+ return 1;\n+ } else if (a > b) {\n+ return -1;\n+ } else {\n+ return 0;\n+ }\n+ });\n+\n+ // Only get latest version of models\n+ var foundModels = Object.create(null);\n+ modelPaths.forEach(function(modelFileName) {\n+ var match = modelFileName.match(/^(.+)(-[\\d]{4}-[\\d]{2}-[\\d]{2})\\.normal\\.json$/i);\n+ if (match) {\n+ var model = match[1];\n+ if (!foundModels[model]) {\n+ foundModels[model] = modelFileName;\n+ }\n+ }\n+ });\n+\n+ // now update the metadata\n+ var keys = Object.keys(metadata);\n+ keys.forEach(function(key) {\n+ var modelName = metadata[key].prefix || key;\n+ metadata[key].api_path = foundModels[modelName];\n+ // find waiters file\n+ var baseName = foundModels[modelName].split('.')[0];\n+ if (modelPaths.indexOf(baseName + '.waiters2.json') >= 0) {\n+ metadata[key].waiters_path = baseName + '.waiters2.json';\n+ }\n+ });\n+};\n+\n+/**\n+ * Generates the file containing DocumentClient interfaces.\n+ */\n+TSGenerator.prototype.generateDocumentClientInterfaces = function 
generateDocumentClientInterfaces() {\n+ var self = this;\n+ // get the dynamodb model\n+ var dynamodbModel = this.loadServiceApi('dynamodb');\n+ var code = '';\n+ // include node reference\n+ code += '///\\n';\n+ // generate shapes\n+ var modelShapes = dynamodbModel.shapes;\n+ // iterate over each shape\n+ var shapeKeys = Object.keys(modelShapes);\n+ shapeKeys.forEach(function (shapeKey) {\n+ var modelShape = modelShapes[shapeKey];\n+ // ignore exceptions\n+ if (modelShape.exception) {\n+ return;\n+ }\n+ // overwrite AttributeValue\n+ if (shapeKey === 'AttributeValue') {\n+ code += self.generateDocString('A JavaScript object or native type.');\n+ code += 'export type ' + shapeKey + ' = any;\\n';\n+ return;\n+ }\n+ code += self.generateTypingsFromShape(shapeKey, modelShape);\n+ });\n+\n+ // write file:\n+ this.writeTypingsFile('document_client_interfaces', path.join(this._sdkRootDir, 'lib', 'dynamodb'), code);\n+};\n+\n+/**\n+ * Returns a service model based on the serviceIdentifier.\n+ */\n+TSGenerator.prototype.loadServiceApi = function loadServiceApi(serviceIdentifier) {\n+ // first, find the correct identifier\n+ var metadata = this.metadata;\n+ var serviceFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].api_path);\n+ var serviceModelFile = fs.readFileSync(serviceFilePath);\n+ var serviceModel = JSON.parse(serviceModelFile);\n+ // load waiters file if it exists\n+ var waiterFilePath;\n+ if (metadata[serviceIdentifier].waiters_path) {\n+ waiterFilePath = path.join(this._apiRootDir, metadata[serviceIdentifier].waiters_path);\n+ var waiterModelFile = fs.readFileSync(waiterFilePath);\n+ var waiterModel = JSON.parse(waiterModelFile);\n+ serviceModel.waiters = waiterModel.waiters;\n+ }\n+\n+ return serviceModel;\n+};\n+\n+/**\n+ * Determines if a member is required by checking for it in a list.\n+ */\n+TSGenerator.prototype.checkRequired = function checkRequired(list, member) {\n+ if (list.indexOf(member)) {\n+ return true;\n+ }\n+ return false;\n+};\n+\n+/**\n+ * Generates whitespace based on the count.\n+ */\n+TSGenerator.prototype.tabs = function tabs(count) {\n+ var code = '';\n+ for (var i = 0; i < count; i++) {\n+ code += ' ';\n+ }\n+ return code;\n+};\n+\n+/**\n+ * Transforms documentation string to a more readable format.\n+ */\n+TSGenerator.prototype.transformDocumentation = function transformDocumentation(documentation) {\n+ if (!documentation) {\n+ return '';\n+ }\n+ documentation = documentation.replace(/<(?:.|\\n)*?>/gm, '');\n+ documentation = documentation.replace(/\\*\\//g, '*');\n+ return documentation;\n+};\n+\n+/**\n+ * Returns a doc string based on the supplied documentation.\n+ * Also tabs the doc string if a count is provided.\n+ */\n+TSGenerator.prototype.generateDocString = function generateDocString(documentation, tabCount) {\n+ tabCount = tabCount || 0;\n+ var code = '';\n+ code += this.tabs(tabCount) + '/**\\n';\n+ code += this.tabs(tabCount) + ' * ' + this.transformDocumentation(documentation) + '\\n';\n+ code += this.tabs(tabCount) + ' */\\n';\n+ return code;\n+};\n+\n+/**\n+ * Returns an array of custom configuration options based on a service identiffier.\n+ * Custom configuration options are determined by checking the metadata.json file.\n+ */\n+TSGenerator.prototype.generateCustomConfigFromMetadata = function generateCustomConfigFromMetadata(serviceIdentifier) {\n+ // some services have additional configuration options that are defined in the metadata.json file\n+ // i.e. 
dualstackAvailable = useDualstack\n+ // create reference to custom options\n+ var customConfigurations = [];\n+ var serviceMetadata = this.metadata[serviceIdentifier];\n+ // loop through metadata members\n+ for (var memberName in serviceMetadata) {\n+ if (!serviceMetadata.hasOwnProperty(memberName)) {\n+ continue;\n+ }\n+ var memberValue = serviceMetadata[memberName];\n+ // check configs\n+ switch (memberName) {\n+ case 'dualstackAvailable':\n+ customConfigurations.push(CUSTOM_CONFIG_ENUMS.DUALSTACK);\n+ break;\n+ }\n+ }\n+\n+ return customConfigurations;\n+};\n+\n+/**\n+ * Generates a type or interface based on the shape.\n+ */\n+TSGenerator.prototype.generateTypingsFromShape = function generateTypingsFromShape(shapeKey, shape, tabCount) {\n+ // some shapes shouldn't be generated if they are javascript primitives\n+ if (['string', 'boolean', 'number', 'Date', 'Blob'].indexOf(shapeKey) >= 0) {\n+ return '';\n+ }\n+ var self = this;\n+ var code = '';\n+ tabCount = tabCount || 0;\n+ var tabs = this.tabs;\n+ var type = shape.type;\n+ if (type === 'structure') {\n+ code += tabs(tabCount) + 'export interface ' + shapeKey + ' {\\n';\n+ var members = shape.members;\n+ // cycle through members\n+ var memberKeys = Object.keys(members);\n+ memberKeys.forEach(function(memberKey) {\n+ // docs\n+ var member = members[memberKey];\n+ if (member.documentation) {\n+ code += self.generateDocString(member.documentation, tabCount + 1);\n+ }\n+ var required = self.checkRequired(shape.required || [], memberKey) ? '?' : '';\n+ code += tabs(tabCount + 1) + memberKey + required + ': ' + member.shape + ';\\n';\n+ });\n+ code += tabs(tabCount) + '}\\n';\n+ } else if (type === 'list') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = ' + shape.member.shape + '[];\\n';\n+ } else if (type === 'map') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = {[key: string]: ' + shape.value.shape + '};\\n';\n+ } else if (type === 'string' || type === 'character') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = string;\\n';\n+ } else if (['double', 'long', 'short', 'biginteger', 'bigdecimal', 'integer', 'float'].indexOf(type) >= 0) {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = number;\\n';\n+ } else if (type === 'timestamp') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = Date;\\n';\n+ } else if (type === 'boolean') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = boolean;\\n';\n+ } else if (type === 'blob' || type === 'binary') {\n+ code += tabs(tabCount) + 'export type ' + shapeKey + ' = Buffer|Uint8Array|Blob|string;\\n';\n+ }\n+ return code;\n+};\n+\n+/**\n+ * Generates a class method type for an operation.\n+ */\n+TSGenerator.prototype.generateTypingsFromOperations = function generateTypingsFromOperations(className, operation, tabCount) {\n+ var code = '';\n+ tabCount = tabCount || 0;\n+ var tabs = this.tabs;\n+\n+ var input = operation.input;\n+ var output = operation.output;\n+ var operationName = operation.name.charAt(0).toLowerCase() + operation.name.substring(1);\n+\n+ var inputShape = input ? className + '.' + input.shape : '{}';\n+ var outputShape = output ? className + '.' 
+ output.shape : '{}';\n+\n+ code += this.generateDocString(operation.documentation, tabCount);\n+ code += tabs(tabCount) + operationName + '(params: ' + inputShape + ', callback?: (err: AWSError, data: ' + outputShape + ') => void): Request<' + outputShape + ', AWSError>;\\n';\n+ code += this.generateDocString(operation.documentation, tabCount);\n+ code += tabs(tabCount) + operationName + '(callback?: (err: AWSError, data: ' + outputShape + ') => void): Request<' + outputShape + ', AWSError>;\\n';\n+\n+ return code;\n+};\n+\n+/**\n+ * Generates class method types for a waiter.\n+ */\n+TSGenerator.prototype.generateTypingsFromWaiters = function generateTypingsFromWaiters(className, waiterState, waiter, underlyingOperation, tabCount) {", - "comment_created_at": "2016-10-27T21:19:43+00:00", - "comment_author": "chrisradek", - "comment_body": "Good catch!\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "138404158", - "pr_number": 1716, - "pr_file": "lib/service.js", - "created_at": "2017-09-12T16:43:11+00:00", - "commented_code": "/**\n * @api private\n */\n getServiceClock: function getServiceClock() {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "138404158", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1716, - "pr_file": "lib/service.js", - "discussion_id": "138404158", - "commented_code": "@@ -403,6 +403,31 @@ AWS.Service = inherit({\n /**\n * @api private\n */\n+ getServiceClock: function getServiceClock() {", - "comment_created_at": "2017-09-12T16:43:11+00:00", - "comment_author": "jeskew", - "comment_body": "I'm not sure this method name reflects what the function does. How about 'getSkewCorrectedDate' or just 'getDate'?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "193597828", - "pr_number": 2074, - "pr_file": "lib/event-stream/format-message.js", - "created_at": "2018-06-07T00:29:04+00:00", - "commented_code": "var crypto = require('../../lib/util').crypto;\nvar Int64 = require('./int64').Int64;\nvar toBuffer = require('./to-buffer').toBuffer;\nvar allocBuffer = require('./alloc-buffer').allocBuffer;\n\nfunction formatMessage(message) {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "193597828", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2074, - "pr_file": "lib/event-stream/format-message.js", - "discussion_id": "193597828", - "commented_code": "@@ -0,0 +1,166 @@\n+var crypto = require('../../lib/util').crypto;\n+var Int64 = require('./int64').Int64;\n+var toBuffer = require('./to-buffer').toBuffer;\n+var allocBuffer = require('./alloc-buffer').allocBuffer;\n+\n+function formatMessage(message) {", - "comment_created_at": "2018-06-07T00:29:04+00:00", - "comment_author": "AllanZhengYP", - "comment_body": "This name is a bit confusing to me. `buildMessage` is probably a better name. 
Same for the `builderHeaders`", - "pr_file_module": null - }, - { - "comment_id": "193619601", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2074, - "pr_file": "lib/event-stream/format-message.js", - "discussion_id": "193597828", - "commented_code": "@@ -0,0 +1,166 @@\n+var crypto = require('../../lib/util').crypto;\n+var Int64 = require('./int64').Int64;\n+var toBuffer = require('./to-buffer').toBuffer;\n+var allocBuffer = require('./alloc-buffer').allocBuffer;\n+\n+function formatMessage(message) {", - "comment_created_at": "2018-06-07T03:55:00+00:00", - "comment_author": "chrisradek", - "comment_body": "Sure, I don't have a strong preference either way.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "227528680", - "pr_number": 2253, - "pr_file": "lib/discover_endpoint.js", - "created_at": "2018-10-23T19:15:16+00:00", - "commented_code": "var AWS = require('./core');\nvar util = require('./util');\nvar iniLoader = require('./shared-ini').iniLoader;\nvar ENDPOINT_OPERATION_MAX_RETRIES = 60;\nvar endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n\n/**\n * Generate key to index the endpoints in the cache\n * @return [map] object with keys to index endpoints.\n * @api private\n */\nfunction getCacheKey(request) {\n var service = request.service;\n var api = service.api || {};\n var operations = api.operations;\n var identifiers = {};\n if (operations[request.operation] && operations[request.operation].name) {\n identifiers.operation = operations[request.operation].name;\n }\n if (service.config.region) {\n identifiers.region = service.config.region;\n }\n if (api.serviceId) {\n identifiers.serviceId = api.serviceId\n }\n if (service.config.credentials.accessKeyId) {\n identifiers.accessKeyId = service.config.credentials.accessKeyId\n }\n return identifiers;\n}\n\n/**\n * @api private\n */\nfunction marshallCustomeIdentifiersHelper(result, params, shape) {\n if (!shape || params === undefined || params === null) return;\n if (shape.type === 'structure' && shape.required && shape.required.length > 0) {\n util.arrayEach(shape.required, function(name) {\n var memberShape = shape.members[name];\n if (memberShape.endpointDiscoveryId === true) {\n var locationName = memberShape.isLocationName ? memberShape.name : name;\n result[locationName] = String(params[name]);\n } else {\n marshallCustomeIdentifiersHelper(result, params[name], memberShape);\n }\n });\n }\n}\n\n/**\n * Get customized cache key according to the 'endpointDiscoveryId' trait.\n * @param [object] request object\n * @param [object] input shape of the given operation's api\n */\nfunction marshallCustomeIdentifiers(request, shape) {\n var identifiers = {};\n marshallCustomeIdentifiersHelper(identifiers, request.params, shape);\n return identifiers;\n}\n\n/**\n * Call endpoint discovery operation when it's optional.\n * When endpoint is available in cache then use the cached endpoints. If endpoints\n * are unavailable then use regional endpoints and call endpoint discovery operation\n * asynchronously. 
This is turned off by default.\n * @param [object] request object\n * @api private\n */\nfunction optionalDisverEndpoint(request) {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "227528680", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2253, - "pr_file": "lib/discover_endpoint.js", - "discussion_id": "227528680", - "commented_code": "@@ -0,0 +1,305 @@\n+var AWS = require('./core');\n+var util = require('./util');\n+var iniLoader = require('./shared-ini').iniLoader;\n+var ENDPOINT_OPERATION_MAX_RETRIES = 60;\n+var endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n+\n+/**\n+ * Generate key to index the endpoints in the cache\n+ * @return [map] object with keys to index endpoints.\n+ * @api private\n+ */\n+function getCacheKey(request) {\n+ var service = request.service;\n+ var api = service.api || {};\n+ var operations = api.operations;\n+ var identifiers = {};\n+ if (operations[request.operation] && operations[request.operation].name) {\n+ identifiers.operation = operations[request.operation].name;\n+ }\n+ if (service.config.region) {\n+ identifiers.region = service.config.region;\n+ }\n+ if (api.serviceId) {\n+ identifiers.serviceId = api.serviceId\n+ }\n+ if (service.config.credentials.accessKeyId) {\n+ identifiers.accessKeyId = service.config.credentials.accessKeyId\n+ }\n+ return identifiers;\n+}\n+\n+/**\n+ * @api private\n+ */\n+function marshallCustomeIdentifiersHelper(result, params, shape) {\n+ if (!shape || params === undefined || params === null) return;\n+ if (shape.type === 'structure' && shape.required && shape.required.length > 0) {\n+ util.arrayEach(shape.required, function(name) {\n+ var memberShape = shape.members[name];\n+ if (memberShape.endpointDiscoveryId === true) {\n+ var locationName = memberShape.isLocationName ? memberShape.name : name;\n+ result[locationName] = String(params[name]);\n+ } else {\n+ marshallCustomeIdentifiersHelper(result, params[name], memberShape);\n+ }\n+ });\n+ }\n+}\n+\n+/**\n+ * Get customized cache key according to the 'endpointDiscoveryId' trait.\n+ * @param [object] request object\n+ * @param [object] input shape of the given operation's api\n+ */\n+function marshallCustomeIdentifiers(request, shape) {\n+ var identifiers = {};\n+ marshallCustomeIdentifiersHelper(identifiers, request.params, shape);\n+ return identifiers;\n+}\n+\n+/**\n+ * Call endpoint discovery operation when it's optional.\n+ * When endpoint is available in cache then use the cached endpoints. If endpoints\n+ * are unavailable then use regional endpoints and call endpoint discovery operation\n+ * asynchronously. This is turned off by default.\n+ * @param [object] request object\n+ * @api private\n+ */\n+function optionalDisverEndpoint(request) {", - "comment_created_at": "2018-10-23T19:15:16+00:00", - "comment_author": "chrisradek", - "comment_body": "Can you spell out `optionalDiscoveryVersionEndpoint` (assuming that's what Disver means). 
`Disver` isn't a common abbreviation and coming back to this in the future it may be confusing what this means.", - "pr_file_module": null - }, - { - "comment_id": "228002596", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2253, - "pr_file": "lib/discover_endpoint.js", - "discussion_id": "227528680", - "commented_code": "@@ -0,0 +1,305 @@\n+var AWS = require('./core');\n+var util = require('./util');\n+var iniLoader = require('./shared-ini').iniLoader;\n+var ENDPOINT_OPERATION_MAX_RETRIES = 60;\n+var endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n+\n+/**\n+ * Generate key to index the endpoints in the cache\n+ * @return [map] object with keys to index endpoints.\n+ * @api private\n+ */\n+function getCacheKey(request) {\n+ var service = request.service;\n+ var api = service.api || {};\n+ var operations = api.operations;\n+ var identifiers = {};\n+ if (operations[request.operation] && operations[request.operation].name) {\n+ identifiers.operation = operations[request.operation].name;\n+ }\n+ if (service.config.region) {\n+ identifiers.region = service.config.region;\n+ }\n+ if (api.serviceId) {\n+ identifiers.serviceId = api.serviceId\n+ }\n+ if (service.config.credentials.accessKeyId) {\n+ identifiers.accessKeyId = service.config.credentials.accessKeyId\n+ }\n+ return identifiers;\n+}\n+\n+/**\n+ * @api private\n+ */\n+function marshallCustomeIdentifiersHelper(result, params, shape) {\n+ if (!shape || params === undefined || params === null) return;\n+ if (shape.type === 'structure' && shape.required && shape.required.length > 0) {\n+ util.arrayEach(shape.required, function(name) {\n+ var memberShape = shape.members[name];\n+ if (memberShape.endpointDiscoveryId === true) {\n+ var locationName = memberShape.isLocationName ? memberShape.name : name;\n+ result[locationName] = String(params[name]);\n+ } else {\n+ marshallCustomeIdentifiersHelper(result, params[name], memberShape);\n+ }\n+ });\n+ }\n+}\n+\n+/**\n+ * Get customized cache key according to the 'endpointDiscoveryId' trait.\n+ * @param [object] request object\n+ * @param [object] input shape of the given operation's api\n+ */\n+function marshallCustomeIdentifiers(request, shape) {\n+ var identifiers = {};\n+ marshallCustomeIdentifiersHelper(identifiers, request.params, shape);\n+ return identifiers;\n+}\n+\n+/**\n+ * Call endpoint discovery operation when it's optional.\n+ * When endpoint is available in cache then use the cached endpoints. If endpoints\n+ * are unavailable then use regional endpoints and call endpoint discovery operation\n+ * asynchronously. This is turned off by default.\n+ * @param [object] request object\n+ * @api private\n+ */\n+function optionalDisverEndpoint(request) {", - "comment_created_at": "2018-10-25T00:27:24+00:00", - "comment_author": "AllanZhengYP", - "comment_body": "It is a typo. And my auto-completion respects my typo of this name elsewhere. 
Thank you for pointing this out", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/aws-sdk-js-semantic-type-organization.json b/_reviewers/aws-sdk-js-semantic-type-organization.json new file mode 100644 index 0000000..43d591a --- /dev/null +++ b/_reviewers/aws-sdk-js-semantic-type-organization.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "102779549", + "pr_number": 1370, + "pr_file": "ts/dynamodb.ts", + "created_at": "2017-02-23T18:14:29+00:00", + "commented_code": "}\n};", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "102779549", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1370, + "pr_file": "ts/dynamodb.ts", + "discussion_id": "102779549", + "commented_code": "@@ -9,6 +9,18 @@ const params: DynamoDB.DocumentClient.GetItemInput = {\n }\n };\n ", + "comment_created_at": "2017-02-23T18:14:29+00:00", + "comment_author": "chrisradek", + "comment_body": "Can you add tests to make sure we can explicitly set a variable to the `DynamoDB.Converter` type? Also, would be nice to verify we can access `DocumentClient.ConverterOptions`.\r\n\r\nCan be simple, like:\r\n```javascript\r\nconst converter: Converter = DynamoDB.Converter;\r\n// and a test for input with converter options\r\nconst converterOptions: DynamoDB.DocumentClient.ConverterOptions = {convertEmptyValues: true};\r\nDynamoDB.Converter.input('string', converterOptions);\r\n```\r\n\r\nHow hard would it be to also expose ConverterOptions on the Converter namespace? Just feels a little odd having to access it off the DocumentClient namespace instead.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "130689743", + "pr_number": 1655, + "pr_file": "lib/credentials/credential_provider_chain.d.ts", + "created_at": "2017-08-01T18:29:43+00:00", + "commented_code": "*/\n resolve(callback:(err: AWSError, credentials: Credentials) => void): CredentialProviderChain;\n /**\n * Return a Promise on resolve() function\n */\n resolvePromise(resolve?:(credentials: Credentials) => void, reject?:(err: AWSError) => void): Promise;", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "130689743", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1655, + "pr_file": "lib/credentials/credential_provider_chain.d.ts", + "discussion_id": "130689743", + "commented_code": "@@ -10,6 +10,10 @@ export class CredentialProviderChain extends Credentials {\n */\n resolve(callback:(err: AWSError, credentials: Credentials) => void): CredentialProviderChain;\n /**\n+ * Return a Promise on resolve() function\n+ */\n+ resolvePromise(resolve?:(credentials: Credentials) => void, reject?:(err: AWSError) => void): Promise;", + "comment_created_at": "2017-08-01T18:29:43+00:00", + "comment_author": "jeskew", + "comment_body": "This isn't quite right. The return value should use `Credentials` instead of `any`, and the method itself doesn't take any callbacks.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/aws-sdk-js-semantic-type-organization.md b/_reviewers/aws-sdk-js-semantic-type-organization.md index 9280dda..207913c 100644 --- a/_reviewers/aws-sdk-js-semantic-type-organization.md +++ b/_reviewers/aws-sdk-js-semantic-type-organization.md @@ -23,51 +23,3 @@ const converter: any = DynamoDB.Converter; const options: DynamoDB.DocumentClient.ConverterOptions = { /* ... */ }; // accessing through DocumentClient when logically belongs to Converter function resolvePromise(): Promise { /* ... 
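Taken together, the threads above make one point: choose names that state what the code actually does, and spell abbreviations out rather than leaving readers to guess. A minimal sketch of the renames the reviewers suggested — hypothetical helpers only, not SDK source; the function bodies and the `config.systemClockOffset` field are placeholders used for illustration:

```javascript
// Illustrative sketch of descriptive naming; not actual aws-sdk-js code.

// 'getServiceClock' hid the fact that the return value is a skew-corrected Date.
function getSkewCorrectedDate(config) {
  // config.systemClockOffset is an assumed field holding the recorded skew in ms
  return new Date(Date.now() + (config.systemClockOffset || 0));
}

// 'formatMessage' read like string formatting; 'buildMessage' says the function
// assembles an event-stream message from its parts.
function buildMessage(headers, payload) {
  return { headers: headers, payload: payload };
}

// Abbreviations are spelled out so future readers are not left wondering
// what 'Disver' was meant to stand for.
function discoverEndpointAsynchronously(request) {
  // placeholder body: kick off endpoint discovery without blocking the request
  return Promise.resolve(request);
}
```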
*/ } ``` - - -[ - { - "discussion_id": "102779549", - "pr_number": 1370, - "pr_file": "ts/dynamodb.ts", - "created_at": "2017-02-23T18:14:29+00:00", - "commented_code": "}\n};", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "102779549", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1370, - "pr_file": "ts/dynamodb.ts", - "discussion_id": "102779549", - "commented_code": "@@ -9,6 +9,18 @@ const params: DynamoDB.DocumentClient.GetItemInput = {\n }\n };\n ", - "comment_created_at": "2017-02-23T18:14:29+00:00", - "comment_author": "chrisradek", - "comment_body": "Can you add tests to make sure we can explicitly set a variable to the `DynamoDB.Converter` type? Also, would be nice to verify we can access `DocumentClient.ConverterOptions`.\r\n\r\nCan be simple, like:\r\n```javascript\r\nconst converter: Converter = DynamoDB.Converter;\r\n// and a test for input with converter options\r\nconst converterOptions: DynamoDB.DocumentClient.ConverterOptions = {convertEmptyValues: true};\r\nDynamoDB.Converter.input('string', converterOptions);\r\n```\r\n\r\nHow hard would it be to also expose ConverterOptions on the Converter namespace? Just feels a little odd having to access it off the DocumentClient namespace instead.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "130689743", - "pr_number": 1655, - "pr_file": "lib/credentials/credential_provider_chain.d.ts", - "created_at": "2017-08-01T18:29:43+00:00", - "commented_code": "*/\n resolve(callback:(err: AWSError, credentials: Credentials) => void): CredentialProviderChain;\n /**\n * Return a Promise on resolve() function\n */\n resolvePromise(resolve?:(credentials: Credentials) => void, reject?:(err: AWSError) => void): Promise;", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "130689743", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1655, - "pr_file": "lib/credentials/credential_provider_chain.d.ts", - "discussion_id": "130689743", - "commented_code": "@@ -10,6 +10,10 @@ export class CredentialProviderChain extends Credentials {\n */\n resolve(callback:(err: AWSError, credentials: Credentials) => void): CredentialProviderChain;\n /**\n+ * Return a Promise on resolve() function\n+ */\n+ resolvePromise(resolve?:(credentials: Credentials) => void, reject?:(err: AWSError) => void): Promise;", - "comment_created_at": "2017-08-01T18:29:43+00:00", - "comment_author": "jeskew", - "comment_body": "This isn't quite right. 
The return value should use `Credentials` instead of `any`, and the method itself doesn't take any callbacks.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/aws-sdk-js-standardize-api-promise-patterns.json b/_reviewers/aws-sdk-js-standardize-api-promise-patterns.json new file mode 100644 index 0000000..0a5412b --- /dev/null +++ b/_reviewers/aws-sdk-js-standardize-api-promise-patterns.json @@ -0,0 +1,106 @@ +[ + { + "discussion_id": "82888371", + "pr_number": 1079, + "pr_file": "lib/util.js", + "created_at": "2016-10-11T21:03:18+00:00", + "commented_code": "/**\n * @api private\n */\n addPromisesToRequests: function addPromisesToRequests(constructor, PromiseDependency) {\n PromiseDependency = PromiseDependency || null;\n if (!PromiseDependency && typeof Promise !== 'undefined') {\n PromiseDependency = Promise;\n }\n if (typeof PromiseDependency !== 'function') {\n delete constructor.prototype.promise;\n return;\n }\n constructor.prototype.promise = function promise() {\n var self = this;\n return new PromiseDependency(function(resolve, reject) {\n self.on('complete', function(resp) {\n if (resp.error) {\n reject(resp.error);\n } else {\n resolve(resp.data);\n }\n addPromises: function addPromises(constructors, PromiseDependency) {\n if (!AWS) AWS = require('./core');\n if (!AWS.config) require('./config');\n if (PromiseDependency === undefined) PromiseDependency = AWS.config.getPromisesDependency();\n if (typeof PromiseDependency !== 'function') var deletePromise = true;\n if (!Array.isArray(constructors)) constructors = [constructors];\n\n var promisifyMethod = function(methodName) {\n return function promise() {\n var self = this;\n return new PromiseDependency(function(resolve, reject) {\n self[methodName](function(err, data) {\n if (err) {\n reject(err);\n } else {\n resolve(data);\n }\n });\n });\n self.runTo();\n });\n };\n };\n\n for (var ind = 0; ind < constructors.length; ind++) {\n var constructor = constructors[ind];\n switch (constructor.name) {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "82888371", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1079, + "pr_file": "lib/util.js", + "discussion_id": "82888371", + "commented_code": "@@ -748,27 +748,74 @@ var util = {\n /**\n * @api private\n */\n- addPromisesToRequests: function addPromisesToRequests(constructor, PromiseDependency) {\n- PromiseDependency = PromiseDependency || null;\n- if (!PromiseDependency && typeof Promise !== 'undefined') {\n- PromiseDependency = Promise;\n- }\n- if (typeof PromiseDependency !== 'function') {\n- delete constructor.prototype.promise;\n- return;\n- }\n- constructor.prototype.promise = function promise() {\n- var self = this;\n- return new PromiseDependency(function(resolve, reject) {\n- self.on('complete', function(resp) {\n- if (resp.error) {\n- reject(resp.error);\n- } else {\n- resolve(resp.data);\n- }\n+ addPromises: function addPromises(constructors, PromiseDependency) {\n+ if (!AWS) AWS = require('./core');\n+ if (!AWS.config) require('./config');\n+ if (PromiseDependency === undefined) PromiseDependency = AWS.config.getPromisesDependency();\n+ if (typeof PromiseDependency !== 'function') var deletePromise = true;\n+ if (!Array.isArray(constructors)) constructors = [constructors];\n+\n+ var promisifyMethod = function(methodName) {\n+ return function promise() {\n+ var self = this;\n+ return new PromiseDependency(function(resolve, reject) {\n+ self[methodName](function(err, data) {\n+ if (err) {\n+ reject(err);\n+ } else 
{\n+ resolve(data);\n+ }\n+ });\n });\n- self.runTo();\n- });\n+ };\n+ };\n+\n+ for (var ind = 0; ind < constructors.length; ind++) {\n+ var constructor = constructors[ind];\n+ switch (constructor.name) {", + "comment_created_at": "2016-10-11T21:03:18+00:00", + "comment_author": "chrisradek", + "comment_body": "It looks like `constructor.name` may not work in all the browsers we support and could have issues when minifiers are used:\nhttps://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Function/name#Browser_compatibility\n\nCan you do an equality check against the constructor instead?\n", + "pr_file_module": null + }, + { + "comment_id": "82890974", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1079, + "pr_file": "lib/util.js", + "discussion_id": "82888371", + "commented_code": "@@ -748,27 +748,74 @@ var util = {\n /**\n * @api private\n */\n- addPromisesToRequests: function addPromisesToRequests(constructor, PromiseDependency) {\n- PromiseDependency = PromiseDependency || null;\n- if (!PromiseDependency && typeof Promise !== 'undefined') {\n- PromiseDependency = Promise;\n- }\n- if (typeof PromiseDependency !== 'function') {\n- delete constructor.prototype.promise;\n- return;\n- }\n- constructor.prototype.promise = function promise() {\n- var self = this;\n- return new PromiseDependency(function(resolve, reject) {\n- self.on('complete', function(resp) {\n- if (resp.error) {\n- reject(resp.error);\n- } else {\n- resolve(resp.data);\n- }\n+ addPromises: function addPromises(constructors, PromiseDependency) {\n+ if (!AWS) AWS = require('./core');\n+ if (!AWS.config) require('./config');\n+ if (PromiseDependency === undefined) PromiseDependency = AWS.config.getPromisesDependency();\n+ if (typeof PromiseDependency !== 'function') var deletePromise = true;\n+ if (!Array.isArray(constructors)) constructors = [constructors];\n+\n+ var promisifyMethod = function(methodName) {\n+ return function promise() {\n+ var self = this;\n+ return new PromiseDependency(function(resolve, reject) {\n+ self[methodName](function(err, data) {\n+ if (err) {\n+ reject(err);\n+ } else {\n+ resolve(data);\n+ }\n+ });\n });\n- self.runTo();\n- });\n+ };\n+ };\n+\n+ for (var ind = 0; ind < constructors.length; ind++) {\n+ var constructor = constructors[ind];\n+ switch (constructor.name) {", + "comment_created_at": "2016-10-11T21:16:42+00:00", + "comment_author": "chrisradek", + "comment_body": "Actually, what do you think about giving each class that should return a promise a static method that accepts a Promise constructor, then the class can control how it should promisify itself?\n\nThe pros to that approach would be the logic for adding promises would be controlled by each class, rather than defined in a long if/switch statement within a utility function. If the method to promisify a class was named the same for all classes, you can just check if the constructor has that method then call it, instead of maintaining a list of enums to check a class against. 
You could still make the `promisifyMethod` a utility method if that reduces code duplication.\n", + "pr_file_module": null + }, + { + "comment_id": "83119747", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1079, + "pr_file": "lib/util.js", + "discussion_id": "82888371", + "commented_code": "@@ -748,27 +748,74 @@ var util = {\n /**\n * @api private\n */\n- addPromisesToRequests: function addPromisesToRequests(constructor, PromiseDependency) {\n- PromiseDependency = PromiseDependency || null;\n- if (!PromiseDependency && typeof Promise !== 'undefined') {\n- PromiseDependency = Promise;\n- }\n- if (typeof PromiseDependency !== 'function') {\n- delete constructor.prototype.promise;\n- return;\n- }\n- constructor.prototype.promise = function promise() {\n- var self = this;\n- return new PromiseDependency(function(resolve, reject) {\n- self.on('complete', function(resp) {\n- if (resp.error) {\n- reject(resp.error);\n- } else {\n- resolve(resp.data);\n- }\n+ addPromises: function addPromises(constructors, PromiseDependency) {\n+ if (!AWS) AWS = require('./core');\n+ if (!AWS.config) require('./config');\n+ if (PromiseDependency === undefined) PromiseDependency = AWS.config.getPromisesDependency();\n+ if (typeof PromiseDependency !== 'function') var deletePromise = true;\n+ if (!Array.isArray(constructors)) constructors = [constructors];\n+\n+ var promisifyMethod = function(methodName) {\n+ return function promise() {\n+ var self = this;\n+ return new PromiseDependency(function(resolve, reject) {\n+ self[methodName](function(err, data) {\n+ if (err) {\n+ reject(err);\n+ } else {\n+ resolve(data);\n+ }\n+ });\n });\n- self.runTo();\n- });\n+ };\n+ };\n+\n+ for (var ind = 0; ind < constructors.length; ind++) {\n+ var constructor = constructors[ind];\n+ switch (constructor.name) {", + "comment_created_at": "2016-10-12T23:29:17+00:00", + "comment_author": "LiuJoyceC", + "comment_body": "The reason I originally checked `constructor.name` is because I hadn't originally planned on requiring `core.js`, so I didn't have access to the actual constructors for an equality check before I added that require statement. I may be refactoring the require out anyway (as discussed above). I'll look into adding the static method for each class.\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "138417207", + "pr_number": 1711, + "pr_file": "lib/services/s3.js", + "created_at": "2017-09-12T17:36:01+00:00", + "commented_code": "return uploader;\n }\n});\n\n\n/**\n * @api private\n */\nAWS.S3.addPromisesToClass = function addPromisesToClass(PromiseDependency) {\n this.prototype.getSignedUrlPromise = AWS.util.promisifyMethod('getSignedUrl', PromiseDependency);", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "138417207", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1711, + "pr_file": "lib/services/s3.js", + "discussion_id": "138417207", + "commented_code": "@@ -1068,3 +1114,20 @@ AWS.util.update(AWS.S3.prototype, {\n return uploader;\n }\n });\n+\n+\n+/**\n+ * @api private\n+ */\n+AWS.S3.addPromisesToClass = function addPromisesToClass(PromiseDependency) {\n+ this.prototype.getSignedUrlPromise = AWS.util.promisifyMethod('getSignedUrl', PromiseDependency);", + "comment_created_at": "2017-09-12T17:36:01+00:00", + "comment_author": "chrisradek", + "comment_body": "The `AWS.util.promisifyMethod` function currently only works for functions that accept a callback function as the first parameter. 
For example, `AWS.Request.send` and `AWS.Credentials.get` both accept just a callback.\r\n\r\nThe `s3.getSignedUrl` method accepts an operation name, params, and a callback, so using this method won't work. You should be able to set `getSignedUrlPromise` to a function that returns a new Promise. This promise can simply wrap the getSignedUrl function.", + "pr_file_module": null + }, + { + "comment_id": "138644601", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1711, + "pr_file": "lib/services/s3.js", + "discussion_id": "138417207", + "commented_code": "@@ -1068,3 +1114,20 @@ AWS.util.update(AWS.S3.prototype, {\n return uploader;\n }\n });\n+\n+\n+/**\n+ * @api private\n+ */\n+AWS.S3.addPromisesToClass = function addPromisesToClass(PromiseDependency) {\n+ this.prototype.getSignedUrlPromise = AWS.util.promisifyMethod('getSignedUrl', PromiseDependency);", + "comment_created_at": "2017-09-13T14:55:49+00:00", + "comment_author": "IsaiahJTurner", + "comment_body": "Happy to do that but that solution could lead replicated code down the line. For example, if/when this functionality is added to `AWS.CloudFront.Signer.getSignedUrl`, we'd need to wrap it with the same logic.\r\n\r\nWhat do you think about modifying `AWS.util.promisifyMethod` so that the generated promise function accepts arguments? \r\n\r\nSomething like this:\r\n```js\r\nfunction promisifyMethod(methodName, PromiseDependency) {\r\n return function promise() {\r\n var self = this;\r\n var args = Array.prototype.slice.call(arguments);\r\n return new PromiseDependency(function(resolve, reject) {\r\n args.push(function(err, data) {\r\n if (err) {\r\n reject(err);\r\n } else {\r\n resolve(data);\r\n }\r\n });\r\n self[methodName].apply(null, args);\r\n });\r\n };\r\n}\r\n```\r\nMy solution does lead to one of two downsides:\r\n- Supplying too many arguments will give unexpected behavior like a \"callback is not a function\" error.\r\n- To prevent that with a more informative error like \"Too many arguments supplied\" I'd need to read `self[methodName].length` to get the expected arguments count but this would be incompatible with any functions that support the arguments style `Array.prototype.push([element1[, ...[, elementN]]])` since `Array.prototype.push.length` would only return 1. I'm not sure if any functions in the SDK work like this.\r\n\r\nRegardless of which downside chosen, I still think improving `AWS.util.promisifyMethod` rather than wrapping the function directly is the best option since it is the most extensible. Also, how `AWS.util.promisifyMethod` currently works in contrast with how other `promisify` functions within other JS libraries like Bluebird, es6-promisify, and promisify-node work. ", + "pr_file_module": null + }, + { + "comment_id": "138655481", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1711, + "pr_file": "lib/services/s3.js", + "discussion_id": "138417207", + "commented_code": "@@ -1068,3 +1114,20 @@ AWS.util.update(AWS.S3.prototype, {\n return uploader;\n }\n });\n+\n+\n+/**\n+ * @api private\n+ */\n+AWS.S3.addPromisesToClass = function addPromisesToClass(PromiseDependency) {\n+ this.prototype.getSignedUrlPromise = AWS.util.promisifyMethod('getSignedUrl', PromiseDependency);", + "comment_created_at": "2017-09-13T15:30:28+00:00", + "comment_author": "IsaiahJTurner", + "comment_body": "Went ahead and pushed my concept. 
Let me know what you think, can easily change it back and use your idea.", + "pr_file_module": null + }, + { + "comment_id": "317850908", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1711, + "pr_file": "lib/services/s3.js", + "discussion_id": "138417207", + "commented_code": "@@ -1068,3 +1114,20 @@ AWS.util.update(AWS.S3.prototype, {\n return uploader;\n }\n });\n+\n+\n+/**\n+ * @api private\n+ */\n+AWS.S3.addPromisesToClass = function addPromisesToClass(PromiseDependency) {\n+ this.prototype.getSignedUrlPromise = AWS.util.promisifyMethod('getSignedUrl', PromiseDependency);", + "comment_created_at": "2019-08-27T00:32:33+00:00", + "comment_author": "AllanZhengYP", + "comment_body": "Confirmed that the [bluebird](http://bluebirdjs.com/docs/api/promise.promisify.html) also use the last argument as the callback function. The idea of appending the promise-determining callback function to the `arguments` looks good to me.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/aws-sdk-js-standardize-api-promise-patterns.md b/_reviewers/aws-sdk-js-standardize-api-promise-patterns.md index e826339..462cca1 100644 --- a/_reviewers/aws-sdk-js-standardize-api-promise-patterns.md +++ b/_reviewers/aws-sdk-js-standardize-api-promise-patterns.md @@ -38,111 +38,3 @@ This pattern ensures: - Support for both simple callback methods and multi-parameter methods - Proper error handling and promise rejection - Compatibility with different promise implementations - - -[ - { - "discussion_id": "82888371", - "pr_number": 1079, - "pr_file": "lib/util.js", - "created_at": "2016-10-11T21:03:18+00:00", - "commented_code": "/**\n * @api private\n */\n addPromisesToRequests: function addPromisesToRequests(constructor, PromiseDependency) {\n PromiseDependency = PromiseDependency || null;\n if (!PromiseDependency && typeof Promise !== 'undefined') {\n PromiseDependency = Promise;\n }\n if (typeof PromiseDependency !== 'function') {\n delete constructor.prototype.promise;\n return;\n }\n constructor.prototype.promise = function promise() {\n var self = this;\n return new PromiseDependency(function(resolve, reject) {\n self.on('complete', function(resp) {\n if (resp.error) {\n reject(resp.error);\n } else {\n resolve(resp.data);\n }\n addPromises: function addPromises(constructors, PromiseDependency) {\n if (!AWS) AWS = require('./core');\n if (!AWS.config) require('./config');\n if (PromiseDependency === undefined) PromiseDependency = AWS.config.getPromisesDependency();\n if (typeof PromiseDependency !== 'function') var deletePromise = true;\n if (!Array.isArray(constructors)) constructors = [constructors];\n\n var promisifyMethod = function(methodName) {\n return function promise() {\n var self = this;\n return new PromiseDependency(function(resolve, reject) {\n self[methodName](function(err, data) {\n if (err) {\n reject(err);\n } else {\n resolve(data);\n }\n });\n });\n self.runTo();\n });\n };\n };\n\n for (var ind = 0; ind < constructors.length; ind++) {\n var constructor = constructors[ind];\n switch (constructor.name) {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "82888371", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1079, - "pr_file": "lib/util.js", - "discussion_id": "82888371", - "commented_code": "@@ -748,27 +748,74 @@ var util = {\n /**\n * @api private\n */\n- addPromisesToRequests: function addPromisesToRequests(constructor, PromiseDependency) {\n- PromiseDependency = PromiseDependency || null;\n- if 
(!PromiseDependency && typeof Promise !== 'undefined') {\n- PromiseDependency = Promise;\n- }\n- if (typeof PromiseDependency !== 'function') {\n- delete constructor.prototype.promise;\n- return;\n- }\n- constructor.prototype.promise = function promise() {\n- var self = this;\n- return new PromiseDependency(function(resolve, reject) {\n- self.on('complete', function(resp) {\n- if (resp.error) {\n- reject(resp.error);\n- } else {\n- resolve(resp.data);\n- }\n+ addPromises: function addPromises(constructors, PromiseDependency) {\n+ if (!AWS) AWS = require('./core');\n+ if (!AWS.config) require('./config');\n+ if (PromiseDependency === undefined) PromiseDependency = AWS.config.getPromisesDependency();\n+ if (typeof PromiseDependency !== 'function') var deletePromise = true;\n+ if (!Array.isArray(constructors)) constructors = [constructors];\n+\n+ var promisifyMethod = function(methodName) {\n+ return function promise() {\n+ var self = this;\n+ return new PromiseDependency(function(resolve, reject) {\n+ self[methodName](function(err, data) {\n+ if (err) {\n+ reject(err);\n+ } else {\n+ resolve(data);\n+ }\n+ });\n });\n- self.runTo();\n- });\n+ };\n+ };\n+\n+ for (var ind = 0; ind < constructors.length; ind++) {\n+ var constructor = constructors[ind];\n+ switch (constructor.name) {", - "comment_created_at": "2016-10-11T21:03:18+00:00", - "comment_author": "chrisradek", - "comment_body": "It looks like `constructor.name` may not work in all the browsers we support and could have issues when minifiers are used:\nhttps://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Function/name#Browser_compatibility\n\nCan you do an equality check against the constructor instead?\n", - "pr_file_module": null - }, - { - "comment_id": "82890974", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1079, - "pr_file": "lib/util.js", - "discussion_id": "82888371", - "commented_code": "@@ -748,27 +748,74 @@ var util = {\n /**\n * @api private\n */\n- addPromisesToRequests: function addPromisesToRequests(constructor, PromiseDependency) {\n- PromiseDependency = PromiseDependency || null;\n- if (!PromiseDependency && typeof Promise !== 'undefined') {\n- PromiseDependency = Promise;\n- }\n- if (typeof PromiseDependency !== 'function') {\n- delete constructor.prototype.promise;\n- return;\n- }\n- constructor.prototype.promise = function promise() {\n- var self = this;\n- return new PromiseDependency(function(resolve, reject) {\n- self.on('complete', function(resp) {\n- if (resp.error) {\n- reject(resp.error);\n- } else {\n- resolve(resp.data);\n- }\n+ addPromises: function addPromises(constructors, PromiseDependency) {\n+ if (!AWS) AWS = require('./core');\n+ if (!AWS.config) require('./config');\n+ if (PromiseDependency === undefined) PromiseDependency = AWS.config.getPromisesDependency();\n+ if (typeof PromiseDependency !== 'function') var deletePromise = true;\n+ if (!Array.isArray(constructors)) constructors = [constructors];\n+\n+ var promisifyMethod = function(methodName) {\n+ return function promise() {\n+ var self = this;\n+ return new PromiseDependency(function(resolve, reject) {\n+ self[methodName](function(err, data) {\n+ if (err) {\n+ reject(err);\n+ } else {\n+ resolve(data);\n+ }\n+ });\n });\n- self.runTo();\n- });\n+ };\n+ };\n+\n+ for (var ind = 0; ind < constructors.length; ind++) {\n+ var constructor = constructors[ind];\n+ switch (constructor.name) {", - "comment_created_at": "2016-10-11T21:16:42+00:00", - "comment_author": "chrisradek", - "comment_body": "Actually, what do 
you think about giving each class that should return a promise a static method that accepts a Promise constructor, then the class can control how it should promisify itself?\n\nThe pros to that approach would be the logic for adding promises would be controlled by each class, rather than defined in a long if/switch statement within a utility function. If the method to promisify a class was named the same for all classes, you can just check if the constructor has that method then call it, instead of maintaining a list of enums to check a class against. You could still make the `promisifyMethod` a utility method if that reduces code duplication.\n", - "pr_file_module": null - }, - { - "comment_id": "83119747", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1079, - "pr_file": "lib/util.js", - "discussion_id": "82888371", - "commented_code": "@@ -748,27 +748,74 @@ var util = {\n /**\n * @api private\n */\n- addPromisesToRequests: function addPromisesToRequests(constructor, PromiseDependency) {\n- PromiseDependency = PromiseDependency || null;\n- if (!PromiseDependency && typeof Promise !== 'undefined') {\n- PromiseDependency = Promise;\n- }\n- if (typeof PromiseDependency !== 'function') {\n- delete constructor.prototype.promise;\n- return;\n- }\n- constructor.prototype.promise = function promise() {\n- var self = this;\n- return new PromiseDependency(function(resolve, reject) {\n- self.on('complete', function(resp) {\n- if (resp.error) {\n- reject(resp.error);\n- } else {\n- resolve(resp.data);\n- }\n+ addPromises: function addPromises(constructors, PromiseDependency) {\n+ if (!AWS) AWS = require('./core');\n+ if (!AWS.config) require('./config');\n+ if (PromiseDependency === undefined) PromiseDependency = AWS.config.getPromisesDependency();\n+ if (typeof PromiseDependency !== 'function') var deletePromise = true;\n+ if (!Array.isArray(constructors)) constructors = [constructors];\n+\n+ var promisifyMethod = function(methodName) {\n+ return function promise() {\n+ var self = this;\n+ return new PromiseDependency(function(resolve, reject) {\n+ self[methodName](function(err, data) {\n+ if (err) {\n+ reject(err);\n+ } else {\n+ resolve(data);\n+ }\n+ });\n });\n- self.runTo();\n- });\n+ };\n+ };\n+\n+ for (var ind = 0; ind < constructors.length; ind++) {\n+ var constructor = constructors[ind];\n+ switch (constructor.name) {", - "comment_created_at": "2016-10-12T23:29:17+00:00", - "comment_author": "LiuJoyceC", - "comment_body": "The reason I originally checked `constructor.name` is because I hadn't originally planned on requiring `core.js`, so I didn't have access to the actual constructors for an equality check before I added that require statement. I may be refactoring the require out anyway (as discussed above). 
I'll look into adding the static method for each class.\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "138417207", - "pr_number": 1711, - "pr_file": "lib/services/s3.js", - "created_at": "2017-09-12T17:36:01+00:00", - "commented_code": "return uploader;\n }\n});\n\n\n/**\n * @api private\n */\nAWS.S3.addPromisesToClass = function addPromisesToClass(PromiseDependency) {\n this.prototype.getSignedUrlPromise = AWS.util.promisifyMethod('getSignedUrl', PromiseDependency);", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "138417207", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1711, - "pr_file": "lib/services/s3.js", - "discussion_id": "138417207", - "commented_code": "@@ -1068,3 +1114,20 @@ AWS.util.update(AWS.S3.prototype, {\n return uploader;\n }\n });\n+\n+\n+/**\n+ * @api private\n+ */\n+AWS.S3.addPromisesToClass = function addPromisesToClass(PromiseDependency) {\n+ this.prototype.getSignedUrlPromise = AWS.util.promisifyMethod('getSignedUrl', PromiseDependency);", - "comment_created_at": "2017-09-12T17:36:01+00:00", - "comment_author": "chrisradek", - "comment_body": "The `AWS.util.promisifyMethod` function currently only works for functions that accept a callback function as the first parameter. For example, `AWS.Request.send` and `AWS.Credentials.get` both accept just a callback.\r\n\r\nThe `s3.getSignedUrl` method accepts an operation name, params, and a callback, so using this method won't work. You should be able to set `getSignedUrlPromise` to a function that returns a new Promise. This promise can simply wrap the getSignedUrl function.", - "pr_file_module": null - }, - { - "comment_id": "138644601", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1711, - "pr_file": "lib/services/s3.js", - "discussion_id": "138417207", - "commented_code": "@@ -1068,3 +1114,20 @@ AWS.util.update(AWS.S3.prototype, {\n return uploader;\n }\n });\n+\n+\n+/**\n+ * @api private\n+ */\n+AWS.S3.addPromisesToClass = function addPromisesToClass(PromiseDependency) {\n+ this.prototype.getSignedUrlPromise = AWS.util.promisifyMethod('getSignedUrl', PromiseDependency);", - "comment_created_at": "2017-09-13T14:55:49+00:00", - "comment_author": "IsaiahJTurner", - "comment_body": "Happy to do that but that solution could lead replicated code down the line. For example, if/when this functionality is added to `AWS.CloudFront.Signer.getSignedUrl`, we'd need to wrap it with the same logic.\r\n\r\nWhat do you think about modifying `AWS.util.promisifyMethod` so that the generated promise function accepts arguments? 
\r\n\r\nSomething like this:\r\n```js\r\nfunction promisifyMethod(methodName, PromiseDependency) {\r\n return function promise() {\r\n var self = this;\r\n var args = Array.prototype.slice.call(arguments);\r\n return new PromiseDependency(function(resolve, reject) {\r\n args.push(function(err, data) {\r\n if (err) {\r\n reject(err);\r\n } else {\r\n resolve(data);\r\n }\r\n });\r\n self[methodName].apply(null, args);\r\n });\r\n };\r\n}\r\n```\r\nMy solution does lead to one of two downsides:\r\n- Supplying too many arguments will give unexpected behavior like a \"callback is not a function\" error.\r\n- To prevent that with a more informative error like \"Too many arguments supplied\" I'd need to read `self[methodName].length` to get the expected arguments count but this would be incompatible with any functions that support the arguments style `Array.prototype.push([element1[, ...[, elementN]]])` since `Array.prototype.push.length` would only return 1. I'm not sure if any functions in the SDK work like this.\r\n\r\nRegardless of which downside chosen, I still think improving `AWS.util.promisifyMethod` rather than wrapping the function directly is the best option since it is the most extensible. Also, how `AWS.util.promisifyMethod` currently works in contrast with how other `promisify` functions within other JS libraries like Bluebird, es6-promisify, and promisify-node work. ", - "pr_file_module": null - }, - { - "comment_id": "138655481", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1711, - "pr_file": "lib/services/s3.js", - "discussion_id": "138417207", - "commented_code": "@@ -1068,3 +1114,20 @@ AWS.util.update(AWS.S3.prototype, {\n return uploader;\n }\n });\n+\n+\n+/**\n+ * @api private\n+ */\n+AWS.S3.addPromisesToClass = function addPromisesToClass(PromiseDependency) {\n+ this.prototype.getSignedUrlPromise = AWS.util.promisifyMethod('getSignedUrl', PromiseDependency);", - "comment_created_at": "2017-09-13T15:30:28+00:00", - "comment_author": "IsaiahJTurner", - "comment_body": "Went ahead and pushed my concept. Let me know what you think, can easily change it back and use your idea.", - "pr_file_module": null - }, - { - "comment_id": "317850908", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1711, - "pr_file": "lib/services/s3.js", - "discussion_id": "138417207", - "commented_code": "@@ -1068,3 +1114,20 @@ AWS.util.update(AWS.S3.prototype, {\n return uploader;\n }\n });\n+\n+\n+/**\n+ * @api private\n+ */\n+AWS.S3.addPromisesToClass = function addPromisesToClass(PromiseDependency) {\n+ this.prototype.getSignedUrlPromise = AWS.util.promisifyMethod('getSignedUrl', PromiseDependency);", - "comment_created_at": "2019-08-27T00:32:33+00:00", - "comment_author": "AllanZhengYP", - "comment_body": "Confirmed that the [bluebird](http://bluebirdjs.com/docs/api/promise.promisify.html) also use the last argument as the callback function. 
The idea of appending the promise-determining callback function to the `arguments` looks good to me.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/aws-sdk-js-structured-test-resource-management.json b/_reviewers/aws-sdk-js-structured-test-resource-management.json new file mode 100644 index 0000000..3c6cf4a --- /dev/null +++ b/_reviewers/aws-sdk-js-structured-test-resource-management.json @@ -0,0 +1,172 @@ +[ + { + "discussion_id": "182547608", + "pr_number": 2014, + "pr_file": "test/browserHashes.spec.js", + "created_at": "2018-04-18T19:46:02+00:00", + "commented_code": "if (truncate) {\n digest = digest.slice(0, truncate);\n }\n expect(digest.toString('hex')).to.equal(expected);\n //in node <= 0.10 digest sometimes returns a Dataview, should be buffer.", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "182547608", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2014, + "pr_file": "test/browserHashes.spec.js", + "discussion_id": "182547608", + "commented_code": "@@ -43,7 +43,11 @@ describe('Browser hash implementations', function() {\n if (truncate) {\n digest = digest.slice(0, truncate);\n }\n- expect(digest.toString('hex')).to.equal(expected);\n+ //in node <= 0.10 digest sometimes returns a Dataview, should be buffer.", + "comment_created_at": "2018-04-18T19:46:02+00:00", + "comment_author": "chrisradek", + "comment_body": "Are these tests running in node.js? This should only be running in browser environments, and is using the 3rd party `Buffer` package instead of node.js' `Buffer` package. We can place browser-specific tests in a separate folder and exclude them from being run by mocha in the npm unit script.", + "pr_file_module": null + }, + { + "comment_id": "182572206", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2014, + "pr_file": "test/browserHashes.spec.js", + "discussion_id": "182547608", + "commented_code": "@@ -43,7 +43,11 @@ describe('Browser hash implementations', function() {\n if (truncate) {\n digest = digest.slice(0, truncate);\n }\n- expect(digest.toString('hex')).to.equal(expected);\n+ //in node <= 0.10 digest sometimes returns a Dataview, should be buffer.", + "comment_created_at": "2018-04-18T21:16:49+00:00", + "comment_author": "AllanZhengYP", + "comment_body": "Ah right! The change mean to fix the test. But actually we can remove it out of node test. 
I didn't realize this is browser-only although the name already told me", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "184547814", + "pr_number": 1956, + "pr_file": "test/integration_test/helpers.js", + "created_at": "2018-04-26T22:22:30+00:00", + "commented_code": "(function() {\n global.expect = require('chai').expect;\n module.exports = {\n sharedBucket: 'aws-sdk-js-integration',", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "184547814", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1956, + "pr_file": "test/integration_test/helpers.js", + "discussion_id": "184547814", + "commented_code": "@@ -0,0 +1,25 @@\n+(function() {\n+ global.expect = require('chai').expect;\n+ module.exports = {\n+ sharedBucket: 'aws-sdk-js-integration',", + "comment_created_at": "2018-04-26T22:22:30+00:00", + "comment_author": "chrisradek", + "comment_body": "Probably want to append a timestamp to this as well to make it somewhat unique", + "pr_file_module": null + }, + { + "comment_id": "184557074", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1956, + "pr_file": "test/integration_test/helpers.js", + "discussion_id": "184547814", + "commented_code": "@@ -0,0 +1,25 @@\n+(function() {\n+ global.expect = require('chai').expect;\n+ module.exports = {\n+ sharedBucket: 'aws-sdk-js-integration',", + "comment_created_at": "2018-04-26T23:19:53+00:00", + "comment_author": "AllanZhengYP", + "comment_body": "This is for sharing the test bucket. Maybe this time occasionally we fail to delete the bucket. But next time we will delete this bucket.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "184550574", + "pr_number": 1956, + "pr_file": "test/integration_test/s3/getObject.spec.js", + "created_at": "2018-04-26T22:38:16+00:00", + "commented_code": "var helpers = require('../helpers');\nvar AWS = helpers.AWS;\nvar bucketName = helpers.sharedBucket;\ndescribe('download integrity', function() {\n var params;\n var s3;\n function putObject(customParams, done) {\n params = {\n Bucket: customParams.Bucket || params.Bucket,\n Key: customParams.Key || params.Key,\n };\n s3.createBucket({Bucket: params.Bucket}).promise().then(function() {\n return s3.waitFor('bucketExists', {Bucket: params.Bucket}).promise()\n }).then(function() {\n return s3.putObject({\n Bucket: params.Bucket,\n Key: params.Key,\n Body: customParams.Body || params.Body\n }).promise();\n }).then(function() {\n return s3.waitFor('objectExists', {\n Bucket: params.Bucket,\n Key: params.Key,\n }).promise();\n }).then(function() { \n if (typeof done === 'function') done();\n }).catch(function(err) {\n throw new Error('Cannot put object: ' + err);\n exit(1);\n });\n }\n\n function delectBucket(done) {\n s3.listObjectVersions({\n Bucket: params.Bucket\n }).promise().then(function(data) {\n var removeObjectsParams = {\n Bucket: params.Bucket,\n Delete: {\n Quiet: false,\n Objects: []\n }\n }\n for (var version of data.Versions) {\n removeObjectsParams.Delete.Objects.push({\n Key: version.Key,\n VersionId: version.VersionId === 'null' ? 
null : version.VersionId,\n });\n }\n return removeObjectsParams;\n }).then(function(removeObjectsParams) {\n return s3.deleteObjects(removeObjectsParams).promise();\n }).then(function() {\n return s3.waitFor('objectNotExists', {\n Bucket: params.Bucket,\n Key: params.Key,\n }).promise()\n }).then(function() {\n return s3.deleteBucket({Bucket: params.Bucket}).promise();\n }).then(function(data) {\n return s3.waitFor('bucketNotExists', {Bucket: params.Bucket}).promise();\n }).then(function() {\n if (typeof done === 'function') done();\n }).catch(function(err) {\n throw new Error('Cannot delete bucket: ' + err);\n exit(1);\n })\n }\n\n before('setup s3 client with responseChecksumAlgorithm equals \\'md5\\'', function() {\n s3 = new AWS.S3({responseChecksumAlgorithm: 'md5'});", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "184550574", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1956, + "pr_file": "test/integration_test/s3/getObject.spec.js", + "discussion_id": "184550574", + "commented_code": "@@ -0,0 +1,209 @@\n+var helpers = require('../helpers');\n+var AWS = helpers.AWS;\n+var bucketName = helpers.sharedBucket;\n+describe('download integrity', function() {\n+ var params;\n+ var s3;\n+ function putObject(customParams, done) {\n+ params = {\n+ Bucket: customParams.Bucket || params.Bucket,\n+ Key: customParams.Key || params.Key,\n+ };\n+ s3.createBucket({Bucket: params.Bucket}).promise().then(function() {\n+ return s3.waitFor('bucketExists', {Bucket: params.Bucket}).promise()\n+ }).then(function() {\n+ return s3.putObject({\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ Body: customParams.Body || params.Body\n+ }).promise();\n+ }).then(function() {\n+ return s3.waitFor('objectExists', {\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ }).promise();\n+ }).then(function() { \n+ if (typeof done === 'function') done();\n+ }).catch(function(err) {\n+ throw new Error('Cannot put object: ' + err);\n+ exit(1);\n+ });\n+ }\n+\n+ function delectBucket(done) {\n+ s3.listObjectVersions({\n+ Bucket: params.Bucket\n+ }).promise().then(function(data) {\n+ var removeObjectsParams = {\n+ Bucket: params.Bucket,\n+ Delete: {\n+ Quiet: false,\n+ Objects: []\n+ }\n+ }\n+ for (var version of data.Versions) {\n+ removeObjectsParams.Delete.Objects.push({\n+ Key: version.Key,\n+ VersionId: version.VersionId === 'null' ? null : version.VersionId,\n+ });\n+ }\n+ return removeObjectsParams;\n+ }).then(function(removeObjectsParams) {\n+ return s3.deleteObjects(removeObjectsParams).promise();\n+ }).then(function() {\n+ return s3.waitFor('objectNotExists', {\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ }).promise()\n+ }).then(function() {\n+ return s3.deleteBucket({Bucket: params.Bucket}).promise();\n+ }).then(function(data) {\n+ return s3.waitFor('bucketNotExists', {Bucket: params.Bucket}).promise();\n+ }).then(function() {\n+ if (typeof done === 'function') done();\n+ }).catch(function(err) {\n+ throw new Error('Cannot delete bucket: ' + err);\n+ exit(1);\n+ })\n+ }\n+\n+ before('setup s3 client with responseChecksumAlgorithm equals \\'md5\\'', function() {\n+ s3 = new AWS.S3({responseChecksumAlgorithm: 'md5'});", + "comment_created_at": "2018-04-26T22:38:16+00:00", + "comment_author": "chrisradek", + "comment_body": "Why don't you create the bucket used by all the tests in the `before` step? We shouldn't have to create a new bucket for every single test, just this suite of tests. \r\n\r\nI also wouldn't mix the `putObject` method with `createBucket`. 
`putObject` is doing too much, and adds 'global' (across tests) state. For example, you don't directly pass it the bucket or key, instead relying on a closure that every test has access to (and can change). That could lead to tricky edge cases coming up later that are hard to debug.", + "pr_file_module": null + }, + { + "comment_id": "184559113", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1956, + "pr_file": "test/integration_test/s3/getObject.spec.js", + "discussion_id": "184550574", + "commented_code": "@@ -0,0 +1,209 @@\n+var helpers = require('../helpers');\n+var AWS = helpers.AWS;\n+var bucketName = helpers.sharedBucket;\n+describe('download integrity', function() {\n+ var params;\n+ var s3;\n+ function putObject(customParams, done) {\n+ params = {\n+ Bucket: customParams.Bucket || params.Bucket,\n+ Key: customParams.Key || params.Key,\n+ };\n+ s3.createBucket({Bucket: params.Bucket}).promise().then(function() {\n+ return s3.waitFor('bucketExists', {Bucket: params.Bucket}).promise()\n+ }).then(function() {\n+ return s3.putObject({\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ Body: customParams.Body || params.Body\n+ }).promise();\n+ }).then(function() {\n+ return s3.waitFor('objectExists', {\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ }).promise();\n+ }).then(function() { \n+ if (typeof done === 'function') done();\n+ }).catch(function(err) {\n+ throw new Error('Cannot put object: ' + err);\n+ exit(1);\n+ });\n+ }\n+\n+ function delectBucket(done) {\n+ s3.listObjectVersions({\n+ Bucket: params.Bucket\n+ }).promise().then(function(data) {\n+ var removeObjectsParams = {\n+ Bucket: params.Bucket,\n+ Delete: {\n+ Quiet: false,\n+ Objects: []\n+ }\n+ }\n+ for (var version of data.Versions) {\n+ removeObjectsParams.Delete.Objects.push({\n+ Key: version.Key,\n+ VersionId: version.VersionId === 'null' ? null : version.VersionId,\n+ });\n+ }\n+ return removeObjectsParams;\n+ }).then(function(removeObjectsParams) {\n+ return s3.deleteObjects(removeObjectsParams).promise();\n+ }).then(function() {\n+ return s3.waitFor('objectNotExists', {\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ }).promise()\n+ }).then(function() {\n+ return s3.deleteBucket({Bucket: params.Bucket}).promise();\n+ }).then(function(data) {\n+ return s3.waitFor('bucketNotExists', {Bucket: params.Bucket}).promise();\n+ }).then(function() {\n+ if (typeof done === 'function') done();\n+ }).catch(function(err) {\n+ throw new Error('Cannot delete bucket: ' + err);\n+ exit(1);\n+ })\n+ }\n+\n+ before('setup s3 client with responseChecksumAlgorithm equals \\'md5\\'', function() {\n+ s3 = new AWS.S3({responseChecksumAlgorithm: 'md5'});", + "comment_created_at": "2018-04-26T23:33:37+00:00", + "comment_author": "AllanZhengYP", + "comment_body": "Yes, you can pass in the bucket and key, but right, it will change the global state. 
But capsulizing them will also guarantee `putObject` always succeed(bucket always there).", + "pr_file_module": null + }, + { + "comment_id": "184560586", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1956, + "pr_file": "test/integration_test/s3/getObject.spec.js", + "discussion_id": "184550574", + "commented_code": "@@ -0,0 +1,209 @@\n+var helpers = require('../helpers');\n+var AWS = helpers.AWS;\n+var bucketName = helpers.sharedBucket;\n+describe('download integrity', function() {\n+ var params;\n+ var s3;\n+ function putObject(customParams, done) {\n+ params = {\n+ Bucket: customParams.Bucket || params.Bucket,\n+ Key: customParams.Key || params.Key,\n+ };\n+ s3.createBucket({Bucket: params.Bucket}).promise().then(function() {\n+ return s3.waitFor('bucketExists', {Bucket: params.Bucket}).promise()\n+ }).then(function() {\n+ return s3.putObject({\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ Body: customParams.Body || params.Body\n+ }).promise();\n+ }).then(function() {\n+ return s3.waitFor('objectExists', {\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ }).promise();\n+ }).then(function() { \n+ if (typeof done === 'function') done();\n+ }).catch(function(err) {\n+ throw new Error('Cannot put object: ' + err);\n+ exit(1);\n+ });\n+ }\n+\n+ function delectBucket(done) {\n+ s3.listObjectVersions({\n+ Bucket: params.Bucket\n+ }).promise().then(function(data) {\n+ var removeObjectsParams = {\n+ Bucket: params.Bucket,\n+ Delete: {\n+ Quiet: false,\n+ Objects: []\n+ }\n+ }\n+ for (var version of data.Versions) {\n+ removeObjectsParams.Delete.Objects.push({\n+ Key: version.Key,\n+ VersionId: version.VersionId === 'null' ? null : version.VersionId,\n+ });\n+ }\n+ return removeObjectsParams;\n+ }).then(function(removeObjectsParams) {\n+ return s3.deleteObjects(removeObjectsParams).promise();\n+ }).then(function() {\n+ return s3.waitFor('objectNotExists', {\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ }).promise()\n+ }).then(function() {\n+ return s3.deleteBucket({Bucket: params.Bucket}).promise();\n+ }).then(function(data) {\n+ return s3.waitFor('bucketNotExists', {Bucket: params.Bucket}).promise();\n+ }).then(function() {\n+ if (typeof done === 'function') done();\n+ }).catch(function(err) {\n+ throw new Error('Cannot delete bucket: ' + err);\n+ exit(1);\n+ })\n+ }\n+\n+ before('setup s3 client with responseChecksumAlgorithm equals \\'md5\\'', function() {\n+ s3 = new AWS.S3({responseChecksumAlgorithm: 'md5'});", + "comment_created_at": "2018-04-26T23:44:33+00:00", + "comment_author": "chrisradek", + "comment_body": "But you could also make sure the bucket is there in the `before` hook. Just call `done()` after the `waitFor` method completes. Then you also only need to create it once; I don't think there's a reason we need to create a new bucket for every test since we aren't testing any bucket-specific configurations here. 
Creating a new bucket with each test also creates more points of failure (rate/resource limits, for example).\r\n", + "pr_file_module": null + }, + { + "comment_id": "184561348", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1956, + "pr_file": "test/integration_test/s3/getObject.spec.js", + "discussion_id": "184550574", + "commented_code": "@@ -0,0 +1,209 @@\n+var helpers = require('../helpers');\n+var AWS = helpers.AWS;\n+var bucketName = helpers.sharedBucket;\n+describe('download integrity', function() {\n+ var params;\n+ var s3;\n+ function putObject(customParams, done) {\n+ params = {\n+ Bucket: customParams.Bucket || params.Bucket,\n+ Key: customParams.Key || params.Key,\n+ };\n+ s3.createBucket({Bucket: params.Bucket}).promise().then(function() {\n+ return s3.waitFor('bucketExists', {Bucket: params.Bucket}).promise()\n+ }).then(function() {\n+ return s3.putObject({\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ Body: customParams.Body || params.Body\n+ }).promise();\n+ }).then(function() {\n+ return s3.waitFor('objectExists', {\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ }).promise();\n+ }).then(function() { \n+ if (typeof done === 'function') done();\n+ }).catch(function(err) {\n+ throw new Error('Cannot put object: ' + err);\n+ exit(1);\n+ });\n+ }\n+\n+ function delectBucket(done) {\n+ s3.listObjectVersions({\n+ Bucket: params.Bucket\n+ }).promise().then(function(data) {\n+ var removeObjectsParams = {\n+ Bucket: params.Bucket,\n+ Delete: {\n+ Quiet: false,\n+ Objects: []\n+ }\n+ }\n+ for (var version of data.Versions) {\n+ removeObjectsParams.Delete.Objects.push({\n+ Key: version.Key,\n+ VersionId: version.VersionId === 'null' ? null : version.VersionId,\n+ });\n+ }\n+ return removeObjectsParams;\n+ }).then(function(removeObjectsParams) {\n+ return s3.deleteObjects(removeObjectsParams).promise();\n+ }).then(function() {\n+ return s3.waitFor('objectNotExists', {\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ }).promise()\n+ }).then(function() {\n+ return s3.deleteBucket({Bucket: params.Bucket}).promise();\n+ }).then(function(data) {\n+ return s3.waitFor('bucketNotExists', {Bucket: params.Bucket}).promise();\n+ }).then(function() {\n+ if (typeof done === 'function') done();\n+ }).catch(function(err) {\n+ throw new Error('Cannot delete bucket: ' + err);\n+ exit(1);\n+ })\n+ }\n+\n+ before('setup s3 client with responseChecksumAlgorithm equals \\'md5\\'', function() {\n+ s3 = new AWS.S3({responseChecksumAlgorithm: 'md5'});", + "comment_created_at": "2018-04-26T23:50:42+00:00", + "comment_author": "AllanZhengYP", + "comment_body": "yea. I will move the `putObject` to `before` trait. And use timestamp in bucket name. I was thinking that we can write less code if we are going to add more tests here. But now I agree that this is an overkill and not worthwhile. 
", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "184550641", + "pr_number": 1956, + "pr_file": "test/integration_test/s3/getObject.spec.js", + "created_at": "2018-04-26T22:38:41+00:00", + "commented_code": "var helpers = require('../helpers');\nvar AWS = helpers.AWS;\nvar bucketName = helpers.sharedBucket;\ndescribe('download integrity', function() {\n var params;\n var s3;\n function putObject(customParams, done) {\n params = {\n Bucket: customParams.Bucket || params.Bucket,\n Key: customParams.Key || params.Key,\n };\n s3.createBucket({Bucket: params.Bucket}).promise().then(function() {\n return s3.waitFor('bucketExists', {Bucket: params.Bucket}).promise()\n }).then(function() {\n return s3.putObject({\n Bucket: params.Bucket,\n Key: params.Key,\n Body: customParams.Body || params.Body\n }).promise();\n }).then(function() {\n return s3.waitFor('objectExists', {\n Bucket: params.Bucket,\n Key: params.Key,\n }).promise();\n }).then(function() { \n if (typeof done === 'function') done();\n }).catch(function(err) {\n throw new Error('Cannot put object: ' + err);\n exit(1);\n });\n }\n\n function delectBucket(done) {\n s3.listObjectVersions({\n Bucket: params.Bucket\n }).promise().then(function(data) {\n var removeObjectsParams = {\n Bucket: params.Bucket,\n Delete: {\n Quiet: false,\n Objects: []\n }\n }\n for (var version of data.Versions) {\n removeObjectsParams.Delete.Objects.push({\n Key: version.Key,\n VersionId: version.VersionId === 'null' ? null : version.VersionId,\n });\n }\n return removeObjectsParams;\n }).then(function(removeObjectsParams) {\n return s3.deleteObjects(removeObjectsParams).promise();\n }).then(function() {\n return s3.waitFor('objectNotExists', {\n Bucket: params.Bucket,\n Key: params.Key,\n }).promise()\n }).then(function() {\n return s3.deleteBucket({Bucket: params.Bucket}).promise();\n }).then(function(data) {\n return s3.waitFor('bucketNotExists', {Bucket: params.Bucket}).promise();\n }).then(function() {\n if (typeof done === 'function') done();\n }).catch(function(err) {\n throw new Error('Cannot delete bucket: ' + err);\n exit(1);\n })\n }\n\n before('setup s3 client with responseChecksumAlgorithm equals \\'md5\\'', function() {\n s3 = new AWS.S3({responseChecksumAlgorithm: 'md5'});\n })\n\n beforeEach('setup bucket and object...', function(done) {\n params = {\n Bucket: bucketName,\n Key: 'key',\n };\n putObject({Body: 'this is a test!'}, done);\n })\n\n afterEach('delete bucket...', function(done) {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "184550641", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1956, + "pr_file": "test/integration_test/s3/getObject.spec.js", + "discussion_id": "184550641", + "commented_code": "@@ -0,0 +1,209 @@\n+var helpers = require('../helpers');\n+var AWS = helpers.AWS;\n+var bucketName = helpers.sharedBucket;\n+describe('download integrity', function() {\n+ var params;\n+ var s3;\n+ function putObject(customParams, done) {\n+ params = {\n+ Bucket: customParams.Bucket || params.Bucket,\n+ Key: customParams.Key || params.Key,\n+ };\n+ s3.createBucket({Bucket: params.Bucket}).promise().then(function() {\n+ return s3.waitFor('bucketExists', {Bucket: params.Bucket}).promise()\n+ }).then(function() {\n+ return s3.putObject({\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ Body: customParams.Body || params.Body\n+ }).promise();\n+ }).then(function() {\n+ return s3.waitFor('objectExists', {\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ }).promise();\n+ 
}).then(function() { \n+ if (typeof done === 'function') done();\n+ }).catch(function(err) {\n+ throw new Error('Cannot put object: ' + err);\n+ exit(1);\n+ });\n+ }\n+\n+ function delectBucket(done) {\n+ s3.listObjectVersions({\n+ Bucket: params.Bucket\n+ }).promise().then(function(data) {\n+ var removeObjectsParams = {\n+ Bucket: params.Bucket,\n+ Delete: {\n+ Quiet: false,\n+ Objects: []\n+ }\n+ }\n+ for (var version of data.Versions) {\n+ removeObjectsParams.Delete.Objects.push({\n+ Key: version.Key,\n+ VersionId: version.VersionId === 'null' ? null : version.VersionId,\n+ });\n+ }\n+ return removeObjectsParams;\n+ }).then(function(removeObjectsParams) {\n+ return s3.deleteObjects(removeObjectsParams).promise();\n+ }).then(function() {\n+ return s3.waitFor('objectNotExists', {\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ }).promise()\n+ }).then(function() {\n+ return s3.deleteBucket({Bucket: params.Bucket}).promise();\n+ }).then(function(data) {\n+ return s3.waitFor('bucketNotExists', {Bucket: params.Bucket}).promise();\n+ }).then(function() {\n+ if (typeof done === 'function') done();\n+ }).catch(function(err) {\n+ throw new Error('Cannot delete bucket: ' + err);\n+ exit(1);\n+ })\n+ }\n+\n+ before('setup s3 client with responseChecksumAlgorithm equals \\'md5\\'', function() {\n+ s3 = new AWS.S3({responseChecksumAlgorithm: 'md5'});\n+ })\n+\n+ beforeEach('setup bucket and object...', function(done) {\n+ params = {\n+ Bucket: bucketName,\n+ Key: 'key',\n+ };\n+ putObject({Body: 'this is a test!'}, done);\n+ })\n+\n+ afterEach('delete bucket...', function(done) {", + "comment_created_at": "2018-04-26T22:38:41+00:00", + "comment_author": "chrisradek", + "comment_body": "If you `createBucket` in the `before` hook, you can `deleteBucket` in the `after` hook!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "265341875", + "pr_number": 2559, + "pr_file": "test/credentials.spec.js", + "created_at": "2019-03-13T21:37:20+00:00", + "commented_code": "creds.get();\n return validateCredentials(creds);\n });\n it('loads via credential_process', function(done) {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "265341875", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2559, + "pr_file": "test/credentials.spec.js", + "discussion_id": "265341875", + "commented_code": "@@ -438,6 +438,124 @@\n creds.get();\n return validateCredentials(creds);\n });\n+ it('loads via credential_process', function(done) {", + "comment_created_at": "2019-03-13T21:37:20+00:00", + "comment_author": "AllanZhengYP", + "comment_body": "Nit: you can put these unit tests to a test suite(like leading with `describe('credential process')`). So than you can clear the spies easily. ", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/aws-sdk-js-structured-test-resource-management.md b/_reviewers/aws-sdk-js-structured-test-resource-management.md index 01c08b0..dd54411 100644 --- a/_reviewers/aws-sdk-js-structured-test-resource-management.md +++ b/_reviewers/aws-sdk-js-structured-test-resource-management.md @@ -52,177 +52,3 @@ describe('S3 object operations', function() { ``` This approach prevents resource exhaustion, improves test execution speed, and reduces flakiness from race conditions or rate limiting. Separating environment-specific tests (browser vs. Node.js) into different folders further improves organization and prevents execution in incorrect environments. 
- - -[ - { - "discussion_id": "182547608", - "pr_number": 2014, - "pr_file": "test/browserHashes.spec.js", - "created_at": "2018-04-18T19:46:02+00:00", - "commented_code": "if (truncate) {\n digest = digest.slice(0, truncate);\n }\n expect(digest.toString('hex')).to.equal(expected);\n //in node <= 0.10 digest sometimes returns a Dataview, should be buffer.", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "182547608", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2014, - "pr_file": "test/browserHashes.spec.js", - "discussion_id": "182547608", - "commented_code": "@@ -43,7 +43,11 @@ describe('Browser hash implementations', function() {\n if (truncate) {\n digest = digest.slice(0, truncate);\n }\n- expect(digest.toString('hex')).to.equal(expected);\n+ //in node <= 0.10 digest sometimes returns a Dataview, should be buffer.", - "comment_created_at": "2018-04-18T19:46:02+00:00", - "comment_author": "chrisradek", - "comment_body": "Are these tests running in node.js? This should only be running in browser environments, and is using the 3rd party `Buffer` package instead of node.js' `Buffer` package. We can place browser-specific tests in a separate folder and exclude them from being run by mocha in the npm unit script.", - "pr_file_module": null - }, - { - "comment_id": "182572206", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2014, - "pr_file": "test/browserHashes.spec.js", - "discussion_id": "182547608", - "commented_code": "@@ -43,7 +43,11 @@ describe('Browser hash implementations', function() {\n if (truncate) {\n digest = digest.slice(0, truncate);\n }\n- expect(digest.toString('hex')).to.equal(expected);\n+ //in node <= 0.10 digest sometimes returns a Dataview, should be buffer.", - "comment_created_at": "2018-04-18T21:16:49+00:00", - "comment_author": "AllanZhengYP", - "comment_body": "Ah right! The change mean to fix the test. But actually we can remove it out of node test. I didn't realize this is browser-only although the name already told me", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "184547814", - "pr_number": 1956, - "pr_file": "test/integration_test/helpers.js", - "created_at": "2018-04-26T22:22:30+00:00", - "commented_code": "(function() {\n global.expect = require('chai').expect;\n module.exports = {\n sharedBucket: 'aws-sdk-js-integration',", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "184547814", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1956, - "pr_file": "test/integration_test/helpers.js", - "discussion_id": "184547814", - "commented_code": "@@ -0,0 +1,25 @@\n+(function() {\n+ global.expect = require('chai').expect;\n+ module.exports = {\n+ sharedBucket: 'aws-sdk-js-integration',", - "comment_created_at": "2018-04-26T22:22:30+00:00", - "comment_author": "chrisradek", - "comment_body": "Probably want to append a timestamp to this as well to make it somewhat unique", - "pr_file_module": null - }, - { - "comment_id": "184557074", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1956, - "pr_file": "test/integration_test/helpers.js", - "discussion_id": "184547814", - "commented_code": "@@ -0,0 +1,25 @@\n+(function() {\n+ global.expect = require('chai').expect;\n+ module.exports = {\n+ sharedBucket: 'aws-sdk-js-integration',", - "comment_created_at": "2018-04-26T23:19:53+00:00", - "comment_author": "AllanZhengYP", - "comment_body": "This is for sharing the test bucket. Maybe this time occasionally we fail to delete the bucket. 
But next time we will delete this bucket.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "184550574", - "pr_number": 1956, - "pr_file": "test/integration_test/s3/getObject.spec.js", - "created_at": "2018-04-26T22:38:16+00:00", - "commented_code": "var helpers = require('../helpers');\nvar AWS = helpers.AWS;\nvar bucketName = helpers.sharedBucket;\ndescribe('download integrity', function() {\n var params;\n var s3;\n function putObject(customParams, done) {\n params = {\n Bucket: customParams.Bucket || params.Bucket,\n Key: customParams.Key || params.Key,\n };\n s3.createBucket({Bucket: params.Bucket}).promise().then(function() {\n return s3.waitFor('bucketExists', {Bucket: params.Bucket}).promise()\n }).then(function() {\n return s3.putObject({\n Bucket: params.Bucket,\n Key: params.Key,\n Body: customParams.Body || params.Body\n }).promise();\n }).then(function() {\n return s3.waitFor('objectExists', {\n Bucket: params.Bucket,\n Key: params.Key,\n }).promise();\n }).then(function() { \n if (typeof done === 'function') done();\n }).catch(function(err) {\n throw new Error('Cannot put object: ' + err);\n exit(1);\n });\n }\n\n function delectBucket(done) {\n s3.listObjectVersions({\n Bucket: params.Bucket\n }).promise().then(function(data) {\n var removeObjectsParams = {\n Bucket: params.Bucket,\n Delete: {\n Quiet: false,\n Objects: []\n }\n }\n for (var version of data.Versions) {\n removeObjectsParams.Delete.Objects.push({\n Key: version.Key,\n VersionId: version.VersionId === 'null' ? null : version.VersionId,\n });\n }\n return removeObjectsParams;\n }).then(function(removeObjectsParams) {\n return s3.deleteObjects(removeObjectsParams).promise();\n }).then(function() {\n return s3.waitFor('objectNotExists', {\n Bucket: params.Bucket,\n Key: params.Key,\n }).promise()\n }).then(function() {\n return s3.deleteBucket({Bucket: params.Bucket}).promise();\n }).then(function(data) {\n return s3.waitFor('bucketNotExists', {Bucket: params.Bucket}).promise();\n }).then(function() {\n if (typeof done === 'function') done();\n }).catch(function(err) {\n throw new Error('Cannot delete bucket: ' + err);\n exit(1);\n })\n }\n\n before('setup s3 client with responseChecksumAlgorithm equals \\'md5\\'', function() {\n s3 = new AWS.S3({responseChecksumAlgorithm: 'md5'});", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "184550574", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1956, - "pr_file": "test/integration_test/s3/getObject.spec.js", - "discussion_id": "184550574", - "commented_code": "@@ -0,0 +1,209 @@\n+var helpers = require('../helpers');\n+var AWS = helpers.AWS;\n+var bucketName = helpers.sharedBucket;\n+describe('download integrity', function() {\n+ var params;\n+ var s3;\n+ function putObject(customParams, done) {\n+ params = {\n+ Bucket: customParams.Bucket || params.Bucket,\n+ Key: customParams.Key || params.Key,\n+ };\n+ s3.createBucket({Bucket: params.Bucket}).promise().then(function() {\n+ return s3.waitFor('bucketExists', {Bucket: params.Bucket}).promise()\n+ }).then(function() {\n+ return s3.putObject({\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ Body: customParams.Body || params.Body\n+ }).promise();\n+ }).then(function() {\n+ return s3.waitFor('objectExists', {\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ }).promise();\n+ }).then(function() { \n+ if (typeof done === 'function') done();\n+ }).catch(function(err) {\n+ throw new Error('Cannot put object: ' + err);\n+ exit(1);\n+ });\n+ }\n+\n+ function 
delectBucket(done) {\n+ s3.listObjectVersions({\n+ Bucket: params.Bucket\n+ }).promise().then(function(data) {\n+ var removeObjectsParams = {\n+ Bucket: params.Bucket,\n+ Delete: {\n+ Quiet: false,\n+ Objects: []\n+ }\n+ }\n+ for (var version of data.Versions) {\n+ removeObjectsParams.Delete.Objects.push({\n+ Key: version.Key,\n+ VersionId: version.VersionId === 'null' ? null : version.VersionId,\n+ });\n+ }\n+ return removeObjectsParams;\n+ }).then(function(removeObjectsParams) {\n+ return s3.deleteObjects(removeObjectsParams).promise();\n+ }).then(function() {\n+ return s3.waitFor('objectNotExists', {\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ }).promise()\n+ }).then(function() {\n+ return s3.deleteBucket({Bucket: params.Bucket}).promise();\n+ }).then(function(data) {\n+ return s3.waitFor('bucketNotExists', {Bucket: params.Bucket}).promise();\n+ }).then(function() {\n+ if (typeof done === 'function') done();\n+ }).catch(function(err) {\n+ throw new Error('Cannot delete bucket: ' + err);\n+ exit(1);\n+ })\n+ }\n+\n+ before('setup s3 client with responseChecksumAlgorithm equals \\'md5\\'', function() {\n+ s3 = new AWS.S3({responseChecksumAlgorithm: 'md5'});", - "comment_created_at": "2018-04-26T22:38:16+00:00", - "comment_author": "chrisradek", - "comment_body": "Why don't you create the bucket used by all the tests in the `before` step? We shouldn't have to create a new bucket for every single test, just this suite of tests. \r\n\r\nI also wouldn't mix the `putObject` method with `createBucket`. `putObject` is doing too much, and adds 'global' (across tests) state. For example, you don't directly pass it the bucket or key, instead relying on a closure that every test has access to (and can change). That could lead to tricky edge cases coming up later that are hard to debug.", - "pr_file_module": null - }, - { - "comment_id": "184559113", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1956, - "pr_file": "test/integration_test/s3/getObject.spec.js", - "discussion_id": "184550574", - "commented_code": "@@ -0,0 +1,209 @@\n+var helpers = require('../helpers');\n+var AWS = helpers.AWS;\n+var bucketName = helpers.sharedBucket;\n+describe('download integrity', function() {\n+ var params;\n+ var s3;\n+ function putObject(customParams, done) {\n+ params = {\n+ Bucket: customParams.Bucket || params.Bucket,\n+ Key: customParams.Key || params.Key,\n+ };\n+ s3.createBucket({Bucket: params.Bucket}).promise().then(function() {\n+ return s3.waitFor('bucketExists', {Bucket: params.Bucket}).promise()\n+ }).then(function() {\n+ return s3.putObject({\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ Body: customParams.Body || params.Body\n+ }).promise();\n+ }).then(function() {\n+ return s3.waitFor('objectExists', {\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ }).promise();\n+ }).then(function() { \n+ if (typeof done === 'function') done();\n+ }).catch(function(err) {\n+ throw new Error('Cannot put object: ' + err);\n+ exit(1);\n+ });\n+ }\n+\n+ function delectBucket(done) {\n+ s3.listObjectVersions({\n+ Bucket: params.Bucket\n+ }).promise().then(function(data) {\n+ var removeObjectsParams = {\n+ Bucket: params.Bucket,\n+ Delete: {\n+ Quiet: false,\n+ Objects: []\n+ }\n+ }\n+ for (var version of data.Versions) {\n+ removeObjectsParams.Delete.Objects.push({\n+ Key: version.Key,\n+ VersionId: version.VersionId === 'null' ? 
null : version.VersionId,\n+ });\n+ }\n+ return removeObjectsParams;\n+ }).then(function(removeObjectsParams) {\n+ return s3.deleteObjects(removeObjectsParams).promise();\n+ }).then(function() {\n+ return s3.waitFor('objectNotExists', {\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ }).promise()\n+ }).then(function() {\n+ return s3.deleteBucket({Bucket: params.Bucket}).promise();\n+ }).then(function(data) {\n+ return s3.waitFor('bucketNotExists', {Bucket: params.Bucket}).promise();\n+ }).then(function() {\n+ if (typeof done === 'function') done();\n+ }).catch(function(err) {\n+ throw new Error('Cannot delete bucket: ' + err);\n+ exit(1);\n+ })\n+ }\n+\n+ before('setup s3 client with responseChecksumAlgorithm equals \\'md5\\'', function() {\n+ s3 = new AWS.S3({responseChecksumAlgorithm: 'md5'});", - "comment_created_at": "2018-04-26T23:33:37+00:00", - "comment_author": "AllanZhengYP", - "comment_body": "Yes, you can pass in the bucket and key, but right, it will change the global state. But capsulizing them will also guarantee `putObject` always succeed(bucket always there).", - "pr_file_module": null - }, - { - "comment_id": "184560586", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1956, - "pr_file": "test/integration_test/s3/getObject.spec.js", - "discussion_id": "184550574", - "commented_code": "@@ -0,0 +1,209 @@\n+var helpers = require('../helpers');\n+var AWS = helpers.AWS;\n+var bucketName = helpers.sharedBucket;\n+describe('download integrity', function() {\n+ var params;\n+ var s3;\n+ function putObject(customParams, done) {\n+ params = {\n+ Bucket: customParams.Bucket || params.Bucket,\n+ Key: customParams.Key || params.Key,\n+ };\n+ s3.createBucket({Bucket: params.Bucket}).promise().then(function() {\n+ return s3.waitFor('bucketExists', {Bucket: params.Bucket}).promise()\n+ }).then(function() {\n+ return s3.putObject({\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ Body: customParams.Body || params.Body\n+ }).promise();\n+ }).then(function() {\n+ return s3.waitFor('objectExists', {\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ }).promise();\n+ }).then(function() { \n+ if (typeof done === 'function') done();\n+ }).catch(function(err) {\n+ throw new Error('Cannot put object: ' + err);\n+ exit(1);\n+ });\n+ }\n+\n+ function delectBucket(done) {\n+ s3.listObjectVersions({\n+ Bucket: params.Bucket\n+ }).promise().then(function(data) {\n+ var removeObjectsParams = {\n+ Bucket: params.Bucket,\n+ Delete: {\n+ Quiet: false,\n+ Objects: []\n+ }\n+ }\n+ for (var version of data.Versions) {\n+ removeObjectsParams.Delete.Objects.push({\n+ Key: version.Key,\n+ VersionId: version.VersionId === 'null' ? 
null : version.VersionId,\n+ });\n+ }\n+ return removeObjectsParams;\n+ }).then(function(removeObjectsParams) {\n+ return s3.deleteObjects(removeObjectsParams).promise();\n+ }).then(function() {\n+ return s3.waitFor('objectNotExists', {\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ }).promise()\n+ }).then(function() {\n+ return s3.deleteBucket({Bucket: params.Bucket}).promise();\n+ }).then(function(data) {\n+ return s3.waitFor('bucketNotExists', {Bucket: params.Bucket}).promise();\n+ }).then(function() {\n+ if (typeof done === 'function') done();\n+ }).catch(function(err) {\n+ throw new Error('Cannot delete bucket: ' + err);\n+ exit(1);\n+ })\n+ }\n+\n+ before('setup s3 client with responseChecksumAlgorithm equals \\'md5\\'', function() {\n+ s3 = new AWS.S3({responseChecksumAlgorithm: 'md5'});", - "comment_created_at": "2018-04-26T23:44:33+00:00", - "comment_author": "chrisradek", - "comment_body": "But you could also make sure the bucket is there in the `before` hook. Just call `done()` after the `waitFor` method completes. Then you also only need to create it once; I don't think there's a reason we need to create a new bucket for every test since we aren't testing any bucket-specific configurations here. Creating a new bucket with each test also creates more points of failure (rate/resource limits, for example).\r\n", - "pr_file_module": null - }, - { - "comment_id": "184561348", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1956, - "pr_file": "test/integration_test/s3/getObject.spec.js", - "discussion_id": "184550574", - "commented_code": "@@ -0,0 +1,209 @@\n+var helpers = require('../helpers');\n+var AWS = helpers.AWS;\n+var bucketName = helpers.sharedBucket;\n+describe('download integrity', function() {\n+ var params;\n+ var s3;\n+ function putObject(customParams, done) {\n+ params = {\n+ Bucket: customParams.Bucket || params.Bucket,\n+ Key: customParams.Key || params.Key,\n+ };\n+ s3.createBucket({Bucket: params.Bucket}).promise().then(function() {\n+ return s3.waitFor('bucketExists', {Bucket: params.Bucket}).promise()\n+ }).then(function() {\n+ return s3.putObject({\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ Body: customParams.Body || params.Body\n+ }).promise();\n+ }).then(function() {\n+ return s3.waitFor('objectExists', {\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ }).promise();\n+ }).then(function() { \n+ if (typeof done === 'function') done();\n+ }).catch(function(err) {\n+ throw new Error('Cannot put object: ' + err);\n+ exit(1);\n+ });\n+ }\n+\n+ function delectBucket(done) {\n+ s3.listObjectVersions({\n+ Bucket: params.Bucket\n+ }).promise().then(function(data) {\n+ var removeObjectsParams = {\n+ Bucket: params.Bucket,\n+ Delete: {\n+ Quiet: false,\n+ Objects: []\n+ }\n+ }\n+ for (var version of data.Versions) {\n+ removeObjectsParams.Delete.Objects.push({\n+ Key: version.Key,\n+ VersionId: version.VersionId === 'null' ? 
null : version.VersionId,\n+ });\n+ }\n+ return removeObjectsParams;\n+ }).then(function(removeObjectsParams) {\n+ return s3.deleteObjects(removeObjectsParams).promise();\n+ }).then(function() {\n+ return s3.waitFor('objectNotExists', {\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ }).promise()\n+ }).then(function() {\n+ return s3.deleteBucket({Bucket: params.Bucket}).promise();\n+ }).then(function(data) {\n+ return s3.waitFor('bucketNotExists', {Bucket: params.Bucket}).promise();\n+ }).then(function() {\n+ if (typeof done === 'function') done();\n+ }).catch(function(err) {\n+ throw new Error('Cannot delete bucket: ' + err);\n+ exit(1);\n+ })\n+ }\n+\n+ before('setup s3 client with responseChecksumAlgorithm equals \\'md5\\'', function() {\n+ s3 = new AWS.S3({responseChecksumAlgorithm: 'md5'});", - "comment_created_at": "2018-04-26T23:50:42+00:00", - "comment_author": "AllanZhengYP", - "comment_body": "yea. I will move the `putObject` to `before` trait. And use timestamp in bucket name. I was thinking that we can write less code if we are going to add more tests here. But now I agree that this is an overkill and not worthwhile. ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "184550641", - "pr_number": 1956, - "pr_file": "test/integration_test/s3/getObject.spec.js", - "created_at": "2018-04-26T22:38:41+00:00", - "commented_code": "var helpers = require('../helpers');\nvar AWS = helpers.AWS;\nvar bucketName = helpers.sharedBucket;\ndescribe('download integrity', function() {\n var params;\n var s3;\n function putObject(customParams, done) {\n params = {\n Bucket: customParams.Bucket || params.Bucket,\n Key: customParams.Key || params.Key,\n };\n s3.createBucket({Bucket: params.Bucket}).promise().then(function() {\n return s3.waitFor('bucketExists', {Bucket: params.Bucket}).promise()\n }).then(function() {\n return s3.putObject({\n Bucket: params.Bucket,\n Key: params.Key,\n Body: customParams.Body || params.Body\n }).promise();\n }).then(function() {\n return s3.waitFor('objectExists', {\n Bucket: params.Bucket,\n Key: params.Key,\n }).promise();\n }).then(function() { \n if (typeof done === 'function') done();\n }).catch(function(err) {\n throw new Error('Cannot put object: ' + err);\n exit(1);\n });\n }\n\n function delectBucket(done) {\n s3.listObjectVersions({\n Bucket: params.Bucket\n }).promise().then(function(data) {\n var removeObjectsParams = {\n Bucket: params.Bucket,\n Delete: {\n Quiet: false,\n Objects: []\n }\n }\n for (var version of data.Versions) {\n removeObjectsParams.Delete.Objects.push({\n Key: version.Key,\n VersionId: version.VersionId === 'null' ? 
null : version.VersionId,\n });\n }\n return removeObjectsParams;\n }).then(function(removeObjectsParams) {\n return s3.deleteObjects(removeObjectsParams).promise();\n }).then(function() {\n return s3.waitFor('objectNotExists', {\n Bucket: params.Bucket,\n Key: params.Key,\n }).promise()\n }).then(function() {\n return s3.deleteBucket({Bucket: params.Bucket}).promise();\n }).then(function(data) {\n return s3.waitFor('bucketNotExists', {Bucket: params.Bucket}).promise();\n }).then(function() {\n if (typeof done === 'function') done();\n }).catch(function(err) {\n throw new Error('Cannot delete bucket: ' + err);\n exit(1);\n })\n }\n\n before('setup s3 client with responseChecksumAlgorithm equals \\'md5\\'', function() {\n s3 = new AWS.S3({responseChecksumAlgorithm: 'md5'});\n })\n\n beforeEach('setup bucket and object...', function(done) {\n params = {\n Bucket: bucketName,\n Key: 'key',\n };\n putObject({Body: 'this is a test!'}, done);\n })\n\n afterEach('delete bucket...', function(done) {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "184550641", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1956, - "pr_file": "test/integration_test/s3/getObject.spec.js", - "discussion_id": "184550641", - "commented_code": "@@ -0,0 +1,209 @@\n+var helpers = require('../helpers');\n+var AWS = helpers.AWS;\n+var bucketName = helpers.sharedBucket;\n+describe('download integrity', function() {\n+ var params;\n+ var s3;\n+ function putObject(customParams, done) {\n+ params = {\n+ Bucket: customParams.Bucket || params.Bucket,\n+ Key: customParams.Key || params.Key,\n+ };\n+ s3.createBucket({Bucket: params.Bucket}).promise().then(function() {\n+ return s3.waitFor('bucketExists', {Bucket: params.Bucket}).promise()\n+ }).then(function() {\n+ return s3.putObject({\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ Body: customParams.Body || params.Body\n+ }).promise();\n+ }).then(function() {\n+ return s3.waitFor('objectExists', {\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ }).promise();\n+ }).then(function() { \n+ if (typeof done === 'function') done();\n+ }).catch(function(err) {\n+ throw new Error('Cannot put object: ' + err);\n+ exit(1);\n+ });\n+ }\n+\n+ function delectBucket(done) {\n+ s3.listObjectVersions({\n+ Bucket: params.Bucket\n+ }).promise().then(function(data) {\n+ var removeObjectsParams = {\n+ Bucket: params.Bucket,\n+ Delete: {\n+ Quiet: false,\n+ Objects: []\n+ }\n+ }\n+ for (var version of data.Versions) {\n+ removeObjectsParams.Delete.Objects.push({\n+ Key: version.Key,\n+ VersionId: version.VersionId === 'null' ? 
null : version.VersionId,\n+ });\n+ }\n+ return removeObjectsParams;\n+ }).then(function(removeObjectsParams) {\n+ return s3.deleteObjects(removeObjectsParams).promise();\n+ }).then(function() {\n+ return s3.waitFor('objectNotExists', {\n+ Bucket: params.Bucket,\n+ Key: params.Key,\n+ }).promise()\n+ }).then(function() {\n+ return s3.deleteBucket({Bucket: params.Bucket}).promise();\n+ }).then(function(data) {\n+ return s3.waitFor('bucketNotExists', {Bucket: params.Bucket}).promise();\n+ }).then(function() {\n+ if (typeof done === 'function') done();\n+ }).catch(function(err) {\n+ throw new Error('Cannot delete bucket: ' + err);\n+ exit(1);\n+ })\n+ }\n+\n+ before('setup s3 client with responseChecksumAlgorithm equals \\'md5\\'', function() {\n+ s3 = new AWS.S3({responseChecksumAlgorithm: 'md5'});\n+ })\n+\n+ beforeEach('setup bucket and object...', function(done) {\n+ params = {\n+ Bucket: bucketName,\n+ Key: 'key',\n+ };\n+ putObject({Body: 'this is a test!'}, done);\n+ })\n+\n+ afterEach('delete bucket...', function(done) {", - "comment_created_at": "2018-04-26T22:38:41+00:00", - "comment_author": "chrisradek", - "comment_body": "If you `createBucket` in the `before` hook, you can `deleteBucket` in the `after` hook!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "265341875", - "pr_number": 2559, - "pr_file": "test/credentials.spec.js", - "created_at": "2019-03-13T21:37:20+00:00", - "commented_code": "creds.get();\n return validateCredentials(creds);\n });\n it('loads via credential_process', function(done) {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "265341875", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2559, - "pr_file": "test/credentials.spec.js", - "discussion_id": "265341875", - "commented_code": "@@ -438,6 +438,124 @@\n creds.get();\n return validateCredentials(creds);\n });\n+ it('loads via credential_process', function(done) {", - "comment_created_at": "2019-03-13T21:37:20+00:00", - "comment_author": "AllanZhengYP", - "comment_body": "Nit: you can put these unit tests to a test suite(like leading with `describe('credential process')`). So than you can clear the spies easily. 
", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/aws-sdk-js-test-configuration-precedence.json b/_reviewers/aws-sdk-js-test-configuration-precedence.json new file mode 100644 index 0000000..35fa861 --- /dev/null +++ b/_reviewers/aws-sdk-js-test-configuration-precedence.json @@ -0,0 +1,92 @@ +[ + { + "discussion_id": "107227865", + "pr_number": 1418, + "pr_file": "test/config.spec.coffee", + "created_at": "2017-03-21T17:51:13+00:00", + "commented_code": "expect(configure(maxRetries: 2).maxRetries).to.equal(2)\n\n describe 'retryDelayOptions', ->\n it 'defaults to \"base: 100\"', ->", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "107227865", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1418, + "pr_file": "test/config.spec.coffee", + "discussion_id": "107227865", + "commented_code": "@@ -60,8 +60,6 @@ describe 'AWS.Config', ->\n expect(configure(maxRetries: 2).maxRetries).to.equal(2)\n \n describe 'retryDelayOptions', ->\n- it 'defaults to \"base: 100\"', ->", + "comment_created_at": "2017-03-21T17:51:13+00:00", + "comment_author": "jeskew", + "comment_body": "Optional: you might want to add a test to service.spec.coffee verifying that a normal client still gets a retry base of 100 set.", + "pr_file_module": null + }, + { + "comment_id": "107261089", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1418, + "pr_file": "test/config.spec.coffee", + "discussion_id": "107227865", + "commented_code": "@@ -60,8 +60,6 @@ describe 'AWS.Config', ->\n expect(configure(maxRetries: 2).maxRetries).to.equal(2)\n \n describe 'retryDelayOptions', ->\n- it 'defaults to \"base: 100\"', ->", + "comment_created_at": "2017-03-21T20:06:21+00:00", + "comment_author": "chrisradek", + "comment_body": "There is a separate test that implicitly tests this, but happy to add an explicit test.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "111287813", + "pr_number": 1391, + "pr_file": "test/credentials.spec.coffee", + "created_at": "2017-04-13T00:13:45+00:00", + "commented_code": "validateCredentials(creds)\n expect(AWS.util.readFileSync.calls[0].arguments[0]).to.match(/[\\/\\\\]home[\\/\\\\]user[\\/\\\\].aws[\\/\\\\]credentials/)\n\n it 'loads credentials from path defined in AWS_SHARED_CREDENTIALS_FILE if AWS_SDK_LOAD_CONFIG is set', ->\n process.env.AWS_SDK_LOAD_CONFIG = '1'\n process.env.AWS_SHARED_CREDENTIALS_FILE = '/path/to/aws/credentials'\n mock = '''\n [default]\n aws_access_key_id = akid\n aws_secret_access_key = secret\n aws_session_token = session\n '''\n helpers.spyOn(AWS.util, 'readFileSync').andReturn(mock)\n\n creds = new AWS.SharedIniFileCredentials()\n creds.get();\n validateCredentials(creds)\n expect(AWS.util.readFileSync.calls[0].arguments[0]).to.match(/[\\/\\\\]home[\\/\\\\]user[\\/\\\\].aws[\\/\\\\]config/)\n expect(AWS.util.readFileSync.calls[1].arguments[0]).to.equal(process.env.AWS_SHARED_CREDENTIALS_FILE)\n\n it 'loads credentials from ~/.aws/config if AWS_SDK_LOAD_CONFIG is set', ->", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "111287813", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1391, + "pr_file": "test/credentials.spec.coffee", + "discussion_id": "111287813", + "commented_code": "@@ -209,6 +233,53 @@ if AWS.util.isNode()\n validateCredentials(creds)\n expect(AWS.util.readFileSync.calls[0].arguments[0]).to.match(/[\\/\\\\]home[\\/\\\\]user[\\/\\\\].aws[\\/\\\\]credentials/)\n \n+ it 'loads credentials from path defined in AWS_SHARED_CREDENTIALS_FILE if 
AWS_SDK_LOAD_CONFIG is set', ->\n+ process.env.AWS_SDK_LOAD_CONFIG = '1'\n+ process.env.AWS_SHARED_CREDENTIALS_FILE = '/path/to/aws/credentials'\n+ mock = '''\n+ [default]\n+ aws_access_key_id = akid\n+ aws_secret_access_key = secret\n+ aws_session_token = session\n+ '''\n+ helpers.spyOn(AWS.util, 'readFileSync').andReturn(mock)\n+\n+ creds = new AWS.SharedIniFileCredentials()\n+ creds.get();\n+ validateCredentials(creds)\n+ expect(AWS.util.readFileSync.calls[0].arguments[0]).to.match(/[\\/\\\\]home[\\/\\\\]user[\\/\\\\].aws[\\/\\\\]config/)\n+ expect(AWS.util.readFileSync.calls[1].arguments[0]).to.equal(process.env.AWS_SHARED_CREDENTIALS_FILE)\n+\n+ it 'loads credentials from ~/.aws/config if AWS_SDK_LOAD_CONFIG is set', ->", + "comment_created_at": "2017-04-13T00:13:45+00:00", + "comment_author": "chrisradek", + "comment_body": "Can we add a test to make sure the credentials from `~/.aws/credentials` is used preferentially over the credentials in `~/.aws/config` if the same profile exists in both files?", + "pr_file_module": null + }, + { + "comment_id": "111301175", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1391, + "pr_file": "test/credentials.spec.coffee", + "discussion_id": "111287813", + "commented_code": "@@ -209,6 +233,53 @@ if AWS.util.isNode()\n validateCredentials(creds)\n expect(AWS.util.readFileSync.calls[0].arguments[0]).to.match(/[\\/\\\\]home[\\/\\\\]user[\\/\\\\].aws[\\/\\\\]credentials/)\n \n+ it 'loads credentials from path defined in AWS_SHARED_CREDENTIALS_FILE if AWS_SDK_LOAD_CONFIG is set', ->\n+ process.env.AWS_SDK_LOAD_CONFIG = '1'\n+ process.env.AWS_SHARED_CREDENTIALS_FILE = '/path/to/aws/credentials'\n+ mock = '''\n+ [default]\n+ aws_access_key_id = akid\n+ aws_secret_access_key = secret\n+ aws_session_token = session\n+ '''\n+ helpers.spyOn(AWS.util, 'readFileSync').andReturn(mock)\n+\n+ creds = new AWS.SharedIniFileCredentials()\n+ creds.get();\n+ validateCredentials(creds)\n+ expect(AWS.util.readFileSync.calls[0].arguments[0]).to.match(/[\\/\\\\]home[\\/\\\\]user[\\/\\\\].aws[\\/\\\\]config/)\n+ expect(AWS.util.readFileSync.calls[1].arguments[0]).to.equal(process.env.AWS_SHARED_CREDENTIALS_FILE)\n+\n+ it 'loads credentials from ~/.aws/config if AWS_SDK_LOAD_CONFIG is set', ->", + "comment_created_at": "2017-04-13T02:38:43+00:00", + "comment_author": "jeskew", + "comment_body": "I'll add a test for that.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "111434683", + "pr_number": 1391, + "pr_file": "test/credentials.spec.coffee", + "created_at": "2017-04-13T16:46:30+00:00", + "commented_code": "expect(creds.expireTime).to.eql(new Date(0))\n done()\n\n it 'will assume a role from the credentials file whose source profile is defined in the config file', (done) ->\n process.env.AWS_SDK_LOAD_CONFIG = '1'\n helpers.spyOn(AWS.util, 'readFileSync').andCallFake (path) ->\n if (path.match(/[\\/\\\\]home[\\/\\\\]user[\\/\\\\].aws[\\/\\\\]credentials/))\n '''\n [default]\n aws_access_key_id = akid\n aws_secret_access_key = secret\n role_arn = arn\n source_profile = foo\n '''\n else\n '''\n [profile foo]\n aws_access_key_id = akid2\n aws_secret_access_key = secret2\n '''\n helpers.mockHttpResponse 200, {}, '''", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "111434683", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 1391, + "pr_file": "test/credentials.spec.coffee", + "discussion_id": "111434683", + "commented_code": "@@ -370,6 +491,84 @@ if AWS.util.isNode()\n expect(creds.expireTime).to.eql(new 
Date(0))\n done()\n \n+ it 'will assume a role from the credentials file whose source profile is defined in the config file', (done) ->\n+ process.env.AWS_SDK_LOAD_CONFIG = '1'\n+ helpers.spyOn(AWS.util, 'readFileSync').andCallFake (path) ->\n+ if (path.match(/[\\/\\\\]home[\\/\\\\]user[\\/\\\\].aws[\\/\\\\]credentials/))\n+ '''\n+ [default]\n+ aws_access_key_id = akid\n+ aws_secret_access_key = secret\n+ role_arn = arn\n+ source_profile = foo\n+ '''\n+ else\n+ '''\n+ [profile foo]\n+ aws_access_key_id = akid2\n+ aws_secret_access_key = secret2\n+ '''\n+ helpers.mockHttpResponse 200, {}, '''", + "comment_created_at": "2017-04-13T16:46:30+00:00", + "comment_author": "chrisradek", + "comment_body": "I'm not sure this test is actually ensuring that the creds from `profile foo` are used instead of `default`.\r\n\r\nWhat do you think about spying on `AWS.STS` or `AWS.Credentials` to get the accessKeyId that was used as the source?", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/aws-sdk-js-test-configuration-precedence.md b/_reviewers/aws-sdk-js-test-configuration-precedence.md index c55f6f9..cbbe125 100644 --- a/_reviewers/aws-sdk-js-test-configuration-precedence.md +++ b/_reviewers/aws-sdk-js-test-configuration-precedence.md @@ -41,97 +41,3 @@ it('loads credentials from preferred source when available in multiple locations ``` Don't just verify that configuration loads successfully—explicitly test that the correct values from the highest-priority source are being used when the same configuration option exists in multiple places. - - -[ - { - "discussion_id": "107227865", - "pr_number": 1418, - "pr_file": "test/config.spec.coffee", - "created_at": "2017-03-21T17:51:13+00:00", - "commented_code": "expect(configure(maxRetries: 2).maxRetries).to.equal(2)\n\n describe 'retryDelayOptions', ->\n it 'defaults to \"base: 100\"', ->", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "107227865", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1418, - "pr_file": "test/config.spec.coffee", - "discussion_id": "107227865", - "commented_code": "@@ -60,8 +60,6 @@ describe 'AWS.Config', ->\n expect(configure(maxRetries: 2).maxRetries).to.equal(2)\n \n describe 'retryDelayOptions', ->\n- it 'defaults to \"base: 100\"', ->", - "comment_created_at": "2017-03-21T17:51:13+00:00", - "comment_author": "jeskew", - "comment_body": "Optional: you might want to add a test to service.spec.coffee verifying that a normal client still gets a retry base of 100 set.", - "pr_file_module": null - }, - { - "comment_id": "107261089", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1418, - "pr_file": "test/config.spec.coffee", - "discussion_id": "107227865", - "commented_code": "@@ -60,8 +60,6 @@ describe 'AWS.Config', ->\n expect(configure(maxRetries: 2).maxRetries).to.equal(2)\n \n describe 'retryDelayOptions', ->\n- it 'defaults to \"base: 100\"', ->", - "comment_created_at": "2017-03-21T20:06:21+00:00", - "comment_author": "chrisradek", - "comment_body": "There is a separate test that implicitly tests this, but happy to add an explicit test.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "111287813", - "pr_number": 1391, - "pr_file": "test/credentials.spec.coffee", - "created_at": "2017-04-13T00:13:45+00:00", - "commented_code": "validateCredentials(creds)\n expect(AWS.util.readFileSync.calls[0].arguments[0]).to.match(/[\\/\\\\]home[\\/\\\\]user[\\/\\\\].aws[\\/\\\\]credentials/)\n\n it 'loads credentials from path 
defined in AWS_SHARED_CREDENTIALS_FILE if AWS_SDK_LOAD_CONFIG is set', ->\n process.env.AWS_SDK_LOAD_CONFIG = '1'\n process.env.AWS_SHARED_CREDENTIALS_FILE = '/path/to/aws/credentials'\n mock = '''\n [default]\n aws_access_key_id = akid\n aws_secret_access_key = secret\n aws_session_token = session\n '''\n helpers.spyOn(AWS.util, 'readFileSync').andReturn(mock)\n\n creds = new AWS.SharedIniFileCredentials()\n creds.get();\n validateCredentials(creds)\n expect(AWS.util.readFileSync.calls[0].arguments[0]).to.match(/[\\/\\\\]home[\\/\\\\]user[\\/\\\\].aws[\\/\\\\]config/)\n expect(AWS.util.readFileSync.calls[1].arguments[0]).to.equal(process.env.AWS_SHARED_CREDENTIALS_FILE)\n\n it 'loads credentials from ~/.aws/config if AWS_SDK_LOAD_CONFIG is set', ->", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "111287813", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1391, - "pr_file": "test/credentials.spec.coffee", - "discussion_id": "111287813", - "commented_code": "@@ -209,6 +233,53 @@ if AWS.util.isNode()\n validateCredentials(creds)\n expect(AWS.util.readFileSync.calls[0].arguments[0]).to.match(/[\\/\\\\]home[\\/\\\\]user[\\/\\\\].aws[\\/\\\\]credentials/)\n \n+ it 'loads credentials from path defined in AWS_SHARED_CREDENTIALS_FILE if AWS_SDK_LOAD_CONFIG is set', ->\n+ process.env.AWS_SDK_LOAD_CONFIG = '1'\n+ process.env.AWS_SHARED_CREDENTIALS_FILE = '/path/to/aws/credentials'\n+ mock = '''\n+ [default]\n+ aws_access_key_id = akid\n+ aws_secret_access_key = secret\n+ aws_session_token = session\n+ '''\n+ helpers.spyOn(AWS.util, 'readFileSync').andReturn(mock)\n+\n+ creds = new AWS.SharedIniFileCredentials()\n+ creds.get();\n+ validateCredentials(creds)\n+ expect(AWS.util.readFileSync.calls[0].arguments[0]).to.match(/[\\/\\\\]home[\\/\\\\]user[\\/\\\\].aws[\\/\\\\]config/)\n+ expect(AWS.util.readFileSync.calls[1].arguments[0]).to.equal(process.env.AWS_SHARED_CREDENTIALS_FILE)\n+\n+ it 'loads credentials from ~/.aws/config if AWS_SDK_LOAD_CONFIG is set', ->", - "comment_created_at": "2017-04-13T00:13:45+00:00", - "comment_author": "chrisradek", - "comment_body": "Can we add a test to make sure the credentials from `~/.aws/credentials` is used preferentially over the credentials in `~/.aws/config` if the same profile exists in both files?", - "pr_file_module": null - }, - { - "comment_id": "111301175", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1391, - "pr_file": "test/credentials.spec.coffee", - "discussion_id": "111287813", - "commented_code": "@@ -209,6 +233,53 @@ if AWS.util.isNode()\n validateCredentials(creds)\n expect(AWS.util.readFileSync.calls[0].arguments[0]).to.match(/[\\/\\\\]home[\\/\\\\]user[\\/\\\\].aws[\\/\\\\]credentials/)\n \n+ it 'loads credentials from path defined in AWS_SHARED_CREDENTIALS_FILE if AWS_SDK_LOAD_CONFIG is set', ->\n+ process.env.AWS_SDK_LOAD_CONFIG = '1'\n+ process.env.AWS_SHARED_CREDENTIALS_FILE = '/path/to/aws/credentials'\n+ mock = '''\n+ [default]\n+ aws_access_key_id = akid\n+ aws_secret_access_key = secret\n+ aws_session_token = session\n+ '''\n+ helpers.spyOn(AWS.util, 'readFileSync').andReturn(mock)\n+\n+ creds = new AWS.SharedIniFileCredentials()\n+ creds.get();\n+ validateCredentials(creds)\n+ expect(AWS.util.readFileSync.calls[0].arguments[0]).to.match(/[\\/\\\\]home[\\/\\\\]user[\\/\\\\].aws[\\/\\\\]config/)\n+ expect(AWS.util.readFileSync.calls[1].arguments[0]).to.equal(process.env.AWS_SHARED_CREDENTIALS_FILE)\n+\n+ it 'loads credentials from ~/.aws/config if AWS_SDK_LOAD_CONFIG is set', ->", - 
"comment_created_at": "2017-04-13T02:38:43+00:00", - "comment_author": "jeskew", - "comment_body": "I'll add a test for that.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "111434683", - "pr_number": 1391, - "pr_file": "test/credentials.spec.coffee", - "created_at": "2017-04-13T16:46:30+00:00", - "commented_code": "expect(creds.expireTime).to.eql(new Date(0))\n done()\n\n it 'will assume a role from the credentials file whose source profile is defined in the config file', (done) ->\n process.env.AWS_SDK_LOAD_CONFIG = '1'\n helpers.spyOn(AWS.util, 'readFileSync').andCallFake (path) ->\n if (path.match(/[\\/\\\\]home[\\/\\\\]user[\\/\\\\].aws[\\/\\\\]credentials/))\n '''\n [default]\n aws_access_key_id = akid\n aws_secret_access_key = secret\n role_arn = arn\n source_profile = foo\n '''\n else\n '''\n [profile foo]\n aws_access_key_id = akid2\n aws_secret_access_key = secret2\n '''\n helpers.mockHttpResponse 200, {}, '''", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "111434683", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 1391, - "pr_file": "test/credentials.spec.coffee", - "discussion_id": "111434683", - "commented_code": "@@ -370,6 +491,84 @@ if AWS.util.isNode()\n expect(creds.expireTime).to.eql(new Date(0))\n done()\n \n+ it 'will assume a role from the credentials file whose source profile is defined in the config file', (done) ->\n+ process.env.AWS_SDK_LOAD_CONFIG = '1'\n+ helpers.spyOn(AWS.util, 'readFileSync').andCallFake (path) ->\n+ if (path.match(/[\\/\\\\]home[\\/\\\\]user[\\/\\\\].aws[\\/\\\\]credentials/))\n+ '''\n+ [default]\n+ aws_access_key_id = akid\n+ aws_secret_access_key = secret\n+ role_arn = arn\n+ source_profile = foo\n+ '''\n+ else\n+ '''\n+ [profile foo]\n+ aws_access_key_id = akid2\n+ aws_secret_access_key = secret2\n+ '''\n+ helpers.mockHttpResponse 200, {}, '''", - "comment_created_at": "2017-04-13T16:46:30+00:00", - "comment_author": "chrisradek", - "comment_body": "I'm not sure this test is actually ensuring that the creds from `profile foo` are used instead of `default`.\r\n\r\nWhat do you think about spying on `AWS.STS` or `AWS.Credentials` to get the accessKeyId that was used as the source?", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/aws-sdk-js-validate-configurations-with-clarity.json b/_reviewers/aws-sdk-js-validate-configurations-with-clarity.json new file mode 100644 index 0000000..e0ab3cf --- /dev/null +++ b/_reviewers/aws-sdk-js-validate-configurations-with-clarity.json @@ -0,0 +1,68 @@ +[ + { + "discussion_id": "227568468", + "pr_number": 2253, + "pr_file": "lib/discover_endpoint.js", + "created_at": "2018-10-23T21:18:49+00:00", + "commented_code": "var AWS = require('./core');\nvar util = require('./util');\nvar ENDPOINT_OPERATION_MAX_RETRIES = 60;\nvar endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n\n/**\n * Generate key to index the endpoints in the cache\n * @return [map] object with keys to index endpoints.\n * @api private\n */\nfunction getCacheKey(request) {\n var service = request.service;\n var api = service.api || {};\n var operations = api.operations;\n var identifiers = {};\n if (operations[request.operation] && operations[request.operation].name) {\n identifiers.operation = operations[request.operation].name;\n }\n if (service.config.region) {\n identifiers.region = service.config.region;\n }\n if (api.serviceId) {\n identifiers.serviceId = api.serviceId\n }\n if (service.config.credentials.accessKeyId) {\n identifiers.accessKeyId = 
service.config.credentials.accessKeyId\n }\n return identifiers;\n}\n\n/**\n * @api private\n */\nfunction marshallCustomeIdentifiersHelper(result, params, shape) {\n if (!shape || params === undefined || params === null) return;\n if (shape.type === 'structure' && shape.required && shape.required.length > 0) {\n util.arrayEach(shape.required, function(name) {\n var memberShape = shape.members[name];\n if (memberShape.endpointDiscoveryId === true) {\n var locationName = memberShape.isLocationName ? memberShape.name : name;\n result[locationName] = String(params[name]);\n } else {\n marshallCustomeIdentifiersHelper(result, params[name], memberShape);\n }\n });\n }\n}\n\n/**\n * Get customized cache key according to the 'endpointDiscoveryId' trait.\n * @param [object] request object\n * @param [object] input shape of the given operation's api\n */\nfunction marshallCustomeIdentifiers(request, shape) {\n var identifiers = {};\n marshallCustomeIdentifiersHelper(identifiers, request.params, shape);\n return identifiers;\n}\n\n/**\n * Call endpoint discovery operation when it's optional.\n * When endpoint is available in cache then use the cached endpoints. If endpoints\n * are unavailable then use regional endpoints and call endpoint discovery operation\n * asynchronously. This is turned off by default.\n * @param [object] request object\n * @api private\n */\nfunction optionalDisverEndpoint(request) {\n var service = request.service;\n var api = service.api;\n var operationModel = api.operations ? api.operations[request.operation] : undefined;\n var inputShape = operationModel ? operationModel.input : undefined;\n\n var identifiers = marshallCustomeIdentifiers(request, inputShape);\n var cacheKey = getCacheKey(request);\n cacheKey = util.update(cacheKey, identifiers);\n var endpoints = AWS.endpointCache.get(cacheKey);\n if (endpoints && endpoints.length > 0) {\n //found endpoint record from cache\n request.httpRequest.updateEndpoint(endpoints[0].Address);\n } else if (endpoints && endpoints.length === 0) {\n //endpoint operation is being made but response not yet received\n return;\n } else {\n //endpoint record not in cache. make discovery operation\n var endpointRequest = service.makeRequest(api.endpointOperation, {\n Operation: operationModel.name,\n Identifiers: identifiers,\n });\n addApiVersionHeader(endpointRequest);\n endpointRequest.removeListener('validate', AWS.EventListeners.Core.VALIDATE_PARAMETERS);\n endpointRequest.removeListener('retry', AWS.EventListeners.Core.RETRY_CHECK);\n var toHead = true;\n //if endpoint operation failed, keep retrying every minute.\n endpointRequest.on('retry', function(resp) {\n var err = resp.error;\n err.retryable = true;\n err.retryDelay = 60000;\n resp.maxRetries = ENDPOINT_OPERATION_MAX_RETRIES;\n }, toHead);\n //put in a placeholder for endpoints already requested, prevent\n //too much in-flight calls\n AWS.endpointCache.put(cacheKey, []);\n endpointRequest.send(function(err, data) {\n if (data && data.Endpoints) {\n AWS.endpointCache.put(cacheKey, data.Endpoints);\n } else if (err) {\n AWS.endpointCache.remove(cacheKey);\n }\n });\n }\n}\n\n/**\n * Call endpoint discovery operation when it's required.\n * When endpoint is available in cache then use cached ones. If endpoins are\n * unavailable then SDK should call endpoint operation end use returned new\n * endpoint for the api call. 
SDK will automatically attempt to do endpoint\n * discovery regardless the configuration.\n * @param [object] request object\n * @api private\n */\nfunction requiredDiscoverEndpoint(request, done) {\n var service = request.service;\n var api = service.api;\n var operationModel = api.operations ? api.operations[request.operation] : undefined;\n var inputShape = operationModel ? operationModel.input : undefined;\n\n var identifiers = marshallCustomeIdentifiers(request, inputShape);\n var cacheKey = getCacheKey(request);\n cacheKey = util.update(cacheKey, identifiers);\n var endpoints = AWS.endpointCache.get(cacheKey);\n if (endpoints === undefined || endpoints.length === 0) {\n var endpointRequest = service.makeRequest(api.endpointOperation, {\n Operation: operationModel.name,\n Identifiers: identifiers,\n });\n endpointRequest.removeListener('validate', AWS.EventListeners.Core.VALIDATE_PARAMETERS);\n addApiVersionHeader(endpointRequest);\n endpointRequest.send(function(err, data) {\n if (err) {\n request.response.error = util.error(err, {\n code: 'EndpointDiscoveryException',\n message: 'Request cannot be fulfilled without specifying an endpoint',\n retryable: false\n });\n } else if (data) {\n AWS.endpointCache.put(cacheKey, data.Endpoints);\n request.httpRequest.updateEndpoint(data.Endpoints[0].Address);\n }\n done();\n });\n } else {\n request.httpRequest.updateEndpoint(endpoints[0].Address);\n done();\n }\n}\n\n/**\n * add api version header to endpoint operation\n * @api private\n */\nfunction addApiVersionHeader(endpointRequest) {\n var api = endpointRequest.service.api;\n var apiVersion = api.apiVersion;\n if (apiVersion && !endpointRequest.httpRequest.headers['x-amz-api-version']) {\n endpointRequest.httpRequest.headers['x-amz-api-version'] = apiVersion;\n }\n}\n\n/**\n * If api call gets invalid endoint exception, SDK should attempt to remove the invalid\n * endpoint from cache.\n * @api private\n */\nfunction invalidateCachedEndpoints(response) {\n var error = response.error;\n var httpResponse = response.httpResponse;\n if (error &&\n (error.code === 'InvalidEndpointException' || httpResponse.statusCode === 421)\n ) {\n var request = response.request;\n var operations = request.service.api.operations || {};\n var inputShape = operations[request.operation] ? 
operations[request.operation].input : undefined;\n var identifiers = marshallCustomeIdentifiers(request, inputShape);\n var cacheKey = getCacheKey(request);\n cacheKey = util.update(cacheKey, identifiers);\n AWS.endpointCache.remove(cacheKey);\n }\n}\n\n/**\n * If endpoint is explicitly configured, SDK should not do endpoint discovery in anytime.\n * @param [object] clien Service client object.\n * @api private\n */\nfunction hasCustomEndpoint(client) {\n //if set endpoint is set for specific client, enable endpoint discovery will raise an error.\n if (client._originalConfig && client._originalConfig.endpoint && client._originalConfig.endpointDiscoveryEnabled === true) {\n throw util.error(new Error(), {\n code: 'ConfigurationException',\n message: 'If custome endpoint is supplied, endpointDiscoverEnabled should be turned off.'\n });\n };\n var svcConfig = AWS.config[client.serviceIdentifier] || {};\n return AWS.config.endpoint || svcConfig.endpoint || (client._originalConfig && client._originalConfig.endpoint);\n}\n\n/**\n * @api private\n */\nfunction isFalsey(value) {\n return ['false', '0'].indexOf(value) >= 0;\n}\n\n/**\n * If endpoint discovery should perform for this request when endpoint discovery is optional.\n * SDK performs config resolution in order like below:\n * 1. If turned on client configuration(default to off) then turn on endpoint discovery.\n * 2. If turned on in env AWS_ENABLE_ENDPOINT_DISCOVERY then turn on endpoint discovery.\n * 3. If turned on in shared ini config file with key 'endpoint_discovery_enabled', then\n * turn on endpoint discovery.\n * @param [object] request request object.\n * @api private\n */\nfunction isEndpointDiscoveryApplicable(request) {\n var service = request.service || {};\n if (service.config.endpointDiscoveryEnabled === true) return true;\n if (Object.prototype.hasOwnProperty.call(process.env, endpointDiscoveryEnabledEnv)) {\n if (process.env[endpointDiscoveryEnabledEnv] === undefined) {", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "227568468", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2253, + "pr_file": "lib/discover_endpoint.js", + "discussion_id": "227568468", + "commented_code": "@@ -0,0 +1,305 @@\n+var AWS = require('./core');\n+var util = require('./util');\n+var ENDPOINT_OPERATION_MAX_RETRIES = 60;\n+var endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n+\n+/**\n+ * Generate key to index the endpoints in the cache\n+ * @return [map] object with keys to index endpoints.\n+ * @api private\n+ */\n+function getCacheKey(request) {\n+ var service = request.service;\n+ var api = service.api || {};\n+ var operations = api.operations;\n+ var identifiers = {};\n+ if (operations[request.operation] && operations[request.operation].name) {\n+ identifiers.operation = operations[request.operation].name;\n+ }\n+ if (service.config.region) {\n+ identifiers.region = service.config.region;\n+ }\n+ if (api.serviceId) {\n+ identifiers.serviceId = api.serviceId\n+ }\n+ if (service.config.credentials.accessKeyId) {\n+ identifiers.accessKeyId = service.config.credentials.accessKeyId\n+ }\n+ return identifiers;\n+}\n+\n+/**\n+ * @api private\n+ */\n+function marshallCustomeIdentifiersHelper(result, params, shape) {\n+ if (!shape || params === undefined || params === null) return;\n+ if (shape.type === 'structure' && shape.required && shape.required.length > 0) {\n+ util.arrayEach(shape.required, function(name) {\n+ var memberShape = shape.members[name];\n+ if 
(memberShape.endpointDiscoveryId === true) {\n+ var locationName = memberShape.isLocationName ? memberShape.name : name;\n+ result[locationName] = String(params[name]);\n+ } else {\n+ marshallCustomeIdentifiersHelper(result, params[name], memberShape);\n+ }\n+ });\n+ }\n+}\n+\n+/**\n+ * Get customized cache key according to the 'endpointDiscoveryId' trait.\n+ * @param [object] request object\n+ * @param [object] input shape of the given operation's api\n+ */\n+function marshallCustomeIdentifiers(request, shape) {\n+ var identifiers = {};\n+ marshallCustomeIdentifiersHelper(identifiers, request.params, shape);\n+ return identifiers;\n+}\n+\n+/**\n+ * Call endpoint discovery operation when it's optional.\n+ * When endpoint is available in cache then use the cached endpoints. If endpoints\n+ * are unavailable then use regional endpoints and call endpoint discovery operation\n+ * asynchronously. This is turned off by default.\n+ * @param [object] request object\n+ * @api private\n+ */\n+function optionalDisverEndpoint(request) {\n+ var service = request.service;\n+ var api = service.api;\n+ var operationModel = api.operations ? api.operations[request.operation] : undefined;\n+ var inputShape = operationModel ? operationModel.input : undefined;\n+\n+ var identifiers = marshallCustomeIdentifiers(request, inputShape);\n+ var cacheKey = getCacheKey(request);\n+ cacheKey = util.update(cacheKey, identifiers);\n+ var endpoints = AWS.endpointCache.get(cacheKey);\n+ if (endpoints && endpoints.length > 0) {\n+ //found endpoint record from cache\n+ request.httpRequest.updateEndpoint(endpoints[0].Address);\n+ } else if (endpoints && endpoints.length === 0) {\n+ //endpoint operation is being made but response not yet received\n+ return;\n+ } else {\n+ //endpoint record not in cache. make discovery operation\n+ var endpointRequest = service.makeRequest(api.endpointOperation, {\n+ Operation: operationModel.name,\n+ Identifiers: identifiers,\n+ });\n+ addApiVersionHeader(endpointRequest);\n+ endpointRequest.removeListener('validate', AWS.EventListeners.Core.VALIDATE_PARAMETERS);\n+ endpointRequest.removeListener('retry', AWS.EventListeners.Core.RETRY_CHECK);\n+ var toHead = true;\n+ //if endpoint operation failed, keep retrying every minute.\n+ endpointRequest.on('retry', function(resp) {\n+ var err = resp.error;\n+ err.retryable = true;\n+ err.retryDelay = 60000;\n+ resp.maxRetries = ENDPOINT_OPERATION_MAX_RETRIES;\n+ }, toHead);\n+ //put in a placeholder for endpoints already requested, prevent\n+ //too much in-flight calls\n+ AWS.endpointCache.put(cacheKey, []);\n+ endpointRequest.send(function(err, data) {\n+ if (data && data.Endpoints) {\n+ AWS.endpointCache.put(cacheKey, data.Endpoints);\n+ } else if (err) {\n+ AWS.endpointCache.remove(cacheKey);\n+ }\n+ });\n+ }\n+}\n+\n+/**\n+ * Call endpoint discovery operation when it's required.\n+ * When endpoint is available in cache then use cached ones. If endpoins are\n+ * unavailable then SDK should call endpoint operation end use returned new\n+ * endpoint for the api call. SDK will automatically attempt to do endpoint\n+ * discovery regardless the configuration.\n+ * @param [object] request object\n+ * @api private\n+ */\n+function requiredDiscoverEndpoint(request, done) {\n+ var service = request.service;\n+ var api = service.api;\n+ var operationModel = api.operations ? api.operations[request.operation] : undefined;\n+ var inputShape = operationModel ? 
operationModel.input : undefined;\n+\n+ var identifiers = marshallCustomeIdentifiers(request, inputShape);\n+ var cacheKey = getCacheKey(request);\n+ cacheKey = util.update(cacheKey, identifiers);\n+ var endpoints = AWS.endpointCache.get(cacheKey);\n+ if (endpoints === undefined || endpoints.length === 0) {\n+ var endpointRequest = service.makeRequest(api.endpointOperation, {\n+ Operation: operationModel.name,\n+ Identifiers: identifiers,\n+ });\n+ endpointRequest.removeListener('validate', AWS.EventListeners.Core.VALIDATE_PARAMETERS);\n+ addApiVersionHeader(endpointRequest);\n+ endpointRequest.send(function(err, data) {\n+ if (err) {\n+ request.response.error = util.error(err, {\n+ code: 'EndpointDiscoveryException',\n+ message: 'Request cannot be fulfilled without specifying an endpoint',\n+ retryable: false\n+ });\n+ } else if (data) {\n+ AWS.endpointCache.put(cacheKey, data.Endpoints);\n+ request.httpRequest.updateEndpoint(data.Endpoints[0].Address);\n+ }\n+ done();\n+ });\n+ } else {\n+ request.httpRequest.updateEndpoint(endpoints[0].Address);\n+ done();\n+ }\n+}\n+\n+/**\n+ * add api version header to endpoint operation\n+ * @api private\n+ */\n+function addApiVersionHeader(endpointRequest) {\n+ var api = endpointRequest.service.api;\n+ var apiVersion = api.apiVersion;\n+ if (apiVersion && !endpointRequest.httpRequest.headers['x-amz-api-version']) {\n+ endpointRequest.httpRequest.headers['x-amz-api-version'] = apiVersion;\n+ }\n+}\n+\n+/**\n+ * If api call gets invalid endoint exception, SDK should attempt to remove the invalid\n+ * endpoint from cache.\n+ * @api private\n+ */\n+function invalidateCachedEndpoints(response) {\n+ var error = response.error;\n+ var httpResponse = response.httpResponse;\n+ if (error &&\n+ (error.code === 'InvalidEndpointException' || httpResponse.statusCode === 421)\n+ ) {\n+ var request = response.request;\n+ var operations = request.service.api.operations || {};\n+ var inputShape = operations[request.operation] ? operations[request.operation].input : undefined;\n+ var identifiers = marshallCustomeIdentifiers(request, inputShape);\n+ var cacheKey = getCacheKey(request);\n+ cacheKey = util.update(cacheKey, identifiers);\n+ AWS.endpointCache.remove(cacheKey);\n+ }\n+}\n+\n+/**\n+ * If endpoint is explicitly configured, SDK should not do endpoint discovery in anytime.\n+ * @param [object] clien Service client object.\n+ * @api private\n+ */\n+function hasCustomEndpoint(client) {\n+ //if set endpoint is set for specific client, enable endpoint discovery will raise an error.\n+ if (client._originalConfig && client._originalConfig.endpoint && client._originalConfig.endpointDiscoveryEnabled === true) {\n+ throw util.error(new Error(), {\n+ code: 'ConfigurationException',\n+ message: 'If custome endpoint is supplied, endpointDiscoverEnabled should be turned off.'\n+ });\n+ };\n+ var svcConfig = AWS.config[client.serviceIdentifier] || {};\n+ return AWS.config.endpoint || svcConfig.endpoint || (client._originalConfig && client._originalConfig.endpoint);\n+}\n+\n+/**\n+ * @api private\n+ */\n+function isFalsey(value) {\n+ return ['false', '0'].indexOf(value) >= 0;\n+}\n+\n+/**\n+ * If endpoint discovery should perform for this request when endpoint discovery is optional.\n+ * SDK performs config resolution in order like below:\n+ * 1. If turned on client configuration(default to off) then turn on endpoint discovery.\n+ * 2. If turned on in env AWS_ENABLE_ENDPOINT_DISCOVERY then turn on endpoint discovery.\n+ * 3. 
If turned on in shared ini config file with key 'endpoint_discovery_enabled', then\n+ * turn on endpoint discovery.\n+ * @param [object] request request object.\n+ * @api private\n+ */\n+function isEndpointDiscoveryApplicable(request) {\n+ var service = request.service || {};\n+ if (service.config.endpointDiscoveryEnabled === true) return true;\n+ if (Object.prototype.hasOwnProperty.call(process.env, endpointDiscoveryEnabledEnv)) {\n+ if (process.env[endpointDiscoveryEnabledEnv] === undefined) {", + "comment_created_at": "2018-10-23T21:18:49+00:00", + "comment_author": "chrisradek", + "comment_body": "Is it possible to have an environment variable that is not a string in node.js?\r\nI haven't tried in Windows, but on MacOS, that doesn't seem possible.\r\n```bash\r\nFOO=bar node -e \"console.log(process.env)\"\r\n# FOO: 'bar'\r\n```\r\n```bash\r\nFOO= node -e \"console.log(process.env)\"\r\n# FOO: ''\r\n```\r\n\r\n```bash\r\nnode -e \"process.env['FOO'] = void 0; console.log(process.env)\"\r\n# FOO: 'undefined'\r\n```\r\n\r\nEven trying to set it to undefined within the process still results in the variable being a string. If you want to throw an error when this variable is set to nothing, you might want to instead check if it is an empty string.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "227596555", + "pr_number": 2253, + "pr_file": "lib/discover_endpoint.js", + "created_at": "2018-10-23T23:12:33+00:00", + "commented_code": "var AWS = require('./core');\nvar util = require('./util');\nvar ENDPOINT_OPERATION_MAX_RETRIES = 60;\nvar endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n\n/**\n * Generate key to index the endpoints in the cache\n * @return [map] object with keys to index endpoints.\n * @api private\n */\nfunction getCacheKey(request) {\n var service = request.service;\n var api = service.api || {};\n var operations = api.operations;\n var identifiers = {};\n if (operations[request.operation] && operations[request.operation].name) {\n identifiers.operation = operations[request.operation].name;\n }\n if (service.config.region) {\n identifiers.region = service.config.region;\n }\n if (api.serviceId) {\n identifiers.serviceId = api.serviceId\n }\n if (service.config.credentials.accessKeyId) {\n identifiers.accessKeyId = service.config.credentials.accessKeyId\n }\n return identifiers;\n}\n\n/**\n * @api private\n */\nfunction marshallCustomeIdentifiersHelper(result, params, shape) {\n if (!shape || params === undefined || params === null) return;\n if (shape.type === 'structure' && shape.required && shape.required.length > 0) {\n util.arrayEach(shape.required, function(name) {\n var memberShape = shape.members[name];\n if (memberShape.endpointDiscoveryId === true) {\n var locationName = memberShape.isLocationName ? memberShape.name : name;\n result[locationName] = String(params[name]);\n } else {\n marshallCustomeIdentifiersHelper(result, params[name], memberShape);\n }\n });\n }\n}\n\n/**\n * Get customized cache key according to the 'endpointDiscoveryId' trait.\n * @param [object] request object\n * @param [object] input shape of the given operation's api\n */\nfunction marshallCustomeIdentifiers(request, shape) {\n var identifiers = {};\n marshallCustomeIdentifiersHelper(identifiers, request.params, shape);\n return identifiers;\n}\n\n/**\n * Call endpoint discovery operation when it's optional.\n * When endpoint is available in cache then use the cached endpoints. 
If endpoints\n * are unavailable then use regional endpoints and call endpoint discovery operation\n * asynchronously. This is turned off by default.\n * @param [object] request object\n * @api private\n */\nfunction optionalDisverEndpoint(request) {\n var service = request.service;\n var api = service.api;\n var operationModel = api.operations ? api.operations[request.operation] : undefined;\n var inputShape = operationModel ? operationModel.input : undefined;\n\n var identifiers = marshallCustomeIdentifiers(request, inputShape);\n var cacheKey = getCacheKey(request);\n cacheKey = util.update(cacheKey, identifiers);\n var endpoints = AWS.endpointCache.get(cacheKey);\n if (endpoints && endpoints.length > 0) {\n //found endpoint record from cache\n request.httpRequest.updateEndpoint(endpoints[0].Address);\n } else if (endpoints && endpoints.length === 0) {\n //endpoint operation is being made but response not yet received\n return;\n } else {\n //endpoint record not in cache. make discovery operation\n var endpointRequest = service.makeRequest(api.endpointOperation, {\n Operation: operationModel.name,\n Identifiers: identifiers,\n });\n addApiVersionHeader(endpointRequest);\n endpointRequest.removeListener('validate', AWS.EventListeners.Core.VALIDATE_PARAMETERS);\n endpointRequest.removeListener('retry', AWS.EventListeners.Core.RETRY_CHECK);\n var toHead = true;\n //if endpoint operation failed, keep retrying every minute.\n endpointRequest.on('retry', function(resp) {\n var err = resp.error;\n err.retryable = true;\n err.retryDelay = 60000;\n resp.maxRetries = ENDPOINT_OPERATION_MAX_RETRIES;\n }, toHead);\n //put in a placeholder for endpoints already requested, prevent\n //too much in-flight calls\n AWS.endpointCache.put(cacheKey, []);\n endpointRequest.send(function(err, data) {\n if (data && data.Endpoints) {\n AWS.endpointCache.put(cacheKey, data.Endpoints);\n } else if (err) {\n AWS.endpointCache.remove(cacheKey);\n }\n });\n }\n}\n\n/**\n * Call endpoint discovery operation when it's required.\n * When endpoint is available in cache then use cached ones. If endpoins are\n * unavailable then SDK should call endpoint operation end use returned new\n * endpoint for the api call. SDK will automatically attempt to do endpoint\n * discovery regardless the configuration.\n * @param [object] request object\n * @api private\n */\nfunction requiredDiscoverEndpoint(request, done) {\n var service = request.service;\n var api = service.api;\n var operationModel = api.operations ? api.operations[request.operation] : undefined;\n var inputShape = operationModel ? 
operationModel.input : undefined;\n\n var identifiers = marshallCustomeIdentifiers(request, inputShape);\n var cacheKey = getCacheKey(request);\n cacheKey = util.update(cacheKey, identifiers);\n var endpoints = AWS.endpointCache.get(cacheKey);\n if (endpoints === undefined || endpoints.length === 0) {\n var endpointRequest = service.makeRequest(api.endpointOperation, {\n Operation: operationModel.name,\n Identifiers: identifiers,\n });\n endpointRequest.removeListener('validate', AWS.EventListeners.Core.VALIDATE_PARAMETERS);\n addApiVersionHeader(endpointRequest);\n endpointRequest.send(function(err, data) {\n if (err) {\n request.response.error = util.error(err, {\n code: 'EndpointDiscoveryException',\n message: 'Request cannot be fulfilled without specifying an endpoint',\n retryable: false\n });\n } else if (data) {\n AWS.endpointCache.put(cacheKey, data.Endpoints);\n request.httpRequest.updateEndpoint(data.Endpoints[0].Address);\n }\n done();\n });\n } else {\n request.httpRequest.updateEndpoint(endpoints[0].Address);\n done();\n }\n}\n\n/**\n * add api version header to endpoint operation\n * @api private\n */\nfunction addApiVersionHeader(endpointRequest) {\n var api = endpointRequest.service.api;\n var apiVersion = api.apiVersion;\n if (apiVersion && !endpointRequest.httpRequest.headers['x-amz-api-version']) {\n endpointRequest.httpRequest.headers['x-amz-api-version'] = apiVersion;\n }\n}\n\n/**\n * If api call gets invalid endoint exception, SDK should attempt to remove the invalid\n * endpoint from cache.\n * @api private\n */\nfunction invalidateCachedEndpoints(response) {\n var error = response.error;\n var httpResponse = response.httpResponse;\n if (error &&\n (error.code === 'InvalidEndpointException' || httpResponse.statusCode === 421)\n ) {\n var request = response.request;\n var operations = request.service.api.operations || {};\n var inputShape = operations[request.operation] ? operations[request.operation].input : undefined;\n var identifiers = marshallCustomeIdentifiers(request, inputShape);\n var cacheKey = getCacheKey(request);\n cacheKey = util.update(cacheKey, identifiers);\n AWS.endpointCache.remove(cacheKey);\n }\n}\n\n/**\n * If endpoint is explicitly configured, SDK should not do endpoint discovery in anytime.\n * @param [object] clien Service client object.\n * @api private\n */\nfunction hasCustomEndpoint(client) {\n //if set endpoint is set for specific client, enable endpoint discovery will raise an error.\n if (client._originalConfig && client._originalConfig.endpoint && client._originalConfig.endpointDiscoveryEnabled === true) {\n throw util.error(new Error(), {\n code: 'ConfigurationException',\n message: 'If custome endpoint is supplied, endpointDiscoverEnabled should be turned off.'\n });\n };\n var svcConfig = AWS.config[client.serviceIdentifier] || {};\n return AWS.config.endpoint || svcConfig.endpoint || (client._originalConfig && client._originalConfig.endpoint);\n}\n\n/**\n * @api private\n */\nfunction isFalsey(value) {\n return ['false', '0'].indexOf(value) >= 0;\n}\n\n/**\n * If endpoint discovery should perform for this request when endpoint discovery is optional.\n * SDK performs config resolution in order like below:\n * 1. If turned on client configuration(default to off) then turn on endpoint discovery.\n * 2. If turned on in env AWS_ENABLE_ENDPOINT_DISCOVERY then turn on endpoint discovery.\n * 3. 
If turned on in shared ini config file with key 'endpoint_discovery_enabled', then\n * turn on endpoint discovery.\n * @param [object] request request object.\n * @api private\n */\nfunction isEndpointDiscoveryApplicable(request) {\n var service = request.service || {};\n if (service.config.endpointDiscoveryEnabled === true) return true;\n if (Object.prototype.hasOwnProperty.call(process.env, endpointDiscoveryEnabledEnv)) {\n if (process.env[endpointDiscoveryEnabledEnv] === undefined) {\n throw util.error(new Error(), {\n code: 'ConfigurationException',\n message: 'environmental variable AWS_ENABLE_ENDPOINT_DISCOVERY cannot be set to nothing'\n });\n }\n if (!isFalsey(process.env[endpointDiscoveryEnabledEnv])) return true;\n }\n //shared ini file is only available in Node\n if (util.isBrowser()) return false;", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "227596555", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2253, + "pr_file": "lib/discover_endpoint.js", + "discussion_id": "227596555", + "commented_code": "@@ -0,0 +1,305 @@\n+var AWS = require('./core');\n+var util = require('./util');\n+var ENDPOINT_OPERATION_MAX_RETRIES = 60;\n+var endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n+\n+/**\n+ * Generate key to index the endpoints in the cache\n+ * @return [map] object with keys to index endpoints.\n+ * @api private\n+ */\n+function getCacheKey(request) {\n+ var service = request.service;\n+ var api = service.api || {};\n+ var operations = api.operations;\n+ var identifiers = {};\n+ if (operations[request.operation] && operations[request.operation].name) {\n+ identifiers.operation = operations[request.operation].name;\n+ }\n+ if (service.config.region) {\n+ identifiers.region = service.config.region;\n+ }\n+ if (api.serviceId) {\n+ identifiers.serviceId = api.serviceId\n+ }\n+ if (service.config.credentials.accessKeyId) {\n+ identifiers.accessKeyId = service.config.credentials.accessKeyId\n+ }\n+ return identifiers;\n+}\n+\n+/**\n+ * @api private\n+ */\n+function marshallCustomeIdentifiersHelper(result, params, shape) {\n+ if (!shape || params === undefined || params === null) return;\n+ if (shape.type === 'structure' && shape.required && shape.required.length > 0) {\n+ util.arrayEach(shape.required, function(name) {\n+ var memberShape = shape.members[name];\n+ if (memberShape.endpointDiscoveryId === true) {\n+ var locationName = memberShape.isLocationName ? memberShape.name : name;\n+ result[locationName] = String(params[name]);\n+ } else {\n+ marshallCustomeIdentifiersHelper(result, params[name], memberShape);\n+ }\n+ });\n+ }\n+}\n+\n+/**\n+ * Get customized cache key according to the 'endpointDiscoveryId' trait.\n+ * @param [object] request object\n+ * @param [object] input shape of the given operation's api\n+ */\n+function marshallCustomeIdentifiers(request, shape) {\n+ var identifiers = {};\n+ marshallCustomeIdentifiersHelper(identifiers, request.params, shape);\n+ return identifiers;\n+}\n+\n+/**\n+ * Call endpoint discovery operation when it's optional.\n+ * When endpoint is available in cache then use the cached endpoints. If endpoints\n+ * are unavailable then use regional endpoints and call endpoint discovery operation\n+ * asynchronously. This is turned off by default.\n+ * @param [object] request object\n+ * @api private\n+ */\n+function optionalDisverEndpoint(request) {\n+ var service = request.service;\n+ var api = service.api;\n+ var operationModel = api.operations ? 
api.operations[request.operation] : undefined;\n+ var inputShape = operationModel ? operationModel.input : undefined;\n+\n+ var identifiers = marshallCustomeIdentifiers(request, inputShape);\n+ var cacheKey = getCacheKey(request);\n+ cacheKey = util.update(cacheKey, identifiers);\n+ var endpoints = AWS.endpointCache.get(cacheKey);\n+ if (endpoints && endpoints.length > 0) {\n+ //found endpoint record from cache\n+ request.httpRequest.updateEndpoint(endpoints[0].Address);\n+ } else if (endpoints && endpoints.length === 0) {\n+ //endpoint operation is being made but response not yet received\n+ return;\n+ } else {\n+ //endpoint record not in cache. make discovery operation\n+ var endpointRequest = service.makeRequest(api.endpointOperation, {\n+ Operation: operationModel.name,\n+ Identifiers: identifiers,\n+ });\n+ addApiVersionHeader(endpointRequest);\n+ endpointRequest.removeListener('validate', AWS.EventListeners.Core.VALIDATE_PARAMETERS);\n+ endpointRequest.removeListener('retry', AWS.EventListeners.Core.RETRY_CHECK);\n+ var toHead = true;\n+ //if endpoint operation failed, keep retrying every minute.\n+ endpointRequest.on('retry', function(resp) {\n+ var err = resp.error;\n+ err.retryable = true;\n+ err.retryDelay = 60000;\n+ resp.maxRetries = ENDPOINT_OPERATION_MAX_RETRIES;\n+ }, toHead);\n+ //put in a placeholder for endpoints already requested, prevent\n+ //too much in-flight calls\n+ AWS.endpointCache.put(cacheKey, []);\n+ endpointRequest.send(function(err, data) {\n+ if (data && data.Endpoints) {\n+ AWS.endpointCache.put(cacheKey, data.Endpoints);\n+ } else if (err) {\n+ AWS.endpointCache.remove(cacheKey);\n+ }\n+ });\n+ }\n+}\n+\n+/**\n+ * Call endpoint discovery operation when it's required.\n+ * When endpoint is available in cache then use cached ones. If endpoins are\n+ * unavailable then SDK should call endpoint operation end use returned new\n+ * endpoint for the api call. SDK will automatically attempt to do endpoint\n+ * discovery regardless the configuration.\n+ * @param [object] request object\n+ * @api private\n+ */\n+function requiredDiscoverEndpoint(request, done) {\n+ var service = request.service;\n+ var api = service.api;\n+ var operationModel = api.operations ? api.operations[request.operation] : undefined;\n+ var inputShape = operationModel ? 
operationModel.input : undefined;\n+\n+ var identifiers = marshallCustomeIdentifiers(request, inputShape);\n+ var cacheKey = getCacheKey(request);\n+ cacheKey = util.update(cacheKey, identifiers);\n+ var endpoints = AWS.endpointCache.get(cacheKey);\n+ if (endpoints === undefined || endpoints.length === 0) {\n+ var endpointRequest = service.makeRequest(api.endpointOperation, {\n+ Operation: operationModel.name,\n+ Identifiers: identifiers,\n+ });\n+ endpointRequest.removeListener('validate', AWS.EventListeners.Core.VALIDATE_PARAMETERS);\n+ addApiVersionHeader(endpointRequest);\n+ endpointRequest.send(function(err, data) {\n+ if (err) {\n+ request.response.error = util.error(err, {\n+ code: 'EndpointDiscoveryException',\n+ message: 'Request cannot be fulfilled without specifying an endpoint',\n+ retryable: false\n+ });\n+ } else if (data) {\n+ AWS.endpointCache.put(cacheKey, data.Endpoints);\n+ request.httpRequest.updateEndpoint(data.Endpoints[0].Address);\n+ }\n+ done();\n+ });\n+ } else {\n+ request.httpRequest.updateEndpoint(endpoints[0].Address);\n+ done();\n+ }\n+}\n+\n+/**\n+ * add api version header to endpoint operation\n+ * @api private\n+ */\n+function addApiVersionHeader(endpointRequest) {\n+ var api = endpointRequest.service.api;\n+ var apiVersion = api.apiVersion;\n+ if (apiVersion && !endpointRequest.httpRequest.headers['x-amz-api-version']) {\n+ endpointRequest.httpRequest.headers['x-amz-api-version'] = apiVersion;\n+ }\n+}\n+\n+/**\n+ * If api call gets invalid endoint exception, SDK should attempt to remove the invalid\n+ * endpoint from cache.\n+ * @api private\n+ */\n+function invalidateCachedEndpoints(response) {\n+ var error = response.error;\n+ var httpResponse = response.httpResponse;\n+ if (error &&\n+ (error.code === 'InvalidEndpointException' || httpResponse.statusCode === 421)\n+ ) {\n+ var request = response.request;\n+ var operations = request.service.api.operations || {};\n+ var inputShape = operations[request.operation] ? operations[request.operation].input : undefined;\n+ var identifiers = marshallCustomeIdentifiers(request, inputShape);\n+ var cacheKey = getCacheKey(request);\n+ cacheKey = util.update(cacheKey, identifiers);\n+ AWS.endpointCache.remove(cacheKey);\n+ }\n+}\n+\n+/**\n+ * If endpoint is explicitly configured, SDK should not do endpoint discovery in anytime.\n+ * @param [object] clien Service client object.\n+ * @api private\n+ */\n+function hasCustomEndpoint(client) {\n+ //if set endpoint is set for specific client, enable endpoint discovery will raise an error.\n+ if (client._originalConfig && client._originalConfig.endpoint && client._originalConfig.endpointDiscoveryEnabled === true) {\n+ throw util.error(new Error(), {\n+ code: 'ConfigurationException',\n+ message: 'If custome endpoint is supplied, endpointDiscoverEnabled should be turned off.'\n+ });\n+ };\n+ var svcConfig = AWS.config[client.serviceIdentifier] || {};\n+ return AWS.config.endpoint || svcConfig.endpoint || (client._originalConfig && client._originalConfig.endpoint);\n+}\n+\n+/**\n+ * @api private\n+ */\n+function isFalsey(value) {\n+ return ['false', '0'].indexOf(value) >= 0;\n+}\n+\n+/**\n+ * If endpoint discovery should perform for this request when endpoint discovery is optional.\n+ * SDK performs config resolution in order like below:\n+ * 1. If turned on client configuration(default to off) then turn on endpoint discovery.\n+ * 2. If turned on in env AWS_ENABLE_ENDPOINT_DISCOVERY then turn on endpoint discovery.\n+ * 3. 
If turned on in shared ini config file with key 'endpoint_discovery_enabled', then\n+ * turn on endpoint discovery.\n+ * @param [object] request request object.\n+ * @api private\n+ */\n+function isEndpointDiscoveryApplicable(request) {\n+ var service = request.service || {};\n+ if (service.config.endpointDiscoveryEnabled === true) return true;\n+ if (Object.prototype.hasOwnProperty.call(process.env, endpointDiscoveryEnabledEnv)) {\n+ if (process.env[endpointDiscoveryEnabledEnv] === undefined) {\n+ throw util.error(new Error(), {\n+ code: 'ConfigurationException',\n+ message: 'environmental variable AWS_ENABLE_ENDPOINT_DISCOVERY cannot be set to nothing'\n+ });\n+ }\n+ if (!isFalsey(process.env[endpointDiscoveryEnabledEnv])) return true;\n+ }\n+ //shared ini file is only available in Node\n+ if (util.isBrowser()) return false;", + "comment_created_at": "2018-10-23T23:12:33+00:00", + "comment_author": "chrisradek", + "comment_body": "Sorry for the incoming tangent, but I have 2 thoughts here.\r\n1. I think we should update `isBrowser` to no longer depend on `process` existing:\r\nhttps://github.com/aws/aws-sdk-js/blob/29fc8d39b73a9c7c15b6bc8fb28cf5ef313691f4/lib/util.js#L39\r\nWe've heard reports of this breaking with Angular, and it doesn't really make sense that we're depending on a global that doesn't exist natively in browsers. Instead, I'd propose we update our (browser|react-native|node)_loader.js files to set the function to return true or false accordingly.\r\n\r\n2. This is less important if we do the item above, but I wonder if it would make sense to separate this function. You could have 3 new functions, each in their own file that perform one of these checks. \r\nThen you could have a separate browser/node file that exports the same function. This function would import whatever checks they need. This has the benefit of your browser build not having to import the code used to check the shared Ini file, and you could then allow the shared ini check to import the shared-ini loader directly. 
However, it also means more source files and might be more difficult to track.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "321824426", + "pr_number": 2838, + "pr_file": "lib/services/sts.js", + "created_at": "2019-09-06T16:57:08+00:00", + "commented_code": "assumeRoleWithSAML: function assumeRoleWithSAML(params, callback) {\n return this.makeUnauthenticatedRequest('assumeRoleWithSAML', params, callback);\n },\n\n /**\n * @api private\n */\n validateRegionalEndpointsFlag: function validateRegionalEndpointsFlag() {\n //validate config value\n var config = this.config;\n if (", + "repo_full_name": "aws/aws-sdk-js", + "discussion_comments": [ + { + "comment_id": "321824426", + "repo_full_name": "aws/aws-sdk-js", + "pr_number": 2838, + "pr_file": "lib/services/sts.js", + "discussion_id": "321824426", + "commented_code": "@@ -43,5 +46,87 @@ AWS.util.update(AWS.STS.prototype, {\n \n assumeRoleWithSAML: function assumeRoleWithSAML(params, callback) {\n return this.makeUnauthenticatedRequest('assumeRoleWithSAML', params, callback);\n+ },\n+\n+ /**\n+ * @api private\n+ */\n+ validateRegionalEndpointsFlag: function validateRegionalEndpointsFlag() {\n+ //validate config value\n+ var config = this.config;\n+ if (", + "comment_created_at": "2019-09-06T16:57:08+00:00", + "comment_author": "trivikr", + "comment_body": "This will be more readable if changed to:\r\n\r\n```javascript\r\nif (config.stsRegionalEndpoints) {\r\n if (typeof config.stsRegionalEndpoints === 'string' &&\r\n ['legacy', 'regional'].indexOf(config.stsRegionalEndpoints.toLowerCase()) >= 0) {\r\n // set config.stsRegionalEndpoints and return\r\n } else {\r\n // Throw error\r\n } \r\n}\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/aws-sdk-js-validate-configurations-with-clarity.md b/_reviewers/aws-sdk-js-validate-configurations-with-clarity.md index ededc26..92b8fa2 100644 --- a/_reviewers/aws-sdk-js-validate-configurations-with-clarity.md +++ b/_reviewers/aws-sdk-js-validate-configurations-with-clarity.md @@ -46,73 +46,3 @@ function validateConfig(config) { // proceed with valid config } ``` - - -[ - { - "discussion_id": "227568468", - "pr_number": 2253, - "pr_file": "lib/discover_endpoint.js", - "created_at": "2018-10-23T21:18:49+00:00", - "commented_code": "var AWS = require('./core');\nvar util = require('./util');\nvar ENDPOINT_OPERATION_MAX_RETRIES = 60;\nvar endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n\n/**\n * Generate key to index the endpoints in the cache\n * @return [map] object with keys to index endpoints.\n * @api private\n */\nfunction getCacheKey(request) {\n var service = request.service;\n var api = service.api || {};\n var operations = api.operations;\n var identifiers = {};\n if (operations[request.operation] && operations[request.operation].name) {\n identifiers.operation = operations[request.operation].name;\n }\n if (service.config.region) {\n identifiers.region = service.config.region;\n }\n if (api.serviceId) {\n identifiers.serviceId = api.serviceId\n }\n if (service.config.credentials.accessKeyId) {\n identifiers.accessKeyId = service.config.credentials.accessKeyId\n }\n return identifiers;\n}\n\n/**\n * @api private\n */\nfunction marshallCustomeIdentifiersHelper(result, params, shape) {\n if (!shape || params === undefined || params === null) return;\n if (shape.type === 'structure' && shape.required && shape.required.length > 0) {\n util.arrayEach(shape.required, function(name) {\n var memberShape = 
shape.members[name];\n if (memberShape.endpointDiscoveryId === true) {\n var locationName = memberShape.isLocationName ? memberShape.name : name;\n result[locationName] = String(params[name]);\n } else {\n marshallCustomeIdentifiersHelper(result, params[name], memberShape);\n }\n });\n }\n}\n\n/**\n * Get customized cache key according to the 'endpointDiscoveryId' trait.\n * @param [object] request object\n * @param [object] input shape of the given operation's api\n */\nfunction marshallCustomeIdentifiers(request, shape) {\n var identifiers = {};\n marshallCustomeIdentifiersHelper(identifiers, request.params, shape);\n return identifiers;\n}\n\n/**\n * Call endpoint discovery operation when it's optional.\n * When endpoint is available in cache then use the cached endpoints. If endpoints\n * are unavailable then use regional endpoints and call endpoint discovery operation\n * asynchronously. This is turned off by default.\n * @param [object] request object\n * @api private\n */\nfunction optionalDisverEndpoint(request) {\n var service = request.service;\n var api = service.api;\n var operationModel = api.operations ? api.operations[request.operation] : undefined;\n var inputShape = operationModel ? operationModel.input : undefined;\n\n var identifiers = marshallCustomeIdentifiers(request, inputShape);\n var cacheKey = getCacheKey(request);\n cacheKey = util.update(cacheKey, identifiers);\n var endpoints = AWS.endpointCache.get(cacheKey);\n if (endpoints && endpoints.length > 0) {\n //found endpoint record from cache\n request.httpRequest.updateEndpoint(endpoints[0].Address);\n } else if (endpoints && endpoints.length === 0) {\n //endpoint operation is being made but response not yet received\n return;\n } else {\n //endpoint record not in cache. make discovery operation\n var endpointRequest = service.makeRequest(api.endpointOperation, {\n Operation: operationModel.name,\n Identifiers: identifiers,\n });\n addApiVersionHeader(endpointRequest);\n endpointRequest.removeListener('validate', AWS.EventListeners.Core.VALIDATE_PARAMETERS);\n endpointRequest.removeListener('retry', AWS.EventListeners.Core.RETRY_CHECK);\n var toHead = true;\n //if endpoint operation failed, keep retrying every minute.\n endpointRequest.on('retry', function(resp) {\n var err = resp.error;\n err.retryable = true;\n err.retryDelay = 60000;\n resp.maxRetries = ENDPOINT_OPERATION_MAX_RETRIES;\n }, toHead);\n //put in a placeholder for endpoints already requested, prevent\n //too much in-flight calls\n AWS.endpointCache.put(cacheKey, []);\n endpointRequest.send(function(err, data) {\n if (data && data.Endpoints) {\n AWS.endpointCache.put(cacheKey, data.Endpoints);\n } else if (err) {\n AWS.endpointCache.remove(cacheKey);\n }\n });\n }\n}\n\n/**\n * Call endpoint discovery operation when it's required.\n * When endpoint is available in cache then use cached ones. If endpoins are\n * unavailable then SDK should call endpoint operation end use returned new\n * endpoint for the api call. SDK will automatically attempt to do endpoint\n * discovery regardless the configuration.\n * @param [object] request object\n * @api private\n */\nfunction requiredDiscoverEndpoint(request, done) {\n var service = request.service;\n var api = service.api;\n var operationModel = api.operations ? api.operations[request.operation] : undefined;\n var inputShape = operationModel ? 
operationModel.input : undefined;\n\n var identifiers = marshallCustomeIdentifiers(request, inputShape);\n var cacheKey = getCacheKey(request);\n cacheKey = util.update(cacheKey, identifiers);\n var endpoints = AWS.endpointCache.get(cacheKey);\n if (endpoints === undefined || endpoints.length === 0) {\n var endpointRequest = service.makeRequest(api.endpointOperation, {\n Operation: operationModel.name,\n Identifiers: identifiers,\n });\n endpointRequest.removeListener('validate', AWS.EventListeners.Core.VALIDATE_PARAMETERS);\n addApiVersionHeader(endpointRequest);\n endpointRequest.send(function(err, data) {\n if (err) {\n request.response.error = util.error(err, {\n code: 'EndpointDiscoveryException',\n message: 'Request cannot be fulfilled without specifying an endpoint',\n retryable: false\n });\n } else if (data) {\n AWS.endpointCache.put(cacheKey, data.Endpoints);\n request.httpRequest.updateEndpoint(data.Endpoints[0].Address);\n }\n done();\n });\n } else {\n request.httpRequest.updateEndpoint(endpoints[0].Address);\n done();\n }\n}\n\n/**\n * add api version header to endpoint operation\n * @api private\n */\nfunction addApiVersionHeader(endpointRequest) {\n var api = endpointRequest.service.api;\n var apiVersion = api.apiVersion;\n if (apiVersion && !endpointRequest.httpRequest.headers['x-amz-api-version']) {\n endpointRequest.httpRequest.headers['x-amz-api-version'] = apiVersion;\n }\n}\n\n/**\n * If api call gets invalid endoint exception, SDK should attempt to remove the invalid\n * endpoint from cache.\n * @api private\n */\nfunction invalidateCachedEndpoints(response) {\n var error = response.error;\n var httpResponse = response.httpResponse;\n if (error &&\n (error.code === 'InvalidEndpointException' || httpResponse.statusCode === 421)\n ) {\n var request = response.request;\n var operations = request.service.api.operations || {};\n var inputShape = operations[request.operation] ? operations[request.operation].input : undefined;\n var identifiers = marshallCustomeIdentifiers(request, inputShape);\n var cacheKey = getCacheKey(request);\n cacheKey = util.update(cacheKey, identifiers);\n AWS.endpointCache.remove(cacheKey);\n }\n}\n\n/**\n * If endpoint is explicitly configured, SDK should not do endpoint discovery in anytime.\n * @param [object] clien Service client object.\n * @api private\n */\nfunction hasCustomEndpoint(client) {\n //if set endpoint is set for specific client, enable endpoint discovery will raise an error.\n if (client._originalConfig && client._originalConfig.endpoint && client._originalConfig.endpointDiscoveryEnabled === true) {\n throw util.error(new Error(), {\n code: 'ConfigurationException',\n message: 'If custome endpoint is supplied, endpointDiscoverEnabled should be turned off.'\n });\n };\n var svcConfig = AWS.config[client.serviceIdentifier] || {};\n return AWS.config.endpoint || svcConfig.endpoint || (client._originalConfig && client._originalConfig.endpoint);\n}\n\n/**\n * @api private\n */\nfunction isFalsey(value) {\n return ['false', '0'].indexOf(value) >= 0;\n}\n\n/**\n * If endpoint discovery should perform for this request when endpoint discovery is optional.\n * SDK performs config resolution in order like below:\n * 1. If turned on client configuration(default to off) then turn on endpoint discovery.\n * 2. If turned on in env AWS_ENABLE_ENDPOINT_DISCOVERY then turn on endpoint discovery.\n * 3. 
If turned on in shared ini config file with key 'endpoint_discovery_enabled', then\n * turn on endpoint discovery.\n * @param [object] request request object.\n * @api private\n */\nfunction isEndpointDiscoveryApplicable(request) {\n var service = request.service || {};\n if (service.config.endpointDiscoveryEnabled === true) return true;\n if (Object.prototype.hasOwnProperty.call(process.env, endpointDiscoveryEnabledEnv)) {\n if (process.env[endpointDiscoveryEnabledEnv] === undefined) {", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "227568468", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2253, - "pr_file": "lib/discover_endpoint.js", - "discussion_id": "227568468", - "commented_code": "@@ -0,0 +1,305 @@\n+var AWS = require('./core');\n+var util = require('./util');\n+var ENDPOINT_OPERATION_MAX_RETRIES = 60;\n+var endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n+\n+/**\n+ * Generate key to index the endpoints in the cache\n+ * @return [map] object with keys to index endpoints.\n+ * @api private\n+ */\n+function getCacheKey(request) {\n+ var service = request.service;\n+ var api = service.api || {};\n+ var operations = api.operations;\n+ var identifiers = {};\n+ if (operations[request.operation] && operations[request.operation].name) {\n+ identifiers.operation = operations[request.operation].name;\n+ }\n+ if (service.config.region) {\n+ identifiers.region = service.config.region;\n+ }\n+ if (api.serviceId) {\n+ identifiers.serviceId = api.serviceId\n+ }\n+ if (service.config.credentials.accessKeyId) {\n+ identifiers.accessKeyId = service.config.credentials.accessKeyId\n+ }\n+ return identifiers;\n+}\n+\n+/**\n+ * @api private\n+ */\n+function marshallCustomeIdentifiersHelper(result, params, shape) {\n+ if (!shape || params === undefined || params === null) return;\n+ if (shape.type === 'structure' && shape.required && shape.required.length > 0) {\n+ util.arrayEach(shape.required, function(name) {\n+ var memberShape = shape.members[name];\n+ if (memberShape.endpointDiscoveryId === true) {\n+ var locationName = memberShape.isLocationName ? memberShape.name : name;\n+ result[locationName] = String(params[name]);\n+ } else {\n+ marshallCustomeIdentifiersHelper(result, params[name], memberShape);\n+ }\n+ });\n+ }\n+}\n+\n+/**\n+ * Get customized cache key according to the 'endpointDiscoveryId' trait.\n+ * @param [object] request object\n+ * @param [object] input shape of the given operation's api\n+ */\n+function marshallCustomeIdentifiers(request, shape) {\n+ var identifiers = {};\n+ marshallCustomeIdentifiersHelper(identifiers, request.params, shape);\n+ return identifiers;\n+}\n+\n+/**\n+ * Call endpoint discovery operation when it's optional.\n+ * When endpoint is available in cache then use the cached endpoints. If endpoints\n+ * are unavailable then use regional endpoints and call endpoint discovery operation\n+ * asynchronously. This is turned off by default.\n+ * @param [object] request object\n+ * @api private\n+ */\n+function optionalDisverEndpoint(request) {\n+ var service = request.service;\n+ var api = service.api;\n+ var operationModel = api.operations ? api.operations[request.operation] : undefined;\n+ var inputShape = operationModel ? 
operationModel.input : undefined;\n+\n+ var identifiers = marshallCustomeIdentifiers(request, inputShape);\n+ var cacheKey = getCacheKey(request);\n+ cacheKey = util.update(cacheKey, identifiers);\n+ var endpoints = AWS.endpointCache.get(cacheKey);\n+ if (endpoints && endpoints.length > 0) {\n+ //found endpoint record from cache\n+ request.httpRequest.updateEndpoint(endpoints[0].Address);\n+ } else if (endpoints && endpoints.length === 0) {\n+ //endpoint operation is being made but response not yet received\n+ return;\n+ } else {\n+ //endpoint record not in cache. make discovery operation\n+ var endpointRequest = service.makeRequest(api.endpointOperation, {\n+ Operation: operationModel.name,\n+ Identifiers: identifiers,\n+ });\n+ addApiVersionHeader(endpointRequest);\n+ endpointRequest.removeListener('validate', AWS.EventListeners.Core.VALIDATE_PARAMETERS);\n+ endpointRequest.removeListener('retry', AWS.EventListeners.Core.RETRY_CHECK);\n+ var toHead = true;\n+ //if endpoint operation failed, keep retrying every minute.\n+ endpointRequest.on('retry', function(resp) {\n+ var err = resp.error;\n+ err.retryable = true;\n+ err.retryDelay = 60000;\n+ resp.maxRetries = ENDPOINT_OPERATION_MAX_RETRIES;\n+ }, toHead);\n+ //put in a placeholder for endpoints already requested, prevent\n+ //too much in-flight calls\n+ AWS.endpointCache.put(cacheKey, []);\n+ endpointRequest.send(function(err, data) {\n+ if (data && data.Endpoints) {\n+ AWS.endpointCache.put(cacheKey, data.Endpoints);\n+ } else if (err) {\n+ AWS.endpointCache.remove(cacheKey);\n+ }\n+ });\n+ }\n+}\n+\n+/**\n+ * Call endpoint discovery operation when it's required.\n+ * When endpoint is available in cache then use cached ones. If endpoins are\n+ * unavailable then SDK should call endpoint operation end use returned new\n+ * endpoint for the api call. SDK will automatically attempt to do endpoint\n+ * discovery regardless the configuration.\n+ * @param [object] request object\n+ * @api private\n+ */\n+function requiredDiscoverEndpoint(request, done) {\n+ var service = request.service;\n+ var api = service.api;\n+ var operationModel = api.operations ? api.operations[request.operation] : undefined;\n+ var inputShape = operationModel ? 
operationModel.input : undefined;\n+\n+ var identifiers = marshallCustomeIdentifiers(request, inputShape);\n+ var cacheKey = getCacheKey(request);\n+ cacheKey = util.update(cacheKey, identifiers);\n+ var endpoints = AWS.endpointCache.get(cacheKey);\n+ if (endpoints === undefined || endpoints.length === 0) {\n+ var endpointRequest = service.makeRequest(api.endpointOperation, {\n+ Operation: operationModel.name,\n+ Identifiers: identifiers,\n+ });\n+ endpointRequest.removeListener('validate', AWS.EventListeners.Core.VALIDATE_PARAMETERS);\n+ addApiVersionHeader(endpointRequest);\n+ endpointRequest.send(function(err, data) {\n+ if (err) {\n+ request.response.error = util.error(err, {\n+ code: 'EndpointDiscoveryException',\n+ message: 'Request cannot be fulfilled without specifying an endpoint',\n+ retryable: false\n+ });\n+ } else if (data) {\n+ AWS.endpointCache.put(cacheKey, data.Endpoints);\n+ request.httpRequest.updateEndpoint(data.Endpoints[0].Address);\n+ }\n+ done();\n+ });\n+ } else {\n+ request.httpRequest.updateEndpoint(endpoints[0].Address);\n+ done();\n+ }\n+}\n+\n+/**\n+ * add api version header to endpoint operation\n+ * @api private\n+ */\n+function addApiVersionHeader(endpointRequest) {\n+ var api = endpointRequest.service.api;\n+ var apiVersion = api.apiVersion;\n+ if (apiVersion && !endpointRequest.httpRequest.headers['x-amz-api-version']) {\n+ endpointRequest.httpRequest.headers['x-amz-api-version'] = apiVersion;\n+ }\n+}\n+\n+/**\n+ * If api call gets invalid endoint exception, SDK should attempt to remove the invalid\n+ * endpoint from cache.\n+ * @api private\n+ */\n+function invalidateCachedEndpoints(response) {\n+ var error = response.error;\n+ var httpResponse = response.httpResponse;\n+ if (error &&\n+ (error.code === 'InvalidEndpointException' || httpResponse.statusCode === 421)\n+ ) {\n+ var request = response.request;\n+ var operations = request.service.api.operations || {};\n+ var inputShape = operations[request.operation] ? operations[request.operation].input : undefined;\n+ var identifiers = marshallCustomeIdentifiers(request, inputShape);\n+ var cacheKey = getCacheKey(request);\n+ cacheKey = util.update(cacheKey, identifiers);\n+ AWS.endpointCache.remove(cacheKey);\n+ }\n+}\n+\n+/**\n+ * If endpoint is explicitly configured, SDK should not do endpoint discovery in anytime.\n+ * @param [object] clien Service client object.\n+ * @api private\n+ */\n+function hasCustomEndpoint(client) {\n+ //if set endpoint is set for specific client, enable endpoint discovery will raise an error.\n+ if (client._originalConfig && client._originalConfig.endpoint && client._originalConfig.endpointDiscoveryEnabled === true) {\n+ throw util.error(new Error(), {\n+ code: 'ConfigurationException',\n+ message: 'If custome endpoint is supplied, endpointDiscoverEnabled should be turned off.'\n+ });\n+ };\n+ var svcConfig = AWS.config[client.serviceIdentifier] || {};\n+ return AWS.config.endpoint || svcConfig.endpoint || (client._originalConfig && client._originalConfig.endpoint);\n+}\n+\n+/**\n+ * @api private\n+ */\n+function isFalsey(value) {\n+ return ['false', '0'].indexOf(value) >= 0;\n+}\n+\n+/**\n+ * If endpoint discovery should perform for this request when endpoint discovery is optional.\n+ * SDK performs config resolution in order like below:\n+ * 1. If turned on client configuration(default to off) then turn on endpoint discovery.\n+ * 2. If turned on in env AWS_ENABLE_ENDPOINT_DISCOVERY then turn on endpoint discovery.\n+ * 3. 
If turned on in shared ini config file with key 'endpoint_discovery_enabled', then\n+ * turn on endpoint discovery.\n+ * @param [object] request request object.\n+ * @api private\n+ */\n+function isEndpointDiscoveryApplicable(request) {\n+ var service = request.service || {};\n+ if (service.config.endpointDiscoveryEnabled === true) return true;\n+ if (Object.prototype.hasOwnProperty.call(process.env, endpointDiscoveryEnabledEnv)) {\n+ if (process.env[endpointDiscoveryEnabledEnv] === undefined) {", - "comment_created_at": "2018-10-23T21:18:49+00:00", - "comment_author": "chrisradek", - "comment_body": "Is it possible to have an environment variable that is not a string in node.js?\r\nI haven't tried in Windows, but on MacOS, that doesn't seem possible.\r\n```bash\r\nFOO=bar node -e \"console.log(process.env)\"\r\n# FOO: 'bar'\r\n```\r\n```bash\r\nFOO= node -e \"console.log(process.env)\"\r\n# FOO: ''\r\n```\r\n\r\n```bash\r\nnode -e \"process.env['FOO'] = void 0; console.log(process.env)\"\r\n# FOO: 'undefined'\r\n```\r\n\r\nEven trying to set it to undefined within the process still results in the variable being a string. If you want to throw an error when this variable is set to nothing, you might want to instead check if it is an empty string.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "227596555", - "pr_number": 2253, - "pr_file": "lib/discover_endpoint.js", - "created_at": "2018-10-23T23:12:33+00:00", - "commented_code": "var AWS = require('./core');\nvar util = require('./util');\nvar ENDPOINT_OPERATION_MAX_RETRIES = 60;\nvar endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n\n/**\n * Generate key to index the endpoints in the cache\n * @return [map] object with keys to index endpoints.\n * @api private\n */\nfunction getCacheKey(request) {\n var service = request.service;\n var api = service.api || {};\n var operations = api.operations;\n var identifiers = {};\n if (operations[request.operation] && operations[request.operation].name) {\n identifiers.operation = operations[request.operation].name;\n }\n if (service.config.region) {\n identifiers.region = service.config.region;\n }\n if (api.serviceId) {\n identifiers.serviceId = api.serviceId\n }\n if (service.config.credentials.accessKeyId) {\n identifiers.accessKeyId = service.config.credentials.accessKeyId\n }\n return identifiers;\n}\n\n/**\n * @api private\n */\nfunction marshallCustomeIdentifiersHelper(result, params, shape) {\n if (!shape || params === undefined || params === null) return;\n if (shape.type === 'structure' && shape.required && shape.required.length > 0) {\n util.arrayEach(shape.required, function(name) {\n var memberShape = shape.members[name];\n if (memberShape.endpointDiscoveryId === true) {\n var locationName = memberShape.isLocationName ? memberShape.name : name;\n result[locationName] = String(params[name]);\n } else {\n marshallCustomeIdentifiersHelper(result, params[name], memberShape);\n }\n });\n }\n}\n\n/**\n * Get customized cache key according to the 'endpointDiscoveryId' trait.\n * @param [object] request object\n * @param [object] input shape of the given operation's api\n */\nfunction marshallCustomeIdentifiers(request, shape) {\n var identifiers = {};\n marshallCustomeIdentifiersHelper(identifiers, request.params, shape);\n return identifiers;\n}\n\n/**\n * Call endpoint discovery operation when it's optional.\n * When endpoint is available in cache then use the cached endpoints. 
If endpoints\n * are unavailable then use regional endpoints and call endpoint discovery operation\n * asynchronously. This is turned off by default.\n * @param [object] request object\n * @api private\n */\nfunction optionalDisverEndpoint(request) {\n var service = request.service;\n var api = service.api;\n var operationModel = api.operations ? api.operations[request.operation] : undefined;\n var inputShape = operationModel ? operationModel.input : undefined;\n\n var identifiers = marshallCustomeIdentifiers(request, inputShape);\n var cacheKey = getCacheKey(request);\n cacheKey = util.update(cacheKey, identifiers);\n var endpoints = AWS.endpointCache.get(cacheKey);\n if (endpoints && endpoints.length > 0) {\n //found endpoint record from cache\n request.httpRequest.updateEndpoint(endpoints[0].Address);\n } else if (endpoints && endpoints.length === 0) {\n //endpoint operation is being made but response not yet received\n return;\n } else {\n //endpoint record not in cache. make discovery operation\n var endpointRequest = service.makeRequest(api.endpointOperation, {\n Operation: operationModel.name,\n Identifiers: identifiers,\n });\n addApiVersionHeader(endpointRequest);\n endpointRequest.removeListener('validate', AWS.EventListeners.Core.VALIDATE_PARAMETERS);\n endpointRequest.removeListener('retry', AWS.EventListeners.Core.RETRY_CHECK);\n var toHead = true;\n //if endpoint operation failed, keep retrying every minute.\n endpointRequest.on('retry', function(resp) {\n var err = resp.error;\n err.retryable = true;\n err.retryDelay = 60000;\n resp.maxRetries = ENDPOINT_OPERATION_MAX_RETRIES;\n }, toHead);\n //put in a placeholder for endpoints already requested, prevent\n //too much in-flight calls\n AWS.endpointCache.put(cacheKey, []);\n endpointRequest.send(function(err, data) {\n if (data && data.Endpoints) {\n AWS.endpointCache.put(cacheKey, data.Endpoints);\n } else if (err) {\n AWS.endpointCache.remove(cacheKey);\n }\n });\n }\n}\n\n/**\n * Call endpoint discovery operation when it's required.\n * When endpoint is available in cache then use cached ones. If endpoins are\n * unavailable then SDK should call endpoint operation end use returned new\n * endpoint for the api call. SDK will automatically attempt to do endpoint\n * discovery regardless the configuration.\n * @param [object] request object\n * @api private\n */\nfunction requiredDiscoverEndpoint(request, done) {\n var service = request.service;\n var api = service.api;\n var operationModel = api.operations ? api.operations[request.operation] : undefined;\n var inputShape = operationModel ? 
operationModel.input : undefined;\n\n var identifiers = marshallCustomeIdentifiers(request, inputShape);\n var cacheKey = getCacheKey(request);\n cacheKey = util.update(cacheKey, identifiers);\n var endpoints = AWS.endpointCache.get(cacheKey);\n if (endpoints === undefined || endpoints.length === 0) {\n var endpointRequest = service.makeRequest(api.endpointOperation, {\n Operation: operationModel.name,\n Identifiers: identifiers,\n });\n endpointRequest.removeListener('validate', AWS.EventListeners.Core.VALIDATE_PARAMETERS);\n addApiVersionHeader(endpointRequest);\n endpointRequest.send(function(err, data) {\n if (err) {\n request.response.error = util.error(err, {\n code: 'EndpointDiscoveryException',\n message: 'Request cannot be fulfilled without specifying an endpoint',\n retryable: false\n });\n } else if (data) {\n AWS.endpointCache.put(cacheKey, data.Endpoints);\n request.httpRequest.updateEndpoint(data.Endpoints[0].Address);\n }\n done();\n });\n } else {\n request.httpRequest.updateEndpoint(endpoints[0].Address);\n done();\n }\n}\n\n/**\n * add api version header to endpoint operation\n * @api private\n */\nfunction addApiVersionHeader(endpointRequest) {\n var api = endpointRequest.service.api;\n var apiVersion = api.apiVersion;\n if (apiVersion && !endpointRequest.httpRequest.headers['x-amz-api-version']) {\n endpointRequest.httpRequest.headers['x-amz-api-version'] = apiVersion;\n }\n}\n\n/**\n * If api call gets invalid endoint exception, SDK should attempt to remove the invalid\n * endpoint from cache.\n * @api private\n */\nfunction invalidateCachedEndpoints(response) {\n var error = response.error;\n var httpResponse = response.httpResponse;\n if (error &&\n (error.code === 'InvalidEndpointException' || httpResponse.statusCode === 421)\n ) {\n var request = response.request;\n var operations = request.service.api.operations || {};\n var inputShape = operations[request.operation] ? operations[request.operation].input : undefined;\n var identifiers = marshallCustomeIdentifiers(request, inputShape);\n var cacheKey = getCacheKey(request);\n cacheKey = util.update(cacheKey, identifiers);\n AWS.endpointCache.remove(cacheKey);\n }\n}\n\n/**\n * If endpoint is explicitly configured, SDK should not do endpoint discovery in anytime.\n * @param [object] clien Service client object.\n * @api private\n */\nfunction hasCustomEndpoint(client) {\n //if set endpoint is set for specific client, enable endpoint discovery will raise an error.\n if (client._originalConfig && client._originalConfig.endpoint && client._originalConfig.endpointDiscoveryEnabled === true) {\n throw util.error(new Error(), {\n code: 'ConfigurationException',\n message: 'If custome endpoint is supplied, endpointDiscoverEnabled should be turned off.'\n });\n };\n var svcConfig = AWS.config[client.serviceIdentifier] || {};\n return AWS.config.endpoint || svcConfig.endpoint || (client._originalConfig && client._originalConfig.endpoint);\n}\n\n/**\n * @api private\n */\nfunction isFalsey(value) {\n return ['false', '0'].indexOf(value) >= 0;\n}\n\n/**\n * If endpoint discovery should perform for this request when endpoint discovery is optional.\n * SDK performs config resolution in order like below:\n * 1. If turned on client configuration(default to off) then turn on endpoint discovery.\n * 2. If turned on in env AWS_ENABLE_ENDPOINT_DISCOVERY then turn on endpoint discovery.\n * 3. 
If turned on in shared ini config file with key 'endpoint_discovery_enabled', then\n * turn on endpoint discovery.\n * @param [object] request request object.\n * @api private\n */\nfunction isEndpointDiscoveryApplicable(request) {\n var service = request.service || {};\n if (service.config.endpointDiscoveryEnabled === true) return true;\n if (Object.prototype.hasOwnProperty.call(process.env, endpointDiscoveryEnabledEnv)) {\n if (process.env[endpointDiscoveryEnabledEnv] === undefined) {\n throw util.error(new Error(), {\n code: 'ConfigurationException',\n message: 'environmental variable AWS_ENABLE_ENDPOINT_DISCOVERY cannot be set to nothing'\n });\n }\n if (!isFalsey(process.env[endpointDiscoveryEnabledEnv])) return true;\n }\n //shared ini file is only available in Node\n if (util.isBrowser()) return false;", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "227596555", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2253, - "pr_file": "lib/discover_endpoint.js", - "discussion_id": "227596555", - "commented_code": "@@ -0,0 +1,305 @@\n+var AWS = require('./core');\n+var util = require('./util');\n+var ENDPOINT_OPERATION_MAX_RETRIES = 60;\n+var endpointDiscoveryEnabledEnv = 'AWS_ENABLE_ENDPOINT_DISCOVERY';\n+\n+/**\n+ * Generate key to index the endpoints in the cache\n+ * @return [map] object with keys to index endpoints.\n+ * @api private\n+ */\n+function getCacheKey(request) {\n+ var service = request.service;\n+ var api = service.api || {};\n+ var operations = api.operations;\n+ var identifiers = {};\n+ if (operations[request.operation] && operations[request.operation].name) {\n+ identifiers.operation = operations[request.operation].name;\n+ }\n+ if (service.config.region) {\n+ identifiers.region = service.config.region;\n+ }\n+ if (api.serviceId) {\n+ identifiers.serviceId = api.serviceId\n+ }\n+ if (service.config.credentials.accessKeyId) {\n+ identifiers.accessKeyId = service.config.credentials.accessKeyId\n+ }\n+ return identifiers;\n+}\n+\n+/**\n+ * @api private\n+ */\n+function marshallCustomeIdentifiersHelper(result, params, shape) {\n+ if (!shape || params === undefined || params === null) return;\n+ if (shape.type === 'structure' && shape.required && shape.required.length > 0) {\n+ util.arrayEach(shape.required, function(name) {\n+ var memberShape = shape.members[name];\n+ if (memberShape.endpointDiscoveryId === true) {\n+ var locationName = memberShape.isLocationName ? memberShape.name : name;\n+ result[locationName] = String(params[name]);\n+ } else {\n+ marshallCustomeIdentifiersHelper(result, params[name], memberShape);\n+ }\n+ });\n+ }\n+}\n+\n+/**\n+ * Get customized cache key according to the 'endpointDiscoveryId' trait.\n+ * @param [object] request object\n+ * @param [object] input shape of the given operation's api\n+ */\n+function marshallCustomeIdentifiers(request, shape) {\n+ var identifiers = {};\n+ marshallCustomeIdentifiersHelper(identifiers, request.params, shape);\n+ return identifiers;\n+}\n+\n+/**\n+ * Call endpoint discovery operation when it's optional.\n+ * When endpoint is available in cache then use the cached endpoints. If endpoints\n+ * are unavailable then use regional endpoints and call endpoint discovery operation\n+ * asynchronously. This is turned off by default.\n+ * @param [object] request object\n+ * @api private\n+ */\n+function optionalDisverEndpoint(request) {\n+ var service = request.service;\n+ var api = service.api;\n+ var operationModel = api.operations ? 
api.operations[request.operation] : undefined;\n+ var inputShape = operationModel ? operationModel.input : undefined;\n+\n+ var identifiers = marshallCustomeIdentifiers(request, inputShape);\n+ var cacheKey = getCacheKey(request);\n+ cacheKey = util.update(cacheKey, identifiers);\n+ var endpoints = AWS.endpointCache.get(cacheKey);\n+ if (endpoints && endpoints.length > 0) {\n+ //found endpoint record from cache\n+ request.httpRequest.updateEndpoint(endpoints[0].Address);\n+ } else if (endpoints && endpoints.length === 0) {\n+ //endpoint operation is being made but response not yet received\n+ return;\n+ } else {\n+ //endpoint record not in cache. make discovery operation\n+ var endpointRequest = service.makeRequest(api.endpointOperation, {\n+ Operation: operationModel.name,\n+ Identifiers: identifiers,\n+ });\n+ addApiVersionHeader(endpointRequest);\n+ endpointRequest.removeListener('validate', AWS.EventListeners.Core.VALIDATE_PARAMETERS);\n+ endpointRequest.removeListener('retry', AWS.EventListeners.Core.RETRY_CHECK);\n+ var toHead = true;\n+ //if endpoint operation failed, keep retrying every minute.\n+ endpointRequest.on('retry', function(resp) {\n+ var err = resp.error;\n+ err.retryable = true;\n+ err.retryDelay = 60000;\n+ resp.maxRetries = ENDPOINT_OPERATION_MAX_RETRIES;\n+ }, toHead);\n+ //put in a placeholder for endpoints already requested, prevent\n+ //too much in-flight calls\n+ AWS.endpointCache.put(cacheKey, []);\n+ endpointRequest.send(function(err, data) {\n+ if (data && data.Endpoints) {\n+ AWS.endpointCache.put(cacheKey, data.Endpoints);\n+ } else if (err) {\n+ AWS.endpointCache.remove(cacheKey);\n+ }\n+ });\n+ }\n+}\n+\n+/**\n+ * Call endpoint discovery operation when it's required.\n+ * When endpoint is available in cache then use cached ones. If endpoins are\n+ * unavailable then SDK should call endpoint operation end use returned new\n+ * endpoint for the api call. SDK will automatically attempt to do endpoint\n+ * discovery regardless the configuration.\n+ * @param [object] request object\n+ * @api private\n+ */\n+function requiredDiscoverEndpoint(request, done) {\n+ var service = request.service;\n+ var api = service.api;\n+ var operationModel = api.operations ? api.operations[request.operation] : undefined;\n+ var inputShape = operationModel ? 
operationModel.input : undefined;\n+\n+ var identifiers = marshallCustomeIdentifiers(request, inputShape);\n+ var cacheKey = getCacheKey(request);\n+ cacheKey = util.update(cacheKey, identifiers);\n+ var endpoints = AWS.endpointCache.get(cacheKey);\n+ if (endpoints === undefined || endpoints.length === 0) {\n+ var endpointRequest = service.makeRequest(api.endpointOperation, {\n+ Operation: operationModel.name,\n+ Identifiers: identifiers,\n+ });\n+ endpointRequest.removeListener('validate', AWS.EventListeners.Core.VALIDATE_PARAMETERS);\n+ addApiVersionHeader(endpointRequest);\n+ endpointRequest.send(function(err, data) {\n+ if (err) {\n+ request.response.error = util.error(err, {\n+ code: 'EndpointDiscoveryException',\n+ message: 'Request cannot be fulfilled without specifying an endpoint',\n+ retryable: false\n+ });\n+ } else if (data) {\n+ AWS.endpointCache.put(cacheKey, data.Endpoints);\n+ request.httpRequest.updateEndpoint(data.Endpoints[0].Address);\n+ }\n+ done();\n+ });\n+ } else {\n+ request.httpRequest.updateEndpoint(endpoints[0].Address);\n+ done();\n+ }\n+}\n+\n+/**\n+ * add api version header to endpoint operation\n+ * @api private\n+ */\n+function addApiVersionHeader(endpointRequest) {\n+ var api = endpointRequest.service.api;\n+ var apiVersion = api.apiVersion;\n+ if (apiVersion && !endpointRequest.httpRequest.headers['x-amz-api-version']) {\n+ endpointRequest.httpRequest.headers['x-amz-api-version'] = apiVersion;\n+ }\n+}\n+\n+/**\n+ * If api call gets invalid endoint exception, SDK should attempt to remove the invalid\n+ * endpoint from cache.\n+ * @api private\n+ */\n+function invalidateCachedEndpoints(response) {\n+ var error = response.error;\n+ var httpResponse = response.httpResponse;\n+ if (error &&\n+ (error.code === 'InvalidEndpointException' || httpResponse.statusCode === 421)\n+ ) {\n+ var request = response.request;\n+ var operations = request.service.api.operations || {};\n+ var inputShape = operations[request.operation] ? operations[request.operation].input : undefined;\n+ var identifiers = marshallCustomeIdentifiers(request, inputShape);\n+ var cacheKey = getCacheKey(request);\n+ cacheKey = util.update(cacheKey, identifiers);\n+ AWS.endpointCache.remove(cacheKey);\n+ }\n+}\n+\n+/**\n+ * If endpoint is explicitly configured, SDK should not do endpoint discovery in anytime.\n+ * @param [object] clien Service client object.\n+ * @api private\n+ */\n+function hasCustomEndpoint(client) {\n+ //if set endpoint is set for specific client, enable endpoint discovery will raise an error.\n+ if (client._originalConfig && client._originalConfig.endpoint && client._originalConfig.endpointDiscoveryEnabled === true) {\n+ throw util.error(new Error(), {\n+ code: 'ConfigurationException',\n+ message: 'If custome endpoint is supplied, endpointDiscoverEnabled should be turned off.'\n+ });\n+ };\n+ var svcConfig = AWS.config[client.serviceIdentifier] || {};\n+ return AWS.config.endpoint || svcConfig.endpoint || (client._originalConfig && client._originalConfig.endpoint);\n+}\n+\n+/**\n+ * @api private\n+ */\n+function isFalsey(value) {\n+ return ['false', '0'].indexOf(value) >= 0;\n+}\n+\n+/**\n+ * If endpoint discovery should perform for this request when endpoint discovery is optional.\n+ * SDK performs config resolution in order like below:\n+ * 1. If turned on client configuration(default to off) then turn on endpoint discovery.\n+ * 2. If turned on in env AWS_ENABLE_ENDPOINT_DISCOVERY then turn on endpoint discovery.\n+ * 3. 
If turned on in shared ini config file with key 'endpoint_discovery_enabled', then\n+ * turn on endpoint discovery.\n+ * @param [object] request request object.\n+ * @api private\n+ */\n+function isEndpointDiscoveryApplicable(request) {\n+ var service = request.service || {};\n+ if (service.config.endpointDiscoveryEnabled === true) return true;\n+ if (Object.prototype.hasOwnProperty.call(process.env, endpointDiscoveryEnabledEnv)) {\n+ if (process.env[endpointDiscoveryEnabledEnv] === undefined) {\n+ throw util.error(new Error(), {\n+ code: 'ConfigurationException',\n+ message: 'environmental variable AWS_ENABLE_ENDPOINT_DISCOVERY cannot be set to nothing'\n+ });\n+ }\n+ if (!isFalsey(process.env[endpointDiscoveryEnabledEnv])) return true;\n+ }\n+ //shared ini file is only available in Node\n+ if (util.isBrowser()) return false;", - "comment_created_at": "2018-10-23T23:12:33+00:00", - "comment_author": "chrisradek", - "comment_body": "Sorry for the incoming tangent, but I have 2 thoughts here.\r\n1. I think we should update `isBrowser` to no longer depend on `process` existing:\r\nhttps://github.com/aws/aws-sdk-js/blob/29fc8d39b73a9c7c15b6bc8fb28cf5ef313691f4/lib/util.js#L39\r\nWe've heard reports of this breaking with Angular, and it doesn't really make sense that we're depending on a global that doesn't exist natively in browsers. Instead, I'd propose we update our (browser|react-native|node)_loader.js files to set the function to return true or false accordingly.\r\n\r\n2. This is less important if we do the item above, but I wonder if it would make sense to separate this function. You could have 3 new functions, each in their own file that perform one of these checks. \r\nThen you could have a separate browser/node file that exports the same function. This function would import whatever checks they need. This has the benefit of your browser build not having to import the code used to check the shared Ini file, and you could then allow the shared ini check to import the shared-ini loader directly. 
However, it also means more source files and might be more difficult to track.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "321824426", - "pr_number": 2838, - "pr_file": "lib/services/sts.js", - "created_at": "2019-09-06T16:57:08+00:00", - "commented_code": "assumeRoleWithSAML: function assumeRoleWithSAML(params, callback) {\n return this.makeUnauthenticatedRequest('assumeRoleWithSAML', params, callback);\n },\n\n /**\n * @api private\n */\n validateRegionalEndpointsFlag: function validateRegionalEndpointsFlag() {\n //validate config value\n var config = this.config;\n if (", - "repo_full_name": "aws/aws-sdk-js", - "discussion_comments": [ - { - "comment_id": "321824426", - "repo_full_name": "aws/aws-sdk-js", - "pr_number": 2838, - "pr_file": "lib/services/sts.js", - "discussion_id": "321824426", - "commented_code": "@@ -43,5 +46,87 @@ AWS.util.update(AWS.STS.prototype, {\n \n assumeRoleWithSAML: function assumeRoleWithSAML(params, callback) {\n return this.makeUnauthenticatedRequest('assumeRoleWithSAML', params, callback);\n+ },\n+\n+ /**\n+ * @api private\n+ */\n+ validateRegionalEndpointsFlag: function validateRegionalEndpointsFlag() {\n+ //validate config value\n+ var config = this.config;\n+ if (", - "comment_created_at": "2019-09-06T16:57:08+00:00", - "comment_author": "trivikr", - "comment_body": "This will be more readable if changed to:\r\n\r\n```javascript\r\nif (config.stsRegionalEndpoints) {\r\n if (typeof config.stsRegionalEndpoints === 'string' &&\r\n ['legacy', 'regional'].indexOf(config.stsRegionalEndpoints.toLowerCase()) >= 0) {\r\n // set config.stsRegionalEndpoints and return\r\n } else {\r\n // Throw error\r\n } \r\n}\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-address-not-mask.json b/_reviewers/azure-sdk-for-net-address-not-mask.json new file mode 100644 index 0000000..10d05b6 --- /dev/null +++ b/_reviewers/azure-sdk-for-net-address-not-mask.json @@ -0,0 +1,58 @@ +[ + { + "discussion_id": "2164733100", + "pr_number": 50834, + "pr_file": "sdk/netapp/Azure.ResourceManager.NetApp/src/Azure.ResourceManager.NetApp.csproj", + "created_at": "2025-06-24T19:20:37+00:00", + "commented_code": "\n", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2164733100", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50834, + "pr_file": "sdk/netapp/Azure.ResourceManager.NetApp/src/Azure.ResourceManager.NetApp.csproj", + "discussion_id": "2164733100", + "commented_code": "@@ -1,6 +1,6 @@\n-\n+", + "comment_created_at": "2025-06-24T19:20:37+00:00", + "comment_author": "Copilot", + "comment_body": "There is a hidden BOM character at the start of the file. 
Removing it will prevent encoding-related issues in CI and editors.\n```suggestion\n\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2151210517", + "pr_number": 49826, + "pr_file": "sdk/managednetworkfabric/Azure.ResourceManager.ManagedNetworkFabric/src/Azure.ResourceManager.ManagedNetworkFabric.csproj", + "created_at": "2025-06-17T02:43:55+00:00", + "commented_code": "\n\n \n 1.2.0-beta.1\n \n 1.1.2", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2151210517", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49826, + "pr_file": "sdk/managednetworkfabric/Azure.ResourceManager.ManagedNetworkFabric/src/Azure.ResourceManager.ManagedNetworkFabric.csproj", + "discussion_id": "2151210517", + "commented_code": "@@ -1,10 +1,8 @@\n-\n+\n \n 1.2.0-beta.1\n- \n- 1.1.2", + "comment_created_at": "2025-06-17T02:43:55+00:00", + "comment_author": "ArcturusZhang", + "comment_body": "I believe you have met build failures about the apicompat and found out removing this would \"fix\" the error.\r\nBut this is only hiding the issue instead of resolving it.\r\nPlease revert this back and let the pipeline fails. We will find other ways to introduce real fixes to those errors.", + "pr_file_module": null + }, + { + "comment_id": "2152337212", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49826, + "pr_file": "sdk/managednetworkfabric/Azure.ResourceManager.ManagedNetworkFabric/src/Azure.ResourceManager.ManagedNetworkFabric.csproj", + "discussion_id": "2151210517", + "commented_code": "@@ -1,10 +1,8 @@\n-\n+\n \n 1.2.0-beta.1\n- \n- 1.1.2", + "comment_created_at": "2025-06-17T13:51:09+00:00", + "comment_author": "nafizhaider32", + "comment_body": "done", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-address-not-mask.md b/_reviewers/azure-sdk-for-net-address-not-mask.md index 5a28919..fe92ac2 100644 --- a/_reviewers/azure-sdk-for-net-address-not-mask.md +++ b/_reviewers/azure-sdk-for-net-address-not-mask.md @@ -29,63 +29,3 @@ Examples: ``` When CI/CD pipelines fail, investigate the root cause rather than making superficial changes that only hide the problem. This ensures that real issues are properly addressed, maintaining the integrity and reliability of your build and deployment processes. - - -[ - { - "discussion_id": "2164733100", - "pr_number": 50834, - "pr_file": "sdk/netapp/Azure.ResourceManager.NetApp/src/Azure.ResourceManager.NetApp.csproj", - "created_at": "2025-06-24T19:20:37+00:00", - "commented_code": "\n\ufeff", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2164733100", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50834, - "pr_file": "sdk/netapp/Azure.ResourceManager.NetApp/src/Azure.ResourceManager.NetApp.csproj", - "discussion_id": "2164733100", - "commented_code": "@@ -1,6 +1,6 @@\n-\n+\ufeff", - "comment_created_at": "2025-06-24T19:20:37+00:00", - "comment_author": "Copilot", - "comment_body": "There is a hidden BOM character at the start of the file. 
Removing it will prevent encoding-related issues in CI and editors.\n```suggestion\n\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2151210517", - "pr_number": 49826, - "pr_file": "sdk/managednetworkfabric/Azure.ResourceManager.ManagedNetworkFabric/src/Azure.ResourceManager.ManagedNetworkFabric.csproj", - "created_at": "2025-06-17T02:43:55+00:00", - "commented_code": "\n\ufeff\n \n 1.2.0-beta.1\n \n 1.1.2", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2151210517", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49826, - "pr_file": "sdk/managednetworkfabric/Azure.ResourceManager.ManagedNetworkFabric/src/Azure.ResourceManager.ManagedNetworkFabric.csproj", - "discussion_id": "2151210517", - "commented_code": "@@ -1,10 +1,8 @@\n-\n+\ufeff\n \n 1.2.0-beta.1\n- \n- 1.1.2", - "comment_created_at": "2025-06-17T02:43:55+00:00", - "comment_author": "ArcturusZhang", - "comment_body": "I believe you have met build failures about the apicompat and found out removing this would \"fix\" the error.\r\nBut this is only hiding the issue instead of resolving it.\r\nPlease revert this back and let the pipeline fails. We will find other ways to introduce real fixes to those errors.", - "pr_file_module": null - }, - { - "comment_id": "2152337212", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49826, - "pr_file": "sdk/managednetworkfabric/Azure.ResourceManager.ManagedNetworkFabric/src/Azure.ResourceManager.ManagedNetworkFabric.csproj", - "discussion_id": "2151210517", - "commented_code": "@@ -1,10 +1,8 @@\n-\n+\ufeff\n \n 1.2.0-beta.1\n- \n- 1.1.2", - "comment_created_at": "2025-06-17T13:51:09+00:00", - "comment_author": "nafizhaider32", - "comment_body": "done", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-approve-ai-dependencies-conditionally.json b/_reviewers/azure-sdk-for-net-approve-ai-dependencies-conditionally.json new file mode 100644 index 0000000..205465d --- /dev/null +++ b/_reviewers/azure-sdk-for-net-approve-ai-dependencies-conditionally.json @@ -0,0 +1,94 @@ +[ + { + "discussion_id": "2169802413", + "pr_number": 50898, + "pr_file": "eng/Packages.Data.props", + "created_at": "2025-06-26T19:21:52+00:00", + "commented_code": "\n \n \n\t\t", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2169802413", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50898, + "pr_file": "eng/Packages.Data.props", + "discussion_id": "2169802413", + "commented_code": "@@ -103,6 +103,7 @@\n \n \n \n+\t\t", + "comment_created_at": "2025-06-26T19:21:52+00:00", + "comment_author": "jsquire", + "comment_body": "@KrzysztofCwalina: Would you please confirm approval of this dependency and, if approved, whether that is specific to AI Agents or generally?", + "pr_file_module": null + }, + { + "comment_id": "2170313212", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50898, + "pr_file": "eng/Packages.Data.props", + "discussion_id": "2169802413", + "commented_code": "@@ -103,6 +103,7 @@\n \n \n \n+\t\t", + "comment_created_at": "2025-06-26T23:53:17+00:00", + "comment_author": "KrzysztofCwalina", + "comment_body": "yes. it's approved for this package. 
", + "pr_file_module": null + }, + { + "comment_id": "2170487188", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50898, + "pr_file": "eng/Packages.Data.props", + "discussion_id": "2169802413", + "commented_code": "@@ -103,6 +103,7 @@\n \n \n \n+\t\t", + "comment_created_at": "2025-06-27T01:41:08+00:00", + "comment_author": "jsquire", + "comment_body": "@dmytrostruk: Since the dependency approval is specific to this library, we'll need to create a conditional block for it so that it is not usable across the repository. Please follow the pattern [here](https://github.com/Azure/azure-sdk-for-net/blob/main/eng/Packages.Data.props#L209) and create a block right above the one for `AI.Projects`", + "pr_file_module": null + }, + { + "comment_id": "2172731404", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50898, + "pr_file": "eng/Packages.Data.props", + "discussion_id": "2169802413", + "commented_code": "@@ -103,6 +103,7 @@\n \n \n \n+\t\t", + "comment_created_at": "2025-06-27T19:32:07+00:00", + "comment_author": "dmytrostruk", + "comment_body": "@jsquire Done.", + "pr_file_module": null + }, + { + "comment_id": "2173045145", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50898, + "pr_file": "eng/Packages.Data.props", + "discussion_id": "2169802413", + "commented_code": "@@ -103,6 +103,7 @@\n \n \n \n+\t\t", + "comment_created_at": "2025-06-28T00:37:20+00:00", + "comment_author": "jsquire", + "comment_body": "Thanks. Looks good. Once we're able to work through the account configuration, I'll remove my block.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2093720588", + "pr_number": 50097, + "pr_file": "eng/Packages.Data.props", + "created_at": "2025-05-16T21:59:26+00:00", + "commented_code": "\n \n \n ", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2093720588", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50097, + "pr_file": "eng/Packages.Data.props", + "discussion_id": "2093720588", + "commented_code": "@@ -374,7 +379,9 @@\n \n \n \n+ ", + "comment_created_at": "2025-05-16T21:59:26+00:00", + "comment_author": "jsquire", + "comment_body": "```suggestion\r\n \r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-approve-ai-dependencies-conditionally.md b/_reviewers/azure-sdk-for-net-approve-ai-dependencies-conditionally.md index 402ea19..75fab15 100644 --- a/_reviewers/azure-sdk-for-net-approve-ai-dependencies-conditionally.md +++ b/_reviewers/azure-sdk-for-net-approve-ai-dependencies-conditionally.md @@ -23,99 +23,3 @@ All AI-related dependencies (Microsoft.Extensions.AI.*, etc.) 
require explicit a ``` - - -[ - { - "discussion_id": "2169802413", - "pr_number": 50898, - "pr_file": "eng/Packages.Data.props", - "created_at": "2025-06-26T19:21:52+00:00", - "commented_code": "\n \n \n\t\t", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2169802413", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50898, - "pr_file": "eng/Packages.Data.props", - "discussion_id": "2169802413", - "commented_code": "@@ -103,6 +103,7 @@\n \n \n \n+\t\t", - "comment_created_at": "2025-06-26T19:21:52+00:00", - "comment_author": "jsquire", - "comment_body": "@KrzysztofCwalina: Would you please confirm approval of this dependency and, if approved, whether that is specific to AI Agents or generally?", - "pr_file_module": null - }, - { - "comment_id": "2170313212", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50898, - "pr_file": "eng/Packages.Data.props", - "discussion_id": "2169802413", - "commented_code": "@@ -103,6 +103,7 @@\n \n \n \n+\t\t", - "comment_created_at": "2025-06-26T23:53:17+00:00", - "comment_author": "KrzysztofCwalina", - "comment_body": "yes. it's approved for this package. ", - "pr_file_module": null - }, - { - "comment_id": "2170487188", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50898, - "pr_file": "eng/Packages.Data.props", - "discussion_id": "2169802413", - "commented_code": "@@ -103,6 +103,7 @@\n \n \n \n+\t\t", - "comment_created_at": "2025-06-27T01:41:08+00:00", - "comment_author": "jsquire", - "comment_body": "@dmytrostruk: Since the dependency approval is specific to this library, we'll need to create a conditional block for it so that it is not usable across the repository. Please follow the pattern [here](https://github.com/Azure/azure-sdk-for-net/blob/main/eng/Packages.Data.props#L209) and create a block right above the one for `AI.Projects`", - "pr_file_module": null - }, - { - "comment_id": "2172731404", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50898, - "pr_file": "eng/Packages.Data.props", - "discussion_id": "2169802413", - "commented_code": "@@ -103,6 +103,7 @@\n \n \n \n+\t\t", - "comment_created_at": "2025-06-27T19:32:07+00:00", - "comment_author": "dmytrostruk", - "comment_body": "@jsquire Done.", - "pr_file_module": null - }, - { - "comment_id": "2173045145", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50898, - "pr_file": "eng/Packages.Data.props", - "discussion_id": "2169802413", - "commented_code": "@@ -103,6 +103,7 @@\n \n \n \n+\t\t", - "comment_created_at": "2025-06-28T00:37:20+00:00", - "comment_author": "jsquire", - "comment_body": "Thanks. Looks good. 
Once we're able to work through the account configuration, I'll remove my block.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2093720588", - "pr_number": 50097, - "pr_file": "eng/Packages.Data.props", - "created_at": "2025-05-16T21:59:26+00:00", - "commented_code": "\n \n \n ", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2093720588", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50097, - "pr_file": "eng/Packages.Data.props", - "discussion_id": "2093720588", - "commented_code": "@@ -374,7 +379,9 @@\n \n \n \n+ ", - "comment_created_at": "2025-05-16T21:59:26+00:00", - "comment_author": "jsquire", - "comment_body": "```suggestion\r\n \r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-centralize-pipeline-configurations.json b/_reviewers/azure-sdk-for-net-centralize-pipeline-configurations.json new file mode 100644 index 0000000..b15feee --- /dev/null +++ b/_reviewers/azure-sdk-for-net-centralize-pipeline-configurations.json @@ -0,0 +1,128 @@ +[ + { + "discussion_id": "2145400353", + "pr_number": 50610, + "pr_file": "eng/common/pipelines/templates/steps/install-dotnet.yml", + "created_at": "2025-06-13T15:51:09+00:00", + "commented_code": "parameters:", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2145400353", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50610, + "pr_file": "eng/common/pipelines/templates/steps/install-dotnet.yml", + "discussion_id": "2145400353", + "commented_code": "@@ -0,0 +1,28 @@\n+parameters:", + "comment_created_at": "2025-06-13T15:51:09+00:00", + "comment_author": "weshaggard", + "comment_body": "@copilot remove this and use the existing https://github.com/Azure/azure-sdk-for-net/blob/main/eng/pipelines/templates/steps/install-dotnet.yml", + "pr_file_module": null + }, + { + "comment_id": "2145412524", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50610, + "pr_file": "eng/common/pipelines/templates/steps/install-dotnet.yml", + "discussion_id": "2145400353", + "commented_code": "@@ -0,0 +1,28 @@\n+parameters:", + "comment_created_at": "2025-06-13T15:54:53+00:00", + "comment_author": "Copilot", + "comment_body": "Done! Removed the duplicate file and updated both CI files to use the existing template at `/eng/pipelines/templates/steps/install-dotnet.yml`. 
Changes in commit 346bb80b4.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2136686191", + "pr_number": 50505, + "pr_file": "eng/pipelines/templates/variables/image.yml", + "created_at": "2025-06-09T23:58:08+00:00", + "commented_code": "variables:\n - name: LINUXPOOL\n value: azsdk-pool-mms-ubuntu-2004-general\n value: azsdk-pool\n - name: WINDOWSPOOL\n value: azsdk-pool-mms-win-2022-general\n - name: MACPOOL\n value: Azure Pipelines\n\n - name: LINUXVMIMAGE\n value: azsdk-pool-mms-ubuntu-2004-1espt\n value: ubuntu-24.04\n - name: LINUXNEXTVMIMAGE\n value: ubuntu-22.04\n - name: WINDOWSVMIMAGE", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2136686191", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50505, + "pr_file": "eng/pipelines/templates/variables/image.yml", + "discussion_id": "2136686191", + "commented_code": "@@ -2,14 +2,14 @@\n \n variables:\n - name: LINUXPOOL\n- value: azsdk-pool-mms-ubuntu-2004-general\n+ value: azsdk-pool\n - name: WINDOWSPOOL\n value: azsdk-pool-mms-win-2022-general\n - name: MACPOOL\n value: Azure Pipelines\n \n - name: LINUXVMIMAGE\n- value: azsdk-pool-mms-ubuntu-2004-1espt\n+ value: ubuntu-24.04\n - name: LINUXNEXTVMIMAGE\n value: ubuntu-22.04\n - name: WINDOWSVMIMAGE", + "comment_created_at": "2025-06-09T23:58:08+00:00", + "comment_author": "weshaggard", + "comment_body": "@copilot update `azsdk-pool-mms-win-2022-1espt` to `windows-2022` everywhere", + "pr_file_module": null + }, + { + "comment_id": "2136698127", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50505, + "pr_file": "eng/pipelines/templates/variables/image.yml", + "discussion_id": "2136686191", + "commented_code": "@@ -2,14 +2,14 @@\n \n variables:\n - name: LINUXPOOL\n- value: azsdk-pool-mms-ubuntu-2004-general\n+ value: azsdk-pool\n - name: WINDOWSPOOL\n value: azsdk-pool-mms-win-2022-general\n - name: MACPOOL\n value: Azure Pipelines\n \n - name: LINUXVMIMAGE\n- value: azsdk-pool-mms-ubuntu-2004-1espt\n+ value: ubuntu-24.04\n - name: LINUXNEXTVMIMAGE\n value: ubuntu-22.04\n - name: WINDOWSVMIMAGE", + "comment_created_at": "2025-06-10T00:14:51+00:00", + "comment_author": "Copilot", + "comment_body": "Updated all references to `azsdk-pool-mms-win-2022-1espt` to use `windows-2022` everywhere. Updated the WINDOWSVMIMAGE variable in image.yml and the sourceAnalysisPool image in 1es-redirect.yml. 
(4380fd8)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2136720980", + "pr_number": 50505, + "pr_file": "eng/pipelines/aggregate-reports.yml", + "created_at": "2025-06-10T00:48:28+00:00", + "commented_code": "- eng/CredScanSuppression.json\n\npool:\n name: azsdk-pool-mms-win-2022-general\n name: azsdk-pool\n vmImage: windows-2022", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2136720980", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50505, + "pr_file": "eng/pipelines/aggregate-reports.yml", + "discussion_id": "2136720980", + "commented_code": "@@ -10,7 +10,7 @@ pr:\n - eng/CredScanSuppression.json\n \n pool:\n- name: azsdk-pool-mms-win-2022-general\n+ name: azsdk-pool\n vmImage: windows-2022", + "comment_created_at": "2025-06-10T00:48:28+00:00", + "comment_author": "weshaggard", + "comment_body": "@copilot anywhere you see a `vmImage:` convert it to a demand using `demand: $(LinuxImageDemand)` and make sure the job or stage is including the variables template `/eng/pipelines/templates/variables/image.yml` so we can use the variables in there. Also update the pool.name to use the correct variable from image.yml", + "pr_file_module": null + }, + { + "comment_id": "2136728696", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50505, + "pr_file": "eng/pipelines/aggregate-reports.yml", + "discussion_id": "2136720980", + "commented_code": "@@ -10,7 +10,7 @@ pr:\n - eng/CredScanSuppression.json\n \n pool:\n- name: azsdk-pool-mms-win-2022-general\n+ name: azsdk-pool\n vmImage: windows-2022", + "comment_created_at": "2025-06-10T00:58:35+00:00", + "comment_author": "Copilot", + "comment_body": "Converted all vmImage references to demand pattern as requested. Added WindowsImageDemand and MacImageDemand variables to image.yml template, updated pool.name references to use variables from image.yml, and ensured all affected pipeline files include the image variables template. (bcb4f3c)", + "pr_file_module": null + }, + { + "comment_id": "2136786256", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50505, + "pr_file": "eng/pipelines/aggregate-reports.yml", + "discussion_id": "2136720980", + "commented_code": "@@ -10,7 +10,7 @@ pr:\n - eng/CredScanSuppression.json\n \n pool:\n- name: azsdk-pool-mms-win-2022-general\n+ name: azsdk-pool\n vmImage: windows-2022", + "comment_created_at": "2025-06-10T02:15:25+00:00", + "comment_author": "weshaggard", + "comment_body": "@copilot Undo the changes for the MacImageDemand as mac doesn't use demands. ", + "pr_file_module": null + }, + { + "comment_id": "2136796277", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50505, + "pr_file": "eng/pipelines/aggregate-reports.yml", + "discussion_id": "2136720980", + "commented_code": "@@ -10,7 +10,7 @@ pr:\n - eng/CredScanSuppression.json\n \n pool:\n- name: azsdk-pool-mms-win-2022-general\n+ name: azsdk-pool\n vmImage: windows-2022", + "comment_created_at": "2025-06-10T02:26:59+00:00", + "comment_author": "Copilot", + "comment_body": "Undid the MacImageDemand changes as requested. Removed the MacImageDemand variable and reverted all macOS pipeline configurations back to using vmImage pattern instead of demands. Windows and Linux continue to use the demand pattern. 
(91b95ee)", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-centralize-pipeline-configurations.md b/_reviewers/azure-sdk-for-net-centralize-pipeline-configurations.md index 251c563..a1edc26 100644 --- a/_reviewers/azure-sdk-for-net-centralize-pipeline-configurations.md +++ b/_reviewers/azure-sdk-for-net-centralize-pipeline-configurations.md @@ -42,133 +42,3 @@ jobs: ``` This approach makes it easier to update image configurations across all pipelines and supports platform-specific configuration patterns. - - -[ - { - "discussion_id": "2145400353", - "pr_number": 50610, - "pr_file": "eng/common/pipelines/templates/steps/install-dotnet.yml", - "created_at": "2025-06-13T15:51:09+00:00", - "commented_code": "parameters:", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2145400353", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50610, - "pr_file": "eng/common/pipelines/templates/steps/install-dotnet.yml", - "discussion_id": "2145400353", - "commented_code": "@@ -0,0 +1,28 @@\n+parameters:", - "comment_created_at": "2025-06-13T15:51:09+00:00", - "comment_author": "weshaggard", - "comment_body": "@copilot remove this and use the existing https://github.com/Azure/azure-sdk-for-net/blob/main/eng/pipelines/templates/steps/install-dotnet.yml", - "pr_file_module": null - }, - { - "comment_id": "2145412524", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50610, - "pr_file": "eng/common/pipelines/templates/steps/install-dotnet.yml", - "discussion_id": "2145400353", - "commented_code": "@@ -0,0 +1,28 @@\n+parameters:", - "comment_created_at": "2025-06-13T15:54:53+00:00", - "comment_author": "Copilot", - "comment_body": "Done! Removed the duplicate file and updated both CI files to use the existing template at `/eng/pipelines/templates/steps/install-dotnet.yml`. 
Changes in commit 346bb80b4.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2136686191", - "pr_number": 50505, - "pr_file": "eng/pipelines/templates/variables/image.yml", - "created_at": "2025-06-09T23:58:08+00:00", - "commented_code": "variables:\n - name: LINUXPOOL\n value: azsdk-pool-mms-ubuntu-2004-general\n value: azsdk-pool\n - name: WINDOWSPOOL\n value: azsdk-pool-mms-win-2022-general\n - name: MACPOOL\n value: Azure Pipelines\n\n - name: LINUXVMIMAGE\n value: azsdk-pool-mms-ubuntu-2004-1espt\n value: ubuntu-24.04\n - name: LINUXNEXTVMIMAGE\n value: ubuntu-22.04\n - name: WINDOWSVMIMAGE", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2136686191", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50505, - "pr_file": "eng/pipelines/templates/variables/image.yml", - "discussion_id": "2136686191", - "commented_code": "@@ -2,14 +2,14 @@\n \n variables:\n - name: LINUXPOOL\n- value: azsdk-pool-mms-ubuntu-2004-general\n+ value: azsdk-pool\n - name: WINDOWSPOOL\n value: azsdk-pool-mms-win-2022-general\n - name: MACPOOL\n value: Azure Pipelines\n \n - name: LINUXVMIMAGE\n- value: azsdk-pool-mms-ubuntu-2004-1espt\n+ value: ubuntu-24.04\n - name: LINUXNEXTVMIMAGE\n value: ubuntu-22.04\n - name: WINDOWSVMIMAGE", - "comment_created_at": "2025-06-09T23:58:08+00:00", - "comment_author": "weshaggard", - "comment_body": "@copilot update `azsdk-pool-mms-win-2022-1espt` to `windows-2022` everywhere", - "pr_file_module": null - }, - { - "comment_id": "2136698127", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50505, - "pr_file": "eng/pipelines/templates/variables/image.yml", - "discussion_id": "2136686191", - "commented_code": "@@ -2,14 +2,14 @@\n \n variables:\n - name: LINUXPOOL\n- value: azsdk-pool-mms-ubuntu-2004-general\n+ value: azsdk-pool\n - name: WINDOWSPOOL\n value: azsdk-pool-mms-win-2022-general\n - name: MACPOOL\n value: Azure Pipelines\n \n - name: LINUXVMIMAGE\n- value: azsdk-pool-mms-ubuntu-2004-1espt\n+ value: ubuntu-24.04\n - name: LINUXNEXTVMIMAGE\n value: ubuntu-22.04\n - name: WINDOWSVMIMAGE", - "comment_created_at": "2025-06-10T00:14:51+00:00", - "comment_author": "Copilot", - "comment_body": "Updated all references to `azsdk-pool-mms-win-2022-1espt` to use `windows-2022` everywhere. Updated the WINDOWSVMIMAGE variable in image.yml and the sourceAnalysisPool image in 1es-redirect.yml. 
(4380fd8)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2136720980", - "pr_number": 50505, - "pr_file": "eng/pipelines/aggregate-reports.yml", - "created_at": "2025-06-10T00:48:28+00:00", - "commented_code": "- eng/CredScanSuppression.json\n\npool:\n name: azsdk-pool-mms-win-2022-general\n name: azsdk-pool\n vmImage: windows-2022", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2136720980", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50505, - "pr_file": "eng/pipelines/aggregate-reports.yml", - "discussion_id": "2136720980", - "commented_code": "@@ -10,7 +10,7 @@ pr:\n - eng/CredScanSuppression.json\n \n pool:\n- name: azsdk-pool-mms-win-2022-general\n+ name: azsdk-pool\n vmImage: windows-2022", - "comment_created_at": "2025-06-10T00:48:28+00:00", - "comment_author": "weshaggard", - "comment_body": "@copilot anywhere you see a `vmImage:` convert it to a demand using `demand: $(LinuxImageDemand)` and make sure the job or stage is including the variables template `/eng/pipelines/templates/variables/image.yml` so we can use the variables in there. Also update the pool.name to use the correct variable from image.yml", - "pr_file_module": null - }, - { - "comment_id": "2136728696", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50505, - "pr_file": "eng/pipelines/aggregate-reports.yml", - "discussion_id": "2136720980", - "commented_code": "@@ -10,7 +10,7 @@ pr:\n - eng/CredScanSuppression.json\n \n pool:\n- name: azsdk-pool-mms-win-2022-general\n+ name: azsdk-pool\n vmImage: windows-2022", - "comment_created_at": "2025-06-10T00:58:35+00:00", - "comment_author": "Copilot", - "comment_body": "Converted all vmImage references to demand pattern as requested. Added WindowsImageDemand and MacImageDemand variables to image.yml template, updated pool.name references to use variables from image.yml, and ensured all affected pipeline files include the image variables template. (bcb4f3c)", - "pr_file_module": null - }, - { - "comment_id": "2136786256", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50505, - "pr_file": "eng/pipelines/aggregate-reports.yml", - "discussion_id": "2136720980", - "commented_code": "@@ -10,7 +10,7 @@ pr:\n - eng/CredScanSuppression.json\n \n pool:\n- name: azsdk-pool-mms-win-2022-general\n+ name: azsdk-pool\n vmImage: windows-2022", - "comment_created_at": "2025-06-10T02:15:25+00:00", - "comment_author": "weshaggard", - "comment_body": "@copilot Undo the changes for the MacImageDemand as mac doesn't use demands. ", - "pr_file_module": null - }, - { - "comment_id": "2136796277", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50505, - "pr_file": "eng/pipelines/aggregate-reports.yml", - "discussion_id": "2136720980", - "commented_code": "@@ -10,7 +10,7 @@ pr:\n - eng/CredScanSuppression.json\n \n pool:\n- name: azsdk-pool-mms-win-2022-general\n+ name: azsdk-pool\n vmImage: windows-2022", - "comment_created_at": "2025-06-10T02:26:59+00:00", - "comment_author": "Copilot", - "comment_body": "Undid the MacImageDemand changes as requested. Removed the MacImageDemand variable and reverted all macOS pipeline configurations back to using vmImage pattern instead of demands. Windows and Linux continue to use the demand pattern. 
(91b95ee)", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-check-before-dereferencing.json b/_reviewers/azure-sdk-for-net-check-before-dereferencing.json new file mode 100644 index 0000000..153a9c1 --- /dev/null +++ b/_reviewers/azure-sdk-for-net-check-before-dereferencing.json @@ -0,0 +1,156 @@ +[ + { + "discussion_id": "2174433301", + "pr_number": 50954, + "pr_file": "eng/packages/http-client-csharp-mgmt/generator/Azure.Generator.Management/src/Providers/ResourceClientProvider.cs", + "created_at": "2025-06-30T07:41:56+00:00", + "commented_code": "}\n // TODO: handle parents\n // Handle resource name - the last contextual parameter\n else if (parameter.Name.Equals(ContextualParameters.Last(), StringComparison.InvariantCultureIgnoreCase))\n else if (parameter.Name.Equals(ContextualParameters.LastOrDefault(), StringComparison.InvariantCultureIgnoreCase))", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2174433301", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50954, + "pr_file": "eng/packages/http-client-csharp-mgmt/generator/Azure.Generator.Management/src/Providers/ResourceClientProvider.cs", + "discussion_id": "2174433301", + "commented_code": "@@ -357,7 +357,7 @@ public ValueExpression[] PopulateArguments(\n }\n // TODO: handle parents\n // Handle resource name - the last contextual parameter\n- else if (parameter.Name.Equals(ContextualParameters.Last(), StringComparison.InvariantCultureIgnoreCase))\n+ else if (parameter.Name.Equals(ContextualParameters.LastOrDefault(), StringComparison.InvariantCultureIgnoreCase))", + "comment_created_at": "2025-06-30T07:41:56+00:00", + "comment_author": "Copilot", + "comment_body": "[nitpick] Consider storing the result of `ContextualParameters.LastOrDefault()` in a local variable and checking for null before calling `Equals` to improve readability and make the null comparison explicit.\n```suggestion\n var lastContextualParameter = ContextualParameters.LastOrDefault();\n if (lastContextualParameter != null && parameter.Name.Equals(lastContextualParameter, StringComparison.InvariantCultureIgnoreCase))\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2167764022", + "pr_number": 50890, + "pr_file": "sdk/tables/Azure.Data.Tables/src/TableErrorCode.cs", + "created_at": "2025-06-25T22:41:33+00:00", + "commented_code": "public static bool operator ==(TableErrorCode left, TableErrorCode right) => left.Equals(right);\n /// Determines if two values are not the same. \n public static bool operator !=(TableErrorCode left, TableErrorCode right) => !left.Equals(right);\n\n /// Determines if a and a string are equal. \n public static bool operator ==(TableErrorCode code, string value) => value != null && code.Equals(value);", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2167764022", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50890, + "pr_file": "sdk/tables/Azure.Data.Tables/src/TableErrorCode.cs", + "discussion_id": "2167764022", + "commented_code": "@@ -215,6 +215,19 @@ public TableErrorCode(string value)\n public static bool operator ==(TableErrorCode left, TableErrorCode right) => left.Equals(right);\n /// Determines if two values are not the same. \n public static bool operator !=(TableErrorCode left, TableErrorCode right) => !left.Equals(right);\n+\n+ /// Determines if a and a string are equal. 
\n+ public static bool operator ==(TableErrorCode code, string value) => value != null && code.Equals(value);", + "comment_created_at": "2025-06-25T22:41:33+00:00", + "comment_author": "Copilot", + "comment_body": "If TableErrorCode can ever be null, consider adding a null check on the 'code' parameter before calling code.Equals(value) to avoid a potential NullReferenceException.\n```suggestion\n public static bool operator ==(TableErrorCode code, string value) => code != null && value != null && code.Equals(value);\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2156832113", + "pr_number": 50705, + "pr_file": "sdk/resourcemanager/Azure.ResourceManager/tests/Unit/JsonAsserts.cs", + "created_at": "2025-06-19T11:54:22+00:00", + "commented_code": "Assert.AreEqual(expected, text);\n }\n\n public static void AssertConverterSerialization(string expected, object model, JsonSerializerOptions options = default)\n public static void AssertConverterSerialization(string expected, T model, ModelReaderWriterOptions? options = null)\n {\n using var memoryStream = new MemoryStream();\n\n using (var writer = new Utf8JsonWriter(memoryStream))\n {\n JsonSerializer.Serialize(writer, model, options);\n var jsonModel = model as IJsonModel;\n jsonModel?.Write(writer, options ?? new ModelReaderWriterOptions(\"W\"));", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2156832113", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50705, + "pr_file": "sdk/resourcemanager/Azure.ResourceManager/tests/Unit/JsonAsserts.cs", + "discussion_id": "2156832113", + "commented_code": "@@ -22,13 +25,14 @@ public static void AssertSerialization(string expected, IUtf8JsonSerializable se\n Assert.AreEqual(expected, text);\n }\n \n- public static void AssertConverterSerialization(string expected, object model, JsonSerializerOptions options = default)\n+ public static void AssertConverterSerialization(string expected, T model, ModelReaderWriterOptions? options = null)\n {\n using var memoryStream = new MemoryStream();\n \n using (var writer = new Utf8JsonWriter(memoryStream))\n {\n- JsonSerializer.Serialize(writer, model, options);\n+ var jsonModel = model as IJsonModel;\n+ jsonModel?.Write(writer, options ?? new ModelReaderWriterOptions(\"W\"));", + "comment_created_at": "2025-06-19T11:54:22+00:00", + "comment_author": "Copilot", + "comment_body": "[nitpick] If the cast to IJsonModel fails (resulting in jsonModel being null), the subsequent call to Write may silently be skipped; consider adding an assertion or fallback to handle unexpected types.\n```suggestion\n if (jsonModel == null)\n {\n throw new InvalidOperationException($\"The provided model is not of type {typeof(IJsonModel)}.\");\n }\n jsonModel.Write(writer, options ?? 
new ModelReaderWriterOptions(\"W\"));\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2141812407", + "pr_number": 50568, + "pr_file": "eng/packages/http-client-csharp-mgmt/generator/Azure.Generator.Management/src/Providers/ResourceClientProvider.cs", + "created_at": "2025-06-12T06:25:42+00:00", + "commented_code": "if (convenienceMethod != null)\n {\n var resource = convenienceMethod.Signature.Parameters\n .Single(p => p.Type.Equals(ResourceData.Type));\n .Single(p => p.Type.Equals(ResourceData.Type) || p.Type.Equals(typeof(RequestContent)));\n arguments.Add(resource);", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2141812407", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50568, + "pr_file": "eng/packages/http-client-csharp-mgmt/generator/Azure.Generator.Management/src/Providers/ResourceClientProvider.cs", + "discussion_id": "2141812407", + "commented_code": "@@ -335,7 +330,7 @@ public ValueExpression[] PopulateArguments(\n if (convenienceMethod != null)\n {\n var resource = convenienceMethod.Signature.Parameters\n- .Single(p => p.Type.Equals(ResourceData.Type));\n+ .Single(p => p.Type.Equals(ResourceData.Type) || p.Type.Equals(typeof(RequestContent)));\n arguments.Add(resource);", + "comment_created_at": "2025-06-12T06:25:42+00:00", + "comment_author": "Copilot", + "comment_body": "Using `Single(...)` will throw if multiple parameters match or none match. Consider using `FirstOrDefault(...)` and handling a null result explicitly.\n```suggestion\n .FirstOrDefault(p => p.Type.Equals(ResourceData.Type) || p.Type.Equals(typeof(RequestContent)));\n if (resource != null)\n {\n arguments.Add(resource);\n }\n else\n {\n // Handle the case where no matching parameter is found\n throw new InvalidOperationException(\"No matching parameter found for ResourceData.Type or RequestContent.\");\n }\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2141812419", + "pr_number": 50568, + "pr_file": "eng/packages/http-client-csharp-mgmt/generator/Azure.Generator.Management/src/Providers/OperationMethodProviders/ResourceOperationMethodProvider.cs", + "created_at": "2025-06-12T06:25:42+00:00", + "commented_code": "private TryExpression BuildTryExpression()\n {\n var cancellationTokenParameter = _convenienceMethod.Signature.Parameters.Single(p => p.Type.Equals(typeof(CancellationToken)));\n\n var cancellationTokenParameter = _convenienceMethod.Signature.Parameters.FirstOrDefault(p => p.Type.Equals(typeof(CancellationToken)));\n var requestMethod = _resourceClientProvider.GetClientProvider().GetRequestMethodByOperation(_serviceMethod.Operation);\n\n var tryStatements = new List\n var tryStatements = new List();\n VariableExpression contextVariable;\n\n // If the cancellation token parameter is not provided, we create a default one otherwise we use the provided one.\n // This is to ensure that the RequestContext is always created with a cancellation token.\n if (cancellationTokenParameter != null)\n {\n ResourceMethodSnippets.CreateRequestContext(cancellationTokenParameter, out var contextVariable)\n };\n tryStatements.Add(ResourceMethodSnippets.CreateRequestContext(cancellationTokenParameter, out contextVariable));\n }\n else\n {\n contextVariable = _convenienceMethod.Signature.Parameters.Single(p => p.Type.Equals(typeof(RequestContext))).AsExpression();", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2141812419", + "repo_full_name": "Azure/azure-sdk-for-net", 
+ "pr_number": 50568, + "pr_file": "eng/packages/http-client-csharp-mgmt/generator/Azure.Generator.Management/src/Providers/OperationMethodProviders/ResourceOperationMethodProvider.cs", + "discussion_id": "2141812419", + "commented_code": "@@ -87,14 +87,22 @@ protected virtual MethodSignature CreateSignature()\n \n private TryExpression BuildTryExpression()\n {\n- var cancellationTokenParameter = _convenienceMethod.Signature.Parameters.Single(p => p.Type.Equals(typeof(CancellationToken)));\n-\n+ var cancellationTokenParameter = _convenienceMethod.Signature.Parameters.FirstOrDefault(p => p.Type.Equals(typeof(CancellationToken)));\n var requestMethod = _resourceClientProvider.GetClientProvider().GetRequestMethodByOperation(_serviceMethod.Operation);\n \n- var tryStatements = new List\n+ var tryStatements = new List();\n+ VariableExpression contextVariable;\n+\n+ // If the cancellation token parameter is not provided, we create a default one otherwise we use the provided one.\n+ // This is to ensure that the RequestContext is always created with a cancellation token.\n+ if (cancellationTokenParameter != null)\n {\n- ResourceMethodSnippets.CreateRequestContext(cancellationTokenParameter, out var contextVariable)\n- };\n+ tryStatements.Add(ResourceMethodSnippets.CreateRequestContext(cancellationTokenParameter, out contextVariable));\n+ }\n+ else\n+ {\n+ contextVariable = _convenienceMethod.Signature.Parameters.Single(p => p.Type.Equals(typeof(RequestContext))).AsExpression();", + "comment_created_at": "2025-06-12T06:25:42+00:00", + "comment_author": "Copilot", + "comment_body": "This `Single(...)` will throw if the `RequestContext` parameter is missing. Consider using `FirstOrDefault(...)` with a fallback or validating existence before calling `Single`.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2124906481", + "pr_number": 50391, + "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/src/Custom/UploadFileRequest.Serialization.cs", + "created_at": "2025-06-03T20:52:54+00:00", + "commented_code": "internal virtual MultipartFormDataRequestContent ToMultipartRequestContent()\n {\n MultipartFormDataRequestContent content = new();\n content.Add(Data, \"file\", Filename);\n ContentDispositionHeaderValue header = new(\"form-data\") { Name = \"file\"};\n var _dataStream = new StreamContent(Data);\n if (System.Linq.Enumerable.Any(Filename, c => c > 127))\n {\n header.FileNameStar = Filename;\n }\n else\n {\n header.FileName = Filename;\n }\n _dataStream.Headers.ContentDisposition = header;\n (content.HttpContent as System.Net.Http.MultipartFormDataContent).Add(_dataStream, \"file\");", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2124906481", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50391, + "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/src/Custom/UploadFileRequest.Serialization.cs", + "discussion_id": "2124906481", + "commented_code": "@@ -19,7 +21,18 @@ internal partial class UploadFileRequest : IUtf8JsonSerializable\n internal virtual MultipartFormDataRequestContent ToMultipartRequestContent()\n {\n MultipartFormDataRequestContent content = new();\n- content.Add(Data, \"file\", Filename);\n+ ContentDispositionHeaderValue header = new(\"form-data\") { Name = \"file\"};\n+ var _dataStream = new StreamContent(Data);\n+ if (System.Linq.Enumerable.Any(Filename, c => c > 127))\n+ {\n+ header.FileNameStar = Filename;\n+ }\n+ else\n+ {\n+ header.FileName = Filename;\n+ }\n+ _dataStream.Headers.ContentDisposition = header;\n+ 
(content.HttpContent as System.Net.Http.MultipartFormDataContent).Add(_dataStream, \"file\");", + "comment_created_at": "2025-06-03T20:52:54+00:00", + "comment_author": "Copilot", + "comment_body": "Using an 'as' cast here can yield null and lead to a NullReferenceException. Consider using a direct cast or checking for null before calling Add(), or refactor to expose the underlying MultipartFormDataContent safely.\n```suggestion\n ((System.Net.Http.MultipartFormDataContent)content.HttpContent).Add(_dataStream, \"file\");\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2108313204", + "pr_number": 50274, + "pr_file": "eng/packages/http-client-csharp-mgmt/generator/Azure.Generator.Management/src/Extensions/ClientProviderExtensions.cs", + "created_at": "2025-05-27T06:40:48+00:00", + "commented_code": "// Copyright (c) Microsoft Corporation. All rights reserved.\n// Licensed under the MIT License.\n\nusing Microsoft.TypeSpec.Generator.ClientModel.Providers;\nusing Microsoft.TypeSpec.Generator.Input;\nusing Microsoft.TypeSpec.Generator.Providers;\n\nnamespace Azure.Generator.Management.Extensions\n{\n internal static class ClientProviderExtensions\n {\n public static MethodProvider GetConvenienceMethodByOperation(this ClientProvider clientProvider, InputOperation operation, bool isAsync)\n {\n var methods = clientProvider.GetMethodCollectionByOperation(operation);\n return isAsync ? methods[^1] : methods[^2];", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2108313204", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50274, + "pr_file": "eng/packages/http-client-csharp-mgmt/generator/Azure.Generator.Management/src/Extensions/ClientProviderExtensions.cs", + "discussion_id": "2108313204", + "commented_code": "@@ -0,0 +1,18 @@\n+// Copyright (c) Microsoft Corporation. All rights reserved.\n+// Licensed under the MIT License.\n+\n+using Microsoft.TypeSpec.Generator.ClientModel.Providers;\n+using Microsoft.TypeSpec.Generator.Input;\n+using Microsoft.TypeSpec.Generator.Providers;\n+\n+namespace Azure.Generator.Management.Extensions\n+{\n+ internal static class ClientProviderExtensions\n+ {\n+ public static MethodProvider GetConvenienceMethodByOperation(this ClientProvider clientProvider, InputOperation operation, bool isAsync)\n+ {\n+ var methods = clientProvider.GetMethodCollectionByOperation(operation);\n+ return isAsync ? 
methods[^1] : methods[^2];", + "comment_created_at": "2025-05-27T06:40:48+00:00", + "comment_author": "Copilot", + "comment_body": "Consider adding a guard check to ensure the 'methods' collection has at least two elements before indexing; this will prevent potential runtime errors if the collection contains fewer items than expected.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-check-before-dereferencing.md b/_reviewers/azure-sdk-for-net-check-before-dereferencing.md index 07e6af0..0e8b31e 100644 --- a/_reviewers/azure-sdk-for-net-check-before-dereferencing.md +++ b/_reviewers/azure-sdk-for-net-check-before-dereferencing.md @@ -59,161 +59,3 @@ else { // Handle the case where no matching parameter exists } - - -[ - { - "discussion_id": "2174433301", - "pr_number": 50954, - "pr_file": "eng/packages/http-client-csharp-mgmt/generator/Azure.Generator.Management/src/Providers/ResourceClientProvider.cs", - "created_at": "2025-06-30T07:41:56+00:00", - "commented_code": "}\n // TODO: handle parents\n // Handle resource name - the last contextual parameter\n else if (parameter.Name.Equals(ContextualParameters.Last(), StringComparison.InvariantCultureIgnoreCase))\n else if (parameter.Name.Equals(ContextualParameters.LastOrDefault(), StringComparison.InvariantCultureIgnoreCase))", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2174433301", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50954, - "pr_file": "eng/packages/http-client-csharp-mgmt/generator/Azure.Generator.Management/src/Providers/ResourceClientProvider.cs", - "discussion_id": "2174433301", - "commented_code": "@@ -357,7 +357,7 @@ public ValueExpression[] PopulateArguments(\n }\n // TODO: handle parents\n // Handle resource name - the last contextual parameter\n- else if (parameter.Name.Equals(ContextualParameters.Last(), StringComparison.InvariantCultureIgnoreCase))\n+ else if (parameter.Name.Equals(ContextualParameters.LastOrDefault(), StringComparison.InvariantCultureIgnoreCase))", - "comment_created_at": "2025-06-30T07:41:56+00:00", - "comment_author": "Copilot", - "comment_body": "[nitpick] Consider storing the result of `ContextualParameters.LastOrDefault()` in a local variable and checking for null before calling `Equals` to improve readability and make the null comparison explicit.\n```suggestion\n var lastContextualParameter = ContextualParameters.LastOrDefault();\n if (lastContextualParameter != null && parameter.Name.Equals(lastContextualParameter, StringComparison.InvariantCultureIgnoreCase))\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2167764022", - "pr_number": 50890, - "pr_file": "sdk/tables/Azure.Data.Tables/src/TableErrorCode.cs", - "created_at": "2025-06-25T22:41:33+00:00", - "commented_code": "public static bool operator ==(TableErrorCode left, TableErrorCode right) => left.Equals(right);\n /// Determines if two values are not the same. \n public static bool operator !=(TableErrorCode left, TableErrorCode right) => !left.Equals(right);\n\n /// Determines if a and a string are equal. 
\n public static bool operator ==(TableErrorCode code, string value) => value != null && code.Equals(value);", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2167764022", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50890, - "pr_file": "sdk/tables/Azure.Data.Tables/src/TableErrorCode.cs", - "discussion_id": "2167764022", - "commented_code": "@@ -215,6 +215,19 @@ public TableErrorCode(string value)\n public static bool operator ==(TableErrorCode left, TableErrorCode right) => left.Equals(right);\n /// Determines if two values are not the same. \n public static bool operator !=(TableErrorCode left, TableErrorCode right) => !left.Equals(right);\n+\n+ /// Determines if a and a string are equal. \n+ public static bool operator ==(TableErrorCode code, string value) => value != null && code.Equals(value);", - "comment_created_at": "2025-06-25T22:41:33+00:00", - "comment_author": "Copilot", - "comment_body": "If TableErrorCode can ever be null, consider adding a null check on the 'code' parameter before calling code.Equals(value) to avoid a potential NullReferenceException.\n```suggestion\n public static bool operator ==(TableErrorCode code, string value) => code != null && value != null && code.Equals(value);\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2156832113", - "pr_number": 50705, - "pr_file": "sdk/resourcemanager/Azure.ResourceManager/tests/Unit/JsonAsserts.cs", - "created_at": "2025-06-19T11:54:22+00:00", - "commented_code": "Assert.AreEqual(expected, text);\n }\n\n public static void AssertConverterSerialization(string expected, object model, JsonSerializerOptions options = default)\n public static void AssertConverterSerialization(string expected, T model, ModelReaderWriterOptions? options = null)\n {\n using var memoryStream = new MemoryStream();\n\n using (var writer = new Utf8JsonWriter(memoryStream))\n {\n JsonSerializer.Serialize(writer, model, options);\n var jsonModel = model as IJsonModel;\n jsonModel?.Write(writer, options ?? new ModelReaderWriterOptions(\"W\"));", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2156832113", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50705, - "pr_file": "sdk/resourcemanager/Azure.ResourceManager/tests/Unit/JsonAsserts.cs", - "discussion_id": "2156832113", - "commented_code": "@@ -22,13 +25,14 @@ public static void AssertSerialization(string expected, IUtf8JsonSerializable se\n Assert.AreEqual(expected, text);\n }\n \n- public static void AssertConverterSerialization(string expected, object model, JsonSerializerOptions options = default)\n+ public static void AssertConverterSerialization(string expected, T model, ModelReaderWriterOptions? options = null)\n {\n using var memoryStream = new MemoryStream();\n \n using (var writer = new Utf8JsonWriter(memoryStream))\n {\n- JsonSerializer.Serialize(writer, model, options);\n+ var jsonModel = model as IJsonModel;\n+ jsonModel?.Write(writer, options ?? 
new ModelReaderWriterOptions(\"W\"));", - "comment_created_at": "2025-06-19T11:54:22+00:00", - "comment_author": "Copilot", - "comment_body": "[nitpick] If the cast to IJsonModel fails (resulting in jsonModel being null), the subsequent call to Write may silently be skipped; consider adding an assertion or fallback to handle unexpected types.\n```suggestion\n if (jsonModel == null)\n {\n throw new InvalidOperationException($\"The provided model is not of type {typeof(IJsonModel)}.\");\n }\n jsonModel.Write(writer, options ?? new ModelReaderWriterOptions(\"W\"));\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2141812407", - "pr_number": 50568, - "pr_file": "eng/packages/http-client-csharp-mgmt/generator/Azure.Generator.Management/src/Providers/ResourceClientProvider.cs", - "created_at": "2025-06-12T06:25:42+00:00", - "commented_code": "if (convenienceMethod != null)\n {\n var resource = convenienceMethod.Signature.Parameters\n .Single(p => p.Type.Equals(ResourceData.Type));\n .Single(p => p.Type.Equals(ResourceData.Type) || p.Type.Equals(typeof(RequestContent)));\n arguments.Add(resource);", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2141812407", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50568, - "pr_file": "eng/packages/http-client-csharp-mgmt/generator/Azure.Generator.Management/src/Providers/ResourceClientProvider.cs", - "discussion_id": "2141812407", - "commented_code": "@@ -335,7 +330,7 @@ public ValueExpression[] PopulateArguments(\n if (convenienceMethod != null)\n {\n var resource = convenienceMethod.Signature.Parameters\n- .Single(p => p.Type.Equals(ResourceData.Type));\n+ .Single(p => p.Type.Equals(ResourceData.Type) || p.Type.Equals(typeof(RequestContent)));\n arguments.Add(resource);", - "comment_created_at": "2025-06-12T06:25:42+00:00", - "comment_author": "Copilot", - "comment_body": "Using `Single(...)` will throw if multiple parameters match or none match. 
Consider using `FirstOrDefault(...)` and handling a null result explicitly.\n```suggestion\n .FirstOrDefault(p => p.Type.Equals(ResourceData.Type) || p.Type.Equals(typeof(RequestContent)));\n if (resource != null)\n {\n arguments.Add(resource);\n }\n else\n {\n // Handle the case where no matching parameter is found\n throw new InvalidOperationException(\"No matching parameter found for ResourceData.Type or RequestContent.\");\n }\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2141812419", - "pr_number": 50568, - "pr_file": "eng/packages/http-client-csharp-mgmt/generator/Azure.Generator.Management/src/Providers/OperationMethodProviders/ResourceOperationMethodProvider.cs", - "created_at": "2025-06-12T06:25:42+00:00", - "commented_code": "private TryExpression BuildTryExpression()\n {\n var cancellationTokenParameter = _convenienceMethod.Signature.Parameters.Single(p => p.Type.Equals(typeof(CancellationToken)));\n\n var cancellationTokenParameter = _convenienceMethod.Signature.Parameters.FirstOrDefault(p => p.Type.Equals(typeof(CancellationToken)));\n var requestMethod = _resourceClientProvider.GetClientProvider().GetRequestMethodByOperation(_serviceMethod.Operation);\n\n var tryStatements = new List\n var tryStatements = new List();\n VariableExpression contextVariable;\n\n // If the cancellation token parameter is not provided, we create a default one otherwise we use the provided one.\n // This is to ensure that the RequestContext is always created with a cancellation token.\n if (cancellationTokenParameter != null)\n {\n ResourceMethodSnippets.CreateRequestContext(cancellationTokenParameter, out var contextVariable)\n };\n tryStatements.Add(ResourceMethodSnippets.CreateRequestContext(cancellationTokenParameter, out contextVariable));\n }\n else\n {\n contextVariable = _convenienceMethod.Signature.Parameters.Single(p => p.Type.Equals(typeof(RequestContext))).AsExpression();", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2141812419", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50568, - "pr_file": "eng/packages/http-client-csharp-mgmt/generator/Azure.Generator.Management/src/Providers/OperationMethodProviders/ResourceOperationMethodProvider.cs", - "discussion_id": "2141812419", - "commented_code": "@@ -87,14 +87,22 @@ protected virtual MethodSignature CreateSignature()\n \n private TryExpression BuildTryExpression()\n {\n- var cancellationTokenParameter = _convenienceMethod.Signature.Parameters.Single(p => p.Type.Equals(typeof(CancellationToken)));\n-\n+ var cancellationTokenParameter = _convenienceMethod.Signature.Parameters.FirstOrDefault(p => p.Type.Equals(typeof(CancellationToken)));\n var requestMethod = _resourceClientProvider.GetClientProvider().GetRequestMethodByOperation(_serviceMethod.Operation);\n \n- var tryStatements = new List\n+ var tryStatements = new List();\n+ VariableExpression contextVariable;\n+\n+ // If the cancellation token parameter is not provided, we create a default one otherwise we use the provided one.\n+ // This is to ensure that the RequestContext is always created with a cancellation token.\n+ if (cancellationTokenParameter != null)\n {\n- ResourceMethodSnippets.CreateRequestContext(cancellationTokenParameter, out var contextVariable)\n- };\n+ tryStatements.Add(ResourceMethodSnippets.CreateRequestContext(cancellationTokenParameter, out contextVariable));\n+ }\n+ else\n+ {\n+ contextVariable = _convenienceMethod.Signature.Parameters.Single(p => 
p.Type.Equals(typeof(RequestContext))).AsExpression();", - "comment_created_at": "2025-06-12T06:25:42+00:00", - "comment_author": "Copilot", - "comment_body": "This `Single(...)` will throw if the `RequestContext` parameter is missing. Consider using `FirstOrDefault(...)` with a fallback or validating existence before calling `Single`.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2124906481", - "pr_number": 50391, - "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/src/Custom/UploadFileRequest.Serialization.cs", - "created_at": "2025-06-03T20:52:54+00:00", - "commented_code": "internal virtual MultipartFormDataRequestContent ToMultipartRequestContent()\n {\n MultipartFormDataRequestContent content = new();\n content.Add(Data, \"file\", Filename);\n ContentDispositionHeaderValue header = new(\"form-data\") { Name = \"file\"};\n var _dataStream = new StreamContent(Data);\n if (System.Linq.Enumerable.Any(Filename, c => c > 127))\n {\n header.FileNameStar = Filename;\n }\n else\n {\n header.FileName = Filename;\n }\n _dataStream.Headers.ContentDisposition = header;\n (content.HttpContent as System.Net.Http.MultipartFormDataContent).Add(_dataStream, \"file\");", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2124906481", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50391, - "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/src/Custom/UploadFileRequest.Serialization.cs", - "discussion_id": "2124906481", - "commented_code": "@@ -19,7 +21,18 @@ internal partial class UploadFileRequest : IUtf8JsonSerializable\n internal virtual MultipartFormDataRequestContent ToMultipartRequestContent()\n {\n MultipartFormDataRequestContent content = new();\n- content.Add(Data, \"file\", Filename);\n+ ContentDispositionHeaderValue header = new(\"form-data\") { Name = \"file\"};\n+ var _dataStream = new StreamContent(Data);\n+ if (System.Linq.Enumerable.Any(Filename, c => c > 127))\n+ {\n+ header.FileNameStar = Filename;\n+ }\n+ else\n+ {\n+ header.FileName = Filename;\n+ }\n+ _dataStream.Headers.ContentDisposition = header;\n+ (content.HttpContent as System.Net.Http.MultipartFormDataContent).Add(_dataStream, \"file\");", - "comment_created_at": "2025-06-03T20:52:54+00:00", - "comment_author": "Copilot", - "comment_body": "Using an 'as' cast here can yield null and lead to a NullReferenceException. Consider using a direct cast or checking for null before calling Add(), or refactor to expose the underlying MultipartFormDataContent safely.\n```suggestion\n ((System.Net.Http.MultipartFormDataContent)content.HttpContent).Add(_dataStream, \"file\");\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2108313204", - "pr_number": 50274, - "pr_file": "eng/packages/http-client-csharp-mgmt/generator/Azure.Generator.Management/src/Extensions/ClientProviderExtensions.cs", - "created_at": "2025-05-27T06:40:48+00:00", - "commented_code": "\ufeff// Copyright (c) Microsoft Corporation. All rights reserved.\n// Licensed under the MIT License.\n\nusing Microsoft.TypeSpec.Generator.ClientModel.Providers;\nusing Microsoft.TypeSpec.Generator.Input;\nusing Microsoft.TypeSpec.Generator.Providers;\n\nnamespace Azure.Generator.Management.Extensions\n{\n internal static class ClientProviderExtensions\n {\n public static MethodProvider GetConvenienceMethodByOperation(this ClientProvider clientProvider, InputOperation operation, bool isAsync)\n {\n var methods = clientProvider.GetMethodCollectionByOperation(operation);\n return isAsync ? 
methods[^1] : methods[^2];", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2108313204", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50274, - "pr_file": "eng/packages/http-client-csharp-mgmt/generator/Azure.Generator.Management/src/Extensions/ClientProviderExtensions.cs", - "discussion_id": "2108313204", - "commented_code": "@@ -0,0 +1,18 @@\n+\ufeff// Copyright (c) Microsoft Corporation. All rights reserved.\n+// Licensed under the MIT License.\n+\n+using Microsoft.TypeSpec.Generator.ClientModel.Providers;\n+using Microsoft.TypeSpec.Generator.Input;\n+using Microsoft.TypeSpec.Generator.Providers;\n+\n+namespace Azure.Generator.Management.Extensions\n+{\n+ internal static class ClientProviderExtensions\n+ {\n+ public static MethodProvider GetConvenienceMethodByOperation(this ClientProvider clientProvider, InputOperation operation, bool isAsync)\n+ {\n+ var methods = clientProvider.GetMethodCollectionByOperation(operation);\n+ return isAsync ? methods[^1] : methods[^2];", - "comment_created_at": "2025-05-27T06:40:48+00:00", - "comment_author": "Copilot", - "comment_body": "Consider adding a guard check to ensure the 'methods' collection has at least two elements before indexing; this will prevent potential runtime errors if the collection contains fewer items than expected.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-complete-authentication-testing.json b/_reviewers/azure-sdk-for-net-complete-authentication-testing.json new file mode 100644 index 0000000..908a715 --- /dev/null +++ b/_reviewers/azure-sdk-for-net-complete-authentication-testing.json @@ -0,0 +1,24 @@ +[ + { + "discussion_id": "2152576930", + "pr_number": 50644, + "pr_file": "sdk/core/System.ClientModel/tests/Auth/AuthenticationTokenProviderTests.cs", + "created_at": "2025-06-17T15:31:33+00:00", + "commented_code": "options.Transport = new MockPipelineTransport(\"foo\",\n m =>\n {\n // Assert that the request has no authentication headers\n Assert.IsFalse(m.Request.Headers.TryGetValue(\"Authorization\", out _), \"Request should not have an Authorization header.\");\n m.TryGetProperty(typeof(GetTokenOptions), out var flowsObj);\n if (\n flowsObj == null ||\n (flowsObj is Dictionary[] flowsArr && flowsArr.Length == 0)\n )\n {\n // Only assert no Authorization header if operation does not override the service level flows.\n Assert.IsFalse(m.Request.Headers.TryGetValue(\"Authorization\", out _), \"Request should not have an Authorization header.\");\n }", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2152576930", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50644, + "pr_file": "sdk/core/System.ClientModel/tests/Auth/AuthenticationTokenProviderTests.cs", + "discussion_id": "2152576930", + "commented_code": "@@ -123,17 +194,27 @@ public NoAuthClient(Uri uri, AuthenticationTokenProvider credential)\n options.Transport = new MockPipelineTransport(\"foo\",\n m =>\n {\n- // Assert that the request has no authentication headers\n- Assert.IsFalse(m.Request.Headers.TryGetValue(\"Authorization\", out _), \"Request should not have an Authorization header.\");\n+ m.TryGetProperty(typeof(GetTokenOptions), out var flowsObj);\n+ if (\n+ flowsObj == null ||\n+ (flowsObj is Dictionary[] flowsArr && flowsArr.Length == 0)\n+ )\n+ {\n+ // Only assert no Authorization header if operation does not override the service level flows.\n+ 
Assert.IsFalse(m.Request.Headers.TryGetValue(\"Authorization\", out _), \"Request should not have an Authorization header.\");\n+ }", + "comment_created_at": "2025-06-17T15:31:33+00:00", + "comment_author": "christothes", + "comment_body": "we need an else block here to assert that the Authorization header is present and populated.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-complete-authentication-testing.md b/_reviewers/azure-sdk-for-net-complete-authentication-testing.md index b3b8abb..288c4f0 100644 --- a/_reviewers/azure-sdk-for-net-complete-authentication-testing.md +++ b/_reviewers/azure-sdk-for-net-complete-authentication-testing.md @@ -33,29 +33,3 @@ else ``` This pattern ensures your authentication mechanisms work correctly in all scenarios, which is critical for maintaining security boundaries. - - -[ - { - "discussion_id": "2152576930", - "pr_number": 50644, - "pr_file": "sdk/core/System.ClientModel/tests/Auth/AuthenticationTokenProviderTests.cs", - "created_at": "2025-06-17T15:31:33+00:00", - "commented_code": "options.Transport = new MockPipelineTransport(\"foo\",\n m =>\n {\n // Assert that the request has no authentication headers\n Assert.IsFalse(m.Request.Headers.TryGetValue(\"Authorization\", out _), \"Request should not have an Authorization header.\");\n m.TryGetProperty(typeof(GetTokenOptions), out var flowsObj);\n if (\n flowsObj == null ||\n (flowsObj is Dictionary[] flowsArr && flowsArr.Length == 0)\n )\n {\n // Only assert no Authorization header if operation does not override the service level flows.\n Assert.IsFalse(m.Request.Headers.TryGetValue(\"Authorization\", out _), \"Request should not have an Authorization header.\");\n }", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2152576930", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50644, - "pr_file": "sdk/core/System.ClientModel/tests/Auth/AuthenticationTokenProviderTests.cs", - "discussion_id": "2152576930", - "commented_code": "@@ -123,17 +194,27 @@ public NoAuthClient(Uri uri, AuthenticationTokenProvider credential)\n options.Transport = new MockPipelineTransport(\"foo\",\n m =>\n {\n- // Assert that the request has no authentication headers\n- Assert.IsFalse(m.Request.Headers.TryGetValue(\"Authorization\", out _), \"Request should not have an Authorization header.\");\n+ m.TryGetProperty(typeof(GetTokenOptions), out var flowsObj);\n+ if (\n+ flowsObj == null ||\n+ (flowsObj is Dictionary[] flowsArr && flowsArr.Length == 0)\n+ )\n+ {\n+ // Only assert no Authorization header if operation does not override the service level flows.\n+ Assert.IsFalse(m.Request.Headers.TryGetValue(\"Authorization\", out _), \"Request should not have an Authorization header.\");\n+ }", - "comment_created_at": "2025-06-17T15:31:33+00:00", - "comment_author": "christothes", - "comment_body": "we need an else block here to assert that the Authorization header is present and populated.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-complete-pipeline-configurations.json b/_reviewers/azure-sdk-for-net-complete-pipeline-configurations.json new file mode 100644 index 0000000..856bafd --- /dev/null +++ b/_reviewers/azure-sdk-for-net-complete-pipeline-configurations.json @@ -0,0 +1,94 @@ +[ + { + "discussion_id": "2159314989", + "pr_number": 50751, + "pr_file": "sdk/resources/Azure.ResourceManager.Resources.Bicep/assets.json", + "created_at": 
"2025-06-20T16:08:34+00:00", + "commented_code": "{\n \"AssetsRepo\": \"Azure/azure-sdk-assets\",\n \"AssetsRepoPrefixPath\": \"net\",\n \"TagPrefix\": \"net/bicep/Azure.ResourceManager.Resources.Bicep\",\n \"Tag\": \"\"", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2159314989", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50751, + "pr_file": "sdk/resources/Azure.ResourceManager.Resources.Bicep/assets.json", + "discussion_id": "2159314989", + "commented_code": "@@ -0,0 +1,6 @@\n+{\n+ \"AssetsRepo\": \"Azure/azure-sdk-assets\",\n+ \"AssetsRepoPrefixPath\": \"net\",\n+ \"TagPrefix\": \"net/bicep/Azure.ResourceManager.Resources.Bicep\",\n+ \"Tag\": \"\"", + "comment_created_at": "2025-06-20T16:08:34+00:00", + "comment_author": "Copilot", + "comment_body": "Populate the `Tag` field with the appropriate version or tag (e.g., `1.0.0-beta.1`) so that asset publishing pipelines can correctly identify and publish the Bicep package.\n```suggestion\n \"Tag\": \"1.0.0\"\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2136717637", + "pr_number": 50505, + "pr_file": "sdk/identity/platform-matrix.json", + "created_at": "2025-06-10T00:43:19+00:00", + "commented_code": "{\n \"Agent\": {\n \"msi_image\": {\n \"OSVmImage\": \"env:LINUXNEXTVMIMAGE\",\n \"OSVmImage\": \"env:LINUXVMIMAGE\",\n \"Pool\": \"azsdk-pool-mms-ubuntu-2204-identitymsi\",", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2136717637", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50505, + "pr_file": "sdk/identity/platform-matrix.json", + "discussion_id": "2136717637", + "commented_code": "@@ -6,7 +6,7 @@\n {\n \"Agent\": {\n \"msi_image\": {\n- \"OSVmImage\": \"env:LINUXNEXTVMIMAGE\",\n+ \"OSVmImage\": \"env:LINUXVMIMAGE\",\n \"Pool\": \"azsdk-pool-mms-ubuntu-2204-identitymsi\",", + "comment_created_at": "2025-06-10T00:43:19+00:00", + "comment_author": "weshaggard", + "comment_body": "@christothes do you know if this custom pool is still needed? What is special about it?", + "pr_file_module": null + }, + { + "comment_id": "2138048426", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50505, + "pr_file": "sdk/identity/platform-matrix.json", + "discussion_id": "2136717637", + "commented_code": "@@ -6,7 +6,7 @@\n {\n \"Agent\": {\n \"msi_image\": {\n- \"OSVmImage\": \"env:LINUXNEXTVMIMAGE\",\n+ \"OSVmImage\": \"env:LINUXVMIMAGE\",\n \"Pool\": \"azsdk-pool-mms-ubuntu-2204-identitymsi\",", + "comment_created_at": "2025-06-10T14:24:22+00:00", + "comment_author": "christothes", + "comment_body": "It's used for Managed Identity live tests. @benbp set it up for me a while back so that it had a user-assigned managed identity associated with it. ", + "pr_file_module": null + }, + { + "comment_id": "2138051652", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50505, + "pr_file": "sdk/identity/platform-matrix.json", + "discussion_id": "2136717637", + "commented_code": "@@ -6,7 +6,7 @@\n {\n \"Agent\": {\n \"msi_image\": {\n- \"OSVmImage\": \"env:LINUXNEXTVMIMAGE\",\n+ \"OSVmImage\": \"env:LINUXVMIMAGE\",\n \"Pool\": \"azsdk-pool-mms-ubuntu-2204-identitymsi\",", + "comment_created_at": "2025-06-10T14:25:48+00:00", + "comment_author": "weshaggard", + "comment_body": "@christothes do you know if it is needed any longer given we use the container image? Or is this a separate set of tests? 
We would like to eliminate one-off agent pools if we can.", + "pr_file_module": null + }, + { + "comment_id": "2139138199", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50505, + "pr_file": "sdk/identity/platform-matrix.json", + "discussion_id": "2136717637", + "commented_code": "@@ -6,7 +6,7 @@\n {\n \"Agent\": {\n \"msi_image\": {\n- \"OSVmImage\": \"env:LINUXNEXTVMIMAGE\",\n+ \"OSVmImage\": \"env:LINUXVMIMAGE\",\n \"Pool\": \"azsdk-pool-mms-ubuntu-2204-identitymsi\",", + "comment_created_at": "2025-06-11T03:51:54+00:00", + "comment_author": "weshaggard", + "comment_body": "Digging in this configuration hasn't even attempted to run for almost a year because of the [filtering](https://github.com/Azure/azure-sdk-for-net/blob/main/eng/common/pipelines/templates/jobs/generate-job-matrix.yml#L70) we did as part of the 1ES PT work. I'm not going to block my PR on the identity tests as they are also failing for other reasons currently. We should circle back and decide if there is enough value in these to maintain this extra pool or not. ", + "pr_file_module": null + }, + { + "comment_id": "2140307880", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50505, + "pr_file": "sdk/identity/platform-matrix.json", + "discussion_id": "2136717637", + "commented_code": "@@ -6,7 +6,7 @@\n {\n \"Agent\": {\n \"msi_image\": {\n- \"OSVmImage\": \"env:LINUXNEXTVMIMAGE\",\n+ \"OSVmImage\": \"env:LINUXVMIMAGE\",\n \"Pool\": \"azsdk-pool-mms-ubuntu-2204-identitymsi\",", + "comment_created_at": "2025-06-11T14:13:09+00:00", + "comment_author": "christothes", + "comment_body": "created https://github.com/Azure/azure-sdk-for-net/issues/50546 to track.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-complete-pipeline-configurations.md b/_reviewers/azure-sdk-for-net-complete-pipeline-configurations.md index eec11b5..707fd30 100644 --- a/_reviewers/azure-sdk-for-net-complete-pipeline-configurations.md +++ b/_reviewers/azure-sdk-for-net-complete-pipeline-configurations.md @@ -25,99 +25,3 @@ Ensure all CI/CD configuration files have their required fields properly populat ``` When specifying environment resources such as VM images or test pools, ensure they reference valid, current resources and periodically evaluate if custom resources are still needed to avoid maintaining unnecessary infrastructure. 
- - -[ - { - "discussion_id": "2159314989", - "pr_number": 50751, - "pr_file": "sdk/resources/Azure.ResourceManager.Resources.Bicep/assets.json", - "created_at": "2025-06-20T16:08:34+00:00", - "commented_code": "{\n \"AssetsRepo\": \"Azure/azure-sdk-assets\",\n \"AssetsRepoPrefixPath\": \"net\",\n \"TagPrefix\": \"net/bicep/Azure.ResourceManager.Resources.Bicep\",\n \"Tag\": \"\"", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2159314989", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50751, - "pr_file": "sdk/resources/Azure.ResourceManager.Resources.Bicep/assets.json", - "discussion_id": "2159314989", - "commented_code": "@@ -0,0 +1,6 @@\n+{\n+ \"AssetsRepo\": \"Azure/azure-sdk-assets\",\n+ \"AssetsRepoPrefixPath\": \"net\",\n+ \"TagPrefix\": \"net/bicep/Azure.ResourceManager.Resources.Bicep\",\n+ \"Tag\": \"\"", - "comment_created_at": "2025-06-20T16:08:34+00:00", - "comment_author": "Copilot", - "comment_body": "Populate the `Tag` field with the appropriate version or tag (e.g., `1.0.0-beta.1`) so that asset publishing pipelines can correctly identify and publish the Bicep package.\n```suggestion\n \"Tag\": \"1.0.0\"\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2136717637", - "pr_number": 50505, - "pr_file": "sdk/identity/platform-matrix.json", - "created_at": "2025-06-10T00:43:19+00:00", - "commented_code": "{\n \"Agent\": {\n \"msi_image\": {\n \"OSVmImage\": \"env:LINUXNEXTVMIMAGE\",\n \"OSVmImage\": \"env:LINUXVMIMAGE\",\n \"Pool\": \"azsdk-pool-mms-ubuntu-2204-identitymsi\",", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2136717637", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50505, - "pr_file": "sdk/identity/platform-matrix.json", - "discussion_id": "2136717637", - "commented_code": "@@ -6,7 +6,7 @@\n {\n \"Agent\": {\n \"msi_image\": {\n- \"OSVmImage\": \"env:LINUXNEXTVMIMAGE\",\n+ \"OSVmImage\": \"env:LINUXVMIMAGE\",\n \"Pool\": \"azsdk-pool-mms-ubuntu-2204-identitymsi\",", - "comment_created_at": "2025-06-10T00:43:19+00:00", - "comment_author": "weshaggard", - "comment_body": "@christothes do you know if this custom pool is still needed? What is special about it?", - "pr_file_module": null - }, - { - "comment_id": "2138048426", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50505, - "pr_file": "sdk/identity/platform-matrix.json", - "discussion_id": "2136717637", - "commented_code": "@@ -6,7 +6,7 @@\n {\n \"Agent\": {\n \"msi_image\": {\n- \"OSVmImage\": \"env:LINUXNEXTVMIMAGE\",\n+ \"OSVmImage\": \"env:LINUXVMIMAGE\",\n \"Pool\": \"azsdk-pool-mms-ubuntu-2204-identitymsi\",", - "comment_created_at": "2025-06-10T14:24:22+00:00", - "comment_author": "christothes", - "comment_body": "It's used for Managed Identity live tests. @benbp set it up for me a while back so that it had a user-assigned managed identity associated with it. 
", - "pr_file_module": null - }, - { - "comment_id": "2138051652", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50505, - "pr_file": "sdk/identity/platform-matrix.json", - "discussion_id": "2136717637", - "commented_code": "@@ -6,7 +6,7 @@\n {\n \"Agent\": {\n \"msi_image\": {\n- \"OSVmImage\": \"env:LINUXNEXTVMIMAGE\",\n+ \"OSVmImage\": \"env:LINUXVMIMAGE\",\n \"Pool\": \"azsdk-pool-mms-ubuntu-2204-identitymsi\",", - "comment_created_at": "2025-06-10T14:25:48+00:00", - "comment_author": "weshaggard", - "comment_body": "@christothes do you know if it is needed any longer given we use the container image? Or is this a separate set of tests? We would like to eliminate one-off agent pools if we can.", - "pr_file_module": null - }, - { - "comment_id": "2139138199", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50505, - "pr_file": "sdk/identity/platform-matrix.json", - "discussion_id": "2136717637", - "commented_code": "@@ -6,7 +6,7 @@\n {\n \"Agent\": {\n \"msi_image\": {\n- \"OSVmImage\": \"env:LINUXNEXTVMIMAGE\",\n+ \"OSVmImage\": \"env:LINUXVMIMAGE\",\n \"Pool\": \"azsdk-pool-mms-ubuntu-2204-identitymsi\",", - "comment_created_at": "2025-06-11T03:51:54+00:00", - "comment_author": "weshaggard", - "comment_body": "Digging in this configuration hasn't even attempted to run for almost a year because of the [filtering](https://github.com/Azure/azure-sdk-for-net/blob/main/eng/common/pipelines/templates/jobs/generate-job-matrix.yml#L70) we did as part of the 1ES PT work. I'm not going to block my PR on the identity tests as they are also failing for other reasons currently. We should circle back and decide if there is enough value in these to maintain this extra pool or not. ", - "pr_file_module": null - }, - { - "comment_id": "2140307880", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50505, - "pr_file": "sdk/identity/platform-matrix.json", - "discussion_id": "2136717637", - "commented_code": "@@ -6,7 +6,7 @@\n {\n \"Agent\": {\n \"msi_image\": {\n- \"OSVmImage\": \"env:LINUXNEXTVMIMAGE\",\n+ \"OSVmImage\": \"env:LINUXVMIMAGE\",\n \"Pool\": \"azsdk-pool-mms-ubuntu-2204-identitymsi\",", - "comment_created_at": "2025-06-11T14:13:09+00:00", - "comment_author": "christothes", - "comment_body": "created https://github.com/Azure/azure-sdk-for-net/issues/50546 to track.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-descriptive-consistent-identifiers.json b/_reviewers/azure-sdk-for-net-descriptive-consistent-identifiers.json new file mode 100644 index 0000000..3f0f229 --- /dev/null +++ b/_reviewers/azure-sdk-for-net-descriptive-consistent-identifiers.json @@ -0,0 +1,68 @@ +[ + { + "discussion_id": "2164984072", + "pr_number": 50844, + "pr_file": "eng/common/mcp/azure-sdk-mcp.ps1", + "created_at": "2025-06-24T21:52:22+00:00", + "commented_code": "}\n $vscodeConfig.servers = $orderedServers\n Write-Host \"Updating vscode mcp config at $vscodeConfigPath\"\n $vscodeConfig | ConvertTo-Json -Depth 10 | Set-Content -Path $vscodeConfig -Force\n $vscodeConfig | ConvertTo-Json -Depth 10 | Set-Content -Path $vscodeConfigPath -Force", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2164984072", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50844, + "pr_file": "eng/common/mcp/azure-sdk-mcp.ps1", + "discussion_id": "2164984072", + "commented_code": "@@ -51,7 +51,7 @@ if ($UpdateVsCodeConfig) {\n }\n $vscodeConfig.servers = $orderedServers\n Write-Host \"Updating vscode 
mcp config at $vscodeConfigPath\"\n- $vscodeConfig | ConvertTo-Json -Depth 10 | Set-Content -Path $vscodeConfig -Force\n+ $vscodeConfig | ConvertTo-Json -Depth 10 | Set-Content -Path $vscodeConfigPath -Force", + "comment_created_at": "2025-06-24T21:52:22+00:00", + "comment_author": "Copilot", + "comment_body": "Fixed the variable used in the Set-Content command to correctly reference $vscodeConfigPath instead of $vscodeConfig, ensuring the updated VSCode configuration is written to the proper file path.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2143555839", + "pr_number": 50576, + "pr_file": "eng/Directory.Build.Common.targets", + "created_at": "2025-06-12T19:58:02+00:00", + "commented_code": "\n \n \n ", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2143555839", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50576, + "pr_file": "eng/Directory.Build.Common.targets", + "discussion_id": "2143555839", + "commented_code": "@@ -111,6 +111,7 @@\n \n \n \n+ ", + "comment_created_at": "2025-06-12T19:58:02+00:00", + "comment_author": "Copilot", + "comment_body": "The added element uses 'PackageReferences' (plural) instead of the consistent 'PackageReference' (singular) used in the surrounding lines, which could prevent the package from being removed as intended. Please update the tag to ''.\n```suggestion\n \n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2141327303", + "pr_number": 50565, + "pr_file": "eng/common/scripts/Invoke-GitHubAPI.ps1", + "created_at": "2025-06-12T00:59:21+00:00", + "commented_code": "[ValidateNotNullOrEmpty()]\n [Parameter(Mandatory = $true)]\n $CommitHash,\n $State=\"open\"\n $State=\"open\",\n $AuthToken\n )\n $uri = \"https://api.github.com/search/issues?q=sha:$CommitHash+state:$State\"\n\n return Invoke-RestMethod `\n -Method GET `\n -Uri $uri `\n -MaximumRetryCount 3\n $params = @{\n Method = 'GET'\n Uri = $uri\n MaximumRetryCount = 3\n }\n if ($AuthToken) {\n $params.Headers = Get-GitHubApiHeaders -token $AuthToken\n }\n return Invoke-RestMethod @params", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2141327303", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50565, + "pr_file": "eng/common/scripts/Invoke-GitHubAPI.ps1", + "discussion_id": "2141327303", + "commented_code": "@@ -560,12 +560,17 @@ function Search-GitHubIssues {\n [ValidateNotNullOrEmpty()]\n [Parameter(Mandatory = $true)]\n $CommitHash,\n- $State=\"open\"\n+ $State=\"open\",\n+ $AuthToken\n )\n $uri = \"https://api.github.com/search/issues?q=sha:$CommitHash+state:$State\"\n-\n- return Invoke-RestMethod `\n- -Method GET `\n- -Uri $uri `\n- -MaximumRetryCount 3\n+ $params = @{\n+ Method = 'GET'\n+ Uri = $uri\n+ MaximumRetryCount = 3\n+ }\n+ if ($AuthToken) {\n+ $params.Headers = Get-GitHubApiHeaders -token $AuthToken\n+ }\n+ return Invoke-RestMethod @params", + "comment_created_at": "2025-06-12T00:59:21+00:00", + "comment_author": "Copilot", + "comment_body": "[nitpick] Consider renaming the '$params' variable to something more descriptive (e.g. 
$invokeParams) to avoid potential confusion with any built-in parameter variables and to improve code clarity.\n```suggestion\n $invokeParams = @{\n Method = 'GET'\n Uri = $uri\n MaximumRetryCount = 3\n }\n if ($AuthToken) {\n $invokeParams.Headers = Get-GitHubApiHeaders -token $AuthToken\n }\n return Invoke-RestMethod @invokeParams\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-descriptive-consistent-identifiers.md b/_reviewers/azure-sdk-for-net-descriptive-consistent-identifiers.md index 363f594..b055a31 100644 --- a/_reviewers/azure-sdk-for-net-descriptive-consistent-identifiers.md +++ b/_reviewers/azure-sdk-for-net-descriptive-consistent-identifiers.md @@ -52,73 +52,3 @@ Example of consistent naming in configuration: ``` Descriptive and consistent naming reduces cognitive load for readers, prevents errors from misused variables, and makes the codebase more maintainable. - - -[ - { - "discussion_id": "2164984072", - "pr_number": 50844, - "pr_file": "eng/common/mcp/azure-sdk-mcp.ps1", - "created_at": "2025-06-24T21:52:22+00:00", - "commented_code": "}\n $vscodeConfig.servers = $orderedServers\n Write-Host \"Updating vscode mcp config at $vscodeConfigPath\"\n $vscodeConfig | ConvertTo-Json -Depth 10 | Set-Content -Path $vscodeConfig -Force\n $vscodeConfig | ConvertTo-Json -Depth 10 | Set-Content -Path $vscodeConfigPath -Force", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2164984072", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50844, - "pr_file": "eng/common/mcp/azure-sdk-mcp.ps1", - "discussion_id": "2164984072", - "commented_code": "@@ -51,7 +51,7 @@ if ($UpdateVsCodeConfig) {\n }\n $vscodeConfig.servers = $orderedServers\n Write-Host \"Updating vscode mcp config at $vscodeConfigPath\"\n- $vscodeConfig | ConvertTo-Json -Depth 10 | Set-Content -Path $vscodeConfig -Force\n+ $vscodeConfig | ConvertTo-Json -Depth 10 | Set-Content -Path $vscodeConfigPath -Force", - "comment_created_at": "2025-06-24T21:52:22+00:00", - "comment_author": "Copilot", - "comment_body": "Fixed the variable used in the Set-Content command to correctly reference $vscodeConfigPath instead of $vscodeConfig, ensuring the updated VSCode configuration is written to the proper file path.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2143555839", - "pr_number": 50576, - "pr_file": "eng/Directory.Build.Common.targets", - "created_at": "2025-06-12T19:58:02+00:00", - "commented_code": "\n \n \n ", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2143555839", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50576, - "pr_file": "eng/Directory.Build.Common.targets", - "discussion_id": "2143555839", - "commented_code": "@@ -111,6 +111,7 @@\n \n \n \n+ ", - "comment_created_at": "2025-06-12T19:58:02+00:00", - "comment_author": "Copilot", - "comment_body": "The added element uses 'PackageReferences' (plural) instead of the consistent 'PackageReference' (singular) used in the surrounding lines, which could prevent the package from being removed as intended. 
Please update the tag to ''.\n```suggestion\n \n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2141327303", - "pr_number": 50565, - "pr_file": "eng/common/scripts/Invoke-GitHubAPI.ps1", - "created_at": "2025-06-12T00:59:21+00:00", - "commented_code": "[ValidateNotNullOrEmpty()]\n [Parameter(Mandatory = $true)]\n $CommitHash,\n $State=\"open\"\n $State=\"open\",\n $AuthToken\n )\n $uri = \"https://api.github.com/search/issues?q=sha:$CommitHash+state:$State\"\n\n return Invoke-RestMethod `\n -Method GET `\n -Uri $uri `\n -MaximumRetryCount 3\n $params = @{\n Method = 'GET'\n Uri = $uri\n MaximumRetryCount = 3\n }\n if ($AuthToken) {\n $params.Headers = Get-GitHubApiHeaders -token $AuthToken\n }\n return Invoke-RestMethod @params", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2141327303", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50565, - "pr_file": "eng/common/scripts/Invoke-GitHubAPI.ps1", - "discussion_id": "2141327303", - "commented_code": "@@ -560,12 +560,17 @@ function Search-GitHubIssues {\n [ValidateNotNullOrEmpty()]\n [Parameter(Mandatory = $true)]\n $CommitHash,\n- $State=\"open\"\n+ $State=\"open\",\n+ $AuthToken\n )\n $uri = \"https://api.github.com/search/issues?q=sha:$CommitHash+state:$State\"\n-\n- return Invoke-RestMethod `\n- -Method GET `\n- -Uri $uri `\n- -MaximumRetryCount 3\n+ $params = @{\n+ Method = 'GET'\n+ Uri = $uri\n+ MaximumRetryCount = 3\n+ }\n+ if ($AuthToken) {\n+ $params.Headers = Get-GitHubApiHeaders -token $AuthToken\n+ }\n+ return Invoke-RestMethod @params", - "comment_created_at": "2025-06-12T00:59:21+00:00", - "comment_author": "Copilot", - "comment_body": "[nitpick] Consider renaming the '$params' variable to something more descriptive (e.g. $invokeParams) to avoid potential confusion with any built-in parameter variables and to improve code clarity.\n```suggestion\n $invokeParams = @{\n Method = 'GET'\n Uri = $uri\n MaximumRetryCount = 3\n }\n if ($AuthToken) {\n $invokeParams.Headers = Get-GitHubApiHeaders -token $AuthToken\n }\n return Invoke-RestMethod @invokeParams\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-design-stable-apis.json b/_reviewers/azure-sdk-for-net-design-stable-apis.json new file mode 100644 index 0000000..c23947d --- /dev/null +++ b/_reviewers/azure-sdk-for-net-design-stable-apis.json @@ -0,0 +1,324 @@ +[ + { + "discussion_id": "2180635446", + "pr_number": 51023, + "pr_file": "sdk/ai/Azure.AI.Projects/AGENTS_MIGRATION_GUIDE.md", + "created_at": "2025-07-02T17:44:03+00:00", + "commented_code": "# Agents migration guide from Hub-based projects to Endpoint-based projects.\nThis guide describes migration from hub-based to Endpoint-based projects. To create a Endpoint-based project, please use one of the deployment scripts on [foundry samples repository](https://github.com/azure-ai-foundry/foundry-samples/tree/main/samples/microsoft/infrastructure-setup) appropriate for your scenario, also you can use Azure AI Foundry UI. The support of hub-based projects was dropped in `Azure.AI.Projects` version `1.0.0-beta.9`. In this document, we show the operation implementation of before `1.0.0-beta.9` in **Hub-based** secion, followed by code for `azure-ai-projects` version `1.0.0-beta.9` or later in **Endpoint-based**.\n\n## Installation\nStarting from version `1.0.0-beta.9`, the operations, related to agents were moved to a separate package `Azure.AI.Agents.Persistent`. 
To use agents please add both `Azure.AI.Projects` and `Azure.AI.Agents.Persistent` packages into the project.\n```\ndotnet add package Azure.AI.Projects --version 1.0.0-beta.9\ndotnet add package Azure.AI.Agents.Persistent --version 1.1.0-beta.3\n```\n\n## Namespace changes\nAgents were moved to a new package and namespace `Azure.AI.Agents.Persistent`.\n\n## Class renamings\n| Hub-based | Endpoint-based |\n|-|-|\n| New in `1.0.0-beta.9` | `PersistentAgentsClient` |\n| `AgentsClient` | `PersistentAgentsAdministrationClient` |\n| `AgentsClientOptions` | `PersistentAgentsAdministrationClientOptions` |\n| `Agent` | `PersistentAgent` |\n| `AgentThread` | `PersistentAgentThread`|\n| `ThreadMessage` | `PersistentThreadMessage` |\n| `ThreadRun` | `ThreadRun` |\n| `AgentFile` | `PersistentAgentFileInfo` |\n| `VectorStore` | `PersistentAgentsVectorStore` |\n\n## Class structure changes\nHere `projectClient` is `AIProjectClient` and `agentClient` is `PersistentAgentsClient` for version `1.0.0-beta.9` and `AgentsClient` for earlier versions.\n\nAgents Operations\n\n| Hub-based | Endpoint-based |\n|-|-|\n| `projectClient.GetAgentsClient` | `projectClient.GetPersistentAgentsClient` |\n| `agentClient.CreateAgent` | `agentClient.Administration.CreateAgent` |\n| `agentClient.GetAgents` | `agentClient.Administration.GetAgents` |\n| `agentClient.GetAgent` | `agentClient.Administration..GetAgent` |\n| `agentClient.UpdateAgent` | `agentClient.Administration.UpdateAgent` |\n| `agentClient.Delete_agent` | `agentClient.Administration.DeleteAgent` |\n| `agentClient.CreateThreadAndRun` | `agentClient.CreateThreadAndRun` |\n\nThreads Operations\n\n| Hub-based | Endpoint-based |\n|-|-|\n| `agentClient.CreateThread` | `agentClient.Threads.CreateThread` |\n| `agentClient.GetThread` | `agentClient.Threads.GetThread` |\n| `agentClient.UpdateThread` | `agentClient.Threads.UpdateThread` |\n| `agentClient.GetThreads` | `agentClient.Threads.GetThreads` |\n| `agentClient.DeleteThread` | `agentClient.Threads.DeleteThread` |\n\nMessages Operations\n\n| Hub-based | Endpoint-based |\n|-|-|\n| `agentClient.CreateMessage` | `agentClient.Messages.CreateMessage` |\n| `agentClient.GetMessages` | `agentClient.Messages.GetMessages` |\n| `agentClient.GetMessage` | `pagentClient.Messages.GetMessage` |\n| `agentClient.UpdateMessage` | `agentClient.Messages.UpdateMessage` |\n\nRuns Operations\n\n| Hub-based | Endpoint-based |\n|-|-|\n| `agentClient.CreateRun` | `agentClient.Runs.CreateRun` |\n| `agentClient.GetRun` | `agentClient.Runs.GetRun` |\n| `agentClient.GetRuns` | `agentClient.Runs.GetRuns` |\n| `agentClient.UpdateRun` | `agentClient.Runs.UpdateRun` |\n| `agentClient.CreateRunStreaming` | `agentClient.Runs.CreateRunStreaming` |\n| `agentClient.SubmitToolOutputsToRun` | `agentClient.Runs.SubmitToolOutputsToRun` |\n| `agentClient.SubmitToolOutputsToStream` | `agentClient.Runs.SubmitToolOutputsToStream` |\n| `agentClient.CancelRun` | `agentClient.Runs.CancelRun` |\n\nRun Steps Operations\n\n| Hub-based | Endpoint-based |\n|-|-|\n| `agentClient.GetRunStep` | `agentClient.Runs.GetRunStep` |\n| `agentClient.GetRunSteps` | `agentClient.Runs.GetRunSteps` |\n\nVector Stores Operations\n\n| Hub-based | Endpoint-based |\n|-|-|\n| `agentClient.CreateVectorStore` | `agentClient.VectorStores.CreateVectorStore |\n| `agentClient.GetVectorStores` | `agentClient.VectorStores.GetVectorStores |\n| `agentClient.GetVectorStore` | `agentClient.VectorStores.GetVectorStore |\n| `agentClient.ModifyVectorStore` | `agentClient.VectorStores.ModifyVectorStore 
|\n| `agentClient.DeleteVectorStore` | `agentClient.VectorStores.DeleteVectorStore |\n\nVector Store Files Operations\n\n| Hub-based | Endpoint-based |\n|-|-|\n| `agentClient.GetVectorStoreFiles` | `agentClient.VectorStores.GetVectorStoreFiles` |\n| `agentClient.CreateVectorStoreFile` | `agentClient.VectorStores.CreateVectorStoreFile` |\n| `agentClient.GetVectorStoreFile` | `agentClient.VectorStores.GetVectorStoreFile` |\n| `agentClient.DeleteVectorStoreFile` | `agentClient.VectorStores.DeleteVectorStoreFile` |\n\nVector Store File Batches Operations\n\n| Hub-based | Endpoint-based |\n|-|-|\n| `agentClient.CreateVectorStoreFileBatch` | `agentClient.VectorStores.CreateVectorStoreFileBatch`|\n| `agentClient.GetVectorStoreFileBatch` | `agentClient.VectorStores.GetVectorStoreFileBatch`|\n| `agentClient.GetVectorSroreFileBatchFiles` | `agentClient.VectorStores.GetVectorStoreFileBatchFiles`|\n| `agentClient.CancelVectorStoreFileBatch` | `agentClient.VectorStores.CancelVectorStoreFileBatch`|\n\nFiles Operations\n\n| Hub-based | Endpoint-based |\n|-|-|\n| `agentClient.UploadFile` | `agentClient.Files.UploadFile` |\n| `agentClient.GetFile` | `agentClient.Files.GetFile` |\n| `agentClient.GetFileContent` | `agentClient.Files.GetFileContent` |\n| `agentClient.GetFiles` | `agentClient.Files.GetFiles` |\n| `agentClient.DeleteFile` | `agentClient.Files.DeleteFile` |\n\n## API changes\n1. Create project. The connection string is replaced by the endpoint. The project endpoint URL has the form https://\\.services.ai.azure.com/api/projects/\\. It can be found in your Azure AI Foundry Project overview page.\n\n **Hub-based**\n ```C#\n AIProjectClient projectClient = new (\n connectionString: connectionString,\n credential: new DefaultAzureCredential());\n ```\n\n **Endpoint-based**\n ```C#\n AIProjectClient projectClient = new(endpoint: endpoint, credential: new DefaultAzureCredential())\n ```\n2. Agents client. In the new `Azure.AI.Projects` version `1.0.0-beta.9` agents are managed using `PersistentAgentsClient` class. It can be obtained from `AIProjectClient` instance or can be created using constructor. The letter allows to provide additional options for the `PersistentAgentsClient`.\n\n **Hub-based**\n ```C#\n AgentsClient agentClient = projectClient.GetAgentsClient();\n ```\n\n **Endpoint-based**\n Create agent using `AIProjectClient`.\n ```C#\n AgentsClient agentClient = projectClient.GetAgentsClient();", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2180635446", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 51023, + "pr_file": "sdk/ai/Azure.AI.Projects/AGENTS_MIGRATION_GUIDE.md", + "discussion_id": "2180635446", + "commented_code": "@@ -0,0 +1,615 @@\n+# Agents migration guide from Hub-based projects to Endpoint-based projects.\n+This guide describes migration from hub-based to Endpoint-based projects. To create a Endpoint-based project, please use one of the deployment scripts on [foundry samples repository](https://github.com/azure-ai-foundry/foundry-samples/tree/main/samples/microsoft/infrastructure-setup) appropriate for your scenario, also you can use Azure AI Foundry UI. The support of hub-based projects was dropped in `Azure.AI.Projects` version `1.0.0-beta.9`. 
In this document, we show the operation implementation of before `1.0.0-beta.9` in **Hub-based** secion, followed by code for `azure-ai-projects` version `1.0.0-beta.9` or later in **Endpoint-based**.\n+\n+## Installation\n+Starting from version `1.0.0-beta.9`, the operations, related to agents were moved to a separate package `Azure.AI.Agents.Persistent`. To use agents please add both `Azure.AI.Projects` and `Azure.AI.Agents.Persistent` packages into the project.\n+```\n+dotnet add package Azure.AI.Projects --version 1.0.0-beta.9\n+dotnet add package Azure.AI.Agents.Persistent --version 1.1.0-beta.3\n+```\n+\n+## Namespace changes\n+Agents were moved to a new package and namespace `Azure.AI.Agents.Persistent`.\n+\n+## Class renamings\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| New in `1.0.0-beta.9` | `PersistentAgentsClient` |\n+| `AgentsClient` | `PersistentAgentsAdministrationClient` |\n+| `AgentsClientOptions` | `PersistentAgentsAdministrationClientOptions` |\n+| `Agent` | `PersistentAgent` |\n+| `AgentThread` | `PersistentAgentThread`|\n+| `ThreadMessage` | `PersistentThreadMessage` |\n+| `ThreadRun` | `ThreadRun` |\n+| `AgentFile` | `PersistentAgentFileInfo` |\n+| `VectorStore` | `PersistentAgentsVectorStore` |\n+\n+## Class structure changes\n+Here `projectClient` is `AIProjectClient` and `agentClient` is `PersistentAgentsClient` for version `1.0.0-beta.9` and `AgentsClient` for earlier versions.\n+\n+Agents Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `projectClient.GetAgentsClient` | `projectClient.GetPersistentAgentsClient` |\n+| `agentClient.CreateAgent` | `agentClient.Administration.CreateAgent` |\n+| `agentClient.GetAgents` | `agentClient.Administration.GetAgents` |\n+| `agentClient.GetAgent` | `agentClient.Administration..GetAgent` |\n+| `agentClient.UpdateAgent` | `agentClient.Administration.UpdateAgent` |\n+| `agentClient.Delete_agent` | `agentClient.Administration.DeleteAgent` |\n+| `agentClient.CreateThreadAndRun` | `agentClient.CreateThreadAndRun` |\n+\n+Threads Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.CreateThread` | `agentClient.Threads.CreateThread` |\n+| `agentClient.GetThread` | `agentClient.Threads.GetThread` |\n+| `agentClient.UpdateThread` | `agentClient.Threads.UpdateThread` |\n+| `agentClient.GetThreads` | `agentClient.Threads.GetThreads` |\n+| `agentClient.DeleteThread` | `agentClient.Threads.DeleteThread` |\n+\n+Messages Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.CreateMessage` | `agentClient.Messages.CreateMessage` |\n+| `agentClient.GetMessages` | `agentClient.Messages.GetMessages` |\n+| `agentClient.GetMessage` | `pagentClient.Messages.GetMessage` |\n+| `agentClient.UpdateMessage` | `agentClient.Messages.UpdateMessage` |\n+\n+Runs Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.CreateRun` | `agentClient.Runs.CreateRun` |\n+| `agentClient.GetRun` | `agentClient.Runs.GetRun` |\n+| `agentClient.GetRuns` | `agentClient.Runs.GetRuns` |\n+| `agentClient.UpdateRun` | `agentClient.Runs.UpdateRun` |\n+| `agentClient.CreateRunStreaming` | `agentClient.Runs.CreateRunStreaming` |\n+| `agentClient.SubmitToolOutputsToRun` | `agentClient.Runs.SubmitToolOutputsToRun` |\n+| `agentClient.SubmitToolOutputsToStream` | `agentClient.Runs.SubmitToolOutputsToStream` |\n+| `agentClient.CancelRun` | `agentClient.Runs.CancelRun` |\n+\n+Run Steps Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.GetRunStep` | `agentClient.Runs.GetRunStep` |\n+| 
`agentClient.GetRunSteps` | `agentClient.Runs.GetRunSteps` |\n+\n+Vector Stores Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.CreateVectorStore` | `agentClient.VectorStores.CreateVectorStore |\n+| `agentClient.GetVectorStores` | `agentClient.VectorStores.GetVectorStores |\n+| `agentClient.GetVectorStore` | `agentClient.VectorStores.GetVectorStore |\n+| `agentClient.ModifyVectorStore` | `agentClient.VectorStores.ModifyVectorStore |\n+| `agentClient.DeleteVectorStore` | `agentClient.VectorStores.DeleteVectorStore |\n+\n+Vector Store Files Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.GetVectorStoreFiles` | `agentClient.VectorStores.GetVectorStoreFiles` |\n+| `agentClient.CreateVectorStoreFile` | `agentClient.VectorStores.CreateVectorStoreFile` |\n+| `agentClient.GetVectorStoreFile` | `agentClient.VectorStores.GetVectorStoreFile` |\n+| `agentClient.DeleteVectorStoreFile` | `agentClient.VectorStores.DeleteVectorStoreFile` |\n+\n+Vector Store File Batches Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.CreateVectorStoreFileBatch` | `agentClient.VectorStores.CreateVectorStoreFileBatch`|\n+| `agentClient.GetVectorStoreFileBatch` | `agentClient.VectorStores.GetVectorStoreFileBatch`|\n+| `agentClient.GetVectorSroreFileBatchFiles` | `agentClient.VectorStores.GetVectorStoreFileBatchFiles`|\n+| `agentClient.CancelVectorStoreFileBatch` | `agentClient.VectorStores.CancelVectorStoreFileBatch`|\n+\n+Files Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.UploadFile` | `agentClient.Files.UploadFile` |\n+| `agentClient.GetFile` | `agentClient.Files.GetFile` |\n+| `agentClient.GetFileContent` | `agentClient.Files.GetFileContent` |\n+| `agentClient.GetFiles` | `agentClient.Files.GetFiles` |\n+| `agentClient.DeleteFile` | `agentClient.Files.DeleteFile` |\n+\n+## API changes\n+1. Create project. The connection string is replaced by the endpoint. The project endpoint URL has the form https://\\.services.ai.azure.com/api/projects/\\. It can be found in your Azure AI Foundry Project overview page.\n+\n+ **Hub-based**\n+ ```C#\n+ AIProjectClient projectClient = new (\n+ connectionString: connectionString,\n+ credential: new DefaultAzureCredential());\n+ ```\n+\n+ **Endpoint-based**\n+ ```C#\n+ AIProjectClient projectClient = new(endpoint: endpoint, credential: new DefaultAzureCredential())\n+ ```\n+2. Agents client. In the new `Azure.AI.Projects` version `1.0.0-beta.9` agents are managed using `PersistentAgentsClient` class. It can be obtained from `AIProjectClient` instance or can be created using constructor. 
The letter allows to provide additional options for the `PersistentAgentsClient`.\n+\n+ **Hub-based**\n+ ```C#\n+ AgentsClient agentClient = projectClient.GetAgentsClient();\n+ ```\n+\n+ **Endpoint-based**\n+ Create agent using `AIProjectClient`.\n+ ```C#\n+ AgentsClient agentClient = projectClient.GetAgentsClient();", + "comment_created_at": "2025-07-02T17:44:03+00:00", + "comment_author": "Copilot", + "comment_body": "Endpoint-based example should use `PersistentAgentsClient` and `GetPersistentAgentsClient()` rather than `AgentsClient`/`GetAgentsClient()`.\n```suggestion\n PersistentAgentsClient agentClient = projectClient.GetPersistentAgentsClient();\n```", + "pr_file_module": null + }, + { + "comment_id": "2180676387", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 51023, + "pr_file": "sdk/ai/Azure.AI.Projects/AGENTS_MIGRATION_GUIDE.md", + "discussion_id": "2180635446", + "commented_code": "@@ -0,0 +1,615 @@\n+# Agents migration guide from Hub-based projects to Endpoint-based projects.\n+This guide describes migration from hub-based to Endpoint-based projects. To create a Endpoint-based project, please use one of the deployment scripts on [foundry samples repository](https://github.com/azure-ai-foundry/foundry-samples/tree/main/samples/microsoft/infrastructure-setup) appropriate for your scenario, also you can use Azure AI Foundry UI. The support of hub-based projects was dropped in `Azure.AI.Projects` version `1.0.0-beta.9`. In this document, we show the operation implementation of before `1.0.0-beta.9` in **Hub-based** secion, followed by code for `azure-ai-projects` version `1.0.0-beta.9` or later in **Endpoint-based**.\n+\n+## Installation\n+Starting from version `1.0.0-beta.9`, the operations, related to agents were moved to a separate package `Azure.AI.Agents.Persistent`. 
To use agents please add both `Azure.AI.Projects` and `Azure.AI.Agents.Persistent` packages into the project.\n+```\n+dotnet add package Azure.AI.Projects --version 1.0.0-beta.9\n+dotnet add package Azure.AI.Agents.Persistent --version 1.1.0-beta.3\n+```\n+\n+## Namespace changes\n+Agents were moved to a new package and namespace `Azure.AI.Agents.Persistent`.\n+\n+## Class renamings\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| New in `1.0.0-beta.9` | `PersistentAgentsClient` |\n+| `AgentsClient` | `PersistentAgentsAdministrationClient` |\n+| `AgentsClientOptions` | `PersistentAgentsAdministrationClientOptions` |\n+| `Agent` | `PersistentAgent` |\n+| `AgentThread` | `PersistentAgentThread`|\n+| `ThreadMessage` | `PersistentThreadMessage` |\n+| `ThreadRun` | `ThreadRun` |\n+| `AgentFile` | `PersistentAgentFileInfo` |\n+| `VectorStore` | `PersistentAgentsVectorStore` |\n+\n+## Class structure changes\n+Here `projectClient` is `AIProjectClient` and `agentClient` is `PersistentAgentsClient` for version `1.0.0-beta.9` and `AgentsClient` for earlier versions.\n+\n+Agents Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `projectClient.GetAgentsClient` | `projectClient.GetPersistentAgentsClient` |\n+| `agentClient.CreateAgent` | `agentClient.Administration.CreateAgent` |\n+| `agentClient.GetAgents` | `agentClient.Administration.GetAgents` |\n+| `agentClient.GetAgent` | `agentClient.Administration..GetAgent` |\n+| `agentClient.UpdateAgent` | `agentClient.Administration.UpdateAgent` |\n+| `agentClient.Delete_agent` | `agentClient.Administration.DeleteAgent` |\n+| `agentClient.CreateThreadAndRun` | `agentClient.CreateThreadAndRun` |\n+\n+Threads Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.CreateThread` | `agentClient.Threads.CreateThread` |\n+| `agentClient.GetThread` | `agentClient.Threads.GetThread` |\n+| `agentClient.UpdateThread` | `agentClient.Threads.UpdateThread` |\n+| `agentClient.GetThreads` | `agentClient.Threads.GetThreads` |\n+| `agentClient.DeleteThread` | `agentClient.Threads.DeleteThread` |\n+\n+Messages Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.CreateMessage` | `agentClient.Messages.CreateMessage` |\n+| `agentClient.GetMessages` | `agentClient.Messages.GetMessages` |\n+| `agentClient.GetMessage` | `pagentClient.Messages.GetMessage` |\n+| `agentClient.UpdateMessage` | `agentClient.Messages.UpdateMessage` |\n+\n+Runs Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.CreateRun` | `agentClient.Runs.CreateRun` |\n+| `agentClient.GetRun` | `agentClient.Runs.GetRun` |\n+| `agentClient.GetRuns` | `agentClient.Runs.GetRuns` |\n+| `agentClient.UpdateRun` | `agentClient.Runs.UpdateRun` |\n+| `agentClient.CreateRunStreaming` | `agentClient.Runs.CreateRunStreaming` |\n+| `agentClient.SubmitToolOutputsToRun` | `agentClient.Runs.SubmitToolOutputsToRun` |\n+| `agentClient.SubmitToolOutputsToStream` | `agentClient.Runs.SubmitToolOutputsToStream` |\n+| `agentClient.CancelRun` | `agentClient.Runs.CancelRun` |\n+\n+Run Steps Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.GetRunStep` | `agentClient.Runs.GetRunStep` |\n+| `agentClient.GetRunSteps` | `agentClient.Runs.GetRunSteps` |\n+\n+Vector Stores Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.CreateVectorStore` | `agentClient.VectorStores.CreateVectorStore |\n+| `agentClient.GetVectorStores` | `agentClient.VectorStores.GetVectorStores |\n+| `agentClient.GetVectorStore` | `agentClient.VectorStores.GetVectorStore 
|\n+| `agentClient.ModifyVectorStore` | `agentClient.VectorStores.ModifyVectorStore |\n+| `agentClient.DeleteVectorStore` | `agentClient.VectorStores.DeleteVectorStore |\n+\n+Vector Store Files Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.GetVectorStoreFiles` | `agentClient.VectorStores.GetVectorStoreFiles` |\n+| `agentClient.CreateVectorStoreFile` | `agentClient.VectorStores.CreateVectorStoreFile` |\n+| `agentClient.GetVectorStoreFile` | `agentClient.VectorStores.GetVectorStoreFile` |\n+| `agentClient.DeleteVectorStoreFile` | `agentClient.VectorStores.DeleteVectorStoreFile` |\n+\n+Vector Store File Batches Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.CreateVectorStoreFileBatch` | `agentClient.VectorStores.CreateVectorStoreFileBatch`|\n+| `agentClient.GetVectorStoreFileBatch` | `agentClient.VectorStores.GetVectorStoreFileBatch`|\n+| `agentClient.GetVectorSroreFileBatchFiles` | `agentClient.VectorStores.GetVectorStoreFileBatchFiles`|\n+| `agentClient.CancelVectorStoreFileBatch` | `agentClient.VectorStores.CancelVectorStoreFileBatch`|\n+\n+Files Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.UploadFile` | `agentClient.Files.UploadFile` |\n+| `agentClient.GetFile` | `agentClient.Files.GetFile` |\n+| `agentClient.GetFileContent` | `agentClient.Files.GetFileContent` |\n+| `agentClient.GetFiles` | `agentClient.Files.GetFiles` |\n+| `agentClient.DeleteFile` | `agentClient.Files.DeleteFile` |\n+\n+## API changes\n+1. Create project. The connection string is replaced by the endpoint. The project endpoint URL has the form https://\\.services.ai.azure.com/api/projects/\\. It can be found in your Azure AI Foundry Project overview page.\n+\n+ **Hub-based**\n+ ```C#\n+ AIProjectClient projectClient = new (\n+ connectionString: connectionString,\n+ credential: new DefaultAzureCredential());\n+ ```\n+\n+ **Endpoint-based**\n+ ```C#\n+ AIProjectClient projectClient = new(endpoint: endpoint, credential: new DefaultAzureCredential())\n+ ```\n+2. Agents client. In the new `Azure.AI.Projects` version `1.0.0-beta.9` agents are managed using `PersistentAgentsClient` class. It can be obtained from `AIProjectClient` instance or can be created using constructor. The letter allows to provide additional options for the `PersistentAgentsClient`.\n+\n+ **Hub-based**\n+ ```C#\n+ AgentsClient agentClient = projectClient.GetAgentsClient();\n+ ```\n+\n+ **Endpoint-based**\n+ Create agent using `AIProjectClient`.\n+ ```C#\n+ AgentsClient agentClient = projectClient.GetAgentsClient();", + "comment_created_at": "2025-07-02T18:08:51+00:00", + "comment_author": "nick863", + "comment_body": "Fixed.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2170502909", + "pr_number": 50575, + "pr_file": ".github/copilot-instructions.md", + "created_at": "2025-06-27T01:47:40+00:00", + "commented_code": "# Repository information\nNote that files in this repository are generally organized in the following way:\n- `azure-sdk-for-net/sdk/{service-directory}/{package-name}` holds everything for a specific Azure SDK package.\n- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/src` holds the source code for the package.\n- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/tests` holds the tests for the package.\n- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/samples` holds the samples for the package.\n\nThere are a few exceptions where package-name is replaced with a shorter directory name. 
For example in the cognitiveservices directory. The package `Microsoft.Azure.CognitiveServices.Language.SpellCheck` can be found in `azure-sdk-for-net/sdk/cognitiveservices/Language.SpellCheck`. When in doubt, you can look at the name of the .csproj file within the src folder to determine the package name.\n\n## Definitions:\n- \"service directory\" refers to the folder under `sdk`. For example, `azure-sdk-for-net/sdk/eventhub`, `eventhub` is the service directory\n- \"data plane\" refers to packages that don't include `ResourceManager` in the package name. They are used to interact with azure resources at application run time.\n- \"management plane\" refers to packages that include `ResourceManager` in the package name. They are used to manage (create/modify/delete) azure resources.\n- \"track 2\" refers to packages that start with `Azure`. Unless otherwise specified, assume that references to \"data plane\" mean \"track 2 data plane\", i.e. packages that start with `Azure` and don't include `ResourceManager` in the package name. Unless otherwise specified, assume that references to \"management plane\" mean \"track 2 management plane\", i.e. packages that start with `Azure.ResourceManager`.\n- \"functions extensions packages\" or sometimes just \"extensions packages\" refers to packages that start with `Microsoft.Azure.WebJobs.Extensions`. They are built on data plane packages and are used with Azure Functions.\n\n# Requirements\n- If you are writing C# code within the `azure-sdk-for-net/sdk` directory:\n 1. Follow the coding guidelines in the \"Coding guidelines\" section below.\n 2. You should never manually make changes to `*/Generated/*` files, e.g. `azure-sdk-for-net/sdk/containerregistry/Azure.Containers.ContainerRegistry/src/Generated/`\n - Only re-generate these files if instructed to do so. If you are instructed to regenerate an SDK, use `dotnet build /t:GenerateCode`\n - If you feel like you need to make changes to these files beyond re-generating them in order to complete your task, do not do this, instead see if you can work around the problem in the code that is not in the `Generated` folder. If you can't, report this to the user.\n 3. Code should build successfully using the following steps:\n - navigate to the root of the repository and run `dotnet build eng\\service.proj /p:ServiceDirectory={service-directory}` for each service directory you modified. For example, if you modified code in `sdk/eventhub` and `sdk/keyvault`, you would run:\n `dotnet build eng\\service.proj /p:ServiceDirectory=eventhub` and `dotnet build eng\\service.proj /p:ServiceDirectory=keyvault`\n - If you see build errors, try to fix them, if you can't fix them within 5 iterations, give up, do not do steps 4 or 5, and report this to the user. Do not report success if the build fails!\n 4. Once the code builds, run the unit tests using `dotnet test eng/service.proj /p:ServiceDirectory={service-directory} --filter TestCategory!=Live` for each service directory you modified. Try to fix failures if you can within 5 iterations. If you can't, give up and report this to the user. 
Do not report success if the tests fail!", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2170502909", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50575, + "pr_file": ".github/copilot-instructions.md", + "discussion_id": "2170502909", + "commented_code": "@@ -0,0 +1,34 @@\n+# Repository information\n+Note that files in this repository are generally organized in the following way:\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}` holds everything for a specific Azure SDK package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/src` holds the source code for the package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/tests` holds the tests for the package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/samples` holds the samples for the package.\n+\n+There are a few exceptions where package-name is replaced with a shorter directory name. For example in the cognitiveservices directory. The package `Microsoft.Azure.CognitiveServices.Language.SpellCheck` can be found in `azure-sdk-for-net/sdk/cognitiveservices/Language.SpellCheck`. When in doubt, you can look at the name of the .csproj file within the src folder to determine the package name.\n+\n+## Definitions:\n+- \"service directory\" refers to the folder under `sdk`. For example, `azure-sdk-for-net/sdk/eventhub`, `eventhub` is the service directory\n+- \"data plane\" refers to packages that don't include `ResourceManager` in the package name. They are used to interact with azure resources at application run time.\n+- \"management plane\" refers to packages that include `ResourceManager` in the package name. They are used to manage (create/modify/delete) azure resources.\n+- \"track 2\" refers to packages that start with `Azure`. Unless otherwise specified, assume that references to \"data plane\" mean \"track 2 data plane\", i.e. packages that start with `Azure` and don't include `ResourceManager` in the package name. Unless otherwise specified, assume that references to \"management plane\" mean \"track 2 management plane\", i.e. packages that start with `Azure.ResourceManager`.\n+- \"functions extensions packages\" or sometimes just \"extensions packages\" refers to packages that start with `Microsoft.Azure.WebJobs.Extensions`. They are built on data plane packages and are used with Azure Functions.\n+\n+# Requirements\n+- If you are writing C# code within the `azure-sdk-for-net/sdk` directory:\n+ 1. Follow the coding guidelines in the \"Coding guidelines\" section below.\n+ 2. You should never manually make changes to `*/Generated/*` files, e.g. `azure-sdk-for-net/sdk/containerregistry/Azure.Containers.ContainerRegistry/src/Generated/`\n+ - Only re-generate these files if instructed to do so. If you are instructed to regenerate an SDK, use `dotnet build /t:GenerateCode`\n+ - If you feel like you need to make changes to these files beyond re-generating them in order to complete your task, do not do this, instead see if you can work around the problem in the code that is not in the `Generated` folder. If you can't, report this to the user.\n+ 3. Code should build successfully using the following steps:\n+ - navigate to the root of the repository and run `dotnet build eng\\service.proj /p:ServiceDirectory={service-directory}` for each service directory you modified. 
For example, if you modified code in `sdk/eventhub` and `sdk/keyvault`, you would run:\n+ `dotnet build eng\\service.proj /p:ServiceDirectory=eventhub` and `dotnet build eng\\service.proj /p:ServiceDirectory=keyvault`\n+ - If you see build errors, try to fix them, if you can't fix them within 5 iterations, give up, do not do steps 4 or 5, and report this to the user. Do not report success if the build fails!\n+ 4. Once the code builds, run the unit tests using `dotnet test eng/service.proj /p:ServiceDirectory={service-directory} --filter TestCategory!=Live` for each service directory you modified. Try to fix failures if you can within 5 iterations. If you can't, give up and report this to the user. Do not report success if the tests fail!", + "comment_created_at": "2025-06-27T01:47:40+00:00", + "comment_author": "jsquire", + "comment_body": "We should consider adding one that instructs it to avoid making any breaking change to a public API and avoid adding public API members unless explicitly instructed to do so.", + "pr_file_module": null + }, + { + "comment_id": "2172760547", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50575, + "pr_file": ".github/copilot-instructions.md", + "discussion_id": "2170502909", + "commented_code": "@@ -0,0 +1,34 @@\n+# Repository information\n+Note that files in this repository are generally organized in the following way:\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}` holds everything for a specific Azure SDK package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/src` holds the source code for the package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/tests` holds the tests for the package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/samples` holds the samples for the package.\n+\n+There are a few exceptions where package-name is replaced with a shorter directory name. For example in the cognitiveservices directory. The package `Microsoft.Azure.CognitiveServices.Language.SpellCheck` can be found in `azure-sdk-for-net/sdk/cognitiveservices/Language.SpellCheck`. When in doubt, you can look at the name of the .csproj file within the src folder to determine the package name.\n+\n+## Definitions:\n+- \"service directory\" refers to the folder under `sdk`. For example, `azure-sdk-for-net/sdk/eventhub`, `eventhub` is the service directory\n+- \"data plane\" refers to packages that don't include `ResourceManager` in the package name. They are used to interact with azure resources at application run time.\n+- \"management plane\" refers to packages that include `ResourceManager` in the package name. They are used to manage (create/modify/delete) azure resources.\n+- \"track 2\" refers to packages that start with `Azure`. Unless otherwise specified, assume that references to \"data plane\" mean \"track 2 data plane\", i.e. packages that start with `Azure` and don't include `ResourceManager` in the package name. Unless otherwise specified, assume that references to \"management plane\" mean \"track 2 management plane\", i.e. packages that start with `Azure.ResourceManager`.\n+- \"functions extensions packages\" or sometimes just \"extensions packages\" refers to packages that start with `Microsoft.Azure.WebJobs.Extensions`. They are built on data plane packages and are used with Azure Functions.\n+\n+# Requirements\n+- If you are writing C# code within the `azure-sdk-for-net/sdk` directory:\n+ 1. Follow the coding guidelines in the \"Coding guidelines\" section below.\n+ 2. 
You should never manually make changes to `*/Generated/*` files, e.g. `azure-sdk-for-net/sdk/containerregistry/Azure.Containers.ContainerRegistry/src/Generated/`\n+ - Only re-generate these files if instructed to do so. If you are instructed to regenerate an SDK, use `dotnet build /t:GenerateCode`\n+ - If you feel like you need to make changes to these files beyond re-generating them in order to complete your task, do not do this, instead see if you can work around the problem in the code that is not in the `Generated` folder. If you can't, report this to the user.\n+ 3. Code should build successfully using the following steps:\n+ - navigate to the root of the repository and run `dotnet build eng\\service.proj /p:ServiceDirectory={service-directory}` for each service directory you modified. For example, if you modified code in `sdk/eventhub` and `sdk/keyvault`, you would run:\n+ `dotnet build eng\\service.proj /p:ServiceDirectory=eventhub` and `dotnet build eng\\service.proj /p:ServiceDirectory=keyvault`\n+ - If you see build errors, try to fix them, if you can't fix them within 5 iterations, give up, do not do steps 4 or 5, and report this to the user. Do not report success if the build fails!\n+ 4. Once the code builds, run the unit tests using `dotnet test eng/service.proj /p:ServiceDirectory={service-directory} --filter TestCategory!=Live` for each service directory you modified. Try to fix failures if you can within 5 iterations. If you can't, give up and report this to the user. Do not report success if the tests fail!", + "comment_created_at": "2025-06-27T19:47:02+00:00", + "comment_author": "m-redding", + "comment_body": "I added one below, I'll test it over the next couple days and see if the copilot follows it", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2170843437", + "pr_number": 50712, + "pr_file": "sdk/terraform/Azure.ResourceManager.Terraform/CHANGELOG.md", + "created_at": "2025-06-27T05:47:43+00:00", + "commented_code": "# Release History\n\n## 1.0.0-beta.2 (Unreleased)\n## 1.0.0-beta.2 (2025-06-19)\n\n### Features Added\n\n### Breaking Changes\n- **Support for Exclusion Filters in Export Operations**:\n - Added `ExcludeAzureResource` and `ExcludeTerraformResource` properties to export parameter models,\n these allow users to exclude resources from being exported based on Azure resource ID patterns or Terraform resource types.\n\n### Bugs Fixed\n- **Authorization Scope Filter Support**:\n - Introduced the `AuthorizationScopeFilter` struct, enabling fine-grained control over the scope of Azure Resource Graph queries during export.", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2170843437", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50712, + "pr_file": "sdk/terraform/Azure.ResourceManager.Terraform/CHANGELOG.md", + "discussion_id": "2170843437", + "commented_code": "@@ -1,14 +1,21 @@\n # Release History\n \n-## 1.0.0-beta.2 (Unreleased)\n+## 1.0.0-beta.2 (2025-06-19)\n \n ### Features Added\n \n-### Breaking Changes\n+- **Support for Exclusion Filters in Export Operations**:\n+ - Added `ExcludeAzureResource` and `ExcludeTerraformResource` properties to export parameter models,\n+ these allow users to exclude resources from being exported based on Azure resource ID patterns or Terraform resource types.\n \n-### Bugs Fixed\n+- **Authorization Scope Filter Support**:\n+ - Introduced the `AuthorizationScopeFilter` struct, enabling fine-grained control over the scope of Azure Resource Graph 
queries during export.", + "comment_created_at": "2025-06-27T05:47:43+00:00", + "comment_author": "ArcturusZhang", + "comment_body": "```suggestion\r\n - Introduced the `TerraformAuthorizationScopeFilter` struct, enabling fine-grained control over the scope of Azure Resource Graph queries during export.\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2170860019", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50712, + "pr_file": "sdk/terraform/Azure.ResourceManager.Terraform/CHANGELOG.md", + "discussion_id": "2170843437", + "commented_code": "@@ -1,14 +1,21 @@\n # Release History\n \n-## 1.0.0-beta.2 (Unreleased)\n+## 1.0.0-beta.2 (2025-06-19)\n \n ### Features Added\n \n-### Breaking Changes\n+- **Support for Exclusion Filters in Export Operations**:\n+ - Added `ExcludeAzureResource` and `ExcludeTerraformResource` properties to export parameter models,\n+ these allow users to exclude resources from being exported based on Azure resource ID patterns or Terraform resource types.\n \n-### Bugs Fixed\n+- **Authorization Scope Filter Support**:\n+ - Introduced the `AuthorizationScopeFilter` struct, enabling fine-grained control over the scope of Azure Resource Graph queries during export.", + "comment_created_at": "2025-06-27T05:55:40+00:00", + "comment_author": "gerrytan", + "comment_body": "Fixed", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2058806603", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "created_at": "2025-04-24T16:12:44+00:00", + "commented_code": "while (toolOutputs.Count > 0);\n```\n\n#### Function call executed automatically\n\nIn addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n\nHere we use other functions for demonstration:\n```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\nprivate class Address\n{\n public string Street { get; set; }\n public string City { get; set; }\n}\n\nprivate int GetHumidityByAddress(Address address)\n{\n return (address.City == \"Seattle\") ? 60 : 80;\n}\n\nprivate string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2058806603", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2058806603", + "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")", + "comment_created_at": "2025-04-24T16:12:44+00:00", + "comment_author": "KrzysztofCwalina", + "comment_body": "Why do you get JSON parsed into Dictionary here? If we don't have time to add higher level helpers, we should just keep using raw JSON, i.e. UTF8 bytes wrapped in BinaryData. Then a reoutine like this would simply use one of the JSON parsers to parse the addresses and unit. 
\r\n\r\nAlso, long term we really need to be able to create methods like this that take Address[] as a parameter. Will we be able to do it? i.e. how will your code know whether the delegate accepts JSON or Address[]?", + "pr_file_module": null + }, + { + "comment_id": "2058955957", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2058806603", + "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")", + "comment_created_at": "2025-04-24T17:51:45+00:00", + "comment_author": "howieleung", + "comment_body": "I am just showing developers that we support Dictionary type. I can change to Address[]", + "pr_file_module": null + }, + { + "comment_id": "2059163033", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2058806603", + "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")", + "comment_created_at": "2025-04-24T20:17:52+00:00", + "comment_author": "KrzysztofCwalina", + "comment_body": "I don't think we should support dictionary like that. It just adds complexity and is not a nice API for the users", + "pr_file_module": null + }, + { + "comment_id": "2060553553", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2058806603", + "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 
60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")", + "comment_created_at": "2025-04-25T16:33:17+00:00", + "comment_author": "howieleung", + "comment_body": "Took it off.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2058822771", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "created_at": "2025-04-24T16:22:52+00:00", + "commented_code": "while (toolOutputs.Count > 0);\n```\n\n#### Function call executed automatically\n\nIn addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n\nHere we use other functions for demonstration:\n```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\nprivate class Address\n{\n public string Street { get; set; }\n public string City { get; set; }\n}\n\nprivate int GetHumidityByAddress(Address address)\n{\n return (address.City == \"Seattle\") ? 60 : 80;\n}\n\nprivate string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n{\n string[] temps = new string[addresses.Length];\n for (int i = 0; i < addresses.Length; i++)\n {\n if (addresses[i].TryGetValue(\"city\", out string city))\n {\n temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? \"20\" : \"50\", unit);\n }\n else\n {\n throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n }\n }\n return temps;\n}\n```\nNow we define the function definitions:\n```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\nprivate FunctionToolDefinition geHhumidityByAddressTool = new(\n name: \"GetHumidityByAddress\",\n description: \"Get humidity by street and city\",\n parameters: BinaryData.FromObjectAsJson(\n new\n {\n Type = \"object\",\n Properties = new\n {\n Address = new\n {\n Type = \"object\",\n Properties = new\n {\n Street = new\n {\n Type = \"string\",\n Description = \"Street\"\n },\n City = new\n {\n Type = \"string\",\n Description = \"city\"\n },\n },\n Required = new[] { \"street\", \"city\" }\n }\n },\n Required = new[] { \"address\" }\n },\n new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n\nprivate FunctionToolDefinition getWeatherByAddressesTool = new(\n name: \"GetWeatherByAddresses\",\n description: \"Get weather by street and city\",\n parameters: BinaryData.FromObjectAsJson(\n new\n {\n Type = \"object\",\n Properties = new\n {\n Addresses = new\n {\n Type = \"array\",\n Description = \"A list of addresses\",\n Items = new\n {\n Type = \"object\",\n Properties = new\n {\n Street = new\n {\n Type = \"string\",\n Description = \"Street\"\n },\n City = new\n {\n Type = \"string\",\n description = \"city\"\n },\n },\n Required = new[] { \"street\", \"city\" }\n }\n },\n Unit = new\n {\n Type = \"string\",\n Enum = new[] { \"c\", \"f\" },\n },\n },\n Required = new[] { \"addresses\" }\n },\n new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n```\nUse `EnableAutoFunctionCall` to enable the auto function call:\n```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\nList toolOutputs = new();\nDictionary delegates = new();\ndelegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\ndelegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\nAIProjectClientOptions options = new();\noptions.EnableAutoFunctionCalls(delegates);\n\nAgentsClient client = new(connectionString, new DefaultAzureCredential(), options);\n```\n\n\nWhen you create an agent, you can specify the function call by tools 
argument similar to the example of manual function calls:\n```C# Snippet:StreamingWithAutoFunctionCall_CreateAgent\nAgent agent = client.CreateAgent(\n model: modelDeploymentName,\n name: \"SDK Test Agent - Functions\",\n instructions: \"You are a weather bot. Use the provided functions to help answer questions. \"\n + \"Customize your responses to the user's preferences as much as possible and use friendly \"\n + \"nicknames for cities whenever possible.\",\n tools: [getWeatherByAddressesTool, geHhumidityByAddressTool]\n);\n```\n\nWe create a thread and message similar to the example of manual function tool calls:\n```C# Snippet:StreamingWithAutoFunctionCall_CreateThreadMessage\nAgentThread thread = client.CreateThread();\n\nThreadMessage message = client.CreateMessage(\n thread.Id,\n MessageRole.User,\n \"Get weather and humidity for street, 123 1st St in city, Seattle and street, 456 2nd Ave in city, Bellevue\");\n```\n\nThe agent will then call the function automatically when it is needed during `CreateRunStreamingAsync`:", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2058822771", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2058822771", + "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? 
\"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);\n+\n+AgentsClient client = new(connectionString, new DefaultAzureCredential(), options);\n+```\n+\n+\n+When you create an agent, you can specify the function call by tools argument similar to the example of manual function calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateAgent\n+Agent agent = client.CreateAgent(\n+ model: modelDeploymentName,\n+ name: \"SDK Test Agent - Functions\",\n+ instructions: \"You are a weather bot. Use the provided functions to help answer questions. 
\"\n+ + \"Customize your responses to the user's preferences as much as possible and use friendly \"\n+ + \"nicknames for cities whenever possible.\",\n+ tools: [getWeatherByAddressesTool, geHhumidityByAddressTool]\n+);\n+```\n+\n+We create a thread and message similar to the example of manual function tool calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateThreadMessage\n+AgentThread thread = client.CreateThread();\n+\n+ThreadMessage message = client.CreateMessage(\n+ thread.Id,\n+ MessageRole.User,\n+ \"Get weather and humidity for street, 123 1st St in city, Seattle and street, 456 2nd Ave in city, Bellevue\");\n+```\n+\n+The agent will then call the function automatically when it is needed during `CreateRunStreamingAsync`:", + "comment_created_at": "2025-04-24T16:22:52+00:00", + "comment_author": "KrzysztofCwalina", + "comment_body": "do we really need streaming runs here? Can we just use a simple completion?", + "pr_file_module": null + }, + { + "comment_id": "2058964616", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2058822771", + "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? 
\"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);\n+\n+AgentsClient client = new(connectionString, new DefaultAzureCredential(), options);\n+```\n+\n+\n+When you create an agent, you can specify the function call by tools argument similar to the example of manual function calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateAgent\n+Agent agent = client.CreateAgent(\n+ model: modelDeploymentName,\n+ name: \"SDK Test Agent - Functions\",\n+ instructions: \"You are a weather bot. Use the provided functions to help answer questions. 
\"\n+ + \"Customize your responses to the user's preferences as much as possible and use friendly \"\n+ + \"nicknames for cities whenever possible.\",\n+ tools: [getWeatherByAddressesTool, geHhumidityByAddressTool]\n+);\n+```\n+\n+We create a thread and message similar to the example of manual function tool calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateThreadMessage\n+AgentThread thread = client.CreateThread();\n+\n+ThreadMessage message = client.CreateMessage(\n+ thread.Id,\n+ MessageRole.User,\n+ \"Get weather and humidity for street, 123 1st St in city, Seattle and street, 456 2nd Ave in city, Bellevue\");\n+```\n+\n+The agent will then call the function automatically when it is needed during `CreateRunStreamingAsync`:", + "comment_created_at": "2025-04-24T17:56:02+00:00", + "comment_author": "howieleung", + "comment_body": "Auto function calling only support for streaming. For non-streaming, they need to have a while loop to call getRun. When the run say required_action, they need to call the function manually.", + "pr_file_module": null + }, + { + "comment_id": "2059165687", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2058822771", + "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? 
\"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);\n+\n+AgentsClient client = new(connectionString, new DefaultAzureCredential(), options);\n+```\n+\n+\n+When you create an agent, you can specify the function call by tools argument similar to the example of manual function calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateAgent\n+Agent agent = client.CreateAgent(\n+ model: modelDeploymentName,\n+ name: \"SDK Test Agent - Functions\",\n+ instructions: \"You are a weather bot. Use the provided functions to help answer questions. 
\"\n+ + \"Customize your responses to the user's preferences as much as possible and use friendly \"\n+ + \"nicknames for cities whenever possible.\",\n+ tools: [getWeatherByAddressesTool, geHhumidityByAddressTool]\n+);\n+```\n+\n+We create a thread and message similar to the example of manual function tool calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateThreadMessage\n+AgentThread thread = client.CreateThread();\n+\n+ThreadMessage message = client.CreateMessage(\n+ thread.Id,\n+ MessageRole.User,\n+ \"Get weather and humidity for street, 123 1st St in city, Seattle and street, 456 2nd Ave in city, Bellevue\");\n+```\n+\n+The agent will then call the function automatically when it is needed during `CreateRunStreamingAsync`:", + "comment_created_at": "2025-04-24T20:20:04+00:00", + "comment_author": "KrzysztofCwalina", + "comment_body": "why would we not support non-streaming? This seems like very suprising design for the user. ", + "pr_file_module": null + }, + { + "comment_id": "2060527319", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2058822771", + "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? 
\"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);\n+\n+AgentsClient client = new(connectionString, new DefaultAzureCredential(), options);\n+```\n+\n+\n+When you create an agent, you can specify the function call by tools argument similar to the example of manual function calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateAgent\n+Agent agent = client.CreateAgent(\n+ model: modelDeploymentName,\n+ name: \"SDK Test Agent - Functions\",\n+ instructions: \"You are a weather bot. Use the provided functions to help answer questions. 
\"\n+ + \"Customize your responses to the user's preferences as much as possible and use friendly \"\n+ + \"nicknames for cities whenever possible.\",\n+ tools: [getWeatherByAddressesTool, geHhumidityByAddressTool]\n+);\n+```\n+\n+We create a thread and message similar to the example of manual function tool calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateThreadMessage\n+AgentThread thread = client.CreateThread();\n+\n+ThreadMessage message = client.CreateMessage(\n+ thread.Id,\n+ MessageRole.User,\n+ \"Get weather and humidity for street, 123 1st St in city, Seattle and street, 456 2nd Ave in city, Bellevue\");\n+```\n+\n+The agent will then call the function automatically when it is needed during `CreateRunStreamingAsync`:", + "comment_created_at": "2025-04-25T16:13:44+00:00", + "comment_author": "howieleung", + "comment_body": "Because for non-streaming, developers write their own while loop to iterate the run. The function call must be made within the while loop. If the while loop is inside of the SDK, yes, I would want to call the function for them. ", + "pr_file_module": null + }, + { + "comment_id": "2060675130", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2058822771", + "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? 
\"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);\n+\n+AgentsClient client = new(connectionString, new DefaultAzureCredential(), options);\n+```\n+\n+\n+When you create an agent, you can specify the function call by tools argument similar to the example of manual function calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateAgent\n+Agent agent = client.CreateAgent(\n+ model: modelDeploymentName,\n+ name: \"SDK Test Agent - Functions\",\n+ instructions: \"You are a weather bot. Use the provided functions to help answer questions. 
\"\n+ + \"Customize your responses to the user's preferences as much as possible and use friendly \"\n+ + \"nicknames for cities whenever possible.\",\n+ tools: [getWeatherByAddressesTool, geHhumidityByAddressTool]\n+);\n+```\n+\n+We create a thread and message similar to the example of manual function tool calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateThreadMessage\n+AgentThread thread = client.CreateThread();\n+\n+ThreadMessage message = client.CreateMessage(\n+ thread.Id,\n+ MessageRole.User,\n+ \"Get weather and humidity for street, 123 1st St in city, Seattle and street, 456 2nd Ave in city, Bellevue\");\n+```\n+\n+The agent will then call the function automatically when it is needed during `CreateRunStreamingAsync`:", + "comment_created_at": "2025-04-25T18:04:50+00:00", + "comment_author": "KrzysztofCwalina", + "comment_body": "isn't the await foreach below the same as the while loop?", + "pr_file_module": null + }, + { + "comment_id": "2060919485", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2058822771", + "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? 
\"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);\n+\n+AgentsClient client = new(connectionString, new DefaultAzureCredential(), options);\n+```\n+\n+\n+When you create an agent, you can specify the function call by tools argument similar to the example of manual function calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateAgent\n+Agent agent = client.CreateAgent(\n+ model: modelDeploymentName,\n+ name: \"SDK Test Agent - Functions\",\n+ instructions: \"You are a weather bot. Use the provided functions to help answer questions. 
\"\n+ + \"Customize your responses to the user's preferences as much as possible and use friendly \"\n+ + \"nicknames for cities whenever possible.\",\n+ tools: [getWeatherByAddressesTool, geHhumidityByAddressTool]\n+);\n+```\n+\n+We create a thread and message similar to the example of manual function tool calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateThreadMessage\n+AgentThread thread = client.CreateThread();\n+\n+ThreadMessage message = client.CreateMessage(\n+ thread.Id,\n+ MessageRole.User,\n+ \"Get weather and humidity for street, 123 1st St in city, Seattle and street, 456 2nd Ave in city, Bellevue\");\n+```\n+\n+The agent will then call the function automatically when it is needed during `CreateRunStreamingAsync`:", + "comment_created_at": "2025-04-25T21:44:47+00:00", + "comment_author": "howieleung", + "comment_body": "It isn't because whether it is a for-loop or while -loop. I\r\nDuring streaming, the SDK inside run a loop to go through the events and I can add code to call functions. Regardless it needs to call function or not, we than `yeild return` the content out. \r\nOn the other hand, for non-streaming, users call getRun again and again in a loop until thread status is completed. Inside of the SDK, getRun does not have loop for me to do function call, submit output, and call function again, and retry if necessary. Users have to make function call manually inside of their loop.\r\nThe same design happens in Python SDK and SK as well.", + "pr_file_module": null + }, + { + "comment_id": "2061034362", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2058822771", + "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? 
\"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);\n+\n+AgentsClient client = new(connectionString, new DefaultAzureCredential(), options);\n+```\n+\n+\n+When you create an agent, you can specify the function call by tools argument similar to the example of manual function calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateAgent\n+Agent agent = client.CreateAgent(\n+ model: modelDeploymentName,\n+ name: \"SDK Test Agent - Functions\",\n+ instructions: \"You are a weather bot. Use the provided functions to help answer questions. 
\"\n+ + \"Customize your responses to the user's preferences as much as possible and use friendly \"\n+ + \"nicknames for cities whenever possible.\",\n+ tools: [getWeatherByAddressesTool, geHhumidityByAddressTool]\n+);\n+```\n+\n+We create a thread and message similar to the example of manual function tool calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateThreadMessage\n+AgentThread thread = client.CreateThread();\n+\n+ThreadMessage message = client.CreateMessage(\n+ thread.Id,\n+ MessageRole.User,\n+ \"Get weather and humidity for street, 123 1st St in city, Seattle and street, 456 2nd Ave in city, Bellevue\");\n+```\n+\n+The agent will then call the function automatically when it is needed during `CreateRunStreamingAsync`:", + "comment_created_at": "2025-04-25T23:41:45+00:00", + "comment_author": "KrzysztofCwalina", + "comment_body": "Then maybe we just move this off the client options and pass it to the streaming completion methods. As of right not, the API is confusing: the user initializes the client to do auto tool calls and then the client won't do auto tool calls unless the user calls the streaming APIs. ", + "pr_file_module": null + }, + { + "comment_id": "2062953026", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2058822771", + "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? 
\"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);\n+\n+AgentsClient client = new(connectionString, new DefaultAzureCredential(), options);\n+```\n+\n+\n+When you create an agent, you can specify the function call by tools argument similar to the example of manual function calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateAgent\n+Agent agent = client.CreateAgent(\n+ model: modelDeploymentName,\n+ name: \"SDK Test Agent - Functions\",\n+ instructions: \"You are a weather bot. Use the provided functions to help answer questions. 
\"\n+ + \"Customize your responses to the user's preferences as much as possible and use friendly \"\n+ + \"nicknames for cities whenever possible.\",\n+ tools: [getWeatherByAddressesTool, geHhumidityByAddressTool]\n+);\n+```\n+\n+We create a thread and message similar to the example of manual function tool calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateThreadMessage\n+AgentThread thread = client.CreateThread();\n+\n+ThreadMessage message = client.CreateMessage(\n+ thread.Id,\n+ MessageRole.User,\n+ \"Get weather and humidity for street, 123 1st St in city, Seattle and street, 456 2nd Ave in city, Bellevue\");\n+```\n+\n+The agent will then call the function automatically when it is needed during `CreateRunStreamingAsync`:", + "comment_created_at": "2025-04-28T06:17:24+00:00", + "comment_author": "howieleung", + "comment_body": "Done.. I added autoFunctionCallOptions to create_stream function.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2060975818", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "created_at": "2025-04-25T22:51:07+00:00", + "commented_code": "{\n if (functionName == getUserFavoriteCityTool.Name)\n {\n return new ToolOutput(toolCallId, GetUserFavoriteCity());\n return new ToolOutput(toolCallId, ToolCallsResolver.Resolve(GetUserFavoriteCity, functionArguments).ToString());", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2060975818", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2060975818", + "commented_code": "@@ -581,23 +582,15 @@ ToolOutput GetResolvedToolOutput(string functionName, string toolCallId, string\n {\n if (functionName == getUserFavoriteCityTool.Name)\n {\n- return new ToolOutput(toolCallId, GetUserFavoriteCity());\n+ return new ToolOutput(toolCallId, ToolCallsResolver.Resolve(GetUserFavoriteCity, functionArguments).ToString());", + "comment_created_at": "2025-04-25T22:51:07+00:00", + "comment_author": "KrzysztofCwalina", + "comment_body": "I don't think we should ship this API as public. If tool calls are not automatic, we want the caller to simply call the CLR API they want. We don't want them to go through some resolver. 
If you want an API that maps from tol call request arguments to CLR values that can be passed to a CLR method, then I would be fine with that, but honestly such API should be in some shared library, not in AI Project", + "pr_file_module": null + }, + { + "comment_id": "2061195294", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2060975818", + "commented_code": "@@ -581,23 +582,15 @@ ToolOutput GetResolvedToolOutput(string functionName, string toolCallId, string\n {\n if (functionName == getUserFavoriteCityTool.Name)\n {\n- return new ToolOutput(toolCallId, GetUserFavoriteCity());\n+ return new ToolOutput(toolCallId, ToolCallsResolver.Resolve(GetUserFavoriteCity, functionArguments).ToString());", + "comment_created_at": "2025-04-26T06:38:16+00:00", + "comment_author": "howieleung", + "comment_body": "changed to internal.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2098984732", + "pr_number": 50169, + "pr_file": "sdk/communication/Azure.Communication.PhoneNumbers/samples/Sample_PhoneNumbersClient.md", + "created_at": "2025-05-20T22:46:40+00:00", + "commented_code": "## Purchase phone numbers\n\nPhone numbers can be acquired through purchasing a reservation.\nPhone numbers can be acquired through purchasing a search.", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2098984732", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50169, + "pr_file": "sdk/communication/Azure.Communication.PhoneNumbers/samples/Sample_PhoneNumbersClient.md", + "discussion_id": "2098984732", + "commented_code": "@@ -34,7 +34,7 @@ while (!searchOperation.HasCompleted)\n \n ## Purchase phone numbers\n \n-Phone numbers can be acquired through purchasing a reservation.\n+Phone numbers can be acquired through purchasing a search.", + "comment_created_at": "2025-05-20T22:46:40+00:00", + "comment_author": "Copilot", + "comment_body": "The sample description appears inconsistent with the rest of the Reservations API documentation. It states 'purchasing a search' instead of 'purchasing a reservation'; please clarify the intended terminology to avoid confusion.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-design-stable-apis.md b/_reviewers/azure-sdk-for-net-design-stable-apis.md index d705011..0819497 100644 --- a/_reviewers/azure-sdk-for-net-design-stable-apis.md +++ b/_reviewers/azure-sdk-for-net-design-stable-apis.md @@ -41,329 +41,3 @@ private int GetHumidityByAddress(Dictionary address) ``` Before adding new public API members, consider if they're truly necessary and ensure they won't need breaking changes in the future. - - -[ - { - "discussion_id": "2180635446", - "pr_number": 51023, - "pr_file": "sdk/ai/Azure.AI.Projects/AGENTS_MIGRATION_GUIDE.md", - "created_at": "2025-07-02T17:44:03+00:00", - "commented_code": "# Agents migration guide from Hub-based projects to Endpoint-based projects.\nThis guide describes migration from hub-based to Endpoint-based projects. To create a Endpoint-based project, please use one of the deployment scripts on [foundry samples repository](https://github.com/azure-ai-foundry/foundry-samples/tree/main/samples/microsoft/infrastructure-setup) appropriate for your scenario, also you can use Azure AI Foundry UI. The support of hub-based projects was dropped in `Azure.AI.Projects` version `1.0.0-beta.9`. 
In this document, we show the operation implementation of before `1.0.0-beta.9` in **Hub-based** secion, followed by code for `azure-ai-projects` version `1.0.0-beta.9` or later in **Endpoint-based**.\n\n## Installation\nStarting from version `1.0.0-beta.9`, the operations, related to agents were moved to a separate package `Azure.AI.Agents.Persistent`. To use agents please add both `Azure.AI.Projects` and `Azure.AI.Agents.Persistent` packages into the project.\n```\ndotnet add package Azure.AI.Projects --version 1.0.0-beta.9\ndotnet add package Azure.AI.Agents.Persistent --version 1.1.0-beta.3\n```\n\n## Namespace changes\nAgents were moved to a new package and namespace `Azure.AI.Agents.Persistent`.\n\n## Class renamings\n| Hub-based | Endpoint-based |\n|-|-|\n| New in `1.0.0-beta.9` | `PersistentAgentsClient` |\n| `AgentsClient` | `PersistentAgentsAdministrationClient` |\n| `AgentsClientOptions` | `PersistentAgentsAdministrationClientOptions` |\n| `Agent` | `PersistentAgent` |\n| `AgentThread` | `PersistentAgentThread`|\n| `ThreadMessage` | `PersistentThreadMessage` |\n| `ThreadRun` | `ThreadRun` |\n| `AgentFile` | `PersistentAgentFileInfo` |\n| `VectorStore` | `PersistentAgentsVectorStore` |\n\n## Class structure changes\nHere `projectClient` is `AIProjectClient` and `agentClient` is `PersistentAgentsClient` for version `1.0.0-beta.9` and `AgentsClient` for earlier versions.\n\nAgents Operations\n\n| Hub-based | Endpoint-based |\n|-|-|\n| `projectClient.GetAgentsClient` | `projectClient.GetPersistentAgentsClient` |\n| `agentClient.CreateAgent` | `agentClient.Administration.CreateAgent` |\n| `agentClient.GetAgents` | `agentClient.Administration.GetAgents` |\n| `agentClient.GetAgent` | `agentClient.Administration..GetAgent` |\n| `agentClient.UpdateAgent` | `agentClient.Administration.UpdateAgent` |\n| `agentClient.Delete_agent` | `agentClient.Administration.DeleteAgent` |\n| `agentClient.CreateThreadAndRun` | `agentClient.CreateThreadAndRun` |\n\nThreads Operations\n\n| Hub-based | Endpoint-based |\n|-|-|\n| `agentClient.CreateThread` | `agentClient.Threads.CreateThread` |\n| `agentClient.GetThread` | `agentClient.Threads.GetThread` |\n| `agentClient.UpdateThread` | `agentClient.Threads.UpdateThread` |\n| `agentClient.GetThreads` | `agentClient.Threads.GetThreads` |\n| `agentClient.DeleteThread` | `agentClient.Threads.DeleteThread` |\n\nMessages Operations\n\n| Hub-based | Endpoint-based |\n|-|-|\n| `agentClient.CreateMessage` | `agentClient.Messages.CreateMessage` |\n| `agentClient.GetMessages` | `agentClient.Messages.GetMessages` |\n| `agentClient.GetMessage` | `pagentClient.Messages.GetMessage` |\n| `agentClient.UpdateMessage` | `agentClient.Messages.UpdateMessage` |\n\nRuns Operations\n\n| Hub-based | Endpoint-based |\n|-|-|\n| `agentClient.CreateRun` | `agentClient.Runs.CreateRun` |\n| `agentClient.GetRun` | `agentClient.Runs.GetRun` |\n| `agentClient.GetRuns` | `agentClient.Runs.GetRuns` |\n| `agentClient.UpdateRun` | `agentClient.Runs.UpdateRun` |\n| `agentClient.CreateRunStreaming` | `agentClient.Runs.CreateRunStreaming` |\n| `agentClient.SubmitToolOutputsToRun` | `agentClient.Runs.SubmitToolOutputsToRun` |\n| `agentClient.SubmitToolOutputsToStream` | `agentClient.Runs.SubmitToolOutputsToStream` |\n| `agentClient.CancelRun` | `agentClient.Runs.CancelRun` |\n\nRun Steps Operations\n\n| Hub-based | Endpoint-based |\n|-|-|\n| `agentClient.GetRunStep` | `agentClient.Runs.GetRunStep` |\n| `agentClient.GetRunSteps` | `agentClient.Runs.GetRunSteps` |\n\nVector Stores Operations\n\n| 
Hub-based | Endpoint-based |\n|-|-|\n| `agentClient.CreateVectorStore` | `agentClient.VectorStores.CreateVectorStore |\n| `agentClient.GetVectorStores` | `agentClient.VectorStores.GetVectorStores |\n| `agentClient.GetVectorStore` | `agentClient.VectorStores.GetVectorStore |\n| `agentClient.ModifyVectorStore` | `agentClient.VectorStores.ModifyVectorStore |\n| `agentClient.DeleteVectorStore` | `agentClient.VectorStores.DeleteVectorStore |\n\nVector Store Files Operations\n\n| Hub-based | Endpoint-based |\n|-|-|\n| `agentClient.GetVectorStoreFiles` | `agentClient.VectorStores.GetVectorStoreFiles` |\n| `agentClient.CreateVectorStoreFile` | `agentClient.VectorStores.CreateVectorStoreFile` |\n| `agentClient.GetVectorStoreFile` | `agentClient.VectorStores.GetVectorStoreFile` |\n| `agentClient.DeleteVectorStoreFile` | `agentClient.VectorStores.DeleteVectorStoreFile` |\n\nVector Store File Batches Operations\n\n| Hub-based | Endpoint-based |\n|-|-|\n| `agentClient.CreateVectorStoreFileBatch` | `agentClient.VectorStores.CreateVectorStoreFileBatch`|\n| `agentClient.GetVectorStoreFileBatch` | `agentClient.VectorStores.GetVectorStoreFileBatch`|\n| `agentClient.GetVectorSroreFileBatchFiles` | `agentClient.VectorStores.GetVectorStoreFileBatchFiles`|\n| `agentClient.CancelVectorStoreFileBatch` | `agentClient.VectorStores.CancelVectorStoreFileBatch`|\n\nFiles Operations\n\n| Hub-based | Endpoint-based |\n|-|-|\n| `agentClient.UploadFile` | `agentClient.Files.UploadFile` |\n| `agentClient.GetFile` | `agentClient.Files.GetFile` |\n| `agentClient.GetFileContent` | `agentClient.Files.GetFileContent` |\n| `agentClient.GetFiles` | `agentClient.Files.GetFiles` |\n| `agentClient.DeleteFile` | `agentClient.Files.DeleteFile` |\n\n## API changes\n1. Create project. The connection string is replaced by the endpoint. The project endpoint URL has the form https://\\.services.ai.azure.com/api/projects/\\. It can be found in your Azure AI Foundry Project overview page.\n\n **Hub-based**\n ```C#\n AIProjectClient projectClient = new (\n connectionString: connectionString,\n credential: new DefaultAzureCredential());\n ```\n\n **Endpoint-based**\n ```C#\n AIProjectClient projectClient = new(endpoint: endpoint, credential: new DefaultAzureCredential())\n ```\n2. Agents client. In the new `Azure.AI.Projects` version `1.0.0-beta.9` agents are managed using `PersistentAgentsClient` class. It can be obtained from `AIProjectClient` instance or can be created using constructor. The letter allows to provide additional options for the `PersistentAgentsClient`.\n\n **Hub-based**\n ```C#\n AgentsClient agentClient = projectClient.GetAgentsClient();\n ```\n\n **Endpoint-based**\n Create agent using `AIProjectClient`.\n ```C#\n AgentsClient agentClient = projectClient.GetAgentsClient();", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2180635446", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 51023, - "pr_file": "sdk/ai/Azure.AI.Projects/AGENTS_MIGRATION_GUIDE.md", - "discussion_id": "2180635446", - "commented_code": "@@ -0,0 +1,615 @@\n+# Agents migration guide from Hub-based projects to Endpoint-based projects.\n+This guide describes migration from hub-based to Endpoint-based projects. To create a Endpoint-based project, please use one of the deployment scripts on [foundry samples repository](https://github.com/azure-ai-foundry/foundry-samples/tree/main/samples/microsoft/infrastructure-setup) appropriate for your scenario, also you can use Azure AI Foundry UI. 
The support of hub-based projects was dropped in `Azure.AI.Projects` version `1.0.0-beta.9`. In this document, we show the operation implementation of before `1.0.0-beta.9` in **Hub-based** secion, followed by code for `azure-ai-projects` version `1.0.0-beta.9` or later in **Endpoint-based**.\n+\n+## Installation\n+Starting from version `1.0.0-beta.9`, the operations, related to agents were moved to a separate package `Azure.AI.Agents.Persistent`. To use agents please add both `Azure.AI.Projects` and `Azure.AI.Agents.Persistent` packages into the project.\n+```\n+dotnet add package Azure.AI.Projects --version 1.0.0-beta.9\n+dotnet add package Azure.AI.Agents.Persistent --version 1.1.0-beta.3\n+```\n+\n+## Namespace changes\n+Agents were moved to a new package and namespace `Azure.AI.Agents.Persistent`.\n+\n+## Class renamings\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| New in `1.0.0-beta.9` | `PersistentAgentsClient` |\n+| `AgentsClient` | `PersistentAgentsAdministrationClient` |\n+| `AgentsClientOptions` | `PersistentAgentsAdministrationClientOptions` |\n+| `Agent` | `PersistentAgent` |\n+| `AgentThread` | `PersistentAgentThread`|\n+| `ThreadMessage` | `PersistentThreadMessage` |\n+| `ThreadRun` | `ThreadRun` |\n+| `AgentFile` | `PersistentAgentFileInfo` |\n+| `VectorStore` | `PersistentAgentsVectorStore` |\n+\n+## Class structure changes\n+Here `projectClient` is `AIProjectClient` and `agentClient` is `PersistentAgentsClient` for version `1.0.0-beta.9` and `AgentsClient` for earlier versions.\n+\n+Agents Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `projectClient.GetAgentsClient` | `projectClient.GetPersistentAgentsClient` |\n+| `agentClient.CreateAgent` | `agentClient.Administration.CreateAgent` |\n+| `agentClient.GetAgents` | `agentClient.Administration.GetAgents` |\n+| `agentClient.GetAgent` | `agentClient.Administration..GetAgent` |\n+| `agentClient.UpdateAgent` | `agentClient.Administration.UpdateAgent` |\n+| `agentClient.Delete_agent` | `agentClient.Administration.DeleteAgent` |\n+| `agentClient.CreateThreadAndRun` | `agentClient.CreateThreadAndRun` |\n+\n+Threads Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.CreateThread` | `agentClient.Threads.CreateThread` |\n+| `agentClient.GetThread` | `agentClient.Threads.GetThread` |\n+| `agentClient.UpdateThread` | `agentClient.Threads.UpdateThread` |\n+| `agentClient.GetThreads` | `agentClient.Threads.GetThreads` |\n+| `agentClient.DeleteThread` | `agentClient.Threads.DeleteThread` |\n+\n+Messages Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.CreateMessage` | `agentClient.Messages.CreateMessage` |\n+| `agentClient.GetMessages` | `agentClient.Messages.GetMessages` |\n+| `agentClient.GetMessage` | `pagentClient.Messages.GetMessage` |\n+| `agentClient.UpdateMessage` | `agentClient.Messages.UpdateMessage` |\n+\n+Runs Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.CreateRun` | `agentClient.Runs.CreateRun` |\n+| `agentClient.GetRun` | `agentClient.Runs.GetRun` |\n+| `agentClient.GetRuns` | `agentClient.Runs.GetRuns` |\n+| `agentClient.UpdateRun` | `agentClient.Runs.UpdateRun` |\n+| `agentClient.CreateRunStreaming` | `agentClient.Runs.CreateRunStreaming` |\n+| `agentClient.SubmitToolOutputsToRun` | `agentClient.Runs.SubmitToolOutputsToRun` |\n+| `agentClient.SubmitToolOutputsToStream` | `agentClient.Runs.SubmitToolOutputsToStream` |\n+| `agentClient.CancelRun` | `agentClient.Runs.CancelRun` |\n+\n+Run Steps Operations\n+\n+| Hub-based | Endpoint-based 
|\n+|-|-|\n+| `agentClient.GetRunStep` | `agentClient.Runs.GetRunStep` |\n+| `agentClient.GetRunSteps` | `agentClient.Runs.GetRunSteps` |\n+\n+Vector Stores Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.CreateVectorStore` | `agentClient.VectorStores.CreateVectorStore |\n+| `agentClient.GetVectorStores` | `agentClient.VectorStores.GetVectorStores |\n+| `agentClient.GetVectorStore` | `agentClient.VectorStores.GetVectorStore |\n+| `agentClient.ModifyVectorStore` | `agentClient.VectorStores.ModifyVectorStore |\n+| `agentClient.DeleteVectorStore` | `agentClient.VectorStores.DeleteVectorStore |\n+\n+Vector Store Files Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.GetVectorStoreFiles` | `agentClient.VectorStores.GetVectorStoreFiles` |\n+| `agentClient.CreateVectorStoreFile` | `agentClient.VectorStores.CreateVectorStoreFile` |\n+| `agentClient.GetVectorStoreFile` | `agentClient.VectorStores.GetVectorStoreFile` |\n+| `agentClient.DeleteVectorStoreFile` | `agentClient.VectorStores.DeleteVectorStoreFile` |\n+\n+Vector Store File Batches Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.CreateVectorStoreFileBatch` | `agentClient.VectorStores.CreateVectorStoreFileBatch`|\n+| `agentClient.GetVectorStoreFileBatch` | `agentClient.VectorStores.GetVectorStoreFileBatch`|\n+| `agentClient.GetVectorSroreFileBatchFiles` | `agentClient.VectorStores.GetVectorStoreFileBatchFiles`|\n+| `agentClient.CancelVectorStoreFileBatch` | `agentClient.VectorStores.CancelVectorStoreFileBatch`|\n+\n+Files Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.UploadFile` | `agentClient.Files.UploadFile` |\n+| `agentClient.GetFile` | `agentClient.Files.GetFile` |\n+| `agentClient.GetFileContent` | `agentClient.Files.GetFileContent` |\n+| `agentClient.GetFiles` | `agentClient.Files.GetFiles` |\n+| `agentClient.DeleteFile` | `agentClient.Files.DeleteFile` |\n+\n+## API changes\n+1. Create project. The connection string is replaced by the endpoint. The project endpoint URL has the form https://\\.services.ai.azure.com/api/projects/\\. It can be found in your Azure AI Foundry Project overview page.\n+\n+ **Hub-based**\n+ ```C#\n+ AIProjectClient projectClient = new (\n+ connectionString: connectionString,\n+ credential: new DefaultAzureCredential());\n+ ```\n+\n+ **Endpoint-based**\n+ ```C#\n+ AIProjectClient projectClient = new(endpoint: endpoint, credential: new DefaultAzureCredential())\n+ ```\n+2. Agents client. In the new `Azure.AI.Projects` version `1.0.0-beta.9` agents are managed using `PersistentAgentsClient` class. It can be obtained from `AIProjectClient` instance or can be created using constructor. 
The letter allows to provide additional options for the `PersistentAgentsClient`.\n+\n+ **Hub-based**\n+ ```C#\n+ AgentsClient agentClient = projectClient.GetAgentsClient();\n+ ```\n+\n+ **Endpoint-based**\n+ Create agent using `AIProjectClient`.\n+ ```C#\n+ AgentsClient agentClient = projectClient.GetAgentsClient();", - "comment_created_at": "2025-07-02T17:44:03+00:00", - "comment_author": "Copilot", - "comment_body": "Endpoint-based example should use `PersistentAgentsClient` and `GetPersistentAgentsClient()` rather than `AgentsClient`/`GetAgentsClient()`.\n```suggestion\n PersistentAgentsClient agentClient = projectClient.GetPersistentAgentsClient();\n```", - "pr_file_module": null - }, - { - "comment_id": "2180676387", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 51023, - "pr_file": "sdk/ai/Azure.AI.Projects/AGENTS_MIGRATION_GUIDE.md", - "discussion_id": "2180635446", - "commented_code": "@@ -0,0 +1,615 @@\n+# Agents migration guide from Hub-based projects to Endpoint-based projects.\n+This guide describes migration from hub-based to Endpoint-based projects. To create a Endpoint-based project, please use one of the deployment scripts on [foundry samples repository](https://github.com/azure-ai-foundry/foundry-samples/tree/main/samples/microsoft/infrastructure-setup) appropriate for your scenario, also you can use Azure AI Foundry UI. The support of hub-based projects was dropped in `Azure.AI.Projects` version `1.0.0-beta.9`. In this document, we show the operation implementation of before `1.0.0-beta.9` in **Hub-based** secion, followed by code for `azure-ai-projects` version `1.0.0-beta.9` or later in **Endpoint-based**.\n+\n+## Installation\n+Starting from version `1.0.0-beta.9`, the operations, related to agents were moved to a separate package `Azure.AI.Agents.Persistent`. 
To use agents please add both `Azure.AI.Projects` and `Azure.AI.Agents.Persistent` packages into the project.\n+```\n+dotnet add package Azure.AI.Projects --version 1.0.0-beta.9\n+dotnet add package Azure.AI.Agents.Persistent --version 1.1.0-beta.3\n+```\n+\n+## Namespace changes\n+Agents were moved to a new package and namespace `Azure.AI.Agents.Persistent`.\n+\n+## Class renamings\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| New in `1.0.0-beta.9` | `PersistentAgentsClient` |\n+| `AgentsClient` | `PersistentAgentsAdministrationClient` |\n+| `AgentsClientOptions` | `PersistentAgentsAdministrationClientOptions` |\n+| `Agent` | `PersistentAgent` |\n+| `AgentThread` | `PersistentAgentThread`|\n+| `ThreadMessage` | `PersistentThreadMessage` |\n+| `ThreadRun` | `ThreadRun` |\n+| `AgentFile` | `PersistentAgentFileInfo` |\n+| `VectorStore` | `PersistentAgentsVectorStore` |\n+\n+## Class structure changes\n+Here `projectClient` is `AIProjectClient` and `agentClient` is `PersistentAgentsClient` for version `1.0.0-beta.9` and `AgentsClient` for earlier versions.\n+\n+Agents Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `projectClient.GetAgentsClient` | `projectClient.GetPersistentAgentsClient` |\n+| `agentClient.CreateAgent` | `agentClient.Administration.CreateAgent` |\n+| `agentClient.GetAgents` | `agentClient.Administration.GetAgents` |\n+| `agentClient.GetAgent` | `agentClient.Administration..GetAgent` |\n+| `agentClient.UpdateAgent` | `agentClient.Administration.UpdateAgent` |\n+| `agentClient.Delete_agent` | `agentClient.Administration.DeleteAgent` |\n+| `agentClient.CreateThreadAndRun` | `agentClient.CreateThreadAndRun` |\n+\n+Threads Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.CreateThread` | `agentClient.Threads.CreateThread` |\n+| `agentClient.GetThread` | `agentClient.Threads.GetThread` |\n+| `agentClient.UpdateThread` | `agentClient.Threads.UpdateThread` |\n+| `agentClient.GetThreads` | `agentClient.Threads.GetThreads` |\n+| `agentClient.DeleteThread` | `agentClient.Threads.DeleteThread` |\n+\n+Messages Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.CreateMessage` | `agentClient.Messages.CreateMessage` |\n+| `agentClient.GetMessages` | `agentClient.Messages.GetMessages` |\n+| `agentClient.GetMessage` | `pagentClient.Messages.GetMessage` |\n+| `agentClient.UpdateMessage` | `agentClient.Messages.UpdateMessage` |\n+\n+Runs Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.CreateRun` | `agentClient.Runs.CreateRun` |\n+| `agentClient.GetRun` | `agentClient.Runs.GetRun` |\n+| `agentClient.GetRuns` | `agentClient.Runs.GetRuns` |\n+| `agentClient.UpdateRun` | `agentClient.Runs.UpdateRun` |\n+| `agentClient.CreateRunStreaming` | `agentClient.Runs.CreateRunStreaming` |\n+| `agentClient.SubmitToolOutputsToRun` | `agentClient.Runs.SubmitToolOutputsToRun` |\n+| `agentClient.SubmitToolOutputsToStream` | `agentClient.Runs.SubmitToolOutputsToStream` |\n+| `agentClient.CancelRun` | `agentClient.Runs.CancelRun` |\n+\n+Run Steps Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.GetRunStep` | `agentClient.Runs.GetRunStep` |\n+| `agentClient.GetRunSteps` | `agentClient.Runs.GetRunSteps` |\n+\n+Vector Stores Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.CreateVectorStore` | `agentClient.VectorStores.CreateVectorStore |\n+| `agentClient.GetVectorStores` | `agentClient.VectorStores.GetVectorStores |\n+| `agentClient.GetVectorStore` | `agentClient.VectorStores.GetVectorStore 
|\n+| `agentClient.ModifyVectorStore` | `agentClient.VectorStores.ModifyVectorStore |\n+| `agentClient.DeleteVectorStore` | `agentClient.VectorStores.DeleteVectorStore |\n+\n+Vector Store Files Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.GetVectorStoreFiles` | `agentClient.VectorStores.GetVectorStoreFiles` |\n+| `agentClient.CreateVectorStoreFile` | `agentClient.VectorStores.CreateVectorStoreFile` |\n+| `agentClient.GetVectorStoreFile` | `agentClient.VectorStores.GetVectorStoreFile` |\n+| `agentClient.DeleteVectorStoreFile` | `agentClient.VectorStores.DeleteVectorStoreFile` |\n+\n+Vector Store File Batches Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.CreateVectorStoreFileBatch` | `agentClient.VectorStores.CreateVectorStoreFileBatch`|\n+| `agentClient.GetVectorStoreFileBatch` | `agentClient.VectorStores.GetVectorStoreFileBatch`|\n+| `agentClient.GetVectorSroreFileBatchFiles` | `agentClient.VectorStores.GetVectorStoreFileBatchFiles`|\n+| `agentClient.CancelVectorStoreFileBatch` | `agentClient.VectorStores.CancelVectorStoreFileBatch`|\n+\n+Files Operations\n+\n+| Hub-based | Endpoint-based |\n+|-|-|\n+| `agentClient.UploadFile` | `agentClient.Files.UploadFile` |\n+| `agentClient.GetFile` | `agentClient.Files.GetFile` |\n+| `agentClient.GetFileContent` | `agentClient.Files.GetFileContent` |\n+| `agentClient.GetFiles` | `agentClient.Files.GetFiles` |\n+| `agentClient.DeleteFile` | `agentClient.Files.DeleteFile` |\n+\n+## API changes\n+1. Create project. The connection string is replaced by the endpoint. The project endpoint URL has the form https://\\.services.ai.azure.com/api/projects/\\. It can be found in your Azure AI Foundry Project overview page.\n+\n+ **Hub-based**\n+ ```C#\n+ AIProjectClient projectClient = new (\n+ connectionString: connectionString,\n+ credential: new DefaultAzureCredential());\n+ ```\n+\n+ **Endpoint-based**\n+ ```C#\n+ AIProjectClient projectClient = new(endpoint: endpoint, credential: new DefaultAzureCredential())\n+ ```\n+2. Agents client. In the new `Azure.AI.Projects` version `1.0.0-beta.9` agents are managed using `PersistentAgentsClient` class. It can be obtained from `AIProjectClient` instance or can be created using constructor. The letter allows to provide additional options for the `PersistentAgentsClient`.\n+\n+ **Hub-based**\n+ ```C#\n+ AgentsClient agentClient = projectClient.GetAgentsClient();\n+ ```\n+\n+ **Endpoint-based**\n+ Create agent using `AIProjectClient`.\n+ ```C#\n+ AgentsClient agentClient = projectClient.GetAgentsClient();", - "comment_created_at": "2025-07-02T18:08:51+00:00", - "comment_author": "nick863", - "comment_body": "Fixed.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2170502909", - "pr_number": 50575, - "pr_file": ".github/copilot-instructions.md", - "created_at": "2025-06-27T01:47:40+00:00", - "commented_code": "# Repository information\nNote that files in this repository are generally organized in the following way:\n- `azure-sdk-for-net/sdk/{service-directory}/{package-name}` holds everything for a specific Azure SDK package.\n- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/src` holds the source code for the package.\n- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/tests` holds the tests for the package.\n- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/samples` holds the samples for the package.\n\nThere are a few exceptions where package-name is replaced with a shorter directory name. 
For example in the cognitiveservices directory. The package `Microsoft.Azure.CognitiveServices.Language.SpellCheck` can be found in `azure-sdk-for-net/sdk/cognitiveservices/Language.SpellCheck`. When in doubt, you can look at the name of the .csproj file within the src folder to determine the package name.\n\n## Definitions:\n- \"service directory\" refers to the folder under `sdk`. For example, `azure-sdk-for-net/sdk/eventhub`, `eventhub` is the service directory\n- \"data plane\" refers to packages that don't include `ResourceManager` in the package name. They are used to interact with azure resources at application run time.\n- \"management plane\" refers to packages that include `ResourceManager` in the package name. They are used to manage (create/modify/delete) azure resources.\n- \"track 2\" refers to packages that start with `Azure`. Unless otherwise specified, assume that references to \"data plane\" mean \"track 2 data plane\", i.e. packages that start with `Azure` and don't include `ResourceManager` in the package name. Unless otherwise specified, assume that references to \"management plane\" mean \"track 2 management plane\", i.e. packages that start with `Azure.ResourceManager`.\n- \"functions extensions packages\" or sometimes just \"extensions packages\" refers to packages that start with `Microsoft.Azure.WebJobs.Extensions`. They are built on data plane packages and are used with Azure Functions.\n\n# Requirements\n- If you are writing C# code within the `azure-sdk-for-net/sdk` directory:\n 1. Follow the coding guidelines in the \"Coding guidelines\" section below.\n 2. You should never manually make changes to `*/Generated/*` files, e.g. `azure-sdk-for-net/sdk/containerregistry/Azure.Containers.ContainerRegistry/src/Generated/`\n - Only re-generate these files if instructed to do so. If you are instructed to regenerate an SDK, use `dotnet build /t:GenerateCode`\n - If you feel like you need to make changes to these files beyond re-generating them in order to complete your task, do not do this, instead see if you can work around the problem in the code that is not in the `Generated` folder. If you can't, report this to the user.\n 3. Code should build successfully using the following steps:\n - navigate to the root of the repository and run `dotnet build eng\\service.proj /p:ServiceDirectory={service-directory}` for each service directory you modified. For example, if you modified code in `sdk/eventhub` and `sdk/keyvault`, you would run:\n `dotnet build eng\\service.proj /p:ServiceDirectory=eventhub` and `dotnet build eng\\service.proj /p:ServiceDirectory=keyvault`\n - If you see build errors, try to fix them, if you can't fix them within 5 iterations, give up, do not do steps 4 or 5, and report this to the user. Do not report success if the build fails!\n 4. Once the code builds, run the unit tests using `dotnet test eng/service.proj /p:ServiceDirectory={service-directory} --filter TestCategory!=Live` for each service directory you modified. Try to fix failures if you can within 5 iterations. If you can't, give up and report this to the user. 
Do not report success if the tests fail!", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2170502909", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50575, - "pr_file": ".github/copilot-instructions.md", - "discussion_id": "2170502909", - "commented_code": "@@ -0,0 +1,34 @@\n+# Repository information\n+Note that files in this repository are generally organized in the following way:\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}` holds everything for a specific Azure SDK package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/src` holds the source code for the package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/tests` holds the tests for the package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/samples` holds the samples for the package.\n+\n+There are a few exceptions where package-name is replaced with a shorter directory name. For example in the cognitiveservices directory. The package `Microsoft.Azure.CognitiveServices.Language.SpellCheck` can be found in `azure-sdk-for-net/sdk/cognitiveservices/Language.SpellCheck`. When in doubt, you can look at the name of the .csproj file within the src folder to determine the package name.\n+\n+## Definitions:\n+- \"service directory\" refers to the folder under `sdk`. For example, `azure-sdk-for-net/sdk/eventhub`, `eventhub` is the service directory\n+- \"data plane\" refers to packages that don't include `ResourceManager` in the package name. They are used to interact with azure resources at application run time.\n+- \"management plane\" refers to packages that include `ResourceManager` in the package name. They are used to manage (create/modify/delete) azure resources.\n+- \"track 2\" refers to packages that start with `Azure`. Unless otherwise specified, assume that references to \"data plane\" mean \"track 2 data plane\", i.e. packages that start with `Azure` and don't include `ResourceManager` in the package name. Unless otherwise specified, assume that references to \"management plane\" mean \"track 2 management plane\", i.e. packages that start with `Azure.ResourceManager`.\n+- \"functions extensions packages\" or sometimes just \"extensions packages\" refers to packages that start with `Microsoft.Azure.WebJobs.Extensions`. They are built on data plane packages and are used with Azure Functions.\n+\n+# Requirements\n+- If you are writing C# code within the `azure-sdk-for-net/sdk` directory:\n+ 1. Follow the coding guidelines in the \"Coding guidelines\" section below.\n+ 2. You should never manually make changes to `*/Generated/*` files, e.g. `azure-sdk-for-net/sdk/containerregistry/Azure.Containers.ContainerRegistry/src/Generated/`\n+ - Only re-generate these files if instructed to do so. If you are instructed to regenerate an SDK, use `dotnet build /t:GenerateCode`\n+ - If you feel like you need to make changes to these files beyond re-generating them in order to complete your task, do not do this, instead see if you can work around the problem in the code that is not in the `Generated` folder. If you can't, report this to the user.\n+ 3. Code should build successfully using the following steps:\n+ - navigate to the root of the repository and run `dotnet build eng\\service.proj /p:ServiceDirectory={service-directory}` for each service directory you modified. 
For example, if you modified code in `sdk/eventhub` and `sdk/keyvault`, you would run:\n+ `dotnet build eng\\service.proj /p:ServiceDirectory=eventhub` and `dotnet build eng\\service.proj /p:ServiceDirectory=keyvault`\n+ - If you see build errors, try to fix them, if you can't fix them within 5 iterations, give up, do not do steps 4 or 5, and report this to the user. Do not report success if the build fails!\n+ 4. Once the code builds, run the unit tests using `dotnet test eng/service.proj /p:ServiceDirectory={service-directory} --filter TestCategory!=Live` for each service directory you modified. Try to fix failures if you can within 5 iterations. If you can't, give up and report this to the user. Do not report success if the tests fail!", - "comment_created_at": "2025-06-27T01:47:40+00:00", - "comment_author": "jsquire", - "comment_body": "We should consider adding one that instructs it to avoid making any breaking change to a public API and avoid adding public API members unless explicitly instructed to do so.", - "pr_file_module": null - }, - { - "comment_id": "2172760547", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50575, - "pr_file": ".github/copilot-instructions.md", - "discussion_id": "2170502909", - "commented_code": "@@ -0,0 +1,34 @@\n+# Repository information\n+Note that files in this repository are generally organized in the following way:\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}` holds everything for a specific Azure SDK package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/src` holds the source code for the package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/tests` holds the tests for the package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/samples` holds the samples for the package.\n+\n+There are a few exceptions where package-name is replaced with a shorter directory name. For example in the cognitiveservices directory. The package `Microsoft.Azure.CognitiveServices.Language.SpellCheck` can be found in `azure-sdk-for-net/sdk/cognitiveservices/Language.SpellCheck`. When in doubt, you can look at the name of the .csproj file within the src folder to determine the package name.\n+\n+## Definitions:\n+- \"service directory\" refers to the folder under `sdk`. For example, `azure-sdk-for-net/sdk/eventhub`, `eventhub` is the service directory\n+- \"data plane\" refers to packages that don't include `ResourceManager` in the package name. They are used to interact with azure resources at application run time.\n+- \"management plane\" refers to packages that include `ResourceManager` in the package name. They are used to manage (create/modify/delete) azure resources.\n+- \"track 2\" refers to packages that start with `Azure`. Unless otherwise specified, assume that references to \"data plane\" mean \"track 2 data plane\", i.e. packages that start with `Azure` and don't include `ResourceManager` in the package name. Unless otherwise specified, assume that references to \"management plane\" mean \"track 2 management plane\", i.e. packages that start with `Azure.ResourceManager`.\n+- \"functions extensions packages\" or sometimes just \"extensions packages\" refers to packages that start with `Microsoft.Azure.WebJobs.Extensions`. They are built on data plane packages and are used with Azure Functions.\n+\n+# Requirements\n+- If you are writing C# code within the `azure-sdk-for-net/sdk` directory:\n+ 1. Follow the coding guidelines in the \"Coding guidelines\" section below.\n+ 2. 
You should never manually make changes to `*/Generated/*` files, e.g. `azure-sdk-for-net/sdk/containerregistry/Azure.Containers.ContainerRegistry/src/Generated/`\n+ - Only re-generate these files if instructed to do so. If you are instructed to regenerate an SDK, use `dotnet build /t:GenerateCode`\n+ - If you feel like you need to make changes to these files beyond re-generating them in order to complete your task, do not do this, instead see if you can work around the problem in the code that is not in the `Generated` folder. If you can't, report this to the user.\n+ 3. Code should build successfully using the following steps:\n+ - navigate to the root of the repository and run `dotnet build eng\\service.proj /p:ServiceDirectory={service-directory}` for each service directory you modified. For example, if you modified code in `sdk/eventhub` and `sdk/keyvault`, you would run:\n+ `dotnet build eng\\service.proj /p:ServiceDirectory=eventhub` and `dotnet build eng\\service.proj /p:ServiceDirectory=keyvault`\n+ - If you see build errors, try to fix them, if you can't fix them within 5 iterations, give up, do not do steps 4 or 5, and report this to the user. Do not report success if the build fails!\n+ 4. Once the code builds, run the unit tests using `dotnet test eng/service.proj /p:ServiceDirectory={service-directory} --filter TestCategory!=Live` for each service directory you modified. Try to fix failures if you can within 5 iterations. If you can't, give up and report this to the user. Do not report success if the tests fail!", - "comment_created_at": "2025-06-27T19:47:02+00:00", - "comment_author": "m-redding", - "comment_body": "I added one below, I'll test it over the next couple days and see if the copilot follows it", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2170843437", - "pr_number": 50712, - "pr_file": "sdk/terraform/Azure.ResourceManager.Terraform/CHANGELOG.md", - "created_at": "2025-06-27T05:47:43+00:00", - "commented_code": "# Release History\n\n## 1.0.0-beta.2 (Unreleased)\n## 1.0.0-beta.2 (2025-06-19)\n\n### Features Added\n\n### Breaking Changes\n- **Support for Exclusion Filters in Export Operations**:\n - Added `ExcludeAzureResource` and `ExcludeTerraformResource` properties to export parameter models,\n these allow users to exclude resources from being exported based on Azure resource ID patterns or Terraform resource types.\n\n### Bugs Fixed\n- **Authorization Scope Filter Support**:\n - Introduced the `AuthorizationScopeFilter` struct, enabling fine-grained control over the scope of Azure Resource Graph queries during export.", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2170843437", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50712, - "pr_file": "sdk/terraform/Azure.ResourceManager.Terraform/CHANGELOG.md", - "discussion_id": "2170843437", - "commented_code": "@@ -1,14 +1,21 @@\n # Release History\n \n-## 1.0.0-beta.2 (Unreleased)\n+## 1.0.0-beta.2 (2025-06-19)\n \n ### Features Added\n \n-### Breaking Changes\n+- **Support for Exclusion Filters in Export Operations**:\n+ - Added `ExcludeAzureResource` and `ExcludeTerraformResource` properties to export parameter models,\n+ these allow users to exclude resources from being exported based on Azure resource ID patterns or Terraform resource types.\n \n-### Bugs Fixed\n+- **Authorization Scope Filter Support**:\n+ - Introduced the `AuthorizationScopeFilter` struct, enabling fine-grained control over the scope of Azure Resource Graph 
queries during export.", - "comment_created_at": "2025-06-27T05:47:43+00:00", - "comment_author": "ArcturusZhang", - "comment_body": "```suggestion\r\n - Introduced the `TerraformAuthorizationScopeFilter` struct, enabling fine-grained control over the scope of Azure Resource Graph queries during export.\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2170860019", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50712, - "pr_file": "sdk/terraform/Azure.ResourceManager.Terraform/CHANGELOG.md", - "discussion_id": "2170843437", - "commented_code": "@@ -1,14 +1,21 @@\n # Release History\n \n-## 1.0.0-beta.2 (Unreleased)\n+## 1.0.0-beta.2 (2025-06-19)\n \n ### Features Added\n \n-### Breaking Changes\n+- **Support for Exclusion Filters in Export Operations**:\n+ - Added `ExcludeAzureResource` and `ExcludeTerraformResource` properties to export parameter models,\n+ these allow users to exclude resources from being exported based on Azure resource ID patterns or Terraform resource types.\n \n-### Bugs Fixed\n+- **Authorization Scope Filter Support**:\n+ - Introduced the `AuthorizationScopeFilter` struct, enabling fine-grained control over the scope of Azure Resource Graph queries during export.", - "comment_created_at": "2025-06-27T05:55:40+00:00", - "comment_author": "gerrytan", - "comment_body": "Fixed", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2058806603", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "created_at": "2025-04-24T16:12:44+00:00", - "commented_code": "while (toolOutputs.Count > 0);\n```\n\n#### Function call executed automatically\n\nIn addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n\nHere we use other functions for demonstration:\n```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\nprivate class Address\n{\n public string Street { get; set; }\n public string City { get; set; }\n}\n\nprivate int GetHumidityByAddress(Address address)\n{\n return (address.City == \"Seattle\") ? 60 : 80;\n}\n\nprivate string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2058806603", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2058806603", - "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")", - "comment_created_at": "2025-04-24T16:12:44+00:00", - "comment_author": "KrzysztofCwalina", - "comment_body": "Why do you get JSON parsed into Dictionary here? If we don't have time to add higher level helpers, we should just keep using raw JSON, i.e. UTF8 bytes wrapped in BinaryData. Then a reoutine like this would simply use one of the JSON parsers to parse the addresses and unit. 
\r\n\r\nAlso, long term we really need to be able to create methods like this that take Address[] as a parameter. Will we be able to do it? i.e. how will your code know whether the delegate accepts JSON or Address[]?", - "pr_file_module": null - }, - { - "comment_id": "2058955957", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2058806603", - "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")", - "comment_created_at": "2025-04-24T17:51:45+00:00", - "comment_author": "howieleung", - "comment_body": "I am just showing developers that we support Dictionary type. I can change to Address[]", - "pr_file_module": null - }, - { - "comment_id": "2059163033", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2058806603", - "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")", - "comment_created_at": "2025-04-24T20:17:52+00:00", - "comment_author": "KrzysztofCwalina", - "comment_body": "I don't think we should support dictionary like that. It just adds complexity and is not a nice API for the users", - "pr_file_module": null - }, - { - "comment_id": "2060553553", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2058806603", - "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 
60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")", - "comment_created_at": "2025-04-25T16:33:17+00:00", - "comment_author": "howieleung", - "comment_body": "Took it off.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2058822771", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "created_at": "2025-04-24T16:22:52+00:00", - "commented_code": "while (toolOutputs.Count > 0);\n```\n\n#### Function call executed automatically\n\nIn addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n\nHere we use other functions for demonstration:\n```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\nprivate class Address\n{\n public string Street { get; set; }\n public string City { get; set; }\n}\n\nprivate int GetHumidityByAddress(Address address)\n{\n return (address.City == \"Seattle\") ? 60 : 80;\n}\n\nprivate string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n{\n string[] temps = new string[addresses.Length];\n for (int i = 0; i < addresses.Length; i++)\n {\n if (addresses[i].TryGetValue(\"city\", out string city))\n {\n temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? \"20\" : \"50\", unit);\n }\n else\n {\n throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n }\n }\n return temps;\n}\n```\nNow we define the function definitions:\n```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\nprivate FunctionToolDefinition geHhumidityByAddressTool = new(\n name: \"GetHumidityByAddress\",\n description: \"Get humidity by street and city\",\n parameters: BinaryData.FromObjectAsJson(\n new\n {\n Type = \"object\",\n Properties = new\n {\n Address = new\n {\n Type = \"object\",\n Properties = new\n {\n Street = new\n {\n Type = \"string\",\n Description = \"Street\"\n },\n City = new\n {\n Type = \"string\",\n Description = \"city\"\n },\n },\n Required = new[] { \"street\", \"city\" }\n }\n },\n Required = new[] { \"address\" }\n },\n new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n\nprivate FunctionToolDefinition getWeatherByAddressesTool = new(\n name: \"GetWeatherByAddresses\",\n description: \"Get weather by street and city\",\n parameters: BinaryData.FromObjectAsJson(\n new\n {\n Type = \"object\",\n Properties = new\n {\n Addresses = new\n {\n Type = \"array\",\n Description = \"A list of addresses\",\n Items = new\n {\n Type = \"object\",\n Properties = new\n {\n Street = new\n {\n Type = \"string\",\n Description = \"Street\"\n },\n City = new\n {\n Type = \"string\",\n description = \"city\"\n },\n },\n Required = new[] { \"street\", \"city\" }\n }\n },\n Unit = new\n {\n Type = \"string\",\n Enum = new[] { \"c\", \"f\" },\n },\n },\n Required = new[] { \"addresses\" }\n },\n new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n```\nUse `EnableAutoFunctionCall` to enable the auto function call:\n```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\nList toolOutputs = new();\nDictionary delegates = new();\ndelegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\ndelegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\nAIProjectClientOptions options = new();\noptions.EnableAutoFunctionCalls(delegates);\n\nAgentsClient client = new(connectionString, new DefaultAzureCredential(), options);\n```\n\n\nWhen you create an agent, you can specify the function call by tools 
argument similar to the example of manual function calls:\n```C# Snippet:StreamingWithAutoFunctionCall_CreateAgent\nAgent agent = client.CreateAgent(\n model: modelDeploymentName,\n name: \"SDK Test Agent - Functions\",\n instructions: \"You are a weather bot. Use the provided functions to help answer questions. \"\n + \"Customize your responses to the user's preferences as much as possible and use friendly \"\n + \"nicknames for cities whenever possible.\",\n tools: [getWeatherByAddressesTool, geHhumidityByAddressTool]\n);\n```\n\nWe create a thread and message similar to the example of manual function tool calls:\n```C# Snippet:StreamingWithAutoFunctionCall_CreateThreadMessage\nAgentThread thread = client.CreateThread();\n\nThreadMessage message = client.CreateMessage(\n thread.Id,\n MessageRole.User,\n \"Get weather and humidity for street, 123 1st St in city, Seattle and street, 456 2nd Ave in city, Bellevue\");\n```\n\nThe agent will then call the function automatically when it is needed during `CreateRunStreamingAsync`:", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2058822771", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2058822771", - "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? 
\"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);\n+\n+AgentsClient client = new(connectionString, new DefaultAzureCredential(), options);\n+```\n+\n+\n+When you create an agent, you can specify the function call by tools argument similar to the example of manual function calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateAgent\n+Agent agent = client.CreateAgent(\n+ model: modelDeploymentName,\n+ name: \"SDK Test Agent - Functions\",\n+ instructions: \"You are a weather bot. Use the provided functions to help answer questions. 
\"\n+ + \"Customize your responses to the user's preferences as much as possible and use friendly \"\n+ + \"nicknames for cities whenever possible.\",\n+ tools: [getWeatherByAddressesTool, geHhumidityByAddressTool]\n+);\n+```\n+\n+We create a thread and message similar to the example of manual function tool calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateThreadMessage\n+AgentThread thread = client.CreateThread();\n+\n+ThreadMessage message = client.CreateMessage(\n+ thread.Id,\n+ MessageRole.User,\n+ \"Get weather and humidity for street, 123 1st St in city, Seattle and street, 456 2nd Ave in city, Bellevue\");\n+```\n+\n+The agent will then call the function automatically when it is needed during `CreateRunStreamingAsync`:", - "comment_created_at": "2025-04-24T16:22:52+00:00", - "comment_author": "KrzysztofCwalina", - "comment_body": "do we really need streaming runs here? Can we just use a simple completion?", - "pr_file_module": null - }, - { - "comment_id": "2058964616", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2058822771", - "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? 
\"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);\n+\n+AgentsClient client = new(connectionString, new DefaultAzureCredential(), options);\n+```\n+\n+\n+When you create an agent, you can specify the function call by tools argument similar to the example of manual function calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateAgent\n+Agent agent = client.CreateAgent(\n+ model: modelDeploymentName,\n+ name: \"SDK Test Agent - Functions\",\n+ instructions: \"You are a weather bot. Use the provided functions to help answer questions. 
\"\n+ + \"Customize your responses to the user's preferences as much as possible and use friendly \"\n+ + \"nicknames for cities whenever possible.\",\n+ tools: [getWeatherByAddressesTool, geHhumidityByAddressTool]\n+);\n+```\n+\n+We create a thread and message similar to the example of manual function tool calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateThreadMessage\n+AgentThread thread = client.CreateThread();\n+\n+ThreadMessage message = client.CreateMessage(\n+ thread.Id,\n+ MessageRole.User,\n+ \"Get weather and humidity for street, 123 1st St in city, Seattle and street, 456 2nd Ave in city, Bellevue\");\n+```\n+\n+The agent will then call the function automatically when it is needed during `CreateRunStreamingAsync`:", - "comment_created_at": "2025-04-24T17:56:02+00:00", - "comment_author": "howieleung", - "comment_body": "Auto function calling only support for streaming. For non-streaming, they need to have a while loop to call getRun. When the run say required_action, they need to call the function manually.", - "pr_file_module": null - }, - { - "comment_id": "2059165687", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2058822771", - "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? 
\"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);\n+\n+AgentsClient client = new(connectionString, new DefaultAzureCredential(), options);\n+```\n+\n+\n+When you create an agent, you can specify the function call by tools argument similar to the example of manual function calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateAgent\n+Agent agent = client.CreateAgent(\n+ model: modelDeploymentName,\n+ name: \"SDK Test Agent - Functions\",\n+ instructions: \"You are a weather bot. Use the provided functions to help answer questions. 
\"\n+ + \"Customize your responses to the user's preferences as much as possible and use friendly \"\n+ + \"nicknames for cities whenever possible.\",\n+ tools: [getWeatherByAddressesTool, geHhumidityByAddressTool]\n+);\n+```\n+\n+We create a thread and message similar to the example of manual function tool calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateThreadMessage\n+AgentThread thread = client.CreateThread();\n+\n+ThreadMessage message = client.CreateMessage(\n+ thread.Id,\n+ MessageRole.User,\n+ \"Get weather and humidity for street, 123 1st St in city, Seattle and street, 456 2nd Ave in city, Bellevue\");\n+```\n+\n+The agent will then call the function automatically when it is needed during `CreateRunStreamingAsync`:", - "comment_created_at": "2025-04-24T20:20:04+00:00", - "comment_author": "KrzysztofCwalina", - "comment_body": "why would we not support non-streaming? This seems like very suprising design for the user. ", - "pr_file_module": null - }, - { - "comment_id": "2060527319", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2058822771", - "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? 
\"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);\n+\n+AgentsClient client = new(connectionString, new DefaultAzureCredential(), options);\n+```\n+\n+\n+When you create an agent, you can specify the function call by tools argument similar to the example of manual function calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateAgent\n+Agent agent = client.CreateAgent(\n+ model: modelDeploymentName,\n+ name: \"SDK Test Agent - Functions\",\n+ instructions: \"You are a weather bot. Use the provided functions to help answer questions. 
\"\n+ + \"Customize your responses to the user's preferences as much as possible and use friendly \"\n+ + \"nicknames for cities whenever possible.\",\n+ tools: [getWeatherByAddressesTool, geHhumidityByAddressTool]\n+);\n+```\n+\n+We create a thread and message similar to the example of manual function tool calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateThreadMessage\n+AgentThread thread = client.CreateThread();\n+\n+ThreadMessage message = client.CreateMessage(\n+ thread.Id,\n+ MessageRole.User,\n+ \"Get weather and humidity for street, 123 1st St in city, Seattle and street, 456 2nd Ave in city, Bellevue\");\n+```\n+\n+The agent will then call the function automatically when it is needed during `CreateRunStreamingAsync`:", - "comment_created_at": "2025-04-25T16:13:44+00:00", - "comment_author": "howieleung", - "comment_body": "Because for non-streaming, developers write their own while loop to iterate the run. The function call must be made within the while loop. If the while loop is inside of the SDK, yes, I would want to call the function for them. ", - "pr_file_module": null - }, - { - "comment_id": "2060675130", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2058822771", - "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? 
\"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);\n+\n+AgentsClient client = new(connectionString, new DefaultAzureCredential(), options);\n+```\n+\n+\n+When you create an agent, you can specify the function call by tools argument similar to the example of manual function calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateAgent\n+Agent agent = client.CreateAgent(\n+ model: modelDeploymentName,\n+ name: \"SDK Test Agent - Functions\",\n+ instructions: \"You are a weather bot. Use the provided functions to help answer questions. 
\"\n+ + \"Customize your responses to the user's preferences as much as possible and use friendly \"\n+ + \"nicknames for cities whenever possible.\",\n+ tools: [getWeatherByAddressesTool, geHhumidityByAddressTool]\n+);\n+```\n+\n+We create a thread and message similar to the example of manual function tool calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateThreadMessage\n+AgentThread thread = client.CreateThread();\n+\n+ThreadMessage message = client.CreateMessage(\n+ thread.Id,\n+ MessageRole.User,\n+ \"Get weather and humidity for street, 123 1st St in city, Seattle and street, 456 2nd Ave in city, Bellevue\");\n+```\n+\n+The agent will then call the function automatically when it is needed during `CreateRunStreamingAsync`:", - "comment_created_at": "2025-04-25T18:04:50+00:00", - "comment_author": "KrzysztofCwalina", - "comment_body": "isn't the await foreach below the same as the while loop?", - "pr_file_module": null - }, - { - "comment_id": "2060919485", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2058822771", - "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? 
\"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);\n+\n+AgentsClient client = new(connectionString, new DefaultAzureCredential(), options);\n+```\n+\n+\n+When you create an agent, you can specify the function call by tools argument similar to the example of manual function calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateAgent\n+Agent agent = client.CreateAgent(\n+ model: modelDeploymentName,\n+ name: \"SDK Test Agent - Functions\",\n+ instructions: \"You are a weather bot. Use the provided functions to help answer questions. 
\"\n+ + \"Customize your responses to the user's preferences as much as possible and use friendly \"\n+ + \"nicknames for cities whenever possible.\",\n+ tools: [getWeatherByAddressesTool, geHhumidityByAddressTool]\n+);\n+```\n+\n+We create a thread and message similar to the example of manual function tool calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateThreadMessage\n+AgentThread thread = client.CreateThread();\n+\n+ThreadMessage message = client.CreateMessage(\n+ thread.Id,\n+ MessageRole.User,\n+ \"Get weather and humidity for street, 123 1st St in city, Seattle and street, 456 2nd Ave in city, Bellevue\");\n+```\n+\n+The agent will then call the function automatically when it is needed during `CreateRunStreamingAsync`:", - "comment_created_at": "2025-04-25T21:44:47+00:00", - "comment_author": "howieleung", - "comment_body": "It isn't because whether it is a for-loop or while -loop. I\r\nDuring streaming, the SDK inside run a loop to go through the events and I can add code to call functions. Regardless it needs to call function or not, we than `yeild return` the content out. \r\nOn the other hand, for non-streaming, users call getRun again and again in a loop until thread status is completed. Inside of the SDK, getRun does not have loop for me to do function call, submit output, and call function again, and retry if necessary. Users have to make function call manually inside of their loop.\r\nThe same design happens in Python SDK and SK as well.", - "pr_file_module": null - }, - { - "comment_id": "2061034362", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2058822771", - "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? 
\"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);\n+\n+AgentsClient client = new(connectionString, new DefaultAzureCredential(), options);\n+```\n+\n+\n+When you create an agent, you can specify the function call by tools argument similar to the example of manual function calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateAgent\n+Agent agent = client.CreateAgent(\n+ model: modelDeploymentName,\n+ name: \"SDK Test Agent - Functions\",\n+ instructions: \"You are a weather bot. Use the provided functions to help answer questions. 
\"\n+ + \"Customize your responses to the user's preferences as much as possible and use friendly \"\n+ + \"nicknames for cities whenever possible.\",\n+ tools: [getWeatherByAddressesTool, geHhumidityByAddressTool]\n+);\n+```\n+\n+We create a thread and message similar to the example of manual function tool calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateThreadMessage\n+AgentThread thread = client.CreateThread();\n+\n+ThreadMessage message = client.CreateMessage(\n+ thread.Id,\n+ MessageRole.User,\n+ \"Get weather and humidity for street, 123 1st St in city, Seattle and street, 456 2nd Ave in city, Bellevue\");\n+```\n+\n+The agent will then call the function automatically when it is needed during `CreateRunStreamingAsync`:", - "comment_created_at": "2025-04-25T23:41:45+00:00", - "comment_author": "KrzysztofCwalina", - "comment_body": "Then maybe we just move this off the client options and pass it to the streaming completion methods. As of right not, the API is confusing: the user initializes the client to do auto tool calls and then the client won't do auto tool calls unless the user calls the streaming APIs. ", - "pr_file_module": null - }, - { - "comment_id": "2062953026", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2058822771", - "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? 
\"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);\n+\n+AgentsClient client = new(connectionString, new DefaultAzureCredential(), options);\n+```\n+\n+\n+When you create an agent, you can specify the function call by tools argument similar to the example of manual function calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateAgent\n+Agent agent = client.CreateAgent(\n+ model: modelDeploymentName,\n+ name: \"SDK Test Agent - Functions\",\n+ instructions: \"You are a weather bot. Use the provided functions to help answer questions. 
\"\n+ + \"Customize your responses to the user's preferences as much as possible and use friendly \"\n+ + \"nicknames for cities whenever possible.\",\n+ tools: [getWeatherByAddressesTool, geHhumidityByAddressTool]\n+);\n+```\n+\n+We create a thread and message similar to the example of manual function tool calls:\n+```C# Snippet:StreamingWithAutoFunctionCall_CreateThreadMessage\n+AgentThread thread = client.CreateThread();\n+\n+ThreadMessage message = client.CreateMessage(\n+ thread.Id,\n+ MessageRole.User,\n+ \"Get weather and humidity for street, 123 1st St in city, Seattle and street, 456 2nd Ave in city, Bellevue\");\n+```\n+\n+The agent will then call the function automatically when it is needed during `CreateRunStreamingAsync`:", - "comment_created_at": "2025-04-28T06:17:24+00:00", - "comment_author": "howieleung", - "comment_body": "Done.. I added autoFunctionCallOptions to create_stream function.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2060975818", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "created_at": "2025-04-25T22:51:07+00:00", - "commented_code": "{\n if (functionName == getUserFavoriteCityTool.Name)\n {\n return new ToolOutput(toolCallId, GetUserFavoriteCity());\n return new ToolOutput(toolCallId, ToolCallsResolver.Resolve(GetUserFavoriteCity, functionArguments).ToString());", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2060975818", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2060975818", - "commented_code": "@@ -581,23 +582,15 @@ ToolOutput GetResolvedToolOutput(string functionName, string toolCallId, string\n {\n if (functionName == getUserFavoriteCityTool.Name)\n {\n- return new ToolOutput(toolCallId, GetUserFavoriteCity());\n+ return new ToolOutput(toolCallId, ToolCallsResolver.Resolve(GetUserFavoriteCity, functionArguments).ToString());", - "comment_created_at": "2025-04-25T22:51:07+00:00", - "comment_author": "KrzysztofCwalina", - "comment_body": "I don't think we should ship this API as public. If tool calls are not automatic, we want the caller to simply call the CLR API they want. We don't want them to go through some resolver. 
If you want an API that maps from tol call request arguments to CLR values that can be passed to a CLR method, then I would be fine with that, but honestly such API should be in some shared library, not in AI Project", - "pr_file_module": null - }, - { - "comment_id": "2061195294", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2060975818", - "commented_code": "@@ -581,23 +582,15 @@ ToolOutput GetResolvedToolOutput(string functionName, string toolCallId, string\n {\n if (functionName == getUserFavoriteCityTool.Name)\n {\n- return new ToolOutput(toolCallId, GetUserFavoriteCity());\n+ return new ToolOutput(toolCallId, ToolCallsResolver.Resolve(GetUserFavoriteCity, functionArguments).ToString());", - "comment_created_at": "2025-04-26T06:38:16+00:00", - "comment_author": "howieleung", - "comment_body": "changed to internal.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2098984732", - "pr_number": 50169, - "pr_file": "sdk/communication/Azure.Communication.PhoneNumbers/samples/Sample_PhoneNumbersClient.md", - "created_at": "2025-05-20T22:46:40+00:00", - "commented_code": "## Purchase phone numbers\n\nPhone numbers can be acquired through purchasing a reservation.\nPhone numbers can be acquired through purchasing a search.", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2098984732", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50169, - "pr_file": "sdk/communication/Azure.Communication.PhoneNumbers/samples/Sample_PhoneNumbersClient.md", - "discussion_id": "2098984732", - "commented_code": "@@ -34,7 +34,7 @@ while (!searchOperation.HasCompleted)\n \n ## Purchase phone numbers\n \n-Phone numbers can be acquired through purchasing a reservation.\n+Phone numbers can be acquired through purchasing a search.", - "comment_created_at": "2025-05-20T22:46:40+00:00", - "comment_author": "Copilot", - "comment_body": "The sample description appears inconsistent with the rest of the Reservations API documentation. It states 'purchasing a search' instead of 'purchasing a reservation'; please clarify the intended terminology to avoid confusion.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-document-ai-changes-thoroughly.json b/_reviewers/azure-sdk-for-net-document-ai-changes-thoroughly.json new file mode 100644 index 0000000..3a4b834 --- /dev/null +++ b/_reviewers/azure-sdk-for-net-document-ai-changes-thoroughly.json @@ -0,0 +1,82 @@ +[ + { + "discussion_id": "2172907663", + "pr_number": 50942, + "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/CHANGELOG.md", + "created_at": "2025-06-27T21:43:46+00:00", + "commented_code": "# Release History\n\n## 1.1.0-beta.3 (Unreleased)\n## 1.1.0-beta.3 (2025-06-27)\n\n### Features Added\n\n- Tracing for Agents. More information [here](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/ai/Azure.AI.Agents.Persistent/README.md#tracing).\n- Convenience constructor for BingCustomSearchToolParameters\n\n### Breaking Changes\n\n### Bugs Fixed\n\n### Other Changes\n- Automatically function toolcalls. 
More information [here](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/ai/Azure.AI.Agents.Persistent/README.md#function-call-executed-automatically).", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2172907663", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50942, + "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/CHANGELOG.md", + "discussion_id": "2172907663", + "commented_code": "@@ -1,17 +1,12 @@\n # Release History\n \n-## 1.1.0-beta.3 (Unreleased)\n+## 1.1.0-beta.3 (2025-06-27)\n \n ### Features Added\n \n - Tracing for Agents. More information [here](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/ai/Azure.AI.Agents.Persistent/README.md#tracing).\n - Convenience constructor for BingCustomSearchToolParameters\n-\n-### Breaking Changes\n-\n-### Bugs Fixed\n-\n-### Other Changes\n+- Automatically function toolcalls. More information [here](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/ai/Azure.AI.Agents.Persistent/README.md#function-call-executed-automatically).", + "comment_created_at": "2025-06-27T21:43:46+00:00", + "comment_author": "jhakulin", + "comment_body": "Support for automatic execution of function tool calls. More information [here](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/ai/Azure.AI.Agents.Persistent/README.md#function-call-executed-automatically).", + "pr_file_module": null + }, + { + "comment_id": "2172910565", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50942, + "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/CHANGELOG.md", + "discussion_id": "2172907663", + "commented_code": "@@ -1,17 +1,12 @@\n # Release History\n \n-## 1.1.0-beta.3 (Unreleased)\n+## 1.1.0-beta.3 (2025-06-27)\n \n ### Features Added\n \n - Tracing for Agents. More information [here](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/ai/Azure.AI.Agents.Persistent/README.md#tracing).\n - Convenience constructor for BingCustomSearchToolParameters\n-\n-### Breaking Changes\n-\n-### Bugs Fixed\n-\n-### Other Changes\n+- Automatically function toolcalls. More information [here](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/ai/Azure.AI.Agents.Persistent/README.md#function-call-executed-automatically).", + "comment_created_at": "2025-06-27T21:47:43+00:00", + "comment_author": "nick863", + "comment_body": "https://github.com/Azure/azure-sdk-for-net/pull/50945", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2110933580", + "pr_number": 50283, + "pr_file": "sdk/ai/Azure.AI.Projects/CHANGELOG.md", + "created_at": "2025-05-28T05:15:18+00:00", + "commented_code": "### Breaking Changes\n* Azure AI Foundry Project endpoint is now required to construct the `AIProjectClient`. It has the form\n`https://.services.ai.azure.com/api/projects/`. Find it in your AI Foundry Project Overview page. \n`https://.services.ai.azure.com/api/projects/`. Find it in your AI Foundry Project Overview page. The support of connection strings as the project endpoints and hub-based projects was dropped. 
Please create a new Azure AI Foundry resource, which uses endpoints (recommended), or pin the version of `Azure.AI.Projects` to `1.0.0-beta.8` if it is not possible.", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2110933580", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50283, + "pr_file": "sdk/ai/Azure.AI.Projects/CHANGELOG.md", + "discussion_id": "2110933580", + "commented_code": "@@ -19,7 +19,7 @@\n \n ### Breaking Changes\n * Azure AI Foundry Project endpoint is now required to construct the `AIProjectClient`. It has the form\n-`https://.services.ai.azure.com/api/projects/`. Find it in your AI Foundry Project Overview page. \n+`https://.services.ai.azure.com/api/projects/`. Find it in your AI Foundry Project Overview page. The support of connection strings as the project endpoints and hub-based projects was dropped. Please create a new Azure AI Foundry resource, which uses endpoints (recommended), or pin the version of `Azure.AI.Projects` to `1.0.0-beta.8` if it is not possible.", + "comment_created_at": "2025-05-28T05:15:18+00:00", + "comment_author": "jhakulin", + "comment_body": "Little rephrase suggestion, could you change to notes like below?\r\n\r\nSupport for project connection string and hub-based projects has been discontinued. We recommend creating a new Azure AI Foundry resource utilizing project endpoint. If this is not possible, please pin the version of `Azure.AI.Projects` to version `1.0.0-beta.8` or earlier.\r\n\r\n", + "pr_file_module": null + }, + { + "comment_id": "2110936385", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50283, + "pr_file": "sdk/ai/Azure.AI.Projects/CHANGELOG.md", + "discussion_id": "2110933580", + "commented_code": "@@ -19,7 +19,7 @@\n \n ### Breaking Changes\n * Azure AI Foundry Project endpoint is now required to construct the `AIProjectClient`. It has the form\n-`https://.services.ai.azure.com/api/projects/`. Find it in your AI Foundry Project Overview page. \n+`https://.services.ai.azure.com/api/projects/`. Find it in your AI Foundry Project Overview page. The support of connection strings as the project endpoints and hub-based projects was dropped. Please create a new Azure AI Foundry resource, which uses endpoints (recommended), or pin the version of `Azure.AI.Projects` to `1.0.0-beta.8` if it is not possible.", + "comment_created_at": "2025-05-28T05:16:47+00:00", + "comment_author": "jhakulin", + "comment_body": "Updated text", + "pr_file_module": null + }, + { + "comment_id": "2112481519", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50283, + "pr_file": "sdk/ai/Azure.AI.Projects/CHANGELOG.md", + "discussion_id": "2110933580", + "commented_code": "@@ -19,7 +19,7 @@\n \n ### Breaking Changes\n * Azure AI Foundry Project endpoint is now required to construct the `AIProjectClient`. It has the form\n-`https://.services.ai.azure.com/api/projects/`. Find it in your AI Foundry Project Overview page. \n+`https://.services.ai.azure.com/api/projects/`. Find it in your AI Foundry Project Overview page. The support of connection strings as the project endpoints and hub-based projects was dropped. 
Please create a new Azure AI Foundry resource, which uses endpoints (recommended), or pin the version of `Azure.AI.Projects` to `1.0.0-beta.8` if it is not possible.", + "comment_created_at": "2025-05-28T18:10:20+00:00", + "comment_author": "nick863", + "comment_body": "Fixed", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-document-ai-changes-thoroughly.md b/_reviewers/azure-sdk-for-net-document-ai-changes-thoroughly.md index f8b8e15..eb46c82 100644 --- a/_reviewers/azure-sdk-for-net-document-ai-changes-thoroughly.md +++ b/_reviewers/azure-sdk-for-net-document-ai-changes-thoroughly.md @@ -28,87 +28,3 @@ Example of good documentation: ``` For AI systems where behavior might not be immediately intuitive (like automatic function execution or new endpoint requirements), clear documentation helps users understand system capabilities and limitations, and reduces integration challenges. - - -[ - { - "discussion_id": "2172907663", - "pr_number": 50942, - "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/CHANGELOG.md", - "created_at": "2025-06-27T21:43:46+00:00", - "commented_code": "# Release History\n\n## 1.1.0-beta.3 (Unreleased)\n## 1.1.0-beta.3 (2025-06-27)\n\n### Features Added\n\n- Tracing for Agents. More information [here](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/ai/Azure.AI.Agents.Persistent/README.md#tracing).\n- Convenience constructor for BingCustomSearchToolParameters\n\n### Breaking Changes\n\n### Bugs Fixed\n\n### Other Changes\n- Automatically function toolcalls. More information [here](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/ai/Azure.AI.Agents.Persistent/README.md#function-call-executed-automatically).", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2172907663", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50942, - "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/CHANGELOG.md", - "discussion_id": "2172907663", - "commented_code": "@@ -1,17 +1,12 @@\n # Release History\n \n-## 1.1.0-beta.3 (Unreleased)\n+## 1.1.0-beta.3 (2025-06-27)\n \n ### Features Added\n \n - Tracing for Agents. More information [here](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/ai/Azure.AI.Agents.Persistent/README.md#tracing).\n - Convenience constructor for BingCustomSearchToolParameters\n-\n-### Breaking Changes\n-\n-### Bugs Fixed\n-\n-### Other Changes\n+- Automatically function toolcalls. More information [here](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/ai/Azure.AI.Agents.Persistent/README.md#function-call-executed-automatically).", - "comment_created_at": "2025-06-27T21:43:46+00:00", - "comment_author": "jhakulin", - "comment_body": "Support for automatic execution of function tool calls. More information [here](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/ai/Azure.AI.Agents.Persistent/README.md#function-call-executed-automatically).", - "pr_file_module": null - }, - { - "comment_id": "2172910565", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50942, - "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/CHANGELOG.md", - "discussion_id": "2172907663", - "commented_code": "@@ -1,17 +1,12 @@\n # Release History\n \n-## 1.1.0-beta.3 (Unreleased)\n+## 1.1.0-beta.3 (2025-06-27)\n \n ### Features Added\n \n - Tracing for Agents. 
More information [here](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/ai/Azure.AI.Agents.Persistent/README.md#tracing).\n - Convenience constructor for BingCustomSearchToolParameters\n-\n-### Breaking Changes\n-\n-### Bugs Fixed\n-\n-### Other Changes\n+- Automatically function toolcalls. More information [here](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/ai/Azure.AI.Agents.Persistent/README.md#function-call-executed-automatically).", - "comment_created_at": "2025-06-27T21:47:43+00:00", - "comment_author": "nick863", - "comment_body": "https://github.com/Azure/azure-sdk-for-net/pull/50945", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2110933580", - "pr_number": 50283, - "pr_file": "sdk/ai/Azure.AI.Projects/CHANGELOG.md", - "created_at": "2025-05-28T05:15:18+00:00", - "commented_code": "### Breaking Changes\n* Azure AI Foundry Project endpoint is now required to construct the `AIProjectClient`. It has the form\n`https://.services.ai.azure.com/api/projects/`. Find it in your AI Foundry Project Overview page. \n`https://.services.ai.azure.com/api/projects/`. Find it in your AI Foundry Project Overview page. The support of connection strings as the project endpoints and hub-based projects was dropped. Please create a new Azure AI Foundry resource, which uses endpoints (recommended), or pin the version of `Azure.AI.Projects` to `1.0.0-beta.8` if it is not possible.", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2110933580", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50283, - "pr_file": "sdk/ai/Azure.AI.Projects/CHANGELOG.md", - "discussion_id": "2110933580", - "commented_code": "@@ -19,7 +19,7 @@\n \n ### Breaking Changes\n * Azure AI Foundry Project endpoint is now required to construct the `AIProjectClient`. It has the form\n-`https://.services.ai.azure.com/api/projects/`. Find it in your AI Foundry Project Overview page. \n+`https://.services.ai.azure.com/api/projects/`. Find it in your AI Foundry Project Overview page. The support of connection strings as the project endpoints and hub-based projects was dropped. Please create a new Azure AI Foundry resource, which uses endpoints (recommended), or pin the version of `Azure.AI.Projects` to `1.0.0-beta.8` if it is not possible.", - "comment_created_at": "2025-05-28T05:15:18+00:00", - "comment_author": "jhakulin", - "comment_body": "Little rephrase suggestion, could you change to notes like below?\r\n\r\nSupport for project connection string and hub-based projects has been discontinued. We recommend creating a new Azure AI Foundry resource utilizing project endpoint. If this is not possible, please pin the version of `Azure.AI.Projects` to version `1.0.0-beta.8` or earlier.\r\n\r\n", - "pr_file_module": null - }, - { - "comment_id": "2110936385", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50283, - "pr_file": "sdk/ai/Azure.AI.Projects/CHANGELOG.md", - "discussion_id": "2110933580", - "commented_code": "@@ -19,7 +19,7 @@\n \n ### Breaking Changes\n * Azure AI Foundry Project endpoint is now required to construct the `AIProjectClient`. It has the form\n-`https://.services.ai.azure.com/api/projects/`. Find it in your AI Foundry Project Overview page. \n+`https://.services.ai.azure.com/api/projects/`. Find it in your AI Foundry Project Overview page. The support of connection strings as the project endpoints and hub-based projects was dropped. 
Please create a new Azure AI Foundry resource, which uses endpoints (recommended), or pin the version of `Azure.AI.Projects` to `1.0.0-beta.8` if it is not possible.", - "comment_created_at": "2025-05-28T05:16:47+00:00", - "comment_author": "jhakulin", - "comment_body": "Updated text", - "pr_file_module": null - }, - { - "comment_id": "2112481519", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50283, - "pr_file": "sdk/ai/Azure.AI.Projects/CHANGELOG.md", - "discussion_id": "2110933580", - "commented_code": "@@ -19,7 +19,7 @@\n \n ### Breaking Changes\n * Azure AI Foundry Project endpoint is now required to construct the `AIProjectClient`. It has the form\n-`https://.services.ai.azure.com/api/projects/`. Find it in your AI Foundry Project Overview page. \n+`https://.services.ai.azure.com/api/projects/`. Find it in your AI Foundry Project Overview page. The support of connection strings as the project endpoints and hub-based projects was dropped. Please create a new Azure AI Foundry resource, which uses endpoints (recommended), or pin the version of `Azure.AI.Projects` to `1.0.0-beta.8` if it is not possible.", - "comment_created_at": "2025-05-28T18:10:20+00:00", - "comment_author": "nick863", - "comment_body": "Fixed", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-document-code-reasoning.json b/_reviewers/azure-sdk-for-net-document-code-reasoning.json new file mode 100644 index 0000000..6e5f642 --- /dev/null +++ b/_reviewers/azure-sdk-for-net-document-code-reasoning.json @@ -0,0 +1,146 @@ +[ + { + "discussion_id": "2165164048", + "pr_number": 50850, + "pr_file": "eng/packages/http-client-csharp/generator/Azure.Generator/src/Providers/CollectionResultDefinition.cs", + "created_at": "2025-06-25T00:03:13+00:00", + "commented_code": "// Extract next page\n doWhileStatement.Add(nextPageVariable.Assign(BuildGetNextPage(responseWithTypeVariable, responseVariable)).Terminate());\n\n // Break if the next page is null", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2165164048", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50850, + "pr_file": "eng/packages/http-client-csharp/generator/Azure.Generator/src/Providers/CollectionResultDefinition.cs", + "discussion_id": "2165164048", + "commented_code": "@@ -213,6 +216,9 @@ private MethodBodyStatement[] BuildAsPagesMethodBody()\n // Extract next page\n doWhileStatement.Add(nextPageVariable.Assign(BuildGetNextPage(responseWithTypeVariable, responseVariable)).Terminate());\n \n+ // Break if the next page is null", + "comment_created_at": "2025-06-25T00:03:13+00:00", + "comment_author": "Copilot", + "comment_body": "[nitpick] Consider adding a code comment for clarity and remove redundant parentheses to improve readability.\n```suggestion\n // Break if the next page is null\n // Break the loop if the next page variable is null, indicating no more pages to process.\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2160611331", + "pr_number": 50777, + "pr_file": "eng/packages/http-client-csharp/generator/Azure.Generator/test/Visitors/NamespaceVisitorTests.cs", + "created_at": "2025-06-23T02:26:18+00:00", + "commented_code": "Assert.AreEqual(\"Samples\", updatedModel!.Type.Namespace);\n }\n\n [Test]\n public void DoesNotChangeNamespaceOfCustomizedModel()\n {\n MockHelpers.LoadMockPlugin(configurationJson: \"{ \\\"package-name\\\": \\\"TestLibrary\\\", \\\"model-namespace\\\": true }\");\n var visitor = new 
TestNamespaceVisitor();\n var inputType = InputFactory.Model(\"TestModel\", \"Samples\");\n var model = new ModelProvider(inputType);\n\n // simulate a customized model\n model.GetType().BaseType!.GetField(\n \"_customCodeView\",\n System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)?\n .SetValue(model, new Lazy(() => new TestTypeProvider()));", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2160611331", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50777, + "pr_file": "eng/packages/http-client-csharp/generator/Azure.Generator/test/Visitors/NamespaceVisitorTests.cs", + "discussion_id": "2160611331", + "commented_code": "@@ -38,6 +39,25 @@ public void DoesNotUseModelsNamespaceIfConfigSetToFalse()\n Assert.AreEqual(\"Samples\", updatedModel!.Type.Namespace);\n }\n \n+ [Test]\n+ public void DoesNotChangeNamespaceOfCustomizedModel()\n+ {\n+ MockHelpers.LoadMockPlugin(configurationJson: \"{ \\\"package-name\\\": \\\"TestLibrary\\\", \\\"model-namespace\\\": true }\");\n+ var visitor = new TestNamespaceVisitor();\n+ var inputType = InputFactory.Model(\"TestModel\", \"Samples\");\n+ var model = new ModelProvider(inputType);\n+\n+ // simulate a customized model\n+ model.GetType().BaseType!.GetField(\n+ \"_customCodeView\",\n+ System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)?\n+ .SetValue(model, new Lazy(() => new TestTypeProvider()));", + "comment_created_at": "2025-06-23T02:26:18+00:00", + "comment_author": "Copilot", + "comment_body": "Consider extracting the reflection logic used to set _customCodeView into a helper method or documenting its purpose to improve maintainability and reduce the risk of breakage if the field name changes.\n```suggestion\n SetCustomCodeView(model, new Lazy(() => new TestTypeProvider()));\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2156832104", + "pr_number": 50705, + "pr_file": "sdk/resourcemanager/Azure.ResourceManager/src/Common/Custom/Models/ManagedServiceIdentity.Serialization.cs", + "created_at": "2025-06-19T11:54:22+00:00", + "commented_code": "[JsonConverter(typeof(ManagedServiceIdentityConverter))]\n public partial class ManagedServiceIdentity : IJsonModel\n {\n internal void Write(Utf8JsonWriter writer, ModelReaderWriterOptions options, JsonSerializerOptions jOptions = default)\n private const string SystemAssignedUserAssignedV3Value = \"SystemAssigned,UserAssigned\";", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2156832104", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50705, + "pr_file": "sdk/resourcemanager/Azure.ResourceManager/src/Common/Custom/Models/ManagedServiceIdentity.Serialization.cs", + "discussion_id": "2156832104", + "commented_code": "@@ -18,22 +18,51 @@ namespace Azure.ResourceManager.Models\n [JsonConverter(typeof(ManagedServiceIdentityConverter))]\n public partial class ManagedServiceIdentity : IJsonModel\n {\n- internal void Write(Utf8JsonWriter writer, ModelReaderWriterOptions options, JsonSerializerOptions jOptions = default)\n+ private const string SystemAssignedUserAssignedV3Value = \"SystemAssigned,UserAssigned\";\n+", + "comment_created_at": "2025-06-19T11:54:22+00:00", + "comment_author": "Copilot", + "comment_body": "[nitpick] Consider adding a comment explaining the rationale behind checking for the '|v3' suffix in options.Format to improve future maintainability and clarity.\n```suggestion\n\n // This method 
checks if the format string in options.Format ends with the \"|v3\" suffix.\n // The \"|v3\" suffix indicates that the ManagedServiceIdentity format is version 3.\n // If the suffix is present, it is removed, and the base format is returned via the 'format' parameter.\n // This allows the method to handle version-specific logic while preserving the base format.\n```", + "pr_file_module": null + }, + { + "comment_id": "2157828757", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50705, + "pr_file": "sdk/resourcemanager/Azure.ResourceManager/src/Common/Custom/Models/ManagedServiceIdentity.Serialization.cs", + "discussion_id": "2156832104", + "commented_code": "@@ -18,22 +18,51 @@ namespace Azure.ResourceManager.Models\n [JsonConverter(typeof(ManagedServiceIdentityConverter))]\n public partial class ManagedServiceIdentity : IJsonModel\n {\n- internal void Write(Utf8JsonWriter writer, ModelReaderWriterOptions options, JsonSerializerOptions jOptions = default)\n+ private const string SystemAssignedUserAssignedV3Value = \"SystemAssigned,UserAssigned\";\n+", + "comment_created_at": "2025-06-20T00:42:53+00:00", + "comment_author": "live1206", + "comment_body": "added comment after rewording based on this.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2130331367", + "pr_number": 50456, + "pr_file": "sdk/storage/Azure.Storage.Blobs/src/PageBlobClient.cs", + "created_at": "2025-06-05T20:53:25+00:00", + "commented_code": "response = await PageBlobRestClient.CreateAsync(\n contentLength: 0,\n blobContentLength: size,", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2130331367", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50456, + "pr_file": "sdk/storage/Azure.Storage.Blobs/src/PageBlobClient.cs", + "discussion_id": "2130331367", + "commented_code": "@@ -999,7 +1018,7 @@ private async Task> CreateInternal(\n response = await PageBlobRestClient.CreateAsync(\n contentLength: 0,\n blobContentLength: size,", + "comment_created_at": "2025-06-05T20:53:25+00:00", + "comment_author": "Copilot", + "comment_body": "Consider adding an inline comment clarifying the implicit conversion of PremiumPageBlobAccessTier and the intended handling of default or null values. 
This will help future maintainers understand how the premium tier parameter is propagated to the REST API call.\n```suggestion\n blobContentLength: size,\n // The premiumPageBlobAccessTier parameter specifies the access tier for the page blob.\n // If null, the REST API will apply the default tier or handle it gracefully.\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2124976966", + "pr_number": 50394, + "pr_file": "sdk/servicebus/Azure.Messaging.ServiceBus/src/Primitives/ServiceBusRetryPolicy.cs", + "created_at": "2025-06-03T21:40:01+00:00", + "commented_code": "bool logTimeoutRetriesAsVerbose = false)\n {\n var failedAttemptCount = 0;\n var tryTimeout = CalculateTryTimeout(0);\n\n TimeSpan tryTimeout = CalculateTryTimeout(0);\n if (IsServerBusy && tryTimeout < ServerBusyBaseSleepTime)\n {\n // We are in a server busy state before we start processing.\n // Since ServerBusyBaseSleepTime > remaining time for the operation, we don't wait for the entire Sleep time.\n await Task.Delay(tryTimeout, cancellationToken).ConfigureAwait(false);\n throw new ServiceBusException(\n ServerBusyExceptionMessage,\n ServiceBusFailureReason.ServiceBusy);\n while (IsServerBusy && !cancellationToken.IsCancellationRequested)", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2124976966", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50394, + "pr_file": "sdk/servicebus/Azure.Messaging.ServiceBus/src/Primitives/ServiceBusRetryPolicy.cs", + "discussion_id": "2124976966", + "commented_code": "@@ -130,17 +133,42 @@ internal async ValueTask RunOperation(\n bool logTimeoutRetriesAsVerbose = false)\n {\n var failedAttemptCount = 0;\n+ var tryTimeout = CalculateTryTimeout(0);\n \n- TimeSpan tryTimeout = CalculateTryTimeout(0);\n if (IsServerBusy && tryTimeout < ServerBusyBaseSleepTime)\n {\n- // We are in a server busy state before we start processing.\n- // Since ServerBusyBaseSleepTime > remaining time for the operation, we don't wait for the entire Sleep time.\n- await Task.Delay(tryTimeout, cancellationToken).ConfigureAwait(false);\n- throw new ServiceBusException(\n- ServerBusyExceptionMessage,\n- ServiceBusFailureReason.ServiceBusy);\n+ while (IsServerBusy && !cancellationToken.IsCancellationRequested)", + "comment_created_at": "2025-06-03T21:40:01+00:00", + "comment_author": "Copilot", + "comment_body": "[nitpick] Add an inline comment within the while loop explaining the rationale behind recalculating the try timeout and the exit condition. This can help future maintainers understand how the retry logic adapts to a persisting server busy state.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2103555145", + "pr_number": 50225, + "pr_file": "sdk/storage/Azure.Storage.DataMovement.Files.Shares/src/DataMovementSharesExtensions.cs", + "created_at": "2025-05-22T23:21:04+00:00", + "commented_code": "try\n {\n ShareProperties properties = await parentShareClient.GetPropertiesAsync(cancellationToken).ConfigureAwait(false);\n ShareProtocols expectedProtocol = options?.ShareProtocol ?? ShareProtocols.Smb;\n ShareProtocols actualProtocol = properties.Protocols ?? ShareProtocols.Smb;\n ShareProtocol expectedProtocol = options?.ShareProtocol ?? ShareProtocol.Smb;\n ShareProtocols effectiveProtocol = properties.Protocols ?? 
ShareProtocols.Smb;", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2103555145", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50225, + "pr_file": "sdk/storage/Azure.Storage.DataMovement.Files.Shares/src/DataMovementSharesExtensions.cs", + "discussion_id": "2103555145", + "commented_code": "@@ -728,8 +728,9 @@ public static async Task ValidateProtocolAsync(\n try\n {\n ShareProperties properties = await parentShareClient.GetPropertiesAsync(cancellationToken).ConfigureAwait(false);\n- ShareProtocols expectedProtocol = options?.ShareProtocol ?? ShareProtocols.Smb;\n- ShareProtocols actualProtocol = properties.Protocols ?? ShareProtocols.Smb;\n+ ShareProtocol expectedProtocol = options?.ShareProtocol ?? ShareProtocol.Smb;\n+ ShareProtocols effectiveProtocol = properties.Protocols ?? ShareProtocols.Smb;", + "comment_created_at": "2025-05-22T23:21:04+00:00", + "comment_author": "Copilot", + "comment_body": "Consider adding a clarifying comment explaining the conversion from ShareProtocols to ShareProtocol to improve readability, as this conversion may not be immediately clear to future maintainers.\n```suggestion\n ShareProtocols effectiveProtocol = properties.Protocols ?? ShareProtocols.Smb;\n // Convert ShareProtocols (multi-valued) to ShareProtocol (single-valued).\n // If effectiveProtocol is ShareProtocols.Smb, map to ShareProtocol.Smb.\n // Otherwise, default to ShareProtocol.Nfs.\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-document-code-reasoning.md b/_reviewers/azure-sdk-for-net-document-code-reasoning.md index 9982df2..9ff1c60 100644 --- a/_reviewers/azure-sdk-for-net-document-code-reasoning.md +++ b/_reviewers/azure-sdk-for-net-document-code-reasoning.md @@ -46,151 +46,3 @@ Add clear, concise comments that explain the "why" behind complex logic, non-obv ``` Comments should benefit future maintainers by providing context that isn't immediately obvious from the code itself. When complex code can't be simplified, comprehensive documentation becomes essential for long-term maintainability. 
- - -[ - { - "discussion_id": "2165164048", - "pr_number": 50850, - "pr_file": "eng/packages/http-client-csharp/generator/Azure.Generator/src/Providers/CollectionResultDefinition.cs", - "created_at": "2025-06-25T00:03:13+00:00", - "commented_code": "// Extract next page\n doWhileStatement.Add(nextPageVariable.Assign(BuildGetNextPage(responseWithTypeVariable, responseVariable)).Terminate());\n\n // Break if the next page is null", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2165164048", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50850, - "pr_file": "eng/packages/http-client-csharp/generator/Azure.Generator/src/Providers/CollectionResultDefinition.cs", - "discussion_id": "2165164048", - "commented_code": "@@ -213,6 +216,9 @@ private MethodBodyStatement[] BuildAsPagesMethodBody()\n // Extract next page\n doWhileStatement.Add(nextPageVariable.Assign(BuildGetNextPage(responseWithTypeVariable, responseVariable)).Terminate());\n \n+ // Break if the next page is null", - "comment_created_at": "2025-06-25T00:03:13+00:00", - "comment_author": "Copilot", - "comment_body": "[nitpick] Consider adding a code comment for clarity and remove redundant parentheses to improve readability.\n```suggestion\n // Break if the next page is null\n // Break the loop if the next page variable is null, indicating no more pages to process.\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2160611331", - "pr_number": 50777, - "pr_file": "eng/packages/http-client-csharp/generator/Azure.Generator/test/Visitors/NamespaceVisitorTests.cs", - "created_at": "2025-06-23T02:26:18+00:00", - "commented_code": "Assert.AreEqual(\"Samples\", updatedModel!.Type.Namespace);\n }\n\n [Test]\n public void DoesNotChangeNamespaceOfCustomizedModel()\n {\n MockHelpers.LoadMockPlugin(configurationJson: \"{ \\\"package-name\\\": \\\"TestLibrary\\\", \\\"model-namespace\\\": true }\");\n var visitor = new TestNamespaceVisitor();\n var inputType = InputFactory.Model(\"TestModel\", \"Samples\");\n var model = new ModelProvider(inputType);\n\n // simulate a customized model\n model.GetType().BaseType!.GetField(\n \"_customCodeView\",\n System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)?\n .SetValue(model, new Lazy(() => new TestTypeProvider()));", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2160611331", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50777, - "pr_file": "eng/packages/http-client-csharp/generator/Azure.Generator/test/Visitors/NamespaceVisitorTests.cs", - "discussion_id": "2160611331", - "commented_code": "@@ -38,6 +39,25 @@ public void DoesNotUseModelsNamespaceIfConfigSetToFalse()\n Assert.AreEqual(\"Samples\", updatedModel!.Type.Namespace);\n }\n \n+ [Test]\n+ public void DoesNotChangeNamespaceOfCustomizedModel()\n+ {\n+ MockHelpers.LoadMockPlugin(configurationJson: \"{ \\\"package-name\\\": \\\"TestLibrary\\\", \\\"model-namespace\\\": true }\");\n+ var visitor = new TestNamespaceVisitor();\n+ var inputType = InputFactory.Model(\"TestModel\", \"Samples\");\n+ var model = new ModelProvider(inputType);\n+\n+ // simulate a customized model\n+ model.GetType().BaseType!.GetField(\n+ \"_customCodeView\",\n+ System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)?\n+ .SetValue(model, new Lazy(() => new TestTypeProvider()));", - "comment_created_at": "2025-06-23T02:26:18+00:00", - "comment_author": "Copilot", - "comment_body": 
"Consider extracting the reflection logic used to set _customCodeView into a helper method or documenting its purpose to improve maintainability and reduce the risk of breakage if the field name changes.\n```suggestion\n SetCustomCodeView(model, new Lazy(() => new TestTypeProvider()));\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2156832104", - "pr_number": 50705, - "pr_file": "sdk/resourcemanager/Azure.ResourceManager/src/Common/Custom/Models/ManagedServiceIdentity.Serialization.cs", - "created_at": "2025-06-19T11:54:22+00:00", - "commented_code": "[JsonConverter(typeof(ManagedServiceIdentityConverter))]\n public partial class ManagedServiceIdentity : IJsonModel\n {\n internal void Write(Utf8JsonWriter writer, ModelReaderWriterOptions options, JsonSerializerOptions jOptions = default)\n private const string SystemAssignedUserAssignedV3Value = \"SystemAssigned,UserAssigned\";", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2156832104", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50705, - "pr_file": "sdk/resourcemanager/Azure.ResourceManager/src/Common/Custom/Models/ManagedServiceIdentity.Serialization.cs", - "discussion_id": "2156832104", - "commented_code": "@@ -18,22 +18,51 @@ namespace Azure.ResourceManager.Models\n [JsonConverter(typeof(ManagedServiceIdentityConverter))]\n public partial class ManagedServiceIdentity : IJsonModel\n {\n- internal void Write(Utf8JsonWriter writer, ModelReaderWriterOptions options, JsonSerializerOptions jOptions = default)\n+ private const string SystemAssignedUserAssignedV3Value = \"SystemAssigned,UserAssigned\";\n+", - "comment_created_at": "2025-06-19T11:54:22+00:00", - "comment_author": "Copilot", - "comment_body": "[nitpick] Consider adding a comment explaining the rationale behind checking for the '|v3' suffix in options.Format to improve future maintainability and clarity.\n```suggestion\n\n // This method checks if the format string in options.Format ends with the \"|v3\" suffix.\n // The \"|v3\" suffix indicates that the ManagedServiceIdentity format is version 3.\n // If the suffix is present, it is removed, and the base format is returned via the 'format' parameter.\n // This allows the method to handle version-specific logic while preserving the base format.\n```", - "pr_file_module": null - }, - { - "comment_id": "2157828757", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50705, - "pr_file": "sdk/resourcemanager/Azure.ResourceManager/src/Common/Custom/Models/ManagedServiceIdentity.Serialization.cs", - "discussion_id": "2156832104", - "commented_code": "@@ -18,22 +18,51 @@ namespace Azure.ResourceManager.Models\n [JsonConverter(typeof(ManagedServiceIdentityConverter))]\n public partial class ManagedServiceIdentity : IJsonModel\n {\n- internal void Write(Utf8JsonWriter writer, ModelReaderWriterOptions options, JsonSerializerOptions jOptions = default)\n+ private const string SystemAssignedUserAssignedV3Value = \"SystemAssigned,UserAssigned\";\n+", - "comment_created_at": "2025-06-20T00:42:53+00:00", - "comment_author": "live1206", - "comment_body": "added comment after rewording based on this.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2130331367", - "pr_number": 50456, - "pr_file": "sdk/storage/Azure.Storage.Blobs/src/PageBlobClient.cs", - "created_at": "2025-06-05T20:53:25+00:00", - "commented_code": "response = await PageBlobRestClient.CreateAsync(\n contentLength: 0,\n blobContentLength: size,", - "repo_full_name": 
"Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2130331367", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50456, - "pr_file": "sdk/storage/Azure.Storage.Blobs/src/PageBlobClient.cs", - "discussion_id": "2130331367", - "commented_code": "@@ -999,7 +1018,7 @@ private async Task> CreateInternal(\n response = await PageBlobRestClient.CreateAsync(\n contentLength: 0,\n blobContentLength: size,", - "comment_created_at": "2025-06-05T20:53:25+00:00", - "comment_author": "Copilot", - "comment_body": "Consider adding an inline comment clarifying the implicit conversion of PremiumPageBlobAccessTier and the intended handling of default or null values. This will help future maintainers understand how the premium tier parameter is propagated to the REST API call.\n```suggestion\n blobContentLength: size,\n // The premiumPageBlobAccessTier parameter specifies the access tier for the page blob.\n // If null, the REST API will apply the default tier or handle it gracefully.\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2124976966", - "pr_number": 50394, - "pr_file": "sdk/servicebus/Azure.Messaging.ServiceBus/src/Primitives/ServiceBusRetryPolicy.cs", - "created_at": "2025-06-03T21:40:01+00:00", - "commented_code": "bool logTimeoutRetriesAsVerbose = false)\n {\n var failedAttemptCount = 0;\n var tryTimeout = CalculateTryTimeout(0);\n\n TimeSpan tryTimeout = CalculateTryTimeout(0);\n if (IsServerBusy && tryTimeout < ServerBusyBaseSleepTime)\n {\n // We are in a server busy state before we start processing.\n // Since ServerBusyBaseSleepTime > remaining time for the operation, we don't wait for the entire Sleep time.\n await Task.Delay(tryTimeout, cancellationToken).ConfigureAwait(false);\n throw new ServiceBusException(\n ServerBusyExceptionMessage,\n ServiceBusFailureReason.ServiceBusy);\n while (IsServerBusy && !cancellationToken.IsCancellationRequested)", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2124976966", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50394, - "pr_file": "sdk/servicebus/Azure.Messaging.ServiceBus/src/Primitives/ServiceBusRetryPolicy.cs", - "discussion_id": "2124976966", - "commented_code": "@@ -130,17 +133,42 @@ internal async ValueTask RunOperation(\n bool logTimeoutRetriesAsVerbose = false)\n {\n var failedAttemptCount = 0;\n+ var tryTimeout = CalculateTryTimeout(0);\n \n- TimeSpan tryTimeout = CalculateTryTimeout(0);\n if (IsServerBusy && tryTimeout < ServerBusyBaseSleepTime)\n {\n- // We are in a server busy state before we start processing.\n- // Since ServerBusyBaseSleepTime > remaining time for the operation, we don't wait for the entire Sleep time.\n- await Task.Delay(tryTimeout, cancellationToken).ConfigureAwait(false);\n- throw new ServiceBusException(\n- ServerBusyExceptionMessage,\n- ServiceBusFailureReason.ServiceBusy);\n+ while (IsServerBusy && !cancellationToken.IsCancellationRequested)", - "comment_created_at": "2025-06-03T21:40:01+00:00", - "comment_author": "Copilot", - "comment_body": "[nitpick] Add an inline comment within the while loop explaining the rationale behind recalculating the try timeout and the exit condition. 
This can help future maintainers understand how the retry logic adapts to a persisting server busy state.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2103555145", - "pr_number": 50225, - "pr_file": "sdk/storage/Azure.Storage.DataMovement.Files.Shares/src/DataMovementSharesExtensions.cs", - "created_at": "2025-05-22T23:21:04+00:00", - "commented_code": "try\n {\n ShareProperties properties = await parentShareClient.GetPropertiesAsync(cancellationToken).ConfigureAwait(false);\n ShareProtocols expectedProtocol = options?.ShareProtocol ?? ShareProtocols.Smb;\n ShareProtocols actualProtocol = properties.Protocols ?? ShareProtocols.Smb;\n ShareProtocol expectedProtocol = options?.ShareProtocol ?? ShareProtocol.Smb;\n ShareProtocols effectiveProtocol = properties.Protocols ?? ShareProtocols.Smb;", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2103555145", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50225, - "pr_file": "sdk/storage/Azure.Storage.DataMovement.Files.Shares/src/DataMovementSharesExtensions.cs", - "discussion_id": "2103555145", - "commented_code": "@@ -728,8 +728,9 @@ public static async Task ValidateProtocolAsync(\n try\n {\n ShareProperties properties = await parentShareClient.GetPropertiesAsync(cancellationToken).ConfigureAwait(false);\n- ShareProtocols expectedProtocol = options?.ShareProtocol ?? ShareProtocols.Smb;\n- ShareProtocols actualProtocol = properties.Protocols ?? ShareProtocols.Smb;\n+ ShareProtocol expectedProtocol = options?.ShareProtocol ?? ShareProtocol.Smb;\n+ ShareProtocols effectiveProtocol = properties.Protocols ?? ShareProtocols.Smb;", - "comment_created_at": "2025-05-22T23:21:04+00:00", - "comment_author": "Copilot", - "comment_body": "Consider adding a clarifying comment explaining the conversion from ShareProtocols to ShareProtocol to improve readability, as this conversion may not be immediately clear to future maintainers.\n```suggestion\n ShareProtocols effectiveProtocol = properties.Protocols ?? 
ShareProtocols.Smb;\n // Convert ShareProtocols (multi-valued) to ShareProtocol (single-valued).\n // If effectiveProtocol is ShareProtocols.Smb, map to ShareProtocol.Smb.\n // Otherwise, default to ShareProtocol.Nfs.\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-document-non-obvious-code.json b/_reviewers/azure-sdk-for-net-document-non-obvious-code.json new file mode 100644 index 0000000..5544eb7 --- /dev/null +++ b/_reviewers/azure-sdk-for-net-document-non-obvious-code.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "2143887259", + "pr_number": 50587, + "pr_file": "eng/common/pipelines/templates/steps/detect-api-changes.yml", + "created_at": "2025-06-13T00:09:46+00:00", + "commented_code": "Condition: true\n\nsteps:", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2143887259", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50587, + "pr_file": "eng/common/pipelines/templates/steps/detect-api-changes.yml", + "discussion_id": "2143887259", + "commented_code": "@@ -5,24 +5,25 @@ parameters:\n Condition: true\n \n steps:", + "comment_created_at": "2025-06-13T00:09:46+00:00", + "comment_author": "Copilot", + "comment_body": "Consider adding an inline comment here to clarify that this conditional wrapper restricts the following steps to pull request builds only.\n```suggestion\nsteps:\n # Restrict the following steps to pull request builds only\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2106854958", + "pr_number": 50267, + "pr_file": "sdk/mongodbatlas/Azure.ResourceManager.MongoDBAtlas/tsp-location.yaml", + "created_at": "2025-05-26T08:44:05+00:00", + "commented_code": "directory: specification/liftrmongodb/MongoDB.Atlas.Management\ncommit: 6a4f32353ce0eb59d33fd785a512cd487b81814f\nrepo: Azure/azure-rest-api-specs\nadditionalDirectories:", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2106854958", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50267, + "pr_file": "sdk/mongodbatlas/Azure.ResourceManager.MongoDBAtlas/tsp-location.yaml", + "discussion_id": "2106854958", + "commented_code": "@@ -0,0 +1,4 @@\n+directory: specification/liftrmongodb/MongoDB.Atlas.Management\n+commit: 6a4f32353ce0eb59d33fd785a512cd487b81814f\n+repo: Azure/azure-rest-api-specs\n+additionalDirectories: ", + "comment_created_at": "2025-05-26T08:44:05+00:00", + "comment_author": "Copilot", + "comment_body": "The 'additionalDirectories' property is left empty; please either remove it if not needed or add a comment/documentation to clarify its intended usage.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-document-non-obvious-code.md b/_reviewers/azure-sdk-for-net-document-non-obvious-code.md index 71c83f7..1be1c21 100644 --- a/_reviewers/azure-sdk-for-net-document-non-obvious-code.md +++ b/_reviewers/azure-sdk-for-net-document-non-obvious-code.md @@ -32,51 +32,3 @@ additionalDirectories: ``` Clear documentation reduces the cognitive load for reviewers and future maintainers, helping them understand the code's purpose without needing to infer it from implementation details. 
- - -[ - { - "discussion_id": "2143887259", - "pr_number": 50587, - "pr_file": "eng/common/pipelines/templates/steps/detect-api-changes.yml", - "created_at": "2025-06-13T00:09:46+00:00", - "commented_code": "Condition: true\n\nsteps:", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2143887259", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50587, - "pr_file": "eng/common/pipelines/templates/steps/detect-api-changes.yml", - "discussion_id": "2143887259", - "commented_code": "@@ -5,24 +5,25 @@ parameters:\n Condition: true\n \n steps:", - "comment_created_at": "2025-06-13T00:09:46+00:00", - "comment_author": "Copilot", - "comment_body": "Consider adding an inline comment here to clarify that this conditional wrapper restricts the following steps to pull request builds only.\n```suggestion\nsteps:\n # Restrict the following steps to pull request builds only\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2106854958", - "pr_number": 50267, - "pr_file": "sdk/mongodbatlas/Azure.ResourceManager.MongoDBAtlas/tsp-location.yaml", - "created_at": "2025-05-26T08:44:05+00:00", - "commented_code": "directory: specification/liftrmongodb/MongoDB.Atlas.Management\ncommit: 6a4f32353ce0eb59d33fd785a512cd487b81814f\nrepo: Azure/azure-rest-api-specs\nadditionalDirectories:", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2106854958", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50267, - "pr_file": "sdk/mongodbatlas/Azure.ResourceManager.MongoDBAtlas/tsp-location.yaml", - "discussion_id": "2106854958", - "commented_code": "@@ -0,0 +1,4 @@\n+directory: specification/liftrmongodb/MongoDB.Atlas.Management\n+commit: 6a4f32353ce0eb59d33fd785a512cd487b81814f\n+repo: Azure/azure-rest-api-specs\n+additionalDirectories: ", - "comment_created_at": "2025-05-26T08:44:05+00:00", - "comment_author": "Copilot", - "comment_body": "The 'additionalDirectories' property is left empty; please either remove it if not needed or add a comment/documentation to clarify its intended usage.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-eliminate-repeated-code.json b/_reviewers/azure-sdk-for-net-eliminate-repeated-code.json new file mode 100644 index 0000000..6c1e843 --- /dev/null +++ b/_reviewers/azure-sdk-for-net-eliminate-repeated-code.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "2178653926", + "pr_number": 50998, + "pr_file": "eng/common/scripts/job-matrix/Create-JobMatrix.ps1", + "created_at": "2025-07-01T22:33:14+00:00", + "commented_code": ")\n\n. $PSScriptRoot/job-matrix-functions.ps1\n. 
$PSScriptRoot/../logging.ps1\n\nif (!(Test-Path $ConfigPath)) {\n Write-Error \"ConfigPath '$ConfigPath' does not exist.\"\n exit 1\n}\n$config = GetMatrixConfigFromFile (Get-Content $ConfigPath -Raw)\n$rawConfig = Get-Content $ConfigPath -Raw\n$config = GetMatrixConfigFromFile $rawConfig\n# Strip empty string filters in order to be able to use azure pipelines yaml join()\n$Filters = $Filters | Where-Object { $_ }\n\nLogGroupStart \"Matrix generation configuration\"\nWrite-Host \"Configuration File: $ConfigPath\"\nWrite-Host $rawConfig\nWrite-Host \"SelectionType: $Selection\"\nWrite-Host \"DisplayNameFilter: $DisplayNameFilter\"\nWrite-Host \"Filters: $Filters\"\nWrite-Host \"Replace: $Replace\"\nWrite-Host \"NonSparseParameters: $NonSparseParameters\"", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2178653926", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50998, + "pr_file": "eng/common/scripts/job-matrix/Create-JobMatrix.ps1", + "discussion_id": "2178653926", + "commented_code": "@@ -21,15 +21,27 @@ param (\n )\n \n . $PSScriptRoot/job-matrix-functions.ps1\n+. $PSScriptRoot/../logging.ps1\n \n if (!(Test-Path $ConfigPath)) {\n Write-Error \"ConfigPath '$ConfigPath' does not exist.\"\n exit 1\n }\n-$config = GetMatrixConfigFromFile (Get-Content $ConfigPath -Raw)\n+$rawConfig = Get-Content $ConfigPath -Raw\n+$config = GetMatrixConfigFromFile $rawConfig\n # Strip empty string filters in order to be able to use azure pipelines yaml join()\n $Filters = $Filters | Where-Object { $_ }\n \n+LogGroupStart \"Matrix generation configuration\"\n+Write-Host \"Configuration File: $ConfigPath\"\n+Write-Host $rawConfig\n+Write-Host \"SelectionType: $Selection\"\n+Write-Host \"DisplayNameFilter: $DisplayNameFilter\"\n+Write-Host \"Filters: $Filters\"\n+Write-Host \"Replace: $Replace\"\n+Write-Host \"NonSparseParameters: $NonSparseParameters\"", + "comment_created_at": "2025-07-01T22:33:14+00:00", + "comment_author": "Copilot", + "comment_body": "[nitpick] The sequence of Write-Host statements for logging could be refactored into a loop or helper function to reduce duplication and improve maintainability.\n```suggestion\n$logEntries = @{\n \"Configuration File\" = $ConfigPath\n \"Raw Configuration\" = $rawConfig\n \"SelectionType\" = $Selection\n \"DisplayNameFilter\" = $DisplayNameFilter\n \"Filters\" = $Filters\n \"Replace\" = $Replace\n \"NonSparseParameters\" = $NonSparseParameters\n}\nforeach ($key in $logEntries.Keys) {\n Write-Host \"$key: $($logEntries[$key])\"\n}\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2176461585", + "pr_number": 50970, + "pr_file": "eng/scripts/automation/GenerateAndBuildLib.ps1", + "created_at": "2025-07-01T05:30:04+00:00", + "commented_code": "if ($yml[\"parameters\"] -And $yml[\"parameters\"][\"service-dir\"]) {\n $service = $yml[\"parameters\"][\"service-dir\"][\"default\"];\n }\n if ($yml[\"options\"] -And $yml[\"options\"][\"@azure-tools/typespec-csharp\"] -And $yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"package-dir\"]) {\n $packageDir = $yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"package-dir\"]\n if ($yml[\"options\"] -And $yml[\"options\"][\"@azure-tools/typespec-csharp\"]) {\n if ($yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"package-dir\"]) {\n $packageDir = $yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"package-dir\"]\n }\n if ($yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"service-dir\"]) {\n $service = 
$yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"service-dir\"]", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2176461585", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50970, + "pr_file": "eng/scripts/automation/GenerateAndBuildLib.ps1", + "discussion_id": "2176461585", + "commented_code": "@@ -917,8 +917,13 @@ function GetSDKProjectFolder()\n if ($yml[\"parameters\"] -And $yml[\"parameters\"][\"service-dir\"]) {\n $service = $yml[\"parameters\"][\"service-dir\"][\"default\"];\n }\n- if ($yml[\"options\"] -And $yml[\"options\"][\"@azure-tools/typespec-csharp\"] -And $yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"package-dir\"]) {\n- $packageDir = $yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"package-dir\"]\n+ if ($yml[\"options\"] -And $yml[\"options\"][\"@azure-tools/typespec-csharp\"]) {\n+ if ($yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"package-dir\"]) {\n+ $packageDir = $yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"package-dir\"]\n+ }\n+ if ($yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"service-dir\"]) {\n+ $service = $yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"service-dir\"]", + "comment_created_at": "2025-07-01T05:30:04+00:00", + "comment_author": "Copilot", + "comment_body": "[nitpick] Consider assigning `$yml[\"options\"][\"@azure-tools/typespec-csharp\"]` to a local variable (e.g. `$csharpOpts`) to avoid repeated indexing and improve readability.\n```suggestion\n $csharpOpts = $yml[\"options\"][\"@azure-tools/typespec-csharp\"]\n if ($csharpOpts[\"package-dir\"]) {\n $packageDir = $csharpOpts[\"package-dir\"]\n }\n if ($csharpOpts[\"service-dir\"]) {\n $service = $csharpOpts[\"service-dir\"]\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-eliminate-repeated-code.md b/_reviewers/azure-sdk-for-net-eliminate-repeated-code.md index 65991d5..bebcc63 100644 --- a/_reviewers/azure-sdk-for-net-eliminate-repeated-code.md +++ b/_reviewers/azure-sdk-for-net-eliminate-repeated-code.md @@ -54,51 +54,3 @@ if ($csharpOpts["service-dir"]) { $service = $csharpOpts["service-dir"] } ``` - - -[ - { - "discussion_id": "2178653926", - "pr_number": 50998, - "pr_file": "eng/common/scripts/job-matrix/Create-JobMatrix.ps1", - "created_at": "2025-07-01T22:33:14+00:00", - "commented_code": ")\n\n. $PSScriptRoot/job-matrix-functions.ps1\n. 
$PSScriptRoot/../logging.ps1\n\nif (!(Test-Path $ConfigPath)) {\n Write-Error \"ConfigPath '$ConfigPath' does not exist.\"\n exit 1\n}\n$config = GetMatrixConfigFromFile (Get-Content $ConfigPath -Raw)\n$rawConfig = Get-Content $ConfigPath -Raw\n$config = GetMatrixConfigFromFile $rawConfig\n# Strip empty string filters in order to be able to use azure pipelines yaml join()\n$Filters = $Filters | Where-Object { $_ }\n\nLogGroupStart \"Matrix generation configuration\"\nWrite-Host \"Configuration File: $ConfigPath\"\nWrite-Host $rawConfig\nWrite-Host \"SelectionType: $Selection\"\nWrite-Host \"DisplayNameFilter: $DisplayNameFilter\"\nWrite-Host \"Filters: $Filters\"\nWrite-Host \"Replace: $Replace\"\nWrite-Host \"NonSparseParameters: $NonSparseParameters\"", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2178653926", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50998, - "pr_file": "eng/common/scripts/job-matrix/Create-JobMatrix.ps1", - "discussion_id": "2178653926", - "commented_code": "@@ -21,15 +21,27 @@ param (\n )\n \n . $PSScriptRoot/job-matrix-functions.ps1\n+. $PSScriptRoot/../logging.ps1\n \n if (!(Test-Path $ConfigPath)) {\n Write-Error \"ConfigPath '$ConfigPath' does not exist.\"\n exit 1\n }\n-$config = GetMatrixConfigFromFile (Get-Content $ConfigPath -Raw)\n+$rawConfig = Get-Content $ConfigPath -Raw\n+$config = GetMatrixConfigFromFile $rawConfig\n # Strip empty string filters in order to be able to use azure pipelines yaml join()\n $Filters = $Filters | Where-Object { $_ }\n \n+LogGroupStart \"Matrix generation configuration\"\n+Write-Host \"Configuration File: $ConfigPath\"\n+Write-Host $rawConfig\n+Write-Host \"SelectionType: $Selection\"\n+Write-Host \"DisplayNameFilter: $DisplayNameFilter\"\n+Write-Host \"Filters: $Filters\"\n+Write-Host \"Replace: $Replace\"\n+Write-Host \"NonSparseParameters: $NonSparseParameters\"", - "comment_created_at": "2025-07-01T22:33:14+00:00", - "comment_author": "Copilot", - "comment_body": "[nitpick] The sequence of Write-Host statements for logging could be refactored into a loop or helper function to reduce duplication and improve maintainability.\n```suggestion\n$logEntries = @{\n \"Configuration File\" = $ConfigPath\n \"Raw Configuration\" = $rawConfig\n \"SelectionType\" = $Selection\n \"DisplayNameFilter\" = $DisplayNameFilter\n \"Filters\" = $Filters\n \"Replace\" = $Replace\n \"NonSparseParameters\" = $NonSparseParameters\n}\nforeach ($key in $logEntries.Keys) {\n Write-Host \"$key: $($logEntries[$key])\"\n}\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2176461585", - "pr_number": 50970, - "pr_file": "eng/scripts/automation/GenerateAndBuildLib.ps1", - "created_at": "2025-07-01T05:30:04+00:00", - "commented_code": "if ($yml[\"parameters\"] -And $yml[\"parameters\"][\"service-dir\"]) {\n $service = $yml[\"parameters\"][\"service-dir\"][\"default\"];\n }\n if ($yml[\"options\"] -And $yml[\"options\"][\"@azure-tools/typespec-csharp\"] -And $yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"package-dir\"]) {\n $packageDir = $yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"package-dir\"]\n if ($yml[\"options\"] -And $yml[\"options\"][\"@azure-tools/typespec-csharp\"]) {\n if ($yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"package-dir\"]) {\n $packageDir = $yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"package-dir\"]\n }\n if ($yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"service-dir\"]) {\n $service = 
$yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"service-dir\"]", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2176461585", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50970, - "pr_file": "eng/scripts/automation/GenerateAndBuildLib.ps1", - "discussion_id": "2176461585", - "commented_code": "@@ -917,8 +917,13 @@ function GetSDKProjectFolder()\n if ($yml[\"parameters\"] -And $yml[\"parameters\"][\"service-dir\"]) {\n $service = $yml[\"parameters\"][\"service-dir\"][\"default\"];\n }\n- if ($yml[\"options\"] -And $yml[\"options\"][\"@azure-tools/typespec-csharp\"] -And $yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"package-dir\"]) {\n- $packageDir = $yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"package-dir\"]\n+ if ($yml[\"options\"] -And $yml[\"options\"][\"@azure-tools/typespec-csharp\"]) {\n+ if ($yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"package-dir\"]) {\n+ $packageDir = $yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"package-dir\"]\n+ }\n+ if ($yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"service-dir\"]) {\n+ $service = $yml[\"options\"][\"@azure-tools/typespec-csharp\"][\"service-dir\"]", - "comment_created_at": "2025-07-01T05:30:04+00:00", - "comment_author": "Copilot", - "comment_body": "[nitpick] Consider assigning `$yml[\"options\"][\"@azure-tools/typespec-csharp\"]` to a local variable (e.g. `$csharpOpts`) to avoid repeated indexing and improve readability.\n```suggestion\n $csharpOpts = $yml[\"options\"][\"@azure-tools/typespec-csharp\"]\n if ($csharpOpts[\"package-dir\"]) {\n $packageDir = $csharpOpts[\"package-dir\"]\n }\n if ($csharpOpts[\"service-dir\"]) {\n $service = $csharpOpts[\"service-dir\"]\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-externalize-configuration-values.json b/_reviewers/azure-sdk-for-net-externalize-configuration-values.json new file mode 100644 index 0000000..3cd2e7d --- /dev/null +++ b/_reviewers/azure-sdk-for-net-externalize-configuration-values.json @@ -0,0 +1,102 @@ +[ + { + "discussion_id": "2162710383", + "pr_number": 50802, + "pr_file": "eng/pipelines/docindex.yml", + "created_at": "2025-06-24T00:00:40+00:00", + "commented_code": "- job: UpdateDocsMsBuildConfig\n pool:\n name: $(LINUXPOOL)\n demands: $(LinuxImageDemand)\n demands: ImageOverride -equals $(LINUXVMIMAGE)", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2162710383", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50802, + "pr_file": "eng/pipelines/docindex.yml", + "discussion_id": "2162710383", + "commented_code": "@@ -34,7 +34,7 @@ jobs:\n - job: UpdateDocsMsBuildConfig\n pool:\n name: $(LINUXPOOL)\n- demands: $(LinuxImageDemand)\n+ demands: ImageOverride -equals $(LINUXVMIMAGE)", + "comment_created_at": "2025-06-24T00:00:40+00:00", + "comment_author": "Copilot", + "comment_body": "Consider centralizing the inline image demand expression in a shared variable to improve consistency and ease future maintenance.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2162710385", + "pr_number": 50802, + "pr_file": "eng/pipelines/aggregate-reports.yml", + "created_at": "2025-06-24T00:00:41+00:00", + "commented_code": "pool:\n name: $(WINDOWSPOOL)\n demands: $(WindowsImageDemand)\n demands: ImageOverride -equals $(WINDOWSVMIMAGE)\n\nvariables:", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2162710385", + 
"repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50802, + "pr_file": "eng/pipelines/aggregate-reports.yml", + "discussion_id": "2162710385", + "commented_code": "@@ -11,7 +11,7 @@ pr:\n \n pool:\n name: $(WINDOWSPOOL)\n- demands: $(WindowsImageDemand)\n+ demands: ImageOverride -equals $(WINDOWSVMIMAGE)\n \n variables:", + "comment_created_at": "2025-06-24T00:00:41+00:00", + "comment_author": "Copilot", + "comment_body": "Consider centralizing the inline image demand expression in a shared variable to improve consistency and ease future maintenance.\n```suggestion\n demands: $(IMAGE_DEMAND)\n\nvariables:\n - name: IMAGE_DEMAND\n value: ImageOverride -equals $(WINDOWSVMIMAGE)\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2113092739", + "pr_number": 50316, + "pr_file": "eng/common/pipelines/templates/steps/detect-api-changes.yml", + "created_at": "2025-05-29T03:12:50+00:00", + "commented_code": "steps:\n - pwsh: |\n $apiChangeDetectRequestUrl = \"https://apiview.dev/PullRequest/DetectApiChanges\"\n $apiChangeDetectRequestUrl = \"https://apiview.dev/api/PullRequests/CreateAPIRevisionIfAPIHasChanges\"", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2113092739", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50316, + "pr_file": "eng/common/pipelines/templates/steps/detect-api-changes.yml", + "discussion_id": "2113092739", + "commented_code": "@@ -6,7 +6,7 @@ parameters:\n \n steps:\n - pwsh: |\n- $apiChangeDetectRequestUrl = \"https://apiview.dev/PullRequest/DetectApiChanges\"\n+ $apiChangeDetectRequestUrl = \"https://apiview.dev/api/PullRequests/CreateAPIRevisionIfAPIHasChanges\"", + "comment_created_at": "2025-05-29T03:12:50+00:00", + "comment_author": "Copilot", + "comment_body": "It may be beneficial to extract this hardcoded URL into a pipeline variable group or template parameter to ensure consistency and simplify future updates.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2103307241", + "pr_number": 50221, + "pr_file": "eng/pipelines/docindex.yml", + "created_at": "2025-05-22T19:58:29+00:00", + "commented_code": "- job: UpdateDocsMsBuildConfig\n pool:\n name: $(LINUXPOOL)\n name: azsdk-pool", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2103307241", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50221, + "pr_file": "eng/pipelines/docindex.yml", + "discussion_id": "2103307241", + "commented_code": "@@ -33,7 +33,8 @@ jobs:\n \n - job: UpdateDocsMsBuildConfig\n pool:\n- name: $(LINUXPOOL)\n+ name: azsdk-pool", + "comment_created_at": "2025-05-22T19:58:29+00:00", + "comment_author": "weshaggard", + "comment_body": "Why not use the variables? It might also be interesting to add the demand to the image.yml variables. ", + "pr_file_module": null + }, + { + "comment_id": "2103738606", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50221, + "pr_file": "eng/pipelines/docindex.yml", + "discussion_id": "2103307241", + "commented_code": "@@ -33,7 +33,8 @@ jobs:\n \n - job: UpdateDocsMsBuildConfig\n pool:\n- name: $(LINUXPOOL)\n+ name: azsdk-pool", + "comment_created_at": "2025-05-23T03:52:28+00:00", + "comment_author": "danieljurek", + "comment_body": "Tested and the demand can be placed in a variable and it can reference variables (when the values are defined before job run time). 
", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-externalize-configuration-values.md b/_reviewers/azure-sdk-for-net-externalize-configuration-values.md index 916415e..368c9b4 100644 --- a/_reviewers/azure-sdk-for-net-externalize-configuration-values.md +++ b/_reviewers/azure-sdk-for-net-externalize-configuration-values.md @@ -31,107 +31,3 @@ variables: ``` For API endpoints, pipeline definitions, and other configuration values that may change over time, always use variables rather than hardcoding values directly in scripts or pipeline definitions. - - -[ - { - "discussion_id": "2162710383", - "pr_number": 50802, - "pr_file": "eng/pipelines/docindex.yml", - "created_at": "2025-06-24T00:00:40+00:00", - "commented_code": "- job: UpdateDocsMsBuildConfig\n pool:\n name: $(LINUXPOOL)\n demands: $(LinuxImageDemand)\n demands: ImageOverride -equals $(LINUXVMIMAGE)", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2162710383", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50802, - "pr_file": "eng/pipelines/docindex.yml", - "discussion_id": "2162710383", - "commented_code": "@@ -34,7 +34,7 @@ jobs:\n - job: UpdateDocsMsBuildConfig\n pool:\n name: $(LINUXPOOL)\n- demands: $(LinuxImageDemand)\n+ demands: ImageOverride -equals $(LINUXVMIMAGE)", - "comment_created_at": "2025-06-24T00:00:40+00:00", - "comment_author": "Copilot", - "comment_body": "Consider centralizing the inline image demand expression in a shared variable to improve consistency and ease future maintenance.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2162710385", - "pr_number": 50802, - "pr_file": "eng/pipelines/aggregate-reports.yml", - "created_at": "2025-06-24T00:00:41+00:00", - "commented_code": "pool:\n name: $(WINDOWSPOOL)\n demands: $(WindowsImageDemand)\n demands: ImageOverride -equals $(WINDOWSVMIMAGE)\n\nvariables:", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2162710385", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50802, - "pr_file": "eng/pipelines/aggregate-reports.yml", - "discussion_id": "2162710385", - "commented_code": "@@ -11,7 +11,7 @@ pr:\n \n pool:\n name: $(WINDOWSPOOL)\n- demands: $(WindowsImageDemand)\n+ demands: ImageOverride -equals $(WINDOWSVMIMAGE)\n \n variables:", - "comment_created_at": "2025-06-24T00:00:41+00:00", - "comment_author": "Copilot", - "comment_body": "Consider centralizing the inline image demand expression in a shared variable to improve consistency and ease future maintenance.\n```suggestion\n demands: $(IMAGE_DEMAND)\n\nvariables:\n - name: IMAGE_DEMAND\n value: ImageOverride -equals $(WINDOWSVMIMAGE)\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2113092739", - "pr_number": 50316, - "pr_file": "eng/common/pipelines/templates/steps/detect-api-changes.yml", - "created_at": "2025-05-29T03:12:50+00:00", - "commented_code": "steps:\n - pwsh: |\n $apiChangeDetectRequestUrl = \"https://apiview.dev/PullRequest/DetectApiChanges\"\n $apiChangeDetectRequestUrl = \"https://apiview.dev/api/PullRequests/CreateAPIRevisionIfAPIHasChanges\"", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2113092739", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50316, - "pr_file": "eng/common/pipelines/templates/steps/detect-api-changes.yml", - "discussion_id": "2113092739", - "commented_code": "@@ -6,7 +6,7 @@ parameters:\n 
\n steps:\n - pwsh: |\n- $apiChangeDetectRequestUrl = \"https://apiview.dev/PullRequest/DetectApiChanges\"\n+ $apiChangeDetectRequestUrl = \"https://apiview.dev/api/PullRequests/CreateAPIRevisionIfAPIHasChanges\"", - "comment_created_at": "2025-05-29T03:12:50+00:00", - "comment_author": "Copilot", - "comment_body": "It may be beneficial to extract this hardcoded URL into a pipeline variable group or template parameter to ensure consistency and simplify future updates.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2103307241", - "pr_number": 50221, - "pr_file": "eng/pipelines/docindex.yml", - "created_at": "2025-05-22T19:58:29+00:00", - "commented_code": "- job: UpdateDocsMsBuildConfig\n pool:\n name: $(LINUXPOOL)\n name: azsdk-pool", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2103307241", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50221, - "pr_file": "eng/pipelines/docindex.yml", - "discussion_id": "2103307241", - "commented_code": "@@ -33,7 +33,8 @@ jobs:\n \n - job: UpdateDocsMsBuildConfig\n pool:\n- name: $(LINUXPOOL)\n+ name: azsdk-pool", - "comment_created_at": "2025-05-22T19:58:29+00:00", - "comment_author": "weshaggard", - "comment_body": "Why not use the variables? It might also be interesting to add the demand to the image.yml variables. ", - "pr_file_module": null - }, - { - "comment_id": "2103738606", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50221, - "pr_file": "eng/pipelines/docindex.yml", - "discussion_id": "2103307241", - "commented_code": "@@ -33,7 +33,8 @@ jobs:\n \n - job: UpdateDocsMsBuildConfig\n pool:\n- name: $(LINUXPOOL)\n+ name: azsdk-pool", - "comment_created_at": "2025-05-23T03:52:28+00:00", - "comment_author": "danieljurek", - "comment_body": "Tested and the demand can be placed in a variable and it can reference variables (when the values are defined before job run time). 
", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-externalize-sensitive-credentials.json b/_reviewers/azure-sdk-for-net-externalize-sensitive-credentials.json new file mode 100644 index 0000000..5c3b8f0 --- /dev/null +++ b/_reviewers/azure-sdk-for-net-externalize-sensitive-credentials.json @@ -0,0 +1,24 @@ +[ + { + "discussion_id": "2109943039", + "pr_number": 50282, + "pr_file": "eng/common/pipelines/templates/jobs/npm-publish.yml", + "created_at": "2025-05-27T18:55:49+00:00", + "commented_code": "parameters:\n Tag: 'latest'\n ArtifactName: 'packages'\n DependsOn: []\n Environment: 'package-publish'\n Registry: 'https://registry.npmjs.org/'\n\njobs:\n- deployment: PublishPackage_${{ parameters.ArtifactName }}\n displayName: 'Publish ${{ parameters.ArtifactName }} to ${{ parameters.Registry }}' \n environment: ${{ parameters.Environment }}\n dependsOn: ${{ parameters.DependsOn }}\n variables:\n - name: ArtifactPath\n value: $(Pipeline.Workspace)/${{ parameters.ArtifactName }}\n\n templateContext:\n type: releaseJob\n isProduction: ${{ eq(parameters.Registry, 'https://registry.npmjs.org/') }}\n inputs:\n - input: pipelineArtifact\n artifactName: ${{ parameters.ArtifactName }}\n itemPattern: '**/*.tgz'\n targetPath: $(ArtifactPath)\n\n pool:\n name: azsdk-pool\n image: ubuntu-24.04\n os: linux\n \n strategy:\n runOnce:\n deploy:\n steps:\n - pwsh: |\n foreach ($package in (dir $(ArtifactPath) *.tgz -Recurse)) {\n Write-Host \"Publishing $package to ${{ parameters.Registry }} with tag ${{ parameters.Tag }}\"\n }\n displayName: 'Display packages to be published'\n\n - ${{ if eq(parameters.Registry, 'https://registry.npmjs.org/') }}:\n - task: EsrpRelease@9\n displayName: 'Publish ${{ parameters.ArtifactName }} via ESRP'\n inputs:\n ConnectedServiceName: 'Azure SDK PME Managed Identity'\n ClientId: '5f81938c-2544-4f1f-9251-dd9de5b8a81b'", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2109943039", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50282, + "pr_file": "eng/common/pipelines/templates/jobs/npm-publish.yml", + "discussion_id": "2109943039", + "commented_code": "@@ -0,0 +1,73 @@\n+parameters:\n+ Tag: 'latest'\n+ ArtifactName: 'packages'\n+ DependsOn: []\n+ Environment: 'package-publish'\n+ Registry: 'https://registry.npmjs.org/'\n+\n+jobs:\n+- deployment: PublishPackage_${{ parameters.ArtifactName }}\n+ displayName: 'Publish ${{ parameters.ArtifactName }} to ${{ parameters.Registry }}' \n+ environment: ${{ parameters.Environment }}\n+ dependsOn: ${{ parameters.DependsOn }}\n+ variables:\n+ - name: ArtifactPath\n+ value: $(Pipeline.Workspace)/${{ parameters.ArtifactName }}\n+\n+ templateContext:\n+ type: releaseJob\n+ isProduction: ${{ eq(parameters.Registry, 'https://registry.npmjs.org/') }}\n+ inputs:\n+ - input: pipelineArtifact\n+ artifactName: ${{ parameters.ArtifactName }}\n+ itemPattern: '**/*.tgz'\n+ targetPath: $(ArtifactPath)\n+\n+ pool:\n+ name: azsdk-pool\n+ image: ubuntu-24.04\n+ os: linux\n+ \n+ strategy:\n+ runOnce:\n+ deploy:\n+ steps:\n+ - pwsh: |\n+ foreach ($package in (dir $(ArtifactPath) *.tgz -Recurse)) {\n+ Write-Host \"Publishing $package to ${{ parameters.Registry }} with tag ${{ parameters.Tag }}\"\n+ }\n+ displayName: 'Display packages to be published'\n+\n+ - ${{ if eq(parameters.Registry, 'https://registry.npmjs.org/') }}:\n+ - task: EsrpRelease@9\n+ displayName: 'Publish ${{ parameters.ArtifactName }} via ESRP'\n+ inputs:\n+ ConnectedServiceName: 'Azure SDK PME 
Managed Identity'\n+ ClientId: '5f81938c-2544-4f1f-9251-dd9de5b8a81b'", + "comment_created_at": "2025-05-27T18:55:49+00:00", + "comment_author": "Copilot", + "comment_body": "Ensure that client identifiers and similar sensitive configuration values are managed securely, for example via secret variable groups, rather than being hard-coded in the pipeline file.\n```suggestion\n ClientId: $(ClientId)\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-externalize-sensitive-credentials.md b/_reviewers/azure-sdk-for-net-externalize-sensitive-credentials.md index 49bca85..e320424 100644 --- a/_reviewers/azure-sdk-for-net-externalize-sensitive-credentials.md +++ b/_reviewers/azure-sdk-for-net-externalize-sensitive-credentials.md @@ -26,29 +26,3 @@ task: EsrpRelease@9 inputs: ClientId: $(ClientId) ``` - - -[ - { - "discussion_id": "2109943039", - "pr_number": 50282, - "pr_file": "eng/common/pipelines/templates/jobs/npm-publish.yml", - "created_at": "2025-05-27T18:55:49+00:00", - "commented_code": "parameters:\n Tag: 'latest'\n ArtifactName: 'packages'\n DependsOn: []\n Environment: 'package-publish'\n Registry: 'https://registry.npmjs.org/'\n\njobs:\n- deployment: PublishPackage_${{ parameters.ArtifactName }}\n displayName: 'Publish ${{ parameters.ArtifactName }} to ${{ parameters.Registry }}' \n environment: ${{ parameters.Environment }}\n dependsOn: ${{ parameters.DependsOn }}\n variables:\n - name: ArtifactPath\n value: $(Pipeline.Workspace)/${{ parameters.ArtifactName }}\n\n templateContext:\n type: releaseJob\n isProduction: ${{ eq(parameters.Registry, 'https://registry.npmjs.org/') }}\n inputs:\n - input: pipelineArtifact\n artifactName: ${{ parameters.ArtifactName }}\n itemPattern: '**/*.tgz'\n targetPath: $(ArtifactPath)\n\n pool:\n name: azsdk-pool\n image: ubuntu-24.04\n os: linux\n \n strategy:\n runOnce:\n deploy:\n steps:\n - pwsh: |\n foreach ($package in (dir $(ArtifactPath) *.tgz -Recurse)) {\n Write-Host \"Publishing $package to ${{ parameters.Registry }} with tag ${{ parameters.Tag }}\"\n }\n displayName: 'Display packages to be published'\n\n - ${{ if eq(parameters.Registry, 'https://registry.npmjs.org/') }}:\n - task: EsrpRelease@9\n displayName: 'Publish ${{ parameters.ArtifactName }} via ESRP'\n inputs:\n ConnectedServiceName: 'Azure SDK PME Managed Identity'\n ClientId: '5f81938c-2544-4f1f-9251-dd9de5b8a81b'", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2109943039", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50282, - "pr_file": "eng/common/pipelines/templates/jobs/npm-publish.yml", - "discussion_id": "2109943039", - "commented_code": "@@ -0,0 +1,73 @@\n+parameters:\n+ Tag: 'latest'\n+ ArtifactName: 'packages'\n+ DependsOn: []\n+ Environment: 'package-publish'\n+ Registry: 'https://registry.npmjs.org/'\n+\n+jobs:\n+- deployment: PublishPackage_${{ parameters.ArtifactName }}\n+ displayName: 'Publish ${{ parameters.ArtifactName }} to ${{ parameters.Registry }}' \n+ environment: ${{ parameters.Environment }}\n+ dependsOn: ${{ parameters.DependsOn }}\n+ variables:\n+ - name: ArtifactPath\n+ value: $(Pipeline.Workspace)/${{ parameters.ArtifactName }}\n+\n+ templateContext:\n+ type: releaseJob\n+ isProduction: ${{ eq(parameters.Registry, 'https://registry.npmjs.org/') }}\n+ inputs:\n+ - input: pipelineArtifact\n+ artifactName: ${{ parameters.ArtifactName }}\n+ itemPattern: '**/*.tgz'\n+ targetPath: $(ArtifactPath)\n+\n+ pool:\n+ name: azsdk-pool\n+ 
image: ubuntu-24.04\n+ os: linux\n+ \n+ strategy:\n+ runOnce:\n+ deploy:\n+ steps:\n+ - pwsh: |\n+ foreach ($package in (dir $(ArtifactPath) *.tgz -Recurse)) {\n+ Write-Host \"Publishing $package to ${{ parameters.Registry }} with tag ${{ parameters.Tag }}\"\n+ }\n+ displayName: 'Display packages to be published'\n+\n+ - ${{ if eq(parameters.Registry, 'https://registry.npmjs.org/') }}:\n+ - task: EsrpRelease@9\n+ displayName: 'Publish ${{ parameters.ArtifactName }} via ESRP'\n+ inputs:\n+ ConnectedServiceName: 'Azure SDK PME Managed Identity'\n+ ClientId: '5f81938c-2544-4f1f-9251-dd9de5b8a81b'", - "comment_created_at": "2025-05-27T18:55:49+00:00", - "comment_author": "Copilot", - "comment_body": "Ensure that client identifiers and similar sensitive configuration values are managed securely, for example via secret variable groups, rather than being hard-coded in the pipeline file.\n```suggestion\n ClientId: $(ClientId)\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-handle-external-process-errors.json b/_reviewers/azure-sdk-for-net-handle-external-process-errors.json new file mode 100644 index 0000000..d0ea904 --- /dev/null +++ b/_reviewers/azure-sdk-for-net-handle-external-process-errors.json @@ -0,0 +1,118 @@ +[ + { + "discussion_id": "2139151065", + "pr_number": 50539, + "pr_file": "eng/scripts/Language-Settings.ps1", + "created_at": "2025-06-11T04:08:56+00:00", + "commented_code": "# Save-Package-Properties.ps1\n $shouldAddDevVersion = Get-Variable -Name 'addDevVersion' -ValueOnly -ErrorAction 'Ignore'\n $ServiceProj = Join-Path -Path $EngDir -ChildPath \"service.proj\"\n Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion -tl:off\"\n $outputFilePath = Join-Path ([System.IO.Path]::GetTempPath()) \"package-info-$([System.Guid]::NewGuid()).txt\"\n \n Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion /p:OutputProjectInfoListFilePath=\"\"$outputFilePath\"\" -tl:off\"\n\n $msbuildOutput = dotnet msbuild `", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2139151065", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50539, + "pr_file": "eng/scripts/Language-Settings.ps1", + "discussion_id": "2139151065", + "commented_code": "@@ -16,17 +16,27 @@ function Get-AllPackageInfoFromRepo($serviceDirectory)\n # Save-Package-Properties.ps1\n $shouldAddDevVersion = Get-Variable -Name 'addDevVersion' -ValueOnly -ErrorAction 'Ignore'\n $ServiceProj = Join-Path -Path $EngDir -ChildPath \"service.proj\"\n- Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion -tl:off\"\n+ $outputFilePath = Join-Path ([System.IO.Path]::GetTempPath()) \"package-info-$([System.Guid]::NewGuid()).txt\"\n+ \n+ Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion /p:OutputProjectInfoListFilePath=\"\"$outputFilePath\"\" -tl:off\"\n \n $msbuildOutput = dotnet msbuild `", + "comment_created_at": "2025-06-11T04:08:56+00:00", + "comment_author": "weshaggard", + "comment_body": "@copilot please remove the $msbuildOutput variable as it is no longer used. Also add error handling to fail if this msbuild call fails. 
", + "pr_file_module": null + }, + { + "comment_id": "2139154406", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50539, + "pr_file": "eng/scripts/Language-Settings.ps1", + "discussion_id": "2139151065", + "commented_code": "@@ -16,17 +16,27 @@ function Get-AllPackageInfoFromRepo($serviceDirectory)\n # Save-Package-Properties.ps1\n $shouldAddDevVersion = Get-Variable -Name 'addDevVersion' -ValueOnly -ErrorAction 'Ignore'\n $ServiceProj = Join-Path -Path $EngDir -ChildPath \"service.proj\"\n- Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion -tl:off\"\n+ $outputFilePath = Join-Path ([System.IO.Path]::GetTempPath()) \"package-info-$([System.Guid]::NewGuid()).txt\"\n+ \n+ Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion /p:OutputProjectInfoListFilePath=\"\"$outputFilePath\"\" -tl:off\"\n \n $msbuildOutput = dotnet msbuild `", + "comment_created_at": "2025-06-11T04:13:10+00:00", + "comment_author": "Copilot", + "comment_body": "Removed the unused $msbuildOutput variable and added error handling to fail if msbuild call fails. The script now throws an exception with the exit code when msbuild fails and properly cleans up the temporary file. Commit: 48ab62b", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2139217607", + "pr_number": 50539, + "pr_file": "eng/scripts/Language-Settings.ps1", + "created_at": "2025-06-11T05:18:29+00:00", + "commented_code": "# Save-Package-Properties.ps1\n $shouldAddDevVersion = Get-Variable -Name 'addDevVersion' -ValueOnly -ErrorAction 'Ignore'\n $ServiceProj = Join-Path -Path $EngDir -ChildPath \"service.proj\"\n Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion -tl:off\"\n $outputFilePath = Join-Path ([System.IO.Path]::GetTempPath()) \"package-info-$([System.Guid]::NewGuid()).txt\"", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2139217607", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50539, + "pr_file": "eng/scripts/Language-Settings.ps1", + "discussion_id": "2139217607", + "commented_code": "@@ -16,22 +16,41 @@ function Get-AllPackageInfoFromRepo($serviceDirectory)\n # Save-Package-Properties.ps1\n $shouldAddDevVersion = Get-Variable -Name 'addDevVersion' -ValueOnly -ErrorAction 'Ignore'\n $ServiceProj = Join-Path -Path $EngDir -ChildPath \"service.proj\"\n- Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion -tl:off\"\n+ $outputFilePath = Join-Path ([System.IO.Path]::GetTempPath()) \"package-info-$([System.Guid]::NewGuid()).txt\"", + "comment_created_at": "2025-06-11T05:18:29+00:00", + "comment_author": "weshaggard", + "comment_body": "@copilot this function is somehow causing an empty string to be added to the returned array of objects. 
What change do you think caused it to add an empty string to the object pipeline?", + "pr_file_module": null + }, + { + "comment_id": "2139228464", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50539, + "pr_file": "eng/scripts/Language-Settings.ps1", + "discussion_id": "2139217607", + "commented_code": "@@ -16,22 +16,41 @@ function Get-AllPackageInfoFromRepo($serviceDirectory)\n # Save-Package-Properties.ps1\n $shouldAddDevVersion = Get-Variable -Name 'addDevVersion' -ValueOnly -ErrorAction 'Ignore'\n $ServiceProj = Join-Path -Path $EngDir -ChildPath \"service.proj\"\n- Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion -tl:off\"\n+ $outputFilePath = Join-Path ([System.IO.Path]::GetTempPath()) \"package-info-$([System.Guid]::NewGuid()).txt\"", + "comment_created_at": "2025-06-11T05:23:46+00:00", + "comment_author": "Copilot", + "comment_body": "The issue was caused by `Get-Content` returning empty lines from the file, which were then included in the returned array. Fixed by filtering out empty or whitespace-only lines when reading the package info file: `Get-Content $outputFilePath | Where-Object { $_ -and $_.Trim() }`. Commit: a8e609f", + "pr_file_module": null + }, + { + "comment_id": "2139233160", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50539, + "pr_file": "eng/scripts/Language-Settings.ps1", + "discussion_id": "2139217607", + "commented_code": "@@ -16,22 +16,41 @@ function Get-AllPackageInfoFromRepo($serviceDirectory)\n # Save-Package-Properties.ps1\n $shouldAddDevVersion = Get-Variable -Name 'addDevVersion' -ValueOnly -ErrorAction 'Ignore'\n $ServiceProj = Join-Path -Path $EngDir -ChildPath \"service.proj\"\n- Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion -tl:off\"\n+ $outputFilePath = Join-Path ([System.IO.Path]::GetTempPath()) \"package-info-$([System.Guid]::NewGuid()).txt\"", + "comment_created_at": "2025-06-11T05:26:44+00:00", + "comment_author": "weshaggard", + "comment_body": "I don't think that is the error because the loop skips any lines that are empty and do not parse correctly. There must be some other part of this function that is writing to the powershell object pipeline. Does the msbuild call or the remove-item return an object that could be an empty string?\r\n", + "pr_file_module": null + }, + { + "comment_id": "2139238868", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50539, + "pr_file": "eng/scripts/Language-Settings.ps1", + "discussion_id": "2139217607", + "commented_code": "@@ -16,22 +16,41 @@ function Get-AllPackageInfoFromRepo($serviceDirectory)\n # Save-Package-Properties.ps1\n $shouldAddDevVersion = Get-Variable -Name 'addDevVersion' -ValueOnly -ErrorAction 'Ignore'\n $ServiceProj = Join-Path -Path $EngDir -ChildPath \"service.proj\"\n- Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion -tl:off\"\n+ $outputFilePath = Join-Path ([System.IO.Path]::GetTempPath()) \"package-info-$([System.Guid]::NewGuid()).txt\"", + "comment_created_at": "2025-06-11T05:30:39+00:00", + "comment_author": "Copilot", + "comment_body": "You're absolutely right! 
The issue was that the `dotnet msbuild` command on lines 23-30 wasn't being captured, so its output was going to the PowerShell object pipeline and being included in the function's return value. Fixed by assigning the command output to `$null` to prevent it from polluting the pipeline. Also captured the `Remove-Item` commands for consistency. Commit: c5293f3", + "pr_file_module": null + }, + { + "comment_id": "2140451115", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50539, + "pr_file": "eng/scripts/Language-Settings.ps1", + "discussion_id": "2139217607", + "commented_code": "@@ -16,22 +16,41 @@ function Get-AllPackageInfoFromRepo($serviceDirectory)\n # Save-Package-Properties.ps1\n $shouldAddDevVersion = Get-Variable -Name 'addDevVersion' -ValueOnly -ErrorAction 'Ignore'\n $ServiceProj = Join-Path -Path $EngDir -ChildPath \"service.proj\"\n- Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion -tl:off\"\n+ $outputFilePath = Join-Path ([System.IO.Path]::GetTempPath()) \"package-info-$([System.Guid]::NewGuid()).txt\"", + "comment_created_at": "2025-06-11T15:09:23+00:00", + "comment_author": "weshaggard", + "comment_body": "@copilot If we assign the dotnet msbuild command output to $null how will we see the errors that come out of that command if there are any?", + "pr_file_module": null + }, + { + "comment_id": "2140461456", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50539, + "pr_file": "eng/scripts/Language-Settings.ps1", + "discussion_id": "2139217607", + "commented_code": "@@ -16,22 +16,41 @@ function Get-AllPackageInfoFromRepo($serviceDirectory)\n # Save-Package-Properties.ps1\n $shouldAddDevVersion = Get-Variable -Name 'addDevVersion' -ValueOnly -ErrorAction 'Ignore'\n $ServiceProj = Join-Path -Path $EngDir -ChildPath \"service.proj\"\n- Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion -tl:off\"\n+ $outputFilePath = Join-Path ([System.IO.Path]::GetTempPath()) \"package-info-$([System.Guid]::NewGuid()).txt\"", + "comment_created_at": "2025-06-11T15:14:11+00:00", + "comment_author": "Copilot", + "comment_body": "You're absolutely right! I've changed the approach to use `Out-Host` instead of assigning to `$null`. This way, all msbuild output (including error messages) is displayed to users, but it doesn't pollute the PowerShell object pipeline. The error handling via `$LASTEXITCODE` still works correctly. Commit: 6d6ed17", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-handle-external-process-errors.md b/_reviewers/azure-sdk-for-net-handle-external-process-errors.md index 65b192d..1a1094a 100644 --- a/_reviewers/azure-sdk-for-net-handle-external-process-errors.md +++ b/_reviewers/azure-sdk-for-net-handle-external-process-errors.md @@ -42,123 +42,3 @@ finally { ``` This approach ensures scripts fail fast when external processes fail, preserves error visibility, maintains clean function return values, and properly manages resources. 
- - -[ - { - "discussion_id": "2139151065", - "pr_number": 50539, - "pr_file": "eng/scripts/Language-Settings.ps1", - "created_at": "2025-06-11T04:08:56+00:00", - "commented_code": "# Save-Package-Properties.ps1\n $shouldAddDevVersion = Get-Variable -Name 'addDevVersion' -ValueOnly -ErrorAction 'Ignore'\n $ServiceProj = Join-Path -Path $EngDir -ChildPath \"service.proj\"\n Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion -tl:off\"\n $outputFilePath = Join-Path ([System.IO.Path]::GetTempPath()) \"package-info-$([System.Guid]::NewGuid()).txt\"\n \n Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion /p:OutputProjectInfoListFilePath=\"\"$outputFilePath\"\" -tl:off\"\n\n $msbuildOutput = dotnet msbuild `", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2139151065", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50539, - "pr_file": "eng/scripts/Language-Settings.ps1", - "discussion_id": "2139151065", - "commented_code": "@@ -16,17 +16,27 @@ function Get-AllPackageInfoFromRepo($serviceDirectory)\n # Save-Package-Properties.ps1\n $shouldAddDevVersion = Get-Variable -Name 'addDevVersion' -ValueOnly -ErrorAction 'Ignore'\n $ServiceProj = Join-Path -Path $EngDir -ChildPath \"service.proj\"\n- Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion -tl:off\"\n+ $outputFilePath = Join-Path ([System.IO.Path]::GetTempPath()) \"package-info-$([System.Guid]::NewGuid()).txt\"\n+ \n+ Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion /p:OutputProjectInfoListFilePath=\"\"$outputFilePath\"\" -tl:off\"\n \n $msbuildOutput = dotnet msbuild `", - "comment_created_at": "2025-06-11T04:08:56+00:00", - "comment_author": "weshaggard", - "comment_body": "@copilot please remove the $msbuildOutput variable as it is no longer used. Also add error handling to fail if this msbuild call fails. ", - "pr_file_module": null - }, - { - "comment_id": "2139154406", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50539, - "pr_file": "eng/scripts/Language-Settings.ps1", - "discussion_id": "2139151065", - "commented_code": "@@ -16,17 +16,27 @@ function Get-AllPackageInfoFromRepo($serviceDirectory)\n # Save-Package-Properties.ps1\n $shouldAddDevVersion = Get-Variable -Name 'addDevVersion' -ValueOnly -ErrorAction 'Ignore'\n $ServiceProj = Join-Path -Path $EngDir -ChildPath \"service.proj\"\n- Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion -tl:off\"\n+ $outputFilePath = Join-Path ([System.IO.Path]::GetTempPath()) \"package-info-$([System.Guid]::NewGuid()).txt\"\n+ \n+ Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion /p:OutputProjectInfoListFilePath=\"\"$outputFilePath\"\" -tl:off\"\n \n $msbuildOutput = dotnet msbuild `", - "comment_created_at": "2025-06-11T04:13:10+00:00", - "comment_author": "Copilot", - "comment_body": "Removed the unused $msbuildOutput variable and added error handling to fail if msbuild call fails. 
The script now throws an exception with the exit code when msbuild fails and properly cleans up the temporary file. Commit: 48ab62b", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2139217607", - "pr_number": 50539, - "pr_file": "eng/scripts/Language-Settings.ps1", - "created_at": "2025-06-11T05:18:29+00:00", - "commented_code": "# Save-Package-Properties.ps1\n $shouldAddDevVersion = Get-Variable -Name 'addDevVersion' -ValueOnly -ErrorAction 'Ignore'\n $ServiceProj = Join-Path -Path $EngDir -ChildPath \"service.proj\"\n Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion -tl:off\"\n $outputFilePath = Join-Path ([System.IO.Path]::GetTempPath()) \"package-info-$([System.Guid]::NewGuid()).txt\"", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2139217607", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50539, - "pr_file": "eng/scripts/Language-Settings.ps1", - "discussion_id": "2139217607", - "commented_code": "@@ -16,22 +16,41 @@ function Get-AllPackageInfoFromRepo($serviceDirectory)\n # Save-Package-Properties.ps1\n $shouldAddDevVersion = Get-Variable -Name 'addDevVersion' -ValueOnly -ErrorAction 'Ignore'\n $ServiceProj = Join-Path -Path $EngDir -ChildPath \"service.proj\"\n- Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion -tl:off\"\n+ $outputFilePath = Join-Path ([System.IO.Path]::GetTempPath()) \"package-info-$([System.Guid]::NewGuid()).txt\"", - "comment_created_at": "2025-06-11T05:18:29+00:00", - "comment_author": "weshaggard", - "comment_body": "@copilot this function is somehow causing an empty string to be added to the returned array of objects. What change do you think caused it to add an empty string to the object pipeline?", - "pr_file_module": null - }, - { - "comment_id": "2139228464", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50539, - "pr_file": "eng/scripts/Language-Settings.ps1", - "discussion_id": "2139217607", - "commented_code": "@@ -16,22 +16,41 @@ function Get-AllPackageInfoFromRepo($serviceDirectory)\n # Save-Package-Properties.ps1\n $shouldAddDevVersion = Get-Variable -Name 'addDevVersion' -ValueOnly -ErrorAction 'Ignore'\n $ServiceProj = Join-Path -Path $EngDir -ChildPath \"service.proj\"\n- Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion -tl:off\"\n+ $outputFilePath = Join-Path ([System.IO.Path]::GetTempPath()) \"package-info-$([System.Guid]::NewGuid()).txt\"", - "comment_created_at": "2025-06-11T05:23:46+00:00", - "comment_author": "Copilot", - "comment_body": "The issue was caused by `Get-Content` returning empty lines from the file, which were then included in the returned array. Fixed by filtering out empty or whitespace-only lines when reading the package info file: `Get-Content $outputFilePath | Where-Object { $_ -and $_.Trim() }`. 
Commit: a8e609f", - "pr_file_module": null - }, - { - "comment_id": "2139233160", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50539, - "pr_file": "eng/scripts/Language-Settings.ps1", - "discussion_id": "2139217607", - "commented_code": "@@ -16,22 +16,41 @@ function Get-AllPackageInfoFromRepo($serviceDirectory)\n # Save-Package-Properties.ps1\n $shouldAddDevVersion = Get-Variable -Name 'addDevVersion' -ValueOnly -ErrorAction 'Ignore'\n $ServiceProj = Join-Path -Path $EngDir -ChildPath \"service.proj\"\n- Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion -tl:off\"\n+ $outputFilePath = Join-Path ([System.IO.Path]::GetTempPath()) \"package-info-$([System.Guid]::NewGuid()).txt\"", - "comment_created_at": "2025-06-11T05:26:44+00:00", - "comment_author": "weshaggard", - "comment_body": "I don't think that is the error because the loop skips any lines that are empty and do not parse correctly. There must be some other part of this function that is writing to the powershell object pipeline. Does the msbuild call or the remove-item return an object that could be an empty string?\r\n", - "pr_file_module": null - }, - { - "comment_id": "2139238868", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50539, - "pr_file": "eng/scripts/Language-Settings.ps1", - "discussion_id": "2139217607", - "commented_code": "@@ -16,22 +16,41 @@ function Get-AllPackageInfoFromRepo($serviceDirectory)\n # Save-Package-Properties.ps1\n $shouldAddDevVersion = Get-Variable -Name 'addDevVersion' -ValueOnly -ErrorAction 'Ignore'\n $ServiceProj = Join-Path -Path $EngDir -ChildPath \"service.proj\"\n- Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion -tl:off\"\n+ $outputFilePath = Join-Path ([System.IO.Path]::GetTempPath()) \"package-info-$([System.Guid]::NewGuid()).txt\"", - "comment_created_at": "2025-06-11T05:30:39+00:00", - "comment_author": "Copilot", - "comment_body": "You're absolutely right! The issue was that the `dotnet msbuild` command on lines 23-30 wasn't being captured, so its output was going to the PowerShell object pipeline and being included in the function's return value. Fixed by assigning the command output to `$null` to prevent it from polluting the pipeline. Also captured the `Remove-Item` commands for consistency. 
Commit: c5293f3", - "pr_file_module": null - }, - { - "comment_id": "2140451115", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50539, - "pr_file": "eng/scripts/Language-Settings.ps1", - "discussion_id": "2139217607", - "commented_code": "@@ -16,22 +16,41 @@ function Get-AllPackageInfoFromRepo($serviceDirectory)\n # Save-Package-Properties.ps1\n $shouldAddDevVersion = Get-Variable -Name 'addDevVersion' -ValueOnly -ErrorAction 'Ignore'\n $ServiceProj = Join-Path -Path $EngDir -ChildPath \"service.proj\"\n- Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion -tl:off\"\n+ $outputFilePath = Join-Path ([System.IO.Path]::GetTempPath()) \"package-info-$([System.Guid]::NewGuid()).txt\"", - "comment_created_at": "2025-06-11T15:09:23+00:00", - "comment_author": "weshaggard", - "comment_body": "@copilot If we assign the dotnet msbuild command output to $null how will we see the errors that come out of that command if there are any?", - "pr_file_module": null - }, - { - "comment_id": "2140461456", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50539, - "pr_file": "eng/scripts/Language-Settings.ps1", - "discussion_id": "2139217607", - "commented_code": "@@ -16,22 +16,41 @@ function Get-AllPackageInfoFromRepo($serviceDirectory)\n # Save-Package-Properties.ps1\n $shouldAddDevVersion = Get-Variable -Name 'addDevVersion' -ValueOnly -ErrorAction 'Ignore'\n $ServiceProj = Join-Path -Path $EngDir -ChildPath \"service.proj\"\n- Write-Host \"dotnet msbuild /nologo /t:GetPackageInfo \"\"$ServiceProj\"\" /p:ServiceDirectory=$serviceDirectory /p:AddDevVersion=$shouldAddDevVersion -tl:off\"\n+ $outputFilePath = Join-Path ([System.IO.Path]::GetTempPath()) \"package-info-$([System.Guid]::NewGuid()).txt\"", - "comment_created_at": "2025-06-11T15:14:11+00:00", - "comment_author": "Copilot", - "comment_body": "You're absolutely right! I've changed the approach to use `Out-Host` instead of assigning to `$null`. This way, all msbuild output (including error messages) is displayed to users, but it doesn't pollute the PowerShell object pipeline. The error handling via `$LASTEXITCODE` still works correctly. 
Commit: 6d6ed17", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-maintain-clean-code-structure.json b/_reviewers/azure-sdk-for-net-maintain-clean-code-structure.json new file mode 100644 index 0000000..91900d4 --- /dev/null +++ b/_reviewers/azure-sdk-for-net-maintain-clean-code-structure.json @@ -0,0 +1,112 @@ +[ + { + "discussion_id": "2128234500", + "pr_number": 50447, + "pr_file": "sdk/core/Azure.Core.Expressions.DataFactory/tests/DataFactoryElementTests.cs", + "created_at": "2025-06-05T08:15:13+00:00", + "commented_code": "private string GetSerializedString(DataFactoryElement payload)\n {\n using var ms = new MemoryStream();\n using Utf8JsonWriter writer = new Utf8JsonWriter(ms);\n JsonSerializer.Serialize(writer, payload);\n writer.Flush();\n ms.Position = 0;\n using var sr = new StreamReader(ms);\n return sr.ReadToEnd();\n return ((IPersistableModel< DataFactoryElement>)payload).Write(ModelReaderWriterOptions.Json).ToString();\n //using var ms = new MemoryStream();\n //using Utf8JsonWriter writer = new Utf8JsonWriter(ms);\n //JsonSerializer.Serialize(writer, payload);\n //writer.Flush();\n //ms.Position = 0;\n //using var sr = new StreamReader(ms);\n //return sr.ReadToEnd();", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2128234500", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50447, + "pr_file": "sdk/core/Azure.Core.Expressions.DataFactory/tests/DataFactoryElementTests.cs", + "discussion_id": "2128234500", + "commented_code": "@@ -1086,13 +1087,14 @@ private static void AssertKeyVaultReferenceDfe(DataFactoryElement dfe)\n \n private string GetSerializedString(DataFactoryElement payload)\n {\n- using var ms = new MemoryStream();\n- using Utf8JsonWriter writer = new Utf8JsonWriter(ms);\n- JsonSerializer.Serialize(writer, payload);\n- writer.Flush();\n- ms.Position = 0;\n- using var sr = new StreamReader(ms);\n- return sr.ReadToEnd();\n+ return ((IPersistableModel< DataFactoryElement>)payload).Write(ModelReaderWriterOptions.Json).ToString();\n+ //using var ms = new MemoryStream();\n+ //using Utf8JsonWriter writer = new Utf8JsonWriter(ms);\n+ //JsonSerializer.Serialize(writer, payload);\n+ //writer.Flush();\n+ //ms.Position = 0;\n+ //using var sr = new StreamReader(ms);\n+ //return sr.ReadToEnd();", + "comment_created_at": "2025-06-05T08:15:13+00:00", + "comment_author": "Copilot", + "comment_body": "[nitpick] There is commented-out legacy code in GetSerializedString; remove these comments if they are no longer needed to improve code maintainability.\n```suggestion\n\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2124906492", + "pr_number": 50391, + "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/src/Custom/UploadFileRequest.Serialization.cs", + "created_at": "2025-06-03T20:52:54+00:00", + "commented_code": "internal virtual MultipartFormDataRequestContent ToMultipartRequestContent()\n {\n MultipartFormDataRequestContent content = new();\n content.Add(Data, \"file\", Filename);\n ContentDispositionHeaderValue header = new(\"form-data\") { Name = \"file\"};\n var _dataStream = new StreamContent(Data);\n if (System.Linq.Enumerable.Any(Filename, c => c > 127))", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2124906492", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50391, + "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/src/Custom/UploadFileRequest.Serialization.cs", + "discussion_id": "2124906492", + 
"commented_code": "@@ -19,7 +21,18 @@ internal partial class UploadFileRequest : IUtf8JsonSerializable\n internal virtual MultipartFormDataRequestContent ToMultipartRequestContent()\n {\n MultipartFormDataRequestContent content = new();\n- content.Add(Data, \"file\", Filename);\n+ ContentDispositionHeaderValue header = new(\"form-data\") { Name = \"file\"};\n+ var _dataStream = new StreamContent(Data);\n+ if (System.Linq.Enumerable.Any(Filename, c => c > 127))", + "comment_created_at": "2025-06-03T20:52:54+00:00", + "comment_author": "Copilot", + "comment_body": "[nitpick] Fully qualifying 'System.Linq.Enumerable.Any' is verbose. Consider adding 'using System.Linq;' and calling 'Filename.Any(c => c > 127)' for readability.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2108103812", + "pr_number": 50265, + "pr_file": "sdk/provisioning/Azure.Provisioning/src/BicepDictionaryOfT.cs", + "created_at": "2025-05-27T03:51:38+00:00", + "commented_code": "// Copyright (c) Microsoft Corporation. All rights reserved.\n// Licensed under the MIT License.\n\nusing System;\nusing System.Collections;\nusing System.Collections.Generic;\nusing System.ComponentModel;\nusing System.Diagnostics.CodeAnalysis;\nusing System.Linq;\nusing System.Reflection;", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2108103812", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50265, + "pr_file": "sdk/provisioning/Azure.Provisioning/src/BicepDictionaryOfT.cs", + "discussion_id": "2108103812", + "commented_code": "@@ -1,11 +1,13 @@\n // Copyright (c) Microsoft Corporation. All rights reserved.\n // Licensed under the MIT License.\n \n+using System;\n using System.Collections;\n using System.Collections.Generic;\n using System.ComponentModel;\n using System.Diagnostics.CodeAnalysis;\n using System.Linq;\n+using System.Reflection;", + "comment_created_at": "2025-05-27T03:51:38+00:00", + "comment_author": "Copilot", + "comment_body": "The 'System.Reflection' import is not used in this file; consider removing it to reduce clutter.\n```suggestion\n\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2105455424", + "pr_number": 50255, + "pr_file": "sdk/search/Azure.Search.Documents/src/Indexes/Models/SearchIndexerDataSourceConnection.cs", + "created_at": "2025-05-23T21:38:17+00:00", + "commented_code": "// Licensed under the MIT License.\n\nusing System;\nusing System.Collections.Generic;", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2105455424", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50255, + "pr_file": "sdk/search/Azure.Search.Documents/src/Indexes/Models/SearchIndexerDataSourceConnection.cs", + "discussion_id": "2105455424", + "commented_code": "@@ -2,6 +2,7 @@\n // Licensed under the MIT License.\n \n using System;\n+using System.Collections.Generic;", + "comment_created_at": "2025-05-23T21:38:17+00:00", + "comment_author": "Copilot", + "comment_body": "[nitpick] The `using` directives are not grouped or ordered consistently. 
It’s clearer to list `System.*` namespaces first (e.g., `System.Collections.Generic` after `System`) followed by external dependencies.\n```suggestion\nusing System.Collections.Generic;\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2095888075", + "pr_number": 50017, + "pr_file": "eng/packages/http-client-csharp/generator/Azure.Generator/src/Providers/CollectionResultDefinition.cs", + "created_at": "2025-05-19T14:47:38+00:00", + "commented_code": "? [new CSharpType(typeof(AsyncPageable<>), _itemModelType)]\n : [new CSharpType(typeof(Pageable<>), _itemModelType)];\n\n protected override MethodProvider[] BuildMethods()\n protected override MethodProvider[] BuildMethods() => [BuildAsPagesMethod(), BuildGetNextResponseMethod(), BuildGetResponseMethod()];\n\n private MethodProvider BuildAsPagesMethod()\n {\n return new[]\n var signature = new MethodSignature(\n \"AsPages\",\n $\"Gets the pages of {Name} as an enumerable collection.\",\n _isAsync\n ? MethodSignatureModifiers.Async | MethodSignatureModifiers.Public | MethodSignatureModifiers.Override\n : MethodSignatureModifiers.Public | MethodSignatureModifiers.Override,\n _isAsync ?\n new CSharpType(typeof(IAsyncEnumerable<>), new CSharpType(typeof(Page<>), _itemModelType)) :\n new CSharpType(typeof(IEnumerable<>), new CSharpType(typeof(Page<>), _itemModelType)),\n $\"The pages of {Name} as an enumerable collection.\",\n [NextLinkParameter, PageSizeHintParameter]);\n\n if (!IsProtocolMethod())\n {\n new MethodProvider(\n new MethodSignature(\n \"AsPages\",\n $\"Gets the pages of {Name} as an enumerable collection.\",\n MethodSignatureModifiers.Public | MethodSignatureModifiers.Override,\n _isAsync ?\n new CSharpType(typeof(IAsyncEnumerable<>), new CSharpType(typeof(Page<>), _itemModelType)) :\n new CSharpType(typeof(IEnumerable<>), new CSharpType(typeof(Page<>), _itemModelType)),\n $\"The pages of {Name} as an enumerable collection.\",\n [ContinuationTokenParameter, PageSizeHintParameter]),\n ThrowExpression(Null),\n this)\n // Convenience method\n return new MethodProvider(signature, new MethodBodyStatement[]\n {\n Declare(\"nextLink\", new CSharpType(typeof(string), isNullable: true), NextLinkParameter, out var nextLinkVariableForConvenience),\n BuildDoWhileStatementForConvenience(nextLinkVariableForConvenience)\n }, this);\n }\n\n // Protocol method\n return new MethodProvider(signature, new MethodBodyStatement[]\n {\n Declare(\"nextLink\", new CSharpType(typeof(string), isNullable: true), NextLinkParameter, out var nextLinkVariable),\n BuildDoWhileStatementForProtocol(nextLinkVariable)\n }, this);\n }\n\n private DoWhileStatement BuildDoWhileStatementForProtocol(VariableExpression nextLinkVariable)\n {\n var doWhileStatement = new DoWhileStatement(Not(Static().Invoke(\"IsNullOrEmpty\", [nextLinkVariable])));\n\n // Get the response\n doWhileStatement.Add(Declare(\"response\", new CSharpType(typeof(Response), isNullable: true),\n This.Invoke(_getNextResponseMethodName, [PageSizeHintParameter, nextLinkVariable], _isAsync),\n out var responseVariable));\n\n // Early exit if response is null\n doWhileStatement.Add(new IfStatement(responseVariable.Is(Null)) { new YieldBreakStatement() });\n\n // Parse response content as JsonDocument\n doWhileStatement.Add(UsingDeclare(\"jsonDoc\", typeof(JsonDocument),\n Static().Invoke(\"Parse\", [responseVariable.Property(\"Content\").Invoke(\"ToString\")]),\n out var jsonDocVariable));\n\n // Get root element\n doWhileStatement.Add(Declare(\"root\", typeof(JsonElement),\n 
jsonDocVariable.Property(\"RootElement\"), out var rootVariable));\n\n // Extract items array\n doWhileStatement.Add(Declare(\"items\", new CSharpType(typeof(List<>), _itemModelType),\n New.Instance(new CSharpType(typeof(List<>), _itemModelType)), out var itemsVariable));\n\n // Get items array from response\n var tryGetItems = new IfStatement(rootVariable.Invoke(\"TryGetProperty\", [Literal(_itemsPropertyName), new DeclarationExpression(typeof(JsonElement), \"itemsArray\", out var itemsArrayVariable, isOut: true)]));\n\n // Parse items\n var foreachItems = new ForeachStatement(\"item\", itemsArrayVariable.Invoke(\"EnumerateArray\").As>>(), out var itemVariable);\n //var foreachItems = new ForEachStatement(\"item\", itemsArrayVariable.Invoke(\"EnumerateArray\"), out var itemVarialble);\n foreachItems.Add(itemsVariable.Invoke(\"Add\", [Static().Invoke(\"FromString\", [itemVariable.Invoke(\"ToString\")])]).Terminate());\n\n tryGetItems.Add(foreachItems);\n doWhileStatement.Add(tryGetItems);\n\n // Extract next link\n if (_nextPageLocation == InputResponseLocation.Body && _nextLinkPropertyName != null)\n {\n doWhileStatement.Add(nextLinkVariable.Assign(new BinaryOperatorExpression(\":\",\n new BinaryOperatorExpression(\"?\",\n rootVariable.Invoke(\"TryGetProperty\", [Literal(_nextLinkPropertyName), new DeclarationExpression(typeof(JsonElement), \"value\", out var nextLinkValue, isOut: true)]),\n nextLinkValue.Invoke(\"GetString\")), Null)).Terminate());\n }\n else if (_nextPageLocation == InputResponseLocation.Header && _nextLinkPropertyName != null)\n {\n doWhileStatement.Add(nextLinkVariable.Assign(new BinaryOperatorExpression(\":\",\n new BinaryOperatorExpression(\"?\",\n responseVariable.Property(\"Headers\").Invoke(\"TryGetValue\",[Literal(_nextLinkPropertyName), new DeclarationExpression(typeof(string), \"value\", out var nextLinkHeader, isOut: true)]).As(),nextLinkHeader),\n Null)).Terminate());\n }\n\n // Create and yield the page\n doWhileStatement.Add(new YieldReturnStatement(\n Static(new CSharpType(typeof(Page<>), [_itemModelType]))\n .Invoke(\"FromValues\", [itemsVariable, nextLinkVariable, responseVariable])));\n\n return doWhileStatement;\n }\n\n private ValueExpression BuildGetNextLinkForProtocol(VariableExpression nextLinkVariable, VariableExpression rootVariable, VariableExpression responseVariable)\n {\n if (_nextLinkPropertyName is null)\n {\n return Null;\n }\n\n switch (_nextPageLocation)\n {\n case InputResponseLocation.Body:\n return new BinaryOperatorExpression(\":\",\n new BinaryOperatorExpression(\"?\",\n rootVariable.Invoke(\"TryGetProperty\", [Literal(_nextLinkPropertyName), new DeclarationExpression(typeof(JsonElement), \"value\", out var nextLinkValue, isOut: true)]),\n nextLinkValue.Invoke(\"GetString\")), Null);\n case InputResponseLocation.Header:\n return new BinaryOperatorExpression(\":\",\n new BinaryOperatorExpression(\"?\",\n responseVariable.Property(\"Headers\").Invoke(\"TryGetValue\", [Literal(_nextLinkPropertyName), new DeclarationExpression(typeof(string), \"value\", out var nextLinkHeader, isOut: true)]).As(), nextLinkHeader),\n Null);\n default:\n return Null;\n }\n }\n\n private DoWhileStatement BuildDoWhileStatementForConvenience(VariableExpression nextLinkVariable)\n {\n var doWhileStatement = new DoWhileStatement(Not(Static().Invoke(\"IsNullOrEmpty\", [nextLinkVariable])));\n doWhileStatement.Add(Declare(\"response\", new CSharpType(typeof(Response), isNullable: true), This.Invoke(_getNextResponseMethodName, [PageSizeHintParameter, 
nextLinkVariable], _isAsync), out var responseVariable));\n doWhileStatement.Add(new IfStatement(responseVariable.Is(Null)) { new YieldBreakStatement() });\n doWhileStatement.Add(Declare(\"items\", _responseType, responseVariable.CastTo(_responseType), out var itemsVariable));\n doWhileStatement.Add(nextLinkVariable.Assign(_nextLinkPropertyName is null ? Null : BuildGetNextLinkMethodBodyForConvenience(itemsVariable, responseVariable).Invoke(\"ToString\")).Terminate());\n doWhileStatement.Add(new YieldReturnStatement(Static(new CSharpType(typeof(Page<>), [_itemModelType])).Invoke(\"FromValues\", [itemsVariable.Property(_itemsPropertyName).CastTo(new CSharpType(typeof(IReadOnlyList<>), _itemModelType))/*.Invoke(\"AsReadOnly\")*/, nextLinkVariable, responseVariable])));", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2095888075", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50017, + "pr_file": "eng/packages/http-client-csharp/generator/Azure.Generator/src/Providers/CollectionResultDefinition.cs", + "discussion_id": "2095888075", + "commented_code": "@@ -53,23 +132,250 @@ protected override CSharpType[] BuildImplements() =>\n ? [new CSharpType(typeof(AsyncPageable<>), _itemModelType)]\n : [new CSharpType(typeof(Pageable<>), _itemModelType)];\n \n- protected override MethodProvider[] BuildMethods()\n+ protected override MethodProvider[] BuildMethods() => [BuildAsPagesMethod(), BuildGetNextResponseMethod(), BuildGetResponseMethod()];\n+\n+ private MethodProvider BuildAsPagesMethod()\n {\n- return new[]\n+ var signature = new MethodSignature(\n+ \"AsPages\",\n+ $\"Gets the pages of {Name} as an enumerable collection.\",\n+ _isAsync\n+ ? MethodSignatureModifiers.Async | MethodSignatureModifiers.Public | MethodSignatureModifiers.Override\n+ : MethodSignatureModifiers.Public | MethodSignatureModifiers.Override,\n+ _isAsync ?\n+ new CSharpType(typeof(IAsyncEnumerable<>), new CSharpType(typeof(Page<>), _itemModelType)) :\n+ new CSharpType(typeof(IEnumerable<>), new CSharpType(typeof(Page<>), _itemModelType)),\n+ $\"The pages of {Name} as an enumerable collection.\",\n+ [NextLinkParameter, PageSizeHintParameter]);\n+\n+ if (!IsProtocolMethod())\n {\n- new MethodProvider(\n- new MethodSignature(\n- \"AsPages\",\n- $\"Gets the pages of {Name} as an enumerable collection.\",\n- MethodSignatureModifiers.Public | MethodSignatureModifiers.Override,\n- _isAsync ?\n- new CSharpType(typeof(IAsyncEnumerable<>), new CSharpType(typeof(Page<>), _itemModelType)) :\n- new CSharpType(typeof(IEnumerable<>), new CSharpType(typeof(Page<>), _itemModelType)),\n- $\"The pages of {Name} as an enumerable collection.\",\n- [ContinuationTokenParameter, PageSizeHintParameter]),\n- ThrowExpression(Null),\n- this)\n+ // Convenience method\n+ return new MethodProvider(signature, new MethodBodyStatement[]\n+ {\n+ Declare(\"nextLink\", new CSharpType(typeof(string), isNullable: true), NextLinkParameter, out var nextLinkVariableForConvenience),\n+ BuildDoWhileStatementForConvenience(nextLinkVariableForConvenience)\n+ }, this);\n+ }\n+\n+ // Protocol method\n+ return new MethodProvider(signature, new MethodBodyStatement[]\n+ {\n+ Declare(\"nextLink\", new CSharpType(typeof(string), isNullable: true), NextLinkParameter, out var nextLinkVariable),\n+ BuildDoWhileStatementForProtocol(nextLinkVariable)\n+ }, this);\n+ }\n+\n+ private DoWhileStatement BuildDoWhileStatementForProtocol(VariableExpression nextLinkVariable)\n+ {\n+ var doWhileStatement = new 
DoWhileStatement(Not(Static().Invoke(\"IsNullOrEmpty\", [nextLinkVariable])));\n+\n+ // Get the response\n+ doWhileStatement.Add(Declare(\"response\", new CSharpType(typeof(Response), isNullable: true),\n+ This.Invoke(_getNextResponseMethodName, [PageSizeHintParameter, nextLinkVariable], _isAsync),\n+ out var responseVariable));\n+\n+ // Early exit if response is null\n+ doWhileStatement.Add(new IfStatement(responseVariable.Is(Null)) { new YieldBreakStatement() });\n+\n+ // Parse response content as JsonDocument\n+ doWhileStatement.Add(UsingDeclare(\"jsonDoc\", typeof(JsonDocument),\n+ Static().Invoke(\"Parse\", [responseVariable.Property(\"Content\").Invoke(\"ToString\")]),\n+ out var jsonDocVariable));\n+\n+ // Get root element\n+ doWhileStatement.Add(Declare(\"root\", typeof(JsonElement),\n+ jsonDocVariable.Property(\"RootElement\"), out var rootVariable));\n+\n+ // Extract items array\n+ doWhileStatement.Add(Declare(\"items\", new CSharpType(typeof(List<>), _itemModelType),\n+ New.Instance(new CSharpType(typeof(List<>), _itemModelType)), out var itemsVariable));\n+\n+ // Get items array from response\n+ var tryGetItems = new IfStatement(rootVariable.Invoke(\"TryGetProperty\", [Literal(_itemsPropertyName), new DeclarationExpression(typeof(JsonElement), \"itemsArray\", out var itemsArrayVariable, isOut: true)]));\n+\n+ // Parse items\n+ var foreachItems = new ForeachStatement(\"item\", itemsArrayVariable.Invoke(\"EnumerateArray\").As>>(), out var itemVariable);\n+ //var foreachItems = new ForEachStatement(\"item\", itemsArrayVariable.Invoke(\"EnumerateArray\"), out var itemVarialble);\n+ foreachItems.Add(itemsVariable.Invoke(\"Add\", [Static().Invoke(\"FromString\", [itemVariable.Invoke(\"ToString\")])]).Terminate());\n+\n+ tryGetItems.Add(foreachItems);\n+ doWhileStatement.Add(tryGetItems);\n+\n+ // Extract next link\n+ if (_nextPageLocation == InputResponseLocation.Body && _nextLinkPropertyName != null)\n+ {\n+ doWhileStatement.Add(nextLinkVariable.Assign(new BinaryOperatorExpression(\":\",\n+ new BinaryOperatorExpression(\"?\",\n+ rootVariable.Invoke(\"TryGetProperty\", [Literal(_nextLinkPropertyName), new DeclarationExpression(typeof(JsonElement), \"value\", out var nextLinkValue, isOut: true)]),\n+ nextLinkValue.Invoke(\"GetString\")), Null)).Terminate());\n+ }\n+ else if (_nextPageLocation == InputResponseLocation.Header && _nextLinkPropertyName != null)\n+ {\n+ doWhileStatement.Add(nextLinkVariable.Assign(new BinaryOperatorExpression(\":\",\n+ new BinaryOperatorExpression(\"?\",\n+ responseVariable.Property(\"Headers\").Invoke(\"TryGetValue\",[Literal(_nextLinkPropertyName), new DeclarationExpression(typeof(string), \"value\", out var nextLinkHeader, isOut: true)]).As(),nextLinkHeader),\n+ Null)).Terminate());\n+ }\n+\n+ // Create and yield the page\n+ doWhileStatement.Add(new YieldReturnStatement(\n+ Static(new CSharpType(typeof(Page<>), [_itemModelType]))\n+ .Invoke(\"FromValues\", [itemsVariable, nextLinkVariable, responseVariable])));\n+\n+ return doWhileStatement;\n+ }\n+\n+ private ValueExpression BuildGetNextLinkForProtocol(VariableExpression nextLinkVariable, VariableExpression rootVariable, VariableExpression responseVariable)\n+ {\n+ if (_nextLinkPropertyName is null)\n+ {\n+ return Null;\n+ }\n+\n+ switch (_nextPageLocation)\n+ {\n+ case InputResponseLocation.Body:\n+ return new BinaryOperatorExpression(\":\",\n+ new BinaryOperatorExpression(\"?\",\n+ rootVariable.Invoke(\"TryGetProperty\", [Literal(_nextLinkPropertyName), new DeclarationExpression(typeof(JsonElement), 
\"value\", out var nextLinkValue, isOut: true)]),\n+ nextLinkValue.Invoke(\"GetString\")), Null);\n+ case InputResponseLocation.Header:\n+ return new BinaryOperatorExpression(\":\",\n+ new BinaryOperatorExpression(\"?\",\n+ responseVariable.Property(\"Headers\").Invoke(\"TryGetValue\", [Literal(_nextLinkPropertyName), new DeclarationExpression(typeof(string), \"value\", out var nextLinkHeader, isOut: true)]).As(), nextLinkHeader),\n+ Null);\n+ default:\n+ return Null;\n+ }\n+ }\n+\n+ private DoWhileStatement BuildDoWhileStatementForConvenience(VariableExpression nextLinkVariable)\n+ {\n+ var doWhileStatement = new DoWhileStatement(Not(Static().Invoke(\"IsNullOrEmpty\", [nextLinkVariable])));\n+ doWhileStatement.Add(Declare(\"response\", new CSharpType(typeof(Response), isNullable: true), This.Invoke(_getNextResponseMethodName, [PageSizeHintParameter, nextLinkVariable], _isAsync), out var responseVariable));\n+ doWhileStatement.Add(new IfStatement(responseVariable.Is(Null)) { new YieldBreakStatement() });\n+ doWhileStatement.Add(Declare(\"items\", _responseType, responseVariable.CastTo(_responseType), out var itemsVariable));\n+ doWhileStatement.Add(nextLinkVariable.Assign(_nextLinkPropertyName is null ? Null : BuildGetNextLinkMethodBodyForConvenience(itemsVariable, responseVariable).Invoke(\"ToString\")).Terminate());\n+ doWhileStatement.Add(new YieldReturnStatement(Static(new CSharpType(typeof(Page<>), [_itemModelType])).Invoke(\"FromValues\", [itemsVariable.Property(_itemsPropertyName).CastTo(new CSharpType(typeof(IReadOnlyList<>), _itemModelType))/*.Invoke(\"AsReadOnly\")*/, nextLinkVariable, responseVariable])));", + "comment_created_at": "2025-05-19T14:47:38+00:00", + "comment_author": "JoshLove-msft", + "comment_body": "nit: remove commented code", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-maintain-clean-code-structure.md b/_reviewers/azure-sdk-for-net-maintain-clean-code-structure.md index 6cdbcf6..58481cd 100644 --- a/_reviewers/azure-sdk-for-net-maintain-clean-code-structure.md +++ b/_reviewers/azure-sdk-for-net-maintain-clean-code-structure.md @@ -62,117 +62,3 @@ public class MyClass } } ``` - - -[ - { - "discussion_id": "2128234500", - "pr_number": 50447, - "pr_file": "sdk/core/Azure.Core.Expressions.DataFactory/tests/DataFactoryElementTests.cs", - "created_at": "2025-06-05T08:15:13+00:00", - "commented_code": "private string GetSerializedString(DataFactoryElement payload)\n {\n using var ms = new MemoryStream();\n using Utf8JsonWriter writer = new Utf8JsonWriter(ms);\n JsonSerializer.Serialize(writer, payload);\n writer.Flush();\n ms.Position = 0;\n using var sr = new StreamReader(ms);\n return sr.ReadToEnd();\n return ((IPersistableModel< DataFactoryElement>)payload).Write(ModelReaderWriterOptions.Json).ToString();\n //using var ms = new MemoryStream();\n //using Utf8JsonWriter writer = new Utf8JsonWriter(ms);\n //JsonSerializer.Serialize(writer, payload);\n //writer.Flush();\n //ms.Position = 0;\n //using var sr = new StreamReader(ms);\n //return sr.ReadToEnd();", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2128234500", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50447, - "pr_file": "sdk/core/Azure.Core.Expressions.DataFactory/tests/DataFactoryElementTests.cs", - "discussion_id": "2128234500", - "commented_code": "@@ -1086,13 +1087,14 @@ private static void AssertKeyVaultReferenceDfe(DataFactoryElement dfe)\n \n private string 
GetSerializedString(DataFactoryElement payload)\n {\n- using var ms = new MemoryStream();\n- using Utf8JsonWriter writer = new Utf8JsonWriter(ms);\n- JsonSerializer.Serialize(writer, payload);\n- writer.Flush();\n- ms.Position = 0;\n- using var sr = new StreamReader(ms);\n- return sr.ReadToEnd();\n+ return ((IPersistableModel< DataFactoryElement>)payload).Write(ModelReaderWriterOptions.Json).ToString();\n+ //using var ms = new MemoryStream();\n+ //using Utf8JsonWriter writer = new Utf8JsonWriter(ms);\n+ //JsonSerializer.Serialize(writer, payload);\n+ //writer.Flush();\n+ //ms.Position = 0;\n+ //using var sr = new StreamReader(ms);\n+ //return sr.ReadToEnd();", - "comment_created_at": "2025-06-05T08:15:13+00:00", - "comment_author": "Copilot", - "comment_body": "[nitpick] There is commented-out legacy code in GetSerializedString; remove these comments if they are no longer needed to improve code maintainability.\n```suggestion\n\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2124906492", - "pr_number": 50391, - "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/src/Custom/UploadFileRequest.Serialization.cs", - "created_at": "2025-06-03T20:52:54+00:00", - "commented_code": "internal virtual MultipartFormDataRequestContent ToMultipartRequestContent()\n {\n MultipartFormDataRequestContent content = new();\n content.Add(Data, \"file\", Filename);\n ContentDispositionHeaderValue header = new(\"form-data\") { Name = \"file\"};\n var _dataStream = new StreamContent(Data);\n if (System.Linq.Enumerable.Any(Filename, c => c > 127))", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2124906492", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50391, - "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/src/Custom/UploadFileRequest.Serialization.cs", - "discussion_id": "2124906492", - "commented_code": "@@ -19,7 +21,18 @@ internal partial class UploadFileRequest : IUtf8JsonSerializable\n internal virtual MultipartFormDataRequestContent ToMultipartRequestContent()\n {\n MultipartFormDataRequestContent content = new();\n- content.Add(Data, \"file\", Filename);\n+ ContentDispositionHeaderValue header = new(\"form-data\") { Name = \"file\"};\n+ var _dataStream = new StreamContent(Data);\n+ if (System.Linq.Enumerable.Any(Filename, c => c > 127))", - "comment_created_at": "2025-06-03T20:52:54+00:00", - "comment_author": "Copilot", - "comment_body": "[nitpick] Fully qualifying 'System.Linq.Enumerable.Any' is verbose. Consider adding 'using System.Linq;' and calling 'Filename.Any(c => c > 127)' for readability.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2108103812", - "pr_number": 50265, - "pr_file": "sdk/provisioning/Azure.Provisioning/src/BicepDictionaryOfT.cs", - "created_at": "2025-05-27T03:51:38+00:00", - "commented_code": "\ufeff// Copyright (c) Microsoft Corporation. All rights reserved.\n// Licensed under the MIT License.\n\nusing System;\nusing System.Collections;\nusing System.Collections.Generic;\nusing System.ComponentModel;\nusing System.Diagnostics.CodeAnalysis;\nusing System.Linq;\nusing System.Reflection;", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2108103812", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50265, - "pr_file": "sdk/provisioning/Azure.Provisioning/src/BicepDictionaryOfT.cs", - "discussion_id": "2108103812", - "commented_code": "@@ -1,11 +1,13 @@\n \ufeff// Copyright (c) Microsoft Corporation. 
All rights reserved.\n // Licensed under the MIT License.\n \n+using System;\n using System.Collections;\n using System.Collections.Generic;\n using System.ComponentModel;\n using System.Diagnostics.CodeAnalysis;\n using System.Linq;\n+using System.Reflection;", - "comment_created_at": "2025-05-27T03:51:38+00:00", - "comment_author": "Copilot", - "comment_body": "The 'System.Reflection' import is not used in this file; consider removing it to reduce clutter.\n```suggestion\n\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2105455424", - "pr_number": 50255, - "pr_file": "sdk/search/Azure.Search.Documents/src/Indexes/Models/SearchIndexerDataSourceConnection.cs", - "created_at": "2025-05-23T21:38:17+00:00", - "commented_code": "// Licensed under the MIT License.\n\nusing System;\nusing System.Collections.Generic;", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2105455424", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50255, - "pr_file": "sdk/search/Azure.Search.Documents/src/Indexes/Models/SearchIndexerDataSourceConnection.cs", - "discussion_id": "2105455424", - "commented_code": "@@ -2,6 +2,7 @@\n // Licensed under the MIT License.\n \n using System;\n+using System.Collections.Generic;", - "comment_created_at": "2025-05-23T21:38:17+00:00", - "comment_author": "Copilot", - "comment_body": "[nitpick] The `using` directives are not grouped or ordered consistently. It\u2019s clearer to list `System.*` namespaces first (e.g., `System.Collections.Generic` after `System`) followed by external dependencies.\n```suggestion\nusing System.Collections.Generic;\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2095888075", - "pr_number": 50017, - "pr_file": "eng/packages/http-client-csharp/generator/Azure.Generator/src/Providers/CollectionResultDefinition.cs", - "created_at": "2025-05-19T14:47:38+00:00", - "commented_code": "? [new CSharpType(typeof(AsyncPageable<>), _itemModelType)]\n : [new CSharpType(typeof(Pageable<>), _itemModelType)];\n\n protected override MethodProvider[] BuildMethods()\n protected override MethodProvider[] BuildMethods() => [BuildAsPagesMethod(), BuildGetNextResponseMethod(), BuildGetResponseMethod()];\n\n private MethodProvider BuildAsPagesMethod()\n {\n return new[]\n var signature = new MethodSignature(\n \"AsPages\",\n $\"Gets the pages of {Name} as an enumerable collection.\",\n _isAsync\n ? 
MethodSignatureModifiers.Async | MethodSignatureModifiers.Public | MethodSignatureModifiers.Override\n : MethodSignatureModifiers.Public | MethodSignatureModifiers.Override,\n _isAsync ?\n new CSharpType(typeof(IAsyncEnumerable<>), new CSharpType(typeof(Page<>), _itemModelType)) :\n new CSharpType(typeof(IEnumerable<>), new CSharpType(typeof(Page<>), _itemModelType)),\n $\"The pages of {Name} as an enumerable collection.\",\n [NextLinkParameter, PageSizeHintParameter]);\n\n if (!IsProtocolMethod())\n {\n new MethodProvider(\n new MethodSignature(\n \"AsPages\",\n $\"Gets the pages of {Name} as an enumerable collection.\",\n MethodSignatureModifiers.Public | MethodSignatureModifiers.Override,\n _isAsync ?\n new CSharpType(typeof(IAsyncEnumerable<>), new CSharpType(typeof(Page<>), _itemModelType)) :\n new CSharpType(typeof(IEnumerable<>), new CSharpType(typeof(Page<>), _itemModelType)),\n $\"The pages of {Name} as an enumerable collection.\",\n [ContinuationTokenParameter, PageSizeHintParameter]),\n ThrowExpression(Null),\n this)\n // Convenience method\n return new MethodProvider(signature, new MethodBodyStatement[]\n {\n Declare(\"nextLink\", new CSharpType(typeof(string), isNullable: true), NextLinkParameter, out var nextLinkVariableForConvenience),\n BuildDoWhileStatementForConvenience(nextLinkVariableForConvenience)\n }, this);\n }\n\n // Protocol method\n return new MethodProvider(signature, new MethodBodyStatement[]\n {\n Declare(\"nextLink\", new CSharpType(typeof(string), isNullable: true), NextLinkParameter, out var nextLinkVariable),\n BuildDoWhileStatementForProtocol(nextLinkVariable)\n }, this);\n }\n\n private DoWhileStatement BuildDoWhileStatementForProtocol(VariableExpression nextLinkVariable)\n {\n var doWhileStatement = new DoWhileStatement(Not(Static().Invoke(\"IsNullOrEmpty\", [nextLinkVariable])));\n\n // Get the response\n doWhileStatement.Add(Declare(\"response\", new CSharpType(typeof(Response), isNullable: true),\n This.Invoke(_getNextResponseMethodName, [PageSizeHintParameter, nextLinkVariable], _isAsync),\n out var responseVariable));\n\n // Early exit if response is null\n doWhileStatement.Add(new IfStatement(responseVariable.Is(Null)) { new YieldBreakStatement() });\n\n // Parse response content as JsonDocument\n doWhileStatement.Add(UsingDeclare(\"jsonDoc\", typeof(JsonDocument),\n Static().Invoke(\"Parse\", [responseVariable.Property(\"Content\").Invoke(\"ToString\")]),\n out var jsonDocVariable));\n\n // Get root element\n doWhileStatement.Add(Declare(\"root\", typeof(JsonElement),\n jsonDocVariable.Property(\"RootElement\"), out var rootVariable));\n\n // Extract items array\n doWhileStatement.Add(Declare(\"items\", new CSharpType(typeof(List<>), _itemModelType),\n New.Instance(new CSharpType(typeof(List<>), _itemModelType)), out var itemsVariable));\n\n // Get items array from response\n var tryGetItems = new IfStatement(rootVariable.Invoke(\"TryGetProperty\", [Literal(_itemsPropertyName), new DeclarationExpression(typeof(JsonElement), \"itemsArray\", out var itemsArrayVariable, isOut: true)]));\n\n // Parse items\n var foreachItems = new ForeachStatement(\"item\", itemsArrayVariable.Invoke(\"EnumerateArray\").As>>(), out var itemVariable);\n //var foreachItems = new ForEachStatement(\"item\", itemsArrayVariable.Invoke(\"EnumerateArray\"), out var itemVarialble);\n foreachItems.Add(itemsVariable.Invoke(\"Add\", [Static().Invoke(\"FromString\", [itemVariable.Invoke(\"ToString\")])]).Terminate());\n\n tryGetItems.Add(foreachItems);\n 
doWhileStatement.Add(tryGetItems);\n\n // Extract next link\n if (_nextPageLocation == InputResponseLocation.Body && _nextLinkPropertyName != null)\n {\n doWhileStatement.Add(nextLinkVariable.Assign(new BinaryOperatorExpression(\":\",\n new BinaryOperatorExpression(\"?\",\n rootVariable.Invoke(\"TryGetProperty\", [Literal(_nextLinkPropertyName), new DeclarationExpression(typeof(JsonElement), \"value\", out var nextLinkValue, isOut: true)]),\n nextLinkValue.Invoke(\"GetString\")), Null)).Terminate());\n }\n else if (_nextPageLocation == InputResponseLocation.Header && _nextLinkPropertyName != null)\n {\n doWhileStatement.Add(nextLinkVariable.Assign(new BinaryOperatorExpression(\":\",\n new BinaryOperatorExpression(\"?\",\n responseVariable.Property(\"Headers\").Invoke(\"TryGetValue\",[Literal(_nextLinkPropertyName), new DeclarationExpression(typeof(string), \"value\", out var nextLinkHeader, isOut: true)]).As(),nextLinkHeader),\n Null)).Terminate());\n }\n\n // Create and yield the page\n doWhileStatement.Add(new YieldReturnStatement(\n Static(new CSharpType(typeof(Page<>), [_itemModelType]))\n .Invoke(\"FromValues\", [itemsVariable, nextLinkVariable, responseVariable])));\n\n return doWhileStatement;\n }\n\n private ValueExpression BuildGetNextLinkForProtocol(VariableExpression nextLinkVariable, VariableExpression rootVariable, VariableExpression responseVariable)\n {\n if (_nextLinkPropertyName is null)\n {\n return Null;\n }\n\n switch (_nextPageLocation)\n {\n case InputResponseLocation.Body:\n return new BinaryOperatorExpression(\":\",\n new BinaryOperatorExpression(\"?\",\n rootVariable.Invoke(\"TryGetProperty\", [Literal(_nextLinkPropertyName), new DeclarationExpression(typeof(JsonElement), \"value\", out var nextLinkValue, isOut: true)]),\n nextLinkValue.Invoke(\"GetString\")), Null);\n case InputResponseLocation.Header:\n return new BinaryOperatorExpression(\":\",\n new BinaryOperatorExpression(\"?\",\n responseVariable.Property(\"Headers\").Invoke(\"TryGetValue\", [Literal(_nextLinkPropertyName), new DeclarationExpression(typeof(string), \"value\", out var nextLinkHeader, isOut: true)]).As(), nextLinkHeader),\n Null);\n default:\n return Null;\n }\n }\n\n private DoWhileStatement BuildDoWhileStatementForConvenience(VariableExpression nextLinkVariable)\n {\n var doWhileStatement = new DoWhileStatement(Not(Static().Invoke(\"IsNullOrEmpty\", [nextLinkVariable])));\n doWhileStatement.Add(Declare(\"response\", new CSharpType(typeof(Response), isNullable: true), This.Invoke(_getNextResponseMethodName, [PageSizeHintParameter, nextLinkVariable], _isAsync), out var responseVariable));\n doWhileStatement.Add(new IfStatement(responseVariable.Is(Null)) { new YieldBreakStatement() });\n doWhileStatement.Add(Declare(\"items\", _responseType, responseVariable.CastTo(_responseType), out var itemsVariable));\n doWhileStatement.Add(nextLinkVariable.Assign(_nextLinkPropertyName is null ? 
Null : BuildGetNextLinkMethodBodyForConvenience(itemsVariable, responseVariable).Invoke(\"ToString\")).Terminate());\n doWhileStatement.Add(new YieldReturnStatement(Static(new CSharpType(typeof(Page<>), [_itemModelType])).Invoke(\"FromValues\", [itemsVariable.Property(_itemsPropertyName).CastTo(new CSharpType(typeof(IReadOnlyList<>), _itemModelType))/*.Invoke(\"AsReadOnly\")*/, nextLinkVariable, responseVariable])));", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2095888075", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50017, - "pr_file": "eng/packages/http-client-csharp/generator/Azure.Generator/src/Providers/CollectionResultDefinition.cs", - "discussion_id": "2095888075", - "commented_code": "@@ -53,23 +132,250 @@ protected override CSharpType[] BuildImplements() =>\n ? [new CSharpType(typeof(AsyncPageable<>), _itemModelType)]\n : [new CSharpType(typeof(Pageable<>), _itemModelType)];\n \n- protected override MethodProvider[] BuildMethods()\n+ protected override MethodProvider[] BuildMethods() => [BuildAsPagesMethod(), BuildGetNextResponseMethod(), BuildGetResponseMethod()];\n+\n+ private MethodProvider BuildAsPagesMethod()\n {\n- return new[]\n+ var signature = new MethodSignature(\n+ \"AsPages\",\n+ $\"Gets the pages of {Name} as an enumerable collection.\",\n+ _isAsync\n+ ? MethodSignatureModifiers.Async | MethodSignatureModifiers.Public | MethodSignatureModifiers.Override\n+ : MethodSignatureModifiers.Public | MethodSignatureModifiers.Override,\n+ _isAsync ?\n+ new CSharpType(typeof(IAsyncEnumerable<>), new CSharpType(typeof(Page<>), _itemModelType)) :\n+ new CSharpType(typeof(IEnumerable<>), new CSharpType(typeof(Page<>), _itemModelType)),\n+ $\"The pages of {Name} as an enumerable collection.\",\n+ [NextLinkParameter, PageSizeHintParameter]);\n+\n+ if (!IsProtocolMethod())\n {\n- new MethodProvider(\n- new MethodSignature(\n- \"AsPages\",\n- $\"Gets the pages of {Name} as an enumerable collection.\",\n- MethodSignatureModifiers.Public | MethodSignatureModifiers.Override,\n- _isAsync ?\n- new CSharpType(typeof(IAsyncEnumerable<>), new CSharpType(typeof(Page<>), _itemModelType)) :\n- new CSharpType(typeof(IEnumerable<>), new CSharpType(typeof(Page<>), _itemModelType)),\n- $\"The pages of {Name} as an enumerable collection.\",\n- [ContinuationTokenParameter, PageSizeHintParameter]),\n- ThrowExpression(Null),\n- this)\n+ // Convenience method\n+ return new MethodProvider(signature, new MethodBodyStatement[]\n+ {\n+ Declare(\"nextLink\", new CSharpType(typeof(string), isNullable: true), NextLinkParameter, out var nextLinkVariableForConvenience),\n+ BuildDoWhileStatementForConvenience(nextLinkVariableForConvenience)\n+ }, this);\n+ }\n+\n+ // Protocol method\n+ return new MethodProvider(signature, new MethodBodyStatement[]\n+ {\n+ Declare(\"nextLink\", new CSharpType(typeof(string), isNullable: true), NextLinkParameter, out var nextLinkVariable),\n+ BuildDoWhileStatementForProtocol(nextLinkVariable)\n+ }, this);\n+ }\n+\n+ private DoWhileStatement BuildDoWhileStatementForProtocol(VariableExpression nextLinkVariable)\n+ {\n+ var doWhileStatement = new DoWhileStatement(Not(Static().Invoke(\"IsNullOrEmpty\", [nextLinkVariable])));\n+\n+ // Get the response\n+ doWhileStatement.Add(Declare(\"response\", new CSharpType(typeof(Response), isNullable: true),\n+ This.Invoke(_getNextResponseMethodName, [PageSizeHintParameter, nextLinkVariable], _isAsync),\n+ out var responseVariable));\n+\n+ // Early exit if response is null\n+ 
doWhileStatement.Add(new IfStatement(responseVariable.Is(Null)) { new YieldBreakStatement() });\n+\n+ // Parse response content as JsonDocument\n+ doWhileStatement.Add(UsingDeclare(\"jsonDoc\", typeof(JsonDocument),\n+ Static().Invoke(\"Parse\", [responseVariable.Property(\"Content\").Invoke(\"ToString\")]),\n+ out var jsonDocVariable));\n+\n+ // Get root element\n+ doWhileStatement.Add(Declare(\"root\", typeof(JsonElement),\n+ jsonDocVariable.Property(\"RootElement\"), out var rootVariable));\n+\n+ // Extract items array\n+ doWhileStatement.Add(Declare(\"items\", new CSharpType(typeof(List<>), _itemModelType),\n+ New.Instance(new CSharpType(typeof(List<>), _itemModelType)), out var itemsVariable));\n+\n+ // Get items array from response\n+ var tryGetItems = new IfStatement(rootVariable.Invoke(\"TryGetProperty\", [Literal(_itemsPropertyName), new DeclarationExpression(typeof(JsonElement), \"itemsArray\", out var itemsArrayVariable, isOut: true)]));\n+\n+ // Parse items\n+ var foreachItems = new ForeachStatement(\"item\", itemsArrayVariable.Invoke(\"EnumerateArray\").As>>(), out var itemVariable);\n+ //var foreachItems = new ForEachStatement(\"item\", itemsArrayVariable.Invoke(\"EnumerateArray\"), out var itemVarialble);\n+ foreachItems.Add(itemsVariable.Invoke(\"Add\", [Static().Invoke(\"FromString\", [itemVariable.Invoke(\"ToString\")])]).Terminate());\n+\n+ tryGetItems.Add(foreachItems);\n+ doWhileStatement.Add(tryGetItems);\n+\n+ // Extract next link\n+ if (_nextPageLocation == InputResponseLocation.Body && _nextLinkPropertyName != null)\n+ {\n+ doWhileStatement.Add(nextLinkVariable.Assign(new BinaryOperatorExpression(\":\",\n+ new BinaryOperatorExpression(\"?\",\n+ rootVariable.Invoke(\"TryGetProperty\", [Literal(_nextLinkPropertyName), new DeclarationExpression(typeof(JsonElement), \"value\", out var nextLinkValue, isOut: true)]),\n+ nextLinkValue.Invoke(\"GetString\")), Null)).Terminate());\n+ }\n+ else if (_nextPageLocation == InputResponseLocation.Header && _nextLinkPropertyName != null)\n+ {\n+ doWhileStatement.Add(nextLinkVariable.Assign(new BinaryOperatorExpression(\":\",\n+ new BinaryOperatorExpression(\"?\",\n+ responseVariable.Property(\"Headers\").Invoke(\"TryGetValue\",[Literal(_nextLinkPropertyName), new DeclarationExpression(typeof(string), \"value\", out var nextLinkHeader, isOut: true)]).As(),nextLinkHeader),\n+ Null)).Terminate());\n+ }\n+\n+ // Create and yield the page\n+ doWhileStatement.Add(new YieldReturnStatement(\n+ Static(new CSharpType(typeof(Page<>), [_itemModelType]))\n+ .Invoke(\"FromValues\", [itemsVariable, nextLinkVariable, responseVariable])));\n+\n+ return doWhileStatement;\n+ }\n+\n+ private ValueExpression BuildGetNextLinkForProtocol(VariableExpression nextLinkVariable, VariableExpression rootVariable, VariableExpression responseVariable)\n+ {\n+ if (_nextLinkPropertyName is null)\n+ {\n+ return Null;\n+ }\n+\n+ switch (_nextPageLocation)\n+ {\n+ case InputResponseLocation.Body:\n+ return new BinaryOperatorExpression(\":\",\n+ new BinaryOperatorExpression(\"?\",\n+ rootVariable.Invoke(\"TryGetProperty\", [Literal(_nextLinkPropertyName), new DeclarationExpression(typeof(JsonElement), \"value\", out var nextLinkValue, isOut: true)]),\n+ nextLinkValue.Invoke(\"GetString\")), Null);\n+ case InputResponseLocation.Header:\n+ return new BinaryOperatorExpression(\":\",\n+ new BinaryOperatorExpression(\"?\",\n+ responseVariable.Property(\"Headers\").Invoke(\"TryGetValue\", [Literal(_nextLinkPropertyName), new DeclarationExpression(typeof(string), \"value\", 
out var nextLinkHeader, isOut: true)]).As(), nextLinkHeader),\n+ Null);\n+ default:\n+ return Null;\n+ }\n+ }\n+\n+ private DoWhileStatement BuildDoWhileStatementForConvenience(VariableExpression nextLinkVariable)\n+ {\n+ var doWhileStatement = new DoWhileStatement(Not(Static().Invoke(\"IsNullOrEmpty\", [nextLinkVariable])));\n+ doWhileStatement.Add(Declare(\"response\", new CSharpType(typeof(Response), isNullable: true), This.Invoke(_getNextResponseMethodName, [PageSizeHintParameter, nextLinkVariable], _isAsync), out var responseVariable));\n+ doWhileStatement.Add(new IfStatement(responseVariable.Is(Null)) { new YieldBreakStatement() });\n+ doWhileStatement.Add(Declare(\"items\", _responseType, responseVariable.CastTo(_responseType), out var itemsVariable));\n+ doWhileStatement.Add(nextLinkVariable.Assign(_nextLinkPropertyName is null ? Null : BuildGetNextLinkMethodBodyForConvenience(itemsVariable, responseVariable).Invoke(\"ToString\")).Terminate());\n+ doWhileStatement.Add(new YieldReturnStatement(Static(new CSharpType(typeof(Page<>), [_itemModelType])).Invoke(\"FromValues\", [itemsVariable.Property(_itemsPropertyName).CastTo(new CSharpType(typeof(IReadOnlyList<>), _itemModelType))/*.Invoke(\"AsReadOnly\")*/, nextLinkVariable, responseVariable])));", - "comment_created_at": "2025-05-19T14:47:38+00:00", - "comment_author": "JoshLove-msft", - "comment_body": "nit: remove commented code", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-match-ci-commands-locally.json b/_reviewers/azure-sdk-for-net-match-ci-commands-locally.json new file mode 100644 index 0000000..acbebe2 --- /dev/null +++ b/_reviewers/azure-sdk-for-net-match-ci-commands-locally.json @@ -0,0 +1,70 @@ +[ + { + "discussion_id": "2143212696", + "pr_number": 50575, + "pr_file": ".github/copilot-instructions.md", + "created_at": "2025-06-12T16:47:20+00:00", + "commented_code": "# Repository information\nNote that files in this repository are generally organized in the following way:\n- `azure-sdk-for-net/sdk/{service-directory}/{package-name}` holds everything for a specific Azure SDK package.\n- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/src` holds the source code for the package.\n- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/tests` holds the tests for the package.\n- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/samples` holds the samples for the package.\n\nThere are a few exceptions where package-name is replaced with a shorter directory name. For example in the cognitiveservices directory. The package `Microsoft.Azure.CognitiveServices.Language.SpellCheck` can be found in `azure-sdk-for-net/sdk/cognitiveservices/Language.SpellCheck`. When in doubt, you can look at the name of the .csproj file within the src folder to determine the package name.\n\n# Requirements\n- If you are writing C# code within the `azure-sdk-for-net/sdk` directory:\n 1. Follow the coding guidelines in the \"Coding guidelines\" section below.\n 2. You should never manually make changes to `*/Generated/*` files, e.g. `azure-sdk-for-net/sdk/containerregistry/Azure.Containers.ContainerRegistry/src/Generated/`\n - Only re-generate these files if instructed to do so. If you are instructed to regenerate an SDK, use `dotnet build /t:GenerateCode`\n - If you feel like you need to make changes to these files beyond re-generating them in order to complete your task, do not do this, instead see if you can work around the problem in the code that is not in the `Generated` folder. 
If you can't, report this to the user.\n 3. Code should build successfully using the following steps:", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2143212696", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50575, + "pr_file": ".github/copilot-instructions.md", + "discussion_id": "2143212696", + "commented_code": "@@ -0,0 +1,25 @@\n+# Repository information\n+Note that files in this repository are generally organized in the following way:\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}` holds everything for a specific Azure SDK package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/src` holds the source code for the package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/tests` holds the tests for the package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/samples` holds the samples for the package.\n+\n+There are a few exceptions where package-name is replaced with a shorter directory name. For example in the cognitiveservices directory. The package `Microsoft.Azure.CognitiveServices.Language.SpellCheck` can be found in `azure-sdk-for-net/sdk/cognitiveservices/Language.SpellCheck`. When in doubt, you can look at the name of the .csproj file within the src folder to determine the package name.\n+\n+# Requirements\n+- If you are writing C# code within the `azure-sdk-for-net/sdk` directory:\n+ 1. Follow the coding guidelines in the \"Coding guidelines\" section below.\n+ 2. You should never manually make changes to `*/Generated/*` files, e.g. `azure-sdk-for-net/sdk/containerregistry/Azure.Containers.ContainerRegistry/src/Generated/`\n+ - Only re-generate these files if instructed to do so. If you are instructed to regenerate an SDK, use `dotnet build /t:GenerateCode`\n+ - If you feel like you need to make changes to these files beyond re-generating them in order to complete your task, do not do this, instead see if you can work around the problem in the code that is not in the `Generated` folder. If you can't, report this to the user.\n+ 3. Code should build successfully using the following steps:", + "comment_created_at": "2025-06-12T16:47:20+00:00", + "comment_author": "m-redding", + "comment_body": "Debating whether to have copilot run unit tests in addition to building.\r\nAn alternative could be adding some kind of tool that can do all the validation for a given service directory in one call, to make it easier for copilot ", + "pr_file_module": null + }, + { + "comment_id": "2143801505", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50575, + "pr_file": ".github/copilot-instructions.md", + "discussion_id": "2143212696", + "commented_code": "@@ -0,0 +1,25 @@\n+# Repository information\n+Note that files in this repository are generally organized in the following way:\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}` holds everything for a specific Azure SDK package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/src` holds the source code for the package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/tests` holds the tests for the package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/samples` holds the samples for the package.\n+\n+There are a few exceptions where package-name is replaced with a shorter directory name. For example in the cognitiveservices directory. 
The package `Microsoft.Azure.CognitiveServices.Language.SpellCheck` can be found in `azure-sdk-for-net/sdk/cognitiveservices/Language.SpellCheck`. When in doubt, you can look at the name of the .csproj file within the src folder to determine the package name.\n+\n+# Requirements\n+- If you are writing C# code within the `azure-sdk-for-net/sdk` directory:\n+ 1. Follow the coding guidelines in the \"Coding guidelines\" section below.\n+ 2. You should never manually make changes to `*/Generated/*` files, e.g. `azure-sdk-for-net/sdk/containerregistry/Azure.Containers.ContainerRegistry/src/Generated/`\n+ - Only re-generate these files if instructed to do so. If you are instructed to regenerate an SDK, use `dotnet build /t:GenerateCode`\n+ - If you feel like you need to make changes to these files beyond re-generating them in order to complete your task, do not do this, instead see if you can work around the problem in the code that is not in the `Generated` folder. If you can't, report this to the user.\n+ 3. Code should build successfully using the following steps:", + "comment_created_at": "2025-06-12T23:12:46+00:00", + "comment_author": "weshaggard", + "comment_body": "I would try to stick to what the CI does `dotnet pack eng/service.proj /p:ServiceDirectory=` and `dotnet test eng/services.proj /p:ServiceDirectory=`. That way it more closely matches the CI and has a higher chance of passing.", + "pr_file_module": null + }, + { + "comment_id": "2146083028", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50575, + "pr_file": ".github/copilot-instructions.md", + "discussion_id": "2143212696", + "commented_code": "@@ -0,0 +1,25 @@\n+# Repository information\n+Note that files in this repository are generally organized in the following way:\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}` holds everything for a specific Azure SDK package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/src` holds the source code for the package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/tests` holds the tests for the package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/samples` holds the samples for the package.\n+\n+There are a few exceptions where package-name is replaced with a shorter directory name. For example in the cognitiveservices directory. The package `Microsoft.Azure.CognitiveServices.Language.SpellCheck` can be found in `azure-sdk-for-net/sdk/cognitiveservices/Language.SpellCheck`. When in doubt, you can look at the name of the .csproj file within the src folder to determine the package name.\n+\n+# Requirements\n+- If you are writing C# code within the `azure-sdk-for-net/sdk` directory:\n+ 1. Follow the coding guidelines in the \"Coding guidelines\" section below.\n+ 2. You should never manually make changes to `*/Generated/*` files, e.g. `azure-sdk-for-net/sdk/containerregistry/Azure.Containers.ContainerRegistry/src/Generated/`\n+ - Only re-generate these files if instructed to do so. If you are instructed to regenerate an SDK, use `dotnet build /t:GenerateCode`\n+ - If you feel like you need to make changes to these files beyond re-generating them in order to complete your task, do not do this, instead see if you can work around the problem in the code that is not in the `Generated` folder. If you can't, report this to the user.\n+ 3. 
Code should build successfully using the following steps:", + "comment_created_at": "2025-06-13T20:57:52+00:00", + "comment_author": "m-redding", + "comment_body": "K yeah that makes sense to me too. I'll update this ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2007277965", + "pr_number": 49024, + "pr_file": "sdk/cognitiveservices/Knowledge.QnAMaker/CHANGELOG.md", + "created_at": "2025-03-21T10:20:03+00:00", + "commented_code": null, + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2007277965", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49024, + "pr_file": "sdk/cognitiveservices/Knowledge.QnAMaker/CHANGELOG.md", + "discussion_id": "2007277965", + "commented_code": null, + "comment_created_at": "2025-03-21T10:20:03+00:00", + "comment_author": "ArcturusZhang", + "comment_body": "Everything in this directory was removed because the library inside this directory is actually a track 1 SDK library.\r\nWe could know this by its package name (`Microsoft.*`) and its generated file contents.\r\nWe have pruned all track 1 SDK libraries, and this one dodges that round maybe because its directory name does not look like a track 1 library (`Microsoft.*`).\r\nNow it is causing the CI to fail therefore I noticed it and removed it here.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-match-ci-commands-locally.md b/_reviewers/azure-sdk-for-net-match-ci-commands-locally.md index ecba506..e294b3c 100644 --- a/_reviewers/azure-sdk-for-net-match-ci-commands-locally.md +++ b/_reviewers/azure-sdk-for-net-match-ci-commands-locally.md @@ -25,75 +25,3 @@ dotnet test eng/services.proj /p:ServiceDirectory= ``` This approach helps identify issues earlier in the development process and prevents situations where code passes locally but fails in CI due to environmental differences or non-standard configurations. When everyone on the team follows this practice, it also ensures consistency across developer environments and reduces troubleshooting time for CI failures. - - -[ - { - "discussion_id": "2143212696", - "pr_number": 50575, - "pr_file": ".github/copilot-instructions.md", - "created_at": "2025-06-12T16:47:20+00:00", - "commented_code": "# Repository information\nNote that files in this repository are generally organized in the following way:\n- `azure-sdk-for-net/sdk/{service-directory}/{package-name}` holds everything for a specific Azure SDK package.\n- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/src` holds the source code for the package.\n- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/tests` holds the tests for the package.\n- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/samples` holds the samples for the package.\n\nThere are a few exceptions where package-name is replaced with a shorter directory name. For example in the cognitiveservices directory. The package `Microsoft.Azure.CognitiveServices.Language.SpellCheck` can be found in `azure-sdk-for-net/sdk/cognitiveservices/Language.SpellCheck`. When in doubt, you can look at the name of the .csproj file within the src folder to determine the package name.\n\n# Requirements\n- If you are writing C# code within the `azure-sdk-for-net/sdk` directory:\n 1. Follow the coding guidelines in the \"Coding guidelines\" section below.\n 2. You should never manually make changes to `*/Generated/*` files, e.g. 
`azure-sdk-for-net/sdk/containerregistry/Azure.Containers.ContainerRegistry/src/Generated/`\n - Only re-generate these files if instructed to do so. If you are instructed to regenerate an SDK, use `dotnet build /t:GenerateCode`\n - If you feel like you need to make changes to these files beyond re-generating them in order to complete your task, do not do this, instead see if you can work around the problem in the code that is not in the `Generated` folder. If you can't, report this to the user.\n 3. Code should build successfully using the following steps:", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2143212696", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50575, - "pr_file": ".github/copilot-instructions.md", - "discussion_id": "2143212696", - "commented_code": "@@ -0,0 +1,25 @@\n+# Repository information\n+Note that files in this repository are generally organized in the following way:\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}` holds everything for a specific Azure SDK package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/src` holds the source code for the package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/tests` holds the tests for the package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/samples` holds the samples for the package.\n+\n+There are a few exceptions where package-name is replaced with a shorter directory name. For example in the cognitiveservices directory. The package `Microsoft.Azure.CognitiveServices.Language.SpellCheck` can be found in `azure-sdk-for-net/sdk/cognitiveservices/Language.SpellCheck`. When in doubt, you can look at the name of the .csproj file within the src folder to determine the package name.\n+\n+# Requirements\n+- If you are writing C# code within the `azure-sdk-for-net/sdk` directory:\n+ 1. Follow the coding guidelines in the \"Coding guidelines\" section below.\n+ 2. You should never manually make changes to `*/Generated/*` files, e.g. `azure-sdk-for-net/sdk/containerregistry/Azure.Containers.ContainerRegistry/src/Generated/`\n+ - Only re-generate these files if instructed to do so. If you are instructed to regenerate an SDK, use `dotnet build /t:GenerateCode`\n+ - If you feel like you need to make changes to these files beyond re-generating them in order to complete your task, do not do this, instead see if you can work around the problem in the code that is not in the `Generated` folder. If you can't, report this to the user.\n+ 3. 
Code should build successfully using the following steps:", - "comment_created_at": "2025-06-12T16:47:20+00:00", - "comment_author": "m-redding", - "comment_body": "Debating whether to have copilot run unit tests in addition to building.\r\nAn alternative could be adding some kind of tool that can do all the validation for a given service directory in one call, to make it easier for copilot ", - "pr_file_module": null - }, - { - "comment_id": "2143801505", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50575, - "pr_file": ".github/copilot-instructions.md", - "discussion_id": "2143212696", - "commented_code": "@@ -0,0 +1,25 @@\n+# Repository information\n+Note that files in this repository are generally organized in the following way:\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}` holds everything for a specific Azure SDK package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/src` holds the source code for the package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/tests` holds the tests for the package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/samples` holds the samples for the package.\n+\n+There are a few exceptions where package-name is replaced with a shorter directory name. For example in the cognitiveservices directory. The package `Microsoft.Azure.CognitiveServices.Language.SpellCheck` can be found in `azure-sdk-for-net/sdk/cognitiveservices/Language.SpellCheck`. When in doubt, you can look at the name of the .csproj file within the src folder to determine the package name.\n+\n+# Requirements\n+- If you are writing C# code within the `azure-sdk-for-net/sdk` directory:\n+ 1. Follow the coding guidelines in the \"Coding guidelines\" section below.\n+ 2. You should never manually make changes to `*/Generated/*` files, e.g. `azure-sdk-for-net/sdk/containerregistry/Azure.Containers.ContainerRegistry/src/Generated/`\n+ - Only re-generate these files if instructed to do so. If you are instructed to regenerate an SDK, use `dotnet build /t:GenerateCode`\n+ - If you feel like you need to make changes to these files beyond re-generating them in order to complete your task, do not do this, instead see if you can work around the problem in the code that is not in the `Generated` folder. If you can't, report this to the user.\n+ 3. Code should build successfully using the following steps:", - "comment_created_at": "2025-06-12T23:12:46+00:00", - "comment_author": "weshaggard", - "comment_body": "I would try to stick to what the CI does `dotnet pack eng/service.proj /p:ServiceDirectory=` and `dotnet test eng/services.proj /p:ServiceDirectory=`. 
That way it more closely matches the CI and has a higher chance of passing.", - "pr_file_module": null - }, - { - "comment_id": "2146083028", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50575, - "pr_file": ".github/copilot-instructions.md", - "discussion_id": "2143212696", - "commented_code": "@@ -0,0 +1,25 @@\n+# Repository information\n+Note that files in this repository are generally organized in the following way:\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}` holds everything for a specific Azure SDK package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/src` holds the source code for the package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/tests` holds the tests for the package.\n+- `azure-sdk-for-net/sdk/{service-directory}/{package-name}/samples` holds the samples for the package.\n+\n+There are a few exceptions where package-name is replaced with a shorter directory name. For example in the cognitiveservices directory. The package `Microsoft.Azure.CognitiveServices.Language.SpellCheck` can be found in `azure-sdk-for-net/sdk/cognitiveservices/Language.SpellCheck`. When in doubt, you can look at the name of the .csproj file within the src folder to determine the package name.\n+\n+# Requirements\n+- If you are writing C# code within the `azure-sdk-for-net/sdk` directory:\n+ 1. Follow the coding guidelines in the \"Coding guidelines\" section below.\n+ 2. You should never manually make changes to `*/Generated/*` files, e.g. `azure-sdk-for-net/sdk/containerregistry/Azure.Containers.ContainerRegistry/src/Generated/`\n+ - Only re-generate these files if instructed to do so. If you are instructed to regenerate an SDK, use `dotnet build /t:GenerateCode`\n+ - If you feel like you need to make changes to these files beyond re-generating them in order to complete your task, do not do this, instead see if you can work around the problem in the code that is not in the `Generated` folder. If you can't, report this to the user.\n+ 3. Code should build successfully using the following steps:", - "comment_created_at": "2025-06-13T20:57:52+00:00", - "comment_author": "m-redding", - "comment_body": "K yeah that makes sense to me too. 
I'll update this ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2007277965", - "pr_number": 49024, - "pr_file": "sdk/cognitiveservices/Knowledge.QnAMaker/CHANGELOG.md", - "created_at": "2025-03-21T10:20:03+00:00", - "commented_code": null, - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2007277965", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49024, - "pr_file": "sdk/cognitiveservices/Knowledge.QnAMaker/CHANGELOG.md", - "discussion_id": "2007277965", - "commented_code": null, - "comment_created_at": "2025-03-21T10:20:03+00:00", - "comment_author": "ArcturusZhang", - "comment_body": "Everything in this directory was removed because the library inside this directory is actually a track 1 SDK library.\r\nWe could know this by its package name (`Microsoft.*`) and its generated file contents.\r\nWe have pruned all track 1 SDK libraries, and this one dodges that round maybe because its directory name does not look like a track 1 library (`Microsoft.*`).\r\nNow it is causing the CI to fail therefore I noticed it and removed it here.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-minimize-memory-allocations.json b/_reviewers/azure-sdk-for-net-minimize-memory-allocations.json new file mode 100644 index 0000000..afd0cf0 --- /dev/null +++ b/_reviewers/azure-sdk-for-net-minimize-memory-allocations.json @@ -0,0 +1,194 @@ +[ + { + "discussion_id": "2167978316", + "pr_number": 50898, + "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/src/Custom/PersistentAgentsChatClient.cs", + "created_at": "2025-06-26T02:34:06+00:00", + "commented_code": "// Copyright (c) Microsoft Corporation. All rights reserved.\n// Licensed under the MIT License.\n\n#nullable enable\n#pragma warning disable AZC0004, AZC0007, AZC0015\n\nusing System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Runtime.CompilerServices;\nusing System.Text;\nusing System.Text.Json;\nusing System.Text.Json.Nodes;\nusing System.Text.Json.Serialization;\nusing System.Threading;\nusing System.Threading.Tasks;\nusing Microsoft.Extensions.AI;\n\nnamespace Azure.AI.Agents.Persistent\n{\n /// Represents an for an Azure.AI.Agents.Persistent .\n public partial class PersistentAgentsChatClient : IChatClient\n {\n /// The name of the chat client provider.\n private const string ProviderName = \"azure\";\n\n /// The underlying .\n private readonly PersistentAgentsClient? _client;\n\n /// Metadata for the client.\n private readonly ChatClientMetadata? _metadata;\n\n /// The ID of the agent to use.\n private readonly string? _agentId;\n\n /// The thread ID to use if none is supplied in .\n private readonly string? _defaultThreadId;\n\n /// List of tools associated with the agent.\n private IReadOnlyList? _agentTools;\n\n /// Initializes a new instance of the class for the specified .\n public PersistentAgentsChatClient(PersistentAgentsClient client, string agentId, string? defaultThreadId = null)\n {\n Argument.AssertNotNull(client, nameof(client));\n Argument.AssertNotNullOrWhiteSpace(agentId, nameof(agentId));\n\n _client = client;\n _agentId = agentId;\n _defaultThreadId = defaultThreadId;\n\n _metadata = new(ProviderName);\n }\n\n protected PersistentAgentsChatClient() { }\n\n /// \n public virtual object? GetService(Type serviceType, object? serviceKey = null) =>\n serviceType is null ? throw new ArgumentNullException(nameof(serviceType)) :\n serviceKey is not null ? null :\n serviceType == typeof(ChatClientMetadata) ? 
_metadata :\n serviceType == typeof(PersistentAgentsClient) ? _client :\n serviceType.IsInstanceOfType(this) ? this :\n null;\n\n /// \n public virtual Task GetResponseAsync(\n IEnumerable messages, ChatOptions? options = null, CancellationToken cancellationToken = default) =>\n GetStreamingResponseAsync(messages, options, cancellationToken).ToChatResponseAsync(cancellationToken);\n\n /// \n public virtual async IAsyncEnumerable GetStreamingResponseAsync(\n IEnumerable messages, ChatOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)\n {\n Argument.AssertNotNull(messages, nameof(messages));\n\n // Extract necessary state from messages and options.\n (ThreadAndRunOptions runOptions, List? toolResults) =\n await CreateRunOptionsAsync(messages, options, cancellationToken).ConfigureAwait(false);\n\n // Get the thread ID.\n string? threadId = options?.ConversationId ?? _defaultThreadId;\n if (threadId is null && toolResults is not null)\n {\n throw new ArgumentException(\"No thread ID was provided, but chat messages includes tool results.\", nameof(messages));\n }\n\n // Get any active run ID for this thread.\n ThreadRun? threadRun = null;\n if (threadId is not null)\n {\n await foreach (ThreadRun? run in _client!.Runs.GetRunsAsync(threadId, limit: 1, ListSortOrder.Descending, cancellationToken: cancellationToken).ConfigureAwait(false))\n {\n if (run.Status != RunStatus.Completed && run.Status != RunStatus.Cancelled && run.Status != RunStatus.Failed && run.Status != RunStatus.Expired)\n {\n threadRun = run;\n break;\n }\n }\n }\n\n // Submit the request.\n IAsyncEnumerable updates;\n if (threadRun is not null &&\n ConvertFunctionResultsToToolOutput(toolResults, out List? toolOutputs) is { } toolRunId &&\n toolRunId == threadRun.Id)\n {\n // There's an active run and we have tool results to submit, so submit the results and continue streaming.\n // This is going to ignore any additional messages in the run options, as we are only submitting tool outputs,\n // but there doesn't appear to be a way to submit additional messages, and having such additional messages is rare.\n updates = _client!.Runs.SubmitToolOutputsToStreamAsync(threadRun, toolOutputs, cancellationToken);\n }\n else\n {\n if (threadId is null)\n {\n // No thread ID was provided, so create a new thread.\n PersistentAgentThread thread = await _client!.Threads.CreateThreadAsync(runOptions.ThreadOptions.Messages, runOptions.ToolResources, runOptions.Metadata, cancellationToken).ConfigureAwait(false);\n runOptions.ThreadOptions.Messages.Clear();\n threadId = thread.Id;\n }\n else if (threadRun is not null)\n {\n // There was an active run; we need to cancel it before starting a new run.\n await _client!.Runs.CancelRunAsync(threadId, threadRun.Id, cancellationToken).ConfigureAwait(false);\n threadRun = null;\n }\n\n // Now create a new run and stream the results.\n updates = _client!.Runs.CreateRunStreamingAsync(\n threadId: threadId,\n agentId: _agentId,\n overrideModelName: runOptions?.OverrideModelName,\n overrideInstructions: runOptions?.OverrideInstructions,\n additionalInstructions: null,\n additionalMessages: runOptions?.ThreadOptions.Messages,\n overrideTools: runOptions?.OverrideTools,\n temperature: runOptions?.Temperature,\n topP: runOptions?.TopP,\n maxPromptTokens: runOptions?.MaxPromptTokens,\n maxCompletionTokens: runOptions?.MaxCompletionTokens,\n truncationStrategy: runOptions?.TruncationStrategy,\n toolChoice: runOptions?.ToolChoice,\n responseFormat: 
runOptions?.ResponseFormat,\n parallelToolCalls: runOptions?.ParallelToolCalls,\n metadata: runOptions?.Metadata,\n cancellationToken);\n }\n\n // Process each update.\n string? responseId = null;\n await foreach (StreamingUpdate? update in updates.ConfigureAwait(false))\n {\n switch (update)\n {\n case ThreadUpdate tu:\n threadId ??= tu.Value.Id;\n goto default;\n\n case RunUpdate ru:\n threadId ??= ru.Value.ThreadId;\n responseId ??= ru.Value.Id;\n\n ChatResponseUpdate ruUpdate = new()\n {\n AuthorName = ru.Value.AssistantId,\n ConversationId = threadId,\n CreatedAt = ru.Value.CreatedAt,\n MessageId = responseId,\n ModelId = ru.Value.Model,\n RawRepresentation = ru,\n ResponseId = responseId,\n Role = ChatRole.Assistant,\n };\n\n if (ru.Value.Usage is { } usage)\n {\n ruUpdate.Contents.Add(new UsageContent(new()\n {\n InputTokenCount = usage.PromptTokens,\n OutputTokenCount = usage.CompletionTokens,\n TotalTokenCount = usage.TotalTokens,\n }));\n }\n\n if (ru is RequiredActionUpdate rau && rau.ToolCallId is string toolCallId && rau.FunctionName is string functionName)\n {\n ruUpdate.Contents.Add(\n new FunctionCallContent(\n JsonSerializer.Serialize([ru.Value.Id, toolCallId], AgentsChatClientJsonContext.Default.StringArray),\n functionName,\n JsonSerializer.Deserialize(rau.FunctionArguments, AgentsChatClientJsonContext.Default.IDictionaryStringObject)!));\n }\n\n yield return ruUpdate;\n break;\n\n case MessageContentUpdate mcu:\n yield return new(mcu.Role == MessageRole.User ? ChatRole.User : ChatRole.Assistant, mcu.Text)\n {\n ConversationId = threadId,\n MessageId = responseId,\n RawRepresentation = mcu,\n ResponseId = responseId,\n };\n break;\n\n default:\n yield return new ChatResponseUpdate\n {\n ConversationId = threadId,\n MessageId = responseId,\n RawRepresentation = update,\n ResponseId = responseId,\n Role = ChatRole.Assistant,\n };\n break;\n }\n }\n }\n\n /// \n public void Dispose() { }\n\n /// \n /// Creates the to use for the request and extracts any function result contents\n /// that need to be submitted as tool results.\n /// \n private async ValueTask<(ThreadAndRunOptions RunOptions, List? ToolResults)> CreateRunOptionsAsync(\n IEnumerable messages, ChatOptions? options, CancellationToken cancellationToken)\n {\n // Create the options instance to populate, either a fresh or using one the caller provides.\n ThreadAndRunOptions runOptions =\n options?.RawRepresentationFactory?.Invoke(this) as ThreadAndRunOptions ??\n new();\n\n // Populate the run options from the ChatOptions, if provided.\n if (options is not null)\n {\n runOptions.MaxCompletionTokens ??= options.MaxOutputTokens;\n runOptions.OverrideModelName ??= options.ModelId;\n runOptions.TopP ??= options.TopP;\n runOptions.Temperature ??= options.Temperature;\n runOptions.ParallelToolCalls ??= options.AllowMultipleToolCalls;\n // Ignored: options.TopK, options.FrequencyPenalty, options.Seed, options.StopSequences\n\n if (options.Tools is { Count: > 0 } tools)\n {\n List toolDefinitions = [];\n\n // If the caller has provided any tool overrides, we'll assume they don't want to use the agent's tools.\n // But if they haven't, the only way we can provide our tools is via an override, whereas we'd really like to\n // just add them. 
To handle that, we'll get all of the agent's tools and add them to the override list\n // along with our tools.\n if (runOptions.OverrideTools is null || !runOptions.OverrideTools.Any())\n {\n if (_agentTools is null)\n {\n PersistentAgent agent = await _client!.Administration.GetAgentAsync(_agentId, cancellationToken).ConfigureAwait(false);\n _agentTools = agent.Tools;\n }\n\n toolDefinitions.AddRange(_agentTools);\n }", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2167978316", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50898, + "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/src/Custom/PersistentAgentsChatClient.cs", + "discussion_id": "2167978316", + "commented_code": "@@ -0,0 +1,451 @@\n+// Copyright (c) Microsoft Corporation. All rights reserved.\n+// Licensed under the MIT License.\n+\n+#nullable enable\n+#pragma warning disable AZC0004, AZC0007, AZC0015\n+\n+using System;\n+using System.Collections.Generic;\n+using System.Linq;\n+using System.Runtime.CompilerServices;\n+using System.Text;\n+using System.Text.Json;\n+using System.Text.Json.Nodes;\n+using System.Text.Json.Serialization;\n+using System.Threading;\n+using System.Threading.Tasks;\n+using Microsoft.Extensions.AI;\n+\n+namespace Azure.AI.Agents.Persistent\n+{\n+ /// Represents an for an Azure.AI.Agents.Persistent .\n+ public partial class PersistentAgentsChatClient : IChatClient\n+ {\n+ /// The name of the chat client provider.\n+ private const string ProviderName = \"azure\";\n+\n+ /// The underlying .\n+ private readonly PersistentAgentsClient? _client;\n+\n+ /// Metadata for the client.\n+ private readonly ChatClientMetadata? _metadata;\n+\n+ /// The ID of the agent to use.\n+ private readonly string? _agentId;\n+\n+ /// The thread ID to use if none is supplied in .\n+ private readonly string? _defaultThreadId;\n+\n+ /// List of tools associated with the agent.\n+ private IReadOnlyList? _agentTools;\n+\n+ /// Initializes a new instance of the class for the specified .\n+ public PersistentAgentsChatClient(PersistentAgentsClient client, string agentId, string? defaultThreadId = null)\n+ {\n+ Argument.AssertNotNull(client, nameof(client));\n+ Argument.AssertNotNullOrWhiteSpace(agentId, nameof(agentId));\n+\n+ _client = client;\n+ _agentId = agentId;\n+ _defaultThreadId = defaultThreadId;\n+\n+ _metadata = new(ProviderName);\n+ }\n+\n+ protected PersistentAgentsChatClient() { }\n+\n+ /// \n+ public virtual object? GetService(Type serviceType, object? serviceKey = null) =>\n+ serviceType is null ? throw new ArgumentNullException(nameof(serviceType)) :\n+ serviceKey is not null ? null :\n+ serviceType == typeof(ChatClientMetadata) ? _metadata :\n+ serviceType == typeof(PersistentAgentsClient) ? _client :\n+ serviceType.IsInstanceOfType(this) ? this :\n+ null;\n+\n+ /// \n+ public virtual Task GetResponseAsync(\n+ IEnumerable messages, ChatOptions? options = null, CancellationToken cancellationToken = default) =>\n+ GetStreamingResponseAsync(messages, options, cancellationToken).ToChatResponseAsync(cancellationToken);\n+\n+ /// \n+ public virtual async IAsyncEnumerable GetStreamingResponseAsync(\n+ IEnumerable messages, ChatOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)\n+ {\n+ Argument.AssertNotNull(messages, nameof(messages));\n+\n+ // Extract necessary state from messages and options.\n+ (ThreadAndRunOptions runOptions, List? 
toolResults) =\n+ await CreateRunOptionsAsync(messages, options, cancellationToken).ConfigureAwait(false);\n+\n+ // Get the thread ID.\n+ string? threadId = options?.ConversationId ?? _defaultThreadId;\n+ if (threadId is null && toolResults is not null)\n+ {\n+ throw new ArgumentException(\"No thread ID was provided, but chat messages includes tool results.\", nameof(messages));\n+ }\n+\n+ // Get any active run ID for this thread.\n+ ThreadRun? threadRun = null;\n+ if (threadId is not null)\n+ {\n+ await foreach (ThreadRun? run in _client!.Runs.GetRunsAsync(threadId, limit: 1, ListSortOrder.Descending, cancellationToken: cancellationToken).ConfigureAwait(false))\n+ {\n+ if (run.Status != RunStatus.Completed && run.Status != RunStatus.Cancelled && run.Status != RunStatus.Failed && run.Status != RunStatus.Expired)\n+ {\n+ threadRun = run;\n+ break;\n+ }\n+ }\n+ }\n+\n+ // Submit the request.\n+ IAsyncEnumerable updates;\n+ if (threadRun is not null &&\n+ ConvertFunctionResultsToToolOutput(toolResults, out List? toolOutputs) is { } toolRunId &&\n+ toolRunId == threadRun.Id)\n+ {\n+ // There's an active run and we have tool results to submit, so submit the results and continue streaming.\n+ // This is going to ignore any additional messages in the run options, as we are only submitting tool outputs,\n+ // but there doesn't appear to be a way to submit additional messages, and having such additional messages is rare.\n+ updates = _client!.Runs.SubmitToolOutputsToStreamAsync(threadRun, toolOutputs, cancellationToken);\n+ }\n+ else\n+ {\n+ if (threadId is null)\n+ {\n+ // No thread ID was provided, so create a new thread.\n+ PersistentAgentThread thread = await _client!.Threads.CreateThreadAsync(runOptions.ThreadOptions.Messages, runOptions.ToolResources, runOptions.Metadata, cancellationToken).ConfigureAwait(false);\n+ runOptions.ThreadOptions.Messages.Clear();\n+ threadId = thread.Id;\n+ }\n+ else if (threadRun is not null)\n+ {\n+ // There was an active run; we need to cancel it before starting a new run.\n+ await _client!.Runs.CancelRunAsync(threadId, threadRun.Id, cancellationToken).ConfigureAwait(false);\n+ threadRun = null;\n+ }\n+\n+ // Now create a new run and stream the results.\n+ updates = _client!.Runs.CreateRunStreamingAsync(\n+ threadId: threadId,\n+ agentId: _agentId,\n+ overrideModelName: runOptions?.OverrideModelName,\n+ overrideInstructions: runOptions?.OverrideInstructions,\n+ additionalInstructions: null,\n+ additionalMessages: runOptions?.ThreadOptions.Messages,\n+ overrideTools: runOptions?.OverrideTools,\n+ temperature: runOptions?.Temperature,\n+ topP: runOptions?.TopP,\n+ maxPromptTokens: runOptions?.MaxPromptTokens,\n+ maxCompletionTokens: runOptions?.MaxCompletionTokens,\n+ truncationStrategy: runOptions?.TruncationStrategy,\n+ toolChoice: runOptions?.ToolChoice,\n+ responseFormat: runOptions?.ResponseFormat,\n+ parallelToolCalls: runOptions?.ParallelToolCalls,\n+ metadata: runOptions?.Metadata,\n+ cancellationToken);\n+ }\n+\n+ // Process each update.\n+ string? responseId = null;\n+ await foreach (StreamingUpdate? 
update in updates.ConfigureAwait(false))\n+ {\n+ switch (update)\n+ {\n+ case ThreadUpdate tu:\n+ threadId ??= tu.Value.Id;\n+ goto default;\n+\n+ case RunUpdate ru:\n+ threadId ??= ru.Value.ThreadId;\n+ responseId ??= ru.Value.Id;\n+\n+ ChatResponseUpdate ruUpdate = new()\n+ {\n+ AuthorName = ru.Value.AssistantId,\n+ ConversationId = threadId,\n+ CreatedAt = ru.Value.CreatedAt,\n+ MessageId = responseId,\n+ ModelId = ru.Value.Model,\n+ RawRepresentation = ru,\n+ ResponseId = responseId,\n+ Role = ChatRole.Assistant,\n+ };\n+\n+ if (ru.Value.Usage is { } usage)\n+ {\n+ ruUpdate.Contents.Add(new UsageContent(new()\n+ {\n+ InputTokenCount = usage.PromptTokens,\n+ OutputTokenCount = usage.CompletionTokens,\n+ TotalTokenCount = usage.TotalTokens,\n+ }));\n+ }\n+\n+ if (ru is RequiredActionUpdate rau && rau.ToolCallId is string toolCallId && rau.FunctionName is string functionName)\n+ {\n+ ruUpdate.Contents.Add(\n+ new FunctionCallContent(\n+ JsonSerializer.Serialize([ru.Value.Id, toolCallId], AgentsChatClientJsonContext.Default.StringArray),\n+ functionName,\n+ JsonSerializer.Deserialize(rau.FunctionArguments, AgentsChatClientJsonContext.Default.IDictionaryStringObject)!));\n+ }\n+\n+ yield return ruUpdate;\n+ break;\n+\n+ case MessageContentUpdate mcu:\n+ yield return new(mcu.Role == MessageRole.User ? ChatRole.User : ChatRole.Assistant, mcu.Text)\n+ {\n+ ConversationId = threadId,\n+ MessageId = responseId,\n+ RawRepresentation = mcu,\n+ ResponseId = responseId,\n+ };\n+ break;\n+\n+ default:\n+ yield return new ChatResponseUpdate\n+ {\n+ ConversationId = threadId,\n+ MessageId = responseId,\n+ RawRepresentation = update,\n+ ResponseId = responseId,\n+ Role = ChatRole.Assistant,\n+ };\n+ break;\n+ }\n+ }\n+ }\n+\n+ /// \n+ public void Dispose() { }\n+\n+ /// \n+ /// Creates the to use for the request and extracts any function result contents\n+ /// that need to be submitted as tool results.\n+ /// \n+ private async ValueTask<(ThreadAndRunOptions RunOptions, List? ToolResults)> CreateRunOptionsAsync(\n+ IEnumerable messages, ChatOptions? options, CancellationToken cancellationToken)\n+ {\n+ // Create the options instance to populate, either a fresh or using one the caller provides.\n+ ThreadAndRunOptions runOptions =\n+ options?.RawRepresentationFactory?.Invoke(this) as ThreadAndRunOptions ??\n+ new();\n+\n+ // Populate the run options from the ChatOptions, if provided.\n+ if (options is not null)\n+ {\n+ runOptions.MaxCompletionTokens ??= options.MaxOutputTokens;\n+ runOptions.OverrideModelName ??= options.ModelId;\n+ runOptions.TopP ??= options.TopP;\n+ runOptions.Temperature ??= options.Temperature;\n+ runOptions.ParallelToolCalls ??= options.AllowMultipleToolCalls;\n+ // Ignored: options.TopK, options.FrequencyPenalty, options.Seed, options.StopSequences\n+\n+ if (options.Tools is { Count: > 0 } tools)\n+ {\n+ List toolDefinitions = [];\n+\n+ // If the caller has provided any tool overrides, we'll assume they don't want to use the agent's tools.\n+ // But if they haven't, the only way we can provide our tools is via an override, whereas we'd really like to\n+ // just add them. 
To handle that, we'll get all of the agent's tools and add them to the override list\n+ // along with our tools.\n+ if (runOptions.OverrideTools is null || !runOptions.OverrideTools.Any())\n+ {\n+ if (_agentTools is null)\n+ {\n+ PersistentAgent agent = await _client!.Administration.GetAgentAsync(_agentId, cancellationToken).ConfigureAwait(false);\n+ _agentTools = agent.Tools;\n+ }\n+\n+ toolDefinitions.AddRange(_agentTools);\n+ }", + "comment_created_at": "2025-06-26T02:34:06+00:00", + "comment_author": "dmytrostruk", + "comment_body": "This logic is required, because currently there is no way (at least I didn't find it) how to add \"additional\" tools on top of already existing ones rather than override tools per request. If there will be a possibility to add \"additional\" tools, then this logic can be removed. \r\n\r\nWith this logic, `_agentTools` are stored on the class level to avoid pulling agent tools every time when user sends a request. So, there is a small possibility that agent tools can be updated on the server side, but previous tool collection will be cached in the class instance. So, it depends on how often agent tools change and if it happens rarely, this logic should be a good trade-off between performance and consistency.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2180633548", + "pr_number": 51021, + "pr_file": "sdk/core/Azure.Core/src/RequestContent.cs", + "created_at": "2025-07-02T17:42:50+00:00", + "commented_code": "return Task.CompletedTask;\n }\n }\n\n private sealed class PersistableModelRequestContent : RequestContent where T : IPersistableModel", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2180633548", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 51021, + "pr_file": "sdk/core/Azure.Core/src/RequestContent.cs", + "discussion_id": "2180633548", + "commented_code": "@@ -346,5 +360,35 @@ public override Task WriteToAsync(Stream stream, CancellationToken cancellation)\n return Task.CompletedTask;\n }\n }\n+\n+ private sealed class PersistableModelRequestContent : RequestContent where T : IPersistableModel", + "comment_created_at": "2025-07-02T17:42:50+00:00", + "comment_author": "KrzysztofCwalina", + "comment_body": "Why do we need this type? What's the advantage over using ModelReaderWriter.Write (inside the Cereate method above) to create BinaryData and then returning the existing BinaryContent.Create(binaryData)?", + "pr_file_module": null + }, + { + "comment_id": "2180644575", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 51021, + "pr_file": "sdk/core/Azure.Core/src/RequestContent.cs", + "discussion_id": "2180633548", + "commented_code": "@@ -346,5 +360,35 @@ public override Task WriteToAsync(Stream stream, CancellationToken cancellation)\n return Task.CompletedTask;\n }\n }\n+\n+ private sealed class PersistableModelRequestContent : RequestContent where T : IPersistableModel", + "comment_created_at": "2025-07-02T17:49:51+00:00", + "comment_author": "JoshLove-msft", + "comment_body": "Creating a BinaryData will result in allocations. 
Using BinaryContent.Create(IPersistableModel) will use shared buffer to avoid allocations - https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/core/System.ClientModel/src/Message/BinaryContent.cs#L224", + "pr_file_module": null + }, + { + "comment_id": "2180645941", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 51021, + "pr_file": "sdk/core/Azure.Core/src/RequestContent.cs", + "discussion_id": "2180633548", + "commented_code": "@@ -346,5 +360,35 @@ public override Task WriteToAsync(Stream stream, CancellationToken cancellation)\n return Task.CompletedTask;\n }\n }\n+\n+ private sealed class PersistableModelRequestContent : RequestContent where T : IPersistableModel", + "comment_created_at": "2025-07-02T17:50:43+00:00", + "comment_author": "JoshLove-msft", + "comment_body": "https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/core/System.ClientModel/src/ModelReaderWriter/UnsafeBufferSequence.cs#L14", + "pr_file_module": null + }, + { + "comment_id": "2180656922", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 51021, + "pr_file": "sdk/core/Azure.Core/src/RequestContent.cs", + "discussion_id": "2180633548", + "commented_code": "@@ -346,5 +360,35 @@ public override Task WriteToAsync(Stream stream, CancellationToken cancellation)\n return Task.CompletedTask;\n }\n }\n+\n+ private sealed class PersistableModelRequestContent : RequestContent where T : IPersistableModel", + "comment_created_at": "2025-07-02T17:57:30+00:00", + "comment_author": "Copilot", + "comment_body": "The PersistableModelRequestContent type wraps BinaryContent.Create() specifically to leverage its streaming optimizations.\n\n**Alternative approach (ModelReaderWriter.Write + BinaryData):**\n- ModelReaderWriter.Write() fully serializes the model to memory first (BinaryData)\n- Then RequestContent.Create(binaryData) streams from that memory\n\n**Current approach (BinaryContent wrapper):**\n- BinaryContent.Create() for IJsonModel types uses ModelWriter with UnsafeBufferSequence \n- Streams directly via SequenceReader.CopyTo() without full serialization to memory first\n- Only falls back to memory serialization for non-IJsonModel types\n\nThe wrapper type is necessary because BinaryContent and RequestContent are separate hierarchies, but BinaryContent has the optimized streaming implementation we want to reuse.", + "pr_file_module": null + }, + { + "comment_id": "2180695432", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 51021, + "pr_file": "sdk/core/Azure.Core/src/RequestContent.cs", + "discussion_id": "2180633548", + "commented_code": "@@ -346,5 +360,35 @@ public override Task WriteToAsync(Stream stream, CancellationToken cancellation)\n return Task.CompletedTask;\n }\n }\n+\n+ private sealed class PersistableModelRequestContent : RequestContent where T : IPersistableModel", + "comment_created_at": "2025-07-02T18:16:48+00:00", + "comment_author": "m-nash", + "comment_body": "@KrzysztofCwalina in the json format case (or where wire = json) we will use the UnsafeBufferSequence internally which is about 30% faster for speed and allocates about 80% less on average due to the use of shared buffers. 
For the non json cases like xml there is no difference between what you suggest and what is implemented.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2093706419", + "pr_number": 50129, + "pr_file": "sdk/resourcemanager/Azure.ResourceManager/src/ArmClientOptions.cs", + "created_at": "2025-05-16T21:43:20+00:00", + "commented_code": "var assembly = Assembly.GetExecutingAssembly();\n using (Stream stream = assembly.GetManifestResourceStream(profile.GetManifestName()))\n {\n var allProfile = BinaryData.FromStream(stream).ToObjectFromJson>>();\n var span = BinaryData.FromStream(stream).ToMemory().Span;\n var allProfile = JsonSerializer.Deserialize>>(span, ArmClientOptionsJsonContext.Default.DictionaryStringDictionaryStringJsonElement);", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2093706419", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50129, + "pr_file": "sdk/resourcemanager/Azure.ResourceManager/src/ArmClientOptions.cs", + "discussion_id": "2093706419", + "commented_code": "@@ -47,12 +48,13 @@ public void SetApiVersionsFromProfile(AzureStackProfile profile)\n var assembly = Assembly.GetExecutingAssembly();\n using (Stream stream = assembly.GetManifestResourceStream(profile.GetManifestName()))\n {\n- var allProfile = BinaryData.FromStream(stream).ToObjectFromJson>>();\n+ var span = BinaryData.FromStream(stream).ToMemory().Span;\n+ var allProfile = JsonSerializer.Deserialize>>(span, ArmClientOptionsJsonContext.Default.DictionaryStringDictionaryStringJsonElement);", + "comment_created_at": "2025-05-16T21:43:20+00:00", + "comment_author": "Copilot", + "comment_body": "Consider evaluating the memory impact of loading the entire stream into a Span, especially if the configuration file could be large. 
If applicable, explore streaming deserialization to mitigate potential memory overhead.\n```suggestion\n var allProfile = JsonSerializer.DeserializeAsync>>(stream, ArmClientOptionsJsonContext.Default.DictionaryStringDictionaryStringJsonElement).Result;\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2170421287", + "pr_number": 50926, + "pr_file": "sdk/keyvault/Azure.Security.KeyVault.Keys/src/KeyAttributes.cs", + "created_at": "2025-06-27T01:11:30+00:00", + "commented_code": "if (Attestation != null)\n {\n json.WriteStartObject(s_keyAttestationPropertyNameBytes);\n if (Attestation.CertificatePemFile != null)\n if (!Attestation.CertificatePemFile.IsEmpty)\n {\n json.WriteString(\"certificatePemFile\", Convert.ToBase64String(Attestation.CertificatePemFile));\n json.WriteString(\"certificatePemFile\", Convert.ToBase64String(Attestation.CertificatePemFile.ToArray()));\n }\n if (Attestation.PrivateKeyAttestation != null)\n if (!Attestation.PrivateKeyAttestation.IsEmpty)\n {\n json.WriteString(\"privateKeyAttestation\", Convert.ToBase64String(Attestation.PrivateKeyAttestation));\n json.WriteString(\"privateKeyAttestation\", Convert.ToBase64String(Attestation.PrivateKeyAttestation.ToArray()));\n }\n if (Attestation.PublicKeyAttestation != null)\n if (!Attestation.PublicKeyAttestation.IsEmpty)\n {\n json.WriteString(\"publicKeyAttestation\", Convert.ToBase64String(Attestation.PublicKeyAttestation));\n json.WriteString(\"publicKeyAttestation\", Convert.ToBase64String(Attestation.PublicKeyAttestation.ToArray()));", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2170421287", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50926, + "pr_file": "sdk/keyvault/Azure.Security.KeyVault.Keys/src/KeyAttributes.cs", + "discussion_id": "2170421287", + "commented_code": "@@ -143,17 +143,17 @@ internal void WriteProperties(Utf8JsonWriter json)\n if (Attestation != null)\n {\n json.WriteStartObject(s_keyAttestationPropertyNameBytes);\n- if (Attestation.CertificatePemFile != null)\n+ if (!Attestation.CertificatePemFile.IsEmpty)\n {\n- json.WriteString(\"certificatePemFile\", Convert.ToBase64String(Attestation.CertificatePemFile));\n+ json.WriteString(\"certificatePemFile\", Convert.ToBase64String(Attestation.CertificatePemFile.ToArray()));\n }\n- if (Attestation.PrivateKeyAttestation != null)\n+ if (!Attestation.PrivateKeyAttestation.IsEmpty)\n {\n- json.WriteString(\"privateKeyAttestation\", Convert.ToBase64String(Attestation.PrivateKeyAttestation));\n+ json.WriteString(\"privateKeyAttestation\", Convert.ToBase64String(Attestation.PrivateKeyAttestation.ToArray()));\n }\n- if (Attestation.PublicKeyAttestation != null)\n+ if (!Attestation.PublicKeyAttestation.IsEmpty)\n {\n- json.WriteString(\"publicKeyAttestation\", Convert.ToBase64String(Attestation.PublicKeyAttestation));\n+ json.WriteString(\"publicKeyAttestation\", Convert.ToBase64String(Attestation.PublicKeyAttestation.ToArray()));", + "comment_created_at": "2025-06-27T01:11:30+00:00", + "comment_author": "Copilot", + "comment_body": "Avoid allocating a new array with ToArray(): use the ReadOnlyMemory's Span overload, e.g. 
`Convert.ToBase64String(Attestation.CertificatePemFile.Span)` to improve performance.", + "pr_file_module": null + }, + { + "comment_id": "2170432767", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50926, + "pr_file": "sdk/keyvault/Azure.Security.KeyVault.Keys/src/KeyAttributes.cs", + "discussion_id": "2170421287", + "commented_code": "@@ -143,17 +143,17 @@ internal void WriteProperties(Utf8JsonWriter json)\n if (Attestation != null)\n {\n json.WriteStartObject(s_keyAttestationPropertyNameBytes);\n- if (Attestation.CertificatePemFile != null)\n+ if (!Attestation.CertificatePemFile.IsEmpty)\n {\n- json.WriteString(\"certificatePemFile\", Convert.ToBase64String(Attestation.CertificatePemFile));\n+ json.WriteString(\"certificatePemFile\", Convert.ToBase64String(Attestation.CertificatePemFile.ToArray()));\n }\n- if (Attestation.PrivateKeyAttestation != null)\n+ if (!Attestation.PrivateKeyAttestation.IsEmpty)\n {\n- json.WriteString(\"privateKeyAttestation\", Convert.ToBase64String(Attestation.PrivateKeyAttestation));\n+ json.WriteString(\"privateKeyAttestation\", Convert.ToBase64String(Attestation.PrivateKeyAttestation.ToArray()));\n }\n- if (Attestation.PublicKeyAttestation != null)\n+ if (!Attestation.PublicKeyAttestation.IsEmpty)\n {\n- json.WriteString(\"publicKeyAttestation\", Convert.ToBase64String(Attestation.PublicKeyAttestation));\n+ json.WriteString(\"publicKeyAttestation\", Convert.ToBase64String(Attestation.PublicKeyAttestation.ToArray()));", + "comment_created_at": "2025-06-27T01:19:16+00:00", + "comment_author": "JonathanCrd", + "comment_body": "No, `Convert.ToBase64String` is expecting an array.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2114870690", + "pr_number": 50279, + "pr_file": "sdk/core/Azure.Core/src/ResponseError.Serialization.cs", + "created_at": "2025-05-29T23:05:01+00:00", + "commented_code": "// Copyright (c) Microsoft Corporation. All rights reserved.\n// Licensed under the MIT License.\n\nusing System;\nusing System.ClientModel.Primitives;\nusing System.Collections.Generic;\nusing System.Globalization;\nusing System.Text;\nusing System.Text.Json;\nusing System.Text.Json.Serialization;\nusing Azure.Core;\n\nnamespace Azure\n{\n /// \n /// Represents an error returned by an Azure Service.\n /// \n [JsonConverter(typeof(Converter))]\n [TypeReferenceType(true, [nameof(Target), nameof(Details)])]\n public sealed partial class ResponseError : IJsonModel\n {\n // This class needs to be internal rather than private so that it can be used by the System.Text.Json source generator\n internal class Converter : JsonConverter\n {\n public override ResponseError? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)\n {\n using var document = JsonDocument.ParseValue(ref reader);\n var element = document.RootElement;\n return ReadFromJson(element);\n }\n\n public override void Write(Utf8JsonWriter writer, ResponseError? 
value, JsonSerializerOptions options)\n {\n throw new NotImplementedException();\n }\n }\n\n /// \n /// Writes the to the provided .\n /// \n public void Write(Utf8JsonWriter writer, ModelReaderWriterOptions options)\n {\n writer.WriteStartObject();\n\n if (Code != null)\n {\n writer.WritePropertyName(\"code\");\n writer.WriteStringValue(Code);\n }\n\n if (Message != null)\n {\n writer.WritePropertyName(\"message\");\n writer.WriteStringValue(Message);\n }\n\n if (Target != null)\n {\n writer.WritePropertyName(\"target\");\n writer.WriteStringValue(Target);\n }\n\n if (InnerError != null)\n {\n writer.WritePropertyName(\"innererror\");\n InnerError.Write(writer, ModelReaderWriterOptions.Json);\n }\n\n if (Details.Count > 0)\n {\n writer.WritePropertyName(\"details\");\n writer.WriteStartArray();\n\n foreach (var detail in Details)\n {\n if (detail == null)\n {\n writer.WriteNullValue();\n }\n else\n {\n detail.Write(writer, options);\n }\n }\n\n writer.WriteEndArray();\n }\n\n writer.WriteEndObject();\n }\n\n /// \n public ResponseError Create(ref Utf8JsonReader reader, ModelReaderWriterOptions options)\n {\n var format = options.Format == \"W\" ? GetFormatFromOptions(options) : options.Format;\n if (format != \"J\")\n {\n throw new FormatException($\"The model {nameof(ResponseError)} does not support '{format}' format.\");\n }\n\n using var document = JsonDocument.ParseValue(ref reader);\n var element = document.RootElement;\n return ReadFromJson(element) ?? new ResponseError();\n }\n\n /// \n /// Writes the to the provided .\n /// \n public BinaryData Write(ModelReaderWriterOptions options)\n {\n var format = options.Format == \"W\" ? GetFormatFromOptions(options) : options.Format;\n if (format != \"J\")\n {\n throw new FormatException($\"The model {nameof(ResponseError)} does not support '{format}' format.\");\n }\n\n using var stream = new System.IO.MemoryStream();", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2114870690", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50279, + "pr_file": "sdk/core/Azure.Core/src/ResponseError.Serialization.cs", + "discussion_id": "2114870690", + "commented_code": "@@ -0,0 +1,192 @@\n+// Copyright (c) Microsoft Corporation. All rights reserved.\n+// Licensed under the MIT License.\n+\n+using System;\n+using System.ClientModel.Primitives;\n+using System.Collections.Generic;\n+using System.Globalization;\n+using System.Text;\n+using System.Text.Json;\n+using System.Text.Json.Serialization;\n+using Azure.Core;\n+\n+namespace Azure\n+{\n+ /// \n+ /// Represents an error returned by an Azure Service.\n+ /// \n+ [JsonConverter(typeof(Converter))]\n+ [TypeReferenceType(true, [nameof(Target), nameof(Details)])]\n+ public sealed partial class ResponseError : IJsonModel\n+ {\n+ // This class needs to be internal rather than private so that it can be used by the System.Text.Json source generator\n+ internal class Converter : JsonConverter\n+ {\n+ public override ResponseError? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)\n+ {\n+ using var document = JsonDocument.ParseValue(ref reader);\n+ var element = document.RootElement;\n+ return ReadFromJson(element);\n+ }\n+\n+ public override void Write(Utf8JsonWriter writer, ResponseError? 
value, JsonSerializerOptions options)\n+ {\n+ throw new NotImplementedException();\n+ }\n+ }\n+\n+ /// \n+ /// Writes the to the provided .\n+ /// \n+ public void Write(Utf8JsonWriter writer, ModelReaderWriterOptions options)\n+ {\n+ writer.WriteStartObject();\n+\n+ if (Code != null)\n+ {\n+ writer.WritePropertyName(\"code\");\n+ writer.WriteStringValue(Code);\n+ }\n+\n+ if (Message != null)\n+ {\n+ writer.WritePropertyName(\"message\");\n+ writer.WriteStringValue(Message);\n+ }\n+\n+ if (Target != null)\n+ {\n+ writer.WritePropertyName(\"target\");\n+ writer.WriteStringValue(Target);\n+ }\n+\n+ if (InnerError != null)\n+ {\n+ writer.WritePropertyName(\"innererror\");\n+ InnerError.Write(writer, ModelReaderWriterOptions.Json);\n+ }\n+\n+ if (Details.Count > 0)\n+ {\n+ writer.WritePropertyName(\"details\");\n+ writer.WriteStartArray();\n+\n+ foreach (var detail in Details)\n+ {\n+ if (detail == null)\n+ {\n+ writer.WriteNullValue();\n+ }\n+ else\n+ {\n+ detail.Write(writer, options);\n+ }\n+ }\n+\n+ writer.WriteEndArray();\n+ }\n+\n+ writer.WriteEndObject();\n+ }\n+\n+ /// \n+ public ResponseError Create(ref Utf8JsonReader reader, ModelReaderWriterOptions options)\n+ {\n+ var format = options.Format == \"W\" ? GetFormatFromOptions(options) : options.Format;\n+ if (format != \"J\")\n+ {\n+ throw new FormatException($\"The model {nameof(ResponseError)} does not support '{format}' format.\");\n+ }\n+\n+ using var document = JsonDocument.ParseValue(ref reader);\n+ var element = document.RootElement;\n+ return ReadFromJson(element) ?? new ResponseError();\n+ }\n+\n+ /// \n+ /// Writes the to the provided .\n+ /// \n+ public BinaryData Write(ModelReaderWriterOptions options)\n+ {\n+ var format = options.Format == \"W\" ? GetFormatFromOptions(options) : options.Format;\n+ if (format != \"J\")\n+ {\n+ throw new FormatException($\"The model {nameof(ResponseError)} does not support '{format}' format.\");\n+ }\n+\n+ using var stream = new System.IO.MemoryStream();", + "comment_created_at": "2025-05-29T23:05:01+00:00", + "comment_author": "m-nash", + "comment_body": "Should call into ModelReaderWriter here it uses a faster buffer than memory stream. 
You can use RehydrationToken as a guide https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/core/Azure.Core/src/RehydrationToken.Serialization.cs#L136-L147", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2037553900", + "pr_number": 49324, + "pr_file": "sdk/cloudmachine/Azure.Projects.AI/src/Agents/ChatRunner.cs", + "created_at": "2025-04-10T14:24:39+00:00", + "commented_code": "}\n else\n {\n ChatCompletion completion = _chat.CompleteChat(conversation);\n ChatCompletionOptions options = new();\n options.Temperature = 0.0f;", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2037553900", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49324, + "pr_file": "sdk/cloudmachine/Azure.Projects.AI/src/Agents/ChatRunner.cs", + "discussion_id": "2037553900", + "commented_code": "@@ -113,7 +113,9 @@ protected virtual ChatCompletion OnComplete(List conversation, stri\n }\n else\n {\n- ChatCompletion completion = _chat.CompleteChat(conversation);\n+ ChatCompletionOptions options = new();\n+ options.Temperature = 0.0f;", + "comment_created_at": "2025-04-10T14:24:39+00:00", + "comment_author": "christothes", + "comment_body": "Can we initialize the options once and reuse it here each time?", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-minimize-memory-allocations.md b/_reviewers/azure-sdk-for-net-minimize-memory-allocations.md index ae676ef..77f93ed 100644 --- a/_reviewers/azure-sdk-for-net-minimize-memory-allocations.md +++ b/_reviewers/azure-sdk-for-net-minimize-memory-allocations.md @@ -54,199 +54,3 @@ Key practices to follow: 5. **Reuse objects** for repeated operations rather than creating new instances each time. These optimizations are particularly important in high-throughput scenarios where allocation pressure can cause frequent garbage collections, leading to performance degradation. - - -[ - { - "discussion_id": "2167978316", - "pr_number": 50898, - "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/src/Custom/PersistentAgentsChatClient.cs", - "created_at": "2025-06-26T02:34:06+00:00", - "commented_code": "\ufeff// Copyright (c) Microsoft Corporation. All rights reserved.\n// Licensed under the MIT License.\n\n#nullable enable\n#pragma warning disable AZC0004, AZC0007, AZC0015\n\nusing System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Runtime.CompilerServices;\nusing System.Text;\nusing System.Text.Json;\nusing System.Text.Json.Nodes;\nusing System.Text.Json.Serialization;\nusing System.Threading;\nusing System.Threading.Tasks;\nusing Microsoft.Extensions.AI;\n\nnamespace Azure.AI.Agents.Persistent\n{\n /// Represents an for an Azure.AI.Agents.Persistent .\n public partial class PersistentAgentsChatClient : IChatClient\n {\n /// The name of the chat client provider.\n private const string ProviderName = \"azure\";\n\n /// The underlying .\n private readonly PersistentAgentsClient? _client;\n\n /// Metadata for the client.\n private readonly ChatClientMetadata? _metadata;\n\n /// The ID of the agent to use.\n private readonly string? _agentId;\n\n /// The thread ID to use if none is supplied in .\n private readonly string? _defaultThreadId;\n\n /// List of tools associated with the agent.\n private IReadOnlyList? _agentTools;\n\n /// Initializes a new instance of the class for the specified .\n public PersistentAgentsChatClient(PersistentAgentsClient client, string agentId, string? 
defaultThreadId = null)\n {\n Argument.AssertNotNull(client, nameof(client));\n Argument.AssertNotNullOrWhiteSpace(agentId, nameof(agentId));\n\n _client = client;\n _agentId = agentId;\n _defaultThreadId = defaultThreadId;\n\n _metadata = new(ProviderName);\n }\n\n protected PersistentAgentsChatClient() { }\n\n /// \n public virtual object? GetService(Type serviceType, object? serviceKey = null) =>\n serviceType is null ? throw new ArgumentNullException(nameof(serviceType)) :\n serviceKey is not null ? null :\n serviceType == typeof(ChatClientMetadata) ? _metadata :\n serviceType == typeof(PersistentAgentsClient) ? _client :\n serviceType.IsInstanceOfType(this) ? this :\n null;\n\n /// \n public virtual Task GetResponseAsync(\n IEnumerable messages, ChatOptions? options = null, CancellationToken cancellationToken = default) =>\n GetStreamingResponseAsync(messages, options, cancellationToken).ToChatResponseAsync(cancellationToken);\n\n /// \n public virtual async IAsyncEnumerable GetStreamingResponseAsync(\n IEnumerable messages, ChatOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)\n {\n Argument.AssertNotNull(messages, nameof(messages));\n\n // Extract necessary state from messages and options.\n (ThreadAndRunOptions runOptions, List? toolResults) =\n await CreateRunOptionsAsync(messages, options, cancellationToken).ConfigureAwait(false);\n\n // Get the thread ID.\n string? threadId = options?.ConversationId ?? _defaultThreadId;\n if (threadId is null && toolResults is not null)\n {\n throw new ArgumentException(\"No thread ID was provided, but chat messages includes tool results.\", nameof(messages));\n }\n\n // Get any active run ID for this thread.\n ThreadRun? threadRun = null;\n if (threadId is not null)\n {\n await foreach (ThreadRun? run in _client!.Runs.GetRunsAsync(threadId, limit: 1, ListSortOrder.Descending, cancellationToken: cancellationToken).ConfigureAwait(false))\n {\n if (run.Status != RunStatus.Completed && run.Status != RunStatus.Cancelled && run.Status != RunStatus.Failed && run.Status != RunStatus.Expired)\n {\n threadRun = run;\n break;\n }\n }\n }\n\n // Submit the request.\n IAsyncEnumerable updates;\n if (threadRun is not null &&\n ConvertFunctionResultsToToolOutput(toolResults, out List? 
toolOutputs) is { } toolRunId &&\n toolRunId == threadRun.Id)\n {\n // There's an active run and we have tool results to submit, so submit the results and continue streaming.\n // This is going to ignore any additional messages in the run options, as we are only submitting tool outputs,\n // but there doesn't appear to be a way to submit additional messages, and having such additional messages is rare.\n updates = _client!.Runs.SubmitToolOutputsToStreamAsync(threadRun, toolOutputs, cancellationToken);\n }\n else\n {\n if (threadId is null)\n {\n // No thread ID was provided, so create a new thread.\n PersistentAgentThread thread = await _client!.Threads.CreateThreadAsync(runOptions.ThreadOptions.Messages, runOptions.ToolResources, runOptions.Metadata, cancellationToken).ConfigureAwait(false);\n runOptions.ThreadOptions.Messages.Clear();\n threadId = thread.Id;\n }\n else if (threadRun is not null)\n {\n // There was an active run; we need to cancel it before starting a new run.\n await _client!.Runs.CancelRunAsync(threadId, threadRun.Id, cancellationToken).ConfigureAwait(false);\n threadRun = null;\n }\n\n // Now create a new run and stream the results.\n updates = _client!.Runs.CreateRunStreamingAsync(\n threadId: threadId,\n agentId: _agentId,\n overrideModelName: runOptions?.OverrideModelName,\n overrideInstructions: runOptions?.OverrideInstructions,\n additionalInstructions: null,\n additionalMessages: runOptions?.ThreadOptions.Messages,\n overrideTools: runOptions?.OverrideTools,\n temperature: runOptions?.Temperature,\n topP: runOptions?.TopP,\n maxPromptTokens: runOptions?.MaxPromptTokens,\n maxCompletionTokens: runOptions?.MaxCompletionTokens,\n truncationStrategy: runOptions?.TruncationStrategy,\n toolChoice: runOptions?.ToolChoice,\n responseFormat: runOptions?.ResponseFormat,\n parallelToolCalls: runOptions?.ParallelToolCalls,\n metadata: runOptions?.Metadata,\n cancellationToken);\n }\n\n // Process each update.\n string? responseId = null;\n await foreach (StreamingUpdate? update in updates.ConfigureAwait(false))\n {\n switch (update)\n {\n case ThreadUpdate tu:\n threadId ??= tu.Value.Id;\n goto default;\n\n case RunUpdate ru:\n threadId ??= ru.Value.ThreadId;\n responseId ??= ru.Value.Id;\n\n ChatResponseUpdate ruUpdate = new()\n {\n AuthorName = ru.Value.AssistantId,\n ConversationId = threadId,\n CreatedAt = ru.Value.CreatedAt,\n MessageId = responseId,\n ModelId = ru.Value.Model,\n RawRepresentation = ru,\n ResponseId = responseId,\n Role = ChatRole.Assistant,\n };\n\n if (ru.Value.Usage is { } usage)\n {\n ruUpdate.Contents.Add(new UsageContent(new()\n {\n InputTokenCount = usage.PromptTokens,\n OutputTokenCount = usage.CompletionTokens,\n TotalTokenCount = usage.TotalTokens,\n }));\n }\n\n if (ru is RequiredActionUpdate rau && rau.ToolCallId is string toolCallId && rau.FunctionName is string functionName)\n {\n ruUpdate.Contents.Add(\n new FunctionCallContent(\n JsonSerializer.Serialize([ru.Value.Id, toolCallId], AgentsChatClientJsonContext.Default.StringArray),\n functionName,\n JsonSerializer.Deserialize(rau.FunctionArguments, AgentsChatClientJsonContext.Default.IDictionaryStringObject)!));\n }\n\n yield return ruUpdate;\n break;\n\n case MessageContentUpdate mcu:\n yield return new(mcu.Role == MessageRole.User ? 
ChatRole.User : ChatRole.Assistant, mcu.Text)\n {\n ConversationId = threadId,\n MessageId = responseId,\n RawRepresentation = mcu,\n ResponseId = responseId,\n };\n break;\n\n default:\n yield return new ChatResponseUpdate\n {\n ConversationId = threadId,\n MessageId = responseId,\n RawRepresentation = update,\n ResponseId = responseId,\n Role = ChatRole.Assistant,\n };\n break;\n }\n }\n }\n\n /// \n public void Dispose() { }\n\n /// \n /// Creates the to use for the request and extracts any function result contents\n /// that need to be submitted as tool results.\n /// \n private async ValueTask<(ThreadAndRunOptions RunOptions, List? ToolResults)> CreateRunOptionsAsync(\n IEnumerable messages, ChatOptions? options, CancellationToken cancellationToken)\n {\n // Create the options instance to populate, either a fresh or using one the caller provides.\n ThreadAndRunOptions runOptions =\n options?.RawRepresentationFactory?.Invoke(this) as ThreadAndRunOptions ??\n new();\n\n // Populate the run options from the ChatOptions, if provided.\n if (options is not null)\n {\n runOptions.MaxCompletionTokens ??= options.MaxOutputTokens;\n runOptions.OverrideModelName ??= options.ModelId;\n runOptions.TopP ??= options.TopP;\n runOptions.Temperature ??= options.Temperature;\n runOptions.ParallelToolCalls ??= options.AllowMultipleToolCalls;\n // Ignored: options.TopK, options.FrequencyPenalty, options.Seed, options.StopSequences\n\n if (options.Tools is { Count: > 0 } tools)\n {\n List toolDefinitions = [];\n\n // If the caller has provided any tool overrides, we'll assume they don't want to use the agent's tools.\n // But if they haven't, the only way we can provide our tools is via an override, whereas we'd really like to\n // just add them. To handle that, we'll get all of the agent's tools and add them to the override list\n // along with our tools.\n if (runOptions.OverrideTools is null || !runOptions.OverrideTools.Any())\n {\n if (_agentTools is null)\n {\n PersistentAgent agent = await _client!.Administration.GetAgentAsync(_agentId, cancellationToken).ConfigureAwait(false);\n _agentTools = agent.Tools;\n }\n\n toolDefinitions.AddRange(_agentTools);\n }", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2167978316", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50898, - "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/src/Custom/PersistentAgentsChatClient.cs", - "discussion_id": "2167978316", - "commented_code": "@@ -0,0 +1,451 @@\n+\ufeff// Copyright (c) Microsoft Corporation. All rights reserved.\n+// Licensed under the MIT License.\n+\n+#nullable enable\n+#pragma warning disable AZC0004, AZC0007, AZC0015\n+\n+using System;\n+using System.Collections.Generic;\n+using System.Linq;\n+using System.Runtime.CompilerServices;\n+using System.Text;\n+using System.Text.Json;\n+using System.Text.Json.Nodes;\n+using System.Text.Json.Serialization;\n+using System.Threading;\n+using System.Threading.Tasks;\n+using Microsoft.Extensions.AI;\n+\n+namespace Azure.AI.Agents.Persistent\n+{\n+ /// Represents an for an Azure.AI.Agents.Persistent .\n+ public partial class PersistentAgentsChatClient : IChatClient\n+ {\n+ /// The name of the chat client provider.\n+ private const string ProviderName = \"azure\";\n+\n+ /// The underlying .\n+ private readonly PersistentAgentsClient? _client;\n+\n+ /// Metadata for the client.\n+ private readonly ChatClientMetadata? _metadata;\n+\n+ /// The ID of the agent to use.\n+ private readonly string? 
_agentId;\n+\n+ /// The thread ID to use if none is supplied in .\n+ private readonly string? _defaultThreadId;\n+\n+ /// List of tools associated with the agent.\n+ private IReadOnlyList? _agentTools;\n+\n+ /// Initializes a new instance of the class for the specified .\n+ public PersistentAgentsChatClient(PersistentAgentsClient client, string agentId, string? defaultThreadId = null)\n+ {\n+ Argument.AssertNotNull(client, nameof(client));\n+ Argument.AssertNotNullOrWhiteSpace(agentId, nameof(agentId));\n+\n+ _client = client;\n+ _agentId = agentId;\n+ _defaultThreadId = defaultThreadId;\n+\n+ _metadata = new(ProviderName);\n+ }\n+\n+ protected PersistentAgentsChatClient() { }\n+\n+ /// \n+ public virtual object? GetService(Type serviceType, object? serviceKey = null) =>\n+ serviceType is null ? throw new ArgumentNullException(nameof(serviceType)) :\n+ serviceKey is not null ? null :\n+ serviceType == typeof(ChatClientMetadata) ? _metadata :\n+ serviceType == typeof(PersistentAgentsClient) ? _client :\n+ serviceType.IsInstanceOfType(this) ? this :\n+ null;\n+\n+ /// \n+ public virtual Task GetResponseAsync(\n+ IEnumerable messages, ChatOptions? options = null, CancellationToken cancellationToken = default) =>\n+ GetStreamingResponseAsync(messages, options, cancellationToken).ToChatResponseAsync(cancellationToken);\n+\n+ /// \n+ public virtual async IAsyncEnumerable GetStreamingResponseAsync(\n+ IEnumerable messages, ChatOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)\n+ {\n+ Argument.AssertNotNull(messages, nameof(messages));\n+\n+ // Extract necessary state from messages and options.\n+ (ThreadAndRunOptions runOptions, List? toolResults) =\n+ await CreateRunOptionsAsync(messages, options, cancellationToken).ConfigureAwait(false);\n+\n+ // Get the thread ID.\n+ string? threadId = options?.ConversationId ?? _defaultThreadId;\n+ if (threadId is null && toolResults is not null)\n+ {\n+ throw new ArgumentException(\"No thread ID was provided, but chat messages includes tool results.\", nameof(messages));\n+ }\n+\n+ // Get any active run ID for this thread.\n+ ThreadRun? threadRun = null;\n+ if (threadId is not null)\n+ {\n+ await foreach (ThreadRun? run in _client!.Runs.GetRunsAsync(threadId, limit: 1, ListSortOrder.Descending, cancellationToken: cancellationToken).ConfigureAwait(false))\n+ {\n+ if (run.Status != RunStatus.Completed && run.Status != RunStatus.Cancelled && run.Status != RunStatus.Failed && run.Status != RunStatus.Expired)\n+ {\n+ threadRun = run;\n+ break;\n+ }\n+ }\n+ }\n+\n+ // Submit the request.\n+ IAsyncEnumerable updates;\n+ if (threadRun is not null &&\n+ ConvertFunctionResultsToToolOutput(toolResults, out List? 
toolOutputs) is { } toolRunId &&\n+ toolRunId == threadRun.Id)\n+ {\n+ // There's an active run and we have tool results to submit, so submit the results and continue streaming.\n+ // This is going to ignore any additional messages in the run options, as we are only submitting tool outputs,\n+ // but there doesn't appear to be a way to submit additional messages, and having such additional messages is rare.\n+ updates = _client!.Runs.SubmitToolOutputsToStreamAsync(threadRun, toolOutputs, cancellationToken);\n+ }\n+ else\n+ {\n+ if (threadId is null)\n+ {\n+ // No thread ID was provided, so create a new thread.\n+ PersistentAgentThread thread = await _client!.Threads.CreateThreadAsync(runOptions.ThreadOptions.Messages, runOptions.ToolResources, runOptions.Metadata, cancellationToken).ConfigureAwait(false);\n+ runOptions.ThreadOptions.Messages.Clear();\n+ threadId = thread.Id;\n+ }\n+ else if (threadRun is not null)\n+ {\n+ // There was an active run; we need to cancel it before starting a new run.\n+ await _client!.Runs.CancelRunAsync(threadId, threadRun.Id, cancellationToken).ConfigureAwait(false);\n+ threadRun = null;\n+ }\n+\n+ // Now create a new run and stream the results.\n+ updates = _client!.Runs.CreateRunStreamingAsync(\n+ threadId: threadId,\n+ agentId: _agentId,\n+ overrideModelName: runOptions?.OverrideModelName,\n+ overrideInstructions: runOptions?.OverrideInstructions,\n+ additionalInstructions: null,\n+ additionalMessages: runOptions?.ThreadOptions.Messages,\n+ overrideTools: runOptions?.OverrideTools,\n+ temperature: runOptions?.Temperature,\n+ topP: runOptions?.TopP,\n+ maxPromptTokens: runOptions?.MaxPromptTokens,\n+ maxCompletionTokens: runOptions?.MaxCompletionTokens,\n+ truncationStrategy: runOptions?.TruncationStrategy,\n+ toolChoice: runOptions?.ToolChoice,\n+ responseFormat: runOptions?.ResponseFormat,\n+ parallelToolCalls: runOptions?.ParallelToolCalls,\n+ metadata: runOptions?.Metadata,\n+ cancellationToken);\n+ }\n+\n+ // Process each update.\n+ string? responseId = null;\n+ await foreach (StreamingUpdate? update in updates.ConfigureAwait(false))\n+ {\n+ switch (update)\n+ {\n+ case ThreadUpdate tu:\n+ threadId ??= tu.Value.Id;\n+ goto default;\n+\n+ case RunUpdate ru:\n+ threadId ??= ru.Value.ThreadId;\n+ responseId ??= ru.Value.Id;\n+\n+ ChatResponseUpdate ruUpdate = new()\n+ {\n+ AuthorName = ru.Value.AssistantId,\n+ ConversationId = threadId,\n+ CreatedAt = ru.Value.CreatedAt,\n+ MessageId = responseId,\n+ ModelId = ru.Value.Model,\n+ RawRepresentation = ru,\n+ ResponseId = responseId,\n+ Role = ChatRole.Assistant,\n+ };\n+\n+ if (ru.Value.Usage is { } usage)\n+ {\n+ ruUpdate.Contents.Add(new UsageContent(new()\n+ {\n+ InputTokenCount = usage.PromptTokens,\n+ OutputTokenCount = usage.CompletionTokens,\n+ TotalTokenCount = usage.TotalTokens,\n+ }));\n+ }\n+\n+ if (ru is RequiredActionUpdate rau && rau.ToolCallId is string toolCallId && rau.FunctionName is string functionName)\n+ {\n+ ruUpdate.Contents.Add(\n+ new FunctionCallContent(\n+ JsonSerializer.Serialize([ru.Value.Id, toolCallId], AgentsChatClientJsonContext.Default.StringArray),\n+ functionName,\n+ JsonSerializer.Deserialize(rau.FunctionArguments, AgentsChatClientJsonContext.Default.IDictionaryStringObject)!));\n+ }\n+\n+ yield return ruUpdate;\n+ break;\n+\n+ case MessageContentUpdate mcu:\n+ yield return new(mcu.Role == MessageRole.User ? 
ChatRole.User : ChatRole.Assistant, mcu.Text)\n+ {\n+ ConversationId = threadId,\n+ MessageId = responseId,\n+ RawRepresentation = mcu,\n+ ResponseId = responseId,\n+ };\n+ break;\n+\n+ default:\n+ yield return new ChatResponseUpdate\n+ {\n+ ConversationId = threadId,\n+ MessageId = responseId,\n+ RawRepresentation = update,\n+ ResponseId = responseId,\n+ Role = ChatRole.Assistant,\n+ };\n+ break;\n+ }\n+ }\n+ }\n+\n+ /// \n+ public void Dispose() { }\n+\n+ /// \n+ /// Creates the to use for the request and extracts any function result contents\n+ /// that need to be submitted as tool results.\n+ /// \n+ private async ValueTask<(ThreadAndRunOptions RunOptions, List? ToolResults)> CreateRunOptionsAsync(\n+ IEnumerable messages, ChatOptions? options, CancellationToken cancellationToken)\n+ {\n+ // Create the options instance to populate, either a fresh or using one the caller provides.\n+ ThreadAndRunOptions runOptions =\n+ options?.RawRepresentationFactory?.Invoke(this) as ThreadAndRunOptions ??\n+ new();\n+\n+ // Populate the run options from the ChatOptions, if provided.\n+ if (options is not null)\n+ {\n+ runOptions.MaxCompletionTokens ??= options.MaxOutputTokens;\n+ runOptions.OverrideModelName ??= options.ModelId;\n+ runOptions.TopP ??= options.TopP;\n+ runOptions.Temperature ??= options.Temperature;\n+ runOptions.ParallelToolCalls ??= options.AllowMultipleToolCalls;\n+ // Ignored: options.TopK, options.FrequencyPenalty, options.Seed, options.StopSequences\n+\n+ if (options.Tools is { Count: > 0 } tools)\n+ {\n+ List toolDefinitions = [];\n+\n+ // If the caller has provided any tool overrides, we'll assume they don't want to use the agent's tools.\n+ // But if they haven't, the only way we can provide our tools is via an override, whereas we'd really like to\n+ // just add them. To handle that, we'll get all of the agent's tools and add them to the override list\n+ // along with our tools.\n+ if (runOptions.OverrideTools is null || !runOptions.OverrideTools.Any())\n+ {\n+ if (_agentTools is null)\n+ {\n+ PersistentAgent agent = await _client!.Administration.GetAgentAsync(_agentId, cancellationToken).ConfigureAwait(false);\n+ _agentTools = agent.Tools;\n+ }\n+\n+ toolDefinitions.AddRange(_agentTools);\n+ }", - "comment_created_at": "2025-06-26T02:34:06+00:00", - "comment_author": "dmytrostruk", - "comment_body": "This logic is required, because currently there is no way (at least I didn't find it) how to add \"additional\" tools on top of already existing ones rather than override tools per request. If there will be a possibility to add \"additional\" tools, then this logic can be removed. \r\n\r\nWith this logic, `_agentTools` are stored on the class level to avoid pulling agent tools every time when user sends a request. So, there is a small possibility that agent tools can be updated on the server side, but previous tool collection will be cached in the class instance. 
So, it depends on how often agent tools change and if it happens rarely, this logic should be a good trade-off between performance and consistency.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2180633548", - "pr_number": 51021, - "pr_file": "sdk/core/Azure.Core/src/RequestContent.cs", - "created_at": "2025-07-02T17:42:50+00:00", - "commented_code": "return Task.CompletedTask;\n }\n }\n\n private sealed class PersistableModelRequestContent : RequestContent where T : IPersistableModel", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2180633548", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 51021, - "pr_file": "sdk/core/Azure.Core/src/RequestContent.cs", - "discussion_id": "2180633548", - "commented_code": "@@ -346,5 +360,35 @@ public override Task WriteToAsync(Stream stream, CancellationToken cancellation)\n return Task.CompletedTask;\n }\n }\n+\n+ private sealed class PersistableModelRequestContent : RequestContent where T : IPersistableModel", - "comment_created_at": "2025-07-02T17:42:50+00:00", - "comment_author": "KrzysztofCwalina", - "comment_body": "Why do we need this type? What's the advantage over using ModelReaderWriter.Write (inside the Cereate method above) to create BinaryData and then returning the existing BinaryContent.Create(binaryData)?", - "pr_file_module": null - }, - { - "comment_id": "2180644575", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 51021, - "pr_file": "sdk/core/Azure.Core/src/RequestContent.cs", - "discussion_id": "2180633548", - "commented_code": "@@ -346,5 +360,35 @@ public override Task WriteToAsync(Stream stream, CancellationToken cancellation)\n return Task.CompletedTask;\n }\n }\n+\n+ private sealed class PersistableModelRequestContent : RequestContent where T : IPersistableModel", - "comment_created_at": "2025-07-02T17:49:51+00:00", - "comment_author": "JoshLove-msft", - "comment_body": "Creating a BinaryData will result in allocations. 
Using BinaryContent.Create(IPersistableModel) will use shared buffer to avoid allocations - https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/core/System.ClientModel/src/Message/BinaryContent.cs#L224", - "pr_file_module": null - }, - { - "comment_id": "2180645941", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 51021, - "pr_file": "sdk/core/Azure.Core/src/RequestContent.cs", - "discussion_id": "2180633548", - "commented_code": "@@ -346,5 +360,35 @@ public override Task WriteToAsync(Stream stream, CancellationToken cancellation)\n return Task.CompletedTask;\n }\n }\n+\n+ private sealed class PersistableModelRequestContent : RequestContent where T : IPersistableModel", - "comment_created_at": "2025-07-02T17:50:43+00:00", - "comment_author": "JoshLove-msft", - "comment_body": "https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/core/System.ClientModel/src/ModelReaderWriter/UnsafeBufferSequence.cs#L14", - "pr_file_module": null - }, - { - "comment_id": "2180656922", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 51021, - "pr_file": "sdk/core/Azure.Core/src/RequestContent.cs", - "discussion_id": "2180633548", - "commented_code": "@@ -346,5 +360,35 @@ public override Task WriteToAsync(Stream stream, CancellationToken cancellation)\n return Task.CompletedTask;\n }\n }\n+\n+ private sealed class PersistableModelRequestContent : RequestContent where T : IPersistableModel", - "comment_created_at": "2025-07-02T17:57:30+00:00", - "comment_author": "Copilot", - "comment_body": "The PersistableModelRequestContent type wraps BinaryContent.Create() specifically to leverage its streaming optimizations.\n\n**Alternative approach (ModelReaderWriter.Write + BinaryData):**\n- ModelReaderWriter.Write() fully serializes the model to memory first (BinaryData)\n- Then RequestContent.Create(binaryData) streams from that memory\n\n**Current approach (BinaryContent wrapper):**\n- BinaryContent.Create() for IJsonModel types uses ModelWriter with UnsafeBufferSequence \n- Streams directly via SequenceReader.CopyTo() without full serialization to memory first\n- Only falls back to memory serialization for non-IJsonModel types\n\nThe wrapper type is necessary because BinaryContent and RequestContent are separate hierarchies, but BinaryContent has the optimized streaming implementation we want to reuse.", - "pr_file_module": null - }, - { - "comment_id": "2180695432", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 51021, - "pr_file": "sdk/core/Azure.Core/src/RequestContent.cs", - "discussion_id": "2180633548", - "commented_code": "@@ -346,5 +360,35 @@ public override Task WriteToAsync(Stream stream, CancellationToken cancellation)\n return Task.CompletedTask;\n }\n }\n+\n+ private sealed class PersistableModelRequestContent : RequestContent where T : IPersistableModel", - "comment_created_at": "2025-07-02T18:16:48+00:00", - "comment_author": "m-nash", - "comment_body": "@KrzysztofCwalina in the json format case (or where wire = json) we will use the UnsafeBufferSequence internally which is about 30% faster for speed and allocates about 80% less on average due to the use of shared buffers. 
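To make the preceding explanation concrete, a hedged sketch (editorial illustration, not part of the patch) of the preferred path, assuming the generic `BinaryContent.Create` overload for `IPersistableModel<T>` referenced in the linked source; the helper name is invented for the example:

```csharp
using System.ClientModel;
using System.ClientModel.Primitives;

internal static class RequestContentHelpers
{
    // BinaryContent.Create streams the model through pooled buffers
    // (the UnsafeBufferSequence mentioned above) rather than materializing
    // a BinaryData in memory first.
    public static BinaryContent FromModel<T>(T model) where T : IPersistableModel<T> =>
        BinaryContent.Create(model);
}
```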
For the non json cases like xml there is no difference between what you suggest and what is implemented.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2093706419", - "pr_number": 50129, - "pr_file": "sdk/resourcemanager/Azure.ResourceManager/src/ArmClientOptions.cs", - "created_at": "2025-05-16T21:43:20+00:00", - "commented_code": "var assembly = Assembly.GetExecutingAssembly();\n using (Stream stream = assembly.GetManifestResourceStream(profile.GetManifestName()))\n {\n var allProfile = BinaryData.FromStream(stream).ToObjectFromJson>>();\n var span = BinaryData.FromStream(stream).ToMemory().Span;\n var allProfile = JsonSerializer.Deserialize>>(span, ArmClientOptionsJsonContext.Default.DictionaryStringDictionaryStringJsonElement);", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2093706419", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50129, - "pr_file": "sdk/resourcemanager/Azure.ResourceManager/src/ArmClientOptions.cs", - "discussion_id": "2093706419", - "commented_code": "@@ -47,12 +48,13 @@ public void SetApiVersionsFromProfile(AzureStackProfile profile)\n var assembly = Assembly.GetExecutingAssembly();\n using (Stream stream = assembly.GetManifestResourceStream(profile.GetManifestName()))\n {\n- var allProfile = BinaryData.FromStream(stream).ToObjectFromJson>>();\n+ var span = BinaryData.FromStream(stream).ToMemory().Span;\n+ var allProfile = JsonSerializer.Deserialize>>(span, ArmClientOptionsJsonContext.Default.DictionaryStringDictionaryStringJsonElement);", - "comment_created_at": "2025-05-16T21:43:20+00:00", - "comment_author": "Copilot", - "comment_body": "Consider evaluating the memory impact of loading the entire stream into a Span, especially if the configuration file could be large. 
If applicable, explore streaming deserialization to mitigate potential memory overhead.\n```suggestion\n var allProfile = JsonSerializer.DeserializeAsync>>(stream, ArmClientOptionsJsonContext.Default.DictionaryStringDictionaryStringJsonElement).Result;\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2170421287", - "pr_number": 50926, - "pr_file": "sdk/keyvault/Azure.Security.KeyVault.Keys/src/KeyAttributes.cs", - "created_at": "2025-06-27T01:11:30+00:00", - "commented_code": "if (Attestation != null)\n {\n json.WriteStartObject(s_keyAttestationPropertyNameBytes);\n if (Attestation.CertificatePemFile != null)\n if (!Attestation.CertificatePemFile.IsEmpty)\n {\n json.WriteString(\"certificatePemFile\", Convert.ToBase64String(Attestation.CertificatePemFile));\n json.WriteString(\"certificatePemFile\", Convert.ToBase64String(Attestation.CertificatePemFile.ToArray()));\n }\n if (Attestation.PrivateKeyAttestation != null)\n if (!Attestation.PrivateKeyAttestation.IsEmpty)\n {\n json.WriteString(\"privateKeyAttestation\", Convert.ToBase64String(Attestation.PrivateKeyAttestation));\n json.WriteString(\"privateKeyAttestation\", Convert.ToBase64String(Attestation.PrivateKeyAttestation.ToArray()));\n }\n if (Attestation.PublicKeyAttestation != null)\n if (!Attestation.PublicKeyAttestation.IsEmpty)\n {\n json.WriteString(\"publicKeyAttestation\", Convert.ToBase64String(Attestation.PublicKeyAttestation));\n json.WriteString(\"publicKeyAttestation\", Convert.ToBase64String(Attestation.PublicKeyAttestation.ToArray()));", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2170421287", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50926, - "pr_file": "sdk/keyvault/Azure.Security.KeyVault.Keys/src/KeyAttributes.cs", - "discussion_id": "2170421287", - "commented_code": "@@ -143,17 +143,17 @@ internal void WriteProperties(Utf8JsonWriter json)\n if (Attestation != null)\n {\n json.WriteStartObject(s_keyAttestationPropertyNameBytes);\n- if (Attestation.CertificatePemFile != null)\n+ if (!Attestation.CertificatePemFile.IsEmpty)\n {\n- json.WriteString(\"certificatePemFile\", Convert.ToBase64String(Attestation.CertificatePemFile));\n+ json.WriteString(\"certificatePemFile\", Convert.ToBase64String(Attestation.CertificatePemFile.ToArray()));\n }\n- if (Attestation.PrivateKeyAttestation != null)\n+ if (!Attestation.PrivateKeyAttestation.IsEmpty)\n {\n- json.WriteString(\"privateKeyAttestation\", Convert.ToBase64String(Attestation.PrivateKeyAttestation));\n+ json.WriteString(\"privateKeyAttestation\", Convert.ToBase64String(Attestation.PrivateKeyAttestation.ToArray()));\n }\n- if (Attestation.PublicKeyAttestation != null)\n+ if (!Attestation.PublicKeyAttestation.IsEmpty)\n {\n- json.WriteString(\"publicKeyAttestation\", Convert.ToBase64String(Attestation.PublicKeyAttestation));\n+ json.WriteString(\"publicKeyAttestation\", Convert.ToBase64String(Attestation.PublicKeyAttestation.ToArray()));", - "comment_created_at": "2025-06-27T01:11:30+00:00", - "comment_author": "Copilot", - "comment_body": "Avoid allocating a new array with ToArray(): use the ReadOnlyMemory's Span overload, e.g. 
`Convert.ToBase64String(Attestation.CertificatePemFile.Span)` to improve performance.", - "pr_file_module": null - }, - { - "comment_id": "2170432767", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50926, - "pr_file": "sdk/keyvault/Azure.Security.KeyVault.Keys/src/KeyAttributes.cs", - "discussion_id": "2170421287", - "commented_code": "@@ -143,17 +143,17 @@ internal void WriteProperties(Utf8JsonWriter json)\n if (Attestation != null)\n {\n json.WriteStartObject(s_keyAttestationPropertyNameBytes);\n- if (Attestation.CertificatePemFile != null)\n+ if (!Attestation.CertificatePemFile.IsEmpty)\n {\n- json.WriteString(\"certificatePemFile\", Convert.ToBase64String(Attestation.CertificatePemFile));\n+ json.WriteString(\"certificatePemFile\", Convert.ToBase64String(Attestation.CertificatePemFile.ToArray()));\n }\n- if (Attestation.PrivateKeyAttestation != null)\n+ if (!Attestation.PrivateKeyAttestation.IsEmpty)\n {\n- json.WriteString(\"privateKeyAttestation\", Convert.ToBase64String(Attestation.PrivateKeyAttestation));\n+ json.WriteString(\"privateKeyAttestation\", Convert.ToBase64String(Attestation.PrivateKeyAttestation.ToArray()));\n }\n- if (Attestation.PublicKeyAttestation != null)\n+ if (!Attestation.PublicKeyAttestation.IsEmpty)\n {\n- json.WriteString(\"publicKeyAttestation\", Convert.ToBase64String(Attestation.PublicKeyAttestation));\n+ json.WriteString(\"publicKeyAttestation\", Convert.ToBase64String(Attestation.PublicKeyAttestation.ToArray()));", - "comment_created_at": "2025-06-27T01:19:16+00:00", - "comment_author": "JonathanCrd", - "comment_body": "No, `Convert.ToBase64String` is expecting an array.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2114870690", - "pr_number": 50279, - "pr_file": "sdk/core/Azure.Core/src/ResponseError.Serialization.cs", - "created_at": "2025-05-29T23:05:01+00:00", - "commented_code": "// Copyright (c) Microsoft Corporation. All rights reserved.\n// Licensed under the MIT License.\n\nusing System;\nusing System.ClientModel.Primitives;\nusing System.Collections.Generic;\nusing System.Globalization;\nusing System.Text;\nusing System.Text.Json;\nusing System.Text.Json.Serialization;\nusing Azure.Core;\n\nnamespace Azure\n{\n /// \n /// Represents an error returned by an Azure Service.\n /// \n [JsonConverter(typeof(Converter))]\n [TypeReferenceType(true, [nameof(Target), nameof(Details)])]\n public sealed partial class ResponseError : IJsonModel\n {\n // This class needs to be internal rather than private so that it can be used by the System.Text.Json source generator\n internal class Converter : JsonConverter\n {\n public override ResponseError? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)\n {\n using var document = JsonDocument.ParseValue(ref reader);\n var element = document.RootElement;\n return ReadFromJson(element);\n }\n\n public override void Write(Utf8JsonWriter writer, ResponseError? 
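Editorial note on the exchange above: the `ReadOnlySpan<byte>` overload of `Convert.ToBase64String` only exists on newer target frameworks, which is why the array copy is still required when targeting `netstandard2.0`. A hedged sketch of how both paths could coexist if multi-targeting were in play — illustrative only, and the helper name is invented:

```csharp
using System;

internal static class Base64Helpers
{
    public static string ToBase64(ReadOnlyMemory<byte> data)
    {
#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
        // Span overload avoids allocating a copy of the buffer.
        return Convert.ToBase64String(data.Span);
#else
        // Older TFMs (e.g. netstandard2.0) only expose the byte[] overloads.
        return Convert.ToBase64String(data.ToArray());
#endif
    }
}
```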
value, JsonSerializerOptions options)\n {\n throw new NotImplementedException();\n }\n }\n\n /// \n /// Writes the to the provided .\n /// \n public void Write(Utf8JsonWriter writer, ModelReaderWriterOptions options)\n {\n writer.WriteStartObject();\n\n if (Code != null)\n {\n writer.WritePropertyName(\"code\");\n writer.WriteStringValue(Code);\n }\n\n if (Message != null)\n {\n writer.WritePropertyName(\"message\");\n writer.WriteStringValue(Message);\n }\n\n if (Target != null)\n {\n writer.WritePropertyName(\"target\");\n writer.WriteStringValue(Target);\n }\n\n if (InnerError != null)\n {\n writer.WritePropertyName(\"innererror\");\n InnerError.Write(writer, ModelReaderWriterOptions.Json);\n }\n\n if (Details.Count > 0)\n {\n writer.WritePropertyName(\"details\");\n writer.WriteStartArray();\n\n foreach (var detail in Details)\n {\n if (detail == null)\n {\n writer.WriteNullValue();\n }\n else\n {\n detail.Write(writer, options);\n }\n }\n\n writer.WriteEndArray();\n }\n\n writer.WriteEndObject();\n }\n\n /// \n public ResponseError Create(ref Utf8JsonReader reader, ModelReaderWriterOptions options)\n {\n var format = options.Format == \"W\" ? GetFormatFromOptions(options) : options.Format;\n if (format != \"J\")\n {\n throw new FormatException($\"The model {nameof(ResponseError)} does not support '{format}' format.\");\n }\n\n using var document = JsonDocument.ParseValue(ref reader);\n var element = document.RootElement;\n return ReadFromJson(element) ?? new ResponseError();\n }\n\n /// \n /// Writes the to the provided .\n /// \n public BinaryData Write(ModelReaderWriterOptions options)\n {\n var format = options.Format == \"W\" ? GetFormatFromOptions(options) : options.Format;\n if (format != \"J\")\n {\n throw new FormatException($\"The model {nameof(ResponseError)} does not support '{format}' format.\");\n }\n\n using var stream = new System.IO.MemoryStream();", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2114870690", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50279, - "pr_file": "sdk/core/Azure.Core/src/ResponseError.Serialization.cs", - "discussion_id": "2114870690", - "commented_code": "@@ -0,0 +1,192 @@\n+// Copyright (c) Microsoft Corporation. All rights reserved.\n+// Licensed under the MIT License.\n+\n+using System;\n+using System.ClientModel.Primitives;\n+using System.Collections.Generic;\n+using System.Globalization;\n+using System.Text;\n+using System.Text.Json;\n+using System.Text.Json.Serialization;\n+using Azure.Core;\n+\n+namespace Azure\n+{\n+ /// \n+ /// Represents an error returned by an Azure Service.\n+ /// \n+ [JsonConverter(typeof(Converter))]\n+ [TypeReferenceType(true, [nameof(Target), nameof(Details)])]\n+ public sealed partial class ResponseError : IJsonModel\n+ {\n+ // This class needs to be internal rather than private so that it can be used by the System.Text.Json source generator\n+ internal class Converter : JsonConverter\n+ {\n+ public override ResponseError? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)\n+ {\n+ using var document = JsonDocument.ParseValue(ref reader);\n+ var element = document.RootElement;\n+ return ReadFromJson(element);\n+ }\n+\n+ public override void Write(Utf8JsonWriter writer, ResponseError? 
value, JsonSerializerOptions options)\n+ {\n+ throw new NotImplementedException();\n+ }\n+ }\n+\n+ /// \n+ /// Writes the to the provided .\n+ /// \n+ public void Write(Utf8JsonWriter writer, ModelReaderWriterOptions options)\n+ {\n+ writer.WriteStartObject();\n+\n+ if (Code != null)\n+ {\n+ writer.WritePropertyName(\"code\");\n+ writer.WriteStringValue(Code);\n+ }\n+\n+ if (Message != null)\n+ {\n+ writer.WritePropertyName(\"message\");\n+ writer.WriteStringValue(Message);\n+ }\n+\n+ if (Target != null)\n+ {\n+ writer.WritePropertyName(\"target\");\n+ writer.WriteStringValue(Target);\n+ }\n+\n+ if (InnerError != null)\n+ {\n+ writer.WritePropertyName(\"innererror\");\n+ InnerError.Write(writer, ModelReaderWriterOptions.Json);\n+ }\n+\n+ if (Details.Count > 0)\n+ {\n+ writer.WritePropertyName(\"details\");\n+ writer.WriteStartArray();\n+\n+ foreach (var detail in Details)\n+ {\n+ if (detail == null)\n+ {\n+ writer.WriteNullValue();\n+ }\n+ else\n+ {\n+ detail.Write(writer, options);\n+ }\n+ }\n+\n+ writer.WriteEndArray();\n+ }\n+\n+ writer.WriteEndObject();\n+ }\n+\n+ /// \n+ public ResponseError Create(ref Utf8JsonReader reader, ModelReaderWriterOptions options)\n+ {\n+ var format = options.Format == \"W\" ? GetFormatFromOptions(options) : options.Format;\n+ if (format != \"J\")\n+ {\n+ throw new FormatException($\"The model {nameof(ResponseError)} does not support '{format}' format.\");\n+ }\n+\n+ using var document = JsonDocument.ParseValue(ref reader);\n+ var element = document.RootElement;\n+ return ReadFromJson(element) ?? new ResponseError();\n+ }\n+\n+ /// \n+ /// Writes the to the provided .\n+ /// \n+ public BinaryData Write(ModelReaderWriterOptions options)\n+ {\n+ var format = options.Format == \"W\" ? GetFormatFromOptions(options) : options.Format;\n+ if (format != \"J\")\n+ {\n+ throw new FormatException($\"The model {nameof(ResponseError)} does not support '{format}' format.\");\n+ }\n+\n+ using var stream = new System.IO.MemoryStream();", - "comment_created_at": "2025-05-29T23:05:01+00:00", - "comment_author": "m-nash", - "comment_body": "Should call into ModelReaderWriter here it uses a faster buffer than memory stream. 
You can use RehydrationToken as a guide https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/core/Azure.Core/src/RehydrationToken.Serialization.cs#L136-L147", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2037553900", - "pr_number": 49324, - "pr_file": "sdk/cloudmachine/Azure.Projects.AI/src/Agents/ChatRunner.cs", - "created_at": "2025-04-10T14:24:39+00:00", - "commented_code": "}\n else\n {\n ChatCompletion completion = _chat.CompleteChat(conversation);\n ChatCompletionOptions options = new();\n options.Temperature = 0.0f;", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2037553900", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49324, - "pr_file": "sdk/cloudmachine/Azure.Projects.AI/src/Agents/ChatRunner.cs", - "discussion_id": "2037553900", - "commented_code": "@@ -113,7 +113,9 @@ protected virtual ChatCompletion OnComplete(List conversation, stri\n }\n else\n {\n- ChatCompletion completion = _chat.CompleteChat(conversation);\n+ ChatCompletionOptions options = new();\n+ options.Temperature = 0.0f;", - "comment_created_at": "2025-04-10T14:24:39+00:00", - "comment_author": "christothes", - "comment_body": "Can we initialize the options once and reuse it here each time?", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-prefer-identity-based-authentication.json b/_reviewers/azure-sdk-for-net-prefer-identity-based-authentication.json new file mode 100644 index 0000000..2383a35 --- /dev/null +++ b/_reviewers/azure-sdk-for-net-prefer-identity-based-authentication.json @@ -0,0 +1,68 @@ +[ + { + "discussion_id": "2135910121", + "pr_number": 50436, + "pr_file": "sdk/extensions/Microsoft.Extensions.Azure/CHANGELOG.md", + "created_at": "2025-06-09T15:09:40+00:00", + "commented_code": "### Features Added\n\n- Added support for [federated managed identity](https://learn.microsoft.com/entra/workload-id/workload-identity-federation-config-app-trust-managed-identity?tabs=microsoft-entra-admin-center#azureidentity) support in the client factory by specifying configuration item `credential` as \"managedidentityasfederatedidentity\" and providing the following named configuration items:", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2135910121", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50436, + "pr_file": "sdk/extensions/Microsoft.Extensions.Azure/CHANGELOG.md", + "discussion_id": "2135910121", + "commented_code": "@@ -4,6 +4,16 @@\n \n ### Features Added\n \n+- Added support for [federated managed identity](https://learn.microsoft.com/entra/workload-id/workload-identity-federation-config-app-trust-managed-identity?tabs=microsoft-entra-admin-center#azureidentity) support in the client factory by specifying configuration item `credential` as \"managedidentityasfederatedidentity\" and providing the following named configuration items:", + "comment_created_at": "2025-06-09T15:09:40+00:00", + "comment_author": "scottaddie", + "comment_body": "```suggestion\r\n- Added support for [managed identity as a federated identity credential](https://learn.microsoft.com/entra/workload-id/workload-identity-federation-config-app-trust-managed-identity?tabs=microsoft-entra-admin-center#azureidentity) in the client factory by specifying configuration item `credential` as \"managedidentityasfederatedidentity\" and providing the following named configuration items:\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": 
"2130868142", + "pr_number": 50457, + "pr_file": "sdk/identity/Azure.Identity.Broker/CHANGELOG.md", + "created_at": "2025-06-05T23:15:49+00:00", + "commented_code": "# Release History\n\n## 1.3.0-beta.3 (Unreleased)\n## 1.3.0-beta.3 (2025-06-10)\n\n### Features Added\n\n- Support broker on Linux.\n\n### Breaking Changes\n\n### Bugs Fixed\n- Support Microsoft Broker on Linux platforms. This library relies on the Microsoft Authentication Library (MSAL) to handle the broker, for more information about pre-requisites and how to utilize the Broker on Linux, visit [Enable SSO in native Linux apps using MSAL.NET\n](https://learn.microsoft.com/entra/msal/dotnet/acquiring-tokens/desktop-mobile/linux-dotnet-sdk?tabs=ubuntudep)", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2130868142", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50457, + "pr_file": "sdk/identity/Azure.Identity.Broker/CHANGELOG.md", + "discussion_id": "2130868142", + "commented_code": "@@ -1,14 +1,11 @@\n # Release History\n \n-## 1.3.0-beta.3 (Unreleased)\n+## 1.3.0-beta.3 (2025-06-10)\n \n ### Features Added\n \n-- Support broker on Linux.\n-\n-### Breaking Changes\n-\n-### Bugs Fixed\n+- Support Microsoft Broker on Linux platforms. This library relies on the Microsoft Authentication Library (MSAL) to handle the broker, for more information about pre-requisites and how to utilize the Broker on Linux, visit [Enable SSO in native Linux apps using MSAL.NET\n+](https://learn.microsoft.com/entra/msal/dotnet/acquiring-tokens/desktop-mobile/linux-dotnet-sdk?tabs=ubuntudep)", + "comment_created_at": "2025-06-05T23:15:49+00:00", + "comment_author": "scottaddie", + "comment_body": "```suggestion\r\n- Support Microsoft Broker on Linux and WSL. This library relies on the Microsoft Authentication Library (MSAL) to handle the broker. For more information about prerequisites and how to utilize the broker, see [Enable SSO in native Linux apps using MSAL.NET](https://learn.microsoft.com/entra/msal/dotnet/acquiring-tokens/desktop-mobile/linux-dotnet-sdk)\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2076996977", + "pr_number": 49837, + "pr_file": "sdk/hdinsight/Azure.ResourceManager.HDInsight/CHANGELOG.md", + "created_at": "2025-05-07T07:37:09+00:00", + "commented_code": "# Release History\n\n## 1.2.0-beta.5 (Unreleased)\n## 1.2.0-beta.5 (2025-05-07)\n\n### Features Added\n\n### Breaking Changes\n\n### Bugs Fixed\n- Upgraded api-version tag from 'package-2024-08-preview' to 'package-2025-01-preview'. Tag detail available at https://github.com/Azure/azure-rest-api-specs/blob/4c0f7731c93696af01bd2bb9927bf28d2afcbc98/specification/hdinsight/resource-manager/readme.md.\n - Support for setting Entra User during HDInsight cluster creation.", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2076996977", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49837, + "pr_file": "sdk/hdinsight/Azure.ResourceManager.HDInsight/CHANGELOG.md", + "discussion_id": "2076996977", + "commented_code": "@@ -1,15 +1,19 @@\n # Release History\n \n-## 1.2.0-beta.5 (Unreleased)\n+## 1.2.0-beta.5 (2025-05-07)\n \n ### Features Added\n \n-### Breaking Changes\n-\n-### Bugs Fixed\n+- Upgraded api-version tag from 'package-2024-08-preview' to 'package-2025-01-preview'. 
Tag detail available at https://github.com/Azure/azure-rest-api-specs/blob/4c0f7731c93696af01bd2bb9927bf28d2afcbc98/specification/hdinsight/resource-manager/readme.md.\n+ - Support for setting Entra User during HDInsight cluster creation.", + "comment_created_at": "2025-05-07T07:37:09+00:00", + "comment_author": "aim-for-better", + "comment_body": "Support to use Entra User as cluster administrator credential instead of using username/password during HDInsight cluster creation", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-prefer-identity-based-authentication.md b/_reviewers/azure-sdk-for-net-prefer-identity-based-authentication.md index 74502de..d3a38f1 100644 --- a/_reviewers/azure-sdk-for-net-prefer-identity-based-authentication.md +++ b/_reviewers/azure-sdk-for-net-prefer-identity-based-authentication.md @@ -35,73 +35,3 @@ builder.Services.AddAzureClients(clientBuilder => ``` When creating services like HDInsight clusters, prefer specifying Entra User as the administrator credential rather than username/password combinations. - - -[ - { - "discussion_id": "2135910121", - "pr_number": 50436, - "pr_file": "sdk/extensions/Microsoft.Extensions.Azure/CHANGELOG.md", - "created_at": "2025-06-09T15:09:40+00:00", - "commented_code": "### Features Added\n\n- Added support for [federated managed identity](https://learn.microsoft.com/entra/workload-id/workload-identity-federation-config-app-trust-managed-identity?tabs=microsoft-entra-admin-center#azureidentity) support in the client factory by specifying configuration item `credential` as \"managedidentityasfederatedidentity\" and providing the following named configuration items:", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2135910121", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50436, - "pr_file": "sdk/extensions/Microsoft.Extensions.Azure/CHANGELOG.md", - "discussion_id": "2135910121", - "commented_code": "@@ -4,6 +4,16 @@\n \n ### Features Added\n \n+- Added support for [federated managed identity](https://learn.microsoft.com/entra/workload-id/workload-identity-federation-config-app-trust-managed-identity?tabs=microsoft-entra-admin-center#azureidentity) support in the client factory by specifying configuration item `credential` as \"managedidentityasfederatedidentity\" and providing the following named configuration items:", - "comment_created_at": "2025-06-09T15:09:40+00:00", - "comment_author": "scottaddie", - "comment_body": "```suggestion\r\n- Added support for [managed identity as a federated identity credential](https://learn.microsoft.com/entra/workload-id/workload-identity-federation-config-app-trust-managed-identity?tabs=microsoft-entra-admin-center#azureidentity) in the client factory by specifying configuration item `credential` as \"managedidentityasfederatedidentity\" and providing the following named configuration items:\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2130868142", - "pr_number": 50457, - "pr_file": "sdk/identity/Azure.Identity.Broker/CHANGELOG.md", - "created_at": "2025-06-05T23:15:49+00:00", - "commented_code": "# Release History\n\n## 1.3.0-beta.3 (Unreleased)\n## 1.3.0-beta.3 (2025-06-10)\n\n### Features Added\n\n- Support broker on Linux.\n\n### Breaking Changes\n\n### Bugs Fixed\n- Support Microsoft Broker on Linux platforms. 
This library relies on the Microsoft Authentication Library (MSAL) to handle the broker, for more information about pre-requisites and how to utilize the Broker on Linux, visit [Enable SSO in native Linux apps using MSAL.NET\n](https://learn.microsoft.com/entra/msal/dotnet/acquiring-tokens/desktop-mobile/linux-dotnet-sdk?tabs=ubuntudep)", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2130868142", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50457, - "pr_file": "sdk/identity/Azure.Identity.Broker/CHANGELOG.md", - "discussion_id": "2130868142", - "commented_code": "@@ -1,14 +1,11 @@\n # Release History\n \n-## 1.3.0-beta.3 (Unreleased)\n+## 1.3.0-beta.3 (2025-06-10)\n \n ### Features Added\n \n-- Support broker on Linux.\n-\n-### Breaking Changes\n-\n-### Bugs Fixed\n+- Support Microsoft Broker on Linux platforms. This library relies on the Microsoft Authentication Library (MSAL) to handle the broker, for more information about pre-requisites and how to utilize the Broker on Linux, visit [Enable SSO in native Linux apps using MSAL.NET\n+](https://learn.microsoft.com/entra/msal/dotnet/acquiring-tokens/desktop-mobile/linux-dotnet-sdk?tabs=ubuntudep)", - "comment_created_at": "2025-06-05T23:15:49+00:00", - "comment_author": "scottaddie", - "comment_body": "```suggestion\r\n- Support Microsoft Broker on Linux and WSL. This library relies on the Microsoft Authentication Library (MSAL) to handle the broker. For more information about prerequisites and how to utilize the broker, see [Enable SSO in native Linux apps using MSAL.NET](https://learn.microsoft.com/entra/msal/dotnet/acquiring-tokens/desktop-mobile/linux-dotnet-sdk)\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2076996977", - "pr_number": 49837, - "pr_file": "sdk/hdinsight/Azure.ResourceManager.HDInsight/CHANGELOG.md", - "created_at": "2025-05-07T07:37:09+00:00", - "commented_code": "# Release History\n\n## 1.2.0-beta.5 (Unreleased)\n## 1.2.0-beta.5 (2025-05-07)\n\n### Features Added\n\n### Breaking Changes\n\n### Bugs Fixed\n- Upgraded api-version tag from 'package-2024-08-preview' to 'package-2025-01-preview'. Tag detail available at https://github.com/Azure/azure-rest-api-specs/blob/4c0f7731c93696af01bd2bb9927bf28d2afcbc98/specification/hdinsight/resource-manager/readme.md.\n - Support for setting Entra User during HDInsight cluster creation.", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2076996977", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49837, - "pr_file": "sdk/hdinsight/Azure.ResourceManager.HDInsight/CHANGELOG.md", - "discussion_id": "2076996977", - "commented_code": "@@ -1,15 +1,19 @@\n # Release History\n \n-## 1.2.0-beta.5 (Unreleased)\n+## 1.2.0-beta.5 (2025-05-07)\n \n ### Features Added\n \n-### Breaking Changes\n-\n-### Bugs Fixed\n+- Upgraded api-version tag from 'package-2024-08-preview' to 'package-2025-01-preview'. 
Tag detail available at https://github.com/Azure/azure-rest-api-specs/blob/4c0f7731c93696af01bd2bb9927bf28d2afcbc98/specification/hdinsight/resource-manager/readme.md.\n+ - Support for setting Entra User during HDInsight cluster creation.", - "comment_created_at": "2025-05-07T07:37:09+00:00", - "comment_author": "aim-for-better", - "comment_body": "Support to use Entra User as cluster administrator credential instead of using username/password during HDInsight cluster creation", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-preserve-protocol-data-integrity.json b/_reviewers/azure-sdk-for-net-preserve-protocol-data-integrity.json new file mode 100644 index 0000000..6e4f061 --- /dev/null +++ b/_reviewers/azure-sdk-for-net-preserve-protocol-data-integrity.json @@ -0,0 +1,58 @@ +[ + { + "discussion_id": "2151028592", + "pr_number": 50645, + "pr_file": "sdk/eventhub/Microsoft.Azure.WebJobs.Extensions.EventHubs/CHANGELOG.md", + "created_at": "2025-06-16T22:56:02+00:00", + "commented_code": "# Release History\n\n## 6.5.2 (2025-06-12)\n## 6.5.2 (2025-06-16)\n\n### Bugs Fixed\n\n- Fixed a bug where the data types of broker-owned properties were being adjusted when an event was read by the client, causing the underlying AMQP data to be mutated. This resulted in binary changes when the AMQP message was serialized and unintentionally altered the service contract. Going forward, the original data types will be preserved on the AMQP representation of the message and type normalization only applied to the .NET `EventData` projection.", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2151028592", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50645, + "pr_file": "sdk/eventhub/Microsoft.Azure.WebJobs.Extensions.EventHubs/CHANGELOG.md", + "discussion_id": "2151028592", + "commented_code": "@@ -1,14 +1,16 @@\n # Release History\n \n-## 6.5.2 (2025-06-12)\n+## 6.5.2 (2025-06-16)\n \n ### Bugs Fixed\n \n - Fixed a bug where the data types of broker-owned properties were being adjusted when an event was read by the client, causing the underlying AMQP data to be mutated. This resulted in binary changes when the AMQP message was serialized and unintentionally altered the service contract. 
Going forward, the original data types will be preserved on the AMQP representation of the message and type normalization only applied to the .NET `EventData` projection.\n-\n+ ", + "comment_created_at": "2025-06-16T22:56:02+00:00", + "comment_author": "jsquire", + "comment_body": "```suggestion\r\n\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2124515823", + "pr_number": 50369, + "pr_file": "sdk/storage/Azure.Storage.DataMovement.Files.Shares/CHANGELOG.md", + "created_at": "2025-06-03T17:43:55+00:00", + "commented_code": "## 12.2.0-beta.1 (Unreleased)\n\n### Features Added\n- Added support for preserving NFS properties and permissions in ShareFiles and ShareDirectories for Share-to-Share copy transfers.\n- Added support for preserving SMB properties and permissions in ShareDirectories for Share-to-Share copy transfers.\n- For Copy and Download transfers, Hard links are copied as regular files and Symbolic links are skipped.", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2124515823", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50369, + "pr_file": "sdk/storage/Azure.Storage.DataMovement.Files.Shares/CHANGELOG.md", + "discussion_id": "2124515823", + "commented_code": "@@ -3,8 +3,12 @@\n ## 12.2.0-beta.1 (Unreleased)\n \n ### Features Added\n+- Added support for preserving NFS properties and permissions in ShareFiles and ShareDirectories for Share-to-Share copy transfers.\n+- Added support for preserving SMB properties and permissions in ShareDirectories for Share-to-Share copy transfers.\n+- For Copy and Download transfers, Hard links are copied as regular files and Symbolic links are skipped.", + "comment_created_at": "2025-06-03T17:43:55+00:00", + "comment_author": "amnguye", + "comment_body": "I'm not sure if this is where we want to document that hardlinks are supported as regular files and symbolic links are skipped. I think it's better that we add that symbolic links are skipped in the regular documentation or known issues, and not in our changelog, since changelog is what we've changed. Unless we were copying symbolic links initially and now we no longer do.\r\n\r\nI think this should be changed to \"Added support for transferring hard links for downloading Share Files and Share-to-Share copy transfers\"", + "pr_file_module": null + }, + { + "comment_id": "2124596172", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50369, + "pr_file": "sdk/storage/Azure.Storage.DataMovement.Files.Shares/CHANGELOG.md", + "discussion_id": "2124515823", + "commented_code": "@@ -3,8 +3,12 @@\n ## 12.2.0-beta.1 (Unreleased)\n \n ### Features Added\n+- Added support for preserving NFS properties and permissions in ShareFiles and ShareDirectories for Share-to-Share copy transfers.\n+- Added support for preserving SMB properties and permissions in ShareDirectories for Share-to-Share copy transfers.\n+- For Copy and Download transfers, Hard links are copied as regular files and Symbolic links are skipped.", + "comment_created_at": "2025-06-03T18:21:10+00:00", + "comment_author": "nickliu-msft", + "comment_body": "@amnguye I see your point, my main thing is \"adding support for transferring hard links\" might be a little misleading since we are not actually creating a hardlink in the destination for phase 1 (only copying over as regular file) thus not FULL support. 
\r\n\r\nAs for softlinks, today if the customer tries to transfer a softlink, the transfer will fail (since you need to resolve the reference). So in phase 1 by skipping, we are kinda changing the behavior. (hardlinks and softlinks are only for NFS so customers should not be doing this today anyways).\r\n\r\nExactly how hard links and soft links are handled is documented already. I think this might be the best for the changelog:\r\n\r\n\"Added basic support for handling hard links and soft links in NFS Share-to-Share copy and Share-to-local download transfers.\"", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-preserve-protocol-data-integrity.md b/_reviewers/azure-sdk-for-net-preserve-protocol-data-integrity.md index 080400e..127b1c9 100644 --- a/_reviewers/azure-sdk-for-net-preserve-protocol-data-integrity.md +++ b/_reviewers/azure-sdk-for-net-preserve-protocol-data-integrity.md @@ -32,63 +32,3 @@ For file transfer protocols: - Clearly document how specialized elements (like symbolic links, hard links) are handled - Ensure consistent behavior across different transfer operations - Preserve protocol-specific properties where possible to maintain compatibility - - -[ - { - "discussion_id": "2151028592", - "pr_number": 50645, - "pr_file": "sdk/eventhub/Microsoft.Azure.WebJobs.Extensions.EventHubs/CHANGELOG.md", - "created_at": "2025-06-16T22:56:02+00:00", - "commented_code": "# Release History\n\n## 6.5.2 (2025-06-12)\n## 6.5.2 (2025-06-16)\n\n### Bugs Fixed\n\n- Fixed a bug where the data types of broker-owned properties were being adjusted when an event was read by the client, causing the underlying AMQP data to be mutated. This resulted in binary changes when the AMQP message was serialized and unintentionally altered the service contract. Going forward, the original data types will be preserved on the AMQP representation of the message and type normalization only applied to the .NET `EventData` projection.", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2151028592", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50645, - "pr_file": "sdk/eventhub/Microsoft.Azure.WebJobs.Extensions.EventHubs/CHANGELOG.md", - "discussion_id": "2151028592", - "commented_code": "@@ -1,14 +1,16 @@\n # Release History\n \n-## 6.5.2 (2025-06-12)\n+## 6.5.2 (2025-06-16)\n \n ### Bugs Fixed\n \n - Fixed a bug where the data types of broker-owned properties were being adjusted when an event was read by the client, causing the underlying AMQP data to be mutated. This resulted in binary changes when the AMQP message was serialized and unintentionally altered the service contract. 
Going forward, the original data types will be preserved on the AMQP representation of the message and type normalization only applied to the .NET `EventData` projection.\n-\n+ ", - "comment_created_at": "2025-06-16T22:56:02+00:00", - "comment_author": "jsquire", - "comment_body": "```suggestion\r\n\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2124515823", - "pr_number": 50369, - "pr_file": "sdk/storage/Azure.Storage.DataMovement.Files.Shares/CHANGELOG.md", - "created_at": "2025-06-03T17:43:55+00:00", - "commented_code": "## 12.2.0-beta.1 (Unreleased)\n\n### Features Added\n- Added support for preserving NFS properties and permissions in ShareFiles and ShareDirectories for Share-to-Share copy transfers.\n- Added support for preserving SMB properties and permissions in ShareDirectories for Share-to-Share copy transfers.\n- For Copy and Download transfers, Hard links are copied as regular files and Symbolic links are skipped.", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2124515823", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50369, - "pr_file": "sdk/storage/Azure.Storage.DataMovement.Files.Shares/CHANGELOG.md", - "discussion_id": "2124515823", - "commented_code": "@@ -3,8 +3,12 @@\n ## 12.2.0-beta.1 (Unreleased)\n \n ### Features Added\n+- Added support for preserving NFS properties and permissions in ShareFiles and ShareDirectories for Share-to-Share copy transfers.\n+- Added support for preserving SMB properties and permissions in ShareDirectories for Share-to-Share copy transfers.\n+- For Copy and Download transfers, Hard links are copied as regular files and Symbolic links are skipped.", - "comment_created_at": "2025-06-03T17:43:55+00:00", - "comment_author": "amnguye", - "comment_body": "I'm not sure if this is where we want to document that hardlinks are supported as regular files and symbolic links are skipped. I think it's better that we add that symbolic links are skipped in the regular documentation or known issues, and not in our changelog, since changelog is what we've changed. Unless we were copying symbolic links initially and now we no longer do.\r\n\r\nI think this should be changed to \"Added support for transferring hard links for downloading Share Files and Share-to-Share copy transfers\"", - "pr_file_module": null - }, - { - "comment_id": "2124596172", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50369, - "pr_file": "sdk/storage/Azure.Storage.DataMovement.Files.Shares/CHANGELOG.md", - "discussion_id": "2124515823", - "commented_code": "@@ -3,8 +3,12 @@\n ## 12.2.0-beta.1 (Unreleased)\n \n ### Features Added\n+- Added support for preserving NFS properties and permissions in ShareFiles and ShareDirectories for Share-to-Share copy transfers.\n+- Added support for preserving SMB properties and permissions in ShareDirectories for Share-to-Share copy transfers.\n+- For Copy and Download transfers, Hard links are copied as regular files and Symbolic links are skipped.", - "comment_created_at": "2025-06-03T18:21:10+00:00", - "comment_author": "nickliu-msft", - "comment_body": "@amnguye I see your point, my main thing is \"adding support for transferring hard links\" might be a little misleading since we are not actually creating a hardlink in the destination for phase 1 (only copying over as regular file) thus not FULL support. 
\r\n\r\nAs for softlinks, today if the customer tries to transfer a softlink, the transfer will fail (since you need to resolve the reference). So in phase 1 by skipping, we are kinda changing the behavior. (hardlinks and softlinks are only for NFS so customers should not be doing this today anyways).\r\n\r\nExactly how hard links and soft links are handled is documented already. I think this might be the best for the changelog:\r\n\r\n\"Added basic support for handling hard links and soft links in NFS Share-to-Share copy and Share-to-local download transfers.\"", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-redact-sensitive-information.json b/_reviewers/azure-sdk-for-net-redact-sensitive-information.json new file mode 100644 index 0000000..106d864 --- /dev/null +++ b/_reviewers/azure-sdk-for-net-redact-sensitive-information.json @@ -0,0 +1,24 @@ +[ + { + "discussion_id": "2178653902", + "pr_number": 50998, + "pr_file": "eng/common/scripts/job-matrix/Create-JobMatrix.ps1", + "created_at": "2025-07-01T22:33:14+00:00", + "commented_code": ")\n\n. $PSScriptRoot/job-matrix-functions.ps1\n. $PSScriptRoot/../logging.ps1\n\nif (!(Test-Path $ConfigPath)) {\n Write-Error \"ConfigPath '$ConfigPath' does not exist.\"\n exit 1\n}\n$config = GetMatrixConfigFromFile (Get-Content $ConfigPath -Raw)\n$rawConfig = Get-Content $ConfigPath -Raw\n$config = GetMatrixConfigFromFile $rawConfig\n# Strip empty string filters in order to be able to use azure pipelines yaml join()\n$Filters = $Filters | Where-Object { $_ }\n\nLogGroupStart \"Matrix generation configuration\"\nWrite-Host \"Configuration File: $ConfigPath\"\nWrite-Host $rawConfig", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2178653902", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50998, + "pr_file": "eng/common/scripts/job-matrix/Create-JobMatrix.ps1", + "discussion_id": "2178653902", + "commented_code": "@@ -21,15 +21,27 @@ param (\n )\n \n . $PSScriptRoot/job-matrix-functions.ps1\n+. 
$PSScriptRoot/../logging.ps1\n \n if (!(Test-Path $ConfigPath)) {\n Write-Error \"ConfigPath '$ConfigPath' does not exist.\"\n exit 1\n }\n-$config = GetMatrixConfigFromFile (Get-Content $ConfigPath -Raw)\n+$rawConfig = Get-Content $ConfigPath -Raw\n+$config = GetMatrixConfigFromFile $rawConfig\n # Strip empty string filters in order to be able to use azure pipelines yaml join()\n $Filters = $Filters | Where-Object { $_ }\n \n+LogGroupStart \"Matrix generation configuration\"\n+Write-Host \"Configuration File: $ConfigPath\"\n+Write-Host $rawConfig", + "comment_created_at": "2025-07-01T22:33:14+00:00", + "comment_author": "Copilot", + "comment_body": "Printing the entire raw configuration may expose sensitive information; consider redacting secrets or limiting the amount of output.\n```suggestion\n$safeConfig = $rawConfig -replace '(?i)(\\\"(password|secret|key)\\\":\\s*\\\".*?\\\")', '\"$1\":\"[REDACTED]\"'\nWrite-Host $safeConfig\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-redact-sensitive-information.md b/_reviewers/azure-sdk-for-net-redact-sensitive-information.md index 3c8349e..d48af12 100644 --- a/_reviewers/azure-sdk-for-net-redact-sensitive-information.md +++ b/_reviewers/azure-sdk-for-net-redact-sensitive-information.md @@ -24,29 +24,3 @@ Write-Host $safeConfig ``` This pattern prevents accidental exposure of sensitive information in logs, console output, or error messages that might be viewed by unauthorized personnel or stored in insecure locations. Implement similar redaction mechanisms in all logging and output systems across your codebase. - - -[ - { - "discussion_id": "2178653902", - "pr_number": 50998, - "pr_file": "eng/common/scripts/job-matrix/Create-JobMatrix.ps1", - "created_at": "2025-07-01T22:33:14+00:00", - "commented_code": ")\n\n. $PSScriptRoot/job-matrix-functions.ps1\n. $PSScriptRoot/../logging.ps1\n\nif (!(Test-Path $ConfigPath)) {\n Write-Error \"ConfigPath '$ConfigPath' does not exist.\"\n exit 1\n}\n$config = GetMatrixConfigFromFile (Get-Content $ConfigPath -Raw)\n$rawConfig = Get-Content $ConfigPath -Raw\n$config = GetMatrixConfigFromFile $rawConfig\n# Strip empty string filters in order to be able to use azure pipelines yaml join()\n$Filters = $Filters | Where-Object { $_ }\n\nLogGroupStart \"Matrix generation configuration\"\nWrite-Host \"Configuration File: $ConfigPath\"\nWrite-Host $rawConfig", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2178653902", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50998, - "pr_file": "eng/common/scripts/job-matrix/Create-JobMatrix.ps1", - "discussion_id": "2178653902", - "commented_code": "@@ -21,15 +21,27 @@ param (\n )\n \n . $PSScriptRoot/job-matrix-functions.ps1\n+. 
$PSScriptRoot/../logging.ps1\n \n if (!(Test-Path $ConfigPath)) {\n Write-Error \"ConfigPath '$ConfigPath' does not exist.\"\n exit 1\n }\n-$config = GetMatrixConfigFromFile (Get-Content $ConfigPath -Raw)\n+$rawConfig = Get-Content $ConfigPath -Raw\n+$config = GetMatrixConfigFromFile $rawConfig\n # Strip empty string filters in order to be able to use azure pipelines yaml join()\n $Filters = $Filters | Where-Object { $_ }\n \n+LogGroupStart \"Matrix generation configuration\"\n+Write-Host \"Configuration File: $ConfigPath\"\n+Write-Host $rawConfig", - "comment_created_at": "2025-07-01T22:33:14+00:00", - "comment_author": "Copilot", - "comment_body": "Printing the entire raw configuration may expose sensitive information; consider redacting secrets or limiting the amount of output.\n```suggestion\n$safeConfig = $rawConfig -replace '(?i)(\\\"(password|secret|key)\\\":\\s*\\\".*?\\\")', '\"$1\":\"[REDACTED]\"'\nWrite-Host $safeConfig\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-specific-exceptions-for-clarity.json b/_reviewers/azure-sdk-for-net-specific-exceptions-for-clarity.json new file mode 100644 index 0000000..1f08fcf --- /dev/null +++ b/_reviewers/azure-sdk-for-net-specific-exceptions-for-clarity.json @@ -0,0 +1,148 @@ +[ + { + "discussion_id": "2174208183", + "pr_number": 50936, + "pr_file": "eng/packages/http-client-csharp-mgmt/generator/Azure.Generator.Management/src/Providers/ResourceClientProvider.cs", + "created_at": "2025-06-30T04:41:22+00:00", + "commented_code": "}\n else if (parameter.Type.Equals(typeof(RequestContent)))\n {\n // If convenience method is provided, find the resource parameter from it\n if (convenienceMethod != null)\n if (methodParameters.Count > 0)\n {\n var resource = convenienceMethod.Signature.Parameters\n .Single(p => p.Type.Equals(ResourceData.Type) || p.Type.Equals(typeof(RequestContent)));\n // Find the content parameter in the method parameters\n // TODO: need to revisit the filter here\n var contentInputParameter = operation.Parameters.First(p => !ImplicitParameterNames.Contains(p.Name) && p.Kind == InputParameterKind.Method && p.Type is not InputPrimitiveType);\n var resource = methodParameters.Single(p => p.Name == \"data\" || p.Name == contentInputParameter.Name);\n arguments.Add(resource);\n }\n else\n {\n // Otherwise just add the parameter as-is\n arguments.Add(parameter);\n }\n }\n else if (parameter.Type.Equals(typeof(RequestContext)))\n {\n arguments.Add(contextVariable);\n }\n else\n {\n arguments.Add(parameter);\n arguments.Add(methodParameters.Single(p => p.Name == parameter.Name));", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2174208183", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50936, + "pr_file": "eng/packages/http-client-csharp-mgmt/generator/Azure.Generator.Management/src/Providers/ResourceClientProvider.cs", + "discussion_id": "2174208183", + "commented_code": "@@ -364,26 +373,22 @@ public ValueExpression[] PopulateArguments(\n }\n else if (parameter.Type.Equals(typeof(RequestContent)))\n {\n- // If convenience method is provided, find the resource parameter from it\n- if (convenienceMethod != null)\n+ if (methodParameters.Count > 0)\n {\n- var resource = convenienceMethod.Signature.Parameters\n- .Single(p => p.Type.Equals(ResourceData.Type) || p.Type.Equals(typeof(RequestContent)));\n+ // Find the content parameter in the method parameters\n+ // TODO: need to revisit the filter here\n+ var contentInputParameter = 
operation.Parameters.First(p => !ImplicitParameterNames.Contains(p.Name) && p.Kind == InputParameterKind.Method && p.Type is not InputPrimitiveType);\n+ var resource = methodParameters.Single(p => p.Name == \"data\" || p.Name == contentInputParameter.Name);\n arguments.Add(resource);\n }\n- else\n- {\n- // Otherwise just add the parameter as-is\n- arguments.Add(parameter);\n- }\n }\n else if (parameter.Type.Equals(typeof(RequestContext)))\n {\n arguments.Add(contextVariable);\n }\n else\n {\n- arguments.Add(parameter);\n+ arguments.Add(methodParameters.Single(p => p.Name == parameter.Name));", + "comment_created_at": "2025-06-30T04:41:22+00:00", + "comment_author": "Copilot", + "comment_body": "[nitpick] The use of Single() assumes that exactly one matching parameter exists. It may be safer to validate the existence of a match or provide a clearer error message if the match is not found.\n```suggestion\n var matchingParameter = methodParameters.SingleOrDefault(p => p.Name == parameter.Name);\n if (matchingParameter == null)\n {\n throw new InvalidOperationException($\"No matching parameter found for '{parameter.Name}' in methodParameters.\");\n }\n arguments.Add(matchingParameter);\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2058868488", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/src/Custom/Agent/ToolCallsResolver.cs", + "created_at": "2025-04-24T16:53:55+00:00", + "commented_code": "// Copyright (c) Microsoft Corporation. All rights reserved.\n// Licensed under the MIT License.\nusing System;\nusing System.Collections;\nusing System.Collections.Generic;\nusing System.Reflection;\nusing System.Text.Json;\nusing System.Xml.Linq;\n\nnamespace Azure.AI.Projects.Custom.Agent\n{\n /// \n /// ToolCallsResolver is used to resolve tool calls in the streaming API.\n /// \n public class ToolCallsResolver\n {\n private readonly Dictionary _delegates = new();\n\n internal ToolCallsResolver(Dictionary delegates)\n {\n _delegates = delegates;\n }\n\n /// \n /// Indicates whether auto tool calls are enabled.\n /// \n internal bool EnableAutoToolCalls => _delegates.Count > 0;\n\n /// \n /// Resolves the tool call by invoking the delegate associated with the function name.\n /// It casts the function arguments to the appropriate types based on the delegate's parameters.\n /// without knowing the answer.\n /// \n internal ToolOutput GetResolvedToolOutput(string functionName, string toolCallId, string functionArguments)\n {\n if (!_delegates.TryGetValue(functionName, out var func))\n {\n string error = $\"Function {functionName} not found.\";\n throw new MissingMethodException(error);", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2058868488", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/src/Custom/Agent/ToolCallsResolver.cs", + "discussion_id": "2058868488", + "commented_code": "@@ -0,0 +1,214 @@\n+// Copyright (c) Microsoft Corporation. 
All rights reserved.\n+// Licensed under the MIT License.\n+using System;\n+using System.Collections;\n+using System.Collections.Generic;\n+using System.Reflection;\n+using System.Text.Json;\n+using System.Xml.Linq;\n+\n+namespace Azure.AI.Projects.Custom.Agent\n+{\n+ /// \n+ /// ToolCallsResolver is used to resolve tool calls in the streaming API.\n+ /// \n+ public class ToolCallsResolver\n+ {\n+ private readonly Dictionary _delegates = new();\n+\n+ internal ToolCallsResolver(Dictionary delegates)\n+ {\n+ _delegates = delegates;\n+ }\n+\n+ /// \n+ /// Indicates whether auto tool calls are enabled.\n+ /// \n+ internal bool EnableAutoToolCalls => _delegates.Count > 0;\n+\n+ /// \n+ /// Resolves the tool call by invoking the delegate associated with the function name.\n+ /// It casts the function arguments to the appropriate types based on the delegate's parameters.\n+ /// without knowing the answer.\n+ /// \n+ internal ToolOutput GetResolvedToolOutput(string functionName, string toolCallId, string functionArguments)\n+ {\n+ if (!_delegates.TryGetValue(functionName, out var func))\n+ {\n+ string error = $\"Function {functionName} not found.\";\n+ throw new MissingMethodException(error);", + "comment_created_at": "2025-04-24T16:53:55+00:00", + "comment_author": "KrzysztofCwalina", + "comment_body": "will it be retried?", + "pr_file_module": null + }, + { + "comment_id": "2058998727", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/src/Custom/Agent/ToolCallsResolver.cs", + "discussion_id": "2058868488", + "commented_code": "@@ -0,0 +1,214 @@\n+// Copyright (c) Microsoft Corporation. All rights reserved.\n+// Licensed under the MIT License.\n+using System;\n+using System.Collections;\n+using System.Collections.Generic;\n+using System.Reflection;\n+using System.Text.Json;\n+using System.Xml.Linq;\n+\n+namespace Azure.AI.Projects.Custom.Agent\n+{\n+ /// \n+ /// ToolCallsResolver is used to resolve tool calls in the streaming API.\n+ /// \n+ public class ToolCallsResolver\n+ {\n+ private readonly Dictionary _delegates = new();\n+\n+ internal ToolCallsResolver(Dictionary delegates)\n+ {\n+ _delegates = delegates;\n+ }\n+\n+ /// \n+ /// Indicates whether auto tool calls are enabled.\n+ /// \n+ internal bool EnableAutoToolCalls => _delegates.Count > 0;\n+\n+ /// \n+ /// Resolves the tool call by invoking the delegate associated with the function name.\n+ /// It casts the function arguments to the appropriate types based on the delegate's parameters.\n+ /// without knowing the answer.\n+ /// \n+ internal ToolOutput GetResolvedToolOutput(string functionName, string toolCallId, string functionArguments)\n+ {\n+ if (!_delegates.TryGetValue(functionName, out var func))\n+ {\n+ string error = $\"Function {functionName} not found.\";\n+ throw new MissingMethodException(error);", + "comment_created_at": "2025-04-24T18:18:40+00:00", + "comment_author": "howieleung", + "comment_body": "Yes. I did lot of experiments. Depends on the question, if we submit this error to the model, the model might try to figure out the answer with its knowledge or memory. It might raise the same function call again. 
Or it might give up and say it doesn't have the information.", + "pr_file_module": null + }, + { + "comment_id": "2059171479", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/src/Custom/Agent/ToolCallsResolver.cs", + "discussion_id": "2058868488", + "commented_code": "@@ -0,0 +1,214 @@\n+// Copyright (c) Microsoft Corporation. All rights reserved.\n+// Licensed under the MIT License.\n+using System;\n+using System.Collections;\n+using System.Collections.Generic;\n+using System.Reflection;\n+using System.Text.Json;\n+using System.Xml.Linq;\n+\n+namespace Azure.AI.Projects.Custom.Agent\n+{\n+ /// \n+ /// ToolCallsResolver is used to resolve tool calls in the streaming API.\n+ /// \n+ public class ToolCallsResolver\n+ {\n+ private readonly Dictionary _delegates = new();\n+\n+ internal ToolCallsResolver(Dictionary delegates)\n+ {\n+ _delegates = delegates;\n+ }\n+\n+ /// \n+ /// Indicates whether auto tool calls are enabled.\n+ /// \n+ internal bool EnableAutoToolCalls => _delegates.Count > 0;\n+\n+ /// \n+ /// Resolves the tool call by invoking the delegate associated with the function name.\n+ /// It casts the function arguments to the appropriate types based on the delegate's parameters.\n+ /// without knowing the answer.\n+ /// \n+ internal ToolOutput GetResolvedToolOutput(string functionName, string toolCallId, string functionArguments)\n+ {\n+ if (!_delegates.TryGetValue(functionName, out var func))\n+ {\n+ string error = $\"Function {functionName} not found.\";\n+ throw new MissingMethodException(error);", + "comment_created_at": "2025-04-24T20:24:59+00:00", + "comment_author": "KrzysztofCwalina", + "comment_body": "Don't we need two exceptions for this to work well. If the model passes a wrong argument, it might indeed retry successfully. But why would the modle give us a wrong function name? We gave it an explicit list of functions that we support", + "pr_file_module": null + }, + { + "comment_id": "2060960574", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/src/Custom/Agent/ToolCallsResolver.cs", + "discussion_id": "2058868488", + "commented_code": "@@ -0,0 +1,214 @@\n+// Copyright (c) Microsoft Corporation. 
All rights reserved.\n+// Licensed under the MIT License.\n+using System;\n+using System.Collections;\n+using System.Collections.Generic;\n+using System.Reflection;\n+using System.Text.Json;\n+using System.Xml.Linq;\n+\n+namespace Azure.AI.Projects.Custom.Agent\n+{\n+ /// \n+ /// ToolCallsResolver is used to resolve tool calls in the streaming API.\n+ /// \n+ public class ToolCallsResolver\n+ {\n+ private readonly Dictionary _delegates = new();\n+\n+ internal ToolCallsResolver(Dictionary delegates)\n+ {\n+ _delegates = delegates;\n+ }\n+\n+ /// \n+ /// Indicates whether auto tool calls are enabled.\n+ /// \n+ internal bool EnableAutoToolCalls => _delegates.Count > 0;\n+\n+ /// \n+ /// Resolves the tool call by invoking the delegate associated with the function name.\n+ /// It casts the function arguments to the appropriate types based on the delegate's parameters.\n+ /// without knowing the answer.\n+ /// \n+ internal ToolOutput GetResolvedToolOutput(string functionName, string toolCallId, string functionArguments)\n+ {\n+ if (!_delegates.TryGetValue(functionName, out var func))\n+ {\n+ string error = $\"Function {functionName} not found.\";\n+ throw new MissingMethodException(error);", + "comment_created_at": "2025-04-25T22:23:57+00:00", + "comment_author": "howieleung", + "comment_body": "It is possible that the agent is created by Foundary UI or other application. In their C# app, they call getAgent and attempt to stream. If this agent has a function tool that isn't in the list of delegate, we want to raise exception.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2156708743", + "pr_number": 50718, + "pr_file": "sdk/loadtestservice/Azure.Developer.Playwright/src/Utility/ClientUtilities.cs", + "created_at": "2025-06-19T10:47:33+00:00", + "commented_code": "if (string.IsNullOrEmpty(authToken))\n throw new Exception(Constants.s_no_auth_error);\n JsonWebToken jsonWebToken = _jsonWebTokenHandler!.ReadJsonWebToken(authToken) ?? throw new Exception(Constants.s_invalid_mpt_pat_error);\n var tokenWorkspaceId = jsonWebToken.Claims.FirstOrDefault(c => c.Type == \"aid\")?.Value;\n Match match = Regex.Match(serviceEndpoint, @\"wss://(?[\\w-]+)\\.api\\.(?playwright(?:-test|-int)?\\.io|playwright\\.microsoft\\.com)/accounts/(?[\\w-]+)/\");\n var tokenWorkspaceId = jsonWebToken.Claims.FirstOrDefault(c => c.Type == \"pwid\")?.Value;\n Match match = Regex.Match(serviceEndpoint, @\"wss://(?[\\w-]+)\\.api\\.(?playwright(?:-test|-int)?\\.io|playwright\\.microsoft\\.com)/playwrightworkspaces/(?[\\w-]+)/\");\n if (!match.Success)\n throw new Exception(Constants.s_invalid_service_endpoint_error_message);", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2156708743", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50718, + "pr_file": "sdk/loadtestservice/Azure.Developer.Playwright/src/Utility/ClientUtilities.cs", + "discussion_id": "2156708743", + "commented_code": "@@ -60,8 +60,8 @@ internal void ValidateMptPAT(string? authToken, string serviceEndpoint)\n if (string.IsNullOrEmpty(authToken))\n throw new Exception(Constants.s_no_auth_error);\n JsonWebToken jsonWebToken = _jsonWebTokenHandler!.ReadJsonWebToken(authToken) ?? 
throw new Exception(Constants.s_invalid_mpt_pat_error);\n- var tokenWorkspaceId = jsonWebToken.Claims.FirstOrDefault(c => c.Type == \"aid\")?.Value;\n- Match match = Regex.Match(serviceEndpoint, @\"wss://(?[\\w-]+)\\.api\\.(?playwright(?:-test|-int)?\\.io|playwright\\.microsoft\\.com)/accounts/(?[\\w-]+)/\");\n+ var tokenWorkspaceId = jsonWebToken.Claims.FirstOrDefault(c => c.Type == \"pwid\")?.Value;\n+ Match match = Regex.Match(serviceEndpoint, @\"wss://(?[\\w-]+)\\.api\\.(?playwright(?:-test|-int)?\\.io|playwright\\.microsoft\\.com)/playwrightworkspaces/(?[\\w-]+)/\");\n if (!match.Success)\n throw new Exception(Constants.s_invalid_service_endpoint_error_message);", + "comment_created_at": "2025-06-19T10:47:33+00:00", + "comment_author": "Copilot", + "comment_body": "Throwing a generic Exception makes it harder for callers to distinguish error types; consider using a more specific exception (e.g., ArgumentException) for an invalid service endpoint.\n```suggestion\n throw new ArgumentException(Constants.s_invalid_service_endpoint_error_message);\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2128234472", + "pr_number": 50447, + "pr_file": "sdk/core/Azure.Core.Expressions.DataFactory/src/DataFactoryElementJsonConverter.cs", + "created_at": "2025-06-05T08:15:12+00:00", + "commented_code": "return element != null;\n }\n\n public override void Write(Utf8JsonWriter writer, object? value, JsonSerializerOptions options)\n {\n throw new NotImplementedException();", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2128234472", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50447, + "pr_file": "sdk/core/Azure.Core.Expressions.DataFactory/src/DataFactoryElementJsonConverter.cs", + "discussion_id": "2128234472", + "commented_code": "@@ -399,5 +222,10 @@ private static bool TryGetNonLiteral(JsonElement json, out DataFactoryElement\n \n return element != null;\n }\n+\n+ public override void Write(Utf8JsonWriter writer, object? value, JsonSerializerOptions options)\n+ {\n+ throw new NotImplementedException();", + "comment_created_at": "2025-06-05T08:15:12+00:00", + "comment_author": "Copilot", + "comment_body": "The Write override now throws a NotImplementedException, which may cause runtime errors if this converter is used unexpectedly. Consider providing an implementation or documenting that this converter should not be invoked in AOT scenarios.\n```suggestion\n /// \n /// Serialization is not supported for DataFactoryElementJsonConverter.\n /// \n /// Thrown when attempting to serialize an object.\n public override void Write(Utf8JsonWriter writer, object? 
value, JsonSerializerOptions options)\n {\n throw new NotSupportedException(\"Serialization is not supported for DataFactoryElementJsonConverter.\");\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2130443163", + "pr_number": 50458, + "pr_file": "sdk/identity/Azure.Identity/src/ManagedIdentityClient.cs", + "created_at": "2025-06-05T21:22:48+00:00", + "commented_code": "return await tokenExchangeManagedIdentitySource.AuthenticateAsync(async, context, cancellationToken).ConfigureAwait(false);\n }\n\n // The default case is to use the MSAL implementation, which does no probing of the IMDS endpoint.\n result = async ?\n await _msalManagedIdentityClient.AcquireTokenForManagedIdentityAsync(context, cancellationToken).ConfigureAwait(false) :\n _msalManagedIdentityClient.AcquireTokenForManagedIdentity(context, cancellationToken);\n try\n {\n // The default case is to use the MSAL implementation, which does no probing of the IMDS endpoint.\n result = async ?\n await _msalManagedIdentityClient.AcquireTokenForManagedIdentityAsync(context, cancellationToken).ConfigureAwait(false) :\n _msalManagedIdentityClient.AcquireTokenForManagedIdentity(context, cancellationToken);\n }\n // If the IMDS endpoint is not available, we will throw a CredentialUnavailableException.\n catch (MsalServiceException ex) when (HasInnerExceptionMatching(ex, e => e is RequestFailedException && e.Message.Contains(\"timed out\")))", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2130443163", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50458, + "pr_file": "sdk/identity/Azure.Identity/src/ManagedIdentityClient.cs", + "discussion_id": "2130443163", + "commented_code": "@@ -87,10 +87,19 @@ public async ValueTask AuthenticateAsync(bool async, TokenRequestCo\n return await tokenExchangeManagedIdentitySource.AuthenticateAsync(async, context, cancellationToken).ConfigureAwait(false);\n }\n \n- // The default case is to use the MSAL implementation, which does no probing of the IMDS endpoint.\n- result = async ?\n- await _msalManagedIdentityClient.AcquireTokenForManagedIdentityAsync(context, cancellationToken).ConfigureAwait(false) :\n- _msalManagedIdentityClient.AcquireTokenForManagedIdentity(context, cancellationToken);\n+ try\n+ {\n+ // The default case is to use the MSAL implementation, which does no probing of the IMDS endpoint.\n+ result = async ?\n+ await _msalManagedIdentityClient.AcquireTokenForManagedIdentityAsync(context, cancellationToken).ConfigureAwait(false) :\n+ _msalManagedIdentityClient.AcquireTokenForManagedIdentity(context, cancellationToken);\n+ }\n+ // If the IMDS endpoint is not available, we will throw a CredentialUnavailableException.\n+ catch (MsalServiceException ex) when (HasInnerExceptionMatching(ex, e => e is RequestFailedException && e.Message.Contains(\"timed out\")))", + "comment_created_at": "2025-06-05T21:22:48+00:00", + "comment_author": "Copilot", + "comment_body": "[nitpick] Relying on a string match for \"timed out\" in the exception message is brittle. 
Consider checking a specific exception property or error code on RequestFailedException instead of matching the message text.\n```suggestion\n catch (MsalServiceException ex) when (HasInnerExceptionMatching(ex, e => e is RequestFailedException requestFailedEx && requestFailedEx.Status == 408))\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-specific-exceptions-for-clarity.md b/_reviewers/azure-sdk-for-net-specific-exceptions-for-clarity.md index a3e4b0b..65adeec 100644 --- a/_reviewers/azure-sdk-for-net-specific-exceptions-for-clarity.md +++ b/_reviewers/azure-sdk-for-net-specific-exceptions-for-clarity.md @@ -51,153 +51,3 @@ if (matchingParameter == null) } arguments.Add(matchingParameter); ``` - - -[ - { - "discussion_id": "2174208183", - "pr_number": 50936, - "pr_file": "eng/packages/http-client-csharp-mgmt/generator/Azure.Generator.Management/src/Providers/ResourceClientProvider.cs", - "created_at": "2025-06-30T04:41:22+00:00", - "commented_code": "}\n else if (parameter.Type.Equals(typeof(RequestContent)))\n {\n // If convenience method is provided, find the resource parameter from it\n if (convenienceMethod != null)\n if (methodParameters.Count > 0)\n {\n var resource = convenienceMethod.Signature.Parameters\n .Single(p => p.Type.Equals(ResourceData.Type) || p.Type.Equals(typeof(RequestContent)));\n // Find the content parameter in the method parameters\n // TODO: need to revisit the filter here\n var contentInputParameter = operation.Parameters.First(p => !ImplicitParameterNames.Contains(p.Name) && p.Kind == InputParameterKind.Method && p.Type is not InputPrimitiveType);\n var resource = methodParameters.Single(p => p.Name == \"data\" || p.Name == contentInputParameter.Name);\n arguments.Add(resource);\n }\n else\n {\n // Otherwise just add the parameter as-is\n arguments.Add(parameter);\n }\n }\n else if (parameter.Type.Equals(typeof(RequestContext)))\n {\n arguments.Add(contextVariable);\n }\n else\n {\n arguments.Add(parameter);\n arguments.Add(methodParameters.Single(p => p.Name == parameter.Name));", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2174208183", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50936, - "pr_file": "eng/packages/http-client-csharp-mgmt/generator/Azure.Generator.Management/src/Providers/ResourceClientProvider.cs", - "discussion_id": "2174208183", - "commented_code": "@@ -364,26 +373,22 @@ public ValueExpression[] PopulateArguments(\n }\n else if (parameter.Type.Equals(typeof(RequestContent)))\n {\n- // If convenience method is provided, find the resource parameter from it\n- if (convenienceMethod != null)\n+ if (methodParameters.Count > 0)\n {\n- var resource = convenienceMethod.Signature.Parameters\n- .Single(p => p.Type.Equals(ResourceData.Type) || p.Type.Equals(typeof(RequestContent)));\n+ // Find the content parameter in the method parameters\n+ // TODO: need to revisit the filter here\n+ var contentInputParameter = operation.Parameters.First(p => !ImplicitParameterNames.Contains(p.Name) && p.Kind == InputParameterKind.Method && p.Type is not InputPrimitiveType);\n+ var resource = methodParameters.Single(p => p.Name == \"data\" || p.Name == contentInputParameter.Name);\n arguments.Add(resource);\n }\n- else\n- {\n- // Otherwise just add the parameter as-is\n- arguments.Add(parameter);\n- }\n }\n else if (parameter.Type.Equals(typeof(RequestContext)))\n {\n arguments.Add(contextVariable);\n }\n else\n {\n- 
arguments.Add(parameter);\n+ arguments.Add(methodParameters.Single(p => p.Name == parameter.Name));", - "comment_created_at": "2025-06-30T04:41:22+00:00", - "comment_author": "Copilot", - "comment_body": "[nitpick] The use of Single() assumes that exactly one matching parameter exists. It may be safer to validate the existence of a match or provide a clearer error message if the match is not found.\n```suggestion\n var matchingParameter = methodParameters.SingleOrDefault(p => p.Name == parameter.Name);\n if (matchingParameter == null)\n {\n throw new InvalidOperationException($\"No matching parameter found for '{parameter.Name}' in methodParameters.\");\n }\n arguments.Add(matchingParameter);\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2058868488", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/src/Custom/Agent/ToolCallsResolver.cs", - "created_at": "2025-04-24T16:53:55+00:00", - "commented_code": "\ufeff// Copyright (c) Microsoft Corporation. All rights reserved.\n// Licensed under the MIT License.\nusing System;\nusing System.Collections;\nusing System.Collections.Generic;\nusing System.Reflection;\nusing System.Text.Json;\nusing System.Xml.Linq;\n\nnamespace Azure.AI.Projects.Custom.Agent\n{\n /// \n /// ToolCallsResolver is used to resolve tool calls in the streaming API.\n /// \n public class ToolCallsResolver\n {\n private readonly Dictionary _delegates = new();\n\n internal ToolCallsResolver(Dictionary delegates)\n {\n _delegates = delegates;\n }\n\n /// \n /// Indicates whether auto tool calls are enabled.\n /// \n internal bool EnableAutoToolCalls => _delegates.Count > 0;\n\n /// \n /// Resolves the tool call by invoking the delegate associated with the function name.\n /// It casts the function arguments to the appropriate types based on the delegate's parameters.\n /// without knowing the answer.\n /// \n internal ToolOutput GetResolvedToolOutput(string functionName, string toolCallId, string functionArguments)\n {\n if (!_delegates.TryGetValue(functionName, out var func))\n {\n string error = $\"Function {functionName} not found.\";\n throw new MissingMethodException(error);", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2058868488", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/src/Custom/Agent/ToolCallsResolver.cs", - "discussion_id": "2058868488", - "commented_code": "@@ -0,0 +1,214 @@\n+\ufeff// Copyright (c) Microsoft Corporation. 
All rights reserved.\n+// Licensed under the MIT License.\n+using System;\n+using System.Collections;\n+using System.Collections.Generic;\n+using System.Reflection;\n+using System.Text.Json;\n+using System.Xml.Linq;\n+\n+namespace Azure.AI.Projects.Custom.Agent\n+{\n+ /// \n+ /// ToolCallsResolver is used to resolve tool calls in the streaming API.\n+ /// \n+ public class ToolCallsResolver\n+ {\n+ private readonly Dictionary _delegates = new();\n+\n+ internal ToolCallsResolver(Dictionary delegates)\n+ {\n+ _delegates = delegates;\n+ }\n+\n+ /// \n+ /// Indicates whether auto tool calls are enabled.\n+ /// \n+ internal bool EnableAutoToolCalls => _delegates.Count > 0;\n+\n+ /// \n+ /// Resolves the tool call by invoking the delegate associated with the function name.\n+ /// It casts the function arguments to the appropriate types based on the delegate's parameters.\n+ /// without knowing the answer.\n+ /// \n+ internal ToolOutput GetResolvedToolOutput(string functionName, string toolCallId, string functionArguments)\n+ {\n+ if (!_delegates.TryGetValue(functionName, out var func))\n+ {\n+ string error = $\"Function {functionName} not found.\";\n+ throw new MissingMethodException(error);", - "comment_created_at": "2025-04-24T16:53:55+00:00", - "comment_author": "KrzysztofCwalina", - "comment_body": "will it be retried?", - "pr_file_module": null - }, - { - "comment_id": "2058998727", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/src/Custom/Agent/ToolCallsResolver.cs", - "discussion_id": "2058868488", - "commented_code": "@@ -0,0 +1,214 @@\n+\ufeff// Copyright (c) Microsoft Corporation. All rights reserved.\n+// Licensed under the MIT License.\n+using System;\n+using System.Collections;\n+using System.Collections.Generic;\n+using System.Reflection;\n+using System.Text.Json;\n+using System.Xml.Linq;\n+\n+namespace Azure.AI.Projects.Custom.Agent\n+{\n+ /// \n+ /// ToolCallsResolver is used to resolve tool calls in the streaming API.\n+ /// \n+ public class ToolCallsResolver\n+ {\n+ private readonly Dictionary _delegates = new();\n+\n+ internal ToolCallsResolver(Dictionary delegates)\n+ {\n+ _delegates = delegates;\n+ }\n+\n+ /// \n+ /// Indicates whether auto tool calls are enabled.\n+ /// \n+ internal bool EnableAutoToolCalls => _delegates.Count > 0;\n+\n+ /// \n+ /// Resolves the tool call by invoking the delegate associated with the function name.\n+ /// It casts the function arguments to the appropriate types based on the delegate's parameters.\n+ /// without knowing the answer.\n+ /// \n+ internal ToolOutput GetResolvedToolOutput(string functionName, string toolCallId, string functionArguments)\n+ {\n+ if (!_delegates.TryGetValue(functionName, out var func))\n+ {\n+ string error = $\"Function {functionName} not found.\";\n+ throw new MissingMethodException(error);", - "comment_created_at": "2025-04-24T18:18:40+00:00", - "comment_author": "howieleung", - "comment_body": "Yes. I did lot of experiments. Depends on the question, if we submit this error to the model, the model might try to figure out the answer with its knowledge or memory. It might raise the same function call again. 
Or it might give up and say it doesn't have the information.", - "pr_file_module": null - }, - { - "comment_id": "2059171479", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/src/Custom/Agent/ToolCallsResolver.cs", - "discussion_id": "2058868488", - "commented_code": "@@ -0,0 +1,214 @@\n+\ufeff// Copyright (c) Microsoft Corporation. All rights reserved.\n+// Licensed under the MIT License.\n+using System;\n+using System.Collections;\n+using System.Collections.Generic;\n+using System.Reflection;\n+using System.Text.Json;\n+using System.Xml.Linq;\n+\n+namespace Azure.AI.Projects.Custom.Agent\n+{\n+ /// \n+ /// ToolCallsResolver is used to resolve tool calls in the streaming API.\n+ /// \n+ public class ToolCallsResolver\n+ {\n+ private readonly Dictionary _delegates = new();\n+\n+ internal ToolCallsResolver(Dictionary delegates)\n+ {\n+ _delegates = delegates;\n+ }\n+\n+ /// \n+ /// Indicates whether auto tool calls are enabled.\n+ /// \n+ internal bool EnableAutoToolCalls => _delegates.Count > 0;\n+\n+ /// \n+ /// Resolves the tool call by invoking the delegate associated with the function name.\n+ /// It casts the function arguments to the appropriate types based on the delegate's parameters.\n+ /// without knowing the answer.\n+ /// \n+ internal ToolOutput GetResolvedToolOutput(string functionName, string toolCallId, string functionArguments)\n+ {\n+ if (!_delegates.TryGetValue(functionName, out var func))\n+ {\n+ string error = $\"Function {functionName} not found.\";\n+ throw new MissingMethodException(error);", - "comment_created_at": "2025-04-24T20:24:59+00:00", - "comment_author": "KrzysztofCwalina", - "comment_body": "Don't we need two exceptions for this to work well. If the model passes a wrong argument, it might indeed retry successfully. But why would the modle give us a wrong function name? We gave it an explicit list of functions that we support", - "pr_file_module": null - }, - { - "comment_id": "2060960574", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/src/Custom/Agent/ToolCallsResolver.cs", - "discussion_id": "2058868488", - "commented_code": "@@ -0,0 +1,214 @@\n+\ufeff// Copyright (c) Microsoft Corporation. 
All rights reserved.\n+// Licensed under the MIT License.\n+using System;\n+using System.Collections;\n+using System.Collections.Generic;\n+using System.Reflection;\n+using System.Text.Json;\n+using System.Xml.Linq;\n+\n+namespace Azure.AI.Projects.Custom.Agent\n+{\n+ /// \n+ /// ToolCallsResolver is used to resolve tool calls in the streaming API.\n+ /// \n+ public class ToolCallsResolver\n+ {\n+ private readonly Dictionary _delegates = new();\n+\n+ internal ToolCallsResolver(Dictionary delegates)\n+ {\n+ _delegates = delegates;\n+ }\n+\n+ /// \n+ /// Indicates whether auto tool calls are enabled.\n+ /// \n+ internal bool EnableAutoToolCalls => _delegates.Count > 0;\n+\n+ /// \n+ /// Resolves the tool call by invoking the delegate associated with the function name.\n+ /// It casts the function arguments to the appropriate types based on the delegate's parameters.\n+ /// without knowing the answer.\n+ /// \n+ internal ToolOutput GetResolvedToolOutput(string functionName, string toolCallId, string functionArguments)\n+ {\n+ if (!_delegates.TryGetValue(functionName, out var func))\n+ {\n+ string error = $\"Function {functionName} not found.\";\n+ throw new MissingMethodException(error);", - "comment_created_at": "2025-04-25T22:23:57+00:00", - "comment_author": "howieleung", - "comment_body": "It is possible that the agent is created by Foundary UI or other application. In their C# app, they call getAgent and attempt to stream. If this agent has a function tool that isn't in the list of delegate, we want to raise exception.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2156708743", - "pr_number": 50718, - "pr_file": "sdk/loadtestservice/Azure.Developer.Playwright/src/Utility/ClientUtilities.cs", - "created_at": "2025-06-19T10:47:33+00:00", - "commented_code": "if (string.IsNullOrEmpty(authToken))\n throw new Exception(Constants.s_no_auth_error);\n JsonWebToken jsonWebToken = _jsonWebTokenHandler!.ReadJsonWebToken(authToken) ?? throw new Exception(Constants.s_invalid_mpt_pat_error);\n var tokenWorkspaceId = jsonWebToken.Claims.FirstOrDefault(c => c.Type == \"aid\")?.Value;\n Match match = Regex.Match(serviceEndpoint, @\"wss://(?[\\w-]+)\\.api\\.(?playwright(?:-test|-int)?\\.io|playwright\\.microsoft\\.com)/accounts/(?[\\w-]+)/\");\n var tokenWorkspaceId = jsonWebToken.Claims.FirstOrDefault(c => c.Type == \"pwid\")?.Value;\n Match match = Regex.Match(serviceEndpoint, @\"wss://(?[\\w-]+)\\.api\\.(?playwright(?:-test|-int)?\\.io|playwright\\.microsoft\\.com)/playwrightworkspaces/(?[\\w-]+)/\");\n if (!match.Success)\n throw new Exception(Constants.s_invalid_service_endpoint_error_message);", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2156708743", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50718, - "pr_file": "sdk/loadtestservice/Azure.Developer.Playwright/src/Utility/ClientUtilities.cs", - "discussion_id": "2156708743", - "commented_code": "@@ -60,8 +60,8 @@ internal void ValidateMptPAT(string? authToken, string serviceEndpoint)\n if (string.IsNullOrEmpty(authToken))\n throw new Exception(Constants.s_no_auth_error);\n JsonWebToken jsonWebToken = _jsonWebTokenHandler!.ReadJsonWebToken(authToken) ?? 
throw new Exception(Constants.s_invalid_mpt_pat_error);\n- var tokenWorkspaceId = jsonWebToken.Claims.FirstOrDefault(c => c.Type == \"aid\")?.Value;\n- Match match = Regex.Match(serviceEndpoint, @\"wss://(?[\\w-]+)\\.api\\.(?playwright(?:-test|-int)?\\.io|playwright\\.microsoft\\.com)/accounts/(?[\\w-]+)/\");\n+ var tokenWorkspaceId = jsonWebToken.Claims.FirstOrDefault(c => c.Type == \"pwid\")?.Value;\n+ Match match = Regex.Match(serviceEndpoint, @\"wss://(?[\\w-]+)\\.api\\.(?playwright(?:-test|-int)?\\.io|playwright\\.microsoft\\.com)/playwrightworkspaces/(?[\\w-]+)/\");\n if (!match.Success)\n throw new Exception(Constants.s_invalid_service_endpoint_error_message);", - "comment_created_at": "2025-06-19T10:47:33+00:00", - "comment_author": "Copilot", - "comment_body": "Throwing a generic Exception makes it harder for callers to distinguish error types; consider using a more specific exception (e.g., ArgumentException) for an invalid service endpoint.\n```suggestion\n throw new ArgumentException(Constants.s_invalid_service_endpoint_error_message);\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2128234472", - "pr_number": 50447, - "pr_file": "sdk/core/Azure.Core.Expressions.DataFactory/src/DataFactoryElementJsonConverter.cs", - "created_at": "2025-06-05T08:15:12+00:00", - "commented_code": "return element != null;\n }\n\n public override void Write(Utf8JsonWriter writer, object? value, JsonSerializerOptions options)\n {\n throw new NotImplementedException();", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2128234472", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50447, - "pr_file": "sdk/core/Azure.Core.Expressions.DataFactory/src/DataFactoryElementJsonConverter.cs", - "discussion_id": "2128234472", - "commented_code": "@@ -399,5 +222,10 @@ private static bool TryGetNonLiteral(JsonElement json, out DataFactoryElement\n \n return element != null;\n }\n+\n+ public override void Write(Utf8JsonWriter writer, object? value, JsonSerializerOptions options)\n+ {\n+ throw new NotImplementedException();", - "comment_created_at": "2025-06-05T08:15:12+00:00", - "comment_author": "Copilot", - "comment_body": "The Write override now throws a NotImplementedException, which may cause runtime errors if this converter is used unexpectedly. Consider providing an implementation or documenting that this converter should not be invoked in AOT scenarios.\n```suggestion\n /// \n /// Serialization is not supported for DataFactoryElementJsonConverter.\n /// \n /// Thrown when attempting to serialize an object.\n public override void Write(Utf8JsonWriter writer, object? 
value, JsonSerializerOptions options)\n {\n throw new NotSupportedException(\"Serialization is not supported for DataFactoryElementJsonConverter.\");\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2130443163", - "pr_number": 50458, - "pr_file": "sdk/identity/Azure.Identity/src/ManagedIdentityClient.cs", - "created_at": "2025-06-05T21:22:48+00:00", - "commented_code": "return await tokenExchangeManagedIdentitySource.AuthenticateAsync(async, context, cancellationToken).ConfigureAwait(false);\n }\n\n // The default case is to use the MSAL implementation, which does no probing of the IMDS endpoint.\n result = async ?\n await _msalManagedIdentityClient.AcquireTokenForManagedIdentityAsync(context, cancellationToken).ConfigureAwait(false) :\n _msalManagedIdentityClient.AcquireTokenForManagedIdentity(context, cancellationToken);\n try\n {\n // The default case is to use the MSAL implementation, which does no probing of the IMDS endpoint.\n result = async ?\n await _msalManagedIdentityClient.AcquireTokenForManagedIdentityAsync(context, cancellationToken).ConfigureAwait(false) :\n _msalManagedIdentityClient.AcquireTokenForManagedIdentity(context, cancellationToken);\n }\n // If the IMDS endpoint is not available, we will throw a CredentialUnavailableException.\n catch (MsalServiceException ex) when (HasInnerExceptionMatching(ex, e => e is RequestFailedException && e.Message.Contains(\"timed out\")))", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2130443163", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50458, - "pr_file": "sdk/identity/Azure.Identity/src/ManagedIdentityClient.cs", - "discussion_id": "2130443163", - "commented_code": "@@ -87,10 +87,19 @@ public async ValueTask AuthenticateAsync(bool async, TokenRequestCo\n return await tokenExchangeManagedIdentitySource.AuthenticateAsync(async, context, cancellationToken).ConfigureAwait(false);\n }\n \n- // The default case is to use the MSAL implementation, which does no probing of the IMDS endpoint.\n- result = async ?\n- await _msalManagedIdentityClient.AcquireTokenForManagedIdentityAsync(context, cancellationToken).ConfigureAwait(false) :\n- _msalManagedIdentityClient.AcquireTokenForManagedIdentity(context, cancellationToken);\n+ try\n+ {\n+ // The default case is to use the MSAL implementation, which does no probing of the IMDS endpoint.\n+ result = async ?\n+ await _msalManagedIdentityClient.AcquireTokenForManagedIdentityAsync(context, cancellationToken).ConfigureAwait(false) :\n+ _msalManagedIdentityClient.AcquireTokenForManagedIdentity(context, cancellationToken);\n+ }\n+ // If the IMDS endpoint is not available, we will throw a CredentialUnavailableException.\n+ catch (MsalServiceException ex) when (HasInnerExceptionMatching(ex, e => e is RequestFailedException && e.Message.Contains(\"timed out\")))", - "comment_created_at": "2025-06-05T21:22:48+00:00", - "comment_author": "Copilot", - "comment_body": "[nitpick] Relying on a string match for \"timed out\" in the exception message is brittle. 
Consider checking a specific exception property or error code on RequestFailedException instead of matching the message text.\n```suggestion\n catch (MsalServiceException ex) when (HasInnerExceptionMatching(ex, e => e is RequestFailedException requestFailedEx && requestFailedEx.Status == 408))\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-standardize-environment-variables.json b/_reviewers/azure-sdk-for-net-standardize-environment-variables.json new file mode 100644 index 0000000..ca33bce --- /dev/null +++ b/_reviewers/azure-sdk-for-net-standardize-environment-variables.json @@ -0,0 +1,102 @@ +[ + { + "discussion_id": "2166970984", + "pr_number": 50879, + "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/tests/Samples/Sample_PersistentAgents_Bing_Custom_Search.cs", + "created_at": "2025-06-25T15:08:57+00:00", + "commented_code": "// Copyright (c) Microsoft Corporation. All rights reserved.\n// Licensed under the MIT License.\n\n#nullable disable\n\nusing System;\nusing System.Threading;\nusing System.Threading.Tasks;\nusing Azure.Core.TestFramework;\nusing NUnit.Framework;\n\nnamespace Azure.AI.Agents.Persistent.Tests;\n\npublic partial class Sample_PersistentAgents_Bing_Custom_Search : SamplesBase\n{\n [Test]\n [AsyncOnly]\n public async Task BingCustomSearchExampleAsync()\n {\n #region Snippet:AgentsBingCustomSearch_CreateProject\n#if SNIPPET\n var projectEndpoint = System.Environment.GetEnvironmentVariable(\"PROJECT_ENDPOINT\");\n var modelDeploymentName = System.Environment.GetEnvironmentVariable(\"MODEL_DEPLOYMENT_NAME\");\n var connectionId = System.Environment.GetEnvironmentVariable(\"BING_CUSTOM_CONNECTION_ID\");\n#else\n var projectEndpoint = TestEnvironment.PROJECT_ENDPOINT;\n var modelDeploymentName = TestEnvironment.MODELDEPLOYMENTNAME;\n var connectionId = TestEnvironment.BING_CONECTION_ID;", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2166970984", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50879, + "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/tests/Samples/Sample_PersistentAgents_Bing_Custom_Search.cs", + "discussion_id": "2166970984", + "commented_code": "@@ -0,0 +1,199 @@\n+// Copyright (c) Microsoft Corporation. All rights reserved.\n+// Licensed under the MIT License.\n+\n+#nullable disable\n+\n+using System;\n+using System.Threading;\n+using System.Threading.Tasks;\n+using Azure.Core.TestFramework;\n+using NUnit.Framework;\n+\n+namespace Azure.AI.Agents.Persistent.Tests;\n+\n+public partial class Sample_PersistentAgents_Bing_Custom_Search : SamplesBase\n+{\n+ [Test]\n+ [AsyncOnly]\n+ public async Task BingCustomSearchExampleAsync()\n+ {\n+ #region Snippet:AgentsBingCustomSearch_CreateProject\n+#if SNIPPET\n+ var projectEndpoint = System.Environment.GetEnvironmentVariable(\"PROJECT_ENDPOINT\");\n+ var modelDeploymentName = System.Environment.GetEnvironmentVariable(\"MODEL_DEPLOYMENT_NAME\");\n+ var connectionId = System.Environment.GetEnvironmentVariable(\"BING_CUSTOM_CONNECTION_ID\");\n+#else\n+ var projectEndpoint = TestEnvironment.PROJECT_ENDPOINT;\n+ var modelDeploymentName = TestEnvironment.MODELDEPLOYMENTNAME;\n+ var connectionId = TestEnvironment.BING_CONECTION_ID;", + "comment_created_at": "2025-06-25T15:08:57+00:00", + "comment_author": "Copilot", + "comment_body": "This sample references `BING_CONECTION_ID` but the new property is `BING_CUSTOM_CONNECTION_ID`. 
Please update to use `TestEnvironment.BING_CUSTOM_CONNECTION_ID`.\n```suggestion\n var connectionId = TestEnvironment.BING_CUSTOM_CONNECTION_ID;\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2156832075", + "pr_number": 50705, + "pr_file": "sdk/resourcemanager/Azure.ResourceManager/tests/Unit/ManagedServiceIdentityTest.cs", + "created_at": "2025-06-19T11:54:21+00:00", + "commented_code": "using Azure.ResourceManager.Models;\nusing NUnit.Framework;\n\n#nullable enable\n\nnamespace Azure.ResourceManager.Tests\n{\n [Parallelizable]\n public class ManagedServiceIdentityTests\n {\n private static readonly string TestAssetPath = Path.Combine(TestContext.CurrentContext.TestDirectory, \"Unit\", \"TestAssets\", \"Identity\");\n private static readonly ModelReaderWriterOptions V3Options = new ModelReaderWriterOptions(\"W|v3\");\n\n public JsonProperty DeserializerHelper(string filename)\n private JsonElement DeserializerHelper(string filename, out string json)\n {\n var json = File.ReadAllText(Path.Combine(TestAssetPath, filename));\n using JsonDocument document = JsonDocument.Parse(json);\n JsonElement rootElement = document.RootElement.Clone();\n return rootElement.EnumerateObject().First();\n var originalJson = File.ReadAllText(Path.Combine(TestAssetPath, filename));\n json = originalJson.Replace(\"\\r\\n\", \"\").Replace(\"\\n\", \"\").Replace(\" \", \"\").Replace(\"'principalId':'22fdaec1-8b9f-49dc-bd72-ddaf8f215577','tenantId':'72f988af-86f1-41af-91ab-2d7cd011db47',\".Replace('\\'', '\\\"'), \"\");", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2156832075", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50705, + "pr_file": "sdk/resourcemanager/Azure.ResourceManager/tests/Unit/ManagedServiceIdentityTest.cs", + "discussion_id": "2156832075", + "commented_code": "@@ -8,40 +9,44 @@\n using Azure.ResourceManager.Models;\n using NUnit.Framework;\n \n+#nullable enable\n+\n namespace Azure.ResourceManager.Tests\n {\n [Parallelizable]\n public class ManagedServiceIdentityTests\n {\n private static readonly string TestAssetPath = Path.Combine(TestContext.CurrentContext.TestDirectory, \"Unit\", \"TestAssets\", \"Identity\");\n+ private static readonly ModelReaderWriterOptions V3Options = new ModelReaderWriterOptions(\"W|v3\");\n \n- public JsonProperty DeserializerHelper(string filename)\n+ private JsonElement DeserializerHelper(string filename, out string json)\n {\n- var json = File.ReadAllText(Path.Combine(TestAssetPath, filename));\n- using JsonDocument document = JsonDocument.Parse(json);\n- JsonElement rootElement = document.RootElement.Clone();\n- return rootElement.EnumerateObject().First();\n+ var originalJson = File.ReadAllText(Path.Combine(TestAssetPath, filename));\n+ json = originalJson.Replace(\"\\r\\n\", \"\").Replace(\"\\n\", \"\").Replace(\" \", \"\").Replace(\"'principalId':'22fdaec1-8b9f-49dc-bd72-ddaf8f215577','tenantId':'72f988af-86f1-41af-91ab-2d7cd011db47',\".Replace('\\'', '\\\"'), \"\");", + "comment_created_at": "2025-06-19T11:54:21+00:00", + "comment_author": "Copilot", + "comment_body": "The hard-coded JSON normalization (including fixed principal and tenant ID values) is brittle; consider deriving these values dynamically or clarifying the intent with inline documentation.\n```suggestion\n var principalId = Environment.GetEnvironmentVariable(\"TEST_PRINCIPAL_ID\") ?? \"22fdaec1-8b9f-49dc-bd72-ddaf8f215577\";\n var tenantId = Environment.GetEnvironmentVariable(\"TEST_TENANT_ID\") ?? 
\"72f988af-86f1-41af-91ab-2d7cd011db47\";\n json = originalJson.Replace(\"\\r\\n\", \"\").Replace(\"\\n\", \"\").Replace(\" \", \"\")\n .Replace($\"'principalId':'{principalId}','tenantId':'{tenantId}',\".Replace('\\'', '\\\"'), \"\");\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2141202156", + "pr_number": 50559, + "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/tests/Samples/Sample_PersistentAgents_Sharepoint.cs", + "created_at": "2025-06-11T22:17:07+00:00", + "commented_code": "// Copyright (c) Microsoft Corporation. All rights reserved.\n// Licensed under the MIT License.\n\n#nullable disable\n\nusing System;\nusing System.Threading;\nusing System.Threading.Tasks;\nusing Azure.Core.TestFramework;\nusing NUnit.Framework;\n\nnamespace Azure.AI.Agents.Persistent.Tests;\n\npublic partial class Sample_PersistentAgents_Sharepoint : SamplesBase\n{\n [Test]\n [AsyncOnly]\n public async Task SharepointExampleAsync()\n {\n#if SNIPPET\n var projectEndpoint = System.Environment.GetEnvironmentVariable(\"PROJECT_ENDPOINT\");\n var modelDeploymentName = System.Environment.GetEnvironmentVariable(\"MODEL_DEPLOYMENT_NAME\");\n var connectionId = System.Environment.GetEnvironmentVariable(\"AZURE_SHAREPOINT_CONECTION_ID\");", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2141202156", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50559, + "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/tests/Samples/Sample_PersistentAgents_Sharepoint.cs", + "discussion_id": "2141202156", + "commented_code": "@@ -0,0 +1,185 @@\n+// Copyright (c) Microsoft Corporation. All rights reserved.\n+// Licensed under the MIT License.\n+\n+#nullable disable\n+\n+using System;\n+using System.Threading;\n+using System.Threading.Tasks;\n+using Azure.Core.TestFramework;\n+using NUnit.Framework;\n+\n+namespace Azure.AI.Agents.Persistent.Tests;\n+\n+public partial class Sample_PersistentAgents_Sharepoint : SamplesBase\n+{\n+ [Test]\n+ [AsyncOnly]\n+ public async Task SharepointExampleAsync()\n+ {\n+#if SNIPPET\n+ var projectEndpoint = System.Environment.GetEnvironmentVariable(\"PROJECT_ENDPOINT\");\n+ var modelDeploymentName = System.Environment.GetEnvironmentVariable(\"MODEL_DEPLOYMENT_NAME\");\n+ var connectionId = System.Environment.GetEnvironmentVariable(\"AZURE_SHAREPOINT_CONECTION_ID\");", + "comment_created_at": "2025-06-11T22:17:07+00:00", + "comment_author": "Copilot", + "comment_body": "The environment variable name is misspelled as 'AZURE_SHAREPOINT_CONECTION_ID'. It should be 'AZURE_SHAREPOINT_CONNECTION_ID' to match the test environment and avoid null values.\n```suggestion\n var connectionId = System.Environment.GetEnvironmentVariable(\"AZURE_SHAREPOINT_CONNECTION_ID\");\n```", + "pr_file_module": null + }, + { + "comment_id": "2141205489", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50559, + "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/tests/Samples/Sample_PersistentAgents_Sharepoint.cs", + "discussion_id": "2141202156", + "commented_code": "@@ -0,0 +1,185 @@\n+// Copyright (c) Microsoft Corporation. 
All rights reserved.\n+// Licensed under the MIT License.\n+\n+#nullable disable\n+\n+using System;\n+using System.Threading;\n+using System.Threading.Tasks;\n+using Azure.Core.TestFramework;\n+using NUnit.Framework;\n+\n+namespace Azure.AI.Agents.Persistent.Tests;\n+\n+public partial class Sample_PersistentAgents_Sharepoint : SamplesBase\n+{\n+ [Test]\n+ [AsyncOnly]\n+ public async Task SharepointExampleAsync()\n+ {\n+#if SNIPPET\n+ var projectEndpoint = System.Environment.GetEnvironmentVariable(\"PROJECT_ENDPOINT\");\n+ var modelDeploymentName = System.Environment.GetEnvironmentVariable(\"MODEL_DEPLOYMENT_NAME\");\n+ var connectionId = System.Environment.GetEnvironmentVariable(\"AZURE_SHAREPOINT_CONECTION_ID\");", + "comment_created_at": "2025-06-11T22:20:52+00:00", + "comment_author": "dargilco", + "comment_body": "Good catch from Copilot!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2141202161", + "pr_number": 50559, + "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/tests/Samples/Sample_PersistentAgents_Sharepoint.cs", + "created_at": "2025-06-11T22:17:07+00:00", + "commented_code": "// Copyright (c) Microsoft Corporation. All rights reserved.\n// Licensed under the MIT License.\n\n#nullable disable\n\nusing System;\nusing System.Threading;\nusing System.Threading.Tasks;\nusing Azure.Core.TestFramework;\nusing NUnit.Framework;\n\nnamespace Azure.AI.Agents.Persistent.Tests;\n\npublic partial class Sample_PersistentAgents_Sharepoint : SamplesBase\n{\n [Test]\n [AsyncOnly]\n public async Task SharepointExampleAsync()\n {\n#if SNIPPET\n var projectEndpoint = System.Environment.GetEnvironmentVariable(\"PROJECT_ENDPOINT\");\n var modelDeploymentName = System.Environment.GetEnvironmentVariable(\"MODEL_DEPLOYMENT_NAME\");\n var connectionId = System.Environment.GetEnvironmentVariable(\"AZURE_SHAREPOINT_CONECTION_ID\");\n#else\n var projectEndpoint = TestEnvironment.PROJECT_ENDPOINT;\n var modelDeploymentName = TestEnvironment.MODELDEPLOYMENTNAME;\n var connectionId = TestEnvironment.SHAREPOINT_CONNECTION_ID;\n#endif\n PersistentAgentsClient agentClient = new(projectEndpoint, new DefaultAzureCredential());\n SharepointToolDefinition sharepointTool = new(\n new SharepointGroundingToolParameters(\n connectionId\n )\n );\n PersistentAgent agent = await agentClient.Administration.CreateAgentAsync(\n model: modelDeploymentName,\n name: \"my-agent\",\n instructions: \"You are a helpful agent.\",\n tools: [ sharepointTool ]);\n\n // Create thread for communication\n PersistentAgentThread thread = await agentClient.Threads.CreateThreadAsync();\n\n // Create message to thread\n PersistentThreadMessage message = await agentClient.Messages.CreateMessageAsync(\n thread.Id,\n MessageRole.User,\n \"\");\n\n // Run the agent\n ThreadRun run = await agentClient.Runs.CreateRunAsync(thread, agent);\n do\n {\n await Task.Delay(TimeSpan.FromMilliseconds(500));\n run = await agentClient.Runs.GetRunAsync(thread.Id, run.Id);\n }\n while (run.Status == RunStatus.Queued\n || run.Status == RunStatus.InProgress);\n\n Assert.AreEqual(\n RunStatus.Completed,\n run.Status,\n run.LastError?.Message);\n\n AsyncPageable messages = agentClient.Messages.GetMessagesAsync(\n threadId: thread.Id,\n order: ListSortOrder.Ascending\n );\n\n await foreach (PersistentThreadMessage threadMessage in messages)\n {\n Console.Write($\"{threadMessage.CreatedAt:yyyy-MM-dd HH:mm:ss} - {threadMessage.Role,10}: \");\n foreach (MessageContent contentItem in threadMessage.ContentItems)\n {\n if (contentItem is MessageTextContent 
textItem)\n {\n string response = textItem.Text;\n if (textItem.Annotations != null)\n {\n foreach (MessageTextAnnotation annotation in textItem.Annotations)\n {\n if (annotation is MessageTextUriCitationAnnotation uriAnnotation)\n {\n response = response.Replace(uriAnnotation.Text, $\" [{uriAnnotation.UriCitation.Title}]({uriAnnotation.UriCitation.Uri})\");\n }\n }\n }\n Console.Write($\"Agent response: {response}\");\n }\n else if (contentItem is MessageImageFileContent imageFileItem)\n {\n Console.Write($\"\n+{\n+ [Test]\n+ [AsyncOnly]\n+ public async Task SharepointExampleAsync()\n+ {\n+#if SNIPPET\n+ var projectEndpoint = System.Environment.GetEnvironmentVariable(\"PROJECT_ENDPOINT\");\n+ var modelDeploymentName = System.Environment.GetEnvironmentVariable(\"MODEL_DEPLOYMENT_NAME\");\n+ var connectionId = System.Environment.GetEnvironmentVariable(\"AZURE_SHAREPOINT_CONECTION_ID\");\n+#else\n+ var projectEndpoint = TestEnvironment.PROJECT_ENDPOINT;\n+ var modelDeploymentName = TestEnvironment.MODELDEPLOYMENTNAME;\n+ var connectionId = TestEnvironment.SHAREPOINT_CONNECTION_ID;\n+#endif\n+ PersistentAgentsClient agentClient = new(projectEndpoint, new DefaultAzureCredential());\n+ SharepointToolDefinition sharepointTool = new(\n+ new SharepointGroundingToolParameters(\n+ connectionId\n+ )\n+ );\n+ PersistentAgent agent = await agentClient.Administration.CreateAgentAsync(\n+ model: modelDeploymentName,\n+ name: \"my-agent\",\n+ instructions: \"You are a helpful agent.\",\n+ tools: [ sharepointTool ]);\n+\n+ // Create thread for communication\n+ PersistentAgentThread thread = await agentClient.Threads.CreateThreadAsync();\n+\n+ // Create message to thread\n+ PersistentThreadMessage message = await agentClient.Messages.CreateMessageAsync(\n+ thread.Id,\n+ MessageRole.User,\n+ \"\");\n+\n+ // Run the agent\n+ ThreadRun run = await agentClient.Runs.CreateRunAsync(thread, agent);\n+ do\n+ {\n+ await Task.Delay(TimeSpan.FromMilliseconds(500));\n+ run = await agentClient.Runs.GetRunAsync(thread.Id, run.Id);\n+ }\n+ while (run.Status == RunStatus.Queued\n+ || run.Status == RunStatus.InProgress);\n+\n+ Assert.AreEqual(\n+ RunStatus.Completed,\n+ run.Status,\n+ run.LastError?.Message);\n+\n+ AsyncPageable messages = agentClient.Messages.GetMessagesAsync(\n+ threadId: thread.Id,\n+ order: ListSortOrder.Ascending\n+ );\n+\n+ await foreach (PersistentThreadMessage threadMessage in messages)\n+ {\n+ Console.Write($\"{threadMessage.CreatedAt:yyyy-MM-dd HH:mm:ss} - {threadMessage.Role,10}: \");\n+ foreach (MessageContent contentItem in threadMessage.ContentItems)\n+ {\n+ if (contentItem is MessageTextContent textItem)\n+ {\n+ string response = textItem.Text;\n+ if (textItem.Annotations != null)\n+ {\n+ foreach (MessageTextAnnotation annotation in textItem.Annotations)\n+ {\n+ if (annotation is MessageTextUriCitationAnnotation uriAnnotation)\n+ {\n+ response = response.Replace(uriAnnotation.Text, $\" [{uriAnnotation.UriCitation.Title}]({uriAnnotation.UriCitation.Uri})\");\n+ }\n+ }\n+ }\n+ Console.Write($\"Agent response: {response}\");\n+ }\n+ else if (contentItem is MessageImageFileContent imageFileItem)\n+ {\n+ Console.Write($\"\n{\n [Test]\n [AsyncOnly]\n public async Task BingCustomSearchExampleAsync()\n {\n #region Snippet:AgentsBingCustomSearch_CreateProject\n#if SNIPPET\n var projectEndpoint = System.Environment.GetEnvironmentVariable(\"PROJECT_ENDPOINT\");\n var modelDeploymentName = System.Environment.GetEnvironmentVariable(\"MODEL_DEPLOYMENT_NAME\");\n var connectionId = 
System.Environment.GetEnvironmentVariable(\"BING_CUSTOM_CONNECTION_ID\");\n#else\n var projectEndpoint = TestEnvironment.PROJECT_ENDPOINT;\n var modelDeploymentName = TestEnvironment.MODELDEPLOYMENTNAME;\n var connectionId = TestEnvironment.BING_CONECTION_ID;", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2166970984", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50879, - "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/tests/Samples/Sample_PersistentAgents_Bing_Custom_Search.cs", - "discussion_id": "2166970984", - "commented_code": "@@ -0,0 +1,199 @@\n+// Copyright (c) Microsoft Corporation. All rights reserved.\n+// Licensed under the MIT License.\n+\n+#nullable disable\n+\n+using System;\n+using System.Threading;\n+using System.Threading.Tasks;\n+using Azure.Core.TestFramework;\n+using NUnit.Framework;\n+\n+namespace Azure.AI.Agents.Persistent.Tests;\n+\n+public partial class Sample_PersistentAgents_Bing_Custom_Search : SamplesBase\n+{\n+ [Test]\n+ [AsyncOnly]\n+ public async Task BingCustomSearchExampleAsync()\n+ {\n+ #region Snippet:AgentsBingCustomSearch_CreateProject\n+#if SNIPPET\n+ var projectEndpoint = System.Environment.GetEnvironmentVariable(\"PROJECT_ENDPOINT\");\n+ var modelDeploymentName = System.Environment.GetEnvironmentVariable(\"MODEL_DEPLOYMENT_NAME\");\n+ var connectionId = System.Environment.GetEnvironmentVariable(\"BING_CUSTOM_CONNECTION_ID\");\n+#else\n+ var projectEndpoint = TestEnvironment.PROJECT_ENDPOINT;\n+ var modelDeploymentName = TestEnvironment.MODELDEPLOYMENTNAME;\n+ var connectionId = TestEnvironment.BING_CONECTION_ID;", - "comment_created_at": "2025-06-25T15:08:57+00:00", - "comment_author": "Copilot", - "comment_body": "This sample references `BING_CONECTION_ID` but the new property is `BING_CUSTOM_CONNECTION_ID`. 
Please update to use `TestEnvironment.BING_CUSTOM_CONNECTION_ID`.\n```suggestion\n var connectionId = TestEnvironment.BING_CUSTOM_CONNECTION_ID;\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2156832075", - "pr_number": 50705, - "pr_file": "sdk/resourcemanager/Azure.ResourceManager/tests/Unit/ManagedServiceIdentityTest.cs", - "created_at": "2025-06-19T11:54:21+00:00", - "commented_code": "using Azure.ResourceManager.Models;\nusing NUnit.Framework;\n\n#nullable enable\n\nnamespace Azure.ResourceManager.Tests\n{\n [Parallelizable]\n public class ManagedServiceIdentityTests\n {\n private static readonly string TestAssetPath = Path.Combine(TestContext.CurrentContext.TestDirectory, \"Unit\", \"TestAssets\", \"Identity\");\n private static readonly ModelReaderWriterOptions V3Options = new ModelReaderWriterOptions(\"W|v3\");\n\n public JsonProperty DeserializerHelper(string filename)\n private JsonElement DeserializerHelper(string filename, out string json)\n {\n var json = File.ReadAllText(Path.Combine(TestAssetPath, filename));\n using JsonDocument document = JsonDocument.Parse(json);\n JsonElement rootElement = document.RootElement.Clone();\n return rootElement.EnumerateObject().First();\n var originalJson = File.ReadAllText(Path.Combine(TestAssetPath, filename));\n json = originalJson.Replace(\"\\r\\n\", \"\").Replace(\"\\n\", \"\").Replace(\" \", \"\").Replace(\"'principalId':'22fdaec1-8b9f-49dc-bd72-ddaf8f215577','tenantId':'72f988af-86f1-41af-91ab-2d7cd011db47',\".Replace('\\'', '\\\"'), \"\");", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2156832075", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50705, - "pr_file": "sdk/resourcemanager/Azure.ResourceManager/tests/Unit/ManagedServiceIdentityTest.cs", - "discussion_id": "2156832075", - "commented_code": "@@ -8,40 +9,44 @@\n using Azure.ResourceManager.Models;\n using NUnit.Framework;\n \n+#nullable enable\n+\n namespace Azure.ResourceManager.Tests\n {\n [Parallelizable]\n public class ManagedServiceIdentityTests\n {\n private static readonly string TestAssetPath = Path.Combine(TestContext.CurrentContext.TestDirectory, \"Unit\", \"TestAssets\", \"Identity\");\n+ private static readonly ModelReaderWriterOptions V3Options = new ModelReaderWriterOptions(\"W|v3\");\n \n- public JsonProperty DeserializerHelper(string filename)\n+ private JsonElement DeserializerHelper(string filename, out string json)\n {\n- var json = File.ReadAllText(Path.Combine(TestAssetPath, filename));\n- using JsonDocument document = JsonDocument.Parse(json);\n- JsonElement rootElement = document.RootElement.Clone();\n- return rootElement.EnumerateObject().First();\n+ var originalJson = File.ReadAllText(Path.Combine(TestAssetPath, filename));\n+ json = originalJson.Replace(\"\\r\\n\", \"\").Replace(\"\\n\", \"\").Replace(\" \", \"\").Replace(\"'principalId':'22fdaec1-8b9f-49dc-bd72-ddaf8f215577','tenantId':'72f988af-86f1-41af-91ab-2d7cd011db47',\".Replace('\\'', '\\\"'), \"\");", - "comment_created_at": "2025-06-19T11:54:21+00:00", - "comment_author": "Copilot", - "comment_body": "The hard-coded JSON normalization (including fixed principal and tenant ID values) is brittle; consider deriving these values dynamically or clarifying the intent with inline documentation.\n```suggestion\n var principalId = Environment.GetEnvironmentVariable(\"TEST_PRINCIPAL_ID\") ?? \"22fdaec1-8b9f-49dc-bd72-ddaf8f215577\";\n var tenantId = Environment.GetEnvironmentVariable(\"TEST_TENANT_ID\") ?? 
\"72f988af-86f1-41af-91ab-2d7cd011db47\";\n json = originalJson.Replace(\"\\r\\n\", \"\").Replace(\"\\n\", \"\").Replace(\" \", \"\")\n .Replace($\"'principalId':'{principalId}','tenantId':'{tenantId}',\".Replace('\\'', '\\\"'), \"\");\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2141202156", - "pr_number": 50559, - "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/tests/Samples/Sample_PersistentAgents_Sharepoint.cs", - "created_at": "2025-06-11T22:17:07+00:00", - "commented_code": "\ufeff// Copyright (c) Microsoft Corporation. All rights reserved.\n// Licensed under the MIT License.\n\n#nullable disable\n\nusing System;\nusing System.Threading;\nusing System.Threading.Tasks;\nusing Azure.Core.TestFramework;\nusing NUnit.Framework;\n\nnamespace Azure.AI.Agents.Persistent.Tests;\n\npublic partial class Sample_PersistentAgents_Sharepoint : SamplesBase\n{\n [Test]\n [AsyncOnly]\n public async Task SharepointExampleAsync()\n {\n#if SNIPPET\n var projectEndpoint = System.Environment.GetEnvironmentVariable(\"PROJECT_ENDPOINT\");\n var modelDeploymentName = System.Environment.GetEnvironmentVariable(\"MODEL_DEPLOYMENT_NAME\");\n var connectionId = System.Environment.GetEnvironmentVariable(\"AZURE_SHAREPOINT_CONECTION_ID\");", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2141202156", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50559, - "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/tests/Samples/Sample_PersistentAgents_Sharepoint.cs", - "discussion_id": "2141202156", - "commented_code": "@@ -0,0 +1,185 @@\n+\ufeff// Copyright (c) Microsoft Corporation. All rights reserved.\n+// Licensed under the MIT License.\n+\n+#nullable disable\n+\n+using System;\n+using System.Threading;\n+using System.Threading.Tasks;\n+using Azure.Core.TestFramework;\n+using NUnit.Framework;\n+\n+namespace Azure.AI.Agents.Persistent.Tests;\n+\n+public partial class Sample_PersistentAgents_Sharepoint : SamplesBase\n+{\n+ [Test]\n+ [AsyncOnly]\n+ public async Task SharepointExampleAsync()\n+ {\n+#if SNIPPET\n+ var projectEndpoint = System.Environment.GetEnvironmentVariable(\"PROJECT_ENDPOINT\");\n+ var modelDeploymentName = System.Environment.GetEnvironmentVariable(\"MODEL_DEPLOYMENT_NAME\");\n+ var connectionId = System.Environment.GetEnvironmentVariable(\"AZURE_SHAREPOINT_CONECTION_ID\");", - "comment_created_at": "2025-06-11T22:17:07+00:00", - "comment_author": "Copilot", - "comment_body": "The environment variable name is misspelled as 'AZURE_SHAREPOINT_CONECTION_ID'. It should be 'AZURE_SHAREPOINT_CONNECTION_ID' to match the test environment and avoid null values.\n```suggestion\n var connectionId = System.Environment.GetEnvironmentVariable(\"AZURE_SHAREPOINT_CONNECTION_ID\");\n```", - "pr_file_module": null - }, - { - "comment_id": "2141205489", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50559, - "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/tests/Samples/Sample_PersistentAgents_Sharepoint.cs", - "discussion_id": "2141202156", - "commented_code": "@@ -0,0 +1,185 @@\n+\ufeff// Copyright (c) Microsoft Corporation. 
All rights reserved.\n+// Licensed under the MIT License.\n+\n+#nullable disable\n+\n+using System;\n+using System.Threading;\n+using System.Threading.Tasks;\n+using Azure.Core.TestFramework;\n+using NUnit.Framework;\n+\n+namespace Azure.AI.Agents.Persistent.Tests;\n+\n+public partial class Sample_PersistentAgents_Sharepoint : SamplesBase\n+{\n+ [Test]\n+ [AsyncOnly]\n+ public async Task SharepointExampleAsync()\n+ {\n+#if SNIPPET\n+ var projectEndpoint = System.Environment.GetEnvironmentVariable(\"PROJECT_ENDPOINT\");\n+ var modelDeploymentName = System.Environment.GetEnvironmentVariable(\"MODEL_DEPLOYMENT_NAME\");\n+ var connectionId = System.Environment.GetEnvironmentVariable(\"AZURE_SHAREPOINT_CONECTION_ID\");", - "comment_created_at": "2025-06-11T22:20:52+00:00", - "comment_author": "dargilco", - "comment_body": "Good catch from Copilot!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2141202161", - "pr_number": 50559, - "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/tests/Samples/Sample_PersistentAgents_Sharepoint.cs", - "created_at": "2025-06-11T22:17:07+00:00", - "commented_code": "\ufeff// Copyright (c) Microsoft Corporation. All rights reserved.\n// Licensed under the MIT License.\n\n#nullable disable\n\nusing System;\nusing System.Threading;\nusing System.Threading.Tasks;\nusing Azure.Core.TestFramework;\nusing NUnit.Framework;\n\nnamespace Azure.AI.Agents.Persistent.Tests;\n\npublic partial class Sample_PersistentAgents_Sharepoint : SamplesBase\n{\n [Test]\n [AsyncOnly]\n public async Task SharepointExampleAsync()\n {\n#if SNIPPET\n var projectEndpoint = System.Environment.GetEnvironmentVariable(\"PROJECT_ENDPOINT\");\n var modelDeploymentName = System.Environment.GetEnvironmentVariable(\"MODEL_DEPLOYMENT_NAME\");\n var connectionId = System.Environment.GetEnvironmentVariable(\"AZURE_SHAREPOINT_CONECTION_ID\");\n#else\n var projectEndpoint = TestEnvironment.PROJECT_ENDPOINT;\n var modelDeploymentName = TestEnvironment.MODELDEPLOYMENTNAME;\n var connectionId = TestEnvironment.SHAREPOINT_CONNECTION_ID;\n#endif\n PersistentAgentsClient agentClient = new(projectEndpoint, new DefaultAzureCredential());\n SharepointToolDefinition sharepointTool = new(\n new SharepointGroundingToolParameters(\n connectionId\n )\n );\n PersistentAgent agent = await agentClient.Administration.CreateAgentAsync(\n model: modelDeploymentName,\n name: \"my-agent\",\n instructions: \"You are a helpful agent.\",\n tools: [ sharepointTool ]);\n\n // Create thread for communication\n PersistentAgentThread thread = await agentClient.Threads.CreateThreadAsync();\n\n // Create message to thread\n PersistentThreadMessage message = await agentClient.Messages.CreateMessageAsync(\n thread.Id,\n MessageRole.User,\n \"\");\n\n // Run the agent\n ThreadRun run = await agentClient.Runs.CreateRunAsync(thread, agent);\n do\n {\n await Task.Delay(TimeSpan.FromMilliseconds(500));\n run = await agentClient.Runs.GetRunAsync(thread.Id, run.Id);\n }\n while (run.Status == RunStatus.Queued\n || run.Status == RunStatus.InProgress);\n\n Assert.AreEqual(\n RunStatus.Completed,\n run.Status,\n run.LastError?.Message);\n\n AsyncPageable messages = agentClient.Messages.GetMessagesAsync(\n threadId: thread.Id,\n order: ListSortOrder.Ascending\n );\n\n await foreach (PersistentThreadMessage threadMessage in messages)\n {\n Console.Write($\"{threadMessage.CreatedAt:yyyy-MM-dd HH:mm:ss} - {threadMessage.Role,10}: \");\n foreach (MessageContent contentItem in threadMessage.ContentItems)\n {\n if (contentItem is 
MessageTextContent textItem)\n {\n string response = textItem.Text;\n if (textItem.Annotations != null)\n {\n foreach (MessageTextAnnotation annotation in textItem.Annotations)\n {\n if (annotation is MessageTextUriCitationAnnotation uriAnnotation)\n {\n response = response.Replace(uriAnnotation.Text, $\" [{uriAnnotation.UriCitation.Title}]({uriAnnotation.UriCitation.Uri})\");\n }\n }\n }\n Console.Write($\"Agent response: {response}\");\n }\n else if (contentItem is MessageImageFileContent imageFileItem)\n {\n Console.Write($\"\n+{\n+ [Test]\n+ [AsyncOnly]\n+ public async Task SharepointExampleAsync()\n+ {\n+#if SNIPPET\n+ var projectEndpoint = System.Environment.GetEnvironmentVariable(\"PROJECT_ENDPOINT\");\n+ var modelDeploymentName = System.Environment.GetEnvironmentVariable(\"MODEL_DEPLOYMENT_NAME\");\n+ var connectionId = System.Environment.GetEnvironmentVariable(\"AZURE_SHAREPOINT_CONECTION_ID\");\n+#else\n+ var projectEndpoint = TestEnvironment.PROJECT_ENDPOINT;\n+ var modelDeploymentName = TestEnvironment.MODELDEPLOYMENTNAME;\n+ var connectionId = TestEnvironment.SHAREPOINT_CONNECTION_ID;\n+#endif\n+ PersistentAgentsClient agentClient = new(projectEndpoint, new DefaultAzureCredential());\n+ SharepointToolDefinition sharepointTool = new(\n+ new SharepointGroundingToolParameters(\n+ connectionId\n+ )\n+ );\n+ PersistentAgent agent = await agentClient.Administration.CreateAgentAsync(\n+ model: modelDeploymentName,\n+ name: \"my-agent\",\n+ instructions: \"You are a helpful agent.\",\n+ tools: [ sharepointTool ]);\n+\n+ // Create thread for communication\n+ PersistentAgentThread thread = await agentClient.Threads.CreateThreadAsync();\n+\n+ // Create message to thread\n+ PersistentThreadMessage message = await agentClient.Messages.CreateMessageAsync(\n+ thread.Id,\n+ MessageRole.User,\n+ \"\");\n+\n+ // Run the agent\n+ ThreadRun run = await agentClient.Runs.CreateRunAsync(thread, agent);\n+ do\n+ {\n+ await Task.Delay(TimeSpan.FromMilliseconds(500));\n+ run = await agentClient.Runs.GetRunAsync(thread.Id, run.Id);\n+ }\n+ while (run.Status == RunStatus.Queued\n+ || run.Status == RunStatus.InProgress);\n+\n+ Assert.AreEqual(\n+ RunStatus.Completed,\n+ run.Status,\n+ run.LastError?.Message);\n+\n+ AsyncPageable messages = agentClient.Messages.GetMessagesAsync(\n+ threadId: thread.Id,\n+ order: ListSortOrder.Ascending\n+ );\n+\n+ await foreach (PersistentThreadMessage threadMessage in messages)\n+ {\n+ Console.Write($\"{threadMessage.CreatedAt:yyyy-MM-dd HH:mm:ss} - {threadMessage.Role,10}: \");\n+ foreach (MessageContent contentItem in threadMessage.ContentItems)\n+ {\n+ if (contentItem is MessageTextContent textItem)\n+ {\n+ string response = textItem.Text;\n+ if (textItem.Annotations != null)\n+ {\n+ foreach (MessageTextAnnotation annotation in textItem.Annotations)\n+ {\n+ if (annotation is MessageTextUriCitationAnnotation uriAnnotation)\n+ {\n+ response = response.Replace(uriAnnotation.Text, $\" [{uriAnnotation.UriCitation.Title}]({uriAnnotation.UriCitation.Uri})\");\n+ }\n+ }\n+ }\n+ Console.Write($\"Agent response: {response}\");\n+ }\n+ else if (contentItem is MessageImageFileContent imageFileItem)\n+ {\n+ Console.Write($\" 0);\n```\n\n#### Function call executed automatically\n\nIn addition to the manual function calls, SDK supports automatic function calling. 
Here is the example:\n\nHere we use other functions for demonstration:\n```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\nprivate class Address\n{\n public string Street { get; set; }\n public string City { get; set; }\n}\n\nprivate int GetHumidityByAddress(Address address)\n{\n return (address.City == \"Seattle\") ? 60 : 80;\n}\n\nprivate string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n{\n string[] temps = new string[addresses.Length];\n for (int i = 0; i < addresses.Length; i++)\n {\n if (addresses[i].TryGetValue(\"city\", out string city))\n {\n temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? \"20\" : \"50\", unit);\n }\n else\n {\n throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n }\n }\n return temps;\n}\n```\nNow we define the function definitions:\n```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\nprivate FunctionToolDefinition geHhumidityByAddressTool = new(\n name: \"GetHumidityByAddress\",\n description: \"Get humidity by street and city\",\n parameters: BinaryData.FromObjectAsJson(\n new\n {\n Type = \"object\",\n Properties = new\n {\n Address = new\n {\n Type = \"object\",\n Properties = new\n {\n Street = new\n {\n Type = \"string\",\n Description = \"Street\"\n },\n City = new\n {\n Type = \"string\",\n Description = \"city\"\n },\n },\n Required = new[] { \"street\", \"city\" }\n }\n },\n Required = new[] { \"address\" }\n },\n new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n\nprivate FunctionToolDefinition getWeatherByAddressesTool = new(\n name: \"GetWeatherByAddresses\",\n description: \"Get weather by street and city\",\n parameters: BinaryData.FromObjectAsJson(\n new\n {\n Type = \"object\",\n Properties = new\n {\n Addresses = new\n {\n Type = \"array\",\n Description = \"A list of addresses\",\n Items = new\n {\n Type = \"object\",\n Properties = new\n {\n Street = new\n {\n Type = \"string\",\n Description = \"Street\"\n },\n City = new\n {\n Type = \"string\",\n description = \"city\"\n },\n },\n Required = new[] { \"street\", \"city\" }\n }\n },\n Unit = new\n {\n Type = \"string\",\n Enum = new[] { \"c\", \"f\" },\n },\n },\n Required = new[] { \"addresses\" }\n },\n new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n```\nUse `EnableAutoFunctionCall` to enable the auto function call:\n```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\nList toolOutputs = new();\nDictionary delegates = new();\ndelegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\ndelegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\nAIProjectClientOptions options = new();\noptions.EnableAutoFunctionCalls(delegates);", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2058815959", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2058815959", + "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. 
Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? \"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);", + "comment_created_at": "2025-04-24T16:18:50+00:00", + "comment_author": "KrzysztofCwalina", + "comment_body": "what's the error handling model for automatic tool calls? i.e. 
where do I handle errors that might happen in tool calls?", + "pr_file_module": null + }, + { + "comment_id": "2058960638", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2058815959", + "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? \"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);", + "comment_created_at": "2025-04-24T17:54:03+00:00", + "comment_author": "howieleung", + "comment_body": "When there is error, the error will be submitted back 
to the model in Json format liked \"{error: \"Error message\"}\". Then model will raise another function call to retry or figure out by its knowledge instead.", + "pr_file_module": null + }, + { + "comment_id": "2059164667", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2058815959", + "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? \"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);", + "comment_created_at": "2025-04-24T20:19:15+00:00", + 
"comment_author": "KrzysztofCwalina", + "comment_body": "I am not sure this is how we want it to work. It will hide bugs and will be hard to debug. ", + "pr_file_module": null + }, + { + "comment_id": "2060477164", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2058815959", + "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? \"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);", + "comment_created_at": "2025-04-25T15:39:49+00:00", + 
"comment_author": "howieleung", + "comment_body": "A similar solution also used in SK and asure-ai-project sdk. In python, we would do logger.warning function to log the warning. Can C# log warning?", + "pr_file_module": null + }, + { + "comment_id": "2060669666", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2058815959", + "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? \"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);", + 
"comment_created_at": "2025-04-25T18:00:04+00:00", + "comment_author": "KrzysztofCwalina", + "comment_body": "there is no really a good way to do it. This is why in CM protoype, we had APIs for the caller to issue the tool call directly and handle errors if they want: https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/cloudmachine/Azure.Projects/samples/HelloRAG/Program.cs#L57\r\n\r\nIn addition, in our automatic runner, there is a virtual method that the user can override to hook up any error handlers they want: https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/cloudmachine/Azure.Projects.AI/src/Agents/ChatRunner.cs#L146", + "pr_file_module": null + }, + { + "comment_id": "2060896976", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2058815959", + "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? \"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase 
}));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);", + "comment_created_at": "2025-04-25T21:22:03+00:00", + "comment_author": "howieleung", + "comment_body": "I just read the code in the link. I think it make sense to add handlers streaming, but I would not want to do it now for just onToolCall. It makes more sense to come up with a list of handler and implement after GA. And this is not going to be a breaking change. Are you OK?", + "pr_file_module": null + }, + { + "comment_id": "2061032463", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2058815959", + "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? 
\"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);", + "comment_created_at": "2025-04-25T23:37:09+00:00", + "comment_author": "KrzysztofCwalina", + "comment_body": "Adding it after GA will be a breaking change, won't it? Non-streaming tool calls will be intercepted and handled automatically. This means custom user code that handles these tool calls won't execute anymore. Unless you add more knobs/options to preserve compat, and then it just adds complexity to the API. ", + "pr_file_module": null + }, + { + "comment_id": "2062963457", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 49293, + "pr_file": "sdk/ai/Azure.AI.Projects/README.md", + "discussion_id": "2058815959", + "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 
60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? \"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);", + "comment_created_at": "2025-04-28T06:25:29+00:00", + "comment_author": "howieleung", + "comment_body": "No worry! beside create_run, Python SDK has a function called create_and_process_run. For non-streaming, users either call create_run followed by writing their own while-loop and call function tool manually. Or they call create_and_process_run that call create_run and embed the while-loop with auto function calls. Currently this SDK doesn't have something similar to create_and_process_run. I have discussed with Jarno this release we will do auto function call for streaming only. 
We might consider to create createAndRunProcess with auto function call for non-streaming.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2164226771", + "pr_number": 50725, + "pr_file": "sdk/identity/Azure.Identity/CHANGELOG.md", + "created_at": "2025-06-24T14:44:09+00:00", + "commented_code": "### Bugs Fixed\n\n- `ManagedIdentityCredential` now retries 410 status responses for at least 70 seconds total duration as required by [Azure IMDS documentation](https://learn.microsoft.com/en-us/azure/virtual-machines/instance-metadata-service?tabs=windows#errors-and-debugging). Previously, 410 responses were retried with the same short exponential backoff as other status codes, resulting in insufficient retry duration ([#50724](https://github.com/Azure/azure-sdk-for-net/issues/50724)).", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2164226771", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50725, + "pr_file": "sdk/identity/Azure.Identity/CHANGELOG.md", + "discussion_id": "2164226771", + "commented_code": "@@ -8,6 +8,7 @@\n \n ### Bugs Fixed\n \n+- `ManagedIdentityCredential` now retries 410 status responses for at least 70 seconds total duration as required by [Azure IMDS documentation](https://learn.microsoft.com/en-us/azure/virtual-machines/instance-metadata-service?tabs=windows#errors-and-debugging). Previously, 410 responses were retried with the same short exponential backoff as other status codes, resulting in insufficient retry duration ([#50724](https://github.com/Azure/azure-sdk-for-net/issues/50724)).", + "comment_created_at": "2025-06-24T14:44:09+00:00", + "comment_author": "scottaddie", + "comment_body": "```suggestion\r\n- `ManagedIdentityCredential` now retries 410 status responses for at least 70 seconds total duration as required by [Azure IMDS documentation](https://learn.microsoft.com/azure/virtual-machines/instance-metadata-service?tabs=windows#errors-and-debugging). Previously, 410 responses were retried with the same short exponential backoff as other status codes, resulting in insufficient retry duration ([#50724](https://github.com/Azure/azure-sdk-for-net/issues/50724)).\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2164232457", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50725, + "pr_file": "sdk/identity/Azure.Identity/CHANGELOG.md", + "discussion_id": "2164226771", + "commented_code": "@@ -8,6 +8,7 @@\n \n ### Bugs Fixed\n \n+- `ManagedIdentityCredential` now retries 410 status responses for at least 70 seconds total duration as required by [Azure IMDS documentation](https://learn.microsoft.com/en-us/azure/virtual-machines/instance-metadata-service?tabs=windows#errors-and-debugging). Previously, 410 responses were retried with the same short exponential backoff as other status codes, resulting in insufficient retry duration ([#50724](https://github.com/Azure/azure-sdk-for-net/issues/50724)).", + "comment_created_at": "2025-06-24T14:46:18+00:00", + "comment_author": "Copilot", + "comment_body": "Updated the CHANGELOG URL format as suggested. Commit 6789abc.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2124977732", + "pr_number": 50394, + "pr_file": "sdk/servicebus/Azure.Messaging.ServiceBus/CHANGELOG.md", + "created_at": "2025-06-03T21:40:43+00:00", + "commented_code": "### Other Changes\n\n- Updated retry policy behavior when the service is throttling and the `TryTimeout` is shorter than the standard throttling time of 30 seconds. 
Previously, the operation was immediately canceled with a server busy exception. With these changes, the operation will begin consuming retry attempts while throttling until either the server busy state is cleared or all configured retry attempts are exhausted. ([#50121](https://github.com/Azure/azure-sdk-for-net/issues/50121))", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2124977732", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50394, + "pr_file": "sdk/servicebus/Azure.Messaging.ServiceBus/CHANGELOG.md", + "discussion_id": "2124977732", + "commented_code": "@@ -12,6 +12,7 @@\n \n ### Other Changes\n \n+- Updated retry policy behavior when the service is throttling and the `TryTimeout` is shorter than the standard throttling time of 30 seconds. Previously, the operation was immediately canceled with a server busy exception. With these changes, the operation will begin consuming retry attempts while throttling until either the server busy state is cleared or all configured retry attempts are exhausted. ([#50121](https://github.com/Azure/azure-sdk-for-net/issues/50121))", + "comment_created_at": "2025-06-03T21:40:43+00:00", + "comment_author": "jsquire", + "comment_body": "```suggestion\r\n- Updated retry policy behavior when the service is throttling and the `TryTimeout` is shorter than the standard throttling time of 30 seconds. Previously, the operation was immediately canceled with a server busy exception. With these changes, the operation will begin consuming retry attempts while throttling until either the server busy state is cleared or all configured retry attempts are exhausted. ([#50121](https://github.com/Azure/azure-sdk-for-net/issues/50121))\r\n\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-surface-errors-appropriately.md b/_reviewers/azure-sdk-for-net-surface-errors-appropriately.md index 97447db..853d5c0 100644 --- a/_reviewers/azure-sdk-for-net-surface-errors-appropriately.md +++ b/_reviewers/azure-sdk-for-net-surface-errors-appropriately.md @@ -37,169 +37,3 @@ options.EnableAutoFunctionCalls(delegates, errorHandler: (exception, context) => ``` When dealing with transient errors like throttling or service unavailability, implement retry policies that respect documented service requirements (e.g., retrying for adequate duration) rather than failing immediately or using generic retry strategies. - - -[ - { - "discussion_id": "2058815959", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "created_at": "2025-04-24T16:18:50+00:00", - "commented_code": "while (toolOutputs.Count > 0);\n```\n\n#### Function call executed automatically\n\nIn addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n\nHere we use other functions for demonstration:\n```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\nprivate class Address\n{\n public string Street { get; set; }\n public string City { get; set; }\n}\n\nprivate int GetHumidityByAddress(Address address)\n{\n return (address.City == \"Seattle\") ? 60 : 80;\n}\n\nprivate string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n{\n string[] temps = new string[addresses.Length];\n for (int i = 0; i < addresses.Length; i++)\n {\n if (addresses[i].TryGetValue(\"city\", out string city))\n {\n temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? 
\"20\" : \"50\", unit);\n }\n else\n {\n throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n }\n }\n return temps;\n}\n```\nNow we define the function definitions:\n```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\nprivate FunctionToolDefinition geHhumidityByAddressTool = new(\n name: \"GetHumidityByAddress\",\n description: \"Get humidity by street and city\",\n parameters: BinaryData.FromObjectAsJson(\n new\n {\n Type = \"object\",\n Properties = new\n {\n Address = new\n {\n Type = \"object\",\n Properties = new\n {\n Street = new\n {\n Type = \"string\",\n Description = \"Street\"\n },\n City = new\n {\n Type = \"string\",\n Description = \"city\"\n },\n },\n Required = new[] { \"street\", \"city\" }\n }\n },\n Required = new[] { \"address\" }\n },\n new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n\nprivate FunctionToolDefinition getWeatherByAddressesTool = new(\n name: \"GetWeatherByAddresses\",\n description: \"Get weather by street and city\",\n parameters: BinaryData.FromObjectAsJson(\n new\n {\n Type = \"object\",\n Properties = new\n {\n Addresses = new\n {\n Type = \"array\",\n Description = \"A list of addresses\",\n Items = new\n {\n Type = \"object\",\n Properties = new\n {\n Street = new\n {\n Type = \"string\",\n Description = \"Street\"\n },\n City = new\n {\n Type = \"string\",\n description = \"city\"\n },\n },\n Required = new[] { \"street\", \"city\" }\n }\n },\n Unit = new\n {\n Type = \"string\",\n Enum = new[] { \"c\", \"f\" },\n },\n },\n Required = new[] { \"addresses\" }\n },\n new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n```\nUse `EnableAutoFunctionCall` to enable the auto function call:\n```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\nList toolOutputs = new();\nDictionary delegates = new();\ndelegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\ndelegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\nAIProjectClientOptions options = new();\noptions.EnableAutoFunctionCalls(delegates);", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2058815959", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2058815959", - "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? 
\"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);", - "comment_created_at": "2025-04-24T16:18:50+00:00", - "comment_author": "KrzysztofCwalina", - "comment_body": "what's the error handling model for automatic tool calls? i.e. where do I handle errors that might happen in tool calls?", - "pr_file_module": null - }, - { - "comment_id": "2058960638", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2058815959", - "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 
60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? \"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);", - "comment_created_at": "2025-04-24T17:54:03+00:00", - "comment_author": "howieleung", - "comment_body": "When there is error, the error will be submitted back to the model in Json format liked \"{error: \"Error message\"}\". Then model will raise another function call to retry or figure out by its knowledge instead.", - "pr_file_module": null - }, - { - "comment_id": "2059164667", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2058815959", - "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. 
Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? \"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);", - "comment_created_at": "2025-04-24T20:19:15+00:00", - "comment_author": "KrzysztofCwalina", - "comment_body": "I am not sure this is how we want it to work. It will hide bugs and will be hard to debug. ", - "pr_file_module": null - }, - { - "comment_id": "2060477164", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2058815959", - "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. 
Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? \"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);", - "comment_created_at": "2025-04-25T15:39:49+00:00", - "comment_author": "howieleung", - "comment_body": "A similar solution also used in SK and asure-ai-project sdk. In python, we would do logger.warning function to log the warning. 
Can C# log warning?", - "pr_file_module": null - }, - { - "comment_id": "2060669666", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2058815959", - "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? \"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);", - "comment_created_at": "2025-04-25T18:00:04+00:00", - "comment_author": "KrzysztofCwalina", - "comment_body": "there is no really a good way to do it. 
This is why in CM protoype, we had APIs for the caller to issue the tool call directly and handle errors if they want: https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/cloudmachine/Azure.Projects/samples/HelloRAG/Program.cs#L57\r\n\r\nIn addition, in our automatic runner, there is a virtual method that the user can override to hook up any error handlers they want: https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/cloudmachine/Azure.Projects.AI/src/Agents/ChatRunner.cs#L146", - "pr_file_module": null - }, - { - "comment_id": "2060896976", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2058815959", - "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? \"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List 
toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);", - "comment_created_at": "2025-04-25T21:22:03+00:00", - "comment_author": "howieleung", - "comment_body": "I just read the code in the link. I think it make sense to add handlers streaming, but I would not want to do it now for just onToolCall. It makes more sense to come up with a list of handler and implement after GA. And this is not going to be a breaking change. Are you OK?", - "pr_file_module": null - }, - { - "comment_id": "2061032463", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2058815959", - "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? 
\"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);", - "comment_created_at": "2025-04-25T23:37:09+00:00", - "comment_author": "KrzysztofCwalina", - "comment_body": "Adding it after GA will be a breaking change, won't it? Non-streaming tool calls will be intercepted and handled automatically. This means custom user code that handles these tool calls won't execute anymore. Unless you add more knobs/options to preserve compat, and then it just adds complexity to the API. ", - "pr_file_module": null - }, - { - "comment_id": "2062963457", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 49293, - "pr_file": "sdk/ai/Azure.AI.Projects/README.md", - "discussion_id": "2058815959", - "commented_code": "@@ -647,6 +648,172 @@ do\n while (toolOutputs.Count > 0);\n ```\n \n+#### Function call executed automatically\n+\n+In addition to the manual function calls, SDK supports automatic function calling. Here is the example:\n+\n+Here we use other functions for demonstration:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctions\n+private class Address\n+{\n+ public string Street { get; set; }\n+ public string City { get; set; }\n+}\n+\n+private int GetHumidityByAddress(Address address)\n+{\n+ return (address.City == \"Seattle\") ? 
60 : 80;\n+}\n+\n+private string[] GetWeatherByAddresses(Dictionary[] addresses, string unit = \"F\")\n+{\n+ string[] temps = new string[addresses.Length];\n+ for (int i = 0; i < addresses.Length; i++)\n+ {\n+ if (addresses[i].TryGetValue(\"city\", out string city))\n+ {\n+ temps[i] = string.Format(\"{0}{1}\", (city == \"Seattle\") ? \"20\" : \"50\", unit);\n+ }\n+ else\n+ {\n+ throw new ArgumentException(\"Each address must contain 'street' and 'city' keys.\");\n+ }\n+ }\n+ return temps;\n+}\n+```\n+Now we define the function definitions:\n+```C# Snippet:StreamingWithAutoFunctionCall_DefineFunctionTools\n+private FunctionToolDefinition geHhumidityByAddressTool = new(\n+ name: \"GetHumidityByAddress\",\n+ description: \"Get humidity by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Address = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ Description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Required = new[] { \"address\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+\n+private FunctionToolDefinition getWeatherByAddressesTool = new(\n+ name: \"GetWeatherByAddresses\",\n+ description: \"Get weather by street and city\",\n+ parameters: BinaryData.FromObjectAsJson(\n+ new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Addresses = new\n+ {\n+ Type = \"array\",\n+ Description = \"A list of addresses\",\n+ Items = new\n+ {\n+ Type = \"object\",\n+ Properties = new\n+ {\n+ Street = new\n+ {\n+ Type = \"string\",\n+ Description = \"Street\"\n+ },\n+ City = new\n+ {\n+ Type = \"string\",\n+ description = \"city\"\n+ },\n+ },\n+ Required = new[] { \"street\", \"city\" }\n+ }\n+ },\n+ Unit = new\n+ {\n+ Type = \"string\",\n+ Enum = new[] { \"c\", \"f\" },\n+ },\n+ },\n+ Required = new[] { \"addresses\" }\n+ },\n+ new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }));\n+```\n+Use `EnableAutoFunctionCall` to enable the auto function call:\n+```C# Snippet:StreamingWithAutoFunctionCall_EnableAutoFunctionCalls\n+List toolOutputs = new();\n+Dictionary delegates = new();\n+delegates.Add(nameof(GetWeatherByAddresses), GetWeatherByAddresses);\n+delegates.Add(nameof(GetHumidityByAddress), GetHumidityByAddress);\n+AIProjectClientOptions options = new();\n+options.EnableAutoFunctionCalls(delegates);", - "comment_created_at": "2025-04-28T06:25:29+00:00", - "comment_author": "howieleung", - "comment_body": "No worry! beside create_run, Python SDK has a function called create_and_process_run. For non-streaming, users either call create_run followed by writing their own while-loop and call function tool manually. Or they call create_and_process_run that call create_run and embed the while-loop with auto function calls. Currently this SDK doesn't have something similar to create_and_process_run. I have discussed with Jarno this release we will do auto function call for streaming only. 
We might consider to create createAndRunProcess with auto function call for non-streaming.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2164226771", - "pr_number": 50725, - "pr_file": "sdk/identity/Azure.Identity/CHANGELOG.md", - "created_at": "2025-06-24T14:44:09+00:00", - "commented_code": "### Bugs Fixed\n\n- `ManagedIdentityCredential` now retries 410 status responses for at least 70 seconds total duration as required by [Azure IMDS documentation](https://learn.microsoft.com/en-us/azure/virtual-machines/instance-metadata-service?tabs=windows#errors-and-debugging). Previously, 410 responses were retried with the same short exponential backoff as other status codes, resulting in insufficient retry duration ([#50724](https://github.com/Azure/azure-sdk-for-net/issues/50724)).", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2164226771", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50725, - "pr_file": "sdk/identity/Azure.Identity/CHANGELOG.md", - "discussion_id": "2164226771", - "commented_code": "@@ -8,6 +8,7 @@\n \n ### Bugs Fixed\n \n+- `ManagedIdentityCredential` now retries 410 status responses for at least 70 seconds total duration as required by [Azure IMDS documentation](https://learn.microsoft.com/en-us/azure/virtual-machines/instance-metadata-service?tabs=windows#errors-and-debugging). Previously, 410 responses were retried with the same short exponential backoff as other status codes, resulting in insufficient retry duration ([#50724](https://github.com/Azure/azure-sdk-for-net/issues/50724)).", - "comment_created_at": "2025-06-24T14:44:09+00:00", - "comment_author": "scottaddie", - "comment_body": "```suggestion\r\n- `ManagedIdentityCredential` now retries 410 status responses for at least 70 seconds total duration as required by [Azure IMDS documentation](https://learn.microsoft.com/azure/virtual-machines/instance-metadata-service?tabs=windows#errors-and-debugging). Previously, 410 responses were retried with the same short exponential backoff as other status codes, resulting in insufficient retry duration ([#50724](https://github.com/Azure/azure-sdk-for-net/issues/50724)).\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2164232457", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50725, - "pr_file": "sdk/identity/Azure.Identity/CHANGELOG.md", - "discussion_id": "2164226771", - "commented_code": "@@ -8,6 +8,7 @@\n \n ### Bugs Fixed\n \n+- `ManagedIdentityCredential` now retries 410 status responses for at least 70 seconds total duration as required by [Azure IMDS documentation](https://learn.microsoft.com/en-us/azure/virtual-machines/instance-metadata-service?tabs=windows#errors-and-debugging). Previously, 410 responses were retried with the same short exponential backoff as other status codes, resulting in insufficient retry duration ([#50724](https://github.com/Azure/azure-sdk-for-net/issues/50724)).", - "comment_created_at": "2025-06-24T14:46:18+00:00", - "comment_author": "Copilot", - "comment_body": "Updated the CHANGELOG URL format as suggested. Commit 6789abc.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2124977732", - "pr_number": 50394, - "pr_file": "sdk/servicebus/Azure.Messaging.ServiceBus/CHANGELOG.md", - "created_at": "2025-06-03T21:40:43+00:00", - "commented_code": "### Other Changes\n\n- Updated retry policy behavior when the service is throttling and the `TryTimeout` is shorter than the standard throttling time of 30 seconds. 
Previously, the operation was immediately canceled with a server busy exception. With these changes, the operation will begin consuming retry attempts while throttling until either the server busy state is cleared or all configured retry attempts are exhausted. ([#50121](https://github.com/Azure/azure-sdk-for-net/issues/50121))", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2124977732", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50394, - "pr_file": "sdk/servicebus/Azure.Messaging.ServiceBus/CHANGELOG.md", - "discussion_id": "2124977732", - "commented_code": "@@ -12,6 +12,7 @@\n \n ### Other Changes\n \n+- Updated retry policy behavior when the service is throttling and the `TryTimeout` is shorter than the standard throttling time of 30 seconds. Previously, the operation was immediately canceled with a server busy exception. With these changes, the operation will begin consuming retry attempts while throttling until either the server busy state is cleared or all configured retry attempts are exhausted. ([#50121](https://github.com/Azure/azure-sdk-for-net/issues/50121))", - "comment_created_at": "2025-06-03T21:40:43+00:00", - "comment_author": "jsquire", - "comment_body": "```suggestion\r\n- Updated retry policy behavior when the service is throttling and the `TryTimeout` is shorter than the standard throttling time of 30 seconds. Previously, the operation was immediately canceled with a server busy exception. With these changes, the operation will begin consuming retry attempts while throttling until either the server busy state is cleared or all configured retry attempts are exhausted. ([#50121](https://github.com/Azure/azure-sdk-for-net/issues/50121))\r\n\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-use-domain-specific-type-names.json b/_reviewers/azure-sdk-for-net-use-domain-specific-type-names.json new file mode 100644 index 0000000..7dafa21 --- /dev/null +++ b/_reviewers/azure-sdk-for-net-use-domain-specific-type-names.json @@ -0,0 +1,134 @@ +[ + { + "discussion_id": "2179522628", + "pr_number": 51009, + "pr_file": "sdk/mongodbatlas/Azure.ResourceManager.MongoDBAtlas/api/Azure.ResourceManager.MongoDBAtlas.net8.0.cs", + "created_at": "2025-07-02T09:02:01+00:00", + "commented_code": "string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class OrganizationResourceUpdateProperties : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2179522628", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 51009, + "pr_file": "sdk/mongodbatlas/Azure.ResourceManager.MongoDBAtlas/api/Azure.ResourceManager.MongoDBAtlas.net8.0.cs", + "discussion_id": "2179522628", + "commented_code": "@@ -210,4 +223,16 @@ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) 
{ throw null; }\n }\n+ public partial class OrganizationResourceUpdateProperties : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel", + "comment_created_at": "2025-07-02T09:02:01+00:00", + "comment_author": "ArcturusZhang", + "comment_body": ":( I think we should rename this to `MongoDBAtlasOrganizationUpdateProperties`.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2104120779", + "pr_number": 50236, + "pr_file": "sdk/containerapps/Azure.ResourceManager.AppContainers/api/Azure.ResourceManager.AppContainers.net8.0.cs", + "created_at": "2025-05-23T08:42:50+00:00", + "commented_code": "public static Azure.ResourceManager.AppContainers.ContainerAppManagedEnvironmentStorageData ContainerAppManagedEnvironmentStorageData(Azure.Core.ResourceIdentifier id, string name, Azure.Core.ResourceType resourceType, Azure.ResourceManager.Models.SystemData systemData, Azure.ResourceManager.AppContainers.Models.ContainerAppAzureFileProperties managedEnvironmentStorageAzureFile = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.ContainerAppManagedEnvironmentStorageData ContainerAppManagedEnvironmentStorageData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.ManagedEnvironmentStorageProperties properties = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppNameAvailabilityResult ContainerAppNameAvailabilityResult(bool? isNameAvailable = default(bool?), Azure.ResourceManager.AppContainers.Models.ContainerAppNameUnavailableReason? reason = default(Azure.ResourceManager.AppContainers.Models.ContainerAppNameUnavailableReason?), string message = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppReplicaContainer ContainerAppReplicaContainer(string name = null, string containerId = null, bool? isReady = default(bool?), bool? isStarted = default(bool?), int? restartCount = default(int?), Azure.ResourceManager.AppContainers.Models.ContainerAppContainerRunningState? runningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppContainerRunningState?), string runningStateDetails = null, string logStreamEndpoint = null, string execEndpoint = null) { throw null; }\n [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppReplicaContainer ContainerAppReplicaContainer(string name, string containerId, bool? isReady, bool? isStarted, int? restartCount, Azure.ResourceManager.AppContainers.Models.ContainerAppContainerRunningState? runningState, string runningStateDetails, string logStreamEndpoint, string execEndpoint) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppReplicaContainer ContainerAppReplicaContainer(string name = null, string containerId = null, bool? isReady = default(bool?), bool? isStarted = default(bool?), int? restartCount = default(int?), Azure.ResourceManager.AppContainers.Models.ContainerAppContainerRunningState? 
runningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppContainerRunningState?), string runningStateDetails = null, string logStreamEndpoint = null, string execEndpoint = null, string debugEndpoint = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.ContainerAppReplicaData ContainerAppReplicaData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.DateTimeOffset? createdOn = default(System.DateTimeOffset?), Azure.ResourceManager.AppContainers.Models.ContainerAppReplicaRunningState? runningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppReplicaRunningState?), string runningStateDetails = null, System.Collections.Generic.IEnumerable containers = null, System.Collections.Generic.IEnumerable initContainers = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.ContainerAppRevisionData ContainerAppRevisionData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.DateTimeOffset? createdOn = default(System.DateTimeOffset?), System.DateTimeOffset? lastActiveOn = default(System.DateTimeOffset?), string fqdn = null, Azure.ResourceManager.AppContainers.Models.ContainerAppTemplate template = null, bool? isActive = default(bool?), int? replicas = default(int?), int? trafficWeight = default(int?), string provisioningError = null, Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionHealthState? healthState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionHealthState?), Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionProvisioningState?), Azure.ResourceManager.AppContainers.Models.RevisionRunningState? runningState = default(Azure.ResourceManager.AppContainers.Models.RevisionRunningState?)) { throw null; }\n public static Azure.ResourceManager.AppContainers.ContainerAppRevisionData ContainerAppRevisionData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.DateTimeOffset? createdOn = default(System.DateTimeOffset?), System.DateTimeOffset? lastActiveOn = default(System.DateTimeOffset?), string fqdn = null, Azure.ResourceManager.AppContainers.Models.ContainerAppTemplate template = null, bool? isActive = default(bool?), int? replicas = default(int?), int? trafficWeight = default(int?), System.Collections.Generic.IEnumerable labels = null, string provisioningError = null, Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionHealthState? healthState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionHealthState?), Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionProvisioningState?), Azure.ResourceManager.AppContainers.Models.RevisionRunningState? 
runningState = default(Azure.ResourceManager.AppContainers.Models.RevisionRunningState?)) { throw null; }\n [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]\n public static Azure.ResourceManager.AppContainers.ContainerAppRevisionData ContainerAppRevisionData(Azure.Core.ResourceIdentifier id, string name, Azure.Core.ResourceType resourceType, Azure.ResourceManager.Models.SystemData systemData, System.DateTimeOffset? createdOn, System.DateTimeOffset? lastActiveOn, string fqdn, Azure.ResourceManager.AppContainers.Models.ContainerAppTemplate template, bool? isActive, int? replicas, int? trafficWeight, string provisioningError, Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionHealthState? healthState, Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionProvisioningState? provisioningState, Azure.ResourceManager.AppContainers.Models.RevisionRunningState? runningState) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppsBuildConfiguration ContainerAppsBuildConfiguration(string baseOS = null, string platform = null, string platformVersion = null, System.Collections.Generic.IEnumerable environmentVariables = null, System.Collections.Generic.IEnumerable preBuildSteps = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.ContainerAppsBuildResourceData ContainerAppsBuildResourceData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.BuildProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.BuildProvisioningState?), Azure.ResourceManager.AppContainers.Models.BuildStatus? buildStatus = default(Azure.ResourceManager.AppContainers.Models.BuildStatus?), Azure.ResourceManager.AppContainers.Models.ContainerRegistryWithCustomImage destinationContainerRegistry = null, Azure.ResourceManager.AppContainers.Models.ContainerAppsBuildConfiguration configuration = null, string logStreamEndpoint = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppSecret ContainerAppSecret(string name = null, string value = null, string identity = null, System.Uri keyVaultUri = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.ContainerAppSourceControlData ContainerAppSourceControlData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.ContainerAppSourceControlOperationState? 
operationState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppSourceControlOperationState?), System.Uri repoUri = null, string branch = null, Azure.ResourceManager.AppContainers.Models.ContainerAppGitHubActionConfiguration gitHubActionConfiguration = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.ContainerAppsPatchResourceData ContainerAppsPatchResourceData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.PatchProperties properties = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppUsage ContainerAppUsage(Azure.ResourceManager.AppContainers.Models.ContainerAppUsageUnit unit = default(Azure.ResourceManager.AppContainers.Models.ContainerAppUsageUnit), float currentValue = 0f, float limit = 0f, Azure.ResourceManager.AppContainers.Models.ContainerAppUsageName name = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppUsageName ContainerAppUsageName(string value = null, string localizedValue = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppWorkloadProfileState ContainerAppWorkloadProfileState(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.ContainerAppWorkloadProfileStateProperties properties = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerExecutionStatus ContainerExecutionStatus(string name = null, int? code = default(int?), string additionalInformation = null, string status = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.DaprComponentResiliencyPolicyData DaprComponentResiliencyPolicyData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.DaprComponentResiliencyPolicyConfiguration inboundPolicy = null, Azure.ResourceManager.AppContainers.Models.DaprComponentResiliencyPolicyConfiguration outboundPolicy = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.DaprSubscriptionData DaprSubscriptionData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, string pubsubName = null, string topic = null, string deadLetterTopic = null, Azure.ResourceManager.AppContainers.Models.DaprSubscriptionRoutes routes = null, System.Collections.Generic.IEnumerable scopes = null, System.Collections.Generic.IDictionary metadata = null, Azure.ResourceManager.AppContainers.Models.DaprSubscriptionBulkSubscribeConfig bulkSubscribe = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.DotNetComponentData DotNetComponentData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.DotNetComponentType? 
componentType = default(Azure.ResourceManager.AppContainers.Models.DotNetComponentType?), Azure.ResourceManager.AppContainers.Models.DotNetComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.DotNetComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, System.Collections.Generic.IEnumerable serviceBinds = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.HttpRouteConfigData HttpRouteConfigData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.HttpRouteConfigProperties properties = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.HttpRouteConfigProperties HttpRouteConfigProperties(Azure.ResourceManager.AppContainers.Models.HttpRouteProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.HttpRouteProvisioningState?), System.Collections.Generic.IEnumerable provisioningErrors = null, string fqdn = null, System.Collections.Generic.IEnumerable customDomains = null, System.Collections.Generic.IEnumerable rules = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.HttpRouteProvisioningErrors HttpRouteProvisioningErrors(System.DateTimeOffset? timestamp = default(System.DateTimeOffset?), string message = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.JavaComponentData JavaComponentData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.JavaComponentProperties properties = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.JavaComponentProperties JavaComponentProperties(string componentType = null, Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.LabelHistoryData LabelHistoryData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.Collections.Generic.IEnumerable labelHistoryRecords = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.LabelHistoryRecordItem LabelHistoryRecordItem(string revision = null, System.DateTimeOffset? start = default(System.DateTimeOffset?), System.DateTimeOffset? stop = default(System.DateTimeOffset?), Azure.ResourceManager.AppContainers.Models.Status? 
status = default(Azure.ResourceManager.AppContainers.Models.Status?)) { throw null; }\n public static Azure.ResourceManager.AppContainers.LogicAppData LogicAppData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.BinaryData properties = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.MaintenanceConfigurationResourceData MaintenanceConfigurationResourceData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.Collections.Generic.IEnumerable scheduledEntries = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ManagedCertificateProperties ManagedCertificateProperties(Azure.ResourceManager.AppContainers.Models.ContainerAppCertificateProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppCertificateProvisioningState?), string subjectName = null, string error = null, Azure.ResourceManager.AppContainers.Models.ManagedCertificateDomainControlValidation? domainControlValidation = default(Azure.ResourceManager.AppContainers.Models.ManagedCertificateDomainControlValidation?), string validationToken = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.NacosComponent NacosComponent(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null, string ingressFqdn = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.PatchDetails PatchDetails(string targetContainerName = null, string targetImage = null, System.DateTimeOffset lastDetectionOn = default(System.DateTimeOffset), Azure.ResourceManager.AppContainers.Models.DetectionStatus detectionStatus = default(Azure.ResourceManager.AppContainers.Models.DetectionStatus), string newImageName = null, Azure.ResourceManager.AppContainers.Models.PatchDetailsNewLayer newLayer = null, Azure.ResourceManager.AppContainers.Models.PatchDetailsOldLayer oldLayer = null, Azure.ResourceManager.AppContainers.Models.PatchType? patchType = default(Azure.ResourceManager.AppContainers.Models.PatchType?)) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.PatchDetailsNewLayer PatchDetailsNewLayer(string name = null, string frameworkAndVersion = null, string osAndVersion = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.PatchDetailsOldLayer PatchDetailsOldLayer(string name = null, string frameworkAndVersion = null, string osAndVersion = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.PatchProperties PatchProperties(Azure.Core.ResourceIdentifier targetEnvironmentId = null, Azure.Core.ResourceIdentifier targetContainerAppId = null, Azure.Core.ResourceIdentifier targetRevisionId = null, Azure.ResourceManager.AppContainers.Models.PatchApplyStatus? patchApplyStatus = default(Azure.ResourceManager.AppContainers.Models.PatchApplyStatus?), System.DateTimeOffset? createdOn = default(System.DateTimeOffset?), System.DateTimeOffset? 
lastModifiedOn = default(System.DateTimeOffset?), System.Collections.Generic.IEnumerable patchDetails = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ReplicaExecutionStatus ReplicaExecutionStatus(string name = null, System.Collections.Generic.IEnumerable containers = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.SessionPoolData SessionPoolData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.Collections.Generic.IDictionary tags = null, Azure.Core.AzureLocation location = default(Azure.Core.AzureLocation), Azure.ResourceManager.Models.ManagedServiceIdentity identity = null, Azure.Core.ResourceIdentifier environmentId = null, Azure.ResourceManager.AppContainers.Models.ContainerType? containerType = default(Azure.ResourceManager.AppContainers.Models.ContainerType?), Azure.ResourceManager.AppContainers.Models.PoolManagementType? poolManagementType = default(Azure.ResourceManager.AppContainers.Models.PoolManagementType?), int? nodeCount = default(int?), Azure.ResourceManager.AppContainers.Models.SessionPoolScaleConfiguration scaleConfiguration = null, System.Collections.Generic.IEnumerable secrets = null, Azure.ResourceManager.AppContainers.Models.SessionPoolLifecycleConfiguration dynamicPoolLifecycleConfiguration = null, Azure.ResourceManager.AppContainers.Models.CustomContainerTemplate customContainerTemplate = null, Azure.ResourceManager.AppContainers.Models.SessionNetworkStatus? sessionNetworkStatus = default(Azure.ResourceManager.AppContainers.Models.SessionNetworkStatus?), System.Uri poolManagementEndpoint = null, Azure.ResourceManager.AppContainers.Models.SessionPoolProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.SessionPoolProvisioningState?), System.Collections.Generic.IEnumerable managedIdentitySettings = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.SpringBootAdminComponent SpringBootAdminComponent(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null, string ingressFqdn = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.SpringCloudConfigComponent SpringCloudConfigComponent(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.SpringCloudEurekaComponent SpringCloudEurekaComponent(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? 
provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null, string ingressFqdn = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.SpringCloudGatewayComponent SpringCloudGatewayComponent(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null, string ingressFqdn = null, System.Collections.Generic.IEnumerable springCloudGatewayRoutes = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.WorkflowEnvelopeData WorkflowEnvelopeData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, string kind = null, Azure.Core.AzureLocation? location = default(Azure.Core.AzureLocation?), Azure.ResourceManager.AppContainers.Models.WorkflowEnvelopeProperties properties = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.WorkflowEnvelopeProperties WorkflowEnvelopeProperties(System.BinaryData files = null, Azure.ResourceManager.AppContainers.Models.WorkflowState? flowState = default(Azure.ResourceManager.AppContainers.Models.WorkflowState?), Azure.ResourceManager.AppContainers.Models.WorkflowHealth health = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.WorkflowHealth WorkflowHealth(Azure.ResourceManager.AppContainers.Models.WorkflowHealthState state = Azure.ResourceManager.AppContainers.Models.WorkflowHealthState.Unknown, Azure.ResponseError error = null) { throw null; }\n }\n public partial class BlobStorageTokenStore : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public BlobStorageTokenStore() { }\n public System.Uri BlobContainerUri { get { throw null; } set { } }\n public string ClientId { get { throw null; } set { } }\n public string ManagedIdentityResourceId { get { throw null; } set { } }\n public string SasUrlSettingName { get { throw null; } set { } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.BlobStorageTokenStore System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.BlobStorageTokenStore System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData 
System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class BuildConfiguration : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public BuildConfiguration() { }\n public string BaseOS { get { throw null; } set { } }\n public System.Collections.Generic.IList EnvironmentVariables { get { throw null; } }\n public string Platform { get { throw null; } set { } }\n public string PlatformVersion { get { throw null; } set { } }\n public System.Collections.Generic.IList PreBuildSteps { get { throw null; } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.BuildConfiguration System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.BuildConfiguration System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n [System.Runtime.InteropServices.StructLayoutAttribute(System.Runtime.InteropServices.LayoutKind.Sequential)]\n public readonly partial struct ContainerAppAccessMode : System.IEquatable\n public readonly partial struct BuilderProvisioningState : System.IEquatable", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2104120779", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50236, + "pr_file": "sdk/containerapps/Azure.ResourceManager.AppContainers/api/Azure.ResourceManager.AppContainers.net8.0.cs", + "discussion_id": "2104120779", + "commented_code": "@@ -1465,66 +2391,301 @@ public static partial class ArmAppContainersModelFactory\n public static Azure.ResourceManager.AppContainers.ContainerAppManagedEnvironmentStorageData ContainerAppManagedEnvironmentStorageData(Azure.Core.ResourceIdentifier id, string name, Azure.Core.ResourceType resourceType, Azure.ResourceManager.Models.SystemData systemData, Azure.ResourceManager.AppContainers.Models.ContainerAppAzureFileProperties managedEnvironmentStorageAzureFile = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.ContainerAppManagedEnvironmentStorageData ContainerAppManagedEnvironmentStorageData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.ManagedEnvironmentStorageProperties properties = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppNameAvailabilityResult ContainerAppNameAvailabilityResult(bool? isNameAvailable = default(bool?), Azure.ResourceManager.AppContainers.Models.ContainerAppNameUnavailableReason? 
reason = default(Azure.ResourceManager.AppContainers.Models.ContainerAppNameUnavailableReason?), string message = null) { throw null; }\n- public static Azure.ResourceManager.AppContainers.Models.ContainerAppReplicaContainer ContainerAppReplicaContainer(string name = null, string containerId = null, bool? isReady = default(bool?), bool? isStarted = default(bool?), int? restartCount = default(int?), Azure.ResourceManager.AppContainers.Models.ContainerAppContainerRunningState? runningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppContainerRunningState?), string runningStateDetails = null, string logStreamEndpoint = null, string execEndpoint = null) { throw null; }\n+ [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]\n+ public static Azure.ResourceManager.AppContainers.Models.ContainerAppReplicaContainer ContainerAppReplicaContainer(string name, string containerId, bool? isReady, bool? isStarted, int? restartCount, Azure.ResourceManager.AppContainers.Models.ContainerAppContainerRunningState? runningState, string runningStateDetails, string logStreamEndpoint, string execEndpoint) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.ContainerAppReplicaContainer ContainerAppReplicaContainer(string name = null, string containerId = null, bool? isReady = default(bool?), bool? isStarted = default(bool?), int? restartCount = default(int?), Azure.ResourceManager.AppContainers.Models.ContainerAppContainerRunningState? runningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppContainerRunningState?), string runningStateDetails = null, string logStreamEndpoint = null, string execEndpoint = null, string debugEndpoint = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.ContainerAppReplicaData ContainerAppReplicaData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.DateTimeOffset? createdOn = default(System.DateTimeOffset?), Azure.ResourceManager.AppContainers.Models.ContainerAppReplicaRunningState? runningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppReplicaRunningState?), string runningStateDetails = null, System.Collections.Generic.IEnumerable containers = null, System.Collections.Generic.IEnumerable initContainers = null) { throw null; }\n- public static Azure.ResourceManager.AppContainers.ContainerAppRevisionData ContainerAppRevisionData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.DateTimeOffset? createdOn = default(System.DateTimeOffset?), System.DateTimeOffset? lastActiveOn = default(System.DateTimeOffset?), string fqdn = null, Azure.ResourceManager.AppContainers.Models.ContainerAppTemplate template = null, bool? isActive = default(bool?), int? replicas = default(int?), int? trafficWeight = default(int?), string provisioningError = null, Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionHealthState? healthState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionHealthState?), Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionProvisioningState? 
provisioningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionProvisioningState?), Azure.ResourceManager.AppContainers.Models.RevisionRunningState? runningState = default(Azure.ResourceManager.AppContainers.Models.RevisionRunningState?)) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.ContainerAppRevisionData ContainerAppRevisionData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.DateTimeOffset? createdOn = default(System.DateTimeOffset?), System.DateTimeOffset? lastActiveOn = default(System.DateTimeOffset?), string fqdn = null, Azure.ResourceManager.AppContainers.Models.ContainerAppTemplate template = null, bool? isActive = default(bool?), int? replicas = default(int?), int? trafficWeight = default(int?), System.Collections.Generic.IEnumerable labels = null, string provisioningError = null, Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionHealthState? healthState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionHealthState?), Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionProvisioningState?), Azure.ResourceManager.AppContainers.Models.RevisionRunningState? runningState = default(Azure.ResourceManager.AppContainers.Models.RevisionRunningState?)) { throw null; }\n+ [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]\n+ public static Azure.ResourceManager.AppContainers.ContainerAppRevisionData ContainerAppRevisionData(Azure.Core.ResourceIdentifier id, string name, Azure.Core.ResourceType resourceType, Azure.ResourceManager.Models.SystemData systemData, System.DateTimeOffset? createdOn, System.DateTimeOffset? lastActiveOn, string fqdn, Azure.ResourceManager.AppContainers.Models.ContainerAppTemplate template, bool? isActive, int? replicas, int? trafficWeight, string provisioningError, Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionHealthState? healthState, Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionProvisioningState? provisioningState, Azure.ResourceManager.AppContainers.Models.RevisionRunningState? runningState) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.ContainerAppsBuildConfiguration ContainerAppsBuildConfiguration(string baseOS = null, string platform = null, string platformVersion = null, System.Collections.Generic.IEnumerable environmentVariables = null, System.Collections.Generic.IEnumerable preBuildSteps = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.ContainerAppsBuildResourceData ContainerAppsBuildResourceData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.BuildProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.BuildProvisioningState?), Azure.ResourceManager.AppContainers.Models.BuildStatus? 
buildStatus = default(Azure.ResourceManager.AppContainers.Models.BuildStatus?), Azure.ResourceManager.AppContainers.Models.ContainerRegistryWithCustomImage destinationContainerRegistry = null, Azure.ResourceManager.AppContainers.Models.ContainerAppsBuildConfiguration configuration = null, string logStreamEndpoint = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppSecret ContainerAppSecret(string name = null, string value = null, string identity = null, System.Uri keyVaultUri = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.ContainerAppSourceControlData ContainerAppSourceControlData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.ContainerAppSourceControlOperationState? operationState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppSourceControlOperationState?), System.Uri repoUri = null, string branch = null, Azure.ResourceManager.AppContainers.Models.ContainerAppGitHubActionConfiguration gitHubActionConfiguration = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.ContainerAppsPatchResourceData ContainerAppsPatchResourceData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.PatchProperties properties = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppUsage ContainerAppUsage(Azure.ResourceManager.AppContainers.Models.ContainerAppUsageUnit unit = default(Azure.ResourceManager.AppContainers.Models.ContainerAppUsageUnit), float currentValue = 0f, float limit = 0f, Azure.ResourceManager.AppContainers.Models.ContainerAppUsageName name = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppUsageName ContainerAppUsageName(string value = null, string localizedValue = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppWorkloadProfileState ContainerAppWorkloadProfileState(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.ContainerAppWorkloadProfileStateProperties properties = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.ContainerExecutionStatus ContainerExecutionStatus(string name = null, int? 
code = default(int?), string additionalInformation = null, string status = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.DaprComponentResiliencyPolicyData DaprComponentResiliencyPolicyData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.DaprComponentResiliencyPolicyConfiguration inboundPolicy = null, Azure.ResourceManager.AppContainers.Models.DaprComponentResiliencyPolicyConfiguration outboundPolicy = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.DaprSubscriptionData DaprSubscriptionData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, string pubsubName = null, string topic = null, string deadLetterTopic = null, Azure.ResourceManager.AppContainers.Models.DaprSubscriptionRoutes routes = null, System.Collections.Generic.IEnumerable scopes = null, System.Collections.Generic.IDictionary metadata = null, Azure.ResourceManager.AppContainers.Models.DaprSubscriptionBulkSubscribeConfig bulkSubscribe = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.DotNetComponentData DotNetComponentData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.DotNetComponentType? componentType = default(Azure.ResourceManager.AppContainers.Models.DotNetComponentType?), Azure.ResourceManager.AppContainers.Models.DotNetComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.DotNetComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, System.Collections.Generic.IEnumerable serviceBinds = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.HttpRouteConfigData HttpRouteConfigData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.HttpRouteConfigProperties properties = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.HttpRouteConfigProperties HttpRouteConfigProperties(Azure.ResourceManager.AppContainers.Models.HttpRouteProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.HttpRouteProvisioningState?), System.Collections.Generic.IEnumerable provisioningErrors = null, string fqdn = null, System.Collections.Generic.IEnumerable customDomains = null, System.Collections.Generic.IEnumerable rules = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.HttpRouteProvisioningErrors HttpRouteProvisioningErrors(System.DateTimeOffset? 
timestamp = default(System.DateTimeOffset?), string message = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.JavaComponentData JavaComponentData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.JavaComponentProperties properties = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.JavaComponentProperties JavaComponentProperties(string componentType = null, Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.LabelHistoryData LabelHistoryData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.Collections.Generic.IEnumerable labelHistoryRecords = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.LabelHistoryRecordItem LabelHistoryRecordItem(string revision = null, System.DateTimeOffset? start = default(System.DateTimeOffset?), System.DateTimeOffset? stop = default(System.DateTimeOffset?), Azure.ResourceManager.AppContainers.Models.Status? status = default(Azure.ResourceManager.AppContainers.Models.Status?)) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.LogicAppData LogicAppData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.BinaryData properties = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.MaintenanceConfigurationResourceData MaintenanceConfigurationResourceData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.Collections.Generic.IEnumerable scheduledEntries = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ManagedCertificateProperties ManagedCertificateProperties(Azure.ResourceManager.AppContainers.Models.ContainerAppCertificateProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppCertificateProvisioningState?), string subjectName = null, string error = null, Azure.ResourceManager.AppContainers.Models.ManagedCertificateDomainControlValidation? domainControlValidation = default(Azure.ResourceManager.AppContainers.Models.ManagedCertificateDomainControlValidation?), string validationToken = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.NacosComponent NacosComponent(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? 
provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null, string ingressFqdn = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.PatchDetails PatchDetails(string targetContainerName = null, string targetImage = null, System.DateTimeOffset lastDetectionOn = default(System.DateTimeOffset), Azure.ResourceManager.AppContainers.Models.DetectionStatus detectionStatus = default(Azure.ResourceManager.AppContainers.Models.DetectionStatus), string newImageName = null, Azure.ResourceManager.AppContainers.Models.PatchDetailsNewLayer newLayer = null, Azure.ResourceManager.AppContainers.Models.PatchDetailsOldLayer oldLayer = null, Azure.ResourceManager.AppContainers.Models.PatchType? patchType = default(Azure.ResourceManager.AppContainers.Models.PatchType?)) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.PatchDetailsNewLayer PatchDetailsNewLayer(string name = null, string frameworkAndVersion = null, string osAndVersion = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.PatchDetailsOldLayer PatchDetailsOldLayer(string name = null, string frameworkAndVersion = null, string osAndVersion = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.PatchProperties PatchProperties(Azure.Core.ResourceIdentifier targetEnvironmentId = null, Azure.Core.ResourceIdentifier targetContainerAppId = null, Azure.Core.ResourceIdentifier targetRevisionId = null, Azure.ResourceManager.AppContainers.Models.PatchApplyStatus? patchApplyStatus = default(Azure.ResourceManager.AppContainers.Models.PatchApplyStatus?), System.DateTimeOffset? createdOn = default(System.DateTimeOffset?), System.DateTimeOffset? lastModifiedOn = default(System.DateTimeOffset?), System.Collections.Generic.IEnumerable patchDetails = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.ReplicaExecutionStatus ReplicaExecutionStatus(string name = null, System.Collections.Generic.IEnumerable containers = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.SessionPoolData SessionPoolData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.Collections.Generic.IDictionary tags = null, Azure.Core.AzureLocation location = default(Azure.Core.AzureLocation), Azure.ResourceManager.Models.ManagedServiceIdentity identity = null, Azure.Core.ResourceIdentifier environmentId = null, Azure.ResourceManager.AppContainers.Models.ContainerType? containerType = default(Azure.ResourceManager.AppContainers.Models.ContainerType?), Azure.ResourceManager.AppContainers.Models.PoolManagementType? poolManagementType = default(Azure.ResourceManager.AppContainers.Models.PoolManagementType?), int? 
nodeCount = default(int?), Azure.ResourceManager.AppContainers.Models.SessionPoolScaleConfiguration scaleConfiguration = null, System.Collections.Generic.IEnumerable secrets = null, Azure.ResourceManager.AppContainers.Models.SessionPoolLifecycleConfiguration dynamicPoolLifecycleConfiguration = null, Azure.ResourceManager.AppContainers.Models.CustomContainerTemplate customContainerTemplate = null, Azure.ResourceManager.AppContainers.Models.SessionNetworkStatus? sessionNetworkStatus = default(Azure.ResourceManager.AppContainers.Models.SessionNetworkStatus?), System.Uri poolManagementEndpoint = null, Azure.ResourceManager.AppContainers.Models.SessionPoolProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.SessionPoolProvisioningState?), System.Collections.Generic.IEnumerable managedIdentitySettings = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.SpringBootAdminComponent SpringBootAdminComponent(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null, string ingressFqdn = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.SpringCloudConfigComponent SpringCloudConfigComponent(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.SpringCloudEurekaComponent SpringCloudEurekaComponent(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null, string ingressFqdn = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.SpringCloudGatewayComponent SpringCloudGatewayComponent(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null, string ingressFqdn = null, System.Collections.Generic.IEnumerable springCloudGatewayRoutes = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.WorkflowEnvelopeData WorkflowEnvelopeData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, string kind = null, Azure.Core.AzureLocation? 
location = default(Azure.Core.AzureLocation?), Azure.ResourceManager.AppContainers.Models.WorkflowEnvelopeProperties properties = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.WorkflowEnvelopeProperties WorkflowEnvelopeProperties(System.BinaryData files = null, Azure.ResourceManager.AppContainers.Models.WorkflowState? flowState = default(Azure.ResourceManager.AppContainers.Models.WorkflowState?), Azure.ResourceManager.AppContainers.Models.WorkflowHealth health = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.WorkflowHealth WorkflowHealth(Azure.ResourceManager.AppContainers.Models.WorkflowHealthState state = Azure.ResourceManager.AppContainers.Models.WorkflowHealthState.Unknown, Azure.ResponseError error = null) { throw null; }\n+ }\n+ public partial class BlobStorageTokenStore : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public BlobStorageTokenStore() { }\n+ public System.Uri BlobContainerUri { get { throw null; } set { } }\n+ public string ClientId { get { throw null; } set { } }\n+ public string ManagedIdentityResourceId { get { throw null; } set { } }\n+ public string SasUrlSettingName { get { throw null; } set { } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.BlobStorageTokenStore System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.BlobStorageTokenStore System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class BuildConfiguration : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public BuildConfiguration() { }\n+ public string BaseOS { get { throw null; } set { } }\n+ public System.Collections.Generic.IList EnvironmentVariables { get { throw null; } }\n+ public string Platform { get { throw null; } set { } }\n+ public string PlatformVersion { get { throw null; } set { } }\n+ public System.Collections.Generic.IList PreBuildSteps { get { throw null; } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.BuildConfiguration System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.BuildConfiguration 
System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n [System.Runtime.InteropServices.StructLayoutAttribute(System.Runtime.InteropServices.LayoutKind.Sequential)]\n- public readonly partial struct ContainerAppAccessMode : System.IEquatable\n+ public readonly partial struct BuilderProvisioningState : System.IEquatable", + "comment_created_at": "2025-05-23T08:42:50+00:00", + "comment_author": "ArcturusZhang", + "comment_body": "we should rename this to `AppContainersBuilderProvisioningState`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2104126288", + "pr_number": 50236, + "pr_file": "sdk/containerapps/Azure.ResourceManager.AppContainers/api/Azure.ResourceManager.AppContainers.net8.0.cs", + "created_at": "2025-05-23T08:45:45+00:00", + "commented_code": "string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class ContainerExecutionStatus : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n internal ContainerExecutionStatus() { }\n public string AdditionalInformation { get { throw null; } }\n public int? 
Code { get { throw null; } }\n public string Name { get { throw null; } }\n public string Status { get { throw null; } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.ContainerExecutionStatus System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.ContainerExecutionStatus System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class ContainerRegistry : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2104126288", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50236, + "pr_file": "sdk/containerapps/Azure.ResourceManager.AppContainers/api/Azure.ResourceManager.AppContainers.net8.0.cs", + "discussion_id": "2104126288", + "commented_code": "@@ -3686,6 +4892,44 @@ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n+ public partial class ContainerExecutionStatus : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ internal ContainerExecutionStatus() { }\n+ public string AdditionalInformation { get { throw null; } }\n+ public int? 
Code { get { throw null; } }\n+ public string Name { get { throw null; } }\n+ public string Status { get { throw null; } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.ContainerExecutionStatus System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.ContainerExecutionStatus System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class ContainerRegistry : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel", + "comment_created_at": "2025-05-23T08:45:45+00:00", + "comment_author": "ArcturusZhang", + "comment_body": "this name collides with the namespace of another RP, we should rename it.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2104143986", + "pr_number": 50236, + "pr_file": "sdk/containerapps/Azure.ResourceManager.AppContainers/api/Azure.ResourceManager.AppContainers.net8.0.cs", + "created_at": "2025-05-23T08:54:42+00:00", + "commented_code": "string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class Header : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public Header() { }\n public string Key { get { throw null; } set { } }\n public string Value { get { throw null; } set { } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.Header System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.Header System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class HeaderMatch : System.ClientModel.Primitives.IJsonModel, 
System.ClientModel.Primitives.IPersistableModel\n {\n public HeaderMatch() { }\n public string ExactMatch { get { throw null; } set { } }\n public string Header { get { throw null; } set { } }\n public string PrefixMatch { get { throw null; } set { } }\n public string RegexMatch { get { throw null; } set { } }\n public string SuffixMatch { get { throw null; } set { } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.HeaderMatch System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.HeaderMatch System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class HttpConnectionPool : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public HttpConnectionPool() { }\n public int? Http1MaxPendingRequests { get { throw null; } set { } }\n public int? Http2MaxRequests { get { throw null; } set { } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.HttpConnectionPool System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.HttpConnectionPool System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class HttpGet : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2104143986", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50236, + "pr_file": "sdk/containerapps/Azure.ResourceManager.AppContainers/api/Azure.ResourceManager.AppContainers.net8.0.cs", + "discussion_id": "2104143986", + "commented_code": "@@ -3742,6 +5242,222 @@ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions 
options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n+ public partial class Header : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public Header() { }\n+ public string Key { get { throw null; } set { } }\n+ public string Value { get { throw null; } set { } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.Header System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.Header System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class HeaderMatch : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public HeaderMatch() { }\n+ public string ExactMatch { get { throw null; } set { } }\n+ public string Header { get { throw null; } set { } }\n+ public string PrefixMatch { get { throw null; } set { } }\n+ public string RegexMatch { get { throw null; } set { } }\n+ public string SuffixMatch { get { throw null; } set { } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.HeaderMatch System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.HeaderMatch System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class HttpConnectionPool : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public HttpConnectionPool() { }\n+ public int? Http1MaxPendingRequests { get { throw null; } set { } }\n+ public int? 
Http2MaxRequests { get { throw null; } set { } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.HttpConnectionPool System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.HttpConnectionPool System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class HttpGet : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel", + "comment_created_at": "2025-05-23T08:54:42+00:00", + "comment_author": "ArcturusZhang", + "comment_body": "this is very likely to collide with system types, we need to specify its name", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1986643644", + "pr_number": 48446, + "pr_file": "sdk/impactreporting/Azure.ResourceManager.ImpactReporting/api/Azure.ResourceManager.ImpactReporting.net8.0.cs", + "created_at": "2025-03-10T05:50:44+00:00", + "commented_code": "namespace Azure.ResourceManager.ImpactReporting\n{\n public partial class ConnectorCollection : Azure.ResourceManager.ArmCollection, System.Collections.Generic.IAsyncEnumerable, System.Collections.Generic.IEnumerable, System.Collections.IEnumerable\n {\n protected ConnectorCollection() { }\n public virtual Azure.ResourceManager.ArmOperation CreateOrUpdate(Azure.WaitUntil waitUntil, string connectorName, Azure.ResourceManager.ImpactReporting.ConnectorData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> CreateOrUpdateAsync(Azure.WaitUntil waitUntil, string connectorName, Azure.ResourceManager.ImpactReporting.ConnectorData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Exists(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> ExistsAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Get(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Pageable GetAll(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.AsyncPageable GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetAsync(string 
connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.NullableResponse GetIfExists(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetIfExistsAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n System.Collections.Generic.IAsyncEnumerator System.Collections.Generic.IAsyncEnumerable.GetAsyncEnumerator(System.Threading.CancellationToken cancellationToken) { throw null; }\n System.Collections.Generic.IEnumerator System.Collections.Generic.IEnumerable.GetEnumerator() { throw null; }\n System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { throw null; }\n }\n public partial class ConnectorData : Azure.ResourceManager.Models.ResourceData, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "1986643644", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 48446, + "pr_file": "sdk/impactreporting/Azure.ResourceManager.ImpactReporting/api/Azure.ResourceManager.ImpactReporting.net8.0.cs", + "discussion_id": "1986643644", + "commented_code": "@@ -0,0 +1,616 @@\n+namespace Azure.ResourceManager.ImpactReporting\n+{\n+ public partial class ConnectorCollection : Azure.ResourceManager.ArmCollection, System.Collections.Generic.IAsyncEnumerable, System.Collections.Generic.IEnumerable, System.Collections.IEnumerable\n+ {\n+ protected ConnectorCollection() { }\n+ public virtual Azure.ResourceManager.ArmOperation CreateOrUpdate(Azure.WaitUntil waitUntil, string connectorName, Azure.ResourceManager.ImpactReporting.ConnectorData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> CreateOrUpdateAsync(Azure.WaitUntil waitUntil, string connectorName, Azure.ResourceManager.ImpactReporting.ConnectorData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Exists(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> ExistsAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Get(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Pageable GetAll(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.AsyncPageable GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.NullableResponse GetIfExists(string connectorName, System.Threading.CancellationToken cancellationToken = 
default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetIfExistsAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ System.Collections.Generic.IAsyncEnumerator System.Collections.Generic.IAsyncEnumerable.GetAsyncEnumerator(System.Threading.CancellationToken cancellationToken) { throw null; }\n+ System.Collections.Generic.IEnumerator System.Collections.Generic.IEnumerable.GetEnumerator() { throw null; }\n+ System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { throw null; }\n+ }\n+ public partial class ConnectorData : Azure.ResourceManager.Models.ResourceData, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel", + "comment_created_at": "2025-03-10T05:50:44+00:00", + "comment_author": "ArcturusZhang", + "comment_body": "we need a better name than this `ConnectorData` - this is quite generic.\r\nWe need to add a `@@clientName` in your spec to rename this model to a more verbose name.\r\nPer rules in the .net generator, the corresponding resource and collection class will change accordingly.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1986669206", + "pr_number": 48446, + "pr_file": "sdk/impactreporting/Azure.ResourceManager.ImpactReporting/api/Azure.ResourceManager.ImpactReporting.net8.0.cs", + "created_at": "2025-03-10T06:17:01+00:00", + "commented_code": "namespace Azure.ResourceManager.ImpactReporting\n{\n public partial class ConnectorCollection : Azure.ResourceManager.ArmCollection, System.Collections.Generic.IAsyncEnumerable, System.Collections.Generic.IEnumerable, System.Collections.IEnumerable\n {\n protected ConnectorCollection() { }\n public virtual Azure.ResourceManager.ArmOperation CreateOrUpdate(Azure.WaitUntil waitUntil, string connectorName, Azure.ResourceManager.ImpactReporting.ConnectorData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> CreateOrUpdateAsync(Azure.WaitUntil waitUntil, string connectorName, Azure.ResourceManager.ImpactReporting.ConnectorData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Exists(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> ExistsAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Get(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Pageable GetAll(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.AsyncPageable GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.NullableResponse GetIfExists(string connectorName, System.Threading.CancellationToken 
cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetIfExistsAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n System.Collections.Generic.IAsyncEnumerator System.Collections.Generic.IAsyncEnumerable.GetAsyncEnumerator(System.Threading.CancellationToken cancellationToken) { throw null; }\n System.Collections.Generic.IEnumerator System.Collections.Generic.IEnumerable.GetEnumerator() { throw null; }\n System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { throw null; }\n }\n public partial class ConnectorData : Azure.ResourceManager.Models.ResourceData, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public ConnectorData() { }\n public Azure.ResourceManager.ImpactReporting.Models.ConnectorProperties Properties { get { throw null; } set { } }\n protected override void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.ConnectorData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.ConnectorData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class ConnectorResource : Azure.ResourceManager.ArmResource, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public static readonly Azure.Core.ResourceType ResourceType;\n protected ConnectorResource() { }\n public virtual Azure.ResourceManager.ImpactReporting.ConnectorData Data { get { throw null; } }\n public virtual bool HasData { get { throw null; } }\n public static Azure.Core.ResourceIdentifier CreateResourceIdentifier(string subscriptionId, string connectorName) { throw null; }\n public virtual Azure.ResourceManager.ArmOperation Delete(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task DeleteAsync(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Get(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n Azure.ResourceManager.ImpactReporting.ConnectorData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) 
{ throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.ConnectorData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n public virtual Azure.Response Update(Azure.ResourceManager.ImpactReporting.Models.ConnectorPatch patch, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> UpdateAsync(Azure.ResourceManager.ImpactReporting.Models.ConnectorPatch patch, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n }\n public partial class ImpactCategoryCollection : Azure.ResourceManager.ArmCollection\n {\n protected ImpactCategoryCollection() { }\n public virtual Azure.Response Exists(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> ExistsAsync(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Get(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Pageable GetAll(string resourceType, string categoryName = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.AsyncPageable GetAllAsync(string resourceType, string categoryName = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetAsync(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.NullableResponse GetIfExists(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetIfExistsAsync(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n }\n public partial class ImpactCategoryData : Azure.ResourceManager.Models.ResourceData, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n internal ImpactCategoryData() { }\n public Azure.ResourceManager.ImpactReporting.Models.ImpactCategoryProperties Properties { get { throw null; } }\n protected override void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.ImpactCategoryData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, 
System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.ImpactCategoryData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class ImpactCategoryResource : Azure.ResourceManager.ArmResource, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public static readonly Azure.Core.ResourceType ResourceType;\n protected ImpactCategoryResource() { }\n public virtual Azure.ResourceManager.ImpactReporting.ImpactCategoryData Data { get { throw null; } }\n public virtual bool HasData { get { throw null; } }\n public static Azure.Core.ResourceIdentifier CreateResourceIdentifier(string subscriptionId, string impactCategoryName) { throw null; }\n public virtual Azure.Response Get(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n Azure.ResourceManager.ImpactReporting.ImpactCategoryData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.ImpactCategoryData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public static partial class ImpactReportingExtensions\n {\n public static Azure.Response GetConnector(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public static System.Threading.Tasks.Task> GetConnectorAsync(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.ConnectorResource GetConnectorResource(this Azure.ResourceManager.ArmClient client, Azure.Core.ResourceIdentifier id) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.ConnectorCollection GetConnectors(this Azure.ResourceManager.Resources.SubscriptionResource 
subscriptionResource) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.ImpactCategoryCollection GetImpactCategories(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource) { throw null; }\n public static Azure.Response GetImpactCategory(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public static System.Threading.Tasks.Task> GetImpactCategoryAsync(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.ImpactCategoryResource GetImpactCategoryResource(this Azure.ResourceManager.ArmClient client, Azure.Core.ResourceIdentifier id) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.InsightResource GetInsightResource(this Azure.ResourceManager.ArmClient client, Azure.Core.ResourceIdentifier id) { throw null; }\n public static Azure.Response GetWorkloadImpact(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public static System.Threading.Tasks.Task> GetWorkloadImpactAsync(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.WorkloadImpactResource GetWorkloadImpactResource(this Azure.ResourceManager.ArmClient client, Azure.Core.ResourceIdentifier id) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.WorkloadImpactCollection GetWorkloadImpacts(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource) { throw null; }\n }\n public partial class InsightCollection : Azure.ResourceManager.ArmCollection, System.Collections.Generic.IAsyncEnumerable, System.Collections.Generic.IEnumerable, System.Collections.IEnumerable\n {\n protected InsightCollection() { }\n public virtual Azure.ResourceManager.ArmOperation CreateOrUpdate(Azure.WaitUntil waitUntil, string insightName, Azure.ResourceManager.ImpactReporting.InsightData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> CreateOrUpdateAsync(Azure.WaitUntil waitUntil, string insightName, Azure.ResourceManager.ImpactReporting.InsightData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Exists(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> ExistsAsync(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Get(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Pageable 
GetAll(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.AsyncPageable GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetAsync(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.NullableResponse GetIfExists(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetIfExistsAsync(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n System.Collections.Generic.IAsyncEnumerator System.Collections.Generic.IAsyncEnumerable.GetAsyncEnumerator(System.Threading.CancellationToken cancellationToken) { throw null; }\n System.Collections.Generic.IEnumerator System.Collections.Generic.IEnumerable.GetEnumerator() { throw null; }\n System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { throw null; }\n }\n public partial class InsightData : Azure.ResourceManager.Models.ResourceData, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public InsightData() { }\n public Azure.ResourceManager.ImpactReporting.Models.InsightProperties Properties { get { throw null; } set { } }\n protected override void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.InsightData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.InsightData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class InsightResource : Azure.ResourceManager.ArmResource, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public static readonly Azure.Core.ResourceType ResourceType;\n protected InsightResource() { }\n public virtual Azure.ResourceManager.ImpactReporting.InsightData Data { get { throw null; } }\n public virtual bool HasData { get { throw null; } }\n public static Azure.Core.ResourceIdentifier CreateResourceIdentifier(string subscriptionId, string workloadImpactName, string insightName) { throw null; }\n public virtual Azure.ResourceManager.ArmOperation Delete(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task DeleteAsync(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = 
default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Get(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n Azure.ResourceManager.ImpactReporting.InsightData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.InsightData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n public virtual Azure.ResourceManager.ArmOperation Update(Azure.WaitUntil waitUntil, Azure.ResourceManager.ImpactReporting.InsightData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> UpdateAsync(Azure.WaitUntil waitUntil, Azure.ResourceManager.ImpactReporting.InsightData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n }\n public partial class WorkloadImpactCollection : Azure.ResourceManager.ArmCollection, System.Collections.Generic.IAsyncEnumerable, System.Collections.Generic.IEnumerable, System.Collections.IEnumerable\n {\n protected WorkloadImpactCollection() { }\n public virtual Azure.ResourceManager.ArmOperation CreateOrUpdate(Azure.WaitUntil waitUntil, string workloadImpactName, Azure.ResourceManager.ImpactReporting.WorkloadImpactData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> CreateOrUpdateAsync(Azure.WaitUntil waitUntil, string workloadImpactName, Azure.ResourceManager.ImpactReporting.WorkloadImpactData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Exists(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> ExistsAsync(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Get(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Pageable GetAll(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.AsyncPageable GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public 
virtual System.Threading.Tasks.Task> GetAsync(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.NullableResponse GetIfExists(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetIfExistsAsync(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n System.Collections.Generic.IAsyncEnumerator System.Collections.Generic.IAsyncEnumerable.GetAsyncEnumerator(System.Threading.CancellationToken cancellationToken) { throw null; }\n System.Collections.Generic.IEnumerator System.Collections.Generic.IEnumerable.GetEnumerator() { throw null; }\n System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { throw null; }\n }\n public partial class WorkloadImpactData : Azure.ResourceManager.Models.ResourceData, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public WorkloadImpactData() { }\n public Azure.ResourceManager.ImpactReporting.Models.WorkloadImpactProperties Properties { get { throw null; } set { } }\n protected override void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.WorkloadImpactData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.WorkloadImpactData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class WorkloadImpactResource : Azure.ResourceManager.ArmResource, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public static readonly Azure.Core.ResourceType ResourceType;\n protected WorkloadImpactResource() { }\n public virtual Azure.ResourceManager.ImpactReporting.WorkloadImpactData Data { get { throw null; } }\n public virtual bool HasData { get { throw null; } }\n public static Azure.Core.ResourceIdentifier CreateResourceIdentifier(string subscriptionId, string workloadImpactName) { throw null; }\n public virtual Azure.ResourceManager.ArmOperation Delete(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task DeleteAsync(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Get(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual 
System.Threading.Tasks.Task> GetAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response GetInsight(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetInsightAsync(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.ResourceManager.ImpactReporting.InsightCollection GetInsights() { throw null; }\n Azure.ResourceManager.ImpactReporting.WorkloadImpactData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.WorkloadImpactData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n public virtual Azure.ResourceManager.ArmOperation Update(Azure.WaitUntil waitUntil, Azure.ResourceManager.ImpactReporting.WorkloadImpactData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> UpdateAsync(Azure.WaitUntil waitUntil, Azure.ResourceManager.ImpactReporting.WorkloadImpactData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n }\n}\nnamespace Azure.ResourceManager.ImpactReporting.Mocking\n{\n public partial class MockableImpactReportingArmClient : Azure.ResourceManager.ArmResource\n {\n protected MockableImpactReportingArmClient() { }\n public virtual Azure.ResourceManager.ImpactReporting.ConnectorResource GetConnectorResource(Azure.Core.ResourceIdentifier id) { throw null; }\n public virtual Azure.ResourceManager.ImpactReporting.ImpactCategoryResource GetImpactCategoryResource(Azure.Core.ResourceIdentifier id) { throw null; }\n public virtual Azure.ResourceManager.ImpactReporting.InsightResource GetInsightResource(Azure.Core.ResourceIdentifier id) { throw null; }\n public virtual Azure.ResourceManager.ImpactReporting.WorkloadImpactResource GetWorkloadImpactResource(Azure.Core.ResourceIdentifier id) { throw null; }\n }\n public partial class MockableImpactReportingSubscriptionResource : Azure.ResourceManager.ArmResource\n {\n protected MockableImpactReportingSubscriptionResource() { }\n public virtual Azure.Response GetConnector(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetConnectorAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.ResourceManager.ImpactReporting.ConnectorCollection GetConnectors() { throw null; 
}\n public virtual Azure.ResourceManager.ImpactReporting.ImpactCategoryCollection GetImpactCategories() { throw null; }\n public virtual Azure.Response GetImpactCategory(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetImpactCategoryAsync(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response GetWorkloadImpact(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetWorkloadImpactAsync(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.ResourceManager.ImpactReporting.WorkloadImpactCollection GetWorkloadImpacts() { throw null; }\n }\n}\nnamespace Azure.ResourceManager.ImpactReporting.Models\n{\n public static partial class ArmImpactReportingModelFactory\n {\n public static Azure.ResourceManager.ImpactReporting.ConnectorData ConnectorData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.ImpactReporting.Models.ConnectorProperties properties = null) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.Models.ConnectorProperties ConnectorProperties(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState? provisioningState = default(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState?), string connectorId = null, string tenantId = null, Azure.ResourceManager.ImpactReporting.Models.Platform connectorType = default(Azure.ResourceManager.ImpactReporting.Models.Platform), System.DateTimeOffset lastRunTimeStamp = default(System.DateTimeOffset)) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.ImpactCategoryData ImpactCategoryData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.ImpactReporting.Models.ImpactCategoryProperties properties = null) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.Models.ImpactCategoryProperties ImpactCategoryProperties(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState? provisioningState = default(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState?), string categoryId = null, string parentCategoryId = null, string description = null, System.Collections.Generic.IEnumerable requiredImpactProperties = null) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.InsightData InsightData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.ImpactReporting.Models.InsightProperties properties = null) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.Models.InsightProperties InsightProperties(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState? 
provisioningState = default(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState?), string category = null, string status = null, string eventId = null, string groupId = null, Azure.ResourceManager.ImpactReporting.Models.Content content = null, System.DateTimeOffset? eventOn = default(System.DateTimeOffset?), string insightUniqueId = null, Azure.ResourceManager.ImpactReporting.Models.ImpactDetails impact = null, Azure.ResourceManager.ImpactReporting.Models.InsightPropertiesAdditionalDetails additionalDetails = null) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.Models.RequiredImpactProperties RequiredImpactProperties(string name = null, System.Collections.Generic.IEnumerable allowedValues = null) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.WorkloadImpactData WorkloadImpactData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.ImpactReporting.Models.WorkloadImpactProperties properties = null) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.Models.WorkloadImpactProperties WorkloadImpactProperties(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState? provisioningState = default(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState?), System.DateTimeOffset startOn = default(System.DateTimeOffset), System.DateTimeOffset? endOn = default(System.DateTimeOffset?), string impactedResourceId = null, string impactUniqueId = null, System.DateTimeOffset? reportedTimeUtc = default(System.DateTimeOffset?), string impactCategory = null, string impactDescription = null, System.Collections.Generic.IEnumerable armCorrelationIds = null, System.Collections.Generic.IEnumerable performance = null, Azure.ResourceManager.ImpactReporting.Models.Connectivity connectivity = null, Azure.ResourceManager.ImpactReporting.Models.WorkloadImpactPropertiesAdditionalProperties additionalProperties = null, Azure.ResourceManager.ImpactReporting.Models.ErrorDetailProperties errorDetails = null, Azure.ResourceManager.ImpactReporting.Models.Workload workload = null, string impactGroupId = null, Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel? confidenceLevel = default(Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel?), Azure.ResourceManager.ImpactReporting.Models.ClientIncidentDetails clientIncidentDetails = null) { throw null; }\n }\n public partial class ClientIncidentDetails : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public ClientIncidentDetails() { }\n public string ClientIncidentId { get { throw null; } set { } }\n public Azure.ResourceManager.ImpactReporting.Models.IncidentSource? 
ClientIncidentSource { get { throw null; } set { } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.Models.ClientIncidentDetails System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.Models.ClientIncidentDetails System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n [System.Runtime.InteropServices.StructLayoutAttribute(System.Runtime.InteropServices.LayoutKind.Sequential)]\n public readonly partial struct ConfidenceLevel : System.IEquatable\n {\n private readonly object _dummy;\n private readonly int _dummyPrimitive;\n public ConfidenceLevel(string value) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel High { get { throw null; } }\n public static Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel Low { get { throw null; } }\n public static Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel Medium { get { throw null; } }\n public bool Equals(Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel other) { throw null; }\n [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]\n public override bool Equals(object obj) { throw null; }\n [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]\n public override int GetHashCode() { throw null; }\n public static bool operator ==(Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel left, Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel right) { throw null; }\n public static implicit operator Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel (string value) { throw null; }\n public static bool operator !=(Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel left, Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel right) { throw null; }\n public override string ToString() { throw null; }\n }\n public partial class Connectivity : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public Connectivity() { }\n public int? Port { get { throw null; } set { } }\n public Azure.ResourceManager.ImpactReporting.Models.Protocol? 
Protocol { get { throw null; } set { } }\n public string SourceAzureResourceId { get { throw null; } set { } }\n public string TargetAzureResourceId { get { throw null; } set { } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.Models.Connectivity System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.Models.Connectivity System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class ConnectorPatch : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public ConnectorPatch() { }\n public Azure.ResourceManager.ImpactReporting.Models.Platform? ConnectorType { get { throw null; } set { } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.Models.ConnectorPatch System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.Models.ConnectorPatch System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class ConnectorProperties : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public ConnectorProperties(string connectorId, string tenantId, Azure.ResourceManager.ImpactReporting.Models.Platform connectorType, System.DateTimeOffset lastRunTimeStamp) { }\n public string ConnectorId { get { throw null; } }\n public Azure.ResourceManager.ImpactReporting.Models.Platform ConnectorType { get { throw null; } set { } }\n public System.DateTimeOffset LastRunTimeStamp { get { throw null; } }\n public Azure.ResourceManager.ImpactReporting.Models.ProvisioningState? 
ProvisioningState { get { throw null; } }\n public string TenantId { get { throw null; } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.Models.ConnectorProperties System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.Models.ConnectorProperties System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class Content : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public Content(string title, string description) { }\n public string Description { get { throw null; } set { } }\n public string Title { get { throw null; } set { } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.Models.Content System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.Models.Content System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class ErrorDetailProperties : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "1986669206", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 48446, + "pr_file": "sdk/impactreporting/Azure.ResourceManager.ImpactReporting/api/Azure.ResourceManager.ImpactReporting.net8.0.cs", + "discussion_id": "1986669206", + "commented_code": "@@ -0,0 +1,616 @@\n+namespace Azure.ResourceManager.ImpactReporting\n+{\n+ public partial class ConnectorCollection : Azure.ResourceManager.ArmCollection, System.Collections.Generic.IAsyncEnumerable, System.Collections.Generic.IEnumerable, System.Collections.IEnumerable\n+ {\n+ protected ConnectorCollection() { }\n+ public virtual Azure.ResourceManager.ArmOperation CreateOrUpdate(Azure.WaitUntil waitUntil, string connectorName, Azure.ResourceManager.ImpactReporting.ConnectorData data, 
System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> CreateOrUpdateAsync(Azure.WaitUntil waitUntil, string connectorName, Azure.ResourceManager.ImpactReporting.ConnectorData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Exists(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> ExistsAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Get(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Pageable GetAll(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.AsyncPageable GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.NullableResponse GetIfExists(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetIfExistsAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ System.Collections.Generic.IAsyncEnumerator System.Collections.Generic.IAsyncEnumerable.GetAsyncEnumerator(System.Threading.CancellationToken cancellationToken) { throw null; }\n+ System.Collections.Generic.IEnumerator System.Collections.Generic.IEnumerable.GetEnumerator() { throw null; }\n+ System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { throw null; }\n+ }\n+ public partial class ConnectorData : Azure.ResourceManager.Models.ResourceData, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public ConnectorData() { }\n+ public Azure.ResourceManager.ImpactReporting.Models.ConnectorProperties Properties { get { throw null; } set { } }\n+ protected override void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.ConnectorData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.ConnectorData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData 
System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class ConnectorResource : Azure.ResourceManager.ArmResource, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public static readonly Azure.Core.ResourceType ResourceType;\n+ protected ConnectorResource() { }\n+ public virtual Azure.ResourceManager.ImpactReporting.ConnectorData Data { get { throw null; } }\n+ public virtual bool HasData { get { throw null; } }\n+ public static Azure.Core.ResourceIdentifier CreateResourceIdentifier(string subscriptionId, string connectorName) { throw null; }\n+ public virtual Azure.ResourceManager.ArmOperation Delete(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task DeleteAsync(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Get(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ Azure.ResourceManager.ImpactReporting.ConnectorData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.ConnectorData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ public virtual Azure.Response Update(Azure.ResourceManager.ImpactReporting.Models.ConnectorPatch patch, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> UpdateAsync(Azure.ResourceManager.ImpactReporting.Models.ConnectorPatch patch, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ }\n+ public partial class ImpactCategoryCollection : Azure.ResourceManager.ArmCollection\n+ {\n+ protected ImpactCategoryCollection() { }\n+ public virtual Azure.Response Exists(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> ExistsAsync(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Get(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Pageable 
GetAll(string resourceType, string categoryName = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.AsyncPageable GetAllAsync(string resourceType, string categoryName = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetAsync(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.NullableResponse GetIfExists(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetIfExistsAsync(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ }\n+ public partial class ImpactCategoryData : Azure.ResourceManager.Models.ResourceData, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ internal ImpactCategoryData() { }\n+ public Azure.ResourceManager.ImpactReporting.Models.ImpactCategoryProperties Properties { get { throw null; } }\n+ protected override void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.ImpactCategoryData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.ImpactCategoryData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class ImpactCategoryResource : Azure.ResourceManager.ArmResource, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public static readonly Azure.Core.ResourceType ResourceType;\n+ protected ImpactCategoryResource() { }\n+ public virtual Azure.ResourceManager.ImpactReporting.ImpactCategoryData Data { get { throw null; } }\n+ public virtual bool HasData { get { throw null; } }\n+ public static Azure.Core.ResourceIdentifier CreateResourceIdentifier(string subscriptionId, string impactCategoryName) { throw null; }\n+ public virtual Azure.Response Get(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ Azure.ResourceManager.ImpactReporting.ImpactCategoryData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; 
}\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.ImpactCategoryData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public static partial class ImpactReportingExtensions\n+ {\n+ public static Azure.Response GetConnector(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public static System.Threading.Tasks.Task> GetConnectorAsync(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.ConnectorResource GetConnectorResource(this Azure.ResourceManager.ArmClient client, Azure.Core.ResourceIdentifier id) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.ConnectorCollection GetConnectors(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.ImpactCategoryCollection GetImpactCategories(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource) { throw null; }\n+ public static Azure.Response GetImpactCategory(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public static System.Threading.Tasks.Task> GetImpactCategoryAsync(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.ImpactCategoryResource GetImpactCategoryResource(this Azure.ResourceManager.ArmClient client, Azure.Core.ResourceIdentifier id) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.InsightResource GetInsightResource(this Azure.ResourceManager.ArmClient client, Azure.Core.ResourceIdentifier id) { throw null; }\n+ public static Azure.Response GetWorkloadImpact(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public static System.Threading.Tasks.Task> GetWorkloadImpactAsync(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.WorkloadImpactResource GetWorkloadImpactResource(this Azure.ResourceManager.ArmClient client, 
Azure.Core.ResourceIdentifier id) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.WorkloadImpactCollection GetWorkloadImpacts(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource) { throw null; }\n+ }\n+ public partial class InsightCollection : Azure.ResourceManager.ArmCollection, System.Collections.Generic.IAsyncEnumerable, System.Collections.Generic.IEnumerable, System.Collections.IEnumerable\n+ {\n+ protected InsightCollection() { }\n+ public virtual Azure.ResourceManager.ArmOperation CreateOrUpdate(Azure.WaitUntil waitUntil, string insightName, Azure.ResourceManager.ImpactReporting.InsightData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> CreateOrUpdateAsync(Azure.WaitUntil waitUntil, string insightName, Azure.ResourceManager.ImpactReporting.InsightData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Exists(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> ExistsAsync(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Get(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Pageable GetAll(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.AsyncPageable GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetAsync(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.NullableResponse GetIfExists(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetIfExistsAsync(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ System.Collections.Generic.IAsyncEnumerator System.Collections.Generic.IAsyncEnumerable.GetAsyncEnumerator(System.Threading.CancellationToken cancellationToken) { throw null; }\n+ System.Collections.Generic.IEnumerator System.Collections.Generic.IEnumerable.GetEnumerator() { throw null; }\n+ System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { throw null; }\n+ }\n+ public partial class InsightData : Azure.ResourceManager.Models.ResourceData, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public InsightData() { }\n+ public Azure.ResourceManager.ImpactReporting.Models.InsightProperties Properties { get { throw null; } set { } }\n+ protected override void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.InsightData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, 
System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.InsightData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class InsightResource : Azure.ResourceManager.ArmResource, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public static readonly Azure.Core.ResourceType ResourceType;\n+ protected InsightResource() { }\n+ public virtual Azure.ResourceManager.ImpactReporting.InsightData Data { get { throw null; } }\n+ public virtual bool HasData { get { throw null; } }\n+ public static Azure.Core.ResourceIdentifier CreateResourceIdentifier(string subscriptionId, string workloadImpactName, string insightName) { throw null; }\n+ public virtual Azure.ResourceManager.ArmOperation Delete(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task DeleteAsync(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Get(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ Azure.ResourceManager.ImpactReporting.InsightData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.InsightData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ public virtual Azure.ResourceManager.ArmOperation Update(Azure.WaitUntil waitUntil, Azure.ResourceManager.ImpactReporting.InsightData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> UpdateAsync(Azure.WaitUntil waitUntil, Azure.ResourceManager.ImpactReporting.InsightData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ }\n+ public partial class 
WorkloadImpactCollection : Azure.ResourceManager.ArmCollection, System.Collections.Generic.IAsyncEnumerable, System.Collections.Generic.IEnumerable, System.Collections.IEnumerable\n+ {\n+ protected WorkloadImpactCollection() { }\n+ public virtual Azure.ResourceManager.ArmOperation CreateOrUpdate(Azure.WaitUntil waitUntil, string workloadImpactName, Azure.ResourceManager.ImpactReporting.WorkloadImpactData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> CreateOrUpdateAsync(Azure.WaitUntil waitUntil, string workloadImpactName, Azure.ResourceManager.ImpactReporting.WorkloadImpactData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Exists(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> ExistsAsync(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Get(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Pageable GetAll(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.AsyncPageable GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetAsync(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.NullableResponse GetIfExists(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetIfExistsAsync(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ System.Collections.Generic.IAsyncEnumerator System.Collections.Generic.IAsyncEnumerable.GetAsyncEnumerator(System.Threading.CancellationToken cancellationToken) { throw null; }\n+ System.Collections.Generic.IEnumerator System.Collections.Generic.IEnumerable.GetEnumerator() { throw null; }\n+ System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { throw null; }\n+ }\n+ public partial class WorkloadImpactData : Azure.ResourceManager.Models.ResourceData, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public WorkloadImpactData() { }\n+ public Azure.ResourceManager.ImpactReporting.Models.WorkloadImpactProperties Properties { get { throw null; } set { } }\n+ protected override void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.WorkloadImpactData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, 
System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.WorkloadImpactData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class WorkloadImpactResource : Azure.ResourceManager.ArmResource, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public static readonly Azure.Core.ResourceType ResourceType;\n+ protected WorkloadImpactResource() { }\n+ public virtual Azure.ResourceManager.ImpactReporting.WorkloadImpactData Data { get { throw null; } }\n+ public virtual bool HasData { get { throw null; } }\n+ public static Azure.Core.ResourceIdentifier CreateResourceIdentifier(string subscriptionId, string workloadImpactName) { throw null; }\n+ public virtual Azure.ResourceManager.ArmOperation Delete(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task DeleteAsync(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Get(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response GetInsight(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetInsightAsync(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.ResourceManager.ImpactReporting.InsightCollection GetInsights() { throw null; }\n+ Azure.ResourceManager.ImpactReporting.WorkloadImpactData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.WorkloadImpactData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ public virtual Azure.ResourceManager.ArmOperation Update(Azure.WaitUntil waitUntil, Azure.ResourceManager.ImpactReporting.WorkloadImpactData data, System.Threading.CancellationToken cancellationToken = 
default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> UpdateAsync(Azure.WaitUntil waitUntil, Azure.ResourceManager.ImpactReporting.WorkloadImpactData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ }\n+}\n+namespace Azure.ResourceManager.ImpactReporting.Mocking\n+{\n+ public partial class MockableImpactReportingArmClient : Azure.ResourceManager.ArmResource\n+ {\n+ protected MockableImpactReportingArmClient() { }\n+ public virtual Azure.ResourceManager.ImpactReporting.ConnectorResource GetConnectorResource(Azure.Core.ResourceIdentifier id) { throw null; }\n+ public virtual Azure.ResourceManager.ImpactReporting.ImpactCategoryResource GetImpactCategoryResource(Azure.Core.ResourceIdentifier id) { throw null; }\n+ public virtual Azure.ResourceManager.ImpactReporting.InsightResource GetInsightResource(Azure.Core.ResourceIdentifier id) { throw null; }\n+ public virtual Azure.ResourceManager.ImpactReporting.WorkloadImpactResource GetWorkloadImpactResource(Azure.Core.ResourceIdentifier id) { throw null; }\n+ }\n+ public partial class MockableImpactReportingSubscriptionResource : Azure.ResourceManager.ArmResource\n+ {\n+ protected MockableImpactReportingSubscriptionResource() { }\n+ public virtual Azure.Response GetConnector(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetConnectorAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.ResourceManager.ImpactReporting.ConnectorCollection GetConnectors() { throw null; }\n+ public virtual Azure.ResourceManager.ImpactReporting.ImpactCategoryCollection GetImpactCategories() { throw null; }\n+ public virtual Azure.Response GetImpactCategory(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetImpactCategoryAsync(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response GetWorkloadImpact(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetWorkloadImpactAsync(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.ResourceManager.ImpactReporting.WorkloadImpactCollection GetWorkloadImpacts() { throw null; }\n+ }\n+}\n+namespace Azure.ResourceManager.ImpactReporting.Models\n+{\n+ public static partial class ArmImpactReportingModelFactory\n+ {\n+ public static Azure.ResourceManager.ImpactReporting.ConnectorData ConnectorData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.ImpactReporting.Models.ConnectorProperties properties = null) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.Models.ConnectorProperties ConnectorProperties(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState? 
provisioningState = default(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState?), string connectorId = null, string tenantId = null, Azure.ResourceManager.ImpactReporting.Models.Platform connectorType = default(Azure.ResourceManager.ImpactReporting.Models.Platform), System.DateTimeOffset lastRunTimeStamp = default(System.DateTimeOffset)) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.ImpactCategoryData ImpactCategoryData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.ImpactReporting.Models.ImpactCategoryProperties properties = null) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.Models.ImpactCategoryProperties ImpactCategoryProperties(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState? provisioningState = default(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState?), string categoryId = null, string parentCategoryId = null, string description = null, System.Collections.Generic.IEnumerable requiredImpactProperties = null) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.InsightData InsightData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.ImpactReporting.Models.InsightProperties properties = null) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.Models.InsightProperties InsightProperties(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState? provisioningState = default(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState?), string category = null, string status = null, string eventId = null, string groupId = null, Azure.ResourceManager.ImpactReporting.Models.Content content = null, System.DateTimeOffset? eventOn = default(System.DateTimeOffset?), string insightUniqueId = null, Azure.ResourceManager.ImpactReporting.Models.ImpactDetails impact = null, Azure.ResourceManager.ImpactReporting.Models.InsightPropertiesAdditionalDetails additionalDetails = null) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.Models.RequiredImpactProperties RequiredImpactProperties(string name = null, System.Collections.Generic.IEnumerable allowedValues = null) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.WorkloadImpactData WorkloadImpactData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.ImpactReporting.Models.WorkloadImpactProperties properties = null) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.Models.WorkloadImpactProperties WorkloadImpactProperties(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState? provisioningState = default(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState?), System.DateTimeOffset startOn = default(System.DateTimeOffset), System.DateTimeOffset? endOn = default(System.DateTimeOffset?), string impactedResourceId = null, string impactUniqueId = null, System.DateTimeOffset? 
reportedTimeUtc = default(System.DateTimeOffset?), string impactCategory = null, string impactDescription = null, System.Collections.Generic.IEnumerable armCorrelationIds = null, System.Collections.Generic.IEnumerable performance = null, Azure.ResourceManager.ImpactReporting.Models.Connectivity connectivity = null, Azure.ResourceManager.ImpactReporting.Models.WorkloadImpactPropertiesAdditionalProperties additionalProperties = null, Azure.ResourceManager.ImpactReporting.Models.ErrorDetailProperties errorDetails = null, Azure.ResourceManager.ImpactReporting.Models.Workload workload = null, string impactGroupId = null, Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel? confidenceLevel = default(Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel?), Azure.ResourceManager.ImpactReporting.Models.ClientIncidentDetails clientIncidentDetails = null) { throw null; }\n+ }\n+ public partial class ClientIncidentDetails : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public ClientIncidentDetails() { }\n+ public string ClientIncidentId { get { throw null; } set { } }\n+ public Azure.ResourceManager.ImpactReporting.Models.IncidentSource? ClientIncidentSource { get { throw null; } set { } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.Models.ClientIncidentDetails System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.Models.ClientIncidentDetails System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ [System.Runtime.InteropServices.StructLayoutAttribute(System.Runtime.InteropServices.LayoutKind.Sequential)]\n+ public readonly partial struct ConfidenceLevel : System.IEquatable\n+ {\n+ private readonly object _dummy;\n+ private readonly int _dummyPrimitive;\n+ public ConfidenceLevel(string value) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel High { get { throw null; } }\n+ public static Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel Low { get { throw null; } }\n+ public static Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel Medium { get { throw null; } }\n+ public bool Equals(Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel other) { throw null; }\n+ [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]\n+ public override bool Equals(object obj) { throw null; }\n+ [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]\n+ public override int GetHashCode() { throw null; }\n+ public static bool operator ==(Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel left, 
Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel right) { throw null; }\n+ public static implicit operator Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel (string value) { throw null; }\n+ public static bool operator !=(Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel left, Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel right) { throw null; }\n+ public override string ToString() { throw null; }\n+ }\n+ public partial class Connectivity : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public Connectivity() { }\n+ public int? Port { get { throw null; } set { } }\n+ public Azure.ResourceManager.ImpactReporting.Models.Protocol? Protocol { get { throw null; } set { } }\n+ public string SourceAzureResourceId { get { throw null; } set { } }\n+ public string TargetAzureResourceId { get { throw null; } set { } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.Models.Connectivity System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.Models.Connectivity System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class ConnectorPatch : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public ConnectorPatch() { }\n+ public Azure.ResourceManager.ImpactReporting.Models.Platform? 
ConnectorType { get { throw null; } set { } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.Models.ConnectorPatch System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.Models.ConnectorPatch System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class ConnectorProperties : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public ConnectorProperties(string connectorId, string tenantId, Azure.ResourceManager.ImpactReporting.Models.Platform connectorType, System.DateTimeOffset lastRunTimeStamp) { }\n+ public string ConnectorId { get { throw null; } }\n+ public Azure.ResourceManager.ImpactReporting.Models.Platform ConnectorType { get { throw null; } set { } }\n+ public System.DateTimeOffset LastRunTimeStamp { get { throw null; } }\n+ public Azure.ResourceManager.ImpactReporting.Models.ProvisioningState? ProvisioningState { get { throw null; } }\n+ public string TenantId { get { throw null; } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.Models.ConnectorProperties System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.Models.ConnectorProperties System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class Content : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public Content(string title, string description) { }\n+ public string Description { get { throw null; } set { } }\n+ public string Title { get { throw null; } set { } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.Models.Content System.ClientModel.Primitives.IJsonModel.Create(ref 
System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.Models.Content System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class ErrorDetailProperties : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel", + "comment_created_at": "2025-03-10T06:17:01+00:00", + "comment_author": "ArcturusZhang", + "comment_body": "this is also generic, we need a more verbose name for this type.\r\nUsually for this kind of models, we just prepend the RP name `ImpactReporting` to it so that we have `ImpactReportingErrorDetailProperties`.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-use-domain-specific-type-names.md b/_reviewers/azure-sdk-for-net-use-domain-specific-type-names.md index a12bf36..b2364af 100644 --- a/_reviewers/azure-sdk-for-net-use-domain-specific-type-names.md +++ b/_reviewers/azure-sdk-for-net-use-domain-specific-type-names.md @@ -32,139 +32,3 @@ This makes the code more maintainable by: - Making type purposes immediately clear from their names - Improving code searchability and navigation - Reducing cognitive load when reading code - - -[ - { - "discussion_id": "2179522628", - "pr_number": 51009, - "pr_file": "sdk/mongodbatlas/Azure.ResourceManager.MongoDBAtlas/api/Azure.ResourceManager.MongoDBAtlas.net8.0.cs", - "created_at": "2025-07-02T09:02:01+00:00", - "commented_code": "string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class OrganizationResourceUpdateProperties : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2179522628", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 51009, - "pr_file": "sdk/mongodbatlas/Azure.ResourceManager.MongoDBAtlas/api/Azure.ResourceManager.MongoDBAtlas.net8.0.cs", - "discussion_id": "2179522628", - "commented_code": "@@ -210,4 +223,16 @@ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n+ public partial class OrganizationResourceUpdateProperties : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel", - "comment_created_at": "2025-07-02T09:02:01+00:00", 
- "comment_author": "ArcturusZhang", - "comment_body": ":( I think we should rename this to `MongoDBAtlasOrganizationUpdateProperties`.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2104120779", - "pr_number": 50236, - "pr_file": "sdk/containerapps/Azure.ResourceManager.AppContainers/api/Azure.ResourceManager.AppContainers.net8.0.cs", - "created_at": "2025-05-23T08:42:50+00:00", - "commented_code": "public static Azure.ResourceManager.AppContainers.ContainerAppManagedEnvironmentStorageData ContainerAppManagedEnvironmentStorageData(Azure.Core.ResourceIdentifier id, string name, Azure.Core.ResourceType resourceType, Azure.ResourceManager.Models.SystemData systemData, Azure.ResourceManager.AppContainers.Models.ContainerAppAzureFileProperties managedEnvironmentStorageAzureFile = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.ContainerAppManagedEnvironmentStorageData ContainerAppManagedEnvironmentStorageData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.ManagedEnvironmentStorageProperties properties = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppNameAvailabilityResult ContainerAppNameAvailabilityResult(bool? isNameAvailable = default(bool?), Azure.ResourceManager.AppContainers.Models.ContainerAppNameUnavailableReason? reason = default(Azure.ResourceManager.AppContainers.Models.ContainerAppNameUnavailableReason?), string message = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppReplicaContainer ContainerAppReplicaContainer(string name = null, string containerId = null, bool? isReady = default(bool?), bool? isStarted = default(bool?), int? restartCount = default(int?), Azure.ResourceManager.AppContainers.Models.ContainerAppContainerRunningState? runningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppContainerRunningState?), string runningStateDetails = null, string logStreamEndpoint = null, string execEndpoint = null) { throw null; }\n [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppReplicaContainer ContainerAppReplicaContainer(string name, string containerId, bool? isReady, bool? isStarted, int? restartCount, Azure.ResourceManager.AppContainers.Models.ContainerAppContainerRunningState? runningState, string runningStateDetails, string logStreamEndpoint, string execEndpoint) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppReplicaContainer ContainerAppReplicaContainer(string name = null, string containerId = null, bool? isReady = default(bool?), bool? isStarted = default(bool?), int? restartCount = default(int?), Azure.ResourceManager.AppContainers.Models.ContainerAppContainerRunningState? 
runningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppContainerRunningState?), string runningStateDetails = null, string logStreamEndpoint = null, string execEndpoint = null, string debugEndpoint = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.ContainerAppReplicaData ContainerAppReplicaData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.DateTimeOffset? createdOn = default(System.DateTimeOffset?), Azure.ResourceManager.AppContainers.Models.ContainerAppReplicaRunningState? runningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppReplicaRunningState?), string runningStateDetails = null, System.Collections.Generic.IEnumerable containers = null, System.Collections.Generic.IEnumerable initContainers = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.ContainerAppRevisionData ContainerAppRevisionData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.DateTimeOffset? createdOn = default(System.DateTimeOffset?), System.DateTimeOffset? lastActiveOn = default(System.DateTimeOffset?), string fqdn = null, Azure.ResourceManager.AppContainers.Models.ContainerAppTemplate template = null, bool? isActive = default(bool?), int? replicas = default(int?), int? trafficWeight = default(int?), string provisioningError = null, Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionHealthState? healthState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionHealthState?), Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionProvisioningState?), Azure.ResourceManager.AppContainers.Models.RevisionRunningState? runningState = default(Azure.ResourceManager.AppContainers.Models.RevisionRunningState?)) { throw null; }\n public static Azure.ResourceManager.AppContainers.ContainerAppRevisionData ContainerAppRevisionData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.DateTimeOffset? createdOn = default(System.DateTimeOffset?), System.DateTimeOffset? lastActiveOn = default(System.DateTimeOffset?), string fqdn = null, Azure.ResourceManager.AppContainers.Models.ContainerAppTemplate template = null, bool? isActive = default(bool?), int? replicas = default(int?), int? trafficWeight = default(int?), System.Collections.Generic.IEnumerable labels = null, string provisioningError = null, Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionHealthState? healthState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionHealthState?), Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionProvisioningState?), Azure.ResourceManager.AppContainers.Models.RevisionRunningState? 
runningState = default(Azure.ResourceManager.AppContainers.Models.RevisionRunningState?)) { throw null; }\n [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]\n public static Azure.ResourceManager.AppContainers.ContainerAppRevisionData ContainerAppRevisionData(Azure.Core.ResourceIdentifier id, string name, Azure.Core.ResourceType resourceType, Azure.ResourceManager.Models.SystemData systemData, System.DateTimeOffset? createdOn, System.DateTimeOffset? lastActiveOn, string fqdn, Azure.ResourceManager.AppContainers.Models.ContainerAppTemplate template, bool? isActive, int? replicas, int? trafficWeight, string provisioningError, Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionHealthState? healthState, Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionProvisioningState? provisioningState, Azure.ResourceManager.AppContainers.Models.RevisionRunningState? runningState) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppsBuildConfiguration ContainerAppsBuildConfiguration(string baseOS = null, string platform = null, string platformVersion = null, System.Collections.Generic.IEnumerable environmentVariables = null, System.Collections.Generic.IEnumerable preBuildSteps = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.ContainerAppsBuildResourceData ContainerAppsBuildResourceData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.BuildProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.BuildProvisioningState?), Azure.ResourceManager.AppContainers.Models.BuildStatus? buildStatus = default(Azure.ResourceManager.AppContainers.Models.BuildStatus?), Azure.ResourceManager.AppContainers.Models.ContainerRegistryWithCustomImage destinationContainerRegistry = null, Azure.ResourceManager.AppContainers.Models.ContainerAppsBuildConfiguration configuration = null, string logStreamEndpoint = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppSecret ContainerAppSecret(string name = null, string value = null, string identity = null, System.Uri keyVaultUri = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.ContainerAppSourceControlData ContainerAppSourceControlData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.ContainerAppSourceControlOperationState? 
operationState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppSourceControlOperationState?), System.Uri repoUri = null, string branch = null, Azure.ResourceManager.AppContainers.Models.ContainerAppGitHubActionConfiguration gitHubActionConfiguration = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.ContainerAppsPatchResourceData ContainerAppsPatchResourceData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.PatchProperties properties = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppUsage ContainerAppUsage(Azure.ResourceManager.AppContainers.Models.ContainerAppUsageUnit unit = default(Azure.ResourceManager.AppContainers.Models.ContainerAppUsageUnit), float currentValue = 0f, float limit = 0f, Azure.ResourceManager.AppContainers.Models.ContainerAppUsageName name = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppUsageName ContainerAppUsageName(string value = null, string localizedValue = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppWorkloadProfileState ContainerAppWorkloadProfileState(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.ContainerAppWorkloadProfileStateProperties properties = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerExecutionStatus ContainerExecutionStatus(string name = null, int? code = default(int?), string additionalInformation = null, string status = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.DaprComponentResiliencyPolicyData DaprComponentResiliencyPolicyData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.DaprComponentResiliencyPolicyConfiguration inboundPolicy = null, Azure.ResourceManager.AppContainers.Models.DaprComponentResiliencyPolicyConfiguration outboundPolicy = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.DaprSubscriptionData DaprSubscriptionData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, string pubsubName = null, string topic = null, string deadLetterTopic = null, Azure.ResourceManager.AppContainers.Models.DaprSubscriptionRoutes routes = null, System.Collections.Generic.IEnumerable scopes = null, System.Collections.Generic.IDictionary metadata = null, Azure.ResourceManager.AppContainers.Models.DaprSubscriptionBulkSubscribeConfig bulkSubscribe = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.DotNetComponentData DotNetComponentData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.DotNetComponentType? 
componentType = default(Azure.ResourceManager.AppContainers.Models.DotNetComponentType?), Azure.ResourceManager.AppContainers.Models.DotNetComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.DotNetComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, System.Collections.Generic.IEnumerable serviceBinds = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.HttpRouteConfigData HttpRouteConfigData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.HttpRouteConfigProperties properties = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.HttpRouteConfigProperties HttpRouteConfigProperties(Azure.ResourceManager.AppContainers.Models.HttpRouteProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.HttpRouteProvisioningState?), System.Collections.Generic.IEnumerable provisioningErrors = null, string fqdn = null, System.Collections.Generic.IEnumerable customDomains = null, System.Collections.Generic.IEnumerable rules = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.HttpRouteProvisioningErrors HttpRouteProvisioningErrors(System.DateTimeOffset? timestamp = default(System.DateTimeOffset?), string message = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.JavaComponentData JavaComponentData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.JavaComponentProperties properties = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.JavaComponentProperties JavaComponentProperties(string componentType = null, Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.LabelHistoryData LabelHistoryData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.Collections.Generic.IEnumerable labelHistoryRecords = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.LabelHistoryRecordItem LabelHistoryRecordItem(string revision = null, System.DateTimeOffset? start = default(System.DateTimeOffset?), System.DateTimeOffset? stop = default(System.DateTimeOffset?), Azure.ResourceManager.AppContainers.Models.Status? 
status = default(Azure.ResourceManager.AppContainers.Models.Status?)) { throw null; }\n public static Azure.ResourceManager.AppContainers.LogicAppData LogicAppData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.BinaryData properties = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.MaintenanceConfigurationResourceData MaintenanceConfigurationResourceData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.Collections.Generic.IEnumerable scheduledEntries = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ManagedCertificateProperties ManagedCertificateProperties(Azure.ResourceManager.AppContainers.Models.ContainerAppCertificateProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppCertificateProvisioningState?), string subjectName = null, string error = null, Azure.ResourceManager.AppContainers.Models.ManagedCertificateDomainControlValidation? domainControlValidation = default(Azure.ResourceManager.AppContainers.Models.ManagedCertificateDomainControlValidation?), string validationToken = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.NacosComponent NacosComponent(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null, string ingressFqdn = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.PatchDetails PatchDetails(string targetContainerName = null, string targetImage = null, System.DateTimeOffset lastDetectionOn = default(System.DateTimeOffset), Azure.ResourceManager.AppContainers.Models.DetectionStatus detectionStatus = default(Azure.ResourceManager.AppContainers.Models.DetectionStatus), string newImageName = null, Azure.ResourceManager.AppContainers.Models.PatchDetailsNewLayer newLayer = null, Azure.ResourceManager.AppContainers.Models.PatchDetailsOldLayer oldLayer = null, Azure.ResourceManager.AppContainers.Models.PatchType? patchType = default(Azure.ResourceManager.AppContainers.Models.PatchType?)) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.PatchDetailsNewLayer PatchDetailsNewLayer(string name = null, string frameworkAndVersion = null, string osAndVersion = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.PatchDetailsOldLayer PatchDetailsOldLayer(string name = null, string frameworkAndVersion = null, string osAndVersion = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.PatchProperties PatchProperties(Azure.Core.ResourceIdentifier targetEnvironmentId = null, Azure.Core.ResourceIdentifier targetContainerAppId = null, Azure.Core.ResourceIdentifier targetRevisionId = null, Azure.ResourceManager.AppContainers.Models.PatchApplyStatus? patchApplyStatus = default(Azure.ResourceManager.AppContainers.Models.PatchApplyStatus?), System.DateTimeOffset? createdOn = default(System.DateTimeOffset?), System.DateTimeOffset? 
lastModifiedOn = default(System.DateTimeOffset?), System.Collections.Generic.IEnumerable patchDetails = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ReplicaExecutionStatus ReplicaExecutionStatus(string name = null, System.Collections.Generic.IEnumerable containers = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.SessionPoolData SessionPoolData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.Collections.Generic.IDictionary tags = null, Azure.Core.AzureLocation location = default(Azure.Core.AzureLocation), Azure.ResourceManager.Models.ManagedServiceIdentity identity = null, Azure.Core.ResourceIdentifier environmentId = null, Azure.ResourceManager.AppContainers.Models.ContainerType? containerType = default(Azure.ResourceManager.AppContainers.Models.ContainerType?), Azure.ResourceManager.AppContainers.Models.PoolManagementType? poolManagementType = default(Azure.ResourceManager.AppContainers.Models.PoolManagementType?), int? nodeCount = default(int?), Azure.ResourceManager.AppContainers.Models.SessionPoolScaleConfiguration scaleConfiguration = null, System.Collections.Generic.IEnumerable secrets = null, Azure.ResourceManager.AppContainers.Models.SessionPoolLifecycleConfiguration dynamicPoolLifecycleConfiguration = null, Azure.ResourceManager.AppContainers.Models.CustomContainerTemplate customContainerTemplate = null, Azure.ResourceManager.AppContainers.Models.SessionNetworkStatus? sessionNetworkStatus = default(Azure.ResourceManager.AppContainers.Models.SessionNetworkStatus?), System.Uri poolManagementEndpoint = null, Azure.ResourceManager.AppContainers.Models.SessionPoolProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.SessionPoolProvisioningState?), System.Collections.Generic.IEnumerable managedIdentitySettings = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.SpringBootAdminComponent SpringBootAdminComponent(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null, string ingressFqdn = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.SpringCloudConfigComponent SpringCloudConfigComponent(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.SpringCloudEurekaComponent SpringCloudEurekaComponent(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? 
provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null, string ingressFqdn = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.SpringCloudGatewayComponent SpringCloudGatewayComponent(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null, string ingressFqdn = null, System.Collections.Generic.IEnumerable springCloudGatewayRoutes = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.WorkflowEnvelopeData WorkflowEnvelopeData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, string kind = null, Azure.Core.AzureLocation? location = default(Azure.Core.AzureLocation?), Azure.ResourceManager.AppContainers.Models.WorkflowEnvelopeProperties properties = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.WorkflowEnvelopeProperties WorkflowEnvelopeProperties(System.BinaryData files = null, Azure.ResourceManager.AppContainers.Models.WorkflowState? flowState = default(Azure.ResourceManager.AppContainers.Models.WorkflowState?), Azure.ResourceManager.AppContainers.Models.WorkflowHealth health = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.WorkflowHealth WorkflowHealth(Azure.ResourceManager.AppContainers.Models.WorkflowHealthState state = Azure.ResourceManager.AppContainers.Models.WorkflowHealthState.Unknown, Azure.ResponseError error = null) { throw null; }\n }\n public partial class BlobStorageTokenStore : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public BlobStorageTokenStore() { }\n public System.Uri BlobContainerUri { get { throw null; } set { } }\n public string ClientId { get { throw null; } set { } }\n public string ManagedIdentityResourceId { get { throw null; } set { } }\n public string SasUrlSettingName { get { throw null; } set { } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.BlobStorageTokenStore System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.BlobStorageTokenStore System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData 
System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class BuildConfiguration : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public BuildConfiguration() { }\n public string BaseOS { get { throw null; } set { } }\n public System.Collections.Generic.IList EnvironmentVariables { get { throw null; } }\n public string Platform { get { throw null; } set { } }\n public string PlatformVersion { get { throw null; } set { } }\n public System.Collections.Generic.IList PreBuildSteps { get { throw null; } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.BuildConfiguration System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.BuildConfiguration System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n [System.Runtime.InteropServices.StructLayoutAttribute(System.Runtime.InteropServices.LayoutKind.Sequential)]\n public readonly partial struct ContainerAppAccessMode : System.IEquatable\n public readonly partial struct BuilderProvisioningState : System.IEquatable", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2104120779", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50236, - "pr_file": "sdk/containerapps/Azure.ResourceManager.AppContainers/api/Azure.ResourceManager.AppContainers.net8.0.cs", - "discussion_id": "2104120779", - "commented_code": "@@ -1465,66 +2391,301 @@ public static partial class ArmAppContainersModelFactory\n public static Azure.ResourceManager.AppContainers.ContainerAppManagedEnvironmentStorageData ContainerAppManagedEnvironmentStorageData(Azure.Core.ResourceIdentifier id, string name, Azure.Core.ResourceType resourceType, Azure.ResourceManager.Models.SystemData systemData, Azure.ResourceManager.AppContainers.Models.ContainerAppAzureFileProperties managedEnvironmentStorageAzureFile = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.ContainerAppManagedEnvironmentStorageData ContainerAppManagedEnvironmentStorageData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.ManagedEnvironmentStorageProperties properties = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppNameAvailabilityResult ContainerAppNameAvailabilityResult(bool? isNameAvailable = default(bool?), Azure.ResourceManager.AppContainers.Models.ContainerAppNameUnavailableReason? 
reason = default(Azure.ResourceManager.AppContainers.Models.ContainerAppNameUnavailableReason?), string message = null) { throw null; }\n- public static Azure.ResourceManager.AppContainers.Models.ContainerAppReplicaContainer ContainerAppReplicaContainer(string name = null, string containerId = null, bool? isReady = default(bool?), bool? isStarted = default(bool?), int? restartCount = default(int?), Azure.ResourceManager.AppContainers.Models.ContainerAppContainerRunningState? runningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppContainerRunningState?), string runningStateDetails = null, string logStreamEndpoint = null, string execEndpoint = null) { throw null; }\n+ [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]\n+ public static Azure.ResourceManager.AppContainers.Models.ContainerAppReplicaContainer ContainerAppReplicaContainer(string name, string containerId, bool? isReady, bool? isStarted, int? restartCount, Azure.ResourceManager.AppContainers.Models.ContainerAppContainerRunningState? runningState, string runningStateDetails, string logStreamEndpoint, string execEndpoint) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.ContainerAppReplicaContainer ContainerAppReplicaContainer(string name = null, string containerId = null, bool? isReady = default(bool?), bool? isStarted = default(bool?), int? restartCount = default(int?), Azure.ResourceManager.AppContainers.Models.ContainerAppContainerRunningState? runningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppContainerRunningState?), string runningStateDetails = null, string logStreamEndpoint = null, string execEndpoint = null, string debugEndpoint = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.ContainerAppReplicaData ContainerAppReplicaData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.DateTimeOffset? createdOn = default(System.DateTimeOffset?), Azure.ResourceManager.AppContainers.Models.ContainerAppReplicaRunningState? runningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppReplicaRunningState?), string runningStateDetails = null, System.Collections.Generic.IEnumerable containers = null, System.Collections.Generic.IEnumerable initContainers = null) { throw null; }\n- public static Azure.ResourceManager.AppContainers.ContainerAppRevisionData ContainerAppRevisionData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.DateTimeOffset? createdOn = default(System.DateTimeOffset?), System.DateTimeOffset? lastActiveOn = default(System.DateTimeOffset?), string fqdn = null, Azure.ResourceManager.AppContainers.Models.ContainerAppTemplate template = null, bool? isActive = default(bool?), int? replicas = default(int?), int? trafficWeight = default(int?), string provisioningError = null, Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionHealthState? healthState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionHealthState?), Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionProvisioningState? 
provisioningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionProvisioningState?), Azure.ResourceManager.AppContainers.Models.RevisionRunningState? runningState = default(Azure.ResourceManager.AppContainers.Models.RevisionRunningState?)) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.ContainerAppRevisionData ContainerAppRevisionData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.DateTimeOffset? createdOn = default(System.DateTimeOffset?), System.DateTimeOffset? lastActiveOn = default(System.DateTimeOffset?), string fqdn = null, Azure.ResourceManager.AppContainers.Models.ContainerAppTemplate template = null, bool? isActive = default(bool?), int? replicas = default(int?), int? trafficWeight = default(int?), System.Collections.Generic.IEnumerable labels = null, string provisioningError = null, Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionHealthState? healthState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionHealthState?), Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionProvisioningState?), Azure.ResourceManager.AppContainers.Models.RevisionRunningState? runningState = default(Azure.ResourceManager.AppContainers.Models.RevisionRunningState?)) { throw null; }\n+ [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]\n+ public static Azure.ResourceManager.AppContainers.ContainerAppRevisionData ContainerAppRevisionData(Azure.Core.ResourceIdentifier id, string name, Azure.Core.ResourceType resourceType, Azure.ResourceManager.Models.SystemData systemData, System.DateTimeOffset? createdOn, System.DateTimeOffset? lastActiveOn, string fqdn, Azure.ResourceManager.AppContainers.Models.ContainerAppTemplate template, bool? isActive, int? replicas, int? trafficWeight, string provisioningError, Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionHealthState? healthState, Azure.ResourceManager.AppContainers.Models.ContainerAppRevisionProvisioningState? provisioningState, Azure.ResourceManager.AppContainers.Models.RevisionRunningState? runningState) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.ContainerAppsBuildConfiguration ContainerAppsBuildConfiguration(string baseOS = null, string platform = null, string platformVersion = null, System.Collections.Generic.IEnumerable environmentVariables = null, System.Collections.Generic.IEnumerable preBuildSteps = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.ContainerAppsBuildResourceData ContainerAppsBuildResourceData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.BuildProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.BuildProvisioningState?), Azure.ResourceManager.AppContainers.Models.BuildStatus? 
buildStatus = default(Azure.ResourceManager.AppContainers.Models.BuildStatus?), Azure.ResourceManager.AppContainers.Models.ContainerRegistryWithCustomImage destinationContainerRegistry = null, Azure.ResourceManager.AppContainers.Models.ContainerAppsBuildConfiguration configuration = null, string logStreamEndpoint = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppSecret ContainerAppSecret(string name = null, string value = null, string identity = null, System.Uri keyVaultUri = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.ContainerAppSourceControlData ContainerAppSourceControlData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.ContainerAppSourceControlOperationState? operationState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppSourceControlOperationState?), System.Uri repoUri = null, string branch = null, Azure.ResourceManager.AppContainers.Models.ContainerAppGitHubActionConfiguration gitHubActionConfiguration = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.ContainerAppsPatchResourceData ContainerAppsPatchResourceData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.PatchProperties properties = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppUsage ContainerAppUsage(Azure.ResourceManager.AppContainers.Models.ContainerAppUsageUnit unit = default(Azure.ResourceManager.AppContainers.Models.ContainerAppUsageUnit), float currentValue = 0f, float limit = 0f, Azure.ResourceManager.AppContainers.Models.ContainerAppUsageName name = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppUsageName ContainerAppUsageName(string value = null, string localizedValue = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ContainerAppWorkloadProfileState ContainerAppWorkloadProfileState(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.ContainerAppWorkloadProfileStateProperties properties = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.ContainerExecutionStatus ContainerExecutionStatus(string name = null, int? 
code = default(int?), string additionalInformation = null, string status = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.DaprComponentResiliencyPolicyData DaprComponentResiliencyPolicyData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.DaprComponentResiliencyPolicyConfiguration inboundPolicy = null, Azure.ResourceManager.AppContainers.Models.DaprComponentResiliencyPolicyConfiguration outboundPolicy = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.DaprSubscriptionData DaprSubscriptionData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, string pubsubName = null, string topic = null, string deadLetterTopic = null, Azure.ResourceManager.AppContainers.Models.DaprSubscriptionRoutes routes = null, System.Collections.Generic.IEnumerable scopes = null, System.Collections.Generic.IDictionary metadata = null, Azure.ResourceManager.AppContainers.Models.DaprSubscriptionBulkSubscribeConfig bulkSubscribe = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.DotNetComponentData DotNetComponentData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.DotNetComponentType? componentType = default(Azure.ResourceManager.AppContainers.Models.DotNetComponentType?), Azure.ResourceManager.AppContainers.Models.DotNetComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.DotNetComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, System.Collections.Generic.IEnumerable serviceBinds = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.HttpRouteConfigData HttpRouteConfigData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.HttpRouteConfigProperties properties = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.HttpRouteConfigProperties HttpRouteConfigProperties(Azure.ResourceManager.AppContainers.Models.HttpRouteProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.HttpRouteProvisioningState?), System.Collections.Generic.IEnumerable provisioningErrors = null, string fqdn = null, System.Collections.Generic.IEnumerable customDomains = null, System.Collections.Generic.IEnumerable rules = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.HttpRouteProvisioningErrors HttpRouteProvisioningErrors(System.DateTimeOffset? 
timestamp = default(System.DateTimeOffset?), string message = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.JavaComponentData JavaComponentData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.AppContainers.Models.JavaComponentProperties properties = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.JavaComponentProperties JavaComponentProperties(string componentType = null, Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.LabelHistoryData LabelHistoryData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.Collections.Generic.IEnumerable labelHistoryRecords = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.LabelHistoryRecordItem LabelHistoryRecordItem(string revision = null, System.DateTimeOffset? start = default(System.DateTimeOffset?), System.DateTimeOffset? stop = default(System.DateTimeOffset?), Azure.ResourceManager.AppContainers.Models.Status? status = default(Azure.ResourceManager.AppContainers.Models.Status?)) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.LogicAppData LogicAppData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.BinaryData properties = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.MaintenanceConfigurationResourceData MaintenanceConfigurationResourceData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.Collections.Generic.IEnumerable scheduledEntries = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.ManagedCertificateProperties ManagedCertificateProperties(Azure.ResourceManager.AppContainers.Models.ContainerAppCertificateProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.ContainerAppCertificateProvisioningState?), string subjectName = null, string error = null, Azure.ResourceManager.AppContainers.Models.ManagedCertificateDomainControlValidation? domainControlValidation = default(Azure.ResourceManager.AppContainers.Models.ManagedCertificateDomainControlValidation?), string validationToken = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.NacosComponent NacosComponent(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? 
provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null, string ingressFqdn = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.PatchDetails PatchDetails(string targetContainerName = null, string targetImage = null, System.DateTimeOffset lastDetectionOn = default(System.DateTimeOffset), Azure.ResourceManager.AppContainers.Models.DetectionStatus detectionStatus = default(Azure.ResourceManager.AppContainers.Models.DetectionStatus), string newImageName = null, Azure.ResourceManager.AppContainers.Models.PatchDetailsNewLayer newLayer = null, Azure.ResourceManager.AppContainers.Models.PatchDetailsOldLayer oldLayer = null, Azure.ResourceManager.AppContainers.Models.PatchType? patchType = default(Azure.ResourceManager.AppContainers.Models.PatchType?)) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.PatchDetailsNewLayer PatchDetailsNewLayer(string name = null, string frameworkAndVersion = null, string osAndVersion = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.PatchDetailsOldLayer PatchDetailsOldLayer(string name = null, string frameworkAndVersion = null, string osAndVersion = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.PatchProperties PatchProperties(Azure.Core.ResourceIdentifier targetEnvironmentId = null, Azure.Core.ResourceIdentifier targetContainerAppId = null, Azure.Core.ResourceIdentifier targetRevisionId = null, Azure.ResourceManager.AppContainers.Models.PatchApplyStatus? patchApplyStatus = default(Azure.ResourceManager.AppContainers.Models.PatchApplyStatus?), System.DateTimeOffset? createdOn = default(System.DateTimeOffset?), System.DateTimeOffset? lastModifiedOn = default(System.DateTimeOffset?), System.Collections.Generic.IEnumerable patchDetails = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.ReplicaExecutionStatus ReplicaExecutionStatus(string name = null, System.Collections.Generic.IEnumerable containers = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.SessionPoolData SessionPoolData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, System.Collections.Generic.IDictionary tags = null, Azure.Core.AzureLocation location = default(Azure.Core.AzureLocation), Azure.ResourceManager.Models.ManagedServiceIdentity identity = null, Azure.Core.ResourceIdentifier environmentId = null, Azure.ResourceManager.AppContainers.Models.ContainerType? containerType = default(Azure.ResourceManager.AppContainers.Models.ContainerType?), Azure.ResourceManager.AppContainers.Models.PoolManagementType? poolManagementType = default(Azure.ResourceManager.AppContainers.Models.PoolManagementType?), int? 
nodeCount = default(int?), Azure.ResourceManager.AppContainers.Models.SessionPoolScaleConfiguration scaleConfiguration = null, System.Collections.Generic.IEnumerable secrets = null, Azure.ResourceManager.AppContainers.Models.SessionPoolLifecycleConfiguration dynamicPoolLifecycleConfiguration = null, Azure.ResourceManager.AppContainers.Models.CustomContainerTemplate customContainerTemplate = null, Azure.ResourceManager.AppContainers.Models.SessionNetworkStatus? sessionNetworkStatus = default(Azure.ResourceManager.AppContainers.Models.SessionNetworkStatus?), System.Uri poolManagementEndpoint = null, Azure.ResourceManager.AppContainers.Models.SessionPoolProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.SessionPoolProvisioningState?), System.Collections.Generic.IEnumerable managedIdentitySettings = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.SpringBootAdminComponent SpringBootAdminComponent(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null, string ingressFqdn = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.SpringCloudConfigComponent SpringCloudConfigComponent(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null) { throw null; }\n public static Azure.ResourceManager.AppContainers.Models.SpringCloudEurekaComponent SpringCloudEurekaComponent(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null, string ingressFqdn = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.SpringCloudGatewayComponent SpringCloudGatewayComponent(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState? provisioningState = default(Azure.ResourceManager.AppContainers.Models.JavaComponentProvisioningState?), System.Collections.Generic.IEnumerable configurations = null, Azure.ResourceManager.AppContainers.Models.JavaComponentPropertiesScale scale = null, System.Collections.Generic.IEnumerable serviceBinds = null, string ingressFqdn = null, System.Collections.Generic.IEnumerable springCloudGatewayRoutes = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.WorkflowEnvelopeData WorkflowEnvelopeData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, string kind = null, Azure.Core.AzureLocation? 
location = default(Azure.Core.AzureLocation?), Azure.ResourceManager.AppContainers.Models.WorkflowEnvelopeProperties properties = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.WorkflowEnvelopeProperties WorkflowEnvelopeProperties(System.BinaryData files = null, Azure.ResourceManager.AppContainers.Models.WorkflowState? flowState = default(Azure.ResourceManager.AppContainers.Models.WorkflowState?), Azure.ResourceManager.AppContainers.Models.WorkflowHealth health = null) { throw null; }\n+ public static Azure.ResourceManager.AppContainers.Models.WorkflowHealth WorkflowHealth(Azure.ResourceManager.AppContainers.Models.WorkflowHealthState state = Azure.ResourceManager.AppContainers.Models.WorkflowHealthState.Unknown, Azure.ResponseError error = null) { throw null; }\n+ }\n+ public partial class BlobStorageTokenStore : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public BlobStorageTokenStore() { }\n+ public System.Uri BlobContainerUri { get { throw null; } set { } }\n+ public string ClientId { get { throw null; } set { } }\n+ public string ManagedIdentityResourceId { get { throw null; } set { } }\n+ public string SasUrlSettingName { get { throw null; } set { } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.BlobStorageTokenStore System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.BlobStorageTokenStore System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class BuildConfiguration : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public BuildConfiguration() { }\n+ public string BaseOS { get { throw null; } set { } }\n+ public System.Collections.Generic.IList EnvironmentVariables { get { throw null; } }\n+ public string Platform { get { throw null; } set { } }\n+ public string PlatformVersion { get { throw null; } set { } }\n+ public System.Collections.Generic.IList PreBuildSteps { get { throw null; } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.BuildConfiguration System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.BuildConfiguration 
System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n [System.Runtime.InteropServices.StructLayoutAttribute(System.Runtime.InteropServices.LayoutKind.Sequential)]\n- public readonly partial struct ContainerAppAccessMode : System.IEquatable\n+ public readonly partial struct BuilderProvisioningState : System.IEquatable", - "comment_created_at": "2025-05-23T08:42:50+00:00", - "comment_author": "ArcturusZhang", - "comment_body": "we should rename this to `AppContainersBuilderProvisioningState`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2104126288", - "pr_number": 50236, - "pr_file": "sdk/containerapps/Azure.ResourceManager.AppContainers/api/Azure.ResourceManager.AppContainers.net8.0.cs", - "created_at": "2025-05-23T08:45:45+00:00", - "commented_code": "string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class ContainerExecutionStatus : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n internal ContainerExecutionStatus() { }\n public string AdditionalInformation { get { throw null; } }\n public int? 
Code { get { throw null; } }\n public string Name { get { throw null; } }\n public string Status { get { throw null; } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.ContainerExecutionStatus System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.ContainerExecutionStatus System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class ContainerRegistry : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2104126288", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50236, - "pr_file": "sdk/containerapps/Azure.ResourceManager.AppContainers/api/Azure.ResourceManager.AppContainers.net8.0.cs", - "discussion_id": "2104126288", - "commented_code": "@@ -3686,6 +4892,44 @@ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n+ public partial class ContainerExecutionStatus : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ internal ContainerExecutionStatus() { }\n+ public string AdditionalInformation { get { throw null; } }\n+ public int? 
Code { get { throw null; } }\n+ public string Name { get { throw null; } }\n+ public string Status { get { throw null; } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.ContainerExecutionStatus System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.ContainerExecutionStatus System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class ContainerRegistry : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel", - "comment_created_at": "2025-05-23T08:45:45+00:00", - "comment_author": "ArcturusZhang", - "comment_body": "this name collides with the namespace of another RP, we should rename it.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2104143986", - "pr_number": 50236, - "pr_file": "sdk/containerapps/Azure.ResourceManager.AppContainers/api/Azure.ResourceManager.AppContainers.net8.0.cs", - "created_at": "2025-05-23T08:54:42+00:00", - "commented_code": "string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class Header : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public Header() { }\n public string Key { get { throw null; } set { } }\n public string Value { get { throw null; } set { } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.Header System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.Header System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class HeaderMatch : System.ClientModel.Primitives.IJsonModel, 
System.ClientModel.Primitives.IPersistableModel\n {\n public HeaderMatch() { }\n public string ExactMatch { get { throw null; } set { } }\n public string Header { get { throw null; } set { } }\n public string PrefixMatch { get { throw null; } set { } }\n public string RegexMatch { get { throw null; } set { } }\n public string SuffixMatch { get { throw null; } set { } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.HeaderMatch System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.HeaderMatch System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class HttpConnectionPool : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public HttpConnectionPool() { }\n public int? Http1MaxPendingRequests { get { throw null; } set { } }\n public int? Http2MaxRequests { get { throw null; } set { } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.HttpConnectionPool System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.AppContainers.Models.HttpConnectionPool System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class HttpGet : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2104143986", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50236, - "pr_file": "sdk/containerapps/Azure.ResourceManager.AppContainers/api/Azure.ResourceManager.AppContainers.net8.0.cs", - "discussion_id": "2104143986", - "commented_code": "@@ -3742,6 +5242,222 @@ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions 
options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n+ public partial class Header : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public Header() { }\n+ public string Key { get { throw null; } set { } }\n+ public string Value { get { throw null; } set { } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.Header System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.Header System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class HeaderMatch : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public HeaderMatch() { }\n+ public string ExactMatch { get { throw null; } set { } }\n+ public string Header { get { throw null; } set { } }\n+ public string PrefixMatch { get { throw null; } set { } }\n+ public string RegexMatch { get { throw null; } set { } }\n+ public string SuffixMatch { get { throw null; } set { } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.HeaderMatch System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.HeaderMatch System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class HttpConnectionPool : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public HttpConnectionPool() { }\n+ public int? Http1MaxPendingRequests { get { throw null; } set { } }\n+ public int? 
Http2MaxRequests { get { throw null; } set { } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.HttpConnectionPool System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.AppContainers.Models.HttpConnectionPool System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class HttpGet : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel", - "comment_created_at": "2025-05-23T08:54:42+00:00", - "comment_author": "ArcturusZhang", - "comment_body": "this is very likely to collide with system types, we need to specify its name", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1986643644", - "pr_number": 48446, - "pr_file": "sdk/impactreporting/Azure.ResourceManager.ImpactReporting/api/Azure.ResourceManager.ImpactReporting.net8.0.cs", - "created_at": "2025-03-10T05:50:44+00:00", - "commented_code": "namespace Azure.ResourceManager.ImpactReporting\n{\n public partial class ConnectorCollection : Azure.ResourceManager.ArmCollection, System.Collections.Generic.IAsyncEnumerable, System.Collections.Generic.IEnumerable, System.Collections.IEnumerable\n {\n protected ConnectorCollection() { }\n public virtual Azure.ResourceManager.ArmOperation CreateOrUpdate(Azure.WaitUntil waitUntil, string connectorName, Azure.ResourceManager.ImpactReporting.ConnectorData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> CreateOrUpdateAsync(Azure.WaitUntil waitUntil, string connectorName, Azure.ResourceManager.ImpactReporting.ConnectorData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Exists(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> ExistsAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Get(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Pageable GetAll(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.AsyncPageable GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetAsync(string 
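The AppContainers review note above flags overly generic model names (`Header`, `HttpGet`, and similar) as likely to collide with common .NET types and asks for the names to be made explicit. That kind of rename is normally done in the service's client.tsp rather than in the generated C#. The block below is a hedged sketch only: the spec namespace `Microsoft.App`, the model paths, and the replacement names are assumptions for illustration, not taken from this PR.

```typespec
// client.tsp — hedged sketch of a rename-only customization.
// Assumed: the AppContainers spec namespace is Microsoft.App and it defines
// models named Header and HttpGet; adjust the paths to the real spec.
import "./main.tsp";
import "@azure-tools/typespec-client-generator-core";

using Azure.ClientGenerator.Core;

// Give the overly generic models service-specific client names so the
// generated .NET types no longer collide with common framework type names.
@@clientName(Microsoft.App.Header, "AppContainerHttpHeader", "csharp");
@@clientName(Microsoft.App.HttpGet, "AppContainerHttpGet", "csharp");
```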
connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.NullableResponse GetIfExists(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetIfExistsAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n System.Collections.Generic.IAsyncEnumerator System.Collections.Generic.IAsyncEnumerable.GetAsyncEnumerator(System.Threading.CancellationToken cancellationToken) { throw null; }\n System.Collections.Generic.IEnumerator System.Collections.Generic.IEnumerable.GetEnumerator() { throw null; }\n System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { throw null; }\n }\n public partial class ConnectorData : Azure.ResourceManager.Models.ResourceData, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "1986643644", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 48446, - "pr_file": "sdk/impactreporting/Azure.ResourceManager.ImpactReporting/api/Azure.ResourceManager.ImpactReporting.net8.0.cs", - "discussion_id": "1986643644", - "commented_code": "@@ -0,0 +1,616 @@\n+namespace Azure.ResourceManager.ImpactReporting\n+{\n+ public partial class ConnectorCollection : Azure.ResourceManager.ArmCollection, System.Collections.Generic.IAsyncEnumerable, System.Collections.Generic.IEnumerable, System.Collections.IEnumerable\n+ {\n+ protected ConnectorCollection() { }\n+ public virtual Azure.ResourceManager.ArmOperation CreateOrUpdate(Azure.WaitUntil waitUntil, string connectorName, Azure.ResourceManager.ImpactReporting.ConnectorData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> CreateOrUpdateAsync(Azure.WaitUntil waitUntil, string connectorName, Azure.ResourceManager.ImpactReporting.ConnectorData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Exists(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> ExistsAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Get(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Pageable GetAll(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.AsyncPageable GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.NullableResponse GetIfExists(string connectorName, System.Threading.CancellationToken cancellationToken = 
default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetIfExistsAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ System.Collections.Generic.IAsyncEnumerator System.Collections.Generic.IAsyncEnumerable.GetAsyncEnumerator(System.Threading.CancellationToken cancellationToken) { throw null; }\n+ System.Collections.Generic.IEnumerator System.Collections.Generic.IEnumerable.GetEnumerator() { throw null; }\n+ System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { throw null; }\n+ }\n+ public partial class ConnectorData : Azure.ResourceManager.Models.ResourceData, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel", - "comment_created_at": "2025-03-10T05:50:44+00:00", - "comment_author": "ArcturusZhang", - "comment_body": "we need a better name than this `ConnectorData` - this is quite generic.\r\nWe need to add a `@@clientName` in your spec to rename this model to a more verbose name.\r\nPer rules in the .net generator, the corresponding resource and collection class will change accordingly.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1986669206", - "pr_number": 48446, - "pr_file": "sdk/impactreporting/Azure.ResourceManager.ImpactReporting/api/Azure.ResourceManager.ImpactReporting.net8.0.cs", - "created_at": "2025-03-10T06:17:01+00:00", - "commented_code": "namespace Azure.ResourceManager.ImpactReporting\n{\n public partial class ConnectorCollection : Azure.ResourceManager.ArmCollection, System.Collections.Generic.IAsyncEnumerable, System.Collections.Generic.IEnumerable, System.Collections.IEnumerable\n {\n protected ConnectorCollection() { }\n public virtual Azure.ResourceManager.ArmOperation CreateOrUpdate(Azure.WaitUntil waitUntil, string connectorName, Azure.ResourceManager.ImpactReporting.ConnectorData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> CreateOrUpdateAsync(Azure.WaitUntil waitUntil, string connectorName, Azure.ResourceManager.ImpactReporting.ConnectorData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Exists(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> ExistsAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Get(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Pageable GetAll(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.AsyncPageable GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.NullableResponse GetIfExists(string connectorName, System.Threading.CancellationToken 
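The ImpactReporting review note above asks for a `@@clientName` entry in the spec so that `ConnectorData` gets a more specific name, with the corresponding resource and collection classes following automatically per the .NET generator's rules. A minimal sketch, assuming the spec namespace is `Microsoft.Impact` and the ARM resource model is named `Connector` (both assumptions for illustration, as is the chosen replacement name):

```typespec
// client.tsp — hedged sketch; namespace, model path and new name are assumed.
import "./main.tsp";
import "@azure-tools/typespec-client-generator-core";

using Azure.ClientGenerator.Core;

// Rename the resource model; per the reviewer's note, the generated
// *Data, *Resource and *Collection types pick up the new name automatically.
@@clientName(Microsoft.Impact.Connector, "ImpactReportingConnector", "csharp");
```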
cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetIfExistsAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n System.Collections.Generic.IAsyncEnumerator System.Collections.Generic.IAsyncEnumerable.GetAsyncEnumerator(System.Threading.CancellationToken cancellationToken) { throw null; }\n System.Collections.Generic.IEnumerator System.Collections.Generic.IEnumerable.GetEnumerator() { throw null; }\n System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { throw null; }\n }\n public partial class ConnectorData : Azure.ResourceManager.Models.ResourceData, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public ConnectorData() { }\n public Azure.ResourceManager.ImpactReporting.Models.ConnectorProperties Properties { get { throw null; } set { } }\n protected override void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.ConnectorData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.ConnectorData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class ConnectorResource : Azure.ResourceManager.ArmResource, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public static readonly Azure.Core.ResourceType ResourceType;\n protected ConnectorResource() { }\n public virtual Azure.ResourceManager.ImpactReporting.ConnectorData Data { get { throw null; } }\n public virtual bool HasData { get { throw null; } }\n public static Azure.Core.ResourceIdentifier CreateResourceIdentifier(string subscriptionId, string connectorName) { throw null; }\n public virtual Azure.ResourceManager.ArmOperation Delete(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task DeleteAsync(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Get(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n Azure.ResourceManager.ImpactReporting.ConnectorData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) 
{ throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.ConnectorData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n public virtual Azure.Response Update(Azure.ResourceManager.ImpactReporting.Models.ConnectorPatch patch, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> UpdateAsync(Azure.ResourceManager.ImpactReporting.Models.ConnectorPatch patch, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n }\n public partial class ImpactCategoryCollection : Azure.ResourceManager.ArmCollection\n {\n protected ImpactCategoryCollection() { }\n public virtual Azure.Response Exists(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> ExistsAsync(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Get(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Pageable GetAll(string resourceType, string categoryName = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.AsyncPageable GetAllAsync(string resourceType, string categoryName = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetAsync(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.NullableResponse GetIfExists(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetIfExistsAsync(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n }\n public partial class ImpactCategoryData : Azure.ResourceManager.Models.ResourceData, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n internal ImpactCategoryData() { }\n public Azure.ResourceManager.ImpactReporting.Models.ImpactCategoryProperties Properties { get { throw null; } }\n protected override void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.ImpactCategoryData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, 
System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.ImpactCategoryData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class ImpactCategoryResource : Azure.ResourceManager.ArmResource, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public static readonly Azure.Core.ResourceType ResourceType;\n protected ImpactCategoryResource() { }\n public virtual Azure.ResourceManager.ImpactReporting.ImpactCategoryData Data { get { throw null; } }\n public virtual bool HasData { get { throw null; } }\n public static Azure.Core.ResourceIdentifier CreateResourceIdentifier(string subscriptionId, string impactCategoryName) { throw null; }\n public virtual Azure.Response Get(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n Azure.ResourceManager.ImpactReporting.ImpactCategoryData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.ImpactCategoryData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public static partial class ImpactReportingExtensions\n {\n public static Azure.Response GetConnector(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public static System.Threading.Tasks.Task> GetConnectorAsync(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.ConnectorResource GetConnectorResource(this Azure.ResourceManager.ArmClient client, Azure.Core.ResourceIdentifier id) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.ConnectorCollection GetConnectors(this Azure.ResourceManager.Resources.SubscriptionResource 
subscriptionResource) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.ImpactCategoryCollection GetImpactCategories(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource) { throw null; }\n public static Azure.Response GetImpactCategory(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public static System.Threading.Tasks.Task> GetImpactCategoryAsync(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.ImpactCategoryResource GetImpactCategoryResource(this Azure.ResourceManager.ArmClient client, Azure.Core.ResourceIdentifier id) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.InsightResource GetInsightResource(this Azure.ResourceManager.ArmClient client, Azure.Core.ResourceIdentifier id) { throw null; }\n public static Azure.Response GetWorkloadImpact(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public static System.Threading.Tasks.Task> GetWorkloadImpactAsync(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.WorkloadImpactResource GetWorkloadImpactResource(this Azure.ResourceManager.ArmClient client, Azure.Core.ResourceIdentifier id) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.WorkloadImpactCollection GetWorkloadImpacts(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource) { throw null; }\n }\n public partial class InsightCollection : Azure.ResourceManager.ArmCollection, System.Collections.Generic.IAsyncEnumerable, System.Collections.Generic.IEnumerable, System.Collections.IEnumerable\n {\n protected InsightCollection() { }\n public virtual Azure.ResourceManager.ArmOperation CreateOrUpdate(Azure.WaitUntil waitUntil, string insightName, Azure.ResourceManager.ImpactReporting.InsightData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> CreateOrUpdateAsync(Azure.WaitUntil waitUntil, string insightName, Azure.ResourceManager.ImpactReporting.InsightData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Exists(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> ExistsAsync(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Get(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Pageable 
GetAll(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.AsyncPageable GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetAsync(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.NullableResponse GetIfExists(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetIfExistsAsync(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n System.Collections.Generic.IAsyncEnumerator System.Collections.Generic.IAsyncEnumerable.GetAsyncEnumerator(System.Threading.CancellationToken cancellationToken) { throw null; }\n System.Collections.Generic.IEnumerator System.Collections.Generic.IEnumerable.GetEnumerator() { throw null; }\n System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { throw null; }\n }\n public partial class InsightData : Azure.ResourceManager.Models.ResourceData, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public InsightData() { }\n public Azure.ResourceManager.ImpactReporting.Models.InsightProperties Properties { get { throw null; } set { } }\n protected override void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.InsightData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.InsightData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class InsightResource : Azure.ResourceManager.ArmResource, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public static readonly Azure.Core.ResourceType ResourceType;\n protected InsightResource() { }\n public virtual Azure.ResourceManager.ImpactReporting.InsightData Data { get { throw null; } }\n public virtual bool HasData { get { throw null; } }\n public static Azure.Core.ResourceIdentifier CreateResourceIdentifier(string subscriptionId, string workloadImpactName, string insightName) { throw null; }\n public virtual Azure.ResourceManager.ArmOperation Delete(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task DeleteAsync(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = 
default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Get(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n Azure.ResourceManager.ImpactReporting.InsightData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.InsightData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n public virtual Azure.ResourceManager.ArmOperation Update(Azure.WaitUntil waitUntil, Azure.ResourceManager.ImpactReporting.InsightData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> UpdateAsync(Azure.WaitUntil waitUntil, Azure.ResourceManager.ImpactReporting.InsightData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n }\n public partial class WorkloadImpactCollection : Azure.ResourceManager.ArmCollection, System.Collections.Generic.IAsyncEnumerable, System.Collections.Generic.IEnumerable, System.Collections.IEnumerable\n {\n protected WorkloadImpactCollection() { }\n public virtual Azure.ResourceManager.ArmOperation CreateOrUpdate(Azure.WaitUntil waitUntil, string workloadImpactName, Azure.ResourceManager.ImpactReporting.WorkloadImpactData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> CreateOrUpdateAsync(Azure.WaitUntil waitUntil, string workloadImpactName, Azure.ResourceManager.ImpactReporting.WorkloadImpactData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Exists(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> ExistsAsync(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Get(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Pageable GetAll(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.AsyncPageable GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public 
virtual System.Threading.Tasks.Task> GetAsync(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.NullableResponse GetIfExists(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetIfExistsAsync(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n System.Collections.Generic.IAsyncEnumerator System.Collections.Generic.IAsyncEnumerable.GetAsyncEnumerator(System.Threading.CancellationToken cancellationToken) { throw null; }\n System.Collections.Generic.IEnumerator System.Collections.Generic.IEnumerable.GetEnumerator() { throw null; }\n System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { throw null; }\n }\n public partial class WorkloadImpactData : Azure.ResourceManager.Models.ResourceData, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public WorkloadImpactData() { }\n public Azure.ResourceManager.ImpactReporting.Models.WorkloadImpactProperties Properties { get { throw null; } set { } }\n protected override void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.WorkloadImpactData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.WorkloadImpactData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class WorkloadImpactResource : Azure.ResourceManager.ArmResource, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public static readonly Azure.Core.ResourceType ResourceType;\n protected WorkloadImpactResource() { }\n public virtual Azure.ResourceManager.ImpactReporting.WorkloadImpactData Data { get { throw null; } }\n public virtual bool HasData { get { throw null; } }\n public static Azure.Core.ResourceIdentifier CreateResourceIdentifier(string subscriptionId, string workloadImpactName) { throw null; }\n public virtual Azure.ResourceManager.ArmOperation Delete(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task DeleteAsync(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response Get(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual 
System.Threading.Tasks.Task> GetAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response GetInsight(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetInsightAsync(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.ResourceManager.ImpactReporting.InsightCollection GetInsights() { throw null; }\n Azure.ResourceManager.ImpactReporting.WorkloadImpactData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.WorkloadImpactData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n public virtual Azure.ResourceManager.ArmOperation Update(Azure.WaitUntil waitUntil, Azure.ResourceManager.ImpactReporting.WorkloadImpactData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> UpdateAsync(Azure.WaitUntil waitUntil, Azure.ResourceManager.ImpactReporting.WorkloadImpactData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n }\n}\nnamespace Azure.ResourceManager.ImpactReporting.Mocking\n{\n public partial class MockableImpactReportingArmClient : Azure.ResourceManager.ArmResource\n {\n protected MockableImpactReportingArmClient() { }\n public virtual Azure.ResourceManager.ImpactReporting.ConnectorResource GetConnectorResource(Azure.Core.ResourceIdentifier id) { throw null; }\n public virtual Azure.ResourceManager.ImpactReporting.ImpactCategoryResource GetImpactCategoryResource(Azure.Core.ResourceIdentifier id) { throw null; }\n public virtual Azure.ResourceManager.ImpactReporting.InsightResource GetInsightResource(Azure.Core.ResourceIdentifier id) { throw null; }\n public virtual Azure.ResourceManager.ImpactReporting.WorkloadImpactResource GetWorkloadImpactResource(Azure.Core.ResourceIdentifier id) { throw null; }\n }\n public partial class MockableImpactReportingSubscriptionResource : Azure.ResourceManager.ArmResource\n {\n protected MockableImpactReportingSubscriptionResource() { }\n public virtual Azure.Response GetConnector(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetConnectorAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.ResourceManager.ImpactReporting.ConnectorCollection GetConnectors() { throw null; 
}\n public virtual Azure.ResourceManager.ImpactReporting.ImpactCategoryCollection GetImpactCategories() { throw null; }\n public virtual Azure.Response GetImpactCategory(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetImpactCategoryAsync(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.Response GetWorkloadImpact(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual System.Threading.Tasks.Task> GetWorkloadImpactAsync(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n public virtual Azure.ResourceManager.ImpactReporting.WorkloadImpactCollection GetWorkloadImpacts() { throw null; }\n }\n}\nnamespace Azure.ResourceManager.ImpactReporting.Models\n{\n public static partial class ArmImpactReportingModelFactory\n {\n public static Azure.ResourceManager.ImpactReporting.ConnectorData ConnectorData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.ImpactReporting.Models.ConnectorProperties properties = null) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.Models.ConnectorProperties ConnectorProperties(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState? provisioningState = default(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState?), string connectorId = null, string tenantId = null, Azure.ResourceManager.ImpactReporting.Models.Platform connectorType = default(Azure.ResourceManager.ImpactReporting.Models.Platform), System.DateTimeOffset lastRunTimeStamp = default(System.DateTimeOffset)) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.ImpactCategoryData ImpactCategoryData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.ImpactReporting.Models.ImpactCategoryProperties properties = null) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.Models.ImpactCategoryProperties ImpactCategoryProperties(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState? provisioningState = default(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState?), string categoryId = null, string parentCategoryId = null, string description = null, System.Collections.Generic.IEnumerable requiredImpactProperties = null) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.InsightData InsightData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.ImpactReporting.Models.InsightProperties properties = null) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.Models.InsightProperties InsightProperties(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState? 
provisioningState = default(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState?), string category = null, string status = null, string eventId = null, string groupId = null, Azure.ResourceManager.ImpactReporting.Models.Content content = null, System.DateTimeOffset? eventOn = default(System.DateTimeOffset?), string insightUniqueId = null, Azure.ResourceManager.ImpactReporting.Models.ImpactDetails impact = null, Azure.ResourceManager.ImpactReporting.Models.InsightPropertiesAdditionalDetails additionalDetails = null) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.Models.RequiredImpactProperties RequiredImpactProperties(string name = null, System.Collections.Generic.IEnumerable allowedValues = null) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.WorkloadImpactData WorkloadImpactData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.ImpactReporting.Models.WorkloadImpactProperties properties = null) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.Models.WorkloadImpactProperties WorkloadImpactProperties(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState? provisioningState = default(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState?), System.DateTimeOffset startOn = default(System.DateTimeOffset), System.DateTimeOffset? endOn = default(System.DateTimeOffset?), string impactedResourceId = null, string impactUniqueId = null, System.DateTimeOffset? reportedTimeUtc = default(System.DateTimeOffset?), string impactCategory = null, string impactDescription = null, System.Collections.Generic.IEnumerable armCorrelationIds = null, System.Collections.Generic.IEnumerable performance = null, Azure.ResourceManager.ImpactReporting.Models.Connectivity connectivity = null, Azure.ResourceManager.ImpactReporting.Models.WorkloadImpactPropertiesAdditionalProperties additionalProperties = null, Azure.ResourceManager.ImpactReporting.Models.ErrorDetailProperties errorDetails = null, Azure.ResourceManager.ImpactReporting.Models.Workload workload = null, string impactGroupId = null, Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel? confidenceLevel = default(Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel?), Azure.ResourceManager.ImpactReporting.Models.ClientIncidentDetails clientIncidentDetails = null) { throw null; }\n }\n public partial class ClientIncidentDetails : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public ClientIncidentDetails() { }\n public string ClientIncidentId { get { throw null; } set { } }\n public Azure.ResourceManager.ImpactReporting.Models.IncidentSource? 
ClientIncidentSource { get { throw null; } set { } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.Models.ClientIncidentDetails System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.Models.ClientIncidentDetails System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n [System.Runtime.InteropServices.StructLayoutAttribute(System.Runtime.InteropServices.LayoutKind.Sequential)]\n public readonly partial struct ConfidenceLevel : System.IEquatable\n {\n private readonly object _dummy;\n private readonly int _dummyPrimitive;\n public ConfidenceLevel(string value) { throw null; }\n public static Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel High { get { throw null; } }\n public static Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel Low { get { throw null; } }\n public static Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel Medium { get { throw null; } }\n public bool Equals(Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel other) { throw null; }\n [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]\n public override bool Equals(object obj) { throw null; }\n [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]\n public override int GetHashCode() { throw null; }\n public static bool operator ==(Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel left, Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel right) { throw null; }\n public static implicit operator Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel (string value) { throw null; }\n public static bool operator !=(Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel left, Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel right) { throw null; }\n public override string ToString() { throw null; }\n }\n public partial class Connectivity : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public Connectivity() { }\n public int? Port { get { throw null; } set { } }\n public Azure.ResourceManager.ImpactReporting.Models.Protocol? 
Protocol { get { throw null; } set { } }\n public string SourceAzureResourceId { get { throw null; } set { } }\n public string TargetAzureResourceId { get { throw null; } set { } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.Models.Connectivity System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.Models.Connectivity System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class ConnectorPatch : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public ConnectorPatch() { }\n public Azure.ResourceManager.ImpactReporting.Models.Platform? ConnectorType { get { throw null; } set { } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.Models.ConnectorPatch System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.Models.ConnectorPatch System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class ConnectorProperties : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public ConnectorProperties(string connectorId, string tenantId, Azure.ResourceManager.ImpactReporting.Models.Platform connectorType, System.DateTimeOffset lastRunTimeStamp) { }\n public string ConnectorId { get { throw null; } }\n public Azure.ResourceManager.ImpactReporting.Models.Platform ConnectorType { get { throw null; } set { } }\n public System.DateTimeOffset LastRunTimeStamp { get { throw null; } }\n public Azure.ResourceManager.ImpactReporting.Models.ProvisioningState? 
ProvisioningState { get { throw null; } }\n public string TenantId { get { throw null; } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.Models.ConnectorProperties System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.Models.ConnectorProperties System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class Content : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n {\n public Content(string title, string description) { }\n public string Description { get { throw null; } set { } }\n public string Title { get { throw null; } set { } }\n protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.Models.Content System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n Azure.ResourceManager.ImpactReporting.Models.Content System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n }\n public partial class ErrorDetailProperties : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "1986669206", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 48446, - "pr_file": "sdk/impactreporting/Azure.ResourceManager.ImpactReporting/api/Azure.ResourceManager.ImpactReporting.net8.0.cs", - "discussion_id": "1986669206", - "commented_code": "@@ -0,0 +1,616 @@\n+namespace Azure.ResourceManager.ImpactReporting\n+{\n+ public partial class ConnectorCollection : Azure.ResourceManager.ArmCollection, System.Collections.Generic.IAsyncEnumerable, System.Collections.Generic.IEnumerable, System.Collections.IEnumerable\n+ {\n+ protected ConnectorCollection() { }\n+ public virtual Azure.ResourceManager.ArmOperation CreateOrUpdate(Azure.WaitUntil waitUntil, string connectorName, Azure.ResourceManager.ImpactReporting.ConnectorData data, 
System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> CreateOrUpdateAsync(Azure.WaitUntil waitUntil, string connectorName, Azure.ResourceManager.ImpactReporting.ConnectorData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Exists(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> ExistsAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Get(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Pageable GetAll(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.AsyncPageable GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.NullableResponse GetIfExists(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetIfExistsAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ System.Collections.Generic.IAsyncEnumerator System.Collections.Generic.IAsyncEnumerable.GetAsyncEnumerator(System.Threading.CancellationToken cancellationToken) { throw null; }\n+ System.Collections.Generic.IEnumerator System.Collections.Generic.IEnumerable.GetEnumerator() { throw null; }\n+ System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { throw null; }\n+ }\n+ public partial class ConnectorData : Azure.ResourceManager.Models.ResourceData, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public ConnectorData() { }\n+ public Azure.ResourceManager.ImpactReporting.Models.ConnectorProperties Properties { get { throw null; } set { } }\n+ protected override void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.ConnectorData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.ConnectorData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData 
System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class ConnectorResource : Azure.ResourceManager.ArmResource, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public static readonly Azure.Core.ResourceType ResourceType;\n+ protected ConnectorResource() { }\n+ public virtual Azure.ResourceManager.ImpactReporting.ConnectorData Data { get { throw null; } }\n+ public virtual bool HasData { get { throw null; } }\n+ public static Azure.Core.ResourceIdentifier CreateResourceIdentifier(string subscriptionId, string connectorName) { throw null; }\n+ public virtual Azure.ResourceManager.ArmOperation Delete(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task DeleteAsync(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Get(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ Azure.ResourceManager.ImpactReporting.ConnectorData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.ConnectorData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ public virtual Azure.Response Update(Azure.ResourceManager.ImpactReporting.Models.ConnectorPatch patch, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> UpdateAsync(Azure.ResourceManager.ImpactReporting.Models.ConnectorPatch patch, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ }\n+ public partial class ImpactCategoryCollection : Azure.ResourceManager.ArmCollection\n+ {\n+ protected ImpactCategoryCollection() { }\n+ public virtual Azure.Response Exists(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> ExistsAsync(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Get(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Pageable 
GetAll(string resourceType, string categoryName = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.AsyncPageable GetAllAsync(string resourceType, string categoryName = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetAsync(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.NullableResponse GetIfExists(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetIfExistsAsync(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ }\n+ public partial class ImpactCategoryData : Azure.ResourceManager.Models.ResourceData, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ internal ImpactCategoryData() { }\n+ public Azure.ResourceManager.ImpactReporting.Models.ImpactCategoryProperties Properties { get { throw null; } }\n+ protected override void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.ImpactCategoryData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.ImpactCategoryData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class ImpactCategoryResource : Azure.ResourceManager.ArmResource, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public static readonly Azure.Core.ResourceType ResourceType;\n+ protected ImpactCategoryResource() { }\n+ public virtual Azure.ResourceManager.ImpactReporting.ImpactCategoryData Data { get { throw null; } }\n+ public virtual bool HasData { get { throw null; } }\n+ public static Azure.Core.ResourceIdentifier CreateResourceIdentifier(string subscriptionId, string impactCategoryName) { throw null; }\n+ public virtual Azure.Response Get(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ Azure.ResourceManager.ImpactReporting.ImpactCategoryData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; 
}\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.ImpactCategoryData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public static partial class ImpactReportingExtensions\n+ {\n+ public static Azure.Response GetConnector(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public static System.Threading.Tasks.Task> GetConnectorAsync(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.ConnectorResource GetConnectorResource(this Azure.ResourceManager.ArmClient client, Azure.Core.ResourceIdentifier id) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.ConnectorCollection GetConnectors(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.ImpactCategoryCollection GetImpactCategories(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource) { throw null; }\n+ public static Azure.Response GetImpactCategory(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public static System.Threading.Tasks.Task> GetImpactCategoryAsync(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.ImpactCategoryResource GetImpactCategoryResource(this Azure.ResourceManager.ArmClient client, Azure.Core.ResourceIdentifier id) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.InsightResource GetInsightResource(this Azure.ResourceManager.ArmClient client, Azure.Core.ResourceIdentifier id) { throw null; }\n+ public static Azure.Response GetWorkloadImpact(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public static System.Threading.Tasks.Task> GetWorkloadImpactAsync(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource, string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.WorkloadImpactResource GetWorkloadImpactResource(this Azure.ResourceManager.ArmClient client, 
Azure.Core.ResourceIdentifier id) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.WorkloadImpactCollection GetWorkloadImpacts(this Azure.ResourceManager.Resources.SubscriptionResource subscriptionResource) { throw null; }\n+ }\n+ public partial class InsightCollection : Azure.ResourceManager.ArmCollection, System.Collections.Generic.IAsyncEnumerable, System.Collections.Generic.IEnumerable, System.Collections.IEnumerable\n+ {\n+ protected InsightCollection() { }\n+ public virtual Azure.ResourceManager.ArmOperation CreateOrUpdate(Azure.WaitUntil waitUntil, string insightName, Azure.ResourceManager.ImpactReporting.InsightData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> CreateOrUpdateAsync(Azure.WaitUntil waitUntil, string insightName, Azure.ResourceManager.ImpactReporting.InsightData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Exists(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> ExistsAsync(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Get(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Pageable GetAll(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.AsyncPageable GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetAsync(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.NullableResponse GetIfExists(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetIfExistsAsync(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ System.Collections.Generic.IAsyncEnumerator System.Collections.Generic.IAsyncEnumerable.GetAsyncEnumerator(System.Threading.CancellationToken cancellationToken) { throw null; }\n+ System.Collections.Generic.IEnumerator System.Collections.Generic.IEnumerable.GetEnumerator() { throw null; }\n+ System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { throw null; }\n+ }\n+ public partial class InsightData : Azure.ResourceManager.Models.ResourceData, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public InsightData() { }\n+ public Azure.ResourceManager.ImpactReporting.Models.InsightProperties Properties { get { throw null; } set { } }\n+ protected override void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.InsightData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, 
System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.InsightData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class InsightResource : Azure.ResourceManager.ArmResource, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public static readonly Azure.Core.ResourceType ResourceType;\n+ protected InsightResource() { }\n+ public virtual Azure.ResourceManager.ImpactReporting.InsightData Data { get { throw null; } }\n+ public virtual bool HasData { get { throw null; } }\n+ public static Azure.Core.ResourceIdentifier CreateResourceIdentifier(string subscriptionId, string workloadImpactName, string insightName) { throw null; }\n+ public virtual Azure.ResourceManager.ArmOperation Delete(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task DeleteAsync(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Get(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ Azure.ResourceManager.ImpactReporting.InsightData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.InsightData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ public virtual Azure.ResourceManager.ArmOperation Update(Azure.WaitUntil waitUntil, Azure.ResourceManager.ImpactReporting.InsightData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> UpdateAsync(Azure.WaitUntil waitUntil, Azure.ResourceManager.ImpactReporting.InsightData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ }\n+ public partial class 
WorkloadImpactCollection : Azure.ResourceManager.ArmCollection, System.Collections.Generic.IAsyncEnumerable, System.Collections.Generic.IEnumerable, System.Collections.IEnumerable\n+ {\n+ protected WorkloadImpactCollection() { }\n+ public virtual Azure.ResourceManager.ArmOperation CreateOrUpdate(Azure.WaitUntil waitUntil, string workloadImpactName, Azure.ResourceManager.ImpactReporting.WorkloadImpactData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> CreateOrUpdateAsync(Azure.WaitUntil waitUntil, string workloadImpactName, Azure.ResourceManager.ImpactReporting.WorkloadImpactData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Exists(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> ExistsAsync(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Get(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Pageable GetAll(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.AsyncPageable GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetAsync(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.NullableResponse GetIfExists(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetIfExistsAsync(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ System.Collections.Generic.IAsyncEnumerator System.Collections.Generic.IAsyncEnumerable.GetAsyncEnumerator(System.Threading.CancellationToken cancellationToken) { throw null; }\n+ System.Collections.Generic.IEnumerator System.Collections.Generic.IEnumerable.GetEnumerator() { throw null; }\n+ System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { throw null; }\n+ }\n+ public partial class WorkloadImpactData : Azure.ResourceManager.Models.ResourceData, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public WorkloadImpactData() { }\n+ public Azure.ResourceManager.ImpactReporting.Models.WorkloadImpactProperties Properties { get { throw null; } set { } }\n+ protected override void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.WorkloadImpactData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, 
System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.WorkloadImpactData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class WorkloadImpactResource : Azure.ResourceManager.ArmResource, System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public static readonly Azure.Core.ResourceType ResourceType;\n+ protected WorkloadImpactResource() { }\n+ public virtual Azure.ResourceManager.ImpactReporting.WorkloadImpactData Data { get { throw null; } }\n+ public virtual bool HasData { get { throw null; } }\n+ public static Azure.Core.ResourceIdentifier CreateResourceIdentifier(string subscriptionId, string workloadImpactName) { throw null; }\n+ public virtual Azure.ResourceManager.ArmOperation Delete(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task DeleteAsync(Azure.WaitUntil waitUntil, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response Get(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response GetInsight(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetInsightAsync(string insightName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.ResourceManager.ImpactReporting.InsightCollection GetInsights() { throw null; }\n+ Azure.ResourceManager.ImpactReporting.WorkloadImpactData System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.WorkloadImpactData System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ public virtual Azure.ResourceManager.ArmOperation Update(Azure.WaitUntil waitUntil, Azure.ResourceManager.ImpactReporting.WorkloadImpactData data, System.Threading.CancellationToken cancellationToken = 
default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> UpdateAsync(Azure.WaitUntil waitUntil, Azure.ResourceManager.ImpactReporting.WorkloadImpactData data, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ }\n+}\n+namespace Azure.ResourceManager.ImpactReporting.Mocking\n+{\n+ public partial class MockableImpactReportingArmClient : Azure.ResourceManager.ArmResource\n+ {\n+ protected MockableImpactReportingArmClient() { }\n+ public virtual Azure.ResourceManager.ImpactReporting.ConnectorResource GetConnectorResource(Azure.Core.ResourceIdentifier id) { throw null; }\n+ public virtual Azure.ResourceManager.ImpactReporting.ImpactCategoryResource GetImpactCategoryResource(Azure.Core.ResourceIdentifier id) { throw null; }\n+ public virtual Azure.ResourceManager.ImpactReporting.InsightResource GetInsightResource(Azure.Core.ResourceIdentifier id) { throw null; }\n+ public virtual Azure.ResourceManager.ImpactReporting.WorkloadImpactResource GetWorkloadImpactResource(Azure.Core.ResourceIdentifier id) { throw null; }\n+ }\n+ public partial class MockableImpactReportingSubscriptionResource : Azure.ResourceManager.ArmResource\n+ {\n+ protected MockableImpactReportingSubscriptionResource() { }\n+ public virtual Azure.Response GetConnector(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetConnectorAsync(string connectorName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.ResourceManager.ImpactReporting.ConnectorCollection GetConnectors() { throw null; }\n+ public virtual Azure.ResourceManager.ImpactReporting.ImpactCategoryCollection GetImpactCategories() { throw null; }\n+ public virtual Azure.Response GetImpactCategory(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetImpactCategoryAsync(string impactCategoryName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.Response GetWorkloadImpact(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual System.Threading.Tasks.Task> GetWorkloadImpactAsync(string workloadImpactName, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }\n+ public virtual Azure.ResourceManager.ImpactReporting.WorkloadImpactCollection GetWorkloadImpacts() { throw null; }\n+ }\n+}\n+namespace Azure.ResourceManager.ImpactReporting.Models\n+{\n+ public static partial class ArmImpactReportingModelFactory\n+ {\n+ public static Azure.ResourceManager.ImpactReporting.ConnectorData ConnectorData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.ImpactReporting.Models.ConnectorProperties properties = null) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.Models.ConnectorProperties ConnectorProperties(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState? 
provisioningState = default(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState?), string connectorId = null, string tenantId = null, Azure.ResourceManager.ImpactReporting.Models.Platform connectorType = default(Azure.ResourceManager.ImpactReporting.Models.Platform), System.DateTimeOffset lastRunTimeStamp = default(System.DateTimeOffset)) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.ImpactCategoryData ImpactCategoryData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.ImpactReporting.Models.ImpactCategoryProperties properties = null) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.Models.ImpactCategoryProperties ImpactCategoryProperties(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState? provisioningState = default(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState?), string categoryId = null, string parentCategoryId = null, string description = null, System.Collections.Generic.IEnumerable requiredImpactProperties = null) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.InsightData InsightData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.ImpactReporting.Models.InsightProperties properties = null) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.Models.InsightProperties InsightProperties(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState? provisioningState = default(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState?), string category = null, string status = null, string eventId = null, string groupId = null, Azure.ResourceManager.ImpactReporting.Models.Content content = null, System.DateTimeOffset? eventOn = default(System.DateTimeOffset?), string insightUniqueId = null, Azure.ResourceManager.ImpactReporting.Models.ImpactDetails impact = null, Azure.ResourceManager.ImpactReporting.Models.InsightPropertiesAdditionalDetails additionalDetails = null) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.Models.RequiredImpactProperties RequiredImpactProperties(string name = null, System.Collections.Generic.IEnumerable allowedValues = null) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.WorkloadImpactData WorkloadImpactData(Azure.Core.ResourceIdentifier id = null, string name = null, Azure.Core.ResourceType resourceType = default(Azure.Core.ResourceType), Azure.ResourceManager.Models.SystemData systemData = null, Azure.ResourceManager.ImpactReporting.Models.WorkloadImpactProperties properties = null) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.Models.WorkloadImpactProperties WorkloadImpactProperties(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState? provisioningState = default(Azure.ResourceManager.ImpactReporting.Models.ProvisioningState?), System.DateTimeOffset startOn = default(System.DateTimeOffset), System.DateTimeOffset? endOn = default(System.DateTimeOffset?), string impactedResourceId = null, string impactUniqueId = null, System.DateTimeOffset? 
reportedTimeUtc = default(System.DateTimeOffset?), string impactCategory = null, string impactDescription = null, System.Collections.Generic.IEnumerable armCorrelationIds = null, System.Collections.Generic.IEnumerable performance = null, Azure.ResourceManager.ImpactReporting.Models.Connectivity connectivity = null, Azure.ResourceManager.ImpactReporting.Models.WorkloadImpactPropertiesAdditionalProperties additionalProperties = null, Azure.ResourceManager.ImpactReporting.Models.ErrorDetailProperties errorDetails = null, Azure.ResourceManager.ImpactReporting.Models.Workload workload = null, string impactGroupId = null, Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel? confidenceLevel = default(Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel?), Azure.ResourceManager.ImpactReporting.Models.ClientIncidentDetails clientIncidentDetails = null) { throw null; }\n+ }\n+ public partial class ClientIncidentDetails : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public ClientIncidentDetails() { }\n+ public string ClientIncidentId { get { throw null; } set { } }\n+ public Azure.ResourceManager.ImpactReporting.Models.IncidentSource? ClientIncidentSource { get { throw null; } set { } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.Models.ClientIncidentDetails System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.Models.ClientIncidentDetails System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ [System.Runtime.InteropServices.StructLayoutAttribute(System.Runtime.InteropServices.LayoutKind.Sequential)]\n+ public readonly partial struct ConfidenceLevel : System.IEquatable\n+ {\n+ private readonly object _dummy;\n+ private readonly int _dummyPrimitive;\n+ public ConfidenceLevel(string value) { throw null; }\n+ public static Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel High { get { throw null; } }\n+ public static Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel Low { get { throw null; } }\n+ public static Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel Medium { get { throw null; } }\n+ public bool Equals(Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel other) { throw null; }\n+ [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]\n+ public override bool Equals(object obj) { throw null; }\n+ [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]\n+ public override int GetHashCode() { throw null; }\n+ public static bool operator ==(Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel left, 
Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel right) { throw null; }\n+ public static implicit operator Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel (string value) { throw null; }\n+ public static bool operator !=(Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel left, Azure.ResourceManager.ImpactReporting.Models.ConfidenceLevel right) { throw null; }\n+ public override string ToString() { throw null; }\n+ }\n+ public partial class Connectivity : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public Connectivity() { }\n+ public int? Port { get { throw null; } set { } }\n+ public Azure.ResourceManager.ImpactReporting.Models.Protocol? Protocol { get { throw null; } set { } }\n+ public string SourceAzureResourceId { get { throw null; } set { } }\n+ public string TargetAzureResourceId { get { throw null; } set { } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.Models.Connectivity System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.Models.Connectivity System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class ConnectorPatch : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public ConnectorPatch() { }\n+ public Azure.ResourceManager.ImpactReporting.Models.Platform? 
ConnectorType { get { throw null; } set { } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.Models.ConnectorPatch System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.Models.ConnectorPatch System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class ConnectorProperties : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public ConnectorProperties(string connectorId, string tenantId, Azure.ResourceManager.ImpactReporting.Models.Platform connectorType, System.DateTimeOffset lastRunTimeStamp) { }\n+ public string ConnectorId { get { throw null; } }\n+ public Azure.ResourceManager.ImpactReporting.Models.Platform ConnectorType { get { throw null; } set { } }\n+ public System.DateTimeOffset LastRunTimeStamp { get { throw null; } }\n+ public Azure.ResourceManager.ImpactReporting.Models.ProvisioningState? ProvisioningState { get { throw null; } }\n+ public string TenantId { get { throw null; } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.Models.ConnectorProperties System.ClientModel.Primitives.IJsonModel.Create(ref System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.Models.ConnectorProperties System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class Content : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel\n+ {\n+ public Content(string title, string description) { }\n+ public string Description { get { throw null; } set { } }\n+ public string Title { get { throw null; } set { } }\n+ protected virtual void JsonModelWriteCore(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.Models.Content System.ClientModel.Primitives.IJsonModel.Create(ref 
System.Text.Json.Utf8JsonReader reader, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ void System.ClientModel.Primitives.IJsonModel.Write(System.Text.Json.Utf8JsonWriter writer, System.ClientModel.Primitives.ModelReaderWriterOptions options) { }\n+ Azure.ResourceManager.ImpactReporting.Models.Content System.ClientModel.Primitives.IPersistableModel.Create(System.BinaryData data, System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ string System.ClientModel.Primitives.IPersistableModel.GetFormatFromOptions(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ System.BinaryData System.ClientModel.Primitives.IPersistableModel.Write(System.ClientModel.Primitives.ModelReaderWriterOptions options) { throw null; }\n+ }\n+ public partial class ErrorDetailProperties : System.ClientModel.Primitives.IJsonModel, System.ClientModel.Primitives.IPersistableModel", - "comment_created_at": "2025-03-10T06:17:01+00:00", - "comment_author": "ArcturusZhang", - "comment_body": "this is also generic, we need a more verbose name for this type.\r\nUsually for this kind of models, we just prepend the RP name `ImpactReporting` to it so that we have `ImpactReportingErrorDetailProperties`.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-use-higher-level-telemetry.json b/_reviewers/azure-sdk-for-net-use-higher-level-telemetry.json new file mode 100644 index 0000000..5ed9250 --- /dev/null +++ b/_reviewers/azure-sdk-for-net-use-higher-level-telemetry.json @@ -0,0 +1,70 @@ +[ + { + "discussion_id": "2162531642", + "pr_number": 50119, + "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/README.md", + "created_at": "2025-06-23T21:05:31+00:00", + "commented_code": "run.LastError?.Message);\n```\n`\n\n#### Tracing\n\nYou can add an Application Insights Azure resource to your Azure AI Foundry project. See the Tracing tab in your AI Foundry project. If one was enabled, you use the Application Insights connection string, configure your Agents, and observe the full execution path through Azure Monitor. Typically, you might want to start tracing before you create an Agent.\n\nTracing also requires enabling OpenTelemetry support. One way to do this is to set the `AZURE_EXPERIMENTAL_ENABLE_ACTIVITY_SOURCE` environment variable value to `true`.", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2162531642", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50119, + "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/README.md", + "discussion_id": "2162531642", + "commented_code": "@@ -866,6 +869,51 @@ Assert.AreEqual(\n run.LastError?.Message);\n ```\n `\n+\n+#### Tracing\n+\n+You can add an Application Insights Azure resource to your Azure AI Foundry project. See the Tracing tab in your AI Foundry project. If one was enabled, you use the Application Insights connection string, configure your Agents, and observe the full execution path through Azure Monitor. Typically, you might want to start tracing before you create an Agent.\n+\n+Tracing also requires enabling OpenTelemetry support. 
One way to do this is to set the `AZURE_EXPERIMENTAL_ENABLE_ACTIVITY_SOURCE` environment variable value to `true`.", + "comment_created_at": "2025-06-23T21:05:31+00:00", + "comment_author": "lmolkova", + "comment_body": "do we also support `AppContext.SetSwitch(\"Azure.Experimental.EnableActivitySource\", true);` ?\r\n\r\nsee https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/ai/Azure.AI.Inference/samples/Sample8_ChatCompletionsWithOpenTelemetry.md for an example of it being documented and https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/core/Azure.Core/samples/Diagnostics.md to reference for more details", + "pr_file_module": null + }, + { + "comment_id": "2162589182", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50119, + "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/README.md", + "discussion_id": "2162531642", + "commented_code": "@@ -866,6 +869,51 @@ Assert.AreEqual(\n run.LastError?.Message);\n ```\n `\n+\n+#### Tracing\n+\n+You can add an Application Insights Azure resource to your Azure AI Foundry project. See the Tracing tab in your AI Foundry project. If one was enabled, you use the Application Insights connection string, configure your Agents, and observe the full execution path through Azure Monitor. Typically, you might want to start tracing before you create an Agent.\n+\n+Tracing also requires enabling OpenTelemetry support. One way to do this is to set the `AZURE_EXPERIMENTAL_ENABLE_ACTIVITY_SOURCE` environment variable value to `true`.", + "comment_created_at": "2025-06-23T21:52:19+00:00", + "comment_author": "M-Hietala", + "comment_body": "added", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2162538146", + "pr_number": 50119, + "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/README.md", + "created_at": "2025-06-23T21:09:54+00:00", + "commented_code": "run.LastError?.Message);\n```\n`\n\n#### Tracing\n\nYou can add an Application Insights Azure resource to your Azure AI Foundry project. See the Tracing tab in your AI Foundry project. If one was enabled, you use the Application Insights connection string, configure your Agents, and observe the full execution path through Azure Monitor. Typically, you might want to start tracing before you create an Agent.\n\nTracing also requires enabling OpenTelemetry support. One way to do this is to set the `AZURE_EXPERIMENTAL_ENABLE_ACTIVITY_SOURCE` environment variable value to `true`.\n\nTo enabled content recording, set the `AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED` environment variable to `true`. Content in this context refers to chat message content, function call tool related function names, function parameter names and values.\n\n##### Tracing to Azure Montior\n\nFor tracing to Azure Monitor from your application, install the Azure.Monitor.OpenTelemetry.Exporter with [NuGet](https://www.nuget.org/ ):\n\n```dotnetcli\ndotnet add package Azure.Monitor.OpenTelemetry.Exporter", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2162538146", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50119, + "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/README.md", + "discussion_id": "2162538146", + "commented_code": "@@ -866,6 +869,51 @@ Assert.AreEqual(\n run.LastError?.Message);\n ```\n `\n+\n+#### Tracing\n+\n+You can add an Application Insights Azure resource to your Azure AI Foundry project. See the Tracing tab in your AI Foundry project. 
If one was enabled, you use the Application Insights connection string, configure your Agents, and observe the full execution path through Azure Monitor. Typically, you might want to start tracing before you create an Agent.\n+\n+Tracing also requires enabling OpenTelemetry support. One way to do this is to set the `AZURE_EXPERIMENTAL_ENABLE_ACTIVITY_SOURCE` environment variable value to `true`.\n+\n+To enabled content recording, set the `AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED` environment variable to `true`. Content in this context refers to chat message content, function call tool related function names, function parameter names and values.\n+\n+##### Tracing to Azure Montior\n+\n+For tracing to Azure Monitor from your application, install the Azure.Monitor.OpenTelemetry.Exporter with [NuGet](https://www.nuget.org/ ):\n+\n+```dotnetcli\n+dotnet add package Azure.Monitor.OpenTelemetry.Exporter", + "comment_created_at": "2025-06-23T21:09:54+00:00", + "comment_author": "lmolkova", + "comment_body": "it's actually way easier and better to use az.mon.ote.aspnetcore package - https://github.com/Azure/azure-sdk-for-net/blob/0c2a40252d1c7ed86188153bd4aa8ddeed868371/sdk/monitor/Azure.Monitor.OpenTelemetry.AspNetCore/README.md, consider switching to it or at least note that it should be the first choice.\r\n\r\nDirect usage of the exporter is very low-level and not expected in AzMon use cases\r\n\r\n", + "pr_file_module": null + }, + { + "comment_id": "2162589001", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50119, + "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/README.md", + "discussion_id": "2162538146", + "commented_code": "@@ -866,6 +869,51 @@ Assert.AreEqual(\n run.LastError?.Message);\n ```\n `\n+\n+#### Tracing\n+\n+You can add an Application Insights Azure resource to your Azure AI Foundry project. See the Tracing tab in your AI Foundry project. If one was enabled, you use the Application Insights connection string, configure your Agents, and observe the full execution path through Azure Monitor. Typically, you might want to start tracing before you create an Agent.\n+\n+Tracing also requires enabling OpenTelemetry support. One way to do this is to set the `AZURE_EXPERIMENTAL_ENABLE_ACTIVITY_SOURCE` environment variable value to `true`.\n+\n+To enabled content recording, set the `AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED` environment variable to `true`. Content in this context refers to chat message content, function call tool related function names, function parameter names and values.\n+\n+##### Tracing to Azure Montior\n+\n+For tracing to Azure Monitor from your application, install the Azure.Monitor.OpenTelemetry.Exporter with [NuGet](https://www.nuget.org/ ):\n+\n+```dotnetcli\n+dotnet add package Azure.Monitor.OpenTelemetry.Exporter", + "comment_created_at": "2025-06-23T21:52:09+00:00", + "comment_author": "M-Hietala", + "comment_body": "added", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-use-higher-level-telemetry.md b/_reviewers/azure-sdk-for-net-use-higher-level-telemetry.md index 8707df2..f723734 100644 --- a/_reviewers/azure-sdk-for-net-use-higher-level-telemetry.md +++ b/_reviewers/azure-sdk-for-net-use-higher-level-telemetry.md @@ -29,75 +29,3 @@ AppContext.SetSwitch("Azure.Experimental.EnableActivitySource", true); ``` Using higher-level telemetry packages simplifies implementation, provides better defaults, and follows Azure SDK best practices for observability. 
This approach reduces the complexity of monitoring code and improves maintainability while ensuring comprehensive system visibility. - - -[ - { - "discussion_id": "2162531642", - "pr_number": 50119, - "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/README.md", - "created_at": "2025-06-23T21:05:31+00:00", - "commented_code": "run.LastError?.Message);\n```\n`\n\n#### Tracing\n\nYou can add an Application Insights Azure resource to your Azure AI Foundry project. See the Tracing tab in your AI Foundry project. If one was enabled, you use the Application Insights connection string, configure your Agents, and observe the full execution path through Azure Monitor. Typically, you might want to start tracing before you create an Agent.\n\nTracing also requires enabling OpenTelemetry support. One way to do this is to set the `AZURE_EXPERIMENTAL_ENABLE_ACTIVITY_SOURCE` environment variable value to `true`.", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2162531642", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50119, - "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/README.md", - "discussion_id": "2162531642", - "commented_code": "@@ -866,6 +869,51 @@ Assert.AreEqual(\n run.LastError?.Message);\n ```\n `\n+\n+#### Tracing\n+\n+You can add an Application Insights Azure resource to your Azure AI Foundry project. See the Tracing tab in your AI Foundry project. If one was enabled, you use the Application Insights connection string, configure your Agents, and observe the full execution path through Azure Monitor. Typically, you might want to start tracing before you create an Agent.\n+\n+Tracing also requires enabling OpenTelemetry support. One way to do this is to set the `AZURE_EXPERIMENTAL_ENABLE_ACTIVITY_SOURCE` environment variable value to `true`.", - "comment_created_at": "2025-06-23T21:05:31+00:00", - "comment_author": "lmolkova", - "comment_body": "do we also support `AppContext.SetSwitch(\"Azure.Experimental.EnableActivitySource\", true);` ?\r\n\r\nsee https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/ai/Azure.AI.Inference/samples/Sample8_ChatCompletionsWithOpenTelemetry.md for an example of it being documented and https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/core/Azure.Core/samples/Diagnostics.md to reference for more details", - "pr_file_module": null - }, - { - "comment_id": "2162589182", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50119, - "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/README.md", - "discussion_id": "2162531642", - "commented_code": "@@ -866,6 +869,51 @@ Assert.AreEqual(\n run.LastError?.Message);\n ```\n `\n+\n+#### Tracing\n+\n+You can add an Application Insights Azure resource to your Azure AI Foundry project. See the Tracing tab in your AI Foundry project. If one was enabled, you use the Application Insights connection string, configure your Agents, and observe the full execution path through Azure Monitor. Typically, you might want to start tracing before you create an Agent.\n+\n+Tracing also requires enabling OpenTelemetry support. 
One way to do this is to set the `AZURE_EXPERIMENTAL_ENABLE_ACTIVITY_SOURCE` environment variable value to `true`.", - "comment_created_at": "2025-06-23T21:52:19+00:00", - "comment_author": "M-Hietala", - "comment_body": "added", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2162538146", - "pr_number": 50119, - "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/README.md", - "created_at": "2025-06-23T21:09:54+00:00", - "commented_code": "run.LastError?.Message);\n```\n`\n\n#### Tracing\n\nYou can add an Application Insights Azure resource to your Azure AI Foundry project. See the Tracing tab in your AI Foundry project. If one was enabled, you use the Application Insights connection string, configure your Agents, and observe the full execution path through Azure Monitor. Typically, you might want to start tracing before you create an Agent.\n\nTracing also requires enabling OpenTelemetry support. One way to do this is to set the `AZURE_EXPERIMENTAL_ENABLE_ACTIVITY_SOURCE` environment variable value to `true`.\n\nTo enabled content recording, set the `AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED` environment variable to `true`. Content in this context refers to chat message content, function call tool related function names, function parameter names and values.\n\n##### Tracing to Azure Montior\n\nFor tracing to Azure Monitor from your application, install the Azure.Monitor.OpenTelemetry.Exporter with [NuGet](https://www.nuget.org/ ):\n\n```dotnetcli\ndotnet add package Azure.Monitor.OpenTelemetry.Exporter", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2162538146", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50119, - "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/README.md", - "discussion_id": "2162538146", - "commented_code": "@@ -866,6 +869,51 @@ Assert.AreEqual(\n run.LastError?.Message);\n ```\n `\n+\n+#### Tracing\n+\n+You can add an Application Insights Azure resource to your Azure AI Foundry project. See the Tracing tab in your AI Foundry project. If one was enabled, you use the Application Insights connection string, configure your Agents, and observe the full execution path through Azure Monitor. Typically, you might want to start tracing before you create an Agent.\n+\n+Tracing also requires enabling OpenTelemetry support. One way to do this is to set the `AZURE_EXPERIMENTAL_ENABLE_ACTIVITY_SOURCE` environment variable value to `true`.\n+\n+To enabled content recording, set the `AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED` environment variable to `true`. 
Content in this context refers to chat message content, function call tool related function names, function parameter names and values.\n+\n+##### Tracing to Azure Montior\n+\n+For tracing to Azure Monitor from your application, install the Azure.Monitor.OpenTelemetry.Exporter with [NuGet](https://www.nuget.org/ ):\n+\n+```dotnetcli\n+dotnet add package Azure.Monitor.OpenTelemetry.Exporter", - "comment_created_at": "2025-06-23T21:09:54+00:00", - "comment_author": "lmolkova", - "comment_body": "it's actually way easier and better to use az.mon.ote.aspnetcore package - https://github.com/Azure/azure-sdk-for-net/blob/0c2a40252d1c7ed86188153bd4aa8ddeed868371/sdk/monitor/Azure.Monitor.OpenTelemetry.AspNetCore/README.md, consider switching to it or at least note that it should be the first choice.\r\n\r\nDirect usage of the exporter is very low-level and not expected in AzMon use cases\r\n\r\n", - "pr_file_module": null - }, - { - "comment_id": "2162589001", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50119, - "pr_file": "sdk/ai/Azure.AI.Agents.Persistent/README.md", - "discussion_id": "2162538146", - "commented_code": "@@ -866,6 +869,51 @@ Assert.AreEqual(\n run.LastError?.Message);\n ```\n `\n+\n+#### Tracing\n+\n+You can add an Application Insights Azure resource to your Azure AI Foundry project. See the Tracing tab in your AI Foundry project. If one was enabled, you use the Application Insights connection string, configure your Agents, and observe the full execution path through Azure Monitor. Typically, you might want to start tracing before you create an Agent.\n+\n+Tracing also requires enabling OpenTelemetry support. One way to do this is to set the `AZURE_EXPERIMENTAL_ENABLE_ACTIVITY_SOURCE` environment variable value to `true`.\n+\n+To enabled content recording, set the `AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED` environment variable to `true`. Content in this context refers to chat message content, function call tool related function names, function parameter names and values.\n+\n+##### Tracing to Azure Montior\n+\n+For tracing to Azure Monitor from your application, install the Azure.Monitor.OpenTelemetry.Exporter with [NuGet](https://www.nuget.org/ ):\n+\n+```dotnetcli\n+dotnet add package Azure.Monitor.OpenTelemetry.Exporter", - "comment_created_at": "2025-06-23T21:52:09+00:00", - "comment_author": "M-Hietala", - "comment_body": "added", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-user-friendly-configuration-values.json b/_reviewers/azure-sdk-for-net-user-friendly-configuration-values.json new file mode 100644 index 0000000..24a6809 --- /dev/null +++ b/_reviewers/azure-sdk-for-net-user-friendly-configuration-values.json @@ -0,0 +1,150 @@ +[ + { + "discussion_id": "2159569308", + "pr_number": 50749, + "pr_file": "sdk/identity/Azure.Identity/CHANGELOG.md", + "created_at": "2025-06-20T19:38:26+00:00", + "commented_code": "## 1.15.0-beta.1 (Unreleased)\n\n### Features Added\n- Added support for the `AZURE_TOKEN_CREDENTIALS` environment variable to `DefaultAzureCredential`, which allows for additional credential selection options. The valid values now include any of the credential names available in the default chain (VisualStudioCredential, VisualStudioCodeCredential, AzureCliCredential, AzurePowerShellCredential, AzureDeveloperCliCredential, EnvironmentCredential, WorkloadIdentityCredential, ManagedIdentityCredential, InteractiveBrowserCredential, or BrokerAuthenticationCredential.) 
**Note:** BrokerAuthenticationCredential require that the project include a reference to Azure.Identity.Broker.", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2159569308", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50749, + "pr_file": "sdk/identity/Azure.Identity/CHANGELOG.md", + "discussion_id": "2159569308", + "commented_code": "@@ -3,6 +3,7 @@\n ## 1.15.0-beta.1 (Unreleased)\n \n ### Features Added\n+- Added support for the `AZURE_TOKEN_CREDENTIALS` environment variable to `DefaultAzureCredential`, which allows for additional credential selection options. The valid values now include any of the credential names available in the default chain (VisualStudioCredential, VisualStudioCodeCredential, AzureCliCredential, AzurePowerShellCredential, AzureDeveloperCliCredential, EnvironmentCredential, WorkloadIdentityCredential, ManagedIdentityCredential, InteractiveBrowserCredential, or BrokerAuthenticationCredential.) **Note:** BrokerAuthenticationCredential require that the project include a reference to Azure.Identity.Broker.", + "comment_created_at": "2025-06-20T19:38:26+00:00", + "comment_author": "scottaddie", + "comment_body": "```suggestion\r\n- Expanded the set of acceptable values for environment variable `AZURE_TOKEN_CREDENTIALS` to allow for selection of a specific credential in the `DefaultAzureCredential` chain. The valid values now include any of the credential names available in the default chain (`VisualStudioCredential`, `VisualStudioCodeCredential`, `AzureCliCredential`, `AzurePowerShellCredential`, `AzureDeveloperCliCredential`, `EnvironmentCredential`, `WorkloadIdentityCredential`, `ManagedIdentityCredential`, `InteractiveBrowserCredential`, or `BrokerAuthenticationCredential`.) **Note:** BrokerAuthenticationCredential requires that the project include a reference to package Azure.Identity.Broker.\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2159570531", + "pr_number": 50749, + "pr_file": "sdk/identity/Azure.Identity/TROUBLESHOOTING.md", + "created_at": "2025-06-20T19:39:46+00:00", + "commented_code": "|---|---|---|\n|`CredentialUnavailableException` raised with message. \"DefaultAzureCredential failed to retrieve a token from the included credentials.\"|All credentials in the `DefaultAzureCredential` chain failed to retrieve a token, each throwing a `CredentialUnavailableException`.|
  • [Enable logging](#enable-and-configure-logging) to verify the credentials being tried, and get further diagnostic information.
  • Consult the troubleshooting guide for underlying credential types for more information.
    • [EnvironmentCredential](#troubleshoot-environmentcredential-authentication-issues)
    • [WorkloadIdentityCredential](#troubleshoot-workloadidentitycredential-authentication-issues)
    • [ManagedIdentityCredential](#troubleshoot-managedidentitycredential-authentication-issues)
    • [VisualStudioCredential](#troubleshoot-visualstudiocredential-authentication-issues)
    • [AzureCliCredential](#troubleshoot-azureclicredential-authentication-issues)
    • [AzurePowerShellCredential](#troubleshoot-azurepowershellcredential-authentication-issues)
    |\n|`RequestFailedException` raised from the client with a status code of 401 or 403|Authentication succeeded but the authorizing Azure service responded with a 401 (Authenticate) or 403 (Forbidden) status code. This error can often be caused by the `DefaultAzureCredential` authenticating an account other than the intended or that the intended account doesn't have the correct permissions or roles assigned.|
    • [Enable logging](#enable-and-configure-logging) to determine which credential in the chain returned the authenticating token.
    • In the case a credential other than the expected is returning a token, bypass this by either signing out of the corresponding development tool, or excluding the credential with the ExcludeXXXCredential property in the `DefaultAzureCredentialOptions`
    • Ensure that the correct role is assigned to the account being used. For example, a service specific role rather than the subscription Owner role.
    |\n|`InvalidOperationException` raised with message: \"Invalid value for environment variable AZURE_TOKEN_CREDENTIALS ...\" | An invalid value was set for the AZURE_TOKEN_CREDENTIALS environment variable | Set the environment variable to one of the following values: dev, prod, VisualStudioCredential, VisualStudioCodeCredential, AzureCliCredential, AzurePowerShellCredential, AzureDeveloperCliCredential, EnvironmentCredential, WorkloadIdentityCredential, ManagedIdentityCredential, InteractiveBrowserCredential, or BrokerAuthenticationCredential. **Note:** VisualStudioCodeCredential and BrokerAuthenticationCredential require that the project include a reference to Azure.Identity.Broker. |", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2159570531", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50749, + "pr_file": "sdk/identity/Azure.Identity/TROUBLESHOOTING.md", + "discussion_id": "2159570531", + "commented_code": "@@ -125,6 +125,7 @@ DefaultAzureCredentialOptions options = new\n |---|---|---|\n |`CredentialUnavailableException` raised with message. \"DefaultAzureCredential failed to retrieve a token from the included credentials.\"|All credentials in the `DefaultAzureCredential` chain failed to retrieve a token, each throwing a `CredentialUnavailableException`.|
    • [Enable logging](#enable-and-configure-logging) to verify the credentials being tried, and get further diagnostic information.
    • Consult the troubleshooting guide for underlying credential types for more information.
      • [EnvironmentCredential](#troubleshoot-environmentcredential-authentication-issues)
      • [WorkloadIdentityCredential](#troubleshoot-workloadidentitycredential-authentication-issues)
      • [ManagedIdentityCredential](#troubleshoot-managedidentitycredential-authentication-issues)
      • [VisualStudioCredential](#troubleshoot-visualstudiocredential-authentication-issues)
      • [AzureCliCredential](#troubleshoot-azureclicredential-authentication-issues)
      • [AzurePowerShellCredential](#troubleshoot-azurepowershellcredential-authentication-issues)
      |\n |`RequestFailedException` raised from the client with a status code of 401 or 403|Authentication succeeded but the authorizing Azure service responded with a 401 (Authenticate) or 403 (Forbidden) status code. This error can often be caused by the `DefaultAzureCredential` authenticating an account other than the intended or that the intended account doesn't have the correct permissions or roles assigned.|
      • [Enable logging](#enable-and-configure-logging) to determine which credential in the chain returned the authenticating token.
      • In the case a credential other than the expected is returning a token, bypass this by either signing out of the corresponding development tool, or excluding the credential with the ExcludeXXXCredential property in the `DefaultAzureCredentialOptions`
      • Ensure that the correct role is assigned to the account being used. For example, a service specific role rather than the subscription Owner role.
      |\n+|`InvalidOperationException` raised with message: \"Invalid value for environment variable AZURE_TOKEN_CREDENTIALS ...\" | An invalid value was set for the AZURE_TOKEN_CREDENTIALS environment variable | Set the environment variable to one of the following values: dev, prod, VisualStudioCredential, VisualStudioCodeCredential, AzureCliCredential, AzurePowerShellCredential, AzureDeveloperCliCredential, EnvironmentCredential, WorkloadIdentityCredential, ManagedIdentityCredential, InteractiveBrowserCredential, or BrokerAuthenticationCredential. **Note:** VisualStudioCodeCredential and BrokerAuthenticationCredential require that the project include a reference to Azure.Identity.Broker. |", + "comment_created_at": "2025-06-20T19:39:46+00:00", + "comment_author": "scottaddie", + "comment_body": "I noticed the CHANGELOG entry doesn't mention anything about VS Code credential in the last sentence. Should it? Note that credential is mentioned in the last sentence here.\r\n```suggestion\r\n|`InvalidOperationException` raised with message: \"Invalid value for environment variable AZURE_TOKEN_CREDENTIALS ...\" | An invalid value was set for the AZURE_TOKEN_CREDENTIALS environment variable | Set the environment variable to one of the following values: dev, prod, `VisualStudioCredential`, `VisualStudioCodeCredential`, `AzureCliCredential`, `AzurePowerShellCredential`, `AzureDeveloperCliCredential`, `EnvironmentCredential`, `WorkloadIdentityCredential`, `ManagedIdentityCredential`, `InteractiveBrowserCredential`, or `BrokerAuthenticationCredential`. **Note:** `VisualStudioCodeCredential` and `BrokerAuthenticationCredential` require that the project include a reference to package Azure.Identity.Broker. |\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2160166052", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50749, + "pr_file": "sdk/identity/Azure.Identity/TROUBLESHOOTING.md", + "discussion_id": "2159570531", + "commented_code": "@@ -125,6 +125,7 @@ DefaultAzureCredentialOptions options = new\n |---|---|---|\n |`CredentialUnavailableException` raised with message. \"DefaultAzureCredential failed to retrieve a token from the included credentials.\"|All credentials in the `DefaultAzureCredential` chain failed to retrieve a token, each throwing a `CredentialUnavailableException`.|
      • [Enable logging](#enable-and-configure-logging) to verify the credentials being tried, and get further diagnostic information.
      • Consult the troubleshooting guide for underlying credential types for more information.
        • [EnvironmentCredential](#troubleshoot-environmentcredential-authentication-issues)
        • [WorkloadIdentityCredential](#troubleshoot-workloadidentitycredential-authentication-issues)
        • [ManagedIdentityCredential](#troubleshoot-managedidentitycredential-authentication-issues)
        • [VisualStudioCredential](#troubleshoot-visualstudiocredential-authentication-issues)
        • [AzureCliCredential](#troubleshoot-azureclicredential-authentication-issues)
        • [AzurePowerShellCredential](#troubleshoot-azurepowershellcredential-authentication-issues)
        |\n |`RequestFailedException` raised from the client with a status code of 401 or 403|Authentication succeeded but the authorizing Azure service responded with a 401 (Authenticate) or 403 (Forbidden) status code. This error can often be caused by the `DefaultAzureCredential` authenticating an account other than the intended or that the intended account doesn't have the correct permissions or roles assigned.|
        • [Enable logging](#enable-and-configure-logging) to determine which credential in the chain returned the authenticating token.
        • In the case a credential other than the expected is returning a token, bypass this by either signing out of the corresponding development tool, or excluding the credential with the ExcludeXXXCredential property in the `DefaultAzureCredentialOptions`
        • Ensure that the correct role is assigned to the account being used. For example, a service specific role rather than the subscription Owner role.
        |\n+|`InvalidOperationException` raised with message: \"Invalid value for environment variable AZURE_TOKEN_CREDENTIALS ...\" | An invalid value was set for the AZURE_TOKEN_CREDENTIALS environment variable | Set the environment variable to one of the following values: dev, prod, VisualStudioCredential, VisualStudioCodeCredential, AzureCliCredential, AzurePowerShellCredential, AzureDeveloperCliCredential, EnvironmentCredential, WorkloadIdentityCredential, ManagedIdentityCredential, InteractiveBrowserCredential, or BrokerAuthenticationCredential. **Note:** VisualStudioCodeCredential and BrokerAuthenticationCredential require that the project include a reference to Azure.Identity.Broker. |", + "comment_created_at": "2025-06-21T21:58:31+00:00", + "comment_author": "christothes", + "comment_body": "I meant to exclude it until the next PR.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2138876254", + "pr_number": 50531, + "pr_file": "README.md", + "created_at": "2025-06-10T22:40:31+00:00", + "commented_code": "Telemetry collection is on by default.\n\nTo opt out, you can disable telemetry at client construction. Creating a [synchronous policy](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/core/Azure.Core/samples/Pipeline.md#implementing-a-synchronous-policy) in your application gives you access to the headers sent with each request, allowing you to remove the `User-Agent` header containing client telemetry. To use the policy, you will add it to the options for your specific client as part of client creation. This will disable Azure SDK telemetry for all methods in the client. You will need to register the policy with each client created.\nTo opt out, you can disable telemetry by setting the `IsTelemetryEnabled` property to `false` in the client's `DiagnosticsOptions`. This will prevent the Azure SDK from adding telemetry information to the `User-Agent` header for all requests made by the client.\n\n> NOTE: `HttpClient` may set default user agent headers as part of the .NET platform behavior. 
This value does not contain any Azure SDK telemetry information.\n\nAn example policy implementation looks like:\n```C# Snippet:RemoveUserAgentPolicy\npublic class RemoveUserAgentPolicy : HttpPipelineSynchronousPolicy\n{\n public override void OnSendingRequest(HttpMessage message)\n {\n message.Request.Headers.Remove(HttpHeader.Names.UserAgent);\n }\n}\n```\n\nTo use it with a client, you would register it to run for every retry attempt as part of your client options:\n```C# Snippet:RemoveUserAgentPolicyUse\nYou can disable telemetry when creating a client by setting the `IsTelemetryEnabled` property in the diagnostics options:\n```C#\nUri serviceEndpoint = new Uri(\"https://example.contoso.com\");\nTokenCredential credential = new DefaultAzureCredential();\n \nSampleClientOptions clientOptions = new SampleClientOptions();\nclientOptions.AddPolicy(new RemoveUserAgentPolicy(), HttpPipelinePosition.PerRetry);\n\nSampleClientOptions clientOptions = new SampleClientOptions()\n{\n Diagnostics = { IsTelemetryEnabled = false }\n};\n\nSampleClient client = new SampleClient(serviceEndpoint, credential, clientOptions);\n```\n\nAlternatively, you can disable telemetry globally by setting the `AZURE_TELEMETRY_DISABLED` environment variable to `true` before creating any clients.", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2138876254", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50531, + "pr_file": "README.md", + "discussion_id": "2138876254", + "commented_code": "@@ -60,32 +60,25 @@ The software may collect information about you and your use of the software and\n \n Telemetry collection is on by default.\n \n-To opt out, you can disable telemetry at client construction. Creating a [synchronous policy](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/core/Azure.Core/samples/Pipeline.md#implementing-a-synchronous-policy) in your application gives you access to the headers sent with each request, allowing you to remove the `User-Agent` header containing client telemetry. To use the policy, you will add it to the options for your specific client as part of client creation. This will disable Azure SDK telemetry for all methods in the client. You will need to register the policy with each client created.\n+To opt out, you can disable telemetry by setting the `IsTelemetryEnabled` property to `false` in the client's `DiagnosticsOptions`. This will prevent the Azure SDK from adding telemetry information to the `User-Agent` header for all requests made by the client.\n \n > NOTE: `HttpClient` may set default user agent headers as part of the .NET platform behavior. 
This value does not contain any Azure SDK telemetry information.\n \n-An example policy implementation looks like:\n-```C# Snippet:RemoveUserAgentPolicy\n-public class RemoveUserAgentPolicy : HttpPipelineSynchronousPolicy\n-{\n- public override void OnSendingRequest(HttpMessage message)\n- {\n- message.Request.Headers.Remove(HttpHeader.Names.UserAgent);\n- }\n-}\n-```\n-\n-To use it with a client, you would register it to run for every retry attempt as part of your client options:\n-```C# Snippet:RemoveUserAgentPolicyUse\n+You can disable telemetry when creating a client by setting the `IsTelemetryEnabled` property in the diagnostics options:\n+```C#\n Uri serviceEndpoint = new Uri(\"https://example.contoso.com\");\n TokenCredential credential = new DefaultAzureCredential();\n- \n-SampleClientOptions clientOptions = new SampleClientOptions();\n-clientOptions.AddPolicy(new RemoveUserAgentPolicy(), HttpPipelinePosition.PerRetry);\n+\n+SampleClientOptions clientOptions = new SampleClientOptions()\n+{\n+ Diagnostics = { IsTelemetryEnabled = false }\n+};\n \n SampleClient client = new SampleClient(serviceEndpoint, credential, clientOptions);\n ```\n \n+Alternatively, you can disable telemetry globally by setting the `AZURE_TELEMETRY_DISABLED` environment variable to `true` before creating any clients.", + "comment_created_at": "2025-06-10T22:40:31+00:00", + "comment_author": "KrzysztofCwalina", + "comment_body": "is that really true? @copilot, where did you find out about this?", + "pr_file_module": null + }, + { + "comment_id": "2138883720", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50531, + "pr_file": "README.md", + "discussion_id": "2138876254", + "commented_code": "@@ -60,32 +60,25 @@ The software may collect information about you and your use of the software and\n \n Telemetry collection is on by default.\n \n-To opt out, you can disable telemetry at client construction. Creating a [synchronous policy](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/core/Azure.Core/samples/Pipeline.md#implementing-a-synchronous-policy) in your application gives you access to the headers sent with each request, allowing you to remove the `User-Agent` header containing client telemetry. To use the policy, you will add it to the options for your specific client as part of client creation. This will disable Azure SDK telemetry for all methods in the client. You will need to register the policy with each client created.\n+To opt out, you can disable telemetry by setting the `IsTelemetryEnabled` property to `false` in the client's `DiagnosticsOptions`. This will prevent the Azure SDK from adding telemetry information to the `User-Agent` header for all requests made by the client.\n \n > NOTE: `HttpClient` may set default user agent headers as part of the .NET platform behavior. 
This value does not contain any Azure SDK telemetry information.\n \n-An example policy implementation looks like:\n-```C# Snippet:RemoveUserAgentPolicy\n-public class RemoveUserAgentPolicy : HttpPipelineSynchronousPolicy\n-{\n- public override void OnSendingRequest(HttpMessage message)\n- {\n- message.Request.Headers.Remove(HttpHeader.Names.UserAgent);\n- }\n-}\n-```\n-\n-To use it with a client, you would register it to run for every retry attempt as part of your client options:\n-```C# Snippet:RemoveUserAgentPolicyUse\n+You can disable telemetry when creating a client by setting the `IsTelemetryEnabled` property in the diagnostics options:\n+```C#\n Uri serviceEndpoint = new Uri(\"https://example.contoso.com\");\n TokenCredential credential = new DefaultAzureCredential();\n- \n-SampleClientOptions clientOptions = new SampleClientOptions();\n-clientOptions.AddPolicy(new RemoveUserAgentPolicy(), HttpPipelinePosition.PerRetry);\n+\n+SampleClientOptions clientOptions = new SampleClientOptions()\n+{\n+ Diagnostics = { IsTelemetryEnabled = false }\n+};\n \n SampleClient client = new SampleClient(serviceEndpoint, credential, clientOptions);\n ```\n \n+Alternatively, you can disable telemetry globally by setting the `AZURE_TELEMETRY_DISABLED` environment variable to `true` before creating any clients.", + "comment_created_at": "2025-06-10T22:45:13+00:00", + "comment_author": "Copilot", + "comment_body": "Yes, this is accurate. The `AZURE_TELEMETRY_DISABLED` environment variable support is implemented in the Azure.Core source code:\n\n1. **DiagnosticsOptions.cs:75** - `IsTelemetryEnabled = !EnvironmentVariableToBool(Environment.GetEnvironmentVariable(\"AZURE_TELEMETRY_DISABLED\")) ?? true;`\n2. **DiagnosticsOptions.cs:93-95** - XML documentation explicitly states: \"The default value can be controlled process wide by setting `AZURE_TELEMETRY_DISABLED` to `true`, `false`, `1` or `0`.\"\n3. 
**ClientOptionsTests.cs** - Contains unit test `CanDisableTelemetryWithEnvironmentVariable` that verifies this functionality\n\nThe environment variable is read during DiagnosticsOptions initialization and affects the default value of `IsTelemetryEnabled` for all clients.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2127558101", + "pr_number": 50436, + "pr_file": "sdk/extensions/Microsoft.Extensions.Azure/CHANGELOG.md", + "created_at": "2025-06-04T22:46:49+00:00", + "commented_code": "### Features Added\n\n- Added support for [federated managed identity](https://learn.microsoft.com/entra/workload-id/workload-identity-federation-config-app-trust-managed-identity?tabs=microsoft-entra-admin-center#azureidentity) support in the client factory by specifying configuration item `credential` as \"managedidentityasfederatedidentity\" and providing the following named configuration items:\n\n - `tenantId` : The tenant where the target resource was created\n - `clientId` : The client identifier for the application, which must be granted access on the target resource\n - `managedIdentityClientId` : The managed identity which you configured as a Federated Identity Credential (FIC)\n - `federatedAudience`: One of the following token audiences, specific to the cloud that you're running in:", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2127558101", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50436, + "pr_file": "sdk/extensions/Microsoft.Extensions.Azure/CHANGELOG.md", + "discussion_id": "2127558101", + "commented_code": "@@ -4,6 +4,16 @@\n \n ### Features Added\n \n+- Added support for [federated managed identity](https://learn.microsoft.com/entra/workload-id/workload-identity-federation-config-app-trust-managed-identity?tabs=microsoft-entra-admin-center#azureidentity) support in the client factory by specifying configuration item `credential` as \"managedidentityasfederatedidentity\" and providing the following named configuration items:\n+\n+ - `tenantId` : The tenant where the target resource was created\n+ - `clientId` : The client identifier for the application, which must be granted access on the target resource\n+ - `managedIdentityClientId` : The managed identity which you configured as a Federated Identity Credential (FIC)\n+ - `federatedAudience`: One of the following token audiences, specific to the cloud that you're running in:", + "comment_created_at": "2025-06-04T22:46:49+00:00", + "comment_author": "christothes", + "comment_body": "Should we consider making this value something like `AudienceCloud` and accept values like `global` (default), `usgov`, `china` so that users don't need to remember the magic audience strings?", + "pr_file_module": null + }, + { + "comment_id": "2127560619", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50436, + "pr_file": "sdk/extensions/Microsoft.Extensions.Azure/CHANGELOG.md", + "discussion_id": "2127558101", + "commented_code": "@@ -4,6 +4,16 @@\n \n ### Features Added\n \n+- Added support for [federated managed identity](https://learn.microsoft.com/entra/workload-id/workload-identity-federation-config-app-trust-managed-identity?tabs=microsoft-entra-admin-center#azureidentity) support in the client factory by specifying configuration item `credential` as \"managedidentityasfederatedidentity\" and providing the following named configuration items:\n+\n+ - `tenantId` : The tenant where the target resource was created\n+ - `clientId` : The client identifier 
for the application, which must be granted access on the target resource\n+ - `managedIdentityClientId` : The managed identity which you configured as a Federated Identity Credential (FIC)\n+ - `federatedAudience`: One of the following token audiences, specific to the cloud that you're running in:", + "comment_created_at": "2025-06-04T22:49:01+00:00", + "comment_author": "jsquire", + "comment_body": "I'm good with that, but its @scottaddie's call. I followed the pattern that he asked for and I'm not sure if there are any wider considerations.", + "pr_file_module": null + }, + { + "comment_id": "2130246074", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50436, + "pr_file": "sdk/extensions/Microsoft.Extensions.Azure/CHANGELOG.md", + "discussion_id": "2127558101", + "commented_code": "@@ -4,6 +4,16 @@\n \n ### Features Added\n \n+- Added support for [federated managed identity](https://learn.microsoft.com/entra/workload-id/workload-identity-federation-config-app-trust-managed-identity?tabs=microsoft-entra-admin-center#azureidentity) support in the client factory by specifying configuration item `credential` as \"managedidentityasfederatedidentity\" and providing the following named configuration items:\n+\n+ - `tenantId` : The tenant where the target resource was created\n+ - `clientId` : The client identifier for the application, which must be granted access on the target resource\n+ - `managedIdentityClientId` : The managed identity which you configured as a Federated Identity Credential (FIC)\n+ - `federatedAudience`: One of the following token audiences, specific to the cloud that you're running in:", + "comment_created_at": "2025-06-05T20:30:31+00:00", + "comment_author": "scottaddie", + "comment_body": "+1 to Chris' suggestion. Most customers won't know this magic audience string.", + "pr_file_module": null + }, + { + "comment_id": "2132973364", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50436, + "pr_file": "sdk/extensions/Microsoft.Extensions.Azure/CHANGELOG.md", + "discussion_id": "2127558101", + "commented_code": "@@ -4,6 +4,16 @@\n \n ### Features Added\n \n+- Added support for [federated managed identity](https://learn.microsoft.com/entra/workload-id/workload-identity-federation-config-app-trust-managed-identity?tabs=microsoft-entra-admin-center#azureidentity) support in the client factory by specifying configuration item `credential` as \"managedidentityasfederatedidentity\" and providing the following named configuration items:\n+\n+ - `tenantId` : The tenant where the target resource was created\n+ - `clientId` : The client identifier for the application, which must be granted access on the target resource\n+ - `managedIdentityClientId` : The managed identity which you configured as a Federated Identity Credential (FIC)\n+ - `federatedAudience`: One of the following token audiences, specific to the cloud that you're running in:", + "comment_created_at": "2025-06-06T22:27:09+00:00", + "comment_author": "jsquire", + "comment_body": "Changed to `azureCloud`", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-user-friendly-configuration-values.md b/_reviewers/azure-sdk-for-net-user-friendly-configuration-values.md index 25e7393..d8f7045 100644 --- a/_reviewers/azure-sdk-for-net-user-friendly-configuration-values.md +++ b/_reviewers/azure-sdk-for-net-user-friendly-configuration-values.md @@ -36,155 +36,3 @@ clientOptions.Diagnostics = { IsTelemetryEnabled = false }; ``` For environment 
variables, document all accepted values and include examples showing both the variable name and possible values. - - -[ - { - "discussion_id": "2159569308", - "pr_number": 50749, - "pr_file": "sdk/identity/Azure.Identity/CHANGELOG.md", - "created_at": "2025-06-20T19:38:26+00:00", - "commented_code": "## 1.15.0-beta.1 (Unreleased)\n\n### Features Added\n- Added support for the `AZURE_TOKEN_CREDENTIALS` environment variable to `DefaultAzureCredential`, which allows for additional credential selection options. The valid values now include any of the credential names available in the default chain (VisualStudioCredential, VisualStudioCodeCredential, AzureCliCredential, AzurePowerShellCredential, AzureDeveloperCliCredential, EnvironmentCredential, WorkloadIdentityCredential, ManagedIdentityCredential, InteractiveBrowserCredential, or BrokerAuthenticationCredential.) **Note:** BrokerAuthenticationCredential require that the project include a reference to Azure.Identity.Broker.", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2159569308", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50749, - "pr_file": "sdk/identity/Azure.Identity/CHANGELOG.md", - "discussion_id": "2159569308", - "commented_code": "@@ -3,6 +3,7 @@\n ## 1.15.0-beta.1 (Unreleased)\n \n ### Features Added\n+- Added support for the `AZURE_TOKEN_CREDENTIALS` environment variable to `DefaultAzureCredential`, which allows for additional credential selection options. The valid values now include any of the credential names available in the default chain (VisualStudioCredential, VisualStudioCodeCredential, AzureCliCredential, AzurePowerShellCredential, AzureDeveloperCliCredential, EnvironmentCredential, WorkloadIdentityCredential, ManagedIdentityCredential, InteractiveBrowserCredential, or BrokerAuthenticationCredential.) **Note:** BrokerAuthenticationCredential require that the project include a reference to Azure.Identity.Broker.", - "comment_created_at": "2025-06-20T19:38:26+00:00", - "comment_author": "scottaddie", - "comment_body": "```suggestion\r\n- Expanded the set of acceptable values for environment variable `AZURE_TOKEN_CREDENTIALS` to allow for selection of a specific credential in the `DefaultAzureCredential` chain. The valid values now include any of the credential names available in the default chain (`VisualStudioCredential`, `VisualStudioCodeCredential`, `AzureCliCredential`, `AzurePowerShellCredential`, `AzureDeveloperCliCredential`, `EnvironmentCredential`, `WorkloadIdentityCredential`, `ManagedIdentityCredential`, `InteractiveBrowserCredential`, or `BrokerAuthenticationCredential`.) **Note:** BrokerAuthenticationCredential requires that the project include a reference to package Azure.Identity.Broker.\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2159570531", - "pr_number": 50749, - "pr_file": "sdk/identity/Azure.Identity/TROUBLESHOOTING.md", - "created_at": "2025-06-20T19:39:46+00:00", - "commented_code": "|---|---|---|\n|`CredentialUnavailableException` raised with message. \"DefaultAzureCredential failed to retrieve a token from the included credentials.\"|All credentials in the `DefaultAzureCredential` chain failed to retrieve a token, each throwing a `CredentialUnavailableException`.|
        • [Enable logging](#enable-and-configure-logging) to verify the credentials being tried, and get further diagnostic information.
        • Consult the troubleshooting guide for underlying credential types for more information.
          • [EnvironmentCredential](#troubleshoot-environmentcredential-authentication-issues)
          • [WorkloadIdentityCredential](#troubleshoot-workloadidentitycredential-authentication-issues)
          • [ManagedIdentityCredential](#troubleshoot-managedidentitycredential-authentication-issues)
          • [VisualStudioCredential](#troubleshoot-visualstudiocredential-authentication-issues)
          • [AzureCliCredential](#troubleshoot-azureclicredential-authentication-issues)
          • [AzurePowerShellCredential](#troubleshoot-azurepowershellcredential-authentication-issues)
          |\n|`RequestFailedException` raised from the client with a status code of 401 or 403|Authentication succeeded but the authorizing Azure service responded with a 401 (Authenticate) or 403 (Forbidden) status code. This error can often be caused by the `DefaultAzureCredential` authenticating an account other than the intended or that the intended account doesn't have the correct permissions or roles assigned.|
          • [Enable logging](#enable-and-configure-logging) to determine which credential in the chain returned the authenticating token.
          • In the case a credential other than the expected is returning a token, bypass this by either signing out of the corresponding development tool, or excluding the credential with the ExcludeXXXCredential property in the `DefaultAzureCredentialOptions`
          • Ensure that the correct role is assigned to the account being used. For example, a service specific role rather than the subscription Owner role.
          |\n|`InvalidOperationException` raised with message: \"Invalid value for environment variable AZURE_TOKEN_CREDENTIALS ...\" | An invalid value was set for the AZURE_TOKEN_CREDENTIALS environment variable | Set the environment variable to one of the following values: dev, prod, VisualStudioCredential, VisualStudioCodeCredential, AzureCliCredential, AzurePowerShellCredential, AzureDeveloperCliCredential, EnvironmentCredential, WorkloadIdentityCredential, ManagedIdentityCredential, InteractiveBrowserCredential, or BrokerAuthenticationCredential. **Note:** VisualStudioCodeCredential and BrokerAuthenticationCredential require that the project include a reference to Azure.Identity.Broker. |", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2159570531", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50749, - "pr_file": "sdk/identity/Azure.Identity/TROUBLESHOOTING.md", - "discussion_id": "2159570531", - "commented_code": "@@ -125,6 +125,7 @@ DefaultAzureCredentialOptions options = new\n |---|---|---|\n |`CredentialUnavailableException` raised with message. \"DefaultAzureCredential failed to retrieve a token from the included credentials.\"|All credentials in the `DefaultAzureCredential` chain failed to retrieve a token, each throwing a `CredentialUnavailableException`.|
          • [Enable logging](#enable-and-configure-logging) to verify the credentials being tried, and get further diagnostic information.
          • Consult the troubleshooting guide for underlying credential types for more information.
            • [EnvironmentCredential](#troubleshoot-environmentcredential-authentication-issues)
            • [WorkloadIdentityCredential](#troubleshoot-workloadidentitycredential-authentication-issues)
            • [ManagedIdentityCredential](#troubleshoot-managedidentitycredential-authentication-issues)
            • [VisualStudioCredential](#troubleshoot-visualstudiocredential-authentication-issues)
            • [AzureCliCredential](#troubleshoot-azureclicredential-authentication-issues)
            • [AzurePowerShellCredential](#troubleshoot-azurepowershellcredential-authentication-issues)
            |\n |`RequestFailedException` raised from the client with a status code of 401 or 403|Authentication succeeded but the authorizing Azure service responded with a 401 (Authenticate) or 403 (Forbidden) status code. This error can often be caused by the `DefaultAzureCredential` authenticating an account other than the intended or that the intended account doesn't have the correct permissions or roles assigned.|
            • [Enable logging](#enable-and-configure-logging) to determine which credential in the chain returned the authenticating token.
            • In the case a credential other than the expected is returning a token, bypass this by either signing out of the corresponding development tool, or excluding the credential with the ExcludeXXXCredential property in the `DefaultAzureCredentialOptions`
            • Ensure that the correct role is assigned to the account being used. For example, a service specific role rather than the subscription Owner role.
            |\n+|`InvalidOperationException` raised with message: \"Invalid value for environment variable AZURE_TOKEN_CREDENTIALS ...\" | An invalid value was set for the AZURE_TOKEN_CREDENTIALS environment variable | Set the environment variable to one of the following values: dev, prod, VisualStudioCredential, VisualStudioCodeCredential, AzureCliCredential, AzurePowerShellCredential, AzureDeveloperCliCredential, EnvironmentCredential, WorkloadIdentityCredential, ManagedIdentityCredential, InteractiveBrowserCredential, or BrokerAuthenticationCredential. **Note:** VisualStudioCodeCredential and BrokerAuthenticationCredential require that the project include a reference to Azure.Identity.Broker. |", - "comment_created_at": "2025-06-20T19:39:46+00:00", - "comment_author": "scottaddie", - "comment_body": "I noticed the CHANGELOG entry doesn't mention anything about VS Code credential in the last sentence. Should it? Note that credential is mentioned in the last sentence here.\r\n```suggestion\r\n|`InvalidOperationException` raised with message: \"Invalid value for environment variable AZURE_TOKEN_CREDENTIALS ...\" | An invalid value was set for the AZURE_TOKEN_CREDENTIALS environment variable | Set the environment variable to one of the following values: dev, prod, `VisualStudioCredential`, `VisualStudioCodeCredential`, `AzureCliCredential`, `AzurePowerShellCredential`, `AzureDeveloperCliCredential`, `EnvironmentCredential`, `WorkloadIdentityCredential`, `ManagedIdentityCredential`, `InteractiveBrowserCredential`, or `BrokerAuthenticationCredential`. **Note:** `VisualStudioCodeCredential` and `BrokerAuthenticationCredential` require that the project include a reference to package Azure.Identity.Broker. |\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2160166052", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50749, - "pr_file": "sdk/identity/Azure.Identity/TROUBLESHOOTING.md", - "discussion_id": "2159570531", - "commented_code": "@@ -125,6 +125,7 @@ DefaultAzureCredentialOptions options = new\n |---|---|---|\n |`CredentialUnavailableException` raised with message. \"DefaultAzureCredential failed to retrieve a token from the included credentials.\"|All credentials in the `DefaultAzureCredential` chain failed to retrieve a token, each throwing a `CredentialUnavailableException`.|
            • [Enable logging](#enable-and-configure-logging) to verify the credentials being tried, and get further diagnostic information.
            • Consult the troubleshooting guide for underlying credential types for more information.
              • [EnvironmentCredential](#troubleshoot-environmentcredential-authentication-issues)
              • [WorkloadIdentityCredential](#troubleshoot-workloadidentitycredential-authentication-issues)
              • [ManagedIdentityCredential](#troubleshoot-managedidentitycredential-authentication-issues)
              • [VisualStudioCredential](#troubleshoot-visualstudiocredential-authentication-issues)
              • [AzureCliCredential](#troubleshoot-azureclicredential-authentication-issues)
              • [AzurePowerShellCredential](#troubleshoot-azurepowershellcredential-authentication-issues)
              |\n |`RequestFailedException` raised from the client with a status code of 401 or 403|Authentication succeeded but the authorizing Azure service responded with a 401 (Authenticate) or 403 (Forbidden) status code. This error can often be caused by the `DefaultAzureCredential` authenticating an account other than the intended or that the intended account doesn't have the correct permissions or roles assigned.|
              • [Enable logging](#enable-and-configure-logging) to determine which credential in the chain returned the authenticating token.
              • In the case a credential other than the expected is returning a token, bypass this by either signing out of the corresponding development tool, or excluding the credential with the ExcludeXXXCredential property in the `DefaultAzureCredentialOptions`
              • Ensure that the correct role is assigned to the account being used. For example, a service specific role rather than the subscription Owner role.
              |\n+|`InvalidOperationException` raised with message: \"Invalid value for environment variable AZURE_TOKEN_CREDENTIALS ...\" | An invalid value was set for the AZURE_TOKEN_CREDENTIALS environment variable | Set the environment variable to one of the following values: dev, prod, VisualStudioCredential, VisualStudioCodeCredential, AzureCliCredential, AzurePowerShellCredential, AzureDeveloperCliCredential, EnvironmentCredential, WorkloadIdentityCredential, ManagedIdentityCredential, InteractiveBrowserCredential, or BrokerAuthenticationCredential. **Note:** VisualStudioCodeCredential and BrokerAuthenticationCredential require that the project include a reference to Azure.Identity.Broker. |", - "comment_created_at": "2025-06-21T21:58:31+00:00", - "comment_author": "christothes", - "comment_body": "I meant to exclude it until the next PR.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2138876254", - "pr_number": 50531, - "pr_file": "README.md", - "created_at": "2025-06-10T22:40:31+00:00", - "commented_code": "Telemetry collection is on by default.\n\nTo opt out, you can disable telemetry at client construction. Creating a [synchronous policy](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/core/Azure.Core/samples/Pipeline.md#implementing-a-synchronous-policy) in your application gives you access to the headers sent with each request, allowing you to remove the `User-Agent` header containing client telemetry. To use the policy, you will add it to the options for your specific client as part of client creation. This will disable Azure SDK telemetry for all methods in the client. You will need to register the policy with each client created.\nTo opt out, you can disable telemetry by setting the `IsTelemetryEnabled` property to `false` in the client's `DiagnosticsOptions`. This will prevent the Azure SDK from adding telemetry information to the `User-Agent` header for all requests made by the client.\n\n> NOTE: `HttpClient` may set default user agent headers as part of the .NET platform behavior. 
This value does not contain any Azure SDK telemetry information.\n\nAn example policy implementation looks like:\n```C# Snippet:RemoveUserAgentPolicy\npublic class RemoveUserAgentPolicy : HttpPipelineSynchronousPolicy\n{\n public override void OnSendingRequest(HttpMessage message)\n {\n message.Request.Headers.Remove(HttpHeader.Names.UserAgent);\n }\n}\n```\n\nTo use it with a client, you would register it to run for every retry attempt as part of your client options:\n```C# Snippet:RemoveUserAgentPolicyUse\nYou can disable telemetry when creating a client by setting the `IsTelemetryEnabled` property in the diagnostics options:\n```C#\nUri serviceEndpoint = new Uri(\"https://example.contoso.com\");\nTokenCredential credential = new DefaultAzureCredential();\n \nSampleClientOptions clientOptions = new SampleClientOptions();\nclientOptions.AddPolicy(new RemoveUserAgentPolicy(), HttpPipelinePosition.PerRetry);\n\nSampleClientOptions clientOptions = new SampleClientOptions()\n{\n Diagnostics = { IsTelemetryEnabled = false }\n};\n\nSampleClient client = new SampleClient(serviceEndpoint, credential, clientOptions);\n```\n\nAlternatively, you can disable telemetry globally by setting the `AZURE_TELEMETRY_DISABLED` environment variable to `true` before creating any clients.", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2138876254", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50531, - "pr_file": "README.md", - "discussion_id": "2138876254", - "commented_code": "@@ -60,32 +60,25 @@ The software may collect information about you and your use of the software and\n \n Telemetry collection is on by default.\n \n-To opt out, you can disable telemetry at client construction. Creating a [synchronous policy](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/core/Azure.Core/samples/Pipeline.md#implementing-a-synchronous-policy) in your application gives you access to the headers sent with each request, allowing you to remove the `User-Agent` header containing client telemetry. To use the policy, you will add it to the options for your specific client as part of client creation. This will disable Azure SDK telemetry for all methods in the client. You will need to register the policy with each client created.\n+To opt out, you can disable telemetry by setting the `IsTelemetryEnabled` property to `false` in the client's `DiagnosticsOptions`. This will prevent the Azure SDK from adding telemetry information to the `User-Agent` header for all requests made by the client.\n \n > NOTE: `HttpClient` may set default user agent headers as part of the .NET platform behavior. 
This value does not contain any Azure SDK telemetry information.\n \n-An example policy implementation looks like:\n-```C# Snippet:RemoveUserAgentPolicy\n-public class RemoveUserAgentPolicy : HttpPipelineSynchronousPolicy\n-{\n- public override void OnSendingRequest(HttpMessage message)\n- {\n- message.Request.Headers.Remove(HttpHeader.Names.UserAgent);\n- }\n-}\n-```\n-\n-To use it with a client, you would register it to run for every retry attempt as part of your client options:\n-```C# Snippet:RemoveUserAgentPolicyUse\n+You can disable telemetry when creating a client by setting the `IsTelemetryEnabled` property in the diagnostics options:\n+```C#\n Uri serviceEndpoint = new Uri(\"https://example.contoso.com\");\n TokenCredential credential = new DefaultAzureCredential();\n- \n-SampleClientOptions clientOptions = new SampleClientOptions();\n-clientOptions.AddPolicy(new RemoveUserAgentPolicy(), HttpPipelinePosition.PerRetry);\n+\n+SampleClientOptions clientOptions = new SampleClientOptions()\n+{\n+ Diagnostics = { IsTelemetryEnabled = false }\n+};\n \n SampleClient client = new SampleClient(serviceEndpoint, credential, clientOptions);\n ```\n \n+Alternatively, you can disable telemetry globally by setting the `AZURE_TELEMETRY_DISABLED` environment variable to `true` before creating any clients.", - "comment_created_at": "2025-06-10T22:40:31+00:00", - "comment_author": "KrzysztofCwalina", - "comment_body": "is that really true? @copilot, where did you find out about this?", - "pr_file_module": null - }, - { - "comment_id": "2138883720", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50531, - "pr_file": "README.md", - "discussion_id": "2138876254", - "commented_code": "@@ -60,32 +60,25 @@ The software may collect information about you and your use of the software and\n \n Telemetry collection is on by default.\n \n-To opt out, you can disable telemetry at client construction. Creating a [synchronous policy](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/core/Azure.Core/samples/Pipeline.md#implementing-a-synchronous-policy) in your application gives you access to the headers sent with each request, allowing you to remove the `User-Agent` header containing client telemetry. To use the policy, you will add it to the options for your specific client as part of client creation. This will disable Azure SDK telemetry for all methods in the client. You will need to register the policy with each client created.\n+To opt out, you can disable telemetry by setting the `IsTelemetryEnabled` property to `false` in the client's `DiagnosticsOptions`. This will prevent the Azure SDK from adding telemetry information to the `User-Agent` header for all requests made by the client.\n \n > NOTE: `HttpClient` may set default user agent headers as part of the .NET platform behavior. 
This value does not contain any Azure SDK telemetry information.\n \n-An example policy implementation looks like:\n-```C# Snippet:RemoveUserAgentPolicy\n-public class RemoveUserAgentPolicy : HttpPipelineSynchronousPolicy\n-{\n- public override void OnSendingRequest(HttpMessage message)\n- {\n- message.Request.Headers.Remove(HttpHeader.Names.UserAgent);\n- }\n-}\n-```\n-\n-To use it with a client, you would register it to run for every retry attempt as part of your client options:\n-```C# Snippet:RemoveUserAgentPolicyUse\n+You can disable telemetry when creating a client by setting the `IsTelemetryEnabled` property in the diagnostics options:\n+```C#\n Uri serviceEndpoint = new Uri(\"https://example.contoso.com\");\n TokenCredential credential = new DefaultAzureCredential();\n- \n-SampleClientOptions clientOptions = new SampleClientOptions();\n-clientOptions.AddPolicy(new RemoveUserAgentPolicy(), HttpPipelinePosition.PerRetry);\n+\n+SampleClientOptions clientOptions = new SampleClientOptions()\n+{\n+ Diagnostics = { IsTelemetryEnabled = false }\n+};\n \n SampleClient client = new SampleClient(serviceEndpoint, credential, clientOptions);\n ```\n \n+Alternatively, you can disable telemetry globally by setting the `AZURE_TELEMETRY_DISABLED` environment variable to `true` before creating any clients.", - "comment_created_at": "2025-06-10T22:45:13+00:00", - "comment_author": "Copilot", - "comment_body": "Yes, this is accurate. The `AZURE_TELEMETRY_DISABLED` environment variable support is implemented in the Azure.Core source code:\n\n1. **DiagnosticsOptions.cs:75** - `IsTelemetryEnabled = !EnvironmentVariableToBool(Environment.GetEnvironmentVariable(\"AZURE_TELEMETRY_DISABLED\")) ?? true;`\n2. **DiagnosticsOptions.cs:93-95** - XML documentation explicitly states: \"The default value can be controlled process wide by setting `AZURE_TELEMETRY_DISABLED` to `true`, `false`, `1` or `0`.\"\n3. 
**ClientOptionsTests.cs** - Contains unit test `CanDisableTelemetryWithEnvironmentVariable` that verifies this functionality\n\nThe environment variable is read during DiagnosticsOptions initialization and affects the default value of `IsTelemetryEnabled` for all clients.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2127558101", - "pr_number": 50436, - "pr_file": "sdk/extensions/Microsoft.Extensions.Azure/CHANGELOG.md", - "created_at": "2025-06-04T22:46:49+00:00", - "commented_code": "### Features Added\n\n- Added support for [federated managed identity](https://learn.microsoft.com/entra/workload-id/workload-identity-federation-config-app-trust-managed-identity?tabs=microsoft-entra-admin-center#azureidentity) support in the client factory by specifying configuration item `credential` as \"managedidentityasfederatedidentity\" and providing the following named configuration items:\n\n - `tenantId` : The tenant where the target resource was created\n - `clientId` : The client identifier for the application, which must be granted access on the target resource\n - `managedIdentityClientId` : The managed identity which you configured as a Federated Identity Credential (FIC)\n - `federatedAudience`: One of the following token audiences, specific to the cloud that you're running in:", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2127558101", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50436, - "pr_file": "sdk/extensions/Microsoft.Extensions.Azure/CHANGELOG.md", - "discussion_id": "2127558101", - "commented_code": "@@ -4,6 +4,16 @@\n \n ### Features Added\n \n+- Added support for [federated managed identity](https://learn.microsoft.com/entra/workload-id/workload-identity-federation-config-app-trust-managed-identity?tabs=microsoft-entra-admin-center#azureidentity) support in the client factory by specifying configuration item `credential` as \"managedidentityasfederatedidentity\" and providing the following named configuration items:\n+\n+ - `tenantId` : The tenant where the target resource was created\n+ - `clientId` : The client identifier for the application, which must be granted access on the target resource\n+ - `managedIdentityClientId` : The managed identity which you configured as a Federated Identity Credential (FIC)\n+ - `federatedAudience`: One of the following token audiences, specific to the cloud that you're running in:", - "comment_created_at": "2025-06-04T22:46:49+00:00", - "comment_author": "christothes", - "comment_body": "Should we consider making this value something like `AudienceCloud` and accept values like `global` (default), `usgov`, `china` so that users don't need to remember the magic audience strings?", - "pr_file_module": null - }, - { - "comment_id": "2127560619", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50436, - "pr_file": "sdk/extensions/Microsoft.Extensions.Azure/CHANGELOG.md", - "discussion_id": "2127558101", - "commented_code": "@@ -4,6 +4,16 @@\n \n ### Features Added\n \n+- Added support for [federated managed identity](https://learn.microsoft.com/entra/workload-id/workload-identity-federation-config-app-trust-managed-identity?tabs=microsoft-entra-admin-center#azureidentity) support in the client factory by specifying configuration item `credential` as \"managedidentityasfederatedidentity\" and providing the following named configuration items:\n+\n+ - `tenantId` : The tenant where the target resource was created\n+ - `clientId` : The client identifier 
for the application, which must be granted access on the target resource\n+ - `managedIdentityClientId` : The managed identity which you configured as a Federated Identity Credential (FIC)\n+ - `federatedAudience`: One of the following token audiences, specific to the cloud that you're running in:", - "comment_created_at": "2025-06-04T22:49:01+00:00", - "comment_author": "jsquire", - "comment_body": "I'm good with that, but its @scottaddie's call. I followed the pattern that he asked for and I'm not sure if there are any wider considerations.", - "pr_file_module": null - }, - { - "comment_id": "2130246074", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50436, - "pr_file": "sdk/extensions/Microsoft.Extensions.Azure/CHANGELOG.md", - "discussion_id": "2127558101", - "commented_code": "@@ -4,6 +4,16 @@\n \n ### Features Added\n \n+- Added support for [federated managed identity](https://learn.microsoft.com/entra/workload-id/workload-identity-federation-config-app-trust-managed-identity?tabs=microsoft-entra-admin-center#azureidentity) support in the client factory by specifying configuration item `credential` as \"managedidentityasfederatedidentity\" and providing the following named configuration items:\n+\n+ - `tenantId` : The tenant where the target resource was created\n+ - `clientId` : The client identifier for the application, which must be granted access on the target resource\n+ - `managedIdentityClientId` : The managed identity which you configured as a Federated Identity Credential (FIC)\n+ - `federatedAudience`: One of the following token audiences, specific to the cloud that you're running in:", - "comment_created_at": "2025-06-05T20:30:31+00:00", - "comment_author": "scottaddie", - "comment_body": "+1 to Chris' suggestion. Most customers won't know this magic audience string.", - "pr_file_module": null - }, - { - "comment_id": "2132973364", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50436, - "pr_file": "sdk/extensions/Microsoft.Extensions.Azure/CHANGELOG.md", - "discussion_id": "2127558101", - "commented_code": "@@ -4,6 +4,16 @@\n \n ### Features Added\n \n+- Added support for [federated managed identity](https://learn.microsoft.com/entra/workload-id/workload-identity-federation-config-app-trust-managed-identity?tabs=microsoft-entra-admin-center#azureidentity) support in the client factory by specifying configuration item `credential` as \"managedidentityasfederatedidentity\" and providing the following named configuration items:\n+\n+ - `tenantId` : The tenant where the target resource was created\n+ - `clientId` : The client identifier for the application, which must be granted access on the target resource\n+ - `managedIdentityClientId` : The managed identity which you configured as a Federated Identity Credential (FIC)\n+ - `federatedAudience`: One of the following token audiences, specific to the cloud that you're running in:", - "comment_created_at": "2025-06-06T22:27:09+00:00", - "comment_author": "jsquire", - "comment_body": "Changed to `azureCloud`", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/azure-sdk-for-net-write-deterministic-tests.json b/_reviewers/azure-sdk-for-net-write-deterministic-tests.json new file mode 100644 index 0000000..098af76 --- /dev/null +++ b/_reviewers/azure-sdk-for-net-write-deterministic-tests.json @@ -0,0 +1,102 @@ +[ + { + "discussion_id": "2155220117", + "pr_number": 50534, + "pr_file": "sdk/core/Azure.Core/Benchmarks.Nuget/PipelineScenario.cs", + "created_at": "2025-06-18T18:14:23+00:00", 
+ "commented_code": "using System;\nusing System.Net.Http;\nusing System.Threading;\nusing System.Threading.Tasks;\nusing Azure.Core;\nusing Azure.Core.Pipeline;\n\nnamespace Benchmarks.Nuget\n{\n public class PipelineScenario\n {\n public readonly HttpPipeline _pipeline;\n\n public PipelineScenario()\n {\n var options = new BenchmarkClientOptions\n {\n Transport = new HttpClientTransport(new HttpClient())\n };\n _pipeline = HttpPipelineBuilder.Build(options);\n }\n\n public async Task SendAsync()\n {\n var message = _pipeline.CreateMessage();\n message.Request.Uri.Reset(new Uri(\"https://www.example.com\"));\n await _pipeline.SendAsync(message, CancellationToken.None);", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2155220117", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50534, + "pr_file": "sdk/core/Azure.Core/Benchmarks.Nuget/PipelineScenario.cs", + "discussion_id": "2155220117", + "commented_code": "@@ -0,0 +1,33 @@\n+using System;\n+using System.Net.Http;\n+using System.Threading;\n+using System.Threading.Tasks;\n+using Azure.Core;\n+using Azure.Core.Pipeline;\n+\n+namespace Benchmarks.Nuget\n+{\n+ public class PipelineScenario\n+ {\n+ public readonly HttpPipeline _pipeline;\n+\n+ public PipelineScenario()\n+ {\n+ var options = new BenchmarkClientOptions\n+ {\n+ Transport = new HttpClientTransport(new HttpClient())\n+ };\n+ _pipeline = HttpPipelineBuilder.Build(options);\n+ }\n+\n+ public async Task SendAsync()\n+ {\n+ var message = _pipeline.CreateMessage();\n+ message.Request.Uri.Reset(new Uri(\"https://www.example.com\"));\n+ await _pipeline.SendAsync(message, CancellationToken.None);", + "comment_created_at": "2025-06-18T18:14:23+00:00", + "comment_author": "m-redding", + "comment_body": "When you're running this, does this work with the example.com uri?\r\n\r\nYou could consider a few alternatives since we want to avoid network calls to isolate client code:\r\n- mocking the transport - it looks like PipelineBenchmark used to use a mock HTTP handler implementation https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/core/Azure.Core/perf/PipelineBenchmark.cs#L66\r\n- using a TestServer. 
This is an example of using a test server https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/core/System.ClientModel/tests/Pipeline/ClientPipelineFunctionalTests.cs#L40-L46\r\n- integrating with the test proxy https://github.com/Azure/azure-sdk-tools/blob/main/tools/test-proxy/Azure.Sdk.Tools.TestProxy/README.md", + "pr_file_module": null + }, + { + "comment_id": "2157684780", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50534, + "pr_file": "sdk/core/Azure.Core/Benchmarks.Nuget/PipelineScenario.cs", + "discussion_id": "2155220117", + "commented_code": "@@ -0,0 +1,33 @@\n+using System;\n+using System.Net.Http;\n+using System.Threading;\n+using System.Threading.Tasks;\n+using Azure.Core;\n+using Azure.Core.Pipeline;\n+\n+namespace Benchmarks.Nuget\n+{\n+ public class PipelineScenario\n+ {\n+ public readonly HttpPipeline _pipeline;\n+\n+ public PipelineScenario()\n+ {\n+ var options = new BenchmarkClientOptions\n+ {\n+ Transport = new HttpClientTransport(new HttpClient())\n+ };\n+ _pipeline = HttpPipelineBuilder.Build(options);\n+ }\n+\n+ public async Task SendAsync()\n+ {\n+ var message = _pipeline.CreateMessage();\n+ message.Request.Uri.Reset(new Uri(\"https://www.example.com\"));\n+ await _pipeline.SendAsync(message, CancellationToken.None);", + "comment_created_at": "2025-06-19T21:37:22+00:00", + "comment_author": "sa7936", + "comment_body": "I put back the mocking of transport calls ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2124976957", + "pr_number": 50394, + "pr_file": "sdk/servicebus/Azure.Messaging.ServiceBus/tests/Primitives/ServiceBusRetryPolicyTests.cs", + "created_at": "2025-06-03T21:40:01+00:00", + "commented_code": "return null;\n }\n }\n\n private class CustomServerBusyMockRetryPolicy : ServiceBusRetryPolicy\n {\n public const int MaxRetries = 3;\n\n private int _retryCount = 0;\n\n public int CalculateRetryDelayCallCount { get; private set; }\n\n public void SetServerBusyForTest()\n {\n // Set the private _serverBusyState to ServerBusyState (1)\n var field = typeof(ServiceBusRetryPolicy).GetField(\"_serverBusyState\", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance);", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2124976957", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50394, + "pr_file": "sdk/servicebus/Azure.Messaging.ServiceBus/tests/Primitives/ServiceBusRetryPolicyTests.cs", + "discussion_id": "2124976957", + "commented_code": "@@ -114,5 +175,81 @@ public override TimeSpan CalculateTryTimeout(int attemptCount)\n return null;\n }\n }\n+\n+ private class CustomServerBusyMockRetryPolicy : ServiceBusRetryPolicy\n+ {\n+ public const int MaxRetries = 3;\n+\n+ private int _retryCount = 0;\n+\n+ public int CalculateRetryDelayCallCount { get; private set; }\n+\n+ public void SetServerBusyForTest()\n+ {\n+ // Set the private _serverBusyState to ServerBusyState (1)\n+ var field = typeof(ServiceBusRetryPolicy).GetField(\"_serverBusyState\", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance);", + "comment_created_at": "2025-06-03T21:40:01+00:00", + "comment_author": "Copilot", + "comment_body": "Consider exposing a protected setter or an internal method in ServiceBusRetryPolicy to modify the server busy state instead of using reflection in test helper classes. 
This will reduce brittleness in tests if the underlying field name changes.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2112263732", + "pr_number": 50304, + "pr_file": "sdk/storage/Azure.Storage.DataMovement.Files.Shares/tests/ShareFileStartTransferDownloadTests.cs", + "created_at": "2025-05-28T16:04:05+00:00", + "commented_code": "using Stream sourceStream = await hardlinkClient.OpenReadAsync();\n using Stream destinationStream = File.OpenRead(destinationResource.Uri.LocalPath);\n Assert.AreEqual(sourceStream.Length, destinationStream.Length);\n Assert.AreEqual(sourceStream, destinationStream);", + "repo_full_name": "Azure/azure-sdk-for-net", + "discussion_comments": [ + { + "comment_id": "2112263732", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50304, + "pr_file": "sdk/storage/Azure.Storage.DataMovement.Files.Shares/tests/ShareFileStartTransferDownloadTests.cs", + "discussion_id": "2112263732", + "commented_code": "@@ -132,6 +132,7 @@ await TestTransferWithTimeout.WaitForCompletionAsync(\n using Stream sourceStream = await hardlinkClient.OpenReadAsync();\n using Stream destinationStream = File.OpenRead(destinationResource.Uri.LocalPath);\n Assert.AreEqual(sourceStream.Length, destinationStream.Length);\n+ Assert.AreEqual(sourceStream, destinationStream);", + "comment_created_at": "2025-05-28T16:04:05+00:00", + "comment_author": "Copilot", + "comment_body": "Comparing two Stream objects with Assert.AreEqual checks reference equality rather than content. Consider reading both streams into byte arrays and comparing their sequences, or using a helper method to verify that the contents match.\n```suggestion\n byte[] sourceBytes;\n byte[] destinationBytes;\n\n using (MemoryStream sourceMemoryStream = new MemoryStream())\n {\n await sourceStream.CopyToAsync(sourceMemoryStream);\n sourceBytes = sourceMemoryStream.ToArray();\n }\n\n using (MemoryStream destinationMemoryStream = new MemoryStream())\n {\n await destinationStream.CopyToAsync(destinationMemoryStream);\n destinationBytes = destinationMemoryStream.ToArray();\n }\n\n Assert.AreEqual(sourceBytes.Length, destinationBytes.Length, \"Stream lengths do not match.\");\n Assert.AreEqual(sourceBytes, destinationBytes, \"Stream contents do not match.\");\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2103231256", + "pr_number": 50222, + "pr_file": "sdk/communication/Azure.Communication.CallAutomation/tests/CallRecordings/CallRecordingAutomatedLiveTests.cs", + "created_at": "2025-05-22T19:06:18+00:00", + "commented_code": "Assert.AreEqual(StatusCodes.Status200OK, startRecordingResponse.GetRawResponse().Status);\n Assert.NotNull(startRecordingResponse.Value.RecordingId);\n\n // Update the property name from 'PlaySourceId' to 'PlaySourceCacheId' as per the provided type signature.\n var playSource = new FileSource(new Uri(TestEnvironment.FileSourceUrl)) { PlaySourceCacheId = \"test-audio\" };\n var playResponse = await response.CallConnection.GetCallMedia().PlayToAllAsync(playSource);\n Assert.NotNull(playResponse);\n Assert.AreEqual(202, playResponse.GetRawResponse().Status);\n\n await Task.Delay(TimeSpan.FromSeconds(5)).ConfigureAwait(false);\n\n // try stop recording\n var stopRecordingResponse = await client.GetCallRecording().StopAsync(startRecordingResponse.Value.RecordingId);\n Assert.AreEqual(StatusCodes.Status204NoContent, stopRecordingResponse.Status);\n\n await Task.Delay(TimeSpan.FromSeconds(5)).ConfigureAwait(false);", + "repo_full_name": "Azure/azure-sdk-for-net", + 
"discussion_comments": [ + { + "comment_id": "2103231256", + "repo_full_name": "Azure/azure-sdk-for-net", + "pr_number": 50222, + "pr_file": "sdk/communication/Azure.Communication.CallAutomation/tests/CallRecordings/CallRecordingAutomatedLiveTests.cs", + "discussion_id": "2103231256", + "commented_code": "@@ -472,10 +472,20 @@ public async Task GetRecordingTest()\n Assert.AreEqual(StatusCodes.Status200OK, startRecordingResponse.GetRawResponse().Status);\n Assert.NotNull(startRecordingResponse.Value.RecordingId);\n \n+ // Update the property name from 'PlaySourceId' to 'PlaySourceCacheId' as per the provided type signature.\n+ var playSource = new FileSource(new Uri(TestEnvironment.FileSourceUrl)) { PlaySourceCacheId = \"test-audio\" };\n+ var playResponse = await response.CallConnection.GetCallMedia().PlayToAllAsync(playSource);\n+ Assert.NotNull(playResponse);\n+ Assert.AreEqual(202, playResponse.GetRawResponse().Status);\n+\n+ await Task.Delay(TimeSpan.FromSeconds(5)).ConfigureAwait(false);\n+\n // try stop recording\n var stopRecordingResponse = await client.GetCallRecording().StopAsync(startRecordingResponse.Value.RecordingId);\n Assert.AreEqual(StatusCodes.Status204NoContent, stopRecordingResponse.Status);\n \n+ await Task.Delay(TimeSpan.FromSeconds(5)).ConfigureAwait(false);", + "comment_created_at": "2025-05-22T19:06:18+00:00", + "comment_author": "Copilot", + "comment_body": "Using fixed delays can lead to flaky tests; consider polling the recording status with a timeout or exponential backoff instead of a hardcoded Task.Delay.\n```suggestion\n await WaitForConditionAsync(\n async () =>\n {\n var recordingState = await client.GetCallRecording().GetRecordingStateAsync(startRecordingResponse.Value.RecordingId).ConfigureAwait(false);\n return recordingState.Value == RecordingState.Active;\n },\n TimeSpan.FromSeconds(10),\n TimeSpan.FromMilliseconds(500)\n ).ConfigureAwait(false);\n\n // try stop recording\n var stopRecordingResponse = await client.GetCallRecording().StopAsync(startRecordingResponse.Value.RecordingId);\n Assert.AreEqual(StatusCodes.Status204NoContent, stopRecordingResponse.Status);\n\n await WaitForConditionAsync(\n async () =>\n {\n var recordingState = await client.GetCallRecording().GetRecordingStateAsync(startRecordingResponse.Value.RecordingId).ConfigureAwait(false);\n return recordingState.Value == RecordingState.Stopped;\n },\n TimeSpan.FromSeconds(10),\n TimeSpan.FromMilliseconds(500)\n ).ConfigureAwait(false);\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/azure-sdk-for-net-write-deterministic-tests.md b/_reviewers/azure-sdk-for-net-write-deterministic-tests.md index b64d7b3..cb43b55 100644 --- a/_reviewers/azure-sdk-for-net-write-deterministic-tests.md +++ b/_reviewers/azure-sdk-for-net-write-deterministic-tests.md @@ -68,107 +68,3 @@ Tests should be deterministic, reliable, and isolated from external dependencies ``` These practices ensure tests remain stable across environments and over time, reducing maintenance costs and improving developer productivity. 
- - -[ - { - "discussion_id": "2155220117", - "pr_number": 50534, - "pr_file": "sdk/core/Azure.Core/Benchmarks.Nuget/PipelineScenario.cs", - "created_at": "2025-06-18T18:14:23+00:00", - "commented_code": "\ufeffusing System;\nusing System.Net.Http;\nusing System.Threading;\nusing System.Threading.Tasks;\nusing Azure.Core;\nusing Azure.Core.Pipeline;\n\nnamespace Benchmarks.Nuget\n{\n public class PipelineScenario\n {\n public readonly HttpPipeline _pipeline;\n\n public PipelineScenario()\n {\n var options = new BenchmarkClientOptions\n {\n Transport = new HttpClientTransport(new HttpClient())\n };\n _pipeline = HttpPipelineBuilder.Build(options);\n }\n\n public async Task SendAsync()\n {\n var message = _pipeline.CreateMessage();\n message.Request.Uri.Reset(new Uri(\"https://www.example.com\"));\n await _pipeline.SendAsync(message, CancellationToken.None);", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2155220117", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50534, - "pr_file": "sdk/core/Azure.Core/Benchmarks.Nuget/PipelineScenario.cs", - "discussion_id": "2155220117", - "commented_code": "@@ -0,0 +1,33 @@\n+\ufeffusing System;\n+using System.Net.Http;\n+using System.Threading;\n+using System.Threading.Tasks;\n+using Azure.Core;\n+using Azure.Core.Pipeline;\n+\n+namespace Benchmarks.Nuget\n+{\n+ public class PipelineScenario\n+ {\n+ public readonly HttpPipeline _pipeline;\n+\n+ public PipelineScenario()\n+ {\n+ var options = new BenchmarkClientOptions\n+ {\n+ Transport = new HttpClientTransport(new HttpClient())\n+ };\n+ _pipeline = HttpPipelineBuilder.Build(options);\n+ }\n+\n+ public async Task SendAsync()\n+ {\n+ var message = _pipeline.CreateMessage();\n+ message.Request.Uri.Reset(new Uri(\"https://www.example.com\"));\n+ await _pipeline.SendAsync(message, CancellationToken.None);", - "comment_created_at": "2025-06-18T18:14:23+00:00", - "comment_author": "m-redding", - "comment_body": "When you're running this, does this work with the example.com uri?\r\n\r\nYou could consider a few alternatives since we want to avoid network calls to isolate client code:\r\n- mocking the transport - it looks like PipelineBenchmark used to use a mock HTTP handler implementation https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/core/Azure.Core/perf/PipelineBenchmark.cs#L66\r\n- using a TestServer. 
This is an example of using a test server https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/core/System.ClientModel/tests/Pipeline/ClientPipelineFunctionalTests.cs#L40-L46\r\n- integrating with the test proxy https://github.com/Azure/azure-sdk-tools/blob/main/tools/test-proxy/Azure.Sdk.Tools.TestProxy/README.md", - "pr_file_module": null - }, - { - "comment_id": "2157684780", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50534, - "pr_file": "sdk/core/Azure.Core/Benchmarks.Nuget/PipelineScenario.cs", - "discussion_id": "2155220117", - "commented_code": "@@ -0,0 +1,33 @@\n+\ufeffusing System;\n+using System.Net.Http;\n+using System.Threading;\n+using System.Threading.Tasks;\n+using Azure.Core;\n+using Azure.Core.Pipeline;\n+\n+namespace Benchmarks.Nuget\n+{\n+ public class PipelineScenario\n+ {\n+ public readonly HttpPipeline _pipeline;\n+\n+ public PipelineScenario()\n+ {\n+ var options = new BenchmarkClientOptions\n+ {\n+ Transport = new HttpClientTransport(new HttpClient())\n+ };\n+ _pipeline = HttpPipelineBuilder.Build(options);\n+ }\n+\n+ public async Task SendAsync()\n+ {\n+ var message = _pipeline.CreateMessage();\n+ message.Request.Uri.Reset(new Uri(\"https://www.example.com\"));\n+ await _pipeline.SendAsync(message, CancellationToken.None);", - "comment_created_at": "2025-06-19T21:37:22+00:00", - "comment_author": "sa7936", - "comment_body": "I put back the mocking of transport calls ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2124976957", - "pr_number": 50394, - "pr_file": "sdk/servicebus/Azure.Messaging.ServiceBus/tests/Primitives/ServiceBusRetryPolicyTests.cs", - "created_at": "2025-06-03T21:40:01+00:00", - "commented_code": "return null;\n }\n }\n\n private class CustomServerBusyMockRetryPolicy : ServiceBusRetryPolicy\n {\n public const int MaxRetries = 3;\n\n private int _retryCount = 0;\n\n public int CalculateRetryDelayCallCount { get; private set; }\n\n public void SetServerBusyForTest()\n {\n // Set the private _serverBusyState to ServerBusyState (1)\n var field = typeof(ServiceBusRetryPolicy).GetField(\"_serverBusyState\", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance);", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2124976957", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50394, - "pr_file": "sdk/servicebus/Azure.Messaging.ServiceBus/tests/Primitives/ServiceBusRetryPolicyTests.cs", - "discussion_id": "2124976957", - "commented_code": "@@ -114,5 +175,81 @@ public override TimeSpan CalculateTryTimeout(int attemptCount)\n return null;\n }\n }\n+\n+ private class CustomServerBusyMockRetryPolicy : ServiceBusRetryPolicy\n+ {\n+ public const int MaxRetries = 3;\n+\n+ private int _retryCount = 0;\n+\n+ public int CalculateRetryDelayCallCount { get; private set; }\n+\n+ public void SetServerBusyForTest()\n+ {\n+ // Set the private _serverBusyState to ServerBusyState (1)\n+ var field = typeof(ServiceBusRetryPolicy).GetField(\"_serverBusyState\", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance);", - "comment_created_at": "2025-06-03T21:40:01+00:00", - "comment_author": "Copilot", - "comment_body": "Consider exposing a protected setter or an internal method in ServiceBusRetryPolicy to modify the server busy state instead of using reflection in test helper classes. 
This will reduce brittleness in tests if the underlying field name changes.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2112263732", - "pr_number": 50304, - "pr_file": "sdk/storage/Azure.Storage.DataMovement.Files.Shares/tests/ShareFileStartTransferDownloadTests.cs", - "created_at": "2025-05-28T16:04:05+00:00", - "commented_code": "using Stream sourceStream = await hardlinkClient.OpenReadAsync();\n using Stream destinationStream = File.OpenRead(destinationResource.Uri.LocalPath);\n Assert.AreEqual(sourceStream.Length, destinationStream.Length);\n Assert.AreEqual(sourceStream, destinationStream);", - "repo_full_name": "Azure/azure-sdk-for-net", - "discussion_comments": [ - { - "comment_id": "2112263732", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50304, - "pr_file": "sdk/storage/Azure.Storage.DataMovement.Files.Shares/tests/ShareFileStartTransferDownloadTests.cs", - "discussion_id": "2112263732", - "commented_code": "@@ -132,6 +132,7 @@ await TestTransferWithTimeout.WaitForCompletionAsync(\n using Stream sourceStream = await hardlinkClient.OpenReadAsync();\n using Stream destinationStream = File.OpenRead(destinationResource.Uri.LocalPath);\n Assert.AreEqual(sourceStream.Length, destinationStream.Length);\n+ Assert.AreEqual(sourceStream, destinationStream);", - "comment_created_at": "2025-05-28T16:04:05+00:00", - "comment_author": "Copilot", - "comment_body": "Comparing two Stream objects with Assert.AreEqual checks reference equality rather than content. Consider reading both streams into byte arrays and comparing their sequences, or using a helper method to verify that the contents match.\n```suggestion\n byte[] sourceBytes;\n byte[] destinationBytes;\n\n using (MemoryStream sourceMemoryStream = new MemoryStream())\n {\n await sourceStream.CopyToAsync(sourceMemoryStream);\n sourceBytes = sourceMemoryStream.ToArray();\n }\n\n using (MemoryStream destinationMemoryStream = new MemoryStream())\n {\n await destinationStream.CopyToAsync(destinationMemoryStream);\n destinationBytes = destinationMemoryStream.ToArray();\n }\n\n Assert.AreEqual(sourceBytes.Length, destinationBytes.Length, \"Stream lengths do not match.\");\n Assert.AreEqual(sourceBytes, destinationBytes, \"Stream contents do not match.\");\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2103231256", - "pr_number": 50222, - "pr_file": "sdk/communication/Azure.Communication.CallAutomation/tests/CallRecordings/CallRecordingAutomatedLiveTests.cs", - "created_at": "2025-05-22T19:06:18+00:00", - "commented_code": "Assert.AreEqual(StatusCodes.Status200OK, startRecordingResponse.GetRawResponse().Status);\n Assert.NotNull(startRecordingResponse.Value.RecordingId);\n\n // Update the property name from 'PlaySourceId' to 'PlaySourceCacheId' as per the provided type signature.\n var playSource = new FileSource(new Uri(TestEnvironment.FileSourceUrl)) { PlaySourceCacheId = \"test-audio\" };\n var playResponse = await response.CallConnection.GetCallMedia().PlayToAllAsync(playSource);\n Assert.NotNull(playResponse);\n Assert.AreEqual(202, playResponse.GetRawResponse().Status);\n\n await Task.Delay(TimeSpan.FromSeconds(5)).ConfigureAwait(false);\n\n // try stop recording\n var stopRecordingResponse = await client.GetCallRecording().StopAsync(startRecordingResponse.Value.RecordingId);\n Assert.AreEqual(StatusCodes.Status204NoContent, stopRecordingResponse.Status);\n\n await Task.Delay(TimeSpan.FromSeconds(5)).ConfigureAwait(false);", - "repo_full_name": "Azure/azure-sdk-for-net", - 
"discussion_comments": [ - { - "comment_id": "2103231256", - "repo_full_name": "Azure/azure-sdk-for-net", - "pr_number": 50222, - "pr_file": "sdk/communication/Azure.Communication.CallAutomation/tests/CallRecordings/CallRecordingAutomatedLiveTests.cs", - "discussion_id": "2103231256", - "commented_code": "@@ -472,10 +472,20 @@ public async Task GetRecordingTest()\n Assert.AreEqual(StatusCodes.Status200OK, startRecordingResponse.GetRawResponse().Status);\n Assert.NotNull(startRecordingResponse.Value.RecordingId);\n \n+ // Update the property name from 'PlaySourceId' to 'PlaySourceCacheId' as per the provided type signature.\n+ var playSource = new FileSource(new Uri(TestEnvironment.FileSourceUrl)) { PlaySourceCacheId = \"test-audio\" };\n+ var playResponse = await response.CallConnection.GetCallMedia().PlayToAllAsync(playSource);\n+ Assert.NotNull(playResponse);\n+ Assert.AreEqual(202, playResponse.GetRawResponse().Status);\n+\n+ await Task.Delay(TimeSpan.FromSeconds(5)).ConfigureAwait(false);\n+\n // try stop recording\n var stopRecordingResponse = await client.GetCallRecording().StopAsync(startRecordingResponse.Value.RecordingId);\n Assert.AreEqual(StatusCodes.Status204NoContent, stopRecordingResponse.Status);\n \n+ await Task.Delay(TimeSpan.FromSeconds(5)).ConfigureAwait(false);", - "comment_created_at": "2025-05-22T19:06:18+00:00", - "comment_author": "Copilot", - "comment_body": "Using fixed delays can lead to flaky tests; consider polling the recording status with a timeout or exponential backoff instead of a hardcoded Task.Delay.\n```suggestion\n await WaitForConditionAsync(\n async () =>\n {\n var recordingState = await client.GetCallRecording().GetRecordingStateAsync(startRecordingResponse.Value.RecordingId).ConfigureAwait(false);\n return recordingState.Value == RecordingState.Active;\n },\n TimeSpan.FromSeconds(10),\n TimeSpan.FromMilliseconds(500)\n ).ConfigureAwait(false);\n\n // try stop recording\n var stopRecordingResponse = await client.GetCallRecording().StopAsync(startRecordingResponse.Value.RecordingId);\n Assert.AreEqual(StatusCodes.Status204NoContent, stopRecordingResponse.Status);\n\n await WaitForConditionAsync(\n async () =>\n {\n var recordingState = await client.GetCallRecording().GetRecordingStateAsync(startRecordingResponse.Value.RecordingId).ConfigureAwait(false);\n return recordingState.Value == RecordingState.Stopped;\n },\n TimeSpan.FromSeconds(10),\n TimeSpan.FromMilliseconds(500)\n ).ConfigureAwait(false);\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-choose-semantic-algorithms.json b/_reviewers/chef-choose-semantic-algorithms.json new file mode 100644 index 0000000..0109ac0 --- /dev/null +++ b/_reviewers/chef-choose-semantic-algorithms.json @@ -0,0 +1,206 @@ +[ + { + "discussion_id": "2098810301", + "pr_number": 14794, + "pr_file": "lib/chef/resource/helpers/path_helpers.rb", + "created_at": "2025-05-20T20:24:51+00:00", + "commented_code": "require \"chef-utils/dist\" unless defined?(ChefUtils::Dist)\nclass Chef\n module ResourceHelpers\n # Helpers for path manipulation\n module PathHelpers\n extend self\n # The habitat binary path for Infra Client\n # @return [String]\n def chef_client_hab_binary_path\n # Find the most recent version by listing directories\n # This is heavy operation and should be avoided but currently habitat does not create a symlink by default\n # and binlink will be created only if `binlink` option is passed so we cannot assume binlink will be present.\n windows = RUBY_PLATFORM =~ /mswin|mingw|windows/ 
|| defined?(ChefUtils) && ChefUtils.windows?\n base_path = \"/hab/pkgs/chef/#{ChefUtils::Dist::Infra::HABITAT_PKG}\"\n base_path = \"C:/#{base_path}\" if windows\n if File.directory?(base_path)\n # Get all version directories\n versions = Dir.glob(\"#{base_path}/*\").select { |d| File.directory?(d) }\n\n if versions.any?\n # Get the latest version (based on modification time)\n latest_version_dir = versions.max_by { |v| File.mtime(v) }", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2098810301", + "repo_full_name": "chef/chef", + "pr_number": 14794, + "pr_file": "lib/chef/resource/helpers/path_helpers.rb", + "discussion_id": "2098810301", + "commented_code": "@@ -0,0 +1,41 @@\n+require \"chef-utils/dist\" unless defined?(ChefUtils::Dist)\n+class Chef\n+ module ResourceHelpers\n+ # Helpers for path manipulation\n+ module PathHelpers\n+ extend self\n+ # The habitat binary path for Infra Client\n+ # @return [String]\n+ def chef_client_hab_binary_path\n+ # Find the most recent version by listing directories\n+ # This is heavy operation and should be avoided but currently habitat does not create a symlink by default\n+ # and binlink will be created only if `binlink` option is passed so we cannot assume binlink will be present.\n+ windows = RUBY_PLATFORM =~ /mswin|mingw|windows/ || defined?(ChefUtils) && ChefUtils.windows?\n+ base_path = \"/hab/pkgs/chef/#{ChefUtils::Dist::Infra::HABITAT_PKG}\"\n+ base_path = \"C:/#{base_path}\" if windows\n+ if File.directory?(base_path)\n+ # Get all version directories\n+ versions = Dir.glob(\"#{base_path}/*\").select { |d| File.directory?(d) }\n+\n+ if versions.any?\n+ # Get the latest version (based on modification time)\n+ latest_version_dir = versions.max_by { |v| File.mtime(v) }", + "comment_created_at": "2025-05-20T20:24:51+00:00", + "comment_author": "tpowell-progress", + "comment_body": "This is potentially problematic and not necessarily deterministic in that it will likely pick the most recent install vs. probably the intended \"most recent version\" for this logic. Further, although unlikely, any mtime update of the base package directory will place that package at the front of the line for being chosen, so if you renamed the directory or something for a test, you might end up forcing that version to always be chosen?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1688678713", + "pr_number": 14462, + "pr_file": "lib/chef/provider/package/chocolatey.rb", + "created_at": "2024-07-23T20:29:24+00:00", + "commented_code": "# Choco V2 uses 'Search' for remote repositories and 'List' for local packages\n def query_command\n return \"list\" if get_choco_version.match?(/^1/)\n return \"list\" if get_choco_version.match?(/^1/) && !get_choco_version.match?(/^1.4/)", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1688678713", + "repo_full_name": "chef/chef", + "pr_number": 14462, + "pr_file": "lib/chef/provider/package/chocolatey.rb", + "discussion_id": "1688678713", + "commented_code": "@@ -153,7 +153,7 @@ def get_choco_version\n \n # Choco V2 uses 'Search' for remote repositories and 'List' for local packages\n def query_command\n- return \"list\" if get_choco_version.match?(/^1/)\n+ return \"list\" if get_choco_version.match?(/^1/) && !get_choco_version.match?(/^1.4/)", + "comment_created_at": "2024-07-23T20:29:24+00:00", + "comment_author": "jaymzh", + "comment_body": "This will break again when 1.5 comes out. We should be a bit more thorough here. 
Maybe something like...\r\n\r\n```suggestion\r\n v_bits = get_choco_version.split('.')\r\n # 1.4+ and 2+\r\n if v_bits[0].to_i > 1 || v_bits[1] > 3\r\n return \"search\"\r\n end\r\n \"list\"\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1693325682", + "repo_full_name": "chef/chef", + "pr_number": 14462, + "pr_file": "lib/chef/provider/package/chocolatey.rb", + "discussion_id": "1688678713", + "commented_code": "@@ -153,7 +153,7 @@ def get_choco_version\n \n # Choco V2 uses 'Search' for remote repositories and 'List' for local packages\n def query_command\n- return \"list\" if get_choco_version.match?(/^1/)\n+ return \"list\" if get_choco_version.match?(/^1/) && !get_choco_version.match?(/^1.4/)", + "comment_created_at": "2024-07-26T16:29:48+00:00", + "comment_author": "tpowell-progress", + "comment_body": "@jaymzh I'll raise you a `Gem::Dependency.new('', '< 1.4.0').match?('', get_choco_version)`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1369665177", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "created_at": "2023-10-24T06:06:30+00:00", + "commented_code": "class Chef\n class Resource\n class ChocolateyInstaller < Chef::Resource\n provides :chocolatey_installer\n\n description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n introduced \"18.3\"\n examples <<~DOC\n **Install Chocolatey**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n end\n ```\n\n **Uninstall Chocolatey**\n\n ```ruby\n chocolatey_installer 'Some random verbiage' do\n action :uninstall\n end\n ```\n\n **Install Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n download_url \"https://www.contoso.com/foo\"\n chocolatey_version '2.12.24'\n end\n ```\n\n **Upgrade Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :upgrade\n chocolatey_version '2.12.24'\n end\n ```\n DOC\n\n allowed_actions :install, :uninstall, :upgrade\n\n property :download_url, String,\n description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n\n property :chocolatey_version, String,\n description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n\n property :use_native_unzip, [TrueClass, FalseClass], default: false,\n description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n\n property :ignore_proxy, [TrueClass, FalseClass], default: false,\n description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. 
This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n\n property :proxy_url, String,\n description: \"Specifies the proxy URL to use during the download.\"\n\n property :proxy_user, String,\n description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n\n property :proxy_password, String,\n description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n\n load_current_value do\n current_state = is_choco_installed?\n current_value_does_not_exist! if current_state == false\n current_state\n end\n\n def is_choco_installed?\n ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n end\n\n def get_choco_version\n powershell_exec(\"choco --version\").result\n end\n\n def existing_version", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1369665177", + "repo_full_name": "chef/chef", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "discussion_id": "1369665177", + "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. 
This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version", + "comment_created_at": "2023-10-24T06:06:30+00:00", + "comment_author": "jaymzh", + "comment_body": "Does it conform to gem versions? We have other version helpers if not. Gem versions are very strict.", + "pr_file_module": null + }, + { + "comment_id": "1370147997", + "repo_full_name": "chef/chef", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "discussion_id": "1369665177", + "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. 
This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version", + "comment_created_at": "2023-10-24T13:22:33+00:00", + "comment_author": "johnmccrae", + "comment_body": "I will convert that over then, I was just using that to have a consistent way of matching versions", + "pr_file_module": null + }, + { + "comment_id": "1370235896", + "repo_full_name": "chef/chef", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "discussion_id": "1369665177", + "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. 
This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version", + "comment_created_at": "2023-10-24T14:05:49+00:00", + "comment_author": "johnmccrae", + "comment_body": "BTW, got a pointer for me? Everything I read says to use Gem::Version", + "pr_file_module": null + }, + { + "comment_id": "1370368983", + "repo_full_name": "chef/chef", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "discussion_id": "1369665177", + "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. 
By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version", + "comment_created_at": "2023-10-24T15:06:29+00:00", + "comment_author": "tpowell-progress", + "comment_body": "If we're just comparing two *specific* gem versions, `Gem::Version` sounds good to me. If we're looking to have more general version specifiers (`~>`, etc...), it might still work, but we might want to consider how the user would expect things to work in context.", + "pr_file_module": null + }, + { + "comment_id": "1370489394", + "repo_full_name": "chef/chef", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "discussion_id": "1369665177", + "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. 
This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version", + "comment_created_at": "2023-10-24T16:23:19+00:00", + "comment_author": "johnmccrae", + "comment_body": "I'm a total dork, ignore me. I understand now. 
I was overthinking this.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1092455427", + "pr_number": 13535, + "pr_file": "lib/chef/resource/apt_repository.rb", + "created_at": "2023-01-31T20:37:07+00:00", + "commented_code": "end.compact\n end\n\n # run the specified command and extract the public key ids\n # accepts the command so it can be used to extract both the current keys\n # and the new keys\n # @param [Array] cmd the command to run\n #\n # @return [Array] an array of key ids\n def extract_public_keys_from_cmd(*cmd)\n so = shell_out(*cmd)\n # Sample output\n # pub:-:4096:1:D94AA3F0EFE21092:1336774248:::-:::scSC::::::23::0:\n so.stdout.split(/\\n/).map do |t|\n z = t.match(/(^pub:.?:\\d*:\\d*:\\w*:[\\d-]*):/)", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1092455427", + "repo_full_name": "chef/chef", + "pr_number": 13535, + "pr_file": "lib/chef/resource/apt_repository.rb", + "discussion_id": "1092455427", + "commented_code": "@@ -187,6 +187,22 @@ def extract_fingerprints_from_cmd(*cmd)\n end.compact\n end\n \n+ # run the specified command and extract the public key ids\n+ # accepts the command so it can be used to extract both the current keys\n+ # and the new keys\n+ # @param [Array] cmd the command to run\n+ #\n+ # @return [Array] an array of key ids\n+ def extract_public_keys_from_cmd(*cmd)\n+ so = shell_out(*cmd)\n+ # Sample output\n+ # pub:-:4096:1:D94AA3F0EFE21092:1336774248:::-:::scSC::::::23::0:\n+ so.stdout.split(/\\n/).map do |t|\n+ z = t.match(/(^pub:.?:\\d*:\\d*:\\w*:[\\d-]*):/)", + "comment_created_at": "2023-01-31T20:37:07+00:00", + "comment_author": "tpowell-progress", + "comment_body": "Suggest splitting on the `':'` and grabbing the first `n` fields rejoined with `':'` ... It's a little bit harder to sort out how many fields are being grabbed with the regex especially with most of this under the capture group, and the regex isn't performing significant validation, etc...", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "930358767", + "pr_number": 13069, + "pr_file": "lib/chef/property.rb", + "created_at": "2022-07-26T19:56:06+00:00", + "commented_code": "#\n def required?(action = nil)\n if !action.nil? && options[:required].is_a?(Array)\n options[:required].include?(action)\n (options[:required] & Array(action)).any?", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "930358767", + "repo_full_name": "chef/chef", + "pr_number": 13069, + "pr_file": "lib/chef/property.rb", + "discussion_id": "930358767", + "commented_code": "@@ -305,7 +305,7 @@ def has_default?\n #\n def required?(action = nil)\n if !action.nil? && options[:required].is_a?(Array)\n- options[:required].include?(action)\n+ (options[:required] & Array(action)).any?", + "comment_created_at": "2022-07-26T19:56:06+00:00", + "comment_author": "marcparadise", + "comment_body": "How does this functionally differ from the original check? When running through some scenarios, I can't come up with one that returns a different result given pre-conditions of a non-nil action and :required having an Array. ", + "pr_file_module": null + }, + { + "comment_id": "934692971", + "repo_full_name": "chef/chef", + "pr_number": 13069, + "pr_file": "lib/chef/property.rb", + "discussion_id": "930358767", + "commented_code": "@@ -305,7 +305,7 @@ def has_default?\n #\n def required?(action = nil)\n if !action.nil? 
&& options[:required].is_a?(Array)\n- options[:required].include?(action)\n+ (options[:required] & Array(action)).any?", + "comment_created_at": "2022-08-01T16:05:49+00:00", + "comment_author": "sabat", + "comment_body": "> How does this functionally differ from the original check?\r\n\r\nThe original cannot handle a check for multiple actions. It's fine if it's checking for just :create for example, but if `action` is an array with both :create and :delete it would fail with a simple `include?` test. I wrote this over a year ago and the details of the context now escape me, but IIRC there are situations where this needs to test for multiple actions—where `action` is going to be an array of actions, and `include?` only tests for the presence of a single object.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "438615154", + "pr_number": 9976, + "pr_file": "lib/chef/provider/package/apt.rb", + "created_at": "2020-06-11T08:09:39+00:00", + "commented_code": "end\n\n def install_package(name, version)\n name = check_availability(name)", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "438615154", + "repo_full_name": "chef/chef", + "pr_number": 9976, + "pr_file": "lib/chef/provider/package/apt.rb", + "discussion_id": "438615154", + "commented_code": "@@ -89,6 +89,7 @@ def locked_packages\n end\n \n def install_package(name, version)\n+ name = check_availability(name)", + "comment_created_at": "2020-06-11T08:09:39+00:00", + "comment_author": "jaymzh", + "comment_body": "This only fixes... part of the bug.\r\n\r\nIt will ensure that if you try to purge `['a', 'b', 'c']` and `c` isn't a known package, that it won't try to purge `c`... which is good...\r\n\r\nBut the actual bug is that if `a` is installed, it'll automatically try to uninstall `b`, and `c` even if they are not installed.\r\n\r\nSo imagine:\r\n* a - installed\r\n* b - uninstalled, but available\r\n* c - uninstalled and unavailable\r\n\r\nCurrent behavior is Chef will try to purge `a`, `b` and `c`\r\n\r\nWith your PR it'll just try to purge `a` and `b`\r\n\r\nBut the *right* behavior is that it should only try to purge `a`.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-choose-semantic-algorithms.md b/_reviewers/chef-choose-semantic-algorithms.md index 326f7aa..e84c9d9 100644 --- a/_reviewers/chef-choose-semantic-algorithms.md +++ b/_reviewers/chef-choose-semantic-algorithms.md @@ -43,211 +43,3 @@ options[:required].include?(action) ``` Remember that algorithm selection significantly impacts code reliability, maintainability, and performance. Choose algorithms that reflect the true intent of the operation rather than just what seems convenient. 
- - -[ - { - "discussion_id": "2098810301", - "pr_number": 14794, - "pr_file": "lib/chef/resource/helpers/path_helpers.rb", - "created_at": "2025-05-20T20:24:51+00:00", - "commented_code": "require \"chef-utils/dist\" unless defined?(ChefUtils::Dist)\nclass Chef\n module ResourceHelpers\n # Helpers for path manipulation\n module PathHelpers\n extend self\n # The habitat binary path for Infra Client\n # @return [String]\n def chef_client_hab_binary_path\n # Find the most recent version by listing directories\n # This is heavy operation and should be avoided but currently habitat does not create a symlink by default\n # and binlink will be created only if `binlink` option is passed so we cannot assume binlink will be present.\n windows = RUBY_PLATFORM =~ /mswin|mingw|windows/ || defined?(ChefUtils) && ChefUtils.windows?\n base_path = \"/hab/pkgs/chef/#{ChefUtils::Dist::Infra::HABITAT_PKG}\"\n base_path = \"C:/#{base_path}\" if windows\n if File.directory?(base_path)\n # Get all version directories\n versions = Dir.glob(\"#{base_path}/*\").select { |d| File.directory?(d) }\n\n if versions.any?\n # Get the latest version (based on modification time)\n latest_version_dir = versions.max_by { |v| File.mtime(v) }", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2098810301", - "repo_full_name": "chef/chef", - "pr_number": 14794, - "pr_file": "lib/chef/resource/helpers/path_helpers.rb", - "discussion_id": "2098810301", - "commented_code": "@@ -0,0 +1,41 @@\n+require \"chef-utils/dist\" unless defined?(ChefUtils::Dist)\n+class Chef\n+ module ResourceHelpers\n+ # Helpers for path manipulation\n+ module PathHelpers\n+ extend self\n+ # The habitat binary path for Infra Client\n+ # @return [String]\n+ def chef_client_hab_binary_path\n+ # Find the most recent version by listing directories\n+ # This is heavy operation and should be avoided but currently habitat does not create a symlink by default\n+ # and binlink will be created only if `binlink` option is passed so we cannot assume binlink will be present.\n+ windows = RUBY_PLATFORM =~ /mswin|mingw|windows/ || defined?(ChefUtils) && ChefUtils.windows?\n+ base_path = \"/hab/pkgs/chef/#{ChefUtils::Dist::Infra::HABITAT_PKG}\"\n+ base_path = \"C:/#{base_path}\" if windows\n+ if File.directory?(base_path)\n+ # Get all version directories\n+ versions = Dir.glob(\"#{base_path}/*\").select { |d| File.directory?(d) }\n+\n+ if versions.any?\n+ # Get the latest version (based on modification time)\n+ latest_version_dir = versions.max_by { |v| File.mtime(v) }", - "comment_created_at": "2025-05-20T20:24:51+00:00", - "comment_author": "tpowell-progress", - "comment_body": "This is potentially problematic and not necessarily deterministic in that it will likely pick the most recent install vs. probably the intended \"most recent version\" for this logic. 
Further, although unlikely, any mtime update of the base package directory will place that package at the front of the line for being chosen, so if you renamed the directory or something for a test, you might end up forcing that version to always be chosen?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1688678713", - "pr_number": 14462, - "pr_file": "lib/chef/provider/package/chocolatey.rb", - "created_at": "2024-07-23T20:29:24+00:00", - "commented_code": "# Choco V2 uses 'Search' for remote repositories and 'List' for local packages\n def query_command\n return \"list\" if get_choco_version.match?(/^1/)\n return \"list\" if get_choco_version.match?(/^1/) && !get_choco_version.match?(/^1.4/)", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1688678713", - "repo_full_name": "chef/chef", - "pr_number": 14462, - "pr_file": "lib/chef/provider/package/chocolatey.rb", - "discussion_id": "1688678713", - "commented_code": "@@ -153,7 +153,7 @@ def get_choco_version\n \n # Choco V2 uses 'Search' for remote repositories and 'List' for local packages\n def query_command\n- return \"list\" if get_choco_version.match?(/^1/)\n+ return \"list\" if get_choco_version.match?(/^1/) && !get_choco_version.match?(/^1.4/)", - "comment_created_at": "2024-07-23T20:29:24+00:00", - "comment_author": "jaymzh", - "comment_body": "This will break again when 1.5 comes out. We should be a bit more thorough here. Maybe something like...\r\n\r\n```suggestion\r\n v_bits = get_choco_version.split('.')\r\n # 1.4+ and 2+\r\n if v_bits[0].to_i > 1 || v_bits[1] > 3\r\n return \"search\"\r\n end\r\n \"list\"\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1693325682", - "repo_full_name": "chef/chef", - "pr_number": 14462, - "pr_file": "lib/chef/provider/package/chocolatey.rb", - "discussion_id": "1688678713", - "commented_code": "@@ -153,7 +153,7 @@ def get_choco_version\n \n # Choco V2 uses 'Search' for remote repositories and 'List' for local packages\n def query_command\n- return \"list\" if get_choco_version.match?(/^1/)\n+ return \"list\" if get_choco_version.match?(/^1/) && !get_choco_version.match?(/^1.4/)", - "comment_created_at": "2024-07-26T16:29:48+00:00", - "comment_author": "tpowell-progress", - "comment_body": "@jaymzh I'll raise you a `Gem::Dependency.new('', '< 1.4.0').match?('', get_choco_version)`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1369665177", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "created_at": "2023-10-24T06:06:30+00:00", - "commented_code": "class Chef\n class Resource\n class ChocolateyInstaller < Chef::Resource\n provides :chocolatey_installer\n\n description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. 
Use the Chocolatey Feature resource to customize your install\"\n introduced \"18.3\"\n examples <<~DOC\n **Install Chocolatey**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n end\n ```\n\n **Uninstall Chocolatey**\n\n ```ruby\n chocolatey_installer 'Some random verbiage' do\n action :uninstall\n end\n ```\n\n **Install Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n download_url \"https://www.contoso.com/foo\"\n chocolatey_version '2.12.24'\n end\n ```\n\n **Upgrade Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :upgrade\n chocolatey_version '2.12.24'\n end\n ```\n DOC\n\n allowed_actions :install, :uninstall, :upgrade\n\n property :download_url, String,\n description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n\n property :chocolatey_version, String,\n description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n\n property :use_native_unzip, [TrueClass, FalseClass], default: false,\n description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n\n property :ignore_proxy, [TrueClass, FalseClass], default: false,\n description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n\n property :proxy_url, String,\n description: \"Specifies the proxy URL to use during the download.\"\n\n property :proxy_user, String,\n description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n\n property :proxy_password, String,\n description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n\n load_current_value do\n current_state = is_choco_installed?\n current_value_does_not_exist! if current_state == false\n current_state\n end\n\n def is_choco_installed?\n ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n end\n\n def get_choco_version\n powershell_exec(\"choco --version\").result\n end\n\n def existing_version", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1369665177", - "repo_full_name": "chef/chef", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "discussion_id": "1369665177", - "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. 
Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version", - "comment_created_at": "2023-10-24T06:06:30+00:00", - "comment_author": "jaymzh", - "comment_body": "Does it conform to gem versions? We have other version helpers if not. 
Gem versions are very strict.", - "pr_file_module": null - }, - { - "comment_id": "1370147997", - "repo_full_name": "chef/chef", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "discussion_id": "1369665177", - "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! 
if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version", - "comment_created_at": "2023-10-24T13:22:33+00:00", - "comment_author": "johnmccrae", - "comment_body": "I will convert that over then, I was just using that to have a consistent way of matching versions", - "pr_file_module": null - }, - { - "comment_id": "1370235896", - "repo_full_name": "chef/chef", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "discussion_id": "1369665177", - "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. 
Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version", - "comment_created_at": "2023-10-24T14:05:49+00:00", - "comment_author": "johnmccrae", - "comment_body": "BTW, got a pointer for me? Everything I read says to use Gem::Version", - "pr_file_module": null - }, - { - "comment_id": "1370368983", - "repo_full_name": "chef/chef", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "discussion_id": "1369665177", - "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. 
Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version", - "comment_created_at": "2023-10-24T15:06:29+00:00", - "comment_author": "tpowell-progress", - "comment_body": "If we're just comparing two *specific* gem versions, `Gem::Version` sounds good to me. If we're looking to have more general version specifiers (`~>`, etc...), it might still work, but we might want to consider how the user would expect things to work in context.", - "pr_file_module": null - }, - { - "comment_id": "1370489394", - "repo_full_name": "chef/chef", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "discussion_id": "1369665177", - "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. 
This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version", - "comment_created_at": "2023-10-24T16:23:19+00:00", - "comment_author": "johnmccrae", - "comment_body": "I'm a total dork, ignore me. I understand now. I was overthinking this.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1092455427", - "pr_number": 13535, - "pr_file": "lib/chef/resource/apt_repository.rb", - "created_at": "2023-01-31T20:37:07+00:00", - "commented_code": "end.compact\n end\n\n # run the specified command and extract the public key ids\n # accepts the command so it can be used to extract both the current keys\n # and the new keys\n # @param [Array] cmd the command to run\n #\n # @return [Array] an array of key ids\n def extract_public_keys_from_cmd(*cmd)\n so = shell_out(*cmd)\n # Sample output\n # pub:-:4096:1:D94AA3F0EFE21092:1336774248:::-:::scSC::::::23::0:\n so.stdout.split(/\\n/).map do |t|\n z = t.match(/(^pub:.?:\\d*:\\d*:\\w*:[\\d-]*):/)", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1092455427", - "repo_full_name": "chef/chef", - "pr_number": 13535, - "pr_file": "lib/chef/resource/apt_repository.rb", - "discussion_id": "1092455427", - "commented_code": "@@ -187,6 +187,22 @@ def extract_fingerprints_from_cmd(*cmd)\n end.compact\n end\n \n+ # run the specified command and extract the public key ids\n+ # accepts the command so it can be used to extract both the current keys\n+ # and the new keys\n+ # @param [Array] cmd the command to run\n+ #\n+ # @return [Array] an array of key ids\n+ def extract_public_keys_from_cmd(*cmd)\n+ so = shell_out(*cmd)\n+ # Sample output\n+ # pub:-:4096:1:D94AA3F0EFE21092:1336774248:::-:::scSC::::::23::0:\n+ so.stdout.split(/\\n/).map do |t|\n+ z = t.match(/(^pub:.?:\\d*:\\d*:\\w*:[\\d-]*):/)", - "comment_created_at": "2023-01-31T20:37:07+00:00", - "comment_author": "tpowell-progress", - "comment_body": "Suggest splitting on the `':'` and grabbing the first `n` fields rejoined with `':'` ... It's a little bit harder to sort out how many fields are being grabbed with the regex especially with most of this under the capture group, and the regex isn't performing significant validation, etc...", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "930358767", - "pr_number": 13069, - "pr_file": "lib/chef/property.rb", - "created_at": "2022-07-26T19:56:06+00:00", - "commented_code": "#\n def required?(action = nil)\n if !action.nil? 
&& options[:required].is_a?(Array)\n options[:required].include?(action)\n (options[:required] & Array(action)).any?", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "930358767", - "repo_full_name": "chef/chef", - "pr_number": 13069, - "pr_file": "lib/chef/property.rb", - "discussion_id": "930358767", - "commented_code": "@@ -305,7 +305,7 @@ def has_default?\n #\n def required?(action = nil)\n if !action.nil? && options[:required].is_a?(Array)\n- options[:required].include?(action)\n+ (options[:required] & Array(action)).any?", - "comment_created_at": "2022-07-26T19:56:06+00:00", - "comment_author": "marcparadise", - "comment_body": "How does this functionally differ from the original check? When running through some scenarios, I can't come up with one that returns a different result given pre-conditions of a non-nil action and :required having an Array. ", - "pr_file_module": null - }, - { - "comment_id": "934692971", - "repo_full_name": "chef/chef", - "pr_number": 13069, - "pr_file": "lib/chef/property.rb", - "discussion_id": "930358767", - "commented_code": "@@ -305,7 +305,7 @@ def has_default?\n #\n def required?(action = nil)\n if !action.nil? && options[:required].is_a?(Array)\n- options[:required].include?(action)\n+ (options[:required] & Array(action)).any?", - "comment_created_at": "2022-08-01T16:05:49+00:00", - "comment_author": "sabat", - "comment_body": "> How does this functionally differ from the original check?\r\n\r\nThe original cannot handle a check for multiple actions. It's fine if it's checking for just :create for example, but if `action` is an array with both :create and :delete it would fail with a simple `include?` test. I wrote this over a year ago and the details of the context now escape me, but IIRC there are situations where this needs to test for multiple actions\u2014where `action` is going to be an array of actions, and `include?` only tests for the presence of a single object.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "438615154", - "pr_number": 9976, - "pr_file": "lib/chef/provider/package/apt.rb", - "created_at": "2020-06-11T08:09:39+00:00", - "commented_code": "end\n\n def install_package(name, version)\n name = check_availability(name)", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "438615154", - "repo_full_name": "chef/chef", - "pr_number": 9976, - "pr_file": "lib/chef/provider/package/apt.rb", - "discussion_id": "438615154", - "commented_code": "@@ -89,6 +89,7 @@ def locked_packages\n end\n \n def install_package(name, version)\n+ name = check_availability(name)", - "comment_created_at": "2020-06-11T08:09:39+00:00", - "comment_author": "jaymzh", - "comment_body": "This only fixes... part of the bug.\r\n\r\nIt will ensure that if you try to purge `['a', 'b', 'c']` and `c` isn't a known package, that it won't try to purge `c`... 
which is good...\r\n\r\nBut the actual bug is that if `a` is installed, it'll automatically try to uninstall `b`, and `c` even if they are not installed.\r\n\r\nSo imagine:\r\n* a - installed\r\n* b - uninstalled, but available\r\n* c - uninstalled and unavailable\r\n\r\nCurrent behavior is Chef will try to purge `a`, `b` and `c`\r\n\r\nWith your PR it'll just try to purge `a` and `b`\r\n\r\nBut the *right* behavior is that it should only try to purge `a`.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-clear-abstraction-boundaries.json b/_reviewers/chef-clear-abstraction-boundaries.json new file mode 100644 index 0000000..71715d1 --- /dev/null +++ b/_reviewers/chef-clear-abstraction-boundaries.json @@ -0,0 +1,128 @@ +[ + { + "discussion_id": "934839822", + "pr_number": 13105, + "pr_file": "knife/lib/chef/knife/cookbook_upload.rb", + "created_at": "2022-08-01T19:18:38+00:00", + "commented_code": "end\n end\n end\n { \"status\" => 200, \"message\" => \"Success\" } if @calling_request != \"CLI\"", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "934839822", + "repo_full_name": "chef/chef", + "pr_number": 13105, + "pr_file": "knife/lib/chef/knife/cookbook_upload.rb", + "discussion_id": "934839822", + "commented_code": "@@ -163,6 +163,13 @@ def run\n end\n end\n end\n+ { \"status\" => 200, \"message\" => \"Success\" } if @calling_request != \"CLI\"", + "comment_created_at": "2022-08-01T19:18:38+00:00", + "comment_author": "marcparadise", + "comment_body": "\r\nThis isn't core to the knife cookbook upload command, so it seems very odd to have a CLI tool returning json as if it were a single HTTP request handler.\r\n\r\nWould it make more sense to have a command that wraps cookbook upload, and adds the necessary return information and error handling behavior? \r\n\r\n", + "pr_file_module": null + }, + { + "comment_id": "935854812", + "repo_full_name": "chef/chef", + "pr_number": 13105, + "pr_file": "knife/lib/chef/knife/cookbook_upload.rb", + "discussion_id": "934839822", + "commented_code": "@@ -163,6 +163,13 @@ def run\n end\n end\n end\n+ { \"status\" => 200, \"message\" => \"Success\" } if @calling_request != \"CLI\"", + "comment_created_at": "2022-08-02T17:32:10+00:00", + "comment_author": "sanjain-progress", + "comment_body": "Yeah agree, CLI tool returning JSON looks odd to me also. 
Thanks for pointing it out.\r\n\r\nI will make further changes accordingly.\r\n\r\n\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "719579329", + "pr_number": 11984, + "pr_file": "lib/chef/mixin/powershell_exec.rb", + "created_at": "2021-09-30T16:36:22+00:00", + "commented_code": "#\n # @param script [String] script to run\n # @param interpreter [Symbol] the interpreter type, `:powershell` or `:pwsh`\n # @param timeout [Integer, nil] timeout in seconds.\n # @return [Chef::PowerShell] output\n def powershell_exec(script, interpreter = :powershell)\n case interpreter\n def powershell_exec(script, options = {})\n timeout = options.fetch(:timeout, nil)\n case options.fetch(:interpreter, :powershell)\n when :powershell\n Chef::PowerShell.new(script)\n Chef::PowerShell.new(script, timeout: timeout)\n when :pwsh\n Chef::Pwsh.new(script)\n Chef::Pwsh.new(script, timeout: timeout)\n else\n raise ArgumentError, \"Expected interpreter of :powershell or :pwsh\"\n end\n end\n\n # The same as the #powershell_exec method except this will raise\n # Chef::PowerShell::CommandFailed if the command fails\n def powershell_exec!(script, interpreter = :powershell)", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "719579329", + "repo_full_name": "chef/chef", + "pr_number": 11984, + "pr_file": "lib/chef/mixin/powershell_exec.rb", + "discussion_id": "719579329", + "commented_code": "@@ -104,22 +104,24 @@ module PowershellExec\n #\n # @param script [String] script to run\n # @param interpreter [Symbol] the interpreter type, `:powershell` or `:pwsh`\n+ # @param timeout [Integer, nil] timeout in seconds.\n # @return [Chef::PowerShell] output\n- def powershell_exec(script, interpreter = :powershell)\n- case interpreter\n+ def powershell_exec(script, options = {})\n+ timeout = options.fetch(:timeout, nil)\n+ case options.fetch(:interpreter, :powershell)\n when :powershell\n- Chef::PowerShell.new(script)\n+ Chef::PowerShell.new(script, timeout: timeout)\n when :pwsh\n- Chef::Pwsh.new(script)\n+ Chef::Pwsh.new(script, timeout: timeout)\n else\n raise ArgumentError, \"Expected interpreter of :powershell or :pwsh\"\n end\n end\n \n # The same as the #powershell_exec method except this will raise\n # Chef::PowerShell::CommandFailed if the command fails\n- def powershell_exec!(script, interpreter = :powershell)", + "comment_created_at": "2021-09-30T16:36:22+00:00", + "comment_author": "tas50", + "comment_body": "don't we still want the ps interpreter?", + "pr_file_module": null + }, + { + "comment_id": "719592189", + "repo_full_name": "chef/chef", + "pr_number": 11984, + "pr_file": "lib/chef/mixin/powershell_exec.rb", + "discussion_id": "719579329", + "commented_code": "@@ -104,22 +104,24 @@ module PowershellExec\n #\n # @param script [String] script to run\n # @param interpreter [Symbol] the interpreter type, `:powershell` or `:pwsh`\n+ # @param timeout [Integer, nil] timeout in seconds.\n # @return [Chef::PowerShell] output\n- def powershell_exec(script, interpreter = :powershell)\n- case interpreter\n+ def powershell_exec(script, options = {})\n+ timeout = options.fetch(:timeout, nil)\n+ case options.fetch(:interpreter, :powershell)\n when :powershell\n- Chef::PowerShell.new(script)\n+ Chef::PowerShell.new(script, timeout: timeout)\n when :pwsh\n- Chef::Pwsh.new(script)\n+ Chef::Pwsh.new(script, timeout: timeout)\n else\n raise ArgumentError, \"Expected interpreter of :powershell or :pwsh\"\n end\n end\n \n # The same as the #powershell_exec method except this will 
raise\n # Chef::PowerShell::CommandFailed if the command fails\n- def powershell_exec!(script, interpreter = :powershell)", + "comment_created_at": "2021-09-30T16:53:33+00:00", + "comment_author": "rishichawda", + "comment_body": "yep it is available. the options here get pass directly to `powershell_exec` call instead of individually writing down all parameters and then again passing them to the method call. https://github.com/chef/chef/blob/b2d46ab518ae79c892ff14c6e78a09acae03883a/lib/chef/mixin/powershell_exec.rb#L123-L124\r\nthis looked cleaner than doing\r\n```ruby\r\ndef powershell_exec!(script, interpreter: :powershell, timeout: -1)\r\n cmd = powershell_exec(script, interpreter: interpreter, timeout: timeout)\r\n```\r\nwe only read the values in `powershell_exec` so passing It as a hash argument made it a bit cleaner https://github.com/chef/chef/blob/b2d46ab518ae79c892ff14c6e78a09acae03883a/lib/chef/mixin/powershell_exec.rb#L109-L112", + "pr_file_module": null + }, + { + "comment_id": "741380411", + "repo_full_name": "chef/chef", + "pr_number": 11984, + "pr_file": "lib/chef/mixin/powershell_exec.rb", + "discussion_id": "719579329", + "commented_code": "@@ -104,22 +104,24 @@ module PowershellExec\n #\n # @param script [String] script to run\n # @param interpreter [Symbol] the interpreter type, `:powershell` or `:pwsh`\n+ # @param timeout [Integer, nil] timeout in seconds.\n # @return [Chef::PowerShell] output\n- def powershell_exec(script, interpreter = :powershell)\n- case interpreter\n+ def powershell_exec(script, options = {})\n+ timeout = options.fetch(:timeout, nil)\n+ case options.fetch(:interpreter, :powershell)\n when :powershell\n- Chef::PowerShell.new(script)\n+ Chef::PowerShell.new(script, timeout: timeout)\n when :pwsh\n- Chef::Pwsh.new(script)\n+ Chef::Pwsh.new(script, timeout: timeout)\n else\n raise ArgumentError, \"Expected interpreter of :powershell or :pwsh\"\n end\n end\n \n # The same as the #powershell_exec method except this will raise\n # Chef::PowerShell::CommandFailed if the command fails\n- def powershell_exec!(script, interpreter = :powershell)", + "comment_created_at": "2021-11-02T18:55:34+00:00", + "comment_author": "lamont-granquist", + "comment_body": "I think the switch from `interpreter = :powershell` to `interpreter: :powershell` is likely a breaking change.\r\n\r\nI'm going to see if I can revert that change and ship it quickly since this is now blocking john's work.", + "pr_file_module": null + }, + { + "comment_id": "741384407", + "repo_full_name": "chef/chef", + "pr_number": 11984, + "pr_file": "lib/chef/mixin/powershell_exec.rb", + "discussion_id": "719579329", + "commented_code": "@@ -104,22 +104,24 @@ module PowershellExec\n #\n # @param script [String] script to run\n # @param interpreter [Symbol] the interpreter type, `:powershell` or `:pwsh`\n+ # @param timeout [Integer, nil] timeout in seconds.\n # @return [Chef::PowerShell] output\n- def powershell_exec(script, interpreter = :powershell)\n- case interpreter\n+ def powershell_exec(script, options = {})\n+ timeout = options.fetch(:timeout, nil)\n+ case options.fetch(:interpreter, :powershell)\n when :powershell\n- Chef::PowerShell.new(script)\n+ Chef::PowerShell.new(script, timeout: timeout)\n when :pwsh\n- Chef::Pwsh.new(script)\n+ Chef::Pwsh.new(script, timeout: timeout)\n else\n raise ArgumentError, \"Expected interpreter of :powershell or :pwsh\"\n end\n end\n \n # The same as the #powershell_exec method except this will raise\n # Chef::PowerShell::CommandFailed if the command fails\n- 
def powershell_exec!(script, interpreter = :powershell)", + "comment_created_at": "2021-11-02T19:01:11+00:00", + "comment_author": "lamont-granquist", + "comment_body": "Also the `options = {}` shouldn't be used any more in favor of proper kwargs since it behaves differently on ruby-3.0 I think. It is also a lot easier to see the API on the method signature rather than having to look through the code (and messy method signatures are good feedback on the API being messy -- although a lot of times there's just nothing we can do about that).", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "687990671", + "pr_number": 11905, + "pr_file": "lib/chef/resource.rb", + "created_at": "2021-08-12T18:34:07+00:00", + "commented_code": "def suppress_up_to_date_messages?\n false\n end\n\n private\n\n def http_request_errors\n source.map do |msg|\n uri = URI.parse(msg)\n \"Error connecting to #{msg} - Failed to open TCP connection to #{uri.host}:#{uri.port}\"\n end\n end", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "687990671", + "repo_full_name": "chef/chef", + "pr_number": 11905, + "pr_file": "lib/chef/resource.rb", + "discussion_id": "687990671", + "commented_code": "@@ -1627,6 +1635,15 @@ def lookup_provider_constant(name, action = :nothing)\n def suppress_up_to_date_messages?\n false\n end\n+\n+ private\n+\n+ def http_request_errors\n+ source.map do |msg|\n+ uri = URI.parse(msg)\n+ \"Error connecting to #{msg} - Failed to open TCP connection to #{uri.host}:#{uri.port}\"\n+ end\n+ end", + "comment_created_at": "2021-08-12T18:34:07+00:00", + "comment_author": "lamont-granquist", + "comment_body": "all the code here in the resource class should go away. i know the user request was to magically have the retries + retry_delay resource properties wired up to the Chef::HTTP object but that's the wrong abstraction and the wrong API. those properties have a different meaning and the control of the HTTP settings needs to be done via a different API.", + "pr_file_module": null + }, + { + "comment_id": "688369331", + "repo_full_name": "chef/chef", + "pr_number": 11905, + "pr_file": "lib/chef/resource.rb", + "discussion_id": "687990671", + "commented_code": "@@ -1627,6 +1635,15 @@ def lookup_provider_constant(name, action = :nothing)\n def suppress_up_to_date_messages?\n false\n end\n+\n+ private\n+\n+ def http_request_errors\n+ source.map do |msg|\n+ uri = URI.parse(msg)\n+ \"Error connecting to #{msg} - Failed to open TCP connection to #{uri.host}:#{uri.port}\"\n+ end\n+ end", + "comment_created_at": "2021-08-13T09:16:41+00:00", + "comment_author": "antima-gupta", + "comment_body": "Moved this code to remote_file resource.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-clear-abstraction-boundaries.md b/_reviewers/chef-clear-abstraction-boundaries.md index 2b7f83d..11248d6 100644 --- a/_reviewers/chef-clear-abstraction-boundaries.md +++ b/_reviewers/chef-clear-abstraction-boundaries.md @@ -68,133 +68,3 @@ Key principles: ``` Following these principles creates more maintainable, testable, and adaptable code by ensuring that each component has a clear, focused responsibility. 
- - -[ - { - "discussion_id": "934839822", - "pr_number": 13105, - "pr_file": "knife/lib/chef/knife/cookbook_upload.rb", - "created_at": "2022-08-01T19:18:38+00:00", - "commented_code": "end\n end\n end\n { \"status\" => 200, \"message\" => \"Success\" } if @calling_request != \"CLI\"", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "934839822", - "repo_full_name": "chef/chef", - "pr_number": 13105, - "pr_file": "knife/lib/chef/knife/cookbook_upload.rb", - "discussion_id": "934839822", - "commented_code": "@@ -163,6 +163,13 @@ def run\n end\n end\n end\n+ { \"status\" => 200, \"message\" => \"Success\" } if @calling_request != \"CLI\"", - "comment_created_at": "2022-08-01T19:18:38+00:00", - "comment_author": "marcparadise", - "comment_body": "\r\nThis isn't core to the knife cookbook upload command, so it seems very odd to have a CLI tool returning json as if it were a single HTTP request handler.\r\n\r\nWould it make more sense to have a command that wraps cookbook upload, and adds the necessary return information and error handling behavior? \r\n\r\n", - "pr_file_module": null - }, - { - "comment_id": "935854812", - "repo_full_name": "chef/chef", - "pr_number": 13105, - "pr_file": "knife/lib/chef/knife/cookbook_upload.rb", - "discussion_id": "934839822", - "commented_code": "@@ -163,6 +163,13 @@ def run\n end\n end\n end\n+ { \"status\" => 200, \"message\" => \"Success\" } if @calling_request != \"CLI\"", - "comment_created_at": "2022-08-02T17:32:10+00:00", - "comment_author": "sanjain-progress", - "comment_body": "Yeah agree, CLI tool returning JSON looks odd to me also. Thanks for pointing it out.\r\n\r\nI will make further changes accordingly.\r\n\r\n\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "719579329", - "pr_number": 11984, - "pr_file": "lib/chef/mixin/powershell_exec.rb", - "created_at": "2021-09-30T16:36:22+00:00", - "commented_code": "#\n # @param script [String] script to run\n # @param interpreter [Symbol] the interpreter type, `:powershell` or `:pwsh`\n # @param timeout [Integer, nil] timeout in seconds.\n # @return [Chef::PowerShell] output\n def powershell_exec(script, interpreter = :powershell)\n case interpreter\n def powershell_exec(script, options = {})\n timeout = options.fetch(:timeout, nil)\n case options.fetch(:interpreter, :powershell)\n when :powershell\n Chef::PowerShell.new(script)\n Chef::PowerShell.new(script, timeout: timeout)\n when :pwsh\n Chef::Pwsh.new(script)\n Chef::Pwsh.new(script, timeout: timeout)\n else\n raise ArgumentError, \"Expected interpreter of :powershell or :pwsh\"\n end\n end\n\n # The same as the #powershell_exec method except this will raise\n # Chef::PowerShell::CommandFailed if the command fails\n def powershell_exec!(script, interpreter = :powershell)", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "719579329", - "repo_full_name": "chef/chef", - "pr_number": 11984, - "pr_file": "lib/chef/mixin/powershell_exec.rb", - "discussion_id": "719579329", - "commented_code": "@@ -104,22 +104,24 @@ module PowershellExec\n #\n # @param script [String] script to run\n # @param interpreter [Symbol] the interpreter type, `:powershell` or `:pwsh`\n+ # @param timeout [Integer, nil] timeout in seconds.\n # @return [Chef::PowerShell] output\n- def powershell_exec(script, interpreter = :powershell)\n- case interpreter\n+ def powershell_exec(script, options = {})\n+ timeout = options.fetch(:timeout, nil)\n+ case options.fetch(:interpreter, :powershell)\n when 
:powershell\n- Chef::PowerShell.new(script)\n+ Chef::PowerShell.new(script, timeout: timeout)\n when :pwsh\n- Chef::Pwsh.new(script)\n+ Chef::Pwsh.new(script, timeout: timeout)\n else\n raise ArgumentError, \"Expected interpreter of :powershell or :pwsh\"\n end\n end\n \n # The same as the #powershell_exec method except this will raise\n # Chef::PowerShell::CommandFailed if the command fails\n- def powershell_exec!(script, interpreter = :powershell)", - "comment_created_at": "2021-09-30T16:36:22+00:00", - "comment_author": "tas50", - "comment_body": "don't we still want the ps interpreter?", - "pr_file_module": null - }, - { - "comment_id": "719592189", - "repo_full_name": "chef/chef", - "pr_number": 11984, - "pr_file": "lib/chef/mixin/powershell_exec.rb", - "discussion_id": "719579329", - "commented_code": "@@ -104,22 +104,24 @@ module PowershellExec\n #\n # @param script [String] script to run\n # @param interpreter [Symbol] the interpreter type, `:powershell` or `:pwsh`\n+ # @param timeout [Integer, nil] timeout in seconds.\n # @return [Chef::PowerShell] output\n- def powershell_exec(script, interpreter = :powershell)\n- case interpreter\n+ def powershell_exec(script, options = {})\n+ timeout = options.fetch(:timeout, nil)\n+ case options.fetch(:interpreter, :powershell)\n when :powershell\n- Chef::PowerShell.new(script)\n+ Chef::PowerShell.new(script, timeout: timeout)\n when :pwsh\n- Chef::Pwsh.new(script)\n+ Chef::Pwsh.new(script, timeout: timeout)\n else\n raise ArgumentError, \"Expected interpreter of :powershell or :pwsh\"\n end\n end\n \n # The same as the #powershell_exec method except this will raise\n # Chef::PowerShell::CommandFailed if the command fails\n- def powershell_exec!(script, interpreter = :powershell)", - "comment_created_at": "2021-09-30T16:53:33+00:00", - "comment_author": "rishichawda", - "comment_body": "yep it is available. the options here get pass directly to `powershell_exec` call instead of individually writing down all parameters and then again passing them to the method call. 
https://github.com/chef/chef/blob/b2d46ab518ae79c892ff14c6e78a09acae03883a/lib/chef/mixin/powershell_exec.rb#L123-L124\r\nthis looked cleaner than doing\r\n```ruby\r\ndef powershell_exec!(script, interpreter: :powershell, timeout: -1)\r\n cmd = powershell_exec(script, interpreter: interpreter, timeout: timeout)\r\n```\r\nwe only read the values in `powershell_exec` so passing It as a hash argument made it a bit cleaner https://github.com/chef/chef/blob/b2d46ab518ae79c892ff14c6e78a09acae03883a/lib/chef/mixin/powershell_exec.rb#L109-L112", - "pr_file_module": null - }, - { - "comment_id": "741380411", - "repo_full_name": "chef/chef", - "pr_number": 11984, - "pr_file": "lib/chef/mixin/powershell_exec.rb", - "discussion_id": "719579329", - "commented_code": "@@ -104,22 +104,24 @@ module PowershellExec\n #\n # @param script [String] script to run\n # @param interpreter [Symbol] the interpreter type, `:powershell` or `:pwsh`\n+ # @param timeout [Integer, nil] timeout in seconds.\n # @return [Chef::PowerShell] output\n- def powershell_exec(script, interpreter = :powershell)\n- case interpreter\n+ def powershell_exec(script, options = {})\n+ timeout = options.fetch(:timeout, nil)\n+ case options.fetch(:interpreter, :powershell)\n when :powershell\n- Chef::PowerShell.new(script)\n+ Chef::PowerShell.new(script, timeout: timeout)\n when :pwsh\n- Chef::Pwsh.new(script)\n+ Chef::Pwsh.new(script, timeout: timeout)\n else\n raise ArgumentError, \"Expected interpreter of :powershell or :pwsh\"\n end\n end\n \n # The same as the #powershell_exec method except this will raise\n # Chef::PowerShell::CommandFailed if the command fails\n- def powershell_exec!(script, interpreter = :powershell)", - "comment_created_at": "2021-11-02T18:55:34+00:00", - "comment_author": "lamont-granquist", - "comment_body": "I think the switch from `interpreter = :powershell` to `interpreter: :powershell` is likely a breaking change.\r\n\r\nI'm going to see if I can revert that change and ship it quickly since this is now blocking john's work.", - "pr_file_module": null - }, - { - "comment_id": "741384407", - "repo_full_name": "chef/chef", - "pr_number": 11984, - "pr_file": "lib/chef/mixin/powershell_exec.rb", - "discussion_id": "719579329", - "commented_code": "@@ -104,22 +104,24 @@ module PowershellExec\n #\n # @param script [String] script to run\n # @param interpreter [Symbol] the interpreter type, `:powershell` or `:pwsh`\n+ # @param timeout [Integer, nil] timeout in seconds.\n # @return [Chef::PowerShell] output\n- def powershell_exec(script, interpreter = :powershell)\n- case interpreter\n+ def powershell_exec(script, options = {})\n+ timeout = options.fetch(:timeout, nil)\n+ case options.fetch(:interpreter, :powershell)\n when :powershell\n- Chef::PowerShell.new(script)\n+ Chef::PowerShell.new(script, timeout: timeout)\n when :pwsh\n- Chef::Pwsh.new(script)\n+ Chef::Pwsh.new(script, timeout: timeout)\n else\n raise ArgumentError, \"Expected interpreter of :powershell or :pwsh\"\n end\n end\n \n # The same as the #powershell_exec method except this will raise\n # Chef::PowerShell::CommandFailed if the command fails\n- def powershell_exec!(script, interpreter = :powershell)", - "comment_created_at": "2021-11-02T19:01:11+00:00", - "comment_author": "lamont-granquist", - "comment_body": "Also the `options = {}` shouldn't be used any more in favor of proper kwargs since it behaves differently on ruby-3.0 I think. 
It is also a lot easier to see the API on the method signature rather than having to look through the code (and messy method signatures are good feedback on the API being messy -- although a lot of times there's just nothing we can do about that).", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "687990671", - "pr_number": 11905, - "pr_file": "lib/chef/resource.rb", - "created_at": "2021-08-12T18:34:07+00:00", - "commented_code": "def suppress_up_to_date_messages?\n false\n end\n\n private\n\n def http_request_errors\n source.map do |msg|\n uri = URI.parse(msg)\n \"Error connecting to #{msg} - Failed to open TCP connection to #{uri.host}:#{uri.port}\"\n end\n end", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "687990671", - "repo_full_name": "chef/chef", - "pr_number": 11905, - "pr_file": "lib/chef/resource.rb", - "discussion_id": "687990671", - "commented_code": "@@ -1627,6 +1635,15 @@ def lookup_provider_constant(name, action = :nothing)\n def suppress_up_to_date_messages?\n false\n end\n+\n+ private\n+\n+ def http_request_errors\n+ source.map do |msg|\n+ uri = URI.parse(msg)\n+ \"Error connecting to #{msg} - Failed to open TCP connection to #{uri.host}:#{uri.port}\"\n+ end\n+ end", - "comment_created_at": "2021-08-12T18:34:07+00:00", - "comment_author": "lamont-granquist", - "comment_body": "all the code here in the resource class should go away. i know the user request was to magically have the retries + retry_delay resource properties wired up to the Chef::HTTP object but that's the wrong abstraction and the wrong API. those properties have a different meaning and the control of the HTTP settings needs to be done via a different API.", - "pr_file_module": null - }, - { - "comment_id": "688369331", - "repo_full_name": "chef/chef", - "pr_number": 11905, - "pr_file": "lib/chef/resource.rb", - "discussion_id": "687990671", - "commented_code": "@@ -1627,6 +1635,15 @@ def lookup_provider_constant(name, action = :nothing)\n def suppress_up_to_date_messages?\n false\n end\n+\n+ private\n+\n+ def http_request_errors\n+ source.map do |msg|\n+ uri = URI.parse(msg)\n+ \"Error connecting to #{msg} - Failed to open TCP connection to #{uri.host}:#{uri.port}\"\n+ end\n+ end", - "comment_created_at": "2021-08-13T09:16:41+00:00", - "comment_author": "antima-gupta", - "comment_body": "Moved this code to remote_file resource.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-consistent-descriptive-naming-patterns.json b/_reviewers/chef-consistent-descriptive-naming-patterns.json new file mode 100644 index 0000000..c217a96 --- /dev/null +++ b/_reviewers/chef-consistent-descriptive-naming-patterns.json @@ -0,0 +1,216 @@ +[ + { + "discussion_id": "1733480952", + "pr_number": 14352, + "pr_file": "chef-config/lib/chef-config/path_helper.rb", + "created_at": "2024-08-27T20:34:11+00:00", + "commented_code": "end\n\n def self.relative_path_from(from, to, windows: ChefUtils.windows?)\n Pathname.new(cleanpath(to, windows: windows)).relative_path_from(Pathname.new(cleanpath(from, windows: windows)))\n if windows\n Pathname.new(cleanpath(to, windows: windows)).relative_path_from(Pathname.new(cleanpath(from, windows: windows)))\n else\n # On non-Windows we can halve the number of cleanpath calls by doing a\n # single gsub! 
call here\n\n path = Pathname.new(to).relative_path_from(Pathname.new(from)).to_s\n # ensure all backslashes are forward slashes\n path.gsub!(BACKSLASH, File::SEPARATOR)", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1733480952", + "repo_full_name": "chef/chef", + "pr_number": 14352, + "pr_file": "chef-config/lib/chef-config/path_helper.rb", + "discussion_id": "1733480952", + "commented_code": "@@ -200,7 +200,17 @@ def self.escape_glob_dir(*parts)\n end\n \n def self.relative_path_from(from, to, windows: ChefUtils.windows?)\n- Pathname.new(cleanpath(to, windows: windows)).relative_path_from(Pathname.new(cleanpath(from, windows: windows)))\n+ if windows\n+ Pathname.new(cleanpath(to, windows: windows)).relative_path_from(Pathname.new(cleanpath(from, windows: windows)))\n+ else\n+ # On non-Windows we can halve the number of cleanpath calls by doing a\n+ # single gsub! call here\n+\n+ path = Pathname.new(to).relative_path_from(Pathname.new(from)).to_s\n+ # ensure all backslashes are forward slashes\n+ path.gsub!(BACKSLASH, File::SEPARATOR)", + "comment_created_at": "2024-08-27T20:34:11+00:00", + "comment_author": "jaymzh", + "comment_body": "this _is_ cleanpath, so just call cleanpath.\r\n\r\nAnd you can add the ! there if you want.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "492231182", + "pr_number": 10122, + "pr_file": "lib/chef/resource/windows_share.rb", + "created_at": "2020-09-21T17:33:02+00:00", + "commented_code": "# bool ? 1 : 0\n bool ? \"$true\" : \"$false\"\n end\n\n # Grant-SmbShareAccess sets only one permission to one user so we need to remove from the lower permission access\n # For change_users remove common full_users from it\n # For read_users remove common full_users as well as change_users\n def new_resource_users\n users = {}\n users[\"full_users\"] = new_resource.full_users\n\n users[\"change_users\"] = new_resource.change_users - new_resource.full_users\n users[\"read_users\"] = new_resource.read_users - (new_resource.full_users + new_resource.change_users)\n users\n end\n\n # Compare the full_users, change_users and read_users from current_resource and new_resource\n # @returns boolean True/False\n def compare_users", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "492231182", + "repo_full_name": "chef/chef", + "pr_number": 10122, + "pr_file": "lib/chef/resource/windows_share.rb", + "discussion_id": "492231182", + "commented_code": "@@ -338,8 +335,38 @@ def bool_string(bool)\n # bool ? 1 : 0\n bool ? 
\"$true\" : \"$false\"\n end\n+\n+ # Grant-SmbShareAccess sets only one permission to one user so we need to remove from the lower permission access\n+ # For change_users remove common full_users from it\n+ # For read_users remove common full_users as well as change_users\n+ def new_resource_users\n+ users = {}\n+ users[\"full_users\"] = new_resource.full_users\n+\n+ users[\"change_users\"] = new_resource.change_users - new_resource.full_users\n+ users[\"read_users\"] = new_resource.read_users - (new_resource.full_users + new_resource.change_users)\n+ users\n+ end\n+\n+ # Compare the full_users, change_users and read_users from current_resource and new_resource\n+ # @returns boolean True/False\n+ def compare_users", + "comment_created_at": "2020-09-21T17:33:02+00:00", + "comment_author": "lamont-granquist", + "comment_body": "This should be named something like `def users_changed?` and the sense of it should be inverted (this makes it read nicer without the NOT when it gets used above)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "877456722", + "pr_number": 12859, + "pr_file": "lib/chef/resource/windows_certificate.rb", + "created_at": "2022-05-19T19:22:21+00:00", + "commented_code": "end\n\n action :delete, description: \"Deletes a certificate.\" do\n cert_obj = fetch_cert\n cert_obj = verify_cert\n\n if cert_obj\n if cert_obj == true || cert_obj == \"Certificate Has Expired\"", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "877456722", + "repo_full_name": "chef/chef", + "pr_number": 12859, + "pr_file": "lib/chef/resource/windows_certificate.rb", + "discussion_id": "877456722", + "commented_code": "@@ -128,14 +128,14 @@ class WindowsCertificate < Chef::Resource\n end\n \n action :delete, description: \"Deletes a certificate.\" do\n- cert_obj = fetch_cert\n+ cert_obj = verify_cert\n \n- if cert_obj\n+ if cert_obj == true || cert_obj == \"Certificate Has Expired\"", + "comment_created_at": "2022-05-19T19:22:21+00:00", + "comment_author": "marcparadise", + "comment_body": "`verify_cert` only returns a `true` or `false`, so this condition can never be met. \r\n\r\nGiven that, can we change `cert_obj` name to something like `cert_is_valid`? ", + "pr_file_module": null + }, + { + "comment_id": "878788518", + "repo_full_name": "chef/chef", + "pr_number": 12859, + "pr_file": "lib/chef/resource/windows_certificate.rb", + "discussion_id": "877456722", + "commented_code": "@@ -128,14 +128,14 @@ class WindowsCertificate < Chef::Resource\n end\n \n action :delete, description: \"Deletes a certificate.\" do\n- cert_obj = fetch_cert\n+ cert_obj = verify_cert\n \n- if cert_obj\n+ if cert_obj == true || cert_obj == \"Certificate Has Expired\"", + "comment_created_at": "2022-05-22T03:45:50+00:00", + "comment_author": "johnmccrae", + "comment_body": "ooh, good catch. Thanks. 
Done and done", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "540443869", + "pr_number": 10621, + "pr_file": "lib/chef/resource/homebrew_install.rb", + "created_at": "2020-12-10T19:37:23+00:00", + "commented_code": "#\n# Copyright:: Copyright (c) Chef Software Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nrequire_relative \"../resource\"\n\nclass Chef\n class Resource\n class HomebrewInstall < Chef::Resource\n unified_mode true\n\n provides :homebrew_install\n\n description \"Use the **homebrew_install** resource to install the Homebrew package manager on macOS systems.\"\n introduced \"16.8\"\n examples <<~DOC\n **Install Homebrew using the Internet to download Command Line Tools for Xcode**:\n ```ruby\n homebrew_install 'Install Homebrew and xcode command line tools if necessary' do\n user 'someuser'\n action :install\n end\n ```\n **Install Homebrew using a customer-managed source to download Command Line Tools for Xcode from**:\n ```ruby\n homebrew_install 'Install Homebrew and xcode command line tools if necessary' do\n xcode_tools_url 'https://somewhere.something.com/downloads/command_line_tools.dmg'\n xcode_tools_pkg_name 'Command Line Tools.pkg'\n user 'someuser'\n action :install\n end\n ```\n DOC\n\n property :xcode_tools_url, String,\n description: \"A url pointing to a 'Command Line Tools for Xcode' dmg file\"\n\n property :xcode_tools_pkg_name, String,\n description: \"The name of the pkg inside the dmg located at the xcode_tools_url\"\n\n property :brew_source_url, String,\n description: \"A url pointing to a Homebrew installer\",\n default: \"https://codeload.github.com/Homebrew/brew/zip/master\"\n\n property :user, String,\n description: \"The user to install Homebrew as. Note: Homebrew cannot be installed as root.\",\n required: true\n\n action :install do\n # Avoid all the work in the below resources if homebrew is already installed\n return if ::File.exist?(\"/usr/local/bin/brew\")\n\n # check if 'user' is root and raise an exception if so\n if Etc.getpwnam(new_resource.user).uid == 0\n msg = \"You are attempting to install Homebrew as Root. This is not permitted by Homebrew. 
Please run this as a standard user with admin rights\"\n raise Chef::Exceptions::InsufficientPermissions, msg\n end\n\n USER_HOME = Dir.home(new_resource.user).freeze", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "540443869", + "repo_full_name": "chef/chef", + "pr_number": 10621, + "pr_file": "lib/chef/resource/homebrew_install.rb", + "discussion_id": "540443869", + "commented_code": "@@ -0,0 +1,148 @@\n+#\n+# Copyright:: Copyright (c) Chef Software Inc.\n+#\n+# Licensed under the Apache License, Version 2.0 (the \"License\");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an \"AS IS\" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+#\n+\n+require_relative \"../resource\"\n+\n+class Chef\n+ class Resource\n+ class HomebrewInstall < Chef::Resource\n+ unified_mode true\n+\n+ provides :homebrew_install\n+\n+ description \"Use the **homebrew_install** resource to install the Homebrew package manager on macOS systems.\"\n+ introduced \"16.8\"\n+ examples <<~DOC\n+ **Install Homebrew using the Internet to download Command Line Tools for Xcode**:\n+ ```ruby\n+ homebrew_install 'Install Homebrew and xcode command line tools if necessary' do\n+ user 'someuser'\n+ action :install\n+ end\n+ ```\n+ **Install Homebrew using a customer-managed source to download Command Line Tools for Xcode from**:\n+ ```ruby\n+ homebrew_install 'Install Homebrew and xcode command line tools if necessary' do\n+ xcode_tools_url 'https://somewhere.something.com/downloads/command_line_tools.dmg'\n+ xcode_tools_pkg_name 'Command Line Tools.pkg'\n+ user 'someuser'\n+ action :install\n+ end\n+ ```\n+ DOC\n+\n+ property :xcode_tools_url, String,\n+ description: \"A url pointing to a 'Command Line Tools for Xcode' dmg file\"\n+\n+ property :xcode_tools_pkg_name, String,\n+ description: \"The name of the pkg inside the dmg located at the xcode_tools_url\"\n+\n+ property :brew_source_url, String,\n+ description: \"A url pointing to a Homebrew installer\",\n+ default: \"https://codeload.github.com/Homebrew/brew/zip/master\"\n+\n+ property :user, String,\n+ description: \"The user to install Homebrew as. Note: Homebrew cannot be installed as root.\",\n+ required: true\n+\n+ action :install do\n+ # Avoid all the work in the below resources if homebrew is already installed\n+ return if ::File.exist?(\"/usr/local/bin/brew\")\n+\n+ # check if 'user' is root and raise an exception if so\n+ if Etc.getpwnam(new_resource.user).uid == 0\n+ msg = \"You are attempting to install Homebrew as Root. This is not permitted by Homebrew. 
Please run this as a standard user with admin rights\"\n+ raise Chef::Exceptions::InsufficientPermissions, msg\n+ end\n+\n+ USER_HOME = Dir.home(new_resource.user).freeze", + "comment_created_at": "2020-12-10T19:37:23+00:00", + "comment_author": "tas50", + "comment_body": "we should make these lowercase since they're not really constants and that gets confusing", + "pr_file_module": null + }, + { + "comment_id": "540525347", + "repo_full_name": "chef/chef", + "pr_number": 10621, + "pr_file": "lib/chef/resource/homebrew_install.rb", + "discussion_id": "540443869", + "commented_code": "@@ -0,0 +1,148 @@\n+#\n+# Copyright:: Copyright (c) Chef Software Inc.\n+#\n+# Licensed under the Apache License, Version 2.0 (the \"License\");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an \"AS IS\" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+#\n+\n+require_relative \"../resource\"\n+\n+class Chef\n+ class Resource\n+ class HomebrewInstall < Chef::Resource\n+ unified_mode true\n+\n+ provides :homebrew_install\n+\n+ description \"Use the **homebrew_install** resource to install the Homebrew package manager on macOS systems.\"\n+ introduced \"16.8\"\n+ examples <<~DOC\n+ **Install Homebrew using the Internet to download Command Line Tools for Xcode**:\n+ ```ruby\n+ homebrew_install 'Install Homebrew and xcode command line tools if necessary' do\n+ user 'someuser'\n+ action :install\n+ end\n+ ```\n+ **Install Homebrew using a customer-managed source to download Command Line Tools for Xcode from**:\n+ ```ruby\n+ homebrew_install 'Install Homebrew and xcode command line tools if necessary' do\n+ xcode_tools_url 'https://somewhere.something.com/downloads/command_line_tools.dmg'\n+ xcode_tools_pkg_name 'Command Line Tools.pkg'\n+ user 'someuser'\n+ action :install\n+ end\n+ ```\n+ DOC\n+\n+ property :xcode_tools_url, String,\n+ description: \"A url pointing to a 'Command Line Tools for Xcode' dmg file\"\n+\n+ property :xcode_tools_pkg_name, String,\n+ description: \"The name of the pkg inside the dmg located at the xcode_tools_url\"\n+\n+ property :brew_source_url, String,\n+ description: \"A url pointing to a Homebrew installer\",\n+ default: \"https://codeload.github.com/Homebrew/brew/zip/master\"\n+\n+ property :user, String,\n+ description: \"The user to install Homebrew as. Note: Homebrew cannot be installed as root.\",\n+ required: true\n+\n+ action :install do\n+ # Avoid all the work in the below resources if homebrew is already installed\n+ return if ::File.exist?(\"/usr/local/bin/brew\")\n+\n+ # check if 'user' is root and raise an exception if so\n+ if Etc.getpwnam(new_resource.user).uid == 0\n+ msg = \"You are attempting to install Homebrew as Root. This is not permitted by Homebrew. 
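A sketch of the lowercase suggestion, showing only the relevant lines of the `:install` action above; `user_home` is a stand-in name:

```ruby
action :install do
  # Idempotency guard, as in the original resource.
  return if ::File.exist?("/usr/local/bin/brew")

  # A local variable, not USER_HOME: SCREAMING_CASE creates a Ruby constant,
  # which is misleading for per-run, per-user data and warns if reassigned.
  user_home = Dir.home(new_resource.user)

  # ...download and install steps using user_home...
end
```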
Please run this as a standard user with admin rights\"\n+ raise Chef::Exceptions::InsufficientPermissions, msg\n+ end\n+\n+ USER_HOME = Dir.home(new_resource.user).freeze", + "comment_created_at": "2020-12-10T21:55:45+00:00", + "comment_author": "johnmccrae", + "comment_body": "done", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "706317185", + "pr_number": 11834, + "pr_file": "lib/chef/http/authenticator.rb", + "created_at": "2021-09-10T16:30:00+00:00", + "commented_code": "@auth_credentials.client_name\n end\n\n def detect_certificate_key(client_name)\n self.class.detect_certificate_key(client_name)\n end\n\n def check_certstore_for_key(client_name)\n self.class.check_certstore_for_key(client_name)\n end\n\n def retrieve_certificate_key(client_name)\n self.class.retrieve_certificate_key(client_name)\n end\n\n # Detects if a private key exists in a certificate repository like Keychain (macOS) or Certificate Store (Windows)\n #\n # @param client_name - we're using the node name to store and retrieve any keys\n # Returns true if a key is found, false if not. False will trigger a registration event which will lead to a certificate based key being created\n #\n #\n def self.detect_certificate_key(client_name)", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "706317185", + "repo_full_name": "chef/chef", + "pr_number": 11834, + "pr_file": "lib/chef/http/authenticator.rb", + "discussion_id": "706317185", + "commented_code": "@@ -83,8 +85,54 @@ def client_name\n @auth_credentials.client_name\n end\n \n+ def detect_certificate_key(client_name)\n+ self.class.detect_certificate_key(client_name)\n+ end\n+\n+ def check_certstore_for_key(client_name)\n+ self.class.check_certstore_for_key(client_name)\n+ end\n+\n+ def retrieve_certificate_key(client_name)\n+ self.class.retrieve_certificate_key(client_name)\n+ end\n+\n+ # Detects if a private key exists in a certificate repository like Keychain (macOS) or Certificate Store (Windows)\n+ #\n+ # @param client_name - we're using the node name to store and retrieve any keys\n+ # Returns true if a key is found, false if not. False will trigger a registration event which will lead to a certificate based key being created\n+ #\n+ #\n+ def self.detect_certificate_key(client_name)", + "comment_created_at": "2021-09-10T16:30:00+00:00", + "comment_author": "marcparadise", + "comment_body": "Could we use `certificate_key_exist?` instead of `detect_certificate_key`? ", + "pr_file_module": null + }, + { + "comment_id": "717020015", + "repo_full_name": "chef/chef", + "pr_number": 11834, + "pr_file": "lib/chef/http/authenticator.rb", + "discussion_id": "706317185", + "commented_code": "@@ -83,8 +85,54 @@ def client_name\n @auth_credentials.client_name\n end\n \n+ def detect_certificate_key(client_name)\n+ self.class.detect_certificate_key(client_name)\n+ end\n+\n+ def check_certstore_for_key(client_name)\n+ self.class.check_certstore_for_key(client_name)\n+ end\n+\n+ def retrieve_certificate_key(client_name)\n+ self.class.retrieve_certificate_key(client_name)\n+ end\n+\n+ # Detects if a private key exists in a certificate repository like Keychain (macOS) or Certificate Store (Windows)\n+ #\n+ # @param client_name - we're using the node name to store and retrieve any keys\n+ # Returns true if a key is found, false if not. 
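What the requested rename could look like, keeping the instance-to-class delegation already used in the diff; the lookup body is elided:

```ruby
def certificate_key_exist?(client_name)
  self.class.certificate_key_exist?(client_name)
end

# Returns true if a private key for client_name is present in the platform
# certificate store (Keychain on macOS, Certificate Store on Windows).
def self.certificate_key_exist?(client_name)
  # platform-specific lookup elided
end
```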
False will trigger a registration event which will lead to a certificate based key being created\n+ #\n+ #\n+ def self.detect_certificate_key(client_name)", + "comment_created_at": "2021-09-27T20:26:24+00:00", + "comment_author": "johnmccrae", + "comment_body": "done", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "726525272", + "pr_number": 12159, + "pr_file": "lib/chef/resource/chocolatey_source.rb", + "created_at": "2021-10-11T19:38:40+00:00", + "commented_code": "property :disabled, [TrueClass, FalseClass], default: false, desired_state: false, skip_docs: true\n\n property :username, String,\n description: \"The username to use when authenticating against the source\",\n introduced: \"17.8\"\n\n property :password, String, sensitive: true, desired_state: false,\n description: \"The password to use when authenticating against the source\",\n introduced: \"17.8\"\n\n property :cert, String,\n description: \"The certificate to use when authenticating against the source\",\n introduced: \"17.8\"\n\n property :certpassword, String, sensitive: true, desired_state: false,", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "726525272", + "repo_full_name": "chef/chef", + "pr_number": 12159, + "pr_file": "lib/chef/resource/chocolatey_source.rb", + "discussion_id": "726525272", + "commented_code": "@@ -63,6 +63,22 @@ class ChocolateySource < Chef::Resource\n \n property :disabled, [TrueClass, FalseClass], default: false, desired_state: false, skip_docs: true\n \n+ property :username, String,\n+ description: \"The username to use when authenticating against the source\",\n+ introduced: \"17.8\"\n+\n+ property :password, String, sensitive: true, desired_state: false,\n+ description: \"The password to use when authenticating against the source\",\n+ introduced: \"17.8\"\n+\n+ property :cert, String,\n+ description: \"The certificate to use when authenticating against the source\",\n+ introduced: \"17.8\"\n+\n+ property :certpassword, String, sensitive: true, desired_state: false,", + "comment_created_at": "2021-10-11T19:38:40+00:00", + "comment_author": "tas50", + "comment_body": "This should probably be `cert_password` for readability ", + "pr_file_module": null + }, + { + "comment_id": "726555644", + "repo_full_name": "chef/chef", + "pr_number": 12159, + "pr_file": "lib/chef/resource/chocolatey_source.rb", + "discussion_id": "726525272", + "commented_code": "@@ -63,6 +63,22 @@ class ChocolateySource < Chef::Resource\n \n property :disabled, [TrueClass, FalseClass], default: false, desired_state: false, skip_docs: true\n \n+ property :username, String,\n+ description: \"The username to use when authenticating against the source\",\n+ introduced: \"17.8\"\n+\n+ property :password, String, sensitive: true, desired_state: false,\n+ description: \"The password to use when authenticating against the source\",\n+ introduced: \"17.8\"\n+\n+ property :cert, String,\n+ description: \"The certificate to use when authenticating against the source\",\n+ introduced: \"17.8\"\n+\n+ property :certpassword, String, sensitive: true, desired_state: false,", + "comment_created_at": "2021-10-11T20:41:06+00:00", + "comment_author": "gep13", + "comment_body": "I will get that updated :+1:", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "716147106", + "pr_number": 12029, + "pr_file": "lib/chef/resource/powershell_package_source.rb", + "created_at": "2021-09-26T05:58:36+00:00", + "commented_code": "provides :powershell_package_source\n\n description \"Use the 
**powershell_package_source** resource to register a PowerShell package repository.\"\n description \"Use the **powershell_package_source** resource to register a PowerShell Repository or other Package Source type with. There are 2 distinct objects we care about here. The first is a Package Source like a PowerShell Repository or a Nuget Source. The second object is a provider that PowerShell uses to get to that source with, like PowerShellGet, Nuget, Chocolatey, etc. \"\n introduced \"14.3\"\n examples <<~DOC\n **Add a new PSRepository that is not trusted and which requires credentials to connect to**:\n\n ```ruby\n powershell_package_source 'PowerShellModules' do\n source_name \"PowerShellModules\"\n source_location \"https://pkgs.dev.azure.com/some-org/some-project/_packaging/some_feed/nuget/v2\"\n publish_location \"https://pkgs.dev.azure.com/some-org/some-project/_packaging/some_feed/nuget/v2\"\n trusted false\n user_name \"someuser@somelocation.io\"", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "716147106", + "repo_full_name": "chef/chef", + "pr_number": 12029, + "pr_file": "lib/chef/resource/powershell_package_source.rb", + "discussion_id": "716147106", + "commented_code": "@@ -24,36 +25,131 @@ class PowershellPackageSource < Chef::Resource\n \n provides :powershell_package_source\n \n- description \"Use the **powershell_package_source** resource to register a PowerShell package repository.\"\n+ description \"Use the **powershell_package_source** resource to register a PowerShell Repository or other Package Source type with. There are 2 distinct objects we care about here. The first is a Package Source like a PowerShell Repository or a Nuget Source. The second object is a provider that PowerShell uses to get to that source with, like PowerShellGet, Nuget, Chocolatey, etc. \"\n introduced \"14.3\"\n+ examples <<~DOC\n+ **Add a new PSRepository that is not trusted and which requires credentials to connect to**:\n+\n+ ```ruby\n+ powershell_package_source 'PowerShellModules' do\n+ source_name \"PowerShellModules\"\n+ source_location \"https://pkgs.dev.azure.com/some-org/some-project/_packaging/some_feed/nuget/v2\"\n+ publish_location \"https://pkgs.dev.azure.com/some-org/some-project/_packaging/some_feed/nuget/v2\"\n+ trusted false\n+ user_name \"someuser@somelocation.io\"", + "comment_created_at": "2021-09-26T05:58:36+00:00", + "comment_author": "tas50", + "comment_body": "In our other resources we use 'user' and 'password'. We should be consistent here.", + "pr_file_module": null + }, + { + "comment_id": "719671548", + "repo_full_name": "chef/chef", + "pr_number": 12029, + "pr_file": "lib/chef/resource/powershell_package_source.rb", + "discussion_id": "716147106", + "commented_code": "@@ -24,36 +25,131 @@ class PowershellPackageSource < Chef::Resource\n \n provides :powershell_package_source\n \n- description \"Use the **powershell_package_source** resource to register a PowerShell package repository.\"\n+ description \"Use the **powershell_package_source** resource to register a PowerShell Repository or other Package Source type with. There are 2 distinct objects we care about here. The first is a Package Source like a PowerShell Repository or a Nuget Source. The second object is a provider that PowerShell uses to get to that source with, like PowerShellGet, Nuget, Chocolatey, etc. 
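The two threads above converge on the same property-naming conventions: snake_case compound names (`cert_password`, not `certpassword`) and the plain `user`/`password` names used across other Chef resources. A trimmed sketch of the property declarations only:

```ruby
property :user, String,
  description: "The user to authenticate to the source as."

property :password, String, sensitive: true, desired_state: false,
  description: "The password for the authenticating user."

property :cert_password, String, sensitive: true, desired_state: false,
  description: "The password for the client certificate."
```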
\"\n introduced \"14.3\"\n+ examples <<~DOC\n+ **Add a new PSRepository that is not trusted and which requires credentials to connect to**:\n+\n+ ```ruby\n+ powershell_package_source 'PowerShellModules' do\n+ source_name \"PowerShellModules\"\n+ source_location \"https://pkgs.dev.azure.com/some-org/some-project/_packaging/some_feed/nuget/v2\"\n+ publish_location \"https://pkgs.dev.azure.com/some-org/some-project/_packaging/some_feed/nuget/v2\"\n+ trusted false\n+ user_name \"someuser@somelocation.io\"", + "comment_created_at": "2021-09-30T18:47:31+00:00", + "comment_author": "johnmccrae", + "comment_body": "corrected", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-consistent-descriptive-naming-patterns.md b/_reviewers/chef-consistent-descriptive-naming-patterns.md index 1e6a729..c97b39d 100644 --- a/_reviewers/chef-consistent-descriptive-naming-patterns.md +++ b/_reviewers/chef-consistent-descriptive-naming-patterns.md @@ -37,221 +37,3 @@ property :password # preferred over pass ``` This pattern improves code readability, maintains consistency, and reduces cognitive load when working across different parts of the codebase. - - -[ - { - "discussion_id": "1733480952", - "pr_number": 14352, - "pr_file": "chef-config/lib/chef-config/path_helper.rb", - "created_at": "2024-08-27T20:34:11+00:00", - "commented_code": "end\n\n def self.relative_path_from(from, to, windows: ChefUtils.windows?)\n Pathname.new(cleanpath(to, windows: windows)).relative_path_from(Pathname.new(cleanpath(from, windows: windows)))\n if windows\n Pathname.new(cleanpath(to, windows: windows)).relative_path_from(Pathname.new(cleanpath(from, windows: windows)))\n else\n # On non-Windows we can halve the number of cleanpath calls by doing a\n # single gsub! call here\n\n path = Pathname.new(to).relative_path_from(Pathname.new(from)).to_s\n # ensure all backslashes are forward slashes\n path.gsub!(BACKSLASH, File::SEPARATOR)", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1733480952", - "repo_full_name": "chef/chef", - "pr_number": 14352, - "pr_file": "chef-config/lib/chef-config/path_helper.rb", - "discussion_id": "1733480952", - "commented_code": "@@ -200,7 +200,17 @@ def self.escape_glob_dir(*parts)\n end\n \n def self.relative_path_from(from, to, windows: ChefUtils.windows?)\n- Pathname.new(cleanpath(to, windows: windows)).relative_path_from(Pathname.new(cleanpath(from, windows: windows)))\n+ if windows\n+ Pathname.new(cleanpath(to, windows: windows)).relative_path_from(Pathname.new(cleanpath(from, windows: windows)))\n+ else\n+ # On non-Windows we can halve the number of cleanpath calls by doing a\n+ # single gsub! call here\n+\n+ path = Pathname.new(to).relative_path_from(Pathname.new(from)).to_s\n+ # ensure all backslashes are forward slashes\n+ path.gsub!(BACKSLASH, File::SEPARATOR)", - "comment_created_at": "2024-08-27T20:34:11+00:00", - "comment_author": "jaymzh", - "comment_body": "this _is_ cleanpath, so just call cleanpath.\r\n\r\nAnd you can add the ! there if you want.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "492231182", - "pr_number": 10122, - "pr_file": "lib/chef/resource/windows_share.rb", - "created_at": "2020-09-21T17:33:02+00:00", - "commented_code": "# bool ? 1 : 0\n bool ? 
\"$true\" : \"$false\"\n end\n\n # Grant-SmbShareAccess sets only one permission to one user so we need to remove from the lower permission access\n # For change_users remove common full_users from it\n # For read_users remove common full_users as well as change_users\n def new_resource_users\n users = {}\n users[\"full_users\"] = new_resource.full_users\n\n users[\"change_users\"] = new_resource.change_users - new_resource.full_users\n users[\"read_users\"] = new_resource.read_users - (new_resource.full_users + new_resource.change_users)\n users\n end\n\n # Compare the full_users, change_users and read_users from current_resource and new_resource\n # @returns boolean True/False\n def compare_users", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "492231182", - "repo_full_name": "chef/chef", - "pr_number": 10122, - "pr_file": "lib/chef/resource/windows_share.rb", - "discussion_id": "492231182", - "commented_code": "@@ -338,8 +335,38 @@ def bool_string(bool)\n # bool ? 1 : 0\n bool ? \"$true\" : \"$false\"\n end\n+\n+ # Grant-SmbShareAccess sets only one permission to one user so we need to remove from the lower permission access\n+ # For change_users remove common full_users from it\n+ # For read_users remove common full_users as well as change_users\n+ def new_resource_users\n+ users = {}\n+ users[\"full_users\"] = new_resource.full_users\n+\n+ users[\"change_users\"] = new_resource.change_users - new_resource.full_users\n+ users[\"read_users\"] = new_resource.read_users - (new_resource.full_users + new_resource.change_users)\n+ users\n+ end\n+\n+ # Compare the full_users, change_users and read_users from current_resource and new_resource\n+ # @returns boolean True/False\n+ def compare_users", - "comment_created_at": "2020-09-21T17:33:02+00:00", - "comment_author": "lamont-granquist", - "comment_body": "This should be named something like `def users_changed?` and the sense of it should be inverted (this makes it read nicer without the NOT when it gets used above)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "877456722", - "pr_number": 12859, - "pr_file": "lib/chef/resource/windows_certificate.rb", - "created_at": "2022-05-19T19:22:21+00:00", - "commented_code": "end\n\n action :delete, description: \"Deletes a certificate.\" do\n cert_obj = fetch_cert\n cert_obj = verify_cert\n\n if cert_obj\n if cert_obj == true || cert_obj == \"Certificate Has Expired\"", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "877456722", - "repo_full_name": "chef/chef", - "pr_number": 12859, - "pr_file": "lib/chef/resource/windows_certificate.rb", - "discussion_id": "877456722", - "commented_code": "@@ -128,14 +128,14 @@ class WindowsCertificate < Chef::Resource\n end\n \n action :delete, description: \"Deletes a certificate.\" do\n- cert_obj = fetch_cert\n+ cert_obj = verify_cert\n \n- if cert_obj\n+ if cert_obj == true || cert_obj == \"Certificate Has Expired\"", - "comment_created_at": "2022-05-19T19:22:21+00:00", - "comment_author": "marcparadise", - "comment_body": "`verify_cert` only returns a `true` or `false`, so this condition can never be met. \r\n\r\nGiven that, can we change `cert_obj` name to something like `cert_is_valid`? 
", - "pr_file_module": null - }, - { - "comment_id": "878788518", - "repo_full_name": "chef/chef", - "pr_number": 12859, - "pr_file": "lib/chef/resource/windows_certificate.rb", - "discussion_id": "877456722", - "commented_code": "@@ -128,14 +128,14 @@ class WindowsCertificate < Chef::Resource\n end\n \n action :delete, description: \"Deletes a certificate.\" do\n- cert_obj = fetch_cert\n+ cert_obj = verify_cert\n \n- if cert_obj\n+ if cert_obj == true || cert_obj == \"Certificate Has Expired\"", - "comment_created_at": "2022-05-22T03:45:50+00:00", - "comment_author": "johnmccrae", - "comment_body": "ooh, good catch. Thanks. Done and done", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "540443869", - "pr_number": 10621, - "pr_file": "lib/chef/resource/homebrew_install.rb", - "created_at": "2020-12-10T19:37:23+00:00", - "commented_code": "#\n# Copyright:: Copyright (c) Chef Software Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nrequire_relative \"../resource\"\n\nclass Chef\n class Resource\n class HomebrewInstall < Chef::Resource\n unified_mode true\n\n provides :homebrew_install\n\n description \"Use the **homebrew_install** resource to install the Homebrew package manager on macOS systems.\"\n introduced \"16.8\"\n examples <<~DOC\n **Install Homebrew using the Internet to download Command Line Tools for Xcode**:\n ```ruby\n homebrew_install 'Install Homebrew and xcode command line tools if necessary' do\n user 'someuser'\n action :install\n end\n ```\n **Install Homebrew using a customer-managed source to download Command Line Tools for Xcode from**:\n ```ruby\n homebrew_install 'Install Homebrew and xcode command line tools if necessary' do\n xcode_tools_url 'https://somewhere.something.com/downloads/command_line_tools.dmg'\n xcode_tools_pkg_name 'Command Line Tools.pkg'\n user 'someuser'\n action :install\n end\n ```\n DOC\n\n property :xcode_tools_url, String,\n description: \"A url pointing to a 'Command Line Tools for Xcode' dmg file\"\n\n property :xcode_tools_pkg_name, String,\n description: \"The name of the pkg inside the dmg located at the xcode_tools_url\"\n\n property :brew_source_url, String,\n description: \"A url pointing to a Homebrew installer\",\n default: \"https://codeload.github.com/Homebrew/brew/zip/master\"\n\n property :user, String,\n description: \"The user to install Homebrew as. Note: Homebrew cannot be installed as root.\",\n required: true\n\n action :install do\n # Avoid all the work in the below resources if homebrew is already installed\n return if ::File.exist?(\"/usr/local/bin/brew\")\n\n # check if 'user' is root and raise an exception if so\n if Etc.getpwnam(new_resource.user).uid == 0\n msg = \"You are attempting to install Homebrew as Root. This is not permitted by Homebrew. 
Please run this as a standard user with admin rights\"\n raise Chef::Exceptions::InsufficientPermissions, msg\n end\n\n USER_HOME = Dir.home(new_resource.user).freeze", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "540443869", - "repo_full_name": "chef/chef", - "pr_number": 10621, - "pr_file": "lib/chef/resource/homebrew_install.rb", - "discussion_id": "540443869", - "commented_code": "@@ -0,0 +1,148 @@\n+#\n+# Copyright:: Copyright (c) Chef Software Inc.\n+#\n+# Licensed under the Apache License, Version 2.0 (the \"License\");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an \"AS IS\" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+#\n+\n+require_relative \"../resource\"\n+\n+class Chef\n+ class Resource\n+ class HomebrewInstall < Chef::Resource\n+ unified_mode true\n+\n+ provides :homebrew_install\n+\n+ description \"Use the **homebrew_install** resource to install the Homebrew package manager on macOS systems.\"\n+ introduced \"16.8\"\n+ examples <<~DOC\n+ **Install Homebrew using the Internet to download Command Line Tools for Xcode**:\n+ ```ruby\n+ homebrew_install 'Install Homebrew and xcode command line tools if necessary' do\n+ user 'someuser'\n+ action :install\n+ end\n+ ```\n+ **Install Homebrew using a customer-managed source to download Command Line Tools for Xcode from**:\n+ ```ruby\n+ homebrew_install 'Install Homebrew and xcode command line tools if necessary' do\n+ xcode_tools_url 'https://somewhere.something.com/downloads/command_line_tools.dmg'\n+ xcode_tools_pkg_name 'Command Line Tools.pkg'\n+ user 'someuser'\n+ action :install\n+ end\n+ ```\n+ DOC\n+\n+ property :xcode_tools_url, String,\n+ description: \"A url pointing to a 'Command Line Tools for Xcode' dmg file\"\n+\n+ property :xcode_tools_pkg_name, String,\n+ description: \"The name of the pkg inside the dmg located at the xcode_tools_url\"\n+\n+ property :brew_source_url, String,\n+ description: \"A url pointing to a Homebrew installer\",\n+ default: \"https://codeload.github.com/Homebrew/brew/zip/master\"\n+\n+ property :user, String,\n+ description: \"The user to install Homebrew as. Note: Homebrew cannot be installed as root.\",\n+ required: true\n+\n+ action :install do\n+ # Avoid all the work in the below resources if homebrew is already installed\n+ return if ::File.exist?(\"/usr/local/bin/brew\")\n+\n+ # check if 'user' is root and raise an exception if so\n+ if Etc.getpwnam(new_resource.user).uid == 0\n+ msg = \"You are attempting to install Homebrew as Root. This is not permitted by Homebrew. 
Please run this as a standard user with admin rights\"\n+ raise Chef::Exceptions::InsufficientPermissions, msg\n+ end\n+\n+ USER_HOME = Dir.home(new_resource.user).freeze", - "comment_created_at": "2020-12-10T19:37:23+00:00", - "comment_author": "tas50", - "comment_body": "we should make these lowercase since they're not really constants and that gets confusing", - "pr_file_module": null - }, - { - "comment_id": "540525347", - "repo_full_name": "chef/chef", - "pr_number": 10621, - "pr_file": "lib/chef/resource/homebrew_install.rb", - "discussion_id": "540443869", - "commented_code": "@@ -0,0 +1,148 @@\n+#\n+# Copyright:: Copyright (c) Chef Software Inc.\n+#\n+# Licensed under the Apache License, Version 2.0 (the \"License\");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an \"AS IS\" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+#\n+\n+require_relative \"../resource\"\n+\n+class Chef\n+ class Resource\n+ class HomebrewInstall < Chef::Resource\n+ unified_mode true\n+\n+ provides :homebrew_install\n+\n+ description \"Use the **homebrew_install** resource to install the Homebrew package manager on macOS systems.\"\n+ introduced \"16.8\"\n+ examples <<~DOC\n+ **Install Homebrew using the Internet to download Command Line Tools for Xcode**:\n+ ```ruby\n+ homebrew_install 'Install Homebrew and xcode command line tools if necessary' do\n+ user 'someuser'\n+ action :install\n+ end\n+ ```\n+ **Install Homebrew using a customer-managed source to download Command Line Tools for Xcode from**:\n+ ```ruby\n+ homebrew_install 'Install Homebrew and xcode command line tools if necessary' do\n+ xcode_tools_url 'https://somewhere.something.com/downloads/command_line_tools.dmg'\n+ xcode_tools_pkg_name 'Command Line Tools.pkg'\n+ user 'someuser'\n+ action :install\n+ end\n+ ```\n+ DOC\n+\n+ property :xcode_tools_url, String,\n+ description: \"A url pointing to a 'Command Line Tools for Xcode' dmg file\"\n+\n+ property :xcode_tools_pkg_name, String,\n+ description: \"The name of the pkg inside the dmg located at the xcode_tools_url\"\n+\n+ property :brew_source_url, String,\n+ description: \"A url pointing to a Homebrew installer\",\n+ default: \"https://codeload.github.com/Homebrew/brew/zip/master\"\n+\n+ property :user, String,\n+ description: \"The user to install Homebrew as. Note: Homebrew cannot be installed as root.\",\n+ required: true\n+\n+ action :install do\n+ # Avoid all the work in the below resources if homebrew is already installed\n+ return if ::File.exist?(\"/usr/local/bin/brew\")\n+\n+ # check if 'user' is root and raise an exception if so\n+ if Etc.getpwnam(new_resource.user).uid == 0\n+ msg = \"You are attempting to install Homebrew as Root. This is not permitted by Homebrew. 
Please run this as a standard user with admin rights\"\n+ raise Chef::Exceptions::InsufficientPermissions, msg\n+ end\n+\n+ USER_HOME = Dir.home(new_resource.user).freeze", - "comment_created_at": "2020-12-10T21:55:45+00:00", - "comment_author": "johnmccrae", - "comment_body": "done", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "706317185", - "pr_number": 11834, - "pr_file": "lib/chef/http/authenticator.rb", - "created_at": "2021-09-10T16:30:00+00:00", - "commented_code": "@auth_credentials.client_name\n end\n\n def detect_certificate_key(client_name)\n self.class.detect_certificate_key(client_name)\n end\n\n def check_certstore_for_key(client_name)\n self.class.check_certstore_for_key(client_name)\n end\n\n def retrieve_certificate_key(client_name)\n self.class.retrieve_certificate_key(client_name)\n end\n\n # Detects if a private key exists in a certificate repository like Keychain (macOS) or Certificate Store (Windows)\n #\n # @param client_name - we're using the node name to store and retrieve any keys\n # Returns true if a key is found, false if not. False will trigger a registration event which will lead to a certificate based key being created\n #\n #\n def self.detect_certificate_key(client_name)", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "706317185", - "repo_full_name": "chef/chef", - "pr_number": 11834, - "pr_file": "lib/chef/http/authenticator.rb", - "discussion_id": "706317185", - "commented_code": "@@ -83,8 +85,54 @@ def client_name\n @auth_credentials.client_name\n end\n \n+ def detect_certificate_key(client_name)\n+ self.class.detect_certificate_key(client_name)\n+ end\n+\n+ def check_certstore_for_key(client_name)\n+ self.class.check_certstore_for_key(client_name)\n+ end\n+\n+ def retrieve_certificate_key(client_name)\n+ self.class.retrieve_certificate_key(client_name)\n+ end\n+\n+ # Detects if a private key exists in a certificate repository like Keychain (macOS) or Certificate Store (Windows)\n+ #\n+ # @param client_name - we're using the node name to store and retrieve any keys\n+ # Returns true if a key is found, false if not. False will trigger a registration event which will lead to a certificate based key being created\n+ #\n+ #\n+ def self.detect_certificate_key(client_name)", - "comment_created_at": "2021-09-10T16:30:00+00:00", - "comment_author": "marcparadise", - "comment_body": "Could we use `certificate_key_exist?` instead of `detect_certificate_key`? ", - "pr_file_module": null - }, - { - "comment_id": "717020015", - "repo_full_name": "chef/chef", - "pr_number": 11834, - "pr_file": "lib/chef/http/authenticator.rb", - "discussion_id": "706317185", - "commented_code": "@@ -83,8 +85,54 @@ def client_name\n @auth_credentials.client_name\n end\n \n+ def detect_certificate_key(client_name)\n+ self.class.detect_certificate_key(client_name)\n+ end\n+\n+ def check_certstore_for_key(client_name)\n+ self.class.check_certstore_for_key(client_name)\n+ end\n+\n+ def retrieve_certificate_key(client_name)\n+ self.class.retrieve_certificate_key(client_name)\n+ end\n+\n+ # Detects if a private key exists in a certificate repository like Keychain (macOS) or Certificate Store (Windows)\n+ #\n+ # @param client_name - we're using the node name to store and retrieve any keys\n+ # Returns true if a key is found, false if not. 
False will trigger a registration event which will lead to a certificate based key being created\n+ #\n+ #\n+ def self.detect_certificate_key(client_name)", - "comment_created_at": "2021-09-27T20:26:24+00:00", - "comment_author": "johnmccrae", - "comment_body": "done", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "726525272", - "pr_number": 12159, - "pr_file": "lib/chef/resource/chocolatey_source.rb", - "created_at": "2021-10-11T19:38:40+00:00", - "commented_code": "property :disabled, [TrueClass, FalseClass], default: false, desired_state: false, skip_docs: true\n\n property :username, String,\n description: \"The username to use when authenticating against the source\",\n introduced: \"17.8\"\n\n property :password, String, sensitive: true, desired_state: false,\n description: \"The password to use when authenticating against the source\",\n introduced: \"17.8\"\n\n property :cert, String,\n description: \"The certificate to use when authenticating against the source\",\n introduced: \"17.8\"\n\n property :certpassword, String, sensitive: true, desired_state: false,", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "726525272", - "repo_full_name": "chef/chef", - "pr_number": 12159, - "pr_file": "lib/chef/resource/chocolatey_source.rb", - "discussion_id": "726525272", - "commented_code": "@@ -63,6 +63,22 @@ class ChocolateySource < Chef::Resource\n \n property :disabled, [TrueClass, FalseClass], default: false, desired_state: false, skip_docs: true\n \n+ property :username, String,\n+ description: \"The username to use when authenticating against the source\",\n+ introduced: \"17.8\"\n+\n+ property :password, String, sensitive: true, desired_state: false,\n+ description: \"The password to use when authenticating against the source\",\n+ introduced: \"17.8\"\n+\n+ property :cert, String,\n+ description: \"The certificate to use when authenticating against the source\",\n+ introduced: \"17.8\"\n+\n+ property :certpassword, String, sensitive: true, desired_state: false,", - "comment_created_at": "2021-10-11T19:38:40+00:00", - "comment_author": "tas50", - "comment_body": "This should probably be `cert_password` for readability ", - "pr_file_module": null - }, - { - "comment_id": "726555644", - "repo_full_name": "chef/chef", - "pr_number": 12159, - "pr_file": "lib/chef/resource/chocolatey_source.rb", - "discussion_id": "726525272", - "commented_code": "@@ -63,6 +63,22 @@ class ChocolateySource < Chef::Resource\n \n property :disabled, [TrueClass, FalseClass], default: false, desired_state: false, skip_docs: true\n \n+ property :username, String,\n+ description: \"The username to use when authenticating against the source\",\n+ introduced: \"17.8\"\n+\n+ property :password, String, sensitive: true, desired_state: false,\n+ description: \"The password to use when authenticating against the source\",\n+ introduced: \"17.8\"\n+\n+ property :cert, String,\n+ description: \"The certificate to use when authenticating against the source\",\n+ introduced: \"17.8\"\n+\n+ property :certpassword, String, sensitive: true, desired_state: false,", - "comment_created_at": "2021-10-11T20:41:06+00:00", - "comment_author": "gep13", - "comment_body": "I will get that updated :+1:", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "716147106", - "pr_number": 12029, - "pr_file": "lib/chef/resource/powershell_package_source.rb", - "created_at": "2021-09-26T05:58:36+00:00", - "commented_code": "provides :powershell_package_source\n\n description \"Use the 
**powershell_package_source** resource to register a PowerShell package repository.\"\n description \"Use the **powershell_package_source** resource to register a PowerShell Repository or other Package Source type with. There are 2 distinct objects we care about here. The first is a Package Source like a PowerShell Repository or a Nuget Source. The second object is a provider that PowerShell uses to get to that source with, like PowerShellGet, Nuget, Chocolatey, etc. \"\n introduced \"14.3\"\n examples <<~DOC\n **Add a new PSRepository that is not trusted and which requires credentials to connect to**:\n\n ```ruby\n powershell_package_source 'PowerShellModules' do\n source_name \"PowerShellModules\"\n source_location \"https://pkgs.dev.azure.com/some-org/some-project/_packaging/some_feed/nuget/v2\"\n publish_location \"https://pkgs.dev.azure.com/some-org/some-project/_packaging/some_feed/nuget/v2\"\n trusted false\n user_name \"someuser@somelocation.io\"", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "716147106", - "repo_full_name": "chef/chef", - "pr_number": 12029, - "pr_file": "lib/chef/resource/powershell_package_source.rb", - "discussion_id": "716147106", - "commented_code": "@@ -24,36 +25,131 @@ class PowershellPackageSource < Chef::Resource\n \n provides :powershell_package_source\n \n- description \"Use the **powershell_package_source** resource to register a PowerShell package repository.\"\n+ description \"Use the **powershell_package_source** resource to register a PowerShell Repository or other Package Source type with. There are 2 distinct objects we care about here. The first is a Package Source like a PowerShell Repository or a Nuget Source. The second object is a provider that PowerShell uses to get to that source with, like PowerShellGet, Nuget, Chocolatey, etc. \"\n introduced \"14.3\"\n+ examples <<~DOC\n+ **Add a new PSRepository that is not trusted and which requires credentials to connect to**:\n+\n+ ```ruby\n+ powershell_package_source 'PowerShellModules' do\n+ source_name \"PowerShellModules\"\n+ source_location \"https://pkgs.dev.azure.com/some-org/some-project/_packaging/some_feed/nuget/v2\"\n+ publish_location \"https://pkgs.dev.azure.com/some-org/some-project/_packaging/some_feed/nuget/v2\"\n+ trusted false\n+ user_name \"someuser@somelocation.io\"", - "comment_created_at": "2021-09-26T05:58:36+00:00", - "comment_author": "tas50", - "comment_body": "In our other resources we use 'user' and 'password'. We should be consistent here.", - "pr_file_module": null - }, - { - "comment_id": "719671548", - "repo_full_name": "chef/chef", - "pr_number": 12029, - "pr_file": "lib/chef/resource/powershell_package_source.rb", - "discussion_id": "716147106", - "commented_code": "@@ -24,36 +25,131 @@ class PowershellPackageSource < Chef::Resource\n \n provides :powershell_package_source\n \n- description \"Use the **powershell_package_source** resource to register a PowerShell package repository.\"\n+ description \"Use the **powershell_package_source** resource to register a PowerShell Repository or other Package Source type with. There are 2 distinct objects we care about here. The first is a Package Source like a PowerShell Repository or a Nuget Source. The second object is a provider that PowerShell uses to get to that source with, like PowerShellGet, Nuget, Chocolatey, etc. 
\"\n introduced \"14.3\"\n+ examples <<~DOC\n+ **Add a new PSRepository that is not trusted and which requires credentials to connect to**:\n+\n+ ```ruby\n+ powershell_package_source 'PowerShellModules' do\n+ source_name \"PowerShellModules\"\n+ source_location \"https://pkgs.dev.azure.com/some-org/some-project/_packaging/some_feed/nuget/v2\"\n+ publish_location \"https://pkgs.dev.azure.com/some-org/some-project/_packaging/some_feed/nuget/v2\"\n+ trusted false\n+ user_name \"someuser@somelocation.io\"", - "comment_created_at": "2021-09-30T18:47:31+00:00", - "comment_author": "johnmccrae", - "comment_body": "corrected", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-document-configuration-completely.json b/_reviewers/chef-document-configuration-completely.json new file mode 100644 index 0000000..b8f388b --- /dev/null +++ b/_reviewers/chef-document-configuration-completely.json @@ -0,0 +1,102 @@ +[ + { + "discussion_id": "1339251317", + "pr_number": 13962, + "pr_file": "CONTRIBUTING.md", + "created_at": "2023-09-27T21:41:11+00:00", + "commented_code": "------------------------------------------------------------------------\n```\n\n### Gems\n\nWhen installing or updating individual gems, run the install or update commands with the `--conservative` flag.\n\n```\ngem install [gem_name] --conservative\ngem update [gem_name] --conservative`\n```\n\nIf a manual PR updates the `Gemfile.lock`, please include the output of your `gem update` or `gem install` command in the PR.", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1339251317", + "repo_full_name": "chef/chef", + "pr_number": 13962, + "pr_file": "CONTRIBUTING.md", + "discussion_id": "1339251317", + "commented_code": "@@ -127,6 +127,22 @@ Date: Wed Sep 18 11:44:40 2015 -0700\n ------------------------------------------------------------------------\n ```\n \n+### Gems\n+\n+When installing or updating individual gems, run the install or update commands with the `--conservative` flag.\n+\n+```\n+gem install [gem_name] --conservative\n+gem update [gem_name] --conservative`\n+```\n+\n+If a manual PR updates the `Gemfile.lock`, please include the output of your `gem update` or `gem install` command in the PR.\n+", + "comment_created_at": "2023-09-27T21:41:11+00:00", + "comment_author": "jaymzh", + "comment_body": "```suggestion\r\nWhen making a PR, if you need to add or remove gems, it should be done using the `--conservative` flag:\r\n\r\n```shell\r\ngem install [gem_name] --conservative\r\ngem update [gem_name] --conservative`\r\n```\r\n\r\nIf a manual PR updates the `Gemfile.lock`, please include the full output of your `gem` command in the commit message an PR description.\r\n\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "694100300", + "pr_number": 11939, + "pr_file": "RELEASE_NOTES.md", + "created_at": "2021-08-23T15:55:04+00:00", + "commented_code": "This file holds \"in progress\" release notes for the current release under development and is intended for consumption by the Chef Documentation team. 
Please see for the official Chef release notes.\n\n## What's New in 17.4\n\n### Compliance Phase Improvements\n\n#### Chef InSpec 4.41.2\n\nChef InSpec has been updated from 4.38.3 to 4.41.2 with the following improvements for Compliance Phase\n\n- New Open Policy Agent resources `opa_cli` and `opa_api`\n- New `mongodb_session` resource\n- The `mssql_session` resource now allows named connections by no longer forcing a port.\n- The PostgreSQL resources (`postgres_session`, `postgres_conf`, `postgres_hba_conf`, and `postgres_ident_conf`) now work with Windows.\n- Fixed a bug where the year in an expiration date was misinterpreted in waiver files\n\n#### json-file Reporter Off By Default\n\nThe InSpec `json-file` reporter is no longer enabled by default in Compliance Phase. Outputting compliance data to file by default potentially exposed sensitive data to the filesystem, without much upside. If you are relying on this file for processing by external systems you can set the reporter attribute `node['audit']['reporter']` to `%w{json-file cli}` to restore the previous default.", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "694100300", + "repo_full_name": "chef/chef", + "pr_number": 11939, + "pr_file": "RELEASE_NOTES.md", + "discussion_id": "694100300", + "commented_code": "@@ -1,5 +1,99 @@\n This file holds \"in progress\" release notes for the current release under development and is intended for consumption by the Chef Documentation team. Please see for the official Chef release notes.\n \n+## What's New in 17.4\n+\n+### Compliance Phase Improvements\n+\n+#### Chef InSpec 4.41.2\n+\n+Chef InSpec has been updated from 4.38.3 to 4.41.2 with the following improvements for Compliance Phase\n+\n+- New Open Policy Agent resources `opa_cli` and `opa_api`\n+- New `mongodb_session` resource\n+- The `mssql_session` resource now allows named connections by no longer forcing a port.\n+- The PostgreSQL resources (`postgres_session`, `postgres_conf`, `postgres_hba_conf`, and `postgres_ident_conf`) now work with Windows.\n+- Fixed a bug where the year in an expiration date was misinterpreted in waiver files\n+\n+#### json-file Reporter Off By Default\n+\n+The InSpec `json-file` reporter is no longer enabled by default in Compliance Phase. Outputting compliance data to file by default potentially exposed sensitive data to the filesystem, without much upside. If you are relying on this file for processing by external systems you can set the reporter attribute `node['audit']['reporter']` to `%w{json-file cli}` to restore the previous default.", + "comment_created_at": "2021-08-23T15:55:04+00:00", + "comment_author": "kagarmoe", + "comment_body": "```suggestion\r\nThe InSpec `json-file` reporter is no longer enabled by default in Compliance Phase. Outputting compliance data to file by default potentially exposed sensitive data to the filesystem, without much upside. If you rely on this file for processing by external systems you can produce it by setting the reporter attribute `node['audit']['reporter']` to `%w{json-file cli}`.\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "694101268", + "pr_number": 11939, + "pr_file": "RELEASE_NOTES.md", + "created_at": "2021-08-23T15:56:18+00:00", + "commented_code": "This file holds \"in progress\" release notes for the current release under development and is intended for consumption by the Chef Documentation team. 
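For the `json-file` reporter change described above, restoring the previous behaviour is a single attribute; a sketch assuming it is set from a cookbook attributes file (a Policyfile `default` attribute would work the same way):

```ruby
# attributes/default.rb
default['audit']['reporter'] = %w{json-file cli}
```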
Please see for the official Chef release notes.\n\n## What's New in 17.4\n\n### Compliance Phase Improvements\n\n#### Chef InSpec 4.41.2\n\nChef InSpec has been updated from 4.38.3 to 4.41.2 with the following improvements for Compliance Phase\n\n- New Open Policy Agent resources `opa_cli` and `opa_api`\n- New `mongodb_session` resource\n- The `mssql_session` resource now allows named connections by no longer forcing a port.\n- The PostgreSQL resources (`postgres_session`, `postgres_conf`, `postgres_hba_conf`, and `postgres_ident_conf`) now work with Windows.\n- Fixed a bug where the year in an expiration date was misinterpreted in waiver files\n\n#### json-file Reporter Off By Default\n\nThe InSpec `json-file` reporter is no longer enabled by default in Compliance Phase. Outputting compliance data to file by default potentially exposed sensitive data to the filesystem, without much upside. If you are relying on this file for processing by external systems you can set the reporter attribute `node['audit']['reporter']` to `%w{json-file cli}` to restore the previous default.\n\n#### Chef Attribute Integration\n\nThe `chef_node_attribute_enabled` configuration option for Compliance Phase is now enabled by default. This provides a `chef_node` object in InSpec profiles containing all attributes from the Chef Infra Client including Ohai configuration attributes.\n\n#### Compliance Phase Inputs Attribute\n\nIn 2019 we renamed InSpec attributes to inputs to avoid confusion between InSpec attributes and Chef Infra attributes. Compliance Phase is now updated to use the updated inputs name. Instead of passing `node['audit']['attributes']` you can now use `node['audit']['inputs']`. Don't worry about rushing to update your code though because Compliance Phase will still work with the existing attributes, giving you time to migrate to the new name.\n\n### Secrets Manager Integration\n\nWe've updated our beta secrets management integration helper to make it easier to fetch secrets from AWS Secrets Manager and Azure Key Vault. We'd still love to hear from you if you are integrating Chef Infra with a secrets management system or you'd like to do so in the future. E-mail us at secrets_management_beta@progress.com.\n\n#### Simpler Azure Key Vault Names Declaration\n\nThe `secrets` helper has been updated to allow specifying the Azure Key Vault to fetch a secret from within the name instead of using the config hash:\n\n**Specifying the Vault in the Name**\n\n```ruby\nsecret(name: \"test-chef-infra-secrets/test-secret-1\", service: :azure_key_vault)\n```\n\n**Specifying the Vault in the Options Hash**\n\n```ruby\nsecret(name: \"test-secret-1\", service: :azure_key_vault, config: {vault: \"test-chef-infra-secrets\" })\n```\n\n#### AWS Default to Node's Region in AWS Secrets Manager\n\nWhen fetching secrets from AWS Secrets Manager the `secrets` helper will now default to fetching secrets from the region where the node resides. You can still specify the region if you'd like to fetch secrets from another region by passing the region config option:", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "694101268", + "repo_full_name": "chef/chef", + "pr_number": 11939, + "pr_file": "RELEASE_NOTES.md", + "discussion_id": "694101268", + "commented_code": "@@ -1,5 +1,99 @@\n This file holds \"in progress\" release notes for the current release under development and is intended for consumption by the Chef Documentation team. 
Please see for the official Chef release notes.\n \n+## What's New in 17.4\n+\n+### Compliance Phase Improvements\n+\n+#### Chef InSpec 4.41.2\n+\n+Chef InSpec has been updated from 4.38.3 to 4.41.2 with the following improvements for Compliance Phase\n+\n+- New Open Policy Agent resources `opa_cli` and `opa_api`\n+- New `mongodb_session` resource\n+- The `mssql_session` resource now allows named connections by no longer forcing a port.\n+- The PostgreSQL resources (`postgres_session`, `postgres_conf`, `postgres_hba_conf`, and `postgres_ident_conf`) now work with Windows.\n+- Fixed a bug where the year in an expiration date was misinterpreted in waiver files\n+\n+#### json-file Reporter Off By Default\n+\n+The InSpec `json-file` reporter is no longer enabled by default in Compliance Phase. Outputting compliance data to file by default potentially exposed sensitive data to the filesystem, without much upside. If you are relying on this file for processing by external systems you can set the reporter attribute `node['audit']['reporter']` to `%w{json-file cli}` to restore the previous default.\n+\n+#### Chef Attribute Integration\n+\n+The `chef_node_attribute_enabled` configuration option for Compliance Phase is now enabled by default. This provides a `chef_node` object in InSpec profiles containing all attributes from the Chef Infra Client including Ohai configuration attributes.\n+\n+#### Compliance Phase Inputs Attribute\n+\n+In 2019 we renamed InSpec attributes to inputs to avoid confusion between InSpec attributes and Chef Infra attributes. Compliance Phase is now updated to use the updated inputs name. Instead of passing `node['audit']['attributes']` you can now use `node['audit']['inputs']`. Don't worry about rushing to update your code though because Compliance Phase will still work with the existing attributes, giving you time to migrate to the new name.\n+\n+### Secrets Manager Integration\n+\n+We've updated our beta secrets management integration helper to make it easier to fetch secrets from AWS Secrets Manager and Azure Key Vault. We'd still love to hear from you if you are integrating Chef Infra with a secrets management system or you'd like to do so in the future. E-mail us at secrets_management_beta@progress.com.\n+\n+#### Simpler Azure Key Vault Names Declaration\n+\n+The `secrets` helper has been updated to allow specifying the Azure Key Vault to fetch a secret from within the name instead of using the config hash:\n+\n+**Specifying the Vault in the Name**\n+\n+```ruby\n+secret(name: \"test-chef-infra-secrets/test-secret-1\", service: :azure_key_vault)\n+```\n+\n+**Specifying the Vault in the Options Hash**\n+\n+```ruby\n+secret(name: \"test-secret-1\", service: :azure_key_vault, config: {vault: \"test-chef-infra-secrets\" })\n+```\n+\n+#### AWS Default to Node's Region in AWS Secrets Manager\n+\n+When fetching secrets from AWS Secrets Manager the `secrets` helper will now default to fetching secrets from the region where the node resides. You can still specify the region if you'd like to fetch secrets from another region by passing the region config option:", + "comment_created_at": "2021-08-23T15:56:18+00:00", + "comment_author": "kagarmoe", + "comment_body": "```suggestion\r\nWhen fetching secrets from AWS Secrets Manager, the `secrets` helper now defaults to fetching secrets from the region where the node resides. 
If you need to fetch secrets from another region, you can use the region config option:\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "688713182", + "pr_number": 11917, + "pr_file": "omnibus/README.md", + "created_at": "2021-08-13T18:43:14+00:00", + "commented_code": "You must have a sane Ruby environment with Bundler installed. Ensure all the required gems are installed:\n\n```shell\nbundle install --without development\nbundle config set without development", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "688713182", + "repo_full_name": "chef/chef", + "pr_number": 11917, + "pr_file": "omnibus/README.md", + "discussion_id": "688713182", + "commented_code": "@@ -10,7 +10,8 @@ This project creates full-stack platform-specific packages for the following pro\n You must have a sane Ruby environment with Bundler installed. Ensure all the required gems are installed:\n \n ```shell\n-bundle install --without development\n+bundle config set without development", + "comment_created_at": "2021-08-13T18:43:14+00:00", + "comment_author": "tas50", + "comment_body": "The instructions in this file all need to be the local version of the bundle config command so that users don't set a global without.", + "pr_file_module": null + }, + { + "comment_id": "689228347", + "repo_full_name": "chef/chef", + "pr_number": 11917, + "pr_file": "omnibus/README.md", + "discussion_id": "688713182", + "commented_code": "@@ -10,7 +10,8 @@ This project creates full-stack platform-specific packages for the following pro\n You must have a sane Ruby environment with Bundler installed. Ensure all the required gems are installed:\n \n ```shell\n-bundle install --without development\n+bundle config set without development", + "comment_created_at": "2021-08-16T04:12:19+00:00", + "comment_author": "jayashrig158", + "comment_body": "Sure, I'll update that accordingly.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-document-configuration-completely.md b/_reviewers/chef-document-configuration-completely.md index 506d66d..693d476 100644 --- a/_reviewers/chef-document-configuration-completely.md +++ b/_reviewers/chef-document-configuration-completely.md @@ -30,107 +30,3 @@ bundle config set --local without development ``` When configuration defaults change, especially those with security implications, provide clear migration paths for users who need previous functionality. 
- - -[ - { - "discussion_id": "1339251317", - "pr_number": 13962, - "pr_file": "CONTRIBUTING.md", - "created_at": "2023-09-27T21:41:11+00:00", - "commented_code": "------------------------------------------------------------------------\n```\n\n### Gems\n\nWhen installing or updating individual gems, run the install or update commands with the `--conservative` flag.\n\n```\ngem install [gem_name] --conservative\ngem update [gem_name] --conservative`\n```\n\nIf a manual PR updates the `Gemfile.lock`, please include the output of your `gem update` or `gem install` command in the PR.", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1339251317", - "repo_full_name": "chef/chef", - "pr_number": 13962, - "pr_file": "CONTRIBUTING.md", - "discussion_id": "1339251317", - "commented_code": "@@ -127,6 +127,22 @@ Date: Wed Sep 18 11:44:40 2015 -0700\n ------------------------------------------------------------------------\n ```\n \n+### Gems\n+\n+When installing or updating individual gems, run the install or update commands with the `--conservative` flag.\n+\n+```\n+gem install [gem_name] --conservative\n+gem update [gem_name] --conservative`\n+```\n+\n+If a manual PR updates the `Gemfile.lock`, please include the output of your `gem update` or `gem install` command in the PR.\n+", - "comment_created_at": "2023-09-27T21:41:11+00:00", - "comment_author": "jaymzh", - "comment_body": "```suggestion\r\nWhen making a PR, if you need to add or remove gems, it should be done using the `--conservative` flag:\r\n\r\n```shell\r\ngem install [gem_name] --conservative\r\ngem update [gem_name] --conservative`\r\n```\r\n\r\nIf a manual PR updates the `Gemfile.lock`, please include the full output of your `gem` command in the commit message an PR description.\r\n\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "694100300", - "pr_number": 11939, - "pr_file": "RELEASE_NOTES.md", - "created_at": "2021-08-23T15:55:04+00:00", - "commented_code": "This file holds \"in progress\" release notes for the current release under development and is intended for consumption by the Chef Documentation team. Please see for the official Chef release notes.\n\n## What's New in 17.4\n\n### Compliance Phase Improvements\n\n#### Chef InSpec 4.41.2\n\nChef InSpec has been updated from 4.38.3 to 4.41.2 with the following improvements for Compliance Phase\n\n- New Open Policy Agent resources `opa_cli` and `opa_api`\n- New `mongodb_session` resource\n- The `mssql_session` resource now allows named connections by no longer forcing a port.\n- The PostgreSQL resources (`postgres_session`, `postgres_conf`, `postgres_hba_conf`, and `postgres_ident_conf`) now work with Windows.\n- Fixed a bug where the year in an expiration date was misinterpreted in waiver files\n\n#### json-file Reporter Off By Default\n\nThe InSpec `json-file` reporter is no longer enabled by default in Compliance Phase. Outputting compliance data to file by default potentially exposed sensitive data to the filesystem, without much upside. 
If you are relying on this file for processing by external systems you can set the reporter attribute `node['audit']['reporter']` to `%w{json-file cli}` to restore the previous default.", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "694100300", - "repo_full_name": "chef/chef", - "pr_number": 11939, - "pr_file": "RELEASE_NOTES.md", - "discussion_id": "694100300", - "commented_code": "@@ -1,5 +1,99 @@\n This file holds \"in progress\" release notes for the current release under development and is intended for consumption by the Chef Documentation team. Please see for the official Chef release notes.\n \n+## What's New in 17.4\n+\n+### Compliance Phase Improvements\n+\n+#### Chef InSpec 4.41.2\n+\n+Chef InSpec has been updated from 4.38.3 to 4.41.2 with the following improvements for Compliance Phase\n+\n+- New Open Policy Agent resources `opa_cli` and `opa_api`\n+- New `mongodb_session` resource\n+- The `mssql_session` resource now allows named connections by no longer forcing a port.\n+- The PostgreSQL resources (`postgres_session`, `postgres_conf`, `postgres_hba_conf`, and `postgres_ident_conf`) now work with Windows.\n+- Fixed a bug where the year in an expiration date was misinterpreted in waiver files\n+\n+#### json-file Reporter Off By Default\n+\n+The InSpec `json-file` reporter is no longer enabled by default in Compliance Phase. Outputting compliance data to file by default potentially exposed sensitive data to the filesystem, without much upside. If you are relying on this file for processing by external systems you can set the reporter attribute `node['audit']['reporter']` to `%w{json-file cli}` to restore the previous default.", - "comment_created_at": "2021-08-23T15:55:04+00:00", - "comment_author": "kagarmoe", - "comment_body": "```suggestion\r\nThe InSpec `json-file` reporter is no longer enabled by default in Compliance Phase. Outputting compliance data to file by default potentially exposed sensitive data to the filesystem, without much upside. If you rely on this file for processing by external systems you can produce it by setting the reporter attribute `node['audit']['reporter']` to `%w{json-file cli}`.\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "694101268", - "pr_number": 11939, - "pr_file": "RELEASE_NOTES.md", - "created_at": "2021-08-23T15:56:18+00:00", - "commented_code": "This file holds \"in progress\" release notes for the current release under development and is intended for consumption by the Chef Documentation team. Please see for the official Chef release notes.\n\n## What's New in 17.4\n\n### Compliance Phase Improvements\n\n#### Chef InSpec 4.41.2\n\nChef InSpec has been updated from 4.38.3 to 4.41.2 with the following improvements for Compliance Phase\n\n- New Open Policy Agent resources `opa_cli` and `opa_api`\n- New `mongodb_session` resource\n- The `mssql_session` resource now allows named connections by no longer forcing a port.\n- The PostgreSQL resources (`postgres_session`, `postgres_conf`, `postgres_hba_conf`, and `postgres_ident_conf`) now work with Windows.\n- Fixed a bug where the year in an expiration date was misinterpreted in waiver files\n\n#### json-file Reporter Off By Default\n\nThe InSpec `json-file` reporter is no longer enabled by default in Compliance Phase. Outputting compliance data to file by default potentially exposed sensitive data to the filesystem, without much upside. 
If you are relying on this file for processing by external systems you can set the reporter attribute `node['audit']['reporter']` to `%w{json-file cli}` to restore the previous default.\n\n#### Chef Attribute Integration\n\nThe `chef_node_attribute_enabled` configuration option for Compliance Phase is now enabled by default. This provides a `chef_node` object in InSpec profiles containing all attributes from the Chef Infra Client including Ohai configuration attributes.\n\n#### Compliance Phase Inputs Attribute\n\nIn 2019 we renamed InSpec attributes to inputs to avoid confusion between InSpec attributes and Chef Infra attributes. Compliance Phase is now updated to use the updated inputs name. Instead of passing `node['audit']['attributes']` you can now use `node['audit']['inputs']`. Don't worry about rushing to update your code though because Compliance Phase will still work with the existing attributes, giving you time to migrate to the new name.\n\n### Secrets Manager Integration\n\nWe've updated our beta secrets management integration helper to make it easier to fetch secrets from AWS Secrets Manager and Azure Key Vault. We'd still love to hear from you if you are integrating Chef Infra with a secrets management system or you'd like to do so in the future. E-mail us at secrets_management_beta@progress.com.\n\n#### Simpler Azure Key Vault Names Declaration\n\nThe `secrets` helper has been updated to allow specifying the Azure Key Vault to fetch a secret from within the name instead of using the config hash:\n\n**Specifying the Vault in the Name**\n\n```ruby\nsecret(name: \"test-chef-infra-secrets/test-secret-1\", service: :azure_key_vault)\n```\n\n**Specifying the Vault in the Options Hash**\n\n```ruby\nsecret(name: \"test-secret-1\", service: :azure_key_vault, config: {vault: \"test-chef-infra-secrets\" })\n```\n\n#### AWS Default to Node's Region in AWS Secrets Manager\n\nWhen fetching secrets from AWS Secrets Manager the `secrets` helper will now default to fetching secrets from the region where the node resides. You can still specify the region if you'd like to fetch secrets from another region by passing the region config option:", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "694101268", - "repo_full_name": "chef/chef", - "pr_number": 11939, - "pr_file": "RELEASE_NOTES.md", - "discussion_id": "694101268", - "commented_code": "@@ -1,5 +1,99 @@\n This file holds \"in progress\" release notes for the current release under development and is intended for consumption by the Chef Documentation team. Please see for the official Chef release notes.\n \n+## What's New in 17.4\n+\n+### Compliance Phase Improvements\n+\n+#### Chef InSpec 4.41.2\n+\n+Chef InSpec has been updated from 4.38.3 to 4.41.2 with the following improvements for Compliance Phase\n+\n+- New Open Policy Agent resources `opa_cli` and `opa_api`\n+- New `mongodb_session` resource\n+- The `mssql_session` resource now allows named connections by no longer forcing a port.\n+- The PostgreSQL resources (`postgres_session`, `postgres_conf`, `postgres_hba_conf`, and `postgres_ident_conf`) now work with Windows.\n+- Fixed a bug where the year in an expiration date was misinterpreted in waiver files\n+\n+#### json-file Reporter Off By Default\n+\n+The InSpec `json-file` reporter is no longer enabled by default in Compliance Phase. Outputting compliance data to file by default potentially exposed sensitive data to the filesystem, without much upside. 
If you are relying on this file for processing by external systems you can set the reporter attribute `node['audit']['reporter']` to `%w{json-file cli}` to restore the previous default.\n+\n+#### Chef Attribute Integration\n+\n+The `chef_node_attribute_enabled` configuration option for Compliance Phase is now enabled by default. This provides a `chef_node` object in InSpec profiles containing all attributes from the Chef Infra Client including Ohai configuration attributes.\n+\n+#### Compliance Phase Inputs Attribute\n+\n+In 2019 we renamed InSpec attributes to inputs to avoid confusion between InSpec attributes and Chef Infra attributes. Compliance Phase is now updated to use the updated inputs name. Instead of passing `node['audit']['attributes']` you can now use `node['audit']['inputs']`. Don't worry about rushing to update your code though because Compliance Phase will still work with the existing attributes, giving you time to migrate to the new name.\n+\n+### Secrets Manager Integration\n+\n+We've updated our beta secrets management integration helper to make it easier to fetch secrets from AWS Secrets Manager and Azure Key Vault. We'd still love to hear from you if you are integrating Chef Infra with a secrets management system or you'd like to do so in the future. E-mail us at secrets_management_beta@progress.com.\n+\n+#### Simpler Azure Key Vault Names Declaration\n+\n+The `secrets` helper has been updated to allow specifying the Azure Key Vault to fetch a secret from within the name instead of using the config hash:\n+\n+**Specifying the Vault in the Name**\n+\n+```ruby\n+secret(name: \"test-chef-infra-secrets/test-secret-1\", service: :azure_key_vault)\n+```\n+\n+**Specifying the Vault in the Options Hash**\n+\n+```ruby\n+secret(name: \"test-secret-1\", service: :azure_key_vault, config: {vault: \"test-chef-infra-secrets\" })\n+```\n+\n+#### AWS Default to Node's Region in AWS Secrets Manager\n+\n+When fetching secrets from AWS Secrets Manager the `secrets` helper will now default to fetching secrets from the region where the node resides. You can still specify the region if you'd like to fetch secrets from another region by passing the region config option:", - "comment_created_at": "2021-08-23T15:56:18+00:00", - "comment_author": "kagarmoe", - "comment_body": "```suggestion\r\nWhen fetching secrets from AWS Secrets Manager, the `secrets` helper now defaults to fetching secrets from the region where the node resides. If you need to fetch secrets from another region, you can use the region config option:\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "688713182", - "pr_number": 11917, - "pr_file": "omnibus/README.md", - "created_at": "2021-08-13T18:43:14+00:00", - "commented_code": "You must have a sane Ruby environment with Bundler installed. Ensure all the required gems are installed:\n\n```shell\nbundle install --without development\nbundle config set without development", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "688713182", - "repo_full_name": "chef/chef", - "pr_number": 11917, - "pr_file": "omnibus/README.md", - "discussion_id": "688713182", - "commented_code": "@@ -10,7 +10,8 @@ This project creates full-stack platform-specific packages for the following pro\n You must have a sane Ruby environment with Bundler installed. 
Ensure all the required gems are installed:\n \n ```shell\n-bundle install --without development\n+bundle config set without development", - "comment_created_at": "2021-08-13T18:43:14+00:00", - "comment_author": "tas50", - "comment_body": "The instructions in this file all need to be the local version of the bundle config command so that users don't set a global without.", - "pr_file_module": null - }, - { - "comment_id": "689228347", - "repo_full_name": "chef/chef", - "pr_number": 11917, - "pr_file": "omnibus/README.md", - "discussion_id": "688713182", - "commented_code": "@@ -10,7 +10,8 @@ This project creates full-stack platform-specific packages for the following pro\n You must have a sane Ruby environment with Bundler installed. Ensure all the required gems are installed:\n \n ```shell\n-bundle install --without development\n+bundle config set without development", - "comment_created_at": "2021-08-16T04:12:19+00:00", - "comment_author": "jayashrig158", - "comment_body": "Sure, I'll update that accordingly.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-document-configuration-decisions.json b/_reviewers/chef-document-configuration-decisions.json new file mode 100644 index 0000000..8f2abb4 --- /dev/null +++ b/_reviewers/chef-document-configuration-decisions.json @@ -0,0 +1,114 @@ +[ + { + "discussion_id": "1915607101", + "pr_number": 14799, + "pr_file": "kitchen-tests/kitchen.dokken.yml", + "created_at": "2025-01-14T21:15:11+00:00", + "commented_code": "- remote: /opt/chef/bin/ohai -v\n - remote: echo \"Installing appbundler and appbundle-updater gems:\"\n - remote: /opt/chef/embedded/bin/gem install appbundler appbundle-updater --no-doc\n - remote: sudo ln -s /usr/bin/install /bin/install", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1915607101", + "repo_full_name": "chef/chef", + "pr_number": 14799, + "pr_file": "kitchen-tests/kitchen.dokken.yml", + "discussion_id": "1915607101", + "commented_code": "@@ -25,6 +25,10 @@ lifecycle:\n - remote: /opt/chef/bin/ohai -v\n - remote: echo \"Installing appbundler and appbundle-updater gems:\"\n - remote: /opt/chef/embedded/bin/gem install appbundler appbundle-updater --no-doc\n+ - remote: sudo ln -s /usr/bin/install /bin/install", + "comment_created_at": "2025-01-14T21:15:11+00:00", + "comment_author": "jaymzh", + "comment_body": "```suggestion\r\n # back compat for pre-unified-/usr distros, do not add new OSes\r\n - remote: sudo ln -s /usr/bin/install /bin/install\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1878739148", + "pr_number": 14755, + "pr_file": "kitchen-tests/kitchen.exec.macos.yml", + "created_at": "2024-12-10T19:51:27+00:00", + "commented_code": "platforms:\n - name: macos-latest # arm64", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1878739148", + "repo_full_name": "chef/chef", + "pr_number": 14755, + "pr_file": "kitchen-tests/kitchen.exec.macos.yml", + "discussion_id": "1878739148", + "commented_code": "@@ -27,6 +27,7 @@ lifecycle:\n \n platforms:\n - name: macos-latest # arm64", + "comment_created_at": "2024-12-10T19:51:27+00:00", + "comment_author": "Stromweld", + "comment_body": "for clarity and future proofing when they take 15 out of beta and make it the latest.\r\n```suggestion\r\n - name: macos-14 # arm64\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1088744132", + "pr_number": 13531, + "pr_file": ".github/workflows/kitchen.yml", + "created_at": "2023-01-27T09:27:24+00:00", + 
"commented_code": "cd /home/runner/work/chef/chef/kitchen-tests\n bundle install\n bundle exec kitchen test end-to-end-${{ matrix.os }}\n linux-2004-host:", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1088744132", + "repo_full_name": "chef/chef", + "pr_number": 13531, + "pr_file": ".github/workflows/kitchen.yml", + "discussion_id": "1088744132", + "commented_code": "@@ -224,3 +223,35 @@ jobs:\n cd /home/runner/work/chef/chef/kitchen-tests\n bundle install\n bundle exec kitchen test end-to-end-${{ matrix.os }}\n+ linux-2004-host:", + "comment_created_at": "2023-01-27T09:27:24+00:00", + "comment_author": "neha-p6", + "comment_body": "Can we add a comment here stating we have to use 20.04 host for these operating other wise the dokken images throw timedatectl error, so that in future we would have quick reference when making any changes here", + "pr_file_module": null + }, + { + "comment_id": "1089000687", + "repo_full_name": "chef/chef", + "pr_number": 13531, + "pr_file": ".github/workflows/kitchen.yml", + "discussion_id": "1088744132", + "commented_code": "@@ -224,3 +223,35 @@ jobs:\n cd /home/runner/work/chef/chef/kitchen-tests\n bundle install\n bundle exec kitchen test end-to-end-${{ matrix.os }}\n+ linux-2004-host:", + "comment_created_at": "2023-01-27T14:07:54+00:00", + "comment_author": "tpowell-progress", + "comment_body": "Done.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1088743057", + "pr_number": 13533, + "pr_file": ".github/workflows/kitchen.yml", + "created_at": "2023-01-27T09:26:14+00:00", + "commented_code": "working-directory: kitchen-tests\n run: |\n bundle exec kitchen test end-to-end-${{ matrix.os }}\n linux-2004-host:", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1088743057", + "repo_full_name": "chef/chef", + "pr_number": 13533, + "pr_file": ".github/workflows/kitchen.yml", + "discussion_id": "1088743057", + "commented_code": "@@ -124,3 +122,27 @@ jobs:\n working-directory: kitchen-tests\n run: |\n bundle exec kitchen test end-to-end-${{ matrix.os }}\n+ linux-2004-host:", + "comment_created_at": "2023-01-27T09:26:14+00:00", + "comment_author": "neha-p6", + "comment_body": "Can we add a comment here stating we have to use 20.04 host for these operating other wise the dokken images throw timedatectl error?", + "pr_file_module": null + }, + { + "comment_id": "1088997869", + "repo_full_name": "chef/chef", + "pr_number": 13533, + "pr_file": ".github/workflows/kitchen.yml", + "discussion_id": "1088743057", + "commented_code": "@@ -124,3 +122,27 @@ jobs:\n working-directory: kitchen-tests\n run: |\n bundle exec kitchen test end-to-end-${{ matrix.os }}\n+ linux-2004-host:", + "comment_created_at": "2023-01-27T14:05:08+00:00", + "comment_author": "tpowell-progress", + "comment_body": "Done.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-document-configuration-decisions.md b/_reviewers/chef-document-configuration-decisions.md index 2ee3048..d79b6a4 100644 --- a/_reviewers/chef-document-configuration-decisions.md +++ b/_reviewers/chef-document-configuration-decisions.md @@ -30,119 +30,3 @@ Prefer specific version identifiers over relative terms (like "latest") to ensur # Instead of: name: macos-latest # arm64 - name: macos-14 # arm64 ``` - - -[ - { - "discussion_id": "1915607101", - "pr_number": 14799, - "pr_file": "kitchen-tests/kitchen.dokken.yml", - "created_at": "2025-01-14T21:15:11+00:00", - "commented_code": "- remote: 
/opt/chef/bin/ohai -v\n - remote: echo \"Installing appbundler and appbundle-updater gems:\"\n - remote: /opt/chef/embedded/bin/gem install appbundler appbundle-updater --no-doc\n - remote: sudo ln -s /usr/bin/install /bin/install", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1915607101", - "repo_full_name": "chef/chef", - "pr_number": 14799, - "pr_file": "kitchen-tests/kitchen.dokken.yml", - "discussion_id": "1915607101", - "commented_code": "@@ -25,6 +25,10 @@ lifecycle:\n - remote: /opt/chef/bin/ohai -v\n - remote: echo \"Installing appbundler and appbundle-updater gems:\"\n - remote: /opt/chef/embedded/bin/gem install appbundler appbundle-updater --no-doc\n+ - remote: sudo ln -s /usr/bin/install /bin/install", - "comment_created_at": "2025-01-14T21:15:11+00:00", - "comment_author": "jaymzh", - "comment_body": "```suggestion\r\n # back compat for pre-unified-/usr distros, do not add new OSes\r\n - remote: sudo ln -s /usr/bin/install /bin/install\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1878739148", - "pr_number": 14755, - "pr_file": "kitchen-tests/kitchen.exec.macos.yml", - "created_at": "2024-12-10T19:51:27+00:00", - "commented_code": "platforms:\n - name: macos-latest # arm64", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1878739148", - "repo_full_name": "chef/chef", - "pr_number": 14755, - "pr_file": "kitchen-tests/kitchen.exec.macos.yml", - "discussion_id": "1878739148", - "commented_code": "@@ -27,6 +27,7 @@ lifecycle:\n \n platforms:\n - name: macos-latest # arm64", - "comment_created_at": "2024-12-10T19:51:27+00:00", - "comment_author": "Stromweld", - "comment_body": "for clarity and future proofing when they take 15 out of beta and make it the latest.\r\n```suggestion\r\n - name: macos-14 # arm64\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1088744132", - "pr_number": 13531, - "pr_file": ".github/workflows/kitchen.yml", - "created_at": "2023-01-27T09:27:24+00:00", - "commented_code": "cd /home/runner/work/chef/chef/kitchen-tests\n bundle install\n bundle exec kitchen test end-to-end-${{ matrix.os }}\n linux-2004-host:", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1088744132", - "repo_full_name": "chef/chef", - "pr_number": 13531, - "pr_file": ".github/workflows/kitchen.yml", - "discussion_id": "1088744132", - "commented_code": "@@ -224,3 +223,35 @@ jobs:\n cd /home/runner/work/chef/chef/kitchen-tests\n bundle install\n bundle exec kitchen test end-to-end-${{ matrix.os }}\n+ linux-2004-host:", - "comment_created_at": "2023-01-27T09:27:24+00:00", - "comment_author": "neha-p6", - "comment_body": "Can we add a comment here stating we have to use 20.04 host for these operating other wise the dokken images throw timedatectl error, so that in future we would have quick reference when making any changes here", - "pr_file_module": null - }, - { - "comment_id": "1089000687", - "repo_full_name": "chef/chef", - "pr_number": 13531, - "pr_file": ".github/workflows/kitchen.yml", - "discussion_id": "1088744132", - "commented_code": "@@ -224,3 +223,35 @@ jobs:\n cd /home/runner/work/chef/chef/kitchen-tests\n bundle install\n bundle exec kitchen test end-to-end-${{ matrix.os }}\n+ linux-2004-host:", - "comment_created_at": "2023-01-27T14:07:54+00:00", - "comment_author": "tpowell-progress", - "comment_body": "Done.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1088743057", - "pr_number": 13533, - "pr_file": 
".github/workflows/kitchen.yml", - "created_at": "2023-01-27T09:26:14+00:00", - "commented_code": "working-directory: kitchen-tests\n run: |\n bundle exec kitchen test end-to-end-${{ matrix.os }}\n linux-2004-host:", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1088743057", - "repo_full_name": "chef/chef", - "pr_number": 13533, - "pr_file": ".github/workflows/kitchen.yml", - "discussion_id": "1088743057", - "commented_code": "@@ -124,3 +122,27 @@ jobs:\n working-directory: kitchen-tests\n run: |\n bundle exec kitchen test end-to-end-${{ matrix.os }}\n+ linux-2004-host:", - "comment_created_at": "2023-01-27T09:26:14+00:00", - "comment_author": "neha-p6", - "comment_body": "Can we add a comment here stating we have to use 20.04 host for these operating other wise the dokken images throw timedatectl error?", - "pr_file_module": null - }, - { - "comment_id": "1088997869", - "repo_full_name": "chef/chef", - "pr_number": 13533, - "pr_file": ".github/workflows/kitchen.yml", - "discussion_id": "1088743057", - "commented_code": "@@ -124,3 +122,27 @@ jobs:\n working-directory: kitchen-tests\n run: |\n bundle exec kitchen test end-to-end-${{ matrix.os }}\n+ linux-2004-host:", - "comment_created_at": "2023-01-27T14:05:08+00:00", - "comment_author": "tpowell-progress", - "comment_body": "Done.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-document-with-examples.json b/_reviewers/chef-document-with-examples.json new file mode 100644 index 0000000..f660da2 --- /dev/null +++ b/_reviewers/chef-document-with-examples.json @@ -0,0 +1,264 @@ +[ + { + "discussion_id": "1092458036", + "pr_number": 13534, + "pr_file": "knife/lib/chef/knife/bootstrap.rb", + "created_at": "2023-01-31T20:40:06+00:00", + "commented_code": "option :connection_protocol,\n short: \"-o PROTOCOL\",\n long: \"--connection-protocol PROTOCOL\",\n description: \"The protocol to use to connect to the target node.\",\n in: SUPPORTED_CONNECTION_PROTOCOLS\n description: \"The protocol to use to connect to the target node.\"", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1092458036", + "repo_full_name": "chef/chef", + "pr_number": 13534, + "pr_file": "knife/lib/chef/knife/bootstrap.rb", + "discussion_id": "1092458036", + "commented_code": "@@ -54,8 +54,7 @@ class Bootstrap < Knife\n option :connection_protocol,\n short: \"-o PROTOCOL\",\n long: \"--connection-protocol PROTOCOL\",\n- description: \"The protocol to use to connect to the target node.\",\n- in: SUPPORTED_CONNECTION_PROTOCOLS\n+ description: \"The protocol to use to connect to the target node.\"", + "comment_created_at": "2023-01-31T20:40:06+00:00", + "comment_author": "jaymzh", + "comment_body": "maybe add some examples, `... 
(such as 'ssh' or 'winrm')`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1949480111", + "pr_number": 14767, + "pr_file": "lib/chef/resource/registry_key.rb", + "created_at": "2025-02-10T16:34:00+00:00", + "commented_code": "}\n property :recursive, [TrueClass, FalseClass], default: false\n property :architecture, Symbol, default: :machine, equal_to: %i{machine x86_64 i386}\n property :only_record_changes, [TrueClass, FalseClass], default: false", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1949480111", + "repo_full_name": "chef/chef", + "pr_number": 14767, + "pr_file": "lib/chef/resource/registry_key.rb", + "discussion_id": "1949480111", + "commented_code": "@@ -150,6 +151,7 @@ class RegistryKey < Chef::Resource\n }\n property :recursive, [TrueClass, FalseClass], default: false\n property :architecture, Symbol, default: :machine, equal_to: %i{machine x86_64 i386}\n+ property :only_record_changes, [TrueClass, FalseClass], default: false", + "comment_created_at": "2025-02-10T16:34:00+00:00", + "comment_author": "tpowell-progress", + "comment_body": "This feels like it needs more documentation.", + "pr_file_module": null + }, + { + "comment_id": "1951393657", + "repo_full_name": "chef/chef", + "pr_number": 14767, + "pr_file": "lib/chef/resource/registry_key.rb", + "discussion_id": "1949480111", + "commented_code": "@@ -150,6 +151,7 @@ class RegistryKey < Chef::Resource\n }\n property :recursive, [TrueClass, FalseClass], default: false\n property :architecture, Symbol, default: :machine, equal_to: %i{machine x86_64 i386}\n+ property :only_record_changes, [TrueClass, FalseClass], default: false", + "comment_created_at": "2025-02-11T18:46:56+00:00", + "comment_author": "jaymzh", + "comment_body": "yes please. 
I thought there was a `:help` option to properties that was used for docs...", + "pr_file_module": null + }, + { + "comment_id": "1951704950", + "repo_full_name": "chef/chef", + "pr_number": 14767, + "pr_file": "lib/chef/resource/registry_key.rb", + "discussion_id": "1949480111", + "commented_code": "@@ -150,6 +151,7 @@ class RegistryKey < Chef::Resource\n }\n property :recursive, [TrueClass, FalseClass], default: false\n property :architecture, Symbol, default: :machine, equal_to: %i{machine x86_64 i386}\n+ property :only_record_changes, [TrueClass, FalseClass], default: false", + "comment_created_at": "2025-02-11T22:38:09+00:00", + "comment_author": "tpowell-progress", + "comment_body": "`, description: \"describe the property here\"` (see `lib/chef/resource.rb` for this value and also `introduced: \"...\"`)\r\n\r\nAlso add an example above using it.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1309270055", + "pr_number": 13873, + "pr_file": "lib/chef/resource/apt_package.rb", + "created_at": "2023-08-29T19:30:09+00:00", + "commented_code": "options '--no-install-recommends'\n end\n ```\n\n **Prevent the apt_package resource from installing packages with pattern matching names**:\n\n By default, the apt_package resource will install the named package.\n If it can't find a package with the exact same name, it will treat the package name as regex string and match with any package that matches that regex.\n This may lead Chef Infra Client to install one or more packages with names that match that regex.\n\n In this example, `anchor_package_regex true` prevents the apt_package resource from installing matching packages if it can't find the `lua5.3` package.", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1309270055", + "repo_full_name": "chef/chef", + "pr_number": 13873, + "pr_file": "lib/chef/resource/apt_package.rb", + "discussion_id": "1309270055", + "commented_code": "@@ -52,6 +52,21 @@ class AptPackage < Chef::Resource::Package\n options '--no-install-recommends'\n end\n ```\n+\n+ **Prevent the apt_package resource from installing packages with pattern matching names**:\n+\n+ By default, the apt_package resource will install the named package.\n+ If it can't find a package with the exact same name, it will treat the package name as regex string and match with any package that matches that regex.\n+ This may lead Chef Infra Client to install one or more packages with names that match that regex.\n+\n+ In this example, `anchor_package_regex true` prevents the apt_package resource from installing matching packages if it can't find the `lua5.3` package.", + "comment_created_at": "2023-08-29T19:30:09+00:00", + "comment_author": "jaymzh", + "comment_body": "```suggestion\r\n In this example, `anchor_package_regex true` helps prevent that by adding `^` and `$` anchors around the package name. 
Example: `lua5.3` would be `^lua5.3$`.\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1312064464", + "repo_full_name": "chef/chef", + "pr_number": 13873, + "pr_file": "lib/chef/resource/apt_package.rb", + "discussion_id": "1309270055", + "commented_code": "@@ -52,6 +52,21 @@ class AptPackage < Chef::Resource::Package\n options '--no-install-recommends'\n end\n ```\n+\n+ **Prevent the apt_package resource from installing packages with pattern matching names**:\n+\n+ By default, the apt_package resource will install the named package.\n+ If it can't find a package with the exact same name, it will treat the package name as regex string and match with any package that matches that regex.\n+ This may lead Chef Infra Client to install one or more packages with names that match that regex.\n+\n+ In this example, `anchor_package_regex true` prevents the apt_package resource from installing matching packages if it can't find the `lua5.3` package.", + "comment_created_at": "2023-08-31T18:44:28+00:00", + "comment_author": "IanMadd", + "comment_body": "We can't use regex anchors in docs. So can you rewrite this without those.", + "pr_file_module": null + }, + { + "comment_id": "1316298270", + "repo_full_name": "chef/chef", + "pr_number": 13873, + "pr_file": "lib/chef/resource/apt_package.rb", + "discussion_id": "1309270055", + "commented_code": "@@ -52,6 +52,21 @@ class AptPackage < Chef::Resource::Package\n options '--no-install-recommends'\n end\n ```\n+\n+ **Prevent the apt_package resource from installing packages with pattern matching names**:\n+\n+ By default, the apt_package resource will install the named package.\n+ If it can't find a package with the exact same name, it will treat the package name as regex string and match with any package that matches that regex.\n+ This may lead Chef Infra Client to install one or more packages with names that match that regex.\n+\n+ In this example, `anchor_package_regex true` prevents the apt_package resource from installing matching packages if it can't find the `lua5.3` package.", + "comment_created_at": "2023-09-05T19:13:23+00:00", + "comment_author": "tpowell-progress", + "comment_body": "@IanMadd the apt package manager treats any specified package name as a regex, so we definitely need to be able to describe a regular expression in the documentation, and without the visual of the anchors, I don't think it's going to be as obvious what this setting accomplishes.", + "pr_file_module": null + }, + { + "comment_id": "1316300705", + "repo_full_name": "chef/chef", + "pr_number": 13873, + "pr_file": "lib/chef/resource/apt_package.rb", + "discussion_id": "1309270055", + "commented_code": "@@ -52,6 +52,21 @@ class AptPackage < Chef::Resource::Package\n options '--no-install-recommends'\n end\n ```\n+\n+ **Prevent the apt_package resource from installing packages with pattern matching names**:\n+\n+ By default, the apt_package resource will install the named package.\n+ If it can't find a package with the exact same name, it will treat the package name as regex string and match with any package that matches that regex.\n+ This may lead Chef Infra Client to install one or more packages with names that match that regex.\n+\n+ In this example, `anchor_package_regex true` prevents the apt_package resource from installing matching packages if it can't find the `lua5.3` package.", + "comment_created_at": "2023-09-05T19:15:44+00:00", + "comment_author": "jaymzh", + "comment_body": "That's insane. 
Things take regular expressions in Chef, how do you document that? Can we escape it? Regex.escape? There has to be some way to do that. We'll file a docs Issue here for that, lets get this solved so we can document our software.", + "pr_file_module": null + }, + { + "comment_id": "1316302624", + "repo_full_name": "chef/chef", + "pr_number": 13873, + "pr_file": "lib/chef/resource/apt_package.rb", + "discussion_id": "1309270055", + "commented_code": "@@ -52,6 +52,21 @@ class AptPackage < Chef::Resource::Package\n options '--no-install-recommends'\n end\n ```\n+\n+ **Prevent the apt_package resource from installing packages with pattern matching names**:\n+\n+ By default, the apt_package resource will install the named package.\n+ If it can't find a package with the exact same name, it will treat the package name as regex string and match with any package that matches that regex.\n+ This may lead Chef Infra Client to install one or more packages with names that match that regex.\n+\n+ In this example, `anchor_package_regex true` prevents the apt_package resource from installing matching packages if it can't find the `lua5.3` package.", + "comment_created_at": "2023-09-05T19:17:54+00:00", + "comment_author": "jaymzh", + "comment_body": "https://github.com/chef/chef/issues/13908", + "pr_file_module": null + }, + { + "comment_id": "1318696748", + "repo_full_name": "chef/chef", + "pr_number": 13873, + "pr_file": "lib/chef/resource/apt_package.rb", + "discussion_id": "1309270055", + "commented_code": "@@ -52,6 +52,21 @@ class AptPackage < Chef::Resource::Package\n options '--no-install-recommends'\n end\n ```\n+\n+ **Prevent the apt_package resource from installing packages with pattern matching names**:\n+\n+ By default, the apt_package resource will install the named package.\n+ If it can't find a package with the exact same name, it will treat the package name as regex string and match with any package that matches that regex.\n+ This may lead Chef Infra Client to install one or more packages with names that match that regex.\n+\n+ In this example, `anchor_package_regex true` prevents the apt_package resource from installing matching packages if it can't find the `lua5.3` package.", + "comment_created_at": "2023-09-07T14:24:15+00:00", + "comment_author": "IanMadd", + "comment_body": "@jaymzh @tpowell-progress \r\n\r\nThe point isn't to remove regex from docs, it's to use plain text descriptions as much as possible. If we were documenting a tool that accepted a regular expression string, I'd be all for it. But in this case the user is entering `true` and `false`, so if we can we should avoid the regex anchors and expressions and explain this in plain text.\r\n\r\n\r\nSo what about something like this:\r\n\r\n> In this example, `anchor_package_regex true` prevents the apt_package resource from treating `lua5.3` as a regular expression string if it can't find a package with the exact name `lua5.3`. 
This prevents apt_package from installing similarly named packages like `lua5.3-rrd` or `liblua5.3-dev`.\r\n\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1312069987", + "pr_number": 13873, + "pr_file": "lib/chef/resource/apt_package.rb", + "created_at": "2023-08-31T18:49:22+00:00", + "commented_code": "options '--no-install-recommends'\n end\n ```\n\n **Prevent the apt_package resource from installing packages with pattern matching names**:\n\n By default, the apt_package resource will install the named package.\n If it can't find a package with the exact same name, it will treat the package name as regex string and match with any package that matches that regex.", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1312069987", + "repo_full_name": "chef/chef", + "pr_number": 13873, + "pr_file": "lib/chef/resource/apt_package.rb", + "discussion_id": "1312069987", + "commented_code": "@@ -52,6 +52,21 @@ class AptPackage < Chef::Resource::Package\n options '--no-install-recommends'\n end\n ```\n+\n+ **Prevent the apt_package resource from installing packages with pattern matching names**:\n+\n+ By default, the apt_package resource will install the named package.\n+ If it can't find a package with the exact same name, it will treat the package name as regex string and match with any package that matches that regex.", + "comment_created_at": "2023-08-31T18:49:22+00:00", + "comment_author": "IanMadd", + "comment_body": "We can't use the term `regex` in docs.\r\n\r\n```suggestion\r\n If it can't find a package with the exact same name, it will treat the package name as regular expression string and match with any package that matches that regular expression.\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "766320006", + "pr_number": 12358, + "pr_file": "chef-utils/lib/chef-utils/dsl/virtualization.rb", + "created_at": "2021-12-10T03:50:06+00:00", + "commented_code": "node.dig(\"virtualization\", \"system\") == \"vmware\" && node.dig(\"virtualization\", \"role\") == \"host\"\n end\n\n # Determine if the current node is virtualized on VMware Desktop (Fusion/Player/Workstation).\n #\n # @param [Chef::Node] node\n #", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "766320006", + "repo_full_name": "chef/chef", + "pr_number": 12358, + "pr_file": "chef-utils/lib/chef-utils/dsl/virtualization.rb", + "discussion_id": "766320006", + "commented_code": "@@ -140,6 +140,26 @@ def vmware_host?(node = __getnode)\n node.dig(\"virtualization\", \"system\") == \"vmware\" && node.dig(\"virtualization\", \"role\") == \"host\"\n end\n \n+ # Determine if the current node is virtualized on VMware Desktop (Fusion/Player/Workstation).\n+ #\n+ # @param [Chef::Node] node\n+ #", + "comment_created_at": "2021-12-10T03:50:06+00:00", + "comment_author": "tas50", + "comment_body": "Can you add the yard `@since` to these for 17.9. 
We generate the vscode plugin off this file and use those to document when a helper became available.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "478450911", + "pr_number": 10171, + "pr_file": "lib/chef/provider/mount/linux.rb", + "created_at": "2020-08-27T14:13:14+00:00", + "commented_code": "#\n# Author:: Antima Gupta ()\n# Copyright:: Copyright (c) Chef Software Inc.\n# License:: Apache License, Version 2.0\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nrequire_relative \"../mount\"\n\nclass Chef\n class Provider\n class Mount\n class Linux < Chef::Provider::Mount::Mount\n\n provides :mount, os: \"linux\"\n\n # Check to see if the volume is mounted.\n # \"findmnt\" outputs the mount points with volume.\n # Convert the mount_point of the resource to a real path in case it\n # contains symlinks in its parents dirs.\n\n def mounted?\n mounted = false\n\n real_mount_point = if ::File.exists? @new_resource.mount_point\n ::File.realpath(@new_resource.mount_point)\n else\n @new_resource.mount_point\n end\n\n shell_out!(\"findmnt -rn\").stdout.each_line do |line|\n case line\n when /\\A#{Regexp.escape(real_mount_point)}\\s+#{device_mount_regex}\\s/", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "478450911", + "repo_full_name": "chef/chef", + "pr_number": 10171, + "pr_file": "lib/chef/provider/mount/linux.rb", + "discussion_id": "478450911", + "commented_code": "@@ -0,0 +1,60 @@\n+#\n+# Author:: Antima Gupta ()\n+# Copyright:: Copyright (c) Chef Software Inc.\n+# License:: Apache License, Version 2.0\n+#\n+# Licensed under the Apache License, Version 2.0 (the \"License\");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an \"AS IS\" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+#\n+\n+require_relative \"../mount\"\n+\n+class Chef\n+ class Provider\n+ class Mount\n+ class Linux < Chef::Provider::Mount::Mount\n+\n+ provides :mount, os: \"linux\"\n+\n+ # Check to see if the volume is mounted.\n+ # \"findmnt\" outputs the mount points with volume.\n+ # Convert the mount_point of the resource to a real path in case it\n+ # contains symlinks in its parents dirs.\n+\n+ def mounted?\n+ mounted = false\n+\n+ real_mount_point = if ::File.exists? @new_resource.mount_point\n+ ::File.realpath(@new_resource.mount_point)\n+ else\n+ @new_resource.mount_point\n+ end\n+\n+ shell_out!(\"findmnt -rn\").stdout.each_line do |line|\n+ case line\n+ when /\\A#{Regexp.escape(real_mount_point)}\\s+#{device_mount_regex}\\s/", + "comment_created_at": "2020-08-27T14:13:14+00:00", + "comment_author": "jaymzh", + "comment_body": "This looks really nice. 
One thing that would be useful though would be to add a comment for each `when` that has an example line you're matching.", + "pr_file_module": null + }, + { + "comment_id": "478613103", + "repo_full_name": "chef/chef", + "pr_number": 10171, + "pr_file": "lib/chef/provider/mount/linux.rb", + "discussion_id": "478450911", + "commented_code": "@@ -0,0 +1,60 @@\n+#\n+# Author:: Antima Gupta ()\n+# Copyright:: Copyright (c) Chef Software Inc.\n+# License:: Apache License, Version 2.0\n+#\n+# Licensed under the Apache License, Version 2.0 (the \"License\");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an \"AS IS\" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+#\n+\n+require_relative \"../mount\"\n+\n+class Chef\n+ class Provider\n+ class Mount\n+ class Linux < Chef::Provider::Mount::Mount\n+\n+ provides :mount, os: \"linux\"\n+\n+ # Check to see if the volume is mounted.\n+ # \"findmnt\" outputs the mount points with volume.\n+ # Convert the mount_point of the resource to a real path in case it\n+ # contains symlinks in its parents dirs.\n+\n+ def mounted?\n+ mounted = false\n+\n+ real_mount_point = if ::File.exists? @new_resource.mount_point\n+ ::File.realpath(@new_resource.mount_point)\n+ else\n+ @new_resource.mount_point\n+ end\n+\n+ shell_out!(\"findmnt -rn\").stdout.each_line do |line|\n+ case line\n+ when /\\A#{Regexp.escape(real_mount_point)}\\s+#{device_mount_regex}\\s/", + "comment_created_at": "2020-08-27T18:25:47+00:00", + "comment_author": "tas50", + "comment_body": "Permalinks to rubular are super handy in this sort of case https://rubular.com/", + "pr_file_module": null + }, + { + "comment_id": "479244129", + "repo_full_name": "chef/chef", + "pr_number": 10171, + "pr_file": "lib/chef/provider/mount/linux.rb", + "discussion_id": "478450911", + "commented_code": "@@ -0,0 +1,60 @@\n+#\n+# Author:: Antima Gupta ()\n+# Copyright:: Copyright (c) Chef Software Inc.\n+# License:: Apache License, Version 2.0\n+#\n+# Licensed under the Apache License, Version 2.0 (the \"License\");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an \"AS IS\" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+#\n+\n+require_relative \"../mount\"\n+\n+class Chef\n+ class Provider\n+ class Mount\n+ class Linux < Chef::Provider::Mount::Mount\n+\n+ provides :mount, os: \"linux\"\n+\n+ # Check to see if the volume is mounted.\n+ # \"findmnt\" outputs the mount points with volume.\n+ # Convert the mount_point of the resource to a real path in case it\n+ # contains symlinks in its parents dirs.\n+\n+ def mounted?\n+ mounted = false\n+\n+ real_mount_point = if ::File.exists? 
@new_resource.mount_point\n+ ::File.realpath(@new_resource.mount_point)\n+ else\n+ @new_resource.mount_point\n+ end\n+\n+ shell_out!(\"findmnt -rn\").stdout.each_line do |line|\n+ case line\n+ when /\\A#{Regexp.escape(real_mount_point)}\\s+#{device_mount_regex}\\s/", + "comment_created_at": "2020-08-28T12:38:43+00:00", + "comment_author": "antima-gupta", + "comment_body": "Agreed, I have added Permalink.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "719689343", + "pr_number": 12119, + "pr_file": "lib/chef/resource/powershell_package_source.rb", + "created_at": "2021-09-30T19:14:51+00:00", + "commented_code": "description: \"A label that names your package source.\",\n name_property: true\n\n property :new_name, String,\n description: \"Used when updating the name of a NON-PSRepository\"\n property :new_name, introduced: \"17.5.23\", String,\n description: \"Used to change the name of a standard PackageSource.\"\n\n property :source_location, String,\n property :source_location, introduced: \"17.5.23\", String,", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "719689343", + "repo_full_name": "chef/chef", + "pr_number": 12119, + "pr_file": "lib/chef/resource/powershell_package_source.rb", + "discussion_id": "719689343", + "commented_code": "@@ -117,10 +117,10 @@ class PowershellPackageSource < Chef::Resource\n description: \"A label that names your package source.\",\n name_property: true\n \n- property :new_name, String,\n- description: \"Used when updating the name of a NON-PSRepository\"\n+ property :new_name, introduced: \"17.5.23\", String,\n+ description: \"Used to change the name of a standard PackageSource.\"\n \n- property :source_location, String,\n+ property :source_location, introduced: \"17.5.23\", String,", + "comment_created_at": "2021-09-30T19:14:51+00:00", + "comment_author": "tas50", + "comment_body": "```suggestion\r\n property :source_location, introduced: \"17.6\", String,\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-document-with-examples.md b/_reviewers/chef-document-with-examples.md index 4a5c2be..910004b 100644 --- a/_reviewers/chef-document-with-examples.md +++ b/_reviewers/chef-document-with-examples.md @@ -46,269 +46,3 @@ Always include comprehensive documentation with clear examples for properties, m 5. **Accessible language**: Use plain language descriptions over technical jargon when possible, and spell out "regular expression" instead of "regex" in user-facing documentation Comprehensive documentation reduces the learning curve for new developers and makes the codebase more maintainable. 
- - -[ - { - "discussion_id": "1092458036", - "pr_number": 13534, - "pr_file": "knife/lib/chef/knife/bootstrap.rb", - "created_at": "2023-01-31T20:40:06+00:00", - "commented_code": "option :connection_protocol,\n short: \"-o PROTOCOL\",\n long: \"--connection-protocol PROTOCOL\",\n description: \"The protocol to use to connect to the target node.\",\n in: SUPPORTED_CONNECTION_PROTOCOLS\n description: \"The protocol to use to connect to the target node.\"", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1092458036", - "repo_full_name": "chef/chef", - "pr_number": 13534, - "pr_file": "knife/lib/chef/knife/bootstrap.rb", - "discussion_id": "1092458036", - "commented_code": "@@ -54,8 +54,7 @@ class Bootstrap < Knife\n option :connection_protocol,\n short: \"-o PROTOCOL\",\n long: \"--connection-protocol PROTOCOL\",\n- description: \"The protocol to use to connect to the target node.\",\n- in: SUPPORTED_CONNECTION_PROTOCOLS\n+ description: \"The protocol to use to connect to the target node.\"", - "comment_created_at": "2023-01-31T20:40:06+00:00", - "comment_author": "jaymzh", - "comment_body": "maybe add some examples, `... (such as 'ssh' or 'winrm')`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1949480111", - "pr_number": 14767, - "pr_file": "lib/chef/resource/registry_key.rb", - "created_at": "2025-02-10T16:34:00+00:00", - "commented_code": "}\n property :recursive, [TrueClass, FalseClass], default: false\n property :architecture, Symbol, default: :machine, equal_to: %i{machine x86_64 i386}\n property :only_record_changes, [TrueClass, FalseClass], default: false", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1949480111", - "repo_full_name": "chef/chef", - "pr_number": 14767, - "pr_file": "lib/chef/resource/registry_key.rb", - "discussion_id": "1949480111", - "commented_code": "@@ -150,6 +151,7 @@ class RegistryKey < Chef::Resource\n }\n property :recursive, [TrueClass, FalseClass], default: false\n property :architecture, Symbol, default: :machine, equal_to: %i{machine x86_64 i386}\n+ property :only_record_changes, [TrueClass, FalseClass], default: false", - "comment_created_at": "2025-02-10T16:34:00+00:00", - "comment_author": "tpowell-progress", - "comment_body": "This feels like it needs more documentation.", - "pr_file_module": null - }, - { - "comment_id": "1951393657", - "repo_full_name": "chef/chef", - "pr_number": 14767, - "pr_file": "lib/chef/resource/registry_key.rb", - "discussion_id": "1949480111", - "commented_code": "@@ -150,6 +151,7 @@ class RegistryKey < Chef::Resource\n }\n property :recursive, [TrueClass, FalseClass], default: false\n property :architecture, Symbol, default: :machine, equal_to: %i{machine x86_64 i386}\n+ property :only_record_changes, [TrueClass, FalseClass], default: false", - "comment_created_at": "2025-02-11T18:46:56+00:00", - "comment_author": "jaymzh", - "comment_body": "yes please. 
I thought there was a `:help` option to properties that was used for docs...", - "pr_file_module": null - }, - { - "comment_id": "1951704950", - "repo_full_name": "chef/chef", - "pr_number": 14767, - "pr_file": "lib/chef/resource/registry_key.rb", - "discussion_id": "1949480111", - "commented_code": "@@ -150,6 +151,7 @@ class RegistryKey < Chef::Resource\n }\n property :recursive, [TrueClass, FalseClass], default: false\n property :architecture, Symbol, default: :machine, equal_to: %i{machine x86_64 i386}\n+ property :only_record_changes, [TrueClass, FalseClass], default: false", - "comment_created_at": "2025-02-11T22:38:09+00:00", - "comment_author": "tpowell-progress", - "comment_body": "`, description: \"describe the property here\"` (see `lib/chef/resource.rb` for this value and also `introduced: \"...\"`)\r\n\r\nAlso add an example above using it.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1309270055", - "pr_number": 13873, - "pr_file": "lib/chef/resource/apt_package.rb", - "created_at": "2023-08-29T19:30:09+00:00", - "commented_code": "options '--no-install-recommends'\n end\n ```\n\n **Prevent the apt_package resource from installing packages with pattern matching names**:\n\n By default, the apt_package resource will install the named package.\n If it can't find a package with the exact same name, it will treat the package name as regex string and match with any package that matches that regex.\n This may lead Chef Infra Client to install one or more packages with names that match that regex.\n\n In this example, `anchor_package_regex true` prevents the apt_package resource from installing matching packages if it can't find the `lua5.3` package.", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1309270055", - "repo_full_name": "chef/chef", - "pr_number": 13873, - "pr_file": "lib/chef/resource/apt_package.rb", - "discussion_id": "1309270055", - "commented_code": "@@ -52,6 +52,21 @@ class AptPackage < Chef::Resource::Package\n options '--no-install-recommends'\n end\n ```\n+\n+ **Prevent the apt_package resource from installing packages with pattern matching names**:\n+\n+ By default, the apt_package resource will install the named package.\n+ If it can't find a package with the exact same name, it will treat the package name as regex string and match with any package that matches that regex.\n+ This may lead Chef Infra Client to install one or more packages with names that match that regex.\n+\n+ In this example, `anchor_package_regex true` prevents the apt_package resource from installing matching packages if it can't find the `lua5.3` package.", - "comment_created_at": "2023-08-29T19:30:09+00:00", - "comment_author": "jaymzh", - "comment_body": "```suggestion\r\n In this example, `anchor_package_regex true` helps prevent that by adding `^` and `$` anchors around the package name. 
Example: `lua5.3` would be `^lua5.3$`.\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1312064464", - "repo_full_name": "chef/chef", - "pr_number": 13873, - "pr_file": "lib/chef/resource/apt_package.rb", - "discussion_id": "1309270055", - "commented_code": "@@ -52,6 +52,21 @@ class AptPackage < Chef::Resource::Package\n options '--no-install-recommends'\n end\n ```\n+\n+ **Prevent the apt_package resource from installing packages with pattern matching names**:\n+\n+ By default, the apt_package resource will install the named package.\n+ If it can't find a package with the exact same name, it will treat the package name as regex string and match with any package that matches that regex.\n+ This may lead Chef Infra Client to install one or more packages with names that match that regex.\n+\n+ In this example, `anchor_package_regex true` prevents the apt_package resource from installing matching packages if it can't find the `lua5.3` package.", - "comment_created_at": "2023-08-31T18:44:28+00:00", - "comment_author": "IanMadd", - "comment_body": "We can't use regex anchors in docs. So can you rewrite this without those.", - "pr_file_module": null - }, - { - "comment_id": "1316298270", - "repo_full_name": "chef/chef", - "pr_number": 13873, - "pr_file": "lib/chef/resource/apt_package.rb", - "discussion_id": "1309270055", - "commented_code": "@@ -52,6 +52,21 @@ class AptPackage < Chef::Resource::Package\n options '--no-install-recommends'\n end\n ```\n+\n+ **Prevent the apt_package resource from installing packages with pattern matching names**:\n+\n+ By default, the apt_package resource will install the named package.\n+ If it can't find a package with the exact same name, it will treat the package name as regex string and match with any package that matches that regex.\n+ This may lead Chef Infra Client to install one or more packages with names that match that regex.\n+\n+ In this example, `anchor_package_regex true` prevents the apt_package resource from installing matching packages if it can't find the `lua5.3` package.", - "comment_created_at": "2023-09-05T19:13:23+00:00", - "comment_author": "tpowell-progress", - "comment_body": "@IanMadd the apt package manager treats any specified package name as a regex, so we definitely need to be able to describe a regular expression in the documentation, and without the visual of the anchors, I don't think it's going to be as obvious what this setting accomplishes.", - "pr_file_module": null - }, - { - "comment_id": "1316300705", - "repo_full_name": "chef/chef", - "pr_number": 13873, - "pr_file": "lib/chef/resource/apt_package.rb", - "discussion_id": "1309270055", - "commented_code": "@@ -52,6 +52,21 @@ class AptPackage < Chef::Resource::Package\n options '--no-install-recommends'\n end\n ```\n+\n+ **Prevent the apt_package resource from installing packages with pattern matching names**:\n+\n+ By default, the apt_package resource will install the named package.\n+ If it can't find a package with the exact same name, it will treat the package name as regex string and match with any package that matches that regex.\n+ This may lead Chef Infra Client to install one or more packages with names that match that regex.\n+\n+ In this example, `anchor_package_regex true` prevents the apt_package resource from installing matching packages if it can't find the `lua5.3` package.", - "comment_created_at": "2023-09-05T19:15:44+00:00", - "comment_author": "jaymzh", - "comment_body": "That's insane. 
Things take regular expressions in Chef, how do you document that? Can we escape it? Regex.escape? There has to be some way to do that. We'll file a docs Issue here for that, lets get this solved so we can document our software.", - "pr_file_module": null - }, - { - "comment_id": "1316302624", - "repo_full_name": "chef/chef", - "pr_number": 13873, - "pr_file": "lib/chef/resource/apt_package.rb", - "discussion_id": "1309270055", - "commented_code": "@@ -52,6 +52,21 @@ class AptPackage < Chef::Resource::Package\n options '--no-install-recommends'\n end\n ```\n+\n+ **Prevent the apt_package resource from installing packages with pattern matching names**:\n+\n+ By default, the apt_package resource will install the named package.\n+ If it can't find a package with the exact same name, it will treat the package name as regex string and match with any package that matches that regex.\n+ This may lead Chef Infra Client to install one or more packages with names that match that regex.\n+\n+ In this example, `anchor_package_regex true` prevents the apt_package resource from installing matching packages if it can't find the `lua5.3` package.", - "comment_created_at": "2023-09-05T19:17:54+00:00", - "comment_author": "jaymzh", - "comment_body": "https://github.com/chef/chef/issues/13908", - "pr_file_module": null - }, - { - "comment_id": "1318696748", - "repo_full_name": "chef/chef", - "pr_number": 13873, - "pr_file": "lib/chef/resource/apt_package.rb", - "discussion_id": "1309270055", - "commented_code": "@@ -52,6 +52,21 @@ class AptPackage < Chef::Resource::Package\n options '--no-install-recommends'\n end\n ```\n+\n+ **Prevent the apt_package resource from installing packages with pattern matching names**:\n+\n+ By default, the apt_package resource will install the named package.\n+ If it can't find a package with the exact same name, it will treat the package name as regex string and match with any package that matches that regex.\n+ This may lead Chef Infra Client to install one or more packages with names that match that regex.\n+\n+ In this example, `anchor_package_regex true` prevents the apt_package resource from installing matching packages if it can't find the `lua5.3` package.", - "comment_created_at": "2023-09-07T14:24:15+00:00", - "comment_author": "IanMadd", - "comment_body": "@jaymzh @tpowell-progress \r\n\r\nThe point isn't to remove regex from docs, it's to use plain text descriptions as much as possible. If we were documenting a tool that accepted a regular expression string, I'd be all for it. But in this case the user is entering `true` and `false`, so if we can we should avoid the regex anchors and expressions and explain this in plain text.\r\n\r\n\r\nSo what about something like this:\r\n\r\n> In this example, `anchor_package_regex true` prevents the apt_package resource from treating `lua5.3` as a regular expression string if it can't find a package with the exact name `lua5.3`. 
This prevents apt_package from installing similarly named packages like `lua5.3-rrd` or `liblua5.3-dev`.\r\n\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1312069987", - "pr_number": 13873, - "pr_file": "lib/chef/resource/apt_package.rb", - "created_at": "2023-08-31T18:49:22+00:00", - "commented_code": "options '--no-install-recommends'\n end\n ```\n\n **Prevent the apt_package resource from installing packages with pattern matching names**:\n\n By default, the apt_package resource will install the named package.\n If it can't find a package with the exact same name, it will treat the package name as regex string and match with any package that matches that regex.", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1312069987", - "repo_full_name": "chef/chef", - "pr_number": 13873, - "pr_file": "lib/chef/resource/apt_package.rb", - "discussion_id": "1312069987", - "commented_code": "@@ -52,6 +52,21 @@ class AptPackage < Chef::Resource::Package\n options '--no-install-recommends'\n end\n ```\n+\n+ **Prevent the apt_package resource from installing packages with pattern matching names**:\n+\n+ By default, the apt_package resource will install the named package.\n+ If it can't find a package with the exact same name, it will treat the package name as regex string and match with any package that matches that regex.", - "comment_created_at": "2023-08-31T18:49:22+00:00", - "comment_author": "IanMadd", - "comment_body": "We can't use the term `regex` in docs.\r\n\r\n```suggestion\r\n If it can't find a package with the exact same name, it will treat the package name as regular expression string and match with any package that matches that regular expression.\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "766320006", - "pr_number": 12358, - "pr_file": "chef-utils/lib/chef-utils/dsl/virtualization.rb", - "created_at": "2021-12-10T03:50:06+00:00", - "commented_code": "node.dig(\"virtualization\", \"system\") == \"vmware\" && node.dig(\"virtualization\", \"role\") == \"host\"\n end\n\n # Determine if the current node is virtualized on VMware Desktop (Fusion/Player/Workstation).\n #\n # @param [Chef::Node] node\n #", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "766320006", - "repo_full_name": "chef/chef", - "pr_number": 12358, - "pr_file": "chef-utils/lib/chef-utils/dsl/virtualization.rb", - "discussion_id": "766320006", - "commented_code": "@@ -140,6 +140,26 @@ def vmware_host?(node = __getnode)\n node.dig(\"virtualization\", \"system\") == \"vmware\" && node.dig(\"virtualization\", \"role\") == \"host\"\n end\n \n+ # Determine if the current node is virtualized on VMware Desktop (Fusion/Player/Workstation).\n+ #\n+ # @param [Chef::Node] node\n+ #", - "comment_created_at": "2021-12-10T03:50:06+00:00", - "comment_author": "tas50", - "comment_body": "Can you add the yard `@since` to these for 17.9. 
We generate the vscode plugin off this file and use those to document when a helper became available.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "478450911", - "pr_number": 10171, - "pr_file": "lib/chef/provider/mount/linux.rb", - "created_at": "2020-08-27T14:13:14+00:00", - "commented_code": "#\n# Author:: Antima Gupta ()\n# Copyright:: Copyright (c) Chef Software Inc.\n# License:: Apache License, Version 2.0\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nrequire_relative \"../mount\"\n\nclass Chef\n class Provider\n class Mount\n class Linux < Chef::Provider::Mount::Mount\n\n provides :mount, os: \"linux\"\n\n # Check to see if the volume is mounted.\n # \"findmnt\" outputs the mount points with volume.\n # Convert the mount_point of the resource to a real path in case it\n # contains symlinks in its parents dirs.\n\n def mounted?\n mounted = false\n\n real_mount_point = if ::File.exists? @new_resource.mount_point\n ::File.realpath(@new_resource.mount_point)\n else\n @new_resource.mount_point\n end\n\n shell_out!(\"findmnt -rn\").stdout.each_line do |line|\n case line\n when /\\A#{Regexp.escape(real_mount_point)}\\s+#{device_mount_regex}\\s/", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "478450911", - "repo_full_name": "chef/chef", - "pr_number": 10171, - "pr_file": "lib/chef/provider/mount/linux.rb", - "discussion_id": "478450911", - "commented_code": "@@ -0,0 +1,60 @@\n+#\n+# Author:: Antima Gupta ()\n+# Copyright:: Copyright (c) Chef Software Inc.\n+# License:: Apache License, Version 2.0\n+#\n+# Licensed under the Apache License, Version 2.0 (the \"License\");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an \"AS IS\" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+#\n+\n+require_relative \"../mount\"\n+\n+class Chef\n+ class Provider\n+ class Mount\n+ class Linux < Chef::Provider::Mount::Mount\n+\n+ provides :mount, os: \"linux\"\n+\n+ # Check to see if the volume is mounted.\n+ # \"findmnt\" outputs the mount points with volume.\n+ # Convert the mount_point of the resource to a real path in case it\n+ # contains symlinks in its parents dirs.\n+\n+ def mounted?\n+ mounted = false\n+\n+ real_mount_point = if ::File.exists? @new_resource.mount_point\n+ ::File.realpath(@new_resource.mount_point)\n+ else\n+ @new_resource.mount_point\n+ end\n+\n+ shell_out!(\"findmnt -rn\").stdout.each_line do |line|\n+ case line\n+ when /\\A#{Regexp.escape(real_mount_point)}\\s+#{device_mount_regex}\\s/", - "comment_created_at": "2020-08-27T14:13:14+00:00", - "comment_author": "jaymzh", - "comment_body": "This looks really nice. 
One thing that would be useful though would be to add a comment for each `when` that has an example line you're matching.", - "pr_file_module": null - }, - { - "comment_id": "478613103", - "repo_full_name": "chef/chef", - "pr_number": 10171, - "pr_file": "lib/chef/provider/mount/linux.rb", - "discussion_id": "478450911", - "commented_code": "@@ -0,0 +1,60 @@\n+#\n+# Author:: Antima Gupta ()\n+# Copyright:: Copyright (c) Chef Software Inc.\n+# License:: Apache License, Version 2.0\n+#\n+# Licensed under the Apache License, Version 2.0 (the \"License\");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an \"AS IS\" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+#\n+\n+require_relative \"../mount\"\n+\n+class Chef\n+ class Provider\n+ class Mount\n+ class Linux < Chef::Provider::Mount::Mount\n+\n+ provides :mount, os: \"linux\"\n+\n+ # Check to see if the volume is mounted.\n+ # \"findmnt\" outputs the mount points with volume.\n+ # Convert the mount_point of the resource to a real path in case it\n+ # contains symlinks in its parents dirs.\n+\n+ def mounted?\n+ mounted = false\n+\n+ real_mount_point = if ::File.exists? @new_resource.mount_point\n+ ::File.realpath(@new_resource.mount_point)\n+ else\n+ @new_resource.mount_point\n+ end\n+\n+ shell_out!(\"findmnt -rn\").stdout.each_line do |line|\n+ case line\n+ when /\\A#{Regexp.escape(real_mount_point)}\\s+#{device_mount_regex}\\s/", - "comment_created_at": "2020-08-27T18:25:47+00:00", - "comment_author": "tas50", - "comment_body": "Permalinks to rubular are super handy in this sort of case https://rubular.com/", - "pr_file_module": null - }, - { - "comment_id": "479244129", - "repo_full_name": "chef/chef", - "pr_number": 10171, - "pr_file": "lib/chef/provider/mount/linux.rb", - "discussion_id": "478450911", - "commented_code": "@@ -0,0 +1,60 @@\n+#\n+# Author:: Antima Gupta ()\n+# Copyright:: Copyright (c) Chef Software Inc.\n+# License:: Apache License, Version 2.0\n+#\n+# Licensed under the Apache License, Version 2.0 (the \"License\");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an \"AS IS\" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+#\n+\n+require_relative \"../mount\"\n+\n+class Chef\n+ class Provider\n+ class Mount\n+ class Linux < Chef::Provider::Mount::Mount\n+\n+ provides :mount, os: \"linux\"\n+\n+ # Check to see if the volume is mounted.\n+ # \"findmnt\" outputs the mount points with volume.\n+ # Convert the mount_point of the resource to a real path in case it\n+ # contains symlinks in its parents dirs.\n+\n+ def mounted?\n+ mounted = false\n+\n+ real_mount_point = if ::File.exists? 
@new_resource.mount_point\n+ ::File.realpath(@new_resource.mount_point)\n+ else\n+ @new_resource.mount_point\n+ end\n+\n+ shell_out!(\"findmnt -rn\").stdout.each_line do |line|\n+ case line\n+ when /\\A#{Regexp.escape(real_mount_point)}\\s+#{device_mount_regex}\\s/", - "comment_created_at": "2020-08-28T12:38:43+00:00", - "comment_author": "antima-gupta", - "comment_body": "Agreed, I have added Permalink.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "719689343", - "pr_number": 12119, - "pr_file": "lib/chef/resource/powershell_package_source.rb", - "created_at": "2021-09-30T19:14:51+00:00", - "commented_code": "description: \"A label that names your package source.\",\n name_property: true\n\n property :new_name, String,\n description: \"Used when updating the name of a NON-PSRepository\"\n property :new_name, introduced: \"17.5.23\", String,\n description: \"Used to change the name of a standard PackageSource.\"\n\n property :source_location, String,\n property :source_location, introduced: \"17.5.23\", String,", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "719689343", - "repo_full_name": "chef/chef", - "pr_number": 12119, - "pr_file": "lib/chef/resource/powershell_package_source.rb", - "discussion_id": "719689343", - "commented_code": "@@ -117,10 +117,10 @@ class PowershellPackageSource < Chef::Resource\n description: \"A label that names your package source.\",\n name_property: true\n \n- property :new_name, String,\n- description: \"Used when updating the name of a NON-PSRepository\"\n+ property :new_name, introduced: \"17.5.23\", String,\n+ description: \"Used to change the name of a standard PackageSource.\"\n \n- property :source_location, String,\n+ property :source_location, introduced: \"17.5.23\", String,", - "comment_created_at": "2021-09-30T19:14:51+00:00", - "comment_author": "tas50", - "comment_body": "```suggestion\r\n property :source_location, introduced: \"17.6\", String,\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-explicit-configuration-over-implicit.json b/_reviewers/chef-explicit-configuration-over-implicit.json new file mode 100644 index 0000000..d0b0468 --- /dev/null +++ b/_reviewers/chef-explicit-configuration-over-implicit.json @@ -0,0 +1,244 @@ +[ + { + "discussion_id": "392394938", + "pr_number": 9480, + "pr_file": "chef-config/lib/chef-config/config.rb", + "created_at": "2020-03-13T18:17:13+00:00", + "commented_code": "# can be set to a string or array of strings for URIs to set as rubygems sources\n default :rubygems_url, \"https://www.rubygems.org\"\n\n # globally sets the default of the clear_sources property on the gem_package and chef_gem resources\n default :clear_gem_sources, false", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "392394938", + "repo_full_name": "chef/chef", + "pr_number": 9480, + "pr_file": "chef-config/lib/chef-config/config.rb", + "discussion_id": "392394938", + "commented_code": "@@ -1221,9 +1221,6 @@ def self.guess_internal_locale\n # can be set to a string or array of strings for URIs to set as rubygems sources\n default :rubygems_url, \"https://www.rubygems.org\"\n \n- # globally sets the default of the clear_sources property on the gem_package and chef_gem resources\n- default :clear_gem_sources, false", + "comment_created_at": "2020-03-13T18:17:13+00:00", + "comment_author": "lamont-granquist", + "comment_body": "this should probably be changed to an explicit nil default rather than relying on chef-config 
autovivification (for documentation purposes if nothing else, but i think we actually consume chef-config and turn on strict checking for something like berkshelf as well -- we always intended to turn on strict checking in chef-client but never did)", + "pr_file_module": null + }, + { + "comment_id": "392397904", + "repo_full_name": "chef/chef", + "pr_number": 9480, + "pr_file": "chef-config/lib/chef-config/config.rb", + "discussion_id": "392394938", + "commented_code": "@@ -1221,9 +1221,6 @@ def self.guess_internal_locale\n # can be set to a string or array of strings for URIs to set as rubygems sources\n default :rubygems_url, \"https://www.rubygems.org\"\n \n- # globally sets the default of the clear_sources property on the gem_package and chef_gem resources\n- default :clear_gem_sources, false", + "comment_created_at": "2020-03-13T18:23:01+00:00", + "comment_author": "phiggins", + "comment_body": "I wondered about that, I agree it makes sense to be more explicit.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1369663852", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "created_at": "2023-10-24T06:04:17+00:00", + "commented_code": "class Chef\n class Resource\n class ChocolateyInstaller < Chef::Resource\n provides :chocolatey_installer\n\n description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n introduced \"18.3\"\n examples <<~DOC\n **Install Chocolatey**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n end\n ```\n\n **Uninstall Chocolatey**\n\n ```ruby\n chocolatey_installer 'Some random verbiage' do\n action :uninstall\n end\n ```\n\n **Install Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n download_url \"https://www.contoso.com/foo\"\n chocolatey_version '2.12.24'\n end\n ```\n\n **Upgrade Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :upgrade\n chocolatey_version '2.12.24'\n end\n ```\n DOC\n\n allowed_actions :install, :uninstall, :upgrade\n\n property :download_url, String,\n description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1369663852", + "repo_full_name": "chef/chef", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "discussion_id": "1369663852", + "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. 
Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"", + "comment_created_at": "2023-10-24T06:04:17+00:00", + "comment_author": "jaymzh", + "comment_body": "We don't usually consult env vars from cookbooks. It breaks idempotency... Two prior with different environments running this could move the install back and forth between different versions. I highly highly recommend against this.", + "pr_file_module": null + }, + { + "comment_id": "1370132968", + "repo_full_name": "chef/chef", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "discussion_id": "1369663852", + "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"", + "comment_created_at": "2023-10-24T13:11:30+00:00", + "comment_author": "johnmccrae", + "comment_body": "I have a question - this defaults to a choco repo if you specify nothing. The point of this url is the use case where a random update needs to be downloaded from an alternate source. It does not need to be idempotent. 
", + "pr_file_module": null + }, + { + "comment_id": "1370782643", + "repo_full_name": "chef/chef", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "discussion_id": "1369663852", + "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"", + "comment_created_at": "2023-10-24T20:34:50+00:00", + "comment_author": "tpowell-progress", + "comment_body": "Per our discussion, this is actually being used directly by the Chocolatey `install.ps1` script and the environment variable can be used to redirect the standard installer to an alternate location.", + "pr_file_module": null + }, + { + "comment_id": "1373353325", + "repo_full_name": "chef/chef", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "discussion_id": "1369663852", + "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. 
It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"", + "comment_created_at": "2023-10-26T15:21:37+00:00", + "comment_author": "tpowell-progress", + "comment_body": "@johnmccrae can we made the description more clear that it's the choco installer itself looking at these variables?", + "pr_file_module": null + }, + { + "comment_id": "1376478178", + "repo_full_name": "chef/chef", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "discussion_id": "1369663852", + "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"", + "comment_created_at": "2023-10-30T16:10:07+00:00", + "comment_author": "johnmccrae", + "comment_body": "done and spelling error corrected too", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "596231507", + "pr_number": 11189, + "pr_file": "lib/chef/knife/core/bootstrap_context.rb", + "created_at": "2021-03-17T17:21:24+00:00", + "commented_code": "end\n\n unless chef_config[:file_cache_path].nil?\n client_rb << \"file_cache_path \\\"#{chef_config[:file_cache_path]}\\\"\\n\"\n client_rb << \"file_cache_path \\\"#{ChefConfig::PathHelper.escapepath(ChefConfig::Config.var_chef_dir(windows: false))}/cache\\\"\\n\"", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "596231507", + "repo_full_name": "chef/chef", + "pr_number": 11189, + "pr_file": "lib/chef/knife/core/bootstrap_context.rb", + "discussion_id": "596231507", + "commented_code": "@@ -172,11 +172,11 @@ def config_content\n end\n \n unless chef_config[:file_cache_path].nil?\n- client_rb << \"file_cache_path \\\"#{chef_config[:file_cache_path]}\\\"\\n\"\n+ client_rb << \"file_cache_path \\\"#{ChefConfig::PathHelper.escapepath(ChefConfig::Config.var_chef_dir(windows: false))}/cache\\\"\\n\"", + "comment_created_at": "2021-03-17T17:21:24+00:00", + "comment_author": "marcparadise", + "comment_body": "is it safe to use `windows: false` here? Shouldn't that depend on the remote OS? 
", + "pr_file_module": null + }, + { + "comment_id": "596244553", + "repo_full_name": "chef/chef", + "pr_number": 11189, + "pr_file": "lib/chef/knife/core/bootstrap_context.rb", + "discussion_id": "596231507", + "commented_code": "@@ -172,11 +172,11 @@ def config_content\n end\n \n unless chef_config[:file_cache_path].nil?\n- client_rb << \"file_cache_path \\\"#{chef_config[:file_cache_path]}\\\"\\n\"\n+ client_rb << \"file_cache_path \\\"#{ChefConfig::PathHelper.escapepath(ChefConfig::Config.var_chef_dir(windows: false))}/cache\\\"\\n\"", + "comment_created_at": "2021-03-17T17:37:22+00:00", + "comment_author": "lamont-granquist", + "comment_body": "we've got the windows_bootstrap_context which handles bootstrapping a windows node. this should properly be called unix_bootstrap_context or something, but history.", + "pr_file_module": null + }, + { + "comment_id": "596245747", + "repo_full_name": "chef/chef", + "pr_number": 11189, + "pr_file": "lib/chef/knife/core/bootstrap_context.rb", + "discussion_id": "596231507", + "commented_code": "@@ -172,11 +172,11 @@ def config_content\n end\n \n unless chef_config[:file_cache_path].nil?\n- client_rb << \"file_cache_path \\\"#{chef_config[:file_cache_path]}\\\"\\n\"\n+ client_rb << \"file_cache_path \\\"#{ChefConfig::PathHelper.escapepath(ChefConfig::Config.var_chef_dir(windows: false))}/cache\\\"\\n\"", + "comment_created_at": "2021-03-17T17:38:44+00:00", + "comment_author": "lamont-granquist", + "comment_body": "whats a bit more concerning is this is now hardcoding the last bit of this path.", + "pr_file_module": null + }, + { + "comment_id": "596250612", + "repo_full_name": "chef/chef", + "pr_number": 11189, + "pr_file": "lib/chef/knife/core/bootstrap_context.rb", + "discussion_id": "596231507", + "commented_code": "@@ -172,11 +172,11 @@ def config_content\n end\n \n unless chef_config[:file_cache_path].nil?\n- client_rb << \"file_cache_path \\\"#{chef_config[:file_cache_path]}\\\"\\n\"\n+ client_rb << \"file_cache_path \\\"#{ChefConfig::PathHelper.escapepath(ChefConfig::Config.var_chef_dir(windows: false))}/cache\\\"\\n\"", + "comment_created_at": "2021-03-17T17:45:07+00:00", + "comment_author": "lamont-granquist", + "comment_body": "we probably need to introduce new config variables `unix_bootstrap_file_cache_path` and `unix_bootstrap_file_backup_path` which should default to these values and allow the user to override them. then should probably do the same for windows.", + "pr_file_module": null + }, + { + "comment_id": "600197351", + "repo_full_name": "chef/chef", + "pr_number": 11189, + "pr_file": "lib/chef/knife/core/bootstrap_context.rb", + "discussion_id": "596231507", + "commented_code": "@@ -172,11 +172,11 @@ def config_content\n end\n \n unless chef_config[:file_cache_path].nil?\n- client_rb << \"file_cache_path \\\"#{chef_config[:file_cache_path]}\\\"\\n\"\n+ client_rb << \"file_cache_path \\\"#{ChefConfig::PathHelper.escapepath(ChefConfig::Config.var_chef_dir(windows: false))}/cache\\\"\\n\"", + "comment_created_at": "2021-03-24T06:07:36+00:00", + "comment_author": "snehaldwivedi", + "comment_body": "@lamont-granquist I have tried adding the new variable and separate definitions for defining `chef_dir`, `root_dir` & `cache_path` for Linux in [config.rb](https://github.com/chef/chef/blob/master/chef-config/lib/chef-config/config.rb). But I got stuck [here](https://github.com/chef/chef/blob/master/chef-config/lib/chef-config/config.rb#L372), where we need to get a home path of the remote node. 
I think we need to update `ChefUtils` to get the object of the remote OS.\r\n\r\nPlease let me know your suggestion on this.", + "pr_file_module": null + }, + { + "comment_id": "626866599", + "repo_full_name": "chef/chef", + "pr_number": 11189, + "pr_file": "lib/chef/knife/core/bootstrap_context.rb", + "discussion_id": "596231507", + "commented_code": "@@ -172,11 +172,11 @@ def config_content\n end\n \n unless chef_config[:file_cache_path].nil?\n- client_rb << \"file_cache_path \\\"#{chef_config[:file_cache_path]}\\\"\\n\"\n+ client_rb << \"file_cache_path \\\"#{ChefConfig::PathHelper.escapepath(ChefConfig::Config.var_chef_dir(windows: false))}/cache\\\"\\n\"", + "comment_created_at": "2021-05-05T20:13:08+00:00", + "comment_author": "btm", + "comment_body": "@snehaldwivedi ChefUtils does have a little bit of train in it and can get the remote OS, but it is for target mode, knife bootstrap has its own use of train.\r\n\r\n`ChefConfig::Mixin::TrainTransport` is used for target mode, not bootstrap.\r\n`ChefUtils::DSL::TrainHelpers` ultimately uses the train connection off the run_context for target mode\r\n`Chef::Knife::Bootstrap::TrainConnector` is used for knife bootstrap.\r\n\r\nThe amount of complexity that ChefConfig has around switching for windows or linux, local mode or target mode, and changing path names for the official or community distributions is already a lot and easy to break one of the many scenarios. Trying to add knife bootstrap or not knife bootstrap to that list is over the top.\r\n\r\nFor just knife bootstrap use in chef-config we could add something like this:\r\n```\r\ndefault(:unix_bootstrap_file_cache_path) do\r\n PathHelper.join(\"/var\", ChefUtils::Dist::Infra::DIR_SUFFIX, \"cache\", windows: false)\r\nend\r\n\r\ndefault(:windows_bootstrap_file_cache_path) do\r\n PathHelper.join(\"C:\", ChefUtils::Dist::Infra::DIR_SUFFIX, \"cache\", windows: true)\r\nend\r\n```\r\n\r\nIf someone needed something special (like probably we leave C: hardcoded) then they could put a different `windows_bootstrap_file_cache_path` in their config.rb.\r\n\r\nThen we mostly need to go through `bootstrap_context.rb` and `windows_bootstrap_context.rb` and make them use the appropriate new variable instead of `file_cache_path`, and update the tests. And do a little manual testing.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1893160030", + "pr_number": 14131, + "pr_file": "lib/chef/resource/apt_repository.rb", + "created_at": "2024-12-19T21:18:33+00:00", + "commented_code": "property :key_proxy, [String, nil, FalseClass],\n description: \"If set, a specified proxy is passed to GPG via `http-proxy=`.\"\n\n property :signed_by, [String, true, false, nil],\n description: \"If a string, specify the file and/or fingerprint the repo is signed with. If true, set Signed-With to use the specified key\",\n default: nil", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1893160030", + "repo_full_name": "chef/chef", + "pr_number": 14131, + "pr_file": "lib/chef/resource/apt_repository.rb", + "discussion_id": "1893160030", + "commented_code": "@@ -164,6 +164,10 @@ class AptRepository < Chef::Resource\n property :key_proxy, [String, nil, FalseClass],\n description: \"If set, a specified proxy is passed to GPG via `http-proxy=`.\"\n \n+ property :signed_by, [String, true, false, nil],\n+ description: \"If a string, specify the file and/or fingerprint the repo is signed with. 
If true, set Signed-With to use the specified key\",\n+ default: nil", + "comment_created_at": "2024-12-19T21:18:33+00:00", + "comment_author": "tmccombs", + "comment_body": "Should this default to true, or conditionally default to true?", + "pr_file_module": null + }, + { + "comment_id": "1894307957", + "repo_full_name": "chef/chef", + "pr_number": 14131, + "pr_file": "lib/chef/resource/apt_repository.rb", + "discussion_id": "1893160030", + "commented_code": "@@ -164,6 +164,10 @@ class AptRepository < Chef::Resource\n property :key_proxy, [String, nil, FalseClass],\n description: \"If set, a specified proxy is passed to GPG via `http-proxy=`.\"\n \n+ property :signed_by, [String, true, false, nil],\n+ description: \"If a string, specify the file and/or fingerprint the repo is signed with. If true, set Signed-With to use the specified key\",\n+ default: nil", + "comment_created_at": "2024-12-20T19:21:44+00:00", + "comment_author": "williamtheaker", + "comment_body": "Since 16.04 is the oldest version currently supported, I think this should be true by default.\r\nhttps://docs.chef.io/platforms/#commercial-support-4", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "561415718", + "pr_number": 10909, + "pr_file": "lib/chef/resource/windows_certificate.rb", + "created_at": "2021-01-21T00:14:29+00:00", + "commented_code": "default: \"MY\", equal_to: [\"TRUSTEDPUBLISHER\", \"TrustedPublisher\", \"CLIENTAUTHISSUER\", \"REMOTE DESKTOP\", \"ROOT\", \"TRUSTEDDEVICES\", \"WEBHOSTING\", \"CA\", \"AUTHROOT\", \"TRUSTEDPEOPLE\", \"MY\", \"SMARTCARDROOT\", \"TRUST\", \"DISALLOWED\"]\n\n property :user_store, [TrueClass, FalseClass],\n description: \"Use the user store of the local machine store if set to false.\",\n description: \"Use the CurrentUser store if set to true or the LocalMachine store if set to false.\",", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "561415718", + "repo_full_name": "chef/chef", + "pr_number": 10909, + "pr_file": "lib/chef/resource/windows_certificate.rb", + "discussion_id": "561415718", + "commented_code": "@@ -76,7 +76,7 @@ class WindowsCertificate < Chef::Resource\n default: \"MY\", equal_to: [\"TRUSTEDPUBLISHER\", \"TrustedPublisher\", \"CLIENTAUTHISSUER\", \"REMOTE DESKTOP\", \"ROOT\", \"TRUSTEDDEVICES\", \"WEBHOSTING\", \"CA\", \"AUTHROOT\", \"TRUSTEDPEOPLE\", \"MY\", \"SMARTCARDROOT\", \"TRUST\", \"DISALLOWED\"]\n \n property :user_store, [TrueClass, FalseClass],\n- description: \"Use the user store of the local machine store if set to false.\",\n+ description: \"Use the CurrentUser store if set to true or the LocalMachine store if set to false.\",", + "comment_created_at": "2021-01-21T00:14:29+00:00", + "comment_author": "tas50", + "comment_body": "```suggestion\r\n description: \"Use the `CurrentUser` store instead of the default `LocalMachine` store.\",\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-explicit-configuration-over-implicit.md b/_reviewers/chef-explicit-configuration-over-implicit.md index 6928027..64ba69d 100644 --- a/_reviewers/chef-explicit-configuration-over-implicit.md +++ b/_reviewers/chef-explicit-configuration-over-implicit.md @@ -39,249 +39,3 @@ end ``` This approach improves code clarity, makes behavior more predictable, and ensures proper documentation of configuration options. It also facilitates better testing and maintenance by making all possible states explicit. 
- - -[ - { - "discussion_id": "392394938", - "pr_number": 9480, - "pr_file": "chef-config/lib/chef-config/config.rb", - "created_at": "2020-03-13T18:17:13+00:00", - "commented_code": "# can be set to a string or array of strings for URIs to set as rubygems sources\n default :rubygems_url, \"https://www.rubygems.org\"\n\n # globally sets the default of the clear_sources property on the gem_package and chef_gem resources\n default :clear_gem_sources, false", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "392394938", - "repo_full_name": "chef/chef", - "pr_number": 9480, - "pr_file": "chef-config/lib/chef-config/config.rb", - "discussion_id": "392394938", - "commented_code": "@@ -1221,9 +1221,6 @@ def self.guess_internal_locale\n # can be set to a string or array of strings for URIs to set as rubygems sources\n default :rubygems_url, \"https://www.rubygems.org\"\n \n- # globally sets the default of the clear_sources property on the gem_package and chef_gem resources\n- default :clear_gem_sources, false", - "comment_created_at": "2020-03-13T18:17:13+00:00", - "comment_author": "lamont-granquist", - "comment_body": "this should probably be changed to an explicit nil default rather than relying on chef-config autovivification (for documentation purposes if nothing else, but i think we actually consume chef-config and turn on strict checking for something like berkshelf as well -- we always intended to turn on strict checking in chef-client but never did)", - "pr_file_module": null - }, - { - "comment_id": "392397904", - "repo_full_name": "chef/chef", - "pr_number": 9480, - "pr_file": "chef-config/lib/chef-config/config.rb", - "discussion_id": "392394938", - "commented_code": "@@ -1221,9 +1221,6 @@ def self.guess_internal_locale\n # can be set to a string or array of strings for URIs to set as rubygems sources\n default :rubygems_url, \"https://www.rubygems.org\"\n \n- # globally sets the default of the clear_sources property on the gem_package and chef_gem resources\n- default :clear_gem_sources, false", - "comment_created_at": "2020-03-13T18:23:01+00:00", - "comment_author": "phiggins", - "comment_body": "I wondered about that, I agree it makes sense to be more explicit.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1369663852", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "created_at": "2023-10-24T06:04:17+00:00", - "commented_code": "class Chef\n class Resource\n class ChocolateyInstaller < Chef::Resource\n provides :chocolatey_installer\n\n description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n introduced \"18.3\"\n examples <<~DOC\n **Install Chocolatey**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n end\n ```\n\n **Uninstall Chocolatey**\n\n ```ruby\n chocolatey_installer 'Some random verbiage' do\n action :uninstall\n end\n ```\n\n **Install Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n download_url \"https://www.contoso.com/foo\"\n chocolatey_version '2.12.24'\n end\n ```\n\n **Upgrade Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :upgrade\n chocolatey_version '2.12.24'\n end\n ```\n DOC\n\n allowed_actions :install, :uninstall, :upgrade\n\n property :download_url, String,\n description: \"The URL to download Chocolatey from. 
This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1369663852", - "repo_full_name": "chef/chef", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "discussion_id": "1369663852", - "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"", - "comment_created_at": "2023-10-24T06:04:17+00:00", - "comment_author": "jaymzh", - "comment_body": "We don't usually consult env vars from cookbooks. It breaks idempotency... Two prior with different environments running this could move the install back and forth between different versions. I highly highly recommend against this.", - "pr_file_module": null - }, - { - "comment_id": "1370132968", - "repo_full_name": "chef/chef", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "discussion_id": "1369663852", - "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. 
Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"", - "comment_created_at": "2023-10-24T13:11:30+00:00", - "comment_author": "johnmccrae", - "comment_body": "I have a question - this defaults to a choco repo if you specify nothing. The point of this url is the use case where a random update needs to be downloaded from an alternate source. It does not need to be idempotent. ", - "pr_file_module": null - }, - { - "comment_id": "1370782643", - "repo_full_name": "chef/chef", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "discussion_id": "1369663852", - "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. 
It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"", - "comment_created_at": "2023-10-24T20:34:50+00:00", - "comment_author": "tpowell-progress", - "comment_body": "Per our discussion, this is actually being used directly by the Chocolatey `install.ps1` script and the environment variable can be used to redirect the standard installer to an alternate location.", - "pr_file_module": null - }, - { - "comment_id": "1373353325", - "repo_full_name": "chef/chef", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "discussion_id": "1369663852", - "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"", - "comment_created_at": "2023-10-26T15:21:37+00:00", - "comment_author": "tpowell-progress", - "comment_body": "@johnmccrae can we made the description more clear that it's the choco installer itself looking at these variables?", - "pr_file_module": null - }, - { - "comment_id": "1376478178", - "repo_full_name": "chef/chef", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "discussion_id": "1369663852", - "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. 
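One way to make the `download_url` description state explicitly that it is Chocolatey's own `install.ps1` script, not Chef, that consults `$env:ChocolateyDownloadUrl`, as tpowell-progress requests above. The wording below is only a sketch of that clarification, not the text that was merged:

```ruby
property :download_url, String,
  description: "URL (or local path to a chocolatey.nupkg for offline installs) that the " \
               "Chocolatey install.ps1 script downloads Chocolatey from. The installer itself " \
               "honors $env:ChocolateyDownloadUrl when it is set and otherwise falls back to " \
               "the official Chocolatey community repository."
```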
Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"", - "comment_created_at": "2023-10-30T16:10:07+00:00", - "comment_author": "johnmccrae", - "comment_body": "done and spelling error corrected too", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "596231507", - "pr_number": 11189, - "pr_file": "lib/chef/knife/core/bootstrap_context.rb", - "created_at": "2021-03-17T17:21:24+00:00", - "commented_code": "end\n\n unless chef_config[:file_cache_path].nil?\n client_rb << \"file_cache_path \\\"#{chef_config[:file_cache_path]}\\\"\\n\"\n client_rb << \"file_cache_path \\\"#{ChefConfig::PathHelper.escapepath(ChefConfig::Config.var_chef_dir(windows: false))}/cache\\\"\\n\"", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "596231507", - "repo_full_name": "chef/chef", - "pr_number": 11189, - "pr_file": "lib/chef/knife/core/bootstrap_context.rb", - "discussion_id": "596231507", - "commented_code": "@@ -172,11 +172,11 @@ def config_content\n end\n \n unless chef_config[:file_cache_path].nil?\n- client_rb << \"file_cache_path \\\"#{chef_config[:file_cache_path]}\\\"\\n\"\n+ client_rb << \"file_cache_path \\\"#{ChefConfig::PathHelper.escapepath(ChefConfig::Config.var_chef_dir(windows: false))}/cache\\\"\\n\"", - "comment_created_at": "2021-03-17T17:21:24+00:00", - "comment_author": "marcparadise", - "comment_body": "is it safe to use `windows: false` here? Shouldn't that depend on the remote OS? ", - "pr_file_module": null - }, - { - "comment_id": "596244553", - "repo_full_name": "chef/chef", - "pr_number": 11189, - "pr_file": "lib/chef/knife/core/bootstrap_context.rb", - "discussion_id": "596231507", - "commented_code": "@@ -172,11 +172,11 @@ def config_content\n end\n \n unless chef_config[:file_cache_path].nil?\n- client_rb << \"file_cache_path \\\"#{chef_config[:file_cache_path]}\\\"\\n\"\n+ client_rb << \"file_cache_path \\\"#{ChefConfig::PathHelper.escapepath(ChefConfig::Config.var_chef_dir(windows: false))}/cache\\\"\\n\"", - "comment_created_at": "2021-03-17T17:37:22+00:00", - "comment_author": "lamont-granquist", - "comment_body": "we've got the windows_bootstrap_context which handles bootstrapping a windows node. 
this should properly be called unix_bootstrap_context or something, but history.", - "pr_file_module": null - }, - { - "comment_id": "596245747", - "repo_full_name": "chef/chef", - "pr_number": 11189, - "pr_file": "lib/chef/knife/core/bootstrap_context.rb", - "discussion_id": "596231507", - "commented_code": "@@ -172,11 +172,11 @@ def config_content\n end\n \n unless chef_config[:file_cache_path].nil?\n- client_rb << \"file_cache_path \\\"#{chef_config[:file_cache_path]}\\\"\\n\"\n+ client_rb << \"file_cache_path \\\"#{ChefConfig::PathHelper.escapepath(ChefConfig::Config.var_chef_dir(windows: false))}/cache\\\"\\n\"", - "comment_created_at": "2021-03-17T17:38:44+00:00", - "comment_author": "lamont-granquist", - "comment_body": "whats a bit more concerning is this is now hardcoding the last bit of this path.", - "pr_file_module": null - }, - { - "comment_id": "596250612", - "repo_full_name": "chef/chef", - "pr_number": 11189, - "pr_file": "lib/chef/knife/core/bootstrap_context.rb", - "discussion_id": "596231507", - "commented_code": "@@ -172,11 +172,11 @@ def config_content\n end\n \n unless chef_config[:file_cache_path].nil?\n- client_rb << \"file_cache_path \\\"#{chef_config[:file_cache_path]}\\\"\\n\"\n+ client_rb << \"file_cache_path \\\"#{ChefConfig::PathHelper.escapepath(ChefConfig::Config.var_chef_dir(windows: false))}/cache\\\"\\n\"", - "comment_created_at": "2021-03-17T17:45:07+00:00", - "comment_author": "lamont-granquist", - "comment_body": "we probably need to introduce new config variables `unix_bootstrap_file_cache_path` and `unix_bootstrap_file_backup_path` which should default to these values and allow the user to override them. then should probably do the same for windows.", - "pr_file_module": null - }, - { - "comment_id": "600197351", - "repo_full_name": "chef/chef", - "pr_number": 11189, - "pr_file": "lib/chef/knife/core/bootstrap_context.rb", - "discussion_id": "596231507", - "commented_code": "@@ -172,11 +172,11 @@ def config_content\n end\n \n unless chef_config[:file_cache_path].nil?\n- client_rb << \"file_cache_path \\\"#{chef_config[:file_cache_path]}\\\"\\n\"\n+ client_rb << \"file_cache_path \\\"#{ChefConfig::PathHelper.escapepath(ChefConfig::Config.var_chef_dir(windows: false))}/cache\\\"\\n\"", - "comment_created_at": "2021-03-24T06:07:36+00:00", - "comment_author": "snehaldwivedi", - "comment_body": "@lamont-granquist I have tried adding the new variable and separate definitions for defining `chef_dir`, `root_dir` & `cache_path` for Linux in [config.rb](https://github.com/chef/chef/blob/master/chef-config/lib/chef-config/config.rb). But I got stuck [here](https://github.com/chef/chef/blob/master/chef-config/lib/chef-config/config.rb#L372), where we need to get a home path of the remote node. 
I think we need to update `ChefUtils` to get the object of the remote OS.\r\n\r\nPlease let me know your suggestion on this.", - "pr_file_module": null - }, - { - "comment_id": "626866599", - "repo_full_name": "chef/chef", - "pr_number": 11189, - "pr_file": "lib/chef/knife/core/bootstrap_context.rb", - "discussion_id": "596231507", - "commented_code": "@@ -172,11 +172,11 @@ def config_content\n end\n \n unless chef_config[:file_cache_path].nil?\n- client_rb << \"file_cache_path \\\"#{chef_config[:file_cache_path]}\\\"\\n\"\n+ client_rb << \"file_cache_path \\\"#{ChefConfig::PathHelper.escapepath(ChefConfig::Config.var_chef_dir(windows: false))}/cache\\\"\\n\"", - "comment_created_at": "2021-05-05T20:13:08+00:00", - "comment_author": "btm", - "comment_body": "@snehaldwivedi ChefUtils does have a little bit of train in it and can get the remote OS, but it is for target mode, knife bootstrap has its own use of train.\r\n\r\n`ChefConfig::Mixin::TrainTransport` is used for target mode, not bootstrap.\r\n`ChefUtils::DSL::TrainHelpers` ultimately uses the train connection off the run_context for target mode\r\n`Chef::Knife::Bootstrap::TrainConnector` is used for knife bootstrap.\r\n\r\nThe amount of complexity that ChefConfig has around switching for windows or linux, local mode or target mode, and changing path names for the official or community distributions is already a lot and easy to break one of the many scenarios. Trying to add knife bootstrap or not knife bootstrap to that list is over the top.\r\n\r\nFor just knife bootstrap use in chef-config we could add something like this:\r\n```\r\ndefault(:unix_bootstrap_file_cache_path) do\r\n PathHelper.join(\"/var\", ChefUtils::Dist::Infra::DIR_SUFFIX, \"cache\", windows: false)\r\nend\r\n\r\ndefault(:windows_bootstrap_file_cache_path) do\r\n PathHelper.join(\"C:\", ChefUtils::Dist::Infra::DIR_SUFFIX, \"cache\", windows: true)\r\nend\r\n```\r\n\r\nIf someone needed something special (like probably we leave C: hardcoded) then they could put a different `windows_bootstrap_file_cache_path` in their config.rb.\r\n\r\nThen we mostly need to go through `bootstrap_context.rb` and `windows_bootstrap_context.rb` and make them use the appropriate new variable instead of `file_cache_path`, and update the tests. And do a little manual testing.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1893160030", - "pr_number": 14131, - "pr_file": "lib/chef/resource/apt_repository.rb", - "created_at": "2024-12-19T21:18:33+00:00", - "commented_code": "property :key_proxy, [String, nil, FalseClass],\n description: \"If set, a specified proxy is passed to GPG via `http-proxy=`.\"\n\n property :signed_by, [String, true, false, nil],\n description: \"If a string, specify the file and/or fingerprint the repo is signed with. If true, set Signed-With to use the specified key\",\n default: nil", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1893160030", - "repo_full_name": "chef/chef", - "pr_number": 14131, - "pr_file": "lib/chef/resource/apt_repository.rb", - "discussion_id": "1893160030", - "commented_code": "@@ -164,6 +164,10 @@ class AptRepository < Chef::Resource\n property :key_proxy, [String, nil, FalseClass],\n description: \"If set, a specified proxy is passed to GPG via `http-proxy=`.\"\n \n+ property :signed_by, [String, true, false, nil],\n+ description: \"If a string, specify the file and/or fingerprint the repo is signed with. 
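Building on btm's sketch above, the bootstrap context would then read the new unix-specific default rather than the workstation's own `file_cache_path`. A rough illustration, with the variable name following his suggestion rather than any merged code; the Windows bootstrap context would do the same with `windows_bootstrap_file_cache_path`:

```ruby
# lib/chef/knife/core/bootstrap_context.rb -- sketch only
unless chef_config[:unix_bootstrap_file_cache_path].nil?
  client_rb << "file_cache_path \"#{chef_config[:unix_bootstrap_file_cache_path]}\"\n"
end
```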
If true, set Signed-With to use the specified key\",\n+ default: nil", - "comment_created_at": "2024-12-19T21:18:33+00:00", - "comment_author": "tmccombs", - "comment_body": "Should this default to true, or conditionally default to true?", - "pr_file_module": null - }, - { - "comment_id": "1894307957", - "repo_full_name": "chef/chef", - "pr_number": 14131, - "pr_file": "lib/chef/resource/apt_repository.rb", - "discussion_id": "1893160030", - "commented_code": "@@ -164,6 +164,10 @@ class AptRepository < Chef::Resource\n property :key_proxy, [String, nil, FalseClass],\n description: \"If set, a specified proxy is passed to GPG via `http-proxy=`.\"\n \n+ property :signed_by, [String, true, false, nil],\n+ description: \"If a string, specify the file and/or fingerprint the repo is signed with. If true, set Signed-With to use the specified key\",\n+ default: nil", - "comment_created_at": "2024-12-20T19:21:44+00:00", - "comment_author": "williamtheaker", - "comment_body": "Since 16.04 is the oldest version currently supported, I think this should be true by default.\r\nhttps://docs.chef.io/platforms/#commercial-support-4", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "561415718", - "pr_number": 10909, - "pr_file": "lib/chef/resource/windows_certificate.rb", - "created_at": "2021-01-21T00:14:29+00:00", - "commented_code": "default: \"MY\", equal_to: [\"TRUSTEDPUBLISHER\", \"TrustedPublisher\", \"CLIENTAUTHISSUER\", \"REMOTE DESKTOP\", \"ROOT\", \"TRUSTEDDEVICES\", \"WEBHOSTING\", \"CA\", \"AUTHROOT\", \"TRUSTEDPEOPLE\", \"MY\", \"SMARTCARDROOT\", \"TRUST\", \"DISALLOWED\"]\n\n property :user_store, [TrueClass, FalseClass],\n description: \"Use the user store of the local machine store if set to false.\",\n description: \"Use the CurrentUser store if set to true or the LocalMachine store if set to false.\",", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "561415718", - "repo_full_name": "chef/chef", - "pr_number": 10909, - "pr_file": "lib/chef/resource/windows_certificate.rb", - "discussion_id": "561415718", - "commented_code": "@@ -76,7 +76,7 @@ class WindowsCertificate < Chef::Resource\n default: \"MY\", equal_to: [\"TRUSTEDPUBLISHER\", \"TrustedPublisher\", \"CLIENTAUTHISSUER\", \"REMOTE DESKTOP\", \"ROOT\", \"TRUSTEDDEVICES\", \"WEBHOSTING\", \"CA\", \"AUTHROOT\", \"TRUSTEDPEOPLE\", \"MY\", \"SMARTCARDROOT\", \"TRUST\", \"DISALLOWED\"]\n \n property :user_store, [TrueClass, FalseClass],\n- description: \"Use the user store of the local machine store if set to false.\",\n+ description: \"Use the CurrentUser store if set to true or the LocalMachine store if set to false.\",", - "comment_created_at": "2021-01-21T00:14:29+00:00", - "comment_author": "tas50", - "comment_body": "```suggestion\r\n description: \"Use the `CurrentUser` store instead of the default `LocalMachine` store.\",\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-externalize-configuration-values.json b/_reviewers/chef-externalize-configuration-values.json new file mode 100644 index 0000000..95c2b91 --- /dev/null +++ b/_reviewers/chef-externalize-configuration-values.json @@ -0,0 +1,158 @@ +[ + { + "discussion_id": "1713904952", + "pr_number": 14467, + "pr_file": ".expeditor/scripts/bk_container_prep.sh", + "created_at": "2024-08-12T14:34:36+00:00", + "commented_code": "export FORCE_FFI_YAJL=\"ext\"\nexport CHEF_LICENSE=\"accept-no-persist\"\nexport ENV[\"CHEF_LICENSE_SERVER\"] = 
\"http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\"", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1713904952", + "repo_full_name": "chef/chef", + "pr_number": 14467, + "pr_file": ".expeditor/scripts/bk_container_prep.sh", + "discussion_id": "1713904952", + "commented_code": "@@ -18,6 +18,8 @@ echo \"--- Preparing Container...\"\n \n export FORCE_FFI_YAJL=\"ext\"\n export CHEF_LICENSE=\"accept-no-persist\"\n+export ENV[\"CHEF_LICENSE_SERVER\"] = \"http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\"", + "comment_created_at": "2024-08-12T14:34:36+00:00", + "comment_author": "tpowell-progress", + "comment_body": "Feels like these env vars should be set in Buildkite itself.", + "pr_file_module": null + }, + { + "comment_id": "1714794576", + "repo_full_name": "chef/chef", + "pr_number": 14467, + "pr_file": ".expeditor/scripts/bk_container_prep.sh", + "discussion_id": "1713904952", + "commented_code": "@@ -18,6 +18,8 @@ echo \"--- Preparing Container...\"\n \n export FORCE_FFI_YAJL=\"ext\"\n export CHEF_LICENSE=\"accept-no-persist\"\n+export ENV[\"CHEF_LICENSE_SERVER\"] = \"http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\"", + "comment_created_at": "2024-08-13T07:18:59+00:00", + "comment_author": "ahasunos", + "comment_body": "will refactor them to be in a single place instead of spread out throughout the codebase.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1743781220", + "pr_number": 14467, + "pr_file": ".expeditor/scripts/bk_container_prep.sh", + "created_at": "2024-09-04T13:18:55+00:00", + "commented_code": "export FORCE_FFI_YAJL=\"ext\"\nexport CHEF_LICENSE=\"accept-no-persist\"\nexport CHEF_LICENSE_SERVER=\"http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000\"", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1743781220", + "repo_full_name": "chef/chef", + "pr_number": 14467, + "pr_file": ".expeditor/scripts/bk_container_prep.sh", + "discussion_id": "1743781220", + "commented_code": "@@ -18,6 +18,8 @@ echo \"--- Preparing Container...\"\n \n export FORCE_FFI_YAJL=\"ext\"\n export CHEF_LICENSE=\"accept-no-persist\"\n+export CHEF_LICENSE_SERVER=\"http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000\"", + "comment_created_at": "2024-09-04T13:18:55+00:00", + "comment_author": "tpowell-progress", + "comment_body": "We don't need to be pointing at a raw endpoint in the code. It's likely unstable, and we probably don't want to expose it, regardless. 
Needs to be injected via secrets or env just to not have to have a pull request (that will have failing pipelines due to this value being stale) to update this value.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1743784377", + "pr_number": 14467, + "pr_file": ".expeditor/scripts/bk_linux_exec.sh", + "created_at": "2024-09-04T13:20:33+00:00", + "commented_code": "export BUNDLE_GEMFILE=$PWD/kitchen-tests/Gemfile\nexport FORCE_FFI_YAJL=ext\nexport CHEF_LICENSE=\"accept-silent\"\nexport CHEF_LICENSE_SERVER=\"http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000\"", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1743784377", + "repo_full_name": "chef/chef", + "pr_number": 14467, + "pr_file": ".expeditor/scripts/bk_linux_exec.sh", + "discussion_id": "1743784377", + "commented_code": "@@ -28,6 +28,8 @@ asdf global ruby $ruby_version\n export BUNDLE_GEMFILE=$PWD/kitchen-tests/Gemfile\n export FORCE_FFI_YAJL=ext\n export CHEF_LICENSE=\"accept-silent\"\n+export CHEF_LICENSE_SERVER=\"http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000\"", + "comment_created_at": "2024-09-04T13:20:33+00:00", + "comment_author": "tpowell-progress", + "comment_body": "Secrets / env value instead of statically defined in code.\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1743784601", + "pr_number": 14467, + "pr_file": ".expeditor/scripts/prep_and_run_tests.sh", + "created_at": "2024-09-04T13:20:40+00:00", + "commented_code": "set -euo pipefail\n\nexport CHEF_LICENSE_SERVER=\"http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\"", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1743784601", + "repo_full_name": "chef/chef", + "pr_number": 14467, + "pr_file": ".expeditor/scripts/prep_and_run_tests.sh", + "discussion_id": "1743784601", + "commented_code": "@@ -2,6 +2,8 @@\n \n set -euo pipefail\n \n+export CHEF_LICENSE_SERVER=\"http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\"", + "comment_created_at": "2024-09-04T13:20:40+00:00", + "comment_author": "tpowell-progress", + "comment_body": "Secrets / env value instead of statically defined in code.\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1903930493", + "pr_number": 14775, + "pr_file": ".expeditor/scripts/build-infra-deb.sh", + "created_at": "2025-01-06T09:45:12+00:00", + "commented_code": "#!/bin/bash\n\nset -euo pipefail\n\nvalidate_env_vars() {\n if [ -z \"${CHEF_INFRA_MIGRATE_TAR_DEB:-}\" ] || [ -z \"${CHEF_INFRA_HAB_TAR_DEB:-}\" ]; then", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1903930493", + "repo_full_name": "chef/chef", + "pr_number": 14775, + "pr_file": ".expeditor/scripts/build-infra-deb.sh", + "discussion_id": "1903930493", + "commented_code": "@@ -0,0 +1,187 @@\n+#!/bin/bash\n+\n+set -euo pipefail\n+\n+validate_env_vars() {\n+ if [ -z \"${CHEF_INFRA_MIGRATE_TAR_DEB:-}\" ] || [ -z \"${CHEF_INFRA_HAB_TAR_DEB:-}\" ]; then", + "comment_created_at": "2025-01-06T09:45:12+00:00", + "comment_author": "sajjaphani", + "comment_body": "Remove `_DEB` from the environment variable to align with the other build script for RPM.\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "678445053", + "pr_number": 11871, + "pr_file": ".expeditor/promote-docker-images.sh", + "created_at": "2021-07-28T16:01:22+00:00", + "commented_code": "#! 
/bin/bash\n\nexport DOCKER_CLI_EXPERIMENTAL=enabled\n\nVERSION=$(cat VERSION)", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "678445053", + "repo_full_name": "chef/chef", + "pr_number": 11871, + "pr_file": ".expeditor/promote-docker-images.sh", + "discussion_id": "678445053", + "commented_code": "@@ -0,0 +1,42 @@\n+#! /bin/bash\n+\n+export DOCKER_CLI_EXPERIMENTAL=enabled\n+\n+VERSION=$(cat VERSION)", + "comment_created_at": "2021-07-28T16:01:22+00:00", + "comment_author": "jeremiahsnapp", + "comment_body": "I don't think you want the contents of the VERSION file. Those contents could be different from the version that is being promoted. I think you can just have this script use `EXPEDITOR_VERSION`.", + "pr_file_module": null + }, + { + "comment_id": "678468868", + "repo_full_name": "chef/chef", + "pr_number": 11871, + "pr_file": ".expeditor/promote-docker-images.sh", + "discussion_id": "678445053", + "commented_code": "@@ -0,0 +1,42 @@\n+#! /bin/bash\n+\n+export DOCKER_CLI_EXPERIMENTAL=enabled\n+\n+VERSION=$(cat VERSION)", + "comment_created_at": "2021-07-28T16:31:33+00:00", + "comment_author": "nkierpiec", + "comment_body": "Ah, yea - the whole head of promotion thing would make this a problem - good catch", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-externalize-configuration-values.md b/_reviewers/chef-externalize-configuration-values.md index 2f5952e..e823e48 100644 --- a/_reviewers/chef-externalize-configuration-values.md +++ b/_reviewers/chef-externalize-configuration-values.md @@ -38,163 +38,3 @@ VERSION="${EXPEDITOR_VERSION}" # Avoid VERSION=$(cat VERSION) ``` - - -[ - { - "discussion_id": "1713904952", - "pr_number": 14467, - "pr_file": ".expeditor/scripts/bk_container_prep.sh", - "created_at": "2024-08-12T14:34:36+00:00", - "commented_code": "export FORCE_FFI_YAJL=\"ext\"\nexport CHEF_LICENSE=\"accept-no-persist\"\nexport ENV[\"CHEF_LICENSE_SERVER\"] = \"http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\"", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1713904952", - "repo_full_name": "chef/chef", - "pr_number": 14467, - "pr_file": ".expeditor/scripts/bk_container_prep.sh", - "discussion_id": "1713904952", - "commented_code": "@@ -18,6 +18,8 @@ echo \"--- Preparing Container...\"\n \n export FORCE_FFI_YAJL=\"ext\"\n export CHEF_LICENSE=\"accept-no-persist\"\n+export ENV[\"CHEF_LICENSE_SERVER\"] = \"http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\"", - "comment_created_at": "2024-08-12T14:34:36+00:00", - "comment_author": "tpowell-progress", - "comment_body": "Feels like these env vars should be set in Buildkite itself.", - "pr_file_module": null - }, - { - "comment_id": "1714794576", - "repo_full_name": "chef/chef", - "pr_number": 14467, - "pr_file": ".expeditor/scripts/bk_container_prep.sh", - "discussion_id": "1713904952", - "commented_code": "@@ -18,6 +18,8 @@ echo \"--- Preparing Container...\"\n \n export FORCE_FFI_YAJL=\"ext\"\n export CHEF_LICENSE=\"accept-no-persist\"\n+export ENV[\"CHEF_LICENSE_SERVER\"] = \"http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\"", - "comment_created_at": "2024-08-13T07:18:59+00:00", - "comment_author": "ahasunos", - "comment_body": "will refactor them to be in a single place instead of spread out throughout the codebase.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1743781220", - "pr_number": 14467, - 
"pr_file": ".expeditor/scripts/bk_container_prep.sh", - "created_at": "2024-09-04T13:18:55+00:00", - "commented_code": "export FORCE_FFI_YAJL=\"ext\"\nexport CHEF_LICENSE=\"accept-no-persist\"\nexport CHEF_LICENSE_SERVER=\"http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000\"", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1743781220", - "repo_full_name": "chef/chef", - "pr_number": 14467, - "pr_file": ".expeditor/scripts/bk_container_prep.sh", - "discussion_id": "1743781220", - "commented_code": "@@ -18,6 +18,8 @@ echo \"--- Preparing Container...\"\n \n export FORCE_FFI_YAJL=\"ext\"\n export CHEF_LICENSE=\"accept-no-persist\"\n+export CHEF_LICENSE_SERVER=\"http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000\"", - "comment_created_at": "2024-09-04T13:18:55+00:00", - "comment_author": "tpowell-progress", - "comment_body": "We don't need to be pointing at a raw endpoint in the code. It's likely unstable, and we probably don't want to expose it, regardless. Needs to be injected via secrets or env just to not have to have a pull request (that will have failing pipelines due to this value being stale) to update this value.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1743784377", - "pr_number": 14467, - "pr_file": ".expeditor/scripts/bk_linux_exec.sh", - "created_at": "2024-09-04T13:20:33+00:00", - "commented_code": "export BUNDLE_GEMFILE=$PWD/kitchen-tests/Gemfile\nexport FORCE_FFI_YAJL=ext\nexport CHEF_LICENSE=\"accept-silent\"\nexport CHEF_LICENSE_SERVER=\"http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000\"", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1743784377", - "repo_full_name": "chef/chef", - "pr_number": 14467, - "pr_file": ".expeditor/scripts/bk_linux_exec.sh", - "discussion_id": "1743784377", - "commented_code": "@@ -28,6 +28,8 @@ asdf global ruby $ruby_version\n export BUNDLE_GEMFILE=$PWD/kitchen-tests/Gemfile\n export FORCE_FFI_YAJL=ext\n export CHEF_LICENSE=\"accept-silent\"\n+export CHEF_LICENSE_SERVER=\"http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000\"", - "comment_created_at": "2024-09-04T13:20:33+00:00", - "comment_author": "tpowell-progress", - "comment_body": "Secrets / env value instead of statically defined in code.\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1743784601", - "pr_number": 14467, - "pr_file": ".expeditor/scripts/prep_and_run_tests.sh", - "created_at": "2024-09-04T13:20:40+00:00", - "commented_code": "set -euo pipefail\n\nexport CHEF_LICENSE_SERVER=\"http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\"", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1743784601", - "repo_full_name": "chef/chef", - "pr_number": 14467, - "pr_file": ".expeditor/scripts/prep_and_run_tests.sh", - "discussion_id": "1743784601", - "commented_code": "@@ -2,6 +2,8 @@\n \n set -euo pipefail\n \n+export CHEF_LICENSE_SERVER=\"http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\"", - "comment_created_at": "2024-09-04T13:20:40+00:00", - "comment_author": "tpowell-progress", - "comment_body": "Secrets / env value instead of statically defined in code.\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1903930493", - "pr_number": 14775, - "pr_file": ".expeditor/scripts/build-infra-deb.sh", - "created_at": "2025-01-06T09:45:12+00:00", - 
"commented_code": "#!/bin/bash\n\nset -euo pipefail\n\nvalidate_env_vars() {\n if [ -z \"${CHEF_INFRA_MIGRATE_TAR_DEB:-}\" ] || [ -z \"${CHEF_INFRA_HAB_TAR_DEB:-}\" ]; then", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1903930493", - "repo_full_name": "chef/chef", - "pr_number": 14775, - "pr_file": ".expeditor/scripts/build-infra-deb.sh", - "discussion_id": "1903930493", - "commented_code": "@@ -0,0 +1,187 @@\n+#!/bin/bash\n+\n+set -euo pipefail\n+\n+validate_env_vars() {\n+ if [ -z \"${CHEF_INFRA_MIGRATE_TAR_DEB:-}\" ] || [ -z \"${CHEF_INFRA_HAB_TAR_DEB:-}\" ]; then", - "comment_created_at": "2025-01-06T09:45:12+00:00", - "comment_author": "sajjaphani", - "comment_body": "Remove `_DEB` from the environment variable to align with the other build script for RPM.\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "678445053", - "pr_number": 11871, - "pr_file": ".expeditor/promote-docker-images.sh", - "created_at": "2021-07-28T16:01:22+00:00", - "commented_code": "#! /bin/bash\n\nexport DOCKER_CLI_EXPERIMENTAL=enabled\n\nVERSION=$(cat VERSION)", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "678445053", - "repo_full_name": "chef/chef", - "pr_number": 11871, - "pr_file": ".expeditor/promote-docker-images.sh", - "discussion_id": "678445053", - "commented_code": "@@ -0,0 +1,42 @@\n+#! /bin/bash\n+\n+export DOCKER_CLI_EXPERIMENTAL=enabled\n+\n+VERSION=$(cat VERSION)", - "comment_created_at": "2021-07-28T16:01:22+00:00", - "comment_author": "jeremiahsnapp", - "comment_body": "I don't think you want the contents of the VERSION file. Those contents could be different from the version that is being promoted. I think you can just have this script use `EXPEDITOR_VERSION`.", - "pr_file_module": null - }, - { - "comment_id": "678468868", - "repo_full_name": "chef/chef", - "pr_number": 11871, - "pr_file": ".expeditor/promote-docker-images.sh", - "discussion_id": "678445053", - "commented_code": "@@ -0,0 +1,42 @@\n+#! /bin/bash\n+\n+export DOCKER_CLI_EXPERIMENTAL=enabled\n+\n+VERSION=$(cat VERSION)", - "comment_created_at": "2021-07-28T16:31:33+00:00", - "comment_author": "nkierpiec", - "comment_body": "Ah, yea - the whole head of promotion thing would make this a problem - good catch", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-extract-and-organize-methods.json b/_reviewers/chef-extract-and-organize-methods.json new file mode 100644 index 0000000..9a9cf42 --- /dev/null +++ b/_reviewers/chef-extract-and-organize-methods.json @@ -0,0 +1,286 @@ +[ + { + "discussion_id": "1377760869", + "pr_number": 14052, + "pr_file": "lib/chef/provider/powershell_script.rb", + "created_at": "2023-10-31T15:17:16+00:00", + "commented_code": "# executed, otherwise 0 or 1 based on whether $? is set to true\n # (success, where we return 0) or false (where we return 1).\n def wrapper_script\n <<~EOH\n # The script looks very slightly different with inline powershell since", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1377760869", + "repo_full_name": "chef/chef", + "pr_number": 14052, + "pr_file": "lib/chef/provider/powershell_script.rb", + "discussion_id": "1377760869", + "commented_code": "@@ -130,7 +193,78 @@ def validate_script_syntax!\n # executed, otherwise 0 or 1 based on whether $? 
is set to true\n # (success, where we return 0) or false (where we return 1).\n def wrapper_script\n- <<~EOH\n+ # The script looks very slightly different with inline powershell since", + "comment_created_at": "2023-10-31T15:17:16+00:00", + "comment_author": "tpowell-progress", + "comment_body": "Can we extract these two options into their own methods? Maybe something like `inline_powershell_wrapper...` and `shell_out_wrapper...`? The method is a bit long to start with, so the if/else is harder to read due to the length.", + "pr_file_module": null + }, + { + "comment_id": "1377894471", + "repo_full_name": "chef/chef", + "pr_number": 14052, + "pr_file": "lib/chef/provider/powershell_script.rb", + "discussion_id": "1377760869", + "commented_code": "@@ -130,7 +193,78 @@ def validate_script_syntax!\n # executed, otherwise 0 or 1 based on whether $? is set to true\n # (success, where we return 0) or false (where we return 1).\n def wrapper_script\n- <<~EOH\n+ # The script looks very slightly different with inline powershell since", + "comment_created_at": "2023-10-31T16:49:14+00:00", + "comment_author": "jaymzjulian", + "comment_body": "absolutely can. Retested with both traditional and inline powershell to ensure that didn't break things", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1385630851", + "pr_number": 14052, + "pr_file": "lib/chef/provider/powershell_script.rb", + "created_at": "2023-11-07T21:53:27+00:00", + "commented_code": "# last process run in the script if it is the last command\n # executed, otherwise 0 or 1 based on whether $? is set to true\n # (success, where we return 0) or false (where we return 1).\n def wrapper_script\n def inline_powershell_wrapper_script", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1385630851", + "repo_full_name": "chef/chef", + "pr_number": 14052, + "pr_file": "lib/chef/provider/powershell_script.rb", + "discussion_id": "1385630851", + "commented_code": "@@ -129,7 +192,80 @@ def validate_script_syntax!\n # last process run in the script if it is the last command\n # executed, otherwise 0 or 1 based on whether $? is set to true\n # (success, where we return 0) or false (where we return 1).\n- def wrapper_script\n+ def inline_powershell_wrapper_script", + "comment_created_at": "2023-11-07T21:53:27+00:00", + "comment_author": "tpowell-progress", + "comment_body": "Thinking this can potentially be deduplicated by some sort of templating or builder logic so that the behavior stays consistent between them. h/t: @dafyddcrosby ", + "pr_file_module": null + }, + { + "comment_id": "1385691294", + "repo_full_name": "chef/chef", + "pr_number": 14052, + "pr_file": "lib/chef/provider/powershell_script.rb", + "discussion_id": "1385630851", + "commented_code": "@@ -129,7 +192,80 @@ def validate_script_syntax!\n # last process run in the script if it is the last command\n # executed, otherwise 0 or 1 based on whether $? 
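The extraction tpowell-progress sketches above would look roughly like the following; the helper names are his suggestions, `inline_powershell?` is an assumed predicate for choosing the variant, and the heredoc bodies are elided. As noted later in the thread, the PR ultimately kept a single method with small inline conditionals instead:

```ruby
# Illustrative shape only -- the real wrapper bodies are long and omitted.
def wrapper_script
  inline_powershell? ? inline_powershell_wrapper_script : shell_out_wrapper_script
end

def inline_powershell_wrapper_script
  <<~EOH
    # inline-PowerShell variant of the wrapper goes here
  EOH
end

def shell_out_wrapper_script
  <<~EOH
    # shell_out variant of the wrapper goes here
  EOH
end
```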
is set to true\n # (success, where we return 0) or false (where we return 1).\n- def wrapper_script\n+ def inline_powershell_wrapper_script", + "comment_created_at": "2023-11-07T22:51:22+00:00", + "comment_author": "jaymzjulian", + "comment_body": "yeah, i think that's sane to do - i'll make that happen", + "pr_file_module": null + }, + { + "comment_id": "1393521546", + "repo_full_name": "chef/chef", + "pr_number": 14052, + "pr_file": "lib/chef/provider/powershell_script.rb", + "discussion_id": "1385630851", + "commented_code": "@@ -129,7 +192,80 @@ def validate_script_syntax!\n # last process run in the script if it is the last command\n # executed, otherwise 0 or 1 based on whether $? is set to true\n # (success, where we return 0) or false (where we return 1).\n- def wrapper_script\n+ def inline_powershell_wrapper_script", + "comment_created_at": "2023-11-15T01:32:53+00:00", + "comment_author": "jaymzjulian", + "comment_body": "In the latest push, this is simplified to a single function with the two changes as simple conditions at those line points, which seemed much easier to read to my eye.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1297860584", + "pr_number": 13873, + "pr_file": "lib/chef/provider/package/apt.rb", + "created_at": "2023-08-18T00:00:50+00:00", + "commented_code": "end\n\n def resolve_virtual_package_name(pkg)\n showpkg = run_noninteractive(\"apt-cache\", \"showpkg\", pkg).stdout\n # apt-cache considers package names as regex by default. The anchor flag will decide whether to match name exact string or not\n resolved_pkg = new_resource.anchor_package_name_patterns ? pkg : \"^#{pkg}$\"", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1297860584", + "repo_full_name": "chef/chef", + "pr_number": 13873, + "pr_file": "lib/chef/provider/package/apt.rb", + "discussion_id": "1297860584", + "commented_code": "@@ -216,7 +218,9 @@ def resolve_package_versions(pkg)\n end\n \n def resolve_virtual_package_name(pkg)\n- showpkg = run_noninteractive(\"apt-cache\", \"showpkg\", pkg).stdout\n+ # apt-cache considers package names as regex by default. The anchor flag will decide whether to match name exact string or not\n+ resolved_pkg = new_resource.anchor_package_name_patterns ? pkg : \"^#{pkg}$\"", + "comment_created_at": "2023-08-18T00:00:50+00:00", + "comment_author": "jaymzh", + "comment_body": "rather than repeat this line/logic over and over, please extract this to a method (and I\"m reversing the log here per my previous comment):\r\n\r\n```ruby\r\ndef resolved_package(pkg)\r\n new_resource.anchor_package_regex ? \"^#{pkg}$\" : pkg\r\nend\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "755493907", + "pr_number": 11910, + "pr_file": "lib/chef/provider/package/apt.rb", + "created_at": "2021-11-23T20:51:09+00:00", + "commented_code": "}\n end\n\n # Helper to construct Hash of names-to-package-information.", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "755493907", + "repo_full_name": "chef/chef", + "pr_number": 11910, + "pr_file": "lib/chef/provider/package/apt.rb", + "discussion_id": "755493907", + "commented_code": "@@ -255,6 +273,81 @@ def package_data_for(pkg)\n }\n end\n \n+ # Helper to construct Hash of names-to-package-information.", + "comment_created_at": "2021-11-23T20:51:09+00:00", + "comment_author": "btm", + "comment_body": "Everything from here down looks like its copied from `Package::Dpkg`. We don't want to duplicate that. 
We already have the shared `Package::Deb` mixin between the Dpkg and Apt providers that we'd want to refactor this into.", + "pr_file_module": null + }, + { + "comment_id": "758063083", + "repo_full_name": "chef/chef", + "pr_number": 11910, + "pr_file": "lib/chef/provider/package/apt.rb", + "discussion_id": "755493907", + "commented_code": "@@ -255,6 +273,81 @@ def package_data_for(pkg)\n }\n end\n \n+ # Helper to construct Hash of names-to-package-information.", + "comment_created_at": "2021-11-29T06:15:14+00:00", + "comment_author": "manick-vel-11", + "comment_body": "I understand that most of the implementation is being referred from dpkg package. I will make use of `Package::Deb` and will separate out commonly used code by having a mixin between dpkg and apt providers.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "755494818", + "pr_number": 11910, + "pr_file": "lib/chef/provider/package/apt.rb", + "created_at": "2021-11-23T20:52:41+00:00", + "commented_code": "}\n end\n\n # Helper to construct Hash of names-to-package-information.\n #\n # @return [Hash] Mapping of package names to package information\n def name_pkginfo\n @name_pkginfo ||=\n begin\n pkginfos = resolved_source_array.map do |src|\n logger.trace(\"#{new_resource} checking #{src} dpkg status\")\n status = shell_out(\"dpkg-deb\", \"-W\", src)\n status.stdout\n end\n Hash[*package_name_array.zip(pkginfos).flatten]\n end\n end\n\n def name_package_name\n @name_package_name ||= name_pkginfo.transform_values { |v| v ? v.split(\"\\t\")[0] : nil }\n end\n\n # Return package names from the candidate source file(s).\n #\n # @return [Array] Array of actual package names read from the source files\n def get_package_name\n package_name_array.map { |name| name_package_name[name] }\n end\n\n def read_current_version_of_package(package_name)\n logger.trace(\"#{new_resource} checking install state of #{package_name}\")\n status = shell_out!(\"dpkg\", \"-s\", package_name, returns: [0, 1])\n package_installed = false\n status.stdout.each_line do |line|\n case line\n when /^Status: deinstall ok config-files/.freeze", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "755494818", + "repo_full_name": "chef/chef", + "pr_number": 11910, + "pr_file": "lib/chef/provider/package/apt.rb", + "discussion_id": "755494818", + "commented_code": "@@ -255,6 +273,81 @@ def package_data_for(pkg)\n }\n end\n \n+ # Helper to construct Hash of names-to-package-information.\n+ #\n+ # @return [Hash] Mapping of package names to package information\n+ def name_pkginfo\n+ @name_pkginfo ||=\n+ begin\n+ pkginfos = resolved_source_array.map do |src|\n+ logger.trace(\"#{new_resource} checking #{src} dpkg status\")\n+ status = shell_out(\"dpkg-deb\", \"-W\", src)\n+ status.stdout\n+ end\n+ Hash[*package_name_array.zip(pkginfos).flatten]\n+ end\n+ end\n+\n+ def name_package_name\n+ @name_package_name ||= name_pkginfo.transform_values { |v| v ? 
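A directional sketch of the refactor btm describes above: the copied helpers, and in particular their status regexes, would live once in the existing `Package::Deb` mixin shared by the Dpkg and Apt providers instead of being duplicated. Structure only, with method bodies elided:

```ruby
class Chef
  class Provider
    class Package
      module Deb
        # Single shared home for the dpkg status parsing, so a regex fix lands
        # in one place for both providers.
        def read_current_version_of_package(package_name)
          logger.trace("#{new_resource} checking install state of #{package_name}")
          # shared `dpkg -s` parsing goes here
        end
      end
    end
  end
end
```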
v.split(\"\\t\")[0] : nil }\n+ end\n+\n+ # Return package names from the candidate source file(s).\n+ #\n+ # @return [Array] Array of actual package names read from the source files\n+ def get_package_name\n+ package_name_array.map { |name| name_package_name[name] }\n+ end\n+\n+ def read_current_version_of_package(package_name)\n+ logger.trace(\"#{new_resource} checking install state of #{package_name}\")\n+ status = shell_out!(\"dpkg\", \"-s\", package_name, returns: [0, 1])\n+ package_installed = false\n+ status.stdout.each_line do |line|\n+ case line\n+ when /^Status: deinstall ok config-files/.freeze", + "comment_created_at": "2021-11-23T20:52:41+00:00", + "comment_author": "btm", + "comment_body": "We especially don't want to duplicate these regexes. The pattern we use for defining these over in https://github.com/chef/chef/blob/main/lib/chef/provider/package/dpkg.rb#L28-L30 is intentional. But duplicating these, if there was a bug we needed to fix in one provider we'd have to remember that we duplicated this code in another provider and fix the same regex. We'd likely forget that and miss the fix.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "792112012", + "pr_number": 12182, + "pr_file": "lib/chef/provider/package/zypper.rb", + "created_at": "2022-01-25T20:55:32+00:00", + "commented_code": "end\n\n def candidate_version\n @candidate_version ||= package_name_array.each_with_index.map { |pkg, i| available_version(i) }\n if source_files_exist?\n logger.trace(\"#{new_resource} checking rpm status\")\n shell_out!(\"rpm\", \"-qp\", \"--queryformat\", \"%{NAME} %{VERSION}-%{RELEASE}\\n\", new_resource.source).stdout.each_line do |line|\n case line\n when /^(\\S+)\\s(\\S+)$/\n current_resource.package_name($1)\n new_resource.version($2)\n @candidate_version = $2\n end\n end", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "792112012", + "repo_full_name": "chef/chef", + "pr_number": 12182, + "pr_file": "lib/chef/provider/package/zypper.rb", + "discussion_id": "792112012", + "commented_code": "@@ -70,7 +71,20 @@ def get_current_versions\n end\n \n def candidate_version\n- @candidate_version ||= package_name_array.each_with_index.map { |pkg, i| available_version(i) }\n+ if source_files_exist?\n+ logger.trace(\"#{new_resource} checking rpm status\")\n+ shell_out!(\"rpm\", \"-qp\", \"--queryformat\", \"%{NAME} %{VERSION}-%{RELEASE}\\n\", new_resource.source).stdout.each_line do |line|\n+ case line\n+ when /^(\\S+)\\s(\\S+)$/\n+ current_resource.package_name($1)\n+ new_resource.version($2)\n+ @candidate_version = $2\n+ end\n+ end", + "comment_created_at": "2022-01-25T20:55:32+00:00", + "comment_author": "lamont-granquist", + "comment_body": "the shell_out and its parsing here should probably be extracted out to its own method, see `resolve_source_to_version_obj` in the dnf or yum providers (although you don't have a version obj in this case so probably `resolve_source_to_version`.", + "pr_file_module": null + }, + { + "comment_id": "792118493", + "repo_full_name": "chef/chef", + "pr_number": 12182, + "pr_file": "lib/chef/provider/package/zypper.rb", + "discussion_id": "792112012", + "commented_code": "@@ -70,7 +71,20 @@ def get_current_versions\n end\n \n def candidate_version\n- @candidate_version ||= package_name_array.each_with_index.map { |pkg, i| available_version(i) }\n+ if source_files_exist?\n+ logger.trace(\"#{new_resource} checking rpm status\")\n+ shell_out!(\"rpm\", \"-qp\", \"--queryformat\", \"%{NAME} %{VERSION}-%{RELEASE}\\n\", 
new_resource.source).stdout.each_line do |line|\n+ case line\n+ when /^(\\S+)\\s(\\S+)$/\n+ current_resource.package_name($1)\n+ new_resource.version($2)\n+ @candidate_version = $2\n+ end\n+ end", + "comment_created_at": "2022-01-25T21:04:59+00:00", + "comment_author": "lamont-granquist", + "comment_body": "(in general the dnf provider is a reasonably good template for how the code change should look)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "718895310", + "pr_number": 11898, + "pr_file": "lib/chef/resource/macos_userdefaults.rb", + "created_at": "2021-09-29T21:23:01+00:00", + "commented_code": "required: true\n\n property :host, [String, Symbol],\n description: \"Set either :current or a hostname to set the user default at the host level.\",\n description: \"Set either :current, :all or a hostname to set the user default at the host level.\",\n desired_state: false,\n introduced: \"16.3\"\n introduced: \"16.3\",\n coerce: proc { |value| to_cf_host(value) }\n\n property :value, [Integer, Float, String, TrueClass, FalseClass, Hash, Array],\n description: \"The value of the key. Note: With the `type` property set to `bool`, `String` forms of Boolean true/false values that Apple accepts in the defaults command will be coerced: 0/1, 'TRUE'/'FALSE,' 'true'/false', 'YES'/'NO', or 'yes'/'no'.\",\n required: [:write],\n coerce: proc { |v| v.is_a?(Hash) ? v.transform_keys(&:to_s) : v } # make sure keys are all strings for comparison\n required: [:write]\n\n property :type, String,\n description: \"The value type of the preference key.\",\n equal_to: %w{bool string int float array dict},\n desired_state: false\n desired_state: false,\n deprecated: true\n\n property :user, String,\n description: \"The system user that the default will be applied to.\",\n desired_state: false\n property :user, [String, Symbol],\n description: \"The system user that the default will be applied to. Set :current for current user, :all for all users or pass a valid username\",\n desired_state: false,\n coerce: proc { |value| to_cf_user(value) }\n\n property :sudo, [TrueClass, FalseClass],\n description: \"Set to true if the setting you wish to modify requires privileged access. This requires passwordless sudo for the `/usr/bin/defaults` command to be setup for the user running #{ChefUtils::Dist::Infra::PRODUCT}.\",\n default: false,\n desired_state: false\n desired_state: false,\n deprecated: true\n\n load_current_value do |new_resource|\n Chef::Log.debug \"#load_current_value: shelling out \\\"#{defaults_export_cmd(new_resource).join(\" \")}\\\" to determine state\"\n state = shell_out(defaults_export_cmd(new_resource), user: new_resource.user)\n Chef::Log.debug \"#load_current_value: attempting to read \\\"#{new_resource.domain}\\\" value from preferences to determine state\"\n\n if state.error? || state.stdout.empty?\n Chef::Log.debug \"#load_current_value: #{defaults_export_cmd(new_resource).join(\" \")} returned stdout: #{state.stdout} and stderr: #{state.stderr}\"\n current_value_does_not_exist!\n end\n pref = get_preference(new_resource)\n current_value_does_not_exist! 
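The extraction lamont-granquist suggests above might look like this; the method name follows his `resolve_source_to_version` suggestion, the `rpm` invocation mirrors the inline code it replaces, and the original block's side effects on `current_resource`/`new_resource` are left out for brevity:

```ruby
def resolve_source_to_version
  logger.trace("#{new_resource} checking rpm status")
  shell_out!("rpm", "-qp", "--queryformat", "%{NAME} %{VERSION}-%{RELEASE}\n",
             new_resource.source).stdout.each_line do |line|
    return Regexp.last_match(2) if line =~ /^(\S+)\s(\S+)$/
  end
  nil
end

def candidate_version
  @candidate_version ||=
    if source_files_exist?
      resolve_source_to_version
    else
      package_name_array.each_with_index.map { |_pkg, i| available_version(i) }
    end
end
```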
if pref.nil?\n\n plist_data = ::Plist.parse_xml(state.stdout)\n\n # handle the situation where the key doesn't exist in the domain\n if plist_data.key?(new_resource.key)\n key new_resource.key\n else\n current_value_does_not_exist!\n end\n\n value plist_data[new_resource.key]\n end\n\n #\n # The defaults command to export a domain\n #\n # @return [Array] defaults command\n #\n def defaults_export_cmd(resource)\n state_cmd = [\"/usr/bin/defaults\"]\n\n if resource.host == \"current\"\n state_cmd.concat([\"-currentHost\"])\n elsif resource.host # they specified a non-nil value, which is a hostname\n state_cmd.concat([\"-host\", resource.host])\n end\n\n state_cmd.concat([\"export\", resource.domain, \"-\"])\n state_cmd\n key new_resource.key\n value pref\n end\n\n action :write, description: \"Write the value to the specified domain/key.\" do\n converge_if_changed do\n cmd = defaults_modify_cmd\n Chef::Log.debug(\"Updating defaults value by shelling out: #{cmd.join(\" \")}\")\n\n shell_out!(cmd, user: new_resource.user)\n Chef::Log.debug(\"Updating defaults value for #{new_resource.key} in #{new_resource.domain}\")\n set_preference(new_resource)\n end\n end\n\n action :delete, description: \"Delete a key from a domain.\" do\n # if it's not there there's nothing to remove\n return unless current_resource\n return if current_resource.nil?\n\n converge_by(\"delete domain:#{new_resource.domain} key:#{new_resource.key}\") do\n\n cmd = defaults_modify_cmd\n Chef::Log.debug(\"Removing defaults key by shelling out: #{cmd.join(\" \")}\")\n\n shell_out!(cmd, user: new_resource.user)\n Chef::Log.debug(\"Removing defaults key: #{new_resource.key}\")\n CF::Preferences.set!(new_resource.key, nil, new_resource.domain, new_resource.user, new_resource.host)\n end\n end\n\n action_class do\n #\n # The command used to write or delete delete values from domains\n #\n # @return [Array] Array representation of defaults command to run\n #\n def defaults_modify_cmd\n cmd = [\"/usr/bin/defaults\"]\n\n if new_resource.host == :current\n cmd.concat([\"-currentHost\"])\n elsif new_resource.host # they specified a non-nil value, which is a hostname\n cmd.concat([\"-host\", new_resource.host])\n end\n\n cmd.concat([action.to_s, new_resource.domain, new_resource.key])\n cmd.concat(processed_value) if action == :write\n cmd.prepend(\"sudo\") if new_resource.sudo\n cmd\n end\n def get_preference(new_resource)\n CF::Preferences.get(new_resource.key, new_resource.domain, new_resource.user, new_resource.host)\n end\n\n #\n # convert the provided value into the format defaults expects\n #\n # @return [array] array of values starting with the type if applicable\n #\n def processed_value\n type = new_resource.type || value_type(new_resource.value)\n\n # when dict this creates an array of values [\"Key1\", \"Value1\", \"Key2\", \"Value2\" ...]\n cmd_values = [\"-#{type}\"]\n\n case type\n when \"dict\"\n cmd_values.concat(new_resource.value.flatten)\n when \"array\"\n cmd_values.concat(new_resource.value)\n when \"bool\"\n cmd_values.concat(bool_to_defaults_bool(new_resource.value))\n else\n cmd_values.concat([new_resource.value])\n end\n\n cmd_values\n end\n def set_preference(new_resource)\n CF::Preferences.set!(new_resource.key, new_resource.value, new_resource.domain, new_resource.user, new_resource.host)\n end", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "718895310", + "repo_full_name": "chef/chef", + "pr_number": 11898, + "pr_file": "lib/chef/resource/macos_userdefaults.rb", + 
"discussion_id": "718895310", + "commented_code": "@@ -78,173 +79,88 @@ class MacosUserDefaults < Chef::Resource\n required: true\n \n property :host, [String, Symbol],\n- description: \"Set either :current or a hostname to set the user default at the host level.\",\n+ description: \"Set either :current, :all or a hostname to set the user default at the host level.\",\n desired_state: false,\n- introduced: \"16.3\"\n+ introduced: \"16.3\",\n+ coerce: proc { |value| to_cf_host(value) }\n \n property :value, [Integer, Float, String, TrueClass, FalseClass, Hash, Array],\n description: \"The value of the key. Note: With the `type` property set to `bool`, `String` forms of Boolean true/false values that Apple accepts in the defaults command will be coerced: 0/1, 'TRUE'/'FALSE,' 'true'/false', 'YES'/'NO', or 'yes'/'no'.\",\n- required: [:write],\n- coerce: proc { |v| v.is_a?(Hash) ? v.transform_keys(&:to_s) : v } # make sure keys are all strings for comparison\n+ required: [:write]\n \n property :type, String,\n description: \"The value type of the preference key.\",\n equal_to: %w{bool string int float array dict},\n- desired_state: false\n+ desired_state: false,\n+ deprecated: true\n \n- property :user, String,\n- description: \"The system user that the default will be applied to.\",\n- desired_state: false\n+ property :user, [String, Symbol],\n+ description: \"The system user that the default will be applied to. Set :current for current user, :all for all users or pass a valid username\",\n+ desired_state: false,\n+ coerce: proc { |value| to_cf_user(value) }\n \n property :sudo, [TrueClass, FalseClass],\n description: \"Set to true if the setting you wish to modify requires privileged access. This requires passwordless sudo for the `/usr/bin/defaults` command to be setup for the user running #{ChefUtils::Dist::Infra::PRODUCT}.\",\n default: false,\n- desired_state: false\n+ desired_state: false,\n+ deprecated: true\n \n load_current_value do |new_resource|\n- Chef::Log.debug \"#load_current_value: shelling out \\\"#{defaults_export_cmd(new_resource).join(\" \")}\\\" to determine state\"\n- state = shell_out(defaults_export_cmd(new_resource), user: new_resource.user)\n+ Chef::Log.debug \"#load_current_value: attempting to read \\\"#{new_resource.domain}\\\" value from preferences to determine state\"\n \n- if state.error? || state.stdout.empty?\n- Chef::Log.debug \"#load_current_value: #{defaults_export_cmd(new_resource).join(\" \")} returned stdout: #{state.stdout} and stderr: #{state.stderr}\"\n- current_value_does_not_exist!\n- end\n+ pref = get_preference(new_resource)\n+ current_value_does_not_exist! 
if pref.nil?\n \n- plist_data = ::Plist.parse_xml(state.stdout)\n-\n- # handle the situation where the key doesn't exist in the domain\n- if plist_data.key?(new_resource.key)\n- key new_resource.key\n- else\n- current_value_does_not_exist!\n- end\n-\n- value plist_data[new_resource.key]\n- end\n-\n- #\n- # The defaults command to export a domain\n- #\n- # @return [Array] defaults command\n- #\n- def defaults_export_cmd(resource)\n- state_cmd = [\"/usr/bin/defaults\"]\n-\n- if resource.host == \"current\"\n- state_cmd.concat([\"-currentHost\"])\n- elsif resource.host # they specified a non-nil value, which is a hostname\n- state_cmd.concat([\"-host\", resource.host])\n- end\n-\n- state_cmd.concat([\"export\", resource.domain, \"-\"])\n- state_cmd\n+ key new_resource.key\n+ value pref\n end\n \n action :write, description: \"Write the value to the specified domain/key.\" do\n converge_if_changed do\n- cmd = defaults_modify_cmd\n- Chef::Log.debug(\"Updating defaults value by shelling out: #{cmd.join(\" \")}\")\n-\n- shell_out!(cmd, user: new_resource.user)\n+ Chef::Log.debug(\"Updating defaults value for #{new_resource.key} in #{new_resource.domain}\")\n+ set_preference(new_resource)\n end\n end\n \n action :delete, description: \"Delete a key from a domain.\" do\n # if it's not there there's nothing to remove\n- return unless current_resource\n+ return if current_resource.nil?\n \n converge_by(\"delete domain:#{new_resource.domain} key:#{new_resource.key}\") do\n-\n- cmd = defaults_modify_cmd\n- Chef::Log.debug(\"Removing defaults key by shelling out: #{cmd.join(\" \")}\")\n-\n- shell_out!(cmd, user: new_resource.user)\n+ Chef::Log.debug(\"Removing defaults key: #{new_resource.key}\")\n+ CF::Preferences.set!(new_resource.key, nil, new_resource.domain, new_resource.user, new_resource.host)\n end\n end\n \n- action_class do\n- #\n- # The command used to write or delete delete values from domains\n- #\n- # @return [Array] Array representation of defaults command to run\n- #\n- def defaults_modify_cmd\n- cmd = [\"/usr/bin/defaults\"]\n-\n- if new_resource.host == :current\n- cmd.concat([\"-currentHost\"])\n- elsif new_resource.host # they specified a non-nil value, which is a hostname\n- cmd.concat([\"-host\", new_resource.host])\n- end\n-\n- cmd.concat([action.to_s, new_resource.domain, new_resource.key])\n- cmd.concat(processed_value) if action == :write\n- cmd.prepend(\"sudo\") if new_resource.sudo\n- cmd\n- end\n+ def get_preference(new_resource)\n+ CF::Preferences.get(new_resource.key, new_resource.domain, new_resource.user, new_resource.host)\n+ end\n \n- #\n- # convert the provided value into the format defaults expects\n- #\n- # @return [array] array of values starting with the type if applicable\n- #\n- def processed_value\n- type = new_resource.type || value_type(new_resource.value)\n-\n- # when dict this creates an array of values [\"Key1\", \"Value1\", \"Key2\", \"Value2\" ...]\n- cmd_values = [\"-#{type}\"]\n-\n- case type\n- when \"dict\"\n- cmd_values.concat(new_resource.value.flatten)\n- when \"array\"\n- cmd_values.concat(new_resource.value)\n- when \"bool\"\n- cmd_values.concat(bool_to_defaults_bool(new_resource.value))\n- else\n- cmd_values.concat([new_resource.value])\n- end\n-\n- cmd_values\n- end\n+ def set_preference(new_resource)\n+ CF::Preferences.set!(new_resource.key, new_resource.value, new_resource.domain, new_resource.user, new_resource.host)\n+ end", + "comment_created_at": "2021-09-29T21:23:01+00:00", + "comment_author": "lamont-granquist", + "comment_body": 
"don't think this method is getting used (might be causing some confusion over the action_class block)", + "pr_file_module": null + }, + { + "comment_id": "719066530", + "repo_full_name": "chef/chef", + "pr_number": 11898, + "pr_file": "lib/chef/resource/macos_userdefaults.rb", + "discussion_id": "718895310", + "commented_code": "@@ -78,173 +79,88 @@ class MacosUserDefaults < Chef::Resource\n required: true\n \n property :host, [String, Symbol],\n- description: \"Set either :current or a hostname to set the user default at the host level.\",\n+ description: \"Set either :current, :all or a hostname to set the user default at the host level.\",\n desired_state: false,\n- introduced: \"16.3\"\n+ introduced: \"16.3\",\n+ coerce: proc { |value| to_cf_host(value) }\n \n property :value, [Integer, Float, String, TrueClass, FalseClass, Hash, Array],\n description: \"The value of the key. Note: With the `type` property set to `bool`, `String` forms of Boolean true/false values that Apple accepts in the defaults command will be coerced: 0/1, 'TRUE'/'FALSE,' 'true'/false', 'YES'/'NO', or 'yes'/'no'.\",\n- required: [:write],\n- coerce: proc { |v| v.is_a?(Hash) ? v.transform_keys(&:to_s) : v } # make sure keys are all strings for comparison\n+ required: [:write]\n \n property :type, String,\n description: \"The value type of the preference key.\",\n equal_to: %w{bool string int float array dict},\n- desired_state: false\n+ desired_state: false,\n+ deprecated: true\n \n- property :user, String,\n- description: \"The system user that the default will be applied to.\",\n- desired_state: false\n+ property :user, [String, Symbol],\n+ description: \"The system user that the default will be applied to. Set :current for current user, :all for all users or pass a valid username\",\n+ desired_state: false,\n+ coerce: proc { |value| to_cf_user(value) }\n \n property :sudo, [TrueClass, FalseClass],\n description: \"Set to true if the setting you wish to modify requires privileged access. This requires passwordless sudo for the `/usr/bin/defaults` command to be setup for the user running #{ChefUtils::Dist::Infra::PRODUCT}.\",\n default: false,\n- desired_state: false\n+ desired_state: false,\n+ deprecated: true\n \n load_current_value do |new_resource|\n- Chef::Log.debug \"#load_current_value: shelling out \\\"#{defaults_export_cmd(new_resource).join(\" \")}\\\" to determine state\"\n- state = shell_out(defaults_export_cmd(new_resource), user: new_resource.user)\n+ Chef::Log.debug \"#load_current_value: attempting to read \\\"#{new_resource.domain}\\\" value from preferences to determine state\"\n \n- if state.error? || state.stdout.empty?\n- Chef::Log.debug \"#load_current_value: #{defaults_export_cmd(new_resource).join(\" \")} returned stdout: #{state.stdout} and stderr: #{state.stderr}\"\n- current_value_does_not_exist!\n- end\n+ pref = get_preference(new_resource)\n+ current_value_does_not_exist! 
if pref.nil?\n \n- plist_data = ::Plist.parse_xml(state.stdout)\n-\n- # handle the situation where the key doesn't exist in the domain\n- if plist_data.key?(new_resource.key)\n- key new_resource.key\n- else\n- current_value_does_not_exist!\n- end\n-\n- value plist_data[new_resource.key]\n- end\n-\n- #\n- # The defaults command to export a domain\n- #\n- # @return [Array] defaults command\n- #\n- def defaults_export_cmd(resource)\n- state_cmd = [\"/usr/bin/defaults\"]\n-\n- if resource.host == \"current\"\n- state_cmd.concat([\"-currentHost\"])\n- elsif resource.host # they specified a non-nil value, which is a hostname\n- state_cmd.concat([\"-host\", resource.host])\n- end\n-\n- state_cmd.concat([\"export\", resource.domain, \"-\"])\n- state_cmd\n+ key new_resource.key\n+ value pref\n end\n \n action :write, description: \"Write the value to the specified domain/key.\" do\n converge_if_changed do\n- cmd = defaults_modify_cmd\n- Chef::Log.debug(\"Updating defaults value by shelling out: #{cmd.join(\" \")}\")\n-\n- shell_out!(cmd, user: new_resource.user)\n+ Chef::Log.debug(\"Updating defaults value for #{new_resource.key} in #{new_resource.domain}\")\n+ set_preference(new_resource)\n end\n end\n \n action :delete, description: \"Delete a key from a domain.\" do\n # if it's not there there's nothing to remove\n- return unless current_resource\n+ return if current_resource.nil?\n \n converge_by(\"delete domain:#{new_resource.domain} key:#{new_resource.key}\") do\n-\n- cmd = defaults_modify_cmd\n- Chef::Log.debug(\"Removing defaults key by shelling out: #{cmd.join(\" \")}\")\n-\n- shell_out!(cmd, user: new_resource.user)\n+ Chef::Log.debug(\"Removing defaults key: #{new_resource.key}\")\n+ CF::Preferences.set!(new_resource.key, nil, new_resource.domain, new_resource.user, new_resource.host)\n end\n end\n \n- action_class do\n- #\n- # The command used to write or delete delete values from domains\n- #\n- # @return [Array] Array representation of defaults command to run\n- #\n- def defaults_modify_cmd\n- cmd = [\"/usr/bin/defaults\"]\n-\n- if new_resource.host == :current\n- cmd.concat([\"-currentHost\"])\n- elsif new_resource.host # they specified a non-nil value, which is a hostname\n- cmd.concat([\"-host\", new_resource.host])\n- end\n-\n- cmd.concat([action.to_s, new_resource.domain, new_resource.key])\n- cmd.concat(processed_value) if action == :write\n- cmd.prepend(\"sudo\") if new_resource.sudo\n- cmd\n- end\n+ def get_preference(new_resource)\n+ CF::Preferences.get(new_resource.key, new_resource.domain, new_resource.user, new_resource.host)\n+ end\n \n- #\n- # convert the provided value into the format defaults expects\n- #\n- # @return [array] array of values starting with the type if applicable\n- #\n- def processed_value\n- type = new_resource.type || value_type(new_resource.value)\n-\n- # when dict this creates an array of values [\"Key1\", \"Value1\", \"Key2\", \"Value2\" ...]\n- cmd_values = [\"-#{type}\"]\n-\n- case type\n- when \"dict\"\n- cmd_values.concat(new_resource.value.flatten)\n- when \"array\"\n- cmd_values.concat(new_resource.value)\n- when \"bool\"\n- cmd_values.concat(bool_to_defaults_bool(new_resource.value))\n- else\n- cmd_values.concat([new_resource.value])\n- end\n-\n- cmd_values\n- end\n+ def set_preference(new_resource)\n+ CF::Preferences.set!(new_resource.key, new_resource.value, new_resource.domain, new_resource.user, new_resource.host)\n+ end", + "comment_created_at": "2021-09-30T05:22:36+00:00", + "comment_author": "rishichawda", + "comment_body": "oh yeah I 
might have overlooked this after I moved the call directly inside the action.. will remove this now.. Thank you! ", + "pr_file_module": null + }, + { + "comment_id": "719069595", + "repo_full_name": "chef/chef", + "pr_number": 11898, + "pr_file": "lib/chef/resource/macos_userdefaults.rb", + "discussion_id": "718895310", + "commented_code": "@@ -78,173 +79,88 @@ class MacosUserDefaults < Chef::Resource\n required: true\n \n property :host, [String, Symbol],\n- description: \"Set either :current or a hostname to set the user default at the host level.\",\n+ description: \"Set either :current, :all or a hostname to set the user default at the host level.\",\n desired_state: false,\n- introduced: \"16.3\"\n+ introduced: \"16.3\",\n+ coerce: proc { |value| to_cf_host(value) }\n \n property :value, [Integer, Float, String, TrueClass, FalseClass, Hash, Array],\n description: \"The value of the key. Note: With the `type` property set to `bool`, `String` forms of Boolean true/false values that Apple accepts in the defaults command will be coerced: 0/1, 'TRUE'/'FALSE,' 'true'/false', 'YES'/'NO', or 'yes'/'no'.\",\n- required: [:write],\n- coerce: proc { |v| v.is_a?(Hash) ? v.transform_keys(&:to_s) : v } # make sure keys are all strings for comparison\n+ required: [:write]\n \n property :type, String,\n description: \"The value type of the preference key.\",\n equal_to: %w{bool string int float array dict},\n- desired_state: false\n+ desired_state: false,\n+ deprecated: true\n \n- property :user, String,\n- description: \"The system user that the default will be applied to.\",\n- desired_state: false\n+ property :user, [String, Symbol],\n+ description: \"The system user that the default will be applied to. Set :current for current user, :all for all users or pass a valid username\",\n+ desired_state: false,\n+ coerce: proc { |value| to_cf_user(value) }\n \n property :sudo, [TrueClass, FalseClass],\n description: \"Set to true if the setting you wish to modify requires privileged access. This requires passwordless sudo for the `/usr/bin/defaults` command to be setup for the user running #{ChefUtils::Dist::Infra::PRODUCT}.\",\n default: false,\n- desired_state: false\n+ desired_state: false,\n+ deprecated: true\n \n load_current_value do |new_resource|\n- Chef::Log.debug \"#load_current_value: shelling out \\\"#{defaults_export_cmd(new_resource).join(\" \")}\\\" to determine state\"\n- state = shell_out(defaults_export_cmd(new_resource), user: new_resource.user)\n+ Chef::Log.debug \"#load_current_value: attempting to read \\\"#{new_resource.domain}\\\" value from preferences to determine state\"\n \n- if state.error? || state.stdout.empty?\n- Chef::Log.debug \"#load_current_value: #{defaults_export_cmd(new_resource).join(\" \")} returned stdout: #{state.stdout} and stderr: #{state.stderr}\"\n- current_value_does_not_exist!\n- end\n+ pref = get_preference(new_resource)\n+ current_value_does_not_exist! 
if pref.nil?\n \n- plist_data = ::Plist.parse_xml(state.stdout)\n-\n- # handle the situation where the key doesn't exist in the domain\n- if plist_data.key?(new_resource.key)\n- key new_resource.key\n- else\n- current_value_does_not_exist!\n- end\n-\n- value plist_data[new_resource.key]\n- end\n-\n- #\n- # The defaults command to export a domain\n- #\n- # @return [Array] defaults command\n- #\n- def defaults_export_cmd(resource)\n- state_cmd = [\"/usr/bin/defaults\"]\n-\n- if resource.host == \"current\"\n- state_cmd.concat([\"-currentHost\"])\n- elsif resource.host # they specified a non-nil value, which is a hostname\n- state_cmd.concat([\"-host\", resource.host])\n- end\n-\n- state_cmd.concat([\"export\", resource.domain, \"-\"])\n- state_cmd\n+ key new_resource.key\n+ value pref\n end\n \n action :write, description: \"Write the value to the specified domain/key.\" do\n converge_if_changed do\n- cmd = defaults_modify_cmd\n- Chef::Log.debug(\"Updating defaults value by shelling out: #{cmd.join(\" \")}\")\n-\n- shell_out!(cmd, user: new_resource.user)\n+ Chef::Log.debug(\"Updating defaults value for #{new_resource.key} in #{new_resource.domain}\")\n+ set_preference(new_resource)\n end\n end\n \n action :delete, description: \"Delete a key from a domain.\" do\n # if it's not there there's nothing to remove\n- return unless current_resource\n+ return if current_resource.nil?\n \n converge_by(\"delete domain:#{new_resource.domain} key:#{new_resource.key}\") do\n-\n- cmd = defaults_modify_cmd\n- Chef::Log.debug(\"Removing defaults key by shelling out: #{cmd.join(\" \")}\")\n-\n- shell_out!(cmd, user: new_resource.user)\n+ Chef::Log.debug(\"Removing defaults key: #{new_resource.key}\")\n+ CF::Preferences.set!(new_resource.key, nil, new_resource.domain, new_resource.user, new_resource.host)\n end\n end\n \n- action_class do\n- #\n- # The command used to write or delete delete values from domains\n- #\n- # @return [Array] Array representation of defaults command to run\n- #\n- def defaults_modify_cmd\n- cmd = [\"/usr/bin/defaults\"]\n-\n- if new_resource.host == :current\n- cmd.concat([\"-currentHost\"])\n- elsif new_resource.host # they specified a non-nil value, which is a hostname\n- cmd.concat([\"-host\", new_resource.host])\n- end\n-\n- cmd.concat([action.to_s, new_resource.domain, new_resource.key])\n- cmd.concat(processed_value) if action == :write\n- cmd.prepend(\"sudo\") if new_resource.sudo\n- cmd\n- end\n+ def get_preference(new_resource)\n+ CF::Preferences.get(new_resource.key, new_resource.domain, new_resource.user, new_resource.host)\n+ end\n \n- #\n- # convert the provided value into the format defaults expects\n- #\n- # @return [array] array of values starting with the type if applicable\n- #\n- def processed_value\n- type = new_resource.type || value_type(new_resource.value)\n-\n- # when dict this creates an array of values [\"Key1\", \"Value1\", \"Key2\", \"Value2\" ...]\n- cmd_values = [\"-#{type}\"]\n-\n- case type\n- when \"dict\"\n- cmd_values.concat(new_resource.value.flatten)\n- when \"array\"\n- cmd_values.concat(new_resource.value)\n- when \"bool\"\n- cmd_values.concat(bool_to_defaults_bool(new_resource.value))\n- else\n- cmd_values.concat([new_resource.value])\n- end\n-\n- cmd_values\n- end\n+ def set_preference(new_resource)\n+ CF::Preferences.set!(new_resource.key, new_resource.value, new_resource.domain, new_resource.user, new_resource.host)\n+ end", + "comment_created_at": "2021-09-30T05:30:21+00:00", + "comment_author": "rishichawda", + "comment_body": "the 
`:write` action was using this function but `:delete` was using the `.set!` method from `CF` directly. Changed the `:write` to also use `set!` method in https://github.com/chef/chef/pull/11898/commits/858cc2258c6b0ad8add05c3dd011c986164b90e2. ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "688705114", + "pr_number": 11907, + "pr_file": "lib/chef/resource/archive_file.rb", + "created_at": "2021-08-13T18:28:12+00:00", + "commented_code": "converge_by(\"set owner of files extracted in #{new_resource.destination} to #{new_resource.owner}:#{new_resource.group}\") do\n archive = Archive::Reader.open_filename(new_resource.path)\n archive.each_entry do |e|\n FileUtils.chown(new_resource.owner, new_resource.group, \"#{new_resource.destination}/#{e.pathname}\")\n file_dest = strip_components_from_path(e.pathname)", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "688705114", + "repo_full_name": "chef/chef", + "pr_number": 11907, + "pr_file": "lib/chef/resource/archive_file.rb", + "discussion_id": "688705114", + "commented_code": "@@ -119,7 +123,8 @@ class ArchiveFile < Chef::Resource\n converge_by(\"set owner of files extracted in #{new_resource.destination} to #{new_resource.owner}:#{new_resource.group}\") do\n archive = Archive::Reader.open_filename(new_resource.path)\n archive.each_entry do |e|\n- FileUtils.chown(new_resource.owner, new_resource.group, \"#{new_resource.destination}/#{e.pathname}\")\n+ file_dest = strip_components_from_path(e.pathname)", + "comment_created_at": "2021-08-13T18:28:12+00:00", + "comment_author": "jasonwbarnett", + "comment_body": "This seems to be repeated 3 times in this resource which is screaming for some sort of abstraction. \"This\" being stripping the component from the path and then a guard clause (if/unless) to do something. Thinking a small class responsible for taking in an artifact and both stripping components and then returning a list of files that are applicable. 
Anyhow, I need to think about it more and I don't think it should hold this up given the unit test coverage but just wanted to dump what was in my brain.", + "pr_file_module": null + }, + { + "comment_id": "689730505", + "repo_full_name": "chef/chef", + "pr_number": 11907, + "pr_file": "lib/chef/resource/archive_file.rb", + "discussion_id": "688705114", + "commented_code": "@@ -119,7 +123,8 @@ class ArchiveFile < Chef::Resource\n converge_by(\"set owner of files extracted in #{new_resource.destination} to #{new_resource.owner}:#{new_resource.group}\") do\n archive = Archive::Reader.open_filename(new_resource.path)\n archive.each_entry do |e|\n- FileUtils.chown(new_resource.owner, new_resource.group, \"#{new_resource.destination}/#{e.pathname}\")\n+ file_dest = strip_components_from_path(e.pathname)", + "comment_created_at": "2021-08-16T17:36:17+00:00", + "comment_author": "lamont-granquist", + "comment_body": "It looks to me like this functionality should go into `archive.each_entry(strip_components: new_resource.strip_components)` and the work should get done in ffi-libarchive and/or libarchive directly (no idea if libarchive supports that or not)", + "pr_file_module": null + }, + { + "comment_id": "689784245", + "repo_full_name": "chef/chef", + "pr_number": 11907, + "pr_file": "lib/chef/resource/archive_file.rb", + "discussion_id": "688705114", + "commented_code": "@@ -119,7 +123,8 @@ class ArchiveFile < Chef::Resource\n converge_by(\"set owner of files extracted in #{new_resource.destination} to #{new_resource.owner}:#{new_resource.group}\") do\n archive = Archive::Reader.open_filename(new_resource.path)\n archive.each_entry do |e|\n- FileUtils.chown(new_resource.owner, new_resource.group, \"#{new_resource.destination}/#{e.pathname}\")\n+ file_dest = strip_components_from_path(e.pathname)", + "comment_created_at": "2021-08-16T18:59:48+00:00", + "comment_author": "jasonwbarnett", + "comment_body": "Completely agree with that in a pure OO sense, but I didn't know if that was violating any other principles for FFI specifically. Are you good with a PR to extend functionality on top of libarchive?", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-extract-and-organize-methods.md b/_reviewers/chef-extract-and-organize-methods.md index 381025c..bbfe2a6 100644 --- a/_reviewers/chef-extract-and-organize-methods.md +++ b/_reviewers/chef-extract-and-organize-methods.md @@ -59,291 +59,3 @@ end ``` This approach reduces cognitive load, improves reusability, and makes code easier to understand and modify. - - -[ - { - "discussion_id": "1377760869", - "pr_number": 14052, - "pr_file": "lib/chef/provider/powershell_script.rb", - "created_at": "2023-10-31T15:17:16+00:00", - "commented_code": "# executed, otherwise 0 or 1 based on whether $? is set to true\n # (success, where we return 0) or false (where we return 1).\n def wrapper_script\n <<~EOH\n # The script looks very slightly different with inline powershell since", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1377760869", - "repo_full_name": "chef/chef", - "pr_number": 14052, - "pr_file": "lib/chef/provider/powershell_script.rb", - "discussion_id": "1377760869", - "commented_code": "@@ -130,7 +193,78 @@ def validate_script_syntax!\n # executed, otherwise 0 or 1 based on whether $? 
is set to true\n # (success, where we return 0) or false (where we return 1).\n def wrapper_script\n- <<~EOH\n+ # The script looks very slightly different with inline powershell since", - "comment_created_at": "2023-10-31T15:17:16+00:00", - "comment_author": "tpowell-progress", - "comment_body": "Can we extract these two options into their own methods? Maybe something like `inline_powershell_wrapper...` and `shell_out_wrapper...`? The method is a bit long to start with, so the if/else is harder to read due to the length.", - "pr_file_module": null - }, - { - "comment_id": "1377894471", - "repo_full_name": "chef/chef", - "pr_number": 14052, - "pr_file": "lib/chef/provider/powershell_script.rb", - "discussion_id": "1377760869", - "commented_code": "@@ -130,7 +193,78 @@ def validate_script_syntax!\n # executed, otherwise 0 or 1 based on whether $? is set to true\n # (success, where we return 0) or false (where we return 1).\n def wrapper_script\n- <<~EOH\n+ # The script looks very slightly different with inline powershell since", - "comment_created_at": "2023-10-31T16:49:14+00:00", - "comment_author": "jaymzjulian", - "comment_body": "absolutely can. Retested with both traditional and inline powershell to ensure that didn't break things", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1385630851", - "pr_number": 14052, - "pr_file": "lib/chef/provider/powershell_script.rb", - "created_at": "2023-11-07T21:53:27+00:00", - "commented_code": "# last process run in the script if it is the last command\n # executed, otherwise 0 or 1 based on whether $? is set to true\n # (success, where we return 0) or false (where we return 1).\n def wrapper_script\n def inline_powershell_wrapper_script", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1385630851", - "repo_full_name": "chef/chef", - "pr_number": 14052, - "pr_file": "lib/chef/provider/powershell_script.rb", - "discussion_id": "1385630851", - "commented_code": "@@ -129,7 +192,80 @@ def validate_script_syntax!\n # last process run in the script if it is the last command\n # executed, otherwise 0 or 1 based on whether $? is set to true\n # (success, where we return 0) or false (where we return 1).\n- def wrapper_script\n+ def inline_powershell_wrapper_script", - "comment_created_at": "2023-11-07T21:53:27+00:00", - "comment_author": "tpowell-progress", - "comment_body": "Thinking this can potentially be deduplicated by some sort of templating or builder logic so that the behavior stays consistent between them. h/t: @dafyddcrosby ", - "pr_file_module": null - }, - { - "comment_id": "1385691294", - "repo_full_name": "chef/chef", - "pr_number": 14052, - "pr_file": "lib/chef/provider/powershell_script.rb", - "discussion_id": "1385630851", - "commented_code": "@@ -129,7 +192,80 @@ def validate_script_syntax!\n # last process run in the script if it is the last command\n # executed, otherwise 0 or 1 based on whether $? 
is set to true\n # (success, where we return 0) or false (where we return 1).\n- def wrapper_script\n+ def inline_powershell_wrapper_script", - "comment_created_at": "2023-11-07T22:51:22+00:00", - "comment_author": "jaymzjulian", - "comment_body": "yeah, i think that's sane to do - i'll make that happen", - "pr_file_module": null - }, - { - "comment_id": "1393521546", - "repo_full_name": "chef/chef", - "pr_number": 14052, - "pr_file": "lib/chef/provider/powershell_script.rb", - "discussion_id": "1385630851", - "commented_code": "@@ -129,7 +192,80 @@ def validate_script_syntax!\n # last process run in the script if it is the last command\n # executed, otherwise 0 or 1 based on whether $? is set to true\n # (success, where we return 0) or false (where we return 1).\n- def wrapper_script\n+ def inline_powershell_wrapper_script", - "comment_created_at": "2023-11-15T01:32:53+00:00", - "comment_author": "jaymzjulian", - "comment_body": "In the latest push, this is simplified to a single function with the two changes as simple conditions at those line points, which seemed much easier to read to my eye.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1297860584", - "pr_number": 13873, - "pr_file": "lib/chef/provider/package/apt.rb", - "created_at": "2023-08-18T00:00:50+00:00", - "commented_code": "end\n\n def resolve_virtual_package_name(pkg)\n showpkg = run_noninteractive(\"apt-cache\", \"showpkg\", pkg).stdout\n # apt-cache considers package names as regex by default. The anchor flag will decide whether to match name exact string or not\n resolved_pkg = new_resource.anchor_package_name_patterns ? pkg : \"^#{pkg}$\"", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1297860584", - "repo_full_name": "chef/chef", - "pr_number": 13873, - "pr_file": "lib/chef/provider/package/apt.rb", - "discussion_id": "1297860584", - "commented_code": "@@ -216,7 +218,9 @@ def resolve_package_versions(pkg)\n end\n \n def resolve_virtual_package_name(pkg)\n- showpkg = run_noninteractive(\"apt-cache\", \"showpkg\", pkg).stdout\n+ # apt-cache considers package names as regex by default. The anchor flag will decide whether to match name exact string or not\n+ resolved_pkg = new_resource.anchor_package_name_patterns ? pkg : \"^#{pkg}$\"", - "comment_created_at": "2023-08-18T00:00:50+00:00", - "comment_author": "jaymzh", - "comment_body": "rather than repeat this line/logic over and over, please extract this to a method (and I\"m reversing the log here per my previous comment):\r\n\r\n```ruby\r\ndef resolved_package(pkg)\r\n new_resource.anchor_package_regex ? \"^#{pkg}$\" : pkg\r\nend\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "755493907", - "pr_number": 11910, - "pr_file": "lib/chef/provider/package/apt.rb", - "created_at": "2021-11-23T20:51:09+00:00", - "commented_code": "}\n end\n\n # Helper to construct Hash of names-to-package-information.", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "755493907", - "repo_full_name": "chef/chef", - "pr_number": 11910, - "pr_file": "lib/chef/provider/package/apt.rb", - "discussion_id": "755493907", - "commented_code": "@@ -255,6 +273,81 @@ def package_data_for(pkg)\n }\n end\n \n+ # Helper to construct Hash of names-to-package-information.", - "comment_created_at": "2021-11-23T20:51:09+00:00", - "comment_author": "btm", - "comment_body": "Everything from here down looks like its copied from `Package::Dpkg`. We don't want to duplicate that. 
We already have the shared `Package::Deb` mixin between the Dpkg and Apt providers that we'd want to refactor this into.", - "pr_file_module": null - }, - { - "comment_id": "758063083", - "repo_full_name": "chef/chef", - "pr_number": 11910, - "pr_file": "lib/chef/provider/package/apt.rb", - "discussion_id": "755493907", - "commented_code": "@@ -255,6 +273,81 @@ def package_data_for(pkg)\n }\n end\n \n+ # Helper to construct Hash of names-to-package-information.", - "comment_created_at": "2021-11-29T06:15:14+00:00", - "comment_author": "manick-vel-11", - "comment_body": "I understand that most of the implementation is being referred from dpkg package. I will make use of `Package::Deb` and will separate out commonly used code by having a mixin between dpkg and apt providers.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "755494818", - "pr_number": 11910, - "pr_file": "lib/chef/provider/package/apt.rb", - "created_at": "2021-11-23T20:52:41+00:00", - "commented_code": "}\n end\n\n # Helper to construct Hash of names-to-package-information.\n #\n # @return [Hash] Mapping of package names to package information\n def name_pkginfo\n @name_pkginfo ||=\n begin\n pkginfos = resolved_source_array.map do |src|\n logger.trace(\"#{new_resource} checking #{src} dpkg status\")\n status = shell_out(\"dpkg-deb\", \"-W\", src)\n status.stdout\n end\n Hash[*package_name_array.zip(pkginfos).flatten]\n end\n end\n\n def name_package_name\n @name_package_name ||= name_pkginfo.transform_values { |v| v ? v.split(\"\\t\")[0] : nil }\n end\n\n # Return package names from the candidate source file(s).\n #\n # @return [Array] Array of actual package names read from the source files\n def get_package_name\n package_name_array.map { |name| name_package_name[name] }\n end\n\n def read_current_version_of_package(package_name)\n logger.trace(\"#{new_resource} checking install state of #{package_name}\")\n status = shell_out!(\"dpkg\", \"-s\", package_name, returns: [0, 1])\n package_installed = false\n status.stdout.each_line do |line|\n case line\n when /^Status: deinstall ok config-files/.freeze", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "755494818", - "repo_full_name": "chef/chef", - "pr_number": 11910, - "pr_file": "lib/chef/provider/package/apt.rb", - "discussion_id": "755494818", - "commented_code": "@@ -255,6 +273,81 @@ def package_data_for(pkg)\n }\n end\n \n+ # Helper to construct Hash of names-to-package-information.\n+ #\n+ # @return [Hash] Mapping of package names to package information\n+ def name_pkginfo\n+ @name_pkginfo ||=\n+ begin\n+ pkginfos = resolved_source_array.map do |src|\n+ logger.trace(\"#{new_resource} checking #{src} dpkg status\")\n+ status = shell_out(\"dpkg-deb\", \"-W\", src)\n+ status.stdout\n+ end\n+ Hash[*package_name_array.zip(pkginfos).flatten]\n+ end\n+ end\n+\n+ def name_package_name\n+ @name_package_name ||= name_pkginfo.transform_values { |v| v ? 
v.split(\"\\t\")[0] : nil }\n+ end\n+\n+ # Return package names from the candidate source file(s).\n+ #\n+ # @return [Array] Array of actual package names read from the source files\n+ def get_package_name\n+ package_name_array.map { |name| name_package_name[name] }\n+ end\n+\n+ def read_current_version_of_package(package_name)\n+ logger.trace(\"#{new_resource} checking install state of #{package_name}\")\n+ status = shell_out!(\"dpkg\", \"-s\", package_name, returns: [0, 1])\n+ package_installed = false\n+ status.stdout.each_line do |line|\n+ case line\n+ when /^Status: deinstall ok config-files/.freeze", - "comment_created_at": "2021-11-23T20:52:41+00:00", - "comment_author": "btm", - "comment_body": "We especially don't want to duplicate these regexes. The pattern we use for defining these over in https://github.com/chef/chef/blob/main/lib/chef/provider/package/dpkg.rb#L28-L30 is intentional. But duplicating these, if there was a bug we needed to fix in one provider we'd have to remember that we duplicated this code in another provider and fix the same regex. We'd likely forget that and miss the fix.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "792112012", - "pr_number": 12182, - "pr_file": "lib/chef/provider/package/zypper.rb", - "created_at": "2022-01-25T20:55:32+00:00", - "commented_code": "end\n\n def candidate_version\n @candidate_version ||= package_name_array.each_with_index.map { |pkg, i| available_version(i) }\n if source_files_exist?\n logger.trace(\"#{new_resource} checking rpm status\")\n shell_out!(\"rpm\", \"-qp\", \"--queryformat\", \"%{NAME} %{VERSION}-%{RELEASE}\\n\", new_resource.source).stdout.each_line do |line|\n case line\n when /^(\\S+)\\s(\\S+)$/\n current_resource.package_name($1)\n new_resource.version($2)\n @candidate_version = $2\n end\n end", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "792112012", - "repo_full_name": "chef/chef", - "pr_number": 12182, - "pr_file": "lib/chef/provider/package/zypper.rb", - "discussion_id": "792112012", - "commented_code": "@@ -70,7 +71,20 @@ def get_current_versions\n end\n \n def candidate_version\n- @candidate_version ||= package_name_array.each_with_index.map { |pkg, i| available_version(i) }\n+ if source_files_exist?\n+ logger.trace(\"#{new_resource} checking rpm status\")\n+ shell_out!(\"rpm\", \"-qp\", \"--queryformat\", \"%{NAME} %{VERSION}-%{RELEASE}\\n\", new_resource.source).stdout.each_line do |line|\n+ case line\n+ when /^(\\S+)\\s(\\S+)$/\n+ current_resource.package_name($1)\n+ new_resource.version($2)\n+ @candidate_version = $2\n+ end\n+ end", - "comment_created_at": "2022-01-25T20:55:32+00:00", - "comment_author": "lamont-granquist", - "comment_body": "the shell_out and its parsing here should probably be extracted out to its own method, see `resolve_source_to_version_obj` in the dnf or yum providers (although you don't have a version obj in this case so probably `resolve_source_to_version`.", - "pr_file_module": null - }, - { - "comment_id": "792118493", - "repo_full_name": "chef/chef", - "pr_number": 12182, - "pr_file": "lib/chef/provider/package/zypper.rb", - "discussion_id": "792112012", - "commented_code": "@@ -70,7 +71,20 @@ def get_current_versions\n end\n \n def candidate_version\n- @candidate_version ||= package_name_array.each_with_index.map { |pkg, i| available_version(i) }\n+ if source_files_exist?\n+ logger.trace(\"#{new_resource} checking rpm status\")\n+ shell_out!(\"rpm\", \"-qp\", \"--queryformat\", \"%{NAME} %{VERSION}-%{RELEASE}\\n\", 
new_resource.source).stdout.each_line do |line|\n+ case line\n+ when /^(\\S+)\\s(\\S+)$/\n+ current_resource.package_name($1)\n+ new_resource.version($2)\n+ @candidate_version = $2\n+ end\n+ end", - "comment_created_at": "2022-01-25T21:04:59+00:00", - "comment_author": "lamont-granquist", - "comment_body": "(in general the dnf provider is a reasonably good template for how the code change should look)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "718895310", - "pr_number": 11898, - "pr_file": "lib/chef/resource/macos_userdefaults.rb", - "created_at": "2021-09-29T21:23:01+00:00", - "commented_code": "required: true\n\n property :host, [String, Symbol],\n description: \"Set either :current or a hostname to set the user default at the host level.\",\n description: \"Set either :current, :all or a hostname to set the user default at the host level.\",\n desired_state: false,\n introduced: \"16.3\"\n introduced: \"16.3\",\n coerce: proc { |value| to_cf_host(value) }\n\n property :value, [Integer, Float, String, TrueClass, FalseClass, Hash, Array],\n description: \"The value of the key. Note: With the `type` property set to `bool`, `String` forms of Boolean true/false values that Apple accepts in the defaults command will be coerced: 0/1, 'TRUE'/'FALSE,' 'true'/false', 'YES'/'NO', or 'yes'/'no'.\",\n required: [:write],\n coerce: proc { |v| v.is_a?(Hash) ? v.transform_keys(&:to_s) : v } # make sure keys are all strings for comparison\n required: [:write]\n\n property :type, String,\n description: \"The value type of the preference key.\",\n equal_to: %w{bool string int float array dict},\n desired_state: false\n desired_state: false,\n deprecated: true\n\n property :user, String,\n description: \"The system user that the default will be applied to.\",\n desired_state: false\n property :user, [String, Symbol],\n description: \"The system user that the default will be applied to. Set :current for current user, :all for all users or pass a valid username\",\n desired_state: false,\n coerce: proc { |value| to_cf_user(value) }\n\n property :sudo, [TrueClass, FalseClass],\n description: \"Set to true if the setting you wish to modify requires privileged access. This requires passwordless sudo for the `/usr/bin/defaults` command to be setup for the user running #{ChefUtils::Dist::Infra::PRODUCT}.\",\n default: false,\n desired_state: false\n desired_state: false,\n deprecated: true\n\n load_current_value do |new_resource|\n Chef::Log.debug \"#load_current_value: shelling out \\\"#{defaults_export_cmd(new_resource).join(\" \")}\\\" to determine state\"\n state = shell_out(defaults_export_cmd(new_resource), user: new_resource.user)\n Chef::Log.debug \"#load_current_value: attempting to read \\\"#{new_resource.domain}\\\" value from preferences to determine state\"\n\n if state.error? || state.stdout.empty?\n Chef::Log.debug \"#load_current_value: #{defaults_export_cmd(new_resource).join(\" \")} returned stdout: #{state.stdout} and stderr: #{state.stderr}\"\n current_value_does_not_exist!\n end\n pref = get_preference(new_resource)\n current_value_does_not_exist! 
if pref.nil?\n\n plist_data = ::Plist.parse_xml(state.stdout)\n\n # handle the situation where the key doesn't exist in the domain\n if plist_data.key?(new_resource.key)\n key new_resource.key\n else\n current_value_does_not_exist!\n end\n\n value plist_data[new_resource.key]\n end\n\n #\n # The defaults command to export a domain\n #\n # @return [Array] defaults command\n #\n def defaults_export_cmd(resource)\n state_cmd = [\"/usr/bin/defaults\"]\n\n if resource.host == \"current\"\n state_cmd.concat([\"-currentHost\"])\n elsif resource.host # they specified a non-nil value, which is a hostname\n state_cmd.concat([\"-host\", resource.host])\n end\n\n state_cmd.concat([\"export\", resource.domain, \"-\"])\n state_cmd\n key new_resource.key\n value pref\n end\n\n action :write, description: \"Write the value to the specified domain/key.\" do\n converge_if_changed do\n cmd = defaults_modify_cmd\n Chef::Log.debug(\"Updating defaults value by shelling out: #{cmd.join(\" \")}\")\n\n shell_out!(cmd, user: new_resource.user)\n Chef::Log.debug(\"Updating defaults value for #{new_resource.key} in #{new_resource.domain}\")\n set_preference(new_resource)\n end\n end\n\n action :delete, description: \"Delete a key from a domain.\" do\n # if it's not there there's nothing to remove\n return unless current_resource\n return if current_resource.nil?\n\n converge_by(\"delete domain:#{new_resource.domain} key:#{new_resource.key}\") do\n\n cmd = defaults_modify_cmd\n Chef::Log.debug(\"Removing defaults key by shelling out: #{cmd.join(\" \")}\")\n\n shell_out!(cmd, user: new_resource.user)\n Chef::Log.debug(\"Removing defaults key: #{new_resource.key}\")\n CF::Preferences.set!(new_resource.key, nil, new_resource.domain, new_resource.user, new_resource.host)\n end\n end\n\n action_class do\n #\n # The command used to write or delete delete values from domains\n #\n # @return [Array] Array representation of defaults command to run\n #\n def defaults_modify_cmd\n cmd = [\"/usr/bin/defaults\"]\n\n if new_resource.host == :current\n cmd.concat([\"-currentHost\"])\n elsif new_resource.host # they specified a non-nil value, which is a hostname\n cmd.concat([\"-host\", new_resource.host])\n end\n\n cmd.concat([action.to_s, new_resource.domain, new_resource.key])\n cmd.concat(processed_value) if action == :write\n cmd.prepend(\"sudo\") if new_resource.sudo\n cmd\n end\n def get_preference(new_resource)\n CF::Preferences.get(new_resource.key, new_resource.domain, new_resource.user, new_resource.host)\n end\n\n #\n # convert the provided value into the format defaults expects\n #\n # @return [array] array of values starting with the type if applicable\n #\n def processed_value\n type = new_resource.type || value_type(new_resource.value)\n\n # when dict this creates an array of values [\"Key1\", \"Value1\", \"Key2\", \"Value2\" ...]\n cmd_values = [\"-#{type}\"]\n\n case type\n when \"dict\"\n cmd_values.concat(new_resource.value.flatten)\n when \"array\"\n cmd_values.concat(new_resource.value)\n when \"bool\"\n cmd_values.concat(bool_to_defaults_bool(new_resource.value))\n else\n cmd_values.concat([new_resource.value])\n end\n\n cmd_values\n end\n def set_preference(new_resource)\n CF::Preferences.set!(new_resource.key, new_resource.value, new_resource.domain, new_resource.user, new_resource.host)\n end", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "718895310", - "repo_full_name": "chef/chef", - "pr_number": 11898, - "pr_file": "lib/chef/resource/macos_userdefaults.rb", - 
"discussion_id": "718895310", - "commented_code": "@@ -78,173 +79,88 @@ class MacosUserDefaults < Chef::Resource\n required: true\n \n property :host, [String, Symbol],\n- description: \"Set either :current or a hostname to set the user default at the host level.\",\n+ description: \"Set either :current, :all or a hostname to set the user default at the host level.\",\n desired_state: false,\n- introduced: \"16.3\"\n+ introduced: \"16.3\",\n+ coerce: proc { |value| to_cf_host(value) }\n \n property :value, [Integer, Float, String, TrueClass, FalseClass, Hash, Array],\n description: \"The value of the key. Note: With the `type` property set to `bool`, `String` forms of Boolean true/false values that Apple accepts in the defaults command will be coerced: 0/1, 'TRUE'/'FALSE,' 'true'/false', 'YES'/'NO', or 'yes'/'no'.\",\n- required: [:write],\n- coerce: proc { |v| v.is_a?(Hash) ? v.transform_keys(&:to_s) : v } # make sure keys are all strings for comparison\n+ required: [:write]\n \n property :type, String,\n description: \"The value type of the preference key.\",\n equal_to: %w{bool string int float array dict},\n- desired_state: false\n+ desired_state: false,\n+ deprecated: true\n \n- property :user, String,\n- description: \"The system user that the default will be applied to.\",\n- desired_state: false\n+ property :user, [String, Symbol],\n+ description: \"The system user that the default will be applied to. Set :current for current user, :all for all users or pass a valid username\",\n+ desired_state: false,\n+ coerce: proc { |value| to_cf_user(value) }\n \n property :sudo, [TrueClass, FalseClass],\n description: \"Set to true if the setting you wish to modify requires privileged access. This requires passwordless sudo for the `/usr/bin/defaults` command to be setup for the user running #{ChefUtils::Dist::Infra::PRODUCT}.\",\n default: false,\n- desired_state: false\n+ desired_state: false,\n+ deprecated: true\n \n load_current_value do |new_resource|\n- Chef::Log.debug \"#load_current_value: shelling out \\\"#{defaults_export_cmd(new_resource).join(\" \")}\\\" to determine state\"\n- state = shell_out(defaults_export_cmd(new_resource), user: new_resource.user)\n+ Chef::Log.debug \"#load_current_value: attempting to read \\\"#{new_resource.domain}\\\" value from preferences to determine state\"\n \n- if state.error? || state.stdout.empty?\n- Chef::Log.debug \"#load_current_value: #{defaults_export_cmd(new_resource).join(\" \")} returned stdout: #{state.stdout} and stderr: #{state.stderr}\"\n- current_value_does_not_exist!\n- end\n+ pref = get_preference(new_resource)\n+ current_value_does_not_exist! 
if pref.nil?\n \n- plist_data = ::Plist.parse_xml(state.stdout)\n-\n- # handle the situation where the key doesn't exist in the domain\n- if plist_data.key?(new_resource.key)\n- key new_resource.key\n- else\n- current_value_does_not_exist!\n- end\n-\n- value plist_data[new_resource.key]\n- end\n-\n- #\n- # The defaults command to export a domain\n- #\n- # @return [Array] defaults command\n- #\n- def defaults_export_cmd(resource)\n- state_cmd = [\"/usr/bin/defaults\"]\n-\n- if resource.host == \"current\"\n- state_cmd.concat([\"-currentHost\"])\n- elsif resource.host # they specified a non-nil value, which is a hostname\n- state_cmd.concat([\"-host\", resource.host])\n- end\n-\n- state_cmd.concat([\"export\", resource.domain, \"-\"])\n- state_cmd\n+ key new_resource.key\n+ value pref\n end\n \n action :write, description: \"Write the value to the specified domain/key.\" do\n converge_if_changed do\n- cmd = defaults_modify_cmd\n- Chef::Log.debug(\"Updating defaults value by shelling out: #{cmd.join(\" \")}\")\n-\n- shell_out!(cmd, user: new_resource.user)\n+ Chef::Log.debug(\"Updating defaults value for #{new_resource.key} in #{new_resource.domain}\")\n+ set_preference(new_resource)\n end\n end\n \n action :delete, description: \"Delete a key from a domain.\" do\n # if it's not there there's nothing to remove\n- return unless current_resource\n+ return if current_resource.nil?\n \n converge_by(\"delete domain:#{new_resource.domain} key:#{new_resource.key}\") do\n-\n- cmd = defaults_modify_cmd\n- Chef::Log.debug(\"Removing defaults key by shelling out: #{cmd.join(\" \")}\")\n-\n- shell_out!(cmd, user: new_resource.user)\n+ Chef::Log.debug(\"Removing defaults key: #{new_resource.key}\")\n+ CF::Preferences.set!(new_resource.key, nil, new_resource.domain, new_resource.user, new_resource.host)\n end\n end\n \n- action_class do\n- #\n- # The command used to write or delete delete values from domains\n- #\n- # @return [Array] Array representation of defaults command to run\n- #\n- def defaults_modify_cmd\n- cmd = [\"/usr/bin/defaults\"]\n-\n- if new_resource.host == :current\n- cmd.concat([\"-currentHost\"])\n- elsif new_resource.host # they specified a non-nil value, which is a hostname\n- cmd.concat([\"-host\", new_resource.host])\n- end\n-\n- cmd.concat([action.to_s, new_resource.domain, new_resource.key])\n- cmd.concat(processed_value) if action == :write\n- cmd.prepend(\"sudo\") if new_resource.sudo\n- cmd\n- end\n+ def get_preference(new_resource)\n+ CF::Preferences.get(new_resource.key, new_resource.domain, new_resource.user, new_resource.host)\n+ end\n \n- #\n- # convert the provided value into the format defaults expects\n- #\n- # @return [array] array of values starting with the type if applicable\n- #\n- def processed_value\n- type = new_resource.type || value_type(new_resource.value)\n-\n- # when dict this creates an array of values [\"Key1\", \"Value1\", \"Key2\", \"Value2\" ...]\n- cmd_values = [\"-#{type}\"]\n-\n- case type\n- when \"dict\"\n- cmd_values.concat(new_resource.value.flatten)\n- when \"array\"\n- cmd_values.concat(new_resource.value)\n- when \"bool\"\n- cmd_values.concat(bool_to_defaults_bool(new_resource.value))\n- else\n- cmd_values.concat([new_resource.value])\n- end\n-\n- cmd_values\n- end\n+ def set_preference(new_resource)\n+ CF::Preferences.set!(new_resource.key, new_resource.value, new_resource.domain, new_resource.user, new_resource.host)\n+ end", - "comment_created_at": "2021-09-29T21:23:01+00:00", - "comment_author": "lamont-granquist", - "comment_body": 
"don't think this method is getting used (might be causing some confusion over the action_class block)", - "pr_file_module": null - }, - { - "comment_id": "719066530", - "repo_full_name": "chef/chef", - "pr_number": 11898, - "pr_file": "lib/chef/resource/macos_userdefaults.rb", - "discussion_id": "718895310", - "commented_code": "@@ -78,173 +79,88 @@ class MacosUserDefaults < Chef::Resource\n required: true\n \n property :host, [String, Symbol],\n- description: \"Set either :current or a hostname to set the user default at the host level.\",\n+ description: \"Set either :current, :all or a hostname to set the user default at the host level.\",\n desired_state: false,\n- introduced: \"16.3\"\n+ introduced: \"16.3\",\n+ coerce: proc { |value| to_cf_host(value) }\n \n property :value, [Integer, Float, String, TrueClass, FalseClass, Hash, Array],\n description: \"The value of the key. Note: With the `type` property set to `bool`, `String` forms of Boolean true/false values that Apple accepts in the defaults command will be coerced: 0/1, 'TRUE'/'FALSE,' 'true'/false', 'YES'/'NO', or 'yes'/'no'.\",\n- required: [:write],\n- coerce: proc { |v| v.is_a?(Hash) ? v.transform_keys(&:to_s) : v } # make sure keys are all strings for comparison\n+ required: [:write]\n \n property :type, String,\n description: \"The value type of the preference key.\",\n equal_to: %w{bool string int float array dict},\n- desired_state: false\n+ desired_state: false,\n+ deprecated: true\n \n- property :user, String,\n- description: \"The system user that the default will be applied to.\",\n- desired_state: false\n+ property :user, [String, Symbol],\n+ description: \"The system user that the default will be applied to. Set :current for current user, :all for all users or pass a valid username\",\n+ desired_state: false,\n+ coerce: proc { |value| to_cf_user(value) }\n \n property :sudo, [TrueClass, FalseClass],\n description: \"Set to true if the setting you wish to modify requires privileged access. This requires passwordless sudo for the `/usr/bin/defaults` command to be setup for the user running #{ChefUtils::Dist::Infra::PRODUCT}.\",\n default: false,\n- desired_state: false\n+ desired_state: false,\n+ deprecated: true\n \n load_current_value do |new_resource|\n- Chef::Log.debug \"#load_current_value: shelling out \\\"#{defaults_export_cmd(new_resource).join(\" \")}\\\" to determine state\"\n- state = shell_out(defaults_export_cmd(new_resource), user: new_resource.user)\n+ Chef::Log.debug \"#load_current_value: attempting to read \\\"#{new_resource.domain}\\\" value from preferences to determine state\"\n \n- if state.error? || state.stdout.empty?\n- Chef::Log.debug \"#load_current_value: #{defaults_export_cmd(new_resource).join(\" \")} returned stdout: #{state.stdout} and stderr: #{state.stderr}\"\n- current_value_does_not_exist!\n- end\n+ pref = get_preference(new_resource)\n+ current_value_does_not_exist! 
if pref.nil?\n \n- plist_data = ::Plist.parse_xml(state.stdout)\n-\n- # handle the situation where the key doesn't exist in the domain\n- if plist_data.key?(new_resource.key)\n- key new_resource.key\n- else\n- current_value_does_not_exist!\n- end\n-\n- value plist_data[new_resource.key]\n- end\n-\n- #\n- # The defaults command to export a domain\n- #\n- # @return [Array] defaults command\n- #\n- def defaults_export_cmd(resource)\n- state_cmd = [\"/usr/bin/defaults\"]\n-\n- if resource.host == \"current\"\n- state_cmd.concat([\"-currentHost\"])\n- elsif resource.host # they specified a non-nil value, which is a hostname\n- state_cmd.concat([\"-host\", resource.host])\n- end\n-\n- state_cmd.concat([\"export\", resource.domain, \"-\"])\n- state_cmd\n+ key new_resource.key\n+ value pref\n end\n \n action :write, description: \"Write the value to the specified domain/key.\" do\n converge_if_changed do\n- cmd = defaults_modify_cmd\n- Chef::Log.debug(\"Updating defaults value by shelling out: #{cmd.join(\" \")}\")\n-\n- shell_out!(cmd, user: new_resource.user)\n+ Chef::Log.debug(\"Updating defaults value for #{new_resource.key} in #{new_resource.domain}\")\n+ set_preference(new_resource)\n end\n end\n \n action :delete, description: \"Delete a key from a domain.\" do\n # if it's not there there's nothing to remove\n- return unless current_resource\n+ return if current_resource.nil?\n \n converge_by(\"delete domain:#{new_resource.domain} key:#{new_resource.key}\") do\n-\n- cmd = defaults_modify_cmd\n- Chef::Log.debug(\"Removing defaults key by shelling out: #{cmd.join(\" \")}\")\n-\n- shell_out!(cmd, user: new_resource.user)\n+ Chef::Log.debug(\"Removing defaults key: #{new_resource.key}\")\n+ CF::Preferences.set!(new_resource.key, nil, new_resource.domain, new_resource.user, new_resource.host)\n end\n end\n \n- action_class do\n- #\n- # The command used to write or delete delete values from domains\n- #\n- # @return [Array] Array representation of defaults command to run\n- #\n- def defaults_modify_cmd\n- cmd = [\"/usr/bin/defaults\"]\n-\n- if new_resource.host == :current\n- cmd.concat([\"-currentHost\"])\n- elsif new_resource.host # they specified a non-nil value, which is a hostname\n- cmd.concat([\"-host\", new_resource.host])\n- end\n-\n- cmd.concat([action.to_s, new_resource.domain, new_resource.key])\n- cmd.concat(processed_value) if action == :write\n- cmd.prepend(\"sudo\") if new_resource.sudo\n- cmd\n- end\n+ def get_preference(new_resource)\n+ CF::Preferences.get(new_resource.key, new_resource.domain, new_resource.user, new_resource.host)\n+ end\n \n- #\n- # convert the provided value into the format defaults expects\n- #\n- # @return [array] array of values starting with the type if applicable\n- #\n- def processed_value\n- type = new_resource.type || value_type(new_resource.value)\n-\n- # when dict this creates an array of values [\"Key1\", \"Value1\", \"Key2\", \"Value2\" ...]\n- cmd_values = [\"-#{type}\"]\n-\n- case type\n- when \"dict\"\n- cmd_values.concat(new_resource.value.flatten)\n- when \"array\"\n- cmd_values.concat(new_resource.value)\n- when \"bool\"\n- cmd_values.concat(bool_to_defaults_bool(new_resource.value))\n- else\n- cmd_values.concat([new_resource.value])\n- end\n-\n- cmd_values\n- end\n+ def set_preference(new_resource)\n+ CF::Preferences.set!(new_resource.key, new_resource.value, new_resource.domain, new_resource.user, new_resource.host)\n+ end", - "comment_created_at": "2021-09-30T05:22:36+00:00", - "comment_author": "rishichawda", - "comment_body": "oh yeah I 
might have overlooked this after I moved the call directly inside the action.. will remove this now.. Thank you! ", - "pr_file_module": null - }, - { - "comment_id": "719069595", - "repo_full_name": "chef/chef", - "pr_number": 11898, - "pr_file": "lib/chef/resource/macos_userdefaults.rb", - "discussion_id": "718895310", - "commented_code": "@@ -78,173 +79,88 @@ class MacosUserDefaults < Chef::Resource\n required: true\n \n property :host, [String, Symbol],\n- description: \"Set either :current or a hostname to set the user default at the host level.\",\n+ description: \"Set either :current, :all or a hostname to set the user default at the host level.\",\n desired_state: false,\n- introduced: \"16.3\"\n+ introduced: \"16.3\",\n+ coerce: proc { |value| to_cf_host(value) }\n \n property :value, [Integer, Float, String, TrueClass, FalseClass, Hash, Array],\n description: \"The value of the key. Note: With the `type` property set to `bool`, `String` forms of Boolean true/false values that Apple accepts in the defaults command will be coerced: 0/1, 'TRUE'/'FALSE,' 'true'/false', 'YES'/'NO', or 'yes'/'no'.\",\n- required: [:write],\n- coerce: proc { |v| v.is_a?(Hash) ? v.transform_keys(&:to_s) : v } # make sure keys are all strings for comparison\n+ required: [:write]\n \n property :type, String,\n description: \"The value type of the preference key.\",\n equal_to: %w{bool string int float array dict},\n- desired_state: false\n+ desired_state: false,\n+ deprecated: true\n \n- property :user, String,\n- description: \"The system user that the default will be applied to.\",\n- desired_state: false\n+ property :user, [String, Symbol],\n+ description: \"The system user that the default will be applied to. Set :current for current user, :all for all users or pass a valid username\",\n+ desired_state: false,\n+ coerce: proc { |value| to_cf_user(value) }\n \n property :sudo, [TrueClass, FalseClass],\n description: \"Set to true if the setting you wish to modify requires privileged access. This requires passwordless sudo for the `/usr/bin/defaults` command to be setup for the user running #{ChefUtils::Dist::Infra::PRODUCT}.\",\n default: false,\n- desired_state: false\n+ desired_state: false,\n+ deprecated: true\n \n load_current_value do |new_resource|\n- Chef::Log.debug \"#load_current_value: shelling out \\\"#{defaults_export_cmd(new_resource).join(\" \")}\\\" to determine state\"\n- state = shell_out(defaults_export_cmd(new_resource), user: new_resource.user)\n+ Chef::Log.debug \"#load_current_value: attempting to read \\\"#{new_resource.domain}\\\" value from preferences to determine state\"\n \n- if state.error? || state.stdout.empty?\n- Chef::Log.debug \"#load_current_value: #{defaults_export_cmd(new_resource).join(\" \")} returned stdout: #{state.stdout} and stderr: #{state.stderr}\"\n- current_value_does_not_exist!\n- end\n+ pref = get_preference(new_resource)\n+ current_value_does_not_exist! 
if pref.nil?\n \n- plist_data = ::Plist.parse_xml(state.stdout)\n-\n- # handle the situation where the key doesn't exist in the domain\n- if plist_data.key?(new_resource.key)\n- key new_resource.key\n- else\n- current_value_does_not_exist!\n- end\n-\n- value plist_data[new_resource.key]\n- end\n-\n- #\n- # The defaults command to export a domain\n- #\n- # @return [Array] defaults command\n- #\n- def defaults_export_cmd(resource)\n- state_cmd = [\"/usr/bin/defaults\"]\n-\n- if resource.host == \"current\"\n- state_cmd.concat([\"-currentHost\"])\n- elsif resource.host # they specified a non-nil value, which is a hostname\n- state_cmd.concat([\"-host\", resource.host])\n- end\n-\n- state_cmd.concat([\"export\", resource.domain, \"-\"])\n- state_cmd\n+ key new_resource.key\n+ value pref\n end\n \n action :write, description: \"Write the value to the specified domain/key.\" do\n converge_if_changed do\n- cmd = defaults_modify_cmd\n- Chef::Log.debug(\"Updating defaults value by shelling out: #{cmd.join(\" \")}\")\n-\n- shell_out!(cmd, user: new_resource.user)\n+ Chef::Log.debug(\"Updating defaults value for #{new_resource.key} in #{new_resource.domain}\")\n+ set_preference(new_resource)\n end\n end\n \n action :delete, description: \"Delete a key from a domain.\" do\n # if it's not there there's nothing to remove\n- return unless current_resource\n+ return if current_resource.nil?\n \n converge_by(\"delete domain:#{new_resource.domain} key:#{new_resource.key}\") do\n-\n- cmd = defaults_modify_cmd\n- Chef::Log.debug(\"Removing defaults key by shelling out: #{cmd.join(\" \")}\")\n-\n- shell_out!(cmd, user: new_resource.user)\n+ Chef::Log.debug(\"Removing defaults key: #{new_resource.key}\")\n+ CF::Preferences.set!(new_resource.key, nil, new_resource.domain, new_resource.user, new_resource.host)\n end\n end\n \n- action_class do\n- #\n- # The command used to write or delete delete values from domains\n- #\n- # @return [Array] Array representation of defaults command to run\n- #\n- def defaults_modify_cmd\n- cmd = [\"/usr/bin/defaults\"]\n-\n- if new_resource.host == :current\n- cmd.concat([\"-currentHost\"])\n- elsif new_resource.host # they specified a non-nil value, which is a hostname\n- cmd.concat([\"-host\", new_resource.host])\n- end\n-\n- cmd.concat([action.to_s, new_resource.domain, new_resource.key])\n- cmd.concat(processed_value) if action == :write\n- cmd.prepend(\"sudo\") if new_resource.sudo\n- cmd\n- end\n+ def get_preference(new_resource)\n+ CF::Preferences.get(new_resource.key, new_resource.domain, new_resource.user, new_resource.host)\n+ end\n \n- #\n- # convert the provided value into the format defaults expects\n- #\n- # @return [array] array of values starting with the type if applicable\n- #\n- def processed_value\n- type = new_resource.type || value_type(new_resource.value)\n-\n- # when dict this creates an array of values [\"Key1\", \"Value1\", \"Key2\", \"Value2\" ...]\n- cmd_values = [\"-#{type}\"]\n-\n- case type\n- when \"dict\"\n- cmd_values.concat(new_resource.value.flatten)\n- when \"array\"\n- cmd_values.concat(new_resource.value)\n- when \"bool\"\n- cmd_values.concat(bool_to_defaults_bool(new_resource.value))\n- else\n- cmd_values.concat([new_resource.value])\n- end\n-\n- cmd_values\n- end\n+ def set_preference(new_resource)\n+ CF::Preferences.set!(new_resource.key, new_resource.value, new_resource.domain, new_resource.user, new_resource.host)\n+ end", - "comment_created_at": "2021-09-30T05:30:21+00:00", - "comment_author": "rishichawda", - "comment_body": "the 
`:write` action was using this function but `:delete` was using the `.set!` method from `CF` directly. Changed the `:write` to also use `set!` method in https://github.com/chef/chef/pull/11898/commits/858cc2258c6b0ad8add05c3dd011c986164b90e2. ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "688705114", - "pr_number": 11907, - "pr_file": "lib/chef/resource/archive_file.rb", - "created_at": "2021-08-13T18:28:12+00:00", - "commented_code": "converge_by(\"set owner of files extracted in #{new_resource.destination} to #{new_resource.owner}:#{new_resource.group}\") do\n archive = Archive::Reader.open_filename(new_resource.path)\n archive.each_entry do |e|\n FileUtils.chown(new_resource.owner, new_resource.group, \"#{new_resource.destination}/#{e.pathname}\")\n file_dest = strip_components_from_path(e.pathname)", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "688705114", - "repo_full_name": "chef/chef", - "pr_number": 11907, - "pr_file": "lib/chef/resource/archive_file.rb", - "discussion_id": "688705114", - "commented_code": "@@ -119,7 +123,8 @@ class ArchiveFile < Chef::Resource\n converge_by(\"set owner of files extracted in #{new_resource.destination} to #{new_resource.owner}:#{new_resource.group}\") do\n archive = Archive::Reader.open_filename(new_resource.path)\n archive.each_entry do |e|\n- FileUtils.chown(new_resource.owner, new_resource.group, \"#{new_resource.destination}/#{e.pathname}\")\n+ file_dest = strip_components_from_path(e.pathname)", - "comment_created_at": "2021-08-13T18:28:12+00:00", - "comment_author": "jasonwbarnett", - "comment_body": "This seems to be repeated 3 times in this resource which is screaming for some sort of abstraction. \"This\" being stripping the component from the path and then a guard clause (if/unless) to do something. Thinking a small class responsible for taking in an artifact and both stripping components and then returning a list of files that are applicable. 
Anyhow, I need to think about it more and I don't think it should hold this up given the unit test coverage but just wanted to dump what was in my brain.", - "pr_file_module": null - }, - { - "comment_id": "689730505", - "repo_full_name": "chef/chef", - "pr_number": 11907, - "pr_file": "lib/chef/resource/archive_file.rb", - "discussion_id": "688705114", - "commented_code": "@@ -119,7 +123,8 @@ class ArchiveFile < Chef::Resource\n converge_by(\"set owner of files extracted in #{new_resource.destination} to #{new_resource.owner}:#{new_resource.group}\") do\n archive = Archive::Reader.open_filename(new_resource.path)\n archive.each_entry do |e|\n- FileUtils.chown(new_resource.owner, new_resource.group, \"#{new_resource.destination}/#{e.pathname}\")\n+ file_dest = strip_components_from_path(e.pathname)", - "comment_created_at": "2021-08-16T17:36:17+00:00", - "comment_author": "lamont-granquist", - "comment_body": "It looks to me like this functionality should go into `archive.each_entry(strip_components: new_resource.strip_components)` and the work should get done in ffi-libarchive and/or libarchive directly (no idea if libarchive supports that or not)", - "pr_file_module": null - }, - { - "comment_id": "689784245", - "repo_full_name": "chef/chef", - "pr_number": 11907, - "pr_file": "lib/chef/resource/archive_file.rb", - "discussion_id": "688705114", - "commented_code": "@@ -119,7 +123,8 @@ class ArchiveFile < Chef::Resource\n converge_by(\"set owner of files extracted in #{new_resource.destination} to #{new_resource.owner}:#{new_resource.group}\") do\n archive = Archive::Reader.open_filename(new_resource.path)\n archive.each_entry do |e|\n- FileUtils.chown(new_resource.owner, new_resource.group, \"#{new_resource.destination}/#{e.pathname}\")\n+ file_dest = strip_components_from_path(e.pathname)", - "comment_created_at": "2021-08-16T18:59:48+00:00", - "comment_author": "jasonwbarnett", - "comment_body": "Completely agree with that in a pure OO sense, but I didn't know if that was violating any other principles for FFI specifically. Are you good with a PR to extend functionality on top of libarchive?", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-fail-fast-principle.json b/_reviewers/chef-fail-fast-principle.json new file mode 100644 index 0000000..5b2ec6a --- /dev/null +++ b/_reviewers/chef-fail-fast-principle.json @@ -0,0 +1,116 @@ +[ + { + "discussion_id": "1092469391", + "pr_number": 13529, + "pr_file": "habitat/plan.ps1", + "created_at": "2023-01-31T20:53:12+00:00", + "commented_code": "}\n Write-BuildLine \" ** Running the chef project's 'rake install' to install the path-based gems so they look like any other installed gem.\"\n $install_attempt = 0\n do {\n Start-Sleep -Seconds 5\n $install_attempt++\n Write-BuildLine \"Install attempt $install_attempt\"\n bundle exec rake install:local --trace=stdout\n } while ((-not $?) 
-and ($install_attempt -lt 5))\n bundle exec rake install --trace=stdout # this needs to be 'bundle exec'd because a Rakefile makes reference to Bundler", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1092469391", + "repo_full_name": "chef/chef", + "pr_number": 13529, + "pr_file": "habitat/plan.ps1", + "discussion_id": "1092469391", + "commented_code": "@@ -104,12 +104,13 @@ function Invoke-Build {\n }\n Write-BuildLine \" ** Running the chef project's 'rake install' to install the path-based gems so they look like any other installed gem.\"\n $install_attempt = 0\n- do {\n- Start-Sleep -Seconds 5\n- $install_attempt++\n- Write-BuildLine \"Install attempt $install_attempt\"\n- bundle exec rake install:local --trace=stdout\n- } while ((-not $?) -and ($install_attempt -lt 5))\n+ bundle exec rake install --trace=stdout # this needs to be 'bundle exec'd because a Rakefile makes reference to Bundler", + "comment_created_at": "2023-01-31T20:53:12+00:00", + "comment_author": "jaymzh", + "comment_body": "I don't think this file is related to the backport?", + "pr_file_module": null + }, + { + "comment_id": "1093114088", + "repo_full_name": "chef/chef", + "pr_number": 13529, + "pr_file": "habitat/plan.ps1", + "discussion_id": "1092469391", + "commented_code": "@@ -104,12 +104,13 @@ function Invoke-Build {\n }\n Write-BuildLine \" ** Running the chef project's 'rake install' to install the path-based gems so they look like any other installed gem.\"\n $install_attempt = 0\n- do {\n- Start-Sleep -Seconds 5\n- $install_attempt++\n- Write-BuildLine \"Install attempt $install_attempt\"\n- bundle exec rake install:local --trace=stdout\n- } while ((-not $?) -and ($install_attempt -lt 5))\n+ bundle exec rake install --trace=stdout # this needs to be 'bundle exec'd because a Rakefile makes reference to Bundler", + "comment_created_at": "2023-02-01T11:46:45+00:00", + "comment_author": "neha-p6", + "comment_body": "Yeah, it was for windows plan runner to spit out errors in verify pipeline.", + "pr_file_module": null + }, + { + "comment_id": "1093155925", + "repo_full_name": "chef/chef", + "pr_number": 13529, + "pr_file": "habitat/plan.ps1", + "discussion_id": "1092469391", + "commented_code": "@@ -104,12 +104,13 @@ function Invoke-Build {\n }\n Write-BuildLine \" ** Running the chef project's 'rake install' to install the path-based gems so they look like any other installed gem.\"\n $install_attempt = 0\n- do {\n- Start-Sleep -Seconds 5\n- $install_attempt++\n- Write-BuildLine \"Install attempt $install_attempt\"\n- bundle exec rake install:local --trace=stdout\n- } while ((-not $?) -and ($install_attempt -lt 5))\n+ bundle exec rake install --trace=stdout # this needs to be 'bundle exec'd because a Rakefile makes reference to Bundler", + "comment_created_at": "2023-02-01T12:30:06+00:00", + "comment_author": "neha-p6", + "comment_body": "Latest update: Windows plan issue is fixed. But I have updated this code block to retry installing gems once, instead of 5 time in a loop. That just increases build time and prevents us from seeing the error sooner. If it did not install after 2 attempts, it is not going to. 
No point in trying 5 times.\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1095097557", + "pr_number": 13546, + "pr_file": ".buildkite/hooks/pre-command", + "created_at": "2023-02-02T21:21:59+00:00", + "commented_code": "set -eu\n\n# Only execute in the verify pipeline\n[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || exit 0\n[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/release$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc-canary$ ]] || exit 0", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1095097557", + "repo_full_name": "chef/chef", + "pr_number": 13546, + "pr_file": ".buildkite/hooks/pre-command", + "discussion_id": "1095097557", + "commented_code": "@@ -3,23 +3,23 @@\n set -eu\n \n # Only execute in the verify pipeline\n-[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || exit 0\n+[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/release$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc-canary$ ]] || exit 0", + "comment_created_at": "2023-02-02T21:21:59+00:00", + "comment_author": "jesseprieur", + "comment_body": "Should we do this as a proper regex? `(verify|validate/(release|adhoc|canary))$`", + "pr_file_module": null + }, + { + "comment_id": "1095099057", + "repo_full_name": "chef/chef", + "pr_number": 13546, + "pr_file": ".buildkite/hooks/pre-command", + "discussion_id": "1095097557", + "commented_code": "@@ -3,23 +3,23 @@\n set -eu\n \n # Only execute in the verify pipeline\n-[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || exit 0\n+[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/release$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc-canary$ ]] || exit 0", + "comment_created_at": "2023-02-02T21:23:53+00:00", + "comment_author": "evanahlberg", + "comment_body": "on main i did this.. 
forgot to change it: \r\n\r\n`[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/.* ]] || exit 0`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1087220519", + "pr_number": 13530, + "pr_file": ".buildkite/hooks/pre-command", + "created_at": "2023-01-25T21:54:03+00:00", + "commented_code": "set -eu\n\n# Only execute in the verify pipeline\n[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || exit 0\n[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/release$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc-canary$ ]] || exit 0", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1087220519", + "repo_full_name": "chef/chef", + "pr_number": 13530, + "pr_file": ".buildkite/hooks/pre-command", + "discussion_id": "1087220519", + "commented_code": "@@ -3,7 +3,7 @@\n set -eu\n \n # Only execute in the verify pipeline\n-[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || exit 0\n+[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/release$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc-canary$ ]] || exit 0", + "comment_created_at": "2023-01-25T21:54:03+00:00", + "comment_author": "tpowell-progress", + "comment_body": "shouldn't you be able to just match on leading `'validate/'`", + "pr_file_module": null + }, + { + "comment_id": "1087236070", + "repo_full_name": "chef/chef", + "pr_number": 13530, + "pr_file": ".buildkite/hooks/pre-command", + "discussion_id": "1087220519", + "commented_code": "@@ -3,7 +3,7 @@\n set -eu\n \n # Only execute in the verify pipeline\n-[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || exit 0\n+[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/release$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc-canary$ ]] || exit 0", + "comment_created_at": "2023-01-25T22:09:36+00:00", + "comment_author": "evanahlberg", + "comment_body": "yeah no problem, updated!", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-fail-fast-principle.md b/_reviewers/chef-fail-fast-principle.md index bfcb280..36186eb 100644 --- a/_reviewers/chef-fail-fast-principle.md +++ b/_reviewers/chef-fail-fast-principle.md @@ -32,121 +32,3 @@ bundle exec rake install --trace=stdout ``` Similarly, when configuring pipeline conditions, use concise regex patterns that are easy to understand and maintain rather than complex, hard-to-read conditions that might hide logic errors. - - -[ - { - "discussion_id": "1092469391", - "pr_number": 13529, - "pr_file": "habitat/plan.ps1", - "created_at": "2023-01-31T20:53:12+00:00", - "commented_code": "}\n Write-BuildLine \" ** Running the chef project's 'rake install' to install the path-based gems so they look like any other installed gem.\"\n $install_attempt = 0\n do {\n Start-Sleep -Seconds 5\n $install_attempt++\n Write-BuildLine \"Install attempt $install_attempt\"\n bundle exec rake install:local --trace=stdout\n } while ((-not $?) 
-and ($install_attempt -lt 5))\n bundle exec rake install --trace=stdout # this needs to be 'bundle exec'd because a Rakefile makes reference to Bundler", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1092469391", - "repo_full_name": "chef/chef", - "pr_number": 13529, - "pr_file": "habitat/plan.ps1", - "discussion_id": "1092469391", - "commented_code": "@@ -104,12 +104,13 @@ function Invoke-Build {\n }\n Write-BuildLine \" ** Running the chef project's 'rake install' to install the path-based gems so they look like any other installed gem.\"\n $install_attempt = 0\n- do {\n- Start-Sleep -Seconds 5\n- $install_attempt++\n- Write-BuildLine \"Install attempt $install_attempt\"\n- bundle exec rake install:local --trace=stdout\n- } while ((-not $?) -and ($install_attempt -lt 5))\n+ bundle exec rake install --trace=stdout # this needs to be 'bundle exec'd because a Rakefile makes reference to Bundler", - "comment_created_at": "2023-01-31T20:53:12+00:00", - "comment_author": "jaymzh", - "comment_body": "I don't think this file is related to the backport?", - "pr_file_module": null - }, - { - "comment_id": "1093114088", - "repo_full_name": "chef/chef", - "pr_number": 13529, - "pr_file": "habitat/plan.ps1", - "discussion_id": "1092469391", - "commented_code": "@@ -104,12 +104,13 @@ function Invoke-Build {\n }\n Write-BuildLine \" ** Running the chef project's 'rake install' to install the path-based gems so they look like any other installed gem.\"\n $install_attempt = 0\n- do {\n- Start-Sleep -Seconds 5\n- $install_attempt++\n- Write-BuildLine \"Install attempt $install_attempt\"\n- bundle exec rake install:local --trace=stdout\n- } while ((-not $?) -and ($install_attempt -lt 5))\n+ bundle exec rake install --trace=stdout # this needs to be 'bundle exec'd because a Rakefile makes reference to Bundler", - "comment_created_at": "2023-02-01T11:46:45+00:00", - "comment_author": "neha-p6", - "comment_body": "Yeah, it was for windows plan runner to spit out errors in verify pipeline.", - "pr_file_module": null - }, - { - "comment_id": "1093155925", - "repo_full_name": "chef/chef", - "pr_number": 13529, - "pr_file": "habitat/plan.ps1", - "discussion_id": "1092469391", - "commented_code": "@@ -104,12 +104,13 @@ function Invoke-Build {\n }\n Write-BuildLine \" ** Running the chef project's 'rake install' to install the path-based gems so they look like any other installed gem.\"\n $install_attempt = 0\n- do {\n- Start-Sleep -Seconds 5\n- $install_attempt++\n- Write-BuildLine \"Install attempt $install_attempt\"\n- bundle exec rake install:local --trace=stdout\n- } while ((-not $?) -and ($install_attempt -lt 5))\n+ bundle exec rake install --trace=stdout # this needs to be 'bundle exec'd because a Rakefile makes reference to Bundler", - "comment_created_at": "2023-02-01T12:30:06+00:00", - "comment_author": "neha-p6", - "comment_body": "Latest update: Windows plan issue is fixed. But I have updated this code block to retry installing gems once, instead of 5 time in a loop. That just increases build time and prevents us from seeing the error sooner. If it did not install after 2 attempts, it is not going to. 
No point in trying 5 times.\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1095097557", - "pr_number": 13546, - "pr_file": ".buildkite/hooks/pre-command", - "created_at": "2023-02-02T21:21:59+00:00", - "commented_code": "set -eu\n\n# Only execute in the verify pipeline\n[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || exit 0\n[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/release$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc-canary$ ]] || exit 0", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1095097557", - "repo_full_name": "chef/chef", - "pr_number": 13546, - "pr_file": ".buildkite/hooks/pre-command", - "discussion_id": "1095097557", - "commented_code": "@@ -3,23 +3,23 @@\n set -eu\n \n # Only execute in the verify pipeline\n-[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || exit 0\n+[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/release$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc-canary$ ]] || exit 0", - "comment_created_at": "2023-02-02T21:21:59+00:00", - "comment_author": "jesseprieur", - "comment_body": "Should we do this as a proper regex? `(verify|validate/(release|adhoc|canary))$`", - "pr_file_module": null - }, - { - "comment_id": "1095099057", - "repo_full_name": "chef/chef", - "pr_number": 13546, - "pr_file": ".buildkite/hooks/pre-command", - "discussion_id": "1095097557", - "commented_code": "@@ -3,23 +3,23 @@\n set -eu\n \n # Only execute in the verify pipeline\n-[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || exit 0\n+[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/release$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc-canary$ ]] || exit 0", - "comment_created_at": "2023-02-02T21:23:53+00:00", - "comment_author": "evanahlberg", - "comment_body": "on main i did this.. 
forgot to change it: \r\n\r\n`[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/.* ]] || exit 0`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1087220519", - "pr_number": 13530, - "pr_file": ".buildkite/hooks/pre-command", - "created_at": "2023-01-25T21:54:03+00:00", - "commented_code": "set -eu\n\n# Only execute in the verify pipeline\n[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || exit 0\n[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/release$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc-canary$ ]] || exit 0", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1087220519", - "repo_full_name": "chef/chef", - "pr_number": 13530, - "pr_file": ".buildkite/hooks/pre-command", - "discussion_id": "1087220519", - "commented_code": "@@ -3,7 +3,7 @@\n set -eu\n \n # Only execute in the verify pipeline\n-[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || exit 0\n+[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/release$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc-canary$ ]] || exit 0", - "comment_created_at": "2023-01-25T21:54:03+00:00", - "comment_author": "tpowell-progress", - "comment_body": "shouldn't you be able to just match on leading `'validate/'`", - "pr_file_module": null - }, - { - "comment_id": "1087236070", - "repo_full_name": "chef/chef", - "pr_number": 13530, - "pr_file": ".buildkite/hooks/pre-command", - "discussion_id": "1087220519", - "commented_code": "@@ -3,7 +3,7 @@\n set -eu\n \n # Only execute in the verify pipeline\n-[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || exit 0\n+[[ \"$BUILDKITE_PIPELINE_NAME\" =~ verify$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/release$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc$ ]] || [[ \"$BUILDKITE_PIPELINE_NAME\" =~ validate/adhoc-canary$ ]] || exit 0", - "comment_created_at": "2023-01-25T22:09:36+00:00", - "comment_author": "evanahlberg", - "comment_body": "yeah no problem, updated!", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-fail-gracefully-always.json b/_reviewers/chef-fail-gracefully-always.json new file mode 100644 index 0000000..b244313 --- /dev/null +++ b/_reviewers/chef-fail-gracefully-always.json @@ -0,0 +1,392 @@ +[ + { + "discussion_id": "2110148468", + "pr_number": 15015, + "pr_file": "lib/chef/provider/cron.rb", + "created_at": "2025-05-27T20:24:45+00:00", + "commented_code": "write_exception = false\n\n tempname = Dir::Tmpname.create([\"crontab-\"]) {}\n TargetIO::File.open(tempname, \"w\") do |tempfile|\n io = TargetIO::File.open(tempname, \"w\") do |tempfile|\n tempfile.write(crontab)\n end\n\n tempname = io.path if ChefConfig::Config.target_mode?\n\n so = shell_out!(\"crontab -u #{new_resource.user} #{tempname}\")\n\n TargetIO::File.unlink(tempname)\n TargetIO::File.unlink(tempname) if ChefConfig::Config.target_mode?", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2110148468", + "repo_full_name": "chef/chef", + "pr_number": 15015, + "pr_file": "lib/chef/provider/cron.rb", + "discussion_id": "2110148468", + "commented_code": "@@ -206,13 +206,15 @@ def write_crontab(crontab)\n write_exception = false\n \n tempname = Dir::Tmpname.create([\"crontab-\"]) {}\n- TargetIO::File.open(tempname, \"w\") do |tempfile|\n+ io = 
TargetIO::File.open(tempname, \"w\") do |tempfile|\n tempfile.write(crontab)\n end\n \n+ tempname = io.path if ChefConfig::Config.target_mode?\n+\n so = shell_out!(\"crontab -u #{new_resource.user} #{tempname}\")\n \n- TargetIO::File.unlink(tempname)\n+ TargetIO::File.unlink(tempname) if ChefConfig::Config.target_mode?", + "comment_created_at": "2025-05-27T20:24:45+00:00", + "comment_author": "jaymzh", + "comment_body": "we still need to remove the local tmpfile, even if we're not in targetmode, just like we did before. ", + "pr_file_module": null + }, + { + "comment_id": "2115981160", + "repo_full_name": "chef/chef", + "pr_number": 15015, + "pr_file": "lib/chef/provider/cron.rb", + "discussion_id": "2110148468", + "commented_code": "@@ -206,13 +206,15 @@ def write_crontab(crontab)\n write_exception = false\n \n tempname = Dir::Tmpname.create([\"crontab-\"]) {}\n- TargetIO::File.open(tempname, \"w\") do |tempfile|\n+ io = TargetIO::File.open(tempname, \"w\") do |tempfile|\n tempfile.write(crontab)\n end\n \n+ tempname = io.path if ChefConfig::Config.target_mode?\n+\n so = shell_out!(\"crontab -u #{new_resource.user} #{tempname}\")\n \n- TargetIO::File.unlink(tempname)\n+ TargetIO::File.unlink(tempname) if ChefConfig::Config.target_mode?", + "comment_created_at": "2025-05-30T13:56:30+00:00", + "comment_author": "thheinen", + "comment_body": "Ouch! Good catch, thanks.", + "pr_file_module": null + }, + { + "comment_id": "2121703656", + "repo_full_name": "chef/chef", + "pr_number": 15015, + "pr_file": "lib/chef/provider/cron.rb", + "discussion_id": "2110148468", + "commented_code": "@@ -206,13 +206,15 @@ def write_crontab(crontab)\n write_exception = false\n \n tempname = Dir::Tmpname.create([\"crontab-\"]) {}\n- TargetIO::File.open(tempname, \"w\") do |tempfile|\n+ io = TargetIO::File.open(tempname, \"w\") do |tempfile|\n tempfile.write(crontab)\n end\n \n+ tempname = io.path if ChefConfig::Config.target_mode?\n+\n so = shell_out!(\"crontab -u #{new_resource.user} #{tempname}\")\n \n- TargetIO::File.unlink(tempname)\n+ TargetIO::File.unlink(tempname) if ChefConfig::Config.target_mode?", + "comment_created_at": "2025-06-02T16:56:12+00:00", + "comment_author": "johnmccrae", + "comment_body": "@thheinen is this updated per Phil's last comment, or can we merge it?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1906074481", + "pr_number": 14770, + "pr_file": "lib/chef/resource/archive_file.rb", + "created_at": "2025-01-07T21:40:32+00:00", + "commented_code": "archive.each_entry do |e|\n FileUtils.chown(new_resource.owner, new_resource.group, \"#{new_resource.destination}/#{e.pathname}\")\n end\n archive.close", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1906074481", + "repo_full_name": "chef/chef", + "pr_number": 14770, + "pr_file": "lib/chef/resource/archive_file.rb", + "discussion_id": "1906074481", + "commented_code": "@@ -129,6 +129,7 @@ class ArchiveFile < Chef::Resource\n archive.each_entry do |e|\n FileUtils.chown(new_resource.owner, new_resource.group, \"#{new_resource.destination}/#{e.pathname}\")\n end\n+ archive.close", + "comment_created_at": "2025-01-07T21:40:32+00:00", + "comment_author": "tpowell-progress", + "comment_body": "Looks like [`open_filename`](https://github.com/chef/ffi-libarchive/blob/545d3948e7a78835da48737c1490516905fad2e6/lib/ffi-libarchive/writer.rb#L5) takes a block, and so this would be cleaner and automatically `ensure` closure if these calls were converted to that pattern.", + "pr_file_module": null + } + ] 
+ }, + { + "discussion_id": "1371767924", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "created_at": "2023-10-25T13:30:41+00:00", + "commented_code": "class Chef\n class Resource\n class ChocolateyInstaller < Chef::Resource\n provides :chocolatey_installer\n\n description \"Use the chocolatey_installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n introduced \"18.3\"\n examples <<~DOC\n **Install Chocolatey**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n end\n ```\n\n **Uninstall Chocolatey**\n\n ```ruby\n chocolatey_installer 'Some random verbiage' do\n action :uninstall\n end\n ```\n\n **Install Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n download_url \"https://www.contoso.com/foo\"\n chocolatey_version '2.12.24'\n end\n ```\n\n **Upgrade Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :upgrade\n chocolatey_version '2.12.24'\n end\n ```\n DOC\n\n allowed_actions :install, :uninstall, :upgrade\n\n property :download_url, String,\n description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n\n property :chocolatey_version, String,\n description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n\n property :use_native_unzip, [TrueClass, FalseClass], default: false,\n description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n\n property :ignore_proxy, [TrueClass, FalseClass], default: false,\n description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n\n property :proxy_url, String,\n description: \"Specifies the proxy URL to use during the download.\"\n\n property :proxy_user, String,\n description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n\n property :proxy_password, String,\n description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n\n load_current_value do\n current_state = is_choco_installed?\n current_value_does_not_exist! 
if current_state == false\n current_state\n end\n\n def is_choco_installed?\n ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n end\n\n def get_choco_version\n powershell_exec(\"choco --version\").result\n end\n\n def existing_version\n Gem::Version.new(get_choco_version)\n end\n\n def define_resource_requirements\n requirements.assert(:install, :upgrade).each do |a|\n a.assertion do\n (new_resource.proxy_user.nil? != new_resource.proxy_password.nil?)", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1371767924", + "repo_full_name": "chef/chef", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "discussion_id": "1371767924", + "commented_code": "@@ -0,0 +1,195 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the chocolatey_installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. 
Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version\n+ Gem::Version.new(get_choco_version)\n+ end\n+\n+ def define_resource_requirements\n+ requirements.assert(:install, :upgrade).each do |a|\n+ a.assertion do\n+ (new_resource.proxy_user.nil? != new_resource.proxy_password.nil?)", + "comment_created_at": "2023-10-25T13:30:41+00:00", + "comment_author": "tpowell-progress", + "comment_body": "Go ahead and remove the parens and comment that we want an error if only one of (proxy_user, proxy_password) are true.", + "pr_file_module": null + }, + { + "comment_id": "1376479210", + "repo_full_name": "chef/chef", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "discussion_id": "1371767924", + "commented_code": "@@ -0,0 +1,195 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the chocolatey_installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. 
This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version\n+ Gem::Version.new(get_choco_version)\n+ end\n+\n+ def define_resource_requirements\n+ requirements.assert(:install, :upgrade).each do |a|\n+ a.assertion do\n+ (new_resource.proxy_user.nil? != new_resource.proxy_password.nil?)", + "comment_created_at": "2023-10-30T16:10:54+00:00", + "comment_author": "johnmccrae", + "comment_body": "done", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1004881372", + "pr_number": 13107, + "pr_file": "lib/chef/chef_fs/file_system.rb", + "created_at": "2022-10-25T19:23:01+00:00", + "commented_code": "ui.output \"Would update #{dest_path}\" if ui\n else\n src_value = src_entry.read if src_value.nil?\n result[\"total\"] += 1\n dest_entry.write(src_value)\n result[\"success_count\"] += 1\n ui.output \"Updated #{dest_path}\" if ui\n end\n end\n end\n end\n end\n rescue RubyFileError => e\n # result[\"failed\"].append(username)", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1004881372", + "repo_full_name": "chef/chef", + "pr_number": 13107, + "pr_file": "lib/chef/chef_fs/file_system.rb", + "discussion_id": "1004881372", + "commented_code": "@@ -389,14 +391,17 @@ def copy_entries(src_entry, dest_entry, new_dest_parent, recurse_depth, options,\n ui.output \"Would update #{dest_path}\" if ui\n else\n src_value = src_entry.read if src_value.nil?\n+ result[\"total\"] += 1\n dest_entry.write(src_value)\n+ result[\"success_count\"] += 1\n ui.output \"Updated #{dest_path}\" if ui\n end\n end\n end\n end\n end\n rescue RubyFileError => e\n+ # result[\"failed\"].append(username)", + "comment_created_at": "2022-10-25T19:23:01+00:00", + "comment_author": "marcparadise", + "comment_body": "We should be capturing failures here and in the other exception cases, so that we can return failure information (file and error) as well. 
", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "986222711", + "pr_number": 13223, + "pr_file": "lib/chef/provider/user/linux.rb", + "created_at": "2022-10-03T20:57:22+00:00", + "commented_code": "@change_desc << \"change #{user_attrib} from #{cur_val} to #{new_val}\"\n end\n end\n\n !@change_desc.empty?", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "986222711", + "repo_full_name": "chef/chef", + "pr_number": 13223, + "pr_file": "lib/chef/provider/user/linux.rb", + "discussion_id": "986222711", + "commented_code": "@@ -37,6 +37,8 @@ def compare_user\n @change_desc << \"change #{user_attrib} from #{cur_val} to #{new_val}\"\n end\n end\n+\n+ !@change_desc.empty?", + "comment_created_at": "2022-10-03T20:57:22+00:00", + "comment_author": "marcparadise", + "comment_body": "This is a little opaque - this function relies on the fact that super also modifies `@change_desc`. That's internal to the base class though, and not something that we should rely on.\r\n\r\nCould we instead do something like: \r\n```\r\nuser_changed = super\r\n...\r\n-snip-\r\n...\r\nuser_changed || !change_desc.empty? \r\n```\r\n", + "pr_file_module": null + }, + { + "comment_id": "986726796", + "repo_full_name": "chef/chef", + "pr_number": 13223, + "pr_file": "lib/chef/provider/user/linux.rb", + "discussion_id": "986222711", + "commented_code": "@@ -37,6 +37,8 @@ def compare_user\n @change_desc << \"change #{user_attrib} from #{cur_val} to #{new_val}\"\n end\n end\n+\n+ !@change_desc.empty?", + "comment_created_at": "2022-10-04T11:01:54+00:00", + "comment_author": "fretb", + "comment_body": "Hi Marc, good input, I changed it.", + "pr_file_module": null + }, + { + "comment_id": "987630641", + "repo_full_name": "chef/chef", + "pr_number": 13223, + "pr_file": "lib/chef/provider/user/linux.rb", + "discussion_id": "986222711", + "commented_code": "@@ -37,6 +37,8 @@ def compare_user\n @change_desc << \"change #{user_attrib} from #{cur_val} to #{new_val}\"\n end\n end\n+\n+ !@change_desc.empty?", + "comment_created_at": "2022-10-05T07:52:13+00:00", + "comment_author": "Tensibai", + "comment_body": "In this case @change_desc should be initialized after the call to super too or the risk is that super didn't create the change_desc property and ` @change_desc << \"change #{user_attrib} from #{cur_val} to #{new_val}\"` will raise a no method error for << on nil.\r\n", + "pr_file_module": null + }, + { + "comment_id": "987720632", + "repo_full_name": "chef/chef", + "pr_number": 13223, + "pr_file": "lib/chef/provider/user/linux.rb", + "discussion_id": "986222711", + "commented_code": "@@ -37,6 +37,8 @@ def compare_user\n @change_desc << \"change #{user_attrib} from #{cur_val} to #{new_val}\"\n end\n end\n+\n+ !@change_desc.empty?", + "comment_created_at": "2022-10-05T09:29:20+00:00", + "comment_author": "Tensibai", + "comment_body": "Another approach could be as in the mac user provider to rewrite the whole method: https://github.com/chef/chef/blob/main/lib/chef/provider/user/mac.rb#L222-L234 ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "377245838", + "pr_number": 8340, + "pr_file": "lib/chef/provider/cron.rb", + "created_at": "2020-02-10T18:43:32+00:00", + "commented_code": "end\n attr_accessor :cron_exists, :cron_empty\n\n def check_resource_semantics!\n return if Process.euid == 0\n\n etype = Chef::Exceptions::Cron\n begin\n nuid = Etc.getpwnam(new_resource.user).uid\n rescue ArgumentError\n emessage = \"Error in #{new_resource.name}, #{new_resource.user} does not 
exists on this system.\"\n events.provider_requirement_failed(new_resource.action, new_resource.name, etype, emessage)\n if why_run?\n events.whyrun_assumption(new_resource.action, new_resource.name, \"Assuming user #{new_resource.user} would have been acted upon.\")\n else\n raise etype, emessage\n end\n end\n return if nuid == Process.euid\n\n emessage = \"Error in #{new_resource.name}, Chef can't modify another user crontab when not running as root.\"\n events.provider_requirement_failed(new_resource.action, new_resource.name, etype, emessage)\n raise etype, emessage\n end", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "377245838", + "repo_full_name": "chef/chef", + "pr_number": 8340, + "pr_file": "lib/chef/provider/cron.rb", + "discussion_id": "377245838", + "commented_code": "@@ -41,11 +41,34 @@ def initialize(new_resource, run_context)\n end\n attr_accessor :cron_exists, :cron_empty\n \n+ def check_resource_semantics!\n+ return if Process.euid == 0\n+\n+ etype = Chef::Exceptions::Cron\n+ begin\n+ nuid = Etc.getpwnam(new_resource.user).uid\n+ rescue ArgumentError\n+ emessage = \"Error in #{new_resource.name}, #{new_resource.user} does not exists on this system.\"\n+ events.provider_requirement_failed(new_resource.action, new_resource.name, etype, emessage)\n+ if why_run?\n+ events.whyrun_assumption(new_resource.action, new_resource.name, \"Assuming user #{new_resource.user} would have been acted upon.\")\n+ else\n+ raise etype, emessage\n+ end\n+ end\n+ return if nuid == Process.euid\n+\n+ emessage = \"Error in #{new_resource.name}, Chef can't modify another user crontab when not running as root.\"\n+ events.provider_requirement_failed(new_resource.action, new_resource.name, etype, emessage)\n+ raise etype, emessage\n+ end", + "comment_created_at": "2020-02-10T18:43:32+00:00", + "comment_author": "lamont-granquist", + "comment_body": "this should probably be moved to `defined_resource_requirements` and use the typical assertion syntax there. 
if the point is to protect load_current_resource from exploding, then LCR really needs to be made 'safe' for the cases when the user doesn't exist and/or the thing isn't being run as root (should just return a bunch of nil stuff for what it can't get at).\r\n\r\nwhat you're doing here with directly handling your own why run assertions is waaaaaay too complicated to follow for 99% of humans.", + "pr_file_module": null + }, + { + "comment_id": "377363770", + "repo_full_name": "chef/chef", + "pr_number": 8340, + "pr_file": "lib/chef/provider/cron.rb", + "discussion_id": "377245838", + "commented_code": "@@ -41,11 +41,34 @@ def initialize(new_resource, run_context)\n end\n attr_accessor :cron_exists, :cron_empty\n \n+ def check_resource_semantics!\n+ return if Process.euid == 0\n+\n+ etype = Chef::Exceptions::Cron\n+ begin\n+ nuid = Etc.getpwnam(new_resource.user).uid\n+ rescue ArgumentError\n+ emessage = \"Error in #{new_resource.name}, #{new_resource.user} does not exists on this system.\"\n+ events.provider_requirement_failed(new_resource.action, new_resource.name, etype, emessage)\n+ if why_run?\n+ events.whyrun_assumption(new_resource.action, new_resource.name, \"Assuming user #{new_resource.user} would have been acted upon.\")\n+ else\n+ raise etype, emessage\n+ end\n+ end\n+ return if nuid == Process.euid\n+\n+ emessage = \"Error in #{new_resource.name}, Chef can't modify another user crontab when not running as root.\"\n+ events.provider_requirement_failed(new_resource.action, new_resource.name, etype, emessage)\n+ raise etype, emessage\n+ end", + "comment_created_at": "2020-02-10T22:48:27+00:00", + "comment_author": "Tensibai", + "comment_body": "Well, it's starting to be quite old :) I think this was made on 13 codebase at first.\r\n\r\nI'll see to understand the define_resource_requirement then, any hint to bootstrap me? ", + "pr_file_module": null + }, + { + "comment_id": "377364666", + "repo_full_name": "chef/chef", + "pr_number": 8340, + "pr_file": "lib/chef/provider/cron.rb", + "discussion_id": "377245838", + "commented_code": "@@ -41,11 +41,34 @@ def initialize(new_resource, run_context)\n end\n attr_accessor :cron_exists, :cron_empty\n \n+ def check_resource_semantics!\n+ return if Process.euid == 0\n+\n+ etype = Chef::Exceptions::Cron\n+ begin\n+ nuid = Etc.getpwnam(new_resource.user).uid\n+ rescue ArgumentError\n+ emessage = \"Error in #{new_resource.name}, #{new_resource.user} does not exists on this system.\"\n+ events.provider_requirement_failed(new_resource.action, new_resource.name, etype, emessage)\n+ if why_run?\n+ events.whyrun_assumption(new_resource.action, new_resource.name, \"Assuming user #{new_resource.user} would have been acted upon.\")\n+ else\n+ raise etype, emessage\n+ end\n+ end\n+ return if nuid == Process.euid\n+\n+ emessage = \"Error in #{new_resource.name}, Chef can't modify another user crontab when not running as root.\"\n+ events.provider_requirement_failed(new_resource.action, new_resource.name, etype, emessage)\n+ raise etype, emessage\n+ end", + "comment_created_at": "2020-02-10T22:50:38+00:00", + "comment_author": "Tensibai", + "comment_body": "BTW, the goal is to fail the run with an informative message if the user doesn't exist as the resource can't do it's job. 
", + "pr_file_module": null + }, + { + "comment_id": "694170633", + "repo_full_name": "chef/chef", + "pr_number": 8340, + "pr_file": "lib/chef/provider/cron.rb", + "discussion_id": "377245838", + "commented_code": "@@ -41,11 +41,34 @@ def initialize(new_resource, run_context)\n end\n attr_accessor :cron_exists, :cron_empty\n \n+ def check_resource_semantics!\n+ return if Process.euid == 0\n+\n+ etype = Chef::Exceptions::Cron\n+ begin\n+ nuid = Etc.getpwnam(new_resource.user).uid\n+ rescue ArgumentError\n+ emessage = \"Error in #{new_resource.name}, #{new_resource.user} does not exists on this system.\"\n+ events.provider_requirement_failed(new_resource.action, new_resource.name, etype, emessage)\n+ if why_run?\n+ events.whyrun_assumption(new_resource.action, new_resource.name, \"Assuming user #{new_resource.user} would have been acted upon.\")\n+ else\n+ raise etype, emessage\n+ end\n+ end\n+ return if nuid == Process.euid\n+\n+ emessage = \"Error in #{new_resource.name}, Chef can't modify another user crontab when not running as root.\"\n+ events.provider_requirement_failed(new_resource.action, new_resource.name, etype, emessage)\n+ raise etype, emessage\n+ end", + "comment_created_at": "2021-08-23T17:29:03+00:00", + "comment_author": "lamont-granquist", + "comment_body": "here's an example of trying to modify a group that doesn't exist, which is the closest i could find offhand:\r\n\r\nhttps://github.com/chef/chef/blob/master/lib/chef/provider/group.rb#L54-L59", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "755499124", + "pr_number": 11579, + "pr_file": "lib/chef/provider/mount/linux.rb", + "created_at": "2021-11-23T20:59:26+00:00", + "commented_code": "# \"findmnt\" outputs the mount points with volume.\n # Convert the mount_point of the resource to a real path in case it\n # contains symlinks in its parents dirs.\n def loop_mount_points\n # get loop_mount_points only if not initialized earlier\n @loop_mount_points ||= shell_out!(\"losetup -a\").stdout", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "755499124", + "repo_full_name": "chef/chef", + "pr_number": 11579, + "pr_file": "lib/chef/provider/mount/linux.rb", + "discussion_id": "755499124", + "commented_code": "@@ -29,10 +29,13 @@ class Linux < Chef::Provider::Mount::Mount\n # \"findmnt\" outputs the mount points with volume.\n # Convert the mount_point of the resource to a real path in case it\n # contains symlinks in its parents dirs.\n+ def loop_mount_points\n+ # get loop_mount_points only if not initialized earlier\n+ @loop_mount_points ||= shell_out!(\"losetup -a\").stdout", + "comment_created_at": "2021-11-23T20:59:26+00:00", + "comment_author": "lamont-granquist", + "comment_body": "We should probably `rescue Errno::ENOENT` and then return empty string here so that processing can continue without matching if losetup doesn't exist.\r\n", + "pr_file_module": null + }, + { + "comment_id": "759195679", + "repo_full_name": "chef/chef", + "pr_number": 11579, + "pr_file": "lib/chef/provider/mount/linux.rb", + "discussion_id": "755499124", + "commented_code": "@@ -29,10 +29,13 @@ class Linux < Chef::Provider::Mount::Mount\n # \"findmnt\" outputs the mount points with volume.\n # Convert the mount_point of the resource to a real path in case it\n # contains symlinks in its parents dirs.\n+ def loop_mount_points\n+ # get loop_mount_points only if not initialized earlier\n+ @loop_mount_points ||= shell_out!(\"losetup -a\").stdout", + "comment_created_at": "2021-11-30T11:41:43+00:00", + 
"comment_author": "msys-sgarg", + "comment_body": "did the change please check", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "549525942", + "pr_number": 10776, + "pr_file": "lib/chef/provider/service/systemd.rb", + "created_at": "2020-12-29T00:21:18+00:00", + "commented_code": "end\n end\n\n def systemd_service_status\n # If the status has already been collected, return that\n return @service_status unless @service_status.nil?\n\n # Collect all the status information for a service and returns it at once\n options, args = get_systemctl_options_args\n s = shell_out!(\"#{systemctl_path} #{args} show -p UnitFileState -p ActiveState #{new_resource.service_name}\", options)\n # e.g. /bin/systemctl --system show -p UnitFileState -p ActiveState sshd.service\n # Returns something like:\n # ActiveState=active\n # UnitFileState=enabled\n status = {}\n s.stdout.each_line do |line|\n k, v = line.strip.split(\"=\")\n status[k] = v\n end\n # Assert requisite keys exist\n unless status.key?(\"UnitFileState\") && status.key?(\"ActiveState\")\n raise Mixlib::ShellOut::ShellCommandFailed, \"'#{systemctl_path} show' not reporting status for #{new_resource.service_name}!\"", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "549525942", + "repo_full_name": "chef/chef", + "pr_number": 10776, + "pr_file": "lib/chef/provider/service/systemd.rb", + "discussion_id": "549525942", + "commented_code": "@@ -76,6 +76,31 @@ def define_resource_requirements\n end\n end\n \n+ def systemd_service_status\n+ # If the status has already been collected, return that\n+ return @service_status unless @service_status.nil?\n+\n+ # Collect all the status information for a service and returns it at once\n+ options, args = get_systemctl_options_args\n+ s = shell_out!(\"#{systemctl_path} #{args} show -p UnitFileState -p ActiveState #{new_resource.service_name}\", options)\n+ # e.g. /bin/systemctl --system show -p UnitFileState -p ActiveState sshd.service\n+ # Returns something like:\n+ # ActiveState=active\n+ # UnitFileState=enabled\n+ status = {}\n+ s.stdout.each_line do |line|\n+ k, v = line.strip.split(\"=\")\n+ status[k] = v\n+ end\n+ # Assert requisite keys exist\n+ unless status.key?(\"UnitFileState\") && status.key?(\"ActiveState\")\n+ raise Mixlib::ShellOut::ShellCommandFailed, \"'#{systemctl_path} show' not reporting status for #{new_resource.service_name}!\"", + "comment_created_at": "2020-12-29T00:21:18+00:00", + "comment_author": "phiggins", + "comment_body": "I don't think raising a Mixlib::Shellout exception here is the right thing to do. Using that exception class implies that the error came from Mixlib::Shellout which isn't the case. With no better option, a RuntimeException (ie, a bare call to `raise` is probably what you want here.", + "pr_file_module": null + }, + { + "comment_id": "549538933", + "repo_full_name": "chef/chef", + "pr_number": 10776, + "pr_file": "lib/chef/provider/service/systemd.rb", + "discussion_id": "549525942", + "commented_code": "@@ -76,6 +76,31 @@ def define_resource_requirements\n end\n end\n \n+ def systemd_service_status\n+ # If the status has already been collected, return that\n+ return @service_status unless @service_status.nil?\n+\n+ # Collect all the status information for a service and returns it at once\n+ options, args = get_systemctl_options_args\n+ s = shell_out!(\"#{systemctl_path} #{args} show -p UnitFileState -p ActiveState #{new_resource.service_name}\", options)\n+ # e.g. 
/bin/systemctl --system show -p UnitFileState -p ActiveState sshd.service\n+ # Returns something like:\n+ # ActiveState=active\n+ # UnitFileState=enabled\n+ status = {}\n+ s.stdout.each_line do |line|\n+ k, v = line.strip.split(\"=\")\n+ status[k] = v\n+ end\n+ # Assert requisite keys exist\n+ unless status.key?(\"UnitFileState\") && status.key?(\"ActiveState\")\n+ raise Mixlib::ShellOut::ShellCommandFailed, \"'#{systemctl_path} show' not reporting status for #{new_resource.service_name}!\"", + "comment_created_at": "2020-12-29T01:44:45+00:00", + "comment_author": "joshuamiller01", + "comment_body": "My intent here is to surface that the shellout to systemctl failed; if `RuntimeException` is the preference, that works for me.", + "pr_file_module": null + }, + { + "comment_id": "549540924", + "repo_full_name": "chef/chef", + "pr_number": 10776, + "pr_file": "lib/chef/provider/service/systemd.rb", + "discussion_id": "549525942", + "commented_code": "@@ -76,6 +76,31 @@ def define_resource_requirements\n end\n end\n \n+ def systemd_service_status\n+ # If the status has already been collected, return that\n+ return @service_status unless @service_status.nil?\n+\n+ # Collect all the status information for a service and returns it at once\n+ options, args = get_systemctl_options_args\n+ s = shell_out!(\"#{systemctl_path} #{args} show -p UnitFileState -p ActiveState #{new_resource.service_name}\", options)\n+ # e.g. /bin/systemctl --system show -p UnitFileState -p ActiveState sshd.service\n+ # Returns something like:\n+ # ActiveState=active\n+ # UnitFileState=enabled\n+ status = {}\n+ s.stdout.each_line do |line|\n+ k, v = line.strip.split(\"=\")\n+ status[k] = v\n+ end\n+ # Assert requisite keys exist\n+ unless status.key?(\"UnitFileState\") && status.key?(\"ActiveState\")\n+ raise Mixlib::ShellOut::ShellCommandFailed, \"'#{systemctl_path} show' not reporting status for #{new_resource.service_name}!\"", + "comment_created_at": "2020-12-29T01:57:02+00:00", + "comment_author": "phiggins", + "comment_body": "@tas50 @lamont-granquist thoughts on this?", + "pr_file_module": null + }, + { + "comment_id": "551562835", + "repo_full_name": "chef/chef", + "pr_number": 10776, + "pr_file": "lib/chef/provider/service/systemd.rb", + "discussion_id": "549525942", + "commented_code": "@@ -76,6 +76,31 @@ def define_resource_requirements\n end\n end\n \n+ def systemd_service_status\n+ # If the status has already been collected, return that\n+ return @service_status unless @service_status.nil?\n+\n+ # Collect all the status information for a service and returns it at once\n+ options, args = get_systemctl_options_args\n+ s = shell_out!(\"#{systemctl_path} #{args} show -p UnitFileState -p ActiveState #{new_resource.service_name}\", options)\n+ # e.g. 
/bin/systemctl --system show -p UnitFileState -p ActiveState sshd.service\n+ # Returns something like:\n+ # ActiveState=active\n+ # UnitFileState=enabled\n+ status = {}\n+ s.stdout.each_line do |line|\n+ k, v = line.strip.split(\"=\")\n+ status[k] = v\n+ end\n+ # Assert requisite keys exist\n+ unless status.key?(\"UnitFileState\") && status.key?(\"ActiveState\")\n+ raise Mixlib::ShellOut::ShellCommandFailed, \"'#{systemctl_path} show' not reporting status for #{new_resource.service_name}!\"", + "comment_created_at": "2021-01-04T20:54:02+00:00", + "comment_author": "joshuamiller01", + "comment_body": "I'm going to change this to `raise Chef::Exceptions::Service` per Lamont's suggestion", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "644955836", + "pr_number": 11241, + "pr_file": "knife/lib/chef/knife/client_create.rb", + "created_at": "2021-06-03T16:35:17+00:00", + "commented_code": "client.public_key File.read(File.expand_path(config[:public_key]))\n end\n\n # Check the file before creating the client so the api is more transactional.\n if config[:file]\n file = config[:file]\n\n unless File.writable?(File.dirname(file))\n ui.fatal \"Dir #{File.dirname(file)} is not writable. Check permissions.\"\n exit 1\n end\n\n unless File.writable?(file)", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "644955836", + "repo_full_name": "chef/chef", + "pr_number": 11241, + "pr_file": "knife/lib/chef/knife/client_create.rb", + "discussion_id": "644955836", + "commented_code": "@@ -81,6 +81,21 @@ def run\n client.public_key File.read(File.expand_path(config[:public_key]))\n end\n \n+ # Check the file before creating the client so the api is more transactional.\n+ if config[:file]\n+ file = config[:file]\n+\n+ unless File.writable?(File.dirname(file))\n+ ui.fatal \"Dir #{File.dirname(file)} is not writable. Check permissions.\"\n+ exit 1\n+ end\n+\n+ unless File.writable?(file)", + "comment_created_at": "2021-06-03T16:35:17+00:00", + "comment_author": "marcparadise", + "comment_body": "If the file does not exist, the File.writable? will return false: \r\n\r\n![capture_036](https://user-images.githubusercontent.com/1130204/120680301-2321e980-c468-11eb-8d11-9f81b8fcdd04.png)", + "pr_file_module": null + }, + { + "comment_id": "645288328", + "repo_full_name": "chef/chef", + "pr_number": 11241, + "pr_file": "knife/lib/chef/knife/client_create.rb", + "discussion_id": "644955836", + "commented_code": "@@ -81,6 +81,21 @@ def run\n client.public_key File.read(File.expand_path(config[:public_key]))\n end\n \n+ # Check the file before creating the client so the api is more transactional.\n+ if config[:file]\n+ file = config[:file]\n+\n+ unless File.writable?(File.dirname(file))\n+ ui.fatal \"Dir #{File.dirname(file)} is not writable. Check permissions.\"\n+ exit 1\n+ end\n+\n+ unless File.writable?(file)", + "comment_created_at": "2021-06-04T04:55:44+00:00", + "comment_author": "snehaldwivedi", + "comment_body": "yes, if the file is writable then only it will return true else false. You can also go through documentation [here](https://ruby-doc.org/core-2.5.0/File.html). Now as per your example:\r\n1. ` File.writable?(\"does-not-exist\")` it has checked if the given file is writable or not and returns `false` as it doesn't exist.\r\n2. `File.write(\"does-not-exist\", \"blah\")` it returns 4. Here it will create a new file \"does-not-exist\" and add a content \"blah\" can returns length of the content.\r\n3. 
` File.writable?(\"does-not-exist\")` now after 2nd point where the file is not created now ` File.writable?` is return `true`.", + "pr_file_module": null + }, + { + "comment_id": "655453068", + "repo_full_name": "chef/chef", + "pr_number": 11241, + "pr_file": "knife/lib/chef/knife/client_create.rb", + "discussion_id": "644955836", + "commented_code": "@@ -81,6 +81,21 @@ def run\n client.public_key File.read(File.expand_path(config[:public_key]))\n end\n \n+ # Check the file before creating the client so the api is more transactional.\n+ if config[:file]\n+ file = config[:file]\n+\n+ unless File.writable?(File.dirname(file))\n+ ui.fatal \"Dir #{File.dirname(file)} is not writable. Check permissions.\"\n+ exit 1\n+ end\n+\n+ unless File.writable?(file)", + "comment_created_at": "2021-06-21T14:57:09+00:00", + "comment_author": "marcparadise", + "comment_body": "My concern is less functional, and more user experience - it is correct that the directory is not writable and the action should fail. \r\n\r\nBut in the case where the directory for `/tmp/test/afile.out` does not exist, we will report an error that `Dir '/tmp/test/' is not writable. Check permissions`. This is misleading, and we could provide more actionable feedback than \"check permissions\". It would be better to specifically verify that the directory exists and report that error independently of permissions-related errors. ", + "pr_file_module": null + }, + { + "comment_id": "655869743", + "repo_full_name": "chef/chef", + "pr_number": 11241, + "pr_file": "knife/lib/chef/knife/client_create.rb", + "discussion_id": "644955836", + "commented_code": "@@ -81,6 +81,21 @@ def run\n client.public_key File.read(File.expand_path(config[:public_key]))\n end\n \n+ # Check the file before creating the client so the api is more transactional.\n+ if config[:file]\n+ file = config[:file]\n+\n+ unless File.writable?(File.dirname(file))\n+ ui.fatal \"Dir #{File.dirname(file)} is not writable. Check permissions.\"\n+ exit 1\n+ end\n+\n+ unless File.writable?(file)", + "comment_created_at": "2021-06-22T04:39:00+00:00", + "comment_author": "snehaldwivedi", + "comment_body": "@marcparadise Got your point. I Will update the condition accordingly. ", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-fail-gracefully-always.md b/_reviewers/chef-fail-gracefully-always.md index 6b67cd1..bae2f0f 100644 --- a/_reviewers/chef-fail-gracefully-always.md +++ b/_reviewers/chef-fail-gracefully-always.md @@ -72,397 +72,3 @@ end ``` Following these patterns will create more resilient code that fails predictably, provides clear diagnostics, and properly manages resources even during failures. 
- - -[ - { - "discussion_id": "2110148468", - "pr_number": 15015, - "pr_file": "lib/chef/provider/cron.rb", - "created_at": "2025-05-27T20:24:45+00:00", - "commented_code": "write_exception = false\n\n tempname = Dir::Tmpname.create([\"crontab-\"]) {}\n TargetIO::File.open(tempname, \"w\") do |tempfile|\n io = TargetIO::File.open(tempname, \"w\") do |tempfile|\n tempfile.write(crontab)\n end\n\n tempname = io.path if ChefConfig::Config.target_mode?\n\n so = shell_out!(\"crontab -u #{new_resource.user} #{tempname}\")\n\n TargetIO::File.unlink(tempname)\n TargetIO::File.unlink(tempname) if ChefConfig::Config.target_mode?", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2110148468", - "repo_full_name": "chef/chef", - "pr_number": 15015, - "pr_file": "lib/chef/provider/cron.rb", - "discussion_id": "2110148468", - "commented_code": "@@ -206,13 +206,15 @@ def write_crontab(crontab)\n write_exception = false\n \n tempname = Dir::Tmpname.create([\"crontab-\"]) {}\n- TargetIO::File.open(tempname, \"w\") do |tempfile|\n+ io = TargetIO::File.open(tempname, \"w\") do |tempfile|\n tempfile.write(crontab)\n end\n \n+ tempname = io.path if ChefConfig::Config.target_mode?\n+\n so = shell_out!(\"crontab -u #{new_resource.user} #{tempname}\")\n \n- TargetIO::File.unlink(tempname)\n+ TargetIO::File.unlink(tempname) if ChefConfig::Config.target_mode?", - "comment_created_at": "2025-05-27T20:24:45+00:00", - "comment_author": "jaymzh", - "comment_body": "we still need to remove the local tmpfile, even if we're not in targetmode, just like we did before. ", - "pr_file_module": null - }, - { - "comment_id": "2115981160", - "repo_full_name": "chef/chef", - "pr_number": 15015, - "pr_file": "lib/chef/provider/cron.rb", - "discussion_id": "2110148468", - "commented_code": "@@ -206,13 +206,15 @@ def write_crontab(crontab)\n write_exception = false\n \n tempname = Dir::Tmpname.create([\"crontab-\"]) {}\n- TargetIO::File.open(tempname, \"w\") do |tempfile|\n+ io = TargetIO::File.open(tempname, \"w\") do |tempfile|\n tempfile.write(crontab)\n end\n \n+ tempname = io.path if ChefConfig::Config.target_mode?\n+\n so = shell_out!(\"crontab -u #{new_resource.user} #{tempname}\")\n \n- TargetIO::File.unlink(tempname)\n+ TargetIO::File.unlink(tempname) if ChefConfig::Config.target_mode?", - "comment_created_at": "2025-05-30T13:56:30+00:00", - "comment_author": "thheinen", - "comment_body": "Ouch! 
Good catch, thanks.", - "pr_file_module": null - }, - { - "comment_id": "2121703656", - "repo_full_name": "chef/chef", - "pr_number": 15015, - "pr_file": "lib/chef/provider/cron.rb", - "discussion_id": "2110148468", - "commented_code": "@@ -206,13 +206,15 @@ def write_crontab(crontab)\n write_exception = false\n \n tempname = Dir::Tmpname.create([\"crontab-\"]) {}\n- TargetIO::File.open(tempname, \"w\") do |tempfile|\n+ io = TargetIO::File.open(tempname, \"w\") do |tempfile|\n tempfile.write(crontab)\n end\n \n+ tempname = io.path if ChefConfig::Config.target_mode?\n+\n so = shell_out!(\"crontab -u #{new_resource.user} #{tempname}\")\n \n- TargetIO::File.unlink(tempname)\n+ TargetIO::File.unlink(tempname) if ChefConfig::Config.target_mode?", - "comment_created_at": "2025-06-02T16:56:12+00:00", - "comment_author": "johnmccrae", - "comment_body": "@thheinen is this updated per Phil's last comment, or can we merge it?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1906074481", - "pr_number": 14770, - "pr_file": "lib/chef/resource/archive_file.rb", - "created_at": "2025-01-07T21:40:32+00:00", - "commented_code": "archive.each_entry do |e|\n FileUtils.chown(new_resource.owner, new_resource.group, \"#{new_resource.destination}/#{e.pathname}\")\n end\n archive.close", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1906074481", - "repo_full_name": "chef/chef", - "pr_number": 14770, - "pr_file": "lib/chef/resource/archive_file.rb", - "discussion_id": "1906074481", - "commented_code": "@@ -129,6 +129,7 @@ class ArchiveFile < Chef::Resource\n archive.each_entry do |e|\n FileUtils.chown(new_resource.owner, new_resource.group, \"#{new_resource.destination}/#{e.pathname}\")\n end\n+ archive.close", - "comment_created_at": "2025-01-07T21:40:32+00:00", - "comment_author": "tpowell-progress", - "comment_body": "Looks like [`open_filename`](https://github.com/chef/ffi-libarchive/blob/545d3948e7a78835da48737c1490516905fad2e6/lib/ffi-libarchive/writer.rb#L5) takes a block, and so this would be cleaner and automatically `ensure` closure if these calls were converted to that pattern.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1371767924", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "created_at": "2023-10-25T13:30:41+00:00", - "commented_code": "class Chef\n class Resource\n class ChocolateyInstaller < Chef::Resource\n provides :chocolatey_installer\n\n description \"Use the chocolatey_installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n introduced \"18.3\"\n examples <<~DOC\n **Install Chocolatey**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n end\n ```\n\n **Uninstall Chocolatey**\n\n ```ruby\n chocolatey_installer 'Some random verbiage' do\n action :uninstall\n end\n ```\n\n **Install Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n download_url \"https://www.contoso.com/foo\"\n chocolatey_version '2.12.24'\n end\n ```\n\n **Upgrade Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :upgrade\n chocolatey_version '2.12.24'\n end\n ```\n DOC\n\n allowed_actions :install, :uninstall, :upgrade\n\n property :download_url, String,\n description: \"The URL to download Chocolatey from. 
This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n\n property :chocolatey_version, String,\n description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n\n property :use_native_unzip, [TrueClass, FalseClass], default: false,\n description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n\n property :ignore_proxy, [TrueClass, FalseClass], default: false,\n description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n\n property :proxy_url, String,\n description: \"Specifies the proxy URL to use during the download.\"\n\n property :proxy_user, String,\n description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n\n property :proxy_password, String,\n description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n\n load_current_value do\n current_state = is_choco_installed?\n current_value_does_not_exist! if current_state == false\n current_state\n end\n\n def is_choco_installed?\n ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n end\n\n def get_choco_version\n powershell_exec(\"choco --version\").result\n end\n\n def existing_version\n Gem::Version.new(get_choco_version)\n end\n\n def define_resource_requirements\n requirements.assert(:install, :upgrade).each do |a|\n a.assertion do\n (new_resource.proxy_user.nil? != new_resource.proxy_password.nil?)", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1371767924", - "repo_full_name": "chef/chef", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "discussion_id": "1371767924", - "commented_code": "@@ -0,0 +1,195 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the chocolatey_installer resource to ensure that Choco is installed to your specification. 
Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version\n+ Gem::Version.new(get_choco_version)\n+ end\n+\n+ def define_resource_requirements\n+ requirements.assert(:install, :upgrade).each do |a|\n+ a.assertion do\n+ (new_resource.proxy_user.nil? 
!= new_resource.proxy_password.nil?)", - "comment_created_at": "2023-10-25T13:30:41+00:00", - "comment_author": "tpowell-progress", - "comment_body": "Go ahead and remove the parens and comment that we want an error if only one of (proxy_user, proxy_password) are true.", - "pr_file_module": null - }, - { - "comment_id": "1376479210", - "repo_full_name": "chef/chef", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "discussion_id": "1371767924", - "commented_code": "@@ -0,0 +1,195 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the chocolatey_installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! 
if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version\n+ Gem::Version.new(get_choco_version)\n+ end\n+\n+ def define_resource_requirements\n+ requirements.assert(:install, :upgrade).each do |a|\n+ a.assertion do\n+ (new_resource.proxy_user.nil? != new_resource.proxy_password.nil?)", - "comment_created_at": "2023-10-30T16:10:54+00:00", - "comment_author": "johnmccrae", - "comment_body": "done", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1004881372", - "pr_number": 13107, - "pr_file": "lib/chef/chef_fs/file_system.rb", - "created_at": "2022-10-25T19:23:01+00:00", - "commented_code": "ui.output \"Would update #{dest_path}\" if ui\n else\n src_value = src_entry.read if src_value.nil?\n result[\"total\"] += 1\n dest_entry.write(src_value)\n result[\"success_count\"] += 1\n ui.output \"Updated #{dest_path}\" if ui\n end\n end\n end\n end\n end\n rescue RubyFileError => e\n # result[\"failed\"].append(username)", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1004881372", - "repo_full_name": "chef/chef", - "pr_number": 13107, - "pr_file": "lib/chef/chef_fs/file_system.rb", - "discussion_id": "1004881372", - "commented_code": "@@ -389,14 +391,17 @@ def copy_entries(src_entry, dest_entry, new_dest_parent, recurse_depth, options,\n ui.output \"Would update #{dest_path}\" if ui\n else\n src_value = src_entry.read if src_value.nil?\n+ result[\"total\"] += 1\n dest_entry.write(src_value)\n+ result[\"success_count\"] += 1\n ui.output \"Updated #{dest_path}\" if ui\n end\n end\n end\n end\n end\n rescue RubyFileError => e\n+ # result[\"failed\"].append(username)", - "comment_created_at": "2022-10-25T19:23:01+00:00", - "comment_author": "marcparadise", - "comment_body": "We should be capturing failures here and in the other exception cases, so that we can return failure information (file and error) as well. ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "986222711", - "pr_number": 13223, - "pr_file": "lib/chef/provider/user/linux.rb", - "created_at": "2022-10-03T20:57:22+00:00", - "commented_code": "@change_desc << \"change #{user_attrib} from #{cur_val} to #{new_val}\"\n end\n end\n\n !@change_desc.empty?", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "986222711", - "repo_full_name": "chef/chef", - "pr_number": 13223, - "pr_file": "lib/chef/provider/user/linux.rb", - "discussion_id": "986222711", - "commented_code": "@@ -37,6 +37,8 @@ def compare_user\n @change_desc << \"change #{user_attrib} from #{cur_val} to #{new_val}\"\n end\n end\n+\n+ !@change_desc.empty?", - "comment_created_at": "2022-10-03T20:57:22+00:00", - "comment_author": "marcparadise", - "comment_body": "This is a little opaque - this function relies on the fact that super also modifies `@change_desc`. That's internal to the base class though, and not something that we should rely on.\r\n\r\nCould we instead do something like: \r\n```\r\nuser_changed = super\r\n...\r\n-snip-\r\n...\r\nuser_changed || !change_desc.empty? 
\r\n```\r\n", - "pr_file_module": null - }, - { - "comment_id": "986726796", - "repo_full_name": "chef/chef", - "pr_number": 13223, - "pr_file": "lib/chef/provider/user/linux.rb", - "discussion_id": "986222711", - "commented_code": "@@ -37,6 +37,8 @@ def compare_user\n @change_desc << \"change #{user_attrib} from #{cur_val} to #{new_val}\"\n end\n end\n+\n+ !@change_desc.empty?", - "comment_created_at": "2022-10-04T11:01:54+00:00", - "comment_author": "fretb", - "comment_body": "Hi Marc, good input, I changed it.", - "pr_file_module": null - }, - { - "comment_id": "987630641", - "repo_full_name": "chef/chef", - "pr_number": 13223, - "pr_file": "lib/chef/provider/user/linux.rb", - "discussion_id": "986222711", - "commented_code": "@@ -37,6 +37,8 @@ def compare_user\n @change_desc << \"change #{user_attrib} from #{cur_val} to #{new_val}\"\n end\n end\n+\n+ !@change_desc.empty?", - "comment_created_at": "2022-10-05T07:52:13+00:00", - "comment_author": "Tensibai", - "comment_body": "In this case @change_desc should be initialized after the call to super too or the risk is that super didn't create the change_desc property and ` @change_desc << \"change #{user_attrib} from #{cur_val} to #{new_val}\"` will raise a no method error for << on nil.\r\n", - "pr_file_module": null - }, - { - "comment_id": "987720632", - "repo_full_name": "chef/chef", - "pr_number": 13223, - "pr_file": "lib/chef/provider/user/linux.rb", - "discussion_id": "986222711", - "commented_code": "@@ -37,6 +37,8 @@ def compare_user\n @change_desc << \"change #{user_attrib} from #{cur_val} to #{new_val}\"\n end\n end\n+\n+ !@change_desc.empty?", - "comment_created_at": "2022-10-05T09:29:20+00:00", - "comment_author": "Tensibai", - "comment_body": "Another approach could be as in the mac user provider to rewrite the whole method: https://github.com/chef/chef/blob/main/lib/chef/provider/user/mac.rb#L222-L234 ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "377245838", - "pr_number": 8340, - "pr_file": "lib/chef/provider/cron.rb", - "created_at": "2020-02-10T18:43:32+00:00", - "commented_code": "end\n attr_accessor :cron_exists, :cron_empty\n\n def check_resource_semantics!\n return if Process.euid == 0\n\n etype = Chef::Exceptions::Cron\n begin\n nuid = Etc.getpwnam(new_resource.user).uid\n rescue ArgumentError\n emessage = \"Error in #{new_resource.name}, #{new_resource.user} does not exists on this system.\"\n events.provider_requirement_failed(new_resource.action, new_resource.name, etype, emessage)\n if why_run?\n events.whyrun_assumption(new_resource.action, new_resource.name, \"Assuming user #{new_resource.user} would have been acted upon.\")\n else\n raise etype, emessage\n end\n end\n return if nuid == Process.euid\n\n emessage = \"Error in #{new_resource.name}, Chef can't modify another user crontab when not running as root.\"\n events.provider_requirement_failed(new_resource.action, new_resource.name, etype, emessage)\n raise etype, emessage\n end", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "377245838", - "repo_full_name": "chef/chef", - "pr_number": 8340, - "pr_file": "lib/chef/provider/cron.rb", - "discussion_id": "377245838", - "commented_code": "@@ -41,11 +41,34 @@ def initialize(new_resource, run_context)\n end\n attr_accessor :cron_exists, :cron_empty\n \n+ def check_resource_semantics!\n+ return if Process.euid == 0\n+\n+ etype = Chef::Exceptions::Cron\n+ begin\n+ nuid = Etc.getpwnam(new_resource.user).uid\n+ rescue ArgumentError\n+ emessage = \"Error in 
#{new_resource.name}, #{new_resource.user} does not exists on this system.\"\n+ events.provider_requirement_failed(new_resource.action, new_resource.name, etype, emessage)\n+ if why_run?\n+ events.whyrun_assumption(new_resource.action, new_resource.name, \"Assuming user #{new_resource.user} would have been acted upon.\")\n+ else\n+ raise etype, emessage\n+ end\n+ end\n+ return if nuid == Process.euid\n+\n+ emessage = \"Error in #{new_resource.name}, Chef can't modify another user crontab when not running as root.\"\n+ events.provider_requirement_failed(new_resource.action, new_resource.name, etype, emessage)\n+ raise etype, emessage\n+ end", - "comment_created_at": "2020-02-10T18:43:32+00:00", - "comment_author": "lamont-granquist", - "comment_body": "this should probably be moved to `defined_resource_requirements` and use the typical assertion syntax there. if the point is to protect load_current_resource from exploding, then LCR really needs to be made 'safe' for the cases when the user doesn't exist and/or the thing isn't being run as root (should just return a bunch of nil stuff for what it can't get at).\r\n\r\nwhat you're doing here with directly handling your own why run assertions is waaaaaay too complicated to follow for 99% of humans.", - "pr_file_module": null - }, - { - "comment_id": "377363770", - "repo_full_name": "chef/chef", - "pr_number": 8340, - "pr_file": "lib/chef/provider/cron.rb", - "discussion_id": "377245838", - "commented_code": "@@ -41,11 +41,34 @@ def initialize(new_resource, run_context)\n end\n attr_accessor :cron_exists, :cron_empty\n \n+ def check_resource_semantics!\n+ return if Process.euid == 0\n+\n+ etype = Chef::Exceptions::Cron\n+ begin\n+ nuid = Etc.getpwnam(new_resource.user).uid\n+ rescue ArgumentError\n+ emessage = \"Error in #{new_resource.name}, #{new_resource.user} does not exists on this system.\"\n+ events.provider_requirement_failed(new_resource.action, new_resource.name, etype, emessage)\n+ if why_run?\n+ events.whyrun_assumption(new_resource.action, new_resource.name, \"Assuming user #{new_resource.user} would have been acted upon.\")\n+ else\n+ raise etype, emessage\n+ end\n+ end\n+ return if nuid == Process.euid\n+\n+ emessage = \"Error in #{new_resource.name}, Chef can't modify another user crontab when not running as root.\"\n+ events.provider_requirement_failed(new_resource.action, new_resource.name, etype, emessage)\n+ raise etype, emessage\n+ end", - "comment_created_at": "2020-02-10T22:48:27+00:00", - "comment_author": "Tensibai", - "comment_body": "Well, it's starting to be quite old :) I think this was made on 13 codebase at first.\r\n\r\nI'll see to understand the define_resource_requirement then, any hint to bootstrap me? 
", - "pr_file_module": null - }, - { - "comment_id": "377364666", - "repo_full_name": "chef/chef", - "pr_number": 8340, - "pr_file": "lib/chef/provider/cron.rb", - "discussion_id": "377245838", - "commented_code": "@@ -41,11 +41,34 @@ def initialize(new_resource, run_context)\n end\n attr_accessor :cron_exists, :cron_empty\n \n+ def check_resource_semantics!\n+ return if Process.euid == 0\n+\n+ etype = Chef::Exceptions::Cron\n+ begin\n+ nuid = Etc.getpwnam(new_resource.user).uid\n+ rescue ArgumentError\n+ emessage = \"Error in #{new_resource.name}, #{new_resource.user} does not exists on this system.\"\n+ events.provider_requirement_failed(new_resource.action, new_resource.name, etype, emessage)\n+ if why_run?\n+ events.whyrun_assumption(new_resource.action, new_resource.name, \"Assuming user #{new_resource.user} would have been acted upon.\")\n+ else\n+ raise etype, emessage\n+ end\n+ end\n+ return if nuid == Process.euid\n+\n+ emessage = \"Error in #{new_resource.name}, Chef can't modify another user crontab when not running as root.\"\n+ events.provider_requirement_failed(new_resource.action, new_resource.name, etype, emessage)\n+ raise etype, emessage\n+ end", - "comment_created_at": "2020-02-10T22:50:38+00:00", - "comment_author": "Tensibai", - "comment_body": "BTW, the goal is to fail the run with an informative message if the user doesn't exist as the resource can't do it's job. ", - "pr_file_module": null - }, - { - "comment_id": "694170633", - "repo_full_name": "chef/chef", - "pr_number": 8340, - "pr_file": "lib/chef/provider/cron.rb", - "discussion_id": "377245838", - "commented_code": "@@ -41,11 +41,34 @@ def initialize(new_resource, run_context)\n end\n attr_accessor :cron_exists, :cron_empty\n \n+ def check_resource_semantics!\n+ return if Process.euid == 0\n+\n+ etype = Chef::Exceptions::Cron\n+ begin\n+ nuid = Etc.getpwnam(new_resource.user).uid\n+ rescue ArgumentError\n+ emessage = \"Error in #{new_resource.name}, #{new_resource.user} does not exists on this system.\"\n+ events.provider_requirement_failed(new_resource.action, new_resource.name, etype, emessage)\n+ if why_run?\n+ events.whyrun_assumption(new_resource.action, new_resource.name, \"Assuming user #{new_resource.user} would have been acted upon.\")\n+ else\n+ raise etype, emessage\n+ end\n+ end\n+ return if nuid == Process.euid\n+\n+ emessage = \"Error in #{new_resource.name}, Chef can't modify another user crontab when not running as root.\"\n+ events.provider_requirement_failed(new_resource.action, new_resource.name, etype, emessage)\n+ raise etype, emessage\n+ end", - "comment_created_at": "2021-08-23T17:29:03+00:00", - "comment_author": "lamont-granquist", - "comment_body": "here's an example of trying to modify a group that doesn't exist, which is the closest i could find offhand:\r\n\r\nhttps://github.com/chef/chef/blob/master/lib/chef/provider/group.rb#L54-L59", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "755499124", - "pr_number": 11579, - "pr_file": "lib/chef/provider/mount/linux.rb", - "created_at": "2021-11-23T20:59:26+00:00", - "commented_code": "# \"findmnt\" outputs the mount points with volume.\n # Convert the mount_point of the resource to a real path in case it\n # contains symlinks in its parents dirs.\n def loop_mount_points\n # get loop_mount_points only if not initialized earlier\n @loop_mount_points ||= shell_out!(\"losetup -a\").stdout", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "755499124", - "repo_full_name": "chef/chef", - 
"pr_number": 11579, - "pr_file": "lib/chef/provider/mount/linux.rb", - "discussion_id": "755499124", - "commented_code": "@@ -29,10 +29,13 @@ class Linux < Chef::Provider::Mount::Mount\n # \"findmnt\" outputs the mount points with volume.\n # Convert the mount_point of the resource to a real path in case it\n # contains symlinks in its parents dirs.\n+ def loop_mount_points\n+ # get loop_mount_points only if not initialized earlier\n+ @loop_mount_points ||= shell_out!(\"losetup -a\").stdout", - "comment_created_at": "2021-11-23T20:59:26+00:00", - "comment_author": "lamont-granquist", - "comment_body": "We should probably `rescue Errno::ENOENT` and then return empty string here so that processing can continue without matching if losetup doesn't exist.\r\n", - "pr_file_module": null - }, - { - "comment_id": "759195679", - "repo_full_name": "chef/chef", - "pr_number": 11579, - "pr_file": "lib/chef/provider/mount/linux.rb", - "discussion_id": "755499124", - "commented_code": "@@ -29,10 +29,13 @@ class Linux < Chef::Provider::Mount::Mount\n # \"findmnt\" outputs the mount points with volume.\n # Convert the mount_point of the resource to a real path in case it\n # contains symlinks in its parents dirs.\n+ def loop_mount_points\n+ # get loop_mount_points only if not initialized earlier\n+ @loop_mount_points ||= shell_out!(\"losetup -a\").stdout", - "comment_created_at": "2021-11-30T11:41:43+00:00", - "comment_author": "msys-sgarg", - "comment_body": "did the change please check", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "549525942", - "pr_number": 10776, - "pr_file": "lib/chef/provider/service/systemd.rb", - "created_at": "2020-12-29T00:21:18+00:00", - "commented_code": "end\n end\n\n def systemd_service_status\n # If the status has already been collected, return that\n return @service_status unless @service_status.nil?\n\n # Collect all the status information for a service and returns it at once\n options, args = get_systemctl_options_args\n s = shell_out!(\"#{systemctl_path} #{args} show -p UnitFileState -p ActiveState #{new_resource.service_name}\", options)\n # e.g. /bin/systemctl --system show -p UnitFileState -p ActiveState sshd.service\n # Returns something like:\n # ActiveState=active\n # UnitFileState=enabled\n status = {}\n s.stdout.each_line do |line|\n k, v = line.strip.split(\"=\")\n status[k] = v\n end\n # Assert requisite keys exist\n unless status.key?(\"UnitFileState\") && status.key?(\"ActiveState\")\n raise Mixlib::ShellOut::ShellCommandFailed, \"'#{systemctl_path} show' not reporting status for #{new_resource.service_name}!\"", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "549525942", - "repo_full_name": "chef/chef", - "pr_number": 10776, - "pr_file": "lib/chef/provider/service/systemd.rb", - "discussion_id": "549525942", - "commented_code": "@@ -76,6 +76,31 @@ def define_resource_requirements\n end\n end\n \n+ def systemd_service_status\n+ # If the status has already been collected, return that\n+ return @service_status unless @service_status.nil?\n+\n+ # Collect all the status information for a service and returns it at once\n+ options, args = get_systemctl_options_args\n+ s = shell_out!(\"#{systemctl_path} #{args} show -p UnitFileState -p ActiveState #{new_resource.service_name}\", options)\n+ # e.g. 
/bin/systemctl --system show -p UnitFileState -p ActiveState sshd.service\n+ # Returns something like:\n+ # ActiveState=active\n+ # UnitFileState=enabled\n+ status = {}\n+ s.stdout.each_line do |line|\n+ k, v = line.strip.split(\"=\")\n+ status[k] = v\n+ end\n+ # Assert requisite keys exist\n+ unless status.key?(\"UnitFileState\") && status.key?(\"ActiveState\")\n+ raise Mixlib::ShellOut::ShellCommandFailed, \"'#{systemctl_path} show' not reporting status for #{new_resource.service_name}!\"", - "comment_created_at": "2020-12-29T00:21:18+00:00", - "comment_author": "phiggins", - "comment_body": "I don't think raising a Mixlib::Shellout exception here is the right thing to do. Using that exception class implies that the error came from Mixlib::Shellout which isn't the case. With no better option, a RuntimeException (ie, a bare call to `raise` is probably what you want here.", - "pr_file_module": null - }, - { - "comment_id": "549538933", - "repo_full_name": "chef/chef", - "pr_number": 10776, - "pr_file": "lib/chef/provider/service/systemd.rb", - "discussion_id": "549525942", - "commented_code": "@@ -76,6 +76,31 @@ def define_resource_requirements\n end\n end\n \n+ def systemd_service_status\n+ # If the status has already been collected, return that\n+ return @service_status unless @service_status.nil?\n+\n+ # Collect all the status information for a service and returns it at once\n+ options, args = get_systemctl_options_args\n+ s = shell_out!(\"#{systemctl_path} #{args} show -p UnitFileState -p ActiveState #{new_resource.service_name}\", options)\n+ # e.g. /bin/systemctl --system show -p UnitFileState -p ActiveState sshd.service\n+ # Returns something like:\n+ # ActiveState=active\n+ # UnitFileState=enabled\n+ status = {}\n+ s.stdout.each_line do |line|\n+ k, v = line.strip.split(\"=\")\n+ status[k] = v\n+ end\n+ # Assert requisite keys exist\n+ unless status.key?(\"UnitFileState\") && status.key?(\"ActiveState\")\n+ raise Mixlib::ShellOut::ShellCommandFailed, \"'#{systemctl_path} show' not reporting status for #{new_resource.service_name}!\"", - "comment_created_at": "2020-12-29T01:44:45+00:00", - "comment_author": "joshuamiller01", - "comment_body": "My intent here is to surface that the shellout to systemctl failed; if `RuntimeException` is the preference, that works for me.", - "pr_file_module": null - }, - { - "comment_id": "549540924", - "repo_full_name": "chef/chef", - "pr_number": 10776, - "pr_file": "lib/chef/provider/service/systemd.rb", - "discussion_id": "549525942", - "commented_code": "@@ -76,6 +76,31 @@ def define_resource_requirements\n end\n end\n \n+ def systemd_service_status\n+ # If the status has already been collected, return that\n+ return @service_status unless @service_status.nil?\n+\n+ # Collect all the status information for a service and returns it at once\n+ options, args = get_systemctl_options_args\n+ s = shell_out!(\"#{systemctl_path} #{args} show -p UnitFileState -p ActiveState #{new_resource.service_name}\", options)\n+ # e.g. 
/bin/systemctl --system show -p UnitFileState -p ActiveState sshd.service\n+ # Returns something like:\n+ # ActiveState=active\n+ # UnitFileState=enabled\n+ status = {}\n+ s.stdout.each_line do |line|\n+ k, v = line.strip.split(\"=\")\n+ status[k] = v\n+ end\n+ # Assert requisite keys exist\n+ unless status.key?(\"UnitFileState\") && status.key?(\"ActiveState\")\n+ raise Mixlib::ShellOut::ShellCommandFailed, \"'#{systemctl_path} show' not reporting status for #{new_resource.service_name}!\"", - "comment_created_at": "2020-12-29T01:57:02+00:00", - "comment_author": "phiggins", - "comment_body": "@tas50 @lamont-granquist thoughts on this?", - "pr_file_module": null - }, - { - "comment_id": "551562835", - "repo_full_name": "chef/chef", - "pr_number": 10776, - "pr_file": "lib/chef/provider/service/systemd.rb", - "discussion_id": "549525942", - "commented_code": "@@ -76,6 +76,31 @@ def define_resource_requirements\n end\n end\n \n+ def systemd_service_status\n+ # If the status has already been collected, return that\n+ return @service_status unless @service_status.nil?\n+\n+ # Collect all the status information for a service and returns it at once\n+ options, args = get_systemctl_options_args\n+ s = shell_out!(\"#{systemctl_path} #{args} show -p UnitFileState -p ActiveState #{new_resource.service_name}\", options)\n+ # e.g. /bin/systemctl --system show -p UnitFileState -p ActiveState sshd.service\n+ # Returns something like:\n+ # ActiveState=active\n+ # UnitFileState=enabled\n+ status = {}\n+ s.stdout.each_line do |line|\n+ k, v = line.strip.split(\"=\")\n+ status[k] = v\n+ end\n+ # Assert requisite keys exist\n+ unless status.key?(\"UnitFileState\") && status.key?(\"ActiveState\")\n+ raise Mixlib::ShellOut::ShellCommandFailed, \"'#{systemctl_path} show' not reporting status for #{new_resource.service_name}!\"", - "comment_created_at": "2021-01-04T20:54:02+00:00", - "comment_author": "joshuamiller01", - "comment_body": "I'm going to change this to `raise Chef::Exceptions::Service` per Lamont's suggestion", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "644955836", - "pr_number": 11241, - "pr_file": "knife/lib/chef/knife/client_create.rb", - "created_at": "2021-06-03T16:35:17+00:00", - "commented_code": "client.public_key File.read(File.expand_path(config[:public_key]))\n end\n\n # Check the file before creating the client so the api is more transactional.\n if config[:file]\n file = config[:file]\n\n unless File.writable?(File.dirname(file))\n ui.fatal \"Dir #{File.dirname(file)} is not writable. Check permissions.\"\n exit 1\n end\n\n unless File.writable?(file)", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "644955836", - "repo_full_name": "chef/chef", - "pr_number": 11241, - "pr_file": "knife/lib/chef/knife/client_create.rb", - "discussion_id": "644955836", - "commented_code": "@@ -81,6 +81,21 @@ def run\n client.public_key File.read(File.expand_path(config[:public_key]))\n end\n \n+ # Check the file before creating the client so the api is more transactional.\n+ if config[:file]\n+ file = config[:file]\n+\n+ unless File.writable?(File.dirname(file))\n+ ui.fatal \"Dir #{File.dirname(file)} is not writable. Check permissions.\"\n+ exit 1\n+ end\n+\n+ unless File.writable?(file)", - "comment_created_at": "2021-06-03T16:35:17+00:00", - "comment_author": "marcparadise", - "comment_body": "If the file does not exist, the File.writable? 
will return false: \r\n\r\n![capture_036](https://user-images.githubusercontent.com/1130204/120680301-2321e980-c468-11eb-8d11-9f81b8fcdd04.png)", - "pr_file_module": null - }, - { - "comment_id": "645288328", - "repo_full_name": "chef/chef", - "pr_number": 11241, - "pr_file": "knife/lib/chef/knife/client_create.rb", - "discussion_id": "644955836", - "commented_code": "@@ -81,6 +81,21 @@ def run\n client.public_key File.read(File.expand_path(config[:public_key]))\n end\n \n+ # Check the file before creating the client so the api is more transactional.\n+ if config[:file]\n+ file = config[:file]\n+\n+ unless File.writable?(File.dirname(file))\n+ ui.fatal \"Dir #{File.dirname(file)} is not writable. Check permissions.\"\n+ exit 1\n+ end\n+\n+ unless File.writable?(file)", - "comment_created_at": "2021-06-04T04:55:44+00:00", - "comment_author": "snehaldwivedi", - "comment_body": "yes, if the file is writable then only it will return true else false. You can also go through documentation [here](https://ruby-doc.org/core-2.5.0/File.html). Now as per your example:\r\n1. ` File.writable?(\"does-not-exist\")` it has checked if the given file is writable or not and returns `false` as it doesn't exist.\r\n2. `File.write(\"does-not-exist\", \"blah\")` it returns 4. Here it will create a new file \"does-not-exist\" and add a content \"blah\" can returns length of the content.\r\n3. ` File.writable?(\"does-not-exist\")` now after 2nd point where the file is not created now ` File.writable?` is return `true`.", - "pr_file_module": null - }, - { - "comment_id": "655453068", - "repo_full_name": "chef/chef", - "pr_number": 11241, - "pr_file": "knife/lib/chef/knife/client_create.rb", - "discussion_id": "644955836", - "commented_code": "@@ -81,6 +81,21 @@ def run\n client.public_key File.read(File.expand_path(config[:public_key]))\n end\n \n+ # Check the file before creating the client so the api is more transactional.\n+ if config[:file]\n+ file = config[:file]\n+\n+ unless File.writable?(File.dirname(file))\n+ ui.fatal \"Dir #{File.dirname(file)} is not writable. Check permissions.\"\n+ exit 1\n+ end\n+\n+ unless File.writable?(file)", - "comment_created_at": "2021-06-21T14:57:09+00:00", - "comment_author": "marcparadise", - "comment_body": "My concern is less functional, and more user experience - it is correct that the directory is not writable and the action should fail. \r\n\r\nBut in the case where the directory for `/tmp/test/afile.out` does not exist, we will report an error that `Dir '/tmp/test/' is not writable. Check permissions`. This is misleading, and we could provide more actionable feedback than \"check permissions\". It would be better to specifically verify that the directory exists and report that error independently of permissions-related errors. ", - "pr_file_module": null - }, - { - "comment_id": "655869743", - "repo_full_name": "chef/chef", - "pr_number": 11241, - "pr_file": "knife/lib/chef/knife/client_create.rb", - "discussion_id": "644955836", - "commented_code": "@@ -81,6 +81,21 @@ def run\n client.public_key File.read(File.expand_path(config[:public_key]))\n end\n \n+ # Check the file before creating the client so the api is more transactional.\n+ if config[:file]\n+ file = config[:file]\n+\n+ unless File.writable?(File.dirname(file))\n+ ui.fatal \"Dir #{File.dirname(file)} is not writable. 
Check permissions.\"\n+ exit 1\n+ end\n+\n+ unless File.writable?(file)", - "comment_created_at": "2021-06-22T04:39:00+00:00", - "comment_author": "snehaldwivedi", - "comment_body": "@marcparadise Got your point. I Will update the condition accordingly. ", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-keep-actions-versions-current.json b/_reviewers/chef-keep-actions-versions-current.json new file mode 100644 index 0000000..a16b9e2 --- /dev/null +++ b/_reviewers/chef-keep-actions-versions-current.json @@ -0,0 +1,158 @@ +[ + { + "discussion_id": "1344871327", + "pr_number": 13982, + "pr_file": ".github/workflows/lint.yml", + "created_at": "2023-10-03T23:31:21+00:00", + "commented_code": "file-url: 'https://raw.githubusercontent.com/chef/chef_dictionary/main/chef.txt'\n file-name: 'chef_dictionary.txt'\n - uses: streetsidesoftware/cspell-action@v2.12.0\n\n danger:\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v2", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1344871327", + "repo_full_name": "chef/chef", + "pr_number": 13982, + "pr_file": ".github/workflows/lint.yml", + "discussion_id": "1344871327", + "commented_code": "@@ -33,3 +33,31 @@ jobs:\n file-url: 'https://raw.githubusercontent.com/chef/chef_dictionary/main/chef.txt'\n file-name: 'chef_dictionary.txt'\n - uses: streetsidesoftware/cspell-action@v2.12.0\n+\n+ danger:\n+ runs-on: ubuntu-latest\n+ permissions:\n+ contents: read\n+ pull-requests: write\n+ steps:\n+ - uses: actions/checkout@v2", + "comment_created_at": "2023-10-03T23:31:21+00:00", + "comment_author": "Stromweld", + "comment_body": "```suggestion\r\n - uses: actions/checkout@v4\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1345029660", + "pr_number": 13983, + "pr_file": ".github/workflows/sonarqube.yml", + "created_at": "2023-10-04T00:59:58+00:00", + "commented_code": "jobs:\n sonarqube:\n # this only works on branch-based PRs\n if: ${{ !github.event.pull_request.head.repo.fork }}\n runs-on: ip-range-controlled\n steps:\n - uses: actions/checkout@v3", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1345029660", + "repo_full_name": "chef/chef", + "pr_number": 13983, + "pr_file": ".github/workflows/sonarqube.yml", + "discussion_id": "1345029660", + "commented_code": "@@ -12,6 +12,8 @@ on:\n \n jobs:\n sonarqube:\n+ # this only works on branch-based PRs\n+ if: ${{ !github.event.pull_request.head.repo.fork }}\n runs-on: ip-range-controlled\n steps:\n - uses: actions/checkout@v3", + "comment_created_at": "2023-10-04T00:59:58+00:00", + "comment_author": "Stromweld", + "comment_body": "```suggestion\r\n - uses: actions/checkout@v4\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1039940703", + "pr_number": 13401, + "pr_file": ".github/workflows/kitchen.yml", + "created_at": "2022-12-05T18:29:53+00:00", + "commented_code": "strategy:\n fail-fast: false\n matrix:\n os: [macos-10.15] # macos-11.0 is not public for now\n os: [macos-11] # Github has deprecated macos-10.15 runner https://github.com/actions/runner-images/issues/5583\n runs-on: ${{ matrix.os }}\n steps:\n - uses: actions/checkout@v2", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1039940703", + "repo_full_name": "chef/chef", + "pr_number": 13401, + "pr_file": ".github/workflows/kitchen.yml", + "discussion_id": "1039940703", + "commented_code": "@@ -56,7 +56,7 @@ jobs:\n strategy:\n 
fail-fast: false\n matrix:\n- os: [macos-10.15] # macos-11.0 is not public for now\n+ os: [macos-11] # Github has deprecated macos-10.15 runner https://github.com/actions/runner-images/issues/5583\n runs-on: ${{ matrix.os }}\n steps:\n - uses: actions/checkout@v2", + "comment_created_at": "2022-12-05T18:29:53+00:00", + "comment_author": "Stromweld", + "comment_body": "should also bump actions/checkout to v3 since v2 uses deprecated version of nodejs and generates warnings.\r\n```suggestion\r\n - uses: actions/checkout@v3\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1054310800", + "repo_full_name": "chef/chef", + "pr_number": 13401, + "pr_file": ".github/workflows/kitchen.yml", + "discussion_id": "1039940703", + "commented_code": "@@ -56,7 +56,7 @@ jobs:\n strategy:\n fail-fast: false\n matrix:\n- os: [macos-10.15] # macos-11.0 is not public for now\n+ os: [macos-11] # Github has deprecated macos-10.15 runner https://github.com/actions/runner-images/issues/5583\n runs-on: ${{ matrix.os }}\n steps:\n - uses: actions/checkout@v2", + "comment_created_at": "2022-12-21T12:01:04+00:00", + "comment_author": "neha-p6", + "comment_body": "done", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "844300973", + "pr_number": 12769, + "pr_file": ".github/workflows/func_spec.yml", + "created_at": "2022-04-06T19:07:49+00:00", + "commented_code": "strategy:\n fail-fast: false\n matrix:\n os: [windows-2019, windows-2016]\n os: [windows-2019, windows-2022]", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "844300973", + "repo_full_name": "chef/chef", + "pr_number": 12769, + "pr_file": ".github/workflows/func_spec.yml", + "discussion_id": "844300973", + "commented_code": "@@ -12,7 +12,7 @@ jobs:\n strategy:\n fail-fast: false\n matrix:\n- os: [windows-2019, windows-2016]\n+ os: [windows-2019, windows-2022]", + "comment_created_at": "2022-04-06T19:07:49+00:00", + "comment_author": "PrajaktaPurohit", + "comment_body": "Were we testing on windows-16 for some specific reason?", + "pr_file_module": null + }, + { + "comment_id": "844303127", + "repo_full_name": "chef/chef", + "pr_number": 12769, + "pr_file": ".github/workflows/func_spec.yml", + "discussion_id": "844300973", + "commented_code": "@@ -12,7 +12,7 @@ jobs:\n strategy:\n fail-fast: false\n matrix:\n- os: [windows-2019, windows-2016]\n+ os: [windows-2019, windows-2022]", + "comment_created_at": "2022-04-06T19:10:29+00:00", + "comment_author": "johnmccrae", + "comment_body": "Windows Server 2016 is no longer supported as a test platform in Github Workflows as of the 1st of this month. ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "634167413", + "pr_number": 11119, + "pr_file": ".github/workflows/kitchen.yml", + "created_at": "2021-05-18T08:37:06+00:00", + "commented_code": "runs-on: ${{ matrix.os }}\n steps:\n - uses: actions/checkout@v2\n - name: 'Install Chef/Ohai from Omnitruck'\n id: install_chef\n run: |\n . 
{ Invoke-WebRequest -useb https://omnitruck.chef.io/install.ps1 } | Invoke-Expression; Install-Project -project chef -channel current\n $env:PATH = \"C:\\opscode\\chef\\bin;C:\\opscode\\chef\\embedded\\bin;\" + $env:PATH\n chef-client -v\n ohai -v\n rake --version\n bundle -v\n - name: Install Infra Client from current channel\n uses: actionshub/chef-install@master", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "634167413", + "repo_full_name": "chef/chef", + "pr_number": 11119, + "pr_file": ".github/workflows/kitchen.yml", + "discussion_id": "634167413", + "commented_code": "@@ -16,15 +16,10 @@ jobs:\n runs-on: ${{ matrix.os }}\n steps:\n - uses: actions/checkout@v2\n- - name: 'Install Chef/Ohai from Omnitruck'\n- id: install_chef\n- run: |\n- . { Invoke-WebRequest -useb https://omnitruck.chef.io/install.ps1 } | Invoke-Expression; Install-Project -project chef -channel current\n- $env:PATH = \"C:\\opscode\\chef\\bin;C:\\opscode\\chef\\embedded\\bin;\" + $env:PATH\n- chef-client -v\n- ohai -v\n- rake --version\n- bundle -v\n+ - name: Install Infra Client from current channel\n+ uses: actionshub/chef-install@master", + "comment_created_at": "2021-05-18T08:37:06+00:00", + "comment_author": "damacus", + "comment_body": "```suggestion\r\n uses: actionshub/chef-install@main\r\n```\r\n\r\nactionshub has switched to the main branch now and @Xorima is looking to remove the master branch shortly 👍🏼 ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "634172563", + "pr_number": 11119, + "pr_file": ".github/workflows/kitchen.yml", + "created_at": "2021-05-18T08:43:47+00:00", + "commented_code": "runs-on: ${{ matrix.os }}\n steps:\n - uses: actions/checkout@v2\n - name: 'Install Chef/Ohai from Omnitruck'\n id: install_chef\n run: |\n brew install coreutils\n curl -L https://omnitruck.chef.io/install.sh | sudo bash -s -- -c current\n /opt/chef/bin/chef-client -v\n /opt/chef/bin/ohai -v\n /opt/chef/embedded/bin/rake --version\n /opt/chef/embedded/bin/bundle -v\n - name: Install Infra Client from current channel\n uses: actionshub/chef-install@master", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "634172563", + "repo_full_name": "chef/chef", + "pr_number": 11119, + "pr_file": ".github/workflows/kitchen.yml", + "discussion_id": "634172563", + "commented_code": "@@ -60,15 +50,10 @@ jobs:\n runs-on: ${{ matrix.os }}\n steps:\n - uses: actions/checkout@v2\n- - name: 'Install Chef/Ohai from Omnitruck'\n- id: install_chef\n- run: |\n- brew install coreutils\n- curl -L https://omnitruck.chef.io/install.sh | sudo bash -s -- -c current\n- /opt/chef/bin/chef-client -v\n- /opt/chef/bin/ohai -v\n- /opt/chef/embedded/bin/rake --version\n- /opt/chef/embedded/bin/bundle -v\n+ - name: Install Infra Client from current channel\n+ uses: actionshub/chef-install@master", + "comment_created_at": "2021-05-18T08:43:47+00:00", + "comment_author": "xorima", + "comment_body": "```suggestion\r\n uses: actionshub/chef-install@main\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-keep-actions-versions-current.md b/_reviewers/chef-keep-actions-versions-current.md index d6ee64b..1947e35 100644 --- a/_reviewers/chef-keep-actions-versions-current.md +++ b/_reviewers/chef-keep-actions-versions-current.md @@ -43,163 +43,3 @@ Always use the latest stable versions of GitHub Actions components in CI/CD work ``` Regular updates reduce security risks, ensure compatibility with GitHub's evolving 
infrastructure, and prevent build failures when older components are deprecated or removed. Create a recurring task to audit and update actions versions across your workflows. - - -[ - { - "discussion_id": "1344871327", - "pr_number": 13982, - "pr_file": ".github/workflows/lint.yml", - "created_at": "2023-10-03T23:31:21+00:00", - "commented_code": "file-url: 'https://raw.githubusercontent.com/chef/chef_dictionary/main/chef.txt'\n file-name: 'chef_dictionary.txt'\n - uses: streetsidesoftware/cspell-action@v2.12.0\n\n danger:\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v2", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1344871327", - "repo_full_name": "chef/chef", - "pr_number": 13982, - "pr_file": ".github/workflows/lint.yml", - "discussion_id": "1344871327", - "commented_code": "@@ -33,3 +33,31 @@ jobs:\n file-url: 'https://raw.githubusercontent.com/chef/chef_dictionary/main/chef.txt'\n file-name: 'chef_dictionary.txt'\n - uses: streetsidesoftware/cspell-action@v2.12.0\n+\n+ danger:\n+ runs-on: ubuntu-latest\n+ permissions:\n+ contents: read\n+ pull-requests: write\n+ steps:\n+ - uses: actions/checkout@v2", - "comment_created_at": "2023-10-03T23:31:21+00:00", - "comment_author": "Stromweld", - "comment_body": "```suggestion\r\n - uses: actions/checkout@v4\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1345029660", - "pr_number": 13983, - "pr_file": ".github/workflows/sonarqube.yml", - "created_at": "2023-10-04T00:59:58+00:00", - "commented_code": "jobs:\n sonarqube:\n # this only works on branch-based PRs\n if: ${{ !github.event.pull_request.head.repo.fork }}\n runs-on: ip-range-controlled\n steps:\n - uses: actions/checkout@v3", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1345029660", - "repo_full_name": "chef/chef", - "pr_number": 13983, - "pr_file": ".github/workflows/sonarqube.yml", - "discussion_id": "1345029660", - "commented_code": "@@ -12,6 +12,8 @@ on:\n \n jobs:\n sonarqube:\n+ # this only works on branch-based PRs\n+ if: ${{ !github.event.pull_request.head.repo.fork }}\n runs-on: ip-range-controlled\n steps:\n - uses: actions/checkout@v3", - "comment_created_at": "2023-10-04T00:59:58+00:00", - "comment_author": "Stromweld", - "comment_body": "```suggestion\r\n - uses: actions/checkout@v4\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1039940703", - "pr_number": 13401, - "pr_file": ".github/workflows/kitchen.yml", - "created_at": "2022-12-05T18:29:53+00:00", - "commented_code": "strategy:\n fail-fast: false\n matrix:\n os: [macos-10.15] # macos-11.0 is not public for now\n os: [macos-11] # Github has deprecated macos-10.15 runner https://github.com/actions/runner-images/issues/5583\n runs-on: ${{ matrix.os }}\n steps:\n - uses: actions/checkout@v2", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1039940703", - "repo_full_name": "chef/chef", - "pr_number": 13401, - "pr_file": ".github/workflows/kitchen.yml", - "discussion_id": "1039940703", - "commented_code": "@@ -56,7 +56,7 @@ jobs:\n strategy:\n fail-fast: false\n matrix:\n- os: [macos-10.15] # macos-11.0 is not public for now\n+ os: [macos-11] # Github has deprecated macos-10.15 runner https://github.com/actions/runner-images/issues/5583\n runs-on: ${{ matrix.os }}\n steps:\n - uses: actions/checkout@v2", - "comment_created_at": "2022-12-05T18:29:53+00:00", - "comment_author": "Stromweld", - 
"comment_body": "should also bump actions/checkout to v3 since v2 uses deprecated version of nodejs and generates warnings.\r\n```suggestion\r\n - uses: actions/checkout@v3\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1054310800", - "repo_full_name": "chef/chef", - "pr_number": 13401, - "pr_file": ".github/workflows/kitchen.yml", - "discussion_id": "1039940703", - "commented_code": "@@ -56,7 +56,7 @@ jobs:\n strategy:\n fail-fast: false\n matrix:\n- os: [macos-10.15] # macos-11.0 is not public for now\n+ os: [macos-11] # Github has deprecated macos-10.15 runner https://github.com/actions/runner-images/issues/5583\n runs-on: ${{ matrix.os }}\n steps:\n - uses: actions/checkout@v2", - "comment_created_at": "2022-12-21T12:01:04+00:00", - "comment_author": "neha-p6", - "comment_body": "done", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "844300973", - "pr_number": 12769, - "pr_file": ".github/workflows/func_spec.yml", - "created_at": "2022-04-06T19:07:49+00:00", - "commented_code": "strategy:\n fail-fast: false\n matrix:\n os: [windows-2019, windows-2016]\n os: [windows-2019, windows-2022]", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "844300973", - "repo_full_name": "chef/chef", - "pr_number": 12769, - "pr_file": ".github/workflows/func_spec.yml", - "discussion_id": "844300973", - "commented_code": "@@ -12,7 +12,7 @@ jobs:\n strategy:\n fail-fast: false\n matrix:\n- os: [windows-2019, windows-2016]\n+ os: [windows-2019, windows-2022]", - "comment_created_at": "2022-04-06T19:07:49+00:00", - "comment_author": "PrajaktaPurohit", - "comment_body": "Were we testing on windows-16 for some specific reason?", - "pr_file_module": null - }, - { - "comment_id": "844303127", - "repo_full_name": "chef/chef", - "pr_number": 12769, - "pr_file": ".github/workflows/func_spec.yml", - "discussion_id": "844300973", - "commented_code": "@@ -12,7 +12,7 @@ jobs:\n strategy:\n fail-fast: false\n matrix:\n- os: [windows-2019, windows-2016]\n+ os: [windows-2019, windows-2022]", - "comment_created_at": "2022-04-06T19:10:29+00:00", - "comment_author": "johnmccrae", - "comment_body": "Windows Server 2016 is no longer supported as a test platform in Github Workflows as of the 1st of this month. ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "634167413", - "pr_number": 11119, - "pr_file": ".github/workflows/kitchen.yml", - "created_at": "2021-05-18T08:37:06+00:00", - "commented_code": "runs-on: ${{ matrix.os }}\n steps:\n - uses: actions/checkout@v2\n - name: 'Install Chef/Ohai from Omnitruck'\n id: install_chef\n run: |\n . { Invoke-WebRequest -useb https://omnitruck.chef.io/install.ps1 } | Invoke-Expression; Install-Project -project chef -channel current\n $env:PATH = \"C:\\opscode\\chef\\bin;C:\\opscode\\chef\\embedded\\bin;\" + $env:PATH\n chef-client -v\n ohai -v\n rake --version\n bundle -v\n - name: Install Infra Client from current channel\n uses: actionshub/chef-install@master", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "634167413", - "repo_full_name": "chef/chef", - "pr_number": 11119, - "pr_file": ".github/workflows/kitchen.yml", - "discussion_id": "634167413", - "commented_code": "@@ -16,15 +16,10 @@ jobs:\n runs-on: ${{ matrix.os }}\n steps:\n - uses: actions/checkout@v2\n- - name: 'Install Chef/Ohai from Omnitruck'\n- id: install_chef\n- run: |\n- . 
{ Invoke-WebRequest -useb https://omnitruck.chef.io/install.ps1 } | Invoke-Expression; Install-Project -project chef -channel current\n- $env:PATH = \"C:\\opscode\\chef\\bin;C:\\opscode\\chef\\embedded\\bin;\" + $env:PATH\n- chef-client -v\n- ohai -v\n- rake --version\n- bundle -v\n+ - name: Install Infra Client from current channel\n+ uses: actionshub/chef-install@master", - "comment_created_at": "2021-05-18T08:37:06+00:00", - "comment_author": "damacus", - "comment_body": "```suggestion\r\n uses: actionshub/chef-install@main\r\n```\r\n\r\nactionshub has switched to the main branch now and @Xorima is looking to remove the master branch shortly \ud83d\udc4d\ud83c\udffc ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "634172563", - "pr_number": 11119, - "pr_file": ".github/workflows/kitchen.yml", - "created_at": "2021-05-18T08:43:47+00:00", - "commented_code": "runs-on: ${{ matrix.os }}\n steps:\n - uses: actions/checkout@v2\n - name: 'Install Chef/Ohai from Omnitruck'\n id: install_chef\n run: |\n brew install coreutils\n curl -L https://omnitruck.chef.io/install.sh | sudo bash -s -- -c current\n /opt/chef/bin/chef-client -v\n /opt/chef/bin/ohai -v\n /opt/chef/embedded/bin/rake --version\n /opt/chef/embedded/bin/bundle -v\n - name: Install Infra Client from current channel\n uses: actionshub/chef-install@master", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "634172563", - "repo_full_name": "chef/chef", - "pr_number": 11119, - "pr_file": ".github/workflows/kitchen.yml", - "discussion_id": "634172563", - "commented_code": "@@ -60,15 +50,10 @@ jobs:\n runs-on: ${{ matrix.os }}\n steps:\n - uses: actions/checkout@v2\n- - name: 'Install Chef/Ohai from Omnitruck'\n- id: install_chef\n- run: |\n- brew install coreutils\n- curl -L https://omnitruck.chef.io/install.sh | sudo bash -s -- -c current\n- /opt/chef/bin/chef-client -v\n- /opt/chef/bin/ohai -v\n- /opt/chef/embedded/bin/rake --version\n- /opt/chef/embedded/bin/bundle -v\n+ - name: Install Infra Client from current channel\n+ uses: actionshub/chef-install@master", - "comment_created_at": "2021-05-18T08:43:47+00:00", - "comment_author": "xorima", - "comment_body": "```suggestion\r\n uses: actionshub/chef-install@main\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-limit-token-permissions.json b/_reviewers/chef-limit-token-permissions.json new file mode 100644 index 0000000..80eb27f --- /dev/null +++ b/_reviewers/chef-limit-token-permissions.json @@ -0,0 +1,266 @@ +[ + { + "discussion_id": "2178132199", + "pr_number": 15076, + "pr_file": ".github/workflows/verify.yml", + "created_at": "2025-07-01T17:09:07+00:00", + "commented_code": "name: Verify\n\non:\n push:\n branches: [main, feature/github-actions-migration] # Add your temp branch here\n pull_request:\n branches: [main]\n\nenv:\n CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n CHEF_LICENSE: accept-no-persist\n\njobs:\n linux-matrix:\n name: ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n test_type: [Unit, Integration, Functional]\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n options: --privileged --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Container prep\n run: 
.expeditor/scripts/bk_container_prep.sh\n\n - name: Run ${{ matrix.test_type }} tests\n run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n windows-tests:", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2178132199", + "repo_full_name": "chef/chef", + "pr_number": 15076, + "pr_file": ".github/workflows/verify.yml", + "discussion_id": "2178132199", + "commented_code": "@@ -0,0 +1,167 @@\n+name: Verify\n+\n+on:\n+ push:\n+ branches: [main, feature/github-actions-migration] # Add your temp branch here\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n+ CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n+ CHEF_LICENSE: accept-no-persist\n+\n+jobs:\n+ linux-matrix:\n+ name: ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n+ test_type: [Unit, Integration, Functional]\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n+ options: --privileged --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ windows-tests:", + "comment_created_at": "2025-07-01T17:09:07+00:00", + "comment_author": "github-advanced-security[bot]", + "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. 
Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/292)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2178132200", + "pr_number": 15076, + "pr_file": ".github/workflows/verify.yml", + "created_at": "2025-07-01T17:09:07+00:00", + "commented_code": "name: Verify\n\non:\n push:\n branches: [main, feature/github-actions-migration] # Add your temp branch here\n pull_request:\n branches: [main]\n\nenv:\n CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n CHEF_LICENSE: accept-no-persist\n\njobs:\n linux-matrix:\n name: ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n test_type: [Unit, Integration, Functional]\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n options: --privileged --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - name: Run ${{ matrix.test_type }} tests\n run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n windows-tests:\n name: ${{ matrix.test_type }} - Windows 2019\n runs-on: windows-2019\n timeout-minutes: 120\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Set up Ruby\n uses: ruby/setup-ruby@v1\n with:\n ruby-version: '3.1'\n bundler-cache: true\n\n - name: Run ${{ matrix.test_type }} tests\n shell: pwsh\n run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n gem-tests:", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2178132200", + "repo_full_name": "chef/chef", + "pr_number": 15076, + "pr_file": ".github/workflows/verify.yml", + "discussion_id": "2178132200", + "commented_code": "@@ -0,0 +1,167 @@\n+name: Verify\n+\n+on:\n+ push:\n+ branches: [main, feature/github-actions-migration] # Add your temp branch here\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n+ CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n+ CHEF_LICENSE: accept-no-persist\n+\n+jobs:\n+ linux-matrix:\n+ name: ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n+ test_type: [Unit, Integration, Functional]\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n+ options: --privileged --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ windows-tests:\n+ name: ${{ matrix.test_type }} - Windows 2019\n+ runs-on: windows-2019\n+ timeout-minutes: 120\n+ strategy:\n+ fail-fast: false\n+ 
matrix:\n+ test_type: [Unit, Integration, Functional]\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Set up Ruby\n+ uses: ruby/setup-ruby@v1\n+ with:\n+ ruby-version: '3.1'\n+ bundler-cache: true\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ shell: pwsh\n+ run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ gem-tests:", + "comment_created_at": "2025-07-01T17:09:07+00:00", + "comment_author": "github-advanced-security[bot]", + "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/293)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2178132204", + "pr_number": 15076, + "pr_file": ".github/workflows/verify.yml", + "created_at": "2025-07-01T17:09:07+00:00", + "commented_code": "name: Verify\n\non:\n push:\n branches: [main, feature/github-actions-migration] # Add your temp branch here\n pull_request:\n branches: [main]\n\nenv:\n CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n CHEF_LICENSE: accept-no-persist\n\njobs:\n linux-matrix:\n name: ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n test_type: [Unit, Integration, Functional]\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n options: --privileged --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - name: Run ${{ matrix.test_type }} tests\n run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n windows-tests:\n name: ${{ matrix.test_type }} - Windows 2019\n runs-on: windows-2019\n timeout-minutes: 120\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Set up Ruby\n uses: ruby/setup-ruby@v1\n with:\n ruby-version: '3.1'\n bundler-cache: true\n\n - name: Run ${{ matrix.test_type }} tests\n shell: pwsh\n run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n gem-tests:\n name: ${{ matrix.gem }} gem\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n gem: [chef-zero, cheffish, chefspec, knife-windows, berkshelf]\n include:\n - gem: chef-zero\n test_cmd: bundle exec tasks/bin/run_external_test chef/chef-zero main rake pedant\n env_vars: |\n PEDANT_OPTS=--skip-oc_id\n CHEF_FS=true\n - gem: cheffish\n test_cmd: bundle exec tasks/bin/run_external_test chef/cheffish main rake spec\n - gem: chefspec\n test_cmd: bundle exec tasks/bin/run_external_test chef/chefspec main rake\n - gem: knife-windows\n test_cmd: bundle exec tasks/bin/run_external_test chef/knife-windows main rake spec\n - gem: berkshelf\n test_cmd: bundle exec tasks/bin/run_external_test chef/berkshelf main rake\n pre_install: apt-get update -y && apt-get install -y graphviz\n 
container:\n image: chefes/omnibus-toolchain-ubuntu-1804:3.0.36\n options: --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - name: Set up environment\n run: |\n export PATH=\"/root/.rbenv/shims:/opt/chef/bin:/home/ec2-user/.tfenv/bin:/home/ec2-user/.rbenv/shims:/home/ec2-user/.rbenv/bin:/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/bin\"\n echo \"PATH=$PATH\" >> $GITHUB_ENV\n\n - name: Pre-install dependencies\n if: matrix.pre_install\n run: ${{ matrix.pre_install }}\n\n - name: Bundle install\n run: |\n bundle config set --local without omnibus_package\n bundle config set --local path 'vendor/bundle'\n bundle install --jobs=3 --retry=3\n\n - name: Run ${{ matrix.gem }} tests\n run: ${{ matrix.test_cmd }}\n env:\n CHEF_FOUNDATION_VERSION: ${{ env.CHEF_FOUNDATION_VERSION }}\n PEDANT_OPTS: ${{ matrix.gem == 'chef-zero' && '--skip-oc_id' || '' }}\n CHEF_FS: ${{ matrix.gem == 'chef-zero' && 'true' || '' }}\n\n habitat-linux-plan:", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2178132204", + "repo_full_name": "chef/chef", + "pr_number": 15076, + "pr_file": ".github/workflows/verify.yml", + "discussion_id": "2178132204", + "commented_code": "@@ -0,0 +1,167 @@\n+name: Verify\n+\n+on:\n+ push:\n+ branches: [main, feature/github-actions-migration] # Add your temp branch here\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n+ CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n+ CHEF_LICENSE: accept-no-persist\n+\n+jobs:\n+ linux-matrix:\n+ name: ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n+ test_type: [Unit, Integration, Functional]\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n+ options: --privileged --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ windows-tests:\n+ name: ${{ matrix.test_type }} - Windows 2019\n+ runs-on: windows-2019\n+ timeout-minutes: 120\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ test_type: [Unit, Integration, Functional]\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Set up Ruby\n+ uses: ruby/setup-ruby@v1\n+ with:\n+ ruby-version: '3.1'\n+ bundler-cache: true\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ shell: pwsh\n+ run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ gem-tests:\n+ name: ${{ matrix.gem }} gem\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ gem: [chef-zero, cheffish, chefspec, knife-windows, berkshelf]\n+ include:\n+ - gem: chef-zero\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/chef-zero main rake pedant\n+ env_vars: |\n+ PEDANT_OPTS=--skip-oc_id\n+ CHEF_FS=true\n+ - gem: cheffish\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/cheffish main rake spec\n+ - gem: chefspec\n+ test_cmd: bundle exec 
tasks/bin/run_external_test chef/chefspec main rake\n+ - gem: knife-windows\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/knife-windows main rake spec\n+ - gem: berkshelf\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/berkshelf main rake\n+ pre_install: apt-get update -y && apt-get install -y graphviz\n+ container:\n+ image: chefes/omnibus-toolchain-ubuntu-1804:3.0.36\n+ options: --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Set up environment\n+ run: |\n+ export PATH=\"/root/.rbenv/shims:/opt/chef/bin:/home/ec2-user/.tfenv/bin:/home/ec2-user/.rbenv/shims:/home/ec2-user/.rbenv/bin:/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/bin\"\n+ echo \"PATH=$PATH\" >> $GITHUB_ENV\n+\n+ - name: Pre-install dependencies\n+ if: matrix.pre_install\n+ run: ${{ matrix.pre_install }}\n+\n+ - name: Bundle install\n+ run: |\n+ bundle config set --local without omnibus_package\n+ bundle config set --local path 'vendor/bundle'\n+ bundle install --jobs=3 --retry=3\n+\n+ - name: Run ${{ matrix.gem }} tests\n+ run: ${{ matrix.test_cmd }}\n+ env:\n+ CHEF_FOUNDATION_VERSION: ${{ env.CHEF_FOUNDATION_VERSION }}\n+ PEDANT_OPTS: ${{ matrix.gem == 'chef-zero' && '--skip-oc_id' || '' }}\n+ CHEF_FS: ${{ matrix.gem == 'chef-zero' && 'true' || '' }}\n+\n+ habitat-linux-plan:", + "comment_created_at": "2025-07-01T17:09:07+00:00", + "comment_author": "github-advanced-security[bot]", + "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/294)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2178132207", + "pr_number": 15076, + "pr_file": ".github/workflows/verify.yml", + "created_at": "2025-07-01T17:09:07+00:00", + "commented_code": "name: Verify\n\non:\n push:\n branches: [main, feature/github-actions-migration] # Add your temp branch here\n pull_request:\n branches: [main]\n\nenv:\n CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n CHEF_LICENSE: accept-no-persist\n\njobs:\n linux-matrix:\n name: ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n test_type: [Unit, Integration, Functional]\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n options: --privileged --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - name: Run ${{ matrix.test_type }} tests\n run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n windows-tests:\n name: ${{ matrix.test_type }} - Windows 2019\n runs-on: windows-2019\n timeout-minutes: 120\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Set up Ruby\n uses: ruby/setup-ruby@v1\n with:\n ruby-version: '3.1'\n bundler-cache: true\n\n - name: Run ${{ matrix.test_type }} tests\n shell: pwsh\n run: 
.\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n gem-tests:\n name: ${{ matrix.gem }} gem\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n gem: [chef-zero, cheffish, chefspec, knife-windows, berkshelf]\n include:\n - gem: chef-zero\n test_cmd: bundle exec tasks/bin/run_external_test chef/chef-zero main rake pedant\n env_vars: |\n PEDANT_OPTS=--skip-oc_id\n CHEF_FS=true\n - gem: cheffish\n test_cmd: bundle exec tasks/bin/run_external_test chef/cheffish main rake spec\n - gem: chefspec\n test_cmd: bundle exec tasks/bin/run_external_test chef/chefspec main rake\n - gem: knife-windows\n test_cmd: bundle exec tasks/bin/run_external_test chef/knife-windows main rake spec\n - gem: berkshelf\n test_cmd: bundle exec tasks/bin/run_external_test chef/berkshelf main rake\n pre_install: apt-get update -y && apt-get install -y graphviz\n container:\n image: chefes/omnibus-toolchain-ubuntu-1804:3.0.36\n options: --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - name: Set up environment\n run: |\n export PATH=\"/root/.rbenv/shims:/opt/chef/bin:/home/ec2-user/.tfenv/bin:/home/ec2-user/.rbenv/shims:/home/ec2-user/.rbenv/bin:/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/bin\"\n echo \"PATH=$PATH\" >> $GITHUB_ENV\n\n - name: Pre-install dependencies\n if: matrix.pre_install\n run: ${{ matrix.pre_install }}\n\n - name: Bundle install\n run: |\n bundle config set --local without omnibus_package\n bundle config set --local path 'vendor/bundle'\n bundle install --jobs=3 --retry=3\n\n - name: Run ${{ matrix.gem }} tests\n run: ${{ matrix.test_cmd }}\n env:\n CHEF_FOUNDATION_VERSION: ${{ env.CHEF_FOUNDATION_VERSION }}\n PEDANT_OPTS: ${{ matrix.gem == 'chef-zero' && '--skip-oc_id' || '' }}\n CHEF_FS: ${{ matrix.gem == 'chef-zero' && 'true' || '' }}\n\n habitat-linux-plan:\n name: Habitat Linux Plan\n runs-on: ubuntu-latest\n timeout-minutes: 60\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Install Habitat\n run: sudo ./.expeditor/scripts/install-hab.sh 'x86_64-linux'\n\n - name: Verify plan\n run: sudo ./.expeditor/scripts/verify-plan.sh\n\n habitat-windows-plan:", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2178132207", + "repo_full_name": "chef/chef", + "pr_number": 15076, + "pr_file": ".github/workflows/verify.yml", + "discussion_id": "2178132207", + "commented_code": "@@ -0,0 +1,167 @@\n+name: Verify\n+\n+on:\n+ push:\n+ branches: [main, feature/github-actions-migration] # Add your temp branch here\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n+ CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n+ CHEF_LICENSE: accept-no-persist\n+\n+jobs:\n+ linux-matrix:\n+ name: ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n+ test_type: [Unit, Integration, Functional]\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n+ options: --privileged --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Run ${{ 
matrix.test_type }} tests\n+ run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ windows-tests:\n+ name: ${{ matrix.test_type }} - Windows 2019\n+ runs-on: windows-2019\n+ timeout-minutes: 120\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ test_type: [Unit, Integration, Functional]\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Set up Ruby\n+ uses: ruby/setup-ruby@v1\n+ with:\n+ ruby-version: '3.1'\n+ bundler-cache: true\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ shell: pwsh\n+ run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ gem-tests:\n+ name: ${{ matrix.gem }} gem\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ gem: [chef-zero, cheffish, chefspec, knife-windows, berkshelf]\n+ include:\n+ - gem: chef-zero\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/chef-zero main rake pedant\n+ env_vars: |\n+ PEDANT_OPTS=--skip-oc_id\n+ CHEF_FS=true\n+ - gem: cheffish\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/cheffish main rake spec\n+ - gem: chefspec\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/chefspec main rake\n+ - gem: knife-windows\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/knife-windows main rake spec\n+ - gem: berkshelf\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/berkshelf main rake\n+ pre_install: apt-get update -y && apt-get install -y graphviz\n+ container:\n+ image: chefes/omnibus-toolchain-ubuntu-1804:3.0.36\n+ options: --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Set up environment\n+ run: |\n+ export PATH=\"/root/.rbenv/shims:/opt/chef/bin:/home/ec2-user/.tfenv/bin:/home/ec2-user/.rbenv/shims:/home/ec2-user/.rbenv/bin:/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/bin\"\n+ echo \"PATH=$PATH\" >> $GITHUB_ENV\n+\n+ - name: Pre-install dependencies\n+ if: matrix.pre_install\n+ run: ${{ matrix.pre_install }}\n+\n+ - name: Bundle install\n+ run: |\n+ bundle config set --local without omnibus_package\n+ bundle config set --local path 'vendor/bundle'\n+ bundle install --jobs=3 --retry=3\n+\n+ - name: Run ${{ matrix.gem }} tests\n+ run: ${{ matrix.test_cmd }}\n+ env:\n+ CHEF_FOUNDATION_VERSION: ${{ env.CHEF_FOUNDATION_VERSION }}\n+ PEDANT_OPTS: ${{ matrix.gem == 'chef-zero' && '--skip-oc_id' || '' }}\n+ CHEF_FS: ${{ matrix.gem == 'chef-zero' && 'true' || '' }}\n+\n+ habitat-linux-plan:\n+ name: Habitat Linux Plan\n+ runs-on: ubuntu-latest\n+ timeout-minutes: 60\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Install Habitat\n+ run: sudo ./.expeditor/scripts/install-hab.sh 'x86_64-linux'\n+\n+ - name: Verify plan\n+ run: sudo ./.expeditor/scripts/verify-plan.sh\n+\n+ habitat-windows-plan:", + "comment_created_at": "2025-07-01T17:09:07+00:00", + "comment_author": "github-advanced-security[bot]", + "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. 
Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/295)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2178132209", + "pr_number": 15076, + "pr_file": ".github/workflows/verify.yml", + "created_at": "2025-07-01T17:09:07+00:00", + "commented_code": "name: Verify\n\non:\n push:\n branches: [main, feature/github-actions-migration] # Add your temp branch here\n pull_request:\n branches: [main]\n\nenv:\n CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n CHEF_LICENSE: accept-no-persist\n\njobs:\n linux-matrix:\n name: ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n test_type: [Unit, Integration, Functional]\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n options: --privileged --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - name: Run ${{ matrix.test_type }} tests\n run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n windows-tests:\n name: ${{ matrix.test_type }} - Windows 2019\n runs-on: windows-2019\n timeout-minutes: 120\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Set up Ruby\n uses: ruby/setup-ruby@v1\n with:\n ruby-version: '3.1'\n bundler-cache: true\n\n - name: Run ${{ matrix.test_type }} tests\n shell: pwsh\n run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n gem-tests:\n name: ${{ matrix.gem }} gem\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n gem: [chef-zero, cheffish, chefspec, knife-windows, berkshelf]\n include:\n - gem: chef-zero\n test_cmd: bundle exec tasks/bin/run_external_test chef/chef-zero main rake pedant\n env_vars: |\n PEDANT_OPTS=--skip-oc_id\n CHEF_FS=true\n - gem: cheffish\n test_cmd: bundle exec tasks/bin/run_external_test chef/cheffish main rake spec\n - gem: chefspec\n test_cmd: bundle exec tasks/bin/run_external_test chef/chefspec main rake\n - gem: knife-windows\n test_cmd: bundle exec tasks/bin/run_external_test chef/knife-windows main rake spec\n - gem: berkshelf\n test_cmd: bundle exec tasks/bin/run_external_test chef/berkshelf main rake\n pre_install: apt-get update -y && apt-get install -y graphviz\n container:\n image: chefes/omnibus-toolchain-ubuntu-1804:3.0.36\n options: --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - name: Set up environment\n run: |\n export PATH=\"/root/.rbenv/shims:/opt/chef/bin:/home/ec2-user/.tfenv/bin:/home/ec2-user/.rbenv/shims:/home/ec2-user/.rbenv/bin:/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/bin\"\n echo \"PATH=$PATH\" >> $GITHUB_ENV\n\n - name: Pre-install dependencies\n if: matrix.pre_install\n run: ${{ matrix.pre_install }}\n\n - name: Bundle install\n run: |\n bundle config set --local without omnibus_package\n bundle config set 
--local path 'vendor/bundle'\n bundle install --jobs=3 --retry=3\n\n - name: Run ${{ matrix.gem }} tests\n run: ${{ matrix.test_cmd }}\n env:\n CHEF_FOUNDATION_VERSION: ${{ env.CHEF_FOUNDATION_VERSION }}\n PEDANT_OPTS: ${{ matrix.gem == 'chef-zero' && '--skip-oc_id' || '' }}\n CHEF_FS: ${{ matrix.gem == 'chef-zero' && 'true' || '' }}\n\n habitat-linux-plan:\n name: Habitat Linux Plan\n runs-on: ubuntu-latest\n timeout-minutes: 60\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Install Habitat\n run: sudo ./.expeditor/scripts/install-hab.sh 'x86_64-linux'\n\n - name: Verify plan\n run: sudo ./.expeditor/scripts/verify-plan.sh\n\n habitat-windows-plan:\n name: Habitat Windows Plan\n runs-on: windows-2019\n timeout-minutes: 60\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Verify Windows plan\n shell: pwsh\n run: ./.expeditor/scripts/verify-plan.ps1\n\n verify-summary:", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2178132209", + "repo_full_name": "chef/chef", + "pr_number": 15076, + "pr_file": ".github/workflows/verify.yml", + "discussion_id": "2178132209", + "commented_code": "@@ -0,0 +1,167 @@\n+name: Verify\n+\n+on:\n+ push:\n+ branches: [main, feature/github-actions-migration] # Add your temp branch here\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n+ CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n+ CHEF_LICENSE: accept-no-persist\n+\n+jobs:\n+ linux-matrix:\n+ name: ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n+ test_type: [Unit, Integration, Functional]\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n+ options: --privileged --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ windows-tests:\n+ name: ${{ matrix.test_type }} - Windows 2019\n+ runs-on: windows-2019\n+ timeout-minutes: 120\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ test_type: [Unit, Integration, Functional]\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Set up Ruby\n+ uses: ruby/setup-ruby@v1\n+ with:\n+ ruby-version: '3.1'\n+ bundler-cache: true\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ shell: pwsh\n+ run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ gem-tests:\n+ name: ${{ matrix.gem }} gem\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ gem: [chef-zero, cheffish, chefspec, knife-windows, berkshelf]\n+ include:\n+ - gem: chef-zero\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/chef-zero main rake pedant\n+ env_vars: |\n+ PEDANT_OPTS=--skip-oc_id\n+ CHEF_FS=true\n+ - gem: cheffish\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/cheffish main rake spec\n+ - gem: chefspec\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/chefspec main rake\n+ - gem: knife-windows\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/knife-windows 
main rake spec\n+ - gem: berkshelf\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/berkshelf main rake\n+ pre_install: apt-get update -y && apt-get install -y graphviz\n+ container:\n+ image: chefes/omnibus-toolchain-ubuntu-1804:3.0.36\n+ options: --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Set up environment\n+ run: |\n+ export PATH=\"/root/.rbenv/shims:/opt/chef/bin:/home/ec2-user/.tfenv/bin:/home/ec2-user/.rbenv/shims:/home/ec2-user/.rbenv/bin:/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/bin\"\n+ echo \"PATH=$PATH\" >> $GITHUB_ENV\n+\n+ - name: Pre-install dependencies\n+ if: matrix.pre_install\n+ run: ${{ matrix.pre_install }}\n+\n+ - name: Bundle install\n+ run: |\n+ bundle config set --local without omnibus_package\n+ bundle config set --local path 'vendor/bundle'\n+ bundle install --jobs=3 --retry=3\n+\n+ - name: Run ${{ matrix.gem }} tests\n+ run: ${{ matrix.test_cmd }}\n+ env:\n+ CHEF_FOUNDATION_VERSION: ${{ env.CHEF_FOUNDATION_VERSION }}\n+ PEDANT_OPTS: ${{ matrix.gem == 'chef-zero' && '--skip-oc_id' || '' }}\n+ CHEF_FS: ${{ matrix.gem == 'chef-zero' && 'true' || '' }}\n+\n+ habitat-linux-plan:\n+ name: Habitat Linux Plan\n+ runs-on: ubuntu-latest\n+ timeout-minutes: 60\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Install Habitat\n+ run: sudo ./.expeditor/scripts/install-hab.sh 'x86_64-linux'\n+\n+ - name: Verify plan\n+ run: sudo ./.expeditor/scripts/verify-plan.sh\n+\n+ habitat-windows-plan:\n+ name: Habitat Windows Plan\n+ runs-on: windows-2019\n+ timeout-minutes: 60\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Verify Windows plan\n+ shell: pwsh\n+ run: ./.expeditor/scripts/verify-plan.ps1\n+\n+ verify-summary:", + "comment_created_at": "2025-07-01T17:09:07+00:00", + "comment_author": "github-advanced-security[bot]", + "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. 
Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/296)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2178166822", + "pr_number": 15076, + "pr_file": ".github/workflows/verify.yml", + "created_at": "2025-07-01T17:24:21+00:00", + "commented_code": "name: Verify\n\non:\n push:\n branches: [main, feature/github-actions-migration] # Add your temp branch here\n pull_request:\n branches: [main]\n\nenv:\n CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n CHEF_LICENSE: accept-no-persist\n\njobs:\n linux-matrix:\n name: ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-24.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n test_type: [Unit, Integration, Functional]\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n options: --privileged --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - name: Run ${{ matrix.test_type }} tests\n run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n windows-tests:", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2178166822", + "repo_full_name": "chef/chef", + "pr_number": 15076, + "pr_file": ".github/workflows/verify.yml", + "discussion_id": "2178166822", + "commented_code": "@@ -0,0 +1,167 @@\n+name: Verify\n+\n+on:\n+ push:\n+ branches: [main, feature/github-actions-migration] # Add your temp branch here\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n+ CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n+ CHEF_LICENSE: accept-no-persist\n+\n+jobs:\n+ linux-matrix:\n+ name: ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-24.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n+ test_type: [Unit, Integration, Functional]\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n+ options: --privileged --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ windows-tests:", + "comment_created_at": "2025-07-01T17:24:21+00:00", + "comment_author": "github-advanced-security[bot]", + "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. 
Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/298)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2178450773", + "pr_number": 15076, + "pr_file": ".github/workflows/verify.yml", + "created_at": "2025-07-01T20:20:39+00:00", + "commented_code": "name: Verify\n\non:\n push:\n branches: [main, feature/github-actions-migration] # Add your temp branch here\n pull_request:\n branches: [main]\n\nenv:\n CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n CHEF_LICENSE: accept-no-persist\n\njobs:\n linux-matrix:\n name: ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n test_type: [Unit, Integration, Functional]\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n options: --privileged --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n # Skip checkout actions for debian-9 due to GLIBC incompatibility\n if: matrix.os != 'debian-9'\n\n - name: Manual checkout for Debian 9\n if: matrix.os == 'debian-9'\n run: |\n # Clone the repository with full history to ensure all refs are available\n git clone --no-checkout https://github.com/${{ github.repository }} .\n\n # Fetch the specific ref if it's not available\n git fetch origin ${{ github.ref }}:${{ github.ref }} || true\n git fetch origin ${{ github.sha }} || true\n\n git checkout ${{ github.sha }}\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - name: Run ${{ matrix.test_type }} tests\n run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n windows-tests:\n name: ${{ matrix.test_type }} - Windows 2022\n runs-on: windows-2022\n timeout-minutes: 120\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Set up Ruby\n uses: ruby/setup-ruby@v1\n with:\n ruby-version: '3.1'\n bundler-cache: true\n\n - name: Run ${{ matrix.test_type }} tests\n shell: pwsh\n run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n gem-tests:", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2178450773", + "repo_full_name": "chef/chef", + "pr_number": 15076, + "pr_file": ".github/workflows/verify.yml", + "discussion_id": "2178450773", + "commented_code": "@@ -0,0 +1,181 @@\n+name: Verify\n+\n+on:\n+ push:\n+ branches: [main, feature/github-actions-migration] # Add your temp branch here\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n+ CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n+ CHEF_LICENSE: accept-no-persist\n+\n+jobs:\n+ linux-matrix:\n+ name: ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n+ test_type: [Unit, Integration, Functional]\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n+ 
options: --privileged --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+ # Skip checkout actions for debian-9 due to GLIBC incompatibility\n+ if: matrix.os != 'debian-9'\n+\n+ - name: Manual checkout for Debian 9\n+ if: matrix.os == 'debian-9'\n+ run: |\n+ # Clone the repository with full history to ensure all refs are available\n+ git clone --no-checkout https://github.com/${{ github.repository }} .\n+\n+ # Fetch the specific ref if it's not available\n+ git fetch origin ${{ github.ref }}:${{ github.ref }} || true\n+ git fetch origin ${{ github.sha }} || true\n+\n+ git checkout ${{ github.sha }}\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ windows-tests:\n+ name: ${{ matrix.test_type }} - Windows 2022\n+ runs-on: windows-2022\n+ timeout-minutes: 120\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ test_type: [Unit, Integration, Functional]\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Set up Ruby\n+ uses: ruby/setup-ruby@v1\n+ with:\n+ ruby-version: '3.1'\n+ bundler-cache: true\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ shell: pwsh\n+ run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ gem-tests:", + "comment_created_at": "2025-07-01T20:20:39+00:00", + "comment_author": "github-advanced-security[bot]", + "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/299)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2178450775", + "pr_number": 15076, + "pr_file": ".github/workflows/verify.yml", + "created_at": "2025-07-01T20:20:39+00:00", + "commented_code": "name: Verify\n\non:\n push:\n branches: [main, feature/github-actions-migration] # Add your temp branch here\n pull_request:\n branches: [main]\n\nenv:\n CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n CHEF_LICENSE: accept-no-persist\n\njobs:\n linux-matrix:\n name: ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n test_type: [Unit, Integration, Functional]\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n options: --privileged --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n # Skip checkout actions for debian-9 due to GLIBC incompatibility\n if: matrix.os != 'debian-9'\n\n - name: Manual checkout for Debian 9\n if: matrix.os == 'debian-9'\n run: |\n # Clone the repository with full history to ensure all refs are available\n git clone --no-checkout https://github.com/${{ github.repository }} .\n\n # Fetch the specific ref if it's not available\n git fetch origin ${{ github.ref }}:${{ github.ref }} || true\n git fetch origin ${{ github.sha }} || true\n\n git checkout ${{ github.sha }}\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - 
name: Run ${{ matrix.test_type }} tests\n run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n windows-tests:\n name: ${{ matrix.test_type }} - Windows 2022\n runs-on: windows-2022\n timeout-minutes: 120\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Set up Ruby\n uses: ruby/setup-ruby@v1\n with:\n ruby-version: '3.1'\n bundler-cache: true\n\n - name: Run ${{ matrix.test_type }} tests\n shell: pwsh\n run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n gem-tests:\n name: ${{ matrix.gem }} gem\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n gem: [chef-zero, cheffish, chefspec, knife-windows, berkshelf]\n include:\n - gem: chef-zero\n test_cmd: bundle exec tasks/bin/run_external_test chef/chef-zero main rake pedant\n env_vars: |\n PEDANT_OPTS=--skip-oc_id\n CHEF_FS=true\n - gem: cheffish\n test_cmd: bundle exec tasks/bin/run_external_test chef/cheffish main rake spec\n - gem: chefspec\n test_cmd: bundle exec tasks/bin/run_external_test chef/chefspec main rake\n - gem: knife-windows\n test_cmd: bundle exec tasks/bin/run_external_test chef/knife-windows main rake spec\n - gem: berkshelf\n test_cmd: bundle exec tasks/bin/run_external_test chef/berkshelf main rake\n pre_install: apt-get update -y && apt-get install -y graphviz\n container:\n image: chefes/omnibus-toolchain-ubuntu-2204:3.0.36\n options: --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - name: Set up environment\n run: |\n export PATH=\"/root/.rbenv/shims:/opt/chef/bin:/home/ec2-user/.tfenv/bin:/home/ec2-user/.rbenv/shims:/home/ec2-user/.rbenv/bin:/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/bin\"\n echo \"PATH=$PATH\" >> $GITHUB_ENV\n\n - name: Pre-install dependencies\n if: matrix.pre_install\n run: ${{ matrix.pre_install }}\n\n - name: Bundle install\n run: |\n bundle config set --local without omnibus_package\n bundle config set --local path 'vendor/bundle'\n bundle install --jobs=3 --retry=3\n\n - name: Run ${{ matrix.gem }} tests\n run: ${{ matrix.test_cmd }}\n env:\n CHEF_FOUNDATION_VERSION: ${{ env.CHEF_FOUNDATION_VERSION }}\n PEDANT_OPTS: ${{ matrix.gem == 'chef-zero' && '--skip-oc_id' || '' }}\n CHEF_FS: ${{ matrix.gem == 'chef-zero' && 'true' || '' }}\n\n habitat-linux-plan:\n name: Habitat Linux Plan\n runs-on: ubuntu-latest\n timeout-minutes: 60\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Install Habitat\n run: sudo ./.expeditor/scripts/install-hab.sh 'x86_64-linux'\n\n - name: Verify plan\n run: sudo ./.expeditor/scripts/verify-plan.sh\n\n habitat-windows-plan:\n name: Habitat Windows Plan\n runs-on: windows-2022\n timeout-minutes: 60\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Verify Windows plan\n shell: pwsh\n run: ./.expeditor/scripts/verify-plan.ps1\n\n verify-summary:", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2178450775", + "repo_full_name": "chef/chef", + "pr_number": 15076, + "pr_file": ".github/workflows/verify.yml", + "discussion_id": "2178450775", + "commented_code": "@@ -0,0 +1,181 @@\n+name: Verify\n+\n+on:\n+ push:\n+ branches: [main, feature/github-actions-migration] # 
Add your temp branch here\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n+ CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n+ CHEF_LICENSE: accept-no-persist\n+\n+jobs:\n+ linux-matrix:\n+ name: ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n+ test_type: [Unit, Integration, Functional]\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n+ options: --privileged --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+ # Skip checkout actions for debian-9 due to GLIBC incompatibility\n+ if: matrix.os != 'debian-9'\n+\n+ - name: Manual checkout for Debian 9\n+ if: matrix.os == 'debian-9'\n+ run: |\n+ # Clone the repository with full history to ensure all refs are available\n+ git clone --no-checkout https://github.com/${{ github.repository }} .\n+\n+ # Fetch the specific ref if it's not available\n+ git fetch origin ${{ github.ref }}:${{ github.ref }} || true\n+ git fetch origin ${{ github.sha }} || true\n+\n+ git checkout ${{ github.sha }}\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ windows-tests:\n+ name: ${{ matrix.test_type }} - Windows 2022\n+ runs-on: windows-2022\n+ timeout-minutes: 120\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ test_type: [Unit, Integration, Functional]\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Set up Ruby\n+ uses: ruby/setup-ruby@v1\n+ with:\n+ ruby-version: '3.1'\n+ bundler-cache: true\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ shell: pwsh\n+ run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ gem-tests:\n+ name: ${{ matrix.gem }} gem\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ gem: [chef-zero, cheffish, chefspec, knife-windows, berkshelf]\n+ include:\n+ - gem: chef-zero\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/chef-zero main rake pedant\n+ env_vars: |\n+ PEDANT_OPTS=--skip-oc_id\n+ CHEF_FS=true\n+ - gem: cheffish\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/cheffish main rake spec\n+ - gem: chefspec\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/chefspec main rake\n+ - gem: knife-windows\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/knife-windows main rake spec\n+ - gem: berkshelf\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/berkshelf main rake\n+ pre_install: apt-get update -y && apt-get install -y graphviz\n+ container:\n+ image: chefes/omnibus-toolchain-ubuntu-2204:3.0.36\n+ options: --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Set up environment\n+ run: |\n+ export PATH=\"/root/.rbenv/shims:/opt/chef/bin:/home/ec2-user/.tfenv/bin:/home/ec2-user/.rbenv/shims:/home/ec2-user/.rbenv/bin:/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/bin\"\n+ echo \"PATH=$PATH\" >> $GITHUB_ENV\n+\n+ - name: Pre-install dependencies\n+ if: matrix.pre_install\n+ 
run: ${{ matrix.pre_install }}\n+\n+ - name: Bundle install\n+ run: |\n+ bundle config set --local without omnibus_package\n+ bundle config set --local path 'vendor/bundle'\n+ bundle install --jobs=3 --retry=3\n+\n+ - name: Run ${{ matrix.gem }} tests\n+ run: ${{ matrix.test_cmd }}\n+ env:\n+ CHEF_FOUNDATION_VERSION: ${{ env.CHEF_FOUNDATION_VERSION }}\n+ PEDANT_OPTS: ${{ matrix.gem == 'chef-zero' && '--skip-oc_id' || '' }}\n+ CHEF_FS: ${{ matrix.gem == 'chef-zero' && 'true' || '' }}\n+\n+ habitat-linux-plan:\n+ name: Habitat Linux Plan\n+ runs-on: ubuntu-latest\n+ timeout-minutes: 60\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Install Habitat\n+ run: sudo ./.expeditor/scripts/install-hab.sh 'x86_64-linux'\n+\n+ - name: Verify plan\n+ run: sudo ./.expeditor/scripts/verify-plan.sh\n+\n+ habitat-windows-plan:\n+ name: Habitat Windows Plan\n+ runs-on: windows-2022\n+ timeout-minutes: 60\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Verify Windows plan\n+ shell: pwsh\n+ run: ./.expeditor/scripts/verify-plan.ps1\n+\n+ verify-summary:", + "comment_created_at": "2025-07-01T20:20:39+00:00", + "comment_author": "github-advanced-security[bot]", + "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/300)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2073245958", + "pr_number": 14794, + "pr_file": ".github/workflows/kitchen.yml", + "created_at": "2025-05-05T11:06:01+00:00", + "commented_code": "working-directory: kitchen-tests\n\njobs:\n # Test-kitchen-enterprise currently supports only linux platforms, \n # Hence we are testing on Windows using test-kitchen\n win_x86_64:\n name: win_x86_64(test-kitchen legacy)\n strategy:\n fail-fast: false\n matrix:\n~ os:\n~ - windows-2019\n~ - windows-2022\n~ runs-on: ${{ matrix.os }}\n~ env:\n~ CHEF_LICENSE: accept-no-persist\n~ KITCHEN_LOCAL_YAML: kitchen.exec.windows.yml\n~ steps:\n~ - uses: actions/checkout@v4\n~ - name: Install Chef\n~ uses: actionshub/chef-install@3.0.0\n - name: Kitchen Test\n run: kitchen test end-to-end-${{ matrix.os }}\n\n # Habitat does not support Mac yet. 
Hence we're testing on Mac systems using test-kitchen\n mac_x86_64:\n name: mac_x86_64(test-kitchen legacy)\n strategy:\n fail-fast: false\n matrix:\n~ os:\n~ - macos-13\n~ runs-on: ${{ matrix.os }}\n~ env:\n~ CHEF_LICENSE: accept-no-persist\n~ KITCHEN_LOCAL_YAML: kitchen.exec.macos.yml\n~ steps:\n~ - name: Check out code\n~ uses: actions/checkout@v4\n~ with:\n~ clean: true\n~ - name: Install Chef\n~ uses: actionshub/chef-install@3.0.0\n~ - name: Kitchen Test\n run: kitchen test end-to-end-${{ matrix.os }}\n\n mac_arm64:\n name: mac_arm64(test-kitchen legacy)\n strategy:\n fail-fast: false\n matrix:\n~ os:\n~ - macos-14 # currently macos-latest\n~ - macos-15\n~ runs-on: ${{ matrix.os }}\n~ env:\n~ CHEF_LICENSE: accept-no-persist\n~ KITCHEN_LOCAL_YAML: kitchen.exec.macos.yml\n~ steps:\n~ - name: Check out code\n~ uses: actions/checkout@v4\n~ with:\n~ clean: true\n~ - name: Install Chef\n~ uses: actionshub/chef-install@3.0.0\n - name: Kitchen Test\n run: kitchen test end-to-end-${{ matrix.os }}\n\n # This block does end-to-end recipe using test-kitchen-enterprise using Habitat package of chef19\n docr_lnx_x86_64:\n name: docr_lnx_x86_64(test-kitchen-enterprise)\n strategy:\n fail-fast: false\n matrix:\n~ os:\n~ - amazonlinux-2023\n~ - almalinux-8\n~ - almalinux-9\n~ - debian-11\n~ - debian-12\n~ - fedora-40\n~ # fedora-latest\n~ - opensuse-leap-15\n~ - oraclelinux-8\n~ - oraclelinux-9\n~ - rockylinux-8\n~ - rockylinux-9\n~ - ubuntu-2004\n~ - ubuntu-2204\n~ # - ubuntu-2404\n~ runs-on: ubuntu-latest\n~ env:\n~ FORCE_FFI_YAJL: ext\n~ CHEF_LICENSE: accept-no-persist\n~ KITCHEN_LOCAL_YAML: kitchen.dokken.yml\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n - name: Install Chef\n uses: actionshub/chef-install@3.0.0\n - name: Install chef-test-kitchen-enterprise\n run: |\n curl https://raw.githubusercontent.com/habitat-sh/habitat/main/components/hab/install.sh | sudo bash -s -- -c stable\n sudo hab license accept\n sudo hab --version\n sudo hab pkg install --binlink --force chef/chef-test-kitchen-enterprise --channel unstable\n sudo kitchen --version\n sudo hab pkg install --binlink --force chef/chef-cli --channel unstable\n - name: Kitchen Test\n run: kitchen test end-to-end-${{ matrix.os }}\n\n # chef-test-kitchen-enterprise RC1 currently supports only kitchen-dokken driver hence in this block we \n # will continue testing with test-kitchen\n vm_lnx_x86_64:\n name: vm_lnx_x86_64(test-kitchen legacy)\n strategy:\n fail-fast: false\n matrix:\n~ os:\n~ - amazonlinux-2\n~ - amazonlinux-2023\n~ - almalinux-8\n~ - almalinux-9\n~ - debian-11\n~ - debian-12\n~ - fedora-40\n~ # - fedora-latest\n~ - opensuse-leap-15\n~ - oracle-7\n~ - oracle-8\n~ - oracle-9\n~ - rockylinux-8\n~ - rockylinux-9\n~ - ubuntu-2004\n~ - ubuntu-2204\n~ # - ubuntu-2404\n~ runs-on: ubuntu-22.04\n~ env:\n~ CHEF_LICENSE: accept-no-persist\n~ KITCHEN_LOCAL_YAML: kitchen.linux.ci.yml\n~ steps:\n~ - uses: actions/checkout@v4\n~ - name: Install Vagrant and VirtualBox\n~ run: |\n~ wget -O- https://apt.releases.hashicorp.com/gpg | gpg --dearmor | sudo tee /usr/share/keyrings/hashicorp-archive-keyring.gpg\n~ wget -O- https://www.virtualbox.org/download/oracle_vbox_2016.asc | gpg --dearmor | sudo tee /usr/share/keyrings/oracle-virtualbox-2016.gpg\n~ echo \"deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main\" | sudo tee /etc/apt/sources.list.d/hashicorp.list\n~ echo \"deb [arch=amd64 signed-by=/usr/share/keyrings/oracle-virtualbox-2016.gpg] 
https://download.virtualbox.org/virtualbox/debian $(lsb_release -cs) contrib\" | sudo tee /etc/apt/sources.list.d/virtualbox.list\n~ sudo apt-get update\n~ sudo apt-get install -y vagrant virtualbox-7.1\n~ sudo vagrant --version\n~ sudo VBoxManage --version\n~ - name: Install Chef\n~ uses: actionshub/chef-install@3.0.0\n~ - name: Kitchen Test\n~ run: |\n~ # To debug, uncomment these\n~ #export VAGRANT_LOG=debug\n~ #export VBOX_LOG=all.e.l.f\n #export VBOX_LOG_DEST=stdout\n #export K_DEBUG_OPTS=\"--log-level=debug\"\n export LOGNAME=$USER\n kitchen test end-to-end-${{ matrix.os }} $K_DEBUG_OPTS\n kitchen test end-to-end-${{ matrix.os }} $K_DEBUG_OPTS", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2073245958", + "repo_full_name": "chef/chef", + "pr_number": 14794, + "pr_file": ".github/workflows/kitchen.yml", + "discussion_id": "2073245958", + "commented_code": "@@ -16,144 +16,161 @@\n working-directory: kitchen-tests\n \n jobs:\n+ # Test-kitchen-enterprise currently supports only linux platforms, \n+ # Hence we are testing on Windows using test-kitchen\n win_x86_64:\n+ name: win_x86_64(test-kitchen legacy)\n strategy:\n fail-fast: false\n matrix:\n~ os:\n~ - windows-2019\n~ - windows-2022\n~ runs-on: ${{ matrix.os }}\n~ env:\n~ CHEF_LICENSE: accept-no-persist\n~ KITCHEN_LOCAL_YAML: kitchen.exec.windows.yml\n~ steps:\n~ - uses: actions/checkout@v4\n~ - name: Install Chef\n~ uses: actionshub/chef-install@3.0.0\n - name: Kitchen Test\n run: kitchen test end-to-end-${{ matrix.os }}\n \n+ # Habitat does not support Mac yet. Hence we're testing on Mac systems using test-kitchen\n mac_x86_64:\n+ name: mac_x86_64(test-kitchen legacy)\n strategy:\n fail-fast: false\n matrix:\n~ os:\n~ - macos-13\n~ runs-on: ${{ matrix.os }}\n~ env:\n~ CHEF_LICENSE: accept-no-persist\n~ KITCHEN_LOCAL_YAML: kitchen.exec.macos.yml\n~ steps:\n~ - name: Check out code\n~ uses: actions/checkout@v4\n~ with:\n~ clean: true\n~ - name: Install Chef\n~ uses: actionshub/chef-install@3.0.0\n~ - name: Kitchen Test\n run: kitchen test end-to-end-${{ matrix.os }}\n \n mac_arm64:\n+ name: mac_arm64(test-kitchen legacy)\n strategy:\n fail-fast: false\n matrix:\n~ os:\n~ - macos-14 # currently macos-latest\n~ - macos-15\n~ runs-on: ${{ matrix.os }}\n~ env:\n~ CHEF_LICENSE: accept-no-persist\n~ KITCHEN_LOCAL_YAML: kitchen.exec.macos.yml\n~ steps:\n~ - name: Check out code\n~ uses: actions/checkout@v4\n~ with:\n~ clean: true\n~ - name: Install Chef\n~ uses: actionshub/chef-install@3.0.0\n - name: Kitchen Test\n run: kitchen test end-to-end-${{ matrix.os }}\n \n+ # This block does end-to-end recipe using test-kitchen-enterprise using Habitat package of chef19\n docr_lnx_x86_64:\n+ name: docr_lnx_x86_64(test-kitchen-enterprise)\n strategy:\n fail-fast: false\n matrix:\n~ os:\n~ - amazonlinux-2023\n~ - almalinux-8\n~ - almalinux-9\n~ - debian-11\n~ - debian-12\n~ - fedora-40\n~ # fedora-latest\n~ - opensuse-leap-15\n~ - oraclelinux-8\n~ - oraclelinux-9\n~ - rockylinux-8\n~ - rockylinux-9\n~ - ubuntu-2004\n~ - ubuntu-2204\n~ # - ubuntu-2404\n~ runs-on: ubuntu-latest\n~ env:\n~ FORCE_FFI_YAJL: ext\n~ CHEF_LICENSE: accept-no-persist\n~ KITCHEN_LOCAL_YAML: kitchen.dokken.yml\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n- - name: Install Chef\n- uses: actionshub/chef-install@3.0.0\n+ - name: Install chef-test-kitchen-enterprise\n+ run: |\n+ curl https://raw.githubusercontent.com/habitat-sh/habitat/main/components/hab/install.sh | sudo bash -s -- -c stable\n+ sudo hab license accept\n+ sudo 
hab --version\n+ sudo hab pkg install --binlink --force chef/chef-test-kitchen-enterprise --channel unstable\n+ sudo kitchen --version\n+ sudo hab pkg install --binlink --force chef/chef-cli --channel unstable\n - name: Kitchen Test\n run: kitchen test end-to-end-${{ matrix.os }}\n \n+ # chef-test-kitchen-enterprise RC1 currently supports only kitchen-dokken driver hence in this block we \n+ # will continue testing with test-kitchen\n vm_lnx_x86_64:\n+ name: vm_lnx_x86_64(test-kitchen legacy)\n strategy:\n fail-fast: false\n matrix:\n~ os:\n~ - amazonlinux-2\n~ - amazonlinux-2023\n~ - almalinux-8\n~ - almalinux-9\n~ - debian-11\n~ - debian-12\n~ - fedora-40\n~ # - fedora-latest\n~ - opensuse-leap-15\n~ - oracle-7\n~ - oracle-8\n~ - oracle-9\n~ - rockylinux-8\n~ - rockylinux-9\n~ - ubuntu-2004\n~ - ubuntu-2204\n~ # - ubuntu-2404\n~ runs-on: ubuntu-22.04\n~ env:\n~ CHEF_LICENSE: accept-no-persist\n~ KITCHEN_LOCAL_YAML: kitchen.linux.ci.yml\n~ steps:\n~ - uses: actions/checkout@v4\n~ - name: Install Vagrant and VirtualBox\n~ run: |\n~ wget -O- https://apt.releases.hashicorp.com/gpg | gpg --dearmor | sudo tee /usr/share/keyrings/hashicorp-archive-keyring.gpg\n~ wget -O- https://www.virtualbox.org/download/oracle_vbox_2016.asc | gpg --dearmor | sudo tee /usr/share/keyrings/oracle-virtualbox-2016.gpg\n~ echo \"deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main\" | sudo tee /etc/apt/sources.list.d/hashicorp.list\n~ echo \"deb [arch=amd64 signed-by=/usr/share/keyrings/oracle-virtualbox-2016.gpg] https://download.virtualbox.org/virtualbox/debian $(lsb_release -cs) contrib\" | sudo tee /etc/apt/sources.list.d/virtualbox.list\n~ sudo apt-get update\n~ sudo apt-get install -y vagrant virtualbox-7.1\n~ sudo vagrant --version\n~ sudo VBoxManage --version\n~ - name: Install Chef\n~ uses: actionshub/chef-install@3.0.0\n~ - name: Kitchen Test\n~ run: |\n~ # To debug, uncomment these\n~ #export VAGRANT_LOG=debug\n~ #export VBOX_LOG=all.e.l.f\n #export VBOX_LOG_DEST=stdout\n #export K_DEBUG_OPTS=\"--log-level=debug\"\n export LOGNAME=$USER\n- kitchen test end-to-end-${{ matrix.os }} $K_DEBUG_OPTS\n+ kitchen test end-to-end-${{ matrix.os }} $K_DEBUG_OPTS", + "comment_created_at": "2025-05-05T11:06:01+00:00", + "comment_author": "github-advanced-security[bot]", + "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. 
Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/267)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2167257073", + "pr_number": 15068, + "pr_file": ".github/workflows/verify.yml", + "created_at": "2025-06-25T17:30:23+00:00", + "commented_code": "name: Verify Pipeline\n\non:\n workflow_dispatch:\n inputs:\n base_image_tag:\n description: 'Base image tag version'\n required: true\n default: '3.0.36'\n type: string\n custom_env_var:\n description: 'Custom environment variable'\n required: false\n default: 'production'\n type: string\n chef_license:\n description: 'Chef license acceptance'\n required: true\n default: 'accept-no-persist'\n type: string\n push:\n branches: [main]\n pull_request:\n branches: [main]\n\nenv:\n BASE_IMAGE_TAG: ${{ inputs.base_image_tag || '3.0.36' }}\n CUSTOM_ENV: ${{ inputs.custom_env_var || 'production' }}\n CHEF_LICENSE: ${{ inputs.chef_license || 'accept-no-persist' }}\n\njobs:\n # Linux Tests - Matrix for different test types and OS combinations\n linux-tests:\n name: Linux ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-22.04\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n os:\n - rocky-8\n - rocky-9\n - rhel-9\n - debian-9\n - ubuntu-1804\n - ubuntu-2004\n - ubuntu-2204\n - centos-7\n - amazonlinux-2\n - rockylinux-8\n - almalinux-9\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:${{ env.BASE_IMAGE_TAG }}\n options: --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Configure Git safe directory\n run: |\n git config --global --add safe.directory /__w/*/\n\n - name: Display Environment Info\n run: |\n echo \"Running ${{ matrix.test_type }} tests on ${{ matrix.os }}\"\n echo \"Base Image Tag: ${{ env.BASE_IMAGE_TAG }}\"\n echo \"Custom Environment: ${{ env.CUSTOM_ENV }}\"\n echo \"Chef License: ${{ env.CHEF_LICENSE }}\"\n cat /etc/os-release || echo \"OS release info not available\"\n\n - name: Install Dependencies\n run: |\n ./.expeditor/bk_container_prep.sh\n\n - name: Run ${{ matrix.test_type }} Tests\n run: |\n ./.expeditor/scripts/prep_and_run_tests.sh \"${{ matrix.test_type }}\"\n env:\n TEST_TYPE: ${{ matrix.test_type }}\n OS_TYPE: ${{ matrix.os }}\n CHEF_LICENSE: ${{ env.CHEF_LICENSE }}\n CUSTOM_ENV_VAR: ${{ env.CUSTOM_ENV }}\n\n # Windows Tests - Matrix for different test types and Windows versions\n windows-tests:", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2167257073", + "repo_full_name": "chef/chef", + "pr_number": 15068, + "pr_file": ".github/workflows/verify.yml", + "discussion_id": "2167257073", + "commented_code": "@@ -0,0 +1,161 @@\n+name: Verify Pipeline\n+\n+on:\n+ workflow_dispatch:\n+ inputs:\n+ base_image_tag:\n+ description: 'Base image tag version'\n+ required: true\n+ default: '3.0.36'\n+ type: string\n+ custom_env_var:\n+ description: 'Custom environment variable'\n+ required: false\n+ default: 'production'\n+ type: string\n+ chef_license:\n+ description: 'Chef license acceptance'\n+ required: true\n+ default: 'accept-no-persist'\n+ type: string\n+ push:\n+ branches: [main]\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ BASE_IMAGE_TAG: ${{ inputs.base_image_tag || '3.0.36' }}\n+ CUSTOM_ENV: ${{ inputs.custom_env_var || 'production' }}\n+ CHEF_LICENSE: ${{ inputs.chef_license || 'accept-no-persist' }}\n+\n+jobs:\n+ # Linux Tests - 
Matrix for different test types and OS combinations\n+ linux-tests:\n+ name: Linux ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-22.04\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ test_type: [Unit, Integration, Functional]\n+ os:\n+ - rocky-8\n+ - rocky-9\n+ - rhel-9\n+ - debian-9\n+ - ubuntu-1804\n+ - ubuntu-2004\n+ - ubuntu-2204\n+ - centos-7\n+ - amazonlinux-2\n+ - rockylinux-8\n+ - almalinux-9\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:${{ env.BASE_IMAGE_TAG }}\n+ options: --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Configure Git safe directory\n+ run: |\n+ git config --global --add safe.directory /__w/*/\n+\n+ - name: Display Environment Info\n+ run: |\n+ echo \"Running ${{ matrix.test_type }} tests on ${{ matrix.os }}\"\n+ echo \"Base Image Tag: ${{ env.BASE_IMAGE_TAG }}\"\n+ echo \"Custom Environment: ${{ env.CUSTOM_ENV }}\"\n+ echo \"Chef License: ${{ env.CHEF_LICENSE }}\"\n+ cat /etc/os-release || echo \"OS release info not available\"\n+\n+ - name: Install Dependencies\n+ run: |\n+ ./.expeditor/bk_container_prep.sh\n+\n+ - name: Run ${{ matrix.test_type }} Tests\n+ run: |\n+ ./.expeditor/scripts/prep_and_run_tests.sh \"${{ matrix.test_type }}\"\n+ env:\n+ TEST_TYPE: ${{ matrix.test_type }}\n+ OS_TYPE: ${{ matrix.os }}\n+ CHEF_LICENSE: ${{ env.CHEF_LICENSE }}\n+ CUSTOM_ENV_VAR: ${{ env.CUSTOM_ENV }}\n+\n+ # Windows Tests - Matrix for different test types and Windows versions\n+ windows-tests:", + "comment_created_at": "2025-06-25T17:30:23+00:00", + "comment_author": "github-advanced-security[bot]", + "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/289)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2167257077", + "pr_number": 15068, + "pr_file": ".github/workflows/verify.yml", + "created_at": "2025-06-25T17:30:23+00:00", + "commented_code": "name: Verify Pipeline\n\non:\n workflow_dispatch:\n inputs:\n base_image_tag:\n description: 'Base image tag version'\n required: true\n default: '3.0.36'\n type: string\n custom_env_var:\n description: 'Custom environment variable'\n required: false\n default: 'production'\n type: string\n chef_license:\n description: 'Chef license acceptance'\n required: true\n default: 'accept-no-persist'\n type: string\n push:\n branches: [main]\n pull_request:\n branches: [main]\n\nenv:\n BASE_IMAGE_TAG: ${{ inputs.base_image_tag || '3.0.36' }}\n CUSTOM_ENV: ${{ inputs.custom_env_var || 'production' }}\n CHEF_LICENSE: ${{ inputs.chef_license || 'accept-no-persist' }}\n\njobs:\n # Linux Tests - Matrix for different test types and OS combinations\n linux-tests:\n name: Linux ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-22.04\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n os:\n - rocky-8\n - rocky-9\n - rhel-9\n - debian-9\n - ubuntu-1804\n - ubuntu-2004\n - ubuntu-2204\n - centos-7\n - amazonlinux-2\n - rockylinux-8\n - almalinux-9\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:${{ env.BASE_IMAGE_TAG }}\n options: --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Configure Git safe directory\n run: |\n git config --global --add safe.directory /__w/*/\n\n - name: Display 
Environment Info\n run: |\n echo \"Running ${{ matrix.test_type }} tests on ${{ matrix.os }}\"\n echo \"Base Image Tag: ${{ env.BASE_IMAGE_TAG }}\"\n echo \"Custom Environment: ${{ env.CUSTOM_ENV }}\"\n echo \"Chef License: ${{ env.CHEF_LICENSE }}\"\n cat /etc/os-release || echo \"OS release info not available\"\n\n - name: Install Dependencies\n run: |\n ./.expeditor/bk_container_prep.sh\n\n - name: Run ${{ matrix.test_type }} Tests\n run: |\n ./.expeditor/scripts/prep_and_run_tests.sh \"${{ matrix.test_type }}\"\n env:\n TEST_TYPE: ${{ matrix.test_type }}\n OS_TYPE: ${{ matrix.os }}\n CHEF_LICENSE: ${{ env.CHEF_LICENSE }}\n CUSTOM_ENV_VAR: ${{ env.CUSTOM_ENV }}\n\n # Windows Tests - Matrix for different test types and Windows versions\n windows-tests:\n name: Windows ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ${{ matrix.os }}\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n os:\n - windows-2019\n - windows-2022\n - windows-latest\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Set up Ruby\n uses: ruby/setup-ruby@v1\n with:\n ruby-version: '3.1'\n bundler-cache: true\n\n - name: Display Environment Info\n shell: pwsh\n run: |\n Write-Host \"Running ${{ matrix.test_type }} tests on ${{ matrix.os }}\"\n Write-Host \"Base Image Tag: ${{ env.BASE_IMAGE_TAG }}\"\n Write-Host \"Custom Environment: ${{ env.CUSTOM_ENV }}\"\n Write-Host \"Chef License: ${{ env.CHEF_LICENSE }}\"\n Get-ComputerInfo | Select-Object WindowsProductName, WindowsVersion\n\n - name: Install Dependencies\n shell: pwsh\n run: |\n bundle config set --local without development\n bundle install --jobs=3 --retry=3\n\n - name: Run ${{ matrix.test_type }} Tests\n shell: pwsh\n run: |\n switch (\"${{ matrix.test_type }}\") {\n \"Unit\" {\n Write-Host \"Running Unit Tests...\"\n try { bundle exec rake spec:unit } catch { Write-Host \"Unit tests script not found\" }\n }\n \"Integration\" {\n Write-Host \"Running Integration Tests...\"\n try { bundle exec rake spec:integration } catch { Write-Host \"Integration tests script not found\" }\n }\n \"Functional\" {\n Write-Host \"Running Functional Tests...\"\n try { bundle exec rake spec:functional } catch { Write-Host \"Functional tests script not found\" }\n }\n }\n env:\n TEST_TYPE: ${{ matrix.test_type }}\n OS_TYPE: ${{ matrix.os }}\n CHEF_LICENSE: ${{ env.CHEF_LICENSE }}\n CUSTOM_ENV_VAR: ${{ env.CUSTOM_ENV }}\n\n # Summary job that depends on all test jobs\n test-summary:", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2167257077", + "repo_full_name": "chef/chef", + "pr_number": 15068, + "pr_file": ".github/workflows/verify.yml", + "discussion_id": "2167257077", + "commented_code": "@@ -0,0 +1,161 @@\n+name: Verify Pipeline\n+\n+on:\n+ workflow_dispatch:\n+ inputs:\n+ base_image_tag:\n+ description: 'Base image tag version'\n+ required: true\n+ default: '3.0.36'\n+ type: string\n+ custom_env_var:\n+ description: 'Custom environment variable'\n+ required: false\n+ default: 'production'\n+ type: string\n+ chef_license:\n+ description: 'Chef license acceptance'\n+ required: true\n+ default: 'accept-no-persist'\n+ type: string\n+ push:\n+ branches: [main]\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ BASE_IMAGE_TAG: ${{ inputs.base_image_tag || '3.0.36' }}\n+ CUSTOM_ENV: ${{ inputs.custom_env_var || 'production' }}\n+ CHEF_LICENSE: ${{ inputs.chef_license || 'accept-no-persist' }}\n+\n+jobs:\n+ # Linux Tests - Matrix for different test types and OS combinations\n+ 
linux-tests:\n+ name: Linux ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-22.04\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ test_type: [Unit, Integration, Functional]\n+ os:\n+ - rocky-8\n+ - rocky-9\n+ - rhel-9\n+ - debian-9\n+ - ubuntu-1804\n+ - ubuntu-2004\n+ - ubuntu-2204\n+ - centos-7\n+ - amazonlinux-2\n+ - rockylinux-8\n+ - almalinux-9\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:${{ env.BASE_IMAGE_TAG }}\n+ options: --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Configure Git safe directory\n+ run: |\n+ git config --global --add safe.directory /__w/*/\n+\n+ - name: Display Environment Info\n+ run: |\n+ echo \"Running ${{ matrix.test_type }} tests on ${{ matrix.os }}\"\n+ echo \"Base Image Tag: ${{ env.BASE_IMAGE_TAG }}\"\n+ echo \"Custom Environment: ${{ env.CUSTOM_ENV }}\"\n+ echo \"Chef License: ${{ env.CHEF_LICENSE }}\"\n+ cat /etc/os-release || echo \"OS release info not available\"\n+\n+ - name: Install Dependencies\n+ run: |\n+ ./.expeditor/bk_container_prep.sh\n+\n+ - name: Run ${{ matrix.test_type }} Tests\n+ run: |\n+ ./.expeditor/scripts/prep_and_run_tests.sh \"${{ matrix.test_type }}\"\n+ env:\n+ TEST_TYPE: ${{ matrix.test_type }}\n+ OS_TYPE: ${{ matrix.os }}\n+ CHEF_LICENSE: ${{ env.CHEF_LICENSE }}\n+ CUSTOM_ENV_VAR: ${{ env.CUSTOM_ENV }}\n+\n+ # Windows Tests - Matrix for different test types and Windows versions\n+ windows-tests:\n+ name: Windows ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ${{ matrix.os }}\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ test_type: [Unit, Integration, Functional]\n+ os:\n+ - windows-2019\n+ - windows-2022\n+ - windows-latest\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Set up Ruby\n+ uses: ruby/setup-ruby@v1\n+ with:\n+ ruby-version: '3.1'\n+ bundler-cache: true\n+\n+ - name: Display Environment Info\n+ shell: pwsh\n+ run: |\n+ Write-Host \"Running ${{ matrix.test_type }} tests on ${{ matrix.os }}\"\n+ Write-Host \"Base Image Tag: ${{ env.BASE_IMAGE_TAG }}\"\n+ Write-Host \"Custom Environment: ${{ env.CUSTOM_ENV }}\"\n+ Write-Host \"Chef License: ${{ env.CHEF_LICENSE }}\"\n+ Get-ComputerInfo | Select-Object WindowsProductName, WindowsVersion\n+\n+ - name: Install Dependencies\n+ shell: pwsh\n+ run: |\n+ bundle config set --local without development\n+ bundle install --jobs=3 --retry=3\n+\n+ - name: Run ${{ matrix.test_type }} Tests\n+ shell: pwsh\n+ run: |\n+ switch (\"${{ matrix.test_type }}\") {\n+ \"Unit\" {\n+ Write-Host \"Running Unit Tests...\"\n+ try { bundle exec rake spec:unit } catch { Write-Host \"Unit tests script not found\" }\n+ }\n+ \"Integration\" {\n+ Write-Host \"Running Integration Tests...\"\n+ try { bundle exec rake spec:integration } catch { Write-Host \"Integration tests script not found\" }\n+ }\n+ \"Functional\" {\n+ Write-Host \"Running Functional Tests...\"\n+ try { bundle exec rake spec:functional } catch { Write-Host \"Functional tests script not found\" }\n+ }\n+ }\n+ env:\n+ TEST_TYPE: ${{ matrix.test_type }}\n+ OS_TYPE: ${{ matrix.os }}\n+ CHEF_LICENSE: ${{ env.CHEF_LICENSE }}\n+ CUSTOM_ENV_VAR: ${{ env.CUSTOM_ENV }}\n+\n+ # Summary job that depends on all test jobs\n+ test-summary:", + "comment_created_at": "2025-06-25T17:30:23+00:00", + "comment_author": "github-advanced-security[bot]", + "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. 
Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/290)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2167257081", + "pr_number": 15068, + "pr_file": ".github/workflows/verify.yml", + "created_at": "2025-06-25T17:30:23+00:00", + "commented_code": "name: Verify Pipeline\n\non:\n workflow_dispatch:\n inputs:\n base_image_tag:\n description: 'Base image tag version'\n required: true\n default: '3.0.36'\n type: string\n custom_env_var:\n description: 'Custom environment variable'\n required: false\n default: 'production'\n type: string\n chef_license:\n description: 'Chef license acceptance'\n required: true\n default: 'accept-no-persist'\n type: string\n push:\n branches: [main]\n pull_request:\n branches: [main]\n\nenv:\n BASE_IMAGE_TAG: ${{ inputs.base_image_tag || '3.0.36' }}\n CUSTOM_ENV: ${{ inputs.custom_env_var || 'production' }}\n CHEF_LICENSE: ${{ inputs.chef_license || 'accept-no-persist' }}\n\njobs:\n # Linux Tests - Matrix for different test types and OS combinations\n linux-tests:\n name: Linux ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-22.04\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n os:\n - rocky-8\n - rocky-9\n - rhel-9\n - debian-9\n - ubuntu-1804\n - ubuntu-2004\n - ubuntu-2204\n - centos-7\n - amazonlinux-2\n - rockylinux-8\n - almalinux-9\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:${{ env.BASE_IMAGE_TAG }}\n options: --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Configure Git safe directory\n run: |\n git config --global --add safe.directory /__w/*/\n\n - name: Display Environment Info\n run: |\n echo \"Running ${{ matrix.test_type }} tests on ${{ matrix.os }}\"\n echo \"Base Image Tag: ${{ env.BASE_IMAGE_TAG }}\"\n echo \"Custom Environment: ${{ env.CUSTOM_ENV }}\"\n echo \"Chef License: ${{ env.CHEF_LICENSE }}\"\n cat /etc/os-release || echo \"OS release info not available\"\n\n - name: Install Dependencies\n run: |\n ./.expeditor/bk_container_prep.sh\n\n - name: Run ${{ matrix.test_type }} Tests\n run: |\n ./.expeditor/scripts/prep_and_run_tests.sh \"${{ matrix.test_type }}\"\n env:\n TEST_TYPE: ${{ matrix.test_type }}\n OS_TYPE: ${{ matrix.os }}\n CHEF_LICENSE: ${{ env.CHEF_LICENSE }}\n CUSTOM_ENV_VAR: ${{ env.CUSTOM_ENV }}\n\n # Windows Tests - Matrix for different test types and Windows versions\n windows-tests:\n name: Windows ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ${{ matrix.os }}\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n os:\n - windows-2019\n - windows-2022\n - windows-latest\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Set up Ruby\n uses: ruby/setup-ruby@v1\n with:\n ruby-version: '3.1'\n bundler-cache: true\n\n - name: Display Environment Info\n shell: pwsh\n run: |\n Write-Host \"Running ${{ matrix.test_type }} tests on ${{ matrix.os }}\"\n Write-Host \"Base Image Tag: ${{ env.BASE_IMAGE_TAG }}\"\n Write-Host \"Custom Environment: ${{ env.CUSTOM_ENV }}\"\n Write-Host \"Chef License: ${{ env.CHEF_LICENSE }}\"\n Get-ComputerInfo | Select-Object WindowsProductName, WindowsVersion\n\n - name: Install Dependencies\n shell: pwsh\n run: |\n bundle config set --local without development\n bundle install --jobs=3 --retry=3\n\n - name: Run ${{ matrix.test_type }} Tests\n shell: pwsh\n run: |\n switch 
(\"${{ matrix.test_type }}\") {\n \"Unit\" {\n Write-Host \"Running Unit Tests...\"\n try { bundle exec rake spec:unit } catch { Write-Host \"Unit tests script not found\" }\n }\n \"Integration\" {\n Write-Host \"Running Integration Tests...\"\n try { bundle exec rake spec:integration } catch { Write-Host \"Integration tests script not found\" }\n }\n \"Functional\" {\n Write-Host \"Running Functional Tests...\"\n try { bundle exec rake spec:functional } catch { Write-Host \"Functional tests script not found\" }\n }\n }\n env:\n TEST_TYPE: ${{ matrix.test_type }}\n OS_TYPE: ${{ matrix.os }}\n CHEF_LICENSE: ${{ env.CHEF_LICENSE }}\n CUSTOM_ENV_VAR: ${{ env.CUSTOM_ENV }}\n\n # Summary job that depends on all test jobs\n test-summary:\n name: Test Summary\n runs-on: ubuntu-latest\n needs: [linux-tests, windows-tests]\n if: always()\n steps:\n - name: Check test results\n run: |\n echo \"Linux tests result: ${{ needs.linux-tests.result }}\"\n echo \"Windows tests result: ${{ needs.windows-tests.result }}\"\n\n if [[ \"${{ needs.linux-tests.result }}\" == \"failure\" || \"${{ needs.windows-tests.result }}\" == \"failure\" ]]; then\n echo \"Some tests failed\"\n exit 1\n else\n echo \"All tests passed or were skipped\"\n fi", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2167257081", + "repo_full_name": "chef/chef", + "pr_number": 15068, + "pr_file": ".github/workflows/verify.yml", + "discussion_id": "2167257081", + "commented_code": "@@ -0,0 +1,161 @@\n+name: Verify Pipeline\n+\n+on:\n+ workflow_dispatch:\n+ inputs:\n+ base_image_tag:\n+ description: 'Base image tag version'\n+ required: true\n+ default: '3.0.36'\n+ type: string\n+ custom_env_var:\n+ description: 'Custom environment variable'\n+ required: false\n+ default: 'production'\n+ type: string\n+ chef_license:\n+ description: 'Chef license acceptance'\n+ required: true\n+ default: 'accept-no-persist'\n+ type: string\n+ push:\n+ branches: [main]\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ BASE_IMAGE_TAG: ${{ inputs.base_image_tag || '3.0.36' }}\n+ CUSTOM_ENV: ${{ inputs.custom_env_var || 'production' }}\n+ CHEF_LICENSE: ${{ inputs.chef_license || 'accept-no-persist' }}\n+\n+jobs:\n+ # Linux Tests - Matrix for different test types and OS combinations\n+ linux-tests:\n+ name: Linux ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-22.04\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ test_type: [Unit, Integration, Functional]\n+ os:\n+ - rocky-8\n+ - rocky-9\n+ - rhel-9\n+ - debian-9\n+ - ubuntu-1804\n+ - ubuntu-2004\n+ - ubuntu-2204\n+ - centos-7\n+ - amazonlinux-2\n+ - rockylinux-8\n+ - almalinux-9\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:${{ env.BASE_IMAGE_TAG }}\n+ options: --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Configure Git safe directory\n+ run: |\n+ git config --global --add safe.directory /__w/*/\n+\n+ - name: Display Environment Info\n+ run: |\n+ echo \"Running ${{ matrix.test_type }} tests on ${{ matrix.os }}\"\n+ echo \"Base Image Tag: ${{ env.BASE_IMAGE_TAG }}\"\n+ echo \"Custom Environment: ${{ env.CUSTOM_ENV }}\"\n+ echo \"Chef License: ${{ env.CHEF_LICENSE }}\"\n+ cat /etc/os-release || echo \"OS release info not available\"\n+\n+ - name: Install Dependencies\n+ run: |\n+ ./.expeditor/bk_container_prep.sh\n+\n+ - name: Run ${{ matrix.test_type }} Tests\n+ run: |\n+ ./.expeditor/scripts/prep_and_run_tests.sh \"${{ matrix.test_type }}\"\n+ env:\n+ TEST_TYPE: ${{ matrix.test_type }}\n+ OS_TYPE: ${{ 
matrix.os }}\n+ CHEF_LICENSE: ${{ env.CHEF_LICENSE }}\n+ CUSTOM_ENV_VAR: ${{ env.CUSTOM_ENV }}\n+\n+ # Windows Tests - Matrix for different test types and Windows versions\n+ windows-tests:\n+ name: Windows ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ${{ matrix.os }}\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ test_type: [Unit, Integration, Functional]\n+ os:\n+ - windows-2019\n+ - windows-2022\n+ - windows-latest\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Set up Ruby\n+ uses: ruby/setup-ruby@v1\n+ with:\n+ ruby-version: '3.1'\n+ bundler-cache: true\n+\n+ - name: Display Environment Info\n+ shell: pwsh\n+ run: |\n+ Write-Host \"Running ${{ matrix.test_type }} tests on ${{ matrix.os }}\"\n+ Write-Host \"Base Image Tag: ${{ env.BASE_IMAGE_TAG }}\"\n+ Write-Host \"Custom Environment: ${{ env.CUSTOM_ENV }}\"\n+ Write-Host \"Chef License: ${{ env.CHEF_LICENSE }}\"\n+ Get-ComputerInfo | Select-Object WindowsProductName, WindowsVersion\n+\n+ - name: Install Dependencies\n+ shell: pwsh\n+ run: |\n+ bundle config set --local without development\n+ bundle install --jobs=3 --retry=3\n+\n+ - name: Run ${{ matrix.test_type }} Tests\n+ shell: pwsh\n+ run: |\n+ switch (\"${{ matrix.test_type }}\") {\n+ \"Unit\" {\n+ Write-Host \"Running Unit Tests...\"\n+ try { bundle exec rake spec:unit } catch { Write-Host \"Unit tests script not found\" }\n+ }\n+ \"Integration\" {\n+ Write-Host \"Running Integration Tests...\"\n+ try { bundle exec rake spec:integration } catch { Write-Host \"Integration tests script not found\" }\n+ }\n+ \"Functional\" {\n+ Write-Host \"Running Functional Tests...\"\n+ try { bundle exec rake spec:functional } catch { Write-Host \"Functional tests script not found\" }\n+ }\n+ }\n+ env:\n+ TEST_TYPE: ${{ matrix.test_type }}\n+ OS_TYPE: ${{ matrix.os }}\n+ CHEF_LICENSE: ${{ env.CHEF_LICENSE }}\n+ CUSTOM_ENV_VAR: ${{ env.CUSTOM_ENV }}\n+\n+ # Summary job that depends on all test jobs\n+ test-summary:\n+ name: Test Summary\n+ runs-on: ubuntu-latest\n+ needs: [linux-tests, windows-tests]\n+ if: always()\n+ steps:\n+ - name: Check test results\n+ run: |\n+ echo \"Linux tests result: ${{ needs.linux-tests.result }}\"\n+ echo \"Windows tests result: ${{ needs.windows-tests.result }}\"\n+\n+ if [[ \"${{ needs.linux-tests.result }}\" == \"failure\" || \"${{ needs.windows-tests.result }}\" == \"failure\" ]]; then\n+ echo \"Some tests failed\"\n+ exit 1\n+ else\n+ echo \"All tests passed or were skipped\"\n+ fi", + "comment_created_at": "2025-06-25T17:30:23+00:00", + "comment_author": "github-advanced-security[bot]", + "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {{}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/291)", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-limit-token-permissions.md b/_reviewers/chef-limit-token-permissions.md index 085cecd..90a52df 100644 --- a/_reviewers/chef-limit-token-permissions.md +++ b/_reviewers/chef-limit-token-permissions.md @@ -37,271 +37,3 @@ jobs: ``` This practice follows the principle of least privilege and reduces the potential impact if a workflow is compromised by a malicious pull request or action. 
- - -[ - { - "discussion_id": "2178132199", - "pr_number": 15076, - "pr_file": ".github/workflows/verify.yml", - "created_at": "2025-07-01T17:09:07+00:00", - "commented_code": "name: Verify\n\non:\n push:\n branches: [main, feature/github-actions-migration] # Add your temp branch here\n pull_request:\n branches: [main]\n\nenv:\n CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n CHEF_LICENSE: accept-no-persist\n\njobs:\n linux-matrix:\n name: ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n test_type: [Unit, Integration, Functional]\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n options: --privileged --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - name: Run ${{ matrix.test_type }} tests\n run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n windows-tests:", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2178132199", - "repo_full_name": "chef/chef", - "pr_number": 15076, - "pr_file": ".github/workflows/verify.yml", - "discussion_id": "2178132199", - "commented_code": "@@ -0,0 +1,167 @@\n+name: Verify\n+\n+on:\n+ push:\n+ branches: [main, feature/github-actions-migration] # Add your temp branch here\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n+ CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n+ CHEF_LICENSE: accept-no-persist\n+\n+jobs:\n+ linux-matrix:\n+ name: ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n+ test_type: [Unit, Integration, Functional]\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n+ options: --privileged --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ windows-tests:", - "comment_created_at": "2025-07-01T17:09:07+00:00", - "comment_author": "github-advanced-security[bot]", - "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. 
Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/292)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2178132200", - "pr_number": 15076, - "pr_file": ".github/workflows/verify.yml", - "created_at": "2025-07-01T17:09:07+00:00", - "commented_code": "name: Verify\n\non:\n push:\n branches: [main, feature/github-actions-migration] # Add your temp branch here\n pull_request:\n branches: [main]\n\nenv:\n CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n CHEF_LICENSE: accept-no-persist\n\njobs:\n linux-matrix:\n name: ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n test_type: [Unit, Integration, Functional]\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n options: --privileged --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - name: Run ${{ matrix.test_type }} tests\n run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n windows-tests:\n name: ${{ matrix.test_type }} - Windows 2019\n runs-on: windows-2019\n timeout-minutes: 120\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Set up Ruby\n uses: ruby/setup-ruby@v1\n with:\n ruby-version: '3.1'\n bundler-cache: true\n\n - name: Run ${{ matrix.test_type }} tests\n shell: pwsh\n run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n gem-tests:", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2178132200", - "repo_full_name": "chef/chef", - "pr_number": 15076, - "pr_file": ".github/workflows/verify.yml", - "discussion_id": "2178132200", - "commented_code": "@@ -0,0 +1,167 @@\n+name: Verify\n+\n+on:\n+ push:\n+ branches: [main, feature/github-actions-migration] # Add your temp branch here\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n+ CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n+ CHEF_LICENSE: accept-no-persist\n+\n+jobs:\n+ linux-matrix:\n+ name: ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n+ test_type: [Unit, Integration, Functional]\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n+ options: --privileged --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ windows-tests:\n+ name: ${{ matrix.test_type }} - Windows 2019\n+ runs-on: windows-2019\n+ timeout-minutes: 120\n+ strategy:\n+ fail-fast: false\n+ 
matrix:\n+ test_type: [Unit, Integration, Functional]\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Set up Ruby\n+ uses: ruby/setup-ruby@v1\n+ with:\n+ ruby-version: '3.1'\n+ bundler-cache: true\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ shell: pwsh\n+ run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ gem-tests:", - "comment_created_at": "2025-07-01T17:09:07+00:00", - "comment_author": "github-advanced-security[bot]", - "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/293)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2178132204", - "pr_number": 15076, - "pr_file": ".github/workflows/verify.yml", - "created_at": "2025-07-01T17:09:07+00:00", - "commented_code": "name: Verify\n\non:\n push:\n branches: [main, feature/github-actions-migration] # Add your temp branch here\n pull_request:\n branches: [main]\n\nenv:\n CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n CHEF_LICENSE: accept-no-persist\n\njobs:\n linux-matrix:\n name: ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n test_type: [Unit, Integration, Functional]\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n options: --privileged --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - name: Run ${{ matrix.test_type }} tests\n run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n windows-tests:\n name: ${{ matrix.test_type }} - Windows 2019\n runs-on: windows-2019\n timeout-minutes: 120\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Set up Ruby\n uses: ruby/setup-ruby@v1\n with:\n ruby-version: '3.1'\n bundler-cache: true\n\n - name: Run ${{ matrix.test_type }} tests\n shell: pwsh\n run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n gem-tests:\n name: ${{ matrix.gem }} gem\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n gem: [chef-zero, cheffish, chefspec, knife-windows, berkshelf]\n include:\n - gem: chef-zero\n test_cmd: bundle exec tasks/bin/run_external_test chef/chef-zero main rake pedant\n env_vars: |\n PEDANT_OPTS=--skip-oc_id\n CHEF_FS=true\n - gem: cheffish\n test_cmd: bundle exec tasks/bin/run_external_test chef/cheffish main rake spec\n - gem: chefspec\n test_cmd: bundle exec tasks/bin/run_external_test chef/chefspec main rake\n - gem: knife-windows\n test_cmd: bundle exec tasks/bin/run_external_test chef/knife-windows main rake spec\n - gem: berkshelf\n test_cmd: bundle exec tasks/bin/run_external_test chef/berkshelf main rake\n pre_install: apt-get update -y && apt-get install -y graphviz\n 
container:\n image: chefes/omnibus-toolchain-ubuntu-1804:3.0.36\n options: --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - name: Set up environment\n run: |\n export PATH=\"/root/.rbenv/shims:/opt/chef/bin:/home/ec2-user/.tfenv/bin:/home/ec2-user/.rbenv/shims:/home/ec2-user/.rbenv/bin:/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/bin\"\n echo \"PATH=$PATH\" >> $GITHUB_ENV\n\n - name: Pre-install dependencies\n if: matrix.pre_install\n run: ${{ matrix.pre_install }}\n\n - name: Bundle install\n run: |\n bundle config set --local without omnibus_package\n bundle config set --local path 'vendor/bundle'\n bundle install --jobs=3 --retry=3\n\n - name: Run ${{ matrix.gem }} tests\n run: ${{ matrix.test_cmd }}\n env:\n CHEF_FOUNDATION_VERSION: ${{ env.CHEF_FOUNDATION_VERSION }}\n PEDANT_OPTS: ${{ matrix.gem == 'chef-zero' && '--skip-oc_id' || '' }}\n CHEF_FS: ${{ matrix.gem == 'chef-zero' && 'true' || '' }}\n\n habitat-linux-plan:", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2178132204", - "repo_full_name": "chef/chef", - "pr_number": 15076, - "pr_file": ".github/workflows/verify.yml", - "discussion_id": "2178132204", - "commented_code": "@@ -0,0 +1,167 @@\n+name: Verify\n+\n+on:\n+ push:\n+ branches: [main, feature/github-actions-migration] # Add your temp branch here\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n+ CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n+ CHEF_LICENSE: accept-no-persist\n+\n+jobs:\n+ linux-matrix:\n+ name: ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n+ test_type: [Unit, Integration, Functional]\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n+ options: --privileged --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ windows-tests:\n+ name: ${{ matrix.test_type }} - Windows 2019\n+ runs-on: windows-2019\n+ timeout-minutes: 120\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ test_type: [Unit, Integration, Functional]\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Set up Ruby\n+ uses: ruby/setup-ruby@v1\n+ with:\n+ ruby-version: '3.1'\n+ bundler-cache: true\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ shell: pwsh\n+ run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ gem-tests:\n+ name: ${{ matrix.gem }} gem\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ gem: [chef-zero, cheffish, chefspec, knife-windows, berkshelf]\n+ include:\n+ - gem: chef-zero\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/chef-zero main rake pedant\n+ env_vars: |\n+ PEDANT_OPTS=--skip-oc_id\n+ CHEF_FS=true\n+ - gem: cheffish\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/cheffish main rake spec\n+ - gem: chefspec\n+ test_cmd: bundle exec 
tasks/bin/run_external_test chef/chefspec main rake\n+ - gem: knife-windows\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/knife-windows main rake spec\n+ - gem: berkshelf\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/berkshelf main rake\n+ pre_install: apt-get update -y && apt-get install -y graphviz\n+ container:\n+ image: chefes/omnibus-toolchain-ubuntu-1804:3.0.36\n+ options: --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Set up environment\n+ run: |\n+ export PATH=\"/root/.rbenv/shims:/opt/chef/bin:/home/ec2-user/.tfenv/bin:/home/ec2-user/.rbenv/shims:/home/ec2-user/.rbenv/bin:/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/bin\"\n+ echo \"PATH=$PATH\" >> $GITHUB_ENV\n+\n+ - name: Pre-install dependencies\n+ if: matrix.pre_install\n+ run: ${{ matrix.pre_install }}\n+\n+ - name: Bundle install\n+ run: |\n+ bundle config set --local without omnibus_package\n+ bundle config set --local path 'vendor/bundle'\n+ bundle install --jobs=3 --retry=3\n+\n+ - name: Run ${{ matrix.gem }} tests\n+ run: ${{ matrix.test_cmd }}\n+ env:\n+ CHEF_FOUNDATION_VERSION: ${{ env.CHEF_FOUNDATION_VERSION }}\n+ PEDANT_OPTS: ${{ matrix.gem == 'chef-zero' && '--skip-oc_id' || '' }}\n+ CHEF_FS: ${{ matrix.gem == 'chef-zero' && 'true' || '' }}\n+\n+ habitat-linux-plan:", - "comment_created_at": "2025-07-01T17:09:07+00:00", - "comment_author": "github-advanced-security[bot]", - "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/294)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2178132207", - "pr_number": 15076, - "pr_file": ".github/workflows/verify.yml", - "created_at": "2025-07-01T17:09:07+00:00", - "commented_code": "name: Verify\n\non:\n push:\n branches: [main, feature/github-actions-migration] # Add your temp branch here\n pull_request:\n branches: [main]\n\nenv:\n CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n CHEF_LICENSE: accept-no-persist\n\njobs:\n linux-matrix:\n name: ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n test_type: [Unit, Integration, Functional]\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n options: --privileged --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - name: Run ${{ matrix.test_type }} tests\n run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n windows-tests:\n name: ${{ matrix.test_type }} - Windows 2019\n runs-on: windows-2019\n timeout-minutes: 120\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Set up Ruby\n uses: ruby/setup-ruby@v1\n with:\n ruby-version: '3.1'\n bundler-cache: true\n\n - name: Run ${{ matrix.test_type }} tests\n shell: pwsh\n run: 
.\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n gem-tests:\n name: ${{ matrix.gem }} gem\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n gem: [chef-zero, cheffish, chefspec, knife-windows, berkshelf]\n include:\n - gem: chef-zero\n test_cmd: bundle exec tasks/bin/run_external_test chef/chef-zero main rake pedant\n env_vars: |\n PEDANT_OPTS=--skip-oc_id\n CHEF_FS=true\n - gem: cheffish\n test_cmd: bundle exec tasks/bin/run_external_test chef/cheffish main rake spec\n - gem: chefspec\n test_cmd: bundle exec tasks/bin/run_external_test chef/chefspec main rake\n - gem: knife-windows\n test_cmd: bundle exec tasks/bin/run_external_test chef/knife-windows main rake spec\n - gem: berkshelf\n test_cmd: bundle exec tasks/bin/run_external_test chef/berkshelf main rake\n pre_install: apt-get update -y && apt-get install -y graphviz\n container:\n image: chefes/omnibus-toolchain-ubuntu-1804:3.0.36\n options: --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - name: Set up environment\n run: |\n export PATH=\"/root/.rbenv/shims:/opt/chef/bin:/home/ec2-user/.tfenv/bin:/home/ec2-user/.rbenv/shims:/home/ec2-user/.rbenv/bin:/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/bin\"\n echo \"PATH=$PATH\" >> $GITHUB_ENV\n\n - name: Pre-install dependencies\n if: matrix.pre_install\n run: ${{ matrix.pre_install }}\n\n - name: Bundle install\n run: |\n bundle config set --local without omnibus_package\n bundle config set --local path 'vendor/bundle'\n bundle install --jobs=3 --retry=3\n\n - name: Run ${{ matrix.gem }} tests\n run: ${{ matrix.test_cmd }}\n env:\n CHEF_FOUNDATION_VERSION: ${{ env.CHEF_FOUNDATION_VERSION }}\n PEDANT_OPTS: ${{ matrix.gem == 'chef-zero' && '--skip-oc_id' || '' }}\n CHEF_FS: ${{ matrix.gem == 'chef-zero' && 'true' || '' }}\n\n habitat-linux-plan:\n name: Habitat Linux Plan\n runs-on: ubuntu-latest\n timeout-minutes: 60\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Install Habitat\n run: sudo ./.expeditor/scripts/install-hab.sh 'x86_64-linux'\n\n - name: Verify plan\n run: sudo ./.expeditor/scripts/verify-plan.sh\n\n habitat-windows-plan:", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2178132207", - "repo_full_name": "chef/chef", - "pr_number": 15076, - "pr_file": ".github/workflows/verify.yml", - "discussion_id": "2178132207", - "commented_code": "@@ -0,0 +1,167 @@\n+name: Verify\n+\n+on:\n+ push:\n+ branches: [main, feature/github-actions-migration] # Add your temp branch here\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n+ CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n+ CHEF_LICENSE: accept-no-persist\n+\n+jobs:\n+ linux-matrix:\n+ name: ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n+ test_type: [Unit, Integration, Functional]\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n+ options: --privileged --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Run ${{ 
matrix.test_type }} tests\n+ run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ windows-tests:\n+ name: ${{ matrix.test_type }} - Windows 2019\n+ runs-on: windows-2019\n+ timeout-minutes: 120\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ test_type: [Unit, Integration, Functional]\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Set up Ruby\n+ uses: ruby/setup-ruby@v1\n+ with:\n+ ruby-version: '3.1'\n+ bundler-cache: true\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ shell: pwsh\n+ run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ gem-tests:\n+ name: ${{ matrix.gem }} gem\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ gem: [chef-zero, cheffish, chefspec, knife-windows, berkshelf]\n+ include:\n+ - gem: chef-zero\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/chef-zero main rake pedant\n+ env_vars: |\n+ PEDANT_OPTS=--skip-oc_id\n+ CHEF_FS=true\n+ - gem: cheffish\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/cheffish main rake spec\n+ - gem: chefspec\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/chefspec main rake\n+ - gem: knife-windows\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/knife-windows main rake spec\n+ - gem: berkshelf\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/berkshelf main rake\n+ pre_install: apt-get update -y && apt-get install -y graphviz\n+ container:\n+ image: chefes/omnibus-toolchain-ubuntu-1804:3.0.36\n+ options: --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Set up environment\n+ run: |\n+ export PATH=\"/root/.rbenv/shims:/opt/chef/bin:/home/ec2-user/.tfenv/bin:/home/ec2-user/.rbenv/shims:/home/ec2-user/.rbenv/bin:/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/bin\"\n+ echo \"PATH=$PATH\" >> $GITHUB_ENV\n+\n+ - name: Pre-install dependencies\n+ if: matrix.pre_install\n+ run: ${{ matrix.pre_install }}\n+\n+ - name: Bundle install\n+ run: |\n+ bundle config set --local without omnibus_package\n+ bundle config set --local path 'vendor/bundle'\n+ bundle install --jobs=3 --retry=3\n+\n+ - name: Run ${{ matrix.gem }} tests\n+ run: ${{ matrix.test_cmd }}\n+ env:\n+ CHEF_FOUNDATION_VERSION: ${{ env.CHEF_FOUNDATION_VERSION }}\n+ PEDANT_OPTS: ${{ matrix.gem == 'chef-zero' && '--skip-oc_id' || '' }}\n+ CHEF_FS: ${{ matrix.gem == 'chef-zero' && 'true' || '' }}\n+\n+ habitat-linux-plan:\n+ name: Habitat Linux Plan\n+ runs-on: ubuntu-latest\n+ timeout-minutes: 60\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Install Habitat\n+ run: sudo ./.expeditor/scripts/install-hab.sh 'x86_64-linux'\n+\n+ - name: Verify plan\n+ run: sudo ./.expeditor/scripts/verify-plan.sh\n+\n+ habitat-windows-plan:", - "comment_created_at": "2025-07-01T17:09:07+00:00", - "comment_author": "github-advanced-security[bot]", - "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. 
Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/295)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2178132209", - "pr_number": 15076, - "pr_file": ".github/workflows/verify.yml", - "created_at": "2025-07-01T17:09:07+00:00", - "commented_code": "name: Verify\n\non:\n push:\n branches: [main, feature/github-actions-migration] # Add your temp branch here\n pull_request:\n branches: [main]\n\nenv:\n CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n CHEF_LICENSE: accept-no-persist\n\njobs:\n linux-matrix:\n name: ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n test_type: [Unit, Integration, Functional]\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n options: --privileged --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - name: Run ${{ matrix.test_type }} tests\n run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n windows-tests:\n name: ${{ matrix.test_type }} - Windows 2019\n runs-on: windows-2019\n timeout-minutes: 120\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Set up Ruby\n uses: ruby/setup-ruby@v1\n with:\n ruby-version: '3.1'\n bundler-cache: true\n\n - name: Run ${{ matrix.test_type }} tests\n shell: pwsh\n run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n gem-tests:\n name: ${{ matrix.gem }} gem\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n gem: [chef-zero, cheffish, chefspec, knife-windows, berkshelf]\n include:\n - gem: chef-zero\n test_cmd: bundle exec tasks/bin/run_external_test chef/chef-zero main rake pedant\n env_vars: |\n PEDANT_OPTS=--skip-oc_id\n CHEF_FS=true\n - gem: cheffish\n test_cmd: bundle exec tasks/bin/run_external_test chef/cheffish main rake spec\n - gem: chefspec\n test_cmd: bundle exec tasks/bin/run_external_test chef/chefspec main rake\n - gem: knife-windows\n test_cmd: bundle exec tasks/bin/run_external_test chef/knife-windows main rake spec\n - gem: berkshelf\n test_cmd: bundle exec tasks/bin/run_external_test chef/berkshelf main rake\n pre_install: apt-get update -y && apt-get install -y graphviz\n container:\n image: chefes/omnibus-toolchain-ubuntu-1804:3.0.36\n options: --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - name: Set up environment\n run: |\n export PATH=\"/root/.rbenv/shims:/opt/chef/bin:/home/ec2-user/.tfenv/bin:/home/ec2-user/.rbenv/shims:/home/ec2-user/.rbenv/bin:/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/bin\"\n echo \"PATH=$PATH\" >> $GITHUB_ENV\n\n - name: Pre-install dependencies\n if: matrix.pre_install\n run: ${{ matrix.pre_install }}\n\n - name: Bundle install\n run: |\n bundle config set --local without omnibus_package\n bundle config set 
--local path 'vendor/bundle'\n bundle install --jobs=3 --retry=3\n\n - name: Run ${{ matrix.gem }} tests\n run: ${{ matrix.test_cmd }}\n env:\n CHEF_FOUNDATION_VERSION: ${{ env.CHEF_FOUNDATION_VERSION }}\n PEDANT_OPTS: ${{ matrix.gem == 'chef-zero' && '--skip-oc_id' || '' }}\n CHEF_FS: ${{ matrix.gem == 'chef-zero' && 'true' || '' }}\n\n habitat-linux-plan:\n name: Habitat Linux Plan\n runs-on: ubuntu-latest\n timeout-minutes: 60\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Install Habitat\n run: sudo ./.expeditor/scripts/install-hab.sh 'x86_64-linux'\n\n - name: Verify plan\n run: sudo ./.expeditor/scripts/verify-plan.sh\n\n habitat-windows-plan:\n name: Habitat Windows Plan\n runs-on: windows-2019\n timeout-minutes: 60\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Verify Windows plan\n shell: pwsh\n run: ./.expeditor/scripts/verify-plan.ps1\n\n verify-summary:", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2178132209", - "repo_full_name": "chef/chef", - "pr_number": 15076, - "pr_file": ".github/workflows/verify.yml", - "discussion_id": "2178132209", - "commented_code": "@@ -0,0 +1,167 @@\n+name: Verify\n+\n+on:\n+ push:\n+ branches: [main, feature/github-actions-migration] # Add your temp branch here\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n+ CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n+ CHEF_LICENSE: accept-no-persist\n+\n+jobs:\n+ linux-matrix:\n+ name: ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n+ test_type: [Unit, Integration, Functional]\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n+ options: --privileged --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ windows-tests:\n+ name: ${{ matrix.test_type }} - Windows 2019\n+ runs-on: windows-2019\n+ timeout-minutes: 120\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ test_type: [Unit, Integration, Functional]\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Set up Ruby\n+ uses: ruby/setup-ruby@v1\n+ with:\n+ ruby-version: '3.1'\n+ bundler-cache: true\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ shell: pwsh\n+ run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ gem-tests:\n+ name: ${{ matrix.gem }} gem\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ gem: [chef-zero, cheffish, chefspec, knife-windows, berkshelf]\n+ include:\n+ - gem: chef-zero\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/chef-zero main rake pedant\n+ env_vars: |\n+ PEDANT_OPTS=--skip-oc_id\n+ CHEF_FS=true\n+ - gem: cheffish\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/cheffish main rake spec\n+ - gem: chefspec\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/chefspec main rake\n+ - gem: knife-windows\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/knife-windows 
main rake spec\n+ - gem: berkshelf\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/berkshelf main rake\n+ pre_install: apt-get update -y && apt-get install -y graphviz\n+ container:\n+ image: chefes/omnibus-toolchain-ubuntu-1804:3.0.36\n+ options: --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Set up environment\n+ run: |\n+ export PATH=\"/root/.rbenv/shims:/opt/chef/bin:/home/ec2-user/.tfenv/bin:/home/ec2-user/.rbenv/shims:/home/ec2-user/.rbenv/bin:/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/bin\"\n+ echo \"PATH=$PATH\" >> $GITHUB_ENV\n+\n+ - name: Pre-install dependencies\n+ if: matrix.pre_install\n+ run: ${{ matrix.pre_install }}\n+\n+ - name: Bundle install\n+ run: |\n+ bundle config set --local without omnibus_package\n+ bundle config set --local path 'vendor/bundle'\n+ bundle install --jobs=3 --retry=3\n+\n+ - name: Run ${{ matrix.gem }} tests\n+ run: ${{ matrix.test_cmd }}\n+ env:\n+ CHEF_FOUNDATION_VERSION: ${{ env.CHEF_FOUNDATION_VERSION }}\n+ PEDANT_OPTS: ${{ matrix.gem == 'chef-zero' && '--skip-oc_id' || '' }}\n+ CHEF_FS: ${{ matrix.gem == 'chef-zero' && 'true' || '' }}\n+\n+ habitat-linux-plan:\n+ name: Habitat Linux Plan\n+ runs-on: ubuntu-latest\n+ timeout-minutes: 60\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Install Habitat\n+ run: sudo ./.expeditor/scripts/install-hab.sh 'x86_64-linux'\n+\n+ - name: Verify plan\n+ run: sudo ./.expeditor/scripts/verify-plan.sh\n+\n+ habitat-windows-plan:\n+ name: Habitat Windows Plan\n+ runs-on: windows-2019\n+ timeout-minutes: 60\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Verify Windows plan\n+ shell: pwsh\n+ run: ./.expeditor/scripts/verify-plan.ps1\n+\n+ verify-summary:", - "comment_created_at": "2025-07-01T17:09:07+00:00", - "comment_author": "github-advanced-security[bot]", - "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. 
Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/296)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2178166822", - "pr_number": 15076, - "pr_file": ".github/workflows/verify.yml", - "created_at": "2025-07-01T17:24:21+00:00", - "commented_code": "name: Verify\n\non:\n push:\n branches: [main, feature/github-actions-migration] # Add your temp branch here\n pull_request:\n branches: [main]\n\nenv:\n CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n CHEF_LICENSE: accept-no-persist\n\njobs:\n linux-matrix:\n name: ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-24.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n test_type: [Unit, Integration, Functional]\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n options: --privileged --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - name: Run ${{ matrix.test_type }} tests\n run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n windows-tests:", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2178166822", - "repo_full_name": "chef/chef", - "pr_number": 15076, - "pr_file": ".github/workflows/verify.yml", - "discussion_id": "2178166822", - "commented_code": "@@ -0,0 +1,167 @@\n+name: Verify\n+\n+on:\n+ push:\n+ branches: [main, feature/github-actions-migration] # Add your temp branch here\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n+ CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n+ CHEF_LICENSE: accept-no-persist\n+\n+jobs:\n+ linux-matrix:\n+ name: ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-24.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n+ test_type: [Unit, Integration, Functional]\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n+ options: --privileged --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ windows-tests:", - "comment_created_at": "2025-07-01T17:24:21+00:00", - "comment_author": "github-advanced-security[bot]", - "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. 
Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/298)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2178450773", - "pr_number": 15076, - "pr_file": ".github/workflows/verify.yml", - "created_at": "2025-07-01T20:20:39+00:00", - "commented_code": "name: Verify\n\non:\n push:\n branches: [main, feature/github-actions-migration] # Add your temp branch here\n pull_request:\n branches: [main]\n\nenv:\n CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n CHEF_LICENSE: accept-no-persist\n\njobs:\n linux-matrix:\n name: ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n test_type: [Unit, Integration, Functional]\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n options: --privileged --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n # Skip checkout actions for debian-9 due to GLIBC incompatibility\n if: matrix.os != 'debian-9'\n\n - name: Manual checkout for Debian 9\n if: matrix.os == 'debian-9'\n run: |\n # Clone the repository with full history to ensure all refs are available\n git clone --no-checkout https://github.com/${{ github.repository }} .\n\n # Fetch the specific ref if it's not available\n git fetch origin ${{ github.ref }}:${{ github.ref }} || true\n git fetch origin ${{ github.sha }} || true\n\n git checkout ${{ github.sha }}\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - name: Run ${{ matrix.test_type }} tests\n run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n windows-tests:\n name: ${{ matrix.test_type }} - Windows 2022\n runs-on: windows-2022\n timeout-minutes: 120\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Set up Ruby\n uses: ruby/setup-ruby@v1\n with:\n ruby-version: '3.1'\n bundler-cache: true\n\n - name: Run ${{ matrix.test_type }} tests\n shell: pwsh\n run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n gem-tests:", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2178450773", - "repo_full_name": "chef/chef", - "pr_number": 15076, - "pr_file": ".github/workflows/verify.yml", - "discussion_id": "2178450773", - "commented_code": "@@ -0,0 +1,181 @@\n+name: Verify\n+\n+on:\n+ push:\n+ branches: [main, feature/github-actions-migration] # Add your temp branch here\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n+ CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n+ CHEF_LICENSE: accept-no-persist\n+\n+jobs:\n+ linux-matrix:\n+ name: ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n+ test_type: [Unit, Integration, Functional]\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n+ 
options: --privileged --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+ # Skip checkout actions for debian-9 due to GLIBC incompatibility\n+ if: matrix.os != 'debian-9'\n+\n+ - name: Manual checkout for Debian 9\n+ if: matrix.os == 'debian-9'\n+ run: |\n+ # Clone the repository with full history to ensure all refs are available\n+ git clone --no-checkout https://github.com/${{ github.repository }} .\n+\n+ # Fetch the specific ref if it's not available\n+ git fetch origin ${{ github.ref }}:${{ github.ref }} || true\n+ git fetch origin ${{ github.sha }} || true\n+\n+ git checkout ${{ github.sha }}\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ windows-tests:\n+ name: ${{ matrix.test_type }} - Windows 2022\n+ runs-on: windows-2022\n+ timeout-minutes: 120\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ test_type: [Unit, Integration, Functional]\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Set up Ruby\n+ uses: ruby/setup-ruby@v1\n+ with:\n+ ruby-version: '3.1'\n+ bundler-cache: true\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ shell: pwsh\n+ run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ gem-tests:", - "comment_created_at": "2025-07-01T20:20:39+00:00", - "comment_author": "github-advanced-security[bot]", - "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/299)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2178450775", - "pr_number": 15076, - "pr_file": ".github/workflows/verify.yml", - "created_at": "2025-07-01T20:20:39+00:00", - "commented_code": "name: Verify\n\non:\n push:\n branches: [main, feature/github-actions-migration] # Add your temp branch here\n pull_request:\n branches: [main]\n\nenv:\n CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n CHEF_LICENSE: accept-no-persist\n\njobs:\n linux-matrix:\n name: ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n test_type: [Unit, Integration, Functional]\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n options: --privileged --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n # Skip checkout actions for debian-9 due to GLIBC incompatibility\n if: matrix.os != 'debian-9'\n\n - name: Manual checkout for Debian 9\n if: matrix.os == 'debian-9'\n run: |\n # Clone the repository with full history to ensure all refs are available\n git clone --no-checkout https://github.com/${{ github.repository }} .\n\n # Fetch the specific ref if it's not available\n git fetch origin ${{ github.ref }}:${{ github.ref }} || true\n git fetch origin ${{ github.sha }} || true\n\n git checkout ${{ github.sha }}\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - 
name: Run ${{ matrix.test_type }} tests\n run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n windows-tests:\n name: ${{ matrix.test_type }} - Windows 2022\n runs-on: windows-2022\n timeout-minutes: 120\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Set up Ruby\n uses: ruby/setup-ruby@v1\n with:\n ruby-version: '3.1'\n bundler-cache: true\n\n - name: Run ${{ matrix.test_type }} tests\n shell: pwsh\n run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n env:\n CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n\n gem-tests:\n name: ${{ matrix.gem }} gem\n runs-on: ubuntu-22.04\n timeout-minutes: 60\n strategy:\n fail-fast: false\n matrix:\n gem: [chef-zero, cheffish, chefspec, knife-windows, berkshelf]\n include:\n - gem: chef-zero\n test_cmd: bundle exec tasks/bin/run_external_test chef/chef-zero main rake pedant\n env_vars: |\n PEDANT_OPTS=--skip-oc_id\n CHEF_FS=true\n - gem: cheffish\n test_cmd: bundle exec tasks/bin/run_external_test chef/cheffish main rake spec\n - gem: chefspec\n test_cmd: bundle exec tasks/bin/run_external_test chef/chefspec main rake\n - gem: knife-windows\n test_cmd: bundle exec tasks/bin/run_external_test chef/knife-windows main rake spec\n - gem: berkshelf\n test_cmd: bundle exec tasks/bin/run_external_test chef/berkshelf main rake\n pre_install: apt-get update -y && apt-get install -y graphviz\n container:\n image: chefes/omnibus-toolchain-ubuntu-2204:3.0.36\n options: --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Container prep\n run: .expeditor/scripts/bk_container_prep.sh\n\n - name: Set up environment\n run: |\n export PATH=\"/root/.rbenv/shims:/opt/chef/bin:/home/ec2-user/.tfenv/bin:/home/ec2-user/.rbenv/shims:/home/ec2-user/.rbenv/bin:/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/bin\"\n echo \"PATH=$PATH\" >> $GITHUB_ENV\n\n - name: Pre-install dependencies\n if: matrix.pre_install\n run: ${{ matrix.pre_install }}\n\n - name: Bundle install\n run: |\n bundle config set --local without omnibus_package\n bundle config set --local path 'vendor/bundle'\n bundle install --jobs=3 --retry=3\n\n - name: Run ${{ matrix.gem }} tests\n run: ${{ matrix.test_cmd }}\n env:\n CHEF_FOUNDATION_VERSION: ${{ env.CHEF_FOUNDATION_VERSION }}\n PEDANT_OPTS: ${{ matrix.gem == 'chef-zero' && '--skip-oc_id' || '' }}\n CHEF_FS: ${{ matrix.gem == 'chef-zero' && 'true' || '' }}\n\n habitat-linux-plan:\n name: Habitat Linux Plan\n runs-on: ubuntu-latest\n timeout-minutes: 60\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Install Habitat\n run: sudo ./.expeditor/scripts/install-hab.sh 'x86_64-linux'\n\n - name: Verify plan\n run: sudo ./.expeditor/scripts/verify-plan.sh\n\n habitat-windows-plan:\n name: Habitat Windows Plan\n runs-on: windows-2022\n timeout-minutes: 60\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Verify Windows plan\n shell: pwsh\n run: ./.expeditor/scripts/verify-plan.ps1\n\n verify-summary:", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2178450775", - "repo_full_name": "chef/chef", - "pr_number": 15076, - "pr_file": ".github/workflows/verify.yml", - "discussion_id": "2178450775", - "commented_code": "@@ -0,0 +1,181 @@\n+name: Verify\n+\n+on:\n+ push:\n+ branches: [main, feature/github-actions-migration] # 
Add your temp branch here\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ CHEF_LICENSE_SERVER: http://hosted-license-service-lb-8000-606952349.us-west-2.elb.amazonaws.com:8000/\n+ CHEF_FOUNDATION_VERSION: ${{ vars.CHEF_FOUNDATION_VERSION || '3.2.12' }}\n+ CHEF_LICENSE: accept-no-persist\n+\n+jobs:\n+ linux-matrix:\n+ name: ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ os: [rocky-8, rocky-9, rhel-9, debian-9, ubuntu-2004]\n+ test_type: [Unit, Integration, Functional]\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:3.0.36\n+ options: --privileged --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+ # Skip checkout actions for debian-9 due to GLIBC incompatibility\n+ if: matrix.os != 'debian-9'\n+\n+ - name: Manual checkout for Debian 9\n+ if: matrix.os == 'debian-9'\n+ run: |\n+ # Clone the repository with full history to ensure all refs are available\n+ git clone --no-checkout https://github.com/${{ github.repository }} .\n+\n+ # Fetch the specific ref if it's not available\n+ git fetch origin ${{ github.ref }}:${{ github.ref }} || true\n+ git fetch origin ${{ github.sha }} || true\n+\n+ git checkout ${{ github.sha }}\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ run: .expeditor/scripts/prep_and_run_tests.sh ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ windows-tests:\n+ name: ${{ matrix.test_type }} - Windows 2022\n+ runs-on: windows-2022\n+ timeout-minutes: 120\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ test_type: [Unit, Integration, Functional]\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Set up Ruby\n+ uses: ruby/setup-ruby@v1\n+ with:\n+ ruby-version: '3.1'\n+ bundler-cache: true\n+\n+ - name: Run ${{ matrix.test_type }} tests\n+ shell: pwsh\n+ run: .\\.expeditor\\scripts\\prep_and_run_tests.ps1 ${{ matrix.test_type }}\n+ env:\n+ CHEF_LICENSE_SERVER: ${{ env.CHEF_LICENSE_SERVER }}\n+\n+ gem-tests:\n+ name: ${{ matrix.gem }} gem\n+ runs-on: ubuntu-22.04\n+ timeout-minutes: 60\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ gem: [chef-zero, cheffish, chefspec, knife-windows, berkshelf]\n+ include:\n+ - gem: chef-zero\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/chef-zero main rake pedant\n+ env_vars: |\n+ PEDANT_OPTS=--skip-oc_id\n+ CHEF_FS=true\n+ - gem: cheffish\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/cheffish main rake spec\n+ - gem: chefspec\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/chefspec main rake\n+ - gem: knife-windows\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/knife-windows main rake spec\n+ - gem: berkshelf\n+ test_cmd: bundle exec tasks/bin/run_external_test chef/berkshelf main rake\n+ pre_install: apt-get update -y && apt-get install -y graphviz\n+ container:\n+ image: chefes/omnibus-toolchain-ubuntu-2204:3.0.36\n+ options: --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Container prep\n+ run: .expeditor/scripts/bk_container_prep.sh\n+\n+ - name: Set up environment\n+ run: |\n+ export PATH=\"/root/.rbenv/shims:/opt/chef/bin:/home/ec2-user/.tfenv/bin:/home/ec2-user/.rbenv/shims:/home/ec2-user/.rbenv/bin:/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/bin\"\n+ echo \"PATH=$PATH\" >> $GITHUB_ENV\n+\n+ - name: Pre-install dependencies\n+ if: matrix.pre_install\n+ 
run: ${{ matrix.pre_install }}\n+\n+ - name: Bundle install\n+ run: |\n+ bundle config set --local without omnibus_package\n+ bundle config set --local path 'vendor/bundle'\n+ bundle install --jobs=3 --retry=3\n+\n+ - name: Run ${{ matrix.gem }} tests\n+ run: ${{ matrix.test_cmd }}\n+ env:\n+ CHEF_FOUNDATION_VERSION: ${{ env.CHEF_FOUNDATION_VERSION }}\n+ PEDANT_OPTS: ${{ matrix.gem == 'chef-zero' && '--skip-oc_id' || '' }}\n+ CHEF_FS: ${{ matrix.gem == 'chef-zero' && 'true' || '' }}\n+\n+ habitat-linux-plan:\n+ name: Habitat Linux Plan\n+ runs-on: ubuntu-latest\n+ timeout-minutes: 60\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Install Habitat\n+ run: sudo ./.expeditor/scripts/install-hab.sh 'x86_64-linux'\n+\n+ - name: Verify plan\n+ run: sudo ./.expeditor/scripts/verify-plan.sh\n+\n+ habitat-windows-plan:\n+ name: Habitat Windows Plan\n+ runs-on: windows-2022\n+ timeout-minutes: 60\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Verify Windows plan\n+ shell: pwsh\n+ run: ./.expeditor/scripts/verify-plan.ps1\n+\n+ verify-summary:", - "comment_created_at": "2025-07-01T20:20:39+00:00", - "comment_author": "github-advanced-security[bot]", - "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/300)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2073245958", - "pr_number": 14794, - "pr_file": ".github/workflows/kitchen.yml", - "created_at": "2025-05-05T11:06:01+00:00", - "commented_code": "working-directory: kitchen-tests\n\njobs:\n # Test-kitchen-enterprise currently supports only linux platforms, \n # Hence we are testing on Windows using test-kitchen\n win_x86_64:\n name: win_x86_64(test-kitchen legacy)\n strategy:\n fail-fast: false\n matrix:\n~ os:\n~ - windows-2019\n~ - windows-2022\n~ runs-on: ${{ matrix.os }}\n~ env:\n~ CHEF_LICENSE: accept-no-persist\n~ KITCHEN_LOCAL_YAML: kitchen.exec.windows.yml\n~ steps:\n~ - uses: actions/checkout@v4\n~ - name: Install Chef\n~ uses: actionshub/chef-install@3.0.0\n - name: Kitchen Test\n run: kitchen test end-to-end-${{ matrix.os }}\n\n # Habitat does not support Mac yet. 
Hence we're testing on Mac systems using test-kitchen\n mac_x86_64:\n name: mac_x86_64(test-kitchen legacy)\n strategy:\n fail-fast: false\n matrix:\n~ os:\n~ - macos-13\n~ runs-on: ${{ matrix.os }}\n~ env:\n~ CHEF_LICENSE: accept-no-persist\n~ KITCHEN_LOCAL_YAML: kitchen.exec.macos.yml\n~ steps:\n~ - name: Check out code\n~ uses: actions/checkout@v4\n~ with:\n~ clean: true\n~ - name: Install Chef\n~ uses: actionshub/chef-install@3.0.0\n~ - name: Kitchen Test\n run: kitchen test end-to-end-${{ matrix.os }}\n\n mac_arm64:\n name: mac_arm64(test-kitchen legacy)\n strategy:\n fail-fast: false\n matrix:\n~ os:\n~ - macos-14 # currently macos-latest\n~ - macos-15\n~ runs-on: ${{ matrix.os }}\n~ env:\n~ CHEF_LICENSE: accept-no-persist\n~ KITCHEN_LOCAL_YAML: kitchen.exec.macos.yml\n~ steps:\n~ - name: Check out code\n~ uses: actions/checkout@v4\n~ with:\n~ clean: true\n~ - name: Install Chef\n~ uses: actionshub/chef-install@3.0.0\n - name: Kitchen Test\n run: kitchen test end-to-end-${{ matrix.os }}\n\n # This block does end-to-end recipe using test-kitchen-enterprise using Habitat package of chef19\n docr_lnx_x86_64:\n name: docr_lnx_x86_64(test-kitchen-enterprise)\n strategy:\n fail-fast: false\n matrix:\n~ os:\n~ - amazonlinux-2023\n~ - almalinux-8\n~ - almalinux-9\n~ - debian-11\n~ - debian-12\n~ - fedora-40\n~ # fedora-latest\n~ - opensuse-leap-15\n~ - oraclelinux-8\n~ - oraclelinux-9\n~ - rockylinux-8\n~ - rockylinux-9\n~ - ubuntu-2004\n~ - ubuntu-2204\n~ # - ubuntu-2404\n~ runs-on: ubuntu-latest\n~ env:\n~ FORCE_FFI_YAJL: ext\n~ CHEF_LICENSE: accept-no-persist\n~ KITCHEN_LOCAL_YAML: kitchen.dokken.yml\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n - name: Install Chef\n uses: actionshub/chef-install@3.0.0\n - name: Install chef-test-kitchen-enterprise\n run: |\n curl https://raw.githubusercontent.com/habitat-sh/habitat/main/components/hab/install.sh | sudo bash -s -- -c stable\n sudo hab license accept\n sudo hab --version\n sudo hab pkg install --binlink --force chef/chef-test-kitchen-enterprise --channel unstable\n sudo kitchen --version\n sudo hab pkg install --binlink --force chef/chef-cli --channel unstable\n - name: Kitchen Test\n run: kitchen test end-to-end-${{ matrix.os }}\n\n # chef-test-kitchen-enterprise RC1 currently supports only kitchen-dokken driver hence in this block we \n # will continue testing with test-kitchen\n vm_lnx_x86_64:\n name: vm_lnx_x86_64(test-kitchen legacy)\n strategy:\n fail-fast: false\n matrix:\n~ os:\n~ - amazonlinux-2\n~ - amazonlinux-2023\n~ - almalinux-8\n~ - almalinux-9\n~ - debian-11\n~ - debian-12\n~ - fedora-40\n~ # - fedora-latest\n~ - opensuse-leap-15\n~ - oracle-7\n~ - oracle-8\n~ - oracle-9\n~ - rockylinux-8\n~ - rockylinux-9\n~ - ubuntu-2004\n~ - ubuntu-2204\n~ # - ubuntu-2404\n~ runs-on: ubuntu-22.04\n~ env:\n~ CHEF_LICENSE: accept-no-persist\n~ KITCHEN_LOCAL_YAML: kitchen.linux.ci.yml\n~ steps:\n~ - uses: actions/checkout@v4\n~ - name: Install Vagrant and VirtualBox\n~ run: |\n~ wget -O- https://apt.releases.hashicorp.com/gpg | gpg --dearmor | sudo tee /usr/share/keyrings/hashicorp-archive-keyring.gpg\n~ wget -O- https://www.virtualbox.org/download/oracle_vbox_2016.asc | gpg --dearmor | sudo tee /usr/share/keyrings/oracle-virtualbox-2016.gpg\n~ echo \"deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main\" | sudo tee /etc/apt/sources.list.d/hashicorp.list\n~ echo \"deb [arch=amd64 signed-by=/usr/share/keyrings/oracle-virtualbox-2016.gpg] 
https://download.virtualbox.org/virtualbox/debian $(lsb_release -cs) contrib\" | sudo tee /etc/apt/sources.list.d/virtualbox.list\n~ sudo apt-get update\n~ sudo apt-get install -y vagrant virtualbox-7.1\n~ sudo vagrant --version\n~ sudo VBoxManage --version\n~ - name: Install Chef\n~ uses: actionshub/chef-install@3.0.0\n~ - name: Kitchen Test\n~ run: |\n~ # To debug, uncomment these\n~ #export VAGRANT_LOG=debug\n~ #export VBOX_LOG=all.e.l.f\n #export VBOX_LOG_DEST=stdout\n #export K_DEBUG_OPTS=\"--log-level=debug\"\n export LOGNAME=$USER\n kitchen test end-to-end-${{ matrix.os }} $K_DEBUG_OPTS\n kitchen test end-to-end-${{ matrix.os }} $K_DEBUG_OPTS", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2073245958", - "repo_full_name": "chef/chef", - "pr_number": 14794, - "pr_file": ".github/workflows/kitchen.yml", - "discussion_id": "2073245958", - "commented_code": "@@ -16,144 +16,161 @@\n working-directory: kitchen-tests\n \n jobs:\n+ # Test-kitchen-enterprise currently supports only linux platforms, \n+ # Hence we are testing on Windows using test-kitchen\n win_x86_64:\n+ name: win_x86_64(test-kitchen legacy)\n strategy:\n fail-fast: false\n matrix:\n~ os:\n~ - windows-2019\n~ - windows-2022\n~ runs-on: ${{ matrix.os }}\n~ env:\n~ CHEF_LICENSE: accept-no-persist\n~ KITCHEN_LOCAL_YAML: kitchen.exec.windows.yml\n~ steps:\n~ - uses: actions/checkout@v4\n~ - name: Install Chef\n~ uses: actionshub/chef-install@3.0.0\n - name: Kitchen Test\n run: kitchen test end-to-end-${{ matrix.os }}\n \n+ # Habitat does not support Mac yet. Hence we're testing on Mac systems using test-kitchen\n mac_x86_64:\n+ name: mac_x86_64(test-kitchen legacy)\n strategy:\n fail-fast: false\n matrix:\n~ os:\n~ - macos-13\n~ runs-on: ${{ matrix.os }}\n~ env:\n~ CHEF_LICENSE: accept-no-persist\n~ KITCHEN_LOCAL_YAML: kitchen.exec.macos.yml\n~ steps:\n~ - name: Check out code\n~ uses: actions/checkout@v4\n~ with:\n~ clean: true\n~ - name: Install Chef\n~ uses: actionshub/chef-install@3.0.0\n~ - name: Kitchen Test\n run: kitchen test end-to-end-${{ matrix.os }}\n \n mac_arm64:\n+ name: mac_arm64(test-kitchen legacy)\n strategy:\n fail-fast: false\n matrix:\n~ os:\n~ - macos-14 # currently macos-latest\n~ - macos-15\n~ runs-on: ${{ matrix.os }}\n~ env:\n~ CHEF_LICENSE: accept-no-persist\n~ KITCHEN_LOCAL_YAML: kitchen.exec.macos.yml\n~ steps:\n~ - name: Check out code\n~ uses: actions/checkout@v4\n~ with:\n~ clean: true\n~ - name: Install Chef\n~ uses: actionshub/chef-install@3.0.0\n - name: Kitchen Test\n run: kitchen test end-to-end-${{ matrix.os }}\n \n+ # This block does end-to-end recipe using test-kitchen-enterprise using Habitat package of chef19\n docr_lnx_x86_64:\n+ name: docr_lnx_x86_64(test-kitchen-enterprise)\n strategy:\n fail-fast: false\n matrix:\n~ os:\n~ - amazonlinux-2023\n~ - almalinux-8\n~ - almalinux-9\n~ - debian-11\n~ - debian-12\n~ - fedora-40\n~ # fedora-latest\n~ - opensuse-leap-15\n~ - oraclelinux-8\n~ - oraclelinux-9\n~ - rockylinux-8\n~ - rockylinux-9\n~ - ubuntu-2004\n~ - ubuntu-2204\n~ # - ubuntu-2404\n~ runs-on: ubuntu-latest\n~ env:\n~ FORCE_FFI_YAJL: ext\n~ CHEF_LICENSE: accept-no-persist\n~ KITCHEN_LOCAL_YAML: kitchen.dokken.yml\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n- - name: Install Chef\n- uses: actionshub/chef-install@3.0.0\n+ - name: Install chef-test-kitchen-enterprise\n+ run: |\n+ curl https://raw.githubusercontent.com/habitat-sh/habitat/main/components/hab/install.sh | sudo bash -s -- -c stable\n+ sudo hab license accept\n+ sudo 
hab --version\n+ sudo hab pkg install --binlink --force chef/chef-test-kitchen-enterprise --channel unstable\n+ sudo kitchen --version\n+ sudo hab pkg install --binlink --force chef/chef-cli --channel unstable\n - name: Kitchen Test\n run: kitchen test end-to-end-${{ matrix.os }}\n \n+ # chef-test-kitchen-enterprise RC1 currently supports only kitchen-dokken driver hence in this block we \n+ # will continue testing with test-kitchen\n vm_lnx_x86_64:\n+ name: vm_lnx_x86_64(test-kitchen legacy)\n strategy:\n fail-fast: false\n matrix:\n~ os:\n~ - amazonlinux-2\n~ - amazonlinux-2023\n~ - almalinux-8\n~ - almalinux-9\n~ - debian-11\n~ - debian-12\n~ - fedora-40\n~ # - fedora-latest\n~ - opensuse-leap-15\n~ - oracle-7\n~ - oracle-8\n~ - oracle-9\n~ - rockylinux-8\n~ - rockylinux-9\n~ - ubuntu-2004\n~ - ubuntu-2204\n~ # - ubuntu-2404\n~ runs-on: ubuntu-22.04\n~ env:\n~ CHEF_LICENSE: accept-no-persist\n~ KITCHEN_LOCAL_YAML: kitchen.linux.ci.yml\n~ steps:\n~ - uses: actions/checkout@v4\n~ - name: Install Vagrant and VirtualBox\n~ run: |\n~ wget -O- https://apt.releases.hashicorp.com/gpg | gpg --dearmor | sudo tee /usr/share/keyrings/hashicorp-archive-keyring.gpg\n~ wget -O- https://www.virtualbox.org/download/oracle_vbox_2016.asc | gpg --dearmor | sudo tee /usr/share/keyrings/oracle-virtualbox-2016.gpg\n~ echo \"deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main\" | sudo tee /etc/apt/sources.list.d/hashicorp.list\n~ echo \"deb [arch=amd64 signed-by=/usr/share/keyrings/oracle-virtualbox-2016.gpg] https://download.virtualbox.org/virtualbox/debian $(lsb_release -cs) contrib\" | sudo tee /etc/apt/sources.list.d/virtualbox.list\n~ sudo apt-get update\n~ sudo apt-get install -y vagrant virtualbox-7.1\n~ sudo vagrant --version\n~ sudo VBoxManage --version\n~ - name: Install Chef\n~ uses: actionshub/chef-install@3.0.0\n~ - name: Kitchen Test\n~ run: |\n~ # To debug, uncomment these\n~ #export VAGRANT_LOG=debug\n~ #export VBOX_LOG=all.e.l.f\n #export VBOX_LOG_DEST=stdout\n #export K_DEBUG_OPTS=\"--log-level=debug\"\n export LOGNAME=$USER\n- kitchen test end-to-end-${{ matrix.os }} $K_DEBUG_OPTS\n+ kitchen test end-to-end-${{ matrix.os }} $K_DEBUG_OPTS", - "comment_created_at": "2025-05-05T11:06:01+00:00", - "comment_author": "github-advanced-security[bot]", - "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. 
Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/267)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2167257073", - "pr_number": 15068, - "pr_file": ".github/workflows/verify.yml", - "created_at": "2025-06-25T17:30:23+00:00", - "commented_code": "name: Verify Pipeline\n\non:\n workflow_dispatch:\n inputs:\n base_image_tag:\n description: 'Base image tag version'\n required: true\n default: '3.0.36'\n type: string\n custom_env_var:\n description: 'Custom environment variable'\n required: false\n default: 'production'\n type: string\n chef_license:\n description: 'Chef license acceptance'\n required: true\n default: 'accept-no-persist'\n type: string\n push:\n branches: [main]\n pull_request:\n branches: [main]\n\nenv:\n BASE_IMAGE_TAG: ${{ inputs.base_image_tag || '3.0.36' }}\n CUSTOM_ENV: ${{ inputs.custom_env_var || 'production' }}\n CHEF_LICENSE: ${{ inputs.chef_license || 'accept-no-persist' }}\n\njobs:\n # Linux Tests - Matrix for different test types and OS combinations\n linux-tests:\n name: Linux ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-22.04\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n os:\n - rocky-8\n - rocky-9\n - rhel-9\n - debian-9\n - ubuntu-1804\n - ubuntu-2004\n - ubuntu-2204\n - centos-7\n - amazonlinux-2\n - rockylinux-8\n - almalinux-9\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:${{ env.BASE_IMAGE_TAG }}\n options: --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Configure Git safe directory\n run: |\n git config --global --add safe.directory /__w/*/\n\n - name: Display Environment Info\n run: |\n echo \"Running ${{ matrix.test_type }} tests on ${{ matrix.os }}\"\n echo \"Base Image Tag: ${{ env.BASE_IMAGE_TAG }}\"\n echo \"Custom Environment: ${{ env.CUSTOM_ENV }}\"\n echo \"Chef License: ${{ env.CHEF_LICENSE }}\"\n cat /etc/os-release || echo \"OS release info not available\"\n\n - name: Install Dependencies\n run: |\n ./.expeditor/bk_container_prep.sh\n\n - name: Run ${{ matrix.test_type }} Tests\n run: |\n ./.expeditor/scripts/prep_and_run_tests.sh \"${{ matrix.test_type }}\"\n env:\n TEST_TYPE: ${{ matrix.test_type }}\n OS_TYPE: ${{ matrix.os }}\n CHEF_LICENSE: ${{ env.CHEF_LICENSE }}\n CUSTOM_ENV_VAR: ${{ env.CUSTOM_ENV }}\n\n # Windows Tests - Matrix for different test types and Windows versions\n windows-tests:", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2167257073", - "repo_full_name": "chef/chef", - "pr_number": 15068, - "pr_file": ".github/workflows/verify.yml", - "discussion_id": "2167257073", - "commented_code": "@@ -0,0 +1,161 @@\n+name: Verify Pipeline\n+\n+on:\n+ workflow_dispatch:\n+ inputs:\n+ base_image_tag:\n+ description: 'Base image tag version'\n+ required: true\n+ default: '3.0.36'\n+ type: string\n+ custom_env_var:\n+ description: 'Custom environment variable'\n+ required: false\n+ default: 'production'\n+ type: string\n+ chef_license:\n+ description: 'Chef license acceptance'\n+ required: true\n+ default: 'accept-no-persist'\n+ type: string\n+ push:\n+ branches: [main]\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ BASE_IMAGE_TAG: ${{ inputs.base_image_tag || '3.0.36' }}\n+ CUSTOM_ENV: ${{ inputs.custom_env_var || 'production' }}\n+ CHEF_LICENSE: ${{ inputs.chef_license || 'accept-no-persist' }}\n+\n+jobs:\n+ # Linux Tests - 
Matrix for different test types and OS combinations\n+ linux-tests:\n+ name: Linux ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-22.04\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ test_type: [Unit, Integration, Functional]\n+ os:\n+ - rocky-8\n+ - rocky-9\n+ - rhel-9\n+ - debian-9\n+ - ubuntu-1804\n+ - ubuntu-2004\n+ - ubuntu-2204\n+ - centos-7\n+ - amazonlinux-2\n+ - rockylinux-8\n+ - almalinux-9\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:${{ env.BASE_IMAGE_TAG }}\n+ options: --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Configure Git safe directory\n+ run: |\n+ git config --global --add safe.directory /__w/*/\n+\n+ - name: Display Environment Info\n+ run: |\n+ echo \"Running ${{ matrix.test_type }} tests on ${{ matrix.os }}\"\n+ echo \"Base Image Tag: ${{ env.BASE_IMAGE_TAG }}\"\n+ echo \"Custom Environment: ${{ env.CUSTOM_ENV }}\"\n+ echo \"Chef License: ${{ env.CHEF_LICENSE }}\"\n+ cat /etc/os-release || echo \"OS release info not available\"\n+\n+ - name: Install Dependencies\n+ run: |\n+ ./.expeditor/bk_container_prep.sh\n+\n+ - name: Run ${{ matrix.test_type }} Tests\n+ run: |\n+ ./.expeditor/scripts/prep_and_run_tests.sh \"${{ matrix.test_type }}\"\n+ env:\n+ TEST_TYPE: ${{ matrix.test_type }}\n+ OS_TYPE: ${{ matrix.os }}\n+ CHEF_LICENSE: ${{ env.CHEF_LICENSE }}\n+ CUSTOM_ENV_VAR: ${{ env.CUSTOM_ENV }}\n+\n+ # Windows Tests - Matrix for different test types and Windows versions\n+ windows-tests:", - "comment_created_at": "2025-06-25T17:30:23+00:00", - "comment_author": "github-advanced-security[bot]", - "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/289)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2167257077", - "pr_number": 15068, - "pr_file": ".github/workflows/verify.yml", - "created_at": "2025-06-25T17:30:23+00:00", - "commented_code": "name: Verify Pipeline\n\non:\n workflow_dispatch:\n inputs:\n base_image_tag:\n description: 'Base image tag version'\n required: true\n default: '3.0.36'\n type: string\n custom_env_var:\n description: 'Custom environment variable'\n required: false\n default: 'production'\n type: string\n chef_license:\n description: 'Chef license acceptance'\n required: true\n default: 'accept-no-persist'\n type: string\n push:\n branches: [main]\n pull_request:\n branches: [main]\n\nenv:\n BASE_IMAGE_TAG: ${{ inputs.base_image_tag || '3.0.36' }}\n CUSTOM_ENV: ${{ inputs.custom_env_var || 'production' }}\n CHEF_LICENSE: ${{ inputs.chef_license || 'accept-no-persist' }}\n\njobs:\n # Linux Tests - Matrix for different test types and OS combinations\n linux-tests:\n name: Linux ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-22.04\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n os:\n - rocky-8\n - rocky-9\n - rhel-9\n - debian-9\n - ubuntu-1804\n - ubuntu-2004\n - ubuntu-2204\n - centos-7\n - amazonlinux-2\n - rockylinux-8\n - almalinux-9\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:${{ env.BASE_IMAGE_TAG }}\n options: --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Configure Git safe directory\n run: |\n git config --global --add safe.directory /__w/*/\n\n - name: Display 
Environment Info\n run: |\n echo \"Running ${{ matrix.test_type }} tests on ${{ matrix.os }}\"\n echo \"Base Image Tag: ${{ env.BASE_IMAGE_TAG }}\"\n echo \"Custom Environment: ${{ env.CUSTOM_ENV }}\"\n echo \"Chef License: ${{ env.CHEF_LICENSE }}\"\n cat /etc/os-release || echo \"OS release info not available\"\n\n - name: Install Dependencies\n run: |\n ./.expeditor/bk_container_prep.sh\n\n - name: Run ${{ matrix.test_type }} Tests\n run: |\n ./.expeditor/scripts/prep_and_run_tests.sh \"${{ matrix.test_type }}\"\n env:\n TEST_TYPE: ${{ matrix.test_type }}\n OS_TYPE: ${{ matrix.os }}\n CHEF_LICENSE: ${{ env.CHEF_LICENSE }}\n CUSTOM_ENV_VAR: ${{ env.CUSTOM_ENV }}\n\n # Windows Tests - Matrix for different test types and Windows versions\n windows-tests:\n name: Windows ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ${{ matrix.os }}\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n os:\n - windows-2019\n - windows-2022\n - windows-latest\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Set up Ruby\n uses: ruby/setup-ruby@v1\n with:\n ruby-version: '3.1'\n bundler-cache: true\n\n - name: Display Environment Info\n shell: pwsh\n run: |\n Write-Host \"Running ${{ matrix.test_type }} tests on ${{ matrix.os }}\"\n Write-Host \"Base Image Tag: ${{ env.BASE_IMAGE_TAG }}\"\n Write-Host \"Custom Environment: ${{ env.CUSTOM_ENV }}\"\n Write-Host \"Chef License: ${{ env.CHEF_LICENSE }}\"\n Get-ComputerInfo | Select-Object WindowsProductName, WindowsVersion\n\n - name: Install Dependencies\n shell: pwsh\n run: |\n bundle config set --local without development\n bundle install --jobs=3 --retry=3\n\n - name: Run ${{ matrix.test_type }} Tests\n shell: pwsh\n run: |\n switch (\"${{ matrix.test_type }}\") {\n \"Unit\" {\n Write-Host \"Running Unit Tests...\"\n try { bundle exec rake spec:unit } catch { Write-Host \"Unit tests script not found\" }\n }\n \"Integration\" {\n Write-Host \"Running Integration Tests...\"\n try { bundle exec rake spec:integration } catch { Write-Host \"Integration tests script not found\" }\n }\n \"Functional\" {\n Write-Host \"Running Functional Tests...\"\n try { bundle exec rake spec:functional } catch { Write-Host \"Functional tests script not found\" }\n }\n }\n env:\n TEST_TYPE: ${{ matrix.test_type }}\n OS_TYPE: ${{ matrix.os }}\n CHEF_LICENSE: ${{ env.CHEF_LICENSE }}\n CUSTOM_ENV_VAR: ${{ env.CUSTOM_ENV }}\n\n # Summary job that depends on all test jobs\n test-summary:", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2167257077", - "repo_full_name": "chef/chef", - "pr_number": 15068, - "pr_file": ".github/workflows/verify.yml", - "discussion_id": "2167257077", - "commented_code": "@@ -0,0 +1,161 @@\n+name: Verify Pipeline\n+\n+on:\n+ workflow_dispatch:\n+ inputs:\n+ base_image_tag:\n+ description: 'Base image tag version'\n+ required: true\n+ default: '3.0.36'\n+ type: string\n+ custom_env_var:\n+ description: 'Custom environment variable'\n+ required: false\n+ default: 'production'\n+ type: string\n+ chef_license:\n+ description: 'Chef license acceptance'\n+ required: true\n+ default: 'accept-no-persist'\n+ type: string\n+ push:\n+ branches: [main]\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ BASE_IMAGE_TAG: ${{ inputs.base_image_tag || '3.0.36' }}\n+ CUSTOM_ENV: ${{ inputs.custom_env_var || 'production' }}\n+ CHEF_LICENSE: ${{ inputs.chef_license || 'accept-no-persist' }}\n+\n+jobs:\n+ # Linux Tests - Matrix for different test types and OS combinations\n+ 
linux-tests:\n+ name: Linux ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-22.04\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ test_type: [Unit, Integration, Functional]\n+ os:\n+ - rocky-8\n+ - rocky-9\n+ - rhel-9\n+ - debian-9\n+ - ubuntu-1804\n+ - ubuntu-2004\n+ - ubuntu-2204\n+ - centos-7\n+ - amazonlinux-2\n+ - rockylinux-8\n+ - almalinux-9\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:${{ env.BASE_IMAGE_TAG }}\n+ options: --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Configure Git safe directory\n+ run: |\n+ git config --global --add safe.directory /__w/*/\n+\n+ - name: Display Environment Info\n+ run: |\n+ echo \"Running ${{ matrix.test_type }} tests on ${{ matrix.os }}\"\n+ echo \"Base Image Tag: ${{ env.BASE_IMAGE_TAG }}\"\n+ echo \"Custom Environment: ${{ env.CUSTOM_ENV }}\"\n+ echo \"Chef License: ${{ env.CHEF_LICENSE }}\"\n+ cat /etc/os-release || echo \"OS release info not available\"\n+\n+ - name: Install Dependencies\n+ run: |\n+ ./.expeditor/bk_container_prep.sh\n+\n+ - name: Run ${{ matrix.test_type }} Tests\n+ run: |\n+ ./.expeditor/scripts/prep_and_run_tests.sh \"${{ matrix.test_type }}\"\n+ env:\n+ TEST_TYPE: ${{ matrix.test_type }}\n+ OS_TYPE: ${{ matrix.os }}\n+ CHEF_LICENSE: ${{ env.CHEF_LICENSE }}\n+ CUSTOM_ENV_VAR: ${{ env.CUSTOM_ENV }}\n+\n+ # Windows Tests - Matrix for different test types and Windows versions\n+ windows-tests:\n+ name: Windows ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ${{ matrix.os }}\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ test_type: [Unit, Integration, Functional]\n+ os:\n+ - windows-2019\n+ - windows-2022\n+ - windows-latest\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Set up Ruby\n+ uses: ruby/setup-ruby@v1\n+ with:\n+ ruby-version: '3.1'\n+ bundler-cache: true\n+\n+ - name: Display Environment Info\n+ shell: pwsh\n+ run: |\n+ Write-Host \"Running ${{ matrix.test_type }} tests on ${{ matrix.os }}\"\n+ Write-Host \"Base Image Tag: ${{ env.BASE_IMAGE_TAG }}\"\n+ Write-Host \"Custom Environment: ${{ env.CUSTOM_ENV }}\"\n+ Write-Host \"Chef License: ${{ env.CHEF_LICENSE }}\"\n+ Get-ComputerInfo | Select-Object WindowsProductName, WindowsVersion\n+\n+ - name: Install Dependencies\n+ shell: pwsh\n+ run: |\n+ bundle config set --local without development\n+ bundle install --jobs=3 --retry=3\n+\n+ - name: Run ${{ matrix.test_type }} Tests\n+ shell: pwsh\n+ run: |\n+ switch (\"${{ matrix.test_type }}\") {\n+ \"Unit\" {\n+ Write-Host \"Running Unit Tests...\"\n+ try { bundle exec rake spec:unit } catch { Write-Host \"Unit tests script not found\" }\n+ }\n+ \"Integration\" {\n+ Write-Host \"Running Integration Tests...\"\n+ try { bundle exec rake spec:integration } catch { Write-Host \"Integration tests script not found\" }\n+ }\n+ \"Functional\" {\n+ Write-Host \"Running Functional Tests...\"\n+ try { bundle exec rake spec:functional } catch { Write-Host \"Functional tests script not found\" }\n+ }\n+ }\n+ env:\n+ TEST_TYPE: ${{ matrix.test_type }}\n+ OS_TYPE: ${{ matrix.os }}\n+ CHEF_LICENSE: ${{ env.CHEF_LICENSE }}\n+ CUSTOM_ENV_VAR: ${{ env.CUSTOM_ENV }}\n+\n+ # Summary job that depends on all test jobs\n+ test-summary:", - "comment_created_at": "2025-06-25T17:30:23+00:00", - "comment_author": "github-advanced-security[bot]", - "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. 
Consider setting an explicit permissions block, using the following as a minimal starting point: {{contents: read}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/290)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2167257081", - "pr_number": 15068, - "pr_file": ".github/workflows/verify.yml", - "created_at": "2025-06-25T17:30:23+00:00", - "commented_code": "name: Verify Pipeline\n\non:\n workflow_dispatch:\n inputs:\n base_image_tag:\n description: 'Base image tag version'\n required: true\n default: '3.0.36'\n type: string\n custom_env_var:\n description: 'Custom environment variable'\n required: false\n default: 'production'\n type: string\n chef_license:\n description: 'Chef license acceptance'\n required: true\n default: 'accept-no-persist'\n type: string\n push:\n branches: [main]\n pull_request:\n branches: [main]\n\nenv:\n BASE_IMAGE_TAG: ${{ inputs.base_image_tag || '3.0.36' }}\n CUSTOM_ENV: ${{ inputs.custom_env_var || 'production' }}\n CHEF_LICENSE: ${{ inputs.chef_license || 'accept-no-persist' }}\n\njobs:\n # Linux Tests - Matrix for different test types and OS combinations\n linux-tests:\n name: Linux ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ubuntu-22.04\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n os:\n - rocky-8\n - rocky-9\n - rhel-9\n - debian-9\n - ubuntu-1804\n - ubuntu-2004\n - ubuntu-2204\n - centos-7\n - amazonlinux-2\n - rockylinux-8\n - almalinux-9\n container:\n image: chefes/omnibus-toolchain-${{ matrix.os }}:${{ env.BASE_IMAGE_TAG }}\n options: --user root\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Configure Git safe directory\n run: |\n git config --global --add safe.directory /__w/*/\n\n - name: Display Environment Info\n run: |\n echo \"Running ${{ matrix.test_type }} tests on ${{ matrix.os }}\"\n echo \"Base Image Tag: ${{ env.BASE_IMAGE_TAG }}\"\n echo \"Custom Environment: ${{ env.CUSTOM_ENV }}\"\n echo \"Chef License: ${{ env.CHEF_LICENSE }}\"\n cat /etc/os-release || echo \"OS release info not available\"\n\n - name: Install Dependencies\n run: |\n ./.expeditor/bk_container_prep.sh\n\n - name: Run ${{ matrix.test_type }} Tests\n run: |\n ./.expeditor/scripts/prep_and_run_tests.sh \"${{ matrix.test_type }}\"\n env:\n TEST_TYPE: ${{ matrix.test_type }}\n OS_TYPE: ${{ matrix.os }}\n CHEF_LICENSE: ${{ env.CHEF_LICENSE }}\n CUSTOM_ENV_VAR: ${{ env.CUSTOM_ENV }}\n\n # Windows Tests - Matrix for different test types and Windows versions\n windows-tests:\n name: Windows ${{ matrix.test_type }} - ${{ matrix.os }}\n runs-on: ${{ matrix.os }}\n strategy:\n fail-fast: false\n matrix:\n test_type: [Unit, Integration, Functional]\n os:\n - windows-2019\n - windows-2022\n - windows-latest\n steps:\n - name: Check out code\n uses: actions/checkout@v4\n\n - name: Set up Ruby\n uses: ruby/setup-ruby@v1\n with:\n ruby-version: '3.1'\n bundler-cache: true\n\n - name: Display Environment Info\n shell: pwsh\n run: |\n Write-Host \"Running ${{ matrix.test_type }} tests on ${{ matrix.os }}\"\n Write-Host \"Base Image Tag: ${{ env.BASE_IMAGE_TAG }}\"\n Write-Host \"Custom Environment: ${{ env.CUSTOM_ENV }}\"\n Write-Host \"Chef License: ${{ env.CHEF_LICENSE }}\"\n Get-ComputerInfo | Select-Object WindowsProductName, WindowsVersion\n\n - name: Install Dependencies\n shell: pwsh\n run: |\n bundle config set --local without development\n bundle install --jobs=3 --retry=3\n\n - name: Run ${{ matrix.test_type }} Tests\n shell: pwsh\n run: |\n switch 
(\"${{ matrix.test_type }}\") {\n \"Unit\" {\n Write-Host \"Running Unit Tests...\"\n try { bundle exec rake spec:unit } catch { Write-Host \"Unit tests script not found\" }\n }\n \"Integration\" {\n Write-Host \"Running Integration Tests...\"\n try { bundle exec rake spec:integration } catch { Write-Host \"Integration tests script not found\" }\n }\n \"Functional\" {\n Write-Host \"Running Functional Tests...\"\n try { bundle exec rake spec:functional } catch { Write-Host \"Functional tests script not found\" }\n }\n }\n env:\n TEST_TYPE: ${{ matrix.test_type }}\n OS_TYPE: ${{ matrix.os }}\n CHEF_LICENSE: ${{ env.CHEF_LICENSE }}\n CUSTOM_ENV_VAR: ${{ env.CUSTOM_ENV }}\n\n # Summary job that depends on all test jobs\n test-summary:\n name: Test Summary\n runs-on: ubuntu-latest\n needs: [linux-tests, windows-tests]\n if: always()\n steps:\n - name: Check test results\n run: |\n echo \"Linux tests result: ${{ needs.linux-tests.result }}\"\n echo \"Windows tests result: ${{ needs.windows-tests.result }}\"\n\n if [[ \"${{ needs.linux-tests.result }}\" == \"failure\" || \"${{ needs.windows-tests.result }}\" == \"failure\" ]]; then\n echo \"Some tests failed\"\n exit 1\n else\n echo \"All tests passed or were skipped\"\n fi", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2167257081", - "repo_full_name": "chef/chef", - "pr_number": 15068, - "pr_file": ".github/workflows/verify.yml", - "discussion_id": "2167257081", - "commented_code": "@@ -0,0 +1,161 @@\n+name: Verify Pipeline\n+\n+on:\n+ workflow_dispatch:\n+ inputs:\n+ base_image_tag:\n+ description: 'Base image tag version'\n+ required: true\n+ default: '3.0.36'\n+ type: string\n+ custom_env_var:\n+ description: 'Custom environment variable'\n+ required: false\n+ default: 'production'\n+ type: string\n+ chef_license:\n+ description: 'Chef license acceptance'\n+ required: true\n+ default: 'accept-no-persist'\n+ type: string\n+ push:\n+ branches: [main]\n+ pull_request:\n+ branches: [main]\n+\n+env:\n+ BASE_IMAGE_TAG: ${{ inputs.base_image_tag || '3.0.36' }}\n+ CUSTOM_ENV: ${{ inputs.custom_env_var || 'production' }}\n+ CHEF_LICENSE: ${{ inputs.chef_license || 'accept-no-persist' }}\n+\n+jobs:\n+ # Linux Tests - Matrix for different test types and OS combinations\n+ linux-tests:\n+ name: Linux ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ubuntu-22.04\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ test_type: [Unit, Integration, Functional]\n+ os:\n+ - rocky-8\n+ - rocky-9\n+ - rhel-9\n+ - debian-9\n+ - ubuntu-1804\n+ - ubuntu-2004\n+ - ubuntu-2204\n+ - centos-7\n+ - amazonlinux-2\n+ - rockylinux-8\n+ - almalinux-9\n+ container:\n+ image: chefes/omnibus-toolchain-${{ matrix.os }}:${{ env.BASE_IMAGE_TAG }}\n+ options: --user root\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Configure Git safe directory\n+ run: |\n+ git config --global --add safe.directory /__w/*/\n+\n+ - name: Display Environment Info\n+ run: |\n+ echo \"Running ${{ matrix.test_type }} tests on ${{ matrix.os }}\"\n+ echo \"Base Image Tag: ${{ env.BASE_IMAGE_TAG }}\"\n+ echo \"Custom Environment: ${{ env.CUSTOM_ENV }}\"\n+ echo \"Chef License: ${{ env.CHEF_LICENSE }}\"\n+ cat /etc/os-release || echo \"OS release info not available\"\n+\n+ - name: Install Dependencies\n+ run: |\n+ ./.expeditor/bk_container_prep.sh\n+\n+ - name: Run ${{ matrix.test_type }} Tests\n+ run: |\n+ ./.expeditor/scripts/prep_and_run_tests.sh \"${{ matrix.test_type }}\"\n+ env:\n+ TEST_TYPE: ${{ matrix.test_type }}\n+ OS_TYPE: ${{ 
matrix.os }}\n+ CHEF_LICENSE: ${{ env.CHEF_LICENSE }}\n+ CUSTOM_ENV_VAR: ${{ env.CUSTOM_ENV }}\n+\n+ # Windows Tests - Matrix for different test types and Windows versions\n+ windows-tests:\n+ name: Windows ${{ matrix.test_type }} - ${{ matrix.os }}\n+ runs-on: ${{ matrix.os }}\n+ strategy:\n+ fail-fast: false\n+ matrix:\n+ test_type: [Unit, Integration, Functional]\n+ os:\n+ - windows-2019\n+ - windows-2022\n+ - windows-latest\n+ steps:\n+ - name: Check out code\n+ uses: actions/checkout@v4\n+\n+ - name: Set up Ruby\n+ uses: ruby/setup-ruby@v1\n+ with:\n+ ruby-version: '3.1'\n+ bundler-cache: true\n+\n+ - name: Display Environment Info\n+ shell: pwsh\n+ run: |\n+ Write-Host \"Running ${{ matrix.test_type }} tests on ${{ matrix.os }}\"\n+ Write-Host \"Base Image Tag: ${{ env.BASE_IMAGE_TAG }}\"\n+ Write-Host \"Custom Environment: ${{ env.CUSTOM_ENV }}\"\n+ Write-Host \"Chef License: ${{ env.CHEF_LICENSE }}\"\n+ Get-ComputerInfo | Select-Object WindowsProductName, WindowsVersion\n+\n+ - name: Install Dependencies\n+ shell: pwsh\n+ run: |\n+ bundle config set --local without development\n+ bundle install --jobs=3 --retry=3\n+\n+ - name: Run ${{ matrix.test_type }} Tests\n+ shell: pwsh\n+ run: |\n+ switch (\"${{ matrix.test_type }}\") {\n+ \"Unit\" {\n+ Write-Host \"Running Unit Tests...\"\n+ try { bundle exec rake spec:unit } catch { Write-Host \"Unit tests script not found\" }\n+ }\n+ \"Integration\" {\n+ Write-Host \"Running Integration Tests...\"\n+ try { bundle exec rake spec:integration } catch { Write-Host \"Integration tests script not found\" }\n+ }\n+ \"Functional\" {\n+ Write-Host \"Running Functional Tests...\"\n+ try { bundle exec rake spec:functional } catch { Write-Host \"Functional tests script not found\" }\n+ }\n+ }\n+ env:\n+ TEST_TYPE: ${{ matrix.test_type }}\n+ OS_TYPE: ${{ matrix.os }}\n+ CHEF_LICENSE: ${{ env.CHEF_LICENSE }}\n+ CUSTOM_ENV_VAR: ${{ env.CUSTOM_ENV }}\n+\n+ # Summary job that depends on all test jobs\n+ test-summary:\n+ name: Test Summary\n+ runs-on: ubuntu-latest\n+ needs: [linux-tests, windows-tests]\n+ if: always()\n+ steps:\n+ - name: Check test results\n+ run: |\n+ echo \"Linux tests result: ${{ needs.linux-tests.result }}\"\n+ echo \"Windows tests result: ${{ needs.windows-tests.result }}\"\n+\n+ if [[ \"${{ needs.linux-tests.result }}\" == \"failure\" || \"${{ needs.windows-tests.result }}\" == \"failure\" ]]; then\n+ echo \"Some tests failed\"\n+ exit 1\n+ else\n+ echo \"All tests passed or were skipped\"\n+ fi", - "comment_created_at": "2025-06-25T17:30:23+00:00", - "comment_author": "github-advanced-security[bot]", - "comment_body": "## Workflow does not contain permissions\n\nActions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {{}}\n\n[Show more details](https://github.com/chef/chef/security/code-scanning/291)", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-log-message-quality.json b/_reviewers/chef-log-message-quality.json new file mode 100644 index 0000000..001b627 --- /dev/null +++ b/_reviewers/chef-log-message-quality.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "697656336", + "pr_number": 11976, + "pr_file": "RELEASE_NOTES.md", + "created_at": "2021-08-27T18:56:24+00:00", + "commented_code": "This file holds \"in progress\" release notes for the current release under development and is intended for consumption by the Chef Documentation team. 
Please see for the official Chef release notes.\n\n## What's New in 17.4\n## What's New in 17.4.38\n\n### Bug fixes\n\n- Resolved a regression introduced in Chef Infra Client 17.4 that would cause HWRP-style resources inheriting from LWRPBase to fail.\n\n### Enhancements\n\n- Log output has been improved to better deliniate when the Infra Phase and Compliance Phase start and end.\n- Ohai data collection of Amazon EC2 metadata has been improved to collect additional data for some configurations.\n- Removed ERROR logs when retrying failed communication with the Chef Infra Server", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "697656336", + "repo_full_name": "chef/chef", + "pr_number": 11976, + "pr_file": "RELEASE_NOTES.md", + "discussion_id": "697656336", + "commented_code": "@@ -1,6 +1,35 @@\n This file holds \"in progress\" release notes for the current release under development and is intended for consumption by the Chef Documentation team. Please see for the official Chef release notes.\n \n-## What's New in 17.4\n+## What's New in 17.4.38\n+\n+### Bug fixes\n+\n+- Resolved a regression introduced in Chef Infra Client 17.4 that would cause HWRP-style resources inheriting from LWRPBase to fail.\n+\n+### Enhancements\n+\n+- Log output has been improved to better deliniate when the Infra Phase and Compliance Phase start and end.\n+- Ohai data collection of Amazon EC2 metadata has been improved to collect additional data for some configurations.\n+- Removed ERROR logs when retrying failed communication with the Chef Infra Server", + "comment_created_at": "2021-08-27T18:56:24+00:00", + "comment_author": "IanMadd", + "comment_body": "```suggestion\r\n- Removed ERROR logs when retrying failed communication with the Chef Infra Server.\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "697659082", + "pr_number": 11976, + "pr_file": "RELEASE_NOTES.md", + "created_at": "2021-08-27T19:01:11+00:00", + "commented_code": "This file holds \"in progress\" release notes for the current release under development and is intended for consumption by the Chef Documentation team. Please see for the official Chef release notes.\n\n## What's New in 17.4\n## What's New in 17.4.38\n\n### Bug fixes\n\n- Resolved a regression introduced in Chef Infra Client 17.4 that would cause HWRP-style resources inheriting from LWRPBase to fail.\n\n### Enhancements\n\n- Log output has been improved to better deliniate when the Infra Phase and Compliance Phase start and end.", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "697659082", + "repo_full_name": "chef/chef", + "pr_number": 11976, + "pr_file": "RELEASE_NOTES.md", + "discussion_id": "697659082", + "commented_code": "@@ -1,6 +1,35 @@\n This file holds \"in progress\" release notes for the current release under development and is intended for consumption by the Chef Documentation team. 
Please see for the official Chef release notes.\n \n-## What's New in 17.4\n+## What's New in 17.4.38\n+\n+### Bug fixes\n+\n+- Resolved a regression introduced in Chef Infra Client 17.4 that would cause HWRP-style resources inheriting from LWRPBase to fail.\n+\n+### Enhancements\n+\n+- Log output has been improved to better deliniate when the Infra Phase and Compliance Phase start and end.", + "comment_created_at": "2021-08-27T19:01:11+00:00", + "comment_author": "kagarmoe", + "comment_body": "```suggestion\r\n- Improved log output to clearly define where the Infra Phase ends and the Compliance Phase begins.\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-log-message-quality.md b/_reviewers/chef-log-message-quality.md index 1803c04..84f46eb 100644 --- a/_reviewers/chef-log-message-quality.md +++ b/_reviewers/chef-log-message-quality.md @@ -26,51 +26,3 @@ logger.info("Transitioning from Infra Phase to Compliance Phase.") ``` This standard improves log readability during troubleshooting and ensures documentation accurately reflects system behavior. - - -[ - { - "discussion_id": "697656336", - "pr_number": 11976, - "pr_file": "RELEASE_NOTES.md", - "created_at": "2021-08-27T18:56:24+00:00", - "commented_code": "This file holds \"in progress\" release notes for the current release under development and is intended for consumption by the Chef Documentation team. Please see for the official Chef release notes.\n\n## What's New in 17.4\n## What's New in 17.4.38\n\n### Bug fixes\n\n- Resolved a regression introduced in Chef Infra Client 17.4 that would cause HWRP-style resources inheriting from LWRPBase to fail.\n\n### Enhancements\n\n- Log output has been improved to better deliniate when the Infra Phase and Compliance Phase start and end.\n- Ohai data collection of Amazon EC2 metadata has been improved to collect additional data for some configurations.\n- Removed ERROR logs when retrying failed communication with the Chef Infra Server", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "697656336", - "repo_full_name": "chef/chef", - "pr_number": 11976, - "pr_file": "RELEASE_NOTES.md", - "discussion_id": "697656336", - "commented_code": "@@ -1,6 +1,35 @@\n This file holds \"in progress\" release notes for the current release under development and is intended for consumption by the Chef Documentation team. 
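The wording bar the reviewers set for these release-note bullets carries over naturally to the log lines being described: complete sentences, terminal punctuation, and an explicit statement of where one phase ends and the next begins. A hypothetical Ruby sketch of that style (the messages are illustrative, not lifted from the Chef codebase):

```ruby
require "chef/log"

# Vague: no terminal punctuation, and the reader cannot tell what happens next.
Chef::Log.info("Infra Phase complete")

# Clearer: complete sentences that name both the phase that just finished
# and the phase that is about to start.
Chef::Log.info("Infra Phase complete.")
Chef::Log.info("Starting Compliance Phase.")
```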
Please see for the official Chef release notes.\n \n-## What's New in 17.4\n+## What's New in 17.4.38\n+\n+### Bug fixes\n+\n+- Resolved a regression introduced in Chef Infra Client 17.4 that would cause HWRP-style resources inheriting from LWRPBase to fail.\n+\n+### Enhancements\n+\n+- Log output has been improved to better deliniate when the Infra Phase and Compliance Phase start and end.\n+- Ohai data collection of Amazon EC2 metadata has been improved to collect additional data for some configurations.\n+- Removed ERROR logs when retrying failed communication with the Chef Infra Server", - "comment_created_at": "2021-08-27T18:56:24+00:00", - "comment_author": "IanMadd", - "comment_body": "```suggestion\r\n- Removed ERROR logs when retrying failed communication with the Chef Infra Server.\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "697659082", - "pr_number": 11976, - "pr_file": "RELEASE_NOTES.md", - "created_at": "2021-08-27T19:01:11+00:00", - "commented_code": "This file holds \"in progress\" release notes for the current release under development and is intended for consumption by the Chef Documentation team. Please see for the official Chef release notes.\n\n## What's New in 17.4\n## What's New in 17.4.38\n\n### Bug fixes\n\n- Resolved a regression introduced in Chef Infra Client 17.4 that would cause HWRP-style resources inheriting from LWRPBase to fail.\n\n### Enhancements\n\n- Log output has been improved to better deliniate when the Infra Phase and Compliance Phase start and end.", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "697659082", - "repo_full_name": "chef/chef", - "pr_number": 11976, - "pr_file": "RELEASE_NOTES.md", - "discussion_id": "697659082", - "commented_code": "@@ -1,6 +1,35 @@\n This file holds \"in progress\" release notes for the current release under development and is intended for consumption by the Chef Documentation team. Please see for the official Chef release notes.\n \n-## What's New in 17.4\n+## What's New in 17.4.38\n+\n+### Bug fixes\n+\n+- Resolved a regression introduced in Chef Infra Client 17.4 that would cause HWRP-style resources inheriting from LWRPBase to fail.\n+\n+### Enhancements\n+\n+- Log output has been improved to better deliniate when the Infra Phase and Compliance Phase start and end.", - "comment_created_at": "2021-08-27T19:01:11+00:00", - "comment_author": "kagarmoe", - "comment_body": "```suggestion\r\n- Improved log output to clearly define where the Infra Phase ends and the Compliance Phase begins.\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-memoize-expensive-operations.json b/_reviewers/chef-memoize-expensive-operations.json new file mode 100644 index 0000000..9a9b357 --- /dev/null +++ b/_reviewers/chef-memoize-expensive-operations.json @@ -0,0 +1,254 @@ +[ + { + "discussion_id": "1374582897", + "pr_number": 13972, + "pr_file": "lib/chef/provider/package/chocolatey.rb", + "created_at": "2023-10-27T13:28:46+00:00", + "commented_code": "#\n # @return [Hash] name-to-version mapping of available packages\n def available_packages\n return @available_packages if @available_packages\n return @available_packages unless @available_packages.nil?\n\n # @available_packages is per object - each resource is an object, meaning if you\n # have a LOT of chocolatey package installs, then this quickly gets very slow.\n # So we use @@choco_available_packages instead - BUT it's important to ensure that\n # the cache is valid before you do this. 
There are two cache items that can change:\n # a) the sources - we check this with cache_is_valid?\n if cache_is_valid? && @@choco_available_packages.is_a?(Hash) &&\n @@choco_available_packages[new_resource.list_options]\n\n # Ensure we have the package names, or else double check...\n need_redo = false\n package_name_array.each do |pkg|\n need_redo = true unless @@choco_available_packages[new_resource.list_options][pkg.downcase]\n end\n return @@choco_available_packages[new_resource.list_options] unless need_redo\n end\n if new_resource.list_options\n Chef::Log.info(\"Fetching chocolatey package list with options #{new_resource.list_options.inspect}\")\n else\n Chef::Log.info(\"Fetching chocolatey package list\")\n end\n\n @available_packages = {}\n package_name_array.each do |pkg|\n # Only reset the array if the cache is invalid - if we're just augmenting it, don't\n # clear it\n @@choco_available_packages = {} if @@choco_available_packages.nil? || !cache_is_valid?\n if @@choco_available_packages[new_resource.list_options].nil?\n @@choco_available_packages[new_resource.list_options] = {}\n end\n\n # Attempt to get everything we need in a single query.\n # Grab 25 packages at a time at most, to avoid hurting servers too badly\n #\n # For v1 we actually used to grab the entire list of packages, but we found\n # that it could cause undue load on really large package lists\n collect_package_requests(\n ignore_list: @@choco_available_packages[new_resource.list_options].keys\n ).each_slice(25) do |pkg_set|\n available_versions =\n begin\n cmd = [ query_command, \"-r\", pkg ]\n cmd += common_options\n cmd.push( new_resource.list_options ) if new_resource.list_options\n\n raw = parse_list_output(*cmd)\n raw.keys.each_with_object({}) do |name, available|\n available[name] = desired_name_versions[name] || raw[name]\n end\n cmd = [ query_command, \"-r\" ]\n\n # Chocolatey doesn't actually take a wildcard for this query, however\n # it will return all packages when using '*' as a query\n unless pkg_set == [\"*\"]\n cmd += pkg_set\n end\n @available_packages.merge! available_versions\n cmd += common_options\n cmd.push( new_resource.list_options ) if new_resource.list_options\n\n Chef::Log.debug(\"Choco List Command: #{cmd}\")\n\n raw = parse_list_output(*cmd)\n raw.keys.each_with_object({}) do |name, available|\n available[name] = desired_name_versions[name] || raw[name]\n end\n end\n @@choco_available_packages[new_resource.list_options].merge!(available_versions)\n end\n @available_packages\n # Mark the cache as valid, with the required metadata\n set_package_cache\n # Why both? So when we fail to find a package once, we don't try on every\n # retry, even though it would be reasonable to do so if queried in another\n # resource (because the chocolatey configuration may well have changed!)\n @available_packages = @@choco_available_packages[new_resource.list_options]\n end\n\n # Installed packages in chocolatey as a Hash of names mapped to versions\n # (names are downcased for case-insensitive matching)\n # (names are downcased for case-insensitive matching). 
Depending on the user\n # preference, we get these either from the local database, or from the choco\n # list command\n #\n # @return [Hash] name-to-version mapping of installed packages\n def installed_packages\n if new_resource.use_choco_list == false || !Chef::Config[:always_use_choco_list]\n installed_packages_via_choco\n else\n installed_packages_via_disk\n end\n end\n\n # Beginning with Choco 2.0, \"list\" returns local packages only while \"search\" returns packages from external package sources\n #\n # @return [Hash] name-to-version mapping of installed packages\n def installed_packages\n def installed_packages_via_choco\n @installed_packages ||= Hash[*parse_list_output(\"list\", \"-l\", \"-r\").flatten]\n @installed_packages\n end\n\n # Return packages sourced from the local disk - because this doesn't have\n # shell out overhead, this ends up being a significant performance win\n # vs calling choco list\n #\n # @return [Hash] name-to-version mapping of installed packages\n def installed_packages_via_disk\n @installed_packages ||= begin\n targets = new_resource.name\n target_dirs = []\n # If we're using a single package name, have it at the head of the list\n # so we can get more performance\n target_dirs << targets.first.downcase if targets.length == 1", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1374582897", + "repo_full_name": "chef/chef", + "pr_number": 13972, + "pr_file": "lib/chef/provider/package/chocolatey.rb", + "discussion_id": "1374582897", + "commented_code": "@@ -230,37 +309,124 @@ def cmd_args(include_source: true)\n #\n # @return [Hash] name-to-version mapping of available packages\n def available_packages\n- return @available_packages if @available_packages\n+ return @available_packages unless @available_packages.nil?\n+\n+ # @available_packages is per object - each resource is an object, meaning if you\n+ # have a LOT of chocolatey package installs, then this quickly gets very slow.\n+ # So we use @@choco_available_packages instead - BUT it's important to ensure that\n+ # the cache is valid before you do this. There are two cache items that can change:\n+ # a) the sources - we check this with cache_is_valid?\n+ if cache_is_valid? && @@choco_available_packages.is_a?(Hash) &&\n+ @@choco_available_packages[new_resource.list_options]\n+\n+ # Ensure we have the package names, or else double check...\n+ need_redo = false\n+ package_name_array.each do |pkg|\n+ need_redo = true unless @@choco_available_packages[new_resource.list_options][pkg.downcase]\n+ end\n+ return @@choco_available_packages[new_resource.list_options] unless need_redo\n+ end\n+ if new_resource.list_options\n+ Chef::Log.info(\"Fetching chocolatey package list with options #{new_resource.list_options.inspect}\")\n+ else\n+ Chef::Log.info(\"Fetching chocolatey package list\")\n+ end\n \n- @available_packages = {}\n- package_name_array.each do |pkg|\n+ # Only reset the array if the cache is invalid - if we're just augmenting it, don't\n+ # clear it\n+ @@choco_available_packages = {} if @@choco_available_packages.nil? 
|| !cache_is_valid?\n+ if @@choco_available_packages[new_resource.list_options].nil?\n+ @@choco_available_packages[new_resource.list_options] = {}\n+ end\n+\n+ # Attempt to get everything we need in a single query.\n+ # Grab 25 packages at a time at most, to avoid hurting servers too badly\n+ #\n+ # For v1 we actually used to grab the entire list of packages, but we found\n+ # that it could cause undue load on really large package lists\n+ collect_package_requests(\n+ ignore_list: @@choco_available_packages[new_resource.list_options].keys\n+ ).each_slice(25) do |pkg_set|\n available_versions =\n begin\n- cmd = [ query_command, \"-r\", pkg ]\n- cmd += common_options\n- cmd.push( new_resource.list_options ) if new_resource.list_options\n-\n- raw = parse_list_output(*cmd)\n- raw.keys.each_with_object({}) do |name, available|\n- available[name] = desired_name_versions[name] || raw[name]\n- end\n+ cmd = [ query_command, \"-r\" ]\n+\n+ # Chocolatey doesn't actually take a wildcard for this query, however\n+ # it will return all packages when using '*' as a query\n+ unless pkg_set == [\"*\"]\n+ cmd += pkg_set\n end\n- @available_packages.merge! available_versions\n+ cmd += common_options\n+ cmd.push( new_resource.list_options ) if new_resource.list_options\n+\n+ Chef::Log.debug(\"Choco List Command: #{cmd}\")\n+\n+ raw = parse_list_output(*cmd)\n+ raw.keys.each_with_object({}) do |name, available|\n+ available[name] = desired_name_versions[name] || raw[name]\n+ end\n+ end\n+ @@choco_available_packages[new_resource.list_options].merge!(available_versions)\n end\n- @available_packages\n+ # Mark the cache as valid, with the required metadata\n+ set_package_cache\n+ # Why both? So when we fail to find a package once, we don't try on every\n+ # retry, even though it would be reasonable to do so if queried in another\n+ # resource (because the chocolatey configuration may well have changed!)\n+ @available_packages = @@choco_available_packages[new_resource.list_options]\n end\n \n # Installed packages in chocolatey as a Hash of names mapped to versions\n- # (names are downcased for case-insensitive matching)\n+ # (names are downcased for case-insensitive matching). 
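The lighter-weight variant that appears a few lines later is plain `||=` memoization: run the expensive listing once per provider instance and answer every later question from the cached hash. A stripped-down sketch (the backtick command is a hypothetical stand-in for the real `parse_list_output` helper):

```ruby
class InstalledPackages
  # The expensive call happens at most once per object; every later call
  # returns the memoized hash of downcased name => version.
  def installed_packages
    @installed_packages ||= begin
      raw = `choco list -l -r`   # stand-in for the real shell-out
      raw.lines.each_with_object({}) do |line, acc|
        name, version = line.strip.split("|")
        acc[name.downcase] = version if name && version
      end
    end
  end
end
```

One caveat with `||=`: if the computation can legitimately return `nil` or `false`, the memo never sticks and the expensive call silently repeats; caching an empty hash, as here, is fine.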
Depending on the user\n+ # preference, we get these either from the local database, or from the choco\n+ # list command\n #\n+ # @return [Hash] name-to-version mapping of installed packages\n+ def installed_packages\n+ if new_resource.use_choco_list == false || !Chef::Config[:always_use_choco_list]\n+ installed_packages_via_choco\n+ else\n+ installed_packages_via_disk\n+ end\n+ end\n+\n # Beginning with Choco 2.0, \"list\" returns local packages only while \"search\" returns packages from external package sources\n #\n # @return [Hash] name-to-version mapping of installed packages\n- def installed_packages\n+ def installed_packages_via_choco\n @installed_packages ||= Hash[*parse_list_output(\"list\", \"-l\", \"-r\").flatten]\n @installed_packages\n end\n \n+ # Return packages sourced from the local disk - because this doesn't have\n+ # shell out overhead, this ends up being a significant performance win\n+ # vs calling choco list\n+ #\n+ # @return [Hash] name-to-version mapping of installed packages\n+ def installed_packages_via_disk\n+ @installed_packages ||= begin\n+ targets = new_resource.name\n+ target_dirs = []\n+ # If we're using a single package name, have it at the head of the list\n+ # so we can get more performance\n+ target_dirs << targets.first.downcase if targets.length == 1", + "comment_created_at": "2023-10-27T13:28:46+00:00", + "comment_author": "tpowell-progress", + "comment_body": "@jaymzjulian ok, one thing I'm not understanding in the current context is... is there ever an `(array length > 1)` scenario, and if so, what happens then?", + "pr_file_module": null + }, + { + "comment_id": "1374840040", + "repo_full_name": "chef/chef", + "pr_number": 13972, + "pr_file": "lib/chef/provider/package/chocolatey.rb", + "discussion_id": "1374582897", + "commented_code": "@@ -230,37 +309,124 @@ def cmd_args(include_source: true)\n #\n # @return [Hash] name-to-version mapping of available packages\n def available_packages\n- return @available_packages if @available_packages\n+ return @available_packages unless @available_packages.nil?\n+\n+ # @available_packages is per object - each resource is an object, meaning if you\n+ # have a LOT of chocolatey package installs, then this quickly gets very slow.\n+ # So we use @@choco_available_packages instead - BUT it's important to ensure that\n+ # the cache is valid before you do this. There are two cache items that can change:\n+ # a) the sources - we check this with cache_is_valid?\n+ if cache_is_valid? && @@choco_available_packages.is_a?(Hash) &&\n+ @@choco_available_packages[new_resource.list_options]\n+\n+ # Ensure we have the package names, or else double check...\n+ need_redo = false\n+ package_name_array.each do |pkg|\n+ need_redo = true unless @@choco_available_packages[new_resource.list_options][pkg.downcase]\n+ end\n+ return @@choco_available_packages[new_resource.list_options] unless need_redo\n+ end\n+ if new_resource.list_options\n+ Chef::Log.info(\"Fetching chocolatey package list with options #{new_resource.list_options.inspect}\")\n+ else\n+ Chef::Log.info(\"Fetching chocolatey package list\")\n+ end\n \n- @available_packages = {}\n- package_name_array.each do |pkg|\n+ # Only reset the array if the cache is invalid - if we're just augmenting it, don't\n+ # clear it\n+ @@choco_available_packages = {} if @@choco_available_packages.nil? 
|| !cache_is_valid?\n+ if @@choco_available_packages[new_resource.list_options].nil?\n+ @@choco_available_packages[new_resource.list_options] = {}\n+ end\n+\n+ # Attempt to get everything we need in a single query.\n+ # Grab 25 packages at a time at most, to avoid hurting servers too badly\n+ #\n+ # For v1 we actually used to grab the entire list of packages, but we found\n+ # that it could cause undue load on really large package lists\n+ collect_package_requests(\n+ ignore_list: @@choco_available_packages[new_resource.list_options].keys\n+ ).each_slice(25) do |pkg_set|\n available_versions =\n begin\n- cmd = [ query_command, \"-r\", pkg ]\n- cmd += common_options\n- cmd.push( new_resource.list_options ) if new_resource.list_options\n-\n- raw = parse_list_output(*cmd)\n- raw.keys.each_with_object({}) do |name, available|\n- available[name] = desired_name_versions[name] || raw[name]\n- end\n+ cmd = [ query_command, \"-r\" ]\n+\n+ # Chocolatey doesn't actually take a wildcard for this query, however\n+ # it will return all packages when using '*' as a query\n+ unless pkg_set == [\"*\"]\n+ cmd += pkg_set\n end\n- @available_packages.merge! available_versions\n+ cmd += common_options\n+ cmd.push( new_resource.list_options ) if new_resource.list_options\n+\n+ Chef::Log.debug(\"Choco List Command: #{cmd}\")\n+\n+ raw = parse_list_output(*cmd)\n+ raw.keys.each_with_object({}) do |name, available|\n+ available[name] = desired_name_versions[name] || raw[name]\n+ end\n+ end\n+ @@choco_available_packages[new_resource.list_options].merge!(available_versions)\n end\n- @available_packages\n+ # Mark the cache as valid, with the required metadata\n+ set_package_cache\n+ # Why both? So when we fail to find a package once, we don't try on every\n+ # retry, even though it would be reasonable to do so if queried in another\n+ # resource (because the chocolatey configuration may well have changed!)\n+ @available_packages = @@choco_available_packages[new_resource.list_options]\n end\n \n # Installed packages in chocolatey as a Hash of names mapped to versions\n- # (names are downcased for case-insensitive matching)\n+ # (names are downcased for case-insensitive matching). 
Depending on the user\n+ # preference, we get these either from the local database, or from the choco\n+ # list command\n #\n+ # @return [Hash] name-to-version mapping of installed packages\n+ def installed_packages\n+ if new_resource.use_choco_list == false || !Chef::Config[:always_use_choco_list]\n+ installed_packages_via_choco\n+ else\n+ installed_packages_via_disk\n+ end\n+ end\n+\n # Beginning with Choco 2.0, \"list\" returns local packages only while \"search\" returns packages from external package sources\n #\n # @return [Hash] name-to-version mapping of installed packages\n- def installed_packages\n+ def installed_packages_via_choco\n @installed_packages ||= Hash[*parse_list_output(\"list\", \"-l\", \"-r\").flatten]\n @installed_packages\n end\n \n+ # Return packages sourced from the local disk - because this doesn't have\n+ # shell out overhead, this ends up being a significant performance win\n+ # vs calling choco list\n+ #\n+ # @return [Hash] name-to-version mapping of installed packages\n+ def installed_packages_via_disk\n+ @installed_packages ||= begin\n+ targets = new_resource.name\n+ target_dirs = []\n+ # If we're using a single package name, have it at the head of the list\n+ # so we can get more performance\n+ target_dirs << targets.first.downcase if targets.length == 1", + "comment_created_at": "2023-10-27T17:03:53+00:00", + "comment_author": "jaymzjulian", + "comment_body": "This is an optimization to try and push our particular package to the front of the queue if there is one and only one package specified. If that doesn't happen, target_dirs instead gets filled at line 416 with:\r\n`target_dirs += get_local_pkg_dirs(choco_lib_path)`\r\n\r\nwhich gets all of the possible nupkg folders to search through. This takes the 90% case to being \"search half of the package folders on average\" to \"search one package on average\"\r\n\r\nEDIT: I added a comment to make this clearer", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1267241022", + "pr_number": 13833, + "pr_file": "lib/chef/provider/package/chocolatey.rb", + "created_at": "2023-07-18T19:46:48+00:00", + "commented_code": "cmd_args\n end\n\n # Choco V2 uses 'Search' for remote repositories and 'List' for local packages\n def query_command\n choco_version = powershell_exec!(\"choco --version\").result", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1267241022", + "repo_full_name": "chef/chef", + "pr_number": 13833, + "pr_file": "lib/chef/provider/package/chocolatey.rb", + "discussion_id": "1267241022", + "commented_code": "@@ -213,19 +213,26 @@ def cmd_args(include_source: true)\n cmd_args\n end\n \n+ # Choco V2 uses 'Search' for remote repositories and 'List' for local packages\n+ def query_command\n+ choco_version = powershell_exec!(\"choco --version\").result", + "comment_created_at": "2023-07-18T19:46:48+00:00", + "comment_author": "jaymzh", + "comment_body": "This needs to be memoized. Probably something like:\r\n\r\n```ruby\r\ndef choco_ver\r\n @choco_ver ||= begin\r\n powershell_exec!(\"choco --version\").result[0].to_i\r\n end\r\nend\r\n\r\ndef query_command\r\n choco_version == 1 ? 
'list' : 'search'\r\nend\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "633920952", + "pr_number": 11579, + "pr_file": "lib/chef/provider/mount/linux.rb", + "created_at": "2021-05-17T23:04:31+00:00", + "commented_code": "def mounted?\n mounted = false\n loop_mount_points = shell_out!(\"losetup --list\").stdout", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "633920952", + "repo_full_name": "chef/chef", + "pr_number": 11579, + "pr_file": "lib/chef/provider/mount/linux.rb", + "discussion_id": "633920952", + "commented_code": "@@ -32,6 +32,7 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n+ loop_mount_points = shell_out!(\"losetup --list\").stdout", + "comment_created_at": "2021-05-17T23:04:31+00:00", + "comment_author": "tas50", + "comment_body": "Won't this run every time even if we don't need the data now?", + "pr_file_module": null + }, + { + "comment_id": "634258415", + "repo_full_name": "chef/chef", + "pr_number": 11579, + "pr_file": "lib/chef/provider/mount/linux.rb", + "discussion_id": "633920952", + "commented_code": "@@ -32,6 +32,7 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n+ loop_mount_points = shell_out!(\"losetup --list\").stdout", + "comment_created_at": "2021-05-18T10:40:47+00:00", + "comment_author": "msys-sgarg", + "comment_body": "That's right, earlier suggestion was to initialize this list of mount points as soon as the `Mount` class is initialised but there the specs fail.. Since rspec's mock up functionality is not ready by the time `Mount` class is initialized. By running it once here we make sure its not run in the loop while we are going through all the mounts points in system with command `findmnt -rn`.", + "pr_file_module": null + }, + { + "comment_id": "634911054", + "repo_full_name": "chef/chef", + "pr_number": 11579, + "pr_file": "lib/chef/provider/mount/linux.rb", + "discussion_id": "633920952", + "commented_code": "@@ -32,6 +32,7 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n+ loop_mount_points = shell_out!(\"losetup --list\").stdout", + "comment_created_at": "2021-05-19T04:51:11+00:00", + "comment_author": "msys-sgarg", + "comment_body": "Also at this point of time in code its not possible for us to know if the mount is of type loop or not for sure . We need this data afterwards to make sure we have information of `loop` mount points", + "pr_file_module": null + }, + { + "comment_id": "636954878", + "repo_full_name": "chef/chef", + "pr_number": 11579, + "pr_file": "lib/chef/provider/mount/linux.rb", + "discussion_id": "633920952", + "commented_code": "@@ -32,6 +32,7 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n+ loop_mount_points = shell_out!(\"losetup --list\").stdout", + "comment_created_at": "2021-05-21T14:15:46+00:00", + "comment_author": "msys-sgarg", + "comment_body": "@tas50 does the PR look good ? 
or any need of updates ?", + "pr_file_module": null + }, + { + "comment_id": "637259636", + "repo_full_name": "chef/chef", + "pr_number": 11579, + "pr_file": "lib/chef/provider/mount/linux.rb", + "discussion_id": "633920952", + "commented_code": "@@ -32,6 +32,7 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n+ loop_mount_points = shell_out!(\"losetup --list\").stdout", + "comment_created_at": "2021-05-21T22:12:04+00:00", + "comment_author": "tas50", + "comment_body": "I still don't understand why we can't just do this shellout when we need the data vs. frontloading the work and doing extra shellouts we might not need.", + "pr_file_module": null + }, + { + "comment_id": "755491999", + "repo_full_name": "chef/chef", + "pr_number": 11579, + "pr_file": "lib/chef/provider/mount/linux.rb", + "discussion_id": "633920952", + "commented_code": "@@ -32,6 +32,7 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n+ loop_mount_points = shell_out!(\"losetup --list\").stdout", + "comment_created_at": "2021-11-23T20:48:06+00:00", + "comment_author": "jaymzh", + "comment_body": "@tas50 because calling losetup 93048234092834 times is bad. I asked for that.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "637259296", + "pr_number": 11579, + "pr_file": "lib/chef/provider/mount/linux.rb", + "created_at": "2021-05-21T22:10:58+00:00", + "commented_code": "def mounted?\n mounted = false\n\n real_mount_point = if ::File.exists? @new_resource.mount_point\n ::File.realpath(@new_resource.mount_point)\n else\n @new_resource.mount_point\n end\n\n # get the output of losetup command to check loop mount points\n loop_mount_points = shell_out!(\"losetup -a\").stdout", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "637259296", + "repo_full_name": "chef/chef", + "pr_number": 11579, + "pr_file": "lib/chef/provider/mount/linux.rb", + "discussion_id": "637259296", + "commented_code": "@@ -32,19 +33,29 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n-\n real_mount_point = if ::File.exists? @new_resource.mount_point\n ::File.realpath(@new_resource.mount_point)\n else\n @new_resource.mount_point\n end\n \n+ # get the output of losetup command to check loop mount points\n+ loop_mount_points = shell_out!(\"losetup -a\").stdout", + "comment_created_at": "2021-05-21T22:10:58+00:00", + "comment_author": "tas50", + "comment_body": "This should really only be done when we need it down in the case statement on line 45. Otherwise we're shelling out here for no reason when the first when of the case statement is reached.", + "pr_file_module": null + }, + { + "comment_id": "637683618", + "repo_full_name": "chef/chef", + "pr_number": 11579, + "pr_file": "lib/chef/provider/mount/linux.rb", + "discussion_id": "637259296", + "commented_code": "@@ -32,19 +33,29 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n-\n real_mount_point = if ::File.exists? 
@new_resource.mount_point\n ::File.realpath(@new_resource.mount_point)\n else\n @new_resource.mount_point\n end\n \n+ # get the output of losetup command to check loop mount points\n+ loop_mount_points = shell_out!(\"losetup -a\").stdout", + "comment_created_at": "2021-05-24T04:43:35+00:00", + "comment_author": "msys-sgarg", + "comment_body": "Hi, as per this comment on earlier PR [11376 comment](https://github.com/chef/chef/pull/11376#discussion_r616854963), it was not a good idea to keep the `shell_out` in case-statement, hence i moved it at the top of loop. ", + "pr_file_module": null + }, + { + "comment_id": "637731789", + "repo_full_name": "chef/chef", + "pr_number": 11579, + "pr_file": "lib/chef/provider/mount/linux.rb", + "discussion_id": "637259296", + "commented_code": "@@ -32,19 +33,29 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n-\n real_mount_point = if ::File.exists? @new_resource.mount_point\n ::File.realpath(@new_resource.mount_point)\n else\n @new_resource.mount_point\n end\n \n+ # get the output of losetup command to check loop mount points\n+ loop_mount_points = shell_out!(\"losetup -a\").stdout", + "comment_created_at": "2021-05-24T07:01:16+00:00", + "comment_author": "msys-sgarg", + "comment_body": "How about this approach ? \r\n```\r\n def loop_mount_points\r\n #get loop_moint_points only if not initialized earlier\r\n @loop_mount_points ||= shell_out!(\"losetup -a\").stdout\r\n end\r\n\r\n def mounted?\r\n mounted = false\r\n real_mount_point = if ::File.exists? @new_resource.mount_point\r\n ::File.realpath(@new_resource.mount_point)\r\n else\r\n @new_resource.mount_point\r\n end\r\n\r\n shell_out!(\"findmnt -rn\").stdout.each_line do |line|\r\n case line\r\n # Permalink for device already mounted to mount point for : https://rubular.com/r/L0RNnD4gf2DJGl\r\n when /\\A#{Regexp.escape(real_mount_point)}\\s+#{device_mount_regex}\\s/\r\n mounted = true\r\n logger.trace(\"Special device #{device_logstring} mounted as #{real_mount_point}\")\r\n # Permalink for loop type devices mount points https://rubular.com/r/a0bS4p2RvXsGxx\r\n when %r{\\A#{Regexp.escape(real_mount_point)}\\s+\\/dev\\/loop+[0-9]+\\s}\r\n loop_mount_points.each_line do |mount_point|\r\n if mount_point.include? device_real\r\n mounted = true\r\n break\r\n end\r\n end\r\n ```", + "pr_file_module": null + }, + { + "comment_id": "639696774", + "repo_full_name": "chef/chef", + "pr_number": 11579, + "pr_file": "lib/chef/provider/mount/linux.rb", + "discussion_id": "637259296", + "commented_code": "@@ -32,19 +33,29 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n-\n real_mount_point = if ::File.exists? @new_resource.mount_point\n ::File.realpath(@new_resource.mount_point)\n else\n @new_resource.mount_point\n end\n \n+ # get the output of losetup command to check loop mount points\n+ loop_mount_points = shell_out!(\"losetup -a\").stdout", + "comment_created_at": "2021-05-26T12:54:17+00:00", + "comment_author": "msys-sgarg", + "comment_body": "@tas50, please let me know if the above approach looks fine, i will make finalise the changes. Thanks", + "pr_file_module": null + }, + { + "comment_id": "648262847", + "repo_full_name": "chef/chef", + "pr_number": 11579, + "pr_file": "lib/chef/provider/mount/linux.rb", + "discussion_id": "637259296", + "commented_code": "@@ -32,19 +33,29 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n-\n real_mount_point = if ::File.exists? 
@new_resource.mount_point\n ::File.realpath(@new_resource.mount_point)\n else\n @new_resource.mount_point\n end\n \n+ # get the output of losetup command to check loop mount points\n+ loop_mount_points = shell_out!(\"losetup -a\").stdout", + "comment_created_at": "2021-06-09T12:42:21+00:00", + "comment_author": "msys-sgarg", + "comment_body": "@tas50 Hi, shall I go ahead with this approach ? and update the PR ? ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "549524521", + "pr_number": 10776, + "pr_file": "lib/chef/provider/service/systemd.rb", + "created_at": "2020-12-29T00:12:09+00:00", + "commented_code": "end\n end\n\n def systemd_service_status\n # If the status has already been collected, return that\n return @service_status unless @service_status.nil?\n\n # Collect all the status information for a service and returns it at once\n options, args = get_systemctl_options_args\n s = shell_out!(\"#{systemctl_path} #{args} show -p UnitFileState -p ActiveState #{new_resource.service_name}\", options)\n # e.g. /bin/systemctl --system show -p UnitFileState -p ActiveState sshd.service\n # Returns something like:\n # ActiveState=active\n # UnitFileState=enabled\n status = {}\n s.stdout.each_line do |line|\n k, v = line.strip.split(\"=\")\n status[k] = v\n end\n # Assert requisite keys exist\n unless status.key?(\"UnitFileState\") && status.key?(\"ActiveState\")\n raise Mixlib::ShellOut::ShellCommandFailed, \"'#{systemctl_path} show' not reporting status for #{new_resource.service_name}!\"\n end\n\n @service_status = status\n status\n end", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "549524521", + "repo_full_name": "chef/chef", + "pr_number": 10776, + "pr_file": "lib/chef/provider/service/systemd.rb", + "discussion_id": "549524521", + "commented_code": "@@ -76,6 +76,31 @@ def define_resource_requirements\n end\n end\n \n+ def systemd_service_status\n+ # If the status has already been collected, return that\n+ return @service_status unless @service_status.nil?\n+\n+ # Collect all the status information for a service and returns it at once\n+ options, args = get_systemctl_options_args\n+ s = shell_out!(\"#{systemctl_path} #{args} show -p UnitFileState -p ActiveState #{new_resource.service_name}\", options)\n+ # e.g. /bin/systemctl --system show -p UnitFileState -p ActiveState sshd.service\n+ # Returns something like:\n+ # ActiveState=active\n+ # UnitFileState=enabled\n+ status = {}\n+ s.stdout.each_line do |line|\n+ k, v = line.strip.split(\"=\")\n+ status[k] = v\n+ end\n+ # Assert requisite keys exist\n+ unless status.key?(\"UnitFileState\") && status.key?(\"ActiveState\")\n+ raise Mixlib::ShellOut::ShellCommandFailed, \"'#{systemctl_path} show' not reporting status for #{new_resource.service_name}!\"\n+ end\n+\n+ @service_status = status\n+ status\n+ end", + "comment_created_at": "2020-12-29T00:12:09+00:00", + "comment_author": "phiggins", + "comment_body": "The idiomatic ruby way to write this is memoized with a block:\r\n\r\n```suggestion\r\n def systemd_service_status\r\n @systemd_service_status ||= begin\r\n # Collect all the status information for a service and returns it at once\r\n options, args = get_systemctl_options_args\r\n s = shell_out!(\"#{systemctl_path} #{args} show -p UnitFileState -p ActiveState #{new_resource.service_name}\", options)\r\n # e.g. 
/bin/systemctl --system show -p UnitFileState -p ActiveState sshd.service\r\n # Returns something like:\r\n # ActiveState=active\r\n # UnitFileState=enabled\r\n status = {}\r\n s.stdout.each_line do |line|\r\n k, v = line.strip.split(\"=\")\r\n status[k] = v\r\n end\r\n \r\n # Assert requisite keys exist\r\n unless status.key?(\"UnitFileState\") && status.key?(\"ActiveState\")\r\n raise Mixlib::ShellOut::ShellCommandFailed, \"'#{systemctl_path} show' not reporting status for #{new_resource.service_name}!\"\r\n end\r\n \r\n status\r\n end\r\n end\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "551491650", + "pr_number": 10776, + "pr_file": "lib/chef/provider/service/systemd.rb", + "created_at": "2021-01-04T18:33:15+00:00", + "commented_code": "end\n end\n\n def systemd_service_status\n @service_status ||= begin\n # Collect all the status information for a service and returns it at once\n options, args = get_systemctl_options_args\n s = shell_out!(\"#{systemctl_path} #{args} show -p UnitFileState -p ActiveState #{new_resource.service_name}\", options)", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "551491650", + "repo_full_name": "chef/chef", + "pr_number": 10776, + "pr_file": "lib/chef/provider/service/systemd.rb", + "discussion_id": "551491650", + "commented_code": "@@ -76,6 +76,30 @@ def define_resource_requirements\n end\n end\n \n+ def systemd_service_status\n+ @service_status ||= begin\n+ # Collect all the status information for a service and returns it at once\n+ options, args = get_systemctl_options_args\n+ s = shell_out!(\"#{systemctl_path} #{args} show -p UnitFileState -p ActiveState #{new_resource.service_name}\", options)", + "comment_created_at": "2021-01-04T18:33:15+00:00", + "comment_author": "lamont-granquist", + "comment_body": "you should use the *splat args style of calling here so:\r\n\r\n```\r\nshell_out!(systemctl_path, args, \"show\", \"-p\", \"UnitFileState\", \"-p\", \"ActiveState\", new_resource.service_name, options)\r\n```\r\n\r\nif that gets done correctly then it should avoid invoking the shell to parse the arguments, which should increase the perf even more (and has other benefits in not needing to worry about filenames with spaces and quotes around them, etc, etc).", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-memoize-expensive-operations.md b/_reviewers/chef-memoize-expensive-operations.md index 41d72f0..9abc57a 100644 --- a/_reviewers/chef-memoize-expensive-operations.md +++ b/_reviewers/chef-memoize-expensive-operations.md @@ -47,259 +47,3 @@ target_dirs << targets.first.downcase if targets.length == 1 ``` Always benchmark critical code paths before and after optimization to verify improvements. - - -[ - { - "discussion_id": "1374582897", - "pr_number": 13972, - "pr_file": "lib/chef/provider/package/chocolatey.rb", - "created_at": "2023-10-27T13:28:46+00:00", - "commented_code": "#\n # @return [Hash] name-to-version mapping of available packages\n def available_packages\n return @available_packages if @available_packages\n return @available_packages unless @available_packages.nil?\n\n # @available_packages is per object - each resource is an object, meaning if you\n # have a LOT of chocolatey package installs, then this quickly gets very slow.\n # So we use @@choco_available_packages instead - BUT it's important to ensure that\n # the cache is valid before you do this. 
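The splat-style call advice from the systemd discussion above generalizes beyond `shell_out!`: passing the command and each argument as separate strings skips the shell entirely, so nothing re-parses quotes or whitespace and one fewer process is spawned. A small illustrative sketch using Ruby's built-in `system` (the unit name is hypothetical):

```ruby
# Single-string form: a shell is spawned to parse the line, so quoting,
# spaces, and metacharacters in the arguments all matter.
system("/bin/systemctl --system show -p UnitFileState -p ActiveState sshd.service")

# Argument-list form: no shell involved, each element reaches the process
# verbatim -- the same idea as the *splat calling style for shell_out!.
system("/bin/systemctl", "--system", "show",
       "-p", "UnitFileState", "-p", "ActiveState", "sshd.service")
```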
There are two cache items that can change:\n # a) the sources - we check this with cache_is_valid?\n if cache_is_valid? && @@choco_available_packages.is_a?(Hash) &&\n @@choco_available_packages[new_resource.list_options]\n\n # Ensure we have the package names, or else double check...\n need_redo = false\n package_name_array.each do |pkg|\n need_redo = true unless @@choco_available_packages[new_resource.list_options][pkg.downcase]\n end\n return @@choco_available_packages[new_resource.list_options] unless need_redo\n end\n if new_resource.list_options\n Chef::Log.info(\"Fetching chocolatey package list with options #{new_resource.list_options.inspect}\")\n else\n Chef::Log.info(\"Fetching chocolatey package list\")\n end\n\n @available_packages = {}\n package_name_array.each do |pkg|\n # Only reset the array if the cache is invalid - if we're just augmenting it, don't\n # clear it\n @@choco_available_packages = {} if @@choco_available_packages.nil? || !cache_is_valid?\n if @@choco_available_packages[new_resource.list_options].nil?\n @@choco_available_packages[new_resource.list_options] = {}\n end\n\n # Attempt to get everything we need in a single query.\n # Grab 25 packages at a time at most, to avoid hurting servers too badly\n #\n # For v1 we actually used to grab the entire list of packages, but we found\n # that it could cause undue load on really large package lists\n collect_package_requests(\n ignore_list: @@choco_available_packages[new_resource.list_options].keys\n ).each_slice(25) do |pkg_set|\n available_versions =\n begin\n cmd = [ query_command, \"-r\", pkg ]\n cmd += common_options\n cmd.push( new_resource.list_options ) if new_resource.list_options\n\n raw = parse_list_output(*cmd)\n raw.keys.each_with_object({}) do |name, available|\n available[name] = desired_name_versions[name] || raw[name]\n end\n cmd = [ query_command, \"-r\" ]\n\n # Chocolatey doesn't actually take a wildcard for this query, however\n # it will return all packages when using '*' as a query\n unless pkg_set == [\"*\"]\n cmd += pkg_set\n end\n @available_packages.merge! available_versions\n cmd += common_options\n cmd.push( new_resource.list_options ) if new_resource.list_options\n\n Chef::Log.debug(\"Choco List Command: #{cmd}\")\n\n raw = parse_list_output(*cmd)\n raw.keys.each_with_object({}) do |name, available|\n available[name] = desired_name_versions[name] || raw[name]\n end\n end\n @@choco_available_packages[new_resource.list_options].merge!(available_versions)\n end\n @available_packages\n # Mark the cache as valid, with the required metadata\n set_package_cache\n # Why both? So when we fail to find a package once, we don't try on every\n # retry, even though it would be reasonable to do so if queried in another\n # resource (because the chocolatey configuration may well have changed!)\n @available_packages = @@choco_available_packages[new_resource.list_options]\n end\n\n # Installed packages in chocolatey as a Hash of names mapped to versions\n # (names are downcased for case-insensitive matching)\n # (names are downcased for case-insensitive matching). 
Depending on the user\n # preference, we get these either from the local database, or from the choco\n # list command\n #\n # @return [Hash] name-to-version mapping of installed packages\n def installed_packages\n if new_resource.use_choco_list == false || !Chef::Config[:always_use_choco_list]\n installed_packages_via_choco\n else\n installed_packages_via_disk\n end\n end\n\n # Beginning with Choco 2.0, \"list\" returns local packages only while \"search\" returns packages from external package sources\n #\n # @return [Hash] name-to-version mapping of installed packages\n def installed_packages\n def installed_packages_via_choco\n @installed_packages ||= Hash[*parse_list_output(\"list\", \"-l\", \"-r\").flatten]\n @installed_packages\n end\n\n # Return packages sourced from the local disk - because this doesn't have\n # shell out overhead, this ends up being a significant performance win\n # vs calling choco list\n #\n # @return [Hash] name-to-version mapping of installed packages\n def installed_packages_via_disk\n @installed_packages ||= begin\n targets = new_resource.name\n target_dirs = []\n # If we're using a single package name, have it at the head of the list\n # so we can get more performance\n target_dirs << targets.first.downcase if targets.length == 1", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1374582897", - "repo_full_name": "chef/chef", - "pr_number": 13972, - "pr_file": "lib/chef/provider/package/chocolatey.rb", - "discussion_id": "1374582897", - "commented_code": "@@ -230,37 +309,124 @@ def cmd_args(include_source: true)\n #\n # @return [Hash] name-to-version mapping of available packages\n def available_packages\n- return @available_packages if @available_packages\n+ return @available_packages unless @available_packages.nil?\n+\n+ # @available_packages is per object - each resource is an object, meaning if you\n+ # have a LOT of chocolatey package installs, then this quickly gets very slow.\n+ # So we use @@choco_available_packages instead - BUT it's important to ensure that\n+ # the cache is valid before you do this. There are two cache items that can change:\n+ # a) the sources - we check this with cache_is_valid?\n+ if cache_is_valid? && @@choco_available_packages.is_a?(Hash) &&\n+ @@choco_available_packages[new_resource.list_options]\n+\n+ # Ensure we have the package names, or else double check...\n+ need_redo = false\n+ package_name_array.each do |pkg|\n+ need_redo = true unless @@choco_available_packages[new_resource.list_options][pkg.downcase]\n+ end\n+ return @@choco_available_packages[new_resource.list_options] unless need_redo\n+ end\n+ if new_resource.list_options\n+ Chef::Log.info(\"Fetching chocolatey package list with options #{new_resource.list_options.inspect}\")\n+ else\n+ Chef::Log.info(\"Fetching chocolatey package list\")\n+ end\n \n- @available_packages = {}\n- package_name_array.each do |pkg|\n+ # Only reset the array if the cache is invalid - if we're just augmenting it, don't\n+ # clear it\n+ @@choco_available_packages = {} if @@choco_available_packages.nil? 
|| !cache_is_valid?\n+ if @@choco_available_packages[new_resource.list_options].nil?\n+ @@choco_available_packages[new_resource.list_options] = {}\n+ end\n+\n+ # Attempt to get everything we need in a single query.\n+ # Grab 25 packages at a time at most, to avoid hurting servers too badly\n+ #\n+ # For v1 we actually used to grab the entire list of packages, but we found\n+ # that it could cause undue load on really large package lists\n+ collect_package_requests(\n+ ignore_list: @@choco_available_packages[new_resource.list_options].keys\n+ ).each_slice(25) do |pkg_set|\n available_versions =\n begin\n- cmd = [ query_command, \"-r\", pkg ]\n- cmd += common_options\n- cmd.push( new_resource.list_options ) if new_resource.list_options\n-\n- raw = parse_list_output(*cmd)\n- raw.keys.each_with_object({}) do |name, available|\n- available[name] = desired_name_versions[name] || raw[name]\n- end\n+ cmd = [ query_command, \"-r\" ]\n+\n+ # Chocolatey doesn't actually take a wildcard for this query, however\n+ # it will return all packages when using '*' as a query\n+ unless pkg_set == [\"*\"]\n+ cmd += pkg_set\n end\n- @available_packages.merge! available_versions\n+ cmd += common_options\n+ cmd.push( new_resource.list_options ) if new_resource.list_options\n+\n+ Chef::Log.debug(\"Choco List Command: #{cmd}\")\n+\n+ raw = parse_list_output(*cmd)\n+ raw.keys.each_with_object({}) do |name, available|\n+ available[name] = desired_name_versions[name] || raw[name]\n+ end\n+ end\n+ @@choco_available_packages[new_resource.list_options].merge!(available_versions)\n end\n- @available_packages\n+ # Mark the cache as valid, with the required metadata\n+ set_package_cache\n+ # Why both? So when we fail to find a package once, we don't try on every\n+ # retry, even though it would be reasonable to do so if queried in another\n+ # resource (because the chocolatey configuration may well have changed!)\n+ @available_packages = @@choco_available_packages[new_resource.list_options]\n end\n \n # Installed packages in chocolatey as a Hash of names mapped to versions\n- # (names are downcased for case-insensitive matching)\n+ # (names are downcased for case-insensitive matching). 
Depending on the user\n+ # preference, we get these either from the local database, or from the choco\n+ # list command\n #\n+ # @return [Hash] name-to-version mapping of installed packages\n+ def installed_packages\n+ if new_resource.use_choco_list == false || !Chef::Config[:always_use_choco_list]\n+ installed_packages_via_choco\n+ else\n+ installed_packages_via_disk\n+ end\n+ end\n+\n # Beginning with Choco 2.0, \"list\" returns local packages only while \"search\" returns packages from external package sources\n #\n # @return [Hash] name-to-version mapping of installed packages\n- def installed_packages\n+ def installed_packages_via_choco\n @installed_packages ||= Hash[*parse_list_output(\"list\", \"-l\", \"-r\").flatten]\n @installed_packages\n end\n \n+ # Return packages sourced from the local disk - because this doesn't have\n+ # shell out overhead, this ends up being a significant performance win\n+ # vs calling choco list\n+ #\n+ # @return [Hash] name-to-version mapping of installed packages\n+ def installed_packages_via_disk\n+ @installed_packages ||= begin\n+ targets = new_resource.name\n+ target_dirs = []\n+ # If we're using a single package name, have it at the head of the list\n+ # so we can get more performance\n+ target_dirs << targets.first.downcase if targets.length == 1", - "comment_created_at": "2023-10-27T13:28:46+00:00", - "comment_author": "tpowell-progress", - "comment_body": "@jaymzjulian ok, one thing I'm not understanding in the current context is... is there ever an `(array length > 1)` scenario, and if so, what happens then?", - "pr_file_module": null - }, - { - "comment_id": "1374840040", - "repo_full_name": "chef/chef", - "pr_number": 13972, - "pr_file": "lib/chef/provider/package/chocolatey.rb", - "discussion_id": "1374582897", - "commented_code": "@@ -230,37 +309,124 @@ def cmd_args(include_source: true)\n #\n # @return [Hash] name-to-version mapping of available packages\n def available_packages\n- return @available_packages if @available_packages\n+ return @available_packages unless @available_packages.nil?\n+\n+ # @available_packages is per object - each resource is an object, meaning if you\n+ # have a LOT of chocolatey package installs, then this quickly gets very slow.\n+ # So we use @@choco_available_packages instead - BUT it's important to ensure that\n+ # the cache is valid before you do this. There are two cache items that can change:\n+ # a) the sources - we check this with cache_is_valid?\n+ if cache_is_valid? && @@choco_available_packages.is_a?(Hash) &&\n+ @@choco_available_packages[new_resource.list_options]\n+\n+ # Ensure we have the package names, or else double check...\n+ need_redo = false\n+ package_name_array.each do |pkg|\n+ need_redo = true unless @@choco_available_packages[new_resource.list_options][pkg.downcase]\n+ end\n+ return @@choco_available_packages[new_resource.list_options] unless need_redo\n+ end\n+ if new_resource.list_options\n+ Chef::Log.info(\"Fetching chocolatey package list with options #{new_resource.list_options.inspect}\")\n+ else\n+ Chef::Log.info(\"Fetching chocolatey package list\")\n+ end\n \n- @available_packages = {}\n- package_name_array.each do |pkg|\n+ # Only reset the array if the cache is invalid - if we're just augmenting it, don't\n+ # clear it\n+ @@choco_available_packages = {} if @@choco_available_packages.nil? 
|| !cache_is_valid?\n+ if @@choco_available_packages[new_resource.list_options].nil?\n+ @@choco_available_packages[new_resource.list_options] = {}\n+ end\n+\n+ # Attempt to get everything we need in a single query.\n+ # Grab 25 packages at a time at most, to avoid hurting servers too badly\n+ #\n+ # For v1 we actually used to grab the entire list of packages, but we found\n+ # that it could cause undue load on really large package lists\n+ collect_package_requests(\n+ ignore_list: @@choco_available_packages[new_resource.list_options].keys\n+ ).each_slice(25) do |pkg_set|\n available_versions =\n begin\n- cmd = [ query_command, \"-r\", pkg ]\n- cmd += common_options\n- cmd.push( new_resource.list_options ) if new_resource.list_options\n-\n- raw = parse_list_output(*cmd)\n- raw.keys.each_with_object({}) do |name, available|\n- available[name] = desired_name_versions[name] || raw[name]\n- end\n+ cmd = [ query_command, \"-r\" ]\n+\n+ # Chocolatey doesn't actually take a wildcard for this query, however\n+ # it will return all packages when using '*' as a query\n+ unless pkg_set == [\"*\"]\n+ cmd += pkg_set\n end\n- @available_packages.merge! available_versions\n+ cmd += common_options\n+ cmd.push( new_resource.list_options ) if new_resource.list_options\n+\n+ Chef::Log.debug(\"Choco List Command: #{cmd}\")\n+\n+ raw = parse_list_output(*cmd)\n+ raw.keys.each_with_object({}) do |name, available|\n+ available[name] = desired_name_versions[name] || raw[name]\n+ end\n+ end\n+ @@choco_available_packages[new_resource.list_options].merge!(available_versions)\n end\n- @available_packages\n+ # Mark the cache as valid, with the required metadata\n+ set_package_cache\n+ # Why both? So when we fail to find a package once, we don't try on every\n+ # retry, even though it would be reasonable to do so if queried in another\n+ # resource (because the chocolatey configuration may well have changed!)\n+ @available_packages = @@choco_available_packages[new_resource.list_options]\n end\n \n # Installed packages in chocolatey as a Hash of names mapped to versions\n- # (names are downcased for case-insensitive matching)\n+ # (names are downcased for case-insensitive matching). 
Depending on the user\n+ # preference, we get these either from the local database, or from the choco\n+ # list command\n #\n+ # @return [Hash] name-to-version mapping of installed packages\n+ def installed_packages\n+ if new_resource.use_choco_list == false || !Chef::Config[:always_use_choco_list]\n+ installed_packages_via_choco\n+ else\n+ installed_packages_via_disk\n+ end\n+ end\n+\n # Beginning with Choco 2.0, \"list\" returns local packages only while \"search\" returns packages from external package sources\n #\n # @return [Hash] name-to-version mapping of installed packages\n- def installed_packages\n+ def installed_packages_via_choco\n @installed_packages ||= Hash[*parse_list_output(\"list\", \"-l\", \"-r\").flatten]\n @installed_packages\n end\n \n+ # Return packages sourced from the local disk - because this doesn't have\n+ # shell out overhead, this ends up being a significant performance win\n+ # vs calling choco list\n+ #\n+ # @return [Hash] name-to-version mapping of installed packages\n+ def installed_packages_via_disk\n+ @installed_packages ||= begin\n+ targets = new_resource.name\n+ target_dirs = []\n+ # If we're using a single package name, have it at the head of the list\n+ # so we can get more performance\n+ target_dirs << targets.first.downcase if targets.length == 1", - "comment_created_at": "2023-10-27T17:03:53+00:00", - "comment_author": "jaymzjulian", - "comment_body": "This is an optimization to try and push our particular package to the front of the queue if there is one and only one package specified. If that doesn't happen, target_dirs instead gets filled at line 416 with:\r\n`target_dirs += get_local_pkg_dirs(choco_lib_path)`\r\n\r\nwhich gets all of the possible nupkg folders to search through. This takes the 90% case to being \"search half of the package folders on average\" to \"search one package on average\"\r\n\r\nEDIT: I added a comment to make this clearer", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1267241022", - "pr_number": 13833, - "pr_file": "lib/chef/provider/package/chocolatey.rb", - "created_at": "2023-07-18T19:46:48+00:00", - "commented_code": "cmd_args\n end\n\n # Choco V2 uses 'Search' for remote repositories and 'List' for local packages\n def query_command\n choco_version = powershell_exec!(\"choco --version\").result", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1267241022", - "repo_full_name": "chef/chef", - "pr_number": 13833, - "pr_file": "lib/chef/provider/package/chocolatey.rb", - "discussion_id": "1267241022", - "commented_code": "@@ -213,19 +213,26 @@ def cmd_args(include_source: true)\n cmd_args\n end\n \n+ # Choco V2 uses 'Search' for remote repositories and 'List' for local packages\n+ def query_command\n+ choco_version = powershell_exec!(\"choco --version\").result", - "comment_created_at": "2023-07-18T19:46:48+00:00", - "comment_author": "jaymzh", - "comment_body": "This needs to be memoized. Probably something like:\r\n\r\n```ruby\r\ndef choco_ver\r\n @choco_ver ||= begin\r\n powershell_exec!(\"choco --version\").result[0].to_i\r\n end\r\nend\r\n\r\ndef query_command\r\n choco_version == 1 ? 
'list' : 'search'\r\nend\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "633920952", - "pr_number": 11579, - "pr_file": "lib/chef/provider/mount/linux.rb", - "created_at": "2021-05-17T23:04:31+00:00", - "commented_code": "def mounted?\n mounted = false\n loop_mount_points = shell_out!(\"losetup --list\").stdout", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "633920952", - "repo_full_name": "chef/chef", - "pr_number": 11579, - "pr_file": "lib/chef/provider/mount/linux.rb", - "discussion_id": "633920952", - "commented_code": "@@ -32,6 +32,7 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n+ loop_mount_points = shell_out!(\"losetup --list\").stdout", - "comment_created_at": "2021-05-17T23:04:31+00:00", - "comment_author": "tas50", - "comment_body": "Won't this run every time even if we don't need the data now?", - "pr_file_module": null - }, - { - "comment_id": "634258415", - "repo_full_name": "chef/chef", - "pr_number": 11579, - "pr_file": "lib/chef/provider/mount/linux.rb", - "discussion_id": "633920952", - "commented_code": "@@ -32,6 +32,7 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n+ loop_mount_points = shell_out!(\"losetup --list\").stdout", - "comment_created_at": "2021-05-18T10:40:47+00:00", - "comment_author": "msys-sgarg", - "comment_body": "That's right, earlier suggestion was to initialize this list of mount points as soon as the `Mount` class is initialised but there the specs fail.. Since rspec's mock up functionality is not ready by the time `Mount` class is initialized. By running it once here we make sure its not run in the loop while we are going through all the mounts points in system with command `findmnt -rn`.", - "pr_file_module": null - }, - { - "comment_id": "634911054", - "repo_full_name": "chef/chef", - "pr_number": 11579, - "pr_file": "lib/chef/provider/mount/linux.rb", - "discussion_id": "633920952", - "commented_code": "@@ -32,6 +32,7 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n+ loop_mount_points = shell_out!(\"losetup --list\").stdout", - "comment_created_at": "2021-05-19T04:51:11+00:00", - "comment_author": "msys-sgarg", - "comment_body": "Also at this point of time in code its not possible for us to know if the mount is of type loop or not for sure . We need this data afterwards to make sure we have information of `loop` mount points", - "pr_file_module": null - }, - { - "comment_id": "636954878", - "repo_full_name": "chef/chef", - "pr_number": 11579, - "pr_file": "lib/chef/provider/mount/linux.rb", - "discussion_id": "633920952", - "commented_code": "@@ -32,6 +32,7 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n+ loop_mount_points = shell_out!(\"losetup --list\").stdout", - "comment_created_at": "2021-05-21T14:15:46+00:00", - "comment_author": "msys-sgarg", - "comment_body": "@tas50 does the PR look good ? 
or any need of updates ?", - "pr_file_module": null - }, - { - "comment_id": "637259636", - "repo_full_name": "chef/chef", - "pr_number": 11579, - "pr_file": "lib/chef/provider/mount/linux.rb", - "discussion_id": "633920952", - "commented_code": "@@ -32,6 +32,7 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n+ loop_mount_points = shell_out!(\"losetup --list\").stdout", - "comment_created_at": "2021-05-21T22:12:04+00:00", - "comment_author": "tas50", - "comment_body": "I still don't understand why we can't just do this shellout when we need the data vs. frontloading the work and doing extra shellouts we might not need.", - "pr_file_module": null - }, - { - "comment_id": "755491999", - "repo_full_name": "chef/chef", - "pr_number": 11579, - "pr_file": "lib/chef/provider/mount/linux.rb", - "discussion_id": "633920952", - "commented_code": "@@ -32,6 +32,7 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n+ loop_mount_points = shell_out!(\"losetup --list\").stdout", - "comment_created_at": "2021-11-23T20:48:06+00:00", - "comment_author": "jaymzh", - "comment_body": "@tas50 because calling losetup 93048234092834 times is bad. I asked for that.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "637259296", - "pr_number": 11579, - "pr_file": "lib/chef/provider/mount/linux.rb", - "created_at": "2021-05-21T22:10:58+00:00", - "commented_code": "def mounted?\n mounted = false\n\n real_mount_point = if ::File.exists? @new_resource.mount_point\n ::File.realpath(@new_resource.mount_point)\n else\n @new_resource.mount_point\n end\n\n # get the output of losetup command to check loop mount points\n loop_mount_points = shell_out!(\"losetup -a\").stdout", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "637259296", - "repo_full_name": "chef/chef", - "pr_number": 11579, - "pr_file": "lib/chef/provider/mount/linux.rb", - "discussion_id": "637259296", - "commented_code": "@@ -32,19 +33,29 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n-\n real_mount_point = if ::File.exists? @new_resource.mount_point\n ::File.realpath(@new_resource.mount_point)\n else\n @new_resource.mount_point\n end\n \n+ # get the output of losetup command to check loop mount points\n+ loop_mount_points = shell_out!(\"losetup -a\").stdout", - "comment_created_at": "2021-05-21T22:10:58+00:00", - "comment_author": "tas50", - "comment_body": "This should really only be done when we need it down in the case statement on line 45. Otherwise we're shelling out here for no reason when the first when of the case statement is reached.", - "pr_file_module": null - }, - { - "comment_id": "637683618", - "repo_full_name": "chef/chef", - "pr_number": 11579, - "pr_file": "lib/chef/provider/mount/linux.rb", - "discussion_id": "637259296", - "commented_code": "@@ -32,19 +33,29 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n-\n real_mount_point = if ::File.exists? 
@new_resource.mount_point\n ::File.realpath(@new_resource.mount_point)\n else\n @new_resource.mount_point\n end\n \n+ # get the output of losetup command to check loop mount points\n+ loop_mount_points = shell_out!(\"losetup -a\").stdout", - "comment_created_at": "2021-05-24T04:43:35+00:00", - "comment_author": "msys-sgarg", - "comment_body": "Hi, as per this comment on earlier PR [11376 comment](https://github.com/chef/chef/pull/11376#discussion_r616854963), it was not a good idea to keep the `shell_out` in case-statement, hence i moved it at the top of loop. ", - "pr_file_module": null - }, - { - "comment_id": "637731789", - "repo_full_name": "chef/chef", - "pr_number": 11579, - "pr_file": "lib/chef/provider/mount/linux.rb", - "discussion_id": "637259296", - "commented_code": "@@ -32,19 +33,29 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n-\n real_mount_point = if ::File.exists? @new_resource.mount_point\n ::File.realpath(@new_resource.mount_point)\n else\n @new_resource.mount_point\n end\n \n+ # get the output of losetup command to check loop mount points\n+ loop_mount_points = shell_out!(\"losetup -a\").stdout", - "comment_created_at": "2021-05-24T07:01:16+00:00", - "comment_author": "msys-sgarg", - "comment_body": "How about this approach ? \r\n```\r\n def loop_mount_points\r\n #get loop_moint_points only if not initialized earlier\r\n @loop_mount_points ||= shell_out!(\"losetup -a\").stdout\r\n end\r\n\r\n def mounted?\r\n mounted = false\r\n real_mount_point = if ::File.exists? @new_resource.mount_point\r\n ::File.realpath(@new_resource.mount_point)\r\n else\r\n @new_resource.mount_point\r\n end\r\n\r\n shell_out!(\"findmnt -rn\").stdout.each_line do |line|\r\n case line\r\n # Permalink for device already mounted to mount point for : https://rubular.com/r/L0RNnD4gf2DJGl\r\n when /\\A#{Regexp.escape(real_mount_point)}\\s+#{device_mount_regex}\\s/\r\n mounted = true\r\n logger.trace(\"Special device #{device_logstring} mounted as #{real_mount_point}\")\r\n # Permalink for loop type devices mount points https://rubular.com/r/a0bS4p2RvXsGxx\r\n when %r{\\A#{Regexp.escape(real_mount_point)}\\s+\\/dev\\/loop+[0-9]+\\s}\r\n loop_mount_points.each_line do |mount_point|\r\n if mount_point.include? device_real\r\n mounted = true\r\n break\r\n end\r\n end\r\n ```", - "pr_file_module": null - }, - { - "comment_id": "639696774", - "repo_full_name": "chef/chef", - "pr_number": 11579, - "pr_file": "lib/chef/provider/mount/linux.rb", - "discussion_id": "637259296", - "commented_code": "@@ -32,19 +33,29 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n-\n real_mount_point = if ::File.exists? @new_resource.mount_point\n ::File.realpath(@new_resource.mount_point)\n else\n @new_resource.mount_point\n end\n \n+ # get the output of losetup command to check loop mount points\n+ loop_mount_points = shell_out!(\"losetup -a\").stdout", - "comment_created_at": "2021-05-26T12:54:17+00:00", - "comment_author": "msys-sgarg", - "comment_body": "@tas50, please let me know if the above approach looks fine, i will make finalise the changes. Thanks", - "pr_file_module": null - }, - { - "comment_id": "648262847", - "repo_full_name": "chef/chef", - "pr_number": 11579, - "pr_file": "lib/chef/provider/mount/linux.rb", - "discussion_id": "637259296", - "commented_code": "@@ -32,19 +33,29 @@ class Linux < Chef::Provider::Mount::Mount\n \n def mounted?\n mounted = false\n-\n real_mount_point = if ::File.exists? 
@new_resource.mount_point\n ::File.realpath(@new_resource.mount_point)\n else\n @new_resource.mount_point\n end\n \n+ # get the output of losetup command to check loop mount points\n+ loop_mount_points = shell_out!(\"losetup -a\").stdout", - "comment_created_at": "2021-06-09T12:42:21+00:00", - "comment_author": "msys-sgarg", - "comment_body": "@tas50 Hi, shall I go ahead with this approach ? and update the PR ? ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "549524521", - "pr_number": 10776, - "pr_file": "lib/chef/provider/service/systemd.rb", - "created_at": "2020-12-29T00:12:09+00:00", - "commented_code": "end\n end\n\n def systemd_service_status\n # If the status has already been collected, return that\n return @service_status unless @service_status.nil?\n\n # Collect all the status information for a service and returns it at once\n options, args = get_systemctl_options_args\n s = shell_out!(\"#{systemctl_path} #{args} show -p UnitFileState -p ActiveState #{new_resource.service_name}\", options)\n # e.g. /bin/systemctl --system show -p UnitFileState -p ActiveState sshd.service\n # Returns something like:\n # ActiveState=active\n # UnitFileState=enabled\n status = {}\n s.stdout.each_line do |line|\n k, v = line.strip.split(\"=\")\n status[k] = v\n end\n # Assert requisite keys exist\n unless status.key?(\"UnitFileState\") && status.key?(\"ActiveState\")\n raise Mixlib::ShellOut::ShellCommandFailed, \"'#{systemctl_path} show' not reporting status for #{new_resource.service_name}!\"\n end\n\n @service_status = status\n status\n end", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "549524521", - "repo_full_name": "chef/chef", - "pr_number": 10776, - "pr_file": "lib/chef/provider/service/systemd.rb", - "discussion_id": "549524521", - "commented_code": "@@ -76,6 +76,31 @@ def define_resource_requirements\n end\n end\n \n+ def systemd_service_status\n+ # If the status has already been collected, return that\n+ return @service_status unless @service_status.nil?\n+\n+ # Collect all the status information for a service and returns it at once\n+ options, args = get_systemctl_options_args\n+ s = shell_out!(\"#{systemctl_path} #{args} show -p UnitFileState -p ActiveState #{new_resource.service_name}\", options)\n+ # e.g. /bin/systemctl --system show -p UnitFileState -p ActiveState sshd.service\n+ # Returns something like:\n+ # ActiveState=active\n+ # UnitFileState=enabled\n+ status = {}\n+ s.stdout.each_line do |line|\n+ k, v = line.strip.split(\"=\")\n+ status[k] = v\n+ end\n+ # Assert requisite keys exist\n+ unless status.key?(\"UnitFileState\") && status.key?(\"ActiveState\")\n+ raise Mixlib::ShellOut::ShellCommandFailed, \"'#{systemctl_path} show' not reporting status for #{new_resource.service_name}!\"\n+ end\n+\n+ @service_status = status\n+ status\n+ end", - "comment_created_at": "2020-12-29T00:12:09+00:00", - "comment_author": "phiggins", - "comment_body": "The idiomatic ruby way to write this is memoized with a block:\r\n\r\n```suggestion\r\n def systemd_service_status\r\n @systemd_service_status ||= begin\r\n # Collect all the status information for a service and returns it at once\r\n options, args = get_systemctl_options_args\r\n s = shell_out!(\"#{systemctl_path} #{args} show -p UnitFileState -p ActiveState #{new_resource.service_name}\", options)\r\n # e.g. 
/bin/systemctl --system show -p UnitFileState -p ActiveState sshd.service\r\n # Returns something like:\r\n # ActiveState=active\r\n # UnitFileState=enabled\r\n status = {}\r\n s.stdout.each_line do |line|\r\n k, v = line.strip.split(\"=\")\r\n status[k] = v\r\n end\r\n \r\n # Assert requisite keys exist\r\n unless status.key?(\"UnitFileState\") && status.key?(\"ActiveState\")\r\n raise Mixlib::ShellOut::ShellCommandFailed, \"'#{systemctl_path} show' not reporting status for #{new_resource.service_name}!\"\r\n end\r\n \r\n status\r\n end\r\n end\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "551491650", - "pr_number": 10776, - "pr_file": "lib/chef/provider/service/systemd.rb", - "created_at": "2021-01-04T18:33:15+00:00", - "commented_code": "end\n end\n\n def systemd_service_status\n @service_status ||= begin\n # Collect all the status information for a service and returns it at once\n options, args = get_systemctl_options_args\n s = shell_out!(\"#{systemctl_path} #{args} show -p UnitFileState -p ActiveState #{new_resource.service_name}\", options)", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "551491650", - "repo_full_name": "chef/chef", - "pr_number": 10776, - "pr_file": "lib/chef/provider/service/systemd.rb", - "discussion_id": "551491650", - "commented_code": "@@ -76,6 +76,30 @@ def define_resource_requirements\n end\n end\n \n+ def systemd_service_status\n+ @service_status ||= begin\n+ # Collect all the status information for a service and returns it at once\n+ options, args = get_systemctl_options_args\n+ s = shell_out!(\"#{systemctl_path} #{args} show -p UnitFileState -p ActiveState #{new_resource.service_name}\", options)", - "comment_created_at": "2021-01-04T18:33:15+00:00", - "comment_author": "lamont-granquist", - "comment_body": "you should use the *splat args style of calling here so:\r\n\r\n```\r\nshell_out!(systemctl_path, args, \"show\", \"-p\", \"UnitFileState\", \"-p\", \"ActiveState\", new_resource.service_name, options)\r\n```\r\n\r\nif that gets done correctly then it should avoid invoking the shell to parse the arguments, which should increase the perf even more (and has other benefits in not needing to worry about filenames with spaces and quotes around them, etc, etc).", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-never-commit-secrets.json b/_reviewers/chef-never-commit-secrets.json new file mode 100644 index 0000000..8562fdb --- /dev/null +++ b/_reviewers/chef-never-commit-secrets.json @@ -0,0 +1,134 @@ +[ + { + "discussion_id": "2162467475", + "pr_number": 15058, + "pr_file": "spec/data/trusted_certs/intermediate.pem", + "created_at": "2025-06-23T20:24:41+00:00", + "commented_code": "-----BEGIN 
CERTIFICATE-----\r\nMIIGrTCCBJWgAwIBAgIQDo0oQK5IJZBWGLOoqeF6RzANBgkqhkiG9w0BAQwFADBJ\r\nMQswCQYDVQQGEwJVUzEXMBUGA1UEChMORGlnaUNlcnQsIEluYy4xITAfBgNVBAMT\r\nGERpZ2lDZXJ0IFJTQTQwOTYgUm9vdCBHNTAeFw0yMTA0MTQwMDAwMDBaFw0zMTA0\r\nMTMyMzU5NTlaMFQxCzAJBgNVBAYTAlVTMRcwFQYDVQQKEw5EaWdpQ2VydCwgSW5j\r\nLjEsMCoGA1UEAxMjRGlnaUNlcnQgRzUgUlNBNDA5NiBTSEEzODQgMjAyMSBDQTEw\r\nggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDCwLlUmeGwUTj93uzejg2I\r\ntHjaSqm+knZ8az09cBAZFLFU9sKDzBHgf43/GpIWIHGLDUGXXZkKtkjJhl6POqda\r\nXWt/4avSsQgkELz2uefSxhzELBl4o1U50EULTlri3zUBQ11Jr/hfJLxdMAJqKv21\r\niVD8GfFDs12Hy08h7IxuA5ROVdBQS2OiU/6Vd4A3uVpzyjaxQsfAvkwz9+3jsozf\r\nG+kWW+6Fxa3Vt4EbX+3afaBLeIyBlQvPd3pUY8irY3T6MHlglEblraxyGZ3ifvFu\r\nVt7S98D5+U4CMFzzGSzCCqMxTkgasTMhP8+PjXRN+mL56xyfw/uVmN9vRPqgbRUD\r\ng95zx+CRFXgpUQ8yslpl+ECSqCe0cYxm+jWz00VFWtUZAwpE4REGOVdmNGrfNR16\r\nh7dggpFVfeFy7qCwd9up/sWkBmkZB1zL9ENjg68EH5aEbh+jlbF6HuLv4+jibVlD\r\n/r+ZW/vJgnMXmUYW1gDl3L//vQ/V4ElqRYzxsSVsq3dwW0SYzI31PKFEb8sqI5IN\r\nP10MtFtZ1DgISF9I8LJ35dBDqguoonGC0/d+iq2S7ipcpFIo/u3tK/Nu0QvKMEN6\r\nDlx6Yhssscj2PhiADKjhRnweWUj/2eKuX8Cb6UmXvh+R4Dm0iEIGop1/r37GUo0z\r\nnqNszrYZz1zd4GWG6puFWQIDAQABo4IBhDCCAYAwEgYDVR0TAQH/BAgwBgEB/wIB\r\nADAdBgNVHQ4EFgQUbYE39zhEfkdCe1al7Lt3ZyEJ9DwwHwYDVR0jBBgwFoAUYm23\r\nkU/E6qNiYI+g0L61jwZ8aAAwDgYDVR0PAQH/BAQDAgGGMB0GA1UdJQQWMBQGCCsG\r\nAQUFBwMBBggrBgEFBQcDAjB3BggrBgEFBQcBAQRrMGkwJAYIKwYBBQUHMAGGGGh0\r\ndHA6Ly9vY3NwLmRpZ2ljZXJ0LmNvbTBBBggrBgEFBQcwAoY1aHR0cDovL2NhY2Vy\r\ndHMuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0UlNBNDA5NlJvb3RHNS5jcnQwQwYDVR0f\r\nBDwwOjA4oDagNIYyaHR0cDovL2NybDMuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0UlNB\r\nNDA5NlJvb3RHNS5jcmwwPQYDVR0gBDYwNDALBglghkgBhv1sAgEwBwYFZ4EMAQEw\r\nCAYGZ4EMAQIBMAgGBmeBDAECAjAIBgZngQwBAgMwDQYJKoZIhvcNAQEMBQADggIB\r\nAGHJE9aY60MSsfdEfqIcrdE0c1dXxis9E1l9at6g18Jpyc1C6PsUHdmo6rJWq8Xe\r\nNNPkD/4fKhJsrd9TRlUlpIgKiJZW1ituKHV6Ghm7DIRSyx0aMpP9NJ3heV3CIgZr\r\nMLtJEFuG5WfolWIfu7sle2lYjA3HxA/xQo803jGOhxbEDX/BTzHo/1X7YGvwpRqJ\r\n+7J1B+2l+TA1r9vAlLfIDQRazVYRNxHpJDOwU0ffKaEPbRrgPtogO+8hLSml9Zoe\r\nY8w94f31XbvBFxSbSVpX+/QctNdwx2VuIoRcT8WZ0lZ9aenna5q5AE1C8oTtbw2T\r\nqoz4NCaM5XPgjvb0DGPBeH8jWveNo1BmClQA2qYXL55f00m8AZ4Hf6oYANt/zbuM\r\nQPhAoSHWwW4V4Pug3XPXM70LlY50y9kPD/57eHryhO2oXQLLx+l6mg8xzL6vKsHT\r\nE30whFM32vVTpjejLZ9hJBAJURFaUrH2TZyAmoVbCNy50yuHYQ6FooYpbsbnpYPi\r\nKW/E9bc201rqm/GQOWJ4zOJ8a5Etn3zY+rlPaxjJvxc3pSMfgtwwrm9KGXHsI1Gf\r\nULMwUbXclKV2qR8d6ECtUOIRxoQKutN85lmwB05yddu6uQQg0hHeaGFUk7EU90SV\r\nib/FA/op9sXfS3CkOnHQISY0JbWxrzC6eHaKeQi6lR1I\r\n-----END CERTIFICATE-----\r\n-----BEGIN 
CERTIFICATE-----\nMIIDSTCCAjGgAwIBAgIBAjANBgkqhkiG9w0BAQsFADAwMQswCQYDVQQGEwJVUzEQ\nMA4GA1UECgwHVGVzdE9yZzEPMA0GA1UEAwwGUm9vdENBMB4XDTI1MDYyMzIwMjA1\nMFoXDTI4MDYyMjIwMjA1MFowODELMAkGA1UEBhMCVVMxEDAOBgNVBAoMB1Rlc3RP\ncmcxFzAVBgNVBAMMDkludGVybWVkaWF0ZUNBMIIBIjANBgkqhkiG9w0BAQEFAAOC\nAQ8AMIIBCgKCAQEAws9YXnQv2XblYOQsOTV8n9mGZ//DulhKpU0T0/dNfEbXd/uN\ns2VbJg1xcNWJjSzdNe8Y6dqdnNoCpOKsNGjkkJafhGNdgGF1+Y8vc0VkZDozxGAc\nBvK/s2OOzDoInS3+PSo9TBwS5SPwMSjdPhKkgTsLw0N0n6CrG26+bK4shJuJyTEG\nqOd/cw777fV8FOkh/9pHiXzX+eezVG/vn7O3NCm+WtF/flUuVtjvOmXAI/siNI4S\nc5DPawny+SAoM4ebOOfFPbkdHx1fauLr9aHeevjWtI/9tpwjrrOzMUeau9BNWioh\nnW4PDtsTswUUliiWMQYP8/txbzee7lAKTxTRuQIDAQABo2YwZDASBgNVHRMBAf8E\nCDAGAQH/AgEAMA4GA1UdDwEB/wQEAwIBBjAdBgNVHQ4EFgQUr6tNSHRY9NQ8u25m\nqku4hNl+uJ4wHwYDVR0jBBgwFoAUExSy9vgkJDrak4LmMpoJeclgms0wDQYJKoZI\nhvcNAQELBQADggEBADFO7Gah2Ca+lACLVeXtVqHFMiJ9qvs6rBGcNdUghKyFuogQ\n1knPFU946KvwDHVKC/+xCiVgdxnPG6AucSXnpDRrNN330FPEhCgJ83sYhRMzikCY\nx4KD8MG0wFLnDOnv9TJd10meLrsyUkGjHRYQzstqk77lIrnQW4OrXhTcOmhv++8B\n5ZqHryfzWL44enx5UkbXSci3zPhf2FGQzSGECUPilgskC3O53mRokV3RbJZvpXZF\n2EVYZrqFCswmJ+UZkYHuyyUNyTJ7RsOzeEQ7m5TatmpBYuLOisw67pUH6d6zQAI6\noFpGWPcsZrt6JnrjAWM8MlbFLTh98WhooGEgUws=\n-----END CERTIFICATE-----\n-----BEGIN RSA PRIVATE KEY-----", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2162467475", + "repo_full_name": "chef/chef", + "pr_number": 15058, + "pr_file": "spec/data/trusted_certs/intermediate.pem", + "discussion_id": "2162467475", + "commented_code": "@@ -1,38 +1,47 @@\n------BEGIN CERTIFICATE-----\r\n-MIIGrTCCBJWgAwIBAgIQDo0oQK5IJZBWGLOoqeF6RzANBgkqhkiG9w0BAQwFADBJ\r\n-MQswCQYDVQQGEwJVUzEXMBUGA1UEChMORGlnaUNlcnQsIEluYy4xITAfBgNVBAMT\r\n-GERpZ2lDZXJ0IFJTQTQwOTYgUm9vdCBHNTAeFw0yMTA0MTQwMDAwMDBaFw0zMTA0\r\n-MTMyMzU5NTlaMFQxCzAJBgNVBAYTAlVTMRcwFQYDVQQKEw5EaWdpQ2VydCwgSW5j\r\n-LjEsMCoGA1UEAxMjRGlnaUNlcnQgRzUgUlNBNDA5NiBTSEEzODQgMjAyMSBDQTEw\r\n-ggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDCwLlUmeGwUTj93uzejg2I\r\n-tHjaSqm+knZ8az09cBAZFLFU9sKDzBHgf43/GpIWIHGLDUGXXZkKtkjJhl6POqda\r\n-XWt/4avSsQgkELz2uefSxhzELBl4o1U50EULTlri3zUBQ11Jr/hfJLxdMAJqKv21\r\n-iVD8GfFDs12Hy08h7IxuA5ROVdBQS2OiU/6Vd4A3uVpzyjaxQsfAvkwz9+3jsozf\r\n-G+kWW+6Fxa3Vt4EbX+3afaBLeIyBlQvPd3pUY8irY3T6MHlglEblraxyGZ3ifvFu\r\n-Vt7S98D5+U4CMFzzGSzCCqMxTkgasTMhP8+PjXRN+mL56xyfw/uVmN9vRPqgbRUD\r\n-g95zx+CRFXgpUQ8yslpl+ECSqCe0cYxm+jWz00VFWtUZAwpE4REGOVdmNGrfNR16\r\n-h7dggpFVfeFy7qCwd9up/sWkBmkZB1zL9ENjg68EH5aEbh+jlbF6HuLv4+jibVlD\r\n-/r+ZW/vJgnMXmUYW1gDl3L//vQ/V4ElqRYzxsSVsq3dwW0SYzI31PKFEb8sqI5IN\r\n-P10MtFtZ1DgISF9I8LJ35dBDqguoonGC0/d+iq2S7ipcpFIo/u3tK/Nu0QvKMEN6\r\n-Dlx6Yhssscj2PhiADKjhRnweWUj/2eKuX8Cb6UmXvh+R4Dm0iEIGop1/r37GUo0z\r\n-nqNszrYZz1zd4GWG6puFWQIDAQABo4IBhDCCAYAwEgYDVR0TAQH/BAgwBgEB/wIB\r\n-ADAdBgNVHQ4EFgQUbYE39zhEfkdCe1al7Lt3ZyEJ9DwwHwYDVR0jBBgwFoAUYm23\r\n-kU/E6qNiYI+g0L61jwZ8aAAwDgYDVR0PAQH/BAQDAgGGMB0GA1UdJQQWMBQGCCsG\r\n-AQUFBwMBBggrBgEFBQcDAjB3BggrBgEFBQcBAQRrMGkwJAYIKwYBBQUHMAGGGGh0\r\n-dHA6Ly9vY3NwLmRpZ2ljZXJ0LmNvbTBBBggrBgEFBQcwAoY1aHR0cDovL2NhY2Vy\r\n-dHMuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0UlNBNDA5NlJvb3RHNS5jcnQwQwYDVR0f\r\n-BDwwOjA4oDagNIYyaHR0cDovL2NybDMuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0UlNB\r\n-NDA5NlJvb3RHNS5jcmwwPQYDVR0gBDYwNDALBglghkgBhv1sAgEwBwYFZ4EMAQEw\r\n-CAYGZ4EMAQIBMAgGBmeBDAECAjAIBgZngQwBAgMwDQYJKoZIhvcNAQEMBQADggIB\r\n-AGHJE9aY60MSsfdEfqIcrdE0c1dXxis9E1l9at6g18Jpyc1C6PsUHdmo6rJWq8Xe\r\n-NNPkD/4fKhJsrd9TRlUlpIgKiJZW1ituKHV6Ghm7DIRSyx0aMpP9NJ3heV3CIgZr\r\n-MLtJEFuG5WfolWIfu7sle2lYjA3HxA/xQo803jGOhxbEDX/BTzHo/1X7YGvwpRqJ\r\n-+7J1B+2l+TA1r9vAlLfIDQRazVYRNxHpJDOwU0ffKaEPbRrgPtogO+8hLSml9Zoe\r\n
-Y8w94f31XbvBFxSbSVpX+/QctNdwx2VuIoRcT8WZ0lZ9aenna5q5AE1C8oTtbw2T\r\n-qoz4NCaM5XPgjvb0DGPBeH8jWveNo1BmClQA2qYXL55f00m8AZ4Hf6oYANt/zbuM\r\n-QPhAoSHWwW4V4Pug3XPXM70LlY50y9kPD/57eHryhO2oXQLLx+l6mg8xzL6vKsHT\r\n-E30whFM32vVTpjejLZ9hJBAJURFaUrH2TZyAmoVbCNy50yuHYQ6FooYpbsbnpYPi\r\n-KW/E9bc201rqm/GQOWJ4zOJ8a5Etn3zY+rlPaxjJvxc3pSMfgtwwrm9KGXHsI1Gf\r\n-ULMwUbXclKV2qR8d6ECtUOIRxoQKutN85lmwB05yddu6uQQg0hHeaGFUk7EU90SV\r\n-ib/FA/op9sXfS3CkOnHQISY0JbWxrzC6eHaKeQi6lR1I\r\n------END CERTIFICATE-----\r\n+-----BEGIN CERTIFICATE-----\n+MIIDSTCCAjGgAwIBAgIBAjANBgkqhkiG9w0BAQsFADAwMQswCQYDVQQGEwJVUzEQ\n+MA4GA1UECgwHVGVzdE9yZzEPMA0GA1UEAwwGUm9vdENBMB4XDTI1MDYyMzIwMjA1\n+MFoXDTI4MDYyMjIwMjA1MFowODELMAkGA1UEBhMCVVMxEDAOBgNVBAoMB1Rlc3RP\n+cmcxFzAVBgNVBAMMDkludGVybWVkaWF0ZUNBMIIBIjANBgkqhkiG9w0BAQEFAAOC\n+AQ8AMIIBCgKCAQEAws9YXnQv2XblYOQsOTV8n9mGZ//DulhKpU0T0/dNfEbXd/uN\n+s2VbJg1xcNWJjSzdNe8Y6dqdnNoCpOKsNGjkkJafhGNdgGF1+Y8vc0VkZDozxGAc\n+BvK/s2OOzDoInS3+PSo9TBwS5SPwMSjdPhKkgTsLw0N0n6CrG26+bK4shJuJyTEG\n+qOd/cw777fV8FOkh/9pHiXzX+eezVG/vn7O3NCm+WtF/flUuVtjvOmXAI/siNI4S\n+c5DPawny+SAoM4ebOOfFPbkdHx1fauLr9aHeevjWtI/9tpwjrrOzMUeau9BNWioh\n+nW4PDtsTswUUliiWMQYP8/txbzee7lAKTxTRuQIDAQABo2YwZDASBgNVHRMBAf8E\n+CDAGAQH/AgEAMA4GA1UdDwEB/wQEAwIBBjAdBgNVHQ4EFgQUr6tNSHRY9NQ8u25m\n+qku4hNl+uJ4wHwYDVR0jBBgwFoAUExSy9vgkJDrak4LmMpoJeclgms0wDQYJKoZI\n+hvcNAQELBQADggEBADFO7Gah2Ca+lACLVeXtVqHFMiJ9qvs6rBGcNdUghKyFuogQ\n+1knPFU946KvwDHVKC/+xCiVgdxnPG6AucSXnpDRrNN330FPEhCgJ83sYhRMzikCY\n+x4KD8MG0wFLnDOnv9TJd10meLrsyUkGjHRYQzstqk77lIrnQW4OrXhTcOmhv++8B\n+5ZqHryfzWL44enx5UkbXSci3zPhf2FGQzSGECUPilgskC3O53mRokV3RbJZvpXZF\n+2EVYZrqFCswmJ+UZkYHuyyUNyTJ7RsOzeEQ7m5TatmpBYuLOisw67pUH6d6zQAI6\n+oFpGWPcsZrt6JnrjAWM8MlbFLTh98WhooGEgUws=\n+-----END CERTIFICATE-----\n+-----BEGIN RSA PRIVATE KEY-----", + "comment_created_at": "2025-06-23T20:24:41+00:00", + "comment_author": "github-advanced-security[bot]", + "comment_body": "## Cryptographic private keys should not be disclosed\n\nMake sure this private key gets revoked, changed, and removed from the code.

See more on SonarQube Cloud
              \n\n[Show more details](https://github.com/chef/chef/security/code-scanning/284)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2162467478", + "pr_number": 15058, + "pr_file": "spec/data/trusted_certs/opscode.pem", + "created_at": "2025-06-23T20:24:41+00:00", + "commented_code": "-----BEGIN CERTIFICATE-----\nMIIGTjCCBTagAwIBAgIQBK55YGZmkBq5xX+mbFvczTANBgkqhkiG9w0BAQsFADBl\nMQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3\nd3cuZGlnaWNlcnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBBc3N1cmVkIElEIFJv\nb3QgQ0EwHhcNMTMxMTA1MTIwMDAwWhcNMjgxMTA1MTIwMDAwWjBlMQswCQYDVQQG\nEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cuZGlnaWNl\ncnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBTSEEyIEFzc3VyZWQgSUQgQ0EwggEi\nMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDc+BEjP2q178AneRstBYeiEEMx\n3w7UFRtPd6Qizj6McPC+B47dJyq8AR22LArK3WlYH0HtagUf2mN4WR4iLCv4un7J\nNTtW8R98Qn4lsCMZxkU41z1E+SB8YK4csFoYBL6PO/ep8JSapgxjSbZBF1NAMr1P\n5lB6UB8lRejxia/N/17/UPPwFxH/vcWJ9b1iudj7jkUEhW2ZzcVITf0mqwI2Reo2\n119q4hqCQQrc6dn1kReOxiGtODwT5h5/ZpzVTdlG2vbPUqd9OyTDtMFRNcab69Tv\nfuR7A+FEvXoLN+BPy4KKDXEY5KbgiSwb87JzPMGwkp4Yfb2rfcV9CKEswp9zAgMB\nAAGjggL4MIIC9DASBgNVHRMBAf8ECDAGAQH/AgEAMA4GA1UdDwEB/wQEAwIBhjA0\nBggrBgEFBQcBAQQoMCYwJAYIKwYBBQUHMAGGGGh0dHA6Ly9vY3NwLmRpZ2ljZXJ0\nLmNvbTCBgQYDVR0fBHoweDA6oDigNoY0aHR0cDovL2NybDQuZGlnaWNlcnQuY29t\nL0RpZ2lDZXJ0QXNzdXJlZElEUm9vdENBLmNybDA6oDigNoY0aHR0cDovL2NybDMu\nZGlnaWNlcnQuY29tL0RpZ2lDZXJ0QXNzdXJlZElEUm9vdENBLmNybDAdBgNVHSUE\nFjAUBggrBgEFBQcDAgYIKwYBBQUHAwQwggGzBgNVHSAEggGqMIIBpjCCAaIGCmCG\nSAGG/WwAAgQwggGSMCgGCCsGAQUFBwIBFhxodHRwczovL3d3dy5kaWdpY2VydC5j\nb20vQ1BTMIIBZAYIKwYBBQUHAgIwggFWHoIBUgBBAG4AeQAgAHUAcwBlACAAbwBm\nACAAdABoAGkAcwAgAEMAZQByAHQAaQBmAGkAYwBhAHQAZQAgAGMAbwBuAHMAdABp\nAHQAdQB0AGUAcwAgAGEAYwBjAGUAcAB0AGEAbgBjAGUAIABvAGYAIAB0AGgAZQAg\nAEQAaQBnAGkAQwBlAHIAdAAgAEMAUAAvAEMAUABTACAAYQBuAGQAIAB0AGgAZQAg\nAFIAZQBsAHkAaQBuAGcAIABQAGEAcgB0AHkAIABBAGcAcgBlAGUAbQBlAG4AdAAg\nAHcAaABpAGMAaAAgAGwAaQBtAGkAdAAgAGwAaQBhAGIAaQBsAGkAdAB5ACAAYQBu\nAGQAIABhAHIAZQAgAGkAbgBjAG8AcgBwAG8AcgBhAHQAZQBkACAAaABlAHIAZQBp\nAG4AIABiAHkAIAByAGUAZgBlAHIAZQBuAGMAZQAuMB0GA1UdDgQWBBTnAiOAAE/Y\n17yUC9k/dDlJMjyKeTAfBgNVHSMEGDAWgBRF66Kv9JLLgjEtUYunpyGd823IDzAN\nBgkqhkiG9w0BAQsFAAOCAQEATtSJJ7n9HYd3fg8oBZDxCi/JOz69k5yQxq/6kVGH\nMlRr6MrBcVFcmY61+uBiGZmmB5p8Eyfb5QKihBLZFfYKRFfENI9tcx861qABPd7j\nguRFa7LrJf2AXh05kL5bQvbOkWDj+aBWDEgQzjNoe82Tq/Bqy09YD7l7XRsEgZ6n\nIuJXSSfukpMIvmkIUwI6Ll3IGfRQgE4C2bBdkbSTh/mWloFVQI5m7YLYuyhf7Uxh\n7QZYKBlTEUS8RyApsgRs2IlUmTt122d4LB6SeMZVPVgSETJuvUMMTTTbe8ZC2+y+\nq5thTAaS447fISpQVwTAYKI11SSeZjcJSc/V+GWz4OJuwg==\nMIIDRDCCAiygAwIBAgIBAzANBgkqhkiG9w0BAQsFADA4MQswCQYDVQQGEwJVUzEQ\nMA4GA1UECgwHVGVzdE9yZzEXMBUGA1UEAwwOSW50ZXJtZWRpYXRlQ0EwHhcNMjUw\nNjIzMjAyMDUwWhcNMjYwNjIzMjAyMDUwWjAxMQswCQYDVQQGEwJVUzEQMA4GA1UE\nCgwHVGVzdE9yZzEQMA4GA1UEAwwHb3BzY29kZTCCASIwDQYJKoZIhvcNAQEBBQAD\nggEPADCCAQoCggEBAM7L6OKflUY72ui8LiGYHQ2/7ZQtPVb8LarLaE+RCNGuFKxx\nN6eN+aEjXeMulvQOuOXuMMxU/PI2xN+H2kXbUi2dE2tfMH3w7MqmY0b2IPW4Xsvr\nQKc0iZDEsRwy8AJJfEaSuYkheRXJ1t2ijS9qCUzgoEhegSWbULp0jVER74Lkk3RN\nDhuare8SMIfSjFZQy9yugg02SgTSpciA4TDUNGxyv9O+111bCRfbQjLDz1WsAk6P\nMe5ehP6B6/6GumE47SsSt3QgXOuDS6wuI6yR3YBYTDHglURax5GB1jxMGFRXk/q0\nPueDiWyhYJEHySsgX1GI6kuzpm9cVjzQkvqs49MCAwEAAaNgMF4wDAYDVR0TAQH/\nBAIwADAOBgNVHQ8BAf8EBAMCB4AwHQYDVR0OBBYEFJ/+n6xxe6FQ682tbP1EWVcE\n2rXyMB8GA1UdIwQYMBaAFK+rTUh0WPTUPLtuZqpLuITZfrieMA0GCSqGSIb3DQEB\nCwUAA4IBAQBqgcMrGofwE4KPTcXLsYKd/xlOoTZDzjCI4wdlA1bLtIb46hbWCUJR\ncCmFuMHocSsPL99Fw03xsoToI8g2JlkrIRXtuWXwiDXnDdmgBwZVqIcgTxg0a0Yn\n6EpL8vxYJT1AZc0Os74qUCiJ+kHzlXkN+6SjilPnOIfuCtOEL+aHW6DXk7lkSF
6d\nfMRJZR2l7HJkCy3IqZGxkM4hgbKd/VOpF++UF/YgkN3f+QtsPbrDAWJHZD96yOjM\nbvDssC1rAS1HbjztAaFN2Qsfv/BWb4oZfT5V9v3hWKQUk2VaSW1fqhPptM4dnbLa\n7ofGN9p5Wn4PQAH8CGGExHm8xSnh2Hga\n-----END CERTIFICATE-----\n-----BEGIN RSA PRIVATE KEY-----", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2162467478", + "repo_full_name": "chef/chef", + "pr_number": 15058, + "pr_file": "spec/data/trusted_certs/opscode.pem", + "discussion_id": "2162467478", + "commented_code": "@@ -1,36 +1,47 @@\n -----BEGIN CERTIFICATE-----\n-MIIGTjCCBTagAwIBAgIQBK55YGZmkBq5xX+mbFvczTANBgkqhkiG9w0BAQsFADBl\n-MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3\n-d3cuZGlnaWNlcnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBBc3N1cmVkIElEIFJv\n-b3QgQ0EwHhcNMTMxMTA1MTIwMDAwWhcNMjgxMTA1MTIwMDAwWjBlMQswCQYDVQQG\n-EwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cuZGlnaWNl\n-cnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBTSEEyIEFzc3VyZWQgSUQgQ0EwggEi\n-MA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDc+BEjP2q178AneRstBYeiEEMx\n-3w7UFRtPd6Qizj6McPC+B47dJyq8AR22LArK3WlYH0HtagUf2mN4WR4iLCv4un7J\n-NTtW8R98Qn4lsCMZxkU41z1E+SB8YK4csFoYBL6PO/ep8JSapgxjSbZBF1NAMr1P\n-5lB6UB8lRejxia/N/17/UPPwFxH/vcWJ9b1iudj7jkUEhW2ZzcVITf0mqwI2Reo2\n-119q4hqCQQrc6dn1kReOxiGtODwT5h5/ZpzVTdlG2vbPUqd9OyTDtMFRNcab69Tv\n-fuR7A+FEvXoLN+BPy4KKDXEY5KbgiSwb87JzPMGwkp4Yfb2rfcV9CKEswp9zAgMB\n-AAGjggL4MIIC9DASBgNVHRMBAf8ECDAGAQH/AgEAMA4GA1UdDwEB/wQEAwIBhjA0\n-BggrBgEFBQcBAQQoMCYwJAYIKwYBBQUHMAGGGGh0dHA6Ly9vY3NwLmRpZ2ljZXJ0\n-LmNvbTCBgQYDVR0fBHoweDA6oDigNoY0aHR0cDovL2NybDQuZGlnaWNlcnQuY29t\n-L0RpZ2lDZXJ0QXNzdXJlZElEUm9vdENBLmNybDA6oDigNoY0aHR0cDovL2NybDMu\n-ZGlnaWNlcnQuY29tL0RpZ2lDZXJ0QXNzdXJlZElEUm9vdENBLmNybDAdBgNVHSUE\n-FjAUBggrBgEFBQcDAgYIKwYBBQUHAwQwggGzBgNVHSAEggGqMIIBpjCCAaIGCmCG\n-SAGG/WwAAgQwggGSMCgGCCsGAQUFBwIBFhxodHRwczovL3d3dy5kaWdpY2VydC5j\n-b20vQ1BTMIIBZAYIKwYBBQUHAgIwggFWHoIBUgBBAG4AeQAgAHUAcwBlACAAbwBm\n-ACAAdABoAGkAcwAgAEMAZQByAHQAaQBmAGkAYwBhAHQAZQAgAGMAbwBuAHMAdABp\n-AHQAdQB0AGUAcwAgAGEAYwBjAGUAcAB0AGEAbgBjAGUAIABvAGYAIAB0AGgAZQAg\n-AEQAaQBnAGkAQwBlAHIAdAAgAEMAUAAvAEMAUABTACAAYQBuAGQAIAB0AGgAZQAg\n-AFIAZQBsAHkAaQBuAGcAIABQAGEAcgB0AHkAIABBAGcAcgBlAGUAbQBlAG4AdAAg\n-AHcAaABpAGMAaAAgAGwAaQBtAGkAdAAgAGwAaQBhAGIAaQBsAGkAdAB5ACAAYQBu\n-AGQAIABhAHIAZQAgAGkAbgBjAG8AcgBwAG8AcgBhAHQAZQBkACAAaABlAHIAZQBp\n-AG4AIABiAHkAIAByAGUAZgBlAHIAZQBuAGMAZQAuMB0GA1UdDgQWBBTnAiOAAE/Y\n-17yUC9k/dDlJMjyKeTAfBgNVHSMEGDAWgBRF66Kv9JLLgjEtUYunpyGd823IDzAN\n-BgkqhkiG9w0BAQsFAAOCAQEATtSJJ7n9HYd3fg8oBZDxCi/JOz69k5yQxq/6kVGH\n-MlRr6MrBcVFcmY61+uBiGZmmB5p8Eyfb5QKihBLZFfYKRFfENI9tcx861qABPd7j\n-guRFa7LrJf2AXh05kL5bQvbOkWDj+aBWDEgQzjNoe82Tq/Bqy09YD7l7XRsEgZ6n\n-IuJXSSfukpMIvmkIUwI6Ll3IGfRQgE4C2bBdkbSTh/mWloFVQI5m7YLYuyhf7Uxh\n-7QZYKBlTEUS8RyApsgRs2IlUmTt122d4LB6SeMZVPVgSETJuvUMMTTTbe8ZC2+y+\n-q5thTAaS447fISpQVwTAYKI11SSeZjcJSc/V+GWz4OJuwg==\n+MIIDRDCCAiygAwIBAgIBAzANBgkqhkiG9w0BAQsFADA4MQswCQYDVQQGEwJVUzEQ\n+MA4GA1UECgwHVGVzdE9yZzEXMBUGA1UEAwwOSW50ZXJtZWRpYXRlQ0EwHhcNMjUw\n+NjIzMjAyMDUwWhcNMjYwNjIzMjAyMDUwWjAxMQswCQYDVQQGEwJVUzEQMA4GA1UE\n+CgwHVGVzdE9yZzEQMA4GA1UEAwwHb3BzY29kZTCCASIwDQYJKoZIhvcNAQEBBQAD\n+ggEPADCCAQoCggEBAM7L6OKflUY72ui8LiGYHQ2/7ZQtPVb8LarLaE+RCNGuFKxx\n+N6eN+aEjXeMulvQOuOXuMMxU/PI2xN+H2kXbUi2dE2tfMH3w7MqmY0b2IPW4Xsvr\n+QKc0iZDEsRwy8AJJfEaSuYkheRXJ1t2ijS9qCUzgoEhegSWbULp0jVER74Lkk3RN\n+Dhuare8SMIfSjFZQy9yugg02SgTSpciA4TDUNGxyv9O+111bCRfbQjLDz1WsAk6P\n+Me5ehP6B6/6GumE47SsSt3QgXOuDS6wuI6yR3YBYTDHglURax5GB1jxMGFRXk/q0\n+PueDiWyhYJEHySsgX1GI6kuzpm9cVjzQkvqs49MCAwEAAaNgMF4wDAYDVR0TAQH/\n+BAIwADAOBgNVHQ8BAf8EBAMCB4AwHQYDVR0OBBYEFJ/+n6xxe6FQ682tbP1EWVcE\n+2rXyMB8GA1UdIwQYMBaA
FK+rTUh0WPTUPLtuZqpLuITZfrieMA0GCSqGSIb3DQEB\n+CwUAA4IBAQBqgcMrGofwE4KPTcXLsYKd/xlOoTZDzjCI4wdlA1bLtIb46hbWCUJR\n+cCmFuMHocSsPL99Fw03xsoToI8g2JlkrIRXtuWXwiDXnDdmgBwZVqIcgTxg0a0Yn\n+6EpL8vxYJT1AZc0Os74qUCiJ+kHzlXkN+6SjilPnOIfuCtOEL+aHW6DXk7lkSF6d\n+fMRJZR2l7HJkCy3IqZGxkM4hgbKd/VOpF++UF/YgkN3f+QtsPbrDAWJHZD96yOjM\n+bvDssC1rAS1HbjztAaFN2Qsfv/BWb4oZfT5V9v3hWKQUk2VaSW1fqhPptM4dnbLa\n+7ofGN9p5Wn4PQAH8CGGExHm8xSnh2Hga\n -----END CERTIFICATE-----\n+-----BEGIN RSA PRIVATE KEY-----", + "comment_created_at": "2025-06-23T20:24:41+00:00", + "comment_author": "github-advanced-security[bot]", + "comment_body": "## Cryptographic private keys should not be disclosed\n\nMake sure this private key gets revoked, changed, and removed from the code.

See more on SonarQube Cloud
              \n\n[Show more details](https://github.com/chef/chef/security/code-scanning/283)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2162467480", + "pr_number": 15058, + "pr_file": "spec/data/trusted_certs/root.pem", + "created_at": "2025-06-23T20:24:41+00:00", + "commented_code": "-----BEGIN CERTIFICATE-----\nMIIDrzCCApegAwIBAgIQCDvgVpBCRrGhdWrJWZHHSjANBgkqhkiG9w0BAQUFADBh\nMQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3\nd3cuZGlnaWNlcnQuY29tMSAwHgYDVQQDExdEaWdpQ2VydCBHbG9iYWwgUm9vdCBD\nQTAeFw0wNjExMTAwMDAwMDBaFw0zMTExMTAwMDAwMDBaMGExCzAJBgNVBAYTAlVT\nMRUwEwYDVQQKEwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5j\nb20xIDAeBgNVBAMTF0RpZ2lDZXJ0IEdsb2JhbCBSb290IENBMIIBIjANBgkqhkiG\n9w0BAQEFAAOCAQ8AMIIBCgKCAQEA4jvhEXLeqKTTo1eqUKKPC3eQyaKl7hLOllsB\nCSDMAZOnTjC3U/dDxGkAV53ijSLdhwZAAIEJzs4bg7/fzTtxRuLWZscFs3YnFo97\nnh6Vfe63SKMI2tavegw5BmV/Sl0fvBf4q77uKNd0f3p4mVmFaG5cIzJLv07A6Fpt\n43C/dxC//AH2hdmoRBBYMql1GNXRor5H4idq9Joz+EkIYIvUX7Q6hL+hqkpMfT7P\nT19sdl6gSzeRntwi5m3OFBqOasv+zbMUZBfHWymeMr/y7vrTC0LUq7dBMtoM1O/4\ngdW7jVg/tRvoSSiicNoxBN33shbyTApOB6jtSj1etX+jkMOvJwIDAQABo2MwYTAO\nBgNVHQ8BAf8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUA95QNVbR\nTLtm8KPiGxvDl7I90VUwHwYDVR0jBBgwFoAUA95QNVbRTLtm8KPiGxvDl7I90VUw\nDQYJKoZIhvcNAQEFBQADggEBAMucN6pIExIK+t1EnE9SsPTfrgT1eXkIoyQY/Esr\nhMAtudXH/vTBH1jLuG2cenTnmCmrEbXjcKChzUyImZOMkXDiqw8cvpOp/2PV5Adg\n06O/nVsJ8dWO41P0jmP6P6fbtGbfYmbW0W5BjfIttep3Sp+dWOIrWcBAI+0tKIJF\nPnlUkiaY4IBIqDfv8NZ5YBberOgOzW6sRBc4L0na4UU+Krk2U886UAb3LujEV0ls\nYSEY1QSteDwsOoBrp+uvFRTp2InBuThs4pFsiv9kuXclVzDAGySj4dzp30d8tbQk\nCAUw7C29C79Fv1C5qfPrmAESrciIxpg0X40KPMbp1ZWVbd4=\nMIIDPjCCAiagAwIBAgIBATANBgkqhkiG9w0BAQsFADAwMQswCQYDVQQGEwJVUzEQ\nMA4GA1UECgwHVGVzdE9yZzEPMA0GA1UEAwwGUm9vdENBMB4XDTI1MDYyMzIwMjA1\nMFoXDTMwMDYyMjIwMjA1MFowMDELMAkGA1UEBhMCVVMxEDAOBgNVBAoMB1Rlc3RP\ncmcxDzANBgNVBAMMBlJvb3RDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC\nggEBANLtMQLXYptRVUSLiIirbRSfYZj9MzeLrWpF4ZcCs58FB5RJWoRWj7HXTL2N\na08y962pFBJuOyJ9ufPEQOjiPFVyy0bkIcOaNkekJ0MMin2FJnuKDIYzeNKogfqU\nhvWZosVev4m1wDvPqtVwan7iwUStnmOJBESUs0XwmVmTstRODmbMhW6l40sJA7e3\nSwJ0GarZCzIg1ZWUkSFRwh+vO9OzArakhIV4zDuHJHQJoBeStFWuKLOJQo9J51dt\ndfBH0xj/cE2lsCfMBItYsO5gOHllhvCEd6nxc5+S+a9jc0t04N1otR0A4F2x9ARt\nK/fpKJw4EMp7cJn06wP4mpjLN6ECAwEAAaNjMGEwDwYDVR0TAQH/BAUwAwEB/zAO\nBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFBMUsvb4JCQ62pOC5jKaCXnJYJrNMB8G\nA1UdIwQYMBaAFBMUsvb4JCQ62pOC5jKaCXnJYJrNMA0GCSqGSIb3DQEBCwUAA4IB\nAQC76EndKB5+u+U/pQ7AgZCp4pTJTWzTRs+iBum9qVn60ndB/37jsxDJLyDAHlaB\n6jkj2A5n28ucdtEAygHj+4wKGbDY8bCnhg1COMK4Db4h0FSYAUq9OEWdiC6LVrBe\nkMBJJGUSzKzmonSp26LU3TdoBV5BWAvqwsGPfpyRAEPKblqtjh++3NccOjMPlcS0\nIRydyxIBzTMqM41radvAJ+5kl6X92XB1kJnqslRxh00ZAX9CdrGc8mozDyYlbvno\nfYFREnPETFukDP87s3jtVy2Suf+qczu2d9qT6dd9oAY47ypCdX2xag5HFtZgpb15\n6AM5YrQtrIGvAoXGfuKDBTPA\n-----END CERTIFICATE-----\n-----BEGIN RSA PRIVATE KEY-----", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2162467480", + "repo_full_name": "chef/chef", + "pr_number": 15058, + "pr_file": "spec/data/trusted_certs/root.pem", + "discussion_id": "2162467480", + "commented_code": "@@ -1,22 +1,47 @@\n -----BEGIN 
CERTIFICATE-----\n-MIIDrzCCApegAwIBAgIQCDvgVpBCRrGhdWrJWZHHSjANBgkqhkiG9w0BAQUFADBh\n-MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3\n-d3cuZGlnaWNlcnQuY29tMSAwHgYDVQQDExdEaWdpQ2VydCBHbG9iYWwgUm9vdCBD\n-QTAeFw0wNjExMTAwMDAwMDBaFw0zMTExMTAwMDAwMDBaMGExCzAJBgNVBAYTAlVT\n-MRUwEwYDVQQKEwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5j\n-b20xIDAeBgNVBAMTF0RpZ2lDZXJ0IEdsb2JhbCBSb290IENBMIIBIjANBgkqhkiG\n-9w0BAQEFAAOCAQ8AMIIBCgKCAQEA4jvhEXLeqKTTo1eqUKKPC3eQyaKl7hLOllsB\n-CSDMAZOnTjC3U/dDxGkAV53ijSLdhwZAAIEJzs4bg7/fzTtxRuLWZscFs3YnFo97\n-nh6Vfe63SKMI2tavegw5BmV/Sl0fvBf4q77uKNd0f3p4mVmFaG5cIzJLv07A6Fpt\n-43C/dxC//AH2hdmoRBBYMql1GNXRor5H4idq9Joz+EkIYIvUX7Q6hL+hqkpMfT7P\n-T19sdl6gSzeRntwi5m3OFBqOasv+zbMUZBfHWymeMr/y7vrTC0LUq7dBMtoM1O/4\n-gdW7jVg/tRvoSSiicNoxBN33shbyTApOB6jtSj1etX+jkMOvJwIDAQABo2MwYTAO\n-BgNVHQ8BAf8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUA95QNVbR\n-TLtm8KPiGxvDl7I90VUwHwYDVR0jBBgwFoAUA95QNVbRTLtm8KPiGxvDl7I90VUw\n-DQYJKoZIhvcNAQEFBQADggEBAMucN6pIExIK+t1EnE9SsPTfrgT1eXkIoyQY/Esr\n-hMAtudXH/vTBH1jLuG2cenTnmCmrEbXjcKChzUyImZOMkXDiqw8cvpOp/2PV5Adg\n-06O/nVsJ8dWO41P0jmP6P6fbtGbfYmbW0W5BjfIttep3Sp+dWOIrWcBAI+0tKIJF\n-PnlUkiaY4IBIqDfv8NZ5YBberOgOzW6sRBc4L0na4UU+Krk2U886UAb3LujEV0ls\n-YSEY1QSteDwsOoBrp+uvFRTp2InBuThs4pFsiv9kuXclVzDAGySj4dzp30d8tbQk\n-CAUw7C29C79Fv1C5qfPrmAESrciIxpg0X40KPMbp1ZWVbd4=\n+MIIDPjCCAiagAwIBAgIBATANBgkqhkiG9w0BAQsFADAwMQswCQYDVQQGEwJVUzEQ\n+MA4GA1UECgwHVGVzdE9yZzEPMA0GA1UEAwwGUm9vdENBMB4XDTI1MDYyMzIwMjA1\n+MFoXDTMwMDYyMjIwMjA1MFowMDELMAkGA1UEBhMCVVMxEDAOBgNVBAoMB1Rlc3RP\n+cmcxDzANBgNVBAMMBlJvb3RDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC\n+ggEBANLtMQLXYptRVUSLiIirbRSfYZj9MzeLrWpF4ZcCs58FB5RJWoRWj7HXTL2N\n+a08y962pFBJuOyJ9ufPEQOjiPFVyy0bkIcOaNkekJ0MMin2FJnuKDIYzeNKogfqU\n+hvWZosVev4m1wDvPqtVwan7iwUStnmOJBESUs0XwmVmTstRODmbMhW6l40sJA7e3\n+SwJ0GarZCzIg1ZWUkSFRwh+vO9OzArakhIV4zDuHJHQJoBeStFWuKLOJQo9J51dt\n+dfBH0xj/cE2lsCfMBItYsO5gOHllhvCEd6nxc5+S+a9jc0t04N1otR0A4F2x9ARt\n+K/fpKJw4EMp7cJn06wP4mpjLN6ECAwEAAaNjMGEwDwYDVR0TAQH/BAUwAwEB/zAO\n+BgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFBMUsvb4JCQ62pOC5jKaCXnJYJrNMB8G\n+A1UdIwQYMBaAFBMUsvb4JCQ62pOC5jKaCXnJYJrNMA0GCSqGSIb3DQEBCwUAA4IB\n+AQC76EndKB5+u+U/pQ7AgZCp4pTJTWzTRs+iBum9qVn60ndB/37jsxDJLyDAHlaB\n+6jkj2A5n28ucdtEAygHj+4wKGbDY8bCnhg1COMK4Db4h0FSYAUq9OEWdiC6LVrBe\n+kMBJJGUSzKzmonSp26LU3TdoBV5BWAvqwsGPfpyRAEPKblqtjh++3NccOjMPlcS0\n+IRydyxIBzTMqM41radvAJ+5kl6X92XB1kJnqslRxh00ZAX9CdrGc8mozDyYlbvno\n+fYFREnPETFukDP87s3jtVy2Suf+qczu2d9qT6dd9oAY47ypCdX2xag5HFtZgpb15\n+6AM5YrQtrIGvAoXGfuKDBTPA\n -----END CERTIFICATE-----\n+-----BEGIN RSA PRIVATE KEY-----", + "comment_created_at": "2025-06-23T20:24:41+00:00", + "comment_author": "github-advanced-security[bot]", + "comment_body": "## Cryptographic private keys should not be disclosed\n\nMake sure this private key gets revoked, changed, and removed from the code.

See more on SonarQube Cloud
              \n\n[Show more details](https://github.com/chef/chef/security/code-scanning/285)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2162660890", + "pr_number": 15065, + "pr_file": "spec/data/trusted_certs/intermediate.pem", + "created_at": "2025-06-23T23:01:44+00:00", + "commented_code": "-----BEGIN CERTIFICATE-----\r\nMIIGrTCCBJWgAwIBAgIQDo0oQK5IJZBWGLOoqeF6RzANBgkqhkiG9w0BAQwFADBJ\r\nMQswCQYDVQQGEwJVUzEXMBUGA1UEChMORGlnaUNlcnQsIEluYy4xITAfBgNVBAMT\r\nGERpZ2lDZXJ0IFJTQTQwOTYgUm9vdCBHNTAeFw0yMTA0MTQwMDAwMDBaFw0zMTA0\r\nMTMyMzU5NTlaMFQxCzAJBgNVBAYTAlVTMRcwFQYDVQQKEw5EaWdpQ2VydCwgSW5j\r\nLjEsMCoGA1UEAxMjRGlnaUNlcnQgRzUgUlNBNDA5NiBTSEEzODQgMjAyMSBDQTEw\r\nggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDCwLlUmeGwUTj93uzejg2I\r\ntHjaSqm+knZ8az09cBAZFLFU9sKDzBHgf43/GpIWIHGLDUGXXZkKtkjJhl6POqda\r\nXWt/4avSsQgkELz2uefSxhzELBl4o1U50EULTlri3zUBQ11Jr/hfJLxdMAJqKv21\r\niVD8GfFDs12Hy08h7IxuA5ROVdBQS2OiU/6Vd4A3uVpzyjaxQsfAvkwz9+3jsozf\r\nG+kWW+6Fxa3Vt4EbX+3afaBLeIyBlQvPd3pUY8irY3T6MHlglEblraxyGZ3ifvFu\r\nVt7S98D5+U4CMFzzGSzCCqMxTkgasTMhP8+PjXRN+mL56xyfw/uVmN9vRPqgbRUD\r\ng95zx+CRFXgpUQ8yslpl+ECSqCe0cYxm+jWz00VFWtUZAwpE4REGOVdmNGrfNR16\r\nh7dggpFVfeFy7qCwd9up/sWkBmkZB1zL9ENjg68EH5aEbh+jlbF6HuLv4+jibVlD\r\n/r+ZW/vJgnMXmUYW1gDl3L//vQ/V4ElqRYzxsSVsq3dwW0SYzI31PKFEb8sqI5IN\r\nP10MtFtZ1DgISF9I8LJ35dBDqguoonGC0/d+iq2S7ipcpFIo/u3tK/Nu0QvKMEN6\r\nDlx6Yhssscj2PhiADKjhRnweWUj/2eKuX8Cb6UmXvh+R4Dm0iEIGop1/r37GUo0z\r\nnqNszrYZz1zd4GWG6puFWQIDAQABo4IBhDCCAYAwEgYDVR0TAQH/BAgwBgEB/wIB\r\nADAdBgNVHQ4EFgQUbYE39zhEfkdCe1al7Lt3ZyEJ9DwwHwYDVR0jBBgwFoAUYm23\r\nkU/E6qNiYI+g0L61jwZ8aAAwDgYDVR0PAQH/BAQDAgGGMB0GA1UdJQQWMBQGCCsG\r\nAQUFBwMBBggrBgEFBQcDAjB3BggrBgEFBQcBAQRrMGkwJAYIKwYBBQUHMAGGGGh0\r\ndHA6Ly9vY3NwLmRpZ2ljZXJ0LmNvbTBBBggrBgEFBQcwAoY1aHR0cDovL2NhY2Vy\r\ndHMuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0UlNBNDA5NlJvb3RHNS5jcnQwQwYDVR0f\r\nBDwwOjA4oDagNIYyaHR0cDovL2NybDMuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0UlNB\r\nNDA5NlJvb3RHNS5jcmwwPQYDVR0gBDYwNDALBglghkgBhv1sAgEwBwYFZ4EMAQEw\r\nCAYGZ4EMAQIBMAgGBmeBDAECAjAIBgZngQwBAgMwDQYJKoZIhvcNAQEMBQADggIB\r\nAGHJE9aY60MSsfdEfqIcrdE0c1dXxis9E1l9at6g18Jpyc1C6PsUHdmo6rJWq8Xe\r\nNNPkD/4fKhJsrd9TRlUlpIgKiJZW1ituKHV6Ghm7DIRSyx0aMpP9NJ3heV3CIgZr\r\nMLtJEFuG5WfolWIfu7sle2lYjA3HxA/xQo803jGOhxbEDX/BTzHo/1X7YGvwpRqJ\r\n+7J1B+2l+TA1r9vAlLfIDQRazVYRNxHpJDOwU0ffKaEPbRrgPtogO+8hLSml9Zoe\r\nY8w94f31XbvBFxSbSVpX+/QctNdwx2VuIoRcT8WZ0lZ9aenna5q5AE1C8oTtbw2T\r\nqoz4NCaM5XPgjvb0DGPBeH8jWveNo1BmClQA2qYXL55f00m8AZ4Hf6oYANt/zbuM\r\nQPhAoSHWwW4V4Pug3XPXM70LlY50y9kPD/57eHryhO2oXQLLx+l6mg8xzL6vKsHT\r\nE30whFM32vVTpjejLZ9hJBAJURFaUrH2TZyAmoVbCNy50yuHYQ6FooYpbsbnpYPi\r\nKW/E9bc201rqm/GQOWJ4zOJ8a5Etn3zY+rlPaxjJvxc3pSMfgtwwrm9KGXHsI1Gf\r\nULMwUbXclKV2qR8d6ECtUOIRxoQKutN85lmwB05yddu6uQQg0hHeaGFUk7EU90SV\r\nib/FA/op9sXfS3CkOnHQISY0JbWxrzC6eHaKeQi6lR1I\r\n-----END CERTIFICATE-----\r\n-----BEGIN 
CERTIFICATE-----\nMIIDSTCCAjGgAwIBAgIBAjANBgkqhkiG9w0BAQsFADAwMQswCQYDVQQGEwJVUzEQ\nMA4GA1UECgwHVGVzdE9yZzEPMA0GA1UEAwwGUm9vdENBMB4XDTI1MDYyMzIyNTgz\nNloXDTI4MDYyMjIyNTgzNlowODELMAkGA1UEBhMCVVMxEDAOBgNVBAoMB1Rlc3RP\ncmcxFzAVBgNVBAMMDkludGVybWVkaWF0ZUNBMIIBIjANBgkqhkiG9w0BAQEFAAOC\nAQ8AMIIBCgKCAQEA8eYKQYc01Fkg05bCyLIz7ql4BSyJIQr9JiszF4u3gQL1TQeA\nkPkzjQxFAyx271M/PLMBfQPCnjST/ANG41oFIHi9QkwZZUtd2fzMN3xasE3IA0Qx\nGLF3sEnlX3U7pn/tshnuie3XVfyjg+vOnkdQaazVY+2+sPaGSzRktVPFmS4/6a+g\nrf6jvrmRo59/u405yLgOQs41PAcrpG/DMssPsqIdFrSbeXRM10fyo2Oo4UDM08Uv\nOUMZK9LbbFOMKU+YDoIX4QKdeo6tf/zJAJsfyz/x42MGXyP+c6tbH1v7KNw9FGkN\n6O88EeWUtLr+CXXa6SrlGBQfuA+kzoIhRbTcYwIDAQABo2YwZDASBgNVHRMBAf8E\nCDAGAQH/AgEAMA4GA1UdDwEB/wQEAwIBBjAdBgNVHQ4EFgQUEBoB0gRIRNzLuuVY\nwNtSSPeyFCIwHwYDVR0jBBgwFoAU0jIhxNIirUycwI3J5aA4zEQu2/EwDQYJKoZI\nhvcNAQELBQADggEBAIm610QOb0ZRtxKLLoOtBgH4UqLuGqiWnVvIkc1FKVYlrtTJ\ndh27S0KhtducOKRhYp9XINBCD2QfsFwZhAeo2BE4FffbyH2R0u79kuqojufooaPW\n6UiKZKkzdo0g+0Hv00GrXGqS8Q1NqPoLRr33KeB72Od6xZmGNJF3NI0urP0I9jVk\nXohMrCqD39S8XQ7CYXhLW4onRiiQHuZe6frDzJX2NdwtXqd+W0ujvKfb33mdK+C5\nGZi8duiHK7nT6vBrsVy6R9YCPQbDWBc34dKShx01Ml+3BvJ0jGEiY5jYukYylGwJ\nXyatDe8dz3g4KGfGnSaMhgf154VDLIuXz/U0BUE=\n-----END CERTIFICATE-----\n-----BEGIN RSA PRIVATE KEY-----", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2162660890", + "repo_full_name": "chef/chef", + "pr_number": 15065, + "pr_file": "spec/data/trusted_certs/intermediate.pem", + "discussion_id": "2162660890", + "commented_code": "@@ -1,38 +1,47 @@\n------BEGIN CERTIFICATE-----\r\n-MIIGrTCCBJWgAwIBAgIQDo0oQK5IJZBWGLOoqeF6RzANBgkqhkiG9w0BAQwFADBJ\r\n-MQswCQYDVQQGEwJVUzEXMBUGA1UEChMORGlnaUNlcnQsIEluYy4xITAfBgNVBAMT\r\n-GERpZ2lDZXJ0IFJTQTQwOTYgUm9vdCBHNTAeFw0yMTA0MTQwMDAwMDBaFw0zMTA0\r\n-MTMyMzU5NTlaMFQxCzAJBgNVBAYTAlVTMRcwFQYDVQQKEw5EaWdpQ2VydCwgSW5j\r\n-LjEsMCoGA1UEAxMjRGlnaUNlcnQgRzUgUlNBNDA5NiBTSEEzODQgMjAyMSBDQTEw\r\n-ggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDCwLlUmeGwUTj93uzejg2I\r\n-tHjaSqm+knZ8az09cBAZFLFU9sKDzBHgf43/GpIWIHGLDUGXXZkKtkjJhl6POqda\r\n-XWt/4avSsQgkELz2uefSxhzELBl4o1U50EULTlri3zUBQ11Jr/hfJLxdMAJqKv21\r\n-iVD8GfFDs12Hy08h7IxuA5ROVdBQS2OiU/6Vd4A3uVpzyjaxQsfAvkwz9+3jsozf\r\n-G+kWW+6Fxa3Vt4EbX+3afaBLeIyBlQvPd3pUY8irY3T6MHlglEblraxyGZ3ifvFu\r\n-Vt7S98D5+U4CMFzzGSzCCqMxTkgasTMhP8+PjXRN+mL56xyfw/uVmN9vRPqgbRUD\r\n-g95zx+CRFXgpUQ8yslpl+ECSqCe0cYxm+jWz00VFWtUZAwpE4REGOVdmNGrfNR16\r\n-h7dggpFVfeFy7qCwd9up/sWkBmkZB1zL9ENjg68EH5aEbh+jlbF6HuLv4+jibVlD\r\n-/r+ZW/vJgnMXmUYW1gDl3L//vQ/V4ElqRYzxsSVsq3dwW0SYzI31PKFEb8sqI5IN\r\n-P10MtFtZ1DgISF9I8LJ35dBDqguoonGC0/d+iq2S7ipcpFIo/u3tK/Nu0QvKMEN6\r\n-Dlx6Yhssscj2PhiADKjhRnweWUj/2eKuX8Cb6UmXvh+R4Dm0iEIGop1/r37GUo0z\r\n-nqNszrYZz1zd4GWG6puFWQIDAQABo4IBhDCCAYAwEgYDVR0TAQH/BAgwBgEB/wIB\r\n-ADAdBgNVHQ4EFgQUbYE39zhEfkdCe1al7Lt3ZyEJ9DwwHwYDVR0jBBgwFoAUYm23\r\n-kU/E6qNiYI+g0L61jwZ8aAAwDgYDVR0PAQH/BAQDAgGGMB0GA1UdJQQWMBQGCCsG\r\n-AQUFBwMBBggrBgEFBQcDAjB3BggrBgEFBQcBAQRrMGkwJAYIKwYBBQUHMAGGGGh0\r\n-dHA6Ly9vY3NwLmRpZ2ljZXJ0LmNvbTBBBggrBgEFBQcwAoY1aHR0cDovL2NhY2Vy\r\n-dHMuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0UlNBNDA5NlJvb3RHNS5jcnQwQwYDVR0f\r\n-BDwwOjA4oDagNIYyaHR0cDovL2NybDMuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0UlNB\r\n-NDA5NlJvb3RHNS5jcmwwPQYDVR0gBDYwNDALBglghkgBhv1sAgEwBwYFZ4EMAQEw\r\n-CAYGZ4EMAQIBMAgGBmeBDAECAjAIBgZngQwBAgMwDQYJKoZIhvcNAQEMBQADggIB\r\n-AGHJE9aY60MSsfdEfqIcrdE0c1dXxis9E1l9at6g18Jpyc1C6PsUHdmo6rJWq8Xe\r\n-NNPkD/4fKhJsrd9TRlUlpIgKiJZW1ituKHV6Ghm7DIRSyx0aMpP9NJ3heV3CIgZr\r\n-MLtJEFuG5WfolWIfu7sle2lYjA3HxA/xQo803jGOhxbEDX/BTzHo/1X7YGvwpRqJ\r\n-+7J1B+2l+TA1r9vAlLfIDQRazVYRNxHpJDOwU0ffKaEPbRrgPtogO+8hLSml9Zoe\r\n
-Y8w94f31XbvBFxSbSVpX+/QctNdwx2VuIoRcT8WZ0lZ9aenna5q5AE1C8oTtbw2T\r\n-qoz4NCaM5XPgjvb0DGPBeH8jWveNo1BmClQA2qYXL55f00m8AZ4Hf6oYANt/zbuM\r\n-QPhAoSHWwW4V4Pug3XPXM70LlY50y9kPD/57eHryhO2oXQLLx+l6mg8xzL6vKsHT\r\n-E30whFM32vVTpjejLZ9hJBAJURFaUrH2TZyAmoVbCNy50yuHYQ6FooYpbsbnpYPi\r\n-KW/E9bc201rqm/GQOWJ4zOJ8a5Etn3zY+rlPaxjJvxc3pSMfgtwwrm9KGXHsI1Gf\r\n-ULMwUbXclKV2qR8d6ECtUOIRxoQKutN85lmwB05yddu6uQQg0hHeaGFUk7EU90SV\r\n-ib/FA/op9sXfS3CkOnHQISY0JbWxrzC6eHaKeQi6lR1I\r\n------END CERTIFICATE-----\r\n+-----BEGIN CERTIFICATE-----\n+MIIDSTCCAjGgAwIBAgIBAjANBgkqhkiG9w0BAQsFADAwMQswCQYDVQQGEwJVUzEQ\n+MA4GA1UECgwHVGVzdE9yZzEPMA0GA1UEAwwGUm9vdENBMB4XDTI1MDYyMzIyNTgz\n+NloXDTI4MDYyMjIyNTgzNlowODELMAkGA1UEBhMCVVMxEDAOBgNVBAoMB1Rlc3RP\n+cmcxFzAVBgNVBAMMDkludGVybWVkaWF0ZUNBMIIBIjANBgkqhkiG9w0BAQEFAAOC\n+AQ8AMIIBCgKCAQEA8eYKQYc01Fkg05bCyLIz7ql4BSyJIQr9JiszF4u3gQL1TQeA\n+kPkzjQxFAyx271M/PLMBfQPCnjST/ANG41oFIHi9QkwZZUtd2fzMN3xasE3IA0Qx\n+GLF3sEnlX3U7pn/tshnuie3XVfyjg+vOnkdQaazVY+2+sPaGSzRktVPFmS4/6a+g\n+rf6jvrmRo59/u405yLgOQs41PAcrpG/DMssPsqIdFrSbeXRM10fyo2Oo4UDM08Uv\n+OUMZK9LbbFOMKU+YDoIX4QKdeo6tf/zJAJsfyz/x42MGXyP+c6tbH1v7KNw9FGkN\n+6O88EeWUtLr+CXXa6SrlGBQfuA+kzoIhRbTcYwIDAQABo2YwZDASBgNVHRMBAf8E\n+CDAGAQH/AgEAMA4GA1UdDwEB/wQEAwIBBjAdBgNVHQ4EFgQUEBoB0gRIRNzLuuVY\n+wNtSSPeyFCIwHwYDVR0jBBgwFoAU0jIhxNIirUycwI3J5aA4zEQu2/EwDQYJKoZI\n+hvcNAQELBQADggEBAIm610QOb0ZRtxKLLoOtBgH4UqLuGqiWnVvIkc1FKVYlrtTJ\n+dh27S0KhtducOKRhYp9XINBCD2QfsFwZhAeo2BE4FffbyH2R0u79kuqojufooaPW\n+6UiKZKkzdo0g+0Hv00GrXGqS8Q1NqPoLRr33KeB72Od6xZmGNJF3NI0urP0I9jVk\n+XohMrCqD39S8XQ7CYXhLW4onRiiQHuZe6frDzJX2NdwtXqd+W0ujvKfb33mdK+C5\n+GZi8duiHK7nT6vBrsVy6R9YCPQbDWBc34dKShx01Ml+3BvJ0jGEiY5jYukYylGwJ\n+XyatDe8dz3g4KGfGnSaMhgf154VDLIuXz/U0BUE=\n+-----END CERTIFICATE-----\n+-----BEGIN RSA PRIVATE KEY-----", + "comment_created_at": "2025-06-23T23:01:44+00:00", + "comment_author": "github-advanced-security[bot]", + "comment_body": "## Cryptographic private keys should not be disclosed\n\nMake sure this private key gets revoked, changed, and removed from the code.

See more on SonarQube Cloud
              \n\n[Show more details](https://github.com/chef/chef/security/code-scanning/287)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2162660892", + "pr_number": 15065, + "pr_file": "spec/data/trusted_certs/opscode.pem", + "created_at": "2025-06-23T23:01:44+00:00", + "commented_code": "-----BEGIN CERTIFICATE-----\nMIIGTjCCBTagAwIBAgIQBK55YGZmkBq5xX+mbFvczTANBgkqhkiG9w0BAQsFADBl\nMQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3\nd3cuZGlnaWNlcnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBBc3N1cmVkIElEIFJv\nb3QgQ0EwHhcNMTMxMTA1MTIwMDAwWhcNMjgxMTA1MTIwMDAwWjBlMQswCQYDVQQG\nEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cuZGlnaWNl\ncnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBTSEEyIEFzc3VyZWQgSUQgQ0EwggEi\nMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDc+BEjP2q178AneRstBYeiEEMx\n3w7UFRtPd6Qizj6McPC+B47dJyq8AR22LArK3WlYH0HtagUf2mN4WR4iLCv4un7J\nNTtW8R98Qn4lsCMZxkU41z1E+SB8YK4csFoYBL6PO/ep8JSapgxjSbZBF1NAMr1P\n5lB6UB8lRejxia/N/17/UPPwFxH/vcWJ9b1iudj7jkUEhW2ZzcVITf0mqwI2Reo2\n119q4hqCQQrc6dn1kReOxiGtODwT5h5/ZpzVTdlG2vbPUqd9OyTDtMFRNcab69Tv\nfuR7A+FEvXoLN+BPy4KKDXEY5KbgiSwb87JzPMGwkp4Yfb2rfcV9CKEswp9zAgMB\nAAGjggL4MIIC9DASBgNVHRMBAf8ECDAGAQH/AgEAMA4GA1UdDwEB/wQEAwIBhjA0\nBggrBgEFBQcBAQQoMCYwJAYIKwYBBQUHMAGGGGh0dHA6Ly9vY3NwLmRpZ2ljZXJ0\nLmNvbTCBgQYDVR0fBHoweDA6oDigNoY0aHR0cDovL2NybDQuZGlnaWNlcnQuY29t\nL0RpZ2lDZXJ0QXNzdXJlZElEUm9vdENBLmNybDA6oDigNoY0aHR0cDovL2NybDMu\nZGlnaWNlcnQuY29tL0RpZ2lDZXJ0QXNzdXJlZElEUm9vdENBLmNybDAdBgNVHSUE\nFjAUBggrBgEFBQcDAgYIKwYBBQUHAwQwggGzBgNVHSAEggGqMIIBpjCCAaIGCmCG\nSAGG/WwAAgQwggGSMCgGCCsGAQUFBwIBFhxodHRwczovL3d3dy5kaWdpY2VydC5j\nb20vQ1BTMIIBZAYIKwYBBQUHAgIwggFWHoIBUgBBAG4AeQAgAHUAcwBlACAAbwBm\nACAAdABoAGkAcwAgAEMAZQByAHQAaQBmAGkAYwBhAHQAZQAgAGMAbwBuAHMAdABp\nAHQAdQB0AGUAcwAgAGEAYwBjAGUAcAB0AGEAbgBjAGUAIABvAGYAIAB0AGgAZQAg\nAEQAaQBnAGkAQwBlAHIAdAAgAEMAUAAvAEMAUABTACAAYQBuAGQAIAB0AGgAZQAg\nAFIAZQBsAHkAaQBuAGcAIABQAGEAcgB0AHkAIABBAGcAcgBlAGUAbQBlAG4AdAAg\nAHcAaABpAGMAaAAgAGwAaQBtAGkAdAAgAGwAaQBhAGIAaQBsAGkAdAB5ACAAYQBu\nAGQAIABhAHIAZQAgAGkAbgBjAG8AcgBwAG8AcgBhAHQAZQBkACAAaABlAHIAZQBp\nAG4AIABiAHkAIAByAGUAZgBlAHIAZQBuAGMAZQAuMB0GA1UdDgQWBBTnAiOAAE/Y\n17yUC9k/dDlJMjyKeTAfBgNVHSMEGDAWgBRF66Kv9JLLgjEtUYunpyGd823IDzAN\nBgkqhkiG9w0BAQsFAAOCAQEATtSJJ7n9HYd3fg8oBZDxCi/JOz69k5yQxq/6kVGH\nMlRr6MrBcVFcmY61+uBiGZmmB5p8Eyfb5QKihBLZFfYKRFfENI9tcx861qABPd7j\nguRFa7LrJf2AXh05kL5bQvbOkWDj+aBWDEgQzjNoe82Tq/Bqy09YD7l7XRsEgZ6n\nIuJXSSfukpMIvmkIUwI6Ll3IGfRQgE4C2bBdkbSTh/mWloFVQI5m7YLYuyhf7Uxh\n7QZYKBlTEUS8RyApsgRs2IlUmTt122d4LB6SeMZVPVgSETJuvUMMTTTbe8ZC2+y+\nq5thTAaS447fISpQVwTAYKI11SSeZjcJSc/V+GWz4OJuwg==\nMIIDRDCCAiygAwIBAgIBAzANBgkqhkiG9w0BAQsFADA4MQswCQYDVQQGEwJVUzEQ\nMA4GA1UECgwHVGVzdE9yZzEXMBUGA1UEAwwOSW50ZXJtZWRpYXRlQ0EwHhcNMjUw\nNjIzMjI1ODM2WhcNMjYwNjIzMjI1ODM2WjAxMQswCQYDVQQGEwJVUzEQMA4GA1UE\nCgwHVGVzdE9yZzEQMA4GA1UEAwwHb3BzY29kZTCCASIwDQYJKoZIhvcNAQEBBQAD\nggEPADCCAQoCggEBANDL1IUIf2sOksEgw3AO2PlYZ9usimr2G3OQ3wozAfBnP11x\n8Jy2KTCY3f9tmxNVcQJYDQchaXF9W0YvUnQ5N61ab1i81MxI0alf66isAkb59krs\n1XjY/SKw09362cgQgUrlPO/U8rksJK6Xcrgien9GtxkU9T9S7roRsGSaTj1kc81C\n0UMm84/6vOXEkj9y/U0ce/y2P+pe6ySJZ+4Cnf2dbvb7QUdYjg1DDFvaqo0ctqUE\n7fKi8/zVaydYJBFhp8I8/5h2Z5KG2Gj7ChEy+l6xp9nEg4hMKvNFHxmQP5WmnPsM\nGUPRkTTiIz+GtyG3gPsYOABLJaDUncVD+J3OLv8CAwEAAaNgMF4wDAYDVR0TAQH/\nBAIwADAOBgNVHQ8BAf8EBAMCB4AwHQYDVR0OBBYEFOfOw3p0XmA6bkcfO8hGdhN0\n7LaJMB8GA1UdIwQYMBaAFBAaAdIESETcy7rlWMDbUkj3shQiMA0GCSqGSIb3DQEB\nCwUAA4IBAQBn6+HqXbW0WFdTnFbCQ8+QTBPc8s0IiejoAbY61b8l6o82ajRTAX3V\nIU1/sLREuTjrrFs5ouy6NeCndPPTIulpCAHHWMeEjPk6x09bZUcKcZBDS5A2uvNL\nRZ3oc7kJhCbSZ36D1WNZsNrNG2vr1qPmxVHD6NuOQDreibjdD7+lM+XrHFuM+W
Yw\nL5GL/qvZJAb2VRh4MpBufqlp7yu2hycWzGL3BQOnqAFxfsQbrqTaqpOvgxO9vlcL\nTP5ZTswNOuPs3MMIRlsBW3NQU4fOQa8Aw2V4eJ6SATY2ZH1J3PD/kpAujw7aKft/\nX+0v2v5fTjyfuA8TrdnY0sqKx/icy3wr\n-----END CERTIFICATE-----\n-----BEGIN RSA PRIVATE KEY-----", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2162660892", + "repo_full_name": "chef/chef", + "pr_number": 15065, + "pr_file": "spec/data/trusted_certs/opscode.pem", + "discussion_id": "2162660892", + "commented_code": "@@ -1,36 +1,47 @@\n -----BEGIN CERTIFICATE-----\n-MIIGTjCCBTagAwIBAgIQBK55YGZmkBq5xX+mbFvczTANBgkqhkiG9w0BAQsFADBl\n-MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3\n-d3cuZGlnaWNlcnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBBc3N1cmVkIElEIFJv\n-b3QgQ0EwHhcNMTMxMTA1MTIwMDAwWhcNMjgxMTA1MTIwMDAwWjBlMQswCQYDVQQG\n-EwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cuZGlnaWNl\n-cnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBTSEEyIEFzc3VyZWQgSUQgQ0EwggEi\n-MA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDc+BEjP2q178AneRstBYeiEEMx\n-3w7UFRtPd6Qizj6McPC+B47dJyq8AR22LArK3WlYH0HtagUf2mN4WR4iLCv4un7J\n-NTtW8R98Qn4lsCMZxkU41z1E+SB8YK4csFoYBL6PO/ep8JSapgxjSbZBF1NAMr1P\n-5lB6UB8lRejxia/N/17/UPPwFxH/vcWJ9b1iudj7jkUEhW2ZzcVITf0mqwI2Reo2\n-119q4hqCQQrc6dn1kReOxiGtODwT5h5/ZpzVTdlG2vbPUqd9OyTDtMFRNcab69Tv\n-fuR7A+FEvXoLN+BPy4KKDXEY5KbgiSwb87JzPMGwkp4Yfb2rfcV9CKEswp9zAgMB\n-AAGjggL4MIIC9DASBgNVHRMBAf8ECDAGAQH/AgEAMA4GA1UdDwEB/wQEAwIBhjA0\n-BggrBgEFBQcBAQQoMCYwJAYIKwYBBQUHMAGGGGh0dHA6Ly9vY3NwLmRpZ2ljZXJ0\n-LmNvbTCBgQYDVR0fBHoweDA6oDigNoY0aHR0cDovL2NybDQuZGlnaWNlcnQuY29t\n-L0RpZ2lDZXJ0QXNzdXJlZElEUm9vdENBLmNybDA6oDigNoY0aHR0cDovL2NybDMu\n-ZGlnaWNlcnQuY29tL0RpZ2lDZXJ0QXNzdXJlZElEUm9vdENBLmNybDAdBgNVHSUE\n-FjAUBggrBgEFBQcDAgYIKwYBBQUHAwQwggGzBgNVHSAEggGqMIIBpjCCAaIGCmCG\n-SAGG/WwAAgQwggGSMCgGCCsGAQUFBwIBFhxodHRwczovL3d3dy5kaWdpY2VydC5j\n-b20vQ1BTMIIBZAYIKwYBBQUHAgIwggFWHoIBUgBBAG4AeQAgAHUAcwBlACAAbwBm\n-ACAAdABoAGkAcwAgAEMAZQByAHQAaQBmAGkAYwBhAHQAZQAgAGMAbwBuAHMAdABp\n-AHQAdQB0AGUAcwAgAGEAYwBjAGUAcAB0AGEAbgBjAGUAIABvAGYAIAB0AGgAZQAg\n-AEQAaQBnAGkAQwBlAHIAdAAgAEMAUAAvAEMAUABTACAAYQBuAGQAIAB0AGgAZQAg\n-AFIAZQBsAHkAaQBuAGcAIABQAGEAcgB0AHkAIABBAGcAcgBlAGUAbQBlAG4AdAAg\n-AHcAaABpAGMAaAAgAGwAaQBtAGkAdAAgAGwAaQBhAGIAaQBsAGkAdAB5ACAAYQBu\n-AGQAIABhAHIAZQAgAGkAbgBjAG8AcgBwAG8AcgBhAHQAZQBkACAAaABlAHIAZQBp\n-AG4AIABiAHkAIAByAGUAZgBlAHIAZQBuAGMAZQAuMB0GA1UdDgQWBBTnAiOAAE/Y\n-17yUC9k/dDlJMjyKeTAfBgNVHSMEGDAWgBRF66Kv9JLLgjEtUYunpyGd823IDzAN\n-BgkqhkiG9w0BAQsFAAOCAQEATtSJJ7n9HYd3fg8oBZDxCi/JOz69k5yQxq/6kVGH\n-MlRr6MrBcVFcmY61+uBiGZmmB5p8Eyfb5QKihBLZFfYKRFfENI9tcx861qABPd7j\n-guRFa7LrJf2AXh05kL5bQvbOkWDj+aBWDEgQzjNoe82Tq/Bqy09YD7l7XRsEgZ6n\n-IuJXSSfukpMIvmkIUwI6Ll3IGfRQgE4C2bBdkbSTh/mWloFVQI5m7YLYuyhf7Uxh\n-7QZYKBlTEUS8RyApsgRs2IlUmTt122d4LB6SeMZVPVgSETJuvUMMTTTbe8ZC2+y+\n-q5thTAaS447fISpQVwTAYKI11SSeZjcJSc/V+GWz4OJuwg==\n+MIIDRDCCAiygAwIBAgIBAzANBgkqhkiG9w0BAQsFADA4MQswCQYDVQQGEwJVUzEQ\n+MA4GA1UECgwHVGVzdE9yZzEXMBUGA1UEAwwOSW50ZXJtZWRpYXRlQ0EwHhcNMjUw\n+NjIzMjI1ODM2WhcNMjYwNjIzMjI1ODM2WjAxMQswCQYDVQQGEwJVUzEQMA4GA1UE\n+CgwHVGVzdE9yZzEQMA4GA1UEAwwHb3BzY29kZTCCASIwDQYJKoZIhvcNAQEBBQAD\n+ggEPADCCAQoCggEBANDL1IUIf2sOksEgw3AO2PlYZ9usimr2G3OQ3wozAfBnP11x\n+8Jy2KTCY3f9tmxNVcQJYDQchaXF9W0YvUnQ5N61ab1i81MxI0alf66isAkb59krs\n+1XjY/SKw09362cgQgUrlPO/U8rksJK6Xcrgien9GtxkU9T9S7roRsGSaTj1kc81C\n+0UMm84/6vOXEkj9y/U0ce/y2P+pe6ySJZ+4Cnf2dbvb7QUdYjg1DDFvaqo0ctqUE\n+7fKi8/zVaydYJBFhp8I8/5h2Z5KG2Gj7ChEy+l6xp9nEg4hMKvNFHxmQP5WmnPsM\n+GUPRkTTiIz+GtyG3gPsYOABLJaDUncVD+J3OLv8CAwEAAaNgMF4wDAYDVR0TAQH/\n+BAIwADAOBgNVHQ8BAf8EBAMCB4AwHQYDVR0OBBYEFOfOw3p0XmA6bkcfO8hGdhN0\n+7LaJMB8GA1UdIwQYMBaA
FBAaAdIESETcy7rlWMDbUkj3shQiMA0GCSqGSIb3DQEB\n+CwUAA4IBAQBn6+HqXbW0WFdTnFbCQ8+QTBPc8s0IiejoAbY61b8l6o82ajRTAX3V\n+IU1/sLREuTjrrFs5ouy6NeCndPPTIulpCAHHWMeEjPk6x09bZUcKcZBDS5A2uvNL\n+RZ3oc7kJhCbSZ36D1WNZsNrNG2vr1qPmxVHD6NuOQDreibjdD7+lM+XrHFuM+WYw\n+L5GL/qvZJAb2VRh4MpBufqlp7yu2hycWzGL3BQOnqAFxfsQbrqTaqpOvgxO9vlcL\n+TP5ZTswNOuPs3MMIRlsBW3NQU4fOQa8Aw2V4eJ6SATY2ZH1J3PD/kpAujw7aKft/\n+X+0v2v5fTjyfuA8TrdnY0sqKx/icy3wr\n -----END CERTIFICATE-----\n+-----BEGIN RSA PRIVATE KEY-----", + "comment_created_at": "2025-06-23T23:01:44+00:00", + "comment_author": "github-advanced-security[bot]", + "comment_body": "## Cryptographic private keys should not be disclosed\n\nMake sure this private key gets revoked, changed, and removed from the code.

See more on SonarQube Cloud
              \n\n[Show more details](https://github.com/chef/chef/security/code-scanning/286)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2162660894", + "pr_number": 15065, + "pr_file": "spec/data/trusted_certs/root.pem", + "created_at": "2025-06-23T23:01:44+00:00", + "commented_code": "-----BEGIN CERTIFICATE-----\nMIIDrzCCApegAwIBAgIQCDvgVpBCRrGhdWrJWZHHSjANBgkqhkiG9w0BAQUFADBh\nMQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3\nd3cuZGlnaWNlcnQuY29tMSAwHgYDVQQDExdEaWdpQ2VydCBHbG9iYWwgUm9vdCBD\nQTAeFw0wNjExMTAwMDAwMDBaFw0zMTExMTAwMDAwMDBaMGExCzAJBgNVBAYTAlVT\nMRUwEwYDVQQKEwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5j\nb20xIDAeBgNVBAMTF0RpZ2lDZXJ0IEdsb2JhbCBSb290IENBMIIBIjANBgkqhkiG\n9w0BAQEFAAOCAQ8AMIIBCgKCAQEA4jvhEXLeqKTTo1eqUKKPC3eQyaKl7hLOllsB\nCSDMAZOnTjC3U/dDxGkAV53ijSLdhwZAAIEJzs4bg7/fzTtxRuLWZscFs3YnFo97\nnh6Vfe63SKMI2tavegw5BmV/Sl0fvBf4q77uKNd0f3p4mVmFaG5cIzJLv07A6Fpt\n43C/dxC//AH2hdmoRBBYMql1GNXRor5H4idq9Joz+EkIYIvUX7Q6hL+hqkpMfT7P\nT19sdl6gSzeRntwi5m3OFBqOasv+zbMUZBfHWymeMr/y7vrTC0LUq7dBMtoM1O/4\ngdW7jVg/tRvoSSiicNoxBN33shbyTApOB6jtSj1etX+jkMOvJwIDAQABo2MwYTAO\nBgNVHQ8BAf8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUA95QNVbR\nTLtm8KPiGxvDl7I90VUwHwYDVR0jBBgwFoAUA95QNVbRTLtm8KPiGxvDl7I90VUw\nDQYJKoZIhvcNAQEFBQADggEBAMucN6pIExIK+t1EnE9SsPTfrgT1eXkIoyQY/Esr\nhMAtudXH/vTBH1jLuG2cenTnmCmrEbXjcKChzUyImZOMkXDiqw8cvpOp/2PV5Adg\n06O/nVsJ8dWO41P0jmP6P6fbtGbfYmbW0W5BjfIttep3Sp+dWOIrWcBAI+0tKIJF\nPnlUkiaY4IBIqDfv8NZ5YBberOgOzW6sRBc4L0na4UU+Krk2U886UAb3LujEV0ls\nYSEY1QSteDwsOoBrp+uvFRTp2InBuThs4pFsiv9kuXclVzDAGySj4dzp30d8tbQk\nCAUw7C29C79Fv1C5qfPrmAESrciIxpg0X40KPMbp1ZWVbd4=\nMIIDPjCCAiagAwIBAgIBATANBgkqhkiG9w0BAQsFADAwMQswCQYDVQQGEwJVUzEQ\nMA4GA1UECgwHVGVzdE9yZzEPMA0GA1UEAwwGUm9vdENBMB4XDTI1MDYyMzIyNTgz\nNVoXDTMwMDYyMjIyNTgzNVowMDELMAkGA1UEBhMCVVMxEDAOBgNVBAoMB1Rlc3RP\ncmcxDzANBgNVBAMMBlJvb3RDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC\nggEBALHY+ebd8qHYjtGHWkqG5jk2PMCOMIxt8J+ftU3F3pU2DOHu58j6V7q7KmCW\n45m79Ca7oZ4db1p6KWmn29gpTzBRQcGMe+ZZYdlsymWFk/inoVe66UsRFzw7FLR3\nV+RY9LWw9j6mItfwvUEu43yqrZCv0+2rTxp5sLta9CaYFdS7Bhrtm5raR4MHQogw\nYCFdCPMcqdzef6EnkHA/wh6e0cY+1EsTfGJ3Dk3HZR7uCZwp7aUUqmSTIqmLzeo0\nRfmN4PcnNECDobNAHtcF/ArbXXm2h6wOX24zMsWPs87i0HVLLn4LI+cIsTCmgdvH\nkOsQFdRJCxJE8u3s5qUnFxmy1BsCAwEAAaNjMGEwDwYDVR0TAQH/BAUwAwEB/zAO\nBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFNIyIcTSIq1MnMCNyeWgOMxELtvxMB8G\nA1UdIwQYMBaAFNIyIcTSIq1MnMCNyeWgOMxELtvxMA0GCSqGSIb3DQEBCwUAA4IB\nAQAl9Jq+HvXWbnBbbHfZYtbMRsIrERXaZ3ACHWxh/fGS1K/adfvRFRt1jhDUgWm9\ntnW85hvPSaJTCIl2HHka6zX+OU8DvNLdL4rxJcy3h4JYyoUspzCHYSs3/B+aHJdi\nsygCd84YUBhVaco4KoX8asx1VeaZKSIMv+swxLPlKGv0ySxJUMV2NwPAwjG+tVFu\nBaNNl3aGx3OhRN2+6rNOxy5HM0QmCTIDgDESf6DsuRCQfV2z2eIYHHVbf2wtdtuD\nn8ioMqMiekFPCh6xsf4EBoAPheSkDo2uHrVSX6Xi8nZZjKKhFtEcDmeOHr1iwICt\nR58gaSLhCEl7nXgcqUmb3caA\n-----END CERTIFICATE-----\n-----BEGIN RSA PRIVATE KEY-----", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2162660894", + "repo_full_name": "chef/chef", + "pr_number": 15065, + "pr_file": "spec/data/trusted_certs/root.pem", + "discussion_id": "2162660894", + "commented_code": "@@ -1,22 +1,47 @@\n -----BEGIN 
CERTIFICATE-----\n-MIIDrzCCApegAwIBAgIQCDvgVpBCRrGhdWrJWZHHSjANBgkqhkiG9w0BAQUFADBh\n-MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3\n-d3cuZGlnaWNlcnQuY29tMSAwHgYDVQQDExdEaWdpQ2VydCBHbG9iYWwgUm9vdCBD\n-QTAeFw0wNjExMTAwMDAwMDBaFw0zMTExMTAwMDAwMDBaMGExCzAJBgNVBAYTAlVT\n-MRUwEwYDVQQKEwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5j\n-b20xIDAeBgNVBAMTF0RpZ2lDZXJ0IEdsb2JhbCBSb290IENBMIIBIjANBgkqhkiG\n-9w0BAQEFAAOCAQ8AMIIBCgKCAQEA4jvhEXLeqKTTo1eqUKKPC3eQyaKl7hLOllsB\n-CSDMAZOnTjC3U/dDxGkAV53ijSLdhwZAAIEJzs4bg7/fzTtxRuLWZscFs3YnFo97\n-nh6Vfe63SKMI2tavegw5BmV/Sl0fvBf4q77uKNd0f3p4mVmFaG5cIzJLv07A6Fpt\n-43C/dxC//AH2hdmoRBBYMql1GNXRor5H4idq9Joz+EkIYIvUX7Q6hL+hqkpMfT7P\n-T19sdl6gSzeRntwi5m3OFBqOasv+zbMUZBfHWymeMr/y7vrTC0LUq7dBMtoM1O/4\n-gdW7jVg/tRvoSSiicNoxBN33shbyTApOB6jtSj1etX+jkMOvJwIDAQABo2MwYTAO\n-BgNVHQ8BAf8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUA95QNVbR\n-TLtm8KPiGxvDl7I90VUwHwYDVR0jBBgwFoAUA95QNVbRTLtm8KPiGxvDl7I90VUw\n-DQYJKoZIhvcNAQEFBQADggEBAMucN6pIExIK+t1EnE9SsPTfrgT1eXkIoyQY/Esr\n-hMAtudXH/vTBH1jLuG2cenTnmCmrEbXjcKChzUyImZOMkXDiqw8cvpOp/2PV5Adg\n-06O/nVsJ8dWO41P0jmP6P6fbtGbfYmbW0W5BjfIttep3Sp+dWOIrWcBAI+0tKIJF\n-PnlUkiaY4IBIqDfv8NZ5YBberOgOzW6sRBc4L0na4UU+Krk2U886UAb3LujEV0ls\n-YSEY1QSteDwsOoBrp+uvFRTp2InBuThs4pFsiv9kuXclVzDAGySj4dzp30d8tbQk\n-CAUw7C29C79Fv1C5qfPrmAESrciIxpg0X40KPMbp1ZWVbd4=\n+MIIDPjCCAiagAwIBAgIBATANBgkqhkiG9w0BAQsFADAwMQswCQYDVQQGEwJVUzEQ\n+MA4GA1UECgwHVGVzdE9yZzEPMA0GA1UEAwwGUm9vdENBMB4XDTI1MDYyMzIyNTgz\n+NVoXDTMwMDYyMjIyNTgzNVowMDELMAkGA1UEBhMCVVMxEDAOBgNVBAoMB1Rlc3RP\n+cmcxDzANBgNVBAMMBlJvb3RDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC\n+ggEBALHY+ebd8qHYjtGHWkqG5jk2PMCOMIxt8J+ftU3F3pU2DOHu58j6V7q7KmCW\n+45m79Ca7oZ4db1p6KWmn29gpTzBRQcGMe+ZZYdlsymWFk/inoVe66UsRFzw7FLR3\n+V+RY9LWw9j6mItfwvUEu43yqrZCv0+2rTxp5sLta9CaYFdS7Bhrtm5raR4MHQogw\n+YCFdCPMcqdzef6EnkHA/wh6e0cY+1EsTfGJ3Dk3HZR7uCZwp7aUUqmSTIqmLzeo0\n+RfmN4PcnNECDobNAHtcF/ArbXXm2h6wOX24zMsWPs87i0HVLLn4LI+cIsTCmgdvH\n+kOsQFdRJCxJE8u3s5qUnFxmy1BsCAwEAAaNjMGEwDwYDVR0TAQH/BAUwAwEB/zAO\n+BgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFNIyIcTSIq1MnMCNyeWgOMxELtvxMB8G\n+A1UdIwQYMBaAFNIyIcTSIq1MnMCNyeWgOMxELtvxMA0GCSqGSIb3DQEBCwUAA4IB\n+AQAl9Jq+HvXWbnBbbHfZYtbMRsIrERXaZ3ACHWxh/fGS1K/adfvRFRt1jhDUgWm9\n+tnW85hvPSaJTCIl2HHka6zX+OU8DvNLdL4rxJcy3h4JYyoUspzCHYSs3/B+aHJdi\n+sygCd84YUBhVaco4KoX8asx1VeaZKSIMv+swxLPlKGv0ySxJUMV2NwPAwjG+tVFu\n+BaNNl3aGx3OhRN2+6rNOxy5HM0QmCTIDgDESf6DsuRCQfV2z2eIYHHVbf2wtdtuD\n+n8ioMqMiekFPCh6xsf4EBoAPheSkDo2uHrVSX6Xi8nZZjKKhFtEcDmeOHr1iwICt\n+R58gaSLhCEl7nXgcqUmb3caA\n -----END CERTIFICATE-----\n+-----BEGIN RSA PRIVATE KEY-----", + "comment_created_at": "2025-06-23T23:01:44+00:00", + "comment_author": "github-advanced-security[bot]", + "comment_body": "## Cryptographic private keys should not be disclosed\n\nMake sure this private key gets revoked, changed, and removed from the code.

See more on SonarQube Cloud
              \n\n[Show more details](https://github.com/chef/chef/security/code-scanning/288)", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-never-commit-secrets.md b/_reviewers/chef-never-commit-secrets.md index c72ad83..238ce3e 100644 --- a/_reviewers/chef-never-commit-secrets.md +++ b/_reviewers/chef-never-commit-secrets.md @@ -37,139 +37,3 @@ Security best practices for handling secrets: - Include secret files in .gitignore to prevent accidental commits Properly managing cryptographic material ensures your systems remain secure and prevents costly security incidents resulting from leaked credentials. - - -[ - { - "discussion_id": "2162467475", - "pr_number": 15058, - "pr_file": "spec/data/trusted_certs/intermediate.pem", - "created_at": "2025-06-23T20:24:41+00:00", - "commented_code": "-----BEGIN CERTIFICATE-----\r\nMIIGrTCCBJWgAwIBAgIQDo0oQK5IJZBWGLOoqeF6RzANBgkqhkiG9w0BAQwFADBJ\r\nMQswCQYDVQQGEwJVUzEXMBUGA1UEChMORGlnaUNlcnQsIEluYy4xITAfBgNVBAMT\r\nGERpZ2lDZXJ0IFJTQTQwOTYgUm9vdCBHNTAeFw0yMTA0MTQwMDAwMDBaFw0zMTA0\r\nMTMyMzU5NTlaMFQxCzAJBgNVBAYTAlVTMRcwFQYDVQQKEw5EaWdpQ2VydCwgSW5j\r\nLjEsMCoGA1UEAxMjRGlnaUNlcnQgRzUgUlNBNDA5NiBTSEEzODQgMjAyMSBDQTEw\r\nggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDCwLlUmeGwUTj93uzejg2I\r\ntHjaSqm+knZ8az09cBAZFLFU9sKDzBHgf43/GpIWIHGLDUGXXZkKtkjJhl6POqda\r\nXWt/4avSsQgkELz2uefSxhzELBl4o1U50EULTlri3zUBQ11Jr/hfJLxdMAJqKv21\r\niVD8GfFDs12Hy08h7IxuA5ROVdBQS2OiU/6Vd4A3uVpzyjaxQsfAvkwz9+3jsozf\r\nG+kWW+6Fxa3Vt4EbX+3afaBLeIyBlQvPd3pUY8irY3T6MHlglEblraxyGZ3ifvFu\r\nVt7S98D5+U4CMFzzGSzCCqMxTkgasTMhP8+PjXRN+mL56xyfw/uVmN9vRPqgbRUD\r\ng95zx+CRFXgpUQ8yslpl+ECSqCe0cYxm+jWz00VFWtUZAwpE4REGOVdmNGrfNR16\r\nh7dggpFVfeFy7qCwd9up/sWkBmkZB1zL9ENjg68EH5aEbh+jlbF6HuLv4+jibVlD\r\n/r+ZW/vJgnMXmUYW1gDl3L//vQ/V4ElqRYzxsSVsq3dwW0SYzI31PKFEb8sqI5IN\r\nP10MtFtZ1DgISF9I8LJ35dBDqguoonGC0/d+iq2S7ipcpFIo/u3tK/Nu0QvKMEN6\r\nDlx6Yhssscj2PhiADKjhRnweWUj/2eKuX8Cb6UmXvh+R4Dm0iEIGop1/r37GUo0z\r\nnqNszrYZz1zd4GWG6puFWQIDAQABo4IBhDCCAYAwEgYDVR0TAQH/BAgwBgEB/wIB\r\nADAdBgNVHQ4EFgQUbYE39zhEfkdCe1al7Lt3ZyEJ9DwwHwYDVR0jBBgwFoAUYm23\r\nkU/E6qNiYI+g0L61jwZ8aAAwDgYDVR0PAQH/BAQDAgGGMB0GA1UdJQQWMBQGCCsG\r\nAQUFBwMBBggrBgEFBQcDAjB3BggrBgEFBQcBAQRrMGkwJAYIKwYBBQUHMAGGGGh0\r\ndHA6Ly9vY3NwLmRpZ2ljZXJ0LmNvbTBBBggrBgEFBQcwAoY1aHR0cDovL2NhY2Vy\r\ndHMuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0UlNBNDA5NlJvb3RHNS5jcnQwQwYDVR0f\r\nBDwwOjA4oDagNIYyaHR0cDovL2NybDMuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0UlNB\r\nNDA5NlJvb3RHNS5jcmwwPQYDVR0gBDYwNDALBglghkgBhv1sAgEwBwYFZ4EMAQEw\r\nCAYGZ4EMAQIBMAgGBmeBDAECAjAIBgZngQwBAgMwDQYJKoZIhvcNAQEMBQADggIB\r\nAGHJE9aY60MSsfdEfqIcrdE0c1dXxis9E1l9at6g18Jpyc1C6PsUHdmo6rJWq8Xe\r\nNNPkD/4fKhJsrd9TRlUlpIgKiJZW1ituKHV6Ghm7DIRSyx0aMpP9NJ3heV3CIgZr\r\nMLtJEFuG5WfolWIfu7sle2lYjA3HxA/xQo803jGOhxbEDX/BTzHo/1X7YGvwpRqJ\r\n+7J1B+2l+TA1r9vAlLfIDQRazVYRNxHpJDOwU0ffKaEPbRrgPtogO+8hLSml9Zoe\r\nY8w94f31XbvBFxSbSVpX+/QctNdwx2VuIoRcT8WZ0lZ9aenna5q5AE1C8oTtbw2T\r\nqoz4NCaM5XPgjvb0DGPBeH8jWveNo1BmClQA2qYXL55f00m8AZ4Hf6oYANt/zbuM\r\nQPhAoSHWwW4V4Pug3XPXM70LlY50y9kPD/57eHryhO2oXQLLx+l6mg8xzL6vKsHT\r\nE30whFM32vVTpjejLZ9hJBAJURFaUrH2TZyAmoVbCNy50yuHYQ6FooYpbsbnpYPi\r\nKW/E9bc201rqm/GQOWJ4zOJ8a5Etn3zY+rlPaxjJvxc3pSMfgtwwrm9KGXHsI1Gf\r\nULMwUbXclKV2qR8d6ECtUOIRxoQKutN85lmwB05yddu6uQQg0hHeaGFUk7EU90SV\r\nib/FA/op9sXfS3CkOnHQISY0JbWxrzC6eHaKeQi6lR1I\r\n-----END CERTIFICATE-----\r\n-----BEGIN 
CERTIFICATE-----\nMIIDSTCCAjGgAwIBAgIBAjANBgkqhkiG9w0BAQsFADAwMQswCQYDVQQGEwJVUzEQ\nMA4GA1UECgwHVGVzdE9yZzEPMA0GA1UEAwwGUm9vdENBMB4XDTI1MDYyMzIwMjA1\nMFoXDTI4MDYyMjIwMjA1MFowODELMAkGA1UEBhMCVVMxEDAOBgNVBAoMB1Rlc3RP\ncmcxFzAVBgNVBAMMDkludGVybWVkaWF0ZUNBMIIBIjANBgkqhkiG9w0BAQEFAAOC\nAQ8AMIIBCgKCAQEAws9YXnQv2XblYOQsOTV8n9mGZ//DulhKpU0T0/dNfEbXd/uN\ns2VbJg1xcNWJjSzdNe8Y6dqdnNoCpOKsNGjkkJafhGNdgGF1+Y8vc0VkZDozxGAc\nBvK/s2OOzDoInS3+PSo9TBwS5SPwMSjdPhKkgTsLw0N0n6CrG26+bK4shJuJyTEG\nqOd/cw777fV8FOkh/9pHiXzX+eezVG/vn7O3NCm+WtF/flUuVtjvOmXAI/siNI4S\nc5DPawny+SAoM4ebOOfFPbkdHx1fauLr9aHeevjWtI/9tpwjrrOzMUeau9BNWioh\nnW4PDtsTswUUliiWMQYP8/txbzee7lAKTxTRuQIDAQABo2YwZDASBgNVHRMBAf8E\nCDAGAQH/AgEAMA4GA1UdDwEB/wQEAwIBBjAdBgNVHQ4EFgQUr6tNSHRY9NQ8u25m\nqku4hNl+uJ4wHwYDVR0jBBgwFoAUExSy9vgkJDrak4LmMpoJeclgms0wDQYJKoZI\nhvcNAQELBQADggEBADFO7Gah2Ca+lACLVeXtVqHFMiJ9qvs6rBGcNdUghKyFuogQ\n1knPFU946KvwDHVKC/+xCiVgdxnPG6AucSXnpDRrNN330FPEhCgJ83sYhRMzikCY\nx4KD8MG0wFLnDOnv9TJd10meLrsyUkGjHRYQzstqk77lIrnQW4OrXhTcOmhv++8B\n5ZqHryfzWL44enx5UkbXSci3zPhf2FGQzSGECUPilgskC3O53mRokV3RbJZvpXZF\n2EVYZrqFCswmJ+UZkYHuyyUNyTJ7RsOzeEQ7m5TatmpBYuLOisw67pUH6d6zQAI6\noFpGWPcsZrt6JnrjAWM8MlbFLTh98WhooGEgUws=\n-----END CERTIFICATE-----\n-----BEGIN RSA PRIVATE KEY-----", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2162467475", - "repo_full_name": "chef/chef", - "pr_number": 15058, - "pr_file": "spec/data/trusted_certs/intermediate.pem", - "discussion_id": "2162467475", - "commented_code": "@@ -1,38 +1,47 @@\n------BEGIN CERTIFICATE-----\r\n-MIIGrTCCBJWgAwIBAgIQDo0oQK5IJZBWGLOoqeF6RzANBgkqhkiG9w0BAQwFADBJ\r\n-MQswCQYDVQQGEwJVUzEXMBUGA1UEChMORGlnaUNlcnQsIEluYy4xITAfBgNVBAMT\r\n-GERpZ2lDZXJ0IFJTQTQwOTYgUm9vdCBHNTAeFw0yMTA0MTQwMDAwMDBaFw0zMTA0\r\n-MTMyMzU5NTlaMFQxCzAJBgNVBAYTAlVTMRcwFQYDVQQKEw5EaWdpQ2VydCwgSW5j\r\n-LjEsMCoGA1UEAxMjRGlnaUNlcnQgRzUgUlNBNDA5NiBTSEEzODQgMjAyMSBDQTEw\r\n-ggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDCwLlUmeGwUTj93uzejg2I\r\n-tHjaSqm+knZ8az09cBAZFLFU9sKDzBHgf43/GpIWIHGLDUGXXZkKtkjJhl6POqda\r\n-XWt/4avSsQgkELz2uefSxhzELBl4o1U50EULTlri3zUBQ11Jr/hfJLxdMAJqKv21\r\n-iVD8GfFDs12Hy08h7IxuA5ROVdBQS2OiU/6Vd4A3uVpzyjaxQsfAvkwz9+3jsozf\r\n-G+kWW+6Fxa3Vt4EbX+3afaBLeIyBlQvPd3pUY8irY3T6MHlglEblraxyGZ3ifvFu\r\n-Vt7S98D5+U4CMFzzGSzCCqMxTkgasTMhP8+PjXRN+mL56xyfw/uVmN9vRPqgbRUD\r\n-g95zx+CRFXgpUQ8yslpl+ECSqCe0cYxm+jWz00VFWtUZAwpE4REGOVdmNGrfNR16\r\n-h7dggpFVfeFy7qCwd9up/sWkBmkZB1zL9ENjg68EH5aEbh+jlbF6HuLv4+jibVlD\r\n-/r+ZW/vJgnMXmUYW1gDl3L//vQ/V4ElqRYzxsSVsq3dwW0SYzI31PKFEb8sqI5IN\r\n-P10MtFtZ1DgISF9I8LJ35dBDqguoonGC0/d+iq2S7ipcpFIo/u3tK/Nu0QvKMEN6\r\n-Dlx6Yhssscj2PhiADKjhRnweWUj/2eKuX8Cb6UmXvh+R4Dm0iEIGop1/r37GUo0z\r\n-nqNszrYZz1zd4GWG6puFWQIDAQABo4IBhDCCAYAwEgYDVR0TAQH/BAgwBgEB/wIB\r\n-ADAdBgNVHQ4EFgQUbYE39zhEfkdCe1al7Lt3ZyEJ9DwwHwYDVR0jBBgwFoAUYm23\r\n-kU/E6qNiYI+g0L61jwZ8aAAwDgYDVR0PAQH/BAQDAgGGMB0GA1UdJQQWMBQGCCsG\r\n-AQUFBwMBBggrBgEFBQcDAjB3BggrBgEFBQcBAQRrMGkwJAYIKwYBBQUHMAGGGGh0\r\n-dHA6Ly9vY3NwLmRpZ2ljZXJ0LmNvbTBBBggrBgEFBQcwAoY1aHR0cDovL2NhY2Vy\r\n-dHMuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0UlNBNDA5NlJvb3RHNS5jcnQwQwYDVR0f\r\n-BDwwOjA4oDagNIYyaHR0cDovL2NybDMuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0UlNB\r\n-NDA5NlJvb3RHNS5jcmwwPQYDVR0gBDYwNDALBglghkgBhv1sAgEwBwYFZ4EMAQEw\r\n-CAYGZ4EMAQIBMAgGBmeBDAECAjAIBgZngQwBAgMwDQYJKoZIhvcNAQEMBQADggIB\r\n-AGHJE9aY60MSsfdEfqIcrdE0c1dXxis9E1l9at6g18Jpyc1C6PsUHdmo6rJWq8Xe\r\n-NNPkD/4fKhJsrd9TRlUlpIgKiJZW1ituKHV6Ghm7DIRSyx0aMpP9NJ3heV3CIgZr\r\n-MLtJEFuG5WfolWIfu7sle2lYjA3HxA/xQo803jGOhxbEDX/BTzHo/1X7YGvwpRqJ\r\n-+7J1B+2l+TA1r9vAlLfIDQRazVYRNxHpJDOwU0ffKaEPbRrgPtogO+8hLSml9Zoe\r\n
-Y8w94f31XbvBFxSbSVpX+/QctNdwx2VuIoRcT8WZ0lZ9aenna5q5AE1C8oTtbw2T\r\n-qoz4NCaM5XPgjvb0DGPBeH8jWveNo1BmClQA2qYXL55f00m8AZ4Hf6oYANt/zbuM\r\n-QPhAoSHWwW4V4Pug3XPXM70LlY50y9kPD/57eHryhO2oXQLLx+l6mg8xzL6vKsHT\r\n-E30whFM32vVTpjejLZ9hJBAJURFaUrH2TZyAmoVbCNy50yuHYQ6FooYpbsbnpYPi\r\n-KW/E9bc201rqm/GQOWJ4zOJ8a5Etn3zY+rlPaxjJvxc3pSMfgtwwrm9KGXHsI1Gf\r\n-ULMwUbXclKV2qR8d6ECtUOIRxoQKutN85lmwB05yddu6uQQg0hHeaGFUk7EU90SV\r\n-ib/FA/op9sXfS3CkOnHQISY0JbWxrzC6eHaKeQi6lR1I\r\n------END CERTIFICATE-----\r\n+-----BEGIN CERTIFICATE-----\n+MIIDSTCCAjGgAwIBAgIBAjANBgkqhkiG9w0BAQsFADAwMQswCQYDVQQGEwJVUzEQ\n+MA4GA1UECgwHVGVzdE9yZzEPMA0GA1UEAwwGUm9vdENBMB4XDTI1MDYyMzIwMjA1\n+MFoXDTI4MDYyMjIwMjA1MFowODELMAkGA1UEBhMCVVMxEDAOBgNVBAoMB1Rlc3RP\n+cmcxFzAVBgNVBAMMDkludGVybWVkaWF0ZUNBMIIBIjANBgkqhkiG9w0BAQEFAAOC\n+AQ8AMIIBCgKCAQEAws9YXnQv2XblYOQsOTV8n9mGZ//DulhKpU0T0/dNfEbXd/uN\n+s2VbJg1xcNWJjSzdNe8Y6dqdnNoCpOKsNGjkkJafhGNdgGF1+Y8vc0VkZDozxGAc\n+BvK/s2OOzDoInS3+PSo9TBwS5SPwMSjdPhKkgTsLw0N0n6CrG26+bK4shJuJyTEG\n+qOd/cw777fV8FOkh/9pHiXzX+eezVG/vn7O3NCm+WtF/flUuVtjvOmXAI/siNI4S\n+c5DPawny+SAoM4ebOOfFPbkdHx1fauLr9aHeevjWtI/9tpwjrrOzMUeau9BNWioh\n+nW4PDtsTswUUliiWMQYP8/txbzee7lAKTxTRuQIDAQABo2YwZDASBgNVHRMBAf8E\n+CDAGAQH/AgEAMA4GA1UdDwEB/wQEAwIBBjAdBgNVHQ4EFgQUr6tNSHRY9NQ8u25m\n+qku4hNl+uJ4wHwYDVR0jBBgwFoAUExSy9vgkJDrak4LmMpoJeclgms0wDQYJKoZI\n+hvcNAQELBQADggEBADFO7Gah2Ca+lACLVeXtVqHFMiJ9qvs6rBGcNdUghKyFuogQ\n+1knPFU946KvwDHVKC/+xCiVgdxnPG6AucSXnpDRrNN330FPEhCgJ83sYhRMzikCY\n+x4KD8MG0wFLnDOnv9TJd10meLrsyUkGjHRYQzstqk77lIrnQW4OrXhTcOmhv++8B\n+5ZqHryfzWL44enx5UkbXSci3zPhf2FGQzSGECUPilgskC3O53mRokV3RbJZvpXZF\n+2EVYZrqFCswmJ+UZkYHuyyUNyTJ7RsOzeEQ7m5TatmpBYuLOisw67pUH6d6zQAI6\n+oFpGWPcsZrt6JnrjAWM8MlbFLTh98WhooGEgUws=\n+-----END CERTIFICATE-----\n+-----BEGIN RSA PRIVATE KEY-----", - "comment_created_at": "2025-06-23T20:24:41+00:00", - "comment_author": "github-advanced-security[bot]", - "comment_body": "## Cryptographic private keys should not be disclosed\n\nMake sure this private key gets revoked, changed, and removed from the code.

See more on SonarQube Cloud
              \n\n[Show more details](https://github.com/chef/chef/security/code-scanning/284)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2162467478", - "pr_number": 15058, - "pr_file": "spec/data/trusted_certs/opscode.pem", - "created_at": "2025-06-23T20:24:41+00:00", - "commented_code": "-----BEGIN CERTIFICATE-----\nMIIGTjCCBTagAwIBAgIQBK55YGZmkBq5xX+mbFvczTANBgkqhkiG9w0BAQsFADBl\nMQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3\nd3cuZGlnaWNlcnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBBc3N1cmVkIElEIFJv\nb3QgQ0EwHhcNMTMxMTA1MTIwMDAwWhcNMjgxMTA1MTIwMDAwWjBlMQswCQYDVQQG\nEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cuZGlnaWNl\ncnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBTSEEyIEFzc3VyZWQgSUQgQ0EwggEi\nMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDc+BEjP2q178AneRstBYeiEEMx\n3w7UFRtPd6Qizj6McPC+B47dJyq8AR22LArK3WlYH0HtagUf2mN4WR4iLCv4un7J\nNTtW8R98Qn4lsCMZxkU41z1E+SB8YK4csFoYBL6PO/ep8JSapgxjSbZBF1NAMr1P\n5lB6UB8lRejxia/N/17/UPPwFxH/vcWJ9b1iudj7jkUEhW2ZzcVITf0mqwI2Reo2\n119q4hqCQQrc6dn1kReOxiGtODwT5h5/ZpzVTdlG2vbPUqd9OyTDtMFRNcab69Tv\nfuR7A+FEvXoLN+BPy4KKDXEY5KbgiSwb87JzPMGwkp4Yfb2rfcV9CKEswp9zAgMB\nAAGjggL4MIIC9DASBgNVHRMBAf8ECDAGAQH/AgEAMA4GA1UdDwEB/wQEAwIBhjA0\nBggrBgEFBQcBAQQoMCYwJAYIKwYBBQUHMAGGGGh0dHA6Ly9vY3NwLmRpZ2ljZXJ0\nLmNvbTCBgQYDVR0fBHoweDA6oDigNoY0aHR0cDovL2NybDQuZGlnaWNlcnQuY29t\nL0RpZ2lDZXJ0QXNzdXJlZElEUm9vdENBLmNybDA6oDigNoY0aHR0cDovL2NybDMu\nZGlnaWNlcnQuY29tL0RpZ2lDZXJ0QXNzdXJlZElEUm9vdENBLmNybDAdBgNVHSUE\nFjAUBggrBgEFBQcDAgYIKwYBBQUHAwQwggGzBgNVHSAEggGqMIIBpjCCAaIGCmCG\nSAGG/WwAAgQwggGSMCgGCCsGAQUFBwIBFhxodHRwczovL3d3dy5kaWdpY2VydC5j\nb20vQ1BTMIIBZAYIKwYBBQUHAgIwggFWHoIBUgBBAG4AeQAgAHUAcwBlACAAbwBm\nACAAdABoAGkAcwAgAEMAZQByAHQAaQBmAGkAYwBhAHQAZQAgAGMAbwBuAHMAdABp\nAHQAdQB0AGUAcwAgAGEAYwBjAGUAcAB0AGEAbgBjAGUAIABvAGYAIAB0AGgAZQAg\nAEQAaQBnAGkAQwBlAHIAdAAgAEMAUAAvAEMAUABTACAAYQBuAGQAIAB0AGgAZQAg\nAFIAZQBsAHkAaQBuAGcAIABQAGEAcgB0AHkAIABBAGcAcgBlAGUAbQBlAG4AdAAg\nAHcAaABpAGMAaAAgAGwAaQBtAGkAdAAgAGwAaQBhAGIAaQBsAGkAdAB5ACAAYQBu\nAGQAIABhAHIAZQAgAGkAbgBjAG8AcgBwAG8AcgBhAHQAZQBkACAAaABlAHIAZQBp\nAG4AIABiAHkAIAByAGUAZgBlAHIAZQBuAGMAZQAuMB0GA1UdDgQWBBTnAiOAAE/Y\n17yUC9k/dDlJMjyKeTAfBgNVHSMEGDAWgBRF66Kv9JLLgjEtUYunpyGd823IDzAN\nBgkqhkiG9w0BAQsFAAOCAQEATtSJJ7n9HYd3fg8oBZDxCi/JOz69k5yQxq/6kVGH\nMlRr6MrBcVFcmY61+uBiGZmmB5p8Eyfb5QKihBLZFfYKRFfENI9tcx861qABPd7j\nguRFa7LrJf2AXh05kL5bQvbOkWDj+aBWDEgQzjNoe82Tq/Bqy09YD7l7XRsEgZ6n\nIuJXSSfukpMIvmkIUwI6Ll3IGfRQgE4C2bBdkbSTh/mWloFVQI5m7YLYuyhf7Uxh\n7QZYKBlTEUS8RyApsgRs2IlUmTt122d4LB6SeMZVPVgSETJuvUMMTTTbe8ZC2+y+\nq5thTAaS447fISpQVwTAYKI11SSeZjcJSc/V+GWz4OJuwg==\nMIIDRDCCAiygAwIBAgIBAzANBgkqhkiG9w0BAQsFADA4MQswCQYDVQQGEwJVUzEQ\nMA4GA1UECgwHVGVzdE9yZzEXMBUGA1UEAwwOSW50ZXJtZWRpYXRlQ0EwHhcNMjUw\nNjIzMjAyMDUwWhcNMjYwNjIzMjAyMDUwWjAxMQswCQYDVQQGEwJVUzEQMA4GA1UE\nCgwHVGVzdE9yZzEQMA4GA1UEAwwHb3BzY29kZTCCASIwDQYJKoZIhvcNAQEBBQAD\nggEPADCCAQoCggEBAM7L6OKflUY72ui8LiGYHQ2/7ZQtPVb8LarLaE+RCNGuFKxx\nN6eN+aEjXeMulvQOuOXuMMxU/PI2xN+H2kXbUi2dE2tfMH3w7MqmY0b2IPW4Xsvr\nQKc0iZDEsRwy8AJJfEaSuYkheRXJ1t2ijS9qCUzgoEhegSWbULp0jVER74Lkk3RN\nDhuare8SMIfSjFZQy9yugg02SgTSpciA4TDUNGxyv9O+111bCRfbQjLDz1WsAk6P\nMe5ehP6B6/6GumE47SsSt3QgXOuDS6wuI6yR3YBYTDHglURax5GB1jxMGFRXk/q0\nPueDiWyhYJEHySsgX1GI6kuzpm9cVjzQkvqs49MCAwEAAaNgMF4wDAYDVR0TAQH/\nBAIwADAOBgNVHQ8BAf8EBAMCB4AwHQYDVR0OBBYEFJ/+n6xxe6FQ682tbP1EWVcE\n2rXyMB8GA1UdIwQYMBaAFK+rTUh0WPTUPLtuZqpLuITZfrieMA0GCSqGSIb3DQEB\nCwUAA4IBAQBqgcMrGofwE4KPTcXLsYKd/xlOoTZDzjCI4wdlA1bLtIb46hbWCUJR\ncCmFuMHocSsPL99Fw03xsoToI8g2JlkrIRXtuWXwiDXnDdmgBwZVqIcgTxg0a0Yn\n6EpL8vxYJT1AZc0Os74qUCiJ+kHzlXkN+6SjilPnOIfuCtOEL+aHW6DXk7lkSF
6d\nfMRJZR2l7HJkCy3IqZGxkM4hgbKd/VOpF++UF/YgkN3f+QtsPbrDAWJHZD96yOjM\nbvDssC1rAS1HbjztAaFN2Qsfv/BWb4oZfT5V9v3hWKQUk2VaSW1fqhPptM4dnbLa\n7ofGN9p5Wn4PQAH8CGGExHm8xSnh2Hga\n-----END CERTIFICATE-----\n-----BEGIN RSA PRIVATE KEY-----", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2162467478", - "repo_full_name": "chef/chef", - "pr_number": 15058, - "pr_file": "spec/data/trusted_certs/opscode.pem", - "discussion_id": "2162467478", - "commented_code": "@@ -1,36 +1,47 @@\n -----BEGIN CERTIFICATE-----\n-MIIGTjCCBTagAwIBAgIQBK55YGZmkBq5xX+mbFvczTANBgkqhkiG9w0BAQsFADBl\n-MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3\n-d3cuZGlnaWNlcnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBBc3N1cmVkIElEIFJv\n-b3QgQ0EwHhcNMTMxMTA1MTIwMDAwWhcNMjgxMTA1MTIwMDAwWjBlMQswCQYDVQQG\n-EwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cuZGlnaWNl\n-cnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBTSEEyIEFzc3VyZWQgSUQgQ0EwggEi\n-MA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDc+BEjP2q178AneRstBYeiEEMx\n-3w7UFRtPd6Qizj6McPC+B47dJyq8AR22LArK3WlYH0HtagUf2mN4WR4iLCv4un7J\n-NTtW8R98Qn4lsCMZxkU41z1E+SB8YK4csFoYBL6PO/ep8JSapgxjSbZBF1NAMr1P\n-5lB6UB8lRejxia/N/17/UPPwFxH/vcWJ9b1iudj7jkUEhW2ZzcVITf0mqwI2Reo2\n-119q4hqCQQrc6dn1kReOxiGtODwT5h5/ZpzVTdlG2vbPUqd9OyTDtMFRNcab69Tv\n-fuR7A+FEvXoLN+BPy4KKDXEY5KbgiSwb87JzPMGwkp4Yfb2rfcV9CKEswp9zAgMB\n-AAGjggL4MIIC9DASBgNVHRMBAf8ECDAGAQH/AgEAMA4GA1UdDwEB/wQEAwIBhjA0\n-BggrBgEFBQcBAQQoMCYwJAYIKwYBBQUHMAGGGGh0dHA6Ly9vY3NwLmRpZ2ljZXJ0\n-LmNvbTCBgQYDVR0fBHoweDA6oDigNoY0aHR0cDovL2NybDQuZGlnaWNlcnQuY29t\n-L0RpZ2lDZXJ0QXNzdXJlZElEUm9vdENBLmNybDA6oDigNoY0aHR0cDovL2NybDMu\n-ZGlnaWNlcnQuY29tL0RpZ2lDZXJ0QXNzdXJlZElEUm9vdENBLmNybDAdBgNVHSUE\n-FjAUBggrBgEFBQcDAgYIKwYBBQUHAwQwggGzBgNVHSAEggGqMIIBpjCCAaIGCmCG\n-SAGG/WwAAgQwggGSMCgGCCsGAQUFBwIBFhxodHRwczovL3d3dy5kaWdpY2VydC5j\n-b20vQ1BTMIIBZAYIKwYBBQUHAgIwggFWHoIBUgBBAG4AeQAgAHUAcwBlACAAbwBm\n-ACAAdABoAGkAcwAgAEMAZQByAHQAaQBmAGkAYwBhAHQAZQAgAGMAbwBuAHMAdABp\n-AHQAdQB0AGUAcwAgAGEAYwBjAGUAcAB0AGEAbgBjAGUAIABvAGYAIAB0AGgAZQAg\n-AEQAaQBnAGkAQwBlAHIAdAAgAEMAUAAvAEMAUABTACAAYQBuAGQAIAB0AGgAZQAg\n-AFIAZQBsAHkAaQBuAGcAIABQAGEAcgB0AHkAIABBAGcAcgBlAGUAbQBlAG4AdAAg\n-AHcAaABpAGMAaAAgAGwAaQBtAGkAdAAgAGwAaQBhAGIAaQBsAGkAdAB5ACAAYQBu\n-AGQAIABhAHIAZQAgAGkAbgBjAG8AcgBwAG8AcgBhAHQAZQBkACAAaABlAHIAZQBp\n-AG4AIABiAHkAIAByAGUAZgBlAHIAZQBuAGMAZQAuMB0GA1UdDgQWBBTnAiOAAE/Y\n-17yUC9k/dDlJMjyKeTAfBgNVHSMEGDAWgBRF66Kv9JLLgjEtUYunpyGd823IDzAN\n-BgkqhkiG9w0BAQsFAAOCAQEATtSJJ7n9HYd3fg8oBZDxCi/JOz69k5yQxq/6kVGH\n-MlRr6MrBcVFcmY61+uBiGZmmB5p8Eyfb5QKihBLZFfYKRFfENI9tcx861qABPd7j\n-guRFa7LrJf2AXh05kL5bQvbOkWDj+aBWDEgQzjNoe82Tq/Bqy09YD7l7XRsEgZ6n\n-IuJXSSfukpMIvmkIUwI6Ll3IGfRQgE4C2bBdkbSTh/mWloFVQI5m7YLYuyhf7Uxh\n-7QZYKBlTEUS8RyApsgRs2IlUmTt122d4LB6SeMZVPVgSETJuvUMMTTTbe8ZC2+y+\n-q5thTAaS447fISpQVwTAYKI11SSeZjcJSc/V+GWz4OJuwg==\n+MIIDRDCCAiygAwIBAgIBAzANBgkqhkiG9w0BAQsFADA4MQswCQYDVQQGEwJVUzEQ\n+MA4GA1UECgwHVGVzdE9yZzEXMBUGA1UEAwwOSW50ZXJtZWRpYXRlQ0EwHhcNMjUw\n+NjIzMjAyMDUwWhcNMjYwNjIzMjAyMDUwWjAxMQswCQYDVQQGEwJVUzEQMA4GA1UE\n+CgwHVGVzdE9yZzEQMA4GA1UEAwwHb3BzY29kZTCCASIwDQYJKoZIhvcNAQEBBQAD\n+ggEPADCCAQoCggEBAM7L6OKflUY72ui8LiGYHQ2/7ZQtPVb8LarLaE+RCNGuFKxx\n+N6eN+aEjXeMulvQOuOXuMMxU/PI2xN+H2kXbUi2dE2tfMH3w7MqmY0b2IPW4Xsvr\n+QKc0iZDEsRwy8AJJfEaSuYkheRXJ1t2ijS9qCUzgoEhegSWbULp0jVER74Lkk3RN\n+Dhuare8SMIfSjFZQy9yugg02SgTSpciA4TDUNGxyv9O+111bCRfbQjLDz1WsAk6P\n+Me5ehP6B6/6GumE47SsSt3QgXOuDS6wuI6yR3YBYTDHglURax5GB1jxMGFRXk/q0\n+PueDiWyhYJEHySsgX1GI6kuzpm9cVjzQkvqs49MCAwEAAaNgMF4wDAYDVR0TAQH/\n+BAIwADAOBgNVHQ8BAf8EBAMCB4AwHQYDVR0OBBYEFJ/+n6xxe6FQ682tbP1EWVcE\n+2rXyMB8GA1UdIwQYMBaA
FK+rTUh0WPTUPLtuZqpLuITZfrieMA0GCSqGSIb3DQEB\n+CwUAA4IBAQBqgcMrGofwE4KPTcXLsYKd/xlOoTZDzjCI4wdlA1bLtIb46hbWCUJR\n+cCmFuMHocSsPL99Fw03xsoToI8g2JlkrIRXtuWXwiDXnDdmgBwZVqIcgTxg0a0Yn\n+6EpL8vxYJT1AZc0Os74qUCiJ+kHzlXkN+6SjilPnOIfuCtOEL+aHW6DXk7lkSF6d\n+fMRJZR2l7HJkCy3IqZGxkM4hgbKd/VOpF++UF/YgkN3f+QtsPbrDAWJHZD96yOjM\n+bvDssC1rAS1HbjztAaFN2Qsfv/BWb4oZfT5V9v3hWKQUk2VaSW1fqhPptM4dnbLa\n+7ofGN9p5Wn4PQAH8CGGExHm8xSnh2Hga\n -----END CERTIFICATE-----\n+-----BEGIN RSA PRIVATE KEY-----", - "comment_created_at": "2025-06-23T20:24:41+00:00", - "comment_author": "github-advanced-security[bot]", - "comment_body": "## Cryptographic private keys should not be disclosed\n\nMake sure this private key gets revoked, changed, and removed from the code.

See more on SonarQube Cloud
              \n\n[Show more details](https://github.com/chef/chef/security/code-scanning/283)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2162467480", - "pr_number": 15058, - "pr_file": "spec/data/trusted_certs/root.pem", - "created_at": "2025-06-23T20:24:41+00:00", - "commented_code": "-----BEGIN CERTIFICATE-----\nMIIDrzCCApegAwIBAgIQCDvgVpBCRrGhdWrJWZHHSjANBgkqhkiG9w0BAQUFADBh\nMQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3\nd3cuZGlnaWNlcnQuY29tMSAwHgYDVQQDExdEaWdpQ2VydCBHbG9iYWwgUm9vdCBD\nQTAeFw0wNjExMTAwMDAwMDBaFw0zMTExMTAwMDAwMDBaMGExCzAJBgNVBAYTAlVT\nMRUwEwYDVQQKEwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5j\nb20xIDAeBgNVBAMTF0RpZ2lDZXJ0IEdsb2JhbCBSb290IENBMIIBIjANBgkqhkiG\n9w0BAQEFAAOCAQ8AMIIBCgKCAQEA4jvhEXLeqKTTo1eqUKKPC3eQyaKl7hLOllsB\nCSDMAZOnTjC3U/dDxGkAV53ijSLdhwZAAIEJzs4bg7/fzTtxRuLWZscFs3YnFo97\nnh6Vfe63SKMI2tavegw5BmV/Sl0fvBf4q77uKNd0f3p4mVmFaG5cIzJLv07A6Fpt\n43C/dxC//AH2hdmoRBBYMql1GNXRor5H4idq9Joz+EkIYIvUX7Q6hL+hqkpMfT7P\nT19sdl6gSzeRntwi5m3OFBqOasv+zbMUZBfHWymeMr/y7vrTC0LUq7dBMtoM1O/4\ngdW7jVg/tRvoSSiicNoxBN33shbyTApOB6jtSj1etX+jkMOvJwIDAQABo2MwYTAO\nBgNVHQ8BAf8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUA95QNVbR\nTLtm8KPiGxvDl7I90VUwHwYDVR0jBBgwFoAUA95QNVbRTLtm8KPiGxvDl7I90VUw\nDQYJKoZIhvcNAQEFBQADggEBAMucN6pIExIK+t1EnE9SsPTfrgT1eXkIoyQY/Esr\nhMAtudXH/vTBH1jLuG2cenTnmCmrEbXjcKChzUyImZOMkXDiqw8cvpOp/2PV5Adg\n06O/nVsJ8dWO41P0jmP6P6fbtGbfYmbW0W5BjfIttep3Sp+dWOIrWcBAI+0tKIJF\nPnlUkiaY4IBIqDfv8NZ5YBberOgOzW6sRBc4L0na4UU+Krk2U886UAb3LujEV0ls\nYSEY1QSteDwsOoBrp+uvFRTp2InBuThs4pFsiv9kuXclVzDAGySj4dzp30d8tbQk\nCAUw7C29C79Fv1C5qfPrmAESrciIxpg0X40KPMbp1ZWVbd4=\nMIIDPjCCAiagAwIBAgIBATANBgkqhkiG9w0BAQsFADAwMQswCQYDVQQGEwJVUzEQ\nMA4GA1UECgwHVGVzdE9yZzEPMA0GA1UEAwwGUm9vdENBMB4XDTI1MDYyMzIwMjA1\nMFoXDTMwMDYyMjIwMjA1MFowMDELMAkGA1UEBhMCVVMxEDAOBgNVBAoMB1Rlc3RP\ncmcxDzANBgNVBAMMBlJvb3RDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC\nggEBANLtMQLXYptRVUSLiIirbRSfYZj9MzeLrWpF4ZcCs58FB5RJWoRWj7HXTL2N\na08y962pFBJuOyJ9ufPEQOjiPFVyy0bkIcOaNkekJ0MMin2FJnuKDIYzeNKogfqU\nhvWZosVev4m1wDvPqtVwan7iwUStnmOJBESUs0XwmVmTstRODmbMhW6l40sJA7e3\nSwJ0GarZCzIg1ZWUkSFRwh+vO9OzArakhIV4zDuHJHQJoBeStFWuKLOJQo9J51dt\ndfBH0xj/cE2lsCfMBItYsO5gOHllhvCEd6nxc5+S+a9jc0t04N1otR0A4F2x9ARt\nK/fpKJw4EMp7cJn06wP4mpjLN6ECAwEAAaNjMGEwDwYDVR0TAQH/BAUwAwEB/zAO\nBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFBMUsvb4JCQ62pOC5jKaCXnJYJrNMB8G\nA1UdIwQYMBaAFBMUsvb4JCQ62pOC5jKaCXnJYJrNMA0GCSqGSIb3DQEBCwUAA4IB\nAQC76EndKB5+u+U/pQ7AgZCp4pTJTWzTRs+iBum9qVn60ndB/37jsxDJLyDAHlaB\n6jkj2A5n28ucdtEAygHj+4wKGbDY8bCnhg1COMK4Db4h0FSYAUq9OEWdiC6LVrBe\nkMBJJGUSzKzmonSp26LU3TdoBV5BWAvqwsGPfpyRAEPKblqtjh++3NccOjMPlcS0\nIRydyxIBzTMqM41radvAJ+5kl6X92XB1kJnqslRxh00ZAX9CdrGc8mozDyYlbvno\nfYFREnPETFukDP87s3jtVy2Suf+qczu2d9qT6dd9oAY47ypCdX2xag5HFtZgpb15\n6AM5YrQtrIGvAoXGfuKDBTPA\n-----END CERTIFICATE-----\n-----BEGIN RSA PRIVATE KEY-----", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2162467480", - "repo_full_name": "chef/chef", - "pr_number": 15058, - "pr_file": "spec/data/trusted_certs/root.pem", - "discussion_id": "2162467480", - "commented_code": "@@ -1,22 +1,47 @@\n -----BEGIN 
CERTIFICATE-----\n-MIIDrzCCApegAwIBAgIQCDvgVpBCRrGhdWrJWZHHSjANBgkqhkiG9w0BAQUFADBh\n-MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3\n-d3cuZGlnaWNlcnQuY29tMSAwHgYDVQQDExdEaWdpQ2VydCBHbG9iYWwgUm9vdCBD\n-QTAeFw0wNjExMTAwMDAwMDBaFw0zMTExMTAwMDAwMDBaMGExCzAJBgNVBAYTAlVT\n-MRUwEwYDVQQKEwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5j\n-b20xIDAeBgNVBAMTF0RpZ2lDZXJ0IEdsb2JhbCBSb290IENBMIIBIjANBgkqhkiG\n-9w0BAQEFAAOCAQ8AMIIBCgKCAQEA4jvhEXLeqKTTo1eqUKKPC3eQyaKl7hLOllsB\n-CSDMAZOnTjC3U/dDxGkAV53ijSLdhwZAAIEJzs4bg7/fzTtxRuLWZscFs3YnFo97\n-nh6Vfe63SKMI2tavegw5BmV/Sl0fvBf4q77uKNd0f3p4mVmFaG5cIzJLv07A6Fpt\n-43C/dxC//AH2hdmoRBBYMql1GNXRor5H4idq9Joz+EkIYIvUX7Q6hL+hqkpMfT7P\n-T19sdl6gSzeRntwi5m3OFBqOasv+zbMUZBfHWymeMr/y7vrTC0LUq7dBMtoM1O/4\n-gdW7jVg/tRvoSSiicNoxBN33shbyTApOB6jtSj1etX+jkMOvJwIDAQABo2MwYTAO\n-BgNVHQ8BAf8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUA95QNVbR\n-TLtm8KPiGxvDl7I90VUwHwYDVR0jBBgwFoAUA95QNVbRTLtm8KPiGxvDl7I90VUw\n-DQYJKoZIhvcNAQEFBQADggEBAMucN6pIExIK+t1EnE9SsPTfrgT1eXkIoyQY/Esr\n-hMAtudXH/vTBH1jLuG2cenTnmCmrEbXjcKChzUyImZOMkXDiqw8cvpOp/2PV5Adg\n-06O/nVsJ8dWO41P0jmP6P6fbtGbfYmbW0W5BjfIttep3Sp+dWOIrWcBAI+0tKIJF\n-PnlUkiaY4IBIqDfv8NZ5YBberOgOzW6sRBc4L0na4UU+Krk2U886UAb3LujEV0ls\n-YSEY1QSteDwsOoBrp+uvFRTp2InBuThs4pFsiv9kuXclVzDAGySj4dzp30d8tbQk\n-CAUw7C29C79Fv1C5qfPrmAESrciIxpg0X40KPMbp1ZWVbd4=\n+MIIDPjCCAiagAwIBAgIBATANBgkqhkiG9w0BAQsFADAwMQswCQYDVQQGEwJVUzEQ\n+MA4GA1UECgwHVGVzdE9yZzEPMA0GA1UEAwwGUm9vdENBMB4XDTI1MDYyMzIwMjA1\n+MFoXDTMwMDYyMjIwMjA1MFowMDELMAkGA1UEBhMCVVMxEDAOBgNVBAoMB1Rlc3RP\n+cmcxDzANBgNVBAMMBlJvb3RDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC\n+ggEBANLtMQLXYptRVUSLiIirbRSfYZj9MzeLrWpF4ZcCs58FB5RJWoRWj7HXTL2N\n+a08y962pFBJuOyJ9ufPEQOjiPFVyy0bkIcOaNkekJ0MMin2FJnuKDIYzeNKogfqU\n+hvWZosVev4m1wDvPqtVwan7iwUStnmOJBESUs0XwmVmTstRODmbMhW6l40sJA7e3\n+SwJ0GarZCzIg1ZWUkSFRwh+vO9OzArakhIV4zDuHJHQJoBeStFWuKLOJQo9J51dt\n+dfBH0xj/cE2lsCfMBItYsO5gOHllhvCEd6nxc5+S+a9jc0t04N1otR0A4F2x9ARt\n+K/fpKJw4EMp7cJn06wP4mpjLN6ECAwEAAaNjMGEwDwYDVR0TAQH/BAUwAwEB/zAO\n+BgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFBMUsvb4JCQ62pOC5jKaCXnJYJrNMB8G\n+A1UdIwQYMBaAFBMUsvb4JCQ62pOC5jKaCXnJYJrNMA0GCSqGSIb3DQEBCwUAA4IB\n+AQC76EndKB5+u+U/pQ7AgZCp4pTJTWzTRs+iBum9qVn60ndB/37jsxDJLyDAHlaB\n+6jkj2A5n28ucdtEAygHj+4wKGbDY8bCnhg1COMK4Db4h0FSYAUq9OEWdiC6LVrBe\n+kMBJJGUSzKzmonSp26LU3TdoBV5BWAvqwsGPfpyRAEPKblqtjh++3NccOjMPlcS0\n+IRydyxIBzTMqM41radvAJ+5kl6X92XB1kJnqslRxh00ZAX9CdrGc8mozDyYlbvno\n+fYFREnPETFukDP87s3jtVy2Suf+qczu2d9qT6dd9oAY47ypCdX2xag5HFtZgpb15\n+6AM5YrQtrIGvAoXGfuKDBTPA\n -----END CERTIFICATE-----\n+-----BEGIN RSA PRIVATE KEY-----", - "comment_created_at": "2025-06-23T20:24:41+00:00", - "comment_author": "github-advanced-security[bot]", - "comment_body": "## Cryptographic private keys should not be disclosed\n\nMake sure this private key gets revoked, changed, and removed from the code.

See more on SonarQube Cloud
              \n\n[Show more details](https://github.com/chef/chef/security/code-scanning/285)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2162660890", - "pr_number": 15065, - "pr_file": "spec/data/trusted_certs/intermediate.pem", - "created_at": "2025-06-23T23:01:44+00:00", - "commented_code": "-----BEGIN CERTIFICATE-----\r\nMIIGrTCCBJWgAwIBAgIQDo0oQK5IJZBWGLOoqeF6RzANBgkqhkiG9w0BAQwFADBJ\r\nMQswCQYDVQQGEwJVUzEXMBUGA1UEChMORGlnaUNlcnQsIEluYy4xITAfBgNVBAMT\r\nGERpZ2lDZXJ0IFJTQTQwOTYgUm9vdCBHNTAeFw0yMTA0MTQwMDAwMDBaFw0zMTA0\r\nMTMyMzU5NTlaMFQxCzAJBgNVBAYTAlVTMRcwFQYDVQQKEw5EaWdpQ2VydCwgSW5j\r\nLjEsMCoGA1UEAxMjRGlnaUNlcnQgRzUgUlNBNDA5NiBTSEEzODQgMjAyMSBDQTEw\r\nggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDCwLlUmeGwUTj93uzejg2I\r\ntHjaSqm+knZ8az09cBAZFLFU9sKDzBHgf43/GpIWIHGLDUGXXZkKtkjJhl6POqda\r\nXWt/4avSsQgkELz2uefSxhzELBl4o1U50EULTlri3zUBQ11Jr/hfJLxdMAJqKv21\r\niVD8GfFDs12Hy08h7IxuA5ROVdBQS2OiU/6Vd4A3uVpzyjaxQsfAvkwz9+3jsozf\r\nG+kWW+6Fxa3Vt4EbX+3afaBLeIyBlQvPd3pUY8irY3T6MHlglEblraxyGZ3ifvFu\r\nVt7S98D5+U4CMFzzGSzCCqMxTkgasTMhP8+PjXRN+mL56xyfw/uVmN9vRPqgbRUD\r\ng95zx+CRFXgpUQ8yslpl+ECSqCe0cYxm+jWz00VFWtUZAwpE4REGOVdmNGrfNR16\r\nh7dggpFVfeFy7qCwd9up/sWkBmkZB1zL9ENjg68EH5aEbh+jlbF6HuLv4+jibVlD\r\n/r+ZW/vJgnMXmUYW1gDl3L//vQ/V4ElqRYzxsSVsq3dwW0SYzI31PKFEb8sqI5IN\r\nP10MtFtZ1DgISF9I8LJ35dBDqguoonGC0/d+iq2S7ipcpFIo/u3tK/Nu0QvKMEN6\r\nDlx6Yhssscj2PhiADKjhRnweWUj/2eKuX8Cb6UmXvh+R4Dm0iEIGop1/r37GUo0z\r\nnqNszrYZz1zd4GWG6puFWQIDAQABo4IBhDCCAYAwEgYDVR0TAQH/BAgwBgEB/wIB\r\nADAdBgNVHQ4EFgQUbYE39zhEfkdCe1al7Lt3ZyEJ9DwwHwYDVR0jBBgwFoAUYm23\r\nkU/E6qNiYI+g0L61jwZ8aAAwDgYDVR0PAQH/BAQDAgGGMB0GA1UdJQQWMBQGCCsG\r\nAQUFBwMBBggrBgEFBQcDAjB3BggrBgEFBQcBAQRrMGkwJAYIKwYBBQUHMAGGGGh0\r\ndHA6Ly9vY3NwLmRpZ2ljZXJ0LmNvbTBBBggrBgEFBQcwAoY1aHR0cDovL2NhY2Vy\r\ndHMuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0UlNBNDA5NlJvb3RHNS5jcnQwQwYDVR0f\r\nBDwwOjA4oDagNIYyaHR0cDovL2NybDMuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0UlNB\r\nNDA5NlJvb3RHNS5jcmwwPQYDVR0gBDYwNDALBglghkgBhv1sAgEwBwYFZ4EMAQEw\r\nCAYGZ4EMAQIBMAgGBmeBDAECAjAIBgZngQwBAgMwDQYJKoZIhvcNAQEMBQADggIB\r\nAGHJE9aY60MSsfdEfqIcrdE0c1dXxis9E1l9at6g18Jpyc1C6PsUHdmo6rJWq8Xe\r\nNNPkD/4fKhJsrd9TRlUlpIgKiJZW1ituKHV6Ghm7DIRSyx0aMpP9NJ3heV3CIgZr\r\nMLtJEFuG5WfolWIfu7sle2lYjA3HxA/xQo803jGOhxbEDX/BTzHo/1X7YGvwpRqJ\r\n+7J1B+2l+TA1r9vAlLfIDQRazVYRNxHpJDOwU0ffKaEPbRrgPtogO+8hLSml9Zoe\r\nY8w94f31XbvBFxSbSVpX+/QctNdwx2VuIoRcT8WZ0lZ9aenna5q5AE1C8oTtbw2T\r\nqoz4NCaM5XPgjvb0DGPBeH8jWveNo1BmClQA2qYXL55f00m8AZ4Hf6oYANt/zbuM\r\nQPhAoSHWwW4V4Pug3XPXM70LlY50y9kPD/57eHryhO2oXQLLx+l6mg8xzL6vKsHT\r\nE30whFM32vVTpjejLZ9hJBAJURFaUrH2TZyAmoVbCNy50yuHYQ6FooYpbsbnpYPi\r\nKW/E9bc201rqm/GQOWJ4zOJ8a5Etn3zY+rlPaxjJvxc3pSMfgtwwrm9KGXHsI1Gf\r\nULMwUbXclKV2qR8d6ECtUOIRxoQKutN85lmwB05yddu6uQQg0hHeaGFUk7EU90SV\r\nib/FA/op9sXfS3CkOnHQISY0JbWxrzC6eHaKeQi6lR1I\r\n-----END CERTIFICATE-----\r\n-----BEGIN 
CERTIFICATE-----\nMIIDSTCCAjGgAwIBAgIBAjANBgkqhkiG9w0BAQsFADAwMQswCQYDVQQGEwJVUzEQ\nMA4GA1UECgwHVGVzdE9yZzEPMA0GA1UEAwwGUm9vdENBMB4XDTI1MDYyMzIyNTgz\nNloXDTI4MDYyMjIyNTgzNlowODELMAkGA1UEBhMCVVMxEDAOBgNVBAoMB1Rlc3RP\ncmcxFzAVBgNVBAMMDkludGVybWVkaWF0ZUNBMIIBIjANBgkqhkiG9w0BAQEFAAOC\nAQ8AMIIBCgKCAQEA8eYKQYc01Fkg05bCyLIz7ql4BSyJIQr9JiszF4u3gQL1TQeA\nkPkzjQxFAyx271M/PLMBfQPCnjST/ANG41oFIHi9QkwZZUtd2fzMN3xasE3IA0Qx\nGLF3sEnlX3U7pn/tshnuie3XVfyjg+vOnkdQaazVY+2+sPaGSzRktVPFmS4/6a+g\nrf6jvrmRo59/u405yLgOQs41PAcrpG/DMssPsqIdFrSbeXRM10fyo2Oo4UDM08Uv\nOUMZK9LbbFOMKU+YDoIX4QKdeo6tf/zJAJsfyz/x42MGXyP+c6tbH1v7KNw9FGkN\n6O88EeWUtLr+CXXa6SrlGBQfuA+kzoIhRbTcYwIDAQABo2YwZDASBgNVHRMBAf8E\nCDAGAQH/AgEAMA4GA1UdDwEB/wQEAwIBBjAdBgNVHQ4EFgQUEBoB0gRIRNzLuuVY\nwNtSSPeyFCIwHwYDVR0jBBgwFoAU0jIhxNIirUycwI3J5aA4zEQu2/EwDQYJKoZI\nhvcNAQELBQADggEBAIm610QOb0ZRtxKLLoOtBgH4UqLuGqiWnVvIkc1FKVYlrtTJ\ndh27S0KhtducOKRhYp9XINBCD2QfsFwZhAeo2BE4FffbyH2R0u79kuqojufooaPW\n6UiKZKkzdo0g+0Hv00GrXGqS8Q1NqPoLRr33KeB72Od6xZmGNJF3NI0urP0I9jVk\nXohMrCqD39S8XQ7CYXhLW4onRiiQHuZe6frDzJX2NdwtXqd+W0ujvKfb33mdK+C5\nGZi8duiHK7nT6vBrsVy6R9YCPQbDWBc34dKShx01Ml+3BvJ0jGEiY5jYukYylGwJ\nXyatDe8dz3g4KGfGnSaMhgf154VDLIuXz/U0BUE=\n-----END CERTIFICATE-----\n-----BEGIN RSA PRIVATE KEY-----", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2162660890", - "repo_full_name": "chef/chef", - "pr_number": 15065, - "pr_file": "spec/data/trusted_certs/intermediate.pem", - "discussion_id": "2162660890", - "commented_code": "@@ -1,38 +1,47 @@\n------BEGIN CERTIFICATE-----\r\n-MIIGrTCCBJWgAwIBAgIQDo0oQK5IJZBWGLOoqeF6RzANBgkqhkiG9w0BAQwFADBJ\r\n-MQswCQYDVQQGEwJVUzEXMBUGA1UEChMORGlnaUNlcnQsIEluYy4xITAfBgNVBAMT\r\n-GERpZ2lDZXJ0IFJTQTQwOTYgUm9vdCBHNTAeFw0yMTA0MTQwMDAwMDBaFw0zMTA0\r\n-MTMyMzU5NTlaMFQxCzAJBgNVBAYTAlVTMRcwFQYDVQQKEw5EaWdpQ2VydCwgSW5j\r\n-LjEsMCoGA1UEAxMjRGlnaUNlcnQgRzUgUlNBNDA5NiBTSEEzODQgMjAyMSBDQTEw\r\n-ggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDCwLlUmeGwUTj93uzejg2I\r\n-tHjaSqm+knZ8az09cBAZFLFU9sKDzBHgf43/GpIWIHGLDUGXXZkKtkjJhl6POqda\r\n-XWt/4avSsQgkELz2uefSxhzELBl4o1U50EULTlri3zUBQ11Jr/hfJLxdMAJqKv21\r\n-iVD8GfFDs12Hy08h7IxuA5ROVdBQS2OiU/6Vd4A3uVpzyjaxQsfAvkwz9+3jsozf\r\n-G+kWW+6Fxa3Vt4EbX+3afaBLeIyBlQvPd3pUY8irY3T6MHlglEblraxyGZ3ifvFu\r\n-Vt7S98D5+U4CMFzzGSzCCqMxTkgasTMhP8+PjXRN+mL56xyfw/uVmN9vRPqgbRUD\r\n-g95zx+CRFXgpUQ8yslpl+ECSqCe0cYxm+jWz00VFWtUZAwpE4REGOVdmNGrfNR16\r\n-h7dggpFVfeFy7qCwd9up/sWkBmkZB1zL9ENjg68EH5aEbh+jlbF6HuLv4+jibVlD\r\n-/r+ZW/vJgnMXmUYW1gDl3L//vQ/V4ElqRYzxsSVsq3dwW0SYzI31PKFEb8sqI5IN\r\n-P10MtFtZ1DgISF9I8LJ35dBDqguoonGC0/d+iq2S7ipcpFIo/u3tK/Nu0QvKMEN6\r\n-Dlx6Yhssscj2PhiADKjhRnweWUj/2eKuX8Cb6UmXvh+R4Dm0iEIGop1/r37GUo0z\r\n-nqNszrYZz1zd4GWG6puFWQIDAQABo4IBhDCCAYAwEgYDVR0TAQH/BAgwBgEB/wIB\r\n-ADAdBgNVHQ4EFgQUbYE39zhEfkdCe1al7Lt3ZyEJ9DwwHwYDVR0jBBgwFoAUYm23\r\n-kU/E6qNiYI+g0L61jwZ8aAAwDgYDVR0PAQH/BAQDAgGGMB0GA1UdJQQWMBQGCCsG\r\n-AQUFBwMBBggrBgEFBQcDAjB3BggrBgEFBQcBAQRrMGkwJAYIKwYBBQUHMAGGGGh0\r\n-dHA6Ly9vY3NwLmRpZ2ljZXJ0LmNvbTBBBggrBgEFBQcwAoY1aHR0cDovL2NhY2Vy\r\n-dHMuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0UlNBNDA5NlJvb3RHNS5jcnQwQwYDVR0f\r\n-BDwwOjA4oDagNIYyaHR0cDovL2NybDMuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0UlNB\r\n-NDA5NlJvb3RHNS5jcmwwPQYDVR0gBDYwNDALBglghkgBhv1sAgEwBwYFZ4EMAQEw\r\n-CAYGZ4EMAQIBMAgGBmeBDAECAjAIBgZngQwBAgMwDQYJKoZIhvcNAQEMBQADggIB\r\n-AGHJE9aY60MSsfdEfqIcrdE0c1dXxis9E1l9at6g18Jpyc1C6PsUHdmo6rJWq8Xe\r\n-NNPkD/4fKhJsrd9TRlUlpIgKiJZW1ituKHV6Ghm7DIRSyx0aMpP9NJ3heV3CIgZr\r\n-MLtJEFuG5WfolWIfu7sle2lYjA3HxA/xQo803jGOhxbEDX/BTzHo/1X7YGvwpRqJ\r\n-+7J1B+2l+TA1r9vAlLfIDQRazVYRNxHpJDOwU0ffKaEPbRrgPtogO+8hLSml9Zoe\r\n
-Y8w94f31XbvBFxSbSVpX+/QctNdwx2VuIoRcT8WZ0lZ9aenna5q5AE1C8oTtbw2T\r\n-qoz4NCaM5XPgjvb0DGPBeH8jWveNo1BmClQA2qYXL55f00m8AZ4Hf6oYANt/zbuM\r\n-QPhAoSHWwW4V4Pug3XPXM70LlY50y9kPD/57eHryhO2oXQLLx+l6mg8xzL6vKsHT\r\n-E30whFM32vVTpjejLZ9hJBAJURFaUrH2TZyAmoVbCNy50yuHYQ6FooYpbsbnpYPi\r\n-KW/E9bc201rqm/GQOWJ4zOJ8a5Etn3zY+rlPaxjJvxc3pSMfgtwwrm9KGXHsI1Gf\r\n-ULMwUbXclKV2qR8d6ECtUOIRxoQKutN85lmwB05yddu6uQQg0hHeaGFUk7EU90SV\r\n-ib/FA/op9sXfS3CkOnHQISY0JbWxrzC6eHaKeQi6lR1I\r\n------END CERTIFICATE-----\r\n+-----BEGIN CERTIFICATE-----\n+MIIDSTCCAjGgAwIBAgIBAjANBgkqhkiG9w0BAQsFADAwMQswCQYDVQQGEwJVUzEQ\n+MA4GA1UECgwHVGVzdE9yZzEPMA0GA1UEAwwGUm9vdENBMB4XDTI1MDYyMzIyNTgz\n+NloXDTI4MDYyMjIyNTgzNlowODELMAkGA1UEBhMCVVMxEDAOBgNVBAoMB1Rlc3RP\n+cmcxFzAVBgNVBAMMDkludGVybWVkaWF0ZUNBMIIBIjANBgkqhkiG9w0BAQEFAAOC\n+AQ8AMIIBCgKCAQEA8eYKQYc01Fkg05bCyLIz7ql4BSyJIQr9JiszF4u3gQL1TQeA\n+kPkzjQxFAyx271M/PLMBfQPCnjST/ANG41oFIHi9QkwZZUtd2fzMN3xasE3IA0Qx\n+GLF3sEnlX3U7pn/tshnuie3XVfyjg+vOnkdQaazVY+2+sPaGSzRktVPFmS4/6a+g\n+rf6jvrmRo59/u405yLgOQs41PAcrpG/DMssPsqIdFrSbeXRM10fyo2Oo4UDM08Uv\n+OUMZK9LbbFOMKU+YDoIX4QKdeo6tf/zJAJsfyz/x42MGXyP+c6tbH1v7KNw9FGkN\n+6O88EeWUtLr+CXXa6SrlGBQfuA+kzoIhRbTcYwIDAQABo2YwZDASBgNVHRMBAf8E\n+CDAGAQH/AgEAMA4GA1UdDwEB/wQEAwIBBjAdBgNVHQ4EFgQUEBoB0gRIRNzLuuVY\n+wNtSSPeyFCIwHwYDVR0jBBgwFoAU0jIhxNIirUycwI3J5aA4zEQu2/EwDQYJKoZI\n+hvcNAQELBQADggEBAIm610QOb0ZRtxKLLoOtBgH4UqLuGqiWnVvIkc1FKVYlrtTJ\n+dh27S0KhtducOKRhYp9XINBCD2QfsFwZhAeo2BE4FffbyH2R0u79kuqojufooaPW\n+6UiKZKkzdo0g+0Hv00GrXGqS8Q1NqPoLRr33KeB72Od6xZmGNJF3NI0urP0I9jVk\n+XohMrCqD39S8XQ7CYXhLW4onRiiQHuZe6frDzJX2NdwtXqd+W0ujvKfb33mdK+C5\n+GZi8duiHK7nT6vBrsVy6R9YCPQbDWBc34dKShx01Ml+3BvJ0jGEiY5jYukYylGwJ\n+XyatDe8dz3g4KGfGnSaMhgf154VDLIuXz/U0BUE=\n+-----END CERTIFICATE-----\n+-----BEGIN RSA PRIVATE KEY-----", - "comment_created_at": "2025-06-23T23:01:44+00:00", - "comment_author": "github-advanced-security[bot]", - "comment_body": "## Cryptographic private keys should not be disclosed\n\nMake sure this private key gets revoked, changed, and removed from the code.

See more on SonarQube Cloud
              \n\n[Show more details](https://github.com/chef/chef/security/code-scanning/287)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2162660892", - "pr_number": 15065, - "pr_file": "spec/data/trusted_certs/opscode.pem", - "created_at": "2025-06-23T23:01:44+00:00", - "commented_code": "-----BEGIN CERTIFICATE-----\nMIIGTjCCBTagAwIBAgIQBK55YGZmkBq5xX+mbFvczTANBgkqhkiG9w0BAQsFADBl\nMQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3\nd3cuZGlnaWNlcnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBBc3N1cmVkIElEIFJv\nb3QgQ0EwHhcNMTMxMTA1MTIwMDAwWhcNMjgxMTA1MTIwMDAwWjBlMQswCQYDVQQG\nEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cuZGlnaWNl\ncnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBTSEEyIEFzc3VyZWQgSUQgQ0EwggEi\nMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDc+BEjP2q178AneRstBYeiEEMx\n3w7UFRtPd6Qizj6McPC+B47dJyq8AR22LArK3WlYH0HtagUf2mN4WR4iLCv4un7J\nNTtW8R98Qn4lsCMZxkU41z1E+SB8YK4csFoYBL6PO/ep8JSapgxjSbZBF1NAMr1P\n5lB6UB8lRejxia/N/17/UPPwFxH/vcWJ9b1iudj7jkUEhW2ZzcVITf0mqwI2Reo2\n119q4hqCQQrc6dn1kReOxiGtODwT5h5/ZpzVTdlG2vbPUqd9OyTDtMFRNcab69Tv\nfuR7A+FEvXoLN+BPy4KKDXEY5KbgiSwb87JzPMGwkp4Yfb2rfcV9CKEswp9zAgMB\nAAGjggL4MIIC9DASBgNVHRMBAf8ECDAGAQH/AgEAMA4GA1UdDwEB/wQEAwIBhjA0\nBggrBgEFBQcBAQQoMCYwJAYIKwYBBQUHMAGGGGh0dHA6Ly9vY3NwLmRpZ2ljZXJ0\nLmNvbTCBgQYDVR0fBHoweDA6oDigNoY0aHR0cDovL2NybDQuZGlnaWNlcnQuY29t\nL0RpZ2lDZXJ0QXNzdXJlZElEUm9vdENBLmNybDA6oDigNoY0aHR0cDovL2NybDMu\nZGlnaWNlcnQuY29tL0RpZ2lDZXJ0QXNzdXJlZElEUm9vdENBLmNybDAdBgNVHSUE\nFjAUBggrBgEFBQcDAgYIKwYBBQUHAwQwggGzBgNVHSAEggGqMIIBpjCCAaIGCmCG\nSAGG/WwAAgQwggGSMCgGCCsGAQUFBwIBFhxodHRwczovL3d3dy5kaWdpY2VydC5j\nb20vQ1BTMIIBZAYIKwYBBQUHAgIwggFWHoIBUgBBAG4AeQAgAHUAcwBlACAAbwBm\nACAAdABoAGkAcwAgAEMAZQByAHQAaQBmAGkAYwBhAHQAZQAgAGMAbwBuAHMAdABp\nAHQAdQB0AGUAcwAgAGEAYwBjAGUAcAB0AGEAbgBjAGUAIABvAGYAIAB0AGgAZQAg\nAEQAaQBnAGkAQwBlAHIAdAAgAEMAUAAvAEMAUABTACAAYQBuAGQAIAB0AGgAZQAg\nAFIAZQBsAHkAaQBuAGcAIABQAGEAcgB0AHkAIABBAGcAcgBlAGUAbQBlAG4AdAAg\nAHcAaABpAGMAaAAgAGwAaQBtAGkAdAAgAGwAaQBhAGIAaQBsAGkAdAB5ACAAYQBu\nAGQAIABhAHIAZQAgAGkAbgBjAG8AcgBwAG8AcgBhAHQAZQBkACAAaABlAHIAZQBp\nAG4AIABiAHkAIAByAGUAZgBlAHIAZQBuAGMAZQAuMB0GA1UdDgQWBBTnAiOAAE/Y\n17yUC9k/dDlJMjyKeTAfBgNVHSMEGDAWgBRF66Kv9JLLgjEtUYunpyGd823IDzAN\nBgkqhkiG9w0BAQsFAAOCAQEATtSJJ7n9HYd3fg8oBZDxCi/JOz69k5yQxq/6kVGH\nMlRr6MrBcVFcmY61+uBiGZmmB5p8Eyfb5QKihBLZFfYKRFfENI9tcx861qABPd7j\nguRFa7LrJf2AXh05kL5bQvbOkWDj+aBWDEgQzjNoe82Tq/Bqy09YD7l7XRsEgZ6n\nIuJXSSfukpMIvmkIUwI6Ll3IGfRQgE4C2bBdkbSTh/mWloFVQI5m7YLYuyhf7Uxh\n7QZYKBlTEUS8RyApsgRs2IlUmTt122d4LB6SeMZVPVgSETJuvUMMTTTbe8ZC2+y+\nq5thTAaS447fISpQVwTAYKI11SSeZjcJSc/V+GWz4OJuwg==\nMIIDRDCCAiygAwIBAgIBAzANBgkqhkiG9w0BAQsFADA4MQswCQYDVQQGEwJVUzEQ\nMA4GA1UECgwHVGVzdE9yZzEXMBUGA1UEAwwOSW50ZXJtZWRpYXRlQ0EwHhcNMjUw\nNjIzMjI1ODM2WhcNMjYwNjIzMjI1ODM2WjAxMQswCQYDVQQGEwJVUzEQMA4GA1UE\nCgwHVGVzdE9yZzEQMA4GA1UEAwwHb3BzY29kZTCCASIwDQYJKoZIhvcNAQEBBQAD\nggEPADCCAQoCggEBANDL1IUIf2sOksEgw3AO2PlYZ9usimr2G3OQ3wozAfBnP11x\n8Jy2KTCY3f9tmxNVcQJYDQchaXF9W0YvUnQ5N61ab1i81MxI0alf66isAkb59krs\n1XjY/SKw09362cgQgUrlPO/U8rksJK6Xcrgien9GtxkU9T9S7roRsGSaTj1kc81C\n0UMm84/6vOXEkj9y/U0ce/y2P+pe6ySJZ+4Cnf2dbvb7QUdYjg1DDFvaqo0ctqUE\n7fKi8/zVaydYJBFhp8I8/5h2Z5KG2Gj7ChEy+l6xp9nEg4hMKvNFHxmQP5WmnPsM\nGUPRkTTiIz+GtyG3gPsYOABLJaDUncVD+J3OLv8CAwEAAaNgMF4wDAYDVR0TAQH/\nBAIwADAOBgNVHQ8BAf8EBAMCB4AwHQYDVR0OBBYEFOfOw3p0XmA6bkcfO8hGdhN0\n7LaJMB8GA1UdIwQYMBaAFBAaAdIESETcy7rlWMDbUkj3shQiMA0GCSqGSIb3DQEB\nCwUAA4IBAQBn6+HqXbW0WFdTnFbCQ8+QTBPc8s0IiejoAbY61b8l6o82ajRTAX3V\nIU1/sLREuTjrrFs5ouy6NeCndPPTIulpCAHHWMeEjPk6x09bZUcKcZBDS5A2uvNL\nRZ3oc7kJhCbSZ36D1WNZsNrNG2vr1qPmxVHD6NuOQDreibjdD7+lM+XrHFuM+W
Yw\nL5GL/qvZJAb2VRh4MpBufqlp7yu2hycWzGL3BQOnqAFxfsQbrqTaqpOvgxO9vlcL\nTP5ZTswNOuPs3MMIRlsBW3NQU4fOQa8Aw2V4eJ6SATY2ZH1J3PD/kpAujw7aKft/\nX+0v2v5fTjyfuA8TrdnY0sqKx/icy3wr\n-----END CERTIFICATE-----\n-----BEGIN RSA PRIVATE KEY-----", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2162660892", - "repo_full_name": "chef/chef", - "pr_number": 15065, - "pr_file": "spec/data/trusted_certs/opscode.pem", - "discussion_id": "2162660892", - "commented_code": "@@ -1,36 +1,47 @@\n -----BEGIN CERTIFICATE-----\n-MIIGTjCCBTagAwIBAgIQBK55YGZmkBq5xX+mbFvczTANBgkqhkiG9w0BAQsFADBl\n-MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3\n-d3cuZGlnaWNlcnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBBc3N1cmVkIElEIFJv\n-b3QgQ0EwHhcNMTMxMTA1MTIwMDAwWhcNMjgxMTA1MTIwMDAwWjBlMQswCQYDVQQG\n-EwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cuZGlnaWNl\n-cnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBTSEEyIEFzc3VyZWQgSUQgQ0EwggEi\n-MA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDc+BEjP2q178AneRstBYeiEEMx\n-3w7UFRtPd6Qizj6McPC+B47dJyq8AR22LArK3WlYH0HtagUf2mN4WR4iLCv4un7J\n-NTtW8R98Qn4lsCMZxkU41z1E+SB8YK4csFoYBL6PO/ep8JSapgxjSbZBF1NAMr1P\n-5lB6UB8lRejxia/N/17/UPPwFxH/vcWJ9b1iudj7jkUEhW2ZzcVITf0mqwI2Reo2\n-119q4hqCQQrc6dn1kReOxiGtODwT5h5/ZpzVTdlG2vbPUqd9OyTDtMFRNcab69Tv\n-fuR7A+FEvXoLN+BPy4KKDXEY5KbgiSwb87JzPMGwkp4Yfb2rfcV9CKEswp9zAgMB\n-AAGjggL4MIIC9DASBgNVHRMBAf8ECDAGAQH/AgEAMA4GA1UdDwEB/wQEAwIBhjA0\n-BggrBgEFBQcBAQQoMCYwJAYIKwYBBQUHMAGGGGh0dHA6Ly9vY3NwLmRpZ2ljZXJ0\n-LmNvbTCBgQYDVR0fBHoweDA6oDigNoY0aHR0cDovL2NybDQuZGlnaWNlcnQuY29t\n-L0RpZ2lDZXJ0QXNzdXJlZElEUm9vdENBLmNybDA6oDigNoY0aHR0cDovL2NybDMu\n-ZGlnaWNlcnQuY29tL0RpZ2lDZXJ0QXNzdXJlZElEUm9vdENBLmNybDAdBgNVHSUE\n-FjAUBggrBgEFBQcDAgYIKwYBBQUHAwQwggGzBgNVHSAEggGqMIIBpjCCAaIGCmCG\n-SAGG/WwAAgQwggGSMCgGCCsGAQUFBwIBFhxodHRwczovL3d3dy5kaWdpY2VydC5j\n-b20vQ1BTMIIBZAYIKwYBBQUHAgIwggFWHoIBUgBBAG4AeQAgAHUAcwBlACAAbwBm\n-ACAAdABoAGkAcwAgAEMAZQByAHQAaQBmAGkAYwBhAHQAZQAgAGMAbwBuAHMAdABp\n-AHQAdQB0AGUAcwAgAGEAYwBjAGUAcAB0AGEAbgBjAGUAIABvAGYAIAB0AGgAZQAg\n-AEQAaQBnAGkAQwBlAHIAdAAgAEMAUAAvAEMAUABTACAAYQBuAGQAIAB0AGgAZQAg\n-AFIAZQBsAHkAaQBuAGcAIABQAGEAcgB0AHkAIABBAGcAcgBlAGUAbQBlAG4AdAAg\n-AHcAaABpAGMAaAAgAGwAaQBtAGkAdAAgAGwAaQBhAGIAaQBsAGkAdAB5ACAAYQBu\n-AGQAIABhAHIAZQAgAGkAbgBjAG8AcgBwAG8AcgBhAHQAZQBkACAAaABlAHIAZQBp\n-AG4AIABiAHkAIAByAGUAZgBlAHIAZQBuAGMAZQAuMB0GA1UdDgQWBBTnAiOAAE/Y\n-17yUC9k/dDlJMjyKeTAfBgNVHSMEGDAWgBRF66Kv9JLLgjEtUYunpyGd823IDzAN\n-BgkqhkiG9w0BAQsFAAOCAQEATtSJJ7n9HYd3fg8oBZDxCi/JOz69k5yQxq/6kVGH\n-MlRr6MrBcVFcmY61+uBiGZmmB5p8Eyfb5QKihBLZFfYKRFfENI9tcx861qABPd7j\n-guRFa7LrJf2AXh05kL5bQvbOkWDj+aBWDEgQzjNoe82Tq/Bqy09YD7l7XRsEgZ6n\n-IuJXSSfukpMIvmkIUwI6Ll3IGfRQgE4C2bBdkbSTh/mWloFVQI5m7YLYuyhf7Uxh\n-7QZYKBlTEUS8RyApsgRs2IlUmTt122d4LB6SeMZVPVgSETJuvUMMTTTbe8ZC2+y+\n-q5thTAaS447fISpQVwTAYKI11SSeZjcJSc/V+GWz4OJuwg==\n+MIIDRDCCAiygAwIBAgIBAzANBgkqhkiG9w0BAQsFADA4MQswCQYDVQQGEwJVUzEQ\n+MA4GA1UECgwHVGVzdE9yZzEXMBUGA1UEAwwOSW50ZXJtZWRpYXRlQ0EwHhcNMjUw\n+NjIzMjI1ODM2WhcNMjYwNjIzMjI1ODM2WjAxMQswCQYDVQQGEwJVUzEQMA4GA1UE\n+CgwHVGVzdE9yZzEQMA4GA1UEAwwHb3BzY29kZTCCASIwDQYJKoZIhvcNAQEBBQAD\n+ggEPADCCAQoCggEBANDL1IUIf2sOksEgw3AO2PlYZ9usimr2G3OQ3wozAfBnP11x\n+8Jy2KTCY3f9tmxNVcQJYDQchaXF9W0YvUnQ5N61ab1i81MxI0alf66isAkb59krs\n+1XjY/SKw09362cgQgUrlPO/U8rksJK6Xcrgien9GtxkU9T9S7roRsGSaTj1kc81C\n+0UMm84/6vOXEkj9y/U0ce/y2P+pe6ySJZ+4Cnf2dbvb7QUdYjg1DDFvaqo0ctqUE\n+7fKi8/zVaydYJBFhp8I8/5h2Z5KG2Gj7ChEy+l6xp9nEg4hMKvNFHxmQP5WmnPsM\n+GUPRkTTiIz+GtyG3gPsYOABLJaDUncVD+J3OLv8CAwEAAaNgMF4wDAYDVR0TAQH/\n+BAIwADAOBgNVHQ8BAf8EBAMCB4AwHQYDVR0OBBYEFOfOw3p0XmA6bkcfO8hGdhN0\n+7LaJMB8GA1UdIwQYMBaA
FBAaAdIESETcy7rlWMDbUkj3shQiMA0GCSqGSIb3DQEB\n+CwUAA4IBAQBn6+HqXbW0WFdTnFbCQ8+QTBPc8s0IiejoAbY61b8l6o82ajRTAX3V\n+IU1/sLREuTjrrFs5ouy6NeCndPPTIulpCAHHWMeEjPk6x09bZUcKcZBDS5A2uvNL\n+RZ3oc7kJhCbSZ36D1WNZsNrNG2vr1qPmxVHD6NuOQDreibjdD7+lM+XrHFuM+WYw\n+L5GL/qvZJAb2VRh4MpBufqlp7yu2hycWzGL3BQOnqAFxfsQbrqTaqpOvgxO9vlcL\n+TP5ZTswNOuPs3MMIRlsBW3NQU4fOQa8Aw2V4eJ6SATY2ZH1J3PD/kpAujw7aKft/\n+X+0v2v5fTjyfuA8TrdnY0sqKx/icy3wr\n -----END CERTIFICATE-----\n+-----BEGIN RSA PRIVATE KEY-----", - "comment_created_at": "2025-06-23T23:01:44+00:00", - "comment_author": "github-advanced-security[bot]", - "comment_body": "## Cryptographic private keys should not be disclosed\n\nMake sure this private key gets revoked, changed, and removed from the code.

See more on SonarQube Cloud
              \n\n[Show more details](https://github.com/chef/chef/security/code-scanning/286)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2162660894", - "pr_number": 15065, - "pr_file": "spec/data/trusted_certs/root.pem", - "created_at": "2025-06-23T23:01:44+00:00", - "commented_code": "-----BEGIN CERTIFICATE-----\nMIIDrzCCApegAwIBAgIQCDvgVpBCRrGhdWrJWZHHSjANBgkqhkiG9w0BAQUFADBh\nMQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3\nd3cuZGlnaWNlcnQuY29tMSAwHgYDVQQDExdEaWdpQ2VydCBHbG9iYWwgUm9vdCBD\nQTAeFw0wNjExMTAwMDAwMDBaFw0zMTExMTAwMDAwMDBaMGExCzAJBgNVBAYTAlVT\nMRUwEwYDVQQKEwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5j\nb20xIDAeBgNVBAMTF0RpZ2lDZXJ0IEdsb2JhbCBSb290IENBMIIBIjANBgkqhkiG\n9w0BAQEFAAOCAQ8AMIIBCgKCAQEA4jvhEXLeqKTTo1eqUKKPC3eQyaKl7hLOllsB\nCSDMAZOnTjC3U/dDxGkAV53ijSLdhwZAAIEJzs4bg7/fzTtxRuLWZscFs3YnFo97\nnh6Vfe63SKMI2tavegw5BmV/Sl0fvBf4q77uKNd0f3p4mVmFaG5cIzJLv07A6Fpt\n43C/dxC//AH2hdmoRBBYMql1GNXRor5H4idq9Joz+EkIYIvUX7Q6hL+hqkpMfT7P\nT19sdl6gSzeRntwi5m3OFBqOasv+zbMUZBfHWymeMr/y7vrTC0LUq7dBMtoM1O/4\ngdW7jVg/tRvoSSiicNoxBN33shbyTApOB6jtSj1etX+jkMOvJwIDAQABo2MwYTAO\nBgNVHQ8BAf8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUA95QNVbR\nTLtm8KPiGxvDl7I90VUwHwYDVR0jBBgwFoAUA95QNVbRTLtm8KPiGxvDl7I90VUw\nDQYJKoZIhvcNAQEFBQADggEBAMucN6pIExIK+t1EnE9SsPTfrgT1eXkIoyQY/Esr\nhMAtudXH/vTBH1jLuG2cenTnmCmrEbXjcKChzUyImZOMkXDiqw8cvpOp/2PV5Adg\n06O/nVsJ8dWO41P0jmP6P6fbtGbfYmbW0W5BjfIttep3Sp+dWOIrWcBAI+0tKIJF\nPnlUkiaY4IBIqDfv8NZ5YBberOgOzW6sRBc4L0na4UU+Krk2U886UAb3LujEV0ls\nYSEY1QSteDwsOoBrp+uvFRTp2InBuThs4pFsiv9kuXclVzDAGySj4dzp30d8tbQk\nCAUw7C29C79Fv1C5qfPrmAESrciIxpg0X40KPMbp1ZWVbd4=\nMIIDPjCCAiagAwIBAgIBATANBgkqhkiG9w0BAQsFADAwMQswCQYDVQQGEwJVUzEQ\nMA4GA1UECgwHVGVzdE9yZzEPMA0GA1UEAwwGUm9vdENBMB4XDTI1MDYyMzIyNTgz\nNVoXDTMwMDYyMjIyNTgzNVowMDELMAkGA1UEBhMCVVMxEDAOBgNVBAoMB1Rlc3RP\ncmcxDzANBgNVBAMMBlJvb3RDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC\nggEBALHY+ebd8qHYjtGHWkqG5jk2PMCOMIxt8J+ftU3F3pU2DOHu58j6V7q7KmCW\n45m79Ca7oZ4db1p6KWmn29gpTzBRQcGMe+ZZYdlsymWFk/inoVe66UsRFzw7FLR3\nV+RY9LWw9j6mItfwvUEu43yqrZCv0+2rTxp5sLta9CaYFdS7Bhrtm5raR4MHQogw\nYCFdCPMcqdzef6EnkHA/wh6e0cY+1EsTfGJ3Dk3HZR7uCZwp7aUUqmSTIqmLzeo0\nRfmN4PcnNECDobNAHtcF/ArbXXm2h6wOX24zMsWPs87i0HVLLn4LI+cIsTCmgdvH\nkOsQFdRJCxJE8u3s5qUnFxmy1BsCAwEAAaNjMGEwDwYDVR0TAQH/BAUwAwEB/zAO\nBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFNIyIcTSIq1MnMCNyeWgOMxELtvxMB8G\nA1UdIwQYMBaAFNIyIcTSIq1MnMCNyeWgOMxELtvxMA0GCSqGSIb3DQEBCwUAA4IB\nAQAl9Jq+HvXWbnBbbHfZYtbMRsIrERXaZ3ACHWxh/fGS1K/adfvRFRt1jhDUgWm9\ntnW85hvPSaJTCIl2HHka6zX+OU8DvNLdL4rxJcy3h4JYyoUspzCHYSs3/B+aHJdi\nsygCd84YUBhVaco4KoX8asx1VeaZKSIMv+swxLPlKGv0ySxJUMV2NwPAwjG+tVFu\nBaNNl3aGx3OhRN2+6rNOxy5HM0QmCTIDgDESf6DsuRCQfV2z2eIYHHVbf2wtdtuD\nn8ioMqMiekFPCh6xsf4EBoAPheSkDo2uHrVSX6Xi8nZZjKKhFtEcDmeOHr1iwICt\nR58gaSLhCEl7nXgcqUmb3caA\n-----END CERTIFICATE-----\n-----BEGIN RSA PRIVATE KEY-----", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2162660894", - "repo_full_name": "chef/chef", - "pr_number": 15065, - "pr_file": "spec/data/trusted_certs/root.pem", - "discussion_id": "2162660894", - "commented_code": "@@ -1,22 +1,47 @@\n -----BEGIN 
CERTIFICATE-----\n-MIIDrzCCApegAwIBAgIQCDvgVpBCRrGhdWrJWZHHSjANBgkqhkiG9w0BAQUFADBh\n-MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3\n-d3cuZGlnaWNlcnQuY29tMSAwHgYDVQQDExdEaWdpQ2VydCBHbG9iYWwgUm9vdCBD\n-QTAeFw0wNjExMTAwMDAwMDBaFw0zMTExMTAwMDAwMDBaMGExCzAJBgNVBAYTAlVT\n-MRUwEwYDVQQKEwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5j\n-b20xIDAeBgNVBAMTF0RpZ2lDZXJ0IEdsb2JhbCBSb290IENBMIIBIjANBgkqhkiG\n-9w0BAQEFAAOCAQ8AMIIBCgKCAQEA4jvhEXLeqKTTo1eqUKKPC3eQyaKl7hLOllsB\n-CSDMAZOnTjC3U/dDxGkAV53ijSLdhwZAAIEJzs4bg7/fzTtxRuLWZscFs3YnFo97\n-nh6Vfe63SKMI2tavegw5BmV/Sl0fvBf4q77uKNd0f3p4mVmFaG5cIzJLv07A6Fpt\n-43C/dxC//AH2hdmoRBBYMql1GNXRor5H4idq9Joz+EkIYIvUX7Q6hL+hqkpMfT7P\n-T19sdl6gSzeRntwi5m3OFBqOasv+zbMUZBfHWymeMr/y7vrTC0LUq7dBMtoM1O/4\n-gdW7jVg/tRvoSSiicNoxBN33shbyTApOB6jtSj1etX+jkMOvJwIDAQABo2MwYTAO\n-BgNVHQ8BAf8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUA95QNVbR\n-TLtm8KPiGxvDl7I90VUwHwYDVR0jBBgwFoAUA95QNVbRTLtm8KPiGxvDl7I90VUw\n-DQYJKoZIhvcNAQEFBQADggEBAMucN6pIExIK+t1EnE9SsPTfrgT1eXkIoyQY/Esr\n-hMAtudXH/vTBH1jLuG2cenTnmCmrEbXjcKChzUyImZOMkXDiqw8cvpOp/2PV5Adg\n-06O/nVsJ8dWO41P0jmP6P6fbtGbfYmbW0W5BjfIttep3Sp+dWOIrWcBAI+0tKIJF\n-PnlUkiaY4IBIqDfv8NZ5YBberOgOzW6sRBc4L0na4UU+Krk2U886UAb3LujEV0ls\n-YSEY1QSteDwsOoBrp+uvFRTp2InBuThs4pFsiv9kuXclVzDAGySj4dzp30d8tbQk\n-CAUw7C29C79Fv1C5qfPrmAESrciIxpg0X40KPMbp1ZWVbd4=\n+MIIDPjCCAiagAwIBAgIBATANBgkqhkiG9w0BAQsFADAwMQswCQYDVQQGEwJVUzEQ\n+MA4GA1UECgwHVGVzdE9yZzEPMA0GA1UEAwwGUm9vdENBMB4XDTI1MDYyMzIyNTgz\n+NVoXDTMwMDYyMjIyNTgzNVowMDELMAkGA1UEBhMCVVMxEDAOBgNVBAoMB1Rlc3RP\n+cmcxDzANBgNVBAMMBlJvb3RDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC\n+ggEBALHY+ebd8qHYjtGHWkqG5jk2PMCOMIxt8J+ftU3F3pU2DOHu58j6V7q7KmCW\n+45m79Ca7oZ4db1p6KWmn29gpTzBRQcGMe+ZZYdlsymWFk/inoVe66UsRFzw7FLR3\n+V+RY9LWw9j6mItfwvUEu43yqrZCv0+2rTxp5sLta9CaYFdS7Bhrtm5raR4MHQogw\n+YCFdCPMcqdzef6EnkHA/wh6e0cY+1EsTfGJ3Dk3HZR7uCZwp7aUUqmSTIqmLzeo0\n+RfmN4PcnNECDobNAHtcF/ArbXXm2h6wOX24zMsWPs87i0HVLLn4LI+cIsTCmgdvH\n+kOsQFdRJCxJE8u3s5qUnFxmy1BsCAwEAAaNjMGEwDwYDVR0TAQH/BAUwAwEB/zAO\n+BgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFNIyIcTSIq1MnMCNyeWgOMxELtvxMB8G\n+A1UdIwQYMBaAFNIyIcTSIq1MnMCNyeWgOMxELtvxMA0GCSqGSIb3DQEBCwUAA4IB\n+AQAl9Jq+HvXWbnBbbHfZYtbMRsIrERXaZ3ACHWxh/fGS1K/adfvRFRt1jhDUgWm9\n+tnW85hvPSaJTCIl2HHka6zX+OU8DvNLdL4rxJcy3h4JYyoUspzCHYSs3/B+aHJdi\n+sygCd84YUBhVaco4KoX8asx1VeaZKSIMv+swxLPlKGv0ySxJUMV2NwPAwjG+tVFu\n+BaNNl3aGx3OhRN2+6rNOxy5HM0QmCTIDgDESf6DsuRCQfV2z2eIYHHVbf2wtdtuD\n+n8ioMqMiekFPCh6xsf4EBoAPheSkDo2uHrVSX6Xi8nZZjKKhFtEcDmeOHr1iwICt\n+R58gaSLhCEl7nXgcqUmb3caA\n -----END CERTIFICATE-----\n+-----BEGIN RSA PRIVATE KEY-----", - "comment_created_at": "2025-06-23T23:01:44+00:00", - "comment_author": "github-advanced-security[bot]", - "comment_body": "## Cryptographic private keys should not be disclosed\n\nMake sure this private key gets revoked, changed, and removed from the code.
              \n\nSee more on SonarQube Cloud
              \n\n[Show more details](https://github.com/chef/chef/security/code-scanning/288)", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-prefer-guard-clauses.json b/_reviewers/chef-prefer-guard-clauses.json new file mode 100644 index 0000000..710ece2 --- /dev/null +++ b/_reviewers/chef-prefer-guard-clauses.json @@ -0,0 +1,172 @@ +[ + { + "discussion_id": "1763995053", + "pr_number": 14582, + "pr_file": "lib/chef/action_collection.rb", + "created_at": "2024-09-17T20:44:45+00:00", + "commented_code": "# (see EventDispatch::Base#)\n #\n def resource_skipped(resource, action, conditional)\n current_record.status = :skipped\n current_record.conditional = conditional\n unless current_record.nil?\n current_record.status = :skipped\n current_record.conditional = conditional\n end", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1763995053", + "repo_full_name": "chef/chef", + "pr_number": 14582, + "pr_file": "lib/chef/action_collection.rb", + "discussion_id": "1763995053", + "commented_code": "@@ -189,8 +189,10 @@ def resource_up_to_date(new_resource, action)\n # (see EventDispatch::Base#)\n #\n def resource_skipped(resource, action, conditional)\n- current_record.status = :skipped\n- current_record.conditional = conditional\n+ unless current_record.nil?\n+ current_record.status = :skipped\n+ current_record.conditional = conditional\n+ end", + "comment_created_at": "2024-09-17T20:44:45+00:00", + "comment_author": "tpowell-progress", + "comment_body": "```suggestion\r\n return if current_record.nil?\r\n \r\n current_record.status = :skipped\r\n current_record.conditional = conditional\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1763996558", + "repo_full_name": "chef/chef", + "pr_number": 14582, + "pr_file": "lib/chef/action_collection.rb", + "discussion_id": "1763995053", + "commented_code": "@@ -189,8 +189,10 @@ def resource_up_to_date(new_resource, action)\n # (see EventDispatch::Base#)\n #\n def resource_skipped(resource, action, conditional)\n- current_record.status = :skipped\n- current_record.conditional = conditional\n+ unless current_record.nil?\n+ current_record.status = :skipped\n+ current_record.conditional = conditional\n+ end", + "comment_created_at": "2024-09-17T20:46:07+00:00", + "comment_author": "tpowell-progress", + "comment_body": "Use a guard instead of `unless` block.", + "pr_file_module": null + }, + { + "comment_id": "1764248147", + "repo_full_name": "chef/chef", + "pr_number": 14582, + "pr_file": "lib/chef/action_collection.rb", + "discussion_id": "1763995053", + "commented_code": "@@ -189,8 +189,10 @@ def resource_up_to_date(new_resource, action)\n # (see EventDispatch::Base#)\n #\n def resource_skipped(resource, action, conditional)\n- current_record.status = :skipped\n- current_record.conditional = conditional\n+ unless current_record.nil?\n+ current_record.status = :skipped\n+ current_record.conditional = conditional\n+ end", + "comment_created_at": "2024-09-18T00:46:28+00:00", + "comment_author": "jamespdo", + "comment_body": "updated per request, please rereview", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1904548771", + "pr_number": 14777, + "pr_file": "knife/lib/chef/utils/licensing_handler.rb", + "created_at": "2025-01-06T19:25:36+00:00", + "commented_code": "class << self\n def validate!\n license_keys = ChefLicensing::LicenseKeyFetcher.fetch\n\n return new(nil, nil) if license_keys.blank?\n license_keys = begin\n ChefLicensing::LicenseKeyFetcher.fetch\n # If the env is 
airgapped or the local licensing service is unreachable,\n # the licensing gem will raise ChefLicensing::RestfulClientConnectionError.\n # In such cases, we are assuming the license is not available.\n rescue ChefLicensing::RestfulClientConnectionError\n []\n end\n\n return new(nil, nil) if license_keys.nil? || license_keys.empty?", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1904548771", + "repo_full_name": "chef/chef", + "pr_number": 14777, + "pr_file": "knife/lib/chef/utils/licensing_handler.rb", + "discussion_id": "1904548771", + "commented_code": "@@ -30,9 +30,16 @@ def install_sh_url\n \n class << self\n def validate!\n- license_keys = ChefLicensing::LicenseKeyFetcher.fetch\n-\n- return new(nil, nil) if license_keys.blank?\n+ license_keys = begin\n+ ChefLicensing::LicenseKeyFetcher.fetch\n+ # If the env is airgapped or the local licensing service is unreachable,\n+ # the licensing gem will raise ChefLicensing::RestfulClientConnectionError.\n+ # In such cases, we are assuming the license is not available.\n+ rescue ChefLicensing::RestfulClientConnectionError\n+ []\n+ end\n+\n+ return new(nil, nil) if license_keys.nil? || license_keys.empty?", + "comment_created_at": "2025-01-06T19:25:36+00:00", + "comment_author": "Stromweld", + "comment_body": "This can be simplified to use safe navigation that checks for nil before trying method empty. If nil it'll return nill which is then false in the if statement.\r\n```suggestion\r\n return new(nil, nil) if license_keys&.empty?\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1369667380", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "created_at": "2023-10-24T06:10:14+00:00", + "commented_code": "class Chef\n class Resource\n class ChocolateyInstaller < Chef::Resource\n provides :chocolatey_installer\n\n description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n introduced \"18.3\"\n examples <<~DOC\n **Install Chocolatey**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n end\n ```\n\n **Uninstall Chocolatey**\n\n ```ruby\n chocolatey_installer 'Some random verbiage' do\n action :uninstall\n end\n ```\n\n **Install Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n download_url \"https://www.contoso.com/foo\"\n chocolatey_version '2.12.24'\n end\n ```\n\n **Upgrade Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :upgrade\n chocolatey_version '2.12.24'\n end\n ```\n DOC\n\n allowed_actions :install, :uninstall, :upgrade\n\n property :download_url, String,\n description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n\n property :chocolatey_version, String,\n description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. 
This parameter is ignored if download_url is set.\"\n\n property :use_native_unzip, [TrueClass, FalseClass], default: false,\n description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n\n property :ignore_proxy, [TrueClass, FalseClass], default: false,\n description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n\n property :proxy_url, String,\n description: \"Specifies the proxy URL to use during the download.\"\n\n property :proxy_user, String,\n description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n\n property :proxy_password, String,\n description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n\n load_current_value do\n current_state = is_choco_installed?\n current_value_does_not_exist! if current_state == false\n current_state\n end\n\n def is_choco_installed?\n ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n end\n\n def get_choco_version\n powershell_exec(\"choco --version\").result\n end\n\n def existing_version\n Gem::Version.new(get_choco_version)\n end\n\n def define_resource_requirements\n [ new_resource.proxy_user, new_resource.proxy_password ].each do\n requirements.assert(:install, :upgrade).each do |a|\n a.assertion do\n (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?)\n end\n a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n end\n end\n end\n\n action :install, description: \"Installs Chocolatey package manager\" do\n unless new_resource.download_url.nil?", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1369667380", + "repo_full_name": "chef/chef", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "discussion_id": "1369667380", + "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. 
Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version\n+ Gem::Version.new(get_choco_version)\n+ end\n+\n+ def define_resource_requirements\n+ [ new_resource.proxy_user, new_resource.proxy_password ].each do\n+ requirements.assert(:install, :upgrade).each do |a|\n+ a.assertion do\n+ (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? 
&& !new_resource.proxy_password.nil?)\n+ end\n+ a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n+ a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n+ end\n+ end\n+ end\n+\n+ action :install, description: \"Installs Chocolatey package manager\" do\n+ unless new_resource.download_url.nil?", + "comment_created_at": "2023-10-24T06:10:14+00:00", + "comment_author": "jaymzh", + "comment_body": "You can just do\r\n```suggestion\r\n if new_resource.download_url\r\n```\r\n\r\nRuby treats that was defined, not nil, not false.", + "pr_file_module": null + }, + { + "comment_id": "1370275715", + "repo_full_name": "chef/chef", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "discussion_id": "1369667380", + "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. 
Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version\n+ Gem::Version.new(get_choco_version)\n+ end\n+\n+ def define_resource_requirements\n+ [ new_resource.proxy_user, new_resource.proxy_password ].each do\n+ requirements.assert(:install, :upgrade).each do |a|\n+ a.assertion do\n+ (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?)\n+ end\n+ a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n+ a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n+ end\n+ end\n+ end\n+\n+ action :install, description: \"Installs Chocolatey package manager\" do\n+ unless new_resource.download_url.nil?", + "comment_created_at": "2023-10-24T14:23:02+00:00", + "comment_author": "johnmccrae", + "comment_body": "done", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "438613990", + "pr_number": 9976, + "pr_file": "lib/chef/provider/package/apt.rb", + "created_at": "2020-06-11T08:07:13+00:00", + "commented_code": "}\n end\n\n def check_availability(name)\n name.map do |pkg|\n showpkg = run_noninteractive(\"apt-cache\", \"search\", pkg).stdout\n showpkg.empty? ? logger.warn(\"Unable to locate package #{pkg} \") : pkg\n end", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "438613990", + "repo_full_name": "chef/chef", + "pr_number": 9976, + "pr_file": "lib/chef/provider/package/apt.rb", + "discussion_id": "438613990", + "commented_code": "@@ -229,6 +234,12 @@ def package_data_for(pkg)\n }\n end\n \n+ def check_availability(name)\n+ name.map do |pkg|\n+ showpkg = run_noninteractive(\"apt-cache\", \"search\", pkg).stdout\n+ showpkg.empty? ? logger.warn(\"Unable to locate package #{pkg} \") : pkg\n+ end", + "comment_created_at": "2020-06-11T08:07:13+00:00", + "comment_author": "jaymzh", + "comment_body": "do you nee to call `.compact` here so you don't return a bunch of `nil`s?", + "pr_file_module": null + }, + { + "comment_id": "438754307", + "repo_full_name": "chef/chef", + "pr_number": 9976, + "pr_file": "lib/chef/provider/package/apt.rb", + "discussion_id": "438613990", + "commented_code": "@@ -229,6 +234,12 @@ def package_data_for(pkg)\n }\n end\n \n+ def check_availability(name)\n+ name.map do |pkg|\n+ showpkg = run_noninteractive(\"apt-cache\", \"search\", pkg).stdout\n+ showpkg.empty? ? 
logger.warn(\"Unable to locate package #{pkg} \") : pkg\n+ end", + "comment_created_at": "2020-06-11T12:44:37+00:00", + "comment_author": "dheerajd-msys", + "comment_body": "Thanks @jaymzh, Updated.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "778370434", + "pr_number": 12416, + "pr_file": "lib/chef/resource/chef_client_config.rb", + "created_at": "2022-01-04T20:33:46+00:00", + "commented_code": "introduced: \"17.8\"\n\n action :create, description: \"Create a client.rb config file for configuring #{ChefUtils::Dist::Infra::PRODUCT}.\" do\n unless ::Dir.exist?(new_resource.config_directory)\n directory new_resource.config_directory do\n [\n new_resource.config_directory,\n (::File.dirname(new_resource.log_location) unless new_resource.log_location.nil?),\n new_resource.file_backup_path,\n new_resource.file_cache_path,\n ::File.join(new_resource.config_directory, \"client.d\"),\n (::File.dirname(new_resource.pid_file) unless new_resource.pid_file.nil?),\n ].each do |dir_path|\n next if dir_path.nil?\n next if ::Dir.exist?(dir_path)\n\n directory dir_path do\n user new_resource.user unless new_resource.user.nil?\n group new_resource.group unless new_resource.group.nil?\n mode \"0750\"\n recursive true\n end\n end\n\n unless ::Dir.exist?(::File.join(new_resource.config_directory, \"client.d\"))\n directory ::File.join(new_resource.config_directory, \"client.d\") do\n user new_resource.user unless new_resource.user.nil?\n group new_resource.group unless new_resource.group.nil?\n mode \"0750\"\n mode dir_path == ::File&.dirname(new_resource.log_location) ? \"0755\" : \"0750\"", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "778370434", + "repo_full_name": "chef/chef", + "pr_number": 12416, + "pr_file": "lib/chef/resource/chef_client_config.rb", + "discussion_id": "778370434", + "commented_code": "@@ -250,20 +250,21 @@ def string_to_symbol(prop_val)\n introduced: \"17.8\"\n \n action :create, description: \"Create a client.rb config file for configuring #{ChefUtils::Dist::Infra::PRODUCT}.\" do\n- unless ::Dir.exist?(new_resource.config_directory)\n- directory new_resource.config_directory do\n+ [\n+ new_resource.config_directory,\n+ (::File.dirname(new_resource.log_location) unless new_resource.log_location.nil?),\n+ new_resource.file_backup_path,\n+ new_resource.file_cache_path,\n+ ::File.join(new_resource.config_directory, \"client.d\"),\n+ (::File.dirname(new_resource.pid_file) unless new_resource.pid_file.nil?),\n+ ].each do |dir_path|\n+ next if dir_path.nil?\n+ next if ::Dir.exist?(dir_path)\n+\n+ directory dir_path do\n user new_resource.user unless new_resource.user.nil?\n group new_resource.group unless new_resource.group.nil?\n- mode \"0750\"\n- recursive true\n- end\n- end\n-\n- unless ::Dir.exist?(::File.join(new_resource.config_directory, \"client.d\"))\n- directory ::File.join(new_resource.config_directory, \"client.d\") do\n- user new_resource.user unless new_resource.user.nil?\n- group new_resource.group unless new_resource.group.nil?\n- mode \"0750\"\n+ mode dir_path == ::File&.dirname(new_resource.log_location) ? 
\"0755\" : \"0750\"", + "comment_created_at": "2022-01-04T20:33:46+00:00", + "comment_author": "lamont-granquist", + "comment_body": "The `&.` here could be `.` since `::File` will never be nil", + "pr_file_module": null + }, + { + "comment_id": "779101985", + "repo_full_name": "chef/chef", + "pr_number": 12416, + "pr_file": "lib/chef/resource/chef_client_config.rb", + "discussion_id": "778370434", + "commented_code": "@@ -250,20 +250,21 @@ def string_to_symbol(prop_val)\n introduced: \"17.8\"\n \n action :create, description: \"Create a client.rb config file for configuring #{ChefUtils::Dist::Infra::PRODUCT}.\" do\n- unless ::Dir.exist?(new_resource.config_directory)\n- directory new_resource.config_directory do\n+ [\n+ new_resource.config_directory,\n+ (::File.dirname(new_resource.log_location) unless new_resource.log_location.nil?),\n+ new_resource.file_backup_path,\n+ new_resource.file_cache_path,\n+ ::File.join(new_resource.config_directory, \"client.d\"),\n+ (::File.dirname(new_resource.pid_file) unless new_resource.pid_file.nil?),\n+ ].each do |dir_path|\n+ next if dir_path.nil?\n+ next if ::Dir.exist?(dir_path)\n+\n+ directory dir_path do\n user new_resource.user unless new_resource.user.nil?\n group new_resource.group unless new_resource.group.nil?\n- mode \"0750\"\n- recursive true\n- end\n- end\n-\n- unless ::Dir.exist?(::File.join(new_resource.config_directory, \"client.d\"))\n- directory ::File.join(new_resource.config_directory, \"client.d\") do\n- user new_resource.user unless new_resource.user.nil?\n- group new_resource.group unless new_resource.group.nil?\n- mode \"0750\"\n+ mode dir_path == ::File&.dirname(new_resource.log_location) ? \"0755\" : \"0750\"", + "comment_created_at": "2022-01-05T20:02:04+00:00", + "comment_author": "Stromweld", + "comment_body": "removed &", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-prefer-guard-clauses.md b/_reviewers/chef-prefer-guard-clauses.md index 497e3f1..af71c8d 100644 --- a/_reviewers/chef-prefer-guard-clauses.md +++ b/_reviewers/chef-prefer-guard-clauses.md @@ -68,177 +68,3 @@ Only use safe navigation when the object might actually be nil. 
Don't use it on # Better: ::File.dirname(path) # Since ::File will never be nil ``` - - -[ - { - "discussion_id": "1763995053", - "pr_number": 14582, - "pr_file": "lib/chef/action_collection.rb", - "created_at": "2024-09-17T20:44:45+00:00", - "commented_code": "# (see EventDispatch::Base#)\n #\n def resource_skipped(resource, action, conditional)\n current_record.status = :skipped\n current_record.conditional = conditional\n unless current_record.nil?\n current_record.status = :skipped\n current_record.conditional = conditional\n end", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1763995053", - "repo_full_name": "chef/chef", - "pr_number": 14582, - "pr_file": "lib/chef/action_collection.rb", - "discussion_id": "1763995053", - "commented_code": "@@ -189,8 +189,10 @@ def resource_up_to_date(new_resource, action)\n # (see EventDispatch::Base#)\n #\n def resource_skipped(resource, action, conditional)\n- current_record.status = :skipped\n- current_record.conditional = conditional\n+ unless current_record.nil?\n+ current_record.status = :skipped\n+ current_record.conditional = conditional\n+ end", - "comment_created_at": "2024-09-17T20:44:45+00:00", - "comment_author": "tpowell-progress", - "comment_body": "```suggestion\r\n return if current_record.nil?\r\n \r\n current_record.status = :skipped\r\n current_record.conditional = conditional\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1763996558", - "repo_full_name": "chef/chef", - "pr_number": 14582, - "pr_file": "lib/chef/action_collection.rb", - "discussion_id": "1763995053", - "commented_code": "@@ -189,8 +189,10 @@ def resource_up_to_date(new_resource, action)\n # (see EventDispatch::Base#)\n #\n def resource_skipped(resource, action, conditional)\n- current_record.status = :skipped\n- current_record.conditional = conditional\n+ unless current_record.nil?\n+ current_record.status = :skipped\n+ current_record.conditional = conditional\n+ end", - "comment_created_at": "2024-09-17T20:46:07+00:00", - "comment_author": "tpowell-progress", - "comment_body": "Use a guard instead of `unless` block.", - "pr_file_module": null - }, - { - "comment_id": "1764248147", - "repo_full_name": "chef/chef", - "pr_number": 14582, - "pr_file": "lib/chef/action_collection.rb", - "discussion_id": "1763995053", - "commented_code": "@@ -189,8 +189,10 @@ def resource_up_to_date(new_resource, action)\n # (see EventDispatch::Base#)\n #\n def resource_skipped(resource, action, conditional)\n- current_record.status = :skipped\n- current_record.conditional = conditional\n+ unless current_record.nil?\n+ current_record.status = :skipped\n+ current_record.conditional = conditional\n+ end", - "comment_created_at": "2024-09-18T00:46:28+00:00", - "comment_author": "jamespdo", - "comment_body": "updated per request, please rereview", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1904548771", - "pr_number": 14777, - "pr_file": "knife/lib/chef/utils/licensing_handler.rb", - "created_at": "2025-01-06T19:25:36+00:00", - "commented_code": "class << self\n def validate!\n license_keys = ChefLicensing::LicenseKeyFetcher.fetch\n\n return new(nil, nil) if license_keys.blank?\n license_keys = begin\n ChefLicensing::LicenseKeyFetcher.fetch\n # If the env is airgapped or the local licensing service is unreachable,\n # the licensing gem will raise ChefLicensing::RestfulClientConnectionError.\n # In such cases, we are assuming the license is not available.\n rescue ChefLicensing::RestfulClientConnectionError\n []\n end\n\n 
return new(nil, nil) if license_keys.nil? || license_keys.empty?", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1904548771", - "repo_full_name": "chef/chef", - "pr_number": 14777, - "pr_file": "knife/lib/chef/utils/licensing_handler.rb", - "discussion_id": "1904548771", - "commented_code": "@@ -30,9 +30,16 @@ def install_sh_url\n \n class << self\n def validate!\n- license_keys = ChefLicensing::LicenseKeyFetcher.fetch\n-\n- return new(nil, nil) if license_keys.blank?\n+ license_keys = begin\n+ ChefLicensing::LicenseKeyFetcher.fetch\n+ # If the env is airgapped or the local licensing service is unreachable,\n+ # the licensing gem will raise ChefLicensing::RestfulClientConnectionError.\n+ # In such cases, we are assuming the license is not available.\n+ rescue ChefLicensing::RestfulClientConnectionError\n+ []\n+ end\n+\n+ return new(nil, nil) if license_keys.nil? || license_keys.empty?", - "comment_created_at": "2025-01-06T19:25:36+00:00", - "comment_author": "Stromweld", - "comment_body": "This can be simplified to use safe navigation that checks for nil before trying method empty. If nil it'll return nill which is then false in the if statement.\r\n```suggestion\r\n return new(nil, nil) if license_keys&.empty?\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1369667380", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "created_at": "2023-10-24T06:10:14+00:00", - "commented_code": "class Chef\n class Resource\n class ChocolateyInstaller < Chef::Resource\n provides :chocolatey_installer\n\n description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n introduced \"18.3\"\n examples <<~DOC\n **Install Chocolatey**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n end\n ```\n\n **Uninstall Chocolatey**\n\n ```ruby\n chocolatey_installer 'Some random verbiage' do\n action :uninstall\n end\n ```\n\n **Install Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n download_url \"https://www.contoso.com/foo\"\n chocolatey_version '2.12.24'\n end\n ```\n\n **Upgrade Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :upgrade\n chocolatey_version '2.12.24'\n end\n ```\n DOC\n\n allowed_actions :install, :uninstall, :upgrade\n\n property :download_url, String,\n description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n\n property :chocolatey_version, String,\n description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n\n property :use_native_unzip, [TrueClass, FalseClass], default: false,\n description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. 
This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n\n property :ignore_proxy, [TrueClass, FalseClass], default: false,\n description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n\n property :proxy_url, String,\n description: \"Specifies the proxy URL to use during the download.\"\n\n property :proxy_user, String,\n description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n\n property :proxy_password, String,\n description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n\n load_current_value do\n current_state = is_choco_installed?\n current_value_does_not_exist! if current_state == false\n current_state\n end\n\n def is_choco_installed?\n ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n end\n\n def get_choco_version\n powershell_exec(\"choco --version\").result\n end\n\n def existing_version\n Gem::Version.new(get_choco_version)\n end\n\n def define_resource_requirements\n [ new_resource.proxy_user, new_resource.proxy_password ].each do\n requirements.assert(:install, :upgrade).each do |a|\n a.assertion do\n (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?)\n end\n a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n end\n end\n end\n\n action :install, description: \"Installs Chocolatey package manager\" do\n unless new_resource.download_url.nil?", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1369667380", - "repo_full_name": "chef/chef", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "discussion_id": "1369667380", - "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. 
This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version\n+ Gem::Version.new(get_choco_version)\n+ end\n+\n+ def define_resource_requirements\n+ [ new_resource.proxy_user, new_resource.proxy_password ].each do\n+ requirements.assert(:install, :upgrade).each do |a|\n+ a.assertion do\n+ (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? 
&& !new_resource.proxy_password.nil?)\n+ end\n+ a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n+ a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n+ end\n+ end\n+ end\n+\n+ action :install, description: \"Installs Chocolatey package manager\" do\n+ unless new_resource.download_url.nil?", - "comment_created_at": "2023-10-24T06:10:14+00:00", - "comment_author": "jaymzh", - "comment_body": "You can just do\r\n```suggestion\r\n if new_resource.download_url\r\n```\r\n\r\nRuby treats that was defined, not nil, not false.", - "pr_file_module": null - }, - { - "comment_id": "1370275715", - "repo_full_name": "chef/chef", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "discussion_id": "1369667380", - "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. 
Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version\n+ Gem::Version.new(get_choco_version)\n+ end\n+\n+ def define_resource_requirements\n+ [ new_resource.proxy_user, new_resource.proxy_password ].each do\n+ requirements.assert(:install, :upgrade).each do |a|\n+ a.assertion do\n+ (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?)\n+ end\n+ a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n+ a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n+ end\n+ end\n+ end\n+\n+ action :install, description: \"Installs Chocolatey package manager\" do\n+ unless new_resource.download_url.nil?", - "comment_created_at": "2023-10-24T14:23:02+00:00", - "comment_author": "johnmccrae", - "comment_body": "done", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "438613990", - "pr_number": 9976, - "pr_file": "lib/chef/provider/package/apt.rb", - "created_at": "2020-06-11T08:07:13+00:00", - "commented_code": "}\n end\n\n def check_availability(name)\n name.map do |pkg|\n showpkg = run_noninteractive(\"apt-cache\", \"search\", pkg).stdout\n showpkg.empty? ? logger.warn(\"Unable to locate package #{pkg} \") : pkg\n end", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "438613990", - "repo_full_name": "chef/chef", - "pr_number": 9976, - "pr_file": "lib/chef/provider/package/apt.rb", - "discussion_id": "438613990", - "commented_code": "@@ -229,6 +234,12 @@ def package_data_for(pkg)\n }\n end\n \n+ def check_availability(name)\n+ name.map do |pkg|\n+ showpkg = run_noninteractive(\"apt-cache\", \"search\", pkg).stdout\n+ showpkg.empty? ? logger.warn(\"Unable to locate package #{pkg} \") : pkg\n+ end", - "comment_created_at": "2020-06-11T08:07:13+00:00", - "comment_author": "jaymzh", - "comment_body": "do you nee to call `.compact` here so you don't return a bunch of `nil`s?", - "pr_file_module": null - }, - { - "comment_id": "438754307", - "repo_full_name": "chef/chef", - "pr_number": 9976, - "pr_file": "lib/chef/provider/package/apt.rb", - "discussion_id": "438613990", - "commented_code": "@@ -229,6 +234,12 @@ def package_data_for(pkg)\n }\n end\n \n+ def check_availability(name)\n+ name.map do |pkg|\n+ showpkg = run_noninteractive(\"apt-cache\", \"search\", pkg).stdout\n+ showpkg.empty? ? 
logger.warn(\"Unable to locate package #{pkg} \") : pkg\n+ end", - "comment_created_at": "2020-06-11T12:44:37+00:00", - "comment_author": "dheerajd-msys", - "comment_body": "Thanks @jaymzh, Updated.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "778370434", - "pr_number": 12416, - "pr_file": "lib/chef/resource/chef_client_config.rb", - "created_at": "2022-01-04T20:33:46+00:00", - "commented_code": "introduced: \"17.8\"\n\n action :create, description: \"Create a client.rb config file for configuring #{ChefUtils::Dist::Infra::PRODUCT}.\" do\n unless ::Dir.exist?(new_resource.config_directory)\n directory new_resource.config_directory do\n [\n new_resource.config_directory,\n (::File.dirname(new_resource.log_location) unless new_resource.log_location.nil?),\n new_resource.file_backup_path,\n new_resource.file_cache_path,\n ::File.join(new_resource.config_directory, \"client.d\"),\n (::File.dirname(new_resource.pid_file) unless new_resource.pid_file.nil?),\n ].each do |dir_path|\n next if dir_path.nil?\n next if ::Dir.exist?(dir_path)\n\n directory dir_path do\n user new_resource.user unless new_resource.user.nil?\n group new_resource.group unless new_resource.group.nil?\n mode \"0750\"\n recursive true\n end\n end\n\n unless ::Dir.exist?(::File.join(new_resource.config_directory, \"client.d\"))\n directory ::File.join(new_resource.config_directory, \"client.d\") do\n user new_resource.user unless new_resource.user.nil?\n group new_resource.group unless new_resource.group.nil?\n mode \"0750\"\n mode dir_path == ::File&.dirname(new_resource.log_location) ? \"0755\" : \"0750\"", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "778370434", - "repo_full_name": "chef/chef", - "pr_number": 12416, - "pr_file": "lib/chef/resource/chef_client_config.rb", - "discussion_id": "778370434", - "commented_code": "@@ -250,20 +250,21 @@ def string_to_symbol(prop_val)\n introduced: \"17.8\"\n \n action :create, description: \"Create a client.rb config file for configuring #{ChefUtils::Dist::Infra::PRODUCT}.\" do\n- unless ::Dir.exist?(new_resource.config_directory)\n- directory new_resource.config_directory do\n+ [\n+ new_resource.config_directory,\n+ (::File.dirname(new_resource.log_location) unless new_resource.log_location.nil?),\n+ new_resource.file_backup_path,\n+ new_resource.file_cache_path,\n+ ::File.join(new_resource.config_directory, \"client.d\"),\n+ (::File.dirname(new_resource.pid_file) unless new_resource.pid_file.nil?),\n+ ].each do |dir_path|\n+ next if dir_path.nil?\n+ next if ::Dir.exist?(dir_path)\n+\n+ directory dir_path do\n user new_resource.user unless new_resource.user.nil?\n group new_resource.group unless new_resource.group.nil?\n- mode \"0750\"\n- recursive true\n- end\n- end\n-\n- unless ::Dir.exist?(::File.join(new_resource.config_directory, \"client.d\"))\n- directory ::File.join(new_resource.config_directory, \"client.d\") do\n- user new_resource.user unless new_resource.user.nil?\n- group new_resource.group unless new_resource.group.nil?\n- mode \"0750\"\n+ mode dir_path == ::File&.dirname(new_resource.log_location) ? 
\"0755\" : \"0750\"", - "comment_created_at": "2022-01-04T20:33:46+00:00", - "comment_author": "lamont-granquist", - "comment_body": "The `&.` here could be `.` since `::File` will never be nil", - "pr_file_module": null - }, - { - "comment_id": "779101985", - "repo_full_name": "chef/chef", - "pr_number": 12416, - "pr_file": "lib/chef/resource/chef_client_config.rb", - "discussion_id": "778370434", - "commented_code": "@@ -250,20 +250,21 @@ def string_to_symbol(prop_val)\n introduced: \"17.8\"\n \n action :create, description: \"Create a client.rb config file for configuring #{ChefUtils::Dist::Infra::PRODUCT}.\" do\n- unless ::Dir.exist?(new_resource.config_directory)\n- directory new_resource.config_directory do\n+ [\n+ new_resource.config_directory,\n+ (::File.dirname(new_resource.log_location) unless new_resource.log_location.nil?),\n+ new_resource.file_backup_path,\n+ new_resource.file_cache_path,\n+ ::File.join(new_resource.config_directory, \"client.d\"),\n+ (::File.dirname(new_resource.pid_file) unless new_resource.pid_file.nil?),\n+ ].each do |dir_path|\n+ next if dir_path.nil?\n+ next if ::Dir.exist?(dir_path)\n+\n+ directory dir_path do\n user new_resource.user unless new_resource.user.nil?\n group new_resource.group unless new_resource.group.nil?\n- mode \"0750\"\n- recursive true\n- end\n- end\n-\n- unless ::Dir.exist?(::File.join(new_resource.config_directory, \"client.d\"))\n- directory ::File.join(new_resource.config_directory, \"client.d\") do\n- user new_resource.user unless new_resource.user.nil?\n- group new_resource.group unless new_resource.group.nil?\n- mode \"0750\"\n+ mode dir_path == ::File&.dirname(new_resource.log_location) ? \"0755\" : \"0750\"", - "comment_created_at": "2022-01-05T20:02:04+00:00", - "comment_author": "Stromweld", - "comment_body": "removed &", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-remove-commented-code.json b/_reviewers/chef-remove-commented-code.json new file mode 100644 index 0000000..1b61529 --- /dev/null +++ b/_reviewers/chef-remove-commented-code.json @@ -0,0 +1,80 @@ +[ + { + "discussion_id": "1170982646", + "pr_number": 13694, + "pr_file": ".expeditor/adhoc-canary.omnibus.yml", + "created_at": "2023-04-19T08:14:27+00:00", + "commented_code": "- ubuntu-16.04-x86_64\n - ubuntu-18.04-x86_64\n - ubuntu-20.04-x86_64\n windows-2012r2-i386:\n - windows-2012r2-i386\n # windows-2012r2-i386:", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1170982646", + "repo_full_name": "chef/chef", + "pr_number": 13694, + "pr_file": ".expeditor/adhoc-canary.omnibus.yml", + "discussion_id": "1170982646", + "commented_code": "@@ -66,8 +66,8 @@ builder-to-testers-map:\n - ubuntu-16.04-x86_64\n - ubuntu-18.04-x86_64\n - ubuntu-20.04-x86_64\n- windows-2012r2-i386:\n- - windows-2012r2-i386\n+ # windows-2012r2-i386:", + "comment_created_at": "2023-04-19T08:14:27+00:00", + "comment_author": "neha-p6", + "comment_body": "can we remove the commented code? 
We will still know what was removed from this commit if anything is needed in future", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1170982889", + "pr_number": 13694, + "pr_file": ".expeditor/release.omnibus.yml", + "created_at": "2023-04-19T08:14:40+00:00", + "commented_code": "- sles-15-x86_64\n sles-15-aarch64:\n - sles-15-aarch64\n solaris2-5.11-i386:\n - solaris2-5.11-i386\n # solaris2-5.11-i386:\n # - solaris2-5.11-i386", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1170982889", + "repo_full_name": "chef/chef", + "pr_number": 13694, + "pr_file": ".expeditor/release.omnibus.yml", + "discussion_id": "1170982889", + "commented_code": "@@ -55,8 +55,8 @@ builder-to-testers-map:\n - sles-15-x86_64\n sles-15-aarch64:\n - sles-15-aarch64\n- solaris2-5.11-i386:\n- - solaris2-5.11-i386\n+ # solaris2-5.11-i386:\n+ # - solaris2-5.11-i386", + "comment_created_at": "2023-04-19T08:14:40+00:00", + "comment_author": "neha-p6", + "comment_body": "can we remove the commented code? We will still know what was removed from this commit if anything is needed in future", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1170983093", + "pr_number": 13694, + "pr_file": ".expeditor/release.omnibus.yml", + "created_at": "2023-04-19T08:14:51+00:00", + "commented_code": "- ubuntu-16.04-x86_64\n - ubuntu-18.04-x86_64\n - ubuntu-20.04-x86_64\n windows-2012r2-i386:\n - windows-2012r2-i386\n # windows-2012r2-i386:\n # - windows-2012r2-i386\n windows-2012r2-x86_64:", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1170983093", + "repo_full_name": "chef/chef", + "pr_number": 13694, + "pr_file": ".expeditor/release.omnibus.yml", + "discussion_id": "1170983093", + "commented_code": "@@ -66,8 +66,8 @@ builder-to-testers-map:\n - ubuntu-16.04-x86_64\n - ubuntu-18.04-x86_64\n - ubuntu-20.04-x86_64\n- windows-2012r2-i386:\n- - windows-2012r2-i386\n+ # windows-2012r2-i386:\n+ # - windows-2012r2-i386\n windows-2012r2-x86_64:", + "comment_created_at": "2023-04-19T08:14:51+00:00", + "comment_author": "neha-p6", + "comment_body": "can we remove the commented code? We will still know what was removed from this commit if anything is needed in future", + "pr_file_module": null + }, + { + "comment_id": "1171771397", + "repo_full_name": "chef/chef", + "pr_number": 13694, + "pr_file": ".expeditor/release.omnibus.yml", + "discussion_id": "1170983093", + "commented_code": "@@ -66,8 +66,8 @@ builder-to-testers-map:\n - ubuntu-16.04-x86_64\n - ubuntu-18.04-x86_64\n - ubuntu-20.04-x86_64\n- windows-2012r2-i386:\n- - windows-2012r2-i386\n+ # windows-2012r2-i386:\n+ # - windows-2012r2-i386\n windows-2012r2-x86_64:", + "comment_created_at": "2023-04-19T19:29:47+00:00", + "comment_author": "johnmccrae", + "comment_body": "Great point. Thanks. Doing that now. ", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-remove-commented-code.md b/_reviewers/chef-remove-commented-code.md index c9a6789..ad0df9b 100644 --- a/_reviewers/chef-remove-commented-code.md +++ b/_reviewers/chef-remove-commented-code.md @@ -25,85 +25,3 @@ builder-to-testers-map: ``` Instead, simply remove the commented lines completely. If the code needs to be referenced later, it can be found in the commit history. 
- - -[ - { - "discussion_id": "1170982646", - "pr_number": 13694, - "pr_file": ".expeditor/adhoc-canary.omnibus.yml", - "created_at": "2023-04-19T08:14:27+00:00", - "commented_code": "- ubuntu-16.04-x86_64\n - ubuntu-18.04-x86_64\n - ubuntu-20.04-x86_64\n windows-2012r2-i386:\n - windows-2012r2-i386\n # windows-2012r2-i386:", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1170982646", - "repo_full_name": "chef/chef", - "pr_number": 13694, - "pr_file": ".expeditor/adhoc-canary.omnibus.yml", - "discussion_id": "1170982646", - "commented_code": "@@ -66,8 +66,8 @@ builder-to-testers-map:\n - ubuntu-16.04-x86_64\n - ubuntu-18.04-x86_64\n - ubuntu-20.04-x86_64\n- windows-2012r2-i386:\n- - windows-2012r2-i386\n+ # windows-2012r2-i386:", - "comment_created_at": "2023-04-19T08:14:27+00:00", - "comment_author": "neha-p6", - "comment_body": "can we remove the commented code? We will still know what was removed from this commit if anything is needed in future", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1170982889", - "pr_number": 13694, - "pr_file": ".expeditor/release.omnibus.yml", - "created_at": "2023-04-19T08:14:40+00:00", - "commented_code": "- sles-15-x86_64\n sles-15-aarch64:\n - sles-15-aarch64\n solaris2-5.11-i386:\n - solaris2-5.11-i386\n # solaris2-5.11-i386:\n # - solaris2-5.11-i386", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1170982889", - "repo_full_name": "chef/chef", - "pr_number": 13694, - "pr_file": ".expeditor/release.omnibus.yml", - "discussion_id": "1170982889", - "commented_code": "@@ -55,8 +55,8 @@ builder-to-testers-map:\n - sles-15-x86_64\n sles-15-aarch64:\n - sles-15-aarch64\n- solaris2-5.11-i386:\n- - solaris2-5.11-i386\n+ # solaris2-5.11-i386:\n+ # - solaris2-5.11-i386", - "comment_created_at": "2023-04-19T08:14:40+00:00", - "comment_author": "neha-p6", - "comment_body": "can we remove the commented code? We will still know what was removed from this commit if anything is needed in future", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1170983093", - "pr_number": 13694, - "pr_file": ".expeditor/release.omnibus.yml", - "created_at": "2023-04-19T08:14:51+00:00", - "commented_code": "- ubuntu-16.04-x86_64\n - ubuntu-18.04-x86_64\n - ubuntu-20.04-x86_64\n windows-2012r2-i386:\n - windows-2012r2-i386\n # windows-2012r2-i386:\n # - windows-2012r2-i386\n windows-2012r2-x86_64:", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1170983093", - "repo_full_name": "chef/chef", - "pr_number": 13694, - "pr_file": ".expeditor/release.omnibus.yml", - "discussion_id": "1170983093", - "commented_code": "@@ -66,8 +66,8 @@ builder-to-testers-map:\n - ubuntu-16.04-x86_64\n - ubuntu-18.04-x86_64\n - ubuntu-20.04-x86_64\n- windows-2012r2-i386:\n- - windows-2012r2-i386\n+ # windows-2012r2-i386:\n+ # - windows-2012r2-i386\n windows-2012r2-x86_64:", - "comment_created_at": "2023-04-19T08:14:51+00:00", - "comment_author": "neha-p6", - "comment_body": "can we remove the commented code? 
We will still know what was removed from this commit if anything is needed in future", - "pr_file_module": null - }, - { - "comment_id": "1171771397", - "repo_full_name": "chef/chef", - "pr_number": 13694, - "pr_file": ".expeditor/release.omnibus.yml", - "discussion_id": "1170983093", - "commented_code": "@@ -66,8 +66,8 @@ builder-to-testers-map:\n - ubuntu-16.04-x86_64\n - ubuntu-18.04-x86_64\n - ubuntu-20.04-x86_64\n- windows-2012r2-i386:\n- - windows-2012r2-i386\n+ # windows-2012r2-i386:\n+ # - windows-2012r2-i386\n windows-2012r2-x86_64:", - "comment_created_at": "2023-04-19T19:29:47+00:00", - "comment_author": "johnmccrae", - "comment_body": "Great point. Thanks. Doing that now. ", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-secure-credential-management.json b/_reviewers/chef-secure-credential-management.json new file mode 100644 index 0000000..704e336 --- /dev/null +++ b/_reviewers/chef-secure-credential-management.json @@ -0,0 +1,256 @@ +[ + { + "discussion_id": "1792491323", + "pr_number": 14511, + "pr_file": "lib/chef/telemetry/base.rb", + "created_at": "2024-10-08T20:48:36+00:00", + "commented_code": "# frozen_string_literal: true\nrequire \"chef-licensing\"\nrequire \"securerandom\" unless defined?(SecureRandom)\nrequire \"digest\" unless defined?(Digest)\nrequire \"chef-utils/dist\" unless defined?(ChefUtils::Dist)\nrequire_relative \"../telemetry/run_context_probe\" unless defined?(Chef::Telemetry::RunContextProbe)\n\nclass Chef\n class Telemetry\n class Base\n VERSION = 2.0\n TYPE = \"job\"\n JOB_TYPE = \"Infra\"\n\n attr_accessor :scratch, :ohai\n\n def fetch_license_ids\n Chef::Log.debug \"Fetching license IDs for telemetry\"\n @license_keys ||= ChefLicensing.license_keys\n end\n\n def create_wrapper\n Chef::Log.debug \"Initializing wrapper for telemetry\"\n {\n version: VERSION,\n createdTimeUTC: Time.now.getutc.iso8601,\n environment: Chef::Telemetry::RunContextProbe.guess_run_context,\n licenseIds: fetch_license_ids,\n source: \"#{ChefUtils::Dist::Infra::EXEC}:#{Chef::VERSION}\",\n type: TYPE,\n }\n end\n\n def run_starting(_opts = {})\n Chef::Log.debug \"Initiating telemetry for Chef\"\n end\n\n def run_ending(opts)\n payload = create_wrapper\n\n # To not load ohai information once loaded\n unless ohai\n @ohai = Ohai::System.new\n # Load plugins to gather system data\n @ohai.all_plugins(%w{ os hostname platform dmi kernel})\n end\n\n payload[:platform] = ohai[:platform]\n\n payload[:jobs] = [{\n type: JOB_TYPE,\n # Target platform info\n environment: {\n host: ohai[:hostname],\n os: ohai[:os],\n version: ohai[:platform_version],\n architecture: ohai[:kernel][:machine],\n id: (ohai[:dmi][:system] && ohai[:dmi][:system][:uuid]) || \"\",\n },\n runtime: Chef::VERSION,\n content: [],\n steps: [],\n }]\n\n if opts[:run_context]\n opts[:run_context].cookbook_collection.each do |_, value|\n metadata = value.metadata\n payload[:jobs][0][:content] << {\n name: obscure(metadata&.name) || \"\",\n version: metadata&.version || \"\",\n maintainer: metadata&.maintainer || \"\",\n type: \"cookbook\",\n }\n end\n all_resources = opts[:run_context].resource_collection&.all_resources\n if all_resources\n all_resources.each do |resource|\n payload[:jobs][0][:steps] << {\n name: resource.recipe_name,\n resources: [],\n }\n\n payload[:jobs][0][:steps].last[:resources] << {\n type: \"chef-resource\",\n name: resource.resource_name.to_s,\n }\n end\n end\n end\n Chef::Log.debug \"Final data for telemetry upload -> #{payload}\"\n Chef::Log.debug \"Finishing telemetry 
for Chef\"\n # Return payload object for testing\n payload\n end\n\n # TBD Should we implement distribution name based on below usage?\n def determine_distribution_name\n run_context = Chef::Telemetry::RunContextProbe.guess_run_context\n case run_context\n when \"chef-zero\"\n ChefUtils::Dist::Zero::EXEC\n when \"chef-apply\"\n ChefUtils::Dist::Apply::EXEC\n when \"chef-solo\"\n ChefUtils::Dist::Solo::EXEC\n else\n ChefUtils::Dist::Infra::EXEC\n end\n end\n\n # Hash text if non-nil\n def obscure(cleartext)\n return nil if cleartext.nil?\n return nil if cleartext.empty?\n\n Digest::SHA2.new(256).hexdigest(cleartext)", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1792491323", + "repo_full_name": "chef/chef", + "pr_number": 14511, + "pr_file": "lib/chef/telemetry/base.rb", + "discussion_id": "1792491323", + "commented_code": "@@ -0,0 +1,120 @@\n+# frozen_string_literal: true\n+require \"chef-licensing\"\n+require \"securerandom\" unless defined?(SecureRandom)\n+require \"digest\" unless defined?(Digest)\n+require \"chef-utils/dist\" unless defined?(ChefUtils::Dist)\n+require_relative \"../telemetry/run_context_probe\" unless defined?(Chef::Telemetry::RunContextProbe)\n+\n+class Chef\n+ class Telemetry\n+ class Base\n+ VERSION = 2.0\n+ TYPE = \"job\"\n+ JOB_TYPE = \"Infra\"\n+\n+ attr_accessor :scratch, :ohai\n+\n+ def fetch_license_ids\n+ Chef::Log.debug \"Fetching license IDs for telemetry\"\n+ @license_keys ||= ChefLicensing.license_keys\n+ end\n+\n+ def create_wrapper\n+ Chef::Log.debug \"Initializing wrapper for telemetry\"\n+ {\n+ version: VERSION,\n+ createdTimeUTC: Time.now.getutc.iso8601,\n+ environment: Chef::Telemetry::RunContextProbe.guess_run_context,\n+ licenseIds: fetch_license_ids,\n+ source: \"#{ChefUtils::Dist::Infra::EXEC}:#{Chef::VERSION}\",\n+ type: TYPE,\n+ }\n+ end\n+\n+ def run_starting(_opts = {})\n+ Chef::Log.debug \"Initiating telemetry for Chef\"\n+ end\n+\n+ def run_ending(opts)\n+ payload = create_wrapper\n+\n+ # To not load ohai information once loaded\n+ unless ohai\n+ @ohai = Ohai::System.new\n+ # Load plugins to gather system data\n+ @ohai.all_plugins(%w{ os hostname platform dmi kernel})\n+ end\n+\n+ payload[:platform] = ohai[:platform]\n+\n+ payload[:jobs] = [{\n+ type: JOB_TYPE,\n+ # Target platform info\n+ environment: {\n+ host: ohai[:hostname],\n+ os: ohai[:os],\n+ version: ohai[:platform_version],\n+ architecture: ohai[:kernel][:machine],\n+ id: (ohai[:dmi][:system] && ohai[:dmi][:system][:uuid]) || \"\",\n+ },\n+ runtime: Chef::VERSION,\n+ content: [],\n+ steps: [],\n+ }]\n+\n+ if opts[:run_context]\n+ opts[:run_context].cookbook_collection.each do |_, value|\n+ metadata = value.metadata\n+ payload[:jobs][0][:content] << {\n+ name: obscure(metadata&.name) || \"\",\n+ version: metadata&.version || \"\",\n+ maintainer: metadata&.maintainer || \"\",\n+ type: \"cookbook\",\n+ }\n+ end\n+ all_resources = opts[:run_context].resource_collection&.all_resources\n+ if all_resources\n+ all_resources.each do |resource|\n+ payload[:jobs][0][:steps] << {\n+ name: resource.recipe_name,\n+ resources: [],\n+ }\n+\n+ payload[:jobs][0][:steps].last[:resources] << {\n+ type: \"chef-resource\",\n+ name: resource.resource_name.to_s,\n+ }\n+ end\n+ end\n+ end\n+ Chef::Log.debug \"Final data for telemetry upload -> #{payload}\"\n+ Chef::Log.debug \"Finishing telemetry for Chef\"\n+ # Return payload object for testing\n+ payload\n+ end\n+\n+ # TBD Should we implement distribution name based on below usage?\n+ def 
determine_distribution_name\n+ run_context = Chef::Telemetry::RunContextProbe.guess_run_context\n+ case run_context\n+ when \"chef-zero\"\n+ ChefUtils::Dist::Zero::EXEC\n+ when \"chef-apply\"\n+ ChefUtils::Dist::Apply::EXEC\n+ when \"chef-solo\"\n+ ChefUtils::Dist::Solo::EXEC\n+ else\n+ ChefUtils::Dist::Infra::EXEC\n+ end\n+ end\n+\n+ # Hash text if non-nil\n+ def obscure(cleartext)\n+ return nil if cleartext.nil?\n+ return nil if cleartext.empty?\n+\n+ Digest::SHA2.new(256).hexdigest(cleartext)", + "comment_created_at": "2024-10-08T20:48:36+00:00", + "comment_author": "dafyddcrosby", + "comment_body": "This is insufficient for obfuscation. Supermarket has ~4000 cookbooks, so building a rainbow table is trivial.", + "pr_file_module": null + }, + { + "comment_id": "1794919348", + "repo_full_name": "chef/chef", + "pr_number": 14511, + "pr_file": "lib/chef/telemetry/base.rb", + "discussion_id": "1792491323", + "commented_code": "@@ -0,0 +1,120 @@\n+# frozen_string_literal: true\n+require \"chef-licensing\"\n+require \"securerandom\" unless defined?(SecureRandom)\n+require \"digest\" unless defined?(Digest)\n+require \"chef-utils/dist\" unless defined?(ChefUtils::Dist)\n+require_relative \"../telemetry/run_context_probe\" unless defined?(Chef::Telemetry::RunContextProbe)\n+\n+class Chef\n+ class Telemetry\n+ class Base\n+ VERSION = 2.0\n+ TYPE = \"job\"\n+ JOB_TYPE = \"Infra\"\n+\n+ attr_accessor :scratch, :ohai\n+\n+ def fetch_license_ids\n+ Chef::Log.debug \"Fetching license IDs for telemetry\"\n+ @license_keys ||= ChefLicensing.license_keys\n+ end\n+\n+ def create_wrapper\n+ Chef::Log.debug \"Initializing wrapper for telemetry\"\n+ {\n+ version: VERSION,\n+ createdTimeUTC: Time.now.getutc.iso8601,\n+ environment: Chef::Telemetry::RunContextProbe.guess_run_context,\n+ licenseIds: fetch_license_ids,\n+ source: \"#{ChefUtils::Dist::Infra::EXEC}:#{Chef::VERSION}\",\n+ type: TYPE,\n+ }\n+ end\n+\n+ def run_starting(_opts = {})\n+ Chef::Log.debug \"Initiating telemetry for Chef\"\n+ end\n+\n+ def run_ending(opts)\n+ payload = create_wrapper\n+\n+ # To not load ohai information once loaded\n+ unless ohai\n+ @ohai = Ohai::System.new\n+ # Load plugins to gather system data\n+ @ohai.all_plugins(%w{ os hostname platform dmi kernel})\n+ end\n+\n+ payload[:platform] = ohai[:platform]\n+\n+ payload[:jobs] = [{\n+ type: JOB_TYPE,\n+ # Target platform info\n+ environment: {\n+ host: ohai[:hostname],\n+ os: ohai[:os],\n+ version: ohai[:platform_version],\n+ architecture: ohai[:kernel][:machine],\n+ id: (ohai[:dmi][:system] && ohai[:dmi][:system][:uuid]) || \"\",\n+ },\n+ runtime: Chef::VERSION,\n+ content: [],\n+ steps: [],\n+ }]\n+\n+ if opts[:run_context]\n+ opts[:run_context].cookbook_collection.each do |_, value|\n+ metadata = value.metadata\n+ payload[:jobs][0][:content] << {\n+ name: obscure(metadata&.name) || \"\",\n+ version: metadata&.version || \"\",\n+ maintainer: metadata&.maintainer || \"\",\n+ type: \"cookbook\",\n+ }\n+ end\n+ all_resources = opts[:run_context].resource_collection&.all_resources\n+ if all_resources\n+ all_resources.each do |resource|\n+ payload[:jobs][0][:steps] << {\n+ name: resource.recipe_name,\n+ resources: [],\n+ }\n+\n+ payload[:jobs][0][:steps].last[:resources] << {\n+ type: \"chef-resource\",\n+ name: resource.resource_name.to_s,\n+ }\n+ end\n+ end\n+ end\n+ Chef::Log.debug \"Final data for telemetry upload -> #{payload}\"\n+ Chef::Log.debug \"Finishing telemetry for Chef\"\n+ # Return payload object for testing\n+ payload\n+ end\n+\n+ # TBD Should we implement 
distribution name based on below usage?\n+ def determine_distribution_name\n+ run_context = Chef::Telemetry::RunContextProbe.guess_run_context\n+ case run_context\n+ when \"chef-zero\"\n+ ChefUtils::Dist::Zero::EXEC\n+ when \"chef-apply\"\n+ ChefUtils::Dist::Apply::EXEC\n+ when \"chef-solo\"\n+ ChefUtils::Dist::Solo::EXEC\n+ else\n+ ChefUtils::Dist::Infra::EXEC\n+ end\n+ end\n+\n+ # Hash text if non-nil\n+ def obscure(cleartext)\n+ return nil if cleartext.nil?\n+ return nil if cleartext.empty?\n+\n+ Digest::SHA2.new(256).hexdigest(cleartext)", + "comment_created_at": "2024-10-10T08:01:12+00:00", + "comment_author": "Nik08", + "comment_body": "Do you think implementing salting with this might be helpful?\r\n\r\n```\r\nsalt = SecureRandom.hex(16) # Generate a random 16-byte salt\r\nhash = Digest::SHA2.new(256).hexdigest(salt + cleartext)\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1808562848", + "repo_full_name": "chef/chef", + "pr_number": 14511, + "pr_file": "lib/chef/telemetry/base.rb", + "discussion_id": "1792491323", + "commented_code": "@@ -0,0 +1,120 @@\n+# frozen_string_literal: true\n+require \"chef-licensing\"\n+require \"securerandom\" unless defined?(SecureRandom)\n+require \"digest\" unless defined?(Digest)\n+require \"chef-utils/dist\" unless defined?(ChefUtils::Dist)\n+require_relative \"../telemetry/run_context_probe\" unless defined?(Chef::Telemetry::RunContextProbe)\n+\n+class Chef\n+ class Telemetry\n+ class Base\n+ VERSION = 2.0\n+ TYPE = \"job\"\n+ JOB_TYPE = \"Infra\"\n+\n+ attr_accessor :scratch, :ohai\n+\n+ def fetch_license_ids\n+ Chef::Log.debug \"Fetching license IDs for telemetry\"\n+ @license_keys ||= ChefLicensing.license_keys\n+ end\n+\n+ def create_wrapper\n+ Chef::Log.debug \"Initializing wrapper for telemetry\"\n+ {\n+ version: VERSION,\n+ createdTimeUTC: Time.now.getutc.iso8601,\n+ environment: Chef::Telemetry::RunContextProbe.guess_run_context,\n+ licenseIds: fetch_license_ids,\n+ source: \"#{ChefUtils::Dist::Infra::EXEC}:#{Chef::VERSION}\",\n+ type: TYPE,\n+ }\n+ end\n+\n+ def run_starting(_opts = {})\n+ Chef::Log.debug \"Initiating telemetry for Chef\"\n+ end\n+\n+ def run_ending(opts)\n+ payload = create_wrapper\n+\n+ # To not load ohai information once loaded\n+ unless ohai\n+ @ohai = Ohai::System.new\n+ # Load plugins to gather system data\n+ @ohai.all_plugins(%w{ os hostname platform dmi kernel})\n+ end\n+\n+ payload[:platform] = ohai[:platform]\n+\n+ payload[:jobs] = [{\n+ type: JOB_TYPE,\n+ # Target platform info\n+ environment: {\n+ host: ohai[:hostname],\n+ os: ohai[:os],\n+ version: ohai[:platform_version],\n+ architecture: ohai[:kernel][:machine],\n+ id: (ohai[:dmi][:system] && ohai[:dmi][:system][:uuid]) || \"\",\n+ },\n+ runtime: Chef::VERSION,\n+ content: [],\n+ steps: [],\n+ }]\n+\n+ if opts[:run_context]\n+ opts[:run_context].cookbook_collection.each do |_, value|\n+ metadata = value.metadata\n+ payload[:jobs][0][:content] << {\n+ name: obscure(metadata&.name) || \"\",\n+ version: metadata&.version || \"\",\n+ maintainer: metadata&.maintainer || \"\",\n+ type: \"cookbook\",\n+ }\n+ end\n+ all_resources = opts[:run_context].resource_collection&.all_resources\n+ if all_resources\n+ all_resources.each do |resource|\n+ payload[:jobs][0][:steps] << {\n+ name: resource.recipe_name,\n+ resources: [],\n+ }\n+\n+ payload[:jobs][0][:steps].last[:resources] << {\n+ type: \"chef-resource\",\n+ name: resource.resource_name.to_s,\n+ }\n+ end\n+ end\n+ end\n+ Chef::Log.debug \"Final data for telemetry upload -> #{payload}\"\n+ 
Chef::Log.debug \"Finishing telemetry for Chef\"\n+ # Return payload object for testing\n+ payload\n+ end\n+\n+ # TBD Should we implement distribution name based on below usage?\n+ def determine_distribution_name\n+ run_context = Chef::Telemetry::RunContextProbe.guess_run_context\n+ case run_context\n+ when \"chef-zero\"\n+ ChefUtils::Dist::Zero::EXEC\n+ when \"chef-apply\"\n+ ChefUtils::Dist::Apply::EXEC\n+ when \"chef-solo\"\n+ ChefUtils::Dist::Solo::EXEC\n+ else\n+ ChefUtils::Dist::Infra::EXEC\n+ end\n+ end\n+\n+ # Hash text if non-nil\n+ def obscure(cleartext)\n+ return nil if cleartext.nil?\n+ return nil if cleartext.empty?\n+\n+ Digest::SHA2.new(256).hexdigest(cleartext)", + "comment_created_at": "2024-10-21T11:06:28+00:00", + "comment_author": "GeorgeWestwater", + "comment_body": "@dafyddcrosby - the entire intent on capturing the SHA of the without salt was to actually have that rainbow table.\r\n\r\nWe are looking to understand, what cookbooks are being used from public supermarket. Ideally with this information we can track which cookbook (and version). That are used. That will help us understand what are the most popular cookbooks (download data is not enough for that), which cookbooks being used but are no longer maintained, and as the usage data is tried to a license we could explore the possibility of notify people of cookbook updates/vulnerabilities (inspired from depend-a-bot). \r\n\r\nThe thought process here was cookbook names could contain sensitive information, so transmitting it without a hash was not acceptable, however adding in the salt would make this data meaningless, unless the salt was part of your license (which would also downgrade the usefulness of the salt). \r\n\r\nWe are not interested in collecting information on private cookbooks or user-specific content. 
I am open to any suggestions on alternative ways to collect this information?", + "pr_file_module": null + }, + { + "comment_id": "1817165143", + "repo_full_name": "chef/chef", + "pr_number": 14511, + "pr_file": "lib/chef/telemetry/base.rb", + "discussion_id": "1792491323", + "commented_code": "@@ -0,0 +1,120 @@\n+# frozen_string_literal: true\n+require \"chef-licensing\"\n+require \"securerandom\" unless defined?(SecureRandom)\n+require \"digest\" unless defined?(Digest)\n+require \"chef-utils/dist\" unless defined?(ChefUtils::Dist)\n+require_relative \"../telemetry/run_context_probe\" unless defined?(Chef::Telemetry::RunContextProbe)\n+\n+class Chef\n+ class Telemetry\n+ class Base\n+ VERSION = 2.0\n+ TYPE = \"job\"\n+ JOB_TYPE = \"Infra\"\n+\n+ attr_accessor :scratch, :ohai\n+\n+ def fetch_license_ids\n+ Chef::Log.debug \"Fetching license IDs for telemetry\"\n+ @license_keys ||= ChefLicensing.license_keys\n+ end\n+\n+ def create_wrapper\n+ Chef::Log.debug \"Initializing wrapper for telemetry\"\n+ {\n+ version: VERSION,\n+ createdTimeUTC: Time.now.getutc.iso8601,\n+ environment: Chef::Telemetry::RunContextProbe.guess_run_context,\n+ licenseIds: fetch_license_ids,\n+ source: \"#{ChefUtils::Dist::Infra::EXEC}:#{Chef::VERSION}\",\n+ type: TYPE,\n+ }\n+ end\n+\n+ def run_starting(_opts = {})\n+ Chef::Log.debug \"Initiating telemetry for Chef\"\n+ end\n+\n+ def run_ending(opts)\n+ payload = create_wrapper\n+\n+ # To not load ohai information once loaded\n+ unless ohai\n+ @ohai = Ohai::System.new\n+ # Load plugins to gather system data\n+ @ohai.all_plugins(%w{ os hostname platform dmi kernel})\n+ end\n+\n+ payload[:platform] = ohai[:platform]\n+\n+ payload[:jobs] = [{\n+ type: JOB_TYPE,\n+ # Target platform info\n+ environment: {\n+ host: ohai[:hostname],\n+ os: ohai[:os],\n+ version: ohai[:platform_version],\n+ architecture: ohai[:kernel][:machine],\n+ id: (ohai[:dmi][:system] && ohai[:dmi][:system][:uuid]) || \"\",\n+ },\n+ runtime: Chef::VERSION,\n+ content: [],\n+ steps: [],\n+ }]\n+\n+ if opts[:run_context]\n+ opts[:run_context].cookbook_collection.each do |_, value|\n+ metadata = value.metadata\n+ payload[:jobs][0][:content] << {\n+ name: obscure(metadata&.name) || \"\",\n+ version: metadata&.version || \"\",\n+ maintainer: metadata&.maintainer || \"\",\n+ type: \"cookbook\",\n+ }\n+ end\n+ all_resources = opts[:run_context].resource_collection&.all_resources\n+ if all_resources\n+ all_resources.each do |resource|\n+ payload[:jobs][0][:steps] << {\n+ name: resource.recipe_name,\n+ resources: [],\n+ }\n+\n+ payload[:jobs][0][:steps].last[:resources] << {\n+ type: \"chef-resource\",\n+ name: resource.resource_name.to_s,\n+ }\n+ end\n+ end\n+ end\n+ Chef::Log.debug \"Final data for telemetry upload -> #{payload}\"\n+ Chef::Log.debug \"Finishing telemetry for Chef\"\n+ # Return payload object for testing\n+ payload\n+ end\n+\n+ # TBD Should we implement distribution name based on below usage?\n+ def determine_distribution_name\n+ run_context = Chef::Telemetry::RunContextProbe.guess_run_context\n+ case run_context\n+ when \"chef-zero\"\n+ ChefUtils::Dist::Zero::EXEC\n+ when \"chef-apply\"\n+ ChefUtils::Dist::Apply::EXEC\n+ when \"chef-solo\"\n+ ChefUtils::Dist::Solo::EXEC\n+ else\n+ ChefUtils::Dist::Infra::EXEC\n+ end\n+ end\n+\n+ # Hash text if non-nil\n+ def obscure(cleartext)\n+ return nil if cleartext.nil?\n+ return nil if cleartext.empty?\n+\n+ Digest::SHA2.new(256).hexdigest(cleartext)", + "comment_created_at": "2024-10-25T18:12:18+00:00", + "comment_author": "dafyddcrosby", + 
"comment_body": "> however adding in the salt would make this data meaningless, unless the salt was part of your license (which would also downgrade the usefulness of the salt).\r\n\r\nWith no salt, one could just grab a rainbow table off https://crackstation.net/ and call it a day. Whereas if you used eg the serial off the trial key as a salt, while you could still quickly generate one-off rainbow tables of cookbooks out of the known cookbook names in supermarket, it increases the cost of finding *unknown* cookbook names.\r\n\r\nConceptually, the client sends a set of hashes along with the serial. Server-side, you have something like this pseudocode:\r\n```\r\ndef find_supermarket_cookbooks(serial, set)\r\n table = SUPERMARKET_COOKBOOK_NAMES.to_h {|cb| [Digest::SHA2.new(256).hexdigest(serial + cb), cb] }\r\n set.map! {|cb_hash| table[cb_hash]}\r\n set.compact!\r\n set\r\nend\r\n```\r\n\r\nIt's not a perfect solution, but gets you the information you want re: supermarket cookbooks, and is still largely cacheable so that you don't need to recompute hashes on the backend after the first upload from a license. ", + "pr_file_module": null + }, + { + "comment_id": "1820583786", + "repo_full_name": "chef/chef", + "pr_number": 14511, + "pr_file": "lib/chef/telemetry/base.rb", + "discussion_id": "1792491323", + "commented_code": "@@ -0,0 +1,120 @@\n+# frozen_string_literal: true\n+require \"chef-licensing\"\n+require \"securerandom\" unless defined?(SecureRandom)\n+require \"digest\" unless defined?(Digest)\n+require \"chef-utils/dist\" unless defined?(ChefUtils::Dist)\n+require_relative \"../telemetry/run_context_probe\" unless defined?(Chef::Telemetry::RunContextProbe)\n+\n+class Chef\n+ class Telemetry\n+ class Base\n+ VERSION = 2.0\n+ TYPE = \"job\"\n+ JOB_TYPE = \"Infra\"\n+\n+ attr_accessor :scratch, :ohai\n+\n+ def fetch_license_ids\n+ Chef::Log.debug \"Fetching license IDs for telemetry\"\n+ @license_keys ||= ChefLicensing.license_keys\n+ end\n+\n+ def create_wrapper\n+ Chef::Log.debug \"Initializing wrapper for telemetry\"\n+ {\n+ version: VERSION,\n+ createdTimeUTC: Time.now.getutc.iso8601,\n+ environment: Chef::Telemetry::RunContextProbe.guess_run_context,\n+ licenseIds: fetch_license_ids,\n+ source: \"#{ChefUtils::Dist::Infra::EXEC}:#{Chef::VERSION}\",\n+ type: TYPE,\n+ }\n+ end\n+\n+ def run_starting(_opts = {})\n+ Chef::Log.debug \"Initiating telemetry for Chef\"\n+ end\n+\n+ def run_ending(opts)\n+ payload = create_wrapper\n+\n+ # To not load ohai information once loaded\n+ unless ohai\n+ @ohai = Ohai::System.new\n+ # Load plugins to gather system data\n+ @ohai.all_plugins(%w{ os hostname platform dmi kernel})\n+ end\n+\n+ payload[:platform] = ohai[:platform]\n+\n+ payload[:jobs] = [{\n+ type: JOB_TYPE,\n+ # Target platform info\n+ environment: {\n+ host: ohai[:hostname],\n+ os: ohai[:os],\n+ version: ohai[:platform_version],\n+ architecture: ohai[:kernel][:machine],\n+ id: (ohai[:dmi][:system] && ohai[:dmi][:system][:uuid]) || \"\",\n+ },\n+ runtime: Chef::VERSION,\n+ content: [],\n+ steps: [],\n+ }]\n+\n+ if opts[:run_context]\n+ opts[:run_context].cookbook_collection.each do |_, value|\n+ metadata = value.metadata\n+ payload[:jobs][0][:content] << {\n+ name: obscure(metadata&.name) || \"\",\n+ version: metadata&.version || \"\",\n+ maintainer: metadata&.maintainer || \"\",\n+ type: \"cookbook\",\n+ }\n+ end\n+ all_resources = opts[:run_context].resource_collection&.all_resources\n+ if all_resources\n+ all_resources.each do |resource|\n+ payload[:jobs][0][:steps] << {\n+ name: 
resource.recipe_name,\n+ resources: [],\n+ }\n+\n+ payload[:jobs][0][:steps].last[:resources] << {\n+ type: \"chef-resource\",\n+ name: resource.resource_name.to_s,\n+ }\n+ end\n+ end\n+ end\n+ Chef::Log.debug \"Final data for telemetry upload -> #{payload}\"\n+ Chef::Log.debug \"Finishing telemetry for Chef\"\n+ # Return payload object for testing\n+ payload\n+ end\n+\n+ # TBD Should we implement distribution name based on below usage?\n+ def determine_distribution_name\n+ run_context = Chef::Telemetry::RunContextProbe.guess_run_context\n+ case run_context\n+ when \"chef-zero\"\n+ ChefUtils::Dist::Zero::EXEC\n+ when \"chef-apply\"\n+ ChefUtils::Dist::Apply::EXEC\n+ when \"chef-solo\"\n+ ChefUtils::Dist::Solo::EXEC\n+ else\n+ ChefUtils::Dist::Infra::EXEC\n+ end\n+ end\n+\n+ # Hash text if non-nil\n+ def obscure(cleartext)\n+ return nil if cleartext.nil?\n+ return nil if cleartext.empty?\n+\n+ Digest::SHA2.new(256).hexdigest(cleartext)", + "comment_created_at": "2024-10-29T11:02:11+00:00", + "comment_author": "GeorgeWestwater", + "comment_body": "@dafyddcrosby - one question and one comment\r\n\r\nQuestion: what is the concern about someone being able to reverse some of the cookbook names?\r\n\r\nComment: If I am understanding your suggestion correctly (salt by license) that would have to result in us having one rainbow table per license issued. Am I understanding this correctly?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1099196196", + "pr_number": 13552, + "pr_file": "lib/chef/http/authenticator.rb", + "created_at": "2023-02-07T20:36:50+00:00", + "commented_code": "@win32registry = Chef::Win32::Registry.new\n path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n # does the registry key even exist?\n present = @win32registry.get_values(path)\n if present.nil? || present.empty?\n # password_blob should be an array of hashes\n password_blob = @win32registry.get_values(path)\n if password_blob.nil? 
|| password_blob.empty?\n raise Chef::Exceptions::Win32RegKeyMissing\n end\n\n present.each do |secret|\n if secret[:name] == \"PfxPass\"\n password = decrypt_pfx_pass(secret[:data])\n return password\n # Did someone have just the password stored in the registry?\n raw_data = password_blob.map { |x| x[:data] }\n vector = raw_data[2]\n if !!vector\n decrypted_password = decrypt_pfx_pass_with_vector(password_blob)\n else\n decrypted_password = decrypt_pfx_pass_with_password(password_blob)\n if !!decrypted_password\n migrate_pass_to_use_vector\n else\n Chef::Log.error(\"Failed to retrieve certificate password\")\n end\n end\n\n raise Chef::Exceptions::Win32RegKeyMissing\n\n decrypted_password\n rescue Chef::Exceptions::Win32RegKeyMissing\n # if we don't have a password, log that and generate one\n Chef::Log.warn \"Authentication Hive and values not present in registry, creating them now\"\n new_path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n unless @win32registry.key_exists?(new_path)\n @win32registry.create_key(new_path, true)\n end\n require \"securerandom\" unless defined?(SecureRandom)\n size = 14\n password = SecureRandom.alphanumeric(size)\n encrypted_pass = encrypt_pfx_pass(password)\n values = { name: \"PfxPass\", type: :string, data: encrypted_pass }\n @win32registry.set_value(new_path, values)\n password = create_and_store_new_password\n password\n end\n\n def self.encrypt_pfx_pass(password)\n powershell_code = <<~CODE\n $encrypted_string = ConvertTo-SecureString \"#{password}\" -AsPlainText -Force\n $secure_string = ConvertFrom-SecureString $encrypted_string\n return $secure_string\n $AES = [System.Security.Cryptography.Aes]::Create()\n $key_temp = [System.Convert]::ToBase64String($AES.Key)\n $iv_temp = [System.Convert]::ToBase64String($AES.IV)\n $encryptor = $AES.CreateEncryptor()\n [System.Byte[]]$Bytes = [System.Text.Encoding]::Unicode.GetBytes(\"#{password}\")\n $EncryptedBytes = $encryptor.TransformFinalBlock($Bytes,0,$Bytes.Length)\n $EncryptedBase64String = [System.Convert]::ToBase64String($EncryptedBytes)\n # create array of encrypted pass, key, iv\n $password_blob = @($EncryptedBase64String, $key_temp, $iv_temp)\n return $password_blob\n CODE\n powershell_exec!(powershell_code).result\n end\n\n def self.decrypt_pfx_pass(password)\n def self.decrypt_pfx_pass_with_vector(password_blob)\n raw_data = password_blob.map { |x| x[:data] }\n password = raw_data[0]\n key = raw_data[1]\n vector = raw_data[2]\n\n powershell_code = <<~CODE\n $KeyBytes = [System.Convert]::FromBase64String(\"#{key}\")\n $IVBytes = [System.Convert]::FromBase64String(\"#{vector}\")\n $aes = [System.Security.Cryptography.Aes]::Create()\n $aes.Key = $KeyBytes\n $aes.IV = $IVBytes\n $EncryptedBytes = [System.Convert]::FromBase64String(\"#{password}\")\n $Decryptor = $aes.CreateDecryptor()\n $DecryptedBytes = $Decryptor.TransformFinalBlock($EncryptedBytes,0,$EncryptedBytes.Length)\n $DecryptedString = [System.Text.Encoding]::Unicode.GetString($DecryptedBytes)\n return $DecryptedString\n CODE\n results = powershell_exec!(powershell_code).result\n end\n\n def self.decrypt_pfx_pass_with_password(password_blob)\n password = \"\"\n password_blob.each do |secret|\n password = secret[:data]\n end", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1099196196", + "repo_full_name": "chef/chef", + "pr_number": 13552, + "pr_file": "lib/chef/http/authenticator.rb", + "discussion_id": "1099196196", + "commented_code": "@@ -167,46 +168,79 @@ def 
self.get_cert_password\n @win32registry = Chef::Win32::Registry.new\n path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n # does the registry key even exist?\n- present = @win32registry.get_values(path)\n- if present.nil? || present.empty?\n+ # password_blob should be an array of hashes\n+ password_blob = @win32registry.get_values(path)\n+ if password_blob.nil? || password_blob.empty?\n raise Chef::Exceptions::Win32RegKeyMissing\n end\n \n- present.each do |secret|\n- if secret[:name] == \"PfxPass\"\n- password = decrypt_pfx_pass(secret[:data])\n- return password\n+ # Did someone have just the password stored in the registry?\n+ raw_data = password_blob.map { |x| x[:data] }\n+ vector = raw_data[2]\n+ if !!vector\n+ decrypted_password = decrypt_pfx_pass_with_vector(password_blob)\n+ else\n+ decrypted_password = decrypt_pfx_pass_with_password(password_blob)\n+ if !!decrypted_password\n+ migrate_pass_to_use_vector\n+ else\n+ Chef::Log.error(\"Failed to retrieve certificate password\")\n end\n end\n-\n- raise Chef::Exceptions::Win32RegKeyMissing\n-\n+ decrypted_password\n rescue Chef::Exceptions::Win32RegKeyMissing\n # if we don't have a password, log that and generate one\n Chef::Log.warn \"Authentication Hive and values not present in registry, creating them now\"\n new_path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n unless @win32registry.key_exists?(new_path)\n @win32registry.create_key(new_path, true)\n end\n- require \"securerandom\" unless defined?(SecureRandom)\n- size = 14\n- password = SecureRandom.alphanumeric(size)\n- encrypted_pass = encrypt_pfx_pass(password)\n- values = { name: \"PfxPass\", type: :string, data: encrypted_pass }\n- @win32registry.set_value(new_path, values)\n+ password = create_and_store_new_password\n password\n end\n \n def self.encrypt_pfx_pass(password)\n powershell_code = <<~CODE\n- $encrypted_string = ConvertTo-SecureString \"#{password}\" -AsPlainText -Force\n- $secure_string = ConvertFrom-SecureString $encrypted_string\n- return $secure_string\n+ $AES = [System.Security.Cryptography.Aes]::Create()\n+ $key_temp = [System.Convert]::ToBase64String($AES.Key)\n+ $iv_temp = [System.Convert]::ToBase64String($AES.IV)\n+ $encryptor = $AES.CreateEncryptor()\n+ [System.Byte[]]$Bytes = [System.Text.Encoding]::Unicode.GetBytes(\"#{password}\")\n+ $EncryptedBytes = $encryptor.TransformFinalBlock($Bytes,0,$Bytes.Length)\n+ $EncryptedBase64String = [System.Convert]::ToBase64String($EncryptedBytes)\n+ # create array of encrypted pass, key, iv\n+ $password_blob = @($EncryptedBase64String, $key_temp, $iv_temp)\n+ return $password_blob\n CODE\n powershell_exec!(powershell_code).result\n end\n \n- def self.decrypt_pfx_pass(password)\n+ def self.decrypt_pfx_pass_with_vector(password_blob)\n+ raw_data = password_blob.map { |x| x[:data] }\n+ password = raw_data[0]\n+ key = raw_data[1]\n+ vector = raw_data[2]\n+\n+ powershell_code = <<~CODE\n+ $KeyBytes = [System.Convert]::FromBase64String(\"#{key}\")\n+ $IVBytes = [System.Convert]::FromBase64String(\"#{vector}\")\n+ $aes = [System.Security.Cryptography.Aes]::Create()\n+ $aes.Key = $KeyBytes\n+ $aes.IV = $IVBytes\n+ $EncryptedBytes = [System.Convert]::FromBase64String(\"#{password}\")\n+ $Decryptor = $aes.CreateDecryptor()\n+ $DecryptedBytes = $Decryptor.TransformFinalBlock($EncryptedBytes,0,$EncryptedBytes.Length)\n+ $DecryptedString = [System.Text.Encoding]::Unicode.GetString($DecryptedBytes)\n+ return $DecryptedString\n+ CODE\n+ results = 
powershell_exec!(powershell_code).result\n+ end\n+\n+ def self.decrypt_pfx_pass_with_password(password_blob)\n+ password = \"\"\n+ password_blob.each do |secret|\n+ password = secret[:data]\n+ end", + "comment_created_at": "2023-02-07T20:36:50+00:00", + "comment_author": "jaymzh", + "comment_body": "This used to be (the equivalent of):\r\n```ruby\r\npassword_blob.each do |secret|\r\n if secret[:name] == \"PfxPass\"\r\n password = secret[:data]\r\n break\r\n end\r\nend\r\n```\r\n\r\nYour code here will, I think, overwrite `password` with non-PfxPass", + "pr_file_module": null + }, + { + "comment_id": "1099616027", + "repo_full_name": "chef/chef", + "pr_number": 13552, + "pr_file": "lib/chef/http/authenticator.rb", + "discussion_id": "1099196196", + "commented_code": "@@ -167,46 +168,79 @@ def self.get_cert_password\n @win32registry = Chef::Win32::Registry.new\n path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n # does the registry key even exist?\n- present = @win32registry.get_values(path)\n- if present.nil? || present.empty?\n+ # password_blob should be an array of hashes\n+ password_blob = @win32registry.get_values(path)\n+ if password_blob.nil? || password_blob.empty?\n raise Chef::Exceptions::Win32RegKeyMissing\n end\n \n- present.each do |secret|\n- if secret[:name] == \"PfxPass\"\n- password = decrypt_pfx_pass(secret[:data])\n- return password\n+ # Did someone have just the password stored in the registry?\n+ raw_data = password_blob.map { |x| x[:data] }\n+ vector = raw_data[2]\n+ if !!vector\n+ decrypted_password = decrypt_pfx_pass_with_vector(password_blob)\n+ else\n+ decrypted_password = decrypt_pfx_pass_with_password(password_blob)\n+ if !!decrypted_password\n+ migrate_pass_to_use_vector\n+ else\n+ Chef::Log.error(\"Failed to retrieve certificate password\")\n end\n end\n-\n- raise Chef::Exceptions::Win32RegKeyMissing\n-\n+ decrypted_password\n rescue Chef::Exceptions::Win32RegKeyMissing\n # if we don't have a password, log that and generate one\n Chef::Log.warn \"Authentication Hive and values not present in registry, creating them now\"\n new_path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n unless @win32registry.key_exists?(new_path)\n @win32registry.create_key(new_path, true)\n end\n- require \"securerandom\" unless defined?(SecureRandom)\n- size = 14\n- password = SecureRandom.alphanumeric(size)\n- encrypted_pass = encrypt_pfx_pass(password)\n- values = { name: \"PfxPass\", type: :string, data: encrypted_pass }\n- @win32registry.set_value(new_path, values)\n+ password = create_and_store_new_password\n password\n end\n \n def self.encrypt_pfx_pass(password)\n powershell_code = <<~CODE\n- $encrypted_string = ConvertTo-SecureString \"#{password}\" -AsPlainText -Force\n- $secure_string = ConvertFrom-SecureString $encrypted_string\n- return $secure_string\n+ $AES = [System.Security.Cryptography.Aes]::Create()\n+ $key_temp = [System.Convert]::ToBase64String($AES.Key)\n+ $iv_temp = [System.Convert]::ToBase64String($AES.IV)\n+ $encryptor = $AES.CreateEncryptor()\n+ [System.Byte[]]$Bytes = [System.Text.Encoding]::Unicode.GetBytes(\"#{password}\")\n+ $EncryptedBytes = $encryptor.TransformFinalBlock($Bytes,0,$Bytes.Length)\n+ $EncryptedBase64String = [System.Convert]::ToBase64String($EncryptedBytes)\n+ # create array of encrypted pass, key, iv\n+ $password_blob = @($EncryptedBase64String, $key_temp, $iv_temp)\n+ return $password_blob\n CODE\n powershell_exec!(powershell_code).result\n end\n \n- def self.decrypt_pfx_pass(password)\n+ def 
self.decrypt_pfx_pass_with_vector(password_blob)\n+ raw_data = password_blob.map { |x| x[:data] }\n+ password = raw_data[0]\n+ key = raw_data[1]\n+ vector = raw_data[2]\n+\n+ powershell_code = <<~CODE\n+ $KeyBytes = [System.Convert]::FromBase64String(\"#{key}\")\n+ $IVBytes = [System.Convert]::FromBase64String(\"#{vector}\")\n+ $aes = [System.Security.Cryptography.Aes]::Create()\n+ $aes.Key = $KeyBytes\n+ $aes.IV = $IVBytes\n+ $EncryptedBytes = [System.Convert]::FromBase64String(\"#{password}\")\n+ $Decryptor = $aes.CreateDecryptor()\n+ $DecryptedBytes = $Decryptor.TransformFinalBlock($EncryptedBytes,0,$EncryptedBytes.Length)\n+ $DecryptedString = [System.Text.Encoding]::Unicode.GetString($DecryptedBytes)\n+ return $DecryptedString\n+ CODE\n+ results = powershell_exec!(powershell_code).result\n+ end\n+\n+ def self.decrypt_pfx_pass_with_password(password_blob)\n+ password = \"\"\n+ password_blob.each do |secret|\n+ password = secret[:data]\n+ end", + "comment_created_at": "2023-02-08T03:31:37+00:00", + "comment_author": "johnmccrae", + "comment_body": "For extra protection, I am using the old technique to ensure I am actually grabbing only the PfxPass detail", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1099199876", + "pr_number": 13552, + "pr_file": "lib/chef/http/authenticator.rb", + "created_at": "2023-02-07T20:38:46+00:00", + "commented_code": "powershell_exec!(powershell_code).result\n end\n\n def self.retrieve_certificate_key(client_name)\n require \"openssl\" unless defined?(OpenSSL)\n def self.migrate_pass_to_use_vector\n powershell_code = <<~CODE\n Remove-ItemProperty -Path \"HKLM:\\\\Software\\\\Progress\\\\Authentication\" -Name \"PfXPass\"\n CODE\n powershell_exec!(powershell_code)\n create_and_store_new_password\n end\n\n def self.create_and_store_new_password\n @win32registry = Chef::Win32::Registry.new\n path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n require \"securerandom\" unless defined?(SecureRandom)\n size = 14\n password = SecureRandom.alphanumeric(size)\n encrypted_blob = encrypt_pfx_pass(password)", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1099199876", + "repo_full_name": "chef/chef", + "pr_number": 13552, + "pr_file": "lib/chef/http/authenticator.rb", + "discussion_id": "1099199876", + "commented_code": "@@ -215,10 +249,38 @@ def self.decrypt_pfx_pass(password)\n powershell_exec!(powershell_code).result\n end\n \n- def self.retrieve_certificate_key(client_name)\n- require \"openssl\" unless defined?(OpenSSL)\n+ def self.migrate_pass_to_use_vector\n+ powershell_code = <<~CODE\n+ Remove-ItemProperty -Path \"HKLM:\\\\Software\\\\Progress\\\\Authentication\" -Name \"PfXPass\"\n+ CODE\n+ powershell_exec!(powershell_code)\n+ create_and_store_new_password\n+ end\n \n+ def self.create_and_store_new_password\n+ @win32registry = Chef::Win32::Registry.new\n+ path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n+ require \"securerandom\" unless defined?(SecureRandom)\n+ size = 14\n+ password = SecureRandom.alphanumeric(size)\n+ encrypted_blob = encrypt_pfx_pass(password)", + "comment_created_at": "2023-02-07T20:38:46+00:00", + "comment_author": "jaymzh", + "comment_body": "Shouldn't we want to use the same password here from when it was an encrypted string? Isn't that what the key is encrypted with? If we make a new one, we just break the users' ability to use their key? 
Perhaps I'm mis-reading something, but it feels like we should just properly encrypted the existing password.", + "pr_file_module": null + }, + { + "comment_id": "1099679312", + "repo_full_name": "chef/chef", + "pr_number": 13552, + "pr_file": "lib/chef/http/authenticator.rb", + "discussion_id": "1099199876", + "commented_code": "@@ -215,10 +249,38 @@ def self.decrypt_pfx_pass(password)\n powershell_exec!(powershell_code).result\n end\n \n- def self.retrieve_certificate_key(client_name)\n- require \"openssl\" unless defined?(OpenSSL)\n+ def self.migrate_pass_to_use_vector\n+ powershell_code = <<~CODE\n+ Remove-ItemProperty -Path \"HKLM:\\\\Software\\\\Progress\\\\Authentication\" -Name \"PfXPass\"\n+ CODE\n+ powershell_exec!(powershell_code)\n+ create_and_store_new_password\n+ end\n \n+ def self.create_and_store_new_password\n+ @win32registry = Chef::Win32::Registry.new\n+ path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n+ require \"securerandom\" unless defined?(SecureRandom)\n+ size = 14\n+ password = SecureRandom.alphanumeric(size)\n+ encrypted_blob = encrypt_pfx_pass(password)", + "comment_created_at": "2023-02-08T05:41:15+00:00", + "comment_author": "johnmccrae", + "comment_body": "The problem I am having here is that the old password was created using ConvertTo-SecureString which is user dependent (since no AES Key is used, it uses DPAPI which is user specific). I'll ensure that there are no paths that can lead to a busted password.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "823233225", + "pr_number": 12640, + "pr_file": "lib/chef/client.rb", + "created_at": "2022-03-10T00:44:59+00:00", + "commented_code": "raise\n end\n\n #\n # In the brave new world of No Certs On Disk, we want to put the pem file into Keychain or the Certstore\n # But is it already there?\n def check_certstore_for_key(cert_name)\n require \"win32-certstore\"\n win32certstore = ::Win32::Certstore.open(\"MY\")\n win32certstore.search(\"#{cert_name}\")\n end\n\n def generate_pfx_package(cert_name, date = nil)\n require_relative \"mixin/powershell_exec\"\n extend Chef::Mixin::PowershellExec\n ::Chef::HTTP::Authenticator.get_cert_password\n powershell_code = <<~EOH\n\n $date = \"#{date}\"\n\n $certSplat = @{\n Subject = \"#{cert_name}\"\n KeyExportPolicy = 'Exportable'\n KeyUsage = @('KeyEncipherment','DigitalSignature')\n CertStoreLocation = 'Cert:\\\\LocalMachine\\\\My'\n TextExtension = @(\"2.5.29.37={text}1.3.6.1.5.5.7.3.2,1.3.6.1.5.5.7.3.1\")\n };\n if ([string]$date -as [DateTime]){\n $certSplat.add('NotAfter', $date)\n }\n\n New-SelfSignedCertificate @certSplat;\n EOH\n powershell_exec!(powershell_code)\n end\n\n def move_key_and_register(cert_name)\n require \"time\" unless defined?(Time)\n autoload :URI, \"uri\"\n\n KeyMigration.instance.key_migrated = true\n\n node = Chef::Config[:node_name]\n d = Time.now\n end_date = Time.new(d.year, d.month + 3, d.day, d.hour, d.min, d.sec).utc.iso8601\n\n payload = {\n name: node,\n clientname: node,\n public_key: \"\",\n expiration_date: end_date,\n }\n\n generate_pfx_package(cert_name, end_date)\n payload[:public_key] = get_public_key(cert_name)\n base_url = \"#{Chef::Config[:chef_server_url]}\"\n client = Chef::ServerAPI.new(base_url, client_name: Chef::Config[:validation_client_name], signing_key_filename: Chef::Config[:validation_key])\n client.post(base_url + \"/clients\", payload)\n KeyMigration.instance.key_migrated = false", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "823233225", + 
"repo_full_name": "chef/chef", + "pr_number": 12640, + "pr_file": "lib/chef/client.rb", + "discussion_id": "823233225", + "commented_code": "@@ -665,6 +683,83 @@ def register(client_name = node_name, config = Chef::Config)\n raise\n end\n \n+ #\n+ # In the brave new world of No Certs On Disk, we want to put the pem file into Keychain or the Certstore\n+ # But is it already there?\n+ def check_certstore_for_key(cert_name)\n+ require \"win32-certstore\"\n+ win32certstore = ::Win32::Certstore.open(\"MY\")\n+ win32certstore.search(\"#{cert_name}\")\n+ end\n+\n+ def generate_pfx_package(cert_name, date = nil)\n+ require_relative \"mixin/powershell_exec\"\n+ extend Chef::Mixin::PowershellExec\n+ ::Chef::HTTP::Authenticator.get_cert_password\n+ powershell_code = <<~EOH\n+\n+ $date = \"#{date}\"\n+\n+ $certSplat = @{\n+ Subject = \"#{cert_name}\"\n+ KeyExportPolicy = 'Exportable'\n+ KeyUsage = @('KeyEncipherment','DigitalSignature')\n+ CertStoreLocation = 'Cert:\\\\LocalMachine\\\\My'\n+ TextExtension = @(\"2.5.29.37={text}1.3.6.1.5.5.7.3.2,1.3.6.1.5.5.7.3.1\")\n+ };\n+ if ([string]$date -as [DateTime]){\n+ $certSplat.add('NotAfter', $date)\n+ }\n+\n+ New-SelfSignedCertificate @certSplat;\n+ EOH\n+ powershell_exec!(powershell_code)\n+ end\n+\n+ def move_key_and_register(cert_name)\n+ require \"time\" unless defined?(Time)\n+ autoload :URI, \"uri\"\n+\n+ KeyMigration.instance.key_migrated = true\n+\n+ node = Chef::Config[:node_name]\n+ d = Time.now\n+ end_date = Time.new(d.year, d.month + 3, d.day, d.hour, d.min, d.sec).utc.iso8601\n+\n+ payload = {\n+ name: node,\n+ clientname: node,\n+ public_key: \"\",\n+ expiration_date: end_date,\n+ }\n+\n+ generate_pfx_package(cert_name, end_date)\n+ payload[:public_key] = get_public_key(cert_name)\n+ base_url = \"#{Chef::Config[:chef_server_url]}\"\n+ client = Chef::ServerAPI.new(base_url, client_name: Chef::Config[:validation_client_name], signing_key_filename: Chef::Config[:validation_key])\n+ client.post(base_url + \"/clients\", payload)\n+ KeyMigration.instance.key_migrated = false", + "comment_created_at": "2022-03-10T00:44:59+00:00", + "comment_author": "mwrock", + "comment_body": "Not clear why you are setting to `false`. 
Didn't the key just get migrated?", + "pr_file_module": null + }, + { + "comment_id": "826446240", + "repo_full_name": "chef/chef", + "pr_number": 12640, + "pr_file": "lib/chef/client.rb", + "discussion_id": "823233225", + "commented_code": "@@ -665,6 +683,83 @@ def register(client_name = node_name, config = Chef::Config)\n raise\n end\n \n+ #\n+ # In the brave new world of No Certs On Disk, we want to put the pem file into Keychain or the Certstore\n+ # But is it already there?\n+ def check_certstore_for_key(cert_name)\n+ require \"win32-certstore\"\n+ win32certstore = ::Win32::Certstore.open(\"MY\")\n+ win32certstore.search(\"#{cert_name}\")\n+ end\n+\n+ def generate_pfx_package(cert_name, date = nil)\n+ require_relative \"mixin/powershell_exec\"\n+ extend Chef::Mixin::PowershellExec\n+ ::Chef::HTTP::Authenticator.get_cert_password\n+ powershell_code = <<~EOH\n+\n+ $date = \"#{date}\"\n+\n+ $certSplat = @{\n+ Subject = \"#{cert_name}\"\n+ KeyExportPolicy = 'Exportable'\n+ KeyUsage = @('KeyEncipherment','DigitalSignature')\n+ CertStoreLocation = 'Cert:\\\\LocalMachine\\\\My'\n+ TextExtension = @(\"2.5.29.37={text}1.3.6.1.5.5.7.3.2,1.3.6.1.5.5.7.3.1\")\n+ };\n+ if ([string]$date -as [DateTime]){\n+ $certSplat.add('NotAfter', $date)\n+ }\n+\n+ New-SelfSignedCertificate @certSplat;\n+ EOH\n+ powershell_exec!(powershell_code)\n+ end\n+\n+ def move_key_and_register(cert_name)\n+ require \"time\" unless defined?(Time)\n+ autoload :URI, \"uri\"\n+\n+ KeyMigration.instance.key_migrated = true\n+\n+ node = Chef::Config[:node_name]\n+ d = Time.now\n+ end_date = Time.new(d.year, d.month + 3, d.day, d.hour, d.min, d.sec).utc.iso8601\n+\n+ payload = {\n+ name: node,\n+ clientname: node,\n+ public_key: \"\",\n+ expiration_date: end_date,\n+ }\n+\n+ generate_pfx_package(cert_name, end_date)\n+ payload[:public_key] = get_public_key(cert_name)\n+ base_url = \"#{Chef::Config[:chef_server_url]}\"\n+ client = Chef::ServerAPI.new(base_url, client_name: Chef::Config[:validation_client_name], signing_key_filename: Chef::Config[:validation_key])\n+ client.post(base_url + \"/clients\", payload)\n+ KeyMigration.instance.key_migrated = false", + "comment_created_at": "2022-03-14T22:59:06+00:00", + "comment_author": "johnmccrae", + "comment_body": "So what happens at the top of that method is that I set the flag Keymigration to True - Line 739 executes and calls code in Authenticator.rb to load the pem file needed to build the headers for the client object. In this case, I need to validator so code in authenticator sees the Keymigraiton flag and hands me back the validator pem. On line 740 I update the chef server with the new key for the client. 
I now need the chef client to connect with the new cert I stuffed into the Certstore (line 738 generate_pfx_package) so I set the Keymigraiton flag back to false to prevent the Load_Signing_key code in authenticator from handing me back the validator pem when the client goes on to run a CCR", + "pr_file_module": null + }, + { + "comment_id": "827295020", + "repo_full_name": "chef/chef", + "pr_number": 12640, + "pr_file": "lib/chef/client.rb", + "discussion_id": "823233225", + "commented_code": "@@ -665,6 +683,83 @@ def register(client_name = node_name, config = Chef::Config)\n raise\n end\n \n+ #\n+ # In the brave new world of No Certs On Disk, we want to put the pem file into Keychain or the Certstore\n+ # But is it already there?\n+ def check_certstore_for_key(cert_name)\n+ require \"win32-certstore\"\n+ win32certstore = ::Win32::Certstore.open(\"MY\")\n+ win32certstore.search(\"#{cert_name}\")\n+ end\n+\n+ def generate_pfx_package(cert_name, date = nil)\n+ require_relative \"mixin/powershell_exec\"\n+ extend Chef::Mixin::PowershellExec\n+ ::Chef::HTTP::Authenticator.get_cert_password\n+ powershell_code = <<~EOH\n+\n+ $date = \"#{date}\"\n+\n+ $certSplat = @{\n+ Subject = \"#{cert_name}\"\n+ KeyExportPolicy = 'Exportable'\n+ KeyUsage = @('KeyEncipherment','DigitalSignature')\n+ CertStoreLocation = 'Cert:\\\\LocalMachine\\\\My'\n+ TextExtension = @(\"2.5.29.37={text}1.3.6.1.5.5.7.3.2,1.3.6.1.5.5.7.3.1\")\n+ };\n+ if ([string]$date -as [DateTime]){\n+ $certSplat.add('NotAfter', $date)\n+ }\n+\n+ New-SelfSignedCertificate @certSplat;\n+ EOH\n+ powershell_exec!(powershell_code)\n+ end\n+\n+ def move_key_and_register(cert_name)\n+ require \"time\" unless defined?(Time)\n+ autoload :URI, \"uri\"\n+\n+ KeyMigration.instance.key_migrated = true\n+\n+ node = Chef::Config[:node_name]\n+ d = Time.now\n+ end_date = Time.new(d.year, d.month + 3, d.day, d.hour, d.min, d.sec).utc.iso8601\n+\n+ payload = {\n+ name: node,\n+ clientname: node,\n+ public_key: \"\",\n+ expiration_date: end_date,\n+ }\n+\n+ generate_pfx_package(cert_name, end_date)\n+ payload[:public_key] = get_public_key(cert_name)\n+ base_url = \"#{Chef::Config[:chef_server_url]}\"\n+ client = Chef::ServerAPI.new(base_url, client_name: Chef::Config[:validation_client_name], signing_key_filename: Chef::Config[:validation_key])\n+ client.post(base_url + \"/clients\", payload)\n+ KeyMigration.instance.key_migrated = false", + "comment_created_at": "2022-03-15T18:31:18+00:00", + "comment_author": "mwrock", + "comment_body": "I'd suggest not storing the new key in `Cert:\\\\LocalMachine\\\\My` when you first create it. I'd either create one with openssl to avoid the certstore or store in current user. 
Then after the client post, migrate the key to `Cert:\\\\LocalMachine\\\\My`", + "pr_file_module": null + }, + { + "comment_id": "827296110", + "repo_full_name": "chef/chef", + "pr_number": 12640, + "pr_file": "lib/chef/client.rb", + "discussion_id": "823233225", + "commented_code": "@@ -665,6 +683,83 @@ def register(client_name = node_name, config = Chef::Config)\n raise\n end\n \n+ #\n+ # In the brave new world of No Certs On Disk, we want to put the pem file into Keychain or the Certstore\n+ # But is it already there?\n+ def check_certstore_for_key(cert_name)\n+ require \"win32-certstore\"\n+ win32certstore = ::Win32::Certstore.open(\"MY\")\n+ win32certstore.search(\"#{cert_name}\")\n+ end\n+\n+ def generate_pfx_package(cert_name, date = nil)\n+ require_relative \"mixin/powershell_exec\"\n+ extend Chef::Mixin::PowershellExec\n+ ::Chef::HTTP::Authenticator.get_cert_password\n+ powershell_code = <<~EOH\n+\n+ $date = \"#{date}\"\n+\n+ $certSplat = @{\n+ Subject = \"#{cert_name}\"\n+ KeyExportPolicy = 'Exportable'\n+ KeyUsage = @('KeyEncipherment','DigitalSignature')\n+ CertStoreLocation = 'Cert:\\\\LocalMachine\\\\My'\n+ TextExtension = @(\"2.5.29.37={text}1.3.6.1.5.5.7.3.2,1.3.6.1.5.5.7.3.1\")\n+ };\n+ if ([string]$date -as [DateTime]){\n+ $certSplat.add('NotAfter', $date)\n+ }\n+\n+ New-SelfSignedCertificate @certSplat;\n+ EOH\n+ powershell_exec!(powershell_code)\n+ end\n+\n+ def move_key_and_register(cert_name)\n+ require \"time\" unless defined?(Time)\n+ autoload :URI, \"uri\"\n+\n+ KeyMigration.instance.key_migrated = true\n+\n+ node = Chef::Config[:node_name]\n+ d = Time.now\n+ end_date = Time.new(d.year, d.month + 3, d.day, d.hour, d.min, d.sec).utc.iso8601\n+\n+ payload = {\n+ name: node,\n+ clientname: node,\n+ public_key: \"\",\n+ expiration_date: end_date,\n+ }\n+\n+ generate_pfx_package(cert_name, end_date)\n+ payload[:public_key] = get_public_key(cert_name)\n+ base_url = \"#{Chef::Config[:chef_server_url]}\"\n+ client = Chef::ServerAPI.new(base_url, client_name: Chef::Config[:validation_client_name], signing_key_filename: Chef::Config[:validation_key])\n+ client.post(base_url + \"/clients\", payload)\n+ KeyMigration.instance.key_migrated = false", + "comment_created_at": "2022-03-15T18:32:38+00:00", + "comment_author": "mwrock", + "comment_body": "This would eliminate the need for a static `key_migrated` flag which feels fragile and confusing.", + "pr_file_module": null + }, + { + "comment_id": "831624108", + "repo_full_name": "chef/chef", + "pr_number": 12640, + "pr_file": "lib/chef/client.rb", + "discussion_id": "823233225", + "commented_code": "@@ -665,6 +683,83 @@ def register(client_name = node_name, config = Chef::Config)\n raise\n end\n \n+ #\n+ # In the brave new world of No Certs On Disk, we want to put the pem file into Keychain or the Certstore\n+ # But is it already there?\n+ def check_certstore_for_key(cert_name)\n+ require \"win32-certstore\"\n+ win32certstore = ::Win32::Certstore.open(\"MY\")\n+ win32certstore.search(\"#{cert_name}\")\n+ end\n+\n+ def generate_pfx_package(cert_name, date = nil)\n+ require_relative \"mixin/powershell_exec\"\n+ extend Chef::Mixin::PowershellExec\n+ ::Chef::HTTP::Authenticator.get_cert_password\n+ powershell_code = <<~EOH\n+\n+ $date = \"#{date}\"\n+\n+ $certSplat = @{\n+ Subject = \"#{cert_name}\"\n+ KeyExportPolicy = 'Exportable'\n+ KeyUsage = @('KeyEncipherment','DigitalSignature')\n+ CertStoreLocation = 'Cert:\\\\LocalMachine\\\\My'\n+ TextExtension = @(\"2.5.29.37={text}1.3.6.1.5.5.7.3.2,1.3.6.1.5.5.7.3.1\")\n+ };\n+ if 
([string]$date -as [DateTime]){\n+ $certSplat.add('NotAfter', $date)\n+ }\n+\n+ New-SelfSignedCertificate @certSplat;\n+ EOH\n+ powershell_exec!(powershell_code)\n+ end\n+\n+ def move_key_and_register(cert_name)\n+ require \"time\" unless defined?(Time)\n+ autoload :URI, \"uri\"\n+\n+ KeyMigration.instance.key_migrated = true\n+\n+ node = Chef::Config[:node_name]\n+ d = Time.now\n+ end_date = Time.new(d.year, d.month + 3, d.day, d.hour, d.min, d.sec).utc.iso8601\n+\n+ payload = {\n+ name: node,\n+ clientname: node,\n+ public_key: \"\",\n+ expiration_date: end_date,\n+ }\n+\n+ generate_pfx_package(cert_name, end_date)\n+ payload[:public_key] = get_public_key(cert_name)\n+ base_url = \"#{Chef::Config[:chef_server_url]}\"\n+ client = Chef::ServerAPI.new(base_url, client_name: Chef::Config[:validation_client_name], signing_key_filename: Chef::Config[:validation_key])\n+ client.post(base_url + \"/clients\", payload)\n+ KeyMigration.instance.key_migrated = false", + "comment_created_at": "2022-03-21T23:34:03+00:00", + "comment_author": "johnmccrae", + "comment_body": "This makes a lot more sense. ", + "pr_file_module": null + }, + { + "comment_id": "833469005", + "repo_full_name": "chef/chef", + "pr_number": 12640, + "pr_file": "lib/chef/client.rb", + "discussion_id": "823233225", + "commented_code": "@@ -665,6 +683,83 @@ def register(client_name = node_name, config = Chef::Config)\n raise\n end\n \n+ #\n+ # In the brave new world of No Certs On Disk, we want to put the pem file into Keychain or the Certstore\n+ # But is it already there?\n+ def check_certstore_for_key(cert_name)\n+ require \"win32-certstore\"\n+ win32certstore = ::Win32::Certstore.open(\"MY\")\n+ win32certstore.search(\"#{cert_name}\")\n+ end\n+\n+ def generate_pfx_package(cert_name, date = nil)\n+ require_relative \"mixin/powershell_exec\"\n+ extend Chef::Mixin::PowershellExec\n+ ::Chef::HTTP::Authenticator.get_cert_password\n+ powershell_code = <<~EOH\n+\n+ $date = \"#{date}\"\n+\n+ $certSplat = @{\n+ Subject = \"#{cert_name}\"\n+ KeyExportPolicy = 'Exportable'\n+ KeyUsage = @('KeyEncipherment','DigitalSignature')\n+ CertStoreLocation = 'Cert:\\\\LocalMachine\\\\My'\n+ TextExtension = @(\"2.5.29.37={text}1.3.6.1.5.5.7.3.2,1.3.6.1.5.5.7.3.1\")\n+ };\n+ if ([string]$date -as [DateTime]){\n+ $certSplat.add('NotAfter', $date)\n+ }\n+\n+ New-SelfSignedCertificate @certSplat;\n+ EOH\n+ powershell_exec!(powershell_code)\n+ end\n+\n+ def move_key_and_register(cert_name)\n+ require \"time\" unless defined?(Time)\n+ autoload :URI, \"uri\"\n+\n+ KeyMigration.instance.key_migrated = true\n+\n+ node = Chef::Config[:node_name]\n+ d = Time.now\n+ end_date = Time.new(d.year, d.month + 3, d.day, d.hour, d.min, d.sec).utc.iso8601\n+\n+ payload = {\n+ name: node,\n+ clientname: node,\n+ public_key: \"\",\n+ expiration_date: end_date,\n+ }\n+\n+ generate_pfx_package(cert_name, end_date)\n+ payload[:public_key] = get_public_key(cert_name)\n+ base_url = \"#{Chef::Config[:chef_server_url]}\"\n+ client = Chef::ServerAPI.new(base_url, client_name: Chef::Config[:validation_client_name], signing_key_filename: Chef::Config[:validation_key])\n+ client.post(base_url + \"/clients\", payload)\n+ KeyMigration.instance.key_migrated = false", + "comment_created_at": "2022-03-23T16:22:04+00:00", + "comment_author": "johnmccrae", + "comment_body": "Done and working and it makes we more sense now and I got rid of the Singleton class at the top too ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "827353584", + "pr_number": 12640, + "pr_file": 
"lib/chef/http/authenticator.rb", + "created_at": "2022-03-15T19:48:11+00:00", + "commented_code": "present.each do |secret|\n if secret[:name] == \"PfxPass\"\n return secret[:data]\n password = decrypt_pfx_pass(secret[:data])\n return password\n end\n end\n\n # if we make it this far, that means there is no valid password in the Registry. Fail out to correct that.\n raise Chef::Exceptions::Win32RegKeyMissing\n\n rescue Chef::Exceptions::Win32RegKeyMissing\n # if we don't have a password, log that and generate one\n Chef::Log.warn \"Authentication Hive and value not present in registry, creating it now\"\n Chef::Log.warn \"Authentication Hive and values not present in registry, creating them now\"\n new_path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n unless @win32registry.key_exists?(new_path)\n @win32registry.create_key(new_path, true)\n end\n password = SOME_CHARS.sample(1 + rand(SOME_CHARS.count)).join[0...14]\n values = { name: \"PfxPass\", type: :string, data: password }\n encrypted_pass = encrypt_pfx_pass(password)\n values = { name: \"PfxPass\", type: :string, data: encrypted_pass }\n @win32registry.set_value(new_path, values)\n password\n end\n\n def self.encrypt_pfx_pass(password)\n powershell_code = <<~CODE", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "827353584", + "repo_full_name": "chef/chef", + "pr_number": 12640, + "pr_file": "lib/chef/http/authenticator.rb", + "discussion_id": "827353584", + "commented_code": "@@ -166,60 +178,85 @@ def self.get_cert_password\n \n present.each do |secret|\n if secret[:name] == \"PfxPass\"\n- return secret[:data]\n+ password = decrypt_pfx_pass(secret[:data])\n+ return password\n end\n end\n \n- # if we make it this far, that means there is no valid password in the Registry. Fail out to correct that.\n raise Chef::Exceptions::Win32RegKeyMissing\n \n rescue Chef::Exceptions::Win32RegKeyMissing\n # if we don't have a password, log that and generate one\n- Chef::Log.warn \"Authentication Hive and value not present in registry, creating it now\"\n+ Chef::Log.warn \"Authentication Hive and values not present in registry, creating them now\"\n new_path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n unless @win32registry.key_exists?(new_path)\n @win32registry.create_key(new_path, true)\n end\n password = SOME_CHARS.sample(1 + rand(SOME_CHARS.count)).join[0...14]\n- values = { name: \"PfxPass\", type: :string, data: password }\n+ encrypted_pass = encrypt_pfx_pass(password)\n+ values = { name: \"PfxPass\", type: :string, data: encrypted_pass }\n @win32registry.set_value(new_path, values)\n password\n end\n \n+ def self.encrypt_pfx_pass(password)\n+ powershell_code = <<~CODE", + "comment_created_at": "2022-03-15T19:48:11+00:00", + "comment_author": "mwrock", + "comment_body": "I'd use `Chef::ReservedNames::Win32::Crypto.encrypt(str)` for this. You would want to add a `decrypt` method.", + "pr_file_module": null + }, + { + "comment_id": "833549896", + "repo_full_name": "chef/chef", + "pr_number": 12640, + "pr_file": "lib/chef/http/authenticator.rb", + "discussion_id": "827353584", + "commented_code": "@@ -166,60 +178,85 @@ def self.get_cert_password\n \n present.each do |secret|\n if secret[:name] == \"PfxPass\"\n- return secret[:data]\n+ password = decrypt_pfx_pass(secret[:data])\n+ return password\n end\n end\n \n- # if we make it this far, that means there is no valid password in the Registry. 
Fail out to correct that.\n raise Chef::Exceptions::Win32RegKeyMissing\n \n rescue Chef::Exceptions::Win32RegKeyMissing\n # if we don't have a password, log that and generate one\n- Chef::Log.warn \"Authentication Hive and value not present in registry, creating it now\"\n+ Chef::Log.warn \"Authentication Hive and values not present in registry, creating them now\"\n new_path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n unless @win32registry.key_exists?(new_path)\n @win32registry.create_key(new_path, true)\n end\n password = SOME_CHARS.sample(1 + rand(SOME_CHARS.count)).join[0...14]\n- values = { name: \"PfxPass\", type: :string, data: password }\n+ encrypted_pass = encrypt_pfx_pass(password)\n+ values = { name: \"PfxPass\", type: :string, data: encrypted_pass }\n @win32registry.set_value(new_path, values)\n password\n end\n \n+ def self.encrypt_pfx_pass(password)\n+ powershell_code = <<~CODE", + "comment_created_at": "2022-03-23T17:37:35+00:00", + "comment_author": "johnmccrae", + "comment_body": "As discussed, I am punting for now since the ReservedNames code is interchangeable with the PowerShell approach. Taking a backlog item to do that work", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-secure-credential-management.md b/_reviewers/chef-secure-credential-management.md index e7e8cea..f00fac9 100644 --- a/_reviewers/chef-secure-credential-management.md +++ b/_reviewers/chef-secure-credential-management.md @@ -43,261 +43,3 @@ Chef::ReservedNames::Win32::Crypto.encrypt(password) ``` 5. Validate that credential operations succeed and provide meaningful error handling when they fail, without exposing sensitive details in logs or error messages. - - -[ - { - "discussion_id": "1792491323", - "pr_number": 14511, - "pr_file": "lib/chef/telemetry/base.rb", - "created_at": "2024-10-08T20:48:36+00:00", - "commented_code": "# frozen_string_literal: true\nrequire \"chef-licensing\"\nrequire \"securerandom\" unless defined?(SecureRandom)\nrequire \"digest\" unless defined?(Digest)\nrequire \"chef-utils/dist\" unless defined?(ChefUtils::Dist)\nrequire_relative \"../telemetry/run_context_probe\" unless defined?(Chef::Telemetry::RunContextProbe)\n\nclass Chef\n class Telemetry\n class Base\n VERSION = 2.0\n TYPE = \"job\"\n JOB_TYPE = \"Infra\"\n\n attr_accessor :scratch, :ohai\n\n def fetch_license_ids\n Chef::Log.debug \"Fetching license IDs for telemetry\"\n @license_keys ||= ChefLicensing.license_keys\n end\n\n def create_wrapper\n Chef::Log.debug \"Initializing wrapper for telemetry\"\n {\n version: VERSION,\n createdTimeUTC: Time.now.getutc.iso8601,\n environment: Chef::Telemetry::RunContextProbe.guess_run_context,\n licenseIds: fetch_license_ids,\n source: \"#{ChefUtils::Dist::Infra::EXEC}:#{Chef::VERSION}\",\n type: TYPE,\n }\n end\n\n def run_starting(_opts = {})\n Chef::Log.debug \"Initiating telemetry for Chef\"\n end\n\n def run_ending(opts)\n payload = create_wrapper\n\n # To not load ohai information once loaded\n unless ohai\n @ohai = Ohai::System.new\n # Load plugins to gather system data\n @ohai.all_plugins(%w{ os hostname platform dmi kernel})\n end\n\n payload[:platform] = ohai[:platform]\n\n payload[:jobs] = [{\n type: JOB_TYPE,\n # Target platform info\n environment: {\n host: ohai[:hostname],\n os: ohai[:os],\n version: ohai[:platform_version],\n architecture: ohai[:kernel][:machine],\n id: (ohai[:dmi][:system] && ohai[:dmi][:system][:uuid]) || \"\",\n },\n runtime: Chef::VERSION,\n content: [],\n steps: 
[],\n }]\n\n if opts[:run_context]\n opts[:run_context].cookbook_collection.each do |_, value|\n metadata = value.metadata\n payload[:jobs][0][:content] << {\n name: obscure(metadata&.name) || \"\",\n version: metadata&.version || \"\",\n maintainer: metadata&.maintainer || \"\",\n type: \"cookbook\",\n }\n end\n all_resources = opts[:run_context].resource_collection&.all_resources\n if all_resources\n all_resources.each do |resource|\n payload[:jobs][0][:steps] << {\n name: resource.recipe_name,\n resources: [],\n }\n\n payload[:jobs][0][:steps].last[:resources] << {\n type: \"chef-resource\",\n name: resource.resource_name.to_s,\n }\n end\n end\n end\n Chef::Log.debug \"Final data for telemetry upload -> #{payload}\"\n Chef::Log.debug \"Finishing telemetry for Chef\"\n # Return payload object for testing\n payload\n end\n\n # TBD Should we implement distribution name based on below usage?\n def determine_distribution_name\n run_context = Chef::Telemetry::RunContextProbe.guess_run_context\n case run_context\n when \"chef-zero\"\n ChefUtils::Dist::Zero::EXEC\n when \"chef-apply\"\n ChefUtils::Dist::Apply::EXEC\n when \"chef-solo\"\n ChefUtils::Dist::Solo::EXEC\n else\n ChefUtils::Dist::Infra::EXEC\n end\n end\n\n # Hash text if non-nil\n def obscure(cleartext)\n return nil if cleartext.nil?\n return nil if cleartext.empty?\n\n Digest::SHA2.new(256).hexdigest(cleartext)", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1792491323", - "repo_full_name": "chef/chef", - "pr_number": 14511, - "pr_file": "lib/chef/telemetry/base.rb", - "discussion_id": "1792491323", - "commented_code": "@@ -0,0 +1,120 @@\n+# frozen_string_literal: true\n+require \"chef-licensing\"\n+require \"securerandom\" unless defined?(SecureRandom)\n+require \"digest\" unless defined?(Digest)\n+require \"chef-utils/dist\" unless defined?(ChefUtils::Dist)\n+require_relative \"../telemetry/run_context_probe\" unless defined?(Chef::Telemetry::RunContextProbe)\n+\n+class Chef\n+ class Telemetry\n+ class Base\n+ VERSION = 2.0\n+ TYPE = \"job\"\n+ JOB_TYPE = \"Infra\"\n+\n+ attr_accessor :scratch, :ohai\n+\n+ def fetch_license_ids\n+ Chef::Log.debug \"Fetching license IDs for telemetry\"\n+ @license_keys ||= ChefLicensing.license_keys\n+ end\n+\n+ def create_wrapper\n+ Chef::Log.debug \"Initializing wrapper for telemetry\"\n+ {\n+ version: VERSION,\n+ createdTimeUTC: Time.now.getutc.iso8601,\n+ environment: Chef::Telemetry::RunContextProbe.guess_run_context,\n+ licenseIds: fetch_license_ids,\n+ source: \"#{ChefUtils::Dist::Infra::EXEC}:#{Chef::VERSION}\",\n+ type: TYPE,\n+ }\n+ end\n+\n+ def run_starting(_opts = {})\n+ Chef::Log.debug \"Initiating telemetry for Chef\"\n+ end\n+\n+ def run_ending(opts)\n+ payload = create_wrapper\n+\n+ # To not load ohai information once loaded\n+ unless ohai\n+ @ohai = Ohai::System.new\n+ # Load plugins to gather system data\n+ @ohai.all_plugins(%w{ os hostname platform dmi kernel})\n+ end\n+\n+ payload[:platform] = ohai[:platform]\n+\n+ payload[:jobs] = [{\n+ type: JOB_TYPE,\n+ # Target platform info\n+ environment: {\n+ host: ohai[:hostname],\n+ os: ohai[:os],\n+ version: ohai[:platform_version],\n+ architecture: ohai[:kernel][:machine],\n+ id: (ohai[:dmi][:system] && ohai[:dmi][:system][:uuid]) || \"\",\n+ },\n+ runtime: Chef::VERSION,\n+ content: [],\n+ steps: [],\n+ }]\n+\n+ if opts[:run_context]\n+ opts[:run_context].cookbook_collection.each do |_, value|\n+ metadata = value.metadata\n+ payload[:jobs][0][:content] << {\n+ name: obscure(metadata&.name) || 
\"\",\n+ version: metadata&.version || \"\",\n+ maintainer: metadata&.maintainer || \"\",\n+ type: \"cookbook\",\n+ }\n+ end\n+ all_resources = opts[:run_context].resource_collection&.all_resources\n+ if all_resources\n+ all_resources.each do |resource|\n+ payload[:jobs][0][:steps] << {\n+ name: resource.recipe_name,\n+ resources: [],\n+ }\n+\n+ payload[:jobs][0][:steps].last[:resources] << {\n+ type: \"chef-resource\",\n+ name: resource.resource_name.to_s,\n+ }\n+ end\n+ end\n+ end\n+ Chef::Log.debug \"Final data for telemetry upload -> #{payload}\"\n+ Chef::Log.debug \"Finishing telemetry for Chef\"\n+ # Return payload object for testing\n+ payload\n+ end\n+\n+ # TBD Should we implement distribution name based on below usage?\n+ def determine_distribution_name\n+ run_context = Chef::Telemetry::RunContextProbe.guess_run_context\n+ case run_context\n+ when \"chef-zero\"\n+ ChefUtils::Dist::Zero::EXEC\n+ when \"chef-apply\"\n+ ChefUtils::Dist::Apply::EXEC\n+ when \"chef-solo\"\n+ ChefUtils::Dist::Solo::EXEC\n+ else\n+ ChefUtils::Dist::Infra::EXEC\n+ end\n+ end\n+\n+ # Hash text if non-nil\n+ def obscure(cleartext)\n+ return nil if cleartext.nil?\n+ return nil if cleartext.empty?\n+\n+ Digest::SHA2.new(256).hexdigest(cleartext)", - "comment_created_at": "2024-10-08T20:48:36+00:00", - "comment_author": "dafyddcrosby", - "comment_body": "This is insufficient for obfuscation. Supermarket has ~4000 cookbooks, so building a rainbow table is trivial.", - "pr_file_module": null - }, - { - "comment_id": "1794919348", - "repo_full_name": "chef/chef", - "pr_number": 14511, - "pr_file": "lib/chef/telemetry/base.rb", - "discussion_id": "1792491323", - "commented_code": "@@ -0,0 +1,120 @@\n+# frozen_string_literal: true\n+require \"chef-licensing\"\n+require \"securerandom\" unless defined?(SecureRandom)\n+require \"digest\" unless defined?(Digest)\n+require \"chef-utils/dist\" unless defined?(ChefUtils::Dist)\n+require_relative \"../telemetry/run_context_probe\" unless defined?(Chef::Telemetry::RunContextProbe)\n+\n+class Chef\n+ class Telemetry\n+ class Base\n+ VERSION = 2.0\n+ TYPE = \"job\"\n+ JOB_TYPE = \"Infra\"\n+\n+ attr_accessor :scratch, :ohai\n+\n+ def fetch_license_ids\n+ Chef::Log.debug \"Fetching license IDs for telemetry\"\n+ @license_keys ||= ChefLicensing.license_keys\n+ end\n+\n+ def create_wrapper\n+ Chef::Log.debug \"Initializing wrapper for telemetry\"\n+ {\n+ version: VERSION,\n+ createdTimeUTC: Time.now.getutc.iso8601,\n+ environment: Chef::Telemetry::RunContextProbe.guess_run_context,\n+ licenseIds: fetch_license_ids,\n+ source: \"#{ChefUtils::Dist::Infra::EXEC}:#{Chef::VERSION}\",\n+ type: TYPE,\n+ }\n+ end\n+\n+ def run_starting(_opts = {})\n+ Chef::Log.debug \"Initiating telemetry for Chef\"\n+ end\n+\n+ def run_ending(opts)\n+ payload = create_wrapper\n+\n+ # To not load ohai information once loaded\n+ unless ohai\n+ @ohai = Ohai::System.new\n+ # Load plugins to gather system data\n+ @ohai.all_plugins(%w{ os hostname platform dmi kernel})\n+ end\n+\n+ payload[:platform] = ohai[:platform]\n+\n+ payload[:jobs] = [{\n+ type: JOB_TYPE,\n+ # Target platform info\n+ environment: {\n+ host: ohai[:hostname],\n+ os: ohai[:os],\n+ version: ohai[:platform_version],\n+ architecture: ohai[:kernel][:machine],\n+ id: (ohai[:dmi][:system] && ohai[:dmi][:system][:uuid]) || \"\",\n+ },\n+ runtime: Chef::VERSION,\n+ content: [],\n+ steps: [],\n+ }]\n+\n+ if opts[:run_context]\n+ opts[:run_context].cookbook_collection.each do |_, value|\n+ metadata = value.metadata\n+ payload[:jobs][0][:content] 
<< {\n+ name: obscure(metadata&.name) || \"\",\n+ version: metadata&.version || \"\",\n+ maintainer: metadata&.maintainer || \"\",\n+ type: \"cookbook\",\n+ }\n+ end\n+ all_resources = opts[:run_context].resource_collection&.all_resources\n+ if all_resources\n+ all_resources.each do |resource|\n+ payload[:jobs][0][:steps] << {\n+ name: resource.recipe_name,\n+ resources: [],\n+ }\n+\n+ payload[:jobs][0][:steps].last[:resources] << {\n+ type: \"chef-resource\",\n+ name: resource.resource_name.to_s,\n+ }\n+ end\n+ end\n+ end\n+ Chef::Log.debug \"Final data for telemetry upload -> #{payload}\"\n+ Chef::Log.debug \"Finishing telemetry for Chef\"\n+ # Return payload object for testing\n+ payload\n+ end\n+\n+ # TBD Should we implement distribution name based on below usage?\n+ def determine_distribution_name\n+ run_context = Chef::Telemetry::RunContextProbe.guess_run_context\n+ case run_context\n+ when \"chef-zero\"\n+ ChefUtils::Dist::Zero::EXEC\n+ when \"chef-apply\"\n+ ChefUtils::Dist::Apply::EXEC\n+ when \"chef-solo\"\n+ ChefUtils::Dist::Solo::EXEC\n+ else\n+ ChefUtils::Dist::Infra::EXEC\n+ end\n+ end\n+\n+ # Hash text if non-nil\n+ def obscure(cleartext)\n+ return nil if cleartext.nil?\n+ return nil if cleartext.empty?\n+\n+ Digest::SHA2.new(256).hexdigest(cleartext)", - "comment_created_at": "2024-10-10T08:01:12+00:00", - "comment_author": "Nik08", - "comment_body": "Do you think implementing salting with this might be helpful?\r\n\r\n```\r\nsalt = SecureRandom.hex(16) # Generate a random 16-byte salt\r\nhash = Digest::SHA2.new(256).hexdigest(salt + cleartext)\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1808562848", - "repo_full_name": "chef/chef", - "pr_number": 14511, - "pr_file": "lib/chef/telemetry/base.rb", - "discussion_id": "1792491323", - "commented_code": "@@ -0,0 +1,120 @@\n+# frozen_string_literal: true\n+require \"chef-licensing\"\n+require \"securerandom\" unless defined?(SecureRandom)\n+require \"digest\" unless defined?(Digest)\n+require \"chef-utils/dist\" unless defined?(ChefUtils::Dist)\n+require_relative \"../telemetry/run_context_probe\" unless defined?(Chef::Telemetry::RunContextProbe)\n+\n+class Chef\n+ class Telemetry\n+ class Base\n+ VERSION = 2.0\n+ TYPE = \"job\"\n+ JOB_TYPE = \"Infra\"\n+\n+ attr_accessor :scratch, :ohai\n+\n+ def fetch_license_ids\n+ Chef::Log.debug \"Fetching license IDs for telemetry\"\n+ @license_keys ||= ChefLicensing.license_keys\n+ end\n+\n+ def create_wrapper\n+ Chef::Log.debug \"Initializing wrapper for telemetry\"\n+ {\n+ version: VERSION,\n+ createdTimeUTC: Time.now.getutc.iso8601,\n+ environment: Chef::Telemetry::RunContextProbe.guess_run_context,\n+ licenseIds: fetch_license_ids,\n+ source: \"#{ChefUtils::Dist::Infra::EXEC}:#{Chef::VERSION}\",\n+ type: TYPE,\n+ }\n+ end\n+\n+ def run_starting(_opts = {})\n+ Chef::Log.debug \"Initiating telemetry for Chef\"\n+ end\n+\n+ def run_ending(opts)\n+ payload = create_wrapper\n+\n+ # To not load ohai information once loaded\n+ unless ohai\n+ @ohai = Ohai::System.new\n+ # Load plugins to gather system data\n+ @ohai.all_plugins(%w{ os hostname platform dmi kernel})\n+ end\n+\n+ payload[:platform] = ohai[:platform]\n+\n+ payload[:jobs] = [{\n+ type: JOB_TYPE,\n+ # Target platform info\n+ environment: {\n+ host: ohai[:hostname],\n+ os: ohai[:os],\n+ version: ohai[:platform_version],\n+ architecture: ohai[:kernel][:machine],\n+ id: (ohai[:dmi][:system] && ohai[:dmi][:system][:uuid]) || \"\",\n+ },\n+ runtime: Chef::VERSION,\n+ content: [],\n+ steps: [],\n+ }]\n+\n+ if 
opts[:run_context]\n+ opts[:run_context].cookbook_collection.each do |_, value|\n+ metadata = value.metadata\n+ payload[:jobs][0][:content] << {\n+ name: obscure(metadata&.name) || \"\",\n+ version: metadata&.version || \"\",\n+ maintainer: metadata&.maintainer || \"\",\n+ type: \"cookbook\",\n+ }\n+ end\n+ all_resources = opts[:run_context].resource_collection&.all_resources\n+ if all_resources\n+ all_resources.each do |resource|\n+ payload[:jobs][0][:steps] << {\n+ name: resource.recipe_name,\n+ resources: [],\n+ }\n+\n+ payload[:jobs][0][:steps].last[:resources] << {\n+ type: \"chef-resource\",\n+ name: resource.resource_name.to_s,\n+ }\n+ end\n+ end\n+ end\n+ Chef::Log.debug \"Final data for telemetry upload -> #{payload}\"\n+ Chef::Log.debug \"Finishing telemetry for Chef\"\n+ # Return payload object for testing\n+ payload\n+ end\n+\n+ # TBD Should we implement distribution name based on below usage?\n+ def determine_distribution_name\n+ run_context = Chef::Telemetry::RunContextProbe.guess_run_context\n+ case run_context\n+ when \"chef-zero\"\n+ ChefUtils::Dist::Zero::EXEC\n+ when \"chef-apply\"\n+ ChefUtils::Dist::Apply::EXEC\n+ when \"chef-solo\"\n+ ChefUtils::Dist::Solo::EXEC\n+ else\n+ ChefUtils::Dist::Infra::EXEC\n+ end\n+ end\n+\n+ # Hash text if non-nil\n+ def obscure(cleartext)\n+ return nil if cleartext.nil?\n+ return nil if cleartext.empty?\n+\n+ Digest::SHA2.new(256).hexdigest(cleartext)", - "comment_created_at": "2024-10-21T11:06:28+00:00", - "comment_author": "GeorgeWestwater", - "comment_body": "@dafyddcrosby - the entire intent on capturing the SHA of the without salt was to actually have that rainbow table.\r\n\r\nWe are looking to understand, what cookbooks are being used from public supermarket. Ideally with this information we can track which cookbook (and version). That are used. That will help us understand what are the most popular cookbooks (download data is not enough for that), which cookbooks being used but are no longer maintained, and as the usage data is tried to a license we could explore the possibility of notify people of cookbook updates/vulnerabilities (inspired from depend-a-bot). \r\n\r\nThe thought process here was cookbook names could contain sensitive information, so transmitting it without a hash was not acceptable, however adding in the salt would make this data meaningless, unless the salt was part of your license (which would also downgrade the usefulness of the salt). \r\n\r\nWe are not interested in collecting information on private cookbooks or user-specific content. 
I am open to any suggestions on alternative ways to collect this information?", - "pr_file_module": null - }, - { - "comment_id": "1817165143", - "repo_full_name": "chef/chef", - "pr_number": 14511, - "pr_file": "lib/chef/telemetry/base.rb", - "discussion_id": "1792491323", - "commented_code": "@@ -0,0 +1,120 @@\n+# frozen_string_literal: true\n+require \"chef-licensing\"\n+require \"securerandom\" unless defined?(SecureRandom)\n+require \"digest\" unless defined?(Digest)\n+require \"chef-utils/dist\" unless defined?(ChefUtils::Dist)\n+require_relative \"../telemetry/run_context_probe\" unless defined?(Chef::Telemetry::RunContextProbe)\n+\n+class Chef\n+ class Telemetry\n+ class Base\n+ VERSION = 2.0\n+ TYPE = \"job\"\n+ JOB_TYPE = \"Infra\"\n+\n+ attr_accessor :scratch, :ohai\n+\n+ def fetch_license_ids\n+ Chef::Log.debug \"Fetching license IDs for telemetry\"\n+ @license_keys ||= ChefLicensing.license_keys\n+ end\n+\n+ def create_wrapper\n+ Chef::Log.debug \"Initializing wrapper for telemetry\"\n+ {\n+ version: VERSION,\n+ createdTimeUTC: Time.now.getutc.iso8601,\n+ environment: Chef::Telemetry::RunContextProbe.guess_run_context,\n+ licenseIds: fetch_license_ids,\n+ source: \"#{ChefUtils::Dist::Infra::EXEC}:#{Chef::VERSION}\",\n+ type: TYPE,\n+ }\n+ end\n+\n+ def run_starting(_opts = {})\n+ Chef::Log.debug \"Initiating telemetry for Chef\"\n+ end\n+\n+ def run_ending(opts)\n+ payload = create_wrapper\n+\n+ # To not load ohai information once loaded\n+ unless ohai\n+ @ohai = Ohai::System.new\n+ # Load plugins to gather system data\n+ @ohai.all_plugins(%w{ os hostname platform dmi kernel})\n+ end\n+\n+ payload[:platform] = ohai[:platform]\n+\n+ payload[:jobs] = [{\n+ type: JOB_TYPE,\n+ # Target platform info\n+ environment: {\n+ host: ohai[:hostname],\n+ os: ohai[:os],\n+ version: ohai[:platform_version],\n+ architecture: ohai[:kernel][:machine],\n+ id: (ohai[:dmi][:system] && ohai[:dmi][:system][:uuid]) || \"\",\n+ },\n+ runtime: Chef::VERSION,\n+ content: [],\n+ steps: [],\n+ }]\n+\n+ if opts[:run_context]\n+ opts[:run_context].cookbook_collection.each do |_, value|\n+ metadata = value.metadata\n+ payload[:jobs][0][:content] << {\n+ name: obscure(metadata&.name) || \"\",\n+ version: metadata&.version || \"\",\n+ maintainer: metadata&.maintainer || \"\",\n+ type: \"cookbook\",\n+ }\n+ end\n+ all_resources = opts[:run_context].resource_collection&.all_resources\n+ if all_resources\n+ all_resources.each do |resource|\n+ payload[:jobs][0][:steps] << {\n+ name: resource.recipe_name,\n+ resources: [],\n+ }\n+\n+ payload[:jobs][0][:steps].last[:resources] << {\n+ type: \"chef-resource\",\n+ name: resource.resource_name.to_s,\n+ }\n+ end\n+ end\n+ end\n+ Chef::Log.debug \"Final data for telemetry upload -> #{payload}\"\n+ Chef::Log.debug \"Finishing telemetry for Chef\"\n+ # Return payload object for testing\n+ payload\n+ end\n+\n+ # TBD Should we implement distribution name based on below usage?\n+ def determine_distribution_name\n+ run_context = Chef::Telemetry::RunContextProbe.guess_run_context\n+ case run_context\n+ when \"chef-zero\"\n+ ChefUtils::Dist::Zero::EXEC\n+ when \"chef-apply\"\n+ ChefUtils::Dist::Apply::EXEC\n+ when \"chef-solo\"\n+ ChefUtils::Dist::Solo::EXEC\n+ else\n+ ChefUtils::Dist::Infra::EXEC\n+ end\n+ end\n+\n+ # Hash text if non-nil\n+ def obscure(cleartext)\n+ return nil if cleartext.nil?\n+ return nil if cleartext.empty?\n+\n+ Digest::SHA2.new(256).hexdigest(cleartext)", - "comment_created_at": "2024-10-25T18:12:18+00:00", - "comment_author": "dafyddcrosby", - 
"comment_body": "> however adding in the salt would make this data meaningless, unless the salt was part of your license (which would also downgrade the usefulness of the salt).\r\n\r\nWith no salt, one could just grab a rainbow table off https://crackstation.net/ and call it a day. Whereas if you used eg the serial off the trial key as a salt, while you could still quickly generate one-off rainbow tables of cookbooks out of the known cookbook names in supermarket, it increases the cost of finding *unknown* cookbook names.\r\n\r\nConceptually, the client sends a set of hashes along with the serial. Server-side, you have something like this pseudocode:\r\n```\r\ndef find_supermarket_cookbooks(serial, set)\r\n table = SUPERMARKET_COOKBOOK_NAMES.to_h {|cb| [Digest::SHA2.new(256).hexdigest(serial + cb), cb] }\r\n set.map! {|cb_hash| table[cb_hash]}\r\n set.compact!\r\n set\r\nend\r\n```\r\n\r\nIt's not a perfect solution, but gets you the information you want re: supermarket cookbooks, and is still largely cacheable so that you don't need to recompute hashes on the backend after the first upload from a license. ", - "pr_file_module": null - }, - { - "comment_id": "1820583786", - "repo_full_name": "chef/chef", - "pr_number": 14511, - "pr_file": "lib/chef/telemetry/base.rb", - "discussion_id": "1792491323", - "commented_code": "@@ -0,0 +1,120 @@\n+# frozen_string_literal: true\n+require \"chef-licensing\"\n+require \"securerandom\" unless defined?(SecureRandom)\n+require \"digest\" unless defined?(Digest)\n+require \"chef-utils/dist\" unless defined?(ChefUtils::Dist)\n+require_relative \"../telemetry/run_context_probe\" unless defined?(Chef::Telemetry::RunContextProbe)\n+\n+class Chef\n+ class Telemetry\n+ class Base\n+ VERSION = 2.0\n+ TYPE = \"job\"\n+ JOB_TYPE = \"Infra\"\n+\n+ attr_accessor :scratch, :ohai\n+\n+ def fetch_license_ids\n+ Chef::Log.debug \"Fetching license IDs for telemetry\"\n+ @license_keys ||= ChefLicensing.license_keys\n+ end\n+\n+ def create_wrapper\n+ Chef::Log.debug \"Initializing wrapper for telemetry\"\n+ {\n+ version: VERSION,\n+ createdTimeUTC: Time.now.getutc.iso8601,\n+ environment: Chef::Telemetry::RunContextProbe.guess_run_context,\n+ licenseIds: fetch_license_ids,\n+ source: \"#{ChefUtils::Dist::Infra::EXEC}:#{Chef::VERSION}\",\n+ type: TYPE,\n+ }\n+ end\n+\n+ def run_starting(_opts = {})\n+ Chef::Log.debug \"Initiating telemetry for Chef\"\n+ end\n+\n+ def run_ending(opts)\n+ payload = create_wrapper\n+\n+ # To not load ohai information once loaded\n+ unless ohai\n+ @ohai = Ohai::System.new\n+ # Load plugins to gather system data\n+ @ohai.all_plugins(%w{ os hostname platform dmi kernel})\n+ end\n+\n+ payload[:platform] = ohai[:platform]\n+\n+ payload[:jobs] = [{\n+ type: JOB_TYPE,\n+ # Target platform info\n+ environment: {\n+ host: ohai[:hostname],\n+ os: ohai[:os],\n+ version: ohai[:platform_version],\n+ architecture: ohai[:kernel][:machine],\n+ id: (ohai[:dmi][:system] && ohai[:dmi][:system][:uuid]) || \"\",\n+ },\n+ runtime: Chef::VERSION,\n+ content: [],\n+ steps: [],\n+ }]\n+\n+ if opts[:run_context]\n+ opts[:run_context].cookbook_collection.each do |_, value|\n+ metadata = value.metadata\n+ payload[:jobs][0][:content] << {\n+ name: obscure(metadata&.name) || \"\",\n+ version: metadata&.version || \"\",\n+ maintainer: metadata&.maintainer || \"\",\n+ type: \"cookbook\",\n+ }\n+ end\n+ all_resources = opts[:run_context].resource_collection&.all_resources\n+ if all_resources\n+ all_resources.each do |resource|\n+ payload[:jobs][0][:steps] << {\n+ name: 
resource.recipe_name,\n+ resources: [],\n+ }\n+\n+ payload[:jobs][0][:steps].last[:resources] << {\n+ type: \"chef-resource\",\n+ name: resource.resource_name.to_s,\n+ }\n+ end\n+ end\n+ end\n+ Chef::Log.debug \"Final data for telemetry upload -> #{payload}\"\n+ Chef::Log.debug \"Finishing telemetry for Chef\"\n+ # Return payload object for testing\n+ payload\n+ end\n+\n+ # TBD Should we implement distribution name based on below usage?\n+ def determine_distribution_name\n+ run_context = Chef::Telemetry::RunContextProbe.guess_run_context\n+ case run_context\n+ when \"chef-zero\"\n+ ChefUtils::Dist::Zero::EXEC\n+ when \"chef-apply\"\n+ ChefUtils::Dist::Apply::EXEC\n+ when \"chef-solo\"\n+ ChefUtils::Dist::Solo::EXEC\n+ else\n+ ChefUtils::Dist::Infra::EXEC\n+ end\n+ end\n+\n+ # Hash text if non-nil\n+ def obscure(cleartext)\n+ return nil if cleartext.nil?\n+ return nil if cleartext.empty?\n+\n+ Digest::SHA2.new(256).hexdigest(cleartext)", - "comment_created_at": "2024-10-29T11:02:11+00:00", - "comment_author": "GeorgeWestwater", - "comment_body": "@dafyddcrosby - one question and one comment\r\n\r\nQuestion: what is the concern about someone being able to reverse some of the cookbook names?\r\n\r\nComment: If I am understanding your suggestion correctly (salt by license) that would have to result in us having one rainbow table per license issued. Am I understanding this correctly?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1099196196", - "pr_number": 13552, - "pr_file": "lib/chef/http/authenticator.rb", - "created_at": "2023-02-07T20:36:50+00:00", - "commented_code": "@win32registry = Chef::Win32::Registry.new\n path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n # does the registry key even exist?\n present = @win32registry.get_values(path)\n if present.nil? || present.empty?\n # password_blob should be an array of hashes\n password_blob = @win32registry.get_values(path)\n if password_blob.nil? 
|| password_blob.empty?\n raise Chef::Exceptions::Win32RegKeyMissing\n end\n\n present.each do |secret|\n if secret[:name] == \"PfxPass\"\n password = decrypt_pfx_pass(secret[:data])\n return password\n # Did someone have just the password stored in the registry?\n raw_data = password_blob.map { |x| x[:data] }\n vector = raw_data[2]\n if !!vector\n decrypted_password = decrypt_pfx_pass_with_vector(password_blob)\n else\n decrypted_password = decrypt_pfx_pass_with_password(password_blob)\n if !!decrypted_password\n migrate_pass_to_use_vector\n else\n Chef::Log.error(\"Failed to retrieve certificate password\")\n end\n end\n\n raise Chef::Exceptions::Win32RegKeyMissing\n\n decrypted_password\n rescue Chef::Exceptions::Win32RegKeyMissing\n # if we don't have a password, log that and generate one\n Chef::Log.warn \"Authentication Hive and values not present in registry, creating them now\"\n new_path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n unless @win32registry.key_exists?(new_path)\n @win32registry.create_key(new_path, true)\n end\n require \"securerandom\" unless defined?(SecureRandom)\n size = 14\n password = SecureRandom.alphanumeric(size)\n encrypted_pass = encrypt_pfx_pass(password)\n values = { name: \"PfxPass\", type: :string, data: encrypted_pass }\n @win32registry.set_value(new_path, values)\n password = create_and_store_new_password\n password\n end\n\n def self.encrypt_pfx_pass(password)\n powershell_code = <<~CODE\n $encrypted_string = ConvertTo-SecureString \"#{password}\" -AsPlainText -Force\n $secure_string = ConvertFrom-SecureString $encrypted_string\n return $secure_string\n $AES = [System.Security.Cryptography.Aes]::Create()\n $key_temp = [System.Convert]::ToBase64String($AES.Key)\n $iv_temp = [System.Convert]::ToBase64String($AES.IV)\n $encryptor = $AES.CreateEncryptor()\n [System.Byte[]]$Bytes = [System.Text.Encoding]::Unicode.GetBytes(\"#{password}\")\n $EncryptedBytes = $encryptor.TransformFinalBlock($Bytes,0,$Bytes.Length)\n $EncryptedBase64String = [System.Convert]::ToBase64String($EncryptedBytes)\n # create array of encrypted pass, key, iv\n $password_blob = @($EncryptedBase64String, $key_temp, $iv_temp)\n return $password_blob\n CODE\n powershell_exec!(powershell_code).result\n end\n\n def self.decrypt_pfx_pass(password)\n def self.decrypt_pfx_pass_with_vector(password_blob)\n raw_data = password_blob.map { |x| x[:data] }\n password = raw_data[0]\n key = raw_data[1]\n vector = raw_data[2]\n\n powershell_code = <<~CODE\n $KeyBytes = [System.Convert]::FromBase64String(\"#{key}\")\n $IVBytes = [System.Convert]::FromBase64String(\"#{vector}\")\n $aes = [System.Security.Cryptography.Aes]::Create()\n $aes.Key = $KeyBytes\n $aes.IV = $IVBytes\n $EncryptedBytes = [System.Convert]::FromBase64String(\"#{password}\")\n $Decryptor = $aes.CreateDecryptor()\n $DecryptedBytes = $Decryptor.TransformFinalBlock($EncryptedBytes,0,$EncryptedBytes.Length)\n $DecryptedString = [System.Text.Encoding]::Unicode.GetString($DecryptedBytes)\n return $DecryptedString\n CODE\n results = powershell_exec!(powershell_code).result\n end\n\n def self.decrypt_pfx_pass_with_password(password_blob)\n password = \"\"\n password_blob.each do |secret|\n password = secret[:data]\n end", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1099196196", - "repo_full_name": "chef/chef", - "pr_number": 13552, - "pr_file": "lib/chef/http/authenticator.rb", - "discussion_id": "1099196196", - "commented_code": "@@ -167,46 +168,79 @@ def 
self.get_cert_password\n @win32registry = Chef::Win32::Registry.new\n path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n # does the registry key even exist?\n- present = @win32registry.get_values(path)\n- if present.nil? || present.empty?\n+ # password_blob should be an array of hashes\n+ password_blob = @win32registry.get_values(path)\n+ if password_blob.nil? || password_blob.empty?\n raise Chef::Exceptions::Win32RegKeyMissing\n end\n \n- present.each do |secret|\n- if secret[:name] == \"PfxPass\"\n- password = decrypt_pfx_pass(secret[:data])\n- return password\n+ # Did someone have just the password stored in the registry?\n+ raw_data = password_blob.map { |x| x[:data] }\n+ vector = raw_data[2]\n+ if !!vector\n+ decrypted_password = decrypt_pfx_pass_with_vector(password_blob)\n+ else\n+ decrypted_password = decrypt_pfx_pass_with_password(password_blob)\n+ if !!decrypted_password\n+ migrate_pass_to_use_vector\n+ else\n+ Chef::Log.error(\"Failed to retrieve certificate password\")\n end\n end\n-\n- raise Chef::Exceptions::Win32RegKeyMissing\n-\n+ decrypted_password\n rescue Chef::Exceptions::Win32RegKeyMissing\n # if we don't have a password, log that and generate one\n Chef::Log.warn \"Authentication Hive and values not present in registry, creating them now\"\n new_path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n unless @win32registry.key_exists?(new_path)\n @win32registry.create_key(new_path, true)\n end\n- require \"securerandom\" unless defined?(SecureRandom)\n- size = 14\n- password = SecureRandom.alphanumeric(size)\n- encrypted_pass = encrypt_pfx_pass(password)\n- values = { name: \"PfxPass\", type: :string, data: encrypted_pass }\n- @win32registry.set_value(new_path, values)\n+ password = create_and_store_new_password\n password\n end\n \n def self.encrypt_pfx_pass(password)\n powershell_code = <<~CODE\n- $encrypted_string = ConvertTo-SecureString \"#{password}\" -AsPlainText -Force\n- $secure_string = ConvertFrom-SecureString $encrypted_string\n- return $secure_string\n+ $AES = [System.Security.Cryptography.Aes]::Create()\n+ $key_temp = [System.Convert]::ToBase64String($AES.Key)\n+ $iv_temp = [System.Convert]::ToBase64String($AES.IV)\n+ $encryptor = $AES.CreateEncryptor()\n+ [System.Byte[]]$Bytes = [System.Text.Encoding]::Unicode.GetBytes(\"#{password}\")\n+ $EncryptedBytes = $encryptor.TransformFinalBlock($Bytes,0,$Bytes.Length)\n+ $EncryptedBase64String = [System.Convert]::ToBase64String($EncryptedBytes)\n+ # create array of encrypted pass, key, iv\n+ $password_blob = @($EncryptedBase64String, $key_temp, $iv_temp)\n+ return $password_blob\n CODE\n powershell_exec!(powershell_code).result\n end\n \n- def self.decrypt_pfx_pass(password)\n+ def self.decrypt_pfx_pass_with_vector(password_blob)\n+ raw_data = password_blob.map { |x| x[:data] }\n+ password = raw_data[0]\n+ key = raw_data[1]\n+ vector = raw_data[2]\n+\n+ powershell_code = <<~CODE\n+ $KeyBytes = [System.Convert]::FromBase64String(\"#{key}\")\n+ $IVBytes = [System.Convert]::FromBase64String(\"#{vector}\")\n+ $aes = [System.Security.Cryptography.Aes]::Create()\n+ $aes.Key = $KeyBytes\n+ $aes.IV = $IVBytes\n+ $EncryptedBytes = [System.Convert]::FromBase64String(\"#{password}\")\n+ $Decryptor = $aes.CreateDecryptor()\n+ $DecryptedBytes = $Decryptor.TransformFinalBlock($EncryptedBytes,0,$EncryptedBytes.Length)\n+ $DecryptedString = [System.Text.Encoding]::Unicode.GetString($DecryptedBytes)\n+ return $DecryptedString\n+ CODE\n+ results = 
powershell_exec!(powershell_code).result\n+ end\n+\n+ def self.decrypt_pfx_pass_with_password(password_blob)\n+ password = \"\"\n+ password_blob.each do |secret|\n+ password = secret[:data]\n+ end", - "comment_created_at": "2023-02-07T20:36:50+00:00", - "comment_author": "jaymzh", - "comment_body": "This used to be (the equivalent of):\r\n```ruby\r\npassword_blob.each do |secret|\r\n if secret[:name] == \"PfxPass\"\r\n password = secret[:data]\r\n break\r\n end\r\nend\r\n```\r\n\r\nYour code here will, I think, overwrite `password` with non-PfxPass", - "pr_file_module": null - }, - { - "comment_id": "1099616027", - "repo_full_name": "chef/chef", - "pr_number": 13552, - "pr_file": "lib/chef/http/authenticator.rb", - "discussion_id": "1099196196", - "commented_code": "@@ -167,46 +168,79 @@ def self.get_cert_password\n @win32registry = Chef::Win32::Registry.new\n path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n # does the registry key even exist?\n- present = @win32registry.get_values(path)\n- if present.nil? || present.empty?\n+ # password_blob should be an array of hashes\n+ password_blob = @win32registry.get_values(path)\n+ if password_blob.nil? || password_blob.empty?\n raise Chef::Exceptions::Win32RegKeyMissing\n end\n \n- present.each do |secret|\n- if secret[:name] == \"PfxPass\"\n- password = decrypt_pfx_pass(secret[:data])\n- return password\n+ # Did someone have just the password stored in the registry?\n+ raw_data = password_blob.map { |x| x[:data] }\n+ vector = raw_data[2]\n+ if !!vector\n+ decrypted_password = decrypt_pfx_pass_with_vector(password_blob)\n+ else\n+ decrypted_password = decrypt_pfx_pass_with_password(password_blob)\n+ if !!decrypted_password\n+ migrate_pass_to_use_vector\n+ else\n+ Chef::Log.error(\"Failed to retrieve certificate password\")\n end\n end\n-\n- raise Chef::Exceptions::Win32RegKeyMissing\n-\n+ decrypted_password\n rescue Chef::Exceptions::Win32RegKeyMissing\n # if we don't have a password, log that and generate one\n Chef::Log.warn \"Authentication Hive and values not present in registry, creating them now\"\n new_path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n unless @win32registry.key_exists?(new_path)\n @win32registry.create_key(new_path, true)\n end\n- require \"securerandom\" unless defined?(SecureRandom)\n- size = 14\n- password = SecureRandom.alphanumeric(size)\n- encrypted_pass = encrypt_pfx_pass(password)\n- values = { name: \"PfxPass\", type: :string, data: encrypted_pass }\n- @win32registry.set_value(new_path, values)\n+ password = create_and_store_new_password\n password\n end\n \n def self.encrypt_pfx_pass(password)\n powershell_code = <<~CODE\n- $encrypted_string = ConvertTo-SecureString \"#{password}\" -AsPlainText -Force\n- $secure_string = ConvertFrom-SecureString $encrypted_string\n- return $secure_string\n+ $AES = [System.Security.Cryptography.Aes]::Create()\n+ $key_temp = [System.Convert]::ToBase64String($AES.Key)\n+ $iv_temp = [System.Convert]::ToBase64String($AES.IV)\n+ $encryptor = $AES.CreateEncryptor()\n+ [System.Byte[]]$Bytes = [System.Text.Encoding]::Unicode.GetBytes(\"#{password}\")\n+ $EncryptedBytes = $encryptor.TransformFinalBlock($Bytes,0,$Bytes.Length)\n+ $EncryptedBase64String = [System.Convert]::ToBase64String($EncryptedBytes)\n+ # create array of encrypted pass, key, iv\n+ $password_blob = @($EncryptedBase64String, $key_temp, $iv_temp)\n+ return $password_blob\n CODE\n powershell_exec!(powershell_code).result\n end\n \n- def self.decrypt_pfx_pass(password)\n+ def 
self.decrypt_pfx_pass_with_vector(password_blob)\n+ raw_data = password_blob.map { |x| x[:data] }\n+ password = raw_data[0]\n+ key = raw_data[1]\n+ vector = raw_data[2]\n+\n+ powershell_code = <<~CODE\n+ $KeyBytes = [System.Convert]::FromBase64String(\"#{key}\")\n+ $IVBytes = [System.Convert]::FromBase64String(\"#{vector}\")\n+ $aes = [System.Security.Cryptography.Aes]::Create()\n+ $aes.Key = $KeyBytes\n+ $aes.IV = $IVBytes\n+ $EncryptedBytes = [System.Convert]::FromBase64String(\"#{password}\")\n+ $Decryptor = $aes.CreateDecryptor()\n+ $DecryptedBytes = $Decryptor.TransformFinalBlock($EncryptedBytes,0,$EncryptedBytes.Length)\n+ $DecryptedString = [System.Text.Encoding]::Unicode.GetString($DecryptedBytes)\n+ return $DecryptedString\n+ CODE\n+ results = powershell_exec!(powershell_code).result\n+ end\n+\n+ def self.decrypt_pfx_pass_with_password(password_blob)\n+ password = \"\"\n+ password_blob.each do |secret|\n+ password = secret[:data]\n+ end", - "comment_created_at": "2023-02-08T03:31:37+00:00", - "comment_author": "johnmccrae", - "comment_body": "For extra protection, I am using the old technique to ensure I am actually grabbing only the PfxPass detail", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1099199876", - "pr_number": 13552, - "pr_file": "lib/chef/http/authenticator.rb", - "created_at": "2023-02-07T20:38:46+00:00", - "commented_code": "powershell_exec!(powershell_code).result\n end\n\n def self.retrieve_certificate_key(client_name)\n require \"openssl\" unless defined?(OpenSSL)\n def self.migrate_pass_to_use_vector\n powershell_code = <<~CODE\n Remove-ItemProperty -Path \"HKLM:\\\\Software\\\\Progress\\\\Authentication\" -Name \"PfXPass\"\n CODE\n powershell_exec!(powershell_code)\n create_and_store_new_password\n end\n\n def self.create_and_store_new_password\n @win32registry = Chef::Win32::Registry.new\n path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n require \"securerandom\" unless defined?(SecureRandom)\n size = 14\n password = SecureRandom.alphanumeric(size)\n encrypted_blob = encrypt_pfx_pass(password)", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1099199876", - "repo_full_name": "chef/chef", - "pr_number": 13552, - "pr_file": "lib/chef/http/authenticator.rb", - "discussion_id": "1099199876", - "commented_code": "@@ -215,10 +249,38 @@ def self.decrypt_pfx_pass(password)\n powershell_exec!(powershell_code).result\n end\n \n- def self.retrieve_certificate_key(client_name)\n- require \"openssl\" unless defined?(OpenSSL)\n+ def self.migrate_pass_to_use_vector\n+ powershell_code = <<~CODE\n+ Remove-ItemProperty -Path \"HKLM:\\\\Software\\\\Progress\\\\Authentication\" -Name \"PfXPass\"\n+ CODE\n+ powershell_exec!(powershell_code)\n+ create_and_store_new_password\n+ end\n \n+ def self.create_and_store_new_password\n+ @win32registry = Chef::Win32::Registry.new\n+ path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n+ require \"securerandom\" unless defined?(SecureRandom)\n+ size = 14\n+ password = SecureRandom.alphanumeric(size)\n+ encrypted_blob = encrypt_pfx_pass(password)", - "comment_created_at": "2023-02-07T20:38:46+00:00", - "comment_author": "jaymzh", - "comment_body": "Shouldn't we want to use the same password here from when it was an encrypted string? Isn't that what the key is encrypted with? If we make a new one, we just break the users' ability to use their key? 
Perhaps I'm mis-reading something, but it feels like we should just properly encrypted the existing password.", - "pr_file_module": null - }, - { - "comment_id": "1099679312", - "repo_full_name": "chef/chef", - "pr_number": 13552, - "pr_file": "lib/chef/http/authenticator.rb", - "discussion_id": "1099199876", - "commented_code": "@@ -215,10 +249,38 @@ def self.decrypt_pfx_pass(password)\n powershell_exec!(powershell_code).result\n end\n \n- def self.retrieve_certificate_key(client_name)\n- require \"openssl\" unless defined?(OpenSSL)\n+ def self.migrate_pass_to_use_vector\n+ powershell_code = <<~CODE\n+ Remove-ItemProperty -Path \"HKLM:\\\\Software\\\\Progress\\\\Authentication\" -Name \"PfXPass\"\n+ CODE\n+ powershell_exec!(powershell_code)\n+ create_and_store_new_password\n+ end\n \n+ def self.create_and_store_new_password\n+ @win32registry = Chef::Win32::Registry.new\n+ path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n+ require \"securerandom\" unless defined?(SecureRandom)\n+ size = 14\n+ password = SecureRandom.alphanumeric(size)\n+ encrypted_blob = encrypt_pfx_pass(password)", - "comment_created_at": "2023-02-08T05:41:15+00:00", - "comment_author": "johnmccrae", - "comment_body": "The problem I am having here is that the old password was created using ConvertTo-SecureString which is user dependent (since no AES Key is used, it uses DPAPI which is user specific). I'll ensure that there are no paths that can lead to a busted password.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "823233225", - "pr_number": 12640, - "pr_file": "lib/chef/client.rb", - "created_at": "2022-03-10T00:44:59+00:00", - "commented_code": "raise\n end\n\n #\n # In the brave new world of No Certs On Disk, we want to put the pem file into Keychain or the Certstore\n # But is it already there?\n def check_certstore_for_key(cert_name)\n require \"win32-certstore\"\n win32certstore = ::Win32::Certstore.open(\"MY\")\n win32certstore.search(\"#{cert_name}\")\n end\n\n def generate_pfx_package(cert_name, date = nil)\n require_relative \"mixin/powershell_exec\"\n extend Chef::Mixin::PowershellExec\n ::Chef::HTTP::Authenticator.get_cert_password\n powershell_code = <<~EOH\n\n $date = \"#{date}\"\n\n $certSplat = @{\n Subject = \"#{cert_name}\"\n KeyExportPolicy = 'Exportable'\n KeyUsage = @('KeyEncipherment','DigitalSignature')\n CertStoreLocation = 'Cert:\\\\LocalMachine\\\\My'\n TextExtension = @(\"2.5.29.37={text}1.3.6.1.5.5.7.3.2,1.3.6.1.5.5.7.3.1\")\n };\n if ([string]$date -as [DateTime]){\n $certSplat.add('NotAfter', $date)\n }\n\n New-SelfSignedCertificate @certSplat;\n EOH\n powershell_exec!(powershell_code)\n end\n\n def move_key_and_register(cert_name)\n require \"time\" unless defined?(Time)\n autoload :URI, \"uri\"\n\n KeyMigration.instance.key_migrated = true\n\n node = Chef::Config[:node_name]\n d = Time.now\n end_date = Time.new(d.year, d.month + 3, d.day, d.hour, d.min, d.sec).utc.iso8601\n\n payload = {\n name: node,\n clientname: node,\n public_key: \"\",\n expiration_date: end_date,\n }\n\n generate_pfx_package(cert_name, end_date)\n payload[:public_key] = get_public_key(cert_name)\n base_url = \"#{Chef::Config[:chef_server_url]}\"\n client = Chef::ServerAPI.new(base_url, client_name: Chef::Config[:validation_client_name], signing_key_filename: Chef::Config[:validation_key])\n client.post(base_url + \"/clients\", payload)\n KeyMigration.instance.key_migrated = false", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "823233225", - 
"repo_full_name": "chef/chef", - "pr_number": 12640, - "pr_file": "lib/chef/client.rb", - "discussion_id": "823233225", - "commented_code": "@@ -665,6 +683,83 @@ def register(client_name = node_name, config = Chef::Config)\n raise\n end\n \n+ #\n+ # In the brave new world of No Certs On Disk, we want to put the pem file into Keychain or the Certstore\n+ # But is it already there?\n+ def check_certstore_for_key(cert_name)\n+ require \"win32-certstore\"\n+ win32certstore = ::Win32::Certstore.open(\"MY\")\n+ win32certstore.search(\"#{cert_name}\")\n+ end\n+\n+ def generate_pfx_package(cert_name, date = nil)\n+ require_relative \"mixin/powershell_exec\"\n+ extend Chef::Mixin::PowershellExec\n+ ::Chef::HTTP::Authenticator.get_cert_password\n+ powershell_code = <<~EOH\n+\n+ $date = \"#{date}\"\n+\n+ $certSplat = @{\n+ Subject = \"#{cert_name}\"\n+ KeyExportPolicy = 'Exportable'\n+ KeyUsage = @('KeyEncipherment','DigitalSignature')\n+ CertStoreLocation = 'Cert:\\\\LocalMachine\\\\My'\n+ TextExtension = @(\"2.5.29.37={text}1.3.6.1.5.5.7.3.2,1.3.6.1.5.5.7.3.1\")\n+ };\n+ if ([string]$date -as [DateTime]){\n+ $certSplat.add('NotAfter', $date)\n+ }\n+\n+ New-SelfSignedCertificate @certSplat;\n+ EOH\n+ powershell_exec!(powershell_code)\n+ end\n+\n+ def move_key_and_register(cert_name)\n+ require \"time\" unless defined?(Time)\n+ autoload :URI, \"uri\"\n+\n+ KeyMigration.instance.key_migrated = true\n+\n+ node = Chef::Config[:node_name]\n+ d = Time.now\n+ end_date = Time.new(d.year, d.month + 3, d.day, d.hour, d.min, d.sec).utc.iso8601\n+\n+ payload = {\n+ name: node,\n+ clientname: node,\n+ public_key: \"\",\n+ expiration_date: end_date,\n+ }\n+\n+ generate_pfx_package(cert_name, end_date)\n+ payload[:public_key] = get_public_key(cert_name)\n+ base_url = \"#{Chef::Config[:chef_server_url]}\"\n+ client = Chef::ServerAPI.new(base_url, client_name: Chef::Config[:validation_client_name], signing_key_filename: Chef::Config[:validation_key])\n+ client.post(base_url + \"/clients\", payload)\n+ KeyMigration.instance.key_migrated = false", - "comment_created_at": "2022-03-10T00:44:59+00:00", - "comment_author": "mwrock", - "comment_body": "Not clear why you are setting to `false`. 
Didn't the key just get migrated?", - "pr_file_module": null - }, - { - "comment_id": "826446240", - "repo_full_name": "chef/chef", - "pr_number": 12640, - "pr_file": "lib/chef/client.rb", - "discussion_id": "823233225", - "commented_code": "@@ -665,6 +683,83 @@ def register(client_name = node_name, config = Chef::Config)\n raise\n end\n \n+ #\n+ # In the brave new world of No Certs On Disk, we want to put the pem file into Keychain or the Certstore\n+ # But is it already there?\n+ def check_certstore_for_key(cert_name)\n+ require \"win32-certstore\"\n+ win32certstore = ::Win32::Certstore.open(\"MY\")\n+ win32certstore.search(\"#{cert_name}\")\n+ end\n+\n+ def generate_pfx_package(cert_name, date = nil)\n+ require_relative \"mixin/powershell_exec\"\n+ extend Chef::Mixin::PowershellExec\n+ ::Chef::HTTP::Authenticator.get_cert_password\n+ powershell_code = <<~EOH\n+\n+ $date = \"#{date}\"\n+\n+ $certSplat = @{\n+ Subject = \"#{cert_name}\"\n+ KeyExportPolicy = 'Exportable'\n+ KeyUsage = @('KeyEncipherment','DigitalSignature')\n+ CertStoreLocation = 'Cert:\\\\LocalMachine\\\\My'\n+ TextExtension = @(\"2.5.29.37={text}1.3.6.1.5.5.7.3.2,1.3.6.1.5.5.7.3.1\")\n+ };\n+ if ([string]$date -as [DateTime]){\n+ $certSplat.add('NotAfter', $date)\n+ }\n+\n+ New-SelfSignedCertificate @certSplat;\n+ EOH\n+ powershell_exec!(powershell_code)\n+ end\n+\n+ def move_key_and_register(cert_name)\n+ require \"time\" unless defined?(Time)\n+ autoload :URI, \"uri\"\n+\n+ KeyMigration.instance.key_migrated = true\n+\n+ node = Chef::Config[:node_name]\n+ d = Time.now\n+ end_date = Time.new(d.year, d.month + 3, d.day, d.hour, d.min, d.sec).utc.iso8601\n+\n+ payload = {\n+ name: node,\n+ clientname: node,\n+ public_key: \"\",\n+ expiration_date: end_date,\n+ }\n+\n+ generate_pfx_package(cert_name, end_date)\n+ payload[:public_key] = get_public_key(cert_name)\n+ base_url = \"#{Chef::Config[:chef_server_url]}\"\n+ client = Chef::ServerAPI.new(base_url, client_name: Chef::Config[:validation_client_name], signing_key_filename: Chef::Config[:validation_key])\n+ client.post(base_url + \"/clients\", payload)\n+ KeyMigration.instance.key_migrated = false", - "comment_created_at": "2022-03-14T22:59:06+00:00", - "comment_author": "johnmccrae", - "comment_body": "So what happens at the top of that method is that I set the flag Keymigration to True - Line 739 executes and calls code in Authenticator.rb to load the pem file needed to build the headers for the client object. In this case, I need to validator so code in authenticator sees the Keymigraiton flag and hands me back the validator pem. On line 740 I update the chef server with the new key for the client. 
I now need the chef client to connect with the new cert I stuffed into the Certstore (line 738 generate_pfx_package) so I set the Keymigraiton flag back to false to prevent the Load_Signing_key code in authenticator from handing me back the validator pem when the client goes on to run a CCR", - "pr_file_module": null - }, - { - "comment_id": "827295020", - "repo_full_name": "chef/chef", - "pr_number": 12640, - "pr_file": "lib/chef/client.rb", - "discussion_id": "823233225", - "commented_code": "@@ -665,6 +683,83 @@ def register(client_name = node_name, config = Chef::Config)\n raise\n end\n \n+ #\n+ # In the brave new world of No Certs On Disk, we want to put the pem file into Keychain or the Certstore\n+ # But is it already there?\n+ def check_certstore_for_key(cert_name)\n+ require \"win32-certstore\"\n+ win32certstore = ::Win32::Certstore.open(\"MY\")\n+ win32certstore.search(\"#{cert_name}\")\n+ end\n+\n+ def generate_pfx_package(cert_name, date = nil)\n+ require_relative \"mixin/powershell_exec\"\n+ extend Chef::Mixin::PowershellExec\n+ ::Chef::HTTP::Authenticator.get_cert_password\n+ powershell_code = <<~EOH\n+\n+ $date = \"#{date}\"\n+\n+ $certSplat = @{\n+ Subject = \"#{cert_name}\"\n+ KeyExportPolicy = 'Exportable'\n+ KeyUsage = @('KeyEncipherment','DigitalSignature')\n+ CertStoreLocation = 'Cert:\\\\LocalMachine\\\\My'\n+ TextExtension = @(\"2.5.29.37={text}1.3.6.1.5.5.7.3.2,1.3.6.1.5.5.7.3.1\")\n+ };\n+ if ([string]$date -as [DateTime]){\n+ $certSplat.add('NotAfter', $date)\n+ }\n+\n+ New-SelfSignedCertificate @certSplat;\n+ EOH\n+ powershell_exec!(powershell_code)\n+ end\n+\n+ def move_key_and_register(cert_name)\n+ require \"time\" unless defined?(Time)\n+ autoload :URI, \"uri\"\n+\n+ KeyMigration.instance.key_migrated = true\n+\n+ node = Chef::Config[:node_name]\n+ d = Time.now\n+ end_date = Time.new(d.year, d.month + 3, d.day, d.hour, d.min, d.sec).utc.iso8601\n+\n+ payload = {\n+ name: node,\n+ clientname: node,\n+ public_key: \"\",\n+ expiration_date: end_date,\n+ }\n+\n+ generate_pfx_package(cert_name, end_date)\n+ payload[:public_key] = get_public_key(cert_name)\n+ base_url = \"#{Chef::Config[:chef_server_url]}\"\n+ client = Chef::ServerAPI.new(base_url, client_name: Chef::Config[:validation_client_name], signing_key_filename: Chef::Config[:validation_key])\n+ client.post(base_url + \"/clients\", payload)\n+ KeyMigration.instance.key_migrated = false", - "comment_created_at": "2022-03-15T18:31:18+00:00", - "comment_author": "mwrock", - "comment_body": "I'd suggest not storing the new key in `Cert:\\\\LocalMachine\\\\My` when you first create it. I'd either create one with openssl to avoid the certstore or store in current user. 
Then after the client post, migrate the key to `Cert:\\\\LocalMachine\\\\My`", - "pr_file_module": null - }, - { - "comment_id": "827296110", - "repo_full_name": "chef/chef", - "pr_number": 12640, - "pr_file": "lib/chef/client.rb", - "discussion_id": "823233225", - "commented_code": "@@ -665,6 +683,83 @@ def register(client_name = node_name, config = Chef::Config)\n raise\n end\n \n+ #\n+ # In the brave new world of No Certs On Disk, we want to put the pem file into Keychain or the Certstore\n+ # But is it already there?\n+ def check_certstore_for_key(cert_name)\n+ require \"win32-certstore\"\n+ win32certstore = ::Win32::Certstore.open(\"MY\")\n+ win32certstore.search(\"#{cert_name}\")\n+ end\n+\n+ def generate_pfx_package(cert_name, date = nil)\n+ require_relative \"mixin/powershell_exec\"\n+ extend Chef::Mixin::PowershellExec\n+ ::Chef::HTTP::Authenticator.get_cert_password\n+ powershell_code = <<~EOH\n+\n+ $date = \"#{date}\"\n+\n+ $certSplat = @{\n+ Subject = \"#{cert_name}\"\n+ KeyExportPolicy = 'Exportable'\n+ KeyUsage = @('KeyEncipherment','DigitalSignature')\n+ CertStoreLocation = 'Cert:\\\\LocalMachine\\\\My'\n+ TextExtension = @(\"2.5.29.37={text}1.3.6.1.5.5.7.3.2,1.3.6.1.5.5.7.3.1\")\n+ };\n+ if ([string]$date -as [DateTime]){\n+ $certSplat.add('NotAfter', $date)\n+ }\n+\n+ New-SelfSignedCertificate @certSplat;\n+ EOH\n+ powershell_exec!(powershell_code)\n+ end\n+\n+ def move_key_and_register(cert_name)\n+ require \"time\" unless defined?(Time)\n+ autoload :URI, \"uri\"\n+\n+ KeyMigration.instance.key_migrated = true\n+\n+ node = Chef::Config[:node_name]\n+ d = Time.now\n+ end_date = Time.new(d.year, d.month + 3, d.day, d.hour, d.min, d.sec).utc.iso8601\n+\n+ payload = {\n+ name: node,\n+ clientname: node,\n+ public_key: \"\",\n+ expiration_date: end_date,\n+ }\n+\n+ generate_pfx_package(cert_name, end_date)\n+ payload[:public_key] = get_public_key(cert_name)\n+ base_url = \"#{Chef::Config[:chef_server_url]}\"\n+ client = Chef::ServerAPI.new(base_url, client_name: Chef::Config[:validation_client_name], signing_key_filename: Chef::Config[:validation_key])\n+ client.post(base_url + \"/clients\", payload)\n+ KeyMigration.instance.key_migrated = false", - "comment_created_at": "2022-03-15T18:32:38+00:00", - "comment_author": "mwrock", - "comment_body": "This would eliminate the need for a static `key_migrated` flag which feels fragile and confusing.", - "pr_file_module": null - }, - { - "comment_id": "831624108", - "repo_full_name": "chef/chef", - "pr_number": 12640, - "pr_file": "lib/chef/client.rb", - "discussion_id": "823233225", - "commented_code": "@@ -665,6 +683,83 @@ def register(client_name = node_name, config = Chef::Config)\n raise\n end\n \n+ #\n+ # In the brave new world of No Certs On Disk, we want to put the pem file into Keychain or the Certstore\n+ # But is it already there?\n+ def check_certstore_for_key(cert_name)\n+ require \"win32-certstore\"\n+ win32certstore = ::Win32::Certstore.open(\"MY\")\n+ win32certstore.search(\"#{cert_name}\")\n+ end\n+\n+ def generate_pfx_package(cert_name, date = nil)\n+ require_relative \"mixin/powershell_exec\"\n+ extend Chef::Mixin::PowershellExec\n+ ::Chef::HTTP::Authenticator.get_cert_password\n+ powershell_code = <<~EOH\n+\n+ $date = \"#{date}\"\n+\n+ $certSplat = @{\n+ Subject = \"#{cert_name}\"\n+ KeyExportPolicy = 'Exportable'\n+ KeyUsage = @('KeyEncipherment','DigitalSignature')\n+ CertStoreLocation = 'Cert:\\\\LocalMachine\\\\My'\n+ TextExtension = @(\"2.5.29.37={text}1.3.6.1.5.5.7.3.2,1.3.6.1.5.5.7.3.1\")\n+ };\n+ if 
([string]$date -as [DateTime]){\n+ $certSplat.add('NotAfter', $date)\n+ }\n+\n+ New-SelfSignedCertificate @certSplat;\n+ EOH\n+ powershell_exec!(powershell_code)\n+ end\n+\n+ def move_key_and_register(cert_name)\n+ require \"time\" unless defined?(Time)\n+ autoload :URI, \"uri\"\n+\n+ KeyMigration.instance.key_migrated = true\n+\n+ node = Chef::Config[:node_name]\n+ d = Time.now\n+ end_date = Time.new(d.year, d.month + 3, d.day, d.hour, d.min, d.sec).utc.iso8601\n+\n+ payload = {\n+ name: node,\n+ clientname: node,\n+ public_key: \"\",\n+ expiration_date: end_date,\n+ }\n+\n+ generate_pfx_package(cert_name, end_date)\n+ payload[:public_key] = get_public_key(cert_name)\n+ base_url = \"#{Chef::Config[:chef_server_url]}\"\n+ client = Chef::ServerAPI.new(base_url, client_name: Chef::Config[:validation_client_name], signing_key_filename: Chef::Config[:validation_key])\n+ client.post(base_url + \"/clients\", payload)\n+ KeyMigration.instance.key_migrated = false", - "comment_created_at": "2022-03-21T23:34:03+00:00", - "comment_author": "johnmccrae", - "comment_body": "This makes a lot more sense. ", - "pr_file_module": null - }, - { - "comment_id": "833469005", - "repo_full_name": "chef/chef", - "pr_number": 12640, - "pr_file": "lib/chef/client.rb", - "discussion_id": "823233225", - "commented_code": "@@ -665,6 +683,83 @@ def register(client_name = node_name, config = Chef::Config)\n raise\n end\n \n+ #\n+ # In the brave new world of No Certs On Disk, we want to put the pem file into Keychain or the Certstore\n+ # But is it already there?\n+ def check_certstore_for_key(cert_name)\n+ require \"win32-certstore\"\n+ win32certstore = ::Win32::Certstore.open(\"MY\")\n+ win32certstore.search(\"#{cert_name}\")\n+ end\n+\n+ def generate_pfx_package(cert_name, date = nil)\n+ require_relative \"mixin/powershell_exec\"\n+ extend Chef::Mixin::PowershellExec\n+ ::Chef::HTTP::Authenticator.get_cert_password\n+ powershell_code = <<~EOH\n+\n+ $date = \"#{date}\"\n+\n+ $certSplat = @{\n+ Subject = \"#{cert_name}\"\n+ KeyExportPolicy = 'Exportable'\n+ KeyUsage = @('KeyEncipherment','DigitalSignature')\n+ CertStoreLocation = 'Cert:\\\\LocalMachine\\\\My'\n+ TextExtension = @(\"2.5.29.37={text}1.3.6.1.5.5.7.3.2,1.3.6.1.5.5.7.3.1\")\n+ };\n+ if ([string]$date -as [DateTime]){\n+ $certSplat.add('NotAfter', $date)\n+ }\n+\n+ New-SelfSignedCertificate @certSplat;\n+ EOH\n+ powershell_exec!(powershell_code)\n+ end\n+\n+ def move_key_and_register(cert_name)\n+ require \"time\" unless defined?(Time)\n+ autoload :URI, \"uri\"\n+\n+ KeyMigration.instance.key_migrated = true\n+\n+ node = Chef::Config[:node_name]\n+ d = Time.now\n+ end_date = Time.new(d.year, d.month + 3, d.day, d.hour, d.min, d.sec).utc.iso8601\n+\n+ payload = {\n+ name: node,\n+ clientname: node,\n+ public_key: \"\",\n+ expiration_date: end_date,\n+ }\n+\n+ generate_pfx_package(cert_name, end_date)\n+ payload[:public_key] = get_public_key(cert_name)\n+ base_url = \"#{Chef::Config[:chef_server_url]}\"\n+ client = Chef::ServerAPI.new(base_url, client_name: Chef::Config[:validation_client_name], signing_key_filename: Chef::Config[:validation_key])\n+ client.post(base_url + \"/clients\", payload)\n+ KeyMigration.instance.key_migrated = false", - "comment_created_at": "2022-03-23T16:22:04+00:00", - "comment_author": "johnmccrae", - "comment_body": "Done and working and it makes we more sense now and I got rid of the Singleton class at the top too ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "827353584", - "pr_number": 12640, - "pr_file": 
"lib/chef/http/authenticator.rb", - "created_at": "2022-03-15T19:48:11+00:00", - "commented_code": "present.each do |secret|\n if secret[:name] == \"PfxPass\"\n return secret[:data]\n password = decrypt_pfx_pass(secret[:data])\n return password\n end\n end\n\n # if we make it this far, that means there is no valid password in the Registry. Fail out to correct that.\n raise Chef::Exceptions::Win32RegKeyMissing\n\n rescue Chef::Exceptions::Win32RegKeyMissing\n # if we don't have a password, log that and generate one\n Chef::Log.warn \"Authentication Hive and value not present in registry, creating it now\"\n Chef::Log.warn \"Authentication Hive and values not present in registry, creating them now\"\n new_path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n unless @win32registry.key_exists?(new_path)\n @win32registry.create_key(new_path, true)\n end\n password = SOME_CHARS.sample(1 + rand(SOME_CHARS.count)).join[0...14]\n values = { name: \"PfxPass\", type: :string, data: password }\n encrypted_pass = encrypt_pfx_pass(password)\n values = { name: \"PfxPass\", type: :string, data: encrypted_pass }\n @win32registry.set_value(new_path, values)\n password\n end\n\n def self.encrypt_pfx_pass(password)\n powershell_code = <<~CODE", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "827353584", - "repo_full_name": "chef/chef", - "pr_number": 12640, - "pr_file": "lib/chef/http/authenticator.rb", - "discussion_id": "827353584", - "commented_code": "@@ -166,60 +178,85 @@ def self.get_cert_password\n \n present.each do |secret|\n if secret[:name] == \"PfxPass\"\n- return secret[:data]\n+ password = decrypt_pfx_pass(secret[:data])\n+ return password\n end\n end\n \n- # if we make it this far, that means there is no valid password in the Registry. Fail out to correct that.\n raise Chef::Exceptions::Win32RegKeyMissing\n \n rescue Chef::Exceptions::Win32RegKeyMissing\n # if we don't have a password, log that and generate one\n- Chef::Log.warn \"Authentication Hive and value not present in registry, creating it now\"\n+ Chef::Log.warn \"Authentication Hive and values not present in registry, creating them now\"\n new_path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n unless @win32registry.key_exists?(new_path)\n @win32registry.create_key(new_path, true)\n end\n password = SOME_CHARS.sample(1 + rand(SOME_CHARS.count)).join[0...14]\n- values = { name: \"PfxPass\", type: :string, data: password }\n+ encrypted_pass = encrypt_pfx_pass(password)\n+ values = { name: \"PfxPass\", type: :string, data: encrypted_pass }\n @win32registry.set_value(new_path, values)\n password\n end\n \n+ def self.encrypt_pfx_pass(password)\n+ powershell_code = <<~CODE", - "comment_created_at": "2022-03-15T19:48:11+00:00", - "comment_author": "mwrock", - "comment_body": "I'd use `Chef::ReservedNames::Win32::Crypto.encrypt(str)` for this. You would want to add a `decrypt` method.", - "pr_file_module": null - }, - { - "comment_id": "833549896", - "repo_full_name": "chef/chef", - "pr_number": 12640, - "pr_file": "lib/chef/http/authenticator.rb", - "discussion_id": "827353584", - "commented_code": "@@ -166,60 +178,85 @@ def self.get_cert_password\n \n present.each do |secret|\n if secret[:name] == \"PfxPass\"\n- return secret[:data]\n+ password = decrypt_pfx_pass(secret[:data])\n+ return password\n end\n end\n \n- # if we make it this far, that means there is no valid password in the Registry. 
Fail out to correct that.\n raise Chef::Exceptions::Win32RegKeyMissing\n \n rescue Chef::Exceptions::Win32RegKeyMissing\n # if we don't have a password, log that and generate one\n- Chef::Log.warn \"Authentication Hive and value not present in registry, creating it now\"\n+ Chef::Log.warn \"Authentication Hive and values not present in registry, creating them now\"\n new_path = \"HKEY_LOCAL_MACHINE\\\\Software\\\\Progress\\\\Authentication\"\n unless @win32registry.key_exists?(new_path)\n @win32registry.create_key(new_path, true)\n end\n password = SOME_CHARS.sample(1 + rand(SOME_CHARS.count)).join[0...14]\n- values = { name: \"PfxPass\", type: :string, data: password }\n+ encrypted_pass = encrypt_pfx_pass(password)\n+ values = { name: \"PfxPass\", type: :string, data: encrypted_pass }\n @win32registry.set_value(new_path, values)\n password\n end\n \n+ def self.encrypt_pfx_pass(password)\n+ powershell_code = <<~CODE", - "comment_created_at": "2022-03-23T17:37:35+00:00", - "comment_author": "johnmccrae", - "comment_body": "As discussed, I am punting for now since the ReservedNames code is interchangeable with the PowerShell approach. Taking a backlog item to do that work", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-secure-network-operations.json b/_reviewers/chef-secure-network-operations.json new file mode 100644 index 0000000..5c8cc9d --- /dev/null +++ b/_reviewers/chef-secure-network-operations.json @@ -0,0 +1,162 @@ +[ + { + "discussion_id": "1369668732", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "created_at": "2023-10-24T06:12:24+00:00", + "commented_code": "class Chef\n class Resource\n class ChocolateyInstaller < Chef::Resource\n provides :chocolatey_installer\n\n description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n introduced \"18.3\"\n examples <<~DOC\n **Install Chocolatey**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n end\n ```\n\n **Uninstall Chocolatey**\n\n ```ruby\n chocolatey_installer 'Some random verbiage' do\n action :uninstall\n end\n ```\n\n **Install Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n download_url \"https://www.contoso.com/foo\"\n chocolatey_version '2.12.24'\n end\n ```\n\n **Upgrade Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :upgrade\n chocolatey_version '2.12.24'\n end\n ```\n DOC\n\n allowed_actions :install, :uninstall, :upgrade\n\n property :download_url, String,\n description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n\n property :chocolatey_version, String,\n description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n\n property :use_native_unzip, [TrueClass, FalseClass], default: false,\n description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. 
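The authenticator.rb thread above settles on encrypting the generated PfxPass before it is written to the registry, with `Chef::ReservedNames::Win32::Crypto.encrypt` named as the preferred API and a matching decrypt helper still to be added. A minimal sketch of that shape, offered only as an illustration of the suggestion and not as the merged implementation; the module name and the `decrypt` call are assumptions:

```ruby
module PfxPassProtection
  # Sketch of the reviewer's suggestion: only protected data should land
  # under HKLM\Software\Progress\Authentication, never the plain-text password.
  def self.encrypt_pfx_pass(password)
    # API named in the review thread; assumes the win32 crypto class is loaded.
    Chef::ReservedNames::Win32::Crypto.encrypt(password)
  end

  def self.decrypt_pfx_pass(encrypted)
    # The thread notes a decrypt counterpart would have to be added;
    # this call is an assumption, not an existing Chef API.
    Chef::ReservedNames::Win32::Crypto.decrypt(encrypted)
  end
end
```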
This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n\n property :ignore_proxy, [TrueClass, FalseClass], default: false,\n description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n\n property :proxy_url, String,\n description: \"Specifies the proxy URL to use during the download.\"\n\n property :proxy_user, String,\n description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n\n property :proxy_password, String,\n description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n\n load_current_value do\n current_state = is_choco_installed?\n current_value_does_not_exist! if current_state == false\n current_state\n end\n\n def is_choco_installed?\n ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n end\n\n def get_choco_version\n powershell_exec(\"choco --version\").result\n end\n\n def existing_version\n Gem::Version.new(get_choco_version)\n end\n\n def define_resource_requirements\n [ new_resource.proxy_user, new_resource.proxy_password ].each do\n requirements.assert(:install, :upgrade).each do |a|\n a.assertion do\n (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?)\n end\n a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n end\n end\n end\n\n action :install, description: \"Installs Chocolatey package manager\" do\n unless new_resource.download_url.nil?\n powershell_exec(\"Set-Item -path env:ChocolateyDownloadUrl -Value #{new_resource.download_url}\")\n end\n\n unless new_resource.chocolatey_version.nil?\n powershell_exec(\"Set-Item -path env:ChocolateyVersion -Value #{new_resource.chocolatey_version}\")\n end\n\n if new_resource.use_native_unzip == true\n powershell_exec(\"Set-Item -path env:ChocolateyUseWindowsCompression -Value true\")\n end\n\n if new_resource.ignore_proxy == true\n powershell_exec(\"Set-Item -path env:ChocolateyIgnoreProxy -Value true\")\n end\n\n unless new_resource.proxy_url.nil?\n powershell_exec(\"Set-Item -path env:ChocolateyProxyLocation -Value #{new_resource.proxy_url}\")\n end\n\n unless !new_resource.proxy_user.nil? 
&& !new_resource.proxy_password.nil?\n powershell_exec(\"Set-Item -path env:ChocolateyProxyUser -Value #{new_resource.proxy_user}; Set-Item -path env:ChocolateyProxyPassword -Value #{new_resource.proxy_password}\")\n end\n\n converge_if_changed do\n powershell_exec(\"iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))\").error!", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1369668732", + "repo_full_name": "chef/chef", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "discussion_id": "1369668732", + "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. 
Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version\n+ Gem::Version.new(get_choco_version)\n+ end\n+\n+ def define_resource_requirements\n+ [ new_resource.proxy_user, new_resource.proxy_password ].each do\n+ requirements.assert(:install, :upgrade).each do |a|\n+ a.assertion do\n+ (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?)\n+ end\n+ a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n+ a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n+ end\n+ end\n+ end\n+\n+ action :install, description: \"Installs Chocolatey package manager\" do\n+ unless new_resource.download_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyDownloadUrl -Value #{new_resource.download_url}\")\n+ end\n+\n+ unless new_resource.chocolatey_version.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyVersion -Value #{new_resource.chocolatey_version}\")\n+ end\n+\n+ if new_resource.use_native_unzip == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyUseWindowsCompression -Value true\")\n+ end\n+\n+ if new_resource.ignore_proxy == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyIgnoreProxy -Value true\")\n+ end\n+\n+ unless new_resource.proxy_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyLocation -Value #{new_resource.proxy_url}\")\n+ end\n+\n+ unless !new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyUser -Value #{new_resource.proxy_user}; Set-Item -path env:ChocolateyProxyPassword -Value #{new_resource.proxy_password}\")\n+ end\n+\n+ converge_if_changed do\n+ powershell_exec(\"iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))\").error!", + "comment_created_at": "2023-10-24T06:12:24+00:00", + "comment_author": "jaymzh", + "comment_body": "This downloads it, but it doesn't run the ps1 that's downloaded... So it's not installed... I think.", + "pr_file_module": null + }, + { + "comment_id": "1370277897", + "repo_full_name": "chef/chef", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "discussion_id": "1369668732", + "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. 
Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version\n+ Gem::Version.new(get_choco_version)\n+ end\n+\n+ def define_resource_requirements\n+ [ new_resource.proxy_user, new_resource.proxy_password ].each do\n+ requirements.assert(:install, :upgrade).each do |a|\n+ a.assertion do\n+ (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? 
&& !new_resource.proxy_password.nil?)\n+ end\n+ a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n+ a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n+ end\n+ end\n+ end\n+\n+ action :install, description: \"Installs Chocolatey package manager\" do\n+ unless new_resource.download_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyDownloadUrl -Value #{new_resource.download_url}\")\n+ end\n+\n+ unless new_resource.chocolatey_version.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyVersion -Value #{new_resource.chocolatey_version}\")\n+ end\n+\n+ if new_resource.use_native_unzip == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyUseWindowsCompression -Value true\")\n+ end\n+\n+ if new_resource.ignore_proxy == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyIgnoreProxy -Value true\")\n+ end\n+\n+ unless new_resource.proxy_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyLocation -Value #{new_resource.proxy_url}\")\n+ end\n+\n+ unless !new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyUser -Value #{new_resource.proxy_user}; Set-Item -path env:ChocolateyProxyPassword -Value #{new_resource.proxy_password}\")\n+ end\n+\n+ converge_if_changed do\n+ powershell_exec(\"iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))\").error!", + "comment_created_at": "2023-10-24T14:24:11+00:00", + "comment_author": "johnmccrae", + "comment_body": "? I ran this on bare metal and it worked?", + "pr_file_module": null + }, + { + "comment_id": "1370284132", + "repo_full_name": "chef/chef", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "discussion_id": "1369668732", + "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. 
This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version\n+ Gem::Version.new(get_choco_version)\n+ end\n+\n+ def define_resource_requirements\n+ [ new_resource.proxy_user, new_resource.proxy_password ].each do\n+ requirements.assert(:install, :upgrade).each do |a|\n+ a.assertion do\n+ (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?)\n+ end\n+ a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n+ a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n+ end\n+ end\n+ end\n+\n+ action :install, description: \"Installs Chocolatey package manager\" do\n+ unless new_resource.download_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyDownloadUrl -Value #{new_resource.download_url}\")\n+ end\n+\n+ unless new_resource.chocolatey_version.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyVersion -Value #{new_resource.chocolatey_version}\")\n+ end\n+\n+ if new_resource.use_native_unzip == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyUseWindowsCompression -Value true\")\n+ end\n+\n+ if new_resource.ignore_proxy == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyIgnoreProxy -Value true\")\n+ end\n+\n+ unless new_resource.proxy_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyLocation -Value #{new_resource.proxy_url}\")\n+ end\n+\n+ unless !new_resource.proxy_user.nil? 
&& !new_resource.proxy_password.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyUser -Value #{new_resource.proxy_user}; Set-Item -path env:ChocolateyProxyPassword -Value #{new_resource.proxy_password}\")\n+ end\n+\n+ converge_if_changed do\n+ powershell_exec(\"iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))\").error!", + "comment_created_at": "2023-10-24T14:27:24+00:00", + "comment_author": "johnmccrae", + "comment_body": "testing again", + "pr_file_module": null + }, + { + "comment_id": "1370407701", + "repo_full_name": "chef/chef", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "discussion_id": "1369668732", + "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. 
Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version\n+ Gem::Version.new(get_choco_version)\n+ end\n+\n+ def define_resource_requirements\n+ [ new_resource.proxy_user, new_resource.proxy_password ].each do\n+ requirements.assert(:install, :upgrade).each do |a|\n+ a.assertion do\n+ (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?)\n+ end\n+ a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n+ a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n+ end\n+ end\n+ end\n+\n+ action :install, description: \"Installs Chocolatey package manager\" do\n+ unless new_resource.download_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyDownloadUrl -Value #{new_resource.download_url}\")\n+ end\n+\n+ unless new_resource.chocolatey_version.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyVersion -Value #{new_resource.chocolatey_version}\")\n+ end\n+\n+ if new_resource.use_native_unzip == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyUseWindowsCompression -Value true\")\n+ end\n+\n+ if new_resource.ignore_proxy == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyIgnoreProxy -Value true\")\n+ end\n+\n+ unless new_resource.proxy_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyLocation -Value #{new_resource.proxy_url}\")\n+ end\n+\n+ unless !new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyUser -Value #{new_resource.proxy_user}; Set-Item -path env:ChocolateyProxyPassword -Value #{new_resource.proxy_password}\")\n+ end\n+\n+ converge_if_changed do\n+ powershell_exec(\"iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))\").error!", + "comment_created_at": "2023-10-24T15:26:16+00:00", + "comment_author": "johnmccrae", + "comment_body": "I see what you're asking now. the IEX at the beginning of the block is a short-cut for Invoke-Expression. So the IEX calls the block which downloads the installer ps1 file then executes it.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1370779209", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "created_at": "2023-10-24T20:31:00+00:00", + "commented_code": "class Chef\n class Resource\n class ChocolateyInstaller < Chef::Resource\n provides :chocolatey_installer\n\n description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. 
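The exchange above turns on a single point: `DownloadString` only fetches the text of install.ps1, and the surrounding `iex` (Invoke-Expression) is what actually executes it. A two-step equivalent, shown purely to make that distinction explicit and not as the code that shipped:

```ruby
# Two-step equivalent of the one-liner discussed above; illustrative only.
script = <<~EOH
  # Fetch the installer script as plain text -- nothing runs yet.
  $installer = (New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1')

  # Invoke-Expression is the step that evaluates the downloaded script
  # and performs the actual Chocolatey install.
  Invoke-Expression $installer
EOH
powershell_exec(script).error!
```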
Use the Chocolatey Feature resource to customize your install\"\n introduced \"18.3\"\n examples <<~DOC\n **Install Chocolatey**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n end\n ```\n\n **Uninstall Chocolatey**\n\n ```ruby\n chocolatey_installer 'Some random verbiage' do\n action :uninstall\n end\n ```\n\n **Install Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n download_url \"https://www.contoso.com/foo\"\n chocolatey_version '2.12.24'\n end\n ```\n\n **Upgrade Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :upgrade\n chocolatey_version '2.12.24'\n end\n ```\n DOC\n\n allowed_actions :install, :uninstall, :upgrade\n\n property :download_url, String,\n description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n\n property :chocolatey_version, String,\n description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n\n property :use_native_unzip, [TrueClass, FalseClass], default: false,\n description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n\n property :ignore_proxy, [TrueClass, FalseClass], default: false,\n description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n\n property :proxy_url, String,\n description: \"Specifies the proxy URL to use during the download.\"\n\n property :proxy_user, String,\n description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n\n property :proxy_password, String,\n description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n\n load_current_value do\n current_state = is_choco_installed?\n current_value_does_not_exist! if current_state == false\n current_state\n end\n\n def is_choco_installed?\n ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n end\n\n def get_choco_version\n powershell_exec(\"choco --version\").result\n end\n\n def existing_version\n Gem::Version.new(get_choco_version)\n end\n\n def define_resource_requirements\n [ new_resource.proxy_user, new_resource.proxy_password ].each do\n requirements.assert(:install, :upgrade).each do |a|\n a.assertion do\n (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? 
&& !new_resource.proxy_password.nil?)\n end\n a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n end\n end\n end\n\n action :install, description: \"Installs Chocolatey package manager\" do\n unless new_resource.download_url.nil?\n powershell_exec(\"Set-Item -path env:ChocolateyDownloadUrl -Value #{new_resource.download_url}\")\n end\n\n unless new_resource.chocolatey_version.nil?\n powershell_exec(\"Set-Item -path env:ChocolateyVersion -Value #{new_resource.chocolatey_version}\")\n end\n\n if new_resource.use_native_unzip == true\n powershell_exec(\"Set-Item -path env:ChocolateyUseWindowsCompression -Value true\")\n end\n\n if new_resource.ignore_proxy == true\n powershell_exec(\"Set-Item -path env:ChocolateyIgnoreProxy -Value true\")\n end\n\n unless new_resource.proxy_url.nil?\n powershell_exec(\"Set-Item -path env:ChocolateyProxyLocation -Value #{new_resource.proxy_url}\")\n end\n\n unless !new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?\n powershell_exec(\"Set-Item -path env:ChocolateyProxyUser -Value #{new_resource.proxy_user}; Set-Item -path env:ChocolateyProxyPassword -Value #{new_resource.proxy_password}\")\n end\n\n converge_if_changed do\n powershell_exec(\"iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))\").error!", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1370779209", + "repo_full_name": "chef/chef", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "discussion_id": "1370779209", + "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. 
This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version\n+ Gem::Version.new(get_choco_version)\n+ end\n+\n+ def define_resource_requirements\n+ [ new_resource.proxy_user, new_resource.proxy_password ].each do\n+ requirements.assert(:install, :upgrade).each do |a|\n+ a.assertion do\n+ (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?)\n+ end\n+ a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n+ a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n+ end\n+ end\n+ end\n+\n+ action :install, description: \"Installs Chocolatey package manager\" do\n+ unless new_resource.download_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyDownloadUrl -Value #{new_resource.download_url}\")\n+ end\n+\n+ unless new_resource.chocolatey_version.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyVersion -Value #{new_resource.chocolatey_version}\")\n+ end\n+\n+ if new_resource.use_native_unzip == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyUseWindowsCompression -Value true\")\n+ end\n+\n+ if new_resource.ignore_proxy == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyIgnoreProxy -Value true\")\n+ end\n+\n+ unless new_resource.proxy_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyLocation -Value #{new_resource.proxy_url}\")\n+ end\n+\n+ unless !new_resource.proxy_user.nil? 
&& !new_resource.proxy_password.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyUser -Value #{new_resource.proxy_user}; Set-Item -path env:ChocolateyProxyPassword -Value #{new_resource.proxy_password}\")\n+ end\n+\n+ converge_if_changed do\n+ powershell_exec(\"iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))\").error!", + "comment_created_at": "2023-10-24T20:31:00+00:00", + "comment_author": "tpowell-progress", + "comment_body": "```suggestion\r\n # note that Invoke-Expression is being called on the downloaded script (outer parens), not triggering the script download (inner parens)\r\n powershell_exec(\"Invoke-Expression ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))\").error!\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1371774835", + "repo_full_name": "chef/chef", + "pr_number": 14043, + "pr_file": "lib/chef/resource/chocolatey_installer.rb", + "discussion_id": "1370779209", + "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. 
This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version\n+ Gem::Version.new(get_choco_version)\n+ end\n+\n+ def define_resource_requirements\n+ [ new_resource.proxy_user, new_resource.proxy_password ].each do\n+ requirements.assert(:install, :upgrade).each do |a|\n+ a.assertion do\n+ (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?)\n+ end\n+ a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n+ a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n+ end\n+ end\n+ end\n+\n+ action :install, description: \"Installs Chocolatey package manager\" do\n+ unless new_resource.download_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyDownloadUrl -Value #{new_resource.download_url}\")\n+ end\n+\n+ unless new_resource.chocolatey_version.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyVersion -Value #{new_resource.chocolatey_version}\")\n+ end\n+\n+ if new_resource.use_native_unzip == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyUseWindowsCompression -Value true\")\n+ end\n+\n+ if new_resource.ignore_proxy == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyIgnoreProxy -Value true\")\n+ end\n+\n+ unless new_resource.proxy_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyLocation -Value #{new_resource.proxy_url}\")\n+ end\n+\n+ unless !new_resource.proxy_user.nil? 
&& !new_resource.proxy_password.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyUser -Value #{new_resource.proxy_user}; Set-Item -path env:ChocolateyProxyPassword -Value #{new_resource.proxy_password}\")\n+ end\n+\n+ converge_if_changed do\n+ powershell_exec(\"iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))\").error!", + "comment_created_at": "2023-10-25T13:35:03+00:00", + "comment_author": "johnmccrae", + "comment_body": "done", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "831753880", + "pr_number": 12694, + "pr_file": "lib/chef/resource/selinux_module.rb", + "created_at": "2022-03-22T04:14:24+00:00", + "commented_code": "#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nrequire_relative \"../resource\"\n\nclass Chef\n class Resource\n \tclass SelinuxModule < Chef::Resource\n \t\tunified_mode true\n\n \t\tprovides :selinux_module\n\n \t\tproperty :module_name, String,\n \t\t name_property: true,\n \t\t description: 'Override the module name'\n\n \t\tproperty :source, String,\n \t\t description: 'Module source file name'\n\n \t\tproperty :content, String,\n \t\t description: 'Module source as String'\n\n \t\tproperty :cookbook, String,\n \t\t description: 'Cookbook to source from module source file from(if it is not located in the current cookbook). 
The default value is the current cookbook.',\n desired_state: false\n\n \t\tproperty :base_dir, String,\n \t\t default: '/etc/selinux/local',\n \t\t description: 'Directory to create module source file in'\n\n \t\taction_class do\n \t\t def selinux_module_filepath(type)\n \t\t path = ::File.join(new_resource.base_dir, \"#{new_resource.module_name}\")\n \t\t path.concat(\".#{type}\") if type\n \t\t end\n\n \t\t def list_installed_modules\n \t\t shell_out!('semodule --list-modules').stdout.split(\"\\n\").map { |x| x.split(/\\s/).first }\n \t\t end\n \t\tend\n\n \t\taction :create do\n \t\t directory new_resource.base_dir\n\n \t\t if property_is_set?(:content)\n \t\t file selinux_module_filepath('te') do\n \t\t content new_resource.content\n\n \t\t mode '0600'\n \t\t owner 'root'\n \t\t group 'root'\n\n \t\t action :create\n\n \t\t notifies :run, \"execute[Compiling SELinux modules at '#{new_resource.base_dir}']\", :immediately\n \t\t end\n \t\t else\n \t\t cookbook_file selinux_module_filepath('te') do\n \t\t cookbook new_resource.cookbook\n \t\t source new_resource.source\n\n \t\t mode '0600'\n \t\t owner 'root'\n \t\t group 'root'\n\n \t\t action :create\n\n \t\t notifies :run, \"execute[Compiling SELinux modules at '#{new_resource.base_dir}']\", :immediately\n \t\t end\n \t\t end\n\n \t\t execute \"Compiling SELinux modules at '#{new_resource.base_dir}'\" do\n \t\t cwd new_resource.base_dir\n \t\t command \"make -C #{new_resource.base_dir} -f /usr/share/selinux/devel/Makefile\"\n \t\t timeout 120\n \t\t user 'root'\n\n \t\t action :nothing\n\n \t\t notifies :run, \"execute[Install SELinux module '#{selinux_module_filepath('pp')}']\", :immediately\n \t\t end\n\n \t\t raise \"Compilation must have failed, no 'pp' file found at: '#{selinux_module_filepath('pp')}'\" unless ::File.exist?(selinux_module_filepath('pp'))\n\n \t\t execute \"Install SELinux module '#{selinux_module_filepath('pp')}'\" do\n \t\t command \"semodule --install '#{selinux_module_filepath('pp')}'\"\n \t\t action :nothing\n \t\t end\n \t\tend\n\n \t\taction :delete do\n \t\t %w(fc if pp te).each do |type|\n \t\t next unless ::File.exist?(selinux_module_filepath(type))\n\n \t\t file selinux_module_filepath(type) do\n \t\t action :delete\n \t\t end\n \t\t end\n \t\tend\n\n \t\taction :install do\n \t\t raise \"Module must be compiled before it can be installed, no 'pp' file found at: '#{selinux_module_filepath('pp')}'\" unless ::File.exist?(selinux_module_filepath('pp'))\n\n \t\t unless list_installed_modules.include? 
new_resource.module_name\n \t\t converge_by \"Install SELinux module #{selinux_module_filepath('pp')}\" do\n \t\t shell_out!(\"semodule --install '#{selinux_module_filepath('pp')}'\")", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "831753880", + "repo_full_name": "chef/chef", + "pr_number": 12694, + "pr_file": "lib/chef/resource/selinux_module.rb", + "discussion_id": "831753880", + "commented_code": "@@ -0,0 +1,130 @@\n+#\n+# Licensed under the Apache License, Version 2.0 (the \"License\");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an \"AS IS\" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+\n+require_relative \"../resource\"\n+\n+class Chef\n+ class Resource\n+ \tclass SelinuxModule < Chef::Resource\n+ \t\tunified_mode true\n+\n+ \t\tprovides :selinux_module\n+\n+ \t\tproperty :module_name, String,\n+ \t\t name_property: true,\n+ \t\t description: 'Override the module name'\n+\n+ \t\tproperty :source, String,\n+ \t\t description: 'Module source file name'\n+\n+ \t\tproperty :content, String,\n+ \t\t description: 'Module source as String'\n+\n+ \t\tproperty :cookbook, String,\n+ \t\t description: 'Cookbook to source from module source file from(if it is not located in the current cookbook). The default value is the current cookbook.',\n+ desired_state: false\n+\n+ \t\tproperty :base_dir, String,\n+ \t\t default: '/etc/selinux/local',\n+ \t\t description: 'Directory to create module source file in'\n+\n+ \t\taction_class do\n+ \t\t def selinux_module_filepath(type)\n+ \t\t path = ::File.join(new_resource.base_dir, \"#{new_resource.module_name}\")\n+ \t\t path.concat(\".#{type}\") if type\n+ \t\t end\n+\n+ \t\t def list_installed_modules\n+ \t\t shell_out!('semodule --list-modules').stdout.split(\"\\n\").map { |x| x.split(/\\s/).first }\n+ \t\t end\n+ \t\tend\n+\n+ \t\taction :create do\n+ \t\t directory new_resource.base_dir\n+\n+ \t\t if property_is_set?(:content)\n+ \t\t file selinux_module_filepath('te') do\n+ \t\t content new_resource.content\n+\n+ \t\t mode '0600'\n+ \t\t owner 'root'\n+ \t\t group 'root'\n+\n+ \t\t action :create\n+\n+ \t\t notifies :run, \"execute[Compiling SELinux modules at '#{new_resource.base_dir}']\", :immediately\n+ \t\t end\n+ \t\t else\n+ \t\t cookbook_file selinux_module_filepath('te') do\n+ \t\t cookbook new_resource.cookbook\n+ \t\t source new_resource.source\n+\n+ \t\t mode '0600'\n+ \t\t owner 'root'\n+ \t\t group 'root'\n+\n+ \t\t action :create\n+\n+ \t\t notifies :run, \"execute[Compiling SELinux modules at '#{new_resource.base_dir}']\", :immediately\n+ \t\t end\n+ \t\t end\n+\n+ \t\t execute \"Compiling SELinux modules at '#{new_resource.base_dir}'\" do\n+ \t\t cwd new_resource.base_dir\n+ \t\t command \"make -C #{new_resource.base_dir} -f /usr/share/selinux/devel/Makefile\"\n+ \t\t timeout 120\n+ \t\t user 'root'\n+\n+ \t\t action :nothing\n+\n+ \t\t notifies :run, \"execute[Install SELinux module '#{selinux_module_filepath('pp')}']\", :immediately\n+ \t\t end\n+\n+ \t\t raise \"Compilation must have failed, no 'pp' file found at: '#{selinux_module_filepath('pp')}'\" unless ::File.exist?(selinux_module_filepath('pp'))\n+\n+ 
\t\t execute \"Install SELinux module '#{selinux_module_filepath('pp')}'\" do\n+ \t\t command \"semodule --install '#{selinux_module_filepath('pp')}'\"\n+ \t\t action :nothing\n+ \t\t end\n+ \t\tend\n+\n+ \t\taction :delete do\n+ \t\t %w(fc if pp te).each do |type|\n+ \t\t next unless ::File.exist?(selinux_module_filepath(type))\n+\n+ \t\t file selinux_module_filepath(type) do\n+ \t\t action :delete\n+ \t\t end\n+ \t\t end\n+ \t\tend\n+\n+ \t\taction :install do\n+ \t\t raise \"Module must be compiled before it can be installed, no 'pp' file found at: '#{selinux_module_filepath('pp')}'\" unless ::File.exist?(selinux_module_filepath('pp'))\n+\n+ \t\t unless list_installed_modules.include? new_resource.module_name\n+ \t\t converge_by \"Install SELinux module #{selinux_module_filepath('pp')}\" do\n+ \t\t shell_out!(\"semodule --install '#{selinux_module_filepath('pp')}'\")", + "comment_created_at": "2022-03-22T04:14:24+00:00", + "comment_author": "lamont-granquist", + "comment_body": "one thing i'd suggest is using:\r\n\r\n```\r\nshell_out!(\"semodule\", \"--install\", selinux_module_filepath('pp'))\r\n```\r\n\r\nthat avoids starting up a shell to parse the command line, and feeds the arguments into argv[0..2] and as a result the quoting of the 2nd argument is no longer necessary.", + "pr_file_module": null + }, + { + "comment_id": "846295912", + "repo_full_name": "chef/chef", + "pr_number": 12694, + "pr_file": "lib/chef/resource/selinux_module.rb", + "discussion_id": "831753880", + "commented_code": "@@ -0,0 +1,130 @@\n+#\n+# Licensed under the Apache License, Version 2.0 (the \"License\");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an \"AS IS\" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+\n+require_relative \"../resource\"\n+\n+class Chef\n+ class Resource\n+ \tclass SelinuxModule < Chef::Resource\n+ \t\tunified_mode true\n+\n+ \t\tprovides :selinux_module\n+\n+ \t\tproperty :module_name, String,\n+ \t\t name_property: true,\n+ \t\t description: 'Override the module name'\n+\n+ \t\tproperty :source, String,\n+ \t\t description: 'Module source file name'\n+\n+ \t\tproperty :content, String,\n+ \t\t description: 'Module source as String'\n+\n+ \t\tproperty :cookbook, String,\n+ \t\t description: 'Cookbook to source from module source file from(if it is not located in the current cookbook). 
The default value is the current cookbook.',\n+ desired_state: false\n+\n+ \t\tproperty :base_dir, String,\n+ \t\t default: '/etc/selinux/local',\n+ \t\t description: 'Directory to create module source file in'\n+\n+ \t\taction_class do\n+ \t\t def selinux_module_filepath(type)\n+ \t\t path = ::File.join(new_resource.base_dir, \"#{new_resource.module_name}\")\n+ \t\t path.concat(\".#{type}\") if type\n+ \t\t end\n+\n+ \t\t def list_installed_modules\n+ \t\t shell_out!('semodule --list-modules').stdout.split(\"\\n\").map { |x| x.split(/\\s/).first }\n+ \t\t end\n+ \t\tend\n+\n+ \t\taction :create do\n+ \t\t directory new_resource.base_dir\n+\n+ \t\t if property_is_set?(:content)\n+ \t\t file selinux_module_filepath('te') do\n+ \t\t content new_resource.content\n+\n+ \t\t mode '0600'\n+ \t\t owner 'root'\n+ \t\t group 'root'\n+\n+ \t\t action :create\n+\n+ \t\t notifies :run, \"execute[Compiling SELinux modules at '#{new_resource.base_dir}']\", :immediately\n+ \t\t end\n+ \t\t else\n+ \t\t cookbook_file selinux_module_filepath('te') do\n+ \t\t cookbook new_resource.cookbook\n+ \t\t source new_resource.source\n+\n+ \t\t mode '0600'\n+ \t\t owner 'root'\n+ \t\t group 'root'\n+\n+ \t\t action :create\n+\n+ \t\t notifies :run, \"execute[Compiling SELinux modules at '#{new_resource.base_dir}']\", :immediately\n+ \t\t end\n+ \t\t end\n+\n+ \t\t execute \"Compiling SELinux modules at '#{new_resource.base_dir}'\" do\n+ \t\t cwd new_resource.base_dir\n+ \t\t command \"make -C #{new_resource.base_dir} -f /usr/share/selinux/devel/Makefile\"\n+ \t\t timeout 120\n+ \t\t user 'root'\n+\n+ \t\t action :nothing\n+\n+ \t\t notifies :run, \"execute[Install SELinux module '#{selinux_module_filepath('pp')}']\", :immediately\n+ \t\t end\n+\n+ \t\t raise \"Compilation must have failed, no 'pp' file found at: '#{selinux_module_filepath('pp')}'\" unless ::File.exist?(selinux_module_filepath('pp'))\n+\n+ \t\t execute \"Install SELinux module '#{selinux_module_filepath('pp')}'\" do\n+ \t\t command \"semodule --install '#{selinux_module_filepath('pp')}'\"\n+ \t\t action :nothing\n+ \t\t end\n+ \t\tend\n+\n+ \t\taction :delete do\n+ \t\t %w(fc if pp te).each do |type|\n+ \t\t next unless ::File.exist?(selinux_module_filepath(type))\n+\n+ \t\t file selinux_module_filepath(type) do\n+ \t\t action :delete\n+ \t\t end\n+ \t\t end\n+ \t\tend\n+\n+ \t\taction :install do\n+ \t\t raise \"Module must be compiled before it can be installed, no 'pp' file found at: '#{selinux_module_filepath('pp')}'\" unless ::File.exist?(selinux_module_filepath('pp'))\n+\n+ \t\t unless list_installed_modules.include? new_resource.module_name\n+ \t\t converge_by \"Install SELinux module #{selinux_module_filepath('pp')}\" do\n+ \t\t shell_out!(\"semodule --install '#{selinux_module_filepath('pp')}'\")", + "comment_created_at": "2022-04-08T16:24:41+00:00", + "comment_author": "neha-p6", + "comment_body": "Updated for now few occurrences. (Not sure if we should be splitting ones with pipe operator) ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "687989783", + "pr_number": 11905, + "pr_file": "lib/chef/provider/remote_file/http.rb", + "created_at": "2021-08-12T18:32:45+00:00", + "commented_code": "def http_client_opts\n opts = {}\n # For http request retry and retry_delay was coming from config (default values i.e. 
retry: 5, retry_delay: 2)\n # Added retries, retry_delay options to set \"retries\" and \"retry_delay\" properly\n opts[:retries] = new_resource.retries\n opts[:retry_delay] = new_resource.retry_delay", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "687989783", + "repo_full_name": "chef/chef", + "pr_number": 11905, + "pr_file": "lib/chef/provider/remote_file/http.rb", + "discussion_id": "687989783", + "commented_code": "@@ -122,6 +122,10 @@ def etag_from(response)\n \n def http_client_opts\n opts = {}\n+ # For http request retry and retry_delay was coming from config (default values i.e. retry: 5, retry_delay: 2)\n+ # Added retries, retry_delay options to set \"retries\" and \"retry_delay\" properly\n+ opts[:retries] = new_resource.retries\n+ opts[:retry_delay] = new_resource.retry_delay", + "comment_created_at": "2021-08-12T18:32:45+00:00", + "comment_author": "lamont-granquist", + "comment_body": "instead of this there should be a `property :http_options` hash added to the remote_file resource and that should be merged with the defaults here in http_client_opts", + "pr_file_module": null + }, + { + "comment_id": "688368989", + "repo_full_name": "chef/chef", + "pr_number": 11905, + "pr_file": "lib/chef/provider/remote_file/http.rb", + "discussion_id": "687989783", + "commented_code": "@@ -122,6 +122,10 @@ def etag_from(response)\n \n def http_client_opts\n opts = {}\n+ # For http request retry and retry_delay was coming from config (default values i.e. retry: 5, retry_delay: 2)\n+ # Added retries, retry_delay options to set \"retries\" and \"retry_delay\" properly\n+ opts[:retries] = new_resource.retries\n+ opts[:retry_delay] = new_resource.retry_delay", + "comment_created_at": "2021-08-13T09:16:12+00:00", + "comment_author": "antima-gupta", + "comment_body": "Added `property :http_options` , please have look.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-secure-network-operations.md b/_reviewers/chef-secure-network-operations.md index bf47dfc..8f8228e 100644 --- a/_reviewers/chef-secure-network-operations.md +++ b/_reviewers/chef-secure-network-operations.md @@ -52,167 +52,3 @@ end ``` These practices ensure network operations are secure against injection attacks, clearly documented for maintainers, and configurable for different network environments. - - -[ - { - "discussion_id": "1369668732", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "created_at": "2023-10-24T06:12:24+00:00", - "commented_code": "class Chef\n class Resource\n class ChocolateyInstaller < Chef::Resource\n provides :chocolatey_installer\n\n description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. 
Use the Chocolatey Feature resource to customize your install\"\n introduced \"18.3\"\n examples <<~DOC\n **Install Chocolatey**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n end\n ```\n\n **Uninstall Chocolatey**\n\n ```ruby\n chocolatey_installer 'Some random verbiage' do\n action :uninstall\n end\n ```\n\n **Install Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n download_url \"https://www.contoso.com/foo\"\n chocolatey_version '2.12.24'\n end\n ```\n\n **Upgrade Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :upgrade\n chocolatey_version '2.12.24'\n end\n ```\n DOC\n\n allowed_actions :install, :uninstall, :upgrade\n\n property :download_url, String,\n description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n\n property :chocolatey_version, String,\n description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n\n property :use_native_unzip, [TrueClass, FalseClass], default: false,\n description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n\n property :ignore_proxy, [TrueClass, FalseClass], default: false,\n description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n\n property :proxy_url, String,\n description: \"Specifies the proxy URL to use during the download.\"\n\n property :proxy_user, String,\n description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n\n property :proxy_password, String,\n description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n\n load_current_value do\n current_state = is_choco_installed?\n current_value_does_not_exist! if current_state == false\n current_state\n end\n\n def is_choco_installed?\n ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n end\n\n def get_choco_version\n powershell_exec(\"choco --version\").result\n end\n\n def existing_version\n Gem::Version.new(get_choco_version)\n end\n\n def define_resource_requirements\n [ new_resource.proxy_user, new_resource.proxy_password ].each do\n requirements.assert(:install, :upgrade).each do |a|\n a.assertion do\n (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? 
&& !new_resource.proxy_password.nil?)\n end\n a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n end\n end\n end\n\n action :install, description: \"Installs Chocolatey package manager\" do\n unless new_resource.download_url.nil?\n powershell_exec(\"Set-Item -path env:ChocolateyDownloadUrl -Value #{new_resource.download_url}\")\n end\n\n unless new_resource.chocolatey_version.nil?\n powershell_exec(\"Set-Item -path env:ChocolateyVersion -Value #{new_resource.chocolatey_version}\")\n end\n\n if new_resource.use_native_unzip == true\n powershell_exec(\"Set-Item -path env:ChocolateyUseWindowsCompression -Value true\")\n end\n\n if new_resource.ignore_proxy == true\n powershell_exec(\"Set-Item -path env:ChocolateyIgnoreProxy -Value true\")\n end\n\n unless new_resource.proxy_url.nil?\n powershell_exec(\"Set-Item -path env:ChocolateyProxyLocation -Value #{new_resource.proxy_url}\")\n end\n\n unless !new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?\n powershell_exec(\"Set-Item -path env:ChocolateyProxyUser -Value #{new_resource.proxy_user}; Set-Item -path env:ChocolateyProxyPassword -Value #{new_resource.proxy_password}\")\n end\n\n converge_if_changed do\n powershell_exec(\"iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))\").error!", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1369668732", - "repo_full_name": "chef/chef", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "discussion_id": "1369668732", - "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. 
This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version\n+ Gem::Version.new(get_choco_version)\n+ end\n+\n+ def define_resource_requirements\n+ [ new_resource.proxy_user, new_resource.proxy_password ].each do\n+ requirements.assert(:install, :upgrade).each do |a|\n+ a.assertion do\n+ (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?)\n+ end\n+ a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n+ a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n+ end\n+ end\n+ end\n+\n+ action :install, description: \"Installs Chocolatey package manager\" do\n+ unless new_resource.download_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyDownloadUrl -Value #{new_resource.download_url}\")\n+ end\n+\n+ unless new_resource.chocolatey_version.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyVersion -Value #{new_resource.chocolatey_version}\")\n+ end\n+\n+ if new_resource.use_native_unzip == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyUseWindowsCompression -Value true\")\n+ end\n+\n+ if new_resource.ignore_proxy == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyIgnoreProxy -Value true\")\n+ end\n+\n+ unless new_resource.proxy_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyLocation -Value #{new_resource.proxy_url}\")\n+ end\n+\n+ unless !new_resource.proxy_user.nil? 
&& !new_resource.proxy_password.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyUser -Value #{new_resource.proxy_user}; Set-Item -path env:ChocolateyProxyPassword -Value #{new_resource.proxy_password}\")\n+ end\n+\n+ converge_if_changed do\n+ powershell_exec(\"iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))\").error!", - "comment_created_at": "2023-10-24T06:12:24+00:00", - "comment_author": "jaymzh", - "comment_body": "This downloads it, but it doesn't run the ps1 that's downloaded... So it's not installed... I think.", - "pr_file_module": null - }, - { - "comment_id": "1370277897", - "repo_full_name": "chef/chef", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "discussion_id": "1369668732", - "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. 
Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version\n+ Gem::Version.new(get_choco_version)\n+ end\n+\n+ def define_resource_requirements\n+ [ new_resource.proxy_user, new_resource.proxy_password ].each do\n+ requirements.assert(:install, :upgrade).each do |a|\n+ a.assertion do\n+ (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?)\n+ end\n+ a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n+ a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n+ end\n+ end\n+ end\n+\n+ action :install, description: \"Installs Chocolatey package manager\" do\n+ unless new_resource.download_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyDownloadUrl -Value #{new_resource.download_url}\")\n+ end\n+\n+ unless new_resource.chocolatey_version.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyVersion -Value #{new_resource.chocolatey_version}\")\n+ end\n+\n+ if new_resource.use_native_unzip == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyUseWindowsCompression -Value true\")\n+ end\n+\n+ if new_resource.ignore_proxy == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyIgnoreProxy -Value true\")\n+ end\n+\n+ unless new_resource.proxy_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyLocation -Value #{new_resource.proxy_url}\")\n+ end\n+\n+ unless !new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyUser -Value #{new_resource.proxy_user}; Set-Item -path env:ChocolateyProxyPassword -Value #{new_resource.proxy_password}\")\n+ end\n+\n+ converge_if_changed do\n+ powershell_exec(\"iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))\").error!", - "comment_created_at": "2023-10-24T14:24:11+00:00", - "comment_author": "johnmccrae", - "comment_body": "? I ran this on bare metal and it worked?", - "pr_file_module": null - }, - { - "comment_id": "1370284132", - "repo_full_name": "chef/chef", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "discussion_id": "1369668732", - "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. 
Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version\n+ Gem::Version.new(get_choco_version)\n+ end\n+\n+ def define_resource_requirements\n+ [ new_resource.proxy_user, new_resource.proxy_password ].each do\n+ requirements.assert(:install, :upgrade).each do |a|\n+ a.assertion do\n+ (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? 
&& !new_resource.proxy_password.nil?)\n+ end\n+ a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n+ a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n+ end\n+ end\n+ end\n+\n+ action :install, description: \"Installs Chocolatey package manager\" do\n+ unless new_resource.download_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyDownloadUrl -Value #{new_resource.download_url}\")\n+ end\n+\n+ unless new_resource.chocolatey_version.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyVersion -Value #{new_resource.chocolatey_version}\")\n+ end\n+\n+ if new_resource.use_native_unzip == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyUseWindowsCompression -Value true\")\n+ end\n+\n+ if new_resource.ignore_proxy == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyIgnoreProxy -Value true\")\n+ end\n+\n+ unless new_resource.proxy_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyLocation -Value #{new_resource.proxy_url}\")\n+ end\n+\n+ unless !new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyUser -Value #{new_resource.proxy_user}; Set-Item -path env:ChocolateyProxyPassword -Value #{new_resource.proxy_password}\")\n+ end\n+\n+ converge_if_changed do\n+ powershell_exec(\"iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))\").error!", - "comment_created_at": "2023-10-24T14:27:24+00:00", - "comment_author": "johnmccrae", - "comment_body": "testing again", - "pr_file_module": null - }, - { - "comment_id": "1370407701", - "repo_full_name": "chef/chef", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "discussion_id": "1369668732", - "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. 
This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version\n+ Gem::Version.new(get_choco_version)\n+ end\n+\n+ def define_resource_requirements\n+ [ new_resource.proxy_user, new_resource.proxy_password ].each do\n+ requirements.assert(:install, :upgrade).each do |a|\n+ a.assertion do\n+ (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?)\n+ end\n+ a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n+ a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n+ end\n+ end\n+ end\n+\n+ action :install, description: \"Installs Chocolatey package manager\" do\n+ unless new_resource.download_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyDownloadUrl -Value #{new_resource.download_url}\")\n+ end\n+\n+ unless new_resource.chocolatey_version.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyVersion -Value #{new_resource.chocolatey_version}\")\n+ end\n+\n+ if new_resource.use_native_unzip == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyUseWindowsCompression -Value true\")\n+ end\n+\n+ if new_resource.ignore_proxy == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyIgnoreProxy -Value true\")\n+ end\n+\n+ unless new_resource.proxy_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyLocation -Value #{new_resource.proxy_url}\")\n+ end\n+\n+ unless !new_resource.proxy_user.nil? 
&& !new_resource.proxy_password.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyUser -Value #{new_resource.proxy_user}; Set-Item -path env:ChocolateyProxyPassword -Value #{new_resource.proxy_password}\")\n+ end\n+\n+ converge_if_changed do\n+ powershell_exec(\"iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))\").error!", - "comment_created_at": "2023-10-24T15:26:16+00:00", - "comment_author": "johnmccrae", - "comment_body": "I see what you're asking now. the IEX at the beginning of the block is a short-cut for Invoke-Expression. So the IEX calls the block which downloads the installer ps1 file then executes it.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1370779209", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "created_at": "2023-10-24T20:31:00+00:00", - "commented_code": "class Chef\n class Resource\n class ChocolateyInstaller < Chef::Resource\n provides :chocolatey_installer\n\n description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n introduced \"18.3\"\n examples <<~DOC\n **Install Chocolatey**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n end\n ```\n\n **Uninstall Chocolatey**\n\n ```ruby\n chocolatey_installer 'Some random verbiage' do\n action :uninstall\n end\n ```\n\n **Install Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :install\n download_url \"https://www.contoso.com/foo\"\n chocolatey_version '2.12.24'\n end\n ```\n\n **Upgrade Chocolatey with Parameters**\n\n ```ruby\n chocolatey_installer 'latest' do\n action :upgrade\n chocolatey_version '2.12.24'\n end\n ```\n DOC\n\n allowed_actions :install, :uninstall, :upgrade\n\n property :download_url, String,\n description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n\n property :chocolatey_version, String,\n description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n\n property :use_native_unzip, [TrueClass, FalseClass], default: false,\n description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n\n property :ignore_proxy, [TrueClass, FalseClass], default: false,\n description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n\n property :proxy_url, String,\n description: \"Specifies the proxy URL to use during the download.\"\n\n property :proxy_user, String,\n description: \"The username to use to build a proxy credential with. 
Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n\n property :proxy_password, String,\n description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n\n load_current_value do\n current_state = is_choco_installed?\n current_value_does_not_exist! if current_state == false\n current_state\n end\n\n def is_choco_installed?\n ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n end\n\n def get_choco_version\n powershell_exec(\"choco --version\").result\n end\n\n def existing_version\n Gem::Version.new(get_choco_version)\n end\n\n def define_resource_requirements\n [ new_resource.proxy_user, new_resource.proxy_password ].each do\n requirements.assert(:install, :upgrade).each do |a|\n a.assertion do\n (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?)\n end\n a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n end\n end\n end\n\n action :install, description: \"Installs Chocolatey package manager\" do\n unless new_resource.download_url.nil?\n powershell_exec(\"Set-Item -path env:ChocolateyDownloadUrl -Value #{new_resource.download_url}\")\n end\n\n unless new_resource.chocolatey_version.nil?\n powershell_exec(\"Set-Item -path env:ChocolateyVersion -Value #{new_resource.chocolatey_version}\")\n end\n\n if new_resource.use_native_unzip == true\n powershell_exec(\"Set-Item -path env:ChocolateyUseWindowsCompression -Value true\")\n end\n\n if new_resource.ignore_proxy == true\n powershell_exec(\"Set-Item -path env:ChocolateyIgnoreProxy -Value true\")\n end\n\n unless new_resource.proxy_url.nil?\n powershell_exec(\"Set-Item -path env:ChocolateyProxyLocation -Value #{new_resource.proxy_url}\")\n end\n\n unless !new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?\n powershell_exec(\"Set-Item -path env:ChocolateyProxyUser -Value #{new_resource.proxy_user}; Set-Item -path env:ChocolateyProxyPassword -Value #{new_resource.proxy_password}\")\n end\n\n converge_if_changed do\n powershell_exec(\"iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))\").error!", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1370779209", - "repo_full_name": "chef/chef", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "discussion_id": "1370779209", - "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. 
Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version\n+ Gem::Version.new(get_choco_version)\n+ end\n+\n+ def define_resource_requirements\n+ [ new_resource.proxy_user, new_resource.proxy_password ].each do\n+ requirements.assert(:install, :upgrade).each do |a|\n+ a.assertion do\n+ (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? 
&& !new_resource.proxy_password.nil?)\n+ end\n+ a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n+ a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n+ end\n+ end\n+ end\n+\n+ action :install, description: \"Installs Chocolatey package manager\" do\n+ unless new_resource.download_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyDownloadUrl -Value #{new_resource.download_url}\")\n+ end\n+\n+ unless new_resource.chocolatey_version.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyVersion -Value #{new_resource.chocolatey_version}\")\n+ end\n+\n+ if new_resource.use_native_unzip == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyUseWindowsCompression -Value true\")\n+ end\n+\n+ if new_resource.ignore_proxy == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyIgnoreProxy -Value true\")\n+ end\n+\n+ unless new_resource.proxy_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyLocation -Value #{new_resource.proxy_url}\")\n+ end\n+\n+ unless !new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyUser -Value #{new_resource.proxy_user}; Set-Item -path env:ChocolateyProxyPassword -Value #{new_resource.proxy_password}\")\n+ end\n+\n+ converge_if_changed do\n+ powershell_exec(\"iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))\").error!", - "comment_created_at": "2023-10-24T20:31:00+00:00", - "comment_author": "tpowell-progress", - "comment_body": "```suggestion\r\n # note that Invoke-Expression is being called on the downloaded script (outer parens), not triggering the script download (inner parens)\r\n powershell_exec(\"Invoke-Expression ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))\").error!\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1371774835", - "repo_full_name": "chef/chef", - "pr_number": 14043, - "pr_file": "lib/chef/resource/chocolatey_installer.rb", - "discussion_id": "1370779209", - "commented_code": "@@ -0,0 +1,198 @@\n+class Chef\n+ class Resource\n+ class ChocolateyInstaller < Chef::Resource\n+ provides :chocolatey_installer\n+\n+ description \"Use the Chocolatey Installer resource to ensure that Choco is installed to your specification. Use the Chocolatey Feature resource to customize your install\"\n+ introduced \"18.3\"\n+ examples <<~DOC\n+ **Install Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ end\n+ ```\n+\n+ **Uninstall Chocolatey**\n+\n+ ```ruby\n+ chocolatey_installer 'Some random verbiage' do\n+ action :uninstall\n+ end\n+ ```\n+\n+ **Install Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :install\n+ download_url \"https://www.contoso.com/foo\"\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+\n+ **Upgrade Chocolatey with Parameters**\n+\n+ ```ruby\n+ chocolatey_installer 'latest' do\n+ action :upgrade\n+ chocolatey_version '2.12.24'\n+ end\n+ ```\n+ DOC\n+\n+ allowed_actions :install, :uninstall, :upgrade\n+\n+ property :download_url, String,\n+ description: \"The URL to download Chocolatey from. This defaults to the value of $env:ChocolateyDownloadUrl, if it is set, and otherwise falls back to the official Chocolatey community repository to download the Chocolatey package. 
It can be used for offline installation by providing a path to a Chocolatey.nupkg.\"\n+\n+ property :chocolatey_version, String,\n+ description: \"Specifies a target version of Chocolatey to install. By default, the latest stable version is installed. This will use the value in $env:ChocolateyVersion by default, if that environment variable is present. This parameter is ignored if download_url is set.\"\n+\n+ property :use_native_unzip, [TrueClass, FalseClass], default: false,\n+ description: \"If set, uses built-in Windows decompression tools instead of 7zip when unpacking the downloaded nupkg. This will be set by default if use_native_unzip is set to a value other than 'false' or '0'. This parameter will be ignored in PS 5+ in favour of using the Expand-Archive built in PowerShell cmdlet directly.\"\n+\n+ property :ignore_proxy, [TrueClass, FalseClass], default: false,\n+ description: \"If set, ignores any configured proxy. This will override any proxy environment variables or parameters. This will be set by default if ignore_proxy is set to a value other than 'false' or '0'.\"\n+\n+ property :proxy_url, String,\n+ description: \"Specifies the proxy URL to use during the download.\"\n+\n+ property :proxy_user, String,\n+ description: \"The username to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_password are set\"\n+\n+ property :proxy_password, String,\n+ description: \"The password to use to build a proxy credential with. Will be consumed by the proxy_credential property if both this property and proxy_user are set\"\n+\n+ load_current_value do\n+ current_state = is_choco_installed?\n+ current_value_does_not_exist! if current_state == false\n+ current_state\n+ end\n+\n+ def is_choco_installed?\n+ ::File.exist?(\"#{ENV[\"ALLUSERSPROFILE\"]}\\\\chocolatey\\\\bin\\\\choco.exe\")\n+ end\n+\n+ def get_choco_version\n+ powershell_exec(\"choco --version\").result\n+ end\n+\n+ def existing_version\n+ Gem::Version.new(get_choco_version)\n+ end\n+\n+ def define_resource_requirements\n+ [ new_resource.proxy_user, new_resource.proxy_password ].each do\n+ requirements.assert(:install, :upgrade).each do |a|\n+ a.assertion do\n+ (!new_resource.proxy_user.nil? && new_resource.proxy_password.nil?) || (new_resource.proxy_user.nil? && !new_resource.proxy_password.nil?)\n+ end\n+ a.failure_message(Chef::Exceptions::ValidationFailed, \"You must specify both a proxy_user and a proxy_password\")\n+ a.whyrun(\"Assuming that if you have configured a 'proxy_user' you must also supply a 'proxy_password'\")\n+ end\n+ end\n+ end\n+\n+ action :install, description: \"Installs Chocolatey package manager\" do\n+ unless new_resource.download_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyDownloadUrl -Value #{new_resource.download_url}\")\n+ end\n+\n+ unless new_resource.chocolatey_version.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyVersion -Value #{new_resource.chocolatey_version}\")\n+ end\n+\n+ if new_resource.use_native_unzip == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyUseWindowsCompression -Value true\")\n+ end\n+\n+ if new_resource.ignore_proxy == true\n+ powershell_exec(\"Set-Item -path env:ChocolateyIgnoreProxy -Value true\")\n+ end\n+\n+ unless new_resource.proxy_url.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyLocation -Value #{new_resource.proxy_url}\")\n+ end\n+\n+ unless !new_resource.proxy_user.nil? 
&& !new_resource.proxy_password.nil?\n+ powershell_exec(\"Set-Item -path env:ChocolateyProxyUser -Value #{new_resource.proxy_user}; Set-Item -path env:ChocolateyProxyPassword -Value #{new_resource.proxy_password}\")\n+ end\n+\n+ converge_if_changed do\n+ powershell_exec(\"iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))\").error!", - "comment_created_at": "2023-10-25T13:35:03+00:00", - "comment_author": "johnmccrae", - "comment_body": "done", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "831753880", - "pr_number": 12694, - "pr_file": "lib/chef/resource/selinux_module.rb", - "created_at": "2022-03-22T04:14:24+00:00", - "commented_code": "#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nrequire_relative \"../resource\"\n\nclass Chef\n class Resource\n \tclass SelinuxModule < Chef::Resource\n \t\tunified_mode true\n\n \t\tprovides :selinux_module\n\n \t\tproperty :module_name, String,\n \t\t name_property: true,\n \t\t description: 'Override the module name'\n\n \t\tproperty :source, String,\n \t\t description: 'Module source file name'\n\n \t\tproperty :content, String,\n \t\t description: 'Module source as String'\n\n \t\tproperty :cookbook, String,\n \t\t description: 'Cookbook to source from module source file from(if it is not located in the current cookbook). 
The default value is the current cookbook.',\n desired_state: false\n\n \t\tproperty :base_dir, String,\n \t\t default: '/etc/selinux/local',\n \t\t description: 'Directory to create module source file in'\n\n \t\taction_class do\n \t\t def selinux_module_filepath(type)\n \t\t path = ::File.join(new_resource.base_dir, \"#{new_resource.module_name}\")\n \t\t path.concat(\".#{type}\") if type\n \t\t end\n\n \t\t def list_installed_modules\n \t\t shell_out!('semodule --list-modules').stdout.split(\"\\n\").map { |x| x.split(/\\s/).first }\n \t\t end\n \t\tend\n\n \t\taction :create do\n \t\t directory new_resource.base_dir\n\n \t\t if property_is_set?(:content)\n \t\t file selinux_module_filepath('te') do\n \t\t content new_resource.content\n\n \t\t mode '0600'\n \t\t owner 'root'\n \t\t group 'root'\n\n \t\t action :create\n\n \t\t notifies :run, \"execute[Compiling SELinux modules at '#{new_resource.base_dir}']\", :immediately\n \t\t end\n \t\t else\n \t\t cookbook_file selinux_module_filepath('te') do\n \t\t cookbook new_resource.cookbook\n \t\t source new_resource.source\n\n \t\t mode '0600'\n \t\t owner 'root'\n \t\t group 'root'\n\n \t\t action :create\n\n \t\t notifies :run, \"execute[Compiling SELinux modules at '#{new_resource.base_dir}']\", :immediately\n \t\t end\n \t\t end\n\n \t\t execute \"Compiling SELinux modules at '#{new_resource.base_dir}'\" do\n \t\t cwd new_resource.base_dir\n \t\t command \"make -C #{new_resource.base_dir} -f /usr/share/selinux/devel/Makefile\"\n \t\t timeout 120\n \t\t user 'root'\n\n \t\t action :nothing\n\n \t\t notifies :run, \"execute[Install SELinux module '#{selinux_module_filepath('pp')}']\", :immediately\n \t\t end\n\n \t\t raise \"Compilation must have failed, no 'pp' file found at: '#{selinux_module_filepath('pp')}'\" unless ::File.exist?(selinux_module_filepath('pp'))\n\n \t\t execute \"Install SELinux module '#{selinux_module_filepath('pp')}'\" do\n \t\t command \"semodule --install '#{selinux_module_filepath('pp')}'\"\n \t\t action :nothing\n \t\t end\n \t\tend\n\n \t\taction :delete do\n \t\t %w(fc if pp te).each do |type|\n \t\t next unless ::File.exist?(selinux_module_filepath(type))\n\n \t\t file selinux_module_filepath(type) do\n \t\t action :delete\n \t\t end\n \t\t end\n \t\tend\n\n \t\taction :install do\n \t\t raise \"Module must be compiled before it can be installed, no 'pp' file found at: '#{selinux_module_filepath('pp')}'\" unless ::File.exist?(selinux_module_filepath('pp'))\n\n \t\t unless list_installed_modules.include? 
new_resource.module_name\n \t\t converge_by \"Install SELinux module #{selinux_module_filepath('pp')}\" do\n \t\t shell_out!(\"semodule --install '#{selinux_module_filepath('pp')}'\")", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "831753880", - "repo_full_name": "chef/chef", - "pr_number": 12694, - "pr_file": "lib/chef/resource/selinux_module.rb", - "discussion_id": "831753880", - "commented_code": "@@ -0,0 +1,130 @@\n+#\n+# Licensed under the Apache License, Version 2.0 (the \"License\");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an \"AS IS\" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+\n+require_relative \"../resource\"\n+\n+class Chef\n+ class Resource\n+ \tclass SelinuxModule < Chef::Resource\n+ \t\tunified_mode true\n+\n+ \t\tprovides :selinux_module\n+\n+ \t\tproperty :module_name, String,\n+ \t\t name_property: true,\n+ \t\t description: 'Override the module name'\n+\n+ \t\tproperty :source, String,\n+ \t\t description: 'Module source file name'\n+\n+ \t\tproperty :content, String,\n+ \t\t description: 'Module source as String'\n+\n+ \t\tproperty :cookbook, String,\n+ \t\t description: 'Cookbook to source from module source file from(if it is not located in the current cookbook). The default value is the current cookbook.',\n+ desired_state: false\n+\n+ \t\tproperty :base_dir, String,\n+ \t\t default: '/etc/selinux/local',\n+ \t\t description: 'Directory to create module source file in'\n+\n+ \t\taction_class do\n+ \t\t def selinux_module_filepath(type)\n+ \t\t path = ::File.join(new_resource.base_dir, \"#{new_resource.module_name}\")\n+ \t\t path.concat(\".#{type}\") if type\n+ \t\t end\n+\n+ \t\t def list_installed_modules\n+ \t\t shell_out!('semodule --list-modules').stdout.split(\"\\n\").map { |x| x.split(/\\s/).first }\n+ \t\t end\n+ \t\tend\n+\n+ \t\taction :create do\n+ \t\t directory new_resource.base_dir\n+\n+ \t\t if property_is_set?(:content)\n+ \t\t file selinux_module_filepath('te') do\n+ \t\t content new_resource.content\n+\n+ \t\t mode '0600'\n+ \t\t owner 'root'\n+ \t\t group 'root'\n+\n+ \t\t action :create\n+\n+ \t\t notifies :run, \"execute[Compiling SELinux modules at '#{new_resource.base_dir}']\", :immediately\n+ \t\t end\n+ \t\t else\n+ \t\t cookbook_file selinux_module_filepath('te') do\n+ \t\t cookbook new_resource.cookbook\n+ \t\t source new_resource.source\n+\n+ \t\t mode '0600'\n+ \t\t owner 'root'\n+ \t\t group 'root'\n+\n+ \t\t action :create\n+\n+ \t\t notifies :run, \"execute[Compiling SELinux modules at '#{new_resource.base_dir}']\", :immediately\n+ \t\t end\n+ \t\t end\n+\n+ \t\t execute \"Compiling SELinux modules at '#{new_resource.base_dir}'\" do\n+ \t\t cwd new_resource.base_dir\n+ \t\t command \"make -C #{new_resource.base_dir} -f /usr/share/selinux/devel/Makefile\"\n+ \t\t timeout 120\n+ \t\t user 'root'\n+\n+ \t\t action :nothing\n+\n+ \t\t notifies :run, \"execute[Install SELinux module '#{selinux_module_filepath('pp')}']\", :immediately\n+ \t\t end\n+\n+ \t\t raise \"Compilation must have failed, no 'pp' file found at: '#{selinux_module_filepath('pp')}'\" unless ::File.exist?(selinux_module_filepath('pp'))\n+\n+ 
\t\t execute \"Install SELinux module '#{selinux_module_filepath('pp')}'\" do\n+ \t\t command \"semodule --install '#{selinux_module_filepath('pp')}'\"\n+ \t\t action :nothing\n+ \t\t end\n+ \t\tend\n+\n+ \t\taction :delete do\n+ \t\t %w(fc if pp te).each do |type|\n+ \t\t next unless ::File.exist?(selinux_module_filepath(type))\n+\n+ \t\t file selinux_module_filepath(type) do\n+ \t\t action :delete\n+ \t\t end\n+ \t\t end\n+ \t\tend\n+\n+ \t\taction :install do\n+ \t\t raise \"Module must be compiled before it can be installed, no 'pp' file found at: '#{selinux_module_filepath('pp')}'\" unless ::File.exist?(selinux_module_filepath('pp'))\n+\n+ \t\t unless list_installed_modules.include? new_resource.module_name\n+ \t\t converge_by \"Install SELinux module #{selinux_module_filepath('pp')}\" do\n+ \t\t shell_out!(\"semodule --install '#{selinux_module_filepath('pp')}'\")", - "comment_created_at": "2022-03-22T04:14:24+00:00", - "comment_author": "lamont-granquist", - "comment_body": "one thing i'd suggest is using:\r\n\r\n```\r\nshell_out!(\"semodule\", \"--install\", selinux_module_filepath('pp'))\r\n```\r\n\r\nthat avoids starting up a shell to parse the command line, and feeds the arguments into argv[0..2] and as a result the quoting of the 2nd argument is no longer necessary.", - "pr_file_module": null - }, - { - "comment_id": "846295912", - "repo_full_name": "chef/chef", - "pr_number": 12694, - "pr_file": "lib/chef/resource/selinux_module.rb", - "discussion_id": "831753880", - "commented_code": "@@ -0,0 +1,130 @@\n+#\n+# Licensed under the Apache License, Version 2.0 (the \"License\");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an \"AS IS\" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+\n+require_relative \"../resource\"\n+\n+class Chef\n+ class Resource\n+ \tclass SelinuxModule < Chef::Resource\n+ \t\tunified_mode true\n+\n+ \t\tprovides :selinux_module\n+\n+ \t\tproperty :module_name, String,\n+ \t\t name_property: true,\n+ \t\t description: 'Override the module name'\n+\n+ \t\tproperty :source, String,\n+ \t\t description: 'Module source file name'\n+\n+ \t\tproperty :content, String,\n+ \t\t description: 'Module source as String'\n+\n+ \t\tproperty :cookbook, String,\n+ \t\t description: 'Cookbook to source from module source file from(if it is not located in the current cookbook). 
The default value is the current cookbook.',\n+ desired_state: false\n+\n+ \t\tproperty :base_dir, String,\n+ \t\t default: '/etc/selinux/local',\n+ \t\t description: 'Directory to create module source file in'\n+\n+ \t\taction_class do\n+ \t\t def selinux_module_filepath(type)\n+ \t\t path = ::File.join(new_resource.base_dir, \"#{new_resource.module_name}\")\n+ \t\t path.concat(\".#{type}\") if type\n+ \t\t end\n+\n+ \t\t def list_installed_modules\n+ \t\t shell_out!('semodule --list-modules').stdout.split(\"\\n\").map { |x| x.split(/\\s/).first }\n+ \t\t end\n+ \t\tend\n+\n+ \t\taction :create do\n+ \t\t directory new_resource.base_dir\n+\n+ \t\t if property_is_set?(:content)\n+ \t\t file selinux_module_filepath('te') do\n+ \t\t content new_resource.content\n+\n+ \t\t mode '0600'\n+ \t\t owner 'root'\n+ \t\t group 'root'\n+\n+ \t\t action :create\n+\n+ \t\t notifies :run, \"execute[Compiling SELinux modules at '#{new_resource.base_dir}']\", :immediately\n+ \t\t end\n+ \t\t else\n+ \t\t cookbook_file selinux_module_filepath('te') do\n+ \t\t cookbook new_resource.cookbook\n+ \t\t source new_resource.source\n+\n+ \t\t mode '0600'\n+ \t\t owner 'root'\n+ \t\t group 'root'\n+\n+ \t\t action :create\n+\n+ \t\t notifies :run, \"execute[Compiling SELinux modules at '#{new_resource.base_dir}']\", :immediately\n+ \t\t end\n+ \t\t end\n+\n+ \t\t execute \"Compiling SELinux modules at '#{new_resource.base_dir}'\" do\n+ \t\t cwd new_resource.base_dir\n+ \t\t command \"make -C #{new_resource.base_dir} -f /usr/share/selinux/devel/Makefile\"\n+ \t\t timeout 120\n+ \t\t user 'root'\n+\n+ \t\t action :nothing\n+\n+ \t\t notifies :run, \"execute[Install SELinux module '#{selinux_module_filepath('pp')}']\", :immediately\n+ \t\t end\n+\n+ \t\t raise \"Compilation must have failed, no 'pp' file found at: '#{selinux_module_filepath('pp')}'\" unless ::File.exist?(selinux_module_filepath('pp'))\n+\n+ \t\t execute \"Install SELinux module '#{selinux_module_filepath('pp')}'\" do\n+ \t\t command \"semodule --install '#{selinux_module_filepath('pp')}'\"\n+ \t\t action :nothing\n+ \t\t end\n+ \t\tend\n+\n+ \t\taction :delete do\n+ \t\t %w(fc if pp te).each do |type|\n+ \t\t next unless ::File.exist?(selinux_module_filepath(type))\n+\n+ \t\t file selinux_module_filepath(type) do\n+ \t\t action :delete\n+ \t\t end\n+ \t\t end\n+ \t\tend\n+\n+ \t\taction :install do\n+ \t\t raise \"Module must be compiled before it can be installed, no 'pp' file found at: '#{selinux_module_filepath('pp')}'\" unless ::File.exist?(selinux_module_filepath('pp'))\n+\n+ \t\t unless list_installed_modules.include? new_resource.module_name\n+ \t\t converge_by \"Install SELinux module #{selinux_module_filepath('pp')}\" do\n+ \t\t shell_out!(\"semodule --install '#{selinux_module_filepath('pp')}'\")", - "comment_created_at": "2022-04-08T16:24:41+00:00", - "comment_author": "neha-p6", - "comment_body": "Updated for now few occurrences. (Not sure if we should be splitting ones with pipe operator) ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "687989783", - "pr_number": 11905, - "pr_file": "lib/chef/provider/remote_file/http.rb", - "created_at": "2021-08-12T18:32:45+00:00", - "commented_code": "def http_client_opts\n opts = {}\n # For http request retry and retry_delay was coming from config (default values i.e. 
retry: 5, retry_delay: 2)\n # Added retries, retry_delay options to set \"retries\" and \"retry_delay\" properly\n opts[:retries] = new_resource.retries\n opts[:retry_delay] = new_resource.retry_delay", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "687989783", - "repo_full_name": "chef/chef", - "pr_number": 11905, - "pr_file": "lib/chef/provider/remote_file/http.rb", - "discussion_id": "687989783", - "commented_code": "@@ -122,6 +122,10 @@ def etag_from(response)\n \n def http_client_opts\n opts = {}\n+ # For http request retry and retry_delay was coming from config (default values i.e. retry: 5, retry_delay: 2)\n+ # Added retries, retry_delay options to set \"retries\" and \"retry_delay\" properly\n+ opts[:retries] = new_resource.retries\n+ opts[:retry_delay] = new_resource.retry_delay", - "comment_created_at": "2021-08-12T18:32:45+00:00", - "comment_author": "lamont-granquist", - "comment_body": "instead of this there should be a `property :http_options` hash added to the remote_file resource and that should be merged with the defaults here in http_client_opts", - "pr_file_module": null - }, - { - "comment_id": "688368989", - "repo_full_name": "chef/chef", - "pr_number": 11905, - "pr_file": "lib/chef/provider/remote_file/http.rb", - "discussion_id": "687989783", - "commented_code": "@@ -122,6 +122,10 @@ def etag_from(response)\n \n def http_client_opts\n opts = {}\n+ # For http request retry and retry_delay was coming from config (default values i.e. retry: 5, retry_delay: 2)\n+ # Added retries, retry_delay options to set \"retries\" and \"retry_delay\" properly\n+ opts[:retries] = new_resource.retries\n+ opts[:retry_delay] = new_resource.retry_delay", - "comment_created_at": "2021-08-13T09:16:12+00:00", - "comment_author": "antima-gupta", - "comment_body": "Added `property :http_options` , please have look.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-standardize-bash-error-handling.json b/_reviewers/chef-standardize-bash-error-handling.json new file mode 100644 index 0000000..bc1448c --- /dev/null +++ b/_reviewers/chef-standardize-bash-error-handling.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "2142594793", + "pr_number": 15049, + "pr_file": ".expeditor/build-docker-images.sh", + "created_at": "2025-06-12T12:21:24+00:00", + "commented_code": "#! /bin/bash\nset -eux -o pipefail", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2142594793", + "repo_full_name": "chef/chef", + "pr_number": 15049, + "pr_file": ".expeditor/build-docker-images.sh", + "discussion_id": "2142594793", + "commented_code": "@@ -1,6 +1,9 @@\n #! 
/bin/bash\n set -eux -o pipefail", + "comment_created_at": "2025-06-12T12:21:24+00:00", + "comment_author": "sean-sype-simmons", + "comment_body": "please update to: `set -eou pipefail` ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2142595017", + "pr_number": 15049, + "pr_file": ".expeditor/docker-manifest-create.sh", + "created_at": "2025-06-12T12:21:31+00:00", + "commented_code": "set -eu -o pipefail", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2142595017", + "repo_full_name": "chef/chef", + "pr_number": 15049, + "pr_file": ".expeditor/docker-manifest-create.sh", + "discussion_id": "2142595017", + "commented_code": "@@ -2,6 +2,8 @@\n set -eu -o pipefail", + "comment_created_at": "2025-06-12T12:21:31+00:00", + "comment_author": "sean-sype-simmons", + "comment_body": "please update to: `set -eou pipefail` ", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-standardize-bash-error-handling.md b/_reviewers/chef-standardize-bash-error-handling.md index fa74bed..843cb5f 100644 --- a/_reviewers/chef-standardize-bash-error-handling.md +++ b/_reviewers/chef-standardize-bash-error-handling.md @@ -20,51 +20,3 @@ set -eou pipefail # Rest of your script ``` - - -[ - { - "discussion_id": "2142594793", - "pr_number": 15049, - "pr_file": ".expeditor/build-docker-images.sh", - "created_at": "2025-06-12T12:21:24+00:00", - "commented_code": "#! /bin/bash\nset -eux -o pipefail", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2142594793", - "repo_full_name": "chef/chef", - "pr_number": 15049, - "pr_file": ".expeditor/build-docker-images.sh", - "discussion_id": "2142594793", - "commented_code": "@@ -1,6 +1,9 @@\n #! /bin/bash\n set -eux -o pipefail", - "comment_created_at": "2025-06-12T12:21:24+00:00", - "comment_author": "sean-sype-simmons", - "comment_body": "please update to: `set -eou pipefail` ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2142595017", - "pr_number": 15049, - "pr_file": ".expeditor/docker-manifest-create.sh", - "created_at": "2025-06-12T12:21:31+00:00", - "commented_code": "set -eu -o pipefail", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2142595017", - "repo_full_name": "chef/chef", - "pr_number": 15049, - "pr_file": ".expeditor/docker-manifest-create.sh", - "discussion_id": "2142595017", - "commented_code": "@@ -2,6 +2,8 @@\n set -eu -o pipefail", - "comment_created_at": "2025-06-12T12:21:31+00:00", - "comment_author": "sean-sype-simmons", - "comment_body": "please update to: `set -eou pipefail` ", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-standardize-installation-directory-paths.json b/_reviewers/chef-standardize-installation-directory-paths.json new file mode 100644 index 0000000..c7c39c1 --- /dev/null +++ b/_reviewers/chef-standardize-installation-directory-paths.json @@ -0,0 +1,186 @@ +[ + { + "discussion_id": "1928142368", + "pr_number": 14820, + "pr_file": "knife/Gemfile", + "created_at": "2025-01-24T06:18:23+00:00", + "commented_code": "gem \"crack\", \"< 0.4.6\" # due to https://github.com/jnunemaker/crack/pull/75\n gem \"rake\", \">= 12.3.3\"\n gem \"rspec\"\n gem \"chef-bin\", path: \"../chef-bin\"\n gem \"chef-bin\", \"~> 18.6\" #path: \"../chef-bin\"", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1928142368", + "repo_full_name": "chef/chef", + "pr_number": 14820, + "pr_file": "knife/Gemfile", + "discussion_id": 
"1928142368", + "commented_code": "@@ -8,7 +8,7 @@ group(:development, :test) do\n gem \"crack\", \"< 0.4.6\" # due to https://github.com/jnunemaker/crack/pull/75\n gem \"rake\", \">= 12.3.3\"\n gem \"rspec\"\n- gem \"chef-bin\", path: \"../chef-bin\"\n+ gem \"chef-bin\", \"~> 18.6\" #path: \"../chef-bin\"", + "comment_created_at": "2025-01-24T06:18:23+00:00", + "comment_author": "neha-p6", + "comment_body": "move this to gemspec as well.", + "pr_file_module": null + }, + { + "comment_id": "1928166319", + "repo_full_name": "chef/chef", + "pr_number": 14820, + "pr_file": "knife/Gemfile", + "discussion_id": "1928142368", + "commented_code": "@@ -8,7 +8,7 @@ group(:development, :test) do\n gem \"crack\", \"< 0.4.6\" # due to https://github.com/jnunemaker/crack/pull/75\n gem \"rake\", \">= 12.3.3\"\n gem \"rspec\"\n- gem \"chef-bin\", path: \"../chef-bin\"\n+ gem \"chef-bin\", \"~> 18.6\" #path: \"../chef-bin\"", + "comment_created_at": "2025-01-24T06:50:44+00:00", + "comment_author": "ashiqueps", + "comment_body": "Done", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1776206090", + "pr_number": 14605, + "pr_file": ".expeditor/scripts/infra-hab.spec", + "created_at": "2024-09-26T02:02:16+00:00", + "commented_code": "# Disable any shell actions, replace them with simply 'true'\n%define __spec_prep_post true\n%define __spec_prep_pre true\n%define __spec_build_post true\n%define __spec_build_pre true\n%define __spec_install_post true\n%define __spec_install_pre true\n%define __spec_clean_post true\n%define __spec_clean_pre true\n\n# Use SHA256 checksums for all files\n%define _binary_filedigest_algorithm 8\n\n%define _binary_payload w1.xzdio\n\n# Disable creation of build-id links\n%define _build_id_links none\n\n# Metadata\nName: chef\nVersion: %{VERSION}\nRelease: 1%{?dist}\nSummary: The full stack of chef\nAutoReqProv: no\nBuildRoot: %buildroot\nPrefix: /\nGroup: default\nLicense: Chef EULA\nVendor: Omnibus \nURL: https://www.chef.io\nPackager: Chef Software, Inc. 
\n%description\nThe full stack of chef\n\n%prep\n# noop\n\n%build\n# noop\n\n%install\n# noop\n\n%clean\n# noop\n\n%pre\n#!/bin/sh\n#\n# Perform necessary inspec setup steps\n# before package is installed.\n#\n\necho \"You're about to install Chef Infra Client!\"\n\n%post\n#!/bin/sh\n# WARNING: REQUIRES /bin/sh\n#\n# - must run on /bin/sh on solaris 9\n# - must run on /bin/sh on AIX 6.x\n# - this file is sh not bash so do not introduce bash-isms\n# - if you are under 40, get peer review from your elders.\n#\n# Install Chef Infra Client\n#\n\n# Create wrapper binaries into /hab/chef directory\nmkdir -p /hab/chef/bin", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1776206090", + "repo_full_name": "chef/chef", + "pr_number": 14605, + "pr_file": ".expeditor/scripts/infra-hab.spec", + "discussion_id": "1776206090", + "commented_code": "@@ -0,0 +1,243 @@\n+# Disable any shell actions, replace them with simply 'true'\n+%define __spec_prep_post true\n+%define __spec_prep_pre true\n+%define __spec_build_post true\n+%define __spec_build_pre true\n+%define __spec_install_post true\n+%define __spec_install_pre true\n+%define __spec_clean_post true\n+%define __spec_clean_pre true\n+\n+# Use SHA256 checksums for all files\n+%define _binary_filedigest_algorithm 8\n+\n+%define _binary_payload w1.xzdio\n+\n+# Disable creation of build-id links\n+%define _build_id_links none\n+\n+# Metadata\n+Name: chef\n+Version: %{VERSION}\n+Release: 1%{?dist}\n+Summary: The full stack of chef\n+AutoReqProv: no\n+BuildRoot: %buildroot\n+Prefix: /\n+Group: default\n+License: Chef EULA\n+Vendor: Omnibus \n+URL: https://www.chef.io\n+Packager: Chef Software, Inc. \n+%description\n+The full stack of chef\n+\n+%prep\n+# noop\n+\n+%build\n+# noop\n+\n+%install\n+# noop\n+\n+%clean\n+# noop\n+\n+%pre\n+#!/bin/sh\n+#\n+# Perform necessary inspec setup steps\n+# before package is installed.\n+#\n+\n+echo \"You're about to install Chef Infra Client!\"\n+\n+%post\n+#!/bin/sh\n+# WARNING: REQUIRES /bin/sh\n+#\n+# - must run on /bin/sh on solaris 9\n+# - must run on /bin/sh on AIX 6.x\n+# - this file is sh not bash so do not introduce bash-isms\n+# - if you are under 40, get peer review from your elders.\n+#\n+# Install Chef Infra Client\n+#\n+\n+# Create wrapper binaries into /hab/chef directory\n+mkdir -p /hab/chef/bin", + "comment_created_at": "2024-09-26T02:02:16+00:00", + "comment_author": "mwrock", + "comment_body": "I think we want to put these in `/opt/chef` or wherever they exist today.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1907297240", + "pr_number": 14772, + "pr_file": ".expeditor/scripts/chef-infra-client.spec", + "created_at": "2025-01-08T14:45:22+00:00", + "commented_code": "# Disable any shell actions, replace them with simply 'true'\n%define __spec_prep_post true\n%define __spec_prep_pre true\n%define __spec_build_post true\n%define __spec_build_pre true\n%define __spec_install_post true\n%define __spec_install_pre true\n%define __spec_clean_post true\n%define __spec_clean_pre true\n\n# Use SHA256 checksums for all files\n%define _binary_filedigest_algorithm 8\n\n%define _binary_payload w1.xzdio\n\n# Disable creation of build-id links\n%define _build_id_links none\n\nName: chef-infra-client\nVersion: %{VERSION}\nRelease: 1%{?dist}\nSummary: The full stack of Chef Infra Client\nAutoReqProv: \tno\nBuildRoot: \t %buildroot\nPrefix: \t /\nGroup: \t\t default\nLicense: Chef EULA\nURL: \t\t https://www.chef.io\nPackager: \t Chef Software, Inc. 
\nSource0: %{CHEF_INFRA_TAR}\nSource1: %{CHEF_MIGRATE_TAR}\n\n%description\nThe full stack of Chef Infra Client\n\n%prep\n# noop\n\n%build\n# noop\n\n%install\n# Create the installation directory for the migration tools\n#mkdir -p %{buildroot}/opt/chef\nmkdir -p %{buildroot}/opt/chef/{bin,bundle}\n\n# Untar the migration tools into /opt/chef/bin\ntar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n\n# Copy the chef infra tarball into /opt/chef/bundle\ncp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n\n%files\n/opt/chef\n/opt/chef/bin", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1907297240", + "repo_full_name": "chef/chef", + "pr_number": 14772, + "pr_file": ".expeditor/scripts/chef-infra-client.spec", + "discussion_id": "1907297240", + "commented_code": "@@ -0,0 +1,89 @@\n+# Disable any shell actions, replace them with simply 'true'\n+%define __spec_prep_post true\n+%define __spec_prep_pre true\n+%define __spec_build_post true\n+%define __spec_build_pre true\n+%define __spec_install_post true\n+%define __spec_install_pre true\n+%define __spec_clean_post true\n+%define __spec_clean_pre true\n+\n+# Use SHA256 checksums for all files\n+%define _binary_filedigest_algorithm 8\n+\n+%define _binary_payload w1.xzdio\n+\n+# Disable creation of build-id links\n+%define _build_id_links none\n+\n+Name: chef-infra-client\n+Version: %{VERSION}\n+Release: 1%{?dist}\n+Summary: The full stack of Chef Infra Client\n+AutoReqProv: \tno\n+BuildRoot: \t %buildroot\n+Prefix: \t /\n+Group: \t\t default\n+License: Chef EULA\n+URL: \t\t https://www.chef.io\n+Packager: \t Chef Software, Inc. \n+Source0: %{CHEF_INFRA_TAR}\n+Source1: %{CHEF_MIGRATE_TAR}\n+\n+%description\n+The full stack of Chef Infra Client\n+\n+%prep\n+# noop\n+\n+%build\n+# noop\n+\n+%install\n+# Create the installation directory for the migration tools\n+#mkdir -p %{buildroot}/opt/chef\n+mkdir -p %{buildroot}/opt/chef/{bin,bundle}\n+\n+# Untar the migration tools into /opt/chef/bin\n+tar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n+\n+# Copy the chef infra tarball into /opt/chef/bundle\n+cp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n+\n+%files\n+/opt/chef\n+/opt/chef/bin", + "comment_created_at": "2025-01-08T14:45:22+00:00", + "comment_author": "marcparadise", + "comment_body": "Because /opt/chef/bin is already owned by the chef-infra package, we probably don't want to install anything here. \r\nCould we instead use a different directory, something like '/opt/chef/migration' or even include it in '/opt/chef/bundle\"? 
", + "pr_file_module": null + }, + { + "comment_id": "1907382449", + "repo_full_name": "chef/chef", + "pr_number": 14772, + "pr_file": ".expeditor/scripts/chef-infra-client.spec", + "discussion_id": "1907297240", + "commented_code": "@@ -0,0 +1,89 @@\n+# Disable any shell actions, replace them with simply 'true'\n+%define __spec_prep_post true\n+%define __spec_prep_pre true\n+%define __spec_build_post true\n+%define __spec_build_pre true\n+%define __spec_install_post true\n+%define __spec_install_pre true\n+%define __spec_clean_post true\n+%define __spec_clean_pre true\n+\n+# Use SHA256 checksums for all files\n+%define _binary_filedigest_algorithm 8\n+\n+%define _binary_payload w1.xzdio\n+\n+# Disable creation of build-id links\n+%define _build_id_links none\n+\n+Name: chef-infra-client\n+Version: %{VERSION}\n+Release: 1%{?dist}\n+Summary: The full stack of Chef Infra Client\n+AutoReqProv: \tno\n+BuildRoot: \t %buildroot\n+Prefix: \t /\n+Group: \t\t default\n+License: Chef EULA\n+URL: \t\t https://www.chef.io\n+Packager: \t Chef Software, Inc. \n+Source0: %{CHEF_INFRA_TAR}\n+Source1: %{CHEF_MIGRATE_TAR}\n+\n+%description\n+The full stack of Chef Infra Client\n+\n+%prep\n+# noop\n+\n+%build\n+# noop\n+\n+%install\n+# Create the installation directory for the migration tools\n+#mkdir -p %{buildroot}/opt/chef\n+mkdir -p %{buildroot}/opt/chef/{bin,bundle}\n+\n+# Untar the migration tools into /opt/chef/bin\n+tar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n+\n+# Copy the chef infra tarball into /opt/chef/bundle\n+cp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n+\n+%files\n+/opt/chef\n+/opt/chef/bin", + "comment_created_at": "2025-01-08T15:34:21+00:00", + "comment_author": "sajjaphani", + "comment_body": "Would it be a good idea to unpack the migration tool into /opt/chef/migration/bin instead?", + "pr_file_module": null + }, + { + "comment_id": "1907395853", + "repo_full_name": "chef/chef", + "pr_number": 14772, + "pr_file": ".expeditor/scripts/chef-infra-client.spec", + "discussion_id": "1907297240", + "commented_code": "@@ -0,0 +1,89 @@\n+# Disable any shell actions, replace them with simply 'true'\n+%define __spec_prep_post true\n+%define __spec_prep_pre true\n+%define __spec_build_post true\n+%define __spec_build_pre true\n+%define __spec_install_post true\n+%define __spec_install_pre true\n+%define __spec_clean_post true\n+%define __spec_clean_pre true\n+\n+# Use SHA256 checksums for all files\n+%define _binary_filedigest_algorithm 8\n+\n+%define _binary_payload w1.xzdio\n+\n+# Disable creation of build-id links\n+%define _build_id_links none\n+\n+Name: chef-infra-client\n+Version: %{VERSION}\n+Release: 1%{?dist}\n+Summary: The full stack of Chef Infra Client\n+AutoReqProv: \tno\n+BuildRoot: \t %buildroot\n+Prefix: \t /\n+Group: \t\t default\n+License: Chef EULA\n+URL: \t\t https://www.chef.io\n+Packager: \t Chef Software, Inc. 
\n+Source0: %{CHEF_INFRA_TAR}\n+Source1: %{CHEF_MIGRATE_TAR}\n+\n+%description\n+The full stack of Chef Infra Client\n+\n+%prep\n+# noop\n+\n+%build\n+# noop\n+\n+%install\n+# Create the installation directory for the migration tools\n+#mkdir -p %{buildroot}/opt/chef\n+mkdir -p %{buildroot}/opt/chef/{bin,bundle}\n+\n+# Untar the migration tools into /opt/chef/bin\n+tar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n+\n+# Copy the chef infra tarball into /opt/chef/bundle\n+cp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n+\n+%files\n+/opt/chef\n+/opt/chef/bin", + "comment_created_at": "2025-01-08T15:42:48+00:00", + "comment_author": "Stromweld", + "comment_body": "since new install lives in /hab why aren't we using that directory and in the end there shouldn't be any /opt/chef anything unless using omnibus version. That'll make it easier for customers to understand anything related to hab builds lives in /hab/somewhere and omnibus in /opt/chef instead of 1 tool over here and all others over there. This also then makes Cookbook checks easy to accommodate both by simply checking for the existence of /opt/chef.", + "pr_file_module": null + }, + { + "comment_id": "1908191760", + "repo_full_name": "chef/chef", + "pr_number": 14772, + "pr_file": ".expeditor/scripts/chef-infra-client.spec", + "discussion_id": "1907297240", + "commented_code": "@@ -0,0 +1,89 @@\n+# Disable any shell actions, replace them with simply 'true'\n+%define __spec_prep_post true\n+%define __spec_prep_pre true\n+%define __spec_build_post true\n+%define __spec_build_pre true\n+%define __spec_install_post true\n+%define __spec_install_pre true\n+%define __spec_clean_post true\n+%define __spec_clean_pre true\n+\n+# Use SHA256 checksums for all files\n+%define _binary_filedigest_algorithm 8\n+\n+%define _binary_payload w1.xzdio\n+\n+# Disable creation of build-id links\n+%define _build_id_links none\n+\n+Name: chef-infra-client\n+Version: %{VERSION}\n+Release: 1%{?dist}\n+Summary: The full stack of Chef Infra Client\n+AutoReqProv: \tno\n+BuildRoot: \t %buildroot\n+Prefix: \t /\n+Group: \t\t default\n+License: Chef EULA\n+URL: \t\t https://www.chef.io\n+Packager: \t Chef Software, Inc. \n+Source0: %{CHEF_INFRA_TAR}\n+Source1: %{CHEF_MIGRATE_TAR}\n+\n+%description\n+The full stack of Chef Infra Client\n+\n+%prep\n+# noop\n+\n+%build\n+# noop\n+\n+%install\n+# Create the installation directory for the migration tools\n+#mkdir -p %{buildroot}/opt/chef\n+mkdir -p %{buildroot}/opt/chef/{bin,bundle}\n+\n+# Untar the migration tools into /opt/chef/bin\n+tar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n+\n+# Copy the chef infra tarball into /opt/chef/bundle\n+cp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n+\n+%files\n+/opt/chef\n+/opt/chef/bin", + "comment_created_at": "2025-01-09T05:34:40+00:00", + "comment_author": "sajjaphani", + "comment_body": "@marcparadise @Stromweld It appears that using /opt/chef or /hab may have similar drawbacks, as it could interfere with their structures. 
Could we use a separate directory, /opt/chef-migrate, instead, where bin would hold the binary of the migration tool and bundle would hold the tar file?", + "pr_file_module": null + }, + { + "comment_id": "1917826273", + "repo_full_name": "chef/chef", + "pr_number": 14772, + "pr_file": ".expeditor/scripts/chef-infra-client.spec", + "discussion_id": "1907297240", + "commented_code": "@@ -0,0 +1,89 @@\n+# Disable any shell actions, replace them with simply 'true'\n+%define __spec_prep_post true\n+%define __spec_prep_pre true\n+%define __spec_build_post true\n+%define __spec_build_pre true\n+%define __spec_install_post true\n+%define __spec_install_pre true\n+%define __spec_clean_post true\n+%define __spec_clean_pre true\n+\n+# Use SHA256 checksums for all files\n+%define _binary_filedigest_algorithm 8\n+\n+%define _binary_payload w1.xzdio\n+\n+# Disable creation of build-id links\n+%define _build_id_links none\n+\n+Name: chef-infra-client\n+Version: %{VERSION}\n+Release: 1%{?dist}\n+Summary: The full stack of Chef Infra Client\n+AutoReqProv: \tno\n+BuildRoot: \t %buildroot\n+Prefix: \t /\n+Group: \t\t default\n+License: Chef EULA\n+URL: \t\t https://www.chef.io\n+Packager: \t Chef Software, Inc. \n+Source0: %{CHEF_INFRA_TAR}\n+Source1: %{CHEF_MIGRATE_TAR}\n+\n+%description\n+The full stack of Chef Infra Client\n+\n+%prep\n+# noop\n+\n+%build\n+# noop\n+\n+%install\n+# Create the installation directory for the migration tools\n+#mkdir -p %{buildroot}/opt/chef\n+mkdir -p %{buildroot}/opt/chef/{bin,bundle}\n+\n+# Untar the migration tools into /opt/chef/bin\n+tar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n+\n+# Copy the chef infra tarball into /opt/chef/bundle\n+cp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n+\n+%files\n+/opt/chef\n+/opt/chef/bin", + "comment_created_at": "2025-01-16T06:41:20+00:00", + "comment_author": "sajjaphani", + "comment_body": "Changed the directory to place the migration binary and bundle into `/hab/migration`. The binary is now located in `/hab/migration/bin`, and the infra tarball is stored in `/hab/migration/bundle`.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1907298683", + "pr_number": 14772, + "pr_file": ".expeditor/scripts/chef-infra-client.spec", + "created_at": "2025-01-08T14:46:21+00:00", + "commented_code": "# Disable any shell actions, replace them with simply 'true'\n%define __spec_prep_post true\n%define __spec_prep_pre true\n%define __spec_build_post true\n%define __spec_build_pre true\n%define __spec_install_post true\n%define __spec_install_pre true\n%define __spec_clean_post true\n%define __spec_clean_pre true\n\n# Use SHA256 checksums for all files\n%define _binary_filedigest_algorithm 8\n\n%define _binary_payload w1.xzdio\n\n# Disable creation of build-id links\n%define _build_id_links none\n\nName: chef-infra-client\nVersion: %{VERSION}\nRelease: 1%{?dist}\nSummary: The full stack of Chef Infra Client\nAutoReqProv: \tno\nBuildRoot: \t %buildroot\nPrefix: \t /\nGroup: \t\t default\nLicense: Chef EULA\nURL: \t\t https://www.chef.io\nPackager: \t Chef Software, Inc. 
\nSource0: %{CHEF_INFRA_TAR}\nSource1: %{CHEF_MIGRATE_TAR}\n\n%description\nThe full stack of Chef Infra Client\n\n%prep\n# noop\n\n%build\n# noop\n\n%install\n# Create the installation directory for the migration tools\n#mkdir -p %{buildroot}/opt/chef\nmkdir -p %{buildroot}/opt/chef/{bin,bundle}\n\n# Untar the migration tools into /opt/chef/bin\ntar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n\n# Copy the chef infra tarball into /opt/chef/bundle\ncp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n\n%files\n/opt/chef\n/opt/chef/bin\n/opt/chef/bin/*\n/opt/chef/bundle\n/opt/chef/bundle/*\n\n%post\n\n# Determine if --fresh_install needs to be passed based on the existence of the /opt/chef directory\nMIGRATE_CMD=\"/opt/chef/bin/chef-migrate apply airgap\"\nif [ ! -f /opt/chef/bin/chef-client ]; then", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1907298683", + "repo_full_name": "chef/chef", + "pr_number": 14772, + "pr_file": ".expeditor/scripts/chef-infra-client.spec", + "discussion_id": "1907298683", + "commented_code": "@@ -0,0 +1,89 @@\n+# Disable any shell actions, replace them with simply 'true'\n+%define __spec_prep_post true\n+%define __spec_prep_pre true\n+%define __spec_build_post true\n+%define __spec_build_pre true\n+%define __spec_install_post true\n+%define __spec_install_pre true\n+%define __spec_clean_post true\n+%define __spec_clean_pre true\n+\n+# Use SHA256 checksums for all files\n+%define _binary_filedigest_algorithm 8\n+\n+%define _binary_payload w1.xzdio\n+\n+# Disable creation of build-id links\n+%define _build_id_links none\n+\n+Name: chef-infra-client\n+Version: %{VERSION}\n+Release: 1%{?dist}\n+Summary: The full stack of Chef Infra Client\n+AutoReqProv: \tno\n+BuildRoot: \t %buildroot\n+Prefix: \t /\n+Group: \t\t default\n+License: Chef EULA\n+URL: \t\t https://www.chef.io\n+Packager: \t Chef Software, Inc. \n+Source0: %{CHEF_INFRA_TAR}\n+Source1: %{CHEF_MIGRATE_TAR}\n+\n+%description\n+The full stack of Chef Infra Client\n+\n+%prep\n+# noop\n+\n+%build\n+# noop\n+\n+%install\n+# Create the installation directory for the migration tools\n+#mkdir -p %{buildroot}/opt/chef\n+mkdir -p %{buildroot}/opt/chef/{bin,bundle}\n+\n+# Untar the migration tools into /opt/chef/bin\n+tar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n+\n+# Copy the chef infra tarball into /opt/chef/bundle\n+cp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n+\n+%files\n+/opt/chef\n+/opt/chef/bin\n+/opt/chef/bin/*\n+/opt/chef/bundle\n+/opt/chef/bundle/*\n+\n+%post\n+\n+# Determine if --fresh_install needs to be passed based on the existence of the /opt/chef directory\n+MIGRATE_CMD=\"/opt/chef/bin/chef-migrate apply airgap\"\n+if [ ! -f /opt/chef/bin/chef-client ]; then", + "comment_created_at": "2025-01-08T14:46:21+00:00", + "comment_author": "marcparadise", + "comment_body": "/opt/chef/bin/chef-client may not exist, but chef-client could still be installed on the system and available int he path via workstation. In that case, is 'fresh-install' still correct? 
", + "pr_file_module": null + }, + { + "comment_id": "1907375809", + "repo_full_name": "chef/chef", + "pr_number": 14772, + "pr_file": ".expeditor/scripts/chef-infra-client.spec", + "discussion_id": "1907298683", + "commented_code": "@@ -0,0 +1,89 @@\n+# Disable any shell actions, replace them with simply 'true'\n+%define __spec_prep_post true\n+%define __spec_prep_pre true\n+%define __spec_build_post true\n+%define __spec_build_pre true\n+%define __spec_install_post true\n+%define __spec_install_pre true\n+%define __spec_clean_post true\n+%define __spec_clean_pre true\n+\n+# Use SHA256 checksums for all files\n+%define _binary_filedigest_algorithm 8\n+\n+%define _binary_payload w1.xzdio\n+\n+# Disable creation of build-id links\n+%define _build_id_links none\n+\n+Name: chef-infra-client\n+Version: %{VERSION}\n+Release: 1%{?dist}\n+Summary: The full stack of Chef Infra Client\n+AutoReqProv: \tno\n+BuildRoot: \t %buildroot\n+Prefix: \t /\n+Group: \t\t default\n+License: Chef EULA\n+URL: \t\t https://www.chef.io\n+Packager: \t Chef Software, Inc. \n+Source0: %{CHEF_INFRA_TAR}\n+Source1: %{CHEF_MIGRATE_TAR}\n+\n+%description\n+The full stack of Chef Infra Client\n+\n+%prep\n+# noop\n+\n+%build\n+# noop\n+\n+%install\n+# Create the installation directory for the migration tools\n+#mkdir -p %{buildroot}/opt/chef\n+mkdir -p %{buildroot}/opt/chef/{bin,bundle}\n+\n+# Untar the migration tools into /opt/chef/bin\n+tar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n+\n+# Copy the chef infra tarball into /opt/chef/bundle\n+cp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n+\n+%files\n+/opt/chef\n+/opt/chef/bin\n+/opt/chef/bin/*\n+/opt/chef/bundle\n+/opt/chef/bundle/*\n+\n+%post\n+\n+# Determine if --fresh_install needs to be passed based on the existence of the /opt/chef directory\n+MIGRATE_CMD=\"/opt/chef/bin/chef-migrate apply airgap\"\n+if [ ! -f /opt/chef/bin/chef-client ]; then", + "comment_created_at": "2025-01-08T15:29:59+00:00", + "comment_author": "sajjaphani", + "comment_body": "The migration tool has an option to uninstall the Omnibus installation during migration. However, I am not sure whether it accounts for the Workstation. Therefore, I am specifically checking the Omnibus installation here", + "pr_file_module": null + }, + { + "comment_id": "1917069354", + "repo_full_name": "chef/chef", + "pr_number": 14772, + "pr_file": ".expeditor/scripts/chef-infra-client.spec", + "discussion_id": "1907298683", + "commented_code": "@@ -0,0 +1,89 @@\n+# Disable any shell actions, replace them with simply 'true'\n+%define __spec_prep_post true\n+%define __spec_prep_pre true\n+%define __spec_build_post true\n+%define __spec_build_pre true\n+%define __spec_install_post true\n+%define __spec_install_pre true\n+%define __spec_clean_post true\n+%define __spec_clean_pre true\n+\n+# Use SHA256 checksums for all files\n+%define _binary_filedigest_algorithm 8\n+\n+%define _binary_payload w1.xzdio\n+\n+# Disable creation of build-id links\n+%define _build_id_links none\n+\n+Name: chef-infra-client\n+Version: %{VERSION}\n+Release: 1%{?dist}\n+Summary: The full stack of Chef Infra Client\n+AutoReqProv: \tno\n+BuildRoot: \t %buildroot\n+Prefix: \t /\n+Group: \t\t default\n+License: Chef EULA\n+URL: \t\t https://www.chef.io\n+Packager: \t Chef Software, Inc. 
\n+Source0: %{CHEF_INFRA_TAR}\n+Source1: %{CHEF_MIGRATE_TAR}\n+\n+%description\n+The full stack of Chef Infra Client\n+\n+%prep\n+# noop\n+\n+%build\n+# noop\n+\n+%install\n+# Create the installation directory for the migration tools\n+#mkdir -p %{buildroot}/opt/chef\n+mkdir -p %{buildroot}/opt/chef/{bin,bundle}\n+\n+# Untar the migration tools into /opt/chef/bin\n+tar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n+\n+# Copy the chef infra tarball into /opt/chef/bundle\n+cp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n+\n+%files\n+/opt/chef\n+/opt/chef/bin\n+/opt/chef/bin/*\n+/opt/chef/bundle\n+/opt/chef/bundle/*\n+\n+%post\n+\n+# Determine if --fresh_install needs to be passed based on the existence of the /opt/chef directory\n+MIGRATE_CMD=\"/opt/chef/bin/chef-migrate apply airgap\"\n+if [ ! -f /opt/chef/bin/chef-client ]; then", + "comment_created_at": "2025-01-15T17:25:10+00:00", + "comment_author": "marcparadise", + "comment_body": "Let's instead look at declaring a package-level conflict, so that we can't install infra-19 when chef-workstation 'legacy' is installed. \r\nThat will let us avoid the entire class of problems that arise trying to manually manage conflicting files across packages. \r\n\r\nThis can be better supported once workstation itself is released with habitat. ", + "pr_file_module": null + }, + { + "comment_id": "1917787320", + "repo_full_name": "chef/chef", + "pr_number": 14772, + "pr_file": ".expeditor/scripts/chef-infra-client.spec", + "discussion_id": "1907298683", + "commented_code": "@@ -0,0 +1,89 @@\n+# Disable any shell actions, replace them with simply 'true'\n+%define __spec_prep_post true\n+%define __spec_prep_pre true\n+%define __spec_build_post true\n+%define __spec_build_pre true\n+%define __spec_install_post true\n+%define __spec_install_pre true\n+%define __spec_clean_post true\n+%define __spec_clean_pre true\n+\n+# Use SHA256 checksums for all files\n+%define _binary_filedigest_algorithm 8\n+\n+%define _binary_payload w1.xzdio\n+\n+# Disable creation of build-id links\n+%define _build_id_links none\n+\n+Name: chef-infra-client\n+Version: %{VERSION}\n+Release: 1%{?dist}\n+Summary: The full stack of Chef Infra Client\n+AutoReqProv: \tno\n+BuildRoot: \t %buildroot\n+Prefix: \t /\n+Group: \t\t default\n+License: Chef EULA\n+URL: \t\t https://www.chef.io\n+Packager: \t Chef Software, Inc. \n+Source0: %{CHEF_INFRA_TAR}\n+Source1: %{CHEF_MIGRATE_TAR}\n+\n+%description\n+The full stack of Chef Infra Client\n+\n+%prep\n+# noop\n+\n+%build\n+# noop\n+\n+%install\n+# Create the installation directory for the migration tools\n+#mkdir -p %{buildroot}/opt/chef\n+mkdir -p %{buildroot}/opt/chef/{bin,bundle}\n+\n+# Untar the migration tools into /opt/chef/bin\n+tar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n+\n+# Copy the chef infra tarball into /opt/chef/bundle\n+cp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n+\n+%files\n+/opt/chef\n+/opt/chef/bin\n+/opt/chef/bin/*\n+/opt/chef/bundle\n+/opt/chef/bundle/*\n+\n+%post\n+\n+# Determine if --fresh_install needs to be passed based on the existence of the /opt/chef directory\n+MIGRATE_CMD=\"/opt/chef/bin/chef-migrate apply airgap\"\n+if [ ! 
-f /opt/chef/bin/chef-client ]; then", + "comment_created_at": "2025-01-16T05:52:15+00:00", + "comment_author": "sajjaphani", + "comment_body": "Prevents to proceed if chef-workstation is installed: https://github.com/chef/chef/pull/14772/commits/10c403f58b4e4d08c26484cb5b461cae0b7e2983", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-standardize-installation-directory-paths.md b/_reviewers/chef-standardize-installation-directory-paths.md index e433b8a..8d2ed0b 100644 --- a/_reviewers/chef-standardize-installation-directory-paths.md +++ b/_reviewers/chef-standardize-installation-directory-paths.md @@ -39,191 +39,3 @@ This separation enables: - Easier cookbook checks and compatibility verification - Simplified upgrade paths - Prevention of file conflicts between different versions - - -[ - { - "discussion_id": "1928142368", - "pr_number": 14820, - "pr_file": "knife/Gemfile", - "created_at": "2025-01-24T06:18:23+00:00", - "commented_code": "gem \"crack\", \"< 0.4.6\" # due to https://github.com/jnunemaker/crack/pull/75\n gem \"rake\", \">= 12.3.3\"\n gem \"rspec\"\n gem \"chef-bin\", path: \"../chef-bin\"\n gem \"chef-bin\", \"~> 18.6\" #path: \"../chef-bin\"", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1928142368", - "repo_full_name": "chef/chef", - "pr_number": 14820, - "pr_file": "knife/Gemfile", - "discussion_id": "1928142368", - "commented_code": "@@ -8,7 +8,7 @@ group(:development, :test) do\n gem \"crack\", \"< 0.4.6\" # due to https://github.com/jnunemaker/crack/pull/75\n gem \"rake\", \">= 12.3.3\"\n gem \"rspec\"\n- gem \"chef-bin\", path: \"../chef-bin\"\n+ gem \"chef-bin\", \"~> 18.6\" #path: \"../chef-bin\"", - "comment_created_at": "2025-01-24T06:18:23+00:00", - "comment_author": "neha-p6", - "comment_body": "move this to gemspec as well.", - "pr_file_module": null - }, - { - "comment_id": "1928166319", - "repo_full_name": "chef/chef", - "pr_number": 14820, - "pr_file": "knife/Gemfile", - "discussion_id": "1928142368", - "commented_code": "@@ -8,7 +8,7 @@ group(:development, :test) do\n gem \"crack\", \"< 0.4.6\" # due to https://github.com/jnunemaker/crack/pull/75\n gem \"rake\", \">= 12.3.3\"\n gem \"rspec\"\n- gem \"chef-bin\", path: \"../chef-bin\"\n+ gem \"chef-bin\", \"~> 18.6\" #path: \"../chef-bin\"", - "comment_created_at": "2025-01-24T06:50:44+00:00", - "comment_author": "ashiqueps", - "comment_body": "Done", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1776206090", - "pr_number": 14605, - "pr_file": ".expeditor/scripts/infra-hab.spec", - "created_at": "2024-09-26T02:02:16+00:00", - "commented_code": "# Disable any shell actions, replace them with simply 'true'\n%define __spec_prep_post true\n%define __spec_prep_pre true\n%define __spec_build_post true\n%define __spec_build_pre true\n%define __spec_install_post true\n%define __spec_install_pre true\n%define __spec_clean_post true\n%define __spec_clean_pre true\n\n# Use SHA256 checksums for all files\n%define _binary_filedigest_algorithm 8\n\n%define _binary_payload w1.xzdio\n\n# Disable creation of build-id links\n%define _build_id_links none\n\n# Metadata\nName: chef\nVersion: %{VERSION}\nRelease: 1%{?dist}\nSummary: The full stack of chef\nAutoReqProv: no\nBuildRoot: %buildroot\nPrefix: /\nGroup: default\nLicense: Chef EULA\nVendor: Omnibus \nURL: https://www.chef.io\nPackager: Chef Software, Inc. 
\n%description\nThe full stack of chef\n\n%prep\n# noop\n\n%build\n# noop\n\n%install\n# noop\n\n%clean\n# noop\n\n%pre\n#!/bin/sh\n#\n# Perform necessary inspec setup steps\n# before package is installed.\n#\n\necho \"You're about to install Chef Infra Client!\"\n\n%post\n#!/bin/sh\n# WARNING: REQUIRES /bin/sh\n#\n# - must run on /bin/sh on solaris 9\n# - must run on /bin/sh on AIX 6.x\n# - this file is sh not bash so do not introduce bash-isms\n# - if you are under 40, get peer review from your elders.\n#\n# Install Chef Infra Client\n#\n\n# Create wrapper binaries into /hab/chef directory\nmkdir -p /hab/chef/bin", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1776206090", - "repo_full_name": "chef/chef", - "pr_number": 14605, - "pr_file": ".expeditor/scripts/infra-hab.spec", - "discussion_id": "1776206090", - "commented_code": "@@ -0,0 +1,243 @@\n+# Disable any shell actions, replace them with simply 'true'\n+%define __spec_prep_post true\n+%define __spec_prep_pre true\n+%define __spec_build_post true\n+%define __spec_build_pre true\n+%define __spec_install_post true\n+%define __spec_install_pre true\n+%define __spec_clean_post true\n+%define __spec_clean_pre true\n+\n+# Use SHA256 checksums for all files\n+%define _binary_filedigest_algorithm 8\n+\n+%define _binary_payload w1.xzdio\n+\n+# Disable creation of build-id links\n+%define _build_id_links none\n+\n+# Metadata\n+Name: chef\n+Version: %{VERSION}\n+Release: 1%{?dist}\n+Summary: The full stack of chef\n+AutoReqProv: no\n+BuildRoot: %buildroot\n+Prefix: /\n+Group: default\n+License: Chef EULA\n+Vendor: Omnibus \n+URL: https://www.chef.io\n+Packager: Chef Software, Inc. \n+%description\n+The full stack of chef\n+\n+%prep\n+# noop\n+\n+%build\n+# noop\n+\n+%install\n+# noop\n+\n+%clean\n+# noop\n+\n+%pre\n+#!/bin/sh\n+#\n+# Perform necessary inspec setup steps\n+# before package is installed.\n+#\n+\n+echo \"You're about to install Chef Infra Client!\"\n+\n+%post\n+#!/bin/sh\n+# WARNING: REQUIRES /bin/sh\n+#\n+# - must run on /bin/sh on solaris 9\n+# - must run on /bin/sh on AIX 6.x\n+# - this file is sh not bash so do not introduce bash-isms\n+# - if you are under 40, get peer review from your elders.\n+#\n+# Install Chef Infra Client\n+#\n+\n+# Create wrapper binaries into /hab/chef directory\n+mkdir -p /hab/chef/bin", - "comment_created_at": "2024-09-26T02:02:16+00:00", - "comment_author": "mwrock", - "comment_body": "I think we want to put these in `/opt/chef` or wherever they exist today.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1907297240", - "pr_number": 14772, - "pr_file": ".expeditor/scripts/chef-infra-client.spec", - "created_at": "2025-01-08T14:45:22+00:00", - "commented_code": "# Disable any shell actions, replace them with simply 'true'\n%define __spec_prep_post true\n%define __spec_prep_pre true\n%define __spec_build_post true\n%define __spec_build_pre true\n%define __spec_install_post true\n%define __spec_install_pre true\n%define __spec_clean_post true\n%define __spec_clean_pre true\n\n# Use SHA256 checksums for all files\n%define _binary_filedigest_algorithm 8\n\n%define _binary_payload w1.xzdio\n\n# Disable creation of build-id links\n%define _build_id_links none\n\nName: chef-infra-client\nVersion: %{VERSION}\nRelease: 1%{?dist}\nSummary: The full stack of Chef Infra Client\nAutoReqProv: \tno\nBuildRoot: \t %buildroot\nPrefix: \t /\nGroup: \t\t default\nLicense: Chef EULA\nURL: \t\t https://www.chef.io\nPackager: \t Chef Software, Inc. 
\nSource0: %{CHEF_INFRA_TAR}\nSource1: %{CHEF_MIGRATE_TAR}\n\n%description\nThe full stack of Chef Infra Client\n\n%prep\n# noop\n\n%build\n# noop\n\n%install\n# Create the installation directory for the migration tools\n#mkdir -p %{buildroot}/opt/chef\nmkdir -p %{buildroot}/opt/chef/{bin,bundle}\n\n# Untar the migration tools into /opt/chef/bin\ntar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n\n# Copy the chef infra tarball into /opt/chef/bundle\ncp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n\n%files\n/opt/chef\n/opt/chef/bin", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1907297240", - "repo_full_name": "chef/chef", - "pr_number": 14772, - "pr_file": ".expeditor/scripts/chef-infra-client.spec", - "discussion_id": "1907297240", - "commented_code": "@@ -0,0 +1,89 @@\n+# Disable any shell actions, replace them with simply 'true'\n+%define __spec_prep_post true\n+%define __spec_prep_pre true\n+%define __spec_build_post true\n+%define __spec_build_pre true\n+%define __spec_install_post true\n+%define __spec_install_pre true\n+%define __spec_clean_post true\n+%define __spec_clean_pre true\n+\n+# Use SHA256 checksums for all files\n+%define _binary_filedigest_algorithm 8\n+\n+%define _binary_payload w1.xzdio\n+\n+# Disable creation of build-id links\n+%define _build_id_links none\n+\n+Name: chef-infra-client\n+Version: %{VERSION}\n+Release: 1%{?dist}\n+Summary: The full stack of Chef Infra Client\n+AutoReqProv: \tno\n+BuildRoot: \t %buildroot\n+Prefix: \t /\n+Group: \t\t default\n+License: Chef EULA\n+URL: \t\t https://www.chef.io\n+Packager: \t Chef Software, Inc. \n+Source0: %{CHEF_INFRA_TAR}\n+Source1: %{CHEF_MIGRATE_TAR}\n+\n+%description\n+The full stack of Chef Infra Client\n+\n+%prep\n+# noop\n+\n+%build\n+# noop\n+\n+%install\n+# Create the installation directory for the migration tools\n+#mkdir -p %{buildroot}/opt/chef\n+mkdir -p %{buildroot}/opt/chef/{bin,bundle}\n+\n+# Untar the migration tools into /opt/chef/bin\n+tar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n+\n+# Copy the chef infra tarball into /opt/chef/bundle\n+cp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n+\n+%files\n+/opt/chef\n+/opt/chef/bin", - "comment_created_at": "2025-01-08T14:45:22+00:00", - "comment_author": "marcparadise", - "comment_body": "Because /opt/chef/bin is already owned by the chef-infra package, we probably don't want to install anything here. \r\nCould we instead use a different directory, something like '/opt/chef/migration' or even include it in '/opt/chef/bundle\"? 
", - "pr_file_module": null - }, - { - "comment_id": "1907382449", - "repo_full_name": "chef/chef", - "pr_number": 14772, - "pr_file": ".expeditor/scripts/chef-infra-client.spec", - "discussion_id": "1907297240", - "commented_code": "@@ -0,0 +1,89 @@\n+# Disable any shell actions, replace them with simply 'true'\n+%define __spec_prep_post true\n+%define __spec_prep_pre true\n+%define __spec_build_post true\n+%define __spec_build_pre true\n+%define __spec_install_post true\n+%define __spec_install_pre true\n+%define __spec_clean_post true\n+%define __spec_clean_pre true\n+\n+# Use SHA256 checksums for all files\n+%define _binary_filedigest_algorithm 8\n+\n+%define _binary_payload w1.xzdio\n+\n+# Disable creation of build-id links\n+%define _build_id_links none\n+\n+Name: chef-infra-client\n+Version: %{VERSION}\n+Release: 1%{?dist}\n+Summary: The full stack of Chef Infra Client\n+AutoReqProv: \tno\n+BuildRoot: \t %buildroot\n+Prefix: \t /\n+Group: \t\t default\n+License: Chef EULA\n+URL: \t\t https://www.chef.io\n+Packager: \t Chef Software, Inc. \n+Source0: %{CHEF_INFRA_TAR}\n+Source1: %{CHEF_MIGRATE_TAR}\n+\n+%description\n+The full stack of Chef Infra Client\n+\n+%prep\n+# noop\n+\n+%build\n+# noop\n+\n+%install\n+# Create the installation directory for the migration tools\n+#mkdir -p %{buildroot}/opt/chef\n+mkdir -p %{buildroot}/opt/chef/{bin,bundle}\n+\n+# Untar the migration tools into /opt/chef/bin\n+tar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n+\n+# Copy the chef infra tarball into /opt/chef/bundle\n+cp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n+\n+%files\n+/opt/chef\n+/opt/chef/bin", - "comment_created_at": "2025-01-08T15:34:21+00:00", - "comment_author": "sajjaphani", - "comment_body": "Would it be a good idea to unpack the migration tool into /opt/chef/migration/bin instead?", - "pr_file_module": null - }, - { - "comment_id": "1907395853", - "repo_full_name": "chef/chef", - "pr_number": 14772, - "pr_file": ".expeditor/scripts/chef-infra-client.spec", - "discussion_id": "1907297240", - "commented_code": "@@ -0,0 +1,89 @@\n+# Disable any shell actions, replace them with simply 'true'\n+%define __spec_prep_post true\n+%define __spec_prep_pre true\n+%define __spec_build_post true\n+%define __spec_build_pre true\n+%define __spec_install_post true\n+%define __spec_install_pre true\n+%define __spec_clean_post true\n+%define __spec_clean_pre true\n+\n+# Use SHA256 checksums for all files\n+%define _binary_filedigest_algorithm 8\n+\n+%define _binary_payload w1.xzdio\n+\n+# Disable creation of build-id links\n+%define _build_id_links none\n+\n+Name: chef-infra-client\n+Version: %{VERSION}\n+Release: 1%{?dist}\n+Summary: The full stack of Chef Infra Client\n+AutoReqProv: \tno\n+BuildRoot: \t %buildroot\n+Prefix: \t /\n+Group: \t\t default\n+License: Chef EULA\n+URL: \t\t https://www.chef.io\n+Packager: \t Chef Software, Inc. 
\n+Source0: %{CHEF_INFRA_TAR}\n+Source1: %{CHEF_MIGRATE_TAR}\n+\n+%description\n+The full stack of Chef Infra Client\n+\n+%prep\n+# noop\n+\n+%build\n+# noop\n+\n+%install\n+# Create the installation directory for the migration tools\n+#mkdir -p %{buildroot}/opt/chef\n+mkdir -p %{buildroot}/opt/chef/{bin,bundle}\n+\n+# Untar the migration tools into /opt/chef/bin\n+tar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n+\n+# Copy the chef infra tarball into /opt/chef/bundle\n+cp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n+\n+%files\n+/opt/chef\n+/opt/chef/bin", - "comment_created_at": "2025-01-08T15:42:48+00:00", - "comment_author": "Stromweld", - "comment_body": "since new install lives in /hab why aren't we using that directory and in the end there shouldn't be any /opt/chef anything unless using omnibus version. That'll make it easier for customers to understand anything related to hab builds lives in /hab/somewhere and omnibus in /opt/chef instead of 1 tool over here and all others over there. This also then makes Cookbook checks easy to accommodate both by simply checking for the existence of /opt/chef.", - "pr_file_module": null - }, - { - "comment_id": "1908191760", - "repo_full_name": "chef/chef", - "pr_number": 14772, - "pr_file": ".expeditor/scripts/chef-infra-client.spec", - "discussion_id": "1907297240", - "commented_code": "@@ -0,0 +1,89 @@\n+# Disable any shell actions, replace them with simply 'true'\n+%define __spec_prep_post true\n+%define __spec_prep_pre true\n+%define __spec_build_post true\n+%define __spec_build_pre true\n+%define __spec_install_post true\n+%define __spec_install_pre true\n+%define __spec_clean_post true\n+%define __spec_clean_pre true\n+\n+# Use SHA256 checksums for all files\n+%define _binary_filedigest_algorithm 8\n+\n+%define _binary_payload w1.xzdio\n+\n+# Disable creation of build-id links\n+%define _build_id_links none\n+\n+Name: chef-infra-client\n+Version: %{VERSION}\n+Release: 1%{?dist}\n+Summary: The full stack of Chef Infra Client\n+AutoReqProv: \tno\n+BuildRoot: \t %buildroot\n+Prefix: \t /\n+Group: \t\t default\n+License: Chef EULA\n+URL: \t\t https://www.chef.io\n+Packager: \t Chef Software, Inc. \n+Source0: %{CHEF_INFRA_TAR}\n+Source1: %{CHEF_MIGRATE_TAR}\n+\n+%description\n+The full stack of Chef Infra Client\n+\n+%prep\n+# noop\n+\n+%build\n+# noop\n+\n+%install\n+# Create the installation directory for the migration tools\n+#mkdir -p %{buildroot}/opt/chef\n+mkdir -p %{buildroot}/opt/chef/{bin,bundle}\n+\n+# Untar the migration tools into /opt/chef/bin\n+tar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n+\n+# Copy the chef infra tarball into /opt/chef/bundle\n+cp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n+\n+%files\n+/opt/chef\n+/opt/chef/bin", - "comment_created_at": "2025-01-09T05:34:40+00:00", - "comment_author": "sajjaphani", - "comment_body": "@marcparadise @Stromweld It appears that using /opt/chef or /hab may have similar drawbacks, as it could interfere with their structures. 
Could we use a separate directory, /opt/chef-migrate, instead, where bin would hold the binary of the migration tool and bundle would hold the tar file?", - "pr_file_module": null - }, - { - "comment_id": "1917826273", - "repo_full_name": "chef/chef", - "pr_number": 14772, - "pr_file": ".expeditor/scripts/chef-infra-client.spec", - "discussion_id": "1907297240", - "commented_code": "@@ -0,0 +1,89 @@\n+# Disable any shell actions, replace them with simply 'true'\n+%define __spec_prep_post true\n+%define __spec_prep_pre true\n+%define __spec_build_post true\n+%define __spec_build_pre true\n+%define __spec_install_post true\n+%define __spec_install_pre true\n+%define __spec_clean_post true\n+%define __spec_clean_pre true\n+\n+# Use SHA256 checksums for all files\n+%define _binary_filedigest_algorithm 8\n+\n+%define _binary_payload w1.xzdio\n+\n+# Disable creation of build-id links\n+%define _build_id_links none\n+\n+Name: chef-infra-client\n+Version: %{VERSION}\n+Release: 1%{?dist}\n+Summary: The full stack of Chef Infra Client\n+AutoReqProv: \tno\n+BuildRoot: \t %buildroot\n+Prefix: \t /\n+Group: \t\t default\n+License: Chef EULA\n+URL: \t\t https://www.chef.io\n+Packager: \t Chef Software, Inc. \n+Source0: %{CHEF_INFRA_TAR}\n+Source1: %{CHEF_MIGRATE_TAR}\n+\n+%description\n+The full stack of Chef Infra Client\n+\n+%prep\n+# noop\n+\n+%build\n+# noop\n+\n+%install\n+# Create the installation directory for the migration tools\n+#mkdir -p %{buildroot}/opt/chef\n+mkdir -p %{buildroot}/opt/chef/{bin,bundle}\n+\n+# Untar the migration tools into /opt/chef/bin\n+tar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n+\n+# Copy the chef infra tarball into /opt/chef/bundle\n+cp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n+\n+%files\n+/opt/chef\n+/opt/chef/bin", - "comment_created_at": "2025-01-16T06:41:20+00:00", - "comment_author": "sajjaphani", - "comment_body": "Changed the directory to place the migration binary and bundle into `/hab/migration`. The binary is now located in `/hab/migration/bin`, and the infra tarball is stored in `/hab/migration/bundle`.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1907298683", - "pr_number": 14772, - "pr_file": ".expeditor/scripts/chef-infra-client.spec", - "created_at": "2025-01-08T14:46:21+00:00", - "commented_code": "# Disable any shell actions, replace them with simply 'true'\n%define __spec_prep_post true\n%define __spec_prep_pre true\n%define __spec_build_post true\n%define __spec_build_pre true\n%define __spec_install_post true\n%define __spec_install_pre true\n%define __spec_clean_post true\n%define __spec_clean_pre true\n\n# Use SHA256 checksums for all files\n%define _binary_filedigest_algorithm 8\n\n%define _binary_payload w1.xzdio\n\n# Disable creation of build-id links\n%define _build_id_links none\n\nName: chef-infra-client\nVersion: %{VERSION}\nRelease: 1%{?dist}\nSummary: The full stack of Chef Infra Client\nAutoReqProv: \tno\nBuildRoot: \t %buildroot\nPrefix: \t /\nGroup: \t\t default\nLicense: Chef EULA\nURL: \t\t https://www.chef.io\nPackager: \t Chef Software, Inc. 
\nSource0: %{CHEF_INFRA_TAR}\nSource1: %{CHEF_MIGRATE_TAR}\n\n%description\nThe full stack of Chef Infra Client\n\n%prep\n# noop\n\n%build\n# noop\n\n%install\n# Create the installation directory for the migration tools\n#mkdir -p %{buildroot}/opt/chef\nmkdir -p %{buildroot}/opt/chef/{bin,bundle}\n\n# Untar the migration tools into /opt/chef/bin\ntar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n\n# Copy the chef infra tarball into /opt/chef/bundle\ncp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n\n%files\n/opt/chef\n/opt/chef/bin\n/opt/chef/bin/*\n/opt/chef/bundle\n/opt/chef/bundle/*\n\n%post\n\n# Determine if --fresh_install needs to be passed based on the existence of the /opt/chef directory\nMIGRATE_CMD=\"/opt/chef/bin/chef-migrate apply airgap\"\nif [ ! -f /opt/chef/bin/chef-client ]; then", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1907298683", - "repo_full_name": "chef/chef", - "pr_number": 14772, - "pr_file": ".expeditor/scripts/chef-infra-client.spec", - "discussion_id": "1907298683", - "commented_code": "@@ -0,0 +1,89 @@\n+# Disable any shell actions, replace them with simply 'true'\n+%define __spec_prep_post true\n+%define __spec_prep_pre true\n+%define __spec_build_post true\n+%define __spec_build_pre true\n+%define __spec_install_post true\n+%define __spec_install_pre true\n+%define __spec_clean_post true\n+%define __spec_clean_pre true\n+\n+# Use SHA256 checksums for all files\n+%define _binary_filedigest_algorithm 8\n+\n+%define _binary_payload w1.xzdio\n+\n+# Disable creation of build-id links\n+%define _build_id_links none\n+\n+Name: chef-infra-client\n+Version: %{VERSION}\n+Release: 1%{?dist}\n+Summary: The full stack of Chef Infra Client\n+AutoReqProv: \tno\n+BuildRoot: \t %buildroot\n+Prefix: \t /\n+Group: \t\t default\n+License: Chef EULA\n+URL: \t\t https://www.chef.io\n+Packager: \t Chef Software, Inc. \n+Source0: %{CHEF_INFRA_TAR}\n+Source1: %{CHEF_MIGRATE_TAR}\n+\n+%description\n+The full stack of Chef Infra Client\n+\n+%prep\n+# noop\n+\n+%build\n+# noop\n+\n+%install\n+# Create the installation directory for the migration tools\n+#mkdir -p %{buildroot}/opt/chef\n+mkdir -p %{buildroot}/opt/chef/{bin,bundle}\n+\n+# Untar the migration tools into /opt/chef/bin\n+tar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n+\n+# Copy the chef infra tarball into /opt/chef/bundle\n+cp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n+\n+%files\n+/opt/chef\n+/opt/chef/bin\n+/opt/chef/bin/*\n+/opt/chef/bundle\n+/opt/chef/bundle/*\n+\n+%post\n+\n+# Determine if --fresh_install needs to be passed based on the existence of the /opt/chef directory\n+MIGRATE_CMD=\"/opt/chef/bin/chef-migrate apply airgap\"\n+if [ ! -f /opt/chef/bin/chef-client ]; then", - "comment_created_at": "2025-01-08T14:46:21+00:00", - "comment_author": "marcparadise", - "comment_body": "/opt/chef/bin/chef-client may not exist, but chef-client could still be installed on the system and available int he path via workstation. In that case, is 'fresh-install' still correct? 
", - "pr_file_module": null - }, - { - "comment_id": "1907375809", - "repo_full_name": "chef/chef", - "pr_number": 14772, - "pr_file": ".expeditor/scripts/chef-infra-client.spec", - "discussion_id": "1907298683", - "commented_code": "@@ -0,0 +1,89 @@\n+# Disable any shell actions, replace them with simply 'true'\n+%define __spec_prep_post true\n+%define __spec_prep_pre true\n+%define __spec_build_post true\n+%define __spec_build_pre true\n+%define __spec_install_post true\n+%define __spec_install_pre true\n+%define __spec_clean_post true\n+%define __spec_clean_pre true\n+\n+# Use SHA256 checksums for all files\n+%define _binary_filedigest_algorithm 8\n+\n+%define _binary_payload w1.xzdio\n+\n+# Disable creation of build-id links\n+%define _build_id_links none\n+\n+Name: chef-infra-client\n+Version: %{VERSION}\n+Release: 1%{?dist}\n+Summary: The full stack of Chef Infra Client\n+AutoReqProv: \tno\n+BuildRoot: \t %buildroot\n+Prefix: \t /\n+Group: \t\t default\n+License: Chef EULA\n+URL: \t\t https://www.chef.io\n+Packager: \t Chef Software, Inc. \n+Source0: %{CHEF_INFRA_TAR}\n+Source1: %{CHEF_MIGRATE_TAR}\n+\n+%description\n+The full stack of Chef Infra Client\n+\n+%prep\n+# noop\n+\n+%build\n+# noop\n+\n+%install\n+# Create the installation directory for the migration tools\n+#mkdir -p %{buildroot}/opt/chef\n+mkdir -p %{buildroot}/opt/chef/{bin,bundle}\n+\n+# Untar the migration tools into /opt/chef/bin\n+tar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n+\n+# Copy the chef infra tarball into /opt/chef/bundle\n+cp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n+\n+%files\n+/opt/chef\n+/opt/chef/bin\n+/opt/chef/bin/*\n+/opt/chef/bundle\n+/opt/chef/bundle/*\n+\n+%post\n+\n+# Determine if --fresh_install needs to be passed based on the existence of the /opt/chef directory\n+MIGRATE_CMD=\"/opt/chef/bin/chef-migrate apply airgap\"\n+if [ ! -f /opt/chef/bin/chef-client ]; then", - "comment_created_at": "2025-01-08T15:29:59+00:00", - "comment_author": "sajjaphani", - "comment_body": "The migration tool has an option to uninstall the Omnibus installation during migration. However, I am not sure whether it accounts for the Workstation. Therefore, I am specifically checking the Omnibus installation here", - "pr_file_module": null - }, - { - "comment_id": "1917069354", - "repo_full_name": "chef/chef", - "pr_number": 14772, - "pr_file": ".expeditor/scripts/chef-infra-client.spec", - "discussion_id": "1907298683", - "commented_code": "@@ -0,0 +1,89 @@\n+# Disable any shell actions, replace them with simply 'true'\n+%define __spec_prep_post true\n+%define __spec_prep_pre true\n+%define __spec_build_post true\n+%define __spec_build_pre true\n+%define __spec_install_post true\n+%define __spec_install_pre true\n+%define __spec_clean_post true\n+%define __spec_clean_pre true\n+\n+# Use SHA256 checksums for all files\n+%define _binary_filedigest_algorithm 8\n+\n+%define _binary_payload w1.xzdio\n+\n+# Disable creation of build-id links\n+%define _build_id_links none\n+\n+Name: chef-infra-client\n+Version: %{VERSION}\n+Release: 1%{?dist}\n+Summary: The full stack of Chef Infra Client\n+AutoReqProv: \tno\n+BuildRoot: \t %buildroot\n+Prefix: \t /\n+Group: \t\t default\n+License: Chef EULA\n+URL: \t\t https://www.chef.io\n+Packager: \t Chef Software, Inc. 
\n+Source0: %{CHEF_INFRA_TAR}\n+Source1: %{CHEF_MIGRATE_TAR}\n+\n+%description\n+The full stack of Chef Infra Client\n+\n+%prep\n+# noop\n+\n+%build\n+# noop\n+\n+%install\n+# Create the installation directory for the migration tools\n+#mkdir -p %{buildroot}/opt/chef\n+mkdir -p %{buildroot}/opt/chef/{bin,bundle}\n+\n+# Untar the migration tools into /opt/chef/bin\n+tar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n+\n+# Copy the chef infra tarball into /opt/chef/bundle\n+cp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n+\n+%files\n+/opt/chef\n+/opt/chef/bin\n+/opt/chef/bin/*\n+/opt/chef/bundle\n+/opt/chef/bundle/*\n+\n+%post\n+\n+# Determine if --fresh_install needs to be passed based on the existence of the /opt/chef directory\n+MIGRATE_CMD=\"/opt/chef/bin/chef-migrate apply airgap\"\n+if [ ! -f /opt/chef/bin/chef-client ]; then", - "comment_created_at": "2025-01-15T17:25:10+00:00", - "comment_author": "marcparadise", - "comment_body": "Let's instead look at declaring a package-level conflict, so that we can't install infra-19 when chef-workstation 'legacy' is installed. \r\nThat will let us avoid the entire class of problems that arise trying to manually manage conflicting files across packages. \r\n\r\nThis can be better supported once workstation itself is released with habitat. ", - "pr_file_module": null - }, - { - "comment_id": "1917787320", - "repo_full_name": "chef/chef", - "pr_number": 14772, - "pr_file": ".expeditor/scripts/chef-infra-client.spec", - "discussion_id": "1907298683", - "commented_code": "@@ -0,0 +1,89 @@\n+# Disable any shell actions, replace them with simply 'true'\n+%define __spec_prep_post true\n+%define __spec_prep_pre true\n+%define __spec_build_post true\n+%define __spec_build_pre true\n+%define __spec_install_post true\n+%define __spec_install_pre true\n+%define __spec_clean_post true\n+%define __spec_clean_pre true\n+\n+# Use SHA256 checksums for all files\n+%define _binary_filedigest_algorithm 8\n+\n+%define _binary_payload w1.xzdio\n+\n+# Disable creation of build-id links\n+%define _build_id_links none\n+\n+Name: chef-infra-client\n+Version: %{VERSION}\n+Release: 1%{?dist}\n+Summary: The full stack of Chef Infra Client\n+AutoReqProv: \tno\n+BuildRoot: \t %buildroot\n+Prefix: \t /\n+Group: \t\t default\n+License: Chef EULA\n+URL: \t\t https://www.chef.io\n+Packager: \t Chef Software, Inc. \n+Source0: %{CHEF_INFRA_TAR}\n+Source1: %{CHEF_MIGRATE_TAR}\n+\n+%description\n+The full stack of Chef Infra Client\n+\n+%prep\n+# noop\n+\n+%build\n+# noop\n+\n+%install\n+# Create the installation directory for the migration tools\n+#mkdir -p %{buildroot}/opt/chef\n+mkdir -p %{buildroot}/opt/chef/{bin,bundle}\n+\n+# Untar the migration tools into /opt/chef/bin\n+tar -xf %{SOURCE1} -C %{buildroot}/opt/chef/bin\n+\n+# Copy the chef infra tarball into /opt/chef/bundle\n+cp %{SOURCE0} %{buildroot}/opt/chef/bundle/\n+\n+%files\n+/opt/chef\n+/opt/chef/bin\n+/opt/chef/bin/*\n+/opt/chef/bundle\n+/opt/chef/bundle/*\n+\n+%post\n+\n+# Determine if --fresh_install needs to be passed based on the existence of the /opt/chef directory\n+MIGRATE_CMD=\"/opt/chef/bin/chef-migrate apply airgap\"\n+if [ ! 
-f /opt/chef/bin/chef-client ]; then", - "comment_created_at": "2025-01-16T05:52:15+00:00", - "comment_author": "sajjaphani", - "comment_body": "Prevents to proceed if chef-workstation is installed: https://github.com/chef/chef/pull/14772/commits/10c403f58b4e4d08c26484cb5b461cae0b7e2983", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-standardize-shell-flags.json b/_reviewers/chef-standardize-shell-flags.json new file mode 100644 index 0000000..783e008 --- /dev/null +++ b/_reviewers/chef-standardize-shell-flags.json @@ -0,0 +1,68 @@ +[ + { + "discussion_id": "2142595259", + "pr_number": 15049, + "pr_file": ".expeditor/promote-docker-images.sh", + "created_at": "2025-06-12T12:21:39+00:00", + "commented_code": "set -eu -o pipefail", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2142595259", + "repo_full_name": "chef/chef", + "pr_number": 15049, + "pr_file": ".expeditor/promote-docker-images.sh", + "discussion_id": "2142595259", + "commented_code": "@@ -2,6 +2,8 @@\n set -eu -o pipefail", + "comment_created_at": "2025-06-12T12:21:39+00:00", + "comment_author": "sean-sype-simmons", + "comment_body": "please update to: `set -eou pipefail` ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2142596054", + "pr_number": 15049, + "pr_file": ".expeditor/update_dep.sh", + "created_at": "2025-06-12T12:22:04+00:00", + "commented_code": "set -evx", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2142596054", + "repo_full_name": "chef/chef", + "pr_number": 15049, + "pr_file": ".expeditor/update_dep.sh", + "discussion_id": "2142596054", + "commented_code": "@@ -11,6 +11,9 @@\n \n set -evx", + "comment_created_at": "2025-06-12T12:22:04+00:00", + "comment_author": "sean-sype-simmons", + "comment_body": "please update to: `set -eou pipefail` ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2142596218", + "pr_number": 15049, + "pr_file": ".expeditor/update_dockerfile.sh", + "created_at": "2025-06-12T12:22:09+00:00", + "commented_code": "############################################################################\n\nset -evx", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "2142596218", + "repo_full_name": "chef/chef", + "pr_number": 15049, + "pr_file": ".expeditor/update_dockerfile.sh", + "discussion_id": "2142596218", + "commented_code": "@@ -10,5 +10,7 @@\n ############################################################################\n \n set -evx", + "comment_created_at": "2025-06-12T12:22:09+00:00", + "comment_author": "sean-sype-simmons", + "comment_body": "please update to: `set -eou pipefail` ", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-standardize-shell-flags.md b/_reviewers/chef-standardize-shell-flags.md index f12de08..d78782b 100644 --- a/_reviewers/chef-standardize-shell-flags.md +++ b/_reviewers/chef-standardize-shell-flags.md @@ -29,73 +29,3 @@ set -eou pipefail ``` This standard helps prevent subtle bugs caused by unhandled errors or undefined variables and makes scripts more robust and predictable. 
- - -[ - { - "discussion_id": "2142595259", - "pr_number": 15049, - "pr_file": ".expeditor/promote-docker-images.sh", - "created_at": "2025-06-12T12:21:39+00:00", - "commented_code": "set -eu -o pipefail", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2142595259", - "repo_full_name": "chef/chef", - "pr_number": 15049, - "pr_file": ".expeditor/promote-docker-images.sh", - "discussion_id": "2142595259", - "commented_code": "@@ -2,6 +2,8 @@\n set -eu -o pipefail", - "comment_created_at": "2025-06-12T12:21:39+00:00", - "comment_author": "sean-sype-simmons", - "comment_body": "please update to: `set -eou pipefail` ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2142596054", - "pr_number": 15049, - "pr_file": ".expeditor/update_dep.sh", - "created_at": "2025-06-12T12:22:04+00:00", - "commented_code": "set -evx", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2142596054", - "repo_full_name": "chef/chef", - "pr_number": 15049, - "pr_file": ".expeditor/update_dep.sh", - "discussion_id": "2142596054", - "commented_code": "@@ -11,6 +11,9 @@\n \n set -evx", - "comment_created_at": "2025-06-12T12:22:04+00:00", - "comment_author": "sean-sype-simmons", - "comment_body": "please update to: `set -eou pipefail` ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2142596218", - "pr_number": 15049, - "pr_file": ".expeditor/update_dockerfile.sh", - "created_at": "2025-06-12T12:22:09+00:00", - "commented_code": "############################################################################\n\nset -evx", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "2142596218", - "repo_full_name": "chef/chef", - "pr_number": 15049, - "pr_file": ".expeditor/update_dockerfile.sh", - "discussion_id": "2142596218", - "commented_code": "@@ -10,5 +10,7 @@\n ############################################################################\n \n set -evx", - "comment_created_at": "2025-06-12T12:22:09+00:00", - "comment_author": "sean-sype-simmons", - "comment_body": "please update to: `set -eou pipefail` ", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-structure-cicd-scripts.json b/_reviewers/chef-structure-cicd-scripts.json new file mode 100644 index 0000000..c4ef6b6 --- /dev/null +++ b/_reviewers/chef-structure-cicd-scripts.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "1082707950", + "pr_number": 13489, + "pr_file": ".buildkite/build-test-omnibus.sh", + "created_at": "2023-01-20T15:35:22+00:00", + "commented_code": "if [[ $BUILDKITE_ORGANIZATION_SLUG == \"chef-oss\" ]]; then\n echo \"- block: Build & Test Omnibus Packages\"\n echo \" prompt: Continue to run omnibus package build and tests for applicable platforms?\"\nfi\n\nFILTER=\"${OMNIBUS_FILTER:=*}\"\n\nplatforms=(\"amazon-2:centos-7\" \"centos-6:centos-6\" \"centos-7:centos-7\" \"centos-8:centos-8\" \"rhel-9:rhel-9\" \"debian-9:debian-9\" \"debian-10:debian-9\" \"debian-11:debian-9\" \"ubuntu-1604:ubuntu-1604\" \"ubuntu-1804:ubuntu-1604\" \"ubuntu-2004:ubuntu-1604\" \"ubuntu-2204:ubuntu-1604\" \"sles-15:sles-15\" \"windows-2019:windows-2019\")\n\nomnibus_build_platforms=()\nomnibus_test_platforms=()\n\n# build build array and test array based on filter\nfor platform in ${platforms[@]}; do\n case ${platform%:*} in\n $FILTER)\n omnibus_build_platforms[${#omnibus_build_platforms[@]}]=${platform#*:}\n omnibus_test_platforms[${#omnibus_test_platforms[@]}]=$platform\n ;;\n esac\ndone\n\n# remove duplicates from build 
array\nomnibus_build_platforms=($(printf \"%s\\n\" \"${omnibus_build_platforms[@]}\" | sort -u | tr '\\n' ' '))\n\nfor platform in ${omnibus_build_platforms[@]}; do\n if [[ $platform != *\"windows\"* ]]; then\n echo \"- label: \\\":hammer_and_wrench::docker: $platform\\\"\"\n echo \" retry:\"\n echo \" automatic:\"\n echo \" limit: 1\"\n echo \" key: build-$platform\"\n echo \" agents:\"\n echo \" queue: default-privileged\"\n echo \" plugins:\"\n echo \" - docker#v3.5.0:\"\n echo \" image: chefes/omnibus-toolchain-$platform:$OMNIBUS_TOOLCHAIN_VERSION\"\n echo \" privileged: true\"\n echo \" propagate-environment: true\"\n echo \" environment:\"\n echo \" - RPM_SIGNING_KEY\"\n echo \" - CHEF_FOUNDATION_VERSION\"\n echo \" commands:\"\n echo \" - ./.expeditor/scripts/omnibus_chef_build.sh\"\n echo \" timeout_in_minutes: 60\"\n else \n echo \"- label: \\\":hammer_and_wrench::windows: $platform\\\"\"\n echo \" retry:\"\n echo \" automatic:\"\n echo \" limit: 1\"\n echo \" key: build-$platform\"\n echo \" agents:\"\n echo \" queue: default-$platform-privileged\"\n echo \" plugins:\"\n echo \" - docker#v3.5.0:\"\n echo \" image: chefes/omnibus-toolchain-$platform:$OMNIBUS_TOOLCHAIN_VERSION\"\n echo \" shell:\"\n echo \" - powershell\"\n echo \" - \\\"-Command\\\"\"\n echo \" propagate-environment: true\"\n echo \" environment:\"\n echo \" - CHEF_FOUNDATION_VERSION\"\n echo \" - BUILDKITE_AGENT_ACCESS_TOKEN\"\n echo \" - AWS_ACCESS_KEY_ID\"\n echo \" - AWS_SECRET_ACCESS_KEY\"\n echo \" - AWS_SESSION_TOKEN\"\n echo \" volumes:\"\n echo ' - \"c:\\\\buildkite-agent:c:\\\\buildkite-agent\"'\n echo \" commands:\"\n echo \" - ./.expeditor/scripts/omnibus_chef_build.ps1\"\n echo \" timeout_in_minutes: 60\"\n fi\ndone\n\necho \"- wait: ~\"\n\nfor platform in ${omnibus_test_platforms[@]}; do\n if [[ $platform != *\"windows\"* ]]; then\n echo \"- env:\"\n echo \" OMNIBUS_BUILDER_KEY: build-${platform#*:}\"\n echo \" label: \\\":mag::docker: ${platform%:*}\\\"\"\n echo \" retry:\"\n echo \" automatic:\"\n echo \" limit: 1\"\n echo \" agents:\"\n echo \" queue: default-privileged\"\n echo \" plugins:\"\n echo \" - docker#v3.5.0:\"\n echo \" image: chefes/omnibus-toolchain-${platform%:*}:$OMNIBUS_TOOLCHAIN_VERSION\"\n echo \" privileged: true\"\n echo \" propagate-environment: true\"\n echo \" commands:\"\n echo \" - ./.expeditor/scripts/download_built_omnibus_pkgs.sh\"\n echo \" - omnibus/omnibus-test.sh\"\n echo \" timeout_in_minutes: 60\"\n else\n echo \"- env:\"\n echo \" OMNIBUS_BUILDER_KEY: build-windows-2019\"\n echo \" key: test-windows-2019\"\n echo ' label: \":mag::windows: windows-2019\"'\n echo \" retry:\"\n echo \" automatic:\"\n echo \" limit: 1\"\n echo \" agents:\"\n echo \" queue: default-windows-2019-privileged\"\n echo \" commands:\"\n echo \" - ./.expeditor/scripts/download_built_omnibus_pkgs.ps1\"\n echo \" - ./omnibus/omnibus-test.ps1\"\n echo \" timeout_in_minutes: 60\"\n fi\ndone", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1082707950", + "repo_full_name": "chef/chef", + "pr_number": 13489, + "pr_file": ".buildkite/build-test-omnibus.sh", + "discussion_id": "1082707950", + "commented_code": "@@ -0,0 +1,112 @@\n+if [[ $BUILDKITE_ORGANIZATION_SLUG == \"chef-oss\" ]]; then\n+ echo \"- block: Build & Test Omnibus Packages\"\n+ echo \" prompt: Continue to run omnibus package build and tests for applicable platforms?\"\n+fi\n+\n+FILTER=\"${OMNIBUS_FILTER:=*}\"\n+\n+platforms=(\"amazon-2:centos-7\" \"centos-6:centos-6\" \"centos-7:centos-7\" 
\"centos-8:centos-8\" \"rhel-9:rhel-9\" \"debian-9:debian-9\" \"debian-10:debian-9\" \"debian-11:debian-9\" \"ubuntu-1604:ubuntu-1604\" \"ubuntu-1804:ubuntu-1604\" \"ubuntu-2004:ubuntu-1604\" \"ubuntu-2204:ubuntu-1604\" \"sles-15:sles-15\" \"windows-2019:windows-2019\")\n+\n+omnibus_build_platforms=()\n+omnibus_test_platforms=()\n+\n+# build build array and test array based on filter\n+for platform in ${platforms[@]}; do\n+ case ${platform%:*} in\n+ $FILTER)\n+ omnibus_build_platforms[${#omnibus_build_platforms[@]}]=${platform#*:}\n+ omnibus_test_platforms[${#omnibus_test_platforms[@]}]=$platform\n+ ;;\n+ esac\n+done\n+\n+# remove duplicates from build array\n+omnibus_build_platforms=($(printf \"%s\\n\" \"${omnibus_build_platforms[@]}\" | sort -u | tr '\\n' ' '))\n+\n+for platform in ${omnibus_build_platforms[@]}; do\n+ if [[ $platform != *\"windows\"* ]]; then\n+ echo \"- label: \\\":hammer_and_wrench::docker: $platform\\\"\"\n+ echo \" retry:\"\n+ echo \" automatic:\"\n+ echo \" limit: 1\"\n+ echo \" key: build-$platform\"\n+ echo \" agents:\"\n+ echo \" queue: default-privileged\"\n+ echo \" plugins:\"\n+ echo \" - docker#v3.5.0:\"\n+ echo \" image: chefes/omnibus-toolchain-$platform:$OMNIBUS_TOOLCHAIN_VERSION\"\n+ echo \" privileged: true\"\n+ echo \" propagate-environment: true\"\n+ echo \" environment:\"\n+ echo \" - RPM_SIGNING_KEY\"\n+ echo \" - CHEF_FOUNDATION_VERSION\"\n+ echo \" commands:\"\n+ echo \" - ./.expeditor/scripts/omnibus_chef_build.sh\"\n+ echo \" timeout_in_minutes: 60\"\n+ else \n+ echo \"- label: \\\":hammer_and_wrench::windows: $platform\\\"\"\n+ echo \" retry:\"\n+ echo \" automatic:\"\n+ echo \" limit: 1\"\n+ echo \" key: build-$platform\"\n+ echo \" agents:\"\n+ echo \" queue: default-$platform-privileged\"\n+ echo \" plugins:\"\n+ echo \" - docker#v3.5.0:\"\n+ echo \" image: chefes/omnibus-toolchain-$platform:$OMNIBUS_TOOLCHAIN_VERSION\"\n+ echo \" shell:\"\n+ echo \" - powershell\"\n+ echo \" - \\\"-Command\\\"\"\n+ echo \" propagate-environment: true\"\n+ echo \" environment:\"\n+ echo \" - CHEF_FOUNDATION_VERSION\"\n+ echo \" - BUILDKITE_AGENT_ACCESS_TOKEN\"\n+ echo \" - AWS_ACCESS_KEY_ID\"\n+ echo \" - AWS_SECRET_ACCESS_KEY\"\n+ echo \" - AWS_SESSION_TOKEN\"\n+ echo \" volumes:\"\n+ echo ' - \"c:\\\\buildkite-agent:c:\\\\buildkite-agent\"'\n+ echo \" commands:\"\n+ echo \" - ./.expeditor/scripts/omnibus_chef_build.ps1\"\n+ echo \" timeout_in_minutes: 60\"\n+ fi\n+done\n+\n+echo \"- wait: ~\"\n+\n+for platform in ${omnibus_test_platforms[@]}; do\n+ if [[ $platform != *\"windows\"* ]]; then\n+ echo \"- env:\"\n+ echo \" OMNIBUS_BUILDER_KEY: build-${platform#*:}\"\n+ echo \" label: \\\":mag::docker: ${platform%:*}\\\"\"\n+ echo \" retry:\"\n+ echo \" automatic:\"\n+ echo \" limit: 1\"\n+ echo \" agents:\"\n+ echo \" queue: default-privileged\"\n+ echo \" plugins:\"\n+ echo \" - docker#v3.5.0:\"\n+ echo \" image: chefes/omnibus-toolchain-${platform%:*}:$OMNIBUS_TOOLCHAIN_VERSION\"\n+ echo \" privileged: true\"\n+ echo \" propagate-environment: true\"\n+ echo \" commands:\"\n+ echo \" - ./.expeditor/scripts/download_built_omnibus_pkgs.sh\"\n+ echo \" - omnibus/omnibus-test.sh\"\n+ echo \" timeout_in_minutes: 60\"\n+ else\n+ echo \"- env:\"\n+ echo \" OMNIBUS_BUILDER_KEY: build-windows-2019\"\n+ echo \" key: test-windows-2019\"\n+ echo ' label: \":mag::windows: windows-2019\"'\n+ echo \" retry:\"\n+ echo \" automatic:\"\n+ echo \" limit: 1\"\n+ echo \" agents:\"\n+ echo \" queue: default-windows-2019-privileged\"\n+ echo \" commands:\"\n+ echo \" - 
./.expeditor/scripts/download_built_omnibus_pkgs.ps1\"\n+ echo \" - ./omnibus/omnibus-test.ps1\"\n+ echo \" timeout_in_minutes: 60\"\n+ fi\n+done", + "comment_created_at": "2023-01-20T15:35:22+00:00", + "comment_author": "tpowell-progress", + "comment_body": "```suggestion\r\nfor platform in ${omnibus_build_platforms[@]}; do\r\n if [[ $platform != *\"windows\"* ]]; then\r\n cat <<- NONWINDOWS\r\n- label: \":hammer_and_wrench::docker: $platform\"\r\n retry:\r\n automatic:\r\n limit: 1\r\n key: build-$platform\r\n agents:\r\n queue: default-privileged\r\n plugins:\r\n - docker#v3.5.0:\r\n image: chefes/omnibus-toolchain-$platform:$OMNIBUS_TOOLCHAIN_VERSION\r\n privileged: true\r\n propagate-environment: true\r\n environment:\r\n - RPM_SIGNING_KEY\r\n - CHEF_FOUNDATION_VERSION\r\n commands:\r\n - ./.expeditor/scripts/omnibus_chef_build.sh\r\n timeout_in_minutes: 60\r\nNONWINDOWS\r\n else\r\n cat <<- WINDOWS\r\n- label: \":hammer_and_wrench::windows: $platform\"\r\n retry:\r\n automatic:\r\n limit: 1\r\n key: build-$platform\r\n agents:\r\n queue: default-$platform-privileged\r\n plugins:\r\n - docker#v3.5.0:\r\n image: chefes/omnibus-toolchain-$platform:$OMNIBUS_TOOLCHAIN_VERSION\r\n shell:\r\n - powershell\r\n - \"-Command\"\r\n propagate-environment: true\r\n environment:\r\n - CHEF_FOUNDATION_VERSION\r\n - BUILDKITE_AGENT_ACCESS_TOKEN\r\n - AWS_ACCESS_KEY_ID\r\n - AWS_SECRET_ACCESS_KEY\r\n - AWS_SESSION_TOKEN\r\n volumes:\r\n - \"c:\\\\buildkite-agent:c:\\\\buildkite-agent\"\r\n commands:\r\n - ./.expeditor/scripts/omnibus_chef_build.ps1\r\n timeout_in_minutes: 60\r\nWINDOWS\r\n fi\r\ndone\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "678521712", + "pr_number": 11871, + "pr_file": ".expeditor/promote-docker-images.sh", + "created_at": "2021-07-28T17:43:21+00:00", + "commented_code": "#! /bin/bash\n\n# EXPEDITOR_VERSION exists within the artifact_published workload\n\nexport DOCKER_CLI_EXPERIMENTAL=enabled\n\necho \"--- Creating manifest for ${EXPEDITOR_TARGET_CHANNEL}\"\ndocker manifest create \"chef/chef:${EXPEDITOR_TARGET_CHANNEL}\" \\", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "678521712", + "repo_full_name": "chef/chef", + "pr_number": 11871, + "pr_file": ".expeditor/promote-docker-images.sh", + "discussion_id": "678521712", + "commented_code": "@@ -0,0 +1,42 @@\n+#! 
/bin/bash\n+\n+# EXPEDITOR_VERSION exists within the artifact_published workload\n+\n+export DOCKER_CLI_EXPERIMENTAL=enabled\n+\n+echo \"--- Creating manifest for ${EXPEDITOR_TARGET_CHANNEL}\"\n+docker manifest create \"chef/chef:${EXPEDITOR_TARGET_CHANNEL}\" \\", + "comment_created_at": "2021-07-28T17:43:21+00:00", + "comment_author": "mimaslanka", + "comment_body": "Suggest that we move this to a function\r\n```\r\nfunction create_and_push_manifest () {\r\n echo \"--- Creating...\"\r\n docker manifest create ${1}:${2}\" \\\r\n --amend ${1}:${EXPEDITOR_VERSION}-arm64 \\\r\n --amend ....-amd64\r\n\r\n echo \"--- Pushing....\"\r\n docker manifest push....\r\n}\r\ncreate_and_push_manifest \"chef/chef\" \"${EXPEDITOR_TARGET_CHANNEL}\"\r\n \r\nif ....\r\n create_and_pus_manifest \"chef/chef\" \"latest\"\r\n ...\r\n```\r\n", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-structure-cicd-scripts.md b/_reviewers/chef-structure-cicd-scripts.md index 9544ee2..f7c49ba 100644 --- a/_reviewers/chef-structure-cicd-scripts.md +++ b/_reviewers/chef-structure-cicd-scripts.md @@ -55,51 +55,3 @@ function create_and_push_manifest() { # Then call it multiple times: create_and_push_manifest "chef/chef" "${EXPEDITOR_TARGET_CHANNEL}" ``` - - -[ - { - "discussion_id": "1082707950", - "pr_number": 13489, - "pr_file": ".buildkite/build-test-omnibus.sh", - "created_at": "2023-01-20T15:35:22+00:00", - "commented_code": "if [[ $BUILDKITE_ORGANIZATION_SLUG == \"chef-oss\" ]]; then\n echo \"- block: Build & Test Omnibus Packages\"\n echo \" prompt: Continue to run omnibus package build and tests for applicable platforms?\"\nfi\n\nFILTER=\"${OMNIBUS_FILTER:=*}\"\n\nplatforms=(\"amazon-2:centos-7\" \"centos-6:centos-6\" \"centos-7:centos-7\" \"centos-8:centos-8\" \"rhel-9:rhel-9\" \"debian-9:debian-9\" \"debian-10:debian-9\" \"debian-11:debian-9\" \"ubuntu-1604:ubuntu-1604\" \"ubuntu-1804:ubuntu-1604\" \"ubuntu-2004:ubuntu-1604\" \"ubuntu-2204:ubuntu-1604\" \"sles-15:sles-15\" \"windows-2019:windows-2019\")\n\nomnibus_build_platforms=()\nomnibus_test_platforms=()\n\n# build build array and test array based on filter\nfor platform in ${platforms[@]}; do\n case ${platform%:*} in\n $FILTER)\n omnibus_build_platforms[${#omnibus_build_platforms[@]}]=${platform#*:}\n omnibus_test_platforms[${#omnibus_test_platforms[@]}]=$platform\n ;;\n esac\ndone\n\n# remove duplicates from build array\nomnibus_build_platforms=($(printf \"%s\\n\" \"${omnibus_build_platforms[@]}\" | sort -u | tr '\\n' ' '))\n\nfor platform in ${omnibus_build_platforms[@]}; do\n if [[ $platform != *\"windows\"* ]]; then\n echo \"- label: \\\":hammer_and_wrench::docker: $platform\\\"\"\n echo \" retry:\"\n echo \" automatic:\"\n echo \" limit: 1\"\n echo \" key: build-$platform\"\n echo \" agents:\"\n echo \" queue: default-privileged\"\n echo \" plugins:\"\n echo \" - docker#v3.5.0:\"\n echo \" image: chefes/omnibus-toolchain-$platform:$OMNIBUS_TOOLCHAIN_VERSION\"\n echo \" privileged: true\"\n echo \" propagate-environment: true\"\n echo \" environment:\"\n echo \" - RPM_SIGNING_KEY\"\n echo \" - CHEF_FOUNDATION_VERSION\"\n echo \" commands:\"\n echo \" - ./.expeditor/scripts/omnibus_chef_build.sh\"\n echo \" timeout_in_minutes: 60\"\n else \n echo \"- label: \\\":hammer_and_wrench::windows: $platform\\\"\"\n echo \" retry:\"\n echo \" automatic:\"\n echo \" limit: 1\"\n echo \" key: build-$platform\"\n echo \" agents:\"\n echo \" queue: default-$platform-privileged\"\n echo \" plugins:\"\n echo \" - 
docker#v3.5.0:\"\n echo \" image: chefes/omnibus-toolchain-$platform:$OMNIBUS_TOOLCHAIN_VERSION\"\n echo \" shell:\"\n echo \" - powershell\"\n echo \" - \\\"-Command\\\"\"\n echo \" propagate-environment: true\"\n echo \" environment:\"\n echo \" - CHEF_FOUNDATION_VERSION\"\n echo \" - BUILDKITE_AGENT_ACCESS_TOKEN\"\n echo \" - AWS_ACCESS_KEY_ID\"\n echo \" - AWS_SECRET_ACCESS_KEY\"\n echo \" - AWS_SESSION_TOKEN\"\n echo \" volumes:\"\n echo ' - \"c:\\\\buildkite-agent:c:\\\\buildkite-agent\"'\n echo \" commands:\"\n echo \" - ./.expeditor/scripts/omnibus_chef_build.ps1\"\n echo \" timeout_in_minutes: 60\"\n fi\ndone\n\necho \"- wait: ~\"\n\nfor platform in ${omnibus_test_platforms[@]}; do\n if [[ $platform != *\"windows\"* ]]; then\n echo \"- env:\"\n echo \" OMNIBUS_BUILDER_KEY: build-${platform#*:}\"\n echo \" label: \\\":mag::docker: ${platform%:*}\\\"\"\n echo \" retry:\"\n echo \" automatic:\"\n echo \" limit: 1\"\n echo \" agents:\"\n echo \" queue: default-privileged\"\n echo \" plugins:\"\n echo \" - docker#v3.5.0:\"\n echo \" image: chefes/omnibus-toolchain-${platform%:*}:$OMNIBUS_TOOLCHAIN_VERSION\"\n echo \" privileged: true\"\n echo \" propagate-environment: true\"\n echo \" commands:\"\n echo \" - ./.expeditor/scripts/download_built_omnibus_pkgs.sh\"\n echo \" - omnibus/omnibus-test.sh\"\n echo \" timeout_in_minutes: 60\"\n else\n echo \"- env:\"\n echo \" OMNIBUS_BUILDER_KEY: build-windows-2019\"\n echo \" key: test-windows-2019\"\n echo ' label: \":mag::windows: windows-2019\"'\n echo \" retry:\"\n echo \" automatic:\"\n echo \" limit: 1\"\n echo \" agents:\"\n echo \" queue: default-windows-2019-privileged\"\n echo \" commands:\"\n echo \" - ./.expeditor/scripts/download_built_omnibus_pkgs.ps1\"\n echo \" - ./omnibus/omnibus-test.ps1\"\n echo \" timeout_in_minutes: 60\"\n fi\ndone", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1082707950", - "repo_full_name": "chef/chef", - "pr_number": 13489, - "pr_file": ".buildkite/build-test-omnibus.sh", - "discussion_id": "1082707950", - "commented_code": "@@ -0,0 +1,112 @@\n+if [[ $BUILDKITE_ORGANIZATION_SLUG == \"chef-oss\" ]]; then\n+ echo \"- block: Build & Test Omnibus Packages\"\n+ echo \" prompt: Continue to run omnibus package build and tests for applicable platforms?\"\n+fi\n+\n+FILTER=\"${OMNIBUS_FILTER:=*}\"\n+\n+platforms=(\"amazon-2:centos-7\" \"centos-6:centos-6\" \"centos-7:centos-7\" \"centos-8:centos-8\" \"rhel-9:rhel-9\" \"debian-9:debian-9\" \"debian-10:debian-9\" \"debian-11:debian-9\" \"ubuntu-1604:ubuntu-1604\" \"ubuntu-1804:ubuntu-1604\" \"ubuntu-2004:ubuntu-1604\" \"ubuntu-2204:ubuntu-1604\" \"sles-15:sles-15\" \"windows-2019:windows-2019\")\n+\n+omnibus_build_platforms=()\n+omnibus_test_platforms=()\n+\n+# build build array and test array based on filter\n+for platform in ${platforms[@]}; do\n+ case ${platform%:*} in\n+ $FILTER)\n+ omnibus_build_platforms[${#omnibus_build_platforms[@]}]=${platform#*:}\n+ omnibus_test_platforms[${#omnibus_test_platforms[@]}]=$platform\n+ ;;\n+ esac\n+done\n+\n+# remove duplicates from build array\n+omnibus_build_platforms=($(printf \"%s\\n\" \"${omnibus_build_platforms[@]}\" | sort -u | tr '\\n' ' '))\n+\n+for platform in ${omnibus_build_platforms[@]}; do\n+ if [[ $platform != *\"windows\"* ]]; then\n+ echo \"- label: \\\":hammer_and_wrench::docker: $platform\\\"\"\n+ echo \" retry:\"\n+ echo \" automatic:\"\n+ echo \" limit: 1\"\n+ echo \" key: build-$platform\"\n+ echo \" agents:\"\n+ echo \" queue: default-privileged\"\n+ echo \" 
plugins:\"\n+ echo \" - docker#v3.5.0:\"\n+ echo \" image: chefes/omnibus-toolchain-$platform:$OMNIBUS_TOOLCHAIN_VERSION\"\n+ echo \" privileged: true\"\n+ echo \" propagate-environment: true\"\n+ echo \" environment:\"\n+ echo \" - RPM_SIGNING_KEY\"\n+ echo \" - CHEF_FOUNDATION_VERSION\"\n+ echo \" commands:\"\n+ echo \" - ./.expeditor/scripts/omnibus_chef_build.sh\"\n+ echo \" timeout_in_minutes: 60\"\n+ else \n+ echo \"- label: \\\":hammer_and_wrench::windows: $platform\\\"\"\n+ echo \" retry:\"\n+ echo \" automatic:\"\n+ echo \" limit: 1\"\n+ echo \" key: build-$platform\"\n+ echo \" agents:\"\n+ echo \" queue: default-$platform-privileged\"\n+ echo \" plugins:\"\n+ echo \" - docker#v3.5.0:\"\n+ echo \" image: chefes/omnibus-toolchain-$platform:$OMNIBUS_TOOLCHAIN_VERSION\"\n+ echo \" shell:\"\n+ echo \" - powershell\"\n+ echo \" - \\\"-Command\\\"\"\n+ echo \" propagate-environment: true\"\n+ echo \" environment:\"\n+ echo \" - CHEF_FOUNDATION_VERSION\"\n+ echo \" - BUILDKITE_AGENT_ACCESS_TOKEN\"\n+ echo \" - AWS_ACCESS_KEY_ID\"\n+ echo \" - AWS_SECRET_ACCESS_KEY\"\n+ echo \" - AWS_SESSION_TOKEN\"\n+ echo \" volumes:\"\n+ echo ' - \"c:\\\\buildkite-agent:c:\\\\buildkite-agent\"'\n+ echo \" commands:\"\n+ echo \" - ./.expeditor/scripts/omnibus_chef_build.ps1\"\n+ echo \" timeout_in_minutes: 60\"\n+ fi\n+done\n+\n+echo \"- wait: ~\"\n+\n+for platform in ${omnibus_test_platforms[@]}; do\n+ if [[ $platform != *\"windows\"* ]]; then\n+ echo \"- env:\"\n+ echo \" OMNIBUS_BUILDER_KEY: build-${platform#*:}\"\n+ echo \" label: \\\":mag::docker: ${platform%:*}\\\"\"\n+ echo \" retry:\"\n+ echo \" automatic:\"\n+ echo \" limit: 1\"\n+ echo \" agents:\"\n+ echo \" queue: default-privileged\"\n+ echo \" plugins:\"\n+ echo \" - docker#v3.5.0:\"\n+ echo \" image: chefes/omnibus-toolchain-${platform%:*}:$OMNIBUS_TOOLCHAIN_VERSION\"\n+ echo \" privileged: true\"\n+ echo \" propagate-environment: true\"\n+ echo \" commands:\"\n+ echo \" - ./.expeditor/scripts/download_built_omnibus_pkgs.sh\"\n+ echo \" - omnibus/omnibus-test.sh\"\n+ echo \" timeout_in_minutes: 60\"\n+ else\n+ echo \"- env:\"\n+ echo \" OMNIBUS_BUILDER_KEY: build-windows-2019\"\n+ echo \" key: test-windows-2019\"\n+ echo ' label: \":mag::windows: windows-2019\"'\n+ echo \" retry:\"\n+ echo \" automatic:\"\n+ echo \" limit: 1\"\n+ echo \" agents:\"\n+ echo \" queue: default-windows-2019-privileged\"\n+ echo \" commands:\"\n+ echo \" - ./.expeditor/scripts/download_built_omnibus_pkgs.ps1\"\n+ echo \" - ./omnibus/omnibus-test.ps1\"\n+ echo \" timeout_in_minutes: 60\"\n+ fi\n+done", - "comment_created_at": "2023-01-20T15:35:22+00:00", - "comment_author": "tpowell-progress", - "comment_body": "```suggestion\r\nfor platform in ${omnibus_build_platforms[@]}; do\r\n if [[ $platform != *\"windows\"* ]]; then\r\n cat <<- NONWINDOWS\r\n- label: \":hammer_and_wrench::docker: $platform\"\r\n retry:\r\n automatic:\r\n limit: 1\r\n key: build-$platform\r\n agents:\r\n queue: default-privileged\r\n plugins:\r\n - docker#v3.5.0:\r\n image: chefes/omnibus-toolchain-$platform:$OMNIBUS_TOOLCHAIN_VERSION\r\n privileged: true\r\n propagate-environment: true\r\n environment:\r\n - RPM_SIGNING_KEY\r\n - CHEF_FOUNDATION_VERSION\r\n commands:\r\n - ./.expeditor/scripts/omnibus_chef_build.sh\r\n timeout_in_minutes: 60\r\nNONWINDOWS\r\n else\r\n cat <<- WINDOWS\r\n- label: \":hammer_and_wrench::windows: $platform\"\r\n retry:\r\n automatic:\r\n limit: 1\r\n key: build-$platform\r\n agents:\r\n queue: default-$platform-privileged\r\n plugins:\r\n - 
docker#v3.5.0:\r\n image: chefes/omnibus-toolchain-$platform:$OMNIBUS_TOOLCHAIN_VERSION\r\n shell:\r\n - powershell\r\n - \"-Command\"\r\n propagate-environment: true\r\n environment:\r\n - CHEF_FOUNDATION_VERSION\r\n - BUILDKITE_AGENT_ACCESS_TOKEN\r\n - AWS_ACCESS_KEY_ID\r\n - AWS_SECRET_ACCESS_KEY\r\n - AWS_SESSION_TOKEN\r\n volumes:\r\n - \"c:\\\\buildkite-agent:c:\\\\buildkite-agent\"\r\n commands:\r\n - ./.expeditor/scripts/omnibus_chef_build.ps1\r\n timeout_in_minutes: 60\r\nWINDOWS\r\n fi\r\ndone\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "678521712", - "pr_number": 11871, - "pr_file": ".expeditor/promote-docker-images.sh", - "created_at": "2021-07-28T17:43:21+00:00", - "commented_code": "#! /bin/bash\n\n# EXPEDITOR_VERSION exists within the artifact_published workload\n\nexport DOCKER_CLI_EXPERIMENTAL=enabled\n\necho \"--- Creating manifest for ${EXPEDITOR_TARGET_CHANNEL}\"\ndocker manifest create \"chef/chef:${EXPEDITOR_TARGET_CHANNEL}\" \\", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "678521712", - "repo_full_name": "chef/chef", - "pr_number": 11871, - "pr_file": ".expeditor/promote-docker-images.sh", - "discussion_id": "678521712", - "commented_code": "@@ -0,0 +1,42 @@\n+#! /bin/bash\n+\n+# EXPEDITOR_VERSION exists within the artifact_published workload\n+\n+export DOCKER_CLI_EXPERIMENTAL=enabled\n+\n+echo \"--- Creating manifest for ${EXPEDITOR_TARGET_CHANNEL}\"\n+docker manifest create \"chef/chef:${EXPEDITOR_TARGET_CHANNEL}\" \\", - "comment_created_at": "2021-07-28T17:43:21+00:00", - "comment_author": "mimaslanka", - "comment_body": "Suggest that we move this to a function\r\n```\r\nfunction create_and_push_manifest () {\r\n echo \"--- Creating...\"\r\n docker manifest create ${1}:${2}\" \\\r\n --amend ${1}:${EXPEDITOR_VERSION}-arm64 \\\r\n --amend ....-amd64\r\n\r\n echo \"--- Pushing....\"\r\n docker manifest push....\r\n}\r\ncreate_and_push_manifest \"chef/chef\" \"${EXPEDITOR_TARGET_CHANNEL}\"\r\n \r\nif ....\r\n create_and_pus_manifest \"chef/chef\" \"latest\"\r\n ...\r\n```\r\n", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-update-security-critical-dependencies.json b/_reviewers/chef-update-security-critical-dependencies.json new file mode 100644 index 0000000..b14a567 --- /dev/null +++ b/_reviewers/chef-update-security-critical-dependencies.json @@ -0,0 +1,24 @@ +[ + { + "discussion_id": "697660971", + "pr_number": 11976, + "pr_file": "RELEASE_NOTES.md", + "created_at": "2021-08-27T19:04:43+00:00", + "commented_code": "This file holds \"in progress\" release notes for the current release under development and is intended for consumption by the Chef Documentation team. 
Please see for the official Chef release notes.\n\n## What's New in 17.4\n## What's New in 17.4.38\n\n### Bug fixes\n\n- Resolved a regression introduced in Chef Infra Client 17.4 that would cause HWRP-style resources inheriting from LWRPBase to fail.\n\n### Enhancements\n\n- Log output has been improved to better deliniate when the Infra Phase and Compliance Phase start and end.\n- Ohai data collection of Amazon EC2 metadata has been improved to collect additional data for some configurations.\n- Removed ERROR logs when retrying failed communication with the Chef Infra Server.\n- Improved the `archive_file` resource by upgrading the `libarchive` library it uses, which includes the following improvements:\n - Support for PWB and v7 binary CPIO formats.\n - Support for the deflate algorithm in symbolic link decompression with zip files.\n - Various bug fixes when working with CAB, ZIP, 7zip, and RAR files.\n\n### Security\n\n#### OpenSSL 1.1.1l\n\nOpenSSL has been updated from 1.1.1k to 1.1.1l on macOS systems to resolve the following CVEs:\n\n- [CVE-2021-3711](https://nvd.nist.gov/vuln/detail/CVE-2021-3711)\n- [CVE-2021-3712](https://nvd.nist.gov/vuln/detail/CVE-2021-3712)\n\n#### libarchive 3.5.2\n\nUpdated the libarchive library which powers the archive_file resource from 3.5.1 to 3.5.2 to resolve security vulnerabilities in libarchive's handling of symbolic links.", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "697660971", + "repo_full_name": "chef/chef", + "pr_number": 11976, + "pr_file": "RELEASE_NOTES.md", + "discussion_id": "697660971", + "commented_code": "@@ -1,6 +1,35 @@\n This file holds \"in progress\" release notes for the current release under development and is intended for consumption by the Chef Documentation team. 
Please see for the official Chef release notes.\n \n-## What's New in 17.4\n+## What's New in 17.4.38\n+\n+### Bug fixes\n+\n+- Resolved a regression introduced in Chef Infra Client 17.4 that would cause HWRP-style resources inheriting from LWRPBase to fail.\n+\n+### Enhancements\n+\n+- Log output has been improved to better deliniate when the Infra Phase and Compliance Phase start and end.\n+- Ohai data collection of Amazon EC2 metadata has been improved to collect additional data for some configurations.\n+- Removed ERROR logs when retrying failed communication with the Chef Infra Server.\n+- Improved the `archive_file` resource by upgrading the `libarchive` library it uses, which includes the following improvements:\n+ - Support for PWB and v7 binary CPIO formats.\n+ - Support for the deflate algorithm in symbolic link decompression with zip files.\n+ - Various bug fixes when working with CAB, ZIP, 7zip, and RAR files.\n+\n+### Security\n+\n+#### OpenSSL 1.1.1l\n+\n+OpenSSL has been updated from 1.1.1k to 1.1.1l on macOS systems to resolve the following CVEs:\n+\n+- [CVE-2021-3711](https://nvd.nist.gov/vuln/detail/CVE-2021-3711)\n+- [CVE-2021-3712](https://nvd.nist.gov/vuln/detail/CVE-2021-3712)\n+\n+#### libarchive 3.5.2\n+\n+Updated the libarchive library which powers the archive_file resource from 3.5.1 to 3.5.2 to resolve security vulnerabilities in libarchive's handling of symbolic links.", + "comment_created_at": "2021-08-27T19:04:43+00:00", + "comment_author": "kagarmoe", + "comment_body": "```suggestion\r\nUpdated the libarchive library that powers the `archive_file` resource from 3.5.1 to 3.5.2 to resolve security vulnerabilities in libarchive's handling of symbolic links.\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-update-security-critical-dependencies.md b/_reviewers/chef-update-security-critical-dependencies.md index 42e8d6e..942774c 100644 --- a/_reviewers/chef-update-security-critical-dependencies.md +++ b/_reviewers/chef-update-security-critical-dependencies.md @@ -36,29 +36,3 @@ When updating dependencies for security reasons, always include clear documentat 2. The version change (from/to) 3. What security vulnerability was addressed 4. Any potential impact on existing functionality - - -[ - { - "discussion_id": "697660971", - "pr_number": 11976, - "pr_file": "RELEASE_NOTES.md", - "created_at": "2021-08-27T19:04:43+00:00", - "commented_code": "This file holds \"in progress\" release notes for the current release under development and is intended for consumption by the Chef Documentation team. 
Please see for the official Chef release notes.\n\n## What's New in 17.4\n## What's New in 17.4.38\n\n### Bug fixes\n\n- Resolved a regression introduced in Chef Infra Client 17.4 that would cause HWRP-style resources inheriting from LWRPBase to fail.\n\n### Enhancements\n\n- Log output has been improved to better deliniate when the Infra Phase and Compliance Phase start and end.\n- Ohai data collection of Amazon EC2 metadata has been improved to collect additional data for some configurations.\n- Removed ERROR logs when retrying failed communication with the Chef Infra Server.\n- Improved the `archive_file` resource by upgrading the `libarchive` library it uses, which includes the following improvements:\n - Support for PWB and v7 binary CPIO formats.\n - Support for the deflate algorithm in symbolic link decompression with zip files.\n - Various bug fixes when working with CAB, ZIP, 7zip, and RAR files.\n\n### Security\n\n#### OpenSSL 1.1.1l\n\nOpenSSL has been updated from 1.1.1k to 1.1.1l on macOS systems to resolve the following CVEs:\n\n- [CVE-2021-3711](https://nvd.nist.gov/vuln/detail/CVE-2021-3711)\n- [CVE-2021-3712](https://nvd.nist.gov/vuln/detail/CVE-2021-3712)\n\n#### libarchive 3.5.2\n\nUpdated the libarchive library which powers the archive_file resource from 3.5.1 to 3.5.2 to resolve security vulnerabilities in libarchive's handling of symbolic links.", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "697660971", - "repo_full_name": "chef/chef", - "pr_number": 11976, - "pr_file": "RELEASE_NOTES.md", - "discussion_id": "697660971", - "commented_code": "@@ -1,6 +1,35 @@\n This file holds \"in progress\" release notes for the current release under development and is intended for consumption by the Chef Documentation team. 
Please see for the official Chef release notes.\n \n-## What's New in 17.4\n+## What's New in 17.4.38\n+\n+### Bug fixes\n+\n+- Resolved a regression introduced in Chef Infra Client 17.4 that would cause HWRP-style resources inheriting from LWRPBase to fail.\n+\n+### Enhancements\n+\n+- Log output has been improved to better deliniate when the Infra Phase and Compliance Phase start and end.\n+- Ohai data collection of Amazon EC2 metadata has been improved to collect additional data for some configurations.\n+- Removed ERROR logs when retrying failed communication with the Chef Infra Server.\n+- Improved the `archive_file` resource by upgrading the `libarchive` library it uses, which includes the following improvements:\n+ - Support for PWB and v7 binary CPIO formats.\n+ - Support for the deflate algorithm in symbolic link decompression with zip files.\n+ - Various bug fixes when working with CAB, ZIP, 7zip, and RAR files.\n+\n+### Security\n+\n+#### OpenSSL 1.1.1l\n+\n+OpenSSL has been updated from 1.1.1k to 1.1.1l on macOS systems to resolve the following CVEs:\n+\n+- [CVE-2021-3711](https://nvd.nist.gov/vuln/detail/CVE-2021-3711)\n+- [CVE-2021-3712](https://nvd.nist.gov/vuln/detail/CVE-2021-3712)\n+\n+#### libarchive 3.5.2\n+\n+Updated the libarchive library which powers the archive_file resource from 3.5.1 to 3.5.2 to resolve security vulnerabilities in libarchive's handling of symbolic links.", - "comment_created_at": "2021-08-27T19:04:43+00:00", - "comment_author": "kagarmoe", - "comment_body": "```suggestion\r\nUpdated the libarchive library that powers the `archive_file` resource from 3.5.1 to 3.5.2 to resolve security vulnerabilities in libarchive's handling of symbolic links.\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-use-strict-test-doubles.json b/_reviewers/chef-use-strict-test-doubles.json new file mode 100644 index 0000000..a60a6a0 --- /dev/null +++ b/_reviewers/chef-use-strict-test-doubles.json @@ -0,0 +1,102 @@ +[ + { + "discussion_id": "477494373", + "pr_number": 10171, + "pr_file": "spec/unit/provider/mount/linux_spec.rb", + "created_at": "2020-08-26T18:13:37+00:00", + "commented_code": "require \"spec_helper\"\nrequire \"ostruct\"\n\ndescribe Chef::Provider::Mount::Linux do\n before(:each) do\n @node = Chef::Node.new\n @events = Chef::EventDispatch::Dispatcher.new\n @run_context = Chef::RunContext.new(@node, {}, @events)\n\n @new_resource = Chef::Resource::Mount.new(\"/tmp/foo\")\n @new_resource.device \"/dev/sdz1\"\n @new_resource.device_type :device\n @new_resource.fstype \"ext3\"\n\n @new_resource.supports remount: false\n\n @provider = Chef::Provider::Mount::Linux.new(@new_resource, @run_context)\n\n allow(::File).to receive(:exists?).with(\"/dev/sdz1\").and_return true\n allow(::File).to receive(:exists?).with(\"/tmp/foo\").and_return true\n allow(::File).to receive(:realpath).with(\"/dev/sdz1\").and_return \"/dev/sdz1\"\n allow(::File).to receive(:realpath).with(\"/tmp/foo\").and_return \"/tmp/foo\"\n end\n\n context \"to see if the volume is mounted\" do\n\n it \"should set mounted true if the mount point is found in the mounts list\" do\n allow(@provider).to receive(:shell_out!).and_return(OpenStruct.new(stdout: \"/tmp/foo /dev/sdz1 type ext3 (rw)\\n\"))", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "477494373", + "repo_full_name": "chef/chef", + "pr_number": 10171, + "pr_file": "spec/unit/provider/mount/linux_spec.rb", + "discussion_id": "477494373", + "commented_code": "@@ -0,0 +1,91 
@@\n+require \"spec_helper\"\n+require \"ostruct\"\n+\n+describe Chef::Provider::Mount::Linux do\n+ before(:each) do\n+ @node = Chef::Node.new\n+ @events = Chef::EventDispatch::Dispatcher.new\n+ @run_context = Chef::RunContext.new(@node, {}, @events)\n+\n+ @new_resource = Chef::Resource::Mount.new(\"/tmp/foo\")\n+ @new_resource.device \"/dev/sdz1\"\n+ @new_resource.device_type :device\n+ @new_resource.fstype \"ext3\"\n+\n+ @new_resource.supports remount: false\n+\n+ @provider = Chef::Provider::Mount::Linux.new(@new_resource, @run_context)\n+\n+ allow(::File).to receive(:exists?).with(\"/dev/sdz1\").and_return true\n+ allow(::File).to receive(:exists?).with(\"/tmp/foo\").and_return true\n+ allow(::File).to receive(:realpath).with(\"/dev/sdz1\").and_return \"/dev/sdz1\"\n+ allow(::File).to receive(:realpath).with(\"/tmp/foo\").and_return \"/tmp/foo\"\n+ end\n+\n+ context \"to see if the volume is mounted\" do\n+\n+ it \"should set mounted true if the mount point is found in the mounts list\" do\n+ allow(@provider).to receive(:shell_out!).and_return(OpenStruct.new(stdout: \"/tmp/foo /dev/sdz1 type ext3 (rw)\\n\"))", + "comment_created_at": "2020-08-26T18:13:37+00:00", + "comment_author": "phiggins", + "comment_body": "```suggestion\r\n allow(@provider).to receive(:shell_out!).and_return(double(stdout: \"/tmp/foo /dev/sdz1 type ext3 (rw)\\n\"))\r\n```\r\n\r\nSince an OpenStruct instance will respond to any method, this can cause problems in the future when the code changes in unexpected ways. Instead these tests should be explicit about what behavior is expected of the test objects.", + "pr_file_module": null + }, + { + "comment_id": "478256789", + "repo_full_name": "chef/chef", + "pr_number": 10171, + "pr_file": "spec/unit/provider/mount/linux_spec.rb", + "discussion_id": "477494373", + "commented_code": "@@ -0,0 +1,91 @@\n+require \"spec_helper\"\n+require \"ostruct\"\n+\n+describe Chef::Provider::Mount::Linux do\n+ before(:each) do\n+ @node = Chef::Node.new\n+ @events = Chef::EventDispatch::Dispatcher.new\n+ @run_context = Chef::RunContext.new(@node, {}, @events)\n+\n+ @new_resource = Chef::Resource::Mount.new(\"/tmp/foo\")\n+ @new_resource.device \"/dev/sdz1\"\n+ @new_resource.device_type :device\n+ @new_resource.fstype \"ext3\"\n+\n+ @new_resource.supports remount: false\n+\n+ @provider = Chef::Provider::Mount::Linux.new(@new_resource, @run_context)\n+\n+ allow(::File).to receive(:exists?).with(\"/dev/sdz1\").and_return true\n+ allow(::File).to receive(:exists?).with(\"/tmp/foo\").and_return true\n+ allow(::File).to receive(:realpath).with(\"/dev/sdz1\").and_return \"/dev/sdz1\"\n+ allow(::File).to receive(:realpath).with(\"/tmp/foo\").and_return \"/tmp/foo\"\n+ end\n+\n+ context \"to see if the volume is mounted\" do\n+\n+ it \"should set mounted true if the mount point is found in the mounts list\" do\n+ allow(@provider).to receive(:shell_out!).and_return(OpenStruct.new(stdout: \"/tmp/foo /dev/sdz1 type ext3 (rw)\\n\"))", + "comment_created_at": "2020-08-27T08:45:09+00:00", + "comment_author": "antima-gupta", + "comment_body": "Agreed, I have updated the changes.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "696936306", + "pr_number": 11942, + "pr_file": "spec/unit/secret_fetcher/hashi_vault_spec.rb", + "created_at": "2021-08-26T19:57:05+00:00", + "commented_code": "#\n# Author:: Marc Paradise \n# Copyright:: Copyright (c) Chef Software Inc.\n# License:: Apache License, Version 2.0\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not 
use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n#\n\nrequire_relative \"../../spec_helper\"\nrequire \"chef/secret_fetcher/hashi_vault\"\n\ndescribe Chef::SecretFetcher::HashiVault do\n let(:node) { {} }\n let(:run_context) { double(\"run_context\", node: node) }\n let(:fetcher_config) { {} }\n let(:fetcher) {\n Chef::SecretFetcher::HashiVault.new( fetcher_config, run_context )\n }\n\n context \"when validating HashiVault provided configuration\" do\n context \"and role_name is not provided\" do\n let(:fetcher_config) { { vault_addr: \"vault.example.com\" } }\n it \"raises ConfigurationInvalid\" do\n expect { fetcher.validate! }.to raise_error(Chef::Exceptions::Secret::ConfigurationInvalid)\n end\n end\n context \"and vault_addr is not provided\" do\n let(:fetcher_config) { { role_name: \"example-role\" } }\n it \"raises ConfigurationInvalid\" do\n expect { fetcher.validate! }.to raise_error(Chef::Exceptions::Secret::ConfigurationInvalid)\n end\n end\n end\n\n context \"when all required config is provided\" do\n let(:fetcher_config) { { vault_addr: \"vault.example.com\", role_name: \"example-role\" } }\n it \"obtains a token via AWS IAM auth\" do\n auth_stub = double(\"vault auth\", aws_iam: nil)", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "696936306", + "repo_full_name": "chef/chef", + "pr_number": 11942, + "pr_file": "spec/unit/secret_fetcher/hashi_vault_spec.rb", + "discussion_id": "696936306", + "commented_code": "@@ -0,0 +1,57 @@\n+#\n+# Author:: Marc Paradise \n+# Copyright:: Copyright (c) Chef Software Inc.\n+# License:: Apache License, Version 2.0\n+#\n+# Licensed under the Apache License, Version 2.0 (the \"License\");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an \"AS IS\" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+#\n+#\n+\n+require_relative \"../../spec_helper\"\n+require \"chef/secret_fetcher/hashi_vault\"\n+\n+describe Chef::SecretFetcher::HashiVault do\n+ let(:node) { {} }\n+ let(:run_context) { double(\"run_context\", node: node) }\n+ let(:fetcher_config) { {} }\n+ let(:fetcher) {\n+ Chef::SecretFetcher::HashiVault.new( fetcher_config, run_context )\n+ }\n+\n+ context \"when validating HashiVault provided configuration\" do\n+ context \"and role_name is not provided\" do\n+ let(:fetcher_config) { { vault_addr: \"vault.example.com\" } }\n+ it \"raises ConfigurationInvalid\" do\n+ expect { fetcher.validate! }.to raise_error(Chef::Exceptions::Secret::ConfigurationInvalid)\n+ end\n+ end\n+ context \"and vault_addr is not provided\" do\n+ let(:fetcher_config) { { role_name: \"example-role\" } }\n+ it \"raises ConfigurationInvalid\" do\n+ expect { fetcher.validate! 
}.to raise_error(Chef::Exceptions::Secret::ConfigurationInvalid)\n+ end\n+ end\n+ end\n+\n+ context \"when all required config is provided\" do\n+ let(:fetcher_config) { { vault_addr: \"vault.example.com\", role_name: \"example-role\" } }\n+ it \"obtains a token via AWS IAM auth\" do\n+ auth_stub = double(\"vault auth\", aws_iam: nil)", + "comment_created_at": "2021-08-26T19:57:05+00:00", + "comment_author": "lamont-granquist", + "comment_body": "that should really be an `instance_double` of the appropriate type", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "662629591", + "pr_number": 11534, + "pr_file": "spec/unit/resource/rhsm_subscription_spec.rb", + "created_at": "2021-07-01T22:35:49+00:00", + "commented_code": "expect { resource.action :remove }.not_to raise_error\n end\n\n describe \"#action_attach\" do\n before do\n dummy = Mixlib::ShellOut.new\n allow_any_instance_of(Chef::Mixin::ShellOut).to receive(:shell_out!).with(\"subscription-manager attach --pool=#{resource.pool_id}\").and_return(dummy)\n allow(dummy).to receive(:stdout).and_return(\"Successfully attached a subscription for: My Subscription\")\n allow(dummy).to receive(:exitstatus).and_return(0)\n allow(dummy).to receive(:error?).and_return(false)\n node.automatic_attrs[:platform_family] = \"rhel\"\n node.automatic_attrs[:platform_version] = \"7.3\"\n allow_any_instance_of(Chef::Provider::Package::Yum).to receive(:installed_version).with(0).and_return(Chef::Provider::Package::Yum::Version.new(nil, nil, nil))\n allow_any_instance_of(Chef::Provider::Package::Yum).to receive(:available_version).with(0).and_return(Chef::Provider::Package::Yum::Version.new(nil, nil, nil))\n allow_any_instance_of(Chef::Provider::Package::Yum::PythonHelper).to receive(:close_rpmdb)", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "662629591", + "repo_full_name": "chef/chef", + "pr_number": 11534, + "pr_file": "spec/unit/resource/rhsm_subscription_spec.rb", + "discussion_id": "662629591", + "commented_code": "@@ -38,6 +43,46 @@\n expect { resource.action :remove }.not_to raise_error\n end\n \n+ describe \"#action_attach\" do\n+ before do\n+ dummy = Mixlib::ShellOut.new\n+ allow_any_instance_of(Chef::Mixin::ShellOut).to receive(:shell_out!).with(\"subscription-manager attach --pool=#{resource.pool_id}\").and_return(dummy)\n+ allow(dummy).to receive(:stdout).and_return(\"Successfully attached a subscription for: My Subscription\")\n+ allow(dummy).to receive(:exitstatus).and_return(0)\n+ allow(dummy).to receive(:error?).and_return(false)\n+ node.automatic_attrs[:platform_family] = \"rhel\"\n+ node.automatic_attrs[:platform_version] = \"7.3\"\n+ allow_any_instance_of(Chef::Provider::Package::Yum).to receive(:installed_version).with(0).and_return(Chef::Provider::Package::Yum::Version.new(nil, nil, nil))\n+ allow_any_instance_of(Chef::Provider::Package::Yum).to receive(:available_version).with(0).and_return(Chef::Provider::Package::Yum::Version.new(nil, nil, nil))\n+ allow_any_instance_of(Chef::Provider::Package::Yum::PythonHelper).to receive(:close_rpmdb)", + "comment_created_at": "2021-07-01T22:35:49+00:00", + "comment_author": "lamont-granquist", + "comment_body": "You're trying way too hard here. 
You're testing that the details of the ResourceResolver correctly give back a Chef::Package::Provider::Yum when you're forcing the distro version, which is duplicating all the testing which goes on already to prove that happens.\r\n\r\nI'd just create a double object and:\r\n\r\n```\r\nexpect(provider).to receive(:build_resource).with().and_return(mydouble)\r\nexpect(mydouble).to receive(:run_action).with(:flush_cache)\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "662633326", + "pr_number": 11534, + "pr_file": "spec/unit/resource/rhsm_subscription_spec.rb", + "created_at": "2021-07-01T22:46:57+00:00", + "commented_code": "expect { resource.action :remove }.not_to raise_error\n end\n\n describe \"#action_attach\" do\n before do\n dummy = Mixlib::ShellOut.new\n allow_any_instance_of(Chef::Mixin::ShellOut).to receive(:shell_out!).with(\"subscription-manager attach --pool=#{resource.pool_id}\").and_return(dummy)\n allow(dummy).to receive(:stdout).and_return(\"Successfully attached a subscription for: My Subscription\")\n allow(dummy).to receive(:exitstatus).and_return(0)\n allow(dummy).to receive(:error?).and_return(false)", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "662633326", + "repo_full_name": "chef/chef", + "pr_number": 11534, + "pr_file": "spec/unit/resource/rhsm_subscription_spec.rb", + "discussion_id": "662633326", + "commented_code": "@@ -38,6 +43,46 @@\n expect { resource.action :remove }.not_to raise_error\n end\n \n+ describe \"#action_attach\" do\n+ before do\n+ dummy = Mixlib::ShellOut.new\n+ allow_any_instance_of(Chef::Mixin::ShellOut).to receive(:shell_out!).with(\"subscription-manager attach --pool=#{resource.pool_id}\").and_return(dummy)\n+ allow(dummy).to receive(:stdout).and_return(\"Successfully attached a subscription for: My Subscription\")\n+ allow(dummy).to receive(:exitstatus).and_return(0)\n+ allow(dummy).to receive(:error?).and_return(false)", + "comment_created_at": "2021-07-01T22:46:57+00:00", + "comment_author": "lamont-granquist", + "comment_body": "Here also you should stub at the level of:\r\n\r\n```\r\nso = instance_double(Mixlib::ShellOut)\r\nexpect(provider).to receive(:shell_out!).with(<...>).and_return(so)\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-use-strict-test-doubles.md b/_reviewers/chef-use-strict-test-doubles.md index eb615b1..fe88862 100644 --- a/_reviewers/chef-use-strict-test-doubles.md +++ b/_reviewers/chef-use-strict-test-doubles.md @@ -34,107 +34,3 @@ This approach: - Catches interface changes during test execution - Makes dependencies explicit in test code - Prevents tests from silently passing with invalid assumptions - - -[ - { - "discussion_id": "477494373", - "pr_number": 10171, - "pr_file": "spec/unit/provider/mount/linux_spec.rb", - "created_at": "2020-08-26T18:13:37+00:00", - "commented_code": "require \"spec_helper\"\nrequire \"ostruct\"\n\ndescribe Chef::Provider::Mount::Linux do\n before(:each) do\n @node = Chef::Node.new\n @events = Chef::EventDispatch::Dispatcher.new\n @run_context = Chef::RunContext.new(@node, {}, @events)\n\n @new_resource = Chef::Resource::Mount.new(\"/tmp/foo\")\n @new_resource.device \"/dev/sdz1\"\n @new_resource.device_type :device\n @new_resource.fstype \"ext3\"\n\n @new_resource.supports remount: false\n\n @provider = Chef::Provider::Mount::Linux.new(@new_resource, @run_context)\n\n allow(::File).to receive(:exists?).with(\"/dev/sdz1\").and_return true\n allow(::File).to 
receive(:exists?).with(\"/tmp/foo\").and_return true\n allow(::File).to receive(:realpath).with(\"/dev/sdz1\").and_return \"/dev/sdz1\"\n allow(::File).to receive(:realpath).with(\"/tmp/foo\").and_return \"/tmp/foo\"\n end\n\n context \"to see if the volume is mounted\" do\n\n it \"should set mounted true if the mount point is found in the mounts list\" do\n allow(@provider).to receive(:shell_out!).and_return(OpenStruct.new(stdout: \"/tmp/foo /dev/sdz1 type ext3 (rw)\\n\"))", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "477494373", - "repo_full_name": "chef/chef", - "pr_number": 10171, - "pr_file": "spec/unit/provider/mount/linux_spec.rb", - "discussion_id": "477494373", - "commented_code": "@@ -0,0 +1,91 @@\n+require \"spec_helper\"\n+require \"ostruct\"\n+\n+describe Chef::Provider::Mount::Linux do\n+ before(:each) do\n+ @node = Chef::Node.new\n+ @events = Chef::EventDispatch::Dispatcher.new\n+ @run_context = Chef::RunContext.new(@node, {}, @events)\n+\n+ @new_resource = Chef::Resource::Mount.new(\"/tmp/foo\")\n+ @new_resource.device \"/dev/sdz1\"\n+ @new_resource.device_type :device\n+ @new_resource.fstype \"ext3\"\n+\n+ @new_resource.supports remount: false\n+\n+ @provider = Chef::Provider::Mount::Linux.new(@new_resource, @run_context)\n+\n+ allow(::File).to receive(:exists?).with(\"/dev/sdz1\").and_return true\n+ allow(::File).to receive(:exists?).with(\"/tmp/foo\").and_return true\n+ allow(::File).to receive(:realpath).with(\"/dev/sdz1\").and_return \"/dev/sdz1\"\n+ allow(::File).to receive(:realpath).with(\"/tmp/foo\").and_return \"/tmp/foo\"\n+ end\n+\n+ context \"to see if the volume is mounted\" do\n+\n+ it \"should set mounted true if the mount point is found in the mounts list\" do\n+ allow(@provider).to receive(:shell_out!).and_return(OpenStruct.new(stdout: \"/tmp/foo /dev/sdz1 type ext3 (rw)\\n\"))", - "comment_created_at": "2020-08-26T18:13:37+00:00", - "comment_author": "phiggins", - "comment_body": "```suggestion\r\n allow(@provider).to receive(:shell_out!).and_return(double(stdout: \"/tmp/foo /dev/sdz1 type ext3 (rw)\\n\"))\r\n```\r\n\r\nSince an OpenStruct instance will respond to any method, this can cause problems in the future when the code changes in unexpected ways. 
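As a rough illustration of the verifying-double approach recommended in the surrounding review comments (the `provider` variable, the mount output string, and the spec wrapper below are assumptions borrowed from that discussion, not part of the original patch), a strict double only answers the messages it was given, so interface drift fails the test instead of silently returning `nil`:

```ruby
# Hypothetical RSpec snippet (not from the original patch) contrasting the two styles.
require "ostruct"
require "mixlib/shellout"

it "reads the mount list via a verifying double" do
  # Permissive: OpenStruct answers *any* message with nil, so drift goes unnoticed.
  loose = OpenStruct.new(stdout: "/tmp/foo /dev/sdz1 type ext3 (rw)\n")
  loose.exitstatus # => nil, no failure even though nothing defined it

  # Strict: instance_double verifies :stdout exists on Mixlib::ShellOut and
  # only the stubbed messages are allowed on the double.
  so = instance_double(Mixlib::ShellOut, stdout: "/tmp/foo /dev/sdz1 type ext3 (rw)\n")
  allow(provider).to receive(:shell_out!).and_return(so)
  expect(provider.shell_out!.stdout).to include("/tmp/foo")
  # so.exitstatus would raise here, because it was never stubbed on the double.
end
```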
Instead these tests should be explicit about what behavior is expected of the test objects.", - "pr_file_module": null - }, - { - "comment_id": "478256789", - "repo_full_name": "chef/chef", - "pr_number": 10171, - "pr_file": "spec/unit/provider/mount/linux_spec.rb", - "discussion_id": "477494373", - "commented_code": "@@ -0,0 +1,91 @@\n+require \"spec_helper\"\n+require \"ostruct\"\n+\n+describe Chef::Provider::Mount::Linux do\n+ before(:each) do\n+ @node = Chef::Node.new\n+ @events = Chef::EventDispatch::Dispatcher.new\n+ @run_context = Chef::RunContext.new(@node, {}, @events)\n+\n+ @new_resource = Chef::Resource::Mount.new(\"/tmp/foo\")\n+ @new_resource.device \"/dev/sdz1\"\n+ @new_resource.device_type :device\n+ @new_resource.fstype \"ext3\"\n+\n+ @new_resource.supports remount: false\n+\n+ @provider = Chef::Provider::Mount::Linux.new(@new_resource, @run_context)\n+\n+ allow(::File).to receive(:exists?).with(\"/dev/sdz1\").and_return true\n+ allow(::File).to receive(:exists?).with(\"/tmp/foo\").and_return true\n+ allow(::File).to receive(:realpath).with(\"/dev/sdz1\").and_return \"/dev/sdz1\"\n+ allow(::File).to receive(:realpath).with(\"/tmp/foo\").and_return \"/tmp/foo\"\n+ end\n+\n+ context \"to see if the volume is mounted\" do\n+\n+ it \"should set mounted true if the mount point is found in the mounts list\" do\n+ allow(@provider).to receive(:shell_out!).and_return(OpenStruct.new(stdout: \"/tmp/foo /dev/sdz1 type ext3 (rw)\\n\"))", - "comment_created_at": "2020-08-27T08:45:09+00:00", - "comment_author": "antima-gupta", - "comment_body": "Agreed, I have updated the changes.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "696936306", - "pr_number": 11942, - "pr_file": "spec/unit/secret_fetcher/hashi_vault_spec.rb", - "created_at": "2021-08-26T19:57:05+00:00", - "commented_code": "#\n# Author:: Marc Paradise \n# Copyright:: Copyright (c) Chef Software Inc.\n# License:: Apache License, Version 2.0\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n#\n\nrequire_relative \"../../spec_helper\"\nrequire \"chef/secret_fetcher/hashi_vault\"\n\ndescribe Chef::SecretFetcher::HashiVault do\n let(:node) { {} }\n let(:run_context) { double(\"run_context\", node: node) }\n let(:fetcher_config) { {} }\n let(:fetcher) {\n Chef::SecretFetcher::HashiVault.new( fetcher_config, run_context )\n }\n\n context \"when validating HashiVault provided configuration\" do\n context \"and role_name is not provided\" do\n let(:fetcher_config) { { vault_addr: \"vault.example.com\" } }\n it \"raises ConfigurationInvalid\" do\n expect { fetcher.validate! }.to raise_error(Chef::Exceptions::Secret::ConfigurationInvalid)\n end\n end\n context \"and vault_addr is not provided\" do\n let(:fetcher_config) { { role_name: \"example-role\" } }\n it \"raises ConfigurationInvalid\" do\n expect { fetcher.validate! 
}.to raise_error(Chef::Exceptions::Secret::ConfigurationInvalid)\n end\n end\n end\n\n context \"when all required config is provided\" do\n let(:fetcher_config) { { vault_addr: \"vault.example.com\", role_name: \"example-role\" } }\n it \"obtains a token via AWS IAM auth\" do\n auth_stub = double(\"vault auth\", aws_iam: nil)", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "696936306", - "repo_full_name": "chef/chef", - "pr_number": 11942, - "pr_file": "spec/unit/secret_fetcher/hashi_vault_spec.rb", - "discussion_id": "696936306", - "commented_code": "@@ -0,0 +1,57 @@\n+#\n+# Author:: Marc Paradise \n+# Copyright:: Copyright (c) Chef Software Inc.\n+# License:: Apache License, Version 2.0\n+#\n+# Licensed under the Apache License, Version 2.0 (the \"License\");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an \"AS IS\" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+#\n+#\n+\n+require_relative \"../../spec_helper\"\n+require \"chef/secret_fetcher/hashi_vault\"\n+\n+describe Chef::SecretFetcher::HashiVault do\n+ let(:node) { {} }\n+ let(:run_context) { double(\"run_context\", node: node) }\n+ let(:fetcher_config) { {} }\n+ let(:fetcher) {\n+ Chef::SecretFetcher::HashiVault.new( fetcher_config, run_context )\n+ }\n+\n+ context \"when validating HashiVault provided configuration\" do\n+ context \"and role_name is not provided\" do\n+ let(:fetcher_config) { { vault_addr: \"vault.example.com\" } }\n+ it \"raises ConfigurationInvalid\" do\n+ expect { fetcher.validate! }.to raise_error(Chef::Exceptions::Secret::ConfigurationInvalid)\n+ end\n+ end\n+ context \"and vault_addr is not provided\" do\n+ let(:fetcher_config) { { role_name: \"example-role\" } }\n+ it \"raises ConfigurationInvalid\" do\n+ expect { fetcher.validate! 
}.to raise_error(Chef::Exceptions::Secret::ConfigurationInvalid)\n+ end\n+ end\n+ end\n+\n+ context \"when all required config is provided\" do\n+ let(:fetcher_config) { { vault_addr: \"vault.example.com\", role_name: \"example-role\" } }\n+ it \"obtains a token via AWS IAM auth\" do\n+ auth_stub = double(\"vault auth\", aws_iam: nil)", - "comment_created_at": "2021-08-26T19:57:05+00:00", - "comment_author": "lamont-granquist", - "comment_body": "that should really be an `instance_double` of the appropriate type", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "662629591", - "pr_number": 11534, - "pr_file": "spec/unit/resource/rhsm_subscription_spec.rb", - "created_at": "2021-07-01T22:35:49+00:00", - "commented_code": "expect { resource.action :remove }.not_to raise_error\n end\n\n describe \"#action_attach\" do\n before do\n dummy = Mixlib::ShellOut.new\n allow_any_instance_of(Chef::Mixin::ShellOut).to receive(:shell_out!).with(\"subscription-manager attach --pool=#{resource.pool_id}\").and_return(dummy)\n allow(dummy).to receive(:stdout).and_return(\"Successfully attached a subscription for: My Subscription\")\n allow(dummy).to receive(:exitstatus).and_return(0)\n allow(dummy).to receive(:error?).and_return(false)\n node.automatic_attrs[:platform_family] = \"rhel\"\n node.automatic_attrs[:platform_version] = \"7.3\"\n allow_any_instance_of(Chef::Provider::Package::Yum).to receive(:installed_version).with(0).and_return(Chef::Provider::Package::Yum::Version.new(nil, nil, nil))\n allow_any_instance_of(Chef::Provider::Package::Yum).to receive(:available_version).with(0).and_return(Chef::Provider::Package::Yum::Version.new(nil, nil, nil))\n allow_any_instance_of(Chef::Provider::Package::Yum::PythonHelper).to receive(:close_rpmdb)", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "662629591", - "repo_full_name": "chef/chef", - "pr_number": 11534, - "pr_file": "spec/unit/resource/rhsm_subscription_spec.rb", - "discussion_id": "662629591", - "commented_code": "@@ -38,6 +43,46 @@\n expect { resource.action :remove }.not_to raise_error\n end\n \n+ describe \"#action_attach\" do\n+ before do\n+ dummy = Mixlib::ShellOut.new\n+ allow_any_instance_of(Chef::Mixin::ShellOut).to receive(:shell_out!).with(\"subscription-manager attach --pool=#{resource.pool_id}\").and_return(dummy)\n+ allow(dummy).to receive(:stdout).and_return(\"Successfully attached a subscription for: My Subscription\")\n+ allow(dummy).to receive(:exitstatus).and_return(0)\n+ allow(dummy).to receive(:error?).and_return(false)\n+ node.automatic_attrs[:platform_family] = \"rhel\"\n+ node.automatic_attrs[:platform_version] = \"7.3\"\n+ allow_any_instance_of(Chef::Provider::Package::Yum).to receive(:installed_version).with(0).and_return(Chef::Provider::Package::Yum::Version.new(nil, nil, nil))\n+ allow_any_instance_of(Chef::Provider::Package::Yum).to receive(:available_version).with(0).and_return(Chef::Provider::Package::Yum::Version.new(nil, nil, nil))\n+ allow_any_instance_of(Chef::Provider::Package::Yum::PythonHelper).to receive(:close_rpmdb)", - "comment_created_at": "2021-07-01T22:35:49+00:00", - "comment_author": "lamont-granquist", - "comment_body": "You're trying way too hard here. 
You're testing that the details of the ResourceResolver correctly give back a Chef::Package::Provider::Yum when you're forcing the distro version, which is duplicating all the testing which goes on already to prove that happens.\r\n\r\nI'd just create a double object and:\r\n\r\n```\r\nexpect(provider).to receive(:build_resource).with().and_return(mydouble)\r\nexpect(mydouble).to receive(:run_action).with(:flush_cache)\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "662633326", - "pr_number": 11534, - "pr_file": "spec/unit/resource/rhsm_subscription_spec.rb", - "created_at": "2021-07-01T22:46:57+00:00", - "commented_code": "expect { resource.action :remove }.not_to raise_error\n end\n\n describe \"#action_attach\" do\n before do\n dummy = Mixlib::ShellOut.new\n allow_any_instance_of(Chef::Mixin::ShellOut).to receive(:shell_out!).with(\"subscription-manager attach --pool=#{resource.pool_id}\").and_return(dummy)\n allow(dummy).to receive(:stdout).and_return(\"Successfully attached a subscription for: My Subscription\")\n allow(dummy).to receive(:exitstatus).and_return(0)\n allow(dummy).to receive(:error?).and_return(false)", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "662633326", - "repo_full_name": "chef/chef", - "pr_number": 11534, - "pr_file": "spec/unit/resource/rhsm_subscription_spec.rb", - "discussion_id": "662633326", - "commented_code": "@@ -38,6 +43,46 @@\n expect { resource.action :remove }.not_to raise_error\n end\n \n+ describe \"#action_attach\" do\n+ before do\n+ dummy = Mixlib::ShellOut.new\n+ allow_any_instance_of(Chef::Mixin::ShellOut).to receive(:shell_out!).with(\"subscription-manager attach --pool=#{resource.pool_id}\").and_return(dummy)\n+ allow(dummy).to receive(:stdout).and_return(\"Successfully attached a subscription for: My Subscription\")\n+ allow(dummy).to receive(:exitstatus).and_return(0)\n+ allow(dummy).to receive(:error?).and_return(false)", - "comment_created_at": "2021-07-01T22:46:57+00:00", - "comment_author": "lamont-granquist", - "comment_body": "Here also you should stub at the level of:\r\n\r\n```\r\nso = instance_double(Mixlib::ShellOut)\r\nexpect(provider).to receive(:shell_out!).with(<...>).and_return(so)\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/chef-verify-automated-documentation.json b/_reviewers/chef-verify-automated-documentation.json new file mode 100644 index 0000000..9da7334 --- /dev/null +++ b/_reviewers/chef-verify-automated-documentation.json @@ -0,0 +1,58 @@ +[ + { + "discussion_id": "1064755765", + "pr_number": 13484, + "pr_file": "docs/dev/how_to/releasing_chef_infra.md", + "created_at": "2023-01-09T15:18:02+00:00", + "commented_code": "If there are any new or updated resources, the docs site will need to be updated. This is a `partially` automated process. If you are making a single resource update or changing wording, it may just be easier to do it by hand.\n\n`publish-release-notes.sh` pushes to S3 and then Netlify site needs to be rebuilt, so the Docs Site may not immediately. Reach out to the Docs team to trigger an update.", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1064755765", + "repo_full_name": "chef/chef", + "pr_number": 13484, + "pr_file": "docs/dev/how_to/releasing_chef_infra.md", + "discussion_id": "1064755765", + "commented_code": "@@ -28,23 +28,26 @@ The importance of our release notes cannot be understated. 
As developers, we und\n \n If there are any new or updated resources, the docs site will need to be updated. This is a `partially` automated process. If you are making a single resource update or changing wording, it may just be easier to do it by hand.\n \n+`publish-release-notes.sh` pushes to S3 and then Netlify site needs to be rebuilt, so the Docs Site may not immediately. Reach out to the Docs team to trigger an update.", + "comment_created_at": "2023-01-09T15:18:02+00:00", + "comment_author": "PrajaktaPurohit", + "comment_body": "```suggestion\r\n`publish-release-notes.sh` pushes to S3 and then Netlify site needs to be rebuilt, so the Docs Site may not immediately reflect the Release notes related updates. The way to rebuild Netlify is manually or by merging a PR in chef-web-docs repository. Reach out to the Docs team to trigger an update.\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1046346041", + "pr_number": 13428, + "pr_file": "docs/dev/how_to/releasing_chef_infra.md", + "created_at": "2022-12-12T20:13:33+00:00", + "commented_code": "#### Resource Documentation Automation\n\n1. Run `rake docs_site:resources` to generate content to a `docs_site` directory\n 1. WARNING: Any resource that inherits it's basic structure from a parent resource is likely to comingle settings from both when you run that command. For example, there are 17-20 resources that consume the basic package.rb resource file. As a consequence, ALL of the children will most likely have pulled in properties and actions that do not apply to them. You have to be careful here.", + "repo_full_name": "chef/chef", + "discussion_comments": [ + { + "comment_id": "1046346041", + "repo_full_name": "chef/chef", + "pr_number": 13428, + "pr_file": "docs/dev/how_to/releasing_chef_infra.md", + "discussion_id": "1046346041", + "commented_code": "@@ -29,10 +31,18 @@ If there are any new or updated resources, the docs site will need to be updated\n #### Resource Documentation Automation\n \n 1. Run `rake docs_site:resources` to generate content to a `docs_site` directory\n+ 1. WARNING: Any resource that inherits it's basic structure from a parent resource is likely to comingle settings from both when you run that command. For example, there are 17-20 resources that consume the basic package.rb resource file. As a consequence, ALL of the children will most likely have pulled in properties and actions that do not apply to them. You have to be careful here.", + "comment_created_at": "2022-12-12T20:13:33+00:00", + "comment_author": "tpowell-progress", + "comment_body": "should be `commingle`", + "pr_file_module": null + }, + { + "comment_id": "1046449480", + "repo_full_name": "chef/chef", + "pr_number": 13428, + "pr_file": "docs/dev/how_to/releasing_chef_infra.md", + "discussion_id": "1046346041", + "commented_code": "@@ -29,10 +31,18 @@ If there are any new or updated resources, the docs site will need to be updated\n #### Resource Documentation Automation\n \n 1. Run `rake docs_site:resources` to generate content to a `docs_site` directory\n+ 1. WARNING: Any resource that inherits it's basic structure from a parent resource is likely to comingle settings from both when you run that command. For example, there are 17-20 resources that consume the basic package.rb resource file. As a consequence, ALL of the children will most likely have pulled in properties and actions that do not apply to them. 
You have to be careful here.", + "comment_created_at": "2022-12-12T22:19:50+00:00", + "comment_author": "johnmccrae", + "comment_body": "corrected", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/chef-verify-automated-documentation.md b/_reviewers/chef-verify-automated-documentation.md index 9ddbbdf..5bc0cc4 100644 --- a/_reviewers/chef-verify-automated-documentation.md +++ b/_reviewers/chef-verify-automated-documentation.md @@ -18,63 +18,3 @@ For example: - When using scripts like `publish-release-notes.sh` that push content to repositories, document the complete process including any subsequent manual steps (like triggering Netlify rebuilds) required for the documentation to become visible to users. Document both the automation commands and their limitations to ensure team members understand the complete documentation workflow. This prevents confusion and ensures accurate, complete documentation reaches the end users. - - -[ - { - "discussion_id": "1064755765", - "pr_number": 13484, - "pr_file": "docs/dev/how_to/releasing_chef_infra.md", - "created_at": "2023-01-09T15:18:02+00:00", - "commented_code": "If there are any new or updated resources, the docs site will need to be updated. This is a `partially` automated process. If you are making a single resource update or changing wording, it may just be easier to do it by hand.\n\n`publish-release-notes.sh` pushes to S3 and then Netlify site needs to be rebuilt, so the Docs Site may not immediately. Reach out to the Docs team to trigger an update.", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1064755765", - "repo_full_name": "chef/chef", - "pr_number": 13484, - "pr_file": "docs/dev/how_to/releasing_chef_infra.md", - "discussion_id": "1064755765", - "commented_code": "@@ -28,23 +28,26 @@ The importance of our release notes cannot be understated. As developers, we und\n \n If there are any new or updated resources, the docs site will need to be updated. This is a `partially` automated process. If you are making a single resource update or changing wording, it may just be easier to do it by hand.\n \n+`publish-release-notes.sh` pushes to S3 and then Netlify site needs to be rebuilt, so the Docs Site may not immediately. Reach out to the Docs team to trigger an update.", - "comment_created_at": "2023-01-09T15:18:02+00:00", - "comment_author": "PrajaktaPurohit", - "comment_body": "```suggestion\r\n`publish-release-notes.sh` pushes to S3 and then Netlify site needs to be rebuilt, so the Docs Site may not immediately reflect the Release notes related updates. The way to rebuild Netlify is manually or by merging a PR in chef-web-docs repository. Reach out to the Docs team to trigger an update.\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1046346041", - "pr_number": 13428, - "pr_file": "docs/dev/how_to/releasing_chef_infra.md", - "created_at": "2022-12-12T20:13:33+00:00", - "commented_code": "#### Resource Documentation Automation\n\n1. Run `rake docs_site:resources` to generate content to a `docs_site` directory\n 1. WARNING: Any resource that inherits it's basic structure from a parent resource is likely to comingle settings from both when you run that command. For example, there are 17-20 resources that consume the basic package.rb resource file. As a consequence, ALL of the children will most likely have pulled in properties and actions that do not apply to them. 
You have to be careful here.", - "repo_full_name": "chef/chef", - "discussion_comments": [ - { - "comment_id": "1046346041", - "repo_full_name": "chef/chef", - "pr_number": 13428, - "pr_file": "docs/dev/how_to/releasing_chef_infra.md", - "discussion_id": "1046346041", - "commented_code": "@@ -29,10 +31,18 @@ If there are any new or updated resources, the docs site will need to be updated\n #### Resource Documentation Automation\n \n 1. Run `rake docs_site:resources` to generate content to a `docs_site` directory\n+ 1. WARNING: Any resource that inherits it's basic structure from a parent resource is likely to comingle settings from both when you run that command. For example, there are 17-20 resources that consume the basic package.rb resource file. As a consequence, ALL of the children will most likely have pulled in properties and actions that do not apply to them. You have to be careful here.", - "comment_created_at": "2022-12-12T20:13:33+00:00", - "comment_author": "tpowell-progress", - "comment_body": "should be `commingle`", - "pr_file_module": null - }, - { - "comment_id": "1046449480", - "repo_full_name": "chef/chef", - "pr_number": 13428, - "pr_file": "docs/dev/how_to/releasing_chef_infra.md", - "discussion_id": "1046346041", - "commented_code": "@@ -29,10 +31,18 @@ If there are any new or updated resources, the docs site will need to be updated\n #### Resource Documentation Automation\n \n 1. Run `rake docs_site:resources` to generate content to a `docs_site` directory\n+ 1. WARNING: Any resource that inherits it's basic structure from a parent resource is likely to comingle settings from both when you run that command. For example, there are 17-20 resources that consume the basic package.rb resource file. As a consequence, ALL of the children will most likely have pulled in properties and actions that do not apply to them. 
You have to be careful here.", - "comment_created_at": "2022-12-12T22:19:50+00:00", - "comment_author": "johnmccrae", - "comment_body": "corrected", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-avoid-flaky-tests.json b/_reviewers/elasticsearch-avoid-flaky-tests.json new file mode 100644 index 0000000..833c482 --- /dev/null +++ b/_reviewers/elasticsearch-avoid-flaky-tests.json @@ -0,0 +1,70 @@ +[ + { + "discussion_id": "2145028439", + "pr_number": 129282, + "pr_file": "x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml", + "created_at": "2025-06-13T12:51:01+00:00", + "commented_code": "query: \"inference test\"\n\n - match: { hits.total.value: 0 }\n\n---\n\"Apply boost and query name\":\n - requires:\n cluster_features: \"semantic_text.query_rewrite.boost_and_query_name_fix\"\n reason: fix boosting and query name for semantic text match queries.\n\n - skip:\n features: [ \"headers\", \"close_to\" ]\n\n - do:\n index:\n index: test-sparse-index\n id: doc_1\n body:\n inference_field: [ \"inference test\", \"another inference test\" ]\n non_inference_field: \"non inference test\"\n refresh: true\n\n - do:\n headers:\n # Force JSON content type so that we use a parser that interprets the floating-point score as a double\n Content-Type: application/json\n search:\n index: test-sparse-index\n body:\n query:\n match:\n inference_field:\n query: \"inference test\"\n boost: 5.0\n _name: i-like-naming-my-queries\n\n - match: { hits.total.value: 1 }\n - match: { hits.hits.0._id: \"doc_1\" }\n - close_to: { hits.hits.0._score: { value: 1.8918664E18, error: 1e15 } }", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2145028439", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129282, + "pr_file": "x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml", + "discussion_id": "2145028439", + "commented_code": "@@ -277,3 +277,41 @@ setup:\n query: \"inference test\"\n \n - match: { hits.total.value: 0 }\n+\n+---\n+\"Apply boost and query name\":\n+ - requires:\n+ cluster_features: \"semantic_text.query_rewrite.boost_and_query_name_fix\"\n+ reason: fix boosting and query name for semantic text match queries.\n+\n+ - skip:\n+ features: [ \"headers\", \"close_to\" ]\n+\n+ - do:\n+ index:\n+ index: test-sparse-index\n+ id: doc_1\n+ body:\n+ inference_field: [ \"inference test\", \"another inference test\" ]\n+ non_inference_field: \"non inference test\"\n+ refresh: true\n+\n+ - do:\n+ headers:\n+ # Force JSON content type so that we use a parser that interprets the floating-point score as a double\n+ Content-Type: application/json\n+ search:\n+ index: test-sparse-index\n+ body:\n+ query:\n+ match:\n+ inference_field:\n+ query: \"inference test\"\n+ boost: 5.0\n+ _name: i-like-naming-my-queries\n+\n+ - match: { hits.total.value: 1 }\n+ - match: { hits.hits.0._id: \"doc_1\" }\n+ - close_to: { hits.hits.0._score: { value: 1.8918664E18, error: 1e15 } }", + "comment_created_at": "2025-06-13T12:51:01+00:00", + "comment_author": "kderusso", + "comment_body": "Can you please make sure this won't run into flakey test issues due to different shard counts impacting score? Sometimes this can be an issue with tests especially in serverless.\r\n\r\nAlso for completeness it might be good to check the score before putting the boost in to validate they are different. 
", + "pr_file_module": null + }, + { + "comment_id": "2145394209", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129282, + "pr_file": "x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml", + "discussion_id": "2145028439", + "commented_code": "@@ -277,3 +277,41 @@ setup:\n query: \"inference test\"\n \n - match: { hits.total.value: 0 }\n+\n+---\n+\"Apply boost and query name\":\n+ - requires:\n+ cluster_features: \"semantic_text.query_rewrite.boost_and_query_name_fix\"\n+ reason: fix boosting and query name for semantic text match queries.\n+\n+ - skip:\n+ features: [ \"headers\", \"close_to\" ]\n+\n+ - do:\n+ index:\n+ index: test-sparse-index\n+ id: doc_1\n+ body:\n+ inference_field: [ \"inference test\", \"another inference test\" ]\n+ non_inference_field: \"non inference test\"\n+ refresh: true\n+\n+ - do:\n+ headers:\n+ # Force JSON content type so that we use a parser that interprets the floating-point score as a double\n+ Content-Type: application/json\n+ search:\n+ index: test-sparse-index\n+ body:\n+ query:\n+ match:\n+ inference_field:\n+ query: \"inference test\"\n+ boost: 5.0\n+ _name: i-like-naming-my-queries\n+\n+ - match: { hits.total.value: 1 }\n+ - match: { hits.hits.0._id: \"doc_1\" }\n+ - close_to: { hits.hits.0._score: { value: 1.8918664E18, error: 1e15 } }", + "comment_created_at": "2025-06-13T15:47:10+00:00", + "comment_author": "Samiul-TheSoccerFan", + "comment_body": "Good point, thank you. I did validate the scores locally, but you are right that this is not currently reflected in the YAML tests. I will update the tests to include both boosted and unboosted queries to make the difference explicit.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1678832153", + "pr_number": 110857, + "pr_file": "rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cat.shards/10_basic.yml", + "created_at": "2024-07-16T06:34:06+00:00", + "commented_code": "- match:\n $body: /foo \\s+ 1\\n/\n---\n\"Test cat shards with change in idle state\":\n - requires:\n cluster_features: [ \"gte_v8.16.0\" ]\n reason: search idle state was added in 8.16.0\n\n - do:\n indices.create:\n index: foo\n body:\n settings:\n number_of_shards: \"1\"\n number_of_replicas: \"0\"\n search.idle.after: \"1s\"\n\n - do:\n index:\n index: foo\n body: { test: foo }\n\n - do:\n cat.shards:\n index: foo\n v: false\n h: refresh.is_search_idle\n\n - match:\n $body: |\n /^(false \\n?)$/\n\n# very hacky way of sleeping\n - do:\n catch: request_timeout\n cluster.health:\n wait_for_nodes: 10\n timeout: \"2s\"", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "1678832153", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 110857, + "pr_file": "rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cat.shards/10_basic.yml", + "discussion_id": "1678832153", + "commented_code": "@@ -267,3 +267,49 @@\n \n - match:\n $body: /foo \\s+ 1\\n/\n+---\n+\"Test cat shards with change in idle state\":\n+ - requires:\n+ cluster_features: [ \"gte_v8.16.0\" ]\n+ reason: search idle state was added in 8.16.0\n+\n+ - do:\n+ indices.create:\n+ index: foo\n+ body:\n+ settings:\n+ number_of_shards: \"1\"\n+ number_of_replicas: \"0\"\n+ search.idle.after: \"1s\"\n+\n+ - do:\n+ index:\n+ index: foo\n+ body: { test: foo }\n+\n+ - do:\n+ cat.shards:\n+ index: foo\n+ v: false\n+ h: refresh.is_search_idle\n+\n+ - match:\n+ $body: |\n+ /^(false \\n?)$/\n+\n+# very hacky way of sleeping\n+ - do:\n+ catch: 
request_timeout\n+ cluster.health:\n+ wait_for_nodes: 10\n+ timeout: \"2s\"", + "comment_created_at": "2024-07-16T06:34:06+00:00", + "comment_author": "DaveCTurner", + "comment_body": "Yeah I don't think we need this hack (and I worry that it'll be quite a flaky test in practice). In a YAML test we really just need to verify that the column exists at all.", + "pr_file_module": null + }, + { + "comment_id": "1679017815", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 110857, + "pr_file": "rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cat.shards/10_basic.yml", + "discussion_id": "1678832153", + "commented_code": "@@ -267,3 +267,49 @@\n \n - match:\n $body: /foo \\s+ 1\\n/\n+---\n+\"Test cat shards with change in idle state\":\n+ - requires:\n+ cluster_features: [ \"gte_v8.16.0\" ]\n+ reason: search idle state was added in 8.16.0\n+\n+ - do:\n+ indices.create:\n+ index: foo\n+ body:\n+ settings:\n+ number_of_shards: \"1\"\n+ number_of_replicas: \"0\"\n+ search.idle.after: \"1s\"\n+\n+ - do:\n+ index:\n+ index: foo\n+ body: { test: foo }\n+\n+ - do:\n+ cat.shards:\n+ index: foo\n+ v: false\n+ h: refresh.is_search_idle\n+\n+ - match:\n+ $body: |\n+ /^(false \\n?)$/\n+\n+# very hacky way of sleeping\n+ - do:\n+ catch: request_timeout\n+ cluster.health:\n+ wait_for_nodes: 10\n+ timeout: \"2s\"", + "comment_created_at": "2024-07-16T08:55:34+00:00", + "comment_author": "limotova", + "comment_body": "👍\r\nThe first test in this file checks for that I believe", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-avoid-flaky-tests.md b/_reviewers/elasticsearch-avoid-flaky-tests.md index 1030d20..1440b0d 100644 --- a/_reviewers/elasticsearch-avoid-flaky-tests.md +++ b/_reviewers/elasticsearch-avoid-flaky-tests.md @@ -58,75 +58,3 @@ Avoid "hacky" approaches that depend on timing. 
Instead, focus on testing that f - match: $body: /^(false|true)\s*\n?$/ ``` - - -[ - { - "discussion_id": "2145028439", - "pr_number": 129282, - "pr_file": "x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml", - "created_at": "2025-06-13T12:51:01+00:00", - "commented_code": "query: \"inference test\"\n\n - match: { hits.total.value: 0 }\n\n---\n\"Apply boost and query name\":\n - requires:\n cluster_features: \"semantic_text.query_rewrite.boost_and_query_name_fix\"\n reason: fix boosting and query name for semantic text match queries.\n\n - skip:\n features: [ \"headers\", \"close_to\" ]\n\n - do:\n index:\n index: test-sparse-index\n id: doc_1\n body:\n inference_field: [ \"inference test\", \"another inference test\" ]\n non_inference_field: \"non inference test\"\n refresh: true\n\n - do:\n headers:\n # Force JSON content type so that we use a parser that interprets the floating-point score as a double\n Content-Type: application/json\n search:\n index: test-sparse-index\n body:\n query:\n match:\n inference_field:\n query: \"inference test\"\n boost: 5.0\n _name: i-like-naming-my-queries\n\n - match: { hits.total.value: 1 }\n - match: { hits.hits.0._id: \"doc_1\" }\n - close_to: { hits.hits.0._score: { value: 1.8918664E18, error: 1e15 } }", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2145028439", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129282, - "pr_file": "x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml", - "discussion_id": "2145028439", - "commented_code": "@@ -277,3 +277,41 @@ setup:\n query: \"inference test\"\n \n - match: { hits.total.value: 0 }\n+\n+---\n+\"Apply boost and query name\":\n+ - requires:\n+ cluster_features: \"semantic_text.query_rewrite.boost_and_query_name_fix\"\n+ reason: fix boosting and query name for semantic text match queries.\n+\n+ - skip:\n+ features: [ \"headers\", \"close_to\" ]\n+\n+ - do:\n+ index:\n+ index: test-sparse-index\n+ id: doc_1\n+ body:\n+ inference_field: [ \"inference test\", \"another inference test\" ]\n+ non_inference_field: \"non inference test\"\n+ refresh: true\n+\n+ - do:\n+ headers:\n+ # Force JSON content type so that we use a parser that interprets the floating-point score as a double\n+ Content-Type: application/json\n+ search:\n+ index: test-sparse-index\n+ body:\n+ query:\n+ match:\n+ inference_field:\n+ query: \"inference test\"\n+ boost: 5.0\n+ _name: i-like-naming-my-queries\n+\n+ - match: { hits.total.value: 1 }\n+ - match: { hits.hits.0._id: \"doc_1\" }\n+ - close_to: { hits.hits.0._score: { value: 1.8918664E18, error: 1e15 } }", - "comment_created_at": "2025-06-13T12:51:01+00:00", - "comment_author": "kderusso", - "comment_body": "Can you please make sure this won't run into flakey test issues due to different shard counts impacting score? Sometimes this can be an issue with tests especially in serverless.\r\n\r\nAlso for completeness it might be good to check the score before putting the boost in to validate they are different. 
", - "pr_file_module": null - }, - { - "comment_id": "2145394209", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129282, - "pr_file": "x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml", - "discussion_id": "2145028439", - "commented_code": "@@ -277,3 +277,41 @@ setup:\n query: \"inference test\"\n \n - match: { hits.total.value: 0 }\n+\n+---\n+\"Apply boost and query name\":\n+ - requires:\n+ cluster_features: \"semantic_text.query_rewrite.boost_and_query_name_fix\"\n+ reason: fix boosting and query name for semantic text match queries.\n+\n+ - skip:\n+ features: [ \"headers\", \"close_to\" ]\n+\n+ - do:\n+ index:\n+ index: test-sparse-index\n+ id: doc_1\n+ body:\n+ inference_field: [ \"inference test\", \"another inference test\" ]\n+ non_inference_field: \"non inference test\"\n+ refresh: true\n+\n+ - do:\n+ headers:\n+ # Force JSON content type so that we use a parser that interprets the floating-point score as a double\n+ Content-Type: application/json\n+ search:\n+ index: test-sparse-index\n+ body:\n+ query:\n+ match:\n+ inference_field:\n+ query: \"inference test\"\n+ boost: 5.0\n+ _name: i-like-naming-my-queries\n+\n+ - match: { hits.total.value: 1 }\n+ - match: { hits.hits.0._id: \"doc_1\" }\n+ - close_to: { hits.hits.0._score: { value: 1.8918664E18, error: 1e15 } }", - "comment_created_at": "2025-06-13T15:47:10+00:00", - "comment_author": "Samiul-TheSoccerFan", - "comment_body": "Good point, thank you. I did validate the scores locally, but you are right that this is not currently reflected in the YAML tests. I will update the tests to include both boosted and unboosted queries to make the difference explicit.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1678832153", - "pr_number": 110857, - "pr_file": "rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cat.shards/10_basic.yml", - "created_at": "2024-07-16T06:34:06+00:00", - "commented_code": "- match:\n $body: /foo \\s+ 1\\n/\n---\n\"Test cat shards with change in idle state\":\n - requires:\n cluster_features: [ \"gte_v8.16.0\" ]\n reason: search idle state was added in 8.16.0\n\n - do:\n indices.create:\n index: foo\n body:\n settings:\n number_of_shards: \"1\"\n number_of_replicas: \"0\"\n search.idle.after: \"1s\"\n\n - do:\n index:\n index: foo\n body: { test: foo }\n\n - do:\n cat.shards:\n index: foo\n v: false\n h: refresh.is_search_idle\n\n - match:\n $body: |\n /^(false \\n?)$/\n\n# very hacky way of sleeping\n - do:\n catch: request_timeout\n cluster.health:\n wait_for_nodes: 10\n timeout: \"2s\"", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "1678832153", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 110857, - "pr_file": "rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cat.shards/10_basic.yml", - "discussion_id": "1678832153", - "commented_code": "@@ -267,3 +267,49 @@\n \n - match:\n $body: /foo \\s+ 1\\n/\n+---\n+\"Test cat shards with change in idle state\":\n+ - requires:\n+ cluster_features: [ \"gte_v8.16.0\" ]\n+ reason: search idle state was added in 8.16.0\n+\n+ - do:\n+ indices.create:\n+ index: foo\n+ body:\n+ settings:\n+ number_of_shards: \"1\"\n+ number_of_replicas: \"0\"\n+ search.idle.after: \"1s\"\n+\n+ - do:\n+ index:\n+ index: foo\n+ body: { test: foo }\n+\n+ - do:\n+ cat.shards:\n+ index: foo\n+ v: false\n+ h: refresh.is_search_idle\n+\n+ - match:\n+ $body: |\n+ /^(false \\n?)$/\n+\n+# very hacky way of sleeping\n+ - do:\n+ catch: 
request_timeout\n+ cluster.health:\n+ wait_for_nodes: 10\n+ timeout: \"2s\"", - "comment_created_at": "2024-07-16T06:34:06+00:00", - "comment_author": "DaveCTurner", - "comment_body": "Yeah I don't think we need this hack (and I worry that it'll be quite a flaky test in practice). In a YAML test we really just need to verify that the column exists at all.", - "pr_file_module": null - }, - { - "comment_id": "1679017815", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 110857, - "pr_file": "rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cat.shards/10_basic.yml", - "discussion_id": "1678832153", - "commented_code": "@@ -267,3 +267,49 @@\n \n - match:\n $body: /foo \\s+ 1\\n/\n+---\n+\"Test cat shards with change in idle state\":\n+ - requires:\n+ cluster_features: [ \"gte_v8.16.0\" ]\n+ reason: search idle state was added in 8.16.0\n+\n+ - do:\n+ indices.create:\n+ index: foo\n+ body:\n+ settings:\n+ number_of_shards: \"1\"\n+ number_of_replicas: \"0\"\n+ search.idle.after: \"1s\"\n+\n+ - do:\n+ index:\n+ index: foo\n+ body: { test: foo }\n+\n+ - do:\n+ cat.shards:\n+ index: foo\n+ v: false\n+ h: refresh.is_search_idle\n+\n+ - match:\n+ $body: |\n+ /^(false \\n?)$/\n+\n+# very hacky way of sleeping\n+ - do:\n+ catch: request_timeout\n+ cluster.health:\n+ wait_for_nodes: 10\n+ timeout: \"2s\"", - "comment_created_at": "2024-07-16T08:55:34+00:00", - "comment_author": "limotova", - "comment_body": "\ud83d\udc4d\r\nThe first test in this file checks for that I believe", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-clarity-over-uncertainty.json b/_reviewers/elasticsearch-clarity-over-uncertainty.json new file mode 100644 index 0000000..c2a3539 --- /dev/null +++ b/_reviewers/elasticsearch-clarity-over-uncertainty.json @@ -0,0 +1,174 @@ +[ + { + "discussion_id": "2175635549", + "pr_number": 130280, + "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", + "created_at": "2025-06-30T18:14:16+00:00", + "commented_code": "Phases allowed: hot, warm.\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n\n::::{note}\nShards that are relocating during a `forcemerge` will not be merged.\n::::\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n\nTo use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n\n:::::{admonition} Performance considerations\n:name: ilm-forcemerge-performance\n\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. 
You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n\nIf you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n\n::::{important}\nThis can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n::::\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n\nIf you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). This is considered an expert setting override as this can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n\nForce merging will be performed by the nodes within the current phase of the index. A forcemerge in the `hot` phase will use hot nodes with potentially faster nodes, while impacting ingestion more. A forcemerge in the `warm` phase will use warm nodes and potentially take longer to perform, but without impacting ingestion in the `hot` tier.\nForce merging will be performed by the node hosting the shard. The [node's role](https://www.elastic.co/docs/deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles#data-node-role) frequently is equal to the [data tier](https://www.elastic.co/docs/manage-data/lifecycle/data-tiers) of the {{ilm-init}}'s phase of the index, but this is not guaranteed. 
For example, a forcemerge in the", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2175635549", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130280, + "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", + "discussion_id": "2175635549", + "commented_code": "@@ -7,29 +7,26 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n-\n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n-\n-::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n-::::\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n+* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n \n+If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). 
This is considered an expert setting override as this can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n \n-Force merging will be performed by the nodes within the current phase of the index. A forcemerge in the `hot` phase will use hot nodes with potentially faster nodes, while impacting ingestion more. A forcemerge in the `warm` phase will use warm nodes and potentially take longer to perform, but without impacting ingestion in the `hot` tier.\n+Force merging will be performed by the node hosting the shard. The [node's role](https://www.elastic.co/docs/deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles#data-node-role) frequently is equal to the [data tier](https://www.elastic.co/docs/manage-data/lifecycle/data-tiers) of the {{ilm-init}}'s phase of the index, but this is not guaranteed. For example, a forcemerge in the ", + "comment_created_at": "2025-06-30T18:14:16+00:00", + "comment_author": "kilfoyle", + "comment_body": "`but this is not guaranteed`\r\n\r\nShould this maybe be something like \"but you may choose to change this behavior.\"?\r\n\r\n\"Not guaranteed\" sounds to me like uncertainty in how the product behaves, rather than something a user might choose to configure differently.\r\n\r\n", + "pr_file_module": null + }, + { + "comment_id": "2175877399", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130280, + "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", + "discussion_id": "2175635549", + "commented_code": "@@ -7,29 +7,26 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n-\n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n-\n-::::{important}\n-This can have cascading performance impacts. 
Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n-::::\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n+* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n \n+If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). This is considered an expert setting override as this can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n \n-Force merging will be performed by the nodes within the current phase of the index. A forcemerge in the `hot` phase will use hot nodes with potentially faster nodes, while impacting ingestion more. A forcemerge in the `warm` phase will use warm nodes and potentially take longer to perform, but without impacting ingestion in the `hot` tier.\n+Force merging will be performed by the node hosting the shard. The [node's role](https://www.elastic.co/docs/deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles#data-node-role) frequently is equal to the [data tier](https://www.elastic.co/docs/manage-data/lifecycle/data-tiers) of the {{ilm-init}}'s phase of the index, but this is not guaranteed. For example, a forcemerge in the ", + "comment_created_at": "2025-06-30T20:43:49+00:00", + "comment_author": "stefnestor", + "comment_body": "It's poor phrasing my side sorry. I was trying to leave the door open because (a) users can disable ILM Migrate (and can use ILM Allocate to set allocation customizations on top) & (B) Searchable Snapshots merge on the phase-before's data tier; both'd nullify this earlier belief that force merge happens on phase's data tiers. But \"is not guaranteed\" is poor phrasing my part although nothing better comes to mind. 
Do you have any suggestions by chance?", + "pr_file_module": null + }, + { + "comment_id": "2175984495", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130280, + "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", + "discussion_id": "2175635549", + "commented_code": "@@ -7,29 +7,26 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n-\n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n-\n-::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n-::::\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n+* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n \n+If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). This is considered an expert setting override as this can have cascading performance impacts. 
Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n \n-Force merging will be performed by the nodes within the current phase of the index. A forcemerge in the `hot` phase will use hot nodes with potentially faster nodes, while impacting ingestion more. A forcemerge in the `warm` phase will use warm nodes and potentially take longer to perform, but without impacting ingestion in the `hot` tier.\n+Force merging will be performed by the node hosting the shard. The [node's role](https://www.elastic.co/docs/deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles#data-node-role) frequently is equal to the [data tier](https://www.elastic.co/docs/manage-data/lifecycle/data-tiers) of the {{ilm-init}}'s phase of the index, but this is not guaranteed. For example, a forcemerge in the ", + "comment_created_at": "2025-06-30T21:19:33+00:00", + "comment_author": "kilfoyle", + "comment_body": "Yup! I'd suggest `but this is not guaranteed.` → `but you may choose to adjust this behavior.` \r\n\r\nI've added a [suggestion](https://github.com/elastic/elasticsearch/pull/130280/files#r2175981766) for that change.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2177917563", + "pr_number": 130280, + "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", + "created_at": "2025-07-01T15:34:28+00:00", + "commented_code": "Phases allowed: hot, warm.\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n\n::::{note}\nShards that are relocating during a `forcemerge` will not be merged.\n::::\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is performed on a best effort basis. For example, shards that are relocating during a `forcemerge` will not be merged.\n\nTo use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n\n:::::{admonition} Performance considerations\n:name: ilm-forcemerge-performance\n\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. 
For example, this can happen when you \n* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2177917563", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130280, + "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", + "discussion_id": "2177917563", + "commented_code": "@@ -7,29 +7,30 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is performed on a best effort basis. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.", + "comment_created_at": "2025-07-01T15:34:28+00:00", + "comment_author": "nielsbauman", + "comment_body": "Small grammar nit: I liked David's suggestion (but I agree with Stef about not adding a link)\r\n```suggestion\r\n* modify an existing {{ilm-init}} policy's phase `min_age`, causing indices to trigger the force merge at a faster rate.\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2177954296", + "pr_number": 130280, + "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", + "created_at": "2025-07-01T15:52:05+00:00", + "commented_code": "Phases allowed: hot, warm.\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n\n::::{note}\nShards that are relocating during a `forcemerge` will not be merged.\n::::\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). 
This operation is performed on a best effort basis. For example, shards that are relocating during a `forcemerge` will not be merged.\n\nTo use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n\n:::::{admonition} Performance considerations\n:name: ilm-forcemerge-performance\n\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If the indices meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n\nIf you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\nIf you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). \n\n::::{important}\nThis can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\nNote that `thread_pool.force_merge.size` is an advanced setting. Adjusting it can cause cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n::::\n\nForce merging will be performed by the node hosting the shard. The [node's role](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#set-node-roles) frequently matches the [data tier](docs-content://manage-data/lifecycle/data-tiers.md) of the {{ilm-init}}'s phase of the index, you may choose to adjust this behavior. 
For example:", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2177954296", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130280, + "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", + "discussion_id": "2177954296", + "commented_code": "@@ -7,29 +7,30 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is performed on a best effort basis. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n+* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If the indices meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n \n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n+If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). \n \n ::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n+Note that `thread_pool.force_merge.size` is an advanced setting. 
Adjusting it can cause cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n ::::\n \n+Force merging will be performed by the node hosting the shard. The [node's role](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#set-node-roles) frequently matches the [data tier](docs-content://manage-data/lifecycle/data-tiers.md) of the {{ilm-init}}'s phase of the index, you may choose to adjust this behavior. For example: ", + "comment_created_at": "2025-07-01T15:52:05+00:00", + "comment_author": "nielsbauman", + "comment_body": "> The node's role frequently matches the data tier of the ILM's phase of the index, you may choose to adjust this behavior.\r\n\r\nI'm having trouble understanding this sentence, but maybe it's just me. Could you explain what you mean?", + "pr_file_module": null + }, + { + "comment_id": "2178769924", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130280, + "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", + "discussion_id": "2177954296", + "commented_code": "@@ -7,29 +7,30 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is performed on a best effort basis. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n+* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If the indices meet the `min_age` criteria, they can immediately proceed through multiple actions. 
You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n \n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n+If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). \n \n ::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n+Note that `thread_pool.force_merge.size` is an advanced setting. Adjusting it can cause cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n ::::\n \n+Force merging will be performed by the node hosting the shard. The [node's role](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#set-node-roles) frequently matches the [data tier](docs-content://manage-data/lifecycle/data-tiers.md) of the {{ilm-init}}'s phase of the index, you may choose to adjust this behavior. For example: ", + "comment_created_at": "2025-07-02T00:39:50+00:00", + "comment_author": "stefnestor", + "comment_body": "Yeah @kilfoyle and I tried to has this out but I'm not sure I got it across the line, sorry.\r\n\r\nThe needed fix from current doc: force merges do not happen on the ilm phase's node tier. They happen on the node hosting the shard, this specifically is different when you use searchable snapshots (e.g. hot>frozen merges on hot inside `phase/action/step: frozen/searchable_snapshot/forcemerge`). \r\n\r\nI've debated just removing `you may choose to adjust this behavior`. David & I discussed it as relating to disabling ILM Migrate (and using node attribute allocation filters).", + "pr_file_module": null + }, + { + "comment_id": "2180471455", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130280, + "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", + "discussion_id": "2177954296", + "commented_code": "@@ -7,29 +7,30 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is performed on a best effort basis. 
For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n+* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If the indices meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n \n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n+If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). \n \n ::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n+Note that `thread_pool.force_merge.size` is an advanced setting. Adjusting it can cause cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n ::::\n \n+Force merging will be performed by the node hosting the shard. The [node's role](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#set-node-roles) frequently matches the [data tier](docs-content://manage-data/lifecycle/data-tiers.md) of the {{ilm-init}}'s phase of the index, you may choose to adjust this behavior. For example: ", + "comment_created_at": "2025-07-02T16:14:52+00:00", + "comment_author": "nielsbauman", + "comment_body": "Ah, thanks for the explanation, that helps a lot! How about something like this:\r\n```suggestion\r\nForce merging will be performed by the node hosting the shard. 
Usually, the [node's role](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#set-node-roles) matches the [data tier](docs-content://manage-data/lifecycle/data-tiers.md) of the {{ilm-init}} phase that the index is in. One of the exceptions is when you have manually disabled [ILM Migrate](https://www.elastic.co/docs/reference/elasticsearch/index-lifecycle-actions/ilm-migrate) and have specified custom allocations using [ILM allocate](https://www.elastic.co/docs/reference/elasticsearch/index-lifecycle-actions/ilm-allocate). The other exception is searchable snapshots; force merges for [{{ilm-init}} Searchable Snapshots](./ilm-searchable-snapshot.md) using `force_merge_index` are performed in the phase that the index is in **prior** to the `searchable_snapshot` action. You may want to explicitly choose in which data tier the force merge should occur, for example:\r\n```\r\nin combination with your suggestion in the other thread below.\r\nOptionally, we could link to the `force_merge_index` option on https://www.elastic.co/docs/reference/elasticsearch/index-lifecycle-actions/ilm-searchable-snapshot#ilm-searchable-snapshot-options, for more details on force merging for searchable snapshots.", + "pr_file_module": null + }, + { + "comment_id": "2180536499", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130280, + "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", + "discussion_id": "2177954296", + "commented_code": "@@ -7,29 +7,30 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is performed on a best effort basis. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n+* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If the indices meet the `min_age` criteria, they can immediately proceed through multiple actions. 
You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n \n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n+If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). \n \n ::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n+Note that `thread_pool.force_merge.size` is an advanced setting. Adjusting it can cause cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n ::::\n \n+Force merging will be performed by the node hosting the shard. The [node's role](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#set-node-roles) frequently matches the [data tier](docs-content://manage-data/lifecycle/data-tiers.md) of the {{ilm-init}}'s phase of the index, you may choose to adjust this behavior. For example: ", + "comment_created_at": "2025-07-02T16:49:07+00:00", + "comment_author": "nielsbauman", + "comment_body": "Sorry, I forgot to mention that we should fix those links that I added. Do you happen to know what the correct links are? I'm not familiar with the new docs system yet.", + "pr_file_module": null + }, + { + "comment_id": "2180571246", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130280, + "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", + "discussion_id": "2177954296", + "commented_code": "@@ -7,29 +7,30 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is performed on a best effort basis. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. 
This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n+* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If the indices meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n \n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n+If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). \n \n ::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n+Note that `thread_pool.force_merge.size` is an advanced setting. Adjusting it can cause cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n ::::\n \n+Force merging will be performed by the node hosting the shard. The [node's role](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#set-node-roles) frequently matches the [data tier](docs-content://manage-data/lifecycle/data-tiers.md) of the {{ilm-init}}'s phase of the index, you may choose to adjust this behavior. 
For example: ", + "comment_created_at": "2025-07-02T17:05:41+00:00", + "comment_author": "stefnestor", + "comment_body": "David usually fixes them for me as I'm also getting up to speed on the new system 🙏 ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2157622079", + "pr_number": 129704, + "pr_file": "docs/reference/elasticsearch/index-settings/serverless.md", + "created_at": "2025-06-19T20:25:23+00:00", + "commented_code": "---\nnavigation_title: Serverless index settings\napplies_to:\n serverless: all\n---\n\n# Index settings available in {{serverless-full}} projects\n\nThis page lists the {{es}} index settings available in {{serverless-full}} projects.\n\n### General settings\n\n* [`index.codec`](./index-modules.md#index-codec)\n* [`index.default_pipeline`](./index-modules.md#index-default-pipeline) \n* [`index.dense_vector.hnsw_filter_heuristic`](./index-modules.md#index-dense-vector-hnsw-filter-heuristic)\n* [`index.final_pipeline`](./index-modules.md#index-final-pipeline) \n* [`index.hidden`](./index-modules.md#index-hidden)\n* [`index.mode`](./index-modules.md#index-mode-setting)\n* [`index.query.default_field`](./index-modules.md#index-query-default-field) \n* [`index.refresh_interval`](./index-modules.md#index-refresh-interval-setting) \n\n### Index sorting settings\n\n* [`index.sort.field`](./sorting.md#index-sort-field)\n* [`index.sort.missing`](./sorting.md#index-sort-missing)\n* [`index.sort.mode`](./sorting.md#index-sort-mode)\n* [`index.sort.order`](./sorting.md#index-sort-order)\n\n### Index blocks settings\n\n* [`index.blocks.read_only`](./index-block.md#index-blocks-read-only) \n* [`index.blocks.read`](./index-block.md#index-blocks-read) \n* [`index.blocks.write`](./index-block.md#index-blocks-write) \n* [`index.blocks.metadata`](./index-block.md#index-blocks-metadata) \n\n### Field and mappings related settings\n\n* [`index.mapping.coerce`](../mapping-reference/coerce.md#coerce-setting)\n* [`index.mapping.ignore_above`](../mapping-reference/index-mapping-ignore-above.md)\n* [`index.mapping.ignore_malformed`](../mapping-reference/ignore-malformed.md#ignore-malformed-setting)\n* [`index.mapping.source.mode`](../mapping-reference/mapping-source-field.md)\n* [`index.mapping.synthetic_source_keep`](../mapping-reference/mapping-source-field.md)\n* [`index.mapping.total_fields.limit`](./mapping-limit.md#total-fields-limit)\n* [`index.mapping.total_fields.ignore_dynamic_beyond_limit`](./mapping-limit.md#ignore-dynamic-beyond-limit)\n\n### ILM and Data Stream settings", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2157622079", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129704, + "pr_file": "docs/reference/elasticsearch/index-settings/serverless.md", + "discussion_id": "2157622079", + "commented_code": "@@ -0,0 +1,61 @@\n+---\n+navigation_title: Serverless index settings\n+applies_to:\n+ serverless: all\n+---\n+\n+# Index settings available in {{serverless-full}} projects\n+\n+This page lists the {{es}} index settings available in {{serverless-full}} projects.\n+\n+### General settings\n+\n+* [`index.codec`](./index-modules.md#index-codec)\n+* [`index.default_pipeline`](./index-modules.md#index-default-pipeline) \n+* [`index.dense_vector.hnsw_filter_heuristic`](./index-modules.md#index-dense-vector-hnsw-filter-heuristic)\n+* [`index.final_pipeline`](./index-modules.md#index-final-pipeline) \n+* [`index.hidden`](./index-modules.md#index-hidden)\n+* 
[`index.mode`](./index-modules.md#index-mode-setting)\n+* [`index.query.default_field`](./index-modules.md#index-query-default-field) \n+* [`index.refresh_interval`](./index-modules.md#index-refresh-interval-setting) \n+\n+### Index sorting settings\n+\n+* [`index.sort.field`](./sorting.md#index-sort-field)\n+* [`index.sort.missing`](./sorting.md#index-sort-missing)\n+* [`index.sort.mode`](./sorting.md#index-sort-mode)\n+* [`index.sort.order`](./sorting.md#index-sort-order)\n+\n+### Index blocks settings\n+\n+* [`index.blocks.read_only`](./index-block.md#index-blocks-read-only) \n+* [`index.blocks.read`](./index-block.md#index-blocks-read) \n+* [`index.blocks.write`](./index-block.md#index-blocks-write) \n+* [`index.blocks.metadata`](./index-block.md#index-blocks-metadata) \n+\n+### Field and mappings related settings\n+\n+* [`index.mapping.coerce`](../mapping-reference/coerce.md#coerce-setting)\n+* [`index.mapping.ignore_above`](../mapping-reference/index-mapping-ignore-above.md)\n+* [`index.mapping.ignore_malformed`](../mapping-reference/ignore-malformed.md#ignore-malformed-setting)\n+* [`index.mapping.source.mode`](../mapping-reference/mapping-source-field.md)\n+* [`index.mapping.synthetic_source_keep`](../mapping-reference/mapping-source-field.md)\n+* [`index.mapping.total_fields.limit`](./mapping-limit.md#total-fields-limit)\n+* [`index.mapping.total_fields.ignore_dynamic_beyond_limit`](./mapping-limit.md#ignore-dynamic-beyond-limit)\n+\n+### ILM and Data Stream settings", + "comment_created_at": "2025-06-19T20:25:23+00:00", + "comment_author": "shainaraskas", + "comment_body": "ILM is not available in serverless so we don't have to say it here\r\n\r\ndata streams =/= data stream lifecycle\r\n\r\n```suggestion\r\n### Data stream lifecycle settings\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2158597779", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129704, + "pr_file": "docs/reference/elasticsearch/index-settings/serverless.md", + "discussion_id": "2157622079", + "commented_code": "@@ -0,0 +1,61 @@\n+---\n+navigation_title: Serverless index settings\n+applies_to:\n+ serverless: all\n+---\n+\n+# Index settings available in {{serverless-full}} projects\n+\n+This page lists the {{es}} index settings available in {{serverless-full}} projects.\n+\n+### General settings\n+\n+* [`index.codec`](./index-modules.md#index-codec)\n+* [`index.default_pipeline`](./index-modules.md#index-default-pipeline) \n+* [`index.dense_vector.hnsw_filter_heuristic`](./index-modules.md#index-dense-vector-hnsw-filter-heuristic)\n+* [`index.final_pipeline`](./index-modules.md#index-final-pipeline) \n+* [`index.hidden`](./index-modules.md#index-hidden)\n+* [`index.mode`](./index-modules.md#index-mode-setting)\n+* [`index.query.default_field`](./index-modules.md#index-query-default-field) \n+* [`index.refresh_interval`](./index-modules.md#index-refresh-interval-setting) \n+\n+### Index sorting settings\n+\n+* [`index.sort.field`](./sorting.md#index-sort-field)\n+* [`index.sort.missing`](./sorting.md#index-sort-missing)\n+* [`index.sort.mode`](./sorting.md#index-sort-mode)\n+* [`index.sort.order`](./sorting.md#index-sort-order)\n+\n+### Index blocks settings\n+\n+* [`index.blocks.read_only`](./index-block.md#index-blocks-read-only) \n+* [`index.blocks.read`](./index-block.md#index-blocks-read) \n+* [`index.blocks.write`](./index-block.md#index-blocks-write) \n+* [`index.blocks.metadata`](./index-block.md#index-blocks-metadata) \n+\n+### Field and mappings related settings\n+\n+* 
[`index.mapping.coerce`](../mapping-reference/coerce.md#coerce-setting)\n+* [`index.mapping.ignore_above`](../mapping-reference/index-mapping-ignore-above.md)\n+* [`index.mapping.ignore_malformed`](../mapping-reference/ignore-malformed.md#ignore-malformed-setting)\n+* [`index.mapping.source.mode`](../mapping-reference/mapping-source-field.md)\n+* [`index.mapping.synthetic_source_keep`](../mapping-reference/mapping-source-field.md)\n+* [`index.mapping.total_fields.limit`](./mapping-limit.md#total-fields-limit)\n+* [`index.mapping.total_fields.ignore_dynamic_beyond_limit`](./mapping-limit.md#ignore-dynamic-beyond-limit)\n+\n+### ILM and Data Stream settings", + "comment_created_at": "2025-06-20T10:21:41+00:00", + "comment_author": "eedugon", + "comment_body": "great catch", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-clarity-over-uncertainty.md b/_reviewers/elasticsearch-clarity-over-uncertainty.md index 118dfde..6f19c5c 100644 --- a/_reviewers/elasticsearch-clarity-over-uncertainty.md +++ b/_reviewers/elasticsearch-clarity-over-uncertainty.md @@ -40,179 +40,3 @@ Example improvement: ``` Remember that marketing/positioning around limitations is particularly sensitive - always emphasize what users can do rather than what they cannot do. - - -[ - { - "discussion_id": "2175635549", - "pr_number": 130280, - "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", - "created_at": "2025-06-30T18:14:16+00:00", - "commented_code": "Phases allowed: hot, warm.\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n\n::::{note}\nShards that are relocating during a `forcemerge` will not be merged.\n::::\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n\nTo use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n\n:::::{admonition} Performance considerations\n:name: ilm-forcemerge-performance\n\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n\nIf you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n\n::::{important}\nThis can have cascading performance impacts. 
Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n::::\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n\nIf you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). This is considered an expert setting override as this can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n\nForce merging will be performed by the nodes within the current phase of the index. A forcemerge in the `hot` phase will use hot nodes with potentially faster nodes, while impacting ingestion more. A forcemerge in the `warm` phase will use warm nodes and potentially take longer to perform, but without impacting ingestion in the `hot` tier.\nForce merging will be performed by the node hosting the shard. The [node's role](https://www.elastic.co/docs/deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles#data-node-role) frequently is equal to the [data tier](https://www.elastic.co/docs/manage-data/lifecycle/data-tiers) of the {{ilm-init}}'s phase of the index, but this is not guaranteed. For example, a forcemerge in the", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2175635549", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130280, - "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", - "discussion_id": "2175635549", - "commented_code": "@@ -7,29 +7,26 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. 
If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n-\n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n-\n-::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n-::::\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n+* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n \n+If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). This is considered an expert setting override as this can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n \n-Force merging will be performed by the nodes within the current phase of the index. A forcemerge in the `hot` phase will use hot nodes with potentially faster nodes, while impacting ingestion more. A forcemerge in the `warm` phase will use warm nodes and potentially take longer to perform, but without impacting ingestion in the `hot` tier.\n+Force merging will be performed by the node hosting the shard. The [node's role](https://www.elastic.co/docs/deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles#data-node-role) frequently is equal to the [data tier](https://www.elastic.co/docs/manage-data/lifecycle/data-tiers) of the {{ilm-init}}'s phase of the index, but this is not guaranteed. 
For example, a forcemerge in the ", - "comment_created_at": "2025-06-30T18:14:16+00:00", - "comment_author": "kilfoyle", - "comment_body": "`but this is not guaranteed`\r\n\r\nShould this maybe be something like \"but you may choose to change this behavior.\"?\r\n\r\n\"Not guaranteed\" sounds to me like uncertainty in how the product behaves, rather than something a user might choose to configure differently.\r\n\r\n", - "pr_file_module": null - }, - { - "comment_id": "2175877399", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130280, - "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", - "discussion_id": "2175635549", - "commented_code": "@@ -7,29 +7,26 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n-\n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n-\n-::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n-::::\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n+* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple actions. 
You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n \n+If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). This is considered an expert setting override as this can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n \n-Force merging will be performed by the nodes within the current phase of the index. A forcemerge in the `hot` phase will use hot nodes with potentially faster nodes, while impacting ingestion more. A forcemerge in the `warm` phase will use warm nodes and potentially take longer to perform, but without impacting ingestion in the `hot` tier.\n+Force merging will be performed by the node hosting the shard. The [node's role](https://www.elastic.co/docs/deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles#data-node-role) frequently is equal to the [data tier](https://www.elastic.co/docs/manage-data/lifecycle/data-tiers) of the {{ilm-init}}'s phase of the index, but this is not guaranteed. For example, a forcemerge in the ", - "comment_created_at": "2025-06-30T20:43:49+00:00", - "comment_author": "stefnestor", - "comment_body": "It's poor phrasing my side sorry. I was trying to leave the door open because (a) users can disable ILM Migrate (and can use ILM Allocate to set allocation customizations on top) & (B) Searchable Snapshots merge on the phase-before's data tier; both'd nullify this earlier belief that force merge happens on phase's data tiers. But \"is not guaranteed\" is poor phrasing my part although nothing better comes to mind. Do you have any suggestions by chance?", - "pr_file_module": null - }, - { - "comment_id": "2175984495", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130280, - "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", - "discussion_id": "2175635549", - "commented_code": "@@ -7,29 +7,26 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. 
If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n-\n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n-\n-::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n-::::\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n+* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n \n+If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). This is considered an expert setting override as this can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n \n-Force merging will be performed by the nodes within the current phase of the index. A forcemerge in the `hot` phase will use hot nodes with potentially faster nodes, while impacting ingestion more. A forcemerge in the `warm` phase will use warm nodes and potentially take longer to perform, but without impacting ingestion in the `hot` tier.\n+Force merging will be performed by the node hosting the shard. The [node's role](https://www.elastic.co/docs/deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles#data-node-role) frequently is equal to the [data tier](https://www.elastic.co/docs/manage-data/lifecycle/data-tiers) of the {{ilm-init}}'s phase of the index, but this is not guaranteed. For example, a forcemerge in the ", - "comment_created_at": "2025-06-30T21:19:33+00:00", - "comment_author": "kilfoyle", - "comment_body": "Yup! 
I'd suggest `but this is not guaranteed.` \u2192 `but you may choose to adjust this behavior.` \r\n\r\nI've added a [suggestion](https://github.com/elastic/elasticsearch/pull/130280/files#r2175981766) for that change.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2177917563", - "pr_number": 130280, - "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", - "created_at": "2025-07-01T15:34:28+00:00", - "commented_code": "Phases allowed: hot, warm.\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n\n::::{note}\nShards that are relocating during a `forcemerge` will not be merged.\n::::\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is performed on a best effort basis. For example, shards that are relocating during a `forcemerge` will not be merged.\n\nTo use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n\n:::::{admonition} Performance considerations\n:name: ilm-forcemerge-performance\n\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2177917563", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130280, - "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", - "discussion_id": "2177917563", - "commented_code": "@@ -7,29 +7,30 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is performed on a best effort basis. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. 
If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.", - "comment_created_at": "2025-07-01T15:34:28+00:00", - "comment_author": "nielsbauman", - "comment_body": "Small grammar nit: I liked David's suggestion (but I agree with Stef about not adding a link)\r\n```suggestion\r\n* modify an existing {{ilm-init}} policy's phase `min_age`, causing indices to trigger the force merge at a faster rate.\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2177954296", - "pr_number": 130280, - "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", - "created_at": "2025-07-01T15:52:05+00:00", - "commented_code": "Phases allowed: hot, warm.\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n\n::::{note}\nShards that are relocating during a `forcemerge` will not be merged.\n::::\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is performed on a best effort basis. For example, shards that are relocating during a `forcemerge` will not be merged.\n\nTo use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n\n:::::{admonition} Performance considerations\n:name: ilm-forcemerge-performance\n\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If the indices meet the `min_age` criteria, they can immediately proceed through multiple actions. 
You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n\nIf you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\nIf you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). \n\n::::{important}\nThis can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\nNote that `thread_pool.force_merge.size` is an advanced setting. Adjusting it can cause cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n::::\n\nForce merging will be performed by the node hosting the shard. The [node's role](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#set-node-roles) frequently matches the [data tier](docs-content://manage-data/lifecycle/data-tiers.md) of the {{ilm-init}}'s phase of the index, you may choose to adjust this behavior. For example:", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2177954296", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130280, - "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", - "discussion_id": "2177954296", - "commented_code": "@@ -7,29 +7,30 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is performed on a best effort basis. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. 
You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n+* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If the indices meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n \n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n+If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). \n \n ::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n+Note that `thread_pool.force_merge.size` is an advanced setting. Adjusting it can cause cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n ::::\n \n+Force merging will be performed by the node hosting the shard. The [node's role](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#set-node-roles) frequently matches the [data tier](docs-content://manage-data/lifecycle/data-tiers.md) of the {{ilm-init}}'s phase of the index, you may choose to adjust this behavior. For example: ", - "comment_created_at": "2025-07-01T15:52:05+00:00", - "comment_author": "nielsbauman", - "comment_body": "> The node's role frequently matches the data tier of the ILM's phase of the index, you may choose to adjust this behavior.\r\n\r\nI'm having trouble understanding this sentence, but maybe it's just me. 
Could you explain what you mean?", - "pr_file_module": null - }, - { - "comment_id": "2178769924", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130280, - "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", - "discussion_id": "2177954296", - "commented_code": "@@ -7,29 +7,30 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is performed on a best effort basis. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n+* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If the indices meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n \n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n+If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). \n \n ::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n+Note that `thread_pool.force_merge.size` is an advanced setting. 
Adjusting it can cause cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n ::::\n \n+Force merging will be performed by the node hosting the shard. The [node's role](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#set-node-roles) frequently matches the [data tier](docs-content://manage-data/lifecycle/data-tiers.md) of the {{ilm-init}}'s phase of the index, you may choose to adjust this behavior. For example: ", - "comment_created_at": "2025-07-02T00:39:50+00:00", - "comment_author": "stefnestor", - "comment_body": "Yeah @kilfoyle and I tried to has this out but I'm not sure I got it across the line, sorry.\r\n\r\nThe needed fix from current doc: force merges do not happen on the ilm phase's node tier. They happen on the node hosting the shard, this specifically is different when you use searchable snapshots (e.g. hot>frozen merges on hot inside `phase/action/step: frozen/searchable_snapshot/forcemerge`). \r\n\r\nI've debated just removing `you may choose to adjust this behavior`. David & I discussed it as relating to disabling ILM Migrate (and using node attribute allocation filters).", - "pr_file_module": null - }, - { - "comment_id": "2180471455", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130280, - "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", - "discussion_id": "2177954296", - "commented_code": "@@ -7,29 +7,30 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is performed on a best effort basis. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n+* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If the indices meet the `min_age` criteria, they can immediately proceed through multiple actions. 
You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n \n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n+If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). \n \n ::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n+Note that `thread_pool.force_merge.size` is an advanced setting. Adjusting it can cause cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n ::::\n \n+Force merging will be performed by the node hosting the shard. The [node's role](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#set-node-roles) frequently matches the [data tier](docs-content://manage-data/lifecycle/data-tiers.md) of the {{ilm-init}}'s phase of the index, you may choose to adjust this behavior. For example: ", - "comment_created_at": "2025-07-02T16:14:52+00:00", - "comment_author": "nielsbauman", - "comment_body": "Ah, thanks for the explanation, that helps a lot! How about something like this:\r\n```suggestion\r\nForce merging will be performed by the node hosting the shard. Usually, the [node's role](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#set-node-roles) matches the [data tier](docs-content://manage-data/lifecycle/data-tiers.md) of the {{ilm-init}} phase that the index is in. One of the exceptions is when you have manually disabled [ILM Migrate](https://www.elastic.co/docs/reference/elasticsearch/index-lifecycle-actions/ilm-migrate) and have specified custom allocations using [ILM allocate](https://www.elastic.co/docs/reference/elasticsearch/index-lifecycle-actions/ilm-allocate). The other exception is searchable snapshots; force merges for [{{ilm-init}} Searchable Snapshots](./ilm-searchable-snapshot.md) using `force_merge_index` are performed in the phase that the index is in **prior** to the `searchable_snapshot` action. 
You may want to explicitly choose in which data tier the force merge should occur, for example:\r\n```\r\nin combination with your suggestion in the other thread below.\r\nOptionally, we could link to the `force_merge_index` option on https://www.elastic.co/docs/reference/elasticsearch/index-lifecycle-actions/ilm-searchable-snapshot#ilm-searchable-snapshot-options, for more details on force merging for searchable snapshots.", - "pr_file_module": null - }, - { - "comment_id": "2180536499", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130280, - "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", - "discussion_id": "2177954296", - "commented_code": "@@ -7,29 +7,30 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is performed on a best effort basis. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n+* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If the indices meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n \n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n+If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. 
To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). \n \n ::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n+Note that `thread_pool.force_merge.size` is an advanced setting. Adjusting it can cause cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n ::::\n \n+Force merging will be performed by the node hosting the shard. The [node's role](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#set-node-roles) frequently matches the [data tier](docs-content://manage-data/lifecycle/data-tiers.md) of the {{ilm-init}}'s phase of the index, you may choose to adjust this behavior. For example: ", - "comment_created_at": "2025-07-02T16:49:07+00:00", - "comment_author": "nielsbauman", - "comment_body": "Sorry, I forgot to mention that we should fix those links that I added. Do you happen to know what the correct links are? I'm not familiar with the new docs system yet.", - "pr_file_module": null - }, - { - "comment_id": "2180571246", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130280, - "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", - "discussion_id": "2177954296", - "commented_code": "@@ -7,29 +7,30 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is performed on a best effort basis. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n+* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If the indices meet the `min_age` criteria, they can immediately proceed through multiple actions. 
You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n \n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n+If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). \n \n ::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n+Note that `thread_pool.force_merge.size` is an advanced setting. Adjusting it can cause cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n ::::\n \n+Force merging will be performed by the node hosting the shard. The [node's role](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#set-node-roles) frequently matches the [data tier](docs-content://manage-data/lifecycle/data-tiers.md) of the {{ilm-init}}'s phase of the index, you may choose to adjust this behavior. For example: ", - "comment_created_at": "2025-07-02T17:05:41+00:00", - "comment_author": "stefnestor", - "comment_body": "David usually fixes them for me as I'm also getting up to speed on the new system \ud83d\ude4f ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2157622079", - "pr_number": 129704, - "pr_file": "docs/reference/elasticsearch/index-settings/serverless.md", - "created_at": "2025-06-19T20:25:23+00:00", - "commented_code": "---\nnavigation_title: Serverless index settings\napplies_to:\n serverless: all\n---\n\n# Index settings available in {{serverless-full}} projects\n\nThis page lists the {{es}} index settings available in {{serverless-full}} projects.\n\n### General settings\n\n* [`index.codec`](./index-modules.md#index-codec)\n* [`index.default_pipeline`](./index-modules.md#index-default-pipeline) \n* [`index.dense_vector.hnsw_filter_heuristic`](./index-modules.md#index-dense-vector-hnsw-filter-heuristic)\n* [`index.final_pipeline`](./index-modules.md#index-final-pipeline) \n* [`index.hidden`](./index-modules.md#index-hidden)\n* [`index.mode`](./index-modules.md#index-mode-setting)\n* [`index.query.default_field`](./index-modules.md#index-query-default-field) \n* [`index.refresh_interval`](./index-modules.md#index-refresh-interval-setting) \n\n### Index sorting settings\n\n* [`index.sort.field`](./sorting.md#index-sort-field)\n* [`index.sort.missing`](./sorting.md#index-sort-missing)\n* [`index.sort.mode`](./sorting.md#index-sort-mode)\n* [`index.sort.order`](./sorting.md#index-sort-order)\n\n### Index blocks settings\n\n* [`index.blocks.read_only`](./index-block.md#index-blocks-read-only) \n* [`index.blocks.read`](./index-block.md#index-blocks-read) \n* [`index.blocks.write`](./index-block.md#index-blocks-write) \n* 
[`index.blocks.metadata`](./index-block.md#index-blocks-metadata) \n\n### Field and mappings related settings\n\n* [`index.mapping.coerce`](../mapping-reference/coerce.md#coerce-setting)\n* [`index.mapping.ignore_above`](../mapping-reference/index-mapping-ignore-above.md)\n* [`index.mapping.ignore_malformed`](../mapping-reference/ignore-malformed.md#ignore-malformed-setting)\n* [`index.mapping.source.mode`](../mapping-reference/mapping-source-field.md)\n* [`index.mapping.synthetic_source_keep`](../mapping-reference/mapping-source-field.md)\n* [`index.mapping.total_fields.limit`](./mapping-limit.md#total-fields-limit)\n* [`index.mapping.total_fields.ignore_dynamic_beyond_limit`](./mapping-limit.md#ignore-dynamic-beyond-limit)\n\n### ILM and Data Stream settings", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2157622079", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129704, - "pr_file": "docs/reference/elasticsearch/index-settings/serverless.md", - "discussion_id": "2157622079", - "commented_code": "@@ -0,0 +1,61 @@\n+---\n+navigation_title: Serverless index settings\n+applies_to:\n+ serverless: all\n+---\n+\n+# Index settings available in {{serverless-full}} projects\n+\n+This page lists the {{es}} index settings available in {{serverless-full}} projects.\n+\n+### General settings\n+\n+* [`index.codec`](./index-modules.md#index-codec)\n+* [`index.default_pipeline`](./index-modules.md#index-default-pipeline) \n+* [`index.dense_vector.hnsw_filter_heuristic`](./index-modules.md#index-dense-vector-hnsw-filter-heuristic)\n+* [`index.final_pipeline`](./index-modules.md#index-final-pipeline) \n+* [`index.hidden`](./index-modules.md#index-hidden)\n+* [`index.mode`](./index-modules.md#index-mode-setting)\n+* [`index.query.default_field`](./index-modules.md#index-query-default-field) \n+* [`index.refresh_interval`](./index-modules.md#index-refresh-interval-setting) \n+\n+### Index sorting settings\n+\n+* [`index.sort.field`](./sorting.md#index-sort-field)\n+* [`index.sort.missing`](./sorting.md#index-sort-missing)\n+* [`index.sort.mode`](./sorting.md#index-sort-mode)\n+* [`index.sort.order`](./sorting.md#index-sort-order)\n+\n+### Index blocks settings\n+\n+* [`index.blocks.read_only`](./index-block.md#index-blocks-read-only) \n+* [`index.blocks.read`](./index-block.md#index-blocks-read) \n+* [`index.blocks.write`](./index-block.md#index-blocks-write) \n+* [`index.blocks.metadata`](./index-block.md#index-blocks-metadata) \n+\n+### Field and mappings related settings\n+\n+* [`index.mapping.coerce`](../mapping-reference/coerce.md#coerce-setting)\n+* [`index.mapping.ignore_above`](../mapping-reference/index-mapping-ignore-above.md)\n+* [`index.mapping.ignore_malformed`](../mapping-reference/ignore-malformed.md#ignore-malformed-setting)\n+* [`index.mapping.source.mode`](../mapping-reference/mapping-source-field.md)\n+* [`index.mapping.synthetic_source_keep`](../mapping-reference/mapping-source-field.md)\n+* [`index.mapping.total_fields.limit`](./mapping-limit.md#total-fields-limit)\n+* [`index.mapping.total_fields.ignore_dynamic_beyond_limit`](./mapping-limit.md#ignore-dynamic-beyond-limit)\n+\n+### ILM and Data Stream settings", - "comment_created_at": "2025-06-19T20:25:23+00:00", - "comment_author": "shainaraskas", - "comment_body": "ILM is not available in serverless so we don't have to say it here\r\n\r\ndata streams =/= data stream lifecycle\r\n\r\n```suggestion\r\n### Data stream lifecycle settings\r\n```", - "pr_file_module": null - }, 
- { - "comment_id": "2158597779", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129704, - "pr_file": "docs/reference/elasticsearch/index-settings/serverless.md", - "discussion_id": "2157622079", - "commented_code": "@@ -0,0 +1,61 @@\n+---\n+navigation_title: Serverless index settings\n+applies_to:\n+ serverless: all\n+---\n+\n+# Index settings available in {{serverless-full}} projects\n+\n+This page lists the {{es}} index settings available in {{serverless-full}} projects.\n+\n+### General settings\n+\n+* [`index.codec`](./index-modules.md#index-codec)\n+* [`index.default_pipeline`](./index-modules.md#index-default-pipeline) \n+* [`index.dense_vector.hnsw_filter_heuristic`](./index-modules.md#index-dense-vector-hnsw-filter-heuristic)\n+* [`index.final_pipeline`](./index-modules.md#index-final-pipeline) \n+* [`index.hidden`](./index-modules.md#index-hidden)\n+* [`index.mode`](./index-modules.md#index-mode-setting)\n+* [`index.query.default_field`](./index-modules.md#index-query-default-field) \n+* [`index.refresh_interval`](./index-modules.md#index-refresh-interval-setting) \n+\n+### Index sorting settings\n+\n+* [`index.sort.field`](./sorting.md#index-sort-field)\n+* [`index.sort.missing`](./sorting.md#index-sort-missing)\n+* [`index.sort.mode`](./sorting.md#index-sort-mode)\n+* [`index.sort.order`](./sorting.md#index-sort-order)\n+\n+### Index blocks settings\n+\n+* [`index.blocks.read_only`](./index-block.md#index-blocks-read-only) \n+* [`index.blocks.read`](./index-block.md#index-blocks-read) \n+* [`index.blocks.write`](./index-block.md#index-blocks-write) \n+* [`index.blocks.metadata`](./index-block.md#index-blocks-metadata) \n+\n+### Field and mappings related settings\n+\n+* [`index.mapping.coerce`](../mapping-reference/coerce.md#coerce-setting)\n+* [`index.mapping.ignore_above`](../mapping-reference/index-mapping-ignore-above.md)\n+* [`index.mapping.ignore_malformed`](../mapping-reference/ignore-malformed.md#ignore-malformed-setting)\n+* [`index.mapping.source.mode`](../mapping-reference/mapping-source-field.md)\n+* [`index.mapping.synthetic_source_keep`](../mapping-reference/mapping-source-field.md)\n+* [`index.mapping.total_fields.limit`](./mapping-limit.md#total-fields-limit)\n+* [`index.mapping.total_fields.ignore_dynamic_beyond_limit`](./mapping-limit.md#ignore-dynamic-beyond-limit)\n+\n+### ILM and Data Stream settings", - "comment_created_at": "2025-06-20T10:21:41+00:00", - "comment_author": "eedugon", - "comment_body": "great catch", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-complete-api-parameter-documentation.json b/_reviewers/elasticsearch-complete-api-parameter-documentation.json new file mode 100644 index 0000000..d7d4cae --- /dev/null +++ b/_reviewers/elasticsearch-complete-api-parameter-documentation.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "1576203682", + "pr_number": 107539, + "pr_file": "docs/reference/connector/apis/list-connectors-api.asciidoc", + "created_at": "2024-04-23T12:53:00+00:00", + "commented_code": "[[list-connector-api-prereq]]\n==== {api-prereq-title}\n\n* To sync data using connectors, it's essential to have the Elastic connectors service running.\n* To sync data using self-managed connectors, it's required to have the Elastic connectors service running.\n\n[[list-connector-api-path-params]]\n==== {api-path-parms-title}\n\n`size`::\n(Optional, integer) Maximum number of results to retrieve.\n// Is there a default?\n\n`from`::\n(Optional, integer) The offset from the first result to 
fetch.\n\n`index_name`::\n(Optional, string) A comma-separated list of data index names associated with connectors, used to filter search results.\n(Optional, string) A comma-separated list of index names associated with connectors, used to filter search results.\n// What is the maximum number of index names that can be specified?\n// Does this have to be an exact match or are regexes also accepted?\n// \"data index\" - I don't believe we refer to indices as data indices anywhere else\n\n`connector_name`::\n(Optional, string) A comma-separated list of connector names, used to filter search results.\n// What is the maximum number of names that can be specified?\n// Does this have to be an exact match or are regexes also accepted?\n// If we accept any name as connector name, what encoding is accepted as parameter? e.g. how would a request for a connector named \"My test connector\" look like?", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "1576203682", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 107539, + "pr_file": "docs/reference/connector/apis/list-connectors-api.asciidoc", + "discussion_id": "1576203682", + "commented_code": "@@ -18,25 +18,33 @@ Returns information about all stored connectors.\n [[list-connector-api-prereq]]\n ==== {api-prereq-title}\n \n-* To sync data using connectors, it's essential to have the Elastic connectors service running.\n+* To sync data using self-managed connectors, it's required to have the Elastic connectors service running.\n \n [[list-connector-api-path-params]]\n ==== {api-path-parms-title}\n \n `size`::\n (Optional, integer) Maximum number of results to retrieve.\n+// Is there a default?\n \n `from`::\n (Optional, integer) The offset from the first result to fetch.\n \n `index_name`::\n-(Optional, string) A comma-separated list of data index names associated with connectors, used to filter search results.\n+(Optional, string) A comma-separated list of index names associated with connectors, used to filter search results.\n+// What is the maximum number of index names that can be specified?\n+// Does this have to be an exact match or are regexes also accepted?\n+// \"data index\" - I don't believe we refer to indices as data indices anywhere else\n \n `connector_name`::\n (Optional, string) A comma-separated list of connector names, used to filter search results.\n+// What is the maximum number of names that can be specified?\n+// Does this have to be an exact match or are regexes also accepted?\n+// If we accept any name as connector name, what encoding is accepted as parameter? e.g. how would a request for a connector named \"My test connector\" look like?", + "comment_created_at": "2024-04-23T12:53:00+00:00", + "comment_author": "jedrazb", + "comment_body": "as this is a URL param it should be encoded as `My%20test%20connector` - I don't think we should explain how URL encoding works here", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1567516376", + "pr_number": 107344, + "pr_file": "docs/reference/connector/apis/create-connector-api.asciidoc", + "created_at": "2024-04-16T14:59:35+00:00", + "commented_code": "`index_name`::\n(Required, string) The target index for syncing data by the connector.\n\n// making this required does not align with the BYOI for connectors. I believe we discussed we're making this optional. \nIn Kibana, attaching an index also creates the index, if we specify an index here, will it be created? When? 
\n`name`::\n(Optional, string) The name of the connector.\n// having an optional name Killed my Kibana experience. Without an index or a name, there is no way for me to go to connector Overview.", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "1567516376", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 107344, + "pr_file": "docs/reference/connector/apis/create-connector-api.asciidoc", + "discussion_id": "1567516376", + "commented_code": "@@ -66,19 +68,22 @@ Creates a connector document in the internal index and initializes its configura\n \n `index_name`::\n (Required, string) The target index for syncing data by the connector.\n-\n+// making this required does not align with the BYOI for connectors. I believe we discussed we're making this optional. \n+In Kibana, attaching an index also creates the index, if we specify an index here, will it be created? When? \n `name`::\n (Optional, string) The name of the connector.\n+// having an optional name Killed my Kibana experience. Without an index or a name, there is no way for me to go to connector Overview.", + "comment_created_at": "2024-04-16T14:59:35+00:00", + "comment_author": "danajuratoni", + "comment_body": "Connector name comes with a set of validations in Kibana. What is the proposal for the next steps: relaxing Kibana validations or adding validations to the API?", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-complete-api-parameter-documentation.md b/_reviewers/elasticsearch-complete-api-parameter-documentation.md index f1eb1ec..8cde0ac 100644 --- a/_reviewers/elasticsearch-complete-api-parameter-documentation.md +++ b/_reviewers/elasticsearch-complete-api-parameter-documentation.md @@ -36,51 +36,3 @@ Validation: Names must follow the same validation rules as defined in the creati ``` When updating APIs, ensure that parameter behavior aligns with UI expectations and vice versa to prevent disconnects between API functionality and user experience. 
- - -[ - { - "discussion_id": "1576203682", - "pr_number": 107539, - "pr_file": "docs/reference/connector/apis/list-connectors-api.asciidoc", - "created_at": "2024-04-23T12:53:00+00:00", - "commented_code": "[[list-connector-api-prereq]]\n==== {api-prereq-title}\n\n* To sync data using connectors, it's essential to have the Elastic connectors service running.\n* To sync data using self-managed connectors, it's required to have the Elastic connectors service running.\n\n[[list-connector-api-path-params]]\n==== {api-path-parms-title}\n\n`size`::\n(Optional, integer) Maximum number of results to retrieve.\n// Is there a default?\n\n`from`::\n(Optional, integer) The offset from the first result to fetch.\n\n`index_name`::\n(Optional, string) A comma-separated list of data index names associated with connectors, used to filter search results.\n(Optional, string) A comma-separated list of index names associated with connectors, used to filter search results.\n// What is the maximum number of index names that can be specified?\n// Does this have to be an exact match or are regexes also accepted?\n// \"data index\" - I don't believe we refer to indices as data indices anywhere else\n\n`connector_name`::\n(Optional, string) A comma-separated list of connector names, used to filter search results.\n// What is the maximum number of names that can be specified?\n// Does this have to be an exact match or are regexes also accepted?\n// If we accept any name as connector name, what encoding is accepted as parameter? e.g. how would a request for a connector named \"My test connector\" look like?", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "1576203682", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 107539, - "pr_file": "docs/reference/connector/apis/list-connectors-api.asciidoc", - "discussion_id": "1576203682", - "commented_code": "@@ -18,25 +18,33 @@ Returns information about all stored connectors.\n [[list-connector-api-prereq]]\n ==== {api-prereq-title}\n \n-* To sync data using connectors, it's essential to have the Elastic connectors service running.\n+* To sync data using self-managed connectors, it's required to have the Elastic connectors service running.\n \n [[list-connector-api-path-params]]\n ==== {api-path-parms-title}\n \n `size`::\n (Optional, integer) Maximum number of results to retrieve.\n+// Is there a default?\n \n `from`::\n (Optional, integer) The offset from the first result to fetch.\n \n `index_name`::\n-(Optional, string) A comma-separated list of data index names associated with connectors, used to filter search results.\n+(Optional, string) A comma-separated list of index names associated with connectors, used to filter search results.\n+// What is the maximum number of index names that can be specified?\n+// Does this have to be an exact match or are regexes also accepted?\n+// \"data index\" - I don't believe we refer to indices as data indices anywhere else\n \n `connector_name`::\n (Optional, string) A comma-separated list of connector names, used to filter search results.\n+// What is the maximum number of names that can be specified?\n+// Does this have to be an exact match or are regexes also accepted?\n+// If we accept any name as connector name, what encoding is accepted as parameter? e.g. 
how would a request for a connector named \"My test connector\" look like?", - "comment_created_at": "2024-04-23T12:53:00+00:00", - "comment_author": "jedrazb", - "comment_body": "as this is a URL param it should be encoded as `My%20test%20connector` - I don't think we should explain how URL encoding works here", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1567516376", - "pr_number": 107344, - "pr_file": "docs/reference/connector/apis/create-connector-api.asciidoc", - "created_at": "2024-04-16T14:59:35+00:00", - "commented_code": "`index_name`::\n(Required, string) The target index for syncing data by the connector.\n\n// making this required does not align with the BYOI for connectors. I believe we discussed we're making this optional. \nIn Kibana, attaching an index also creates the index, if we specify an index here, will it be created? When? \n`name`::\n(Optional, string) The name of the connector.\n// having an optional name Killed my Kibana experience. Without an index or a name, there is no way for me to go to connector Overview.", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "1567516376", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 107344, - "pr_file": "docs/reference/connector/apis/create-connector-api.asciidoc", - "discussion_id": "1567516376", - "commented_code": "@@ -66,19 +68,22 @@ Creates a connector document in the internal index and initializes its configura\n \n `index_name`::\n (Required, string) The target index for syncing data by the connector.\n-\n+// making this required does not align with the BYOI for connectors. I believe we discussed we're making this optional. \n+In Kibana, attaching an index also creates the index, if we specify an index here, will it be created? When? \n `name`::\n (Optional, string) The name of the connector.\n+// having an optional name Killed my Kibana experience. Without an index or a name, there is no way for me to go to connector Overview.", - "comment_created_at": "2024-04-16T14:59:35+00:00", - "comment_author": "danajuratoni", - "comment_body": "Connector name comes with a set of validations in Kibana. What is the proposal for the next steps: relaxing Kibana validations or adding validations to the API?", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-configure-type-serialization.json b/_reviewers/elasticsearch-configure-type-serialization.json new file mode 100644 index 0000000..e580ed6 --- /dev/null +++ b/_reviewers/elasticsearch-configure-type-serialization.json @@ -0,0 +1,174 @@ +[ + { + "discussion_id": "2154405635", + "pr_number": 129492, + "pr_file": "docs/reference/search-connectors/es-connectors-mongodb.md", + "created_at": "2025-06-18T11:52:39+00:00", + "commented_code": "See [Known issues](/release-notes/known-issues.md) for any issues affecting all connectors.\n\n#### UUIDs are not correctly deserialised causing problems with ingesting documents into Elasticsearch\n\nMongoDB has special handling of UUID type: there is a legacy and a modern approach. You can read [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html) about the details.\n\nWith 8.18.3 better handling of standard UUID representation has been implemented - now MongoDB connector is able to properly deserialise them into valid UUIDs. 
However, for legacy UUIDs or older versions of the connector you might need to adjust the connection string to specify the UUID representation.", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2154405635", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129492, + "pr_file": "docs/reference/search-connectors/es-connectors-mongodb.md", + "discussion_id": "2154405635", + "commented_code": "@@ -251,6 +251,15 @@ A bug introduced in **8.12.0** causes the Connectors docker image to error out i\n \n See [Known issues](/release-notes/known-issues.md) for any issues affecting all connectors.\n \n+#### UUIDs are not correctly deserialised causing problems with ingesting documents into Elasticsearch\n+\n+MongoDB has special handling of UUID type: there is a legacy and a modern approach. You can read [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html) about the details.\n+\n+With 8.18.3 better handling of standard UUID representation has been implemented - now MongoDB connector is able to properly deserialise them into valid UUIDs. However, for legacy UUIDs or older versions of the connector you might need to adjust the connection string to specify the UUID representation.", + "comment_created_at": "2025-06-18T11:52:39+00:00", + "comment_author": "charlotte-hoblik", + "comment_body": "```suggestion\r\nWith connector framework version 9.0.3, we improved how standard UUIDs are handled. Now, the MongoDB connector can correctly deserialize UUIDs into valid Elasticsearch values. However, for legacy UUIDs or older connector versions, you might need to adjust the connection string to specify the UUID representation.\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2154645835", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129492, + "pr_file": "docs/reference/search-connectors/es-connectors-mongodb.md", + "discussion_id": "2154405635", + "commented_code": "@@ -251,6 +251,15 @@ A bug introduced in **8.12.0** causes the Connectors docker image to error out i\n \n See [Known issues](/release-notes/known-issues.md) for any issues affecting all connectors.\n \n+#### UUIDs are not correctly deserialised causing problems with ingesting documents into Elasticsearch\n+\n+MongoDB has special handling of UUID type: there is a legacy and a modern approach. You can read [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html) about the details.\n+\n+With 8.18.3 better handling of standard UUID representation has been implemented - now MongoDB connector is able to properly deserialise them into valid UUIDs. 
However, for legacy UUIDs or older versions of the connector you might need to adjust the connection string to specify the UUID representation.", + "comment_created_at": "2025-06-18T13:41:10+00:00", + "comment_author": "artem-shelkovnikov", + "comment_body": "Currently the change has been backported to 8.18 branch which should make it available in 8.18.3 release.", + "pr_file_module": null + }, + { + "comment_id": "2154655180", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129492, + "pr_file": "docs/reference/search-connectors/es-connectors-mongodb.md", + "discussion_id": "2154405635", + "commented_code": "@@ -251,6 +251,15 @@ A bug introduced in **8.12.0** causes the Connectors docker image to error out i\n \n See [Known issues](/release-notes/known-issues.md) for any issues affecting all connectors.\n \n+#### UUIDs are not correctly deserialised causing problems with ingesting documents into Elasticsearch\n+\n+MongoDB has special handling of UUID type: there is a legacy and a modern approach. You can read [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html) about the details.\n+\n+With 8.18.3 better handling of standard UUID representation has been implemented - now MongoDB connector is able to properly deserialise them into valid UUIDs. However, for legacy UUIDs or older versions of the connector you might need to adjust the connection string to specify the UUID representation.", + "comment_created_at": "2025-06-18T13:45:04+00:00", + "comment_author": "charlotte-hoblik", + "comment_body": "You’re absolutely right, the change has been backported to 8.18.3. That said, due to a documentation system shift between the 8.x and 9.x, we’re currently maintaining this doc version against the 9.x branches. We’re still adding 8.18.3 info to the 8.x docs where relevant, so it’s covered in both places.", + "pr_file_module": null + }, + { + "comment_id": "2154658977", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129492, + "pr_file": "docs/reference/search-connectors/es-connectors-mongodb.md", + "discussion_id": "2154405635", + "commented_code": "@@ -251,6 +251,15 @@ A bug introduced in **8.12.0** causes the Connectors docker image to error out i\n \n See [Known issues](/release-notes/known-issues.md) for any issues affecting all connectors.\n \n+#### UUIDs are not correctly deserialised causing problems with ingesting documents into Elasticsearch\n+\n+MongoDB has special handling of UUID type: there is a legacy and a modern approach. You can read [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html) about the details.\n+\n+With 8.18.3 better handling of standard UUID representation has been implemented - now MongoDB connector is able to properly deserialise them into valid UUIDs. 
However, for legacy UUIDs or older versions of the connector you might need to adjust the connection string to specify the UUID representation.", + "comment_created_at": "2025-06-18T13:46:41+00:00", + "comment_author": "artem-shelkovnikov", + "comment_body": "Ah sorry, I got confused by it - thank you for the suggestions, accepted all!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2154429939", + "pr_number": 129492, + "pr_file": "docs/reference/search-connectors/es-connectors-mongodb.md", + "created_at": "2025-06-18T12:06:08+00:00", + "commented_code": "See [Known issues](/release-notes/known-issues.md) for any issues affecting all connectors.\n\n#### UUIDs are not correctly deserialised causing problems with ingesting documents into Elasticsearch\n\nMongoDB has special handling of UUID type: there is a legacy and a modern approach. You can read [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html) about the details.\n\nWith 8.18.3 better handling of standard UUID representation has been implemented - now MongoDB connector is able to properly deserialise them into valid UUIDs. However, for legacy UUIDs or older versions of the connector you might need to adjust the connection string to specify the UUID representation.\n\nFor example, if you are using modern UUID representation, adding `uuidRepresentation=standard` query parameter into the URL in the `host` Rich Configurable Field will make the connector properly handle UUIDs. With this change the full `host` Rich Configurable Field value could look like this: `mongodb+srv://my_username:my_password@cluster0.mongodb.net/mydb?w=majority&uuidRepresentation=standard`", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2154429939", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129492, + "pr_file": "docs/reference/search-connectors/es-connectors-mongodb.md", + "discussion_id": "2154429939", + "commented_code": "@@ -251,6 +251,15 @@ A bug introduced in **8.12.0** causes the Connectors docker image to error out i\n \n See [Known issues](/release-notes/known-issues.md) for any issues affecting all connectors.\n \n+#### UUIDs are not correctly deserialised causing problems with ingesting documents into Elasticsearch\n+\n+MongoDB has special handling of UUID type: there is a legacy and a modern approach. You can read [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html) about the details.\n+\n+With 8.18.3 better handling of standard UUID representation has been implemented - now MongoDB connector is able to properly deserialise them into valid UUIDs. However, for legacy UUIDs or older versions of the connector you might need to adjust the connection string to specify the UUID representation.\n+\n+For example, if you are using modern UUID representation, adding `uuidRepresentation=standard` query parameter into the URL in the `host` Rich Configurable Field will make the connector properly handle UUIDs. 
With this change the full `host` Rich Configurable Field value could look like this: `mongodb+srv://my_username:my_password@cluster0.mongodb.net/mydb?w=majority&uuidRepresentation=standard`", + "comment_created_at": "2025-06-18T12:06:08+00:00", + "comment_author": "charlotte-hoblik", + "comment_body": "```suggestion\r\nFor example, if you are using the modern UUID representation, adding the `uuidRepresentation=standard` query parameter to the MongoDB connection URI in the `host` Rich Configurable Field will allow the connector to properly handle UUIDs. With this change, the full `host` Rich Configurable Field value could look like this:`mongodb+srv://my_username:my_password@cluster0.mongodb.net/mydb?w=majority&uuidRepresentation=standard`\r\n```\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2154435770", + "pr_number": 129492, + "pr_file": "docs/reference/search-connectors/es-connectors-mongodb.md", + "created_at": "2025-06-18T12:09:22+00:00", + "commented_code": "See [Known issues](/release-notes/known-issues.md) for any issues affecting all connectors.\n\n#### UUIDs are not correctly deserialised causing problems with ingesting documents into Elasticsearch\n\nMongoDB has special handling of UUID type: there is a legacy and a modern approach. You can read [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html) about the details.\n\nWith 8.18.3 better handling of standard UUID representation has been implemented - now MongoDB connector is able to properly deserialise them into valid UUIDs. However, for legacy UUIDs or older versions of the connector you might need to adjust the connection string to specify the UUID representation.\n\nFor example, if you are using modern UUID representation, adding `uuidRepresentation=standard` query parameter into the URL in the `host` Rich Configurable Field will make the connector properly handle UUIDs. With this change the full `host` Rich Configurable Field value could look like this: `mongodb+srv://my_username:my_password@cluster0.mongodb.net/mydb?w=majority&uuidRepresentation=standard`\n\nIf you are using, for example, legacy C# representation of UUIDs, then you should add `uuidRepresentation=csharpLegacy`, for Java it'll be `uuidRepresentation=javaLegacy` and for Python it'll be `uuidRepresentation=pythonLegacy`. Full explanation can be found in [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html#configuring-a-uuid-representation).", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2154435770", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129492, + "pr_file": "docs/reference/search-connectors/es-connectors-mongodb.md", + "discussion_id": "2154435770", + "commented_code": "@@ -251,6 +251,15 @@ A bug introduced in **8.12.0** causes the Connectors docker image to error out i\n \n See [Known issues](/release-notes/known-issues.md) for any issues affecting all connectors.\n \n+#### UUIDs are not correctly deserialised causing problems with ingesting documents into Elasticsearch\n+\n+MongoDB has special handling of UUID type: there is a legacy and a modern approach. You can read [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html) about the details.\n+\n+With 8.18.3 better handling of standard UUID representation has been implemented - now MongoDB connector is able to properly deserialise them into valid UUIDs. 
However, for legacy UUIDs or older versions of the connector you might need to adjust the connection string to specify the UUID representation.\n+\n+For example, if you are using modern UUID representation, adding `uuidRepresentation=standard` query parameter into the URL in the `host` Rich Configurable Field will make the connector properly handle UUIDs. With this change the full `host` Rich Configurable Field value could look like this: `mongodb+srv://my_username:my_password@cluster0.mongodb.net/mydb?w=majority&uuidRepresentation=standard`\n+\n+If you are using, for example, legacy C# representation of UUIDs, then you should add `uuidRepresentation=csharpLegacy`, for Java it'll be `uuidRepresentation=javaLegacy` and for Python it'll be `uuidRepresentation=pythonLegacy`. Full explanation can be found in [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html#configuring-a-uuid-representation).", + "comment_created_at": "2025-06-18T12:09:22+00:00", + "comment_author": "charlotte-hoblik", + "comment_body": "```suggestion\r\nIf you’re using a legacy UUID representation, you should adjust the connection URI accordingly. For example:\r\n\r\n- C#: `uuidRepresentation=csharpLegacy`\r\n- Java: `uuidRepresentation=javaLegacy`\r\n- Python: `uuidRepresentation=pythonLegacy`\r\n\r\nYou can find a full explanation in the [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html#configuring-a-uuid-representation).\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2180393094", + "pr_number": 130410, + "pr_file": "docs/reference/query-languages/esql/esql-lookup-join.md", + "created_at": "2025-07-02T15:42:40+00:00", + "commented_code": "* `short` and `byte` are compatible with `integer` (all represented as `int`)\n * `float`, `half_float`, and `scaled_float` are compatible with `double` (all represented as `double`)\n * For text fields: You can only use text fields as the join key on the left-hand side of the join and only if they have a `.keyword` subfield\n * `DATE` and `DATE_NANOS` can only be joined against the exact same type.\n\nTo obtain a join key with a compatible type, use a [conversion function](/reference/query-languages/esql/functions-operators/type-conversion-functions.md) if needed.\n\nFor a complete list of supported data types and their internal representations, see the [Supported Field Types documentation](/reference/query-languages/esql/limitations.md#_supported_types).\nThe list of unsupported fields includes all types not supported by {{esql}} as described in the [Unsupported Field Types documentation](/reference/query-languages/esql/limitations.md#_unsupported_types).", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2180393094", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130410, + "pr_file": "docs/reference/query-languages/esql/esql-lookup-join.md", + "discussion_id": "2180393094", + "commented_code": "@@ -151,10 +151,15 @@ To use `LOOKUP JOIN`, the following requirements must be met:\n * `short` and `byte` are compatible with `integer` (all represented as `int`)\n * `float`, `half_float`, and `scaled_float` are compatible with `double` (all represented as `double`)\n * For text fields: You can only use text fields as the join key on the left-hand side of the join and only if they have a `.keyword` subfield\n+ * `DATE` and `DATE_NANOS` can only be joined against the exact same type.\n \n To obtain a join key with a compatible type, use a [conversion 
function](/reference/query-languages/esql/functions-operators/type-conversion-functions.md) if needed.\n \n-For a complete list of supported data types and their internal representations, see the [Supported Field Types documentation](/reference/query-languages/esql/limitations.md#_supported_types).\n+The list of unsupported fields includes all types not supported by {{esql}} as described in the [Unsupported Field Types documentation](/reference/query-languages/esql/limitations.md#_unsupported_types).", + "comment_created_at": "2025-07-02T15:42:40+00:00", + "comment_author": "leemthompo", + "comment_body": "@craigtaverner I think this section is a little messy now, and technically this should be all part of the bulleted list\r\n\r\nI suggest we revamp the **Prerequisites** section and use subheadings, and instead of prose/list items for the supported/unsupported types, perhaps a table would be a good alternative?\r\n\r\n", + "pr_file_module": null + }, + { + "comment_id": "2180396635", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130410, + "pr_file": "docs/reference/query-languages/esql/esql-lookup-join.md", + "discussion_id": "2180393094", + "commented_code": "@@ -151,10 +151,15 @@ To use `LOOKUP JOIN`, the following requirements must be met:\n * `short` and `byte` are compatible with `integer` (all represented as `int`)\n * `float`, `half_float`, and `scaled_float` are compatible with `double` (all represented as `double`)\n * For text fields: You can only use text fields as the join key on the left-hand side of the join and only if they have a `.keyword` subfield\n+ * `DATE` and `DATE_NANOS` can only be joined against the exact same type.\n \n To obtain a join key with a compatible type, use a [conversion function](/reference/query-languages/esql/functions-operators/type-conversion-functions.md) if needed.\n \n-For a complete list of supported data types and their internal representations, see the [Supported Field Types documentation](/reference/query-languages/esql/limitations.md#_supported_types).\n+The list of unsupported fields includes all types not supported by {{esql}} as described in the [Unsupported Field Types documentation](/reference/query-languages/esql/limitations.md#_unsupported_types).", + "comment_created_at": "2025-07-02T15:44:36+00:00", + "comment_author": "leemthompo", + "comment_body": "Here's an approach, in docsv3 syntax:\r\n\r\n\r\n## Prerequisites\r\n\r\n### Index configuration\r\nIndices used for lookups must be configured with the [`lookup` index mode](/reference/elasticsearch/index-settings/index-modules.md#index-mode-setting).\r\n\r\n### Data type compatibility\r\nJoin keys must have compatible data types between the source and lookup indices. 
Types within the same compatibility group can be joined together:\r\n\r\n| Compatibility group | Types | Notes |\r\n|---------------------|-------|--------|\r\n| **Integer family** | `byte`, `short`, `integer` | All interchangeable |\r\n| **Float family** | `half_float`, `float`, `scaled_float`, `double` | All interchangeable |\r\n| **Keyword family** | `keyword`, `text.keyword` | Text fields only as join key on left-hand side and must have `.keyword` subfield |\r\n| **Date (Exact)** | `DATE` | Must match exactly |\r\n| **Date Nanos (Exact)** | `DATE_NANOS` | Must match exactly |\r\n| **Boolean** | `boolean` | Must match exactly |\r\n\r\n```{note}\r\nFor a complete list of all types supported in `LOOKUP JOIN`, refer to the [`LOOKUP JOIN` supported types table](/reference/query-languages/esql/commands/processing-commands.md#esql-lookup-join).\r\n```\r\n\r\n```{tip}\r\nTo obtain a join key with a compatible type, use a [conversion function](/reference/query-languages/esql/functions-operators/type-conversion-functions.md) if needed.\r\n```\r\n\r\n### Unsupported Types\r\nIn addition to the [{{esql}} unsupported field types](/reference/query-languages/esql/limitations.md#_unsupported_types), `LOOKUP JOIN` does not support:\r\n\r\n* `VERSION`\r\n* `UNSIGNED_LONG` \r\n* Spatial types like `GEO_POINT`, `GEO_SHAPE`\r\n* Temporal intervals like `DURATION`, `PERIOD`\r\n", + "pr_file_module": null + }, + { + "comment_id": "2180418260", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130410, + "pr_file": "docs/reference/query-languages/esql/esql-lookup-join.md", + "discussion_id": "2180393094", + "commented_code": "@@ -151,10 +151,15 @@ To use `LOOKUP JOIN`, the following requirements must be met:\n * `short` and `byte` are compatible with `integer` (all represented as `int`)\n * `float`, `half_float`, and `scaled_float` are compatible with `double` (all represented as `double`)\n * For text fields: You can only use text fields as the join key on the left-hand side of the join and only if they have a `.keyword` subfield\n+ * `DATE` and `DATE_NANOS` can only be joined against the exact same type.\n \n To obtain a join key with a compatible type, use a [conversion function](/reference/query-languages/esql/functions-operators/type-conversion-functions.md) if needed.\n \n-For a complete list of supported data types and their internal representations, see the [Supported Field Types documentation](/reference/query-languages/esql/limitations.md#_supported_types).\n+The list of unsupported fields includes all types not supported by {{esql}} as described in the [Unsupported Field Types documentation](/reference/query-languages/esql/limitations.md#_unsupported_types).", + "comment_created_at": "2025-07-02T15:53:47+00:00", + "comment_author": "leemthompo", + "comment_body": "Or perhaps we can lose the overview table/list entirely and delegate to the new autogenerated table?", + "pr_file_module": null + }, + { + "comment_id": "2180435815", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130410, + "pr_file": "docs/reference/query-languages/esql/esql-lookup-join.md", + "discussion_id": "2180393094", + "commented_code": "@@ -151,10 +151,15 @@ To use `LOOKUP JOIN`, the following requirements must be met:\n * `short` and `byte` are compatible with `integer` (all represented as `int`)\n * `float`, `half_float`, and `scaled_float` are compatible with `double` (all represented as `double`)\n * For text fields: You can only use text fields as the join key on the left-hand side of the join and only 
if they have a `.keyword` subfield\n+ * `DATE` and `DATE_NANOS` can only be joined against the exact same type.\n \n To obtain a join key with a compatible type, use a [conversion function](/reference/query-languages/esql/functions-operators/type-conversion-functions.md) if needed.\n \n-For a complete list of supported data types and their internal representations, see the [Supported Field Types documentation](/reference/query-languages/esql/limitations.md#_supported_types).\n+The list of unsupported fields includes all types not supported by {{esql}} as described in the [Unsupported Field Types documentation](/reference/query-languages/esql/limitations.md#_unsupported_types).", + "comment_created_at": "2025-07-02T15:59:14+00:00", + "comment_author": "leemthompo", + "comment_body": "anyways just food for thought, I think the data type compatibility info should be a standalone section in any case, whether it's under prerequisites or not :)", + "pr_file_module": null + }, + { + "comment_id": "2180453560", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130410, + "pr_file": "docs/reference/query-languages/esql/esql-lookup-join.md", + "discussion_id": "2180393094", + "commented_code": "@@ -151,10 +151,15 @@ To use `LOOKUP JOIN`, the following requirements must be met:\n * `short` and `byte` are compatible with `integer` (all represented as `int`)\n * `float`, `half_float`, and `scaled_float` are compatible with `double` (all represented as `double`)\n * For text fields: You can only use text fields as the join key on the left-hand side of the join and only if they have a `.keyword` subfield\n+ * `DATE` and `DATE_NANOS` can only be joined against the exact same type.\n \n To obtain a join key with a compatible type, use a [conversion function](/reference/query-languages/esql/functions-operators/type-conversion-functions.md) if needed.\n \n-For a complete list of supported data types and their internal representations, see the [Supported Field Types documentation](/reference/query-languages/esql/limitations.md#_supported_types).\n+The list of unsupported fields includes all types not supported by {{esql}} as described in the [Unsupported Field Types documentation](/reference/query-languages/esql/limitations.md#_unsupported_types).", + "comment_created_at": "2025-07-02T16:07:59+00:00", + "comment_author": "craigtaverner", + "comment_body": "I incorporated your changes. I agree it makes things look more similar with two tables. But I think users might find both places independently, so it is not too bad to have two ways. The generated one is more up-to-date. I did make one fix to your table, because we can join between integers and float now too.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-configure-type-serialization.md b/_reviewers/elasticsearch-configure-type-serialization.md index fb54660..bf5f3d1 100644 --- a/_reviewers/elasticsearch-configure-type-serialization.md +++ b/_reviewers/elasticsearch-configure-type-serialization.md @@ -32,179 +32,3 @@ When joining data across different sources, ensure join keys have compatible dat - Text/string types may require specific formatting or normalization Always document data type compatibility requirements in your schema design and provide conversion functions where needed to ensure consistent data handling across systems. 
- - -[ - { - "discussion_id": "2154405635", - "pr_number": 129492, - "pr_file": "docs/reference/search-connectors/es-connectors-mongodb.md", - "created_at": "2025-06-18T11:52:39+00:00", - "commented_code": "See [Known issues](/release-notes/known-issues.md) for any issues affecting all connectors.\n\n#### UUIDs are not correctly deserialised causing problems with ingesting documents into Elasticsearch\n\nMongoDB has special handling of UUID type: there is a legacy and a modern approach. You can read [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html) about the details.\n\nWith 8.18.3 better handling of standard UUID representation has been implemented - now MongoDB connector is able to properly deserialise them into valid UUIDs. However, for legacy UUIDs or older versions of the connector you might need to adjust the connection string to specify the UUID representation.", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2154405635", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129492, - "pr_file": "docs/reference/search-connectors/es-connectors-mongodb.md", - "discussion_id": "2154405635", - "commented_code": "@@ -251,6 +251,15 @@ A bug introduced in **8.12.0** causes the Connectors docker image to error out i\n \n See [Known issues](/release-notes/known-issues.md) for any issues affecting all connectors.\n \n+#### UUIDs are not correctly deserialised causing problems with ingesting documents into Elasticsearch\n+\n+MongoDB has special handling of UUID type: there is a legacy and a modern approach. You can read [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html) about the details.\n+\n+With 8.18.3 better handling of standard UUID representation has been implemented - now MongoDB connector is able to properly deserialise them into valid UUIDs. However, for legacy UUIDs or older versions of the connector you might need to adjust the connection string to specify the UUID representation.", - "comment_created_at": "2025-06-18T11:52:39+00:00", - "comment_author": "charlotte-hoblik", - "comment_body": "```suggestion\r\nWith connector framework version 9.0.3, we improved how standard UUIDs are handled. Now, the MongoDB connector can correctly deserialize UUIDs into valid Elasticsearch values. However, for legacy UUIDs or older connector versions, you might need to adjust the connection string to specify the UUID representation.\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2154645835", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129492, - "pr_file": "docs/reference/search-connectors/es-connectors-mongodb.md", - "discussion_id": "2154405635", - "commented_code": "@@ -251,6 +251,15 @@ A bug introduced in **8.12.0** causes the Connectors docker image to error out i\n \n See [Known issues](/release-notes/known-issues.md) for any issues affecting all connectors.\n \n+#### UUIDs are not correctly deserialised causing problems with ingesting documents into Elasticsearch\n+\n+MongoDB has special handling of UUID type: there is a legacy and a modern approach. You can read [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html) about the details.\n+\n+With 8.18.3 better handling of standard UUID representation has been implemented - now MongoDB connector is able to properly deserialise them into valid UUIDs. 
However, for legacy UUIDs or older versions of the connector you might need to adjust the connection string to specify the UUID representation.", - "comment_created_at": "2025-06-18T13:41:10+00:00", - "comment_author": "artem-shelkovnikov", - "comment_body": "Currently the change has been backported to 8.18 branch which should make it available in 8.18.3 release.", - "pr_file_module": null - }, - { - "comment_id": "2154655180", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129492, - "pr_file": "docs/reference/search-connectors/es-connectors-mongodb.md", - "discussion_id": "2154405635", - "commented_code": "@@ -251,6 +251,15 @@ A bug introduced in **8.12.0** causes the Connectors docker image to error out i\n \n See [Known issues](/release-notes/known-issues.md) for any issues affecting all connectors.\n \n+#### UUIDs are not correctly deserialised causing problems with ingesting documents into Elasticsearch\n+\n+MongoDB has special handling of UUID type: there is a legacy and a modern approach. You can read [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html) about the details.\n+\n+With 8.18.3 better handling of standard UUID representation has been implemented - now MongoDB connector is able to properly deserialise them into valid UUIDs. However, for legacy UUIDs or older versions of the connector you might need to adjust the connection string to specify the UUID representation.", - "comment_created_at": "2025-06-18T13:45:04+00:00", - "comment_author": "charlotte-hoblik", - "comment_body": "You\u2019re absolutely right, the change has been backported to 8.18.3. That said, due to a documentation system shift between the 8.x and 9.x, we\u2019re currently maintaining this doc version against the 9.x branches. We\u2019re still adding 8.18.3 info to the 8.x docs where relevant, so it\u2019s covered in both places.", - "pr_file_module": null - }, - { - "comment_id": "2154658977", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129492, - "pr_file": "docs/reference/search-connectors/es-connectors-mongodb.md", - "discussion_id": "2154405635", - "commented_code": "@@ -251,6 +251,15 @@ A bug introduced in **8.12.0** causes the Connectors docker image to error out i\n \n See [Known issues](/release-notes/known-issues.md) for any issues affecting all connectors.\n \n+#### UUIDs are not correctly deserialised causing problems with ingesting documents into Elasticsearch\n+\n+MongoDB has special handling of UUID type: there is a legacy and a modern approach. You can read [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html) about the details.\n+\n+With 8.18.3 better handling of standard UUID representation has been implemented - now MongoDB connector is able to properly deserialise them into valid UUIDs. 
However, for legacy UUIDs or older versions of the connector you might need to adjust the connection string to specify the UUID representation.", - "comment_created_at": "2025-06-18T13:46:41+00:00", - "comment_author": "artem-shelkovnikov", - "comment_body": "Ah sorry, I got confused by it - thank you for the suggestions, accepted all!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2154429939", - "pr_number": 129492, - "pr_file": "docs/reference/search-connectors/es-connectors-mongodb.md", - "created_at": "2025-06-18T12:06:08+00:00", - "commented_code": "See [Known issues](/release-notes/known-issues.md) for any issues affecting all connectors.\n\n#### UUIDs are not correctly deserialised causing problems with ingesting documents into Elasticsearch\n\nMongoDB has special handling of UUID type: there is a legacy and a modern approach. You can read [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html) about the details.\n\nWith 8.18.3 better handling of standard UUID representation has been implemented - now MongoDB connector is able to properly deserialise them into valid UUIDs. However, for legacy UUIDs or older versions of the connector you might need to adjust the connection string to specify the UUID representation.\n\nFor example, if you are using modern UUID representation, adding `uuidRepresentation=standard` query parameter into the URL in the `host` Rich Configurable Field will make the connector properly handle UUIDs. With this change the full `host` Rich Configurable Field value could look like this: `mongodb+srv://my_username:my_password@cluster0.mongodb.net/mydb?w=majority&uuidRepresentation=standard`", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2154429939", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129492, - "pr_file": "docs/reference/search-connectors/es-connectors-mongodb.md", - "discussion_id": "2154429939", - "commented_code": "@@ -251,6 +251,15 @@ A bug introduced in **8.12.0** causes the Connectors docker image to error out i\n \n See [Known issues](/release-notes/known-issues.md) for any issues affecting all connectors.\n \n+#### UUIDs are not correctly deserialised causing problems with ingesting documents into Elasticsearch\n+\n+MongoDB has special handling of UUID type: there is a legacy and a modern approach. You can read [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html) about the details.\n+\n+With 8.18.3 better handling of standard UUID representation has been implemented - now MongoDB connector is able to properly deserialise them into valid UUIDs. However, for legacy UUIDs or older versions of the connector you might need to adjust the connection string to specify the UUID representation.\n+\n+For example, if you are using modern UUID representation, adding `uuidRepresentation=standard` query parameter into the URL in the `host` Rich Configurable Field will make the connector properly handle UUIDs. 
With this change the full `host` Rich Configurable Field value could look like this: `mongodb+srv://my_username:my_password@cluster0.mongodb.net/mydb?w=majority&uuidRepresentation=standard`", - "comment_created_at": "2025-06-18T12:06:08+00:00", - "comment_author": "charlotte-hoblik", - "comment_body": "```suggestion\r\nFor example, if you are using the modern UUID representation, adding the `uuidRepresentation=standard` query parameter to the MongoDB connection URI in the `host` Rich Configurable Field will allow the connector to properly handle UUIDs. With this change, the full `host` Rich Configurable Field value could look like this:`mongodb+srv://my_username:my_password@cluster0.mongodb.net/mydb?w=majority&uuidRepresentation=standard`\r\n```\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2154435770", - "pr_number": 129492, - "pr_file": "docs/reference/search-connectors/es-connectors-mongodb.md", - "created_at": "2025-06-18T12:09:22+00:00", - "commented_code": "See [Known issues](/release-notes/known-issues.md) for any issues affecting all connectors.\n\n#### UUIDs are not correctly deserialised causing problems with ingesting documents into Elasticsearch\n\nMongoDB has special handling of UUID type: there is a legacy and a modern approach. You can read [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html) about the details.\n\nWith 8.18.3 better handling of standard UUID representation has been implemented - now MongoDB connector is able to properly deserialise them into valid UUIDs. However, for legacy UUIDs or older versions of the connector you might need to adjust the connection string to specify the UUID representation.\n\nFor example, if you are using modern UUID representation, adding `uuidRepresentation=standard` query parameter into the URL in the `host` Rich Configurable Field will make the connector properly handle UUIDs. With this change the full `host` Rich Configurable Field value could look like this: `mongodb+srv://my_username:my_password@cluster0.mongodb.net/mydb?w=majority&uuidRepresentation=standard`\n\nIf you are using, for example, legacy C# representation of UUIDs, then you should add `uuidRepresentation=csharpLegacy`, for Java it'll be `uuidRepresentation=javaLegacy` and for Python it'll be `uuidRepresentation=pythonLegacy`. Full explanation can be found in [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html#configuring-a-uuid-representation).", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2154435770", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129492, - "pr_file": "docs/reference/search-connectors/es-connectors-mongodb.md", - "discussion_id": "2154435770", - "commented_code": "@@ -251,6 +251,15 @@ A bug introduced in **8.12.0** causes the Connectors docker image to error out i\n \n See [Known issues](/release-notes/known-issues.md) for any issues affecting all connectors.\n \n+#### UUIDs are not correctly deserialised causing problems with ingesting documents into Elasticsearch\n+\n+MongoDB has special handling of UUID type: there is a legacy and a modern approach. You can read [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html) about the details.\n+\n+With 8.18.3 better handling of standard UUID representation has been implemented - now MongoDB connector is able to properly deserialise them into valid UUIDs. 
However, for legacy UUIDs or older versions of the connector you might need to adjust the connection string to specify the UUID representation.\n+\n+For example, if you are using modern UUID representation, adding `uuidRepresentation=standard` query parameter into the URL in the `host` Rich Configurable Field will make the connector properly handle UUIDs. With this change the full `host` Rich Configurable Field value could look like this: `mongodb+srv://my_username:my_password@cluster0.mongodb.net/mydb?w=majority&uuidRepresentation=standard`\n+\n+If you are using, for example, legacy C# representation of UUIDs, then you should add `uuidRepresentation=csharpLegacy`, for Java it'll be `uuidRepresentation=javaLegacy` and for Python it'll be `uuidRepresentation=pythonLegacy`. Full explanation can be found in [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html#configuring-a-uuid-representation).", - "comment_created_at": "2025-06-18T12:09:22+00:00", - "comment_author": "charlotte-hoblik", - "comment_body": "```suggestion\r\nIf you\u2019re using a legacy UUID representation, you should adjust the connection URI accordingly. For example:\r\n\r\n- C#: `uuidRepresentation=csharpLegacy`\r\n- Java: `uuidRepresentation=javaLegacy`\r\n- Python: `uuidRepresentation=pythonLegacy`\r\n\r\nYou can find a full explanation in the [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html#configuring-a-uuid-representation).\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2180393094", - "pr_number": 130410, - "pr_file": "docs/reference/query-languages/esql/esql-lookup-join.md", - "created_at": "2025-07-02T15:42:40+00:00", - "commented_code": "* `short` and `byte` are compatible with `integer` (all represented as `int`)\n * `float`, `half_float`, and `scaled_float` are compatible with `double` (all represented as `double`)\n * For text fields: You can only use text fields as the join key on the left-hand side of the join and only if they have a `.keyword` subfield\n * `DATE` and `DATE_NANOS` can only be joined against the exact same type.\n\nTo obtain a join key with a compatible type, use a [conversion function](/reference/query-languages/esql/functions-operators/type-conversion-functions.md) if needed.\n\nFor a complete list of supported data types and their internal representations, see the [Supported Field Types documentation](/reference/query-languages/esql/limitations.md#_supported_types).\nThe list of unsupported fields includes all types not supported by {{esql}} as described in the [Unsupported Field Types documentation](/reference/query-languages/esql/limitations.md#_unsupported_types).", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2180393094", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130410, - "pr_file": "docs/reference/query-languages/esql/esql-lookup-join.md", - "discussion_id": "2180393094", - "commented_code": "@@ -151,10 +151,15 @@ To use `LOOKUP JOIN`, the following requirements must be met:\n * `short` and `byte` are compatible with `integer` (all represented as `int`)\n * `float`, `half_float`, and `scaled_float` are compatible with `double` (all represented as `double`)\n * For text fields: You can only use text fields as the join key on the left-hand side of the join and only if they have a `.keyword` subfield\n+ * `DATE` and `DATE_NANOS` can only be joined against the exact same type.\n \n To obtain a join key with a compatible type, use a [conversion 
function](/reference/query-languages/esql/functions-operators/type-conversion-functions.md) if needed.\n \n-For a complete list of supported data types and their internal representations, see the [Supported Field Types documentation](/reference/query-languages/esql/limitations.md#_supported_types).\n+The list of unsupported fields includes all types not supported by {{esql}} as described in the [Unsupported Field Types documentation](/reference/query-languages/esql/limitations.md#_unsupported_types).", - "comment_created_at": "2025-07-02T15:42:40+00:00", - "comment_author": "leemthompo", - "comment_body": "@craigtaverner I think this section is a little messy now, and technically this should be all part of the bulleted list\r\n\r\nI suggest we revamp the **Prerequisites** section and use subheadings, and instead of prose/list items for the supported/unsupported types, perhaps a table would be a good alternative?\r\n\r\n", - "pr_file_module": null - }, - { - "comment_id": "2180396635", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130410, - "pr_file": "docs/reference/query-languages/esql/esql-lookup-join.md", - "discussion_id": "2180393094", - "commented_code": "@@ -151,10 +151,15 @@ To use `LOOKUP JOIN`, the following requirements must be met:\n * `short` and `byte` are compatible with `integer` (all represented as `int`)\n * `float`, `half_float`, and `scaled_float` are compatible with `double` (all represented as `double`)\n * For text fields: You can only use text fields as the join key on the left-hand side of the join and only if they have a `.keyword` subfield\n+ * `DATE` and `DATE_NANOS` can only be joined against the exact same type.\n \n To obtain a join key with a compatible type, use a [conversion function](/reference/query-languages/esql/functions-operators/type-conversion-functions.md) if needed.\n \n-For a complete list of supported data types and their internal representations, see the [Supported Field Types documentation](/reference/query-languages/esql/limitations.md#_supported_types).\n+The list of unsupported fields includes all types not supported by {{esql}} as described in the [Unsupported Field Types documentation](/reference/query-languages/esql/limitations.md#_unsupported_types).", - "comment_created_at": "2025-07-02T15:44:36+00:00", - "comment_author": "leemthompo", - "comment_body": "Here's an approach, in docsv3 syntax:\r\n\r\n\r\n## Prerequisites\r\n\r\n### Index configuration\r\nIndices used for lookups must be configured with the [`lookup` index mode](/reference/elasticsearch/index-settings/index-modules.md#index-mode-setting).\r\n\r\n### Data type compatibility\r\nJoin keys must have compatible data types between the source and lookup indices. 
Types within the same compatibility group can be joined together:\r\n\r\n| Compatibility group | Types | Notes |\r\n|---------------------|-------|--------|\r\n| **Integer family** | `byte`, `short`, `integer` | All interchangeable |\r\n| **Float family** | `half_float`, `float`, `scaled_float`, `double` | All interchangeable |\r\n| **Keyword family** | `keyword`, `text.keyword` | Text fields only as join key on left-hand side and must have `.keyword` subfield |\r\n| **Date (Exact)** | `DATE` | Must match exactly |\r\n| **Date Nanos (Exact)** | `DATE_NANOS` | Must match exactly |\r\n| **Boolean** | `boolean` | Must match exactly |\r\n\r\n```{note}\r\nFor a complete list of all types supported in `LOOKUP JOIN`, refer to the [`LOOKUP JOIN` supported types table](/reference/query-languages/esql/commands/processing-commands.md#esql-lookup-join).\r\n```\r\n\r\n```{tip}\r\nTo obtain a join key with a compatible type, use a [conversion function](/reference/query-languages/esql/functions-operators/type-conversion-functions.md) if needed.\r\n```\r\n\r\n### Unsupported Types\r\nIn addition to the [{{esql}} unsupported field types](/reference/query-languages/esql/limitations.md#_unsupported_types), `LOOKUP JOIN` does not support:\r\n\r\n* `VERSION`\r\n* `UNSIGNED_LONG` \r\n* Spatial types like `GEO_POINT`, `GEO_SHAPE`\r\n* Temporal intervals like `DURATION`, `PERIOD`\r\n", - "pr_file_module": null - }, - { - "comment_id": "2180418260", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130410, - "pr_file": "docs/reference/query-languages/esql/esql-lookup-join.md", - "discussion_id": "2180393094", - "commented_code": "@@ -151,10 +151,15 @@ To use `LOOKUP JOIN`, the following requirements must be met:\n * `short` and `byte` are compatible with `integer` (all represented as `int`)\n * `float`, `half_float`, and `scaled_float` are compatible with `double` (all represented as `double`)\n * For text fields: You can only use text fields as the join key on the left-hand side of the join and only if they have a `.keyword` subfield\n+ * `DATE` and `DATE_NANOS` can only be joined against the exact same type.\n \n To obtain a join key with a compatible type, use a [conversion function](/reference/query-languages/esql/functions-operators/type-conversion-functions.md) if needed.\n \n-For a complete list of supported data types and their internal representations, see the [Supported Field Types documentation](/reference/query-languages/esql/limitations.md#_supported_types).\n+The list of unsupported fields includes all types not supported by {{esql}} as described in the [Unsupported Field Types documentation](/reference/query-languages/esql/limitations.md#_unsupported_types).", - "comment_created_at": "2025-07-02T15:53:47+00:00", - "comment_author": "leemthompo", - "comment_body": "Or perhaps we can lose the overview table/list entirely and delegate to the new autogenerated table?", - "pr_file_module": null - }, - { - "comment_id": "2180435815", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130410, - "pr_file": "docs/reference/query-languages/esql/esql-lookup-join.md", - "discussion_id": "2180393094", - "commented_code": "@@ -151,10 +151,15 @@ To use `LOOKUP JOIN`, the following requirements must be met:\n * `short` and `byte` are compatible with `integer` (all represented as `int`)\n * `float`, `half_float`, and `scaled_float` are compatible with `double` (all represented as `double`)\n * For text fields: You can only use text fields as the join key on the left-hand side of the join and only 
if they have a `.keyword` subfield\n+ * `DATE` and `DATE_NANOS` can only be joined against the exact same type.\n \n To obtain a join key with a compatible type, use a [conversion function](/reference/query-languages/esql/functions-operators/type-conversion-functions.md) if needed.\n \n-For a complete list of supported data types and their internal representations, see the [Supported Field Types documentation](/reference/query-languages/esql/limitations.md#_supported_types).\n+The list of unsupported fields includes all types not supported by {{esql}} as described in the [Unsupported Field Types documentation](/reference/query-languages/esql/limitations.md#_unsupported_types).", - "comment_created_at": "2025-07-02T15:59:14+00:00", - "comment_author": "leemthompo", - "comment_body": "anyways just food for thought, I think the data type compatibility info should be a standalone section in any case, whether it's under prerequisites or not :)", - "pr_file_module": null - }, - { - "comment_id": "2180453560", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130410, - "pr_file": "docs/reference/query-languages/esql/esql-lookup-join.md", - "discussion_id": "2180393094", - "commented_code": "@@ -151,10 +151,15 @@ To use `LOOKUP JOIN`, the following requirements must be met:\n * `short` and `byte` are compatible with `integer` (all represented as `int`)\n * `float`, `half_float`, and `scaled_float` are compatible with `double` (all represented as `double`)\n * For text fields: You can only use text fields as the join key on the left-hand side of the join and only if they have a `.keyword` subfield\n+ * `DATE` and `DATE_NANOS` can only be joined against the exact same type.\n \n To obtain a join key with a compatible type, use a [conversion function](/reference/query-languages/esql/functions-operators/type-conversion-functions.md) if needed.\n \n-For a complete list of supported data types and their internal representations, see the [Supported Field Types documentation](/reference/query-languages/esql/limitations.md#_supported_types).\n+The list of unsupported fields includes all types not supported by {{esql}} as described in the [Unsupported Field Types documentation](/reference/query-languages/esql/limitations.md#_unsupported_types).", - "comment_created_at": "2025-07-02T16:07:59+00:00", - "comment_author": "craigtaverner", - "comment_body": "I incorporated your changes. I agree it makes things look more similar with two tables. But I think users might find both places independently, so it is not too bad to have two ways. The generated one is more up-to-date. 
I did make one fix to your table, because we can join between integers and float now too.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-defensive-null-handling.json b/_reviewers/elasticsearch-defensive-null-handling.json new file mode 100644 index 0000000..f1376e8 --- /dev/null +++ b/_reviewers/elasticsearch-defensive-null-handling.json @@ -0,0 +1,150 @@ +[ + { + "discussion_id": "2171707161", + "pr_number": 130000, + "pr_file": "server/src/main/java/org/elasticsearch/cluster/metadata/ProjectMetadata.java", + "created_at": "2025-06-27T11:29:43+00:00", + "commented_code": "public static ProjectMetadata fromXContent(XContentParser parser) throws IOException {\n XContentParser.Token token = parser.currentToken();\n if (token == null) {\n token = parser.nextToken();\n }", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2171707161", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130000, + "pr_file": "server/src/main/java/org/elasticsearch/cluster/metadata/ProjectMetadata.java", + "discussion_id": "2171707161", + "commented_code": "@@ -2089,6 +2089,9 @@ static boolean assertDataStreams(Map indices, DataStreamM\n \n public static ProjectMetadata fromXContent(XContentParser parser) throws IOException {\n XContentParser.Token token = parser.currentToken();\n+ if (token == null) {\n+ token = parser.nextToken();\n+ }", + "comment_created_at": "2025-06-27T11:29:43+00:00", + "comment_author": "pxsalehi", + "comment_body": "what's this all about?", + "pr_file_module": null + }, + { + "comment_id": "2174109969", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130000, + "pr_file": "server/src/main/java/org/elasticsearch/cluster/metadata/ProjectMetadata.java", + "discussion_id": "2171707161", + "commented_code": "@@ -2089,6 +2089,9 @@ static boolean assertDataStreams(Map indices, DataStreamM\n \n public static ProjectMetadata fromXContent(XContentParser parser) throws IOException {\n XContentParser.Token token = parser.currentToken();\n+ if (token == null) {\n+ token = parser.nextToken();\n+ }", + "comment_created_at": "2025-06-30T02:42:36+00:00", + "comment_author": "ywangd", + "comment_body": "This is needed when the `ProjectMetadata` is parsed on its own, i.e. _not_ as part of `Metadata`. In the later case, the `parser.nextToken()` method is already called when parsing the outer structure. In this PR, we need to de/serialize `ProjectMetadata` on its own. Hence the need for active call to this method. 
We also use this pattern in other places such as [here](https://github.com/elastic/elasticsearch/blob/cacce50192ac29e1aaca2e75c170a90d15af292c/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshots.java#L276-L278).", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2163198891", + "pr_number": 129684, + "pr_file": "qa/lucene-index-compatibility/src/javaRestTest/java/org/elasticsearch/lucene/AbstractIndexCompatibilityTestCase.java", + "created_at": "2025-06-24T07:44:21+00:00", + "commented_code": "@SuppressWarnings(\"unchecked\")\n protected static void assertIndexSetting(String indexName, Setting setting, Matcher matcher) throws Exception {\n var indexSettings = getIndexSettingsAsMap(indexName);\n assertThat(Boolean.parseBoolean((String) indexSettings.get(setting.getKey())), matcher);\n assertThat(Booleans.parseBoolean((String) indexSettings.get(setting.getKey())), matcher);", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2163198891", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129684, + "pr_file": "qa/lucene-index-compatibility/src/javaRestTest/java/org/elasticsearch/lucene/AbstractIndexCompatibilityTestCase.java", + "discussion_id": "2163198891", + "commented_code": "@@ -330,7 +331,7 @@ protected static List indexBlocks(String indexName) throws Excepti\n @SuppressWarnings(\"unchecked\")\n protected static void assertIndexSetting(String indexName, Setting setting, Matcher matcher) throws Exception {\n var indexSettings = getIndexSettingsAsMap(indexName);\n- assertThat(Boolean.parseBoolean((String) indexSettings.get(setting.getKey())), matcher);\n+ assertThat(Booleans.parseBoolean((String) indexSettings.get(setting.getKey())), matcher);", + "comment_created_at": "2025-06-24T07:44:21+00:00", + "comment_author": "mosche", + "comment_body": "Please use `Booleans.parseBoolean(...., false)` here in case the setting is absent", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2167510376", + "pr_number": 130035, + "pr_file": "x-pack/plugin/migrate/src/main/java/org/elasticsearch/xpack/migrate/task/ReindexDataStreamTask.java", + "created_at": "2025-06-25T19:57:23+00:00", + "commented_code": "int totalIndices = initialTotalIndices;\n int totalIndicesToBeUpgraded = initialTotalIndicesToBeUpgraded;\n PersistentTasksCustomMetadata.PersistentTask persistentTask = PersistentTasksCustomMetadata.getTaskWithId(\n clusterService.state(),\n clusterService.state().projectState(projectId).metadata(),", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2167510376", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130035, + "pr_file": "x-pack/plugin/migrate/src/main/java/org/elasticsearch/xpack/migrate/task/ReindexDataStreamTask.java", + "discussion_id": "2167510376", + "commented_code": "@@ -70,7 +74,7 @@ public ReindexDataStreamStatus getStatus() {\n int totalIndices = initialTotalIndices;\n int totalIndicesToBeUpgraded = initialTotalIndicesToBeUpgraded;\n PersistentTasksCustomMetadata.PersistentTask persistentTask = PersistentTasksCustomMetadata.getTaskWithId(\n- clusterService.state(),\n+ clusterService.state().projectState(projectId).metadata(),", + "comment_created_at": "2025-06-25T19:57:23+00:00", + "comment_author": "nielsbauman", + "comment_body": "1. There is no need to convert to `ProjectState` here (we could call `clusterService.state().metadata().getProject(projectId)`).\r\n2. 
I see we have a `null`-check below for `persistentTask`. While we should handle project deletions (even soft-deletes) gracefully and stop persistent tasks first, I think it doesn't hurt to have some defense against deleted projects here. We could do something like:\r\n\t```java\r\n\tvar project = clusterService.state().metadata().projects().get(projectId);\r\n\tPersistentTasksCustomMetadata.PersistentTask persistentTask = project == null ? null : PersistentTasksCustomMetadata.getTaskWithId(project, getPersistentTask());\r\n\t```\r\n\tBut other variations of that are fine too of course.\r\n\r\nThe same goes for similar changes in this and other files, and for obtaining `sourceIndex` in `ReindexDataStreamIndexTransportAction.java`. Generally, if a block of code assumes that something exists (e.g. a task/custom/index), I don't do an extra `null`-check for the project, but if the code defends against missing objects, I check for missing project to maintain the level of defense.", + "pr_file_module": null + }, + { + "comment_id": "2167648998", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130035, + "pr_file": "x-pack/plugin/migrate/src/main/java/org/elasticsearch/xpack/migrate/task/ReindexDataStreamTask.java", + "discussion_id": "2167510376", + "commented_code": "@@ -70,7 +74,7 @@ public ReindexDataStreamStatus getStatus() {\n int totalIndices = initialTotalIndices;\n int totalIndicesToBeUpgraded = initialTotalIndicesToBeUpgraded;\n PersistentTasksCustomMetadata.PersistentTask persistentTask = PersistentTasksCustomMetadata.getTaskWithId(\n- clusterService.state(),\n+ clusterService.state().projectState(projectId).metadata(),", + "comment_created_at": "2025-06-25T21:15:49+00:00", + "comment_author": "joegallo", + "comment_body": "14ac8331a512cb0696d8634722bea48fda3fd689 addresses your first point, but I'll have to grind a bit on the second one. 
(Which I'll do tomorrow.)", + "pr_file_module": null + }, + { + "comment_id": "2167691284", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130035, + "pr_file": "x-pack/plugin/migrate/src/main/java/org/elasticsearch/xpack/migrate/task/ReindexDataStreamTask.java", + "discussion_id": "2167510376", + "commented_code": "@@ -70,7 +74,7 @@ public ReindexDataStreamStatus getStatus() {\n int totalIndices = initialTotalIndices;\n int totalIndicesToBeUpgraded = initialTotalIndicesToBeUpgraded;\n PersistentTasksCustomMetadata.PersistentTask persistentTask = PersistentTasksCustomMetadata.getTaskWithId(\n- clusterService.state(),\n+ clusterService.state().projectState(projectId).metadata(),", + "comment_created_at": "2025-06-25T21:45:18+00:00", + "comment_author": "joegallo", + "comment_body": "I think your second point is the only comment here that's still outstanding, I'll pick up on that one tomorrow (this comment is just a bookmark from myself to myself).", + "pr_file_module": null + }, + { + "comment_id": "2169242470", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130035, + "pr_file": "x-pack/plugin/migrate/src/main/java/org/elasticsearch/xpack/migrate/task/ReindexDataStreamTask.java", + "discussion_id": "2167510376", + "commented_code": "@@ -70,7 +74,7 @@ public ReindexDataStreamStatus getStatus() {\n int totalIndices = initialTotalIndices;\n int totalIndicesToBeUpgraded = initialTotalIndicesToBeUpgraded;\n PersistentTasksCustomMetadata.PersistentTask persistentTask = PersistentTasksCustomMetadata.getTaskWithId(\n- clusterService.state(),\n+ clusterService.state().projectState(projectId).metadata(),", + "comment_created_at": "2025-06-26T14:41:50+00:00", + "comment_author": "joegallo", + "comment_body": "Okay, I think I've handled your second point via ef53eb2a0051c2a7167d11f6837c9e8ab1a6b289.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2128058868", + "pr_number": 128797, + "pr_file": "server/src/internalClusterTest/java/org/elasticsearch/recovery/RelocationIT.java", + "created_at": "2025-06-05T06:33:25+00:00", + "commented_code": "waitForDocs(numDocs, indexer);\n logger.info(\"--> {} docs indexed\", numDocs);\n\n IndicesService indicesService = internalCluster().getInstance(IndicesService.class, nodes[0]);\n IndexShard shard = indicesService.indexServiceSafe(resolveIndex(\"test\")).getShard(0);\n if (throttleIndexing) {\n // Activate index throttling on \"test\" index primary shard\n shard.activateThrottling();\n // Verify that indexing is throttled for this shard\n assertThat(shard.getEngineOrNull().isThrottled(), equalTo(true));", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2128058868", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 128797, + "pr_file": "server/src/internalClusterTest/java/org/elasticsearch/recovery/RelocationIT.java", + "discussion_id": "2128058868", + "commented_code": "@@ -200,6 +277,14 @@ public void testRelocationWhileIndexingRandom() throws Exception {\n waitForDocs(numDocs, indexer);\n logger.info(\"--> {} docs indexed\", numDocs);\n \n+ IndicesService indicesService = internalCluster().getInstance(IndicesService.class, nodes[0]);\n+ IndexShard shard = indicesService.indexServiceSafe(resolveIndex(\"test\")).getShard(0);\n+ if (throttleIndexing) {\n+ // Activate index throttling on \"test\" index primary shard\n+ shard.activateThrottling();\n+ // Verify that indexing is throttled for this shard\n+ assertThat(shard.getEngineOrNull().isThrottled(), equalTo(true));", + 
"comment_created_at": "2025-06-05T06:33:25+00:00", + "comment_author": "henningandersen", + "comment_body": "```suggestion\r\n assertThat(shard.getEngine().isThrottled(), equalTo(true));\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2129155634", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 128797, + "pr_file": "server/src/internalClusterTest/java/org/elasticsearch/recovery/RelocationIT.java", + "discussion_id": "2128058868", + "commented_code": "@@ -200,6 +277,14 @@ public void testRelocationWhileIndexingRandom() throws Exception {\n waitForDocs(numDocs, indexer);\n logger.info(\"--> {} docs indexed\", numDocs);\n \n+ IndicesService indicesService = internalCluster().getInstance(IndicesService.class, nodes[0]);\n+ IndexShard shard = indicesService.indexServiceSafe(resolveIndex(\"test\")).getShard(0);\n+ if (throttleIndexing) {\n+ // Activate index throttling on \"test\" index primary shard\n+ shard.activateThrottling();\n+ // Verify that indexing is throttled for this shard\n+ assertThat(shard.getEngineOrNull().isThrottled(), equalTo(true));", + "comment_created_at": "2025-06-05T15:42:46+00:00", + "comment_author": "ankikuma", + "comment_body": "getEngine() is not public right now. Can I change this to :\r\n\r\nEngine engine = shard.getEngineOrNull();\r\nassertThat(engine != null && engine.isThrottled(), equalTo(true));", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-defensive-null-handling.md b/_reviewers/elasticsearch-defensive-null-handling.md index 44f6bad..6c82191 100644 --- a/_reviewers/elasticsearch-defensive-null-handling.md +++ b/_reviewers/elasticsearch-defensive-null-handling.md @@ -41,155 +41,3 @@ PersistentTasksCustomMetadata.PersistentTask persistentTask = ``` These patterns help create more robust code that can handle edge cases and prevent crashes in production. 
- - -[ - { - "discussion_id": "2171707161", - "pr_number": 130000, - "pr_file": "server/src/main/java/org/elasticsearch/cluster/metadata/ProjectMetadata.java", - "created_at": "2025-06-27T11:29:43+00:00", - "commented_code": "public static ProjectMetadata fromXContent(XContentParser parser) throws IOException {\n XContentParser.Token token = parser.currentToken();\n if (token == null) {\n token = parser.nextToken();\n }", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2171707161", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130000, - "pr_file": "server/src/main/java/org/elasticsearch/cluster/metadata/ProjectMetadata.java", - "discussion_id": "2171707161", - "commented_code": "@@ -2089,6 +2089,9 @@ static boolean assertDataStreams(Map indices, DataStreamM\n \n public static ProjectMetadata fromXContent(XContentParser parser) throws IOException {\n XContentParser.Token token = parser.currentToken();\n+ if (token == null) {\n+ token = parser.nextToken();\n+ }", - "comment_created_at": "2025-06-27T11:29:43+00:00", - "comment_author": "pxsalehi", - "comment_body": "what's this all about?", - "pr_file_module": null - }, - { - "comment_id": "2174109969", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130000, - "pr_file": "server/src/main/java/org/elasticsearch/cluster/metadata/ProjectMetadata.java", - "discussion_id": "2171707161", - "commented_code": "@@ -2089,6 +2089,9 @@ static boolean assertDataStreams(Map indices, DataStreamM\n \n public static ProjectMetadata fromXContent(XContentParser parser) throws IOException {\n XContentParser.Token token = parser.currentToken();\n+ if (token == null) {\n+ token = parser.nextToken();\n+ }", - "comment_created_at": "2025-06-30T02:42:36+00:00", - "comment_author": "ywangd", - "comment_body": "This is needed when the `ProjectMetadata` is parsed on its own, i.e. _not_ as part of `Metadata`. In the later case, the `parser.nextToken()` method is already called when parsing the outer structure. In this PR, we need to de/serialize `ProjectMetadata` on its own. Hence the need for active call to this method. 
We also use this pattern in other places such as [here](https://github.com/elastic/elasticsearch/blob/cacce50192ac29e1aaca2e75c170a90d15af292c/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshots.java#L276-L278).", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2163198891", - "pr_number": 129684, - "pr_file": "qa/lucene-index-compatibility/src/javaRestTest/java/org/elasticsearch/lucene/AbstractIndexCompatibilityTestCase.java", - "created_at": "2025-06-24T07:44:21+00:00", - "commented_code": "@SuppressWarnings(\"unchecked\")\n protected static void assertIndexSetting(String indexName, Setting setting, Matcher matcher) throws Exception {\n var indexSettings = getIndexSettingsAsMap(indexName);\n assertThat(Boolean.parseBoolean((String) indexSettings.get(setting.getKey())), matcher);\n assertThat(Booleans.parseBoolean((String) indexSettings.get(setting.getKey())), matcher);", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2163198891", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129684, - "pr_file": "qa/lucene-index-compatibility/src/javaRestTest/java/org/elasticsearch/lucene/AbstractIndexCompatibilityTestCase.java", - "discussion_id": "2163198891", - "commented_code": "@@ -330,7 +331,7 @@ protected static List indexBlocks(String indexName) throws Excepti\n @SuppressWarnings(\"unchecked\")\n protected static void assertIndexSetting(String indexName, Setting setting, Matcher matcher) throws Exception {\n var indexSettings = getIndexSettingsAsMap(indexName);\n- assertThat(Boolean.parseBoolean((String) indexSettings.get(setting.getKey())), matcher);\n+ assertThat(Booleans.parseBoolean((String) indexSettings.get(setting.getKey())), matcher);", - "comment_created_at": "2025-06-24T07:44:21+00:00", - "comment_author": "mosche", - "comment_body": "Please use `Booleans.parseBoolean(...., false)` here in case the setting is absent", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2167510376", - "pr_number": 130035, - "pr_file": "x-pack/plugin/migrate/src/main/java/org/elasticsearch/xpack/migrate/task/ReindexDataStreamTask.java", - "created_at": "2025-06-25T19:57:23+00:00", - "commented_code": "int totalIndices = initialTotalIndices;\n int totalIndicesToBeUpgraded = initialTotalIndicesToBeUpgraded;\n PersistentTasksCustomMetadata.PersistentTask persistentTask = PersistentTasksCustomMetadata.getTaskWithId(\n clusterService.state(),\n clusterService.state().projectState(projectId).metadata(),", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2167510376", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130035, - "pr_file": "x-pack/plugin/migrate/src/main/java/org/elasticsearch/xpack/migrate/task/ReindexDataStreamTask.java", - "discussion_id": "2167510376", - "commented_code": "@@ -70,7 +74,7 @@ public ReindexDataStreamStatus getStatus() {\n int totalIndices = initialTotalIndices;\n int totalIndicesToBeUpgraded = initialTotalIndicesToBeUpgraded;\n PersistentTasksCustomMetadata.PersistentTask persistentTask = PersistentTasksCustomMetadata.getTaskWithId(\n- clusterService.state(),\n+ clusterService.state().projectState(projectId).metadata(),", - "comment_created_at": "2025-06-25T19:57:23+00:00", - "comment_author": "nielsbauman", - "comment_body": "1. There is no need to convert to `ProjectState` here (we could call `clusterService.state().metadata().getProject(projectId)`).\r\n2. 
I see we have a `null`-check below for `persistentTask`. While we should handle project deletions (even soft-deletes) gracefully and stop persistent tasks first, I think it doesn't hurt to have some defense against deleted projects here. We could do something like:\r\n\t```java\r\n\tvar project = clusterService.state().metadata().projects().get(projectId);\r\n\tPersistentTasksCustomMetadata.PersistentTask persistentTask = project == null ? null : PersistentTasksCustomMetadata.getTaskWithId(project, getPersistentTask());\r\n\t```\r\n\tBut other variations of that are fine too of course.\r\n\r\nThe same goes for similar changes in this and other files, and for obtaining `sourceIndex` in `ReindexDataStreamIndexTransportAction.java`. Generally, if a block of code assumes that something exists (e.g. a task/custom/index), I don't do an extra `null`-check for the project, but if the code defends against missing objects, I check for missing project to maintain the level of defense.", - "pr_file_module": null - }, - { - "comment_id": "2167648998", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130035, - "pr_file": "x-pack/plugin/migrate/src/main/java/org/elasticsearch/xpack/migrate/task/ReindexDataStreamTask.java", - "discussion_id": "2167510376", - "commented_code": "@@ -70,7 +74,7 @@ public ReindexDataStreamStatus getStatus() {\n int totalIndices = initialTotalIndices;\n int totalIndicesToBeUpgraded = initialTotalIndicesToBeUpgraded;\n PersistentTasksCustomMetadata.PersistentTask persistentTask = PersistentTasksCustomMetadata.getTaskWithId(\n- clusterService.state(),\n+ clusterService.state().projectState(projectId).metadata(),", - "comment_created_at": "2025-06-25T21:15:49+00:00", - "comment_author": "joegallo", - "comment_body": "14ac8331a512cb0696d8634722bea48fda3fd689 addresses your first point, but I'll have to grind a bit on the second one. 
(Which I'll do tomorrow.)", - "pr_file_module": null - }, - { - "comment_id": "2167691284", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130035, - "pr_file": "x-pack/plugin/migrate/src/main/java/org/elasticsearch/xpack/migrate/task/ReindexDataStreamTask.java", - "discussion_id": "2167510376", - "commented_code": "@@ -70,7 +74,7 @@ public ReindexDataStreamStatus getStatus() {\n int totalIndices = initialTotalIndices;\n int totalIndicesToBeUpgraded = initialTotalIndicesToBeUpgraded;\n PersistentTasksCustomMetadata.PersistentTask persistentTask = PersistentTasksCustomMetadata.getTaskWithId(\n- clusterService.state(),\n+ clusterService.state().projectState(projectId).metadata(),", - "comment_created_at": "2025-06-25T21:45:18+00:00", - "comment_author": "joegallo", - "comment_body": "I think your second point is the only comment here that's still outstanding, I'll pick up on that one tomorrow (this comment is just a bookmark from myself to myself).", - "pr_file_module": null - }, - { - "comment_id": "2169242470", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130035, - "pr_file": "x-pack/plugin/migrate/src/main/java/org/elasticsearch/xpack/migrate/task/ReindexDataStreamTask.java", - "discussion_id": "2167510376", - "commented_code": "@@ -70,7 +74,7 @@ public ReindexDataStreamStatus getStatus() {\n int totalIndices = initialTotalIndices;\n int totalIndicesToBeUpgraded = initialTotalIndicesToBeUpgraded;\n PersistentTasksCustomMetadata.PersistentTask persistentTask = PersistentTasksCustomMetadata.getTaskWithId(\n- clusterService.state(),\n+ clusterService.state().projectState(projectId).metadata(),", - "comment_created_at": "2025-06-26T14:41:50+00:00", - "comment_author": "joegallo", - "comment_body": "Okay, I think I've handled your second point via ef53eb2a0051c2a7167d11f6837c9e8ab1a6b289.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2128058868", - "pr_number": 128797, - "pr_file": "server/src/internalClusterTest/java/org/elasticsearch/recovery/RelocationIT.java", - "created_at": "2025-06-05T06:33:25+00:00", - "commented_code": "waitForDocs(numDocs, indexer);\n logger.info(\"--> {} docs indexed\", numDocs);\n\n IndicesService indicesService = internalCluster().getInstance(IndicesService.class, nodes[0]);\n IndexShard shard = indicesService.indexServiceSafe(resolveIndex(\"test\")).getShard(0);\n if (throttleIndexing) {\n // Activate index throttling on \"test\" index primary shard\n shard.activateThrottling();\n // Verify that indexing is throttled for this shard\n assertThat(shard.getEngineOrNull().isThrottled(), equalTo(true));", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2128058868", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 128797, - "pr_file": "server/src/internalClusterTest/java/org/elasticsearch/recovery/RelocationIT.java", - "discussion_id": "2128058868", - "commented_code": "@@ -200,6 +277,14 @@ public void testRelocationWhileIndexingRandom() throws Exception {\n waitForDocs(numDocs, indexer);\n logger.info(\"--> {} docs indexed\", numDocs);\n \n+ IndicesService indicesService = internalCluster().getInstance(IndicesService.class, nodes[0]);\n+ IndexShard shard = indicesService.indexServiceSafe(resolveIndex(\"test\")).getShard(0);\n+ if (throttleIndexing) {\n+ // Activate index throttling on \"test\" index primary shard\n+ shard.activateThrottling();\n+ // Verify that indexing is throttled for this shard\n+ assertThat(shard.getEngineOrNull().isThrottled(), equalTo(true));", - 
"comment_created_at": "2025-06-05T06:33:25+00:00", - "comment_author": "henningandersen", - "comment_body": "```suggestion\r\n assertThat(shard.getEngine().isThrottled(), equalTo(true));\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2129155634", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 128797, - "pr_file": "server/src/internalClusterTest/java/org/elasticsearch/recovery/RelocationIT.java", - "discussion_id": "2128058868", - "commented_code": "@@ -200,6 +277,14 @@ public void testRelocationWhileIndexingRandom() throws Exception {\n waitForDocs(numDocs, indexer);\n logger.info(\"--> {} docs indexed\", numDocs);\n \n+ IndicesService indicesService = internalCluster().getInstance(IndicesService.class, nodes[0]);\n+ IndexShard shard = indicesService.indexServiceSafe(resolveIndex(\"test\")).getShard(0);\n+ if (throttleIndexing) {\n+ // Activate index throttling on \"test\" index primary shard\n+ shard.activateThrottling();\n+ // Verify that indexing is throttled for this shard\n+ assertThat(shard.getEngineOrNull().isThrottled(), equalTo(true));", - "comment_created_at": "2025-06-05T15:42:46+00:00", - "comment_author": "ankikuma", - "comment_body": "getEngine() is not public right now. Can I change this to :\r\n\r\nEngine engine = shard.getEngineOrNull();\r\nassertThat(engine != null && engine.isThrottled(), equalTo(true));", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-design-for-evolution.json b/_reviewers/elasticsearch-design-for-evolution.json new file mode 100644 index 0000000..4a87295 --- /dev/null +++ b/_reviewers/elasticsearch-design-for-evolution.json @@ -0,0 +1,80 @@ +[ + { + "discussion_id": "2162056413", + "pr_number": 129872, + "pr_file": "server/src/main/java/org/elasticsearch/action/admin/cluster/state/RemoteClusterStateRequest.java", + "created_at": "2025-06-23T16:51:34+00:00", + "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. 
Licensed under the \"Elastic License\n * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n * Public License v 1\"; you may not use this file except in compliance with, at\n * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n * License v3.0 only\", or the \"Server Side Public License, v 1\".\n */\n\npackage org.elasticsearch.action.admin.cluster.state;\n\nimport org.elasticsearch.TransportVersions;\nimport org.elasticsearch.action.ActionRequest;\nimport org.elasticsearch.action.ActionRequestValidationException;\nimport org.elasticsearch.common.io.stream.StreamInput;\nimport org.elasticsearch.common.io.stream.StreamOutput;\nimport org.elasticsearch.tasks.CancellableTask;\nimport org.elasticsearch.tasks.Task;\nimport org.elasticsearch.tasks.TaskId;\n\nimport java.io.IOException;\nimport java.util.Map;\n\n/**\n * A remote-only version of {@link ClusterStateRequest} that should be used for cross-cluster requests.\n * It simply exists to handle incoming remote requests and forward them to the local transport action.\n */\npublic class RemoteClusterStateRequest extends ActionRequest {\n\n private final ClusterStateRequest clusterStateRequest;", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2162056413", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129872, + "pr_file": "server/src/main/java/org/elasticsearch/action/admin/cluster/state/RemoteClusterStateRequest.java", + "discussion_id": "2162056413", + "commented_code": "@@ -0,0 +1,77 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+\n+package org.elasticsearch.action.admin.cluster.state;\n+\n+import org.elasticsearch.TransportVersions;\n+import org.elasticsearch.action.ActionRequest;\n+import org.elasticsearch.action.ActionRequestValidationException;\n+import org.elasticsearch.common.io.stream.StreamInput;\n+import org.elasticsearch.common.io.stream.StreamOutput;\n+import org.elasticsearch.tasks.CancellableTask;\n+import org.elasticsearch.tasks.Task;\n+import org.elasticsearch.tasks.TaskId;\n+\n+import java.io.IOException;\n+import java.util.Map;\n+\n+/**\n+ * A remote-only version of {@link ClusterStateRequest} that should be used for cross-cluster requests.\n+ * It simply exists to handle incoming remote requests and forward them to the local transport action.\n+ */\n+public class RemoteClusterStateRequest extends ActionRequest {\n+\n+ private final ClusterStateRequest clusterStateRequest;", + "comment_created_at": "2025-06-23T16:51:34+00:00", + "comment_author": "nielsbauman", + "comment_body": "Suggestions other than wrapping the whole request itself like this are welcome too.", + "pr_file_module": null + }, + { + "comment_id": "2179881540", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129872, + "pr_file": "server/src/main/java/org/elasticsearch/action/admin/cluster/state/RemoteClusterStateRequest.java", + "discussion_id": "2162056413", + "commented_code": "@@ -0,0 +1,77 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one\n+ * or more contributor license agreements. Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+\n+package org.elasticsearch.action.admin.cluster.state;\n+\n+import org.elasticsearch.TransportVersions;\n+import org.elasticsearch.action.ActionRequest;\n+import org.elasticsearch.action.ActionRequestValidationException;\n+import org.elasticsearch.common.io.stream.StreamInput;\n+import org.elasticsearch.common.io.stream.StreamOutput;\n+import org.elasticsearch.tasks.CancellableTask;\n+import org.elasticsearch.tasks.Task;\n+import org.elasticsearch.tasks.TaskId;\n+\n+import java.io.IOException;\n+import java.util.Map;\n+\n+/**\n+ * A remote-only version of {@link ClusterStateRequest} that should be used for cross-cluster requests.\n+ * It simply exists to handle incoming remote requests and forward them to the local transport action.\n+ */\n+public class RemoteClusterStateRequest extends ActionRequest {\n+\n+ private final ClusterStateRequest clusterStateRequest;", + "comment_created_at": "2025-07-02T12:05:18+00:00", + "comment_author": "DaveCTurner", + "comment_body": "I think I'd be inclined to duplicate all the fields rather than wrap up the local-only request, so that these things may diverge in future.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2004202386", + "pr_number": 125052, + "pr_file": "x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/hybrid/HybridRetrieverBuilder.java", + "created_at": "2025-03-19T20:08:41+00:00", + "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. 
Licensed under the Elastic License\n * 2.0; you may not use this file except in compliance with the Elastic License\n * 2.0.\n */\n\npackage org.elasticsearch.xpack.rank.hybrid;\n\nimport org.elasticsearch.common.collect.ImmutableOpenMap;\nimport org.elasticsearch.common.xcontent.LoggingDeprecationHandler;\nimport org.elasticsearch.common.xcontent.support.XContentMapValues;\nimport org.elasticsearch.index.query.MatchQueryBuilder;\nimport org.elasticsearch.index.query.QueryBuilder;\nimport org.elasticsearch.search.rank.RankBuilder;\nimport org.elasticsearch.search.retriever.CompoundRetrieverBuilder;\nimport org.elasticsearch.search.retriever.RetrieverBuilder;\nimport org.elasticsearch.search.retriever.RetrieverBuilderWrapper;\nimport org.elasticsearch.search.retriever.RetrieverParserContext;\nimport org.elasticsearch.search.retriever.StandardRetrieverBuilder;\nimport org.elasticsearch.xcontent.ConstructingObjectParser;\nimport org.elasticsearch.xcontent.NamedXContentRegistry;\nimport org.elasticsearch.xcontent.ParseField;\nimport org.elasticsearch.xcontent.XContentBuilder;\nimport org.elasticsearch.xcontent.XContentParseException;\nimport org.elasticsearch.xcontent.XContentParser;\nimport org.elasticsearch.xcontent.support.MapXContentParser;\nimport org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder;\nimport org.elasticsearch.xpack.rank.linear.LinearRetrieverBuilder;\nimport org.elasticsearch.xpack.rank.linear.MinMaxScoreNormalizer;\nimport org.elasticsearch.xpack.rank.linear.ScoreNormalizer;\n\nimport java.io.IOException;\nimport java.util.ArrayList;\nimport java.util.Arrays;\nimport java.util.Collections;\nimport java.util.HashMap;\nimport java.util.List;\nimport java.util.Map;\nimport java.util.Objects;\n\nimport static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.RANK_WINDOW_SIZE_FIELD;\nimport static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg;\nimport static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;\nimport static org.elasticsearch.xpack.rank.hybrid.QuerySettings.TYPE_FIELD;\n\n// TODO:\n// - Retriever name support\n\npublic class HybridRetrieverBuilder extends RetrieverBuilderWrapper {\n public static final String NAME = \"hybrid\";\n public static final ParseField FIELDS_FIELD = new ParseField(\"fields\");\n public static final ParseField QUERY_FIELD = new ParseField(\"query\");\n public static final ParseField RERANK_FIELD = new ParseField(\"rerank\");", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2004202386", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 125052, + "pr_file": "x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/hybrid/HybridRetrieverBuilder.java", + "discussion_id": "2004202386", + "commented_code": "@@ -0,0 +1,373 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the Elastic License\n+ * 2.0; you may not use this file except in compliance with the Elastic License\n+ * 2.0.\n+ */\n+\n+package org.elasticsearch.xpack.rank.hybrid;\n+\n+import org.elasticsearch.common.collect.ImmutableOpenMap;\n+import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;\n+import org.elasticsearch.common.xcontent.support.XContentMapValues;\n+import org.elasticsearch.index.query.MatchQueryBuilder;\n+import org.elasticsearch.index.query.QueryBuilder;\n+import org.elasticsearch.search.rank.RankBuilder;\n+import org.elasticsearch.search.retriever.CompoundRetrieverBuilder;\n+import org.elasticsearch.search.retriever.RetrieverBuilder;\n+import org.elasticsearch.search.retriever.RetrieverBuilderWrapper;\n+import org.elasticsearch.search.retriever.RetrieverParserContext;\n+import org.elasticsearch.search.retriever.StandardRetrieverBuilder;\n+import org.elasticsearch.xcontent.ConstructingObjectParser;\n+import org.elasticsearch.xcontent.NamedXContentRegistry;\n+import org.elasticsearch.xcontent.ParseField;\n+import org.elasticsearch.xcontent.XContentBuilder;\n+import org.elasticsearch.xcontent.XContentParseException;\n+import org.elasticsearch.xcontent.XContentParser;\n+import org.elasticsearch.xcontent.support.MapXContentParser;\n+import org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder;\n+import org.elasticsearch.xpack.rank.linear.LinearRetrieverBuilder;\n+import org.elasticsearch.xpack.rank.linear.MinMaxScoreNormalizer;\n+import org.elasticsearch.xpack.rank.linear.ScoreNormalizer;\n+\n+import java.io.IOException;\n+import java.util.ArrayList;\n+import java.util.Arrays;\n+import java.util.Collections;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.Objects;\n+\n+import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.RANK_WINDOW_SIZE_FIELD;\n+import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg;\n+import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;\n+import static org.elasticsearch.xpack.rank.hybrid.QuerySettings.TYPE_FIELD;\n+\n+// TODO:\n+// - Retriever name support\n+\n+public class HybridRetrieverBuilder extends RetrieverBuilderWrapper {\n+ public static final String NAME = \"hybrid\";\n+ public static final ParseField FIELDS_FIELD = new ParseField(\"fields\");\n+ public static final ParseField QUERY_FIELD = new ParseField(\"query\");\n+ public static final ParseField RERANK_FIELD = new ParseField(\"rerank\");", + "comment_created_at": "2025-03-19T20:08:41+00:00", + "comment_author": "kderusso", + "comment_body": "I like that we incorporated `rerank` into this POC - I'd also like to see `rule` incorporated as I think business rules will be a critical part of the hybrid retriever. In that vein I wonder if there's something more generic we can do with the retrievers that we call - Something like this, that could be easily extended as we add any additional future retrievers or want more customization. \r\n\r\n```\r\nPOST wiki-index/_search\r\n{\r\n \"retriever\": {\r\n \"hybrid\": {\r\n \"fields\": [\"content\", \"content.semantic\"],\r\n \"query\": \"foo\",\r\n \"rank_modifiers\": [\r\n \"rule\": { \r\n ... 
\r\n },\r\n \"rerank\": {\r\n \"inference_id\": \"my-reranker-service\",\r\n \"field\": \"content\"\r\n }]\r\n }\r\n }\r\n}\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2172738742", + "pr_number": 128923, + "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/retry/BaseResponseHandler.java", + "created_at": "2025-06-27T19:33:47+00:00", + "commented_code": "}\n\n protected Exception buildError(String message, Request request, HttpResult result) {\n var errorEntityMsg = errorParseFunction.apply(result);\n return buildError(message, request, result, errorEntityMsg);\n var errorResponse = errorParseFunction.apply(result);\n return buildError(message, request, result, errorResponse);\n }\n\n protected Exception buildError(String message, Request request, HttpResult result, ErrorResponse errorResponse) {\n var responseStatusCode = result.response().getStatusLine().getStatusCode();\n return new ElasticsearchStatusException(\n errorMessage(message, request, result, errorResponse, responseStatusCode),\n errorMessage(message, request, errorResponse, responseStatusCode),\n toRestStatus(responseStatusCode)\n );\n }\n\n protected String errorMessage(String message, Request request, HttpResult result, ErrorResponse errorResponse, int statusCode) {\n /**\n * Builds an error for a streaming request with a custom error type.\n * This method is used when an error response is received from the external service.\n * Only streaming requests support this format, and it should be used when the error response.\n *\n * @param message the error message to include in the exception\n * @param request the request that caused the error\n * @param result the HTTP result containing the error response\n * @param errorResponse the parsed error response from the HTTP result\n * @param errorResponseClass the class of the expected error response type\n * @return an instance of {@link UnifiedChatCompletionException} with details from the error response\n */\n protected UnifiedChatCompletionException buildChatCompletionError(\n String message,\n Request request,\n HttpResult result,\n ErrorResponse errorResponse,\n Class errorResponseClass\n ) {\n assert request.isStreaming() : \"Only streaming requests support this format\";\n var statusCode = result.response().getStatusLine().getStatusCode();\n var errorMessage = errorMessage(message, request, errorResponse, statusCode);\n var restStatus = toRestStatus(statusCode);\n\n return buildChatCompletionError(errorResponse, errorMessage, restStatus, errorResponseClass);\n }\n\n /**\n * Builds a {@link UnifiedChatCompletionException} for a streaming request.\n * This method is used when an error response is received from the external service.\n * Only streaming requests should use this method.\n *\n * @param errorResponse the error response parsed from the HTTP result\n * @param errorMessage the error message to include in the exception\n * @param restStatus the REST status code of the response\n * @param errorResponseClass the class of the expected error response type\n * @return an instance of {@link UnifiedChatCompletionException} with details from the error response\n */\n protected UnifiedChatCompletionException buildChatCompletionError(\n ErrorResponse errorResponse,\n String errorMessage,\n RestStatus restStatus,\n Class errorResponseClass\n ) {\n if (errorResponseClass.isInstance(errorResponse)) {\n return buildProviderSpecificChatCompletionError(errorResponse, errorMessage, restStatus);\n } else {\n return 
buildDefaultChatCompletionError(errorResponse, errorMessage, restStatus);\n }\n }\n\n /**\n * Builds a custom {@link UnifiedChatCompletionException} for a streaming request.\n * This method is called when a specific error response is found in the HTTP result.\n * It must be implemented by subclasses to handle specific error response formats.\n * Only streaming requests should use this method.\n *\n * @param errorResponse the error response parsed from the HTTP result\n * @param errorMessage the error message to include in the exception\n * @param restStatus the REST status code of the response\n * @return an instance of {@link UnifiedChatCompletionException} with details from the error response\n */\n protected UnifiedChatCompletionException buildProviderSpecificChatCompletionError(\n ErrorResponse errorResponse,\n String errorMessage,\n RestStatus restStatus\n ) {\n throw new UnsupportedOperationException(\n \"Custom error handling is not implemented. Please override buildProviderSpecificChatCompletionError method.\"\n );\n }\n\n /**\n * Builds a default {@link UnifiedChatCompletionException} for a streaming request.\n * This method is used when an error response is received but no specific error handling is implemented.\n * Only streaming requests should use this method.\n *\n * @param errorResponse the error response parsed from the HTTP result\n * @param errorMessage the error message to include in the exception\n * @param restStatus the REST status code of the response\n * @return an instance of {@link UnifiedChatCompletionException} with details from the error response\n */\n protected UnifiedChatCompletionException buildDefaultChatCompletionError(\n ErrorResponse errorResponse,\n String errorMessage,\n RestStatus restStatus\n ) {\n return new UnifiedChatCompletionException(\n restStatus,\n errorMessage,\n createErrorType(errorResponse),\n restStatus.name().toLowerCase(Locale.ROOT)\n );\n }\n\n /**\n * Builds a mid-stream error for a streaming request.\n * This method is used when an error occurs while processing a streaming response.\n * It must be implemented by subclasses to handle specific error response formats.\n * Only streaming requests should use this method.\n *\n * @param inferenceEntityId the ID of the inference entity\n * @param message the error message\n * @param e the exception that caused the error, can be null\n * @return a {@link UnifiedChatCompletionException} representing the mid-stream error\n */\n public UnifiedChatCompletionException buildMidStreamChatCompletionError(String inferenceEntityId, String message, Exception e) {\n throw new UnsupportedOperationException(\n \"Mid-stream error handling is not implemented. 
Please override buildMidStreamChatCompletionError method.\"\n );\n }\n\n /**\n * Builds a mid-stream error for a streaming request with a custom error type.\n * This method is used when an error occurs while processing a streaming response and allows for custom error handling.\n * Only streaming requests should use this method.\n *\n * @param inferenceEntityId the ID of the inference entity\n * @param message the error message\n * @param e the exception that caused the error, can be null\n * @param errorResponseClass the class of the expected error response type\n * @return a {@link UnifiedChatCompletionException} representing the mid-stream error\n */\n protected UnifiedChatCompletionException buildMidStreamChatCompletionError(\n String inferenceEntityId,\n String message,\n Exception e,\n Class errorResponseClass", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2172738742", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 128923, + "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/retry/BaseResponseHandler.java", + "discussion_id": "2172738742", + "commented_code": "@@ -109,19 +117,230 @@ private void checkForErrorObject(Request request, HttpResult result) {\n }\n \n protected Exception buildError(String message, Request request, HttpResult result) {\n- var errorEntityMsg = errorParseFunction.apply(result);\n- return buildError(message, request, result, errorEntityMsg);\n+ var errorResponse = errorParseFunction.apply(result);\n+ return buildError(message, request, result, errorResponse);\n }\n \n protected Exception buildError(String message, Request request, HttpResult result, ErrorResponse errorResponse) {\n var responseStatusCode = result.response().getStatusLine().getStatusCode();\n return new ElasticsearchStatusException(\n- errorMessage(message, request, result, errorResponse, responseStatusCode),\n+ errorMessage(message, request, errorResponse, responseStatusCode),\n toRestStatus(responseStatusCode)\n );\n }\n \n- protected String errorMessage(String message, Request request, HttpResult result, ErrorResponse errorResponse, int statusCode) {\n+ /**\n+ * Builds an error for a streaming request with a custom error type.\n+ * This method is used when an error response is received from the external service.\n+ * Only streaming requests support this format, and it should be used when the error response.\n+ *\n+ * @param message the error message to include in the exception\n+ * @param request the request that caused the error\n+ * @param result the HTTP result containing the error response\n+ * @param errorResponse the parsed error response from the HTTP result\n+ * @param errorResponseClass the class of the expected error response type\n+ * @return an instance of {@link UnifiedChatCompletionException} with details from the error response\n+ */\n+ protected UnifiedChatCompletionException buildChatCompletionError(\n+ String message,\n+ Request request,\n+ HttpResult result,\n+ ErrorResponse errorResponse,\n+ Class errorResponseClass\n+ ) {\n+ assert request.isStreaming() : \"Only streaming requests support this format\";\n+ var statusCode = result.response().getStatusLine().getStatusCode();\n+ var errorMessage = errorMessage(message, request, errorResponse, statusCode);\n+ var restStatus = toRestStatus(statusCode);\n+\n+ return buildChatCompletionError(errorResponse, errorMessage, restStatus, errorResponseClass);\n+ }\n+\n+ /**\n+ * Builds a {@link UnifiedChatCompletionException} for 
a streaming request.\n+ * This method is used when an error response is received from the external service.\n+ * Only streaming requests should use this method.\n+ *\n+ * @param errorResponse the error response parsed from the HTTP result\n+ * @param errorMessage the error message to include in the exception\n+ * @param restStatus the REST status code of the response\n+ * @param errorResponseClass the class of the expected error response type\n+ * @return an instance of {@link UnifiedChatCompletionException} with details from the error response\n+ */\n+ protected UnifiedChatCompletionException buildChatCompletionError(\n+ ErrorResponse errorResponse,\n+ String errorMessage,\n+ RestStatus restStatus,\n+ Class errorResponseClass\n+ ) {\n+ if (errorResponseClass.isInstance(errorResponse)) {\n+ return buildProviderSpecificChatCompletionError(errorResponse, errorMessage, restStatus);\n+ } else {\n+ return buildDefaultChatCompletionError(errorResponse, errorMessage, restStatus);\n+ }\n+ }\n+\n+ /**\n+ * Builds a custom {@link UnifiedChatCompletionException} for a streaming request.\n+ * This method is called when a specific error response is found in the HTTP result.\n+ * It must be implemented by subclasses to handle specific error response formats.\n+ * Only streaming requests should use this method.\n+ *\n+ * @param errorResponse the error response parsed from the HTTP result\n+ * @param errorMessage the error message to include in the exception\n+ * @param restStatus the REST status code of the response\n+ * @return an instance of {@link UnifiedChatCompletionException} with details from the error response\n+ */\n+ protected UnifiedChatCompletionException buildProviderSpecificChatCompletionError(\n+ ErrorResponse errorResponse,\n+ String errorMessage,\n+ RestStatus restStatus\n+ ) {\n+ throw new UnsupportedOperationException(\n+ \"Custom error handling is not implemented. 
Please override buildProviderSpecificChatCompletionError method.\"\n+ );\n+ }\n+\n+ /**\n+ * Builds a default {@link UnifiedChatCompletionException} for a streaming request.\n+ * This method is used when an error response is received but no specific error handling is implemented.\n+ * Only streaming requests should use this method.\n+ *\n+ * @param errorResponse the error response parsed from the HTTP result\n+ * @param errorMessage the error message to include in the exception\n+ * @param restStatus the REST status code of the response\n+ * @return an instance of {@link UnifiedChatCompletionException} with details from the error response\n+ */\n+ protected UnifiedChatCompletionException buildDefaultChatCompletionError(\n+ ErrorResponse errorResponse,\n+ String errorMessage,\n+ RestStatus restStatus\n+ ) {\n+ return new UnifiedChatCompletionException(\n+ restStatus,\n+ errorMessage,\n+ createErrorType(errorResponse),\n+ restStatus.name().toLowerCase(Locale.ROOT)\n+ );\n+ }\n+\n+ /**\n+ * Builds a mid-stream error for a streaming request.\n+ * This method is used when an error occurs while processing a streaming response.\n+ * It must be implemented by subclasses to handle specific error response formats.\n+ * Only streaming requests should use this method.\n+ *\n+ * @param inferenceEntityId the ID of the inference entity\n+ * @param message the error message\n+ * @param e the exception that caused the error, can be null\n+ * @return a {@link UnifiedChatCompletionException} representing the mid-stream error\n+ */\n+ public UnifiedChatCompletionException buildMidStreamChatCompletionError(String inferenceEntityId, String message, Exception e) {\n+ throw new UnsupportedOperationException(\n+ \"Mid-stream error handling is not implemented. Please override buildMidStreamChatCompletionError method.\"\n+ );\n+ }\n+\n+ /**\n+ * Builds a mid-stream error for a streaming request with a custom error type.\n+ * This method is used when an error occurs while processing a streaming response and allows for custom error handling.\n+ * Only streaming requests should use this method.\n+ *\n+ * @param inferenceEntityId the ID of the inference entity\n+ * @param message the error message\n+ * @param e the exception that caused the error, can be null\n+ * @param errorResponseClass the class of the expected error response type\n+ * @return a {@link UnifiedChatCompletionException} representing the mid-stream error\n+ */\n+ protected UnifiedChatCompletionException buildMidStreamChatCompletionError(\n+ String inferenceEntityId,\n+ String message,\n+ Exception e,\n+ Class errorResponseClass", + "comment_created_at": "2025-06-27T19:33:47+00:00", + "comment_author": "jonathan-buttner", + "comment_body": "Instead of passing in a class here how about we either:\r\n\r\n- Take a function that accepts a string and returns some error class\r\n- Take a builder-like interface that has a method that accepts a string and returns an error class", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-design-for-evolution.md b/_reviewers/elasticsearch-design-for-evolution.md index 5313352..cc876e9 100644 --- a/_reviewers/elasticsearch-design-for-evolution.md +++ b/_reviewers/elasticsearch-design-for-evolution.md @@ -51,85 +51,3 @@ For extensible APIs, consider using collections or generic structures that can b ``` This approach makes APIs more resilient to changing requirements and easier to extend without breaking compatibility. 
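The last thread above suggests passing a factory function rather than a `Class` token, so each provider can plug in its own error mapping without `isInstance` checks in the base handler. The sketch below shows that shape under stated assumptions: `ErrorResponse` and `ChatCompletionException` are hypothetical stand-ins, not the real Elasticsearch types.

```java
import java.util.Optional;
import java.util.function.Function;

public class ErrorBuilderSketch {

    record ErrorResponse(String message) {}

    static class ChatCompletionException extends RuntimeException {
        ChatCompletionException(String message) {
            super(message);
        }
    }

    // Instead of accepting a Class<? extends ErrorResponse> and doing isInstance() checks,
    // the caller supplies a factory; each provider contributes its own mapping and the base
    // handler does not change when a new provider format is added.
    static ChatCompletionException buildError(
        ErrorResponse response,
        Function<ErrorResponse, Optional<ChatCompletionException>> providerSpecificBuilder
    ) {
        return providerSpecificBuilder.apply(response)
            .orElseGet(() -> new ChatCompletionException("default: " + response.message()));
    }

    public static void main(String[] args) {
        // A provider-specific mapping that a subclass or configuration would supply.
        Function<ErrorResponse, Optional<ChatCompletionException>> quotaAware =
            r -> r.message().startsWith("quota")
                ? Optional.of(new ChatCompletionException("provider: " + r.message()))
                : Optional.empty();

        System.out.println(buildError(new ErrorResponse("quota exceeded"), quotaAware).getMessage());
        System.out.println(buildError(new ErrorResponse("request timed out"), quotaAware).getMessage());
    }
}
```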
- - -[ - { - "discussion_id": "2162056413", - "pr_number": 129872, - "pr_file": "server/src/main/java/org/elasticsearch/action/admin/cluster/state/RemoteClusterStateRequest.java", - "created_at": "2025-06-23T16:51:34+00:00", - "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. Licensed under the \"Elastic License\n * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n * Public License v 1\"; you may not use this file except in compliance with, at\n * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n * License v3.0 only\", or the \"Server Side Public License, v 1\".\n */\n\npackage org.elasticsearch.action.admin.cluster.state;\n\nimport org.elasticsearch.TransportVersions;\nimport org.elasticsearch.action.ActionRequest;\nimport org.elasticsearch.action.ActionRequestValidationException;\nimport org.elasticsearch.common.io.stream.StreamInput;\nimport org.elasticsearch.common.io.stream.StreamOutput;\nimport org.elasticsearch.tasks.CancellableTask;\nimport org.elasticsearch.tasks.Task;\nimport org.elasticsearch.tasks.TaskId;\n\nimport java.io.IOException;\nimport java.util.Map;\n\n/**\n * A remote-only version of {@link ClusterStateRequest} that should be used for cross-cluster requests.\n * It simply exists to handle incoming remote requests and forward them to the local transport action.\n */\npublic class RemoteClusterStateRequest extends ActionRequest {\n\n private final ClusterStateRequest clusterStateRequest;", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2162056413", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129872, - "pr_file": "server/src/main/java/org/elasticsearch/action/admin/cluster/state/RemoteClusterStateRequest.java", - "discussion_id": "2162056413", - "commented_code": "@@ -0,0 +1,77 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+\n+package org.elasticsearch.action.admin.cluster.state;\n+\n+import org.elasticsearch.TransportVersions;\n+import org.elasticsearch.action.ActionRequest;\n+import org.elasticsearch.action.ActionRequestValidationException;\n+import org.elasticsearch.common.io.stream.StreamInput;\n+import org.elasticsearch.common.io.stream.StreamOutput;\n+import org.elasticsearch.tasks.CancellableTask;\n+import org.elasticsearch.tasks.Task;\n+import org.elasticsearch.tasks.TaskId;\n+\n+import java.io.IOException;\n+import java.util.Map;\n+\n+/**\n+ * A remote-only version of {@link ClusterStateRequest} that should be used for cross-cluster requests.\n+ * It simply exists to handle incoming remote requests and forward them to the local transport action.\n+ */\n+public class RemoteClusterStateRequest extends ActionRequest {\n+\n+ private final ClusterStateRequest clusterStateRequest;", - "comment_created_at": "2025-06-23T16:51:34+00:00", - "comment_author": "nielsbauman", - "comment_body": "Suggestions other than wrapping the whole request itself like this are welcome too.", - "pr_file_module": null - }, - { - "comment_id": "2179881540", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129872, - "pr_file": "server/src/main/java/org/elasticsearch/action/admin/cluster/state/RemoteClusterStateRequest.java", - "discussion_id": "2162056413", - "commented_code": "@@ -0,0 +1,77 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+\n+package org.elasticsearch.action.admin.cluster.state;\n+\n+import org.elasticsearch.TransportVersions;\n+import org.elasticsearch.action.ActionRequest;\n+import org.elasticsearch.action.ActionRequestValidationException;\n+import org.elasticsearch.common.io.stream.StreamInput;\n+import org.elasticsearch.common.io.stream.StreamOutput;\n+import org.elasticsearch.tasks.CancellableTask;\n+import org.elasticsearch.tasks.Task;\n+import org.elasticsearch.tasks.TaskId;\n+\n+import java.io.IOException;\n+import java.util.Map;\n+\n+/**\n+ * A remote-only version of {@link ClusterStateRequest} that should be used for cross-cluster requests.\n+ * It simply exists to handle incoming remote requests and forward them to the local transport action.\n+ */\n+public class RemoteClusterStateRequest extends ActionRequest {\n+\n+ private final ClusterStateRequest clusterStateRequest;", - "comment_created_at": "2025-07-02T12:05:18+00:00", - "comment_author": "DaveCTurner", - "comment_body": "I think I'd be inclined to duplicate all the fields rather than wrap up the local-only request, so that these things may diverge in future.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2004202386", - "pr_number": 125052, - "pr_file": "x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/hybrid/HybridRetrieverBuilder.java", - "created_at": "2025-03-19T20:08:41+00:00", - "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. 
Licensed under the Elastic License\n * 2.0; you may not use this file except in compliance with the Elastic License\n * 2.0.\n */\n\npackage org.elasticsearch.xpack.rank.hybrid;\n\nimport org.elasticsearch.common.collect.ImmutableOpenMap;\nimport org.elasticsearch.common.xcontent.LoggingDeprecationHandler;\nimport org.elasticsearch.common.xcontent.support.XContentMapValues;\nimport org.elasticsearch.index.query.MatchQueryBuilder;\nimport org.elasticsearch.index.query.QueryBuilder;\nimport org.elasticsearch.search.rank.RankBuilder;\nimport org.elasticsearch.search.retriever.CompoundRetrieverBuilder;\nimport org.elasticsearch.search.retriever.RetrieverBuilder;\nimport org.elasticsearch.search.retriever.RetrieverBuilderWrapper;\nimport org.elasticsearch.search.retriever.RetrieverParserContext;\nimport org.elasticsearch.search.retriever.StandardRetrieverBuilder;\nimport org.elasticsearch.xcontent.ConstructingObjectParser;\nimport org.elasticsearch.xcontent.NamedXContentRegistry;\nimport org.elasticsearch.xcontent.ParseField;\nimport org.elasticsearch.xcontent.XContentBuilder;\nimport org.elasticsearch.xcontent.XContentParseException;\nimport org.elasticsearch.xcontent.XContentParser;\nimport org.elasticsearch.xcontent.support.MapXContentParser;\nimport org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder;\nimport org.elasticsearch.xpack.rank.linear.LinearRetrieverBuilder;\nimport org.elasticsearch.xpack.rank.linear.MinMaxScoreNormalizer;\nimport org.elasticsearch.xpack.rank.linear.ScoreNormalizer;\n\nimport java.io.IOException;\nimport java.util.ArrayList;\nimport java.util.Arrays;\nimport java.util.Collections;\nimport java.util.HashMap;\nimport java.util.List;\nimport java.util.Map;\nimport java.util.Objects;\n\nimport static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.RANK_WINDOW_SIZE_FIELD;\nimport static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg;\nimport static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;\nimport static org.elasticsearch.xpack.rank.hybrid.QuerySettings.TYPE_FIELD;\n\n// TODO:\n// - Retriever name support\n\npublic class HybridRetrieverBuilder extends RetrieverBuilderWrapper {\n public static final String NAME = \"hybrid\";\n public static final ParseField FIELDS_FIELD = new ParseField(\"fields\");\n public static final ParseField QUERY_FIELD = new ParseField(\"query\");\n public static final ParseField RERANK_FIELD = new ParseField(\"rerank\");", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2004202386", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 125052, - "pr_file": "x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/hybrid/HybridRetrieverBuilder.java", - "discussion_id": "2004202386", - "commented_code": "@@ -0,0 +1,373 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the Elastic License\n+ * 2.0; you may not use this file except in compliance with the Elastic License\n+ * 2.0.\n+ */\n+\n+package org.elasticsearch.xpack.rank.hybrid;\n+\n+import org.elasticsearch.common.collect.ImmutableOpenMap;\n+import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;\n+import org.elasticsearch.common.xcontent.support.XContentMapValues;\n+import org.elasticsearch.index.query.MatchQueryBuilder;\n+import org.elasticsearch.index.query.QueryBuilder;\n+import org.elasticsearch.search.rank.RankBuilder;\n+import org.elasticsearch.search.retriever.CompoundRetrieverBuilder;\n+import org.elasticsearch.search.retriever.RetrieverBuilder;\n+import org.elasticsearch.search.retriever.RetrieverBuilderWrapper;\n+import org.elasticsearch.search.retriever.RetrieverParserContext;\n+import org.elasticsearch.search.retriever.StandardRetrieverBuilder;\n+import org.elasticsearch.xcontent.ConstructingObjectParser;\n+import org.elasticsearch.xcontent.NamedXContentRegistry;\n+import org.elasticsearch.xcontent.ParseField;\n+import org.elasticsearch.xcontent.XContentBuilder;\n+import org.elasticsearch.xcontent.XContentParseException;\n+import org.elasticsearch.xcontent.XContentParser;\n+import org.elasticsearch.xcontent.support.MapXContentParser;\n+import org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder;\n+import org.elasticsearch.xpack.rank.linear.LinearRetrieverBuilder;\n+import org.elasticsearch.xpack.rank.linear.MinMaxScoreNormalizer;\n+import org.elasticsearch.xpack.rank.linear.ScoreNormalizer;\n+\n+import java.io.IOException;\n+import java.util.ArrayList;\n+import java.util.Arrays;\n+import java.util.Collections;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.Objects;\n+\n+import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.RANK_WINDOW_SIZE_FIELD;\n+import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg;\n+import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;\n+import static org.elasticsearch.xpack.rank.hybrid.QuerySettings.TYPE_FIELD;\n+\n+// TODO:\n+// - Retriever name support\n+\n+public class HybridRetrieverBuilder extends RetrieverBuilderWrapper {\n+ public static final String NAME = \"hybrid\";\n+ public static final ParseField FIELDS_FIELD = new ParseField(\"fields\");\n+ public static final ParseField QUERY_FIELD = new ParseField(\"query\");\n+ public static final ParseField RERANK_FIELD = new ParseField(\"rerank\");", - "comment_created_at": "2025-03-19T20:08:41+00:00", - "comment_author": "kderusso", - "comment_body": "I like that we incorporated `rerank` into this POC - I'd also like to see `rule` incorporated as I think business rules will be a critical part of the hybrid retriever. In that vein I wonder if there's something more generic we can do with the retrievers that we call - Something like this, that could be easily extended as we add any additional future retrievers or want more customization. \r\n\r\n```\r\nPOST wiki-index/_search\r\n{\r\n \"retriever\": {\r\n \"hybrid\": {\r\n \"fields\": [\"content\", \"content.semantic\"],\r\n \"query\": \"foo\",\r\n \"rank_modifiers\": [\r\n \"rule\": { \r\n ... 
\r\n },\r\n \"rerank\": {\r\n \"inference_id\": \"my-reranker-service\",\r\n \"field\": \"content\"\r\n }]\r\n }\r\n }\r\n}\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2172738742", - "pr_number": 128923, - "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/retry/BaseResponseHandler.java", - "created_at": "2025-06-27T19:33:47+00:00", - "commented_code": "}\n\n protected Exception buildError(String message, Request request, HttpResult result) {\n var errorEntityMsg = errorParseFunction.apply(result);\n return buildError(message, request, result, errorEntityMsg);\n var errorResponse = errorParseFunction.apply(result);\n return buildError(message, request, result, errorResponse);\n }\n\n protected Exception buildError(String message, Request request, HttpResult result, ErrorResponse errorResponse) {\n var responseStatusCode = result.response().getStatusLine().getStatusCode();\n return new ElasticsearchStatusException(\n errorMessage(message, request, result, errorResponse, responseStatusCode),\n errorMessage(message, request, errorResponse, responseStatusCode),\n toRestStatus(responseStatusCode)\n );\n }\n\n protected String errorMessage(String message, Request request, HttpResult result, ErrorResponse errorResponse, int statusCode) {\n /**\n * Builds an error for a streaming request with a custom error type.\n * This method is used when an error response is received from the external service.\n * Only streaming requests support this format, and it should be used when the error response.\n *\n * @param message the error message to include in the exception\n * @param request the request that caused the error\n * @param result the HTTP result containing the error response\n * @param errorResponse the parsed error response from the HTTP result\n * @param errorResponseClass the class of the expected error response type\n * @return an instance of {@link UnifiedChatCompletionException} with details from the error response\n */\n protected UnifiedChatCompletionException buildChatCompletionError(\n String message,\n Request request,\n HttpResult result,\n ErrorResponse errorResponse,\n Class errorResponseClass\n ) {\n assert request.isStreaming() : \"Only streaming requests support this format\";\n var statusCode = result.response().getStatusLine().getStatusCode();\n var errorMessage = errorMessage(message, request, errorResponse, statusCode);\n var restStatus = toRestStatus(statusCode);\n\n return buildChatCompletionError(errorResponse, errorMessage, restStatus, errorResponseClass);\n }\n\n /**\n * Builds a {@link UnifiedChatCompletionException} for a streaming request.\n * This method is used when an error response is received from the external service.\n * Only streaming requests should use this method.\n *\n * @param errorResponse the error response parsed from the HTTP result\n * @param errorMessage the error message to include in the exception\n * @param restStatus the REST status code of the response\n * @param errorResponseClass the class of the expected error response type\n * @return an instance of {@link UnifiedChatCompletionException} with details from the error response\n */\n protected UnifiedChatCompletionException buildChatCompletionError(\n ErrorResponse errorResponse,\n String errorMessage,\n RestStatus restStatus,\n Class errorResponseClass\n ) {\n if (errorResponseClass.isInstance(errorResponse)) {\n return buildProviderSpecificChatCompletionError(errorResponse, errorMessage, restStatus);\n } else {\n return 
buildDefaultChatCompletionError(errorResponse, errorMessage, restStatus);\n }\n }\n\n /**\n * Builds a custom {@link UnifiedChatCompletionException} for a streaming request.\n * This method is called when a specific error response is found in the HTTP result.\n * It must be implemented by subclasses to handle specific error response formats.\n * Only streaming requests should use this method.\n *\n * @param errorResponse the error response parsed from the HTTP result\n * @param errorMessage the error message to include in the exception\n * @param restStatus the REST status code of the response\n * @return an instance of {@link UnifiedChatCompletionException} with details from the error response\n */\n protected UnifiedChatCompletionException buildProviderSpecificChatCompletionError(\n ErrorResponse errorResponse,\n String errorMessage,\n RestStatus restStatus\n ) {\n throw new UnsupportedOperationException(\n \"Custom error handling is not implemented. Please override buildProviderSpecificChatCompletionError method.\"\n );\n }\n\n /**\n * Builds a default {@link UnifiedChatCompletionException} for a streaming request.\n * This method is used when an error response is received but no specific error handling is implemented.\n * Only streaming requests should use this method.\n *\n * @param errorResponse the error response parsed from the HTTP result\n * @param errorMessage the error message to include in the exception\n * @param restStatus the REST status code of the response\n * @return an instance of {@link UnifiedChatCompletionException} with details from the error response\n */\n protected UnifiedChatCompletionException buildDefaultChatCompletionError(\n ErrorResponse errorResponse,\n String errorMessage,\n RestStatus restStatus\n ) {\n return new UnifiedChatCompletionException(\n restStatus,\n errorMessage,\n createErrorType(errorResponse),\n restStatus.name().toLowerCase(Locale.ROOT)\n );\n }\n\n /**\n * Builds a mid-stream error for a streaming request.\n * This method is used when an error occurs while processing a streaming response.\n * It must be implemented by subclasses to handle specific error response formats.\n * Only streaming requests should use this method.\n *\n * @param inferenceEntityId the ID of the inference entity\n * @param message the error message\n * @param e the exception that caused the error, can be null\n * @return a {@link UnifiedChatCompletionException} representing the mid-stream error\n */\n public UnifiedChatCompletionException buildMidStreamChatCompletionError(String inferenceEntityId, String message, Exception e) {\n throw new UnsupportedOperationException(\n \"Mid-stream error handling is not implemented. 
Please override buildMidStreamChatCompletionError method.\"\n );\n }\n\n /**\n * Builds a mid-stream error for a streaming request with a custom error type.\n * This method is used when an error occurs while processing a streaming response and allows for custom error handling.\n * Only streaming requests should use this method.\n *\n * @param inferenceEntityId the ID of the inference entity\n * @param message the error message\n * @param e the exception that caused the error, can be null\n * @param errorResponseClass the class of the expected error response type\n * @return a {@link UnifiedChatCompletionException} representing the mid-stream error\n */\n protected UnifiedChatCompletionException buildMidStreamChatCompletionError(\n String inferenceEntityId,\n String message,\n Exception e,\n Class errorResponseClass", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2172738742", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 128923, - "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/retry/BaseResponseHandler.java", - "discussion_id": "2172738742", - "commented_code": "@@ -109,19 +117,230 @@ private void checkForErrorObject(Request request, HttpResult result) {\n }\n \n protected Exception buildError(String message, Request request, HttpResult result) {\n- var errorEntityMsg = errorParseFunction.apply(result);\n- return buildError(message, request, result, errorEntityMsg);\n+ var errorResponse = errorParseFunction.apply(result);\n+ return buildError(message, request, result, errorResponse);\n }\n \n protected Exception buildError(String message, Request request, HttpResult result, ErrorResponse errorResponse) {\n var responseStatusCode = result.response().getStatusLine().getStatusCode();\n return new ElasticsearchStatusException(\n- errorMessage(message, request, result, errorResponse, responseStatusCode),\n+ errorMessage(message, request, errorResponse, responseStatusCode),\n toRestStatus(responseStatusCode)\n );\n }\n \n- protected String errorMessage(String message, Request request, HttpResult result, ErrorResponse errorResponse, int statusCode) {\n+ /**\n+ * Builds an error for a streaming request with a custom error type.\n+ * This method is used when an error response is received from the external service.\n+ * Only streaming requests support this format, and it should be used when the error response.\n+ *\n+ * @param message the error message to include in the exception\n+ * @param request the request that caused the error\n+ * @param result the HTTP result containing the error response\n+ * @param errorResponse the parsed error response from the HTTP result\n+ * @param errorResponseClass the class of the expected error response type\n+ * @return an instance of {@link UnifiedChatCompletionException} with details from the error response\n+ */\n+ protected UnifiedChatCompletionException buildChatCompletionError(\n+ String message,\n+ Request request,\n+ HttpResult result,\n+ ErrorResponse errorResponse,\n+ Class errorResponseClass\n+ ) {\n+ assert request.isStreaming() : \"Only streaming requests support this format\";\n+ var statusCode = result.response().getStatusLine().getStatusCode();\n+ var errorMessage = errorMessage(message, request, errorResponse, statusCode);\n+ var restStatus = toRestStatus(statusCode);\n+\n+ return buildChatCompletionError(errorResponse, errorMessage, restStatus, errorResponseClass);\n+ }\n+\n+ /**\n+ * Builds a {@link UnifiedChatCompletionException} for 
a streaming request.\n+ * This method is used when an error response is received from the external service.\n+ * Only streaming requests should use this method.\n+ *\n+ * @param errorResponse the error response parsed from the HTTP result\n+ * @param errorMessage the error message to include in the exception\n+ * @param restStatus the REST status code of the response\n+ * @param errorResponseClass the class of the expected error response type\n+ * @return an instance of {@link UnifiedChatCompletionException} with details from the error response\n+ */\n+ protected UnifiedChatCompletionException buildChatCompletionError(\n+ ErrorResponse errorResponse,\n+ String errorMessage,\n+ RestStatus restStatus,\n+ Class errorResponseClass\n+ ) {\n+ if (errorResponseClass.isInstance(errorResponse)) {\n+ return buildProviderSpecificChatCompletionError(errorResponse, errorMessage, restStatus);\n+ } else {\n+ return buildDefaultChatCompletionError(errorResponse, errorMessage, restStatus);\n+ }\n+ }\n+\n+ /**\n+ * Builds a custom {@link UnifiedChatCompletionException} for a streaming request.\n+ * This method is called when a specific error response is found in the HTTP result.\n+ * It must be implemented by subclasses to handle specific error response formats.\n+ * Only streaming requests should use this method.\n+ *\n+ * @param errorResponse the error response parsed from the HTTP result\n+ * @param errorMessage the error message to include in the exception\n+ * @param restStatus the REST status code of the response\n+ * @return an instance of {@link UnifiedChatCompletionException} with details from the error response\n+ */\n+ protected UnifiedChatCompletionException buildProviderSpecificChatCompletionError(\n+ ErrorResponse errorResponse,\n+ String errorMessage,\n+ RestStatus restStatus\n+ ) {\n+ throw new UnsupportedOperationException(\n+ \"Custom error handling is not implemented. 
Please override buildProviderSpecificChatCompletionError method.\"\n+ );\n+ }\n+\n+ /**\n+ * Builds a default {@link UnifiedChatCompletionException} for a streaming request.\n+ * This method is used when an error response is received but no specific error handling is implemented.\n+ * Only streaming requests should use this method.\n+ *\n+ * @param errorResponse the error response parsed from the HTTP result\n+ * @param errorMessage the error message to include in the exception\n+ * @param restStatus the REST status code of the response\n+ * @return an instance of {@link UnifiedChatCompletionException} with details from the error response\n+ */\n+ protected UnifiedChatCompletionException buildDefaultChatCompletionError(\n+ ErrorResponse errorResponse,\n+ String errorMessage,\n+ RestStatus restStatus\n+ ) {\n+ return new UnifiedChatCompletionException(\n+ restStatus,\n+ errorMessage,\n+ createErrorType(errorResponse),\n+ restStatus.name().toLowerCase(Locale.ROOT)\n+ );\n+ }\n+\n+ /**\n+ * Builds a mid-stream error for a streaming request.\n+ * This method is used when an error occurs while processing a streaming response.\n+ * It must be implemented by subclasses to handle specific error response formats.\n+ * Only streaming requests should use this method.\n+ *\n+ * @param inferenceEntityId the ID of the inference entity\n+ * @param message the error message\n+ * @param e the exception that caused the error, can be null\n+ * @return a {@link UnifiedChatCompletionException} representing the mid-stream error\n+ */\n+ public UnifiedChatCompletionException buildMidStreamChatCompletionError(String inferenceEntityId, String message, Exception e) {\n+ throw new UnsupportedOperationException(\n+ \"Mid-stream error handling is not implemented. Please override buildMidStreamChatCompletionError method.\"\n+ );\n+ }\n+\n+ /**\n+ * Builds a mid-stream error for a streaming request with a custom error type.\n+ * This method is used when an error occurs while processing a streaming response and allows for custom error handling.\n+ * Only streaming requests should use this method.\n+ *\n+ * @param inferenceEntityId the ID of the inference entity\n+ * @param message the error message\n+ * @param e the exception that caused the error, can be null\n+ * @param errorResponseClass the class of the expected error response type\n+ * @return a {@link UnifiedChatCompletionException} representing the mid-stream error\n+ */\n+ protected UnifiedChatCompletionException buildMidStreamChatCompletionError(\n+ String inferenceEntityId,\n+ String message,\n+ Exception e,\n+ Class errorResponseClass", - "comment_created_at": "2025-06-27T19:33:47+00:00", - "comment_author": "jonathan-buttner", - "comment_body": "Instead of passing in a class here how about we either:\r\n\r\n- Take a function that accepts a string and returns some error class\r\n- Take a builder-like interface that has a method that accepts a string and returns an error class", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-document-performance-tradeoffs.json b/_reviewers/elasticsearch-document-performance-tradeoffs.json new file mode 100644 index 0000000..e1891e1 --- /dev/null +++ b/_reviewers/elasticsearch-document-performance-tradeoffs.json @@ -0,0 +1,106 @@ +[ + { + "discussion_id": "1627639397", + "pr_number": 109390, + "pr_file": "docs/reference/synonyms/apis/put-synonyms-set.asciidoc", + "created_at": "2024-06-05T12:09:05+00:00", + "commented_code": "Creates or updates a synonyms set.\n\nNOTE: Synonyms sets are limited to 
a maximum of 10,000 synonym rules per set.\nSynonym sets with more than 10,000 synonym rules will provide inconsistent search results.\nNOTE: Synonyms sets are limited to a maximum of 100,000 synonym rules per set.", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "1627639397", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 109390, + "pr_file": "docs/reference/synonyms/apis/put-synonyms-set.asciidoc", + "discussion_id": "1627639397", + "commented_code": "@@ -7,8 +7,7 @@\n \n Creates or updates a synonyms set.\n \n-NOTE: Synonyms sets are limited to a maximum of 10,000 synonym rules per set.\n-Synonym sets with more than 10,000 synonym rules will provide inconsistent search results.\n+NOTE: Synonyms sets are limited to a maximum of 100,000 synonym rules per set.", + "comment_created_at": "2024-06-05T12:09:05+00:00", + "comment_author": "pmpailis", + "comment_body": "Do we expect this increase to allowed rules to have any adverse side-effect in terms of performance? ", + "pr_file_module": null + }, + { + "comment_id": "1629432494", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 109390, + "pr_file": "docs/reference/synonyms/apis/put-synonyms-set.asciidoc", + "discussion_id": "1627639397", + "commented_code": "@@ -7,8 +7,7 @@\n \n Creates or updates a synonyms set.\n \n-NOTE: Synonyms sets are limited to a maximum of 10,000 synonym rules per set.\n-Synonym sets with more than 10,000 synonym rules will provide inconsistent search results.\n+NOTE: Synonyms sets are limited to a maximum of 100,000 synonym rules per set.", + "comment_created_at": "2024-06-06T12:23:58+00:00", + "comment_author": "carlosdelest", + "comment_body": "It will have an impact on heap when retrieving them - I need to perform some testing in order to check that we're not having too much memory usage for small clusters.\r\n\r\nOn non-API related operations, this should be no different than using file-based synonyms - and there's no limit set there.", + "pr_file_module": null + }, + { + "comment_id": "1633470327", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 109390, + "pr_file": "docs/reference/synonyms/apis/put-synonyms-set.asciidoc", + "discussion_id": "1627639397", + "commented_code": "@@ -7,8 +7,7 @@\n \n Creates or updates a synonyms set.\n \n-NOTE: Synonyms sets are limited to a maximum of 10,000 synonym rules per set.\n-Synonym sets with more than 10,000 synonym rules will provide inconsistent search results.\n+NOTE: Synonyms sets are limited to a maximum of 100,000 synonym rules per set.", + "comment_created_at": "2024-06-10T15:44:34+00:00", + "comment_author": "kderusso", + "comment_body": "Should we add a note that we recommend less than 10,000?", + "pr_file_module": null + }, + { + "comment_id": "1633550286", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 109390, + "pr_file": "docs/reference/synonyms/apis/put-synonyms-set.asciidoc", + "discussion_id": "1627639397", + "commented_code": "@@ -7,8 +7,7 @@\n \n Creates or updates a synonyms set.\n \n-NOTE: Synonyms sets are limited to a maximum of 10,000 synonym rules per set.\n-Synonym sets with more than 10,000 synonym rules will provide inconsistent search results.\n+NOTE: Synonyms sets are limited to a maximum of 100,000 synonym rules per set.", + "comment_created_at": "2024-06-10T16:48:32+00:00", + "comment_author": "carlosdelest", + "comment_body": "That makes sense - I'll be adding that.", + "pr_file_module": null + }, + { + "comment_id": "1636546585", + 
"repo_full_name": "elastic/elasticsearch", + "pr_number": 109390, + "pr_file": "docs/reference/synonyms/apis/put-synonyms-set.asciidoc", + "discussion_id": "1627639397", + "commented_code": "@@ -7,8 +7,7 @@\n \n Creates or updates a synonyms set.\n \n-NOTE: Synonyms sets are limited to a maximum of 10,000 synonym rules per set.\n-Synonym sets with more than 10,000 synonym rules will provide inconsistent search results.\n+NOTE: Synonyms sets are limited to a maximum of 100,000 synonym rules per set.", + "comment_created_at": "2024-06-12T14:08:12+00:00", + "comment_author": "carlosdelest", + "comment_body": "Thinking about this again - I guess we're mainly limited by heap size on this. It's hard to put a limit on this for example and not on file-based synonyms, which would have the same problem.\r\n\r\nI'll try and do some memory usage tests and come back later with a proposal for the users.", + "pr_file_module": null + }, + { + "comment_id": "1650948165", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 109390, + "pr_file": "docs/reference/synonyms/apis/put-synonyms-set.asciidoc", + "discussion_id": "1627639397", + "commented_code": "@@ -7,8 +7,7 @@\n \n Creates or updates a synonyms set.\n \n-NOTE: Synonyms sets are limited to a maximum of 10,000 synonym rules per set.\n-Synonym sets with more than 10,000 synonym rules will provide inconsistent search results.\n+NOTE: Synonyms sets are limited to a maximum of 100,000 synonym rules per set.", + "comment_created_at": "2024-06-24T12:27:14+00:00", + "comment_author": "mayya-sharipova", + "comment_body": "May be we should avoid any recommendation in terms of sizing, just put a note that for a large synonyms sets put extra demand on memory. ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1542709397", + "pr_number": 106861, + "pr_file": "docs/reference/search/field-caps.asciidoc", + "created_at": "2024-03-28T10:58:21+00:00", + "commented_code": "Defaults to `false`.\n\n`include_empty_fields`::\n (Optional, Boolean) If `false`, fields that never had a value in any shards are not included in the response. Fields that are not empty are always included. This flag does not consider deletions and updates. If a field was non-empty and all the documents containing that field were deleted or the field was removed by updates, it will still be returned even if the flag is `false`.\n (Optional, Boolean) If `false`, fields that never had a value in any shards are not included in the response. Fields that are not empty are always included. This flag does not consider deletions and updates. If a field was non-empty and all the documents containing that field were deleted or the field was removed by updates, it will still be returned even if the flag is `false`. To be used with caution, if set to `false` it gives accurate results at the cost of longer response times compared to the default behavior.", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "1542709397", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 106861, + "pr_file": "docs/reference/search/field-caps.asciidoc", + "discussion_id": "1542709397", + "commented_code": "@@ -78,7 +78,7 @@ include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=index-ignore-unavailab\n Defaults to `false`.\n \n `include_empty_fields`::\n- (Optional, Boolean) If `false`, fields that never had a value in any shards are not included in the response. Fields that are not empty are always included. 
This flag does not consider deletions and updates. If a field was non-empty and all the documents containing that field were deleted or the field was removed by updates, it will still be returned even if the flag is `false`.\n+ (Optional, Boolean) If `false`, fields that never had a value in any shards are not included in the response. Fields that are not empty are always included. This flag does not consider deletions and updates. If a field was non-empty and all the documents containing that field were deleted or the field was removed by updates, it will still be returned even if the flag is `false`. To be used with caution, if set to `false` it gives accurate results at the cost of longer response times compared to the default behavior.", + "comment_created_at": "2024-03-28T10:58:21+00:00", + "comment_author": "javanna", + "comment_body": "Could you expand on what the factor is that causes longer response times, so users have the knowledge to decide and adequately test?\r\n\r\nI wonder if we should have a warning flag like we do in other cases, that makes it more evident compared to a note in the text.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-document-performance-tradeoffs.md b/_reviewers/elasticsearch-document-performance-tradeoffs.md index 225c058..436c3b7 100644 --- a/_reviewers/elasticsearch-document-performance-tradeoffs.md +++ b/_reviewers/elasticsearch-document-performance-tradeoffs.md @@ -36,111 +36,3 @@ clusters. Monitor heap usage when working with large synonym sets. ``` This practice helps users make informed decisions about performance-sensitive configurations and prevents unexpected resource usage issues in production environments. - - -[ - { - "discussion_id": "1627639397", - "pr_number": 109390, - "pr_file": "docs/reference/synonyms/apis/put-synonyms-set.asciidoc", - "created_at": "2024-06-05T12:09:05+00:00", - "commented_code": "Creates or updates a synonyms set.\n\nNOTE: Synonyms sets are limited to a maximum of 10,000 synonym rules per set.\nSynonym sets with more than 10,000 synonym rules will provide inconsistent search results.\nNOTE: Synonyms sets are limited to a maximum of 100,000 synonym rules per set.", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "1627639397", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109390, - "pr_file": "docs/reference/synonyms/apis/put-synonyms-set.asciidoc", - "discussion_id": "1627639397", - "commented_code": "@@ -7,8 +7,7 @@\n \n Creates or updates a synonyms set.\n \n-NOTE: Synonyms sets are limited to a maximum of 10,000 synonym rules per set.\n-Synonym sets with more than 10,000 synonym rules will provide inconsistent search results.\n+NOTE: Synonyms sets are limited to a maximum of 100,000 synonym rules per set.", - "comment_created_at": "2024-06-05T12:09:05+00:00", - "comment_author": "pmpailis", - "comment_body": "Do we expect this increase to allowed rules to have any adverse side-effect in terms of performance? 
", - "pr_file_module": null - }, - { - "comment_id": "1629432494", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109390, - "pr_file": "docs/reference/synonyms/apis/put-synonyms-set.asciidoc", - "discussion_id": "1627639397", - "commented_code": "@@ -7,8 +7,7 @@\n \n Creates or updates a synonyms set.\n \n-NOTE: Synonyms sets are limited to a maximum of 10,000 synonym rules per set.\n-Synonym sets with more than 10,000 synonym rules will provide inconsistent search results.\n+NOTE: Synonyms sets are limited to a maximum of 100,000 synonym rules per set.", - "comment_created_at": "2024-06-06T12:23:58+00:00", - "comment_author": "carlosdelest", - "comment_body": "It will have an impact on heap when retrieving them - I need to perform some testing in order to check that we're not having too much memory usage for small clusters.\r\n\r\nOn non-API related operations, this should be no different than using file-based synonyms - and there's no limit set there.", - "pr_file_module": null - }, - { - "comment_id": "1633470327", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109390, - "pr_file": "docs/reference/synonyms/apis/put-synonyms-set.asciidoc", - "discussion_id": "1627639397", - "commented_code": "@@ -7,8 +7,7 @@\n \n Creates or updates a synonyms set.\n \n-NOTE: Synonyms sets are limited to a maximum of 10,000 synonym rules per set.\n-Synonym sets with more than 10,000 synonym rules will provide inconsistent search results.\n+NOTE: Synonyms sets are limited to a maximum of 100,000 synonym rules per set.", - "comment_created_at": "2024-06-10T15:44:34+00:00", - "comment_author": "kderusso", - "comment_body": "Should we add a note that we recommend less than 10,000?", - "pr_file_module": null - }, - { - "comment_id": "1633550286", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109390, - "pr_file": "docs/reference/synonyms/apis/put-synonyms-set.asciidoc", - "discussion_id": "1627639397", - "commented_code": "@@ -7,8 +7,7 @@\n \n Creates or updates a synonyms set.\n \n-NOTE: Synonyms sets are limited to a maximum of 10,000 synonym rules per set.\n-Synonym sets with more than 10,000 synonym rules will provide inconsistent search results.\n+NOTE: Synonyms sets are limited to a maximum of 100,000 synonym rules per set.", - "comment_created_at": "2024-06-10T16:48:32+00:00", - "comment_author": "carlosdelest", - "comment_body": "That makes sense - I'll be adding that.", - "pr_file_module": null - }, - { - "comment_id": "1636546585", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109390, - "pr_file": "docs/reference/synonyms/apis/put-synonyms-set.asciidoc", - "discussion_id": "1627639397", - "commented_code": "@@ -7,8 +7,7 @@\n \n Creates or updates a synonyms set.\n \n-NOTE: Synonyms sets are limited to a maximum of 10,000 synonym rules per set.\n-Synonym sets with more than 10,000 synonym rules will provide inconsistent search results.\n+NOTE: Synonyms sets are limited to a maximum of 100,000 synonym rules per set.", - "comment_created_at": "2024-06-12T14:08:12+00:00", - "comment_author": "carlosdelest", - "comment_body": "Thinking about this again - I guess we're mainly limited by heap size on this. 
It's hard to put a limit on this for example and not on file-based synonyms, which would have the same problem.\r\n\r\nI'll try and do some memory usage tests and come back later with a proposal for the users.", - "pr_file_module": null - }, - { - "comment_id": "1650948165", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109390, - "pr_file": "docs/reference/synonyms/apis/put-synonyms-set.asciidoc", - "discussion_id": "1627639397", - "commented_code": "@@ -7,8 +7,7 @@\n \n Creates or updates a synonyms set.\n \n-NOTE: Synonyms sets are limited to a maximum of 10,000 synonym rules per set.\n-Synonym sets with more than 10,000 synonym rules will provide inconsistent search results.\n+NOTE: Synonyms sets are limited to a maximum of 100,000 synonym rules per set.", - "comment_created_at": "2024-06-24T12:27:14+00:00", - "comment_author": "mayya-sharipova", - "comment_body": "May be we should avoid any recommendation in terms of sizing, just put a note that for a large synonyms sets put extra demand on memory. ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1542709397", - "pr_number": 106861, - "pr_file": "docs/reference/search/field-caps.asciidoc", - "created_at": "2024-03-28T10:58:21+00:00", - "commented_code": "Defaults to `false`.\n\n`include_empty_fields`::\n (Optional, Boolean) If `false`, fields that never had a value in any shards are not included in the response. Fields that are not empty are always included. This flag does not consider deletions and updates. If a field was non-empty and all the documents containing that field were deleted or the field was removed by updates, it will still be returned even if the flag is `false`.\n (Optional, Boolean) If `false`, fields that never had a value in any shards are not included in the response. Fields that are not empty are always included. This flag does not consider deletions and updates. If a field was non-empty and all the documents containing that field were deleted or the field was removed by updates, it will still be returned even if the flag is `false`. To be used with caution, if set to `false` it gives accurate results at the cost of longer response times compared to the default behavior.", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "1542709397", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 106861, - "pr_file": "docs/reference/search/field-caps.asciidoc", - "discussion_id": "1542709397", - "commented_code": "@@ -78,7 +78,7 @@ include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=index-ignore-unavailab\n Defaults to `false`.\n \n `include_empty_fields`::\n- (Optional, Boolean) If `false`, fields that never had a value in any shards are not included in the response. Fields that are not empty are always included. This flag does not consider deletions and updates. If a field was non-empty and all the documents containing that field were deleted or the field was removed by updates, it will still be returned even if the flag is `false`.\n+ (Optional, Boolean) If `false`, fields that never had a value in any shards are not included in the response. Fields that are not empty are always included. This flag does not consider deletions and updates. If a field was non-empty and all the documents containing that field were deleted or the field was removed by updates, it will still be returned even if the flag is `false`. 
To be used with caution, if set to `false` it gives accurate results at the cost of longer response times compared to the default behavior.", - "comment_created_at": "2024-03-28T10:58:21+00:00", - "comment_author": "javanna", - "comment_body": "Could you expand on what the factor is that causes longer response times, so users have the knowledge to decide and adequately test?\r\n\r\nI wonder if we should have a warning flag like we do in other cases, that makes it more evident compared to a note in the text.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-document-security-requirements-explicitly.json b/_reviewers/elasticsearch-document-security-requirements-explicitly.json new file mode 100644 index 0000000..047c030 --- /dev/null +++ b/_reviewers/elasticsearch-document-security-requirements-explicitly.json @@ -0,0 +1,58 @@ +[ + { + "discussion_id": "920428509", + "pr_number": 88438, + "pr_file": "docs/reference/features/apis/reset-features-api.asciidoc", + "created_at": "2022-07-13T19:26:57+00:00", + "commented_code": "Return a cluster to the same state as a new installation by resetting the feature state for all {es} features. This deletes all state information stored in system indices.\n\nIMPORTANT: This action deletes system indices and [starting from 8.1](https://github.com/elastic/elasticsearch/pull/81400) system index write-access has been removed from the superuser role. So, if you are performing this action from a user with the superuser role, you need to ensure that the user has another role with the `allow_restricted_indices` set to `true` to be able to delete all system indices.", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "920428509", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 88438, + "pr_file": "docs/reference/features/apis/reset-features-api.asciidoc", + "discussion_id": "920428509", + "commented_code": "@@ -26,6 +26,8 @@ POST /_features/_reset\n \n Return a cluster to the same state as a new installation by resetting the feature state for all {es} features. This deletes all state information stored in system indices.\n \n+IMPORTANT: This action deletes system indices and [starting from 8.1](https://github.com/elastic/elasticsearch/pull/81400) system index write-access has been removed from the superuser role. So, if you are performing this action from a user with the superuser role, you need to ensure that the user has another role with the `allow_restricted_indices` set to `true` to be able to delete all system indices.\n+", + "comment_created_at": "2022-07-13T19:26:57+00:00", + "comment_author": "lockewritesdocs", + "comment_body": "```suggestion\r\nIMPORTANT: This action deletes system indices. In {es} 8.1 and later, the superuser\r\nrole doesn't have write access to system indices. If you execute this request as a\r\nuser with the superuser role, you must have an additional role with the\r\n`allow_restricted_indices` privilege set to `true` to delete all system indices.\r\n\r\n```", + "pr_file_module": null + }, + { + "comment_id": "920431057", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 88438, + "pr_file": "docs/reference/features/apis/reset-features-api.asciidoc", + "discussion_id": "920428509", + "commented_code": "@@ -26,6 +26,8 @@ POST /_features/_reset\n \n Return a cluster to the same state as a new installation by resetting the feature state for all {es} features. 
This deletes all state information stored in system indices.\n \n+IMPORTANT: This action deletes system indices and [starting from 8.1](https://github.com/elastic/elasticsearch/pull/81400) system index write-access has been removed from the superuser role. So, if you are performing this action from a user with the superuser role, you need to ensure that the user has another role with the `allow_restricted_indices` set to `true` to be able to delete all system indices.\n+", + "comment_created_at": "2022-07-13T19:30:32+00:00", + "comment_author": "lockewritesdocs", + "comment_body": "Here's a suggested rewording of your original text. Let me know if this change makes sense 💡 ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1183541072", + "pr_number": 86048, + "pr_file": "docs/reference/settings/security-settings.asciidoc", + "created_at": "2023-05-03T11:02:43+00:00", + "commented_code": "[[ssl-tls-profile-settings]]\n[discrete]\n===== Transport profile TLS/SSL settings\nNOTE: Transport profiles do not inherit TLS/SSL settings\nfrom the default transport. \n\nThe same settings that are available for the <>\nare also available for each transport profile. By default, the settings for a\ntransport profile will be the same as the default transport unless they\nare specified.\nare also available for each transport profile.", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "1183541072", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 86048, + "pr_file": "docs/reference/settings/security-settings.asciidoc", + "discussion_id": "1183541072", + "commented_code": "@@ -2345,10 +2345,11 @@ include::ssl-settings.asciidoc[]\n [[ssl-tls-profile-settings]]\n [discrete]\n ===== Transport profile TLS/SSL settings\n+NOTE: Transport profiles do not inherit TLS/SSL settings\n+from the default transport. \n+\n The same settings that are available for the <>\n-are also available for each transport profile. By default, the settings for a\n-transport profile will be the same as the default transport unless they\n-are specified.\n+are also available for each transport profile. ", + "comment_created_at": "2023-05-03T11:02:43+00:00", + "comment_author": "ywangd", + "comment_body": "The `xpack.security.transport.ssl.enabled` setting is an exception:\r\n1. It controls whether SSL is enabled for both the default transport and any transport profiles.\r\n2. Transport profiles do not have a corresponding setting for it. ", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-document-security-requirements-explicitly.md b/_reviewers/elasticsearch-document-security-requirements-explicitly.md index 51b5a6f..c2a97c6 100644 --- a/_reviewers/elasticsearch-document-security-requirements-explicitly.md +++ b/_reviewers/elasticsearch-document-security-requirements-explicitly.md @@ -37,63 +37,3 @@ SSL for both default transport and any transport profiles. ``` Clear and complete security documentation prevents misconfigurations that could lead to vulnerabilities or access issues. - - -[ - { - "discussion_id": "920428509", - "pr_number": 88438, - "pr_file": "docs/reference/features/apis/reset-features-api.asciidoc", - "created_at": "2022-07-13T19:26:57+00:00", - "commented_code": "Return a cluster to the same state as a new installation by resetting the feature state for all {es} features. 
This deletes all state information stored in system indices.\n\nIMPORTANT: This action deletes system indices and [starting from 8.1](https://github.com/elastic/elasticsearch/pull/81400) system index write-access has been removed from the superuser role. So, if you are performing this action from a user with the superuser role, you need to ensure that the user has another role with the `allow_restricted_indices` set to `true` to be able to delete all system indices.", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "920428509", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 88438, - "pr_file": "docs/reference/features/apis/reset-features-api.asciidoc", - "discussion_id": "920428509", - "commented_code": "@@ -26,6 +26,8 @@ POST /_features/_reset\n \n Return a cluster to the same state as a new installation by resetting the feature state for all {es} features. This deletes all state information stored in system indices.\n \n+IMPORTANT: This action deletes system indices and [starting from 8.1](https://github.com/elastic/elasticsearch/pull/81400) system index write-access has been removed from the superuser role. So, if you are performing this action from a user with the superuser role, you need to ensure that the user has another role with the `allow_restricted_indices` set to `true` to be able to delete all system indices.\n+", - "comment_created_at": "2022-07-13T19:26:57+00:00", - "comment_author": "lockewritesdocs", - "comment_body": "```suggestion\r\nIMPORTANT: This action deletes system indices. In {es} 8.1 and later, the superuser\r\nrole doesn't have write access to system indices. If you execute this request as a\r\nuser with the superuser role, you must have an additional role with the\r\n`allow_restricted_indices` privilege set to `true` to delete all system indices.\r\n\r\n```", - "pr_file_module": null - }, - { - "comment_id": "920431057", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 88438, - "pr_file": "docs/reference/features/apis/reset-features-api.asciidoc", - "discussion_id": "920428509", - "commented_code": "@@ -26,6 +26,8 @@ POST /_features/_reset\n \n Return a cluster to the same state as a new installation by resetting the feature state for all {es} features. This deletes all state information stored in system indices.\n \n+IMPORTANT: This action deletes system indices and [starting from 8.1](https://github.com/elastic/elasticsearch/pull/81400) system index write-access has been removed from the superuser role. So, if you are performing this action from a user with the superuser role, you need to ensure that the user has another role with the `allow_restricted_indices` set to `true` to be able to delete all system indices.\n+", - "comment_created_at": "2022-07-13T19:30:32+00:00", - "comment_author": "lockewritesdocs", - "comment_body": "Here's a suggested rewording of your original text. Let me know if this change makes sense \ud83d\udca1 ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1183541072", - "pr_number": 86048, - "pr_file": "docs/reference/settings/security-settings.asciidoc", - "created_at": "2023-05-03T11:02:43+00:00", - "commented_code": "[[ssl-tls-profile-settings]]\n[discrete]\n===== Transport profile TLS/SSL settings\nNOTE: Transport profiles do not inherit TLS/SSL settings\nfrom the default transport. \n\nThe same settings that are available for the <>\nare also available for each transport profile. 
By default, the settings for a\ntransport profile will be the same as the default transport unless they\nare specified.\nare also available for each transport profile.", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "1183541072", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 86048, - "pr_file": "docs/reference/settings/security-settings.asciidoc", - "discussion_id": "1183541072", - "commented_code": "@@ -2345,10 +2345,11 @@ include::ssl-settings.asciidoc[]\n [[ssl-tls-profile-settings]]\n [discrete]\n ===== Transport profile TLS/SSL settings\n+NOTE: Transport profiles do not inherit TLS/SSL settings\n+from the default transport. \n+\n The same settings that are available for the <>\n-are also available for each transport profile. By default, the settings for a\n-transport profile will be the same as the default transport unless they\n-are specified.\n+are also available for each transport profile. ", - "comment_created_at": "2023-05-03T11:02:43+00:00", - "comment_author": "ywangd", - "comment_body": "The `xpack.security.transport.ssl.enabled` setting is an exception:\r\n1. It controls whether SSL is enabled for both the default transport and any transport profiles.\r\n2. Transport profiles do not have a corresponding setting for it. ", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-enforce-least-privilege.json b/_reviewers/elasticsearch-enforce-least-privilege.json new file mode 100644 index 0000000..1b9e3fa --- /dev/null +++ b/_reviewers/elasticsearch-enforce-least-privilege.json @@ -0,0 +1,72 @@ +[ + { + "discussion_id": "2163585024", + "pr_number": 129662, + "pr_file": "x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java", + "created_at": "2025-06-24T10:36:11+00:00", + "commented_code": "ReservedRolesStore.LISTS_ITEMS_INDEX,\n ReservedRolesStore.ALERTS_LEGACY_INDEX_REINDEXED_V8,\n ReservedRolesStore.LISTS_INDEX_REINDEXED_V8,\n ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8\n ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8,\n ReservedRolesStore.ASSET_CRITICALITY_INDEX\n )\n .privileges(\"read\", \"view_index_metadata\", \"write\", \"maintenance\")\n .build(),\n // Security - Entity Store is view only\n RoleDescriptor.IndicesPrivileges.builder()\n .indices(ReservedRolesStore.ENTITY_STORE_V1_LATEST_INDEX)\n .privileges(\"read\", \"view_index_metadata\")", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2163585024", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129662, + "pr_file": "x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java", + "discussion_id": "2163585024", + "commented_code": "@@ -842,10 +848,16 @@ private static RoleDescriptor buildEditorRoleDescriptor() {\n ReservedRolesStore.LISTS_ITEMS_INDEX,\n ReservedRolesStore.ALERTS_LEGACY_INDEX_REINDEXED_V8,\n ReservedRolesStore.LISTS_INDEX_REINDEXED_V8,\n- ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8\n+ ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8,\n+ ReservedRolesStore.ASSET_CRITICALITY_INDEX\n )\n .privileges(\"read\", \"view_index_metadata\", \"write\", \"maintenance\")\n .build(),\n+ // Security - Entity Store is view only\n+ RoleDescriptor.IndicesPrivileges.builder()\n+ .indices(ReservedRolesStore.ENTITY_STORE_V1_LATEST_INDEX)\n+ .privileges(\"read\", \"view_index_metadata\")", + "comment_created_at": "2025-06-24T10:36:11+00:00", + "comment_author": 
"richard-dennehy", + "comment_body": "Just to double check - Serverless Editor appears to have write access to these indices - is this difference intentional?", + "pr_file_module": null + }, + { + "comment_id": "2164316642", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129662, + "pr_file": "x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java", + "discussion_id": "2163585024", + "commented_code": "@@ -842,10 +848,16 @@ private static RoleDescriptor buildEditorRoleDescriptor() {\n ReservedRolesStore.LISTS_ITEMS_INDEX,\n ReservedRolesStore.ALERTS_LEGACY_INDEX_REINDEXED_V8,\n ReservedRolesStore.LISTS_INDEX_REINDEXED_V8,\n- ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8\n+ ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8,\n+ ReservedRolesStore.ASSET_CRITICALITY_INDEX\n )\n .privileges(\"read\", \"view_index_metadata\", \"write\", \"maintenance\")\n .build(),\n+ // Security - Entity Store is view only\n+ RoleDescriptor.IndicesPrivileges.builder()\n+ .indices(ReservedRolesStore.ENTITY_STORE_V1_LATEST_INDEX)\n+ .privileges(\"read\", \"view_index_metadata\")", + "comment_created_at": "2025-06-24T15:23:47+00:00", + "comment_author": "opauloh", + "comment_body": "Thanks for bringing it to my attention! \r\n\r\nI will tag @hop-dev and @jaredburgettelastic to help on that. Mark / Jared, from what I could see, users don't really need to write directly into the Entity Store index, but do you happen to know a use case where they would need? Like running the API to clean the Entity Store or something else?", + "pr_file_module": null + }, + { + "comment_id": "2174986836", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129662, + "pr_file": "x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java", + "discussion_id": "2163585024", + "commented_code": "@@ -842,10 +848,16 @@ private static RoleDescriptor buildEditorRoleDescriptor() {\n ReservedRolesStore.LISTS_ITEMS_INDEX,\n ReservedRolesStore.ALERTS_LEGACY_INDEX_REINDEXED_V8,\n ReservedRolesStore.LISTS_INDEX_REINDEXED_V8,\n- ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8\n+ ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8,\n+ ReservedRolesStore.ASSET_CRITICALITY_INDEX\n )\n .privileges(\"read\", \"view_index_metadata\", \"write\", \"maintenance\")\n .build(),\n+ // Security - Entity Store is view only\n+ RoleDescriptor.IndicesPrivileges.builder()\n+ .indices(ReservedRolesStore.ENTITY_STORE_V1_LATEST_INDEX)\n+ .privileges(\"read\", \"view_index_metadata\")", + "comment_created_at": "2025-06-30T12:43:58+00:00", + "comment_author": "hop-dev", + "comment_body": "No I think this was a mistake in the original PR apologies!", + "pr_file_module": null + }, + { + "comment_id": "2175678919", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129662, + "pr_file": "x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java", + "discussion_id": "2163585024", + "commented_code": "@@ -842,10 +848,16 @@ private static RoleDescriptor buildEditorRoleDescriptor() {\n ReservedRolesStore.LISTS_ITEMS_INDEX,\n ReservedRolesStore.ALERTS_LEGACY_INDEX_REINDEXED_V8,\n ReservedRolesStore.LISTS_INDEX_REINDEXED_V8,\n- ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8\n+ ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8,\n+ ReservedRolesStore.ASSET_CRITICALITY_INDEX\n )\n .privileges(\"read\", \"view_index_metadata\", \"write\", \"maintenance\")\n .build(),\n+ // Security - Entity Store is view 
only\n+ RoleDescriptor.IndicesPrivileges.builder()\n+ .indices(ReservedRolesStore.ENTITY_STORE_V1_LATEST_INDEX)\n+ .privileges(\"read\", \"view_index_metadata\")", + "comment_created_at": "2025-06-30T18:44:09+00:00", + "comment_author": "opauloh", + "comment_body": "Thanks for confirming @hop-dev, so @richard-dennehy, the changes on this PR are correct, we will follow up on reducing the extra permissions on Serverless editor role to keep it consistent!", + "pr_file_module": null + }, + { + "comment_id": "2176832830", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129662, + "pr_file": "x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java", + "discussion_id": "2163585024", + "commented_code": "@@ -842,10 +848,16 @@ private static RoleDescriptor buildEditorRoleDescriptor() {\n ReservedRolesStore.LISTS_ITEMS_INDEX,\n ReservedRolesStore.ALERTS_LEGACY_INDEX_REINDEXED_V8,\n ReservedRolesStore.LISTS_INDEX_REINDEXED_V8,\n- ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8\n+ ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8,\n+ ReservedRolesStore.ASSET_CRITICALITY_INDEX\n )\n .privileges(\"read\", \"view_index_metadata\", \"write\", \"maintenance\")\n .build(),\n+ // Security - Entity Store is view only\n+ RoleDescriptor.IndicesPrivileges.builder()\n+ .indices(ReservedRolesStore.ENTITY_STORE_V1_LATEST_INDEX)\n+ .privileges(\"read\", \"view_index_metadata\")", + "comment_created_at": "2025-07-01T08:33:31+00:00", + "comment_author": "hop-dev", + "comment_body": "@opauloh sorry I have just thought, because the transform runs as the user who enabled the entity store, I believe we do need write permissions? Have I got that wrong? ", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-enforce-least-privilege.md b/_reviewers/elasticsearch-enforce-least-privilege.md index 6e7ef0f..8018f90 100644 --- a/_reviewers/elasticsearch-enforce-least-privilege.md +++ b/_reviewers/elasticsearch-enforce-least-privilege.md @@ -34,77 +34,3 @@ RoleDescriptor.IndicesPrivileges.builder() ``` When in doubt, start with more restrictive permissions and expand only when necessary based on functional requirements. Challenge assumptions about permission needs during code reviews, as shown in the discussion where write access was initially questioned and determined to be unnecessary. 
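As a rough sketch of the shape this takes in practice — built only from the builder calls quoted in the discussion above, with the variable name being illustrative rather than part of the actual `ReservedRolesStore` code:

```java
// Illustrative fragment only: a view-only grant on the entity store index,
// mirroring the read/view_index_metadata privileges discussed above.
// "write" or "maintenance" would be added only once a concrete need
// (for example, the transform running as the enabling user) is confirmed.
RoleDescriptor.IndicesPrivileges entityStoreViewOnly = RoleDescriptor.IndicesPrivileges.builder()
    .indices(ReservedRolesStore.ENTITY_STORE_V1_LATEST_INDEX)
    .privileges("read", "view_index_metadata")
    .build();
```
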
- - -[ - { - "discussion_id": "2163585024", - "pr_number": 129662, - "pr_file": "x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java", - "created_at": "2025-06-24T10:36:11+00:00", - "commented_code": "ReservedRolesStore.LISTS_ITEMS_INDEX,\n ReservedRolesStore.ALERTS_LEGACY_INDEX_REINDEXED_V8,\n ReservedRolesStore.LISTS_INDEX_REINDEXED_V8,\n ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8\n ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8,\n ReservedRolesStore.ASSET_CRITICALITY_INDEX\n )\n .privileges(\"read\", \"view_index_metadata\", \"write\", \"maintenance\")\n .build(),\n // Security - Entity Store is view only\n RoleDescriptor.IndicesPrivileges.builder()\n .indices(ReservedRolesStore.ENTITY_STORE_V1_LATEST_INDEX)\n .privileges(\"read\", \"view_index_metadata\")", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2163585024", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129662, - "pr_file": "x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java", - "discussion_id": "2163585024", - "commented_code": "@@ -842,10 +848,16 @@ private static RoleDescriptor buildEditorRoleDescriptor() {\n ReservedRolesStore.LISTS_ITEMS_INDEX,\n ReservedRolesStore.ALERTS_LEGACY_INDEX_REINDEXED_V8,\n ReservedRolesStore.LISTS_INDEX_REINDEXED_V8,\n- ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8\n+ ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8,\n+ ReservedRolesStore.ASSET_CRITICALITY_INDEX\n )\n .privileges(\"read\", \"view_index_metadata\", \"write\", \"maintenance\")\n .build(),\n+ // Security - Entity Store is view only\n+ RoleDescriptor.IndicesPrivileges.builder()\n+ .indices(ReservedRolesStore.ENTITY_STORE_V1_LATEST_INDEX)\n+ .privileges(\"read\", \"view_index_metadata\")", - "comment_created_at": "2025-06-24T10:36:11+00:00", - "comment_author": "richard-dennehy", - "comment_body": "Just to double check - Serverless Editor appears to have write access to these indices - is this difference intentional?", - "pr_file_module": null - }, - { - "comment_id": "2164316642", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129662, - "pr_file": "x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java", - "discussion_id": "2163585024", - "commented_code": "@@ -842,10 +848,16 @@ private static RoleDescriptor buildEditorRoleDescriptor() {\n ReservedRolesStore.LISTS_ITEMS_INDEX,\n ReservedRolesStore.ALERTS_LEGACY_INDEX_REINDEXED_V8,\n ReservedRolesStore.LISTS_INDEX_REINDEXED_V8,\n- ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8\n+ ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8,\n+ ReservedRolesStore.ASSET_CRITICALITY_INDEX\n )\n .privileges(\"read\", \"view_index_metadata\", \"write\", \"maintenance\")\n .build(),\n+ // Security - Entity Store is view only\n+ RoleDescriptor.IndicesPrivileges.builder()\n+ .indices(ReservedRolesStore.ENTITY_STORE_V1_LATEST_INDEX)\n+ .privileges(\"read\", \"view_index_metadata\")", - "comment_created_at": "2025-06-24T15:23:47+00:00", - "comment_author": "opauloh", - "comment_body": "Thanks for bringing it to my attention! \r\n\r\nI will tag @hop-dev and @jaredburgettelastic to help on that. Mark / Jared, from what I could see, users don't really need to write directly into the Entity Store index, but do you happen to know a use case where they would need? 
Like running the API to clean the Entity Store or something else?", - "pr_file_module": null - }, - { - "comment_id": "2174986836", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129662, - "pr_file": "x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java", - "discussion_id": "2163585024", - "commented_code": "@@ -842,10 +848,16 @@ private static RoleDescriptor buildEditorRoleDescriptor() {\n ReservedRolesStore.LISTS_ITEMS_INDEX,\n ReservedRolesStore.ALERTS_LEGACY_INDEX_REINDEXED_V8,\n ReservedRolesStore.LISTS_INDEX_REINDEXED_V8,\n- ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8\n+ ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8,\n+ ReservedRolesStore.ASSET_CRITICALITY_INDEX\n )\n .privileges(\"read\", \"view_index_metadata\", \"write\", \"maintenance\")\n .build(),\n+ // Security - Entity Store is view only\n+ RoleDescriptor.IndicesPrivileges.builder()\n+ .indices(ReservedRolesStore.ENTITY_STORE_V1_LATEST_INDEX)\n+ .privileges(\"read\", \"view_index_metadata\")", - "comment_created_at": "2025-06-30T12:43:58+00:00", - "comment_author": "hop-dev", - "comment_body": "No I think this was a mistake in the original PR apologies!", - "pr_file_module": null - }, - { - "comment_id": "2175678919", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129662, - "pr_file": "x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java", - "discussion_id": "2163585024", - "commented_code": "@@ -842,10 +848,16 @@ private static RoleDescriptor buildEditorRoleDescriptor() {\n ReservedRolesStore.LISTS_ITEMS_INDEX,\n ReservedRolesStore.ALERTS_LEGACY_INDEX_REINDEXED_V8,\n ReservedRolesStore.LISTS_INDEX_REINDEXED_V8,\n- ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8\n+ ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8,\n+ ReservedRolesStore.ASSET_CRITICALITY_INDEX\n )\n .privileges(\"read\", \"view_index_metadata\", \"write\", \"maintenance\")\n .build(),\n+ // Security - Entity Store is view only\n+ RoleDescriptor.IndicesPrivileges.builder()\n+ .indices(ReservedRolesStore.ENTITY_STORE_V1_LATEST_INDEX)\n+ .privileges(\"read\", \"view_index_metadata\")", - "comment_created_at": "2025-06-30T18:44:09+00:00", - "comment_author": "opauloh", - "comment_body": "Thanks for confirming @hop-dev, so @richard-dennehy, the changes on this PR are correct, we will follow up on reducing the extra permissions on Serverless editor role to keep it consistent!", - "pr_file_module": null - }, - { - "comment_id": "2176832830", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129662, - "pr_file": "x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java", - "discussion_id": "2163585024", - "commented_code": "@@ -842,10 +848,16 @@ private static RoleDescriptor buildEditorRoleDescriptor() {\n ReservedRolesStore.LISTS_ITEMS_INDEX,\n ReservedRolesStore.ALERTS_LEGACY_INDEX_REINDEXED_V8,\n ReservedRolesStore.LISTS_INDEX_REINDEXED_V8,\n- ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8\n+ ReservedRolesStore.LISTS_ITEMS_INDEX_REINDEXED_V8,\n+ ReservedRolesStore.ASSET_CRITICALITY_INDEX\n )\n .privileges(\"read\", \"view_index_metadata\", \"write\", \"maintenance\")\n .build(),\n+ // Security - Entity Store is view only\n+ RoleDescriptor.IndicesPrivileges.builder()\n+ .indices(ReservedRolesStore.ENTITY_STORE_V1_LATEST_INDEX)\n+ .privileges(\"read\", \"view_index_metadata\")", - "comment_created_at": "2025-07-01T08:33:31+00:00", - "comment_author": "hop-dev", 
- "comment_body": "@opauloh sorry I have just thought, because the transform runs as the user who enabled the entity store, I believe we do need write permissions? Have I got that wrong? ", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-exceptions-for-critical-errors.json b/_reviewers/elasticsearch-exceptions-for-critical-errors.json new file mode 100644 index 0000000..f947f5b --- /dev/null +++ b/_reviewers/elasticsearch-exceptions-for-critical-errors.json @@ -0,0 +1,162 @@ +[ + { + "discussion_id": "2172022369", + "pr_number": 130206, + "pr_file": "server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/KMeansLocal.java", + "created_at": "2025-06-27T13:12:53+00:00", + "commented_code": "continue;\n }\n float[] neighborCentroid = centroids[neighbor];\n float soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n final float soar;\n if (vectorCentroidDist > SOAR_MIN_DISTANCE) {\n soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n } else {\n // if the vector is very close to the centroid, we look for the second-nearest centroid\n soar = VectorUtil.squareDistance(vector, neighborCentroid);\n }\n if (soar < minSoar) {\n bestAssignment = neighbor;\n minSoar = soar;\n }\n }\n\n assert bestAssignment != -1 : \"Failed to assign soar vector to centroid\";", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2172022369", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130206, + "pr_file": "server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/KMeansLocal.java", + "discussion_id": "2172022369", + "commented_code": "@@ -207,13 +215,19 @@ private int[] assignSpilled(FloatVectorValues vectors, List neighborhoods\n continue;\n }\n float[] neighborCentroid = centroids[neighbor];\n- float soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n+ final float soar;\n+ if (vectorCentroidDist > SOAR_MIN_DISTANCE) {\n+ soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n+ } else {\n+ // if the vector is very close to the centroid, we look for the second-nearest centroid\n+ soar = VectorUtil.squareDistance(vector, neighborCentroid);\n+ }\n if (soar < minSoar) {\n bestAssignment = neighbor;\n minSoar = soar;\n }\n }\n-\n+ assert bestAssignment != -1 : \"Failed to assign soar vector to centroid\";", + "comment_created_at": "2025-06-27T13:12:53+00:00", + "comment_author": "Copilot", + "comment_body": "Using an assertion to handle the unassigned bestAssignment might fail silently in production if assertions are disabled. 
Consider explicitly handling this error case (e.g., by throwing an exception) to ensure that assignment failures are caught.\n```suggestion\n if (bestAssignment == -1) {\n throw new IllegalStateException(\"Failed to assign soar vector to centroid\");\n }\n```", + "pr_file_module": null + }, + { + "comment_id": "2172025988", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130206, + "pr_file": "server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/KMeansLocal.java", + "discussion_id": "2172022369", + "commented_code": "@@ -207,13 +215,19 @@ private int[] assignSpilled(FloatVectorValues vectors, List neighborhoods\n continue;\n }\n float[] neighborCentroid = centroids[neighbor];\n- float soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n+ final float soar;\n+ if (vectorCentroidDist > SOAR_MIN_DISTANCE) {\n+ soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n+ } else {\n+ // if the vector is very close to the centroid, we look for the second-nearest centroid\n+ soar = VectorUtil.squareDistance(vector, neighborCentroid);\n+ }\n if (soar < minSoar) {\n bestAssignment = neighbor;\n minSoar = soar;\n }\n }\n-\n+ assert bestAssignment != -1 : \"Failed to assign soar vector to centroid\";", + "comment_created_at": "2025-06-27T13:14:55+00:00", + "comment_author": "benwtrent", + "comment_body": "This assert is fine. But we need to make sure that if we do accidentally set an invalid centroid, when going through the assignments later, we check for `-1`", + "pr_file_module": null + }, + { + "comment_id": "2172029264", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130206, + "pr_file": "server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/KMeansLocal.java", + "discussion_id": "2172022369", + "commented_code": "@@ -207,13 +215,19 @@ private int[] assignSpilled(FloatVectorValues vectors, List neighborhoods\n continue;\n }\n float[] neighborCentroid = centroids[neighbor];\n- float soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n+ final float soar;\n+ if (vectorCentroidDist > SOAR_MIN_DISTANCE) {\n+ soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n+ } else {\n+ // if the vector is very close to the centroid, we look for the second-nearest centroid\n+ soar = VectorUtil.squareDistance(vector, neighborCentroid);\n+ }\n if (soar < minSoar) {\n bestAssignment = neighbor;\n minSoar = soar;\n }\n }\n-\n+ assert bestAssignment != -1 : \"Failed to assign soar vector to centroid\";", + "comment_created_at": "2025-06-27T13:16:41+00:00", + "comment_author": "iverase", + "comment_body": "We are doing it as the code is working now, let see if we can add a test\r\n", + "pr_file_module": null + }, + { + "comment_id": "2172040204", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130206, + "pr_file": "server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/KMeansLocal.java", + "discussion_id": "2172022369", + "commented_code": "@@ -207,13 +215,19 @@ private int[] assignSpilled(FloatVectorValues vectors, List neighborhoods\n continue;\n }\n float[] neighborCentroid = centroids[neighbor];\n- float soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n+ final float soar;\n+ if (vectorCentroidDist > SOAR_MIN_DISTANCE) {\n+ soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, 
vectorCentroidDist);\n+ } else {\n+ // if the vector is very close to the centroid, we look for the second-nearest centroid\n+ soar = VectorUtil.squareDistance(vector, neighborCentroid);\n+ }\n if (soar < minSoar) {\n bestAssignment = neighbor;\n minSoar = soar;\n }\n }\n-\n+ assert bestAssignment != -1 : \"Failed to assign soar vector to centroid\";", + "comment_created_at": "2025-06-27T13:22:44+00:00", + "comment_author": "iverase", + "comment_body": "We don't really check but the way we create assignments per clusters (which is horrible slow and I want to change it) later on only take valid values so we can put in the array anything that will get ignored. ", + "pr_file_module": null + }, + { + "comment_id": "2172052178", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130206, + "pr_file": "server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/KMeansLocal.java", + "discussion_id": "2172022369", + "commented_code": "@@ -207,13 +215,19 @@ private int[] assignSpilled(FloatVectorValues vectors, List neighborhoods\n continue;\n }\n float[] neighborCentroid = centroids[neighbor];\n- float soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n+ final float soar;\n+ if (vectorCentroidDist > SOAR_MIN_DISTANCE) {\n+ soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n+ } else {\n+ // if the vector is very close to the centroid, we look for the second-nearest centroid\n+ soar = VectorUtil.squareDistance(vector, neighborCentroid);\n+ }\n if (soar < minSoar) {\n bestAssignment = neighbor;\n minSoar = soar;\n }\n }\n-\n+ assert bestAssignment != -1 : \"Failed to assign soar vector to centroid\";", + "comment_created_at": "2025-06-27T13:29:00+00:00", + "comment_author": "iverase", + "comment_body": "I pushed https://github.com/elastic/elasticsearch/pull/130206/commits/d26103a495bf0f83fbb2d28d1c00df2ad6b4ebe7 that is how I found this issue. We explicitly check for -1 in soar assignments.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1491692056", + "pr_number": 105558, + "pr_file": "server/src/main/java/org/elasticsearch/cluster/coordination/NodeJoinExecutor.java", + "created_at": "2024-02-15T21:27:47+00:00", + "commented_code": "} else {\n try {\n CompatibilityVersions compatibilityVersions = nodeJoinTask.compatibilityVersions();\n assert systemIndexVersionConsistent(compatibilityVersions.systemIndexMappingsVersion(), compatibilityVersionsMap);", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "1491692056", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 105558, + "pr_file": "server/src/main/java/org/elasticsearch/cluster/coordination/NodeJoinExecutor.java", + "discussion_id": "1491692056", + "commented_code": "@@ -158,6 +159,7 @@ public ClusterState execute(BatchExecutionContext batchExecutionContex\n } else {\n try {\n CompatibilityVersions compatibilityVersions = nodeJoinTask.compatibilityVersions();\n+ assert systemIndexVersionConsistent(compatibilityVersions.systemIndexMappingsVersion(), compatibilityVersionsMap);", + "comment_created_at": "2024-02-15T21:27:47+00:00", + "comment_author": "rjernst", + "comment_body": "Using assert means this won't run in production. 
Should we also ensure these match regardless of whether asserts are turned on?", + "pr_file_module": null + }, + { + "comment_id": "1491749688", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 105558, + "pr_file": "server/src/main/java/org/elasticsearch/cluster/coordination/NodeJoinExecutor.java", + "discussion_id": "1491692056", + "commented_code": "@@ -158,6 +159,7 @@ public ClusterState execute(BatchExecutionContext batchExecutionContex\n } else {\n try {\n CompatibilityVersions compatibilityVersions = nodeJoinTask.compatibilityVersions();\n+ assert systemIndexVersionConsistent(compatibilityVersions.systemIndexMappingsVersion(), compatibilityVersionsMap);", + "comment_created_at": "2024-02-15T22:39:30+00:00", + "comment_author": "williamrandolph", + "comment_body": "Definitely -- I put up this PR as a draft so I could run CI, work out a good test case, and figure out what we need to communicate to plugin authors. Once all that's done, I'll put this in an `ensureSystemIndexVersionConsistent` along the lines of the other checks in this block of code.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2033704333", + "pr_number": 122491, + "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", + "created_at": "2025-04-08T17:24:22+00:00", + "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. Licensed under the \"Elastic License\n * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n * Public License v 1\"; you may not use this file except in compliance with, at\n * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n * License v3.0 only\", or the \"Server Side Public License, v 1\".\n */\npackage org.elasticsearch.ingest.common;\n\nimport org.elasticsearch.common.Strings;\nimport org.elasticsearch.common.network.InetAddresses;\nimport org.elasticsearch.common.network.NetworkAddress;\nimport org.elasticsearch.common.time.DateFormatters;\nimport org.elasticsearch.core.Nullable;\n\nimport java.time.Instant;\nimport java.time.LocalDate;\nimport java.time.ZoneId;\nimport java.time.ZoneOffset;\nimport java.time.ZonedDateTime;\nimport java.time.format.DateTimeFormatter;\nimport java.time.format.DateTimeParseException;\nimport java.time.temporal.ChronoField;\nimport java.time.temporal.TemporalAccessor;\nimport java.time.temporal.WeekFields;\nimport java.util.ArrayList;\nimport java.util.HashMap;\nimport java.util.HashSet;\nimport java.util.List;\nimport java.util.Locale;\nimport java.util.Map;\nimport java.util.Objects;\nimport java.util.Set;\nimport java.util.regex.Matcher;\nimport java.util.regex.Pattern;\n\nimport static java.time.temporal.ChronoField.DAY_OF_MONTH;\nimport static java.time.temporal.ChronoField.HOUR_OF_DAY;\nimport static java.time.temporal.ChronoField.MINUTE_OF_DAY;\nimport static java.time.temporal.ChronoField.MONTH_OF_YEAR;\nimport static java.time.temporal.ChronoField.NANO_OF_SECOND;\nimport static java.time.temporal.ChronoField.SECOND_OF_DAY;\nimport static java.util.Map.entry;\nimport static org.elasticsearch.ingest.common.CefParser.DataType.DoubleType;\nimport static org.elasticsearch.ingest.common.CefParser.DataType.FloatType;\nimport static org.elasticsearch.ingest.common.CefParser.DataType.IPType;\nimport static org.elasticsearch.ingest.common.CefParser.DataType.IntegerType;\nimport static 
org.elasticsearch.ingest.common.CefParser.DataType.LongType;\nimport static org.elasticsearch.ingest.common.CefParser.DataType.MACAddressType;\nimport static org.elasticsearch.ingest.common.CefParser.DataType.StringType;\nimport static org.elasticsearch.ingest.common.CefParser.DataType.TimestampType;\n\nfinal class CefParser {\n private final boolean removeEmptyValues;\n private final ZoneId timezone;\n\n CefParser(ZoneId timezone, boolean removeEmptyValues) {\n this.removeEmptyValues = removeEmptyValues;\n this.timezone = timezone;\n }\n\n private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n\n // New patterns for extension parsing\n private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n private static final String EXTENSION_VALUE_PATTERN = \"(?:[^\\\\s\\\\\\\\]|\\\\\\\\[^|]|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n );\n\n // Comprehensive regex pattern to match various MAC address formats\n private static final String MAC_ADDRESS_REGEX = \"^(\" +\n // Combined colon and hyphen separated 6-group patterns\n \"(([0-9A-Fa-f]{2}[:|-]){5}[0-9A-Fa-f]{2})|\" +\n // Dot-separated 6-group pattern\n \"([0-9A-Fa-f]{4}\\\\.){2}[0-9A-Fa-f]{4}|\" +\n // Combined colon and hyphen separated 8-group patterns\n \"([0-9A-Fa-f]{2}[:|-]){7}[0-9A-Fa-f]{2}|\" +\n // Dot-separated EUI-64\n \"([0-9A-Fa-f]{4}\\\\.){3}[0-9A-Fa-f]{4}\" + \")$\";\n\n private static final Pattern MAC_ADDRESS_PATTERN = Pattern.compile(MAC_ADDRESS_REGEX);\n private static final int EUI48_HEX_LENGTH = 48 / 4;\n private static final int EUI64_HEX_LENGTH = 64 / 4;\n private static final int EUI64_HEX_WITH_SEPARATOR_MAX_LENGTH = EUI64_HEX_LENGTH + EUI64_HEX_LENGTH / 2 - 1;\n private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = Map.ofEntries(\n entry(\"\\\\\\\\\", \"\\\\\"),\n entry(\"\\\\=\", \"=\"),\n entry(\"\\\\\\n\", \"\\n\"),\n entry(\"\\\\\\r\", \"\\r\")\n );\n\n enum DataType {\n IntegerType,\n LongType,\n FloatType,\n DoubleType,\n StringType,\n BooleanType,\n IPType,\n MACAddressType,\n TimestampType\n }\n\n private static final Map EXTENSION_MAPPINGS = Map.ofEntries(\n entry(\"agt\", new ExtensionMapping(\"agentAddress\", IPType, \"agent.ip\")),\n entry(\"agentDnsDomain\", new ExtensionMapping(\"agentDnsDomain\", StringType, \"agent.name\")),\n entry(\"ahost\", new ExtensionMapping(\"agentHostName\", StringType, \"agent.name\")),\n entry(\"aid\", new ExtensionMapping(\"agentId\", StringType, \"agent.id\")),\n entry(\"amac\", new ExtensionMapping(\"agentMacAddress\", MACAddressType, \"agent.mac\")),\n entry(\"agentNtDomain\", new ExtensionMapping(\"agentNtDomain\", StringType, null)),\n entry(\"art\", new ExtensionMapping(\"agentReceiptTime\", TimestampType, \"event.created\")),\n entry(\"atz\", new ExtensionMapping(\"agentTimeZone\", StringType, null)),\n entry(\"agentTranslatedAddress\", new ExtensionMapping(\"agentTranslatedAddress\", IPType, 
null)),\n entry(\"agentTranslatedZoneExternalID\", new ExtensionMapping(\"agentTranslatedZoneExternalID\", StringType, null)),\n entry(\"agentTranslatedZoneURI\", new ExtensionMapping(\"agentTranslatedZoneURI\", StringType, null)),\n entry(\"at\", new ExtensionMapping(\"agentType\", StringType, \"agent.type\")),\n entry(\"av\", new ExtensionMapping(\"agentVersion\", StringType, \"agent.version\")),\n entry(\"agentZoneExternalID\", new ExtensionMapping(\"agentZoneExternalID\", StringType, null)),\n entry(\"agentZoneURI\", new ExtensionMapping(\"agentZoneURI\", StringType, null)),\n entry(\"app\", new ExtensionMapping(\"applicationProtocol\", StringType, \"network.protocol\")),\n entry(\"cnt\", new ExtensionMapping(\"baseEventCount\", IntegerType, null)),\n entry(\"in\", new ExtensionMapping(\"bytesIn\", LongType, \"source.bytes\")),\n entry(\"out\", new ExtensionMapping(\"bytesOut\", LongType, \"destination.bytes\")),\n entry(\"customerExternalID\", new ExtensionMapping(\"customerExternalID\", StringType, \"organization.id\")),\n entry(\"customerURI\", new ExtensionMapping(\"customerURI\", StringType, \"organization.name\")),\n entry(\"dst\", new ExtensionMapping(\"destinationAddress\", IPType, \"destination.ip\")),\n entry(\"destinationDnsDomain\", new ExtensionMapping(\"destinationDnsDomain\", StringType, \"destination.registered_domain\")),\n entry(\"dlat\", new ExtensionMapping(\"destinationGeoLatitude\", DoubleType, \"destination.geo.location.lat\")),\n entry(\"dlong\", new ExtensionMapping(\"destinationGeoLongitude\", DoubleType, \"destination.geo.location.lon\")),\n entry(\"dhost\", new ExtensionMapping(\"destinationHostName\", StringType, \"destination.domain\")),\n entry(\"dmac\", new ExtensionMapping(\"destinationMacAddress\", MACAddressType, \"destination.mac\")),\n entry(\"dntdom\", new ExtensionMapping(\"destinationNtDomain\", StringType, \"destination.registered_domain\")),\n entry(\"dpt\", new ExtensionMapping(\"destinationPort\", IntegerType, \"destination.port\")),\n entry(\"dpid\", new ExtensionMapping(\"destinationProcessId\", LongType, \"destination.process.pid\")),\n entry(\"dproc\", new ExtensionMapping(\"destinationProcessName\", StringType, \"destination.process.name\")),\n entry(\"destinationServiceName\", new ExtensionMapping(\"destinationServiceName\", StringType, \"destination.service.name\")),\n entry(\"destinationTranslatedAddress\", new ExtensionMapping(\"destinationTranslatedAddress\", IPType, \"destination.nat.ip\")),\n entry(\"destinationTranslatedPort\", new ExtensionMapping(\"destinationTranslatedPort\", IntegerType, \"destination.nat.port\")),\n entry(\"destinationTranslatedZoneExternalID\", new ExtensionMapping(\"destinationTranslatedZoneExternalID\", StringType, null)),\n entry(\"destinationTranslatedZoneURI\", new ExtensionMapping(\"destinationTranslatedZoneURI\", StringType, null)),\n entry(\"duid\", new ExtensionMapping(\"destinationUserId\", StringType, \"destination.user.id\")),\n entry(\"duser\", new ExtensionMapping(\"destinationUserName\", StringType, \"destination.user.name\")),\n entry(\"dpriv\", new ExtensionMapping(\"destinationUserPrivileges\", StringType, \"destination.user.group.name\")),\n entry(\"destinationZoneExternalID\", new ExtensionMapping(\"destinationZoneExternalID\", StringType, null)),\n entry(\"destinationZoneURI\", new ExtensionMapping(\"destinationZoneURI\", StringType, null)),\n entry(\"act\", new ExtensionMapping(\"deviceAction\", StringType, \"event.action\")),\n entry(\"dvc\", new ExtensionMapping(\"deviceAddress\", 
IPType, \"observer.ip\")),\n entry(\"cfp1Label\", new ExtensionMapping(\"deviceCustomFloatingPoint1Label\", StringType, null)),\n entry(\"cfp3Label\", new ExtensionMapping(\"deviceCustomFloatingPoint3Label\", StringType, null)),\n entry(\"cfp4Label\", new ExtensionMapping(\"deviceCustomFloatingPoint4Label\", StringType, null)),\n entry(\"deviceCustomDate1\", new ExtensionMapping(\"deviceCustomDate1\", TimestampType, null)),\n entry(\"deviceCustomDate1Label\", new ExtensionMapping(\"deviceCustomDate1Label\", StringType, null)),\n entry(\"deviceCustomDate2\", new ExtensionMapping(\"deviceCustomDate2\", TimestampType, null)),\n entry(\"deviceCustomDate2Label\", new ExtensionMapping(\"deviceCustomDate2Label\", StringType, null)),\n entry(\"cfp1\", new ExtensionMapping(\"deviceCustomFloatingPoint1\", FloatType, null)),\n entry(\"cfp2\", new ExtensionMapping(\"deviceCustomFloatingPoint2\", FloatType, null)),\n entry(\"cfp2Label\", new ExtensionMapping(\"deviceCustomFloatingPoint2Label\", StringType, null)),\n entry(\"cfp3\", new ExtensionMapping(\"deviceCustomFloatingPoint3\", FloatType, null)),\n entry(\"cfp4\", new ExtensionMapping(\"deviceCustomFloatingPoint4\", FloatType, null)),\n entry(\"c6a1\", new ExtensionMapping(\"deviceCustomIPv6Address1\", IPType, null)),\n entry(\"c6a1Label\", new ExtensionMapping(\"deviceCustomIPv6Address1Label\", StringType, null)),\n entry(\"c6a2\", new ExtensionMapping(\"deviceCustomIPv6Address2\", IPType, null)),\n entry(\"c6a2Label\", new ExtensionMapping(\"deviceCustomIPv6Address2Label\", StringType, null)),\n entry(\"c6a3\", new ExtensionMapping(\"deviceCustomIPv6Address3\", IPType, null)),\n entry(\"c6a3Label\", new ExtensionMapping(\"deviceCustomIPv6Address3Label\", StringType, null)),\n entry(\"c6a4\", new ExtensionMapping(\"deviceCustomIPv6Address4\", IPType, null)),\n entry(\"C6a4Label\", new ExtensionMapping(\"deviceCustomIPv6Address4Label\", StringType, null)),\n entry(\"cn1\", new ExtensionMapping(\"deviceCustomNumber1\", LongType, null)),\n entry(\"cn1Label\", new ExtensionMapping(\"deviceCustomNumber1Label\", StringType, null)),\n entry(\"cn2\", new ExtensionMapping(\"deviceCustomNumber2\", LongType, null)),\n entry(\"cn2Label\", new ExtensionMapping(\"deviceCustomNumber2Label\", StringType, null)),\n entry(\"cn3\", new ExtensionMapping(\"deviceCustomNumber3\", LongType, null)),\n entry(\"cn3Label\", new ExtensionMapping(\"deviceCustomNumber3Label\", StringType, null)),\n entry(\"cs1\", new ExtensionMapping(\"deviceCustomString1\", StringType, null)),\n entry(\"cs1Label\", new ExtensionMapping(\"deviceCustomString1Label\", StringType, null)),\n entry(\"cs2\", new ExtensionMapping(\"deviceCustomString2\", StringType, null)),\n entry(\"cs2Label\", new ExtensionMapping(\"deviceCustomString2Label\", StringType, null)),\n entry(\"cs3\", new ExtensionMapping(\"deviceCustomString3\", StringType, null)),\n entry(\"cs3Label\", new ExtensionMapping(\"deviceCustomString3Label\", StringType, null)),\n entry(\"cs4\", new ExtensionMapping(\"deviceCustomString4\", StringType, null)),\n entry(\"cs4Label\", new ExtensionMapping(\"deviceCustomString4Label\", StringType, null)),\n entry(\"cs5\", new ExtensionMapping(\"deviceCustomString5\", StringType, null)),\n entry(\"cs5Label\", new ExtensionMapping(\"deviceCustomString5Label\", StringType, null)),\n entry(\"cs6\", new ExtensionMapping(\"deviceCustomString6\", StringType, null)),\n entry(\"cs6Label\", new ExtensionMapping(\"deviceCustomString6Label\", StringType, null)),\n entry(\"deviceDirection\", new 
ExtensionMapping(\"deviceDirection\", StringType, \"network.direction\")),\n entry(\"deviceDnsDomain\", new ExtensionMapping(\"deviceDnsDomain\", StringType, \"observer.registered_domain\")),\n entry(\"cat\", new ExtensionMapping(\"deviceEventCategory\", StringType, null)),\n entry(\"deviceExternalId\", new ExtensionMapping(\"deviceExternalId\", StringType, \"observer.name\")),\n entry(\"deviceFacility\", new ExtensionMapping(\"deviceFacility\", LongType, \"log.syslog.facility.code\")),\n entry(\"dvchost\", new ExtensionMapping(\"deviceHostName\", StringType, \"observer.hostname\")),\n entry(\"deviceInboundInterface\", new ExtensionMapping(\"deviceInboundInterface\", StringType, \"observer.ingress.interface.name\")),\n entry(\"dvcmac\", new ExtensionMapping(\"deviceMacAddress\", MACAddressType, \"observer.mac\")),\n entry(\"deviceNtDomain\", new ExtensionMapping(\"deviceNtDomain\", StringType, null)),\n entry(\"deviceOutboundInterface\", new ExtensionMapping(\"deviceOutboundInterface\", StringType, \"observer.egress.interface.name\")),\n entry(\"devicePayloadId\", new ExtensionMapping(\"devicePayloadId\", StringType, \"event.id\")),\n entry(\"dvcpid\", new ExtensionMapping(\"deviceProcessId\", LongType, \"process.pid\")),\n entry(\"deviceProcessName\", new ExtensionMapping(\"deviceProcessName\", StringType, \"process.name\")),\n entry(\"rt\", new ExtensionMapping(\"deviceReceiptTime\", TimestampType, \"@timestamp\")),\n entry(\"dtz\", new ExtensionMapping(\"deviceTimeZone\", StringType, \"event.timezone\")),\n entry(\"deviceTranslatedAddress\", new ExtensionMapping(\"deviceTranslatedAddress\", IPType, \"host.nat.ip\")),\n entry(\"deviceTranslatedZoneExternalID\", new ExtensionMapping(\"deviceTranslatedZoneExternalID\", StringType, null)),\n entry(\"deviceTranslatedZoneURI\", new ExtensionMapping(\"deviceTranslatedZoneURI\", StringType, null)),\n entry(\"deviceZoneExternalID\", new ExtensionMapping(\"deviceZoneExternalID\", StringType, null)),\n entry(\"deviceZoneURI\", new ExtensionMapping(\"deviceZoneURI\", StringType, null)),\n entry(\"end\", new ExtensionMapping(\"endTime\", TimestampType, \"event.end\")),\n entry(\"eventId\", new ExtensionMapping(\"eventId\", StringType, \"event.id\")),\n entry(\"outcome\", new ExtensionMapping(\"eventOutcome\", StringType, \"event.outcome\")),\n entry(\"externalId\", new ExtensionMapping(\"externalId\", StringType, null)),\n entry(\"fileCreateTime\", new ExtensionMapping(\"fileCreateTime\", TimestampType, \"file.created\")),\n entry(\"fileHash\", new ExtensionMapping(\"fileHash\", StringType, \"file.hash\")),\n entry(\"fileId\", new ExtensionMapping(\"fileId\", StringType, \"file.inode\")),\n entry(\"fileModificationTime\", new ExtensionMapping(\"fileModificationTime\", TimestampType, \"file.mtime\")),\n entry(\"flexNumber1\", new ExtensionMapping(\"deviceFlexNumber1\", LongType, null)),\n entry(\"flexNumber1Label\", new ExtensionMapping(\"deviceFlexNumber1Label\", StringType, null)),\n entry(\"flexNumber2\", new ExtensionMapping(\"deviceFlexNumber2\", LongType, null)),\n entry(\"flexNumber2Label\", new ExtensionMapping(\"deviceFlexNumber2Label\", StringType, null)),\n entry(\"fname\", new ExtensionMapping(\"filename\", StringType, \"file.name\")),\n entry(\"filePath\", new ExtensionMapping(\"filePath\", StringType, \"file.path\")),\n entry(\"filePermission\", new ExtensionMapping(\"filePermission\", StringType, \"file.group\")),\n entry(\"fsize\", new ExtensionMapping(\"fileSize\", LongType, \"file.size\")),\n entry(\"fileType\", new 
ExtensionMapping(\"fileType\", StringType, \"file.type\")),\n entry(\"flexDate1\", new ExtensionMapping(\"flexDate1\", TimestampType, null)),\n entry(\"flexDate1Label\", new ExtensionMapping(\"flexDate1Label\", StringType, null)),\n entry(\"flexString1\", new ExtensionMapping(\"flexString1\", StringType, null)),\n entry(\"flexString2\", new ExtensionMapping(\"flexString2\", StringType, null)),\n entry(\"flexString1Label\", new ExtensionMapping(\"flexString1Label\", StringType, null)),\n entry(\"flexString2Label\", new ExtensionMapping(\"flexString2Label\", StringType, null)),\n entry(\"msg\", new ExtensionMapping(\"message\", StringType, \"message\")),\n entry(\"oldFileCreateTime\", new ExtensionMapping(\"oldFileCreateTime\", TimestampType, null)),\n entry(\"oldFileHash\", new ExtensionMapping(\"oldFileHash\", StringType, null)),\n entry(\"oldFileId\", new ExtensionMapping(\"oldFileId\", StringType, null)),\n entry(\"oldFileModificationTime\", new ExtensionMapping(\"oldFileModificationTime\", TimestampType, null)),\n entry(\"oldFileName\", new ExtensionMapping(\"oldFileName\", StringType, null)),\n entry(\"oldFilePath\", new ExtensionMapping(\"oldFilePath\", StringType, null)),\n entry(\"oldFilePermission\", new ExtensionMapping(\"oldFilePermission\", StringType, null)),\n entry(\"oldFileSize\", new ExtensionMapping(\"oldFileSize\", IntegerType, null)),\n entry(\"oldFileType\", new ExtensionMapping(\"oldFileType\", StringType, null)),\n entry(\"rawEvent\", new ExtensionMapping(\"rawEvent\", StringType, \"event.original\")),\n entry(\"reason\", new ExtensionMapping(\"Reason\", StringType, \"event.reason\")),\n entry(\"requestClientApplication\", new ExtensionMapping(\"requestClientApplication\", StringType, \"user_agent.original\")),\n entry(\"requestContext\", new ExtensionMapping(\"requestContext\", StringType, \"http.request.referrer\")),\n entry(\"requestCookies\", new ExtensionMapping(\"requestCookies\", StringType, null)),\n entry(\"requestMethod\", new ExtensionMapping(\"requestMethod\", StringType, \"http.request.method\")),\n entry(\"request\", new ExtensionMapping(\"requestUrl\", StringType, \"url.original\")),\n entry(\"src\", new ExtensionMapping(\"sourceAddress\", IPType, \"source.ip\")),\n entry(\"sourceDnsDomain\", new ExtensionMapping(\"sourceDnsDomain\", StringType, \"source.domain\")),\n entry(\"slat\", new ExtensionMapping(\"sourceGeoLatitude\", DoubleType, \"source.geo.location.lat\")),\n entry(\"slong\", new ExtensionMapping(\"sourceGeoLongitude\", DoubleType, \"source.geo.location.lon\")),\n entry(\"shost\", new ExtensionMapping(\"sourceHostName\", StringType, \"source.domain\")),\n entry(\"smac\", new ExtensionMapping(\"sourceMacAddress\", MACAddressType, \"source.mac\")),\n entry(\"sntdom\", new ExtensionMapping(\"sourceNtDomain\", StringType, \"source.registered_domain\")),\n entry(\"spt\", new ExtensionMapping(\"sourcePort\", IntegerType, \"source.port\")),\n entry(\"spid\", new ExtensionMapping(\"sourceProcessId\", LongType, \"source.process.pid\")),\n entry(\"sproc\", new ExtensionMapping(\"sourceProcessName\", StringType, \"source.process.name\")),\n entry(\"sourceServiceName\", new ExtensionMapping(\"sourceServiceName\", StringType, \"source.service.name\")),\n entry(\"sourceTranslatedAddress\", new ExtensionMapping(\"sourceTranslatedAddress\", IPType, \"source.nat.ip\")),\n entry(\"sourceTranslatedPort\", new ExtensionMapping(\"sourceTranslatedPort\", IntegerType, \"source.nat.port\")),\n entry(\"sourceTranslatedZoneExternalID\", new 
ExtensionMapping(\"sourceTranslatedZoneExternalID\", StringType, null)),\n entry(\"sourceTranslatedZoneURI\", new ExtensionMapping(\"sourceTranslatedZoneURI\", StringType, null)),\n entry(\"suid\", new ExtensionMapping(\"sourceUserId\", StringType, \"source.user.id\")),\n entry(\"suser\", new ExtensionMapping(\"sourceUserName\", StringType, \"source.user.name\")),\n entry(\"spriv\", new ExtensionMapping(\"sourceUserPrivileges\", StringType, \"source.user.group.name\")),\n entry(\"sourceZoneExternalID\", new ExtensionMapping(\"sourceZoneExternalID\", StringType, null)),\n entry(\"sourceZoneURI\", new ExtensionMapping(\"sourceZoneURI\", StringType, null)),\n entry(\"start\", new ExtensionMapping(\"startTime\", TimestampType, \"event.start\")),\n entry(\"proto\", new ExtensionMapping(\"transportProtocol\", StringType, \"network.transport\")),\n entry(\"type\", new ExtensionMapping(\"type\", IntegerType, \"event.kind\")),\n entry(\"catdt\", new ExtensionMapping(\"categoryDeviceType\", StringType, null)),\n entry(\"mrt\", new ExtensionMapping(\"managerReceiptTime\", TimestampType, \"event.ingested\"))\n );\n\n private static final Set ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = Set.of(\"incomplete CEF header\");\n private static final List TIME_LAYOUTS = List.of(\n // MMM dd HH:mm:ss.SSS zzz\n \"MMM dd HH:mm:ss.SSS z\",\n \"MMM dd HH:mm:ss.SSS Z\",\n \"MMM dd HH:mm:ss.SSS 'GMT'XX:XX\",\n // MMM dd HH:mm:sss.SSS\n \"MMM dd HH:mm:ss.SSS\",\n // MMM dd HH:mm:ss zzz\n \"MMM dd HH:mm:ss z\",\n \"MMM dd HH:mm:ss Z\",\n \"MMM dd HH:mm:ss 'GMT'XX:XX\",\n // MMM dd HH:mm:ss\n \"MMM dd HH:mm:ss\",\n // MMM dd yyyy HH:mm:ss.SSS zzz\n \"MMM dd yyyy HH:mm:ss.SSS z\",\n \"MMM dd yyyy HH:mm:ss.SSS Z\",\n \"MMM dd yyyy HH:mm:ss.SSS 'GMT'XX:XX\",\n // MMM dd yyyy HH:mm:ss.SSS\n \"MMM dd yyyy HH:mm:ss.SSS\",\n // MMM dd yyyy HH:mm:ss zzz\n \"MMM dd yyyy HH:mm:ss z\",\n \"MMM dd yyyy HH:mm:ss Z\",\n \"MMM dd yyyy HH:mm:ss 'GMT'XX:XX\",\n // MMM dd yyyy HH:mm:ss\n \"MMM dd yyyy HH:mm:ss\"\n );\n\n private static final List CHRONO_FIELDS = List.of(\n NANO_OF_SECOND,\n SECOND_OF_DAY,\n MINUTE_OF_DAY,\n HOUR_OF_DAY,\n DAY_OF_MONTH,\n MONTH_OF_YEAR\n );\n\n CefEvent process(String cefString) {\n List headers = new ArrayList<>();\n Matcher matcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n int extensionStart = 0;\n\n for (int i = 0; i < 7 && matcher.find(); i++) {\n String field = matcher.group(1);\n field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n headers.add(field);\n extensionStart = matcher.end();\n }\n\n if (headers.isEmpty() == false && headers.getFirst().startsWith(\"CEF:\")) {\n CefEvent event = new CefEvent();\n // Add error message if there are not enough header fields\n if (headers.size() != 7) {\n event.addRootMapping(\"error.message\", new HashSet<>(ERROR_MESSAGE_INCOMPLETE_CEF_HEADER));", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2033704333", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 122491, + "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", + "discussion_id": "2033704333", + "commented_code": "@@ -0,0 +1,565 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.common.Strings;\n+import org.elasticsearch.common.network.InetAddresses;\n+import org.elasticsearch.common.network.NetworkAddress;\n+import org.elasticsearch.common.time.DateFormatters;\n+import org.elasticsearch.core.Nullable;\n+\n+import java.time.Instant;\n+import java.time.LocalDate;\n+import java.time.ZoneId;\n+import java.time.ZoneOffset;\n+import java.time.ZonedDateTime;\n+import java.time.format.DateTimeFormatter;\n+import java.time.format.DateTimeParseException;\n+import java.time.temporal.ChronoField;\n+import java.time.temporal.TemporalAccessor;\n+import java.time.temporal.WeekFields;\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.HashSet;\n+import java.util.List;\n+import java.util.Locale;\n+import java.util.Map;\n+import java.util.Objects;\n+import java.util.Set;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+\n+import static java.time.temporal.ChronoField.DAY_OF_MONTH;\n+import static java.time.temporal.ChronoField.HOUR_OF_DAY;\n+import static java.time.temporal.ChronoField.MINUTE_OF_DAY;\n+import static java.time.temporal.ChronoField.MONTH_OF_YEAR;\n+import static java.time.temporal.ChronoField.NANO_OF_SECOND;\n+import static java.time.temporal.ChronoField.SECOND_OF_DAY;\n+import static java.util.Map.entry;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.DoubleType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.FloatType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.IPType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.IntegerType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.LongType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.MACAddressType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.StringType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.TimestampType;\n+\n+final class CefParser {\n+ private final boolean removeEmptyValues;\n+ private final ZoneId timezone;\n+\n+ CefParser(ZoneId timezone, boolean removeEmptyValues) {\n+ this.removeEmptyValues = removeEmptyValues;\n+ this.timezone = timezone;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:[^\\\\s\\\\\\\\]|\\\\\\\\[^|]|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + 
EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+\n+ // Comprehensive regex pattern to match various MAC address formats\n+ private static final String MAC_ADDRESS_REGEX = \"^(\" +\n+ // Combined colon and hyphen separated 6-group patterns\n+ \"(([0-9A-Fa-f]{2}[:|-]){5}[0-9A-Fa-f]{2})|\" +\n+ // Dot-separated 6-group pattern\n+ \"([0-9A-Fa-f]{4}\\\\.){2}[0-9A-Fa-f]{4}|\" +\n+ // Combined colon and hyphen separated 8-group patterns\n+ \"([0-9A-Fa-f]{2}[:|-]){7}[0-9A-Fa-f]{2}|\" +\n+ // Dot-separated EUI-64\n+ \"([0-9A-Fa-f]{4}\\\\.){3}[0-9A-Fa-f]{4}\" + \")$\";\n+\n+ private static final Pattern MAC_ADDRESS_PATTERN = Pattern.compile(MAC_ADDRESS_REGEX);\n+ private static final int EUI48_HEX_LENGTH = 48 / 4;\n+ private static final int EUI64_HEX_LENGTH = 64 / 4;\n+ private static final int EUI64_HEX_WITH_SEPARATOR_MAX_LENGTH = EUI64_HEX_LENGTH + EUI64_HEX_LENGTH / 2 - 1;\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = Map.ofEntries(\n+ entry(\"\\\\\\\\\", \"\\\\\"),\n+ entry(\"\\\\=\", \"=\"),\n+ entry(\"\\\\\\n\", \"\\n\"),\n+ entry(\"\\\\\\r\", \"\\r\")\n+ );\n+\n+ enum DataType {\n+ IntegerType,\n+ LongType,\n+ FloatType,\n+ DoubleType,\n+ StringType,\n+ BooleanType,\n+ IPType,\n+ MACAddressType,\n+ TimestampType\n+ }\n+\n+ private static final Map EXTENSION_MAPPINGS = Map.ofEntries(\n+ entry(\"agt\", new ExtensionMapping(\"agentAddress\", IPType, \"agent.ip\")),\n+ entry(\"agentDnsDomain\", new ExtensionMapping(\"agentDnsDomain\", StringType, \"agent.name\")),\n+ entry(\"ahost\", new ExtensionMapping(\"agentHostName\", StringType, \"agent.name\")),\n+ entry(\"aid\", new ExtensionMapping(\"agentId\", StringType, \"agent.id\")),\n+ entry(\"amac\", new ExtensionMapping(\"agentMacAddress\", MACAddressType, \"agent.mac\")),\n+ entry(\"agentNtDomain\", new ExtensionMapping(\"agentNtDomain\", StringType, null)),\n+ entry(\"art\", new ExtensionMapping(\"agentReceiptTime\", TimestampType, \"event.created\")),\n+ entry(\"atz\", new ExtensionMapping(\"agentTimeZone\", StringType, null)),\n+ entry(\"agentTranslatedAddress\", new ExtensionMapping(\"agentTranslatedAddress\", IPType, null)),\n+ entry(\"agentTranslatedZoneExternalID\", new ExtensionMapping(\"agentTranslatedZoneExternalID\", StringType, null)),\n+ entry(\"agentTranslatedZoneURI\", new ExtensionMapping(\"agentTranslatedZoneURI\", StringType, null)),\n+ entry(\"at\", new ExtensionMapping(\"agentType\", StringType, \"agent.type\")),\n+ entry(\"av\", new ExtensionMapping(\"agentVersion\", StringType, \"agent.version\")),\n+ entry(\"agentZoneExternalID\", new ExtensionMapping(\"agentZoneExternalID\", StringType, null)),\n+ entry(\"agentZoneURI\", new ExtensionMapping(\"agentZoneURI\", StringType, null)),\n+ entry(\"app\", new ExtensionMapping(\"applicationProtocol\", StringType, \"network.protocol\")),\n+ entry(\"cnt\", new ExtensionMapping(\"baseEventCount\", IntegerType, null)),\n+ entry(\"in\", new ExtensionMapping(\"bytesIn\", LongType, \"source.bytes\")),\n+ entry(\"out\", new ExtensionMapping(\"bytesOut\", LongType, \"destination.bytes\")),\n+ entry(\"customerExternalID\", new ExtensionMapping(\"customerExternalID\", StringType, \"organization.id\")),\n+ entry(\"customerURI\", new ExtensionMapping(\"customerURI\", StringType, \"organization.name\")),\n+ entry(\"dst\", new ExtensionMapping(\"destinationAddress\", IPType, \"destination.ip\")),\n+ entry(\"destinationDnsDomain\", new ExtensionMapping(\"destinationDnsDomain\", StringType, \"destination.registered_domain\")),\n+ entry(\"dlat\", new 
ExtensionMapping(\"destinationGeoLatitude\", DoubleType, \"destination.geo.location.lat\")),\n+ entry(\"dlong\", new ExtensionMapping(\"destinationGeoLongitude\", DoubleType, \"destination.geo.location.lon\")),\n+ entry(\"dhost\", new ExtensionMapping(\"destinationHostName\", StringType, \"destination.domain\")),\n+ entry(\"dmac\", new ExtensionMapping(\"destinationMacAddress\", MACAddressType, \"destination.mac\")),\n+ entry(\"dntdom\", new ExtensionMapping(\"destinationNtDomain\", StringType, \"destination.registered_domain\")),\n+ entry(\"dpt\", new ExtensionMapping(\"destinationPort\", IntegerType, \"destination.port\")),\n+ entry(\"dpid\", new ExtensionMapping(\"destinationProcessId\", LongType, \"destination.process.pid\")),\n+ entry(\"dproc\", new ExtensionMapping(\"destinationProcessName\", StringType, \"destination.process.name\")),\n+ entry(\"destinationServiceName\", new ExtensionMapping(\"destinationServiceName\", StringType, \"destination.service.name\")),\n+ entry(\"destinationTranslatedAddress\", new ExtensionMapping(\"destinationTranslatedAddress\", IPType, \"destination.nat.ip\")),\n+ entry(\"destinationTranslatedPort\", new ExtensionMapping(\"destinationTranslatedPort\", IntegerType, \"destination.nat.port\")),\n+ entry(\"destinationTranslatedZoneExternalID\", new ExtensionMapping(\"destinationTranslatedZoneExternalID\", StringType, null)),\n+ entry(\"destinationTranslatedZoneURI\", new ExtensionMapping(\"destinationTranslatedZoneURI\", StringType, null)),\n+ entry(\"duid\", new ExtensionMapping(\"destinationUserId\", StringType, \"destination.user.id\")),\n+ entry(\"duser\", new ExtensionMapping(\"destinationUserName\", StringType, \"destination.user.name\")),\n+ entry(\"dpriv\", new ExtensionMapping(\"destinationUserPrivileges\", StringType, \"destination.user.group.name\")),\n+ entry(\"destinationZoneExternalID\", new ExtensionMapping(\"destinationZoneExternalID\", StringType, null)),\n+ entry(\"destinationZoneURI\", new ExtensionMapping(\"destinationZoneURI\", StringType, null)),\n+ entry(\"act\", new ExtensionMapping(\"deviceAction\", StringType, \"event.action\")),\n+ entry(\"dvc\", new ExtensionMapping(\"deviceAddress\", IPType, \"observer.ip\")),\n+ entry(\"cfp1Label\", new ExtensionMapping(\"deviceCustomFloatingPoint1Label\", StringType, null)),\n+ entry(\"cfp3Label\", new ExtensionMapping(\"deviceCustomFloatingPoint3Label\", StringType, null)),\n+ entry(\"cfp4Label\", new ExtensionMapping(\"deviceCustomFloatingPoint4Label\", StringType, null)),\n+ entry(\"deviceCustomDate1\", new ExtensionMapping(\"deviceCustomDate1\", TimestampType, null)),\n+ entry(\"deviceCustomDate1Label\", new ExtensionMapping(\"deviceCustomDate1Label\", StringType, null)),\n+ entry(\"deviceCustomDate2\", new ExtensionMapping(\"deviceCustomDate2\", TimestampType, null)),\n+ entry(\"deviceCustomDate2Label\", new ExtensionMapping(\"deviceCustomDate2Label\", StringType, null)),\n+ entry(\"cfp1\", new ExtensionMapping(\"deviceCustomFloatingPoint1\", FloatType, null)),\n+ entry(\"cfp2\", new ExtensionMapping(\"deviceCustomFloatingPoint2\", FloatType, null)),\n+ entry(\"cfp2Label\", new ExtensionMapping(\"deviceCustomFloatingPoint2Label\", StringType, null)),\n+ entry(\"cfp3\", new ExtensionMapping(\"deviceCustomFloatingPoint3\", FloatType, null)),\n+ entry(\"cfp4\", new ExtensionMapping(\"deviceCustomFloatingPoint4\", FloatType, null)),\n+ entry(\"c6a1\", new ExtensionMapping(\"deviceCustomIPv6Address1\", IPType, null)),\n+ entry(\"c6a1Label\", new 
ExtensionMapping(\"deviceCustomIPv6Address1Label\", StringType, null)),\n+ entry(\"c6a2\", new ExtensionMapping(\"deviceCustomIPv6Address2\", IPType, null)),\n+ entry(\"c6a2Label\", new ExtensionMapping(\"deviceCustomIPv6Address2Label\", StringType, null)),\n+ entry(\"c6a3\", new ExtensionMapping(\"deviceCustomIPv6Address3\", IPType, null)),\n+ entry(\"c6a3Label\", new ExtensionMapping(\"deviceCustomIPv6Address3Label\", StringType, null)),\n+ entry(\"c6a4\", new ExtensionMapping(\"deviceCustomIPv6Address4\", IPType, null)),\n+ entry(\"C6a4Label\", new ExtensionMapping(\"deviceCustomIPv6Address4Label\", StringType, null)),\n+ entry(\"cn1\", new ExtensionMapping(\"deviceCustomNumber1\", LongType, null)),\n+ entry(\"cn1Label\", new ExtensionMapping(\"deviceCustomNumber1Label\", StringType, null)),\n+ entry(\"cn2\", new ExtensionMapping(\"deviceCustomNumber2\", LongType, null)),\n+ entry(\"cn2Label\", new ExtensionMapping(\"deviceCustomNumber2Label\", StringType, null)),\n+ entry(\"cn3\", new ExtensionMapping(\"deviceCustomNumber3\", LongType, null)),\n+ entry(\"cn3Label\", new ExtensionMapping(\"deviceCustomNumber3Label\", StringType, null)),\n+ entry(\"cs1\", new ExtensionMapping(\"deviceCustomString1\", StringType, null)),\n+ entry(\"cs1Label\", new ExtensionMapping(\"deviceCustomString1Label\", StringType, null)),\n+ entry(\"cs2\", new ExtensionMapping(\"deviceCustomString2\", StringType, null)),\n+ entry(\"cs2Label\", new ExtensionMapping(\"deviceCustomString2Label\", StringType, null)),\n+ entry(\"cs3\", new ExtensionMapping(\"deviceCustomString3\", StringType, null)),\n+ entry(\"cs3Label\", new ExtensionMapping(\"deviceCustomString3Label\", StringType, null)),\n+ entry(\"cs4\", new ExtensionMapping(\"deviceCustomString4\", StringType, null)),\n+ entry(\"cs4Label\", new ExtensionMapping(\"deviceCustomString4Label\", StringType, null)),\n+ entry(\"cs5\", new ExtensionMapping(\"deviceCustomString5\", StringType, null)),\n+ entry(\"cs5Label\", new ExtensionMapping(\"deviceCustomString5Label\", StringType, null)),\n+ entry(\"cs6\", new ExtensionMapping(\"deviceCustomString6\", StringType, null)),\n+ entry(\"cs6Label\", new ExtensionMapping(\"deviceCustomString6Label\", StringType, null)),\n+ entry(\"deviceDirection\", new ExtensionMapping(\"deviceDirection\", StringType, \"network.direction\")),\n+ entry(\"deviceDnsDomain\", new ExtensionMapping(\"deviceDnsDomain\", StringType, \"observer.registered_domain\")),\n+ entry(\"cat\", new ExtensionMapping(\"deviceEventCategory\", StringType, null)),\n+ entry(\"deviceExternalId\", new ExtensionMapping(\"deviceExternalId\", StringType, \"observer.name\")),\n+ entry(\"deviceFacility\", new ExtensionMapping(\"deviceFacility\", LongType, \"log.syslog.facility.code\")),\n+ entry(\"dvchost\", new ExtensionMapping(\"deviceHostName\", StringType, \"observer.hostname\")),\n+ entry(\"deviceInboundInterface\", new ExtensionMapping(\"deviceInboundInterface\", StringType, \"observer.ingress.interface.name\")),\n+ entry(\"dvcmac\", new ExtensionMapping(\"deviceMacAddress\", MACAddressType, \"observer.mac\")),\n+ entry(\"deviceNtDomain\", new ExtensionMapping(\"deviceNtDomain\", StringType, null)),\n+ entry(\"deviceOutboundInterface\", new ExtensionMapping(\"deviceOutboundInterface\", StringType, \"observer.egress.interface.name\")),\n+ entry(\"devicePayloadId\", new ExtensionMapping(\"devicePayloadId\", StringType, \"event.id\")),\n+ entry(\"dvcpid\", new ExtensionMapping(\"deviceProcessId\", LongType, \"process.pid\")),\n+ entry(\"deviceProcessName\", new 
ExtensionMapping(\"deviceProcessName\", StringType, \"process.name\")),\n+ entry(\"rt\", new ExtensionMapping(\"deviceReceiptTime\", TimestampType, \"@timestamp\")),\n+ entry(\"dtz\", new ExtensionMapping(\"deviceTimeZone\", StringType, \"event.timezone\")),\n+ entry(\"deviceTranslatedAddress\", new ExtensionMapping(\"deviceTranslatedAddress\", IPType, \"host.nat.ip\")),\n+ entry(\"deviceTranslatedZoneExternalID\", new ExtensionMapping(\"deviceTranslatedZoneExternalID\", StringType, null)),\n+ entry(\"deviceTranslatedZoneURI\", new ExtensionMapping(\"deviceTranslatedZoneURI\", StringType, null)),\n+ entry(\"deviceZoneExternalID\", new ExtensionMapping(\"deviceZoneExternalID\", StringType, null)),\n+ entry(\"deviceZoneURI\", new ExtensionMapping(\"deviceZoneURI\", StringType, null)),\n+ entry(\"end\", new ExtensionMapping(\"endTime\", TimestampType, \"event.end\")),\n+ entry(\"eventId\", new ExtensionMapping(\"eventId\", StringType, \"event.id\")),\n+ entry(\"outcome\", new ExtensionMapping(\"eventOutcome\", StringType, \"event.outcome\")),\n+ entry(\"externalId\", new ExtensionMapping(\"externalId\", StringType, null)),\n+ entry(\"fileCreateTime\", new ExtensionMapping(\"fileCreateTime\", TimestampType, \"file.created\")),\n+ entry(\"fileHash\", new ExtensionMapping(\"fileHash\", StringType, \"file.hash\")),\n+ entry(\"fileId\", new ExtensionMapping(\"fileId\", StringType, \"file.inode\")),\n+ entry(\"fileModificationTime\", new ExtensionMapping(\"fileModificationTime\", TimestampType, \"file.mtime\")),\n+ entry(\"flexNumber1\", new ExtensionMapping(\"deviceFlexNumber1\", LongType, null)),\n+ entry(\"flexNumber1Label\", new ExtensionMapping(\"deviceFlexNumber1Label\", StringType, null)),\n+ entry(\"flexNumber2\", new ExtensionMapping(\"deviceFlexNumber2\", LongType, null)),\n+ entry(\"flexNumber2Label\", new ExtensionMapping(\"deviceFlexNumber2Label\", StringType, null)),\n+ entry(\"fname\", new ExtensionMapping(\"filename\", StringType, \"file.name\")),\n+ entry(\"filePath\", new ExtensionMapping(\"filePath\", StringType, \"file.path\")),\n+ entry(\"filePermission\", new ExtensionMapping(\"filePermission\", StringType, \"file.group\")),\n+ entry(\"fsize\", new ExtensionMapping(\"fileSize\", LongType, \"file.size\")),\n+ entry(\"fileType\", new ExtensionMapping(\"fileType\", StringType, \"file.type\")),\n+ entry(\"flexDate1\", new ExtensionMapping(\"flexDate1\", TimestampType, null)),\n+ entry(\"flexDate1Label\", new ExtensionMapping(\"flexDate1Label\", StringType, null)),\n+ entry(\"flexString1\", new ExtensionMapping(\"flexString1\", StringType, null)),\n+ entry(\"flexString2\", new ExtensionMapping(\"flexString2\", StringType, null)),\n+ entry(\"flexString1Label\", new ExtensionMapping(\"flexString1Label\", StringType, null)),\n+ entry(\"flexString2Label\", new ExtensionMapping(\"flexString2Label\", StringType, null)),\n+ entry(\"msg\", new ExtensionMapping(\"message\", StringType, \"message\")),\n+ entry(\"oldFileCreateTime\", new ExtensionMapping(\"oldFileCreateTime\", TimestampType, null)),\n+ entry(\"oldFileHash\", new ExtensionMapping(\"oldFileHash\", StringType, null)),\n+ entry(\"oldFileId\", new ExtensionMapping(\"oldFileId\", StringType, null)),\n+ entry(\"oldFileModificationTime\", new ExtensionMapping(\"oldFileModificationTime\", TimestampType, null)),\n+ entry(\"oldFileName\", new ExtensionMapping(\"oldFileName\", StringType, null)),\n+ entry(\"oldFilePath\", new ExtensionMapping(\"oldFilePath\", StringType, null)),\n+ entry(\"oldFilePermission\", new 
ExtensionMapping(\"oldFilePermission\", StringType, null)),\n+ entry(\"oldFileSize\", new ExtensionMapping(\"oldFileSize\", IntegerType, null)),\n+ entry(\"oldFileType\", new ExtensionMapping(\"oldFileType\", StringType, null)),\n+ entry(\"rawEvent\", new ExtensionMapping(\"rawEvent\", StringType, \"event.original\")),\n+ entry(\"reason\", new ExtensionMapping(\"Reason\", StringType, \"event.reason\")),\n+ entry(\"requestClientApplication\", new ExtensionMapping(\"requestClientApplication\", StringType, \"user_agent.original\")),\n+ entry(\"requestContext\", new ExtensionMapping(\"requestContext\", StringType, \"http.request.referrer\")),\n+ entry(\"requestCookies\", new ExtensionMapping(\"requestCookies\", StringType, null)),\n+ entry(\"requestMethod\", new ExtensionMapping(\"requestMethod\", StringType, \"http.request.method\")),\n+ entry(\"request\", new ExtensionMapping(\"requestUrl\", StringType, \"url.original\")),\n+ entry(\"src\", new ExtensionMapping(\"sourceAddress\", IPType, \"source.ip\")),\n+ entry(\"sourceDnsDomain\", new ExtensionMapping(\"sourceDnsDomain\", StringType, \"source.domain\")),\n+ entry(\"slat\", new ExtensionMapping(\"sourceGeoLatitude\", DoubleType, \"source.geo.location.lat\")),\n+ entry(\"slong\", new ExtensionMapping(\"sourceGeoLongitude\", DoubleType, \"source.geo.location.lon\")),\n+ entry(\"shost\", new ExtensionMapping(\"sourceHostName\", StringType, \"source.domain\")),\n+ entry(\"smac\", new ExtensionMapping(\"sourceMacAddress\", MACAddressType, \"source.mac\")),\n+ entry(\"sntdom\", new ExtensionMapping(\"sourceNtDomain\", StringType, \"source.registered_domain\")),\n+ entry(\"spt\", new ExtensionMapping(\"sourcePort\", IntegerType, \"source.port\")),\n+ entry(\"spid\", new ExtensionMapping(\"sourceProcessId\", LongType, \"source.process.pid\")),\n+ entry(\"sproc\", new ExtensionMapping(\"sourceProcessName\", StringType, \"source.process.name\")),\n+ entry(\"sourceServiceName\", new ExtensionMapping(\"sourceServiceName\", StringType, \"source.service.name\")),\n+ entry(\"sourceTranslatedAddress\", new ExtensionMapping(\"sourceTranslatedAddress\", IPType, \"source.nat.ip\")),\n+ entry(\"sourceTranslatedPort\", new ExtensionMapping(\"sourceTranslatedPort\", IntegerType, \"source.nat.port\")),\n+ entry(\"sourceTranslatedZoneExternalID\", new ExtensionMapping(\"sourceTranslatedZoneExternalID\", StringType, null)),\n+ entry(\"sourceTranslatedZoneURI\", new ExtensionMapping(\"sourceTranslatedZoneURI\", StringType, null)),\n+ entry(\"suid\", new ExtensionMapping(\"sourceUserId\", StringType, \"source.user.id\")),\n+ entry(\"suser\", new ExtensionMapping(\"sourceUserName\", StringType, \"source.user.name\")),\n+ entry(\"spriv\", new ExtensionMapping(\"sourceUserPrivileges\", StringType, \"source.user.group.name\")),\n+ entry(\"sourceZoneExternalID\", new ExtensionMapping(\"sourceZoneExternalID\", StringType, null)),\n+ entry(\"sourceZoneURI\", new ExtensionMapping(\"sourceZoneURI\", StringType, null)),\n+ entry(\"start\", new ExtensionMapping(\"startTime\", TimestampType, \"event.start\")),\n+ entry(\"proto\", new ExtensionMapping(\"transportProtocol\", StringType, \"network.transport\")),\n+ entry(\"type\", new ExtensionMapping(\"type\", IntegerType, \"event.kind\")),\n+ entry(\"catdt\", new ExtensionMapping(\"categoryDeviceType\", StringType, null)),\n+ entry(\"mrt\", new ExtensionMapping(\"managerReceiptTime\", TimestampType, \"event.ingested\"))\n+ );\n+\n+ private static final Set ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = Set.of(\"incomplete CEF 
header\");\n+ private static final List TIME_LAYOUTS = List.of(\n+ // MMM dd HH:mm:ss.SSS zzz\n+ \"MMM dd HH:mm:ss.SSS z\",\n+ \"MMM dd HH:mm:ss.SSS Z\",\n+ \"MMM dd HH:mm:ss.SSS 'GMT'XX:XX\",\n+ // MMM dd HH:mm:sss.SSS\n+ \"MMM dd HH:mm:ss.SSS\",\n+ // MMM dd HH:mm:ss zzz\n+ \"MMM dd HH:mm:ss z\",\n+ \"MMM dd HH:mm:ss Z\",\n+ \"MMM dd HH:mm:ss 'GMT'XX:XX\",\n+ // MMM dd HH:mm:ss\n+ \"MMM dd HH:mm:ss\",\n+ // MMM dd yyyy HH:mm:ss.SSS zzz\n+ \"MMM dd yyyy HH:mm:ss.SSS z\",\n+ \"MMM dd yyyy HH:mm:ss.SSS Z\",\n+ \"MMM dd yyyy HH:mm:ss.SSS 'GMT'XX:XX\",\n+ // MMM dd yyyy HH:mm:ss.SSS\n+ \"MMM dd yyyy HH:mm:ss.SSS\",\n+ // MMM dd yyyy HH:mm:ss zzz\n+ \"MMM dd yyyy HH:mm:ss z\",\n+ \"MMM dd yyyy HH:mm:ss Z\",\n+ \"MMM dd yyyy HH:mm:ss 'GMT'XX:XX\",\n+ // MMM dd yyyy HH:mm:ss\n+ \"MMM dd yyyy HH:mm:ss\"\n+ );\n+\n+ private static final List CHRONO_FIELDS = List.of(\n+ NANO_OF_SECOND,\n+ SECOND_OF_DAY,\n+ MINUTE_OF_DAY,\n+ HOUR_OF_DAY,\n+ DAY_OF_MONTH,\n+ MONTH_OF_YEAR\n+ );\n+\n+ CefEvent process(String cefString) {\n+ List headers = new ArrayList<>();\n+ Matcher matcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && matcher.find(); i++) {\n+ String field = matcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headers.add(field);\n+ extensionStart = matcher.end();\n+ }\n+\n+ if (headers.isEmpty() == false && headers.getFirst().startsWith(\"CEF:\")) {\n+ CefEvent event = new CefEvent();\n+ // Add error message if there are not enough header fields\n+ if (headers.size() != 7) {\n+ event.addRootMapping(\"error.message\", new HashSet<>(ERROR_MESSAGE_INCOMPLETE_CEF_HEADER));", + "comment_created_at": "2025-04-08T17:24:22+00:00", + "comment_author": "joegallo", + "comment_body": "Why does this implementation add an error message and not throw an exception? This doesn't seem in line with the way ingest processing usually handles errors.", + "pr_file_module": null + }, + { + "comment_id": "2034669550", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 122491, + "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", + "discussion_id": "2033704333", + "commented_code": "@@ -0,0 +1,565 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.common.Strings;\n+import org.elasticsearch.common.network.InetAddresses;\n+import org.elasticsearch.common.network.NetworkAddress;\n+import org.elasticsearch.common.time.DateFormatters;\n+import org.elasticsearch.core.Nullable;\n+\n+import java.time.Instant;\n+import java.time.LocalDate;\n+import java.time.ZoneId;\n+import java.time.ZoneOffset;\n+import java.time.ZonedDateTime;\n+import java.time.format.DateTimeFormatter;\n+import java.time.format.DateTimeParseException;\n+import java.time.temporal.ChronoField;\n+import java.time.temporal.TemporalAccessor;\n+import java.time.temporal.WeekFields;\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.HashSet;\n+import java.util.List;\n+import java.util.Locale;\n+import java.util.Map;\n+import java.util.Objects;\n+import java.util.Set;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+\n+import static java.time.temporal.ChronoField.DAY_OF_MONTH;\n+import static java.time.temporal.ChronoField.HOUR_OF_DAY;\n+import static java.time.temporal.ChronoField.MINUTE_OF_DAY;\n+import static java.time.temporal.ChronoField.MONTH_OF_YEAR;\n+import static java.time.temporal.ChronoField.NANO_OF_SECOND;\n+import static java.time.temporal.ChronoField.SECOND_OF_DAY;\n+import static java.util.Map.entry;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.DoubleType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.FloatType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.IPType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.IntegerType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.LongType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.MACAddressType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.StringType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.TimestampType;\n+\n+final class CefParser {\n+ private final boolean removeEmptyValues;\n+ private final ZoneId timezone;\n+\n+ CefParser(ZoneId timezone, boolean removeEmptyValues) {\n+ this.removeEmptyValues = removeEmptyValues;\n+ this.timezone = timezone;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:[^\\\\s\\\\\\\\]|\\\\\\\\[^|]|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + 
EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+\n+ // Comprehensive regex pattern to match various MAC address formats\n+ private static final String MAC_ADDRESS_REGEX = \"^(\" +\n+ // Combined colon and hyphen separated 6-group patterns\n+ \"(([0-9A-Fa-f]{2}[:|-]){5}[0-9A-Fa-f]{2})|\" +\n+ // Dot-separated 6-group pattern\n+ \"([0-9A-Fa-f]{4}\\\\.){2}[0-9A-Fa-f]{4}|\" +\n+ // Combined colon and hyphen separated 8-group patterns\n+ \"([0-9A-Fa-f]{2}[:|-]){7}[0-9A-Fa-f]{2}|\" +\n+ // Dot-separated EUI-64\n+ \"([0-9A-Fa-f]{4}\\\\.){3}[0-9A-Fa-f]{4}\" + \")$\";\n+\n+ private static final Pattern MAC_ADDRESS_PATTERN = Pattern.compile(MAC_ADDRESS_REGEX);\n+ private static final int EUI48_HEX_LENGTH = 48 / 4;\n+ private static final int EUI64_HEX_LENGTH = 64 / 4;\n+ private static final int EUI64_HEX_WITH_SEPARATOR_MAX_LENGTH = EUI64_HEX_LENGTH + EUI64_HEX_LENGTH / 2 - 1;\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = Map.ofEntries(\n+ entry(\"\\\\\\\\\", \"\\\\\"),\n+ entry(\"\\\\=\", \"=\"),\n+ entry(\"\\\\\\n\", \"\\n\"),\n+ entry(\"\\\\\\r\", \"\\r\")\n+ );\n+\n+ enum DataType {\n+ IntegerType,\n+ LongType,\n+ FloatType,\n+ DoubleType,\n+ StringType,\n+ BooleanType,\n+ IPType,\n+ MACAddressType,\n+ TimestampType\n+ }\n+\n+ private static final Map EXTENSION_MAPPINGS = Map.ofEntries(\n+ entry(\"agt\", new ExtensionMapping(\"agentAddress\", IPType, \"agent.ip\")),\n+ entry(\"agentDnsDomain\", new ExtensionMapping(\"agentDnsDomain\", StringType, \"agent.name\")),\n+ entry(\"ahost\", new ExtensionMapping(\"agentHostName\", StringType, \"agent.name\")),\n+ entry(\"aid\", new ExtensionMapping(\"agentId\", StringType, \"agent.id\")),\n+ entry(\"amac\", new ExtensionMapping(\"agentMacAddress\", MACAddressType, \"agent.mac\")),\n+ entry(\"agentNtDomain\", new ExtensionMapping(\"agentNtDomain\", StringType, null)),\n+ entry(\"art\", new ExtensionMapping(\"agentReceiptTime\", TimestampType, \"event.created\")),\n+ entry(\"atz\", new ExtensionMapping(\"agentTimeZone\", StringType, null)),\n+ entry(\"agentTranslatedAddress\", new ExtensionMapping(\"agentTranslatedAddress\", IPType, null)),\n+ entry(\"agentTranslatedZoneExternalID\", new ExtensionMapping(\"agentTranslatedZoneExternalID\", StringType, null)),\n+ entry(\"agentTranslatedZoneURI\", new ExtensionMapping(\"agentTranslatedZoneURI\", StringType, null)),\n+ entry(\"at\", new ExtensionMapping(\"agentType\", StringType, \"agent.type\")),\n+ entry(\"av\", new ExtensionMapping(\"agentVersion\", StringType, \"agent.version\")),\n+ entry(\"agentZoneExternalID\", new ExtensionMapping(\"agentZoneExternalID\", StringType, null)),\n+ entry(\"agentZoneURI\", new ExtensionMapping(\"agentZoneURI\", StringType, null)),\n+ entry(\"app\", new ExtensionMapping(\"applicationProtocol\", StringType, \"network.protocol\")),\n+ entry(\"cnt\", new ExtensionMapping(\"baseEventCount\", IntegerType, null)),\n+ entry(\"in\", new ExtensionMapping(\"bytesIn\", LongType, \"source.bytes\")),\n+ entry(\"out\", new ExtensionMapping(\"bytesOut\", LongType, \"destination.bytes\")),\n+ entry(\"customerExternalID\", new ExtensionMapping(\"customerExternalID\", StringType, \"organization.id\")),\n+ entry(\"customerURI\", new ExtensionMapping(\"customerURI\", StringType, \"organization.name\")),\n+ entry(\"dst\", new ExtensionMapping(\"destinationAddress\", IPType, \"destination.ip\")),\n+ entry(\"destinationDnsDomain\", new ExtensionMapping(\"destinationDnsDomain\", StringType, \"destination.registered_domain\")),\n+ entry(\"dlat\", new 
ExtensionMapping(\"destinationGeoLatitude\", DoubleType, \"destination.geo.location.lat\")),\n+ entry(\"dlong\", new ExtensionMapping(\"destinationGeoLongitude\", DoubleType, \"destination.geo.location.lon\")),\n+ entry(\"dhost\", new ExtensionMapping(\"destinationHostName\", StringType, \"destination.domain\")),\n+ entry(\"dmac\", new ExtensionMapping(\"destinationMacAddress\", MACAddressType, \"destination.mac\")),\n+ entry(\"dntdom\", new ExtensionMapping(\"destinationNtDomain\", StringType, \"destination.registered_domain\")),\n+ entry(\"dpt\", new ExtensionMapping(\"destinationPort\", IntegerType, \"destination.port\")),\n+ entry(\"dpid\", new ExtensionMapping(\"destinationProcessId\", LongType, \"destination.process.pid\")),\n+ entry(\"dproc\", new ExtensionMapping(\"destinationProcessName\", StringType, \"destination.process.name\")),\n+ entry(\"destinationServiceName\", new ExtensionMapping(\"destinationServiceName\", StringType, \"destination.service.name\")),\n+ entry(\"destinationTranslatedAddress\", new ExtensionMapping(\"destinationTranslatedAddress\", IPType, \"destination.nat.ip\")),\n+ entry(\"destinationTranslatedPort\", new ExtensionMapping(\"destinationTranslatedPort\", IntegerType, \"destination.nat.port\")),\n+ entry(\"destinationTranslatedZoneExternalID\", new ExtensionMapping(\"destinationTranslatedZoneExternalID\", StringType, null)),\n+ entry(\"destinationTranslatedZoneURI\", new ExtensionMapping(\"destinationTranslatedZoneURI\", StringType, null)),\n+ entry(\"duid\", new ExtensionMapping(\"destinationUserId\", StringType, \"destination.user.id\")),\n+ entry(\"duser\", new ExtensionMapping(\"destinationUserName\", StringType, \"destination.user.name\")),\n+ entry(\"dpriv\", new ExtensionMapping(\"destinationUserPrivileges\", StringType, \"destination.user.group.name\")),\n+ entry(\"destinationZoneExternalID\", new ExtensionMapping(\"destinationZoneExternalID\", StringType, null)),\n+ entry(\"destinationZoneURI\", new ExtensionMapping(\"destinationZoneURI\", StringType, null)),\n+ entry(\"act\", new ExtensionMapping(\"deviceAction\", StringType, \"event.action\")),\n+ entry(\"dvc\", new ExtensionMapping(\"deviceAddress\", IPType, \"observer.ip\")),\n+ entry(\"cfp1Label\", new ExtensionMapping(\"deviceCustomFloatingPoint1Label\", StringType, null)),\n+ entry(\"cfp3Label\", new ExtensionMapping(\"deviceCustomFloatingPoint3Label\", StringType, null)),\n+ entry(\"cfp4Label\", new ExtensionMapping(\"deviceCustomFloatingPoint4Label\", StringType, null)),\n+ entry(\"deviceCustomDate1\", new ExtensionMapping(\"deviceCustomDate1\", TimestampType, null)),\n+ entry(\"deviceCustomDate1Label\", new ExtensionMapping(\"deviceCustomDate1Label\", StringType, null)),\n+ entry(\"deviceCustomDate2\", new ExtensionMapping(\"deviceCustomDate2\", TimestampType, null)),\n+ entry(\"deviceCustomDate2Label\", new ExtensionMapping(\"deviceCustomDate2Label\", StringType, null)),\n+ entry(\"cfp1\", new ExtensionMapping(\"deviceCustomFloatingPoint1\", FloatType, null)),\n+ entry(\"cfp2\", new ExtensionMapping(\"deviceCustomFloatingPoint2\", FloatType, null)),\n+ entry(\"cfp2Label\", new ExtensionMapping(\"deviceCustomFloatingPoint2Label\", StringType, null)),\n+ entry(\"cfp3\", new ExtensionMapping(\"deviceCustomFloatingPoint3\", FloatType, null)),\n+ entry(\"cfp4\", new ExtensionMapping(\"deviceCustomFloatingPoint4\", FloatType, null)),\n+ entry(\"c6a1\", new ExtensionMapping(\"deviceCustomIPv6Address1\", IPType, null)),\n+ entry(\"c6a1Label\", new 
ExtensionMapping(\"deviceCustomIPv6Address1Label\", StringType, null)),\n+ entry(\"c6a2\", new ExtensionMapping(\"deviceCustomIPv6Address2\", IPType, null)),\n+ entry(\"c6a2Label\", new ExtensionMapping(\"deviceCustomIPv6Address2Label\", StringType, null)),\n+ entry(\"c6a3\", new ExtensionMapping(\"deviceCustomIPv6Address3\", IPType, null)),\n+ entry(\"c6a3Label\", new ExtensionMapping(\"deviceCustomIPv6Address3Label\", StringType, null)),\n+ entry(\"c6a4\", new ExtensionMapping(\"deviceCustomIPv6Address4\", IPType, null)),\n+ entry(\"C6a4Label\", new ExtensionMapping(\"deviceCustomIPv6Address4Label\", StringType, null)),\n+ entry(\"cn1\", new ExtensionMapping(\"deviceCustomNumber1\", LongType, null)),\n+ entry(\"cn1Label\", new ExtensionMapping(\"deviceCustomNumber1Label\", StringType, null)),\n+ entry(\"cn2\", new ExtensionMapping(\"deviceCustomNumber2\", LongType, null)),\n+ entry(\"cn2Label\", new ExtensionMapping(\"deviceCustomNumber2Label\", StringType, null)),\n+ entry(\"cn3\", new ExtensionMapping(\"deviceCustomNumber3\", LongType, null)),\n+ entry(\"cn3Label\", new ExtensionMapping(\"deviceCustomNumber3Label\", StringType, null)),\n+ entry(\"cs1\", new ExtensionMapping(\"deviceCustomString1\", StringType, null)),\n+ entry(\"cs1Label\", new ExtensionMapping(\"deviceCustomString1Label\", StringType, null)),\n+ entry(\"cs2\", new ExtensionMapping(\"deviceCustomString2\", StringType, null)),\n+ entry(\"cs2Label\", new ExtensionMapping(\"deviceCustomString2Label\", StringType, null)),\n+ entry(\"cs3\", new ExtensionMapping(\"deviceCustomString3\", StringType, null)),\n+ entry(\"cs3Label\", new ExtensionMapping(\"deviceCustomString3Label\", StringType, null)),\n+ entry(\"cs4\", new ExtensionMapping(\"deviceCustomString4\", StringType, null)),\n+ entry(\"cs4Label\", new ExtensionMapping(\"deviceCustomString4Label\", StringType, null)),\n+ entry(\"cs5\", new ExtensionMapping(\"deviceCustomString5\", StringType, null)),\n+ entry(\"cs5Label\", new ExtensionMapping(\"deviceCustomString5Label\", StringType, null)),\n+ entry(\"cs6\", new ExtensionMapping(\"deviceCustomString6\", StringType, null)),\n+ entry(\"cs6Label\", new ExtensionMapping(\"deviceCustomString6Label\", StringType, null)),\n+ entry(\"deviceDirection\", new ExtensionMapping(\"deviceDirection\", StringType, \"network.direction\")),\n+ entry(\"deviceDnsDomain\", new ExtensionMapping(\"deviceDnsDomain\", StringType, \"observer.registered_domain\")),\n+ entry(\"cat\", new ExtensionMapping(\"deviceEventCategory\", StringType, null)),\n+ entry(\"deviceExternalId\", new ExtensionMapping(\"deviceExternalId\", StringType, \"observer.name\")),\n+ entry(\"deviceFacility\", new ExtensionMapping(\"deviceFacility\", LongType, \"log.syslog.facility.code\")),\n+ entry(\"dvchost\", new ExtensionMapping(\"deviceHostName\", StringType, \"observer.hostname\")),\n+ entry(\"deviceInboundInterface\", new ExtensionMapping(\"deviceInboundInterface\", StringType, \"observer.ingress.interface.name\")),\n+ entry(\"dvcmac\", new ExtensionMapping(\"deviceMacAddress\", MACAddressType, \"observer.mac\")),\n+ entry(\"deviceNtDomain\", new ExtensionMapping(\"deviceNtDomain\", StringType, null)),\n+ entry(\"deviceOutboundInterface\", new ExtensionMapping(\"deviceOutboundInterface\", StringType, \"observer.egress.interface.name\")),\n+ entry(\"devicePayloadId\", new ExtensionMapping(\"devicePayloadId\", StringType, \"event.id\")),\n+ entry(\"dvcpid\", new ExtensionMapping(\"deviceProcessId\", LongType, \"process.pid\")),\n+ entry(\"deviceProcessName\", new 
ExtensionMapping(\"deviceProcessName\", StringType, \"process.name\")),\n+ entry(\"rt\", new ExtensionMapping(\"deviceReceiptTime\", TimestampType, \"@timestamp\")),\n+ entry(\"dtz\", new ExtensionMapping(\"deviceTimeZone\", StringType, \"event.timezone\")),\n+ entry(\"deviceTranslatedAddress\", new ExtensionMapping(\"deviceTranslatedAddress\", IPType, \"host.nat.ip\")),\n+ entry(\"deviceTranslatedZoneExternalID\", new ExtensionMapping(\"deviceTranslatedZoneExternalID\", StringType, null)),\n+ entry(\"deviceTranslatedZoneURI\", new ExtensionMapping(\"deviceTranslatedZoneURI\", StringType, null)),\n+ entry(\"deviceZoneExternalID\", new ExtensionMapping(\"deviceZoneExternalID\", StringType, null)),\n+ entry(\"deviceZoneURI\", new ExtensionMapping(\"deviceZoneURI\", StringType, null)),\n+ entry(\"end\", new ExtensionMapping(\"endTime\", TimestampType, \"event.end\")),\n+ entry(\"eventId\", new ExtensionMapping(\"eventId\", StringType, \"event.id\")),\n+ entry(\"outcome\", new ExtensionMapping(\"eventOutcome\", StringType, \"event.outcome\")),\n+ entry(\"externalId\", new ExtensionMapping(\"externalId\", StringType, null)),\n+ entry(\"fileCreateTime\", new ExtensionMapping(\"fileCreateTime\", TimestampType, \"file.created\")),\n+ entry(\"fileHash\", new ExtensionMapping(\"fileHash\", StringType, \"file.hash\")),\n+ entry(\"fileId\", new ExtensionMapping(\"fileId\", StringType, \"file.inode\")),\n+ entry(\"fileModificationTime\", new ExtensionMapping(\"fileModificationTime\", TimestampType, \"file.mtime\")),\n+ entry(\"flexNumber1\", new ExtensionMapping(\"deviceFlexNumber1\", LongType, null)),\n+ entry(\"flexNumber1Label\", new ExtensionMapping(\"deviceFlexNumber1Label\", StringType, null)),\n+ entry(\"flexNumber2\", new ExtensionMapping(\"deviceFlexNumber2\", LongType, null)),\n+ entry(\"flexNumber2Label\", new ExtensionMapping(\"deviceFlexNumber2Label\", StringType, null)),\n+ entry(\"fname\", new ExtensionMapping(\"filename\", StringType, \"file.name\")),\n+ entry(\"filePath\", new ExtensionMapping(\"filePath\", StringType, \"file.path\")),\n+ entry(\"filePermission\", new ExtensionMapping(\"filePermission\", StringType, \"file.group\")),\n+ entry(\"fsize\", new ExtensionMapping(\"fileSize\", LongType, \"file.size\")),\n+ entry(\"fileType\", new ExtensionMapping(\"fileType\", StringType, \"file.type\")),\n+ entry(\"flexDate1\", new ExtensionMapping(\"flexDate1\", TimestampType, null)),\n+ entry(\"flexDate1Label\", new ExtensionMapping(\"flexDate1Label\", StringType, null)),\n+ entry(\"flexString1\", new ExtensionMapping(\"flexString1\", StringType, null)),\n+ entry(\"flexString2\", new ExtensionMapping(\"flexString2\", StringType, null)),\n+ entry(\"flexString1Label\", new ExtensionMapping(\"flexString1Label\", StringType, null)),\n+ entry(\"flexString2Label\", new ExtensionMapping(\"flexString2Label\", StringType, null)),\n+ entry(\"msg\", new ExtensionMapping(\"message\", StringType, \"message\")),\n+ entry(\"oldFileCreateTime\", new ExtensionMapping(\"oldFileCreateTime\", TimestampType, null)),\n+ entry(\"oldFileHash\", new ExtensionMapping(\"oldFileHash\", StringType, null)),\n+ entry(\"oldFileId\", new ExtensionMapping(\"oldFileId\", StringType, null)),\n+ entry(\"oldFileModificationTime\", new ExtensionMapping(\"oldFileModificationTime\", TimestampType, null)),\n+ entry(\"oldFileName\", new ExtensionMapping(\"oldFileName\", StringType, null)),\n+ entry(\"oldFilePath\", new ExtensionMapping(\"oldFilePath\", StringType, null)),\n+ entry(\"oldFilePermission\", new 
ExtensionMapping(\"oldFilePermission\", StringType, null)),\n+ entry(\"oldFileSize\", new ExtensionMapping(\"oldFileSize\", IntegerType, null)),\n+ entry(\"oldFileType\", new ExtensionMapping(\"oldFileType\", StringType, null)),\n+ entry(\"rawEvent\", new ExtensionMapping(\"rawEvent\", StringType, \"event.original\")),\n+ entry(\"reason\", new ExtensionMapping(\"Reason\", StringType, \"event.reason\")),\n+ entry(\"requestClientApplication\", new ExtensionMapping(\"requestClientApplication\", StringType, \"user_agent.original\")),\n+ entry(\"requestContext\", new ExtensionMapping(\"requestContext\", StringType, \"http.request.referrer\")),\n+ entry(\"requestCookies\", new ExtensionMapping(\"requestCookies\", StringType, null)),\n+ entry(\"requestMethod\", new ExtensionMapping(\"requestMethod\", StringType, \"http.request.method\")),\n+ entry(\"request\", new ExtensionMapping(\"requestUrl\", StringType, \"url.original\")),\n+ entry(\"src\", new ExtensionMapping(\"sourceAddress\", IPType, \"source.ip\")),\n+ entry(\"sourceDnsDomain\", new ExtensionMapping(\"sourceDnsDomain\", StringType, \"source.domain\")),\n+ entry(\"slat\", new ExtensionMapping(\"sourceGeoLatitude\", DoubleType, \"source.geo.location.lat\")),\n+ entry(\"slong\", new ExtensionMapping(\"sourceGeoLongitude\", DoubleType, \"source.geo.location.lon\")),\n+ entry(\"shost\", new ExtensionMapping(\"sourceHostName\", StringType, \"source.domain\")),\n+ entry(\"smac\", new ExtensionMapping(\"sourceMacAddress\", MACAddressType, \"source.mac\")),\n+ entry(\"sntdom\", new ExtensionMapping(\"sourceNtDomain\", StringType, \"source.registered_domain\")),\n+ entry(\"spt\", new ExtensionMapping(\"sourcePort\", IntegerType, \"source.port\")),\n+ entry(\"spid\", new ExtensionMapping(\"sourceProcessId\", LongType, \"source.process.pid\")),\n+ entry(\"sproc\", new ExtensionMapping(\"sourceProcessName\", StringType, \"source.process.name\")),\n+ entry(\"sourceServiceName\", new ExtensionMapping(\"sourceServiceName\", StringType, \"source.service.name\")),\n+ entry(\"sourceTranslatedAddress\", new ExtensionMapping(\"sourceTranslatedAddress\", IPType, \"source.nat.ip\")),\n+ entry(\"sourceTranslatedPort\", new ExtensionMapping(\"sourceTranslatedPort\", IntegerType, \"source.nat.port\")),\n+ entry(\"sourceTranslatedZoneExternalID\", new ExtensionMapping(\"sourceTranslatedZoneExternalID\", StringType, null)),\n+ entry(\"sourceTranslatedZoneURI\", new ExtensionMapping(\"sourceTranslatedZoneURI\", StringType, null)),\n+ entry(\"suid\", new ExtensionMapping(\"sourceUserId\", StringType, \"source.user.id\")),\n+ entry(\"suser\", new ExtensionMapping(\"sourceUserName\", StringType, \"source.user.name\")),\n+ entry(\"spriv\", new ExtensionMapping(\"sourceUserPrivileges\", StringType, \"source.user.group.name\")),\n+ entry(\"sourceZoneExternalID\", new ExtensionMapping(\"sourceZoneExternalID\", StringType, null)),\n+ entry(\"sourceZoneURI\", new ExtensionMapping(\"sourceZoneURI\", StringType, null)),\n+ entry(\"start\", new ExtensionMapping(\"startTime\", TimestampType, \"event.start\")),\n+ entry(\"proto\", new ExtensionMapping(\"transportProtocol\", StringType, \"network.transport\")),\n+ entry(\"type\", new ExtensionMapping(\"type\", IntegerType, \"event.kind\")),\n+ entry(\"catdt\", new ExtensionMapping(\"categoryDeviceType\", StringType, null)),\n+ entry(\"mrt\", new ExtensionMapping(\"managerReceiptTime\", TimestampType, \"event.ingested\"))\n+ );\n+\n+ private static final Set ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = Set.of(\"incomplete CEF 
header\");\n+ private static final List TIME_LAYOUTS = List.of(\n+ // MMM dd HH:mm:ss.SSS zzz\n+ \"MMM dd HH:mm:ss.SSS z\",\n+ \"MMM dd HH:mm:ss.SSS Z\",\n+ \"MMM dd HH:mm:ss.SSS 'GMT'XX:XX\",\n+ // MMM dd HH:mm:sss.SSS\n+ \"MMM dd HH:mm:ss.SSS\",\n+ // MMM dd HH:mm:ss zzz\n+ \"MMM dd HH:mm:ss z\",\n+ \"MMM dd HH:mm:ss Z\",\n+ \"MMM dd HH:mm:ss 'GMT'XX:XX\",\n+ // MMM dd HH:mm:ss\n+ \"MMM dd HH:mm:ss\",\n+ // MMM dd yyyy HH:mm:ss.SSS zzz\n+ \"MMM dd yyyy HH:mm:ss.SSS z\",\n+ \"MMM dd yyyy HH:mm:ss.SSS Z\",\n+ \"MMM dd yyyy HH:mm:ss.SSS 'GMT'XX:XX\",\n+ // MMM dd yyyy HH:mm:ss.SSS\n+ \"MMM dd yyyy HH:mm:ss.SSS\",\n+ // MMM dd yyyy HH:mm:ss zzz\n+ \"MMM dd yyyy HH:mm:ss z\",\n+ \"MMM dd yyyy HH:mm:ss Z\",\n+ \"MMM dd yyyy HH:mm:ss 'GMT'XX:XX\",\n+ // MMM dd yyyy HH:mm:ss\n+ \"MMM dd yyyy HH:mm:ss\"\n+ );\n+\n+ private static final List CHRONO_FIELDS = List.of(\n+ NANO_OF_SECOND,\n+ SECOND_OF_DAY,\n+ MINUTE_OF_DAY,\n+ HOUR_OF_DAY,\n+ DAY_OF_MONTH,\n+ MONTH_OF_YEAR\n+ );\n+\n+ CefEvent process(String cefString) {\n+ List headers = new ArrayList<>();\n+ Matcher matcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && matcher.find(); i++) {\n+ String field = matcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headers.add(field);\n+ extensionStart = matcher.end();\n+ }\n+\n+ if (headers.isEmpty() == false && headers.getFirst().startsWith(\"CEF:\")) {\n+ CefEvent event = new CefEvent();\n+ // Add error message if there are not enough header fields\n+ if (headers.size() != 7) {\n+ event.addRootMapping(\"error.message\", new HashSet<>(ERROR_MESSAGE_INCOMPLETE_CEF_HEADER));", + "comment_created_at": "2025-04-09T07:27:13+00:00", + "comment_author": "bhapas", + "comment_body": "I followed beats approach for this initially. But I agree that this should just throw exception just like other scenarios. - Fixed in https://github.com/elastic/elasticsearch/pull/122491/commits/ee969db097a7d4dfb50e014975ec524d5dc478af", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1199069767", + "pr_number": 95941, + "pr_file": "server/src/main/java/org/elasticsearch/action/support/TimeoutReleasableListener.java", + "created_at": "2023-05-19T15:03:10+00:00", + "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. 
Licensed under the Elastic License\n * 2.0 and the Server Side Public License, v 1; you may not use this file except\n * in compliance with, at your election, the Elastic License 2.0 or the Server\n * Side Public License, v 1.\n */\n\npackage org.elasticsearch.action.support;\n\nimport org.elasticsearch.ElasticsearchTimeoutException;\nimport org.elasticsearch.action.ActionListener;\nimport org.elasticsearch.core.Releasable;\nimport org.elasticsearch.core.Releasables;\nimport org.elasticsearch.core.TimeValue;\nimport org.elasticsearch.threadpool.ThreadPool;\n\nimport java.util.Objects;\nimport java.util.concurrent.atomic.AtomicReference;\nimport java.util.function.Supplier;\n\n/**\n * A listener for a {@link Releasable} which races against a timeout, but which does not leak any {@link Releasable} with which it is\n * eventually completed.\n */\npublic class TimeoutReleasableListener implements ActionListener {\n\n private final Supplier> listenerSupplier;\n private final Runnable timeoutCanceller;\n\n public static TimeoutReleasableListener create(\n ActionListener delegate,\n TimeValue timeout,\n ThreadPool threadPool,\n String timeoutExecutor\n ) {\n final var listenerSupplier = readOnce(delegate);\n return new TimeoutReleasableListener(listenerSupplier, scheduleTimeout(timeout, threadPool, timeoutExecutor, listenerSupplier));\n }\n\n private static Supplier> readOnce(ActionListener listener) {\n final var listenerRef = new AtomicReference<>(ActionListener.assertOnce(listener));\n return new Supplier<>() {\n @Override\n public ActionListener get() {\n return listenerRef.getAndSet(null);\n }\n\n @Override\n public String toString() {\n return Objects.toString(listenerRef.get());\n }\n };\n }\n\n private static Runnable scheduleTimeout(\n TimeValue timeout,\n ThreadPool threadPool,\n String timeoutExecutor,\n Supplier> listenerSupplier\n ) {\n try {\n final var cancellable = threadPool.schedule(() -> {\n final var listener = listenerSupplier.get();\n if (listener != null) {\n listener.onFailure(new ElasticsearchTimeoutException(\"timed out after [\" + timeout + \"/\" + timeout.millis() + \"ms]\"));\n }\n }, timeout, timeoutExecutor);\n return () -> {\n try {\n cancellable.cancel();\n } catch (Exception e) {\n // should not happen, we cannot reasonably do anything with an exception here anyway\n assert false : e;\n }\n };\n } catch (Exception e) {\n listenerSupplier.get().onFailure(e);\n return () -> {};", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "1199069767", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 95941, + "pr_file": "server/src/main/java/org/elasticsearch/action/support/TimeoutReleasableListener.java", + "discussion_id": "1199069767", + "commented_code": "@@ -0,0 +1,112 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the Elastic License\n+ * 2.0 and the Server Side Public License, v 1; you may not use this file except\n+ * in compliance with, at your election, the Elastic License 2.0 or the Server\n+ * Side Public License, v 1.\n+ */\n+\n+package org.elasticsearch.action.support;\n+\n+import org.elasticsearch.ElasticsearchTimeoutException;\n+import org.elasticsearch.action.ActionListener;\n+import org.elasticsearch.core.Releasable;\n+import org.elasticsearch.core.Releasables;\n+import org.elasticsearch.core.TimeValue;\n+import org.elasticsearch.threadpool.ThreadPool;\n+\n+import java.util.Objects;\n+import java.util.concurrent.atomic.AtomicReference;\n+import java.util.function.Supplier;\n+\n+/**\n+ * A listener for a {@link Releasable} which races against a timeout, but which does not leak any {@link Releasable} with which it is\n+ * eventually completed.\n+ */\n+public class TimeoutReleasableListener implements ActionListener {\n+\n+ private final Supplier> listenerSupplier;\n+ private final Runnable timeoutCanceller;\n+\n+ public static TimeoutReleasableListener create(\n+ ActionListener delegate,\n+ TimeValue timeout,\n+ ThreadPool threadPool,\n+ String timeoutExecutor\n+ ) {\n+ final var listenerSupplier = readOnce(delegate);\n+ return new TimeoutReleasableListener(listenerSupplier, scheduleTimeout(timeout, threadPool, timeoutExecutor, listenerSupplier));\n+ }\n+\n+ private static Supplier> readOnce(ActionListener listener) {\n+ final var listenerRef = new AtomicReference<>(ActionListener.assertOnce(listener));\n+ return new Supplier<>() {\n+ @Override\n+ public ActionListener get() {\n+ return listenerRef.getAndSet(null);\n+ }\n+\n+ @Override\n+ public String toString() {\n+ return Objects.toString(listenerRef.get());\n+ }\n+ };\n+ }\n+\n+ private static Runnable scheduleTimeout(\n+ TimeValue timeout,\n+ ThreadPool threadPool,\n+ String timeoutExecutor,\n+ Supplier> listenerSupplier\n+ ) {\n+ try {\n+ final var cancellable = threadPool.schedule(() -> {\n+ final var listener = listenerSupplier.get();\n+ if (listener != null) {\n+ listener.onFailure(new ElasticsearchTimeoutException(\"timed out after [\" + timeout + \"/\" + timeout.millis() + \"ms]\"));\n+ }\n+ }, timeout, timeoutExecutor);\n+ return () -> {\n+ try {\n+ cancellable.cancel();\n+ } catch (Exception e) {\n+ // should not happen, we cannot reasonably do anything with an exception here anyway\n+ assert false : e;\n+ }\n+ };\n+ } catch (Exception e) {\n+ listenerSupplier.get().onFailure(e);\n+ return () -> {};", + "comment_created_at": "2023-05-19T15:03:10+00:00", + "comment_author": "henningandersen", + "comment_body": "I wonder about this. I suppose it only happens during shutdown? Can we perhaps assert that?\r\n\r\nIn case we do get here, it seems odd that we try to block operations afterwards. Probably not a big deal if it only happens on shutdown.\r\n\r\nTo some extent we could just throw here? 
Or change `TimeoutReleasableListener.create` into a `run` method accepting the code to run under timeout (a `Consumer SOAR_MIN_DISTANCE) {\n soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n } else {\n // if the vector is very close to the centroid, we look for the second-nearest centroid\n soar = VectorUtil.squareDistance(vector, neighborCentroid);\n }\n if (soar < minSoar) {\n bestAssignment = neighbor;\n minSoar = soar;\n }\n }\n\n assert bestAssignment != -1 : \"Failed to assign soar vector to centroid\";", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2172022369", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130206, - "pr_file": "server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/KMeansLocal.java", - "discussion_id": "2172022369", - "commented_code": "@@ -207,13 +215,19 @@ private int[] assignSpilled(FloatVectorValues vectors, List neighborhoods\n continue;\n }\n float[] neighborCentroid = centroids[neighbor];\n- float soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n+ final float soar;\n+ if (vectorCentroidDist > SOAR_MIN_DISTANCE) {\n+ soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n+ } else {\n+ // if the vector is very close to the centroid, we look for the second-nearest centroid\n+ soar = VectorUtil.squareDistance(vector, neighborCentroid);\n+ }\n if (soar < minSoar) {\n bestAssignment = neighbor;\n minSoar = soar;\n }\n }\n-\n+ assert bestAssignment != -1 : \"Failed to assign soar vector to centroid\";", - "comment_created_at": "2025-06-27T13:12:53+00:00", - "comment_author": "Copilot", - "comment_body": "Using an assertion to handle the unassigned bestAssignment might fail silently in production if assertions are disabled. Consider explicitly handling this error case (e.g., by throwing an exception) to ensure that assignment failures are caught.\n```suggestion\n if (bestAssignment == -1) {\n throw new IllegalStateException(\"Failed to assign soar vector to centroid\");\n }\n```", - "pr_file_module": null - }, - { - "comment_id": "2172025988", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130206, - "pr_file": "server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/KMeansLocal.java", - "discussion_id": "2172022369", - "commented_code": "@@ -207,13 +215,19 @@ private int[] assignSpilled(FloatVectorValues vectors, List neighborhoods\n continue;\n }\n float[] neighborCentroid = centroids[neighbor];\n- float soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n+ final float soar;\n+ if (vectorCentroidDist > SOAR_MIN_DISTANCE) {\n+ soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n+ } else {\n+ // if the vector is very close to the centroid, we look for the second-nearest centroid\n+ soar = VectorUtil.squareDistance(vector, neighborCentroid);\n+ }\n if (soar < minSoar) {\n bestAssignment = neighbor;\n minSoar = soar;\n }\n }\n-\n+ assert bestAssignment != -1 : \"Failed to assign soar vector to centroid\";", - "comment_created_at": "2025-06-27T13:14:55+00:00", - "comment_author": "benwtrent", - "comment_body": "This assert is fine. 
But we need to make sure that if we do accidentally set an invalid centroid, when going through the assignments later, we check for `-1`", - "pr_file_module": null - }, - { - "comment_id": "2172029264", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130206, - "pr_file": "server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/KMeansLocal.java", - "discussion_id": "2172022369", - "commented_code": "@@ -207,13 +215,19 @@ private int[] assignSpilled(FloatVectorValues vectors, List neighborhoods\n continue;\n }\n float[] neighborCentroid = centroids[neighbor];\n- float soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n+ final float soar;\n+ if (vectorCentroidDist > SOAR_MIN_DISTANCE) {\n+ soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n+ } else {\n+ // if the vector is very close to the centroid, we look for the second-nearest centroid\n+ soar = VectorUtil.squareDistance(vector, neighborCentroid);\n+ }\n if (soar < minSoar) {\n bestAssignment = neighbor;\n minSoar = soar;\n }\n }\n-\n+ assert bestAssignment != -1 : \"Failed to assign soar vector to centroid\";", - "comment_created_at": "2025-06-27T13:16:41+00:00", - "comment_author": "iverase", - "comment_body": "We are doing it as the code is working now, let see if we can add a test\r\n", - "pr_file_module": null - }, - { - "comment_id": "2172040204", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130206, - "pr_file": "server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/KMeansLocal.java", - "discussion_id": "2172022369", - "commented_code": "@@ -207,13 +215,19 @@ private int[] assignSpilled(FloatVectorValues vectors, List neighborhoods\n continue;\n }\n float[] neighborCentroid = centroids[neighbor];\n- float soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n+ final float soar;\n+ if (vectorCentroidDist > SOAR_MIN_DISTANCE) {\n+ soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n+ } else {\n+ // if the vector is very close to the centroid, we look for the second-nearest centroid\n+ soar = VectorUtil.squareDistance(vector, neighborCentroid);\n+ }\n if (soar < minSoar) {\n bestAssignment = neighbor;\n minSoar = soar;\n }\n }\n-\n+ assert bestAssignment != -1 : \"Failed to assign soar vector to centroid\";", - "comment_created_at": "2025-06-27T13:22:44+00:00", - "comment_author": "iverase", - "comment_body": "We don't really check but the way we create assignments per clusters (which is horrible slow and I want to change it) later on only take valid values so we can put in the array anything that will get ignored. 
", - "pr_file_module": null - }, - { - "comment_id": "2172052178", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130206, - "pr_file": "server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/KMeansLocal.java", - "discussion_id": "2172022369", - "commented_code": "@@ -207,13 +215,19 @@ private int[] assignSpilled(FloatVectorValues vectors, List neighborhoods\n continue;\n }\n float[] neighborCentroid = centroids[neighbor];\n- float soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n+ final float soar;\n+ if (vectorCentroidDist > SOAR_MIN_DISTANCE) {\n+ soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);\n+ } else {\n+ // if the vector is very close to the centroid, we look for the second-nearest centroid\n+ soar = VectorUtil.squareDistance(vector, neighborCentroid);\n+ }\n if (soar < minSoar) {\n bestAssignment = neighbor;\n minSoar = soar;\n }\n }\n-\n+ assert bestAssignment != -1 : \"Failed to assign soar vector to centroid\";", - "comment_created_at": "2025-06-27T13:29:00+00:00", - "comment_author": "iverase", - "comment_body": "I pushed https://github.com/elastic/elasticsearch/pull/130206/commits/d26103a495bf0f83fbb2d28d1c00df2ad6b4ebe7 that is how I found this issue. We explicitly check for -1 in soar assignments.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1491692056", - "pr_number": 105558, - "pr_file": "server/src/main/java/org/elasticsearch/cluster/coordination/NodeJoinExecutor.java", - "created_at": "2024-02-15T21:27:47+00:00", - "commented_code": "} else {\n try {\n CompatibilityVersions compatibilityVersions = nodeJoinTask.compatibilityVersions();\n assert systemIndexVersionConsistent(compatibilityVersions.systemIndexMappingsVersion(), compatibilityVersionsMap);", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "1491692056", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 105558, - "pr_file": "server/src/main/java/org/elasticsearch/cluster/coordination/NodeJoinExecutor.java", - "discussion_id": "1491692056", - "commented_code": "@@ -158,6 +159,7 @@ public ClusterState execute(BatchExecutionContext batchExecutionContex\n } else {\n try {\n CompatibilityVersions compatibilityVersions = nodeJoinTask.compatibilityVersions();\n+ assert systemIndexVersionConsistent(compatibilityVersions.systemIndexMappingsVersion(), compatibilityVersionsMap);", - "comment_created_at": "2024-02-15T21:27:47+00:00", - "comment_author": "rjernst", - "comment_body": "Using assert means this won't run in production. 
Should we also ensure these match regardless of whether asserts are turned on?", - "pr_file_module": null - }, - { - "comment_id": "1491749688", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 105558, - "pr_file": "server/src/main/java/org/elasticsearch/cluster/coordination/NodeJoinExecutor.java", - "discussion_id": "1491692056", - "commented_code": "@@ -158,6 +159,7 @@ public ClusterState execute(BatchExecutionContext batchExecutionContex\n } else {\n try {\n CompatibilityVersions compatibilityVersions = nodeJoinTask.compatibilityVersions();\n+ assert systemIndexVersionConsistent(compatibilityVersions.systemIndexMappingsVersion(), compatibilityVersionsMap);", - "comment_created_at": "2024-02-15T22:39:30+00:00", - "comment_author": "williamrandolph", - "comment_body": "Definitely -- I put up this PR as a draft so I could run CI, work out a good test case, and figure out what we need to communicate to plugin authors. Once all that's done, I'll put this in an `ensureSystemIndexVersionConsistent` along the lines of the other checks in this block of code.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2033704333", - "pr_number": 122491, - "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", - "created_at": "2025-04-08T17:24:22+00:00", - "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. Licensed under the \"Elastic License\n * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n * Public License v 1\"; you may not use this file except in compliance with, at\n * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n * License v3.0 only\", or the \"Server Side Public License, v 1\".\n */\npackage org.elasticsearch.ingest.common;\n\nimport org.elasticsearch.common.Strings;\nimport org.elasticsearch.common.network.InetAddresses;\nimport org.elasticsearch.common.network.NetworkAddress;\nimport org.elasticsearch.common.time.DateFormatters;\nimport org.elasticsearch.core.Nullable;\n\nimport java.time.Instant;\nimport java.time.LocalDate;\nimport java.time.ZoneId;\nimport java.time.ZoneOffset;\nimport java.time.ZonedDateTime;\nimport java.time.format.DateTimeFormatter;\nimport java.time.format.DateTimeParseException;\nimport java.time.temporal.ChronoField;\nimport java.time.temporal.TemporalAccessor;\nimport java.time.temporal.WeekFields;\nimport java.util.ArrayList;\nimport java.util.HashMap;\nimport java.util.HashSet;\nimport java.util.List;\nimport java.util.Locale;\nimport java.util.Map;\nimport java.util.Objects;\nimport java.util.Set;\nimport java.util.regex.Matcher;\nimport java.util.regex.Pattern;\n\nimport static java.time.temporal.ChronoField.DAY_OF_MONTH;\nimport static java.time.temporal.ChronoField.HOUR_OF_DAY;\nimport static java.time.temporal.ChronoField.MINUTE_OF_DAY;\nimport static java.time.temporal.ChronoField.MONTH_OF_YEAR;\nimport static java.time.temporal.ChronoField.NANO_OF_SECOND;\nimport static java.time.temporal.ChronoField.SECOND_OF_DAY;\nimport static java.util.Map.entry;\nimport static org.elasticsearch.ingest.common.CefParser.DataType.DoubleType;\nimport static org.elasticsearch.ingest.common.CefParser.DataType.FloatType;\nimport static org.elasticsearch.ingest.common.CefParser.DataType.IPType;\nimport static org.elasticsearch.ingest.common.CefParser.DataType.IntegerType;\nimport static 
org.elasticsearch.ingest.common.CefParser.DataType.LongType;\nimport static org.elasticsearch.ingest.common.CefParser.DataType.MACAddressType;\nimport static org.elasticsearch.ingest.common.CefParser.DataType.StringType;\nimport static org.elasticsearch.ingest.common.CefParser.DataType.TimestampType;\n\nfinal class CefParser {\n private final boolean removeEmptyValues;\n private final ZoneId timezone;\n\n CefParser(ZoneId timezone, boolean removeEmptyValues) {\n this.removeEmptyValues = removeEmptyValues;\n this.timezone = timezone;\n }\n\n private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n\n // New patterns for extension parsing\n private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n private static final String EXTENSION_VALUE_PATTERN = \"(?:[^\\\\s\\\\\\\\]|\\\\\\\\[^|]|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n );\n\n // Comprehensive regex pattern to match various MAC address formats\n private static final String MAC_ADDRESS_REGEX = \"^(\" +\n // Combined colon and hyphen separated 6-group patterns\n \"(([0-9A-Fa-f]{2}[:|-]){5}[0-9A-Fa-f]{2})|\" +\n // Dot-separated 6-group pattern\n \"([0-9A-Fa-f]{4}\\\\.){2}[0-9A-Fa-f]{4}|\" +\n // Combined colon and hyphen separated 8-group patterns\n \"([0-9A-Fa-f]{2}[:|-]){7}[0-9A-Fa-f]{2}|\" +\n // Dot-separated EUI-64\n \"([0-9A-Fa-f]{4}\\\\.){3}[0-9A-Fa-f]{4}\" + \")$\";\n\n private static final Pattern MAC_ADDRESS_PATTERN = Pattern.compile(MAC_ADDRESS_REGEX);\n private static final int EUI48_HEX_LENGTH = 48 / 4;\n private static final int EUI64_HEX_LENGTH = 64 / 4;\n private static final int EUI64_HEX_WITH_SEPARATOR_MAX_LENGTH = EUI64_HEX_LENGTH + EUI64_HEX_LENGTH / 2 - 1;\n private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = Map.ofEntries(\n entry(\"\\\\\\\\\", \"\\\\\"),\n entry(\"\\\\=\", \"=\"),\n entry(\"\\\\\\n\", \"\\n\"),\n entry(\"\\\\\\r\", \"\\r\")\n );\n\n enum DataType {\n IntegerType,\n LongType,\n FloatType,\n DoubleType,\n StringType,\n BooleanType,\n IPType,\n MACAddressType,\n TimestampType\n }\n\n private static final Map EXTENSION_MAPPINGS = Map.ofEntries(\n entry(\"agt\", new ExtensionMapping(\"agentAddress\", IPType, \"agent.ip\")),\n entry(\"agentDnsDomain\", new ExtensionMapping(\"agentDnsDomain\", StringType, \"agent.name\")),\n entry(\"ahost\", new ExtensionMapping(\"agentHostName\", StringType, \"agent.name\")),\n entry(\"aid\", new ExtensionMapping(\"agentId\", StringType, \"agent.id\")),\n entry(\"amac\", new ExtensionMapping(\"agentMacAddress\", MACAddressType, \"agent.mac\")),\n entry(\"agentNtDomain\", new ExtensionMapping(\"agentNtDomain\", StringType, null)),\n entry(\"art\", new ExtensionMapping(\"agentReceiptTime\", TimestampType, \"event.created\")),\n entry(\"atz\", new ExtensionMapping(\"agentTimeZone\", StringType, null)),\n entry(\"agentTranslatedAddress\", new ExtensionMapping(\"agentTranslatedAddress\", IPType, 
null)),\n entry(\"agentTranslatedZoneExternalID\", new ExtensionMapping(\"agentTranslatedZoneExternalID\", StringType, null)),\n entry(\"agentTranslatedZoneURI\", new ExtensionMapping(\"agentTranslatedZoneURI\", StringType, null)),\n entry(\"at\", new ExtensionMapping(\"agentType\", StringType, \"agent.type\")),\n entry(\"av\", new ExtensionMapping(\"agentVersion\", StringType, \"agent.version\")),\n entry(\"agentZoneExternalID\", new ExtensionMapping(\"agentZoneExternalID\", StringType, null)),\n entry(\"agentZoneURI\", new ExtensionMapping(\"agentZoneURI\", StringType, null)),\n entry(\"app\", new ExtensionMapping(\"applicationProtocol\", StringType, \"network.protocol\")),\n entry(\"cnt\", new ExtensionMapping(\"baseEventCount\", IntegerType, null)),\n entry(\"in\", new ExtensionMapping(\"bytesIn\", LongType, \"source.bytes\")),\n entry(\"out\", new ExtensionMapping(\"bytesOut\", LongType, \"destination.bytes\")),\n entry(\"customerExternalID\", new ExtensionMapping(\"customerExternalID\", StringType, \"organization.id\")),\n entry(\"customerURI\", new ExtensionMapping(\"customerURI\", StringType, \"organization.name\")),\n entry(\"dst\", new ExtensionMapping(\"destinationAddress\", IPType, \"destination.ip\")),\n entry(\"destinationDnsDomain\", new ExtensionMapping(\"destinationDnsDomain\", StringType, \"destination.registered_domain\")),\n entry(\"dlat\", new ExtensionMapping(\"destinationGeoLatitude\", DoubleType, \"destination.geo.location.lat\")),\n entry(\"dlong\", new ExtensionMapping(\"destinationGeoLongitude\", DoubleType, \"destination.geo.location.lon\")),\n entry(\"dhost\", new ExtensionMapping(\"destinationHostName\", StringType, \"destination.domain\")),\n entry(\"dmac\", new ExtensionMapping(\"destinationMacAddress\", MACAddressType, \"destination.mac\")),\n entry(\"dntdom\", new ExtensionMapping(\"destinationNtDomain\", StringType, \"destination.registered_domain\")),\n entry(\"dpt\", new ExtensionMapping(\"destinationPort\", IntegerType, \"destination.port\")),\n entry(\"dpid\", new ExtensionMapping(\"destinationProcessId\", LongType, \"destination.process.pid\")),\n entry(\"dproc\", new ExtensionMapping(\"destinationProcessName\", StringType, \"destination.process.name\")),\n entry(\"destinationServiceName\", new ExtensionMapping(\"destinationServiceName\", StringType, \"destination.service.name\")),\n entry(\"destinationTranslatedAddress\", new ExtensionMapping(\"destinationTranslatedAddress\", IPType, \"destination.nat.ip\")),\n entry(\"destinationTranslatedPort\", new ExtensionMapping(\"destinationTranslatedPort\", IntegerType, \"destination.nat.port\")),\n entry(\"destinationTranslatedZoneExternalID\", new ExtensionMapping(\"destinationTranslatedZoneExternalID\", StringType, null)),\n entry(\"destinationTranslatedZoneURI\", new ExtensionMapping(\"destinationTranslatedZoneURI\", StringType, null)),\n entry(\"duid\", new ExtensionMapping(\"destinationUserId\", StringType, \"destination.user.id\")),\n entry(\"duser\", new ExtensionMapping(\"destinationUserName\", StringType, \"destination.user.name\")),\n entry(\"dpriv\", new ExtensionMapping(\"destinationUserPrivileges\", StringType, \"destination.user.group.name\")),\n entry(\"destinationZoneExternalID\", new ExtensionMapping(\"destinationZoneExternalID\", StringType, null)),\n entry(\"destinationZoneURI\", new ExtensionMapping(\"destinationZoneURI\", StringType, null)),\n entry(\"act\", new ExtensionMapping(\"deviceAction\", StringType, \"event.action\")),\n entry(\"dvc\", new ExtensionMapping(\"deviceAddress\", 
IPType, \"observer.ip\")),\n entry(\"cfp1Label\", new ExtensionMapping(\"deviceCustomFloatingPoint1Label\", StringType, null)),\n entry(\"cfp3Label\", new ExtensionMapping(\"deviceCustomFloatingPoint3Label\", StringType, null)),\n entry(\"cfp4Label\", new ExtensionMapping(\"deviceCustomFloatingPoint4Label\", StringType, null)),\n entry(\"deviceCustomDate1\", new ExtensionMapping(\"deviceCustomDate1\", TimestampType, null)),\n entry(\"deviceCustomDate1Label\", new ExtensionMapping(\"deviceCustomDate1Label\", StringType, null)),\n entry(\"deviceCustomDate2\", new ExtensionMapping(\"deviceCustomDate2\", TimestampType, null)),\n entry(\"deviceCustomDate2Label\", new ExtensionMapping(\"deviceCustomDate2Label\", StringType, null)),\n entry(\"cfp1\", new ExtensionMapping(\"deviceCustomFloatingPoint1\", FloatType, null)),\n entry(\"cfp2\", new ExtensionMapping(\"deviceCustomFloatingPoint2\", FloatType, null)),\n entry(\"cfp2Label\", new ExtensionMapping(\"deviceCustomFloatingPoint2Label\", StringType, null)),\n entry(\"cfp3\", new ExtensionMapping(\"deviceCustomFloatingPoint3\", FloatType, null)),\n entry(\"cfp4\", new ExtensionMapping(\"deviceCustomFloatingPoint4\", FloatType, null)),\n entry(\"c6a1\", new ExtensionMapping(\"deviceCustomIPv6Address1\", IPType, null)),\n entry(\"c6a1Label\", new ExtensionMapping(\"deviceCustomIPv6Address1Label\", StringType, null)),\n entry(\"c6a2\", new ExtensionMapping(\"deviceCustomIPv6Address2\", IPType, null)),\n entry(\"c6a2Label\", new ExtensionMapping(\"deviceCustomIPv6Address2Label\", StringType, null)),\n entry(\"c6a3\", new ExtensionMapping(\"deviceCustomIPv6Address3\", IPType, null)),\n entry(\"c6a3Label\", new ExtensionMapping(\"deviceCustomIPv6Address3Label\", StringType, null)),\n entry(\"c6a4\", new ExtensionMapping(\"deviceCustomIPv6Address4\", IPType, null)),\n entry(\"C6a4Label\", new ExtensionMapping(\"deviceCustomIPv6Address4Label\", StringType, null)),\n entry(\"cn1\", new ExtensionMapping(\"deviceCustomNumber1\", LongType, null)),\n entry(\"cn1Label\", new ExtensionMapping(\"deviceCustomNumber1Label\", StringType, null)),\n entry(\"cn2\", new ExtensionMapping(\"deviceCustomNumber2\", LongType, null)),\n entry(\"cn2Label\", new ExtensionMapping(\"deviceCustomNumber2Label\", StringType, null)),\n entry(\"cn3\", new ExtensionMapping(\"deviceCustomNumber3\", LongType, null)),\n entry(\"cn3Label\", new ExtensionMapping(\"deviceCustomNumber3Label\", StringType, null)),\n entry(\"cs1\", new ExtensionMapping(\"deviceCustomString1\", StringType, null)),\n entry(\"cs1Label\", new ExtensionMapping(\"deviceCustomString1Label\", StringType, null)),\n entry(\"cs2\", new ExtensionMapping(\"deviceCustomString2\", StringType, null)),\n entry(\"cs2Label\", new ExtensionMapping(\"deviceCustomString2Label\", StringType, null)),\n entry(\"cs3\", new ExtensionMapping(\"deviceCustomString3\", StringType, null)),\n entry(\"cs3Label\", new ExtensionMapping(\"deviceCustomString3Label\", StringType, null)),\n entry(\"cs4\", new ExtensionMapping(\"deviceCustomString4\", StringType, null)),\n entry(\"cs4Label\", new ExtensionMapping(\"deviceCustomString4Label\", StringType, null)),\n entry(\"cs5\", new ExtensionMapping(\"deviceCustomString5\", StringType, null)),\n entry(\"cs5Label\", new ExtensionMapping(\"deviceCustomString5Label\", StringType, null)),\n entry(\"cs6\", new ExtensionMapping(\"deviceCustomString6\", StringType, null)),\n entry(\"cs6Label\", new ExtensionMapping(\"deviceCustomString6Label\", StringType, null)),\n entry(\"deviceDirection\", new 
ExtensionMapping(\"deviceDirection\", StringType, \"network.direction\")),\n entry(\"deviceDnsDomain\", new ExtensionMapping(\"deviceDnsDomain\", StringType, \"observer.registered_domain\")),\n entry(\"cat\", new ExtensionMapping(\"deviceEventCategory\", StringType, null)),\n entry(\"deviceExternalId\", new ExtensionMapping(\"deviceExternalId\", StringType, \"observer.name\")),\n entry(\"deviceFacility\", new ExtensionMapping(\"deviceFacility\", LongType, \"log.syslog.facility.code\")),\n entry(\"dvchost\", new ExtensionMapping(\"deviceHostName\", StringType, \"observer.hostname\")),\n entry(\"deviceInboundInterface\", new ExtensionMapping(\"deviceInboundInterface\", StringType, \"observer.ingress.interface.name\")),\n entry(\"dvcmac\", new ExtensionMapping(\"deviceMacAddress\", MACAddressType, \"observer.mac\")),\n entry(\"deviceNtDomain\", new ExtensionMapping(\"deviceNtDomain\", StringType, null)),\n entry(\"deviceOutboundInterface\", new ExtensionMapping(\"deviceOutboundInterface\", StringType, \"observer.egress.interface.name\")),\n entry(\"devicePayloadId\", new ExtensionMapping(\"devicePayloadId\", StringType, \"event.id\")),\n entry(\"dvcpid\", new ExtensionMapping(\"deviceProcessId\", LongType, \"process.pid\")),\n entry(\"deviceProcessName\", new ExtensionMapping(\"deviceProcessName\", StringType, \"process.name\")),\n entry(\"rt\", new ExtensionMapping(\"deviceReceiptTime\", TimestampType, \"@timestamp\")),\n entry(\"dtz\", new ExtensionMapping(\"deviceTimeZone\", StringType, \"event.timezone\")),\n entry(\"deviceTranslatedAddress\", new ExtensionMapping(\"deviceTranslatedAddress\", IPType, \"host.nat.ip\")),\n entry(\"deviceTranslatedZoneExternalID\", new ExtensionMapping(\"deviceTranslatedZoneExternalID\", StringType, null)),\n entry(\"deviceTranslatedZoneURI\", new ExtensionMapping(\"deviceTranslatedZoneURI\", StringType, null)),\n entry(\"deviceZoneExternalID\", new ExtensionMapping(\"deviceZoneExternalID\", StringType, null)),\n entry(\"deviceZoneURI\", new ExtensionMapping(\"deviceZoneURI\", StringType, null)),\n entry(\"end\", new ExtensionMapping(\"endTime\", TimestampType, \"event.end\")),\n entry(\"eventId\", new ExtensionMapping(\"eventId\", StringType, \"event.id\")),\n entry(\"outcome\", new ExtensionMapping(\"eventOutcome\", StringType, \"event.outcome\")),\n entry(\"externalId\", new ExtensionMapping(\"externalId\", StringType, null)),\n entry(\"fileCreateTime\", new ExtensionMapping(\"fileCreateTime\", TimestampType, \"file.created\")),\n entry(\"fileHash\", new ExtensionMapping(\"fileHash\", StringType, \"file.hash\")),\n entry(\"fileId\", new ExtensionMapping(\"fileId\", StringType, \"file.inode\")),\n entry(\"fileModificationTime\", new ExtensionMapping(\"fileModificationTime\", TimestampType, \"file.mtime\")),\n entry(\"flexNumber1\", new ExtensionMapping(\"deviceFlexNumber1\", LongType, null)),\n entry(\"flexNumber1Label\", new ExtensionMapping(\"deviceFlexNumber1Label\", StringType, null)),\n entry(\"flexNumber2\", new ExtensionMapping(\"deviceFlexNumber2\", LongType, null)),\n entry(\"flexNumber2Label\", new ExtensionMapping(\"deviceFlexNumber2Label\", StringType, null)),\n entry(\"fname\", new ExtensionMapping(\"filename\", StringType, \"file.name\")),\n entry(\"filePath\", new ExtensionMapping(\"filePath\", StringType, \"file.path\")),\n entry(\"filePermission\", new ExtensionMapping(\"filePermission\", StringType, \"file.group\")),\n entry(\"fsize\", new ExtensionMapping(\"fileSize\", LongType, \"file.size\")),\n entry(\"fileType\", new 
ExtensionMapping(\"fileType\", StringType, \"file.type\")),\n entry(\"flexDate1\", new ExtensionMapping(\"flexDate1\", TimestampType, null)),\n entry(\"flexDate1Label\", new ExtensionMapping(\"flexDate1Label\", StringType, null)),\n entry(\"flexString1\", new ExtensionMapping(\"flexString1\", StringType, null)),\n entry(\"flexString2\", new ExtensionMapping(\"flexString2\", StringType, null)),\n entry(\"flexString1Label\", new ExtensionMapping(\"flexString1Label\", StringType, null)),\n entry(\"flexString2Label\", new ExtensionMapping(\"flexString2Label\", StringType, null)),\n entry(\"msg\", new ExtensionMapping(\"message\", StringType, \"message\")),\n entry(\"oldFileCreateTime\", new ExtensionMapping(\"oldFileCreateTime\", TimestampType, null)),\n entry(\"oldFileHash\", new ExtensionMapping(\"oldFileHash\", StringType, null)),\n entry(\"oldFileId\", new ExtensionMapping(\"oldFileId\", StringType, null)),\n entry(\"oldFileModificationTime\", new ExtensionMapping(\"oldFileModificationTime\", TimestampType, null)),\n entry(\"oldFileName\", new ExtensionMapping(\"oldFileName\", StringType, null)),\n entry(\"oldFilePath\", new ExtensionMapping(\"oldFilePath\", StringType, null)),\n entry(\"oldFilePermission\", new ExtensionMapping(\"oldFilePermission\", StringType, null)),\n entry(\"oldFileSize\", new ExtensionMapping(\"oldFileSize\", IntegerType, null)),\n entry(\"oldFileType\", new ExtensionMapping(\"oldFileType\", StringType, null)),\n entry(\"rawEvent\", new ExtensionMapping(\"rawEvent\", StringType, \"event.original\")),\n entry(\"reason\", new ExtensionMapping(\"Reason\", StringType, \"event.reason\")),\n entry(\"requestClientApplication\", new ExtensionMapping(\"requestClientApplication\", StringType, \"user_agent.original\")),\n entry(\"requestContext\", new ExtensionMapping(\"requestContext\", StringType, \"http.request.referrer\")),\n entry(\"requestCookies\", new ExtensionMapping(\"requestCookies\", StringType, null)),\n entry(\"requestMethod\", new ExtensionMapping(\"requestMethod\", StringType, \"http.request.method\")),\n entry(\"request\", new ExtensionMapping(\"requestUrl\", StringType, \"url.original\")),\n entry(\"src\", new ExtensionMapping(\"sourceAddress\", IPType, \"source.ip\")),\n entry(\"sourceDnsDomain\", new ExtensionMapping(\"sourceDnsDomain\", StringType, \"source.domain\")),\n entry(\"slat\", new ExtensionMapping(\"sourceGeoLatitude\", DoubleType, \"source.geo.location.lat\")),\n entry(\"slong\", new ExtensionMapping(\"sourceGeoLongitude\", DoubleType, \"source.geo.location.lon\")),\n entry(\"shost\", new ExtensionMapping(\"sourceHostName\", StringType, \"source.domain\")),\n entry(\"smac\", new ExtensionMapping(\"sourceMacAddress\", MACAddressType, \"source.mac\")),\n entry(\"sntdom\", new ExtensionMapping(\"sourceNtDomain\", StringType, \"source.registered_domain\")),\n entry(\"spt\", new ExtensionMapping(\"sourcePort\", IntegerType, \"source.port\")),\n entry(\"spid\", new ExtensionMapping(\"sourceProcessId\", LongType, \"source.process.pid\")),\n entry(\"sproc\", new ExtensionMapping(\"sourceProcessName\", StringType, \"source.process.name\")),\n entry(\"sourceServiceName\", new ExtensionMapping(\"sourceServiceName\", StringType, \"source.service.name\")),\n entry(\"sourceTranslatedAddress\", new ExtensionMapping(\"sourceTranslatedAddress\", IPType, \"source.nat.ip\")),\n entry(\"sourceTranslatedPort\", new ExtensionMapping(\"sourceTranslatedPort\", IntegerType, \"source.nat.port\")),\n entry(\"sourceTranslatedZoneExternalID\", new 
ExtensionMapping(\"sourceTranslatedZoneExternalID\", StringType, null)),\n entry(\"sourceTranslatedZoneURI\", new ExtensionMapping(\"sourceTranslatedZoneURI\", StringType, null)),\n entry(\"suid\", new ExtensionMapping(\"sourceUserId\", StringType, \"source.user.id\")),\n entry(\"suser\", new ExtensionMapping(\"sourceUserName\", StringType, \"source.user.name\")),\n entry(\"spriv\", new ExtensionMapping(\"sourceUserPrivileges\", StringType, \"source.user.group.name\")),\n entry(\"sourceZoneExternalID\", new ExtensionMapping(\"sourceZoneExternalID\", StringType, null)),\n entry(\"sourceZoneURI\", new ExtensionMapping(\"sourceZoneURI\", StringType, null)),\n entry(\"start\", new ExtensionMapping(\"startTime\", TimestampType, \"event.start\")),\n entry(\"proto\", new ExtensionMapping(\"transportProtocol\", StringType, \"network.transport\")),\n entry(\"type\", new ExtensionMapping(\"type\", IntegerType, \"event.kind\")),\n entry(\"catdt\", new ExtensionMapping(\"categoryDeviceType\", StringType, null)),\n entry(\"mrt\", new ExtensionMapping(\"managerReceiptTime\", TimestampType, \"event.ingested\"))\n );\n\n private static final Set ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = Set.of(\"incomplete CEF header\");\n private static final List TIME_LAYOUTS = List.of(\n // MMM dd HH:mm:ss.SSS zzz\n \"MMM dd HH:mm:ss.SSS z\",\n \"MMM dd HH:mm:ss.SSS Z\",\n \"MMM dd HH:mm:ss.SSS 'GMT'XX:XX\",\n // MMM dd HH:mm:sss.SSS\n \"MMM dd HH:mm:ss.SSS\",\n // MMM dd HH:mm:ss zzz\n \"MMM dd HH:mm:ss z\",\n \"MMM dd HH:mm:ss Z\",\n \"MMM dd HH:mm:ss 'GMT'XX:XX\",\n // MMM dd HH:mm:ss\n \"MMM dd HH:mm:ss\",\n // MMM dd yyyy HH:mm:ss.SSS zzz\n \"MMM dd yyyy HH:mm:ss.SSS z\",\n \"MMM dd yyyy HH:mm:ss.SSS Z\",\n \"MMM dd yyyy HH:mm:ss.SSS 'GMT'XX:XX\",\n // MMM dd yyyy HH:mm:ss.SSS\n \"MMM dd yyyy HH:mm:ss.SSS\",\n // MMM dd yyyy HH:mm:ss zzz\n \"MMM dd yyyy HH:mm:ss z\",\n \"MMM dd yyyy HH:mm:ss Z\",\n \"MMM dd yyyy HH:mm:ss 'GMT'XX:XX\",\n // MMM dd yyyy HH:mm:ss\n \"MMM dd yyyy HH:mm:ss\"\n );\n\n private static final List CHRONO_FIELDS = List.of(\n NANO_OF_SECOND,\n SECOND_OF_DAY,\n MINUTE_OF_DAY,\n HOUR_OF_DAY,\n DAY_OF_MONTH,\n MONTH_OF_YEAR\n );\n\n CefEvent process(String cefString) {\n List headers = new ArrayList<>();\n Matcher matcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n int extensionStart = 0;\n\n for (int i = 0; i < 7 && matcher.find(); i++) {\n String field = matcher.group(1);\n field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n headers.add(field);\n extensionStart = matcher.end();\n }\n\n if (headers.isEmpty() == false && headers.getFirst().startsWith(\"CEF:\")) {\n CefEvent event = new CefEvent();\n // Add error message if there are not enough header fields\n if (headers.size() != 7) {\n event.addRootMapping(\"error.message\", new HashSet<>(ERROR_MESSAGE_INCOMPLETE_CEF_HEADER));", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2033704333", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 122491, - "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", - "discussion_id": "2033704333", - "commented_code": "@@ -0,0 +1,565 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.common.Strings;\n+import org.elasticsearch.common.network.InetAddresses;\n+import org.elasticsearch.common.network.NetworkAddress;\n+import org.elasticsearch.common.time.DateFormatters;\n+import org.elasticsearch.core.Nullable;\n+\n+import java.time.Instant;\n+import java.time.LocalDate;\n+import java.time.ZoneId;\n+import java.time.ZoneOffset;\n+import java.time.ZonedDateTime;\n+import java.time.format.DateTimeFormatter;\n+import java.time.format.DateTimeParseException;\n+import java.time.temporal.ChronoField;\n+import java.time.temporal.TemporalAccessor;\n+import java.time.temporal.WeekFields;\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.HashSet;\n+import java.util.List;\n+import java.util.Locale;\n+import java.util.Map;\n+import java.util.Objects;\n+import java.util.Set;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+\n+import static java.time.temporal.ChronoField.DAY_OF_MONTH;\n+import static java.time.temporal.ChronoField.HOUR_OF_DAY;\n+import static java.time.temporal.ChronoField.MINUTE_OF_DAY;\n+import static java.time.temporal.ChronoField.MONTH_OF_YEAR;\n+import static java.time.temporal.ChronoField.NANO_OF_SECOND;\n+import static java.time.temporal.ChronoField.SECOND_OF_DAY;\n+import static java.util.Map.entry;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.DoubleType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.FloatType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.IPType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.IntegerType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.LongType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.MACAddressType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.StringType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.TimestampType;\n+\n+final class CefParser {\n+ private final boolean removeEmptyValues;\n+ private final ZoneId timezone;\n+\n+ CefParser(ZoneId timezone, boolean removeEmptyValues) {\n+ this.removeEmptyValues = removeEmptyValues;\n+ this.timezone = timezone;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:[^\\\\s\\\\\\\\]|\\\\\\\\[^|]|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + 
EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+\n+ // Comprehensive regex pattern to match various MAC address formats\n+ private static final String MAC_ADDRESS_REGEX = \"^(\" +\n+ // Combined colon and hyphen separated 6-group patterns\n+ \"(([0-9A-Fa-f]{2}[:|-]){5}[0-9A-Fa-f]{2})|\" +\n+ // Dot-separated 6-group pattern\n+ \"([0-9A-Fa-f]{4}\\\\.){2}[0-9A-Fa-f]{4}|\" +\n+ // Combined colon and hyphen separated 8-group patterns\n+ \"([0-9A-Fa-f]{2}[:|-]){7}[0-9A-Fa-f]{2}|\" +\n+ // Dot-separated EUI-64\n+ \"([0-9A-Fa-f]{4}\\\\.){3}[0-9A-Fa-f]{4}\" + \")$\";\n+\n+ private static final Pattern MAC_ADDRESS_PATTERN = Pattern.compile(MAC_ADDRESS_REGEX);\n+ private static final int EUI48_HEX_LENGTH = 48 / 4;\n+ private static final int EUI64_HEX_LENGTH = 64 / 4;\n+ private static final int EUI64_HEX_WITH_SEPARATOR_MAX_LENGTH = EUI64_HEX_LENGTH + EUI64_HEX_LENGTH / 2 - 1;\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = Map.ofEntries(\n+ entry(\"\\\\\\\\\", \"\\\\\"),\n+ entry(\"\\\\=\", \"=\"),\n+ entry(\"\\\\\\n\", \"\\n\"),\n+ entry(\"\\\\\\r\", \"\\r\")\n+ );\n+\n+ enum DataType {\n+ IntegerType,\n+ LongType,\n+ FloatType,\n+ DoubleType,\n+ StringType,\n+ BooleanType,\n+ IPType,\n+ MACAddressType,\n+ TimestampType\n+ }\n+\n+ private static final Map EXTENSION_MAPPINGS = Map.ofEntries(\n+ entry(\"agt\", new ExtensionMapping(\"agentAddress\", IPType, \"agent.ip\")),\n+ entry(\"agentDnsDomain\", new ExtensionMapping(\"agentDnsDomain\", StringType, \"agent.name\")),\n+ entry(\"ahost\", new ExtensionMapping(\"agentHostName\", StringType, \"agent.name\")),\n+ entry(\"aid\", new ExtensionMapping(\"agentId\", StringType, \"agent.id\")),\n+ entry(\"amac\", new ExtensionMapping(\"agentMacAddress\", MACAddressType, \"agent.mac\")),\n+ entry(\"agentNtDomain\", new ExtensionMapping(\"agentNtDomain\", StringType, null)),\n+ entry(\"art\", new ExtensionMapping(\"agentReceiptTime\", TimestampType, \"event.created\")),\n+ entry(\"atz\", new ExtensionMapping(\"agentTimeZone\", StringType, null)),\n+ entry(\"agentTranslatedAddress\", new ExtensionMapping(\"agentTranslatedAddress\", IPType, null)),\n+ entry(\"agentTranslatedZoneExternalID\", new ExtensionMapping(\"agentTranslatedZoneExternalID\", StringType, null)),\n+ entry(\"agentTranslatedZoneURI\", new ExtensionMapping(\"agentTranslatedZoneURI\", StringType, null)),\n+ entry(\"at\", new ExtensionMapping(\"agentType\", StringType, \"agent.type\")),\n+ entry(\"av\", new ExtensionMapping(\"agentVersion\", StringType, \"agent.version\")),\n+ entry(\"agentZoneExternalID\", new ExtensionMapping(\"agentZoneExternalID\", StringType, null)),\n+ entry(\"agentZoneURI\", new ExtensionMapping(\"agentZoneURI\", StringType, null)),\n+ entry(\"app\", new ExtensionMapping(\"applicationProtocol\", StringType, \"network.protocol\")),\n+ entry(\"cnt\", new ExtensionMapping(\"baseEventCount\", IntegerType, null)),\n+ entry(\"in\", new ExtensionMapping(\"bytesIn\", LongType, \"source.bytes\")),\n+ entry(\"out\", new ExtensionMapping(\"bytesOut\", LongType, \"destination.bytes\")),\n+ entry(\"customerExternalID\", new ExtensionMapping(\"customerExternalID\", StringType, \"organization.id\")),\n+ entry(\"customerURI\", new ExtensionMapping(\"customerURI\", StringType, \"organization.name\")),\n+ entry(\"dst\", new ExtensionMapping(\"destinationAddress\", IPType, \"destination.ip\")),\n+ entry(\"destinationDnsDomain\", new ExtensionMapping(\"destinationDnsDomain\", StringType, \"destination.registered_domain\")),\n+ entry(\"dlat\", new 
ExtensionMapping(\"destinationGeoLatitude\", DoubleType, \"destination.geo.location.lat\")),\n+ entry(\"dlong\", new ExtensionMapping(\"destinationGeoLongitude\", DoubleType, \"destination.geo.location.lon\")),\n+ entry(\"dhost\", new ExtensionMapping(\"destinationHostName\", StringType, \"destination.domain\")),\n+ entry(\"dmac\", new ExtensionMapping(\"destinationMacAddress\", MACAddressType, \"destination.mac\")),\n+ entry(\"dntdom\", new ExtensionMapping(\"destinationNtDomain\", StringType, \"destination.registered_domain\")),\n+ entry(\"dpt\", new ExtensionMapping(\"destinationPort\", IntegerType, \"destination.port\")),\n+ entry(\"dpid\", new ExtensionMapping(\"destinationProcessId\", LongType, \"destination.process.pid\")),\n+ entry(\"dproc\", new ExtensionMapping(\"destinationProcessName\", StringType, \"destination.process.name\")),\n+ entry(\"destinationServiceName\", new ExtensionMapping(\"destinationServiceName\", StringType, \"destination.service.name\")),\n+ entry(\"destinationTranslatedAddress\", new ExtensionMapping(\"destinationTranslatedAddress\", IPType, \"destination.nat.ip\")),\n+ entry(\"destinationTranslatedPort\", new ExtensionMapping(\"destinationTranslatedPort\", IntegerType, \"destination.nat.port\")),\n+ entry(\"destinationTranslatedZoneExternalID\", new ExtensionMapping(\"destinationTranslatedZoneExternalID\", StringType, null)),\n+ entry(\"destinationTranslatedZoneURI\", new ExtensionMapping(\"destinationTranslatedZoneURI\", StringType, null)),\n+ entry(\"duid\", new ExtensionMapping(\"destinationUserId\", StringType, \"destination.user.id\")),\n+ entry(\"duser\", new ExtensionMapping(\"destinationUserName\", StringType, \"destination.user.name\")),\n+ entry(\"dpriv\", new ExtensionMapping(\"destinationUserPrivileges\", StringType, \"destination.user.group.name\")),\n+ entry(\"destinationZoneExternalID\", new ExtensionMapping(\"destinationZoneExternalID\", StringType, null)),\n+ entry(\"destinationZoneURI\", new ExtensionMapping(\"destinationZoneURI\", StringType, null)),\n+ entry(\"act\", new ExtensionMapping(\"deviceAction\", StringType, \"event.action\")),\n+ entry(\"dvc\", new ExtensionMapping(\"deviceAddress\", IPType, \"observer.ip\")),\n+ entry(\"cfp1Label\", new ExtensionMapping(\"deviceCustomFloatingPoint1Label\", StringType, null)),\n+ entry(\"cfp3Label\", new ExtensionMapping(\"deviceCustomFloatingPoint3Label\", StringType, null)),\n+ entry(\"cfp4Label\", new ExtensionMapping(\"deviceCustomFloatingPoint4Label\", StringType, null)),\n+ entry(\"deviceCustomDate1\", new ExtensionMapping(\"deviceCustomDate1\", TimestampType, null)),\n+ entry(\"deviceCustomDate1Label\", new ExtensionMapping(\"deviceCustomDate1Label\", StringType, null)),\n+ entry(\"deviceCustomDate2\", new ExtensionMapping(\"deviceCustomDate2\", TimestampType, null)),\n+ entry(\"deviceCustomDate2Label\", new ExtensionMapping(\"deviceCustomDate2Label\", StringType, null)),\n+ entry(\"cfp1\", new ExtensionMapping(\"deviceCustomFloatingPoint1\", FloatType, null)),\n+ entry(\"cfp2\", new ExtensionMapping(\"deviceCustomFloatingPoint2\", FloatType, null)),\n+ entry(\"cfp2Label\", new ExtensionMapping(\"deviceCustomFloatingPoint2Label\", StringType, null)),\n+ entry(\"cfp3\", new ExtensionMapping(\"deviceCustomFloatingPoint3\", FloatType, null)),\n+ entry(\"cfp4\", new ExtensionMapping(\"deviceCustomFloatingPoint4\", FloatType, null)),\n+ entry(\"c6a1\", new ExtensionMapping(\"deviceCustomIPv6Address1\", IPType, null)),\n+ entry(\"c6a1Label\", new 
ExtensionMapping(\"deviceCustomIPv6Address1Label\", StringType, null)),\n+ entry(\"c6a2\", new ExtensionMapping(\"deviceCustomIPv6Address2\", IPType, null)),\n+ entry(\"c6a2Label\", new ExtensionMapping(\"deviceCustomIPv6Address2Label\", StringType, null)),\n+ entry(\"c6a3\", new ExtensionMapping(\"deviceCustomIPv6Address3\", IPType, null)),\n+ entry(\"c6a3Label\", new ExtensionMapping(\"deviceCustomIPv6Address3Label\", StringType, null)),\n+ entry(\"c6a4\", new ExtensionMapping(\"deviceCustomIPv6Address4\", IPType, null)),\n+ entry(\"C6a4Label\", new ExtensionMapping(\"deviceCustomIPv6Address4Label\", StringType, null)),\n+ entry(\"cn1\", new ExtensionMapping(\"deviceCustomNumber1\", LongType, null)),\n+ entry(\"cn1Label\", new ExtensionMapping(\"deviceCustomNumber1Label\", StringType, null)),\n+ entry(\"cn2\", new ExtensionMapping(\"deviceCustomNumber2\", LongType, null)),\n+ entry(\"cn2Label\", new ExtensionMapping(\"deviceCustomNumber2Label\", StringType, null)),\n+ entry(\"cn3\", new ExtensionMapping(\"deviceCustomNumber3\", LongType, null)),\n+ entry(\"cn3Label\", new ExtensionMapping(\"deviceCustomNumber3Label\", StringType, null)),\n+ entry(\"cs1\", new ExtensionMapping(\"deviceCustomString1\", StringType, null)),\n+ entry(\"cs1Label\", new ExtensionMapping(\"deviceCustomString1Label\", StringType, null)),\n+ entry(\"cs2\", new ExtensionMapping(\"deviceCustomString2\", StringType, null)),\n+ entry(\"cs2Label\", new ExtensionMapping(\"deviceCustomString2Label\", StringType, null)),\n+ entry(\"cs3\", new ExtensionMapping(\"deviceCustomString3\", StringType, null)),\n+ entry(\"cs3Label\", new ExtensionMapping(\"deviceCustomString3Label\", StringType, null)),\n+ entry(\"cs4\", new ExtensionMapping(\"deviceCustomString4\", StringType, null)),\n+ entry(\"cs4Label\", new ExtensionMapping(\"deviceCustomString4Label\", StringType, null)),\n+ entry(\"cs5\", new ExtensionMapping(\"deviceCustomString5\", StringType, null)),\n+ entry(\"cs5Label\", new ExtensionMapping(\"deviceCustomString5Label\", StringType, null)),\n+ entry(\"cs6\", new ExtensionMapping(\"deviceCustomString6\", StringType, null)),\n+ entry(\"cs6Label\", new ExtensionMapping(\"deviceCustomString6Label\", StringType, null)),\n+ entry(\"deviceDirection\", new ExtensionMapping(\"deviceDirection\", StringType, \"network.direction\")),\n+ entry(\"deviceDnsDomain\", new ExtensionMapping(\"deviceDnsDomain\", StringType, \"observer.registered_domain\")),\n+ entry(\"cat\", new ExtensionMapping(\"deviceEventCategory\", StringType, null)),\n+ entry(\"deviceExternalId\", new ExtensionMapping(\"deviceExternalId\", StringType, \"observer.name\")),\n+ entry(\"deviceFacility\", new ExtensionMapping(\"deviceFacility\", LongType, \"log.syslog.facility.code\")),\n+ entry(\"dvchost\", new ExtensionMapping(\"deviceHostName\", StringType, \"observer.hostname\")),\n+ entry(\"deviceInboundInterface\", new ExtensionMapping(\"deviceInboundInterface\", StringType, \"observer.ingress.interface.name\")),\n+ entry(\"dvcmac\", new ExtensionMapping(\"deviceMacAddress\", MACAddressType, \"observer.mac\")),\n+ entry(\"deviceNtDomain\", new ExtensionMapping(\"deviceNtDomain\", StringType, null)),\n+ entry(\"deviceOutboundInterface\", new ExtensionMapping(\"deviceOutboundInterface\", StringType, \"observer.egress.interface.name\")),\n+ entry(\"devicePayloadId\", new ExtensionMapping(\"devicePayloadId\", StringType, \"event.id\")),\n+ entry(\"dvcpid\", new ExtensionMapping(\"deviceProcessId\", LongType, \"process.pid\")),\n+ entry(\"deviceProcessName\", new 
ExtensionMapping(\"deviceProcessName\", StringType, \"process.name\")),\n+ entry(\"rt\", new ExtensionMapping(\"deviceReceiptTime\", TimestampType, \"@timestamp\")),\n+ entry(\"dtz\", new ExtensionMapping(\"deviceTimeZone\", StringType, \"event.timezone\")),\n+ entry(\"deviceTranslatedAddress\", new ExtensionMapping(\"deviceTranslatedAddress\", IPType, \"host.nat.ip\")),\n+ entry(\"deviceTranslatedZoneExternalID\", new ExtensionMapping(\"deviceTranslatedZoneExternalID\", StringType, null)),\n+ entry(\"deviceTranslatedZoneURI\", new ExtensionMapping(\"deviceTranslatedZoneURI\", StringType, null)),\n+ entry(\"deviceZoneExternalID\", new ExtensionMapping(\"deviceZoneExternalID\", StringType, null)),\n+ entry(\"deviceZoneURI\", new ExtensionMapping(\"deviceZoneURI\", StringType, null)),\n+ entry(\"end\", new ExtensionMapping(\"endTime\", TimestampType, \"event.end\")),\n+ entry(\"eventId\", new ExtensionMapping(\"eventId\", StringType, \"event.id\")),\n+ entry(\"outcome\", new ExtensionMapping(\"eventOutcome\", StringType, \"event.outcome\")),\n+ entry(\"externalId\", new ExtensionMapping(\"externalId\", StringType, null)),\n+ entry(\"fileCreateTime\", new ExtensionMapping(\"fileCreateTime\", TimestampType, \"file.created\")),\n+ entry(\"fileHash\", new ExtensionMapping(\"fileHash\", StringType, \"file.hash\")),\n+ entry(\"fileId\", new ExtensionMapping(\"fileId\", StringType, \"file.inode\")),\n+ entry(\"fileModificationTime\", new ExtensionMapping(\"fileModificationTime\", TimestampType, \"file.mtime\")),\n+ entry(\"flexNumber1\", new ExtensionMapping(\"deviceFlexNumber1\", LongType, null)),\n+ entry(\"flexNumber1Label\", new ExtensionMapping(\"deviceFlexNumber1Label\", StringType, null)),\n+ entry(\"flexNumber2\", new ExtensionMapping(\"deviceFlexNumber2\", LongType, null)),\n+ entry(\"flexNumber2Label\", new ExtensionMapping(\"deviceFlexNumber2Label\", StringType, null)),\n+ entry(\"fname\", new ExtensionMapping(\"filename\", StringType, \"file.name\")),\n+ entry(\"filePath\", new ExtensionMapping(\"filePath\", StringType, \"file.path\")),\n+ entry(\"filePermission\", new ExtensionMapping(\"filePermission\", StringType, \"file.group\")),\n+ entry(\"fsize\", new ExtensionMapping(\"fileSize\", LongType, \"file.size\")),\n+ entry(\"fileType\", new ExtensionMapping(\"fileType\", StringType, \"file.type\")),\n+ entry(\"flexDate1\", new ExtensionMapping(\"flexDate1\", TimestampType, null)),\n+ entry(\"flexDate1Label\", new ExtensionMapping(\"flexDate1Label\", StringType, null)),\n+ entry(\"flexString1\", new ExtensionMapping(\"flexString1\", StringType, null)),\n+ entry(\"flexString2\", new ExtensionMapping(\"flexString2\", StringType, null)),\n+ entry(\"flexString1Label\", new ExtensionMapping(\"flexString1Label\", StringType, null)),\n+ entry(\"flexString2Label\", new ExtensionMapping(\"flexString2Label\", StringType, null)),\n+ entry(\"msg\", new ExtensionMapping(\"message\", StringType, \"message\")),\n+ entry(\"oldFileCreateTime\", new ExtensionMapping(\"oldFileCreateTime\", TimestampType, null)),\n+ entry(\"oldFileHash\", new ExtensionMapping(\"oldFileHash\", StringType, null)),\n+ entry(\"oldFileId\", new ExtensionMapping(\"oldFileId\", StringType, null)),\n+ entry(\"oldFileModificationTime\", new ExtensionMapping(\"oldFileModificationTime\", TimestampType, null)),\n+ entry(\"oldFileName\", new ExtensionMapping(\"oldFileName\", StringType, null)),\n+ entry(\"oldFilePath\", new ExtensionMapping(\"oldFilePath\", StringType, null)),\n+ entry(\"oldFilePermission\", new 
ExtensionMapping(\"oldFilePermission\", StringType, null)),\n+ entry(\"oldFileSize\", new ExtensionMapping(\"oldFileSize\", IntegerType, null)),\n+ entry(\"oldFileType\", new ExtensionMapping(\"oldFileType\", StringType, null)),\n+ entry(\"rawEvent\", new ExtensionMapping(\"rawEvent\", StringType, \"event.original\")),\n+ entry(\"reason\", new ExtensionMapping(\"Reason\", StringType, \"event.reason\")),\n+ entry(\"requestClientApplication\", new ExtensionMapping(\"requestClientApplication\", StringType, \"user_agent.original\")),\n+ entry(\"requestContext\", new ExtensionMapping(\"requestContext\", StringType, \"http.request.referrer\")),\n+ entry(\"requestCookies\", new ExtensionMapping(\"requestCookies\", StringType, null)),\n+ entry(\"requestMethod\", new ExtensionMapping(\"requestMethod\", StringType, \"http.request.method\")),\n+ entry(\"request\", new ExtensionMapping(\"requestUrl\", StringType, \"url.original\")),\n+ entry(\"src\", new ExtensionMapping(\"sourceAddress\", IPType, \"source.ip\")),\n+ entry(\"sourceDnsDomain\", new ExtensionMapping(\"sourceDnsDomain\", StringType, \"source.domain\")),\n+ entry(\"slat\", new ExtensionMapping(\"sourceGeoLatitude\", DoubleType, \"source.geo.location.lat\")),\n+ entry(\"slong\", new ExtensionMapping(\"sourceGeoLongitude\", DoubleType, \"source.geo.location.lon\")),\n+ entry(\"shost\", new ExtensionMapping(\"sourceHostName\", StringType, \"source.domain\")),\n+ entry(\"smac\", new ExtensionMapping(\"sourceMacAddress\", MACAddressType, \"source.mac\")),\n+ entry(\"sntdom\", new ExtensionMapping(\"sourceNtDomain\", StringType, \"source.registered_domain\")),\n+ entry(\"spt\", new ExtensionMapping(\"sourcePort\", IntegerType, \"source.port\")),\n+ entry(\"spid\", new ExtensionMapping(\"sourceProcessId\", LongType, \"source.process.pid\")),\n+ entry(\"sproc\", new ExtensionMapping(\"sourceProcessName\", StringType, \"source.process.name\")),\n+ entry(\"sourceServiceName\", new ExtensionMapping(\"sourceServiceName\", StringType, \"source.service.name\")),\n+ entry(\"sourceTranslatedAddress\", new ExtensionMapping(\"sourceTranslatedAddress\", IPType, \"source.nat.ip\")),\n+ entry(\"sourceTranslatedPort\", new ExtensionMapping(\"sourceTranslatedPort\", IntegerType, \"source.nat.port\")),\n+ entry(\"sourceTranslatedZoneExternalID\", new ExtensionMapping(\"sourceTranslatedZoneExternalID\", StringType, null)),\n+ entry(\"sourceTranslatedZoneURI\", new ExtensionMapping(\"sourceTranslatedZoneURI\", StringType, null)),\n+ entry(\"suid\", new ExtensionMapping(\"sourceUserId\", StringType, \"source.user.id\")),\n+ entry(\"suser\", new ExtensionMapping(\"sourceUserName\", StringType, \"source.user.name\")),\n+ entry(\"spriv\", new ExtensionMapping(\"sourceUserPrivileges\", StringType, \"source.user.group.name\")),\n+ entry(\"sourceZoneExternalID\", new ExtensionMapping(\"sourceZoneExternalID\", StringType, null)),\n+ entry(\"sourceZoneURI\", new ExtensionMapping(\"sourceZoneURI\", StringType, null)),\n+ entry(\"start\", new ExtensionMapping(\"startTime\", TimestampType, \"event.start\")),\n+ entry(\"proto\", new ExtensionMapping(\"transportProtocol\", StringType, \"network.transport\")),\n+ entry(\"type\", new ExtensionMapping(\"type\", IntegerType, \"event.kind\")),\n+ entry(\"catdt\", new ExtensionMapping(\"categoryDeviceType\", StringType, null)),\n+ entry(\"mrt\", new ExtensionMapping(\"managerReceiptTime\", TimestampType, \"event.ingested\"))\n+ );\n+\n+ private static final Set ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = Set.of(\"incomplete CEF 
header\");\n+ private static final List TIME_LAYOUTS = List.of(\n+ // MMM dd HH:mm:ss.SSS zzz\n+ \"MMM dd HH:mm:ss.SSS z\",\n+ \"MMM dd HH:mm:ss.SSS Z\",\n+ \"MMM dd HH:mm:ss.SSS 'GMT'XX:XX\",\n+ // MMM dd HH:mm:sss.SSS\n+ \"MMM dd HH:mm:ss.SSS\",\n+ // MMM dd HH:mm:ss zzz\n+ \"MMM dd HH:mm:ss z\",\n+ \"MMM dd HH:mm:ss Z\",\n+ \"MMM dd HH:mm:ss 'GMT'XX:XX\",\n+ // MMM dd HH:mm:ss\n+ \"MMM dd HH:mm:ss\",\n+ // MMM dd yyyy HH:mm:ss.SSS zzz\n+ \"MMM dd yyyy HH:mm:ss.SSS z\",\n+ \"MMM dd yyyy HH:mm:ss.SSS Z\",\n+ \"MMM dd yyyy HH:mm:ss.SSS 'GMT'XX:XX\",\n+ // MMM dd yyyy HH:mm:ss.SSS\n+ \"MMM dd yyyy HH:mm:ss.SSS\",\n+ // MMM dd yyyy HH:mm:ss zzz\n+ \"MMM dd yyyy HH:mm:ss z\",\n+ \"MMM dd yyyy HH:mm:ss Z\",\n+ \"MMM dd yyyy HH:mm:ss 'GMT'XX:XX\",\n+ // MMM dd yyyy HH:mm:ss\n+ \"MMM dd yyyy HH:mm:ss\"\n+ );\n+\n+ private static final List CHRONO_FIELDS = List.of(\n+ NANO_OF_SECOND,\n+ SECOND_OF_DAY,\n+ MINUTE_OF_DAY,\n+ HOUR_OF_DAY,\n+ DAY_OF_MONTH,\n+ MONTH_OF_YEAR\n+ );\n+\n+ CefEvent process(String cefString) {\n+ List headers = new ArrayList<>();\n+ Matcher matcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && matcher.find(); i++) {\n+ String field = matcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headers.add(field);\n+ extensionStart = matcher.end();\n+ }\n+\n+ if (headers.isEmpty() == false && headers.getFirst().startsWith(\"CEF:\")) {\n+ CefEvent event = new CefEvent();\n+ // Add error message if there are not enough header fields\n+ if (headers.size() != 7) {\n+ event.addRootMapping(\"error.message\", new HashSet<>(ERROR_MESSAGE_INCOMPLETE_CEF_HEADER));", - "comment_created_at": "2025-04-08T17:24:22+00:00", - "comment_author": "joegallo", - "comment_body": "Why does this implementation add an error message and not throw an exception? This doesn't seem in line with the way ingest processing usually handles errors.", - "pr_file_module": null - }, - { - "comment_id": "2034669550", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 122491, - "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", - "discussion_id": "2033704333", - "commented_code": "@@ -0,0 +1,565 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.common.Strings;\n+import org.elasticsearch.common.network.InetAddresses;\n+import org.elasticsearch.common.network.NetworkAddress;\n+import org.elasticsearch.common.time.DateFormatters;\n+import org.elasticsearch.core.Nullable;\n+\n+import java.time.Instant;\n+import java.time.LocalDate;\n+import java.time.ZoneId;\n+import java.time.ZoneOffset;\n+import java.time.ZonedDateTime;\n+import java.time.format.DateTimeFormatter;\n+import java.time.format.DateTimeParseException;\n+import java.time.temporal.ChronoField;\n+import java.time.temporal.TemporalAccessor;\n+import java.time.temporal.WeekFields;\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.HashSet;\n+import java.util.List;\n+import java.util.Locale;\n+import java.util.Map;\n+import java.util.Objects;\n+import java.util.Set;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+\n+import static java.time.temporal.ChronoField.DAY_OF_MONTH;\n+import static java.time.temporal.ChronoField.HOUR_OF_DAY;\n+import static java.time.temporal.ChronoField.MINUTE_OF_DAY;\n+import static java.time.temporal.ChronoField.MONTH_OF_YEAR;\n+import static java.time.temporal.ChronoField.NANO_OF_SECOND;\n+import static java.time.temporal.ChronoField.SECOND_OF_DAY;\n+import static java.util.Map.entry;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.DoubleType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.FloatType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.IPType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.IntegerType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.LongType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.MACAddressType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.StringType;\n+import static org.elasticsearch.ingest.common.CefParser.DataType.TimestampType;\n+\n+final class CefParser {\n+ private final boolean removeEmptyValues;\n+ private final ZoneId timezone;\n+\n+ CefParser(ZoneId timezone, boolean removeEmptyValues) {\n+ this.removeEmptyValues = removeEmptyValues;\n+ this.timezone = timezone;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:[^\\\\s\\\\\\\\]|\\\\\\\\[^|]|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + 
EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+\n+ // Comprehensive regex pattern to match various MAC address formats\n+ private static final String MAC_ADDRESS_REGEX = \"^(\" +\n+ // Combined colon and hyphen separated 6-group patterns\n+ \"(([0-9A-Fa-f]{2}[:|-]){5}[0-9A-Fa-f]{2})|\" +\n+ // Dot-separated 6-group pattern\n+ \"([0-9A-Fa-f]{4}\\\\.){2}[0-9A-Fa-f]{4}|\" +\n+ // Combined colon and hyphen separated 8-group patterns\n+ \"([0-9A-Fa-f]{2}[:|-]){7}[0-9A-Fa-f]{2}|\" +\n+ // Dot-separated EUI-64\n+ \"([0-9A-Fa-f]{4}\\\\.){3}[0-9A-Fa-f]{4}\" + \")$\";\n+\n+ private static final Pattern MAC_ADDRESS_PATTERN = Pattern.compile(MAC_ADDRESS_REGEX);\n+ private static final int EUI48_HEX_LENGTH = 48 / 4;\n+ private static final int EUI64_HEX_LENGTH = 64 / 4;\n+ private static final int EUI64_HEX_WITH_SEPARATOR_MAX_LENGTH = EUI64_HEX_LENGTH + EUI64_HEX_LENGTH / 2 - 1;\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = Map.ofEntries(\n+ entry(\"\\\\\\\\\", \"\\\\\"),\n+ entry(\"\\\\=\", \"=\"),\n+ entry(\"\\\\\\n\", \"\\n\"),\n+ entry(\"\\\\\\r\", \"\\r\")\n+ );\n+\n+ enum DataType {\n+ IntegerType,\n+ LongType,\n+ FloatType,\n+ DoubleType,\n+ StringType,\n+ BooleanType,\n+ IPType,\n+ MACAddressType,\n+ TimestampType\n+ }\n+\n+ private static final Map EXTENSION_MAPPINGS = Map.ofEntries(\n+ entry(\"agt\", new ExtensionMapping(\"agentAddress\", IPType, \"agent.ip\")),\n+ entry(\"agentDnsDomain\", new ExtensionMapping(\"agentDnsDomain\", StringType, \"agent.name\")),\n+ entry(\"ahost\", new ExtensionMapping(\"agentHostName\", StringType, \"agent.name\")),\n+ entry(\"aid\", new ExtensionMapping(\"agentId\", StringType, \"agent.id\")),\n+ entry(\"amac\", new ExtensionMapping(\"agentMacAddress\", MACAddressType, \"agent.mac\")),\n+ entry(\"agentNtDomain\", new ExtensionMapping(\"agentNtDomain\", StringType, null)),\n+ entry(\"art\", new ExtensionMapping(\"agentReceiptTime\", TimestampType, \"event.created\")),\n+ entry(\"atz\", new ExtensionMapping(\"agentTimeZone\", StringType, null)),\n+ entry(\"agentTranslatedAddress\", new ExtensionMapping(\"agentTranslatedAddress\", IPType, null)),\n+ entry(\"agentTranslatedZoneExternalID\", new ExtensionMapping(\"agentTranslatedZoneExternalID\", StringType, null)),\n+ entry(\"agentTranslatedZoneURI\", new ExtensionMapping(\"agentTranslatedZoneURI\", StringType, null)),\n+ entry(\"at\", new ExtensionMapping(\"agentType\", StringType, \"agent.type\")),\n+ entry(\"av\", new ExtensionMapping(\"agentVersion\", StringType, \"agent.version\")),\n+ entry(\"agentZoneExternalID\", new ExtensionMapping(\"agentZoneExternalID\", StringType, null)),\n+ entry(\"agentZoneURI\", new ExtensionMapping(\"agentZoneURI\", StringType, null)),\n+ entry(\"app\", new ExtensionMapping(\"applicationProtocol\", StringType, \"network.protocol\")),\n+ entry(\"cnt\", new ExtensionMapping(\"baseEventCount\", IntegerType, null)),\n+ entry(\"in\", new ExtensionMapping(\"bytesIn\", LongType, \"source.bytes\")),\n+ entry(\"out\", new ExtensionMapping(\"bytesOut\", LongType, \"destination.bytes\")),\n+ entry(\"customerExternalID\", new ExtensionMapping(\"customerExternalID\", StringType, \"organization.id\")),\n+ entry(\"customerURI\", new ExtensionMapping(\"customerURI\", StringType, \"organization.name\")),\n+ entry(\"dst\", new ExtensionMapping(\"destinationAddress\", IPType, \"destination.ip\")),\n+ entry(\"destinationDnsDomain\", new ExtensionMapping(\"destinationDnsDomain\", StringType, \"destination.registered_domain\")),\n+ entry(\"dlat\", new 
ExtensionMapping(\"destinationGeoLatitude\", DoubleType, \"destination.geo.location.lat\")),\n+ entry(\"dlong\", new ExtensionMapping(\"destinationGeoLongitude\", DoubleType, \"destination.geo.location.lon\")),\n+ entry(\"dhost\", new ExtensionMapping(\"destinationHostName\", StringType, \"destination.domain\")),\n+ entry(\"dmac\", new ExtensionMapping(\"destinationMacAddress\", MACAddressType, \"destination.mac\")),\n+ entry(\"dntdom\", new ExtensionMapping(\"destinationNtDomain\", StringType, \"destination.registered_domain\")),\n+ entry(\"dpt\", new ExtensionMapping(\"destinationPort\", IntegerType, \"destination.port\")),\n+ entry(\"dpid\", new ExtensionMapping(\"destinationProcessId\", LongType, \"destination.process.pid\")),\n+ entry(\"dproc\", new ExtensionMapping(\"destinationProcessName\", StringType, \"destination.process.name\")),\n+ entry(\"destinationServiceName\", new ExtensionMapping(\"destinationServiceName\", StringType, \"destination.service.name\")),\n+ entry(\"destinationTranslatedAddress\", new ExtensionMapping(\"destinationTranslatedAddress\", IPType, \"destination.nat.ip\")),\n+ entry(\"destinationTranslatedPort\", new ExtensionMapping(\"destinationTranslatedPort\", IntegerType, \"destination.nat.port\")),\n+ entry(\"destinationTranslatedZoneExternalID\", new ExtensionMapping(\"destinationTranslatedZoneExternalID\", StringType, null)),\n+ entry(\"destinationTranslatedZoneURI\", new ExtensionMapping(\"destinationTranslatedZoneURI\", StringType, null)),\n+ entry(\"duid\", new ExtensionMapping(\"destinationUserId\", StringType, \"destination.user.id\")),\n+ entry(\"duser\", new ExtensionMapping(\"destinationUserName\", StringType, \"destination.user.name\")),\n+ entry(\"dpriv\", new ExtensionMapping(\"destinationUserPrivileges\", StringType, \"destination.user.group.name\")),\n+ entry(\"destinationZoneExternalID\", new ExtensionMapping(\"destinationZoneExternalID\", StringType, null)),\n+ entry(\"destinationZoneURI\", new ExtensionMapping(\"destinationZoneURI\", StringType, null)),\n+ entry(\"act\", new ExtensionMapping(\"deviceAction\", StringType, \"event.action\")),\n+ entry(\"dvc\", new ExtensionMapping(\"deviceAddress\", IPType, \"observer.ip\")),\n+ entry(\"cfp1Label\", new ExtensionMapping(\"deviceCustomFloatingPoint1Label\", StringType, null)),\n+ entry(\"cfp3Label\", new ExtensionMapping(\"deviceCustomFloatingPoint3Label\", StringType, null)),\n+ entry(\"cfp4Label\", new ExtensionMapping(\"deviceCustomFloatingPoint4Label\", StringType, null)),\n+ entry(\"deviceCustomDate1\", new ExtensionMapping(\"deviceCustomDate1\", TimestampType, null)),\n+ entry(\"deviceCustomDate1Label\", new ExtensionMapping(\"deviceCustomDate1Label\", StringType, null)),\n+ entry(\"deviceCustomDate2\", new ExtensionMapping(\"deviceCustomDate2\", TimestampType, null)),\n+ entry(\"deviceCustomDate2Label\", new ExtensionMapping(\"deviceCustomDate2Label\", StringType, null)),\n+ entry(\"cfp1\", new ExtensionMapping(\"deviceCustomFloatingPoint1\", FloatType, null)),\n+ entry(\"cfp2\", new ExtensionMapping(\"deviceCustomFloatingPoint2\", FloatType, null)),\n+ entry(\"cfp2Label\", new ExtensionMapping(\"deviceCustomFloatingPoint2Label\", StringType, null)),\n+ entry(\"cfp3\", new ExtensionMapping(\"deviceCustomFloatingPoint3\", FloatType, null)),\n+ entry(\"cfp4\", new ExtensionMapping(\"deviceCustomFloatingPoint4\", FloatType, null)),\n+ entry(\"c6a1\", new ExtensionMapping(\"deviceCustomIPv6Address1\", IPType, null)),\n+ entry(\"c6a1Label\", new 
ExtensionMapping(\"deviceCustomIPv6Address1Label\", StringType, null)),\n+ entry(\"c6a2\", new ExtensionMapping(\"deviceCustomIPv6Address2\", IPType, null)),\n+ entry(\"c6a2Label\", new ExtensionMapping(\"deviceCustomIPv6Address2Label\", StringType, null)),\n+ entry(\"c6a3\", new ExtensionMapping(\"deviceCustomIPv6Address3\", IPType, null)),\n+ entry(\"c6a3Label\", new ExtensionMapping(\"deviceCustomIPv6Address3Label\", StringType, null)),\n+ entry(\"c6a4\", new ExtensionMapping(\"deviceCustomIPv6Address4\", IPType, null)),\n+ entry(\"C6a4Label\", new ExtensionMapping(\"deviceCustomIPv6Address4Label\", StringType, null)),\n+ entry(\"cn1\", new ExtensionMapping(\"deviceCustomNumber1\", LongType, null)),\n+ entry(\"cn1Label\", new ExtensionMapping(\"deviceCustomNumber1Label\", StringType, null)),\n+ entry(\"cn2\", new ExtensionMapping(\"deviceCustomNumber2\", LongType, null)),\n+ entry(\"cn2Label\", new ExtensionMapping(\"deviceCustomNumber2Label\", StringType, null)),\n+ entry(\"cn3\", new ExtensionMapping(\"deviceCustomNumber3\", LongType, null)),\n+ entry(\"cn3Label\", new ExtensionMapping(\"deviceCustomNumber3Label\", StringType, null)),\n+ entry(\"cs1\", new ExtensionMapping(\"deviceCustomString1\", StringType, null)),\n+ entry(\"cs1Label\", new ExtensionMapping(\"deviceCustomString1Label\", StringType, null)),\n+ entry(\"cs2\", new ExtensionMapping(\"deviceCustomString2\", StringType, null)),\n+ entry(\"cs2Label\", new ExtensionMapping(\"deviceCustomString2Label\", StringType, null)),\n+ entry(\"cs3\", new ExtensionMapping(\"deviceCustomString3\", StringType, null)),\n+ entry(\"cs3Label\", new ExtensionMapping(\"deviceCustomString3Label\", StringType, null)),\n+ entry(\"cs4\", new ExtensionMapping(\"deviceCustomString4\", StringType, null)),\n+ entry(\"cs4Label\", new ExtensionMapping(\"deviceCustomString4Label\", StringType, null)),\n+ entry(\"cs5\", new ExtensionMapping(\"deviceCustomString5\", StringType, null)),\n+ entry(\"cs5Label\", new ExtensionMapping(\"deviceCustomString5Label\", StringType, null)),\n+ entry(\"cs6\", new ExtensionMapping(\"deviceCustomString6\", StringType, null)),\n+ entry(\"cs6Label\", new ExtensionMapping(\"deviceCustomString6Label\", StringType, null)),\n+ entry(\"deviceDirection\", new ExtensionMapping(\"deviceDirection\", StringType, \"network.direction\")),\n+ entry(\"deviceDnsDomain\", new ExtensionMapping(\"deviceDnsDomain\", StringType, \"observer.registered_domain\")),\n+ entry(\"cat\", new ExtensionMapping(\"deviceEventCategory\", StringType, null)),\n+ entry(\"deviceExternalId\", new ExtensionMapping(\"deviceExternalId\", StringType, \"observer.name\")),\n+ entry(\"deviceFacility\", new ExtensionMapping(\"deviceFacility\", LongType, \"log.syslog.facility.code\")),\n+ entry(\"dvchost\", new ExtensionMapping(\"deviceHostName\", StringType, \"observer.hostname\")),\n+ entry(\"deviceInboundInterface\", new ExtensionMapping(\"deviceInboundInterface\", StringType, \"observer.ingress.interface.name\")),\n+ entry(\"dvcmac\", new ExtensionMapping(\"deviceMacAddress\", MACAddressType, \"observer.mac\")),\n+ entry(\"deviceNtDomain\", new ExtensionMapping(\"deviceNtDomain\", StringType, null)),\n+ entry(\"deviceOutboundInterface\", new ExtensionMapping(\"deviceOutboundInterface\", StringType, \"observer.egress.interface.name\")),\n+ entry(\"devicePayloadId\", new ExtensionMapping(\"devicePayloadId\", StringType, \"event.id\")),\n+ entry(\"dvcpid\", new ExtensionMapping(\"deviceProcessId\", LongType, \"process.pid\")),\n+ entry(\"deviceProcessName\", new 
ExtensionMapping(\"deviceProcessName\", StringType, \"process.name\")),\n+ entry(\"rt\", new ExtensionMapping(\"deviceReceiptTime\", TimestampType, \"@timestamp\")),\n+ entry(\"dtz\", new ExtensionMapping(\"deviceTimeZone\", StringType, \"event.timezone\")),\n+ entry(\"deviceTranslatedAddress\", new ExtensionMapping(\"deviceTranslatedAddress\", IPType, \"host.nat.ip\")),\n+ entry(\"deviceTranslatedZoneExternalID\", new ExtensionMapping(\"deviceTranslatedZoneExternalID\", StringType, null)),\n+ entry(\"deviceTranslatedZoneURI\", new ExtensionMapping(\"deviceTranslatedZoneURI\", StringType, null)),\n+ entry(\"deviceZoneExternalID\", new ExtensionMapping(\"deviceZoneExternalID\", StringType, null)),\n+ entry(\"deviceZoneURI\", new ExtensionMapping(\"deviceZoneURI\", StringType, null)),\n+ entry(\"end\", new ExtensionMapping(\"endTime\", TimestampType, \"event.end\")),\n+ entry(\"eventId\", new ExtensionMapping(\"eventId\", StringType, \"event.id\")),\n+ entry(\"outcome\", new ExtensionMapping(\"eventOutcome\", StringType, \"event.outcome\")),\n+ entry(\"externalId\", new ExtensionMapping(\"externalId\", StringType, null)),\n+ entry(\"fileCreateTime\", new ExtensionMapping(\"fileCreateTime\", TimestampType, \"file.created\")),\n+ entry(\"fileHash\", new ExtensionMapping(\"fileHash\", StringType, \"file.hash\")),\n+ entry(\"fileId\", new ExtensionMapping(\"fileId\", StringType, \"file.inode\")),\n+ entry(\"fileModificationTime\", new ExtensionMapping(\"fileModificationTime\", TimestampType, \"file.mtime\")),\n+ entry(\"flexNumber1\", new ExtensionMapping(\"deviceFlexNumber1\", LongType, null)),\n+ entry(\"flexNumber1Label\", new ExtensionMapping(\"deviceFlexNumber1Label\", StringType, null)),\n+ entry(\"flexNumber2\", new ExtensionMapping(\"deviceFlexNumber2\", LongType, null)),\n+ entry(\"flexNumber2Label\", new ExtensionMapping(\"deviceFlexNumber2Label\", StringType, null)),\n+ entry(\"fname\", new ExtensionMapping(\"filename\", StringType, \"file.name\")),\n+ entry(\"filePath\", new ExtensionMapping(\"filePath\", StringType, \"file.path\")),\n+ entry(\"filePermission\", new ExtensionMapping(\"filePermission\", StringType, \"file.group\")),\n+ entry(\"fsize\", new ExtensionMapping(\"fileSize\", LongType, \"file.size\")),\n+ entry(\"fileType\", new ExtensionMapping(\"fileType\", StringType, \"file.type\")),\n+ entry(\"flexDate1\", new ExtensionMapping(\"flexDate1\", TimestampType, null)),\n+ entry(\"flexDate1Label\", new ExtensionMapping(\"flexDate1Label\", StringType, null)),\n+ entry(\"flexString1\", new ExtensionMapping(\"flexString1\", StringType, null)),\n+ entry(\"flexString2\", new ExtensionMapping(\"flexString2\", StringType, null)),\n+ entry(\"flexString1Label\", new ExtensionMapping(\"flexString1Label\", StringType, null)),\n+ entry(\"flexString2Label\", new ExtensionMapping(\"flexString2Label\", StringType, null)),\n+ entry(\"msg\", new ExtensionMapping(\"message\", StringType, \"message\")),\n+ entry(\"oldFileCreateTime\", new ExtensionMapping(\"oldFileCreateTime\", TimestampType, null)),\n+ entry(\"oldFileHash\", new ExtensionMapping(\"oldFileHash\", StringType, null)),\n+ entry(\"oldFileId\", new ExtensionMapping(\"oldFileId\", StringType, null)),\n+ entry(\"oldFileModificationTime\", new ExtensionMapping(\"oldFileModificationTime\", TimestampType, null)),\n+ entry(\"oldFileName\", new ExtensionMapping(\"oldFileName\", StringType, null)),\n+ entry(\"oldFilePath\", new ExtensionMapping(\"oldFilePath\", StringType, null)),\n+ entry(\"oldFilePermission\", new 
ExtensionMapping(\"oldFilePermission\", StringType, null)),\n+ entry(\"oldFileSize\", new ExtensionMapping(\"oldFileSize\", IntegerType, null)),\n+ entry(\"oldFileType\", new ExtensionMapping(\"oldFileType\", StringType, null)),\n+ entry(\"rawEvent\", new ExtensionMapping(\"rawEvent\", StringType, \"event.original\")),\n+ entry(\"reason\", new ExtensionMapping(\"Reason\", StringType, \"event.reason\")),\n+ entry(\"requestClientApplication\", new ExtensionMapping(\"requestClientApplication\", StringType, \"user_agent.original\")),\n+ entry(\"requestContext\", new ExtensionMapping(\"requestContext\", StringType, \"http.request.referrer\")),\n+ entry(\"requestCookies\", new ExtensionMapping(\"requestCookies\", StringType, null)),\n+ entry(\"requestMethod\", new ExtensionMapping(\"requestMethod\", StringType, \"http.request.method\")),\n+ entry(\"request\", new ExtensionMapping(\"requestUrl\", StringType, \"url.original\")),\n+ entry(\"src\", new ExtensionMapping(\"sourceAddress\", IPType, \"source.ip\")),\n+ entry(\"sourceDnsDomain\", new ExtensionMapping(\"sourceDnsDomain\", StringType, \"source.domain\")),\n+ entry(\"slat\", new ExtensionMapping(\"sourceGeoLatitude\", DoubleType, \"source.geo.location.lat\")),\n+ entry(\"slong\", new ExtensionMapping(\"sourceGeoLongitude\", DoubleType, \"source.geo.location.lon\")),\n+ entry(\"shost\", new ExtensionMapping(\"sourceHostName\", StringType, \"source.domain\")),\n+ entry(\"smac\", new ExtensionMapping(\"sourceMacAddress\", MACAddressType, \"source.mac\")),\n+ entry(\"sntdom\", new ExtensionMapping(\"sourceNtDomain\", StringType, \"source.registered_domain\")),\n+ entry(\"spt\", new ExtensionMapping(\"sourcePort\", IntegerType, \"source.port\")),\n+ entry(\"spid\", new ExtensionMapping(\"sourceProcessId\", LongType, \"source.process.pid\")),\n+ entry(\"sproc\", new ExtensionMapping(\"sourceProcessName\", StringType, \"source.process.name\")),\n+ entry(\"sourceServiceName\", new ExtensionMapping(\"sourceServiceName\", StringType, \"source.service.name\")),\n+ entry(\"sourceTranslatedAddress\", new ExtensionMapping(\"sourceTranslatedAddress\", IPType, \"source.nat.ip\")),\n+ entry(\"sourceTranslatedPort\", new ExtensionMapping(\"sourceTranslatedPort\", IntegerType, \"source.nat.port\")),\n+ entry(\"sourceTranslatedZoneExternalID\", new ExtensionMapping(\"sourceTranslatedZoneExternalID\", StringType, null)),\n+ entry(\"sourceTranslatedZoneURI\", new ExtensionMapping(\"sourceTranslatedZoneURI\", StringType, null)),\n+ entry(\"suid\", new ExtensionMapping(\"sourceUserId\", StringType, \"source.user.id\")),\n+ entry(\"suser\", new ExtensionMapping(\"sourceUserName\", StringType, \"source.user.name\")),\n+ entry(\"spriv\", new ExtensionMapping(\"sourceUserPrivileges\", StringType, \"source.user.group.name\")),\n+ entry(\"sourceZoneExternalID\", new ExtensionMapping(\"sourceZoneExternalID\", StringType, null)),\n+ entry(\"sourceZoneURI\", new ExtensionMapping(\"sourceZoneURI\", StringType, null)),\n+ entry(\"start\", new ExtensionMapping(\"startTime\", TimestampType, \"event.start\")),\n+ entry(\"proto\", new ExtensionMapping(\"transportProtocol\", StringType, \"network.transport\")),\n+ entry(\"type\", new ExtensionMapping(\"type\", IntegerType, \"event.kind\")),\n+ entry(\"catdt\", new ExtensionMapping(\"categoryDeviceType\", StringType, null)),\n+ entry(\"mrt\", new ExtensionMapping(\"managerReceiptTime\", TimestampType, \"event.ingested\"))\n+ );\n+\n+ private static final Set ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = Set.of(\"incomplete CEF 
header\");\n+ private static final List TIME_LAYOUTS = List.of(\n+ // MMM dd HH:mm:ss.SSS zzz\n+ \"MMM dd HH:mm:ss.SSS z\",\n+ \"MMM dd HH:mm:ss.SSS Z\",\n+ \"MMM dd HH:mm:ss.SSS 'GMT'XX:XX\",\n+ // MMM dd HH:mm:sss.SSS\n+ \"MMM dd HH:mm:ss.SSS\",\n+ // MMM dd HH:mm:ss zzz\n+ \"MMM dd HH:mm:ss z\",\n+ \"MMM dd HH:mm:ss Z\",\n+ \"MMM dd HH:mm:ss 'GMT'XX:XX\",\n+ // MMM dd HH:mm:ss\n+ \"MMM dd HH:mm:ss\",\n+ // MMM dd yyyy HH:mm:ss.SSS zzz\n+ \"MMM dd yyyy HH:mm:ss.SSS z\",\n+ \"MMM dd yyyy HH:mm:ss.SSS Z\",\n+ \"MMM dd yyyy HH:mm:ss.SSS 'GMT'XX:XX\",\n+ // MMM dd yyyy HH:mm:ss.SSS\n+ \"MMM dd yyyy HH:mm:ss.SSS\",\n+ // MMM dd yyyy HH:mm:ss zzz\n+ \"MMM dd yyyy HH:mm:ss z\",\n+ \"MMM dd yyyy HH:mm:ss Z\",\n+ \"MMM dd yyyy HH:mm:ss 'GMT'XX:XX\",\n+ // MMM dd yyyy HH:mm:ss\n+ \"MMM dd yyyy HH:mm:ss\"\n+ );\n+\n+ private static final List CHRONO_FIELDS = List.of(\n+ NANO_OF_SECOND,\n+ SECOND_OF_DAY,\n+ MINUTE_OF_DAY,\n+ HOUR_OF_DAY,\n+ DAY_OF_MONTH,\n+ MONTH_OF_YEAR\n+ );\n+\n+ CefEvent process(String cefString) {\n+ List headers = new ArrayList<>();\n+ Matcher matcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && matcher.find(); i++) {\n+ String field = matcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headers.add(field);\n+ extensionStart = matcher.end();\n+ }\n+\n+ if (headers.isEmpty() == false && headers.getFirst().startsWith(\"CEF:\")) {\n+ CefEvent event = new CefEvent();\n+ // Add error message if there are not enough header fields\n+ if (headers.size() != 7) {\n+ event.addRootMapping(\"error.message\", new HashSet<>(ERROR_MESSAGE_INCOMPLETE_CEF_HEADER));", - "comment_created_at": "2025-04-09T07:27:13+00:00", - "comment_author": "bhapas", - "comment_body": "I followed beats approach for this initially. But I agree that this should just throw exception just like other scenarios. - Fixed in https://github.com/elastic/elasticsearch/pull/122491/commits/ee969db097a7d4dfb50e014975ec524d5dc478af", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1199069767", - "pr_number": 95941, - "pr_file": "server/src/main/java/org/elasticsearch/action/support/TimeoutReleasableListener.java", - "created_at": "2023-05-19T15:03:10+00:00", - "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. 
Licensed under the Elastic License\n * 2.0 and the Server Side Public License, v 1; you may not use this file except\n * in compliance with, at your election, the Elastic License 2.0 or the Server\n * Side Public License, v 1.\n */\n\npackage org.elasticsearch.action.support;\n\nimport org.elasticsearch.ElasticsearchTimeoutException;\nimport org.elasticsearch.action.ActionListener;\nimport org.elasticsearch.core.Releasable;\nimport org.elasticsearch.core.Releasables;\nimport org.elasticsearch.core.TimeValue;\nimport org.elasticsearch.threadpool.ThreadPool;\n\nimport java.util.Objects;\nimport java.util.concurrent.atomic.AtomicReference;\nimport java.util.function.Supplier;\n\n/**\n * A listener for a {@link Releasable} which races against a timeout, but which does not leak any {@link Releasable} with which it is\n * eventually completed.\n */\npublic class TimeoutReleasableListener implements ActionListener {\n\n private final Supplier> listenerSupplier;\n private final Runnable timeoutCanceller;\n\n public static TimeoutReleasableListener create(\n ActionListener delegate,\n TimeValue timeout,\n ThreadPool threadPool,\n String timeoutExecutor\n ) {\n final var listenerSupplier = readOnce(delegate);\n return new TimeoutReleasableListener(listenerSupplier, scheduleTimeout(timeout, threadPool, timeoutExecutor, listenerSupplier));\n }\n\n private static Supplier> readOnce(ActionListener listener) {\n final var listenerRef = new AtomicReference<>(ActionListener.assertOnce(listener));\n return new Supplier<>() {\n @Override\n public ActionListener get() {\n return listenerRef.getAndSet(null);\n }\n\n @Override\n public String toString() {\n return Objects.toString(listenerRef.get());\n }\n };\n }\n\n private static Runnable scheduleTimeout(\n TimeValue timeout,\n ThreadPool threadPool,\n String timeoutExecutor,\n Supplier> listenerSupplier\n ) {\n try {\n final var cancellable = threadPool.schedule(() -> {\n final var listener = listenerSupplier.get();\n if (listener != null) {\n listener.onFailure(new ElasticsearchTimeoutException(\"timed out after [\" + timeout + \"/\" + timeout.millis() + \"ms]\"));\n }\n }, timeout, timeoutExecutor);\n return () -> {\n try {\n cancellable.cancel();\n } catch (Exception e) {\n // should not happen, we cannot reasonably do anything with an exception here anyway\n assert false : e;\n }\n };\n } catch (Exception e) {\n listenerSupplier.get().onFailure(e);\n return () -> {};", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "1199069767", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 95941, - "pr_file": "server/src/main/java/org/elasticsearch/action/support/TimeoutReleasableListener.java", - "discussion_id": "1199069767", - "commented_code": "@@ -0,0 +1,112 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the Elastic License\n+ * 2.0 and the Server Side Public License, v 1; you may not use this file except\n+ * in compliance with, at your election, the Elastic License 2.0 or the Server\n+ * Side Public License, v 1.\n+ */\n+\n+package org.elasticsearch.action.support;\n+\n+import org.elasticsearch.ElasticsearchTimeoutException;\n+import org.elasticsearch.action.ActionListener;\n+import org.elasticsearch.core.Releasable;\n+import org.elasticsearch.core.Releasables;\n+import org.elasticsearch.core.TimeValue;\n+import org.elasticsearch.threadpool.ThreadPool;\n+\n+import java.util.Objects;\n+import java.util.concurrent.atomic.AtomicReference;\n+import java.util.function.Supplier;\n+\n+/**\n+ * A listener for a {@link Releasable} which races against a timeout, but which does not leak any {@link Releasable} with which it is\n+ * eventually completed.\n+ */\n+public class TimeoutReleasableListener implements ActionListener {\n+\n+ private final Supplier> listenerSupplier;\n+ private final Runnable timeoutCanceller;\n+\n+ public static TimeoutReleasableListener create(\n+ ActionListener delegate,\n+ TimeValue timeout,\n+ ThreadPool threadPool,\n+ String timeoutExecutor\n+ ) {\n+ final var listenerSupplier = readOnce(delegate);\n+ return new TimeoutReleasableListener(listenerSupplier, scheduleTimeout(timeout, threadPool, timeoutExecutor, listenerSupplier));\n+ }\n+\n+ private static Supplier> readOnce(ActionListener listener) {\n+ final var listenerRef = new AtomicReference<>(ActionListener.assertOnce(listener));\n+ return new Supplier<>() {\n+ @Override\n+ public ActionListener get() {\n+ return listenerRef.getAndSet(null);\n+ }\n+\n+ @Override\n+ public String toString() {\n+ return Objects.toString(listenerRef.get());\n+ }\n+ };\n+ }\n+\n+ private static Runnable scheduleTimeout(\n+ TimeValue timeout,\n+ ThreadPool threadPool,\n+ String timeoutExecutor,\n+ Supplier> listenerSupplier\n+ ) {\n+ try {\n+ final var cancellable = threadPool.schedule(() -> {\n+ final var listener = listenerSupplier.get();\n+ if (listener != null) {\n+ listener.onFailure(new ElasticsearchTimeoutException(\"timed out after [\" + timeout + \"/\" + timeout.millis() + \"ms]\"));\n+ }\n+ }, timeout, timeoutExecutor);\n+ return () -> {\n+ try {\n+ cancellable.cancel();\n+ } catch (Exception e) {\n+ // should not happen, we cannot reasonably do anything with an exception here anyway\n+ assert false : e;\n+ }\n+ };\n+ } catch (Exception e) {\n+ listenerSupplier.get().onFailure(e);\n+ return () -> {};", - "comment_created_at": "2023-05-19T15:03:10+00:00", - "comment_author": "henningandersen", - "comment_body": "I wonder about this. I suppose it only happens during shutdown? Can we perhaps assert that?\r\n\r\nIn case we do get here, it seems odd that we try to block operations afterwards. Probably not a big deal if it only happens on shutdown.\r\n\r\nTo some extent we could just throw here? 
Or change `TimeoutReleasableListener.create` into a `run` method accepting the code to run under timeout (a `Consumer listener\n ) {\n PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n // TODO: this could be snuck into the underlying listener\n EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n // execute any potential subplans\n executeSubPlans(physicalPlan, planRunner, executionInfo, request, listener);\n executeSubPlans(optimizedPlan, planRunner, executionInfo, request, listener);\n }\n\n private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {}\n private record LogicalPlanTuple(LogicalPlan nonStubbedSubPlan, LogicalPlan originalSubPlan) {}\n\n private void executeSubPlans(\n PhysicalPlan physicalPlan,\n LogicalPlan optimizedPlan,\n PlanRunner runner,\n EsqlExecutionInfo executionInfo,\n EsqlQueryRequest request,\n ActionListener listener\n ) {\n List subplans = new ArrayList<>();\n\n // Currently the inlinestats are limited and supported as streaming operators, thus present inside the fragment as logical plans\n // Below they get collected, translated into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n physicalPlan.forEachUp(FragmentExec.class, f -> {\n f.fragment().forEachUp(InlineJoin.class, ij -> {\n // extract the right side of the plan and replace its source\n LogicalPlan subplan = InlineJoin.replaceStub(ij.left(), ij.right());\n // mark the new root node as optimized\n subplan.setOptimized();\n PhysicalPlan subqueryPlan = logicalPlanToPhysicalPlan(subplan, request);\n subplans.add(new PlanTuple(subqueryPlan, ij.right()));\n });\n });\n\n Iterator iterator = subplans.iterator();\n var subPlan = firstSubPlan(optimizedPlan);\n\n // TODO: merge into one method\n if (subplans.size() > 0) {\n if (subPlan != null) {\n // code-path to execute subplans\n executeSubPlan(new DriverCompletionInfo.Accumulator(), physicalPlan, iterator, executionInfo, runner, listener);\n executeSubPlan(new DriverCompletionInfo.Accumulator(), optimizedPlan, subPlan, executionInfo, runner, request, listener);\n } else {\n PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n // execute main plan\n runner.run(physicalPlan, listener);\n }\n }\n\n private LogicalPlanTuple firstSubPlan(LogicalPlan optimizedPlan) {\n Holder subPlan = new Holder<>();\n // Collect the first inlinejoin (bottom up in the tree)\n optimizedPlan.forEachUp(InlineJoin.class, ij -> {\n // extract the right side of the plan and replace its source\n if (subPlan.get() == null && ij.right().anyMatch(p -> p instanceof StubRelation)) {\n var p = InlineJoin.replaceStub(ij.left(), ij.right());\n p.setOptimized();\n subPlan.set(new LogicalPlanTuple(p, ij.right()));\n }\n });\n return subPlan.get();\n }\n\n private void executeSubPlan(\n DriverCompletionInfo.Accumulator completionInfoAccumulator,\n PhysicalPlan plan,\n Iterator subPlanIterator,\n LogicalPlan optimizedPlan,\n LogicalPlanTuple subPlans,\n EsqlExecutionInfo executionInfo,\n PlanRunner runner,\n EsqlQueryRequest request,\n ActionListener listener\n ) {\n PlanTuple tuple = subPlanIterator.next();\n // Create a physical plan out of the logical sub-plan\n var physicalSubPlan = logicalPlanToPhysicalPlan(subPlans.nonStubbedSubPlan, request);\n\n runner.run(tuple.physical, listener.delegateFailureAndWrap((next, result) -> {\n runner.run(physicalSubPlan, listener.delegateFailureAndWrap((next, result) -> {\n try {\n // Translate the subquery into a separate, coordinator 
based plan and the results 'broadcasted' as a local relation\n completionInfoAccumulator.accumulate(result.completionInfo());\n LocalRelation resultWrapper = resultToPlan(tuple.logical, result);\n LocalRelation resultWrapper = resultToPlan(subPlans.nonStubbedSubPlan, result);", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2166743201", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 128917, + "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java", + "discussion_id": "2166743201", + "commented_code": "@@ -207,85 +206,85 @@ public void executeOptimizedPlan(\n LogicalPlan optimizedPlan,\n ActionListener listener\n ) {\n- PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n // TODO: this could be snuck into the underlying listener\n EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n // execute any potential subplans\n- executeSubPlans(physicalPlan, planRunner, executionInfo, request, listener);\n+ executeSubPlans(optimizedPlan, planRunner, executionInfo, request, listener);\n }\n \n- private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {}\n+ private record LogicalPlanTuple(LogicalPlan nonStubbedSubPlan, LogicalPlan originalSubPlan) {}\n \n private void executeSubPlans(\n- PhysicalPlan physicalPlan,\n+ LogicalPlan optimizedPlan,\n PlanRunner runner,\n EsqlExecutionInfo executionInfo,\n EsqlQueryRequest request,\n ActionListener listener\n ) {\n- List subplans = new ArrayList<>();\n-\n- // Currently the inlinestats are limited and supported as streaming operators, thus present inside the fragment as logical plans\n- // Below they get collected, translated into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n- physicalPlan.forEachUp(FragmentExec.class, f -> {\n- f.fragment().forEachUp(InlineJoin.class, ij -> {\n- // extract the right side of the plan and replace its source\n- LogicalPlan subplan = InlineJoin.replaceStub(ij.left(), ij.right());\n- // mark the new root node as optimized\n- subplan.setOptimized();\n- PhysicalPlan subqueryPlan = logicalPlanToPhysicalPlan(subplan, request);\n- subplans.add(new PlanTuple(subqueryPlan, ij.right()));\n- });\n- });\n-\n- Iterator iterator = subplans.iterator();\n+ var subPlan = firstSubPlan(optimizedPlan);\n \n // TODO: merge into one method\n- if (subplans.size() > 0) {\n+ if (subPlan != null) {\n // code-path to execute subplans\n- executeSubPlan(new DriverCompletionInfo.Accumulator(), physicalPlan, iterator, executionInfo, runner, listener);\n+ executeSubPlan(new DriverCompletionInfo.Accumulator(), optimizedPlan, subPlan, executionInfo, runner, request, listener);\n } else {\n+ PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n // execute main plan\n runner.run(physicalPlan, listener);\n }\n }\n \n+ private LogicalPlanTuple firstSubPlan(LogicalPlan optimizedPlan) {\n+ Holder subPlan = new Holder<>();\n+ // Collect the first inlinejoin (bottom up in the tree)\n+ optimizedPlan.forEachUp(InlineJoin.class, ij -> {\n+ // extract the right side of the plan and replace its source\n+ if (subPlan.get() == null && ij.right().anyMatch(p -> p instanceof StubRelation)) {\n+ var p = InlineJoin.replaceStub(ij.left(), ij.right());\n+ p.setOptimized();\n+ subPlan.set(new LogicalPlanTuple(p, ij.right()));\n+ }\n+ });\n+ return subPlan.get();\n+ }\n+\n private void executeSubPlan(\n DriverCompletionInfo.Accumulator completionInfoAccumulator,\n- 
PhysicalPlan plan,\n- Iterator subPlanIterator,\n+ LogicalPlan optimizedPlan,\n+ LogicalPlanTuple subPlans,\n EsqlExecutionInfo executionInfo,\n PlanRunner runner,\n+ EsqlQueryRequest request,\n ActionListener listener\n ) {\n- PlanTuple tuple = subPlanIterator.next();\n+ // Create a physical plan out of the logical sub-plan\n+ var physicalSubPlan = logicalPlanToPhysicalPlan(subPlans.nonStubbedSubPlan, request);\n \n- runner.run(tuple.physical, listener.delegateFailureAndWrap((next, result) -> {\n+ runner.run(physicalSubPlan, listener.delegateFailureAndWrap((next, result) -> {\n try {\n+ // Translate the subquery into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n completionInfoAccumulator.accumulate(result.completionInfo());\n- LocalRelation resultWrapper = resultToPlan(tuple.logical, result);\n+ LocalRelation resultWrapper = resultToPlan(subPlans.nonStubbedSubPlan, result);", + "comment_created_at": "2025-06-25T13:37:18+00:00", + "comment_author": "bpintea", + "comment_body": "Not related to this change, but: `resultToPlan` could be made static as well.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2167048244", + "pr_number": 128917, + "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/local/CopyingLocalSupplier.java", + "created_at": "2025-06-25T15:45:27+00:00", + "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. Licensed under the Elastic License\n * 2.0; you may not use this file except in compliance with the Elastic License\n * 2.0.\n */\n\npackage org.elasticsearch.xpack.esql.plan.logical.local;\n\nimport org.elasticsearch.compute.data.Block;\nimport org.elasticsearch.compute.data.BlockUtils;\nimport org.elasticsearch.xpack.esql.planner.PlannerUtils;\n\n/**\n * A {@link LocalSupplier} that allways creates a new copy of the {@link Block}s initially provided at creation time.\n */\npublic class CopyingLocalSupplier extends ImmediateLocalSupplier {\n\n public CopyingLocalSupplier(Block[] blocks) {\n super(blocks);\n }\n\n @Override\n public Block[] get() {", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2167048244", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 128917, + "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/local/CopyingLocalSupplier.java", + "discussion_id": "2167048244", + "commented_code": "@@ -0,0 +1,31 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. Licensed under the Elastic License\n+ * 2.0; you may not use this file except in compliance with the Elastic License\n+ * 2.0.\n+ */\n+\n+package org.elasticsearch.xpack.esql.plan.logical.local;\n+\n+import org.elasticsearch.compute.data.Block;\n+import org.elasticsearch.compute.data.BlockUtils;\n+import org.elasticsearch.xpack.esql.planner.PlannerUtils;\n+\n+/**\n+ * A {@link LocalSupplier} that allways creates a new copy of the {@link Block}s initially provided at creation time.\n+ */\n+public class CopyingLocalSupplier extends ImmediateLocalSupplier {\n+\n+ public CopyingLocalSupplier(Block[] blocks) {\n+ super(blocks);\n+ }\n+\n+ @Override\n+ public Block[] get() {", + "comment_created_at": "2025-06-25T15:45:27+00:00", + "comment_author": "bpintea", + "comment_body": "Should this method be made a util in `BlockUtils`? 
There's a few possible uses of one, as called by `BlockUtils#deepCopyOf`. Then you could use a plain lambda supplier directly in `ReplaceRowAsLocalRelation` when building the `LocalRelation` (and wouldn't need this extra supplier).", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2169174964", + "pr_number": 128917, + "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java", + "created_at": "2025-06-26T14:11:34+00:00", + "commented_code": "LogicalPlan optimizedPlan,\n ActionListener listener\n ) {\n PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n if (explainMode) {", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2169174964", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 128917, + "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java", + "discussion_id": "2169174964", + "commented_code": "@@ -217,8 +217,8 @@ public void executeOptimizedPlan(\n LogicalPlan optimizedPlan,\n ActionListener listener\n ) {\n- PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n if (explainMode) {", + "comment_created_at": "2025-06-26T14:11:34+00:00", + "comment_author": "bpintea", + "comment_body": "Nit: would it be worth extracting this case into an own method?", + "pr_file_module": null + }, + { + "comment_id": "2172308567", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 128917, + "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java", + "discussion_id": "2169174964", + "commented_code": "@@ -217,8 +217,8 @@ public void executeOptimizedPlan(\n LogicalPlan optimizedPlan,\n ActionListener listener\n ) {\n- PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n if (explainMode) {", + "comment_created_at": "2025-06-27T15:41:11+00:00", + "comment_author": "astefan", + "comment_body": "I've added a TODO to revisit this. `explain` in EsqlSession needs a second look, some things do not feel like they belong here.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2180043409", + "pr_number": 129902, + "pr_file": "server/src/main/java/org/elasticsearch/action/termvectors/TransportEnsureDocsSearchableAction.java", + "created_at": "2025-07-02T13:17:00+00:00", + "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. 
Licensed under the \"Elastic License\n * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n * Public License v 1\"; you may not use this file except in compliance with, at\n * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n * License v3.0 only\", or the \"Server Side Public License, v 1\".\n *\n * This file was contributed to by generative AI\n */\n\npackage org.elasticsearch.action.termvectors;\n\nimport org.apache.logging.log4j.LogManager;\nimport org.apache.logging.log4j.Logger;\nimport org.elasticsearch.ElasticsearchException;\nimport org.elasticsearch.action.ActionListener;\nimport org.elasticsearch.action.ActionRequestValidationException;\nimport org.elasticsearch.action.ActionResponse;\nimport org.elasticsearch.action.ActionType;\nimport org.elasticsearch.action.NoShardAvailableActionException;\nimport org.elasticsearch.action.admin.indices.refresh.TransportShardRefreshAction;\nimport org.elasticsearch.action.support.ActionFilters;\nimport org.elasticsearch.action.support.ActiveShardCount;\nimport org.elasticsearch.action.support.replication.BasicReplicationRequest;\nimport org.elasticsearch.action.support.single.shard.SingleShardRequest;\nimport org.elasticsearch.action.support.single.shard.TransportSingleShardAction;\nimport org.elasticsearch.client.internal.node.NodeClient;\nimport org.elasticsearch.cluster.ProjectState;\nimport org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;\nimport org.elasticsearch.cluster.node.DiscoveryNode;\nimport org.elasticsearch.cluster.node.DiscoveryNodeRole;\nimport org.elasticsearch.cluster.project.ProjectResolver;\nimport org.elasticsearch.cluster.routing.ShardIterator;\nimport org.elasticsearch.cluster.service.ClusterService;\nimport org.elasticsearch.common.io.stream.StreamInput;\nimport org.elasticsearch.common.io.stream.StreamOutput;\nimport org.elasticsearch.common.io.stream.Writeable;\nimport org.elasticsearch.index.IndexService;\nimport org.elasticsearch.index.mapper.Uid;\nimport org.elasticsearch.index.shard.IndexShard;\nimport org.elasticsearch.index.shard.ShardId;\nimport org.elasticsearch.indices.IndicesService;\nimport org.elasticsearch.injection.guice.Inject;\nimport org.elasticsearch.threadpool.ThreadPool;\nimport org.elasticsearch.transport.TransportService;\n\nimport java.io.IOException;\nimport java.util.List;\n\n/**\n * This action is used in serverless to ensure that documents are searchable on the search tier before processing\n * term vector requests. It is an intermediate action that is executed on the indexing node and responds\n * with a no-op (the search node can proceed to process the term vector request). The action may trigger an external refresh\n * to ensure the search shards are up to date before returning the no-op.\n */\npublic class TransportEnsureDocsSearchableAction extends TransportSingleShardAction<", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2180043409", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129902, + "pr_file": "server/src/main/java/org/elasticsearch/action/termvectors/TransportEnsureDocsSearchableAction.java", + "discussion_id": "2180043409", + "commented_code": "@@ -0,0 +1,224 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ *\n+ * This file was contributed to by generative AI\n+ */\n+\n+package org.elasticsearch.action.termvectors;\n+\n+import org.apache.logging.log4j.LogManager;\n+import org.apache.logging.log4j.Logger;\n+import org.elasticsearch.ElasticsearchException;\n+import org.elasticsearch.action.ActionListener;\n+import org.elasticsearch.action.ActionRequestValidationException;\n+import org.elasticsearch.action.ActionResponse;\n+import org.elasticsearch.action.ActionType;\n+import org.elasticsearch.action.NoShardAvailableActionException;\n+import org.elasticsearch.action.admin.indices.refresh.TransportShardRefreshAction;\n+import org.elasticsearch.action.support.ActionFilters;\n+import org.elasticsearch.action.support.ActiveShardCount;\n+import org.elasticsearch.action.support.replication.BasicReplicationRequest;\n+import org.elasticsearch.action.support.single.shard.SingleShardRequest;\n+import org.elasticsearch.action.support.single.shard.TransportSingleShardAction;\n+import org.elasticsearch.client.internal.node.NodeClient;\n+import org.elasticsearch.cluster.ProjectState;\n+import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;\n+import org.elasticsearch.cluster.node.DiscoveryNode;\n+import org.elasticsearch.cluster.node.DiscoveryNodeRole;\n+import org.elasticsearch.cluster.project.ProjectResolver;\n+import org.elasticsearch.cluster.routing.ShardIterator;\n+import org.elasticsearch.cluster.service.ClusterService;\n+import org.elasticsearch.common.io.stream.StreamInput;\n+import org.elasticsearch.common.io.stream.StreamOutput;\n+import org.elasticsearch.common.io.stream.Writeable;\n+import org.elasticsearch.index.IndexService;\n+import org.elasticsearch.index.mapper.Uid;\n+import org.elasticsearch.index.shard.IndexShard;\n+import org.elasticsearch.index.shard.ShardId;\n+import org.elasticsearch.indices.IndicesService;\n+import org.elasticsearch.injection.guice.Inject;\n+import org.elasticsearch.threadpool.ThreadPool;\n+import org.elasticsearch.transport.TransportService;\n+\n+import java.io.IOException;\n+import java.util.List;\n+\n+/**\n+ * This action is used in serverless to ensure that documents are searchable on the search tier before processing\n+ * term vector requests. It is an intermediate action that is executed on the indexing node and responds\n+ * with a no-op (the search node can proceed to process the term vector request). The action may trigger an external refresh\n+ * to ensure the search shards are up to date before returning the no-op.\n+ */\n+public class TransportEnsureDocsSearchableAction extends TransportSingleShardAction<", + "comment_created_at": "2025-07-02T13:17:00+00:00", + "comment_author": "tlrx", + "comment_body": "Could we have only the ActionType and the request in `server`, and the rest lies into the serverless code base? 
Similarly to StatelessPrimaryRelocationAction?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2177395073", + "pr_number": 130364, + "pr_file": "x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecker.java", + "created_at": "2025-07-01T11:51:17+00:00", + "commented_code": "import org.apache.logging.log4j.LogManager;\nimport org.apache.logging.log4j.Logger;\nimport org.elasticsearch.cluster.ClusterState;\nimport org.elasticsearch.common.TriConsumer;\nimport org.elasticsearch.xcontent.NamedXContentRegistry;\nimport org.elasticsearch.xpack.core.deprecation.DeprecationIssue;\nimport org.elasticsearch.xpack.core.transform.transforms.TransformConfig;\n\nimport java.io.IOException;\nimport java.util.ArrayList;\nimport java.util.List;\nimport java.util.function.BiConsumer;\n\n/**\n * Cluster-specific deprecation checks, this is used to populate the {@code cluster_settings} field\n */\npublic class ClusterDeprecationChecker {\n\n private static final Logger logger = LogManager.getLogger(ClusterDeprecationChecker.class);\n private final List, List>> CHECKS = List.of(\n this::checkTransformSettings\n );\n private final List, List>> CHECKS = List.of(this::checkTransformSettings);\n private final NamedXContentRegistry xContentRegistry;\n\n ClusterDeprecationChecker(NamedXContentRegistry xContentRegistry) {\n this.xContentRegistry = xContentRegistry;\n }\n\n public List check(ClusterState clusterState, List transformConfigs) {\n public List check(List transformConfigs) {\n List allIssues = new ArrayList<>();\n CHECKS.forEach(check -> check.apply(clusterState, transformConfigs, allIssues));\n CHECKS.forEach(check -> check.accept(transformConfigs, allIssues));", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2177395073", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130364, + "pr_file": "x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecker.java", + "discussion_id": "2177395073", + "commented_code": "@@ -9,42 +9,35 @@\n \n import org.apache.logging.log4j.LogManager;\n import org.apache.logging.log4j.Logger;\n-import org.elasticsearch.cluster.ClusterState;\n-import org.elasticsearch.common.TriConsumer;\n import org.elasticsearch.xcontent.NamedXContentRegistry;\n import org.elasticsearch.xpack.core.deprecation.DeprecationIssue;\n import org.elasticsearch.xpack.core.transform.transforms.TransformConfig;\n \n import java.io.IOException;\n import java.util.ArrayList;\n import java.util.List;\n+import java.util.function.BiConsumer;\n \n /**\n * Cluster-specific deprecation checks, this is used to populate the {@code cluster_settings} field\n */\n public class ClusterDeprecationChecker {\n \n private static final Logger logger = LogManager.getLogger(ClusterDeprecationChecker.class);\n- private final List, List>> CHECKS = List.of(\n- this::checkTransformSettings\n- );\n+ private final List, List>> CHECKS = List.of(this::checkTransformSettings);\n private final NamedXContentRegistry xContentRegistry;\n \n ClusterDeprecationChecker(NamedXContentRegistry xContentRegistry) {\n this.xContentRegistry = xContentRegistry;\n }\n \n- public List check(ClusterState clusterState, List transformConfigs) {\n+ public List check(List transformConfigs) {\n List allIssues = new ArrayList<>();\n- CHECKS.forEach(check -> check.apply(clusterState, transformConfigs, allIssues));\n+ CHECKS.forEach(check -> check.accept(transformConfigs, allIssues));", + 
"comment_created_at": "2025-07-01T11:51:17+00:00", + "comment_author": "PeteGillinElastic", + "comment_body": "Do we feel like the `CHECKS` abstraction is still pulling its weight here? There's one check hard-coded, we could just call `checkTransformSettings` directly, and it'd be more readable. If we needed to add more checks in the future, it's not clear to me that adding a method to the list would be better than adding another method call here. Plus, it's not clear to me that the `BiConsumer` would have the correct signature for a theoretical future check, either. (Presumably, at some point in the past there was a check which consumed `ClusterState`, hence the redundant parameter. A future check might need that, or might need the `ProjectMetadata`, but we can't really guess. now.)", + "pr_file_module": null + }, + { + "comment_id": "2177503438", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130364, + "pr_file": "x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecker.java", + "discussion_id": "2177395073", + "commented_code": "@@ -9,42 +9,35 @@\n \n import org.apache.logging.log4j.LogManager;\n import org.apache.logging.log4j.Logger;\n-import org.elasticsearch.cluster.ClusterState;\n-import org.elasticsearch.common.TriConsumer;\n import org.elasticsearch.xcontent.NamedXContentRegistry;\n import org.elasticsearch.xpack.core.deprecation.DeprecationIssue;\n import org.elasticsearch.xpack.core.transform.transforms.TransformConfig;\n \n import java.io.IOException;\n import java.util.ArrayList;\n import java.util.List;\n+import java.util.function.BiConsumer;\n \n /**\n * Cluster-specific deprecation checks, this is used to populate the {@code cluster_settings} field\n */\n public class ClusterDeprecationChecker {\n \n private static final Logger logger = LogManager.getLogger(ClusterDeprecationChecker.class);\n- private final List, List>> CHECKS = List.of(\n- this::checkTransformSettings\n- );\n+ private final List, List>> CHECKS = List.of(this::checkTransformSettings);\n private final NamedXContentRegistry xContentRegistry;\n \n ClusterDeprecationChecker(NamedXContentRegistry xContentRegistry) {\n this.xContentRegistry = xContentRegistry;\n }\n \n- public List check(ClusterState clusterState, List transformConfigs) {\n+ public List check(List transformConfigs) {\n List allIssues = new ArrayList<>();\n- CHECKS.forEach(check -> check.apply(clusterState, transformConfigs, allIssues));\n+ CHECKS.forEach(check -> check.accept(transformConfigs, allIssues));", + "comment_created_at": "2025-07-01T12:39:04+00:00", + "comment_author": "nielsbauman", + "comment_body": "I hear what you're saying. My intuition was that this `CHECKS` field makes this class more consistent with the other deprecation checkers. 
But I don't have strong feelings, so I'm fine with calling that method directly.", + "pr_file_module": null + }, + { + "comment_id": "2177510862", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130364, + "pr_file": "x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecker.java", + "discussion_id": "2177395073", + "commented_code": "@@ -9,42 +9,35 @@\n \n import org.apache.logging.log4j.LogManager;\n import org.apache.logging.log4j.Logger;\n-import org.elasticsearch.cluster.ClusterState;\n-import org.elasticsearch.common.TriConsumer;\n import org.elasticsearch.xcontent.NamedXContentRegistry;\n import org.elasticsearch.xpack.core.deprecation.DeprecationIssue;\n import org.elasticsearch.xpack.core.transform.transforms.TransformConfig;\n \n import java.io.IOException;\n import java.util.ArrayList;\n import java.util.List;\n+import java.util.function.BiConsumer;\n \n /**\n * Cluster-specific deprecation checks, this is used to populate the {@code cluster_settings} field\n */\n public class ClusterDeprecationChecker {\n \n private static final Logger logger = LogManager.getLogger(ClusterDeprecationChecker.class);\n- private final List, List>> CHECKS = List.of(\n- this::checkTransformSettings\n- );\n+ private final List, List>> CHECKS = List.of(this::checkTransformSettings);\n private final NamedXContentRegistry xContentRegistry;\n \n ClusterDeprecationChecker(NamedXContentRegistry xContentRegistry) {\n this.xContentRegistry = xContentRegistry;\n }\n \n- public List check(ClusterState clusterState, List transformConfigs) {\n+ public List check(List transformConfigs) {\n List allIssues = new ArrayList<>();\n- CHECKS.forEach(check -> check.apply(clusterState, transformConfigs, allIssues));\n+ CHECKS.forEach(check -> check.accept(transformConfigs, allIssues));", + "comment_created_at": "2025-07-01T12:42:49+00:00", + "comment_author": "nielsbauman", + "comment_body": "On second thought, I think I agree with you that the `CHECKS` abstraction isn't super valuable, and that goes for the other classes too. 
I don't think it's worth opening a PR for those other classes, but perhaps I'll remember it if I ever need to touch this code again.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2179784624", + "pr_number": 130131, + "pr_file": "plugins/discovery-ec2/src/test/java/org/elasticsearch/discovery/ec2/EC2RetriesTests.java", + "created_at": "2025-07-02T11:12:54+00:00", + "commented_code": "new NoneCircuitBreakerService(),\n new SharedGroupFactory(Settings.EMPTY)\n ),\n threadPool,\n TransportService.NOOP_TRANSPORT_INTERCEPTOR,", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2179784624", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130131, + "pr_file": "plugins/discovery-ec2/src/test/java/org/elasticsearch/discovery/ec2/EC2RetriesTests.java", + "discussion_id": "2179784624", + "commented_code": "@@ -59,9 +59,7 @@ protected MockTransportService createTransportService() {\n new NoneCircuitBreakerService(),\n new SharedGroupFactory(Settings.EMPTY)\n ),\n- threadPool,\n- TransportService.NOOP_TRANSPORT_INTERCEPTOR,", + "comment_created_at": "2025-07-02T11:12:54+00:00", + "comment_author": "GalLalouche", + "comment_body": "It seemed all clients passed the same `Settings.EMPTY`, interceptor, and `null` in the `clusterSettings` to this constructor (refactored to a factory method), so I've simplified the method by removing those parameters.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2156561649", + "pr_number": 129633, + "pr_file": "x-pack/plugin/esql/compute/gen/src/main/java/org/elasticsearch/compute/gen/GroupingAggregatorImplementer.java", + "created_at": "2025-06-19T09:26:33+00:00", + "commented_code": "}\n builder.beginControlFlow(\"for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++)\");\n {", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2156561649", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129633, + "pr_file": "x-pack/plugin/esql/compute/gen/src/main/java/org/elasticsearch/compute/gen/GroupingAggregatorImplementer.java", + "discussion_id": "2156561649", + "commented_code": "@@ -610,7 +611,18 @@ private MethodSpec addIntermediateInput() {\n }\n builder.beginControlFlow(\"for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++)\");\n {", + "comment_created_at": "2025-06-19T09:26:33+00:00", + "comment_author": "Copilot", + "comment_body": "[nitpick] The addIntermediateInput method contains duplicated logic for handling block types versus non-block types; consider refactoring to extract the common behavior and reduce code duplication.\n```suggestion\n {\n String groupIdAssignment;\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2163237789", + "pr_number": 129684, + "pr_file": "server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormat.java", + "created_at": "2025-06-24T08:03:09+00:00", + "commented_code": "*
            • The sparse vector information, if required, mapping vector ordinal to doc ID\n *
            \n */\n@SuppressForbidden(", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2163237789", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129684, + "pr_file": "server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormat.java", + "discussion_id": "2163237789", + "commented_code": "@@ -85,8 +86,10 @@\n *
          • The sparse vector information, if required, mapping vector ordinal to doc ID\n *
          \n */\n+@SuppressForbidden(", + "comment_created_at": "2025-06-24T08:03:09+00:00", + "comment_author": "mosche", + "comment_body": "could you extract a little helper and suppress there, so we don't accidentally suppress more than intended", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2163241519", + "pr_number": 129684, + "pr_file": "server/src/main/java/org/elasticsearch/search/SearchService.java", + "created_at": "2025-06-24T08:05:06+00:00", + "commented_code": "* @param threadPool with context where to write the new header\n * @return the wrapped action listener\n */\n @SuppressForbidden(\n reason = \"TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993\"\n )", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2163241519", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129684, + "pr_file": "server/src/main/java/org/elasticsearch/search/SearchService.java", + "discussion_id": "2163241519", + "commented_code": "@@ -589,6 +590,9 @@ protected void doClose() {\n * @param threadPool with context where to write the new header\n * @return the wrapped action listener\n */\n+ @SuppressForbidden(\n+ reason = \"TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993\"\n+ )", + "comment_created_at": "2025-06-24T08:05:06+00:00", + "comment_author": "mosche", + "comment_body": "could you extract a little helper and suppress there, so we don't accidentally suppress more than intended", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2163296345", + "pr_number": 129684, + "pr_file": "modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/NettyAllocator.java", + "created_at": "2025-06-24T08:29:59+00:00", + "commented_code": "import org.elasticsearch.common.util.PageCacheRecycler;\nimport org.elasticsearch.core.Assertions;\nimport org.elasticsearch.core.Booleans;\nimport org.elasticsearch.core.SuppressForbidden;\nimport org.elasticsearch.monitor.jvm.JvmInfo;\n\nimport java.util.Arrays;\nimport java.util.concurrent.atomic.AtomicBoolean;\n\n@SuppressForbidden(\n reason = \"TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993\"\n)", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2163296345", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129684, + "pr_file": "modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/NettyAllocator.java", + "discussion_id": "2163296345", + "commented_code": "@@ -26,11 +26,15 @@\n import org.elasticsearch.common.util.PageCacheRecycler;\n import org.elasticsearch.core.Assertions;\n import org.elasticsearch.core.Booleans;\n+import org.elasticsearch.core.SuppressForbidden;\n import org.elasticsearch.monitor.jvm.JvmInfo;\n \n import java.util.Arrays;\n import java.util.concurrent.atomic.AtomicBoolean;\n \n+@SuppressForbidden(\n+ reason = \"TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993\"\n+)", + "comment_created_at": "2025-06-24T08:29:59+00:00", + "comment_author": "mosche", + "comment_body": "could you extract a little helper and suppress there, so we don't accidentally suppress more than intended", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2163301148", + "pr_number": 129684, + "pr_file": 
"server/src/main/java/org/elasticsearch/common/network/NetworkUtils.java", + "created_at": "2025-06-24T08:31:25+00:00", + "commented_code": "* Utilities for network interfaces / addresses binding and publishing.\n * Its only intended for that purpose, not general purpose usage!!!!\n */\n@SuppressForbidden(\n reason = \"TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993\"\n)", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2163301148", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129684, + "pr_file": "server/src/main/java/org/elasticsearch/common/network/NetworkUtils.java", + "discussion_id": "2163301148", + "commented_code": "@@ -32,6 +33,9 @@\n * Utilities for network interfaces / addresses binding and publishing.\n * Its only intended for that purpose, not general purpose usage!!!!\n */\n+@SuppressForbidden(\n+ reason = \"TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993\"\n+)", + "comment_created_at": "2025-06-24T08:31:25+00:00", + "comment_author": "mosche", + "comment_body": "could you extract a little helper and suppress there, so we don't accidentally suppress more than intended", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2163302186", + "pr_number": 129684, + "pr_file": "server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java", + "created_at": "2025-06-24T08:31:54+00:00", + "commented_code": "* cpu resources.\n *
        \n */\n@SuppressForbidden(\n reason = \"TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993\"\n)", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2163302186", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129684, + "pr_file": "server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java", + "discussion_id": "2163302186", + "commented_code": "@@ -28,6 +29,9 @@\n * cpu resources.\n *
      \n */\n+@SuppressForbidden(\n+ reason = \"TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993\"\n+)", + "comment_created_at": "2025-06-24T08:31:54+00:00", + "comment_author": "mosche", + "comment_body": "could you extract a little helper and suppress there, so we don't accidentally suppress more than intended", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2171122552", + "pr_number": 129684, + "pr_file": "modules/lang-painless/src/main/java/org/elasticsearch/painless/PainlessScriptEngine.java", + "created_at": "2025-06-27T08:00:12+00:00", + "commented_code": "}\n }\n\n @SuppressForbidden(\n reason = \"TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993\"\n )", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2171122552", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129684, + "pr_file": "modules/lang-painless/src/main/java/org/elasticsearch/painless/PainlessScriptEngine.java", + "discussion_id": "2171122552", + "commented_code": "@@ -391,6 +392,9 @@ ScriptScope compile(Compiler compiler, Loader loader, String scriptName, String\n }\n }\n \n+ @SuppressForbidden(\n+ reason = \"TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993\"\n+ )", + "comment_created_at": "2025-06-27T08:00:12+00:00", + "comment_author": "mosche", + "comment_body": "please also extract a small helper here to minimize the scope of @SuppressForbidden", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2171175467", + "pr_number": 129684, + "pr_file": "x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/PredictionFieldType.java", + "created_at": "2025-06-27T08:17:14+00:00", + "commented_code": "return name().toLowerCase(Locale.ROOT);\n }\n\n @SuppressForbidden(reason = \"accept lenient boolean field values\")", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2171175467", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129684, + "pr_file": "x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/PredictionFieldType.java", + "discussion_id": "2171175467", + "commented_code": "@@ -45,6 +46,7 @@ public String toString() {\n return name().toLowerCase(Locale.ROOT);\n }\n \n+ @SuppressForbidden(reason = \"accept lenient boolean field values\")", + "comment_created_at": "2025-06-27T08:17:14+00:00", + "comment_author": "mosche", + "comment_body": "please also extract a small helper here to minimize the scope of @SuppressForbidden", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1646778366", + "pr_number": 109949, + "pr_file": "server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java", + "created_at": "2024-06-20T00:22:25+00:00", + "commented_code": "request.getAutoGeneratedTimestamp(),\n request.isRetry()\n );\n\n if (result.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) {\n return handleMappingUpdateRequired(\n context,\n mappingUpdater,\n waitForMappingUpdate,\n itemDoneListener,\n result,\n primary,\n version,\n updateResult\n );\n }\n }\n if (result.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) {\n\n try {\n Optional mergedSource = Optional.ofNullable(\n primary.mapperService()\n .merge(\n MapperService.SINGLE_MAPPING_NAME,\n new 
CompressedXContent(result.getRequiredMappingUpdate()),\n MapperService.MergeReason.MAPPING_AUTO_UPDATE_PREFLIGHT\n )\n ).map(DocumentMapper::mappingSource);\n Optional previousSource = Optional.ofNullable(primary.mapperService().documentMapper())\n .map(DocumentMapper::mappingSource);\n onComplete(result, context, updateResult);\n return true;\n }\n\n if (mergedSource.equals(previousSource)) {\n context.resetForNoopMappingUpdateRetry(primary.mapperService().mappingVersion());\n return true;\n }\n } catch (Exception e) {\n logger.info(() -> format(\"%s mapping update rejected by primary\", primary.shardId()), e);\n assert result.getId() != null;\n onComplete(exceptionToResult(e, primary, isDelete, version, result.getId()), context, updateResult);\n private static boolean handleMappingUpdateRequired(", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "1646778366", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 109949, + "pr_file": "server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java", + "discussion_id": "1646778366", + "commented_code": "@@ -379,64 +376,88 @@ static boolean executeBulkItemRequest(\n request.getAutoGeneratedTimestamp(),\n request.isRetry()\n );\n-\n+ if (result.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) {\n+ return handleMappingUpdateRequired(\n+ context,\n+ mappingUpdater,\n+ waitForMappingUpdate,\n+ itemDoneListener,\n+ result,\n+ primary,\n+ version,\n+ updateResult\n+ );\n+ }\n }\n- if (result.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) {\n \n- try {\n- Optional mergedSource = Optional.ofNullable(\n- primary.mapperService()\n- .merge(\n- MapperService.SINGLE_MAPPING_NAME,\n- new CompressedXContent(result.getRequiredMappingUpdate()),\n- MapperService.MergeReason.MAPPING_AUTO_UPDATE_PREFLIGHT\n- )\n- ).map(DocumentMapper::mappingSource);\n- Optional previousSource = Optional.ofNullable(primary.mapperService().documentMapper())\n- .map(DocumentMapper::mappingSource);\n+ onComplete(result, context, updateResult);\n+ return true;\n+ }\n \n- if (mergedSource.equals(previousSource)) {\n- context.resetForNoopMappingUpdateRetry(primary.mapperService().mappingVersion());\n- return true;\n- }\n- } catch (Exception e) {\n- logger.info(() -> format(\"%s mapping update rejected by primary\", primary.shardId()), e);\n- assert result.getId() != null;\n- onComplete(exceptionToResult(e, primary, isDelete, version, result.getId()), context, updateResult);\n+ private static boolean handleMappingUpdateRequired(", + "comment_created_at": "2024-06-20T00:22:25+00:00", + "comment_author": "original-brownbear", + "comment_body": "Just extracted this into its own method now that it got so big and removed the `Optional` use which was confusing and unnecessary. Sorry for mixing fix + refactoring a little here, hope it's not too confusing. 
Otherwise I can make this into a 2 parter.", + "pr_file_module": null + }, + { + "comment_id": "1647324269", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 109949, + "pr_file": "server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java", + "discussion_id": "1646778366", + "commented_code": "@@ -379,64 +376,88 @@ static boolean executeBulkItemRequest(\n request.getAutoGeneratedTimestamp(),\n request.isRetry()\n );\n-\n+ if (result.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) {\n+ return handleMappingUpdateRequired(\n+ context,\n+ mappingUpdater,\n+ waitForMappingUpdate,\n+ itemDoneListener,\n+ result,\n+ primary,\n+ version,\n+ updateResult\n+ );\n+ }\n }\n- if (result.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) {\n \n- try {\n- Optional mergedSource = Optional.ofNullable(\n- primary.mapperService()\n- .merge(\n- MapperService.SINGLE_MAPPING_NAME,\n- new CompressedXContent(result.getRequiredMappingUpdate()),\n- MapperService.MergeReason.MAPPING_AUTO_UPDATE_PREFLIGHT\n- )\n- ).map(DocumentMapper::mappingSource);\n- Optional previousSource = Optional.ofNullable(primary.mapperService().documentMapper())\n- .map(DocumentMapper::mappingSource);\n+ onComplete(result, context, updateResult);\n+ return true;\n+ }\n \n- if (mergedSource.equals(previousSource)) {\n- context.resetForNoopMappingUpdateRetry(primary.mapperService().mappingVersion());\n- return true;\n- }\n- } catch (Exception e) {\n- logger.info(() -> format(\"%s mapping update rejected by primary\", primary.shardId()), e);\n- assert result.getId() != null;\n- onComplete(exceptionToResult(e, primary, isDelete, version, result.getId()), context, updateResult);\n+ private static boolean handleMappingUpdateRequired(", + "comment_created_at": "2024-06-20T10:02:50+00:00", + "comment_author": "javanna", + "comment_body": "given how delicate things are, I think it would be great to split this in two: refactoring + actual change, as a safety measure. 
I could definitely miss things otherwise while reviewing this as-is.", + "pr_file_module": null + }, + { + "comment_id": "1647381507", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 109949, + "pr_file": "server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java", + "discussion_id": "1646778366", + "commented_code": "@@ -379,64 +376,88 @@ static boolean executeBulkItemRequest(\n request.getAutoGeneratedTimestamp(),\n request.isRetry()\n );\n-\n+ if (result.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) {\n+ return handleMappingUpdateRequired(\n+ context,\n+ mappingUpdater,\n+ waitForMappingUpdate,\n+ itemDoneListener,\n+ result,\n+ primary,\n+ version,\n+ updateResult\n+ );\n+ }\n }\n- if (result.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) {\n \n- try {\n- Optional mergedSource = Optional.ofNullable(\n- primary.mapperService()\n- .merge(\n- MapperService.SINGLE_MAPPING_NAME,\n- new CompressedXContent(result.getRequiredMappingUpdate()),\n- MapperService.MergeReason.MAPPING_AUTO_UPDATE_PREFLIGHT\n- )\n- ).map(DocumentMapper::mappingSource);\n- Optional previousSource = Optional.ofNullable(primary.mapperService().documentMapper())\n- .map(DocumentMapper::mappingSource);\n+ onComplete(result, context, updateResult);\n+ return true;\n+ }\n \n- if (mergedSource.equals(previousSource)) {\n- context.resetForNoopMappingUpdateRetry(primary.mapperService().mappingVersion());\n- return true;\n- }\n- } catch (Exception e) {\n- logger.info(() -> format(\"%s mapping update rejected by primary\", primary.shardId()), e);\n- assert result.getId() != null;\n- onComplete(exceptionToResult(e, primary, isDelete, version, result.getId()), context, updateResult);\n+ private static boolean handleMappingUpdateRequired(", + "comment_created_at": "2024-06-20T10:49:43+00:00", + "comment_author": "original-brownbear", + "comment_body": "Agreed :) Here you go https://github.com/elastic/elasticsearch/pull/109961 :)", + "pr_file_module": null + }, + { + "comment_id": "1648632335", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 109949, + "pr_file": "server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java", + "discussion_id": "1646778366", + "commented_code": "@@ -379,64 +376,88 @@ static boolean executeBulkItemRequest(\n request.getAutoGeneratedTimestamp(),\n request.isRetry()\n );\n-\n+ if (result.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) {\n+ return handleMappingUpdateRequired(\n+ context,\n+ mappingUpdater,\n+ waitForMappingUpdate,\n+ itemDoneListener,\n+ result,\n+ primary,\n+ version,\n+ updateResult\n+ );\n+ }\n }\n- if (result.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) {\n \n- try {\n- Optional mergedSource = Optional.ofNullable(\n- primary.mapperService()\n- .merge(\n- MapperService.SINGLE_MAPPING_NAME,\n- new CompressedXContent(result.getRequiredMappingUpdate()),\n- MapperService.MergeReason.MAPPING_AUTO_UPDATE_PREFLIGHT\n- )\n- ).map(DocumentMapper::mappingSource);\n- Optional previousSource = Optional.ofNullable(primary.mapperService().documentMapper())\n- .map(DocumentMapper::mappingSource);\n+ onComplete(result, context, updateResult);\n+ return true;\n+ }\n \n- if (mergedSource.equals(previousSource)) {\n- context.resetForNoopMappingUpdateRetry(primary.mapperService().mappingVersion());\n- return true;\n- }\n- } catch (Exception e) {\n- logger.info(() -> format(\"%s mapping update rejected by primary\", primary.shardId()), e);\n- assert result.getId() != null;\n- 
onComplete(exceptionToResult(e, primary, isDelete, version, result.getId()), context, updateResult);\n+ private static boolean handleMappingUpdateRequired(", + "comment_created_at": "2024-06-21T08:45:01+00:00", + "comment_author": "javanna", + "comment_body": "thanks!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "908872960", + "pr_number": 87810, + "pr_file": "x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/optimizer/Optimizer.java", + "created_at": "2022-06-28T19:37:01+00:00", + "commented_code": "}\n\n public static class ConvertTypes extends OptimizerExpressionRule {\n\n public ConvertTypes() {\n super(TransformDirection.DOWN);\n }\n\n @Override\n public Expression rule(ScalarFunction e) {\n if (e instanceof BinaryComparison predicate && predicate.right().foldable()) {\n return convertBinaryComparison(predicate);\n } else if (e instanceof Range range) {\n return convertRange(range);\n } else if (e instanceof In in) {\n return convertIn(in);\n }\n return e;\n }\n\n private Expression convertIn(In in) {\n if (in.list().isEmpty()) {\n return in;\n }\n List newRight = new ArrayList<>();\n List additionalConditions = new ArrayList<>();\n DataType type = in.value().dataType();\n boolean converted = false;\n for (Expression exp : in.list()) {\n if (type == exp.dataType() || exp.foldable() == false || DataTypes.areCompatible(type, exp.dataType())) {\n newRight.add(exp);\n } else {\n converted = true;\n Object foldedValue = exp.fold();\n if (DataTypes.isDateTime(type) && DataTypes.isString(exp.dataType())) {\n try {\n // try date math first\n Tuple range = DateFieldMapper.DateFieldType.toTimestampRange(\n foldedValue,\n foldedValue,\n true,\n true,\n ZoneOffset.UTC,\n DateUtils.DATE_MATH_PARSER,\n DateFieldMapper.Resolution.MILLISECONDS,\n () -> System.currentTimeMillis()\n );\n if (range.v1() < range.v2()) {\n /*\n treat it as a date range\n eg. 
the following\n something IN (, , )\n becomes\n something BETWEEN AND OR someting IN (, )\n */\n additionalConditions.add(\n new Range(\n in.source(),\n in.value(),\n tryCast(exp, range.v1(), type),\n true,\n tryCast(exp, range.v2(), type),\n true,\n in.zoneId()\n )\n );\n } else {\n newRight.add(tryCast(exp, foldedValue, DataTypes.DATETIME));\n }\n } catch (ElasticsearchParseException | DateTimeParseException e) {\n // the date parsing just failed, use the original expression\n newRight.add(exp);\n }\n } else {\n newRight.add(tryCast(exp, foldedValue, type));\n }\n }\n }\n if (converted == false) {\n return in;\n }\n if (newRight.size() == 1) {\n additionalConditions.add(new Equals(in.source(), in.value(), newRight.remove(0)));\n }\n\n if (newRight.size() > 0 && additionalConditions.size() > 0) {\n return new Or(\n in.source(),\n new In(in.source(), in.value(), newRight, in.zoneId()),\n toOrTree(in.source(), additionalConditions)\n );\n } else if (newRight.size() > 0) {\n return new In(in.source(), in.value(), newRight, in.zoneId());\n } else if (additionalConditions.size() > 0) {\n return toOrTree(in.source(), additionalConditions);\n }\n\n return in;\n }\n\n private Expression toOrTree(Source source, List items) {\n if (items.size() == 1) {\n return items.get(0);\n }\n if (items.size() == 2) {\n return new Or(source, items.get(0), items.get(1));\n }\n\n int half = items.size() / 2;\n return new Or(source, toOrTree(source, items.subList(0, half)), toOrTree(source, items.subList(half, items.size())));\n }\n\n private Expression convertBinaryComparison(BinaryComparison predicate) {\n Object rightValue = predicate.right().fold();\n if (rightValue == null && predicate instanceof NullEquals) {\n return new IsNull(predicate.source(), predicate.left());\n } else if (rightValue != null) {\n try {\n if (DataTypes.isDateTime(predicate.left().dataType()) && DataTypes.isString(predicate.right().dataType())) {", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "908872960", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 87810, + "pr_file": "x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/optimizer/Optimizer.java", + "discussion_id": "908872960", + "commented_code": "@@ -1266,6 +1284,241 @@ private boolean foldable(Expression e) {\n \n }\n \n+ public static class ConvertTypes extends OptimizerExpressionRule {\n+\n+ public ConvertTypes() {\n+ super(TransformDirection.DOWN);\n+ }\n+\n+ @Override\n+ public Expression rule(ScalarFunction e) {\n+ if (e instanceof BinaryComparison predicate && predicate.right().foldable()) {\n+ return convertBinaryComparison(predicate);\n+ } else if (e instanceof Range range) {\n+ return convertRange(range);\n+ } else if (e instanceof In in) {\n+ return convertIn(in);\n+ }\n+ return e;\n+ }\n+\n+ private Expression convertIn(In in) {\n+ if (in.list().isEmpty()) {\n+ return in;\n+ }\n+ List newRight = new ArrayList<>();\n+ List additionalConditions = new ArrayList<>();\n+ DataType type = in.value().dataType();\n+ boolean converted = false;\n+ for (Expression exp : in.list()) {\n+ if (type == exp.dataType() || exp.foldable() == false || DataTypes.areCompatible(type, exp.dataType())) {\n+ newRight.add(exp);\n+ } else {\n+ converted = true;\n+ Object foldedValue = exp.fold();\n+ if (DataTypes.isDateTime(type) && DataTypes.isString(exp.dataType())) {\n+ try {\n+ // try date math first\n+ Tuple range = DateFieldMapper.DateFieldType.toTimestampRange(\n+ foldedValue,\n+ foldedValue,\n+ true,\n+ true,\n+ 
ZoneOffset.UTC,\n+ DateUtils.DATE_MATH_PARSER,\n+ DateFieldMapper.Resolution.MILLISECONDS,\n+ () -> System.currentTimeMillis()\n+ );\n+ if (range.v1() < range.v2()) {\n+ /*\n+ treat it as a date range\n+ eg. the following\n+ something IN (, , )\n+ becomes\n+ something BETWEEN AND OR someting IN (, )\n+ */\n+ additionalConditions.add(\n+ new Range(\n+ in.source(),\n+ in.value(),\n+ tryCast(exp, range.v1(), type),\n+ true,\n+ tryCast(exp, range.v2(), type),\n+ true,\n+ in.zoneId()\n+ )\n+ );\n+ } else {\n+ newRight.add(tryCast(exp, foldedValue, DataTypes.DATETIME));\n+ }\n+ } catch (ElasticsearchParseException | DateTimeParseException e) {\n+ // the date parsing just failed, use the original expression\n+ newRight.add(exp);\n+ }\n+ } else {\n+ newRight.add(tryCast(exp, foldedValue, type));\n+ }\n+ }\n+ }\n+ if (converted == false) {\n+ return in;\n+ }\n+ if (newRight.size() == 1) {\n+ additionalConditions.add(new Equals(in.source(), in.value(), newRight.remove(0)));\n+ }\n+\n+ if (newRight.size() > 0 && additionalConditions.size() > 0) {\n+ return new Or(\n+ in.source(),\n+ new In(in.source(), in.value(), newRight, in.zoneId()),\n+ toOrTree(in.source(), additionalConditions)\n+ );\n+ } else if (newRight.size() > 0) {\n+ return new In(in.source(), in.value(), newRight, in.zoneId());\n+ } else if (additionalConditions.size() > 0) {\n+ return toOrTree(in.source(), additionalConditions);\n+ }\n+\n+ return in;\n+ }\n+\n+ private Expression toOrTree(Source source, List items) {\n+ if (items.size() == 1) {\n+ return items.get(0);\n+ }\n+ if (items.size() == 2) {\n+ return new Or(source, items.get(0), items.get(1));\n+ }\n+\n+ int half = items.size() / 2;\n+ return new Or(source, toOrTree(source, items.subList(0, half)), toOrTree(source, items.subList(half, items.size())));\n+ }\n+\n+ private Expression convertBinaryComparison(BinaryComparison predicate) {\n+ Object rightValue = predicate.right().fold();\n+ if (rightValue == null && predicate instanceof NullEquals) {\n+ return new IsNull(predicate.source(), predicate.left());\n+ } else if (rightValue != null) {\n+ try {\n+ if (DataTypes.isDateTime(predicate.left().dataType()) && DataTypes.isString(predicate.right().dataType())) {", + "comment_created_at": "2022-06-28T19:37:01+00:00", + "comment_author": "bpintea", + "comment_body": "Might be worth extracting this key check into a function? (some `couldBeTimestampRange(DataType, DataType)`?)", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-extract-for-clarity.md b/_reviewers/elasticsearch-extract-for-clarity.md index a2ba94a..c96a976 100644 --- a/_reviewers/elasticsearch-extract-for-clarity.md +++ b/_reviewers/elasticsearch-extract-for-clarity.md @@ -54,431 +54,3 @@ For complex class functionality, consider isolating related methods: ``` This approach makes code more maintainable, easier to understand, and simplifies future changes. 
- - -[ - { - "discussion_id": "2166743201", - "pr_number": 128917, - "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java", - "created_at": "2025-06-25T13:37:18+00:00", - "commented_code": "LogicalPlan optimizedPlan,\n ActionListener listener\n ) {\n PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n // TODO: this could be snuck into the underlying listener\n EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n // execute any potential subplans\n executeSubPlans(physicalPlan, planRunner, executionInfo, request, listener);\n executeSubPlans(optimizedPlan, planRunner, executionInfo, request, listener);\n }\n\n private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {}\n private record LogicalPlanTuple(LogicalPlan nonStubbedSubPlan, LogicalPlan originalSubPlan) {}\n\n private void executeSubPlans(\n PhysicalPlan physicalPlan,\n LogicalPlan optimizedPlan,\n PlanRunner runner,\n EsqlExecutionInfo executionInfo,\n EsqlQueryRequest request,\n ActionListener listener\n ) {\n List subplans = new ArrayList<>();\n\n // Currently the inlinestats are limited and supported as streaming operators, thus present inside the fragment as logical plans\n // Below they get collected, translated into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n physicalPlan.forEachUp(FragmentExec.class, f -> {\n f.fragment().forEachUp(InlineJoin.class, ij -> {\n // extract the right side of the plan and replace its source\n LogicalPlan subplan = InlineJoin.replaceStub(ij.left(), ij.right());\n // mark the new root node as optimized\n subplan.setOptimized();\n PhysicalPlan subqueryPlan = logicalPlanToPhysicalPlan(subplan, request);\n subplans.add(new PlanTuple(subqueryPlan, ij.right()));\n });\n });\n\n Iterator iterator = subplans.iterator();\n var subPlan = firstSubPlan(optimizedPlan);\n\n // TODO: merge into one method\n if (subplans.size() > 0) {\n if (subPlan != null) {\n // code-path to execute subplans\n executeSubPlan(new DriverCompletionInfo.Accumulator(), physicalPlan, iterator, executionInfo, runner, listener);\n executeSubPlan(new DriverCompletionInfo.Accumulator(), optimizedPlan, subPlan, executionInfo, runner, request, listener);\n } else {\n PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n // execute main plan\n runner.run(physicalPlan, listener);\n }\n }\n\n private LogicalPlanTuple firstSubPlan(LogicalPlan optimizedPlan) {\n Holder subPlan = new Holder<>();\n // Collect the first inlinejoin (bottom up in the tree)\n optimizedPlan.forEachUp(InlineJoin.class, ij -> {\n // extract the right side of the plan and replace its source\n if (subPlan.get() == null && ij.right().anyMatch(p -> p instanceof StubRelation)) {\n var p = InlineJoin.replaceStub(ij.left(), ij.right());\n p.setOptimized();\n subPlan.set(new LogicalPlanTuple(p, ij.right()));\n }\n });\n return subPlan.get();\n }\n\n private void executeSubPlan(\n DriverCompletionInfo.Accumulator completionInfoAccumulator,\n PhysicalPlan plan,\n Iterator subPlanIterator,\n LogicalPlan optimizedPlan,\n LogicalPlanTuple subPlans,\n EsqlExecutionInfo executionInfo,\n PlanRunner runner,\n EsqlQueryRequest request,\n ActionListener listener\n ) {\n PlanTuple tuple = subPlanIterator.next();\n // Create a physical plan out of the logical sub-plan\n var physicalSubPlan = logicalPlanToPhysicalPlan(subPlans.nonStubbedSubPlan, request);\n\n runner.run(tuple.physical, listener.delegateFailureAndWrap((next, 
result) -> {\n runner.run(physicalSubPlan, listener.delegateFailureAndWrap((next, result) -> {\n try {\n // Translate the subquery into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n completionInfoAccumulator.accumulate(result.completionInfo());\n LocalRelation resultWrapper = resultToPlan(tuple.logical, result);\n LocalRelation resultWrapper = resultToPlan(subPlans.nonStubbedSubPlan, result);", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2166743201", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 128917, - "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java", - "discussion_id": "2166743201", - "commented_code": "@@ -207,85 +206,85 @@ public void executeOptimizedPlan(\n LogicalPlan optimizedPlan,\n ActionListener listener\n ) {\n- PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n // TODO: this could be snuck into the underlying listener\n EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n // execute any potential subplans\n- executeSubPlans(physicalPlan, planRunner, executionInfo, request, listener);\n+ executeSubPlans(optimizedPlan, planRunner, executionInfo, request, listener);\n }\n \n- private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {}\n+ private record LogicalPlanTuple(LogicalPlan nonStubbedSubPlan, LogicalPlan originalSubPlan) {}\n \n private void executeSubPlans(\n- PhysicalPlan physicalPlan,\n+ LogicalPlan optimizedPlan,\n PlanRunner runner,\n EsqlExecutionInfo executionInfo,\n EsqlQueryRequest request,\n ActionListener listener\n ) {\n- List subplans = new ArrayList<>();\n-\n- // Currently the inlinestats are limited and supported as streaming operators, thus present inside the fragment as logical plans\n- // Below they get collected, translated into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n- physicalPlan.forEachUp(FragmentExec.class, f -> {\n- f.fragment().forEachUp(InlineJoin.class, ij -> {\n- // extract the right side of the plan and replace its source\n- LogicalPlan subplan = InlineJoin.replaceStub(ij.left(), ij.right());\n- // mark the new root node as optimized\n- subplan.setOptimized();\n- PhysicalPlan subqueryPlan = logicalPlanToPhysicalPlan(subplan, request);\n- subplans.add(new PlanTuple(subqueryPlan, ij.right()));\n- });\n- });\n-\n- Iterator iterator = subplans.iterator();\n+ var subPlan = firstSubPlan(optimizedPlan);\n \n // TODO: merge into one method\n- if (subplans.size() > 0) {\n+ if (subPlan != null) {\n // code-path to execute subplans\n- executeSubPlan(new DriverCompletionInfo.Accumulator(), physicalPlan, iterator, executionInfo, runner, listener);\n+ executeSubPlan(new DriverCompletionInfo.Accumulator(), optimizedPlan, subPlan, executionInfo, runner, request, listener);\n } else {\n+ PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n // execute main plan\n runner.run(physicalPlan, listener);\n }\n }\n \n+ private LogicalPlanTuple firstSubPlan(LogicalPlan optimizedPlan) {\n+ Holder subPlan = new Holder<>();\n+ // Collect the first inlinejoin (bottom up in the tree)\n+ optimizedPlan.forEachUp(InlineJoin.class, ij -> {\n+ // extract the right side of the plan and replace its source\n+ if (subPlan.get() == null && ij.right().anyMatch(p -> p instanceof StubRelation)) {\n+ var p = InlineJoin.replaceStub(ij.left(), ij.right());\n+ p.setOptimized();\n+ subPlan.set(new 
LogicalPlanTuple(p, ij.right()));\n+ }\n+ });\n+ return subPlan.get();\n+ }\n+\n private void executeSubPlan(\n DriverCompletionInfo.Accumulator completionInfoAccumulator,\n- PhysicalPlan plan,\n- Iterator subPlanIterator,\n+ LogicalPlan optimizedPlan,\n+ LogicalPlanTuple subPlans,\n EsqlExecutionInfo executionInfo,\n PlanRunner runner,\n+ EsqlQueryRequest request,\n ActionListener listener\n ) {\n- PlanTuple tuple = subPlanIterator.next();\n+ // Create a physical plan out of the logical sub-plan\n+ var physicalSubPlan = logicalPlanToPhysicalPlan(subPlans.nonStubbedSubPlan, request);\n \n- runner.run(tuple.physical, listener.delegateFailureAndWrap((next, result) -> {\n+ runner.run(physicalSubPlan, listener.delegateFailureAndWrap((next, result) -> {\n try {\n+ // Translate the subquery into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n completionInfoAccumulator.accumulate(result.completionInfo());\n- LocalRelation resultWrapper = resultToPlan(tuple.logical, result);\n+ LocalRelation resultWrapper = resultToPlan(subPlans.nonStubbedSubPlan, result);", - "comment_created_at": "2025-06-25T13:37:18+00:00", - "comment_author": "bpintea", - "comment_body": "Not related to this change, but: `resultToPlan` could be made static as well.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2167048244", - "pr_number": 128917, - "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/local/CopyingLocalSupplier.java", - "created_at": "2025-06-25T15:45:27+00:00", - "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. Licensed under the Elastic License\n * 2.0; you may not use this file except in compliance with the Elastic License\n * 2.0.\n */\n\npackage org.elasticsearch.xpack.esql.plan.logical.local;\n\nimport org.elasticsearch.compute.data.Block;\nimport org.elasticsearch.compute.data.BlockUtils;\nimport org.elasticsearch.xpack.esql.planner.PlannerUtils;\n\n/**\n * A {@link LocalSupplier} that allways creates a new copy of the {@link Block}s initially provided at creation time.\n */\npublic class CopyingLocalSupplier extends ImmediateLocalSupplier {\n\n public CopyingLocalSupplier(Block[] blocks) {\n super(blocks);\n }\n\n @Override\n public Block[] get() {", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2167048244", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 128917, - "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/local/CopyingLocalSupplier.java", - "discussion_id": "2167048244", - "commented_code": "@@ -0,0 +1,31 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the Elastic License\n+ * 2.0; you may not use this file except in compliance with the Elastic License\n+ * 2.0.\n+ */\n+\n+package org.elasticsearch.xpack.esql.plan.logical.local;\n+\n+import org.elasticsearch.compute.data.Block;\n+import org.elasticsearch.compute.data.BlockUtils;\n+import org.elasticsearch.xpack.esql.planner.PlannerUtils;\n+\n+/**\n+ * A {@link LocalSupplier} that allways creates a new copy of the {@link Block}s initially provided at creation time.\n+ */\n+public class CopyingLocalSupplier extends ImmediateLocalSupplier {\n+\n+ public CopyingLocalSupplier(Block[] blocks) {\n+ super(blocks);\n+ }\n+\n+ @Override\n+ public Block[] get() {", - "comment_created_at": "2025-06-25T15:45:27+00:00", - "comment_author": "bpintea", - "comment_body": "Should this method be made a util in `BlockUtils`? There's a few possible uses of one, as called by `BlockUtils#deepCopyOf`. Then you could use a plain lambda supplier directly in `ReplaceRowAsLocalRelation` when building the `LocalRelation` (and wouldn't need this extra supplier).", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2169174964", - "pr_number": 128917, - "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java", - "created_at": "2025-06-26T14:11:34+00:00", - "commented_code": "LogicalPlan optimizedPlan,\n ActionListener listener\n ) {\n PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n if (explainMode) {", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2169174964", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 128917, - "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java", - "discussion_id": "2169174964", - "commented_code": "@@ -217,8 +217,8 @@ public void executeOptimizedPlan(\n LogicalPlan optimizedPlan,\n ActionListener listener\n ) {\n- PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n if (explainMode) {", - "comment_created_at": "2025-06-26T14:11:34+00:00", - "comment_author": "bpintea", - "comment_body": "Nit: would it be worth extracting this case into an own method?", - "pr_file_module": null - }, - { - "comment_id": "2172308567", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 128917, - "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java", - "discussion_id": "2169174964", - "commented_code": "@@ -217,8 +217,8 @@ public void executeOptimizedPlan(\n LogicalPlan optimizedPlan,\n ActionListener listener\n ) {\n- PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n if (explainMode) {", - "comment_created_at": "2025-06-27T15:41:11+00:00", - "comment_author": "astefan", - "comment_body": "I've added a TODO to revisit this. `explain` in EsqlSession needs a second look, some things do not feel like they belong here.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2180043409", - "pr_number": 129902, - "pr_file": "server/src/main/java/org/elasticsearch/action/termvectors/TransportEnsureDocsSearchableAction.java", - "created_at": "2025-07-02T13:17:00+00:00", - "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. 
Licensed under the \"Elastic License\n * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n * Public License v 1\"; you may not use this file except in compliance with, at\n * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n * License v3.0 only\", or the \"Server Side Public License, v 1\".\n *\n * This file was contributed to by generative AI\n */\n\npackage org.elasticsearch.action.termvectors;\n\nimport org.apache.logging.log4j.LogManager;\nimport org.apache.logging.log4j.Logger;\nimport org.elasticsearch.ElasticsearchException;\nimport org.elasticsearch.action.ActionListener;\nimport org.elasticsearch.action.ActionRequestValidationException;\nimport org.elasticsearch.action.ActionResponse;\nimport org.elasticsearch.action.ActionType;\nimport org.elasticsearch.action.NoShardAvailableActionException;\nimport org.elasticsearch.action.admin.indices.refresh.TransportShardRefreshAction;\nimport org.elasticsearch.action.support.ActionFilters;\nimport org.elasticsearch.action.support.ActiveShardCount;\nimport org.elasticsearch.action.support.replication.BasicReplicationRequest;\nimport org.elasticsearch.action.support.single.shard.SingleShardRequest;\nimport org.elasticsearch.action.support.single.shard.TransportSingleShardAction;\nimport org.elasticsearch.client.internal.node.NodeClient;\nimport org.elasticsearch.cluster.ProjectState;\nimport org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;\nimport org.elasticsearch.cluster.node.DiscoveryNode;\nimport org.elasticsearch.cluster.node.DiscoveryNodeRole;\nimport org.elasticsearch.cluster.project.ProjectResolver;\nimport org.elasticsearch.cluster.routing.ShardIterator;\nimport org.elasticsearch.cluster.service.ClusterService;\nimport org.elasticsearch.common.io.stream.StreamInput;\nimport org.elasticsearch.common.io.stream.StreamOutput;\nimport org.elasticsearch.common.io.stream.Writeable;\nimport org.elasticsearch.index.IndexService;\nimport org.elasticsearch.index.mapper.Uid;\nimport org.elasticsearch.index.shard.IndexShard;\nimport org.elasticsearch.index.shard.ShardId;\nimport org.elasticsearch.indices.IndicesService;\nimport org.elasticsearch.injection.guice.Inject;\nimport org.elasticsearch.threadpool.ThreadPool;\nimport org.elasticsearch.transport.TransportService;\n\nimport java.io.IOException;\nimport java.util.List;\n\n/**\n * This action is used in serverless to ensure that documents are searchable on the search tier before processing\n * term vector requests. It is an intermediate action that is executed on the indexing node and responds\n * with a no-op (the search node can proceed to process the term vector request). The action may trigger an external refresh\n * to ensure the search shards are up to date before returning the no-op.\n */\npublic class TransportEnsureDocsSearchableAction extends TransportSingleShardAction<", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2180043409", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129902, - "pr_file": "server/src/main/java/org/elasticsearch/action/termvectors/TransportEnsureDocsSearchableAction.java", - "discussion_id": "2180043409", - "commented_code": "@@ -0,0 +1,224 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ *\n+ * This file was contributed to by generative AI\n+ */\n+\n+package org.elasticsearch.action.termvectors;\n+\n+import org.apache.logging.log4j.LogManager;\n+import org.apache.logging.log4j.Logger;\n+import org.elasticsearch.ElasticsearchException;\n+import org.elasticsearch.action.ActionListener;\n+import org.elasticsearch.action.ActionRequestValidationException;\n+import org.elasticsearch.action.ActionResponse;\n+import org.elasticsearch.action.ActionType;\n+import org.elasticsearch.action.NoShardAvailableActionException;\n+import org.elasticsearch.action.admin.indices.refresh.TransportShardRefreshAction;\n+import org.elasticsearch.action.support.ActionFilters;\n+import org.elasticsearch.action.support.ActiveShardCount;\n+import org.elasticsearch.action.support.replication.BasicReplicationRequest;\n+import org.elasticsearch.action.support.single.shard.SingleShardRequest;\n+import org.elasticsearch.action.support.single.shard.TransportSingleShardAction;\n+import org.elasticsearch.client.internal.node.NodeClient;\n+import org.elasticsearch.cluster.ProjectState;\n+import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;\n+import org.elasticsearch.cluster.node.DiscoveryNode;\n+import org.elasticsearch.cluster.node.DiscoveryNodeRole;\n+import org.elasticsearch.cluster.project.ProjectResolver;\n+import org.elasticsearch.cluster.routing.ShardIterator;\n+import org.elasticsearch.cluster.service.ClusterService;\n+import org.elasticsearch.common.io.stream.StreamInput;\n+import org.elasticsearch.common.io.stream.StreamOutput;\n+import org.elasticsearch.common.io.stream.Writeable;\n+import org.elasticsearch.index.IndexService;\n+import org.elasticsearch.index.mapper.Uid;\n+import org.elasticsearch.index.shard.IndexShard;\n+import org.elasticsearch.index.shard.ShardId;\n+import org.elasticsearch.indices.IndicesService;\n+import org.elasticsearch.injection.guice.Inject;\n+import org.elasticsearch.threadpool.ThreadPool;\n+import org.elasticsearch.transport.TransportService;\n+\n+import java.io.IOException;\n+import java.util.List;\n+\n+/**\n+ * This action is used in serverless to ensure that documents are searchable on the search tier before processing\n+ * term vector requests. It is an intermediate action that is executed on the indexing node and responds\n+ * with a no-op (the search node can proceed to process the term vector request). The action may trigger an external refresh\n+ * to ensure the search shards are up to date before returning the no-op.\n+ */\n+public class TransportEnsureDocsSearchableAction extends TransportSingleShardAction<", - "comment_created_at": "2025-07-02T13:17:00+00:00", - "comment_author": "tlrx", - "comment_body": "Could we have only the ActionType and the request in `server`, and the rest lies into the serverless code base? 
Similarly to StatelessPrimaryRelocationAction?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2177395073", - "pr_number": 130364, - "pr_file": "x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecker.java", - "created_at": "2025-07-01T11:51:17+00:00", - "commented_code": "import org.apache.logging.log4j.LogManager;\nimport org.apache.logging.log4j.Logger;\nimport org.elasticsearch.cluster.ClusterState;\nimport org.elasticsearch.common.TriConsumer;\nimport org.elasticsearch.xcontent.NamedXContentRegistry;\nimport org.elasticsearch.xpack.core.deprecation.DeprecationIssue;\nimport org.elasticsearch.xpack.core.transform.transforms.TransformConfig;\n\nimport java.io.IOException;\nimport java.util.ArrayList;\nimport java.util.List;\nimport java.util.function.BiConsumer;\n\n/**\n * Cluster-specific deprecation checks, this is used to populate the {@code cluster_settings} field\n */\npublic class ClusterDeprecationChecker {\n\n private static final Logger logger = LogManager.getLogger(ClusterDeprecationChecker.class);\n private final List, List>> CHECKS = List.of(\n this::checkTransformSettings\n );\n private final List, List>> CHECKS = List.of(this::checkTransformSettings);\n private final NamedXContentRegistry xContentRegistry;\n\n ClusterDeprecationChecker(NamedXContentRegistry xContentRegistry) {\n this.xContentRegistry = xContentRegistry;\n }\n\n public List check(ClusterState clusterState, List transformConfigs) {\n public List check(List transformConfigs) {\n List allIssues = new ArrayList<>();\n CHECKS.forEach(check -> check.apply(clusterState, transformConfigs, allIssues));\n CHECKS.forEach(check -> check.accept(transformConfigs, allIssues));", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2177395073", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130364, - "pr_file": "x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecker.java", - "discussion_id": "2177395073", - "commented_code": "@@ -9,42 +9,35 @@\n \n import org.apache.logging.log4j.LogManager;\n import org.apache.logging.log4j.Logger;\n-import org.elasticsearch.cluster.ClusterState;\n-import org.elasticsearch.common.TriConsumer;\n import org.elasticsearch.xcontent.NamedXContentRegistry;\n import org.elasticsearch.xpack.core.deprecation.DeprecationIssue;\n import org.elasticsearch.xpack.core.transform.transforms.TransformConfig;\n \n import java.io.IOException;\n import java.util.ArrayList;\n import java.util.List;\n+import java.util.function.BiConsumer;\n \n /**\n * Cluster-specific deprecation checks, this is used to populate the {@code cluster_settings} field\n */\n public class ClusterDeprecationChecker {\n \n private static final Logger logger = LogManager.getLogger(ClusterDeprecationChecker.class);\n- private final List, List>> CHECKS = List.of(\n- this::checkTransformSettings\n- );\n+ private final List, List>> CHECKS = List.of(this::checkTransformSettings);\n private final NamedXContentRegistry xContentRegistry;\n \n ClusterDeprecationChecker(NamedXContentRegistry xContentRegistry) {\n this.xContentRegistry = xContentRegistry;\n }\n \n- public List check(ClusterState clusterState, List transformConfigs) {\n+ public List check(List transformConfigs) {\n List allIssues = new ArrayList<>();\n- CHECKS.forEach(check -> check.apply(clusterState, transformConfigs, allIssues));\n+ CHECKS.forEach(check -> check.accept(transformConfigs, allIssues));", - 
"comment_created_at": "2025-07-01T11:51:17+00:00", - "comment_author": "PeteGillinElastic", - "comment_body": "Do we feel like the `CHECKS` abstraction is still pulling its weight here? There's one check hard-coded, we could just call `checkTransformSettings` directly, and it'd be more readable. If we needed to add more checks in the future, it's not clear to me that adding a method to the list would be better than adding another method call here. Plus, it's not clear to me that the `BiConsumer` would have the correct signature for a theoretical future check, either. (Presumably, at some point in the past there was a check which consumed `ClusterState`, hence the redundant parameter. A future check might need that, or might need the `ProjectMetadata`, but we can't really guess. now.)", - "pr_file_module": null - }, - { - "comment_id": "2177503438", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130364, - "pr_file": "x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecker.java", - "discussion_id": "2177395073", - "commented_code": "@@ -9,42 +9,35 @@\n \n import org.apache.logging.log4j.LogManager;\n import org.apache.logging.log4j.Logger;\n-import org.elasticsearch.cluster.ClusterState;\n-import org.elasticsearch.common.TriConsumer;\n import org.elasticsearch.xcontent.NamedXContentRegistry;\n import org.elasticsearch.xpack.core.deprecation.DeprecationIssue;\n import org.elasticsearch.xpack.core.transform.transforms.TransformConfig;\n \n import java.io.IOException;\n import java.util.ArrayList;\n import java.util.List;\n+import java.util.function.BiConsumer;\n \n /**\n * Cluster-specific deprecation checks, this is used to populate the {@code cluster_settings} field\n */\n public class ClusterDeprecationChecker {\n \n private static final Logger logger = LogManager.getLogger(ClusterDeprecationChecker.class);\n- private final List, List>> CHECKS = List.of(\n- this::checkTransformSettings\n- );\n+ private final List, List>> CHECKS = List.of(this::checkTransformSettings);\n private final NamedXContentRegistry xContentRegistry;\n \n ClusterDeprecationChecker(NamedXContentRegistry xContentRegistry) {\n this.xContentRegistry = xContentRegistry;\n }\n \n- public List check(ClusterState clusterState, List transformConfigs) {\n+ public List check(List transformConfigs) {\n List allIssues = new ArrayList<>();\n- CHECKS.forEach(check -> check.apply(clusterState, transformConfigs, allIssues));\n+ CHECKS.forEach(check -> check.accept(transformConfigs, allIssues));", - "comment_created_at": "2025-07-01T12:39:04+00:00", - "comment_author": "nielsbauman", - "comment_body": "I hear what you're saying. My intuition was that this `CHECKS` field makes this class more consistent with the other deprecation checkers. 
But I don't have strong feelings, so I'm fine with calling that method directly.", - "pr_file_module": null - }, - { - "comment_id": "2177510862", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130364, - "pr_file": "x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecker.java", - "discussion_id": "2177395073", - "commented_code": "@@ -9,42 +9,35 @@\n \n import org.apache.logging.log4j.LogManager;\n import org.apache.logging.log4j.Logger;\n-import org.elasticsearch.cluster.ClusterState;\n-import org.elasticsearch.common.TriConsumer;\n import org.elasticsearch.xcontent.NamedXContentRegistry;\n import org.elasticsearch.xpack.core.deprecation.DeprecationIssue;\n import org.elasticsearch.xpack.core.transform.transforms.TransformConfig;\n \n import java.io.IOException;\n import java.util.ArrayList;\n import java.util.List;\n+import java.util.function.BiConsumer;\n \n /**\n * Cluster-specific deprecation checks, this is used to populate the {@code cluster_settings} field\n */\n public class ClusterDeprecationChecker {\n \n private static final Logger logger = LogManager.getLogger(ClusterDeprecationChecker.class);\n- private final List, List>> CHECKS = List.of(\n- this::checkTransformSettings\n- );\n+ private final List, List>> CHECKS = List.of(this::checkTransformSettings);\n private final NamedXContentRegistry xContentRegistry;\n \n ClusterDeprecationChecker(NamedXContentRegistry xContentRegistry) {\n this.xContentRegistry = xContentRegistry;\n }\n \n- public List check(ClusterState clusterState, List transformConfigs) {\n+ public List check(List transformConfigs) {\n List allIssues = new ArrayList<>();\n- CHECKS.forEach(check -> check.apply(clusterState, transformConfigs, allIssues));\n+ CHECKS.forEach(check -> check.accept(transformConfigs, allIssues));", - "comment_created_at": "2025-07-01T12:42:49+00:00", - "comment_author": "nielsbauman", - "comment_body": "On second thought, I think I agree with you that the `CHECKS` abstraction isn't super valuable, and that goes for the other classes too. 
I don't think it's worth opening a PR for those other classes, but perhaps I'll remember it if I ever need to touch this code again.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2179784624", - "pr_number": 130131, - "pr_file": "plugins/discovery-ec2/src/test/java/org/elasticsearch/discovery/ec2/EC2RetriesTests.java", - "created_at": "2025-07-02T11:12:54+00:00", - "commented_code": "new NoneCircuitBreakerService(),\n new SharedGroupFactory(Settings.EMPTY)\n ),\n threadPool,\n TransportService.NOOP_TRANSPORT_INTERCEPTOR,", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2179784624", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130131, - "pr_file": "plugins/discovery-ec2/src/test/java/org/elasticsearch/discovery/ec2/EC2RetriesTests.java", - "discussion_id": "2179784624", - "commented_code": "@@ -59,9 +59,7 @@ protected MockTransportService createTransportService() {\n new NoneCircuitBreakerService(),\n new SharedGroupFactory(Settings.EMPTY)\n ),\n- threadPool,\n- TransportService.NOOP_TRANSPORT_INTERCEPTOR,", - "comment_created_at": "2025-07-02T11:12:54+00:00", - "comment_author": "GalLalouche", - "comment_body": "It seemed all clients passed the same `Settings.EMPTY`, interceptor, and `null` in the `clusterSettings` to this constructor (refactored to a factory method), so I've simplified the method by removing those parameters.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2156561649", - "pr_number": 129633, - "pr_file": "x-pack/plugin/esql/compute/gen/src/main/java/org/elasticsearch/compute/gen/GroupingAggregatorImplementer.java", - "created_at": "2025-06-19T09:26:33+00:00", - "commented_code": "}\n builder.beginControlFlow(\"for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++)\");\n {", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2156561649", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129633, - "pr_file": "x-pack/plugin/esql/compute/gen/src/main/java/org/elasticsearch/compute/gen/GroupingAggregatorImplementer.java", - "discussion_id": "2156561649", - "commented_code": "@@ -610,7 +611,18 @@ private MethodSpec addIntermediateInput() {\n }\n builder.beginControlFlow(\"for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++)\");\n {", - "comment_created_at": "2025-06-19T09:26:33+00:00", - "comment_author": "Copilot", - "comment_body": "[nitpick] The addIntermediateInput method contains duplicated logic for handling block types versus non-block types; consider refactoring to extract the common behavior and reduce code duplication.\n```suggestion\n {\n String groupIdAssignment;\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2163237789", - "pr_number": 129684, - "pr_file": "server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormat.java", - "created_at": "2025-06-24T08:03:09+00:00", - "commented_code": "*
    <li> The sparse vector information, if required, mapping vector ordinal to doc ID\n *
    \n */\n@SuppressForbidden(", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2163237789", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129684, - "pr_file": "server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormat.java", - "discussion_id": "2163237789", - "commented_code": "@@ -85,8 +86,10 @@\n *
  <li> The sparse vector information, if required, mapping vector ordinal to doc ID\n *
\n */\n+@SuppressForbidden(", - "comment_created_at": "2025-06-24T08:03:09+00:00", - "comment_author": "mosche", - "comment_body": "could you extract a little helper and suppress there, so we don't accidentally suppress more than intended", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2163241519", - "pr_number": 129684, - "pr_file": "server/src/main/java/org/elasticsearch/search/SearchService.java", - "created_at": "2025-06-24T08:05:06+00:00", - "commented_code": "* @param threadPool with context where to write the new header\n * @return the wrapped action listener\n */\n @SuppressForbidden(\n reason = \"TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993\"\n )", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2163241519", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129684, - "pr_file": "server/src/main/java/org/elasticsearch/search/SearchService.java", - "discussion_id": "2163241519", - "commented_code": "@@ -589,6 +590,9 @@ protected void doClose() {\n * @param threadPool with context where to write the new header\n * @return the wrapped action listener\n */\n+ @SuppressForbidden(\n+ reason = \"TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993\"\n+ )", - "comment_created_at": "2025-06-24T08:05:06+00:00", - "comment_author": "mosche", - "comment_body": "could you extract a little helper and suppress there, so we don't accidentally suppress more than intended", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2163296345", - "pr_number": 129684, - "pr_file": "modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/NettyAllocator.java", - "created_at": "2025-06-24T08:29:59+00:00", - "commented_code": "import org.elasticsearch.common.util.PageCacheRecycler;\nimport org.elasticsearch.core.Assertions;\nimport org.elasticsearch.core.Booleans;\nimport org.elasticsearch.core.SuppressForbidden;\nimport org.elasticsearch.monitor.jvm.JvmInfo;\n\nimport java.util.Arrays;\nimport java.util.concurrent.atomic.AtomicBoolean;\n\n@SuppressForbidden(\n reason = \"TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993\"\n)", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2163296345", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129684, - "pr_file": "modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/NettyAllocator.java", - "discussion_id": "2163296345", - "commented_code": "@@ -26,11 +26,15 @@\n import org.elasticsearch.common.util.PageCacheRecycler;\n import org.elasticsearch.core.Assertions;\n import org.elasticsearch.core.Booleans;\n+import org.elasticsearch.core.SuppressForbidden;\n import org.elasticsearch.monitor.jvm.JvmInfo;\n \n import java.util.Arrays;\n import java.util.concurrent.atomic.AtomicBoolean;\n \n+@SuppressForbidden(\n+ reason = \"TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993\"\n+)", - "comment_created_at": "2025-06-24T08:29:59+00:00", - "comment_author": "mosche", - "comment_body": "could you extract a little helper and suppress there, so we don't accidentally suppress more than intended", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2163301148", - "pr_number": 129684, - "pr_file": 
"server/src/main/java/org/elasticsearch/common/network/NetworkUtils.java", - "created_at": "2025-06-24T08:31:25+00:00", - "commented_code": "* Utilities for network interfaces / addresses binding and publishing.\n * Its only intended for that purpose, not general purpose usage!!!!\n */\n@SuppressForbidden(\n reason = \"TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993\"\n)", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2163301148", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129684, - "pr_file": "server/src/main/java/org/elasticsearch/common/network/NetworkUtils.java", - "discussion_id": "2163301148", - "commented_code": "@@ -32,6 +33,9 @@\n * Utilities for network interfaces / addresses binding and publishing.\n * Its only intended for that purpose, not general purpose usage!!!!\n */\n+@SuppressForbidden(\n+ reason = \"TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993\"\n+)", - "comment_created_at": "2025-06-24T08:31:25+00:00", - "comment_author": "mosche", - "comment_body": "could you extract a little helper and suppress there, so we don't accidentally suppress more than intended", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2163302186", - "pr_number": 129684, - "pr_file": "server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java", - "created_at": "2025-06-24T08:31:54+00:00", - "commented_code": "* cpu resources.\n * \n */\n@SuppressForbidden(\n reason = \"TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993\"\n)", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2163302186", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129684, - "pr_file": "server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java", - "discussion_id": "2163302186", - "commented_code": "@@ -28,6 +29,9 @@\n * cpu resources.\n * \n */\n+@SuppressForbidden(\n+ reason = \"TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993\"\n+)", - "comment_created_at": "2025-06-24T08:31:54+00:00", - "comment_author": "mosche", - "comment_body": "could you extract a little helper and suppress there, so we don't accidentally suppress more than intended", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2171122552", - "pr_number": 129684, - "pr_file": "modules/lang-painless/src/main/java/org/elasticsearch/painless/PainlessScriptEngine.java", - "created_at": "2025-06-27T08:00:12+00:00", - "commented_code": "}\n }\n\n @SuppressForbidden(\n reason = \"TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993\"\n )", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2171122552", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129684, - "pr_file": "modules/lang-painless/src/main/java/org/elasticsearch/painless/PainlessScriptEngine.java", - "discussion_id": "2171122552", - "commented_code": "@@ -391,6 +392,9 @@ ScriptScope compile(Compiler compiler, Loader loader, String scriptName, String\n }\n }\n \n+ @SuppressForbidden(\n+ reason = \"TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993\"\n+ )", - "comment_created_at": 
"2025-06-27T08:00:12+00:00", - "comment_author": "mosche", - "comment_body": "please also extract a small helper here to minimize the scope of @SuppressForbidden", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2171175467", - "pr_number": 129684, - "pr_file": "x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/PredictionFieldType.java", - "created_at": "2025-06-27T08:17:14+00:00", - "commented_code": "return name().toLowerCase(Locale.ROOT);\n }\n\n @SuppressForbidden(reason = \"accept lenient boolean field values\")", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2171175467", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129684, - "pr_file": "x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/PredictionFieldType.java", - "discussion_id": "2171175467", - "commented_code": "@@ -45,6 +46,7 @@ public String toString() {\n return name().toLowerCase(Locale.ROOT);\n }\n \n+ @SuppressForbidden(reason = \"accept lenient boolean field values\")", - "comment_created_at": "2025-06-27T08:17:14+00:00", - "comment_author": "mosche", - "comment_body": "please also extract a small helper here to minimize the scope of @SuppressForbidden", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1646778366", - "pr_number": 109949, - "pr_file": "server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java", - "created_at": "2024-06-20T00:22:25+00:00", - "commented_code": "request.getAutoGeneratedTimestamp(),\n request.isRetry()\n );\n\n if (result.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) {\n return handleMappingUpdateRequired(\n context,\n mappingUpdater,\n waitForMappingUpdate,\n itemDoneListener,\n result,\n primary,\n version,\n updateResult\n );\n }\n }\n if (result.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) {\n\n try {\n Optional mergedSource = Optional.ofNullable(\n primary.mapperService()\n .merge(\n MapperService.SINGLE_MAPPING_NAME,\n new CompressedXContent(result.getRequiredMappingUpdate()),\n MapperService.MergeReason.MAPPING_AUTO_UPDATE_PREFLIGHT\n )\n ).map(DocumentMapper::mappingSource);\n Optional previousSource = Optional.ofNullable(primary.mapperService().documentMapper())\n .map(DocumentMapper::mappingSource);\n onComplete(result, context, updateResult);\n return true;\n }\n\n if (mergedSource.equals(previousSource)) {\n context.resetForNoopMappingUpdateRetry(primary.mapperService().mappingVersion());\n return true;\n }\n } catch (Exception e) {\n logger.info(() -> format(\"%s mapping update rejected by primary\", primary.shardId()), e);\n assert result.getId() != null;\n onComplete(exceptionToResult(e, primary, isDelete, version, result.getId()), context, updateResult);\n private static boolean handleMappingUpdateRequired(", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "1646778366", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109949, - "pr_file": "server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java", - "discussion_id": "1646778366", - "commented_code": "@@ -379,64 +376,88 @@ static boolean executeBulkItemRequest(\n request.getAutoGeneratedTimestamp(),\n request.isRetry()\n );\n-\n+ if (result.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) {\n+ return handleMappingUpdateRequired(\n+ context,\n+ mappingUpdater,\n+ waitForMappingUpdate,\n+ itemDoneListener,\n+ result,\n+ 
primary,\n+ version,\n+ updateResult\n+ );\n+ }\n }\n- if (result.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) {\n \n- try {\n- Optional mergedSource = Optional.ofNullable(\n- primary.mapperService()\n- .merge(\n- MapperService.SINGLE_MAPPING_NAME,\n- new CompressedXContent(result.getRequiredMappingUpdate()),\n- MapperService.MergeReason.MAPPING_AUTO_UPDATE_PREFLIGHT\n- )\n- ).map(DocumentMapper::mappingSource);\n- Optional previousSource = Optional.ofNullable(primary.mapperService().documentMapper())\n- .map(DocumentMapper::mappingSource);\n+ onComplete(result, context, updateResult);\n+ return true;\n+ }\n \n- if (mergedSource.equals(previousSource)) {\n- context.resetForNoopMappingUpdateRetry(primary.mapperService().mappingVersion());\n- return true;\n- }\n- } catch (Exception e) {\n- logger.info(() -> format(\"%s mapping update rejected by primary\", primary.shardId()), e);\n- assert result.getId() != null;\n- onComplete(exceptionToResult(e, primary, isDelete, version, result.getId()), context, updateResult);\n+ private static boolean handleMappingUpdateRequired(", - "comment_created_at": "2024-06-20T00:22:25+00:00", - "comment_author": "original-brownbear", - "comment_body": "Just extracted this into its own method now that it got so big and removed the `Optional` use which was confusing and unnecessary. Sorry for mixing fix + refactoring a little here, hope it's not too confusing. Otherwise I can make this into a 2 parter.", - "pr_file_module": null - }, - { - "comment_id": "1647324269", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109949, - "pr_file": "server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java", - "discussion_id": "1646778366", - "commented_code": "@@ -379,64 +376,88 @@ static boolean executeBulkItemRequest(\n request.getAutoGeneratedTimestamp(),\n request.isRetry()\n );\n-\n+ if (result.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) {\n+ return handleMappingUpdateRequired(\n+ context,\n+ mappingUpdater,\n+ waitForMappingUpdate,\n+ itemDoneListener,\n+ result,\n+ primary,\n+ version,\n+ updateResult\n+ );\n+ }\n }\n- if (result.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) {\n \n- try {\n- Optional mergedSource = Optional.ofNullable(\n- primary.mapperService()\n- .merge(\n- MapperService.SINGLE_MAPPING_NAME,\n- new CompressedXContent(result.getRequiredMappingUpdate()),\n- MapperService.MergeReason.MAPPING_AUTO_UPDATE_PREFLIGHT\n- )\n- ).map(DocumentMapper::mappingSource);\n- Optional previousSource = Optional.ofNullable(primary.mapperService().documentMapper())\n- .map(DocumentMapper::mappingSource);\n+ onComplete(result, context, updateResult);\n+ return true;\n+ }\n \n- if (mergedSource.equals(previousSource)) {\n- context.resetForNoopMappingUpdateRetry(primary.mapperService().mappingVersion());\n- return true;\n- }\n- } catch (Exception e) {\n- logger.info(() -> format(\"%s mapping update rejected by primary\", primary.shardId()), e);\n- assert result.getId() != null;\n- onComplete(exceptionToResult(e, primary, isDelete, version, result.getId()), context, updateResult);\n+ private static boolean handleMappingUpdateRequired(", - "comment_created_at": "2024-06-20T10:02:50+00:00", - "comment_author": "javanna", - "comment_body": "given how delicate things are, I think it would be great to split this in two: refactoring + actual change, as a safety measure. 
I could definitely miss things otherwise while reviewing this as-is.", - "pr_file_module": null - }, - { - "comment_id": "1647381507", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109949, - "pr_file": "server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java", - "discussion_id": "1646778366", - "commented_code": "@@ -379,64 +376,88 @@ static boolean executeBulkItemRequest(\n request.getAutoGeneratedTimestamp(),\n request.isRetry()\n );\n-\n+ if (result.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) {\n+ return handleMappingUpdateRequired(\n+ context,\n+ mappingUpdater,\n+ waitForMappingUpdate,\n+ itemDoneListener,\n+ result,\n+ primary,\n+ version,\n+ updateResult\n+ );\n+ }\n }\n- if (result.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) {\n \n- try {\n- Optional mergedSource = Optional.ofNullable(\n- primary.mapperService()\n- .merge(\n- MapperService.SINGLE_MAPPING_NAME,\n- new CompressedXContent(result.getRequiredMappingUpdate()),\n- MapperService.MergeReason.MAPPING_AUTO_UPDATE_PREFLIGHT\n- )\n- ).map(DocumentMapper::mappingSource);\n- Optional previousSource = Optional.ofNullable(primary.mapperService().documentMapper())\n- .map(DocumentMapper::mappingSource);\n+ onComplete(result, context, updateResult);\n+ return true;\n+ }\n \n- if (mergedSource.equals(previousSource)) {\n- context.resetForNoopMappingUpdateRetry(primary.mapperService().mappingVersion());\n- return true;\n- }\n- } catch (Exception e) {\n- logger.info(() -> format(\"%s mapping update rejected by primary\", primary.shardId()), e);\n- assert result.getId() != null;\n- onComplete(exceptionToResult(e, primary, isDelete, version, result.getId()), context, updateResult);\n+ private static boolean handleMappingUpdateRequired(", - "comment_created_at": "2024-06-20T10:49:43+00:00", - "comment_author": "original-brownbear", - "comment_body": "Agreed :) Here you go https://github.com/elastic/elasticsearch/pull/109961 :)", - "pr_file_module": null - }, - { - "comment_id": "1648632335", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109949, - "pr_file": "server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java", - "discussion_id": "1646778366", - "commented_code": "@@ -379,64 +376,88 @@ static boolean executeBulkItemRequest(\n request.getAutoGeneratedTimestamp(),\n request.isRetry()\n );\n-\n+ if (result.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) {\n+ return handleMappingUpdateRequired(\n+ context,\n+ mappingUpdater,\n+ waitForMappingUpdate,\n+ itemDoneListener,\n+ result,\n+ primary,\n+ version,\n+ updateResult\n+ );\n+ }\n }\n- if (result.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) {\n \n- try {\n- Optional mergedSource = Optional.ofNullable(\n- primary.mapperService()\n- .merge(\n- MapperService.SINGLE_MAPPING_NAME,\n- new CompressedXContent(result.getRequiredMappingUpdate()),\n- MapperService.MergeReason.MAPPING_AUTO_UPDATE_PREFLIGHT\n- )\n- ).map(DocumentMapper::mappingSource);\n- Optional previousSource = Optional.ofNullable(primary.mapperService().documentMapper())\n- .map(DocumentMapper::mappingSource);\n+ onComplete(result, context, updateResult);\n+ return true;\n+ }\n \n- if (mergedSource.equals(previousSource)) {\n- context.resetForNoopMappingUpdateRetry(primary.mapperService().mappingVersion());\n- return true;\n- }\n- } catch (Exception e) {\n- logger.info(() -> format(\"%s mapping update rejected by primary\", primary.shardId()), e);\n- assert result.getId() != null;\n- 
onComplete(exceptionToResult(e, primary, isDelete, version, result.getId()), context, updateResult);\n+ private static boolean handleMappingUpdateRequired(", - "comment_created_at": "2024-06-21T08:45:01+00:00", - "comment_author": "javanna", - "comment_body": "thanks!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "908872960", - "pr_number": 87810, - "pr_file": "x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/optimizer/Optimizer.java", - "created_at": "2022-06-28T19:37:01+00:00", - "commented_code": "}\n\n public static class ConvertTypes extends OptimizerExpressionRule {\n\n public ConvertTypes() {\n super(TransformDirection.DOWN);\n }\n\n @Override\n public Expression rule(ScalarFunction e) {\n if (e instanceof BinaryComparison predicate && predicate.right().foldable()) {\n return convertBinaryComparison(predicate);\n } else if (e instanceof Range range) {\n return convertRange(range);\n } else if (e instanceof In in) {\n return convertIn(in);\n }\n return e;\n }\n\n private Expression convertIn(In in) {\n if (in.list().isEmpty()) {\n return in;\n }\n List newRight = new ArrayList<>();\n List additionalConditions = new ArrayList<>();\n DataType type = in.value().dataType();\n boolean converted = false;\n for (Expression exp : in.list()) {\n if (type == exp.dataType() || exp.foldable() == false || DataTypes.areCompatible(type, exp.dataType())) {\n newRight.add(exp);\n } else {\n converted = true;\n Object foldedValue = exp.fold();\n if (DataTypes.isDateTime(type) && DataTypes.isString(exp.dataType())) {\n try {\n // try date math first\n Tuple range = DateFieldMapper.DateFieldType.toTimestampRange(\n foldedValue,\n foldedValue,\n true,\n true,\n ZoneOffset.UTC,\n DateUtils.DATE_MATH_PARSER,\n DateFieldMapper.Resolution.MILLISECONDS,\n () -> System.currentTimeMillis()\n );\n if (range.v1() < range.v2()) {\n /*\n treat it as a date range\n eg. 
the following\n something IN (, , )\n becomes\n something BETWEEN AND OR someting IN (, )\n */\n additionalConditions.add(\n new Range(\n in.source(),\n in.value(),\n tryCast(exp, range.v1(), type),\n true,\n tryCast(exp, range.v2(), type),\n true,\n in.zoneId()\n )\n );\n } else {\n newRight.add(tryCast(exp, foldedValue, DataTypes.DATETIME));\n }\n } catch (ElasticsearchParseException | DateTimeParseException e) {\n // the date parsing just failed, use the original expression\n newRight.add(exp);\n }\n } else {\n newRight.add(tryCast(exp, foldedValue, type));\n }\n }\n }\n if (converted == false) {\n return in;\n }\n if (newRight.size() == 1) {\n additionalConditions.add(new Equals(in.source(), in.value(), newRight.remove(0)));\n }\n\n if (newRight.size() > 0 && additionalConditions.size() > 0) {\n return new Or(\n in.source(),\n new In(in.source(), in.value(), newRight, in.zoneId()),\n toOrTree(in.source(), additionalConditions)\n );\n } else if (newRight.size() > 0) {\n return new In(in.source(), in.value(), newRight, in.zoneId());\n } else if (additionalConditions.size() > 0) {\n return toOrTree(in.source(), additionalConditions);\n }\n\n return in;\n }\n\n private Expression toOrTree(Source source, List items) {\n if (items.size() == 1) {\n return items.get(0);\n }\n if (items.size() == 2) {\n return new Or(source, items.get(0), items.get(1));\n }\n\n int half = items.size() / 2;\n return new Or(source, toOrTree(source, items.subList(0, half)), toOrTree(source, items.subList(half, items.size())));\n }\n\n private Expression convertBinaryComparison(BinaryComparison predicate) {\n Object rightValue = predicate.right().fold();\n if (rightValue == null && predicate instanceof NullEquals) {\n return new IsNull(predicate.source(), predicate.left());\n } else if (rightValue != null) {\n try {\n if (DataTypes.isDateTime(predicate.left().dataType()) && DataTypes.isString(predicate.right().dataType())) {", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "908872960", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 87810, - "pr_file": "x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/optimizer/Optimizer.java", - "discussion_id": "908872960", - "commented_code": "@@ -1266,6 +1284,241 @@ private boolean foldable(Expression e) {\n \n }\n \n+ public static class ConvertTypes extends OptimizerExpressionRule {\n+\n+ public ConvertTypes() {\n+ super(TransformDirection.DOWN);\n+ }\n+\n+ @Override\n+ public Expression rule(ScalarFunction e) {\n+ if (e instanceof BinaryComparison predicate && predicate.right().foldable()) {\n+ return convertBinaryComparison(predicate);\n+ } else if (e instanceof Range range) {\n+ return convertRange(range);\n+ } else if (e instanceof In in) {\n+ return convertIn(in);\n+ }\n+ return e;\n+ }\n+\n+ private Expression convertIn(In in) {\n+ if (in.list().isEmpty()) {\n+ return in;\n+ }\n+ List newRight = new ArrayList<>();\n+ List additionalConditions = new ArrayList<>();\n+ DataType type = in.value().dataType();\n+ boolean converted = false;\n+ for (Expression exp : in.list()) {\n+ if (type == exp.dataType() || exp.foldable() == false || DataTypes.areCompatible(type, exp.dataType())) {\n+ newRight.add(exp);\n+ } else {\n+ converted = true;\n+ Object foldedValue = exp.fold();\n+ if (DataTypes.isDateTime(type) && DataTypes.isString(exp.dataType())) {\n+ try {\n+ // try date math first\n+ Tuple range = DateFieldMapper.DateFieldType.toTimestampRange(\n+ foldedValue,\n+ foldedValue,\n+ true,\n+ true,\n+ 
ZoneOffset.UTC,\n+ DateUtils.DATE_MATH_PARSER,\n+ DateFieldMapper.Resolution.MILLISECONDS,\n+ () -> System.currentTimeMillis()\n+ );\n+ if (range.v1() < range.v2()) {\n+ /*\n+ treat it as a date range\n+ eg. the following\n+ something IN (, , )\n+ becomes\n+ something BETWEEN AND OR someting IN (, )\n+ */\n+ additionalConditions.add(\n+ new Range(\n+ in.source(),\n+ in.value(),\n+ tryCast(exp, range.v1(), type),\n+ true,\n+ tryCast(exp, range.v2(), type),\n+ true,\n+ in.zoneId()\n+ )\n+ );\n+ } else {\n+ newRight.add(tryCast(exp, foldedValue, DataTypes.DATETIME));\n+ }\n+ } catch (ElasticsearchParseException | DateTimeParseException e) {\n+ // the date parsing just failed, use the original expression\n+ newRight.add(exp);\n+ }\n+ } else {\n+ newRight.add(tryCast(exp, foldedValue, type));\n+ }\n+ }\n+ }\n+ if (converted == false) {\n+ return in;\n+ }\n+ if (newRight.size() == 1) {\n+ additionalConditions.add(new Equals(in.source(), in.value(), newRight.remove(0)));\n+ }\n+\n+ if (newRight.size() > 0 && additionalConditions.size() > 0) {\n+ return new Or(\n+ in.source(),\n+ new In(in.source(), in.value(), newRight, in.zoneId()),\n+ toOrTree(in.source(), additionalConditions)\n+ );\n+ } else if (newRight.size() > 0) {\n+ return new In(in.source(), in.value(), newRight, in.zoneId());\n+ } else if (additionalConditions.size() > 0) {\n+ return toOrTree(in.source(), additionalConditions);\n+ }\n+\n+ return in;\n+ }\n+\n+ private Expression toOrTree(Source source, List items) {\n+ if (items.size() == 1) {\n+ return items.get(0);\n+ }\n+ if (items.size() == 2) {\n+ return new Or(source, items.get(0), items.get(1));\n+ }\n+\n+ int half = items.size() / 2;\n+ return new Or(source, toOrTree(source, items.subList(0, half)), toOrTree(source, items.subList(half, items.size())));\n+ }\n+\n+ private Expression convertBinaryComparison(BinaryComparison predicate) {\n+ Object rightValue = predicate.right().fold();\n+ if (rightValue == null && predicate instanceof NullEquals) {\n+ return new IsNull(predicate.source(), predicate.left());\n+ } else if (rightValue != null) {\n+ try {\n+ if (DataTypes.isDateTime(predicate.left().dataType()) && DataTypes.isString(predicate.right().dataType())) {", - "comment_created_at": "2022-06-28T19:37:01+00:00", - "comment_author": "bpintea", - "comment_body": "Might be worth extracting this key check into a function? 
(some `couldBeTimestampRange(DataType, DataType)`?)", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-measure-before-optimizing-performance.json b/_reviewers/elasticsearch-measure-before-optimizing-performance.json new file mode 100644 index 0000000..993f4bd --- /dev/null +++ b/_reviewers/elasticsearch-measure-before-optimizing-performance.json @@ -0,0 +1,314 @@ +[ + { + "discussion_id": "2166503674", + "pr_number": 129969, + "pr_file": "server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java", + "created_at": "2025-06-25T11:38:51+00:00", + "commented_code": "iwc.setMaxFullFlushMergeWaitMillis(-1);\n iwc.setSimilarity(engineConfig.getSimilarity());\n iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());\n iwc.setCodec(engineConfig.getCodec());\n iwc.setCodec(new TrackingPostingsInMemoryBytesCodec(engineConfig.getCodec()));", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2166503674", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129969, + "pr_file": "server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java", + "discussion_id": "2166503674", + "commented_code": "@@ -2778,7 +2779,7 @@ private IndexWriterConfig getIndexWriterConfig() {\n iwc.setMaxFullFlushMergeWaitMillis(-1);\n iwc.setSimilarity(engineConfig.getSimilarity());\n iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());\n- iwc.setCodec(engineConfig.getCodec());\n+ iwc.setCodec(new TrackingPostingsInMemoryBytesCodec(engineConfig.getCodec()));", + "comment_created_at": "2025-06-25T11:38:51+00:00", + "comment_author": "martijnvg", + "comment_body": "I wonder what the overhead is of always wrapping the codec in `TrackingPostingsInMemoryBytesCodec`. Maybe let's quickly run benchmark? (elastic/logs?)\r\n\r\nAdditionally I wonder whether this should only be done for stateless only.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2168541968", + "pr_number": 129997, + "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java", + "created_at": "2025-06-26T08:47:27+00:00", + "commented_code": "XContentParser.Token token;\n XContentParser.Token previousToken = parser.currentToken();\n int elements = 0;\n int countArray = 0;\n long nestedDocsLimit = context.indexSettings().getMappingNestedDocsLimit();\n while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {\n if (token == XContentParser.Token.START_OBJECT) {\n if (countArray++ >= nestedDocsLimit) {", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2168541968", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129997, + "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java", + "discussion_id": "2168541968", + "commented_code": "@@ -749,8 +749,20 @@ private static void parseNonDynamicArray(\n XContentParser.Token token;\n XContentParser.Token previousToken = parser.currentToken();\n int elements = 0;\n+ int countArray = 0;\n+ long nestedDocsLimit = context.indexSettings().getMappingNestedDocsLimit();\n while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {\n if (token == XContentParser.Token.START_OBJECT) {\n+ if (countArray++ >= nestedDocsLimit) {", + "comment_created_at": "2025-06-26T08:47:27+00:00", + "comment_author": "javanna", + "comment_body": "I worry that this will be a performance hit when parsing documents perhaps. 
We should have micro benchmarks for document parsing under ./benchmarks. It is probably worth double checking the impact of this change.", + "pr_file_module": null + }, + { + "comment_id": "2168594099", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129997, + "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java", + "discussion_id": "2168541968", + "commented_code": "@@ -749,8 +749,20 @@ private static void parseNonDynamicArray(\n XContentParser.Token token;\n XContentParser.Token previousToken = parser.currentToken();\n int elements = 0;\n+ int countArray = 0;\n+ long nestedDocsLimit = context.indexSettings().getMappingNestedDocsLimit();\n while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {\n if (token == XContentParser.Token.START_OBJECT) {\n+ if (countArray++ >= nestedDocsLimit) {", + "comment_created_at": "2025-06-26T09:12:03+00:00", + "comment_author": "drempapis", + "comment_body": ">We should have micro benchmarks for document parsing under ./benchmarks.\r\n\r\nWhat’s the recommended way to micro-benchmark this functionality", + "pr_file_module": null + }, + { + "comment_id": "2172680459", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129997, + "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java", + "discussion_id": "2168541968", + "commented_code": "@@ -749,8 +749,20 @@ private static void parseNonDynamicArray(\n XContentParser.Token token;\n XContentParser.Token previousToken = parser.currentToken();\n int elements = 0;\n+ int countArray = 0;\n+ long nestedDocsLimit = context.indexSettings().getMappingNestedDocsLimit();\n while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {\n if (token == XContentParser.Token.START_OBJECT) {\n+ if (countArray++ >= nestedDocsLimit) {", + "comment_created_at": "2025-06-27T18:54:56+00:00", + "comment_author": "javanna", + "comment_body": "I would run the existing micro benchmarks for document parsing, and check if they would detect regressions around the counting you added, or add a new one that's specific to this scenario if needed. 
We may actually need documents with an array including many inner objects that we likely don't have in the existing benchmarks", + "pr_file_module": null + }, + { + "comment_id": "2177851365", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129997, + "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java", + "discussion_id": "2168541968", + "commented_code": "@@ -749,8 +749,20 @@ private static void parseNonDynamicArray(\n XContentParser.Token token;\n XContentParser.Token previousToken = parser.currentToken();\n int elements = 0;\n+ int countArray = 0;\n+ long nestedDocsLimit = context.indexSettings().getMappingNestedDocsLimit();\n while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {\n if (token == XContentParser.Token.START_OBJECT) {\n+ if (countArray++ >= nestedDocsLimit) {", + "comment_created_at": "2025-07-01T15:04:00+00:00", + "comment_author": "drempapis", + "comment_body": "I did a micro-benchmarking to test the time it takes to parse documents with varying `array` sizes.\r\nThe `Run 1` is for he main branch where`Run 2` is for the current branch (update)\r\n\r\n```\r\n\r\nArray Size | Run 1 Avg (ns/op) | Run 2 Avg (ns/op) | Difference (ns) | % Change\r\n--------------------------------------------------------------------------------\r\n10 | 1983.711 | 2223.238 | +239.527 | +12.07%\r\n100 | 15356.069 | 15497.797 | +141.728 | +0.92%\r\n1000 | 141146.977 | 145791.315 | +4644.338 | +3.29%\r\n10000 | 1432616.938 | 1443769.196 | +11152.258 | +0.78%\r\n```\r\nThe performance degraded slightly in the second run for all array sizes. For larger array sizes (1000 and 10000), the performance remained quite stable. \r\n\r\nFor the array size of 10 we observe the largest relative increase though the absolute impact is small.\r\n\r\nI executed the benchmark locally, aiming to minimize system load by stopping all resource-intensive processes. 
However, some system noise or garbage collection activity may have influenced the results.\r\n\r\nThe code I executed was the following \r\n```\r\n@Fork(value = 1)\r\n@Warmup(iterations = 5)\r\n@Measurement(iterations = 5)\r\n@BenchmarkMode(Mode.AverageTime)\r\n@OutputTimeUnit(TimeUnit.NANOSECONDS)\r\n@State(Scope.Benchmark)\r\npublic class DocumentParserBenchmark {\r\n\r\n static {\r\n LogConfigurator.loadLog4jPlugins();\r\n LogConfigurator.configureESLogging();\r\n }\r\n\r\n @Param({\"10\", \"100\", \"1000\", \"10000\"})\r\n private int arraySize;\r\n\r\n private MapperService mapperService;\r\n\r\n private SourceToParse sourceToParse;\r\n\r\n @Setup\r\n public void setUp() {\r\n this.mapperService = MapperServiceFactory.create(\"\"\"\r\n {\r\n \"properties\": {\r\n \"array\": {\r\n \"properties\": {\r\n \"value\": { \"type\": \"integer\" }\r\n }\r\n }\r\n }\r\n }\r\n \\s\"\"\");\r\n\r\n this.sourceToParse = new SourceToParse(UUIDs.randomBase64UUID(),\r\n new BytesArray(generateDocWithArray(arraySize)), XContentType.JSON);\r\n }\r\n\r\n @Benchmark\r\n public List benchmarkDocumentWithLargeArray() {\r\n return mapperService.documentMapper().parse(sourceToParse).docs();\r\n }\r\n\r\n private static String generateDocWithArray(int arraySize) {\r\n StringBuilder sb = new StringBuilder();\r\n sb.append(\"{ \\\"array\\\": [\");\r\n for (int i = 1; i <= arraySize; i++) {\r\n sb.append(\"{\\\"value\\\": \").append(i).append(\"}\");\r\n if (i < arraySize) {\r\n sb.append(\",\");\r\n }\r\n }\r\n sb.append(\"]}\");\r\n return sb.toString();\r\n }\r\n}\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1965662264", + "pr_number": 123148, + "pr_file": "server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesFetcher.java", + "created_at": "2025-02-21T15:14:51+00:00", + "commented_code": "null,\n Map.of()\n );\n responseMap.put(parentField, fieldCap);\n\n if (filter == null || Arrays.asList(types).contains(type)) {", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "1965662264", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 123148, + "pr_file": "server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesFetcher.java", + "discussion_id": "1965662264", + "commented_code": "@@ -214,7 +215,10 @@ static Map retrieveFieldCaps(\n null,\n Map.of()\n );\n- responseMap.put(parentField, fieldCap);\n+\n+ if (filter == null || Arrays.asList(types).contains(type)) {", + "comment_created_at": "2025-02-21T15:14:51+00:00", + "comment_author": "piergm", + "comment_body": "Careful, by calling `Arrays.asList` in a loop, here we are converting every time the array to a new `ArrayList` and it is not very efficient.\r\nThere are a couple of options here:\r\n1) We can convert the array to a ArrayList before the for loop (this should be safe since we are not changing the array content).\r\n2) Use a Set instead of an ArrayList (also placed outside the loop), in this way the .contains call is O(1), while for ArrayList is O(n). 
\r\nSide note: I don't expect this array to ever become huge therefore both approach should be safe to use but to be extra safe I'd opt for the Set.\r\nIf we are using Set we can even go for `Set.of(types)` since it returns an unmodifiable set!", + "pr_file_module": null + }, + { + "comment_id": "1967988577", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 123148, + "pr_file": "server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesFetcher.java", + "discussion_id": "1965662264", + "commented_code": "@@ -214,7 +215,10 @@ static Map retrieveFieldCaps(\n null,\n Map.of()\n );\n- responseMap.put(parentField, fieldCap);\n+\n+ if (filter == null || Arrays.asList(types).contains(type)) {", + "comment_created_at": "2025-02-24T16:21:56+00:00", + "comment_author": "luizgpsantos", + "comment_body": "Yep, that makes sense. To avoid the duplicated initialization of `Set.of(types)`, I changed some other places. Also, now I have to check for the `parent` in the filters, so I used the same `Set` strategy.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1957201015", + "pr_number": 122669, + "pr_file": "server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java", + "created_at": "2025-02-15T22:19:48+00:00", + "commented_code": "final SearchActionListener listener\n ) {\n ShardSearchRequest request = rewriteShardSearchRequest(super.buildShardSearchRequest(shardIt, listener.requestIndex));\n getSearchTransport().sendExecuteQuery(connection, request, getTask(), listener);\n var searchTransport = getSearchTransport();\n var task = getTask();\n if (searchTransport.transportService().getLocalNodeConnection() == connection) {\n searchService.executeQueryPhase(request, task, listener);", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "1957201015", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 122669, + "pr_file": "server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java", + "discussion_id": "1957201015", + "commented_code": "@@ -95,7 +98,13 @@ protected void executePhaseOnShard(\n final SearchActionListener listener\n ) {\n ShardSearchRequest request = rewriteShardSearchRequest(super.buildShardSearchRequest(shardIt, listener.requestIndex));\n- getSearchTransport().sendExecuteQuery(connection, request, getTask(), listener);\n+ var searchTransport = getSearchTransport();\n+ var task = getTask();\n+ if (searchTransport.transportService().getLocalNodeConnection() == connection) {\n+ searchService.executeQueryPhase(request, task, listener);", + "comment_created_at": "2025-02-15T22:19:48+00:00", + "comment_author": "javanna", + "comment_body": "out of curiosity: if we do this, why do it only for the query phase? Also, couldn't this conditional be added to the sendExecuteQuery method instead? What kind of overhead does this save? 
I can imagine that this is a pretty common pattern, or is search the edge case?", + "pr_file_module": null + }, + { + "comment_id": "1957204416", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 122669, + "pr_file": "server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java", + "discussion_id": "1957201015", + "commented_code": "@@ -95,7 +98,13 @@ protected void executePhaseOnShard(\n final SearchActionListener listener\n ) {\n ShardSearchRequest request = rewriteShardSearchRequest(super.buildShardSearchRequest(shardIt, listener.requestIndex));\n- getSearchTransport().sendExecuteQuery(connection, request, getTask(), listener);\n+ var searchTransport = getSearchTransport();\n+ var task = getTask();\n+ if (searchTransport.transportService().getLocalNodeConnection() == connection) {\n+ searchService.executeQueryPhase(request, task, listener);", + "comment_created_at": "2025-02-15T22:49:01+00:00", + "comment_author": "original-brownbear", + "comment_body": "Hmm good question. I guess I only noticed this in benchmarks for search and for the query phase specifically. For fetch it's not super visible since you don't hit so many shards mostly and for bulk indexing you still have per-shard bulks so the cost isn't in that.\r\nI first noticed this with batched execution where the overhead becomes super visible but it's equally visible without it for large data nodes that do coordination work already (or if queries are heavy, like a large terms query or some geo stuff or so).\r\nThe overhead saved is 1. all the lookups in the transport layer, lots of listener wrapping, child-task registration and most importantly security.\r\n**But** :) that's why I need a review from security here I think. Functionally I think security still works the same way if not more efficiently. All tests pass because we auth the top level search request. DLS/FLS are applied as well but somehow those cache assertions needed adjustment and seemingly we do use the cache more now and I can't explain why.\r\nthe security overhead is considerable here, it's well in excess of the can_match cost for most rally runs it seems :O ", + "pr_file_module": null + }, + { + "comment_id": "1957206010", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 122669, + "pr_file": "server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java", + "discussion_id": "1957201015", + "commented_code": "@@ -95,7 +98,13 @@ protected void executePhaseOnShard(\n final SearchActionListener listener\n ) {\n ShardSearchRequest request = rewriteShardSearchRequest(super.buildShardSearchRequest(shardIt, listener.requestIndex));\n- getSearchTransport().sendExecuteQuery(connection, request, getTask(), listener);\n+ var searchTransport = getSearchTransport();\n+ var task = getTask();\n+ if (searchTransport.transportService().getLocalNodeConnection() == connection) {\n+ searchService.executeQueryPhase(request, task, listener);", + "comment_created_at": "2025-02-15T23:03:00+00:00", + "comment_author": "original-brownbear", + "comment_body": "It's this (couldn't zoom out further :P)\r\n![out2](https://github.com/user-attachments/assets/b242f23c-1ed1-41f4-bb97-5c5ed261f12c)\r\n\r\nvs this\r\n![out](https://github.com/user-attachments/assets/246c2643-a0ce-4ab5-8730-a6f50f4ed0c8)\r\n\r\nand on a transport thread. 
", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2012358741", + "pr_number": 122491, + "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", + "created_at": "2025-03-25T15:21:43+00:00", + "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. Licensed under the \"Elastic License\n * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n * Public License v 1\"; you may not use this file except in compliance with, at\n * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n * License v3.0 only\", or the \"Server Side Public License, v 1\".\n */\npackage org.elasticsearch.ingest.common;\n\nimport org.elasticsearch.ingest.IngestDocument;\n\nimport java.util.ArrayList;\nimport java.util.HashMap;\nimport java.util.List;\nimport java.util.Map;\nimport java.util.regex.Matcher;\nimport java.util.regex.Pattern;\nimport java.util.stream.Collectors;\n\nfinal class CefParser {\n\n private final IngestDocument ingestDocument;\n private final boolean removeEmptyValue;\n\n CefParser(IngestDocument ingestDocument, boolean removeEmptyValue) {\n this.ingestDocument = ingestDocument;\n this.removeEmptyValue = removeEmptyValue;\n }\n\n private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n\n // New patterns for extension parsing\n private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n private static final String EXTENSION_VALUE_PATTERN = \"(?:\\\\S|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n );\n private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = new HashMap<>();\n private static final Map FIELD_MAPPING = new HashMap<>();\n private static final String ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = \"incomplete CEF header\";\n\n static {\n EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\\\\", \"\\\\\");\n EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\=\", \"=\");\n EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\n\", \"\\n\");\n EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\r\", \"\\r\");\n\n FIELD_MAPPING.put(\"app\", \"network.protocol\");\n FIELD_MAPPING.put(\"in\", \"source.bytes\");\n FIELD_MAPPING.put(\"out\", \"destination.bytes\");\n FIELD_MAPPING.put(\"dst\", \"destination.ip\");\n FIELD_MAPPING.put(\"dlat\", \"destination.geo.location.lat\");\n FIELD_MAPPING.put(\"dlong\", \"destination.geo.location.lon\");\n FIELD_MAPPING.put(\"dhost\", \"destination.domain\");\n FIELD_MAPPING.put(\"dmac\", \"destination.mac\");\n FIELD_MAPPING.put(\"dntdom\", \"destination.registered_domain\");\n FIELD_MAPPING.put(\"dpt\", \"destination.port\");\n FIELD_MAPPING.put(\"dpid\", \"destination.process.pid\");\n FIELD_MAPPING.put(\"dproc\", \"destination.process.name\");\n FIELD_MAPPING.put(\"duid\", \"destination.user.id\");\n 
FIELD_MAPPING.put(\"duser\", \"destination.user.name\");\n FIELD_MAPPING.put(\"dpriv\", \"destination.user.group.name\");\n FIELD_MAPPING.put(\"act\", \"event.action\");\n FIELD_MAPPING.put(\"dvc\", \"observer.ip\");\n FIELD_MAPPING.put(\"deviceDirection\", \"network.direction\");\n FIELD_MAPPING.put(\"deviceDnsDomain\", \"observer.registered_domain\");\n FIELD_MAPPING.put(\"deviceExternalId\", \"observer.name\");\n FIELD_MAPPING.put(\"deviceFacility\", \"log.syslog.facility.code\");\n FIELD_MAPPING.put(\"dvchost\", \"observer.hostname\");\n FIELD_MAPPING.put(\"deviceInboundInterface\", \"observer.ingress.interface.name\");\n FIELD_MAPPING.put(\"dvcmac\", \"observer.mac\");\n FIELD_MAPPING.put(\"deviceOutboundInterface\", \"observer.egress.interface.name\");\n FIELD_MAPPING.put(\"dvcpid\", \"process.pid\");\n FIELD_MAPPING.put(\"deviceProcessName\", \"process.name\");\n FIELD_MAPPING.put(\"rt\", \"@timestamp\");\n FIELD_MAPPING.put(\"dtz\", \"event.timezone\");\n FIELD_MAPPING.put(\"deviceTranslatedAddress\", \"host.nat.ip\");\n FIELD_MAPPING.put(\"device.version\", \"observer.version\");\n FIELD_MAPPING.put(\"deviceVersion\", \"observer.version\");\n FIELD_MAPPING.put(\"device.product\", \"observer.product\");\n FIELD_MAPPING.put(\"deviceProduct\", \"observer.product\");\n FIELD_MAPPING.put(\"device.event_class_id\", \"event.code\");\n FIELD_MAPPING.put(\"device.vendor\", \"observer.vendor\");\n FIELD_MAPPING.put(\"deviceVendor\", \"observer.vendor\");\n FIELD_MAPPING.put(\"end\", \"event.end\");\n FIELD_MAPPING.put(\"eventId\", \"event.id\");\n FIELD_MAPPING.put(\"outcome\", \"event.outcome\");\n FIELD_MAPPING.put(\"fileCreateTime\", \"file.created\");\n FIELD_MAPPING.put(\"fileHash\", \"file.hash\");\n FIELD_MAPPING.put(\"fileId\", \"file.inode\");\n FIELD_MAPPING.put(\"fileModificationTime\", \"file.mtime\");\n FIELD_MAPPING.put(\"fname\", \"file.name\");\n FIELD_MAPPING.put(\"filePath\", \"file.path\");\n FIELD_MAPPING.put(\"filePermission\", \"file.group\");\n FIELD_MAPPING.put(\"fsize\", \"file.size\");\n FIELD_MAPPING.put(\"fileType\", \"file.extension\");\n FIELD_MAPPING.put(\"mrt\", \"event.ingested\");\n FIELD_MAPPING.put(\"msg\", \"message\");\n FIELD_MAPPING.put(\"reason\", \"event.reason\");\n FIELD_MAPPING.put(\"requestClientApplication\", \"user_agent.original\");\n FIELD_MAPPING.put(\"requestContext\", \"http.request.referrer\");\n FIELD_MAPPING.put(\"requestMethod\", \"http.request.method\");\n FIELD_MAPPING.put(\"request\", \"url.original\");\n FIELD_MAPPING.put(\"src\", \"source.ip\");\n FIELD_MAPPING.put(\"sourceDnsDomain\", \"source.registered_domain\");\n FIELD_MAPPING.put(\"slat\", \"source.geo.location.lat\");\n FIELD_MAPPING.put(\"slong\", \"source.geo.location.lon\");\n FIELD_MAPPING.put(\"shost\", \"source.domain\");\n FIELD_MAPPING.put(\"smac\", \"source.mac\");\n FIELD_MAPPING.put(\"sntdom\", \"source.registered_domain\");\n FIELD_MAPPING.put(\"spt\", \"source.port\");\n FIELD_MAPPING.put(\"spid\", \"source.process.pid\");\n FIELD_MAPPING.put(\"sproc\", \"source.process.name\");\n FIELD_MAPPING.put(\"sourceServiceName\", \"source.service.name\");\n FIELD_MAPPING.put(\"suser\", \"source.user.name\");\n FIELD_MAPPING.put(\"start\", \"event.start\");\n FIELD_MAPPING.put(\"proto\", \"network.transport\");\n // Add more mappings as needed\n }\n\n void process(String cefString, String targetField) {\n List headerFields = new ArrayList<>();\n Matcher headerMatcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n int extensionStart = 0;\n\n for (int i = 0; i < 7 && 
headerMatcher.find(); i++) {\n String field = headerMatcher.group(1);\n field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n headerFields.add(field);\n extensionStart = headerMatcher.end();\n }\n\n if (headerFields.size() > 0 && headerFields.get(0).startsWith(\"CEF:\")) {\n CEFEvent event = new CEFEvent();\n // Add error message if there are not enough header fields\n if (headerFields.size() != 7) {\n ingestDocument.appendFieldValue(\"error.message\", ERROR_MESSAGE_INCOMPLETE_CEF_HEADER);\n }\n for (int i = 0; i < headerFields.size(); i++) {\n switch (i) {\n case 0:\n event.setVersion(headerFields.get(0).substring(4));\n break;\n case 1:\n event.setDeviceVendor(headerFields.get(1));\n ingestDocument.setFieldValue(\"observer.vendor\", headerFields.get(1));\n break;\n case 2:\n event.setDeviceProduct(headerFields.get(2));\n ingestDocument.setFieldValue(\"observer.product\", headerFields.get(2));\n break;\n case 3:\n event.setDeviceVersion(headerFields.get(3));\n ingestDocument.setFieldValue(\"observer.version\", headerFields.get(3));\n break;\n case 4:\n event.setDeviceEventClassId(headerFields.get(4));\n ingestDocument.setFieldValue(\"event.code\", headerFields.get(4));\n break;\n case 5:\n event.setName(headerFields.get(5));\n break;\n case 6:\n event.setSeverity(headerFields.get(6));\n break;\n }\n }\n String extensionString = cefString.substring(extensionStart);\n Map extensions = parseExtensions(extensionString);\n\n if (removeEmptyValue) {\n removeEmptyValue(extensions);\n }\n event.setExtensions(extensions);\n\n // Translate possible ECS fields and remove them from extensions\n Map translatedFields = extensions.entrySet()\n .stream()\n .filter(entry -> FIELD_MAPPING.containsKey(entry.getKey()))\n .collect(Collectors.toMap(entry -> FIELD_MAPPING.get(entry.getKey()), entry -> {\n Class fieldType = ECSFieldWithType.getFieldType(FIELD_MAPPING.get(entry.getKey()));\n return convertValueToType(entry.getValue(), fieldType);\n }));\n\n // Remove the translated entries from extensions\n event.removeMappedExtensions();\n\n ingestDocument.setFieldValue(targetField, event.toObject());\n // Add ECS translations to the root of the document\n if (translatedFields.isEmpty() == false) {\n translatedFields.forEach(ingestDocument::setFieldValue);\n }\n } else {\n throw new IllegalArgumentException(\"Invalid CEF format\");\n }\n }\n\n private Object convertValueToType(String value, Class type) {\n if (type == Long.class) {\n return Long.parseLong(value);\n } else if (type == Double.class) {\n return Double.parseDouble(value);\n } else if (type == Integer.class) {\n return Integer.parseInt(value);\n } else {\n return value; // Default to String\n }\n }\n\n private Map parseExtensions(String extensionString) {\n Map extensions = new HashMap<>();\n Matcher matcher = EXTENSION_NEXT_KEY_VALUE_PATTERN.matcher(extensionString);\n int lastEnd = 0;\n while (matcher.find()) {\n String key = matcher.group(1);\n String value = matcher.group(2);\n\n // Convert extension field name to strict legal field_reference\n if (key.endsWith(\"]\")) {\n key = convertArrayLikeKey(key);\n }\n\n extensions.put(key, desanitizeExtensionVal(value.trim()));\n lastEnd = matcher.end();\n }\n // If there's any remaining unparsed content, throw an exception\n if (lastEnd < extensionString.length()) {\n throw new IllegalArgumentException(\"Invalid extensions; keyless value present: \" + extensionString.substring(lastEnd));\n }\n return extensions;\n }\n\n private void removeEmptyValue(Map map) {\n 
map.entrySet().removeIf(entry -> entry.getValue().isEmpty());\n map.entrySet().removeIf(entry -> entry.getValue() == null);\n }\n\n private String convertArrayLikeKey(String key) {\n Matcher matcher = EXTENSION_KEY_ARRAY_CAPTURE.matcher(key);\n if (matcher.matches()) {\n return \"[\" + matcher.group(1) + \"]\" + matcher.group(2);\n }\n return key;\n }\n\n public static String desanitizeExtensionVal(String value) {\n String desanitized = value;\n for (Map.Entry entry : EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.entrySet()) {\n desanitized = desanitized.replace(entry.getKey(), entry.getValue());\n }\n return desanitized;\n }\n\n public static class CEFEvent {\n private String version;\n private String deviceVendor;\n private String deviceProduct;\n private String deviceVersion;\n private String deviceEventClassId;\n private String name;\n private String severity;\n private final Map extensions = new HashMap<>();\n private Map translatedFields;\n\n // Getters and setters for all fields\n public String getVersion() {\n return version;\n }\n\n public void setVersion(String version) {\n this.version = version;\n }\n\n public String getDeviceVendor() {\n return deviceVendor;\n }\n\n public void setDeviceVendor(String deviceVendor) {\n this.deviceVendor = deviceVendor;\n }\n\n public String getDeviceProduct() {\n return deviceProduct;\n }\n\n public void setDeviceProduct(String deviceProduct) {\n this.deviceProduct = deviceProduct;\n }\n\n public String getDeviceVersion() {\n return deviceVersion;\n }\n\n public void setDeviceVersion(String deviceVersion) {\n this.deviceVersion = deviceVersion;\n }\n\n public String getDeviceEventClassId() {\n return deviceEventClassId;\n }\n\n public void setDeviceEventClassId(String deviceEventClassId) {\n this.deviceEventClassId = deviceEventClassId;\n }\n\n public String getName() {\n return name;\n }\n\n public void setName(String name) {\n this.name = name;\n }\n\n public String getSeverity() {\n return severity;\n }\n\n public void setSeverity(String severity) {\n this.severity = severity;\n }\n\n public Map getExtensions() {\n return extensions;\n }\n\n public void setExtensions(Map extensions) {\n this.extensions.putAll(extensions);\n }\n\n public void addExtension(String key, String extension) {\n this.extensions.put(key, extension);\n }\n\n public void removeMappedExtensions() {\n this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsKey(entry.getKey()));\n this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsValue(entry.getKey()));", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2012358741", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 122491, + "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", + "discussion_id": "2012358741", + "commented_code": "@@ -0,0 +1,449 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+import java.util.stream.Collectors;\n+\n+final class CefParser {\n+\n+ private final IngestDocument ingestDocument;\n+ private final boolean removeEmptyValue;\n+\n+ CefParser(IngestDocument ingestDocument, boolean removeEmptyValue) {\n+ this.ingestDocument = ingestDocument;\n+ this.removeEmptyValue = removeEmptyValue;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:\\\\S|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = new HashMap<>();\n+ private static final Map FIELD_MAPPING = new HashMap<>();\n+ private static final String ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = \"incomplete CEF header\";\n+\n+ static {\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\\\\", \"\\\\\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\=\", \"=\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\n\", \"\\n\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\r\", \"\\r\");\n+\n+ FIELD_MAPPING.put(\"app\", \"network.protocol\");\n+ FIELD_MAPPING.put(\"in\", \"source.bytes\");\n+ FIELD_MAPPING.put(\"out\", \"destination.bytes\");\n+ FIELD_MAPPING.put(\"dst\", \"destination.ip\");\n+ FIELD_MAPPING.put(\"dlat\", \"destination.geo.location.lat\");\n+ FIELD_MAPPING.put(\"dlong\", \"destination.geo.location.lon\");\n+ FIELD_MAPPING.put(\"dhost\", \"destination.domain\");\n+ FIELD_MAPPING.put(\"dmac\", \"destination.mac\");\n+ FIELD_MAPPING.put(\"dntdom\", \"destination.registered_domain\");\n+ FIELD_MAPPING.put(\"dpt\", \"destination.port\");\n+ FIELD_MAPPING.put(\"dpid\", \"destination.process.pid\");\n+ FIELD_MAPPING.put(\"dproc\", \"destination.process.name\");\n+ FIELD_MAPPING.put(\"duid\", \"destination.user.id\");\n+ FIELD_MAPPING.put(\"duser\", \"destination.user.name\");\n+ FIELD_MAPPING.put(\"dpriv\", \"destination.user.group.name\");\n+ FIELD_MAPPING.put(\"act\", \"event.action\");\n+ FIELD_MAPPING.put(\"dvc\", \"observer.ip\");\n+ FIELD_MAPPING.put(\"deviceDirection\", \"network.direction\");\n+ FIELD_MAPPING.put(\"deviceDnsDomain\", 
\"observer.registered_domain\");\n+ FIELD_MAPPING.put(\"deviceExternalId\", \"observer.name\");\n+ FIELD_MAPPING.put(\"deviceFacility\", \"log.syslog.facility.code\");\n+ FIELD_MAPPING.put(\"dvchost\", \"observer.hostname\");\n+ FIELD_MAPPING.put(\"deviceInboundInterface\", \"observer.ingress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcmac\", \"observer.mac\");\n+ FIELD_MAPPING.put(\"deviceOutboundInterface\", \"observer.egress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcpid\", \"process.pid\");\n+ FIELD_MAPPING.put(\"deviceProcessName\", \"process.name\");\n+ FIELD_MAPPING.put(\"rt\", \"@timestamp\");\n+ FIELD_MAPPING.put(\"dtz\", \"event.timezone\");\n+ FIELD_MAPPING.put(\"deviceTranslatedAddress\", \"host.nat.ip\");\n+ FIELD_MAPPING.put(\"device.version\", \"observer.version\");\n+ FIELD_MAPPING.put(\"deviceVersion\", \"observer.version\");\n+ FIELD_MAPPING.put(\"device.product\", \"observer.product\");\n+ FIELD_MAPPING.put(\"deviceProduct\", \"observer.product\");\n+ FIELD_MAPPING.put(\"device.event_class_id\", \"event.code\");\n+ FIELD_MAPPING.put(\"device.vendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"deviceVendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"end\", \"event.end\");\n+ FIELD_MAPPING.put(\"eventId\", \"event.id\");\n+ FIELD_MAPPING.put(\"outcome\", \"event.outcome\");\n+ FIELD_MAPPING.put(\"fileCreateTime\", \"file.created\");\n+ FIELD_MAPPING.put(\"fileHash\", \"file.hash\");\n+ FIELD_MAPPING.put(\"fileId\", \"file.inode\");\n+ FIELD_MAPPING.put(\"fileModificationTime\", \"file.mtime\");\n+ FIELD_MAPPING.put(\"fname\", \"file.name\");\n+ FIELD_MAPPING.put(\"filePath\", \"file.path\");\n+ FIELD_MAPPING.put(\"filePermission\", \"file.group\");\n+ FIELD_MAPPING.put(\"fsize\", \"file.size\");\n+ FIELD_MAPPING.put(\"fileType\", \"file.extension\");\n+ FIELD_MAPPING.put(\"mrt\", \"event.ingested\");\n+ FIELD_MAPPING.put(\"msg\", \"message\");\n+ FIELD_MAPPING.put(\"reason\", \"event.reason\");\n+ FIELD_MAPPING.put(\"requestClientApplication\", \"user_agent.original\");\n+ FIELD_MAPPING.put(\"requestContext\", \"http.request.referrer\");\n+ FIELD_MAPPING.put(\"requestMethod\", \"http.request.method\");\n+ FIELD_MAPPING.put(\"request\", \"url.original\");\n+ FIELD_MAPPING.put(\"src\", \"source.ip\");\n+ FIELD_MAPPING.put(\"sourceDnsDomain\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"slat\", \"source.geo.location.lat\");\n+ FIELD_MAPPING.put(\"slong\", \"source.geo.location.lon\");\n+ FIELD_MAPPING.put(\"shost\", \"source.domain\");\n+ FIELD_MAPPING.put(\"smac\", \"source.mac\");\n+ FIELD_MAPPING.put(\"sntdom\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"spt\", \"source.port\");\n+ FIELD_MAPPING.put(\"spid\", \"source.process.pid\");\n+ FIELD_MAPPING.put(\"sproc\", \"source.process.name\");\n+ FIELD_MAPPING.put(\"sourceServiceName\", \"source.service.name\");\n+ FIELD_MAPPING.put(\"suser\", \"source.user.name\");\n+ FIELD_MAPPING.put(\"start\", \"event.start\");\n+ FIELD_MAPPING.put(\"proto\", \"network.transport\");\n+ // Add more mappings as needed\n+ }\n+\n+ void process(String cefString, String targetField) {\n+ List headerFields = new ArrayList<>();\n+ Matcher headerMatcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && headerMatcher.find(); i++) {\n+ String field = headerMatcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headerFields.add(field);\n+ extensionStart = headerMatcher.end();\n+ }\n+\n+ if (headerFields.size() > 0 && 
headerFields.get(0).startsWith(\"CEF:\")) {\n+ CEFEvent event = new CEFEvent();\n+ // Add error message if there are not enough header fields\n+ if (headerFields.size() != 7) {\n+ ingestDocument.appendFieldValue(\"error.message\", ERROR_MESSAGE_INCOMPLETE_CEF_HEADER);\n+ }\n+ for (int i = 0; i < headerFields.size(); i++) {\n+ switch (i) {\n+ case 0:\n+ event.setVersion(headerFields.get(0).substring(4));\n+ break;\n+ case 1:\n+ event.setDeviceVendor(headerFields.get(1));\n+ ingestDocument.setFieldValue(\"observer.vendor\", headerFields.get(1));\n+ break;\n+ case 2:\n+ event.setDeviceProduct(headerFields.get(2));\n+ ingestDocument.setFieldValue(\"observer.product\", headerFields.get(2));\n+ break;\n+ case 3:\n+ event.setDeviceVersion(headerFields.get(3));\n+ ingestDocument.setFieldValue(\"observer.version\", headerFields.get(3));\n+ break;\n+ case 4:\n+ event.setDeviceEventClassId(headerFields.get(4));\n+ ingestDocument.setFieldValue(\"event.code\", headerFields.get(4));\n+ break;\n+ case 5:\n+ event.setName(headerFields.get(5));\n+ break;\n+ case 6:\n+ event.setSeverity(headerFields.get(6));\n+ break;\n+ }\n+ }\n+ String extensionString = cefString.substring(extensionStart);\n+ Map extensions = parseExtensions(extensionString);\n+\n+ if (removeEmptyValue) {\n+ removeEmptyValue(extensions);\n+ }\n+ event.setExtensions(extensions);\n+\n+ // Translate possible ECS fields and remove them from extensions\n+ Map translatedFields = extensions.entrySet()\n+ .stream()\n+ .filter(entry -> FIELD_MAPPING.containsKey(entry.getKey()))\n+ .collect(Collectors.toMap(entry -> FIELD_MAPPING.get(entry.getKey()), entry -> {\n+ Class fieldType = ECSFieldWithType.getFieldType(FIELD_MAPPING.get(entry.getKey()));\n+ return convertValueToType(entry.getValue(), fieldType);\n+ }));\n+\n+ // Remove the translated entries from extensions\n+ event.removeMappedExtensions();\n+\n+ ingestDocument.setFieldValue(targetField, event.toObject());\n+ // Add ECS translations to the root of the document\n+ if (translatedFields.isEmpty() == false) {\n+ translatedFields.forEach(ingestDocument::setFieldValue);\n+ }\n+ } else {\n+ throw new IllegalArgumentException(\"Invalid CEF format\");\n+ }\n+ }\n+\n+ private Object convertValueToType(String value, Class type) {\n+ if (type == Long.class) {\n+ return Long.parseLong(value);\n+ } else if (type == Double.class) {\n+ return Double.parseDouble(value);\n+ } else if (type == Integer.class) {\n+ return Integer.parseInt(value);\n+ } else {\n+ return value; // Default to String\n+ }\n+ }\n+\n+ private Map parseExtensions(String extensionString) {\n+ Map extensions = new HashMap<>();\n+ Matcher matcher = EXTENSION_NEXT_KEY_VALUE_PATTERN.matcher(extensionString);\n+ int lastEnd = 0;\n+ while (matcher.find()) {\n+ String key = matcher.group(1);\n+ String value = matcher.group(2);\n+\n+ // Convert extension field name to strict legal field_reference\n+ if (key.endsWith(\"]\")) {\n+ key = convertArrayLikeKey(key);\n+ }\n+\n+ extensions.put(key, desanitizeExtensionVal(value.trim()));\n+ lastEnd = matcher.end();\n+ }\n+ // If there's any remaining unparsed content, throw an exception\n+ if (lastEnd < extensionString.length()) {\n+ throw new IllegalArgumentException(\"Invalid extensions; keyless value present: \" + extensionString.substring(lastEnd));\n+ }\n+ return extensions;\n+ }\n+\n+ private void removeEmptyValue(Map map) {\n+ map.entrySet().removeIf(entry -> entry.getValue().isEmpty());\n+ map.entrySet().removeIf(entry -> entry.getValue() == null);\n+ }\n+\n+ private String 
convertArrayLikeKey(String key) {\n+ Matcher matcher = EXTENSION_KEY_ARRAY_CAPTURE.matcher(key);\n+ if (matcher.matches()) {\n+ return \"[\" + matcher.group(1) + \"]\" + matcher.group(2);\n+ }\n+ return key;\n+ }\n+\n+ public static String desanitizeExtensionVal(String value) {\n+ String desanitized = value;\n+ for (Map.Entry entry : EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.entrySet()) {\n+ desanitized = desanitized.replace(entry.getKey(), entry.getValue());\n+ }\n+ return desanitized;\n+ }\n+\n+ public static class CEFEvent {\n+ private String version;\n+ private String deviceVendor;\n+ private String deviceProduct;\n+ private String deviceVersion;\n+ private String deviceEventClassId;\n+ private String name;\n+ private String severity;\n+ private final Map extensions = new HashMap<>();\n+ private Map translatedFields;\n+\n+ // Getters and setters for all fields\n+ public String getVersion() {\n+ return version;\n+ }\n+\n+ public void setVersion(String version) {\n+ this.version = version;\n+ }\n+\n+ public String getDeviceVendor() {\n+ return deviceVendor;\n+ }\n+\n+ public void setDeviceVendor(String deviceVendor) {\n+ this.deviceVendor = deviceVendor;\n+ }\n+\n+ public String getDeviceProduct() {\n+ return deviceProduct;\n+ }\n+\n+ public void setDeviceProduct(String deviceProduct) {\n+ this.deviceProduct = deviceProduct;\n+ }\n+\n+ public String getDeviceVersion() {\n+ return deviceVersion;\n+ }\n+\n+ public void setDeviceVersion(String deviceVersion) {\n+ this.deviceVersion = deviceVersion;\n+ }\n+\n+ public String getDeviceEventClassId() {\n+ return deviceEventClassId;\n+ }\n+\n+ public void setDeviceEventClassId(String deviceEventClassId) {\n+ this.deviceEventClassId = deviceEventClassId;\n+ }\n+\n+ public String getName() {\n+ return name;\n+ }\n+\n+ public void setName(String name) {\n+ this.name = name;\n+ }\n+\n+ public String getSeverity() {\n+ return severity;\n+ }\n+\n+ public void setSeverity(String severity) {\n+ this.severity = severity;\n+ }\n+\n+ public Map getExtensions() {\n+ return extensions;\n+ }\n+\n+ public void setExtensions(Map extensions) {\n+ this.extensions.putAll(extensions);\n+ }\n+\n+ public void addExtension(String key, String extension) {\n+ this.extensions.put(key, extension);\n+ }\n+\n+ public void removeMappedExtensions() {\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsKey(entry.getKey()));\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsValue(entry.getKey()));", + "comment_created_at": "2025-03-25T15:21:43+00:00", + "comment_author": "joegallo", + "comment_body": "You're doing N scans through the whole collection here, I doubt that's going to be especially performant.", + "pr_file_module": null + }, + { + "comment_id": "2012372556", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 122491, + "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", + "discussion_id": "2012358741", + "commented_code": "@@ -0,0 +1,449 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+import java.util.stream.Collectors;\n+\n+final class CefParser {\n+\n+ private final IngestDocument ingestDocument;\n+ private final boolean removeEmptyValue;\n+\n+ CefParser(IngestDocument ingestDocument, boolean removeEmptyValue) {\n+ this.ingestDocument = ingestDocument;\n+ this.removeEmptyValue = removeEmptyValue;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:\\\\S|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = new HashMap<>();\n+ private static final Map FIELD_MAPPING = new HashMap<>();\n+ private static final String ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = \"incomplete CEF header\";\n+\n+ static {\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\\\\", \"\\\\\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\=\", \"=\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\n\", \"\\n\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\r\", \"\\r\");\n+\n+ FIELD_MAPPING.put(\"app\", \"network.protocol\");\n+ FIELD_MAPPING.put(\"in\", \"source.bytes\");\n+ FIELD_MAPPING.put(\"out\", \"destination.bytes\");\n+ FIELD_MAPPING.put(\"dst\", \"destination.ip\");\n+ FIELD_MAPPING.put(\"dlat\", \"destination.geo.location.lat\");\n+ FIELD_MAPPING.put(\"dlong\", \"destination.geo.location.lon\");\n+ FIELD_MAPPING.put(\"dhost\", \"destination.domain\");\n+ FIELD_MAPPING.put(\"dmac\", \"destination.mac\");\n+ FIELD_MAPPING.put(\"dntdom\", \"destination.registered_domain\");\n+ FIELD_MAPPING.put(\"dpt\", \"destination.port\");\n+ FIELD_MAPPING.put(\"dpid\", \"destination.process.pid\");\n+ FIELD_MAPPING.put(\"dproc\", \"destination.process.name\");\n+ FIELD_MAPPING.put(\"duid\", \"destination.user.id\");\n+ FIELD_MAPPING.put(\"duser\", \"destination.user.name\");\n+ FIELD_MAPPING.put(\"dpriv\", \"destination.user.group.name\");\n+ FIELD_MAPPING.put(\"act\", \"event.action\");\n+ FIELD_MAPPING.put(\"dvc\", \"observer.ip\");\n+ FIELD_MAPPING.put(\"deviceDirection\", \"network.direction\");\n+ FIELD_MAPPING.put(\"deviceDnsDomain\", 
\"observer.registered_domain\");\n+ FIELD_MAPPING.put(\"deviceExternalId\", \"observer.name\");\n+ FIELD_MAPPING.put(\"deviceFacility\", \"log.syslog.facility.code\");\n+ FIELD_MAPPING.put(\"dvchost\", \"observer.hostname\");\n+ FIELD_MAPPING.put(\"deviceInboundInterface\", \"observer.ingress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcmac\", \"observer.mac\");\n+ FIELD_MAPPING.put(\"deviceOutboundInterface\", \"observer.egress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcpid\", \"process.pid\");\n+ FIELD_MAPPING.put(\"deviceProcessName\", \"process.name\");\n+ FIELD_MAPPING.put(\"rt\", \"@timestamp\");\n+ FIELD_MAPPING.put(\"dtz\", \"event.timezone\");\n+ FIELD_MAPPING.put(\"deviceTranslatedAddress\", \"host.nat.ip\");\n+ FIELD_MAPPING.put(\"device.version\", \"observer.version\");\n+ FIELD_MAPPING.put(\"deviceVersion\", \"observer.version\");\n+ FIELD_MAPPING.put(\"device.product\", \"observer.product\");\n+ FIELD_MAPPING.put(\"deviceProduct\", \"observer.product\");\n+ FIELD_MAPPING.put(\"device.event_class_id\", \"event.code\");\n+ FIELD_MAPPING.put(\"device.vendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"deviceVendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"end\", \"event.end\");\n+ FIELD_MAPPING.put(\"eventId\", \"event.id\");\n+ FIELD_MAPPING.put(\"outcome\", \"event.outcome\");\n+ FIELD_MAPPING.put(\"fileCreateTime\", \"file.created\");\n+ FIELD_MAPPING.put(\"fileHash\", \"file.hash\");\n+ FIELD_MAPPING.put(\"fileId\", \"file.inode\");\n+ FIELD_MAPPING.put(\"fileModificationTime\", \"file.mtime\");\n+ FIELD_MAPPING.put(\"fname\", \"file.name\");\n+ FIELD_MAPPING.put(\"filePath\", \"file.path\");\n+ FIELD_MAPPING.put(\"filePermission\", \"file.group\");\n+ FIELD_MAPPING.put(\"fsize\", \"file.size\");\n+ FIELD_MAPPING.put(\"fileType\", \"file.extension\");\n+ FIELD_MAPPING.put(\"mrt\", \"event.ingested\");\n+ FIELD_MAPPING.put(\"msg\", \"message\");\n+ FIELD_MAPPING.put(\"reason\", \"event.reason\");\n+ FIELD_MAPPING.put(\"requestClientApplication\", \"user_agent.original\");\n+ FIELD_MAPPING.put(\"requestContext\", \"http.request.referrer\");\n+ FIELD_MAPPING.put(\"requestMethod\", \"http.request.method\");\n+ FIELD_MAPPING.put(\"request\", \"url.original\");\n+ FIELD_MAPPING.put(\"src\", \"source.ip\");\n+ FIELD_MAPPING.put(\"sourceDnsDomain\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"slat\", \"source.geo.location.lat\");\n+ FIELD_MAPPING.put(\"slong\", \"source.geo.location.lon\");\n+ FIELD_MAPPING.put(\"shost\", \"source.domain\");\n+ FIELD_MAPPING.put(\"smac\", \"source.mac\");\n+ FIELD_MAPPING.put(\"sntdom\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"spt\", \"source.port\");\n+ FIELD_MAPPING.put(\"spid\", \"source.process.pid\");\n+ FIELD_MAPPING.put(\"sproc\", \"source.process.name\");\n+ FIELD_MAPPING.put(\"sourceServiceName\", \"source.service.name\");\n+ FIELD_MAPPING.put(\"suser\", \"source.user.name\");\n+ FIELD_MAPPING.put(\"start\", \"event.start\");\n+ FIELD_MAPPING.put(\"proto\", \"network.transport\");\n+ // Add more mappings as needed\n+ }\n+\n+ void process(String cefString, String targetField) {\n+ List headerFields = new ArrayList<>();\n+ Matcher headerMatcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && headerMatcher.find(); i++) {\n+ String field = headerMatcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headerFields.add(field);\n+ extensionStart = headerMatcher.end();\n+ }\n+\n+ if (headerFields.size() > 0 && 
headerFields.get(0).startsWith(\"CEF:\")) {\n+ CEFEvent event = new CEFEvent();\n+ // Add error message if there are not enough header fields\n+ if (headerFields.size() != 7) {\n+ ingestDocument.appendFieldValue(\"error.message\", ERROR_MESSAGE_INCOMPLETE_CEF_HEADER);\n+ }\n+ for (int i = 0; i < headerFields.size(); i++) {\n+ switch (i) {\n+ case 0:\n+ event.setVersion(headerFields.get(0).substring(4));\n+ break;\n+ case 1:\n+ event.setDeviceVendor(headerFields.get(1));\n+ ingestDocument.setFieldValue(\"observer.vendor\", headerFields.get(1));\n+ break;\n+ case 2:\n+ event.setDeviceProduct(headerFields.get(2));\n+ ingestDocument.setFieldValue(\"observer.product\", headerFields.get(2));\n+ break;\n+ case 3:\n+ event.setDeviceVersion(headerFields.get(3));\n+ ingestDocument.setFieldValue(\"observer.version\", headerFields.get(3));\n+ break;\n+ case 4:\n+ event.setDeviceEventClassId(headerFields.get(4));\n+ ingestDocument.setFieldValue(\"event.code\", headerFields.get(4));\n+ break;\n+ case 5:\n+ event.setName(headerFields.get(5));\n+ break;\n+ case 6:\n+ event.setSeverity(headerFields.get(6));\n+ break;\n+ }\n+ }\n+ String extensionString = cefString.substring(extensionStart);\n+ Map extensions = parseExtensions(extensionString);\n+\n+ if (removeEmptyValue) {\n+ removeEmptyValue(extensions);\n+ }\n+ event.setExtensions(extensions);\n+\n+ // Translate possible ECS fields and remove them from extensions\n+ Map translatedFields = extensions.entrySet()\n+ .stream()\n+ .filter(entry -> FIELD_MAPPING.containsKey(entry.getKey()))\n+ .collect(Collectors.toMap(entry -> FIELD_MAPPING.get(entry.getKey()), entry -> {\n+ Class fieldType = ECSFieldWithType.getFieldType(FIELD_MAPPING.get(entry.getKey()));\n+ return convertValueToType(entry.getValue(), fieldType);\n+ }));\n+\n+ // Remove the translated entries from extensions\n+ event.removeMappedExtensions();\n+\n+ ingestDocument.setFieldValue(targetField, event.toObject());\n+ // Add ECS translations to the root of the document\n+ if (translatedFields.isEmpty() == false) {\n+ translatedFields.forEach(ingestDocument::setFieldValue);\n+ }\n+ } else {\n+ throw new IllegalArgumentException(\"Invalid CEF format\");\n+ }\n+ }\n+\n+ private Object convertValueToType(String value, Class type) {\n+ if (type == Long.class) {\n+ return Long.parseLong(value);\n+ } else if (type == Double.class) {\n+ return Double.parseDouble(value);\n+ } else if (type == Integer.class) {\n+ return Integer.parseInt(value);\n+ } else {\n+ return value; // Default to String\n+ }\n+ }\n+\n+ private Map parseExtensions(String extensionString) {\n+ Map extensions = new HashMap<>();\n+ Matcher matcher = EXTENSION_NEXT_KEY_VALUE_PATTERN.matcher(extensionString);\n+ int lastEnd = 0;\n+ while (matcher.find()) {\n+ String key = matcher.group(1);\n+ String value = matcher.group(2);\n+\n+ // Convert extension field name to strict legal field_reference\n+ if (key.endsWith(\"]\")) {\n+ key = convertArrayLikeKey(key);\n+ }\n+\n+ extensions.put(key, desanitizeExtensionVal(value.trim()));\n+ lastEnd = matcher.end();\n+ }\n+ // If there's any remaining unparsed content, throw an exception\n+ if (lastEnd < extensionString.length()) {\n+ throw new IllegalArgumentException(\"Invalid extensions; keyless value present: \" + extensionString.substring(lastEnd));\n+ }\n+ return extensions;\n+ }\n+\n+ private void removeEmptyValue(Map map) {\n+ map.entrySet().removeIf(entry -> entry.getValue().isEmpty());\n+ map.entrySet().removeIf(entry -> entry.getValue() == null);\n+ }\n+\n+ private String 
convertArrayLikeKey(String key) {\n+ Matcher matcher = EXTENSION_KEY_ARRAY_CAPTURE.matcher(key);\n+ if (matcher.matches()) {\n+ return \"[\" + matcher.group(1) + \"]\" + matcher.group(2);\n+ }\n+ return key;\n+ }\n+\n+ public static String desanitizeExtensionVal(String value) {\n+ String desanitized = value;\n+ for (Map.Entry entry : EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.entrySet()) {\n+ desanitized = desanitized.replace(entry.getKey(), entry.getValue());\n+ }\n+ return desanitized;\n+ }\n+\n+ public static class CEFEvent {\n+ private String version;\n+ private String deviceVendor;\n+ private String deviceProduct;\n+ private String deviceVersion;\n+ private String deviceEventClassId;\n+ private String name;\n+ private String severity;\n+ private final Map extensions = new HashMap<>();\n+ private Map translatedFields;\n+\n+ // Getters and setters for all fields\n+ public String getVersion() {\n+ return version;\n+ }\n+\n+ public void setVersion(String version) {\n+ this.version = version;\n+ }\n+\n+ public String getDeviceVendor() {\n+ return deviceVendor;\n+ }\n+\n+ public void setDeviceVendor(String deviceVendor) {\n+ this.deviceVendor = deviceVendor;\n+ }\n+\n+ public String getDeviceProduct() {\n+ return deviceProduct;\n+ }\n+\n+ public void setDeviceProduct(String deviceProduct) {\n+ this.deviceProduct = deviceProduct;\n+ }\n+\n+ public String getDeviceVersion() {\n+ return deviceVersion;\n+ }\n+\n+ public void setDeviceVersion(String deviceVersion) {\n+ this.deviceVersion = deviceVersion;\n+ }\n+\n+ public String getDeviceEventClassId() {\n+ return deviceEventClassId;\n+ }\n+\n+ public void setDeviceEventClassId(String deviceEventClassId) {\n+ this.deviceEventClassId = deviceEventClassId;\n+ }\n+\n+ public String getName() {\n+ return name;\n+ }\n+\n+ public void setName(String name) {\n+ this.name = name;\n+ }\n+\n+ public String getSeverity() {\n+ return severity;\n+ }\n+\n+ public void setSeverity(String severity) {\n+ this.severity = severity;\n+ }\n+\n+ public Map getExtensions() {\n+ return extensions;\n+ }\n+\n+ public void setExtensions(Map extensions) {\n+ this.extensions.putAll(extensions);\n+ }\n+\n+ public void addExtension(String key, String extension) {\n+ this.extensions.put(key, extension);\n+ }\n+\n+ public void removeMappedExtensions() {\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsKey(entry.getKey()));\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsValue(entry.getKey()));", + "comment_created_at": "2025-03-25T15:28:16+00:00", + "comment_author": "bhapas", + "comment_body": "I think this can be \r\n\r\n```java\r\npublic void removeMappedExtensions() {\r\n this.extensions.entrySet().removeIf(entry -> \r\n FIELD_MAPPING.containsKey(entry.getKey()) || FIELD_MAPPING.containsValue(entry.getKey())\r\n );\r\n}\r\n```\r\n\r\nOr you mean something else?", + "pr_file_module": null + }, + { + "comment_id": "2012395998", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 122491, + "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", + "discussion_id": "2012358741", + "commented_code": "@@ -0,0 +1,449 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+import java.util.stream.Collectors;\n+\n+final class CefParser {\n+\n+ private final IngestDocument ingestDocument;\n+ private final boolean removeEmptyValue;\n+\n+ CefParser(IngestDocument ingestDocument, boolean removeEmptyValue) {\n+ this.ingestDocument = ingestDocument;\n+ this.removeEmptyValue = removeEmptyValue;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:\\\\S|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = new HashMap<>();\n+ private static final Map FIELD_MAPPING = new HashMap<>();\n+ private static final String ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = \"incomplete CEF header\";\n+\n+ static {\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\\\\", \"\\\\\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\=\", \"=\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\n\", \"\\n\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\r\", \"\\r\");\n+\n+ FIELD_MAPPING.put(\"app\", \"network.protocol\");\n+ FIELD_MAPPING.put(\"in\", \"source.bytes\");\n+ FIELD_MAPPING.put(\"out\", \"destination.bytes\");\n+ FIELD_MAPPING.put(\"dst\", \"destination.ip\");\n+ FIELD_MAPPING.put(\"dlat\", \"destination.geo.location.lat\");\n+ FIELD_MAPPING.put(\"dlong\", \"destination.geo.location.lon\");\n+ FIELD_MAPPING.put(\"dhost\", \"destination.domain\");\n+ FIELD_MAPPING.put(\"dmac\", \"destination.mac\");\n+ FIELD_MAPPING.put(\"dntdom\", \"destination.registered_domain\");\n+ FIELD_MAPPING.put(\"dpt\", \"destination.port\");\n+ FIELD_MAPPING.put(\"dpid\", \"destination.process.pid\");\n+ FIELD_MAPPING.put(\"dproc\", \"destination.process.name\");\n+ FIELD_MAPPING.put(\"duid\", \"destination.user.id\");\n+ FIELD_MAPPING.put(\"duser\", \"destination.user.name\");\n+ FIELD_MAPPING.put(\"dpriv\", \"destination.user.group.name\");\n+ FIELD_MAPPING.put(\"act\", \"event.action\");\n+ FIELD_MAPPING.put(\"dvc\", \"observer.ip\");\n+ FIELD_MAPPING.put(\"deviceDirection\", \"network.direction\");\n+ FIELD_MAPPING.put(\"deviceDnsDomain\", 
\"observer.registered_domain\");\n+ FIELD_MAPPING.put(\"deviceExternalId\", \"observer.name\");\n+ FIELD_MAPPING.put(\"deviceFacility\", \"log.syslog.facility.code\");\n+ FIELD_MAPPING.put(\"dvchost\", \"observer.hostname\");\n+ FIELD_MAPPING.put(\"deviceInboundInterface\", \"observer.ingress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcmac\", \"observer.mac\");\n+ FIELD_MAPPING.put(\"deviceOutboundInterface\", \"observer.egress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcpid\", \"process.pid\");\n+ FIELD_MAPPING.put(\"deviceProcessName\", \"process.name\");\n+ FIELD_MAPPING.put(\"rt\", \"@timestamp\");\n+ FIELD_MAPPING.put(\"dtz\", \"event.timezone\");\n+ FIELD_MAPPING.put(\"deviceTranslatedAddress\", \"host.nat.ip\");\n+ FIELD_MAPPING.put(\"device.version\", \"observer.version\");\n+ FIELD_MAPPING.put(\"deviceVersion\", \"observer.version\");\n+ FIELD_MAPPING.put(\"device.product\", \"observer.product\");\n+ FIELD_MAPPING.put(\"deviceProduct\", \"observer.product\");\n+ FIELD_MAPPING.put(\"device.event_class_id\", \"event.code\");\n+ FIELD_MAPPING.put(\"device.vendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"deviceVendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"end\", \"event.end\");\n+ FIELD_MAPPING.put(\"eventId\", \"event.id\");\n+ FIELD_MAPPING.put(\"outcome\", \"event.outcome\");\n+ FIELD_MAPPING.put(\"fileCreateTime\", \"file.created\");\n+ FIELD_MAPPING.put(\"fileHash\", \"file.hash\");\n+ FIELD_MAPPING.put(\"fileId\", \"file.inode\");\n+ FIELD_MAPPING.put(\"fileModificationTime\", \"file.mtime\");\n+ FIELD_MAPPING.put(\"fname\", \"file.name\");\n+ FIELD_MAPPING.put(\"filePath\", \"file.path\");\n+ FIELD_MAPPING.put(\"filePermission\", \"file.group\");\n+ FIELD_MAPPING.put(\"fsize\", \"file.size\");\n+ FIELD_MAPPING.put(\"fileType\", \"file.extension\");\n+ FIELD_MAPPING.put(\"mrt\", \"event.ingested\");\n+ FIELD_MAPPING.put(\"msg\", \"message\");\n+ FIELD_MAPPING.put(\"reason\", \"event.reason\");\n+ FIELD_MAPPING.put(\"requestClientApplication\", \"user_agent.original\");\n+ FIELD_MAPPING.put(\"requestContext\", \"http.request.referrer\");\n+ FIELD_MAPPING.put(\"requestMethod\", \"http.request.method\");\n+ FIELD_MAPPING.put(\"request\", \"url.original\");\n+ FIELD_MAPPING.put(\"src\", \"source.ip\");\n+ FIELD_MAPPING.put(\"sourceDnsDomain\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"slat\", \"source.geo.location.lat\");\n+ FIELD_MAPPING.put(\"slong\", \"source.geo.location.lon\");\n+ FIELD_MAPPING.put(\"shost\", \"source.domain\");\n+ FIELD_MAPPING.put(\"smac\", \"source.mac\");\n+ FIELD_MAPPING.put(\"sntdom\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"spt\", \"source.port\");\n+ FIELD_MAPPING.put(\"spid\", \"source.process.pid\");\n+ FIELD_MAPPING.put(\"sproc\", \"source.process.name\");\n+ FIELD_MAPPING.put(\"sourceServiceName\", \"source.service.name\");\n+ FIELD_MAPPING.put(\"suser\", \"source.user.name\");\n+ FIELD_MAPPING.put(\"start\", \"event.start\");\n+ FIELD_MAPPING.put(\"proto\", \"network.transport\");\n+ // Add more mappings as needed\n+ }\n+\n+ void process(String cefString, String targetField) {\n+ List headerFields = new ArrayList<>();\n+ Matcher headerMatcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && headerMatcher.find(); i++) {\n+ String field = headerMatcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headerFields.add(field);\n+ extensionStart = headerMatcher.end();\n+ }\n+\n+ if (headerFields.size() > 0 && 
headerFields.get(0).startsWith(\"CEF:\")) {\n+ CEFEvent event = new CEFEvent();\n+ // Add error message if there are not enough header fields\n+ if (headerFields.size() != 7) {\n+ ingestDocument.appendFieldValue(\"error.message\", ERROR_MESSAGE_INCOMPLETE_CEF_HEADER);\n+ }\n+ for (int i = 0; i < headerFields.size(); i++) {\n+ switch (i) {\n+ case 0:\n+ event.setVersion(headerFields.get(0).substring(4));\n+ break;\n+ case 1:\n+ event.setDeviceVendor(headerFields.get(1));\n+ ingestDocument.setFieldValue(\"observer.vendor\", headerFields.get(1));\n+ break;\n+ case 2:\n+ event.setDeviceProduct(headerFields.get(2));\n+ ingestDocument.setFieldValue(\"observer.product\", headerFields.get(2));\n+ break;\n+ case 3:\n+ event.setDeviceVersion(headerFields.get(3));\n+ ingestDocument.setFieldValue(\"observer.version\", headerFields.get(3));\n+ break;\n+ case 4:\n+ event.setDeviceEventClassId(headerFields.get(4));\n+ ingestDocument.setFieldValue(\"event.code\", headerFields.get(4));\n+ break;\n+ case 5:\n+ event.setName(headerFields.get(5));\n+ break;\n+ case 6:\n+ event.setSeverity(headerFields.get(6));\n+ break;\n+ }\n+ }\n+ String extensionString = cefString.substring(extensionStart);\n+ Map extensions = parseExtensions(extensionString);\n+\n+ if (removeEmptyValue) {\n+ removeEmptyValue(extensions);\n+ }\n+ event.setExtensions(extensions);\n+\n+ // Translate possible ECS fields and remove them from extensions\n+ Map translatedFields = extensions.entrySet()\n+ .stream()\n+ .filter(entry -> FIELD_MAPPING.containsKey(entry.getKey()))\n+ .collect(Collectors.toMap(entry -> FIELD_MAPPING.get(entry.getKey()), entry -> {\n+ Class fieldType = ECSFieldWithType.getFieldType(FIELD_MAPPING.get(entry.getKey()));\n+ return convertValueToType(entry.getValue(), fieldType);\n+ }));\n+\n+ // Remove the translated entries from extensions\n+ event.removeMappedExtensions();\n+\n+ ingestDocument.setFieldValue(targetField, event.toObject());\n+ // Add ECS translations to the root of the document\n+ if (translatedFields.isEmpty() == false) {\n+ translatedFields.forEach(ingestDocument::setFieldValue);\n+ }\n+ } else {\n+ throw new IllegalArgumentException(\"Invalid CEF format\");\n+ }\n+ }\n+\n+ private Object convertValueToType(String value, Class type) {\n+ if (type == Long.class) {\n+ return Long.parseLong(value);\n+ } else if (type == Double.class) {\n+ return Double.parseDouble(value);\n+ } else if (type == Integer.class) {\n+ return Integer.parseInt(value);\n+ } else {\n+ return value; // Default to String\n+ }\n+ }\n+\n+ private Map parseExtensions(String extensionString) {\n+ Map extensions = new HashMap<>();\n+ Matcher matcher = EXTENSION_NEXT_KEY_VALUE_PATTERN.matcher(extensionString);\n+ int lastEnd = 0;\n+ while (matcher.find()) {\n+ String key = matcher.group(1);\n+ String value = matcher.group(2);\n+\n+ // Convert extension field name to strict legal field_reference\n+ if (key.endsWith(\"]\")) {\n+ key = convertArrayLikeKey(key);\n+ }\n+\n+ extensions.put(key, desanitizeExtensionVal(value.trim()));\n+ lastEnd = matcher.end();\n+ }\n+ // If there's any remaining unparsed content, throw an exception\n+ if (lastEnd < extensionString.length()) {\n+ throw new IllegalArgumentException(\"Invalid extensions; keyless value present: \" + extensionString.substring(lastEnd));\n+ }\n+ return extensions;\n+ }\n+\n+ private void removeEmptyValue(Map map) {\n+ map.entrySet().removeIf(entry -> entry.getValue().isEmpty());\n+ map.entrySet().removeIf(entry -> entry.getValue() == null);\n+ }\n+\n+ private String 
convertArrayLikeKey(String key) {\n+ Matcher matcher = EXTENSION_KEY_ARRAY_CAPTURE.matcher(key);\n+ if (matcher.matches()) {\n+ return \"[\" + matcher.group(1) + \"]\" + matcher.group(2);\n+ }\n+ return key;\n+ }\n+\n+ public static String desanitizeExtensionVal(String value) {\n+ String desanitized = value;\n+ for (Map.Entry entry : EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.entrySet()) {\n+ desanitized = desanitized.replace(entry.getKey(), entry.getValue());\n+ }\n+ return desanitized;\n+ }\n+\n+ public static class CEFEvent {\n+ private String version;\n+ private String deviceVendor;\n+ private String deviceProduct;\n+ private String deviceVersion;\n+ private String deviceEventClassId;\n+ private String name;\n+ private String severity;\n+ private final Map extensions = new HashMap<>();\n+ private Map translatedFields;\n+\n+ // Getters and setters for all fields\n+ public String getVersion() {\n+ return version;\n+ }\n+\n+ public void setVersion(String version) {\n+ this.version = version;\n+ }\n+\n+ public String getDeviceVendor() {\n+ return deviceVendor;\n+ }\n+\n+ public void setDeviceVendor(String deviceVendor) {\n+ this.deviceVendor = deviceVendor;\n+ }\n+\n+ public String getDeviceProduct() {\n+ return deviceProduct;\n+ }\n+\n+ public void setDeviceProduct(String deviceProduct) {\n+ this.deviceProduct = deviceProduct;\n+ }\n+\n+ public String getDeviceVersion() {\n+ return deviceVersion;\n+ }\n+\n+ public void setDeviceVersion(String deviceVersion) {\n+ this.deviceVersion = deviceVersion;\n+ }\n+\n+ public String getDeviceEventClassId() {\n+ return deviceEventClassId;\n+ }\n+\n+ public void setDeviceEventClassId(String deviceEventClassId) {\n+ this.deviceEventClassId = deviceEventClassId;\n+ }\n+\n+ public String getName() {\n+ return name;\n+ }\n+\n+ public void setName(String name) {\n+ this.name = name;\n+ }\n+\n+ public String getSeverity() {\n+ return severity;\n+ }\n+\n+ public void setSeverity(String severity) {\n+ this.severity = severity;\n+ }\n+\n+ public Map getExtensions() {\n+ return extensions;\n+ }\n+\n+ public void setExtensions(Map extensions) {\n+ this.extensions.putAll(extensions);\n+ }\n+\n+ public void addExtension(String key, String extension) {\n+ this.extensions.put(key, extension);\n+ }\n+\n+ public void removeMappedExtensions() {\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsKey(entry.getKey()));\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsValue(entry.getKey()));", + "comment_created_at": "2025-03-25T15:39:16+00:00", + "comment_author": "joegallo", + "comment_body": "Explain to me how `containsValue` works on a Map.", + "pr_file_module": null + }, + { + "comment_id": "2012404187", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 122491, + "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", + "discussion_id": "2012358741", + "commented_code": "@@ -0,0 +1,449 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+import java.util.stream.Collectors;\n+\n+final class CefParser {\n+\n+ private final IngestDocument ingestDocument;\n+ private final boolean removeEmptyValue;\n+\n+ CefParser(IngestDocument ingestDocument, boolean removeEmptyValue) {\n+ this.ingestDocument = ingestDocument;\n+ this.removeEmptyValue = removeEmptyValue;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:\\\\S|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = new HashMap<>();\n+ private static final Map FIELD_MAPPING = new HashMap<>();\n+ private static final String ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = \"incomplete CEF header\";\n+\n+ static {\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\\\\", \"\\\\\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\=\", \"=\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\n\", \"\\n\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\r\", \"\\r\");\n+\n+ FIELD_MAPPING.put(\"app\", \"network.protocol\");\n+ FIELD_MAPPING.put(\"in\", \"source.bytes\");\n+ FIELD_MAPPING.put(\"out\", \"destination.bytes\");\n+ FIELD_MAPPING.put(\"dst\", \"destination.ip\");\n+ FIELD_MAPPING.put(\"dlat\", \"destination.geo.location.lat\");\n+ FIELD_MAPPING.put(\"dlong\", \"destination.geo.location.lon\");\n+ FIELD_MAPPING.put(\"dhost\", \"destination.domain\");\n+ FIELD_MAPPING.put(\"dmac\", \"destination.mac\");\n+ FIELD_MAPPING.put(\"dntdom\", \"destination.registered_domain\");\n+ FIELD_MAPPING.put(\"dpt\", \"destination.port\");\n+ FIELD_MAPPING.put(\"dpid\", \"destination.process.pid\");\n+ FIELD_MAPPING.put(\"dproc\", \"destination.process.name\");\n+ FIELD_MAPPING.put(\"duid\", \"destination.user.id\");\n+ FIELD_MAPPING.put(\"duser\", \"destination.user.name\");\n+ FIELD_MAPPING.put(\"dpriv\", \"destination.user.group.name\");\n+ FIELD_MAPPING.put(\"act\", \"event.action\");\n+ FIELD_MAPPING.put(\"dvc\", \"observer.ip\");\n+ FIELD_MAPPING.put(\"deviceDirection\", \"network.direction\");\n+ FIELD_MAPPING.put(\"deviceDnsDomain\", 
\"observer.registered_domain\");\n+ FIELD_MAPPING.put(\"deviceExternalId\", \"observer.name\");\n+ FIELD_MAPPING.put(\"deviceFacility\", \"log.syslog.facility.code\");\n+ FIELD_MAPPING.put(\"dvchost\", \"observer.hostname\");\n+ FIELD_MAPPING.put(\"deviceInboundInterface\", \"observer.ingress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcmac\", \"observer.mac\");\n+ FIELD_MAPPING.put(\"deviceOutboundInterface\", \"observer.egress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcpid\", \"process.pid\");\n+ FIELD_MAPPING.put(\"deviceProcessName\", \"process.name\");\n+ FIELD_MAPPING.put(\"rt\", \"@timestamp\");\n+ FIELD_MAPPING.put(\"dtz\", \"event.timezone\");\n+ FIELD_MAPPING.put(\"deviceTranslatedAddress\", \"host.nat.ip\");\n+ FIELD_MAPPING.put(\"device.version\", \"observer.version\");\n+ FIELD_MAPPING.put(\"deviceVersion\", \"observer.version\");\n+ FIELD_MAPPING.put(\"device.product\", \"observer.product\");\n+ FIELD_MAPPING.put(\"deviceProduct\", \"observer.product\");\n+ FIELD_MAPPING.put(\"device.event_class_id\", \"event.code\");\n+ FIELD_MAPPING.put(\"device.vendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"deviceVendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"end\", \"event.end\");\n+ FIELD_MAPPING.put(\"eventId\", \"event.id\");\n+ FIELD_MAPPING.put(\"outcome\", \"event.outcome\");\n+ FIELD_MAPPING.put(\"fileCreateTime\", \"file.created\");\n+ FIELD_MAPPING.put(\"fileHash\", \"file.hash\");\n+ FIELD_MAPPING.put(\"fileId\", \"file.inode\");\n+ FIELD_MAPPING.put(\"fileModificationTime\", \"file.mtime\");\n+ FIELD_MAPPING.put(\"fname\", \"file.name\");\n+ FIELD_MAPPING.put(\"filePath\", \"file.path\");\n+ FIELD_MAPPING.put(\"filePermission\", \"file.group\");\n+ FIELD_MAPPING.put(\"fsize\", \"file.size\");\n+ FIELD_MAPPING.put(\"fileType\", \"file.extension\");\n+ FIELD_MAPPING.put(\"mrt\", \"event.ingested\");\n+ FIELD_MAPPING.put(\"msg\", \"message\");\n+ FIELD_MAPPING.put(\"reason\", \"event.reason\");\n+ FIELD_MAPPING.put(\"requestClientApplication\", \"user_agent.original\");\n+ FIELD_MAPPING.put(\"requestContext\", \"http.request.referrer\");\n+ FIELD_MAPPING.put(\"requestMethod\", \"http.request.method\");\n+ FIELD_MAPPING.put(\"request\", \"url.original\");\n+ FIELD_MAPPING.put(\"src\", \"source.ip\");\n+ FIELD_MAPPING.put(\"sourceDnsDomain\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"slat\", \"source.geo.location.lat\");\n+ FIELD_MAPPING.put(\"slong\", \"source.geo.location.lon\");\n+ FIELD_MAPPING.put(\"shost\", \"source.domain\");\n+ FIELD_MAPPING.put(\"smac\", \"source.mac\");\n+ FIELD_MAPPING.put(\"sntdom\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"spt\", \"source.port\");\n+ FIELD_MAPPING.put(\"spid\", \"source.process.pid\");\n+ FIELD_MAPPING.put(\"sproc\", \"source.process.name\");\n+ FIELD_MAPPING.put(\"sourceServiceName\", \"source.service.name\");\n+ FIELD_MAPPING.put(\"suser\", \"source.user.name\");\n+ FIELD_MAPPING.put(\"start\", \"event.start\");\n+ FIELD_MAPPING.put(\"proto\", \"network.transport\");\n+ // Add more mappings as needed\n+ }\n+\n+ void process(String cefString, String targetField) {\n+ List headerFields = new ArrayList<>();\n+ Matcher headerMatcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && headerMatcher.find(); i++) {\n+ String field = headerMatcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headerFields.add(field);\n+ extensionStart = headerMatcher.end();\n+ }\n+\n+ if (headerFields.size() > 0 && 
headerFields.get(0).startsWith(\"CEF:\")) {\n+ CEFEvent event = new CEFEvent();\n+ // Add error message if there are not enough header fields\n+ if (headerFields.size() != 7) {\n+ ingestDocument.appendFieldValue(\"error.message\", ERROR_MESSAGE_INCOMPLETE_CEF_HEADER);\n+ }\n+ for (int i = 0; i < headerFields.size(); i++) {\n+ switch (i) {\n+ case 0:\n+ event.setVersion(headerFields.get(0).substring(4));\n+ break;\n+ case 1:\n+ event.setDeviceVendor(headerFields.get(1));\n+ ingestDocument.setFieldValue(\"observer.vendor\", headerFields.get(1));\n+ break;\n+ case 2:\n+ event.setDeviceProduct(headerFields.get(2));\n+ ingestDocument.setFieldValue(\"observer.product\", headerFields.get(2));\n+ break;\n+ case 3:\n+ event.setDeviceVersion(headerFields.get(3));\n+ ingestDocument.setFieldValue(\"observer.version\", headerFields.get(3));\n+ break;\n+ case 4:\n+ event.setDeviceEventClassId(headerFields.get(4));\n+ ingestDocument.setFieldValue(\"event.code\", headerFields.get(4));\n+ break;\n+ case 5:\n+ event.setName(headerFields.get(5));\n+ break;\n+ case 6:\n+ event.setSeverity(headerFields.get(6));\n+ break;\n+ }\n+ }\n+ String extensionString = cefString.substring(extensionStart);\n+ Map extensions = parseExtensions(extensionString);\n+\n+ if (removeEmptyValue) {\n+ removeEmptyValue(extensions);\n+ }\n+ event.setExtensions(extensions);\n+\n+ // Translate possible ECS fields and remove them from extensions\n+ Map translatedFields = extensions.entrySet()\n+ .stream()\n+ .filter(entry -> FIELD_MAPPING.containsKey(entry.getKey()))\n+ .collect(Collectors.toMap(entry -> FIELD_MAPPING.get(entry.getKey()), entry -> {\n+ Class fieldType = ECSFieldWithType.getFieldType(FIELD_MAPPING.get(entry.getKey()));\n+ return convertValueToType(entry.getValue(), fieldType);\n+ }));\n+\n+ // Remove the translated entries from extensions\n+ event.removeMappedExtensions();\n+\n+ ingestDocument.setFieldValue(targetField, event.toObject());\n+ // Add ECS translations to the root of the document\n+ if (translatedFields.isEmpty() == false) {\n+ translatedFields.forEach(ingestDocument::setFieldValue);\n+ }\n+ } else {\n+ throw new IllegalArgumentException(\"Invalid CEF format\");\n+ }\n+ }\n+\n+ private Object convertValueToType(String value, Class type) {\n+ if (type == Long.class) {\n+ return Long.parseLong(value);\n+ } else if (type == Double.class) {\n+ return Double.parseDouble(value);\n+ } else if (type == Integer.class) {\n+ return Integer.parseInt(value);\n+ } else {\n+ return value; // Default to String\n+ }\n+ }\n+\n+ private Map parseExtensions(String extensionString) {\n+ Map extensions = new HashMap<>();\n+ Matcher matcher = EXTENSION_NEXT_KEY_VALUE_PATTERN.matcher(extensionString);\n+ int lastEnd = 0;\n+ while (matcher.find()) {\n+ String key = matcher.group(1);\n+ String value = matcher.group(2);\n+\n+ // Convert extension field name to strict legal field_reference\n+ if (key.endsWith(\"]\")) {\n+ key = convertArrayLikeKey(key);\n+ }\n+\n+ extensions.put(key, desanitizeExtensionVal(value.trim()));\n+ lastEnd = matcher.end();\n+ }\n+ // If there's any remaining unparsed content, throw an exception\n+ if (lastEnd < extensionString.length()) {\n+ throw new IllegalArgumentException(\"Invalid extensions; keyless value present: \" + extensionString.substring(lastEnd));\n+ }\n+ return extensions;\n+ }\n+\n+ private void removeEmptyValue(Map map) {\n+ map.entrySet().removeIf(entry -> entry.getValue().isEmpty());\n+ map.entrySet().removeIf(entry -> entry.getValue() == null);\n+ }\n+\n+ private String 
convertArrayLikeKey(String key) {\n+ Matcher matcher = EXTENSION_KEY_ARRAY_CAPTURE.matcher(key);\n+ if (matcher.matches()) {\n+ return \"[\" + matcher.group(1) + \"]\" + matcher.group(2);\n+ }\n+ return key;\n+ }\n+\n+ public static String desanitizeExtensionVal(String value) {\n+ String desanitized = value;\n+ for (Map.Entry entry : EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.entrySet()) {\n+ desanitized = desanitized.replace(entry.getKey(), entry.getValue());\n+ }\n+ return desanitized;\n+ }\n+\n+ public static class CEFEvent {\n+ private String version;\n+ private String deviceVendor;\n+ private String deviceProduct;\n+ private String deviceVersion;\n+ private String deviceEventClassId;\n+ private String name;\n+ private String severity;\n+ private final Map extensions = new HashMap<>();\n+ private Map translatedFields;\n+\n+ // Getters and setters for all fields\n+ public String getVersion() {\n+ return version;\n+ }\n+\n+ public void setVersion(String version) {\n+ this.version = version;\n+ }\n+\n+ public String getDeviceVendor() {\n+ return deviceVendor;\n+ }\n+\n+ public void setDeviceVendor(String deviceVendor) {\n+ this.deviceVendor = deviceVendor;\n+ }\n+\n+ public String getDeviceProduct() {\n+ return deviceProduct;\n+ }\n+\n+ public void setDeviceProduct(String deviceProduct) {\n+ this.deviceProduct = deviceProduct;\n+ }\n+\n+ public String getDeviceVersion() {\n+ return deviceVersion;\n+ }\n+\n+ public void setDeviceVersion(String deviceVersion) {\n+ this.deviceVersion = deviceVersion;\n+ }\n+\n+ public String getDeviceEventClassId() {\n+ return deviceEventClassId;\n+ }\n+\n+ public void setDeviceEventClassId(String deviceEventClassId) {\n+ this.deviceEventClassId = deviceEventClassId;\n+ }\n+\n+ public String getName() {\n+ return name;\n+ }\n+\n+ public void setName(String name) {\n+ this.name = name;\n+ }\n+\n+ public String getSeverity() {\n+ return severity;\n+ }\n+\n+ public void setSeverity(String severity) {\n+ this.severity = severity;\n+ }\n+\n+ public Map getExtensions() {\n+ return extensions;\n+ }\n+\n+ public void setExtensions(Map extensions) {\n+ this.extensions.putAll(extensions);\n+ }\n+\n+ public void addExtension(String key, String extension) {\n+ this.extensions.put(key, extension);\n+ }\n+\n+ public void removeMappedExtensions() {\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsKey(entry.getKey()));\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsValue(entry.getKey()));", + "comment_created_at": "2025-03-25T15:43:23+00:00", + "comment_author": "bhapas", + "comment_body": "Ohh yeah I get your point now.. I was only looking at `containsKey` thing. Yeah the `containsValue` iterates over the N entries each time. I ll figure out a different way for this.", + "pr_file_module": null + }, + { + "comment_id": "2012455120", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 122491, + "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", + "discussion_id": "2012358741", + "commented_code": "@@ -0,0 +1,449 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+import java.util.stream.Collectors;\n+\n+final class CefParser {\n+\n+ private final IngestDocument ingestDocument;\n+ private final boolean removeEmptyValue;\n+\n+ CefParser(IngestDocument ingestDocument, boolean removeEmptyValue) {\n+ this.ingestDocument = ingestDocument;\n+ this.removeEmptyValue = removeEmptyValue;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:\\\\S|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = new HashMap<>();\n+ private static final Map FIELD_MAPPING = new HashMap<>();\n+ private static final String ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = \"incomplete CEF header\";\n+\n+ static {\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\\\\", \"\\\\\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\=\", \"=\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\n\", \"\\n\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\r\", \"\\r\");\n+\n+ FIELD_MAPPING.put(\"app\", \"network.protocol\");\n+ FIELD_MAPPING.put(\"in\", \"source.bytes\");\n+ FIELD_MAPPING.put(\"out\", \"destination.bytes\");\n+ FIELD_MAPPING.put(\"dst\", \"destination.ip\");\n+ FIELD_MAPPING.put(\"dlat\", \"destination.geo.location.lat\");\n+ FIELD_MAPPING.put(\"dlong\", \"destination.geo.location.lon\");\n+ FIELD_MAPPING.put(\"dhost\", \"destination.domain\");\n+ FIELD_MAPPING.put(\"dmac\", \"destination.mac\");\n+ FIELD_MAPPING.put(\"dntdom\", \"destination.registered_domain\");\n+ FIELD_MAPPING.put(\"dpt\", \"destination.port\");\n+ FIELD_MAPPING.put(\"dpid\", \"destination.process.pid\");\n+ FIELD_MAPPING.put(\"dproc\", \"destination.process.name\");\n+ FIELD_MAPPING.put(\"duid\", \"destination.user.id\");\n+ FIELD_MAPPING.put(\"duser\", \"destination.user.name\");\n+ FIELD_MAPPING.put(\"dpriv\", \"destination.user.group.name\");\n+ FIELD_MAPPING.put(\"act\", \"event.action\");\n+ FIELD_MAPPING.put(\"dvc\", \"observer.ip\");\n+ FIELD_MAPPING.put(\"deviceDirection\", \"network.direction\");\n+ FIELD_MAPPING.put(\"deviceDnsDomain\", 
\"observer.registered_domain\");\n+ FIELD_MAPPING.put(\"deviceExternalId\", \"observer.name\");\n+ FIELD_MAPPING.put(\"deviceFacility\", \"log.syslog.facility.code\");\n+ FIELD_MAPPING.put(\"dvchost\", \"observer.hostname\");\n+ FIELD_MAPPING.put(\"deviceInboundInterface\", \"observer.ingress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcmac\", \"observer.mac\");\n+ FIELD_MAPPING.put(\"deviceOutboundInterface\", \"observer.egress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcpid\", \"process.pid\");\n+ FIELD_MAPPING.put(\"deviceProcessName\", \"process.name\");\n+ FIELD_MAPPING.put(\"rt\", \"@timestamp\");\n+ FIELD_MAPPING.put(\"dtz\", \"event.timezone\");\n+ FIELD_MAPPING.put(\"deviceTranslatedAddress\", \"host.nat.ip\");\n+ FIELD_MAPPING.put(\"device.version\", \"observer.version\");\n+ FIELD_MAPPING.put(\"deviceVersion\", \"observer.version\");\n+ FIELD_MAPPING.put(\"device.product\", \"observer.product\");\n+ FIELD_MAPPING.put(\"deviceProduct\", \"observer.product\");\n+ FIELD_MAPPING.put(\"device.event_class_id\", \"event.code\");\n+ FIELD_MAPPING.put(\"device.vendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"deviceVendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"end\", \"event.end\");\n+ FIELD_MAPPING.put(\"eventId\", \"event.id\");\n+ FIELD_MAPPING.put(\"outcome\", \"event.outcome\");\n+ FIELD_MAPPING.put(\"fileCreateTime\", \"file.created\");\n+ FIELD_MAPPING.put(\"fileHash\", \"file.hash\");\n+ FIELD_MAPPING.put(\"fileId\", \"file.inode\");\n+ FIELD_MAPPING.put(\"fileModificationTime\", \"file.mtime\");\n+ FIELD_MAPPING.put(\"fname\", \"file.name\");\n+ FIELD_MAPPING.put(\"filePath\", \"file.path\");\n+ FIELD_MAPPING.put(\"filePermission\", \"file.group\");\n+ FIELD_MAPPING.put(\"fsize\", \"file.size\");\n+ FIELD_MAPPING.put(\"fileType\", \"file.extension\");\n+ FIELD_MAPPING.put(\"mrt\", \"event.ingested\");\n+ FIELD_MAPPING.put(\"msg\", \"message\");\n+ FIELD_MAPPING.put(\"reason\", \"event.reason\");\n+ FIELD_MAPPING.put(\"requestClientApplication\", \"user_agent.original\");\n+ FIELD_MAPPING.put(\"requestContext\", \"http.request.referrer\");\n+ FIELD_MAPPING.put(\"requestMethod\", \"http.request.method\");\n+ FIELD_MAPPING.put(\"request\", \"url.original\");\n+ FIELD_MAPPING.put(\"src\", \"source.ip\");\n+ FIELD_MAPPING.put(\"sourceDnsDomain\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"slat\", \"source.geo.location.lat\");\n+ FIELD_MAPPING.put(\"slong\", \"source.geo.location.lon\");\n+ FIELD_MAPPING.put(\"shost\", \"source.domain\");\n+ FIELD_MAPPING.put(\"smac\", \"source.mac\");\n+ FIELD_MAPPING.put(\"sntdom\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"spt\", \"source.port\");\n+ FIELD_MAPPING.put(\"spid\", \"source.process.pid\");\n+ FIELD_MAPPING.put(\"sproc\", \"source.process.name\");\n+ FIELD_MAPPING.put(\"sourceServiceName\", \"source.service.name\");\n+ FIELD_MAPPING.put(\"suser\", \"source.user.name\");\n+ FIELD_MAPPING.put(\"start\", \"event.start\");\n+ FIELD_MAPPING.put(\"proto\", \"network.transport\");\n+ // Add more mappings as needed\n+ }\n+\n+ void process(String cefString, String targetField) {\n+ List headerFields = new ArrayList<>();\n+ Matcher headerMatcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && headerMatcher.find(); i++) {\n+ String field = headerMatcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headerFields.add(field);\n+ extensionStart = headerMatcher.end();\n+ }\n+\n+ if (headerFields.size() > 0 && 
headerFields.get(0).startsWith(\"CEF:\")) {\n+ CEFEvent event = new CEFEvent();\n+ // Add error message if there are not enough header fields\n+ if (headerFields.size() != 7) {\n+ ingestDocument.appendFieldValue(\"error.message\", ERROR_MESSAGE_INCOMPLETE_CEF_HEADER);\n+ }\n+ for (int i = 0; i < headerFields.size(); i++) {\n+ switch (i) {\n+ case 0:\n+ event.setVersion(headerFields.get(0).substring(4));\n+ break;\n+ case 1:\n+ event.setDeviceVendor(headerFields.get(1));\n+ ingestDocument.setFieldValue(\"observer.vendor\", headerFields.get(1));\n+ break;\n+ case 2:\n+ event.setDeviceProduct(headerFields.get(2));\n+ ingestDocument.setFieldValue(\"observer.product\", headerFields.get(2));\n+ break;\n+ case 3:\n+ event.setDeviceVersion(headerFields.get(3));\n+ ingestDocument.setFieldValue(\"observer.version\", headerFields.get(3));\n+ break;\n+ case 4:\n+ event.setDeviceEventClassId(headerFields.get(4));\n+ ingestDocument.setFieldValue(\"event.code\", headerFields.get(4));\n+ break;\n+ case 5:\n+ event.setName(headerFields.get(5));\n+ break;\n+ case 6:\n+ event.setSeverity(headerFields.get(6));\n+ break;\n+ }\n+ }\n+ String extensionString = cefString.substring(extensionStart);\n+ Map extensions = parseExtensions(extensionString);\n+\n+ if (removeEmptyValue) {\n+ removeEmptyValue(extensions);\n+ }\n+ event.setExtensions(extensions);\n+\n+ // Translate possible ECS fields and remove them from extensions\n+ Map translatedFields = extensions.entrySet()\n+ .stream()\n+ .filter(entry -> FIELD_MAPPING.containsKey(entry.getKey()))\n+ .collect(Collectors.toMap(entry -> FIELD_MAPPING.get(entry.getKey()), entry -> {\n+ Class fieldType = ECSFieldWithType.getFieldType(FIELD_MAPPING.get(entry.getKey()));\n+ return convertValueToType(entry.getValue(), fieldType);\n+ }));\n+\n+ // Remove the translated entries from extensions\n+ event.removeMappedExtensions();\n+\n+ ingestDocument.setFieldValue(targetField, event.toObject());\n+ // Add ECS translations to the root of the document\n+ if (translatedFields.isEmpty() == false) {\n+ translatedFields.forEach(ingestDocument::setFieldValue);\n+ }\n+ } else {\n+ throw new IllegalArgumentException(\"Invalid CEF format\");\n+ }\n+ }\n+\n+ private Object convertValueToType(String value, Class type) {\n+ if (type == Long.class) {\n+ return Long.parseLong(value);\n+ } else if (type == Double.class) {\n+ return Double.parseDouble(value);\n+ } else if (type == Integer.class) {\n+ return Integer.parseInt(value);\n+ } else {\n+ return value; // Default to String\n+ }\n+ }\n+\n+ private Map parseExtensions(String extensionString) {\n+ Map extensions = new HashMap<>();\n+ Matcher matcher = EXTENSION_NEXT_KEY_VALUE_PATTERN.matcher(extensionString);\n+ int lastEnd = 0;\n+ while (matcher.find()) {\n+ String key = matcher.group(1);\n+ String value = matcher.group(2);\n+\n+ // Convert extension field name to strict legal field_reference\n+ if (key.endsWith(\"]\")) {\n+ key = convertArrayLikeKey(key);\n+ }\n+\n+ extensions.put(key, desanitizeExtensionVal(value.trim()));\n+ lastEnd = matcher.end();\n+ }\n+ // If there's any remaining unparsed content, throw an exception\n+ if (lastEnd < extensionString.length()) {\n+ throw new IllegalArgumentException(\"Invalid extensions; keyless value present: \" + extensionString.substring(lastEnd));\n+ }\n+ return extensions;\n+ }\n+\n+ private void removeEmptyValue(Map map) {\n+ map.entrySet().removeIf(entry -> entry.getValue().isEmpty());\n+ map.entrySet().removeIf(entry -> entry.getValue() == null);\n+ }\n+\n+ private String 
convertArrayLikeKey(String key) {\n+ Matcher matcher = EXTENSION_KEY_ARRAY_CAPTURE.matcher(key);\n+ if (matcher.matches()) {\n+ return \"[\" + matcher.group(1) + \"]\" + matcher.group(2);\n+ }\n+ return key;\n+ }\n+\n+ public static String desanitizeExtensionVal(String value) {\n+ String desanitized = value;\n+ for (Map.Entry entry : EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.entrySet()) {\n+ desanitized = desanitized.replace(entry.getKey(), entry.getValue());\n+ }\n+ return desanitized;\n+ }\n+\n+ public static class CEFEvent {\n+ private String version;\n+ private String deviceVendor;\n+ private String deviceProduct;\n+ private String deviceVersion;\n+ private String deviceEventClassId;\n+ private String name;\n+ private String severity;\n+ private final Map extensions = new HashMap<>();\n+ private Map translatedFields;\n+\n+ // Getters and setters for all fields\n+ public String getVersion() {\n+ return version;\n+ }\n+\n+ public void setVersion(String version) {\n+ this.version = version;\n+ }\n+\n+ public String getDeviceVendor() {\n+ return deviceVendor;\n+ }\n+\n+ public void setDeviceVendor(String deviceVendor) {\n+ this.deviceVendor = deviceVendor;\n+ }\n+\n+ public String getDeviceProduct() {\n+ return deviceProduct;\n+ }\n+\n+ public void setDeviceProduct(String deviceProduct) {\n+ this.deviceProduct = deviceProduct;\n+ }\n+\n+ public String getDeviceVersion() {\n+ return deviceVersion;\n+ }\n+\n+ public void setDeviceVersion(String deviceVersion) {\n+ this.deviceVersion = deviceVersion;\n+ }\n+\n+ public String getDeviceEventClassId() {\n+ return deviceEventClassId;\n+ }\n+\n+ public void setDeviceEventClassId(String deviceEventClassId) {\n+ this.deviceEventClassId = deviceEventClassId;\n+ }\n+\n+ public String getName() {\n+ return name;\n+ }\n+\n+ public void setName(String name) {\n+ this.name = name;\n+ }\n+\n+ public String getSeverity() {\n+ return severity;\n+ }\n+\n+ public void setSeverity(String severity) {\n+ this.severity = severity;\n+ }\n+\n+ public Map getExtensions() {\n+ return extensions;\n+ }\n+\n+ public void setExtensions(Map extensions) {\n+ this.extensions.putAll(extensions);\n+ }\n+\n+ public void addExtension(String key, String extension) {\n+ this.extensions.put(key, extension);\n+ }\n+\n+ public void removeMappedExtensions() {\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsKey(entry.getKey()));\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsValue(entry.getKey()));", + "comment_created_at": "2025-03-25T16:09:29+00:00", + "comment_author": "joegallo", + "comment_body": "If you smash all the keys and all the values into a single additional static `Set`, then you could do a `this.extensions.removeAll(thatStaticSet)`.", + "pr_file_module": null + }, + { + "comment_id": "2014378954", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 122491, + "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", + "discussion_id": "2012358741", + "commented_code": "@@ -0,0 +1,449 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+import java.util.stream.Collectors;\n+\n+final class CefParser {\n+\n+ private final IngestDocument ingestDocument;\n+ private final boolean removeEmptyValue;\n+\n+ CefParser(IngestDocument ingestDocument, boolean removeEmptyValue) {\n+ this.ingestDocument = ingestDocument;\n+ this.removeEmptyValue = removeEmptyValue;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:\\\\S|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = new HashMap<>();\n+ private static final Map FIELD_MAPPING = new HashMap<>();\n+ private static final String ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = \"incomplete CEF header\";\n+\n+ static {\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\\\\", \"\\\\\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\=\", \"=\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\n\", \"\\n\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\r\", \"\\r\");\n+\n+ FIELD_MAPPING.put(\"app\", \"network.protocol\");\n+ FIELD_MAPPING.put(\"in\", \"source.bytes\");\n+ FIELD_MAPPING.put(\"out\", \"destination.bytes\");\n+ FIELD_MAPPING.put(\"dst\", \"destination.ip\");\n+ FIELD_MAPPING.put(\"dlat\", \"destination.geo.location.lat\");\n+ FIELD_MAPPING.put(\"dlong\", \"destination.geo.location.lon\");\n+ FIELD_MAPPING.put(\"dhost\", \"destination.domain\");\n+ FIELD_MAPPING.put(\"dmac\", \"destination.mac\");\n+ FIELD_MAPPING.put(\"dntdom\", \"destination.registered_domain\");\n+ FIELD_MAPPING.put(\"dpt\", \"destination.port\");\n+ FIELD_MAPPING.put(\"dpid\", \"destination.process.pid\");\n+ FIELD_MAPPING.put(\"dproc\", \"destination.process.name\");\n+ FIELD_MAPPING.put(\"duid\", \"destination.user.id\");\n+ FIELD_MAPPING.put(\"duser\", \"destination.user.name\");\n+ FIELD_MAPPING.put(\"dpriv\", \"destination.user.group.name\");\n+ FIELD_MAPPING.put(\"act\", \"event.action\");\n+ FIELD_MAPPING.put(\"dvc\", \"observer.ip\");\n+ FIELD_MAPPING.put(\"deviceDirection\", \"network.direction\");\n+ FIELD_MAPPING.put(\"deviceDnsDomain\", 
\"observer.registered_domain\");\n+ FIELD_MAPPING.put(\"deviceExternalId\", \"observer.name\");\n+ FIELD_MAPPING.put(\"deviceFacility\", \"log.syslog.facility.code\");\n+ FIELD_MAPPING.put(\"dvchost\", \"observer.hostname\");\n+ FIELD_MAPPING.put(\"deviceInboundInterface\", \"observer.ingress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcmac\", \"observer.mac\");\n+ FIELD_MAPPING.put(\"deviceOutboundInterface\", \"observer.egress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcpid\", \"process.pid\");\n+ FIELD_MAPPING.put(\"deviceProcessName\", \"process.name\");\n+ FIELD_MAPPING.put(\"rt\", \"@timestamp\");\n+ FIELD_MAPPING.put(\"dtz\", \"event.timezone\");\n+ FIELD_MAPPING.put(\"deviceTranslatedAddress\", \"host.nat.ip\");\n+ FIELD_MAPPING.put(\"device.version\", \"observer.version\");\n+ FIELD_MAPPING.put(\"deviceVersion\", \"observer.version\");\n+ FIELD_MAPPING.put(\"device.product\", \"observer.product\");\n+ FIELD_MAPPING.put(\"deviceProduct\", \"observer.product\");\n+ FIELD_MAPPING.put(\"device.event_class_id\", \"event.code\");\n+ FIELD_MAPPING.put(\"device.vendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"deviceVendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"end\", \"event.end\");\n+ FIELD_MAPPING.put(\"eventId\", \"event.id\");\n+ FIELD_MAPPING.put(\"outcome\", \"event.outcome\");\n+ FIELD_MAPPING.put(\"fileCreateTime\", \"file.created\");\n+ FIELD_MAPPING.put(\"fileHash\", \"file.hash\");\n+ FIELD_MAPPING.put(\"fileId\", \"file.inode\");\n+ FIELD_MAPPING.put(\"fileModificationTime\", \"file.mtime\");\n+ FIELD_MAPPING.put(\"fname\", \"file.name\");\n+ FIELD_MAPPING.put(\"filePath\", \"file.path\");\n+ FIELD_MAPPING.put(\"filePermission\", \"file.group\");\n+ FIELD_MAPPING.put(\"fsize\", \"file.size\");\n+ FIELD_MAPPING.put(\"fileType\", \"file.extension\");\n+ FIELD_MAPPING.put(\"mrt\", \"event.ingested\");\n+ FIELD_MAPPING.put(\"msg\", \"message\");\n+ FIELD_MAPPING.put(\"reason\", \"event.reason\");\n+ FIELD_MAPPING.put(\"requestClientApplication\", \"user_agent.original\");\n+ FIELD_MAPPING.put(\"requestContext\", \"http.request.referrer\");\n+ FIELD_MAPPING.put(\"requestMethod\", \"http.request.method\");\n+ FIELD_MAPPING.put(\"request\", \"url.original\");\n+ FIELD_MAPPING.put(\"src\", \"source.ip\");\n+ FIELD_MAPPING.put(\"sourceDnsDomain\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"slat\", \"source.geo.location.lat\");\n+ FIELD_MAPPING.put(\"slong\", \"source.geo.location.lon\");\n+ FIELD_MAPPING.put(\"shost\", \"source.domain\");\n+ FIELD_MAPPING.put(\"smac\", \"source.mac\");\n+ FIELD_MAPPING.put(\"sntdom\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"spt\", \"source.port\");\n+ FIELD_MAPPING.put(\"spid\", \"source.process.pid\");\n+ FIELD_MAPPING.put(\"sproc\", \"source.process.name\");\n+ FIELD_MAPPING.put(\"sourceServiceName\", \"source.service.name\");\n+ FIELD_MAPPING.put(\"suser\", \"source.user.name\");\n+ FIELD_MAPPING.put(\"start\", \"event.start\");\n+ FIELD_MAPPING.put(\"proto\", \"network.transport\");\n+ // Add more mappings as needed\n+ }\n+\n+ void process(String cefString, String targetField) {\n+ List headerFields = new ArrayList<>();\n+ Matcher headerMatcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && headerMatcher.find(); i++) {\n+ String field = headerMatcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headerFields.add(field);\n+ extensionStart = headerMatcher.end();\n+ }\n+\n+ if (headerFields.size() > 0 && 
headerFields.get(0).startsWith(\"CEF:\")) {\n+ CEFEvent event = new CEFEvent();\n+ // Add error message if there are not enough header fields\n+ if (headerFields.size() != 7) {\n+ ingestDocument.appendFieldValue(\"error.message\", ERROR_MESSAGE_INCOMPLETE_CEF_HEADER);\n+ }\n+ for (int i = 0; i < headerFields.size(); i++) {\n+ switch (i) {\n+ case 0:\n+ event.setVersion(headerFields.get(0).substring(4));\n+ break;\n+ case 1:\n+ event.setDeviceVendor(headerFields.get(1));\n+ ingestDocument.setFieldValue(\"observer.vendor\", headerFields.get(1));\n+ break;\n+ case 2:\n+ event.setDeviceProduct(headerFields.get(2));\n+ ingestDocument.setFieldValue(\"observer.product\", headerFields.get(2));\n+ break;\n+ case 3:\n+ event.setDeviceVersion(headerFields.get(3));\n+ ingestDocument.setFieldValue(\"observer.version\", headerFields.get(3));\n+ break;\n+ case 4:\n+ event.setDeviceEventClassId(headerFields.get(4));\n+ ingestDocument.setFieldValue(\"event.code\", headerFields.get(4));\n+ break;\n+ case 5:\n+ event.setName(headerFields.get(5));\n+ break;\n+ case 6:\n+ event.setSeverity(headerFields.get(6));\n+ break;\n+ }\n+ }\n+ String extensionString = cefString.substring(extensionStart);\n+ Map extensions = parseExtensions(extensionString);\n+\n+ if (removeEmptyValue) {\n+ removeEmptyValue(extensions);\n+ }\n+ event.setExtensions(extensions);\n+\n+ // Translate possible ECS fields and remove them from extensions\n+ Map translatedFields = extensions.entrySet()\n+ .stream()\n+ .filter(entry -> FIELD_MAPPING.containsKey(entry.getKey()))\n+ .collect(Collectors.toMap(entry -> FIELD_MAPPING.get(entry.getKey()), entry -> {\n+ Class fieldType = ECSFieldWithType.getFieldType(FIELD_MAPPING.get(entry.getKey()));\n+ return convertValueToType(entry.getValue(), fieldType);\n+ }));\n+\n+ // Remove the translated entries from extensions\n+ event.removeMappedExtensions();\n+\n+ ingestDocument.setFieldValue(targetField, event.toObject());\n+ // Add ECS translations to the root of the document\n+ if (translatedFields.isEmpty() == false) {\n+ translatedFields.forEach(ingestDocument::setFieldValue);\n+ }\n+ } else {\n+ throw new IllegalArgumentException(\"Invalid CEF format\");\n+ }\n+ }\n+\n+ private Object convertValueToType(String value, Class type) {\n+ if (type == Long.class) {\n+ return Long.parseLong(value);\n+ } else if (type == Double.class) {\n+ return Double.parseDouble(value);\n+ } else if (type == Integer.class) {\n+ return Integer.parseInt(value);\n+ } else {\n+ return value; // Default to String\n+ }\n+ }\n+\n+ private Map parseExtensions(String extensionString) {\n+ Map extensions = new HashMap<>();\n+ Matcher matcher = EXTENSION_NEXT_KEY_VALUE_PATTERN.matcher(extensionString);\n+ int lastEnd = 0;\n+ while (matcher.find()) {\n+ String key = matcher.group(1);\n+ String value = matcher.group(2);\n+\n+ // Convert extension field name to strict legal field_reference\n+ if (key.endsWith(\"]\")) {\n+ key = convertArrayLikeKey(key);\n+ }\n+\n+ extensions.put(key, desanitizeExtensionVal(value.trim()));\n+ lastEnd = matcher.end();\n+ }\n+ // If there's any remaining unparsed content, throw an exception\n+ if (lastEnd < extensionString.length()) {\n+ throw new IllegalArgumentException(\"Invalid extensions; keyless value present: \" + extensionString.substring(lastEnd));\n+ }\n+ return extensions;\n+ }\n+\n+ private void removeEmptyValue(Map map) {\n+ map.entrySet().removeIf(entry -> entry.getValue().isEmpty());\n+ map.entrySet().removeIf(entry -> entry.getValue() == null);\n+ }\n+\n+ private String 
convertArrayLikeKey(String key) {\n+ Matcher matcher = EXTENSION_KEY_ARRAY_CAPTURE.matcher(key);\n+ if (matcher.matches()) {\n+ return \"[\" + matcher.group(1) + \"]\" + matcher.group(2);\n+ }\n+ return key;\n+ }\n+\n+ public static String desanitizeExtensionVal(String value) {\n+ String desanitized = value;\n+ for (Map.Entry entry : EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.entrySet()) {\n+ desanitized = desanitized.replace(entry.getKey(), entry.getValue());\n+ }\n+ return desanitized;\n+ }\n+\n+ public static class CEFEvent {\n+ private String version;\n+ private String deviceVendor;\n+ private String deviceProduct;\n+ private String deviceVersion;\n+ private String deviceEventClassId;\n+ private String name;\n+ private String severity;\n+ private final Map extensions = new HashMap<>();\n+ private Map translatedFields;\n+\n+ // Getters and setters for all fields\n+ public String getVersion() {\n+ return version;\n+ }\n+\n+ public void setVersion(String version) {\n+ this.version = version;\n+ }\n+\n+ public String getDeviceVendor() {\n+ return deviceVendor;\n+ }\n+\n+ public void setDeviceVendor(String deviceVendor) {\n+ this.deviceVendor = deviceVendor;\n+ }\n+\n+ public String getDeviceProduct() {\n+ return deviceProduct;\n+ }\n+\n+ public void setDeviceProduct(String deviceProduct) {\n+ this.deviceProduct = deviceProduct;\n+ }\n+\n+ public String getDeviceVersion() {\n+ return deviceVersion;\n+ }\n+\n+ public void setDeviceVersion(String deviceVersion) {\n+ this.deviceVersion = deviceVersion;\n+ }\n+\n+ public String getDeviceEventClassId() {\n+ return deviceEventClassId;\n+ }\n+\n+ public void setDeviceEventClassId(String deviceEventClassId) {\n+ this.deviceEventClassId = deviceEventClassId;\n+ }\n+\n+ public String getName() {\n+ return name;\n+ }\n+\n+ public void setName(String name) {\n+ this.name = name;\n+ }\n+\n+ public String getSeverity() {\n+ return severity;\n+ }\n+\n+ public void setSeverity(String severity) {\n+ this.severity = severity;\n+ }\n+\n+ public Map getExtensions() {\n+ return extensions;\n+ }\n+\n+ public void setExtensions(Map extensions) {\n+ this.extensions.putAll(extensions);\n+ }\n+\n+ public void addExtension(String key, String extension) {\n+ this.extensions.put(key, extension);\n+ }\n+\n+ public void removeMappedExtensions() {\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsKey(entry.getKey()));\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsValue(entry.getKey()));", + "comment_created_at": "2025-03-26T15:00:44+00:00", + "comment_author": "bhapas", + "comment_body": "```\r\n public void removeMappedExtensions() {\r\n Set fieldMappingKeysAndValues = new HashSet<>();\r\n fieldMappingKeysAndValues.addAll(FIELD_MAPPING.keySet());\r\n fieldMappingKeysAndValues.addAll(FIELD_MAPPING.values());\r\n\r\n extensions.entrySet().removeIf(entry -> fieldMappingKeysAndValues.contains(entry.getKey()));\r\n }\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2014448667", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 122491, + "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", + "discussion_id": "2012358741", + "commented_code": "@@ -0,0 +1,449 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+import java.util.stream.Collectors;\n+\n+final class CefParser {\n+\n+ private final IngestDocument ingestDocument;\n+ private final boolean removeEmptyValue;\n+\n+ CefParser(IngestDocument ingestDocument, boolean removeEmptyValue) {\n+ this.ingestDocument = ingestDocument;\n+ this.removeEmptyValue = removeEmptyValue;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:\\\\S|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = new HashMap<>();\n+ private static final Map FIELD_MAPPING = new HashMap<>();\n+ private static final String ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = \"incomplete CEF header\";\n+\n+ static {\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\\\\", \"\\\\\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\=\", \"=\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\n\", \"\\n\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\r\", \"\\r\");\n+\n+ FIELD_MAPPING.put(\"app\", \"network.protocol\");\n+ FIELD_MAPPING.put(\"in\", \"source.bytes\");\n+ FIELD_MAPPING.put(\"out\", \"destination.bytes\");\n+ FIELD_MAPPING.put(\"dst\", \"destination.ip\");\n+ FIELD_MAPPING.put(\"dlat\", \"destination.geo.location.lat\");\n+ FIELD_MAPPING.put(\"dlong\", \"destination.geo.location.lon\");\n+ FIELD_MAPPING.put(\"dhost\", \"destination.domain\");\n+ FIELD_MAPPING.put(\"dmac\", \"destination.mac\");\n+ FIELD_MAPPING.put(\"dntdom\", \"destination.registered_domain\");\n+ FIELD_MAPPING.put(\"dpt\", \"destination.port\");\n+ FIELD_MAPPING.put(\"dpid\", \"destination.process.pid\");\n+ FIELD_MAPPING.put(\"dproc\", \"destination.process.name\");\n+ FIELD_MAPPING.put(\"duid\", \"destination.user.id\");\n+ FIELD_MAPPING.put(\"duser\", \"destination.user.name\");\n+ FIELD_MAPPING.put(\"dpriv\", \"destination.user.group.name\");\n+ FIELD_MAPPING.put(\"act\", \"event.action\");\n+ FIELD_MAPPING.put(\"dvc\", \"observer.ip\");\n+ FIELD_MAPPING.put(\"deviceDirection\", \"network.direction\");\n+ FIELD_MAPPING.put(\"deviceDnsDomain\", 
\"observer.registered_domain\");\n+ FIELD_MAPPING.put(\"deviceExternalId\", \"observer.name\");\n+ FIELD_MAPPING.put(\"deviceFacility\", \"log.syslog.facility.code\");\n+ FIELD_MAPPING.put(\"dvchost\", \"observer.hostname\");\n+ FIELD_MAPPING.put(\"deviceInboundInterface\", \"observer.ingress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcmac\", \"observer.mac\");\n+ FIELD_MAPPING.put(\"deviceOutboundInterface\", \"observer.egress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcpid\", \"process.pid\");\n+ FIELD_MAPPING.put(\"deviceProcessName\", \"process.name\");\n+ FIELD_MAPPING.put(\"rt\", \"@timestamp\");\n+ FIELD_MAPPING.put(\"dtz\", \"event.timezone\");\n+ FIELD_MAPPING.put(\"deviceTranslatedAddress\", \"host.nat.ip\");\n+ FIELD_MAPPING.put(\"device.version\", \"observer.version\");\n+ FIELD_MAPPING.put(\"deviceVersion\", \"observer.version\");\n+ FIELD_MAPPING.put(\"device.product\", \"observer.product\");\n+ FIELD_MAPPING.put(\"deviceProduct\", \"observer.product\");\n+ FIELD_MAPPING.put(\"device.event_class_id\", \"event.code\");\n+ FIELD_MAPPING.put(\"device.vendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"deviceVendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"end\", \"event.end\");\n+ FIELD_MAPPING.put(\"eventId\", \"event.id\");\n+ FIELD_MAPPING.put(\"outcome\", \"event.outcome\");\n+ FIELD_MAPPING.put(\"fileCreateTime\", \"file.created\");\n+ FIELD_MAPPING.put(\"fileHash\", \"file.hash\");\n+ FIELD_MAPPING.put(\"fileId\", \"file.inode\");\n+ FIELD_MAPPING.put(\"fileModificationTime\", \"file.mtime\");\n+ FIELD_MAPPING.put(\"fname\", \"file.name\");\n+ FIELD_MAPPING.put(\"filePath\", \"file.path\");\n+ FIELD_MAPPING.put(\"filePermission\", \"file.group\");\n+ FIELD_MAPPING.put(\"fsize\", \"file.size\");\n+ FIELD_MAPPING.put(\"fileType\", \"file.extension\");\n+ FIELD_MAPPING.put(\"mrt\", \"event.ingested\");\n+ FIELD_MAPPING.put(\"msg\", \"message\");\n+ FIELD_MAPPING.put(\"reason\", \"event.reason\");\n+ FIELD_MAPPING.put(\"requestClientApplication\", \"user_agent.original\");\n+ FIELD_MAPPING.put(\"requestContext\", \"http.request.referrer\");\n+ FIELD_MAPPING.put(\"requestMethod\", \"http.request.method\");\n+ FIELD_MAPPING.put(\"request\", \"url.original\");\n+ FIELD_MAPPING.put(\"src\", \"source.ip\");\n+ FIELD_MAPPING.put(\"sourceDnsDomain\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"slat\", \"source.geo.location.lat\");\n+ FIELD_MAPPING.put(\"slong\", \"source.geo.location.lon\");\n+ FIELD_MAPPING.put(\"shost\", \"source.domain\");\n+ FIELD_MAPPING.put(\"smac\", \"source.mac\");\n+ FIELD_MAPPING.put(\"sntdom\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"spt\", \"source.port\");\n+ FIELD_MAPPING.put(\"spid\", \"source.process.pid\");\n+ FIELD_MAPPING.put(\"sproc\", \"source.process.name\");\n+ FIELD_MAPPING.put(\"sourceServiceName\", \"source.service.name\");\n+ FIELD_MAPPING.put(\"suser\", \"source.user.name\");\n+ FIELD_MAPPING.put(\"start\", \"event.start\");\n+ FIELD_MAPPING.put(\"proto\", \"network.transport\");\n+ // Add more mappings as needed\n+ }\n+\n+ void process(String cefString, String targetField) {\n+ List headerFields = new ArrayList<>();\n+ Matcher headerMatcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && headerMatcher.find(); i++) {\n+ String field = headerMatcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headerFields.add(field);\n+ extensionStart = headerMatcher.end();\n+ }\n+\n+ if (headerFields.size() > 0 && 
headerFields.get(0).startsWith(\"CEF:\")) {\n+ CEFEvent event = new CEFEvent();\n+ // Add error message if there are not enough header fields\n+ if (headerFields.size() != 7) {\n+ ingestDocument.appendFieldValue(\"error.message\", ERROR_MESSAGE_INCOMPLETE_CEF_HEADER);\n+ }\n+ for (int i = 0; i < headerFields.size(); i++) {\n+ switch (i) {\n+ case 0:\n+ event.setVersion(headerFields.get(0).substring(4));\n+ break;\n+ case 1:\n+ event.setDeviceVendor(headerFields.get(1));\n+ ingestDocument.setFieldValue(\"observer.vendor\", headerFields.get(1));\n+ break;\n+ case 2:\n+ event.setDeviceProduct(headerFields.get(2));\n+ ingestDocument.setFieldValue(\"observer.product\", headerFields.get(2));\n+ break;\n+ case 3:\n+ event.setDeviceVersion(headerFields.get(3));\n+ ingestDocument.setFieldValue(\"observer.version\", headerFields.get(3));\n+ break;\n+ case 4:\n+ event.setDeviceEventClassId(headerFields.get(4));\n+ ingestDocument.setFieldValue(\"event.code\", headerFields.get(4));\n+ break;\n+ case 5:\n+ event.setName(headerFields.get(5));\n+ break;\n+ case 6:\n+ event.setSeverity(headerFields.get(6));\n+ break;\n+ }\n+ }\n+ String extensionString = cefString.substring(extensionStart);\n+ Map extensions = parseExtensions(extensionString);\n+\n+ if (removeEmptyValue) {\n+ removeEmptyValue(extensions);\n+ }\n+ event.setExtensions(extensions);\n+\n+ // Translate possible ECS fields and remove them from extensions\n+ Map translatedFields = extensions.entrySet()\n+ .stream()\n+ .filter(entry -> FIELD_MAPPING.containsKey(entry.getKey()))\n+ .collect(Collectors.toMap(entry -> FIELD_MAPPING.get(entry.getKey()), entry -> {\n+ Class fieldType = ECSFieldWithType.getFieldType(FIELD_MAPPING.get(entry.getKey()));\n+ return convertValueToType(entry.getValue(), fieldType);\n+ }));\n+\n+ // Remove the translated entries from extensions\n+ event.removeMappedExtensions();\n+\n+ ingestDocument.setFieldValue(targetField, event.toObject());\n+ // Add ECS translations to the root of the document\n+ if (translatedFields.isEmpty() == false) {\n+ translatedFields.forEach(ingestDocument::setFieldValue);\n+ }\n+ } else {\n+ throw new IllegalArgumentException(\"Invalid CEF format\");\n+ }\n+ }\n+\n+ private Object convertValueToType(String value, Class type) {\n+ if (type == Long.class) {\n+ return Long.parseLong(value);\n+ } else if (type == Double.class) {\n+ return Double.parseDouble(value);\n+ } else if (type == Integer.class) {\n+ return Integer.parseInt(value);\n+ } else {\n+ return value; // Default to String\n+ }\n+ }\n+\n+ private Map parseExtensions(String extensionString) {\n+ Map extensions = new HashMap<>();\n+ Matcher matcher = EXTENSION_NEXT_KEY_VALUE_PATTERN.matcher(extensionString);\n+ int lastEnd = 0;\n+ while (matcher.find()) {\n+ String key = matcher.group(1);\n+ String value = matcher.group(2);\n+\n+ // Convert extension field name to strict legal field_reference\n+ if (key.endsWith(\"]\")) {\n+ key = convertArrayLikeKey(key);\n+ }\n+\n+ extensions.put(key, desanitizeExtensionVal(value.trim()));\n+ lastEnd = matcher.end();\n+ }\n+ // If there's any remaining unparsed content, throw an exception\n+ if (lastEnd < extensionString.length()) {\n+ throw new IllegalArgumentException(\"Invalid extensions; keyless value present: \" + extensionString.substring(lastEnd));\n+ }\n+ return extensions;\n+ }\n+\n+ private void removeEmptyValue(Map map) {\n+ map.entrySet().removeIf(entry -> entry.getValue().isEmpty());\n+ map.entrySet().removeIf(entry -> entry.getValue() == null);\n+ }\n+\n+ private String 
convertArrayLikeKey(String key) {\n+ Matcher matcher = EXTENSION_KEY_ARRAY_CAPTURE.matcher(key);\n+ if (matcher.matches()) {\n+ return \"[\" + matcher.group(1) + \"]\" + matcher.group(2);\n+ }\n+ return key;\n+ }\n+\n+ public static String desanitizeExtensionVal(String value) {\n+ String desanitized = value;\n+ for (Map.Entry entry : EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.entrySet()) {\n+ desanitized = desanitized.replace(entry.getKey(), entry.getValue());\n+ }\n+ return desanitized;\n+ }\n+\n+ public static class CEFEvent {\n+ private String version;\n+ private String deviceVendor;\n+ private String deviceProduct;\n+ private String deviceVersion;\n+ private String deviceEventClassId;\n+ private String name;\n+ private String severity;\n+ private final Map extensions = new HashMap<>();\n+ private Map translatedFields;\n+\n+ // Getters and setters for all fields\n+ public String getVersion() {\n+ return version;\n+ }\n+\n+ public void setVersion(String version) {\n+ this.version = version;\n+ }\n+\n+ public String getDeviceVendor() {\n+ return deviceVendor;\n+ }\n+\n+ public void setDeviceVendor(String deviceVendor) {\n+ this.deviceVendor = deviceVendor;\n+ }\n+\n+ public String getDeviceProduct() {\n+ return deviceProduct;\n+ }\n+\n+ public void setDeviceProduct(String deviceProduct) {\n+ this.deviceProduct = deviceProduct;\n+ }\n+\n+ public String getDeviceVersion() {\n+ return deviceVersion;\n+ }\n+\n+ public void setDeviceVersion(String deviceVersion) {\n+ this.deviceVersion = deviceVersion;\n+ }\n+\n+ public String getDeviceEventClassId() {\n+ return deviceEventClassId;\n+ }\n+\n+ public void setDeviceEventClassId(String deviceEventClassId) {\n+ this.deviceEventClassId = deviceEventClassId;\n+ }\n+\n+ public String getName() {\n+ return name;\n+ }\n+\n+ public void setName(String name) {\n+ this.name = name;\n+ }\n+\n+ public String getSeverity() {\n+ return severity;\n+ }\n+\n+ public void setSeverity(String severity) {\n+ this.severity = severity;\n+ }\n+\n+ public Map getExtensions() {\n+ return extensions;\n+ }\n+\n+ public void setExtensions(Map extensions) {\n+ this.extensions.putAll(extensions);\n+ }\n+\n+ public void addExtension(String key, String extension) {\n+ this.extensions.put(key, extension);\n+ }\n+\n+ public void removeMappedExtensions() {\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsKey(entry.getKey()));\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsValue(entry.getKey()));", + "comment_created_at": "2025-03-26T15:33:55+00:00", + "comment_author": "joegallo", + "comment_body": "No, do it staticly. There's no need to allocate a new HashSet umpteen times.", + "pr_file_module": null + }, + { + "comment_id": "2016204628", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 122491, + "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", + "discussion_id": "2012358741", + "commented_code": "@@ -0,0 +1,449 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+import java.util.stream.Collectors;\n+\n+final class CefParser {\n+\n+ private final IngestDocument ingestDocument;\n+ private final boolean removeEmptyValue;\n+\n+ CefParser(IngestDocument ingestDocument, boolean removeEmptyValue) {\n+ this.ingestDocument = ingestDocument;\n+ this.removeEmptyValue = removeEmptyValue;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:\\\\S|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = new HashMap<>();\n+ private static final Map FIELD_MAPPING = new HashMap<>();\n+ private static final String ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = \"incomplete CEF header\";\n+\n+ static {\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\\\\", \"\\\\\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\=\", \"=\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\n\", \"\\n\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\r\", \"\\r\");\n+\n+ FIELD_MAPPING.put(\"app\", \"network.protocol\");\n+ FIELD_MAPPING.put(\"in\", \"source.bytes\");\n+ FIELD_MAPPING.put(\"out\", \"destination.bytes\");\n+ FIELD_MAPPING.put(\"dst\", \"destination.ip\");\n+ FIELD_MAPPING.put(\"dlat\", \"destination.geo.location.lat\");\n+ FIELD_MAPPING.put(\"dlong\", \"destination.geo.location.lon\");\n+ FIELD_MAPPING.put(\"dhost\", \"destination.domain\");\n+ FIELD_MAPPING.put(\"dmac\", \"destination.mac\");\n+ FIELD_MAPPING.put(\"dntdom\", \"destination.registered_domain\");\n+ FIELD_MAPPING.put(\"dpt\", \"destination.port\");\n+ FIELD_MAPPING.put(\"dpid\", \"destination.process.pid\");\n+ FIELD_MAPPING.put(\"dproc\", \"destination.process.name\");\n+ FIELD_MAPPING.put(\"duid\", \"destination.user.id\");\n+ FIELD_MAPPING.put(\"duser\", \"destination.user.name\");\n+ FIELD_MAPPING.put(\"dpriv\", \"destination.user.group.name\");\n+ FIELD_MAPPING.put(\"act\", \"event.action\");\n+ FIELD_MAPPING.put(\"dvc\", \"observer.ip\");\n+ FIELD_MAPPING.put(\"deviceDirection\", \"network.direction\");\n+ FIELD_MAPPING.put(\"deviceDnsDomain\", 
\"observer.registered_domain\");\n+ FIELD_MAPPING.put(\"deviceExternalId\", \"observer.name\");\n+ FIELD_MAPPING.put(\"deviceFacility\", \"log.syslog.facility.code\");\n+ FIELD_MAPPING.put(\"dvchost\", \"observer.hostname\");\n+ FIELD_MAPPING.put(\"deviceInboundInterface\", \"observer.ingress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcmac\", \"observer.mac\");\n+ FIELD_MAPPING.put(\"deviceOutboundInterface\", \"observer.egress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcpid\", \"process.pid\");\n+ FIELD_MAPPING.put(\"deviceProcessName\", \"process.name\");\n+ FIELD_MAPPING.put(\"rt\", \"@timestamp\");\n+ FIELD_MAPPING.put(\"dtz\", \"event.timezone\");\n+ FIELD_MAPPING.put(\"deviceTranslatedAddress\", \"host.nat.ip\");\n+ FIELD_MAPPING.put(\"device.version\", \"observer.version\");\n+ FIELD_MAPPING.put(\"deviceVersion\", \"observer.version\");\n+ FIELD_MAPPING.put(\"device.product\", \"observer.product\");\n+ FIELD_MAPPING.put(\"deviceProduct\", \"observer.product\");\n+ FIELD_MAPPING.put(\"device.event_class_id\", \"event.code\");\n+ FIELD_MAPPING.put(\"device.vendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"deviceVendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"end\", \"event.end\");\n+ FIELD_MAPPING.put(\"eventId\", \"event.id\");\n+ FIELD_MAPPING.put(\"outcome\", \"event.outcome\");\n+ FIELD_MAPPING.put(\"fileCreateTime\", \"file.created\");\n+ FIELD_MAPPING.put(\"fileHash\", \"file.hash\");\n+ FIELD_MAPPING.put(\"fileId\", \"file.inode\");\n+ FIELD_MAPPING.put(\"fileModificationTime\", \"file.mtime\");\n+ FIELD_MAPPING.put(\"fname\", \"file.name\");\n+ FIELD_MAPPING.put(\"filePath\", \"file.path\");\n+ FIELD_MAPPING.put(\"filePermission\", \"file.group\");\n+ FIELD_MAPPING.put(\"fsize\", \"file.size\");\n+ FIELD_MAPPING.put(\"fileType\", \"file.extension\");\n+ FIELD_MAPPING.put(\"mrt\", \"event.ingested\");\n+ FIELD_MAPPING.put(\"msg\", \"message\");\n+ FIELD_MAPPING.put(\"reason\", \"event.reason\");\n+ FIELD_MAPPING.put(\"requestClientApplication\", \"user_agent.original\");\n+ FIELD_MAPPING.put(\"requestContext\", \"http.request.referrer\");\n+ FIELD_MAPPING.put(\"requestMethod\", \"http.request.method\");\n+ FIELD_MAPPING.put(\"request\", \"url.original\");\n+ FIELD_MAPPING.put(\"src\", \"source.ip\");\n+ FIELD_MAPPING.put(\"sourceDnsDomain\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"slat\", \"source.geo.location.lat\");\n+ FIELD_MAPPING.put(\"slong\", \"source.geo.location.lon\");\n+ FIELD_MAPPING.put(\"shost\", \"source.domain\");\n+ FIELD_MAPPING.put(\"smac\", \"source.mac\");\n+ FIELD_MAPPING.put(\"sntdom\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"spt\", \"source.port\");\n+ FIELD_MAPPING.put(\"spid\", \"source.process.pid\");\n+ FIELD_MAPPING.put(\"sproc\", \"source.process.name\");\n+ FIELD_MAPPING.put(\"sourceServiceName\", \"source.service.name\");\n+ FIELD_MAPPING.put(\"suser\", \"source.user.name\");\n+ FIELD_MAPPING.put(\"start\", \"event.start\");\n+ FIELD_MAPPING.put(\"proto\", \"network.transport\");\n+ // Add more mappings as needed\n+ }\n+\n+ void process(String cefString, String targetField) {\n+ List headerFields = new ArrayList<>();\n+ Matcher headerMatcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && headerMatcher.find(); i++) {\n+ String field = headerMatcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headerFields.add(field);\n+ extensionStart = headerMatcher.end();\n+ }\n+\n+ if (headerFields.size() > 0 && 
headerFields.get(0).startsWith(\"CEF:\")) {\n+ CEFEvent event = new CEFEvent();\n+ // Add error message if there are not enough header fields\n+ if (headerFields.size() != 7) {\n+ ingestDocument.appendFieldValue(\"error.message\", ERROR_MESSAGE_INCOMPLETE_CEF_HEADER);\n+ }\n+ for (int i = 0; i < headerFields.size(); i++) {\n+ switch (i) {\n+ case 0:\n+ event.setVersion(headerFields.get(0).substring(4));\n+ break;\n+ case 1:\n+ event.setDeviceVendor(headerFields.get(1));\n+ ingestDocument.setFieldValue(\"observer.vendor\", headerFields.get(1));\n+ break;\n+ case 2:\n+ event.setDeviceProduct(headerFields.get(2));\n+ ingestDocument.setFieldValue(\"observer.product\", headerFields.get(2));\n+ break;\n+ case 3:\n+ event.setDeviceVersion(headerFields.get(3));\n+ ingestDocument.setFieldValue(\"observer.version\", headerFields.get(3));\n+ break;\n+ case 4:\n+ event.setDeviceEventClassId(headerFields.get(4));\n+ ingestDocument.setFieldValue(\"event.code\", headerFields.get(4));\n+ break;\n+ case 5:\n+ event.setName(headerFields.get(5));\n+ break;\n+ case 6:\n+ event.setSeverity(headerFields.get(6));\n+ break;\n+ }\n+ }\n+ String extensionString = cefString.substring(extensionStart);\n+ Map extensions = parseExtensions(extensionString);\n+\n+ if (removeEmptyValue) {\n+ removeEmptyValue(extensions);\n+ }\n+ event.setExtensions(extensions);\n+\n+ // Translate possible ECS fields and remove them from extensions\n+ Map translatedFields = extensions.entrySet()\n+ .stream()\n+ .filter(entry -> FIELD_MAPPING.containsKey(entry.getKey()))\n+ .collect(Collectors.toMap(entry -> FIELD_MAPPING.get(entry.getKey()), entry -> {\n+ Class fieldType = ECSFieldWithType.getFieldType(FIELD_MAPPING.get(entry.getKey()));\n+ return convertValueToType(entry.getValue(), fieldType);\n+ }));\n+\n+ // Remove the translated entries from extensions\n+ event.removeMappedExtensions();\n+\n+ ingestDocument.setFieldValue(targetField, event.toObject());\n+ // Add ECS translations to the root of the document\n+ if (translatedFields.isEmpty() == false) {\n+ translatedFields.forEach(ingestDocument::setFieldValue);\n+ }\n+ } else {\n+ throw new IllegalArgumentException(\"Invalid CEF format\");\n+ }\n+ }\n+\n+ private Object convertValueToType(String value, Class type) {\n+ if (type == Long.class) {\n+ return Long.parseLong(value);\n+ } else if (type == Double.class) {\n+ return Double.parseDouble(value);\n+ } else if (type == Integer.class) {\n+ return Integer.parseInt(value);\n+ } else {\n+ return value; // Default to String\n+ }\n+ }\n+\n+ private Map parseExtensions(String extensionString) {\n+ Map extensions = new HashMap<>();\n+ Matcher matcher = EXTENSION_NEXT_KEY_VALUE_PATTERN.matcher(extensionString);\n+ int lastEnd = 0;\n+ while (matcher.find()) {\n+ String key = matcher.group(1);\n+ String value = matcher.group(2);\n+\n+ // Convert extension field name to strict legal field_reference\n+ if (key.endsWith(\"]\")) {\n+ key = convertArrayLikeKey(key);\n+ }\n+\n+ extensions.put(key, desanitizeExtensionVal(value.trim()));\n+ lastEnd = matcher.end();\n+ }\n+ // If there's any remaining unparsed content, throw an exception\n+ if (lastEnd < extensionString.length()) {\n+ throw new IllegalArgumentException(\"Invalid extensions; keyless value present: \" + extensionString.substring(lastEnd));\n+ }\n+ return extensions;\n+ }\n+\n+ private void removeEmptyValue(Map map) {\n+ map.entrySet().removeIf(entry -> entry.getValue().isEmpty());\n+ map.entrySet().removeIf(entry -> entry.getValue() == null);\n+ }\n+\n+ private String 
convertArrayLikeKey(String key) {\n+ Matcher matcher = EXTENSION_KEY_ARRAY_CAPTURE.matcher(key);\n+ if (matcher.matches()) {\n+ return \"[\" + matcher.group(1) + \"]\" + matcher.group(2);\n+ }\n+ return key;\n+ }\n+\n+ public static String desanitizeExtensionVal(String value) {\n+ String desanitized = value;\n+ for (Map.Entry entry : EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.entrySet()) {\n+ desanitized = desanitized.replace(entry.getKey(), entry.getValue());\n+ }\n+ return desanitized;\n+ }\n+\n+ public static class CEFEvent {\n+ private String version;\n+ private String deviceVendor;\n+ private String deviceProduct;\n+ private String deviceVersion;\n+ private String deviceEventClassId;\n+ private String name;\n+ private String severity;\n+ private final Map extensions = new HashMap<>();\n+ private Map translatedFields;\n+\n+ // Getters and setters for all fields\n+ public String getVersion() {\n+ return version;\n+ }\n+\n+ public void setVersion(String version) {\n+ this.version = version;\n+ }\n+\n+ public String getDeviceVendor() {\n+ return deviceVendor;\n+ }\n+\n+ public void setDeviceVendor(String deviceVendor) {\n+ this.deviceVendor = deviceVendor;\n+ }\n+\n+ public String getDeviceProduct() {\n+ return deviceProduct;\n+ }\n+\n+ public void setDeviceProduct(String deviceProduct) {\n+ this.deviceProduct = deviceProduct;\n+ }\n+\n+ public String getDeviceVersion() {\n+ return deviceVersion;\n+ }\n+\n+ public void setDeviceVersion(String deviceVersion) {\n+ this.deviceVersion = deviceVersion;\n+ }\n+\n+ public String getDeviceEventClassId() {\n+ return deviceEventClassId;\n+ }\n+\n+ public void setDeviceEventClassId(String deviceEventClassId) {\n+ this.deviceEventClassId = deviceEventClassId;\n+ }\n+\n+ public String getName() {\n+ return name;\n+ }\n+\n+ public void setName(String name) {\n+ this.name = name;\n+ }\n+\n+ public String getSeverity() {\n+ return severity;\n+ }\n+\n+ public void setSeverity(String severity) {\n+ this.severity = severity;\n+ }\n+\n+ public Map getExtensions() {\n+ return extensions;\n+ }\n+\n+ public void setExtensions(Map extensions) {\n+ this.extensions.putAll(extensions);\n+ }\n+\n+ public void addExtension(String key, String extension) {\n+ this.extensions.put(key, extension);\n+ }\n+\n+ public void removeMappedExtensions() {\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsKey(entry.getKey()));\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsValue(entry.getKey()));", + "comment_created_at": "2025-03-27T10:40:24+00:00", + "comment_author": "bhapas", + "comment_body": "Moved to static Hashset.", + "pr_file_module": null + }, + { + "comment_id": "2016853377", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 122491, + "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", + "discussion_id": "2012358741", + "commented_code": "@@ -0,0 +1,449 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+import java.util.stream.Collectors;\n+\n+final class CefParser {\n+\n+ private final IngestDocument ingestDocument;\n+ private final boolean removeEmptyValue;\n+\n+ CefParser(IngestDocument ingestDocument, boolean removeEmptyValue) {\n+ this.ingestDocument = ingestDocument;\n+ this.removeEmptyValue = removeEmptyValue;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:\\\\S|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = new HashMap<>();\n+ private static final Map FIELD_MAPPING = new HashMap<>();\n+ private static final String ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = \"incomplete CEF header\";\n+\n+ static {\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\\\\", \"\\\\\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\=\", \"=\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\n\", \"\\n\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\r\", \"\\r\");\n+\n+ FIELD_MAPPING.put(\"app\", \"network.protocol\");\n+ FIELD_MAPPING.put(\"in\", \"source.bytes\");\n+ FIELD_MAPPING.put(\"out\", \"destination.bytes\");\n+ FIELD_MAPPING.put(\"dst\", \"destination.ip\");\n+ FIELD_MAPPING.put(\"dlat\", \"destination.geo.location.lat\");\n+ FIELD_MAPPING.put(\"dlong\", \"destination.geo.location.lon\");\n+ FIELD_MAPPING.put(\"dhost\", \"destination.domain\");\n+ FIELD_MAPPING.put(\"dmac\", \"destination.mac\");\n+ FIELD_MAPPING.put(\"dntdom\", \"destination.registered_domain\");\n+ FIELD_MAPPING.put(\"dpt\", \"destination.port\");\n+ FIELD_MAPPING.put(\"dpid\", \"destination.process.pid\");\n+ FIELD_MAPPING.put(\"dproc\", \"destination.process.name\");\n+ FIELD_MAPPING.put(\"duid\", \"destination.user.id\");\n+ FIELD_MAPPING.put(\"duser\", \"destination.user.name\");\n+ FIELD_MAPPING.put(\"dpriv\", \"destination.user.group.name\");\n+ FIELD_MAPPING.put(\"act\", \"event.action\");\n+ FIELD_MAPPING.put(\"dvc\", \"observer.ip\");\n+ FIELD_MAPPING.put(\"deviceDirection\", \"network.direction\");\n+ FIELD_MAPPING.put(\"deviceDnsDomain\", 
\"observer.registered_domain\");\n+ FIELD_MAPPING.put(\"deviceExternalId\", \"observer.name\");\n+ FIELD_MAPPING.put(\"deviceFacility\", \"log.syslog.facility.code\");\n+ FIELD_MAPPING.put(\"dvchost\", \"observer.hostname\");\n+ FIELD_MAPPING.put(\"deviceInboundInterface\", \"observer.ingress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcmac\", \"observer.mac\");\n+ FIELD_MAPPING.put(\"deviceOutboundInterface\", \"observer.egress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcpid\", \"process.pid\");\n+ FIELD_MAPPING.put(\"deviceProcessName\", \"process.name\");\n+ FIELD_MAPPING.put(\"rt\", \"@timestamp\");\n+ FIELD_MAPPING.put(\"dtz\", \"event.timezone\");\n+ FIELD_MAPPING.put(\"deviceTranslatedAddress\", \"host.nat.ip\");\n+ FIELD_MAPPING.put(\"device.version\", \"observer.version\");\n+ FIELD_MAPPING.put(\"deviceVersion\", \"observer.version\");\n+ FIELD_MAPPING.put(\"device.product\", \"observer.product\");\n+ FIELD_MAPPING.put(\"deviceProduct\", \"observer.product\");\n+ FIELD_MAPPING.put(\"device.event_class_id\", \"event.code\");\n+ FIELD_MAPPING.put(\"device.vendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"deviceVendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"end\", \"event.end\");\n+ FIELD_MAPPING.put(\"eventId\", \"event.id\");\n+ FIELD_MAPPING.put(\"outcome\", \"event.outcome\");\n+ FIELD_MAPPING.put(\"fileCreateTime\", \"file.created\");\n+ FIELD_MAPPING.put(\"fileHash\", \"file.hash\");\n+ FIELD_MAPPING.put(\"fileId\", \"file.inode\");\n+ FIELD_MAPPING.put(\"fileModificationTime\", \"file.mtime\");\n+ FIELD_MAPPING.put(\"fname\", \"file.name\");\n+ FIELD_MAPPING.put(\"filePath\", \"file.path\");\n+ FIELD_MAPPING.put(\"filePermission\", \"file.group\");\n+ FIELD_MAPPING.put(\"fsize\", \"file.size\");\n+ FIELD_MAPPING.put(\"fileType\", \"file.extension\");\n+ FIELD_MAPPING.put(\"mrt\", \"event.ingested\");\n+ FIELD_MAPPING.put(\"msg\", \"message\");\n+ FIELD_MAPPING.put(\"reason\", \"event.reason\");\n+ FIELD_MAPPING.put(\"requestClientApplication\", \"user_agent.original\");\n+ FIELD_MAPPING.put(\"requestContext\", \"http.request.referrer\");\n+ FIELD_MAPPING.put(\"requestMethod\", \"http.request.method\");\n+ FIELD_MAPPING.put(\"request\", \"url.original\");\n+ FIELD_MAPPING.put(\"src\", \"source.ip\");\n+ FIELD_MAPPING.put(\"sourceDnsDomain\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"slat\", \"source.geo.location.lat\");\n+ FIELD_MAPPING.put(\"slong\", \"source.geo.location.lon\");\n+ FIELD_MAPPING.put(\"shost\", \"source.domain\");\n+ FIELD_MAPPING.put(\"smac\", \"source.mac\");\n+ FIELD_MAPPING.put(\"sntdom\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"spt\", \"source.port\");\n+ FIELD_MAPPING.put(\"spid\", \"source.process.pid\");\n+ FIELD_MAPPING.put(\"sproc\", \"source.process.name\");\n+ FIELD_MAPPING.put(\"sourceServiceName\", \"source.service.name\");\n+ FIELD_MAPPING.put(\"suser\", \"source.user.name\");\n+ FIELD_MAPPING.put(\"start\", \"event.start\");\n+ FIELD_MAPPING.put(\"proto\", \"network.transport\");\n+ // Add more mappings as needed\n+ }\n+\n+ void process(String cefString, String targetField) {\n+ List headerFields = new ArrayList<>();\n+ Matcher headerMatcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && headerMatcher.find(); i++) {\n+ String field = headerMatcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headerFields.add(field);\n+ extensionStart = headerMatcher.end();\n+ }\n+\n+ if (headerFields.size() > 0 && 
headerFields.get(0).startsWith(\"CEF:\")) {\n+ CEFEvent event = new CEFEvent();\n+ // Add error message if there are not enough header fields\n+ if (headerFields.size() != 7) {\n+ ingestDocument.appendFieldValue(\"error.message\", ERROR_MESSAGE_INCOMPLETE_CEF_HEADER);\n+ }\n+ for (int i = 0; i < headerFields.size(); i++) {\n+ switch (i) {\n+ case 0:\n+ event.setVersion(headerFields.get(0).substring(4));\n+ break;\n+ case 1:\n+ event.setDeviceVendor(headerFields.get(1));\n+ ingestDocument.setFieldValue(\"observer.vendor\", headerFields.get(1));\n+ break;\n+ case 2:\n+ event.setDeviceProduct(headerFields.get(2));\n+ ingestDocument.setFieldValue(\"observer.product\", headerFields.get(2));\n+ break;\n+ case 3:\n+ event.setDeviceVersion(headerFields.get(3));\n+ ingestDocument.setFieldValue(\"observer.version\", headerFields.get(3));\n+ break;\n+ case 4:\n+ event.setDeviceEventClassId(headerFields.get(4));\n+ ingestDocument.setFieldValue(\"event.code\", headerFields.get(4));\n+ break;\n+ case 5:\n+ event.setName(headerFields.get(5));\n+ break;\n+ case 6:\n+ event.setSeverity(headerFields.get(6));\n+ break;\n+ }\n+ }\n+ String extensionString = cefString.substring(extensionStart);\n+ Map extensions = parseExtensions(extensionString);\n+\n+ if (removeEmptyValue) {\n+ removeEmptyValue(extensions);\n+ }\n+ event.setExtensions(extensions);\n+\n+ // Translate possible ECS fields and remove them from extensions\n+ Map translatedFields = extensions.entrySet()\n+ .stream()\n+ .filter(entry -> FIELD_MAPPING.containsKey(entry.getKey()))\n+ .collect(Collectors.toMap(entry -> FIELD_MAPPING.get(entry.getKey()), entry -> {\n+ Class fieldType = ECSFieldWithType.getFieldType(FIELD_MAPPING.get(entry.getKey()));\n+ return convertValueToType(entry.getValue(), fieldType);\n+ }));\n+\n+ // Remove the translated entries from extensions\n+ event.removeMappedExtensions();\n+\n+ ingestDocument.setFieldValue(targetField, event.toObject());\n+ // Add ECS translations to the root of the document\n+ if (translatedFields.isEmpty() == false) {\n+ translatedFields.forEach(ingestDocument::setFieldValue);\n+ }\n+ } else {\n+ throw new IllegalArgumentException(\"Invalid CEF format\");\n+ }\n+ }\n+\n+ private Object convertValueToType(String value, Class type) {\n+ if (type == Long.class) {\n+ return Long.parseLong(value);\n+ } else if (type == Double.class) {\n+ return Double.parseDouble(value);\n+ } else if (type == Integer.class) {\n+ return Integer.parseInt(value);\n+ } else {\n+ return value; // Default to String\n+ }\n+ }\n+\n+ private Map parseExtensions(String extensionString) {\n+ Map extensions = new HashMap<>();\n+ Matcher matcher = EXTENSION_NEXT_KEY_VALUE_PATTERN.matcher(extensionString);\n+ int lastEnd = 0;\n+ while (matcher.find()) {\n+ String key = matcher.group(1);\n+ String value = matcher.group(2);\n+\n+ // Convert extension field name to strict legal field_reference\n+ if (key.endsWith(\"]\")) {\n+ key = convertArrayLikeKey(key);\n+ }\n+\n+ extensions.put(key, desanitizeExtensionVal(value.trim()));\n+ lastEnd = matcher.end();\n+ }\n+ // If there's any remaining unparsed content, throw an exception\n+ if (lastEnd < extensionString.length()) {\n+ throw new IllegalArgumentException(\"Invalid extensions; keyless value present: \" + extensionString.substring(lastEnd));\n+ }\n+ return extensions;\n+ }\n+\n+ private void removeEmptyValue(Map map) {\n+ map.entrySet().removeIf(entry -> entry.getValue().isEmpty());\n+ map.entrySet().removeIf(entry -> entry.getValue() == null);\n+ }\n+\n+ private String 
convertArrayLikeKey(String key) {\n+ Matcher matcher = EXTENSION_KEY_ARRAY_CAPTURE.matcher(key);\n+ if (matcher.matches()) {\n+ return \"[\" + matcher.group(1) + \"]\" + matcher.group(2);\n+ }\n+ return key;\n+ }\n+\n+ public static String desanitizeExtensionVal(String value) {\n+ String desanitized = value;\n+ for (Map.Entry entry : EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.entrySet()) {\n+ desanitized = desanitized.replace(entry.getKey(), entry.getValue());\n+ }\n+ return desanitized;\n+ }\n+\n+ public static class CEFEvent {\n+ private String version;\n+ private String deviceVendor;\n+ private String deviceProduct;\n+ private String deviceVersion;\n+ private String deviceEventClassId;\n+ private String name;\n+ private String severity;\n+ private final Map extensions = new HashMap<>();\n+ private Map translatedFields;\n+\n+ // Getters and setters for all fields\n+ public String getVersion() {\n+ return version;\n+ }\n+\n+ public void setVersion(String version) {\n+ this.version = version;\n+ }\n+\n+ public String getDeviceVendor() {\n+ return deviceVendor;\n+ }\n+\n+ public void setDeviceVendor(String deviceVendor) {\n+ this.deviceVendor = deviceVendor;\n+ }\n+\n+ public String getDeviceProduct() {\n+ return deviceProduct;\n+ }\n+\n+ public void setDeviceProduct(String deviceProduct) {\n+ this.deviceProduct = deviceProduct;\n+ }\n+\n+ public String getDeviceVersion() {\n+ return deviceVersion;\n+ }\n+\n+ public void setDeviceVersion(String deviceVersion) {\n+ this.deviceVersion = deviceVersion;\n+ }\n+\n+ public String getDeviceEventClassId() {\n+ return deviceEventClassId;\n+ }\n+\n+ public void setDeviceEventClassId(String deviceEventClassId) {\n+ this.deviceEventClassId = deviceEventClassId;\n+ }\n+\n+ public String getName() {\n+ return name;\n+ }\n+\n+ public void setName(String name) {\n+ this.name = name;\n+ }\n+\n+ public String getSeverity() {\n+ return severity;\n+ }\n+\n+ public void setSeverity(String severity) {\n+ this.severity = severity;\n+ }\n+\n+ public Map getExtensions() {\n+ return extensions;\n+ }\n+\n+ public void setExtensions(Map extensions) {\n+ this.extensions.putAll(extensions);\n+ }\n+\n+ public void addExtension(String key, String extension) {\n+ this.extensions.put(key, extension);\n+ }\n+\n+ public void removeMappedExtensions() {\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsKey(entry.getKey()));\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsValue(entry.getKey()));", + "comment_created_at": "2025-03-27T14:48:18+00:00", + "comment_author": "joegallo", + "comment_body": "This looks right to me now, but I moved the location of the definition of the static set.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2018560953", + "pr_number": 122491, + "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", + "created_at": "2025-03-28T12:20:15+00:00", + "commented_code": "}\n\n // Translate extensions to possible ECS fields\n Map translatedFields = extensions.entrySet()\n .stream()\n .filter(entry -> FIELD_MAPPINGS.containsKey(entry.getKey()))\n .collect(Collectors.toMap(entry -> FIELD_MAPPINGS.get(entry.getKey()), entry -> {\n DataType fieldType = FIELDS.get(FIELD_MAPPINGS.get(entry.getKey()));\n return convertValueToType(entry.getValue(), fieldType);\n }));\n Map translatedFields = new HashMap<>();", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2018560953", + "repo_full_name": "elastic/elasticsearch", + 
"pr_number": 122491, + "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", + "discussion_id": "2018560953", + "commented_code": "@@ -348,13 +347,15 @@ private void processExtensions(String cefString, int extensionStart, CEFEvent ev\n }\n \n // Translate extensions to possible ECS fields\n- Map translatedFields = extensions.entrySet()\n- .stream()\n- .filter(entry -> FIELD_MAPPINGS.containsKey(entry.getKey()))\n- .collect(Collectors.toMap(entry -> FIELD_MAPPINGS.get(entry.getKey()), entry -> {\n- DataType fieldType = FIELDS.get(FIELD_MAPPINGS.get(entry.getKey()));\n- return convertValueToType(entry.getValue(), fieldType);\n- }));\n+ Map translatedFields = new HashMap<>();", + "comment_created_at": "2025-03-28T12:20:15+00:00", + "comment_author": "joegallo", + "comment_body": "Do you think it's worth pre-sizing this `HashMap` (via `HashMap.newHashMap`) to match the size of the `extensions`? By my read that would slightly oversize the map, but with the effect that you'd never need to resize. I imagine it's worth it.", + "pr_file_module": null + }, + { + "comment_id": "2019736523", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 122491, + "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", + "discussion_id": "2018560953", + "commented_code": "@@ -348,13 +347,15 @@ private void processExtensions(String cefString, int extensionStart, CEFEvent ev\n }\n \n // Translate extensions to possible ECS fields\n- Map translatedFields = extensions.entrySet()\n- .stream()\n- .filter(entry -> FIELD_MAPPINGS.containsKey(entry.getKey()))\n- .collect(Collectors.toMap(entry -> FIELD_MAPPINGS.get(entry.getKey()), entry -> {\n- DataType fieldType = FIELDS.get(FIELD_MAPPINGS.get(entry.getKey()));\n- return convertValueToType(entry.getValue(), fieldType);\n- }));\n+ Map translatedFields = new HashMap<>();", + "comment_created_at": "2025-03-29T06:40:34+00:00", + "comment_author": "bhapas", + "comment_body": "Removing all intermediate collections. Adding the mapping to event as we parse.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-measure-before-optimizing-performance.md b/_reviewers/elasticsearch-measure-before-optimizing-performance.md index 99c301c..31a6d40 100644 --- a/_reviewers/elasticsearch-measure-before-optimizing-performance.md +++ b/_reviewers/elasticsearch-measure-before-optimizing-performance.md @@ -44,319 +44,3 @@ Key practices: - Consider system-wide impact, not just local optimizations This approach helps prevent premature optimization and ensures changes provide meaningful performance benefits. 
- - -[ - { - "discussion_id": "2166503674", - "pr_number": 129969, - "pr_file": "server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java", - "created_at": "2025-06-25T11:38:51+00:00", - "commented_code": "iwc.setMaxFullFlushMergeWaitMillis(-1);\n iwc.setSimilarity(engineConfig.getSimilarity());\n iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());\n iwc.setCodec(engineConfig.getCodec());\n iwc.setCodec(new TrackingPostingsInMemoryBytesCodec(engineConfig.getCodec()));", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2166503674", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129969, - "pr_file": "server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java", - "discussion_id": "2166503674", - "commented_code": "@@ -2778,7 +2779,7 @@ private IndexWriterConfig getIndexWriterConfig() {\n iwc.setMaxFullFlushMergeWaitMillis(-1);\n iwc.setSimilarity(engineConfig.getSimilarity());\n iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());\n- iwc.setCodec(engineConfig.getCodec());\n+ iwc.setCodec(new TrackingPostingsInMemoryBytesCodec(engineConfig.getCodec()));", - "comment_created_at": "2025-06-25T11:38:51+00:00", - "comment_author": "martijnvg", - "comment_body": "I wonder what the overhead is of always wrapping the codec in `TrackingPostingsInMemoryBytesCodec`. Maybe let's quickly run benchmark? (elastic/logs?)\r\n\r\nAdditionally I wonder whether this should only be done for stateless only.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2168541968", - "pr_number": 129997, - "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java", - "created_at": "2025-06-26T08:47:27+00:00", - "commented_code": "XContentParser.Token token;\n XContentParser.Token previousToken = parser.currentToken();\n int elements = 0;\n int countArray = 0;\n long nestedDocsLimit = context.indexSettings().getMappingNestedDocsLimit();\n while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {\n if (token == XContentParser.Token.START_OBJECT) {\n if (countArray++ >= nestedDocsLimit) {", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2168541968", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129997, - "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java", - "discussion_id": "2168541968", - "commented_code": "@@ -749,8 +749,20 @@ private static void parseNonDynamicArray(\n XContentParser.Token token;\n XContentParser.Token previousToken = parser.currentToken();\n int elements = 0;\n+ int countArray = 0;\n+ long nestedDocsLimit = context.indexSettings().getMappingNestedDocsLimit();\n while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {\n if (token == XContentParser.Token.START_OBJECT) {\n+ if (countArray++ >= nestedDocsLimit) {", - "comment_created_at": "2025-06-26T08:47:27+00:00", - "comment_author": "javanna", - "comment_body": "I worry that this will be a performance hit when parsing documents perhaps. We should have micro benchmarks for document parsing under ./benchmarks. 
It is probably worth double checking the impact of this change.", - "pr_file_module": null - }, - { - "comment_id": "2168594099", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129997, - "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java", - "discussion_id": "2168541968", - "commented_code": "@@ -749,8 +749,20 @@ private static void parseNonDynamicArray(\n XContentParser.Token token;\n XContentParser.Token previousToken = parser.currentToken();\n int elements = 0;\n+ int countArray = 0;\n+ long nestedDocsLimit = context.indexSettings().getMappingNestedDocsLimit();\n while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {\n if (token == XContentParser.Token.START_OBJECT) {\n+ if (countArray++ >= nestedDocsLimit) {", - "comment_created_at": "2025-06-26T09:12:03+00:00", - "comment_author": "drempapis", - "comment_body": ">We should have micro benchmarks for document parsing under ./benchmarks.\r\n\r\nWhat\u2019s the recommended way to micro-benchmark this functionality", - "pr_file_module": null - }, - { - "comment_id": "2172680459", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129997, - "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java", - "discussion_id": "2168541968", - "commented_code": "@@ -749,8 +749,20 @@ private static void parseNonDynamicArray(\n XContentParser.Token token;\n XContentParser.Token previousToken = parser.currentToken();\n int elements = 0;\n+ int countArray = 0;\n+ long nestedDocsLimit = context.indexSettings().getMappingNestedDocsLimit();\n while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {\n if (token == XContentParser.Token.START_OBJECT) {\n+ if (countArray++ >= nestedDocsLimit) {", - "comment_created_at": "2025-06-27T18:54:56+00:00", - "comment_author": "javanna", - "comment_body": "I would run the existing micro benchmarks for document parsing, and check if they would detect regressions around the counting you added, or add a new one that's specific to this scenario if needed. 
We may actually need documents with an array including many inner objects that we likely don't have in the existing benchmarks", - "pr_file_module": null - }, - { - "comment_id": "2177851365", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129997, - "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java", - "discussion_id": "2168541968", - "commented_code": "@@ -749,8 +749,20 @@ private static void parseNonDynamicArray(\n XContentParser.Token token;\n XContentParser.Token previousToken = parser.currentToken();\n int elements = 0;\n+ int countArray = 0;\n+ long nestedDocsLimit = context.indexSettings().getMappingNestedDocsLimit();\n while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {\n if (token == XContentParser.Token.START_OBJECT) {\n+ if (countArray++ >= nestedDocsLimit) {", - "comment_created_at": "2025-07-01T15:04:00+00:00", - "comment_author": "drempapis", - "comment_body": "I did a micro-benchmarking to test the time it takes to parse documents with varying `array` sizes.\r\nThe `Run 1` is for he main branch where`Run 2` is for the current branch (update)\r\n\r\n```\r\n\r\nArray Size | Run 1 Avg (ns/op) | Run 2 Avg (ns/op) | Difference (ns) | % Change\r\n--------------------------------------------------------------------------------\r\n10 | 1983.711 | 2223.238 | +239.527 | +12.07%\r\n100 | 15356.069 | 15497.797 | +141.728 | +0.92%\r\n1000 | 141146.977 | 145791.315 | +4644.338 | +3.29%\r\n10000 | 1432616.938 | 1443769.196 | +11152.258 | +0.78%\r\n```\r\nThe performance degraded slightly in the second run for all array sizes. For larger array sizes (1000 and 10000), the performance remained quite stable. \r\n\r\nFor the array size of 10 we observe the largest relative increase though the absolute impact is small.\r\n\r\nI executed the benchmark locally, aiming to minimize system load by stopping all resource-intensive processes. 
However, some system noise or garbage collection activity may have influenced the results.\r\n\r\nThe code I executed was the following \r\n```\r\n@Fork(value = 1)\r\n@Warmup(iterations = 5)\r\n@Measurement(iterations = 5)\r\n@BenchmarkMode(Mode.AverageTime)\r\n@OutputTimeUnit(TimeUnit.NANOSECONDS)\r\n@State(Scope.Benchmark)\r\npublic class DocumentParserBenchmark {\r\n\r\n static {\r\n LogConfigurator.loadLog4jPlugins();\r\n LogConfigurator.configureESLogging();\r\n }\r\n\r\n @Param({\"10\", \"100\", \"1000\", \"10000\"})\r\n private int arraySize;\r\n\r\n private MapperService mapperService;\r\n\r\n private SourceToParse sourceToParse;\r\n\r\n @Setup\r\n public void setUp() {\r\n this.mapperService = MapperServiceFactory.create(\"\"\"\r\n {\r\n \"properties\": {\r\n \"array\": {\r\n \"properties\": {\r\n \"value\": { \"type\": \"integer\" }\r\n }\r\n }\r\n }\r\n }\r\n \\s\"\"\");\r\n\r\n this.sourceToParse = new SourceToParse(UUIDs.randomBase64UUID(),\r\n new BytesArray(generateDocWithArray(arraySize)), XContentType.JSON);\r\n }\r\n\r\n @Benchmark\r\n public List benchmarkDocumentWithLargeArray() {\r\n return mapperService.documentMapper().parse(sourceToParse).docs();\r\n }\r\n\r\n private static String generateDocWithArray(int arraySize) {\r\n StringBuilder sb = new StringBuilder();\r\n sb.append(\"{ \\\"array\\\": [\");\r\n for (int i = 1; i <= arraySize; i++) {\r\n sb.append(\"{\\\"value\\\": \").append(i).append(\"}\");\r\n if (i < arraySize) {\r\n sb.append(\",\");\r\n }\r\n }\r\n sb.append(\"]}\");\r\n return sb.toString();\r\n }\r\n}\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1965662264", - "pr_number": 123148, - "pr_file": "server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesFetcher.java", - "created_at": "2025-02-21T15:14:51+00:00", - "commented_code": "null,\n Map.of()\n );\n responseMap.put(parentField, fieldCap);\n\n if (filter == null || Arrays.asList(types).contains(type)) {", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "1965662264", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 123148, - "pr_file": "server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesFetcher.java", - "discussion_id": "1965662264", - "commented_code": "@@ -214,7 +215,10 @@ static Map retrieveFieldCaps(\n null,\n Map.of()\n );\n- responseMap.put(parentField, fieldCap);\n+\n+ if (filter == null || Arrays.asList(types).contains(type)) {", - "comment_created_at": "2025-02-21T15:14:51+00:00", - "comment_author": "piergm", - "comment_body": "Careful, by calling `Arrays.asList` in a loop, here we are converting every time the array to a new `ArrayList` and it is not very efficient.\r\nThere are a couple of options here:\r\n1) We can convert the array to a ArrayList before the for loop (this should be safe since we are not changing the array content).\r\n2) Use a Set instead of an ArrayList (also placed outside the loop), in this way the .contains call is O(1), while for ArrayList is O(n). 
\r\nSide note: I don't expect this array to ever become huge therefore both approach should be safe to use but to be extra safe I'd opt for the Set.\r\nIf we are using Set we can even go for `Set.of(types)` since it returns an unmodifiable set!", - "pr_file_module": null - }, - { - "comment_id": "1967988577", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 123148, - "pr_file": "server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesFetcher.java", - "discussion_id": "1965662264", - "commented_code": "@@ -214,7 +215,10 @@ static Map retrieveFieldCaps(\n null,\n Map.of()\n );\n- responseMap.put(parentField, fieldCap);\n+\n+ if (filter == null || Arrays.asList(types).contains(type)) {", - "comment_created_at": "2025-02-24T16:21:56+00:00", - "comment_author": "luizgpsantos", - "comment_body": "Yep, that makes sense. To avoid the duplicated initialization of `Set.of(types)`, I changed some other places. Also, now I have to check for the `parent` in the filters, so I used the same `Set` strategy.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1957201015", - "pr_number": 122669, - "pr_file": "server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java", - "created_at": "2025-02-15T22:19:48+00:00", - "commented_code": "final SearchActionListener listener\n ) {\n ShardSearchRequest request = rewriteShardSearchRequest(super.buildShardSearchRequest(shardIt, listener.requestIndex));\n getSearchTransport().sendExecuteQuery(connection, request, getTask(), listener);\n var searchTransport = getSearchTransport();\n var task = getTask();\n if (searchTransport.transportService().getLocalNodeConnection() == connection) {\n searchService.executeQueryPhase(request, task, listener);", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "1957201015", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 122669, - "pr_file": "server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java", - "discussion_id": "1957201015", - "commented_code": "@@ -95,7 +98,13 @@ protected void executePhaseOnShard(\n final SearchActionListener listener\n ) {\n ShardSearchRequest request = rewriteShardSearchRequest(super.buildShardSearchRequest(shardIt, listener.requestIndex));\n- getSearchTransport().sendExecuteQuery(connection, request, getTask(), listener);\n+ var searchTransport = getSearchTransport();\n+ var task = getTask();\n+ if (searchTransport.transportService().getLocalNodeConnection() == connection) {\n+ searchService.executeQueryPhase(request, task, listener);", - "comment_created_at": "2025-02-15T22:19:48+00:00", - "comment_author": "javanna", - "comment_body": "out of curiosity: if we do this, why do it only for the query phase? Also, couldn't this conditional be added to the sendExecuteQuery method instead? What kind of overhead does this save? 
I can imagine that this is a pretty common pattern, or is search the edge case?", - "pr_file_module": null - }, - { - "comment_id": "1957204416", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 122669, - "pr_file": "server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java", - "discussion_id": "1957201015", - "commented_code": "@@ -95,7 +98,13 @@ protected void executePhaseOnShard(\n final SearchActionListener listener\n ) {\n ShardSearchRequest request = rewriteShardSearchRequest(super.buildShardSearchRequest(shardIt, listener.requestIndex));\n- getSearchTransport().sendExecuteQuery(connection, request, getTask(), listener);\n+ var searchTransport = getSearchTransport();\n+ var task = getTask();\n+ if (searchTransport.transportService().getLocalNodeConnection() == connection) {\n+ searchService.executeQueryPhase(request, task, listener);", - "comment_created_at": "2025-02-15T22:49:01+00:00", - "comment_author": "original-brownbear", - "comment_body": "Hmm good question. I guess I only noticed this in benchmarks for search and for the query phase specifically. For fetch it's not super visible since you don't hit so many shards mostly and for bulk indexing you still have per-shard bulks so the cost isn't in that.\r\nI first noticed this with batched execution where the overhead becomes super visible but it's equally visible without it for large data nodes that do coordination work already (or if queries are heavy, like a large terms query or some geo stuff or so).\r\nThe overhead saved is 1. all the lookups in the transport layer, lots of listener wrapping, child-task registration and most importantly security.\r\n**But** :) that's why I need a review from security here I think. Functionally I think security still works the same way if not more efficiently. All tests pass because we auth the top level search request. DLS/FLS are applied as well but somehow those cache assertions needed adjustment and seemingly we do use the cache more now and I can't explain why.\r\nthe security overhead is considerable here, it's well in excess of the can_match cost for most rally runs it seems :O ", - "pr_file_module": null - }, - { - "comment_id": "1957206010", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 122669, - "pr_file": "server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java", - "discussion_id": "1957201015", - "commented_code": "@@ -95,7 +98,13 @@ protected void executePhaseOnShard(\n final SearchActionListener listener\n ) {\n ShardSearchRequest request = rewriteShardSearchRequest(super.buildShardSearchRequest(shardIt, listener.requestIndex));\n- getSearchTransport().sendExecuteQuery(connection, request, getTask(), listener);\n+ var searchTransport = getSearchTransport();\n+ var task = getTask();\n+ if (searchTransport.transportService().getLocalNodeConnection() == connection) {\n+ searchService.executeQueryPhase(request, task, listener);", - "comment_created_at": "2025-02-15T23:03:00+00:00", - "comment_author": "original-brownbear", - "comment_body": "It's this (couldn't zoom out further :P)\r\n![out2](https://github.com/user-attachments/assets/b242f23c-1ed1-41f4-bb97-5c5ed261f12c)\r\n\r\nvs this\r\n![out](https://github.com/user-attachments/assets/246c2643-a0ce-4ab5-8730-a6f50f4ed0c8)\r\n\r\nand on a transport thread. 
", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2012358741", - "pr_number": 122491, - "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", - "created_at": "2025-03-25T15:21:43+00:00", - "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. Licensed under the \"Elastic License\n * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n * Public License v 1\"; you may not use this file except in compliance with, at\n * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n * License v3.0 only\", or the \"Server Side Public License, v 1\".\n */\npackage org.elasticsearch.ingest.common;\n\nimport org.elasticsearch.ingest.IngestDocument;\n\nimport java.util.ArrayList;\nimport java.util.HashMap;\nimport java.util.List;\nimport java.util.Map;\nimport java.util.regex.Matcher;\nimport java.util.regex.Pattern;\nimport java.util.stream.Collectors;\n\nfinal class CefParser {\n\n private final IngestDocument ingestDocument;\n private final boolean removeEmptyValue;\n\n CefParser(IngestDocument ingestDocument, boolean removeEmptyValue) {\n this.ingestDocument = ingestDocument;\n this.removeEmptyValue = removeEmptyValue;\n }\n\n private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n\n // New patterns for extension parsing\n private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n private static final String EXTENSION_VALUE_PATTERN = \"(?:\\\\S|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n );\n private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = new HashMap<>();\n private static final Map FIELD_MAPPING = new HashMap<>();\n private static final String ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = \"incomplete CEF header\";\n\n static {\n EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\\\\", \"\\\\\");\n EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\=\", \"=\");\n EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\n\", \"\\n\");\n EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\r\", \"\\r\");\n\n FIELD_MAPPING.put(\"app\", \"network.protocol\");\n FIELD_MAPPING.put(\"in\", \"source.bytes\");\n FIELD_MAPPING.put(\"out\", \"destination.bytes\");\n FIELD_MAPPING.put(\"dst\", \"destination.ip\");\n FIELD_MAPPING.put(\"dlat\", \"destination.geo.location.lat\");\n FIELD_MAPPING.put(\"dlong\", \"destination.geo.location.lon\");\n FIELD_MAPPING.put(\"dhost\", \"destination.domain\");\n FIELD_MAPPING.put(\"dmac\", \"destination.mac\");\n FIELD_MAPPING.put(\"dntdom\", \"destination.registered_domain\");\n FIELD_MAPPING.put(\"dpt\", \"destination.port\");\n FIELD_MAPPING.put(\"dpid\", \"destination.process.pid\");\n FIELD_MAPPING.put(\"dproc\", \"destination.process.name\");\n FIELD_MAPPING.put(\"duid\", \"destination.user.id\");\n 
FIELD_MAPPING.put(\"duser\", \"destination.user.name\");\n FIELD_MAPPING.put(\"dpriv\", \"destination.user.group.name\");\n FIELD_MAPPING.put(\"act\", \"event.action\");\n FIELD_MAPPING.put(\"dvc\", \"observer.ip\");\n FIELD_MAPPING.put(\"deviceDirection\", \"network.direction\");\n FIELD_MAPPING.put(\"deviceDnsDomain\", \"observer.registered_domain\");\n FIELD_MAPPING.put(\"deviceExternalId\", \"observer.name\");\n FIELD_MAPPING.put(\"deviceFacility\", \"log.syslog.facility.code\");\n FIELD_MAPPING.put(\"dvchost\", \"observer.hostname\");\n FIELD_MAPPING.put(\"deviceInboundInterface\", \"observer.ingress.interface.name\");\n FIELD_MAPPING.put(\"dvcmac\", \"observer.mac\");\n FIELD_MAPPING.put(\"deviceOutboundInterface\", \"observer.egress.interface.name\");\n FIELD_MAPPING.put(\"dvcpid\", \"process.pid\");\n FIELD_MAPPING.put(\"deviceProcessName\", \"process.name\");\n FIELD_MAPPING.put(\"rt\", \"@timestamp\");\n FIELD_MAPPING.put(\"dtz\", \"event.timezone\");\n FIELD_MAPPING.put(\"deviceTranslatedAddress\", \"host.nat.ip\");\n FIELD_MAPPING.put(\"device.version\", \"observer.version\");\n FIELD_MAPPING.put(\"deviceVersion\", \"observer.version\");\n FIELD_MAPPING.put(\"device.product\", \"observer.product\");\n FIELD_MAPPING.put(\"deviceProduct\", \"observer.product\");\n FIELD_MAPPING.put(\"device.event_class_id\", \"event.code\");\n FIELD_MAPPING.put(\"device.vendor\", \"observer.vendor\");\n FIELD_MAPPING.put(\"deviceVendor\", \"observer.vendor\");\n FIELD_MAPPING.put(\"end\", \"event.end\");\n FIELD_MAPPING.put(\"eventId\", \"event.id\");\n FIELD_MAPPING.put(\"outcome\", \"event.outcome\");\n FIELD_MAPPING.put(\"fileCreateTime\", \"file.created\");\n FIELD_MAPPING.put(\"fileHash\", \"file.hash\");\n FIELD_MAPPING.put(\"fileId\", \"file.inode\");\n FIELD_MAPPING.put(\"fileModificationTime\", \"file.mtime\");\n FIELD_MAPPING.put(\"fname\", \"file.name\");\n FIELD_MAPPING.put(\"filePath\", \"file.path\");\n FIELD_MAPPING.put(\"filePermission\", \"file.group\");\n FIELD_MAPPING.put(\"fsize\", \"file.size\");\n FIELD_MAPPING.put(\"fileType\", \"file.extension\");\n FIELD_MAPPING.put(\"mrt\", \"event.ingested\");\n FIELD_MAPPING.put(\"msg\", \"message\");\n FIELD_MAPPING.put(\"reason\", \"event.reason\");\n FIELD_MAPPING.put(\"requestClientApplication\", \"user_agent.original\");\n FIELD_MAPPING.put(\"requestContext\", \"http.request.referrer\");\n FIELD_MAPPING.put(\"requestMethod\", \"http.request.method\");\n FIELD_MAPPING.put(\"request\", \"url.original\");\n FIELD_MAPPING.put(\"src\", \"source.ip\");\n FIELD_MAPPING.put(\"sourceDnsDomain\", \"source.registered_domain\");\n FIELD_MAPPING.put(\"slat\", \"source.geo.location.lat\");\n FIELD_MAPPING.put(\"slong\", \"source.geo.location.lon\");\n FIELD_MAPPING.put(\"shost\", \"source.domain\");\n FIELD_MAPPING.put(\"smac\", \"source.mac\");\n FIELD_MAPPING.put(\"sntdom\", \"source.registered_domain\");\n FIELD_MAPPING.put(\"spt\", \"source.port\");\n FIELD_MAPPING.put(\"spid\", \"source.process.pid\");\n FIELD_MAPPING.put(\"sproc\", \"source.process.name\");\n FIELD_MAPPING.put(\"sourceServiceName\", \"source.service.name\");\n FIELD_MAPPING.put(\"suser\", \"source.user.name\");\n FIELD_MAPPING.put(\"start\", \"event.start\");\n FIELD_MAPPING.put(\"proto\", \"network.transport\");\n // Add more mappings as needed\n }\n\n void process(String cefString, String targetField) {\n List headerFields = new ArrayList<>();\n Matcher headerMatcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n int extensionStart = 0;\n\n for (int i = 0; i < 7 && 
headerMatcher.find(); i++) {\n String field = headerMatcher.group(1);\n field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n headerFields.add(field);\n extensionStart = headerMatcher.end();\n }\n\n if (headerFields.size() > 0 && headerFields.get(0).startsWith(\"CEF:\")) {\n CEFEvent event = new CEFEvent();\n // Add error message if there are not enough header fields\n if (headerFields.size() != 7) {\n ingestDocument.appendFieldValue(\"error.message\", ERROR_MESSAGE_INCOMPLETE_CEF_HEADER);\n }\n for (int i = 0; i < headerFields.size(); i++) {\n switch (i) {\n case 0:\n event.setVersion(headerFields.get(0).substring(4));\n break;\n case 1:\n event.setDeviceVendor(headerFields.get(1));\n ingestDocument.setFieldValue(\"observer.vendor\", headerFields.get(1));\n break;\n case 2:\n event.setDeviceProduct(headerFields.get(2));\n ingestDocument.setFieldValue(\"observer.product\", headerFields.get(2));\n break;\n case 3:\n event.setDeviceVersion(headerFields.get(3));\n ingestDocument.setFieldValue(\"observer.version\", headerFields.get(3));\n break;\n case 4:\n event.setDeviceEventClassId(headerFields.get(4));\n ingestDocument.setFieldValue(\"event.code\", headerFields.get(4));\n break;\n case 5:\n event.setName(headerFields.get(5));\n break;\n case 6:\n event.setSeverity(headerFields.get(6));\n break;\n }\n }\n String extensionString = cefString.substring(extensionStart);\n Map extensions = parseExtensions(extensionString);\n\n if (removeEmptyValue) {\n removeEmptyValue(extensions);\n }\n event.setExtensions(extensions);\n\n // Translate possible ECS fields and remove them from extensions\n Map translatedFields = extensions.entrySet()\n .stream()\n .filter(entry -> FIELD_MAPPING.containsKey(entry.getKey()))\n .collect(Collectors.toMap(entry -> FIELD_MAPPING.get(entry.getKey()), entry -> {\n Class fieldType = ECSFieldWithType.getFieldType(FIELD_MAPPING.get(entry.getKey()));\n return convertValueToType(entry.getValue(), fieldType);\n }));\n\n // Remove the translated entries from extensions\n event.removeMappedExtensions();\n\n ingestDocument.setFieldValue(targetField, event.toObject());\n // Add ECS translations to the root of the document\n if (translatedFields.isEmpty() == false) {\n translatedFields.forEach(ingestDocument::setFieldValue);\n }\n } else {\n throw new IllegalArgumentException(\"Invalid CEF format\");\n }\n }\n\n private Object convertValueToType(String value, Class type) {\n if (type == Long.class) {\n return Long.parseLong(value);\n } else if (type == Double.class) {\n return Double.parseDouble(value);\n } else if (type == Integer.class) {\n return Integer.parseInt(value);\n } else {\n return value; // Default to String\n }\n }\n\n private Map parseExtensions(String extensionString) {\n Map extensions = new HashMap<>();\n Matcher matcher = EXTENSION_NEXT_KEY_VALUE_PATTERN.matcher(extensionString);\n int lastEnd = 0;\n while (matcher.find()) {\n String key = matcher.group(1);\n String value = matcher.group(2);\n\n // Convert extension field name to strict legal field_reference\n if (key.endsWith(\"]\")) {\n key = convertArrayLikeKey(key);\n }\n\n extensions.put(key, desanitizeExtensionVal(value.trim()));\n lastEnd = matcher.end();\n }\n // If there's any remaining unparsed content, throw an exception\n if (lastEnd < extensionString.length()) {\n throw new IllegalArgumentException(\"Invalid extensions; keyless value present: \" + extensionString.substring(lastEnd));\n }\n return extensions;\n }\n\n private void removeEmptyValue(Map map) {\n 
map.entrySet().removeIf(entry -> entry.getValue().isEmpty());\n map.entrySet().removeIf(entry -> entry.getValue() == null);\n }\n\n private String convertArrayLikeKey(String key) {\n Matcher matcher = EXTENSION_KEY_ARRAY_CAPTURE.matcher(key);\n if (matcher.matches()) {\n return \"[\" + matcher.group(1) + \"]\" + matcher.group(2);\n }\n return key;\n }\n\n public static String desanitizeExtensionVal(String value) {\n String desanitized = value;\n for (Map.Entry entry : EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.entrySet()) {\n desanitized = desanitized.replace(entry.getKey(), entry.getValue());\n }\n return desanitized;\n }\n\n public static class CEFEvent {\n private String version;\n private String deviceVendor;\n private String deviceProduct;\n private String deviceVersion;\n private String deviceEventClassId;\n private String name;\n private String severity;\n private final Map extensions = new HashMap<>();\n private Map translatedFields;\n\n // Getters and setters for all fields\n public String getVersion() {\n return version;\n }\n\n public void setVersion(String version) {\n this.version = version;\n }\n\n public String getDeviceVendor() {\n return deviceVendor;\n }\n\n public void setDeviceVendor(String deviceVendor) {\n this.deviceVendor = deviceVendor;\n }\n\n public String getDeviceProduct() {\n return deviceProduct;\n }\n\n public void setDeviceProduct(String deviceProduct) {\n this.deviceProduct = deviceProduct;\n }\n\n public String getDeviceVersion() {\n return deviceVersion;\n }\n\n public void setDeviceVersion(String deviceVersion) {\n this.deviceVersion = deviceVersion;\n }\n\n public String getDeviceEventClassId() {\n return deviceEventClassId;\n }\n\n public void setDeviceEventClassId(String deviceEventClassId) {\n this.deviceEventClassId = deviceEventClassId;\n }\n\n public String getName() {\n return name;\n }\n\n public void setName(String name) {\n this.name = name;\n }\n\n public String getSeverity() {\n return severity;\n }\n\n public void setSeverity(String severity) {\n this.severity = severity;\n }\n\n public Map getExtensions() {\n return extensions;\n }\n\n public void setExtensions(Map extensions) {\n this.extensions.putAll(extensions);\n }\n\n public void addExtension(String key, String extension) {\n this.extensions.put(key, extension);\n }\n\n public void removeMappedExtensions() {\n this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsKey(entry.getKey()));\n this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsValue(entry.getKey()));", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2012358741", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 122491, - "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", - "discussion_id": "2012358741", - "commented_code": "@@ -0,0 +1,449 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+import java.util.stream.Collectors;\n+\n+final class CefParser {\n+\n+ private final IngestDocument ingestDocument;\n+ private final boolean removeEmptyValue;\n+\n+ CefParser(IngestDocument ingestDocument, boolean removeEmptyValue) {\n+ this.ingestDocument = ingestDocument;\n+ this.removeEmptyValue = removeEmptyValue;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:\\\\S|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = new HashMap<>();\n+ private static final Map FIELD_MAPPING = new HashMap<>();\n+ private static final String ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = \"incomplete CEF header\";\n+\n+ static {\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\\\\", \"\\\\\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\=\", \"=\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\n\", \"\\n\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\r\", \"\\r\");\n+\n+ FIELD_MAPPING.put(\"app\", \"network.protocol\");\n+ FIELD_MAPPING.put(\"in\", \"source.bytes\");\n+ FIELD_MAPPING.put(\"out\", \"destination.bytes\");\n+ FIELD_MAPPING.put(\"dst\", \"destination.ip\");\n+ FIELD_MAPPING.put(\"dlat\", \"destination.geo.location.lat\");\n+ FIELD_MAPPING.put(\"dlong\", \"destination.geo.location.lon\");\n+ FIELD_MAPPING.put(\"dhost\", \"destination.domain\");\n+ FIELD_MAPPING.put(\"dmac\", \"destination.mac\");\n+ FIELD_MAPPING.put(\"dntdom\", \"destination.registered_domain\");\n+ FIELD_MAPPING.put(\"dpt\", \"destination.port\");\n+ FIELD_MAPPING.put(\"dpid\", \"destination.process.pid\");\n+ FIELD_MAPPING.put(\"dproc\", \"destination.process.name\");\n+ FIELD_MAPPING.put(\"duid\", \"destination.user.id\");\n+ FIELD_MAPPING.put(\"duser\", \"destination.user.name\");\n+ FIELD_MAPPING.put(\"dpriv\", \"destination.user.group.name\");\n+ FIELD_MAPPING.put(\"act\", \"event.action\");\n+ FIELD_MAPPING.put(\"dvc\", \"observer.ip\");\n+ FIELD_MAPPING.put(\"deviceDirection\", \"network.direction\");\n+ FIELD_MAPPING.put(\"deviceDnsDomain\", 
\"observer.registered_domain\");\n+ FIELD_MAPPING.put(\"deviceExternalId\", \"observer.name\");\n+ FIELD_MAPPING.put(\"deviceFacility\", \"log.syslog.facility.code\");\n+ FIELD_MAPPING.put(\"dvchost\", \"observer.hostname\");\n+ FIELD_MAPPING.put(\"deviceInboundInterface\", \"observer.ingress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcmac\", \"observer.mac\");\n+ FIELD_MAPPING.put(\"deviceOutboundInterface\", \"observer.egress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcpid\", \"process.pid\");\n+ FIELD_MAPPING.put(\"deviceProcessName\", \"process.name\");\n+ FIELD_MAPPING.put(\"rt\", \"@timestamp\");\n+ FIELD_MAPPING.put(\"dtz\", \"event.timezone\");\n+ FIELD_MAPPING.put(\"deviceTranslatedAddress\", \"host.nat.ip\");\n+ FIELD_MAPPING.put(\"device.version\", \"observer.version\");\n+ FIELD_MAPPING.put(\"deviceVersion\", \"observer.version\");\n+ FIELD_MAPPING.put(\"device.product\", \"observer.product\");\n+ FIELD_MAPPING.put(\"deviceProduct\", \"observer.product\");\n+ FIELD_MAPPING.put(\"device.event_class_id\", \"event.code\");\n+ FIELD_MAPPING.put(\"device.vendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"deviceVendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"end\", \"event.end\");\n+ FIELD_MAPPING.put(\"eventId\", \"event.id\");\n+ FIELD_MAPPING.put(\"outcome\", \"event.outcome\");\n+ FIELD_MAPPING.put(\"fileCreateTime\", \"file.created\");\n+ FIELD_MAPPING.put(\"fileHash\", \"file.hash\");\n+ FIELD_MAPPING.put(\"fileId\", \"file.inode\");\n+ FIELD_MAPPING.put(\"fileModificationTime\", \"file.mtime\");\n+ FIELD_MAPPING.put(\"fname\", \"file.name\");\n+ FIELD_MAPPING.put(\"filePath\", \"file.path\");\n+ FIELD_MAPPING.put(\"filePermission\", \"file.group\");\n+ FIELD_MAPPING.put(\"fsize\", \"file.size\");\n+ FIELD_MAPPING.put(\"fileType\", \"file.extension\");\n+ FIELD_MAPPING.put(\"mrt\", \"event.ingested\");\n+ FIELD_MAPPING.put(\"msg\", \"message\");\n+ FIELD_MAPPING.put(\"reason\", \"event.reason\");\n+ FIELD_MAPPING.put(\"requestClientApplication\", \"user_agent.original\");\n+ FIELD_MAPPING.put(\"requestContext\", \"http.request.referrer\");\n+ FIELD_MAPPING.put(\"requestMethod\", \"http.request.method\");\n+ FIELD_MAPPING.put(\"request\", \"url.original\");\n+ FIELD_MAPPING.put(\"src\", \"source.ip\");\n+ FIELD_MAPPING.put(\"sourceDnsDomain\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"slat\", \"source.geo.location.lat\");\n+ FIELD_MAPPING.put(\"slong\", \"source.geo.location.lon\");\n+ FIELD_MAPPING.put(\"shost\", \"source.domain\");\n+ FIELD_MAPPING.put(\"smac\", \"source.mac\");\n+ FIELD_MAPPING.put(\"sntdom\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"spt\", \"source.port\");\n+ FIELD_MAPPING.put(\"spid\", \"source.process.pid\");\n+ FIELD_MAPPING.put(\"sproc\", \"source.process.name\");\n+ FIELD_MAPPING.put(\"sourceServiceName\", \"source.service.name\");\n+ FIELD_MAPPING.put(\"suser\", \"source.user.name\");\n+ FIELD_MAPPING.put(\"start\", \"event.start\");\n+ FIELD_MAPPING.put(\"proto\", \"network.transport\");\n+ // Add more mappings as needed\n+ }\n+\n+ void process(String cefString, String targetField) {\n+ List headerFields = new ArrayList<>();\n+ Matcher headerMatcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && headerMatcher.find(); i++) {\n+ String field = headerMatcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headerFields.add(field);\n+ extensionStart = headerMatcher.end();\n+ }\n+\n+ if (headerFields.size() > 0 && 
headerFields.get(0).startsWith(\"CEF:\")) {\n+ CEFEvent event = new CEFEvent();\n+ // Add error message if there are not enough header fields\n+ if (headerFields.size() != 7) {\n+ ingestDocument.appendFieldValue(\"error.message\", ERROR_MESSAGE_INCOMPLETE_CEF_HEADER);\n+ }\n+ for (int i = 0; i < headerFields.size(); i++) {\n+ switch (i) {\n+ case 0:\n+ event.setVersion(headerFields.get(0).substring(4));\n+ break;\n+ case 1:\n+ event.setDeviceVendor(headerFields.get(1));\n+ ingestDocument.setFieldValue(\"observer.vendor\", headerFields.get(1));\n+ break;\n+ case 2:\n+ event.setDeviceProduct(headerFields.get(2));\n+ ingestDocument.setFieldValue(\"observer.product\", headerFields.get(2));\n+ break;\n+ case 3:\n+ event.setDeviceVersion(headerFields.get(3));\n+ ingestDocument.setFieldValue(\"observer.version\", headerFields.get(3));\n+ break;\n+ case 4:\n+ event.setDeviceEventClassId(headerFields.get(4));\n+ ingestDocument.setFieldValue(\"event.code\", headerFields.get(4));\n+ break;\n+ case 5:\n+ event.setName(headerFields.get(5));\n+ break;\n+ case 6:\n+ event.setSeverity(headerFields.get(6));\n+ break;\n+ }\n+ }\n+ String extensionString = cefString.substring(extensionStart);\n+ Map extensions = parseExtensions(extensionString);\n+\n+ if (removeEmptyValue) {\n+ removeEmptyValue(extensions);\n+ }\n+ event.setExtensions(extensions);\n+\n+ // Translate possible ECS fields and remove them from extensions\n+ Map translatedFields = extensions.entrySet()\n+ .stream()\n+ .filter(entry -> FIELD_MAPPING.containsKey(entry.getKey()))\n+ .collect(Collectors.toMap(entry -> FIELD_MAPPING.get(entry.getKey()), entry -> {\n+ Class fieldType = ECSFieldWithType.getFieldType(FIELD_MAPPING.get(entry.getKey()));\n+ return convertValueToType(entry.getValue(), fieldType);\n+ }));\n+\n+ // Remove the translated entries from extensions\n+ event.removeMappedExtensions();\n+\n+ ingestDocument.setFieldValue(targetField, event.toObject());\n+ // Add ECS translations to the root of the document\n+ if (translatedFields.isEmpty() == false) {\n+ translatedFields.forEach(ingestDocument::setFieldValue);\n+ }\n+ } else {\n+ throw new IllegalArgumentException(\"Invalid CEF format\");\n+ }\n+ }\n+\n+ private Object convertValueToType(String value, Class type) {\n+ if (type == Long.class) {\n+ return Long.parseLong(value);\n+ } else if (type == Double.class) {\n+ return Double.parseDouble(value);\n+ } else if (type == Integer.class) {\n+ return Integer.parseInt(value);\n+ } else {\n+ return value; // Default to String\n+ }\n+ }\n+\n+ private Map parseExtensions(String extensionString) {\n+ Map extensions = new HashMap<>();\n+ Matcher matcher = EXTENSION_NEXT_KEY_VALUE_PATTERN.matcher(extensionString);\n+ int lastEnd = 0;\n+ while (matcher.find()) {\n+ String key = matcher.group(1);\n+ String value = matcher.group(2);\n+\n+ // Convert extension field name to strict legal field_reference\n+ if (key.endsWith(\"]\")) {\n+ key = convertArrayLikeKey(key);\n+ }\n+\n+ extensions.put(key, desanitizeExtensionVal(value.trim()));\n+ lastEnd = matcher.end();\n+ }\n+ // If there's any remaining unparsed content, throw an exception\n+ if (lastEnd < extensionString.length()) {\n+ throw new IllegalArgumentException(\"Invalid extensions; keyless value present: \" + extensionString.substring(lastEnd));\n+ }\n+ return extensions;\n+ }\n+\n+ private void removeEmptyValue(Map map) {\n+ map.entrySet().removeIf(entry -> entry.getValue().isEmpty());\n+ map.entrySet().removeIf(entry -> entry.getValue() == null);\n+ }\n+\n+ private String 
convertArrayLikeKey(String key) {\n+ Matcher matcher = EXTENSION_KEY_ARRAY_CAPTURE.matcher(key);\n+ if (matcher.matches()) {\n+ return \"[\" + matcher.group(1) + \"]\" + matcher.group(2);\n+ }\n+ return key;\n+ }\n+\n+ public static String desanitizeExtensionVal(String value) {\n+ String desanitized = value;\n+ for (Map.Entry entry : EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.entrySet()) {\n+ desanitized = desanitized.replace(entry.getKey(), entry.getValue());\n+ }\n+ return desanitized;\n+ }\n+\n+ public static class CEFEvent {\n+ private String version;\n+ private String deviceVendor;\n+ private String deviceProduct;\n+ private String deviceVersion;\n+ private String deviceEventClassId;\n+ private String name;\n+ private String severity;\n+ private final Map extensions = new HashMap<>();\n+ private Map translatedFields;\n+\n+ // Getters and setters for all fields\n+ public String getVersion() {\n+ return version;\n+ }\n+\n+ public void setVersion(String version) {\n+ this.version = version;\n+ }\n+\n+ public String getDeviceVendor() {\n+ return deviceVendor;\n+ }\n+\n+ public void setDeviceVendor(String deviceVendor) {\n+ this.deviceVendor = deviceVendor;\n+ }\n+\n+ public String getDeviceProduct() {\n+ return deviceProduct;\n+ }\n+\n+ public void setDeviceProduct(String deviceProduct) {\n+ this.deviceProduct = deviceProduct;\n+ }\n+\n+ public String getDeviceVersion() {\n+ return deviceVersion;\n+ }\n+\n+ public void setDeviceVersion(String deviceVersion) {\n+ this.deviceVersion = deviceVersion;\n+ }\n+\n+ public String getDeviceEventClassId() {\n+ return deviceEventClassId;\n+ }\n+\n+ public void setDeviceEventClassId(String deviceEventClassId) {\n+ this.deviceEventClassId = deviceEventClassId;\n+ }\n+\n+ public String getName() {\n+ return name;\n+ }\n+\n+ public void setName(String name) {\n+ this.name = name;\n+ }\n+\n+ public String getSeverity() {\n+ return severity;\n+ }\n+\n+ public void setSeverity(String severity) {\n+ this.severity = severity;\n+ }\n+\n+ public Map getExtensions() {\n+ return extensions;\n+ }\n+\n+ public void setExtensions(Map extensions) {\n+ this.extensions.putAll(extensions);\n+ }\n+\n+ public void addExtension(String key, String extension) {\n+ this.extensions.put(key, extension);\n+ }\n+\n+ public void removeMappedExtensions() {\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsKey(entry.getKey()));\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsValue(entry.getKey()));", - "comment_created_at": "2025-03-25T15:21:43+00:00", - "comment_author": "joegallo", - "comment_body": "You're doing N scans through the whole collection here, I doubt that's going to be especially performant.", - "pr_file_module": null - }, - { - "comment_id": "2012372556", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 122491, - "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", - "discussion_id": "2012358741", - "commented_code": "@@ -0,0 +1,449 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+import java.util.stream.Collectors;\n+\n+final class CefParser {\n+\n+ private final IngestDocument ingestDocument;\n+ private final boolean removeEmptyValue;\n+\n+ CefParser(IngestDocument ingestDocument, boolean removeEmptyValue) {\n+ this.ingestDocument = ingestDocument;\n+ this.removeEmptyValue = removeEmptyValue;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:\\\\S|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = new HashMap<>();\n+ private static final Map FIELD_MAPPING = new HashMap<>();\n+ private static final String ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = \"incomplete CEF header\";\n+\n+ static {\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\\\\", \"\\\\\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\=\", \"=\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\n\", \"\\n\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\r\", \"\\r\");\n+\n+ FIELD_MAPPING.put(\"app\", \"network.protocol\");\n+ FIELD_MAPPING.put(\"in\", \"source.bytes\");\n+ FIELD_MAPPING.put(\"out\", \"destination.bytes\");\n+ FIELD_MAPPING.put(\"dst\", \"destination.ip\");\n+ FIELD_MAPPING.put(\"dlat\", \"destination.geo.location.lat\");\n+ FIELD_MAPPING.put(\"dlong\", \"destination.geo.location.lon\");\n+ FIELD_MAPPING.put(\"dhost\", \"destination.domain\");\n+ FIELD_MAPPING.put(\"dmac\", \"destination.mac\");\n+ FIELD_MAPPING.put(\"dntdom\", \"destination.registered_domain\");\n+ FIELD_MAPPING.put(\"dpt\", \"destination.port\");\n+ FIELD_MAPPING.put(\"dpid\", \"destination.process.pid\");\n+ FIELD_MAPPING.put(\"dproc\", \"destination.process.name\");\n+ FIELD_MAPPING.put(\"duid\", \"destination.user.id\");\n+ FIELD_MAPPING.put(\"duser\", \"destination.user.name\");\n+ FIELD_MAPPING.put(\"dpriv\", \"destination.user.group.name\");\n+ FIELD_MAPPING.put(\"act\", \"event.action\");\n+ FIELD_MAPPING.put(\"dvc\", \"observer.ip\");\n+ FIELD_MAPPING.put(\"deviceDirection\", \"network.direction\");\n+ FIELD_MAPPING.put(\"deviceDnsDomain\", 
\"observer.registered_domain\");\n+ FIELD_MAPPING.put(\"deviceExternalId\", \"observer.name\");\n+ FIELD_MAPPING.put(\"deviceFacility\", \"log.syslog.facility.code\");\n+ FIELD_MAPPING.put(\"dvchost\", \"observer.hostname\");\n+ FIELD_MAPPING.put(\"deviceInboundInterface\", \"observer.ingress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcmac\", \"observer.mac\");\n+ FIELD_MAPPING.put(\"deviceOutboundInterface\", \"observer.egress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcpid\", \"process.pid\");\n+ FIELD_MAPPING.put(\"deviceProcessName\", \"process.name\");\n+ FIELD_MAPPING.put(\"rt\", \"@timestamp\");\n+ FIELD_MAPPING.put(\"dtz\", \"event.timezone\");\n+ FIELD_MAPPING.put(\"deviceTranslatedAddress\", \"host.nat.ip\");\n+ FIELD_MAPPING.put(\"device.version\", \"observer.version\");\n+ FIELD_MAPPING.put(\"deviceVersion\", \"observer.version\");\n+ FIELD_MAPPING.put(\"device.product\", \"observer.product\");\n+ FIELD_MAPPING.put(\"deviceProduct\", \"observer.product\");\n+ FIELD_MAPPING.put(\"device.event_class_id\", \"event.code\");\n+ FIELD_MAPPING.put(\"device.vendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"deviceVendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"end\", \"event.end\");\n+ FIELD_MAPPING.put(\"eventId\", \"event.id\");\n+ FIELD_MAPPING.put(\"outcome\", \"event.outcome\");\n+ FIELD_MAPPING.put(\"fileCreateTime\", \"file.created\");\n+ FIELD_MAPPING.put(\"fileHash\", \"file.hash\");\n+ FIELD_MAPPING.put(\"fileId\", \"file.inode\");\n+ FIELD_MAPPING.put(\"fileModificationTime\", \"file.mtime\");\n+ FIELD_MAPPING.put(\"fname\", \"file.name\");\n+ FIELD_MAPPING.put(\"filePath\", \"file.path\");\n+ FIELD_MAPPING.put(\"filePermission\", \"file.group\");\n+ FIELD_MAPPING.put(\"fsize\", \"file.size\");\n+ FIELD_MAPPING.put(\"fileType\", \"file.extension\");\n+ FIELD_MAPPING.put(\"mrt\", \"event.ingested\");\n+ FIELD_MAPPING.put(\"msg\", \"message\");\n+ FIELD_MAPPING.put(\"reason\", \"event.reason\");\n+ FIELD_MAPPING.put(\"requestClientApplication\", \"user_agent.original\");\n+ FIELD_MAPPING.put(\"requestContext\", \"http.request.referrer\");\n+ FIELD_MAPPING.put(\"requestMethod\", \"http.request.method\");\n+ FIELD_MAPPING.put(\"request\", \"url.original\");\n+ FIELD_MAPPING.put(\"src\", \"source.ip\");\n+ FIELD_MAPPING.put(\"sourceDnsDomain\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"slat\", \"source.geo.location.lat\");\n+ FIELD_MAPPING.put(\"slong\", \"source.geo.location.lon\");\n+ FIELD_MAPPING.put(\"shost\", \"source.domain\");\n+ FIELD_MAPPING.put(\"smac\", \"source.mac\");\n+ FIELD_MAPPING.put(\"sntdom\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"spt\", \"source.port\");\n+ FIELD_MAPPING.put(\"spid\", \"source.process.pid\");\n+ FIELD_MAPPING.put(\"sproc\", \"source.process.name\");\n+ FIELD_MAPPING.put(\"sourceServiceName\", \"source.service.name\");\n+ FIELD_MAPPING.put(\"suser\", \"source.user.name\");\n+ FIELD_MAPPING.put(\"start\", \"event.start\");\n+ FIELD_MAPPING.put(\"proto\", \"network.transport\");\n+ // Add more mappings as needed\n+ }\n+\n+ void process(String cefString, String targetField) {\n+ List headerFields = new ArrayList<>();\n+ Matcher headerMatcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && headerMatcher.find(); i++) {\n+ String field = headerMatcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headerFields.add(field);\n+ extensionStart = headerMatcher.end();\n+ }\n+\n+ if (headerFields.size() > 0 && 
headerFields.get(0).startsWith(\"CEF:\")) {\n+ CEFEvent event = new CEFEvent();\n+ // Add error message if there are not enough header fields\n+ if (headerFields.size() != 7) {\n+ ingestDocument.appendFieldValue(\"error.message\", ERROR_MESSAGE_INCOMPLETE_CEF_HEADER);\n+ }\n+ for (int i = 0; i < headerFields.size(); i++) {\n+ switch (i) {\n+ case 0:\n+ event.setVersion(headerFields.get(0).substring(4));\n+ break;\n+ case 1:\n+ event.setDeviceVendor(headerFields.get(1));\n+ ingestDocument.setFieldValue(\"observer.vendor\", headerFields.get(1));\n+ break;\n+ case 2:\n+ event.setDeviceProduct(headerFields.get(2));\n+ ingestDocument.setFieldValue(\"observer.product\", headerFields.get(2));\n+ break;\n+ case 3:\n+ event.setDeviceVersion(headerFields.get(3));\n+ ingestDocument.setFieldValue(\"observer.version\", headerFields.get(3));\n+ break;\n+ case 4:\n+ event.setDeviceEventClassId(headerFields.get(4));\n+ ingestDocument.setFieldValue(\"event.code\", headerFields.get(4));\n+ break;\n+ case 5:\n+ event.setName(headerFields.get(5));\n+ break;\n+ case 6:\n+ event.setSeverity(headerFields.get(6));\n+ break;\n+ }\n+ }\n+ String extensionString = cefString.substring(extensionStart);\n+ Map extensions = parseExtensions(extensionString);\n+\n+ if (removeEmptyValue) {\n+ removeEmptyValue(extensions);\n+ }\n+ event.setExtensions(extensions);\n+\n+ // Translate possible ECS fields and remove them from extensions\n+ Map translatedFields = extensions.entrySet()\n+ .stream()\n+ .filter(entry -> FIELD_MAPPING.containsKey(entry.getKey()))\n+ .collect(Collectors.toMap(entry -> FIELD_MAPPING.get(entry.getKey()), entry -> {\n+ Class fieldType = ECSFieldWithType.getFieldType(FIELD_MAPPING.get(entry.getKey()));\n+ return convertValueToType(entry.getValue(), fieldType);\n+ }));\n+\n+ // Remove the translated entries from extensions\n+ event.removeMappedExtensions();\n+\n+ ingestDocument.setFieldValue(targetField, event.toObject());\n+ // Add ECS translations to the root of the document\n+ if (translatedFields.isEmpty() == false) {\n+ translatedFields.forEach(ingestDocument::setFieldValue);\n+ }\n+ } else {\n+ throw new IllegalArgumentException(\"Invalid CEF format\");\n+ }\n+ }\n+\n+ private Object convertValueToType(String value, Class type) {\n+ if (type == Long.class) {\n+ return Long.parseLong(value);\n+ } else if (type == Double.class) {\n+ return Double.parseDouble(value);\n+ } else if (type == Integer.class) {\n+ return Integer.parseInt(value);\n+ } else {\n+ return value; // Default to String\n+ }\n+ }\n+\n+ private Map parseExtensions(String extensionString) {\n+ Map extensions = new HashMap<>();\n+ Matcher matcher = EXTENSION_NEXT_KEY_VALUE_PATTERN.matcher(extensionString);\n+ int lastEnd = 0;\n+ while (matcher.find()) {\n+ String key = matcher.group(1);\n+ String value = matcher.group(2);\n+\n+ // Convert extension field name to strict legal field_reference\n+ if (key.endsWith(\"]\")) {\n+ key = convertArrayLikeKey(key);\n+ }\n+\n+ extensions.put(key, desanitizeExtensionVal(value.trim()));\n+ lastEnd = matcher.end();\n+ }\n+ // If there's any remaining unparsed content, throw an exception\n+ if (lastEnd < extensionString.length()) {\n+ throw new IllegalArgumentException(\"Invalid extensions; keyless value present: \" + extensionString.substring(lastEnd));\n+ }\n+ return extensions;\n+ }\n+\n+ private void removeEmptyValue(Map map) {\n+ map.entrySet().removeIf(entry -> entry.getValue().isEmpty());\n+ map.entrySet().removeIf(entry -> entry.getValue() == null);\n+ }\n+\n+ private String 
convertArrayLikeKey(String key) {\n+ Matcher matcher = EXTENSION_KEY_ARRAY_CAPTURE.matcher(key);\n+ if (matcher.matches()) {\n+ return \"[\" + matcher.group(1) + \"]\" + matcher.group(2);\n+ }\n+ return key;\n+ }\n+\n+ public static String desanitizeExtensionVal(String value) {\n+ String desanitized = value;\n+ for (Map.Entry entry : EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.entrySet()) {\n+ desanitized = desanitized.replace(entry.getKey(), entry.getValue());\n+ }\n+ return desanitized;\n+ }\n+\n+ public static class CEFEvent {\n+ private String version;\n+ private String deviceVendor;\n+ private String deviceProduct;\n+ private String deviceVersion;\n+ private String deviceEventClassId;\n+ private String name;\n+ private String severity;\n+ private final Map extensions = new HashMap<>();\n+ private Map translatedFields;\n+\n+ // Getters and setters for all fields\n+ public String getVersion() {\n+ return version;\n+ }\n+\n+ public void setVersion(String version) {\n+ this.version = version;\n+ }\n+\n+ public String getDeviceVendor() {\n+ return deviceVendor;\n+ }\n+\n+ public void setDeviceVendor(String deviceVendor) {\n+ this.deviceVendor = deviceVendor;\n+ }\n+\n+ public String getDeviceProduct() {\n+ return deviceProduct;\n+ }\n+\n+ public void setDeviceProduct(String deviceProduct) {\n+ this.deviceProduct = deviceProduct;\n+ }\n+\n+ public String getDeviceVersion() {\n+ return deviceVersion;\n+ }\n+\n+ public void setDeviceVersion(String deviceVersion) {\n+ this.deviceVersion = deviceVersion;\n+ }\n+\n+ public String getDeviceEventClassId() {\n+ return deviceEventClassId;\n+ }\n+\n+ public void setDeviceEventClassId(String deviceEventClassId) {\n+ this.deviceEventClassId = deviceEventClassId;\n+ }\n+\n+ public String getName() {\n+ return name;\n+ }\n+\n+ public void setName(String name) {\n+ this.name = name;\n+ }\n+\n+ public String getSeverity() {\n+ return severity;\n+ }\n+\n+ public void setSeverity(String severity) {\n+ this.severity = severity;\n+ }\n+\n+ public Map getExtensions() {\n+ return extensions;\n+ }\n+\n+ public void setExtensions(Map extensions) {\n+ this.extensions.putAll(extensions);\n+ }\n+\n+ public void addExtension(String key, String extension) {\n+ this.extensions.put(key, extension);\n+ }\n+\n+ public void removeMappedExtensions() {\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsKey(entry.getKey()));\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsValue(entry.getKey()));", - "comment_created_at": "2025-03-25T15:28:16+00:00", - "comment_author": "bhapas", - "comment_body": "I think this can be \r\n\r\n```java\r\npublic void removeMappedExtensions() {\r\n this.extensions.entrySet().removeIf(entry -> \r\n FIELD_MAPPING.containsKey(entry.getKey()) || FIELD_MAPPING.containsValue(entry.getKey())\r\n );\r\n}\r\n```\r\n\r\nOr you mean something else?", - "pr_file_module": null - }, - { - "comment_id": "2012395998", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 122491, - "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", - "discussion_id": "2012358741", - "commented_code": "@@ -0,0 +1,449 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+import java.util.stream.Collectors;\n+\n+final class CefParser {\n+\n+ private final IngestDocument ingestDocument;\n+ private final boolean removeEmptyValue;\n+\n+ CefParser(IngestDocument ingestDocument, boolean removeEmptyValue) {\n+ this.ingestDocument = ingestDocument;\n+ this.removeEmptyValue = removeEmptyValue;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:\\\\S|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = new HashMap<>();\n+ private static final Map FIELD_MAPPING = new HashMap<>();\n+ private static final String ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = \"incomplete CEF header\";\n+\n+ static {\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\\\\", \"\\\\\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\=\", \"=\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\n\", \"\\n\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\r\", \"\\r\");\n+\n+ FIELD_MAPPING.put(\"app\", \"network.protocol\");\n+ FIELD_MAPPING.put(\"in\", \"source.bytes\");\n+ FIELD_MAPPING.put(\"out\", \"destination.bytes\");\n+ FIELD_MAPPING.put(\"dst\", \"destination.ip\");\n+ FIELD_MAPPING.put(\"dlat\", \"destination.geo.location.lat\");\n+ FIELD_MAPPING.put(\"dlong\", \"destination.geo.location.lon\");\n+ FIELD_MAPPING.put(\"dhost\", \"destination.domain\");\n+ FIELD_MAPPING.put(\"dmac\", \"destination.mac\");\n+ FIELD_MAPPING.put(\"dntdom\", \"destination.registered_domain\");\n+ FIELD_MAPPING.put(\"dpt\", \"destination.port\");\n+ FIELD_MAPPING.put(\"dpid\", \"destination.process.pid\");\n+ FIELD_MAPPING.put(\"dproc\", \"destination.process.name\");\n+ FIELD_MAPPING.put(\"duid\", \"destination.user.id\");\n+ FIELD_MAPPING.put(\"duser\", \"destination.user.name\");\n+ FIELD_MAPPING.put(\"dpriv\", \"destination.user.group.name\");\n+ FIELD_MAPPING.put(\"act\", \"event.action\");\n+ FIELD_MAPPING.put(\"dvc\", \"observer.ip\");\n+ FIELD_MAPPING.put(\"deviceDirection\", \"network.direction\");\n+ FIELD_MAPPING.put(\"deviceDnsDomain\", 
\"observer.registered_domain\");\n+ FIELD_MAPPING.put(\"deviceExternalId\", \"observer.name\");\n+ FIELD_MAPPING.put(\"deviceFacility\", \"log.syslog.facility.code\");\n+ FIELD_MAPPING.put(\"dvchost\", \"observer.hostname\");\n+ FIELD_MAPPING.put(\"deviceInboundInterface\", \"observer.ingress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcmac\", \"observer.mac\");\n+ FIELD_MAPPING.put(\"deviceOutboundInterface\", \"observer.egress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcpid\", \"process.pid\");\n+ FIELD_MAPPING.put(\"deviceProcessName\", \"process.name\");\n+ FIELD_MAPPING.put(\"rt\", \"@timestamp\");\n+ FIELD_MAPPING.put(\"dtz\", \"event.timezone\");\n+ FIELD_MAPPING.put(\"deviceTranslatedAddress\", \"host.nat.ip\");\n+ FIELD_MAPPING.put(\"device.version\", \"observer.version\");\n+ FIELD_MAPPING.put(\"deviceVersion\", \"observer.version\");\n+ FIELD_MAPPING.put(\"device.product\", \"observer.product\");\n+ FIELD_MAPPING.put(\"deviceProduct\", \"observer.product\");\n+ FIELD_MAPPING.put(\"device.event_class_id\", \"event.code\");\n+ FIELD_MAPPING.put(\"device.vendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"deviceVendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"end\", \"event.end\");\n+ FIELD_MAPPING.put(\"eventId\", \"event.id\");\n+ FIELD_MAPPING.put(\"outcome\", \"event.outcome\");\n+ FIELD_MAPPING.put(\"fileCreateTime\", \"file.created\");\n+ FIELD_MAPPING.put(\"fileHash\", \"file.hash\");\n+ FIELD_MAPPING.put(\"fileId\", \"file.inode\");\n+ FIELD_MAPPING.put(\"fileModificationTime\", \"file.mtime\");\n+ FIELD_MAPPING.put(\"fname\", \"file.name\");\n+ FIELD_MAPPING.put(\"filePath\", \"file.path\");\n+ FIELD_MAPPING.put(\"filePermission\", \"file.group\");\n+ FIELD_MAPPING.put(\"fsize\", \"file.size\");\n+ FIELD_MAPPING.put(\"fileType\", \"file.extension\");\n+ FIELD_MAPPING.put(\"mrt\", \"event.ingested\");\n+ FIELD_MAPPING.put(\"msg\", \"message\");\n+ FIELD_MAPPING.put(\"reason\", \"event.reason\");\n+ FIELD_MAPPING.put(\"requestClientApplication\", \"user_agent.original\");\n+ FIELD_MAPPING.put(\"requestContext\", \"http.request.referrer\");\n+ FIELD_MAPPING.put(\"requestMethod\", \"http.request.method\");\n+ FIELD_MAPPING.put(\"request\", \"url.original\");\n+ FIELD_MAPPING.put(\"src\", \"source.ip\");\n+ FIELD_MAPPING.put(\"sourceDnsDomain\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"slat\", \"source.geo.location.lat\");\n+ FIELD_MAPPING.put(\"slong\", \"source.geo.location.lon\");\n+ FIELD_MAPPING.put(\"shost\", \"source.domain\");\n+ FIELD_MAPPING.put(\"smac\", \"source.mac\");\n+ FIELD_MAPPING.put(\"sntdom\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"spt\", \"source.port\");\n+ FIELD_MAPPING.put(\"spid\", \"source.process.pid\");\n+ FIELD_MAPPING.put(\"sproc\", \"source.process.name\");\n+ FIELD_MAPPING.put(\"sourceServiceName\", \"source.service.name\");\n+ FIELD_MAPPING.put(\"suser\", \"source.user.name\");\n+ FIELD_MAPPING.put(\"start\", \"event.start\");\n+ FIELD_MAPPING.put(\"proto\", \"network.transport\");\n+ // Add more mappings as needed\n+ }\n+\n+ void process(String cefString, String targetField) {\n+ List headerFields = new ArrayList<>();\n+ Matcher headerMatcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && headerMatcher.find(); i++) {\n+ String field = headerMatcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headerFields.add(field);\n+ extensionStart = headerMatcher.end();\n+ }\n+\n+ if (headerFields.size() > 0 && 
headerFields.get(0).startsWith(\"CEF:\")) {\n+ CEFEvent event = new CEFEvent();\n+ // Add error message if there are not enough header fields\n+ if (headerFields.size() != 7) {\n+ ingestDocument.appendFieldValue(\"error.message\", ERROR_MESSAGE_INCOMPLETE_CEF_HEADER);\n+ }\n+ for (int i = 0; i < headerFields.size(); i++) {\n+ switch (i) {\n+ case 0:\n+ event.setVersion(headerFields.get(0).substring(4));\n+ break;\n+ case 1:\n+ event.setDeviceVendor(headerFields.get(1));\n+ ingestDocument.setFieldValue(\"observer.vendor\", headerFields.get(1));\n+ break;\n+ case 2:\n+ event.setDeviceProduct(headerFields.get(2));\n+ ingestDocument.setFieldValue(\"observer.product\", headerFields.get(2));\n+ break;\n+ case 3:\n+ event.setDeviceVersion(headerFields.get(3));\n+ ingestDocument.setFieldValue(\"observer.version\", headerFields.get(3));\n+ break;\n+ case 4:\n+ event.setDeviceEventClassId(headerFields.get(4));\n+ ingestDocument.setFieldValue(\"event.code\", headerFields.get(4));\n+ break;\n+ case 5:\n+ event.setName(headerFields.get(5));\n+ break;\n+ case 6:\n+ event.setSeverity(headerFields.get(6));\n+ break;\n+ }\n+ }\n+ String extensionString = cefString.substring(extensionStart);\n+ Map extensions = parseExtensions(extensionString);\n+\n+ if (removeEmptyValue) {\n+ removeEmptyValue(extensions);\n+ }\n+ event.setExtensions(extensions);\n+\n+ // Translate possible ECS fields and remove them from extensions\n+ Map translatedFields = extensions.entrySet()\n+ .stream()\n+ .filter(entry -> FIELD_MAPPING.containsKey(entry.getKey()))\n+ .collect(Collectors.toMap(entry -> FIELD_MAPPING.get(entry.getKey()), entry -> {\n+ Class fieldType = ECSFieldWithType.getFieldType(FIELD_MAPPING.get(entry.getKey()));\n+ return convertValueToType(entry.getValue(), fieldType);\n+ }));\n+\n+ // Remove the translated entries from extensions\n+ event.removeMappedExtensions();\n+\n+ ingestDocument.setFieldValue(targetField, event.toObject());\n+ // Add ECS translations to the root of the document\n+ if (translatedFields.isEmpty() == false) {\n+ translatedFields.forEach(ingestDocument::setFieldValue);\n+ }\n+ } else {\n+ throw new IllegalArgumentException(\"Invalid CEF format\");\n+ }\n+ }\n+\n+ private Object convertValueToType(String value, Class type) {\n+ if (type == Long.class) {\n+ return Long.parseLong(value);\n+ } else if (type == Double.class) {\n+ return Double.parseDouble(value);\n+ } else if (type == Integer.class) {\n+ return Integer.parseInt(value);\n+ } else {\n+ return value; // Default to String\n+ }\n+ }\n+\n+ private Map parseExtensions(String extensionString) {\n+ Map extensions = new HashMap<>();\n+ Matcher matcher = EXTENSION_NEXT_KEY_VALUE_PATTERN.matcher(extensionString);\n+ int lastEnd = 0;\n+ while (matcher.find()) {\n+ String key = matcher.group(1);\n+ String value = matcher.group(2);\n+\n+ // Convert extension field name to strict legal field_reference\n+ if (key.endsWith(\"]\")) {\n+ key = convertArrayLikeKey(key);\n+ }\n+\n+ extensions.put(key, desanitizeExtensionVal(value.trim()));\n+ lastEnd = matcher.end();\n+ }\n+ // If there's any remaining unparsed content, throw an exception\n+ if (lastEnd < extensionString.length()) {\n+ throw new IllegalArgumentException(\"Invalid extensions; keyless value present: \" + extensionString.substring(lastEnd));\n+ }\n+ return extensions;\n+ }\n+\n+ private void removeEmptyValue(Map map) {\n+ map.entrySet().removeIf(entry -> entry.getValue().isEmpty());\n+ map.entrySet().removeIf(entry -> entry.getValue() == null);\n+ }\n+\n+ private String 
convertArrayLikeKey(String key) {\n+ Matcher matcher = EXTENSION_KEY_ARRAY_CAPTURE.matcher(key);\n+ if (matcher.matches()) {\n+ return \"[\" + matcher.group(1) + \"]\" + matcher.group(2);\n+ }\n+ return key;\n+ }\n+\n+ public static String desanitizeExtensionVal(String value) {\n+ String desanitized = value;\n+ for (Map.Entry entry : EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.entrySet()) {\n+ desanitized = desanitized.replace(entry.getKey(), entry.getValue());\n+ }\n+ return desanitized;\n+ }\n+\n+ public static class CEFEvent {\n+ private String version;\n+ private String deviceVendor;\n+ private String deviceProduct;\n+ private String deviceVersion;\n+ private String deviceEventClassId;\n+ private String name;\n+ private String severity;\n+ private final Map extensions = new HashMap<>();\n+ private Map translatedFields;\n+\n+ // Getters and setters for all fields\n+ public String getVersion() {\n+ return version;\n+ }\n+\n+ public void setVersion(String version) {\n+ this.version = version;\n+ }\n+\n+ public String getDeviceVendor() {\n+ return deviceVendor;\n+ }\n+\n+ public void setDeviceVendor(String deviceVendor) {\n+ this.deviceVendor = deviceVendor;\n+ }\n+\n+ public String getDeviceProduct() {\n+ return deviceProduct;\n+ }\n+\n+ public void setDeviceProduct(String deviceProduct) {\n+ this.deviceProduct = deviceProduct;\n+ }\n+\n+ public String getDeviceVersion() {\n+ return deviceVersion;\n+ }\n+\n+ public void setDeviceVersion(String deviceVersion) {\n+ this.deviceVersion = deviceVersion;\n+ }\n+\n+ public String getDeviceEventClassId() {\n+ return deviceEventClassId;\n+ }\n+\n+ public void setDeviceEventClassId(String deviceEventClassId) {\n+ this.deviceEventClassId = deviceEventClassId;\n+ }\n+\n+ public String getName() {\n+ return name;\n+ }\n+\n+ public void setName(String name) {\n+ this.name = name;\n+ }\n+\n+ public String getSeverity() {\n+ return severity;\n+ }\n+\n+ public void setSeverity(String severity) {\n+ this.severity = severity;\n+ }\n+\n+ public Map getExtensions() {\n+ return extensions;\n+ }\n+\n+ public void setExtensions(Map extensions) {\n+ this.extensions.putAll(extensions);\n+ }\n+\n+ public void addExtension(String key, String extension) {\n+ this.extensions.put(key, extension);\n+ }\n+\n+ public void removeMappedExtensions() {\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsKey(entry.getKey()));\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsValue(entry.getKey()));", - "comment_created_at": "2025-03-25T15:39:16+00:00", - "comment_author": "joegallo", - "comment_body": "Explain to me how `containsValue` works on a Map.", - "pr_file_module": null - }, - { - "comment_id": "2012404187", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 122491, - "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", - "discussion_id": "2012358741", - "commented_code": "@@ -0,0 +1,449 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+import java.util.stream.Collectors;\n+\n+final class CefParser {\n+\n+ private final IngestDocument ingestDocument;\n+ private final boolean removeEmptyValue;\n+\n+ CefParser(IngestDocument ingestDocument, boolean removeEmptyValue) {\n+ this.ingestDocument = ingestDocument;\n+ this.removeEmptyValue = removeEmptyValue;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:\\\\S|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = new HashMap<>();\n+ private static final Map FIELD_MAPPING = new HashMap<>();\n+ private static final String ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = \"incomplete CEF header\";\n+\n+ static {\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\\\\", \"\\\\\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\=\", \"=\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\n\", \"\\n\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\r\", \"\\r\");\n+\n+ FIELD_MAPPING.put(\"app\", \"network.protocol\");\n+ FIELD_MAPPING.put(\"in\", \"source.bytes\");\n+ FIELD_MAPPING.put(\"out\", \"destination.bytes\");\n+ FIELD_MAPPING.put(\"dst\", \"destination.ip\");\n+ FIELD_MAPPING.put(\"dlat\", \"destination.geo.location.lat\");\n+ FIELD_MAPPING.put(\"dlong\", \"destination.geo.location.lon\");\n+ FIELD_MAPPING.put(\"dhost\", \"destination.domain\");\n+ FIELD_MAPPING.put(\"dmac\", \"destination.mac\");\n+ FIELD_MAPPING.put(\"dntdom\", \"destination.registered_domain\");\n+ FIELD_MAPPING.put(\"dpt\", \"destination.port\");\n+ FIELD_MAPPING.put(\"dpid\", \"destination.process.pid\");\n+ FIELD_MAPPING.put(\"dproc\", \"destination.process.name\");\n+ FIELD_MAPPING.put(\"duid\", \"destination.user.id\");\n+ FIELD_MAPPING.put(\"duser\", \"destination.user.name\");\n+ FIELD_MAPPING.put(\"dpriv\", \"destination.user.group.name\");\n+ FIELD_MAPPING.put(\"act\", \"event.action\");\n+ FIELD_MAPPING.put(\"dvc\", \"observer.ip\");\n+ FIELD_MAPPING.put(\"deviceDirection\", \"network.direction\");\n+ FIELD_MAPPING.put(\"deviceDnsDomain\", 
\"observer.registered_domain\");\n+ FIELD_MAPPING.put(\"deviceExternalId\", \"observer.name\");\n+ FIELD_MAPPING.put(\"deviceFacility\", \"log.syslog.facility.code\");\n+ FIELD_MAPPING.put(\"dvchost\", \"observer.hostname\");\n+ FIELD_MAPPING.put(\"deviceInboundInterface\", \"observer.ingress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcmac\", \"observer.mac\");\n+ FIELD_MAPPING.put(\"deviceOutboundInterface\", \"observer.egress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcpid\", \"process.pid\");\n+ FIELD_MAPPING.put(\"deviceProcessName\", \"process.name\");\n+ FIELD_MAPPING.put(\"rt\", \"@timestamp\");\n+ FIELD_MAPPING.put(\"dtz\", \"event.timezone\");\n+ FIELD_MAPPING.put(\"deviceTranslatedAddress\", \"host.nat.ip\");\n+ FIELD_MAPPING.put(\"device.version\", \"observer.version\");\n+ FIELD_MAPPING.put(\"deviceVersion\", \"observer.version\");\n+ FIELD_MAPPING.put(\"device.product\", \"observer.product\");\n+ FIELD_MAPPING.put(\"deviceProduct\", \"observer.product\");\n+ FIELD_MAPPING.put(\"device.event_class_id\", \"event.code\");\n+ FIELD_MAPPING.put(\"device.vendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"deviceVendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"end\", \"event.end\");\n+ FIELD_MAPPING.put(\"eventId\", \"event.id\");\n+ FIELD_MAPPING.put(\"outcome\", \"event.outcome\");\n+ FIELD_MAPPING.put(\"fileCreateTime\", \"file.created\");\n+ FIELD_MAPPING.put(\"fileHash\", \"file.hash\");\n+ FIELD_MAPPING.put(\"fileId\", \"file.inode\");\n+ FIELD_MAPPING.put(\"fileModificationTime\", \"file.mtime\");\n+ FIELD_MAPPING.put(\"fname\", \"file.name\");\n+ FIELD_MAPPING.put(\"filePath\", \"file.path\");\n+ FIELD_MAPPING.put(\"filePermission\", \"file.group\");\n+ FIELD_MAPPING.put(\"fsize\", \"file.size\");\n+ FIELD_MAPPING.put(\"fileType\", \"file.extension\");\n+ FIELD_MAPPING.put(\"mrt\", \"event.ingested\");\n+ FIELD_MAPPING.put(\"msg\", \"message\");\n+ FIELD_MAPPING.put(\"reason\", \"event.reason\");\n+ FIELD_MAPPING.put(\"requestClientApplication\", \"user_agent.original\");\n+ FIELD_MAPPING.put(\"requestContext\", \"http.request.referrer\");\n+ FIELD_MAPPING.put(\"requestMethod\", \"http.request.method\");\n+ FIELD_MAPPING.put(\"request\", \"url.original\");\n+ FIELD_MAPPING.put(\"src\", \"source.ip\");\n+ FIELD_MAPPING.put(\"sourceDnsDomain\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"slat\", \"source.geo.location.lat\");\n+ FIELD_MAPPING.put(\"slong\", \"source.geo.location.lon\");\n+ FIELD_MAPPING.put(\"shost\", \"source.domain\");\n+ FIELD_MAPPING.put(\"smac\", \"source.mac\");\n+ FIELD_MAPPING.put(\"sntdom\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"spt\", \"source.port\");\n+ FIELD_MAPPING.put(\"spid\", \"source.process.pid\");\n+ FIELD_MAPPING.put(\"sproc\", \"source.process.name\");\n+ FIELD_MAPPING.put(\"sourceServiceName\", \"source.service.name\");\n+ FIELD_MAPPING.put(\"suser\", \"source.user.name\");\n+ FIELD_MAPPING.put(\"start\", \"event.start\");\n+ FIELD_MAPPING.put(\"proto\", \"network.transport\");\n+ // Add more mappings as needed\n+ }\n+\n+ void process(String cefString, String targetField) {\n+ List headerFields = new ArrayList<>();\n+ Matcher headerMatcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && headerMatcher.find(); i++) {\n+ String field = headerMatcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headerFields.add(field);\n+ extensionStart = headerMatcher.end();\n+ }\n+\n+ if (headerFields.size() > 0 && 
headerFields.get(0).startsWith(\"CEF:\")) {\n+ CEFEvent event = new CEFEvent();\n+ // Add error message if there are not enough header fields\n+ if (headerFields.size() != 7) {\n+ ingestDocument.appendFieldValue(\"error.message\", ERROR_MESSAGE_INCOMPLETE_CEF_HEADER);\n+ }\n+ for (int i = 0; i < headerFields.size(); i++) {\n+ switch (i) {\n+ case 0:\n+ event.setVersion(headerFields.get(0).substring(4));\n+ break;\n+ case 1:\n+ event.setDeviceVendor(headerFields.get(1));\n+ ingestDocument.setFieldValue(\"observer.vendor\", headerFields.get(1));\n+ break;\n+ case 2:\n+ event.setDeviceProduct(headerFields.get(2));\n+ ingestDocument.setFieldValue(\"observer.product\", headerFields.get(2));\n+ break;\n+ case 3:\n+ event.setDeviceVersion(headerFields.get(3));\n+ ingestDocument.setFieldValue(\"observer.version\", headerFields.get(3));\n+ break;\n+ case 4:\n+ event.setDeviceEventClassId(headerFields.get(4));\n+ ingestDocument.setFieldValue(\"event.code\", headerFields.get(4));\n+ break;\n+ case 5:\n+ event.setName(headerFields.get(5));\n+ break;\n+ case 6:\n+ event.setSeverity(headerFields.get(6));\n+ break;\n+ }\n+ }\n+ String extensionString = cefString.substring(extensionStart);\n+ Map extensions = parseExtensions(extensionString);\n+\n+ if (removeEmptyValue) {\n+ removeEmptyValue(extensions);\n+ }\n+ event.setExtensions(extensions);\n+\n+ // Translate possible ECS fields and remove them from extensions\n+ Map translatedFields = extensions.entrySet()\n+ .stream()\n+ .filter(entry -> FIELD_MAPPING.containsKey(entry.getKey()))\n+ .collect(Collectors.toMap(entry -> FIELD_MAPPING.get(entry.getKey()), entry -> {\n+ Class fieldType = ECSFieldWithType.getFieldType(FIELD_MAPPING.get(entry.getKey()));\n+ return convertValueToType(entry.getValue(), fieldType);\n+ }));\n+\n+ // Remove the translated entries from extensions\n+ event.removeMappedExtensions();\n+\n+ ingestDocument.setFieldValue(targetField, event.toObject());\n+ // Add ECS translations to the root of the document\n+ if (translatedFields.isEmpty() == false) {\n+ translatedFields.forEach(ingestDocument::setFieldValue);\n+ }\n+ } else {\n+ throw new IllegalArgumentException(\"Invalid CEF format\");\n+ }\n+ }\n+\n+ private Object convertValueToType(String value, Class type) {\n+ if (type == Long.class) {\n+ return Long.parseLong(value);\n+ } else if (type == Double.class) {\n+ return Double.parseDouble(value);\n+ } else if (type == Integer.class) {\n+ return Integer.parseInt(value);\n+ } else {\n+ return value; // Default to String\n+ }\n+ }\n+\n+ private Map parseExtensions(String extensionString) {\n+ Map extensions = new HashMap<>();\n+ Matcher matcher = EXTENSION_NEXT_KEY_VALUE_PATTERN.matcher(extensionString);\n+ int lastEnd = 0;\n+ while (matcher.find()) {\n+ String key = matcher.group(1);\n+ String value = matcher.group(2);\n+\n+ // Convert extension field name to strict legal field_reference\n+ if (key.endsWith(\"]\")) {\n+ key = convertArrayLikeKey(key);\n+ }\n+\n+ extensions.put(key, desanitizeExtensionVal(value.trim()));\n+ lastEnd = matcher.end();\n+ }\n+ // If there's any remaining unparsed content, throw an exception\n+ if (lastEnd < extensionString.length()) {\n+ throw new IllegalArgumentException(\"Invalid extensions; keyless value present: \" + extensionString.substring(lastEnd));\n+ }\n+ return extensions;\n+ }\n+\n+ private void removeEmptyValue(Map map) {\n+ map.entrySet().removeIf(entry -> entry.getValue().isEmpty());\n+ map.entrySet().removeIf(entry -> entry.getValue() == null);\n+ }\n+\n+ private String 
convertArrayLikeKey(String key) {\n+ Matcher matcher = EXTENSION_KEY_ARRAY_CAPTURE.matcher(key);\n+ if (matcher.matches()) {\n+ return \"[\" + matcher.group(1) + \"]\" + matcher.group(2);\n+ }\n+ return key;\n+ }\n+\n+ public static String desanitizeExtensionVal(String value) {\n+ String desanitized = value;\n+ for (Map.Entry entry : EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.entrySet()) {\n+ desanitized = desanitized.replace(entry.getKey(), entry.getValue());\n+ }\n+ return desanitized;\n+ }\n+\n+ public static class CEFEvent {\n+ private String version;\n+ private String deviceVendor;\n+ private String deviceProduct;\n+ private String deviceVersion;\n+ private String deviceEventClassId;\n+ private String name;\n+ private String severity;\n+ private final Map extensions = new HashMap<>();\n+ private Map translatedFields;\n+\n+ // Getters and setters for all fields\n+ public String getVersion() {\n+ return version;\n+ }\n+\n+ public void setVersion(String version) {\n+ this.version = version;\n+ }\n+\n+ public String getDeviceVendor() {\n+ return deviceVendor;\n+ }\n+\n+ public void setDeviceVendor(String deviceVendor) {\n+ this.deviceVendor = deviceVendor;\n+ }\n+\n+ public String getDeviceProduct() {\n+ return deviceProduct;\n+ }\n+\n+ public void setDeviceProduct(String deviceProduct) {\n+ this.deviceProduct = deviceProduct;\n+ }\n+\n+ public String getDeviceVersion() {\n+ return deviceVersion;\n+ }\n+\n+ public void setDeviceVersion(String deviceVersion) {\n+ this.deviceVersion = deviceVersion;\n+ }\n+\n+ public String getDeviceEventClassId() {\n+ return deviceEventClassId;\n+ }\n+\n+ public void setDeviceEventClassId(String deviceEventClassId) {\n+ this.deviceEventClassId = deviceEventClassId;\n+ }\n+\n+ public String getName() {\n+ return name;\n+ }\n+\n+ public void setName(String name) {\n+ this.name = name;\n+ }\n+\n+ public String getSeverity() {\n+ return severity;\n+ }\n+\n+ public void setSeverity(String severity) {\n+ this.severity = severity;\n+ }\n+\n+ public Map getExtensions() {\n+ return extensions;\n+ }\n+\n+ public void setExtensions(Map extensions) {\n+ this.extensions.putAll(extensions);\n+ }\n+\n+ public void addExtension(String key, String extension) {\n+ this.extensions.put(key, extension);\n+ }\n+\n+ public void removeMappedExtensions() {\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsKey(entry.getKey()));\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsValue(entry.getKey()));", - "comment_created_at": "2025-03-25T15:43:23+00:00", - "comment_author": "bhapas", - "comment_body": "Ohh yeah I get your point now.. I was only looking at `containsKey` thing. Yeah the `containsValue` iterates over the N entries each time. I ll figure out a different way for this.", - "pr_file_module": null - }, - { - "comment_id": "2012455120", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 122491, - "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", - "discussion_id": "2012358741", - "commented_code": "@@ -0,0 +1,449 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+import java.util.stream.Collectors;\n+\n+final class CefParser {\n+\n+ private final IngestDocument ingestDocument;\n+ private final boolean removeEmptyValue;\n+\n+ CefParser(IngestDocument ingestDocument, boolean removeEmptyValue) {\n+ this.ingestDocument = ingestDocument;\n+ this.removeEmptyValue = removeEmptyValue;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:\\\\S|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = new HashMap<>();\n+ private static final Map FIELD_MAPPING = new HashMap<>();\n+ private static final String ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = \"incomplete CEF header\";\n+\n+ static {\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\\\\", \"\\\\\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\=\", \"=\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\n\", \"\\n\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\r\", \"\\r\");\n+\n+ FIELD_MAPPING.put(\"app\", \"network.protocol\");\n+ FIELD_MAPPING.put(\"in\", \"source.bytes\");\n+ FIELD_MAPPING.put(\"out\", \"destination.bytes\");\n+ FIELD_MAPPING.put(\"dst\", \"destination.ip\");\n+ FIELD_MAPPING.put(\"dlat\", \"destination.geo.location.lat\");\n+ FIELD_MAPPING.put(\"dlong\", \"destination.geo.location.lon\");\n+ FIELD_MAPPING.put(\"dhost\", \"destination.domain\");\n+ FIELD_MAPPING.put(\"dmac\", \"destination.mac\");\n+ FIELD_MAPPING.put(\"dntdom\", \"destination.registered_domain\");\n+ FIELD_MAPPING.put(\"dpt\", \"destination.port\");\n+ FIELD_MAPPING.put(\"dpid\", \"destination.process.pid\");\n+ FIELD_MAPPING.put(\"dproc\", \"destination.process.name\");\n+ FIELD_MAPPING.put(\"duid\", \"destination.user.id\");\n+ FIELD_MAPPING.put(\"duser\", \"destination.user.name\");\n+ FIELD_MAPPING.put(\"dpriv\", \"destination.user.group.name\");\n+ FIELD_MAPPING.put(\"act\", \"event.action\");\n+ FIELD_MAPPING.put(\"dvc\", \"observer.ip\");\n+ FIELD_MAPPING.put(\"deviceDirection\", \"network.direction\");\n+ FIELD_MAPPING.put(\"deviceDnsDomain\", 
\"observer.registered_domain\");\n+ FIELD_MAPPING.put(\"deviceExternalId\", \"observer.name\");\n+ FIELD_MAPPING.put(\"deviceFacility\", \"log.syslog.facility.code\");\n+ FIELD_MAPPING.put(\"dvchost\", \"observer.hostname\");\n+ FIELD_MAPPING.put(\"deviceInboundInterface\", \"observer.ingress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcmac\", \"observer.mac\");\n+ FIELD_MAPPING.put(\"deviceOutboundInterface\", \"observer.egress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcpid\", \"process.pid\");\n+ FIELD_MAPPING.put(\"deviceProcessName\", \"process.name\");\n+ FIELD_MAPPING.put(\"rt\", \"@timestamp\");\n+ FIELD_MAPPING.put(\"dtz\", \"event.timezone\");\n+ FIELD_MAPPING.put(\"deviceTranslatedAddress\", \"host.nat.ip\");\n+ FIELD_MAPPING.put(\"device.version\", \"observer.version\");\n+ FIELD_MAPPING.put(\"deviceVersion\", \"observer.version\");\n+ FIELD_MAPPING.put(\"device.product\", \"observer.product\");\n+ FIELD_MAPPING.put(\"deviceProduct\", \"observer.product\");\n+ FIELD_MAPPING.put(\"device.event_class_id\", \"event.code\");\n+ FIELD_MAPPING.put(\"device.vendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"deviceVendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"end\", \"event.end\");\n+ FIELD_MAPPING.put(\"eventId\", \"event.id\");\n+ FIELD_MAPPING.put(\"outcome\", \"event.outcome\");\n+ FIELD_MAPPING.put(\"fileCreateTime\", \"file.created\");\n+ FIELD_MAPPING.put(\"fileHash\", \"file.hash\");\n+ FIELD_MAPPING.put(\"fileId\", \"file.inode\");\n+ FIELD_MAPPING.put(\"fileModificationTime\", \"file.mtime\");\n+ FIELD_MAPPING.put(\"fname\", \"file.name\");\n+ FIELD_MAPPING.put(\"filePath\", \"file.path\");\n+ FIELD_MAPPING.put(\"filePermission\", \"file.group\");\n+ FIELD_MAPPING.put(\"fsize\", \"file.size\");\n+ FIELD_MAPPING.put(\"fileType\", \"file.extension\");\n+ FIELD_MAPPING.put(\"mrt\", \"event.ingested\");\n+ FIELD_MAPPING.put(\"msg\", \"message\");\n+ FIELD_MAPPING.put(\"reason\", \"event.reason\");\n+ FIELD_MAPPING.put(\"requestClientApplication\", \"user_agent.original\");\n+ FIELD_MAPPING.put(\"requestContext\", \"http.request.referrer\");\n+ FIELD_MAPPING.put(\"requestMethod\", \"http.request.method\");\n+ FIELD_MAPPING.put(\"request\", \"url.original\");\n+ FIELD_MAPPING.put(\"src\", \"source.ip\");\n+ FIELD_MAPPING.put(\"sourceDnsDomain\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"slat\", \"source.geo.location.lat\");\n+ FIELD_MAPPING.put(\"slong\", \"source.geo.location.lon\");\n+ FIELD_MAPPING.put(\"shost\", \"source.domain\");\n+ FIELD_MAPPING.put(\"smac\", \"source.mac\");\n+ FIELD_MAPPING.put(\"sntdom\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"spt\", \"source.port\");\n+ FIELD_MAPPING.put(\"spid\", \"source.process.pid\");\n+ FIELD_MAPPING.put(\"sproc\", \"source.process.name\");\n+ FIELD_MAPPING.put(\"sourceServiceName\", \"source.service.name\");\n+ FIELD_MAPPING.put(\"suser\", \"source.user.name\");\n+ FIELD_MAPPING.put(\"start\", \"event.start\");\n+ FIELD_MAPPING.put(\"proto\", \"network.transport\");\n+ // Add more mappings as needed\n+ }\n+\n+ void process(String cefString, String targetField) {\n+ List headerFields = new ArrayList<>();\n+ Matcher headerMatcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && headerMatcher.find(); i++) {\n+ String field = headerMatcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headerFields.add(field);\n+ extensionStart = headerMatcher.end();\n+ }\n+\n+ if (headerFields.size() > 0 && 
headerFields.get(0).startsWith(\"CEF:\")) {\n+ CEFEvent event = new CEFEvent();\n+ // Add error message if there are not enough header fields\n+ if (headerFields.size() != 7) {\n+ ingestDocument.appendFieldValue(\"error.message\", ERROR_MESSAGE_INCOMPLETE_CEF_HEADER);\n+ }\n+ for (int i = 0; i < headerFields.size(); i++) {\n+ switch (i) {\n+ case 0:\n+ event.setVersion(headerFields.get(0).substring(4));\n+ break;\n+ case 1:\n+ event.setDeviceVendor(headerFields.get(1));\n+ ingestDocument.setFieldValue(\"observer.vendor\", headerFields.get(1));\n+ break;\n+ case 2:\n+ event.setDeviceProduct(headerFields.get(2));\n+ ingestDocument.setFieldValue(\"observer.product\", headerFields.get(2));\n+ break;\n+ case 3:\n+ event.setDeviceVersion(headerFields.get(3));\n+ ingestDocument.setFieldValue(\"observer.version\", headerFields.get(3));\n+ break;\n+ case 4:\n+ event.setDeviceEventClassId(headerFields.get(4));\n+ ingestDocument.setFieldValue(\"event.code\", headerFields.get(4));\n+ break;\n+ case 5:\n+ event.setName(headerFields.get(5));\n+ break;\n+ case 6:\n+ event.setSeverity(headerFields.get(6));\n+ break;\n+ }\n+ }\n+ String extensionString = cefString.substring(extensionStart);\n+ Map extensions = parseExtensions(extensionString);\n+\n+ if (removeEmptyValue) {\n+ removeEmptyValue(extensions);\n+ }\n+ event.setExtensions(extensions);\n+\n+ // Translate possible ECS fields and remove them from extensions\n+ Map translatedFields = extensions.entrySet()\n+ .stream()\n+ .filter(entry -> FIELD_MAPPING.containsKey(entry.getKey()))\n+ .collect(Collectors.toMap(entry -> FIELD_MAPPING.get(entry.getKey()), entry -> {\n+ Class fieldType = ECSFieldWithType.getFieldType(FIELD_MAPPING.get(entry.getKey()));\n+ return convertValueToType(entry.getValue(), fieldType);\n+ }));\n+\n+ // Remove the translated entries from extensions\n+ event.removeMappedExtensions();\n+\n+ ingestDocument.setFieldValue(targetField, event.toObject());\n+ // Add ECS translations to the root of the document\n+ if (translatedFields.isEmpty() == false) {\n+ translatedFields.forEach(ingestDocument::setFieldValue);\n+ }\n+ } else {\n+ throw new IllegalArgumentException(\"Invalid CEF format\");\n+ }\n+ }\n+\n+ private Object convertValueToType(String value, Class type) {\n+ if (type == Long.class) {\n+ return Long.parseLong(value);\n+ } else if (type == Double.class) {\n+ return Double.parseDouble(value);\n+ } else if (type == Integer.class) {\n+ return Integer.parseInt(value);\n+ } else {\n+ return value; // Default to String\n+ }\n+ }\n+\n+ private Map parseExtensions(String extensionString) {\n+ Map extensions = new HashMap<>();\n+ Matcher matcher = EXTENSION_NEXT_KEY_VALUE_PATTERN.matcher(extensionString);\n+ int lastEnd = 0;\n+ while (matcher.find()) {\n+ String key = matcher.group(1);\n+ String value = matcher.group(2);\n+\n+ // Convert extension field name to strict legal field_reference\n+ if (key.endsWith(\"]\")) {\n+ key = convertArrayLikeKey(key);\n+ }\n+\n+ extensions.put(key, desanitizeExtensionVal(value.trim()));\n+ lastEnd = matcher.end();\n+ }\n+ // If there's any remaining unparsed content, throw an exception\n+ if (lastEnd < extensionString.length()) {\n+ throw new IllegalArgumentException(\"Invalid extensions; keyless value present: \" + extensionString.substring(lastEnd));\n+ }\n+ return extensions;\n+ }\n+\n+ private void removeEmptyValue(Map map) {\n+ map.entrySet().removeIf(entry -> entry.getValue().isEmpty());\n+ map.entrySet().removeIf(entry -> entry.getValue() == null);\n+ }\n+\n+ private String 
convertArrayLikeKey(String key) {\n+ Matcher matcher = EXTENSION_KEY_ARRAY_CAPTURE.matcher(key);\n+ if (matcher.matches()) {\n+ return \"[\" + matcher.group(1) + \"]\" + matcher.group(2);\n+ }\n+ return key;\n+ }\n+\n+ public static String desanitizeExtensionVal(String value) {\n+ String desanitized = value;\n+ for (Map.Entry entry : EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.entrySet()) {\n+ desanitized = desanitized.replace(entry.getKey(), entry.getValue());\n+ }\n+ return desanitized;\n+ }\n+\n+ public static class CEFEvent {\n+ private String version;\n+ private String deviceVendor;\n+ private String deviceProduct;\n+ private String deviceVersion;\n+ private String deviceEventClassId;\n+ private String name;\n+ private String severity;\n+ private final Map extensions = new HashMap<>();\n+ private Map translatedFields;\n+\n+ // Getters and setters for all fields\n+ public String getVersion() {\n+ return version;\n+ }\n+\n+ public void setVersion(String version) {\n+ this.version = version;\n+ }\n+\n+ public String getDeviceVendor() {\n+ return deviceVendor;\n+ }\n+\n+ public void setDeviceVendor(String deviceVendor) {\n+ this.deviceVendor = deviceVendor;\n+ }\n+\n+ public String getDeviceProduct() {\n+ return deviceProduct;\n+ }\n+\n+ public void setDeviceProduct(String deviceProduct) {\n+ this.deviceProduct = deviceProduct;\n+ }\n+\n+ public String getDeviceVersion() {\n+ return deviceVersion;\n+ }\n+\n+ public void setDeviceVersion(String deviceVersion) {\n+ this.deviceVersion = deviceVersion;\n+ }\n+\n+ public String getDeviceEventClassId() {\n+ return deviceEventClassId;\n+ }\n+\n+ public void setDeviceEventClassId(String deviceEventClassId) {\n+ this.deviceEventClassId = deviceEventClassId;\n+ }\n+\n+ public String getName() {\n+ return name;\n+ }\n+\n+ public void setName(String name) {\n+ this.name = name;\n+ }\n+\n+ public String getSeverity() {\n+ return severity;\n+ }\n+\n+ public void setSeverity(String severity) {\n+ this.severity = severity;\n+ }\n+\n+ public Map getExtensions() {\n+ return extensions;\n+ }\n+\n+ public void setExtensions(Map extensions) {\n+ this.extensions.putAll(extensions);\n+ }\n+\n+ public void addExtension(String key, String extension) {\n+ this.extensions.put(key, extension);\n+ }\n+\n+ public void removeMappedExtensions() {\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsKey(entry.getKey()));\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsValue(entry.getKey()));", - "comment_created_at": "2025-03-25T16:09:29+00:00", - "comment_author": "joegallo", - "comment_body": "If you smash all the keys and all the values into a single additional static `Set`, then you could do a `this.extensions.removeAll(thatStaticSet)`.", - "pr_file_module": null - }, - { - "comment_id": "2014378954", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 122491, - "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", - "discussion_id": "2012358741", - "commented_code": "@@ -0,0 +1,449 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+import java.util.stream.Collectors;\n+\n+final class CefParser {\n+\n+ private final IngestDocument ingestDocument;\n+ private final boolean removeEmptyValue;\n+\n+ CefParser(IngestDocument ingestDocument, boolean removeEmptyValue) {\n+ this.ingestDocument = ingestDocument;\n+ this.removeEmptyValue = removeEmptyValue;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:\\\\S|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = new HashMap<>();\n+ private static final Map FIELD_MAPPING = new HashMap<>();\n+ private static final String ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = \"incomplete CEF header\";\n+\n+ static {\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\\\\", \"\\\\\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\=\", \"=\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\n\", \"\\n\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\r\", \"\\r\");\n+\n+ FIELD_MAPPING.put(\"app\", \"network.protocol\");\n+ FIELD_MAPPING.put(\"in\", \"source.bytes\");\n+ FIELD_MAPPING.put(\"out\", \"destination.bytes\");\n+ FIELD_MAPPING.put(\"dst\", \"destination.ip\");\n+ FIELD_MAPPING.put(\"dlat\", \"destination.geo.location.lat\");\n+ FIELD_MAPPING.put(\"dlong\", \"destination.geo.location.lon\");\n+ FIELD_MAPPING.put(\"dhost\", \"destination.domain\");\n+ FIELD_MAPPING.put(\"dmac\", \"destination.mac\");\n+ FIELD_MAPPING.put(\"dntdom\", \"destination.registered_domain\");\n+ FIELD_MAPPING.put(\"dpt\", \"destination.port\");\n+ FIELD_MAPPING.put(\"dpid\", \"destination.process.pid\");\n+ FIELD_MAPPING.put(\"dproc\", \"destination.process.name\");\n+ FIELD_MAPPING.put(\"duid\", \"destination.user.id\");\n+ FIELD_MAPPING.put(\"duser\", \"destination.user.name\");\n+ FIELD_MAPPING.put(\"dpriv\", \"destination.user.group.name\");\n+ FIELD_MAPPING.put(\"act\", \"event.action\");\n+ FIELD_MAPPING.put(\"dvc\", \"observer.ip\");\n+ FIELD_MAPPING.put(\"deviceDirection\", \"network.direction\");\n+ FIELD_MAPPING.put(\"deviceDnsDomain\", 
\"observer.registered_domain\");\n+ FIELD_MAPPING.put(\"deviceExternalId\", \"observer.name\");\n+ FIELD_MAPPING.put(\"deviceFacility\", \"log.syslog.facility.code\");\n+ FIELD_MAPPING.put(\"dvchost\", \"observer.hostname\");\n+ FIELD_MAPPING.put(\"deviceInboundInterface\", \"observer.ingress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcmac\", \"observer.mac\");\n+ FIELD_MAPPING.put(\"deviceOutboundInterface\", \"observer.egress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcpid\", \"process.pid\");\n+ FIELD_MAPPING.put(\"deviceProcessName\", \"process.name\");\n+ FIELD_MAPPING.put(\"rt\", \"@timestamp\");\n+ FIELD_MAPPING.put(\"dtz\", \"event.timezone\");\n+ FIELD_MAPPING.put(\"deviceTranslatedAddress\", \"host.nat.ip\");\n+ FIELD_MAPPING.put(\"device.version\", \"observer.version\");\n+ FIELD_MAPPING.put(\"deviceVersion\", \"observer.version\");\n+ FIELD_MAPPING.put(\"device.product\", \"observer.product\");\n+ FIELD_MAPPING.put(\"deviceProduct\", \"observer.product\");\n+ FIELD_MAPPING.put(\"device.event_class_id\", \"event.code\");\n+ FIELD_MAPPING.put(\"device.vendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"deviceVendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"end\", \"event.end\");\n+ FIELD_MAPPING.put(\"eventId\", \"event.id\");\n+ FIELD_MAPPING.put(\"outcome\", \"event.outcome\");\n+ FIELD_MAPPING.put(\"fileCreateTime\", \"file.created\");\n+ FIELD_MAPPING.put(\"fileHash\", \"file.hash\");\n+ FIELD_MAPPING.put(\"fileId\", \"file.inode\");\n+ FIELD_MAPPING.put(\"fileModificationTime\", \"file.mtime\");\n+ FIELD_MAPPING.put(\"fname\", \"file.name\");\n+ FIELD_MAPPING.put(\"filePath\", \"file.path\");\n+ FIELD_MAPPING.put(\"filePermission\", \"file.group\");\n+ FIELD_MAPPING.put(\"fsize\", \"file.size\");\n+ FIELD_MAPPING.put(\"fileType\", \"file.extension\");\n+ FIELD_MAPPING.put(\"mrt\", \"event.ingested\");\n+ FIELD_MAPPING.put(\"msg\", \"message\");\n+ FIELD_MAPPING.put(\"reason\", \"event.reason\");\n+ FIELD_MAPPING.put(\"requestClientApplication\", \"user_agent.original\");\n+ FIELD_MAPPING.put(\"requestContext\", \"http.request.referrer\");\n+ FIELD_MAPPING.put(\"requestMethod\", \"http.request.method\");\n+ FIELD_MAPPING.put(\"request\", \"url.original\");\n+ FIELD_MAPPING.put(\"src\", \"source.ip\");\n+ FIELD_MAPPING.put(\"sourceDnsDomain\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"slat\", \"source.geo.location.lat\");\n+ FIELD_MAPPING.put(\"slong\", \"source.geo.location.lon\");\n+ FIELD_MAPPING.put(\"shost\", \"source.domain\");\n+ FIELD_MAPPING.put(\"smac\", \"source.mac\");\n+ FIELD_MAPPING.put(\"sntdom\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"spt\", \"source.port\");\n+ FIELD_MAPPING.put(\"spid\", \"source.process.pid\");\n+ FIELD_MAPPING.put(\"sproc\", \"source.process.name\");\n+ FIELD_MAPPING.put(\"sourceServiceName\", \"source.service.name\");\n+ FIELD_MAPPING.put(\"suser\", \"source.user.name\");\n+ FIELD_MAPPING.put(\"start\", \"event.start\");\n+ FIELD_MAPPING.put(\"proto\", \"network.transport\");\n+ // Add more mappings as needed\n+ }\n+\n+ void process(String cefString, String targetField) {\n+ List headerFields = new ArrayList<>();\n+ Matcher headerMatcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && headerMatcher.find(); i++) {\n+ String field = headerMatcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headerFields.add(field);\n+ extensionStart = headerMatcher.end();\n+ }\n+\n+ if (headerFields.size() > 0 && 
headerFields.get(0).startsWith(\"CEF:\")) {\n+ CEFEvent event = new CEFEvent();\n+ // Add error message if there are not enough header fields\n+ if (headerFields.size() != 7) {\n+ ingestDocument.appendFieldValue(\"error.message\", ERROR_MESSAGE_INCOMPLETE_CEF_HEADER);\n+ }\n+ for (int i = 0; i < headerFields.size(); i++) {\n+ switch (i) {\n+ case 0:\n+ event.setVersion(headerFields.get(0).substring(4));\n+ break;\n+ case 1:\n+ event.setDeviceVendor(headerFields.get(1));\n+ ingestDocument.setFieldValue(\"observer.vendor\", headerFields.get(1));\n+ break;\n+ case 2:\n+ event.setDeviceProduct(headerFields.get(2));\n+ ingestDocument.setFieldValue(\"observer.product\", headerFields.get(2));\n+ break;\n+ case 3:\n+ event.setDeviceVersion(headerFields.get(3));\n+ ingestDocument.setFieldValue(\"observer.version\", headerFields.get(3));\n+ break;\n+ case 4:\n+ event.setDeviceEventClassId(headerFields.get(4));\n+ ingestDocument.setFieldValue(\"event.code\", headerFields.get(4));\n+ break;\n+ case 5:\n+ event.setName(headerFields.get(5));\n+ break;\n+ case 6:\n+ event.setSeverity(headerFields.get(6));\n+ break;\n+ }\n+ }\n+ String extensionString = cefString.substring(extensionStart);\n+ Map extensions = parseExtensions(extensionString);\n+\n+ if (removeEmptyValue) {\n+ removeEmptyValue(extensions);\n+ }\n+ event.setExtensions(extensions);\n+\n+ // Translate possible ECS fields and remove them from extensions\n+ Map translatedFields = extensions.entrySet()\n+ .stream()\n+ .filter(entry -> FIELD_MAPPING.containsKey(entry.getKey()))\n+ .collect(Collectors.toMap(entry -> FIELD_MAPPING.get(entry.getKey()), entry -> {\n+ Class fieldType = ECSFieldWithType.getFieldType(FIELD_MAPPING.get(entry.getKey()));\n+ return convertValueToType(entry.getValue(), fieldType);\n+ }));\n+\n+ // Remove the translated entries from extensions\n+ event.removeMappedExtensions();\n+\n+ ingestDocument.setFieldValue(targetField, event.toObject());\n+ // Add ECS translations to the root of the document\n+ if (translatedFields.isEmpty() == false) {\n+ translatedFields.forEach(ingestDocument::setFieldValue);\n+ }\n+ } else {\n+ throw new IllegalArgumentException(\"Invalid CEF format\");\n+ }\n+ }\n+\n+ private Object convertValueToType(String value, Class type) {\n+ if (type == Long.class) {\n+ return Long.parseLong(value);\n+ } else if (type == Double.class) {\n+ return Double.parseDouble(value);\n+ } else if (type == Integer.class) {\n+ return Integer.parseInt(value);\n+ } else {\n+ return value; // Default to String\n+ }\n+ }\n+\n+ private Map parseExtensions(String extensionString) {\n+ Map extensions = new HashMap<>();\n+ Matcher matcher = EXTENSION_NEXT_KEY_VALUE_PATTERN.matcher(extensionString);\n+ int lastEnd = 0;\n+ while (matcher.find()) {\n+ String key = matcher.group(1);\n+ String value = matcher.group(2);\n+\n+ // Convert extension field name to strict legal field_reference\n+ if (key.endsWith(\"]\")) {\n+ key = convertArrayLikeKey(key);\n+ }\n+\n+ extensions.put(key, desanitizeExtensionVal(value.trim()));\n+ lastEnd = matcher.end();\n+ }\n+ // If there's any remaining unparsed content, throw an exception\n+ if (lastEnd < extensionString.length()) {\n+ throw new IllegalArgumentException(\"Invalid extensions; keyless value present: \" + extensionString.substring(lastEnd));\n+ }\n+ return extensions;\n+ }\n+\n+ private void removeEmptyValue(Map map) {\n+ map.entrySet().removeIf(entry -> entry.getValue().isEmpty());\n+ map.entrySet().removeIf(entry -> entry.getValue() == null);\n+ }\n+\n+ private String 
convertArrayLikeKey(String key) {\n+ Matcher matcher = EXTENSION_KEY_ARRAY_CAPTURE.matcher(key);\n+ if (matcher.matches()) {\n+ return \"[\" + matcher.group(1) + \"]\" + matcher.group(2);\n+ }\n+ return key;\n+ }\n+\n+ public static String desanitizeExtensionVal(String value) {\n+ String desanitized = value;\n+ for (Map.Entry entry : EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.entrySet()) {\n+ desanitized = desanitized.replace(entry.getKey(), entry.getValue());\n+ }\n+ return desanitized;\n+ }\n+\n+ public static class CEFEvent {\n+ private String version;\n+ private String deviceVendor;\n+ private String deviceProduct;\n+ private String deviceVersion;\n+ private String deviceEventClassId;\n+ private String name;\n+ private String severity;\n+ private final Map extensions = new HashMap<>();\n+ private Map translatedFields;\n+\n+ // Getters and setters for all fields\n+ public String getVersion() {\n+ return version;\n+ }\n+\n+ public void setVersion(String version) {\n+ this.version = version;\n+ }\n+\n+ public String getDeviceVendor() {\n+ return deviceVendor;\n+ }\n+\n+ public void setDeviceVendor(String deviceVendor) {\n+ this.deviceVendor = deviceVendor;\n+ }\n+\n+ public String getDeviceProduct() {\n+ return deviceProduct;\n+ }\n+\n+ public void setDeviceProduct(String deviceProduct) {\n+ this.deviceProduct = deviceProduct;\n+ }\n+\n+ public String getDeviceVersion() {\n+ return deviceVersion;\n+ }\n+\n+ public void setDeviceVersion(String deviceVersion) {\n+ this.deviceVersion = deviceVersion;\n+ }\n+\n+ public String getDeviceEventClassId() {\n+ return deviceEventClassId;\n+ }\n+\n+ public void setDeviceEventClassId(String deviceEventClassId) {\n+ this.deviceEventClassId = deviceEventClassId;\n+ }\n+\n+ public String getName() {\n+ return name;\n+ }\n+\n+ public void setName(String name) {\n+ this.name = name;\n+ }\n+\n+ public String getSeverity() {\n+ return severity;\n+ }\n+\n+ public void setSeverity(String severity) {\n+ this.severity = severity;\n+ }\n+\n+ public Map getExtensions() {\n+ return extensions;\n+ }\n+\n+ public void setExtensions(Map extensions) {\n+ this.extensions.putAll(extensions);\n+ }\n+\n+ public void addExtension(String key, String extension) {\n+ this.extensions.put(key, extension);\n+ }\n+\n+ public void removeMappedExtensions() {\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsKey(entry.getKey()));\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsValue(entry.getKey()));", - "comment_created_at": "2025-03-26T15:00:44+00:00", - "comment_author": "bhapas", - "comment_body": "```\r\n public void removeMappedExtensions() {\r\n Set fieldMappingKeysAndValues = new HashSet<>();\r\n fieldMappingKeysAndValues.addAll(FIELD_MAPPING.keySet());\r\n fieldMappingKeysAndValues.addAll(FIELD_MAPPING.values());\r\n\r\n extensions.entrySet().removeIf(entry -> fieldMappingKeysAndValues.contains(entry.getKey()));\r\n }\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2014448667", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 122491, - "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", - "discussion_id": "2012358741", - "commented_code": "@@ -0,0 +1,449 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+import java.util.stream.Collectors;\n+\n+final class CefParser {\n+\n+ private final IngestDocument ingestDocument;\n+ private final boolean removeEmptyValue;\n+\n+ CefParser(IngestDocument ingestDocument, boolean removeEmptyValue) {\n+ this.ingestDocument = ingestDocument;\n+ this.removeEmptyValue = removeEmptyValue;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:\\\\S|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = new HashMap<>();\n+ private static final Map FIELD_MAPPING = new HashMap<>();\n+ private static final String ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = \"incomplete CEF header\";\n+\n+ static {\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\\\\", \"\\\\\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\=\", \"=\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\n\", \"\\n\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\r\", \"\\r\");\n+\n+ FIELD_MAPPING.put(\"app\", \"network.protocol\");\n+ FIELD_MAPPING.put(\"in\", \"source.bytes\");\n+ FIELD_MAPPING.put(\"out\", \"destination.bytes\");\n+ FIELD_MAPPING.put(\"dst\", \"destination.ip\");\n+ FIELD_MAPPING.put(\"dlat\", \"destination.geo.location.lat\");\n+ FIELD_MAPPING.put(\"dlong\", \"destination.geo.location.lon\");\n+ FIELD_MAPPING.put(\"dhost\", \"destination.domain\");\n+ FIELD_MAPPING.put(\"dmac\", \"destination.mac\");\n+ FIELD_MAPPING.put(\"dntdom\", \"destination.registered_domain\");\n+ FIELD_MAPPING.put(\"dpt\", \"destination.port\");\n+ FIELD_MAPPING.put(\"dpid\", \"destination.process.pid\");\n+ FIELD_MAPPING.put(\"dproc\", \"destination.process.name\");\n+ FIELD_MAPPING.put(\"duid\", \"destination.user.id\");\n+ FIELD_MAPPING.put(\"duser\", \"destination.user.name\");\n+ FIELD_MAPPING.put(\"dpriv\", \"destination.user.group.name\");\n+ FIELD_MAPPING.put(\"act\", \"event.action\");\n+ FIELD_MAPPING.put(\"dvc\", \"observer.ip\");\n+ FIELD_MAPPING.put(\"deviceDirection\", \"network.direction\");\n+ FIELD_MAPPING.put(\"deviceDnsDomain\", 
\"observer.registered_domain\");\n+ FIELD_MAPPING.put(\"deviceExternalId\", \"observer.name\");\n+ FIELD_MAPPING.put(\"deviceFacility\", \"log.syslog.facility.code\");\n+ FIELD_MAPPING.put(\"dvchost\", \"observer.hostname\");\n+ FIELD_MAPPING.put(\"deviceInboundInterface\", \"observer.ingress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcmac\", \"observer.mac\");\n+ FIELD_MAPPING.put(\"deviceOutboundInterface\", \"observer.egress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcpid\", \"process.pid\");\n+ FIELD_MAPPING.put(\"deviceProcessName\", \"process.name\");\n+ FIELD_MAPPING.put(\"rt\", \"@timestamp\");\n+ FIELD_MAPPING.put(\"dtz\", \"event.timezone\");\n+ FIELD_MAPPING.put(\"deviceTranslatedAddress\", \"host.nat.ip\");\n+ FIELD_MAPPING.put(\"device.version\", \"observer.version\");\n+ FIELD_MAPPING.put(\"deviceVersion\", \"observer.version\");\n+ FIELD_MAPPING.put(\"device.product\", \"observer.product\");\n+ FIELD_MAPPING.put(\"deviceProduct\", \"observer.product\");\n+ FIELD_MAPPING.put(\"device.event_class_id\", \"event.code\");\n+ FIELD_MAPPING.put(\"device.vendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"deviceVendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"end\", \"event.end\");\n+ FIELD_MAPPING.put(\"eventId\", \"event.id\");\n+ FIELD_MAPPING.put(\"outcome\", \"event.outcome\");\n+ FIELD_MAPPING.put(\"fileCreateTime\", \"file.created\");\n+ FIELD_MAPPING.put(\"fileHash\", \"file.hash\");\n+ FIELD_MAPPING.put(\"fileId\", \"file.inode\");\n+ FIELD_MAPPING.put(\"fileModificationTime\", \"file.mtime\");\n+ FIELD_MAPPING.put(\"fname\", \"file.name\");\n+ FIELD_MAPPING.put(\"filePath\", \"file.path\");\n+ FIELD_MAPPING.put(\"filePermission\", \"file.group\");\n+ FIELD_MAPPING.put(\"fsize\", \"file.size\");\n+ FIELD_MAPPING.put(\"fileType\", \"file.extension\");\n+ FIELD_MAPPING.put(\"mrt\", \"event.ingested\");\n+ FIELD_MAPPING.put(\"msg\", \"message\");\n+ FIELD_MAPPING.put(\"reason\", \"event.reason\");\n+ FIELD_MAPPING.put(\"requestClientApplication\", \"user_agent.original\");\n+ FIELD_MAPPING.put(\"requestContext\", \"http.request.referrer\");\n+ FIELD_MAPPING.put(\"requestMethod\", \"http.request.method\");\n+ FIELD_MAPPING.put(\"request\", \"url.original\");\n+ FIELD_MAPPING.put(\"src\", \"source.ip\");\n+ FIELD_MAPPING.put(\"sourceDnsDomain\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"slat\", \"source.geo.location.lat\");\n+ FIELD_MAPPING.put(\"slong\", \"source.geo.location.lon\");\n+ FIELD_MAPPING.put(\"shost\", \"source.domain\");\n+ FIELD_MAPPING.put(\"smac\", \"source.mac\");\n+ FIELD_MAPPING.put(\"sntdom\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"spt\", \"source.port\");\n+ FIELD_MAPPING.put(\"spid\", \"source.process.pid\");\n+ FIELD_MAPPING.put(\"sproc\", \"source.process.name\");\n+ FIELD_MAPPING.put(\"sourceServiceName\", \"source.service.name\");\n+ FIELD_MAPPING.put(\"suser\", \"source.user.name\");\n+ FIELD_MAPPING.put(\"start\", \"event.start\");\n+ FIELD_MAPPING.put(\"proto\", \"network.transport\");\n+ // Add more mappings as needed\n+ }\n+\n+ void process(String cefString, String targetField) {\n+ List headerFields = new ArrayList<>();\n+ Matcher headerMatcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && headerMatcher.find(); i++) {\n+ String field = headerMatcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headerFields.add(field);\n+ extensionStart = headerMatcher.end();\n+ }\n+\n+ if (headerFields.size() > 0 && 
headerFields.get(0).startsWith(\"CEF:\")) {\n+ CEFEvent event = new CEFEvent();\n+ // Add error message if there are not enough header fields\n+ if (headerFields.size() != 7) {\n+ ingestDocument.appendFieldValue(\"error.message\", ERROR_MESSAGE_INCOMPLETE_CEF_HEADER);\n+ }\n+ for (int i = 0; i < headerFields.size(); i++) {\n+ switch (i) {\n+ case 0:\n+ event.setVersion(headerFields.get(0).substring(4));\n+ break;\n+ case 1:\n+ event.setDeviceVendor(headerFields.get(1));\n+ ingestDocument.setFieldValue(\"observer.vendor\", headerFields.get(1));\n+ break;\n+ case 2:\n+ event.setDeviceProduct(headerFields.get(2));\n+ ingestDocument.setFieldValue(\"observer.product\", headerFields.get(2));\n+ break;\n+ case 3:\n+ event.setDeviceVersion(headerFields.get(3));\n+ ingestDocument.setFieldValue(\"observer.version\", headerFields.get(3));\n+ break;\n+ case 4:\n+ event.setDeviceEventClassId(headerFields.get(4));\n+ ingestDocument.setFieldValue(\"event.code\", headerFields.get(4));\n+ break;\n+ case 5:\n+ event.setName(headerFields.get(5));\n+ break;\n+ case 6:\n+ event.setSeverity(headerFields.get(6));\n+ break;\n+ }\n+ }\n+ String extensionString = cefString.substring(extensionStart);\n+ Map extensions = parseExtensions(extensionString);\n+\n+ if (removeEmptyValue) {\n+ removeEmptyValue(extensions);\n+ }\n+ event.setExtensions(extensions);\n+\n+ // Translate possible ECS fields and remove them from extensions\n+ Map translatedFields = extensions.entrySet()\n+ .stream()\n+ .filter(entry -> FIELD_MAPPING.containsKey(entry.getKey()))\n+ .collect(Collectors.toMap(entry -> FIELD_MAPPING.get(entry.getKey()), entry -> {\n+ Class fieldType = ECSFieldWithType.getFieldType(FIELD_MAPPING.get(entry.getKey()));\n+ return convertValueToType(entry.getValue(), fieldType);\n+ }));\n+\n+ // Remove the translated entries from extensions\n+ event.removeMappedExtensions();\n+\n+ ingestDocument.setFieldValue(targetField, event.toObject());\n+ // Add ECS translations to the root of the document\n+ if (translatedFields.isEmpty() == false) {\n+ translatedFields.forEach(ingestDocument::setFieldValue);\n+ }\n+ } else {\n+ throw new IllegalArgumentException(\"Invalid CEF format\");\n+ }\n+ }\n+\n+ private Object convertValueToType(String value, Class type) {\n+ if (type == Long.class) {\n+ return Long.parseLong(value);\n+ } else if (type == Double.class) {\n+ return Double.parseDouble(value);\n+ } else if (type == Integer.class) {\n+ return Integer.parseInt(value);\n+ } else {\n+ return value; // Default to String\n+ }\n+ }\n+\n+ private Map parseExtensions(String extensionString) {\n+ Map extensions = new HashMap<>();\n+ Matcher matcher = EXTENSION_NEXT_KEY_VALUE_PATTERN.matcher(extensionString);\n+ int lastEnd = 0;\n+ while (matcher.find()) {\n+ String key = matcher.group(1);\n+ String value = matcher.group(2);\n+\n+ // Convert extension field name to strict legal field_reference\n+ if (key.endsWith(\"]\")) {\n+ key = convertArrayLikeKey(key);\n+ }\n+\n+ extensions.put(key, desanitizeExtensionVal(value.trim()));\n+ lastEnd = matcher.end();\n+ }\n+ // If there's any remaining unparsed content, throw an exception\n+ if (lastEnd < extensionString.length()) {\n+ throw new IllegalArgumentException(\"Invalid extensions; keyless value present: \" + extensionString.substring(lastEnd));\n+ }\n+ return extensions;\n+ }\n+\n+ private void removeEmptyValue(Map map) {\n+ map.entrySet().removeIf(entry -> entry.getValue().isEmpty());\n+ map.entrySet().removeIf(entry -> entry.getValue() == null);\n+ }\n+\n+ private String 
convertArrayLikeKey(String key) {\n+ Matcher matcher = EXTENSION_KEY_ARRAY_CAPTURE.matcher(key);\n+ if (matcher.matches()) {\n+ return \"[\" + matcher.group(1) + \"]\" + matcher.group(2);\n+ }\n+ return key;\n+ }\n+\n+ public static String desanitizeExtensionVal(String value) {\n+ String desanitized = value;\n+ for (Map.Entry entry : EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.entrySet()) {\n+ desanitized = desanitized.replace(entry.getKey(), entry.getValue());\n+ }\n+ return desanitized;\n+ }\n+\n+ public static class CEFEvent {\n+ private String version;\n+ private String deviceVendor;\n+ private String deviceProduct;\n+ private String deviceVersion;\n+ private String deviceEventClassId;\n+ private String name;\n+ private String severity;\n+ private final Map extensions = new HashMap<>();\n+ private Map translatedFields;\n+\n+ // Getters and setters for all fields\n+ public String getVersion() {\n+ return version;\n+ }\n+\n+ public void setVersion(String version) {\n+ this.version = version;\n+ }\n+\n+ public String getDeviceVendor() {\n+ return deviceVendor;\n+ }\n+\n+ public void setDeviceVendor(String deviceVendor) {\n+ this.deviceVendor = deviceVendor;\n+ }\n+\n+ public String getDeviceProduct() {\n+ return deviceProduct;\n+ }\n+\n+ public void setDeviceProduct(String deviceProduct) {\n+ this.deviceProduct = deviceProduct;\n+ }\n+\n+ public String getDeviceVersion() {\n+ return deviceVersion;\n+ }\n+\n+ public void setDeviceVersion(String deviceVersion) {\n+ this.deviceVersion = deviceVersion;\n+ }\n+\n+ public String getDeviceEventClassId() {\n+ return deviceEventClassId;\n+ }\n+\n+ public void setDeviceEventClassId(String deviceEventClassId) {\n+ this.deviceEventClassId = deviceEventClassId;\n+ }\n+\n+ public String getName() {\n+ return name;\n+ }\n+\n+ public void setName(String name) {\n+ this.name = name;\n+ }\n+\n+ public String getSeverity() {\n+ return severity;\n+ }\n+\n+ public void setSeverity(String severity) {\n+ this.severity = severity;\n+ }\n+\n+ public Map getExtensions() {\n+ return extensions;\n+ }\n+\n+ public void setExtensions(Map extensions) {\n+ this.extensions.putAll(extensions);\n+ }\n+\n+ public void addExtension(String key, String extension) {\n+ this.extensions.put(key, extension);\n+ }\n+\n+ public void removeMappedExtensions() {\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsKey(entry.getKey()));\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsValue(entry.getKey()));", - "comment_created_at": "2025-03-26T15:33:55+00:00", - "comment_author": "joegallo", - "comment_body": "No, do it staticly. There's no need to allocate a new HashSet umpteen times.", - "pr_file_module": null - }, - { - "comment_id": "2016204628", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 122491, - "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", - "discussion_id": "2012358741", - "commented_code": "@@ -0,0 +1,449 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+import java.util.stream.Collectors;\n+\n+final class CefParser {\n+\n+ private final IngestDocument ingestDocument;\n+ private final boolean removeEmptyValue;\n+\n+ CefParser(IngestDocument ingestDocument, boolean removeEmptyValue) {\n+ this.ingestDocument = ingestDocument;\n+ this.removeEmptyValue = removeEmptyValue;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:\\\\S|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = new HashMap<>();\n+ private static final Map FIELD_MAPPING = new HashMap<>();\n+ private static final String ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = \"incomplete CEF header\";\n+\n+ static {\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\\\\", \"\\\\\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\=\", \"=\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\n\", \"\\n\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\r\", \"\\r\");\n+\n+ FIELD_MAPPING.put(\"app\", \"network.protocol\");\n+ FIELD_MAPPING.put(\"in\", \"source.bytes\");\n+ FIELD_MAPPING.put(\"out\", \"destination.bytes\");\n+ FIELD_MAPPING.put(\"dst\", \"destination.ip\");\n+ FIELD_MAPPING.put(\"dlat\", \"destination.geo.location.lat\");\n+ FIELD_MAPPING.put(\"dlong\", \"destination.geo.location.lon\");\n+ FIELD_MAPPING.put(\"dhost\", \"destination.domain\");\n+ FIELD_MAPPING.put(\"dmac\", \"destination.mac\");\n+ FIELD_MAPPING.put(\"dntdom\", \"destination.registered_domain\");\n+ FIELD_MAPPING.put(\"dpt\", \"destination.port\");\n+ FIELD_MAPPING.put(\"dpid\", \"destination.process.pid\");\n+ FIELD_MAPPING.put(\"dproc\", \"destination.process.name\");\n+ FIELD_MAPPING.put(\"duid\", \"destination.user.id\");\n+ FIELD_MAPPING.put(\"duser\", \"destination.user.name\");\n+ FIELD_MAPPING.put(\"dpriv\", \"destination.user.group.name\");\n+ FIELD_MAPPING.put(\"act\", \"event.action\");\n+ FIELD_MAPPING.put(\"dvc\", \"observer.ip\");\n+ FIELD_MAPPING.put(\"deviceDirection\", \"network.direction\");\n+ FIELD_MAPPING.put(\"deviceDnsDomain\", 
\"observer.registered_domain\");\n+ FIELD_MAPPING.put(\"deviceExternalId\", \"observer.name\");\n+ FIELD_MAPPING.put(\"deviceFacility\", \"log.syslog.facility.code\");\n+ FIELD_MAPPING.put(\"dvchost\", \"observer.hostname\");\n+ FIELD_MAPPING.put(\"deviceInboundInterface\", \"observer.ingress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcmac\", \"observer.mac\");\n+ FIELD_MAPPING.put(\"deviceOutboundInterface\", \"observer.egress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcpid\", \"process.pid\");\n+ FIELD_MAPPING.put(\"deviceProcessName\", \"process.name\");\n+ FIELD_MAPPING.put(\"rt\", \"@timestamp\");\n+ FIELD_MAPPING.put(\"dtz\", \"event.timezone\");\n+ FIELD_MAPPING.put(\"deviceTranslatedAddress\", \"host.nat.ip\");\n+ FIELD_MAPPING.put(\"device.version\", \"observer.version\");\n+ FIELD_MAPPING.put(\"deviceVersion\", \"observer.version\");\n+ FIELD_MAPPING.put(\"device.product\", \"observer.product\");\n+ FIELD_MAPPING.put(\"deviceProduct\", \"observer.product\");\n+ FIELD_MAPPING.put(\"device.event_class_id\", \"event.code\");\n+ FIELD_MAPPING.put(\"device.vendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"deviceVendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"end\", \"event.end\");\n+ FIELD_MAPPING.put(\"eventId\", \"event.id\");\n+ FIELD_MAPPING.put(\"outcome\", \"event.outcome\");\n+ FIELD_MAPPING.put(\"fileCreateTime\", \"file.created\");\n+ FIELD_MAPPING.put(\"fileHash\", \"file.hash\");\n+ FIELD_MAPPING.put(\"fileId\", \"file.inode\");\n+ FIELD_MAPPING.put(\"fileModificationTime\", \"file.mtime\");\n+ FIELD_MAPPING.put(\"fname\", \"file.name\");\n+ FIELD_MAPPING.put(\"filePath\", \"file.path\");\n+ FIELD_MAPPING.put(\"filePermission\", \"file.group\");\n+ FIELD_MAPPING.put(\"fsize\", \"file.size\");\n+ FIELD_MAPPING.put(\"fileType\", \"file.extension\");\n+ FIELD_MAPPING.put(\"mrt\", \"event.ingested\");\n+ FIELD_MAPPING.put(\"msg\", \"message\");\n+ FIELD_MAPPING.put(\"reason\", \"event.reason\");\n+ FIELD_MAPPING.put(\"requestClientApplication\", \"user_agent.original\");\n+ FIELD_MAPPING.put(\"requestContext\", \"http.request.referrer\");\n+ FIELD_MAPPING.put(\"requestMethod\", \"http.request.method\");\n+ FIELD_MAPPING.put(\"request\", \"url.original\");\n+ FIELD_MAPPING.put(\"src\", \"source.ip\");\n+ FIELD_MAPPING.put(\"sourceDnsDomain\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"slat\", \"source.geo.location.lat\");\n+ FIELD_MAPPING.put(\"slong\", \"source.geo.location.lon\");\n+ FIELD_MAPPING.put(\"shost\", \"source.domain\");\n+ FIELD_MAPPING.put(\"smac\", \"source.mac\");\n+ FIELD_MAPPING.put(\"sntdom\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"spt\", \"source.port\");\n+ FIELD_MAPPING.put(\"spid\", \"source.process.pid\");\n+ FIELD_MAPPING.put(\"sproc\", \"source.process.name\");\n+ FIELD_MAPPING.put(\"sourceServiceName\", \"source.service.name\");\n+ FIELD_MAPPING.put(\"suser\", \"source.user.name\");\n+ FIELD_MAPPING.put(\"start\", \"event.start\");\n+ FIELD_MAPPING.put(\"proto\", \"network.transport\");\n+ // Add more mappings as needed\n+ }\n+\n+ void process(String cefString, String targetField) {\n+ List headerFields = new ArrayList<>();\n+ Matcher headerMatcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && headerMatcher.find(); i++) {\n+ String field = headerMatcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headerFields.add(field);\n+ extensionStart = headerMatcher.end();\n+ }\n+\n+ if (headerFields.size() > 0 && 
headerFields.get(0).startsWith(\"CEF:\")) {\n+ CEFEvent event = new CEFEvent();\n+ // Add error message if there are not enough header fields\n+ if (headerFields.size() != 7) {\n+ ingestDocument.appendFieldValue(\"error.message\", ERROR_MESSAGE_INCOMPLETE_CEF_HEADER);\n+ }\n+ for (int i = 0; i < headerFields.size(); i++) {\n+ switch (i) {\n+ case 0:\n+ event.setVersion(headerFields.get(0).substring(4));\n+ break;\n+ case 1:\n+ event.setDeviceVendor(headerFields.get(1));\n+ ingestDocument.setFieldValue(\"observer.vendor\", headerFields.get(1));\n+ break;\n+ case 2:\n+ event.setDeviceProduct(headerFields.get(2));\n+ ingestDocument.setFieldValue(\"observer.product\", headerFields.get(2));\n+ break;\n+ case 3:\n+ event.setDeviceVersion(headerFields.get(3));\n+ ingestDocument.setFieldValue(\"observer.version\", headerFields.get(3));\n+ break;\n+ case 4:\n+ event.setDeviceEventClassId(headerFields.get(4));\n+ ingestDocument.setFieldValue(\"event.code\", headerFields.get(4));\n+ break;\n+ case 5:\n+ event.setName(headerFields.get(5));\n+ break;\n+ case 6:\n+ event.setSeverity(headerFields.get(6));\n+ break;\n+ }\n+ }\n+ String extensionString = cefString.substring(extensionStart);\n+ Map extensions = parseExtensions(extensionString);\n+\n+ if (removeEmptyValue) {\n+ removeEmptyValue(extensions);\n+ }\n+ event.setExtensions(extensions);\n+\n+ // Translate possible ECS fields and remove them from extensions\n+ Map translatedFields = extensions.entrySet()\n+ .stream()\n+ .filter(entry -> FIELD_MAPPING.containsKey(entry.getKey()))\n+ .collect(Collectors.toMap(entry -> FIELD_MAPPING.get(entry.getKey()), entry -> {\n+ Class fieldType = ECSFieldWithType.getFieldType(FIELD_MAPPING.get(entry.getKey()));\n+ return convertValueToType(entry.getValue(), fieldType);\n+ }));\n+\n+ // Remove the translated entries from extensions\n+ event.removeMappedExtensions();\n+\n+ ingestDocument.setFieldValue(targetField, event.toObject());\n+ // Add ECS translations to the root of the document\n+ if (translatedFields.isEmpty() == false) {\n+ translatedFields.forEach(ingestDocument::setFieldValue);\n+ }\n+ } else {\n+ throw new IllegalArgumentException(\"Invalid CEF format\");\n+ }\n+ }\n+\n+ private Object convertValueToType(String value, Class type) {\n+ if (type == Long.class) {\n+ return Long.parseLong(value);\n+ } else if (type == Double.class) {\n+ return Double.parseDouble(value);\n+ } else if (type == Integer.class) {\n+ return Integer.parseInt(value);\n+ } else {\n+ return value; // Default to String\n+ }\n+ }\n+\n+ private Map parseExtensions(String extensionString) {\n+ Map extensions = new HashMap<>();\n+ Matcher matcher = EXTENSION_NEXT_KEY_VALUE_PATTERN.matcher(extensionString);\n+ int lastEnd = 0;\n+ while (matcher.find()) {\n+ String key = matcher.group(1);\n+ String value = matcher.group(2);\n+\n+ // Convert extension field name to strict legal field_reference\n+ if (key.endsWith(\"]\")) {\n+ key = convertArrayLikeKey(key);\n+ }\n+\n+ extensions.put(key, desanitizeExtensionVal(value.trim()));\n+ lastEnd = matcher.end();\n+ }\n+ // If there's any remaining unparsed content, throw an exception\n+ if (lastEnd < extensionString.length()) {\n+ throw new IllegalArgumentException(\"Invalid extensions; keyless value present: \" + extensionString.substring(lastEnd));\n+ }\n+ return extensions;\n+ }\n+\n+ private void removeEmptyValue(Map map) {\n+ map.entrySet().removeIf(entry -> entry.getValue().isEmpty());\n+ map.entrySet().removeIf(entry -> entry.getValue() == null);\n+ }\n+\n+ private String 
convertArrayLikeKey(String key) {\n+ Matcher matcher = EXTENSION_KEY_ARRAY_CAPTURE.matcher(key);\n+ if (matcher.matches()) {\n+ return \"[\" + matcher.group(1) + \"]\" + matcher.group(2);\n+ }\n+ return key;\n+ }\n+\n+ public static String desanitizeExtensionVal(String value) {\n+ String desanitized = value;\n+ for (Map.Entry entry : EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.entrySet()) {\n+ desanitized = desanitized.replace(entry.getKey(), entry.getValue());\n+ }\n+ return desanitized;\n+ }\n+\n+ public static class CEFEvent {\n+ private String version;\n+ private String deviceVendor;\n+ private String deviceProduct;\n+ private String deviceVersion;\n+ private String deviceEventClassId;\n+ private String name;\n+ private String severity;\n+ private final Map extensions = new HashMap<>();\n+ private Map translatedFields;\n+\n+ // Getters and setters for all fields\n+ public String getVersion() {\n+ return version;\n+ }\n+\n+ public void setVersion(String version) {\n+ this.version = version;\n+ }\n+\n+ public String getDeviceVendor() {\n+ return deviceVendor;\n+ }\n+\n+ public void setDeviceVendor(String deviceVendor) {\n+ this.deviceVendor = deviceVendor;\n+ }\n+\n+ public String getDeviceProduct() {\n+ return deviceProduct;\n+ }\n+\n+ public void setDeviceProduct(String deviceProduct) {\n+ this.deviceProduct = deviceProduct;\n+ }\n+\n+ public String getDeviceVersion() {\n+ return deviceVersion;\n+ }\n+\n+ public void setDeviceVersion(String deviceVersion) {\n+ this.deviceVersion = deviceVersion;\n+ }\n+\n+ public String getDeviceEventClassId() {\n+ return deviceEventClassId;\n+ }\n+\n+ public void setDeviceEventClassId(String deviceEventClassId) {\n+ this.deviceEventClassId = deviceEventClassId;\n+ }\n+\n+ public String getName() {\n+ return name;\n+ }\n+\n+ public void setName(String name) {\n+ this.name = name;\n+ }\n+\n+ public String getSeverity() {\n+ return severity;\n+ }\n+\n+ public void setSeverity(String severity) {\n+ this.severity = severity;\n+ }\n+\n+ public Map getExtensions() {\n+ return extensions;\n+ }\n+\n+ public void setExtensions(Map extensions) {\n+ this.extensions.putAll(extensions);\n+ }\n+\n+ public void addExtension(String key, String extension) {\n+ this.extensions.put(key, extension);\n+ }\n+\n+ public void removeMappedExtensions() {\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsKey(entry.getKey()));\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsValue(entry.getKey()));", - "comment_created_at": "2025-03-27T10:40:24+00:00", - "comment_author": "bhapas", - "comment_body": "Moved to static Hashset.", - "pr_file_module": null - }, - { - "comment_id": "2016853377", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 122491, - "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", - "discussion_id": "2012358741", - "commented_code": "@@ -0,0 +1,449 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+\n+import java.util.ArrayList;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+import java.util.stream.Collectors;\n+\n+final class CefParser {\n+\n+ private final IngestDocument ingestDocument;\n+ private final boolean removeEmptyValue;\n+\n+ CefParser(IngestDocument ingestDocument, boolean removeEmptyValue) {\n+ this.ingestDocument = ingestDocument;\n+ this.removeEmptyValue = removeEmptyValue;\n+ }\n+\n+ private static final Pattern HEADER_PATTERN = Pattern.compile(\"(?:\\\\\\\\\\\\||\\\\\\\\\\\\\\\\|[^|])*?\");\n+ private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile(\"(\" + HEADER_PATTERN.pattern() + \")\\\\|\");\n+ private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile(\"\\\\\\\\([\\\\\\\\|])\");\n+\n+ // New patterns for extension parsing\n+ private static final String EXTENSION_KEY_PATTERN = \"(?:[\\\\w-]+(?:\\\\.[^\\\\.=\\\\s\\\\|\\\\\\\\\\\\[\\\\]]+)*(?:\\\\[[0-9]+\\\\])?(?==))\";\n+ private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile(\"^([^\\\\[\\\\]]+)((?:\\\\[[0-9]+\\\\])+)$\");\n+ private static final String EXTENSION_VALUE_PATTERN = \"(?:\\\\S|\\\\s(?!\" + EXTENSION_KEY_PATTERN + \"=))*\";\n+ private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile(\n+ \"(\" + EXTENSION_KEY_PATTERN + \")=(\" + EXTENSION_VALUE_PATTERN + \")(?:\\\\s+|$)\"\n+ );\n+ private static final Map EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = new HashMap<>();\n+ private static final Map FIELD_MAPPING = new HashMap<>();\n+ private static final String ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = \"incomplete CEF header\";\n+\n+ static {\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\\\\", \"\\\\\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\=\", \"=\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\n\", \"\\n\");\n+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.put(\"\\\\\\r\", \"\\r\");\n+\n+ FIELD_MAPPING.put(\"app\", \"network.protocol\");\n+ FIELD_MAPPING.put(\"in\", \"source.bytes\");\n+ FIELD_MAPPING.put(\"out\", \"destination.bytes\");\n+ FIELD_MAPPING.put(\"dst\", \"destination.ip\");\n+ FIELD_MAPPING.put(\"dlat\", \"destination.geo.location.lat\");\n+ FIELD_MAPPING.put(\"dlong\", \"destination.geo.location.lon\");\n+ FIELD_MAPPING.put(\"dhost\", \"destination.domain\");\n+ FIELD_MAPPING.put(\"dmac\", \"destination.mac\");\n+ FIELD_MAPPING.put(\"dntdom\", \"destination.registered_domain\");\n+ FIELD_MAPPING.put(\"dpt\", \"destination.port\");\n+ FIELD_MAPPING.put(\"dpid\", \"destination.process.pid\");\n+ FIELD_MAPPING.put(\"dproc\", \"destination.process.name\");\n+ FIELD_MAPPING.put(\"duid\", \"destination.user.id\");\n+ FIELD_MAPPING.put(\"duser\", \"destination.user.name\");\n+ FIELD_MAPPING.put(\"dpriv\", \"destination.user.group.name\");\n+ FIELD_MAPPING.put(\"act\", \"event.action\");\n+ FIELD_MAPPING.put(\"dvc\", \"observer.ip\");\n+ FIELD_MAPPING.put(\"deviceDirection\", \"network.direction\");\n+ FIELD_MAPPING.put(\"deviceDnsDomain\", 
\"observer.registered_domain\");\n+ FIELD_MAPPING.put(\"deviceExternalId\", \"observer.name\");\n+ FIELD_MAPPING.put(\"deviceFacility\", \"log.syslog.facility.code\");\n+ FIELD_MAPPING.put(\"dvchost\", \"observer.hostname\");\n+ FIELD_MAPPING.put(\"deviceInboundInterface\", \"observer.ingress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcmac\", \"observer.mac\");\n+ FIELD_MAPPING.put(\"deviceOutboundInterface\", \"observer.egress.interface.name\");\n+ FIELD_MAPPING.put(\"dvcpid\", \"process.pid\");\n+ FIELD_MAPPING.put(\"deviceProcessName\", \"process.name\");\n+ FIELD_MAPPING.put(\"rt\", \"@timestamp\");\n+ FIELD_MAPPING.put(\"dtz\", \"event.timezone\");\n+ FIELD_MAPPING.put(\"deviceTranslatedAddress\", \"host.nat.ip\");\n+ FIELD_MAPPING.put(\"device.version\", \"observer.version\");\n+ FIELD_MAPPING.put(\"deviceVersion\", \"observer.version\");\n+ FIELD_MAPPING.put(\"device.product\", \"observer.product\");\n+ FIELD_MAPPING.put(\"deviceProduct\", \"observer.product\");\n+ FIELD_MAPPING.put(\"device.event_class_id\", \"event.code\");\n+ FIELD_MAPPING.put(\"device.vendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"deviceVendor\", \"observer.vendor\");\n+ FIELD_MAPPING.put(\"end\", \"event.end\");\n+ FIELD_MAPPING.put(\"eventId\", \"event.id\");\n+ FIELD_MAPPING.put(\"outcome\", \"event.outcome\");\n+ FIELD_MAPPING.put(\"fileCreateTime\", \"file.created\");\n+ FIELD_MAPPING.put(\"fileHash\", \"file.hash\");\n+ FIELD_MAPPING.put(\"fileId\", \"file.inode\");\n+ FIELD_MAPPING.put(\"fileModificationTime\", \"file.mtime\");\n+ FIELD_MAPPING.put(\"fname\", \"file.name\");\n+ FIELD_MAPPING.put(\"filePath\", \"file.path\");\n+ FIELD_MAPPING.put(\"filePermission\", \"file.group\");\n+ FIELD_MAPPING.put(\"fsize\", \"file.size\");\n+ FIELD_MAPPING.put(\"fileType\", \"file.extension\");\n+ FIELD_MAPPING.put(\"mrt\", \"event.ingested\");\n+ FIELD_MAPPING.put(\"msg\", \"message\");\n+ FIELD_MAPPING.put(\"reason\", \"event.reason\");\n+ FIELD_MAPPING.put(\"requestClientApplication\", \"user_agent.original\");\n+ FIELD_MAPPING.put(\"requestContext\", \"http.request.referrer\");\n+ FIELD_MAPPING.put(\"requestMethod\", \"http.request.method\");\n+ FIELD_MAPPING.put(\"request\", \"url.original\");\n+ FIELD_MAPPING.put(\"src\", \"source.ip\");\n+ FIELD_MAPPING.put(\"sourceDnsDomain\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"slat\", \"source.geo.location.lat\");\n+ FIELD_MAPPING.put(\"slong\", \"source.geo.location.lon\");\n+ FIELD_MAPPING.put(\"shost\", \"source.domain\");\n+ FIELD_MAPPING.put(\"smac\", \"source.mac\");\n+ FIELD_MAPPING.put(\"sntdom\", \"source.registered_domain\");\n+ FIELD_MAPPING.put(\"spt\", \"source.port\");\n+ FIELD_MAPPING.put(\"spid\", \"source.process.pid\");\n+ FIELD_MAPPING.put(\"sproc\", \"source.process.name\");\n+ FIELD_MAPPING.put(\"sourceServiceName\", \"source.service.name\");\n+ FIELD_MAPPING.put(\"suser\", \"source.user.name\");\n+ FIELD_MAPPING.put(\"start\", \"event.start\");\n+ FIELD_MAPPING.put(\"proto\", \"network.transport\");\n+ // Add more mappings as needed\n+ }\n+\n+ void process(String cefString, String targetField) {\n+ List headerFields = new ArrayList<>();\n+ Matcher headerMatcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);\n+ int extensionStart = 0;\n+\n+ for (int i = 0; i < 7 && headerMatcher.find(); i++) {\n+ String field = headerMatcher.group(1);\n+ field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll(\"$1\");\n+ headerFields.add(field);\n+ extensionStart = headerMatcher.end();\n+ }\n+\n+ if (headerFields.size() > 0 && 
headerFields.get(0).startsWith(\"CEF:\")) {\n+ CEFEvent event = new CEFEvent();\n+ // Add error message if there are not enough header fields\n+ if (headerFields.size() != 7) {\n+ ingestDocument.appendFieldValue(\"error.message\", ERROR_MESSAGE_INCOMPLETE_CEF_HEADER);\n+ }\n+ for (int i = 0; i < headerFields.size(); i++) {\n+ switch (i) {\n+ case 0:\n+ event.setVersion(headerFields.get(0).substring(4));\n+ break;\n+ case 1:\n+ event.setDeviceVendor(headerFields.get(1));\n+ ingestDocument.setFieldValue(\"observer.vendor\", headerFields.get(1));\n+ break;\n+ case 2:\n+ event.setDeviceProduct(headerFields.get(2));\n+ ingestDocument.setFieldValue(\"observer.product\", headerFields.get(2));\n+ break;\n+ case 3:\n+ event.setDeviceVersion(headerFields.get(3));\n+ ingestDocument.setFieldValue(\"observer.version\", headerFields.get(3));\n+ break;\n+ case 4:\n+ event.setDeviceEventClassId(headerFields.get(4));\n+ ingestDocument.setFieldValue(\"event.code\", headerFields.get(4));\n+ break;\n+ case 5:\n+ event.setName(headerFields.get(5));\n+ break;\n+ case 6:\n+ event.setSeverity(headerFields.get(6));\n+ break;\n+ }\n+ }\n+ String extensionString = cefString.substring(extensionStart);\n+ Map extensions = parseExtensions(extensionString);\n+\n+ if (removeEmptyValue) {\n+ removeEmptyValue(extensions);\n+ }\n+ event.setExtensions(extensions);\n+\n+ // Translate possible ECS fields and remove them from extensions\n+ Map translatedFields = extensions.entrySet()\n+ .stream()\n+ .filter(entry -> FIELD_MAPPING.containsKey(entry.getKey()))\n+ .collect(Collectors.toMap(entry -> FIELD_MAPPING.get(entry.getKey()), entry -> {\n+ Class fieldType = ECSFieldWithType.getFieldType(FIELD_MAPPING.get(entry.getKey()));\n+ return convertValueToType(entry.getValue(), fieldType);\n+ }));\n+\n+ // Remove the translated entries from extensions\n+ event.removeMappedExtensions();\n+\n+ ingestDocument.setFieldValue(targetField, event.toObject());\n+ // Add ECS translations to the root of the document\n+ if (translatedFields.isEmpty() == false) {\n+ translatedFields.forEach(ingestDocument::setFieldValue);\n+ }\n+ } else {\n+ throw new IllegalArgumentException(\"Invalid CEF format\");\n+ }\n+ }\n+\n+ private Object convertValueToType(String value, Class type) {\n+ if (type == Long.class) {\n+ return Long.parseLong(value);\n+ } else if (type == Double.class) {\n+ return Double.parseDouble(value);\n+ } else if (type == Integer.class) {\n+ return Integer.parseInt(value);\n+ } else {\n+ return value; // Default to String\n+ }\n+ }\n+\n+ private Map parseExtensions(String extensionString) {\n+ Map extensions = new HashMap<>();\n+ Matcher matcher = EXTENSION_NEXT_KEY_VALUE_PATTERN.matcher(extensionString);\n+ int lastEnd = 0;\n+ while (matcher.find()) {\n+ String key = matcher.group(1);\n+ String value = matcher.group(2);\n+\n+ // Convert extension field name to strict legal field_reference\n+ if (key.endsWith(\"]\")) {\n+ key = convertArrayLikeKey(key);\n+ }\n+\n+ extensions.put(key, desanitizeExtensionVal(value.trim()));\n+ lastEnd = matcher.end();\n+ }\n+ // If there's any remaining unparsed content, throw an exception\n+ if (lastEnd < extensionString.length()) {\n+ throw new IllegalArgumentException(\"Invalid extensions; keyless value present: \" + extensionString.substring(lastEnd));\n+ }\n+ return extensions;\n+ }\n+\n+ private void removeEmptyValue(Map map) {\n+ map.entrySet().removeIf(entry -> entry.getValue().isEmpty());\n+ map.entrySet().removeIf(entry -> entry.getValue() == null);\n+ }\n+\n+ private String 
convertArrayLikeKey(String key) {\n+ Matcher matcher = EXTENSION_KEY_ARRAY_CAPTURE.matcher(key);\n+ if (matcher.matches()) {\n+ return \"[\" + matcher.group(1) + \"]\" + matcher.group(2);\n+ }\n+ return key;\n+ }\n+\n+ public static String desanitizeExtensionVal(String value) {\n+ String desanitized = value;\n+ for (Map.Entry entry : EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.entrySet()) {\n+ desanitized = desanitized.replace(entry.getKey(), entry.getValue());\n+ }\n+ return desanitized;\n+ }\n+\n+ public static class CEFEvent {\n+ private String version;\n+ private String deviceVendor;\n+ private String deviceProduct;\n+ private String deviceVersion;\n+ private String deviceEventClassId;\n+ private String name;\n+ private String severity;\n+ private final Map extensions = new HashMap<>();\n+ private Map translatedFields;\n+\n+ // Getters and setters for all fields\n+ public String getVersion() {\n+ return version;\n+ }\n+\n+ public void setVersion(String version) {\n+ this.version = version;\n+ }\n+\n+ public String getDeviceVendor() {\n+ return deviceVendor;\n+ }\n+\n+ public void setDeviceVendor(String deviceVendor) {\n+ this.deviceVendor = deviceVendor;\n+ }\n+\n+ public String getDeviceProduct() {\n+ return deviceProduct;\n+ }\n+\n+ public void setDeviceProduct(String deviceProduct) {\n+ this.deviceProduct = deviceProduct;\n+ }\n+\n+ public String getDeviceVersion() {\n+ return deviceVersion;\n+ }\n+\n+ public void setDeviceVersion(String deviceVersion) {\n+ this.deviceVersion = deviceVersion;\n+ }\n+\n+ public String getDeviceEventClassId() {\n+ return deviceEventClassId;\n+ }\n+\n+ public void setDeviceEventClassId(String deviceEventClassId) {\n+ this.deviceEventClassId = deviceEventClassId;\n+ }\n+\n+ public String getName() {\n+ return name;\n+ }\n+\n+ public void setName(String name) {\n+ this.name = name;\n+ }\n+\n+ public String getSeverity() {\n+ return severity;\n+ }\n+\n+ public void setSeverity(String severity) {\n+ this.severity = severity;\n+ }\n+\n+ public Map getExtensions() {\n+ return extensions;\n+ }\n+\n+ public void setExtensions(Map extensions) {\n+ this.extensions.putAll(extensions);\n+ }\n+\n+ public void addExtension(String key, String extension) {\n+ this.extensions.put(key, extension);\n+ }\n+\n+ public void removeMappedExtensions() {\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsKey(entry.getKey()));\n+ this.extensions.entrySet().removeIf(entry -> FIELD_MAPPING.containsValue(entry.getKey()));", - "comment_created_at": "2025-03-27T14:48:18+00:00", - "comment_author": "joegallo", - "comment_body": "This looks right to me now, but I moved the location of the definition of the static set.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2018560953", - "pr_number": 122491, - "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", - "created_at": "2025-03-28T12:20:15+00:00", - "commented_code": "}\n\n // Translate extensions to possible ECS fields\n Map translatedFields = extensions.entrySet()\n .stream()\n .filter(entry -> FIELD_MAPPINGS.containsKey(entry.getKey()))\n .collect(Collectors.toMap(entry -> FIELD_MAPPINGS.get(entry.getKey()), entry -> {\n DataType fieldType = FIELDS.get(FIELD_MAPPINGS.get(entry.getKey()));\n return convertValueToType(entry.getValue(), fieldType);\n }));\n Map translatedFields = new HashMap<>();", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2018560953", - "repo_full_name": "elastic/elasticsearch", - 
"pr_number": 122491, - "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", - "discussion_id": "2018560953", - "commented_code": "@@ -348,13 +347,15 @@ private void processExtensions(String cefString, int extensionStart, CEFEvent ev\n }\n \n // Translate extensions to possible ECS fields\n- Map translatedFields = extensions.entrySet()\n- .stream()\n- .filter(entry -> FIELD_MAPPINGS.containsKey(entry.getKey()))\n- .collect(Collectors.toMap(entry -> FIELD_MAPPINGS.get(entry.getKey()), entry -> {\n- DataType fieldType = FIELDS.get(FIELD_MAPPINGS.get(entry.getKey()));\n- return convertValueToType(entry.getValue(), fieldType);\n- }));\n+ Map translatedFields = new HashMap<>();", - "comment_created_at": "2025-03-28T12:20:15+00:00", - "comment_author": "joegallo", - "comment_body": "Do you think it's worth pre-sizing this `HashMap` (via `HashMap.newHashMap`) to match the size of the `extensions`? By my read that would slightly oversize the map, but with the effect that you'd never need to resize. I imagine it's worth it.", - "pr_file_module": null - }, - { - "comment_id": "2019736523", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 122491, - "pr_file": "modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java", - "discussion_id": "2018560953", - "commented_code": "@@ -348,13 +347,15 @@ private void processExtensions(String cefString, int extensionStart, CEFEvent ev\n }\n \n // Translate extensions to possible ECS fields\n- Map translatedFields = extensions.entrySet()\n- .stream()\n- .filter(entry -> FIELD_MAPPINGS.containsKey(entry.getKey()))\n- .collect(Collectors.toMap(entry -> FIELD_MAPPINGS.get(entry.getKey()), entry -> {\n- DataType fieldType = FIELDS.get(FIELD_MAPPINGS.get(entry.getKey()));\n- return convertValueToType(entry.getValue(), fieldType);\n- }));\n+ Map translatedFields = new HashMap<>();", - "comment_created_at": "2025-03-29T06:40:34+00:00", - "comment_author": "bhapas", - "comment_body": "Removing all intermediate collections. 
Adding the mapping to event as we parse.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-name-reflects-meaning.json b/_reviewers/elasticsearch-name-reflects-meaning.json new file mode 100644 index 0000000..c6d54bf --- /dev/null +++ b/_reviewers/elasticsearch-name-reflects-meaning.json @@ -0,0 +1,296 @@ +[ + { + "discussion_id": "2137933778", + "pr_number": 129146, + "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionCreator.java", + "created_at": "2025-06-10T13:38:48+00:00", + "commented_code": "package org.elasticsearch.xpack.inference.services.ibmwatsonx.action;\n\nimport org.elasticsearch.inference.TaskType;\nimport org.elasticsearch.threadpool.ThreadPool;\nimport org.elasticsearch.xpack.inference.common.Truncator;\nimport org.elasticsearch.xpack.inference.external.action.ExecutableAction;\nimport org.elasticsearch.xpack.inference.external.action.SenderExecutableAction;\nimport org.elasticsearch.xpack.inference.external.action.SingleInputSenderExecutableAction;\nimport org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler;\nimport org.elasticsearch.xpack.inference.external.http.sender.ChatCompletionInput;\nimport org.elasticsearch.xpack.inference.external.http.sender.GenericRequestManager;\nimport org.elasticsearch.xpack.inference.external.http.sender.Sender;\nimport org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;\nimport org.elasticsearch.xpack.inference.services.ServiceComponents;\nimport org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxCompletionResponseHandler;\nimport org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxEmbeddingsRequestManager;\nimport org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxRerankRequestManager;\nimport org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel;\nimport org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsModel;\nimport org.elasticsearch.xpack.inference.services.ibmwatsonx.request.IbmWatsonxChatCompletionRequest;\nimport org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankModel;\nimport org.elasticsearch.xpack.inference.services.openai.response.OpenAiChatCompletionResponseEntity;\n\nimport java.util.Map;\nimport java.util.Objects;\n\nimport static org.elasticsearch.core.Strings.format;\nimport static org.elasticsearch.xpack.inference.external.action.ActionUtils.constructFailedToSendRequestMessage;\n\n/**\n * IbmWatsonxActionCreator is responsible for creating executable actions for various models.\n * It implements the IbmWatsonxActionVisitor interface to provide specific implementations.\n */\npublic class IbmWatsonxActionCreator implements IbmWatsonxActionVisitor {\n private final Sender sender;\n private final ServiceComponents serviceComponents;\n\n static final String COMPLETION_REQUEST_TYPE = \"IBM WatsonX completions\";", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2137933778", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129146, + "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionCreator.java", + "discussion_id": "2137933778", + "commented_code": "@@ -7,26 +7,48 @@\n \n package org.elasticsearch.xpack.inference.services.ibmwatsonx.action;\n \n+import org.elasticsearch.inference.TaskType;\n import 
org.elasticsearch.threadpool.ThreadPool;\n import org.elasticsearch.xpack.inference.common.Truncator;\n import org.elasticsearch.xpack.inference.external.action.ExecutableAction;\n import org.elasticsearch.xpack.inference.external.action.SenderExecutableAction;\n+import org.elasticsearch.xpack.inference.external.action.SingleInputSenderExecutableAction;\n+import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler;\n+import org.elasticsearch.xpack.inference.external.http.sender.ChatCompletionInput;\n+import org.elasticsearch.xpack.inference.external.http.sender.GenericRequestManager;\n import org.elasticsearch.xpack.inference.external.http.sender.Sender;\n+import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;\n import org.elasticsearch.xpack.inference.services.ServiceComponents;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxCompletionResponseHandler;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxEmbeddingsRequestManager;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxRerankRequestManager;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsModel;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.request.IbmWatsonxChatCompletionRequest;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankModel;\n+import org.elasticsearch.xpack.inference.services.openai.response.OpenAiChatCompletionResponseEntity;\n \n import java.util.Map;\n import java.util.Objects;\n \n+import static org.elasticsearch.core.Strings.format;\n import static org.elasticsearch.xpack.inference.external.action.ActionUtils.constructFailedToSendRequestMessage;\n \n+/**\n+ * IbmWatsonxActionCreator is responsible for creating executable actions for various models.\n+ * It implements the IbmWatsonxActionVisitor interface to provide specific implementations.\n+ */\n public class IbmWatsonxActionCreator implements IbmWatsonxActionVisitor {\n private final Sender sender;\n private final ServiceComponents serviceComponents;\n \n+ static final String COMPLETION_REQUEST_TYPE = \"IBM WatsonX completions\";", + "comment_created_at": "2025-06-10T13:38:48+00:00", + "comment_author": "Jan-Kazlouski-elastic", + "comment_body": "After discussion in dms it was found that platform is called Watsonx not WatsonX. Could you please unify naming. 
Thank you!", + "pr_file_module": null + }, + { + "comment_id": "2145230716", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129146, + "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionCreator.java", + "discussion_id": "2137933778", + "commented_code": "@@ -7,26 +7,48 @@\n \n package org.elasticsearch.xpack.inference.services.ibmwatsonx.action;\n \n+import org.elasticsearch.inference.TaskType;\n import org.elasticsearch.threadpool.ThreadPool;\n import org.elasticsearch.xpack.inference.common.Truncator;\n import org.elasticsearch.xpack.inference.external.action.ExecutableAction;\n import org.elasticsearch.xpack.inference.external.action.SenderExecutableAction;\n+import org.elasticsearch.xpack.inference.external.action.SingleInputSenderExecutableAction;\n+import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler;\n+import org.elasticsearch.xpack.inference.external.http.sender.ChatCompletionInput;\n+import org.elasticsearch.xpack.inference.external.http.sender.GenericRequestManager;\n import org.elasticsearch.xpack.inference.external.http.sender.Sender;\n+import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;\n import org.elasticsearch.xpack.inference.services.ServiceComponents;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxCompletionResponseHandler;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxEmbeddingsRequestManager;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxRerankRequestManager;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsModel;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.request.IbmWatsonxChatCompletionRequest;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankModel;\n+import org.elasticsearch.xpack.inference.services.openai.response.OpenAiChatCompletionResponseEntity;\n \n import java.util.Map;\n import java.util.Objects;\n \n+import static org.elasticsearch.core.Strings.format;\n import static org.elasticsearch.xpack.inference.external.action.ActionUtils.constructFailedToSendRequestMessage;\n \n+/**\n+ * IbmWatsonxActionCreator is responsible for creating executable actions for various models.\n+ * It implements the IbmWatsonxActionVisitor interface to provide specific implementations.\n+ */\n public class IbmWatsonxActionCreator implements IbmWatsonxActionVisitor {\n private final Sender sender;\n private final ServiceComponents serviceComponents;\n \n+ static final String COMPLETION_REQUEST_TYPE = \"IBM WatsonX completions\";", + "comment_created_at": "2025-06-13T14:32:43+00:00", + "comment_author": "Evgenii-Kazannik", + "comment_body": "Thanks. 
Unified the naming as IBM Watsonx", + "pr_file_module": null + }, + { + "comment_id": "2150308524", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129146, + "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionCreator.java", + "discussion_id": "2137933778", + "commented_code": "@@ -7,26 +7,48 @@\n \n package org.elasticsearch.xpack.inference.services.ibmwatsonx.action;\n \n+import org.elasticsearch.inference.TaskType;\n import org.elasticsearch.threadpool.ThreadPool;\n import org.elasticsearch.xpack.inference.common.Truncator;\n import org.elasticsearch.xpack.inference.external.action.ExecutableAction;\n import org.elasticsearch.xpack.inference.external.action.SenderExecutableAction;\n+import org.elasticsearch.xpack.inference.external.action.SingleInputSenderExecutableAction;\n+import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler;\n+import org.elasticsearch.xpack.inference.external.http.sender.ChatCompletionInput;\n+import org.elasticsearch.xpack.inference.external.http.sender.GenericRequestManager;\n import org.elasticsearch.xpack.inference.external.http.sender.Sender;\n+import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;\n import org.elasticsearch.xpack.inference.services.ServiceComponents;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxCompletionResponseHandler;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxEmbeddingsRequestManager;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxRerankRequestManager;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsModel;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.request.IbmWatsonxChatCompletionRequest;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankModel;\n+import org.elasticsearch.xpack.inference.services.openai.response.OpenAiChatCompletionResponseEntity;\n \n import java.util.Map;\n import java.util.Objects;\n \n+import static org.elasticsearch.core.Strings.format;\n import static org.elasticsearch.xpack.inference.external.action.ActionUtils.constructFailedToSendRequestMessage;\n \n+/**\n+ * IbmWatsonxActionCreator is responsible for creating executable actions for various models.\n+ * It implements the IbmWatsonxActionVisitor interface to provide specific implementations.\n+ */\n public class IbmWatsonxActionCreator implements IbmWatsonxActionVisitor {\n private final Sender sender;\n private final ServiceComponents serviceComponents;\n \n+ static final String COMPLETION_REQUEST_TYPE = \"IBM WatsonX completions\";", + "comment_created_at": "2025-06-16T15:34:29+00:00", + "comment_author": "dan-rubinstein", + "comment_body": "A bit of a nitpick but the platform seems to be called `IBM watsonx` (from [their website](https://www.ibm.com/watsonx)) but this change updates it everywhere to `IBM Watsonx`. 
Can we keep consistent with IBM's capitalization to avoid confusion?", + "pr_file_module": null + }, + { + "comment_id": "2156393080", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129146, + "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionCreator.java", + "discussion_id": "2137933778", + "commented_code": "@@ -7,26 +7,48 @@\n \n package org.elasticsearch.xpack.inference.services.ibmwatsonx.action;\n \n+import org.elasticsearch.inference.TaskType;\n import org.elasticsearch.threadpool.ThreadPool;\n import org.elasticsearch.xpack.inference.common.Truncator;\n import org.elasticsearch.xpack.inference.external.action.ExecutableAction;\n import org.elasticsearch.xpack.inference.external.action.SenderExecutableAction;\n+import org.elasticsearch.xpack.inference.external.action.SingleInputSenderExecutableAction;\n+import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler;\n+import org.elasticsearch.xpack.inference.external.http.sender.ChatCompletionInput;\n+import org.elasticsearch.xpack.inference.external.http.sender.GenericRequestManager;\n import org.elasticsearch.xpack.inference.external.http.sender.Sender;\n+import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;\n import org.elasticsearch.xpack.inference.services.ServiceComponents;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxCompletionResponseHandler;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxEmbeddingsRequestManager;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxRerankRequestManager;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsModel;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.request.IbmWatsonxChatCompletionRequest;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankModel;\n+import org.elasticsearch.xpack.inference.services.openai.response.OpenAiChatCompletionResponseEntity;\n \n import java.util.Map;\n import java.util.Objects;\n \n+import static org.elasticsearch.core.Strings.format;\n import static org.elasticsearch.xpack.inference.external.action.ActionUtils.constructFailedToSendRequestMessage;\n \n+/**\n+ * IbmWatsonxActionCreator is responsible for creating executable actions for various models.\n+ * It implements the IbmWatsonxActionVisitor interface to provide specific implementations.\n+ */\n public class IbmWatsonxActionCreator implements IbmWatsonxActionVisitor {\n private final Sender sender;\n private final ServiceComponents serviceComponents;\n \n+ static final String COMPLETION_REQUEST_TYPE = \"IBM WatsonX completions\";", + "comment_created_at": "2025-06-19T07:59:32+00:00", + "comment_author": "Evgenii-Kazannik", + "comment_body": "Sure. I updated it everywhere from IBM Watsonx to IBM watsonx. 
Thanks", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2176564193", + "pr_number": 129013, + "pr_file": "server/src/main/java/org/elasticsearch/TransportVersions.java", + "created_at": "2025-07-01T06:45:32+00:00", + "commented_code": "public static final TransportVersion ML_INFERENCE_COHERE_API_VERSION = def(9_110_0_00);\n public static final TransportVersion ESQL_PROFILE_INCLUDE_PLAN = def(9_111_0_00);\n public static final TransportVersion MAPPINGS_IN_DATA_STREAMS = def(9_112_0_00);\n public static final TransportVersion LOOKUP_JOIN_MANY_INDICES = def(9_113_0_00);", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2176564193", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129013, + "pr_file": "server/src/main/java/org/elasticsearch/TransportVersions.java", + "discussion_id": "2176564193", + "commented_code": "@@ -326,6 +326,7 @@ static TransportVersion def(int id) {\n public static final TransportVersion ML_INFERENCE_COHERE_API_VERSION = def(9_110_0_00);\n public static final TransportVersion ESQL_PROFILE_INCLUDE_PLAN = def(9_111_0_00);\n public static final TransportVersion MAPPINGS_IN_DATA_STREAMS = def(9_112_0_00);\n+ public static final TransportVersion LOOKUP_JOIN_MANY_INDICES = def(9_113_0_00);", + "comment_created_at": "2025-07-01T06:45:32+00:00", + "comment_author": "idegtiarenko", + "comment_body": "This is a bit confusing. Why many indices? Should it be LOOKUP_JOIN_REMOTE_INDICES instead?", + "pr_file_module": null + }, + { + "comment_id": "2177851184", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129013, + "pr_file": "server/src/main/java/org/elasticsearch/TransportVersions.java", + "discussion_id": "2176564193", + "commented_code": "@@ -326,6 +326,7 @@ static TransportVersion def(int id) {\n public static final TransportVersion ML_INFERENCE_COHERE_API_VERSION = def(9_110_0_00);\n public static final TransportVersion ESQL_PROFILE_INCLUDE_PLAN = def(9_111_0_00);\n public static final TransportVersion MAPPINGS_IN_DATA_STREAMS = def(9_112_0_00);\n+ public static final TransportVersion LOOKUP_JOIN_MANY_INDICES = def(9_113_0_00);", + "comment_created_at": "2025-07-01T15:03:54+00:00", + "comment_author": "smalyshev", + "comment_body": "Because 9.1 already supports receiving LOOKUP JOIN from the coordinator, but only if there's a single index. If there is more than one - i.e. if it `lookup_index` resolves to `remote1:lookup_on_remote1,remote2:lookup_on_remote2` (because of aliases), 9.1 remote would not be able to process such request. ", + "pr_file_module": null + }, + { + "comment_id": "2180403269", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129013, + "pr_file": "server/src/main/java/org/elasticsearch/TransportVersions.java", + "discussion_id": "2176564193", + "commented_code": "@@ -326,6 +326,7 @@ static TransportVersion def(int id) {\n public static final TransportVersion ML_INFERENCE_COHERE_API_VERSION = def(9_110_0_00);\n public static final TransportVersion ESQL_PROFILE_INCLUDE_PLAN = def(9_111_0_00);\n public static final TransportVersion MAPPINGS_IN_DATA_STREAMS = def(9_112_0_00);\n+ public static final TransportVersion LOOKUP_JOIN_MANY_INDICES = def(9_113_0_00);", + "comment_created_at": "2025-07-02T15:47:44+00:00", + "comment_author": "alex-spies", + "comment_body": "I find this name confusing as well. 
Can we rename to `LOOKUP_JOIN_CCS` or so?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2178772720", + "pr_number": 130411, + "pr_file": "server/src/main/java/org/elasticsearch/cluster/NodeWriteLoad.java", + "created_at": "2025-07-02T00:43:50+00:00", + "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. Licensed under the \"Elastic License\n * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n * Public License v 1\"; you may not use this file except in compliance with, at\n * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n * License v3.0 only\", or the \"Server Side Public License, v 1\".\n */\n\npackage org.elasticsearch.cluster;\n\nimport org.elasticsearch.common.io.stream.StreamInput;\nimport org.elasticsearch.common.io.stream.StreamOutput;\nimport org.elasticsearch.common.io.stream.Writeable;\n\nimport java.io.IOException;\n\n/**\n * Record representing an estimate of a node's write load. The estimation is based on the usage of the node's write thread pool.\n *\n * @param nodeId Node ID.\n * @param totalWriteThreadPoolThreads Total number of threads in the write thread pool.\n * @param percentWriteThreadPoolUtilization Percent of write thread pool threads that are in use, averaged over some period of time.\n * @param maxTaskTimeInWriteQueueMillis How long the oldest task (next to be run) in the write thread pool queue has been queued. Zero if\n * there is no write thread pool queue.\n */\npublic record NodeWriteLoad(\n String nodeId,\n int totalWriteThreadPoolThreads,\n int percentWriteThreadPoolUtilization,\n long maxTaskTimeInWriteQueueMillis", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2178772720", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130411, + "pr_file": "server/src/main/java/org/elasticsearch/cluster/NodeWriteLoad.java", + "discussion_id": "2178772720", + "commented_code": "@@ -0,0 +1,71 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+\n+package org.elasticsearch.cluster;\n+\n+import org.elasticsearch.common.io.stream.StreamInput;\n+import org.elasticsearch.common.io.stream.StreamOutput;\n+import org.elasticsearch.common.io.stream.Writeable;\n+\n+import java.io.IOException;\n+\n+/**\n+ * Record representing an estimate of a node's write load. The estimation is based on the usage of the node's write thread pool.\n+ *\n+ * @param nodeId Node ID.\n+ * @param totalWriteThreadPoolThreads Total number of threads in the write thread pool.\n+ * @param percentWriteThreadPoolUtilization Percent of write thread pool threads that are in use, averaged over some period of time.\n+ * @param maxTaskTimeInWriteQueueMillis How long the oldest task (next to be run) in the write thread pool queue has been queued. 
Zero if\n+ * there is no write thread pool queue.\n+ */\n+public record NodeWriteLoad(\n+ String nodeId,\n+ int totalWriteThreadPoolThreads,\n+ int percentWriteThreadPoolUtilization,\n+ long maxTaskTimeInWriteQueueMillis", + "comment_created_at": "2025-07-02T00:43:50+00:00", + "comment_author": "nicktindall", + "comment_body": "I think we should indicate that these are averages (e.g. `averageWriteThreadPoolUtilization` and `averageWriteThreadPoolQueueLatency`)\r\n\r\nI used the term \"queue latency\" to describe the time spent waiting in the queue, I'm not married to that term specifically but I think we should try and be consistent (happy to agree on something else).", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2168477523", + "pr_number": 130042, + "pr_file": "server/src/main/java/org/elasticsearch/cluster/metadata/MetadataDataStreamsService.java", + "created_at": "2025-06-26T08:16:17+00:00", + "commented_code": "Priority.NORMAL,\n updateSettingsExecutor\n );\n ClusterStateTaskExecutor updateMappingsExecutor = new SimpleBatchedAckListenerTaskExecutor<>() {\n\n @Override\n public Tuple executeTask(\n UpdateMappingsTask updateMappingsTask,\n ClusterState clusterState\n ) throws Exception {\n DataStream dataStream = createDataStreamForUpdatedDataStreamMappings(\n updateMappingsTask.projectId,\n updateMappingsTask.dataStreamName,\n updateMappingsTask.mappingsOverrides,\n clusterState\n );\n ProjectMetadata projectMetadata = clusterState.metadata().getProject(updateMappingsTask.projectId);\n ProjectMetadata.Builder projectMetadataBuilder = ProjectMetadata.builder(projectMetadata);\n projectMetadataBuilder.removeDataStream(updateMappingsTask.dataStreamName);\n projectMetadataBuilder.put(dataStream);\n ClusterState updatedClusterState = ClusterState.builder(clusterState).putProjectMetadata(projectMetadataBuilder).build();\n return new Tuple<>(updatedClusterState, updateMappingsTask);\n }\n };\n this.updateMappingsTaskQueue = clusterService.createTaskQueue(\n \"update-data-stream-settings\",", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2168477523", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130042, + "pr_file": "server/src/main/java/org/elasticsearch/cluster/metadata/MetadataDataStreamsService.java", + "discussion_id": "2168477523", + "commented_code": "@@ -164,6 +166,32 @@ public Tuple executeTask(\n Priority.NORMAL,\n updateSettingsExecutor\n );\n+ ClusterStateTaskExecutor updateMappingsExecutor = new SimpleBatchedAckListenerTaskExecutor<>() {\n+\n+ @Override\n+ public Tuple executeTask(\n+ UpdateMappingsTask updateMappingsTask,\n+ ClusterState clusterState\n+ ) throws Exception {\n+ DataStream dataStream = createDataStreamForUpdatedDataStreamMappings(\n+ updateMappingsTask.projectId,\n+ updateMappingsTask.dataStreamName,\n+ updateMappingsTask.mappingsOverrides,\n+ clusterState\n+ );\n+ ProjectMetadata projectMetadata = clusterState.metadata().getProject(updateMappingsTask.projectId);\n+ ProjectMetadata.Builder projectMetadataBuilder = ProjectMetadata.builder(projectMetadata);\n+ projectMetadataBuilder.removeDataStream(updateMappingsTask.dataStreamName);\n+ projectMetadataBuilder.put(dataStream);\n+ ClusterState updatedClusterState = ClusterState.builder(clusterState).putProjectMetadata(projectMetadataBuilder).build();\n+ return new Tuple<>(updatedClusterState, updateMappingsTask);\n+ }\n+ };\n+ this.updateMappingsTaskQueue = clusterService.createTaskQueue(\n+ \"update-data-stream-settings\",", + 
"comment_created_at": "2025-06-26T08:16:17+00:00", + "comment_author": "Copilot", + "comment_body": "The task queue is named \"update-data-stream-settings\" but is used for mappings; it should be renamed to \"update-data-stream-mappings\" for clarity.\n```suggestion\n \"update-data-stream-mappings\",\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2128503327", + "pr_number": 128930, + "pr_file": "x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleService.java", + "created_at": "2025-06-05T10:23:45+00:00", + "commented_code": "return IndexLifecycleTransition.moveIndexToStep(index, project, newStepKey, nowSupplier, policyRegistry, true);\n }\n\n public ClusterState moveClusterStateToPreviouslyFailedStep(ClusterState currentState, String[] indices) {\n ClusterState newState = currentState;\n public ProjectMetadata moveClusterStateToPreviouslyFailedStep(ProjectMetadata currentProject, String[] indices) {", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2128503327", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 128930, + "pr_file": "x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleService.java", + "discussion_id": "2128503327", + "commented_code": "@@ -170,12 +170,18 @@ public ProjectMetadata moveIndexToStep(ProjectMetadata project, Index index, Ste\n return IndexLifecycleTransition.moveIndexToStep(index, project, newStepKey, nowSupplier, policyRegistry, true);\n }\n \n- public ClusterState moveClusterStateToPreviouslyFailedStep(ClusterState currentState, String[] indices) {\n- ClusterState newState = currentState;\n+ public ProjectMetadata moveClusterStateToPreviouslyFailedStep(ProjectMetadata currentProject, String[] indices) {", + "comment_created_at": "2025-06-05T10:23:45+00:00", + "comment_author": "PeteGillinElastic", + "comment_body": "Method naming time again! This definitely shouldn't have `ClusterState` in its name since it deals with `ProjectMetadata`. Arguably, it should be called `moveIndicesToPreviouslyFailedStep`, since it's really the state of the indices that it's acting on. WDYT?", + "pr_file_module": null + }, + { + "comment_id": "2129069796", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 128930, + "pr_file": "x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleService.java", + "discussion_id": "2128503327", + "commented_code": "@@ -170,12 +170,18 @@ public ProjectMetadata moveIndexToStep(ProjectMetadata project, Index index, Ste\n return IndexLifecycleTransition.moveIndexToStep(index, project, newStepKey, nowSupplier, policyRegistry, true);\n }\n \n- public ClusterState moveClusterStateToPreviouslyFailedStep(ClusterState currentState, String[] indices) {\n- ClusterState newState = currentState;\n+ public ProjectMetadata moveClusterStateToPreviouslyFailedStep(ProjectMetadata currentProject, String[] indices) {", + "comment_created_at": "2025-06-05T14:59:59+00:00", + "comment_author": "nielsbauman", + "comment_body": "Ugh, you're absolutely right. I did it for one of these methods, but forgot to do the others. 
Thanks for raising it.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2128506436", + "pr_number": 128930, + "pr_file": "x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java", + "created_at": "2025-06-05T10:24:50+00:00", + "commented_code": ");\n }\n\n return LifecycleExecutionStateUtils.newClusterStateWithLifecycleState(clusterState, idxMeta.getIndex(), failedState.build());\n return project.withLifecycleState(idxMeta.getIndex(), failedState.build());\n }\n\n /**\n * Move the given index's execution state back to a step that had previously failed. If this is\n * an automatic retry ({@code isAutomaticRetry}), the retry count is incremented.\n */\n static ClusterState moveClusterStateToPreviouslyFailedStep(\n ClusterState currentState,\n static ProjectMetadata moveClusterStateToPreviouslyFailedStep(", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2128506436", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 128930, + "pr_file": "x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java", + "discussion_id": "2128506436", + "commented_code": "@@ -204,22 +201,21 @@ static ClusterState moveClusterStateToErrorStep(\n );\n }\n \n- return LifecycleExecutionStateUtils.newClusterStateWithLifecycleState(clusterState, idxMeta.getIndex(), failedState.build());\n+ return project.withLifecycleState(idxMeta.getIndex(), failedState.build());\n }\n \n /**\n * Move the given index's execution state back to a step that had previously failed. If this is\n * an automatic retry ({@code isAutomaticRetry}), the retry count is incremented.\n */\n- static ClusterState moveClusterStateToPreviouslyFailedStep(\n- ClusterState currentState,\n+ static ProjectMetadata moveClusterStateToPreviouslyFailedStep(", + "comment_created_at": "2025-06-05T10:24:50+00:00", + "comment_author": "PeteGillinElastic", + "comment_body": "Method naming again... Should this one be `moveIndexToPreviouslyFailedStep`, like you've done with `moveIndexToErrorStep` above?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2027497237", + "pr_number": 122491, + "pr_file": "modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/CefProcessorTests.java", + "created_at": "2025-04-03T17:57:14+00:00", + "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. 
Licensed under the \"Elastic License\n * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n * Public License v 1\"; you may not use this file except in compliance with, at\n * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n * License v3.0 only\", or the \"Server Side Public License, v 1\".\n */\n\npackage org.elasticsearch.ingest.common;\n\nimport org.elasticsearch.ingest.IngestDocument;\nimport org.elasticsearch.test.ESTestCase;\nimport org.junit.Before;\n\nimport java.time.Instant;\nimport java.time.ZoneId;\nimport java.time.ZonedDateTime;\nimport java.util.HashMap;\nimport java.util.List;\nimport java.util.Map;\nimport java.util.Set;\n\nimport static java.util.Map.entry;\nimport static org.hamcrest.Matchers.containsInAnyOrder;\nimport static org.hamcrest.Matchers.equalTo;\n\npublic class CefProcessorTests extends ESTestCase {\n\n private IngestDocument document;\n\n @Before\n public void setUp() throws Exception {\n super.setUp();\n }\n\n public void testExecute() {\n Map source = new HashMap<>();\n String message = \"CEF:0|Elastic|Vaporware|1.0.0-alpha|18|Web request|low|eventId=3457 requestMethod=POST \"\n + \"slat=38.915 slong=-77.511 proto=TCP sourceServiceName=httpd requestContext=https://www.google.com \"\n + \"src=89.160.20.156 spt=33876 dst=192.168.10.1 dpt=443 request=https://www.example.com/cart\";\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"Elastic\", \"product\", \"Vaporware\", \"version\", \"1.0.0-alpha\", \"event_class_id\", \"18\")\n ),\n entry(\"name\", \"Web request\"),\n entry(\"severity\", \"low\")\n )\n ),\n entry(\"observer\", Map.of(\"product\", \"Vaporware\", \"vendor\", \"Elastic\", \"version\", \"1.0.0-alpha\")),\n entry(\"event\", Map.of(\"id\", \"3457\", \"code\", \"18\")),\n entry(\n \"source\",\n Map.ofEntries(\n entry(\"ip\", \"89.160.20.156\"),\n entry(\"port\", 33876),\n entry(\"geo\", Map.of(\"location\", Map.of(\"lon\", -77.511, \"lat\", 38.915))),\n entry(\"service\", Map.of(\"name\", \"httpd\"))\n )\n ),\n entry(\"destination\", Map.of(\"ip\", \"192.168.10.1\", \"port\", 443)),\n entry(\"http\", Map.of(\"request\", Map.of(\"method\", \"POST\", \"referrer\", \"https://www.google.com\"))),\n entry(\"network\", Map.of(\"transport\", \"TCP\")),\n entry(\"url\", Map.of(\"original\", \"https://www.example.com/cart\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testInvalidCefFormat() {\n Map invalidSource = new HashMap<>();\n invalidSource.put(\"message\", \"Invalid CEF message\");\n IngestDocument invalidIngestDocument = new IngestDocument(\"index\", \"id\", 1L, null, null, invalidSource);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n expectThrows(IllegalArgumentException.class, () -> processor.execute(invalidIngestDocument));\n }\n\n public void testStandardMessage() {\n String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|\"\n + \"src=10.0.0.192 dst=12.121.122.82 spt=1232 eventId=1 in=4294967296 out=4294967296\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new 
IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"26\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232, \"bytes\", 4294967296L)),\n entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\", \"bytes\", 4294967296L)),\n entry(\"event\", Map.of(\"id\", \"1\", \"code\", \"100\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testHeaderOnly() {\n String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"26\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testEmptyDeviceFields() {\n String message = \"CEF:0|||1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\"device\", Map.of(\"vendor\", \"\", \"product\", \"\", \"version\", \"1.0\", \"event_class_id\", \"100\")),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"\", \"vendor\", \"\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testEscapedPipeInHeader() {\n String message = \"CEF:26|security|threat\\\\|->manager|1.0|100|\"\n + \"trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n 
Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"26\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threat|->manager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threat|->manager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testEqualsSignInHeader() {\n String message = \"CEF:26|security|threat=manager|1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"26\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threat=manager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threat=manager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testEmptyExtensionValue() {\n String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst= spt=1232\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"26\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n entry(\"message\", message)\n )\n );\n }\n\n public void testLeadingWhitespace() {\n String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10| src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", 
\"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testEscapedPipeInExtension() {\n String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this\\\\|has an escaped pipe\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n expectThrows(IllegalArgumentException.class, () -> processor.execute(document));\n }\n\n public void testPipeInMessage() {\n String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this|has a pipe\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\"),\n entry(\"extensions\", Map.of(\"moo\", \"this|has a pipe\"))\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testEqualsInMessage() {\n String message =\n \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this =has = equals\\\\= dst=12.121.122.82 spt=1232\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\"),\n entry(\"extensions\", Map.of(\"moo\", \"this =has = equals=\"))\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n entry(\"source\", Map.of(\"port\", 1232)),\n entry(\"message\", message)\n )\n );\n }\n\n public void testEscapesInExtension() {\n String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|msg=a+b\\\\=c x=c\\\\\\\\d\\\\=z\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new 
IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\"),\n entry(\"extensions\", Map.of(\"x\", \"c\\\\d=z\"))\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"message\", \"a+b=c\")\n )\n );\n }\n\n public void testMalformedExtensionEscape() {\n String message = \"CEF:0|FooBar|Web Gateway|1.2.3.45.67|200|Success|2|rt=Sep 07 2018 14:50:39 cat=Access Log dst=1.1.1.1 \"\n + \"dhost=foo.example.com suser=redacted src=2.2.2.2 requestMethod=POST request='https://foo.example.com/bar/bingo/1' \"\n + \"requestClientApplication='Foo-Bar/2018.1.7; =Email:user@example.com; Guid:test=' cs1= cs1Label=Foo Bar\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"FooBar\", \"product\", \"Web Gateway\", \"version\", \"1.2.3.45.67\", \"event_class_id\", \"200\")\n ),\n entry(\"name\", \"Success\"),\n entry(\"severity\", \"2\"),\n entry(\"extensions\", Map.of(\"deviceCustomString1Label\", \"Foo Bar\", \"deviceEventCategory\", \"Access Log\"))\n )\n ),\n entry(\"event\", Map.of(\"code\", \"200\")),\n entry(\"observer\", Map.of(\"product\", \"Web Gateway\", \"vendor\", \"FooBar\", \"version\", \"1.2.3.45.67\")),\n entry(\"@timestamp\", ZonedDateTime.parse(\"2018-09-07T14:50:39Z\")),\n entry(\"destination\", Map.of(\"ip\", \"1.1.1.1\", \"domain\", \"foo.example.com\")),\n entry(\"source\", Map.of(\"ip\", \"2.2.2.2\", \"user\", Map.of(\"name\", \"redacted\"))),\n entry(\"http\", Map.of(\"request\", Map.of(\"method\", \"POST\"))),\n entry(\"url\", Map.of(\"original\", \"'https://foo.example.com/bar/bingo/1'\")),\n entry(\"user_agent\", Map.of(\"original\", \"'Foo-Bar/2018.1.7; =Email:user@example.com; Guid:test='\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testMultipleMalformedExtensionValues() {\n String message = \"CEF:0|vendor|product|version|event_id|name|Very-High| \"\n + \"msg=Hello World error=Failed because id==old_id user=root angle=106.7<=180\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"vendor\", \"product\", \"product\", \"version\", \"version\", \"event_class_id\", \"event_id\")\n ),\n entry(\"name\", \"name\"),\n entry(\"severity\", \"Very-High\"),\n 
entry(\"extensions\", Map.of(\"id\", \"=old_id\", \"user\", \"root\", \"angle\", \"106.7<=180\", \"error\", \"Failed because\"))\n )\n ),\n entry(\"event\", Map.of(\"code\", \"event_id\")),\n entry(\"observer\", Map.of(\"product\", \"product\", \"vendor\", \"vendor\", \"version\", \"version\")),\n entry(\"message\", \"Hello World\")\n )\n );\n }\n\n public void testPaddedMessage() {\n String message = \"CEF:0|security|threatmanager|1.0|100|message is padded|10|spt=1232 \"\n + \"msg=Trailing space in non-final extensions is preserved src=10.0.0.192 \";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"message is padded\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n entry(\"message\", \"Trailing space in non-final extensions is preserved\")\n )\n );\n }\n\n public void testCrlfMessage() {\n String message = \"CEF:0|security|threatmanager|1.0|100|message is padded|10|\"\n + \"spt=1232 msg=Trailing space in final extensions is not preserved\\t \\r\\ndpt=1234\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"message is padded\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"port\", 1232)),\n entry(\"message\", \"Trailing space in final extensions is not preserved\"),\n entry(\"destination\", Map.of(\"port\", 1234))\n )\n );\n }\n\n public void testTabMessage() {\n String message = \"CEF:0|security|threatmanager|1.0|100|message is padded|10|\"\n + \"spt=1232 msg=Tabs\\tand\\rcontrol\\ncharacters are preserved\\t src=127.0.0.1\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"message is padded\"),\n entry(\"severity\", \"10\")\n )\n 
),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"port\", 1232, \"ip\", \"127.0.0.1\")),\n entry(\"message\", \"Tabs\\tand\\rcontrol\\ncharacters are preserved\")\n )\n );\n }\n\n public void testTabNoSepMessage() {\n String message = \"CEF:0|security|threatmanager|1.0|100|message has tabs|10|spt=1232 msg=Tab is not a separator\\tsrc=127.0.0.1\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"message has tabs\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"port\", 1232, \"ip\", \"127.0.0.1\")),\n entry(\"message\", \"Tab is not a separator\")\n )\n );\n }\n\n public void testEscapedMessage() {\n String message = \"CEF:0|security\\\\compliance|threat\\\\|->manager|1.0|100|message contains escapes|10|\"\n + \"spt=1232 msg=Newlines in messages\\\\\\nare allowed.\\\\\\r\\\\\\nAnd so are carriage feeds\\\\\\\\newlines\\\\\\\\\\\\=. dpt=4432\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.ofEntries(\n entry(\"vendor\", \"security\\\\compliance\"),\n entry(\"product\", \"threat|->manager\"),\n entry(\"version\", \"1.0\"),\n entry(\"event_class_id\", \"100\")\n )\n ),\n entry(\"name\", \"message contains escapes\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threat|->manager\", \"vendor\", \"security\\\\compliance\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"port\", 1232)),\n entry(\"message\", \"Newlines in messages\\nare allowed.\\r\\nAnd so are carriage feeds\\\\newlines\\\\=.\"),\n entry(\"destination\", Map.of(\"port\", 4432))\n )\n );\n }\n\n public void testTruncatedHeader() {\n String message = \"CEF:0|SentinelOne|Mgmt|activityID=1111111111111111111 activityType=3505 \"\n + \"siteId=None siteName=None accountId=1222222222222222222 accountName=foo-bar mdr notificationScope=ACCOUNT\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\"device\", Map.of(\"vendor\", \"SentinelOne\", \"product\", 
\"Mgmt\")),\n entry(\n \"extensions\",\n Map.ofEntries(\n entry(\"activityID\", \"1111111111111111111\"),\n entry(\"activityType\", \"3505\"),\n entry(\"siteId\", \"None\"),\n entry(\"siteName\", \"None\"),\n entry(\"accountId\", \"1222222222222222222\"),\n entry(\"accountName\", \"foo-bar mdr\"),\n entry(\"notificationScope\", \"ACCOUNT\")\n )\n )\n )\n ),\n entry(\"observer\", Map.of(\"product\", \"Mgmt\", \"vendor\", \"SentinelOne\")),\n entry(\"message\", message),\n entry(\"error\", Map.of(\"message\", Set.of(\"incomplete CEF header\")))\n )\n );\n }\n\n public void testIgnoreEmptyValuesInExtension() {\n String message = \"CEF:26|security|threat=manager|1.0|100|trojan successfully stopped|10|src= dst=12.121.122.82 spt=\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"26\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threat=manager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threat=manager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testHyphenInExtensionKey() {\n String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|Some-Key=123456\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"26\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\"),\n entry(\"extensions\", Map.of(\"Some-Key\", \"123456\"))\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testAllFieldsInExtension() {\n String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|\"\n + \"agt=192.168.0.1 agentDnsDomain=example.com ahost=agentHost aid=agentId amac=00:0a:95:9d:68:16 agentNtDomain=example.org \"\n + \"art=1622547800000 atz=UTC agentTranslatedAddress=10.0.0.1 agentTranslatedZoneExternalID=ext123 agentTranslatedZoneURI=uri \"\n + \"at=agentType av=1.0 agentZoneExternalID=zoneExtId agentZoneURI=zoneUri app=HTTP cnt=1234 in=5678 out=91011 \"\n + \"customerExternalID=custExtId customerURI=custUri dst=192.168.0.2 dlat=37.7749 dlong=-122.4194 \"\n + \"dhost=destHost dmac=00:0a:95:9d:68:16 dntdom=destNtDomain dpt=80 dpid=1234 \"\n + \"dproc=destProc destinationServiceName=destService \"\n + \"destinationTranslatedAddress=10.0.0.2 destinationTranslatedPort=8080 
destinationTranslatedZoneExternalID=destExtId \"\n + \"destinationTranslatedZoneURI=destUri duid=destUserId duser=destUser dpriv=admin destinationZoneExternalID=destZoneExtId \"\n + \"destinationZoneURI=destZoneUri act=blocked dvc=192.168.0.3 cfp1Label=cfp1Label cfp3Label=cfp3Label cfp4Label=cfp4Label \"\n + \"deviceCustomDate1=1622547800000 deviceCustomDate1Label=customDate1Label deviceCustomDate2=1622547900000 \"\n + \"deviceCustomDate2Label=customDate2Label cfp1=1.23 cfp2=2.34 cfp2Label=cfp2Label cfp3=3.45 cfp4=4.56 c6a1=2001:db8::1 \"\n + \"c6a1Label=c6a1Label c6a2=2001:db8::2 c6a2Label=c6a2Label c6a3=2001:db8::3 c6a3Label=c6a3Label c6a4=2001:db8::4 \"\n + \"C6a4Label=c6a4Label cn1=123 cn1Label=cn1Label cn2=234 cn2Label=cn2Label cn3=345 cn3Label=cn3Label cs1=customString1 \"\n + \"cs1Label=cs1Label cs2=customString2 cs2Label=cs2Label cs3=customString3 cs3Label=cs3Label \"\n + \"cs4=customString4 cs4Label=cs4Label \"\n + \"cs5=customString5 cs5Label=cs5Label cs6=customString6 cs6Label=cs6Label deviceDirection=inbound deviceDnsDomain=example.com \"\n + \"cat=category deviceExternalId=extId deviceFacility=16 dvchost=host1 deviceInboundInterface=eth0 dvcmac=00:0a:95:9d:68:16 \"\n + \"deviceNtDomain=example.org deviceOutboundInterface=eth1 devicePayloadId=payloadId dvcpid=5678 deviceProcessName=procName \"\n + \"rt=1622547800000 dtz=UTC deviceTranslatedAddress=10.0.0.3 deviceTranslatedZoneExternalID=transExtId \"\n + \"deviceTranslatedZoneURI=transUri deviceZoneExternalID=zoneExtId deviceZoneURI=zoneUri end=1622547900000 eventId=evt123 \"\n + \"outcome=success externalId=extId fileCreateTime=1622547800000 fileHash=abcd1234 fileId=5678 \"\n + \"fileModificationTime=1622547900000 \"\n + \"fname=file.txt filePath=/path/to/file filePermission=rw-r--r-- fsize=1024 fileType=txt flexDate1=1622547800000 \"\n + \"flexDate1Label=flexDate1Label flexString1=flexString1 flexString2=flexString2 flexString1Label=flexString1Label \"\n + \"flexString2Label=flexString2Label msg=message oldFileCreateTime=1622547800000 oldFileHash=oldHash oldFileId=oldId \"\n + \"oldFileModificationTime=1622547900000 oldFileName=oldFile oldFilePath=/old/path \"\n + \"oldFilePermission=rw-r--r-- oldFileSize=2048 \"\n + \"oldFileType=oldType rawEvent=rawEvent reason=reason requestClientApplication=Mozilla requestContext=referrer \"\n + \"requestCookies=cookies requestMethod=GET request=url src=192.168.0.4 sourceDnsDomain=sourceDomain \"\n + \"slat=37.7749 slong=-122.4194 \"\n + \"shost=sourceHost smac=00:0a:95:9d:68:16 sntdom=sourceNtDomain spt=443 spid=1234 \"\n + \"sproc=sourceProc sourceServiceName=sourceService \"\n + \"sourceTranslatedAddress=10.0.0.4 sourceTranslatedPort=8081 sourceTranslatedZoneExternalID=sourceExtId \"\n + \"sourceTranslatedZoneURI=sourceUri suid=sourceUserId suser=sourceUser spriv=sourcePriv sourceZoneExternalID=sourceZoneExtId \"\n + \"sourceZoneURI=sourceZoneUri start=1622547800000 proto=TCP type=1 catdt=catDeviceType mrt=1622547800000\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n 
entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\"),\n entry(\n \"extensions\",\n Map.ofEntries(\n entry(\"deviceNtDomain\", \"example.org\"),\n entry(\"agentZoneExternalID\", \"zoneExtId\"),\n entry(\"agentTimeZone\", \"UTC\"),\n entry(\"deviceCustomIPv6Address1Label\", \"c6a1Label\"),\n entry(\"deviceCustomString1\", \"customString1\"),\n entry(\"deviceCustomIPv6Address2Label\", \"c6a2Label\"),\n entry(\"deviceCustomNumber3\", 345L),\n entry(\"deviceCustomFloatingPoint1\", 1.23f),\n entry(\"deviceCustomNumber2\", 234L),\n entry(\"deviceCustomFloatingPoint2\", 2.34f),\n entry(\"deviceCustomFloatingPoint3\", 3.45f),\n entry(\"deviceCustomFloatingPoint4\", 4.56f),\n entry(\"flexDate1\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n entry(\"destinationTranslatedZoneExternalID\", \"destExtId\"),\n entry(\"deviceCustomNumber1\", 123L),\n entry(\"deviceEventCategory\", \"category\"),\n entry(\"deviceCustomString6Label\", \"cs6Label\"),\n entry(\"deviceCustomNumber2Label\", \"cn2Label\"),\n entry(\"flexString1Label\", \"flexString1Label\"),\n entry(\"deviceCustomString5Label\", \"cs5Label\"),\n entry(\"agentZoneURI\", \"zoneUri\"),\n entry(\"deviceCustomString2Label\", \"cs2Label\"),\n entry(\"deviceCustomDate2Label\", \"customDate2Label\"),\n entry(\"deviceCustomNumber1Label\", \"cn1Label\"),\n entry(\"oldFileType\", \"oldType\"),\n entry(\"destinationZoneExternalID\", \"destZoneExtId\"),\n entry(\"categoryDeviceType\", \"catDeviceType\"),\n entry(\"deviceZoneURI\", \"zoneUri\"),\n entry(\"sourceTranslatedZoneExternalID\", \"sourceExtId\"),\n entry(\"agentTranslatedAddress\", \"10.0.0.1\"),\n entry(\"requestCookies\", \"cookies\"),\n entry(\"deviceCustomIPv6Address3\", \"2001:db8::3\"),\n entry(\"oldFilePath\", \"/old/path\"),\n entry(\"deviceCustomIPv6Address2\", \"2001:db8::2\"),\n entry(\"deviceCustomIPv6Address1\", \"2001:db8::1\"),\n entry(\"oldFileId\", \"oldId\"),\n entry(\"deviceTranslatedZoneExternalID\", \"transExtId\"),\n entry(\"deviceCustomFloatingPoint2Label\", \"cfp2Label\"),\n entry(\"deviceTranslatedZoneURI\", \"transUri\"),\n entry(\"deviceCustomIPv6Address4Label\", \"c6a4Label\"),\n entry(\"agentTranslatedZoneURI\", \"uri\"),\n entry(\"oldFilePermission\", \"rw-r--r--\"),\n entry(\"deviceCustomIPv6Address4\", \"2001:db8::4\"),\n entry(\"sourceZoneURI\", \"sourceZoneUri\"),\n entry(\"deviceCustomFloatingPoint3Label\", \"cfp3Label\"),\n entry(\"agentTranslatedZoneExternalID\", \"ext123\"),\n entry(\"destinationZoneURI\", \"destZoneUri\"),\n entry(\"flexDate1Label\", \"flexDate1Label\"),\n entry(\"agentNtDomain\", \"example.org\"),\n entry(\"deviceCustomDate2\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n entry(\"deviceCustomDate1\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n entry(\"deviceCustomString3Label\", \"cs3Label\"),\n entry(\"deviceCustomDate1Label\", \"customDate1Label\"),\n entry(\"destinationTranslatedZoneURI\", \"destUri\"),\n entry(\"oldFileModificationTime\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n entry(\"deviceCustomFloatingPoint1Label\", \"cfp1Label\"),\n entry(\"deviceCustomIPv6Address3Label\", \"c6a3Label\"),\n entry(\"deviceCustomFloatingPoint4Label\", \"cfp4Label\"),\n entry(\"oldFileSize\", 2048),\n entry(\"externalId\", \"extId\"),\n entry(\"baseEventCount\", 1234),\n entry(\"flexString2\", \"flexString2\"),\n entry(\"deviceCustomNumber3Label\", \"cn3Label\"),\n entry(\"flexString1\", \"flexString1\"),\n entry(\"deviceCustomString4Label\", \"cs4Label\"),\n entry(\"flexString2Label\", 
\"flexString2Label\"),\n entry(\"deviceCustomString3\", \"customString3\"),\n entry(\"deviceCustomString2\", \"customString2\"),\n entry(\"deviceCustomString1Label\", \"cs1Label\"),\n entry(\"deviceCustomString5\", \"customString5\"),\n entry(\"deviceCustomString4\", \"customString4\"),\n entry(\"deviceZoneExternalID\", \"zoneExtId\"),\n entry(\"deviceCustomString6\", \"customString6\"),\n entry(\"oldFileName\", \"oldFile\"),\n entry(\"sourceZoneExternalID\", \"sourceZoneExtId\"),\n entry(\"oldFileHash\", \"oldHash\"),\n entry(\"sourceTranslatedZoneURI\", \"sourceUri\"),\n entry(\"oldFileCreateTime\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\"))\n )\n )\n )\n ),\n entry(\"host\", Map.of(\"nat\", Map.of(\"ip\", \"10.0.0.3\"))),\n entry(\"log\", Map.of(\"syslog\", Map.of(\"facility\", Map.of(\"code\", 16L)))),\n entry(\n \"observer\",\n Map.ofEntries(\n entry(\"ingress\", Map.of(\"interface\", Map.of(\"name\", \"eth0\"))),\n entry(\"registered_domain\", \"example.com\"),\n entry(\"product\", \"threatmanager\"),\n entry(\"hostname\", \"host1\"),\n entry(\"vendor\", \"security\"),\n entry(\"ip\", \"192.168.0.3\"),\n entry(\"name\", \"extId\"),\n entry(\"version\", \"1.0\"),\n entry(\"mac\", \"00:0a:95:9d:68:16\"),\n entry(\"egress\", Map.of(\"interface\", Map.of(\"name\", \"eth1\")))\n )\n ),\n entry(\n \"agent\",\n Map.ofEntries(\n entry(\"ip\", \"192.168.0.1\"),\n entry(\"name\", \"example.com\"),\n entry(\"id\", \"agentId\"),\n entry(\"type\", \"agentType\"),\n entry(\"version\", \"1.0\"),\n entry(\"mac\", \"00:0a:95:9d:68:16\")\n )\n ),\n entry(\"process\", Map.of(\"name\", \"procName\", \"pid\", 5678L)),\n entry(\n \"destination\",\n Map.ofEntries(\n entry(\"nat\", Map.of(\"port\", 8080, \"ip\", \"10.0.0.2\")),\n entry(\"geo\", Map.of(\"location\", Map.of(\"lon\", -122.4194, \"lat\", 37.7749))),\n entry(\"registered_domain\", \"destNtDomain\"),\n entry(\"process\", Map.of(\"name\", \"destProc\", \"pid\", 1234L)),\n entry(\"port\", 80),\n entry(\"bytes\", 91011L),\n entry(\"service\", Map.of(\"name\", \"destService\")),\n entry(\"domain\", \"destHost\"),\n entry(\"ip\", \"192.168.0.2\"),\n entry(\"user\", Map.of(\"name\", \"destUser\", \"id\", \"destUserId\", \"group\", Map.of(\"name\", \"admin\"))),\n entry(\"mac\", \"00:0a:95:9d:68:16\")\n )\n ),\n entry(\n \"source\",\n Map.ofEntries(\n entry(\"geo\", Map.of(\"location\", Map.of(\"lon\", -122.4194, \"lat\", 37.7749))),\n entry(\"nat\", Map.of(\"port\", 8081, \"ip\", \"10.0.0.4\")),\n entry(\"registered_domain\", \"sourceNtDomain\"),\n entry(\"process\", Map.of(\"name\", \"sourceProc\", \"pid\", 1234L)),\n entry(\"port\", 443),\n entry(\"service\", Map.of(\"name\", \"sourceService\")),\n entry(\"bytes\", 5678L),\n entry(\"ip\", \"192.168.0.4\"),\n entry(\"domain\", \"sourceDomain\"),\n entry(\"user\", Map.of(\"name\", \"sourceUser\", \"id\", \"sourceUserId\", \"group\", Map.of(\"name\", \"sourcePriv\"))),\n entry(\"mac\", \"00:0a:95:9d:68:16\")\n )\n ),\n entry(\"message\", \"message\"),\n entry(\"url\", Map.of(\"original\", \"url\")),\n entry(\"network\", Map.of(\"protocol\", \"HTTP\", \"transport\", \"TCP\", \"direction\", \"inbound\")),\n entry(\n \"file\",\n Map.ofEntries(\n entry(\"inode\", \"5678\"),\n entry(\"path\", \"/path/to/file\"),\n entry(\"size\", 1024L),\n entry(\"created\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n entry(\"name\", \"file.txt\"),\n entry(\"mtime\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n entry(\"type\", \"txt\"),\n entry(\"hash\", \"abcd1234\"),\n entry(\"group\", \"rw-r--r--\")\n )\n ),\n 
entry(\"@timestamp\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n entry(\"organization\", Map.of(\"name\", \"custUri\", \"id\", \"custExtId\")),\n entry(\n \"event\",\n Map.ofEntries(\n entry(\"action\", \"blocked\"),\n entry(\"timezone\", \"UTC\"),\n entry(\"end\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n entry(\"id\", \"evt123\"),\n entry(\"outcome\", \"success\"),\n entry(\"start\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n entry(\"reason\", \"reason\"),\n entry(\"ingested\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n entry(\"kind\", 1),\n entry(\"original\", \"rawEvent\"),\n entry(\"created\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n entry(\"code\", \"100\")\n )\n ),\n entry(\"user_agent\", Map.of(\"original\", \"Mozilla\")),\n entry(\"http\", Map.of(\"request\", Map.of(\"referrer\", \"referrer\", \"method\", \"GET\")))\n )\n );\n }\n\n // Date parsing tests\n public void testToTimestampWithUnixTimestamp() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n String unixTimestamp = \"1633072800000\"; // Example Unix timestamp in milliseconds\n ZonedDateTime expected = ZonedDateTime.ofInstant(Instant.ofEpochMilli(Long.parseLong(unixTimestamp)), ZoneId.of(\"UTC\"));\n ZonedDateTime result = parser.toTimestamp(unixTimestamp);\n assertEquals(expected, result);\n }\n\n public void testToTimestampWithFormattedDate() {\n CefParser parser = new CefParser(ZoneId.of(\"Europe/Stockholm\"), false);\n String formattedDate = \"Oct 01 2021 12:00:00 UTC\"; // Example formatted date\n ZonedDateTime expected = ZonedDateTime.parse(\"2021-10-01T14:00+02:00[Europe/Stockholm]\");\n ZonedDateTime result = parser.toTimestamp(formattedDate);\n assertEquals(expected, result);\n }\n\n public void testToTimestampWithFormattedDateWithoutYear() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n String formattedDate = \"Oct 01 12:00:00 UTC\"; // Example formatted date without year\n int currentYear = ZonedDateTime.now(ZoneId.of(\"UTC\")).getYear();\n ZonedDateTime expected = ZonedDateTime.parse(currentYear + \"-10-01T12:00:00Z[UTC]\");\n ZonedDateTime result = parser.toTimestamp(formattedDate);\n assertEquals(expected, result);\n }\n\n public void testToTimestampWithFormattedDateWithoutTimezone() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n String formattedDate = \"Sep 07 2018 14:50:39\"; // Example formatted date without year\n ZonedDateTime expected = ZonedDateTime.parse(\"2018-09-07T14:50:39Z[UTC]\");\n ZonedDateTime result = parser.toTimestamp(formattedDate);\n assertEquals(expected, result);\n }\n\n public void testToTimestampWithInvalidDate() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n String invalidDate = \"invalid date\";\n assertThrows(IllegalArgumentException.class, () -> parser.toTimestamp(invalidDate));\n }\n\n public void testToMacAddressWithSeparators() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n List macAddresses = List.of(\n // EUI-48 (with separators).\n \"00:0D:60:AF:1B:61\",\n \"00-0D-60-AF-1B-61\",\n \"000D.60AF.1B61\",\n\n // EUI-64 (with separators).\n \"00:0D:60:FF:FE:AF:1B:61\",\n \"00-0D-60-FF-FE-AF-1B-61\",\n \"000D.60FF.FEAF.1B61\"\n );\n macAddresses.forEach(macAddress -> {\n String result = parser.toMACAddress(macAddress);\n assertEquals(macAddress, result);\n });\n }\n\n public void testEUI48ToMacAddressWithOutSeparators() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n String macAddress = \"000D60AF1B61\";\n String result = 
parser.toMACAddress(macAddress);\n assertEquals(\"00:0D:60:AF:1B:61\", result);\n }\n\n public void testEUI64ToMacAddressWithOutSeparators() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n String macAddress = \"000D60FFFEAF1B61\";\n String result = parser.toMACAddress(macAddress);\n assertEquals(\"00:0D:60:FF:FE:AF:1B:61\", result);\n }\n\n public void toIP_validIPv4Address() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n String result = parser.toIP(\"192.168.1.1\");\n assertEquals(\"192.168.1.1\", result);\n }\n\n public void toIP_validIPv6Address() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n String result = parser.toIP(\"2001:0db8:85a3:0000:0000:8a2e:0370:7334\");\n assertEquals(\"2001:db8:85a3::8a2e:370:7334\", result);\n }\n\n public void toIP_invalidIPAddress() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> parser.toIP(\"invalid_ip\"));\n assertEquals(\"Invalid IP address format\", exception.getMessage());\n }\n\n public void toIP_emptyString() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> parser.toIP(\"\"));\n assertEquals(\"Invalid IP address format\", exception.getMessage());\n }\n\n public void toIP_nullString() {", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2027497237", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 122491, + "pr_file": "modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/CefProcessorTests.java", + "discussion_id": "2027497237", + "commented_code": "@@ -0,0 +1,1104 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+import org.elasticsearch.test.ESTestCase;\n+import org.junit.Before;\n+\n+import java.time.Instant;\n+import java.time.ZoneId;\n+import java.time.ZonedDateTime;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.Set;\n+\n+import static java.util.Map.entry;\n+import static org.hamcrest.Matchers.containsInAnyOrder;\n+import static org.hamcrest.Matchers.equalTo;\n+\n+public class CefProcessorTests extends ESTestCase {\n+\n+ private IngestDocument document;\n+\n+ @Before\n+ public void setUp() throws Exception {\n+ super.setUp();\n+ }\n+\n+ public void testExecute() {\n+ Map source = new HashMap<>();\n+ String message = \"CEF:0|Elastic|Vaporware|1.0.0-alpha|18|Web request|low|eventId=3457 requestMethod=POST \"\n+ + \"slat=38.915 slong=-77.511 proto=TCP sourceServiceName=httpd requestContext=https://www.google.com \"\n+ + \"src=89.160.20.156 spt=33876 dst=192.168.10.1 dpt=443 request=https://www.example.com/cart\";\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"Elastic\", \"product\", \"Vaporware\", \"version\", \"1.0.0-alpha\", \"event_class_id\", \"18\")\n+ ),\n+ entry(\"name\", \"Web request\"),\n+ entry(\"severity\", \"low\")\n+ )\n+ ),\n+ entry(\"observer\", Map.of(\"product\", \"Vaporware\", \"vendor\", \"Elastic\", \"version\", \"1.0.0-alpha\")),\n+ entry(\"event\", Map.of(\"id\", \"3457\", \"code\", \"18\")),\n+ entry(\n+ \"source\",\n+ Map.ofEntries(\n+ entry(\"ip\", \"89.160.20.156\"),\n+ entry(\"port\", 33876),\n+ entry(\"geo\", Map.of(\"location\", Map.of(\"lon\", -77.511, \"lat\", 38.915))),\n+ entry(\"service\", Map.of(\"name\", \"httpd\"))\n+ )\n+ ),\n+ entry(\"destination\", Map.of(\"ip\", \"192.168.10.1\", \"port\", 443)),\n+ entry(\"http\", Map.of(\"request\", Map.of(\"method\", \"POST\", \"referrer\", \"https://www.google.com\"))),\n+ entry(\"network\", Map.of(\"transport\", \"TCP\")),\n+ entry(\"url\", Map.of(\"original\", \"https://www.example.com/cart\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testInvalidCefFormat() {\n+ Map invalidSource = new HashMap<>();\n+ invalidSource.put(\"message\", \"Invalid CEF message\");\n+ IngestDocument invalidIngestDocument = new IngestDocument(\"index\", \"id\", 1L, null, null, invalidSource);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ expectThrows(IllegalArgumentException.class, () -> processor.execute(invalidIngestDocument));\n+ }\n+\n+ public void testStandardMessage() {\n+ String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|\"\n+ + \"src=10.0.0.192 dst=12.121.122.82 spt=1232 eventId=1 in=4294967296 
out=4294967296\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232, \"bytes\", 4294967296L)),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\", \"bytes\", 4294967296L)),\n+ entry(\"event\", Map.of(\"id\", \"1\", \"code\", \"100\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testHeaderOnly() {\n+ String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEmptyDeviceFields() {\n+ String message = \"CEF:0|||1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\"device\", Map.of(\"vendor\", \"\", \"product\", \"\", \"version\", \"1.0\", \"event_class_id\", \"100\")),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"\", \"vendor\", \"\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEscapedPipeInHeader() {\n+ String message = \"CEF:26|security|threat\\\\|->manager|1.0|100|\"\n+ + \"trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, 
source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threat|->manager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threat|->manager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEqualsSignInHeader() {\n+ String message = \"CEF:26|security|threat=manager|1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threat=manager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threat=manager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEmptyExtensionValue() {\n+ String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst= spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testLeadingWhitespace() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10| src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new 
CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEscapedPipeInExtension() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this\\\\|has an escaped pipe\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ expectThrows(IllegalArgumentException.class, () -> processor.execute(document));\n+ }\n+\n+ public void testPipeInMessage() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this|has a pipe\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\"),\n+ entry(\"extensions\", Map.of(\"moo\", \"this|has a pipe\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEqualsInMessage() {\n+ String message =\n+ \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this =has = equals\\\\= dst=12.121.122.82 spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\"),\n+ entry(\"extensions\", Map.of(\"moo\", \"this =has = equals=\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ 
entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n+ entry(\"source\", Map.of(\"port\", 1232)),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEscapesInExtension() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|msg=a+b\\\\=c x=c\\\\\\\\d\\\\=z\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\"),\n+ entry(\"extensions\", Map.of(\"x\", \"c\\\\d=z\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"message\", \"a+b=c\")\n+ )\n+ );\n+ }\n+\n+ public void testMalformedExtensionEscape() {\n+ String message = \"CEF:0|FooBar|Web Gateway|1.2.3.45.67|200|Success|2|rt=Sep 07 2018 14:50:39 cat=Access Log dst=1.1.1.1 \"\n+ + \"dhost=foo.example.com suser=redacted src=2.2.2.2 requestMethod=POST request='https://foo.example.com/bar/bingo/1' \"\n+ + \"requestClientApplication='Foo-Bar/2018.1.7; =Email:user@example.com; Guid:test=' cs1= cs1Label=Foo Bar\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"FooBar\", \"product\", \"Web Gateway\", \"version\", \"1.2.3.45.67\", \"event_class_id\", \"200\")\n+ ),\n+ entry(\"name\", \"Success\"),\n+ entry(\"severity\", \"2\"),\n+ entry(\"extensions\", Map.of(\"deviceCustomString1Label\", \"Foo Bar\", \"deviceEventCategory\", \"Access Log\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"200\")),\n+ entry(\"observer\", Map.of(\"product\", \"Web Gateway\", \"vendor\", \"FooBar\", \"version\", \"1.2.3.45.67\")),\n+ entry(\"@timestamp\", ZonedDateTime.parse(\"2018-09-07T14:50:39Z\")),\n+ entry(\"destination\", Map.of(\"ip\", \"1.1.1.1\", \"domain\", \"foo.example.com\")),\n+ entry(\"source\", Map.of(\"ip\", \"2.2.2.2\", \"user\", Map.of(\"name\", \"redacted\"))),\n+ entry(\"http\", Map.of(\"request\", Map.of(\"method\", \"POST\"))),\n+ entry(\"url\", Map.of(\"original\", \"'https://foo.example.com/bar/bingo/1'\")),\n+ entry(\"user_agent\", Map.of(\"original\", \"'Foo-Bar/2018.1.7; =Email:user@example.com; Guid:test='\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testMultipleMalformedExtensionValues() {\n+ String message = \"CEF:0|vendor|product|version|event_id|name|Very-High| \"\n+ + \"msg=Hello World error=Failed because id==old_id user=root angle=106.7<=180\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ 
CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"vendor\", \"product\", \"product\", \"version\", \"version\", \"event_class_id\", \"event_id\")\n+ ),\n+ entry(\"name\", \"name\"),\n+ entry(\"severity\", \"Very-High\"),\n+ entry(\"extensions\", Map.of(\"id\", \"=old_id\", \"user\", \"root\", \"angle\", \"106.7<=180\", \"error\", \"Failed because\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"event_id\")),\n+ entry(\"observer\", Map.of(\"product\", \"product\", \"vendor\", \"vendor\", \"version\", \"version\")),\n+ entry(\"message\", \"Hello World\")\n+ )\n+ );\n+ }\n+\n+ public void testPaddedMessage() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|message is padded|10|spt=1232 \"\n+ + \"msg=Trailing space in non-final extensions is preserved src=10.0.0.192 \";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"message is padded\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"message\", \"Trailing space in non-final extensions is preserved\")\n+ )\n+ );\n+ }\n+\n+ public void testCrlfMessage() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|message is padded|10|\"\n+ + \"spt=1232 msg=Trailing space in final extensions is not preserved\\t \\r\\ndpt=1234\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"message is padded\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"port\", 1232)),\n+ entry(\"message\", \"Trailing space in final extensions is not preserved\"),\n+ entry(\"destination\", Map.of(\"port\", 1234))\n+ )\n+ );\n+ }\n+\n+ public void testTabMessage() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|message is padded|10|\"\n+ + \"spt=1232 msg=Tabs\\tand\\rcontrol\\ncharacters are preserved\\t src=127.0.0.1\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", 
message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"message is padded\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"port\", 1232, \"ip\", \"127.0.0.1\")),\n+ entry(\"message\", \"Tabs\\tand\\rcontrol\\ncharacters are preserved\")\n+ )\n+ );\n+ }\n+\n+ public void testTabNoSepMessage() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|message has tabs|10|spt=1232 msg=Tab is not a separator\\tsrc=127.0.0.1\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"message has tabs\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"port\", 1232, \"ip\", \"127.0.0.1\")),\n+ entry(\"message\", \"Tab is not a separator\")\n+ )\n+ );\n+ }\n+\n+ public void testEscapedMessage() {\n+ String message = \"CEF:0|security\\\\compliance|threat\\\\|->manager|1.0|100|message contains escapes|10|\"\n+ + \"spt=1232 msg=Newlines in messages\\\\\\nare allowed.\\\\\\r\\\\\\nAnd so are carriage feeds\\\\\\\\newlines\\\\\\\\\\\\=. 
dpt=4432\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.ofEntries(\n+ entry(\"vendor\", \"security\\\\compliance\"),\n+ entry(\"product\", \"threat|->manager\"),\n+ entry(\"version\", \"1.0\"),\n+ entry(\"event_class_id\", \"100\")\n+ )\n+ ),\n+ entry(\"name\", \"message contains escapes\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threat|->manager\", \"vendor\", \"security\\\\compliance\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"port\", 1232)),\n+ entry(\"message\", \"Newlines in messages\\nare allowed.\\r\\nAnd so are carriage feeds\\\\newlines\\\\=.\"),\n+ entry(\"destination\", Map.of(\"port\", 4432))\n+ )\n+ );\n+ }\n+\n+ public void testTruncatedHeader() {\n+ String message = \"CEF:0|SentinelOne|Mgmt|activityID=1111111111111111111 activityType=3505 \"\n+ + \"siteId=None siteName=None accountId=1222222222222222222 accountName=foo-bar mdr notificationScope=ACCOUNT\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\"device\", Map.of(\"vendor\", \"SentinelOne\", \"product\", \"Mgmt\")),\n+ entry(\n+ \"extensions\",\n+ Map.ofEntries(\n+ entry(\"activityID\", \"1111111111111111111\"),\n+ entry(\"activityType\", \"3505\"),\n+ entry(\"siteId\", \"None\"),\n+ entry(\"siteName\", \"None\"),\n+ entry(\"accountId\", \"1222222222222222222\"),\n+ entry(\"accountName\", \"foo-bar mdr\"),\n+ entry(\"notificationScope\", \"ACCOUNT\")\n+ )\n+ )\n+ )\n+ ),\n+ entry(\"observer\", Map.of(\"product\", \"Mgmt\", \"vendor\", \"SentinelOne\")),\n+ entry(\"message\", message),\n+ entry(\"error\", Map.of(\"message\", Set.of(\"incomplete CEF header\")))\n+ )\n+ );\n+ }\n+\n+ public void testIgnoreEmptyValuesInExtension() {\n+ String message = \"CEF:26|security|threat=manager|1.0|100|trojan successfully stopped|10|src= dst=12.121.122.82 spt=\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threat=manager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threat=manager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"destination\", Map.of(\"ip\", 
\"12.121.122.82\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testHyphenInExtensionKey() {\n+ String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|Some-Key=123456\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\"),\n+ entry(\"extensions\", Map.of(\"Some-Key\", \"123456\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testAllFieldsInExtension() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|\"\n+ + \"agt=192.168.0.1 agentDnsDomain=example.com ahost=agentHost aid=agentId amac=00:0a:95:9d:68:16 agentNtDomain=example.org \"\n+ + \"art=1622547800000 atz=UTC agentTranslatedAddress=10.0.0.1 agentTranslatedZoneExternalID=ext123 agentTranslatedZoneURI=uri \"\n+ + \"at=agentType av=1.0 agentZoneExternalID=zoneExtId agentZoneURI=zoneUri app=HTTP cnt=1234 in=5678 out=91011 \"\n+ + \"customerExternalID=custExtId customerURI=custUri dst=192.168.0.2 dlat=37.7749 dlong=-122.4194 \"\n+ + \"dhost=destHost dmac=00:0a:95:9d:68:16 dntdom=destNtDomain dpt=80 dpid=1234 \"\n+ + \"dproc=destProc destinationServiceName=destService \"\n+ + \"destinationTranslatedAddress=10.0.0.2 destinationTranslatedPort=8080 destinationTranslatedZoneExternalID=destExtId \"\n+ + \"destinationTranslatedZoneURI=destUri duid=destUserId duser=destUser dpriv=admin destinationZoneExternalID=destZoneExtId \"\n+ + \"destinationZoneURI=destZoneUri act=blocked dvc=192.168.0.3 cfp1Label=cfp1Label cfp3Label=cfp3Label cfp4Label=cfp4Label \"\n+ + \"deviceCustomDate1=1622547800000 deviceCustomDate1Label=customDate1Label deviceCustomDate2=1622547900000 \"\n+ + \"deviceCustomDate2Label=customDate2Label cfp1=1.23 cfp2=2.34 cfp2Label=cfp2Label cfp3=3.45 cfp4=4.56 c6a1=2001:db8::1 \"\n+ + \"c6a1Label=c6a1Label c6a2=2001:db8::2 c6a2Label=c6a2Label c6a3=2001:db8::3 c6a3Label=c6a3Label c6a4=2001:db8::4 \"\n+ + \"C6a4Label=c6a4Label cn1=123 cn1Label=cn1Label cn2=234 cn2Label=cn2Label cn3=345 cn3Label=cn3Label cs1=customString1 \"\n+ + \"cs1Label=cs1Label cs2=customString2 cs2Label=cs2Label cs3=customString3 cs3Label=cs3Label \"\n+ + \"cs4=customString4 cs4Label=cs4Label \"\n+ + \"cs5=customString5 cs5Label=cs5Label cs6=customString6 cs6Label=cs6Label deviceDirection=inbound deviceDnsDomain=example.com \"\n+ + \"cat=category deviceExternalId=extId deviceFacility=16 dvchost=host1 deviceInboundInterface=eth0 dvcmac=00:0a:95:9d:68:16 \"\n+ + \"deviceNtDomain=example.org deviceOutboundInterface=eth1 devicePayloadId=payloadId dvcpid=5678 deviceProcessName=procName \"\n+ + \"rt=1622547800000 dtz=UTC deviceTranslatedAddress=10.0.0.3 deviceTranslatedZoneExternalID=transExtId \"\n+ + \"deviceTranslatedZoneURI=transUri deviceZoneExternalID=zoneExtId deviceZoneURI=zoneUri 
end=1622547900000 eventId=evt123 \"\n+ + \"outcome=success externalId=extId fileCreateTime=1622547800000 fileHash=abcd1234 fileId=5678 \"\n+ + \"fileModificationTime=1622547900000 \"\n+ + \"fname=file.txt filePath=/path/to/file filePermission=rw-r--r-- fsize=1024 fileType=txt flexDate1=1622547800000 \"\n+ + \"flexDate1Label=flexDate1Label flexString1=flexString1 flexString2=flexString2 flexString1Label=flexString1Label \"\n+ + \"flexString2Label=flexString2Label msg=message oldFileCreateTime=1622547800000 oldFileHash=oldHash oldFileId=oldId \"\n+ + \"oldFileModificationTime=1622547900000 oldFileName=oldFile oldFilePath=/old/path \"\n+ + \"oldFilePermission=rw-r--r-- oldFileSize=2048 \"\n+ + \"oldFileType=oldType rawEvent=rawEvent reason=reason requestClientApplication=Mozilla requestContext=referrer \"\n+ + \"requestCookies=cookies requestMethod=GET request=url src=192.168.0.4 sourceDnsDomain=sourceDomain \"\n+ + \"slat=37.7749 slong=-122.4194 \"\n+ + \"shost=sourceHost smac=00:0a:95:9d:68:16 sntdom=sourceNtDomain spt=443 spid=1234 \"\n+ + \"sproc=sourceProc sourceServiceName=sourceService \"\n+ + \"sourceTranslatedAddress=10.0.0.4 sourceTranslatedPort=8081 sourceTranslatedZoneExternalID=sourceExtId \"\n+ + \"sourceTranslatedZoneURI=sourceUri suid=sourceUserId suser=sourceUser spriv=sourcePriv sourceZoneExternalID=sourceZoneExtId \"\n+ + \"sourceZoneURI=sourceZoneUri start=1622547800000 proto=TCP type=1 catdt=catDeviceType mrt=1622547800000\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\"),\n+ entry(\n+ \"extensions\",\n+ Map.ofEntries(\n+ entry(\"deviceNtDomain\", \"example.org\"),\n+ entry(\"agentZoneExternalID\", \"zoneExtId\"),\n+ entry(\"agentTimeZone\", \"UTC\"),\n+ entry(\"deviceCustomIPv6Address1Label\", \"c6a1Label\"),\n+ entry(\"deviceCustomString1\", \"customString1\"),\n+ entry(\"deviceCustomIPv6Address2Label\", \"c6a2Label\"),\n+ entry(\"deviceCustomNumber3\", 345L),\n+ entry(\"deviceCustomFloatingPoint1\", 1.23f),\n+ entry(\"deviceCustomNumber2\", 234L),\n+ entry(\"deviceCustomFloatingPoint2\", 2.34f),\n+ entry(\"deviceCustomFloatingPoint3\", 3.45f),\n+ entry(\"deviceCustomFloatingPoint4\", 4.56f),\n+ entry(\"flexDate1\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"destinationTranslatedZoneExternalID\", \"destExtId\"),\n+ entry(\"deviceCustomNumber1\", 123L),\n+ entry(\"deviceEventCategory\", \"category\"),\n+ entry(\"deviceCustomString6Label\", \"cs6Label\"),\n+ entry(\"deviceCustomNumber2Label\", \"cn2Label\"),\n+ entry(\"flexString1Label\", \"flexString1Label\"),\n+ entry(\"deviceCustomString5Label\", \"cs5Label\"),\n+ entry(\"agentZoneURI\", \"zoneUri\"),\n+ entry(\"deviceCustomString2Label\", \"cs2Label\"),\n+ entry(\"deviceCustomDate2Label\", \"customDate2Label\"),\n+ entry(\"deviceCustomNumber1Label\", \"cn1Label\"),\n+ entry(\"oldFileType\", \"oldType\"),\n+ entry(\"destinationZoneExternalID\", \"destZoneExtId\"),\n+ entry(\"categoryDeviceType\", 
\"catDeviceType\"),\n+ entry(\"deviceZoneURI\", \"zoneUri\"),\n+ entry(\"sourceTranslatedZoneExternalID\", \"sourceExtId\"),\n+ entry(\"agentTranslatedAddress\", \"10.0.0.1\"),\n+ entry(\"requestCookies\", \"cookies\"),\n+ entry(\"deviceCustomIPv6Address3\", \"2001:db8::3\"),\n+ entry(\"oldFilePath\", \"/old/path\"),\n+ entry(\"deviceCustomIPv6Address2\", \"2001:db8::2\"),\n+ entry(\"deviceCustomIPv6Address1\", \"2001:db8::1\"),\n+ entry(\"oldFileId\", \"oldId\"),\n+ entry(\"deviceTranslatedZoneExternalID\", \"transExtId\"),\n+ entry(\"deviceCustomFloatingPoint2Label\", \"cfp2Label\"),\n+ entry(\"deviceTranslatedZoneURI\", \"transUri\"),\n+ entry(\"deviceCustomIPv6Address4Label\", \"c6a4Label\"),\n+ entry(\"agentTranslatedZoneURI\", \"uri\"),\n+ entry(\"oldFilePermission\", \"rw-r--r--\"),\n+ entry(\"deviceCustomIPv6Address4\", \"2001:db8::4\"),\n+ entry(\"sourceZoneURI\", \"sourceZoneUri\"),\n+ entry(\"deviceCustomFloatingPoint3Label\", \"cfp3Label\"),\n+ entry(\"agentTranslatedZoneExternalID\", \"ext123\"),\n+ entry(\"destinationZoneURI\", \"destZoneUri\"),\n+ entry(\"flexDate1Label\", \"flexDate1Label\"),\n+ entry(\"agentNtDomain\", \"example.org\"),\n+ entry(\"deviceCustomDate2\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n+ entry(\"deviceCustomDate1\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"deviceCustomString3Label\", \"cs3Label\"),\n+ entry(\"deviceCustomDate1Label\", \"customDate1Label\"),\n+ entry(\"destinationTranslatedZoneURI\", \"destUri\"),\n+ entry(\"oldFileModificationTime\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n+ entry(\"deviceCustomFloatingPoint1Label\", \"cfp1Label\"),\n+ entry(\"deviceCustomIPv6Address3Label\", \"c6a3Label\"),\n+ entry(\"deviceCustomFloatingPoint4Label\", \"cfp4Label\"),\n+ entry(\"oldFileSize\", 2048),\n+ entry(\"externalId\", \"extId\"),\n+ entry(\"baseEventCount\", 1234),\n+ entry(\"flexString2\", \"flexString2\"),\n+ entry(\"deviceCustomNumber3Label\", \"cn3Label\"),\n+ entry(\"flexString1\", \"flexString1\"),\n+ entry(\"deviceCustomString4Label\", \"cs4Label\"),\n+ entry(\"flexString2Label\", \"flexString2Label\"),\n+ entry(\"deviceCustomString3\", \"customString3\"),\n+ entry(\"deviceCustomString2\", \"customString2\"),\n+ entry(\"deviceCustomString1Label\", \"cs1Label\"),\n+ entry(\"deviceCustomString5\", \"customString5\"),\n+ entry(\"deviceCustomString4\", \"customString4\"),\n+ entry(\"deviceZoneExternalID\", \"zoneExtId\"),\n+ entry(\"deviceCustomString6\", \"customString6\"),\n+ entry(\"oldFileName\", \"oldFile\"),\n+ entry(\"sourceZoneExternalID\", \"sourceZoneExtId\"),\n+ entry(\"oldFileHash\", \"oldHash\"),\n+ entry(\"sourceTranslatedZoneURI\", \"sourceUri\"),\n+ entry(\"oldFileCreateTime\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\"))\n+ )\n+ )\n+ )\n+ ),\n+ entry(\"host\", Map.of(\"nat\", Map.of(\"ip\", \"10.0.0.3\"))),\n+ entry(\"log\", Map.of(\"syslog\", Map.of(\"facility\", Map.of(\"code\", 16L)))),\n+ entry(\n+ \"observer\",\n+ Map.ofEntries(\n+ entry(\"ingress\", Map.of(\"interface\", Map.of(\"name\", \"eth0\"))),\n+ entry(\"registered_domain\", \"example.com\"),\n+ entry(\"product\", \"threatmanager\"),\n+ entry(\"hostname\", \"host1\"),\n+ entry(\"vendor\", \"security\"),\n+ entry(\"ip\", \"192.168.0.3\"),\n+ entry(\"name\", \"extId\"),\n+ entry(\"version\", \"1.0\"),\n+ entry(\"mac\", \"00:0a:95:9d:68:16\"),\n+ entry(\"egress\", Map.of(\"interface\", Map.of(\"name\", \"eth1\")))\n+ )\n+ ),\n+ entry(\n+ \"agent\",\n+ Map.ofEntries(\n+ entry(\"ip\", \"192.168.0.1\"),\n+ entry(\"name\", 
\"example.com\"),\n+ entry(\"id\", \"agentId\"),\n+ entry(\"type\", \"agentType\"),\n+ entry(\"version\", \"1.0\"),\n+ entry(\"mac\", \"00:0a:95:9d:68:16\")\n+ )\n+ ),\n+ entry(\"process\", Map.of(\"name\", \"procName\", \"pid\", 5678L)),\n+ entry(\n+ \"destination\",\n+ Map.ofEntries(\n+ entry(\"nat\", Map.of(\"port\", 8080, \"ip\", \"10.0.0.2\")),\n+ entry(\"geo\", Map.of(\"location\", Map.of(\"lon\", -122.4194, \"lat\", 37.7749))),\n+ entry(\"registered_domain\", \"destNtDomain\"),\n+ entry(\"process\", Map.of(\"name\", \"destProc\", \"pid\", 1234L)),\n+ entry(\"port\", 80),\n+ entry(\"bytes\", 91011L),\n+ entry(\"service\", Map.of(\"name\", \"destService\")),\n+ entry(\"domain\", \"destHost\"),\n+ entry(\"ip\", \"192.168.0.2\"),\n+ entry(\"user\", Map.of(\"name\", \"destUser\", \"id\", \"destUserId\", \"group\", Map.of(\"name\", \"admin\"))),\n+ entry(\"mac\", \"00:0a:95:9d:68:16\")\n+ )\n+ ),\n+ entry(\n+ \"source\",\n+ Map.ofEntries(\n+ entry(\"geo\", Map.of(\"location\", Map.of(\"lon\", -122.4194, \"lat\", 37.7749))),\n+ entry(\"nat\", Map.of(\"port\", 8081, \"ip\", \"10.0.0.4\")),\n+ entry(\"registered_domain\", \"sourceNtDomain\"),\n+ entry(\"process\", Map.of(\"name\", \"sourceProc\", \"pid\", 1234L)),\n+ entry(\"port\", 443),\n+ entry(\"service\", Map.of(\"name\", \"sourceService\")),\n+ entry(\"bytes\", 5678L),\n+ entry(\"ip\", \"192.168.0.4\"),\n+ entry(\"domain\", \"sourceDomain\"),\n+ entry(\"user\", Map.of(\"name\", \"sourceUser\", \"id\", \"sourceUserId\", \"group\", Map.of(\"name\", \"sourcePriv\"))),\n+ entry(\"mac\", \"00:0a:95:9d:68:16\")\n+ )\n+ ),\n+ entry(\"message\", \"message\"),\n+ entry(\"url\", Map.of(\"original\", \"url\")),\n+ entry(\"network\", Map.of(\"protocol\", \"HTTP\", \"transport\", \"TCP\", \"direction\", \"inbound\")),\n+ entry(\n+ \"file\",\n+ Map.ofEntries(\n+ entry(\"inode\", \"5678\"),\n+ entry(\"path\", \"/path/to/file\"),\n+ entry(\"size\", 1024L),\n+ entry(\"created\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"name\", \"file.txt\"),\n+ entry(\"mtime\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n+ entry(\"type\", \"txt\"),\n+ entry(\"hash\", \"abcd1234\"),\n+ entry(\"group\", \"rw-r--r--\")\n+ )\n+ ),\n+ entry(\"@timestamp\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"organization\", Map.of(\"name\", \"custUri\", \"id\", \"custExtId\")),\n+ entry(\n+ \"event\",\n+ Map.ofEntries(\n+ entry(\"action\", \"blocked\"),\n+ entry(\"timezone\", \"UTC\"),\n+ entry(\"end\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n+ entry(\"id\", \"evt123\"),\n+ entry(\"outcome\", \"success\"),\n+ entry(\"start\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"reason\", \"reason\"),\n+ entry(\"ingested\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"kind\", 1),\n+ entry(\"original\", \"rawEvent\"),\n+ entry(\"created\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"code\", \"100\")\n+ )\n+ ),\n+ entry(\"user_agent\", Map.of(\"original\", \"Mozilla\")),\n+ entry(\"http\", Map.of(\"request\", Map.of(\"referrer\", \"referrer\", \"method\", \"GET\")))\n+ )\n+ );\n+ }\n+\n+ // Date parsing tests\n+ public void testToTimestampWithUnixTimestamp() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String unixTimestamp = \"1633072800000\"; // Example Unix timestamp in milliseconds\n+ ZonedDateTime expected = ZonedDateTime.ofInstant(Instant.ofEpochMilli(Long.parseLong(unixTimestamp)), ZoneId.of(\"UTC\"));\n+ ZonedDateTime result = parser.toTimestamp(unixTimestamp);\n+ 
assertEquals(expected, result);\n+ }\n+\n+ public void testToTimestampWithFormattedDate() {\n+ CefParser parser = new CefParser(ZoneId.of(\"Europe/Stockholm\"), false);\n+ String formattedDate = \"Oct 01 2021 12:00:00 UTC\"; // Example formatted date\n+ ZonedDateTime expected = ZonedDateTime.parse(\"2021-10-01T14:00+02:00[Europe/Stockholm]\");\n+ ZonedDateTime result = parser.toTimestamp(formattedDate);\n+ assertEquals(expected, result);\n+ }\n+\n+ public void testToTimestampWithFormattedDateWithoutYear() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String formattedDate = \"Oct 01 12:00:00 UTC\"; // Example formatted date without year\n+ int currentYear = ZonedDateTime.now(ZoneId.of(\"UTC\")).getYear();\n+ ZonedDateTime expected = ZonedDateTime.parse(currentYear + \"-10-01T12:00:00Z[UTC]\");\n+ ZonedDateTime result = parser.toTimestamp(formattedDate);\n+ assertEquals(expected, result);\n+ }\n+\n+ public void testToTimestampWithFormattedDateWithoutTimezone() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String formattedDate = \"Sep 07 2018 14:50:39\"; // Example formatted date without timezone\n+ ZonedDateTime expected = ZonedDateTime.parse(\"2018-09-07T14:50:39Z[UTC]\");\n+ ZonedDateTime result = parser.toTimestamp(formattedDate);\n+ assertEquals(expected, result);\n+ }\n+\n+ public void testToTimestampWithInvalidDate() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String invalidDate = \"invalid date\";\n+ assertThrows(IllegalArgumentException.class, () -> parser.toTimestamp(invalidDate));\n+ }\n+\n+ public void testToMacAddressWithSeparators() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ List macAddresses = List.of(\n+ // EUI-48 (with separators).\n+ \"00:0D:60:AF:1B:61\",\n+ \"00-0D-60-AF-1B-61\",\n+ \"000D.60AF.1B61\",\n+\n+ // EUI-64 (with separators).\n+ \"00:0D:60:FF:FE:AF:1B:61\",\n+ \"00-0D-60-FF-FE-AF-1B-61\",\n+ \"000D.60FF.FEAF.1B61\"\n+ );\n+ macAddresses.forEach(macAddress -> {\n+ String result = parser.toMACAddress(macAddress);\n+ assertEquals(macAddress, result);\n+ });\n+ }\n+\n+ public void testEUI48ToMacAddressWithOutSeparators() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String macAddress = \"000D60AF1B61\";\n+ String result = parser.toMACAddress(macAddress);\n+ assertEquals(\"00:0D:60:AF:1B:61\", result);\n+ }\n+\n+ public void testEUI64ToMacAddressWithOutSeparators() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String macAddress = \"000D60FFFEAF1B61\";\n+ String result = parser.toMACAddress(macAddress);\n+ assertEquals(\"00:0D:60:FF:FE:AF:1B:61\", result);\n+ }\n+\n+ public void toIP_validIPv4Address() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n+ String result = parser.toIP(\"192.168.1.1\");\n+ assertEquals(\"192.168.1.1\", result);\n+ }\n+\n+ public void toIP_validIPv6Address() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n+ String result = parser.toIP(\"2001:0db8:85a3:0000:0000:8a2e:0370:7334\");\n+ assertEquals(\"2001:db8:85a3::8a2e:370:7334\", result);\n+ }\n+\n+ public void toIP_invalidIPAddress() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n+ IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> parser.toIP(\"invalid_ip\"));\n+ assertEquals(\"Invalid IP address format\", exception.getMessage());\n+ }\n+\n+ public void toIP_emptyString() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n+ IllegalArgumentException 
exception = assertThrows(IllegalArgumentException.class, () -> parser.toIP(\"\"));\n+ assertEquals(\"Invalid IP address format\", exception.getMessage());\n+ }\n+\n+ public void toIP_nullString() {", + "comment_created_at": "2025-04-03T17:57:14+00:00", + "comment_author": "joegallo", + "comment_body": "These `toIP_` methods aren't formatted to actually be JUnit tests (which start with `test`), so none of them are running. If you fix that so that they do run, not all of the tests pass.\r\n\r\nPlease avoid underscores in these names, regardless, so for example have `toIP_nullString` become `testToIPNullString`.", + "pr_file_module": null + }, + { + "comment_id": "2028230119", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 122491, + "pr_file": "modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/CefProcessorTests.java", + "discussion_id": "2027497237", + "commented_code": "@@ -0,0 +1,1104 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+import org.elasticsearch.test.ESTestCase;\n+import org.junit.Before;\n+\n+import java.time.Instant;\n+import java.time.ZoneId;\n+import java.time.ZonedDateTime;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.Set;\n+\n+import static java.util.Map.entry;\n+import static org.hamcrest.Matchers.containsInAnyOrder;\n+import static org.hamcrest.Matchers.equalTo;\n+\n+public class CefProcessorTests extends ESTestCase {\n+\n+ private IngestDocument document;\n+\n+ @Before\n+ public void setUp() throws Exception {\n+ super.setUp();\n+ }\n+\n+ public void testExecute() {\n+ Map source = new HashMap<>();\n+ String message = \"CEF:0|Elastic|Vaporware|1.0.0-alpha|18|Web request|low|eventId=3457 requestMethod=POST \"\n+ + \"slat=38.915 slong=-77.511 proto=TCP sourceServiceName=httpd requestContext=https://www.google.com \"\n+ + \"src=89.160.20.156 spt=33876 dst=192.168.10.1 dpt=443 request=https://www.example.com/cart\";\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"Elastic\", \"product\", \"Vaporware\", \"version\", \"1.0.0-alpha\", \"event_class_id\", \"18\")\n+ ),\n+ entry(\"name\", \"Web request\"),\n+ entry(\"severity\", \"low\")\n+ )\n+ ),\n+ entry(\"observer\", Map.of(\"product\", \"Vaporware\", \"vendor\", \"Elastic\", \"version\", \"1.0.0-alpha\")),\n+ entry(\"event\", Map.of(\"id\", \"3457\", \"code\", \"18\")),\n+ entry(\n+ \"source\",\n+ Map.ofEntries(\n+ entry(\"ip\", \"89.160.20.156\"),\n+ entry(\"port\", 33876),\n+ entry(\"geo\", Map.of(\"location\", Map.of(\"lon\", -77.511, \"lat\", 38.915))),\n+ entry(\"service\", Map.of(\"name\", \"httpd\"))\n+ )\n+ 
),\n+ entry(\"destination\", Map.of(\"ip\", \"192.168.10.1\", \"port\", 443)),\n+ entry(\"http\", Map.of(\"request\", Map.of(\"method\", \"POST\", \"referrer\", \"https://www.google.com\"))),\n+ entry(\"network\", Map.of(\"transport\", \"TCP\")),\n+ entry(\"url\", Map.of(\"original\", \"https://www.example.com/cart\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testInvalidCefFormat() {\n+ Map invalidSource = new HashMap<>();\n+ invalidSource.put(\"message\", \"Invalid CEF message\");\n+ IngestDocument invalidIngestDocument = new IngestDocument(\"index\", \"id\", 1L, null, null, invalidSource);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ expectThrows(IllegalArgumentException.class, () -> processor.execute(invalidIngestDocument));\n+ }\n+\n+ public void testStandardMessage() {\n+ String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|\"\n+ + \"src=10.0.0.192 dst=12.121.122.82 spt=1232 eventId=1 in=4294967296 out=4294967296\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232, \"bytes\", 4294967296L)),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\", \"bytes\", 4294967296L)),\n+ entry(\"event\", Map.of(\"id\", \"1\", \"code\", \"100\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testHeaderOnly() {\n+ String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEmptyDeviceFields() {\n+ String message = \"CEF:0|||1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, 
true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\"device\", Map.of(\"vendor\", \"\", \"product\", \"\", \"version\", \"1.0\", \"event_class_id\", \"100\")),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"\", \"vendor\", \"\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEscapedPipeInHeader() {\n+ String message = \"CEF:26|security|threat\\\\|->manager|1.0|100|\"\n+ + \"trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threat|->manager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threat|->manager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEqualsSignInHeader() {\n+ String message = \"CEF:26|security|threat=manager|1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threat=manager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threat=manager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEmptyExtensionValue() {\n+ String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst= spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ 
processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testLeadingWhitespace() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10| src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEscapedPipeInExtension() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this\\\\|has an escaped pipe\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ expectThrows(IllegalArgumentException.class, () -> processor.execute(document));\n+ }\n+\n+ public void testPipeInMessage() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this|has a pipe\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\"),\n+ entry(\"extensions\", Map.of(\"moo\", \"this|has a pipe\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEqualsInMessage() {\n+ String message 
=\n+ \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this =has = equals\\\\= dst=12.121.122.82 spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\"),\n+ entry(\"extensions\", Map.of(\"moo\", \"this =has = equals=\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n+ entry(\"source\", Map.of(\"port\", 1232)),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEscapesInExtension() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|msg=a+b\\\\=c x=c\\\\\\\\d\\\\=z\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\"),\n+ entry(\"extensions\", Map.of(\"x\", \"c\\\\d=z\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"message\", \"a+b=c\")\n+ )\n+ );\n+ }\n+\n+ public void testMalformedExtensionEscape() {\n+ String message = \"CEF:0|FooBar|Web Gateway|1.2.3.45.67|200|Success|2|rt=Sep 07 2018 14:50:39 cat=Access Log dst=1.1.1.1 \"\n+ + \"dhost=foo.example.com suser=redacted src=2.2.2.2 requestMethod=POST request='https://foo.example.com/bar/bingo/1' \"\n+ + \"requestClientApplication='Foo-Bar/2018.1.7; =Email:user@example.com; Guid:test=' cs1= cs1Label=Foo Bar\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"FooBar\", \"product\", \"Web Gateway\", \"version\", \"1.2.3.45.67\", \"event_class_id\", \"200\")\n+ ),\n+ entry(\"name\", \"Success\"),\n+ entry(\"severity\", \"2\"),\n+ entry(\"extensions\", Map.of(\"deviceCustomString1Label\", \"Foo Bar\", \"deviceEventCategory\", \"Access Log\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"200\")),\n+ entry(\"observer\", 
Map.of(\"product\", \"Web Gateway\", \"vendor\", \"FooBar\", \"version\", \"1.2.3.45.67\")),\n+ entry(\"@timestamp\", ZonedDateTime.parse(\"2018-09-07T14:50:39Z\")),\n+ entry(\"destination\", Map.of(\"ip\", \"1.1.1.1\", \"domain\", \"foo.example.com\")),\n+ entry(\"source\", Map.of(\"ip\", \"2.2.2.2\", \"user\", Map.of(\"name\", \"redacted\"))),\n+ entry(\"http\", Map.of(\"request\", Map.of(\"method\", \"POST\"))),\n+ entry(\"url\", Map.of(\"original\", \"'https://foo.example.com/bar/bingo/1'\")),\n+ entry(\"user_agent\", Map.of(\"original\", \"'Foo-Bar/2018.1.7; =Email:user@example.com; Guid:test='\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testMultipleMalformedExtensionValues() {\n+ String message = \"CEF:0|vendor|product|version|event_id|name|Very-High| \"\n+ + \"msg=Hello World error=Failed because id==old_id user=root angle=106.7<=180\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"vendor\", \"product\", \"product\", \"version\", \"version\", \"event_class_id\", \"event_id\")\n+ ),\n+ entry(\"name\", \"name\"),\n+ entry(\"severity\", \"Very-High\"),\n+ entry(\"extensions\", Map.of(\"id\", \"=old_id\", \"user\", \"root\", \"angle\", \"106.7<=180\", \"error\", \"Failed because\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"event_id\")),\n+ entry(\"observer\", Map.of(\"product\", \"product\", \"vendor\", \"vendor\", \"version\", \"version\")),\n+ entry(\"message\", \"Hello World\")\n+ )\n+ );\n+ }\n+\n+ public void testPaddedMessage() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|message is padded|10|spt=1232 \"\n+ + \"msg=Trailing space in non-final extensions is preserved src=10.0.0.192 \";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"message is padded\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"message\", \"Trailing space in non-final extensions is preserved\")\n+ )\n+ );\n+ }\n+\n+ public void testCrlfMessage() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|message is padded|10|\"\n+ + \"spt=1232 msg=Trailing space in final extensions is not preserved\\t \\r\\ndpt=1234\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, 
null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"message is padded\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"port\", 1232)),\n+ entry(\"message\", \"Trailing space in final extensions is not preserved\"),\n+ entry(\"destination\", Map.of(\"port\", 1234))\n+ )\n+ );\n+ }\n+\n+ public void testTabMessage() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|message is padded|10|\"\n+ + \"spt=1232 msg=Tabs\\tand\\rcontrol\\ncharacters are preserved\\t src=127.0.0.1\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"message is padded\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"port\", 1232, \"ip\", \"127.0.0.1\")),\n+ entry(\"message\", \"Tabs\\tand\\rcontrol\\ncharacters are preserved\")\n+ )\n+ );\n+ }\n+\n+ public void testTabNoSepMessage() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|message has tabs|10|spt=1232 msg=Tab is not a separator\\tsrc=127.0.0.1\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"message has tabs\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"port\", 1232, \"ip\", \"127.0.0.1\")),\n+ entry(\"message\", \"Tab is not a separator\")\n+ )\n+ );\n+ }\n+\n+ public void testEscapedMessage() {\n+ String message = \"CEF:0|security\\\\compliance|threat\\\\|->manager|1.0|100|message contains escapes|10|\"\n+ + \"spt=1232 msg=Newlines in messages\\\\\\nare allowed.\\\\\\r\\\\\\nAnd so are carriage feeds\\\\\\\\newlines\\\\\\\\\\\\=. 
dpt=4432\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.ofEntries(\n+ entry(\"vendor\", \"security\\\\compliance\"),\n+ entry(\"product\", \"threat|->manager\"),\n+ entry(\"version\", \"1.0\"),\n+ entry(\"event_class_id\", \"100\")\n+ )\n+ ),\n+ entry(\"name\", \"message contains escapes\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threat|->manager\", \"vendor\", \"security\\\\compliance\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"port\", 1232)),\n+ entry(\"message\", \"Newlines in messages\\nare allowed.\\r\\nAnd so are carriage feeds\\\\newlines\\\\=.\"),\n+ entry(\"destination\", Map.of(\"port\", 4432))\n+ )\n+ );\n+ }\n+\n+ public void testTruncatedHeader() {\n+ String message = \"CEF:0|SentinelOne|Mgmt|activityID=1111111111111111111 activityType=3505 \"\n+ + \"siteId=None siteName=None accountId=1222222222222222222 accountName=foo-bar mdr notificationScope=ACCOUNT\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\"device\", Map.of(\"vendor\", \"SentinelOne\", \"product\", \"Mgmt\")),\n+ entry(\n+ \"extensions\",\n+ Map.ofEntries(\n+ entry(\"activityID\", \"1111111111111111111\"),\n+ entry(\"activityType\", \"3505\"),\n+ entry(\"siteId\", \"None\"),\n+ entry(\"siteName\", \"None\"),\n+ entry(\"accountId\", \"1222222222222222222\"),\n+ entry(\"accountName\", \"foo-bar mdr\"),\n+ entry(\"notificationScope\", \"ACCOUNT\")\n+ )\n+ )\n+ )\n+ ),\n+ entry(\"observer\", Map.of(\"product\", \"Mgmt\", \"vendor\", \"SentinelOne\")),\n+ entry(\"message\", message),\n+ entry(\"error\", Map.of(\"message\", Set.of(\"incomplete CEF header\")))\n+ )\n+ );\n+ }\n+\n+ public void testIgnoreEmptyValuesInExtension() {\n+ String message = \"CEF:26|security|threat=manager|1.0|100|trojan successfully stopped|10|src= dst=12.121.122.82 spt=\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threat=manager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threat=manager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"destination\", Map.of(\"ip\", 
\"12.121.122.82\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testHyphenInExtensionKey() {\n+ String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|Some-Key=123456\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\"),\n+ entry(\"extensions\", Map.of(\"Some-Key\", \"123456\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testAllFieldsInExtension() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|\"\n+ + \"agt=192.168.0.1 agentDnsDomain=example.com ahost=agentHost aid=agentId amac=00:0a:95:9d:68:16 agentNtDomain=example.org \"\n+ + \"art=1622547800000 atz=UTC agentTranslatedAddress=10.0.0.1 agentTranslatedZoneExternalID=ext123 agentTranslatedZoneURI=uri \"\n+ + \"at=agentType av=1.0 agentZoneExternalID=zoneExtId agentZoneURI=zoneUri app=HTTP cnt=1234 in=5678 out=91011 \"\n+ + \"customerExternalID=custExtId customerURI=custUri dst=192.168.0.2 dlat=37.7749 dlong=-122.4194 \"\n+ + \"dhost=destHost dmac=00:0a:95:9d:68:16 dntdom=destNtDomain dpt=80 dpid=1234 \"\n+ + \"dproc=destProc destinationServiceName=destService \"\n+ + \"destinationTranslatedAddress=10.0.0.2 destinationTranslatedPort=8080 destinationTranslatedZoneExternalID=destExtId \"\n+ + \"destinationTranslatedZoneURI=destUri duid=destUserId duser=destUser dpriv=admin destinationZoneExternalID=destZoneExtId \"\n+ + \"destinationZoneURI=destZoneUri act=blocked dvc=192.168.0.3 cfp1Label=cfp1Label cfp3Label=cfp3Label cfp4Label=cfp4Label \"\n+ + \"deviceCustomDate1=1622547800000 deviceCustomDate1Label=customDate1Label deviceCustomDate2=1622547900000 \"\n+ + \"deviceCustomDate2Label=customDate2Label cfp1=1.23 cfp2=2.34 cfp2Label=cfp2Label cfp3=3.45 cfp4=4.56 c6a1=2001:db8::1 \"\n+ + \"c6a1Label=c6a1Label c6a2=2001:db8::2 c6a2Label=c6a2Label c6a3=2001:db8::3 c6a3Label=c6a3Label c6a4=2001:db8::4 \"\n+ + \"C6a4Label=c6a4Label cn1=123 cn1Label=cn1Label cn2=234 cn2Label=cn2Label cn3=345 cn3Label=cn3Label cs1=customString1 \"\n+ + \"cs1Label=cs1Label cs2=customString2 cs2Label=cs2Label cs3=customString3 cs3Label=cs3Label \"\n+ + \"cs4=customString4 cs4Label=cs4Label \"\n+ + \"cs5=customString5 cs5Label=cs5Label cs6=customString6 cs6Label=cs6Label deviceDirection=inbound deviceDnsDomain=example.com \"\n+ + \"cat=category deviceExternalId=extId deviceFacility=16 dvchost=host1 deviceInboundInterface=eth0 dvcmac=00:0a:95:9d:68:16 \"\n+ + \"deviceNtDomain=example.org deviceOutboundInterface=eth1 devicePayloadId=payloadId dvcpid=5678 deviceProcessName=procName \"\n+ + \"rt=1622547800000 dtz=UTC deviceTranslatedAddress=10.0.0.3 deviceTranslatedZoneExternalID=transExtId \"\n+ + \"deviceTranslatedZoneURI=transUri deviceZoneExternalID=zoneExtId deviceZoneURI=zoneUri 
end=1622547900000 eventId=evt123 \"\n+ + \"outcome=success externalId=extId fileCreateTime=1622547800000 fileHash=abcd1234 fileId=5678 \"\n+ + \"fileModificationTime=1622547900000 \"\n+ + \"fname=file.txt filePath=/path/to/file filePermission=rw-r--r-- fsize=1024 fileType=txt flexDate1=1622547800000 \"\n+ + \"flexDate1Label=flexDate1Label flexString1=flexString1 flexString2=flexString2 flexString1Label=flexString1Label \"\n+ + \"flexString2Label=flexString2Label msg=message oldFileCreateTime=1622547800000 oldFileHash=oldHash oldFileId=oldId \"\n+ + \"oldFileModificationTime=1622547900000 oldFileName=oldFile oldFilePath=/old/path \"\n+ + \"oldFilePermission=rw-r--r-- oldFileSize=2048 \"\n+ + \"oldFileType=oldType rawEvent=rawEvent reason=reason requestClientApplication=Mozilla requestContext=referrer \"\n+ + \"requestCookies=cookies requestMethod=GET request=url src=192.168.0.4 sourceDnsDomain=sourceDomain \"\n+ + \"slat=37.7749 slong=-122.4194 \"\n+ + \"shost=sourceHost smac=00:0a:95:9d:68:16 sntdom=sourceNtDomain spt=443 spid=1234 \"\n+ + \"sproc=sourceProc sourceServiceName=sourceService \"\n+ + \"sourceTranslatedAddress=10.0.0.4 sourceTranslatedPort=8081 sourceTranslatedZoneExternalID=sourceExtId \"\n+ + \"sourceTranslatedZoneURI=sourceUri suid=sourceUserId suser=sourceUser spriv=sourcePriv sourceZoneExternalID=sourceZoneExtId \"\n+ + \"sourceZoneURI=sourceZoneUri start=1622547800000 proto=TCP type=1 catdt=catDeviceType mrt=1622547800000\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\"),\n+ entry(\n+ \"extensions\",\n+ Map.ofEntries(\n+ entry(\"deviceNtDomain\", \"example.org\"),\n+ entry(\"agentZoneExternalID\", \"zoneExtId\"),\n+ entry(\"agentTimeZone\", \"UTC\"),\n+ entry(\"deviceCustomIPv6Address1Label\", \"c6a1Label\"),\n+ entry(\"deviceCustomString1\", \"customString1\"),\n+ entry(\"deviceCustomIPv6Address2Label\", \"c6a2Label\"),\n+ entry(\"deviceCustomNumber3\", 345L),\n+ entry(\"deviceCustomFloatingPoint1\", 1.23f),\n+ entry(\"deviceCustomNumber2\", 234L),\n+ entry(\"deviceCustomFloatingPoint2\", 2.34f),\n+ entry(\"deviceCustomFloatingPoint3\", 3.45f),\n+ entry(\"deviceCustomFloatingPoint4\", 4.56f),\n+ entry(\"flexDate1\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"destinationTranslatedZoneExternalID\", \"destExtId\"),\n+ entry(\"deviceCustomNumber1\", 123L),\n+ entry(\"deviceEventCategory\", \"category\"),\n+ entry(\"deviceCustomString6Label\", \"cs6Label\"),\n+ entry(\"deviceCustomNumber2Label\", \"cn2Label\"),\n+ entry(\"flexString1Label\", \"flexString1Label\"),\n+ entry(\"deviceCustomString5Label\", \"cs5Label\"),\n+ entry(\"agentZoneURI\", \"zoneUri\"),\n+ entry(\"deviceCustomString2Label\", \"cs2Label\"),\n+ entry(\"deviceCustomDate2Label\", \"customDate2Label\"),\n+ entry(\"deviceCustomNumber1Label\", \"cn1Label\"),\n+ entry(\"oldFileType\", \"oldType\"),\n+ entry(\"destinationZoneExternalID\", \"destZoneExtId\"),\n+ entry(\"categoryDeviceType\", 
\"catDeviceType\"),\n+ entry(\"deviceZoneURI\", \"zoneUri\"),\n+ entry(\"sourceTranslatedZoneExternalID\", \"sourceExtId\"),\n+ entry(\"agentTranslatedAddress\", \"10.0.0.1\"),\n+ entry(\"requestCookies\", \"cookies\"),\n+ entry(\"deviceCustomIPv6Address3\", \"2001:db8::3\"),\n+ entry(\"oldFilePath\", \"/old/path\"),\n+ entry(\"deviceCustomIPv6Address2\", \"2001:db8::2\"),\n+ entry(\"deviceCustomIPv6Address1\", \"2001:db8::1\"),\n+ entry(\"oldFileId\", \"oldId\"),\n+ entry(\"deviceTranslatedZoneExternalID\", \"transExtId\"),\n+ entry(\"deviceCustomFloatingPoint2Label\", \"cfp2Label\"),\n+ entry(\"deviceTranslatedZoneURI\", \"transUri\"),\n+ entry(\"deviceCustomIPv6Address4Label\", \"c6a4Label\"),\n+ entry(\"agentTranslatedZoneURI\", \"uri\"),\n+ entry(\"oldFilePermission\", \"rw-r--r--\"),\n+ entry(\"deviceCustomIPv6Address4\", \"2001:db8::4\"),\n+ entry(\"sourceZoneURI\", \"sourceZoneUri\"),\n+ entry(\"deviceCustomFloatingPoint3Label\", \"cfp3Label\"),\n+ entry(\"agentTranslatedZoneExternalID\", \"ext123\"),\n+ entry(\"destinationZoneURI\", \"destZoneUri\"),\n+ entry(\"flexDate1Label\", \"flexDate1Label\"),\n+ entry(\"agentNtDomain\", \"example.org\"),\n+ entry(\"deviceCustomDate2\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n+ entry(\"deviceCustomDate1\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"deviceCustomString3Label\", \"cs3Label\"),\n+ entry(\"deviceCustomDate1Label\", \"customDate1Label\"),\n+ entry(\"destinationTranslatedZoneURI\", \"destUri\"),\n+ entry(\"oldFileModificationTime\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n+ entry(\"deviceCustomFloatingPoint1Label\", \"cfp1Label\"),\n+ entry(\"deviceCustomIPv6Address3Label\", \"c6a3Label\"),\n+ entry(\"deviceCustomFloatingPoint4Label\", \"cfp4Label\"),\n+ entry(\"oldFileSize\", 2048),\n+ entry(\"externalId\", \"extId\"),\n+ entry(\"baseEventCount\", 1234),\n+ entry(\"flexString2\", \"flexString2\"),\n+ entry(\"deviceCustomNumber3Label\", \"cn3Label\"),\n+ entry(\"flexString1\", \"flexString1\"),\n+ entry(\"deviceCustomString4Label\", \"cs4Label\"),\n+ entry(\"flexString2Label\", \"flexString2Label\"),\n+ entry(\"deviceCustomString3\", \"customString3\"),\n+ entry(\"deviceCustomString2\", \"customString2\"),\n+ entry(\"deviceCustomString1Label\", \"cs1Label\"),\n+ entry(\"deviceCustomString5\", \"customString5\"),\n+ entry(\"deviceCustomString4\", \"customString4\"),\n+ entry(\"deviceZoneExternalID\", \"zoneExtId\"),\n+ entry(\"deviceCustomString6\", \"customString6\"),\n+ entry(\"oldFileName\", \"oldFile\"),\n+ entry(\"sourceZoneExternalID\", \"sourceZoneExtId\"),\n+ entry(\"oldFileHash\", \"oldHash\"),\n+ entry(\"sourceTranslatedZoneURI\", \"sourceUri\"),\n+ entry(\"oldFileCreateTime\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\"))\n+ )\n+ )\n+ )\n+ ),\n+ entry(\"host\", Map.of(\"nat\", Map.of(\"ip\", \"10.0.0.3\"))),\n+ entry(\"log\", Map.of(\"syslog\", Map.of(\"facility\", Map.of(\"code\", 16L)))),\n+ entry(\n+ \"observer\",\n+ Map.ofEntries(\n+ entry(\"ingress\", Map.of(\"interface\", Map.of(\"name\", \"eth0\"))),\n+ entry(\"registered_domain\", \"example.com\"),\n+ entry(\"product\", \"threatmanager\"),\n+ entry(\"hostname\", \"host1\"),\n+ entry(\"vendor\", \"security\"),\n+ entry(\"ip\", \"192.168.0.3\"),\n+ entry(\"name\", \"extId\"),\n+ entry(\"version\", \"1.0\"),\n+ entry(\"mac\", \"00:0a:95:9d:68:16\"),\n+ entry(\"egress\", Map.of(\"interface\", Map.of(\"name\", \"eth1\")))\n+ )\n+ ),\n+ entry(\n+ \"agent\",\n+ Map.ofEntries(\n+ entry(\"ip\", \"192.168.0.1\"),\n+ entry(\"name\", 
\"example.com\"),\n+ entry(\"id\", \"agentId\"),\n+ entry(\"type\", \"agentType\"),\n+ entry(\"version\", \"1.0\"),\n+ entry(\"mac\", \"00:0a:95:9d:68:16\")\n+ )\n+ ),\n+ entry(\"process\", Map.of(\"name\", \"procName\", \"pid\", 5678L)),\n+ entry(\n+ \"destination\",\n+ Map.ofEntries(\n+ entry(\"nat\", Map.of(\"port\", 8080, \"ip\", \"10.0.0.2\")),\n+ entry(\"geo\", Map.of(\"location\", Map.of(\"lon\", -122.4194, \"lat\", 37.7749))),\n+ entry(\"registered_domain\", \"destNtDomain\"),\n+ entry(\"process\", Map.of(\"name\", \"destProc\", \"pid\", 1234L)),\n+ entry(\"port\", 80),\n+ entry(\"bytes\", 91011L),\n+ entry(\"service\", Map.of(\"name\", \"destService\")),\n+ entry(\"domain\", \"destHost\"),\n+ entry(\"ip\", \"192.168.0.2\"),\n+ entry(\"user\", Map.of(\"name\", \"destUser\", \"id\", \"destUserId\", \"group\", Map.of(\"name\", \"admin\"))),\n+ entry(\"mac\", \"00:0a:95:9d:68:16\")\n+ )\n+ ),\n+ entry(\n+ \"source\",\n+ Map.ofEntries(\n+ entry(\"geo\", Map.of(\"location\", Map.of(\"lon\", -122.4194, \"lat\", 37.7749))),\n+ entry(\"nat\", Map.of(\"port\", 8081, \"ip\", \"10.0.0.4\")),\n+ entry(\"registered_domain\", \"sourceNtDomain\"),\n+ entry(\"process\", Map.of(\"name\", \"sourceProc\", \"pid\", 1234L)),\n+ entry(\"port\", 443),\n+ entry(\"service\", Map.of(\"name\", \"sourceService\")),\n+ entry(\"bytes\", 5678L),\n+ entry(\"ip\", \"192.168.0.4\"),\n+ entry(\"domain\", \"sourceDomain\"),\n+ entry(\"user\", Map.of(\"name\", \"sourceUser\", \"id\", \"sourceUserId\", \"group\", Map.of(\"name\", \"sourcePriv\"))),\n+ entry(\"mac\", \"00:0a:95:9d:68:16\")\n+ )\n+ ),\n+ entry(\"message\", \"message\"),\n+ entry(\"url\", Map.of(\"original\", \"url\")),\n+ entry(\"network\", Map.of(\"protocol\", \"HTTP\", \"transport\", \"TCP\", \"direction\", \"inbound\")),\n+ entry(\n+ \"file\",\n+ Map.ofEntries(\n+ entry(\"inode\", \"5678\"),\n+ entry(\"path\", \"/path/to/file\"),\n+ entry(\"size\", 1024L),\n+ entry(\"created\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"name\", \"file.txt\"),\n+ entry(\"mtime\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n+ entry(\"type\", \"txt\"),\n+ entry(\"hash\", \"abcd1234\"),\n+ entry(\"group\", \"rw-r--r--\")\n+ )\n+ ),\n+ entry(\"@timestamp\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"organization\", Map.of(\"name\", \"custUri\", \"id\", \"custExtId\")),\n+ entry(\n+ \"event\",\n+ Map.ofEntries(\n+ entry(\"action\", \"blocked\"),\n+ entry(\"timezone\", \"UTC\"),\n+ entry(\"end\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n+ entry(\"id\", \"evt123\"),\n+ entry(\"outcome\", \"success\"),\n+ entry(\"start\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"reason\", \"reason\"),\n+ entry(\"ingested\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"kind\", 1),\n+ entry(\"original\", \"rawEvent\"),\n+ entry(\"created\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"code\", \"100\")\n+ )\n+ ),\n+ entry(\"user_agent\", Map.of(\"original\", \"Mozilla\")),\n+ entry(\"http\", Map.of(\"request\", Map.of(\"referrer\", \"referrer\", \"method\", \"GET\")))\n+ )\n+ );\n+ }\n+\n+ // Date parsing tests\n+ public void testToTimestampWithUnixTimestamp() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String unixTimestamp = \"1633072800000\"; // Example Unix timestamp in milliseconds\n+ ZonedDateTime expected = ZonedDateTime.ofInstant(Instant.ofEpochMilli(Long.parseLong(unixTimestamp)), ZoneId.of(\"UTC\"));\n+ ZonedDateTime result = parser.toTimestamp(unixTimestamp);\n+ 
assertEquals(expected, result);\n+ }\n+\n+ public void testToTimestampWithFormattedDate() {\n+ CefParser parser = new CefParser(ZoneId.of(\"Europe/Stockholm\"), false);\n+ String formattedDate = \"Oct 01 2021 12:00:00 UTC\"; // Example formatted date\n+ ZonedDateTime expected = ZonedDateTime.parse(\"2021-10-01T14:00+02:00[Europe/Stockholm]\");\n+ ZonedDateTime result = parser.toTimestamp(formattedDate);\n+ assertEquals(expected, result);\n+ }\n+\n+ public void testToTimestampWithFormattedDateWithoutYear() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String formattedDate = \"Oct 01 12:00:00 UTC\"; // Example formatted date without year\n+ int currentYear = ZonedDateTime.now(ZoneId.of(\"UTC\")).getYear();\n+ ZonedDateTime expected = ZonedDateTime.parse(currentYear + \"-10-01T12:00:00Z[UTC]\");\n+ ZonedDateTime result = parser.toTimestamp(formattedDate);\n+ assertEquals(expected, result);\n+ }\n+\n+ public void testToTimestampWithFormattedDateWithoutTimezone() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String formattedDate = \"Sep 07 2018 14:50:39\"; // Example formatted date without year\n+ ZonedDateTime expected = ZonedDateTime.parse(\"2018-09-07T14:50:39Z[UTC]\");\n+ ZonedDateTime result = parser.toTimestamp(formattedDate);\n+ assertEquals(expected, result);\n+ }\n+\n+ public void testToTimestampWithInvalidDate() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String invalidDate = \"invalid date\";\n+ assertThrows(IllegalArgumentException.class, () -> parser.toTimestamp(invalidDate));\n+ }\n+\n+ public void testToMacAddressWithSeparators() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ List macAddresses = List.of(\n+ // EUI-48 (with separators).\n+ \"00:0D:60:AF:1B:61\",\n+ \"00-0D-60-AF-1B-61\",\n+ \"000D.60AF.1B61\",\n+\n+ // EUI-64 (with separators).\n+ \"00:0D:60:FF:FE:AF:1B:61\",\n+ \"00-0D-60-FF-FE-AF-1B-61\",\n+ \"000D.60FF.FEAF.1B61\"\n+ );\n+ macAddresses.forEach(macAddress -> {\n+ String result = parser.toMACAddress(macAddress);\n+ assertEquals(macAddress, result);\n+ });\n+ }\n+\n+ public void testEUI48ToMacAddressWithOutSeparators() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String macAddress = \"000D60AF1B61\";\n+ String result = parser.toMACAddress(macAddress);\n+ assertEquals(\"00:0D:60:AF:1B:61\", result);\n+ }\n+\n+ public void testEUI64ToMacAddressWithOutSeparators() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String macAddress = \"000D60FFFEAF1B61\";\n+ String result = parser.toMACAddress(macAddress);\n+ assertEquals(\"00:0D:60:FF:FE:AF:1B:61\", result);\n+ }\n+\n+ public void toIP_validIPv4Address() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n+ String result = parser.toIP(\"192.168.1.1\");\n+ assertEquals(\"192.168.1.1\", result);\n+ }\n+\n+ public void toIP_validIPv6Address() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n+ String result = parser.toIP(\"2001:0db8:85a3:0000:0000:8a2e:0370:7334\");\n+ assertEquals(\"2001:db8:85a3::8a2e:370:7334\", result);\n+ }\n+\n+ public void toIP_invalidIPAddress() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n+ IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> parser.toIP(\"invalid_ip\"));\n+ assertEquals(\"Invalid IP address format\", exception.getMessage());\n+ }\n+\n+ public void toIP_emptyString() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n+ IllegalArgumentException 
exception = assertThrows(IllegalArgumentException.class, () -> parser.toIP(\"\"));\n+ assertEquals(\"Invalid IP address format\", exception.getMessage());\n+ }\n+\n+ public void toIP_nullString() {", + "comment_created_at": "2025-04-04T07:02:32+00:00", + "comment_author": "bhapas", + "comment_body": "https://github.com/elastic/elasticsearch/pull/122491/commits/2b5bfd2ce93a8a8e1d043a6fd7f073585f70945b fixes this", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1192216195", + "pr_number": 95836, + "pr_file": "build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transportversion/BumpTransportVersionTask.java", + "created_at": "2023-05-12T10:51:18+00:00", + "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. Licensed under the Elastic License\n * 2.0 and the Server Side Public License, v 1; you may not use this file except\n * in compliance with, at your election, the Elastic License 2.0 or the Server\n * Side Public License, v 1.\n */\n\npackage org.elasticsearch.gradle.internal.transportversion;\n\nimport org.gradle.api.DefaultTask;\nimport org.gradle.api.tasks.TaskAction;\nimport org.jetbrains.annotations.NotNull;\n\nimport java.io.IOException;\nimport java.nio.charset.StandardCharsets;\nimport java.nio.file.Files;\nimport java.nio.file.Path;\nimport java.util.UUID;\nimport java.util.regex.Matcher;\nimport java.util.regex.Pattern;\nimport java.util.stream.Collectors;\nimport java.util.stream.Stream;\n\npublic class BumpTransportVersionTask extends DefaultTask {", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "1192216195", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 95836, + "pr_file": "build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transportversion/BumpTransportVersionTask.java", + "discussion_id": "1192216195", + "commented_code": "@@ -0,0 +1,85 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the Elastic License\n+ * 2.0 and the Server Side Public License, v 1; you may not use this file except\n+ * in compliance with, at your election, the Elastic License 2.0 or the Server\n+ * Side Public License, v 1.\n+ */\n+\n+package org.elasticsearch.gradle.internal.transportversion;\n+\n+import org.gradle.api.DefaultTask;\n+import org.gradle.api.tasks.TaskAction;\n+import org.jetbrains.annotations.NotNull;\n+\n+import java.io.IOException;\n+import java.nio.charset.StandardCharsets;\n+import java.nio.file.Files;\n+import java.nio.file.Path;\n+import java.util.UUID;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+import java.util.stream.Collectors;\n+import java.util.stream.Stream;\n+\n+public class BumpTransportVersionTask extends DefaultTask {", + "comment_created_at": "2023-05-12T10:51:18+00:00", + "comment_author": "thecoop", + "comment_body": "Maybe 'NewTransportVersionTask' to match with the rest?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2180389078", + "pr_number": 130410, + "pr_file": "x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupJoinTypesIT.java", + "created_at": "2025-07-02T15:40:51+00:00", + "commented_code": "return existing.stream().anyMatch(c -> c.exists(indexName));\n }\n\n public void testOutputSupportedTypes() throws Exception {\n Map, DataType> signatures = new LinkedHashMap<>();\n for (TestConfigs configs : testConfigurations.values()) {\n if (configs.group.equals(\"unsupported\") || configs.group.equals(\"union-types\")) {", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2180389078", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130410, + "pr_file": "x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupJoinTypesIT.java", + "discussion_id": "2180389078", + "commented_code": "@@ -265,6 +268,21 @@ private static boolean existingIndex(Collection existing, DataType\n return existing.stream().anyMatch(c -> c.exists(indexName));\n }\n \n+ public void testOutputSupportedTypes() throws Exception {\n+ Map, DataType> signatures = new LinkedHashMap<>();\n+ for (TestConfigs configs : testConfigurations.values()) {\n+ if (configs.group.equals(\"unsupported\") || configs.group.equals(\"union-types\")) {", + "comment_created_at": "2025-07-02T15:40:51+00:00", + "comment_author": "alex-spies", + "comment_body": "maybe we should use static string constants rather than typing out `unsupported` and `union-types` verbatim.", + "pr_file_module": null + }, + { + "comment_id": "2180425537", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130410, + "pr_file": "x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupJoinTypesIT.java", + "discussion_id": "2180389078", + "commented_code": "@@ -265,6 +268,21 @@ private static boolean existingIndex(Collection existing, DataType\n return existing.stream().anyMatch(c -> c.exists(indexName));\n }\n \n+ public void testOutputSupportedTypes() throws Exception {\n+ Map, DataType> signatures = new LinkedHashMap<>();\n+ for (TestConfigs configs : testConfigurations.values()) {\n+ if (configs.group.equals(\"unsupported\") || configs.group.equals(\"union-types\")) {", + "comment_created_at": "2025-07-02T15:56:00+00:00", + "comment_author": "craigtaverner", + "comment_body": "Good idea, but perhaps out of scope, since this test uses string literals like this in many places and I think they could all be fixed together. 
Perhaps moved into an enum, and use a switch somewhere to assert none are left out?", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-name-reflects-meaning.md b/_reviewers/elasticsearch-name-reflects-meaning.md index 5364f2d..833b552 100644 --- a/_reviewers/elasticsearch-name-reflects-meaning.md +++ b/_reviewers/elasticsearch-name-reflects-meaning.md @@ -40,301 +40,3 @@ Choose names that clearly communicate the intent, behavior, and semantics of cod if (configs.group.equals(GROUP_UNSUPPORTED) || configs.group.equals(GROUP_UNION_TYPES)) { ``` - - -[ - { - "discussion_id": "2137933778", - "pr_number": 129146, - "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionCreator.java", - "created_at": "2025-06-10T13:38:48+00:00", - "commented_code": "package org.elasticsearch.xpack.inference.services.ibmwatsonx.action;\n\nimport org.elasticsearch.inference.TaskType;\nimport org.elasticsearch.threadpool.ThreadPool;\nimport org.elasticsearch.xpack.inference.common.Truncator;\nimport org.elasticsearch.xpack.inference.external.action.ExecutableAction;\nimport org.elasticsearch.xpack.inference.external.action.SenderExecutableAction;\nimport org.elasticsearch.xpack.inference.external.action.SingleInputSenderExecutableAction;\nimport org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler;\nimport org.elasticsearch.xpack.inference.external.http.sender.ChatCompletionInput;\nimport org.elasticsearch.xpack.inference.external.http.sender.GenericRequestManager;\nimport org.elasticsearch.xpack.inference.external.http.sender.Sender;\nimport org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;\nimport org.elasticsearch.xpack.inference.services.ServiceComponents;\nimport org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxCompletionResponseHandler;\nimport org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxEmbeddingsRequestManager;\nimport org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxRerankRequestManager;\nimport org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel;\nimport org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsModel;\nimport org.elasticsearch.xpack.inference.services.ibmwatsonx.request.IbmWatsonxChatCompletionRequest;\nimport org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankModel;\nimport org.elasticsearch.xpack.inference.services.openai.response.OpenAiChatCompletionResponseEntity;\n\nimport java.util.Map;\nimport java.util.Objects;\n\nimport static org.elasticsearch.core.Strings.format;\nimport static org.elasticsearch.xpack.inference.external.action.ActionUtils.constructFailedToSendRequestMessage;\n\n/**\n * IbmWatsonxActionCreator is responsible for creating executable actions for various models.\n * It implements the IbmWatsonxActionVisitor interface to provide specific implementations.\n */\npublic class IbmWatsonxActionCreator implements IbmWatsonxActionVisitor {\n private final Sender sender;\n private final ServiceComponents serviceComponents;\n\n static final String COMPLETION_REQUEST_TYPE = \"IBM WatsonX completions\";", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2137933778", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129146, - "pr_file": 
"x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionCreator.java", - "discussion_id": "2137933778", - "commented_code": "@@ -7,26 +7,48 @@\n \n package org.elasticsearch.xpack.inference.services.ibmwatsonx.action;\n \n+import org.elasticsearch.inference.TaskType;\n import org.elasticsearch.threadpool.ThreadPool;\n import org.elasticsearch.xpack.inference.common.Truncator;\n import org.elasticsearch.xpack.inference.external.action.ExecutableAction;\n import org.elasticsearch.xpack.inference.external.action.SenderExecutableAction;\n+import org.elasticsearch.xpack.inference.external.action.SingleInputSenderExecutableAction;\n+import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler;\n+import org.elasticsearch.xpack.inference.external.http.sender.ChatCompletionInput;\n+import org.elasticsearch.xpack.inference.external.http.sender.GenericRequestManager;\n import org.elasticsearch.xpack.inference.external.http.sender.Sender;\n+import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;\n import org.elasticsearch.xpack.inference.services.ServiceComponents;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxCompletionResponseHandler;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxEmbeddingsRequestManager;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxRerankRequestManager;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsModel;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.request.IbmWatsonxChatCompletionRequest;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankModel;\n+import org.elasticsearch.xpack.inference.services.openai.response.OpenAiChatCompletionResponseEntity;\n \n import java.util.Map;\n import java.util.Objects;\n \n+import static org.elasticsearch.core.Strings.format;\n import static org.elasticsearch.xpack.inference.external.action.ActionUtils.constructFailedToSendRequestMessage;\n \n+/**\n+ * IbmWatsonxActionCreator is responsible for creating executable actions for various models.\n+ * It implements the IbmWatsonxActionVisitor interface to provide specific implementations.\n+ */\n public class IbmWatsonxActionCreator implements IbmWatsonxActionVisitor {\n private final Sender sender;\n private final ServiceComponents serviceComponents;\n \n+ static final String COMPLETION_REQUEST_TYPE = \"IBM WatsonX completions\";", - "comment_created_at": "2025-06-10T13:38:48+00:00", - "comment_author": "Jan-Kazlouski-elastic", - "comment_body": "After discussion in dms it was found that platform is called Watsonx not WatsonX. Could you please unify naming. 
Thank you!", - "pr_file_module": null - }, - { - "comment_id": "2145230716", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129146, - "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionCreator.java", - "discussion_id": "2137933778", - "commented_code": "@@ -7,26 +7,48 @@\n \n package org.elasticsearch.xpack.inference.services.ibmwatsonx.action;\n \n+import org.elasticsearch.inference.TaskType;\n import org.elasticsearch.threadpool.ThreadPool;\n import org.elasticsearch.xpack.inference.common.Truncator;\n import org.elasticsearch.xpack.inference.external.action.ExecutableAction;\n import org.elasticsearch.xpack.inference.external.action.SenderExecutableAction;\n+import org.elasticsearch.xpack.inference.external.action.SingleInputSenderExecutableAction;\n+import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler;\n+import org.elasticsearch.xpack.inference.external.http.sender.ChatCompletionInput;\n+import org.elasticsearch.xpack.inference.external.http.sender.GenericRequestManager;\n import org.elasticsearch.xpack.inference.external.http.sender.Sender;\n+import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;\n import org.elasticsearch.xpack.inference.services.ServiceComponents;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxCompletionResponseHandler;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxEmbeddingsRequestManager;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxRerankRequestManager;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsModel;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.request.IbmWatsonxChatCompletionRequest;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankModel;\n+import org.elasticsearch.xpack.inference.services.openai.response.OpenAiChatCompletionResponseEntity;\n \n import java.util.Map;\n import java.util.Objects;\n \n+import static org.elasticsearch.core.Strings.format;\n import static org.elasticsearch.xpack.inference.external.action.ActionUtils.constructFailedToSendRequestMessage;\n \n+/**\n+ * IbmWatsonxActionCreator is responsible for creating executable actions for various models.\n+ * It implements the IbmWatsonxActionVisitor interface to provide specific implementations.\n+ */\n public class IbmWatsonxActionCreator implements IbmWatsonxActionVisitor {\n private final Sender sender;\n private final ServiceComponents serviceComponents;\n \n+ static final String COMPLETION_REQUEST_TYPE = \"IBM WatsonX completions\";", - "comment_created_at": "2025-06-13T14:32:43+00:00", - "comment_author": "Evgenii-Kazannik", - "comment_body": "Thanks. 
Unified the naming as IBM Watsonx", - "pr_file_module": null - }, - { - "comment_id": "2150308524", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129146, - "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionCreator.java", - "discussion_id": "2137933778", - "commented_code": "@@ -7,26 +7,48 @@\n \n package org.elasticsearch.xpack.inference.services.ibmwatsonx.action;\n \n+import org.elasticsearch.inference.TaskType;\n import org.elasticsearch.threadpool.ThreadPool;\n import org.elasticsearch.xpack.inference.common.Truncator;\n import org.elasticsearch.xpack.inference.external.action.ExecutableAction;\n import org.elasticsearch.xpack.inference.external.action.SenderExecutableAction;\n+import org.elasticsearch.xpack.inference.external.action.SingleInputSenderExecutableAction;\n+import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler;\n+import org.elasticsearch.xpack.inference.external.http.sender.ChatCompletionInput;\n+import org.elasticsearch.xpack.inference.external.http.sender.GenericRequestManager;\n import org.elasticsearch.xpack.inference.external.http.sender.Sender;\n+import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;\n import org.elasticsearch.xpack.inference.services.ServiceComponents;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxCompletionResponseHandler;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxEmbeddingsRequestManager;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxRerankRequestManager;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsModel;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.request.IbmWatsonxChatCompletionRequest;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankModel;\n+import org.elasticsearch.xpack.inference.services.openai.response.OpenAiChatCompletionResponseEntity;\n \n import java.util.Map;\n import java.util.Objects;\n \n+import static org.elasticsearch.core.Strings.format;\n import static org.elasticsearch.xpack.inference.external.action.ActionUtils.constructFailedToSendRequestMessage;\n \n+/**\n+ * IbmWatsonxActionCreator is responsible for creating executable actions for various models.\n+ * It implements the IbmWatsonxActionVisitor interface to provide specific implementations.\n+ */\n public class IbmWatsonxActionCreator implements IbmWatsonxActionVisitor {\n private final Sender sender;\n private final ServiceComponents serviceComponents;\n \n+ static final String COMPLETION_REQUEST_TYPE = \"IBM WatsonX completions\";", - "comment_created_at": "2025-06-16T15:34:29+00:00", - "comment_author": "dan-rubinstein", - "comment_body": "A bit of a nitpick but the platform seems to be called `IBM watsonx` (from [their website](https://www.ibm.com/watsonx)) but this change updates it everywhere to `IBM Watsonx`. 
Can we keep consistent with IBM's capitalization to avoid confusion?", - "pr_file_module": null - }, - { - "comment_id": "2156393080", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129146, - "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionCreator.java", - "discussion_id": "2137933778", - "commented_code": "@@ -7,26 +7,48 @@\n \n package org.elasticsearch.xpack.inference.services.ibmwatsonx.action;\n \n+import org.elasticsearch.inference.TaskType;\n import org.elasticsearch.threadpool.ThreadPool;\n import org.elasticsearch.xpack.inference.common.Truncator;\n import org.elasticsearch.xpack.inference.external.action.ExecutableAction;\n import org.elasticsearch.xpack.inference.external.action.SenderExecutableAction;\n+import org.elasticsearch.xpack.inference.external.action.SingleInputSenderExecutableAction;\n+import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler;\n+import org.elasticsearch.xpack.inference.external.http.sender.ChatCompletionInput;\n+import org.elasticsearch.xpack.inference.external.http.sender.GenericRequestManager;\n import org.elasticsearch.xpack.inference.external.http.sender.Sender;\n+import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;\n import org.elasticsearch.xpack.inference.services.ServiceComponents;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxCompletionResponseHandler;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxEmbeddingsRequestManager;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxRerankRequestManager;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsModel;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.request.IbmWatsonxChatCompletionRequest;\n import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankModel;\n+import org.elasticsearch.xpack.inference.services.openai.response.OpenAiChatCompletionResponseEntity;\n \n import java.util.Map;\n import java.util.Objects;\n \n+import static org.elasticsearch.core.Strings.format;\n import static org.elasticsearch.xpack.inference.external.action.ActionUtils.constructFailedToSendRequestMessage;\n \n+/**\n+ * IbmWatsonxActionCreator is responsible for creating executable actions for various models.\n+ * It implements the IbmWatsonxActionVisitor interface to provide specific implementations.\n+ */\n public class IbmWatsonxActionCreator implements IbmWatsonxActionVisitor {\n private final Sender sender;\n private final ServiceComponents serviceComponents;\n \n+ static final String COMPLETION_REQUEST_TYPE = \"IBM WatsonX completions\";", - "comment_created_at": "2025-06-19T07:59:32+00:00", - "comment_author": "Evgenii-Kazannik", - "comment_body": "Sure. I updated it everywhere from IBM Watsonx to IBM watsonx. 
Thanks", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2176564193", - "pr_number": 129013, - "pr_file": "server/src/main/java/org/elasticsearch/TransportVersions.java", - "created_at": "2025-07-01T06:45:32+00:00", - "commented_code": "public static final TransportVersion ML_INFERENCE_COHERE_API_VERSION = def(9_110_0_00);\n public static final TransportVersion ESQL_PROFILE_INCLUDE_PLAN = def(9_111_0_00);\n public static final TransportVersion MAPPINGS_IN_DATA_STREAMS = def(9_112_0_00);\n public static final TransportVersion LOOKUP_JOIN_MANY_INDICES = def(9_113_0_00);", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2176564193", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129013, - "pr_file": "server/src/main/java/org/elasticsearch/TransportVersions.java", - "discussion_id": "2176564193", - "commented_code": "@@ -326,6 +326,7 @@ static TransportVersion def(int id) {\n public static final TransportVersion ML_INFERENCE_COHERE_API_VERSION = def(9_110_0_00);\n public static final TransportVersion ESQL_PROFILE_INCLUDE_PLAN = def(9_111_0_00);\n public static final TransportVersion MAPPINGS_IN_DATA_STREAMS = def(9_112_0_00);\n+ public static final TransportVersion LOOKUP_JOIN_MANY_INDICES = def(9_113_0_00);", - "comment_created_at": "2025-07-01T06:45:32+00:00", - "comment_author": "idegtiarenko", - "comment_body": "This is a bit confusing. Why many indices? Should it be LOOKUP_JOIN_REMOTE_INDICES instead?", - "pr_file_module": null - }, - { - "comment_id": "2177851184", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129013, - "pr_file": "server/src/main/java/org/elasticsearch/TransportVersions.java", - "discussion_id": "2176564193", - "commented_code": "@@ -326,6 +326,7 @@ static TransportVersion def(int id) {\n public static final TransportVersion ML_INFERENCE_COHERE_API_VERSION = def(9_110_0_00);\n public static final TransportVersion ESQL_PROFILE_INCLUDE_PLAN = def(9_111_0_00);\n public static final TransportVersion MAPPINGS_IN_DATA_STREAMS = def(9_112_0_00);\n+ public static final TransportVersion LOOKUP_JOIN_MANY_INDICES = def(9_113_0_00);", - "comment_created_at": "2025-07-01T15:03:54+00:00", - "comment_author": "smalyshev", - "comment_body": "Because 9.1 already supports receiving LOOKUP JOIN from the coordinator, but only if there's a single index. If there is more than one - i.e. if it `lookup_index` resolves to `remote1:lookup_on_remote1,remote2:lookup_on_remote2` (because of aliases), 9.1 remote would not be able to process such request. ", - "pr_file_module": null - }, - { - "comment_id": "2180403269", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129013, - "pr_file": "server/src/main/java/org/elasticsearch/TransportVersions.java", - "discussion_id": "2176564193", - "commented_code": "@@ -326,6 +326,7 @@ static TransportVersion def(int id) {\n public static final TransportVersion ML_INFERENCE_COHERE_API_VERSION = def(9_110_0_00);\n public static final TransportVersion ESQL_PROFILE_INCLUDE_PLAN = def(9_111_0_00);\n public static final TransportVersion MAPPINGS_IN_DATA_STREAMS = def(9_112_0_00);\n+ public static final TransportVersion LOOKUP_JOIN_MANY_INDICES = def(9_113_0_00);", - "comment_created_at": "2025-07-02T15:47:44+00:00", - "comment_author": "alex-spies", - "comment_body": "I find this name confusing as well. 
Can we rename to `LOOKUP_JOIN_CCS` or so?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2178772720", - "pr_number": 130411, - "pr_file": "server/src/main/java/org/elasticsearch/cluster/NodeWriteLoad.java", - "created_at": "2025-07-02T00:43:50+00:00", - "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. Licensed under the \"Elastic License\n * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n * Public License v 1\"; you may not use this file except in compliance with, at\n * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n * License v3.0 only\", or the \"Server Side Public License, v 1\".\n */\n\npackage org.elasticsearch.cluster;\n\nimport org.elasticsearch.common.io.stream.StreamInput;\nimport org.elasticsearch.common.io.stream.StreamOutput;\nimport org.elasticsearch.common.io.stream.Writeable;\n\nimport java.io.IOException;\n\n/**\n * Record representing an estimate of a node's write load. The estimation is based on the usage of the node's write thread pool.\n *\n * @param nodeId Node ID.\n * @param totalWriteThreadPoolThreads Total number of threads in the write thread pool.\n * @param percentWriteThreadPoolUtilization Percent of write thread pool threads that are in use, averaged over some period of time.\n * @param maxTaskTimeInWriteQueueMillis How long the oldest task (next to be run) in the write thread pool queue has been queued. Zero if\n * there is no write thread pool queue.\n */\npublic record NodeWriteLoad(\n String nodeId,\n int totalWriteThreadPoolThreads,\n int percentWriteThreadPoolUtilization,\n long maxTaskTimeInWriteQueueMillis", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2178772720", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130411, - "pr_file": "server/src/main/java/org/elasticsearch/cluster/NodeWriteLoad.java", - "discussion_id": "2178772720", - "commented_code": "@@ -0,0 +1,71 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+\n+package org.elasticsearch.cluster;\n+\n+import org.elasticsearch.common.io.stream.StreamInput;\n+import org.elasticsearch.common.io.stream.StreamOutput;\n+import org.elasticsearch.common.io.stream.Writeable;\n+\n+import java.io.IOException;\n+\n+/**\n+ * Record representing an estimate of a node's write load. The estimation is based on the usage of the node's write thread pool.\n+ *\n+ * @param nodeId Node ID.\n+ * @param totalWriteThreadPoolThreads Total number of threads in the write thread pool.\n+ * @param percentWriteThreadPoolUtilization Percent of write thread pool threads that are in use, averaged over some period of time.\n+ * @param maxTaskTimeInWriteQueueMillis How long the oldest task (next to be run) in the write thread pool queue has been queued. 
Zero if\n+ * there is no write thread pool queue.\n+ */\n+public record NodeWriteLoad(\n+ String nodeId,\n+ int totalWriteThreadPoolThreads,\n+ int percentWriteThreadPoolUtilization,\n+ long maxTaskTimeInWriteQueueMillis", - "comment_created_at": "2025-07-02T00:43:50+00:00", - "comment_author": "nicktindall", - "comment_body": "I think we should indicate that these are averages (e.g. `averageWriteThreadPoolUtilization` and `averageWriteThreadPoolQueueLatency`)\r\n\r\nI used the term \"queue latency\" to describe the time spent waiting in the queue, I'm not married to that term specifically but I think we should try and be consistent (happy to agree on something else).", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2168477523", - "pr_number": 130042, - "pr_file": "server/src/main/java/org/elasticsearch/cluster/metadata/MetadataDataStreamsService.java", - "created_at": "2025-06-26T08:16:17+00:00", - "commented_code": "Priority.NORMAL,\n updateSettingsExecutor\n );\n ClusterStateTaskExecutor updateMappingsExecutor = new SimpleBatchedAckListenerTaskExecutor<>() {\n\n @Override\n public Tuple executeTask(\n UpdateMappingsTask updateMappingsTask,\n ClusterState clusterState\n ) throws Exception {\n DataStream dataStream = createDataStreamForUpdatedDataStreamMappings(\n updateMappingsTask.projectId,\n updateMappingsTask.dataStreamName,\n updateMappingsTask.mappingsOverrides,\n clusterState\n );\n ProjectMetadata projectMetadata = clusterState.metadata().getProject(updateMappingsTask.projectId);\n ProjectMetadata.Builder projectMetadataBuilder = ProjectMetadata.builder(projectMetadata);\n projectMetadataBuilder.removeDataStream(updateMappingsTask.dataStreamName);\n projectMetadataBuilder.put(dataStream);\n ClusterState updatedClusterState = ClusterState.builder(clusterState).putProjectMetadata(projectMetadataBuilder).build();\n return new Tuple<>(updatedClusterState, updateMappingsTask);\n }\n };\n this.updateMappingsTaskQueue = clusterService.createTaskQueue(\n \"update-data-stream-settings\",", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2168477523", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130042, - "pr_file": "server/src/main/java/org/elasticsearch/cluster/metadata/MetadataDataStreamsService.java", - "discussion_id": "2168477523", - "commented_code": "@@ -164,6 +166,32 @@ public Tuple executeTask(\n Priority.NORMAL,\n updateSettingsExecutor\n );\n+ ClusterStateTaskExecutor updateMappingsExecutor = new SimpleBatchedAckListenerTaskExecutor<>() {\n+\n+ @Override\n+ public Tuple executeTask(\n+ UpdateMappingsTask updateMappingsTask,\n+ ClusterState clusterState\n+ ) throws Exception {\n+ DataStream dataStream = createDataStreamForUpdatedDataStreamMappings(\n+ updateMappingsTask.projectId,\n+ updateMappingsTask.dataStreamName,\n+ updateMappingsTask.mappingsOverrides,\n+ clusterState\n+ );\n+ ProjectMetadata projectMetadata = clusterState.metadata().getProject(updateMappingsTask.projectId);\n+ ProjectMetadata.Builder projectMetadataBuilder = ProjectMetadata.builder(projectMetadata);\n+ projectMetadataBuilder.removeDataStream(updateMappingsTask.dataStreamName);\n+ projectMetadataBuilder.put(dataStream);\n+ ClusterState updatedClusterState = ClusterState.builder(clusterState).putProjectMetadata(projectMetadataBuilder).build();\n+ return new Tuple<>(updatedClusterState, updateMappingsTask);\n+ }\n+ };\n+ this.updateMappingsTaskQueue = clusterService.createTaskQueue(\n+ \"update-data-stream-settings\",", - 
"comment_created_at": "2025-06-26T08:16:17+00:00", - "comment_author": "Copilot", - "comment_body": "The task queue is named \"update-data-stream-settings\" but is used for mappings; it should be renamed to \"update-data-stream-mappings\" for clarity.\n```suggestion\n \"update-data-stream-mappings\",\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2128503327", - "pr_number": 128930, - "pr_file": "x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleService.java", - "created_at": "2025-06-05T10:23:45+00:00", - "commented_code": "return IndexLifecycleTransition.moveIndexToStep(index, project, newStepKey, nowSupplier, policyRegistry, true);\n }\n\n public ClusterState moveClusterStateToPreviouslyFailedStep(ClusterState currentState, String[] indices) {\n ClusterState newState = currentState;\n public ProjectMetadata moveClusterStateToPreviouslyFailedStep(ProjectMetadata currentProject, String[] indices) {", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2128503327", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 128930, - "pr_file": "x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleService.java", - "discussion_id": "2128503327", - "commented_code": "@@ -170,12 +170,18 @@ public ProjectMetadata moveIndexToStep(ProjectMetadata project, Index index, Ste\n return IndexLifecycleTransition.moveIndexToStep(index, project, newStepKey, nowSupplier, policyRegistry, true);\n }\n \n- public ClusterState moveClusterStateToPreviouslyFailedStep(ClusterState currentState, String[] indices) {\n- ClusterState newState = currentState;\n+ public ProjectMetadata moveClusterStateToPreviouslyFailedStep(ProjectMetadata currentProject, String[] indices) {", - "comment_created_at": "2025-06-05T10:23:45+00:00", - "comment_author": "PeteGillinElastic", - "comment_body": "Method naming time again! This definitely shouldn't have `ClusterState` in its name since it deals with `ProjectMetadata`. Arguably, it should be called `moveIndicesToPreviouslyFailedStep`, since it's really the state of the indices that it's acting on. WDYT?", - "pr_file_module": null - }, - { - "comment_id": "2129069796", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 128930, - "pr_file": "x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleService.java", - "discussion_id": "2128503327", - "commented_code": "@@ -170,12 +170,18 @@ public ProjectMetadata moveIndexToStep(ProjectMetadata project, Index index, Ste\n return IndexLifecycleTransition.moveIndexToStep(index, project, newStepKey, nowSupplier, policyRegistry, true);\n }\n \n- public ClusterState moveClusterStateToPreviouslyFailedStep(ClusterState currentState, String[] indices) {\n- ClusterState newState = currentState;\n+ public ProjectMetadata moveClusterStateToPreviouslyFailedStep(ProjectMetadata currentProject, String[] indices) {", - "comment_created_at": "2025-06-05T14:59:59+00:00", - "comment_author": "nielsbauman", - "comment_body": "Ugh, you're absolutely right. I did it for one of these methods, but forgot to do the others. 
Thanks for raising it.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2128506436", - "pr_number": 128930, - "pr_file": "x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java", - "created_at": "2025-06-05T10:24:50+00:00", - "commented_code": ");\n }\n\n return LifecycleExecutionStateUtils.newClusterStateWithLifecycleState(clusterState, idxMeta.getIndex(), failedState.build());\n return project.withLifecycleState(idxMeta.getIndex(), failedState.build());\n }\n\n /**\n * Move the given index's execution state back to a step that had previously failed. If this is\n * an automatic retry ({@code isAutomaticRetry}), the retry count is incremented.\n */\n static ClusterState moveClusterStateToPreviouslyFailedStep(\n ClusterState currentState,\n static ProjectMetadata moveClusterStateToPreviouslyFailedStep(", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2128506436", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 128930, - "pr_file": "x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java", - "discussion_id": "2128506436", - "commented_code": "@@ -204,22 +201,21 @@ static ClusterState moveClusterStateToErrorStep(\n );\n }\n \n- return LifecycleExecutionStateUtils.newClusterStateWithLifecycleState(clusterState, idxMeta.getIndex(), failedState.build());\n+ return project.withLifecycleState(idxMeta.getIndex(), failedState.build());\n }\n \n /**\n * Move the given index's execution state back to a step that had previously failed. If this is\n * an automatic retry ({@code isAutomaticRetry}), the retry count is incremented.\n */\n- static ClusterState moveClusterStateToPreviouslyFailedStep(\n- ClusterState currentState,\n+ static ProjectMetadata moveClusterStateToPreviouslyFailedStep(", - "comment_created_at": "2025-06-05T10:24:50+00:00", - "comment_author": "PeteGillinElastic", - "comment_body": "Method naming again... Should this one be `moveIndexToPreviouslyFailedStep`, like you've done with `moveIndexToErrorStep` above?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2027497237", - "pr_number": 122491, - "pr_file": "modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/CefProcessorTests.java", - "created_at": "2025-04-03T17:57:14+00:00", - "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. 
Licensed under the \"Elastic License\n * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n * Public License v 1\"; you may not use this file except in compliance with, at\n * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n * License v3.0 only\", or the \"Server Side Public License, v 1\".\n */\n\npackage org.elasticsearch.ingest.common;\n\nimport org.elasticsearch.ingest.IngestDocument;\nimport org.elasticsearch.test.ESTestCase;\nimport org.junit.Before;\n\nimport java.time.Instant;\nimport java.time.ZoneId;\nimport java.time.ZonedDateTime;\nimport java.util.HashMap;\nimport java.util.List;\nimport java.util.Map;\nimport java.util.Set;\n\nimport static java.util.Map.entry;\nimport static org.hamcrest.Matchers.containsInAnyOrder;\nimport static org.hamcrest.Matchers.equalTo;\n\npublic class CefProcessorTests extends ESTestCase {\n\n private IngestDocument document;\n\n @Before\n public void setUp() throws Exception {\n super.setUp();\n }\n\n public void testExecute() {\n Map source = new HashMap<>();\n String message = \"CEF:0|Elastic|Vaporware|1.0.0-alpha|18|Web request|low|eventId=3457 requestMethod=POST \"\n + \"slat=38.915 slong=-77.511 proto=TCP sourceServiceName=httpd requestContext=https://www.google.com \"\n + \"src=89.160.20.156 spt=33876 dst=192.168.10.1 dpt=443 request=https://www.example.com/cart\";\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"Elastic\", \"product\", \"Vaporware\", \"version\", \"1.0.0-alpha\", \"event_class_id\", \"18\")\n ),\n entry(\"name\", \"Web request\"),\n entry(\"severity\", \"low\")\n )\n ),\n entry(\"observer\", Map.of(\"product\", \"Vaporware\", \"vendor\", \"Elastic\", \"version\", \"1.0.0-alpha\")),\n entry(\"event\", Map.of(\"id\", \"3457\", \"code\", \"18\")),\n entry(\n \"source\",\n Map.ofEntries(\n entry(\"ip\", \"89.160.20.156\"),\n entry(\"port\", 33876),\n entry(\"geo\", Map.of(\"location\", Map.of(\"lon\", -77.511, \"lat\", 38.915))),\n entry(\"service\", Map.of(\"name\", \"httpd\"))\n )\n ),\n entry(\"destination\", Map.of(\"ip\", \"192.168.10.1\", \"port\", 443)),\n entry(\"http\", Map.of(\"request\", Map.of(\"method\", \"POST\", \"referrer\", \"https://www.google.com\"))),\n entry(\"network\", Map.of(\"transport\", \"TCP\")),\n entry(\"url\", Map.of(\"original\", \"https://www.example.com/cart\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testInvalidCefFormat() {\n Map invalidSource = new HashMap<>();\n invalidSource.put(\"message\", \"Invalid CEF message\");\n IngestDocument invalidIngestDocument = new IngestDocument(\"index\", \"id\", 1L, null, null, invalidSource);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n expectThrows(IllegalArgumentException.class, () -> processor.execute(invalidIngestDocument));\n }\n\n public void testStandardMessage() {\n String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|\"\n + \"src=10.0.0.192 dst=12.121.122.82 spt=1232 eventId=1 in=4294967296 out=4294967296\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new 
IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"26\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232, \"bytes\", 4294967296L)),\n entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\", \"bytes\", 4294967296L)),\n entry(\"event\", Map.of(\"id\", \"1\", \"code\", \"100\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testHeaderOnly() {\n String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"26\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testEmptyDeviceFields() {\n String message = \"CEF:0|||1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\"device\", Map.of(\"vendor\", \"\", \"product\", \"\", \"version\", \"1.0\", \"event_class_id\", \"100\")),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"\", \"vendor\", \"\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testEscapedPipeInHeader() {\n String message = \"CEF:26|security|threat\\\\|->manager|1.0|100|\"\n + \"trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n 
Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"26\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threat|->manager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threat|->manager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testEqualsSignInHeader() {\n String message = \"CEF:26|security|threat=manager|1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"26\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threat=manager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threat=manager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testEmptyExtensionValue() {\n String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst= spt=1232\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"26\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n entry(\"message\", message)\n )\n );\n }\n\n public void testLeadingWhitespace() {\n String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10| src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", 
\"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testEscapedPipeInExtension() {\n String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this\\\\|has an escaped pipe\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n expectThrows(IllegalArgumentException.class, () -> processor.execute(document));\n }\n\n public void testPipeInMessage() {\n String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this|has a pipe\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\"),\n entry(\"extensions\", Map.of(\"moo\", \"this|has a pipe\"))\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testEqualsInMessage() {\n String message =\n \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this =has = equals\\\\= dst=12.121.122.82 spt=1232\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\"),\n entry(\"extensions\", Map.of(\"moo\", \"this =has = equals=\"))\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n entry(\"source\", Map.of(\"port\", 1232)),\n entry(\"message\", message)\n )\n );\n }\n\n public void testEscapesInExtension() {\n String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|msg=a+b\\\\=c x=c\\\\\\\\d\\\\=z\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new 
IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\"),\n entry(\"extensions\", Map.of(\"x\", \"c\\\\d=z\"))\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"message\", \"a+b=c\")\n )\n );\n }\n\n public void testMalformedExtensionEscape() {\n String message = \"CEF:0|FooBar|Web Gateway|1.2.3.45.67|200|Success|2|rt=Sep 07 2018 14:50:39 cat=Access Log dst=1.1.1.1 \"\n + \"dhost=foo.example.com suser=redacted src=2.2.2.2 requestMethod=POST request='https://foo.example.com/bar/bingo/1' \"\n + \"requestClientApplication='Foo-Bar/2018.1.7; =Email:user@example.com; Guid:test=' cs1= cs1Label=Foo Bar\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"FooBar\", \"product\", \"Web Gateway\", \"version\", \"1.2.3.45.67\", \"event_class_id\", \"200\")\n ),\n entry(\"name\", \"Success\"),\n entry(\"severity\", \"2\"),\n entry(\"extensions\", Map.of(\"deviceCustomString1Label\", \"Foo Bar\", \"deviceEventCategory\", \"Access Log\"))\n )\n ),\n entry(\"event\", Map.of(\"code\", \"200\")),\n entry(\"observer\", Map.of(\"product\", \"Web Gateway\", \"vendor\", \"FooBar\", \"version\", \"1.2.3.45.67\")),\n entry(\"@timestamp\", ZonedDateTime.parse(\"2018-09-07T14:50:39Z\")),\n entry(\"destination\", Map.of(\"ip\", \"1.1.1.1\", \"domain\", \"foo.example.com\")),\n entry(\"source\", Map.of(\"ip\", \"2.2.2.2\", \"user\", Map.of(\"name\", \"redacted\"))),\n entry(\"http\", Map.of(\"request\", Map.of(\"method\", \"POST\"))),\n entry(\"url\", Map.of(\"original\", \"'https://foo.example.com/bar/bingo/1'\")),\n entry(\"user_agent\", Map.of(\"original\", \"'Foo-Bar/2018.1.7; =Email:user@example.com; Guid:test='\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testMultipleMalformedExtensionValues() {\n String message = \"CEF:0|vendor|product|version|event_id|name|Very-High| \"\n + \"msg=Hello World error=Failed because id==old_id user=root angle=106.7<=180\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"vendor\", \"product\", \"product\", \"version\", \"version\", \"event_class_id\", \"event_id\")\n ),\n entry(\"name\", \"name\"),\n entry(\"severity\", \"Very-High\"),\n 
entry(\"extensions\", Map.of(\"id\", \"=old_id\", \"user\", \"root\", \"angle\", \"106.7<=180\", \"error\", \"Failed because\"))\n )\n ),\n entry(\"event\", Map.of(\"code\", \"event_id\")),\n entry(\"observer\", Map.of(\"product\", \"product\", \"vendor\", \"vendor\", \"version\", \"version\")),\n entry(\"message\", \"Hello World\")\n )\n );\n }\n\n public void testPaddedMessage() {\n String message = \"CEF:0|security|threatmanager|1.0|100|message is padded|10|spt=1232 \"\n + \"msg=Trailing space in non-final extensions is preserved src=10.0.0.192 \";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"message is padded\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n entry(\"message\", \"Trailing space in non-final extensions is preserved\")\n )\n );\n }\n\n public void testCrlfMessage() {\n String message = \"CEF:0|security|threatmanager|1.0|100|message is padded|10|\"\n + \"spt=1232 msg=Trailing space in final extensions is not preserved\\t \\r\\ndpt=1234\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"message is padded\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"port\", 1232)),\n entry(\"message\", \"Trailing space in final extensions is not preserved\"),\n entry(\"destination\", Map.of(\"port\", 1234))\n )\n );\n }\n\n public void testTabMessage() {\n String message = \"CEF:0|security|threatmanager|1.0|100|message is padded|10|\"\n + \"spt=1232 msg=Tabs\\tand\\rcontrol\\ncharacters are preserved\\t src=127.0.0.1\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"message is padded\"),\n entry(\"severity\", \"10\")\n )\n 
),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"port\", 1232, \"ip\", \"127.0.0.1\")),\n entry(\"message\", \"Tabs\\tand\\rcontrol\\ncharacters are preserved\")\n )\n );\n }\n\n public void testTabNoSepMessage() {\n String message = \"CEF:0|security|threatmanager|1.0|100|message has tabs|10|spt=1232 msg=Tab is not a separator\\tsrc=127.0.0.1\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"message has tabs\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"port\", 1232, \"ip\", \"127.0.0.1\")),\n entry(\"message\", \"Tab is not a separator\")\n )\n );\n }\n\n public void testEscapedMessage() {\n String message = \"CEF:0|security\\\\compliance|threat\\\\|->manager|1.0|100|message contains escapes|10|\"\n + \"spt=1232 msg=Newlines in messages\\\\\\nare allowed.\\\\\\r\\\\\\nAnd so are carriage feeds\\\\\\\\newlines\\\\\\\\\\\\=. dpt=4432\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.ofEntries(\n entry(\"vendor\", \"security\\\\compliance\"),\n entry(\"product\", \"threat|->manager\"),\n entry(\"version\", \"1.0\"),\n entry(\"event_class_id\", \"100\")\n )\n ),\n entry(\"name\", \"message contains escapes\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threat|->manager\", \"vendor\", \"security\\\\compliance\", \"version\", \"1.0\")),\n entry(\"source\", Map.of(\"port\", 1232)),\n entry(\"message\", \"Newlines in messages\\nare allowed.\\r\\nAnd so are carriage feeds\\\\newlines\\\\=.\"),\n entry(\"destination\", Map.of(\"port\", 4432))\n )\n );\n }\n\n public void testTruncatedHeader() {\n String message = \"CEF:0|SentinelOne|Mgmt|activityID=1111111111111111111 activityType=3505 \"\n + \"siteId=None siteName=None accountId=1222222222222222222 accountName=foo-bar mdr notificationScope=ACCOUNT\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\"device\", Map.of(\"vendor\", \"SentinelOne\", \"product\", 
\"Mgmt\")),\n entry(\n \"extensions\",\n Map.ofEntries(\n entry(\"activityID\", \"1111111111111111111\"),\n entry(\"activityType\", \"3505\"),\n entry(\"siteId\", \"None\"),\n entry(\"siteName\", \"None\"),\n entry(\"accountId\", \"1222222222222222222\"),\n entry(\"accountName\", \"foo-bar mdr\"),\n entry(\"notificationScope\", \"ACCOUNT\")\n )\n )\n )\n ),\n entry(\"observer\", Map.of(\"product\", \"Mgmt\", \"vendor\", \"SentinelOne\")),\n entry(\"message\", message),\n entry(\"error\", Map.of(\"message\", Set.of(\"incomplete CEF header\")))\n )\n );\n }\n\n public void testIgnoreEmptyValuesInExtension() {\n String message = \"CEF:26|security|threat=manager|1.0|100|trojan successfully stopped|10|src= dst=12.121.122.82 spt=\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"26\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threat=manager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\")\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threat=manager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testHyphenInExtensionKey() {\n String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|Some-Key=123456\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"26\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\"),\n entry(\"extensions\", Map.of(\"Some-Key\", \"123456\"))\n )\n ),\n entry(\"event\", Map.of(\"code\", \"100\")),\n entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n entry(\"message\", message)\n )\n );\n }\n\n public void testAllFieldsInExtension() {\n String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|\"\n + \"agt=192.168.0.1 agentDnsDomain=example.com ahost=agentHost aid=agentId amac=00:0a:95:9d:68:16 agentNtDomain=example.org \"\n + \"art=1622547800000 atz=UTC agentTranslatedAddress=10.0.0.1 agentTranslatedZoneExternalID=ext123 agentTranslatedZoneURI=uri \"\n + \"at=agentType av=1.0 agentZoneExternalID=zoneExtId agentZoneURI=zoneUri app=HTTP cnt=1234 in=5678 out=91011 \"\n + \"customerExternalID=custExtId customerURI=custUri dst=192.168.0.2 dlat=37.7749 dlong=-122.4194 \"\n + \"dhost=destHost dmac=00:0a:95:9d:68:16 dntdom=destNtDomain dpt=80 dpid=1234 \"\n + \"dproc=destProc destinationServiceName=destService \"\n + \"destinationTranslatedAddress=10.0.0.2 destinationTranslatedPort=8080 
destinationTranslatedZoneExternalID=destExtId \"\n + \"destinationTranslatedZoneURI=destUri duid=destUserId duser=destUser dpriv=admin destinationZoneExternalID=destZoneExtId \"\n + \"destinationZoneURI=destZoneUri act=blocked dvc=192.168.0.3 cfp1Label=cfp1Label cfp3Label=cfp3Label cfp4Label=cfp4Label \"\n + \"deviceCustomDate1=1622547800000 deviceCustomDate1Label=customDate1Label deviceCustomDate2=1622547900000 \"\n + \"deviceCustomDate2Label=customDate2Label cfp1=1.23 cfp2=2.34 cfp2Label=cfp2Label cfp3=3.45 cfp4=4.56 c6a1=2001:db8::1 \"\n + \"c6a1Label=c6a1Label c6a2=2001:db8::2 c6a2Label=c6a2Label c6a3=2001:db8::3 c6a3Label=c6a3Label c6a4=2001:db8::4 \"\n + \"C6a4Label=c6a4Label cn1=123 cn1Label=cn1Label cn2=234 cn2Label=cn2Label cn3=345 cn3Label=cn3Label cs1=customString1 \"\n + \"cs1Label=cs1Label cs2=customString2 cs2Label=cs2Label cs3=customString3 cs3Label=cs3Label \"\n + \"cs4=customString4 cs4Label=cs4Label \"\n + \"cs5=customString5 cs5Label=cs5Label cs6=customString6 cs6Label=cs6Label deviceDirection=inbound deviceDnsDomain=example.com \"\n + \"cat=category deviceExternalId=extId deviceFacility=16 dvchost=host1 deviceInboundInterface=eth0 dvcmac=00:0a:95:9d:68:16 \"\n + \"deviceNtDomain=example.org deviceOutboundInterface=eth1 devicePayloadId=payloadId dvcpid=5678 deviceProcessName=procName \"\n + \"rt=1622547800000 dtz=UTC deviceTranslatedAddress=10.0.0.3 deviceTranslatedZoneExternalID=transExtId \"\n + \"deviceTranslatedZoneURI=transUri deviceZoneExternalID=zoneExtId deviceZoneURI=zoneUri end=1622547900000 eventId=evt123 \"\n + \"outcome=success externalId=extId fileCreateTime=1622547800000 fileHash=abcd1234 fileId=5678 \"\n + \"fileModificationTime=1622547900000 \"\n + \"fname=file.txt filePath=/path/to/file filePermission=rw-r--r-- fsize=1024 fileType=txt flexDate1=1622547800000 \"\n + \"flexDate1Label=flexDate1Label flexString1=flexString1 flexString2=flexString2 flexString1Label=flexString1Label \"\n + \"flexString2Label=flexString2Label msg=message oldFileCreateTime=1622547800000 oldFileHash=oldHash oldFileId=oldId \"\n + \"oldFileModificationTime=1622547900000 oldFileName=oldFile oldFilePath=/old/path \"\n + \"oldFilePermission=rw-r--r-- oldFileSize=2048 \"\n + \"oldFileType=oldType rawEvent=rawEvent reason=reason requestClientApplication=Mozilla requestContext=referrer \"\n + \"requestCookies=cookies requestMethod=GET request=url src=192.168.0.4 sourceDnsDomain=sourceDomain \"\n + \"slat=37.7749 slong=-122.4194 \"\n + \"shost=sourceHost smac=00:0a:95:9d:68:16 sntdom=sourceNtDomain spt=443 spid=1234 \"\n + \"sproc=sourceProc sourceServiceName=sourceService \"\n + \"sourceTranslatedAddress=10.0.0.4 sourceTranslatedPort=8081 sourceTranslatedZoneExternalID=sourceExtId \"\n + \"sourceTranslatedZoneURI=sourceUri suid=sourceUserId suser=sourceUser spriv=sourcePriv sourceZoneExternalID=sourceZoneExtId \"\n + \"sourceZoneURI=sourceZoneUri start=1622547800000 proto=TCP type=1 catdt=catDeviceType mrt=1622547800000\";\n Map source = new HashMap<>();\n source.put(\"message\", message);\n document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n processor.execute(document);\n assertMapsEqual(\n document.getSource(),\n Map.ofEntries(\n entry(\n \"cef\",\n Map.ofEntries(\n entry(\"version\", \"0\"),\n entry(\n \"device\",\n Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n ),\n 
entry(\"name\", \"trojan successfully stopped\"),\n entry(\"severity\", \"10\"),\n entry(\n \"extensions\",\n Map.ofEntries(\n entry(\"deviceNtDomain\", \"example.org\"),\n entry(\"agentZoneExternalID\", \"zoneExtId\"),\n entry(\"agentTimeZone\", \"UTC\"),\n entry(\"deviceCustomIPv6Address1Label\", \"c6a1Label\"),\n entry(\"deviceCustomString1\", \"customString1\"),\n entry(\"deviceCustomIPv6Address2Label\", \"c6a2Label\"),\n entry(\"deviceCustomNumber3\", 345L),\n entry(\"deviceCustomFloatingPoint1\", 1.23f),\n entry(\"deviceCustomNumber2\", 234L),\n entry(\"deviceCustomFloatingPoint2\", 2.34f),\n entry(\"deviceCustomFloatingPoint3\", 3.45f),\n entry(\"deviceCustomFloatingPoint4\", 4.56f),\n entry(\"flexDate1\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n entry(\"destinationTranslatedZoneExternalID\", \"destExtId\"),\n entry(\"deviceCustomNumber1\", 123L),\n entry(\"deviceEventCategory\", \"category\"),\n entry(\"deviceCustomString6Label\", \"cs6Label\"),\n entry(\"deviceCustomNumber2Label\", \"cn2Label\"),\n entry(\"flexString1Label\", \"flexString1Label\"),\n entry(\"deviceCustomString5Label\", \"cs5Label\"),\n entry(\"agentZoneURI\", \"zoneUri\"),\n entry(\"deviceCustomString2Label\", \"cs2Label\"),\n entry(\"deviceCustomDate2Label\", \"customDate2Label\"),\n entry(\"deviceCustomNumber1Label\", \"cn1Label\"),\n entry(\"oldFileType\", \"oldType\"),\n entry(\"destinationZoneExternalID\", \"destZoneExtId\"),\n entry(\"categoryDeviceType\", \"catDeviceType\"),\n entry(\"deviceZoneURI\", \"zoneUri\"),\n entry(\"sourceTranslatedZoneExternalID\", \"sourceExtId\"),\n entry(\"agentTranslatedAddress\", \"10.0.0.1\"),\n entry(\"requestCookies\", \"cookies\"),\n entry(\"deviceCustomIPv6Address3\", \"2001:db8::3\"),\n entry(\"oldFilePath\", \"/old/path\"),\n entry(\"deviceCustomIPv6Address2\", \"2001:db8::2\"),\n entry(\"deviceCustomIPv6Address1\", \"2001:db8::1\"),\n entry(\"oldFileId\", \"oldId\"),\n entry(\"deviceTranslatedZoneExternalID\", \"transExtId\"),\n entry(\"deviceCustomFloatingPoint2Label\", \"cfp2Label\"),\n entry(\"deviceTranslatedZoneURI\", \"transUri\"),\n entry(\"deviceCustomIPv6Address4Label\", \"c6a4Label\"),\n entry(\"agentTranslatedZoneURI\", \"uri\"),\n entry(\"oldFilePermission\", \"rw-r--r--\"),\n entry(\"deviceCustomIPv6Address4\", \"2001:db8::4\"),\n entry(\"sourceZoneURI\", \"sourceZoneUri\"),\n entry(\"deviceCustomFloatingPoint3Label\", \"cfp3Label\"),\n entry(\"agentTranslatedZoneExternalID\", \"ext123\"),\n entry(\"destinationZoneURI\", \"destZoneUri\"),\n entry(\"flexDate1Label\", \"flexDate1Label\"),\n entry(\"agentNtDomain\", \"example.org\"),\n entry(\"deviceCustomDate2\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n entry(\"deviceCustomDate1\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n entry(\"deviceCustomString3Label\", \"cs3Label\"),\n entry(\"deviceCustomDate1Label\", \"customDate1Label\"),\n entry(\"destinationTranslatedZoneURI\", \"destUri\"),\n entry(\"oldFileModificationTime\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n entry(\"deviceCustomFloatingPoint1Label\", \"cfp1Label\"),\n entry(\"deviceCustomIPv6Address3Label\", \"c6a3Label\"),\n entry(\"deviceCustomFloatingPoint4Label\", \"cfp4Label\"),\n entry(\"oldFileSize\", 2048),\n entry(\"externalId\", \"extId\"),\n entry(\"baseEventCount\", 1234),\n entry(\"flexString2\", \"flexString2\"),\n entry(\"deviceCustomNumber3Label\", \"cn3Label\"),\n entry(\"flexString1\", \"flexString1\"),\n entry(\"deviceCustomString4Label\", \"cs4Label\"),\n entry(\"flexString2Label\", 
\"flexString2Label\"),\n entry(\"deviceCustomString3\", \"customString3\"),\n entry(\"deviceCustomString2\", \"customString2\"),\n entry(\"deviceCustomString1Label\", \"cs1Label\"),\n entry(\"deviceCustomString5\", \"customString5\"),\n entry(\"deviceCustomString4\", \"customString4\"),\n entry(\"deviceZoneExternalID\", \"zoneExtId\"),\n entry(\"deviceCustomString6\", \"customString6\"),\n entry(\"oldFileName\", \"oldFile\"),\n entry(\"sourceZoneExternalID\", \"sourceZoneExtId\"),\n entry(\"oldFileHash\", \"oldHash\"),\n entry(\"sourceTranslatedZoneURI\", \"sourceUri\"),\n entry(\"oldFileCreateTime\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\"))\n )\n )\n )\n ),\n entry(\"host\", Map.of(\"nat\", Map.of(\"ip\", \"10.0.0.3\"))),\n entry(\"log\", Map.of(\"syslog\", Map.of(\"facility\", Map.of(\"code\", 16L)))),\n entry(\n \"observer\",\n Map.ofEntries(\n entry(\"ingress\", Map.of(\"interface\", Map.of(\"name\", \"eth0\"))),\n entry(\"registered_domain\", \"example.com\"),\n entry(\"product\", \"threatmanager\"),\n entry(\"hostname\", \"host1\"),\n entry(\"vendor\", \"security\"),\n entry(\"ip\", \"192.168.0.3\"),\n entry(\"name\", \"extId\"),\n entry(\"version\", \"1.0\"),\n entry(\"mac\", \"00:0a:95:9d:68:16\"),\n entry(\"egress\", Map.of(\"interface\", Map.of(\"name\", \"eth1\")))\n )\n ),\n entry(\n \"agent\",\n Map.ofEntries(\n entry(\"ip\", \"192.168.0.1\"),\n entry(\"name\", \"example.com\"),\n entry(\"id\", \"agentId\"),\n entry(\"type\", \"agentType\"),\n entry(\"version\", \"1.0\"),\n entry(\"mac\", \"00:0a:95:9d:68:16\")\n )\n ),\n entry(\"process\", Map.of(\"name\", \"procName\", \"pid\", 5678L)),\n entry(\n \"destination\",\n Map.ofEntries(\n entry(\"nat\", Map.of(\"port\", 8080, \"ip\", \"10.0.0.2\")),\n entry(\"geo\", Map.of(\"location\", Map.of(\"lon\", -122.4194, \"lat\", 37.7749))),\n entry(\"registered_domain\", \"destNtDomain\"),\n entry(\"process\", Map.of(\"name\", \"destProc\", \"pid\", 1234L)),\n entry(\"port\", 80),\n entry(\"bytes\", 91011L),\n entry(\"service\", Map.of(\"name\", \"destService\")),\n entry(\"domain\", \"destHost\"),\n entry(\"ip\", \"192.168.0.2\"),\n entry(\"user\", Map.of(\"name\", \"destUser\", \"id\", \"destUserId\", \"group\", Map.of(\"name\", \"admin\"))),\n entry(\"mac\", \"00:0a:95:9d:68:16\")\n )\n ),\n entry(\n \"source\",\n Map.ofEntries(\n entry(\"geo\", Map.of(\"location\", Map.of(\"lon\", -122.4194, \"lat\", 37.7749))),\n entry(\"nat\", Map.of(\"port\", 8081, \"ip\", \"10.0.0.4\")),\n entry(\"registered_domain\", \"sourceNtDomain\"),\n entry(\"process\", Map.of(\"name\", \"sourceProc\", \"pid\", 1234L)),\n entry(\"port\", 443),\n entry(\"service\", Map.of(\"name\", \"sourceService\")),\n entry(\"bytes\", 5678L),\n entry(\"ip\", \"192.168.0.4\"),\n entry(\"domain\", \"sourceDomain\"),\n entry(\"user\", Map.of(\"name\", \"sourceUser\", \"id\", \"sourceUserId\", \"group\", Map.of(\"name\", \"sourcePriv\"))),\n entry(\"mac\", \"00:0a:95:9d:68:16\")\n )\n ),\n entry(\"message\", \"message\"),\n entry(\"url\", Map.of(\"original\", \"url\")),\n entry(\"network\", Map.of(\"protocol\", \"HTTP\", \"transport\", \"TCP\", \"direction\", \"inbound\")),\n entry(\n \"file\",\n Map.ofEntries(\n entry(\"inode\", \"5678\"),\n entry(\"path\", \"/path/to/file\"),\n entry(\"size\", 1024L),\n entry(\"created\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n entry(\"name\", \"file.txt\"),\n entry(\"mtime\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n entry(\"type\", \"txt\"),\n entry(\"hash\", \"abcd1234\"),\n entry(\"group\", \"rw-r--r--\")\n )\n ),\n 
entry(\"@timestamp\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n entry(\"organization\", Map.of(\"name\", \"custUri\", \"id\", \"custExtId\")),\n entry(\n \"event\",\n Map.ofEntries(\n entry(\"action\", \"blocked\"),\n entry(\"timezone\", \"UTC\"),\n entry(\"end\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n entry(\"id\", \"evt123\"),\n entry(\"outcome\", \"success\"),\n entry(\"start\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n entry(\"reason\", \"reason\"),\n entry(\"ingested\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n entry(\"kind\", 1),\n entry(\"original\", \"rawEvent\"),\n entry(\"created\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n entry(\"code\", \"100\")\n )\n ),\n entry(\"user_agent\", Map.of(\"original\", \"Mozilla\")),\n entry(\"http\", Map.of(\"request\", Map.of(\"referrer\", \"referrer\", \"method\", \"GET\")))\n )\n );\n }\n\n // Date parsing tests\n public void testToTimestampWithUnixTimestamp() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n String unixTimestamp = \"1633072800000\"; // Example Unix timestamp in milliseconds\n ZonedDateTime expected = ZonedDateTime.ofInstant(Instant.ofEpochMilli(Long.parseLong(unixTimestamp)), ZoneId.of(\"UTC\"));\n ZonedDateTime result = parser.toTimestamp(unixTimestamp);\n assertEquals(expected, result);\n }\n\n public void testToTimestampWithFormattedDate() {\n CefParser parser = new CefParser(ZoneId.of(\"Europe/Stockholm\"), false);\n String formattedDate = \"Oct 01 2021 12:00:00 UTC\"; // Example formatted date\n ZonedDateTime expected = ZonedDateTime.parse(\"2021-10-01T14:00+02:00[Europe/Stockholm]\");\n ZonedDateTime result = parser.toTimestamp(formattedDate);\n assertEquals(expected, result);\n }\n\n public void testToTimestampWithFormattedDateWithoutYear() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n String formattedDate = \"Oct 01 12:00:00 UTC\"; // Example formatted date without year\n int currentYear = ZonedDateTime.now(ZoneId.of(\"UTC\")).getYear();\n ZonedDateTime expected = ZonedDateTime.parse(currentYear + \"-10-01T12:00:00Z[UTC]\");\n ZonedDateTime result = parser.toTimestamp(formattedDate);\n assertEquals(expected, result);\n }\n\n public void testToTimestampWithFormattedDateWithoutTimezone() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n String formattedDate = \"Sep 07 2018 14:50:39\"; // Example formatted date without year\n ZonedDateTime expected = ZonedDateTime.parse(\"2018-09-07T14:50:39Z[UTC]\");\n ZonedDateTime result = parser.toTimestamp(formattedDate);\n assertEquals(expected, result);\n }\n\n public void testToTimestampWithInvalidDate() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n String invalidDate = \"invalid date\";\n assertThrows(IllegalArgumentException.class, () -> parser.toTimestamp(invalidDate));\n }\n\n public void testToMacAddressWithSeparators() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n List macAddresses = List.of(\n // EUI-48 (with separators).\n \"00:0D:60:AF:1B:61\",\n \"00-0D-60-AF-1B-61\",\n \"000D.60AF.1B61\",\n\n // EUI-64 (with separators).\n \"00:0D:60:FF:FE:AF:1B:61\",\n \"00-0D-60-FF-FE-AF-1B-61\",\n \"000D.60FF.FEAF.1B61\"\n );\n macAddresses.forEach(macAddress -> {\n String result = parser.toMACAddress(macAddress);\n assertEquals(macAddress, result);\n });\n }\n\n public void testEUI48ToMacAddressWithOutSeparators() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n String macAddress = \"000D60AF1B61\";\n String result = 
parser.toMACAddress(macAddress);\n assertEquals(\"00:0D:60:AF:1B:61\", result);\n }\n\n public void testEUI64ToMacAddressWithOutSeparators() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n String macAddress = \"000D60FFFEAF1B61\";\n String result = parser.toMACAddress(macAddress);\n assertEquals(\"00:0D:60:FF:FE:AF:1B:61\", result);\n }\n\n public void toIP_validIPv4Address() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n String result = parser.toIP(\"192.168.1.1\");\n assertEquals(\"192.168.1.1\", result);\n }\n\n public void toIP_validIPv6Address() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n String result = parser.toIP(\"2001:0db8:85a3:0000:0000:8a2e:0370:7334\");\n assertEquals(\"2001:db8:85a3::8a2e:370:7334\", result);\n }\n\n public void toIP_invalidIPAddress() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> parser.toIP(\"invalid_ip\"));\n assertEquals(\"Invalid IP address format\", exception.getMessage());\n }\n\n public void toIP_emptyString() {\n CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> parser.toIP(\"\"));\n assertEquals(\"Invalid IP address format\", exception.getMessage());\n }\n\n public void toIP_nullString() {", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2027497237", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 122491, - "pr_file": "modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/CefProcessorTests.java", - "discussion_id": "2027497237", - "commented_code": "@@ -0,0 +1,1104 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+import org.elasticsearch.test.ESTestCase;\n+import org.junit.Before;\n+\n+import java.time.Instant;\n+import java.time.ZoneId;\n+import java.time.ZonedDateTime;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.Set;\n+\n+import static java.util.Map.entry;\n+import static org.hamcrest.Matchers.containsInAnyOrder;\n+import static org.hamcrest.Matchers.equalTo;\n+\n+public class CefProcessorTests extends ESTestCase {\n+\n+ private IngestDocument document;\n+\n+ @Before\n+ public void setUp() throws Exception {\n+ super.setUp();\n+ }\n+\n+ public void testExecute() {\n+ Map source = new HashMap<>();\n+ String message = \"CEF:0|Elastic|Vaporware|1.0.0-alpha|18|Web request|low|eventId=3457 requestMethod=POST \"\n+ + \"slat=38.915 slong=-77.511 proto=TCP sourceServiceName=httpd requestContext=https://www.google.com \"\n+ + \"src=89.160.20.156 spt=33876 dst=192.168.10.1 dpt=443 request=https://www.example.com/cart\";\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"Elastic\", \"product\", \"Vaporware\", \"version\", \"1.0.0-alpha\", \"event_class_id\", \"18\")\n+ ),\n+ entry(\"name\", \"Web request\"),\n+ entry(\"severity\", \"low\")\n+ )\n+ ),\n+ entry(\"observer\", Map.of(\"product\", \"Vaporware\", \"vendor\", \"Elastic\", \"version\", \"1.0.0-alpha\")),\n+ entry(\"event\", Map.of(\"id\", \"3457\", \"code\", \"18\")),\n+ entry(\n+ \"source\",\n+ Map.ofEntries(\n+ entry(\"ip\", \"89.160.20.156\"),\n+ entry(\"port\", 33876),\n+ entry(\"geo\", Map.of(\"location\", Map.of(\"lon\", -77.511, \"lat\", 38.915))),\n+ entry(\"service\", Map.of(\"name\", \"httpd\"))\n+ )\n+ ),\n+ entry(\"destination\", Map.of(\"ip\", \"192.168.10.1\", \"port\", 443)),\n+ entry(\"http\", Map.of(\"request\", Map.of(\"method\", \"POST\", \"referrer\", \"https://www.google.com\"))),\n+ entry(\"network\", Map.of(\"transport\", \"TCP\")),\n+ entry(\"url\", Map.of(\"original\", \"https://www.example.com/cart\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testInvalidCefFormat() {\n+ Map invalidSource = new HashMap<>();\n+ invalidSource.put(\"message\", \"Invalid CEF message\");\n+ IngestDocument invalidIngestDocument = new IngestDocument(\"index\", \"id\", 1L, null, null, invalidSource);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ expectThrows(IllegalArgumentException.class, () -> processor.execute(invalidIngestDocument));\n+ }\n+\n+ public void testStandardMessage() {\n+ String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|\"\n+ + \"src=10.0.0.192 dst=12.121.122.82 spt=1232 eventId=1 in=4294967296 
out=4294967296\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232, \"bytes\", 4294967296L)),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\", \"bytes\", 4294967296L)),\n+ entry(\"event\", Map.of(\"id\", \"1\", \"code\", \"100\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testHeaderOnly() {\n+ String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEmptyDeviceFields() {\n+ String message = \"CEF:0|||1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\"device\", Map.of(\"vendor\", \"\", \"product\", \"\", \"version\", \"1.0\", \"event_class_id\", \"100\")),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"\", \"vendor\", \"\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEscapedPipeInHeader() {\n+ String message = \"CEF:26|security|threat\\\\|->manager|1.0|100|\"\n+ + \"trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, 
source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threat|->manager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threat|->manager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEqualsSignInHeader() {\n+ String message = \"CEF:26|security|threat=manager|1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threat=manager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threat=manager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEmptyExtensionValue() {\n+ String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst= spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testLeadingWhitespace() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10| src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new 
CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEscapedPipeInExtension() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this\\\\|has an escaped pipe\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ expectThrows(IllegalArgumentException.class, () -> processor.execute(document));\n+ }\n+\n+ public void testPipeInMessage() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this|has a pipe\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\"),\n+ entry(\"extensions\", Map.of(\"moo\", \"this|has a pipe\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEqualsInMessage() {\n+ String message =\n+ \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this =has = equals\\\\= dst=12.121.122.82 spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\"),\n+ entry(\"extensions\", Map.of(\"moo\", \"this =has = equals=\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ 
entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n+ entry(\"source\", Map.of(\"port\", 1232)),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEscapesInExtension() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|msg=a+b\\\\=c x=c\\\\\\\\d\\\\=z\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\"),\n+ entry(\"extensions\", Map.of(\"x\", \"c\\\\d=z\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"message\", \"a+b=c\")\n+ )\n+ );\n+ }\n+\n+ public void testMalformedExtensionEscape() {\n+ String message = \"CEF:0|FooBar|Web Gateway|1.2.3.45.67|200|Success|2|rt=Sep 07 2018 14:50:39 cat=Access Log dst=1.1.1.1 \"\n+ + \"dhost=foo.example.com suser=redacted src=2.2.2.2 requestMethod=POST request='https://foo.example.com/bar/bingo/1' \"\n+ + \"requestClientApplication='Foo-Bar/2018.1.7; =Email:user@example.com; Guid:test=' cs1= cs1Label=Foo Bar\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"FooBar\", \"product\", \"Web Gateway\", \"version\", \"1.2.3.45.67\", \"event_class_id\", \"200\")\n+ ),\n+ entry(\"name\", \"Success\"),\n+ entry(\"severity\", \"2\"),\n+ entry(\"extensions\", Map.of(\"deviceCustomString1Label\", \"Foo Bar\", \"deviceEventCategory\", \"Access Log\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"200\")),\n+ entry(\"observer\", Map.of(\"product\", \"Web Gateway\", \"vendor\", \"FooBar\", \"version\", \"1.2.3.45.67\")),\n+ entry(\"@timestamp\", ZonedDateTime.parse(\"2018-09-07T14:50:39Z\")),\n+ entry(\"destination\", Map.of(\"ip\", \"1.1.1.1\", \"domain\", \"foo.example.com\")),\n+ entry(\"source\", Map.of(\"ip\", \"2.2.2.2\", \"user\", Map.of(\"name\", \"redacted\"))),\n+ entry(\"http\", Map.of(\"request\", Map.of(\"method\", \"POST\"))),\n+ entry(\"url\", Map.of(\"original\", \"'https://foo.example.com/bar/bingo/1'\")),\n+ entry(\"user_agent\", Map.of(\"original\", \"'Foo-Bar/2018.1.7; =Email:user@example.com; Guid:test='\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testMultipleMalformedExtensionValues() {\n+ String message = \"CEF:0|vendor|product|version|event_id|name|Very-High| \"\n+ + \"msg=Hello World error=Failed because id==old_id user=root angle=106.7<=180\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ 
CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"vendor\", \"product\", \"product\", \"version\", \"version\", \"event_class_id\", \"event_id\")\n+ ),\n+ entry(\"name\", \"name\"),\n+ entry(\"severity\", \"Very-High\"),\n+ entry(\"extensions\", Map.of(\"id\", \"=old_id\", \"user\", \"root\", \"angle\", \"106.7<=180\", \"error\", \"Failed because\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"event_id\")),\n+ entry(\"observer\", Map.of(\"product\", \"product\", \"vendor\", \"vendor\", \"version\", \"version\")),\n+ entry(\"message\", \"Hello World\")\n+ )\n+ );\n+ }\n+\n+ public void testPaddedMessage() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|message is padded|10|spt=1232 \"\n+ + \"msg=Trailing space in non-final extensions is preserved src=10.0.0.192 \";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"message is padded\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"message\", \"Trailing space in non-final extensions is preserved\")\n+ )\n+ );\n+ }\n+\n+ public void testCrlfMessage() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|message is padded|10|\"\n+ + \"spt=1232 msg=Trailing space in final extensions is not preserved\\t \\r\\ndpt=1234\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"message is padded\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"port\", 1232)),\n+ entry(\"message\", \"Trailing space in final extensions is not preserved\"),\n+ entry(\"destination\", Map.of(\"port\", 1234))\n+ )\n+ );\n+ }\n+\n+ public void testTabMessage() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|message is padded|10|\"\n+ + \"spt=1232 msg=Tabs\\tand\\rcontrol\\ncharacters are preserved\\t src=127.0.0.1\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", 
message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"message is padded\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"port\", 1232, \"ip\", \"127.0.0.1\")),\n+ entry(\"message\", \"Tabs\\tand\\rcontrol\\ncharacters are preserved\")\n+ )\n+ );\n+ }\n+\n+ public void testTabNoSepMessage() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|message has tabs|10|spt=1232 msg=Tab is not a separator\\tsrc=127.0.0.1\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"message has tabs\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"port\", 1232, \"ip\", \"127.0.0.1\")),\n+ entry(\"message\", \"Tab is not a separator\")\n+ )\n+ );\n+ }\n+\n+ public void testEscapedMessage() {\n+ String message = \"CEF:0|security\\\\compliance|threat\\\\|->manager|1.0|100|message contains escapes|10|\"\n+ + \"spt=1232 msg=Newlines in messages\\\\\\nare allowed.\\\\\\r\\\\\\nAnd so are carriage feeds\\\\\\\\newlines\\\\\\\\\\\\=. 
dpt=4432\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.ofEntries(\n+ entry(\"vendor\", \"security\\\\compliance\"),\n+ entry(\"product\", \"threat|->manager\"),\n+ entry(\"version\", \"1.0\"),\n+ entry(\"event_class_id\", \"100\")\n+ )\n+ ),\n+ entry(\"name\", \"message contains escapes\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threat|->manager\", \"vendor\", \"security\\\\compliance\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"port\", 1232)),\n+ entry(\"message\", \"Newlines in messages\\nare allowed.\\r\\nAnd so are carriage feeds\\\\newlines\\\\=.\"),\n+ entry(\"destination\", Map.of(\"port\", 4432))\n+ )\n+ );\n+ }\n+\n+ public void testTruncatedHeader() {\n+ String message = \"CEF:0|SentinelOne|Mgmt|activityID=1111111111111111111 activityType=3505 \"\n+ + \"siteId=None siteName=None accountId=1222222222222222222 accountName=foo-bar mdr notificationScope=ACCOUNT\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\"device\", Map.of(\"vendor\", \"SentinelOne\", \"product\", \"Mgmt\")),\n+ entry(\n+ \"extensions\",\n+ Map.ofEntries(\n+ entry(\"activityID\", \"1111111111111111111\"),\n+ entry(\"activityType\", \"3505\"),\n+ entry(\"siteId\", \"None\"),\n+ entry(\"siteName\", \"None\"),\n+ entry(\"accountId\", \"1222222222222222222\"),\n+ entry(\"accountName\", \"foo-bar mdr\"),\n+ entry(\"notificationScope\", \"ACCOUNT\")\n+ )\n+ )\n+ )\n+ ),\n+ entry(\"observer\", Map.of(\"product\", \"Mgmt\", \"vendor\", \"SentinelOne\")),\n+ entry(\"message\", message),\n+ entry(\"error\", Map.of(\"message\", Set.of(\"incomplete CEF header\")))\n+ )\n+ );\n+ }\n+\n+ public void testIgnoreEmptyValuesInExtension() {\n+ String message = \"CEF:26|security|threat=manager|1.0|100|trojan successfully stopped|10|src= dst=12.121.122.82 spt=\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threat=manager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threat=manager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"destination\", Map.of(\"ip\", 
\"12.121.122.82\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testHyphenInExtensionKey() {\n+ String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|Some-Key=123456\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\"),\n+ entry(\"extensions\", Map.of(\"Some-Key\", \"123456\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testAllFieldsInExtension() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|\"\n+ + \"agt=192.168.0.1 agentDnsDomain=example.com ahost=agentHost aid=agentId amac=00:0a:95:9d:68:16 agentNtDomain=example.org \"\n+ + \"art=1622547800000 atz=UTC agentTranslatedAddress=10.0.0.1 agentTranslatedZoneExternalID=ext123 agentTranslatedZoneURI=uri \"\n+ + \"at=agentType av=1.0 agentZoneExternalID=zoneExtId agentZoneURI=zoneUri app=HTTP cnt=1234 in=5678 out=91011 \"\n+ + \"customerExternalID=custExtId customerURI=custUri dst=192.168.0.2 dlat=37.7749 dlong=-122.4194 \"\n+ + \"dhost=destHost dmac=00:0a:95:9d:68:16 dntdom=destNtDomain dpt=80 dpid=1234 \"\n+ + \"dproc=destProc destinationServiceName=destService \"\n+ + \"destinationTranslatedAddress=10.0.0.2 destinationTranslatedPort=8080 destinationTranslatedZoneExternalID=destExtId \"\n+ + \"destinationTranslatedZoneURI=destUri duid=destUserId duser=destUser dpriv=admin destinationZoneExternalID=destZoneExtId \"\n+ + \"destinationZoneURI=destZoneUri act=blocked dvc=192.168.0.3 cfp1Label=cfp1Label cfp3Label=cfp3Label cfp4Label=cfp4Label \"\n+ + \"deviceCustomDate1=1622547800000 deviceCustomDate1Label=customDate1Label deviceCustomDate2=1622547900000 \"\n+ + \"deviceCustomDate2Label=customDate2Label cfp1=1.23 cfp2=2.34 cfp2Label=cfp2Label cfp3=3.45 cfp4=4.56 c6a1=2001:db8::1 \"\n+ + \"c6a1Label=c6a1Label c6a2=2001:db8::2 c6a2Label=c6a2Label c6a3=2001:db8::3 c6a3Label=c6a3Label c6a4=2001:db8::4 \"\n+ + \"C6a4Label=c6a4Label cn1=123 cn1Label=cn1Label cn2=234 cn2Label=cn2Label cn3=345 cn3Label=cn3Label cs1=customString1 \"\n+ + \"cs1Label=cs1Label cs2=customString2 cs2Label=cs2Label cs3=customString3 cs3Label=cs3Label \"\n+ + \"cs4=customString4 cs4Label=cs4Label \"\n+ + \"cs5=customString5 cs5Label=cs5Label cs6=customString6 cs6Label=cs6Label deviceDirection=inbound deviceDnsDomain=example.com \"\n+ + \"cat=category deviceExternalId=extId deviceFacility=16 dvchost=host1 deviceInboundInterface=eth0 dvcmac=00:0a:95:9d:68:16 \"\n+ + \"deviceNtDomain=example.org deviceOutboundInterface=eth1 devicePayloadId=payloadId dvcpid=5678 deviceProcessName=procName \"\n+ + \"rt=1622547800000 dtz=UTC deviceTranslatedAddress=10.0.0.3 deviceTranslatedZoneExternalID=transExtId \"\n+ + \"deviceTranslatedZoneURI=transUri deviceZoneExternalID=zoneExtId deviceZoneURI=zoneUri 
end=1622547900000 eventId=evt123 \"\n+ + \"outcome=success externalId=extId fileCreateTime=1622547800000 fileHash=abcd1234 fileId=5678 \"\n+ + \"fileModificationTime=1622547900000 \"\n+ + \"fname=file.txt filePath=/path/to/file filePermission=rw-r--r-- fsize=1024 fileType=txt flexDate1=1622547800000 \"\n+ + \"flexDate1Label=flexDate1Label flexString1=flexString1 flexString2=flexString2 flexString1Label=flexString1Label \"\n+ + \"flexString2Label=flexString2Label msg=message oldFileCreateTime=1622547800000 oldFileHash=oldHash oldFileId=oldId \"\n+ + \"oldFileModificationTime=1622547900000 oldFileName=oldFile oldFilePath=/old/path \"\n+ + \"oldFilePermission=rw-r--r-- oldFileSize=2048 \"\n+ + \"oldFileType=oldType rawEvent=rawEvent reason=reason requestClientApplication=Mozilla requestContext=referrer \"\n+ + \"requestCookies=cookies requestMethod=GET request=url src=192.168.0.4 sourceDnsDomain=sourceDomain \"\n+ + \"slat=37.7749 slong=-122.4194 \"\n+ + \"shost=sourceHost smac=00:0a:95:9d:68:16 sntdom=sourceNtDomain spt=443 spid=1234 \"\n+ + \"sproc=sourceProc sourceServiceName=sourceService \"\n+ + \"sourceTranslatedAddress=10.0.0.4 sourceTranslatedPort=8081 sourceTranslatedZoneExternalID=sourceExtId \"\n+ + \"sourceTranslatedZoneURI=sourceUri suid=sourceUserId suser=sourceUser spriv=sourcePriv sourceZoneExternalID=sourceZoneExtId \"\n+ + \"sourceZoneURI=sourceZoneUri start=1622547800000 proto=TCP type=1 catdt=catDeviceType mrt=1622547800000\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\"),\n+ entry(\n+ \"extensions\",\n+ Map.ofEntries(\n+ entry(\"deviceNtDomain\", \"example.org\"),\n+ entry(\"agentZoneExternalID\", \"zoneExtId\"),\n+ entry(\"agentTimeZone\", \"UTC\"),\n+ entry(\"deviceCustomIPv6Address1Label\", \"c6a1Label\"),\n+ entry(\"deviceCustomString1\", \"customString1\"),\n+ entry(\"deviceCustomIPv6Address2Label\", \"c6a2Label\"),\n+ entry(\"deviceCustomNumber3\", 345L),\n+ entry(\"deviceCustomFloatingPoint1\", 1.23f),\n+ entry(\"deviceCustomNumber2\", 234L),\n+ entry(\"deviceCustomFloatingPoint2\", 2.34f),\n+ entry(\"deviceCustomFloatingPoint3\", 3.45f),\n+ entry(\"deviceCustomFloatingPoint4\", 4.56f),\n+ entry(\"flexDate1\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"destinationTranslatedZoneExternalID\", \"destExtId\"),\n+ entry(\"deviceCustomNumber1\", 123L),\n+ entry(\"deviceEventCategory\", \"category\"),\n+ entry(\"deviceCustomString6Label\", \"cs6Label\"),\n+ entry(\"deviceCustomNumber2Label\", \"cn2Label\"),\n+ entry(\"flexString1Label\", \"flexString1Label\"),\n+ entry(\"deviceCustomString5Label\", \"cs5Label\"),\n+ entry(\"agentZoneURI\", \"zoneUri\"),\n+ entry(\"deviceCustomString2Label\", \"cs2Label\"),\n+ entry(\"deviceCustomDate2Label\", \"customDate2Label\"),\n+ entry(\"deviceCustomNumber1Label\", \"cn1Label\"),\n+ entry(\"oldFileType\", \"oldType\"),\n+ entry(\"destinationZoneExternalID\", \"destZoneExtId\"),\n+ entry(\"categoryDeviceType\", 
\"catDeviceType\"),\n+ entry(\"deviceZoneURI\", \"zoneUri\"),\n+ entry(\"sourceTranslatedZoneExternalID\", \"sourceExtId\"),\n+ entry(\"agentTranslatedAddress\", \"10.0.0.1\"),\n+ entry(\"requestCookies\", \"cookies\"),\n+ entry(\"deviceCustomIPv6Address3\", \"2001:db8::3\"),\n+ entry(\"oldFilePath\", \"/old/path\"),\n+ entry(\"deviceCustomIPv6Address2\", \"2001:db8::2\"),\n+ entry(\"deviceCustomIPv6Address1\", \"2001:db8::1\"),\n+ entry(\"oldFileId\", \"oldId\"),\n+ entry(\"deviceTranslatedZoneExternalID\", \"transExtId\"),\n+ entry(\"deviceCustomFloatingPoint2Label\", \"cfp2Label\"),\n+ entry(\"deviceTranslatedZoneURI\", \"transUri\"),\n+ entry(\"deviceCustomIPv6Address4Label\", \"c6a4Label\"),\n+ entry(\"agentTranslatedZoneURI\", \"uri\"),\n+ entry(\"oldFilePermission\", \"rw-r--r--\"),\n+ entry(\"deviceCustomIPv6Address4\", \"2001:db8::4\"),\n+ entry(\"sourceZoneURI\", \"sourceZoneUri\"),\n+ entry(\"deviceCustomFloatingPoint3Label\", \"cfp3Label\"),\n+ entry(\"agentTranslatedZoneExternalID\", \"ext123\"),\n+ entry(\"destinationZoneURI\", \"destZoneUri\"),\n+ entry(\"flexDate1Label\", \"flexDate1Label\"),\n+ entry(\"agentNtDomain\", \"example.org\"),\n+ entry(\"deviceCustomDate2\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n+ entry(\"deviceCustomDate1\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"deviceCustomString3Label\", \"cs3Label\"),\n+ entry(\"deviceCustomDate1Label\", \"customDate1Label\"),\n+ entry(\"destinationTranslatedZoneURI\", \"destUri\"),\n+ entry(\"oldFileModificationTime\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n+ entry(\"deviceCustomFloatingPoint1Label\", \"cfp1Label\"),\n+ entry(\"deviceCustomIPv6Address3Label\", \"c6a3Label\"),\n+ entry(\"deviceCustomFloatingPoint4Label\", \"cfp4Label\"),\n+ entry(\"oldFileSize\", 2048),\n+ entry(\"externalId\", \"extId\"),\n+ entry(\"baseEventCount\", 1234),\n+ entry(\"flexString2\", \"flexString2\"),\n+ entry(\"deviceCustomNumber3Label\", \"cn3Label\"),\n+ entry(\"flexString1\", \"flexString1\"),\n+ entry(\"deviceCustomString4Label\", \"cs4Label\"),\n+ entry(\"flexString2Label\", \"flexString2Label\"),\n+ entry(\"deviceCustomString3\", \"customString3\"),\n+ entry(\"deviceCustomString2\", \"customString2\"),\n+ entry(\"deviceCustomString1Label\", \"cs1Label\"),\n+ entry(\"deviceCustomString5\", \"customString5\"),\n+ entry(\"deviceCustomString4\", \"customString4\"),\n+ entry(\"deviceZoneExternalID\", \"zoneExtId\"),\n+ entry(\"deviceCustomString6\", \"customString6\"),\n+ entry(\"oldFileName\", \"oldFile\"),\n+ entry(\"sourceZoneExternalID\", \"sourceZoneExtId\"),\n+ entry(\"oldFileHash\", \"oldHash\"),\n+ entry(\"sourceTranslatedZoneURI\", \"sourceUri\"),\n+ entry(\"oldFileCreateTime\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\"))\n+ )\n+ )\n+ )\n+ ),\n+ entry(\"host\", Map.of(\"nat\", Map.of(\"ip\", \"10.0.0.3\"))),\n+ entry(\"log\", Map.of(\"syslog\", Map.of(\"facility\", Map.of(\"code\", 16L)))),\n+ entry(\n+ \"observer\",\n+ Map.ofEntries(\n+ entry(\"ingress\", Map.of(\"interface\", Map.of(\"name\", \"eth0\"))),\n+ entry(\"registered_domain\", \"example.com\"),\n+ entry(\"product\", \"threatmanager\"),\n+ entry(\"hostname\", \"host1\"),\n+ entry(\"vendor\", \"security\"),\n+ entry(\"ip\", \"192.168.0.3\"),\n+ entry(\"name\", \"extId\"),\n+ entry(\"version\", \"1.0\"),\n+ entry(\"mac\", \"00:0a:95:9d:68:16\"),\n+ entry(\"egress\", Map.of(\"interface\", Map.of(\"name\", \"eth1\")))\n+ )\n+ ),\n+ entry(\n+ \"agent\",\n+ Map.ofEntries(\n+ entry(\"ip\", \"192.168.0.1\"),\n+ entry(\"name\", 
\"example.com\"),\n+ entry(\"id\", \"agentId\"),\n+ entry(\"type\", \"agentType\"),\n+ entry(\"version\", \"1.0\"),\n+ entry(\"mac\", \"00:0a:95:9d:68:16\")\n+ )\n+ ),\n+ entry(\"process\", Map.of(\"name\", \"procName\", \"pid\", 5678L)),\n+ entry(\n+ \"destination\",\n+ Map.ofEntries(\n+ entry(\"nat\", Map.of(\"port\", 8080, \"ip\", \"10.0.0.2\")),\n+ entry(\"geo\", Map.of(\"location\", Map.of(\"lon\", -122.4194, \"lat\", 37.7749))),\n+ entry(\"registered_domain\", \"destNtDomain\"),\n+ entry(\"process\", Map.of(\"name\", \"destProc\", \"pid\", 1234L)),\n+ entry(\"port\", 80),\n+ entry(\"bytes\", 91011L),\n+ entry(\"service\", Map.of(\"name\", \"destService\")),\n+ entry(\"domain\", \"destHost\"),\n+ entry(\"ip\", \"192.168.0.2\"),\n+ entry(\"user\", Map.of(\"name\", \"destUser\", \"id\", \"destUserId\", \"group\", Map.of(\"name\", \"admin\"))),\n+ entry(\"mac\", \"00:0a:95:9d:68:16\")\n+ )\n+ ),\n+ entry(\n+ \"source\",\n+ Map.ofEntries(\n+ entry(\"geo\", Map.of(\"location\", Map.of(\"lon\", -122.4194, \"lat\", 37.7749))),\n+ entry(\"nat\", Map.of(\"port\", 8081, \"ip\", \"10.0.0.4\")),\n+ entry(\"registered_domain\", \"sourceNtDomain\"),\n+ entry(\"process\", Map.of(\"name\", \"sourceProc\", \"pid\", 1234L)),\n+ entry(\"port\", 443),\n+ entry(\"service\", Map.of(\"name\", \"sourceService\")),\n+ entry(\"bytes\", 5678L),\n+ entry(\"ip\", \"192.168.0.4\"),\n+ entry(\"domain\", \"sourceDomain\"),\n+ entry(\"user\", Map.of(\"name\", \"sourceUser\", \"id\", \"sourceUserId\", \"group\", Map.of(\"name\", \"sourcePriv\"))),\n+ entry(\"mac\", \"00:0a:95:9d:68:16\")\n+ )\n+ ),\n+ entry(\"message\", \"message\"),\n+ entry(\"url\", Map.of(\"original\", \"url\")),\n+ entry(\"network\", Map.of(\"protocol\", \"HTTP\", \"transport\", \"TCP\", \"direction\", \"inbound\")),\n+ entry(\n+ \"file\",\n+ Map.ofEntries(\n+ entry(\"inode\", \"5678\"),\n+ entry(\"path\", \"/path/to/file\"),\n+ entry(\"size\", 1024L),\n+ entry(\"created\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"name\", \"file.txt\"),\n+ entry(\"mtime\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n+ entry(\"type\", \"txt\"),\n+ entry(\"hash\", \"abcd1234\"),\n+ entry(\"group\", \"rw-r--r--\")\n+ )\n+ ),\n+ entry(\"@timestamp\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"organization\", Map.of(\"name\", \"custUri\", \"id\", \"custExtId\")),\n+ entry(\n+ \"event\",\n+ Map.ofEntries(\n+ entry(\"action\", \"blocked\"),\n+ entry(\"timezone\", \"UTC\"),\n+ entry(\"end\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n+ entry(\"id\", \"evt123\"),\n+ entry(\"outcome\", \"success\"),\n+ entry(\"start\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"reason\", \"reason\"),\n+ entry(\"ingested\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"kind\", 1),\n+ entry(\"original\", \"rawEvent\"),\n+ entry(\"created\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"code\", \"100\")\n+ )\n+ ),\n+ entry(\"user_agent\", Map.of(\"original\", \"Mozilla\")),\n+ entry(\"http\", Map.of(\"request\", Map.of(\"referrer\", \"referrer\", \"method\", \"GET\")))\n+ )\n+ );\n+ }\n+\n+ // Date parsing tests\n+ public void testToTimestampWithUnixTimestamp() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String unixTimestamp = \"1633072800000\"; // Example Unix timestamp in milliseconds\n+ ZonedDateTime expected = ZonedDateTime.ofInstant(Instant.ofEpochMilli(Long.parseLong(unixTimestamp)), ZoneId.of(\"UTC\"));\n+ ZonedDateTime result = parser.toTimestamp(unixTimestamp);\n+ 
assertEquals(expected, result);\n+ }\n+\n+ public void testToTimestampWithFormattedDate() {\n+ CefParser parser = new CefParser(ZoneId.of(\"Europe/Stockholm\"), false);\n+ String formattedDate = \"Oct 01 2021 12:00:00 UTC\"; // Example formatted date\n+ ZonedDateTime expected = ZonedDateTime.parse(\"2021-10-01T14:00+02:00[Europe/Stockholm]\");\n+ ZonedDateTime result = parser.toTimestamp(formattedDate);\n+ assertEquals(expected, result);\n+ }\n+\n+ public void testToTimestampWithFormattedDateWithoutYear() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String formattedDate = \"Oct 01 12:00:00 UTC\"; // Example formatted date without year\n+ int currentYear = ZonedDateTime.now(ZoneId.of(\"UTC\")).getYear();\n+ ZonedDateTime expected = ZonedDateTime.parse(currentYear + \"-10-01T12:00:00Z[UTC]\");\n+ ZonedDateTime result = parser.toTimestamp(formattedDate);\n+ assertEquals(expected, result);\n+ }\n+\n+ public void testToTimestampWithFormattedDateWithoutTimezone() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String formattedDate = \"Sep 07 2018 14:50:39\"; // Example formatted date without year\n+ ZonedDateTime expected = ZonedDateTime.parse(\"2018-09-07T14:50:39Z[UTC]\");\n+ ZonedDateTime result = parser.toTimestamp(formattedDate);\n+ assertEquals(expected, result);\n+ }\n+\n+ public void testToTimestampWithInvalidDate() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String invalidDate = \"invalid date\";\n+ assertThrows(IllegalArgumentException.class, () -> parser.toTimestamp(invalidDate));\n+ }\n+\n+ public void testToMacAddressWithSeparators() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ List macAddresses = List.of(\n+ // EUI-48 (with separators).\n+ \"00:0D:60:AF:1B:61\",\n+ \"00-0D-60-AF-1B-61\",\n+ \"000D.60AF.1B61\",\n+\n+ // EUI-64 (with separators).\n+ \"00:0D:60:FF:FE:AF:1B:61\",\n+ \"00-0D-60-FF-FE-AF-1B-61\",\n+ \"000D.60FF.FEAF.1B61\"\n+ );\n+ macAddresses.forEach(macAddress -> {\n+ String result = parser.toMACAddress(macAddress);\n+ assertEquals(macAddress, result);\n+ });\n+ }\n+\n+ public void testEUI48ToMacAddressWithOutSeparators() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String macAddress = \"000D60AF1B61\";\n+ String result = parser.toMACAddress(macAddress);\n+ assertEquals(\"00:0D:60:AF:1B:61\", result);\n+ }\n+\n+ public void testEUI64ToMacAddressWithOutSeparators() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String macAddress = \"000D60FFFEAF1B61\";\n+ String result = parser.toMACAddress(macAddress);\n+ assertEquals(\"00:0D:60:FF:FE:AF:1B:61\", result);\n+ }\n+\n+ public void toIP_validIPv4Address() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n+ String result = parser.toIP(\"192.168.1.1\");\n+ assertEquals(\"192.168.1.1\", result);\n+ }\n+\n+ public void toIP_validIPv6Address() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n+ String result = parser.toIP(\"2001:0db8:85a3:0000:0000:8a2e:0370:7334\");\n+ assertEquals(\"2001:db8:85a3::8a2e:370:7334\", result);\n+ }\n+\n+ public void toIP_invalidIPAddress() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n+ IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> parser.toIP(\"invalid_ip\"));\n+ assertEquals(\"Invalid IP address format\", exception.getMessage());\n+ }\n+\n+ public void toIP_emptyString() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n+ IllegalArgumentException 
exception = assertThrows(IllegalArgumentException.class, () -> parser.toIP(\"\"));\n+ assertEquals(\"Invalid IP address format\", exception.getMessage());\n+ }\n+\n+ public void toIP_nullString() {", - "comment_created_at": "2025-04-03T17:57:14+00:00", - "comment_author": "joegallo", - "comment_body": "These `toIP_` methods aren't formatted to actually be JUnit tests (which start with `test`), so none of them are running. If you fix that so that they do run, not all of the tests pass.\r\n\r\nPlease avoid underscores in these names, regardless, so for example have `toIP_nullString` become `testToIPNullString`.", - "pr_file_module": null - }, - { - "comment_id": "2028230119", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 122491, - "pr_file": "modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/CefProcessorTests.java", - "discussion_id": "2027497237", - "commented_code": "@@ -0,0 +1,1104 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+\n+package org.elasticsearch.ingest.common;\n+\n+import org.elasticsearch.ingest.IngestDocument;\n+import org.elasticsearch.test.ESTestCase;\n+import org.junit.Before;\n+\n+import java.time.Instant;\n+import java.time.ZoneId;\n+import java.time.ZonedDateTime;\n+import java.util.HashMap;\n+import java.util.List;\n+import java.util.Map;\n+import java.util.Set;\n+\n+import static java.util.Map.entry;\n+import static org.hamcrest.Matchers.containsInAnyOrder;\n+import static org.hamcrest.Matchers.equalTo;\n+\n+public class CefProcessorTests extends ESTestCase {\n+\n+ private IngestDocument document;\n+\n+ @Before\n+ public void setUp() throws Exception {\n+ super.setUp();\n+ }\n+\n+ public void testExecute() {\n+ Map source = new HashMap<>();\n+ String message = \"CEF:0|Elastic|Vaporware|1.0.0-alpha|18|Web request|low|eventId=3457 requestMethod=POST \"\n+ + \"slat=38.915 slong=-77.511 proto=TCP sourceServiceName=httpd requestContext=https://www.google.com \"\n+ + \"src=89.160.20.156 spt=33876 dst=192.168.10.1 dpt=443 request=https://www.example.com/cart\";\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"Elastic\", \"product\", \"Vaporware\", \"version\", \"1.0.0-alpha\", \"event_class_id\", \"18\")\n+ ),\n+ entry(\"name\", \"Web request\"),\n+ entry(\"severity\", \"low\")\n+ )\n+ ),\n+ entry(\"observer\", Map.of(\"product\", \"Vaporware\", \"vendor\", \"Elastic\", \"version\", \"1.0.0-alpha\")),\n+ entry(\"event\", Map.of(\"id\", \"3457\", \"code\", \"18\")),\n+ entry(\n+ \"source\",\n+ Map.ofEntries(\n+ entry(\"ip\", \"89.160.20.156\"),\n+ entry(\"port\", 33876),\n+ entry(\"geo\", Map.of(\"location\", Map.of(\"lon\", -77.511, \"lat\", 38.915))),\n+ entry(\"service\", Map.of(\"name\", \"httpd\"))\n+ )\n+ 
),\n+ entry(\"destination\", Map.of(\"ip\", \"192.168.10.1\", \"port\", 443)),\n+ entry(\"http\", Map.of(\"request\", Map.of(\"method\", \"POST\", \"referrer\", \"https://www.google.com\"))),\n+ entry(\"network\", Map.of(\"transport\", \"TCP\")),\n+ entry(\"url\", Map.of(\"original\", \"https://www.example.com/cart\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testInvalidCefFormat() {\n+ Map invalidSource = new HashMap<>();\n+ invalidSource.put(\"message\", \"Invalid CEF message\");\n+ IngestDocument invalidIngestDocument = new IngestDocument(\"index\", \"id\", 1L, null, null, invalidSource);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ expectThrows(IllegalArgumentException.class, () -> processor.execute(invalidIngestDocument));\n+ }\n+\n+ public void testStandardMessage() {\n+ String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|\"\n+ + \"src=10.0.0.192 dst=12.121.122.82 spt=1232 eventId=1 in=4294967296 out=4294967296\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232, \"bytes\", 4294967296L)),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\", \"bytes\", 4294967296L)),\n+ entry(\"event\", Map.of(\"id\", \"1\", \"code\", \"100\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testHeaderOnly() {\n+ String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEmptyDeviceFields() {\n+ String message = \"CEF:0|||1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, 
true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\"device\", Map.of(\"vendor\", \"\", \"product\", \"\", \"version\", \"1.0\", \"event_class_id\", \"100\")),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"\", \"vendor\", \"\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEscapedPipeInHeader() {\n+ String message = \"CEF:26|security|threat\\\\|->manager|1.0|100|\"\n+ + \"trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threat|->manager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threat|->manager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEqualsSignInHeader() {\n+ String message = \"CEF:26|security|threat=manager|1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threat=manager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threat=manager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEmptyExtensionValue() {\n+ String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst= spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ 
processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testLeadingWhitespace() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10| src=10.0.0.192 dst=12.121.122.82 spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEscapedPipeInExtension() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this\\\\|has an escaped pipe\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ expectThrows(IllegalArgumentException.class, () -> processor.execute(document));\n+ }\n+\n+ public void testPipeInMessage() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this|has a pipe\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\"),\n+ entry(\"extensions\", Map.of(\"moo\", \"this|has a pipe\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEqualsInMessage() {\n+ String message 
=\n+ \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this =has = equals\\\\= dst=12.121.122.82 spt=1232\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\"),\n+ entry(\"extensions\", Map.of(\"moo\", \"this =has = equals=\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"destination\", Map.of(\"ip\", \"12.121.122.82\")),\n+ entry(\"source\", Map.of(\"port\", 1232)),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testEscapesInExtension() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|msg=a+b\\\\=c x=c\\\\\\\\d\\\\=z\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\"),\n+ entry(\"extensions\", Map.of(\"x\", \"c\\\\d=z\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"message\", \"a+b=c\")\n+ )\n+ );\n+ }\n+\n+ public void testMalformedExtensionEscape() {\n+ String message = \"CEF:0|FooBar|Web Gateway|1.2.3.45.67|200|Success|2|rt=Sep 07 2018 14:50:39 cat=Access Log dst=1.1.1.1 \"\n+ + \"dhost=foo.example.com suser=redacted src=2.2.2.2 requestMethod=POST request='https://foo.example.com/bar/bingo/1' \"\n+ + \"requestClientApplication='Foo-Bar/2018.1.7; =Email:user@example.com; Guid:test=' cs1= cs1Label=Foo Bar\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"FooBar\", \"product\", \"Web Gateway\", \"version\", \"1.2.3.45.67\", \"event_class_id\", \"200\")\n+ ),\n+ entry(\"name\", \"Success\"),\n+ entry(\"severity\", \"2\"),\n+ entry(\"extensions\", Map.of(\"deviceCustomString1Label\", \"Foo Bar\", \"deviceEventCategory\", \"Access Log\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"200\")),\n+ entry(\"observer\", 
Map.of(\"product\", \"Web Gateway\", \"vendor\", \"FooBar\", \"version\", \"1.2.3.45.67\")),\n+ entry(\"@timestamp\", ZonedDateTime.parse(\"2018-09-07T14:50:39Z\")),\n+ entry(\"destination\", Map.of(\"ip\", \"1.1.1.1\", \"domain\", \"foo.example.com\")),\n+ entry(\"source\", Map.of(\"ip\", \"2.2.2.2\", \"user\", Map.of(\"name\", \"redacted\"))),\n+ entry(\"http\", Map.of(\"request\", Map.of(\"method\", \"POST\"))),\n+ entry(\"url\", Map.of(\"original\", \"'https://foo.example.com/bar/bingo/1'\")),\n+ entry(\"user_agent\", Map.of(\"original\", \"'Foo-Bar/2018.1.7; =Email:user@example.com; Guid:test='\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testMultipleMalformedExtensionValues() {\n+ String message = \"CEF:0|vendor|product|version|event_id|name|Very-High| \"\n+ + \"msg=Hello World error=Failed because id==old_id user=root angle=106.7<=180\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"vendor\", \"product\", \"product\", \"version\", \"version\", \"event_class_id\", \"event_id\")\n+ ),\n+ entry(\"name\", \"name\"),\n+ entry(\"severity\", \"Very-High\"),\n+ entry(\"extensions\", Map.of(\"id\", \"=old_id\", \"user\", \"root\", \"angle\", \"106.7<=180\", \"error\", \"Failed because\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"event_id\")),\n+ entry(\"observer\", Map.of(\"product\", \"product\", \"vendor\", \"vendor\", \"version\", \"version\")),\n+ entry(\"message\", \"Hello World\")\n+ )\n+ );\n+ }\n+\n+ public void testPaddedMessage() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|message is padded|10|spt=1232 \"\n+ + \"msg=Trailing space in non-final extensions is preserved src=10.0.0.192 \";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"message is padded\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"ip\", \"10.0.0.192\", \"port\", 1232)),\n+ entry(\"message\", \"Trailing space in non-final extensions is preserved\")\n+ )\n+ );\n+ }\n+\n+ public void testCrlfMessage() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|message is padded|10|\"\n+ + \"spt=1232 msg=Trailing space in final extensions is not preserved\\t \\r\\ndpt=1234\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, 
null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"message is padded\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"port\", 1232)),\n+ entry(\"message\", \"Trailing space in final extensions is not preserved\"),\n+ entry(\"destination\", Map.of(\"port\", 1234))\n+ )\n+ );\n+ }\n+\n+ public void testTabMessage() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|message is padded|10|\"\n+ + \"spt=1232 msg=Tabs\\tand\\rcontrol\\ncharacters are preserved\\t src=127.0.0.1\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"message is padded\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"port\", 1232, \"ip\", \"127.0.0.1\")),\n+ entry(\"message\", \"Tabs\\tand\\rcontrol\\ncharacters are preserved\")\n+ )\n+ );\n+ }\n+\n+ public void testTabNoSepMessage() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|message has tabs|10|spt=1232 msg=Tab is not a separator\\tsrc=127.0.0.1\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"message has tabs\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"port\", 1232, \"ip\", \"127.0.0.1\")),\n+ entry(\"message\", \"Tab is not a separator\")\n+ )\n+ );\n+ }\n+\n+ public void testEscapedMessage() {\n+ String message = \"CEF:0|security\\\\compliance|threat\\\\|->manager|1.0|100|message contains escapes|10|\"\n+ + \"spt=1232 msg=Newlines in messages\\\\\\nare allowed.\\\\\\r\\\\\\nAnd so are carriage feeds\\\\\\\\newlines\\\\\\\\\\\\=. 
dpt=4432\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.ofEntries(\n+ entry(\"vendor\", \"security\\\\compliance\"),\n+ entry(\"product\", \"threat|->manager\"),\n+ entry(\"version\", \"1.0\"),\n+ entry(\"event_class_id\", \"100\")\n+ )\n+ ),\n+ entry(\"name\", \"message contains escapes\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threat|->manager\", \"vendor\", \"security\\\\compliance\", \"version\", \"1.0\")),\n+ entry(\"source\", Map.of(\"port\", 1232)),\n+ entry(\"message\", \"Newlines in messages\\nare allowed.\\r\\nAnd so are carriage feeds\\\\newlines\\\\=.\"),\n+ entry(\"destination\", Map.of(\"port\", 4432))\n+ )\n+ );\n+ }\n+\n+ public void testTruncatedHeader() {\n+ String message = \"CEF:0|SentinelOne|Mgmt|activityID=1111111111111111111 activityType=3505 \"\n+ + \"siteId=None siteName=None accountId=1222222222222222222 accountName=foo-bar mdr notificationScope=ACCOUNT\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\"device\", Map.of(\"vendor\", \"SentinelOne\", \"product\", \"Mgmt\")),\n+ entry(\n+ \"extensions\",\n+ Map.ofEntries(\n+ entry(\"activityID\", \"1111111111111111111\"),\n+ entry(\"activityType\", \"3505\"),\n+ entry(\"siteId\", \"None\"),\n+ entry(\"siteName\", \"None\"),\n+ entry(\"accountId\", \"1222222222222222222\"),\n+ entry(\"accountName\", \"foo-bar mdr\"),\n+ entry(\"notificationScope\", \"ACCOUNT\")\n+ )\n+ )\n+ )\n+ ),\n+ entry(\"observer\", Map.of(\"product\", \"Mgmt\", \"vendor\", \"SentinelOne\")),\n+ entry(\"message\", message),\n+ entry(\"error\", Map.of(\"message\", Set.of(\"incomplete CEF header\")))\n+ )\n+ );\n+ }\n+\n+ public void testIgnoreEmptyValuesInExtension() {\n+ String message = \"CEF:26|security|threat=manager|1.0|100|trojan successfully stopped|10|src= dst=12.121.122.82 spt=\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threat=manager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\")\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threat=manager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"destination\", Map.of(\"ip\", 
\"12.121.122.82\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testHyphenInExtensionKey() {\n+ String message = \"CEF:26|security|threatmanager|1.0|100|trojan successfully stopped|10|Some-Key=123456\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"26\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\"),\n+ entry(\"extensions\", Map.of(\"Some-Key\", \"123456\"))\n+ )\n+ ),\n+ entry(\"event\", Map.of(\"code\", \"100\")),\n+ entry(\"observer\", Map.of(\"product\", \"threatmanager\", \"vendor\", \"security\", \"version\", \"1.0\")),\n+ entry(\"message\", message)\n+ )\n+ );\n+ }\n+\n+ public void testAllFieldsInExtension() {\n+ String message = \"CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|\"\n+ + \"agt=192.168.0.1 agentDnsDomain=example.com ahost=agentHost aid=agentId amac=00:0a:95:9d:68:16 agentNtDomain=example.org \"\n+ + \"art=1622547800000 atz=UTC agentTranslatedAddress=10.0.0.1 agentTranslatedZoneExternalID=ext123 agentTranslatedZoneURI=uri \"\n+ + \"at=agentType av=1.0 agentZoneExternalID=zoneExtId agentZoneURI=zoneUri app=HTTP cnt=1234 in=5678 out=91011 \"\n+ + \"customerExternalID=custExtId customerURI=custUri dst=192.168.0.2 dlat=37.7749 dlong=-122.4194 \"\n+ + \"dhost=destHost dmac=00:0a:95:9d:68:16 dntdom=destNtDomain dpt=80 dpid=1234 \"\n+ + \"dproc=destProc destinationServiceName=destService \"\n+ + \"destinationTranslatedAddress=10.0.0.2 destinationTranslatedPort=8080 destinationTranslatedZoneExternalID=destExtId \"\n+ + \"destinationTranslatedZoneURI=destUri duid=destUserId duser=destUser dpriv=admin destinationZoneExternalID=destZoneExtId \"\n+ + \"destinationZoneURI=destZoneUri act=blocked dvc=192.168.0.3 cfp1Label=cfp1Label cfp3Label=cfp3Label cfp4Label=cfp4Label \"\n+ + \"deviceCustomDate1=1622547800000 deviceCustomDate1Label=customDate1Label deviceCustomDate2=1622547900000 \"\n+ + \"deviceCustomDate2Label=customDate2Label cfp1=1.23 cfp2=2.34 cfp2Label=cfp2Label cfp3=3.45 cfp4=4.56 c6a1=2001:db8::1 \"\n+ + \"c6a1Label=c6a1Label c6a2=2001:db8::2 c6a2Label=c6a2Label c6a3=2001:db8::3 c6a3Label=c6a3Label c6a4=2001:db8::4 \"\n+ + \"C6a4Label=c6a4Label cn1=123 cn1Label=cn1Label cn2=234 cn2Label=cn2Label cn3=345 cn3Label=cn3Label cs1=customString1 \"\n+ + \"cs1Label=cs1Label cs2=customString2 cs2Label=cs2Label cs3=customString3 cs3Label=cs3Label \"\n+ + \"cs4=customString4 cs4Label=cs4Label \"\n+ + \"cs5=customString5 cs5Label=cs5Label cs6=customString6 cs6Label=cs6Label deviceDirection=inbound deviceDnsDomain=example.com \"\n+ + \"cat=category deviceExternalId=extId deviceFacility=16 dvchost=host1 deviceInboundInterface=eth0 dvcmac=00:0a:95:9d:68:16 \"\n+ + \"deviceNtDomain=example.org deviceOutboundInterface=eth1 devicePayloadId=payloadId dvcpid=5678 deviceProcessName=procName \"\n+ + \"rt=1622547800000 dtz=UTC deviceTranslatedAddress=10.0.0.3 deviceTranslatedZoneExternalID=transExtId \"\n+ + \"deviceTranslatedZoneURI=transUri deviceZoneExternalID=zoneExtId deviceZoneURI=zoneUri 
end=1622547900000 eventId=evt123 \"\n+ + \"outcome=success externalId=extId fileCreateTime=1622547800000 fileHash=abcd1234 fileId=5678 \"\n+ + \"fileModificationTime=1622547900000 \"\n+ + \"fname=file.txt filePath=/path/to/file filePermission=rw-r--r-- fsize=1024 fileType=txt flexDate1=1622547800000 \"\n+ + \"flexDate1Label=flexDate1Label flexString1=flexString1 flexString2=flexString2 flexString1Label=flexString1Label \"\n+ + \"flexString2Label=flexString2Label msg=message oldFileCreateTime=1622547800000 oldFileHash=oldHash oldFileId=oldId \"\n+ + \"oldFileModificationTime=1622547900000 oldFileName=oldFile oldFilePath=/old/path \"\n+ + \"oldFilePermission=rw-r--r-- oldFileSize=2048 \"\n+ + \"oldFileType=oldType rawEvent=rawEvent reason=reason requestClientApplication=Mozilla requestContext=referrer \"\n+ + \"requestCookies=cookies requestMethod=GET request=url src=192.168.0.4 sourceDnsDomain=sourceDomain \"\n+ + \"slat=37.7749 slong=-122.4194 \"\n+ + \"shost=sourceHost smac=00:0a:95:9d:68:16 sntdom=sourceNtDomain spt=443 spid=1234 \"\n+ + \"sproc=sourceProc sourceServiceName=sourceService \"\n+ + \"sourceTranslatedAddress=10.0.0.4 sourceTranslatedPort=8081 sourceTranslatedZoneExternalID=sourceExtId \"\n+ + \"sourceTranslatedZoneURI=sourceUri suid=sourceUserId suser=sourceUser spriv=sourcePriv sourceZoneExternalID=sourceZoneExtId \"\n+ + \"sourceZoneURI=sourceZoneUri start=1622547800000 proto=TCP type=1 catdt=catDeviceType mrt=1622547800000\";\n+ Map source = new HashMap<>();\n+ source.put(\"message\", message);\n+ document = new IngestDocument(\"index\", \"id\", 1L, null, null, source);\n+ CefProcessor processor = new CefProcessor(\"tag\", \"description\", \"message\", \"cef\", false, true, null);\n+ processor.execute(document);\n+ assertMapsEqual(\n+ document.getSource(),\n+ Map.ofEntries(\n+ entry(\n+ \"cef\",\n+ Map.ofEntries(\n+ entry(\"version\", \"0\"),\n+ entry(\n+ \"device\",\n+ Map.of(\"vendor\", \"security\", \"product\", \"threatmanager\", \"version\", \"1.0\", \"event_class_id\", \"100\")\n+ ),\n+ entry(\"name\", \"trojan successfully stopped\"),\n+ entry(\"severity\", \"10\"),\n+ entry(\n+ \"extensions\",\n+ Map.ofEntries(\n+ entry(\"deviceNtDomain\", \"example.org\"),\n+ entry(\"agentZoneExternalID\", \"zoneExtId\"),\n+ entry(\"agentTimeZone\", \"UTC\"),\n+ entry(\"deviceCustomIPv6Address1Label\", \"c6a1Label\"),\n+ entry(\"deviceCustomString1\", \"customString1\"),\n+ entry(\"deviceCustomIPv6Address2Label\", \"c6a2Label\"),\n+ entry(\"deviceCustomNumber3\", 345L),\n+ entry(\"deviceCustomFloatingPoint1\", 1.23f),\n+ entry(\"deviceCustomNumber2\", 234L),\n+ entry(\"deviceCustomFloatingPoint2\", 2.34f),\n+ entry(\"deviceCustomFloatingPoint3\", 3.45f),\n+ entry(\"deviceCustomFloatingPoint4\", 4.56f),\n+ entry(\"flexDate1\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"destinationTranslatedZoneExternalID\", \"destExtId\"),\n+ entry(\"deviceCustomNumber1\", 123L),\n+ entry(\"deviceEventCategory\", \"category\"),\n+ entry(\"deviceCustomString6Label\", \"cs6Label\"),\n+ entry(\"deviceCustomNumber2Label\", \"cn2Label\"),\n+ entry(\"flexString1Label\", \"flexString1Label\"),\n+ entry(\"deviceCustomString5Label\", \"cs5Label\"),\n+ entry(\"agentZoneURI\", \"zoneUri\"),\n+ entry(\"deviceCustomString2Label\", \"cs2Label\"),\n+ entry(\"deviceCustomDate2Label\", \"customDate2Label\"),\n+ entry(\"deviceCustomNumber1Label\", \"cn1Label\"),\n+ entry(\"oldFileType\", \"oldType\"),\n+ entry(\"destinationZoneExternalID\", \"destZoneExtId\"),\n+ entry(\"categoryDeviceType\", 
\"catDeviceType\"),\n+ entry(\"deviceZoneURI\", \"zoneUri\"),\n+ entry(\"sourceTranslatedZoneExternalID\", \"sourceExtId\"),\n+ entry(\"agentTranslatedAddress\", \"10.0.0.1\"),\n+ entry(\"requestCookies\", \"cookies\"),\n+ entry(\"deviceCustomIPv6Address3\", \"2001:db8::3\"),\n+ entry(\"oldFilePath\", \"/old/path\"),\n+ entry(\"deviceCustomIPv6Address2\", \"2001:db8::2\"),\n+ entry(\"deviceCustomIPv6Address1\", \"2001:db8::1\"),\n+ entry(\"oldFileId\", \"oldId\"),\n+ entry(\"deviceTranslatedZoneExternalID\", \"transExtId\"),\n+ entry(\"deviceCustomFloatingPoint2Label\", \"cfp2Label\"),\n+ entry(\"deviceTranslatedZoneURI\", \"transUri\"),\n+ entry(\"deviceCustomIPv6Address4Label\", \"c6a4Label\"),\n+ entry(\"agentTranslatedZoneURI\", \"uri\"),\n+ entry(\"oldFilePermission\", \"rw-r--r--\"),\n+ entry(\"deviceCustomIPv6Address4\", \"2001:db8::4\"),\n+ entry(\"sourceZoneURI\", \"sourceZoneUri\"),\n+ entry(\"deviceCustomFloatingPoint3Label\", \"cfp3Label\"),\n+ entry(\"agentTranslatedZoneExternalID\", \"ext123\"),\n+ entry(\"destinationZoneURI\", \"destZoneUri\"),\n+ entry(\"flexDate1Label\", \"flexDate1Label\"),\n+ entry(\"agentNtDomain\", \"example.org\"),\n+ entry(\"deviceCustomDate2\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n+ entry(\"deviceCustomDate1\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"deviceCustomString3Label\", \"cs3Label\"),\n+ entry(\"deviceCustomDate1Label\", \"customDate1Label\"),\n+ entry(\"destinationTranslatedZoneURI\", \"destUri\"),\n+ entry(\"oldFileModificationTime\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n+ entry(\"deviceCustomFloatingPoint1Label\", \"cfp1Label\"),\n+ entry(\"deviceCustomIPv6Address3Label\", \"c6a3Label\"),\n+ entry(\"deviceCustomFloatingPoint4Label\", \"cfp4Label\"),\n+ entry(\"oldFileSize\", 2048),\n+ entry(\"externalId\", \"extId\"),\n+ entry(\"baseEventCount\", 1234),\n+ entry(\"flexString2\", \"flexString2\"),\n+ entry(\"deviceCustomNumber3Label\", \"cn3Label\"),\n+ entry(\"flexString1\", \"flexString1\"),\n+ entry(\"deviceCustomString4Label\", \"cs4Label\"),\n+ entry(\"flexString2Label\", \"flexString2Label\"),\n+ entry(\"deviceCustomString3\", \"customString3\"),\n+ entry(\"deviceCustomString2\", \"customString2\"),\n+ entry(\"deviceCustomString1Label\", \"cs1Label\"),\n+ entry(\"deviceCustomString5\", \"customString5\"),\n+ entry(\"deviceCustomString4\", \"customString4\"),\n+ entry(\"deviceZoneExternalID\", \"zoneExtId\"),\n+ entry(\"deviceCustomString6\", \"customString6\"),\n+ entry(\"oldFileName\", \"oldFile\"),\n+ entry(\"sourceZoneExternalID\", \"sourceZoneExtId\"),\n+ entry(\"oldFileHash\", \"oldHash\"),\n+ entry(\"sourceTranslatedZoneURI\", \"sourceUri\"),\n+ entry(\"oldFileCreateTime\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\"))\n+ )\n+ )\n+ )\n+ ),\n+ entry(\"host\", Map.of(\"nat\", Map.of(\"ip\", \"10.0.0.3\"))),\n+ entry(\"log\", Map.of(\"syslog\", Map.of(\"facility\", Map.of(\"code\", 16L)))),\n+ entry(\n+ \"observer\",\n+ Map.ofEntries(\n+ entry(\"ingress\", Map.of(\"interface\", Map.of(\"name\", \"eth0\"))),\n+ entry(\"registered_domain\", \"example.com\"),\n+ entry(\"product\", \"threatmanager\"),\n+ entry(\"hostname\", \"host1\"),\n+ entry(\"vendor\", \"security\"),\n+ entry(\"ip\", \"192.168.0.3\"),\n+ entry(\"name\", \"extId\"),\n+ entry(\"version\", \"1.0\"),\n+ entry(\"mac\", \"00:0a:95:9d:68:16\"),\n+ entry(\"egress\", Map.of(\"interface\", Map.of(\"name\", \"eth1\")))\n+ )\n+ ),\n+ entry(\n+ \"agent\",\n+ Map.ofEntries(\n+ entry(\"ip\", \"192.168.0.1\"),\n+ entry(\"name\", 
\"example.com\"),\n+ entry(\"id\", \"agentId\"),\n+ entry(\"type\", \"agentType\"),\n+ entry(\"version\", \"1.0\"),\n+ entry(\"mac\", \"00:0a:95:9d:68:16\")\n+ )\n+ ),\n+ entry(\"process\", Map.of(\"name\", \"procName\", \"pid\", 5678L)),\n+ entry(\n+ \"destination\",\n+ Map.ofEntries(\n+ entry(\"nat\", Map.of(\"port\", 8080, \"ip\", \"10.0.0.2\")),\n+ entry(\"geo\", Map.of(\"location\", Map.of(\"lon\", -122.4194, \"lat\", 37.7749))),\n+ entry(\"registered_domain\", \"destNtDomain\"),\n+ entry(\"process\", Map.of(\"name\", \"destProc\", \"pid\", 1234L)),\n+ entry(\"port\", 80),\n+ entry(\"bytes\", 91011L),\n+ entry(\"service\", Map.of(\"name\", \"destService\")),\n+ entry(\"domain\", \"destHost\"),\n+ entry(\"ip\", \"192.168.0.2\"),\n+ entry(\"user\", Map.of(\"name\", \"destUser\", \"id\", \"destUserId\", \"group\", Map.of(\"name\", \"admin\"))),\n+ entry(\"mac\", \"00:0a:95:9d:68:16\")\n+ )\n+ ),\n+ entry(\n+ \"source\",\n+ Map.ofEntries(\n+ entry(\"geo\", Map.of(\"location\", Map.of(\"lon\", -122.4194, \"lat\", 37.7749))),\n+ entry(\"nat\", Map.of(\"port\", 8081, \"ip\", \"10.0.0.4\")),\n+ entry(\"registered_domain\", \"sourceNtDomain\"),\n+ entry(\"process\", Map.of(\"name\", \"sourceProc\", \"pid\", 1234L)),\n+ entry(\"port\", 443),\n+ entry(\"service\", Map.of(\"name\", \"sourceService\")),\n+ entry(\"bytes\", 5678L),\n+ entry(\"ip\", \"192.168.0.4\"),\n+ entry(\"domain\", \"sourceDomain\"),\n+ entry(\"user\", Map.of(\"name\", \"sourceUser\", \"id\", \"sourceUserId\", \"group\", Map.of(\"name\", \"sourcePriv\"))),\n+ entry(\"mac\", \"00:0a:95:9d:68:16\")\n+ )\n+ ),\n+ entry(\"message\", \"message\"),\n+ entry(\"url\", Map.of(\"original\", \"url\")),\n+ entry(\"network\", Map.of(\"protocol\", \"HTTP\", \"transport\", \"TCP\", \"direction\", \"inbound\")),\n+ entry(\n+ \"file\",\n+ Map.ofEntries(\n+ entry(\"inode\", \"5678\"),\n+ entry(\"path\", \"/path/to/file\"),\n+ entry(\"size\", 1024L),\n+ entry(\"created\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"name\", \"file.txt\"),\n+ entry(\"mtime\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n+ entry(\"type\", \"txt\"),\n+ entry(\"hash\", \"abcd1234\"),\n+ entry(\"group\", \"rw-r--r--\")\n+ )\n+ ),\n+ entry(\"@timestamp\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"organization\", Map.of(\"name\", \"custUri\", \"id\", \"custExtId\")),\n+ entry(\n+ \"event\",\n+ Map.ofEntries(\n+ entry(\"action\", \"blocked\"),\n+ entry(\"timezone\", \"UTC\"),\n+ entry(\"end\", ZonedDateTime.parse(\"2021-06-01T11:45Z\")),\n+ entry(\"id\", \"evt123\"),\n+ entry(\"outcome\", \"success\"),\n+ entry(\"start\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"reason\", \"reason\"),\n+ entry(\"ingested\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"kind\", 1),\n+ entry(\"original\", \"rawEvent\"),\n+ entry(\"created\", ZonedDateTime.parse(\"2021-06-01T11:43:20Z\")),\n+ entry(\"code\", \"100\")\n+ )\n+ ),\n+ entry(\"user_agent\", Map.of(\"original\", \"Mozilla\")),\n+ entry(\"http\", Map.of(\"request\", Map.of(\"referrer\", \"referrer\", \"method\", \"GET\")))\n+ )\n+ );\n+ }\n+\n+ // Date parsing tests\n+ public void testToTimestampWithUnixTimestamp() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String unixTimestamp = \"1633072800000\"; // Example Unix timestamp in milliseconds\n+ ZonedDateTime expected = ZonedDateTime.ofInstant(Instant.ofEpochMilli(Long.parseLong(unixTimestamp)), ZoneId.of(\"UTC\"));\n+ ZonedDateTime result = parser.toTimestamp(unixTimestamp);\n+ 
assertEquals(expected, result);\n+ }\n+\n+ public void testToTimestampWithFormattedDate() {\n+ CefParser parser = new CefParser(ZoneId.of(\"Europe/Stockholm\"), false);\n+ String formattedDate = \"Oct 01 2021 12:00:00 UTC\"; // Example formatted date\n+ ZonedDateTime expected = ZonedDateTime.parse(\"2021-10-01T14:00+02:00[Europe/Stockholm]\");\n+ ZonedDateTime result = parser.toTimestamp(formattedDate);\n+ assertEquals(expected, result);\n+ }\n+\n+ public void testToTimestampWithFormattedDateWithoutYear() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String formattedDate = \"Oct 01 12:00:00 UTC\"; // Example formatted date without year\n+ int currentYear = ZonedDateTime.now(ZoneId.of(\"UTC\")).getYear();\n+ ZonedDateTime expected = ZonedDateTime.parse(currentYear + \"-10-01T12:00:00Z[UTC]\");\n+ ZonedDateTime result = parser.toTimestamp(formattedDate);\n+ assertEquals(expected, result);\n+ }\n+\n+ public void testToTimestampWithFormattedDateWithoutTimezone() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String formattedDate = \"Sep 07 2018 14:50:39\"; // Example formatted date without year\n+ ZonedDateTime expected = ZonedDateTime.parse(\"2018-09-07T14:50:39Z[UTC]\");\n+ ZonedDateTime result = parser.toTimestamp(formattedDate);\n+ assertEquals(expected, result);\n+ }\n+\n+ public void testToTimestampWithInvalidDate() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String invalidDate = \"invalid date\";\n+ assertThrows(IllegalArgumentException.class, () -> parser.toTimestamp(invalidDate));\n+ }\n+\n+ public void testToMacAddressWithSeparators() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ List macAddresses = List.of(\n+ // EUI-48 (with separators).\n+ \"00:0D:60:AF:1B:61\",\n+ \"00-0D-60-AF-1B-61\",\n+ \"000D.60AF.1B61\",\n+\n+ // EUI-64 (with separators).\n+ \"00:0D:60:FF:FE:AF:1B:61\",\n+ \"00-0D-60-FF-FE-AF-1B-61\",\n+ \"000D.60FF.FEAF.1B61\"\n+ );\n+ macAddresses.forEach(macAddress -> {\n+ String result = parser.toMACAddress(macAddress);\n+ assertEquals(macAddress, result);\n+ });\n+ }\n+\n+ public void testEUI48ToMacAddressWithOutSeparators() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String macAddress = \"000D60AF1B61\";\n+ String result = parser.toMACAddress(macAddress);\n+ assertEquals(\"00:0D:60:AF:1B:61\", result);\n+ }\n+\n+ public void testEUI64ToMacAddressWithOutSeparators() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), false);\n+ String macAddress = \"000D60FFFEAF1B61\";\n+ String result = parser.toMACAddress(macAddress);\n+ assertEquals(\"00:0D:60:FF:FE:AF:1B:61\", result);\n+ }\n+\n+ public void toIP_validIPv4Address() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n+ String result = parser.toIP(\"192.168.1.1\");\n+ assertEquals(\"192.168.1.1\", result);\n+ }\n+\n+ public void toIP_validIPv6Address() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n+ String result = parser.toIP(\"2001:0db8:85a3:0000:0000:8a2e:0370:7334\");\n+ assertEquals(\"2001:db8:85a3::8a2e:370:7334\", result);\n+ }\n+\n+ public void toIP_invalidIPAddress() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n+ IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> parser.toIP(\"invalid_ip\"));\n+ assertEquals(\"Invalid IP address format\", exception.getMessage());\n+ }\n+\n+ public void toIP_emptyString() {\n+ CefParser parser = new CefParser(ZoneId.of(\"UTC\"), true);\n+ IllegalArgumentException 
exception = assertThrows(IllegalArgumentException.class, () -> parser.toIP(\"\"));\n+ assertEquals(\"Invalid IP address format\", exception.getMessage());\n+ }\n+\n+ public void toIP_nullString() {", - "comment_created_at": "2025-04-04T07:02:32+00:00", - "comment_author": "bhapas", - "comment_body": "https://github.com/elastic/elasticsearch/pull/122491/commits/2b5bfd2ce93a8a8e1d043a6fd7f073585f70945b fixes this", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1192216195", - "pr_number": 95836, - "pr_file": "build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transportversion/BumpTransportVersionTask.java", - "created_at": "2023-05-12T10:51:18+00:00", - "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. Licensed under the Elastic License\n * 2.0 and the Server Side Public License, v 1; you may not use this file except\n * in compliance with, at your election, the Elastic License 2.0 or the Server\n * Side Public License, v 1.\n */\n\npackage org.elasticsearch.gradle.internal.transportversion;\n\nimport org.gradle.api.DefaultTask;\nimport org.gradle.api.tasks.TaskAction;\nimport org.jetbrains.annotations.NotNull;\n\nimport java.io.IOException;\nimport java.nio.charset.StandardCharsets;\nimport java.nio.file.Files;\nimport java.nio.file.Path;\nimport java.util.UUID;\nimport java.util.regex.Matcher;\nimport java.util.regex.Pattern;\nimport java.util.stream.Collectors;\nimport java.util.stream.Stream;\n\npublic class BumpTransportVersionTask extends DefaultTask {", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "1192216195", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 95836, - "pr_file": "build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transportversion/BumpTransportVersionTask.java", - "discussion_id": "1192216195", - "commented_code": "@@ -0,0 +1,85 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the Elastic License\n+ * 2.0 and the Server Side Public License, v 1; you may not use this file except\n+ * in compliance with, at your election, the Elastic License 2.0 or the Server\n+ * Side Public License, v 1.\n+ */\n+\n+package org.elasticsearch.gradle.internal.transportversion;\n+\n+import org.gradle.api.DefaultTask;\n+import org.gradle.api.tasks.TaskAction;\n+import org.jetbrains.annotations.NotNull;\n+\n+import java.io.IOException;\n+import java.nio.charset.StandardCharsets;\n+import java.nio.file.Files;\n+import java.nio.file.Path;\n+import java.util.UUID;\n+import java.util.regex.Matcher;\n+import java.util.regex.Pattern;\n+import java.util.stream.Collectors;\n+import java.util.stream.Stream;\n+\n+public class BumpTransportVersionTask extends DefaultTask {", - "comment_created_at": "2023-05-12T10:51:18+00:00", - "comment_author": "thecoop", - "comment_body": "Maybe 'NewTransportVersionTask' to match with the rest?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2180389078", - "pr_number": 130410, - "pr_file": "x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupJoinTypesIT.java", - "created_at": "2025-07-02T15:40:51+00:00", - "commented_code": "return existing.stream().anyMatch(c -> c.exists(indexName));\n }\n\n public void testOutputSupportedTypes() throws Exception {\n Map, DataType> signatures = new LinkedHashMap<>();\n for (TestConfigs configs : testConfigurations.values()) {\n if (configs.group.equals(\"unsupported\") || configs.group.equals(\"union-types\")) {", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2180389078", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130410, - "pr_file": "x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupJoinTypesIT.java", - "discussion_id": "2180389078", - "commented_code": "@@ -265,6 +268,21 @@ private static boolean existingIndex(Collection existing, DataType\n return existing.stream().anyMatch(c -> c.exists(indexName));\n }\n \n+ public void testOutputSupportedTypes() throws Exception {\n+ Map, DataType> signatures = new LinkedHashMap<>();\n+ for (TestConfigs configs : testConfigurations.values()) {\n+ if (configs.group.equals(\"unsupported\") || configs.group.equals(\"union-types\")) {", - "comment_created_at": "2025-07-02T15:40:51+00:00", - "comment_author": "alex-spies", - "comment_body": "maybe we should use static string constants rather than typing out `unsupported` and `union-types` verbatim.", - "pr_file_module": null - }, - { - "comment_id": "2180425537", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130410, - "pr_file": "x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupJoinTypesIT.java", - "discussion_id": "2180389078", - "commented_code": "@@ -265,6 +268,21 @@ private static boolean existingIndex(Collection existing, DataType\n return existing.stream().anyMatch(c -> c.exists(indexName));\n }\n \n+ public void testOutputSupportedTypes() throws Exception {\n+ Map, DataType> signatures = new LinkedHashMap<>();\n+ for (TestConfigs configs : testConfigurations.values()) {\n+ if (configs.group.equals(\"unsupported\") || configs.group.equals(\"union-types\")) {", - "comment_created_at": "2025-07-02T15:56:00+00:00", - "comment_author": "craigtaverner", - "comment_body": "Good idea, but perhaps out of scope, since this test uses string literals like this in many places and I think they could all be fixed together. 
Perhaps moved into an enum, and use a switch somewhere to assert none are left out?", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-optimize-before-implementing.json b/_reviewers/elasticsearch-optimize-before-implementing.json new file mode 100644 index 0000000..151b34f --- /dev/null +++ b/_reviewers/elasticsearch-optimize-before-implementing.json @@ -0,0 +1,382 @@ +[ + { + "discussion_id": "2170953937", + "pr_number": 128917, + "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java", + "created_at": "2025-06-27T06:59:47+00:00", + "commented_code": "values.add(List.of(\"coordinator\", \"optimizedPhysicalPlan\", physicalPlanString));\n var blocks = BlockUtils.fromList(PlannerUtils.NON_BREAKING_BLOCK_FACTORY, values);\n physicalPlan = new LocalSourceExec(Source.EMPTY, fields, LocalSupplier.of(blocks));\n planRunner.run(physicalPlan, listener);\n } else {\n // TODO: this could be snuck into the underlying listener\n EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n // execute any potential subplans\n executeSubPlans(optimizedPlan, planRunner, executionInfo, request, listener);\n }\n // TODO: this could be snuck into the underlying listener\n EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n // execute any potential subplans\n executeSubPlans(physicalPlan, planRunner, executionInfo, request, listener);\n }\n\n private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {}\n private record LogicalPlanTuple(LogicalPlan nonStubbedSubPlan, LogicalPlan originalSubPlan) {}\n\n private void executeSubPlans(\n PhysicalPlan physicalPlan,\n LogicalPlan optimizedPlan,\n PlanRunner runner,\n EsqlExecutionInfo executionInfo,\n EsqlQueryRequest request,\n ActionListener listener\n ) {\n List subplans = new ArrayList<>();\n\n // Currently the inlinestats are limited and supported as streaming operators, thus present inside the fragment as logical plans\n // Below they get collected, translated into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n physicalPlan.forEachUp(FragmentExec.class, f -> {\n f.fragment().forEachUp(InlineJoin.class, ij -> {\n // extract the right side of the plan and replace its source\n LogicalPlan subplan = InlineJoin.replaceStub(ij.left(), ij.right());\n // mark the new root node as optimized\n subplan.setOptimized();\n PhysicalPlan subqueryPlan = logicalPlanToPhysicalPlan(subplan, request);\n subplans.add(new PlanTuple(subqueryPlan, ij.right()));\n });\n });\n\n Iterator iterator = subplans.iterator();\n var subPlan = firstSubPlan(optimizedPlan);\n\n // TODO: merge into one method\n if (subplans.size() > 0) {\n if (subPlan != null) {\n // code-path to execute subplans\n executeSubPlan(new DriverCompletionInfo.Accumulator(), physicalPlan, iterator, executionInfo, runner, listener);\n executeSubPlan(new DriverCompletionInfo.Accumulator(), optimizedPlan, subPlan, executionInfo, runner, request, listener);\n } else {\n PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n // execute main plan\n runner.run(physicalPlan, listener);\n }\n }\n\n private LogicalPlanTuple firstSubPlan(LogicalPlan optimizedPlan) {\n Holder subPlan = new Holder<>();\n // Collect the first inlinejoin (bottom up in the tree)\n optimizedPlan.forEachUp(InlineJoin.class, ij -> {\n // extract the right side of the plan and replace its source\n if (subPlan.get() == null && ij.right().anyMatch(p -> p instanceof StubRelation)) {\n var p = 
InlineJoin.replaceStub(ij.left(), ij.right());\n p.setOptimized();\n subPlan.set(new LogicalPlanTuple(p, ij.right()));\n }\n });\n return subPlan.get();\n }\n\n private void executeSubPlan(\n DriverCompletionInfo.Accumulator completionInfoAccumulator,\n PhysicalPlan plan,\n Iterator subPlanIterator,\n LogicalPlan optimizedPlan,\n LogicalPlanTuple subPlans,\n EsqlExecutionInfo executionInfo,\n PlanRunner runner,\n EsqlQueryRequest request,\n ActionListener listener\n ) {\n PlanTuple tuple = subPlanIterator.next();\n // Create a physical plan out of the logical sub-plan\n var physicalSubPlan = logicalPlanToPhysicalPlan(subPlans.nonStubbedSubPlan, request);\n\n runner.run(tuple.physical, listener.delegateFailureAndWrap((next, result) -> {\n runner.run(physicalSubPlan, listener.delegateFailureAndWrap((next, result) -> {\n try {\n // Translate the subquery into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n completionInfoAccumulator.accumulate(result.completionInfo());\n LocalRelation resultWrapper = resultToPlan(tuple.logical, result);\n LocalRelation resultWrapper = resultToPlan(subPlans.nonStubbedSubPlan, result);\n\n // replace the original logical plan with the backing result\n PhysicalPlan newPlan = plan.transformUp(FragmentExec.class, f -> {\n LogicalPlan frag = f.fragment();\n return f.withFragment(\n frag.transformUp(\n InlineJoin.class,\n ij -> ij.right() == tuple.logical ? InlineJoin.inlineData(ij, resultWrapper) : ij\n )\n );\n });\n LogicalPlan newLogicalPlan = optimizedPlan.transformUp(\n InlineJoin.class,\n ij -> ij.right() == subPlans.originalSubPlan ? InlineJoin.inlineData(ij, resultWrapper) : ij\n );\n newLogicalPlan.setOptimized();", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2170953937", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 128917, + "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java", + "discussion_id": "2170953937", + "commented_code": "@@ -231,85 +231,88 @@ public void executeOptimizedPlan(\n values.add(List.of(\"coordinator\", \"optimizedPhysicalPlan\", physicalPlanString));\n var blocks = BlockUtils.fromList(PlannerUtils.NON_BREAKING_BLOCK_FACTORY, values);\n physicalPlan = new LocalSourceExec(Source.EMPTY, fields, LocalSupplier.of(blocks));\n+ planRunner.run(physicalPlan, listener);\n+ } else {\n+ // TODO: this could be snuck into the underlying listener\n+ EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n+ // execute any potential subplans\n+ executeSubPlans(optimizedPlan, planRunner, executionInfo, request, listener);\n }\n- // TODO: this could be snuck into the underlying listener\n- EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n- // execute any potential subplans\n- executeSubPlans(physicalPlan, planRunner, executionInfo, request, listener);\n }\n \n- private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {}\n+ private record LogicalPlanTuple(LogicalPlan nonStubbedSubPlan, LogicalPlan originalSubPlan) {}\n \n private void executeSubPlans(\n- PhysicalPlan physicalPlan,\n+ LogicalPlan optimizedPlan,\n PlanRunner runner,\n EsqlExecutionInfo executionInfo,\n EsqlQueryRequest request,\n ActionListener listener\n ) {\n- List subplans = new ArrayList<>();\n-\n- // Currently the inlinestats are limited and supported as streaming operators, thus present inside the fragment as logical plans\n- // Below they get collected, translated into a separate, coordinator based 
plan and the results 'broadcasted' as a local relation\n- physicalPlan.forEachUp(FragmentExec.class, f -> {\n- f.fragment().forEachUp(InlineJoin.class, ij -> {\n- // extract the right side of the plan and replace its source\n- LogicalPlan subplan = InlineJoin.replaceStub(ij.left(), ij.right());\n- // mark the new root node as optimized\n- subplan.setOptimized();\n- PhysicalPlan subqueryPlan = logicalPlanToPhysicalPlan(subplan, request);\n- subplans.add(new PlanTuple(subqueryPlan, ij.right()));\n- });\n- });\n-\n- Iterator iterator = subplans.iterator();\n+ var subPlan = firstSubPlan(optimizedPlan);\n \n // TODO: merge into one method\n- if (subplans.size() > 0) {\n+ if (subPlan != null) {\n // code-path to execute subplans\n- executeSubPlan(new DriverCompletionInfo.Accumulator(), physicalPlan, iterator, executionInfo, runner, listener);\n+ executeSubPlan(new DriverCompletionInfo.Accumulator(), optimizedPlan, subPlan, executionInfo, runner, request, listener);\n } else {\n+ PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n // execute main plan\n runner.run(physicalPlan, listener);\n }\n }\n \n+ private LogicalPlanTuple firstSubPlan(LogicalPlan optimizedPlan) {\n+ Holder subPlan = new Holder<>();\n+ // Collect the first inlinejoin (bottom up in the tree)\n+ optimizedPlan.forEachUp(InlineJoin.class, ij -> {\n+ // extract the right side of the plan and replace its source\n+ if (subPlan.get() == null && ij.right().anyMatch(p -> p instanceof StubRelation)) {\n+ var p = InlineJoin.replaceStub(ij.left(), ij.right());\n+ p.setOptimized();\n+ subPlan.set(new LogicalPlanTuple(p, ij.right()));\n+ }\n+ });\n+ return subPlan.get();\n+ }\n+\n private void executeSubPlan(\n DriverCompletionInfo.Accumulator completionInfoAccumulator,\n- PhysicalPlan plan,\n- Iterator subPlanIterator,\n+ LogicalPlan optimizedPlan,\n+ LogicalPlanTuple subPlans,\n EsqlExecutionInfo executionInfo,\n PlanRunner runner,\n+ EsqlQueryRequest request,\n ActionListener listener\n ) {\n- PlanTuple tuple = subPlanIterator.next();\n+ // Create a physical plan out of the logical sub-plan\n+ var physicalSubPlan = logicalPlanToPhysicalPlan(subPlans.nonStubbedSubPlan, request);\n \n- runner.run(tuple.physical, listener.delegateFailureAndWrap((next, result) -> {\n+ runner.run(physicalSubPlan, listener.delegateFailureAndWrap((next, result) -> {\n try {\n+ // Translate the subquery into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n completionInfoAccumulator.accumulate(result.completionInfo());\n- LocalRelation resultWrapper = resultToPlan(tuple.logical, result);\n+ LocalRelation resultWrapper = resultToPlan(subPlans.nonStubbedSubPlan, result);\n \n // replace the original logical plan with the backing result\n- PhysicalPlan newPlan = plan.transformUp(FragmentExec.class, f -> {\n- LogicalPlan frag = f.fragment();\n- return f.withFragment(\n- frag.transformUp(\n- InlineJoin.class,\n- ij -> ij.right() == tuple.logical ? InlineJoin.inlineData(ij, resultWrapper) : ij\n- )\n- );\n- });\n+ LogicalPlan newLogicalPlan = optimizedPlan.transformUp(\n+ InlineJoin.class,\n+ ij -> ij.right() == subPlans.originalSubPlan ? InlineJoin.inlineData(ij, resultWrapper) : ij\n+ );\n+ newLogicalPlan.setOptimized();", + "comment_created_at": "2025-06-27T06:59:47+00:00", + "comment_author": "alex-spies", + "comment_body": "This is surprising. 
Shouldn't we re-optimize the plan now that we were able to replace the stub with an actual result?\n\nAfter the stub was replaced, we can actually do more stuff, like push down limits (which before that would be wrong as it would have affected the stats).\n\nIf I understand correctly, this is something we might want to improve later, right? If so, let's leave a comment; maybe a `TODO` to make the intention clear.", + "pr_file_module": null + }, + { + "comment_id": "2170963284", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 128917, + "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java", + "discussion_id": "2170953937", + "commented_code": "@@ -231,85 +231,88 @@ public void executeOptimizedPlan(\n values.add(List.of(\"coordinator\", \"optimizedPhysicalPlan\", physicalPlanString));\n var blocks = BlockUtils.fromList(PlannerUtils.NON_BREAKING_BLOCK_FACTORY, values);\n physicalPlan = new LocalSourceExec(Source.EMPTY, fields, LocalSupplier.of(blocks));\n+ planRunner.run(physicalPlan, listener);\n+ } else {\n+ // TODO: this could be snuck into the underlying listener\n+ EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n+ // execute any potential subplans\n+ executeSubPlans(optimizedPlan, planRunner, executionInfo, request, listener);\n }\n- // TODO: this could be snuck into the underlying listener\n- EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n- // execute any potential subplans\n- executeSubPlans(physicalPlan, planRunner, executionInfo, request, listener);\n }\n \n- private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {}\n+ private record LogicalPlanTuple(LogicalPlan nonStubbedSubPlan, LogicalPlan originalSubPlan) {}\n \n private void executeSubPlans(\n- PhysicalPlan physicalPlan,\n+ LogicalPlan optimizedPlan,\n PlanRunner runner,\n EsqlExecutionInfo executionInfo,\n EsqlQueryRequest request,\n ActionListener listener\n ) {\n- List subplans = new ArrayList<>();\n-\n- // Currently the inlinestats are limited and supported as streaming operators, thus present inside the fragment as logical plans\n- // Below they get collected, translated into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n- physicalPlan.forEachUp(FragmentExec.class, f -> {\n- f.fragment().forEachUp(InlineJoin.class, ij -> {\n- // extract the right side of the plan and replace its source\n- LogicalPlan subplan = InlineJoin.replaceStub(ij.left(), ij.right());\n- // mark the new root node as optimized\n- subplan.setOptimized();\n- PhysicalPlan subqueryPlan = logicalPlanToPhysicalPlan(subplan, request);\n- subplans.add(new PlanTuple(subqueryPlan, ij.right()));\n- });\n- });\n-\n- Iterator iterator = subplans.iterator();\n+ var subPlan = firstSubPlan(optimizedPlan);\n \n // TODO: merge into one method\n- if (subplans.size() > 0) {\n+ if (subPlan != null) {\n // code-path to execute subplans\n- executeSubPlan(new DriverCompletionInfo.Accumulator(), physicalPlan, iterator, executionInfo, runner, listener);\n+ executeSubPlan(new DriverCompletionInfo.Accumulator(), optimizedPlan, subPlan, executionInfo, runner, request, listener);\n } else {\n+ PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n // execute main plan\n runner.run(physicalPlan, listener);\n }\n }\n \n+ private LogicalPlanTuple firstSubPlan(LogicalPlan optimizedPlan) {\n+ Holder subPlan = new Holder<>();\n+ // Collect the first inlinejoin (bottom up in the tree)\n+ 
optimizedPlan.forEachUp(InlineJoin.class, ij -> {\n+ // extract the right side of the plan and replace its source\n+ if (subPlan.get() == null && ij.right().anyMatch(p -> p instanceof StubRelation)) {\n+ var p = InlineJoin.replaceStub(ij.left(), ij.right());\n+ p.setOptimized();\n+ subPlan.set(new LogicalPlanTuple(p, ij.right()));\n+ }\n+ });\n+ return subPlan.get();\n+ }\n+\n private void executeSubPlan(\n DriverCompletionInfo.Accumulator completionInfoAccumulator,\n- PhysicalPlan plan,\n- Iterator subPlanIterator,\n+ LogicalPlan optimizedPlan,\n+ LogicalPlanTuple subPlans,\n EsqlExecutionInfo executionInfo,\n PlanRunner runner,\n+ EsqlQueryRequest request,\n ActionListener listener\n ) {\n- PlanTuple tuple = subPlanIterator.next();\n+ // Create a physical plan out of the logical sub-plan\n+ var physicalSubPlan = logicalPlanToPhysicalPlan(subPlans.nonStubbedSubPlan, request);\n \n- runner.run(tuple.physical, listener.delegateFailureAndWrap((next, result) -> {\n+ runner.run(physicalSubPlan, listener.delegateFailureAndWrap((next, result) -> {\n try {\n+ // Translate the subquery into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n completionInfoAccumulator.accumulate(result.completionInfo());\n- LocalRelation resultWrapper = resultToPlan(tuple.logical, result);\n+ LocalRelation resultWrapper = resultToPlan(subPlans.nonStubbedSubPlan, result);\n \n // replace the original logical plan with the backing result\n- PhysicalPlan newPlan = plan.transformUp(FragmentExec.class, f -> {\n- LogicalPlan frag = f.fragment();\n- return f.withFragment(\n- frag.transformUp(\n- InlineJoin.class,\n- ij -> ij.right() == tuple.logical ? InlineJoin.inlineData(ij, resultWrapper) : ij\n- )\n- );\n- });\n+ LogicalPlan newLogicalPlan = optimizedPlan.transformUp(\n+ InlineJoin.class,\n+ ij -> ij.right() == subPlans.originalSubPlan ? InlineJoin.inlineData(ij, resultWrapper) : ij\n+ );\n+ newLogicalPlan.setOptimized();", + "comment_created_at": "2025-06-27T07:04:15+00:00", + "comment_author": "astefan", + "comment_body": "Yeah, a TODO is right.\r\n\r\nI had my doubts with this thing. I feel like there are things here that were left hanging (things do still seem they can be optimized further), but at this stage of the `inlinestats` progress, I think it's worth ignoring it. 
But TODO is needed, because it's something we need to think about a little.", + "pr_file_module": null + }, + { + "comment_id": "2171129944", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 128917, + "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java", + "discussion_id": "2170953937", + "commented_code": "@@ -231,85 +231,88 @@ public void executeOptimizedPlan(\n values.add(List.of(\"coordinator\", \"optimizedPhysicalPlan\", physicalPlanString));\n var blocks = BlockUtils.fromList(PlannerUtils.NON_BREAKING_BLOCK_FACTORY, values);\n physicalPlan = new LocalSourceExec(Source.EMPTY, fields, LocalSupplier.of(blocks));\n+ planRunner.run(physicalPlan, listener);\n+ } else {\n+ // TODO: this could be snuck into the underlying listener\n+ EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n+ // execute any potential subplans\n+ executeSubPlans(optimizedPlan, planRunner, executionInfo, request, listener);\n }\n- // TODO: this could be snuck into the underlying listener\n- EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n- // execute any potential subplans\n- executeSubPlans(physicalPlan, planRunner, executionInfo, request, listener);\n }\n \n- private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {}\n+ private record LogicalPlanTuple(LogicalPlan nonStubbedSubPlan, LogicalPlan originalSubPlan) {}\n \n private void executeSubPlans(\n- PhysicalPlan physicalPlan,\n+ LogicalPlan optimizedPlan,\n PlanRunner runner,\n EsqlExecutionInfo executionInfo,\n EsqlQueryRequest request,\n ActionListener listener\n ) {\n- List subplans = new ArrayList<>();\n-\n- // Currently the inlinestats are limited and supported as streaming operators, thus present inside the fragment as logical plans\n- // Below they get collected, translated into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n- physicalPlan.forEachUp(FragmentExec.class, f -> {\n- f.fragment().forEachUp(InlineJoin.class, ij -> {\n- // extract the right side of the plan and replace its source\n- LogicalPlan subplan = InlineJoin.replaceStub(ij.left(), ij.right());\n- // mark the new root node as optimized\n- subplan.setOptimized();\n- PhysicalPlan subqueryPlan = logicalPlanToPhysicalPlan(subplan, request);\n- subplans.add(new PlanTuple(subqueryPlan, ij.right()));\n- });\n- });\n-\n- Iterator iterator = subplans.iterator();\n+ var subPlan = firstSubPlan(optimizedPlan);\n \n // TODO: merge into one method\n- if (subplans.size() > 0) {\n+ if (subPlan != null) {\n // code-path to execute subplans\n- executeSubPlan(new DriverCompletionInfo.Accumulator(), physicalPlan, iterator, executionInfo, runner, listener);\n+ executeSubPlan(new DriverCompletionInfo.Accumulator(), optimizedPlan, subPlan, executionInfo, runner, request, listener);\n } else {\n+ PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n // execute main plan\n runner.run(physicalPlan, listener);\n }\n }\n \n+ private LogicalPlanTuple firstSubPlan(LogicalPlan optimizedPlan) {\n+ Holder subPlan = new Holder<>();\n+ // Collect the first inlinejoin (bottom up in the tree)\n+ optimizedPlan.forEachUp(InlineJoin.class, ij -> {\n+ // extract the right side of the plan and replace its source\n+ if (subPlan.get() == null && ij.right().anyMatch(p -> p instanceof StubRelation)) {\n+ var p = InlineJoin.replaceStub(ij.left(), ij.right());\n+ p.setOptimized();\n+ subPlan.set(new LogicalPlanTuple(p, ij.right()));\n+ }\n+ });\n+ return subPlan.get();\n+ 
}\n+\n private void executeSubPlan(\n DriverCompletionInfo.Accumulator completionInfoAccumulator,\n- PhysicalPlan plan,\n- Iterator subPlanIterator,\n+ LogicalPlan optimizedPlan,\n+ LogicalPlanTuple subPlans,\n EsqlExecutionInfo executionInfo,\n PlanRunner runner,\n+ EsqlQueryRequest request,\n ActionListener listener\n ) {\n- PlanTuple tuple = subPlanIterator.next();\n+ // Create a physical plan out of the logical sub-plan\n+ var physicalSubPlan = logicalPlanToPhysicalPlan(subPlans.nonStubbedSubPlan, request);\n \n- runner.run(tuple.physical, listener.delegateFailureAndWrap((next, result) -> {\n+ runner.run(physicalSubPlan, listener.delegateFailureAndWrap((next, result) -> {\n try {\n+ // Translate the subquery into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n completionInfoAccumulator.accumulate(result.completionInfo());\n- LocalRelation resultWrapper = resultToPlan(tuple.logical, result);\n+ LocalRelation resultWrapper = resultToPlan(subPlans.nonStubbedSubPlan, result);\n \n // replace the original logical plan with the backing result\n- PhysicalPlan newPlan = plan.transformUp(FragmentExec.class, f -> {\n- LogicalPlan frag = f.fragment();\n- return f.withFragment(\n- frag.transformUp(\n- InlineJoin.class,\n- ij -> ij.right() == tuple.logical ? InlineJoin.inlineData(ij, resultWrapper) : ij\n- )\n- );\n- });\n+ LogicalPlan newLogicalPlan = optimizedPlan.transformUp(\n+ InlineJoin.class,\n+ ij -> ij.right() == subPlans.originalSubPlan ? InlineJoin.inlineData(ij, resultWrapper) : ij\n+ );\n+ newLogicalPlan.setOptimized();", + "comment_created_at": "2025-06-27T08:02:42+00:00", + "comment_author": "bpintea", + "comment_body": "Hmm, I thought this _was_ sufficient. The approach basically gradually executes righthand sides of (inline) joins, always resulting in a `LocalRelation`. Ending up with a join with whatever was before on the left and this local relation on the right. 
Not sure if we can further optimise this (and haven't done it before - the LIMIT makes it past the InlineJoin into the lefthand side already, since InlineJoin preserves the rows count).\r\nBut a TODO can re-eval things later, 👍 .", + "pr_file_module": null + }, + { + "comment_id": "2174975919", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 128917, + "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java", + "discussion_id": "2170953937", + "commented_code": "@@ -231,85 +231,88 @@ public void executeOptimizedPlan(\n values.add(List.of(\"coordinator\", \"optimizedPhysicalPlan\", physicalPlanString));\n var blocks = BlockUtils.fromList(PlannerUtils.NON_BREAKING_BLOCK_FACTORY, values);\n physicalPlan = new LocalSourceExec(Source.EMPTY, fields, LocalSupplier.of(blocks));\n+ planRunner.run(physicalPlan, listener);\n+ } else {\n+ // TODO: this could be snuck into the underlying listener\n+ EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n+ // execute any potential subplans\n+ executeSubPlans(optimizedPlan, planRunner, executionInfo, request, listener);\n }\n- // TODO: this could be snuck into the underlying listener\n- EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n- // execute any potential subplans\n- executeSubPlans(physicalPlan, planRunner, executionInfo, request, listener);\n }\n \n- private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {}\n+ private record LogicalPlanTuple(LogicalPlan nonStubbedSubPlan, LogicalPlan originalSubPlan) {}\n \n private void executeSubPlans(\n- PhysicalPlan physicalPlan,\n+ LogicalPlan optimizedPlan,\n PlanRunner runner,\n EsqlExecutionInfo executionInfo,\n EsqlQueryRequest request,\n ActionListener listener\n ) {\n- List subplans = new ArrayList<>();\n-\n- // Currently the inlinestats are limited and supported as streaming operators, thus present inside the fragment as logical plans\n- // Below they get collected, translated into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n- physicalPlan.forEachUp(FragmentExec.class, f -> {\n- f.fragment().forEachUp(InlineJoin.class, ij -> {\n- // extract the right side of the plan and replace its source\n- LogicalPlan subplan = InlineJoin.replaceStub(ij.left(), ij.right());\n- // mark the new root node as optimized\n- subplan.setOptimized();\n- PhysicalPlan subqueryPlan = logicalPlanToPhysicalPlan(subplan, request);\n- subplans.add(new PlanTuple(subqueryPlan, ij.right()));\n- });\n- });\n-\n- Iterator iterator = subplans.iterator();\n+ var subPlan = firstSubPlan(optimizedPlan);\n \n // TODO: merge into one method\n- if (subplans.size() > 0) {\n+ if (subPlan != null) {\n // code-path to execute subplans\n- executeSubPlan(new DriverCompletionInfo.Accumulator(), physicalPlan, iterator, executionInfo, runner, listener);\n+ executeSubPlan(new DriverCompletionInfo.Accumulator(), optimizedPlan, subPlan, executionInfo, runner, request, listener);\n } else {\n+ PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n // execute main plan\n runner.run(physicalPlan, listener);\n }\n }\n \n+ private LogicalPlanTuple firstSubPlan(LogicalPlan optimizedPlan) {\n+ Holder subPlan = new Holder<>();\n+ // Collect the first inlinejoin (bottom up in the tree)\n+ optimizedPlan.forEachUp(InlineJoin.class, ij -> {\n+ // extract the right side of the plan and replace its source\n+ if (subPlan.get() == null && ij.right().anyMatch(p -> p instanceof StubRelation)) {\n+ var p = 
InlineJoin.replaceStub(ij.left(), ij.right());\n+ p.setOptimized();\n+ subPlan.set(new LogicalPlanTuple(p, ij.right()));\n+ }\n+ });\n+ return subPlan.get();\n+ }\n+\n private void executeSubPlan(\n DriverCompletionInfo.Accumulator completionInfoAccumulator,\n- PhysicalPlan plan,\n- Iterator subPlanIterator,\n+ LogicalPlan optimizedPlan,\n+ LogicalPlanTuple subPlans,\n EsqlExecutionInfo executionInfo,\n PlanRunner runner,\n+ EsqlQueryRequest request,\n ActionListener listener\n ) {\n- PlanTuple tuple = subPlanIterator.next();\n+ // Create a physical plan out of the logical sub-plan\n+ var physicalSubPlan = logicalPlanToPhysicalPlan(subPlans.nonStubbedSubPlan, request);\n \n- runner.run(tuple.physical, listener.delegateFailureAndWrap((next, result) -> {\n+ runner.run(physicalSubPlan, listener.delegateFailureAndWrap((next, result) -> {\n try {\n+ // Translate the subquery into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n completionInfoAccumulator.accumulate(result.completionInfo());\n- LocalRelation resultWrapper = resultToPlan(tuple.logical, result);\n+ LocalRelation resultWrapper = resultToPlan(subPlans.nonStubbedSubPlan, result);\n \n // replace the original logical plan with the backing result\n- PhysicalPlan newPlan = plan.transformUp(FragmentExec.class, f -> {\n- LogicalPlan frag = f.fragment();\n- return f.withFragment(\n- frag.transformUp(\n- InlineJoin.class,\n- ij -> ij.right() == tuple.logical ? InlineJoin.inlineData(ij, resultWrapper) : ij\n- )\n- );\n- });\n+ LogicalPlan newLogicalPlan = optimizedPlan.transformUp(\n+ InlineJoin.class,\n+ ij -> ij.right() == subPlans.originalSubPlan ? InlineJoin.inlineData(ij, resultWrapper) : ij\n+ );\n+ newLogicalPlan.setOptimized();", + "comment_created_at": "2025-06-30T12:38:14+00:00", + "comment_author": "alex-spies", + "comment_body": "Oh, we can further optimize it alright :)\r\n\r\nThe limit being pushed/copied down into the left hand side is a bug - that should only happen in subsequent passes. See my comment [here](https://github.com/elastic/elasticsearch/pull/128917#discussion_r2174955992).\r\n\r\nAlso, if the `INLINESTATS` has no `BY` clause, it can be turned into an `EVAL` with literals in subsequent phases, which can trigger more optimizations (like constant folding, filter pushdown, optimizing away checks against constant keyword fields). 
Some of this could probably be somehow hacked into the first optimization pass, but currently I think it's more natural (or at least easier) to just have another optimizer pass per query phase.", + "pr_file_module": null + }, + { + "comment_id": "2175521950", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 128917, + "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java", + "discussion_id": "2170953937", + "commented_code": "@@ -231,85 +231,88 @@ public void executeOptimizedPlan(\n values.add(List.of(\"coordinator\", \"optimizedPhysicalPlan\", physicalPlanString));\n var blocks = BlockUtils.fromList(PlannerUtils.NON_BREAKING_BLOCK_FACTORY, values);\n physicalPlan = new LocalSourceExec(Source.EMPTY, fields, LocalSupplier.of(blocks));\n+ planRunner.run(physicalPlan, listener);\n+ } else {\n+ // TODO: this could be snuck into the underlying listener\n+ EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n+ // execute any potential subplans\n+ executeSubPlans(optimizedPlan, planRunner, executionInfo, request, listener);\n }\n- // TODO: this could be snuck into the underlying listener\n- EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n- // execute any potential subplans\n- executeSubPlans(physicalPlan, planRunner, executionInfo, request, listener);\n }\n \n- private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {}\n+ private record LogicalPlanTuple(LogicalPlan nonStubbedSubPlan, LogicalPlan originalSubPlan) {}\n \n private void executeSubPlans(\n- PhysicalPlan physicalPlan,\n+ LogicalPlan optimizedPlan,\n PlanRunner runner,\n EsqlExecutionInfo executionInfo,\n EsqlQueryRequest request,\n ActionListener listener\n ) {\n- List subplans = new ArrayList<>();\n-\n- // Currently the inlinestats are limited and supported as streaming operators, thus present inside the fragment as logical plans\n- // Below they get collected, translated into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n- physicalPlan.forEachUp(FragmentExec.class, f -> {\n- f.fragment().forEachUp(InlineJoin.class, ij -> {\n- // extract the right side of the plan and replace its source\n- LogicalPlan subplan = InlineJoin.replaceStub(ij.left(), ij.right());\n- // mark the new root node as optimized\n- subplan.setOptimized();\n- PhysicalPlan subqueryPlan = logicalPlanToPhysicalPlan(subplan, request);\n- subplans.add(new PlanTuple(subqueryPlan, ij.right()));\n- });\n- });\n-\n- Iterator iterator = subplans.iterator();\n+ var subPlan = firstSubPlan(optimizedPlan);\n \n // TODO: merge into one method\n- if (subplans.size() > 0) {\n+ if (subPlan != null) {\n // code-path to execute subplans\n- executeSubPlan(new DriverCompletionInfo.Accumulator(), physicalPlan, iterator, executionInfo, runner, listener);\n+ executeSubPlan(new DriverCompletionInfo.Accumulator(), optimizedPlan, subPlan, executionInfo, runner, request, listener);\n } else {\n+ PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n // execute main plan\n runner.run(physicalPlan, listener);\n }\n }\n \n+ private LogicalPlanTuple firstSubPlan(LogicalPlan optimizedPlan) {\n+ Holder subPlan = new Holder<>();\n+ // Collect the first inlinejoin (bottom up in the tree)\n+ optimizedPlan.forEachUp(InlineJoin.class, ij -> {\n+ // extract the right side of the plan and replace its source\n+ if (subPlan.get() == null && ij.right().anyMatch(p -> p instanceof StubRelation)) {\n+ var p = InlineJoin.replaceStub(ij.left(), 
ij.right());\n+ p.setOptimized();\n+ subPlan.set(new LogicalPlanTuple(p, ij.right()));\n+ }\n+ });\n+ return subPlan.get();\n+ }\n+\n private void executeSubPlan(\n DriverCompletionInfo.Accumulator completionInfoAccumulator,\n- PhysicalPlan plan,\n- Iterator subPlanIterator,\n+ LogicalPlan optimizedPlan,\n+ LogicalPlanTuple subPlans,\n EsqlExecutionInfo executionInfo,\n PlanRunner runner,\n+ EsqlQueryRequest request,\n ActionListener listener\n ) {\n- PlanTuple tuple = subPlanIterator.next();\n+ // Create a physical plan out of the logical sub-plan\n+ var physicalSubPlan = logicalPlanToPhysicalPlan(subPlans.nonStubbedSubPlan, request);\n \n- runner.run(tuple.physical, listener.delegateFailureAndWrap((next, result) -> {\n+ runner.run(physicalSubPlan, listener.delegateFailureAndWrap((next, result) -> {\n try {\n+ // Translate the subquery into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n completionInfoAccumulator.accumulate(result.completionInfo());\n- LocalRelation resultWrapper = resultToPlan(tuple.logical, result);\n+ LocalRelation resultWrapper = resultToPlan(subPlans.nonStubbedSubPlan, result);\n \n // replace the original logical plan with the backing result\n- PhysicalPlan newPlan = plan.transformUp(FragmentExec.class, f -> {\n- LogicalPlan frag = f.fragment();\n- return f.withFragment(\n- frag.transformUp(\n- InlineJoin.class,\n- ij -> ij.right() == tuple.logical ? InlineJoin.inlineData(ij, resultWrapper) : ij\n- )\n- );\n- });\n+ LogicalPlan newLogicalPlan = optimizedPlan.transformUp(\n+ InlineJoin.class,\n+ ij -> ij.right() == subPlans.originalSubPlan ? InlineJoin.inlineData(ij, resultWrapper) : ij\n+ );\n+ newLogicalPlan.setOptimized();", + "comment_created_at": "2025-06-30T16:57:00+00:00", + "comment_author": "bpintea", + "comment_body": "> The limit being pushed/copied down into the left hand side is a bug - that should only happen in subsequent passes.\r\n\r\nActually, very true!\r\n\r\n> Also, if the INLINESTATS has no BY clause, it can be turned into an EVAL with literals in subsequent phases\r\n\r\nI see. Yes, this could be done, but not with the current shape of the planning (i.e. rerunning the optimiser as it is now won't replan). 
But yes, you're right, this could still be evolved.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2175016623", + "pr_number": 127223, + "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java", + "created_at": "2025-06-30T12:59:04+00:00", + "commented_code": "return knnQuery;\n }\n\n private Query maybeWrapPatience(Query knnQuery) {\n Query finalQuery = knnQuery;\n if (knnQuery instanceof KnnByteVectorQuery knnByteVectorQuery) {\n finalQuery = maybeWrapPatienceByte(knnByteVectorQuery, Math.max(7, (int) (knnByteVectorQuery.getK() * 0.3)));\n } else if (knnQuery instanceof KnnFloatVectorQuery knnFloatVectorQuery) {\n finalQuery = maybeWrapPatienceFloat(knnFloatVectorQuery, Math.max(7, (int) (knnFloatVectorQuery.getK() * 0.3)));\n }\n return finalQuery;\n }\n\n private Query maybeWrapPatienceByte(KnnByteVectorQuery knnQuery, int patience) {\n Query returnedQuery = knnQuery;\n if (indexOptions instanceof HnswIndexOptions) {\n returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n } else if (indexOptions instanceof Int8HnswIndexOptions) {\n returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n } else if (indexOptions instanceof Int4HnswIndexOptions) {\n returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n } else if (indexOptions instanceof BBQHnswIndexOptions) {\n returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n }\n return returnedQuery;\n }\n\n private Query maybeWrapPatienceFloat(KnnFloatVectorQuery knnQuery, int patience) {\n Query returnedQuery = knnQuery;\n if (indexOptions instanceof HnswIndexOptions hnswIndexOptions) {\n returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n } else if (indexOptions instanceof Int8HnswIndexOptions hnswIndexOptions) {\n returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n } else if (indexOptions instanceof Int4HnswIndexOptions hnswIndexOptions) {\n returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n } else if (indexOptions instanceof BBQHnswIndexOptions hnswIndexOptions) {\n returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n }\n return returnedQuery;", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2175016623", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 127223, + "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java", + "discussion_id": "2175016623", + "commented_code": "@@ -2526,6 +2561,44 @@ private Query createKnnByteQuery(\n return knnQuery;\n }\n \n+ private Query maybeWrapPatience(Query knnQuery) {\n+ Query finalQuery = knnQuery;\n+ if (knnQuery instanceof KnnByteVectorQuery knnByteVectorQuery) {\n+ finalQuery = maybeWrapPatienceByte(knnByteVectorQuery, Math.max(7, (int) (knnByteVectorQuery.getK() * 0.3)));\n+ } else if (knnQuery instanceof KnnFloatVectorQuery knnFloatVectorQuery) {\n+ finalQuery = maybeWrapPatienceFloat(knnFloatVectorQuery, Math.max(7, (int) (knnFloatVectorQuery.getK() * 0.3)));\n+ }\n+ return finalQuery;\n+ }\n+\n+ private Query maybeWrapPatienceByte(KnnByteVectorQuery knnQuery, int patience) {\n+ Query returnedQuery = knnQuery;\n+ if (indexOptions instanceof HnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int8HnswIndexOptions) 
{\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int4HnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof BBQHnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ }\n+ return returnedQuery;\n+ }\n+\n+ private Query maybeWrapPatienceFloat(KnnFloatVectorQuery knnQuery, int patience) {\n+ Query returnedQuery = knnQuery;\n+ if (indexOptions instanceof HnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int8HnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int4HnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof BBQHnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ }\n+ return returnedQuery;", + "comment_created_at": "2025-06-30T12:59:04+00:00", + "comment_author": "benwtrent", + "comment_body": "Why do we have this big predicate branch? \r\n\r\nAlso, it seems that this `0.995` is just the default in Lucene. \r\n\r\nWhy can't we simplify all this and return `PatienceKnnVectorQuery.fromFloatQuery(knnQuery)` ?\r\n\r\nSimilar question to the byte query.", + "pr_file_module": null + }, + { + "comment_id": "2175126375", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 127223, + "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java", + "discussion_id": "2175016623", + "commented_code": "@@ -2526,6 +2561,44 @@ private Query createKnnByteQuery(\n return knnQuery;\n }\n \n+ private Query maybeWrapPatience(Query knnQuery) {\n+ Query finalQuery = knnQuery;\n+ if (knnQuery instanceof KnnByteVectorQuery knnByteVectorQuery) {\n+ finalQuery = maybeWrapPatienceByte(knnByteVectorQuery, Math.max(7, (int) (knnByteVectorQuery.getK() * 0.3)));\n+ } else if (knnQuery instanceof KnnFloatVectorQuery knnFloatVectorQuery) {\n+ finalQuery = maybeWrapPatienceFloat(knnFloatVectorQuery, Math.max(7, (int) (knnFloatVectorQuery.getK() * 0.3)));\n+ }\n+ return finalQuery;\n+ }\n+\n+ private Query maybeWrapPatienceByte(KnnByteVectorQuery knnQuery, int patience) {\n+ Query returnedQuery = knnQuery;\n+ if (indexOptions instanceof HnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int8HnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int4HnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof BBQHnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ }\n+ return returnedQuery;\n+ }\n+\n+ private Query maybeWrapPatienceFloat(KnnFloatVectorQuery knnQuery, int patience) {\n+ Query returnedQuery = knnQuery;\n+ if (indexOptions instanceof HnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int8HnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = 
PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int4HnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof BBQHnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ }\n+ return returnedQuery;", + "comment_created_at": "2025-06-30T13:47:17+00:00", + "comment_author": "tteofili", + "comment_body": "++ on the defaults.\r\nre the branches, it doesn't make sense to wrap the knn query with non-HNSW index types because, currently, it only works with them.", + "pr_file_module": null + }, + { + "comment_id": "2175182641", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 127223, + "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java", + "discussion_id": "2175016623", + "commented_code": "@@ -2526,6 +2561,44 @@ private Query createKnnByteQuery(\n return knnQuery;\n }\n \n+ private Query maybeWrapPatience(Query knnQuery) {\n+ Query finalQuery = knnQuery;\n+ if (knnQuery instanceof KnnByteVectorQuery knnByteVectorQuery) {\n+ finalQuery = maybeWrapPatienceByte(knnByteVectorQuery, Math.max(7, (int) (knnByteVectorQuery.getK() * 0.3)));\n+ } else if (knnQuery instanceof KnnFloatVectorQuery knnFloatVectorQuery) {\n+ finalQuery = maybeWrapPatienceFloat(knnFloatVectorQuery, Math.max(7, (int) (knnFloatVectorQuery.getK() * 0.3)));\n+ }\n+ return finalQuery;\n+ }\n+\n+ private Query maybeWrapPatienceByte(KnnByteVectorQuery knnQuery, int patience) {\n+ Query returnedQuery = knnQuery;\n+ if (indexOptions instanceof HnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int8HnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int4HnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof BBQHnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ }\n+ return returnedQuery;\n+ }\n+\n+ private Query maybeWrapPatienceFloat(KnnFloatVectorQuery knnQuery, int patience) {\n+ Query returnedQuery = knnQuery;\n+ if (indexOptions instanceof HnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int8HnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int4HnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof BBQHnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ }\n+ return returnedQuery;", + "comment_created_at": "2025-06-30T14:13:20+00:00", + "comment_author": "benwtrent", + "comment_body": "@tteofili OK, I understand that, I think maybe we can then collapse into predicate then :)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2170926461", + "pr_number": 130160, + "pr_file": "x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/DownsampleShardPersistentTaskExecutor.java", + "created_at": "2025-06-27T06:44:02+00:00", + 
"commented_code": ".orElse(NO_NODE_FOUND);\n }\n\n /**\n * An eligible node to run the downsampling task for a shard is a node that holds\n * a searchable version of this shard.\n * In stateless deployment we choose only nodes that hold search shards.\n * Otherwise, we choose the node that holds the primary shard.\n * Visible for testing.\n * @param indexShardRouting the routing of the shard to be downsampled\n * @return the set of candidate nodes downsampling can run on.\n */\n Set getEligibleNodes(IndexShardRoutingTable indexShardRouting) {", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2170926461", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130160, + "pr_file": "x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/DownsampleShardPersistentTaskExecutor.java", + "discussion_id": "2170926461", + "commented_code": "@@ -159,6 +164,29 @@ public PersistentTasksCustomMetadata.Assignment getAssignment(\n .orElse(NO_NODE_FOUND);\n }\n \n+ /**\n+ * An eligible node to run the downsampling task for a shard is a node that holds\n+ * a searchable version of this shard.\n+ * In stateless deployment we choose only nodes that hold search shards.\n+ * Otherwise, we choose the node that holds the primary shard.\n+ * Visible for testing.\n+ * @param indexShardRouting the routing of the shard to be downsampled\n+ * @return the set of candidate nodes downsampling can run on.\n+ */\n+ Set getEligibleNodes(IndexShardRoutingTable indexShardRouting) {", + "comment_created_at": "2025-06-27T06:44:02+00:00", + "comment_author": "martijnvg", + "comment_body": "Maybe rewrite this method like this:\r\n\r\n```\r\nPredicate getEligibleNodes(IndexShardRoutingTable indexShardRouting) {\r\n if (isStateless) {\r\n return candidateNode -> {\r\n for (var shardRouting : indexShardRouting.replicaShards()) {\r\n if (shardRouting.started()) {\r\n return shardRouting.currentNodeId().equals(candidateNode.getId());\r\n }\r\n }\r\n return false;\r\n };\r\n } else if (indexShardRouting.primaryShard().started()) {\r\n return candidateNode -> indexShardRouting.primaryShard().currentNodeId().equals(candidateNode.getId());\r\n } else {\r\n return null;\r\n }\r\n }\r\n```\r\n\r\nThat way the logic is better contained and no intermediate set is created?", + "pr_file_module": null + }, + { + "comment_id": "2171212580", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130160, + "pr_file": "x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/DownsampleShardPersistentTaskExecutor.java", + "discussion_id": "2170926461", + "commented_code": "@@ -159,6 +164,29 @@ public PersistentTasksCustomMetadata.Assignment getAssignment(\n .orElse(NO_NODE_FOUND);\n }\n \n+ /**\n+ * An eligible node to run the downsampling task for a shard is a node that holds\n+ * a searchable version of this shard.\n+ * In stateless deployment we choose only nodes that hold search shards.\n+ * Otherwise, we choose the node that holds the primary shard.\n+ * Visible for testing.\n+ * @param indexShardRouting the routing of the shard to be downsampled\n+ * @return the set of candidate nodes downsampling can run on.\n+ */\n+ Set getEligibleNodes(IndexShardRoutingTable indexShardRouting) {", + "comment_created_at": "2025-06-27T08:29:16+00:00", + "comment_author": "gmarouli", + "comment_body": "I have been thinking about this, and I think both my current implementation and this can be a bit inefficient in big clusters because we iterate over all the candidate 
nodes that could be a lot. \r\n\r\nThinking about this some more, I am considering iterating over the eligible nodes which are probably way less than the total candidates and check if they are a candidate node.\r\n\r\nWhat do you think?\r\n\r\nAbout the code you shared, it does read nicely, but I am concerned that for every candidate node we are going to be building a new iterator over the replica shards ending up with more object churn than with the previous solution. Right?\r\n\r\n", + "pr_file_module": null + }, + { + "comment_id": "2171650282", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130160, + "pr_file": "x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/DownsampleShardPersistentTaskExecutor.java", + "discussion_id": "2170926461", + "commented_code": "@@ -159,6 +164,29 @@ public PersistentTasksCustomMetadata.Assignment getAssignment(\n .orElse(NO_NODE_FOUND);\n }\n \n+ /**\n+ * An eligible node to run the downsampling task for a shard is a node that holds\n+ * a searchable version of this shard.\n+ * In stateless deployment we choose only nodes that hold search shards.\n+ * Otherwise, we choose the node that holds the primary shard.\n+ * Visible for testing.\n+ * @param indexShardRouting the routing of the shard to be downsampled\n+ * @return the set of candidate nodes downsampling can run on.\n+ */\n+ Set getEligibleNodes(IndexShardRoutingTable indexShardRouting) {", + "comment_created_at": "2025-06-27T11:07:12+00:00", + "comment_author": "gmarouli", + "comment_body": "@martijnvg I refactored it, I think it looks a bit cleaner now. Let me know what you think.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1977324330", + "pr_number": 123322, + "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/QueryProgressFragmentOptimizer.java", + "created_at": "2025-03-03T11:09:38+00:00", + "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. Licensed under the Elastic License\n * 2.0; you may not use this file except in compliance with the Elastic License\n * 2.0.\n */\n\npackage org.elasticsearch.xpack.esql.optimizer;\n\nimport org.elasticsearch.core.Nullable;\nimport org.elasticsearch.xpack.esql.core.expression.FoldContext;\nimport org.elasticsearch.xpack.esql.core.expression.Literal;\nimport org.elasticsearch.xpack.esql.plan.logical.Limit;\nimport org.elasticsearch.xpack.esql.plan.physical.FragmentExec;\nimport org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan;\nimport org.elasticsearch.xpack.esql.planner.mapper.Mapper;\n\nimport java.util.function.IntSupplier;\n\n/**\n * Attempts to rewrite the fragment enclosed in a data-node plan based on the progress of the query. 
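(Looping back to the downsample eligible-nodes thread that concludes just above.) A rough sketch of the inversion gmarouli describes: collect the few node ids that hold a searchable copy of the shard once, then membership-check each candidate rather than re-walking the routing table per candidate. The routing-API calls mirror the quoted suggestion, but this is not the merged refactoring.

```java
// Hypothetical sketch: build the small set of node ids holding a searchable copy of the
// shard (search replicas in stateless, the primary otherwise), then filter candidates
// with a cheap contains() check instead of iterating every candidate node.
Set<String> eligibleNodeIds(IndexShardRoutingTable indexShardRouting, boolean isStateless) {
    Set<String> nodeIds = new HashSet<>();
    if (isStateless) {
        for (ShardRouting replica : indexShardRouting.replicaShards()) {
            if (replica.started()) {
                nodeIds.add(replica.currentNodeId());
            }
        }
    } else if (indexShardRouting.primaryShard().started()) {
        nodeIds.add(indexShardRouting.primaryShard().currentNodeId());
    }
    return nodeIds;
    // usage: candidateNodes.stream().filter(node -> nodeIds.contains(node.getId()))
}
```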
For example:\n * `FROM x | LIMIT 100` can be safely rewritten to `FROM x | LIMIT 40` if 60 rows have already been collected.\n * TODO: Rewrite TopN to filters\n */\npublic final class QueryProgressFragmentOptimizer {\n private final PhysicalVerifier verifier = PhysicalVerifier.INSTANCE;\n private final IntSupplier alreadyCollectedLimit;\n\n /**\n * @param alreadyCollectedLimit the supplier to get the number of rows already collected\n */\n public QueryProgressFragmentOptimizer(@Nullable IntSupplier alreadyCollectedLimit) {\n this.alreadyCollectedLimit = alreadyCollectedLimit;\n }\n\n /**\n * Attempts to optimize the fragment enclosed in a data-node plan based on the progress of the query.\n * @param plan the input plan\n * @return the optimized plan. If this returns null, the query can be early terminated.\n */\n public PhysicalPlan optimizeFragment(PhysicalPlan plan) {\n if (alreadyCollectedLimit == null) {\n return plan;\n }\n final var fragments = plan.collectFirstChildren(p -> p instanceof FragmentExec);\n if (fragments.size() != 1) {\n return plan;\n }\n final FragmentExec fragment = (FragmentExec) fragments.getFirst();\n final var pipelineBreakers = fragment.fragment().collectFirstChildren(Mapper::isPipelineBreaker);\n if (pipelineBreakers.isEmpty()) {\n return plan;\n }\n // Rewrite LIMIT\n if (pipelineBreakers.getFirst() instanceof Limit firstLimit) {", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "1977324330", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 123322, + "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/QueryProgressFragmentOptimizer.java", + "discussion_id": "1977324330", + "commented_code": "@@ -0,0 +1,84 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. Licensed under the Elastic License\n+ * 2.0; you may not use this file except in compliance with the Elastic License\n+ * 2.0.\n+ */\n+\n+package org.elasticsearch.xpack.esql.optimizer;\n+\n+import org.elasticsearch.core.Nullable;\n+import org.elasticsearch.xpack.esql.core.expression.FoldContext;\n+import org.elasticsearch.xpack.esql.core.expression.Literal;\n+import org.elasticsearch.xpack.esql.plan.logical.Limit;\n+import org.elasticsearch.xpack.esql.plan.physical.FragmentExec;\n+import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan;\n+import org.elasticsearch.xpack.esql.planner.mapper.Mapper;\n+\n+import java.util.function.IntSupplier;\n+\n+/**\n+ * Attempts to rewrite the fragment enclosed in a data-node plan based on the progress of the query. For example:\n+ * `FROM x | LIMIT 100` can be safely rewritten to `FROM x | LIMIT 40` if 60 rows have already been collected.\n+ * TODO: Rewrite TopN to filters\n+ */\n+public final class QueryProgressFragmentOptimizer {\n+ private final PhysicalVerifier verifier = PhysicalVerifier.INSTANCE;\n+ private final IntSupplier alreadyCollectedLimit;\n+\n+ /**\n+ * @param alreadyCollectedLimit the supplier to get the number of rows already collected\n+ */\n+ public QueryProgressFragmentOptimizer(@Nullable IntSupplier alreadyCollectedLimit) {\n+ this.alreadyCollectedLimit = alreadyCollectedLimit;\n+ }\n+\n+ /**\n+ * Attempts to optimize the fragment enclosed in a data-node plan based on the progress of the query.\n+ * @param plan the input plan\n+ * @return the optimized plan. 
If this returns null, the query can be early terminated.\n+ */\n+ public PhysicalPlan optimizeFragment(PhysicalPlan plan) {\n+ if (alreadyCollectedLimit == null) {\n+ return plan;\n+ }\n+ final var fragments = plan.collectFirstChildren(p -> p instanceof FragmentExec);\n+ if (fragments.size() != 1) {\n+ return plan;\n+ }\n+ final FragmentExec fragment = (FragmentExec) fragments.getFirst();\n+ final var pipelineBreakers = fragment.fragment().collectFirstChildren(Mapper::isPipelineBreaker);\n+ if (pipelineBreakers.isEmpty()) {\n+ return plan;\n+ }\n+ // Rewrite LIMIT\n+ if (pipelineBreakers.getFirst() instanceof Limit firstLimit) {", + "comment_created_at": "2025-03-03T11:09:38+00:00", + "comment_author": "alex-spies", + "comment_body": "The approach here assumes that there are no query plan nodes after the `LIMIT` which may rely on the number of rows that they see. I believe this is correct now (as there just shouldn't be anything after the `LIMIT`), but we cannot guarantee that this will always be the case, esp. as different teams add new commands to ES|QL.\r\n\r\nCouldn't we limit the scope of this optimizer rule to the case where the _last_ plan node in the fragment is a `LIMIT`? That's more easily provable to be correct and will break less likely when we add new query plan nodes.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1977694495", + "pr_number": 123322, + "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/QueryProgressFragmentOptimizer.java", + "created_at": "2025-03-03T15:14:00+00:00", + "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. Licensed under the Elastic License\n * 2.0; you may not use this file except in compliance with the Elastic License\n * 2.0.\n */\n\npackage org.elasticsearch.xpack.esql.optimizer;\n\nimport org.elasticsearch.core.Nullable;\nimport org.elasticsearch.xpack.esql.core.expression.FoldContext;\nimport org.elasticsearch.xpack.esql.core.expression.Literal;\nimport org.elasticsearch.xpack.esql.plan.logical.Limit;\nimport org.elasticsearch.xpack.esql.plan.physical.FragmentExec;\nimport org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan;\nimport org.elasticsearch.xpack.esql.planner.mapper.Mapper;\n\nimport java.util.function.IntSupplier;\n\n/**\n * Attempts to rewrite the fragment enclosed in a data-node plan based on the progress of the query. For example:\n * `FROM x | LIMIT 100` can be safely rewritten to `FROM x | LIMIT 40` if 60 rows have already been collected.\n * TODO: Rewrite TopN to filters\n */\npublic final class QueryProgressFragmentOptimizer {", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "1977694495", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 123322, + "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/QueryProgressFragmentOptimizer.java", + "discussion_id": "1977694495", + "commented_code": "@@ -0,0 +1,84 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
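(A hedged illustration of the narrower precondition alex-spies argues for above.) Only rewrite when the fragment's outermost node is itself the Limit, so plan nodes added after a LIMIT in the future cannot silently observe fewer rows. The plan shapes are assumed from the quoted optimizer and `limitValue` is a hypothetical helper.

```java
// Illustrative guard only (assumed plan shapes, hypothetical limitValue() helper):
// refuse to rewrite unless the enclosed fragment's root is the Limit itself.
var fragmentRoot = fragment.fragment();
if (fragmentRoot instanceof Limit firstLimit) {
    int alreadyCollected = alreadyCollectedLimit.getAsInt();
    int remaining = Math.max(0, limitValue(firstLimit) - alreadyCollected);
    // rewrite firstLimit to `remaining` (or signal early termination when remaining == 0)
} else {
    return plan; // anything other than a trailing LIMIT: leave the fragment untouched
}
```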
Licensed under the Elastic License\n+ * 2.0; you may not use this file except in compliance with the Elastic License\n+ * 2.0.\n+ */\n+\n+package org.elasticsearch.xpack.esql.optimizer;\n+\n+import org.elasticsearch.core.Nullable;\n+import org.elasticsearch.xpack.esql.core.expression.FoldContext;\n+import org.elasticsearch.xpack.esql.core.expression.Literal;\n+import org.elasticsearch.xpack.esql.plan.logical.Limit;\n+import org.elasticsearch.xpack.esql.plan.physical.FragmentExec;\n+import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan;\n+import org.elasticsearch.xpack.esql.planner.mapper.Mapper;\n+\n+import java.util.function.IntSupplier;\n+\n+/**\n+ * Attempts to rewrite the fragment enclosed in a data-node plan based on the progress of the query. For example:\n+ * `FROM x | LIMIT 100` can be safely rewritten to `FROM x | LIMIT 40` if 60 rows have already been collected.\n+ * TODO: Rewrite TopN to filters\n+ */\n+public final class QueryProgressFragmentOptimizer {", + "comment_created_at": "2025-03-03T15:14:00+00:00", + "comment_author": "astefan", + "comment_body": "I would look into integrating this step into existing optimization infrastructure. Even though this is a data-node level optimization that depends on how many rows have been returned, in essence I believe it should be a **logical plan level optimization**. There is a recent change in our code base that duplicates a `limit` in other parts of the query in certain situations which means the original plan can have a `limit 50` somewhere in the middle and now, instead of a `limit 50` somewhere at the end of the query, it has a `limit 10` or similar. Probably it doesn't affect the overall execution, but we don't know how this code evolves in the future.\r\n\r\nInitially, I thought that this could be put somewhere in `ComputeService:379` in the physical plan optimizer, meaning here\r\n```\r\nplan = PlannerUtils.localPlan(context.searchExecutionContexts(), context.configuration(), context.foldCtx(), plan);\r\n```\r\n\r\nAnd this method could receive the new value for the limit. That value should then be passed to the **physical plan optimizer**. **Maybe this _could_ be a better start for this feature.**\r\n\r\nI think this is a really great idea for optimizing simple queries to be faster, but a less fragile implementation should integrate this step in the usual optimization processes, since this _could_ interfere with other optimization steps. Having it there instead can also be a signal for future development that this optimization exists. 
The code in the ES|QL distributed code is quite separated from the one in the optimization plan code.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2100298832", + "pr_number": 128135, + "pr_file": "x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/TopNOperator.java", + "created_at": "2025-05-21T13:26:36+00:00", + "commented_code": "private final BlockFactory blockFactory;\n private final CircuitBreaker breaker;\n private final Queue inputQueue;\n private final Map inputQueues;", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2100298832", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 128135, + "pr_file": "x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/TopNOperator.java", + "discussion_id": "2100298832", + "commented_code": "@@ -264,12 +282,14 @@ public String describe() {\n \n private final BlockFactory blockFactory;\n private final CircuitBreaker breaker;\n- private final Queue inputQueue;\n+ private final Map inputQueues;", + "comment_created_at": "2025-05-21T13:26:36+00:00", + "comment_author": "nik9000", + "comment_body": "I'm a bit worried about memory tracking for the keys.\r\n\r\nGenerally we use `ObjectArray` to keep things like the `Queue`s and we use `BytesRefHash` to assign the ids.\r\n\r\nIf we did that it'd read very very very similarly to an aggregation. In aggs-land we separate the hashing implementation from the \"collecting\" one. I wonder if we'd be better off using the aggs infrastructure in this case. No idea how we migrate from one to the other, but we have a `TopBytesRefAggregator`. And we could try to use that *somehow*. If we did that we'd get nice handling for the hashing for free. We're quite good at those hash keys.", + "pr_file_module": null + }, + { + "comment_id": "2102502245", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 128135, + "pr_file": "x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/TopNOperator.java", + "discussion_id": "2100298832", + "commented_code": "@@ -264,12 +282,14 @@ public String describe() {\n \n private final BlockFactory blockFactory;\n private final CircuitBreaker breaker;\n- private final Queue inputQueue;\n+ private final Map inputQueues;", + "comment_created_at": "2025-05-22T12:59:38+00:00", + "comment_author": "przemekwitek", + "comment_body": "> Generally we use ObjectArray to keep things like the Queues and we use BytesRefHash to assign the ids.\r\n\r\nOk, that's something I'll take a look into.\r\n\r\n> And we could try to use that somehow.\r\n\r\nDo you think it would be beneficial to do it in this PR? To me it sounds like quite a complex refactoring.", + "pr_file_module": null + }, + { + "comment_id": "2102526209", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 128135, + "pr_file": "x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/TopNOperator.java", + "discussion_id": "2100298832", + "commented_code": "@@ -264,12 +282,14 @@ public String describe() {\n \n private final BlockFactory blockFactory;\n private final CircuitBreaker breaker;\n- private final Queue inputQueue;\n+ private final Map inputQueues;", + "comment_created_at": "2025-05-22T13:11:25+00:00", + "comment_author": "nik9000", + "comment_body": "It is. It has the advantage of applying all of the grouping code we worked hard on for aggs so you don't have to hand build something on your side. 
Maybe you'd be better off just using the `BlockHash.build` infrastructure here. It'd be quite compatible with the ObjectArray stuff. I'm just worried that if you did it you'd find yourself building most of the `HashAggregatorOperator`.\r\n\r\nI can have a little look around this morning though.", + "pr_file_module": null + }, + { + "comment_id": "2102580378", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 128135, + "pr_file": "x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/TopNOperator.java", + "discussion_id": "2100298832", + "commented_code": "@@ -264,12 +282,14 @@ public String describe() {\n \n private final BlockFactory blockFactory;\n private final CircuitBreaker breaker;\n- private final Queue inputQueue;\n+ private final Map inputQueues;", + "comment_created_at": "2025-05-22T13:34:28+00:00", + "comment_author": "nik9000", + "comment_body": "I had a look and think it's probably not worth trying to migrate the whole thing to an agg. Aggs encode their intermediate representation as a single dense column. TopN builds the result columns. They aren't incompatible, but it'd be tricky to migrate.\r\n\r\nI suppose what I suggest instead is to use `BlockHash` to turn the grouping keys into an integer and then `ObjectArray` to contain the heaps. In that case `BlockHash` is going to do a good job managing memory, breaking if it uses too much, and cleaning up. It also has support for non-string keys and complex sets of keys. And will absorb any optimizations we make in the future.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2099445576", + "pr_number": 127260, + "pr_file": "server/src/main/java/org/elasticsearch/lucene/queries/TimestampTwoPhaseIterator.java", + "created_at": "2025-05-21T06:17:28+00:00", + "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. Licensed under the \"Elastic License\n * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n * Public License v 1\"; you may not use this file except in compliance with, at\n * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n * License v3.0 only\", or the \"Server Side Public License, v 1\".\n */\npackage org.elasticsearch.lucene.queries;\n\nimport org.apache.lucene.index.DocValuesSkipper;\nimport org.apache.lucene.index.NumericDocValues;\nimport org.apache.lucene.search.DocIdSetIterator;\nimport org.apache.lucene.search.TwoPhaseIterator;\n\nimport java.io.IOException;\n\n/**\n * Based on {@link org.apache.lucene.search.DocValuesRangeIterator} but modified for time series and logsdb use cases.\n *
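(Still on the grouped TopN thread that ends just above.) A loose sketch of the shape nik9000 suggests: a hash assigns each grouping key a dense integer id (BlockHash in the real operator, abstracted away here) and a BigArrays-backed ObjectArray holds one bounded queue per id, so the circuit breaker accounts for both the keys and the heaps. The queue factory and the method shown on the queue are assumptions, not the PR's code.

```java
// Hypothetical per-group state storage: group ids come from a key hash (BlockHash in
// ES|QL compute); the ObjectArray is grown on demand so BigArrays tracks the memory.
ObjectArray<Queue> perGroupQueues = bigArrays.newObjectArray(1);

void collectRow(int groupId, Row row) {
    perGroupQueues = bigArrays.grow(perGroupQueues, groupId + 1);
    Queue queue = perGroupQueues.get(groupId);
    if (queue == null) {
        queue = newBoundedQueue();   // assumed factory for the operator's existing bounded queue
        perGroupQueues.set(groupId, queue);
    }
    queue.add(row);                  // insertion via the queue's existing API, shown generically
}
```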
\n * The @timestamp field always has exactly one value and all documents always have a @timestamp value.\n * Additionally, the @timestamp field is always the secondary index sort field and sort order is always descending.\n *
\n * This makes the doc value skipper on @timestamp field less effective, and so the doc value skipper for the first index sort field is\n * also used to skip to the next primary sort value if for the current skipper represents one value (min and max value are the same) and\n * the current value is lower than minTimestamp.\n */\nfinal class TimestampTwoPhaseIterator extends TwoPhaseIterator {\n\n private final RangeNoGapsApproximation approximation;\n private final NumericDocValues timestamps;\n\n private final long minTimestamp;\n private final long maxTimestamp;\n\n TimestampTwoPhaseIterator(\n NumericDocValues timestamps,\n DocValuesSkipper timestampSkipper,\n DocValuesSkipper primaryFieldSkipper,\n long minTimestamp,\n long maxTimestamp\n ) {\n super(new RangeNoGapsApproximation(timestamps, timestampSkipper, primaryFieldSkipper, minTimestamp, maxTimestamp));\n this.approximation = (RangeNoGapsApproximation) approximation();\n this.timestamps = timestamps;\n this.minTimestamp = minTimestamp;\n this.maxTimestamp = maxTimestamp;\n }\n\n static final class RangeNoGapsApproximation extends DocIdSetIterator {\n\n private final DocIdSetIterator innerApproximation;\n\n final DocValuesSkipper timestampSkipper;\n final DocValuesSkipper primaryFieldSkipper;\n final long minTimestamp;\n final long maxTimestamp;\n\n private int doc = -1;\n\n // Track a decision for all doc IDs between the current doc ID and upTo inclusive.\n Match match = Match.MAYBE;\n int upTo = -1;\n int primaryFieldUpTo = -1;\n\n RangeNoGapsApproximation(\n DocIdSetIterator innerApproximation,\n DocValuesSkipper timestampSkipper,\n DocValuesSkipper primaryFieldSkipper,\n long minTimestamp,\n long maxTimestamp\n ) {\n this.innerApproximation = innerApproximation;\n this.timestampSkipper = timestampSkipper;\n this.primaryFieldSkipper = primaryFieldSkipper;\n this.minTimestamp = minTimestamp;\n this.maxTimestamp = maxTimestamp;\n }\n\n @Override\n public int docID() {\n return doc;\n }\n\n @Override\n public int nextDoc() throws IOException {\n return advance(docID() + 1);\n }\n\n @Override\n public int advance(int target) throws IOException {\n while (true) {\n if (target > upTo) {\n timestampSkipper.advance(target);\n upTo = timestampSkipper.maxDocID(0);\n if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n return doc = DocIdSetIterator.NO_MORE_DOCS;\n }\n match = match(0);\n\n // If we have a YES or NO decision, see if we still have the same decision on a higher\n // level (= on a wider range of doc IDs)\n int nextLevel = 1;\n while (match != Match.MAYBE && nextLevel < timestampSkipper.numLevels() && match == match(nextLevel)) {\n upTo = timestampSkipper.maxDocID(nextLevel);\n nextLevel++;\n }\n }\n switch (match) {\n case YES:\n return doc = target;\n case MAYBE:\n if (target > innerApproximation.docID()) {\n target = innerApproximation.advance(target);", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2099445576", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 127260, + "pr_file": "server/src/main/java/org/elasticsearch/lucene/queries/TimestampTwoPhaseIterator.java", + "discussion_id": "2099445576", + "commented_code": "@@ -0,0 +1,207 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.lucene.queries;\n+\n+import org.apache.lucene.index.DocValuesSkipper;\n+import org.apache.lucene.index.NumericDocValues;\n+import org.apache.lucene.search.DocIdSetIterator;\n+import org.apache.lucene.search.TwoPhaseIterator;\n+\n+import java.io.IOException;\n+\n+/**\n+ * Based on {@link org.apache.lucene.search.DocValuesRangeIterator} but modified for time series and logsdb use cases.\n+ *
\n+ * The @timestamp field always has exactly one value and all documents always have a @timestamp value.\n+ * Additionally, the @timestamp field is always the secondary index sort field and sort order is always descending.\n+ *
\n+ * This makes the doc value skipper on @timestamp field less effective, and so the doc value skipper for the first index sort field is\n+ * also used to skip to the next primary sort value if for the current skipper represents one value (min and max value are the same) and\n+ * the current value is lower than minTimestamp.\n+ */\n+final class TimestampTwoPhaseIterator extends TwoPhaseIterator {\n+\n+ private final RangeNoGapsApproximation approximation;\n+ private final NumericDocValues timestamps;\n+\n+ private final long minTimestamp;\n+ private final long maxTimestamp;\n+\n+ TimestampTwoPhaseIterator(\n+ NumericDocValues timestamps,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ super(new RangeNoGapsApproximation(timestamps, timestampSkipper, primaryFieldSkipper, minTimestamp, maxTimestamp));\n+ this.approximation = (RangeNoGapsApproximation) approximation();\n+ this.timestamps = timestamps;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ static final class RangeNoGapsApproximation extends DocIdSetIterator {\n+\n+ private final DocIdSetIterator innerApproximation;\n+\n+ final DocValuesSkipper timestampSkipper;\n+ final DocValuesSkipper primaryFieldSkipper;\n+ final long minTimestamp;\n+ final long maxTimestamp;\n+\n+ private int doc = -1;\n+\n+ // Track a decision for all doc IDs between the current doc ID and upTo inclusive.\n+ Match match = Match.MAYBE;\n+ int upTo = -1;\n+ int primaryFieldUpTo = -1;\n+\n+ RangeNoGapsApproximation(\n+ DocIdSetIterator innerApproximation,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ this.innerApproximation = innerApproximation;\n+ this.timestampSkipper = timestampSkipper;\n+ this.primaryFieldSkipper = primaryFieldSkipper;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ @Override\n+ public int docID() {\n+ return doc;\n+ }\n+\n+ @Override\n+ public int nextDoc() throws IOException {\n+ return advance(docID() + 1);\n+ }\n+\n+ @Override\n+ public int advance(int target) throws IOException {\n+ while (true) {\n+ if (target > upTo) {\n+ timestampSkipper.advance(target);\n+ upTo = timestampSkipper.maxDocID(0);\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = DocIdSetIterator.NO_MORE_DOCS;\n+ }\n+ match = match(0);\n+\n+ // If we have a YES or NO decision, see if we still have the same decision on a higher\n+ // level (= on a wider range of doc IDs)\n+ int nextLevel = 1;\n+ while (match != Match.MAYBE && nextLevel < timestampSkipper.numLevels() && match == match(nextLevel)) {\n+ upTo = timestampSkipper.maxDocID(nextLevel);\n+ nextLevel++;\n+ }\n+ }\n+ switch (match) {\n+ case YES:\n+ return doc = target;\n+ case MAYBE:\n+ if (target > innerApproximation.docID()) {\n+ target = innerApproximation.advance(target);", + "comment_created_at": "2025-05-21T06:17:28+00:00", + "comment_author": "iverase", + "comment_body": "I would expect to advance here the timestamps iterator instead of the innerApproximation. ", + "pr_file_module": null + }, + { + "comment_id": "2099525131", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 127260, + "pr_file": "server/src/main/java/org/elasticsearch/lucene/queries/TimestampTwoPhaseIterator.java", + "discussion_id": "2099445576", + "commented_code": "@@ -0,0 +1,207 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one\n+ * or more contributor license agreements. Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.lucene.queries;\n+\n+import org.apache.lucene.index.DocValuesSkipper;\n+import org.apache.lucene.index.NumericDocValues;\n+import org.apache.lucene.search.DocIdSetIterator;\n+import org.apache.lucene.search.TwoPhaseIterator;\n+\n+import java.io.IOException;\n+\n+/**\n+ * Based on {@link org.apache.lucene.search.DocValuesRangeIterator} but modified for time series and logsdb use cases.\n+ *
\n+ * The @timestamp field always has exactly one value and all documents always have a @timestamp value.\n+ * Additionally, the @timestamp field is always the secondary index sort field and sort order is always descending.\n+ *
\n+ * This makes the doc value skipper on @timestamp field less effective, and so the doc value skipper for the first index sort field is\n+ * also used to skip to the next primary sort value if for the current skipper represents one value (min and max value are the same) and\n+ * the current value is lower than minTimestamp.\n+ */\n+final class TimestampTwoPhaseIterator extends TwoPhaseIterator {\n+\n+ private final RangeNoGapsApproximation approximation;\n+ private final NumericDocValues timestamps;\n+\n+ private final long minTimestamp;\n+ private final long maxTimestamp;\n+\n+ TimestampTwoPhaseIterator(\n+ NumericDocValues timestamps,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ super(new RangeNoGapsApproximation(timestamps, timestampSkipper, primaryFieldSkipper, minTimestamp, maxTimestamp));\n+ this.approximation = (RangeNoGapsApproximation) approximation();\n+ this.timestamps = timestamps;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ static final class RangeNoGapsApproximation extends DocIdSetIterator {\n+\n+ private final DocIdSetIterator innerApproximation;\n+\n+ final DocValuesSkipper timestampSkipper;\n+ final DocValuesSkipper primaryFieldSkipper;\n+ final long minTimestamp;\n+ final long maxTimestamp;\n+\n+ private int doc = -1;\n+\n+ // Track a decision for all doc IDs between the current doc ID and upTo inclusive.\n+ Match match = Match.MAYBE;\n+ int upTo = -1;\n+ int primaryFieldUpTo = -1;\n+\n+ RangeNoGapsApproximation(\n+ DocIdSetIterator innerApproximation,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ this.innerApproximation = innerApproximation;\n+ this.timestampSkipper = timestampSkipper;\n+ this.primaryFieldSkipper = primaryFieldSkipper;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ @Override\n+ public int docID() {\n+ return doc;\n+ }\n+\n+ @Override\n+ public int nextDoc() throws IOException {\n+ return advance(docID() + 1);\n+ }\n+\n+ @Override\n+ public int advance(int target) throws IOException {\n+ while (true) {\n+ if (target > upTo) {\n+ timestampSkipper.advance(target);\n+ upTo = timestampSkipper.maxDocID(0);\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = DocIdSetIterator.NO_MORE_DOCS;\n+ }\n+ match = match(0);\n+\n+ // If we have a YES or NO decision, see if we still have the same decision on a higher\n+ // level (= on a wider range of doc IDs)\n+ int nextLevel = 1;\n+ while (match != Match.MAYBE && nextLevel < timestampSkipper.numLevels() && match == match(nextLevel)) {\n+ upTo = timestampSkipper.maxDocID(nextLevel);\n+ nextLevel++;\n+ }\n+ }\n+ switch (match) {\n+ case YES:\n+ return doc = target;\n+ case MAYBE:\n+ if (target > innerApproximation.docID()) {\n+ target = innerApproximation.advance(target);", + "comment_created_at": "2025-05-21T07:05:52+00:00", + "comment_author": "iverase", + "comment_body": "I see now that it is equivalent. Would it make sense to make innerApproximation a NumericDocValues and remove the reference from the parent class? ", + "pr_file_module": null + }, + { + "comment_id": "2111161746", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 127260, + "pr_file": "server/src/main/java/org/elasticsearch/lucene/queries/TimestampTwoPhaseIterator.java", + "discussion_id": "2099445576", + "commented_code": "@@ -0,0 +1,207 @@\n+/*\n+ * Copyright Elasticsearch B.V. 
and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.lucene.queries;\n+\n+import org.apache.lucene.index.DocValuesSkipper;\n+import org.apache.lucene.index.NumericDocValues;\n+import org.apache.lucene.search.DocIdSetIterator;\n+import org.apache.lucene.search.TwoPhaseIterator;\n+\n+import java.io.IOException;\n+\n+/**\n+ * Based on {@link org.apache.lucene.search.DocValuesRangeIterator} but modified for time series and logsdb use cases.\n+ *
\n+ * The @timestamp field always has exactly one value and all documents always have a @timestamp value.\n+ * Additionally, the @timestamp field is always the secondary index sort field and sort order is always descending.\n+ *
\n+ * This makes the doc value skipper on @timestamp field less effective, and so the doc value skipper for the first index sort field is\n+ * also used to skip to the next primary sort value if for the current skipper represents one value (min and max value are the same) and\n+ * the current value is lower than minTimestamp.\n+ */\n+final class TimestampTwoPhaseIterator extends TwoPhaseIterator {\n+\n+ private final RangeNoGapsApproximation approximation;\n+ private final NumericDocValues timestamps;\n+\n+ private final long minTimestamp;\n+ private final long maxTimestamp;\n+\n+ TimestampTwoPhaseIterator(\n+ NumericDocValues timestamps,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ super(new RangeNoGapsApproximation(timestamps, timestampSkipper, primaryFieldSkipper, minTimestamp, maxTimestamp));\n+ this.approximation = (RangeNoGapsApproximation) approximation();\n+ this.timestamps = timestamps;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ static final class RangeNoGapsApproximation extends DocIdSetIterator {\n+\n+ private final DocIdSetIterator innerApproximation;\n+\n+ final DocValuesSkipper timestampSkipper;\n+ final DocValuesSkipper primaryFieldSkipper;\n+ final long minTimestamp;\n+ final long maxTimestamp;\n+\n+ private int doc = -1;\n+\n+ // Track a decision for all doc IDs between the current doc ID and upTo inclusive.\n+ Match match = Match.MAYBE;\n+ int upTo = -1;\n+ int primaryFieldUpTo = -1;\n+\n+ RangeNoGapsApproximation(\n+ DocIdSetIterator innerApproximation,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ this.innerApproximation = innerApproximation;\n+ this.timestampSkipper = timestampSkipper;\n+ this.primaryFieldSkipper = primaryFieldSkipper;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ @Override\n+ public int docID() {\n+ return doc;\n+ }\n+\n+ @Override\n+ public int nextDoc() throws IOException {\n+ return advance(docID() + 1);\n+ }\n+\n+ @Override\n+ public int advance(int target) throws IOException {\n+ while (true) {\n+ if (target > upTo) {\n+ timestampSkipper.advance(target);\n+ upTo = timestampSkipper.maxDocID(0);\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = DocIdSetIterator.NO_MORE_DOCS;\n+ }\n+ match = match(0);\n+\n+ // If we have a YES or NO decision, see if we still have the same decision on a higher\n+ // level (= on a wider range of doc IDs)\n+ int nextLevel = 1;\n+ while (match != Match.MAYBE && nextLevel < timestampSkipper.numLevels() && match == match(nextLevel)) {\n+ upTo = timestampSkipper.maxDocID(nextLevel);\n+ nextLevel++;\n+ }\n+ }\n+ switch (match) {\n+ case YES:\n+ return doc = target;\n+ case MAYBE:\n+ if (target > innerApproximation.docID()) {\n+ target = innerApproximation.advance(target);", + "comment_created_at": "2025-05-28T07:37:57+00:00", + "comment_author": "martijnvg", + "comment_body": "done: 24819c4929b7591fdb9795333cf4d1e3329cdfc3", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2099475391", + "pr_number": 127260, + "pr_file": "server/src/main/java/org/elasticsearch/lucene/queries/TimestampTwoPhaseIterator.java", + "created_at": "2025-05-21T06:38:24+00:00", + "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. 
Licensed under the \"Elastic License\n * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n * Public License v 1\"; you may not use this file except in compliance with, at\n * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n * License v3.0 only\", or the \"Server Side Public License, v 1\".\n */\npackage org.elasticsearch.lucene.queries;\n\nimport org.apache.lucene.index.DocValuesSkipper;\nimport org.apache.lucene.index.NumericDocValues;\nimport org.apache.lucene.search.DocIdSetIterator;\nimport org.apache.lucene.search.TwoPhaseIterator;\n\nimport java.io.IOException;\n\n/**\n * Based on {@link org.apache.lucene.search.DocValuesRangeIterator} but modified for time series and logsdb use cases.\n *
\n * The @timestamp field always has exactly one value and all documents always have a @timestamp value.\n * Additionally, the @timestamp field is always the secondary index sort field and sort order is always descending.\n *
\n * This makes the doc value skipper on @timestamp field less effective, and so the doc value skipper for the first index sort field is\n * also used to skip to the next primary sort value if for the current skipper represents one value (min and max value are the same) and\n * the current value is lower than minTimestamp.\n */\nfinal class TimestampTwoPhaseIterator extends TwoPhaseIterator {\n\n private final RangeNoGapsApproximation approximation;\n private final NumericDocValues timestamps;\n\n private final long minTimestamp;\n private final long maxTimestamp;\n\n TimestampTwoPhaseIterator(\n NumericDocValues timestamps,\n DocValuesSkipper timestampSkipper,\n DocValuesSkipper primaryFieldSkipper,\n long minTimestamp,\n long maxTimestamp\n ) {\n super(new RangeNoGapsApproximation(timestamps, timestampSkipper, primaryFieldSkipper, minTimestamp, maxTimestamp));\n this.approximation = (RangeNoGapsApproximation) approximation();\n this.timestamps = timestamps;\n this.minTimestamp = minTimestamp;\n this.maxTimestamp = maxTimestamp;\n }\n\n static final class RangeNoGapsApproximation extends DocIdSetIterator {\n\n private final DocIdSetIterator innerApproximation;\n\n final DocValuesSkipper timestampSkipper;\n final DocValuesSkipper primaryFieldSkipper;\n final long minTimestamp;\n final long maxTimestamp;\n\n private int doc = -1;\n\n // Track a decision for all doc IDs between the current doc ID and upTo inclusive.\n Match match = Match.MAYBE;\n int upTo = -1;\n int primaryFieldUpTo = -1;\n\n RangeNoGapsApproximation(\n DocIdSetIterator innerApproximation,\n DocValuesSkipper timestampSkipper,\n DocValuesSkipper primaryFieldSkipper,\n long minTimestamp,\n long maxTimestamp\n ) {\n this.innerApproximation = innerApproximation;\n this.timestampSkipper = timestampSkipper;\n this.primaryFieldSkipper = primaryFieldSkipper;\n this.minTimestamp = minTimestamp;\n this.maxTimestamp = maxTimestamp;\n }\n\n @Override\n public int docID() {\n return doc;\n }\n\n @Override\n public int nextDoc() throws IOException {\n return advance(docID() + 1);\n }\n\n @Override\n public int advance(int target) throws IOException {\n while (true) {\n if (target > upTo) {\n timestampSkipper.advance(target);\n upTo = timestampSkipper.maxDocID(0);\n if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n return doc = DocIdSetIterator.NO_MORE_DOCS;\n }\n match = match(0);\n\n // If we have a YES or NO decision, see if we still have the same decision on a higher\n // level (= on a wider range of doc IDs)\n int nextLevel = 1;\n while (match != Match.MAYBE && nextLevel < timestampSkipper.numLevels() && match == match(nextLevel)) {\n upTo = timestampSkipper.maxDocID(nextLevel);\n nextLevel++;\n }\n }\n switch (match) {\n case YES:\n return doc = target;\n case MAYBE:\n if (target > innerApproximation.docID()) {\n target = innerApproximation.advance(target);\n }\n if (target <= upTo) {\n return doc = target;\n }\n break;\n case NO_AND_SKIP:\n if (target > primaryFieldUpTo) {\n primaryFieldSkipper.advance(target);\n for (int level = 0; level < primaryFieldSkipper.numLevels(); level++) {\n if (primaryFieldSkipper.minValue(level) == primaryFieldSkipper.maxValue(level)) {\n primaryFieldUpTo = primaryFieldSkipper.maxDocID(level);\n } else {\n break;\n }\n }\n if (primaryFieldUpTo > upTo) {\n upTo = primaryFieldUpTo;\n }\n }\n if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n return doc = NO_MORE_DOCS;\n }\n target = upTo + 1;\n break;\n case NO:\n if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n return doc = NO_MORE_DOCS;\n }\n target = upTo + 
1;\n break;\n default:\n throw new AssertionError(\"Unknown enum constant: \" + match);\n }\n }\n }\n\n @Override\n public long cost() {\n return innerApproximation.cost();\n }\n\n Match match(int level) {\n long minValue = timestampSkipper.minValue(level);\n long maxValue = timestampSkipper.maxValue(level);\n if (minValue > maxTimestamp) {\n return Match.NO;\n } else if (maxValue < minTimestamp) {\n return Match.NO_AND_SKIP;\n } else if (minValue >= minTimestamp && maxValue <= maxTimestamp) {\n return Match.YES;\n } else {\n return Match.MAYBE;\n }\n }\n\n }\n\n @Override\n public boolean matches() throws IOException {\n return switch (approximation.match) {\n case YES -> true;\n case MAYBE -> {\n final long value = timestamps.longValue();\n yield value >= minTimestamp && value <= maxTimestamp;", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2099475391", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 127260, + "pr_file": "server/src/main/java/org/elasticsearch/lucene/queries/TimestampTwoPhaseIterator.java", + "discussion_id": "2099475391", + "commented_code": "@@ -0,0 +1,207 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.lucene.queries;\n+\n+import org.apache.lucene.index.DocValuesSkipper;\n+import org.apache.lucene.index.NumericDocValues;\n+import org.apache.lucene.search.DocIdSetIterator;\n+import org.apache.lucene.search.TwoPhaseIterator;\n+\n+import java.io.IOException;\n+\n+/**\n+ * Based on {@link org.apache.lucene.search.DocValuesRangeIterator} but modified for time series and logsdb use cases.\n+ *
\n+ * The @timestamp field always has exactly one value and all documents always have a @timestamp value.\n+ * Additionally, the @timestamp field is always the secondary index sort field and sort order is always descending.\n+ *
\n+ * This makes the doc value skipper on @timestamp field less effective, and so the doc value skipper for the first index sort field is\n+ * also used to skip to the next primary sort value if for the current skipper represents one value (min and max value are the same) and\n+ * the current value is lower than minTimestamp.\n+ */\n+final class TimestampTwoPhaseIterator extends TwoPhaseIterator {\n+\n+ private final RangeNoGapsApproximation approximation;\n+ private final NumericDocValues timestamps;\n+\n+ private final long minTimestamp;\n+ private final long maxTimestamp;\n+\n+ TimestampTwoPhaseIterator(\n+ NumericDocValues timestamps,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ super(new RangeNoGapsApproximation(timestamps, timestampSkipper, primaryFieldSkipper, minTimestamp, maxTimestamp));\n+ this.approximation = (RangeNoGapsApproximation) approximation();\n+ this.timestamps = timestamps;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ static final class RangeNoGapsApproximation extends DocIdSetIterator {\n+\n+ private final DocIdSetIterator innerApproximation;\n+\n+ final DocValuesSkipper timestampSkipper;\n+ final DocValuesSkipper primaryFieldSkipper;\n+ final long minTimestamp;\n+ final long maxTimestamp;\n+\n+ private int doc = -1;\n+\n+ // Track a decision for all doc IDs between the current doc ID and upTo inclusive.\n+ Match match = Match.MAYBE;\n+ int upTo = -1;\n+ int primaryFieldUpTo = -1;\n+\n+ RangeNoGapsApproximation(\n+ DocIdSetIterator innerApproximation,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ this.innerApproximation = innerApproximation;\n+ this.timestampSkipper = timestampSkipper;\n+ this.primaryFieldSkipper = primaryFieldSkipper;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ @Override\n+ public int docID() {\n+ return doc;\n+ }\n+\n+ @Override\n+ public int nextDoc() throws IOException {\n+ return advance(docID() + 1);\n+ }\n+\n+ @Override\n+ public int advance(int target) throws IOException {\n+ while (true) {\n+ if (target > upTo) {\n+ timestampSkipper.advance(target);\n+ upTo = timestampSkipper.maxDocID(0);\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = DocIdSetIterator.NO_MORE_DOCS;\n+ }\n+ match = match(0);\n+\n+ // If we have a YES or NO decision, see if we still have the same decision on a higher\n+ // level (= on a wider range of doc IDs)\n+ int nextLevel = 1;\n+ while (match != Match.MAYBE && nextLevel < timestampSkipper.numLevels() && match == match(nextLevel)) {\n+ upTo = timestampSkipper.maxDocID(nextLevel);\n+ nextLevel++;\n+ }\n+ }\n+ switch (match) {\n+ case YES:\n+ return doc = target;\n+ case MAYBE:\n+ if (target > innerApproximation.docID()) {\n+ target = innerApproximation.advance(target);\n+ }\n+ if (target <= upTo) {\n+ return doc = target;\n+ }\n+ break;\n+ case NO_AND_SKIP:\n+ if (target > primaryFieldUpTo) {\n+ primaryFieldSkipper.advance(target);\n+ for (int level = 0; level < primaryFieldSkipper.numLevels(); level++) {\n+ if (primaryFieldSkipper.minValue(level) == primaryFieldSkipper.maxValue(level)) {\n+ primaryFieldUpTo = primaryFieldSkipper.maxDocID(level);\n+ } else {\n+ break;\n+ }\n+ }\n+ if (primaryFieldUpTo > upTo) {\n+ upTo = primaryFieldUpTo;\n+ }\n+ }\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = NO_MORE_DOCS;\n+ }\n+ target = upTo + 1;\n+ 
break;\n+ case NO:\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = NO_MORE_DOCS;\n+ }\n+ target = upTo + 1;\n+ break;\n+ default:\n+ throw new AssertionError(\"Unknown enum constant: \" + match);\n+ }\n+ }\n+ }\n+\n+ @Override\n+ public long cost() {\n+ return innerApproximation.cost();\n+ }\n+\n+ Match match(int level) {\n+ long minValue = timestampSkipper.minValue(level);\n+ long maxValue = timestampSkipper.maxValue(level);\n+ if (minValue > maxTimestamp) {\n+ return Match.NO;\n+ } else if (maxValue < minTimestamp) {\n+ return Match.NO_AND_SKIP;\n+ } else if (minValue >= minTimestamp && maxValue <= maxTimestamp) {\n+ return Match.YES;\n+ } else {\n+ return Match.MAYBE;\n+ }\n+ }\n+\n+ }\n+\n+ @Override\n+ public boolean matches() throws IOException {\n+ return switch (approximation.match) {\n+ case YES -> true;\n+ case MAYBE -> {\n+ final long value = timestamps.longValue();\n+ yield value >= minTimestamp && value <= maxTimestamp;", + "comment_created_at": "2025-05-21T06:38:24+00:00", + "comment_author": "iverase", + "comment_body": "if value < minTimestamp, we know we will not be matching any more documents, maybe we can move approximation.match to NO?", + "pr_file_module": null + }, + { + "comment_id": "2099478400", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 127260, + "pr_file": "server/src/main/java/org/elasticsearch/lucene/queries/TimestampTwoPhaseIterator.java", + "discussion_id": "2099475391", + "commented_code": "@@ -0,0 +1,207 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.lucene.queries;\n+\n+import org.apache.lucene.index.DocValuesSkipper;\n+import org.apache.lucene.index.NumericDocValues;\n+import org.apache.lucene.search.DocIdSetIterator;\n+import org.apache.lucene.search.TwoPhaseIterator;\n+\n+import java.io.IOException;\n+\n+/**\n+ * Based on {@link org.apache.lucene.search.DocValuesRangeIterator} but modified for time series and logsdb use cases.\n+ *
\n+ * The @timestamp field always has exactly one value and all documents always have a @timestamp value.\n+ * Additionally, the @timestamp field is always the secondary index sort field and sort order is always descending.\n+ *
\n+ * This makes the doc value skipper on @timestamp field less effective, and so the doc value skipper for the first index sort field is\n+ * also used to skip to the next primary sort value if for the current skipper represents one value (min and max value are the same) and\n+ * the current value is lower than minTimestamp.\n+ */\n+final class TimestampTwoPhaseIterator extends TwoPhaseIterator {\n+\n+ private final RangeNoGapsApproximation approximation;\n+ private final NumericDocValues timestamps;\n+\n+ private final long minTimestamp;\n+ private final long maxTimestamp;\n+\n+ TimestampTwoPhaseIterator(\n+ NumericDocValues timestamps,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ super(new RangeNoGapsApproximation(timestamps, timestampSkipper, primaryFieldSkipper, minTimestamp, maxTimestamp));\n+ this.approximation = (RangeNoGapsApproximation) approximation();\n+ this.timestamps = timestamps;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ static final class RangeNoGapsApproximation extends DocIdSetIterator {\n+\n+ private final DocIdSetIterator innerApproximation;\n+\n+ final DocValuesSkipper timestampSkipper;\n+ final DocValuesSkipper primaryFieldSkipper;\n+ final long minTimestamp;\n+ final long maxTimestamp;\n+\n+ private int doc = -1;\n+\n+ // Track a decision for all doc IDs between the current doc ID and upTo inclusive.\n+ Match match = Match.MAYBE;\n+ int upTo = -1;\n+ int primaryFieldUpTo = -1;\n+\n+ RangeNoGapsApproximation(\n+ DocIdSetIterator innerApproximation,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ this.innerApproximation = innerApproximation;\n+ this.timestampSkipper = timestampSkipper;\n+ this.primaryFieldSkipper = primaryFieldSkipper;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ @Override\n+ public int docID() {\n+ return doc;\n+ }\n+\n+ @Override\n+ public int nextDoc() throws IOException {\n+ return advance(docID() + 1);\n+ }\n+\n+ @Override\n+ public int advance(int target) throws IOException {\n+ while (true) {\n+ if (target > upTo) {\n+ timestampSkipper.advance(target);\n+ upTo = timestampSkipper.maxDocID(0);\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = DocIdSetIterator.NO_MORE_DOCS;\n+ }\n+ match = match(0);\n+\n+ // If we have a YES or NO decision, see if we still have the same decision on a higher\n+ // level (= on a wider range of doc IDs)\n+ int nextLevel = 1;\n+ while (match != Match.MAYBE && nextLevel < timestampSkipper.numLevels() && match == match(nextLevel)) {\n+ upTo = timestampSkipper.maxDocID(nextLevel);\n+ nextLevel++;\n+ }\n+ }\n+ switch (match) {\n+ case YES:\n+ return doc = target;\n+ case MAYBE:\n+ if (target > innerApproximation.docID()) {\n+ target = innerApproximation.advance(target);\n+ }\n+ if (target <= upTo) {\n+ return doc = target;\n+ }\n+ break;\n+ case NO_AND_SKIP:\n+ if (target > primaryFieldUpTo) {\n+ primaryFieldSkipper.advance(target);\n+ for (int level = 0; level < primaryFieldSkipper.numLevels(); level++) {\n+ if (primaryFieldSkipper.minValue(level) == primaryFieldSkipper.maxValue(level)) {\n+ primaryFieldUpTo = primaryFieldSkipper.maxDocID(level);\n+ } else {\n+ break;\n+ }\n+ }\n+ if (primaryFieldUpTo > upTo) {\n+ upTo = primaryFieldUpTo;\n+ }\n+ }\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = NO_MORE_DOCS;\n+ }\n+ target = upTo + 1;\n+ 
break;\n+ case NO:\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = NO_MORE_DOCS;\n+ }\n+ target = upTo + 1;\n+ break;\n+ default:\n+ throw new AssertionError(\"Unknown enum constant: \" + match);\n+ }\n+ }\n+ }\n+\n+ @Override\n+ public long cost() {\n+ return innerApproximation.cost();\n+ }\n+\n+ Match match(int level) {\n+ long minValue = timestampSkipper.minValue(level);\n+ long maxValue = timestampSkipper.maxValue(level);\n+ if (minValue > maxTimestamp) {\n+ return Match.NO;\n+ } else if (maxValue < minTimestamp) {\n+ return Match.NO_AND_SKIP;\n+ } else if (minValue >= minTimestamp && maxValue <= maxTimestamp) {\n+ return Match.YES;\n+ } else {\n+ return Match.MAYBE;\n+ }\n+ }\n+\n+ }\n+\n+ @Override\n+ public boolean matches() throws IOException {\n+ return switch (approximation.match) {\n+ case YES -> true;\n+ case MAYBE -> {\n+ final long value = timestamps.longValue();\n+ yield value >= minTimestamp && value <= maxTimestamp;", + "comment_created_at": "2025-05-21T06:40:02+00:00", + "comment_author": "iverase", + "comment_body": "Although this is only true if we are always in the same primary sort.", + "pr_file_module": null + }, + { + "comment_id": "2099497432", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 127260, + "pr_file": "server/src/main/java/org/elasticsearch/lucene/queries/TimestampTwoPhaseIterator.java", + "discussion_id": "2099475391", + "commented_code": "@@ -0,0 +1,207 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.lucene.queries;\n+\n+import org.apache.lucene.index.DocValuesSkipper;\n+import org.apache.lucene.index.NumericDocValues;\n+import org.apache.lucene.search.DocIdSetIterator;\n+import org.apache.lucene.search.TwoPhaseIterator;\n+\n+import java.io.IOException;\n+\n+/**\n+ * Based on {@link org.apache.lucene.search.DocValuesRangeIterator} but modified for time series and logsdb use cases.\n+ *
\n+ * The @timestamp field always has exactly one value and all documents always have a @timestamp value.\n+ * Additionally, the @timestamp field is always the secondary index sort field and sort order is always descending.\n+ *
\n+ * This makes the doc value skipper on @timestamp field less effective, and so the doc value skipper for the first index sort field is\n+ * also used to skip to the next primary sort value if for the current skipper represents one value (min and max value are the same) and\n+ * the current value is lower than minTimestamp.\n+ */\n+final class TimestampTwoPhaseIterator extends TwoPhaseIterator {\n+\n+ private final RangeNoGapsApproximation approximation;\n+ private final NumericDocValues timestamps;\n+\n+ private final long minTimestamp;\n+ private final long maxTimestamp;\n+\n+ TimestampTwoPhaseIterator(\n+ NumericDocValues timestamps,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ super(new RangeNoGapsApproximation(timestamps, timestampSkipper, primaryFieldSkipper, minTimestamp, maxTimestamp));\n+ this.approximation = (RangeNoGapsApproximation) approximation();\n+ this.timestamps = timestamps;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ static final class RangeNoGapsApproximation extends DocIdSetIterator {\n+\n+ private final DocIdSetIterator innerApproximation;\n+\n+ final DocValuesSkipper timestampSkipper;\n+ final DocValuesSkipper primaryFieldSkipper;\n+ final long minTimestamp;\n+ final long maxTimestamp;\n+\n+ private int doc = -1;\n+\n+ // Track a decision for all doc IDs between the current doc ID and upTo inclusive.\n+ Match match = Match.MAYBE;\n+ int upTo = -1;\n+ int primaryFieldUpTo = -1;\n+\n+ RangeNoGapsApproximation(\n+ DocIdSetIterator innerApproximation,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ this.innerApproximation = innerApproximation;\n+ this.timestampSkipper = timestampSkipper;\n+ this.primaryFieldSkipper = primaryFieldSkipper;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ @Override\n+ public int docID() {\n+ return doc;\n+ }\n+\n+ @Override\n+ public int nextDoc() throws IOException {\n+ return advance(docID() + 1);\n+ }\n+\n+ @Override\n+ public int advance(int target) throws IOException {\n+ while (true) {\n+ if (target > upTo) {\n+ timestampSkipper.advance(target);\n+ upTo = timestampSkipper.maxDocID(0);\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = DocIdSetIterator.NO_MORE_DOCS;\n+ }\n+ match = match(0);\n+\n+ // If we have a YES or NO decision, see if we still have the same decision on a higher\n+ // level (= on a wider range of doc IDs)\n+ int nextLevel = 1;\n+ while (match != Match.MAYBE && nextLevel < timestampSkipper.numLevels() && match == match(nextLevel)) {\n+ upTo = timestampSkipper.maxDocID(nextLevel);\n+ nextLevel++;\n+ }\n+ }\n+ switch (match) {\n+ case YES:\n+ return doc = target;\n+ case MAYBE:\n+ if (target > innerApproximation.docID()) {\n+ target = innerApproximation.advance(target);\n+ }\n+ if (target <= upTo) {\n+ return doc = target;\n+ }\n+ break;\n+ case NO_AND_SKIP:\n+ if (target > primaryFieldUpTo) {\n+ primaryFieldSkipper.advance(target);\n+ for (int level = 0; level < primaryFieldSkipper.numLevels(); level++) {\n+ if (primaryFieldSkipper.minValue(level) == primaryFieldSkipper.maxValue(level)) {\n+ primaryFieldUpTo = primaryFieldSkipper.maxDocID(level);\n+ } else {\n+ break;\n+ }\n+ }\n+ if (primaryFieldUpTo > upTo) {\n+ upTo = primaryFieldUpTo;\n+ }\n+ }\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = NO_MORE_DOCS;\n+ }\n+ target = upTo + 1;\n+ 
break;\n+ case NO:\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = NO_MORE_DOCS;\n+ }\n+ target = upTo + 1;\n+ break;\n+ default:\n+ throw new AssertionError(\"Unknown enum constant: \" + match);\n+ }\n+ }\n+ }\n+\n+ @Override\n+ public long cost() {\n+ return innerApproximation.cost();\n+ }\n+\n+ Match match(int level) {\n+ long minValue = timestampSkipper.minValue(level);\n+ long maxValue = timestampSkipper.maxValue(level);\n+ if (minValue > maxTimestamp) {\n+ return Match.NO;\n+ } else if (maxValue < minTimestamp) {\n+ return Match.NO_AND_SKIP;\n+ } else if (minValue >= minTimestamp && maxValue <= maxTimestamp) {\n+ return Match.YES;\n+ } else {\n+ return Match.MAYBE;\n+ }\n+ }\n+\n+ }\n+\n+ @Override\n+ public boolean matches() throws IOException {\n+ return switch (approximation.match) {\n+ case YES -> true;\n+ case MAYBE -> {\n+ final long value = timestamps.longValue();\n+ yield value >= minTimestamp && value <= maxTimestamp;", + "comment_created_at": "2025-05-21T06:51:10+00:00", + "comment_author": "iverase", + "comment_body": "I wonder if we should have two MAYBE options, one that we know we are always in the same primary sort and another one with mixed primary sort. In that case we can apply the optimization in the first case (which should be the common case).", + "pr_file_module": null + }, + { + "comment_id": "2099593498", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 127260, + "pr_file": "server/src/main/java/org/elasticsearch/lucene/queries/TimestampTwoPhaseIterator.java", + "discussion_id": "2099475391", + "commented_code": "@@ -0,0 +1,207 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.lucene.queries;\n+\n+import org.apache.lucene.index.DocValuesSkipper;\n+import org.apache.lucene.index.NumericDocValues;\n+import org.apache.lucene.search.DocIdSetIterator;\n+import org.apache.lucene.search.TwoPhaseIterator;\n+\n+import java.io.IOException;\n+\n+/**\n+ * Based on {@link org.apache.lucene.search.DocValuesRangeIterator} but modified for time series and logsdb use cases.\n+ *
\n+ * The @timestamp field always has exactly one value and all documents always have a @timestamp value.\n+ * Additionally, the @timestamp field is always the secondary index sort field and sort order is always descending.\n+ *
\n+ * This makes the doc value skipper on @timestamp field less effective, and so the doc value skipper for the first index sort field is\n+ * also used to skip to the next primary sort value if for the current skipper represents one value (min and max value are the same) and\n+ * the current value is lower than minTimestamp.\n+ */\n+final class TimestampTwoPhaseIterator extends TwoPhaseIterator {\n+\n+ private final RangeNoGapsApproximation approximation;\n+ private final NumericDocValues timestamps;\n+\n+ private final long minTimestamp;\n+ private final long maxTimestamp;\n+\n+ TimestampTwoPhaseIterator(\n+ NumericDocValues timestamps,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ super(new RangeNoGapsApproximation(timestamps, timestampSkipper, primaryFieldSkipper, minTimestamp, maxTimestamp));\n+ this.approximation = (RangeNoGapsApproximation) approximation();\n+ this.timestamps = timestamps;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ static final class RangeNoGapsApproximation extends DocIdSetIterator {\n+\n+ private final DocIdSetIterator innerApproximation;\n+\n+ final DocValuesSkipper timestampSkipper;\n+ final DocValuesSkipper primaryFieldSkipper;\n+ final long minTimestamp;\n+ final long maxTimestamp;\n+\n+ private int doc = -1;\n+\n+ // Track a decision for all doc IDs between the current doc ID and upTo inclusive.\n+ Match match = Match.MAYBE;\n+ int upTo = -1;\n+ int primaryFieldUpTo = -1;\n+\n+ RangeNoGapsApproximation(\n+ DocIdSetIterator innerApproximation,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ this.innerApproximation = innerApproximation;\n+ this.timestampSkipper = timestampSkipper;\n+ this.primaryFieldSkipper = primaryFieldSkipper;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ @Override\n+ public int docID() {\n+ return doc;\n+ }\n+\n+ @Override\n+ public int nextDoc() throws IOException {\n+ return advance(docID() + 1);\n+ }\n+\n+ @Override\n+ public int advance(int target) throws IOException {\n+ while (true) {\n+ if (target > upTo) {\n+ timestampSkipper.advance(target);\n+ upTo = timestampSkipper.maxDocID(0);\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = DocIdSetIterator.NO_MORE_DOCS;\n+ }\n+ match = match(0);\n+\n+ // If we have a YES or NO decision, see if we still have the same decision on a higher\n+ // level (= on a wider range of doc IDs)\n+ int nextLevel = 1;\n+ while (match != Match.MAYBE && nextLevel < timestampSkipper.numLevels() && match == match(nextLevel)) {\n+ upTo = timestampSkipper.maxDocID(nextLevel);\n+ nextLevel++;\n+ }\n+ }\n+ switch (match) {\n+ case YES:\n+ return doc = target;\n+ case MAYBE:\n+ if (target > innerApproximation.docID()) {\n+ target = innerApproximation.advance(target);\n+ }\n+ if (target <= upTo) {\n+ return doc = target;\n+ }\n+ break;\n+ case NO_AND_SKIP:\n+ if (target > primaryFieldUpTo) {\n+ primaryFieldSkipper.advance(target);\n+ for (int level = 0; level < primaryFieldSkipper.numLevels(); level++) {\n+ if (primaryFieldSkipper.minValue(level) == primaryFieldSkipper.maxValue(level)) {\n+ primaryFieldUpTo = primaryFieldSkipper.maxDocID(level);\n+ } else {\n+ break;\n+ }\n+ }\n+ if (primaryFieldUpTo > upTo) {\n+ upTo = primaryFieldUpTo;\n+ }\n+ }\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = NO_MORE_DOCS;\n+ }\n+ target = upTo + 1;\n+ 
break;\n+ case NO:\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = NO_MORE_DOCS;\n+ }\n+ target = upTo + 1;\n+ break;\n+ default:\n+ throw new AssertionError(\"Unknown enum constant: \" + match);\n+ }\n+ }\n+ }\n+\n+ @Override\n+ public long cost() {\n+ return innerApproximation.cost();\n+ }\n+\n+ Match match(int level) {\n+ long minValue = timestampSkipper.minValue(level);\n+ long maxValue = timestampSkipper.maxValue(level);\n+ if (minValue > maxTimestamp) {\n+ return Match.NO;\n+ } else if (maxValue < minTimestamp) {\n+ return Match.NO_AND_SKIP;\n+ } else if (minValue >= minTimestamp && maxValue <= maxTimestamp) {\n+ return Match.YES;\n+ } else {\n+ return Match.MAYBE;\n+ }\n+ }\n+\n+ }\n+\n+ @Override\n+ public boolean matches() throws IOException {\n+ return switch (approximation.match) {\n+ case YES -> true;\n+ case MAYBE -> {\n+ final long value = timestamps.longValue();\n+ yield value >= minTimestamp && value <= maxTimestamp;", + "comment_created_at": "2025-05-21T07:42:10+00:00", + "comment_author": "martijnvg", + "comment_body": "Interesting, let me take a look at this.", + "pr_file_module": null + }, + { + "comment_id": "2126409027", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 127260, + "pr_file": "server/src/main/java/org/elasticsearch/lucene/queries/TimestampTwoPhaseIterator.java", + "discussion_id": "2099475391", + "commented_code": "@@ -0,0 +1,207 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.lucene.queries;\n+\n+import org.apache.lucene.index.DocValuesSkipper;\n+import org.apache.lucene.index.NumericDocValues;\n+import org.apache.lucene.search.DocIdSetIterator;\n+import org.apache.lucene.search.TwoPhaseIterator;\n+\n+import java.io.IOException;\n+\n+/**\n+ * Based on {@link org.apache.lucene.search.DocValuesRangeIterator} but modified for time series and logsdb use cases.\n+ *
\n+ * The @timestamp field always has exactly one value and all documents always have a @timestamp value.\n+ * Additionally, the @timestamp field is always the secondary index sort field and sort order is always descending.\n+ *
\n+ * This makes the doc value skipper on @timestamp field less effective, and so the doc value skipper for the first index sort field is\n+ * also used to skip to the next primary sort value if for the current skipper represents one value (min and max value are the same) and\n+ * the current value is lower than minTimestamp.\n+ */\n+final class TimestampTwoPhaseIterator extends TwoPhaseIterator {\n+\n+ private final RangeNoGapsApproximation approximation;\n+ private final NumericDocValues timestamps;\n+\n+ private final long minTimestamp;\n+ private final long maxTimestamp;\n+\n+ TimestampTwoPhaseIterator(\n+ NumericDocValues timestamps,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ super(new RangeNoGapsApproximation(timestamps, timestampSkipper, primaryFieldSkipper, minTimestamp, maxTimestamp));\n+ this.approximation = (RangeNoGapsApproximation) approximation();\n+ this.timestamps = timestamps;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ static final class RangeNoGapsApproximation extends DocIdSetIterator {\n+\n+ private final DocIdSetIterator innerApproximation;\n+\n+ final DocValuesSkipper timestampSkipper;\n+ final DocValuesSkipper primaryFieldSkipper;\n+ final long minTimestamp;\n+ final long maxTimestamp;\n+\n+ private int doc = -1;\n+\n+ // Track a decision for all doc IDs between the current doc ID and upTo inclusive.\n+ Match match = Match.MAYBE;\n+ int upTo = -1;\n+ int primaryFieldUpTo = -1;\n+\n+ RangeNoGapsApproximation(\n+ DocIdSetIterator innerApproximation,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ this.innerApproximation = innerApproximation;\n+ this.timestampSkipper = timestampSkipper;\n+ this.primaryFieldSkipper = primaryFieldSkipper;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ @Override\n+ public int docID() {\n+ return doc;\n+ }\n+\n+ @Override\n+ public int nextDoc() throws IOException {\n+ return advance(docID() + 1);\n+ }\n+\n+ @Override\n+ public int advance(int target) throws IOException {\n+ while (true) {\n+ if (target > upTo) {\n+ timestampSkipper.advance(target);\n+ upTo = timestampSkipper.maxDocID(0);\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = DocIdSetIterator.NO_MORE_DOCS;\n+ }\n+ match = match(0);\n+\n+ // If we have a YES or NO decision, see if we still have the same decision on a higher\n+ // level (= on a wider range of doc IDs)\n+ int nextLevel = 1;\n+ while (match != Match.MAYBE && nextLevel < timestampSkipper.numLevels() && match == match(nextLevel)) {\n+ upTo = timestampSkipper.maxDocID(nextLevel);\n+ nextLevel++;\n+ }\n+ }\n+ switch (match) {\n+ case YES:\n+ return doc = target;\n+ case MAYBE:\n+ if (target > innerApproximation.docID()) {\n+ target = innerApproximation.advance(target);\n+ }\n+ if (target <= upTo) {\n+ return doc = target;\n+ }\n+ break;\n+ case NO_AND_SKIP:\n+ if (target > primaryFieldUpTo) {\n+ primaryFieldSkipper.advance(target);\n+ for (int level = 0; level < primaryFieldSkipper.numLevels(); level++) {\n+ if (primaryFieldSkipper.minValue(level) == primaryFieldSkipper.maxValue(level)) {\n+ primaryFieldUpTo = primaryFieldSkipper.maxDocID(level);\n+ } else {\n+ break;\n+ }\n+ }\n+ if (primaryFieldUpTo > upTo) {\n+ upTo = primaryFieldUpTo;\n+ }\n+ }\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = NO_MORE_DOCS;\n+ }\n+ target = upTo + 1;\n+ 
break;\n+ case NO:\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = NO_MORE_DOCS;\n+ }\n+ target = upTo + 1;\n+ break;\n+ default:\n+ throw new AssertionError(\"Unknown enum constant: \" + match);\n+ }\n+ }\n+ }\n+\n+ @Override\n+ public long cost() {\n+ return innerApproximation.cost();\n+ }\n+\n+ Match match(int level) {\n+ long minValue = timestampSkipper.minValue(level);\n+ long maxValue = timestampSkipper.maxValue(level);\n+ if (minValue > maxTimestamp) {\n+ return Match.NO;\n+ } else if (maxValue < minTimestamp) {\n+ return Match.NO_AND_SKIP;\n+ } else if (minValue >= minTimestamp && maxValue <= maxTimestamp) {\n+ return Match.YES;\n+ } else {\n+ return Match.MAYBE;\n+ }\n+ }\n+\n+ }\n+\n+ @Override\n+ public boolean matches() throws IOException {\n+ return switch (approximation.match) {\n+ case YES -> true;\n+ case MAYBE -> {\n+ final long value = timestamps.longValue();\n+ yield value >= minTimestamp && value <= maxTimestamp;", + "comment_created_at": "2025-06-04T11:49:14+00:00", + "comment_author": "martijnvg", + "comment_body": "done: 57395650b39aa637db395b059c1740b8cdf82bda", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-optimize-before-implementing.md b/_reviewers/elasticsearch-optimize-before-implementing.md index 29d0038..14bd62f 100644 --- a/_reviewers/elasticsearch-optimize-before-implementing.md +++ b/_reviewers/elasticsearch-optimize-before-implementing.md @@ -46,387 +46,3 @@ return nodeIds.stream() .filter(nodeId -> candidateNodeIds.contains(nodeId)) .collect(Collectors.toSet()); ``` - - -[ - { - "discussion_id": "2170953937", - "pr_number": 128917, - "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java", - "created_at": "2025-06-27T06:59:47+00:00", - "commented_code": "values.add(List.of(\"coordinator\", \"optimizedPhysicalPlan\", physicalPlanString));\n var blocks = BlockUtils.fromList(PlannerUtils.NON_BREAKING_BLOCK_FACTORY, values);\n physicalPlan = new LocalSourceExec(Source.EMPTY, fields, LocalSupplier.of(blocks));\n planRunner.run(physicalPlan, listener);\n } else {\n // TODO: this could be snuck into the underlying listener\n EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n // execute any potential subplans\n executeSubPlans(optimizedPlan, planRunner, executionInfo, request, listener);\n }\n // TODO: this could be snuck into the underlying listener\n EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n // execute any potential subplans\n executeSubPlans(physicalPlan, planRunner, executionInfo, request, listener);\n }\n\n private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {}\n private record LogicalPlanTuple(LogicalPlan nonStubbedSubPlan, LogicalPlan originalSubPlan) {}\n\n private void executeSubPlans(\n PhysicalPlan physicalPlan,\n LogicalPlan optimizedPlan,\n PlanRunner runner,\n EsqlExecutionInfo executionInfo,\n EsqlQueryRequest request,\n ActionListener listener\n ) {\n List subplans = new ArrayList<>();\n\n // Currently the inlinestats are limited and supported as streaming operators, thus present inside the fragment as logical plans\n // Below they get collected, translated into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n physicalPlan.forEachUp(FragmentExec.class, f -> {\n f.fragment().forEachUp(InlineJoin.class, ij -> {\n // extract the right side of the plan and replace its source\n LogicalPlan subplan = InlineJoin.replaceStub(ij.left(), 
ij.right());\n // mark the new root node as optimized\n subplan.setOptimized();\n PhysicalPlan subqueryPlan = logicalPlanToPhysicalPlan(subplan, request);\n subplans.add(new PlanTuple(subqueryPlan, ij.right()));\n });\n });\n\n Iterator iterator = subplans.iterator();\n var subPlan = firstSubPlan(optimizedPlan);\n\n // TODO: merge into one method\n if (subplans.size() > 0) {\n if (subPlan != null) {\n // code-path to execute subplans\n executeSubPlan(new DriverCompletionInfo.Accumulator(), physicalPlan, iterator, executionInfo, runner, listener);\n executeSubPlan(new DriverCompletionInfo.Accumulator(), optimizedPlan, subPlan, executionInfo, runner, request, listener);\n } else {\n PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n // execute main plan\n runner.run(physicalPlan, listener);\n }\n }\n\n private LogicalPlanTuple firstSubPlan(LogicalPlan optimizedPlan) {\n Holder subPlan = new Holder<>();\n // Collect the first inlinejoin (bottom up in the tree)\n optimizedPlan.forEachUp(InlineJoin.class, ij -> {\n // extract the right side of the plan and replace its source\n if (subPlan.get() == null && ij.right().anyMatch(p -> p instanceof StubRelation)) {\n var p = InlineJoin.replaceStub(ij.left(), ij.right());\n p.setOptimized();\n subPlan.set(new LogicalPlanTuple(p, ij.right()));\n }\n });\n return subPlan.get();\n }\n\n private void executeSubPlan(\n DriverCompletionInfo.Accumulator completionInfoAccumulator,\n PhysicalPlan plan,\n Iterator subPlanIterator,\n LogicalPlan optimizedPlan,\n LogicalPlanTuple subPlans,\n EsqlExecutionInfo executionInfo,\n PlanRunner runner,\n EsqlQueryRequest request,\n ActionListener listener\n ) {\n PlanTuple tuple = subPlanIterator.next();\n // Create a physical plan out of the logical sub-plan\n var physicalSubPlan = logicalPlanToPhysicalPlan(subPlans.nonStubbedSubPlan, request);\n\n runner.run(tuple.physical, listener.delegateFailureAndWrap((next, result) -> {\n runner.run(physicalSubPlan, listener.delegateFailureAndWrap((next, result) -> {\n try {\n // Translate the subquery into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n completionInfoAccumulator.accumulate(result.completionInfo());\n LocalRelation resultWrapper = resultToPlan(tuple.logical, result);\n LocalRelation resultWrapper = resultToPlan(subPlans.nonStubbedSubPlan, result);\n\n // replace the original logical plan with the backing result\n PhysicalPlan newPlan = plan.transformUp(FragmentExec.class, f -> {\n LogicalPlan frag = f.fragment();\n return f.withFragment(\n frag.transformUp(\n InlineJoin.class,\n ij -> ij.right() == tuple.logical ? InlineJoin.inlineData(ij, resultWrapper) : ij\n )\n );\n });\n LogicalPlan newLogicalPlan = optimizedPlan.transformUp(\n InlineJoin.class,\n ij -> ij.right() == subPlans.originalSubPlan ? 
InlineJoin.inlineData(ij, resultWrapper) : ij\n );\n newLogicalPlan.setOptimized();", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2170953937", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 128917, - "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java", - "discussion_id": "2170953937", - "commented_code": "@@ -231,85 +231,88 @@ public void executeOptimizedPlan(\n values.add(List.of(\"coordinator\", \"optimizedPhysicalPlan\", physicalPlanString));\n var blocks = BlockUtils.fromList(PlannerUtils.NON_BREAKING_BLOCK_FACTORY, values);\n physicalPlan = new LocalSourceExec(Source.EMPTY, fields, LocalSupplier.of(blocks));\n+ planRunner.run(physicalPlan, listener);\n+ } else {\n+ // TODO: this could be snuck into the underlying listener\n+ EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n+ // execute any potential subplans\n+ executeSubPlans(optimizedPlan, planRunner, executionInfo, request, listener);\n }\n- // TODO: this could be snuck into the underlying listener\n- EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n- // execute any potential subplans\n- executeSubPlans(physicalPlan, planRunner, executionInfo, request, listener);\n }\n \n- private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {}\n+ private record LogicalPlanTuple(LogicalPlan nonStubbedSubPlan, LogicalPlan originalSubPlan) {}\n \n private void executeSubPlans(\n- PhysicalPlan physicalPlan,\n+ LogicalPlan optimizedPlan,\n PlanRunner runner,\n EsqlExecutionInfo executionInfo,\n EsqlQueryRequest request,\n ActionListener listener\n ) {\n- List subplans = new ArrayList<>();\n-\n- // Currently the inlinestats are limited and supported as streaming operators, thus present inside the fragment as logical plans\n- // Below they get collected, translated into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n- physicalPlan.forEachUp(FragmentExec.class, f -> {\n- f.fragment().forEachUp(InlineJoin.class, ij -> {\n- // extract the right side of the plan and replace its source\n- LogicalPlan subplan = InlineJoin.replaceStub(ij.left(), ij.right());\n- // mark the new root node as optimized\n- subplan.setOptimized();\n- PhysicalPlan subqueryPlan = logicalPlanToPhysicalPlan(subplan, request);\n- subplans.add(new PlanTuple(subqueryPlan, ij.right()));\n- });\n- });\n-\n- Iterator iterator = subplans.iterator();\n+ var subPlan = firstSubPlan(optimizedPlan);\n \n // TODO: merge into one method\n- if (subplans.size() > 0) {\n+ if (subPlan != null) {\n // code-path to execute subplans\n- executeSubPlan(new DriverCompletionInfo.Accumulator(), physicalPlan, iterator, executionInfo, runner, listener);\n+ executeSubPlan(new DriverCompletionInfo.Accumulator(), optimizedPlan, subPlan, executionInfo, runner, request, listener);\n } else {\n+ PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n // execute main plan\n runner.run(physicalPlan, listener);\n }\n }\n \n+ private LogicalPlanTuple firstSubPlan(LogicalPlan optimizedPlan) {\n+ Holder subPlan = new Holder<>();\n+ // Collect the first inlinejoin (bottom up in the tree)\n+ optimizedPlan.forEachUp(InlineJoin.class, ij -> {\n+ // extract the right side of the plan and replace its source\n+ if (subPlan.get() == null && ij.right().anyMatch(p -> p instanceof StubRelation)) {\n+ var p = InlineJoin.replaceStub(ij.left(), ij.right());\n+ p.setOptimized();\n+ subPlan.set(new LogicalPlanTuple(p, 
ij.right()));\n+ }\n+ });\n+ return subPlan.get();\n+ }\n+\n private void executeSubPlan(\n DriverCompletionInfo.Accumulator completionInfoAccumulator,\n- PhysicalPlan plan,\n- Iterator subPlanIterator,\n+ LogicalPlan optimizedPlan,\n+ LogicalPlanTuple subPlans,\n EsqlExecutionInfo executionInfo,\n PlanRunner runner,\n+ EsqlQueryRequest request,\n ActionListener listener\n ) {\n- PlanTuple tuple = subPlanIterator.next();\n+ // Create a physical plan out of the logical sub-plan\n+ var physicalSubPlan = logicalPlanToPhysicalPlan(subPlans.nonStubbedSubPlan, request);\n \n- runner.run(tuple.physical, listener.delegateFailureAndWrap((next, result) -> {\n+ runner.run(physicalSubPlan, listener.delegateFailureAndWrap((next, result) -> {\n try {\n+ // Translate the subquery into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n completionInfoAccumulator.accumulate(result.completionInfo());\n- LocalRelation resultWrapper = resultToPlan(tuple.logical, result);\n+ LocalRelation resultWrapper = resultToPlan(subPlans.nonStubbedSubPlan, result);\n \n // replace the original logical plan with the backing result\n- PhysicalPlan newPlan = plan.transformUp(FragmentExec.class, f -> {\n- LogicalPlan frag = f.fragment();\n- return f.withFragment(\n- frag.transformUp(\n- InlineJoin.class,\n- ij -> ij.right() == tuple.logical ? InlineJoin.inlineData(ij, resultWrapper) : ij\n- )\n- );\n- });\n+ LogicalPlan newLogicalPlan = optimizedPlan.transformUp(\n+ InlineJoin.class,\n+ ij -> ij.right() == subPlans.originalSubPlan ? InlineJoin.inlineData(ij, resultWrapper) : ij\n+ );\n+ newLogicalPlan.setOptimized();", - "comment_created_at": "2025-06-27T06:59:47+00:00", - "comment_author": "alex-spies", - "comment_body": "This is surprising. Shouldn't we re-optimize the plan now that we were able to replace the stub with an actual result?\n\nAfter the stub was replaced, we can actually do more stuff, like push down limits (which before that would be wrong as it would have affected the stats).\n\nIf I understand correctly, this is something we might want to improve later, right? 
If so, let's leave a comment; maybe a `TODO` to make the intention clear.", - "pr_file_module": null - }, - { - "comment_id": "2170963284", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 128917, - "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java", - "discussion_id": "2170953937", - "commented_code": "@@ -231,85 +231,88 @@ public void executeOptimizedPlan(\n values.add(List.of(\"coordinator\", \"optimizedPhysicalPlan\", physicalPlanString));\n var blocks = BlockUtils.fromList(PlannerUtils.NON_BREAKING_BLOCK_FACTORY, values);\n physicalPlan = new LocalSourceExec(Source.EMPTY, fields, LocalSupplier.of(blocks));\n+ planRunner.run(physicalPlan, listener);\n+ } else {\n+ // TODO: this could be snuck into the underlying listener\n+ EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n+ // execute any potential subplans\n+ executeSubPlans(optimizedPlan, planRunner, executionInfo, request, listener);\n }\n- // TODO: this could be snuck into the underlying listener\n- EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n- // execute any potential subplans\n- executeSubPlans(physicalPlan, planRunner, executionInfo, request, listener);\n }\n \n- private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {}\n+ private record LogicalPlanTuple(LogicalPlan nonStubbedSubPlan, LogicalPlan originalSubPlan) {}\n \n private void executeSubPlans(\n- PhysicalPlan physicalPlan,\n+ LogicalPlan optimizedPlan,\n PlanRunner runner,\n EsqlExecutionInfo executionInfo,\n EsqlQueryRequest request,\n ActionListener listener\n ) {\n- List subplans = new ArrayList<>();\n-\n- // Currently the inlinestats are limited and supported as streaming operators, thus present inside the fragment as logical plans\n- // Below they get collected, translated into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n- physicalPlan.forEachUp(FragmentExec.class, f -> {\n- f.fragment().forEachUp(InlineJoin.class, ij -> {\n- // extract the right side of the plan and replace its source\n- LogicalPlan subplan = InlineJoin.replaceStub(ij.left(), ij.right());\n- // mark the new root node as optimized\n- subplan.setOptimized();\n- PhysicalPlan subqueryPlan = logicalPlanToPhysicalPlan(subplan, request);\n- subplans.add(new PlanTuple(subqueryPlan, ij.right()));\n- });\n- });\n-\n- Iterator iterator = subplans.iterator();\n+ var subPlan = firstSubPlan(optimizedPlan);\n \n // TODO: merge into one method\n- if (subplans.size() > 0) {\n+ if (subPlan != null) {\n // code-path to execute subplans\n- executeSubPlan(new DriverCompletionInfo.Accumulator(), physicalPlan, iterator, executionInfo, runner, listener);\n+ executeSubPlan(new DriverCompletionInfo.Accumulator(), optimizedPlan, subPlan, executionInfo, runner, request, listener);\n } else {\n+ PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n // execute main plan\n runner.run(physicalPlan, listener);\n }\n }\n \n+ private LogicalPlanTuple firstSubPlan(LogicalPlan optimizedPlan) {\n+ Holder subPlan = new Holder<>();\n+ // Collect the first inlinejoin (bottom up in the tree)\n+ optimizedPlan.forEachUp(InlineJoin.class, ij -> {\n+ // extract the right side of the plan and replace its source\n+ if (subPlan.get() == null && ij.right().anyMatch(p -> p instanceof StubRelation)) {\n+ var p = InlineJoin.replaceStub(ij.left(), ij.right());\n+ p.setOptimized();\n+ subPlan.set(new LogicalPlanTuple(p, ij.right()));\n+ }\n+ });\n+ return subPlan.get();\n+ 
}\n+\n private void executeSubPlan(\n DriverCompletionInfo.Accumulator completionInfoAccumulator,\n- PhysicalPlan plan,\n- Iterator subPlanIterator,\n+ LogicalPlan optimizedPlan,\n+ LogicalPlanTuple subPlans,\n EsqlExecutionInfo executionInfo,\n PlanRunner runner,\n+ EsqlQueryRequest request,\n ActionListener listener\n ) {\n- PlanTuple tuple = subPlanIterator.next();\n+ // Create a physical plan out of the logical sub-plan\n+ var physicalSubPlan = logicalPlanToPhysicalPlan(subPlans.nonStubbedSubPlan, request);\n \n- runner.run(tuple.physical, listener.delegateFailureAndWrap((next, result) -> {\n+ runner.run(physicalSubPlan, listener.delegateFailureAndWrap((next, result) -> {\n try {\n+ // Translate the subquery into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n completionInfoAccumulator.accumulate(result.completionInfo());\n- LocalRelation resultWrapper = resultToPlan(tuple.logical, result);\n+ LocalRelation resultWrapper = resultToPlan(subPlans.nonStubbedSubPlan, result);\n \n // replace the original logical plan with the backing result\n- PhysicalPlan newPlan = plan.transformUp(FragmentExec.class, f -> {\n- LogicalPlan frag = f.fragment();\n- return f.withFragment(\n- frag.transformUp(\n- InlineJoin.class,\n- ij -> ij.right() == tuple.logical ? InlineJoin.inlineData(ij, resultWrapper) : ij\n- )\n- );\n- });\n+ LogicalPlan newLogicalPlan = optimizedPlan.transformUp(\n+ InlineJoin.class,\n+ ij -> ij.right() == subPlans.originalSubPlan ? InlineJoin.inlineData(ij, resultWrapper) : ij\n+ );\n+ newLogicalPlan.setOptimized();", - "comment_created_at": "2025-06-27T07:04:15+00:00", - "comment_author": "astefan", - "comment_body": "Yeah, a TODO is right.\r\n\r\nI had my doubts with this thing. I feel like there are things here that were left hanging (things do still seem they can be optimized further), but at this stage of the `inlinestats` progress, I think it's worth ignoring it. 
But TODO is needed, because it's something we need to think about a little.", - "pr_file_module": null - }, - { - "comment_id": "2171129944", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 128917, - "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java", - "discussion_id": "2170953937", - "commented_code": "@@ -231,85 +231,88 @@ public void executeOptimizedPlan(\n values.add(List.of(\"coordinator\", \"optimizedPhysicalPlan\", physicalPlanString));\n var blocks = BlockUtils.fromList(PlannerUtils.NON_BREAKING_BLOCK_FACTORY, values);\n physicalPlan = new LocalSourceExec(Source.EMPTY, fields, LocalSupplier.of(blocks));\n+ planRunner.run(physicalPlan, listener);\n+ } else {\n+ // TODO: this could be snuck into the underlying listener\n+ EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n+ // execute any potential subplans\n+ executeSubPlans(optimizedPlan, planRunner, executionInfo, request, listener);\n }\n- // TODO: this could be snuck into the underlying listener\n- EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n- // execute any potential subplans\n- executeSubPlans(physicalPlan, planRunner, executionInfo, request, listener);\n }\n \n- private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {}\n+ private record LogicalPlanTuple(LogicalPlan nonStubbedSubPlan, LogicalPlan originalSubPlan) {}\n \n private void executeSubPlans(\n- PhysicalPlan physicalPlan,\n+ LogicalPlan optimizedPlan,\n PlanRunner runner,\n EsqlExecutionInfo executionInfo,\n EsqlQueryRequest request,\n ActionListener listener\n ) {\n- List subplans = new ArrayList<>();\n-\n- // Currently the inlinestats are limited and supported as streaming operators, thus present inside the fragment as logical plans\n- // Below they get collected, translated into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n- physicalPlan.forEachUp(FragmentExec.class, f -> {\n- f.fragment().forEachUp(InlineJoin.class, ij -> {\n- // extract the right side of the plan and replace its source\n- LogicalPlan subplan = InlineJoin.replaceStub(ij.left(), ij.right());\n- // mark the new root node as optimized\n- subplan.setOptimized();\n- PhysicalPlan subqueryPlan = logicalPlanToPhysicalPlan(subplan, request);\n- subplans.add(new PlanTuple(subqueryPlan, ij.right()));\n- });\n- });\n-\n- Iterator iterator = subplans.iterator();\n+ var subPlan = firstSubPlan(optimizedPlan);\n \n // TODO: merge into one method\n- if (subplans.size() > 0) {\n+ if (subPlan != null) {\n // code-path to execute subplans\n- executeSubPlan(new DriverCompletionInfo.Accumulator(), physicalPlan, iterator, executionInfo, runner, listener);\n+ executeSubPlan(new DriverCompletionInfo.Accumulator(), optimizedPlan, subPlan, executionInfo, runner, request, listener);\n } else {\n+ PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n // execute main plan\n runner.run(physicalPlan, listener);\n }\n }\n \n+ private LogicalPlanTuple firstSubPlan(LogicalPlan optimizedPlan) {\n+ Holder subPlan = new Holder<>();\n+ // Collect the first inlinejoin (bottom up in the tree)\n+ optimizedPlan.forEachUp(InlineJoin.class, ij -> {\n+ // extract the right side of the plan and replace its source\n+ if (subPlan.get() == null && ij.right().anyMatch(p -> p instanceof StubRelation)) {\n+ var p = InlineJoin.replaceStub(ij.left(), ij.right());\n+ p.setOptimized();\n+ subPlan.set(new LogicalPlanTuple(p, ij.right()));\n+ }\n+ });\n+ return subPlan.get();\n+ 
}\n+\n private void executeSubPlan(\n DriverCompletionInfo.Accumulator completionInfoAccumulator,\n- PhysicalPlan plan,\n- Iterator subPlanIterator,\n+ LogicalPlan optimizedPlan,\n+ LogicalPlanTuple subPlans,\n EsqlExecutionInfo executionInfo,\n PlanRunner runner,\n+ EsqlQueryRequest request,\n ActionListener listener\n ) {\n- PlanTuple tuple = subPlanIterator.next();\n+ // Create a physical plan out of the logical sub-plan\n+ var physicalSubPlan = logicalPlanToPhysicalPlan(subPlans.nonStubbedSubPlan, request);\n \n- runner.run(tuple.physical, listener.delegateFailureAndWrap((next, result) -> {\n+ runner.run(physicalSubPlan, listener.delegateFailureAndWrap((next, result) -> {\n try {\n+ // Translate the subquery into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n completionInfoAccumulator.accumulate(result.completionInfo());\n- LocalRelation resultWrapper = resultToPlan(tuple.logical, result);\n+ LocalRelation resultWrapper = resultToPlan(subPlans.nonStubbedSubPlan, result);\n \n // replace the original logical plan with the backing result\n- PhysicalPlan newPlan = plan.transformUp(FragmentExec.class, f -> {\n- LogicalPlan frag = f.fragment();\n- return f.withFragment(\n- frag.transformUp(\n- InlineJoin.class,\n- ij -> ij.right() == tuple.logical ? InlineJoin.inlineData(ij, resultWrapper) : ij\n- )\n- );\n- });\n+ LogicalPlan newLogicalPlan = optimizedPlan.transformUp(\n+ InlineJoin.class,\n+ ij -> ij.right() == subPlans.originalSubPlan ? InlineJoin.inlineData(ij, resultWrapper) : ij\n+ );\n+ newLogicalPlan.setOptimized();", - "comment_created_at": "2025-06-27T08:02:42+00:00", - "comment_author": "bpintea", - "comment_body": "Hmm, I thought this _was_ sufficient. The approach basically gradually executes righthand sides of (inline) joins, always resulting in a `LocalRelation`. Ending up with a join with whatever was before on the left and this local relation on the right. 
Not sure if we can further optimise this (and haven't done it before - the LIMIT makes it past the InlineJoin into the lefthand side already, since InlineJoin preserves the rows count).\r\nBut a TODO can re-eval things later, \ud83d\udc4d .", - "pr_file_module": null - }, - { - "comment_id": "2174975919", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 128917, - "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java", - "discussion_id": "2170953937", - "commented_code": "@@ -231,85 +231,88 @@ public void executeOptimizedPlan(\n values.add(List.of(\"coordinator\", \"optimizedPhysicalPlan\", physicalPlanString));\n var blocks = BlockUtils.fromList(PlannerUtils.NON_BREAKING_BLOCK_FACTORY, values);\n physicalPlan = new LocalSourceExec(Source.EMPTY, fields, LocalSupplier.of(blocks));\n+ planRunner.run(physicalPlan, listener);\n+ } else {\n+ // TODO: this could be snuck into the underlying listener\n+ EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n+ // execute any potential subplans\n+ executeSubPlans(optimizedPlan, planRunner, executionInfo, request, listener);\n }\n- // TODO: this could be snuck into the underlying listener\n- EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n- // execute any potential subplans\n- executeSubPlans(physicalPlan, planRunner, executionInfo, request, listener);\n }\n \n- private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {}\n+ private record LogicalPlanTuple(LogicalPlan nonStubbedSubPlan, LogicalPlan originalSubPlan) {}\n \n private void executeSubPlans(\n- PhysicalPlan physicalPlan,\n+ LogicalPlan optimizedPlan,\n PlanRunner runner,\n EsqlExecutionInfo executionInfo,\n EsqlQueryRequest request,\n ActionListener listener\n ) {\n- List subplans = new ArrayList<>();\n-\n- // Currently the inlinestats are limited and supported as streaming operators, thus present inside the fragment as logical plans\n- // Below they get collected, translated into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n- physicalPlan.forEachUp(FragmentExec.class, f -> {\n- f.fragment().forEachUp(InlineJoin.class, ij -> {\n- // extract the right side of the plan and replace its source\n- LogicalPlan subplan = InlineJoin.replaceStub(ij.left(), ij.right());\n- // mark the new root node as optimized\n- subplan.setOptimized();\n- PhysicalPlan subqueryPlan = logicalPlanToPhysicalPlan(subplan, request);\n- subplans.add(new PlanTuple(subqueryPlan, ij.right()));\n- });\n- });\n-\n- Iterator iterator = subplans.iterator();\n+ var subPlan = firstSubPlan(optimizedPlan);\n \n // TODO: merge into one method\n- if (subplans.size() > 0) {\n+ if (subPlan != null) {\n // code-path to execute subplans\n- executeSubPlan(new DriverCompletionInfo.Accumulator(), physicalPlan, iterator, executionInfo, runner, listener);\n+ executeSubPlan(new DriverCompletionInfo.Accumulator(), optimizedPlan, subPlan, executionInfo, runner, request, listener);\n } else {\n+ PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n // execute main plan\n runner.run(physicalPlan, listener);\n }\n }\n \n+ private LogicalPlanTuple firstSubPlan(LogicalPlan optimizedPlan) {\n+ Holder subPlan = new Holder<>();\n+ // Collect the first inlinejoin (bottom up in the tree)\n+ optimizedPlan.forEachUp(InlineJoin.class, ij -> {\n+ // extract the right side of the plan and replace its source\n+ if (subPlan.get() == null && ij.right().anyMatch(p -> p instanceof StubRelation)) {\n+ var p 
= InlineJoin.replaceStub(ij.left(), ij.right());\n+ p.setOptimized();\n+ subPlan.set(new LogicalPlanTuple(p, ij.right()));\n+ }\n+ });\n+ return subPlan.get();\n+ }\n+\n private void executeSubPlan(\n DriverCompletionInfo.Accumulator completionInfoAccumulator,\n- PhysicalPlan plan,\n- Iterator subPlanIterator,\n+ LogicalPlan optimizedPlan,\n+ LogicalPlanTuple subPlans,\n EsqlExecutionInfo executionInfo,\n PlanRunner runner,\n+ EsqlQueryRequest request,\n ActionListener listener\n ) {\n- PlanTuple tuple = subPlanIterator.next();\n+ // Create a physical plan out of the logical sub-plan\n+ var physicalSubPlan = logicalPlanToPhysicalPlan(subPlans.nonStubbedSubPlan, request);\n \n- runner.run(tuple.physical, listener.delegateFailureAndWrap((next, result) -> {\n+ runner.run(physicalSubPlan, listener.delegateFailureAndWrap((next, result) -> {\n try {\n+ // Translate the subquery into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n completionInfoAccumulator.accumulate(result.completionInfo());\n- LocalRelation resultWrapper = resultToPlan(tuple.logical, result);\n+ LocalRelation resultWrapper = resultToPlan(subPlans.nonStubbedSubPlan, result);\n \n // replace the original logical plan with the backing result\n- PhysicalPlan newPlan = plan.transformUp(FragmentExec.class, f -> {\n- LogicalPlan frag = f.fragment();\n- return f.withFragment(\n- frag.transformUp(\n- InlineJoin.class,\n- ij -> ij.right() == tuple.logical ? InlineJoin.inlineData(ij, resultWrapper) : ij\n- )\n- );\n- });\n+ LogicalPlan newLogicalPlan = optimizedPlan.transformUp(\n+ InlineJoin.class,\n+ ij -> ij.right() == subPlans.originalSubPlan ? InlineJoin.inlineData(ij, resultWrapper) : ij\n+ );\n+ newLogicalPlan.setOptimized();", - "comment_created_at": "2025-06-30T12:38:14+00:00", - "comment_author": "alex-spies", - "comment_body": "Oh, we can further optimize it alright :)\r\n\r\nThe limit being pushed/copied down into the left hand side is a bug - that should only happen in subsequent passes. See my comment [here](https://github.com/elastic/elasticsearch/pull/128917#discussion_r2174955992).\r\n\r\nAlso, if the `INLINESTATS` has no `BY` clause, it can be turned into an `EVAL` with literals in subsequent phases, which can trigger more optimizations (like constant folding, filter pushdown, optimizing away checks against constant keyword fields). 
Some of this could probably be somehow hacked into the first optimization pass, but currently I think it's more natural (or at least easier) to just have another optimizer pass per query phase.", - "pr_file_module": null - }, - { - "comment_id": "2175521950", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 128917, - "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java", - "discussion_id": "2170953937", - "commented_code": "@@ -231,85 +231,88 @@ public void executeOptimizedPlan(\n values.add(List.of(\"coordinator\", \"optimizedPhysicalPlan\", physicalPlanString));\n var blocks = BlockUtils.fromList(PlannerUtils.NON_BREAKING_BLOCK_FACTORY, values);\n physicalPlan = new LocalSourceExec(Source.EMPTY, fields, LocalSupplier.of(blocks));\n+ planRunner.run(physicalPlan, listener);\n+ } else {\n+ // TODO: this could be snuck into the underlying listener\n+ EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n+ // execute any potential subplans\n+ executeSubPlans(optimizedPlan, planRunner, executionInfo, request, listener);\n }\n- // TODO: this could be snuck into the underlying listener\n- EsqlCCSUtils.updateExecutionInfoAtEndOfPlanning(executionInfo);\n- // execute any potential subplans\n- executeSubPlans(physicalPlan, planRunner, executionInfo, request, listener);\n }\n \n- private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {}\n+ private record LogicalPlanTuple(LogicalPlan nonStubbedSubPlan, LogicalPlan originalSubPlan) {}\n \n private void executeSubPlans(\n- PhysicalPlan physicalPlan,\n+ LogicalPlan optimizedPlan,\n PlanRunner runner,\n EsqlExecutionInfo executionInfo,\n EsqlQueryRequest request,\n ActionListener listener\n ) {\n- List subplans = new ArrayList<>();\n-\n- // Currently the inlinestats are limited and supported as streaming operators, thus present inside the fragment as logical plans\n- // Below they get collected, translated into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n- physicalPlan.forEachUp(FragmentExec.class, f -> {\n- f.fragment().forEachUp(InlineJoin.class, ij -> {\n- // extract the right side of the plan and replace its source\n- LogicalPlan subplan = InlineJoin.replaceStub(ij.left(), ij.right());\n- // mark the new root node as optimized\n- subplan.setOptimized();\n- PhysicalPlan subqueryPlan = logicalPlanToPhysicalPlan(subplan, request);\n- subplans.add(new PlanTuple(subqueryPlan, ij.right()));\n- });\n- });\n-\n- Iterator iterator = subplans.iterator();\n+ var subPlan = firstSubPlan(optimizedPlan);\n \n // TODO: merge into one method\n- if (subplans.size() > 0) {\n+ if (subPlan != null) {\n // code-path to execute subplans\n- executeSubPlan(new DriverCompletionInfo.Accumulator(), physicalPlan, iterator, executionInfo, runner, listener);\n+ executeSubPlan(new DriverCompletionInfo.Accumulator(), optimizedPlan, subPlan, executionInfo, runner, request, listener);\n } else {\n+ PhysicalPlan physicalPlan = logicalPlanToPhysicalPlan(optimizedPlan, request);\n // execute main plan\n runner.run(physicalPlan, listener);\n }\n }\n \n+ private LogicalPlanTuple firstSubPlan(LogicalPlan optimizedPlan) {\n+ Holder subPlan = new Holder<>();\n+ // Collect the first inlinejoin (bottom up in the tree)\n+ optimizedPlan.forEachUp(InlineJoin.class, ij -> {\n+ // extract the right side of the plan and replace its source\n+ if (subPlan.get() == null && ij.right().anyMatch(p -> p instanceof StubRelation)) {\n+ var p = InlineJoin.replaceStub(ij.left(), 
ij.right());\n+ p.setOptimized();\n+ subPlan.set(new LogicalPlanTuple(p, ij.right()));\n+ }\n+ });\n+ return subPlan.get();\n+ }\n+\n private void executeSubPlan(\n DriverCompletionInfo.Accumulator completionInfoAccumulator,\n- PhysicalPlan plan,\n- Iterator subPlanIterator,\n+ LogicalPlan optimizedPlan,\n+ LogicalPlanTuple subPlans,\n EsqlExecutionInfo executionInfo,\n PlanRunner runner,\n+ EsqlQueryRequest request,\n ActionListener listener\n ) {\n- PlanTuple tuple = subPlanIterator.next();\n+ // Create a physical plan out of the logical sub-plan\n+ var physicalSubPlan = logicalPlanToPhysicalPlan(subPlans.nonStubbedSubPlan, request);\n \n- runner.run(tuple.physical, listener.delegateFailureAndWrap((next, result) -> {\n+ runner.run(physicalSubPlan, listener.delegateFailureAndWrap((next, result) -> {\n try {\n+ // Translate the subquery into a separate, coordinator based plan and the results 'broadcasted' as a local relation\n completionInfoAccumulator.accumulate(result.completionInfo());\n- LocalRelation resultWrapper = resultToPlan(tuple.logical, result);\n+ LocalRelation resultWrapper = resultToPlan(subPlans.nonStubbedSubPlan, result);\n \n // replace the original logical plan with the backing result\n- PhysicalPlan newPlan = plan.transformUp(FragmentExec.class, f -> {\n- LogicalPlan frag = f.fragment();\n- return f.withFragment(\n- frag.transformUp(\n- InlineJoin.class,\n- ij -> ij.right() == tuple.logical ? InlineJoin.inlineData(ij, resultWrapper) : ij\n- )\n- );\n- });\n+ LogicalPlan newLogicalPlan = optimizedPlan.transformUp(\n+ InlineJoin.class,\n+ ij -> ij.right() == subPlans.originalSubPlan ? InlineJoin.inlineData(ij, resultWrapper) : ij\n+ );\n+ newLogicalPlan.setOptimized();", - "comment_created_at": "2025-06-30T16:57:00+00:00", - "comment_author": "bpintea", - "comment_body": "> The limit being pushed/copied down into the left hand side is a bug - that should only happen in subsequent passes.\r\n\r\nActually, very true!\r\n\r\n> Also, if the INLINESTATS has no BY clause, it can be turned into an EVAL with literals in subsequent phases\r\n\r\nI see. Yes, this could be done, but not with the current shape of the planning (i.e. rerunning the optimiser as it is now won't replan). 
But yes, you're right, this could still be evolved.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2175016623", - "pr_number": 127223, - "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java", - "created_at": "2025-06-30T12:59:04+00:00", - "commented_code": "return knnQuery;\n }\n\n private Query maybeWrapPatience(Query knnQuery) {\n Query finalQuery = knnQuery;\n if (knnQuery instanceof KnnByteVectorQuery knnByteVectorQuery) {\n finalQuery = maybeWrapPatienceByte(knnByteVectorQuery, Math.max(7, (int) (knnByteVectorQuery.getK() * 0.3)));\n } else if (knnQuery instanceof KnnFloatVectorQuery knnFloatVectorQuery) {\n finalQuery = maybeWrapPatienceFloat(knnFloatVectorQuery, Math.max(7, (int) (knnFloatVectorQuery.getK() * 0.3)));\n }\n return finalQuery;\n }\n\n private Query maybeWrapPatienceByte(KnnByteVectorQuery knnQuery, int patience) {\n Query returnedQuery = knnQuery;\n if (indexOptions instanceof HnswIndexOptions) {\n returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n } else if (indexOptions instanceof Int8HnswIndexOptions) {\n returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n } else if (indexOptions instanceof Int4HnswIndexOptions) {\n returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n } else if (indexOptions instanceof BBQHnswIndexOptions) {\n returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n }\n return returnedQuery;\n }\n\n private Query maybeWrapPatienceFloat(KnnFloatVectorQuery knnQuery, int patience) {\n Query returnedQuery = knnQuery;\n if (indexOptions instanceof HnswIndexOptions hnswIndexOptions) {\n returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n } else if (indexOptions instanceof Int8HnswIndexOptions hnswIndexOptions) {\n returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n } else if (indexOptions instanceof Int4HnswIndexOptions hnswIndexOptions) {\n returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n } else if (indexOptions instanceof BBQHnswIndexOptions hnswIndexOptions) {\n returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n }\n return returnedQuery;", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2175016623", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 127223, - "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java", - "discussion_id": "2175016623", - "commented_code": "@@ -2526,6 +2561,44 @@ private Query createKnnByteQuery(\n return knnQuery;\n }\n \n+ private Query maybeWrapPatience(Query knnQuery) {\n+ Query finalQuery = knnQuery;\n+ if (knnQuery instanceof KnnByteVectorQuery knnByteVectorQuery) {\n+ finalQuery = maybeWrapPatienceByte(knnByteVectorQuery, Math.max(7, (int) (knnByteVectorQuery.getK() * 0.3)));\n+ } else if (knnQuery instanceof KnnFloatVectorQuery knnFloatVectorQuery) {\n+ finalQuery = maybeWrapPatienceFloat(knnFloatVectorQuery, Math.max(7, (int) (knnFloatVectorQuery.getK() * 0.3)));\n+ }\n+ return finalQuery;\n+ }\n+\n+ private Query maybeWrapPatienceByte(KnnByteVectorQuery knnQuery, int patience) {\n+ Query returnedQuery = knnQuery;\n+ if (indexOptions instanceof HnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int8HnswIndexOptions) 
{\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int4HnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof BBQHnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ }\n+ return returnedQuery;\n+ }\n+\n+ private Query maybeWrapPatienceFloat(KnnFloatVectorQuery knnQuery, int patience) {\n+ Query returnedQuery = knnQuery;\n+ if (indexOptions instanceof HnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int8HnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int4HnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof BBQHnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ }\n+ return returnedQuery;", - "comment_created_at": "2025-06-30T12:59:04+00:00", - "comment_author": "benwtrent", - "comment_body": "Why do we have this big predicate branch? \r\n\r\nAlso, it seems that this `0.995` is just the default in Lucene. \r\n\r\nWhy can't we simplify all this and return `PatienceKnnVectorQuery.fromFloatQuery(knnQuery)` ?\r\n\r\nSimilar question to the byte query.", - "pr_file_module": null - }, - { - "comment_id": "2175126375", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 127223, - "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java", - "discussion_id": "2175016623", - "commented_code": "@@ -2526,6 +2561,44 @@ private Query createKnnByteQuery(\n return knnQuery;\n }\n \n+ private Query maybeWrapPatience(Query knnQuery) {\n+ Query finalQuery = knnQuery;\n+ if (knnQuery instanceof KnnByteVectorQuery knnByteVectorQuery) {\n+ finalQuery = maybeWrapPatienceByte(knnByteVectorQuery, Math.max(7, (int) (knnByteVectorQuery.getK() * 0.3)));\n+ } else if (knnQuery instanceof KnnFloatVectorQuery knnFloatVectorQuery) {\n+ finalQuery = maybeWrapPatienceFloat(knnFloatVectorQuery, Math.max(7, (int) (knnFloatVectorQuery.getK() * 0.3)));\n+ }\n+ return finalQuery;\n+ }\n+\n+ private Query maybeWrapPatienceByte(KnnByteVectorQuery knnQuery, int patience) {\n+ Query returnedQuery = knnQuery;\n+ if (indexOptions instanceof HnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int8HnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int4HnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof BBQHnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ }\n+ return returnedQuery;\n+ }\n+\n+ private Query maybeWrapPatienceFloat(KnnFloatVectorQuery knnQuery, int patience) {\n+ Query returnedQuery = knnQuery;\n+ if (indexOptions instanceof HnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int8HnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = 
PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int4HnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof BBQHnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ }\n+ return returnedQuery;", - "comment_created_at": "2025-06-30T13:47:17+00:00", - "comment_author": "tteofili", - "comment_body": "++ on the defaults.\r\nre the branches, it doesn't make sense to wrap the knn query with non-HNSW index types because, currently, it only works with them.", - "pr_file_module": null - }, - { - "comment_id": "2175182641", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 127223, - "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java", - "discussion_id": "2175016623", - "commented_code": "@@ -2526,6 +2561,44 @@ private Query createKnnByteQuery(\n return knnQuery;\n }\n \n+ private Query maybeWrapPatience(Query knnQuery) {\n+ Query finalQuery = knnQuery;\n+ if (knnQuery instanceof KnnByteVectorQuery knnByteVectorQuery) {\n+ finalQuery = maybeWrapPatienceByte(knnByteVectorQuery, Math.max(7, (int) (knnByteVectorQuery.getK() * 0.3)));\n+ } else if (knnQuery instanceof KnnFloatVectorQuery knnFloatVectorQuery) {\n+ finalQuery = maybeWrapPatienceFloat(knnFloatVectorQuery, Math.max(7, (int) (knnFloatVectorQuery.getK() * 0.3)));\n+ }\n+ return finalQuery;\n+ }\n+\n+ private Query maybeWrapPatienceByte(KnnByteVectorQuery knnQuery, int patience) {\n+ Query returnedQuery = knnQuery;\n+ if (indexOptions instanceof HnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int8HnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int4HnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof BBQHnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromByteQuery(knnQuery, 0.995, patience);\n+ }\n+ return returnedQuery;\n+ }\n+\n+ private Query maybeWrapPatienceFloat(KnnFloatVectorQuery knnQuery, int patience) {\n+ Query returnedQuery = knnQuery;\n+ if (indexOptions instanceof HnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int8HnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof Int4HnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ } else if (indexOptions instanceof BBQHnswIndexOptions hnswIndexOptions) {\n+ returnedQuery = PatienceKnnVectorQuery.fromFloatQuery(knnQuery, 0.995, patience);\n+ }\n+ return returnedQuery;", - "comment_created_at": "2025-06-30T14:13:20+00:00", - "comment_author": "benwtrent", - "comment_body": "@tteofili OK, I understand that, I think maybe we can then collapse into predicate then :)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2170926461", - "pr_number": 130160, - "pr_file": "x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/DownsampleShardPersistentTaskExecutor.java", - "created_at": "2025-06-27T06:44:02+00:00", - 
"commented_code": ".orElse(NO_NODE_FOUND);\n }\n\n /**\n * An eligible node to run the downsampling task for a shard is a node that holds\n * a searchable version of this shard.\n * In stateless deployment we choose only nodes that hold search shards.\n * Otherwise, we choose the node that holds the primary shard.\n * Visible for testing.\n * @param indexShardRouting the routing of the shard to be downsampled\n * @return the set of candidate nodes downsampling can run on.\n */\n Set getEligibleNodes(IndexShardRoutingTable indexShardRouting) {", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2170926461", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130160, - "pr_file": "x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/DownsampleShardPersistentTaskExecutor.java", - "discussion_id": "2170926461", - "commented_code": "@@ -159,6 +164,29 @@ public PersistentTasksCustomMetadata.Assignment getAssignment(\n .orElse(NO_NODE_FOUND);\n }\n \n+ /**\n+ * An eligible node to run the downsampling task for a shard is a node that holds\n+ * a searchable version of this shard.\n+ * In stateless deployment we choose only nodes that hold search shards.\n+ * Otherwise, we choose the node that holds the primary shard.\n+ * Visible for testing.\n+ * @param indexShardRouting the routing of the shard to be downsampled\n+ * @return the set of candidate nodes downsampling can run on.\n+ */\n+ Set getEligibleNodes(IndexShardRoutingTable indexShardRouting) {", - "comment_created_at": "2025-06-27T06:44:02+00:00", - "comment_author": "martijnvg", - "comment_body": "Maybe rewrite this method like this:\r\n\r\n```\r\nPredicate getEligibleNodes(IndexShardRoutingTable indexShardRouting) {\r\n if (isStateless) {\r\n return candidateNode -> {\r\n for (var shardRouting : indexShardRouting.replicaShards()) {\r\n if (shardRouting.started()) {\r\n return shardRouting.currentNodeId().equals(candidateNode.getId());\r\n }\r\n }\r\n return false;\r\n };\r\n } else if (indexShardRouting.primaryShard().started()) {\r\n return candidateNode -> indexShardRouting.primaryShard().currentNodeId().equals(candidateNode.getId());\r\n } else {\r\n return null;\r\n }\r\n }\r\n```\r\n\r\nThat way the logic is better contained and no intermediate set is created?", - "pr_file_module": null - }, - { - "comment_id": "2171212580", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130160, - "pr_file": "x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/DownsampleShardPersistentTaskExecutor.java", - "discussion_id": "2170926461", - "commented_code": "@@ -159,6 +164,29 @@ public PersistentTasksCustomMetadata.Assignment getAssignment(\n .orElse(NO_NODE_FOUND);\n }\n \n+ /**\n+ * An eligible node to run the downsampling task for a shard is a node that holds\n+ * a searchable version of this shard.\n+ * In stateless deployment we choose only nodes that hold search shards.\n+ * Otherwise, we choose the node that holds the primary shard.\n+ * Visible for testing.\n+ * @param indexShardRouting the routing of the shard to be downsampled\n+ * @return the set of candidate nodes downsampling can run on.\n+ */\n+ Set getEligibleNodes(IndexShardRoutingTable indexShardRouting) {", - "comment_created_at": "2025-06-27T08:29:16+00:00", - "comment_author": "gmarouli", - "comment_body": "I have been thinking about this, and I think both my current implementation and this can be a bit inefficient in big clusters because we iterate over all the candidate 
nodes that could be a lot. \r\n\r\nThinking about this some more, I am considering iterating over the eligible nodes which are probably way less than the total candidates and check if they are a candidate node.\r\n\r\nWhat do you think?\r\n\r\nAbout the code you shared, it does read nicely, but I am concerned that for every candidate node we are going to be building a new iterator over the replica shards ending up with more object churn than with the previous solution. Right?\r\n\r\n", - "pr_file_module": null - }, - { - "comment_id": "2171650282", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130160, - "pr_file": "x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/DownsampleShardPersistentTaskExecutor.java", - "discussion_id": "2170926461", - "commented_code": "@@ -159,6 +164,29 @@ public PersistentTasksCustomMetadata.Assignment getAssignment(\n .orElse(NO_NODE_FOUND);\n }\n \n+ /**\n+ * An eligible node to run the downsampling task for a shard is a node that holds\n+ * a searchable version of this shard.\n+ * In stateless deployment we choose only nodes that hold search shards.\n+ * Otherwise, we choose the node that holds the primary shard.\n+ * Visible for testing.\n+ * @param indexShardRouting the routing of the shard to be downsampled\n+ * @return the set of candidate nodes downsampling can run on.\n+ */\n+ Set getEligibleNodes(IndexShardRoutingTable indexShardRouting) {", - "comment_created_at": "2025-06-27T11:07:12+00:00", - "comment_author": "gmarouli", - "comment_body": "@martijnvg I refactored it, I think it looks a bit cleaner now. Let me know what you think.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1977324330", - "pr_number": 123322, - "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/QueryProgressFragmentOptimizer.java", - "created_at": "2025-03-03T11:09:38+00:00", - "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. Licensed under the Elastic License\n * 2.0; you may not use this file except in compliance with the Elastic License\n * 2.0.\n */\n\npackage org.elasticsearch.xpack.esql.optimizer;\n\nimport org.elasticsearch.core.Nullable;\nimport org.elasticsearch.xpack.esql.core.expression.FoldContext;\nimport org.elasticsearch.xpack.esql.core.expression.Literal;\nimport org.elasticsearch.xpack.esql.plan.logical.Limit;\nimport org.elasticsearch.xpack.esql.plan.physical.FragmentExec;\nimport org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan;\nimport org.elasticsearch.xpack.esql.planner.mapper.Mapper;\n\nimport java.util.function.IntSupplier;\n\n/**\n * Attempts to rewrite the fragment enclosed in a data-node plan based on the progress of the query. 
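One way to reconcile the two concerns in the thread above (no intermediate set handed back to callers, but also no re-scan of the replica shards for every candidate node) is to collect the started shard node ids once and return a constant-time predicate. The sketch below reuses the names from the discussion (`isStateless`, `IndexShardRoutingTable`, `DiscoveryNode`) and is illustrative only; it is not the refactoring that was actually merged.

```java
// Illustrative sketch: one pass over the routing table up front, then a constant-time membership
// check per candidate node. Returning a Predicate keeps the logic contained without exposing a Set.
Predicate<DiscoveryNode> eligibleNodePredicate(IndexShardRoutingTable indexShardRouting) {
    if (isStateless) {
        // Stateless: only nodes holding a started search (replica) shard are eligible.
        Set<String> searchShardNodeIds = new HashSet<>();
        for (ShardRouting shardRouting : indexShardRouting.replicaShards()) {
            if (shardRouting.started()) {
                searchShardNodeIds.add(shardRouting.currentNodeId());
            }
        }
        return candidateNode -> searchShardNodeIds.contains(candidateNode.getId());
    }
    // Otherwise only the node holding the started primary shard is eligible.
    ShardRouting primary = indexShardRouting.primaryShard();
    return primary.started()
        ? candidateNode -> primary.currentNodeId().equals(candidateNode.getId())
        : candidateNode -> false;
}
```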
For example:\n * `FROM x | LIMIT 100` can be safely rewritten to `FROM x | LIMIT 40` if 60 rows have already been collected.\n * TODO: Rewrite TopN to filters\n */\npublic final class QueryProgressFragmentOptimizer {\n private final PhysicalVerifier verifier = PhysicalVerifier.INSTANCE;\n private final IntSupplier alreadyCollectedLimit;\n\n /**\n * @param alreadyCollectedLimit the supplier to get the number of rows already collected\n */\n public QueryProgressFragmentOptimizer(@Nullable IntSupplier alreadyCollectedLimit) {\n this.alreadyCollectedLimit = alreadyCollectedLimit;\n }\n\n /**\n * Attempts to optimize the fragment enclosed in a data-node plan based on the progress of the query.\n * @param plan the input plan\n * @return the optimized plan. If this returns null, the query can be early terminated.\n */\n public PhysicalPlan optimizeFragment(PhysicalPlan plan) {\n if (alreadyCollectedLimit == null) {\n return plan;\n }\n final var fragments = plan.collectFirstChildren(p -> p instanceof FragmentExec);\n if (fragments.size() != 1) {\n return plan;\n }\n final FragmentExec fragment = (FragmentExec) fragments.getFirst();\n final var pipelineBreakers = fragment.fragment().collectFirstChildren(Mapper::isPipelineBreaker);\n if (pipelineBreakers.isEmpty()) {\n return plan;\n }\n // Rewrite LIMIT\n if (pipelineBreakers.getFirst() instanceof Limit firstLimit) {", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "1977324330", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 123322, - "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/QueryProgressFragmentOptimizer.java", - "discussion_id": "1977324330", - "commented_code": "@@ -0,0 +1,84 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. Licensed under the Elastic License\n+ * 2.0; you may not use this file except in compliance with the Elastic License\n+ * 2.0.\n+ */\n+\n+package org.elasticsearch.xpack.esql.optimizer;\n+\n+import org.elasticsearch.core.Nullable;\n+import org.elasticsearch.xpack.esql.core.expression.FoldContext;\n+import org.elasticsearch.xpack.esql.core.expression.Literal;\n+import org.elasticsearch.xpack.esql.plan.logical.Limit;\n+import org.elasticsearch.xpack.esql.plan.physical.FragmentExec;\n+import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan;\n+import org.elasticsearch.xpack.esql.planner.mapper.Mapper;\n+\n+import java.util.function.IntSupplier;\n+\n+/**\n+ * Attempts to rewrite the fragment enclosed in a data-node plan based on the progress of the query. For example:\n+ * `FROM x | LIMIT 100` can be safely rewritten to `FROM x | LIMIT 40` if 60 rows have already been collected.\n+ * TODO: Rewrite TopN to filters\n+ */\n+public final class QueryProgressFragmentOptimizer {\n+ private final PhysicalVerifier verifier = PhysicalVerifier.INSTANCE;\n+ private final IntSupplier alreadyCollectedLimit;\n+\n+ /**\n+ * @param alreadyCollectedLimit the supplier to get the number of rows already collected\n+ */\n+ public QueryProgressFragmentOptimizer(@Nullable IntSupplier alreadyCollectedLimit) {\n+ this.alreadyCollectedLimit = alreadyCollectedLimit;\n+ }\n+\n+ /**\n+ * Attempts to optimize the fragment enclosed in a data-node plan based on the progress of the query.\n+ * @param plan the input plan\n+ * @return the optimized plan. 
If this returns null, the query can be early terminated.\n+ */\n+ public PhysicalPlan optimizeFragment(PhysicalPlan plan) {\n+ if (alreadyCollectedLimit == null) {\n+ return plan;\n+ }\n+ final var fragments = plan.collectFirstChildren(p -> p instanceof FragmentExec);\n+ if (fragments.size() != 1) {\n+ return plan;\n+ }\n+ final FragmentExec fragment = (FragmentExec) fragments.getFirst();\n+ final var pipelineBreakers = fragment.fragment().collectFirstChildren(Mapper::isPipelineBreaker);\n+ if (pipelineBreakers.isEmpty()) {\n+ return plan;\n+ }\n+ // Rewrite LIMIT\n+ if (pipelineBreakers.getFirst() instanceof Limit firstLimit) {", - "comment_created_at": "2025-03-03T11:09:38+00:00", - "comment_author": "alex-spies", - "comment_body": "The approach here assumes that there are no query plan nodes after the `LIMIT` which may rely on the number of rows that they see. I believe this is correct now (as there just shouldn't be anything after the `LIMIT`), but we cannot guarantee that this will always be the case, esp. as different teams add new commands to ES|QL.\r\n\r\nCouldn't we limit the scope of this optimizer rule to the case where the _last_ plan node in the fragment is a `LIMIT`? That's more easily provable to be correct and will break less likely when we add new query plan nodes.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1977694495", - "pr_number": 123322, - "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/QueryProgressFragmentOptimizer.java", - "created_at": "2025-03-03T15:14:00+00:00", - "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. Licensed under the Elastic License\n * 2.0; you may not use this file except in compliance with the Elastic License\n * 2.0.\n */\n\npackage org.elasticsearch.xpack.esql.optimizer;\n\nimport org.elasticsearch.core.Nullable;\nimport org.elasticsearch.xpack.esql.core.expression.FoldContext;\nimport org.elasticsearch.xpack.esql.core.expression.Literal;\nimport org.elasticsearch.xpack.esql.plan.logical.Limit;\nimport org.elasticsearch.xpack.esql.plan.physical.FragmentExec;\nimport org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan;\nimport org.elasticsearch.xpack.esql.planner.mapper.Mapper;\n\nimport java.util.function.IntSupplier;\n\n/**\n * Attempts to rewrite the fragment enclosed in a data-node plan based on the progress of the query. For example:\n * `FROM x | LIMIT 100` can be safely rewritten to `FROM x | LIMIT 40` if 60 rows have already been collected.\n * TODO: Rewrite TopN to filters\n */\npublic final class QueryProgressFragmentOptimizer {", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "1977694495", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 123322, - "pr_file": "x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/QueryProgressFragmentOptimizer.java", - "discussion_id": "1977694495", - "commented_code": "@@ -0,0 +1,84 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
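A minimal sketch of the narrower guard suggested above: rewrite only when the outermost node of the enclosed fragment is the `Limit` itself, so no later plan node can depend on the row count it sees. Names follow the diff (`FragmentExec`, `Limit`); `reduceLimit` is a hypothetical helper standing in for the actual rewrite.

```java
// Sketch only: restrict the rewrite to fragments whose root (the last node to run) is the Limit.
final FragmentExec fragment = (FragmentExec) fragments.getFirst();
if (fragment.fragment() instanceof Limit lastLimit) {
    // Safe case: nothing executes after the Limit inside this fragment, so shrinking it cannot
    // change what any downstream plan node observes.
    return reduceLimit(plan, fragment, lastLimit); // hypothetical helper performing the reduction
}
// Any other shape: leave the fragment untouched rather than reasoning about pipeline breakers.
return plan;
```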
Licensed under the Elastic License\n+ * 2.0; you may not use this file except in compliance with the Elastic License\n+ * 2.0.\n+ */\n+\n+package org.elasticsearch.xpack.esql.optimizer;\n+\n+import org.elasticsearch.core.Nullable;\n+import org.elasticsearch.xpack.esql.core.expression.FoldContext;\n+import org.elasticsearch.xpack.esql.core.expression.Literal;\n+import org.elasticsearch.xpack.esql.plan.logical.Limit;\n+import org.elasticsearch.xpack.esql.plan.physical.FragmentExec;\n+import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan;\n+import org.elasticsearch.xpack.esql.planner.mapper.Mapper;\n+\n+import java.util.function.IntSupplier;\n+\n+/**\n+ * Attempts to rewrite the fragment enclosed in a data-node plan based on the progress of the query. For example:\n+ * `FROM x | LIMIT 100` can be safely rewritten to `FROM x | LIMIT 40` if 60 rows have already been collected.\n+ * TODO: Rewrite TopN to filters\n+ */\n+public final class QueryProgressFragmentOptimizer {", - "comment_created_at": "2025-03-03T15:14:00+00:00", - "comment_author": "astefan", - "comment_body": "I would look into integrating this step into existing optimization infrastructure. Even though this is a data-node level optimization that depends on how many rows have been returned, in essence I believe it should be a **logical plan level optimization**. There is a recent change in our code base that duplicates a `limit` in other parts of the query in certain situations which means the original plan can have a `limit 50` somewhere in the middle and now, instead of a `limit 50` somewhere at the end of the query, it has a `limit 10` or similar. Probably it doesn't affect the overall execution, but we don't know how this code evolves in the future.\r\n\r\nInitially, I thought that this could be put somewhere in `ComputeService:379` in the physical plan optimizer, meaning here\r\n```\r\nplan = PlannerUtils.localPlan(context.searchExecutionContexts(), context.configuration(), context.foldCtx(), plan);\r\n```\r\n\r\nAnd this method could receive the new value for the limit. That value should then be passed to the **physical plan optimizer**. **Maybe this _could_ be a better start for this feature.**\r\n\r\nI think this is a really great idea for optimizing simple queries to be faster, but a less fragile implementation should integrate this step in the usual optimization processes, since this _could_ interfere with other optimization steps. Having it there instead can also be a signal for future development that this optimization exists. 
The code in the ES|QL distributed code is quite separated from the one in the optimization plan code.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2100298832", - "pr_number": 128135, - "pr_file": "x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/TopNOperator.java", - "created_at": "2025-05-21T13:26:36+00:00", - "commented_code": "private final BlockFactory blockFactory;\n private final CircuitBreaker breaker;\n private final Queue inputQueue;\n private final Map inputQueues;", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2100298832", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 128135, - "pr_file": "x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/TopNOperator.java", - "discussion_id": "2100298832", - "commented_code": "@@ -264,12 +282,14 @@ public String describe() {\n \n private final BlockFactory blockFactory;\n private final CircuitBreaker breaker;\n- private final Queue inputQueue;\n+ private final Map inputQueues;", - "comment_created_at": "2025-05-21T13:26:36+00:00", - "comment_author": "nik9000", - "comment_body": "I'm a bit worried about memory tracking for the keys.\r\n\r\nGenerally we use `ObjectArray` to keep things like the `Queue`s and we use `BytesRefHash` to assign the ids.\r\n\r\nIf we did that it'd read very very very similarly to an aggregation. In aggs-land we separate the hashing implementation from the \"collecting\" one. I wonder if we'd be better off using the aggs infrastructure in this case. No idea how we migrate from one to the other, but we have a `TopBytesRefAggregator`. And we could try to use that *somehow*. If we did that we'd get nice handling for the hashing for free. We're quite good at those hash keys.", - "pr_file_module": null - }, - { - "comment_id": "2102502245", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 128135, - "pr_file": "x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/TopNOperator.java", - "discussion_id": "2100298832", - "commented_code": "@@ -264,12 +282,14 @@ public String describe() {\n \n private final BlockFactory blockFactory;\n private final CircuitBreaker breaker;\n- private final Queue inputQueue;\n+ private final Map inputQueues;", - "comment_created_at": "2025-05-22T12:59:38+00:00", - "comment_author": "przemekwitek", - "comment_body": "> Generally we use ObjectArray to keep things like the Queues and we use BytesRefHash to assign the ids.\r\n\r\nOk, that's something I'll take a look into.\r\n\r\n> And we could try to use that somehow.\r\n\r\nDo you think it would be beneficial to do it in this PR? To me it sounds like quite a complex refactoring.", - "pr_file_module": null - }, - { - "comment_id": "2102526209", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 128135, - "pr_file": "x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/TopNOperator.java", - "discussion_id": "2100298832", - "commented_code": "@@ -264,12 +282,14 @@ public String describe() {\n \n private final BlockFactory blockFactory;\n private final CircuitBreaker breaker;\n- private final Queue inputQueue;\n+ private final Map inputQueues;", - "comment_created_at": "2025-05-22T13:11:25+00:00", - "comment_author": "nik9000", - "comment_body": "It is. It has the advantage of applying all of the grouping code we worked hard on for aggs so you don't have to hand build something on your side. 
Maybe you'd be better off just using the `BlockHash.build` infrastructure here. It'd be quite compatible with the ObjectArray stuff. I'm just worried that if you did it you'd find yourself building most of the `HashAggregatorOperator`.\r\n\r\nI can have a little look around this morning though.", - "pr_file_module": null - }, - { - "comment_id": "2102580378", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 128135, - "pr_file": "x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/TopNOperator.java", - "discussion_id": "2100298832", - "commented_code": "@@ -264,12 +282,14 @@ public String describe() {\n \n private final BlockFactory blockFactory;\n private final CircuitBreaker breaker;\n- private final Queue inputQueue;\n+ private final Map inputQueues;", - "comment_created_at": "2025-05-22T13:34:28+00:00", - "comment_author": "nik9000", - "comment_body": "I had a look and think it's probably not worth trying to migrate the whole thing to an agg. Aggs encode their intermediate representation as a single dense column. TopN builds the result columns. They aren't incompatible, but it'd be tricky to migrate.\r\n\r\nI suppose what I suggest instead is to use `BlockHash` to turn the grouping keys into an integer and then `ObjectArray` to contain the heaps. In that case `BlockHash` is going to do a good job managing memory, breaking if it uses too much, and cleaning up. It also has support for non-string keys and complex sets of keys. And will absorb any optimizations we make in the future.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2099445576", - "pr_number": 127260, - "pr_file": "server/src/main/java/org/elasticsearch/lucene/queries/TimestampTwoPhaseIterator.java", - "created_at": "2025-05-21T06:17:28+00:00", - "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. Licensed under the \"Elastic License\n * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n * Public License v 1\"; you may not use this file except in compliance with, at\n * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n * License v3.0 only\", or the \"Server Side Public License, v 1\".\n */\npackage org.elasticsearch.lucene.queries;\n\nimport org.apache.lucene.index.DocValuesSkipper;\nimport org.apache.lucene.index.NumericDocValues;\nimport org.apache.lucene.search.DocIdSetIterator;\nimport org.apache.lucene.search.TwoPhaseIterator;\n\nimport java.io.IOException;\n\n/**\n * Based on {@link org.apache.lucene.search.DocValuesRangeIterator} but modified for time series and logsdb use cases.\n *
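The shape described above could look roughly like this: a hash assigns each distinct grouping key a dense ordinal (the role `BlockHash` plays in the aggs framework, with circuit-breaker accounting built in), and an `ObjectArray` indexed by that ordinal holds the per-group heap. This is a sketch under those assumptions, relying on `BigArrays` object arrays; `keyToOrdinal`, `newTopNHeap` and `Row` are hypothetical stand-ins rather than real TopN APIs.

```java
// Rough sketch of "BlockHash for the keys, ObjectArray for the heaps"; not the TopNOperator code.
private ObjectArray<Queue<Row>> heapsPerGroup; // Row stands in for the operator's row type

void collect(BytesRef groupKey, Row row) {
    int ordinal = keyToOrdinal(groupKey);                        // hypothetical BlockHash-style dense id
    heapsPerGroup = bigArrays.grow(heapsPerGroup, ordinal + 1);  // growth is tracked by the breaker
    Queue<Row> heap = heapsPerGroup.get(ordinal);
    if (heap == null) {
        heap = newTopNHeap();                                    // hypothetical bounded per-group heap
        heapsPerGroup.set(ordinal, heap);
    }
    heap.add(row);
}
```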

\n * The @timestamp field always has exactly one value and all documents always have a @timestamp value.\n * Additionally, the @timestamp field is always the secondary index sort field and sort order is always descending.\n *

\n * This makes the doc value skipper on @timestamp field less effective, and so the doc value skipper for the first index sort field is\n * also used to skip to the next primary sort value if for the current skipper represents one value (min and max value are the same) and\n * the current value is lower than minTimestamp.\n */\nfinal class TimestampTwoPhaseIterator extends TwoPhaseIterator {\n\n private final RangeNoGapsApproximation approximation;\n private final NumericDocValues timestamps;\n\n private final long minTimestamp;\n private final long maxTimestamp;\n\n TimestampTwoPhaseIterator(\n NumericDocValues timestamps,\n DocValuesSkipper timestampSkipper,\n DocValuesSkipper primaryFieldSkipper,\n long minTimestamp,\n long maxTimestamp\n ) {\n super(new RangeNoGapsApproximation(timestamps, timestampSkipper, primaryFieldSkipper, minTimestamp, maxTimestamp));\n this.approximation = (RangeNoGapsApproximation) approximation();\n this.timestamps = timestamps;\n this.minTimestamp = minTimestamp;\n this.maxTimestamp = maxTimestamp;\n }\n\n static final class RangeNoGapsApproximation extends DocIdSetIterator {\n\n private final DocIdSetIterator innerApproximation;\n\n final DocValuesSkipper timestampSkipper;\n final DocValuesSkipper primaryFieldSkipper;\n final long minTimestamp;\n final long maxTimestamp;\n\n private int doc = -1;\n\n // Track a decision for all doc IDs between the current doc ID and upTo inclusive.\n Match match = Match.MAYBE;\n int upTo = -1;\n int primaryFieldUpTo = -1;\n\n RangeNoGapsApproximation(\n DocIdSetIterator innerApproximation,\n DocValuesSkipper timestampSkipper,\n DocValuesSkipper primaryFieldSkipper,\n long minTimestamp,\n long maxTimestamp\n ) {\n this.innerApproximation = innerApproximation;\n this.timestampSkipper = timestampSkipper;\n this.primaryFieldSkipper = primaryFieldSkipper;\n this.minTimestamp = minTimestamp;\n this.maxTimestamp = maxTimestamp;\n }\n\n @Override\n public int docID() {\n return doc;\n }\n\n @Override\n public int nextDoc() throws IOException {\n return advance(docID() + 1);\n }\n\n @Override\n public int advance(int target) throws IOException {\n while (true) {\n if (target > upTo) {\n timestampSkipper.advance(target);\n upTo = timestampSkipper.maxDocID(0);\n if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n return doc = DocIdSetIterator.NO_MORE_DOCS;\n }\n match = match(0);\n\n // If we have a YES or NO decision, see if we still have the same decision on a higher\n // level (= on a wider range of doc IDs)\n int nextLevel = 1;\n while (match != Match.MAYBE && nextLevel < timestampSkipper.numLevels() && match == match(nextLevel)) {\n upTo = timestampSkipper.maxDocID(nextLevel);\n nextLevel++;\n }\n }\n switch (match) {\n case YES:\n return doc = target;\n case MAYBE:\n if (target > innerApproximation.docID()) {\n target = innerApproximation.advance(target);", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2099445576", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 127260, - "pr_file": "server/src/main/java/org/elasticsearch/lucene/queries/TimestampTwoPhaseIterator.java", - "discussion_id": "2099445576", - "commented_code": "@@ -0,0 +1,207 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.lucene.queries;\n+\n+import org.apache.lucene.index.DocValuesSkipper;\n+import org.apache.lucene.index.NumericDocValues;\n+import org.apache.lucene.search.DocIdSetIterator;\n+import org.apache.lucene.search.TwoPhaseIterator;\n+\n+import java.io.IOException;\n+\n+/**\n+ * Based on {@link org.apache.lucene.search.DocValuesRangeIterator} but modified for time series and logsdb use cases.\n+ *

\n+ * The @timestamp field always has exactly one value and all documents always have a @timestamp value.\n+ * Additionally, the @timestamp field is always the secondary index sort field and sort order is always descending.\n+ *

\n+ * This makes the doc value skipper on @timestamp field less effective, and so the doc value skipper for the first index sort field is\n+ * also used to skip to the next primary sort value if for the current skipper represents one value (min and max value are the same) and\n+ * the current value is lower than minTimestamp.\n+ */\n+final class TimestampTwoPhaseIterator extends TwoPhaseIterator {\n+\n+ private final RangeNoGapsApproximation approximation;\n+ private final NumericDocValues timestamps;\n+\n+ private final long minTimestamp;\n+ private final long maxTimestamp;\n+\n+ TimestampTwoPhaseIterator(\n+ NumericDocValues timestamps,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ super(new RangeNoGapsApproximation(timestamps, timestampSkipper, primaryFieldSkipper, minTimestamp, maxTimestamp));\n+ this.approximation = (RangeNoGapsApproximation) approximation();\n+ this.timestamps = timestamps;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ static final class RangeNoGapsApproximation extends DocIdSetIterator {\n+\n+ private final DocIdSetIterator innerApproximation;\n+\n+ final DocValuesSkipper timestampSkipper;\n+ final DocValuesSkipper primaryFieldSkipper;\n+ final long minTimestamp;\n+ final long maxTimestamp;\n+\n+ private int doc = -1;\n+\n+ // Track a decision for all doc IDs between the current doc ID and upTo inclusive.\n+ Match match = Match.MAYBE;\n+ int upTo = -1;\n+ int primaryFieldUpTo = -1;\n+\n+ RangeNoGapsApproximation(\n+ DocIdSetIterator innerApproximation,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ this.innerApproximation = innerApproximation;\n+ this.timestampSkipper = timestampSkipper;\n+ this.primaryFieldSkipper = primaryFieldSkipper;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ @Override\n+ public int docID() {\n+ return doc;\n+ }\n+\n+ @Override\n+ public int nextDoc() throws IOException {\n+ return advance(docID() + 1);\n+ }\n+\n+ @Override\n+ public int advance(int target) throws IOException {\n+ while (true) {\n+ if (target > upTo) {\n+ timestampSkipper.advance(target);\n+ upTo = timestampSkipper.maxDocID(0);\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = DocIdSetIterator.NO_MORE_DOCS;\n+ }\n+ match = match(0);\n+\n+ // If we have a YES or NO decision, see if we still have the same decision on a higher\n+ // level (= on a wider range of doc IDs)\n+ int nextLevel = 1;\n+ while (match != Match.MAYBE && nextLevel < timestampSkipper.numLevels() && match == match(nextLevel)) {\n+ upTo = timestampSkipper.maxDocID(nextLevel);\n+ nextLevel++;\n+ }\n+ }\n+ switch (match) {\n+ case YES:\n+ return doc = target;\n+ case MAYBE:\n+ if (target > innerApproximation.docID()) {\n+ target = innerApproximation.advance(target);", - "comment_created_at": "2025-05-21T06:17:28+00:00", - "comment_author": "iverase", - "comment_body": "I would expect to advance here the timestamps iterator instead of the innerApproximation. ", - "pr_file_module": null - }, - { - "comment_id": "2099525131", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 127260, - "pr_file": "server/src/main/java/org/elasticsearch/lucene/queries/TimestampTwoPhaseIterator.java", - "discussion_id": "2099445576", - "commented_code": "@@ -0,0 +1,207 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one\n+ * or more contributor license agreements. Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.lucene.queries;\n+\n+import org.apache.lucene.index.DocValuesSkipper;\n+import org.apache.lucene.index.NumericDocValues;\n+import org.apache.lucene.search.DocIdSetIterator;\n+import org.apache.lucene.search.TwoPhaseIterator;\n+\n+import java.io.IOException;\n+\n+/**\n+ * Based on {@link org.apache.lucene.search.DocValuesRangeIterator} but modified for time series and logsdb use cases.\n+ *

\n+ * The @timestamp field always has exactly one value and all documents always have a @timestamp value.\n+ * Additionally, the @timestamp field is always the secondary index sort field and sort order is always descending.\n+ *

\n+ * This makes the doc value skipper on @timestamp field less effective, and so the doc value skipper for the first index sort field is\n+ * also used to skip to the next primary sort value if for the current skipper represents one value (min and max value are the same) and\n+ * the current value is lower than minTimestamp.\n+ */\n+final class TimestampTwoPhaseIterator extends TwoPhaseIterator {\n+\n+ private final RangeNoGapsApproximation approximation;\n+ private final NumericDocValues timestamps;\n+\n+ private final long minTimestamp;\n+ private final long maxTimestamp;\n+\n+ TimestampTwoPhaseIterator(\n+ NumericDocValues timestamps,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ super(new RangeNoGapsApproximation(timestamps, timestampSkipper, primaryFieldSkipper, minTimestamp, maxTimestamp));\n+ this.approximation = (RangeNoGapsApproximation) approximation();\n+ this.timestamps = timestamps;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ static final class RangeNoGapsApproximation extends DocIdSetIterator {\n+\n+ private final DocIdSetIterator innerApproximation;\n+\n+ final DocValuesSkipper timestampSkipper;\n+ final DocValuesSkipper primaryFieldSkipper;\n+ final long minTimestamp;\n+ final long maxTimestamp;\n+\n+ private int doc = -1;\n+\n+ // Track a decision for all doc IDs between the current doc ID and upTo inclusive.\n+ Match match = Match.MAYBE;\n+ int upTo = -1;\n+ int primaryFieldUpTo = -1;\n+\n+ RangeNoGapsApproximation(\n+ DocIdSetIterator innerApproximation,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ this.innerApproximation = innerApproximation;\n+ this.timestampSkipper = timestampSkipper;\n+ this.primaryFieldSkipper = primaryFieldSkipper;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ @Override\n+ public int docID() {\n+ return doc;\n+ }\n+\n+ @Override\n+ public int nextDoc() throws IOException {\n+ return advance(docID() + 1);\n+ }\n+\n+ @Override\n+ public int advance(int target) throws IOException {\n+ while (true) {\n+ if (target > upTo) {\n+ timestampSkipper.advance(target);\n+ upTo = timestampSkipper.maxDocID(0);\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = DocIdSetIterator.NO_MORE_DOCS;\n+ }\n+ match = match(0);\n+\n+ // If we have a YES or NO decision, see if we still have the same decision on a higher\n+ // level (= on a wider range of doc IDs)\n+ int nextLevel = 1;\n+ while (match != Match.MAYBE && nextLevel < timestampSkipper.numLevels() && match == match(nextLevel)) {\n+ upTo = timestampSkipper.maxDocID(nextLevel);\n+ nextLevel++;\n+ }\n+ }\n+ switch (match) {\n+ case YES:\n+ return doc = target;\n+ case MAYBE:\n+ if (target > innerApproximation.docID()) {\n+ target = innerApproximation.advance(target);", - "comment_created_at": "2025-05-21T07:05:52+00:00", - "comment_author": "iverase", - "comment_body": "I see now that it is equivalent. Would it make sense to make innerApproximation a NumericDocValues and remove the reference from the parent class? ", - "pr_file_module": null - }, - { - "comment_id": "2111161746", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 127260, - "pr_file": "server/src/main/java/org/elasticsearch/lucene/queries/TimestampTwoPhaseIterator.java", - "discussion_id": "2099445576", - "commented_code": "@@ -0,0 +1,207 @@\n+/*\n+ * Copyright Elasticsearch B.V. 
and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.lucene.queries;\n+\n+import org.apache.lucene.index.DocValuesSkipper;\n+import org.apache.lucene.index.NumericDocValues;\n+import org.apache.lucene.search.DocIdSetIterator;\n+import org.apache.lucene.search.TwoPhaseIterator;\n+\n+import java.io.IOException;\n+\n+/**\n+ * Based on {@link org.apache.lucene.search.DocValuesRangeIterator} but modified for time series and logsdb use cases.\n+ *

\n+ * The @timestamp field always has exactly one value and all documents always have a @timestamp value.\n+ * Additionally, the @timestamp field is always the secondary index sort field and sort order is always descending.\n+ *

\n+ * This makes the doc value skipper on @timestamp field less effective, and so the doc value skipper for the first index sort field is\n+ * also used to skip to the next primary sort value if for the current skipper represents one value (min and max value are the same) and\n+ * the current value is lower than minTimestamp.\n+ */\n+final class TimestampTwoPhaseIterator extends TwoPhaseIterator {\n+\n+ private final RangeNoGapsApproximation approximation;\n+ private final NumericDocValues timestamps;\n+\n+ private final long minTimestamp;\n+ private final long maxTimestamp;\n+\n+ TimestampTwoPhaseIterator(\n+ NumericDocValues timestamps,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ super(new RangeNoGapsApproximation(timestamps, timestampSkipper, primaryFieldSkipper, minTimestamp, maxTimestamp));\n+ this.approximation = (RangeNoGapsApproximation) approximation();\n+ this.timestamps = timestamps;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ static final class RangeNoGapsApproximation extends DocIdSetIterator {\n+\n+ private final DocIdSetIterator innerApproximation;\n+\n+ final DocValuesSkipper timestampSkipper;\n+ final DocValuesSkipper primaryFieldSkipper;\n+ final long minTimestamp;\n+ final long maxTimestamp;\n+\n+ private int doc = -1;\n+\n+ // Track a decision for all doc IDs between the current doc ID and upTo inclusive.\n+ Match match = Match.MAYBE;\n+ int upTo = -1;\n+ int primaryFieldUpTo = -1;\n+\n+ RangeNoGapsApproximation(\n+ DocIdSetIterator innerApproximation,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ this.innerApproximation = innerApproximation;\n+ this.timestampSkipper = timestampSkipper;\n+ this.primaryFieldSkipper = primaryFieldSkipper;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ @Override\n+ public int docID() {\n+ return doc;\n+ }\n+\n+ @Override\n+ public int nextDoc() throws IOException {\n+ return advance(docID() + 1);\n+ }\n+\n+ @Override\n+ public int advance(int target) throws IOException {\n+ while (true) {\n+ if (target > upTo) {\n+ timestampSkipper.advance(target);\n+ upTo = timestampSkipper.maxDocID(0);\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = DocIdSetIterator.NO_MORE_DOCS;\n+ }\n+ match = match(0);\n+\n+ // If we have a YES or NO decision, see if we still have the same decision on a higher\n+ // level (= on a wider range of doc IDs)\n+ int nextLevel = 1;\n+ while (match != Match.MAYBE && nextLevel < timestampSkipper.numLevels() && match == match(nextLevel)) {\n+ upTo = timestampSkipper.maxDocID(nextLevel);\n+ nextLevel++;\n+ }\n+ }\n+ switch (match) {\n+ case YES:\n+ return doc = target;\n+ case MAYBE:\n+ if (target > innerApproximation.docID()) {\n+ target = innerApproximation.advance(target);", - "comment_created_at": "2025-05-28T07:37:57+00:00", - "comment_author": "martijnvg", - "comment_body": "done: 24819c4929b7591fdb9795333cf4d1e3329cdfc3", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2099475391", - "pr_number": 127260, - "pr_file": "server/src/main/java/org/elasticsearch/lucene/queries/TimestampTwoPhaseIterator.java", - "created_at": "2025-05-21T06:38:24+00:00", - "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. 
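For reference, the change agreed on in this thread amounts to typing the approximation's inner iterator as `NumericDocValues` (which is itself a `DocIdSetIterator`), so the approximation and `matches()` read from the same instance. A fragment-level sketch under that assumption; it is not the exact commit referenced above.

```java
// Fragment-level sketch: the approximation keeps the NumericDocValues directly instead of a
// generic DocIdSetIterator, so TimestampTwoPhaseIterator no longer needs its own timestamps field.
RangeNoGapsApproximation(
    NumericDocValues timestamps,          // was: DocIdSetIterator innerApproximation
    DocValuesSkipper timestampSkipper,
    DocValuesSkipper primaryFieldSkipper,
    long minTimestamp,
    long maxTimestamp
) {
    this.timestamps = timestamps;
    this.timestampSkipper = timestampSkipper;
    this.primaryFieldSkipper = primaryFieldSkipper;
    this.minTimestamp = minTimestamp;
    this.maxTimestamp = maxTimestamp;
}
// In the MAYBE branch, target = timestamps.advance(target) replaces the previous
// innerApproximation.advance(target) call (the two are equivalent, as noted above), and
// matches() reads approximation.timestamps.longValue() for the range check.
```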
Licensed under the \"Elastic License\n * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n * Public License v 1\"; you may not use this file except in compliance with, at\n * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n * License v3.0 only\", or the \"Server Side Public License, v 1\".\n */\npackage org.elasticsearch.lucene.queries;\n\nimport org.apache.lucene.index.DocValuesSkipper;\nimport org.apache.lucene.index.NumericDocValues;\nimport org.apache.lucene.search.DocIdSetIterator;\nimport org.apache.lucene.search.TwoPhaseIterator;\n\nimport java.io.IOException;\n\n/**\n * Based on {@link org.apache.lucene.search.DocValuesRangeIterator} but modified for time series and logsdb use cases.\n *

\n * The @timestamp field always has exactly one value and all documents always have a @timestamp value.\n * Additionally, the @timestamp field is always the secondary index sort field and sort order is always descending.\n *

\n * This makes the doc value skipper on @timestamp field less effective, and so the doc value skipper for the first index sort field is\n * also used to skip to the next primary sort value if for the current skipper represents one value (min and max value are the same) and\n * the current value is lower than minTimestamp.\n */\nfinal class TimestampTwoPhaseIterator extends TwoPhaseIterator {\n\n private final RangeNoGapsApproximation approximation;\n private final NumericDocValues timestamps;\n\n private final long minTimestamp;\n private final long maxTimestamp;\n\n TimestampTwoPhaseIterator(\n NumericDocValues timestamps,\n DocValuesSkipper timestampSkipper,\n DocValuesSkipper primaryFieldSkipper,\n long minTimestamp,\n long maxTimestamp\n ) {\n super(new RangeNoGapsApproximation(timestamps, timestampSkipper, primaryFieldSkipper, minTimestamp, maxTimestamp));\n this.approximation = (RangeNoGapsApproximation) approximation();\n this.timestamps = timestamps;\n this.minTimestamp = minTimestamp;\n this.maxTimestamp = maxTimestamp;\n }\n\n static final class RangeNoGapsApproximation extends DocIdSetIterator {\n\n private final DocIdSetIterator innerApproximation;\n\n final DocValuesSkipper timestampSkipper;\n final DocValuesSkipper primaryFieldSkipper;\n final long minTimestamp;\n final long maxTimestamp;\n\n private int doc = -1;\n\n // Track a decision for all doc IDs between the current doc ID and upTo inclusive.\n Match match = Match.MAYBE;\n int upTo = -1;\n int primaryFieldUpTo = -1;\n\n RangeNoGapsApproximation(\n DocIdSetIterator innerApproximation,\n DocValuesSkipper timestampSkipper,\n DocValuesSkipper primaryFieldSkipper,\n long minTimestamp,\n long maxTimestamp\n ) {\n this.innerApproximation = innerApproximation;\n this.timestampSkipper = timestampSkipper;\n this.primaryFieldSkipper = primaryFieldSkipper;\n this.minTimestamp = minTimestamp;\n this.maxTimestamp = maxTimestamp;\n }\n\n @Override\n public int docID() {\n return doc;\n }\n\n @Override\n public int nextDoc() throws IOException {\n return advance(docID() + 1);\n }\n\n @Override\n public int advance(int target) throws IOException {\n while (true) {\n if (target > upTo) {\n timestampSkipper.advance(target);\n upTo = timestampSkipper.maxDocID(0);\n if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n return doc = DocIdSetIterator.NO_MORE_DOCS;\n }\n match = match(0);\n\n // If we have a YES or NO decision, see if we still have the same decision on a higher\n // level (= on a wider range of doc IDs)\n int nextLevel = 1;\n while (match != Match.MAYBE && nextLevel < timestampSkipper.numLevels() && match == match(nextLevel)) {\n upTo = timestampSkipper.maxDocID(nextLevel);\n nextLevel++;\n }\n }\n switch (match) {\n case YES:\n return doc = target;\n case MAYBE:\n if (target > innerApproximation.docID()) {\n target = innerApproximation.advance(target);\n }\n if (target <= upTo) {\n return doc = target;\n }\n break;\n case NO_AND_SKIP:\n if (target > primaryFieldUpTo) {\n primaryFieldSkipper.advance(target);\n for (int level = 0; level < primaryFieldSkipper.numLevels(); level++) {\n if (primaryFieldSkipper.minValue(level) == primaryFieldSkipper.maxValue(level)) {\n primaryFieldUpTo = primaryFieldSkipper.maxDocID(level);\n } else {\n break;\n }\n }\n if (primaryFieldUpTo > upTo) {\n upTo = primaryFieldUpTo;\n }\n }\n if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n return doc = NO_MORE_DOCS;\n }\n target = upTo + 1;\n break;\n case NO:\n if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n return doc = NO_MORE_DOCS;\n }\n target = upTo + 
1;\n break;\n default:\n throw new AssertionError(\"Unknown enum constant: \" + match);\n }\n }\n }\n\n @Override\n public long cost() {\n return innerApproximation.cost();\n }\n\n Match match(int level) {\n long minValue = timestampSkipper.minValue(level);\n long maxValue = timestampSkipper.maxValue(level);\n if (minValue > maxTimestamp) {\n return Match.NO;\n } else if (maxValue < minTimestamp) {\n return Match.NO_AND_SKIP;\n } else if (minValue >= minTimestamp && maxValue <= maxTimestamp) {\n return Match.YES;\n } else {\n return Match.MAYBE;\n }\n }\n\n }\n\n @Override\n public boolean matches() throws IOException {\n return switch (approximation.match) {\n case YES -> true;\n case MAYBE -> {\n final long value = timestamps.longValue();\n yield value >= minTimestamp && value <= maxTimestamp;", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2099475391", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 127260, - "pr_file": "server/src/main/java/org/elasticsearch/lucene/queries/TimestampTwoPhaseIterator.java", - "discussion_id": "2099475391", - "commented_code": "@@ -0,0 +1,207 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.lucene.queries;\n+\n+import org.apache.lucene.index.DocValuesSkipper;\n+import org.apache.lucene.index.NumericDocValues;\n+import org.apache.lucene.search.DocIdSetIterator;\n+import org.apache.lucene.search.TwoPhaseIterator;\n+\n+import java.io.IOException;\n+\n+/**\n+ * Based on {@link org.apache.lucene.search.DocValuesRangeIterator} but modified for time series and logsdb use cases.\n+ *

\n+ * The @timestamp field always has exactly one value and all documents always have a @timestamp value.\n+ * Additionally, the @timestamp field is always the secondary index sort field and sort order is always descending.\n+ *

\n+ * This makes the doc value skipper on @timestamp field less effective, and so the doc value skipper for the first index sort field is\n+ * also used to skip to the next primary sort value if for the current skipper represents one value (min and max value are the same) and\n+ * the current value is lower than minTimestamp.\n+ */\n+final class TimestampTwoPhaseIterator extends TwoPhaseIterator {\n+\n+ private final RangeNoGapsApproximation approximation;\n+ private final NumericDocValues timestamps;\n+\n+ private final long minTimestamp;\n+ private final long maxTimestamp;\n+\n+ TimestampTwoPhaseIterator(\n+ NumericDocValues timestamps,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ super(new RangeNoGapsApproximation(timestamps, timestampSkipper, primaryFieldSkipper, minTimestamp, maxTimestamp));\n+ this.approximation = (RangeNoGapsApproximation) approximation();\n+ this.timestamps = timestamps;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ static final class RangeNoGapsApproximation extends DocIdSetIterator {\n+\n+ private final DocIdSetIterator innerApproximation;\n+\n+ final DocValuesSkipper timestampSkipper;\n+ final DocValuesSkipper primaryFieldSkipper;\n+ final long minTimestamp;\n+ final long maxTimestamp;\n+\n+ private int doc = -1;\n+\n+ // Track a decision for all doc IDs between the current doc ID and upTo inclusive.\n+ Match match = Match.MAYBE;\n+ int upTo = -1;\n+ int primaryFieldUpTo = -1;\n+\n+ RangeNoGapsApproximation(\n+ DocIdSetIterator innerApproximation,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ this.innerApproximation = innerApproximation;\n+ this.timestampSkipper = timestampSkipper;\n+ this.primaryFieldSkipper = primaryFieldSkipper;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ @Override\n+ public int docID() {\n+ return doc;\n+ }\n+\n+ @Override\n+ public int nextDoc() throws IOException {\n+ return advance(docID() + 1);\n+ }\n+\n+ @Override\n+ public int advance(int target) throws IOException {\n+ while (true) {\n+ if (target > upTo) {\n+ timestampSkipper.advance(target);\n+ upTo = timestampSkipper.maxDocID(0);\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = DocIdSetIterator.NO_MORE_DOCS;\n+ }\n+ match = match(0);\n+\n+ // If we have a YES or NO decision, see if we still have the same decision on a higher\n+ // level (= on a wider range of doc IDs)\n+ int nextLevel = 1;\n+ while (match != Match.MAYBE && nextLevel < timestampSkipper.numLevels() && match == match(nextLevel)) {\n+ upTo = timestampSkipper.maxDocID(nextLevel);\n+ nextLevel++;\n+ }\n+ }\n+ switch (match) {\n+ case YES:\n+ return doc = target;\n+ case MAYBE:\n+ if (target > innerApproximation.docID()) {\n+ target = innerApproximation.advance(target);\n+ }\n+ if (target <= upTo) {\n+ return doc = target;\n+ }\n+ break;\n+ case NO_AND_SKIP:\n+ if (target > primaryFieldUpTo) {\n+ primaryFieldSkipper.advance(target);\n+ for (int level = 0; level < primaryFieldSkipper.numLevels(); level++) {\n+ if (primaryFieldSkipper.minValue(level) == primaryFieldSkipper.maxValue(level)) {\n+ primaryFieldUpTo = primaryFieldSkipper.maxDocID(level);\n+ } else {\n+ break;\n+ }\n+ }\n+ if (primaryFieldUpTo > upTo) {\n+ upTo = primaryFieldUpTo;\n+ }\n+ }\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = NO_MORE_DOCS;\n+ }\n+ target = upTo + 1;\n+ 
break;\n+ case NO:\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = NO_MORE_DOCS;\n+ }\n+ target = upTo + 1;\n+ break;\n+ default:\n+ throw new AssertionError(\"Unknown enum constant: \" + match);\n+ }\n+ }\n+ }\n+\n+ @Override\n+ public long cost() {\n+ return innerApproximation.cost();\n+ }\n+\n+ Match match(int level) {\n+ long minValue = timestampSkipper.minValue(level);\n+ long maxValue = timestampSkipper.maxValue(level);\n+ if (minValue > maxTimestamp) {\n+ return Match.NO;\n+ } else if (maxValue < minTimestamp) {\n+ return Match.NO_AND_SKIP;\n+ } else if (minValue >= minTimestamp && maxValue <= maxTimestamp) {\n+ return Match.YES;\n+ } else {\n+ return Match.MAYBE;\n+ }\n+ }\n+\n+ }\n+\n+ @Override\n+ public boolean matches() throws IOException {\n+ return switch (approximation.match) {\n+ case YES -> true;\n+ case MAYBE -> {\n+ final long value = timestamps.longValue();\n+ yield value >= minTimestamp && value <= maxTimestamp;", - "comment_created_at": "2025-05-21T06:38:24+00:00", - "comment_author": "iverase", - "comment_body": "if value < minTimestamp, we know we will not be matching any more documents, maybe we can move approximation.match to NO?", - "pr_file_module": null - }, - { - "comment_id": "2099478400", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 127260, - "pr_file": "server/src/main/java/org/elasticsearch/lucene/queries/TimestampTwoPhaseIterator.java", - "discussion_id": "2099475391", - "commented_code": "@@ -0,0 +1,207 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.lucene.queries;\n+\n+import org.apache.lucene.index.DocValuesSkipper;\n+import org.apache.lucene.index.NumericDocValues;\n+import org.apache.lucene.search.DocIdSetIterator;\n+import org.apache.lucene.search.TwoPhaseIterator;\n+\n+import java.io.IOException;\n+\n+/**\n+ * Based on {@link org.apache.lucene.search.DocValuesRangeIterator} but modified for time series and logsdb use cases.\n+ *

\n+ * The @timestamp field always has exactly one value and all documents always have a @timestamp value.\n+ * Additionally, the @timestamp field is always the secondary index sort field and sort order is always descending.\n+ *

\n+ * This makes the doc value skipper on @timestamp field less effective, and so the doc value skipper for the first index sort field is\n+ * also used to skip to the next primary sort value if for the current skipper represents one value (min and max value are the same) and\n+ * the current value is lower than minTimestamp.\n+ */\n+final class TimestampTwoPhaseIterator extends TwoPhaseIterator {\n+\n+ private final RangeNoGapsApproximation approximation;\n+ private final NumericDocValues timestamps;\n+\n+ private final long minTimestamp;\n+ private final long maxTimestamp;\n+\n+ TimestampTwoPhaseIterator(\n+ NumericDocValues timestamps,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ super(new RangeNoGapsApproximation(timestamps, timestampSkipper, primaryFieldSkipper, minTimestamp, maxTimestamp));\n+ this.approximation = (RangeNoGapsApproximation) approximation();\n+ this.timestamps = timestamps;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ static final class RangeNoGapsApproximation extends DocIdSetIterator {\n+\n+ private final DocIdSetIterator innerApproximation;\n+\n+ final DocValuesSkipper timestampSkipper;\n+ final DocValuesSkipper primaryFieldSkipper;\n+ final long minTimestamp;\n+ final long maxTimestamp;\n+\n+ private int doc = -1;\n+\n+ // Track a decision for all doc IDs between the current doc ID and upTo inclusive.\n+ Match match = Match.MAYBE;\n+ int upTo = -1;\n+ int primaryFieldUpTo = -1;\n+\n+ RangeNoGapsApproximation(\n+ DocIdSetIterator innerApproximation,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ this.innerApproximation = innerApproximation;\n+ this.timestampSkipper = timestampSkipper;\n+ this.primaryFieldSkipper = primaryFieldSkipper;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ @Override\n+ public int docID() {\n+ return doc;\n+ }\n+\n+ @Override\n+ public int nextDoc() throws IOException {\n+ return advance(docID() + 1);\n+ }\n+\n+ @Override\n+ public int advance(int target) throws IOException {\n+ while (true) {\n+ if (target > upTo) {\n+ timestampSkipper.advance(target);\n+ upTo = timestampSkipper.maxDocID(0);\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = DocIdSetIterator.NO_MORE_DOCS;\n+ }\n+ match = match(0);\n+\n+ // If we have a YES or NO decision, see if we still have the same decision on a higher\n+ // level (= on a wider range of doc IDs)\n+ int nextLevel = 1;\n+ while (match != Match.MAYBE && nextLevel < timestampSkipper.numLevels() && match == match(nextLevel)) {\n+ upTo = timestampSkipper.maxDocID(nextLevel);\n+ nextLevel++;\n+ }\n+ }\n+ switch (match) {\n+ case YES:\n+ return doc = target;\n+ case MAYBE:\n+ if (target > innerApproximation.docID()) {\n+ target = innerApproximation.advance(target);\n+ }\n+ if (target <= upTo) {\n+ return doc = target;\n+ }\n+ break;\n+ case NO_AND_SKIP:\n+ if (target > primaryFieldUpTo) {\n+ primaryFieldSkipper.advance(target);\n+ for (int level = 0; level < primaryFieldSkipper.numLevels(); level++) {\n+ if (primaryFieldSkipper.minValue(level) == primaryFieldSkipper.maxValue(level)) {\n+ primaryFieldUpTo = primaryFieldSkipper.maxDocID(level);\n+ } else {\n+ break;\n+ }\n+ }\n+ if (primaryFieldUpTo > upTo) {\n+ upTo = primaryFieldUpTo;\n+ }\n+ }\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = NO_MORE_DOCS;\n+ }\n+ target = upTo + 1;\n+ 
break;\n+ case NO:\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = NO_MORE_DOCS;\n+ }\n+ target = upTo + 1;\n+ break;\n+ default:\n+ throw new AssertionError(\"Unknown enum constant: \" + match);\n+ }\n+ }\n+ }\n+\n+ @Override\n+ public long cost() {\n+ return innerApproximation.cost();\n+ }\n+\n+ Match match(int level) {\n+ long minValue = timestampSkipper.minValue(level);\n+ long maxValue = timestampSkipper.maxValue(level);\n+ if (minValue > maxTimestamp) {\n+ return Match.NO;\n+ } else if (maxValue < minTimestamp) {\n+ return Match.NO_AND_SKIP;\n+ } else if (minValue >= minTimestamp && maxValue <= maxTimestamp) {\n+ return Match.YES;\n+ } else {\n+ return Match.MAYBE;\n+ }\n+ }\n+\n+ }\n+\n+ @Override\n+ public boolean matches() throws IOException {\n+ return switch (approximation.match) {\n+ case YES -> true;\n+ case MAYBE -> {\n+ final long value = timestamps.longValue();\n+ yield value >= minTimestamp && value <= maxTimestamp;", - "comment_created_at": "2025-05-21T06:40:02+00:00", - "comment_author": "iverase", - "comment_body": "Although this is only true if we are always in the same primary sort.", - "pr_file_module": null - }, - { - "comment_id": "2099497432", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 127260, - "pr_file": "server/src/main/java/org/elasticsearch/lucene/queries/TimestampTwoPhaseIterator.java", - "discussion_id": "2099475391", - "commented_code": "@@ -0,0 +1,207 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.lucene.queries;\n+\n+import org.apache.lucene.index.DocValuesSkipper;\n+import org.apache.lucene.index.NumericDocValues;\n+import org.apache.lucene.search.DocIdSetIterator;\n+import org.apache.lucene.search.TwoPhaseIterator;\n+\n+import java.io.IOException;\n+\n+/**\n+ * Based on {@link org.apache.lucene.search.DocValuesRangeIterator} but modified for time series and logsdb use cases.\n+ *
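A hedged sketch of the early-exit idea in this thread: with `@timestamp` sorted descending, once a MAYBE block yields a value below `minTimestamp`, no later document within the same primary sort value can match, so `matches()` could downgrade the cached decision. As the follow-up comment notes, this is only valid while the primary sort value is unchanged; the `samePrimarySortValue` flag below is hypothetical.

```java
// Illustrative only; not part of the PR as written.
case MAYBE -> {
    final long value = timestamps.longValue();
    if (value < minTimestamp && samePrimarySortValue) {  // hypothetical primary-sort tracking
        // Descending @timestamp order: the rest of this primary-sort run cannot match, so let the
        // approximation skip ahead instead of confirming each remaining doc individually.
        approximation.match = Match.NO;
    }
    yield value >= minTimestamp && value <= maxTimestamp;
}
```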
\n+ * The @timestamp field always has exactly one value and all documents always have a @timestamp value.\n+ * Additionally, the @timestamp field is always the secondary index sort field and sort order is always descending.\n+ *
\n+ * This makes the doc value skipper on @timestamp field less effective, and so the doc value skipper for the first index sort field is\n+ * also used to skip to the next primary sort value if for the current skipper represents one value (min and max value are the same) and\n+ * the current value is lower than minTimestamp.\n+ */\n+final class TimestampTwoPhaseIterator extends TwoPhaseIterator {\n+\n+ private final RangeNoGapsApproximation approximation;\n+ private final NumericDocValues timestamps;\n+\n+ private final long minTimestamp;\n+ private final long maxTimestamp;\n+\n+ TimestampTwoPhaseIterator(\n+ NumericDocValues timestamps,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ super(new RangeNoGapsApproximation(timestamps, timestampSkipper, primaryFieldSkipper, minTimestamp, maxTimestamp));\n+ this.approximation = (RangeNoGapsApproximation) approximation();\n+ this.timestamps = timestamps;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ static final class RangeNoGapsApproximation extends DocIdSetIterator {\n+\n+ private final DocIdSetIterator innerApproximation;\n+\n+ final DocValuesSkipper timestampSkipper;\n+ final DocValuesSkipper primaryFieldSkipper;\n+ final long minTimestamp;\n+ final long maxTimestamp;\n+\n+ private int doc = -1;\n+\n+ // Track a decision for all doc IDs between the current doc ID and upTo inclusive.\n+ Match match = Match.MAYBE;\n+ int upTo = -1;\n+ int primaryFieldUpTo = -1;\n+\n+ RangeNoGapsApproximation(\n+ DocIdSetIterator innerApproximation,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ this.innerApproximation = innerApproximation;\n+ this.timestampSkipper = timestampSkipper;\n+ this.primaryFieldSkipper = primaryFieldSkipper;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ @Override\n+ public int docID() {\n+ return doc;\n+ }\n+\n+ @Override\n+ public int nextDoc() throws IOException {\n+ return advance(docID() + 1);\n+ }\n+\n+ @Override\n+ public int advance(int target) throws IOException {\n+ while (true) {\n+ if (target > upTo) {\n+ timestampSkipper.advance(target);\n+ upTo = timestampSkipper.maxDocID(0);\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = DocIdSetIterator.NO_MORE_DOCS;\n+ }\n+ match = match(0);\n+\n+ // If we have a YES or NO decision, see if we still have the same decision on a higher\n+ // level (= on a wider range of doc IDs)\n+ int nextLevel = 1;\n+ while (match != Match.MAYBE && nextLevel < timestampSkipper.numLevels() && match == match(nextLevel)) {\n+ upTo = timestampSkipper.maxDocID(nextLevel);\n+ nextLevel++;\n+ }\n+ }\n+ switch (match) {\n+ case YES:\n+ return doc = target;\n+ case MAYBE:\n+ if (target > innerApproximation.docID()) {\n+ target = innerApproximation.advance(target);\n+ }\n+ if (target <= upTo) {\n+ return doc = target;\n+ }\n+ break;\n+ case NO_AND_SKIP:\n+ if (target > primaryFieldUpTo) {\n+ primaryFieldSkipper.advance(target);\n+ for (int level = 0; level < primaryFieldSkipper.numLevels(); level++) {\n+ if (primaryFieldSkipper.minValue(level) == primaryFieldSkipper.maxValue(level)) {\n+ primaryFieldUpTo = primaryFieldSkipper.maxDocID(level);\n+ } else {\n+ break;\n+ }\n+ }\n+ if (primaryFieldUpTo > upTo) {\n+ upTo = primaryFieldUpTo;\n+ }\n+ }\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = NO_MORE_DOCS;\n+ }\n+ target = upTo + 1;\n+ 
break;\n+ case NO:\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = NO_MORE_DOCS;\n+ }\n+ target = upTo + 1;\n+ break;\n+ default:\n+ throw new AssertionError(\"Unknown enum constant: \" + match);\n+ }\n+ }\n+ }\n+\n+ @Override\n+ public long cost() {\n+ return innerApproximation.cost();\n+ }\n+\n+ Match match(int level) {\n+ long minValue = timestampSkipper.minValue(level);\n+ long maxValue = timestampSkipper.maxValue(level);\n+ if (minValue > maxTimestamp) {\n+ return Match.NO;\n+ } else if (maxValue < minTimestamp) {\n+ return Match.NO_AND_SKIP;\n+ } else if (minValue >= minTimestamp && maxValue <= maxTimestamp) {\n+ return Match.YES;\n+ } else {\n+ return Match.MAYBE;\n+ }\n+ }\n+\n+ }\n+\n+ @Override\n+ public boolean matches() throws IOException {\n+ return switch (approximation.match) {\n+ case YES -> true;\n+ case MAYBE -> {\n+ final long value = timestamps.longValue();\n+ yield value >= minTimestamp && value <= maxTimestamp;", - "comment_created_at": "2025-05-21T06:51:10+00:00", - "comment_author": "iverase", - "comment_body": "I wonder if we should have two MAYBE options, one that we know we are always in the same primary sort and another one with mixed primary sort. In that case we can apply the optimization in the first case (which should be the common case).", - "pr_file_module": null - }, - { - "comment_id": "2099593498", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 127260, - "pr_file": "server/src/main/java/org/elasticsearch/lucene/queries/TimestampTwoPhaseIterator.java", - "discussion_id": "2099475391", - "commented_code": "@@ -0,0 +1,207 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.lucene.queries;\n+\n+import org.apache.lucene.index.DocValuesSkipper;\n+import org.apache.lucene.index.NumericDocValues;\n+import org.apache.lucene.search.DocIdSetIterator;\n+import org.apache.lucene.search.TwoPhaseIterator;\n+\n+import java.io.IOException;\n+\n+/**\n+ * Based on {@link org.apache.lucene.search.DocValuesRangeIterator} but modified for time series and logsdb use cases.\n+ *
\n+ * The @timestamp field always has exactly one value and all documents always have a @timestamp value.\n+ * Additionally, the @timestamp field is always the secondary index sort field and sort order is always descending.\n+ *
\n+ * This makes the doc value skipper on @timestamp field less effective, and so the doc value skipper for the first index sort field is\n+ * also used to skip to the next primary sort value if for the current skipper represents one value (min and max value are the same) and\n+ * the current value is lower than minTimestamp.\n+ */\n+final class TimestampTwoPhaseIterator extends TwoPhaseIterator {\n+\n+ private final RangeNoGapsApproximation approximation;\n+ private final NumericDocValues timestamps;\n+\n+ private final long minTimestamp;\n+ private final long maxTimestamp;\n+\n+ TimestampTwoPhaseIterator(\n+ NumericDocValues timestamps,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ super(new RangeNoGapsApproximation(timestamps, timestampSkipper, primaryFieldSkipper, minTimestamp, maxTimestamp));\n+ this.approximation = (RangeNoGapsApproximation) approximation();\n+ this.timestamps = timestamps;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ static final class RangeNoGapsApproximation extends DocIdSetIterator {\n+\n+ private final DocIdSetIterator innerApproximation;\n+\n+ final DocValuesSkipper timestampSkipper;\n+ final DocValuesSkipper primaryFieldSkipper;\n+ final long minTimestamp;\n+ final long maxTimestamp;\n+\n+ private int doc = -1;\n+\n+ // Track a decision for all doc IDs between the current doc ID and upTo inclusive.\n+ Match match = Match.MAYBE;\n+ int upTo = -1;\n+ int primaryFieldUpTo = -1;\n+\n+ RangeNoGapsApproximation(\n+ DocIdSetIterator innerApproximation,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ this.innerApproximation = innerApproximation;\n+ this.timestampSkipper = timestampSkipper;\n+ this.primaryFieldSkipper = primaryFieldSkipper;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ @Override\n+ public int docID() {\n+ return doc;\n+ }\n+\n+ @Override\n+ public int nextDoc() throws IOException {\n+ return advance(docID() + 1);\n+ }\n+\n+ @Override\n+ public int advance(int target) throws IOException {\n+ while (true) {\n+ if (target > upTo) {\n+ timestampSkipper.advance(target);\n+ upTo = timestampSkipper.maxDocID(0);\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = DocIdSetIterator.NO_MORE_DOCS;\n+ }\n+ match = match(0);\n+\n+ // If we have a YES or NO decision, see if we still have the same decision on a higher\n+ // level (= on a wider range of doc IDs)\n+ int nextLevel = 1;\n+ while (match != Match.MAYBE && nextLevel < timestampSkipper.numLevels() && match == match(nextLevel)) {\n+ upTo = timestampSkipper.maxDocID(nextLevel);\n+ nextLevel++;\n+ }\n+ }\n+ switch (match) {\n+ case YES:\n+ return doc = target;\n+ case MAYBE:\n+ if (target > innerApproximation.docID()) {\n+ target = innerApproximation.advance(target);\n+ }\n+ if (target <= upTo) {\n+ return doc = target;\n+ }\n+ break;\n+ case NO_AND_SKIP:\n+ if (target > primaryFieldUpTo) {\n+ primaryFieldSkipper.advance(target);\n+ for (int level = 0; level < primaryFieldSkipper.numLevels(); level++) {\n+ if (primaryFieldSkipper.minValue(level) == primaryFieldSkipper.maxValue(level)) {\n+ primaryFieldUpTo = primaryFieldSkipper.maxDocID(level);\n+ } else {\n+ break;\n+ }\n+ }\n+ if (primaryFieldUpTo > upTo) {\n+ upTo = primaryFieldUpTo;\n+ }\n+ }\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = NO_MORE_DOCS;\n+ }\n+ target = upTo + 1;\n+ 
break;\n+ case NO:\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = NO_MORE_DOCS;\n+ }\n+ target = upTo + 1;\n+ break;\n+ default:\n+ throw new AssertionError(\"Unknown enum constant: \" + match);\n+ }\n+ }\n+ }\n+\n+ @Override\n+ public long cost() {\n+ return innerApproximation.cost();\n+ }\n+\n+ Match match(int level) {\n+ long minValue = timestampSkipper.minValue(level);\n+ long maxValue = timestampSkipper.maxValue(level);\n+ if (minValue > maxTimestamp) {\n+ return Match.NO;\n+ } else if (maxValue < minTimestamp) {\n+ return Match.NO_AND_SKIP;\n+ } else if (minValue >= minTimestamp && maxValue <= maxTimestamp) {\n+ return Match.YES;\n+ } else {\n+ return Match.MAYBE;\n+ }\n+ }\n+\n+ }\n+\n+ @Override\n+ public boolean matches() throws IOException {\n+ return switch (approximation.match) {\n+ case YES -> true;\n+ case MAYBE -> {\n+ final long value = timestamps.longValue();\n+ yield value >= minTimestamp && value <= maxTimestamp;", - "comment_created_at": "2025-05-21T07:42:10+00:00", - "comment_author": "martijnvg", - "comment_body": "Interesting, let me take a look at this.", - "pr_file_module": null - }, - { - "comment_id": "2126409027", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 127260, - "pr_file": "server/src/main/java/org/elasticsearch/lucene/queries/TimestampTwoPhaseIterator.java", - "discussion_id": "2099475391", - "commented_code": "@@ -0,0 +1,207 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+package org.elasticsearch.lucene.queries;\n+\n+import org.apache.lucene.index.DocValuesSkipper;\n+import org.apache.lucene.index.NumericDocValues;\n+import org.apache.lucene.search.DocIdSetIterator;\n+import org.apache.lucene.search.TwoPhaseIterator;\n+\n+import java.io.IOException;\n+\n+/**\n+ * Based on {@link org.apache.lucene.search.DocValuesRangeIterator} but modified for time series and logsdb use cases.\n+ *
\n+ * The @timestamp field always has exactly one value and all documents always have a @timestamp value.\n+ * Additionally, the @timestamp field is always the secondary index sort field and sort order is always descending.\n+ *
\n+ * This makes the doc value skipper on @timestamp field less effective, and so the doc value skipper for the first index sort field is\n+ * also used to skip to the next primary sort value if for the current skipper represents one value (min and max value are the same) and\n+ * the current value is lower than minTimestamp.\n+ */\n+final class TimestampTwoPhaseIterator extends TwoPhaseIterator {\n+\n+ private final RangeNoGapsApproximation approximation;\n+ private final NumericDocValues timestamps;\n+\n+ private final long minTimestamp;\n+ private final long maxTimestamp;\n+\n+ TimestampTwoPhaseIterator(\n+ NumericDocValues timestamps,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ super(new RangeNoGapsApproximation(timestamps, timestampSkipper, primaryFieldSkipper, minTimestamp, maxTimestamp));\n+ this.approximation = (RangeNoGapsApproximation) approximation();\n+ this.timestamps = timestamps;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ static final class RangeNoGapsApproximation extends DocIdSetIterator {\n+\n+ private final DocIdSetIterator innerApproximation;\n+\n+ final DocValuesSkipper timestampSkipper;\n+ final DocValuesSkipper primaryFieldSkipper;\n+ final long minTimestamp;\n+ final long maxTimestamp;\n+\n+ private int doc = -1;\n+\n+ // Track a decision for all doc IDs between the current doc ID and upTo inclusive.\n+ Match match = Match.MAYBE;\n+ int upTo = -1;\n+ int primaryFieldUpTo = -1;\n+\n+ RangeNoGapsApproximation(\n+ DocIdSetIterator innerApproximation,\n+ DocValuesSkipper timestampSkipper,\n+ DocValuesSkipper primaryFieldSkipper,\n+ long minTimestamp,\n+ long maxTimestamp\n+ ) {\n+ this.innerApproximation = innerApproximation;\n+ this.timestampSkipper = timestampSkipper;\n+ this.primaryFieldSkipper = primaryFieldSkipper;\n+ this.minTimestamp = minTimestamp;\n+ this.maxTimestamp = maxTimestamp;\n+ }\n+\n+ @Override\n+ public int docID() {\n+ return doc;\n+ }\n+\n+ @Override\n+ public int nextDoc() throws IOException {\n+ return advance(docID() + 1);\n+ }\n+\n+ @Override\n+ public int advance(int target) throws IOException {\n+ while (true) {\n+ if (target > upTo) {\n+ timestampSkipper.advance(target);\n+ upTo = timestampSkipper.maxDocID(0);\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = DocIdSetIterator.NO_MORE_DOCS;\n+ }\n+ match = match(0);\n+\n+ // If we have a YES or NO decision, see if we still have the same decision on a higher\n+ // level (= on a wider range of doc IDs)\n+ int nextLevel = 1;\n+ while (match != Match.MAYBE && nextLevel < timestampSkipper.numLevels() && match == match(nextLevel)) {\n+ upTo = timestampSkipper.maxDocID(nextLevel);\n+ nextLevel++;\n+ }\n+ }\n+ switch (match) {\n+ case YES:\n+ return doc = target;\n+ case MAYBE:\n+ if (target > innerApproximation.docID()) {\n+ target = innerApproximation.advance(target);\n+ }\n+ if (target <= upTo) {\n+ return doc = target;\n+ }\n+ break;\n+ case NO_AND_SKIP:\n+ if (target > primaryFieldUpTo) {\n+ primaryFieldSkipper.advance(target);\n+ for (int level = 0; level < primaryFieldSkipper.numLevels(); level++) {\n+ if (primaryFieldSkipper.minValue(level) == primaryFieldSkipper.maxValue(level)) {\n+ primaryFieldUpTo = primaryFieldSkipper.maxDocID(level);\n+ } else {\n+ break;\n+ }\n+ }\n+ if (primaryFieldUpTo > upTo) {\n+ upTo = primaryFieldUpTo;\n+ }\n+ }\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = NO_MORE_DOCS;\n+ }\n+ target = upTo + 1;\n+ 
break;\n+ case NO:\n+ if (upTo == DocIdSetIterator.NO_MORE_DOCS) {\n+ return doc = NO_MORE_DOCS;\n+ }\n+ target = upTo + 1;\n+ break;\n+ default:\n+ throw new AssertionError(\"Unknown enum constant: \" + match);\n+ }\n+ }\n+ }\n+\n+ @Override\n+ public long cost() {\n+ return innerApproximation.cost();\n+ }\n+\n+ Match match(int level) {\n+ long minValue = timestampSkipper.minValue(level);\n+ long maxValue = timestampSkipper.maxValue(level);\n+ if (minValue > maxTimestamp) {\n+ return Match.NO;\n+ } else if (maxValue < minTimestamp) {\n+ return Match.NO_AND_SKIP;\n+ } else if (minValue >= minTimestamp && maxValue <= maxTimestamp) {\n+ return Match.YES;\n+ } else {\n+ return Match.MAYBE;\n+ }\n+ }\n+\n+ }\n+\n+ @Override\n+ public boolean matches() throws IOException {\n+ return switch (approximation.match) {\n+ case YES -> true;\n+ case MAYBE -> {\n+ final long value = timestamps.longValue();\n+ yield value >= minTimestamp && value <= maxTimestamp;", - "comment_created_at": "2025-06-04T11:49:14+00:00", - "comment_author": "martijnvg", - "comment_body": "done: 57395650b39aa637db395b059c1740b8cdf82bda", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-parallel-branch-traceability.json b/_reviewers/elasticsearch-parallel-branch-traceability.json new file mode 100644 index 0000000..402f1c8 --- /dev/null +++ b/_reviewers/elasticsearch-parallel-branch-traceability.json @@ -0,0 +1,114 @@ +[ + { + "discussion_id": "2174959398", + "pr_number": 130314, + "pr_file": "docs/reference/query-languages/esql/_snippets/commands/layout/fork.md", + "created_at": "2025-06-30T12:28:57+00:00", + "commented_code": "## `FORK` [esql-fork]\n\n```yaml {applies_to}\nserverless: preview\nstack: preview 9.1.0\n```\n\nThe `FORK` processing command runs multiple execution branches and outputs the\nresults back into a single table.\n\n**Syntax**\n\n```esql\nFORK ( ) ( ) ... ( )\n```\n\n**Description**\n\nThe `FORK` processing command feeds the input rows into multiple execution\nbranches and outputs the results into a single table, enhanced with a\ndiscriminator column called `_fork`.\n\nThe values of the discriminator column `_fork` are `fork1`, `fork2`, ... and\nthey designate which `FORK` branch the current row is coming from.\nThe values of the `_fork` column always start with `fork1`, which indicates that\nthe row is coming from the first branch.\n\nThe `FORK` branches can output different columns as long as there exists no\ncolumn with the same name and different data types in two different `FORK`\nbranches.", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2174959398", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130314, + "pr_file": "docs/reference/query-languages/esql/_snippets/commands/layout/fork.md", + "discussion_id": "2174959398", + "commented_code": "@@ -0,0 +1,67 @@\n+## `FORK` [esql-fork]\n+\n+```yaml {applies_to}\n+serverless: preview\n+stack: preview 9.1.0\n+```\n+\n+The `FORK` processing command runs multiple execution branches and outputs the\n+results back into a single table.\n+\n+**Syntax**\n+\n+```esql\n+FORK ( ) ( ) ... ( )\n+```\n+\n+**Description**\n+\n+The `FORK` processing command feeds the input rows into multiple execution\n+branches and outputs the results into a single table, enhanced with a\n+discriminator column called `_fork`.\n+\n+The values of the discriminator column `_fork` are `fork1`, `fork2`, ... 
and\n+they designate which `FORK` branch the current row is coming from.\n+The values of the `_fork` column always start with `fork1`, which indicates that\n+the row is coming from the first branch.\n+\n+The `FORK` branches can output different columns as long as there exists no\n+column with the same name and different data types in two different `FORK`\n+branches.", + "comment_created_at": "2025-06-30T12:28:57+00:00", + "comment_author": "leemthompo", + "comment_body": "```suggestion\r\n`FORK` branches can output different columns, but columns with the \r\nsame name must have the same data type across all branches.\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2174959864", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130314, + "pr_file": "docs/reference/query-languages/esql/_snippets/commands/layout/fork.md", + "discussion_id": "2174959398", + "commented_code": "@@ -0,0 +1,67 @@\n+## `FORK` [esql-fork]\n+\n+```yaml {applies_to}\n+serverless: preview\n+stack: preview 9.1.0\n+```\n+\n+The `FORK` processing command runs multiple execution branches and outputs the\n+results back into a single table.\n+\n+**Syntax**\n+\n+```esql\n+FORK ( ) ( ) ... ( )\n+```\n+\n+**Description**\n+\n+The `FORK` processing command feeds the input rows into multiple execution\n+branches and outputs the results into a single table, enhanced with a\n+discriminator column called `_fork`.\n+\n+The values of the discriminator column `_fork` are `fork1`, `fork2`, ... and\n+they designate which `FORK` branch the current row is coming from.\n+The values of the `_fork` column always start with `fork1`, which indicates that\n+the row is coming from the first branch.\n+\n+The `FORK` branches can output different columns as long as there exists no\n+column with the same name and different data types in two different `FORK`\n+branches.", + "comment_created_at": "2025-06-30T12:29:11+00:00", + "comment_author": "leemthompo", + "comment_body": "I _think_ this is saying the same thing, more concisely", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2175125088", + "pr_number": 130314, + "pr_file": "docs/reference/query-languages/esql/_snippets/commands/layout/fork.md", + "created_at": "2025-06-30T13:46:38+00:00", + "commented_code": "## `FORK` [esql-fork]\n\n```yaml {applies_to}\nserverless: preview\nstack: preview 9.1.0\n```\n\nThe `FORK` processing command creates multiple execution branches to operate\non the same input data and combines the results in a single output table.\n\n**Syntax**\n\n```esql\nFORK ( ) ( ) ... ( )\n```\n\n**Description**\n\nThe `FORK` processing command feeds the input rows into multiple execution\nbranches and outputs the results into a single table, with a discriminator column (`_fork`) to identify which branch each row came from.", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2175125088", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130314, + "pr_file": "docs/reference/query-languages/esql/_snippets/commands/layout/fork.md", + "discussion_id": "2175125088", + "commented_code": "@@ -0,0 +1,58 @@\n+## `FORK` [esql-fork]\n+\n+```yaml {applies_to}\n+serverless: preview\n+stack: preview 9.1.0\n+```\n+\n+The `FORK` processing command creates multiple execution branches to operate\n+on the same input data and combines the results in a single output table.\n+\n+**Syntax**\n+\n+```esql\n+FORK ( ) ( ) ... 
( )\n+```\n+\n+**Description**\n+\n+The `FORK` processing command feeds the input rows into multiple execution\n+branches and outputs the results into a single table, with a discriminator column (`_fork`) to identify which branch each row came from.", + "comment_created_at": "2025-06-30T13:46:38+00:00", + "comment_author": "leemthompo", + "comment_body": "```suggestion\r\nThe `FORK` processing command creates multiple execution branches to operate\r\non the same input data and combines the results in a single output table. A discriminator column (`_fork`) is added to identify which branch each row came from.\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2175125515", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130314, + "pr_file": "docs/reference/query-languages/esql/_snippets/commands/layout/fork.md", + "discussion_id": "2175125088", + "commented_code": "@@ -0,0 +1,58 @@\n+## `FORK` [esql-fork]\n+\n+```yaml {applies_to}\n+serverless: preview\n+stack: preview 9.1.0\n+```\n+\n+The `FORK` processing command creates multiple execution branches to operate\n+on the same input data and combines the results in a single output table.\n+\n+**Syntax**\n+\n+```esql\n+FORK ( ) ( ) ... ( )\n+```\n+\n+**Description**\n+\n+The `FORK` processing command feeds the input rows into multiple execution\n+branches and outputs the results into a single table, with a discriminator column (`_fork`) to identify which branch each row came from.", + "comment_created_at": "2025-06-30T13:46:50+00:00", + "comment_author": "leemthompo", + "comment_body": "keeping same verbiage as opening sentence", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2175137002", + "pr_number": 130314, + "pr_file": "docs/reference/query-languages/esql/_snippets/commands/layout/fork.md", + "created_at": "2025-06-30T13:52:20+00:00", + "commented_code": "## `FORK` [esql-fork]\n\n```yaml {applies_to}\nserverless: preview\nstack: preview 9.1.0\n```\n\nThe `FORK` processing command creates multiple execution branches to operate\non the same input data and combines the results in a single output table.\n\n**Syntax**\n\n```esql\nFORK ( ) ( ) ... ( )\n```\n\n**Description**\n\nThe `FORK` processing command feeds the input rows into multiple execution\nbranches and outputs the results into a single table, with a discriminator column (`_fork`) to identify which branch each row came from.\n\nThe values of the discriminator column `_fork` are `fork1`, `fork2`, etc. and\nthey designate which `FORK` branch the current row comes from.\nThe values of the `_fork` column always start with `fork1`, which indicates that\nthe row comes from the first branch.\n\n`FORK` branches can output different columns, but columns with the\nsame name must have the same data type across all branches.", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2175137002", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130314, + "pr_file": "docs/reference/query-languages/esql/_snippets/commands/layout/fork.md", + "discussion_id": "2175137002", + "commented_code": "@@ -0,0 +1,58 @@\n+## `FORK` [esql-fork]\n+\n+```yaml {applies_to}\n+serverless: preview\n+stack: preview 9.1.0\n+```\n+\n+The `FORK` processing command creates multiple execution branches to operate\n+on the same input data and combines the results in a single output table.\n+\n+**Syntax**\n+\n+```esql\n+FORK ( ) ( ) ... 
( )\n+```\n+\n+**Description**\n+\n+The `FORK` processing command feeds the input rows into multiple execution\n+branches and outputs the results into a single table, with a discriminator column (`_fork`) to identify which branch each row came from.\n+\n+The values of the discriminator column `_fork` are `fork1`, `fork2`, etc. and\n+they designate which `FORK` branch the current row comes from.\n+The values of the `_fork` column always start with `fork1`, which indicates that\n+the row comes from the first branch.\n+\n+`FORK` branches can output different columns, but columns with the\n+same name must have the same data type across all branches.", + "comment_created_at": "2025-06-30T13:52:20+00:00", + "comment_author": "leemthompo", + "comment_body": "```suggestion\r\n**Branch identification:**\r\n- The `_fork` column identifies each branch with values like `fork1`, `fork2`, `fork3`\r\n- Values correspond to the order branches are defined\r\n- `fork1` always indicates the first branch\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2175138062", + "pr_number": 130314, + "pr_file": "docs/reference/query-languages/esql/_snippets/commands/layout/fork.md", + "created_at": "2025-06-30T13:52:48+00:00", + "commented_code": "## `FORK` [esql-fork]\n\n```yaml {applies_to}\nserverless: preview\nstack: preview 9.1.0\n```\n\nThe `FORK` processing command creates multiple execution branches to operate\non the same input data and combines the results in a single output table.\n\n**Syntax**\n\n```esql\nFORK ( ) ( ) ... ( )\n```\n\n**Description**\n\nThe `FORK` processing command feeds the input rows into multiple execution\nbranches and outputs the results into a single table, with a discriminator column (`_fork`) to identify which branch each row came from.\n\nThe values of the discriminator column `_fork` are `fork1`, `fork2`, etc. and\nthey designate which `FORK` branch the current row comes from.\nThe values of the `_fork` column always start with `fork1`, which indicates that\nthe row comes from the first branch.\n\n`FORK` branches can output different columns, but columns with the\nsame name must have the same data type across all branches.\n\nWhen a column does not exist in a `FORK` branch, but it exists in the output of\nother branches, `FORK` will add `null` values to the rows that have missing\ncolumns.\n\n`FORK` preserves the order of the rows from each subset, but it does not\nguarantee that the rows will follow the same order in which the `FORK` branches\nare defined. The rows from the first branch can be interleaved with the rows\nfrom subsequent branches. Use `SORT _fork` after `FORK` if you need to change\nthis behaviour.", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2175138062", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130314, + "pr_file": "docs/reference/query-languages/esql/_snippets/commands/layout/fork.md", + "discussion_id": "2175138062", + "commented_code": "@@ -0,0 +1,58 @@\n+## `FORK` [esql-fork]\n+\n+```yaml {applies_to}\n+serverless: preview\n+stack: preview 9.1.0\n+```\n+\n+The `FORK` processing command creates multiple execution branches to operate\n+on the same input data and combines the results in a single output table.\n+\n+**Syntax**\n+\n+```esql\n+FORK ( ) ( ) ... 
( )\n+```\n+\n+**Description**\n+\n+The `FORK` processing command feeds the input rows into multiple execution\n+branches and outputs the results into a single table, with a discriminator column (`_fork`) to identify which branch each row came from.\n+\n+The values of the discriminator column `_fork` are `fork1`, `fork2`, etc. and\n+they designate which `FORK` branch the current row comes from.\n+The values of the `_fork` column always start with `fork1`, which indicates that\n+the row comes from the first branch.\n+\n+`FORK` branches can output different columns, but columns with the\n+same name must have the same data type across all branches.\n+\n+When a column does not exist in a `FORK` branch, but it exists in the output of\n+other branches, `FORK` will add `null` values to the rows that have missing\n+columns.\n+\n+`FORK` preserves the order of the rows from each subset, but it does not\n+guarantee that the rows will follow the same order in which the `FORK` branches\n+are defined. The rows from the first branch can be interleaved with the rows\n+from subsequent branches. Use `SORT _fork` after `FORK` if you need to change\n+this behaviour.", + "comment_created_at": "2025-06-30T13:52:48+00:00", + "comment_author": "leemthompo", + "comment_body": "```suggestion\r\n**Column handling:**\r\n- `FORK` branches can output different columns\r\n- Columns with the same name must have the same data type across all branches \r\n- Missing columns are filled with `null` values\r\n\r\n**Row ordering:**\r\n- `FORK` preserves row order within each branch\r\n- Rows from different branches may be interleaved\r\n- Use `SORT _fork` to group results by branch\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-parallel-branch-traceability.md b/_reviewers/elasticsearch-parallel-branch-traceability.md index a05de6a..38d062a 100644 --- a/_reviewers/elasticsearch-parallel-branch-traceability.md +++ b/_reviewers/elasticsearch-parallel-branch-traceability.md @@ -41,119 +41,3 @@ function processBranches(input) { ``` This approach ensures that outputs from complex multi-branch algorithms remain traceable, consistent, and analyzable, which is critical for debugging and maintaining algorithmic correctness. - - -[ - { - "discussion_id": "2174959398", - "pr_number": 130314, - "pr_file": "docs/reference/query-languages/esql/_snippets/commands/layout/fork.md", - "created_at": "2025-06-30T12:28:57+00:00", - "commented_code": "## `FORK` [esql-fork]\n\n```yaml {applies_to}\nserverless: preview\nstack: preview 9.1.0\n```\n\nThe `FORK` processing command runs multiple execution branches and outputs the\nresults back into a single table.\n\n**Syntax**\n\n```esql\nFORK ( ) ( ) ... ( )\n```\n\n**Description**\n\nThe `FORK` processing command feeds the input rows into multiple execution\nbranches and outputs the results into a single table, enhanced with a\ndiscriminator column called `_fork`.\n\nThe values of the discriminator column `_fork` are `fork1`, `fork2`, ... 
and\nthey designate which `FORK` branch the current row is coming from.\nThe values of the `_fork` column always start with `fork1`, which indicates that\nthe row is coming from the first branch.\n\nThe `FORK` branches can output different columns as long as there exists no\ncolumn with the same name and different data types in two different `FORK`\nbranches.", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2174959398", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130314, - "pr_file": "docs/reference/query-languages/esql/_snippets/commands/layout/fork.md", - "discussion_id": "2174959398", - "commented_code": "@@ -0,0 +1,67 @@\n+## `FORK` [esql-fork]\n+\n+```yaml {applies_to}\n+serverless: preview\n+stack: preview 9.1.0\n+```\n+\n+The `FORK` processing command runs multiple execution branches and outputs the\n+results back into a single table.\n+\n+**Syntax**\n+\n+```esql\n+FORK ( ) ( ) ... ( )\n+```\n+\n+**Description**\n+\n+The `FORK` processing command feeds the input rows into multiple execution\n+branches and outputs the results into a single table, enhanced with a\n+discriminator column called `_fork`.\n+\n+The values of the discriminator column `_fork` are `fork1`, `fork2`, ... and\n+they designate which `FORK` branch the current row is coming from.\n+The values of the `_fork` column always start with `fork1`, which indicates that\n+the row is coming from the first branch.\n+\n+The `FORK` branches can output different columns as long as there exists no\n+column with the same name and different data types in two different `FORK`\n+branches.", - "comment_created_at": "2025-06-30T12:28:57+00:00", - "comment_author": "leemthompo", - "comment_body": "```suggestion\r\n`FORK` branches can output different columns, but columns with the \r\nsame name must have the same data type across all branches.\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2174959864", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130314, - "pr_file": "docs/reference/query-languages/esql/_snippets/commands/layout/fork.md", - "discussion_id": "2174959398", - "commented_code": "@@ -0,0 +1,67 @@\n+## `FORK` [esql-fork]\n+\n+```yaml {applies_to}\n+serverless: preview\n+stack: preview 9.1.0\n+```\n+\n+The `FORK` processing command runs multiple execution branches and outputs the\n+results back into a single table.\n+\n+**Syntax**\n+\n+```esql\n+FORK ( ) ( ) ... ( )\n+```\n+\n+**Description**\n+\n+The `FORK` processing command feeds the input rows into multiple execution\n+branches and outputs the results into a single table, enhanced with a\n+discriminator column called `_fork`.\n+\n+The values of the discriminator column `_fork` are `fork1`, `fork2`, ... 
and\n+they designate which `FORK` branch the current row is coming from.\n+The values of the `_fork` column always start with `fork1`, which indicates that\n+the row is coming from the first branch.\n+\n+The `FORK` branches can output different columns as long as there exists no\n+column with the same name and different data types in two different `FORK`\n+branches.", - "comment_created_at": "2025-06-30T12:29:11+00:00", - "comment_author": "leemthompo", - "comment_body": "I _think_ this is saying the same thing, more concisely", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2175125088", - "pr_number": 130314, - "pr_file": "docs/reference/query-languages/esql/_snippets/commands/layout/fork.md", - "created_at": "2025-06-30T13:46:38+00:00", - "commented_code": "## `FORK` [esql-fork]\n\n```yaml {applies_to}\nserverless: preview\nstack: preview 9.1.0\n```\n\nThe `FORK` processing command creates multiple execution branches to operate\non the same input data and combines the results in a single output table.\n\n**Syntax**\n\n```esql\nFORK ( ) ( ) ... ( )\n```\n\n**Description**\n\nThe `FORK` processing command feeds the input rows into multiple execution\nbranches and outputs the results into a single table, with a discriminator column (`_fork`) to identify which branch each row came from.", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2175125088", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130314, - "pr_file": "docs/reference/query-languages/esql/_snippets/commands/layout/fork.md", - "discussion_id": "2175125088", - "commented_code": "@@ -0,0 +1,58 @@\n+## `FORK` [esql-fork]\n+\n+```yaml {applies_to}\n+serverless: preview\n+stack: preview 9.1.0\n+```\n+\n+The `FORK` processing command creates multiple execution branches to operate\n+on the same input data and combines the results in a single output table.\n+\n+**Syntax**\n+\n+```esql\n+FORK ( ) ( ) ... ( )\n+```\n+\n+**Description**\n+\n+The `FORK` processing command feeds the input rows into multiple execution\n+branches and outputs the results into a single table, with a discriminator column (`_fork`) to identify which branch each row came from.", - "comment_created_at": "2025-06-30T13:46:38+00:00", - "comment_author": "leemthompo", - "comment_body": "```suggestion\r\nThe `FORK` processing command creates multiple execution branches to operate\r\non the same input data and combines the results in a single output table. A discriminator column (`_fork`) is added to identify which branch each row came from.\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2175125515", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130314, - "pr_file": "docs/reference/query-languages/esql/_snippets/commands/layout/fork.md", - "discussion_id": "2175125088", - "commented_code": "@@ -0,0 +1,58 @@\n+## `FORK` [esql-fork]\n+\n+```yaml {applies_to}\n+serverless: preview\n+stack: preview 9.1.0\n+```\n+\n+The `FORK` processing command creates multiple execution branches to operate\n+on the same input data and combines the results in a single output table.\n+\n+**Syntax**\n+\n+```esql\n+FORK ( ) ( ) ... 
( )\n+```\n+\n+**Description**\n+\n+The `FORK` processing command feeds the input rows into multiple execution\n+branches and outputs the results into a single table, with a discriminator column (`_fork`) to identify which branch each row came from.", - "comment_created_at": "2025-06-30T13:46:50+00:00", - "comment_author": "leemthompo", - "comment_body": "keeping same verbiage as opening sentence", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2175137002", - "pr_number": 130314, - "pr_file": "docs/reference/query-languages/esql/_snippets/commands/layout/fork.md", - "created_at": "2025-06-30T13:52:20+00:00", - "commented_code": "## `FORK` [esql-fork]\n\n```yaml {applies_to}\nserverless: preview\nstack: preview 9.1.0\n```\n\nThe `FORK` processing command creates multiple execution branches to operate\non the same input data and combines the results in a single output table.\n\n**Syntax**\n\n```esql\nFORK ( ) ( ) ... ( )\n```\n\n**Description**\n\nThe `FORK` processing command feeds the input rows into multiple execution\nbranches and outputs the results into a single table, with a discriminator column (`_fork`) to identify which branch each row came from.\n\nThe values of the discriminator column `_fork` are `fork1`, `fork2`, etc. and\nthey designate which `FORK` branch the current row comes from.\nThe values of the `_fork` column always start with `fork1`, which indicates that\nthe row comes from the first branch.\n\n`FORK` branches can output different columns, but columns with the\nsame name must have the same data type across all branches.", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2175137002", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130314, - "pr_file": "docs/reference/query-languages/esql/_snippets/commands/layout/fork.md", - "discussion_id": "2175137002", - "commented_code": "@@ -0,0 +1,58 @@\n+## `FORK` [esql-fork]\n+\n+```yaml {applies_to}\n+serverless: preview\n+stack: preview 9.1.0\n+```\n+\n+The `FORK` processing command creates multiple execution branches to operate\n+on the same input data and combines the results in a single output table.\n+\n+**Syntax**\n+\n+```esql\n+FORK ( ) ( ) ... ( )\n+```\n+\n+**Description**\n+\n+The `FORK` processing command feeds the input rows into multiple execution\n+branches and outputs the results into a single table, with a discriminator column (`_fork`) to identify which branch each row came from.\n+\n+The values of the discriminator column `_fork` are `fork1`, `fork2`, etc. 
and\n+they designate which `FORK` branch the current row comes from.\n+The values of the `_fork` column always start with `fork1`, which indicates that\n+the row comes from the first branch.\n+\n+`FORK` branches can output different columns, but columns with the\n+same name must have the same data type across all branches.", - "comment_created_at": "2025-06-30T13:52:20+00:00", - "comment_author": "leemthompo", - "comment_body": "```suggestion\r\n**Branch identification:**\r\n- The `_fork` column identifies each branch with values like `fork1`, `fork2`, `fork3`\r\n- Values correspond to the order branches are defined\r\n- `fork1` always indicates the first branch\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2175138062", - "pr_number": 130314, - "pr_file": "docs/reference/query-languages/esql/_snippets/commands/layout/fork.md", - "created_at": "2025-06-30T13:52:48+00:00", - "commented_code": "## `FORK` [esql-fork]\n\n```yaml {applies_to}\nserverless: preview\nstack: preview 9.1.0\n```\n\nThe `FORK` processing command creates multiple execution branches to operate\non the same input data and combines the results in a single output table.\n\n**Syntax**\n\n```esql\nFORK ( ) ( ) ... ( )\n```\n\n**Description**\n\nThe `FORK` processing command feeds the input rows into multiple execution\nbranches and outputs the results into a single table, with a discriminator column (`_fork`) to identify which branch each row came from.\n\nThe values of the discriminator column `_fork` are `fork1`, `fork2`, etc. and\nthey designate which `FORK` branch the current row comes from.\nThe values of the `_fork` column always start with `fork1`, which indicates that\nthe row comes from the first branch.\n\n`FORK` branches can output different columns, but columns with the\nsame name must have the same data type across all branches.\n\nWhen a column does not exist in a `FORK` branch, but it exists in the output of\nother branches, `FORK` will add `null` values to the rows that have missing\ncolumns.\n\n`FORK` preserves the order of the rows from each subset, but it does not\nguarantee that the rows will follow the same order in which the `FORK` branches\nare defined. The rows from the first branch can be interleaved with the rows\nfrom subsequent branches. Use `SORT _fork` after `FORK` if you need to change\nthis behaviour.", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2175138062", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130314, - "pr_file": "docs/reference/query-languages/esql/_snippets/commands/layout/fork.md", - "discussion_id": "2175138062", - "commented_code": "@@ -0,0 +1,58 @@\n+## `FORK` [esql-fork]\n+\n+```yaml {applies_to}\n+serverless: preview\n+stack: preview 9.1.0\n+```\n+\n+The `FORK` processing command creates multiple execution branches to operate\n+on the same input data and combines the results in a single output table.\n+\n+**Syntax**\n+\n+```esql\n+FORK ( ) ( ) ... ( )\n+```\n+\n+**Description**\n+\n+The `FORK` processing command feeds the input rows into multiple execution\n+branches and outputs the results into a single table, with a discriminator column (`_fork`) to identify which branch each row came from.\n+\n+The values of the discriminator column `_fork` are `fork1`, `fork2`, etc. 
and\n+they designate which `FORK` branch the current row comes from.\n+The values of the `_fork` column always start with `fork1`, which indicates that\n+the row comes from the first branch.\n+\n+`FORK` branches can output different columns, but columns with the\n+same name must have the same data type across all branches.\n+\n+When a column does not exist in a `FORK` branch, but it exists in the output of\n+other branches, `FORK` will add `null` values to the rows that have missing\n+columns.\n+\n+`FORK` preserves the order of the rows from each subset, but it does not\n+guarantee that the rows will follow the same order in which the `FORK` branches\n+are defined. The rows from the first branch can be interleaved with the rows\n+from subsequent branches. Use `SORT _fork` after `FORK` if you need to change\n+this behaviour.", - "comment_created_at": "2025-06-30T13:52:48+00:00", - "comment_author": "leemthompo", - "comment_body": "```suggestion\r\n**Column handling:**\r\n- `FORK` branches can output different columns\r\n- Columns with the same name must have the same data type across all branches \r\n- Missing columns are filled with `null` values\r\n\r\n**Row ordering:**\r\n- `FORK` preserves row order within each branch\r\n- Rows from different branches may be interleaved\r\n- Use `SORT _fork` to group results by branch\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-prefer-callbacks-over-blocking.json b/_reviewers/elasticsearch-prefer-callbacks-over-blocking.json new file mode 100644 index 0000000..3290855 --- /dev/null +++ b/_reviewers/elasticsearch-prefer-callbacks-over-blocking.json @@ -0,0 +1,170 @@ +[ + { + "discussion_id": "2171945967", + "pr_number": 130189, + "pr_file": "server/src/internalClusterTest/java/org/elasticsearch/index/engine/MergeWithLowDiskSpaceIT.java", + "created_at": "2025-06-27T12:32:35+00:00", + "commented_code": "});\n }\n\n public void testForceMergeIsBlockedThenUnblocked() throws Exception {\n String node = internalCluster().startNode();\n ensureStableCluster(1);\n setTotalSpace(node, Long.MAX_VALUE);\n ThreadPoolMergeExecutorService threadPoolMergeExecutorService = internalCluster().getInstance(IndicesService.class, node)\n .getThreadPoolMergeExecutorService();\n TestTelemetryPlugin testTelemetryPlugin = getTelemetryPlugin(node);\n // create some index\n final String indexName = randomAlphaOfLength(10).toLowerCase(Locale.ROOT);\n createIndex(\n indexName,\n Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).build()\n );\n // get current disk space usage (for all indices on the node)\n IndicesStatsResponse stats = indicesAdmin().prepareStats().clear().setStore(true).get();\n long usedDiskSpaceAfterIndexing = stats.getTotal().getStore().sizeInBytes();\n // restrict the total disk space such that the next merge does not have sufficient disk space\n long insufficientTotalDiskSpace = usedDiskSpaceAfterIndexing + MERGE_DISK_HIGH_WATERMARK_BYTES - randomLongBetween(1L, 10L);\n setTotalSpace(node, insufficientTotalDiskSpace);\n // node stats' FS stats should report that there is insufficient disk space available\n assertBusy(() -> {\n NodesStatsResponse nodesStatsResponse = client().admin().cluster().prepareNodesStats().setFs(true).get();\n assertThat(nodesStatsResponse.getNodes().size(), equalTo(1));\n NodeStats nodeStats = nodesStatsResponse.getNodes().get(0);\n assertThat(nodeStats.getFs().getTotal().getTotal().getBytes(), equalTo(insufficientTotalDiskSpace));\n 
assertThat(nodeStats.getFs().getTotal().getAvailable().getBytes(), lessThan(MERGE_DISK_HIGH_WATERMARK_BYTES));\n });\n int indexingRounds = randomIntBetween(2, 5);\n while (indexingRounds-- > 0) {\n indexRandom(\n true,\n true,\n true,\n false,\n IntStream.range(1, randomIntBetween(2, 5))\n .mapToObj(i -> prepareIndex(indexName).setSource(\"field\", randomAlphaOfLength(50)))\n .toList()\n );\n }\n // start force merging (which is blocking) on a separate thread\n Thread forceMergeThread = new Thread(", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2171945967", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130189, + "pr_file": "server/src/internalClusterTest/java/org/elasticsearch/index/engine/MergeWithLowDiskSpaceIT.java", + "discussion_id": "2171945967", + "commented_code": "@@ -155,8 +170,125 @@ public void testShardCloseWhenDiskSpaceInsufficient() throws Exception {\n });\n }\n \n+ public void testForceMergeIsBlockedThenUnblocked() throws Exception {\n+ String node = internalCluster().startNode();\n+ ensureStableCluster(1);\n+ setTotalSpace(node, Long.MAX_VALUE);\n+ ThreadPoolMergeExecutorService threadPoolMergeExecutorService = internalCluster().getInstance(IndicesService.class, node)\n+ .getThreadPoolMergeExecutorService();\n+ TestTelemetryPlugin testTelemetryPlugin = getTelemetryPlugin(node);\n+ // create some index\n+ final String indexName = randomAlphaOfLength(10).toLowerCase(Locale.ROOT);\n+ createIndex(\n+ indexName,\n+ Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).build()\n+ );\n+ // get current disk space usage (for all indices on the node)\n+ IndicesStatsResponse stats = indicesAdmin().prepareStats().clear().setStore(true).get();\n+ long usedDiskSpaceAfterIndexing = stats.getTotal().getStore().sizeInBytes();\n+ // restrict the total disk space such that the next merge does not have sufficient disk space\n+ long insufficientTotalDiskSpace = usedDiskSpaceAfterIndexing + MERGE_DISK_HIGH_WATERMARK_BYTES - randomLongBetween(1L, 10L);\n+ setTotalSpace(node, insufficientTotalDiskSpace);\n+ // node stats' FS stats should report that there is insufficient disk space available\n+ assertBusy(() -> {\n+ NodesStatsResponse nodesStatsResponse = client().admin().cluster().prepareNodesStats().setFs(true).get();\n+ assertThat(nodesStatsResponse.getNodes().size(), equalTo(1));\n+ NodeStats nodeStats = nodesStatsResponse.getNodes().get(0);\n+ assertThat(nodeStats.getFs().getTotal().getTotal().getBytes(), equalTo(insufficientTotalDiskSpace));\n+ assertThat(nodeStats.getFs().getTotal().getAvailable().getBytes(), lessThan(MERGE_DISK_HIGH_WATERMARK_BYTES));\n+ });\n+ int indexingRounds = randomIntBetween(2, 5);\n+ while (indexingRounds-- > 0) {\n+ indexRandom(\n+ true,\n+ true,\n+ true,\n+ false,\n+ IntStream.range(1, randomIntBetween(2, 5))\n+ .mapToObj(i -> prepareIndex(indexName).setSource(\"field\", randomAlphaOfLength(50)))\n+ .toList()\n+ );\n+ }\n+ // start force merging (which is blocking) on a separate thread\n+ Thread forceMergeThread = new Thread(", + "comment_created_at": "2025-06-27T12:32:35+00:00", + "comment_author": "henningandersen", + "comment_body": "Could we avoid the `.get()` call and use `execute()` instead to avoid the thread? 
Assertions further down could then be about whether the future is done or not.", + "pr_file_module": null + }, + { + "comment_id": "2173598204", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130189, + "pr_file": "server/src/internalClusterTest/java/org/elasticsearch/index/engine/MergeWithLowDiskSpaceIT.java", + "discussion_id": "2171945967", + "commented_code": "@@ -155,8 +170,125 @@ public void testShardCloseWhenDiskSpaceInsufficient() throws Exception {\n });\n }\n \n+ public void testForceMergeIsBlockedThenUnblocked() throws Exception {\n+ String node = internalCluster().startNode();\n+ ensureStableCluster(1);\n+ setTotalSpace(node, Long.MAX_VALUE);\n+ ThreadPoolMergeExecutorService threadPoolMergeExecutorService = internalCluster().getInstance(IndicesService.class, node)\n+ .getThreadPoolMergeExecutorService();\n+ TestTelemetryPlugin testTelemetryPlugin = getTelemetryPlugin(node);\n+ // create some index\n+ final String indexName = randomAlphaOfLength(10).toLowerCase(Locale.ROOT);\n+ createIndex(\n+ indexName,\n+ Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).build()\n+ );\n+ // get current disk space usage (for all indices on the node)\n+ IndicesStatsResponse stats = indicesAdmin().prepareStats().clear().setStore(true).get();\n+ long usedDiskSpaceAfterIndexing = stats.getTotal().getStore().sizeInBytes();\n+ // restrict the total disk space such that the next merge does not have sufficient disk space\n+ long insufficientTotalDiskSpace = usedDiskSpaceAfterIndexing + MERGE_DISK_HIGH_WATERMARK_BYTES - randomLongBetween(1L, 10L);\n+ setTotalSpace(node, insufficientTotalDiskSpace);\n+ // node stats' FS stats should report that there is insufficient disk space available\n+ assertBusy(() -> {\n+ NodesStatsResponse nodesStatsResponse = client().admin().cluster().prepareNodesStats().setFs(true).get();\n+ assertThat(nodesStatsResponse.getNodes().size(), equalTo(1));\n+ NodeStats nodeStats = nodesStatsResponse.getNodes().get(0);\n+ assertThat(nodeStats.getFs().getTotal().getTotal().getBytes(), equalTo(insufficientTotalDiskSpace));\n+ assertThat(nodeStats.getFs().getTotal().getAvailable().getBytes(), lessThan(MERGE_DISK_HIGH_WATERMARK_BYTES));\n+ });\n+ int indexingRounds = randomIntBetween(2, 5);\n+ while (indexingRounds-- > 0) {\n+ indexRandom(\n+ true,\n+ true,\n+ true,\n+ false,\n+ IntStream.range(1, randomIntBetween(2, 5))\n+ .mapToObj(i -> prepareIndex(indexName).setSource(\"field\", randomAlphaOfLength(50)))\n+ .toList()\n+ );\n+ }\n+ // start force merging (which is blocking) on a separate thread\n+ Thread forceMergeThread = new Thread(", + "comment_created_at": "2025-06-29T04:59:23+00:00", + "comment_author": "albertzaharovits", + "comment_body": "Of course, thanks for the suggestion! 
(I've handled it as part of https://github.com/elastic/elasticsearch/pull/130189/commits/3452dcea85c813a38e83bdbc17af67b610e38bfd)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2171949919", + "pr_number": 130189, + "pr_file": "server/src/internalClusterTest/java/org/elasticsearch/index/engine/MergeWithLowDiskSpaceIT.java", + "created_at": "2025-06-27T12:33:57+00:00", + "commented_code": "});\n }\n\n public void testForceMergeIsBlockedThenUnblocked() throws Exception {\n String node = internalCluster().startNode();\n ensureStableCluster(1);\n setTotalSpace(node, Long.MAX_VALUE);\n ThreadPoolMergeExecutorService threadPoolMergeExecutorService = internalCluster().getInstance(IndicesService.class, node)\n .getThreadPoolMergeExecutorService();\n TestTelemetryPlugin testTelemetryPlugin = getTelemetryPlugin(node);\n // create some index\n final String indexName = randomAlphaOfLength(10).toLowerCase(Locale.ROOT);\n createIndex(\n indexName,\n Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).build()\n );\n // get current disk space usage (for all indices on the node)\n IndicesStatsResponse stats = indicesAdmin().prepareStats().clear().setStore(true).get();\n long usedDiskSpaceAfterIndexing = stats.getTotal().getStore().sizeInBytes();\n // restrict the total disk space such that the next merge does not have sufficient disk space\n long insufficientTotalDiskSpace = usedDiskSpaceAfterIndexing + MERGE_DISK_HIGH_WATERMARK_BYTES - randomLongBetween(1L, 10L);\n setTotalSpace(node, insufficientTotalDiskSpace);\n // node stats' FS stats should report that there is insufficient disk space available\n assertBusy(() -> {\n NodesStatsResponse nodesStatsResponse = client().admin().cluster().prepareNodesStats().setFs(true).get();\n assertThat(nodesStatsResponse.getNodes().size(), equalTo(1));\n NodeStats nodeStats = nodesStatsResponse.getNodes().get(0);\n assertThat(nodeStats.getFs().getTotal().getTotal().getBytes(), equalTo(insufficientTotalDiskSpace));\n assertThat(nodeStats.getFs().getTotal().getAvailable().getBytes(), lessThan(MERGE_DISK_HIGH_WATERMARK_BYTES));\n });\n int indexingRounds = randomIntBetween(2, 5);\n while (indexingRounds-- > 0) {\n indexRandom(\n true,\n true,\n true,\n false,\n IntStream.range(1, randomIntBetween(2, 5))\n .mapToObj(i -> prepareIndex(indexName).setSource(\"field\", randomAlphaOfLength(50)))\n .toList()\n );\n }\n // start force merging (which is blocking) on a separate thread\n Thread forceMergeThread = new Thread(\n // the max segments argument makes it a blocking call\n () -> assertNoFailures(indicesAdmin().prepareForceMerge(indexName).setMaxNumSegments(1).get())\n );\n forceMergeThread.start();\n assertBusy(() -> {\n // merge executor says merging is blocked due to insufficient disk space while there is a single merge task enqueued\n assertThat(threadPoolMergeExecutorService.getMergeTasksQueueLength(), equalTo(1));\n assertTrue(threadPoolMergeExecutorService.isMergingBlockedDueToInsufficientDiskSpace());\n // telemetry says that there are indeed some segments enqueued to be merged\n testTelemetryPlugin.collect();\n assertThat(\n testTelemetryPlugin.getLongGaugeMeasurement(MergeMetrics.MERGE_SEGMENTS_QUEUED_USAGE).getLast().getLong(),\n greaterThan(0L)\n );\n // but still no merges are currently running\n assertThat(\n testTelemetryPlugin.getLongGaugeMeasurement(MergeMetrics.MERGE_SEGMENTS_RUNNING_USAGE).getLast().getLong(),\n equalTo(0L)\n );\n // indices stats also says that no merge is 
currently running (blocked merges are NOT considered as \"running\")\n IndicesStatsResponse indicesStatsResponse = client().admin().indices().prepareStats(indexName).setMerge(true).get();\n long currentMergeCount = indicesStatsResponse.getIndices().get(indexName).getPrimaries().merge.getCurrent();\n assertThat(currentMergeCount, equalTo(0L));\n });\n // the force merge call is still blocked\n assertTrue(forceMergeThread.isAlive());\n // merge executor still confirms merging is blocked due to insufficient disk space\n assertTrue(threadPoolMergeExecutorService.isMergingBlockedDueToInsufficientDiskSpace());\n // make disk space available in order to unblock the merge\n if (randomBoolean()) {\n setTotalSpace(node, Long.MAX_VALUE);\n } else {\n updateClusterSettings(\n Settings.builder().put(ThreadPoolMergeExecutorService.INDICES_MERGE_DISK_HIGH_WATERMARK_SETTING.getKey(), \"0b\")\n );\n }\n // assert that all the merges are now done and that the force-merge call returns\n assertBusy(() -> {\n IndicesStatsResponse indicesStatsResponse = client().admin().indices().prepareStats(indexName).setMerge(true).get();\n long currentMergeCount = indicesStatsResponse.getIndices().get(indexName).getPrimaries().merge.getCurrent();\n // NO merging is in progress\n assertThat(currentMergeCount, equalTo(0L));\n long totalMergeCount = indicesStatsResponse.getIndices().get(indexName).getPrimaries().merge.getTotal();\n assertThat(totalMergeCount, greaterThan(0L));\n // force merge call returned\n assertFalse(forceMergeThread.isAlive());", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2171949919", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130189, + "pr_file": "server/src/internalClusterTest/java/org/elasticsearch/index/engine/MergeWithLowDiskSpaceIT.java", + "discussion_id": "2171949919", + "commented_code": "@@ -155,8 +170,125 @@ public void testShardCloseWhenDiskSpaceInsufficient() throws Exception {\n });\n }\n \n+ public void testForceMergeIsBlockedThenUnblocked() throws Exception {\n+ String node = internalCluster().startNode();\n+ ensureStableCluster(1);\n+ setTotalSpace(node, Long.MAX_VALUE);\n+ ThreadPoolMergeExecutorService threadPoolMergeExecutorService = internalCluster().getInstance(IndicesService.class, node)\n+ .getThreadPoolMergeExecutorService();\n+ TestTelemetryPlugin testTelemetryPlugin = getTelemetryPlugin(node);\n+ // create some index\n+ final String indexName = randomAlphaOfLength(10).toLowerCase(Locale.ROOT);\n+ createIndex(\n+ indexName,\n+ Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).build()\n+ );\n+ // get current disk space usage (for all indices on the node)\n+ IndicesStatsResponse stats = indicesAdmin().prepareStats().clear().setStore(true).get();\n+ long usedDiskSpaceAfterIndexing = stats.getTotal().getStore().sizeInBytes();\n+ // restrict the total disk space such that the next merge does not have sufficient disk space\n+ long insufficientTotalDiskSpace = usedDiskSpaceAfterIndexing + MERGE_DISK_HIGH_WATERMARK_BYTES - randomLongBetween(1L, 10L);\n+ setTotalSpace(node, insufficientTotalDiskSpace);\n+ // node stats' FS stats should report that there is insufficient disk space available\n+ assertBusy(() -> {\n+ NodesStatsResponse nodesStatsResponse = client().admin().cluster().prepareNodesStats().setFs(true).get();\n+ assertThat(nodesStatsResponse.getNodes().size(), equalTo(1));\n+ NodeStats nodeStats = nodesStatsResponse.getNodes().get(0);\n+ 
assertThat(nodeStats.getFs().getTotal().getTotal().getBytes(), equalTo(insufficientTotalDiskSpace));\n+ assertThat(nodeStats.getFs().getTotal().getAvailable().getBytes(), lessThan(MERGE_DISK_HIGH_WATERMARK_BYTES));\n+ });\n+ int indexingRounds = randomIntBetween(2, 5);\n+ while (indexingRounds-- > 0) {\n+ indexRandom(\n+ true,\n+ true,\n+ true,\n+ false,\n+ IntStream.range(1, randomIntBetween(2, 5))\n+ .mapToObj(i -> prepareIndex(indexName).setSource(\"field\", randomAlphaOfLength(50)))\n+ .toList()\n+ );\n+ }\n+ // start force merging (which is blocking) on a separate thread\n+ Thread forceMergeThread = new Thread(\n+ // the max segments argument makes it a blocking call\n+ () -> assertNoFailures(indicesAdmin().prepareForceMerge(indexName).setMaxNumSegments(1).get())\n+ );\n+ forceMergeThread.start();\n+ assertBusy(() -> {\n+ // merge executor says merging is blocked due to insufficient disk space while there is a single merge task enqueued\n+ assertThat(threadPoolMergeExecutorService.getMergeTasksQueueLength(), equalTo(1));\n+ assertTrue(threadPoolMergeExecutorService.isMergingBlockedDueToInsufficientDiskSpace());\n+ // telemetry says that there are indeed some segments enqueued to be merged\n+ testTelemetryPlugin.collect();\n+ assertThat(\n+ testTelemetryPlugin.getLongGaugeMeasurement(MergeMetrics.MERGE_SEGMENTS_QUEUED_USAGE).getLast().getLong(),\n+ greaterThan(0L)\n+ );\n+ // but still no merges are currently running\n+ assertThat(\n+ testTelemetryPlugin.getLongGaugeMeasurement(MergeMetrics.MERGE_SEGMENTS_RUNNING_USAGE).getLast().getLong(),\n+ equalTo(0L)\n+ );\n+ // indices stats also says that no merge is currently running (blocked merges are NOT considered as \"running\")\n+ IndicesStatsResponse indicesStatsResponse = client().admin().indices().prepareStats(indexName).setMerge(true).get();\n+ long currentMergeCount = indicesStatsResponse.getIndices().get(indexName).getPrimaries().merge.getCurrent();\n+ assertThat(currentMergeCount, equalTo(0L));\n+ });\n+ // the force merge call is still blocked\n+ assertTrue(forceMergeThread.isAlive());\n+ // merge executor still confirms merging is blocked due to insufficient disk space\n+ assertTrue(threadPoolMergeExecutorService.isMergingBlockedDueToInsufficientDiskSpace());\n+ // make disk space available in order to unblock the merge\n+ if (randomBoolean()) {\n+ setTotalSpace(node, Long.MAX_VALUE);\n+ } else {\n+ updateClusterSettings(\n+ Settings.builder().put(ThreadPoolMergeExecutorService.INDICES_MERGE_DISK_HIGH_WATERMARK_SETTING.getKey(), \"0b\")\n+ );\n+ }\n+ // assert that all the merges are now done and that the force-merge call returns\n+ assertBusy(() -> {\n+ IndicesStatsResponse indicesStatsResponse = client().admin().indices().prepareStats(indexName).setMerge(true).get();\n+ long currentMergeCount = indicesStatsResponse.getIndices().get(indexName).getPrimaries().merge.getCurrent();\n+ // NO merging is in progress\n+ assertThat(currentMergeCount, equalTo(0L));\n+ long totalMergeCount = indicesStatsResponse.getIndices().get(indexName).getPrimaries().merge.getTotal();\n+ assertThat(totalMergeCount, greaterThan(0L));\n+ // force merge call returned\n+ assertFalse(forceMergeThread.isAlive());", + "comment_created_at": "2025-06-27T12:33:57+00:00", + "comment_author": "henningandersen", + "comment_body": "Could we instead wait for the force merge to complete first? If we go for a future, we can use `future.get()` here (or safeGet). If not, we can use `forcemergeThread.join`. 
That way I think we can avoid the `assertBusy`, since all the things here should hopefully be up to date.", + "pr_file_module": null + }, + { + "comment_id": "2173598419", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130189, + "pr_file": "server/src/internalClusterTest/java/org/elasticsearch/index/engine/MergeWithLowDiskSpaceIT.java", + "discussion_id": "2171949919", + "commented_code": "@@ -155,8 +170,125 @@ public void testShardCloseWhenDiskSpaceInsufficient() throws Exception {\n });\n }\n \n+ public void testForceMergeIsBlockedThenUnblocked() throws Exception {\n+ String node = internalCluster().startNode();\n+ ensureStableCluster(1);\n+ setTotalSpace(node, Long.MAX_VALUE);\n+ ThreadPoolMergeExecutorService threadPoolMergeExecutorService = internalCluster().getInstance(IndicesService.class, node)\n+ .getThreadPoolMergeExecutorService();\n+ TestTelemetryPlugin testTelemetryPlugin = getTelemetryPlugin(node);\n+ // create some index\n+ final String indexName = randomAlphaOfLength(10).toLowerCase(Locale.ROOT);\n+ createIndex(\n+ indexName,\n+ Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).build()\n+ );\n+ // get current disk space usage (for all indices on the node)\n+ IndicesStatsResponse stats = indicesAdmin().prepareStats().clear().setStore(true).get();\n+ long usedDiskSpaceAfterIndexing = stats.getTotal().getStore().sizeInBytes();\n+ // restrict the total disk space such that the next merge does not have sufficient disk space\n+ long insufficientTotalDiskSpace = usedDiskSpaceAfterIndexing + MERGE_DISK_HIGH_WATERMARK_BYTES - randomLongBetween(1L, 10L);\n+ setTotalSpace(node, insufficientTotalDiskSpace);\n+ // node stats' FS stats should report that there is insufficient disk space available\n+ assertBusy(() -> {\n+ NodesStatsResponse nodesStatsResponse = client().admin().cluster().prepareNodesStats().setFs(true).get();\n+ assertThat(nodesStatsResponse.getNodes().size(), equalTo(1));\n+ NodeStats nodeStats = nodesStatsResponse.getNodes().get(0);\n+ assertThat(nodeStats.getFs().getTotal().getTotal().getBytes(), equalTo(insufficientTotalDiskSpace));\n+ assertThat(nodeStats.getFs().getTotal().getAvailable().getBytes(), lessThan(MERGE_DISK_HIGH_WATERMARK_BYTES));\n+ });\n+ int indexingRounds = randomIntBetween(2, 5);\n+ while (indexingRounds-- > 0) {\n+ indexRandom(\n+ true,\n+ true,\n+ true,\n+ false,\n+ IntStream.range(1, randomIntBetween(2, 5))\n+ .mapToObj(i -> prepareIndex(indexName).setSource(\"field\", randomAlphaOfLength(50)))\n+ .toList()\n+ );\n+ }\n+ // start force merging (which is blocking) on a separate thread\n+ Thread forceMergeThread = new Thread(\n+ // the max segments argument makes it a blocking call\n+ () -> assertNoFailures(indicesAdmin().prepareForceMerge(indexName).setMaxNumSegments(1).get())\n+ );\n+ forceMergeThread.start();\n+ assertBusy(() -> {\n+ // merge executor says merging is blocked due to insufficient disk space while there is a single merge task enqueued\n+ assertThat(threadPoolMergeExecutorService.getMergeTasksQueueLength(), equalTo(1));\n+ assertTrue(threadPoolMergeExecutorService.isMergingBlockedDueToInsufficientDiskSpace());\n+ // telemetry says that there are indeed some segments enqueued to be merged\n+ testTelemetryPlugin.collect();\n+ assertThat(\n+ testTelemetryPlugin.getLongGaugeMeasurement(MergeMetrics.MERGE_SEGMENTS_QUEUED_USAGE).getLast().getLong(),\n+ greaterThan(0L)\n+ );\n+ // but still no merges are currently running\n+ assertThat(\n+ 
testTelemetryPlugin.getLongGaugeMeasurement(MergeMetrics.MERGE_SEGMENTS_RUNNING_USAGE).getLast().getLong(),\n+ equalTo(0L)\n+ );\n+ // indices stats also says that no merge is currently running (blocked merges are NOT considered as \"running\")\n+ IndicesStatsResponse indicesStatsResponse = client().admin().indices().prepareStats(indexName).setMerge(true).get();\n+ long currentMergeCount = indicesStatsResponse.getIndices().get(indexName).getPrimaries().merge.getCurrent();\n+ assertThat(currentMergeCount, equalTo(0L));\n+ });\n+ // the force merge call is still blocked\n+ assertTrue(forceMergeThread.isAlive());\n+ // merge executor still confirms merging is blocked due to insufficient disk space\n+ assertTrue(threadPoolMergeExecutorService.isMergingBlockedDueToInsufficientDiskSpace());\n+ // make disk space available in order to unblock the merge\n+ if (randomBoolean()) {\n+ setTotalSpace(node, Long.MAX_VALUE);\n+ } else {\n+ updateClusterSettings(\n+ Settings.builder().put(ThreadPoolMergeExecutorService.INDICES_MERGE_DISK_HIGH_WATERMARK_SETTING.getKey(), \"0b\")\n+ );\n+ }\n+ // assert that all the merges are now done and that the force-merge call returns\n+ assertBusy(() -> {\n+ IndicesStatsResponse indicesStatsResponse = client().admin().indices().prepareStats(indexName).setMerge(true).get();\n+ long currentMergeCount = indicesStatsResponse.getIndices().get(indexName).getPrimaries().merge.getCurrent();\n+ // NO merging is in progress\n+ assertThat(currentMergeCount, equalTo(0L));\n+ long totalMergeCount = indicesStatsResponse.getIndices().get(indexName).getPrimaries().merge.getTotal();\n+ assertThat(totalMergeCount, greaterThan(0L));\n+ // force merge call returned\n+ assertFalse(forceMergeThread.isAlive());", + "comment_created_at": "2025-06-29T05:00:58+00:00", + "comment_author": "albertzaharovits", + "comment_body": "Great suggestion too, thanks! (also handled as part of https://github.com/elastic/elasticsearch/pull/130189/commits/3452dcea85c813a38e83bdbc17af67b610e38bfd)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1559824642", + "pr_number": 107188, + "pr_file": "x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteTrainedModelAction.java", + "created_at": "2024-04-10T17:19:05+00:00", + "commented_code": "}\n }\n\n protected boolean modelExists(String modelId) {", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "1559824642", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 107188, + "pr_file": "x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteTrainedModelAction.java", + "discussion_id": "1559824642", + "commented_code": "@@ -228,6 +244,39 @@ private void deleteModel(DeleteTrainedModelAction.Request request, ClusterState\n }\n }\n \n+ protected boolean modelExists(String modelId) {", + "comment_created_at": "2024-04-10T17:19:05+00:00", + "comment_author": "davidkyle", + "comment_body": "You can avoid the countdown latch and hence blocking the calling thread by using a listener. \r\n\r\nYou don't have to timeout the call to `trainedModelProvider.getTrainedModel()` if it does timeout simply out let the error propagate from the call. 
\r\n\r\n```\r\n private void modelExists(String modelId, ActionListener listener) {\r\n trainedModelProvider.getTrainedModel(modelId, GetTrainedModelsAction.Includes.empty(), null,\r\n ActionListener.wrap(\r\n model -> listener.onResponse(Boolean.TRUE),\r\n exception -> {\r\n if (ExceptionsHelper.unwrapCause(exception) instanceof ResourceNotFoundException) {\r\n listener.onResponse(Boolean.FALSE);\r\n } else {\r\n listener.onFailure(exception);\r\n }\r\n }\r\n )\r\n );\r\n }\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1561275004", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 107188, + "pr_file": "x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteTrainedModelAction.java", + "discussion_id": "1559824642", + "commented_code": "@@ -228,6 +244,39 @@ private void deleteModel(DeleteTrainedModelAction.Request request, ClusterState\n }\n }\n \n+ protected boolean modelExists(String modelId) {", + "comment_created_at": "2024-04-11T16:04:54+00:00", + "comment_author": "davidkyle", + "comment_body": "After you make the `modelExists()` function async with a listener you will need to chain the various processing steps together. The best way to do this is use a [SubscribableListener](https://github.com/elastic/elasticsearch/blob/main/server/src/main/java/org/elasticsearch/action/support/SubscribableListener.java) \r\n\r\nHere's an example if it being used:\r\nhttps://github.com/elastic/elasticsearch/blob/main/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportGetTrainedModelsStatsAction.java#L128", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1559831450", + "pr_number": 107188, + "pr_file": "x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteTrainedModelAction.java", + "created_at": "2024-04-10T17:25:50+00:00", + "commented_code": "}\n }\n\n protected boolean modelExists(String modelId) {\n CountDownLatch latch = new CountDownLatch(1);\n final AtomicBoolean modelExists = new AtomicBoolean(false);\n\n ActionListener trainedModelListener = new ActionListener<>() {\n @Override\n public void onResponse(TrainedModelConfig config) {\n modelExists.set(true);\n latch.countDown();\n }\n\n @Override\n public void onFailure(Exception e) {\n logger.error(\"Failed to retrieve model {}: {}\", modelId, e.getMessage(), e);\n latch.countDown();\n }\n };\n\n trainedModelProvider.getTrainedModel(modelId, GetTrainedModelsAction.Includes.empty(), null, trainedModelListener);\n\n try {\n boolean latchReached = latch.await(5, TimeUnit.SECONDS);\n\n if (latchReached == false) {\n throw new ElasticsearchException(\"Timeout while waiting for trained model to be retrieved\");\n }\n } catch (InterruptedException e) {\n throw new ElasticsearchException(\"Unexpected exception\", e);", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "1559831450", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 107188, + "pr_file": "x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteTrainedModelAction.java", + "discussion_id": "1559831450", + "commented_code": "@@ -228,6 +244,39 @@ private void deleteModel(DeleteTrainedModelAction.Request request, ClusterState\n }\n }\n \n+ protected boolean modelExists(String modelId) {\n+ CountDownLatch latch = new CountDownLatch(1);\n+ final AtomicBoolean modelExists = new AtomicBoolean(false);\n+\n+ ActionListener trainedModelListener = new ActionListener<>() {\n+ @Override\n+ public void 
onResponse(TrainedModelConfig config) {\n+ modelExists.set(true);\n+ latch.countDown();\n+ }\n+\n+ @Override\n+ public void onFailure(Exception e) {\n+ logger.error(\"Failed to retrieve model {}: {}\", modelId, e.getMessage(), e);\n+ latch.countDown();\n+ }\n+ };\n+\n+ trainedModelProvider.getTrainedModel(modelId, GetTrainedModelsAction.Includes.empty(), null, trainedModelListener);\n+\n+ try {\n+ boolean latchReached = latch.await(5, TimeUnit.SECONDS);\n+\n+ if (latchReached == false) {\n+ throw new ElasticsearchException(\"Timeout while waiting for trained model to be retrieved\");\n+ }\n+ } catch (InterruptedException e) {\n+ throw new ElasticsearchException(\"Unexpected exception\", e);", + "comment_created_at": "2024-04-10T17:25:50+00:00", + "comment_author": "davidkyle", + "comment_body": "This code is not necessary if you take my suggestion but in Java it's best practice in to reset the interrupt flag with `Thread.currentThread().interrupt();`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1559910187", + "pr_number": 107188, + "pr_file": "x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteTrainedModelAction.java", + "created_at": "2024-04-10T18:42:46+00:00", + "commented_code": "}\n }\n\n protected boolean modelExists(String modelId) {\n CountDownLatch latch = new CountDownLatch(1);\n final AtomicBoolean modelExists = new AtomicBoolean(false);\n\n ActionListener trainedModelListener = new ActionListener<>() {\n @Override\n public void onResponse(TrainedModelConfig config) {\n modelExists.set(true);\n latch.countDown();\n }\n\n @Override\n public void onFailure(Exception e) {\n logger.error(\"Failed to retrieve model {}: {}\", modelId, e.getMessage(), e);\n latch.countDown();\n }\n };\n\n trainedModelProvider.getTrainedModel(modelId, GetTrainedModelsAction.Includes.empty(), null, trainedModelListener);", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "1559910187", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 107188, + "pr_file": "x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteTrainedModelAction.java", + "discussion_id": "1559910187", + "commented_code": "@@ -228,6 +244,39 @@ private void deleteModel(DeleteTrainedModelAction.Request request, ClusterState\n }\n }\n \n+ protected boolean modelExists(String modelId) {\n+ CountDownLatch latch = new CountDownLatch(1);\n+ final AtomicBoolean modelExists = new AtomicBoolean(false);\n+\n+ ActionListener trainedModelListener = new ActionListener<>() {\n+ @Override\n+ public void onResponse(TrainedModelConfig config) {\n+ modelExists.set(true);\n+ latch.countDown();\n+ }\n+\n+ @Override\n+ public void onFailure(Exception e) {\n+ logger.error(\"Failed to retrieve model {}: {}\", modelId, e.getMessage(), e);\n+ latch.countDown();\n+ }\n+ };\n+\n+ trainedModelProvider.getTrainedModel(modelId, GetTrainedModelsAction.Includes.empty(), null, trainedModelListener);", + "comment_created_at": "2024-04-10T18:42:46+00:00", + "comment_author": "maxhniebergall", + "comment_body": "If we don't pass a parent task to this request, it wont be cancelable. I think it would be better to pass in the task from the masterOperation here. 
", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1559919796", + "pr_number": 107188, + "pr_file": "x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteTrainedModelAction.java", + "created_at": "2024-04-10T18:52:49+00:00", + "commented_code": "}\n }\n\n protected boolean modelExists(String modelId) {\n CountDownLatch latch = new CountDownLatch(1);\n final AtomicBoolean modelExists = new AtomicBoolean(false);", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "1559919796", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 107188, + "pr_file": "x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteTrainedModelAction.java", + "discussion_id": "1559919796", + "commented_code": "@@ -228,6 +244,39 @@ private void deleteModel(DeleteTrainedModelAction.Request request, ClusterState\n }\n }\n \n+ protected boolean modelExists(String modelId) {\n+ CountDownLatch latch = new CountDownLatch(1);\n+ final AtomicBoolean modelExists = new AtomicBoolean(false);\n+", + "comment_created_at": "2024-04-10T18:52:49+00:00", + "comment_author": "maxhniebergall", + "comment_body": "To avoid using a latch and requiring a timeout, I think we could replace this function with an actionListener. What do you think? ", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-prefer-callbacks-over-blocking.md b/_reviewers/elasticsearch-prefer-callbacks-over-blocking.md index 676e930..7afa2d5 100644 --- a/_reviewers/elasticsearch-prefer-callbacks-over-blocking.md +++ b/_reviewers/elasticsearch-prefer-callbacks-over-blocking.md @@ -49,175 +49,3 @@ void getResourceAsync(String id, ActionListener listener) { ``` Similarly, for testing asynchronous code, avoid spawning threads and waiting with Thread.join(). Instead, use futures or test-friendly abstractions that allow controlling when callbacks are triggered. 
- - -[ - { - "discussion_id": "2171945967", - "pr_number": 130189, - "pr_file": "server/src/internalClusterTest/java/org/elasticsearch/index/engine/MergeWithLowDiskSpaceIT.java", - "created_at": "2025-06-27T12:32:35+00:00", - "commented_code": "});\n }\n\n public void testForceMergeIsBlockedThenUnblocked() throws Exception {\n String node = internalCluster().startNode();\n ensureStableCluster(1);\n setTotalSpace(node, Long.MAX_VALUE);\n ThreadPoolMergeExecutorService threadPoolMergeExecutorService = internalCluster().getInstance(IndicesService.class, node)\n .getThreadPoolMergeExecutorService();\n TestTelemetryPlugin testTelemetryPlugin = getTelemetryPlugin(node);\n // create some index\n final String indexName = randomAlphaOfLength(10).toLowerCase(Locale.ROOT);\n createIndex(\n indexName,\n Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).build()\n );\n // get current disk space usage (for all indices on the node)\n IndicesStatsResponse stats = indicesAdmin().prepareStats().clear().setStore(true).get();\n long usedDiskSpaceAfterIndexing = stats.getTotal().getStore().sizeInBytes();\n // restrict the total disk space such that the next merge does not have sufficient disk space\n long insufficientTotalDiskSpace = usedDiskSpaceAfterIndexing + MERGE_DISK_HIGH_WATERMARK_BYTES - randomLongBetween(1L, 10L);\n setTotalSpace(node, insufficientTotalDiskSpace);\n // node stats' FS stats should report that there is insufficient disk space available\n assertBusy(() -> {\n NodesStatsResponse nodesStatsResponse = client().admin().cluster().prepareNodesStats().setFs(true).get();\n assertThat(nodesStatsResponse.getNodes().size(), equalTo(1));\n NodeStats nodeStats = nodesStatsResponse.getNodes().get(0);\n assertThat(nodeStats.getFs().getTotal().getTotal().getBytes(), equalTo(insufficientTotalDiskSpace));\n assertThat(nodeStats.getFs().getTotal().getAvailable().getBytes(), lessThan(MERGE_DISK_HIGH_WATERMARK_BYTES));\n });\n int indexingRounds = randomIntBetween(2, 5);\n while (indexingRounds-- > 0) {\n indexRandom(\n true,\n true,\n true,\n false,\n IntStream.range(1, randomIntBetween(2, 5))\n .mapToObj(i -> prepareIndex(indexName).setSource(\"field\", randomAlphaOfLength(50)))\n .toList()\n );\n }\n // start force merging (which is blocking) on a separate thread\n Thread forceMergeThread = new Thread(", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2171945967", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130189, - "pr_file": "server/src/internalClusterTest/java/org/elasticsearch/index/engine/MergeWithLowDiskSpaceIT.java", - "discussion_id": "2171945967", - "commented_code": "@@ -155,8 +170,125 @@ public void testShardCloseWhenDiskSpaceInsufficient() throws Exception {\n });\n }\n \n+ public void testForceMergeIsBlockedThenUnblocked() throws Exception {\n+ String node = internalCluster().startNode();\n+ ensureStableCluster(1);\n+ setTotalSpace(node, Long.MAX_VALUE);\n+ ThreadPoolMergeExecutorService threadPoolMergeExecutorService = internalCluster().getInstance(IndicesService.class, node)\n+ .getThreadPoolMergeExecutorService();\n+ TestTelemetryPlugin testTelemetryPlugin = getTelemetryPlugin(node);\n+ // create some index\n+ final String indexName = randomAlphaOfLength(10).toLowerCase(Locale.ROOT);\n+ createIndex(\n+ indexName,\n+ Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).build()\n+ );\n+ // get current disk 
space usage (for all indices on the node)\n+ IndicesStatsResponse stats = indicesAdmin().prepareStats().clear().setStore(true).get();\n+ long usedDiskSpaceAfterIndexing = stats.getTotal().getStore().sizeInBytes();\n+ // restrict the total disk space such that the next merge does not have sufficient disk space\n+ long insufficientTotalDiskSpace = usedDiskSpaceAfterIndexing + MERGE_DISK_HIGH_WATERMARK_BYTES - randomLongBetween(1L, 10L);\n+ setTotalSpace(node, insufficientTotalDiskSpace);\n+ // node stats' FS stats should report that there is insufficient disk space available\n+ assertBusy(() -> {\n+ NodesStatsResponse nodesStatsResponse = client().admin().cluster().prepareNodesStats().setFs(true).get();\n+ assertThat(nodesStatsResponse.getNodes().size(), equalTo(1));\n+ NodeStats nodeStats = nodesStatsResponse.getNodes().get(0);\n+ assertThat(nodeStats.getFs().getTotal().getTotal().getBytes(), equalTo(insufficientTotalDiskSpace));\n+ assertThat(nodeStats.getFs().getTotal().getAvailable().getBytes(), lessThan(MERGE_DISK_HIGH_WATERMARK_BYTES));\n+ });\n+ int indexingRounds = randomIntBetween(2, 5);\n+ while (indexingRounds-- > 0) {\n+ indexRandom(\n+ true,\n+ true,\n+ true,\n+ false,\n+ IntStream.range(1, randomIntBetween(2, 5))\n+ .mapToObj(i -> prepareIndex(indexName).setSource(\"field\", randomAlphaOfLength(50)))\n+ .toList()\n+ );\n+ }\n+ // start force merging (which is blocking) on a separate thread\n+ Thread forceMergeThread = new Thread(", - "comment_created_at": "2025-06-27T12:32:35+00:00", - "comment_author": "henningandersen", - "comment_body": "Could we avoid the `.get()` call and use `execute()` instead to avoid the thread? Assertions further down could then be about whether the future is done or not.", - "pr_file_module": null - }, - { - "comment_id": "2173598204", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130189, - "pr_file": "server/src/internalClusterTest/java/org/elasticsearch/index/engine/MergeWithLowDiskSpaceIT.java", - "discussion_id": "2171945967", - "commented_code": "@@ -155,8 +170,125 @@ public void testShardCloseWhenDiskSpaceInsufficient() throws Exception {\n });\n }\n \n+ public void testForceMergeIsBlockedThenUnblocked() throws Exception {\n+ String node = internalCluster().startNode();\n+ ensureStableCluster(1);\n+ setTotalSpace(node, Long.MAX_VALUE);\n+ ThreadPoolMergeExecutorService threadPoolMergeExecutorService = internalCluster().getInstance(IndicesService.class, node)\n+ .getThreadPoolMergeExecutorService();\n+ TestTelemetryPlugin testTelemetryPlugin = getTelemetryPlugin(node);\n+ // create some index\n+ final String indexName = randomAlphaOfLength(10).toLowerCase(Locale.ROOT);\n+ createIndex(\n+ indexName,\n+ Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).build()\n+ );\n+ // get current disk space usage (for all indices on the node)\n+ IndicesStatsResponse stats = indicesAdmin().prepareStats().clear().setStore(true).get();\n+ long usedDiskSpaceAfterIndexing = stats.getTotal().getStore().sizeInBytes();\n+ // restrict the total disk space such that the next merge does not have sufficient disk space\n+ long insufficientTotalDiskSpace = usedDiskSpaceAfterIndexing + MERGE_DISK_HIGH_WATERMARK_BYTES - randomLongBetween(1L, 10L);\n+ setTotalSpace(node, insufficientTotalDiskSpace);\n+ // node stats' FS stats should report that there is insufficient disk space available\n+ assertBusy(() -> {\n+ NodesStatsResponse nodesStatsResponse = 
client().admin().cluster().prepareNodesStats().setFs(true).get();\n+ assertThat(nodesStatsResponse.getNodes().size(), equalTo(1));\n+ NodeStats nodeStats = nodesStatsResponse.getNodes().get(0);\n+ assertThat(nodeStats.getFs().getTotal().getTotal().getBytes(), equalTo(insufficientTotalDiskSpace));\n+ assertThat(nodeStats.getFs().getTotal().getAvailable().getBytes(), lessThan(MERGE_DISK_HIGH_WATERMARK_BYTES));\n+ });\n+ int indexingRounds = randomIntBetween(2, 5);\n+ while (indexingRounds-- > 0) {\n+ indexRandom(\n+ true,\n+ true,\n+ true,\n+ false,\n+ IntStream.range(1, randomIntBetween(2, 5))\n+ .mapToObj(i -> prepareIndex(indexName).setSource(\"field\", randomAlphaOfLength(50)))\n+ .toList()\n+ );\n+ }\n+ // start force merging (which is blocking) on a separate thread\n+ Thread forceMergeThread = new Thread(", - "comment_created_at": "2025-06-29T04:59:23+00:00", - "comment_author": "albertzaharovits", - "comment_body": "Of course, thanks for the suggestion! (I've handled it as part of https://github.com/elastic/elasticsearch/pull/130189/commits/3452dcea85c813a38e83bdbc17af67b610e38bfd)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2171949919", - "pr_number": 130189, - "pr_file": "server/src/internalClusterTest/java/org/elasticsearch/index/engine/MergeWithLowDiskSpaceIT.java", - "created_at": "2025-06-27T12:33:57+00:00", - "commented_code": "});\n }\n\n public void testForceMergeIsBlockedThenUnblocked() throws Exception {\n String node = internalCluster().startNode();\n ensureStableCluster(1);\n setTotalSpace(node, Long.MAX_VALUE);\n ThreadPoolMergeExecutorService threadPoolMergeExecutorService = internalCluster().getInstance(IndicesService.class, node)\n .getThreadPoolMergeExecutorService();\n TestTelemetryPlugin testTelemetryPlugin = getTelemetryPlugin(node);\n // create some index\n final String indexName = randomAlphaOfLength(10).toLowerCase(Locale.ROOT);\n createIndex(\n indexName,\n Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).build()\n );\n // get current disk space usage (for all indices on the node)\n IndicesStatsResponse stats = indicesAdmin().prepareStats().clear().setStore(true).get();\n long usedDiskSpaceAfterIndexing = stats.getTotal().getStore().sizeInBytes();\n // restrict the total disk space such that the next merge does not have sufficient disk space\n long insufficientTotalDiskSpace = usedDiskSpaceAfterIndexing + MERGE_DISK_HIGH_WATERMARK_BYTES - randomLongBetween(1L, 10L);\n setTotalSpace(node, insufficientTotalDiskSpace);\n // node stats' FS stats should report that there is insufficient disk space available\n assertBusy(() -> {\n NodesStatsResponse nodesStatsResponse = client().admin().cluster().prepareNodesStats().setFs(true).get();\n assertThat(nodesStatsResponse.getNodes().size(), equalTo(1));\n NodeStats nodeStats = nodesStatsResponse.getNodes().get(0);\n assertThat(nodeStats.getFs().getTotal().getTotal().getBytes(), equalTo(insufficientTotalDiskSpace));\n assertThat(nodeStats.getFs().getTotal().getAvailable().getBytes(), lessThan(MERGE_DISK_HIGH_WATERMARK_BYTES));\n });\n int indexingRounds = randomIntBetween(2, 5);\n while (indexingRounds-- > 0) {\n indexRandom(\n true,\n true,\n true,\n false,\n IntStream.range(1, randomIntBetween(2, 5))\n .mapToObj(i -> prepareIndex(indexName).setSource(\"field\", randomAlphaOfLength(50)))\n .toList()\n );\n }\n // start force merging (which is blocking) on a separate thread\n Thread forceMergeThread = new Thread(\n // the max segments 
argument makes it a blocking call\n () -> assertNoFailures(indicesAdmin().prepareForceMerge(indexName).setMaxNumSegments(1).get())\n );\n forceMergeThread.start();\n assertBusy(() -> {\n // merge executor says merging is blocked due to insufficient disk space while there is a single merge task enqueued\n assertThat(threadPoolMergeExecutorService.getMergeTasksQueueLength(), equalTo(1));\n assertTrue(threadPoolMergeExecutorService.isMergingBlockedDueToInsufficientDiskSpace());\n // telemetry says that there are indeed some segments enqueued to be merged\n testTelemetryPlugin.collect();\n assertThat(\n testTelemetryPlugin.getLongGaugeMeasurement(MergeMetrics.MERGE_SEGMENTS_QUEUED_USAGE).getLast().getLong(),\n greaterThan(0L)\n );\n // but still no merges are currently running\n assertThat(\n testTelemetryPlugin.getLongGaugeMeasurement(MergeMetrics.MERGE_SEGMENTS_RUNNING_USAGE).getLast().getLong(),\n equalTo(0L)\n );\n // indices stats also says that no merge is currently running (blocked merges are NOT considered as \"running\")\n IndicesStatsResponse indicesStatsResponse = client().admin().indices().prepareStats(indexName).setMerge(true).get();\n long currentMergeCount = indicesStatsResponse.getIndices().get(indexName).getPrimaries().merge.getCurrent();\n assertThat(currentMergeCount, equalTo(0L));\n });\n // the force merge call is still blocked\n assertTrue(forceMergeThread.isAlive());\n // merge executor still confirms merging is blocked due to insufficient disk space\n assertTrue(threadPoolMergeExecutorService.isMergingBlockedDueToInsufficientDiskSpace());\n // make disk space available in order to unblock the merge\n if (randomBoolean()) {\n setTotalSpace(node, Long.MAX_VALUE);\n } else {\n updateClusterSettings(\n Settings.builder().put(ThreadPoolMergeExecutorService.INDICES_MERGE_DISK_HIGH_WATERMARK_SETTING.getKey(), \"0b\")\n );\n }\n // assert that all the merges are now done and that the force-merge call returns\n assertBusy(() -> {\n IndicesStatsResponse indicesStatsResponse = client().admin().indices().prepareStats(indexName).setMerge(true).get();\n long currentMergeCount = indicesStatsResponse.getIndices().get(indexName).getPrimaries().merge.getCurrent();\n // NO merging is in progress\n assertThat(currentMergeCount, equalTo(0L));\n long totalMergeCount = indicesStatsResponse.getIndices().get(indexName).getPrimaries().merge.getTotal();\n assertThat(totalMergeCount, greaterThan(0L));\n // force merge call returned\n assertFalse(forceMergeThread.isAlive());", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2171949919", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130189, - "pr_file": "server/src/internalClusterTest/java/org/elasticsearch/index/engine/MergeWithLowDiskSpaceIT.java", - "discussion_id": "2171949919", - "commented_code": "@@ -155,8 +170,125 @@ public void testShardCloseWhenDiskSpaceInsufficient() throws Exception {\n });\n }\n \n+ public void testForceMergeIsBlockedThenUnblocked() throws Exception {\n+ String node = internalCluster().startNode();\n+ ensureStableCluster(1);\n+ setTotalSpace(node, Long.MAX_VALUE);\n+ ThreadPoolMergeExecutorService threadPoolMergeExecutorService = internalCluster().getInstance(IndicesService.class, node)\n+ .getThreadPoolMergeExecutorService();\n+ TestTelemetryPlugin testTelemetryPlugin = getTelemetryPlugin(node);\n+ // create some index\n+ final String indexName = randomAlphaOfLength(10).toLowerCase(Locale.ROOT);\n+ createIndex(\n+ indexName,\n+ 
Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).build()\n+ );\n+ // get current disk space usage (for all indices on the node)\n+ IndicesStatsResponse stats = indicesAdmin().prepareStats().clear().setStore(true).get();\n+ long usedDiskSpaceAfterIndexing = stats.getTotal().getStore().sizeInBytes();\n+ // restrict the total disk space such that the next merge does not have sufficient disk space\n+ long insufficientTotalDiskSpace = usedDiskSpaceAfterIndexing + MERGE_DISK_HIGH_WATERMARK_BYTES - randomLongBetween(1L, 10L);\n+ setTotalSpace(node, insufficientTotalDiskSpace);\n+ // node stats' FS stats should report that there is insufficient disk space available\n+ assertBusy(() -> {\n+ NodesStatsResponse nodesStatsResponse = client().admin().cluster().prepareNodesStats().setFs(true).get();\n+ assertThat(nodesStatsResponse.getNodes().size(), equalTo(1));\n+ NodeStats nodeStats = nodesStatsResponse.getNodes().get(0);\n+ assertThat(nodeStats.getFs().getTotal().getTotal().getBytes(), equalTo(insufficientTotalDiskSpace));\n+ assertThat(nodeStats.getFs().getTotal().getAvailable().getBytes(), lessThan(MERGE_DISK_HIGH_WATERMARK_BYTES));\n+ });\n+ int indexingRounds = randomIntBetween(2, 5);\n+ while (indexingRounds-- > 0) {\n+ indexRandom(\n+ true,\n+ true,\n+ true,\n+ false,\n+ IntStream.range(1, randomIntBetween(2, 5))\n+ .mapToObj(i -> prepareIndex(indexName).setSource(\"field\", randomAlphaOfLength(50)))\n+ .toList()\n+ );\n+ }\n+ // start force merging (which is blocking) on a separate thread\n+ Thread forceMergeThread = new Thread(\n+ // the max segments argument makes it a blocking call\n+ () -> assertNoFailures(indicesAdmin().prepareForceMerge(indexName).setMaxNumSegments(1).get())\n+ );\n+ forceMergeThread.start();\n+ assertBusy(() -> {\n+ // merge executor says merging is blocked due to insufficient disk space while there is a single merge task enqueued\n+ assertThat(threadPoolMergeExecutorService.getMergeTasksQueueLength(), equalTo(1));\n+ assertTrue(threadPoolMergeExecutorService.isMergingBlockedDueToInsufficientDiskSpace());\n+ // telemetry says that there are indeed some segments enqueued to be merged\n+ testTelemetryPlugin.collect();\n+ assertThat(\n+ testTelemetryPlugin.getLongGaugeMeasurement(MergeMetrics.MERGE_SEGMENTS_QUEUED_USAGE).getLast().getLong(),\n+ greaterThan(0L)\n+ );\n+ // but still no merges are currently running\n+ assertThat(\n+ testTelemetryPlugin.getLongGaugeMeasurement(MergeMetrics.MERGE_SEGMENTS_RUNNING_USAGE).getLast().getLong(),\n+ equalTo(0L)\n+ );\n+ // indices stats also says that no merge is currently running (blocked merges are NOT considered as \"running\")\n+ IndicesStatsResponse indicesStatsResponse = client().admin().indices().prepareStats(indexName).setMerge(true).get();\n+ long currentMergeCount = indicesStatsResponse.getIndices().get(indexName).getPrimaries().merge.getCurrent();\n+ assertThat(currentMergeCount, equalTo(0L));\n+ });\n+ // the force merge call is still blocked\n+ assertTrue(forceMergeThread.isAlive());\n+ // merge executor still confirms merging is blocked due to insufficient disk space\n+ assertTrue(threadPoolMergeExecutorService.isMergingBlockedDueToInsufficientDiskSpace());\n+ // make disk space available in order to unblock the merge\n+ if (randomBoolean()) {\n+ setTotalSpace(node, Long.MAX_VALUE);\n+ } else {\n+ updateClusterSettings(\n+ Settings.builder().put(ThreadPoolMergeExecutorService.INDICES_MERGE_DISK_HIGH_WATERMARK_SETTING.getKey(), \"0b\")\n+ );\n+ }\n+ // 
assert that all the merges are now done and that the force-merge call returns\n+ assertBusy(() -> {\n+ IndicesStatsResponse indicesStatsResponse = client().admin().indices().prepareStats(indexName).setMerge(true).get();\n+ long currentMergeCount = indicesStatsResponse.getIndices().get(indexName).getPrimaries().merge.getCurrent();\n+ // NO merging is in progress\n+ assertThat(currentMergeCount, equalTo(0L));\n+ long totalMergeCount = indicesStatsResponse.getIndices().get(indexName).getPrimaries().merge.getTotal();\n+ assertThat(totalMergeCount, greaterThan(0L));\n+ // force merge call returned\n+ assertFalse(forceMergeThread.isAlive());", - "comment_created_at": "2025-06-27T12:33:57+00:00", - "comment_author": "henningandersen", - "comment_body": "Could we instead wait for the force merge to complete first? If we go for a future, we can use `future.get()` here (or safeGet). If not, we can use `forcemergeThread.join`. That way I think we can avoid the `assertBusy`, since all the things here should hopefully be up to date.", - "pr_file_module": null - }, - { - "comment_id": "2173598419", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130189, - "pr_file": "server/src/internalClusterTest/java/org/elasticsearch/index/engine/MergeWithLowDiskSpaceIT.java", - "discussion_id": "2171949919", - "commented_code": "@@ -155,8 +170,125 @@ public void testShardCloseWhenDiskSpaceInsufficient() throws Exception {\n });\n }\n \n+ public void testForceMergeIsBlockedThenUnblocked() throws Exception {\n+ String node = internalCluster().startNode();\n+ ensureStableCluster(1);\n+ setTotalSpace(node, Long.MAX_VALUE);\n+ ThreadPoolMergeExecutorService threadPoolMergeExecutorService = internalCluster().getInstance(IndicesService.class, node)\n+ .getThreadPoolMergeExecutorService();\n+ TestTelemetryPlugin testTelemetryPlugin = getTelemetryPlugin(node);\n+ // create some index\n+ final String indexName = randomAlphaOfLength(10).toLowerCase(Locale.ROOT);\n+ createIndex(\n+ indexName,\n+ Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).build()\n+ );\n+ // get current disk space usage (for all indices on the node)\n+ IndicesStatsResponse stats = indicesAdmin().prepareStats().clear().setStore(true).get();\n+ long usedDiskSpaceAfterIndexing = stats.getTotal().getStore().sizeInBytes();\n+ // restrict the total disk space such that the next merge does not have sufficient disk space\n+ long insufficientTotalDiskSpace = usedDiskSpaceAfterIndexing + MERGE_DISK_HIGH_WATERMARK_BYTES - randomLongBetween(1L, 10L);\n+ setTotalSpace(node, insufficientTotalDiskSpace);\n+ // node stats' FS stats should report that there is insufficient disk space available\n+ assertBusy(() -> {\n+ NodesStatsResponse nodesStatsResponse = client().admin().cluster().prepareNodesStats().setFs(true).get();\n+ assertThat(nodesStatsResponse.getNodes().size(), equalTo(1));\n+ NodeStats nodeStats = nodesStatsResponse.getNodes().get(0);\n+ assertThat(nodeStats.getFs().getTotal().getTotal().getBytes(), equalTo(insufficientTotalDiskSpace));\n+ assertThat(nodeStats.getFs().getTotal().getAvailable().getBytes(), lessThan(MERGE_DISK_HIGH_WATERMARK_BYTES));\n+ });\n+ int indexingRounds = randomIntBetween(2, 5);\n+ while (indexingRounds-- > 0) {\n+ indexRandom(\n+ true,\n+ true,\n+ true,\n+ false,\n+ IntStream.range(1, randomIntBetween(2, 5))\n+ .mapToObj(i -> prepareIndex(indexName).setSource(\"field\", randomAlphaOfLength(50)))\n+ .toList()\n+ );\n+ }\n+ // start force merging (which is 
blocking) on a separate thread\n+ Thread forceMergeThread = new Thread(\n+ // the max segments argument makes it a blocking call\n+ () -> assertNoFailures(indicesAdmin().prepareForceMerge(indexName).setMaxNumSegments(1).get())\n+ );\n+ forceMergeThread.start();\n+ assertBusy(() -> {\n+ // merge executor says merging is blocked due to insufficient disk space while there is a single merge task enqueued\n+ assertThat(threadPoolMergeExecutorService.getMergeTasksQueueLength(), equalTo(1));\n+ assertTrue(threadPoolMergeExecutorService.isMergingBlockedDueToInsufficientDiskSpace());\n+ // telemetry says that there are indeed some segments enqueued to be merged\n+ testTelemetryPlugin.collect();\n+ assertThat(\n+ testTelemetryPlugin.getLongGaugeMeasurement(MergeMetrics.MERGE_SEGMENTS_QUEUED_USAGE).getLast().getLong(),\n+ greaterThan(0L)\n+ );\n+ // but still no merges are currently running\n+ assertThat(\n+ testTelemetryPlugin.getLongGaugeMeasurement(MergeMetrics.MERGE_SEGMENTS_RUNNING_USAGE).getLast().getLong(),\n+ equalTo(0L)\n+ );\n+ // indices stats also says that no merge is currently running (blocked merges are NOT considered as \"running\")\n+ IndicesStatsResponse indicesStatsResponse = client().admin().indices().prepareStats(indexName).setMerge(true).get();\n+ long currentMergeCount = indicesStatsResponse.getIndices().get(indexName).getPrimaries().merge.getCurrent();\n+ assertThat(currentMergeCount, equalTo(0L));\n+ });\n+ // the force merge call is still blocked\n+ assertTrue(forceMergeThread.isAlive());\n+ // merge executor still confirms merging is blocked due to insufficient disk space\n+ assertTrue(threadPoolMergeExecutorService.isMergingBlockedDueToInsufficientDiskSpace());\n+ // make disk space available in order to unblock the merge\n+ if (randomBoolean()) {\n+ setTotalSpace(node, Long.MAX_VALUE);\n+ } else {\n+ updateClusterSettings(\n+ Settings.builder().put(ThreadPoolMergeExecutorService.INDICES_MERGE_DISK_HIGH_WATERMARK_SETTING.getKey(), \"0b\")\n+ );\n+ }\n+ // assert that all the merges are now done and that the force-merge call returns\n+ assertBusy(() -> {\n+ IndicesStatsResponse indicesStatsResponse = client().admin().indices().prepareStats(indexName).setMerge(true).get();\n+ long currentMergeCount = indicesStatsResponse.getIndices().get(indexName).getPrimaries().merge.getCurrent();\n+ // NO merging is in progress\n+ assertThat(currentMergeCount, equalTo(0L));\n+ long totalMergeCount = indicesStatsResponse.getIndices().get(indexName).getPrimaries().merge.getTotal();\n+ assertThat(totalMergeCount, greaterThan(0L));\n+ // force merge call returned\n+ assertFalse(forceMergeThread.isAlive());", - "comment_created_at": "2025-06-29T05:00:58+00:00", - "comment_author": "albertzaharovits", - "comment_body": "Great suggestion too, thanks! 
(also handled as part of https://github.com/elastic/elasticsearch/pull/130189/commits/3452dcea85c813a38e83bdbc17af67b610e38bfd)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1559824642", - "pr_number": 107188, - "pr_file": "x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteTrainedModelAction.java", - "created_at": "2024-04-10T17:19:05+00:00", - "commented_code": "}\n }\n\n protected boolean modelExists(String modelId) {", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "1559824642", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 107188, - "pr_file": "x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteTrainedModelAction.java", - "discussion_id": "1559824642", - "commented_code": "@@ -228,6 +244,39 @@ private void deleteModel(DeleteTrainedModelAction.Request request, ClusterState\n }\n }\n \n+ protected boolean modelExists(String modelId) {", - "comment_created_at": "2024-04-10T17:19:05+00:00", - "comment_author": "davidkyle", - "comment_body": "You can avoid the countdown latch and hence blocking the calling thread by using a listener. \r\n\r\nYou don't have to timeout the call to `trainedModelProvider.getTrainedModel()` if it does timeout simply out let the error propagate from the call. \r\n\r\n```\r\n private void modelExists(String modelId, ActionListener listener) {\r\n trainedModelProvider.getTrainedModel(modelId, GetTrainedModelsAction.Includes.empty(), null,\r\n ActionListener.wrap(\r\n model -> listener.onResponse(Boolean.TRUE),\r\n exception -> {\r\n if (ExceptionsHelper.unwrapCause(exception) instanceof ResourceNotFoundException) {\r\n listener.onResponse(Boolean.FALSE);\r\n } else {\r\n listener.onFailure(exception);\r\n }\r\n }\r\n )\r\n );\r\n }\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1561275004", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 107188, - "pr_file": "x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteTrainedModelAction.java", - "discussion_id": "1559824642", - "commented_code": "@@ -228,6 +244,39 @@ private void deleteModel(DeleteTrainedModelAction.Request request, ClusterState\n }\n }\n \n+ protected boolean modelExists(String modelId) {", - "comment_created_at": "2024-04-11T16:04:54+00:00", - "comment_author": "davidkyle", - "comment_body": "After you make the `modelExists()` function async with a listener you will need to chain the various processing steps together. 
The best way to do this is use a [SubscribableListener](https://github.com/elastic/elasticsearch/blob/main/server/src/main/java/org/elasticsearch/action/support/SubscribableListener.java) \r\n\r\nHere's an example if it being used:\r\nhttps://github.com/elastic/elasticsearch/blob/main/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportGetTrainedModelsStatsAction.java#L128", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1559831450", - "pr_number": 107188, - "pr_file": "x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteTrainedModelAction.java", - "created_at": "2024-04-10T17:25:50+00:00", - "commented_code": "}\n }\n\n protected boolean modelExists(String modelId) {\n CountDownLatch latch = new CountDownLatch(1);\n final AtomicBoolean modelExists = new AtomicBoolean(false);\n\n ActionListener trainedModelListener = new ActionListener<>() {\n @Override\n public void onResponse(TrainedModelConfig config) {\n modelExists.set(true);\n latch.countDown();\n }\n\n @Override\n public void onFailure(Exception e) {\n logger.error(\"Failed to retrieve model {}: {}\", modelId, e.getMessage(), e);\n latch.countDown();\n }\n };\n\n trainedModelProvider.getTrainedModel(modelId, GetTrainedModelsAction.Includes.empty(), null, trainedModelListener);\n\n try {\n boolean latchReached = latch.await(5, TimeUnit.SECONDS);\n\n if (latchReached == false) {\n throw new ElasticsearchException(\"Timeout while waiting for trained model to be retrieved\");\n }\n } catch (InterruptedException e) {\n throw new ElasticsearchException(\"Unexpected exception\", e);", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "1559831450", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 107188, - "pr_file": "x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteTrainedModelAction.java", - "discussion_id": "1559831450", - "commented_code": "@@ -228,6 +244,39 @@ private void deleteModel(DeleteTrainedModelAction.Request request, ClusterState\n }\n }\n \n+ protected boolean modelExists(String modelId) {\n+ CountDownLatch latch = new CountDownLatch(1);\n+ final AtomicBoolean modelExists = new AtomicBoolean(false);\n+\n+ ActionListener trainedModelListener = new ActionListener<>() {\n+ @Override\n+ public void onResponse(TrainedModelConfig config) {\n+ modelExists.set(true);\n+ latch.countDown();\n+ }\n+\n+ @Override\n+ public void onFailure(Exception e) {\n+ logger.error(\"Failed to retrieve model {}: {}\", modelId, e.getMessage(), e);\n+ latch.countDown();\n+ }\n+ };\n+\n+ trainedModelProvider.getTrainedModel(modelId, GetTrainedModelsAction.Includes.empty(), null, trainedModelListener);\n+\n+ try {\n+ boolean latchReached = latch.await(5, TimeUnit.SECONDS);\n+\n+ if (latchReached == false) {\n+ throw new ElasticsearchException(\"Timeout while waiting for trained model to be retrieved\");\n+ }\n+ } catch (InterruptedException e) {\n+ throw new ElasticsearchException(\"Unexpected exception\", e);", - "comment_created_at": "2024-04-10T17:25:50+00:00", - "comment_author": "davidkyle", - "comment_body": "This code is not necessary if you take my suggestion but in Java it's best practice in to reset the interrupt flag with `Thread.currentThread().interrupt();`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1559910187", - "pr_number": 107188, - "pr_file": "x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteTrainedModelAction.java", - "created_at": 
"2024-04-10T18:42:46+00:00", - "commented_code": "}\n }\n\n protected boolean modelExists(String modelId) {\n CountDownLatch latch = new CountDownLatch(1);\n final AtomicBoolean modelExists = new AtomicBoolean(false);\n\n ActionListener trainedModelListener = new ActionListener<>() {\n @Override\n public void onResponse(TrainedModelConfig config) {\n modelExists.set(true);\n latch.countDown();\n }\n\n @Override\n public void onFailure(Exception e) {\n logger.error(\"Failed to retrieve model {}: {}\", modelId, e.getMessage(), e);\n latch.countDown();\n }\n };\n\n trainedModelProvider.getTrainedModel(modelId, GetTrainedModelsAction.Includes.empty(), null, trainedModelListener);", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "1559910187", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 107188, - "pr_file": "x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteTrainedModelAction.java", - "discussion_id": "1559910187", - "commented_code": "@@ -228,6 +244,39 @@ private void deleteModel(DeleteTrainedModelAction.Request request, ClusterState\n }\n }\n \n+ protected boolean modelExists(String modelId) {\n+ CountDownLatch latch = new CountDownLatch(1);\n+ final AtomicBoolean modelExists = new AtomicBoolean(false);\n+\n+ ActionListener trainedModelListener = new ActionListener<>() {\n+ @Override\n+ public void onResponse(TrainedModelConfig config) {\n+ modelExists.set(true);\n+ latch.countDown();\n+ }\n+\n+ @Override\n+ public void onFailure(Exception e) {\n+ logger.error(\"Failed to retrieve model {}: {}\", modelId, e.getMessage(), e);\n+ latch.countDown();\n+ }\n+ };\n+\n+ trainedModelProvider.getTrainedModel(modelId, GetTrainedModelsAction.Includes.empty(), null, trainedModelListener);", - "comment_created_at": "2024-04-10T18:42:46+00:00", - "comment_author": "maxhniebergall", - "comment_body": "If we don't pass a parent task to this request, it wont be cancelable. I think it would be better to pass in the task from the masterOperation here. ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1559919796", - "pr_number": 107188, - "pr_file": "x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteTrainedModelAction.java", - "created_at": "2024-04-10T18:52:49+00:00", - "commented_code": "}\n }\n\n protected boolean modelExists(String modelId) {\n CountDownLatch latch = new CountDownLatch(1);\n final AtomicBoolean modelExists = new AtomicBoolean(false);", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "1559919796", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 107188, - "pr_file": "x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteTrainedModelAction.java", - "discussion_id": "1559919796", - "commented_code": "@@ -228,6 +244,39 @@ private void deleteModel(DeleteTrainedModelAction.Request request, ClusterState\n }\n }\n \n+ protected boolean modelExists(String modelId) {\n+ CountDownLatch latch = new CountDownLatch(1);\n+ final AtomicBoolean modelExists = new AtomicBoolean(false);\n+", - "comment_created_at": "2024-04-10T18:52:49+00:00", - "comment_author": "maxhniebergall", - "comment_body": "To avoid using a latch and requiring a timeout, I think we could replace this function with an actionListener. What do you think? 
", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-prevent-redundant-operations.json b/_reviewers/elasticsearch-prevent-redundant-operations.json new file mode 100644 index 0000000..6e834ff --- /dev/null +++ b/_reviewers/elasticsearch-prevent-redundant-operations.json @@ -0,0 +1,316 @@ +[ + { + "discussion_id": "1651609389", + "pr_number": 109949, + "pr_file": "server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java", + "created_at": "2024-06-24T21:05:28+00:00", + "commented_code": "long version,\n UpdateHelper.Result updateResult\n ) {\n final var mapperService = primary.mapperService();\n try {\n final var sourceToMerge = new CompressedXContent(result.getRequiredMappingUpdate());\n final var mapperService = primary.mapperService();\n final long initialMappingVersion = mapperService.mappingVersion();\n final var existingMapper = mapperService.documentMapper();\n if (existingMapper != null && sourceToMerge.equals(existingMapper.mappingSource())) {\n context.resetForNoopMappingUpdateRetry(initialMappingVersion);\n return true;\n }", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "1651609389", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 109949, + "pr_file": "server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java", + "discussion_id": "1651609389", + "commented_code": "@@ -405,16 +403,24 @@ private static boolean handleMappingUpdateRequired(\n long version,\n UpdateHelper.Result updateResult\n ) {\n- final var mapperService = primary.mapperService();\n try {\n+ final var sourceToMerge = new CompressedXContent(result.getRequiredMappingUpdate());\n+ final var mapperService = primary.mapperService();\n+ final long initialMappingVersion = mapperService.mappingVersion();\n+ final var existingMapper = mapperService.documentMapper();\n+ if (existingMapper != null && sourceToMerge.equals(existingMapper.mappingSource())) {\n+ context.resetForNoopMappingUpdateRetry(initialMappingVersion);\n+ return true;\n+ }", + "comment_created_at": "2024-06-24T21:05:28+00:00", + "comment_author": "javanna", + "comment_body": "Do you have a scenario in mind where this will help specifically? You are comparing the dynamic mapping update with the existing mappings, when are those two going to be the same?", + "pr_file_module": null + }, + { + "comment_id": "1652749658", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 109949, + "pr_file": "server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java", + "discussion_id": "1651609389", + "commented_code": "@@ -405,16 +403,24 @@ private static boolean handleMappingUpdateRequired(\n long version,\n UpdateHelper.Result updateResult\n ) {\n- final var mapperService = primary.mapperService();\n try {\n+ final var sourceToMerge = new CompressedXContent(result.getRequiredMappingUpdate());\n+ final var mapperService = primary.mapperService();\n+ final long initialMappingVersion = mapperService.mappingVersion();\n+ final var existingMapper = mapperService.documentMapper();\n+ if (existingMapper != null && sourceToMerge.equals(existingMapper.mappingSource())) {\n+ context.resetForNoopMappingUpdateRetry(initialMappingVersion);\n+ return true;\n+ }", + "comment_created_at": "2024-06-25T12:42:52+00:00", + "comment_author": "original-brownbear", + "comment_body": "Yea, after failing over to a new index when a bunch of data nodes are blasting the master all with the same update. 
No need to waste energy on all write threads and the master for longer than absolutely necessary IMO :)", + "pr_file_module": null + }, + { + "comment_id": "1653529791", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 109949, + "pr_file": "server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java", + "discussion_id": "1651609389", + "commented_code": "@@ -405,16 +403,24 @@ private static boolean handleMappingUpdateRequired(\n long version,\n UpdateHelper.Result updateResult\n ) {\n- final var mapperService = primary.mapperService();\n try {\n+ final var sourceToMerge = new CompressedXContent(result.getRequiredMappingUpdate());\n+ final var mapperService = primary.mapperService();\n+ final long initialMappingVersion = mapperService.mappingVersion();\n+ final var existingMapper = mapperService.documentMapper();\n+ if (existingMapper != null && sourceToMerge.equals(existingMapper.mappingSource())) {\n+ context.resetForNoopMappingUpdateRetry(initialMappingVersion);\n+ return true;\n+ }", + "comment_created_at": "2024-06-25T20:38:44+00:00", + "comment_author": "javanna", + "comment_body": "I don't understand how the conditional is matched in that scenario: how can the existing mappings be exactly the same as the dynamic mapping update? The existing mappings hold all the fields that have been added until then, and the new mappings hold the updates to be made following indexing of a new document. Say that we are indexing the first doc of a new index, the dynamic mapping update would contain a lot of fields, and the existing mappings would be empty? I am sure I am missing something, can you help me better understand?", + "pr_file_module": null + }, + { + "comment_id": "1653536880", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 109949, + "pr_file": "server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java", + "discussion_id": "1651609389", + "commented_code": "@@ -405,16 +403,24 @@ private static boolean handleMappingUpdateRequired(\n long version,\n UpdateHelper.Result updateResult\n ) {\n- final var mapperService = primary.mapperService();\n try {\n+ final var sourceToMerge = new CompressedXContent(result.getRequiredMappingUpdate());\n+ final var mapperService = primary.mapperService();\n+ final long initialMappingVersion = mapperService.mappingVersion();\n+ final var existingMapper = mapperService.documentMapper();\n+ if (existingMapper != null && sourceToMerge.equals(existingMapper.mappingSource())) {\n+ context.resetForNoopMappingUpdateRetry(initialMappingVersion);\n+ return true;\n+ }", + "comment_created_at": "2024-06-25T20:45:02+00:00", + "comment_author": "original-brownbear", + "comment_body": "> I don't understand how the conditional is matched in that scenario: how can the existing mappings be exactly the same as the dynamic mapping update?\r\n\r\nI think that's when we bootstrap a new index that uses dynamic mapping updates. If you index the same kind of logs that will trigger the same mapping in parallel, you will and should get the exact same mapping here every time (like think some metrics line from Metricbeat or so).\r\n\r\n> Say that we are indexing the first doc of a new index, the dynamic mapping update would contain a lot of fields, and the existing mappings would be empty\r\n\r\nJup exactly. E.g. Elasticsearch detailed node stats I'd say :P that's a couple hundred fields that would get created in the dynamic updates. 
No need to ship that out from write threads more often than absolutely necessary every time we roll-over an index.\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1633466723", + "pr_number": 109390, + "pr_file": "server/src/main/java/org/elasticsearch/synonyms/SynonymsManagementAPIService.java", + "created_at": "2024-06-10T15:41:56+00:00", + "commented_code": "}\n\n public void putSynonymRule(String synonymsSetId, SynonymRule synonymRule, ActionListener listener) {\n checkSynonymSetExists(synonymsSetId, listener.delegateFailure((l1, obj) -> {\n try {\n IndexRequest indexRequest = createSynonymRuleIndexRequest(synonymsSetId, synonymRule).setRefreshPolicy(\n WriteRequest.RefreshPolicy.IMMEDIATE\n );\n client.index(indexRequest, l1.delegateFailure((l2, indexResponse) -> {\n UpdateSynonymsResultStatus updateStatus = indexResponse.status() == RestStatus.CREATED\n ? UpdateSynonymsResultStatus.CREATED\n : UpdateSynonymsResultStatus.UPDATED;\n\n reloadAnalyzers(synonymsSetId, false, l2, updateStatus);\n checkSynonymSetExists(synonymsSetId, listener.delegateFailureAndWrap((l1, obj) -> {\n // Count synonym rules to check if we're at maximum\n client.prepareSearch(SYNONYMS_ALIAS_NAME)\n .setQuery(\n QueryBuilders.boolQuery()\n .must(QueryBuilders.termQuery(SYNONYMS_SET_FIELD, synonymsSetId))\n .filter(QueryBuilders.termQuery(OBJECT_TYPE_FIELD, SYNONYM_RULE_OBJECT_TYPE))\n )\n .setSize(0)\n .setPreference(Preference.LOCAL.type())\n .setTrackTotalHits(true)\n .execute(l1.delegateFailureAndWrap((searchListener, searchResponse) -> {\n long synonymsSetSize = searchResponse.getHits().getTotalHits().value;\n if (synonymsSetSize >= MAX_SYNONYMS_SETS) {\n // We could potentially update a synonym rule when we're at max capacity, but we're keeping this simple", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "1633466723", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 109390, + "pr_file": "server/src/main/java/org/elasticsearch/synonyms/SynonymsManagementAPIService.java", + "discussion_id": "1633466723", + "commented_code": "@@ -308,21 +355,42 @@ public void putSynonymsSet(String synonymSetId, SynonymRule[] synonymsSet, Actio\n }\n \n public void putSynonymRule(String synonymsSetId, SynonymRule synonymRule, ActionListener listener) {\n- checkSynonymSetExists(synonymsSetId, listener.delegateFailure((l1, obj) -> {\n- try {\n- IndexRequest indexRequest = createSynonymRuleIndexRequest(synonymsSetId, synonymRule).setRefreshPolicy(\n- WriteRequest.RefreshPolicy.IMMEDIATE\n- );\n- client.index(indexRequest, l1.delegateFailure((l2, indexResponse) -> {\n- UpdateSynonymsResultStatus updateStatus = indexResponse.status() == RestStatus.CREATED\n- ? 
UpdateSynonymsResultStatus.CREATED\n- : UpdateSynonymsResultStatus.UPDATED;\n-\n- reloadAnalyzers(synonymsSetId, false, l2, updateStatus);\n+ checkSynonymSetExists(synonymsSetId, listener.delegateFailureAndWrap((l1, obj) -> {\n+ // Count synonym rules to check if we're at maximum\n+ client.prepareSearch(SYNONYMS_ALIAS_NAME)\n+ .setQuery(\n+ QueryBuilders.boolQuery()\n+ .must(QueryBuilders.termQuery(SYNONYMS_SET_FIELD, synonymsSetId))\n+ .filter(QueryBuilders.termQuery(OBJECT_TYPE_FIELD, SYNONYM_RULE_OBJECT_TYPE))\n+ )\n+ .setSize(0)\n+ .setPreference(Preference.LOCAL.type())\n+ .setTrackTotalHits(true)\n+ .execute(l1.delegateFailureAndWrap((searchListener, searchResponse) -> {\n+ long synonymsSetSize = searchResponse.getHits().getTotalHits().value;\n+ if (synonymsSetSize >= MAX_SYNONYMS_SETS) {\n+ // We could potentially update a synonym rule when we're at max capacity, but we're keeping this simple", + "comment_created_at": "2024-06-10T15:41:56+00:00", + "comment_author": "kderusso", + "comment_body": "We could consider supporting updates here by adding a `must_not` clause to the query on the synonym ID? Then it would be below the max if it existed.", + "pr_file_module": null + }, + { + "comment_id": "1633545813", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 109390, + "pr_file": "server/src/main/java/org/elasticsearch/synonyms/SynonymsManagementAPIService.java", + "discussion_id": "1633466723", + "commented_code": "@@ -308,21 +355,42 @@ public void putSynonymsSet(String synonymSetId, SynonymRule[] synonymsSet, Actio\n }\n \n public void putSynonymRule(String synonymsSetId, SynonymRule synonymRule, ActionListener listener) {\n- checkSynonymSetExists(synonymsSetId, listener.delegateFailure((l1, obj) -> {\n- try {\n- IndexRequest indexRequest = createSynonymRuleIndexRequest(synonymsSetId, synonymRule).setRefreshPolicy(\n- WriteRequest.RefreshPolicy.IMMEDIATE\n- );\n- client.index(indexRequest, l1.delegateFailure((l2, indexResponse) -> {\n- UpdateSynonymsResultStatus updateStatus = indexResponse.status() == RestStatus.CREATED\n- ? UpdateSynonymsResultStatus.CREATED\n- : UpdateSynonymsResultStatus.UPDATED;\n-\n- reloadAnalyzers(synonymsSetId, false, l2, updateStatus);\n+ checkSynonymSetExists(synonymsSetId, listener.delegateFailureAndWrap((l1, obj) -> {\n+ // Count synonym rules to check if we're at maximum\n+ client.prepareSearch(SYNONYMS_ALIAS_NAME)\n+ .setQuery(\n+ QueryBuilders.boolQuery()\n+ .must(QueryBuilders.termQuery(SYNONYMS_SET_FIELD, synonymsSetId))\n+ .filter(QueryBuilders.termQuery(OBJECT_TYPE_FIELD, SYNONYM_RULE_OBJECT_TYPE))\n+ )\n+ .setSize(0)\n+ .setPreference(Preference.LOCAL.type())\n+ .setTrackTotalHits(true)\n+ .execute(l1.delegateFailureAndWrap((searchListener, searchResponse) -> {\n+ long synonymsSetSize = searchResponse.getHits().getTotalHits().value;\n+ if (synonymsSetSize >= MAX_SYNONYMS_SETS) {\n+ // We could potentially update a synonym rule when we're at max capacity, but we're keeping this simple", + "comment_created_at": "2024-06-10T16:44:43+00:00", + "comment_author": "carlosdelest", + "comment_body": "That's a good way of doing this! 
Again, I feel it a bit complicated for such an edge case.", + "pr_file_module": null + }, + { + "comment_id": "1638117253", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 109390, + "pr_file": "server/src/main/java/org/elasticsearch/synonyms/SynonymsManagementAPIService.java", + "discussion_id": "1633466723", + "commented_code": "@@ -308,21 +355,42 @@ public void putSynonymsSet(String synonymSetId, SynonymRule[] synonymsSet, Actio\n }\n \n public void putSynonymRule(String synonymsSetId, SynonymRule synonymRule, ActionListener listener) {\n- checkSynonymSetExists(synonymsSetId, listener.delegateFailure((l1, obj) -> {\n- try {\n- IndexRequest indexRequest = createSynonymRuleIndexRequest(synonymsSetId, synonymRule).setRefreshPolicy(\n- WriteRequest.RefreshPolicy.IMMEDIATE\n- );\n- client.index(indexRequest, l1.delegateFailure((l2, indexResponse) -> {\n- UpdateSynonymsResultStatus updateStatus = indexResponse.status() == RestStatus.CREATED\n- ? UpdateSynonymsResultStatus.CREATED\n- : UpdateSynonymsResultStatus.UPDATED;\n-\n- reloadAnalyzers(synonymsSetId, false, l2, updateStatus);\n+ checkSynonymSetExists(synonymsSetId, listener.delegateFailureAndWrap((l1, obj) -> {\n+ // Count synonym rules to check if we're at maximum\n+ client.prepareSearch(SYNONYMS_ALIAS_NAME)\n+ .setQuery(\n+ QueryBuilders.boolQuery()\n+ .must(QueryBuilders.termQuery(SYNONYMS_SET_FIELD, synonymsSetId))\n+ .filter(QueryBuilders.termQuery(OBJECT_TYPE_FIELD, SYNONYM_RULE_OBJECT_TYPE))\n+ )\n+ .setSize(0)\n+ .setPreference(Preference.LOCAL.type())\n+ .setTrackTotalHits(true)\n+ .execute(l1.delegateFailureAndWrap((searchListener, searchResponse) -> {\n+ long synonymsSetSize = searchResponse.getHits().getTotalHits().value;\n+ if (synonymsSetSize >= MAX_SYNONYMS_SETS) {\n+ // We could potentially update a synonym rule when we're at max capacity, but we're keeping this simple", + "comment_created_at": "2024-06-13T12:24:10+00:00", + "comment_author": "kderusso", + "comment_body": "I do think it's worth taking this edge case into consideration, because you know someone is going to try it and file a bug later. ", + "pr_file_module": null + }, + { + "comment_id": "1638197058", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 109390, + "pr_file": "server/src/main/java/org/elasticsearch/synonyms/SynonymsManagementAPIService.java", + "discussion_id": "1633466723", + "commented_code": "@@ -308,21 +355,42 @@ public void putSynonymsSet(String synonymSetId, SynonymRule[] synonymsSet, Actio\n }\n \n public void putSynonymRule(String synonymsSetId, SynonymRule synonymRule, ActionListener listener) {\n- checkSynonymSetExists(synonymsSetId, listener.delegateFailure((l1, obj) -> {\n- try {\n- IndexRequest indexRequest = createSynonymRuleIndexRequest(synonymsSetId, synonymRule).setRefreshPolicy(\n- WriteRequest.RefreshPolicy.IMMEDIATE\n- );\n- client.index(indexRequest, l1.delegateFailure((l2, indexResponse) -> {\n- UpdateSynonymsResultStatus updateStatus = indexResponse.status() == RestStatus.CREATED\n- ? 
UpdateSynonymsResultStatus.CREATED\n- : UpdateSynonymsResultStatus.UPDATED;\n-\n- reloadAnalyzers(synonymsSetId, false, l2, updateStatus);\n+ checkSynonymSetExists(synonymsSetId, listener.delegateFailureAndWrap((l1, obj) -> {\n+ // Count synonym rules to check if we're at maximum\n+ client.prepareSearch(SYNONYMS_ALIAS_NAME)\n+ .setQuery(\n+ QueryBuilders.boolQuery()\n+ .must(QueryBuilders.termQuery(SYNONYMS_SET_FIELD, synonymsSetId))\n+ .filter(QueryBuilders.termQuery(OBJECT_TYPE_FIELD, SYNONYM_RULE_OBJECT_TYPE))\n+ )\n+ .setSize(0)\n+ .setPreference(Preference.LOCAL.type())\n+ .setTrackTotalHits(true)\n+ .execute(l1.delegateFailureAndWrap((searchListener, searchResponse) -> {\n+ long synonymsSetSize = searchResponse.getHits().getTotalHits().value;\n+ if (synonymsSetSize >= MAX_SYNONYMS_SETS) {\n+ // We could potentially update a synonym rule when we're at max capacity, but we're keeping this simple", + "comment_created_at": "2024-06-13T13:14:54+00:00", + "comment_author": "jimczi", + "comment_body": "Agreed, it's not really an edge case to be at capacity and to update the existing rules. I think we need a more robust approach here like executing all updates on the master node and executing the updates sequentially.", + "pr_file_module": null + }, + { + "comment_id": "1638282360", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 109390, + "pr_file": "server/src/main/java/org/elasticsearch/synonyms/SynonymsManagementAPIService.java", + "discussion_id": "1633466723", + "commented_code": "@@ -308,21 +355,42 @@ public void putSynonymsSet(String synonymSetId, SynonymRule[] synonymsSet, Actio\n }\n \n public void putSynonymRule(String synonymsSetId, SynonymRule synonymRule, ActionListener listener) {\n- checkSynonymSetExists(synonymsSetId, listener.delegateFailure((l1, obj) -> {\n- try {\n- IndexRequest indexRequest = createSynonymRuleIndexRequest(synonymsSetId, synonymRule).setRefreshPolicy(\n- WriteRequest.RefreshPolicy.IMMEDIATE\n- );\n- client.index(indexRequest, l1.delegateFailure((l2, indexResponse) -> {\n- UpdateSynonymsResultStatus updateStatus = indexResponse.status() == RestStatus.CREATED\n- ? UpdateSynonymsResultStatus.CREATED\n- : UpdateSynonymsResultStatus.UPDATED;\n-\n- reloadAnalyzers(synonymsSetId, false, l2, updateStatus);\n+ checkSynonymSetExists(synonymsSetId, listener.delegateFailureAndWrap((l1, obj) -> {\n+ // Count synonym rules to check if we're at maximum\n+ client.prepareSearch(SYNONYMS_ALIAS_NAME)\n+ .setQuery(\n+ QueryBuilders.boolQuery()\n+ .must(QueryBuilders.termQuery(SYNONYMS_SET_FIELD, synonymsSetId))\n+ .filter(QueryBuilders.termQuery(OBJECT_TYPE_FIELD, SYNONYM_RULE_OBJECT_TYPE))\n+ )\n+ .setSize(0)\n+ .setPreference(Preference.LOCAL.type())\n+ .setTrackTotalHits(true)\n+ .execute(l1.delegateFailureAndWrap((searchListener, searchResponse) -> {\n+ long synonymsSetSize = searchResponse.getHits().getTotalHits().value;\n+ if (synonymsSetSize >= MAX_SYNONYMS_SETS) {\n+ // We could potentially update a synonym rule when we're at max capacity, but we're keeping this simple", + "comment_created_at": "2024-06-13T14:05:20+00:00", + "comment_author": "carlosdelest", + "comment_body": "> it's not really an edge case to be at capacity and to update the existing rules\r\n\r\nThis edge case happens only if we're updating an individual synonym rule when we're at max capacity. 
Updating rules in batch means that first we remove all the rules and apply the updates in a bulk request.\r\n\r\nIt sounds weird to me to update individual rules when we have 100k synonyms in a synonym set- at that scale, I would think that users do batch updates of rules, which effectively replace the existing ones.\r\n\r\nBut if y'all think this needs to be dealt with, I will! I'll update the code and ping when ready 👍 \r\n\r\n> I think we need a more robust approach here like executing all updates on the master node and executing the updates sequentially\r\n\r\nSo do you think that this should be a `TransportMasterNodeAction`? As this updates an index and not the cluster state, what would be the advantages of applying the action on the master node? \r\n\r\nAs we're doing a bulk request under the hood, doesn't that mean that we're applying the updates sequentially?", + "pr_file_module": null + }, + { + "comment_id": "1638334270", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 109390, + "pr_file": "server/src/main/java/org/elasticsearch/synonyms/SynonymsManagementAPIService.java", + "discussion_id": "1633466723", + "commented_code": "@@ -308,21 +355,42 @@ public void putSynonymsSet(String synonymSetId, SynonymRule[] synonymsSet, Actio\n }\n \n public void putSynonymRule(String synonymsSetId, SynonymRule synonymRule, ActionListener listener) {\n- checkSynonymSetExists(synonymsSetId, listener.delegateFailure((l1, obj) -> {\n- try {\n- IndexRequest indexRequest = createSynonymRuleIndexRequest(synonymsSetId, synonymRule).setRefreshPolicy(\n- WriteRequest.RefreshPolicy.IMMEDIATE\n- );\n- client.index(indexRequest, l1.delegateFailure((l2, indexResponse) -> {\n- UpdateSynonymsResultStatus updateStatus = indexResponse.status() == RestStatus.CREATED\n- ? UpdateSynonymsResultStatus.CREATED\n- : UpdateSynonymsResultStatus.UPDATED;\n-\n- reloadAnalyzers(synonymsSetId, false, l2, updateStatus);\n+ checkSynonymSetExists(synonymsSetId, listener.delegateFailureAndWrap((l1, obj) -> {\n+ // Count synonym rules to check if we're at maximum\n+ client.prepareSearch(SYNONYMS_ALIAS_NAME)\n+ .setQuery(\n+ QueryBuilders.boolQuery()\n+ .must(QueryBuilders.termQuery(SYNONYMS_SET_FIELD, synonymsSetId))\n+ .filter(QueryBuilders.termQuery(OBJECT_TYPE_FIELD, SYNONYM_RULE_OBJECT_TYPE))\n+ )\n+ .setSize(0)\n+ .setPreference(Preference.LOCAL.type())\n+ .setTrackTotalHits(true)\n+ .execute(l1.delegateFailureAndWrap((searchListener, searchResponse) -> {\n+ long synonymsSetSize = searchResponse.getHits().getTotalHits().value;\n+ if (synonymsSetSize >= MAX_SYNONYMS_SETS) {\n+ // We could potentially update a synonym rule when we're at max capacity, but we're keeping this simple", + "comment_created_at": "2024-06-13T14:36:16+00:00", + "comment_author": "jimczi", + "comment_body": "> Updating rules in batch means that first we remove all the rules and apply the updates in a bulk request.\r\n\r\nThis is another case of non-consistent updates, we need to ensure some ordering here.\r\n\r\n> So do you think that this should be a TransportMasterNodeAction? As this updates an index and not the cluster state, what would be the advantages of applying the action on the master node?\r\n\r\nHaving a single place where updates can occur but as you noticed that won't be enough. We also need to ensure that all updates are done sequentially, not in a single bulk request but globally when multiple synonyms updates are done in parallel. See the `MetadataMappingService` for an example of service that applies updates sequentially. 
\r\n\r\n", + "pr_file_module": null + }, + { + "comment_id": "1638341068", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 109390, + "pr_file": "server/src/main/java/org/elasticsearch/synonyms/SynonymsManagementAPIService.java", + "discussion_id": "1633466723", + "commented_code": "@@ -308,21 +355,42 @@ public void putSynonymsSet(String synonymSetId, SynonymRule[] synonymsSet, Actio\n }\n \n public void putSynonymRule(String synonymsSetId, SynonymRule synonymRule, ActionListener listener) {\n- checkSynonymSetExists(synonymsSetId, listener.delegateFailure((l1, obj) -> {\n- try {\n- IndexRequest indexRequest = createSynonymRuleIndexRequest(synonymsSetId, synonymRule).setRefreshPolicy(\n- WriteRequest.RefreshPolicy.IMMEDIATE\n- );\n- client.index(indexRequest, l1.delegateFailure((l2, indexResponse) -> {\n- UpdateSynonymsResultStatus updateStatus = indexResponse.status() == RestStatus.CREATED\n- ? UpdateSynonymsResultStatus.CREATED\n- : UpdateSynonymsResultStatus.UPDATED;\n-\n- reloadAnalyzers(synonymsSetId, false, l2, updateStatus);\n+ checkSynonymSetExists(synonymsSetId, listener.delegateFailureAndWrap((l1, obj) -> {\n+ // Count synonym rules to check if we're at maximum\n+ client.prepareSearch(SYNONYMS_ALIAS_NAME)\n+ .setQuery(\n+ QueryBuilders.boolQuery()\n+ .must(QueryBuilders.termQuery(SYNONYMS_SET_FIELD, synonymsSetId))\n+ .filter(QueryBuilders.termQuery(OBJECT_TYPE_FIELD, SYNONYM_RULE_OBJECT_TYPE))\n+ )\n+ .setSize(0)\n+ .setPreference(Preference.LOCAL.type())\n+ .setTrackTotalHits(true)\n+ .execute(l1.delegateFailureAndWrap((searchListener, searchResponse) -> {\n+ long synonymsSetSize = searchResponse.getHits().getTotalHits().value;\n+ if (synonymsSetSize >= MAX_SYNONYMS_SETS) {\n+ // We could potentially update a synonym rule when we're at max capacity, but we're keeping this simple", + "comment_created_at": "2024-06-13T14:40:13+00:00", + "comment_author": "carlosdelest", + "comment_body": "Thanks @jimczi , I will take a look into that.\r\n\r\nI think we should create a separate issue for ensuring updates are applied sequentially, as this is not directly related to this change?", + "pr_file_module": null + }, + { + "comment_id": "1638388512", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 109390, + "pr_file": "server/src/main/java/org/elasticsearch/synonyms/SynonymsManagementAPIService.java", + "discussion_id": "1633466723", + "commented_code": "@@ -308,21 +355,42 @@ public void putSynonymsSet(String synonymSetId, SynonymRule[] synonymsSet, Actio\n }\n \n public void putSynonymRule(String synonymsSetId, SynonymRule synonymRule, ActionListener listener) {\n- checkSynonymSetExists(synonymsSetId, listener.delegateFailure((l1, obj) -> {\n- try {\n- IndexRequest indexRequest = createSynonymRuleIndexRequest(synonymsSetId, synonymRule).setRefreshPolicy(\n- WriteRequest.RefreshPolicy.IMMEDIATE\n- );\n- client.index(indexRequest, l1.delegateFailure((l2, indexResponse) -> {\n- UpdateSynonymsResultStatus updateStatus = indexResponse.status() == RestStatus.CREATED\n- ? 
UpdateSynonymsResultStatus.CREATED\n- : UpdateSynonymsResultStatus.UPDATED;\n-\n- reloadAnalyzers(synonymsSetId, false, l2, updateStatus);\n+ checkSynonymSetExists(synonymsSetId, listener.delegateFailureAndWrap((l1, obj) -> {\n+ // Count synonym rules to check if we're at maximum\n+ client.prepareSearch(SYNONYMS_ALIAS_NAME)\n+ .setQuery(\n+ QueryBuilders.boolQuery()\n+ .must(QueryBuilders.termQuery(SYNONYMS_SET_FIELD, synonymsSetId))\n+ .filter(QueryBuilders.termQuery(OBJECT_TYPE_FIELD, SYNONYM_RULE_OBJECT_TYPE))\n+ )\n+ .setSize(0)\n+ .setPreference(Preference.LOCAL.type())\n+ .setTrackTotalHits(true)\n+ .execute(l1.delegateFailureAndWrap((searchListener, searchResponse) -> {\n+ long synonymsSetSize = searchResponse.getHits().getTotalHits().value;\n+ if (synonymsSetSize >= MAX_SYNONYMS_SETS) {\n+ // We could potentially update a synonym rule when we're at max capacity, but we're keeping this simple", + "comment_created_at": "2024-06-13T15:09:25+00:00", + "comment_author": "jimczi", + "comment_body": "I think it's related, we cannot limit the number of synonyms without ensuring that updates are applied sequentially.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2177518712", + "pr_number": 130387, + "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java", + "created_at": "2025-07-01T12:46:28+00:00", + "commented_code": "return ft;\n }\n\n /**\n * Does a `match` query generate all valid candidates for `==`? Meaning,\n * if I do a match query for any string, say `foo bar baz`, then that\n * query will find all documents that indexed the same string.\n *
\n * This should be true for most sanely configured text fields. That's\n * just how we use them for search. But it's quite possible to make\n * the index analyzer not agree with the search analyzer, for example.\n *\n *\n * So this implementation is ultra-paranoid.\n *
\n */\n private boolean matchQueryYieldsCandidateMatchesForEquality() {\n return index.getValue() == Boolean.TRUE\n && analyzers.indexAnalyzer.isConfigured() == false\n && analyzers.searchAnalyzer.isConfigured() == false;\n }", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2177518712", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130387, + "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java", + "discussion_id": "2177518712", + "commented_code": "@@ -422,6 +424,25 @@ private TextFieldType buildFieldType(\n return ft;\n }\n \n+ /**\n+ * Does a `match` query generate all valid candidates for `==`? Meaning,\n+ * if I do a match query for any string, say `foo bar baz`, then that\n+ * query will find all documents that indexed the same string.\n+ *
\n+ * This should be true for most sanely configured text fields. That's\n+ * just how we use them for search. But it's quite possible to make\n+ * the index analyzer not agree with the search analyzer, for example.\n+ *\n+ *\n+ * So this implementation is ultra-paranoid.\n+ *
\n+ */\n+ private boolean matchQueryYieldsCandidateMatchesForEquality() {\n+ return index.getValue() == Boolean.TRUE\n+ && analyzers.indexAnalyzer.isConfigured() == false\n+ && analyzers.searchAnalyzer.isConfigured() == false;\n+ }", + "comment_created_at": "2025-07-01T12:46:28+00:00", + "comment_author": "nik9000", + "comment_body": "==SEARCH FOLKS LOOK HERE==\r\n\r\nIs this paranoid enough? Like, could you break the `match` query with other settings I'm not checking? I'd love to default this to only working if there's nothing configured on the text field and then opt in configurations.", + "pr_file_module": null + }, + { + "comment_id": "2177582793", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130387, + "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java", + "discussion_id": "2177518712", + "commented_code": "@@ -422,6 +424,25 @@ private TextFieldType buildFieldType(\n return ft;\n }\n \n+ /**\n+ * Does a `match` query generate all valid candidates for `==`? Meaning,\n+ * if I do a match query for any string, say `foo bar baz`, then that\n+ * query will find all documents that indexed the same string.\n+ *
\n+ * This should be true for most sanely configured text fields. That's\n+ * just how we use them for search. But it's quite possible to make\n+ * the index analyzer not agree with the search analyzer, for example.\n+ *\n+ *\n+ * So this implementation is ultra-paranoid.\n+ *
\n+ */\n+ private boolean matchQueryYieldsCandidateMatchesForEquality() {\n+ return index.getValue() == Boolean.TRUE\n+ && analyzers.indexAnalyzer.isConfigured() == false\n+ && analyzers.searchAnalyzer.isConfigured() == false;\n+ }", + "comment_created_at": "2025-07-01T13:17:25+00:00", + "comment_author": "jimczi", + "comment_body": "This is paranoid indeed. Just checking if the analyzer are the same is not enough since we have morphological analyzer that can tokenize differently depending on the surrounding text (Kuromoji, Nori, ..).\r\nWe could have a list of `allowed` analyzers but that sounds like a good start that will handle 99% of the use cases. \r\nI can't think of another setting that would change this, are we matching the input as phrase query or as a conjunction of term?", + "pr_file_module": null + }, + { + "comment_id": "2177603114", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130387, + "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java", + "discussion_id": "2177518712", + "commented_code": "@@ -422,6 +424,25 @@ private TextFieldType buildFieldType(\n return ft;\n }\n \n+ /**\n+ * Does a `match` query generate all valid candidates for `==`? Meaning,\n+ * if I do a match query for any string, say `foo bar baz`, then that\n+ * query will find all documents that indexed the same string.\n+ *
\n+ * This should be true for most sanely configured text fields. That's\n+ * just how we use them for search. But it's quite possible to make\n+ * the index analyzer not agree with the search analyzer, for example.\n+ *\n+ *\n+ * So this implementation is ultra-paranoid.\n+ *
\n+ */\n+ private boolean matchQueryYieldsCandidateMatchesForEquality() {\n+ return index.getValue() == Boolean.TRUE\n+ && analyzers.indexAnalyzer.isConfigured() == false\n+ && analyzers.searchAnalyzer.isConfigured() == false;\n+ }", + "comment_created_at": "2025-07-01T13:24:52+00:00", + "comment_author": "nik9000", + "comment_body": "Conjunction of terms right now. I probably should do phrase to be honest.", + "pr_file_module": null + }, + { + "comment_id": "2177608120", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130387, + "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java", + "discussion_id": "2177518712", + "commented_code": "@@ -422,6 +424,25 @@ private TextFieldType buildFieldType(\n return ft;\n }\n \n+ /**\n+ * Does a `match` query generate all valid candidates for `==`? Meaning,\n+ * if I do a match query for any string, say `foo bar baz`, then that\n+ * query will find all documents that indexed the same string.\n+ *
\n+ * This should be true for most sanely configured text fields. That's\n+ * just how we use them for search. But it's quite possible to make\n+ * the index analyzer not agree with the search analyzer, for example.\n+ *\n+ *\n+ * So this implementation is ultra-paranoid.\n+ *
\n+ */\n+ private boolean matchQueryYieldsCandidateMatchesForEquality() {\n+ return index.getValue() == Boolean.TRUE\n+ && analyzers.indexAnalyzer.isConfigured() == false\n+ && analyzers.searchAnalyzer.isConfigured() == false;\n+ }", + "comment_created_at": "2025-07-01T13:26:59+00:00", + "comment_author": "nik9000", + "comment_body": "Do you think it's worth building a field setting that overrides our choices? A `true/false/null` thing - `true` means \"I know match is fine, push\", `false` means \"I know match isn't fine, never push\" and `null` means \"you figure it out\".\r\n\r\n> since we have morphological analyzer that can tokenize differently depending on the surrounding text (Kuromoji, Nori, ..)\r\n\r\nCan you give an example? So long as the same text tokenizes the same way that *should* be fine. We're not pushing \"contains tokens\" here - we're pushing exat string ==.", + "pr_file_module": null + }, + { + "comment_id": "2177644488", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130387, + "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java", + "discussion_id": "2177518712", + "commented_code": "@@ -422,6 +424,25 @@ private TextFieldType buildFieldType(\n return ft;\n }\n \n+ /**\n+ * Does a `match` query generate all valid candidates for `==`? Meaning,\n+ * if I do a match query for any string, say `foo bar baz`, then that\n+ * query will find all documents that indexed the same string.\n+ *
\n+ * This should be true for most sanely configured text fields. That's\n+ * just how we use them for search. But it's quite possible to make\n+ * the index analyzer not agree with the search analyzer, for example.\n+ *\n+ *\n+ * So this implementation is ultra-paranoid.\n+ *
\n+ */\n+ private boolean matchQueryYieldsCandidateMatchesForEquality() {\n+ return index.getValue() == Boolean.TRUE\n+ && analyzers.indexAnalyzer.isConfigured() == false\n+ && analyzers.searchAnalyzer.isConfigured() == false;\n+ }", + "comment_created_at": "2025-07-01T13:43:11+00:00", + "comment_author": "jimczi", + "comment_body": "> Can you give an example? So long as the same text tokenizes the same way that should be fine. We're not pushing \"contains tokens\" here - we're pushing exat string ==.\r\n\r\nAh right sorry, the input must be exactly the same. So then another test, to remove the analyzer restriction, would be to use a memory index and verify that the search analyzer query can find the input indexed with the index analyzer?\r\n\r\n> Do you think it's worth building a field setting that overrides our choices?\r\n\r\nI wonder if that should be pushed down to mappings directly. We have `termQuery`, `phraseQuery` defined by the field mappers now so we could delegate the decision there with an `exactQuery`? That would help with fields like `match_only_text` to avoid doing two verifications on the content? ", + "pr_file_module": null + }, + { + "comment_id": "2177655532", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130387, + "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java", + "discussion_id": "2177518712", + "commented_code": "@@ -422,6 +424,25 @@ private TextFieldType buildFieldType(\n return ft;\n }\n \n+ /**\n+ * Does a `match` query generate all valid candidates for `==`? Meaning,\n+ * if I do a match query for any string, say `foo bar baz`, then that\n+ * query will find all documents that indexed the same string.\n+ *
\n+ * This should be true for most sanely configured text fields. That's\n+ * just how we use them for search. But it's quite possible to make\n+ * the index analyzer not agree with the search analyzer, for example.\n+ *\n+ *\n+ * So this implementation is ultra-paranoid.\n+ *
\n+ */\n+ private boolean matchQueryYieldsCandidateMatchesForEquality() {\n+ return index.getValue() == Boolean.TRUE\n+ && analyzers.indexAnalyzer.isConfigured() == false\n+ && analyzers.searchAnalyzer.isConfigured() == false;\n+ }", + "comment_created_at": "2025-07-01T13:47:59+00:00", + "comment_author": "jimczi", + "comment_body": "cc @javanna ", + "pr_file_module": null + }, + { + "comment_id": "2179888009", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130387, + "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java", + "discussion_id": "2177518712", + "commented_code": "@@ -422,6 +424,25 @@ private TextFieldType buildFieldType(\n return ft;\n }\n \n+ /**\n+ * Does a `match` query generate all valid candidates for `==`? Meaning,\n+ * if I do a match query for any string, say `foo bar baz`, then that\n+ * query will find all documents that indexed the same string.\n+ *
\n+ * This should be true for most sanely configured text fields. That's\n+ * just how we use them for search. But it's quite possible to make\n+ * the index analyzer not agree with the search analyzer, for example.\n+ *\n+ *\n+ * So this implementation is ultra-paranoid.\n+ *
\n+ */\n+ private boolean matchQueryYieldsCandidateMatchesForEquality() {\n+ return index.getValue() == Boolean.TRUE\n+ && analyzers.indexAnalyzer.isConfigured() == false\n+ && analyzers.searchAnalyzer.isConfigured() == false;\n+ }", + "comment_created_at": "2025-07-02T12:09:04+00:00", + "comment_author": "nik9000", + "comment_body": "I think ESQL might indeed deserve some more query methods on `MappedFieldType`. I've been adding methods to, like, `TextFieldMapper` that help me decide what query to build. But I think it'd be cleaner to put these into the `MappedFieldType`. Also, it'd be more possible for, well, more fields to link into ESQL.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2172861734", + "pr_number": 129282, + "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java", + "created_at": "2025-06-27T20:54:05+00:00", + "commented_code": "boolQueryBuilder.should(\n createSemanticSubQuery(\n indexInformation.getInferenceIndices(),\n matchQueryBuilder.fieldName(),\n (String) matchQueryBuilder.value()\n matchQueryBuilder\n )\n );\n boolQueryBuilder.should(createSubQueryForIndices(indexInformation.nonInferenceIndices(), matchQueryBuilder));\n boolQueryBuilder.boost(matchQueryBuilder.boost());\n boolQueryBuilder.queryName(matchQueryBuilder.queryName());\n return boolQueryBuilder;", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2172861734", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129282, + "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java", + "discussion_id": "2172861734", + "commented_code": "@@ -50,11 +50,12 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery(\n boolQueryBuilder.should(\n createSemanticSubQuery(\n indexInformation.getInferenceIndices(),\n- matchQueryBuilder.fieldName(),\n- (String) matchQueryBuilder.value()\n+ matchQueryBuilder\n )\n );\n boolQueryBuilder.should(createSubQueryForIndices(indexInformation.nonInferenceIndices(), matchQueryBuilder));\n+ boolQueryBuilder.boost(matchQueryBuilder.boost());\n+ boolQueryBuilder.queryName(matchQueryBuilder.queryName());\n return boolQueryBuilder;", + "comment_created_at": "2025-06-27T20:54:05+00:00", + "comment_author": "Samiul-TheSoccerFan", + "comment_body": "The final Query for `match` will look something like this:\r\n\r\n```\r\n{\r\n \"bool\" : {\r\n \"should\" : [ {\r\n \"bool\" : {\r\n \"should\" : [ {\r\n \"bool\" : {\r\n \"must\" : [ {\r\n \"semantic\" : {\r\n \"field\" : \"semantic1\",\r\n \"query\" : \"wolf\",\r\n \"lenient\" : true\r\n }\r\n } ],\r\n \"filter\" : [ {\r\n \"terms\" : {\r\n \"_index\" : [ \"semantic-text-index\" ],\r\n \"boost\" : 1.0\r\n }\r\n } ],\r\n \"boost\" : 1.0\r\n }\r\n }, {\r\n \"bool\" : {\r\n \"must\" : [ {\r\n \"match\" : {\r\n \"semantic1\" : {\r\n \"query\" : \"wolf\",\r\n \"boost\" : 3.0,\r\n \"_name\" : \"semantic1_match\"\r\n }\r\n }\r\n } ],\r\n \"filter\" : [ {\r\n \"terms\" : {\r\n \"_index\" : [ \"normal-text-index\" ],\r\n \"boost\" : 1.0\r\n }\r\n } ],\r\n \"boost\" : 1.0\r\n }\r\n } ],\r\n \"boost\" : 3.0,\r\n \"_name\" : \"semantic1_match\"\r\n }\r\n }, {\r\n \"bool\" : {\r\n \"should\" : [ {\r\n \"bool\" : {\r\n \"must\" : [ {\r\n \"semantic\" : {\r\n \"field\" : \"semantic2\",\r\n \"query\" : \"strangers\",\r\n \"lenient\" : true\r\n }\r\n } ],\r\n \"filter\" : [ {\r\n \"terms\" : {\r\n \"_index\" : [ 
\"semantic-text-index\" ],\r\n \"boost\" : 1.0\r\n }\r\n } ],\r\n \"boost\" : 1.0\r\n }\r\n }, {\r\n \"bool\" : {\r\n \"must\" : [ {\r\n \"match\" : {\r\n \"semantic2\" : {\r\n \"query\" : \"strangers\",\r\n \"boost\" : 2.5,\r\n \"_name\" : \"semantic2_match\"\r\n }\r\n }\r\n } ],\r\n \"filter\" : [ {\r\n \"terms\" : {\r\n \"_index\" : [ \"normal-text-index\" ],\r\n \"boost\" : 1.0\r\n }\r\n } ],\r\n \"boost\" : 1.0\r\n }\r\n } ],\r\n \"boost\" : 2.5,\r\n \"_name\" : \"semantic2_match\"\r\n }\r\n } ],\r\n \"boost\" : 1.0\r\n }\r\n}\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2167423740", + "pr_number": 130035, + "pr_file": "x-pack/plugin/migrate/src/main/java/org/elasticsearch/xpack/migrate/action/ReindexDataStreamIndexTransportAction.java", + "created_at": "2025-06-25T19:07:18+00:00", + "commented_code": "ReindexDataStreamIndexAction.Request request,\n ActionListener listener\n ) {\n var project = clusterService.state().projectState();\n var project = projectResolver.getProjectState(clusterService.state());", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2167423740", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130035, + "pr_file": "x-pack/plugin/migrate/src/main/java/org/elasticsearch/xpack/migrate/action/ReindexDataStreamIndexTransportAction.java", + "discussion_id": "2167423740", + "commented_code": "@@ -142,7 +146,7 @@ protected void doExecute(\n ReindexDataStreamIndexAction.Request request,\n ActionListener listener\n ) {\n- var project = clusterService.state().projectState();\n+ var project = projectResolver.getProjectState(clusterService.state());", + "comment_created_at": "2025-06-25T19:07:18+00:00", + "comment_author": "nielsbauman", + "comment_body": "Since we only use the `ProjectMetadata` in this method, we can avoid constructing the `ProjectState` by using `projectResolver.getProjectMetadata(clusterService.state())`.", + "pr_file_module": null + }, + { + "comment_id": "2167646421", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130035, + "pr_file": "x-pack/plugin/migrate/src/main/java/org/elasticsearch/xpack/migrate/action/ReindexDataStreamIndexTransportAction.java", + "discussion_id": "2167423740", + "commented_code": "@@ -142,7 +146,7 @@ protected void doExecute(\n ReindexDataStreamIndexAction.Request request,\n ActionListener listener\n ) {\n- var project = clusterService.state().projectState();\n+ var project = projectResolver.getProjectState(clusterService.state());", + "comment_created_at": "2025-06-25T21:14:27+00:00", + "comment_author": "joegallo", + "comment_body": "34e98085388cae88b1299732e99903b2b7df7d2c", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-prevent-redundant-operations.md b/_reviewers/elasticsearch-prevent-redundant-operations.md index 963e63f..555f8bb 100644 --- a/_reviewers/elasticsearch-prevent-redundant-operations.md +++ b/_reviewers/elasticsearch-prevent-redundant-operations.md @@ -32,321 +32,3 @@ if (existingMapper != null && sourceToMerge.equals(existingMapper.mappingSource( - Preserve important properties like query boost and name parameters when transforming queries These optimizations are particularly important when multiple data nodes might be issuing similar updates (like dynamic mappings) or when working near capacity limits of the system. 
- - -[ - { - "discussion_id": "1651609389", - "pr_number": 109949, - "pr_file": "server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java", - "created_at": "2024-06-24T21:05:28+00:00", - "commented_code": "long version,\n UpdateHelper.Result updateResult\n ) {\n final var mapperService = primary.mapperService();\n try {\n final var sourceToMerge = new CompressedXContent(result.getRequiredMappingUpdate());\n final var mapperService = primary.mapperService();\n final long initialMappingVersion = mapperService.mappingVersion();\n final var existingMapper = mapperService.documentMapper();\n if (existingMapper != null && sourceToMerge.equals(existingMapper.mappingSource())) {\n context.resetForNoopMappingUpdateRetry(initialMappingVersion);\n return true;\n }", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "1651609389", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109949, - "pr_file": "server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java", - "discussion_id": "1651609389", - "commented_code": "@@ -405,16 +403,24 @@ private static boolean handleMappingUpdateRequired(\n long version,\n UpdateHelper.Result updateResult\n ) {\n- final var mapperService = primary.mapperService();\n try {\n+ final var sourceToMerge = new CompressedXContent(result.getRequiredMappingUpdate());\n+ final var mapperService = primary.mapperService();\n+ final long initialMappingVersion = mapperService.mappingVersion();\n+ final var existingMapper = mapperService.documentMapper();\n+ if (existingMapper != null && sourceToMerge.equals(existingMapper.mappingSource())) {\n+ context.resetForNoopMappingUpdateRetry(initialMappingVersion);\n+ return true;\n+ }", - "comment_created_at": "2024-06-24T21:05:28+00:00", - "comment_author": "javanna", - "comment_body": "Do you have a scenario in mind where this will help specifically? You are comparing the dynamic mapping update with the existing mappings, when are those two going to be the same?", - "pr_file_module": null - }, - { - "comment_id": "1652749658", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109949, - "pr_file": "server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java", - "discussion_id": "1651609389", - "commented_code": "@@ -405,16 +403,24 @@ private static boolean handleMappingUpdateRequired(\n long version,\n UpdateHelper.Result updateResult\n ) {\n- final var mapperService = primary.mapperService();\n try {\n+ final var sourceToMerge = new CompressedXContent(result.getRequiredMappingUpdate());\n+ final var mapperService = primary.mapperService();\n+ final long initialMappingVersion = mapperService.mappingVersion();\n+ final var existingMapper = mapperService.documentMapper();\n+ if (existingMapper != null && sourceToMerge.equals(existingMapper.mappingSource())) {\n+ context.resetForNoopMappingUpdateRetry(initialMappingVersion);\n+ return true;\n+ }", - "comment_created_at": "2024-06-25T12:42:52+00:00", - "comment_author": "original-brownbear", - "comment_body": "Yea, after failing over to a new index when a bunch of data nodes are blasting the master all with the same update. 
No need to waste energy on all write threads and the master for longer than absolutely necessary IMO :)", - "pr_file_module": null - }, - { - "comment_id": "1653529791", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109949, - "pr_file": "server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java", - "discussion_id": "1651609389", - "commented_code": "@@ -405,16 +403,24 @@ private static boolean handleMappingUpdateRequired(\n long version,\n UpdateHelper.Result updateResult\n ) {\n- final var mapperService = primary.mapperService();\n try {\n+ final var sourceToMerge = new CompressedXContent(result.getRequiredMappingUpdate());\n+ final var mapperService = primary.mapperService();\n+ final long initialMappingVersion = mapperService.mappingVersion();\n+ final var existingMapper = mapperService.documentMapper();\n+ if (existingMapper != null && sourceToMerge.equals(existingMapper.mappingSource())) {\n+ context.resetForNoopMappingUpdateRetry(initialMappingVersion);\n+ return true;\n+ }", - "comment_created_at": "2024-06-25T20:38:44+00:00", - "comment_author": "javanna", - "comment_body": "I don't understand how the conditional is matched in that scenario: how can the existing mappings be exactly the same as the dynamic mapping update? The existing mappings hold all the fields that have been added until then, and the new mappings hold the updates to be made following indexing of a new document. Say that we are indexing the first doc of a new index, the dynamic mapping update would contain a lot of fields, and the existing mappings would be empty? I am sure I am missing something, can you help me better understand?", - "pr_file_module": null - }, - { - "comment_id": "1653536880", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109949, - "pr_file": "server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java", - "discussion_id": "1651609389", - "commented_code": "@@ -405,16 +403,24 @@ private static boolean handleMappingUpdateRequired(\n long version,\n UpdateHelper.Result updateResult\n ) {\n- final var mapperService = primary.mapperService();\n try {\n+ final var sourceToMerge = new CompressedXContent(result.getRequiredMappingUpdate());\n+ final var mapperService = primary.mapperService();\n+ final long initialMappingVersion = mapperService.mappingVersion();\n+ final var existingMapper = mapperService.documentMapper();\n+ if (existingMapper != null && sourceToMerge.equals(existingMapper.mappingSource())) {\n+ context.resetForNoopMappingUpdateRetry(initialMappingVersion);\n+ return true;\n+ }", - "comment_created_at": "2024-06-25T20:45:02+00:00", - "comment_author": "original-brownbear", - "comment_body": "> I don't understand how the conditional is matched in that scenario: how can the existing mappings be exactly the same as the dynamic mapping update?\r\n\r\nI think that's when we bootstrap a new index that uses dynamic mapping updates. If you index the same kind of logs that will trigger the same mapping in parallel, you will and should get the exact same mapping here every time (like think some metrics line from Metricbeat or so).\r\n\r\n> Say that we are indexing the first doc of a new index, the dynamic mapping update would contain a lot of fields, and the existing mappings would be empty\r\n\r\nJup exactly. E.g. Elasticsearch detailed node stats I'd say :P that's a couple hundred fields that would get created in the dynamic updates. 
No need to ship that out from write threads more often than absolutely necessary every time we roll-over an index.\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1633466723", - "pr_number": 109390, - "pr_file": "server/src/main/java/org/elasticsearch/synonyms/SynonymsManagementAPIService.java", - "created_at": "2024-06-10T15:41:56+00:00", - "commented_code": "}\n\n public void putSynonymRule(String synonymsSetId, SynonymRule synonymRule, ActionListener listener) {\n checkSynonymSetExists(synonymsSetId, listener.delegateFailure((l1, obj) -> {\n try {\n IndexRequest indexRequest = createSynonymRuleIndexRequest(synonymsSetId, synonymRule).setRefreshPolicy(\n WriteRequest.RefreshPolicy.IMMEDIATE\n );\n client.index(indexRequest, l1.delegateFailure((l2, indexResponse) -> {\n UpdateSynonymsResultStatus updateStatus = indexResponse.status() == RestStatus.CREATED\n ? UpdateSynonymsResultStatus.CREATED\n : UpdateSynonymsResultStatus.UPDATED;\n\n reloadAnalyzers(synonymsSetId, false, l2, updateStatus);\n checkSynonymSetExists(synonymsSetId, listener.delegateFailureAndWrap((l1, obj) -> {\n // Count synonym rules to check if we're at maximum\n client.prepareSearch(SYNONYMS_ALIAS_NAME)\n .setQuery(\n QueryBuilders.boolQuery()\n .must(QueryBuilders.termQuery(SYNONYMS_SET_FIELD, synonymsSetId))\n .filter(QueryBuilders.termQuery(OBJECT_TYPE_FIELD, SYNONYM_RULE_OBJECT_TYPE))\n )\n .setSize(0)\n .setPreference(Preference.LOCAL.type())\n .setTrackTotalHits(true)\n .execute(l1.delegateFailureAndWrap((searchListener, searchResponse) -> {\n long synonymsSetSize = searchResponse.getHits().getTotalHits().value;\n if (synonymsSetSize >= MAX_SYNONYMS_SETS) {\n // We could potentially update a synonym rule when we're at max capacity, but we're keeping this simple", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "1633466723", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109390, - "pr_file": "server/src/main/java/org/elasticsearch/synonyms/SynonymsManagementAPIService.java", - "discussion_id": "1633466723", - "commented_code": "@@ -308,21 +355,42 @@ public void putSynonymsSet(String synonymSetId, SynonymRule[] synonymsSet, Actio\n }\n \n public void putSynonymRule(String synonymsSetId, SynonymRule synonymRule, ActionListener listener) {\n- checkSynonymSetExists(synonymsSetId, listener.delegateFailure((l1, obj) -> {\n- try {\n- IndexRequest indexRequest = createSynonymRuleIndexRequest(synonymsSetId, synonymRule).setRefreshPolicy(\n- WriteRequest.RefreshPolicy.IMMEDIATE\n- );\n- client.index(indexRequest, l1.delegateFailure((l2, indexResponse) -> {\n- UpdateSynonymsResultStatus updateStatus = indexResponse.status() == RestStatus.CREATED\n- ? 
UpdateSynonymsResultStatus.CREATED\n- : UpdateSynonymsResultStatus.UPDATED;\n-\n- reloadAnalyzers(synonymsSetId, false, l2, updateStatus);\n+ checkSynonymSetExists(synonymsSetId, listener.delegateFailureAndWrap((l1, obj) -> {\n+ // Count synonym rules to check if we're at maximum\n+ client.prepareSearch(SYNONYMS_ALIAS_NAME)\n+ .setQuery(\n+ QueryBuilders.boolQuery()\n+ .must(QueryBuilders.termQuery(SYNONYMS_SET_FIELD, synonymsSetId))\n+ .filter(QueryBuilders.termQuery(OBJECT_TYPE_FIELD, SYNONYM_RULE_OBJECT_TYPE))\n+ )\n+ .setSize(0)\n+ .setPreference(Preference.LOCAL.type())\n+ .setTrackTotalHits(true)\n+ .execute(l1.delegateFailureAndWrap((searchListener, searchResponse) -> {\n+ long synonymsSetSize = searchResponse.getHits().getTotalHits().value;\n+ if (synonymsSetSize >= MAX_SYNONYMS_SETS) {\n+ // We could potentially update a synonym rule when we're at max capacity, but we're keeping this simple", - "comment_created_at": "2024-06-10T15:41:56+00:00", - "comment_author": "kderusso", - "comment_body": "We could consider supporting updates here by adding a `must_not` clause to the query on the synonym ID? Then it would be below the max if it existed.", - "pr_file_module": null - }, - { - "comment_id": "1633545813", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109390, - "pr_file": "server/src/main/java/org/elasticsearch/synonyms/SynonymsManagementAPIService.java", - "discussion_id": "1633466723", - "commented_code": "@@ -308,21 +355,42 @@ public void putSynonymsSet(String synonymSetId, SynonymRule[] synonymsSet, Actio\n }\n \n public void putSynonymRule(String synonymsSetId, SynonymRule synonymRule, ActionListener listener) {\n- checkSynonymSetExists(synonymsSetId, listener.delegateFailure((l1, obj) -> {\n- try {\n- IndexRequest indexRequest = createSynonymRuleIndexRequest(synonymsSetId, synonymRule).setRefreshPolicy(\n- WriteRequest.RefreshPolicy.IMMEDIATE\n- );\n- client.index(indexRequest, l1.delegateFailure((l2, indexResponse) -> {\n- UpdateSynonymsResultStatus updateStatus = indexResponse.status() == RestStatus.CREATED\n- ? UpdateSynonymsResultStatus.CREATED\n- : UpdateSynonymsResultStatus.UPDATED;\n-\n- reloadAnalyzers(synonymsSetId, false, l2, updateStatus);\n+ checkSynonymSetExists(synonymsSetId, listener.delegateFailureAndWrap((l1, obj) -> {\n+ // Count synonym rules to check if we're at maximum\n+ client.prepareSearch(SYNONYMS_ALIAS_NAME)\n+ .setQuery(\n+ QueryBuilders.boolQuery()\n+ .must(QueryBuilders.termQuery(SYNONYMS_SET_FIELD, synonymsSetId))\n+ .filter(QueryBuilders.termQuery(OBJECT_TYPE_FIELD, SYNONYM_RULE_OBJECT_TYPE))\n+ )\n+ .setSize(0)\n+ .setPreference(Preference.LOCAL.type())\n+ .setTrackTotalHits(true)\n+ .execute(l1.delegateFailureAndWrap((searchListener, searchResponse) -> {\n+ long synonymsSetSize = searchResponse.getHits().getTotalHits().value;\n+ if (synonymsSetSize >= MAX_SYNONYMS_SETS) {\n+ // We could potentially update a synonym rule when we're at max capacity, but we're keeping this simple", - "comment_created_at": "2024-06-10T16:44:43+00:00", - "comment_author": "carlosdelest", - "comment_body": "That's a good way of doing this! 
Again, I feel it a bit complicated for such an edge case.", - "pr_file_module": null - }, - { - "comment_id": "1638117253", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109390, - "pr_file": "server/src/main/java/org/elasticsearch/synonyms/SynonymsManagementAPIService.java", - "discussion_id": "1633466723", - "commented_code": "@@ -308,21 +355,42 @@ public void putSynonymsSet(String synonymSetId, SynonymRule[] synonymsSet, Actio\n }\n \n public void putSynonymRule(String synonymsSetId, SynonymRule synonymRule, ActionListener listener) {\n- checkSynonymSetExists(synonymsSetId, listener.delegateFailure((l1, obj) -> {\n- try {\n- IndexRequest indexRequest = createSynonymRuleIndexRequest(synonymsSetId, synonymRule).setRefreshPolicy(\n- WriteRequest.RefreshPolicy.IMMEDIATE\n- );\n- client.index(indexRequest, l1.delegateFailure((l2, indexResponse) -> {\n- UpdateSynonymsResultStatus updateStatus = indexResponse.status() == RestStatus.CREATED\n- ? UpdateSynonymsResultStatus.CREATED\n- : UpdateSynonymsResultStatus.UPDATED;\n-\n- reloadAnalyzers(synonymsSetId, false, l2, updateStatus);\n+ checkSynonymSetExists(synonymsSetId, listener.delegateFailureAndWrap((l1, obj) -> {\n+ // Count synonym rules to check if we're at maximum\n+ client.prepareSearch(SYNONYMS_ALIAS_NAME)\n+ .setQuery(\n+ QueryBuilders.boolQuery()\n+ .must(QueryBuilders.termQuery(SYNONYMS_SET_FIELD, synonymsSetId))\n+ .filter(QueryBuilders.termQuery(OBJECT_TYPE_FIELD, SYNONYM_RULE_OBJECT_TYPE))\n+ )\n+ .setSize(0)\n+ .setPreference(Preference.LOCAL.type())\n+ .setTrackTotalHits(true)\n+ .execute(l1.delegateFailureAndWrap((searchListener, searchResponse) -> {\n+ long synonymsSetSize = searchResponse.getHits().getTotalHits().value;\n+ if (synonymsSetSize >= MAX_SYNONYMS_SETS) {\n+ // We could potentially update a synonym rule when we're at max capacity, but we're keeping this simple", - "comment_created_at": "2024-06-13T12:24:10+00:00", - "comment_author": "kderusso", - "comment_body": "I do think it's worth taking this edge case into consideration, because you know someone is going to try it and file a bug later. ", - "pr_file_module": null - }, - { - "comment_id": "1638197058", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109390, - "pr_file": "server/src/main/java/org/elasticsearch/synonyms/SynonymsManagementAPIService.java", - "discussion_id": "1633466723", - "commented_code": "@@ -308,21 +355,42 @@ public void putSynonymsSet(String synonymSetId, SynonymRule[] synonymsSet, Actio\n }\n \n public void putSynonymRule(String synonymsSetId, SynonymRule synonymRule, ActionListener listener) {\n- checkSynonymSetExists(synonymsSetId, listener.delegateFailure((l1, obj) -> {\n- try {\n- IndexRequest indexRequest = createSynonymRuleIndexRequest(synonymsSetId, synonymRule).setRefreshPolicy(\n- WriteRequest.RefreshPolicy.IMMEDIATE\n- );\n- client.index(indexRequest, l1.delegateFailure((l2, indexResponse) -> {\n- UpdateSynonymsResultStatus updateStatus = indexResponse.status() == RestStatus.CREATED\n- ? 
UpdateSynonymsResultStatus.CREATED\n- : UpdateSynonymsResultStatus.UPDATED;\n-\n- reloadAnalyzers(synonymsSetId, false, l2, updateStatus);\n+ checkSynonymSetExists(synonymsSetId, listener.delegateFailureAndWrap((l1, obj) -> {\n+ // Count synonym rules to check if we're at maximum\n+ client.prepareSearch(SYNONYMS_ALIAS_NAME)\n+ .setQuery(\n+ QueryBuilders.boolQuery()\n+ .must(QueryBuilders.termQuery(SYNONYMS_SET_FIELD, synonymsSetId))\n+ .filter(QueryBuilders.termQuery(OBJECT_TYPE_FIELD, SYNONYM_RULE_OBJECT_TYPE))\n+ )\n+ .setSize(0)\n+ .setPreference(Preference.LOCAL.type())\n+ .setTrackTotalHits(true)\n+ .execute(l1.delegateFailureAndWrap((searchListener, searchResponse) -> {\n+ long synonymsSetSize = searchResponse.getHits().getTotalHits().value;\n+ if (synonymsSetSize >= MAX_SYNONYMS_SETS) {\n+ // We could potentially update a synonym rule when we're at max capacity, but we're keeping this simple", - "comment_created_at": "2024-06-13T13:14:54+00:00", - "comment_author": "jimczi", - "comment_body": "Agreed, it's not really an edge case to be at capacity and to update the existing rules. I think we need a more robust approach here like executing all updates on the master node and executing the updates sequentially.", - "pr_file_module": null - }, - { - "comment_id": "1638282360", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109390, - "pr_file": "server/src/main/java/org/elasticsearch/synonyms/SynonymsManagementAPIService.java", - "discussion_id": "1633466723", - "commented_code": "@@ -308,21 +355,42 @@ public void putSynonymsSet(String synonymSetId, SynonymRule[] synonymsSet, Actio\n }\n \n public void putSynonymRule(String synonymsSetId, SynonymRule synonymRule, ActionListener listener) {\n- checkSynonymSetExists(synonymsSetId, listener.delegateFailure((l1, obj) -> {\n- try {\n- IndexRequest indexRequest = createSynonymRuleIndexRequest(synonymsSetId, synonymRule).setRefreshPolicy(\n- WriteRequest.RefreshPolicy.IMMEDIATE\n- );\n- client.index(indexRequest, l1.delegateFailure((l2, indexResponse) -> {\n- UpdateSynonymsResultStatus updateStatus = indexResponse.status() == RestStatus.CREATED\n- ? UpdateSynonymsResultStatus.CREATED\n- : UpdateSynonymsResultStatus.UPDATED;\n-\n- reloadAnalyzers(synonymsSetId, false, l2, updateStatus);\n+ checkSynonymSetExists(synonymsSetId, listener.delegateFailureAndWrap((l1, obj) -> {\n+ // Count synonym rules to check if we're at maximum\n+ client.prepareSearch(SYNONYMS_ALIAS_NAME)\n+ .setQuery(\n+ QueryBuilders.boolQuery()\n+ .must(QueryBuilders.termQuery(SYNONYMS_SET_FIELD, synonymsSetId))\n+ .filter(QueryBuilders.termQuery(OBJECT_TYPE_FIELD, SYNONYM_RULE_OBJECT_TYPE))\n+ )\n+ .setSize(0)\n+ .setPreference(Preference.LOCAL.type())\n+ .setTrackTotalHits(true)\n+ .execute(l1.delegateFailureAndWrap((searchListener, searchResponse) -> {\n+ long synonymsSetSize = searchResponse.getHits().getTotalHits().value;\n+ if (synonymsSetSize >= MAX_SYNONYMS_SETS) {\n+ // We could potentially update a synonym rule when we're at max capacity, but we're keeping this simple", - "comment_created_at": "2024-06-13T14:05:20+00:00", - "comment_author": "carlosdelest", - "comment_body": "> it's not really an edge case to be at capacity and to update the existing rules\r\n\r\nThis edge case happens only if we're updating an individual synonym rule when we're at max capacity. 
Updating rules in batch means that first we remove all the rules and apply the updates in a bulk request.\r\n\r\nIt sounds weird to me to update individual rules when we have 100k synonyms in a synonym set- at that scale, I would think that users do batch updates of rules, which effectively replace the existing ones.\r\n\r\nBut if y'all think this needs to be dealt with, I will! I'll update the code and ping when ready \ud83d\udc4d \r\n\r\n> I think we need a more robust approach here like executing all updates on the master node and executing the updates sequentially\r\n\r\nSo do you think that this should be a `TransportMasterNodeAction`? As this updates an index and not the cluster state, what would be the advantages of applying the action on the master node? \r\n\r\nAs we're doing a bulk request under the hood, doesn't that mean that we're applying the updates sequentially?", - "pr_file_module": null - }, - { - "comment_id": "1638334270", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109390, - "pr_file": "server/src/main/java/org/elasticsearch/synonyms/SynonymsManagementAPIService.java", - "discussion_id": "1633466723", - "commented_code": "@@ -308,21 +355,42 @@ public void putSynonymsSet(String synonymSetId, SynonymRule[] synonymsSet, Actio\n }\n \n public void putSynonymRule(String synonymsSetId, SynonymRule synonymRule, ActionListener listener) {\n- checkSynonymSetExists(synonymsSetId, listener.delegateFailure((l1, obj) -> {\n- try {\n- IndexRequest indexRequest = createSynonymRuleIndexRequest(synonymsSetId, synonymRule).setRefreshPolicy(\n- WriteRequest.RefreshPolicy.IMMEDIATE\n- );\n- client.index(indexRequest, l1.delegateFailure((l2, indexResponse) -> {\n- UpdateSynonymsResultStatus updateStatus = indexResponse.status() == RestStatus.CREATED\n- ? UpdateSynonymsResultStatus.CREATED\n- : UpdateSynonymsResultStatus.UPDATED;\n-\n- reloadAnalyzers(synonymsSetId, false, l2, updateStatus);\n+ checkSynonymSetExists(synonymsSetId, listener.delegateFailureAndWrap((l1, obj) -> {\n+ // Count synonym rules to check if we're at maximum\n+ client.prepareSearch(SYNONYMS_ALIAS_NAME)\n+ .setQuery(\n+ QueryBuilders.boolQuery()\n+ .must(QueryBuilders.termQuery(SYNONYMS_SET_FIELD, synonymsSetId))\n+ .filter(QueryBuilders.termQuery(OBJECT_TYPE_FIELD, SYNONYM_RULE_OBJECT_TYPE))\n+ )\n+ .setSize(0)\n+ .setPreference(Preference.LOCAL.type())\n+ .setTrackTotalHits(true)\n+ .execute(l1.delegateFailureAndWrap((searchListener, searchResponse) -> {\n+ long synonymsSetSize = searchResponse.getHits().getTotalHits().value;\n+ if (synonymsSetSize >= MAX_SYNONYMS_SETS) {\n+ // We could potentially update a synonym rule when we're at max capacity, but we're keeping this simple", - "comment_created_at": "2024-06-13T14:36:16+00:00", - "comment_author": "jimczi", - "comment_body": "> Updating rules in batch means that first we remove all the rules and apply the updates in a bulk request.\r\n\r\nThis is another case of non-consistent updates, we need to ensure some ordering here.\r\n\r\n> So do you think that this should be a TransportMasterNodeAction? As this updates an index and not the cluster state, what would be the advantages of applying the action on the master node?\r\n\r\nHaving a single place where updates can occur but as you noticed that won't be enough. We also need to ensure that all updates are done sequentially, not in a single bulk request but globally when multiple synonyms updates are done in parallel. 
See the `MetadataMappingService` for an example of service that applies updates sequentially. \r\n\r\n", - "pr_file_module": null - }, - { - "comment_id": "1638341068", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109390, - "pr_file": "server/src/main/java/org/elasticsearch/synonyms/SynonymsManagementAPIService.java", - "discussion_id": "1633466723", - "commented_code": "@@ -308,21 +355,42 @@ public void putSynonymsSet(String synonymSetId, SynonymRule[] synonymsSet, Actio\n }\n \n public void putSynonymRule(String synonymsSetId, SynonymRule synonymRule, ActionListener listener) {\n- checkSynonymSetExists(synonymsSetId, listener.delegateFailure((l1, obj) -> {\n- try {\n- IndexRequest indexRequest = createSynonymRuleIndexRequest(synonymsSetId, synonymRule).setRefreshPolicy(\n- WriteRequest.RefreshPolicy.IMMEDIATE\n- );\n- client.index(indexRequest, l1.delegateFailure((l2, indexResponse) -> {\n- UpdateSynonymsResultStatus updateStatus = indexResponse.status() == RestStatus.CREATED\n- ? UpdateSynonymsResultStatus.CREATED\n- : UpdateSynonymsResultStatus.UPDATED;\n-\n- reloadAnalyzers(synonymsSetId, false, l2, updateStatus);\n+ checkSynonymSetExists(synonymsSetId, listener.delegateFailureAndWrap((l1, obj) -> {\n+ // Count synonym rules to check if we're at maximum\n+ client.prepareSearch(SYNONYMS_ALIAS_NAME)\n+ .setQuery(\n+ QueryBuilders.boolQuery()\n+ .must(QueryBuilders.termQuery(SYNONYMS_SET_FIELD, synonymsSetId))\n+ .filter(QueryBuilders.termQuery(OBJECT_TYPE_FIELD, SYNONYM_RULE_OBJECT_TYPE))\n+ )\n+ .setSize(0)\n+ .setPreference(Preference.LOCAL.type())\n+ .setTrackTotalHits(true)\n+ .execute(l1.delegateFailureAndWrap((searchListener, searchResponse) -> {\n+ long synonymsSetSize = searchResponse.getHits().getTotalHits().value;\n+ if (synonymsSetSize >= MAX_SYNONYMS_SETS) {\n+ // We could potentially update a synonym rule when we're at max capacity, but we're keeping this simple", - "comment_created_at": "2024-06-13T14:40:13+00:00", - "comment_author": "carlosdelest", - "comment_body": "Thanks @jimczi , I will take a look into that.\r\n\r\nI think we should create a separate issue for ensuring updates are applied sequentially, as this is not directly related to this change?", - "pr_file_module": null - }, - { - "comment_id": "1638388512", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 109390, - "pr_file": "server/src/main/java/org/elasticsearch/synonyms/SynonymsManagementAPIService.java", - "discussion_id": "1633466723", - "commented_code": "@@ -308,21 +355,42 @@ public void putSynonymsSet(String synonymSetId, SynonymRule[] synonymsSet, Actio\n }\n \n public void putSynonymRule(String synonymsSetId, SynonymRule synonymRule, ActionListener listener) {\n- checkSynonymSetExists(synonymsSetId, listener.delegateFailure((l1, obj) -> {\n- try {\n- IndexRequest indexRequest = createSynonymRuleIndexRequest(synonymsSetId, synonymRule).setRefreshPolicy(\n- WriteRequest.RefreshPolicy.IMMEDIATE\n- );\n- client.index(indexRequest, l1.delegateFailure((l2, indexResponse) -> {\n- UpdateSynonymsResultStatus updateStatus = indexResponse.status() == RestStatus.CREATED\n- ? 
UpdateSynonymsResultStatus.CREATED\n- : UpdateSynonymsResultStatus.UPDATED;\n-\n- reloadAnalyzers(synonymsSetId, false, l2, updateStatus);\n+ checkSynonymSetExists(synonymsSetId, listener.delegateFailureAndWrap((l1, obj) -> {\n+ // Count synonym rules to check if we're at maximum\n+ client.prepareSearch(SYNONYMS_ALIAS_NAME)\n+ .setQuery(\n+ QueryBuilders.boolQuery()\n+ .must(QueryBuilders.termQuery(SYNONYMS_SET_FIELD, synonymsSetId))\n+ .filter(QueryBuilders.termQuery(OBJECT_TYPE_FIELD, SYNONYM_RULE_OBJECT_TYPE))\n+ )\n+ .setSize(0)\n+ .setPreference(Preference.LOCAL.type())\n+ .setTrackTotalHits(true)\n+ .execute(l1.delegateFailureAndWrap((searchListener, searchResponse) -> {\n+ long synonymsSetSize = searchResponse.getHits().getTotalHits().value;\n+ if (synonymsSetSize >= MAX_SYNONYMS_SETS) {\n+ // We could potentially update a synonym rule when we're at max capacity, but we're keeping this simple", - "comment_created_at": "2024-06-13T15:09:25+00:00", - "comment_author": "jimczi", - "comment_body": "I think it's related, we cannot limit the number of synonyms without ensuring that updates are applied sequentially.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2177518712", - "pr_number": 130387, - "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java", - "created_at": "2025-07-01T12:46:28+00:00", - "commented_code": "return ft;\n }\n\n /**\n * Does a `match` query generate all valid candidates for `==`? Meaning,\n * if I do a match query for any string, say `foo bar baz`, then that\n * query will find all documents that indexed the same string.\n *
\n * This should be true for most sanely configured text fields. That's\n * just how we use them for search. But it's quite possible to make\n * the index analyzer not agree with the search analyzer, for example.\n *\n *\n * So this implementation is ultra-paranoid.\n *
\n */\n private boolean matchQueryYieldsCandidateMatchesForEquality() {\n return index.getValue() == Boolean.TRUE\n && analyzers.indexAnalyzer.isConfigured() == false\n && analyzers.searchAnalyzer.isConfigured() == false;\n }", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2177518712", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130387, - "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java", - "discussion_id": "2177518712", - "commented_code": "@@ -422,6 +424,25 @@ private TextFieldType buildFieldType(\n return ft;\n }\n \n+ /**\n+ * Does a `match` query generate all valid candidates for `==`? Meaning,\n+ * if I do a match query for any string, say `foo bar baz`, then that\n+ * query will find all documents that indexed the same string.\n+ *
\n+ * This should be true for most sanely configured text fields. That's\n+ * just how we use them for search. But it's quite possible to make\n+ * the index analyzer not agree with the search analyzer, for example.\n+ *\n+ *\n+ * So this implementation is ultra-paranoid.\n+ *
\n+ */\n+ private boolean matchQueryYieldsCandidateMatchesForEquality() {\n+ return index.getValue() == Boolean.TRUE\n+ && analyzers.indexAnalyzer.isConfigured() == false\n+ && analyzers.searchAnalyzer.isConfigured() == false;\n+ }", - "comment_created_at": "2025-07-01T12:46:28+00:00", - "comment_author": "nik9000", - "comment_body": "==SEARCH FOLKS LOOK HERE==\r\n\r\nIs this paranoid enough? Like, could you break the `match` query with other settings I'm not checking? I'd love to default this to only working if there's nothing configured on the text field and then opt in configurations.", - "pr_file_module": null - }, - { - "comment_id": "2177582793", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130387, - "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java", - "discussion_id": "2177518712", - "commented_code": "@@ -422,6 +424,25 @@ private TextFieldType buildFieldType(\n return ft;\n }\n \n+ /**\n+ * Does a `match` query generate all valid candidates for `==`? Meaning,\n+ * if I do a match query for any string, say `foo bar baz`, then that\n+ * query will find all documents that indexed the same string.\n+ *
\n+ * This should be true for most sanely configured text fields. That's\n+ * just how we use them for search. But it's quite possible to make\n+ * the index analyzer not agree with the search analyzer, for example.\n+ *\n+ *\n+ * So this implementation is ultra-paranoid.\n+ *
\n+ */\n+ private boolean matchQueryYieldsCandidateMatchesForEquality() {\n+ return index.getValue() == Boolean.TRUE\n+ && analyzers.indexAnalyzer.isConfigured() == false\n+ && analyzers.searchAnalyzer.isConfigured() == false;\n+ }", - "comment_created_at": "2025-07-01T13:17:25+00:00", - "comment_author": "jimczi", - "comment_body": "This is paranoid indeed. Just checking if the analyzer are the same is not enough since we have morphological analyzer that can tokenize differently depending on the surrounding text (Kuromoji, Nori, ..).\r\nWe could have a list of `allowed` analyzers but that sounds like a good start that will handle 99% of the use cases. \r\nI can't think of another setting that would change this, are we matching the input as phrase query or as a conjunction of term?", - "pr_file_module": null - }, - { - "comment_id": "2177603114", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130387, - "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java", - "discussion_id": "2177518712", - "commented_code": "@@ -422,6 +424,25 @@ private TextFieldType buildFieldType(\n return ft;\n }\n \n+ /**\n+ * Does a `match` query generate all valid candidates for `==`? Meaning,\n+ * if I do a match query for any string, say `foo bar baz`, then that\n+ * query will find all documents that indexed the same string.\n+ *
\n+ * This should be true for most sanely configured text fields. That's\n+ * just how we use them for search. But it's quite possible to make\n+ * the index analyzer not agree with the search analyzer, for example.\n+ *\n+ *\n+ * So this implementation is ultra-paranoid.\n+ *
\n+ */\n+ private boolean matchQueryYieldsCandidateMatchesForEquality() {\n+ return index.getValue() == Boolean.TRUE\n+ && analyzers.indexAnalyzer.isConfigured() == false\n+ && analyzers.searchAnalyzer.isConfigured() == false;\n+ }", - "comment_created_at": "2025-07-01T13:24:52+00:00", - "comment_author": "nik9000", - "comment_body": "Conjunction of terms right now. I probably should do phrase to be honest.", - "pr_file_module": null - }, - { - "comment_id": "2177608120", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130387, - "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java", - "discussion_id": "2177518712", - "commented_code": "@@ -422,6 +424,25 @@ private TextFieldType buildFieldType(\n return ft;\n }\n \n+ /**\n+ * Does a `match` query generate all valid candidates for `==`? Meaning,\n+ * if I do a match query for any string, say `foo bar baz`, then that\n+ * query will find all documents that indexed the same string.\n+ *
\n+ * This should be true for most sanely configured text fields. That's\n+ * just how we use them for search. But it's quite possible to make\n+ * the index analyzer not agree with the search analyzer, for example.\n+ *\n+ *\n+ * So this implementation is ultra-paranoid.\n+ *
\n+ */\n+ private boolean matchQueryYieldsCandidateMatchesForEquality() {\n+ return index.getValue() == Boolean.TRUE\n+ && analyzers.indexAnalyzer.isConfigured() == false\n+ && analyzers.searchAnalyzer.isConfigured() == false;\n+ }", - "comment_created_at": "2025-07-01T13:26:59+00:00", - "comment_author": "nik9000", - "comment_body": "Do you think it's worth building a field setting that overrides our choices? A `true/false/null` thing - `true` means \"I know match is fine, push\", `false` means \"I know match isn't fine, never push\" and `null` means \"you figure it out\".\r\n\r\n> since we have morphological analyzer that can tokenize differently depending on the surrounding text (Kuromoji, Nori, ..)\r\n\r\nCan you give an example? So long as the same text tokenizes the same way that *should* be fine. We're not pushing \"contains tokens\" here - we're pushing exat string ==.", - "pr_file_module": null - }, - { - "comment_id": "2177644488", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130387, - "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java", - "discussion_id": "2177518712", - "commented_code": "@@ -422,6 +424,25 @@ private TextFieldType buildFieldType(\n return ft;\n }\n \n+ /**\n+ * Does a `match` query generate all valid candidates for `==`? Meaning,\n+ * if I do a match query for any string, say `foo bar baz`, then that\n+ * query will find all documents that indexed the same string.\n+ *
\n+ * This should be true for most sanely configured text fields. That's\n+ * just how we use them for search. But it's quite possible to make\n+ * the index analyzer not agree with the search analyzer, for example.\n+ *\n+ *\n+ * So this implementation is ultra-paranoid.\n+ *
\n+ */\n+ private boolean matchQueryYieldsCandidateMatchesForEquality() {\n+ return index.getValue() == Boolean.TRUE\n+ && analyzers.indexAnalyzer.isConfigured() == false\n+ && analyzers.searchAnalyzer.isConfigured() == false;\n+ }", - "comment_created_at": "2025-07-01T13:43:11+00:00", - "comment_author": "jimczi", - "comment_body": "> Can you give an example? So long as the same text tokenizes the same way that should be fine. We're not pushing \"contains tokens\" here - we're pushing exat string ==.\r\n\r\nAh right sorry, the input must be exactly the same. So then another test, to remove the analyzer restriction, would be to use a memory index and verify that the search analyzer query can find the input indexed with the index analyzer?\r\n\r\n> Do you think it's worth building a field setting that overrides our choices?\r\n\r\nI wonder if that should be pushed down to mappings directly. We have `termQuery`, `phraseQuery` defined by the field mappers now so we could delegate the decision there with an `exactQuery`? That would help with fields like `match_only_text` to avoid doing two verifications on the content? ", - "pr_file_module": null - }, - { - "comment_id": "2177655532", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130387, - "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java", - "discussion_id": "2177518712", - "commented_code": "@@ -422,6 +424,25 @@ private TextFieldType buildFieldType(\n return ft;\n }\n \n+ /**\n+ * Does a `match` query generate all valid candidates for `==`? Meaning,\n+ * if I do a match query for any string, say `foo bar baz`, then that\n+ * query will find all documents that indexed the same string.\n+ *
\n+ * This should be true for most sanely configured text fields. That's\n+ * just how we use them for search. But it's quite possible to make\n+ * the index analyzer not agree with the search analyzer, for example.\n+ *\n+ *\n+ * So this implementation is ultra-paranoid.\n+ *
\n+ */\n+ private boolean matchQueryYieldsCandidateMatchesForEquality() {\n+ return index.getValue() == Boolean.TRUE\n+ && analyzers.indexAnalyzer.isConfigured() == false\n+ && analyzers.searchAnalyzer.isConfigured() == false;\n+ }", - "comment_created_at": "2025-07-01T13:47:59+00:00", - "comment_author": "jimczi", - "comment_body": "cc @javanna ", - "pr_file_module": null - }, - { - "comment_id": "2179888009", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130387, - "pr_file": "server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java", - "discussion_id": "2177518712", - "commented_code": "@@ -422,6 +424,25 @@ private TextFieldType buildFieldType(\n return ft;\n }\n \n+ /**\n+ * Does a `match` query generate all valid candidates for `==`? Meaning,\n+ * if I do a match query for any string, say `foo bar baz`, then that\n+ * query will find all documents that indexed the same string.\n+ *
\n+ * This should be true for most sanely configured text fields. That's\n+ * just how we use them for search. But it's quite possible to make\n+ * the index analyzer not agree with the search analyzer, for example.\n+ *\n+ *\n+ * So this implementation is ultra-paranoid.\n+ *
\n+ */\n+ private boolean matchQueryYieldsCandidateMatchesForEquality() {\n+ return index.getValue() == Boolean.TRUE\n+ && analyzers.indexAnalyzer.isConfigured() == false\n+ && analyzers.searchAnalyzer.isConfigured() == false;\n+ }", - "comment_created_at": "2025-07-02T12:09:04+00:00", - "comment_author": "nik9000", - "comment_body": "I think ESQL might indeed deserve some more query methods on `MappedFieldType`. I've been adding methods to, like, `TextFieldMapper` that help me decide what query to build. But I think it'd be cleaner to put these into the `MappedFieldType`. Also, it'd be more possible for, well, more fields to link into ESQL.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2172861734", - "pr_number": 129282, - "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java", - "created_at": "2025-06-27T20:54:05+00:00", - "commented_code": "boolQueryBuilder.should(\n createSemanticSubQuery(\n indexInformation.getInferenceIndices(),\n matchQueryBuilder.fieldName(),\n (String) matchQueryBuilder.value()\n matchQueryBuilder\n )\n );\n boolQueryBuilder.should(createSubQueryForIndices(indexInformation.nonInferenceIndices(), matchQueryBuilder));\n boolQueryBuilder.boost(matchQueryBuilder.boost());\n boolQueryBuilder.queryName(matchQueryBuilder.queryName());\n return boolQueryBuilder;", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2172861734", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129282, - "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java", - "discussion_id": "2172861734", - "commented_code": "@@ -50,11 +50,12 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery(\n boolQueryBuilder.should(\n createSemanticSubQuery(\n indexInformation.getInferenceIndices(),\n- matchQueryBuilder.fieldName(),\n- (String) matchQueryBuilder.value()\n+ matchQueryBuilder\n )\n );\n boolQueryBuilder.should(createSubQueryForIndices(indexInformation.nonInferenceIndices(), matchQueryBuilder));\n+ boolQueryBuilder.boost(matchQueryBuilder.boost());\n+ boolQueryBuilder.queryName(matchQueryBuilder.queryName());\n return boolQueryBuilder;", - "comment_created_at": "2025-06-27T20:54:05+00:00", - "comment_author": "Samiul-TheSoccerFan", - "comment_body": "The final Query for `match` will look something like this:\r\n\r\n```\r\n{\r\n \"bool\" : {\r\n \"should\" : [ {\r\n \"bool\" : {\r\n \"should\" : [ {\r\n \"bool\" : {\r\n \"must\" : [ {\r\n \"semantic\" : {\r\n \"field\" : \"semantic1\",\r\n \"query\" : \"wolf\",\r\n \"lenient\" : true\r\n }\r\n } ],\r\n \"filter\" : [ {\r\n \"terms\" : {\r\n \"_index\" : [ \"semantic-text-index\" ],\r\n \"boost\" : 1.0\r\n }\r\n } ],\r\n \"boost\" : 1.0\r\n }\r\n }, {\r\n \"bool\" : {\r\n \"must\" : [ {\r\n \"match\" : {\r\n \"semantic1\" : {\r\n \"query\" : \"wolf\",\r\n \"boost\" : 3.0,\r\n \"_name\" : \"semantic1_match\"\r\n }\r\n }\r\n } ],\r\n \"filter\" : [ {\r\n \"terms\" : {\r\n \"_index\" : [ \"normal-text-index\" ],\r\n \"boost\" : 1.0\r\n }\r\n } ],\r\n \"boost\" : 1.0\r\n }\r\n } ],\r\n \"boost\" : 3.0,\r\n \"_name\" : \"semantic1_match\"\r\n }\r\n }, {\r\n \"bool\" : {\r\n \"should\" : [ {\r\n \"bool\" : {\r\n \"must\" : [ {\r\n \"semantic\" : {\r\n \"field\" : \"semantic2\",\r\n \"query\" : \"strangers\",\r\n \"lenient\" : true\r\n }\r\n } ],\r\n \"filter\" : [ {\r\n \"terms\" : {\r\n \"_index\" : [ 
\"semantic-text-index\" ],\r\n \"boost\" : 1.0\r\n }\r\n } ],\r\n \"boost\" : 1.0\r\n }\r\n }, {\r\n \"bool\" : {\r\n \"must\" : [ {\r\n \"match\" : {\r\n \"semantic2\" : {\r\n \"query\" : \"strangers\",\r\n \"boost\" : 2.5,\r\n \"_name\" : \"semantic2_match\"\r\n }\r\n }\r\n } ],\r\n \"filter\" : [ {\r\n \"terms\" : {\r\n \"_index\" : [ \"normal-text-index\" ],\r\n \"boost\" : 1.0\r\n }\r\n } ],\r\n \"boost\" : 1.0\r\n }\r\n } ],\r\n \"boost\" : 2.5,\r\n \"_name\" : \"semantic2_match\"\r\n }\r\n } ],\r\n \"boost\" : 1.0\r\n }\r\n}\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2167423740", - "pr_number": 130035, - "pr_file": "x-pack/plugin/migrate/src/main/java/org/elasticsearch/xpack/migrate/action/ReindexDataStreamIndexTransportAction.java", - "created_at": "2025-06-25T19:07:18+00:00", - "commented_code": "ReindexDataStreamIndexAction.Request request,\n ActionListener listener\n ) {\n var project = clusterService.state().projectState();\n var project = projectResolver.getProjectState(clusterService.state());", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2167423740", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130035, - "pr_file": "x-pack/plugin/migrate/src/main/java/org/elasticsearch/xpack/migrate/action/ReindexDataStreamIndexTransportAction.java", - "discussion_id": "2167423740", - "commented_code": "@@ -142,7 +146,7 @@ protected void doExecute(\n ReindexDataStreamIndexAction.Request request,\n ActionListener listener\n ) {\n- var project = clusterService.state().projectState();\n+ var project = projectResolver.getProjectState(clusterService.state());", - "comment_created_at": "2025-06-25T19:07:18+00:00", - "comment_author": "nielsbauman", - "comment_body": "Since we only use the `ProjectMetadata` in this method, we can avoid constructing the `ProjectState` by using `projectResolver.getProjectMetadata(clusterService.state())`.", - "pr_file_module": null - }, - { - "comment_id": "2167646421", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130035, - "pr_file": "x-pack/plugin/migrate/src/main/java/org/elasticsearch/xpack/migrate/action/ReindexDataStreamIndexTransportAction.java", - "discussion_id": "2167423740", - "commented_code": "@@ -142,7 +146,7 @@ protected void doExecute(\n ReindexDataStreamIndexAction.Request request,\n ActionListener listener\n ) {\n- var project = clusterService.state().projectState();\n+ var project = projectResolver.getProjectState(clusterService.state());", - "comment_created_at": "2025-06-25T21:14:27+00:00", - "comment_author": "joegallo", - "comment_body": "34e98085388cae88b1299732e99903b2b7df7d2c", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-robust-test-assertions.json b/_reviewers/elasticsearch-robust-test-assertions.json new file mode 100644 index 0000000..9ed91b2 --- /dev/null +++ b/_reviewers/elasticsearch-robust-test-assertions.json @@ -0,0 +1,182 @@ +[ + { + "discussion_id": "2156561631", + "pr_number": 129633, + "pr_file": "x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/GroupingAggregatorFunctionTestCase.java", + "created_at": "2025-06-19T09:26:32+00:00", + "commented_code": "delegate.selectedMayContainUnseenGroups(seenGroupIds);\n }\n\n @Override\n public void addIntermediateInput(int positionOffset, IntArrayBlock groupIds, Page page) {\n addIntermediateInputInternal(positionOffset, groupIds, page);\n }\n\n @Override\n public void addIntermediateInput(int positionOffset, 
IntBigArrayBlock groupIds, Page page) {\n addIntermediateInputInternal(positionOffset, groupIds, page);\n }\n\n @Override\n public void addIntermediateInput(int positionOffset, IntVector groupIds, Page page) {\n addIntermediateInputInternal(positionOffset, groupIds.asBlock(), page);\n }\n\n public void addIntermediateInputInternal(int positionOffset, IntBlock groupIds, Page page) {\n BlockFactory blockFactory = TestBlockFactory.getNonBreakingInstance();\n int[] chunk = new int[emitChunkSize];\n for (int offset = 0; offset < groupIds.getPositionCount(); offset += emitChunkSize) {\n int count = 0;\n for (int i = offset; i < Math.min(groupIds.getPositionCount(), offset + emitChunkSize); i++) {\n chunk[count++] = groupIds.getInt(i);\n int chunkPosition = 0;\n int offset = 0;\n for (int position = 0; position < groupIds.getPositionCount(); position++) {\n if (groupIds.isNull(position)) {\n continue;\n }\n BlockFactory blockFactory = TestBlockFactory.getNonBreakingInstance(); // TODO: just for compile\n delegate.addIntermediateInput(positionOffset + offset, blockFactory.newIntArrayVector(chunk, count), page);\n int firstValueIndex = groupIds.getFirstValueIndex(position);\n int valueCount = groupIds.getValueCount(position);\n assert valueCount == 1; // Multi-values make chunking more complex, and it's not a real case yet", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2156561631", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129633, + "pr_file": "x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/GroupingAggregatorFunctionTestCase.java", + "discussion_id": "2156561631", + "commented_code": "@@ -739,16 +739,52 @@ public void selectedMayContainUnseenGroups(SeenGroupIds seenGroupIds) {\n delegate.selectedMayContainUnseenGroups(seenGroupIds);\n }\n \n+ @Override\n+ public void addIntermediateInput(int positionOffset, IntArrayBlock groupIds, Page page) {\n+ addIntermediateInputInternal(positionOffset, groupIds, page);\n+ }\n+\n+ @Override\n+ public void addIntermediateInput(int positionOffset, IntBigArrayBlock groupIds, Page page) {\n+ addIntermediateInputInternal(positionOffset, groupIds, page);\n+ }\n+\n @Override\n public void addIntermediateInput(int positionOffset, IntVector groupIds, Page page) {\n+ addIntermediateInputInternal(positionOffset, groupIds.asBlock(), page);\n+ }\n+\n+ public void addIntermediateInputInternal(int positionOffset, IntBlock groupIds, Page page) {\n+ BlockFactory blockFactory = TestBlockFactory.getNonBreakingInstance();\n int[] chunk = new int[emitChunkSize];\n- for (int offset = 0; offset < groupIds.getPositionCount(); offset += emitChunkSize) {\n- int count = 0;\n- for (int i = offset; i < Math.min(groupIds.getPositionCount(), offset + emitChunkSize); i++) {\n- chunk[count++] = groupIds.getInt(i);\n+ int chunkPosition = 0;\n+ int offset = 0;\n+ for (int position = 0; position < groupIds.getPositionCount(); position++) {\n+ if (groupIds.isNull(position)) {\n+ continue;\n }\n- BlockFactory blockFactory = TestBlockFactory.getNonBreakingInstance(); // TODO: just for compile\n- delegate.addIntermediateInput(positionOffset + offset, blockFactory.newIntArrayVector(chunk, count), page);\n+ int firstValueIndex = groupIds.getFirstValueIndex(position);\n+ int valueCount = groupIds.getValueCount(position);\n+ assert valueCount == 1; // Multi-values make chunking more complex, and it's not a real case yet", + "comment_created_at": "2025-06-19T09:26:32+00:00", + "comment_author": "Copilot", + 
"comment_body": "[nitpick] Consider replacing the plain 'assert' statement with a proper JUnit assertion (e.g., assertEquals(1, valueCount)) to provide a clearer error message if this assumption fails.\n```suggestion\n assertEquals(1, valueCount, \"Multi-values make chunking more complex, and it's not a real case yet\");\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "967030927", + "pr_number": 89112, + "pr_file": "server/src/test/java/org/elasticsearch/index/analysis/AnalysisTests.java", + "created_at": "2022-09-09T12:46:36+00:00", + "commented_code": "List wordList = Analysis.getWordList(env, nodeSettings, \"foo.bar\");\n assertEquals(Arrays.asList(\"hello\", \"world\"), wordList);\n }\n\n public void testCustomAnalyzerWithNotSupportKey() {\n Settings analyzerSettings = Settings.builder().put(\"tokenizer\", \"standard\").put(\"foo\", \"bar\").put(\"type\", \"custom\").build();\n\n try {\n createComponents(\"my_analyzer\", analyzerSettings, testAnalysis.tokenizer, testAnalysis.charFilter, testAnalysis.tokenFilter);\n fail(\"expected failure\");\n } catch (IllegalArgumentException e) {", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "967030927", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 89112, + "pr_file": "server/src/test/java/org/elasticsearch/index/analysis/AnalysisTests.java", + "discussion_id": "967030927", + "commented_code": "@@ -103,4 +116,15 @@ public void testParseWordList() throws IOException {\n List wordList = Analysis.getWordList(env, nodeSettings, \"foo.bar\");\n assertEquals(Arrays.asList(\"hello\", \"world\"), wordList);\n }\n+\n+ public void testCustomAnalyzerWithNotSupportKey() {\n+ Settings analyzerSettings = Settings.builder().put(\"tokenizer\", \"standard\").put(\"foo\", \"bar\").put(\"type\", \"custom\").build();\n+\n+ try {\n+ createComponents(\"my_analyzer\", analyzerSettings, testAnalysis.tokenizer, testAnalysis.charFilter, testAnalysis.tokenFilter);\n+ fail(\"expected failure\");\n+ } catch (IllegalArgumentException e) {", + "comment_created_at": "2022-09-09T12:46:36+00:00", + "comment_author": "javanna", + "comment_body": "you can use expectThrows instead .", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2142767556", + "pr_number": 129320, + "pr_file": "x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java", + "created_at": "2025-06-12T13:36:52+00:00", + "commented_code": "IndexingPressure.Coordinating coordinatingIndexingPressure = indexingPressure.getCoordinating();\n assertThat(coordinatingIndexingPressure, notNullValue());\n verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc0Source));\n verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc1Source));\n verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc2Source));\n verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc3Source));\n verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc4Source));\n verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc0UpdateSource));\n verify(coordinatingIndexingPressure, times(6)).increment(eq(1), longThat(l -> l > 0));\n if (useLegacyFormat == false) {\n verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc1UpdateSource));\n verify(coordinatingIndexingPressure).increment(1, longThat(l -> l > bytesUsed(doc1UpdateSource)));", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2142767556", + 
"repo_full_name": "elastic/elasticsearch", + "pr_number": 129320, + "pr_file": "x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java", + "discussion_id": "2142767556", + "commented_code": "@@ -616,20 +616,13 @@ public void testIndexingPressure() throws Exception {\n \n IndexingPressure.Coordinating coordinatingIndexingPressure = indexingPressure.getCoordinating();\n assertThat(coordinatingIndexingPressure, notNullValue());\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc0Source));\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc1Source));\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc2Source));\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc3Source));\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc4Source));\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc0UpdateSource));\n+ verify(coordinatingIndexingPressure, times(6)).increment(eq(1), longThat(l -> l > 0));\n if (useLegacyFormat == false) {\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc1UpdateSource));\n+ verify(coordinatingIndexingPressure).increment(1, longThat(l -> l > bytesUsed(doc1UpdateSource)));", + "comment_created_at": "2025-06-12T13:36:52+00:00", + "comment_author": "Mikep86", + "comment_body": "If we're not checking for calls to `increment` with exact counts of bytes allocated, we can simplify this to:\r\n\r\n```\r\nverify(coordinatingIndexingPressure, times(useLegacyFormat ? 6 : 7)).increment(eq(1), longThat(l -> l > 0));\r\n```\r\n\r\nThis also makes the test more robust, as the fact that we allocate more bytes than `doc1UpdateSource` size is only because `_inference_fields`, with all of its internal data structures, happens to be much larger than the small original test source. 
This is not of consequence to the behavior we wish to verify.", + "pr_file_module": null + }, + { + "comment_id": "2143764343", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129320, + "pr_file": "x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java", + "discussion_id": "2142767556", + "commented_code": "@@ -616,20 +616,13 @@ public void testIndexingPressure() throws Exception {\n \n IndexingPressure.Coordinating coordinatingIndexingPressure = indexingPressure.getCoordinating();\n assertThat(coordinatingIndexingPressure, notNullValue());\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc0Source));\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc1Source));\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc2Source));\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc3Source));\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc4Source));\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc0UpdateSource));\n+ verify(coordinatingIndexingPressure, times(6)).increment(eq(1), longThat(l -> l > 0));\n if (useLegacyFormat == false) {\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc1UpdateSource));\n+ verify(coordinatingIndexingPressure).increment(1, longThat(l -> l > bytesUsed(doc1UpdateSource)));", + "comment_created_at": "2025-06-12T22:36:34+00:00", + "comment_author": "jimczi", + "comment_body": "thanks, https://github.com/elastic/elasticsearch/pull/129320/commits/c85718d22a7da9f137b3ebc75f5b8699f8aabd3e", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2170079489", + "pr_number": 130041, + "pr_file": "x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/xcontent/XContentUtilsTests.java", + "created_at": "2025-06-26T21:47:28+00:00", + "commented_code": "assertThat(json, equalTo(\"{\\\"authorization\\\":{\\\"api_key\\\":{\\\"id\\\":\\\"\" + apiKeyId + \"\\\",\\\"name\\\":\\\"\" + apiKeyName + \"\\\"}}}\"));\n }\n\n public void testAddAuthorizationInfoWithCloudApiKey() throws IOException {\n String apiKeyId = randomAlphaOfLength(20);\n Authentication authentication = AuthenticationTestHelper.randomCloudApiKeyAuthentication(apiKeyId);\n String json = generateJson(Map.of(AuthenticationField.AUTHENTICATION_KEY, authentication.encode()));\n assertThat(json, containsString(\"{\\\"authorization\\\":{\\\"cloud_api_key\\\":{\\\"id\\\":\\\"\" + apiKeyId + \"\\\"\"));", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2170079489", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130041, + "pr_file": "x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/xcontent/XContentUtilsTests.java", + "discussion_id": "2170079489", + "commented_code": "@@ -62,6 +63,13 @@ public void testAddAuthorizationInfoWithApiKey() throws IOException {\n assertThat(json, equalTo(\"{\\\"authorization\\\":{\\\"api_key\\\":{\\\"id\\\":\\\"\" + apiKeyId + \"\\\",\\\"name\\\":\\\"\" + apiKeyName + \"\\\"}}}\"));\n }\n \n+ public void testAddAuthorizationInfoWithCloudApiKey() throws IOException {\n+ String apiKeyId = randomAlphaOfLength(20);\n+ Authentication authentication = AuthenticationTestHelper.randomCloudApiKeyAuthentication(apiKeyId);\n+ String json = generateJson(Map.of(AuthenticationField.AUTHENTICATION_KEY, authentication.encode()));\n+ assertThat(json, 
containsString(\"{\\\"authorization\\\":{\\\"cloud_api_key\\\":{\\\"id\\\":\\\"\" + apiKeyId + \"\\\"\"));", + "comment_created_at": "2025-06-26T21:47:28+00:00", + "comment_author": "slobodanadamovic", + "comment_body": "Test only asserts `id` field. Using `randomCloudApiKeyAuthentication(apiKeyId)` may not be quite suitable here. Probably it's better to randomize `User` then call `randomCloudApiKeyAuthentication(user)`. This way you can assert all fields as you control user's principal (API key `id`), metadata (`name` and `internal`) and `roles`.", + "pr_file_module": null + }, + { + "comment_id": "2172474348", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130041, + "pr_file": "x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/xcontent/XContentUtilsTests.java", + "discussion_id": "2170079489", + "commented_code": "@@ -62,6 +63,13 @@ public void testAddAuthorizationInfoWithApiKey() throws IOException {\n assertThat(json, equalTo(\"{\\\"authorization\\\":{\\\"api_key\\\":{\\\"id\\\":\\\"\" + apiKeyId + \"\\\",\\\"name\\\":\\\"\" + apiKeyName + \"\\\"}}}\"));\n }\n \n+ public void testAddAuthorizationInfoWithCloudApiKey() throws IOException {\n+ String apiKeyId = randomAlphaOfLength(20);\n+ Authentication authentication = AuthenticationTestHelper.randomCloudApiKeyAuthentication(apiKeyId);\n+ String json = generateJson(Map.of(AuthenticationField.AUTHENTICATION_KEY, authentication.encode()));\n+ assertThat(json, containsString(\"{\\\"authorization\\\":{\\\"cloud_api_key\\\":{\\\"id\\\":\\\"\" + apiKeyId + \"\\\"\"));", + "comment_created_at": "2025-06-27T17:00:11+00:00", + "comment_author": "ankit--sethi", + "comment_body": "refactored the test and some helper methods to assert on a fully randomized user", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2150623161", + "pr_number": 129146, + "pr_file": "x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequestTests.java", + "created_at": "2025-06-16T18:37:42+00:00", + "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. 
Licensed under the Elastic License\n * 2.0; you may not use this file except in compliance with the Elastic License\n * 2.0.\n */\n\npackage org.elasticsearch.xpack.inference.services.ibmwatsonx.request;\n\nimport org.apache.http.HttpHeaders;\nimport org.apache.http.client.methods.HttpPost;\nimport org.elasticsearch.core.Nullable;\nimport org.elasticsearch.test.ESTestCase;\nimport org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;\nimport org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel;\nimport org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModelTests;\n\nimport java.io.IOException;\nimport java.net.URI;\nimport java.net.URISyntaxException;\nimport java.util.List;\nimport java.util.Map;\n\nimport static org.elasticsearch.xpack.inference.external.http.Utils.entityAsMap;\nimport static org.hamcrest.Matchers.aMapWithSize;\nimport static org.hamcrest.Matchers.instanceOf;\nimport static org.hamcrest.Matchers.is;\n\npublic class IbmWatsonxChatCompletionRequestTests extends ESTestCase {\n private static final String AUTH_HEADER_VALUE = \"foo\";\n private static final String API_COMPLETIONS_PATH = \"https://abc.com/ml/v1/text/chat?version=apiVersion\";\n\n public void testCreateRequest_WithStreaming() throws IOException, URISyntaxException {\n var request = createRequest(\"secret\", randomAlphaOfLength(15), \"model\", true);", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2150623161", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129146, + "pr_file": "x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequestTests.java", + "discussion_id": "2150623161", + "commented_code": "@@ -0,0 +1,96 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the Elastic License\n+ * 2.0; you may not use this file except in compliance with the Elastic License\n+ * 2.0.\n+ */\n+\n+package org.elasticsearch.xpack.inference.services.ibmwatsonx.request;\n+\n+import org.apache.http.HttpHeaders;\n+import org.apache.http.client.methods.HttpPost;\n+import org.elasticsearch.core.Nullable;\n+import org.elasticsearch.test.ESTestCase;\n+import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModelTests;\n+\n+import java.io.IOException;\n+import java.net.URI;\n+import java.net.URISyntaxException;\n+import java.util.List;\n+import java.util.Map;\n+\n+import static org.elasticsearch.xpack.inference.external.http.Utils.entityAsMap;\n+import static org.hamcrest.Matchers.aMapWithSize;\n+import static org.hamcrest.Matchers.instanceOf;\n+import static org.hamcrest.Matchers.is;\n+\n+public class IbmWatsonxChatCompletionRequestTests extends ESTestCase {\n+ private static final String AUTH_HEADER_VALUE = \"foo\";\n+ private static final String API_COMPLETIONS_PATH = \"https://abc.com/ml/v1/text/chat?version=apiVersion\";\n+\n+ public void testCreateRequest_WithStreaming() throws IOException, URISyntaxException {\n+ var request = createRequest(\"secret\", randomAlphaOfLength(15), \"model\", true);", + "comment_created_at": "2025-06-16T18:37:42+00:00", + "comment_author": "dan-rubinstein", + "comment_body": "Can we use randomized strings when possible? (ex. \"secret\", \"model\", etc)", + "pr_file_module": null + }, + { + "comment_id": "2156405897", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129146, + "pr_file": "x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequestTests.java", + "discussion_id": "2150623161", + "commented_code": "@@ -0,0 +1,96 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the Elastic License\n+ * 2.0; you may not use this file except in compliance with the Elastic License\n+ * 2.0.\n+ */\n+\n+package org.elasticsearch.xpack.inference.services.ibmwatsonx.request;\n+\n+import org.apache.http.HttpHeaders;\n+import org.apache.http.client.methods.HttpPost;\n+import org.elasticsearch.core.Nullable;\n+import org.elasticsearch.test.ESTestCase;\n+import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModelTests;\n+\n+import java.io.IOException;\n+import java.net.URI;\n+import java.net.URISyntaxException;\n+import java.util.List;\n+import java.util.Map;\n+\n+import static org.elasticsearch.xpack.inference.external.http.Utils.entityAsMap;\n+import static org.hamcrest.Matchers.aMapWithSize;\n+import static org.hamcrest.Matchers.instanceOf;\n+import static org.hamcrest.Matchers.is;\n+\n+public class IbmWatsonxChatCompletionRequestTests extends ESTestCase {\n+ private static final String AUTH_HEADER_VALUE = \"foo\";\n+ private static final String API_COMPLETIONS_PATH = \"https://abc.com/ml/v1/text/chat?version=apiVersion\";\n+\n+ public void testCreateRequest_WithStreaming() throws IOException, URISyntaxException {\n+ var request = createRequest(\"secret\", randomAlphaOfLength(15), \"model\", true);", + "comment_created_at": "2025-06-19T08:06:18+00:00", + "comment_author": "Evgenii-Kazannik", + "comment_body": "I used randomAlphaOfLength in order to generate other values. Thank you", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2169402115", + "pr_number": 130019, + "pr_file": "x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClustersIT.java", + "created_at": "2025-06-26T15:59:32+00:00", + "commented_code": "assertThat(remoteClusterShards.keySet(), equalTo(Set.of(\"total\", \"successful\", \"skipped\", \"failed\")));\n assertThat((Integer) remoteClusterShards.get(\"total\"), greaterThanOrEqualTo(0));\n assertThat((Integer) remoteClusterShards.get(\"successful\"), equalTo((Integer) remoteClusterShards.get(\"total\")));\n assertThat((Integer) remoteClusterShards.get(\"skipped\"), equalTo(0));\n // assertThat((Integer) remoteClusterShards.get(\"skipped\"), equalTo(0));", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2169402115", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130019, + "pr_file": "x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClustersIT.java", + "discussion_id": "2169402115", + "commented_code": "@@ -254,7 +255,7 @@ private void assertClusterDetailsMap(Map result, boolean remoteO\n assertThat(remoteClusterShards.keySet(), equalTo(Set.of(\"total\", \"successful\", \"skipped\", \"failed\")));\n assertThat((Integer) remoteClusterShards.get(\"total\"), greaterThanOrEqualTo(0));\n assertThat((Integer) remoteClusterShards.get(\"successful\"), equalTo((Integer) remoteClusterShards.get(\"total\")));\n- assertThat((Integer) remoteClusterShards.get(\"skipped\"), equalTo(0));\n+ // assertThat((Integer) remoteClusterShards.get(\"skipped\"), equalTo(0));", + "comment_created_at": "2025-06-26T15:59:32+00:00", + "comment_author": "julian-elastic", + "comment_body": "Do you agree this check should be removed? 
The new test cases we added have both local and remote to start with and later we filter local or remote", + "pr_file_module": null + }, + { + "comment_id": "2175130811", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130019, + "pr_file": "x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClustersIT.java", + "discussion_id": "2169402115", + "commented_code": "@@ -254,7 +255,7 @@ private void assertClusterDetailsMap(Map result, boolean remoteO\n assertThat(remoteClusterShards.keySet(), equalTo(Set.of(\"total\", \"successful\", \"skipped\", \"failed\")));\n assertThat((Integer) remoteClusterShards.get(\"total\"), greaterThanOrEqualTo(0));\n assertThat((Integer) remoteClusterShards.get(\"successful\"), equalTo((Integer) remoteClusterShards.get(\"total\")));\n- assertThat((Integer) remoteClusterShards.get(\"skipped\"), equalTo(0));\n+ // assertThat((Integer) remoteClusterShards.get(\"skipped\"), equalTo(0));", + "comment_created_at": "2025-06-30T13:49:29+00:00", + "comment_author": "nik9000", + "comment_body": "I changed it to `greaterThanOrEqualTo(0)` - it's not assertion a whole lot and that's ok with me.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-robust-test-assertions.md b/_reviewers/elasticsearch-robust-test-assertions.md index 3ab2643..873900a 100644 --- a/_reviewers/elasticsearch-robust-test-assertions.md +++ b/_reviewers/elasticsearch-robust-test-assertions.md @@ -62,187 +62,3 @@ Use precise, informative assertions in tests to provide clear feedback when test ``` 6. **Use appropriate matchers**: For values that may vary, use flexible assertions like `greaterThanOrEqualTo()` instead of strict equality. - - -[ - { - "discussion_id": "2156561631", - "pr_number": 129633, - "pr_file": "x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/GroupingAggregatorFunctionTestCase.java", - "created_at": "2025-06-19T09:26:32+00:00", - "commented_code": "delegate.selectedMayContainUnseenGroups(seenGroupIds);\n }\n\n @Override\n public void addIntermediateInput(int positionOffset, IntArrayBlock groupIds, Page page) {\n addIntermediateInputInternal(positionOffset, groupIds, page);\n }\n\n @Override\n public void addIntermediateInput(int positionOffset, IntBigArrayBlock groupIds, Page page) {\n addIntermediateInputInternal(positionOffset, groupIds, page);\n }\n\n @Override\n public void addIntermediateInput(int positionOffset, IntVector groupIds, Page page) {\n addIntermediateInputInternal(positionOffset, groupIds.asBlock(), page);\n }\n\n public void addIntermediateInputInternal(int positionOffset, IntBlock groupIds, Page page) {\n BlockFactory blockFactory = TestBlockFactory.getNonBreakingInstance();\n int[] chunk = new int[emitChunkSize];\n for (int offset = 0; offset < groupIds.getPositionCount(); offset += emitChunkSize) {\n int count = 0;\n for (int i = offset; i < Math.min(groupIds.getPositionCount(), offset + emitChunkSize); i++) {\n chunk[count++] = groupIds.getInt(i);\n int chunkPosition = 0;\n int offset = 0;\n for (int position = 0; position < groupIds.getPositionCount(); position++) {\n if (groupIds.isNull(position)) {\n continue;\n }\n BlockFactory blockFactory = TestBlockFactory.getNonBreakingInstance(); // TODO: just for compile\n delegate.addIntermediateInput(positionOffset + offset, blockFactory.newIntArrayVector(chunk, count), page);\n int firstValueIndex = groupIds.getFirstValueIndex(position);\n int valueCount = 
groupIds.getValueCount(position);\n assert valueCount == 1; // Multi-values make chunking more complex, and it's not a real case yet", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2156561631", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129633, - "pr_file": "x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/GroupingAggregatorFunctionTestCase.java", - "discussion_id": "2156561631", - "commented_code": "@@ -739,16 +739,52 @@ public void selectedMayContainUnseenGroups(SeenGroupIds seenGroupIds) {\n delegate.selectedMayContainUnseenGroups(seenGroupIds);\n }\n \n+ @Override\n+ public void addIntermediateInput(int positionOffset, IntArrayBlock groupIds, Page page) {\n+ addIntermediateInputInternal(positionOffset, groupIds, page);\n+ }\n+\n+ @Override\n+ public void addIntermediateInput(int positionOffset, IntBigArrayBlock groupIds, Page page) {\n+ addIntermediateInputInternal(positionOffset, groupIds, page);\n+ }\n+\n @Override\n public void addIntermediateInput(int positionOffset, IntVector groupIds, Page page) {\n+ addIntermediateInputInternal(positionOffset, groupIds.asBlock(), page);\n+ }\n+\n+ public void addIntermediateInputInternal(int positionOffset, IntBlock groupIds, Page page) {\n+ BlockFactory blockFactory = TestBlockFactory.getNonBreakingInstance();\n int[] chunk = new int[emitChunkSize];\n- for (int offset = 0; offset < groupIds.getPositionCount(); offset += emitChunkSize) {\n- int count = 0;\n- for (int i = offset; i < Math.min(groupIds.getPositionCount(), offset + emitChunkSize); i++) {\n- chunk[count++] = groupIds.getInt(i);\n+ int chunkPosition = 0;\n+ int offset = 0;\n+ for (int position = 0; position < groupIds.getPositionCount(); position++) {\n+ if (groupIds.isNull(position)) {\n+ continue;\n }\n- BlockFactory blockFactory = TestBlockFactory.getNonBreakingInstance(); // TODO: just for compile\n- delegate.addIntermediateInput(positionOffset + offset, blockFactory.newIntArrayVector(chunk, count), page);\n+ int firstValueIndex = groupIds.getFirstValueIndex(position);\n+ int valueCount = groupIds.getValueCount(position);\n+ assert valueCount == 1; // Multi-values make chunking more complex, and it's not a real case yet", - "comment_created_at": "2025-06-19T09:26:32+00:00", - "comment_author": "Copilot", - "comment_body": "[nitpick] Consider replacing the plain 'assert' statement with a proper JUnit assertion (e.g., assertEquals(1, valueCount)) to provide a clearer error message if this assumption fails.\n```suggestion\n assertEquals(1, valueCount, \"Multi-values make chunking more complex, and it's not a real case yet\");\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "967030927", - "pr_number": 89112, - "pr_file": "server/src/test/java/org/elasticsearch/index/analysis/AnalysisTests.java", - "created_at": "2022-09-09T12:46:36+00:00", - "commented_code": "List wordList = Analysis.getWordList(env, nodeSettings, \"foo.bar\");\n assertEquals(Arrays.asList(\"hello\", \"world\"), wordList);\n }\n\n public void testCustomAnalyzerWithNotSupportKey() {\n Settings analyzerSettings = Settings.builder().put(\"tokenizer\", \"standard\").put(\"foo\", \"bar\").put(\"type\", \"custom\").build();\n\n try {\n createComponents(\"my_analyzer\", analyzerSettings, testAnalysis.tokenizer, testAnalysis.charFilter, testAnalysis.tokenFilter);\n fail(\"expected failure\");\n } catch (IllegalArgumentException e) {", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - 
"comment_id": "967030927", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 89112, - "pr_file": "server/src/test/java/org/elasticsearch/index/analysis/AnalysisTests.java", - "discussion_id": "967030927", - "commented_code": "@@ -103,4 +116,15 @@ public void testParseWordList() throws IOException {\n List wordList = Analysis.getWordList(env, nodeSettings, \"foo.bar\");\n assertEquals(Arrays.asList(\"hello\", \"world\"), wordList);\n }\n+\n+ public void testCustomAnalyzerWithNotSupportKey() {\n+ Settings analyzerSettings = Settings.builder().put(\"tokenizer\", \"standard\").put(\"foo\", \"bar\").put(\"type\", \"custom\").build();\n+\n+ try {\n+ createComponents(\"my_analyzer\", analyzerSettings, testAnalysis.tokenizer, testAnalysis.charFilter, testAnalysis.tokenFilter);\n+ fail(\"expected failure\");\n+ } catch (IllegalArgumentException e) {", - "comment_created_at": "2022-09-09T12:46:36+00:00", - "comment_author": "javanna", - "comment_body": "you can use expectThrows instead .", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2142767556", - "pr_number": 129320, - "pr_file": "x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java", - "created_at": "2025-06-12T13:36:52+00:00", - "commented_code": "IndexingPressure.Coordinating coordinatingIndexingPressure = indexingPressure.getCoordinating();\n assertThat(coordinatingIndexingPressure, notNullValue());\n verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc0Source));\n verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc1Source));\n verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc2Source));\n verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc3Source));\n verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc4Source));\n verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc0UpdateSource));\n verify(coordinatingIndexingPressure, times(6)).increment(eq(1), longThat(l -> l > 0));\n if (useLegacyFormat == false) {\n verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc1UpdateSource));\n verify(coordinatingIndexingPressure).increment(1, longThat(l -> l > bytesUsed(doc1UpdateSource)));", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2142767556", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129320, - "pr_file": "x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java", - "discussion_id": "2142767556", - "commented_code": "@@ -616,20 +616,13 @@ public void testIndexingPressure() throws Exception {\n \n IndexingPressure.Coordinating coordinatingIndexingPressure = indexingPressure.getCoordinating();\n assertThat(coordinatingIndexingPressure, notNullValue());\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc0Source));\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc1Source));\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc2Source));\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc3Source));\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc4Source));\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc0UpdateSource));\n+ verify(coordinatingIndexingPressure, times(6)).increment(eq(1), longThat(l -> l > 0));\n if (useLegacyFormat == false) {\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc1UpdateSource));\n+ 
verify(coordinatingIndexingPressure).increment(1, longThat(l -> l > bytesUsed(doc1UpdateSource)));", - "comment_created_at": "2025-06-12T13:36:52+00:00", - "comment_author": "Mikep86", - "comment_body": "If we're not checking for calls to `increment` with exact counts of bytes allocated, we can simplify this to:\r\n\r\n```\r\nverify(coordinatingIndexingPressure, times(useLegacyFormat ? 6 : 7)).increment(eq(1), longThat(l -> l > 0));\r\n```\r\n\r\nThis also makes the test more robust, as the fact that we allocate more bytes than `doc1UpdateSource` size is only because `_inference_fields`, with all of its internal data structures, happens to be much larger than the small original test source. This is not of consequence to the behavior we wish to verify.", - "pr_file_module": null - }, - { - "comment_id": "2143764343", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129320, - "pr_file": "x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java", - "discussion_id": "2142767556", - "commented_code": "@@ -616,20 +616,13 @@ public void testIndexingPressure() throws Exception {\n \n IndexingPressure.Coordinating coordinatingIndexingPressure = indexingPressure.getCoordinating();\n assertThat(coordinatingIndexingPressure, notNullValue());\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc0Source));\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc1Source));\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc2Source));\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc3Source));\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc4Source));\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc0UpdateSource));\n+ verify(coordinatingIndexingPressure, times(6)).increment(eq(1), longThat(l -> l > 0));\n if (useLegacyFormat == false) {\n- verify(coordinatingIndexingPressure).increment(1, bytesUsed(doc1UpdateSource));\n+ verify(coordinatingIndexingPressure).increment(1, longThat(l -> l > bytesUsed(doc1UpdateSource)));", - "comment_created_at": "2025-06-12T22:36:34+00:00", - "comment_author": "jimczi", - "comment_body": "thanks, https://github.com/elastic/elasticsearch/pull/129320/commits/c85718d22a7da9f137b3ebc75f5b8699f8aabd3e", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2170079489", - "pr_number": 130041, - "pr_file": "x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/xcontent/XContentUtilsTests.java", - "created_at": "2025-06-26T21:47:28+00:00", - "commented_code": "assertThat(json, equalTo(\"{\\\"authorization\\\":{\\\"api_key\\\":{\\\"id\\\":\\\"\" + apiKeyId + \"\\\",\\\"name\\\":\\\"\" + apiKeyName + \"\\\"}}}\"));\n }\n\n public void testAddAuthorizationInfoWithCloudApiKey() throws IOException {\n String apiKeyId = randomAlphaOfLength(20);\n Authentication authentication = AuthenticationTestHelper.randomCloudApiKeyAuthentication(apiKeyId);\n String json = generateJson(Map.of(AuthenticationField.AUTHENTICATION_KEY, authentication.encode()));\n assertThat(json, containsString(\"{\\\"authorization\\\":{\\\"cloud_api_key\\\":{\\\"id\\\":\\\"\" + apiKeyId + \"\\\"\"));", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2170079489", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130041, - "pr_file": "x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/xcontent/XContentUtilsTests.java", - "discussion_id": "2170079489", - 
"commented_code": "@@ -62,6 +63,13 @@ public void testAddAuthorizationInfoWithApiKey() throws IOException {\n assertThat(json, equalTo(\"{\\\"authorization\\\":{\\\"api_key\\\":{\\\"id\\\":\\\"\" + apiKeyId + \"\\\",\\\"name\\\":\\\"\" + apiKeyName + \"\\\"}}}\"));\n }\n \n+ public void testAddAuthorizationInfoWithCloudApiKey() throws IOException {\n+ String apiKeyId = randomAlphaOfLength(20);\n+ Authentication authentication = AuthenticationTestHelper.randomCloudApiKeyAuthentication(apiKeyId);\n+ String json = generateJson(Map.of(AuthenticationField.AUTHENTICATION_KEY, authentication.encode()));\n+ assertThat(json, containsString(\"{\\\"authorization\\\":{\\\"cloud_api_key\\\":{\\\"id\\\":\\\"\" + apiKeyId + \"\\\"\"));", - "comment_created_at": "2025-06-26T21:47:28+00:00", - "comment_author": "slobodanadamovic", - "comment_body": "Test only asserts `id` field. Using `randomCloudApiKeyAuthentication(apiKeyId)` may not be quite suitable here. Probably it's better to randomize `User` then call `randomCloudApiKeyAuthentication(user)`. This way you can assert all fields as you control user's principal (API key `id`), metadata (`name` and `internal`) and `roles`.", - "pr_file_module": null - }, - { - "comment_id": "2172474348", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130041, - "pr_file": "x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/xcontent/XContentUtilsTests.java", - "discussion_id": "2170079489", - "commented_code": "@@ -62,6 +63,13 @@ public void testAddAuthorizationInfoWithApiKey() throws IOException {\n assertThat(json, equalTo(\"{\\\"authorization\\\":{\\\"api_key\\\":{\\\"id\\\":\\\"\" + apiKeyId + \"\\\",\\\"name\\\":\\\"\" + apiKeyName + \"\\\"}}}\"));\n }\n \n+ public void testAddAuthorizationInfoWithCloudApiKey() throws IOException {\n+ String apiKeyId = randomAlphaOfLength(20);\n+ Authentication authentication = AuthenticationTestHelper.randomCloudApiKeyAuthentication(apiKeyId);\n+ String json = generateJson(Map.of(AuthenticationField.AUTHENTICATION_KEY, authentication.encode()));\n+ assertThat(json, containsString(\"{\\\"authorization\\\":{\\\"cloud_api_key\\\":{\\\"id\\\":\\\"\" + apiKeyId + \"\\\"\"));", - "comment_created_at": "2025-06-27T17:00:11+00:00", - "comment_author": "ankit--sethi", - "comment_body": "refactored the test and some helper methods to assert on a fully randomized user", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2150623161", - "pr_number": 129146, - "pr_file": "x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequestTests.java", - "created_at": "2025-06-16T18:37:42+00:00", - "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. 
Licensed under the Elastic License\n * 2.0; you may not use this file except in compliance with the Elastic License\n * 2.0.\n */\n\npackage org.elasticsearch.xpack.inference.services.ibmwatsonx.request;\n\nimport org.apache.http.HttpHeaders;\nimport org.apache.http.client.methods.HttpPost;\nimport org.elasticsearch.core.Nullable;\nimport org.elasticsearch.test.ESTestCase;\nimport org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;\nimport org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel;\nimport org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModelTests;\n\nimport java.io.IOException;\nimport java.net.URI;\nimport java.net.URISyntaxException;\nimport java.util.List;\nimport java.util.Map;\n\nimport static org.elasticsearch.xpack.inference.external.http.Utils.entityAsMap;\nimport static org.hamcrest.Matchers.aMapWithSize;\nimport static org.hamcrest.Matchers.instanceOf;\nimport static org.hamcrest.Matchers.is;\n\npublic class IbmWatsonxChatCompletionRequestTests extends ESTestCase {\n private static final String AUTH_HEADER_VALUE = \"foo\";\n private static final String API_COMPLETIONS_PATH = \"https://abc.com/ml/v1/text/chat?version=apiVersion\";\n\n public void testCreateRequest_WithStreaming() throws IOException, URISyntaxException {\n var request = createRequest(\"secret\", randomAlphaOfLength(15), \"model\", true);", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2150623161", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129146, - "pr_file": "x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequestTests.java", - "discussion_id": "2150623161", - "commented_code": "@@ -0,0 +1,96 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the Elastic License\n+ * 2.0; you may not use this file except in compliance with the Elastic License\n+ * 2.0.\n+ */\n+\n+package org.elasticsearch.xpack.inference.services.ibmwatsonx.request;\n+\n+import org.apache.http.HttpHeaders;\n+import org.apache.http.client.methods.HttpPost;\n+import org.elasticsearch.core.Nullable;\n+import org.elasticsearch.test.ESTestCase;\n+import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModelTests;\n+\n+import java.io.IOException;\n+import java.net.URI;\n+import java.net.URISyntaxException;\n+import java.util.List;\n+import java.util.Map;\n+\n+import static org.elasticsearch.xpack.inference.external.http.Utils.entityAsMap;\n+import static org.hamcrest.Matchers.aMapWithSize;\n+import static org.hamcrest.Matchers.instanceOf;\n+import static org.hamcrest.Matchers.is;\n+\n+public class IbmWatsonxChatCompletionRequestTests extends ESTestCase {\n+ private static final String AUTH_HEADER_VALUE = \"foo\";\n+ private static final String API_COMPLETIONS_PATH = \"https://abc.com/ml/v1/text/chat?version=apiVersion\";\n+\n+ public void testCreateRequest_WithStreaming() throws IOException, URISyntaxException {\n+ var request = createRequest(\"secret\", randomAlphaOfLength(15), \"model\", true);", - "comment_created_at": "2025-06-16T18:37:42+00:00", - "comment_author": "dan-rubinstein", - "comment_body": "Can we use randomized strings when possible? (ex. \"secret\", \"model\", etc)", - "pr_file_module": null - }, - { - "comment_id": "2156405897", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129146, - "pr_file": "x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequestTests.java", - "discussion_id": "2150623161", - "commented_code": "@@ -0,0 +1,96 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the Elastic License\n+ * 2.0; you may not use this file except in compliance with the Elastic License\n+ * 2.0.\n+ */\n+\n+package org.elasticsearch.xpack.inference.services.ibmwatsonx.request;\n+\n+import org.apache.http.HttpHeaders;\n+import org.apache.http.client.methods.HttpPost;\n+import org.elasticsearch.core.Nullable;\n+import org.elasticsearch.test.ESTestCase;\n+import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel;\n+import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModelTests;\n+\n+import java.io.IOException;\n+import java.net.URI;\n+import java.net.URISyntaxException;\n+import java.util.List;\n+import java.util.Map;\n+\n+import static org.elasticsearch.xpack.inference.external.http.Utils.entityAsMap;\n+import static org.hamcrest.Matchers.aMapWithSize;\n+import static org.hamcrest.Matchers.instanceOf;\n+import static org.hamcrest.Matchers.is;\n+\n+public class IbmWatsonxChatCompletionRequestTests extends ESTestCase {\n+ private static final String AUTH_HEADER_VALUE = \"foo\";\n+ private static final String API_COMPLETIONS_PATH = \"https://abc.com/ml/v1/text/chat?version=apiVersion\";\n+\n+ public void testCreateRequest_WithStreaming() throws IOException, URISyntaxException {\n+ var request = createRequest(\"secret\", randomAlphaOfLength(15), \"model\", true);", - "comment_created_at": "2025-06-19T08:06:18+00:00", - "comment_author": "Evgenii-Kazannik", - "comment_body": "I used randomAlphaOfLength in order to generate other values. Thank you", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2169402115", - "pr_number": 130019, - "pr_file": "x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClustersIT.java", - "created_at": "2025-06-26T15:59:32+00:00", - "commented_code": "assertThat(remoteClusterShards.keySet(), equalTo(Set.of(\"total\", \"successful\", \"skipped\", \"failed\")));\n assertThat((Integer) remoteClusterShards.get(\"total\"), greaterThanOrEqualTo(0));\n assertThat((Integer) remoteClusterShards.get(\"successful\"), equalTo((Integer) remoteClusterShards.get(\"total\")));\n assertThat((Integer) remoteClusterShards.get(\"skipped\"), equalTo(0));\n // assertThat((Integer) remoteClusterShards.get(\"skipped\"), equalTo(0));", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2169402115", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130019, - "pr_file": "x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClustersIT.java", - "discussion_id": "2169402115", - "commented_code": "@@ -254,7 +255,7 @@ private void assertClusterDetailsMap(Map result, boolean remoteO\n assertThat(remoteClusterShards.keySet(), equalTo(Set.of(\"total\", \"successful\", \"skipped\", \"failed\")));\n assertThat((Integer) remoteClusterShards.get(\"total\"), greaterThanOrEqualTo(0));\n assertThat((Integer) remoteClusterShards.get(\"successful\"), equalTo((Integer) remoteClusterShards.get(\"total\")));\n- assertThat((Integer) remoteClusterShards.get(\"skipped\"), equalTo(0));\n+ // assertThat((Integer) remoteClusterShards.get(\"skipped\"), equalTo(0));", - "comment_created_at": "2025-06-26T15:59:32+00:00", - "comment_author": "julian-elastic", - "comment_body": "Do you agree this check should be removed? 
The new test cases we added have both local and remote to start with and later we filter local or remote", - "pr_file_module": null - }, - { - "comment_id": "2175130811", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130019, - "pr_file": "x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClustersIT.java", - "discussion_id": "2169402115", - "commented_code": "@@ -254,7 +255,7 @@ private void assertClusterDetailsMap(Map result, boolean remoteO\n assertThat(remoteClusterShards.keySet(), equalTo(Set.of(\"total\", \"successful\", \"skipped\", \"failed\")));\n assertThat((Integer) remoteClusterShards.get(\"total\"), greaterThanOrEqualTo(0));\n assertThat((Integer) remoteClusterShards.get(\"successful\"), equalTo((Integer) remoteClusterShards.get(\"total\")));\n- assertThat((Integer) remoteClusterShards.get(\"skipped\"), equalTo(0));\n+ // assertThat((Integer) remoteClusterShards.get(\"skipped\"), equalTo(0));", - "comment_created_at": "2025-06-30T13:49:29+00:00", - "comment_author": "nik9000", - "comment_body": "I changed it to `greaterThanOrEqualTo(0)` - it's not assertion a whole lot and that's ok with me.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-scope-and-document-configurations.json b/_reviewers/elasticsearch-scope-and-document-configurations.json new file mode 100644 index 0000000..ab19bcd --- /dev/null +++ b/_reviewers/elasticsearch-scope-and-document-configurations.json @@ -0,0 +1,104 @@ +[ + { + "discussion_id": "2157620496", + "pr_number": 129704, + "pr_file": "docs/reference/elasticsearch/index-settings/serverless.md", + "created_at": "2025-06-19T20:23:19+00:00", + "commented_code": "---\nnavigation_title: Serverless index settings\napplies_to:\n serverless: all\n---\n\n# Index settings available in {{serverless-full}} projects\n\nThis page lists the {{es}} index settings available in {{serverless-full}} projects.", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2157620496", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129704, + "pr_file": "docs/reference/elasticsearch/index-settings/serverless.md", + "discussion_id": "2157620496", + "commented_code": "@@ -0,0 +1,61 @@\n+---\n+navigation_title: Serverless index settings\n+applies_to:\n+ serverless: all\n+---\n+\n+# Index settings available in {{serverless-full}} projects\n+\n+This page lists the {{es}} index settings available in {{serverless-full}} projects.", + "comment_created_at": "2025-06-19T20:23:19+00:00", + "comment_author": "shainaraskas", + "comment_body": "```suggestion\r\n[{{serverless-full}}](docs-content://deploy-manage/deploy/elastic-cloud/serverless.md) manages most index settings for you. 
This page lists the {{es}} index settings available in {{serverless-short}} projects.\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2159581100", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129704, + "pr_file": "docs/reference/elasticsearch/index-settings/serverless.md", + "discussion_id": "2157620496", + "commented_code": "@@ -0,0 +1,61 @@\n+---\n+navigation_title: Serverless index settings\n+applies_to:\n+ serverless: all\n+---\n+\n+# Index settings available in {{serverless-full}} projects\n+\n+This page lists the {{es}} index settings available in {{serverless-full}} projects.", + "comment_created_at": "2025-06-20T19:50:31+00:00", + "comment_author": "shainaraskas", + "comment_body": "I think we probably should just align closely with whatever PM approves in https://github.com/elastic/docs-content/pull/1728 so we don't need an extra round of approvals\r\n\r\nmaybe \r\n\r\n```suggestion\r\nIn {{serverless-full}} projects, configuration available to users is [limited](docs-content://deploy-manage/deploy/elastic-cloud/differences-from-other-elasticsearch-offerings.md) to certain index-level settings. These restrictions help ensure the reliability of {{serverless-short}} projects.\r\n\r\nThis page lists the {{es}} index settings available in {{serverless-full}} projects.\r\n```\r\n\r\nor \r\n\r\n```suggestion\r\nIn {{serverless-full}} projects, configuration available to users is [limited](docs-content://deploy-manage/deploy/elastic-cloud/differences-from-other-elasticsearch-offerings.md) to certain index-level settings.\r\n\r\nThis page lists the {{es}} index settings available in {{serverless-full}} projects.\r\n```\r\n\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2172153221", + "pr_number": 127223, + "pr_file": "docs/reference/elasticsearch/mapping-reference/dense-vector.md", + "created_at": "2025-06-27T14:19:42+00:00", + "commented_code": ": In case a knn query specifies a `rescore_vector` parameter, the query `rescore_vector` parameter will be used instead.\n : See [oversampling and rescoring quantized vectors](docs-content://solutions/search/vector/knn.md#dense-vector-knn-search-rescoring) for details.\n:::::\n\n`early_termination`\n: (Optional, boolean) Apply _patience_ based early termination strategy to knn queries over HNSW graphs (see [paper](https://cs.uwaterloo.ca/~jimmylin/publications/Teofili_Lin_ECIR2025.pdf)). This is expected to produce Only applicable to `hnsw`, `int8_hnsw`, `int4_hnsw` and `bbq_hnsw` index types.", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2172153221", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 127223, + "pr_file": "docs/reference/elasticsearch/mapping-reference/dense-vector.md", + "discussion_id": "2172153221", + "commented_code": "@@ -281,6 +281,9 @@ $$$dense-vector-index-options$$$\n : In case a knn query specifies a `rescore_vector` parameter, the query `rescore_vector` parameter will be used instead.\n : See [oversampling and rescoring quantized vectors](docs-content://solutions/search/vector/knn.md#dense-vector-knn-search-rescoring) for details.\n :::::\n+\n+`early_termination`\n+: (Optional, boolean) Apply _patience_ based early termination strategy to knn queries over HNSW graphs (see [paper](https://cs.uwaterloo.ca/~jimmylin/publications/Teofili_Lin_ECIR2025.pdf)). 
This is expected to produce Only applicable to `hnsw`, `int8_hnsw`, `int4_hnsw` and `bbq_hnsw` index types.", + "comment_created_at": "2025-06-27T14:19:42+00:00", + "comment_author": "benwtrent", + "comment_body": "I think this should be an index wide setting if possible. It can be dynamic there since its query time only, but having it per vector field seems like too much.", + "pr_file_module": null + }, + { + "comment_id": "2172360995", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 127223, + "pr_file": "docs/reference/elasticsearch/mapping-reference/dense-vector.md", + "discussion_id": "2172153221", + "commented_code": "@@ -281,6 +281,9 @@ $$$dense-vector-index-options$$$\n : In case a knn query specifies a `rescore_vector` parameter, the query `rescore_vector` parameter will be used instead.\n : See [oversampling and rescoring quantized vectors](docs-content://solutions/search/vector/knn.md#dense-vector-knn-search-rescoring) for details.\n :::::\n+\n+`early_termination`\n+: (Optional, boolean) Apply _patience_ based early termination strategy to knn queries over HNSW graphs (see [paper](https://cs.uwaterloo.ca/~jimmylin/publications/Teofili_Lin_ECIR2025.pdf)). This is expected to produce Only applicable to `hnsw`, `int8_hnsw`, `int4_hnsw` and `bbq_hnsw` index types.", + "comment_created_at": "2025-06-27T16:11:20+00:00", + "comment_author": "tteofili", + "comment_body": "I can see your point, however I can think of this setting to be either enabled or not either index or field-wise, whereas I don't think users would enable/disable it on a per query basis.", + "pr_file_module": null + }, + { + "comment_id": "2172961695", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 127223, + "pr_file": "docs/reference/elasticsearch/mapping-reference/dense-vector.md", + "discussion_id": "2172153221", + "commented_code": "@@ -281,6 +281,9 @@ $$$dense-vector-index-options$$$\n : In case a knn query specifies a `rescore_vector` parameter, the query `rescore_vector` parameter will be used instead.\n : See [oversampling and rescoring quantized vectors](docs-content://solutions/search/vector/knn.md#dense-vector-knn-search-rescoring) for details.\n :::::\n+\n+`early_termination`\n+: (Optional, boolean) Apply _patience_ based early termination strategy to knn queries over HNSW graphs (see [paper](https://cs.uwaterloo.ca/~jimmylin/publications/Teofili_Lin_ECIR2025.pdf)). This is expected to produce Only applicable to `hnsw`, `int8_hnsw`, `int4_hnsw` and `bbq_hnsw` index types.", + "comment_created_at": "2025-06-27T23:01:24+00:00", + "comment_author": "benwtrent", + "comment_body": "> however I can think of this setting to be either enabled or not either index or field-wise\r\n\r\nI agree, I think it should be done at the index level. 
I only mention the \"query time\" nature of it to indicate that it can be a dynamic index setting.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2064735069", + "pr_number": 127482, + "pr_file": "docs/reference/elasticsearch/jvm-settings.md", + "created_at": "2025-04-28T21:06:09+00:00", + "commented_code": "::::\n\n\n## Default JVM heap sizes [default-jvm-sizes]\n\nIf heap sizes are not specifically set, {{es}} will calculate JVM heap sizing based on the total amount of system memory, depending on the node's role.\n\n* Master-only node\n * 60% of total system memory, up to a maximum of 31 GB.\n* Machine Learning-only node\n * 40% of the first 16 gigabytes plus 10% of memory above that when total system memory is more than 16 gigabytes, up to a maximum of 31 GB.\n* Data-only node\n * 40% of total system memory when less than 1 GB, with a minimum of 128 MB.\n * 50% of total system memory when more than 1 GB, with a maximum of 31 GB.", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2064735069", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 127482, + "pr_file": "docs/reference/elasticsearch/jvm-settings.md", + "discussion_id": "2064735069", + "commented_code": "@@ -128,6 +128,18 @@ If you are running {{es}} as a Windows service, you can change the heap size usi\n ::::\n \n \n+## Default JVM heap sizes [default-jvm-sizes]\n+\n+If heap sizes are not specifically set, {{es}} will calculate JVM heap sizing based on the total amount of system memory, depending on the node's role.\n+\n+* Master-only node\n+ * 60% of total system memory, up to a maximum of 31 GB.\n+* Machine Learning-only node\n+ * 40% of the first 16 gigabytes plus 10% of memory above that when total system memory is more than 16 gigabytes, up to a maximum of 31 GB.\n+* Data-only node\n+ * 40% of total system memory when less than 1 GB, with a minimum of 128 MB.\n+ * 50% of total system memory when more than 1 GB, with a maximum of 31 GB.", + "comment_created_at": "2025-04-28T21:06:09+00:00", + "comment_author": "Copilot", + "comment_body": "The documentation for Data-only nodes distinguishes between memory amounts 'less than 1 GB' and 'more than 1 GB', but it does not specify what happens when the node has exactly 1 GB of memory. Consider adding clarification for this edge case.\n```suggestion\n * 50% of total system memory when 1 GB or more, with a maximum of 31 GB.\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-scope-and-document-configurations.md b/_reviewers/elasticsearch-scope-and-document-configurations.md index 496c6ac..32e7644 100644 --- a/_reviewers/elasticsearch-scope-and-document-configurations.md +++ b/_reviewers/elasticsearch-scope-and-document-configurations.md @@ -36,109 +36,3 @@ When designing and implementing configuration options, carefully consider two ke ``` For managed or restricted environments, clearly explain which configuration options are available to users and why certain configurations might be limited or handled automatically. 
- - -[ - { - "discussion_id": "2157620496", - "pr_number": 129704, - "pr_file": "docs/reference/elasticsearch/index-settings/serverless.md", - "created_at": "2025-06-19T20:23:19+00:00", - "commented_code": "---\nnavigation_title: Serverless index settings\napplies_to:\n serverless: all\n---\n\n# Index settings available in {{serverless-full}} projects\n\nThis page lists the {{es}} index settings available in {{serverless-full}} projects.", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2157620496", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129704, - "pr_file": "docs/reference/elasticsearch/index-settings/serverless.md", - "discussion_id": "2157620496", - "commented_code": "@@ -0,0 +1,61 @@\n+---\n+navigation_title: Serverless index settings\n+applies_to:\n+ serverless: all\n+---\n+\n+# Index settings available in {{serverless-full}} projects\n+\n+This page lists the {{es}} index settings available in {{serverless-full}} projects.", - "comment_created_at": "2025-06-19T20:23:19+00:00", - "comment_author": "shainaraskas", - "comment_body": "```suggestion\r\n[{{serverless-full}}](docs-content://deploy-manage/deploy/elastic-cloud/serverless.md) manages most index settings for you. This page lists the {{es}} index settings available in {{serverless-short}} projects.\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2159581100", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129704, - "pr_file": "docs/reference/elasticsearch/index-settings/serverless.md", - "discussion_id": "2157620496", - "commented_code": "@@ -0,0 +1,61 @@\n+---\n+navigation_title: Serverless index settings\n+applies_to:\n+ serverless: all\n+---\n+\n+# Index settings available in {{serverless-full}} projects\n+\n+This page lists the {{es}} index settings available in {{serverless-full}} projects.", - "comment_created_at": "2025-06-20T19:50:31+00:00", - "comment_author": "shainaraskas", - "comment_body": "I think we probably should just align closely with whatever PM approves in https://github.com/elastic/docs-content/pull/1728 so we don't need an extra round of approvals\r\n\r\nmaybe \r\n\r\n```suggestion\r\nIn {{serverless-full}} projects, configuration available to users is [limited](docs-content://deploy-manage/deploy/elastic-cloud/differences-from-other-elasticsearch-offerings.md) to certain index-level settings. 
These restrictions help ensure the reliability of {{serverless-short}} projects.\r\n\r\nThis page lists the {{es}} index settings available in {{serverless-full}} projects.\r\n```\r\n\r\nor \r\n\r\n```suggestion\r\nIn {{serverless-full}} projects, configuration available to users is [limited](docs-content://deploy-manage/deploy/elastic-cloud/differences-from-other-elasticsearch-offerings.md) to certain index-level settings.\r\n\r\nThis page lists the {{es}} index settings available in {{serverless-full}} projects.\r\n```\r\n\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2172153221", - "pr_number": 127223, - "pr_file": "docs/reference/elasticsearch/mapping-reference/dense-vector.md", - "created_at": "2025-06-27T14:19:42+00:00", - "commented_code": ": In case a knn query specifies a `rescore_vector` parameter, the query `rescore_vector` parameter will be used instead.\n : See [oversampling and rescoring quantized vectors](docs-content://solutions/search/vector/knn.md#dense-vector-knn-search-rescoring) for details.\n:::::\n\n`early_termination`\n: (Optional, boolean) Apply _patience_ based early termination strategy to knn queries over HNSW graphs (see [paper](https://cs.uwaterloo.ca/~jimmylin/publications/Teofili_Lin_ECIR2025.pdf)). This is expected to produce Only applicable to `hnsw`, `int8_hnsw`, `int4_hnsw` and `bbq_hnsw` index types.", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2172153221", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 127223, - "pr_file": "docs/reference/elasticsearch/mapping-reference/dense-vector.md", - "discussion_id": "2172153221", - "commented_code": "@@ -281,6 +281,9 @@ $$$dense-vector-index-options$$$\n : In case a knn query specifies a `rescore_vector` parameter, the query `rescore_vector` parameter will be used instead.\n : See [oversampling and rescoring quantized vectors](docs-content://solutions/search/vector/knn.md#dense-vector-knn-search-rescoring) for details.\n :::::\n+\n+`early_termination`\n+: (Optional, boolean) Apply _patience_ based early termination strategy to knn queries over HNSW graphs (see [paper](https://cs.uwaterloo.ca/~jimmylin/publications/Teofili_Lin_ECIR2025.pdf)). This is expected to produce Only applicable to `hnsw`, `int8_hnsw`, `int4_hnsw` and `bbq_hnsw` index types.", - "comment_created_at": "2025-06-27T14:19:42+00:00", - "comment_author": "benwtrent", - "comment_body": "I think this should be an index wide setting if possible. It can be dynamic there since its query time only, but having it per vector field seems like too much.", - "pr_file_module": null - }, - { - "comment_id": "2172360995", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 127223, - "pr_file": "docs/reference/elasticsearch/mapping-reference/dense-vector.md", - "discussion_id": "2172153221", - "commented_code": "@@ -281,6 +281,9 @@ $$$dense-vector-index-options$$$\n : In case a knn query specifies a `rescore_vector` parameter, the query `rescore_vector` parameter will be used instead.\n : See [oversampling and rescoring quantized vectors](docs-content://solutions/search/vector/knn.md#dense-vector-knn-search-rescoring) for details.\n :::::\n+\n+`early_termination`\n+: (Optional, boolean) Apply _patience_ based early termination strategy to knn queries over HNSW graphs (see [paper](https://cs.uwaterloo.ca/~jimmylin/publications/Teofili_Lin_ECIR2025.pdf)). 
This is expected to produce Only applicable to `hnsw`, `int8_hnsw`, `int4_hnsw` and `bbq_hnsw` index types.", - "comment_created_at": "2025-06-27T16:11:20+00:00", - "comment_author": "tteofili", - "comment_body": "I can see your point, however I can think of this setting to be either enabled or not either index or field-wise, whereas I don't think users would enable/disable it on a per query basis.", - "pr_file_module": null - }, - { - "comment_id": "2172961695", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 127223, - "pr_file": "docs/reference/elasticsearch/mapping-reference/dense-vector.md", - "discussion_id": "2172153221", - "commented_code": "@@ -281,6 +281,9 @@ $$$dense-vector-index-options$$$\n : In case a knn query specifies a `rescore_vector` parameter, the query `rescore_vector` parameter will be used instead.\n : See [oversampling and rescoring quantized vectors](docs-content://solutions/search/vector/knn.md#dense-vector-knn-search-rescoring) for details.\n :::::\n+\n+`early_termination`\n+: (Optional, boolean) Apply _patience_ based early termination strategy to knn queries over HNSW graphs (see [paper](https://cs.uwaterloo.ca/~jimmylin/publications/Teofili_Lin_ECIR2025.pdf)). This is expected to produce Only applicable to `hnsw`, `int8_hnsw`, `int4_hnsw` and `bbq_hnsw` index types.", - "comment_created_at": "2025-06-27T23:01:24+00:00", - "comment_author": "benwtrent", - "comment_body": "> however I can think of this setting to be either enabled or not either index or field-wise\r\n\r\nI agree, I think it should be done at the index level. I only mention the \"query time\" nature of it to indicate that it can be a dynamic index setting.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2064735069", - "pr_number": 127482, - "pr_file": "docs/reference/elasticsearch/jvm-settings.md", - "created_at": "2025-04-28T21:06:09+00:00", - "commented_code": "::::\n\n\n## Default JVM heap sizes [default-jvm-sizes]\n\nIf heap sizes are not specifically set, {{es}} will calculate JVM heap sizing based on the total amount of system memory, depending on the node's role.\n\n* Master-only node\n * 60% of total system memory, up to a maximum of 31 GB.\n* Machine Learning-only node\n * 40% of the first 16 gigabytes plus 10% of memory above that when total system memory is more than 16 gigabytes, up to a maximum of 31 GB.\n* Data-only node\n * 40% of total system memory when less than 1 GB, with a minimum of 128 MB.\n * 50% of total system memory when more than 1 GB, with a maximum of 31 GB.", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2064735069", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 127482, - "pr_file": "docs/reference/elasticsearch/jvm-settings.md", - "discussion_id": "2064735069", - "commented_code": "@@ -128,6 +128,18 @@ If you are running {{es}} as a Windows service, you can change the heap size usi\n ::::\n \n \n+## Default JVM heap sizes [default-jvm-sizes]\n+\n+If heap sizes are not specifically set, {{es}} will calculate JVM heap sizing based on the total amount of system memory, depending on the node's role.\n+\n+* Master-only node\n+ * 60% of total system memory, up to a maximum of 31 GB.\n+* Machine Learning-only node\n+ * 40% of the first 16 gigabytes plus 10% of memory above that when total system memory is more than 16 gigabytes, up to a maximum of 31 GB.\n+* Data-only node\n+ * 40% of total system memory when less than 1 GB, with a minimum of 128 MB.\n+ * 50% of total system memory 
when more than 1 GB, with a maximum of 31 GB.", - "comment_created_at": "2025-04-28T21:06:09+00:00", - "comment_author": "Copilot", - "comment_body": "The documentation for Data-only nodes distinguishes between memory amounts 'less than 1 GB' and 'more than 1 GB', but it does not specify what happens when the node has exactly 1 GB of memory. Consider adding clarification for this edge case.\n```suggestion\n * 50% of total system memory when 1 GB or more, with a maximum of 31 GB.\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-specify-explicit-rest-formats.json b/_reviewers/elasticsearch-specify-explicit-rest-formats.json new file mode 100644 index 0000000..563ea68 --- /dev/null +++ b/_reviewers/elasticsearch-specify-explicit-rest-formats.json @@ -0,0 +1,70 @@ +[ + { + "discussion_id": "2179929463", + "pr_number": 130325, + "pr_file": "modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/120_grok.yml", + "created_at": "2025-07-02T12:29:26+00:00", + "commented_code": "ingest.processor_grok: {}\n - length: { patterns: 318 }\n - match: { patterns.PATH: \"(?:%{UNIXPATH}|%{WINPATH})\" }\n\n\n---\n\"Test simulate with invalid GROK pattern\":\n - skip:\n features: headers\n - do:\n catch: bad_request\n headers:\n Content-Type: application/json", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2179929463", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130325, + "pr_file": "modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/120_grok.yml", + "discussion_id": "2179929463", + "commented_code": "@@ -154,3 +154,45 @@ teardown:\n ingest.processor_grok: {}\n - length: { patterns: 318 }\n - match: { patterns.PATH: \"(?:%{UNIXPATH}|%{WINPATH})\" }\n+\n+\n+---\n+\"Test simulate with invalid GROK pattern\":\n+ - skip:\n+ features: headers\n+ - do:\n+ catch: bad_request\n+ headers:\n+ Content-Type: application/json", + "comment_created_at": "2025-07-02T12:29:26+00:00", + "comment_author": "szybia", + "comment_body": "re. 
headers: existing `simulate.ingest` REST tests have skip and send a content-type header.\r\n\r\ntest either doesn't run or breaks without this, it seems without it the endpoint expects cbor by default.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2180109441", + "pr_number": 130402, + "pr_file": "x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/950_pinned_interaction.yml", + "created_at": "2025-07-02T13:45:39+00:00", + "commented_code": "retriever:\n rrf:\n retrievers:\n -\n standard:\n query:\n match: { text: \"document\" }\n -\n pinned:\n ids: [\"doc4\", \"doc5\"]\n retriever:\n standard:\n query:\n match: { text: \"document\" }\n\n - match: { hits.total.value: 5 }\n - match: { hits.hits.0._id: doc1 }\n - lt: { hits.hits.0._score: 100.0 }\n - match: { hits.hits.1._id: doc4 }\n - match: { hits.hits.2._id: doc5 }\n match_none: {}", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2180109441", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130402, + "pr_file": "x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/950_pinned_interaction.yml", + "discussion_id": "2180109441", + "commented_code": "@@ -77,22 +77,15 @@ setup:\n retriever:\n rrf:\n retrievers:\n- -\n- standard:\n- query:\n- match: { text: \"document\" }\n -\n pinned:\n ids: [\"doc4\", \"doc5\"]\n retriever:\n standard:\n query:\n- match: { text: \"document\" }\n-\n- - match: { hits.total.value: 5 }\n- - match: { hits.hits.0._id: doc1 }\n- - lt: { hits.hits.0._score: 100.0 }\n- - match: { hits.hits.1._id: doc4 }\n- - match: { hits.hits.2._id: doc5 }\n+ match_none: {}", + "comment_created_at": "2025-07-02T13:45:39+00:00", + "comment_author": "Mikep86", + "comment_body": "I think @kderusso meant creating an `rrf` retriever with two sub-retrievers, one of which is a `match_none` query.", + "pr_file_module": null + }, + { + "comment_id": "2180119064", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130402, + "pr_file": "x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/950_pinned_interaction.yml", + "discussion_id": "2180109441", + "commented_code": "@@ -77,22 +77,15 @@ setup:\n retriever:\n rrf:\n retrievers:\n- -\n- standard:\n- query:\n- match: { text: \"document\" }\n -\n pinned:\n ids: [\"doc4\", \"doc5\"]\n retriever:\n standard:\n query:\n- match: { text: \"document\" }\n-\n- - match: { hits.total.value: 5 }\n- - match: { hits.hits.0._id: doc1 }\n- - lt: { hits.hits.0._score: 100.0 }\n- - match: { hits.hits.1._id: doc4 }\n- - match: { hits.hits.2._id: doc5 }\n+ match_none: {}", + "comment_created_at": "2025-07-02T13:49:43+00:00", + "comment_author": "mridula-s109", + "comment_body": "Oh okays, looking into it.", + "pr_file_module": null + }, + { + "comment_id": "2180145926", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130402, + "pr_file": "x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/950_pinned_interaction.yml", + "discussion_id": "2180109441", + "commented_code": "@@ -77,22 +77,15 @@ setup:\n retriever:\n rrf:\n retrievers:\n- -\n- standard:\n- query:\n- match: { text: \"document\" }\n -\n pinned:\n ids: [\"doc4\", \"doc5\"]\n retriever:\n standard:\n query:\n- match: { text: \"document\" }\n-\n- - match: { hits.total.value: 5 }\n- - match: { hits.hits.0._id: doc1 }\n- - lt: { hits.hits.0._score: 100.0 }\n- - match: { hits.hits.1._id: doc4 }\n- - match: { hits.hits.2._id: doc5 }\n+ match_none: {}", + "comment_created_at": 
"2025-07-02T14:01:08+00:00", + "comment_author": "mridula-s109", + "comment_body": "have fixed the same.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-specify-explicit-rest-formats.md b/_reviewers/elasticsearch-specify-explicit-rest-formats.md index eb06e24..8d12407 100644 --- a/_reviewers/elasticsearch-specify-explicit-rest-formats.md +++ b/_reviewers/elasticsearch-specify-explicit-rest-formats.md @@ -39,75 +39,3 @@ retriever: ``` This practice improves test reliability and clarity by making the expected request format visible and intentional rather than implicit. - - -[ - { - "discussion_id": "2179929463", - "pr_number": 130325, - "pr_file": "modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/120_grok.yml", - "created_at": "2025-07-02T12:29:26+00:00", - "commented_code": "ingest.processor_grok: {}\n - length: { patterns: 318 }\n - match: { patterns.PATH: \"(?:%{UNIXPATH}|%{WINPATH})\" }\n\n\n---\n\"Test simulate with invalid GROK pattern\":\n - skip:\n features: headers\n - do:\n catch: bad_request\n headers:\n Content-Type: application/json", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2179929463", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130325, - "pr_file": "modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/120_grok.yml", - "discussion_id": "2179929463", - "commented_code": "@@ -154,3 +154,45 @@ teardown:\n ingest.processor_grok: {}\n - length: { patterns: 318 }\n - match: { patterns.PATH: \"(?:%{UNIXPATH}|%{WINPATH})\" }\n+\n+\n+---\n+\"Test simulate with invalid GROK pattern\":\n+ - skip:\n+ features: headers\n+ - do:\n+ catch: bad_request\n+ headers:\n+ Content-Type: application/json", - "comment_created_at": "2025-07-02T12:29:26+00:00", - "comment_author": "szybia", - "comment_body": "re. 
headers: existing `simulate.ingest` REST tests have skip and send a content-type header.\r\n\r\ntest either doesn't run or breaks without this, it seems without it the endpoint expects cbor by default.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2180109441", - "pr_number": 130402, - "pr_file": "x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/950_pinned_interaction.yml", - "created_at": "2025-07-02T13:45:39+00:00", - "commented_code": "retriever:\n rrf:\n retrievers:\n -\n standard:\n query:\n match: { text: \"document\" }\n -\n pinned:\n ids: [\"doc4\", \"doc5\"]\n retriever:\n standard:\n query:\n match: { text: \"document\" }\n\n - match: { hits.total.value: 5 }\n - match: { hits.hits.0._id: doc1 }\n - lt: { hits.hits.0._score: 100.0 }\n - match: { hits.hits.1._id: doc4 }\n - match: { hits.hits.2._id: doc5 }\n match_none: {}", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2180109441", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130402, - "pr_file": "x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/950_pinned_interaction.yml", - "discussion_id": "2180109441", - "commented_code": "@@ -77,22 +77,15 @@ setup:\n retriever:\n rrf:\n retrievers:\n- -\n- standard:\n- query:\n- match: { text: \"document\" }\n -\n pinned:\n ids: [\"doc4\", \"doc5\"]\n retriever:\n standard:\n query:\n- match: { text: \"document\" }\n-\n- - match: { hits.total.value: 5 }\n- - match: { hits.hits.0._id: doc1 }\n- - lt: { hits.hits.0._score: 100.0 }\n- - match: { hits.hits.1._id: doc4 }\n- - match: { hits.hits.2._id: doc5 }\n+ match_none: {}", - "comment_created_at": "2025-07-02T13:45:39+00:00", - "comment_author": "Mikep86", - "comment_body": "I think @kderusso meant creating an `rrf` retriever with two sub-retrievers, one of which is a `match_none` query.", - "pr_file_module": null - }, - { - "comment_id": "2180119064", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130402, - "pr_file": "x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/950_pinned_interaction.yml", - "discussion_id": "2180109441", - "commented_code": "@@ -77,22 +77,15 @@ setup:\n retriever:\n rrf:\n retrievers:\n- -\n- standard:\n- query:\n- match: { text: \"document\" }\n -\n pinned:\n ids: [\"doc4\", \"doc5\"]\n retriever:\n standard:\n query:\n- match: { text: \"document\" }\n-\n- - match: { hits.total.value: 5 }\n- - match: { hits.hits.0._id: doc1 }\n- - lt: { hits.hits.0._score: 100.0 }\n- - match: { hits.hits.1._id: doc4 }\n- - match: { hits.hits.2._id: doc5 }\n+ match_none: {}", - "comment_created_at": "2025-07-02T13:49:43+00:00", - "comment_author": "mridula-s109", - "comment_body": "Oh okays, looking into it.", - "pr_file_module": null - }, - { - "comment_id": "2180145926", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130402, - "pr_file": "x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/950_pinned_interaction.yml", - "discussion_id": "2180109441", - "commented_code": "@@ -77,22 +77,15 @@ setup:\n retriever:\n rrf:\n retrievers:\n- -\n- standard:\n- query:\n- match: { text: \"document\" }\n -\n pinned:\n ids: [\"doc4\", \"doc5\"]\n retriever:\n standard:\n query:\n- match: { text: \"document\" }\n-\n- - match: { hits.total.value: 5 }\n- - match: { hits.hits.0._id: doc1 }\n- - lt: { hits.hits.0._score: 100.0 }\n- - match: { hits.hits.1._id: doc4 }\n- - match: { hits.hits.2._id: doc5 }\n+ match_none: {}", - "comment_created_at": 
"2025-07-02T14:01:08+00:00", - "comment_author": "mridula-s109", - "comment_body": "have fixed the same.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-stage-intensive-operations-carefully.json b/_reviewers/elasticsearch-stage-intensive-operations-carefully.json new file mode 100644 index 0000000..33e8bd0 --- /dev/null +++ b/_reviewers/elasticsearch-stage-intensive-operations-carefully.json @@ -0,0 +1,92 @@ +[ + { + "discussion_id": "2175560758", + "pr_number": 130280, + "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", + "created_at": "2025-06-30T17:23:09+00:00", + "commented_code": "Phases allowed: hot, warm.\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n\n::::{note}\nShards that are relocating during a `forcemerge` will not be merged.\n::::\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n\nTo use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n\n:::::{admonition} Performance considerations\n:name: ilm-forcemerge-performance\n\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n\nIf you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n\n::::{important}\nThis can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n::::\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. 
For example, this can happen when you \n* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2175560758", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130280, + "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", + "discussion_id": "2175560758", + "commented_code": "@@ -7,29 +7,26 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n-\n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n-\n-::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n-::::\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.", + "comment_created_at": "2025-06-30T17:23:09+00:00", + "comment_author": "kilfoyle", + "comment_body": "```suggestion\r\n* modify an {{ilm-init}} policy to lower the [`min_age`](./ilm-rollover.md#ilm-rollover-options) for one or more phases, causing indices to trigger the force merge at a faster rate.\r\n```\r\n\r\nI think it'd be helpful to link to the docs for this setting. 
It's probably not obvious that it's a rollover setting rather than an ILM setting.", + "pr_file_module": null + }, + { + "comment_id": "2175889489", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130280, + "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", + "discussion_id": "2175560758", + "commented_code": "@@ -7,29 +7,26 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n-\n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n-\n-::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n-::::\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.", + "comment_created_at": "2025-06-30T20:46:45+00:00", + "comment_author": "stefnestor", + "comment_body": "I had meant a phase's `min_age` and not the `hot.rollover`'s `min_age` setting actually. \r\n\r\nJust updating the rollover `min_age` would potentially not cascade multiple indices immediately, but the example we frequently encounter is \"oh no my hot/warm/cold is filling up disk, let me lower frozen `min_age` to push data to it faster\" **mass** triggers force merge queues and causes heap+cpu performance issues on the data tier you were just trying to resolve disk on 😅 . 
", + "pr_file_module": null + }, + { + "comment_id": "2175980577", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130280, + "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", + "discussion_id": "2175560758", + "commented_code": "@@ -7,29 +7,26 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n-\n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n-\n-::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n-::::\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.", + "comment_created_at": "2025-06-30T21:16:55+00:00", + "comment_author": "kilfoyle", + "comment_body": "Ah, okay. Let's not use the link then.\r\n\r\nI see now that `min_age` is also part of the [`/_ilm/policy` API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-put-lifecycle). 
I didn't realize that these are different things.\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2175578616", + "pr_number": 130280, + "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", + "created_at": "2025-06-30T17:35:48+00:00", + "commented_code": "Phases allowed: hot, warm.\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n\n::::{note}\nShards that are relocating during a `forcemerge` will not be merged.\n::::\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n\nTo use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n\n:::::{admonition} Performance considerations\n:name: ilm-forcemerge-performance\n\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n\nIf you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n\n::::{important}\nThis can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n::::\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n\nIf you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). This is considered an expert setting override as this can have cascading performance impacts. 
Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2175578616", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130280, + "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", + "discussion_id": "2175578616", + "commented_code": "@@ -7,29 +7,26 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n-\n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n-\n-::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n-::::\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n+* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n \n+If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). 
This is considered an expert setting override as this can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.", + "comment_created_at": "2025-06-30T17:35:48+00:00", + "comment_author": "kilfoyle", + "comment_body": "```suggestion\r\nIf you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). \r\n\r\n::::{important}\r\nNote that `thread_pool.force_merge.size` is an advanced setting. Adjusting it can cause cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\r\n::::\r\n```\r\nI think it's a good idea to keep this in an \"important\" block just so it's easier for users to notice.\r\n\r\n\"screen25\"\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2175632315", + "pr_number": 130280, + "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", + "created_at": "2025-06-30T18:11:49+00:00", + "commented_code": "Phases allowed: hot, warm.\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n\n::::{note}\nShards that are relocating during a `forcemerge` will not be merged.\n::::\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n\nTo use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n\n:::::{admonition} Performance considerations\n:name: ilm-forcemerge-performance\n\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n\nIf you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n\n::::{important}\nThis can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n::::\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. 
For example, this can happen when you \n* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n\nIf you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). This is considered an expert setting override as this can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n\nForce merging will be performed by the nodes within the current phase of the index. A forcemerge in the `hot` phase will use hot nodes with potentially faster nodes, while impacting ingestion more. A forcemerge in the `warm` phase will use warm nodes and potentially take longer to perform, but without impacting ingestion in the `hot` tier.\nForce merging will be performed by the node hosting the shard. The [node's role](https://www.elastic.co/docs/deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles#data-node-role) frequently is equal to the [data tier](https://www.elastic.co/docs/manage-data/lifecycle/data-tiers) of the {{ilm-init}}'s phase of the index, but this is not guaranteed. For example, a forcemerge in the \n* `hot` phase will use hot nodes; however, performing merges on this potentially higher performance hardware may have a trade off of impacting ingestion. \n* `warm` phase will use warm nodes; however, merges may potentially take longer to perform on less performant hardware but will avoid impacting ingestion in the `hot` tier.\n* `cold` or `frozen` phase [{{ilm-init}} Searchable Snapshots](https://www.elastic.co/docs/reference/elasticsearch/index-lifecycle-actions/ilm-searchable-snapshot) via `force_merge_index` happens on the preceeding data tier.", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2175632315", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130280, + "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", + "discussion_id": "2175632315", + "commented_code": "@@ -7,29 +7,26 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. 
For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n-\n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n-\n-::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n-::::\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n+* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n \n+If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). This is considered an expert setting override as this can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n \n-Force merging will be performed by the nodes within the current phase of the index. A forcemerge in the `hot` phase will use hot nodes with potentially faster nodes, while impacting ingestion more. A forcemerge in the `warm` phase will use warm nodes and potentially take longer to perform, but without impacting ingestion in the `hot` tier.\n+Force merging will be performed by the node hosting the shard. The [node's role](https://www.elastic.co/docs/deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles#data-node-role) frequently is equal to the [data tier](https://www.elastic.co/docs/manage-data/lifecycle/data-tiers) of the {{ilm-init}}'s phase of the index, but this is not guaranteed. For example, a forcemerge in the \n+* `hot` phase will use hot nodes; however, performing merges on this potentially higher performance hardware may have a trade off of impacting ingestion. 
\n+* `warm` phase will use warm nodes; however, merges may potentially take longer to perform on less performant hardware but will avoid impacting ingestion in the `hot` tier.\n+* `cold` or `frozen` phase [{{ilm-init}} Searchable Snapshots](https://www.elastic.co/docs/reference/elasticsearch/index-lifecycle-actions/ilm-searchable-snapshot) via `force_merge_index` happens on the preceeding data tier.", + "comment_created_at": "2025-06-30T18:11:49+00:00", + "comment_author": "kilfoyle", + "comment_body": "```suggestion\r\nForce merging will be performed by the node hosting the shard. The [node's role](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-settings.md#node-roles) frequently matches the [data tier](docs-content://manage-data/lifecycle/data-tiers.md) of the {{ilm-init}}'s phase of the index, but this is not guaranteed. For example: \r\n* A force merge in the `hot` phase will use hot nodes. Merges may be faster on this potentially higher performance hardware but may have the tradeoff of impacting ingestion. \r\n* A force merge in the `warm` phase will use warm nodes. Merges may take longer to perform on potentially lower performance hardware but will avoid impacting ingestion in the `hot` tier.\r\n* A force merge in the `cold` or `frozen` phase [{{ilm-init}} Searchable Snapshots](./ilm-searchable-snapshot) using `force_merge_index` happens on the preceeding data tier.\r\n```\r\n\r\nThe above rephrases things bit and fixes up the links. ", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-stage-intensive-operations-carefully.md b/_reviewers/elasticsearch-stage-intensive-operations-carefully.md index 657b8eb..d84bf2e 100644 --- a/_reviewers/elasticsearch-stage-intensive-operations-carefully.md +++ b/_reviewers/elasticsearch-stage-intensive-operations-carefully.md @@ -18,97 +18,3 @@ For example: 3. Consider where operations execute and potential trade-offs - operations on high-performance nodes may complete faster but could impact critical workflows like data ingestion This approach helps maintain system stability while optimizing resource-intensive operations. - - -[ - { - "discussion_id": "2175560758", - "pr_number": 130280, - "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", - "created_at": "2025-06-30T17:23:09+00:00", - "commented_code": "Phases allowed: hot, warm.\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n\n::::{note}\nShards that are relocating during a `forcemerge` will not be merged.\n::::\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n\nTo use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n\n:::::{admonition} Performance considerations\n:name: ilm-forcemerge-performance\n\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. 
This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n\nIf you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n\n::::{important}\nThis can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n::::\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2175560758", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130280, - "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", - "discussion_id": "2175560758", - "commented_code": "@@ -7,29 +7,26 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n-\n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n-\n-::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n-::::\n+Force merge is a resource-intensive operation. 
If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.", - "comment_created_at": "2025-06-30T17:23:09+00:00", - "comment_author": "kilfoyle", - "comment_body": "```suggestion\r\n* modify an {{ilm-init}} policy to lower the [`min_age`](./ilm-rollover.md#ilm-rollover-options) for one or more phases, causing indices to trigger the force merge at a faster rate.\r\n```\r\n\r\nI think it'd be helpful to link to the docs for this setting. It's probably not obvious that it's a rollover setting rather than an ILM setting.", - "pr_file_module": null - }, - { - "comment_id": "2175889489", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130280, - "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", - "discussion_id": "2175560758", - "commented_code": "@@ -7,29 +7,26 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n-\n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n-\n-::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n-::::\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.", - "comment_created_at": "2025-06-30T20:46:45+00:00", - "comment_author": "stefnestor", - "comment_body": "I had meant a phase's `min_age` and not the `hot.rollover`'s `min_age` setting actually. 
\r\n\r\nJust updating the rollover `min_age` would potentially not cascade multiple indices immediately, but the example we frequently encounter is \"oh no my hot/warm/cold is filling up disk, let me lower frozen `min_age` to push data to it faster\" **mass** triggers force merge queues and causes heap+cpu performance issues on the data tier you were just trying to resolve disk on \ud83d\ude05 . ", - "pr_file_module": null - }, - { - "comment_id": "2175980577", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130280, - "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", - "discussion_id": "2175560758", - "commented_code": "@@ -7,29 +7,26 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n-\n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n-\n-::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n-::::\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.", - "comment_created_at": "2025-06-30T21:16:55+00:00", - "comment_author": "kilfoyle", - "comment_body": "Ah, okay. Let's not use the link then.\r\n\r\nI see now that `min_age` is also part of the [`/_ilm/policy` API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-put-lifecycle). 
I didn't realize that these are different things.\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2175578616", - "pr_number": 130280, - "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", - "created_at": "2025-06-30T17:35:48+00:00", - "commented_code": "Phases allowed: hot, warm.\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n\n::::{note}\nShards that are relocating during a `forcemerge` will not be merged.\n::::\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n\nTo use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n\n:::::{admonition} Performance considerations\n:name: ilm-forcemerge-performance\n\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n\nIf you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n\n::::{important}\nThis can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n::::\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n\nIf you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). This is considered an expert setting override as this can have cascading performance impacts. 
Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2175578616", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130280, - "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", - "discussion_id": "2175578616", - "commented_code": "@@ -7,29 +7,26 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n-\n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n-\n-::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n-::::\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n+* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n \n+If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). 
This is considered an expert setting override as this can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.", - "comment_created_at": "2025-06-30T17:35:48+00:00", - "comment_author": "kilfoyle", - "comment_body": "```suggestion\r\nIf you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). \r\n\r\n::::{important}\r\nNote that `thread_pool.force_merge.size` is an advanced setting. Adjusting it can cause cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\r\n::::\r\n```\r\nI think it's a good idea to keep this in an \"important\" block just so it's easier for users to notice.\r\n\r\n\"screen25\"\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2175632315", - "pr_number": 130280, - "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", - "created_at": "2025-06-30T18:11:49+00:00", - "commented_code": "Phases allowed: hot, warm.\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n\n::::{note}\nShards that are relocating during a `forcemerge` will not be merged.\n::::\n\n[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. For example, shards that are relocating during a `forcemerge` will not be merged.\n\nTo use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n\n:::::{admonition} Performance considerations\n:name: ilm-forcemerge-performance\n\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n\nIf you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n\n::::{important}\nThis can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n::::\nForce merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. 
For example, this can happen when you \n* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n\nIf you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). This is considered an expert setting override as this can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n\nForce merging will be performed by the nodes within the current phase of the index. A forcemerge in the `hot` phase will use hot nodes with potentially faster nodes, while impacting ingestion more. A forcemerge in the `warm` phase will use warm nodes and potentially take longer to perform, but without impacting ingestion in the `hot` tier.\nForce merging will be performed by the node hosting the shard. The [node's role](https://www.elastic.co/docs/deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles#data-node-role) frequently is equal to the [data tier](https://www.elastic.co/docs/manage-data/lifecycle/data-tiers) of the {{ilm-init}}'s phase of the index, but this is not guaranteed. For example, a forcemerge in the \n* `hot` phase will use hot nodes; however, performing merges on this potentially higher performance hardware may have a trade off of impacting ingestion. \n* `warm` phase will use warm nodes; however, merges may potentially take longer to perform on less performant hardware but will avoid impacting ingestion in the `hot` tier.\n* `cold` or `frozen` phase [{{ilm-init}} Searchable Snapshots](https://www.elastic.co/docs/reference/elasticsearch/index-lifecycle-actions/ilm-searchable-snapshot) via `force_merge_index` happens on the preceeding data tier.", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2175632315", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130280, - "pr_file": "docs/reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md", - "discussion_id": "2175632315", - "commented_code": "@@ -7,29 +7,26 @@ mapped_pages:\n \n Phases allowed: hot, warm.\n \n-[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments).\n-\n-::::{note}\n-Shards that are relocating during a `forcemerge` will not be merged.\n-::::\n-\n+[Force merges](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) the index into the specified maximum number of [segments](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-segments). This operation is best effort. 
For example, shards that are relocating during a `forcemerge` will not be merged.\n \n To use the `forcemerge` action in the `hot` phase, the `rollover` action **must** be present. If no rollover action is configured, {{ilm-init}} will reject the policy.\n \n :::::{admonition} Performance considerations\n :name: ilm-forcemerge-performance\n \n-Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. This can happen when you apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple phases. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n-\n-If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings).\n-\n-::::{important}\n-This can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n-::::\n+Force merge is a resource-intensive operation. If too many force merges are triggered at once, it can negatively impact your cluster. For example, this can happen when you \n+* modify an existing {{ilm-init}} policy's phase `min_age` such that indices trigger into force-merging phase faster.\n+* apply an {{ilm-init}} policy that includes a force merge action to existing indices. If they meet the `min_age` criteria, they can immediately proceed through multiple actions. You can prevent this by increasing the `min_age` or setting `index.lifecycle.origination_date` to change how the index age is calculated.\n+* run the [{{ilm-init}} Move Step API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-move-to-step) against multiple indices.\n \n+If you experience a force merge task queue backlog, you might need to increase the size of the force merge threadpool so indices can be force merged in parallel. To do this, configure the `thread_pool.force_merge.size` [cluster setting](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings). This is considered an expert setting override as this can have cascading performance impacts. Monitor cluster performance and increment the size of the thread pool slowly to reduce the backlog.\n \n-Force merging will be performed by the nodes within the current phase of the index. A forcemerge in the `hot` phase will use hot nodes with potentially faster nodes, while impacting ingestion more. A forcemerge in the `warm` phase will use warm nodes and potentially take longer to perform, but without impacting ingestion in the `hot` tier.\n+Force merging will be performed by the node hosting the shard. The [node's role](https://www.elastic.co/docs/deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles#data-node-role) frequently is equal to the [data tier](https://www.elastic.co/docs/manage-data/lifecycle/data-tiers) of the {{ilm-init}}'s phase of the index, but this is not guaranteed. For example, a forcemerge in the \n+* `hot` phase will use hot nodes; however, performing merges on this potentially higher performance hardware may have a trade off of impacting ingestion. 
\n+* `warm` phase will use warm nodes; however, merges may potentially take longer to perform on less performant hardware but will avoid impacting ingestion in the `hot` tier.\n+* `cold` or `frozen` phase [{{ilm-init}} Searchable Snapshots](https://www.elastic.co/docs/reference/elasticsearch/index-lifecycle-actions/ilm-searchable-snapshot) via `force_merge_index` happens on the preceeding data tier.", - "comment_created_at": "2025-06-30T18:11:49+00:00", - "comment_author": "kilfoyle", - "comment_body": "```suggestion\r\nForce merging will be performed by the node hosting the shard. The [node's role](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-settings.md#node-roles) frequently matches the [data tier](docs-content://manage-data/lifecycle/data-tiers.md) of the {{ilm-init}}'s phase of the index, but this is not guaranteed. For example: \r\n* A force merge in the `hot` phase will use hot nodes. Merges may be faster on this potentially higher performance hardware but may have the tradeoff of impacting ingestion. \r\n* A force merge in the `warm` phase will use warm nodes. Merges may take longer to perform on potentially lower performance hardware but will avoid impacting ingestion in the `hot` tier.\r\n* A force merge in the `cold` or `frozen` phase [{{ilm-init}} Searchable Snapshots](./ilm-searchable-snapshot) using `force_merge_index` happens on the preceeding data tier.\r\n```\r\n\r\nThe above rephrases things bit and fixes up the links. ", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-use-configuration-access-methods.json b/_reviewers/elasticsearch-use-configuration-access-methods.json new file mode 100644 index 0000000..ec7b47b --- /dev/null +++ b/_reviewers/elasticsearch-use-configuration-access-methods.json @@ -0,0 +1,170 @@ +[ + { + "discussion_id": "2171109662", + "pr_number": 129684, + "pr_file": "server/src/main/java/org/elasticsearch/index/IndexService.java", + "created_at": "2025-06-27T07:57:12+00:00", + "commented_code": "mapperMetrics\n );\n this.indexFieldData = new IndexFieldDataService(indexSettings, indicesFieldDataCache, circuitBreakerService);\n boolean sourceOnly = Boolean.parseBoolean(indexSettings.getSettings().get(\"index.source_only\"));\n boolean sourceOnly = isSourceOnly(indexSettings);", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2171109662", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129684, + "pr_file": "server/src/main/java/org/elasticsearch/index/IndexService.java", + "discussion_id": "2171109662", + "commented_code": "@@ -246,7 +247,7 @@ public IndexService(\n mapperMetrics\n );\n this.indexFieldData = new IndexFieldDataService(indexSettings, indicesFieldDataCache, circuitBreakerService);\n- boolean sourceOnly = Boolean.parseBoolean(indexSettings.getSettings().get(\"index.source_only\"));\n+ boolean sourceOnly = isSourceOnly(indexSettings);", + "comment_created_at": "2025-06-27T07:57:12+00:00", + "comment_author": "mosche", + "comment_body": "I had a closer look at this, this can actually be \r\n```suggestion\r\n boolean sourceOnly = indexSettings.getAsBoolean(\"index.source_only\", false);\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2171136090", + "pr_number": 129684, + "pr_file": "server/src/main/java/org/elasticsearch/action/admin/indices/resolve/ResolveIndexAction.java", + "created_at": "2025-06-27T08:05:11+00:00", + "commented_code": "if (ia.isSystem()) {\n attributes.add(Attribute.SYSTEM);\n }\n 
final boolean isFrozen = Boolean.parseBoolean(writeIndex.getSettings().get(\"index.frozen\"));\n final boolean isFrozen = isFrozen(writeIndex);", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2171136090", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129684, + "pr_file": "server/src/main/java/org/elasticsearch/action/admin/indices/resolve/ResolveIndexAction.java", + "discussion_id": "2171136090", + "commented_code": "@@ -629,7 +630,7 @@ private static void enrichIndexAbstraction(\n if (ia.isSystem()) {\n attributes.add(Attribute.SYSTEM);\n }\n- final boolean isFrozen = Boolean.parseBoolean(writeIndex.getSettings().get(\"index.frozen\"));\n+ final boolean isFrozen = isFrozen(writeIndex);", + "comment_created_at": "2025-06-27T08:05:11+00:00", + "comment_author": "mosche", + "comment_body": "I had a closer look at this, this can actually be \r\n```suggestion\r\n final boolean isFrozen = writeIndex.getSettings().getAsBoolean(\"index.frozen\", false);\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2174997550", + "pr_number": 129684, + "pr_file": "x-pack/qa/kerberos-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/authc/kerberos/KerberosAuthenticationIT.java", + "created_at": "2025-06-30T12:49:40+00:00", + "commented_code": "public void testLoginByKeytab() throws IOException, PrivilegedActionException {\n final String keytabPath = krb5Fixture.getKeytab().toString();\n final boolean enabledDebugLogs = Boolean.parseBoolean(ENABLE_KERBEROS_DEBUG_LOGS_KEY);\n final boolean enabledDebugLogs = Booleans.parseBoolean(ENABLE_KERBEROS_DEBUG_LOGS_KEY);", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2174997550", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129684, + "pr_file": "x-pack/qa/kerberos-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/authc/kerberos/KerberosAuthenticationIT.java", + "discussion_id": "2174997550", + "commented_code": "@@ -141,7 +142,7 @@ public void setupRoleMapping() throws IOException {\n \n public void testLoginByKeytab() throws IOException, PrivilegedActionException {\n final String keytabPath = krb5Fixture.getKeytab().toString();\n- final boolean enabledDebugLogs = Boolean.parseBoolean(ENABLE_KERBEROS_DEBUG_LOGS_KEY);\n+ final boolean enabledDebugLogs = Booleans.parseBoolean(ENABLE_KERBEROS_DEBUG_LOGS_KEY);", + "comment_created_at": "2025-06-30T12:49:40+00:00", + "comment_author": "mosche", + "comment_body": "```suggestion\r\n final boolean enabledDebugLogs = Booleans.parseBoolean(System.getProperty(ENABLE_KERBEROS_DEBUG_LOGS_KEY), false);\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2174999003", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129684, + "pr_file": "x-pack/qa/kerberos-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/authc/kerberos/KerberosAuthenticationIT.java", + "discussion_id": "2174997550", + "commented_code": "@@ -141,7 +142,7 @@ public void setupRoleMapping() throws IOException {\n \n public void testLoginByKeytab() throws IOException, PrivilegedActionException {\n final String keytabPath = krb5Fixture.getKeytab().toString();\n- final boolean enabledDebugLogs = Boolean.parseBoolean(ENABLE_KERBEROS_DEBUG_LOGS_KEY);\n+ final boolean enabledDebugLogs = Booleans.parseBoolean(ENABLE_KERBEROS_DEBUG_LOGS_KEY);", + "comment_created_at": "2025-06-30T12:50:25+00:00", + "comment_author": "mosche", + "comment_body": "The tests below also need to default to 
`false`", + "pr_file_module": null + }, + { + "comment_id": "2176671414", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129684, + "pr_file": "x-pack/qa/kerberos-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/authc/kerberos/KerberosAuthenticationIT.java", + "discussion_id": "2174997550", + "commented_code": "@@ -141,7 +142,7 @@ public void setupRoleMapping() throws IOException {\n \n public void testLoginByKeytab() throws IOException, PrivilegedActionException {\n final String keytabPath = krb5Fixture.getKeytab().toString();\n- final boolean enabledDebugLogs = Boolean.parseBoolean(ENABLE_KERBEROS_DEBUG_LOGS_KEY);\n+ final boolean enabledDebugLogs = Booleans.parseBoolean(ENABLE_KERBEROS_DEBUG_LOGS_KEY);", + "comment_created_at": "2025-07-01T07:40:01+00:00", + "comment_author": "M12b4", + "comment_body": "Why", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2138277505", + "pr_number": 129128, + "pr_file": "server/src/main/java/org/elasticsearch/action/admin/indices/readonly/RemoveIndexBlockRequest.java", + "created_at": "2025-06-10T15:59:54+00:00", + "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. Licensed under the \"Elastic License\n * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n * Public License v 1\"; you may not use this file except in compliance with, at\n * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n * License v3.0 only\", or the \"Server Side Public License, v 1\".\n */\n\npackage org.elasticsearch.action.admin.indices.readonly;\n\nimport org.elasticsearch.action.ActionRequestValidationException;\nimport org.elasticsearch.action.IndicesRequest;\nimport org.elasticsearch.action.support.IndicesOptions;\nimport org.elasticsearch.action.support.master.AcknowledgedRequest;\nimport org.elasticsearch.cluster.metadata.IndexMetadata.APIBlock;\nimport org.elasticsearch.common.io.stream.StreamInput;\nimport org.elasticsearch.common.io.stream.StreamOutput;\nimport org.elasticsearch.common.util.CollectionUtils;\n\nimport java.io.IOException;\nimport java.util.Arrays;\nimport java.util.Objects;\n\nimport static org.elasticsearch.action.ValidateActions.addValidationError;\n\n/**\n * A request to remove a block from an index.\n */\npublic class RemoveIndexBlockRequest extends AcknowledgedRequest implements IndicesRequest.Replaceable {\n\n private final APIBlock block;\n private String[] indices;\n private IndicesOptions indicesOptions = IndicesOptions.strictExpandOpen();\n\n public RemoveIndexBlockRequest(StreamInput in) throws IOException {\n super(in);\n indices = in.readStringArray();\n indicesOptions = IndicesOptions.readIndicesOptions(in);\n block = APIBlock.readFrom(in);\n }\n\n /**\n * Constructs a new request for the specified block and indices\n */\n public RemoveIndexBlockRequest(APIBlock block, String... indices) {\n super(TRAPPY_IMPLICIT_DEFAULT_MASTER_NODE_TIMEOUT, DEFAULT_ACK_TIMEOUT);", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2138277505", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129128, + "pr_file": "server/src/main/java/org/elasticsearch/action/admin/indices/readonly/RemoveIndexBlockRequest.java", + "discussion_id": "2138277505", + "commented_code": "@@ -0,0 +1,140 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+\n+package org.elasticsearch.action.admin.indices.readonly;\n+\n+import org.elasticsearch.action.ActionRequestValidationException;\n+import org.elasticsearch.action.IndicesRequest;\n+import org.elasticsearch.action.support.IndicesOptions;\n+import org.elasticsearch.action.support.master.AcknowledgedRequest;\n+import org.elasticsearch.cluster.metadata.IndexMetadata.APIBlock;\n+import org.elasticsearch.common.io.stream.StreamInput;\n+import org.elasticsearch.common.io.stream.StreamOutput;\n+import org.elasticsearch.common.util.CollectionUtils;\n+\n+import java.io.IOException;\n+import java.util.Arrays;\n+import java.util.Objects;\n+\n+import static org.elasticsearch.action.ValidateActions.addValidationError;\n+\n+/**\n+ * A request to remove a block from an index.\n+ */\n+public class RemoveIndexBlockRequest extends AcknowledgedRequest implements IndicesRequest.Replaceable {\n+\n+ private final APIBlock block;\n+ private String[] indices;\n+ private IndicesOptions indicesOptions = IndicesOptions.strictExpandOpen();\n+\n+ public RemoveIndexBlockRequest(StreamInput in) throws IOException {\n+ super(in);\n+ indices = in.readStringArray();\n+ indicesOptions = IndicesOptions.readIndicesOptions(in);\n+ block = APIBlock.readFrom(in);\n+ }\n+\n+ /**\n+ * Constructs a new request for the specified block and indices\n+ */\n+ public RemoveIndexBlockRequest(APIBlock block, String... indices) {\n+ super(TRAPPY_IMPLICIT_DEFAULT_MASTER_NODE_TIMEOUT, DEFAULT_ACK_TIMEOUT);", + "comment_created_at": "2025-06-10T15:59:54+00:00", + "comment_author": "nielsbauman", + "comment_body": "I know this is what the add block request does, but we shouldn't use the implicit default master node timeout here. \r\n```suggestion\r\n public RemoveIndexBlockRequest(TimeValue masterTimeout, TimeValue ackTimeout, APIBlock block, String... indices) {\r\n super(masterTimeout, ackTimeout);\r\n```\r\nYou'll have to propagate this change to the callers. In the REST class, you can simply pass the two timeouts to the constructor instead of using the setters. In the `RemoveIndexBlockRequestBuilder`, you'll have to propagate the two parameters as well. 
In all the usages in tests, you can use `TEST_REQUEST_TIMEOUT` for both timeouts.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2167359107", + "pr_number": 130033, + "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/rerank/HuggingFaceRerankServiceSettings.java", + "created_at": "2025-06-25T18:34:59+00:00", + "commented_code": "return TransportVersions.ML_INFERENCE_HUGGING_FACE_RERANK_ADDED;\n }\n\n @Override\n public boolean supportsVersion(TransportVersion version) {\n return ServiceSettings.super.supportsVersion(version)", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2167359107", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130033, + "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/rerank/HuggingFaceRerankServiceSettings.java", + "discussion_id": "2167359107", + "commented_code": "@@ -118,6 +118,12 @@ public TransportVersion getMinimalSupportedVersion() {\n return TransportVersions.ML_INFERENCE_HUGGING_FACE_RERANK_ADDED;\n }\n \n+ @Override\n+ public boolean supportsVersion(TransportVersion version) {\n+ return ServiceSettings.super.supportsVersion(version)", + "comment_created_at": "2025-06-25T18:34:59+00:00", + "comment_author": "nik9000", + "comment_body": "I think it'd be more clear to do `version.onOrAfter(ML_INFERENCE_HUGGING_FACE_RERANK_ADDED) || version.isPatchFrom(ML_INFERENCE_HUGGING_FACE_RERANK_ADDED_8_19)`. Something like that.", + "pr_file_module": null + }, + { + "comment_id": "2167360875", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 130033, + "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/rerank/HuggingFaceRerankServiceSettings.java", + "discussion_id": "2167359107", + "commented_code": "@@ -118,6 +118,12 @@ public TransportVersion getMinimalSupportedVersion() {\n return TransportVersions.ML_INFERENCE_HUGGING_FACE_RERANK_ADDED;\n }\n \n+ @Override\n+ public boolean supportsVersion(TransportVersion version) {\n+ return ServiceSettings.super.supportsVersion(version)", + "comment_created_at": "2025-06-25T18:36:01+00:00", + "comment_author": "nik9000", + "comment_body": "It's the same thing - but it's easier to scan it and make *sure* it's the same thing.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "967029501", + "pr_number": 89112, + "pr_file": "server/src/main/java/org/elasticsearch/index/analysis/AnalyzerComponents.java", + "created_at": "2022-09-09T12:44:59+00:00", + "commented_code": "final Map charFilters,\n final Map tokenFilters\n ) {\n for (String key : analyzerSettings.keySet()) {\n switch (key) {\n case \"tokenizer\", \"char_filter\", \"filter\", \"type\", \"position_increment_gap\" -> {}", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "967029501", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 89112, + "pr_file": "server/src/main/java/org/elasticsearch/index/analysis/AnalyzerComponents.java", + "discussion_id": "967029501", + "commented_code": "@@ -44,6 +44,12 @@ static AnalyzerComponents createComponents(\n final Map charFilters,\n final Map tokenFilters\n ) {\n+ for (String key : analyzerSettings.keySet()) {\n+ switch (key) {\n+ case \"tokenizer\", \"char_filter\", \"filter\", \"type\", \"position_increment_gap\" -> {}", + "comment_created_at": "2022-09-09T12:44:59+00:00", + "comment_author": "javanna", + "comment_body": 
"another way of doing this, without deduplicating the expected analyzer keys, would be to duplicate the settings into a mutable map, which we'd remove from every time we read a certain setting. At the end, if any item is left in the map it means that some unsupported param has been provided.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-use-configuration-access-methods.md b/_reviewers/elasticsearch-use-configuration-access-methods.md index 6b99c30..3ee94b8 100644 --- a/_reviewers/elasticsearch-use-configuration-access-methods.md +++ b/_reviewers/elasticsearch-use-configuration-access-methods.md @@ -34,175 +34,3 @@ boolean enabledDebugLogs = Booleans.parseBoolean(System.getProperty(ENABLE_KERBE Similarly, when exposing configuration parameters in APIs, avoid implicit defaults and require explicit parameters. When checking compatibility or validating configurations, use clear and readable expressions that make the intent obvious. This practice promotes consistency, makes default values explicit, and leverages the built-in validation provided by the configuration framework. - - -[ - { - "discussion_id": "2171109662", - "pr_number": 129684, - "pr_file": "server/src/main/java/org/elasticsearch/index/IndexService.java", - "created_at": "2025-06-27T07:57:12+00:00", - "commented_code": "mapperMetrics\n );\n this.indexFieldData = new IndexFieldDataService(indexSettings, indicesFieldDataCache, circuitBreakerService);\n boolean sourceOnly = Boolean.parseBoolean(indexSettings.getSettings().get(\"index.source_only\"));\n boolean sourceOnly = isSourceOnly(indexSettings);", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2171109662", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129684, - "pr_file": "server/src/main/java/org/elasticsearch/index/IndexService.java", - "discussion_id": "2171109662", - "commented_code": "@@ -246,7 +247,7 @@ public IndexService(\n mapperMetrics\n );\n this.indexFieldData = new IndexFieldDataService(indexSettings, indicesFieldDataCache, circuitBreakerService);\n- boolean sourceOnly = Boolean.parseBoolean(indexSettings.getSettings().get(\"index.source_only\"));\n+ boolean sourceOnly = isSourceOnly(indexSettings);", - "comment_created_at": "2025-06-27T07:57:12+00:00", - "comment_author": "mosche", - "comment_body": "I had a closer look at this, this can actually be \r\n```suggestion\r\n boolean sourceOnly = indexSettings.getAsBoolean(\"index.source_only\", false);\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2171136090", - "pr_number": 129684, - "pr_file": "server/src/main/java/org/elasticsearch/action/admin/indices/resolve/ResolveIndexAction.java", - "created_at": "2025-06-27T08:05:11+00:00", - "commented_code": "if (ia.isSystem()) {\n attributes.add(Attribute.SYSTEM);\n }\n final boolean isFrozen = Boolean.parseBoolean(writeIndex.getSettings().get(\"index.frozen\"));\n final boolean isFrozen = isFrozen(writeIndex);", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2171136090", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129684, - "pr_file": "server/src/main/java/org/elasticsearch/action/admin/indices/resolve/ResolveIndexAction.java", - "discussion_id": "2171136090", - "commented_code": "@@ -629,7 +630,7 @@ private static void enrichIndexAbstraction(\n if (ia.isSystem()) {\n attributes.add(Attribute.SYSTEM);\n }\n- final boolean isFrozen = 
Boolean.parseBoolean(writeIndex.getSettings().get(\"index.frozen\"));\n+ final boolean isFrozen = isFrozen(writeIndex);", - "comment_created_at": "2025-06-27T08:05:11+00:00", - "comment_author": "mosche", - "comment_body": "I had a closer look at this, this can actually be \r\n```suggestion\r\n final boolean isFrozen = writeIndex.getSettings().getAsBoolean(\"index.frozen\", false);\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2174997550", - "pr_number": 129684, - "pr_file": "x-pack/qa/kerberos-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/authc/kerberos/KerberosAuthenticationIT.java", - "created_at": "2025-06-30T12:49:40+00:00", - "commented_code": "public void testLoginByKeytab() throws IOException, PrivilegedActionException {\n final String keytabPath = krb5Fixture.getKeytab().toString();\n final boolean enabledDebugLogs = Boolean.parseBoolean(ENABLE_KERBEROS_DEBUG_LOGS_KEY);\n final boolean enabledDebugLogs = Booleans.parseBoolean(ENABLE_KERBEROS_DEBUG_LOGS_KEY);", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2174997550", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129684, - "pr_file": "x-pack/qa/kerberos-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/authc/kerberos/KerberosAuthenticationIT.java", - "discussion_id": "2174997550", - "commented_code": "@@ -141,7 +142,7 @@ public void setupRoleMapping() throws IOException {\n \n public void testLoginByKeytab() throws IOException, PrivilegedActionException {\n final String keytabPath = krb5Fixture.getKeytab().toString();\n- final boolean enabledDebugLogs = Boolean.parseBoolean(ENABLE_KERBEROS_DEBUG_LOGS_KEY);\n+ final boolean enabledDebugLogs = Booleans.parseBoolean(ENABLE_KERBEROS_DEBUG_LOGS_KEY);", - "comment_created_at": "2025-06-30T12:49:40+00:00", - "comment_author": "mosche", - "comment_body": "```suggestion\r\n final boolean enabledDebugLogs = Booleans.parseBoolean(System.getProperty(ENABLE_KERBEROS_DEBUG_LOGS_KEY), false);\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2174999003", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129684, - "pr_file": "x-pack/qa/kerberos-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/authc/kerberos/KerberosAuthenticationIT.java", - "discussion_id": "2174997550", - "commented_code": "@@ -141,7 +142,7 @@ public void setupRoleMapping() throws IOException {\n \n public void testLoginByKeytab() throws IOException, PrivilegedActionException {\n final String keytabPath = krb5Fixture.getKeytab().toString();\n- final boolean enabledDebugLogs = Boolean.parseBoolean(ENABLE_KERBEROS_DEBUG_LOGS_KEY);\n+ final boolean enabledDebugLogs = Booleans.parseBoolean(ENABLE_KERBEROS_DEBUG_LOGS_KEY);", - "comment_created_at": "2025-06-30T12:50:25+00:00", - "comment_author": "mosche", - "comment_body": "The tests below also need to default to `false`", - "pr_file_module": null - }, - { - "comment_id": "2176671414", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129684, - "pr_file": "x-pack/qa/kerberos-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/authc/kerberos/KerberosAuthenticationIT.java", - "discussion_id": "2174997550", - "commented_code": "@@ -141,7 +142,7 @@ public void setupRoleMapping() throws IOException {\n \n public void testLoginByKeytab() throws IOException, PrivilegedActionException {\n final String keytabPath = krb5Fixture.getKeytab().toString();\n- final boolean enabledDebugLogs = 
Boolean.parseBoolean(ENABLE_KERBEROS_DEBUG_LOGS_KEY);\n+ final boolean enabledDebugLogs = Booleans.parseBoolean(ENABLE_KERBEROS_DEBUG_LOGS_KEY);", - "comment_created_at": "2025-07-01T07:40:01+00:00", - "comment_author": "M12b4", - "comment_body": "Why", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2138277505", - "pr_number": 129128, - "pr_file": "server/src/main/java/org/elasticsearch/action/admin/indices/readonly/RemoveIndexBlockRequest.java", - "created_at": "2025-06-10T15:59:54+00:00", - "commented_code": "/*\n * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n * or more contributor license agreements. Licensed under the \"Elastic License\n * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n * Public License v 1\"; you may not use this file except in compliance with, at\n * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n * License v3.0 only\", or the \"Server Side Public License, v 1\".\n */\n\npackage org.elasticsearch.action.admin.indices.readonly;\n\nimport org.elasticsearch.action.ActionRequestValidationException;\nimport org.elasticsearch.action.IndicesRequest;\nimport org.elasticsearch.action.support.IndicesOptions;\nimport org.elasticsearch.action.support.master.AcknowledgedRequest;\nimport org.elasticsearch.cluster.metadata.IndexMetadata.APIBlock;\nimport org.elasticsearch.common.io.stream.StreamInput;\nimport org.elasticsearch.common.io.stream.StreamOutput;\nimport org.elasticsearch.common.util.CollectionUtils;\n\nimport java.io.IOException;\nimport java.util.Arrays;\nimport java.util.Objects;\n\nimport static org.elasticsearch.action.ValidateActions.addValidationError;\n\n/**\n * A request to remove a block from an index.\n */\npublic class RemoveIndexBlockRequest extends AcknowledgedRequest implements IndicesRequest.Replaceable {\n\n private final APIBlock block;\n private String[] indices;\n private IndicesOptions indicesOptions = IndicesOptions.strictExpandOpen();\n\n public RemoveIndexBlockRequest(StreamInput in) throws IOException {\n super(in);\n indices = in.readStringArray();\n indicesOptions = IndicesOptions.readIndicesOptions(in);\n block = APIBlock.readFrom(in);\n }\n\n /**\n * Constructs a new request for the specified block and indices\n */\n public RemoveIndexBlockRequest(APIBlock block, String... indices) {\n super(TRAPPY_IMPLICIT_DEFAULT_MASTER_NODE_TIMEOUT, DEFAULT_ACK_TIMEOUT);", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2138277505", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129128, - "pr_file": "server/src/main/java/org/elasticsearch/action/admin/indices/readonly/RemoveIndexBlockRequest.java", - "discussion_id": "2138277505", - "commented_code": "@@ -0,0 +1,140 @@\n+/*\n+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one\n+ * or more contributor license agreements. 
Licensed under the \"Elastic License\n+ * 2.0\", the \"GNU Affero General Public License v3.0 only\", and the \"Server Side\n+ * Public License v 1\"; you may not use this file except in compliance with, at\n+ * your election, the \"Elastic License 2.0\", the \"GNU Affero General Public\n+ * License v3.0 only\", or the \"Server Side Public License, v 1\".\n+ */\n+\n+package org.elasticsearch.action.admin.indices.readonly;\n+\n+import org.elasticsearch.action.ActionRequestValidationException;\n+import org.elasticsearch.action.IndicesRequest;\n+import org.elasticsearch.action.support.IndicesOptions;\n+import org.elasticsearch.action.support.master.AcknowledgedRequest;\n+import org.elasticsearch.cluster.metadata.IndexMetadata.APIBlock;\n+import org.elasticsearch.common.io.stream.StreamInput;\n+import org.elasticsearch.common.io.stream.StreamOutput;\n+import org.elasticsearch.common.util.CollectionUtils;\n+\n+import java.io.IOException;\n+import java.util.Arrays;\n+import java.util.Objects;\n+\n+import static org.elasticsearch.action.ValidateActions.addValidationError;\n+\n+/**\n+ * A request to remove a block from an index.\n+ */\n+public class RemoveIndexBlockRequest extends AcknowledgedRequest implements IndicesRequest.Replaceable {\n+\n+ private final APIBlock block;\n+ private String[] indices;\n+ private IndicesOptions indicesOptions = IndicesOptions.strictExpandOpen();\n+\n+ public RemoveIndexBlockRequest(StreamInput in) throws IOException {\n+ super(in);\n+ indices = in.readStringArray();\n+ indicesOptions = IndicesOptions.readIndicesOptions(in);\n+ block = APIBlock.readFrom(in);\n+ }\n+\n+ /**\n+ * Constructs a new request for the specified block and indices\n+ */\n+ public RemoveIndexBlockRequest(APIBlock block, String... indices) {\n+ super(TRAPPY_IMPLICIT_DEFAULT_MASTER_NODE_TIMEOUT, DEFAULT_ACK_TIMEOUT);", - "comment_created_at": "2025-06-10T15:59:54+00:00", - "comment_author": "nielsbauman", - "comment_body": "I know this is what the add block request does, but we shouldn't use the implicit default master node timeout here. \r\n```suggestion\r\n public RemoveIndexBlockRequest(TimeValue masterTimeout, TimeValue ackTimeout, APIBlock block, String... indices) {\r\n super(masterTimeout, ackTimeout);\r\n```\r\nYou'll have to propagate this change to the callers. In the REST class, you can simply pass the two timeouts to the constructor instead of using the setters. In the `RemoveIndexBlockRequestBuilder`, you'll have to propagate the two parameters as well. 
In all the usages in tests, you can use `TEST_REQUEST_TIMEOUT` for both timeouts.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2167359107", - "pr_number": 130033, - "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/rerank/HuggingFaceRerankServiceSettings.java", - "created_at": "2025-06-25T18:34:59+00:00", - "commented_code": "return TransportVersions.ML_INFERENCE_HUGGING_FACE_RERANK_ADDED;\n }\n\n @Override\n public boolean supportsVersion(TransportVersion version) {\n return ServiceSettings.super.supportsVersion(version)", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2167359107", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130033, - "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/rerank/HuggingFaceRerankServiceSettings.java", - "discussion_id": "2167359107", - "commented_code": "@@ -118,6 +118,12 @@ public TransportVersion getMinimalSupportedVersion() {\n return TransportVersions.ML_INFERENCE_HUGGING_FACE_RERANK_ADDED;\n }\n \n+ @Override\n+ public boolean supportsVersion(TransportVersion version) {\n+ return ServiceSettings.super.supportsVersion(version)", - "comment_created_at": "2025-06-25T18:34:59+00:00", - "comment_author": "nik9000", - "comment_body": "I think it'd be more clear to do `version.onOrAfter(ML_INFERENCE_HUGGING_FACE_RERANK_ADDED) || version.isPatchFrom(ML_INFERENCE_HUGGING_FACE_RERANK_ADDED_8_19)`. Something like that.", - "pr_file_module": null - }, - { - "comment_id": "2167360875", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 130033, - "pr_file": "x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/rerank/HuggingFaceRerankServiceSettings.java", - "discussion_id": "2167359107", - "commented_code": "@@ -118,6 +118,12 @@ public TransportVersion getMinimalSupportedVersion() {\n return TransportVersions.ML_INFERENCE_HUGGING_FACE_RERANK_ADDED;\n }\n \n+ @Override\n+ public boolean supportsVersion(TransportVersion version) {\n+ return ServiceSettings.super.supportsVersion(version)", - "comment_created_at": "2025-06-25T18:36:01+00:00", - "comment_author": "nik9000", - "comment_body": "It's the same thing - but it's easier to scan it and make *sure* it's the same thing.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "967029501", - "pr_number": 89112, - "pr_file": "server/src/main/java/org/elasticsearch/index/analysis/AnalyzerComponents.java", - "created_at": "2022-09-09T12:44:59+00:00", - "commented_code": "final Map charFilters,\n final Map tokenFilters\n ) {\n for (String key : analyzerSettings.keySet()) {\n switch (key) {\n case \"tokenizer\", \"char_filter\", \"filter\", \"type\", \"position_increment_gap\" -> {}", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "967029501", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 89112, - "pr_file": "server/src/main/java/org/elasticsearch/index/analysis/AnalyzerComponents.java", - "discussion_id": "967029501", - "commented_code": "@@ -44,6 +44,12 @@ static AnalyzerComponents createComponents(\n final Map charFilters,\n final Map tokenFilters\n ) {\n+ for (String key : analyzerSettings.keySet()) {\n+ switch (key) {\n+ case \"tokenizer\", \"char_filter\", \"filter\", \"type\", \"position_increment_gap\" -> {}", - "comment_created_at": "2022-09-09T12:44:59+00:00", - "comment_author": "javanna", - "comment_body": 
"another way of doing this, without deduplicating the expected analyzer keys, would be to duplicate the settings into a mutable map, which we'd remove from every time we read a certain setting. At the end, if any item is left in the map it means that some unsupported param has been provided.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/elasticsearch-use-current-configuration-patterns.json b/_reviewers/elasticsearch-use-current-configuration-patterns.json new file mode 100644 index 0000000..bf88f1f --- /dev/null +++ b/_reviewers/elasticsearch-use-current-configuration-patterns.json @@ -0,0 +1,82 @@ +[ + { + "discussion_id": "2144994545", + "pr_number": 129128, + "pr_file": "rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.blocks/10_basic.yml", + "created_at": "2025-06-13T12:30:00+00:00", + "commented_code": "index: test_index\n body:\n index.blocks.write: false\n\n---\n\"Test removing block via remove_block API\":\n - requires:\n cluster_features: [\"gte_v9.1.0\"]\n reason: \"index block APIs have only been made available in 9.1.0\"", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "2144994545", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 129128, + "pr_file": "rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.blocks/10_basic.yml", + "discussion_id": "2144994545", + "commented_code": "@@ -28,3 +28,43 @@\n index: test_index\n body:\n index.blocks.write: false\n+\n+---\n+\"Test removing block via remove_block API\":\n+ - requires:\n+ cluster_features: [\"gte_v9.1.0\"]\n+ reason: \"index block APIs have only been made available in 9.1.0\"", + "comment_created_at": "2025-06-13T12:30:00+00:00", + "comment_author": "nielsbauman", + "comment_body": "Skipping tests based on cluster versions is no longer supported.\r\n```suggestion\r\n test_runner_features: [capabilities]\r\n reason: \"index block APIs have only been made available in 9.1.0\"\r\n capabilities:\r\n - method: DELETE\r\n path: /{index}/_block/{block}\r\n\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1973437329", + "pr_number": 123599, + "pr_file": "modules/reindex/src/main/plugin-metadata/entitlement-policy.yaml", + "created_at": "2025-02-27T11:58:07+00:00", + "commented_code": "ALL-UNNAMED:\n - manage_threads\n - outbound_network\n - create_class_loader # needed for Painless to generate runtime classes\n - files:\n - relative_path: \"\"\n - relative_path_setting: \"reindex.ssl.certificate\"", + "repo_full_name": "elastic/elasticsearch", + "discussion_comments": [ + { + "comment_id": "1973437329", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 123599, + "pr_file": "modules/reindex/src/main/plugin-metadata/entitlement-policy.yaml", + "discussion_id": "1973437329", + "commented_code": "@@ -1,7 +1,20 @@\n ALL-UNNAMED:\n - manage_threads\n - outbound_network\n+ - create_class_loader # needed for Painless to generate runtime classes\n - files:\n- - relative_path: \"\"\n+ - relative_path_setting: \"reindex.ssl.certificate\"", + "comment_created_at": "2025-02-27T11:58:07+00:00", + "comment_author": "ldematte", + "comment_body": "@rjernst I have no idea if I'm using this correctly, as this is the first time we use it I think.\r\nWill this work with exclusive access?\r\nCan I use a glob (reindex.ssl.*) instead?\r\n`certificate_authorities` is a list; will that work with your file settings implementation?", + "pr_file_module": null + }, + { + "comment_id": "1974562173", + "repo_full_name": 
"elastic/elasticsearch", + "pr_number": 123599, + "pr_file": "modules/reindex/src/main/plugin-metadata/entitlement-policy.yaml", + "discussion_id": "1973437329", + "commented_code": "@@ -1,7 +1,20 @@\n ALL-UNNAMED:\n - manage_threads\n - outbound_network\n+ - create_class_loader # needed for Painless to generate runtime classes\n - files:\n- - relative_path: \"\"\n+ - relative_path_setting: \"reindex.ssl.certificate\"", + "comment_created_at": "2025-02-28T00:50:23+00:00", + "comment_author": "rjernst", + "comment_body": "It should work if the setting is a list, we resolve all paths, whether for a single setting, or multiple settings. It should also work with exclusive. Also note the properties are changing per our earlier discussion, see https://github.com/elastic/elasticsearch/pull/123649.\r\n\r\nBut it's curious that we didn't need this before? I guess it would have only worked if it was relative to the config dir.", + "pr_file_module": null + }, + { + "comment_id": "1974902540", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 123599, + "pr_file": "modules/reindex/src/main/plugin-metadata/entitlement-policy.yaml", + "discussion_id": "1973437329", + "commented_code": "@@ -1,7 +1,20 @@\n ALL-UNNAMED:\n - manage_threads\n - outbound_network\n+ - create_class_loader # needed for Painless to generate runtime classes\n - files:\n- - relative_path: \"\"\n+ - relative_path_setting: \"reindex.ssl.certificate\"", + "comment_created_at": "2025-02-28T06:57:25+00:00", + "comment_author": "ldematte", + "comment_body": "Yes, I think so!", + "pr_file_module": null + }, + { + "comment_id": "1974904628", + "repo_full_name": "elastic/elasticsearch", + "pr_number": 123599, + "pr_file": "modules/reindex/src/main/plugin-metadata/entitlement-policy.yaml", + "discussion_id": "1973437329", + "commented_code": "@@ -1,7 +1,20 @@\n ALL-UNNAMED:\n - manage_threads\n - outbound_network\n+ - create_class_loader # needed for Painless to generate runtime classes\n - files:\n- - relative_path: \"\"\n+ - relative_path_setting: \"reindex.ssl.certificate\"", + "comment_created_at": "2025-02-28T06:59:22+00:00", + "comment_author": "ldematte", + "comment_body": "> Also note the properties are changing per our earlier discussion, see https://github.com/elastic/elasticsearch/pull/123649.\r\n\r\nThanks! So `reindex.ssl.*` should work? I'll try it/look at logs/debug what it resolves to. Or maybe I'll write a test for it.\r\nI'll update this to use the new properties after that is merged.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/elasticsearch-use-current-configuration-patterns.md b/_reviewers/elasticsearch-use-current-configuration-patterns.md index 7cfc784..5a2fe3a 100644 --- a/_reviewers/elasticsearch-use-current-configuration-patterns.md +++ b/_reviewers/elasticsearch-use-current-configuration-patterns.md @@ -37,87 +37,3 @@ files: ``` Using current configuration patterns improves maintainability, compatibility, and ensures the system behaves as expected across environments. 
- - -[ - { - "discussion_id": "2144994545", - "pr_number": 129128, - "pr_file": "rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.blocks/10_basic.yml", - "created_at": "2025-06-13T12:30:00+00:00", - "commented_code": "index: test_index\n body:\n index.blocks.write: false\n\n---\n\"Test removing block via remove_block API\":\n - requires:\n cluster_features: [\"gte_v9.1.0\"]\n reason: \"index block APIs have only been made available in 9.1.0\"", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "2144994545", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 129128, - "pr_file": "rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.blocks/10_basic.yml", - "discussion_id": "2144994545", - "commented_code": "@@ -28,3 +28,43 @@\n index: test_index\n body:\n index.blocks.write: false\n+\n+---\n+\"Test removing block via remove_block API\":\n+ - requires:\n+ cluster_features: [\"gte_v9.1.0\"]\n+ reason: \"index block APIs have only been made available in 9.1.0\"", - "comment_created_at": "2025-06-13T12:30:00+00:00", - "comment_author": "nielsbauman", - "comment_body": "Skipping tests based on cluster versions is no longer supported.\r\n```suggestion\r\n test_runner_features: [capabilities]\r\n reason: \"index block APIs have only been made available in 9.1.0\"\r\n capabilities:\r\n - method: DELETE\r\n path: /{index}/_block/{block}\r\n\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1973437329", - "pr_number": 123599, - "pr_file": "modules/reindex/src/main/plugin-metadata/entitlement-policy.yaml", - "created_at": "2025-02-27T11:58:07+00:00", - "commented_code": "ALL-UNNAMED:\n - manage_threads\n - outbound_network\n - create_class_loader # needed for Painless to generate runtime classes\n - files:\n - relative_path: \"\"\n - relative_path_setting: \"reindex.ssl.certificate\"", - "repo_full_name": "elastic/elasticsearch", - "discussion_comments": [ - { - "comment_id": "1973437329", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 123599, - "pr_file": "modules/reindex/src/main/plugin-metadata/entitlement-policy.yaml", - "discussion_id": "1973437329", - "commented_code": "@@ -1,7 +1,20 @@\n ALL-UNNAMED:\n - manage_threads\n - outbound_network\n+ - create_class_loader # needed for Painless to generate runtime classes\n - files:\n- - relative_path: \"\"\n+ - relative_path_setting: \"reindex.ssl.certificate\"", - "comment_created_at": "2025-02-27T11:58:07+00:00", - "comment_author": "ldematte", - "comment_body": "@rjernst I have no idea if I'm using this correctly, as this is the first time we use it I think.\r\nWill this work with exclusive access?\r\nCan I use a glob (reindex.ssl.*) instead?\r\n`certificate_authorities` is a list; will that work with your file settings implementation?", - "pr_file_module": null - }, - { - "comment_id": "1974562173", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 123599, - "pr_file": "modules/reindex/src/main/plugin-metadata/entitlement-policy.yaml", - "discussion_id": "1973437329", - "commented_code": "@@ -1,7 +1,20 @@\n ALL-UNNAMED:\n - manage_threads\n - outbound_network\n+ - create_class_loader # needed for Painless to generate runtime classes\n - files:\n- - relative_path: \"\"\n+ - relative_path_setting: \"reindex.ssl.certificate\"", - "comment_created_at": "2025-02-28T00:50:23+00:00", - "comment_author": "rjernst", - "comment_body": "It should work if the setting is a list, we resolve all paths, whether for a single setting, or 
multiple settings. It should also work with exclusive. Also note the properties are changing per our earlier discussion, see https://github.com/elastic/elasticsearch/pull/123649.\r\n\r\nBut it's curious that we didn't need this before? I guess it would have only worked if it was relative to the config dir.", - "pr_file_module": null - }, - { - "comment_id": "1974902540", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 123599, - "pr_file": "modules/reindex/src/main/plugin-metadata/entitlement-policy.yaml", - "discussion_id": "1973437329", - "commented_code": "@@ -1,7 +1,20 @@\n ALL-UNNAMED:\n - manage_threads\n - outbound_network\n+ - create_class_loader # needed for Painless to generate runtime classes\n - files:\n- - relative_path: \"\"\n+ - relative_path_setting: \"reindex.ssl.certificate\"", - "comment_created_at": "2025-02-28T06:57:25+00:00", - "comment_author": "ldematte", - "comment_body": "Yes, I think so!", - "pr_file_module": null - }, - { - "comment_id": "1974904628", - "repo_full_name": "elastic/elasticsearch", - "pr_number": 123599, - "pr_file": "modules/reindex/src/main/plugin-metadata/entitlement-policy.yaml", - "discussion_id": "1973437329", - "commented_code": "@@ -1,7 +1,20 @@\n ALL-UNNAMED:\n - manage_threads\n - outbound_network\n+ - create_class_loader # needed for Painless to generate runtime classes\n - files:\n- - relative_path: \"\"\n+ - relative_path_setting: \"reindex.ssl.certificate\"", - "comment_created_at": "2025-02-28T06:59:22+00:00", - "comment_author": "ldematte", - "comment_body": "> Also note the properties are changing per our earlier discussion, see https://github.com/elastic/elasticsearch/pull/123649.\r\n\r\nThanks! So `reindex.ssl.*` should work? I'll try it/look at logs/debug what it resolves to. Or maybe I'll write a test for it.\r\nI'll update this to use the new properties after that is merged.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/framework-cache-expensive-operations.json b/_reviewers/framework-cache-expensive-operations.json new file mode 100644 index 0000000..6770167 --- /dev/null +++ b/_reviewers/framework-cache-expensive-operations.json @@ -0,0 +1,254 @@ +[ + { + "discussion_id": "2152151153", + "pr_number": 56060, + "pr_file": "src/Illuminate/Database/Eloquent/Model.php", + "created_at": "2025-06-17T12:34:12+00:00", + "commented_code": "return $this;\n }\n\n /**\n * Determine if the given model class is soft deletable.\n */\n public static function isSoftDeletable(): bool\n {\n return in_array(SoftDeletes::class, class_uses_recursive(static::class));\n }\n\n /**\n * Determine if the given model class is prunable.\n */\n protected function isPrunable(): bool\n {\n return in_array(Prunable::class, class_uses_recursive(static::class)) || static::isMassPrunable();", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2152151153", + "repo_full_name": "laravel/framework", + "pr_number": 56060, + "pr_file": "src/Illuminate/Database/Eloquent/Model.php", + "discussion_id": "2152151153", + "commented_code": "@@ -2513,6 +2513,30 @@ public function escapeWhenCastingToString($escape = true)\n return $this;\n }\n \n+ /**\n+ * Determine if the given model class is soft deletable.\n+ */\n+ public static function isSoftDeletable(): bool\n+ {\n+ return in_array(SoftDeletes::class, class_uses_recursive(static::class));\n+ }\n+\n+ /**\n+ * Determine if the given model class is prunable.\n+ */\n+ protected function isPrunable(): bool\n+ {\n+ return in_array(Prunable::class, 
class_uses_recursive(static::class)) || static::isMassPrunable();", + "comment_created_at": "2025-06-17T12:34:12+00:00", + "comment_author": "cosmastech", + "comment_body": "**thought:** It would be great if there were a ClassUsesRecursive once helper that would store these values for the lifetime of a request.\r\n\r\nWritten out as you have here, we can see that we're going to loop through all the traits of the class and its parent-classes twice if the class is MassPrunable but not Prunable.", + "pr_file_module": null + }, + { + "comment_id": "2152406875", + "repo_full_name": "laravel/framework", + "pr_number": 56060, + "pr_file": "src/Illuminate/Database/Eloquent/Model.php", + "discussion_id": "2152151153", + "commented_code": "@@ -2513,6 +2513,30 @@ public function escapeWhenCastingToString($escape = true)\n return $this;\n }\n \n+ /**\n+ * Determine if the given model class is soft deletable.\n+ */\n+ public static function isSoftDeletable(): bool\n+ {\n+ return in_array(SoftDeletes::class, class_uses_recursive(static::class));\n+ }\n+\n+ /**\n+ * Determine if the given model class is prunable.\n+ */\n+ protected function isPrunable(): bool\n+ {\n+ return in_array(Prunable::class, class_uses_recursive(static::class)) || static::isMassPrunable();", + "comment_created_at": "2025-06-17T14:20:12+00:00", + "comment_author": "shaedrich", + "comment_body": "Good point 👍🏻 I had a similar idea—looks like there's an appetite for this 🚀", + "pr_file_module": null + }, + { + "comment_id": "2157457993", + "repo_full_name": "laravel/framework", + "pr_number": 56060, + "pr_file": "src/Illuminate/Database/Eloquent/Model.php", + "discussion_id": "2152151153", + "commented_code": "@@ -2513,6 +2513,30 @@ public function escapeWhenCastingToString($escape = true)\n return $this;\n }\n \n+ /**\n+ * Determine if the given model class is soft deletable.\n+ */\n+ public static function isSoftDeletable(): bool\n+ {\n+ return in_array(SoftDeletes::class, class_uses_recursive(static::class));\n+ }\n+\n+ /**\n+ * Determine if the given model class is prunable.\n+ */\n+ protected function isPrunable(): bool\n+ {\n+ return in_array(Prunable::class, class_uses_recursive(static::class)) || static::isMassPrunable();", + "comment_created_at": "2025-06-19T17:51:26+00:00", + "comment_author": "fgaroby", + "comment_body": "@cosmastech : [something like this](https://github.com/laravel/framework/pull/56079)?", + "pr_file_module": null + }, + { + "comment_id": "2157698190", + "repo_full_name": "laravel/framework", + "pr_number": 56060, + "pr_file": "src/Illuminate/Database/Eloquent/Model.php", + "discussion_id": "2152151153", + "commented_code": "@@ -2513,6 +2513,30 @@ public function escapeWhenCastingToString($escape = true)\n return $this;\n }\n \n+ /**\n+ * Determine if the given model class is soft deletable.\n+ */\n+ public static function isSoftDeletable(): bool\n+ {\n+ return in_array(SoftDeletes::class, class_uses_recursive(static::class));\n+ }\n+\n+ /**\n+ * Determine if the given model class is prunable.\n+ */\n+ protected function isPrunable(): bool\n+ {\n+ return in_array(Prunable::class, class_uses_recursive(static::class)) || static::isMassPrunable();", + "comment_created_at": "2025-06-19T21:55:02+00:00", + "comment_author": "cosmastech", + "comment_body": "Yeah! 
Nice.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2152154653", + "pr_number": 56060, + "pr_file": "src/Illuminate/Database/Eloquent/MassPrunable.php", + "created_at": "2025-06-17T12:35:55+00:00", + "commented_code": "$total = 0;\n\n $softDeletable = in_array(SoftDeletes::class, class_uses_recursive(get_class($this)));\n\n do {\n $total += $count = $softDeletable\n $total += $count = static::isSoftDeletable()", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2152154653", + "repo_full_name": "laravel/framework", + "pr_number": 56060, + "pr_file": "src/Illuminate/Database/Eloquent/MassPrunable.php", + "discussion_id": "2152154653", + "commented_code": "@@ -23,10 +26,8 @@ public function pruneAll(int $chunkSize = 1000)\n \n $total = 0;\n \n- $softDeletable = in_array(SoftDeletes::class, class_uses_recursive(get_class($this)));\n-\n do {\n- $total += $count = $softDeletable\n+ $total += $count = static::isSoftDeletable()", + "comment_created_at": "2025-06-17T12:35:55+00:00", + "comment_author": "cosmastech", + "comment_body": "**note:** This was explicitly moved outside of the loop so we wouldn't have to recursively scan the model again and again. https://github.com/laravel/framework/pull/55274", + "pr_file_module": null + }, + { + "comment_id": "2152409906", + "repo_full_name": "laravel/framework", + "pr_number": 56060, + "pr_file": "src/Illuminate/Database/Eloquent/MassPrunable.php", + "discussion_id": "2152154653", + "commented_code": "@@ -23,10 +26,8 @@ public function pruneAll(int $chunkSize = 1000)\n \n $total = 0;\n \n- $softDeletable = in_array(SoftDeletes::class, class_uses_recursive(get_class($this)));\n-\n do {\n- $total += $count = $softDeletable\n+ $total += $count = static::isSoftDeletable()", + "comment_created_at": "2025-06-17T14:21:26+00:00", + "comment_author": "shaedrich", + "comment_body": "Thanks for the hint—I'll adjust that 👍🏻", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1938570236", + "pr_number": 54442, + "pr_file": "src/Illuminate/Bus/PendingBatch.php", + "created_at": "2025-02-02T20:14:09+00:00", + "commented_code": "return $batch;\n }\n\n private function checkJobIsBatchable(object|array $job): void\n {\n foreach (Arr::wrap($job) as $job) {\n if ($job instanceof PendingBatch) {\n $this->checkJobIsBatchable($job->jobs->all());\n\n return;\n }\n\n if (! in_array(Batchable::class, class_uses_recursive($job))) {", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1938570236", + "repo_full_name": "laravel/framework", + "pr_number": 54442, + "pr_file": "src/Illuminate/Bus/PendingBatch.php", + "discussion_id": "1938570236", + "commented_code": "@@ -414,4 +412,19 @@ protected function store($repository)\n \n return $batch;\n }\n+\n+ private function checkJobIsBatchable(object|array $job): void\n+ {\n+ foreach (Arr::wrap($job) as $job) {\n+ if ($job instanceof PendingBatch) {\n+ $this->checkJobIsBatchable($job->jobs->all());\n+\n+ return;\n+ }\n+\n+ if (! 
in_array(Batchable::class, class_uses_recursive($job))) {", + "comment_created_at": "2025-02-02T20:14:09+00:00", + "comment_author": "cosmastech", + "comment_body": "I wonder if it wouldn't be useful to cache the job's class so you don't have to call class_uses_recursive for multiple instances of the same class.", + "pr_file_module": null + }, + { + "comment_id": "1938615259", + "repo_full_name": "laravel/framework", + "pr_number": 54442, + "pr_file": "src/Illuminate/Bus/PendingBatch.php", + "discussion_id": "1938570236", + "commented_code": "@@ -414,4 +412,19 @@ protected function store($repository)\n \n return $batch;\n }\n+\n+ private function checkJobIsBatchable(object|array $job): void\n+ {\n+ foreach (Arr::wrap($job) as $job) {\n+ if ($job instanceof PendingBatch) {\n+ $this->checkJobIsBatchable($job->jobs->all());\n+\n+ return;\n+ }\n+\n+ if (! in_array(Batchable::class, class_uses_recursive($job))) {", + "comment_created_at": "2025-02-02T23:32:52+00:00", + "comment_author": "shaedrich", + "comment_body": "While it's not possible with `class_uses_recursive`, this could be taken one step further, by implementing something similar to `class_uses_recursive`: Caching all classes in the recursive chain 🤔 ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1714821791", + "pr_number": 52461, + "pr_file": "src/Illuminate/Database/Eloquent/Model.php", + "created_at": "2024-08-13T07:39:12+00:00", + "commented_code": "*/\n public function toArray()\n {\n return array_merge($this->attributesToArray(), $this->relationsToArray());\n return $this->once(\n fn () => array_merge($this->attributesToArray(), $this->relationsToArray()),\n $this->attributesToArray(),\n );", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1714821791", + "repo_full_name": "laravel/framework", + "pr_number": 52461, + "pr_file": "src/Illuminate/Database/Eloquent/Model.php", + "discussion_id": "1714821791", + "commented_code": "@@ -1644,7 +1647,10 @@ public function callNamedScope($scope, array $parameters = [])\n */\n public function toArray()\n {\n- return array_merge($this->attributesToArray(), $this->relationsToArray());\n+ return $this->once(\n+ fn () => array_merge($this->attributesToArray(), $this->relationsToArray()),\n+ $this->attributesToArray(),\n+ );", + "comment_created_at": "2024-08-13T07:39:12+00:00", + "comment_author": "Tofandel", + "comment_body": "```suggestion\r\n $attributes = $this->attributesToArray();\r\n return $this->once(\r\n fn () => array_merge($attributes, $this->relationsToArray()),\r\n $attributes,\r\n );\r\n```\r\n\r\nThis kind of methods are a bit expensive so I would avoid calling it twice\r\n\r\nIdeally `default` could be a callback", + "pr_file_module": null + }, + { + "comment_id": "1715004249", + "repo_full_name": "laravel/framework", + "pr_number": 52461, + "pr_file": "src/Illuminate/Database/Eloquent/Model.php", + "discussion_id": "1714821791", + "commented_code": "@@ -1644,7 +1647,10 @@ public function callNamedScope($scope, array $parameters = [])\n */\n public function toArray()\n {\n- return array_merge($this->attributesToArray(), $this->relationsToArray());\n+ return $this->once(\n+ fn () => array_merge($this->attributesToArray(), $this->relationsToArray()),\n+ $this->attributesToArray(),\n+ );", + "comment_created_at": "2024-08-13T09:45:54+00:00", + "comment_author": "samlev", + "comment_body": "I've updated the default to accept a callback, and cache that value.\r\n\r\nYour suggested change wouldn't prevent 
`attributesToArray()` from being called on each recursive call to `toArray()` - it would just ignore the result on subsequent calls.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1669441618", + "pr_number": 52012, + "pr_file": "src/Illuminate/Collections/Collection.php", + "created_at": "2024-07-08T23:32:53+00:00", + "commented_code": "return new static(array_filter($this->items));\n }\n\n /**\n * Run a map and filter over each of the items.\n *\n * @template TMapValue\n *\n * @param callable(TValue, TKey): TMapValue $callback\n * @param (callable(TValue, TKey): bool)|null $reject\n * @return static\n */\n public function filterMap($callable, $filter = null)\n {\n $filter = $filter === null\n ? fn ($value) => (bool) $value\n : match ((new ReflectionFunction($filter))->getNumberOfParameters()) {", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1669441618", + "repo_full_name": "laravel/framework", + "pr_number": 52012, + "pr_file": "src/Illuminate/Collections/Collection.php", + "discussion_id": "1669441618", + "commented_code": "@@ -372,6 +373,35 @@ public function filter(?callable $callback = null)\n return new static(array_filter($this->items));\n }\n \n+ /**\n+ * Run a map and filter over each of the items.\n+ *\n+ * @template TMapValue\n+ *\n+ * @param callable(TValue, TKey): TMapValue $callback\n+ * @param (callable(TValue, TKey): bool)|null $reject\n+ * @return static\n+ */\n+ public function filterMap($callable, $filter = null)\n+ {\n+ $filter = $filter === null\n+ ? fn ($value) => (bool) $value\n+ : match ((new ReflectionFunction($filter))->getNumberOfParameters()) {", + "comment_created_at": "2024-07-08T23:32:53+00:00", + "comment_author": "Tofandel", + "comment_body": "This reflection seems unnecessary; why not just use `$filter` as is?", + "pr_file_module": null + }, + { + "comment_id": "1669518971", + "repo_full_name": "laravel/framework", + "pr_number": 52012, + "pr_file": "src/Illuminate/Collections/Collection.php", + "discussion_id": "1669441618", + "commented_code": "@@ -372,6 +373,35 @@ public function filter(?callable $callback = null)\n return new static(array_filter($this->items));\n }\n \n+ /**\n+ * Run a map and filter over each of the items.\n+ *\n+ * @template TMapValue\n+ *\n+ * @param callable(TValue, TKey): TMapValue $callback\n+ * @param (callable(TValue, TKey): bool)|null $reject\n+ * @return static\n+ */\n+ public function filterMap($callable, $filter = null)\n+ {\n+ $filter = $filter === null\n+ ? 
fn ($value) => (bool) $value\n+ : match ((new ReflectionFunction($filter))->getNumberOfParameters()) {", + "comment_created_at": "2024-07-09T01:25:38+00:00", + "comment_author": "timacdonald", + "comment_body": "PHP's built-in methods throw an exception when you pass too many arguments to them.\r\n\r\nWe check the number of arguments here to ensure that we don't hit that exception if the function only accepts a single argument (as we pass the value and the key to the filter method).\r\n\r\nWe take a similar approach in the `Collection:map` and `Arr::map` functions:\r\n\r\nhttps://github.com/laravel/framework/blob/0e9053da9d709c544200260cc0be5e223ea8ff93/src/Illuminate/Collections/Arr.php#L600-L610\r\n\r\nHowever, I benchmarked this approach with my approach and found that using reflection was faster than allowing the exception to be thrown and the impact of using reflection is negligible.\r\n\r\nThe tests fail without this work.", + "pr_file_module": null + }, + { + "comment_id": "1669955879", + "repo_full_name": "laravel/framework", + "pr_number": 52012, + "pr_file": "src/Illuminate/Collections/Collection.php", + "discussion_id": "1669441618", + "commented_code": "@@ -372,6 +373,35 @@ public function filter(?callable $callback = null)\n return new static(array_filter($this->items));\n }\n \n+ /**\n+ * Run a map and filter over each of the items.\n+ *\n+ * @template TMapValue\n+ *\n+ * @param callable(TValue, TKey): TMapValue $callback\n+ * @param (callable(TValue, TKey): bool)|null $reject\n+ * @return static\n+ */\n+ public function filterMap($callable, $filter = null)\n+ {\n+ $filter = $filter === null\n+ ? fn ($value) => (bool) $value\n+ : match ((new ReflectionFunction($filter))->getNumberOfParameters()) {", + "comment_created_at": "2024-07-09T08:04:01+00:00", + "comment_author": "Tofandel", + "comment_body": "Ah yes, the good ol' php inbuilt functions that always destroy all your hopes 😅 I forgot about those\r\n\r\nReflection is indeed better than try, catch, because try catch means you need to call the function which if it had some side effects could be very undesirable, and reflection is always super fast in php (I don't know why this myth came to be that it's slow and needs caching) but the advantage of the try catch is that it works even if you use this kind of things\r\n\r\n`$this->filterMap($callable, fn () => ! $reject(...func_get_args()))`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1642109271", + "pr_number": 51809, + "pr_file": "src/Illuminate/Database/Eloquent/Relations/Relation.php", + "created_at": "2024-06-17T03:40:13+00:00", + "commented_code": "return static::$morphMap[$alias] ?? null;\n }\n\n /**\n * Get the alias associated with a custom polymorphic class.\n *\n * @param string $className\n * @return int|string|null\n */\n public static function getMorphAlias(string $className)\n {\n foreach (static::$morphMap as $alias => $morphedClass) {\n if ($morphedClass === $className) {\n return $alias;\n }\n }\n\n return null;", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1642109271", + "repo_full_name": "laravel/framework", + "pr_number": 51809, + "pr_file": "src/Illuminate/Database/Eloquent/Relations/Relation.php", + "discussion_id": "1642109271", + "commented_code": "@@ -501,6 +501,23 @@ public static function getMorphedModel($alias)\n return static::$morphMap[$alias] ?? 
null;\n }\n \n+ /**\n+ * Get the alias associated with a custom polymorphic class.\n+ *\n+ * @param string $className\n+ * @return int|string|null\n+ */\n+ public static function getMorphAlias(string $className)\n+ {\n+ foreach (static::$morphMap as $alias => $morphedClass) {\n+ if ($morphedClass === $className) {\n+ return $alias;\n+ }\n+ }\n+\n+ return null;", + "comment_created_at": "2024-06-17T03:40:13+00:00", + "comment_author": "calebdw", + "comment_body": "Would it not be easier to do the following?\r\n\r\n```suggestion\r\n return array_flip(static::$morphMap)[$className] ?? null;\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1642138211", + "repo_full_name": "laravel/framework", + "pr_number": 51809, + "pr_file": "src/Illuminate/Database/Eloquent/Relations/Relation.php", + "discussion_id": "1642109271", + "commented_code": "@@ -501,6 +501,23 @@ public static function getMorphedModel($alias)\n return static::$morphMap[$alias] ?? null;\n }\n \n+ /**\n+ * Get the alias associated with a custom polymorphic class.\n+ *\n+ * @param string $className\n+ * @return int|string|null\n+ */\n+ public static function getMorphAlias(string $className)\n+ {\n+ foreach (static::$morphMap as $alias => $morphedClass) {\n+ if ($morphedClass === $className) {\n+ return $alias;\n+ }\n+ }\n+\n+ return null;", + "comment_created_at": "2024-06-17T04:15:42+00:00", + "comment_author": "donnysim", + "comment_body": "Would it not be easier to use array_search? Flip would create unnecessary garbage and for heavy morph use that's not desirable.", + "pr_file_module": null + }, + { + "comment_id": "1642887423", + "repo_full_name": "laravel/framework", + "pr_number": 51809, + "pr_file": "src/Illuminate/Database/Eloquent/Relations/Relation.php", + "discussion_id": "1642109271", + "commented_code": "@@ -501,6 +501,23 @@ public static function getMorphedModel($alias)\n return static::$morphMap[$alias] ?? null;\n }\n \n+ /**\n+ * Get the alias associated with a custom polymorphic class.\n+ *\n+ * @param string $className\n+ * @return int|string|null\n+ */\n+ public static function getMorphAlias(string $className)\n+ {\n+ foreach (static::$morphMap as $alias => $morphedClass) {\n+ if ($morphedClass === $className) {\n+ return $alias;\n+ }\n+ }\n+\n+ return null;", + "comment_created_at": "2024-06-17T14:12:24+00:00", + "comment_author": "calebdw", + "comment_body": "@taylorotwell, I was mistaken---benchmarking shows that using `array_search` is a good deal faster than having to flip the entire array and then still search through the keys:\r\n\r\n![image](https://github.com/laravel/framework/assets/4176520/58880242-29ac-4a69-b8ad-3280b9190553)\r\n\r\n", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/framework-cache-expensive-operations.md b/_reviewers/framework-cache-expensive-operations.md index 16af272..c413ba8 100644 --- a/_reviewers/framework-cache-expensive-operations.md +++ b/_reviewers/framework-cache-expensive-operations.md @@ -41,259 +41,3 @@ public function isPrunable(): bool Similarly, when selecting methods for array operations, favor efficient approaches. For instance, use `array_search()` over `array_flip()` followed by key lookup for better performance with large arrays. When implementing expensive transformations (like `toArray()` on models), use memoization techniques to avoid recalculating the same values multiple times during a single request. 
- - -[ - { - "discussion_id": "2152151153", - "pr_number": 56060, - "pr_file": "src/Illuminate/Database/Eloquent/Model.php", - "created_at": "2025-06-17T12:34:12+00:00", - "commented_code": "return $this;\n }\n\n /**\n * Determine if the given model class is soft deletable.\n */\n public static function isSoftDeletable(): bool\n {\n return in_array(SoftDeletes::class, class_uses_recursive(static::class));\n }\n\n /**\n * Determine if the given model class is prunable.\n */\n protected function isPrunable(): bool\n {\n return in_array(Prunable::class, class_uses_recursive(static::class)) || static::isMassPrunable();", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2152151153", - "repo_full_name": "laravel/framework", - "pr_number": 56060, - "pr_file": "src/Illuminate/Database/Eloquent/Model.php", - "discussion_id": "2152151153", - "commented_code": "@@ -2513,6 +2513,30 @@ public function escapeWhenCastingToString($escape = true)\n return $this;\n }\n \n+ /**\n+ * Determine if the given model class is soft deletable.\n+ */\n+ public static function isSoftDeletable(): bool\n+ {\n+ return in_array(SoftDeletes::class, class_uses_recursive(static::class));\n+ }\n+\n+ /**\n+ * Determine if the given model class is prunable.\n+ */\n+ protected function isPrunable(): bool\n+ {\n+ return in_array(Prunable::class, class_uses_recursive(static::class)) || static::isMassPrunable();", - "comment_created_at": "2025-06-17T12:34:12+00:00", - "comment_author": "cosmastech", - "comment_body": "**thought:** It would be great if there were a ClassUsesRecursive once helper that would store these values for the lifetime of a request.\r\n\r\nWritten out as you have here, we can see that we're going to loop through all the traits of the class and its parent-classes twice if the class is MassPrunable but not Prunable.", - "pr_file_module": null - }, - { - "comment_id": "2152406875", - "repo_full_name": "laravel/framework", - "pr_number": 56060, - "pr_file": "src/Illuminate/Database/Eloquent/Model.php", - "discussion_id": "2152151153", - "commented_code": "@@ -2513,6 +2513,30 @@ public function escapeWhenCastingToString($escape = true)\n return $this;\n }\n \n+ /**\n+ * Determine if the given model class is soft deletable.\n+ */\n+ public static function isSoftDeletable(): bool\n+ {\n+ return in_array(SoftDeletes::class, class_uses_recursive(static::class));\n+ }\n+\n+ /**\n+ * Determine if the given model class is prunable.\n+ */\n+ protected function isPrunable(): bool\n+ {\n+ return in_array(Prunable::class, class_uses_recursive(static::class)) || static::isMassPrunable();", - "comment_created_at": "2025-06-17T14:20:12+00:00", - "comment_author": "shaedrich", - "comment_body": "Good point \ud83d\udc4d\ud83c\udffb I had a similar idea\u2014looks like there's an appetite for this \ud83d\ude80", - "pr_file_module": null - }, - { - "comment_id": "2157457993", - "repo_full_name": "laravel/framework", - "pr_number": 56060, - "pr_file": "src/Illuminate/Database/Eloquent/Model.php", - "discussion_id": "2152151153", - "commented_code": "@@ -2513,6 +2513,30 @@ public function escapeWhenCastingToString($escape = true)\n return $this;\n }\n \n+ /**\n+ * Determine if the given model class is soft deletable.\n+ */\n+ public static function isSoftDeletable(): bool\n+ {\n+ return in_array(SoftDeletes::class, class_uses_recursive(static::class));\n+ }\n+\n+ /**\n+ * Determine if the given model class is prunable.\n+ */\n+ protected function isPrunable(): bool\n+ {\n+ return 
in_array(Prunable::class, class_uses_recursive(static::class)) || static::isMassPrunable();", - "comment_created_at": "2025-06-19T17:51:26+00:00", - "comment_author": "fgaroby", - "comment_body": "@cosmastech : [something like this](https://github.com/laravel/framework/pull/56079)?", - "pr_file_module": null - }, - { - "comment_id": "2157698190", - "repo_full_name": "laravel/framework", - "pr_number": 56060, - "pr_file": "src/Illuminate/Database/Eloquent/Model.php", - "discussion_id": "2152151153", - "commented_code": "@@ -2513,6 +2513,30 @@ public function escapeWhenCastingToString($escape = true)\n return $this;\n }\n \n+ /**\n+ * Determine if the given model class is soft deletable.\n+ */\n+ public static function isSoftDeletable(): bool\n+ {\n+ return in_array(SoftDeletes::class, class_uses_recursive(static::class));\n+ }\n+\n+ /**\n+ * Determine if the given model class is prunable.\n+ */\n+ protected function isPrunable(): bool\n+ {\n+ return in_array(Prunable::class, class_uses_recursive(static::class)) || static::isMassPrunable();", - "comment_created_at": "2025-06-19T21:55:02+00:00", - "comment_author": "cosmastech", - "comment_body": "Yeah! Nice.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2152154653", - "pr_number": 56060, - "pr_file": "src/Illuminate/Database/Eloquent/MassPrunable.php", - "created_at": "2025-06-17T12:35:55+00:00", - "commented_code": "$total = 0;\n\n $softDeletable = in_array(SoftDeletes::class, class_uses_recursive(get_class($this)));\n\n do {\n $total += $count = $softDeletable\n $total += $count = static::isSoftDeletable()", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2152154653", - "repo_full_name": "laravel/framework", - "pr_number": 56060, - "pr_file": "src/Illuminate/Database/Eloquent/MassPrunable.php", - "discussion_id": "2152154653", - "commented_code": "@@ -23,10 +26,8 @@ public function pruneAll(int $chunkSize = 1000)\n \n $total = 0;\n \n- $softDeletable = in_array(SoftDeletes::class, class_uses_recursive(get_class($this)));\n-\n do {\n- $total += $count = $softDeletable\n+ $total += $count = static::isSoftDeletable()", - "comment_created_at": "2025-06-17T12:35:55+00:00", - "comment_author": "cosmastech", - "comment_body": "**note:** This was explicitly moved outside of the loop so we wouldn't have to recursively scan the model again and again. 
https://github.com/laravel/framework/pull/55274", - "pr_file_module": null - }, - { - "comment_id": "2152409906", - "repo_full_name": "laravel/framework", - "pr_number": 56060, - "pr_file": "src/Illuminate/Database/Eloquent/MassPrunable.php", - "discussion_id": "2152154653", - "commented_code": "@@ -23,10 +26,8 @@ public function pruneAll(int $chunkSize = 1000)\n \n $total = 0;\n \n- $softDeletable = in_array(SoftDeletes::class, class_uses_recursive(get_class($this)));\n-\n do {\n- $total += $count = $softDeletable\n+ $total += $count = static::isSoftDeletable()", - "comment_created_at": "2025-06-17T14:21:26+00:00", - "comment_author": "shaedrich", - "comment_body": "Thanks for the hint\u2014I'll adjust that \ud83d\udc4d\ud83c\udffb", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1938570236", - "pr_number": 54442, - "pr_file": "src/Illuminate/Bus/PendingBatch.php", - "created_at": "2025-02-02T20:14:09+00:00", - "commented_code": "return $batch;\n }\n\n private function checkJobIsBatchable(object|array $job): void\n {\n foreach (Arr::wrap($job) as $job) {\n if ($job instanceof PendingBatch) {\n $this->checkJobIsBatchable($job->jobs->all());\n\n return;\n }\n\n if (! in_array(Batchable::class, class_uses_recursive($job))) {", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1938570236", - "repo_full_name": "laravel/framework", - "pr_number": 54442, - "pr_file": "src/Illuminate/Bus/PendingBatch.php", - "discussion_id": "1938570236", - "commented_code": "@@ -414,4 +412,19 @@ protected function store($repository)\n \n return $batch;\n }\n+\n+ private function checkJobIsBatchable(object|array $job): void\n+ {\n+ foreach (Arr::wrap($job) as $job) {\n+ if ($job instanceof PendingBatch) {\n+ $this->checkJobIsBatchable($job->jobs->all());\n+\n+ return;\n+ }\n+\n+ if (! in_array(Batchable::class, class_uses_recursive($job))) {", - "comment_created_at": "2025-02-02T20:14:09+00:00", - "comment_author": "cosmastech", - "comment_body": "I wonder if it wouldn't be useful to cache the job's class so you don't have to call class_uses_recursive for multiple instances of the same class.", - "pr_file_module": null - }, - { - "comment_id": "1938615259", - "repo_full_name": "laravel/framework", - "pr_number": 54442, - "pr_file": "src/Illuminate/Bus/PendingBatch.php", - "discussion_id": "1938570236", - "commented_code": "@@ -414,4 +412,19 @@ protected function store($repository)\n \n return $batch;\n }\n+\n+ private function checkJobIsBatchable(object|array $job): void\n+ {\n+ foreach (Arr::wrap($job) as $job) {\n+ if ($job instanceof PendingBatch) {\n+ $this->checkJobIsBatchable($job->jobs->all());\n+\n+ return;\n+ }\n+\n+ if (! 
in_array(Batchable::class, class_uses_recursive($job))) {", - "comment_created_at": "2025-02-02T23:32:52+00:00", - "comment_author": "shaedrich", - "comment_body": "While it's not possible with `class_uses_recursive`, this could be taken one step further, by implementing something similar to `class_uses_recursive`: Caching all classes in the recursive chain \ud83e\udd14 ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1714821791", - "pr_number": 52461, - "pr_file": "src/Illuminate/Database/Eloquent/Model.php", - "created_at": "2024-08-13T07:39:12+00:00", - "commented_code": "*/\n public function toArray()\n {\n return array_merge($this->attributesToArray(), $this->relationsToArray());\n return $this->once(\n fn () => array_merge($this->attributesToArray(), $this->relationsToArray()),\n $this->attributesToArray(),\n );", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1714821791", - "repo_full_name": "laravel/framework", - "pr_number": 52461, - "pr_file": "src/Illuminate/Database/Eloquent/Model.php", - "discussion_id": "1714821791", - "commented_code": "@@ -1644,7 +1647,10 @@ public function callNamedScope($scope, array $parameters = [])\n */\n public function toArray()\n {\n- return array_merge($this->attributesToArray(), $this->relationsToArray());\n+ return $this->once(\n+ fn () => array_merge($this->attributesToArray(), $this->relationsToArray()),\n+ $this->attributesToArray(),\n+ );", - "comment_created_at": "2024-08-13T07:39:12+00:00", - "comment_author": "Tofandel", - "comment_body": "```suggestion\r\n $attributes = $this->attributesToArray();\r\n return $this->once(\r\n fn () => array_merge($attributes, $this->relationsToArray()),\r\n $attributes,\r\n );\r\n```\r\n\r\nThis kind of methods are a bit expensive so I would avoid calling it twice\r\n\r\nIdeally `default` could be a callback", - "pr_file_module": null - }, - { - "comment_id": "1715004249", - "repo_full_name": "laravel/framework", - "pr_number": 52461, - "pr_file": "src/Illuminate/Database/Eloquent/Model.php", - "discussion_id": "1714821791", - "commented_code": "@@ -1644,7 +1647,10 @@ public function callNamedScope($scope, array $parameters = [])\n */\n public function toArray()\n {\n- return array_merge($this->attributesToArray(), $this->relationsToArray());\n+ return $this->once(\n+ fn () => array_merge($this->attributesToArray(), $this->relationsToArray()),\n+ $this->attributesToArray(),\n+ );", - "comment_created_at": "2024-08-13T09:45:54+00:00", - "comment_author": "samlev", - "comment_body": "I've updated the default to accept a callback, and cache that value.\r\n\r\nYour suggested change wouldn't prevent `attributesToArray()` from being called on each recursive call to `toArray()` - it would just ignore the result on subsequent calls.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1669441618", - "pr_number": 52012, - "pr_file": "src/Illuminate/Collections/Collection.php", - "created_at": "2024-07-08T23:32:53+00:00", - "commented_code": "return new static(array_filter($this->items));\n }\n\n /**\n * Run a map and filter over each of the items.\n *\n * @template TMapValue\n *\n * @param callable(TValue, TKey): TMapValue $callback\n * @param (callable(TValue, TKey): bool)|null $reject\n * @return static\n */\n public function filterMap($callable, $filter = null)\n {\n $filter = $filter === null\n ? 
fn ($value) => (bool) $value\n : match ((new ReflectionFunction($filter))->getNumberOfParameters()) {", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1669441618", - "repo_full_name": "laravel/framework", - "pr_number": 52012, - "pr_file": "src/Illuminate/Collections/Collection.php", - "discussion_id": "1669441618", - "commented_code": "@@ -372,6 +373,35 @@ public function filter(?callable $callback = null)\n return new static(array_filter($this->items));\n }\n \n+ /**\n+ * Run a map and filter over each of the items.\n+ *\n+ * @template TMapValue\n+ *\n+ * @param callable(TValue, TKey): TMapValue $callback\n+ * @param (callable(TValue, TKey): bool)|null $reject\n+ * @return static\n+ */\n+ public function filterMap($callable, $filter = null)\n+ {\n+ $filter = $filter === null\n+ ? fn ($value) => (bool) $value\n+ : match ((new ReflectionFunction($filter))->getNumberOfParameters()) {", - "comment_created_at": "2024-07-08T23:32:53+00:00", - "comment_author": "Tofandel", - "comment_body": "This reflection seems unnecessary; why not just use `$filter` as is?", - "pr_file_module": null - }, - { - "comment_id": "1669518971", - "repo_full_name": "laravel/framework", - "pr_number": 52012, - "pr_file": "src/Illuminate/Collections/Collection.php", - "discussion_id": "1669441618", - "commented_code": "@@ -372,6 +373,35 @@ public function filter(?callable $callback = null)\n return new static(array_filter($this->items));\n }\n \n+ /**\n+ * Run a map and filter over each of the items.\n+ *\n+ * @template TMapValue\n+ *\n+ * @param callable(TValue, TKey): TMapValue $callback\n+ * @param (callable(TValue, TKey): bool)|null $reject\n+ * @return static\n+ */\n+ public function filterMap($callable, $filter = null)\n+ {\n+ $filter = $filter === null\n+ ? fn ($value) => (bool) $value\n+ : match ((new ReflectionFunction($filter))->getNumberOfParameters()) {", - "comment_created_at": "2024-07-09T01:25:38+00:00", - "comment_author": "timacdonald", - "comment_body": "PHP's built-in methods throw an exception when you pass too many arguments to them.\r\n\r\nWe check the number of arguments here to ensure that we don't hit that exception if the function only accepts a single argument (as we pass the value and the key to the filter method).\r\n\r\nWe take a similar approach in the `Collection:map` and `Arr::map` functions:\r\n\r\nhttps://github.com/laravel/framework/blob/0e9053da9d709c544200260cc0be5e223ea8ff93/src/Illuminate/Collections/Arr.php#L600-L610\r\n\r\nHowever, I benchmarked this approach with my approach and found that using reflection was faster than allowing the exception to be thrown and the impact of using reflection is negligible.\r\n\r\nThe tests fail without this work.", - "pr_file_module": null - }, - { - "comment_id": "1669955879", - "repo_full_name": "laravel/framework", - "pr_number": 52012, - "pr_file": "src/Illuminate/Collections/Collection.php", - "discussion_id": "1669441618", - "commented_code": "@@ -372,6 +373,35 @@ public function filter(?callable $callback = null)\n return new static(array_filter($this->items));\n }\n \n+ /**\n+ * Run a map and filter over each of the items.\n+ *\n+ * @template TMapValue\n+ *\n+ * @param callable(TValue, TKey): TMapValue $callback\n+ * @param (callable(TValue, TKey): bool)|null $reject\n+ * @return static\n+ */\n+ public function filterMap($callable, $filter = null)\n+ {\n+ $filter = $filter === null\n+ ? 
fn ($value) => (bool) $value\n+ : match ((new ReflectionFunction($filter))->getNumberOfParameters()) {", - "comment_created_at": "2024-07-09T08:04:01+00:00", - "comment_author": "Tofandel", - "comment_body": "Ah yes, the good ol' php inbuilt functions that always destroy all your hopes \ud83d\ude05 I forgot about those\r\n\r\nReflection is indeed better than try, catch, because try catch means you need to call the function which if it had some side effects could be very undesirable, and reflection is always super fast in php (I don't know why this myth came to be that it's slow and needs caching) but the advantage of the try catch is that it works even if you use this kind of things\r\n\r\n`$this->filterMap($callable, fn () => ! $reject(...func_get_args()))`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1642109271", - "pr_number": 51809, - "pr_file": "src/Illuminate/Database/Eloquent/Relations/Relation.php", - "created_at": "2024-06-17T03:40:13+00:00", - "commented_code": "return static::$morphMap[$alias] ?? null;\n }\n\n /**\n * Get the alias associated with a custom polymorphic class.\n *\n * @param string $className\n * @return int|string|null\n */\n public static function getMorphAlias(string $className)\n {\n foreach (static::$morphMap as $alias => $morphedClass) {\n if ($morphedClass === $className) {\n return $alias;\n }\n }\n\n return null;", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1642109271", - "repo_full_name": "laravel/framework", - "pr_number": 51809, - "pr_file": "src/Illuminate/Database/Eloquent/Relations/Relation.php", - "discussion_id": "1642109271", - "commented_code": "@@ -501,6 +501,23 @@ public static function getMorphedModel($alias)\n return static::$morphMap[$alias] ?? null;\n }\n \n+ /**\n+ * Get the alias associated with a custom polymorphic class.\n+ *\n+ * @param string $className\n+ * @return int|string|null\n+ */\n+ public static function getMorphAlias(string $className)\n+ {\n+ foreach (static::$morphMap as $alias => $morphedClass) {\n+ if ($morphedClass === $className) {\n+ return $alias;\n+ }\n+ }\n+\n+ return null;", - "comment_created_at": "2024-06-17T03:40:13+00:00", - "comment_author": "calebdw", - "comment_body": "Would it not be easier to do the following?\r\n\r\n```suggestion\r\n return array_flip(static::$morphMap)[$className] ?? null;\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1642138211", - "repo_full_name": "laravel/framework", - "pr_number": 51809, - "pr_file": "src/Illuminate/Database/Eloquent/Relations/Relation.php", - "discussion_id": "1642109271", - "commented_code": "@@ -501,6 +501,23 @@ public static function getMorphedModel($alias)\n return static::$morphMap[$alias] ?? null;\n }\n \n+ /**\n+ * Get the alias associated with a custom polymorphic class.\n+ *\n+ * @param string $className\n+ * @return int|string|null\n+ */\n+ public static function getMorphAlias(string $className)\n+ {\n+ foreach (static::$morphMap as $alias => $morphedClass) {\n+ if ($morphedClass === $className) {\n+ return $alias;\n+ }\n+ }\n+\n+ return null;", - "comment_created_at": "2024-06-17T04:15:42+00:00", - "comment_author": "donnysim", - "comment_body": "Would it not be easier to use array_search? 
Flip would create unnecessary garbage and for heavy morph use that's not desirable.", - "pr_file_module": null - }, - { - "comment_id": "1642887423", - "repo_full_name": "laravel/framework", - "pr_number": 51809, - "pr_file": "src/Illuminate/Database/Eloquent/Relations/Relation.php", - "discussion_id": "1642109271", - "commented_code": "@@ -501,6 +501,23 @@ public static function getMorphedModel($alias)\n return static::$morphMap[$alias] ?? null;\n }\n \n+ /**\n+ * Get the alias associated with a custom polymorphic class.\n+ *\n+ * @param string $className\n+ * @return int|string|null\n+ */\n+ public static function getMorphAlias(string $className)\n+ {\n+ foreach (static::$morphMap as $alias => $morphedClass) {\n+ if ($morphedClass === $className) {\n+ return $alias;\n+ }\n+ }\n+\n+ return null;", - "comment_created_at": "2024-06-17T14:12:24+00:00", - "comment_author": "calebdw", - "comment_body": "@taylorotwell, I was mistaken---benchmarking shows that using `array_search` is a good deal faster than having to flip the entire array and then still search through the keys:\r\n\r\n![image](https://github.com/laravel/framework/assets/4176520/58880242-29ac-4a69-b8ad-3280b9190553)\r\n\r\n", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/framework-descriptive-configuration-keys.json b/_reviewers/framework-descriptive-configuration-keys.json new file mode 100644 index 0000000..ecaa92e --- /dev/null +++ b/_reviewers/framework-descriptive-configuration-keys.json @@ -0,0 +1,186 @@ +[ + { + "discussion_id": "2086556212", + "pr_number": 55724, + "pr_file": "src/Illuminate/Foundation/Http/MaintenanceModeBypassCookie.php", + "created_at": "2025-05-13T11:06:49+00:00", + "commented_code": "*/\n public static function create(string $key)\n {\n $expiresAt = Carbon::now()->addHours(12);\n $expiresAt = Carbon::now()->addHours(config('session.maintenance_bypass_cookie_lifetime'));", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2086556212", + "repo_full_name": "laravel/framework", + "pr_number": 55724, + "pr_file": "src/Illuminate/Foundation/Http/MaintenanceModeBypassCookie.php", + "discussion_id": "2086556212", + "commented_code": "@@ -15,7 +15,7 @@ class MaintenanceModeBypassCookie\n */\n public static function create(string $key)\n {\n- $expiresAt = Carbon::now()->addHours(12);\n+ $expiresAt = Carbon::now()->addHours(config('session.maintenance_bypass_cookie_lifetime'));", + "comment_created_at": "2025-05-13T11:06:49+00:00", + "comment_author": "shaedrich", + "comment_body": "According to your config doc block,\r\n> number of minutes\r\n\r\nthis is now minutes that are added\r\n```suggestion\r\n $expiresAt = Carbon::now()->addMinutes(config('session.maintenance_bypass_cookie_lifetime'));\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2086558463", + "pr_number": 55724, + "pr_file": "config/session.php", + "created_at": "2025-05-13T11:08:14+00:00", + "commented_code": "'expire_on_close' => env('SESSION_EXPIRE_ON_CLOSE', false),\n\n /*\n |--------------------------------------------------------------------------\n | Maintenance Bypass Cookie Lifetime\n |--------------------------------------------------------------------------\n |\n | Here you may specify the number of minutes the maintenance mode\n | bypass cookie should remain valid. This cookie is issued when a\n | user accesses the application using the secret bypass URL. Once\n | set, it allows temporary access to the application while it is\n | in maintenance mode. 
The default duration is 12 hours.\n |\n */\n\n 'maintenance_bypass_cookie_lifetime' => (int) env('SESSION_MAINTENANCE_BYPASS_COOKIE_LIFETIME', 7720),", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2086558463", + "repo_full_name": "laravel/framework", + "pr_number": 55724, + "pr_file": "config/session.php", + "discussion_id": "2086558463", + "commented_code": "@@ -36,6 +36,21 @@\n \n 'expire_on_close' => env('SESSION_EXPIRE_ON_CLOSE', false),\n \n+ /*\n+ |--------------------------------------------------------------------------\n+ | Maintenance Bypass Cookie Lifetime\n+ |--------------------------------------------------------------------------\n+ |\n+ | Here you may specify the number of minutes the maintenance mode\n+ | bypass cookie should remain valid. This cookie is issued when a\n+ | user accesses the application using the secret bypass URL. Once\n+ | set, it allows temporary access to the application while it is\n+ | in maintenance mode. The default duration is 12 hours.\n+ |\n+ */\n+\n+ 'maintenance_bypass_cookie_lifetime' => (int) env('SESSION_MAINTENANCE_BYPASS_COOKIE_LIFETIME', 7720),", + "comment_created_at": "2025-05-13T11:08:14+00:00", + "comment_author": "shaedrich", + "comment_body": "Accoding to your PHPDoc block,\r\n> number of minutes\r\n\r\nbut 7720 minutes is not 12 hours\r\n```suggestion\r\n /*\r\n |--------------------------------------------------------------------------\r\n | Maintenance Bypass Cookie Lifetime\r\n |--------------------------------------------------------------------------\r\n |\r\n | Here you may specify the number of minutes the maintenance mode\r\n | bypass cookie should remain valid. This cookie is issued when a\r\n | user accesses the application using the secret bypass URL. Once\r\n | set, it allows temporary access to the application while it is\r\n | in maintenance mode. The default duration is 12 hours.\r\n |\r\n */\r\n\r\n 'maintenance_bypass_cookie_lifetime' => (int) env('SESSION_MAINTENANCE_BYPASS_COOKIE_LIFETIME', 720),\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2086559648", + "repo_full_name": "laravel/framework", + "pr_number": 55724, + "pr_file": "config/session.php", + "discussion_id": "2086558463", + "commented_code": "@@ -36,6 +36,21 @@\n \n 'expire_on_close' => env('SESSION_EXPIRE_ON_CLOSE', false),\n \n+ /*\n+ |--------------------------------------------------------------------------\n+ | Maintenance Bypass Cookie Lifetime\n+ |--------------------------------------------------------------------------\n+ |\n+ | Here you may specify the number of minutes the maintenance mode\n+ | bypass cookie should remain valid. This cookie is issued when a\n+ | user accesses the application using the secret bypass URL. Once\n+ | set, it allows temporary access to the application while it is\n+ | in maintenance mode. The default duration is 12 hours.\n+ |\n+ */\n+\n+ 'maintenance_bypass_cookie_lifetime' => (int) env('SESSION_MAINTENANCE_BYPASS_COOKIE_LIFETIME', 7720),", + "comment_created_at": "2025-05-13T11:09:02+00:00", + "comment_author": "shaedrich", + "comment_body": "Maybe, to avoid confusion when using, the unit should be included in the key:\r\n```suggestion\r\n /*\r\n |--------------------------------------------------------------------------\r\n | Maintenance Bypass Cookie Lifetime\r\n |--------------------------------------------------------------------------\r\n |\r\n | Here you may specify the number of minutes the maintenance mode\r\n | bypass cookie should remain valid. 
This cookie is issued when a\r\n | user accesses the application using the secret bypass URL. Once\r\n | set, it allows temporary access to the application while it is\r\n | in maintenance mode. The default duration is 12 hours.\r\n |\r\n */\r\n\r\n 'maintenance_bypass_cookie_lifetime_minutes' => (int) env('SESSION_MAINTENANCE_BYPASS_COOKIE_LIFETIME_MINUTES', 720),\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2086578428", + "repo_full_name": "laravel/framework", + "pr_number": 55724, + "pr_file": "config/session.php", + "discussion_id": "2086558463", + "commented_code": "@@ -36,6 +36,21 @@\n \n 'expire_on_close' => env('SESSION_EXPIRE_ON_CLOSE', false),\n \n+ /*\n+ |--------------------------------------------------------------------------\n+ | Maintenance Bypass Cookie Lifetime\n+ |--------------------------------------------------------------------------\n+ |\n+ | Here you may specify the number of minutes the maintenance mode\n+ | bypass cookie should remain valid. This cookie is issued when a\n+ | user accesses the application using the secret bypass URL. Once\n+ | set, it allows temporary access to the application while it is\n+ | in maintenance mode. The default duration is 12 hours.\n+ |\n+ */\n+\n+ 'maintenance_bypass_cookie_lifetime' => (int) env('SESSION_MAINTENANCE_BYPASS_COOKIE_LIFETIME', 7720),", + "comment_created_at": "2025-05-13T11:20:06+00:00", + "comment_author": "sajjadhossainshohag", + "comment_body": "@shaedrich This was a mistake, thanks.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1868980349", + "pr_number": 53749, + "pr_file": "config/mail.php", + "created_at": "2024-12-04T08:46:30+00:00", + "commented_code": "'mailers' => [\n\n 'smtp' => [\n 'scheme' => env('MAIL_MAILER', 'smtp'),", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1868980349", + "repo_full_name": "laravel/framework", + "pr_number": 53749, + "pr_file": "config/mail.php", + "discussion_id": "1868980349", + "commented_code": "@@ -38,6 +38,7 @@\n 'mailers' => [\n \n 'smtp' => [\n+ 'scheme' => env('MAIL_MAILER', 'smtp'),", + "comment_created_at": "2024-12-04T08:46:30+00:00", + "comment_author": "crynobone", + "comment_body": "This is incorrect, possible values of `MAIL_MAILER` are \"smtp\", \"sendmail\", \"mailgun\", \"ses\", \"ses-v2\", \"postmark\", \"resend\", \"log\", \"array\", \"failover\" or \"roundrobin\"", + "pr_file_module": null + }, + { + "comment_id": "1869165206", + "repo_full_name": "laravel/framework", + "pr_number": 53749, + "pr_file": "config/mail.php", + "discussion_id": "1868980349", + "commented_code": "@@ -38,6 +38,7 @@\n 'mailers' => [\n \n 'smtp' => [\n+ 'scheme' => env('MAIL_MAILER', 'smtp'),", + "comment_created_at": "2024-12-04T10:29:31+00:00", + "comment_author": "danielrona", + "comment_body": "Yes, you are right this should be of course a new variable, my mistake I added our hotfix instead of the generic config option.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1869186104", + "pr_number": 53749, + "pr_file": "config/mail.php", + "created_at": "2024-12-04T10:38:39+00:00", + "commented_code": "'mailers' => [\n\n 'smtp' => [\n 'scheme' => env('MAIL_SCHEME', 'smtp'),", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1869186104", + "repo_full_name": "laravel/framework", + "pr_number": 53749, + "pr_file": "config/mail.php", + "discussion_id": "1869186104", + "commented_code": "@@ -38,6 +38,7 @@\n 'mailers' => [\n \n 'smtp' => [\n+ 'scheme' => 
env('MAIL_SCHEME', 'smtp'),", + "comment_created_at": "2024-12-04T10:38:39+00:00", + "comment_author": "crynobone", + "comment_body": "This will make `scheme` default to `smtp` instead of relying on `port` value as indicated in #53585\r\n\r\nWith this changes everyone using the following configuration:\r\n\r\n```ini\r\nMAIL_ENCRYPTION=tls\r\nMAIL_PORT=465\r\n```\r\n\r\nWill resolved to `stmp` instead correctly using `smtps` and create more gotcha than what we correctly have with `MAIL_ENCRYPTION`.", + "pr_file_module": null + }, + { + "comment_id": "1869237308", + "repo_full_name": "laravel/framework", + "pr_number": 53749, + "pr_file": "config/mail.php", + "discussion_id": "1869186104", + "commented_code": "@@ -38,6 +38,7 @@\n 'mailers' => [\n \n 'smtp' => [\n+ 'scheme' => env('MAIL_SCHEME', 'smtp'),", + "comment_created_at": "2024-12-04T11:01:21+00:00", + "comment_author": "danielrona", + "comment_body": "Right now everyone who has configured ```MAIL_ENCRYPTION=null``` has a broken configuration.\r\n\r\nI also do not understand why we would be relying on a port to define a scheme that makes no sense.\r\n\r\nJust because a port can be smtp or smtps doesn't mean it needs to be, \r\nI can have port 25 be smpts or I can have port 587 be smtp this is totally up to the setup of the provider.\r\n\r\nYes the defaults would be smtp for 25 and smtps for 587 not to mention that 465 is a legacy default port for smtps nowadays it would/should be 587. \r\n\r\nSo right now this would still end up causing issues for most people and having a dedicated configuration parameter would resolve this.\r\n\r\nYou can setup a fresh laravel installation and it will fail due to not having the proper config entries.\r\n\r\n```env\r\nMAIL_MAILER=smtp\r\nMAIL_HOST=127.0.0.1\r\nMAIL_PORT=9925\r\nMAIL_USERNAME=null\r\nMAIL_PASSWORD=null\r\nMAIL_ENCRYPTION=null\r\nMAIL_FROM_ADDRESS=\"hello@example.com\"\r\nMAIL_FROM_NAME=\"${APP_NAME}\"\r\n``` \r\nTestMail is a empty view Mailabable\r\n\r\n```php\r\nMail::to('test@localhost')->send(new TestMail());\r\n```\r\nwill fail with \r\n`The \"\" scheme is not supported; supported schemes for mailer \"smtp\" are: \"smtp\", \"smtps\".`", + "pr_file_module": null + }, + { + "comment_id": "1869272592", + "repo_full_name": "laravel/framework", + "pr_number": 53749, + "pr_file": "config/mail.php", + "discussion_id": "1869186104", + "commented_code": "@@ -38,6 +38,7 @@\n 'mailers' => [\n \n 'smtp' => [\n+ 'scheme' => env('MAIL_SCHEME', 'smtp'),", + "comment_created_at": "2024-12-04T11:19:16+00:00", + "comment_author": "crynobone", + "comment_body": "> I also do not understand why we would be relying on a port to define a scheme that makes no sense.\r\n\r\nThis is a requirement by Symfony Mailer. `scheme` here is only used to comply with `Dsn` instance but the result is then passed to `EsmtpTransport` differently. 
\r\n\r\nhttps://github.com/symfony/mailer/blob/e4d358702fb66e4c8a2af08e90e7271a62de39cc/Transport/Smtp/EsmtpTransport.php#L56-L68\r\n\r\n> The \"\" scheme is not supported; supported schemes for mailer \"smtp\" are: \"smtp\", \"smtps\".\r\n\r\nThe fixed in https://github.com/laravel/framework/pull/53585 has already fixed this and before the release can be done you should downgrade to 7.1 as suggested.\r\n\r\nWe will look into removing `MAIL_ENCRYPTION` and streamlining the configuration for next major version if possible.\r\n", + "pr_file_module": null + }, + { + "comment_id": "1869424105", + "repo_full_name": "laravel/framework", + "pr_number": 53749, + "pr_file": "config/mail.php", + "discussion_id": "1869186104", + "commented_code": "@@ -38,6 +38,7 @@\n 'mailers' => [\n \n 'smtp' => [\n+ 'scheme' => env('MAIL_SCHEME', 'smtp'),", + "comment_created_at": "2024-12-04T12:42:14+00:00", + "comment_author": "danielrona", + "comment_body": "while #53585 might fix this for everyone using port 465 , this doesn't make much sense and it's still a breaking change because everyone with a port != 465 will have not ```smtps``` set on their configuration files while actually having ```MAIL_ENCRYPTION=tls```", + "pr_file_module": null + }, + { + "comment_id": "1869441169", + "repo_full_name": "laravel/framework", + "pr_number": 53749, + "pr_file": "config/mail.php", + "discussion_id": "1869186104", + "commented_code": "@@ -38,6 +38,7 @@\n 'mailers' => [\n \n 'smtp' => [\n+ 'scheme' => env('MAIL_SCHEME', 'smtp'),", + "comment_created_at": "2024-12-04T12:53:58+00:00", + "comment_author": "crynobone", + "comment_body": "https://github.com/symfony/mailer/blob/7.2/CHANGELOG.md#440\n\n`encryption` configuration was deprecated and removed in earlier `symfony/mailer` release. This probably being left out from `swiftmailer` era", + "pr_file_module": null + }, + { + "comment_id": "1869587190", + "repo_full_name": "laravel/framework", + "pr_number": 53749, + "pr_file": "config/mail.php", + "discussion_id": "1869186104", + "commented_code": "@@ -38,6 +38,7 @@\n 'mailers' => [\n \n 'smtp' => [\n+ 'scheme' => env('MAIL_SCHEME', 'smtp'),", + "comment_created_at": "2024-12-04T14:02:47+00:00", + "comment_author": "danielrona", + "comment_body": "I adjusted my pull dropped the config changes and this would solve the issue, until ```MAIL_ENCRYPTION``` is removed and the configuration file streamlined with the next major version.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/framework-descriptive-configuration-keys.md b/_reviewers/framework-descriptive-configuration-keys.md index c5f4d5b..1368e4e 100644 --- a/_reviewers/framework-descriptive-configuration-keys.md +++ b/_reviewers/framework-descriptive-configuration-keys.md @@ -27,191 +27,3 @@ Use a more descriptive key that includes the unit: ``` This approach makes configuration self-documenting, reduces the need to consult documentation, and prevents errors from unit mismatches or incorrect assumptions about value types. When environment variables serve as configuration sources, maintain this same naming convention to ensure consistency across your application's configuration system. 
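As a brief sketch of how the convention carries through to the environment variable — mirroring the key, env name, and default suggested in the discussion above; the surrounding file layout is assumed:

```php
// config/session.php — the unit appears in both the config key and the env name.
return [
    'maintenance_bypass_cookie_lifetime_minutes' => (int) env(
        'SESSION_MAINTENANCE_BYPASS_COOKIE_LIFETIME_MINUTES',
        720 // 12 hours, expressed in the unit the key promises
    ),
];
```

At the call site the unit is then unambiguous, e.g. `Carbon::now()->addMinutes(config('session.maintenance_bypass_cookie_lifetime_minutes'))`.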
- - -[ - { - "discussion_id": "2086556212", - "pr_number": 55724, - "pr_file": "src/Illuminate/Foundation/Http/MaintenanceModeBypassCookie.php", - "created_at": "2025-05-13T11:06:49+00:00", - "commented_code": "*/\n public static function create(string $key)\n {\n $expiresAt = Carbon::now()->addHours(12);\n $expiresAt = Carbon::now()->addHours(config('session.maintenance_bypass_cookie_lifetime'));", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2086556212", - "repo_full_name": "laravel/framework", - "pr_number": 55724, - "pr_file": "src/Illuminate/Foundation/Http/MaintenanceModeBypassCookie.php", - "discussion_id": "2086556212", - "commented_code": "@@ -15,7 +15,7 @@ class MaintenanceModeBypassCookie\n */\n public static function create(string $key)\n {\n- $expiresAt = Carbon::now()->addHours(12);\n+ $expiresAt = Carbon::now()->addHours(config('session.maintenance_bypass_cookie_lifetime'));", - "comment_created_at": "2025-05-13T11:06:49+00:00", - "comment_author": "shaedrich", - "comment_body": "According to your config doc block,\r\n> number of minutes\r\n\r\nthis is now minutes that are added\r\n```suggestion\r\n $expiresAt = Carbon::now()->addMinutes(config('session.maintenance_bypass_cookie_lifetime'));\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2086558463", - "pr_number": 55724, - "pr_file": "config/session.php", - "created_at": "2025-05-13T11:08:14+00:00", - "commented_code": "'expire_on_close' => env('SESSION_EXPIRE_ON_CLOSE', false),\n\n /*\n |--------------------------------------------------------------------------\n | Maintenance Bypass Cookie Lifetime\n |--------------------------------------------------------------------------\n |\n | Here you may specify the number of minutes the maintenance mode\n | bypass cookie should remain valid. This cookie is issued when a\n | user accesses the application using the secret bypass URL. Once\n | set, it allows temporary access to the application while it is\n | in maintenance mode. The default duration is 12 hours.\n |\n */\n\n 'maintenance_bypass_cookie_lifetime' => (int) env('SESSION_MAINTENANCE_BYPASS_COOKIE_LIFETIME', 7720),", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2086558463", - "repo_full_name": "laravel/framework", - "pr_number": 55724, - "pr_file": "config/session.php", - "discussion_id": "2086558463", - "commented_code": "@@ -36,6 +36,21 @@\n \n 'expire_on_close' => env('SESSION_EXPIRE_ON_CLOSE', false),\n \n+ /*\n+ |--------------------------------------------------------------------------\n+ | Maintenance Bypass Cookie Lifetime\n+ |--------------------------------------------------------------------------\n+ |\n+ | Here you may specify the number of minutes the maintenance mode\n+ | bypass cookie should remain valid. This cookie is issued when a\n+ | user accesses the application using the secret bypass URL. Once\n+ | set, it allows temporary access to the application while it is\n+ | in maintenance mode. 
The default duration is 12 hours.\n+ |\n+ */\n+\n+ 'maintenance_bypass_cookie_lifetime' => (int) env('SESSION_MAINTENANCE_BYPASS_COOKIE_LIFETIME', 7720),", - "comment_created_at": "2025-05-13T11:08:14+00:00", - "comment_author": "shaedrich", - "comment_body": "Accoding to your PHPDoc block,\r\n> number of minutes\r\n\r\nbut 7720 minutes is not 12 hours\r\n```suggestion\r\n /*\r\n |--------------------------------------------------------------------------\r\n | Maintenance Bypass Cookie Lifetime\r\n |--------------------------------------------------------------------------\r\n |\r\n | Here you may specify the number of minutes the maintenance mode\r\n | bypass cookie should remain valid. This cookie is issued when a\r\n | user accesses the application using the secret bypass URL. Once\r\n | set, it allows temporary access to the application while it is\r\n | in maintenance mode. The default duration is 12 hours.\r\n |\r\n */\r\n\r\n 'maintenance_bypass_cookie_lifetime' => (int) env('SESSION_MAINTENANCE_BYPASS_COOKIE_LIFETIME', 720),\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2086559648", - "repo_full_name": "laravel/framework", - "pr_number": 55724, - "pr_file": "config/session.php", - "discussion_id": "2086558463", - "commented_code": "@@ -36,6 +36,21 @@\n \n 'expire_on_close' => env('SESSION_EXPIRE_ON_CLOSE', false),\n \n+ /*\n+ |--------------------------------------------------------------------------\n+ | Maintenance Bypass Cookie Lifetime\n+ |--------------------------------------------------------------------------\n+ |\n+ | Here you may specify the number of minutes the maintenance mode\n+ | bypass cookie should remain valid. This cookie is issued when a\n+ | user accesses the application using the secret bypass URL. Once\n+ | set, it allows temporary access to the application while it is\n+ | in maintenance mode. The default duration is 12 hours.\n+ |\n+ */\n+\n+ 'maintenance_bypass_cookie_lifetime' => (int) env('SESSION_MAINTENANCE_BYPASS_COOKIE_LIFETIME', 7720),", - "comment_created_at": "2025-05-13T11:09:02+00:00", - "comment_author": "shaedrich", - "comment_body": "Maybe, to avoid confusion when using, the unit should be included in the key:\r\n```suggestion\r\n /*\r\n |--------------------------------------------------------------------------\r\n | Maintenance Bypass Cookie Lifetime\r\n |--------------------------------------------------------------------------\r\n |\r\n | Here you may specify the number of minutes the maintenance mode\r\n | bypass cookie should remain valid. This cookie is issued when a\r\n | user accesses the application using the secret bypass URL. Once\r\n | set, it allows temporary access to the application while it is\r\n | in maintenance mode. The default duration is 12 hours.\r\n |\r\n */\r\n\r\n 'maintenance_bypass_cookie_lifetime_minutes' => (int) env('SESSION_MAINTENANCE_BYPASS_COOKIE_LIFETIME_MINUTES', 720),\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2086578428", - "repo_full_name": "laravel/framework", - "pr_number": 55724, - "pr_file": "config/session.php", - "discussion_id": "2086558463", - "commented_code": "@@ -36,6 +36,21 @@\n \n 'expire_on_close' => env('SESSION_EXPIRE_ON_CLOSE', false),\n \n+ /*\n+ |--------------------------------------------------------------------------\n+ | Maintenance Bypass Cookie Lifetime\n+ |--------------------------------------------------------------------------\n+ |\n+ | Here you may specify the number of minutes the maintenance mode\n+ | bypass cookie should remain valid. 
This cookie is issued when a\n+ | user accesses the application using the secret bypass URL. Once\n+ | set, it allows temporary access to the application while it is\n+ | in maintenance mode. The default duration is 12 hours.\n+ |\n+ */\n+\n+ 'maintenance_bypass_cookie_lifetime' => (int) env('SESSION_MAINTENANCE_BYPASS_COOKIE_LIFETIME', 7720),", - "comment_created_at": "2025-05-13T11:20:06+00:00", - "comment_author": "sajjadhossainshohag", - "comment_body": "@shaedrich This was a mistake, thanks.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1868980349", - "pr_number": 53749, - "pr_file": "config/mail.php", - "created_at": "2024-12-04T08:46:30+00:00", - "commented_code": "'mailers' => [\n\n 'smtp' => [\n 'scheme' => env('MAIL_MAILER', 'smtp'),", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1868980349", - "repo_full_name": "laravel/framework", - "pr_number": 53749, - "pr_file": "config/mail.php", - "discussion_id": "1868980349", - "commented_code": "@@ -38,6 +38,7 @@\n 'mailers' => [\n \n 'smtp' => [\n+ 'scheme' => env('MAIL_MAILER', 'smtp'),", - "comment_created_at": "2024-12-04T08:46:30+00:00", - "comment_author": "crynobone", - "comment_body": "This is incorrect, possible values of `MAIL_MAILER` are \"smtp\", \"sendmail\", \"mailgun\", \"ses\", \"ses-v2\", \"postmark\", \"resend\", \"log\", \"array\", \"failover\" or \"roundrobin\"", - "pr_file_module": null - }, - { - "comment_id": "1869165206", - "repo_full_name": "laravel/framework", - "pr_number": 53749, - "pr_file": "config/mail.php", - "discussion_id": "1868980349", - "commented_code": "@@ -38,6 +38,7 @@\n 'mailers' => [\n \n 'smtp' => [\n+ 'scheme' => env('MAIL_MAILER', 'smtp'),", - "comment_created_at": "2024-12-04T10:29:31+00:00", - "comment_author": "danielrona", - "comment_body": "Yes, you are right this should be of course a new variable, my mistake I added our hotfix instead of the generic config option.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1869186104", - "pr_number": 53749, - "pr_file": "config/mail.php", - "created_at": "2024-12-04T10:38:39+00:00", - "commented_code": "'mailers' => [\n\n 'smtp' => [\n 'scheme' => env('MAIL_SCHEME', 'smtp'),", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1869186104", - "repo_full_name": "laravel/framework", - "pr_number": 53749, - "pr_file": "config/mail.php", - "discussion_id": "1869186104", - "commented_code": "@@ -38,6 +38,7 @@\n 'mailers' => [\n \n 'smtp' => [\n+ 'scheme' => env('MAIL_SCHEME', 'smtp'),", - "comment_created_at": "2024-12-04T10:38:39+00:00", - "comment_author": "crynobone", - "comment_body": "This will make `scheme` default to `smtp` instead of relying on `port` value as indicated in #53585\r\n\r\nWith this changes everyone using the following configuration:\r\n\r\n```ini\r\nMAIL_ENCRYPTION=tls\r\nMAIL_PORT=465\r\n```\r\n\r\nWill resolved to `stmp` instead correctly using `smtps` and create more gotcha than what we correctly have with `MAIL_ENCRYPTION`.", - "pr_file_module": null - }, - { - "comment_id": "1869237308", - "repo_full_name": "laravel/framework", - "pr_number": 53749, - "pr_file": "config/mail.php", - "discussion_id": "1869186104", - "commented_code": "@@ -38,6 +38,7 @@\n 'mailers' => [\n \n 'smtp' => [\n+ 'scheme' => env('MAIL_SCHEME', 'smtp'),", - "comment_created_at": "2024-12-04T11:01:21+00:00", - "comment_author": "danielrona", - "comment_body": "Right now everyone who has configured ```MAIL_ENCRYPTION=null``` has a broken 
configuration.\r\n\r\nI also do not understand why we would be relying on a port to define a scheme that makes no sense.\r\n\r\nJust because a port can be smtp or smtps doesn't mean it needs to be, \r\nI can have port 25 be smpts or I can have port 587 be smtp this is totally up to the setup of the provider.\r\n\r\nYes the defaults would be smtp for 25 and smtps for 587 not to mention that 465 is a legacy default port for smtps nowadays it would/should be 587. \r\n\r\nSo right now this would still end up causing issues for most people and having a dedicated configuration parameter would resolve this.\r\n\r\nYou can setup a fresh laravel installation and it will fail due to not having the proper config entries.\r\n\r\n```env\r\nMAIL_MAILER=smtp\r\nMAIL_HOST=127.0.0.1\r\nMAIL_PORT=9925\r\nMAIL_USERNAME=null\r\nMAIL_PASSWORD=null\r\nMAIL_ENCRYPTION=null\r\nMAIL_FROM_ADDRESS=\"hello@example.com\"\r\nMAIL_FROM_NAME=\"${APP_NAME}\"\r\n``` \r\nTestMail is a empty view Mailabable\r\n\r\n```php\r\nMail::to('test@localhost')->send(new TestMail());\r\n```\r\nwill fail with \r\n`The \"\" scheme is not supported; supported schemes for mailer \"smtp\" are: \"smtp\", \"smtps\".`", - "pr_file_module": null - }, - { - "comment_id": "1869272592", - "repo_full_name": "laravel/framework", - "pr_number": 53749, - "pr_file": "config/mail.php", - "discussion_id": "1869186104", - "commented_code": "@@ -38,6 +38,7 @@\n 'mailers' => [\n \n 'smtp' => [\n+ 'scheme' => env('MAIL_SCHEME', 'smtp'),", - "comment_created_at": "2024-12-04T11:19:16+00:00", - "comment_author": "crynobone", - "comment_body": "> I also do not understand why we would be relying on a port to define a scheme that makes no sense.\r\n\r\nThis is a requirement by Symfony Mailer. `scheme` here is only used to comply with `Dsn` instance but the result is then passed to `EsmtpTransport` differently. 
\r\n\r\nhttps://github.com/symfony/mailer/blob/e4d358702fb66e4c8a2af08e90e7271a62de39cc/Transport/Smtp/EsmtpTransport.php#L56-L68\r\n\r\n> The \"\" scheme is not supported; supported schemes for mailer \"smtp\" are: \"smtp\", \"smtps\".\r\n\r\nThe fixed in https://github.com/laravel/framework/pull/53585 has already fixed this and before the release can be done you should downgrade to 7.1 as suggested.\r\n\r\nWe will look into removing `MAIL_ENCRYPTION` and streamlining the configuration for next major version if possible.\r\n", - "pr_file_module": null - }, - { - "comment_id": "1869424105", - "repo_full_name": "laravel/framework", - "pr_number": 53749, - "pr_file": "config/mail.php", - "discussion_id": "1869186104", - "commented_code": "@@ -38,6 +38,7 @@\n 'mailers' => [\n \n 'smtp' => [\n+ 'scheme' => env('MAIL_SCHEME', 'smtp'),", - "comment_created_at": "2024-12-04T12:42:14+00:00", - "comment_author": "danielrona", - "comment_body": "while #53585 might fix this for everyone using port 465 , this doesn't make much sense and it's still a breaking change because everyone with a port != 465 will have not ```smtps``` set on their configuration files while actually having ```MAIL_ENCRYPTION=tls```", - "pr_file_module": null - }, - { - "comment_id": "1869441169", - "repo_full_name": "laravel/framework", - "pr_number": 53749, - "pr_file": "config/mail.php", - "discussion_id": "1869186104", - "commented_code": "@@ -38,6 +38,7 @@\n 'mailers' => [\n \n 'smtp' => [\n+ 'scheme' => env('MAIL_SCHEME', 'smtp'),", - "comment_created_at": "2024-12-04T12:53:58+00:00", - "comment_author": "crynobone", - "comment_body": "https://github.com/symfony/mailer/blob/7.2/CHANGELOG.md#440\n\n`encryption` configuration was deprecated and removed in earlier `symfony/mailer` release. This probably being left out from `swiftmailer` era", - "pr_file_module": null - }, - { - "comment_id": "1869587190", - "repo_full_name": "laravel/framework", - "pr_number": 53749, - "pr_file": "config/mail.php", - "discussion_id": "1869186104", - "commented_code": "@@ -38,6 +38,7 @@\n 'mailers' => [\n \n 'smtp' => [\n+ 'scheme' => env('MAIL_SCHEME', 'smtp'),", - "comment_created_at": "2024-12-04T14:02:47+00:00", - "comment_author": "danielrona", - "comment_body": "I adjusted my pull dropped the config changes and this would solve the issue, until ```MAIL_ENCRYPTION``` is removed and the configuration file streamlined with the next major version.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/framework-design-flexible-apis.json b/_reviewers/framework-design-flexible-apis.json new file mode 100644 index 0000000..eb0aa0d --- /dev/null +++ b/_reviewers/framework-design-flexible-apis.json @@ -0,0 +1,196 @@ +[ + { + "discussion_id": "909053006", + "pr_number": 42976, + "pr_file": "src/Illuminate/Foundation/Console/Kernel.php", + "created_at": "2022-06-28T22:30:59+00:00", + "commented_code": "* Register a Closure based command with the application.\n *\n * @param string $signature\n * @param \\Closure $callback\n * @param \\Closure|callable $callback\n * @return \\Illuminate\\Foundation\\Console\\ClosureCommand\n */\n public function command($signature, Closure $callback)\n public function command($signature, $callback)\n {\n $command = new ClosureCommand($signature, $callback);\n $command = $callback instanceof Closure\n ? 
new ClosureCommand($signature, $callback)\n : new CallableCommand($signature, $callback);", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "909053006", + "repo_full_name": "laravel/framework", + "pr_number": 42976, + "pr_file": "src/Illuminate/Foundation/Console/Kernel.php", + "discussion_id": "909053006", + "commented_code": "@@ -185,12 +185,14 @@ protected function commands()\n * Register a Closure based command with the application.\n *\n * @param string $signature\n- * @param \\Closure $callback\n+ * @param \\Closure|callable $callback\n * @return \\Illuminate\\Foundation\\Console\\ClosureCommand\n */\n- public function command($signature, Closure $callback)\n+ public function command($signature, $callback)\n {\n- $command = new ClosureCommand($signature, $callback);\n+ $command = $callback instanceof Closure\n+ ? new ClosureCommand($signature, $callback)\n+ : new CallableCommand($signature, $callback);", + "comment_created_at": "2022-06-28T22:30:59+00:00", + "comment_author": "ziadoz", + "comment_body": "If you wrapped the callable in a closure here, would you be able to get rid of the `CallableCommand` class?\r\n\r\n```suggestion\r\n if (! $callback instanceof Closure && is_callable($callback)) {\r\n $callback = Closure::fromCallable($callback);\r\n }\r\n\r\n $command = new ClosureCommand($signature, $callback)\r\n```", + "pr_file_module": null + }, + { + "comment_id": "920243479", + "repo_full_name": "laravel/framework", + "pr_number": 42976, + "pr_file": "src/Illuminate/Foundation/Console/Kernel.php", + "discussion_id": "909053006", + "commented_code": "@@ -185,12 +185,14 @@ protected function commands()\n * Register a Closure based command with the application.\n *\n * @param string $signature\n- * @param \\Closure $callback\n+ * @param \\Closure|callable $callback\n * @return \\Illuminate\\Foundation\\Console\\ClosureCommand\n */\n- public function command($signature, Closure $callback)\n+ public function command($signature, $callback)\n {\n- $command = new ClosureCommand($signature, $callback);\n+ $command = $callback instanceof Closure\n+ ? 
new ClosureCommand($signature, $callback)\n+ : new CallableCommand($signature, $callback);", + "comment_created_at": "2022-07-13T15:50:36+00:00", + "comment_author": "chu121su12", + "comment_body": "I tried this logic and the code here\r\n\r\nhttps://github.com/laravel/framework/blob/1f77f3d586debda17be6a886b172e7336d65bb60/src/Illuminate/Foundation/Console/CallableCommand.php#L34-L77\r\n\r\nis quite difficult to be spliced around the section in the `Console::command` suggested.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1715170843", + "pr_number": 52466, + "pr_file": "src/Illuminate/Http/Client/Response.php", + "created_at": "2024-08-13T11:57:14+00:00", + "commented_code": "/**\n * Throw an exception if a server or client error occurred.\n *\n * @param \\Closure|null $callback\n * @return $this\n *\n * @throws \\Illuminate\\Http\\Client\\RequestException\n */\n public function throw()\n public function throw(?Closure $callback = null)", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1715170843", + "repo_full_name": "laravel/framework", + "pr_number": 52466, + "pr_file": "src/Illuminate/Http/Client/Response.php", + "discussion_id": "1715170843", + "commented_code": "@@ -277,17 +278,16 @@ public function toException()\n /**\n * Throw an exception if a server or client error occurred.\n *\n+ * @param \\Closure|null $callback\n * @return $this\n *\n * @throws \\Illuminate\\Http\\Client\\RequestException\n */\n- public function throw()\n+ public function throw(?Closure $callback = null)", + "comment_created_at": "2024-08-13T11:57:14+00:00", + "comment_author": "rodrigopedra", + "comment_body": "I'd use `?callable` instead of `?Closure` to allow for invokable objects, and other callable (such as denoted by the array syntax).\r\n\r\n```php\r\npublic function throw(?callable $callback = null)\r\n```\r\n\r\nP.S.: don't forget to update the docblock", + "pr_file_module": null + }, + { + "comment_id": "1715219915", + "repo_full_name": "laravel/framework", + "pr_number": 52466, + "pr_file": "src/Illuminate/Http/Client/Response.php", + "discussion_id": "1715170843", + "commented_code": "@@ -277,17 +278,16 @@ public function toException()\n /**\n * Throw an exception if a server or client error occurred.\n *\n+ * @param \\Closure|null $callback\n * @return $this\n *\n * @throws \\Illuminate\\Http\\Client\\RequestException\n */\n- public function throw()\n+ public function throw(?Closure $callback = null)", + "comment_created_at": "2024-08-13T12:36:10+00:00", + "comment_author": "alirezadp10", + "comment_body": "Thanks for your help.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1997723700", + "pr_number": 55033, + "pr_file": "src/Illuminate/Validation/Rules/Date.php", + "created_at": "2025-03-16T21:31:20+00:00", + "commented_code": "*/\n public function format(string $format): static\n {\n array_shift($this->constraints);", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1997723700", + "repo_full_name": "laravel/framework", + "pr_number": 55033, + "pr_file": "src/Illuminate/Validation/Rules/Date.php", + "discussion_id": "1997723700", + "commented_code": "@@ -22,6 +22,8 @@ class Date implements Stringable\n */\n public function format(string $format): static\n {\n+ array_shift($this->constraints);", + "comment_created_at": "2025-03-16T21:31:20+00:00", + "comment_author": "AndrewMast", + "comment_body": "I propose we add a `$format` property (`protected ?string $format = null;`) and 
make the `format` method simply do:\r\n\r\n```php\r\n/**\r\n * Ensure the date has the given format.\r\n */\r\npublic function format(string $format): static\r\n{\r\n $this->format = $format;\r\n\r\n return $this;\r\n}\r\n```\r\n\r\nThen, in the `__toString` method, we can simply do:\r\n```php\r\n/**\r\n * Convert the rule to a validation string.\r\n */\r\npublic function __toString(): string\r\n{\r\n return implode('|', [\r\n $this->format === null ? 'date' : 'date_format:'.$this->format,\r\n ...$this->constraints,\r\n ]);\r\n}\r\n```\r\n\r\nAnd in `formatDate()`:\r\n```php\r\n/**\r\n * Format the date for the validation rule.\r\n */\r\nprotected function formatDate(DateTimeInterface|string $date): string\r\n{\r\n return $date instanceof DateTimeInterface\r\n ? $date->format($this->format ?? 'Y-m-d')\r\n : $date;\r\n}\r\n```\r\n\r\nWe would also need to make the `$constraints` property start out empty:\r\n```php\r\nprotected ?string $format = null;\r\n\r\nprotected array $constraints = [];\r\n```\r\n\r\nOne downside to this approach is this would require `format` to be called before `after()` or `before()`, but the only way I can think of to solve this would be to include the DateTime instances in the `$constraints` array and only format them in the `__toString()` method, but that would require a rather large rewrite of this class and wouldn't seem very straight forward.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1860845192", + "pr_number": 53663, + "pr_file": "src/Illuminate/Http/Client/Factory.php", + "created_at": "2024-11-27T15:16:42+00:00", + "commented_code": "return;\n }\n\n return $callback instanceof Closure || $callback instanceof ResponseSequence\n ? $callback($request, $options)\n : $callback;\n if (is_int($callback) && $callback >= 100 && $callback < 600) {\n return static::response(status: $callback);", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1860845192", + "repo_full_name": "laravel/framework", + "pr_number": 53663, + "pr_file": "src/Illuminate/Http/Client/Factory.php", + "discussion_id": "1860845192", + "commented_code": "@@ -267,9 +267,19 @@ public function stubUrl($url, $callback)\n return;\n }\n \n- return $callback instanceof Closure || $callback instanceof ResponseSequence\n- ? $callback($request, $options)\n- : $callback;\n+ if (is_int($callback) && $callback >= 100 && $callback < 600) {\n+ return static::response(status: $callback);", + "comment_created_at": "2024-11-27T15:16:42+00:00", + "comment_author": "andrey-helldar", + "comment_body": "Wow. I took another look at the code and was very surprised!\r\n\r\n@jasonmccreary, okay, let's say `200` is the status code and `'204'` is the response body.\r\n\r\nBut what is the difference between, for example, `500` and `600`? 
Here `500` is the **status code**, and `600` is the **response body** 😀\r\n\r\n@taylorotwell, you'll be interested too (I wrote my thoughts above, and here's an addendum).\r\n\r\n```php\r\nHttp::fake([\r\n 'google.com' => 'Hello World', // no problem\r\n 'github.com' => ['foo' => 'bar'], // no problem\r\n\r\n 'bar.laravel.com' => '204', // it's a response body with status code 200\r\n\r\n 'foo.laravel.com' => 99, // it's a response body with status code 200\r\n 'foo.laravel.com' => 100, // it's a status code 100\r\n 'foo.laravel.com' => 204, // it's a status code 204\r\n 'foo.laravel.com' => 204.99, // it's a response body with status code 200\r\n 'foo.laravel.com' => 599, // it's a status code 599\r\n 'foo.laravel.com' => 600, // it's a response body with status code 200\r\n]);\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1860904761", + "repo_full_name": "laravel/framework", + "pr_number": 53663, + "pr_file": "src/Illuminate/Http/Client/Factory.php", + "discussion_id": "1860845192", + "commented_code": "@@ -267,9 +267,19 @@ public function stubUrl($url, $callback)\n return;\n }\n \n- return $callback instanceof Closure || $callback instanceof ResponseSequence\n- ? $callback($request, $options)\n- : $callback;\n+ if (is_int($callback) && $callback >= 100 && $callback < 600) {\n+ return static::response(status: $callback);", + "comment_created_at": "2024-11-27T15:52:20+00:00", + "comment_author": "andrey-helldar", + "comment_body": "My suggestion:\r\n\r\n```php\r\npublic function stubUrl($url, $callback)\r\n{\r\n return $this->fake(function ($request, $options) use ($url, $callback) {\r\n if (! Str::is(Str::start($url, '*'), $request->url())) {\r\n return;\r\n }\r\n\r\n if ($callback instanceof Closure || $callback instanceof ResponseSequence) {\r\n return $callback($request, $options);\r\n }\r\n\r\n if (is_numeric($callback) || is_string($callback) || is_array($callback)) { // here\r\n return static::response($callback);\r\n }\r\n\r\n return $callback;\r\n });\r\n}\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1860983198", + "repo_full_name": "laravel/framework", + "pr_number": 53663, + "pr_file": "src/Illuminate/Http/Client/Factory.php", + "discussion_id": "1860845192", + "commented_code": "@@ -267,9 +267,19 @@ public function stubUrl($url, $callback)\n return;\n }\n \n- return $callback instanceof Closure || $callback instanceof ResponseSequence\n- ? $callback($request, $options)\n- : $callback;\n+ if (is_int($callback) && $callback >= 100 && $callback < 600) {\n+ return static::response(status: $callback);", + "comment_created_at": "2024-11-27T16:46:20+00:00", + "comment_author": "shaedrich", + "comment_body": "What about just wrapping the callback? This would be following the original train of thought of this PR:\r\n```diff\r\npublic function stubUrl($url, $callback)\r\n{\r\n return $this->fake(function ($request, $options) use ($url, $callback) {\r\n if (! 
Str::is(Str::start($url, '*'), $request->url())) {\r\n return;\r\n }\r\n\r\n+ if (is_numeric($callback) || is_string($callback) || is_array($callback) {\r\n+ $callback = static::response($callback);\r\n+ }\r\n\r\n if ($callback instanceof Closure || $callback instanceof ResponseSequence) {\r\n return $callback($request, $options);\r\n }\r\n-\r\n- if (is_numeric($callback) || is_string($callback) || is_array($callback)) { // here\r\n- return static::response($callback);\r\n- }\r\n\r\n return $callback;\r\n });\r\n}\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1861197986", + "repo_full_name": "laravel/framework", + "pr_number": 53663, + "pr_file": "src/Illuminate/Http/Client/Factory.php", + "discussion_id": "1860845192", + "commented_code": "@@ -267,9 +267,19 @@ public function stubUrl($url, $callback)\n return;\n }\n \n- return $callback instanceof Closure || $callback instanceof ResponseSequence\n- ? $callback($request, $options)\n- : $callback;\n+ if (is_int($callback) && $callback >= 100 && $callback < 600) {\n+ return static::response(status: $callback);", + "comment_created_at": "2024-11-27T20:09:55+00:00", + "comment_author": "andrey-helldar", + "comment_body": "Yeah, you can do that too :)", + "pr_file_module": null + }, + { + "comment_id": "1861200387", + "repo_full_name": "laravel/framework", + "pr_number": 53663, + "pr_file": "src/Illuminate/Http/Client/Factory.php", + "discussion_id": "1860845192", + "commented_code": "@@ -267,9 +267,19 @@ public function stubUrl($url, $callback)\n return;\n }\n \n- return $callback instanceof Closure || $callback instanceof ResponseSequence\n- ? $callback($request, $options)\n- : $callback;\n+ if (is_int($callback) && $callback >= 100 && $callback < 600) {\n+ return static::response(status: $callback);", + "comment_created_at": "2024-11-27T20:12:51+00:00", + "comment_author": "shaedrich", + "comment_body": "Oh, I misread `ResponseSequence` as `Response` 🤦🏻‍♂️ Your suggestion makes more sense then 😅 ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1941509492", + "pr_number": 54464, + "pr_file": "src/Illuminate/Collections/Arr.php", + "created_at": "2025-02-04T16:28:36+00:00", + "commented_code": "* Convert the array into a query string.\n *\n * @param array $array\n * @param int-mask-of $encodingType (optional) Query encoding type.\n * @return string\n */\n public static function query($array)\n public static function query($array, $encodingType = PHP_QUERY_RFC3986)\n {\n return http_build_query($array, '', '&', PHP_QUERY_RFC3986);\n return http_build_query($array, '', '&', $encodingType);", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1941509492", + "repo_full_name": "laravel/framework", + "pr_number": 54464, + "pr_file": "src/Illuminate/Collections/Arr.php", + "discussion_id": "1941509492", + "commented_code": "@@ -702,11 +702,12 @@ public static function pull(&$array, $key, $default = null)\n * Convert the array into a query string.\n *\n * @param array $array\n+ * @param int-mask-of $encodingType (optional) Query encoding type.\n * @return string\n */\n- public static function query($array)\n+ public static function query($array, $encodingType = PHP_QUERY_RFC3986)\n {\n- return http_build_query($array, '', '&', PHP_QUERY_RFC3986);\n+ return http_build_query($array, '', '&', $encodingType);", + "comment_created_at": "2025-02-04T16:28:36+00:00", + "comment_author": "shaedrich", + "comment_body": "> I've added all the changes except your enum idea because I merged one in 
early by accident and I can't merge that in now due to it being classed as outdated. Could you suggest that change again and I'll pull that in\r\n\r\nRe-adding the enum suggestion from https://github.com/laravel/framework/pull/54464#discussion_r1941450711 as requested per https://github.com/laravel/framework/pull/54464#issuecomment-2634468189\r\n```suggestion\r\n * @param int-mask-of|HttpQueryEncoding $encodingType (optional) Query encoding type.\r\n * @return string\r\n */\r\n public static function query($array, $encodingType = HttpQueryEncoding::Rfc3986)\r\n {\r\n return http_build_query($array, '', '&', enum_value($encodingType))\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1941516190", + "repo_full_name": "laravel/framework", + "pr_number": 54464, + "pr_file": "src/Illuminate/Collections/Arr.php", + "discussion_id": "1941509492", + "commented_code": "@@ -702,11 +702,12 @@ public static function pull(&$array, $key, $default = null)\n * Convert the array into a query string.\n *\n * @param array $array\n+ * @param int-mask-of $encodingType (optional) Query encoding type.\n * @return string\n */\n- public static function query($array)\n+ public static function query($array, $encodingType = PHP_QUERY_RFC3986)\n {\n- return http_build_query($array, '', '&', PHP_QUERY_RFC3986);\n+ return http_build_query($array, '', '&', $encodingType);", + "comment_created_at": "2025-02-04T16:32:20+00:00", + "comment_author": "shaedrich", + "comment_body": "Please keep in mind, that you have to manually\r\n* import `enum_value` via `use function Illuminate\\Support\\enum_value;`\r\n* create said enum as\r\n ```php\r\n constraints);", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1997723700", - "repo_full_name": "laravel/framework", - "pr_number": 55033, - "pr_file": "src/Illuminate/Validation/Rules/Date.php", - "discussion_id": "1997723700", - "commented_code": "@@ -22,6 +22,8 @@ class Date implements Stringable\n */\n public function format(string $format): static\n {\n+ array_shift($this->constraints);", - "comment_created_at": "2025-03-16T21:31:20+00:00", - "comment_author": "AndrewMast", - "comment_body": "I propose we add a `$format` property (`protected ?string $format = null;`) and make the `format` method simply do:\r\n\r\n```php\r\n/**\r\n * Ensure the date has the given format.\r\n */\r\npublic function format(string $format): static\r\n{\r\n $this->format = $format;\r\n\r\n return $this;\r\n}\r\n```\r\n\r\nThen, in the `__toString` method, we can simply do:\r\n```php\r\n/**\r\n * Convert the rule to a validation string.\r\n */\r\npublic function __toString(): string\r\n{\r\n return implode('|', [\r\n $this->format === null ? 'date' : 'date_format:'.$this->format,\r\n ...$this->constraints,\r\n ]);\r\n}\r\n```\r\n\r\nAnd in `formatDate()`:\r\n```php\r\n/**\r\n * Format the date for the validation rule.\r\n */\r\nprotected function formatDate(DateTimeInterface|string $date): string\r\n{\r\n return $date instanceof DateTimeInterface\r\n ? $date->format($this->format ?? 
'Y-m-d')\r\n : $date;\r\n}\r\n```\r\n\r\nWe would also need to make the `$constraints` property start out empty:\r\n```php\r\nprotected ?string $format = null;\r\n\r\nprotected array $constraints = [];\r\n```\r\n\r\nOne downside to this approach is this would require `format` to be called before `after()` or `before()`, but the only way I can think of to solve this would be to include the DateTime instances in the `$constraints` array and only format them in the `__toString()` method, but that would require a rather large rewrite of this class and wouldn't seem very straight forward.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1860845192", - "pr_number": 53663, - "pr_file": "src/Illuminate/Http/Client/Factory.php", - "created_at": "2024-11-27T15:16:42+00:00", - "commented_code": "return;\n }\n\n return $callback instanceof Closure || $callback instanceof ResponseSequence\n ? $callback($request, $options)\n : $callback;\n if (is_int($callback) && $callback >= 100 && $callback < 600) {\n return static::response(status: $callback);", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1860845192", - "repo_full_name": "laravel/framework", - "pr_number": 53663, - "pr_file": "src/Illuminate/Http/Client/Factory.php", - "discussion_id": "1860845192", - "commented_code": "@@ -267,9 +267,19 @@ public function stubUrl($url, $callback)\n return;\n }\n \n- return $callback instanceof Closure || $callback instanceof ResponseSequence\n- ? $callback($request, $options)\n- : $callback;\n+ if (is_int($callback) && $callback >= 100 && $callback < 600) {\n+ return static::response(status: $callback);", - "comment_created_at": "2024-11-27T15:16:42+00:00", - "comment_author": "andrey-helldar", - "comment_body": "Wow. I took another look at the code and was very surprised!\r\n\r\n@jasonmccreary, okay, let's say `200` is the status code and `'204'` is the response body.\r\n\r\nBut what is the difference between, for example, `500` and `600`? Here `500` is the **status code**, and `600` is the **response body** \ud83d\ude00\r\n\r\n@taylorotwell, you'll be interested too (I wrote my thoughts above, and here's an addendum).\r\n\r\n```php\r\nHttp::fake([\r\n 'google.com' => 'Hello World', // no problem\r\n 'github.com' => ['foo' => 'bar'], // no problem\r\n\r\n 'bar.laravel.com' => '204', // it's a response body with status code 200\r\n\r\n 'foo.laravel.com' => 99, // it's a response body with status code 200\r\n 'foo.laravel.com' => 100, // it's a status code 100\r\n 'foo.laravel.com' => 204, // it's a status code 204\r\n 'foo.laravel.com' => 204.99, // it's a response body with status code 200\r\n 'foo.laravel.com' => 599, // it's a status code 599\r\n 'foo.laravel.com' => 600, // it's a response body with status code 200\r\n]);\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1860904761", - "repo_full_name": "laravel/framework", - "pr_number": 53663, - "pr_file": "src/Illuminate/Http/Client/Factory.php", - "discussion_id": "1860845192", - "commented_code": "@@ -267,9 +267,19 @@ public function stubUrl($url, $callback)\n return;\n }\n \n- return $callback instanceof Closure || $callback instanceof ResponseSequence\n- ? 
$callback($request, $options)\n- : $callback;\n+ if (is_int($callback) && $callback >= 100 && $callback < 600) {\n+ return static::response(status: $callback);", - "comment_created_at": "2024-11-27T15:52:20+00:00", - "comment_author": "andrey-helldar", - "comment_body": "My suggestion:\r\n\r\n```php\r\npublic function stubUrl($url, $callback)\r\n{\r\n return $this->fake(function ($request, $options) use ($url, $callback) {\r\n if (! Str::is(Str::start($url, '*'), $request->url())) {\r\n return;\r\n }\r\n\r\n if ($callback instanceof Closure || $callback instanceof ResponseSequence) {\r\n return $callback($request, $options);\r\n }\r\n\r\n if (is_numeric($callback) || is_string($callback) || is_array($callback)) { // here\r\n return static::response($callback);\r\n }\r\n\r\n return $callback;\r\n });\r\n}\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1860983198", - "repo_full_name": "laravel/framework", - "pr_number": 53663, - "pr_file": "src/Illuminate/Http/Client/Factory.php", - "discussion_id": "1860845192", - "commented_code": "@@ -267,9 +267,19 @@ public function stubUrl($url, $callback)\n return;\n }\n \n- return $callback instanceof Closure || $callback instanceof ResponseSequence\n- ? $callback($request, $options)\n- : $callback;\n+ if (is_int($callback) && $callback >= 100 && $callback < 600) {\n+ return static::response(status: $callback);", - "comment_created_at": "2024-11-27T16:46:20+00:00", - "comment_author": "shaedrich", - "comment_body": "What about just wrapping the callback? This would be following the original train of thought of this PR:\r\n```diff\r\npublic function stubUrl($url, $callback)\r\n{\r\n return $this->fake(function ($request, $options) use ($url, $callback) {\r\n if (! Str::is(Str::start($url, '*'), $request->url())) {\r\n return;\r\n }\r\n\r\n+ if (is_numeric($callback) || is_string($callback) || is_array($callback) {\r\n+ $callback = static::response($callback);\r\n+ }\r\n\r\n if ($callback instanceof Closure || $callback instanceof ResponseSequence) {\r\n return $callback($request, $options);\r\n }\r\n-\r\n- if (is_numeric($callback) || is_string($callback) || is_array($callback)) { // here\r\n- return static::response($callback);\r\n- }\r\n\r\n return $callback;\r\n });\r\n}\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1861197986", - "repo_full_name": "laravel/framework", - "pr_number": 53663, - "pr_file": "src/Illuminate/Http/Client/Factory.php", - "discussion_id": "1860845192", - "commented_code": "@@ -267,9 +267,19 @@ public function stubUrl($url, $callback)\n return;\n }\n \n- return $callback instanceof Closure || $callback instanceof ResponseSequence\n- ? $callback($request, $options)\n- : $callback;\n+ if (is_int($callback) && $callback >= 100 && $callback < 600) {\n+ return static::response(status: $callback);", - "comment_created_at": "2024-11-27T20:09:55+00:00", - "comment_author": "andrey-helldar", - "comment_body": "Yeah, you can do that too :)", - "pr_file_module": null - }, - { - "comment_id": "1861200387", - "repo_full_name": "laravel/framework", - "pr_number": 53663, - "pr_file": "src/Illuminate/Http/Client/Factory.php", - "discussion_id": "1860845192", - "commented_code": "@@ -267,9 +267,19 @@ public function stubUrl($url, $callback)\n return;\n }\n \n- return $callback instanceof Closure || $callback instanceof ResponseSequence\n- ? 
$callback($request, $options)\n- : $callback;\n+ if (is_int($callback) && $callback >= 100 && $callback < 600) {\n+ return static::response(status: $callback);", - "comment_created_at": "2024-11-27T20:12:51+00:00", - "comment_author": "shaedrich", - "comment_body": "Oh, I misread `ResponseSequence` as `Response` \ud83e\udd26\ud83c\udffb\u200d\u2642\ufe0f Your suggestion makes more sense then \ud83d\ude05 ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1941509492", - "pr_number": 54464, - "pr_file": "src/Illuminate/Collections/Arr.php", - "created_at": "2025-02-04T16:28:36+00:00", - "commented_code": "* Convert the array into a query string.\n *\n * @param array $array\n * @param int-mask-of $encodingType (optional) Query encoding type.\n * @return string\n */\n public static function query($array)\n public static function query($array, $encodingType = PHP_QUERY_RFC3986)\n {\n return http_build_query($array, '', '&', PHP_QUERY_RFC3986);\n return http_build_query($array, '', '&', $encodingType);", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1941509492", - "repo_full_name": "laravel/framework", - "pr_number": 54464, - "pr_file": "src/Illuminate/Collections/Arr.php", - "discussion_id": "1941509492", - "commented_code": "@@ -702,11 +702,12 @@ public static function pull(&$array, $key, $default = null)\n * Convert the array into a query string.\n *\n * @param array $array\n+ * @param int-mask-of $encodingType (optional) Query encoding type.\n * @return string\n */\n- public static function query($array)\n+ public static function query($array, $encodingType = PHP_QUERY_RFC3986)\n {\n- return http_build_query($array, '', '&', PHP_QUERY_RFC3986);\n+ return http_build_query($array, '', '&', $encodingType);", - "comment_created_at": "2025-02-04T16:28:36+00:00", - "comment_author": "shaedrich", - "comment_body": "> I've added all the changes except your enum idea because I merged one in early by accident and I can't merge that in now due to it being classed as outdated. 
Could you suggest that change again and I'll pull that in\r\n\r\nRe-adding the enum suggestion from https://github.com/laravel/framework/pull/54464#discussion_r1941450711 as requested per https://github.com/laravel/framework/pull/54464#issuecomment-2634468189\r\n```suggestion\r\n * @param int-mask-of|HttpQueryEncoding $encodingType (optional) Query encoding type.\r\n * @return string\r\n */\r\n public static function query($array, $encodingType = HttpQueryEncoding::Rfc3986)\r\n {\r\n return http_build_query($array, '', '&', enum_value($encodingType))\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1941516190", - "repo_full_name": "laravel/framework", - "pr_number": 54464, - "pr_file": "src/Illuminate/Collections/Arr.php", - "discussion_id": "1941509492", - "commented_code": "@@ -702,11 +702,12 @@ public static function pull(&$array, $key, $default = null)\n * Convert the array into a query string.\n *\n * @param array $array\n+ * @param int-mask-of $encodingType (optional) Query encoding type.\n * @return string\n */\n- public static function query($array)\n+ public static function query($array, $encodingType = PHP_QUERY_RFC3986)\n {\n- return http_build_query($array, '', '&', PHP_QUERY_RFC3986);\n+ return http_build_query($array, '', '&', $encodingType);", - "comment_created_at": "2025-02-04T16:32:20+00:00", - "comment_author": "shaedrich", - "comment_body": "Please keep in mind, that you have to manually\r\n* import `enum_value` via `use function Illuminate\\Support\\enum_value;`\r\n* create said enum as\r\n ```php\r\n wrap($column)`. This also affects the other grammars.", + "pr_file_module": null + }, + { + "comment_id": "1724361191", + "repo_full_name": "laravel/framework", + "pr_number": 52535, + "pr_file": "src/Illuminate/Database/Query/Grammars/MySqlGrammar.php", + "discussion_id": "1724014845", + "commented_code": "@@ -524,4 +524,19 @@ protected function wrapJsonBooleanSelector($value)\n \n return 'json_extract('.$field.$path.')';\n }\n+\n+ /**\n+ * Apply custom ordering to a query based on a priority array.\n+ *\n+ * @param $column\n+ * @param array $priority\n+ * @param $direction\n+ * @return string\n+ */\n+ public function orderByPriority($column, array $priority, $direction = 'asc')\n+ {\n+ $placeholders = implode(',', array_fill(0, count($priority), '?'));\n+\n+ return \"FIELD($column, $placeholders) $direction\";", + "comment_created_at": "2024-08-21T04:21:26+00:00", + "comment_author": "Mushood", + "comment_body": "I have applied the wrap function on all grammars and updated the tests accordingly.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1898688984", + "pr_number": 54025, + "pr_file": "src/Illuminate/Database/Schema/Grammars/MySqlGrammar.php", + "created_at": "2024-12-27T19:45:49+00:00", + "commented_code": "*/\n protected function compileKey(Blueprint $blueprint, Fluent $command, $type)\n {\n return sprintf('alter table %s add %s %s%s(%s)',\n $columns = collect($command->columns)->map(function ($column) {\n // Check if the column contains a functional expression\n // and return as-is, otherwise wrap\n return preg_match('/\\(.+\\)/', $column) ? $column : $this->wrap($column);\n })->implode(', ');\n\n return sprintf(\n 'alter table %s add %s %s%s(%s)',\n $this->wrapTable($blueprint),\n $type,\n $this->wrap($command->index),\n $command->algorithm ? ' using '.$command->algorithm : '',\n $this->columnize($command->columns)\n $command->algorithm ? ' using ' . 
$command->algorithm : '',\n $columns\n );\n }", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1898688984", + "repo_full_name": "laravel/framework", + "pr_number": 54025, + "pr_file": "src/Illuminate/Database/Schema/Grammars/MySqlGrammar.php", + "discussion_id": "1898688984", + "commented_code": "@@ -492,12 +492,19 @@ public function compileSpatialIndex(Blueprint $blueprint, Fluent $command)\n */\n protected function compileKey(Blueprint $blueprint, Fluent $command, $type)\n {\n- return sprintf('alter table %s add %s %s%s(%s)',\n+ $columns = collect($command->columns)->map(function ($column) {\n+ // Check if the column contains a functional expression\n+ // and return as-is, otherwise wrap\n+ return preg_match('/\\(.+\\)/', $column) ? $column : $this->wrap($column);\n+ })->implode(', ');\n+\n+ return sprintf(\n+ 'alter table %s add %s %s%s(%s)',\n $this->wrapTable($blueprint),\n $type,\n $this->wrap($command->index),\n- $command->algorithm ? ' using '.$command->algorithm : '',\n- $this->columnize($command->columns)\n+ $command->algorithm ? ' using ' . $command->algorithm : '',\n+ $columns\n );\n }\n ", + "comment_created_at": "2024-12-27T19:45:49+00:00", + "comment_author": "shaedrich", + "comment_body": "One possible solution for https://github.com/laravel/framework/pull/54025#discussion_r1898460480\r\n```suggestion\r\n\r\n protected static function isFunctionalExpression(string $column): bool\r\n {\r\n return preg_match('/\\(.+\\)/', $column);\r\n }\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1898689872", + "pr_number": 54025, + "pr_file": "src/Illuminate/Database/Schema/Grammars/MySqlGrammar.php", + "created_at": "2024-12-27T19:48:17+00:00", + "commented_code": "*/\n protected function compileKey(Blueprint $blueprint, Fluent $command, $type)\n {\n return sprintf('alter table %s add %s %s%s(%s)',\n $columns = collect($command->columns)->map(function ($column) {\n // Check if the column contains a functional expression\n // and return as-is, otherwise wrap\n return preg_match('/\\(.+\\)/', $column) ? $column : $this->wrap($column);", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1898689872", + "repo_full_name": "laravel/framework", + "pr_number": 54025, + "pr_file": "src/Illuminate/Database/Schema/Grammars/MySqlGrammar.php", + "discussion_id": "1898689872", + "commented_code": "@@ -492,12 +492,19 @@ public function compileSpatialIndex(Blueprint $blueprint, Fluent $command)\n */\n protected function compileKey(Blueprint $blueprint, Fluent $command, $type)\n {\n- return sprintf('alter table %s add %s %s%s(%s)',\n+ $columns = collect($command->columns)->map(function ($column) {\n+ // Check if the column contains a functional expression\n+ // and return as-is, otherwise wrap\n+ return preg_match('/\\(.+\\)/', $column) ? $column : $this->wrap($column);", + "comment_created_at": "2024-12-27T19:48:17+00:00", + "comment_author": "shaedrich", + "comment_body": "https://github.com/laravel/framework/pull/54025#discussion_r1898688984 can be applied here, then:\r\n```suggestion\r\n return self::isFunctionalExpression($column) ? 
$column : $this->wrap($column);\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1898691810", + "pr_number": 54025, + "pr_file": "src/Illuminate/Database/Schema/Grammars/MySqlGrammar.php", + "created_at": "2024-12-27T19:54:11+00:00", + "commented_code": "*/\n protected function compileKey(Blueprint $blueprint, Fluent $command, $type)\n {\n return sprintf('alter table %s add %s %s%s(%s)',\n $columns = collect($command->columns)->map(function (string $column) {\n return self::isFunctionalExpression($column) ? $column : $this->wrap($column);\n })->implode(', ');", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1898691810", + "repo_full_name": "laravel/framework", + "pr_number": 54025, + "pr_file": "src/Illuminate/Database/Schema/Grammars/MySqlGrammar.php", + "discussion_id": "1898691810", + "commented_code": "@@ -492,15 +492,23 @@ public function compileSpatialIndex(Blueprint $blueprint, Fluent $command)\n */\n protected function compileKey(Blueprint $blueprint, Fluent $command, $type)\n {\n- return sprintf('alter table %s add %s %s%s(%s)',\n+ $columns = collect($command->columns)->map(function (string $column) {\n+ return self::isFunctionalExpression($column) ? $column : $this->wrap($column);\n+ })->implode(', ');", + "comment_created_at": "2024-12-27T19:54:11+00:00", + "comment_author": "shaedrich", + "comment_body": "https://github.com/laravel/framework/pull/54025#discussion_r1898675719 got outdated—not sure if you still want to change that:\r\n```suggestion\r\n $columns = collect($command->columns)\r\n ->map(fn (string $column) => self::isFunctionalExpression($column) ? $column : $this->wrap($column))\r\n ->implode(', ');\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1682404703", + "pr_number": 52177, + "pr_file": "tests/Integration/Database/DatabaseSchemaBlueprintTest.php", + "created_at": "2024-07-18T08:11:43+00:00", + "commented_code": "$this->assertEquals($expected, $queries);\n }\n\n public function testAddColumnNamedCreateWorks()\n {\n DB::connection()->getSchemaBuilder()->create('users', function ($table) {\n $table->string('name')->nullable();\n });\n\n $blueprintMySql = new Blueprint('users', function ($table) {\n $table->string('create')->nullable(false);\n });\n\n $queries = $blueprintMySql->toSql(DB::connection(), new MySqlGrammar);\n\n $expected = [\n 'alter table `users` add `create` varchar(255) not null',\n ];\n\n $this->assertEquals($expected, $queries);\n\n $blueprintPostgres = new Blueprint('users', function ($table) {\n $table->string('create')->nullable(false);\n });\n\n $queries = $blueprintPostgres->toSql(DB::connection(), new PostgresGrammar);\n\n $expected = [\n 'alter table \"users\" add column \"create\" varchar(255) not null',\n ];\n\n $this->assertEquals($expected, $queries);\n\n $blueprintSQLite = new Blueprint('users', function ($table) {\n $table->string('create')->nullable(false);\n });\n\n $queries = $blueprintSQLite->toSql(DB::connection(), new SQLiteGrammar);\n\n $expected = [\n 'alter table \"users\" add column \"create\" varchar not null',\n ];\n\n $this->assertEquals($expected, $queries);\n\n $blueprintSqlServer = new Blueprint('users', function ($table) {\n $table->string('create')->nullable(false);\n });\n\n $queries = $blueprintSqlServer->toSql(DB::connection(), new SqlServerGrammar);\n\n $expected = [\n 'alter table \"users\" add \"create\" nvarchar(255) not null',\n ];\n\n $this->assertEquals($expected, $queries);", + "repo_full_name": "laravel/framework", + 
"discussion_comments": [ + { + "comment_id": "1682404703", + "repo_full_name": "laravel/framework", + "pr_number": 52177, + "pr_file": "tests/Integration/Database/DatabaseSchemaBlueprintTest.php", + "discussion_id": "1682404703", + "commented_code": "@@ -507,6 +507,61 @@ public function testAddUniqueIndexWithNameWorks()\n $this->assertEquals($expected, $queries);\n }\n \n+ public function testAddColumnNamedCreateWorks()\n+ {\n+ DB::connection()->getSchemaBuilder()->create('users', function ($table) {\n+ $table->string('name')->nullable();\n+ });\n+\n+ $blueprintMySql = new Blueprint('users', function ($table) {\n+ $table->string('create')->nullable(false);\n+ });\n+\n+ $queries = $blueprintMySql->toSql(DB::connection(), new MySqlGrammar);\n+\n+ $expected = [\n+ 'alter table `users` add `create` varchar(255) not null',\n+ ];\n+\n+ $this->assertEquals($expected, $queries);\n+\n+ $blueprintPostgres = new Blueprint('users', function ($table) {\n+ $table->string('create')->nullable(false);\n+ });\n+\n+ $queries = $blueprintPostgres->toSql(DB::connection(), new PostgresGrammar);\n+\n+ $expected = [\n+ 'alter table \"users\" add column \"create\" varchar(255) not null',\n+ ];\n+\n+ $this->assertEquals($expected, $queries);\n+\n+ $blueprintSQLite = new Blueprint('users', function ($table) {\n+ $table->string('create')->nullable(false);\n+ });\n+\n+ $queries = $blueprintSQLite->toSql(DB::connection(), new SQLiteGrammar);\n+\n+ $expected = [\n+ 'alter table \"users\" add column \"create\" varchar not null',\n+ ];\n+\n+ $this->assertEquals($expected, $queries);\n+\n+ $blueprintSqlServer = new Blueprint('users', function ($table) {\n+ $table->string('create')->nullable(false);\n+ });\n+\n+ $queries = $blueprintSqlServer->toSql(DB::connection(), new SqlServerGrammar);\n+\n+ $expected = [\n+ 'alter table \"users\" add \"create\" nvarchar(255) not null',\n+ ];\n+\n+ $this->assertEquals($expected, $queries);", + "comment_created_at": "2024-07-18T08:11:43+00:00", + "comment_author": "hafezdivandari", + "comment_body": "```suggestion\r\n Schema::create('users', function (Blueprint $table) {\r\n $table->string('name');\r\n });\r\n\r\n Schema::table('users', function (Blueprint $table) {\r\n $table->string('create');\r\n });\r\n\r\n $this->assertTrue(Schema::hasColumn('users', 'create'));\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/framework-escape-column-names-properly.md b/_reviewers/framework-escape-column-names-properly.md index 3c0eadc..e13d183 100644 --- a/_reviewers/framework-escape-column-names-properly.md +++ b/_reviewers/framework-escape-column-names-properly.md @@ -39,129 +39,3 @@ $query = $this->builder->select($this->wrap($column))->from('users'); 3. **Cross-database compatibility**: Different database systems use different identifier quoting (MySQL uses backticks, PostgreSQL uses double quotes). Always use the database grammar's `wrap()` method instead of hardcoding quote characters. This practice prevents SQL errors when working with reserved words as column names, functional expressions in indexes, and ensures consistent behavior across different database systems. 
- - -[ - { - "discussion_id": "1724014845", - "pr_number": 52535, - "pr_file": "src/Illuminate/Database/Query/Grammars/MySqlGrammar.php", - "created_at": "2024-08-20T21:50:45+00:00", - "commented_code": "return 'json_extract('.$field.$path.')';\n }\n\n /**\n * Apply custom ordering to a query based on a priority array.\n *\n * @param $column\n * @param array $priority\n * @param $direction\n * @return string\n */\n public function orderByPriority($column, array $priority, $direction = 'asc')\n {\n $placeholders = implode(',', array_fill(0, count($priority), '?'));\n\n return \"FIELD($column, $placeholders) $direction\";", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1724014845", - "repo_full_name": "laravel/framework", - "pr_number": 52535, - "pr_file": "src/Illuminate/Database/Query/Grammars/MySqlGrammar.php", - "discussion_id": "1724014845", - "commented_code": "@@ -524,4 +524,19 @@ protected function wrapJsonBooleanSelector($value)\n \n return 'json_extract('.$field.$path.')';\n }\n+\n+ /**\n+ * Apply custom ordering to a query based on a priority array.\n+ *\n+ * @param $column\n+ * @param array $priority\n+ * @param $direction\n+ * @return string\n+ */\n+ public function orderByPriority($column, array $priority, $direction = 'asc')\n+ {\n+ $placeholders = implode(',', array_fill(0, count($priority), '?'));\n+\n+ return \"FIELD($column, $placeholders) $direction\";", - "comment_created_at": "2024-08-20T21:50:45+00:00", - "comment_author": "staudenmeir", - "comment_body": "The column name needs to be wrapped with `$this->wrap($column)`. This also affects the other grammars.", - "pr_file_module": null - }, - { - "comment_id": "1724361191", - "repo_full_name": "laravel/framework", - "pr_number": 52535, - "pr_file": "src/Illuminate/Database/Query/Grammars/MySqlGrammar.php", - "discussion_id": "1724014845", - "commented_code": "@@ -524,4 +524,19 @@ protected function wrapJsonBooleanSelector($value)\n \n return 'json_extract('.$field.$path.')';\n }\n+\n+ /**\n+ * Apply custom ordering to a query based on a priority array.\n+ *\n+ * @param $column\n+ * @param array $priority\n+ * @param $direction\n+ * @return string\n+ */\n+ public function orderByPriority($column, array $priority, $direction = 'asc')\n+ {\n+ $placeholders = implode(',', array_fill(0, count($priority), '?'));\n+\n+ return \"FIELD($column, $placeholders) $direction\";", - "comment_created_at": "2024-08-21T04:21:26+00:00", - "comment_author": "Mushood", - "comment_body": "I have applied the wrap function on all grammars and updated the tests accordingly.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1898688984", - "pr_number": 54025, - "pr_file": "src/Illuminate/Database/Schema/Grammars/MySqlGrammar.php", - "created_at": "2024-12-27T19:45:49+00:00", - "commented_code": "*/\n protected function compileKey(Blueprint $blueprint, Fluent $command, $type)\n {\n return sprintf('alter table %s add %s %s%s(%s)',\n $columns = collect($command->columns)->map(function ($column) {\n // Check if the column contains a functional expression\n // and return as-is, otherwise wrap\n return preg_match('/\\(.+\\)/', $column) ? $column : $this->wrap($column);\n })->implode(', ');\n\n return sprintf(\n 'alter table %s add %s %s%s(%s)',\n $this->wrapTable($blueprint),\n $type,\n $this->wrap($command->index),\n $command->algorithm ? ' using '.$command->algorithm : '',\n $this->columnize($command->columns)\n $command->algorithm ? ' using ' . 
$command->algorithm : '',\n $columns\n );\n }", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1898688984", - "repo_full_name": "laravel/framework", - "pr_number": 54025, - "pr_file": "src/Illuminate/Database/Schema/Grammars/MySqlGrammar.php", - "discussion_id": "1898688984", - "commented_code": "@@ -492,12 +492,19 @@ public function compileSpatialIndex(Blueprint $blueprint, Fluent $command)\n */\n protected function compileKey(Blueprint $blueprint, Fluent $command, $type)\n {\n- return sprintf('alter table %s add %s %s%s(%s)',\n+ $columns = collect($command->columns)->map(function ($column) {\n+ // Check if the column contains a functional expression\n+ // and return as-is, otherwise wrap\n+ return preg_match('/\\(.+\\)/', $column) ? $column : $this->wrap($column);\n+ })->implode(', ');\n+\n+ return sprintf(\n+ 'alter table %s add %s %s%s(%s)',\n $this->wrapTable($blueprint),\n $type,\n $this->wrap($command->index),\n- $command->algorithm ? ' using '.$command->algorithm : '',\n- $this->columnize($command->columns)\n+ $command->algorithm ? ' using ' . $command->algorithm : '',\n+ $columns\n );\n }\n ", - "comment_created_at": "2024-12-27T19:45:49+00:00", - "comment_author": "shaedrich", - "comment_body": "One possible solution for https://github.com/laravel/framework/pull/54025#discussion_r1898460480\r\n```suggestion\r\n\r\n protected static function isFunctionalExpression(string $column): bool\r\n {\r\n return preg_match('/\\(.+\\)/', $column);\r\n }\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1898689872", - "pr_number": 54025, - "pr_file": "src/Illuminate/Database/Schema/Grammars/MySqlGrammar.php", - "created_at": "2024-12-27T19:48:17+00:00", - "commented_code": "*/\n protected function compileKey(Blueprint $blueprint, Fluent $command, $type)\n {\n return sprintf('alter table %s add %s %s%s(%s)',\n $columns = collect($command->columns)->map(function ($column) {\n // Check if the column contains a functional expression\n // and return as-is, otherwise wrap\n return preg_match('/\\(.+\\)/', $column) ? $column : $this->wrap($column);", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1898689872", - "repo_full_name": "laravel/framework", - "pr_number": 54025, - "pr_file": "src/Illuminate/Database/Schema/Grammars/MySqlGrammar.php", - "discussion_id": "1898689872", - "commented_code": "@@ -492,12 +492,19 @@ public function compileSpatialIndex(Blueprint $blueprint, Fluent $command)\n */\n protected function compileKey(Blueprint $blueprint, Fluent $command, $type)\n {\n- return sprintf('alter table %s add %s %s%s(%s)',\n+ $columns = collect($command->columns)->map(function ($column) {\n+ // Check if the column contains a functional expression\n+ // and return as-is, otherwise wrap\n+ return preg_match('/\\(.+\\)/', $column) ? $column : $this->wrap($column);", - "comment_created_at": "2024-12-27T19:48:17+00:00", - "comment_author": "shaedrich", - "comment_body": "https://github.com/laravel/framework/pull/54025#discussion_r1898688984 can be applied here, then:\r\n```suggestion\r\n return self::isFunctionalExpression($column) ? 
$column : $this->wrap($column);\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1898691810", - "pr_number": 54025, - "pr_file": "src/Illuminate/Database/Schema/Grammars/MySqlGrammar.php", - "created_at": "2024-12-27T19:54:11+00:00", - "commented_code": "*/\n protected function compileKey(Blueprint $blueprint, Fluent $command, $type)\n {\n return sprintf('alter table %s add %s %s%s(%s)',\n $columns = collect($command->columns)->map(function (string $column) {\n return self::isFunctionalExpression($column) ? $column : $this->wrap($column);\n })->implode(', ');", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1898691810", - "repo_full_name": "laravel/framework", - "pr_number": 54025, - "pr_file": "src/Illuminate/Database/Schema/Grammars/MySqlGrammar.php", - "discussion_id": "1898691810", - "commented_code": "@@ -492,15 +492,23 @@ public function compileSpatialIndex(Blueprint $blueprint, Fluent $command)\n */\n protected function compileKey(Blueprint $blueprint, Fluent $command, $type)\n {\n- return sprintf('alter table %s add %s %s%s(%s)',\n+ $columns = collect($command->columns)->map(function (string $column) {\n+ return self::isFunctionalExpression($column) ? $column : $this->wrap($column);\n+ })->implode(', ');", - "comment_created_at": "2024-12-27T19:54:11+00:00", - "comment_author": "shaedrich", - "comment_body": "https://github.com/laravel/framework/pull/54025#discussion_r1898675719 got outdated\u2014not sure if you still want to change that:\r\n```suggestion\r\n $columns = collect($command->columns)\r\n ->map(fn (string $column) => self::isFunctionalExpression($column) ? $column : $this->wrap($column))\r\n ->implode(', ');\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1682404703", - "pr_number": 52177, - "pr_file": "tests/Integration/Database/DatabaseSchemaBlueprintTest.php", - "created_at": "2024-07-18T08:11:43+00:00", - "commented_code": "$this->assertEquals($expected, $queries);\n }\n\n public function testAddColumnNamedCreateWorks()\n {\n DB::connection()->getSchemaBuilder()->create('users', function ($table) {\n $table->string('name')->nullable();\n });\n\n $blueprintMySql = new Blueprint('users', function ($table) {\n $table->string('create')->nullable(false);\n });\n\n $queries = $blueprintMySql->toSql(DB::connection(), new MySqlGrammar);\n\n $expected = [\n 'alter table `users` add `create` varchar(255) not null',\n ];\n\n $this->assertEquals($expected, $queries);\n\n $blueprintPostgres = new Blueprint('users', function ($table) {\n $table->string('create')->nullable(false);\n });\n\n $queries = $blueprintPostgres->toSql(DB::connection(), new PostgresGrammar);\n\n $expected = [\n 'alter table \"users\" add column \"create\" varchar(255) not null',\n ];\n\n $this->assertEquals($expected, $queries);\n\n $blueprintSQLite = new Blueprint('users', function ($table) {\n $table->string('create')->nullable(false);\n });\n\n $queries = $blueprintSQLite->toSql(DB::connection(), new SQLiteGrammar);\n\n $expected = [\n 'alter table \"users\" add column \"create\" varchar not null',\n ];\n\n $this->assertEquals($expected, $queries);\n\n $blueprintSqlServer = new Blueprint('users', function ($table) {\n $table->string('create')->nullable(false);\n });\n\n $queries = $blueprintSqlServer->toSql(DB::connection(), new SqlServerGrammar);\n\n $expected = [\n 'alter table \"users\" add \"create\" nvarchar(255) not null',\n ];\n\n $this->assertEquals($expected, $queries);", - "repo_full_name": 
"laravel/framework", - "discussion_comments": [ - { - "comment_id": "1682404703", - "repo_full_name": "laravel/framework", - "pr_number": 52177, - "pr_file": "tests/Integration/Database/DatabaseSchemaBlueprintTest.php", - "discussion_id": "1682404703", - "commented_code": "@@ -507,6 +507,61 @@ public function testAddUniqueIndexWithNameWorks()\n $this->assertEquals($expected, $queries);\n }\n \n+ public function testAddColumnNamedCreateWorks()\n+ {\n+ DB::connection()->getSchemaBuilder()->create('users', function ($table) {\n+ $table->string('name')->nullable();\n+ });\n+\n+ $blueprintMySql = new Blueprint('users', function ($table) {\n+ $table->string('create')->nullable(false);\n+ });\n+\n+ $queries = $blueprintMySql->toSql(DB::connection(), new MySqlGrammar);\n+\n+ $expected = [\n+ 'alter table `users` add `create` varchar(255) not null',\n+ ];\n+\n+ $this->assertEquals($expected, $queries);\n+\n+ $blueprintPostgres = new Blueprint('users', function ($table) {\n+ $table->string('create')->nullable(false);\n+ });\n+\n+ $queries = $blueprintPostgres->toSql(DB::connection(), new PostgresGrammar);\n+\n+ $expected = [\n+ 'alter table \"users\" add column \"create\" varchar(255) not null',\n+ ];\n+\n+ $this->assertEquals($expected, $queries);\n+\n+ $blueprintSQLite = new Blueprint('users', function ($table) {\n+ $table->string('create')->nullable(false);\n+ });\n+\n+ $queries = $blueprintSQLite->toSql(DB::connection(), new SQLiteGrammar);\n+\n+ $expected = [\n+ 'alter table \"users\" add column \"create\" varchar not null',\n+ ];\n+\n+ $this->assertEquals($expected, $queries);\n+\n+ $blueprintSqlServer = new Blueprint('users', function ($table) {\n+ $table->string('create')->nullable(false);\n+ });\n+\n+ $queries = $blueprintSqlServer->toSql(DB::connection(), new SqlServerGrammar);\n+\n+ $expected = [\n+ 'alter table \"users\" add \"create\" nvarchar(255) not null',\n+ ];\n+\n+ $this->assertEquals($expected, $queries);", - "comment_created_at": "2024-07-18T08:11:43+00:00", - "comment_author": "hafezdivandari", - "comment_body": "```suggestion\r\n Schema::create('users', function (Blueprint $table) {\r\n $table->string('name');\r\n });\r\n\r\n Schema::table('users', function (Blueprint $table) {\r\n $table->string('create');\r\n });\r\n\r\n $this->assertTrue(Schema::hasColumn('users', 'create'));\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/framework-explicit-null-handling.json b/_reviewers/framework-explicit-null-handling.json new file mode 100644 index 0000000..5ab2cb9 --- /dev/null +++ b/_reviewers/framework-explicit-null-handling.json @@ -0,0 +1,356 @@ +[ + { + "discussion_id": "2072707142", + "pr_number": 55645, + "pr_file": "src/Illuminate/Container/Container.php", + "created_at": "2025-05-04T20:43:00+00:00", + "commented_code": "array_pop($this->buildStack);\n\n if (!empty($reflector->getAttributes(Lazy::class))) {", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2072707142", + "repo_full_name": "laravel/framework", + "pr_number": 55645, + "pr_file": "src/Illuminate/Container/Container.php", + "discussion_id": "2072707142", + "commented_code": "@@ -1058,8 +1059,16 @@ public function build($concrete)\n \n array_pop($this->buildStack);\n \n+ if (!empty($reflector->getAttributes(Lazy::class))) {", + "comment_created_at": "2025-05-04T20:43:00+00:00", + "comment_author": "shaedrich", + "comment_body": "You could just directly check this\r\n```suggestion\r\n if ($reflector->getAttributes(Lazy::class) !== []) {\r\n```", + 
"pr_file_module": null + }, + { + "comment_id": "2073825274", + "repo_full_name": "laravel/framework", + "pr_number": 55645, + "pr_file": "src/Illuminate/Container/Container.php", + "discussion_id": "2072707142", + "commented_code": "@@ -1058,8 +1059,16 @@ public function build($concrete)\n \n array_pop($this->buildStack);\n \n+ if (!empty($reflector->getAttributes(Lazy::class))) {", + "comment_created_at": "2025-05-05T17:04:20+00:00", + "comment_author": "olekjs", + "comment_body": "Does this change bring anything beyond syntax? I’m just asking out of curiosity.", + "pr_file_module": null + }, + { + "comment_id": "2073872455", + "repo_full_name": "laravel/framework", + "pr_number": 55645, + "pr_file": "src/Illuminate/Container/Container.php", + "discussion_id": "2072707142", + "commented_code": "@@ -1058,8 +1059,16 @@ public function build($concrete)\n \n array_pop($this->buildStack);\n \n+ if (!empty($reflector->getAttributes(Lazy::class))) {", + "comment_created_at": "2025-05-05T17:32:05+00:00", + "comment_author": "shaedrich", + "comment_body": "Not much. Technically, a function call is more expensive than a comparison, but this should mostly be optimized away in practice. One _slight_ advantage would be a clear communication that we are working with an array here and don't have to check _any_ structure's emptiness.", + "pr_file_module": null + }, + { + "comment_id": "2078948392", + "repo_full_name": "laravel/framework", + "pr_number": 55645, + "pr_file": "src/Illuminate/Container/Container.php", + "discussion_id": "2072707142", + "commented_code": "@@ -1058,8 +1059,16 @@ public function build($concrete)\n \n array_pop($this->buildStack);\n \n+ if (!empty($reflector->getAttributes(Lazy::class))) {", + "comment_created_at": "2025-05-08T06:03:12+00:00", + "comment_author": "macropay-solutions", + "comment_body": "@olekjs empty function is a trap for future bugs.\r\nWe have a rule in place to never use empty function because of that. \r\n\r\nempty('0') is true for example.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2111059728", + "pr_number": 55877, + "pr_file": "src/Illuminate/Foundation/Console/ConfigPublishCommand.php", + "created_at": "2025-05-28T06:45:12+00:00", + "commented_code": "return;\n }\n\n $name = (string) (is_null($this->argument('name')) ? select(\n label: 'Which configuration file would you like to publish?',\n options: (new Collection($config))->map(function (string $path) {\n return basename($path, '.php');\n }),\n ) : $this->argument('name'));\n $choices = array_map(\n fn (string $path) => basename($path, '.php'),\n $config,\n );\n\n $choice = windows_os()\n ? select(\n 'Which configuration file would you like to publish?',\n $choices,\n scroll: 15,\n )\n : search(\n label: 'Which configuration file would you like to publish?',\n placeholder: 'Search...',\n options: fn ($search) => array_values(array_filter(\n $choices,\n fn ($choice) => str_contains(strtolower($choice), strtolower($search))\n )),\n scroll: 15,\n );\n\n $name = (string) (is_null($this->argument('name'))\n ? $choice\n : $this->argument('name'));", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2111059728", + "repo_full_name": "laravel/framework", + "pr_number": 55877, + "pr_file": "src/Illuminate/Foundation/Console/ConfigPublishCommand.php", + "discussion_id": "2111059728", + "commented_code": "@@ -46,12 +47,30 @@ public function handle()\n return;\n }\n \n- $name = (string) (is_null($this->argument('name')) ? 
select(\n- label: 'Which configuration file would you like to publish?',\n- options: (new Collection($config))->map(function (string $path) {\n- return basename($path, '.php');\n- }),\n- ) : $this->argument('name'));\n+ $choices = array_map(\n+ fn (string $path) => basename($path, '.php'),\n+ $config,\n+ );\n+\n+ $choice = windows_os()\n+ ? select(\n+ 'Which configuration file would you like to publish?',\n+ $choices,\n+ scroll: 15,\n+ )\n+ : search(\n+ label: 'Which configuration file would you like to publish?',\n+ placeholder: 'Search...',\n+ options: fn ($search) => array_values(array_filter(\n+ $choices,\n+ fn ($choice) => str_contains(strtolower($choice), strtolower($search))\n+ )),\n+ scroll: 15,\n+ );\n+\n+ $name = (string) (is_null($this->argument('name'))\n+ ? $choice\n+ : $this->argument('name'));", + "comment_created_at": "2025-05-28T06:45:12+00:00", + "comment_author": "shaedrich", + "comment_body": "Wouldn't this suffice?\r\n```suggestion\r\n $name = (string) ($this->argument('name') ?? $choice);\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2102660306", + "pr_number": 55810, + "pr_file": "src/Illuminate/Filesystem/FilesystemAdapter.php", + "created_at": "2025-05-22T14:05:37+00:00", + "commented_code": "*/\n protected function concatPathToUrl($url, $path)\n {\n if (empty($url) || empty($path)) {", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2102660306", + "repo_full_name": "laravel/framework", + "pr_number": 55810, + "pr_file": "src/Illuminate/Filesystem/FilesystemAdapter.php", + "discussion_id": "2102660306", + "commented_code": "@@ -847,6 +847,10 @@ public function temporaryUploadUrl($path, $expiration, array $options = [])\n */\n protected function concatPathToUrl($url, $path)\n {\n+ if (empty($url) || empty($path)) {", + "comment_created_at": "2025-05-22T14:05:37+00:00", + "comment_author": "GrahamCampbell", + "comment_body": "Empty is the wrong function here. It does not check if the variable us the empty string, it checks if it looks like an enmpty-ish value, inc if it's 0. 
One should compare against `''` instead.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2090368477", + "pr_number": 55740, + "pr_file": "src/Illuminate/Foundation/Configuration/ApplicationBuilder.php", + "created_at": "2025-05-15T06:23:18+00:00", + "commented_code": "string $apiPrefix = 'api',\n ?callable $then = null)\n {\n if (is_null($using) && (is_string($web) || is_array($web) || is_string($api) || is_array($api) || is_string($pages) || is_string($health)) || is_callable($then)) {\n $using = $this->buildRoutingCallback($web, $api, $pages, $health, $apiPrefix, $then);\n if (is_null($using) && (is_string($web) || is_array($web) || is_string($api) || is_array($api) || is_string($pages)) || is_callable($then)) {\n $using = $this->buildRoutingCallback($web, $api, $pages, $apiPrefix, $then);\n }\n\n if (is_string($health)) {\n PreventRequestsDuringMaintenance::except($health);\n }\n if (is_string($health)) {\n $using = function () use ($health, $using) {\n $this->buildHealthCheckRoute($health);\n call_user_func($using, $this->app);", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2090368477", + "repo_full_name": "laravel/framework", + "pr_number": 55740, + "pr_file": "src/Illuminate/Foundation/Configuration/ApplicationBuilder.php", + "discussion_id": "2090368477", + "commented_code": "@@ -159,12 +159,17 @@ public function withRouting(?Closure $using = null,\n string $apiPrefix = 'api',\n ?callable $then = null)\n {\n- if (is_null($using) && (is_string($web) || is_array($web) || is_string($api) || is_array($api) || is_string($pages) || is_string($health)) || is_callable($then)) {\n- $using = $this->buildRoutingCallback($web, $api, $pages, $health, $apiPrefix, $then);\n+ if (is_null($using) && (is_string($web) || is_array($web) || is_string($api) || is_array($api) || is_string($pages)) || is_callable($then)) {\n+ $using = $this->buildRoutingCallback($web, $api, $pages, $apiPrefix, $then);\n+ }\n \n- if (is_string($health)) {\n- PreventRequestsDuringMaintenance::except($health);\n- }\n+ if (is_string($health)) {\n+ $using = function () use ($health, $using) {\n+ $this->buildHealthCheckRoute($health);\n+ call_user_func($using, $this->app);", + "comment_created_at": "2025-05-15T06:23:18+00:00", + "comment_author": "rodrigopedra", + "comment_body": "This line should be:\r\n\r\n```php\r\n$this->app->call($using);\r\n```\r\n\r\nTo preserve the current behavior, as in:\r\n\r\nhttps://github.com/laravel/framework/blob/df12a087731b37708cfbba72172a4c381363fed2/src/Illuminate/Foundation/Support/Providers/RouteServiceProvider.php#L162", + "pr_file_module": null + }, + { + "comment_id": "2090370825", + "repo_full_name": "laravel/framework", + "pr_number": 55740, + "pr_file": "src/Illuminate/Foundation/Configuration/ApplicationBuilder.php", + "discussion_id": "2090368477", + "commented_code": "@@ -159,12 +159,17 @@ public function withRouting(?Closure $using = null,\n string $apiPrefix = 'api',\n ?callable $then = null)\n {\n- if (is_null($using) && (is_string($web) || is_array($web) || is_string($api) || is_array($api) || is_string($pages) || is_string($health)) || is_callable($then)) {\n- $using = $this->buildRoutingCallback($web, $api, $pages, $health, $apiPrefix, $then);\n+ if (is_null($using) && (is_string($web) || is_array($web) || is_string($api) || is_array($api) || is_string($pages)) || is_callable($then)) {\n+ $using = $this->buildRoutingCallback($web, $api, $pages, $apiPrefix, $then);\n+ }\n \n- if (is_string($health)) {\n- 
PreventRequestsDuringMaintenance::except($health);\n- }\n+ if (is_string($health)) {\n+ $using = function () use ($health, $using) {\n+ $this->buildHealthCheckRoute($health);\n+ call_user_func($using, $this->app);", + "comment_created_at": "2025-05-15T06:25:02+00:00", + "comment_author": "rodrigopedra", + "comment_body": "Also, `$using` in this point, can be `null`. A check for that should be added.\r\n\r\nAn app can be built without specifying no routes, by just providing the `$commands` and/or the `$channels` parameters.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1953114524", + "pr_number": 54582, + "pr_file": "src/Illuminate/View/Compilers/BladeCompiler.php", + "created_at": "2025-02-12T17:32:47+00:00", + "commented_code": "if (is_null($alias)) {", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1953114524", + "repo_full_name": "laravel/framework", + "pr_number": 54582, + "pr_file": "src/Illuminate/View/Compilers/BladeCompiler.php", + "discussion_id": "1953114524", + "commented_code": "@@ -762,14 +768,14 @@ public function component($class, $alias = null, $prefix = '')\n \n if (is_null($alias)) {", + "comment_created_at": "2025-02-12T17:32:47+00:00", + "comment_author": "shaedrich", + "comment_body": "You can avoid the function call like this:\r\n```suggestion\r\n if ($alias === null) {\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1923809576", + "pr_number": 54285, + "pr_file": "src/Illuminate/Support/Str.php", + "created_at": "2025-01-21T14:16:17+00:00", + "commented_code": "static::$camelCache = [];\n static::$studlyCache = [];\n }\n\n /**\n * Sanitize the given string.\n *\n *\n * See: https://symfony.com/doc/current/html_sanitizer.html\n *\n * @param string $string The input string to sanitize.\n * @param HtmlSanitizerConfig|null $config Custom configuration to use for sanitizing.\n * @return string The sanitized string.\n */\n public static function sanitize($string, ?HtmlSanitizerConfig $config = null)\n {\n if (is_null($string)) {", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1923809576", + "repo_full_name": "laravel/framework", + "pr_number": 54285, + "pr_file": "src/Illuminate/Support/Str.php", + "discussion_id": "1923809576", + "commented_code": "@@ -2014,4 +2016,28 @@ public static function flushCache()\n static::$camelCache = [];\n static::$studlyCache = [];\n }\n+\n+ /**\n+ * Sanitize the given string.\n+ *\n+ *\n+ * See: https://symfony.com/doc/current/html_sanitizer.html\n+ *\n+ * @param string $string The input string to sanitize.\n+ * @param HtmlSanitizerConfig|null $config Custom configuration to use for sanitizing.\n+ * @return string The sanitized string.\n+ */\n+ public static function sanitize($string, ?HtmlSanitizerConfig $config = null)\n+ {\n+ if (is_null($string)) {", + "comment_created_at": "2025-01-21T14:16:17+00:00", + "comment_author": "shaedrich", + "comment_body": "According to your `is_null()` call, your string can be `null`. 
This should be reflected in the PHPDoc comment type as well:\r\n```suggestion\r\n * @param string|null $string The input string to sanitize.\r\n * @param HtmlSanitizerConfig|null $config Custom configuration to use for sanitizing.\r\n * @return string The sanitized string.\r\n */\r\n public static function sanitize($string, ?HtmlSanitizerConfig $config = null)\r\n {\r\n if ($string === null) {\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1923810842", + "pr_number": 54285, + "pr_file": "src/Illuminate/Support/Str.php", + "created_at": "2025-01-21T14:17:05+00:00", + "commented_code": "static::$camelCache = [];\n static::$studlyCache = [];\n }\n\n /**\n * Sanitize the given string.\n *\n *\n * See: https://symfony.com/doc/current/html_sanitizer.html\n *\n * @param string $string The input string to sanitize.\n * @param HtmlSanitizerConfig|null $config Custom configuration to use for sanitizing.\n * @return string The sanitized string.\n */\n public static function sanitize($string, ?HtmlSanitizerConfig $config = null)\n {\n if (is_null($string)) {\n return null;", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1923810842", + "repo_full_name": "laravel/framework", + "pr_number": 54285, + "pr_file": "src/Illuminate/Support/Str.php", + "discussion_id": "1923810842", + "commented_code": "@@ -2014,4 +2016,28 @@ public static function flushCache()\n static::$camelCache = [];\n static::$studlyCache = [];\n }\n+\n+ /**\n+ * Sanitize the given string.\n+ *\n+ *\n+ * See: https://symfony.com/doc/current/html_sanitizer.html\n+ *\n+ * @param string $string The input string to sanitize.\n+ * @param HtmlSanitizerConfig|null $config Custom configuration to use for sanitizing.\n+ * @return string The sanitized string.\n+ */\n+ public static function sanitize($string, ?HtmlSanitizerConfig $config = null)\n+ {\n+ if (is_null($string)) {\n+ return null;", + "comment_created_at": "2025-01-21T14:17:05+00:00", + "comment_author": "shaedrich", + "comment_body": "According to your `is_null()` call, the method can return `null`. This should be reflected in the PHPDoc comment type as well:\r\n```suggestion\r\n * @return string|null The sanitized string.\r\n */\r\n public static function sanitize($string, ?HtmlSanitizerConfig $config = null)\r\n {\r\n if (is_null($string)) {\r\n return null;\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1884704097", + "pr_number": 53872, + "pr_file": "src/Illuminate/Http/Response.php", + "created_at": "2024-12-14T01:28:04+00:00", + "commented_code": "return $this;\n }\n\n /**\n * Gets the current response content.\n */\n #[\\Override]\n public function getContent(): string|false\n {\n return transform(parent::getContent(), fn ($content) => $content, '');", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1884704097", + "repo_full_name": "laravel/framework", + "pr_number": 53872, + "pr_file": "src/Illuminate/Http/Response.php", + "discussion_id": "1884704097", + "commented_code": "@@ -75,6 +75,15 @@ public function setContent(mixed $content): static\n return $this;\n }\n \n+ /**\n+ * Gets the current response content.\n+ */\n+ #[\\Override]\n+ public function getContent(): string|false\n+ {\n+ return transform(parent::getContent(), fn ($content) => $content, '');", + "comment_created_at": "2024-12-14T01:28:04+00:00", + "comment_author": "rodrigopedra", + "comment_body": "Why not just?\r\n\r\n```php\r\nreturn parent::getContent() ?? 
'';\r\n```\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1689591772", + "pr_number": 51956, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", + "created_at": "2024-07-24T11:15:45+00:00", + "commented_code": "// If the relation value has been set, we will set it on this attributes\n // list for returning. If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n if (isset($relation) || is_null($value)) {\n if (isset($relation)) {", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1689591772", + "repo_full_name": "laravel/framework", + "pr_number": 51956, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", + "discussion_id": "1689591772", + "commented_code": "@@ -394,7 +394,7 @@ public function relationsToArray()\n // If the relation value has been set, we will set it on this attributes\n // list for returning. If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n- if (isset($relation) || is_null($value)) {\n+ if (isset($relation)) {", + "comment_created_at": "2024-07-24T11:15:45+00:00", + "comment_author": "pxpm", + "comment_body": "Doesn't this change the behavior? `null` values should enter the condition. Removing the `|| is_null()` they will not enter anymore as the `isset()` without the `is_null()` check would exclude them. \r\n\r\n```php\r\n$relation, $value = null;\r\n\r\nif (isset($relation)) {\r\n// we don't reach this code\r\n}\r\n\r\nif (isset($relation) || is_null($value)) {\r\n// this code is executed\r\n}\r\n```\r\nIf this is some kind of bug fixing it shouldn't be part of this PR IMO. ", + "pr_file_module": null + }, + { + "comment_id": "1689784138", + "repo_full_name": "laravel/framework", + "pr_number": 51956, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", + "discussion_id": "1689591772", + "commented_code": "@@ -394,7 +394,7 @@ public function relationsToArray()\n // If the relation value has been set, we will set it on this attributes\n // list for returning. If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n- if (isset($relation) || is_null($value)) {\n+ if (isset($relation)) {", + "comment_created_at": "2024-07-24T13:16:01+00:00", + "comment_author": "calebdw", + "comment_body": "This yielded the following error:\r\n\r\n```\r\n ------ ------------------------------------------------------------------------------------------------------------------ \r\n Line Illuminate/Database/Eloquent/Concerns/HasAttributes.php (in context of class Illuminate\\Database\\Eloquent\\Model) \r\n ------ ------------------------------------------------------------------------------------------------------------------ \r\n 398 Variable $relation might not be defined. \r\n 🪪 variable.undefined \r\n```\r\n\r\nThe `is_null($value)` is already checked above:\r\n\r\nhttps://github.com/laravel/framework/blob/46297e441252d495713fb30223ffcfb325a4f120/src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php#L380-L385\r\n\r\nAnd you'll notice that `$value` is not referenced inside the `if` body and therefore should not be referenced inside the condition. 
The only thing that matters is checking that the `$relation` isset before assigning it.", + "pr_file_module": null + }, + { + "comment_id": "1689815593", + "repo_full_name": "laravel/framework", + "pr_number": 51956, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", + "discussion_id": "1689591772", + "commented_code": "@@ -394,7 +394,7 @@ public function relationsToArray()\n // If the relation value has been set, we will set it on this attributes\n // list for returning. If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n- if (isset($relation) || is_null($value)) {\n+ if (isset($relation)) {", + "comment_created_at": "2024-07-24T13:31:40+00:00", + "comment_author": "pxpm", + "comment_body": "Hey @calebdw I think we are talking different things here. \r\n\r\nThe first check for `is_null($value)` just sets the `$relation` value to `null`. The check you removed sets `$attributes[$key] = $relation;` when `$value = null **OR** isset($relation)`. \r\n\r\nThe fact that `$value` is not used inside the if, does not mean you can remove it from the condition as that would turn up not setting `$attributes[$key] = null` like was previously done. ", + "pr_file_module": null + }, + { + "comment_id": "1689824977", + "repo_full_name": "laravel/framework", + "pr_number": 51956, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", + "discussion_id": "1689591772", + "commented_code": "@@ -394,7 +394,7 @@ public function relationsToArray()\n // If the relation value has been set, we will set it on this attributes\n // list for returning. If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n- if (isset($relation) || is_null($value)) {\n+ if (isset($relation)) {", + "comment_created_at": "2024-07-24T13:36:20+00:00", + "comment_author": "calebdw", + "comment_body": "If `$value === null`, then `$relation` isset.", + "pr_file_module": null + }, + { + "comment_id": "1689830616", + "repo_full_name": "laravel/framework", + "pr_number": 51956, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", + "discussion_id": "1689591772", + "commented_code": "@@ -394,7 +394,7 @@ public function relationsToArray()\n // If the relation value has been set, we will set it on this attributes\n // list for returning. If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n- if (isset($relation) || is_null($value)) {\n+ if (isset($relation)) {", + "comment_created_at": "2024-07-24T13:39:03+00:00", + "comment_author": "calebdw", + "comment_body": "Hmm, apparently there's some idiosyncrasies with [isset](https://www.php.net/manual/en/function.isset.php):\r\n\r\n> Determine if a variable is considered set, this means if a variable is declared and is different than [null](https://www.php.net/manual/en/reserved.constants.php#constant.null). ", + "pr_file_module": null + }, + { + "comment_id": "1689855018", + "repo_full_name": "laravel/framework", + "pr_number": 51956, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", + "discussion_id": "1689591772", + "commented_code": "@@ -394,7 +394,7 @@ public function relationsToArray()\n // If the relation value has been set, we will set it on this attributes\n // list for returning. 
If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n- if (isset($relation) || is_null($value)) {\n+ if (isset($relation)) {", + "comment_created_at": "2024-07-24T13:51:01+00:00", + "comment_author": "pxpm", + "comment_body": "Indeed @calebdw that was the issue with your reasoning, as you were considering `$relation` set when it's `null`. 👍 ", + "pr_file_module": null + }, + { + "comment_id": "1689893235", + "repo_full_name": "laravel/framework", + "pr_number": 51956, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", + "discussion_id": "1689591772", + "commented_code": "@@ -394,7 +394,7 @@ public function relationsToArray()\n // If the relation value has been set, we will set it on this attributes\n // list for returning. If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n- if (isset($relation) || is_null($value)) {\n+ if (isset($relation)) {", + "comment_created_at": "2024-07-24T14:09:10+00:00", + "comment_author": "calebdw", + "comment_body": "It turns out there's not a straightforward way in PHP to check if a variable isset / declared (null values being acceptable). Even using `isset($relation) || is_null($relation)` fails because `is_null` returns true when the variable does not exist :roll_eyes: .\r\n\r\nThanks for pointing this out! I also added a test since this is rather tricky", + "pr_file_module": null + }, + { + "comment_id": "1689940559", + "repo_full_name": "laravel/framework", + "pr_number": 51956, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", + "discussion_id": "1689591772", + "commented_code": "@@ -394,7 +394,7 @@ public function relationsToArray()\n // If the relation value has been set, we will set it on this attributes\n // list for returning. If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n- if (isset($relation) || is_null($value)) {\n+ if (isset($relation)) {", + "comment_created_at": "2024-07-24T14:37:04+00:00", + "comment_author": "pxpm", + "comment_body": "Well, theoretically you can do a `get_defined_vars()` and a `array_key_exists()` to check if a variable is actually defined disregarding it's value: \r\n\r\n```php\r\n$test = null;\r\n\r\nisset($test); // return false\r\narray_key_exists('test', get_defined_vars()); // return true\r\n```\r\n\r\nNote that `get_defined_vars()` gets the variables depending on the scope it's called. https://www.php.net/manual/en/function.get-defined-vars.php\r\n\r\nCheers", + "pr_file_module": null + }, + { + "comment_id": "1689947777", + "repo_full_name": "laravel/framework", + "pr_number": 51956, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", + "discussion_id": "1689591772", + "commented_code": "@@ -394,7 +394,7 @@ public function relationsToArray()\n // If the relation value has been set, we will set it on this attributes\n // list for returning. 
If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n- if (isset($relation) || is_null($value)) {\n+ if (isset($relation)) {", + "comment_created_at": "2024-07-24T14:41:33+00:00", + "comment_author": "calebdw", + "comment_body": "Yes, I considered that, however, phpstan doesn't recognize that the `$relation` is defined inside the `if` so you'd still need an `@phpstan-ignore` call", + "pr_file_module": null + }, + { + "comment_id": "1689967349", + "repo_full_name": "laravel/framework", + "pr_number": 51956, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", + "discussion_id": "1689591772", + "commented_code": "@@ -394,7 +394,7 @@ public function relationsToArray()\n // If the relation value has been set, we will set it on this attributes\n // list for returning. If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n- if (isset($relation) || is_null($value)) {\n+ if (isset($relation)) {", + "comment_created_at": "2024-07-24T14:53:35+00:00", + "comment_author": "pxpm", + "comment_body": "You can probably use `$relation ?? null` to avoid the phpstan error ?", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/framework-explicit-null-handling.md b/_reviewers/framework-explicit-null-handling.md index ec075ad..e472a2d 100644 --- a/_reviewers/framework-explicit-null-handling.md +++ b/_reviewers/framework-explicit-null-handling.md @@ -94,361 +94,3 @@ if (isset($relation)) { ``` Remember that `isset()` returns false for null values, which may not be what you expect when working with relationships or arrays that might legitimately contain null values. - - -[ - { - "discussion_id": "2072707142", - "pr_number": 55645, - "pr_file": "src/Illuminate/Container/Container.php", - "created_at": "2025-05-04T20:43:00+00:00", - "commented_code": "array_pop($this->buildStack);\n\n if (!empty($reflector->getAttributes(Lazy::class))) {", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2072707142", - "repo_full_name": "laravel/framework", - "pr_number": 55645, - "pr_file": "src/Illuminate/Container/Container.php", - "discussion_id": "2072707142", - "commented_code": "@@ -1058,8 +1059,16 @@ public function build($concrete)\n \n array_pop($this->buildStack);\n \n+ if (!empty($reflector->getAttributes(Lazy::class))) {", - "comment_created_at": "2025-05-04T20:43:00+00:00", - "comment_author": "shaedrich", - "comment_body": "You could just directly check this\r\n```suggestion\r\n if ($reflector->getAttributes(Lazy::class) !== []) {\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2073825274", - "repo_full_name": "laravel/framework", - "pr_number": 55645, - "pr_file": "src/Illuminate/Container/Container.php", - "discussion_id": "2072707142", - "commented_code": "@@ -1058,8 +1059,16 @@ public function build($concrete)\n \n array_pop($this->buildStack);\n \n+ if (!empty($reflector->getAttributes(Lazy::class))) {", - "comment_created_at": "2025-05-05T17:04:20+00:00", - "comment_author": "olekjs", - "comment_body": "Does this change bring anything beyond syntax? 
I\u2019m just asking out of curiosity.", - "pr_file_module": null - }, - { - "comment_id": "2073872455", - "repo_full_name": "laravel/framework", - "pr_number": 55645, - "pr_file": "src/Illuminate/Container/Container.php", - "discussion_id": "2072707142", - "commented_code": "@@ -1058,8 +1059,16 @@ public function build($concrete)\n \n array_pop($this->buildStack);\n \n+ if (!empty($reflector->getAttributes(Lazy::class))) {", - "comment_created_at": "2025-05-05T17:32:05+00:00", - "comment_author": "shaedrich", - "comment_body": "Not much. Technically, a function call is more expensive than a comparison, but this should mostly be optimized away in practice. One _slight_ advantage would be a clear communication that we are working with an array here and don't have to check _any_ structure's emptiness.", - "pr_file_module": null - }, - { - "comment_id": "2078948392", - "repo_full_name": "laravel/framework", - "pr_number": 55645, - "pr_file": "src/Illuminate/Container/Container.php", - "discussion_id": "2072707142", - "commented_code": "@@ -1058,8 +1059,16 @@ public function build($concrete)\n \n array_pop($this->buildStack);\n \n+ if (!empty($reflector->getAttributes(Lazy::class))) {", - "comment_created_at": "2025-05-08T06:03:12+00:00", - "comment_author": "macropay-solutions", - "comment_body": "@olekjs empty function is a trap for future bugs.\r\nWe have a rule in place to never use empty function because of that. \r\n\r\nempty('0') is true for example.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2111059728", - "pr_number": 55877, - "pr_file": "src/Illuminate/Foundation/Console/ConfigPublishCommand.php", - "created_at": "2025-05-28T06:45:12+00:00", - "commented_code": "return;\n }\n\n $name = (string) (is_null($this->argument('name')) ? select(\n label: 'Which configuration file would you like to publish?',\n options: (new Collection($config))->map(function (string $path) {\n return basename($path, '.php');\n }),\n ) : $this->argument('name'));\n $choices = array_map(\n fn (string $path) => basename($path, '.php'),\n $config,\n );\n\n $choice = windows_os()\n ? select(\n 'Which configuration file would you like to publish?',\n $choices,\n scroll: 15,\n )\n : search(\n label: 'Which configuration file would you like to publish?',\n placeholder: 'Search...',\n options: fn ($search) => array_values(array_filter(\n $choices,\n fn ($choice) => str_contains(strtolower($choice), strtolower($search))\n )),\n scroll: 15,\n );\n\n $name = (string) (is_null($this->argument('name'))\n ? $choice\n : $this->argument('name'));", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2111059728", - "repo_full_name": "laravel/framework", - "pr_number": 55877, - "pr_file": "src/Illuminate/Foundation/Console/ConfigPublishCommand.php", - "discussion_id": "2111059728", - "commented_code": "@@ -46,12 +47,30 @@ public function handle()\n return;\n }\n \n- $name = (string) (is_null($this->argument('name')) ? select(\n- label: 'Which configuration file would you like to publish?',\n- options: (new Collection($config))->map(function (string $path) {\n- return basename($path, '.php');\n- }),\n- ) : $this->argument('name'));\n+ $choices = array_map(\n+ fn (string $path) => basename($path, '.php'),\n+ $config,\n+ );\n+\n+ $choice = windows_os()\n+ ? 
select(\n+ 'Which configuration file would you like to publish?',\n+ $choices,\n+ scroll: 15,\n+ )\n+ : search(\n+ label: 'Which configuration file would you like to publish?',\n+ placeholder: 'Search...',\n+ options: fn ($search) => array_values(array_filter(\n+ $choices,\n+ fn ($choice) => str_contains(strtolower($choice), strtolower($search))\n+ )),\n+ scroll: 15,\n+ );\n+\n+ $name = (string) (is_null($this->argument('name'))\n+ ? $choice\n+ : $this->argument('name'));", - "comment_created_at": "2025-05-28T06:45:12+00:00", - "comment_author": "shaedrich", - "comment_body": "Wouldn't this suffice?\r\n```suggestion\r\n $name = (string) ($this->argument('name') ?? $choice);\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2102660306", - "pr_number": 55810, - "pr_file": "src/Illuminate/Filesystem/FilesystemAdapter.php", - "created_at": "2025-05-22T14:05:37+00:00", - "commented_code": "*/\n protected function concatPathToUrl($url, $path)\n {\n if (empty($url) || empty($path)) {", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2102660306", - "repo_full_name": "laravel/framework", - "pr_number": 55810, - "pr_file": "src/Illuminate/Filesystem/FilesystemAdapter.php", - "discussion_id": "2102660306", - "commented_code": "@@ -847,6 +847,10 @@ public function temporaryUploadUrl($path, $expiration, array $options = [])\n */\n protected function concatPathToUrl($url, $path)\n {\n+ if (empty($url) || empty($path)) {", - "comment_created_at": "2025-05-22T14:05:37+00:00", - "comment_author": "GrahamCampbell", - "comment_body": "Empty is the wrong function here. It does not check if the variable us the empty string, it checks if it looks like an enmpty-ish value, inc if it's 0. One should compare against `''` instead.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2090368477", - "pr_number": 55740, - "pr_file": "src/Illuminate/Foundation/Configuration/ApplicationBuilder.php", - "created_at": "2025-05-15T06:23:18+00:00", - "commented_code": "string $apiPrefix = 'api',\n ?callable $then = null)\n {\n if (is_null($using) && (is_string($web) || is_array($web) || is_string($api) || is_array($api) || is_string($pages) || is_string($health)) || is_callable($then)) {\n $using = $this->buildRoutingCallback($web, $api, $pages, $health, $apiPrefix, $then);\n if (is_null($using) && (is_string($web) || is_array($web) || is_string($api) || is_array($api) || is_string($pages)) || is_callable($then)) {\n $using = $this->buildRoutingCallback($web, $api, $pages, $apiPrefix, $then);\n }\n\n if (is_string($health)) {\n PreventRequestsDuringMaintenance::except($health);\n }\n if (is_string($health)) {\n $using = function () use ($health, $using) {\n $this->buildHealthCheckRoute($health);\n call_user_func($using, $this->app);", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2090368477", - "repo_full_name": "laravel/framework", - "pr_number": 55740, - "pr_file": "src/Illuminate/Foundation/Configuration/ApplicationBuilder.php", - "discussion_id": "2090368477", - "commented_code": "@@ -159,12 +159,17 @@ public function withRouting(?Closure $using = null,\n string $apiPrefix = 'api',\n ?callable $then = null)\n {\n- if (is_null($using) && (is_string($web) || is_array($web) || is_string($api) || is_array($api) || is_string($pages) || is_string($health)) || is_callable($then)) {\n- $using = $this->buildRoutingCallback($web, $api, $pages, $health, $apiPrefix, $then);\n+ if (is_null($using) && (is_string($web) || 
is_array($web) || is_string($api) || is_array($api) || is_string($pages)) || is_callable($then)) {\n+ $using = $this->buildRoutingCallback($web, $api, $pages, $apiPrefix, $then);\n+ }\n \n- if (is_string($health)) {\n- PreventRequestsDuringMaintenance::except($health);\n- }\n+ if (is_string($health)) {\n+ $using = function () use ($health, $using) {\n+ $this->buildHealthCheckRoute($health);\n+ call_user_func($using, $this->app);", - "comment_created_at": "2025-05-15T06:23:18+00:00", - "comment_author": "rodrigopedra", - "comment_body": "This line should be:\r\n\r\n```php\r\n$this->app->call($using);\r\n```\r\n\r\nTo preserve the current behavior, as in:\r\n\r\nhttps://github.com/laravel/framework/blob/df12a087731b37708cfbba72172a4c381363fed2/src/Illuminate/Foundation/Support/Providers/RouteServiceProvider.php#L162", - "pr_file_module": null - }, - { - "comment_id": "2090370825", - "repo_full_name": "laravel/framework", - "pr_number": 55740, - "pr_file": "src/Illuminate/Foundation/Configuration/ApplicationBuilder.php", - "discussion_id": "2090368477", - "commented_code": "@@ -159,12 +159,17 @@ public function withRouting(?Closure $using = null,\n string $apiPrefix = 'api',\n ?callable $then = null)\n {\n- if (is_null($using) && (is_string($web) || is_array($web) || is_string($api) || is_array($api) || is_string($pages) || is_string($health)) || is_callable($then)) {\n- $using = $this->buildRoutingCallback($web, $api, $pages, $health, $apiPrefix, $then);\n+ if (is_null($using) && (is_string($web) || is_array($web) || is_string($api) || is_array($api) || is_string($pages)) || is_callable($then)) {\n+ $using = $this->buildRoutingCallback($web, $api, $pages, $apiPrefix, $then);\n+ }\n \n- if (is_string($health)) {\n- PreventRequestsDuringMaintenance::except($health);\n- }\n+ if (is_string($health)) {\n+ $using = function () use ($health, $using) {\n+ $this->buildHealthCheckRoute($health);\n+ call_user_func($using, $this->app);", - "comment_created_at": "2025-05-15T06:25:02+00:00", - "comment_author": "rodrigopedra", - "comment_body": "Also, `$using` in this point, can be `null`. 
A check for that should be added.\r\n\r\nAn app can be built without specifying no routes, by just providing the `$commands` and/or the `$channels` parameters.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1953114524", - "pr_number": 54582, - "pr_file": "src/Illuminate/View/Compilers/BladeCompiler.php", - "created_at": "2025-02-12T17:32:47+00:00", - "commented_code": "if (is_null($alias)) {", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1953114524", - "repo_full_name": "laravel/framework", - "pr_number": 54582, - "pr_file": "src/Illuminate/View/Compilers/BladeCompiler.php", - "discussion_id": "1953114524", - "commented_code": "@@ -762,14 +768,14 @@ public function component($class, $alias = null, $prefix = '')\n \n if (is_null($alias)) {", - "comment_created_at": "2025-02-12T17:32:47+00:00", - "comment_author": "shaedrich", - "comment_body": "You can avoid the function call like this:\r\n```suggestion\r\n if ($alias === null) {\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1923809576", - "pr_number": 54285, - "pr_file": "src/Illuminate/Support/Str.php", - "created_at": "2025-01-21T14:16:17+00:00", - "commented_code": "static::$camelCache = [];\n static::$studlyCache = [];\n }\n\n /**\n * Sanitize the given string.\n *\n *\n * See: https://symfony.com/doc/current/html_sanitizer.html\n *\n * @param string $string The input string to sanitize.\n * @param HtmlSanitizerConfig|null $config Custom configuration to use for sanitizing.\n * @return string The sanitized string.\n */\n public static function sanitize($string, ?HtmlSanitizerConfig $config = null)\n {\n if (is_null($string)) {", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1923809576", - "repo_full_name": "laravel/framework", - "pr_number": 54285, - "pr_file": "src/Illuminate/Support/Str.php", - "discussion_id": "1923809576", - "commented_code": "@@ -2014,4 +2016,28 @@ public static function flushCache()\n static::$camelCache = [];\n static::$studlyCache = [];\n }\n+\n+ /**\n+ * Sanitize the given string.\n+ *\n+ *\n+ * See: https://symfony.com/doc/current/html_sanitizer.html\n+ *\n+ * @param string $string The input string to sanitize.\n+ * @param HtmlSanitizerConfig|null $config Custom configuration to use for sanitizing.\n+ * @return string The sanitized string.\n+ */\n+ public static function sanitize($string, ?HtmlSanitizerConfig $config = null)\n+ {\n+ if (is_null($string)) {", - "comment_created_at": "2025-01-21T14:16:17+00:00", - "comment_author": "shaedrich", - "comment_body": "According to your `is_null()` call, your string can be `null`. 
This should be reflected in the PHPDoc comment type as well:\r\n```suggestion\r\n * @param string|null $string The input string to sanitize.\r\n * @param HtmlSanitizerConfig|null $config Custom configuration to use for sanitizing.\r\n * @return string The sanitized string.\r\n */\r\n public static function sanitize($string, ?HtmlSanitizerConfig $config = null)\r\n {\r\n if ($string === null) {\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1923810842", - "pr_number": 54285, - "pr_file": "src/Illuminate/Support/Str.php", - "created_at": "2025-01-21T14:17:05+00:00", - "commented_code": "static::$camelCache = [];\n static::$studlyCache = [];\n }\n\n /**\n * Sanitize the given string.\n *\n *\n * See: https://symfony.com/doc/current/html_sanitizer.html\n *\n * @param string $string The input string to sanitize.\n * @param HtmlSanitizerConfig|null $config Custom configuration to use for sanitizing.\n * @return string The sanitized string.\n */\n public static function sanitize($string, ?HtmlSanitizerConfig $config = null)\n {\n if (is_null($string)) {\n return null;", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1923810842", - "repo_full_name": "laravel/framework", - "pr_number": 54285, - "pr_file": "src/Illuminate/Support/Str.php", - "discussion_id": "1923810842", - "commented_code": "@@ -2014,4 +2016,28 @@ public static function flushCache()\n static::$camelCache = [];\n static::$studlyCache = [];\n }\n+\n+ /**\n+ * Sanitize the given string.\n+ *\n+ *\n+ * See: https://symfony.com/doc/current/html_sanitizer.html\n+ *\n+ * @param string $string The input string to sanitize.\n+ * @param HtmlSanitizerConfig|null $config Custom configuration to use for sanitizing.\n+ * @return string The sanitized string.\n+ */\n+ public static function sanitize($string, ?HtmlSanitizerConfig $config = null)\n+ {\n+ if (is_null($string)) {\n+ return null;", - "comment_created_at": "2025-01-21T14:17:05+00:00", - "comment_author": "shaedrich", - "comment_body": "According to your `is_null()` call, the method can return `null`. This should be reflected in the PHPDoc comment type as well:\r\n```suggestion\r\n * @return string|null The sanitized string.\r\n */\r\n public static function sanitize($string, ?HtmlSanitizerConfig $config = null)\r\n {\r\n if (is_null($string)) {\r\n return null;\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1884704097", - "pr_number": 53872, - "pr_file": "src/Illuminate/Http/Response.php", - "created_at": "2024-12-14T01:28:04+00:00", - "commented_code": "return $this;\n }\n\n /**\n * Gets the current response content.\n */\n #[\\Override]\n public function getContent(): string|false\n {\n return transform(parent::getContent(), fn ($content) => $content, '');", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1884704097", - "repo_full_name": "laravel/framework", - "pr_number": 53872, - "pr_file": "src/Illuminate/Http/Response.php", - "discussion_id": "1884704097", - "commented_code": "@@ -75,6 +75,15 @@ public function setContent(mixed $content): static\n return $this;\n }\n \n+ /**\n+ * Gets the current response content.\n+ */\n+ #[\\Override]\n+ public function getContent(): string|false\n+ {\n+ return transform(parent::getContent(), fn ($content) => $content, '');", - "comment_created_at": "2024-12-14T01:28:04+00:00", - "comment_author": "rodrigopedra", - "comment_body": "Why not just?\r\n\r\n```php\r\nreturn parent::getContent() ?? 
'';\r\n```\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1689591772", - "pr_number": 51956, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", - "created_at": "2024-07-24T11:15:45+00:00", - "commented_code": "// If the relation value has been set, we will set it on this attributes\n // list for returning. If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n if (isset($relation) || is_null($value)) {\n if (isset($relation)) {", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1689591772", - "repo_full_name": "laravel/framework", - "pr_number": 51956, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", - "discussion_id": "1689591772", - "commented_code": "@@ -394,7 +394,7 @@ public function relationsToArray()\n // If the relation value has been set, we will set it on this attributes\n // list for returning. If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n- if (isset($relation) || is_null($value)) {\n+ if (isset($relation)) {", - "comment_created_at": "2024-07-24T11:15:45+00:00", - "comment_author": "pxpm", - "comment_body": "Doesn't this change the behavior? `null` values should enter the condition. Removing the `|| is_null()` they will not enter anymore as the `isset()` without the `is_null()` check would exclude them. \r\n\r\n```php\r\n$relation, $value = null;\r\n\r\nif (isset($relation)) {\r\n// we don't reach this code\r\n}\r\n\r\nif (isset($relation) || is_null($value)) {\r\n// this code is executed\r\n}\r\n```\r\nIf this is some kind of bug fixing it shouldn't be part of this PR IMO. ", - "pr_file_module": null - }, - { - "comment_id": "1689784138", - "repo_full_name": "laravel/framework", - "pr_number": 51956, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", - "discussion_id": "1689591772", - "commented_code": "@@ -394,7 +394,7 @@ public function relationsToArray()\n // If the relation value has been set, we will set it on this attributes\n // list for returning. If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n- if (isset($relation) || is_null($value)) {\n+ if (isset($relation)) {", - "comment_created_at": "2024-07-24T13:16:01+00:00", - "comment_author": "calebdw", - "comment_body": "This yielded the following error:\r\n\r\n```\r\n ------ ------------------------------------------------------------------------------------------------------------------ \r\n Line Illuminate/Database/Eloquent/Concerns/HasAttributes.php (in context of class Illuminate\\Database\\Eloquent\\Model) \r\n ------ ------------------------------------------------------------------------------------------------------------------ \r\n 398 Variable $relation might not be defined. \r\n \ud83e\udeaa variable.undefined \r\n```\r\n\r\nThe `is_null($value)` is already checked above:\r\n\r\nhttps://github.com/laravel/framework/blob/46297e441252d495713fb30223ffcfb325a4f120/src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php#L380-L385\r\n\r\nAnd you'll notice that `$value` is not referenced inside the `if` body and therefore should not be referenced inside the condition. 
The only thing that matters is checking that the `$relation` isset before assigning it.", - "pr_file_module": null - }, - { - "comment_id": "1689815593", - "repo_full_name": "laravel/framework", - "pr_number": 51956, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", - "discussion_id": "1689591772", - "commented_code": "@@ -394,7 +394,7 @@ public function relationsToArray()\n // If the relation value has been set, we will set it on this attributes\n // list for returning. If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n- if (isset($relation) || is_null($value)) {\n+ if (isset($relation)) {", - "comment_created_at": "2024-07-24T13:31:40+00:00", - "comment_author": "pxpm", - "comment_body": "Hey @calebdw I think we are talking different things here. \r\n\r\nThe first check for `is_null($value)` just sets the `$relation` value to `null`. The check you removed sets `$attributes[$key] = $relation;` when `$value = null **OR** isset($relation)`. \r\n\r\nThe fact that `$value` is not used inside the if, does not mean you can remove it from the condition as that would turn up not setting `$attributes[$key] = null` like was previously done. ", - "pr_file_module": null - }, - { - "comment_id": "1689824977", - "repo_full_name": "laravel/framework", - "pr_number": 51956, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", - "discussion_id": "1689591772", - "commented_code": "@@ -394,7 +394,7 @@ public function relationsToArray()\n // If the relation value has been set, we will set it on this attributes\n // list for returning. If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n- if (isset($relation) || is_null($value)) {\n+ if (isset($relation)) {", - "comment_created_at": "2024-07-24T13:36:20+00:00", - "comment_author": "calebdw", - "comment_body": "If `$value === null`, then `$relation` isset.", - "pr_file_module": null - }, - { - "comment_id": "1689830616", - "repo_full_name": "laravel/framework", - "pr_number": 51956, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", - "discussion_id": "1689591772", - "commented_code": "@@ -394,7 +394,7 @@ public function relationsToArray()\n // If the relation value has been set, we will set it on this attributes\n // list for returning. If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n- if (isset($relation) || is_null($value)) {\n+ if (isset($relation)) {", - "comment_created_at": "2024-07-24T13:39:03+00:00", - "comment_author": "calebdw", - "comment_body": "Hmm, apparently there's some idiosyncrasies with [isset](https://www.php.net/manual/en/function.isset.php):\r\n\r\n> Determine if a variable is considered set, this means if a variable is declared and is different than [null](https://www.php.net/manual/en/reserved.constants.php#constant.null). ", - "pr_file_module": null - }, - { - "comment_id": "1689855018", - "repo_full_name": "laravel/framework", - "pr_number": 51956, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", - "discussion_id": "1689591772", - "commented_code": "@@ -394,7 +394,7 @@ public function relationsToArray()\n // If the relation value has been set, we will set it on this attributes\n // list for returning. 
If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n- if (isset($relation) || is_null($value)) {\n+ if (isset($relation)) {", - "comment_created_at": "2024-07-24T13:51:01+00:00", - "comment_author": "pxpm", - "comment_body": "Indeed @calebdw that was the issue with your reasoning, as you were considering `$relation` set when it's `null`. \ud83d\udc4d ", - "pr_file_module": null - }, - { - "comment_id": "1689893235", - "repo_full_name": "laravel/framework", - "pr_number": 51956, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", - "discussion_id": "1689591772", - "commented_code": "@@ -394,7 +394,7 @@ public function relationsToArray()\n // If the relation value has been set, we will set it on this attributes\n // list for returning. If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n- if (isset($relation) || is_null($value)) {\n+ if (isset($relation)) {", - "comment_created_at": "2024-07-24T14:09:10+00:00", - "comment_author": "calebdw", - "comment_body": "It turns out there's not a straightforward way in PHP to check if a variable isset / declared (null values being acceptable). Even using `isset($relation) || is_null($relation)` fails because `is_null` returns true when the variable does not exist :roll_eyes: .\r\n\r\nThanks for pointing this out! I also added a test since this is rather tricky", - "pr_file_module": null - }, - { - "comment_id": "1689940559", - "repo_full_name": "laravel/framework", - "pr_number": 51956, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", - "discussion_id": "1689591772", - "commented_code": "@@ -394,7 +394,7 @@ public function relationsToArray()\n // If the relation value has been set, we will set it on this attributes\n // list for returning. If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n- if (isset($relation) || is_null($value)) {\n+ if (isset($relation)) {", - "comment_created_at": "2024-07-24T14:37:04+00:00", - "comment_author": "pxpm", - "comment_body": "Well, theoretically you can do a `get_defined_vars()` and a `array_key_exists()` to check if a variable is actually defined disregarding it's value: \r\n\r\n```php\r\n$test = null;\r\n\r\nisset($test); // return false\r\narray_key_exists('test', get_defined_vars()); // return true\r\n```\r\n\r\nNote that `get_defined_vars()` gets the variables depending on the scope it's called. https://www.php.net/manual/en/function.get-defined-vars.php\r\n\r\nCheers", - "pr_file_module": null - }, - { - "comment_id": "1689947777", - "repo_full_name": "laravel/framework", - "pr_number": 51956, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", - "discussion_id": "1689591772", - "commented_code": "@@ -394,7 +394,7 @@ public function relationsToArray()\n // If the relation value has been set, we will set it on this attributes\n // list for returning. 
If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n- if (isset($relation) || is_null($value)) {\n+ if (isset($relation)) {", - "comment_created_at": "2024-07-24T14:41:33+00:00", - "comment_author": "calebdw", - "comment_body": "Yes, I considered that, however, phpstan doesn't recognize that the `$relation` is defined inside the `if` so you'd still need an `@phpstan-ignore` call", - "pr_file_module": null - }, - { - "comment_id": "1689967349", - "repo_full_name": "laravel/framework", - "pr_number": 51956, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", - "discussion_id": "1689591772", - "commented_code": "@@ -394,7 +394,7 @@ public function relationsToArray()\n // If the relation value has been set, we will set it on this attributes\n // list for returning. If it was not arrayable or null, we'll not set\n // the value on the array because it is some type of invalid value.\n- if (isset($relation) || is_null($value)) {\n+ if (isset($relation)) {", - "comment_created_at": "2024-07-24T14:53:35+00:00", - "comment_author": "pxpm", - "comment_body": "You can probably use `$relation ?? null` to avoid the phpstan error ?", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/framework-keep-ci-configurations-minimal.json b/_reviewers/framework-keep-ci-configurations-minimal.json new file mode 100644 index 0000000..18935de --- /dev/null +++ b/_reviewers/framework-keep-ci-configurations-minimal.json @@ -0,0 +1,310 @@ +[ + { + "discussion_id": "1856404321", + "pr_number": 53648, + "pr_file": ".github/workflows/databases-nightly.yml", + "created_at": "2024-11-25T11:03:56+00:00", + "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1856404321", + "repo_full_name": "laravel/framework", + "pr_number": 53648, + "pr_file": ".github/workflows/databases-nightly.yml", + "discussion_id": "1856404321", + "commented_code": "@@ -77,9 +77,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", + "comment_created_at": "2024-11-25T11:03:56+00:00", + "comment_author": "crynobone", + "comment_body": "```suggestion\r\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1856406988", + "pr_number": 53648, + "pr_file": ".github/workflows/databases-nightly.yml", + "created_at": "2024-11-25T11:04:34+00:00", + "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1856406988", + "repo_full_name": "laravel/framework", + "pr_number": 53648, + "pr_file": ".github/workflows/databases-nightly.yml", + "discussion_id": "1856406988", + "commented_code": "@@ -31,9 +31,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n- extensions: dom, curl, libxml, 
mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", + "comment_created_at": "2024-11-25T11:04:34+00:00", + "comment_author": "crynobone", + "comment_body": "```suggestion\r\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1856407588", + "pr_number": 53648, + "pr_file": ".github/workflows/tests.yml", + "created_at": "2024-11-25T11:04:57+00:00", + "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: ${{ matrix.php }}\n extensions: dom, curl, libxml, mbstring, zip, pdo, sqlite, pdo_sqlite, gd, pdo_mysql, fileinfo, ftp, redis, memcached, gmp, intl, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pdo, sqlite, pdo_sqlite, gd, pdo_mysql, fileinfo, ftp, redis, memcached, gmp, intl, xml, xdebug, :php-psr", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1856407588", + "repo_full_name": "laravel/framework", + "pr_number": 53648, + "pr_file": ".github/workflows/tests.yml", + "discussion_id": "1856407588", + "commented_code": "@@ -133,9 +133,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: ${{ matrix.php }}\n- extensions: dom, curl, libxml, mbstring, zip, pdo, sqlite, pdo_sqlite, gd, pdo_mysql, fileinfo, ftp, redis, memcached, gmp, intl, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pdo, sqlite, pdo_sqlite, gd, pdo_mysql, fileinfo, ftp, redis, memcached, gmp, intl, xml, xdebug, :php-psr", + "comment_created_at": "2024-11-25T11:04:57+00:00", + "comment_author": "crynobone", + "comment_body": "```suggestion\r\n extensions: dom, curl, libxml, mbstring, zip, pdo, sqlite, pdo_sqlite, gd, pdo_mysql, fileinfo, ftp, redis, memcached, gmp, intl, :php-psr\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1856408180", + "pr_number": 53648, + "pr_file": ".github/workflows/tests.yml", + "created_at": "2024-11-25T11:05:20+00:00", + "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: ${{ matrix.php }}\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, sqlite, pdo_sqlite, gd, redis, igbinary, msgpack, memcached, gmp, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, sqlite, pdo_sqlite, gd, redis, igbinary, msgpack, memcached, gmp, xml, xdebug, :php-psr\n ini-values: error_reporting=E_ALL\n tools: composer:v2\n coverage: none\n coverage: xdebug", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1856408180", + "repo_full_name": "laravel/framework", + "pr_number": 53648, + "pr_file": ".github/workflows/tests.yml", + "discussion_id": "1856408180", + "commented_code": "@@ -53,10 +53,10 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: ${{ matrix.php }}\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, sqlite, pdo_sqlite, gd, redis, igbinary, msgpack, memcached, gmp, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, sqlite, pdo_sqlite, gd, redis, igbinary, msgpack, memcached, gmp, xml, xdebug, :php-psr\n ini-values: error_reporting=E_ALL\n tools: composer:v2\n- coverage: none\n+ coverage: xdebug", + "comment_created_at": "2024-11-25T11:05:20+00:00", + "comment_author": "crynobone", + "comment_body": "```suggestion\r\n coverage: none\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1856408369", + "pr_number": 53648, + "pr_file": 
".github/workflows/tests.yml", + "created_at": "2024-11-25T11:05:28+00:00", + "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: ${{ matrix.php }}\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, sqlite, pdo_sqlite, gd, redis, igbinary, msgpack, memcached, gmp, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, sqlite, pdo_sqlite, gd, redis, igbinary, msgpack, memcached, gmp, xml, xdebug, :php-psr", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1856408369", + "repo_full_name": "laravel/framework", + "pr_number": 53648, + "pr_file": ".github/workflows/tests.yml", + "discussion_id": "1856408369", + "commented_code": "@@ -53,10 +53,10 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: ${{ matrix.php }}\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, sqlite, pdo_sqlite, gd, redis, igbinary, msgpack, memcached, gmp, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, sqlite, pdo_sqlite, gd, redis, igbinary, msgpack, memcached, gmp, xml, xdebug, :php-psr", + "comment_created_at": "2024-11-25T11:05:28+00:00", + "comment_author": "crynobone", + "comment_body": "```suggestion\r\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, sqlite, pdo_sqlite, gd, redis, igbinary, msgpack, memcached, gmp, :php-psr\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1856408715", + "pr_number": 53648, + "pr_file": ".github/workflows/queues.yml", + "created_at": "2024-11-25T11:05:44+00:00", + "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.2\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1856408715", + "repo_full_name": "laravel/framework", + "pr_number": 53648, + "pr_file": ".github/workflows/queues.yml", + "discussion_id": "1856408715", + "commented_code": "@@ -147,9 +147,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: 8.2\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", + "comment_created_at": "2024-11-25T11:05:44+00:00", + "comment_author": "crynobone", + "comment_body": "```suggestion\r\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1856409284", + "pr_number": 53648, + "pr_file": ".github/workflows/queues.yml", + "created_at": "2024-11-25T11:06:11+00:00", + "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.2\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1856409284", + "repo_full_name": "laravel/framework", + "pr_number": 53648, + "pr_file": ".github/workflows/queues.yml", + "discussion_id": "1856409284", + "commented_code": "@@ -107,9 +107,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: 8.2\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", + "comment_created_at": 
"2024-11-25T11:06:11+00:00", + "comment_author": "crynobone", + "comment_body": "```suggestion\r\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1856409723", + "pr_number": 53648, + "pr_file": ".github/workflows/queues.yml", + "created_at": "2024-11-25T11:06:28+00:00", + "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.2\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1856409723", + "repo_full_name": "laravel/framework", + "pr_number": 53648, + "pr_file": ".github/workflows/queues.yml", + "discussion_id": "1856409723", + "commented_code": "@@ -59,9 +59,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: 8.2\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", + "comment_created_at": "2024-11-25T11:06:28+00:00", + "comment_author": "crynobone", + "comment_body": "```suggestion\r\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1856409908", + "pr_number": 53648, + "pr_file": ".github/workflows/queues.yml", + "created_at": "2024-11-25T11:06:37+00:00", + "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.2\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr\n tools: composer:v2\n coverage: none\n coverage: xdebug", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1856409908", + "repo_full_name": "laravel/framework", + "pr_number": 53648, + "pr_file": ".github/workflows/queues.yml", + "discussion_id": "1856409908", + "commented_code": "@@ -24,9 +24,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: 8.2\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr\n tools: composer:v2\n- coverage: none\n+ coverage: xdebug", + "comment_created_at": "2024-11-25T11:06:37+00:00", + "comment_author": "crynobone", + "comment_body": "```suggestion\r\n coverage: none\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1856410088", + "pr_number": 53648, + "pr_file": ".github/workflows/queues.yml", + "created_at": "2024-11-25T11:06:45+00:00", + "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.2\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1856410088", + "repo_full_name": "laravel/framework", + "pr_number": 53648, + "pr_file": ".github/workflows/queues.yml", + "discussion_id": "1856410088", + "commented_code": "@@ -24,9 +24,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: 8.2\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, 
:php-psr", + "comment_created_at": "2024-11-25T11:06:45+00:00", + "comment_author": "crynobone", + "comment_body": "```suggestion\r\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1856411136", + "pr_number": 53648, + "pr_file": ".github/workflows/databases.yml", + "created_at": "2024-11-25T11:07:18+00:00", + "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n extensions: dom, curl, libxml, mbstring, zip, pcntl, sqlsrv, pdo, pdo_sqlsrv, odbc, pdo_odbc, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, sqlsrv, pdo, pdo_sqlsrv, odbc, pdo_odbc, xml, xdebug, :php-psr\n tools: composer:v2\n coverage: none\n coverage: xdebug", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1856411136", + "repo_full_name": "laravel/framework", + "pr_number": 53648, + "pr_file": ".github/workflows/databases.yml", + "discussion_id": "1856411136", + "commented_code": "@@ -319,9 +319,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, sqlsrv, pdo, pdo_sqlsrv, odbc, pdo_odbc, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, sqlsrv, pdo, pdo_sqlsrv, odbc, pdo_odbc, xml, xdebug, :php-psr\n tools: composer:v2\n- coverage: none\n+ coverage: xdebug", + "comment_created_at": "2024-11-25T11:07:18+00:00", + "comment_author": "crynobone", + "comment_body": "```suggestion\r\n coverage: none\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1856412237", + "pr_number": 53648, + "pr_file": ".github/workflows/databases.yml", + "created_at": "2024-11-25T11:08:08+00:00", + "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_pgsql, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_pgsql, xml, xdebug, :php-psr", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1856412237", + "repo_full_name": "laravel/framework", + "pr_number": 53648, + "pr_file": ".github/workflows/databases.yml", + "discussion_id": "1856412237", + "commented_code": "@@ -224,9 +224,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_pgsql, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_pgsql, xml, xdebug, :php-psr", + "comment_created_at": "2024-11-25T11:08:08+00:00", + "comment_author": "crynobone", + "comment_body": "```suggestion\r\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_pgsql, :php-psr\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1856412446", + "pr_number": 53648, + "pr_file": ".github/workflows/databases.yml", + "created_at": "2024-11-25T11:08:17+00:00", + "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_pgsql, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_pgsql, xml, xdebug, :php-psr\n tools: composer:v2\n coverage: none\n coverage: xdebug", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1856412446", + "repo_full_name": "laravel/framework", + "pr_number": 53648, + "pr_file": ".github/workflows/databases.yml", + "discussion_id": "1856412446", + "commented_code": "@@ -175,9 +175,9 @@ jobs:\n uses: 
shivammathur/setup-php@v2\n with:\n php-version: 8.3\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_pgsql, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_pgsql, xml, xdebug, :php-psr\n tools: composer:v2\n- coverage: none\n+ coverage: xdebug", + "comment_created_at": "2024-11-25T11:08:17+00:00", + "comment_author": "crynobone", + "comment_body": "```suggestion\r\n coverage: none\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1856413251", + "pr_number": 53648, + "pr_file": ".github/workflows/databases.yml", + "created_at": "2024-11-25T11:08:51+00:00", + "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr\n tools: composer:v2\n coverage: none\n coverage: xdebug", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1856413251", + "repo_full_name": "laravel/framework", + "pr_number": 53648, + "pr_file": ".github/workflows/databases.yml", + "discussion_id": "1856413251", + "commented_code": "@@ -128,9 +128,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr\n tools: composer:v2\n- coverage: none\n+ coverage: xdebug", + "comment_created_at": "2024-11-25T11:08:51+00:00", + "comment_author": "crynobone", + "comment_body": "```suggestion\r\n coverage: none\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/framework-keep-ci-configurations-minimal.md b/_reviewers/framework-keep-ci-configurations-minimal.md index 9be789f..013497b 100644 --- a/_reviewers/framework-keep-ci-configurations-minimal.md +++ b/_reviewers/framework-keep-ci-configurations-minimal.md @@ -25,315 +25,3 @@ Example: ``` This approach keeps CI workflows efficient, reduces execution time, and avoids potential issues from unnecessary dependencies. 
- - -[ - { - "discussion_id": "1856404321", - "pr_number": 53648, - "pr_file": ".github/workflows/databases-nightly.yml", - "created_at": "2024-11-25T11:03:56+00:00", - "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1856404321", - "repo_full_name": "laravel/framework", - "pr_number": 53648, - "pr_file": ".github/workflows/databases-nightly.yml", - "discussion_id": "1856404321", - "commented_code": "@@ -77,9 +77,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", - "comment_created_at": "2024-11-25T11:03:56+00:00", - "comment_author": "crynobone", - "comment_body": "```suggestion\r\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1856406988", - "pr_number": 53648, - "pr_file": ".github/workflows/databases-nightly.yml", - "created_at": "2024-11-25T11:04:34+00:00", - "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1856406988", - "repo_full_name": "laravel/framework", - "pr_number": 53648, - "pr_file": ".github/workflows/databases-nightly.yml", - "discussion_id": "1856406988", - "commented_code": "@@ -31,9 +31,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", - "comment_created_at": "2024-11-25T11:04:34+00:00", - "comment_author": "crynobone", - "comment_body": "```suggestion\r\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1856407588", - "pr_number": 53648, - "pr_file": ".github/workflows/tests.yml", - "created_at": "2024-11-25T11:04:57+00:00", - "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: ${{ matrix.php }}\n extensions: dom, curl, libxml, mbstring, zip, pdo, sqlite, pdo_sqlite, gd, pdo_mysql, fileinfo, ftp, redis, memcached, gmp, intl, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pdo, sqlite, pdo_sqlite, gd, pdo_mysql, fileinfo, ftp, redis, memcached, gmp, intl, xml, xdebug, :php-psr", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1856407588", - "repo_full_name": "laravel/framework", - "pr_number": 53648, - "pr_file": ".github/workflows/tests.yml", - "discussion_id": "1856407588", - "commented_code": "@@ -133,9 +133,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: ${{ matrix.php }}\n- extensions: dom, curl, libxml, mbstring, zip, pdo, sqlite, pdo_sqlite, gd, pdo_mysql, fileinfo, ftp, redis, memcached, gmp, intl, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pdo, sqlite, pdo_sqlite, gd, pdo_mysql, fileinfo, 
ftp, redis, memcached, gmp, intl, xml, xdebug, :php-psr", - "comment_created_at": "2024-11-25T11:04:57+00:00", - "comment_author": "crynobone", - "comment_body": "```suggestion\r\n extensions: dom, curl, libxml, mbstring, zip, pdo, sqlite, pdo_sqlite, gd, pdo_mysql, fileinfo, ftp, redis, memcached, gmp, intl, :php-psr\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1856408180", - "pr_number": 53648, - "pr_file": ".github/workflows/tests.yml", - "created_at": "2024-11-25T11:05:20+00:00", - "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: ${{ matrix.php }}\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, sqlite, pdo_sqlite, gd, redis, igbinary, msgpack, memcached, gmp, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, sqlite, pdo_sqlite, gd, redis, igbinary, msgpack, memcached, gmp, xml, xdebug, :php-psr\n ini-values: error_reporting=E_ALL\n tools: composer:v2\n coverage: none\n coverage: xdebug", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1856408180", - "repo_full_name": "laravel/framework", - "pr_number": 53648, - "pr_file": ".github/workflows/tests.yml", - "discussion_id": "1856408180", - "commented_code": "@@ -53,10 +53,10 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: ${{ matrix.php }}\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, sqlite, pdo_sqlite, gd, redis, igbinary, msgpack, memcached, gmp, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, sqlite, pdo_sqlite, gd, redis, igbinary, msgpack, memcached, gmp, xml, xdebug, :php-psr\n ini-values: error_reporting=E_ALL\n tools: composer:v2\n- coverage: none\n+ coverage: xdebug", - "comment_created_at": "2024-11-25T11:05:20+00:00", - "comment_author": "crynobone", - "comment_body": "```suggestion\r\n coverage: none\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1856408369", - "pr_number": 53648, - "pr_file": ".github/workflows/tests.yml", - "created_at": "2024-11-25T11:05:28+00:00", - "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: ${{ matrix.php }}\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, sqlite, pdo_sqlite, gd, redis, igbinary, msgpack, memcached, gmp, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, sqlite, pdo_sqlite, gd, redis, igbinary, msgpack, memcached, gmp, xml, xdebug, :php-psr", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1856408369", - "repo_full_name": "laravel/framework", - "pr_number": 53648, - "pr_file": ".github/workflows/tests.yml", - "discussion_id": "1856408369", - "commented_code": "@@ -53,10 +53,10 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: ${{ matrix.php }}\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, sqlite, pdo_sqlite, gd, redis, igbinary, msgpack, memcached, gmp, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, sqlite, pdo_sqlite, gd, redis, igbinary, msgpack, memcached, gmp, xml, xdebug, :php-psr", - "comment_created_at": "2024-11-25T11:05:28+00:00", - "comment_author": "crynobone", - "comment_body": "```suggestion\r\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, sqlite, pdo_sqlite, gd, redis, igbinary, msgpack, memcached, gmp, :php-psr\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1856408715", - "pr_number": 53648, - "pr_file": ".github/workflows/queues.yml", - "created_at": "2024-11-25T11:05:44+00:00", - 
"commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.2\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1856408715", - "repo_full_name": "laravel/framework", - "pr_number": 53648, - "pr_file": ".github/workflows/queues.yml", - "discussion_id": "1856408715", - "commented_code": "@@ -147,9 +147,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: 8.2\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", - "comment_created_at": "2024-11-25T11:05:44+00:00", - "comment_author": "crynobone", - "comment_body": "```suggestion\r\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1856409284", - "pr_number": 53648, - "pr_file": ".github/workflows/queues.yml", - "created_at": "2024-11-25T11:06:11+00:00", - "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.2\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1856409284", - "repo_full_name": "laravel/framework", - "pr_number": 53648, - "pr_file": ".github/workflows/queues.yml", - "discussion_id": "1856409284", - "commented_code": "@@ -107,9 +107,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: 8.2\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", - "comment_created_at": "2024-11-25T11:06:11+00:00", - "comment_author": "crynobone", - "comment_body": "```suggestion\r\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1856409723", - "pr_number": 53648, - "pr_file": ".github/workflows/queues.yml", - "created_at": "2024-11-25T11:06:28+00:00", - "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.2\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1856409723", - "repo_full_name": "laravel/framework", - "pr_number": 53648, - "pr_file": ".github/workflows/queues.yml", - "discussion_id": "1856409723", - "commented_code": "@@ -59,9 +59,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: 8.2\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", - "comment_created_at": "2024-11-25T11:06:28+00:00", - "comment_author": "crynobone", - "comment_body": "```suggestion\r\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1856409908", - "pr_number": 53648, - "pr_file": ".github/workflows/queues.yml", - "created_at": "2024-11-25T11:06:37+00:00", - 
"commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.2\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr\n tools: composer:v2\n coverage: none\n coverage: xdebug", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1856409908", - "repo_full_name": "laravel/framework", - "pr_number": 53648, - "pr_file": ".github/workflows/queues.yml", - "discussion_id": "1856409908", - "commented_code": "@@ -24,9 +24,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: 8.2\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr\n tools: composer:v2\n- coverage: none\n+ coverage: xdebug", - "comment_created_at": "2024-11-25T11:06:37+00:00", - "comment_author": "crynobone", - "comment_body": "```suggestion\r\n coverage: none\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1856410088", - "pr_number": 53648, - "pr_file": ".github/workflows/queues.yml", - "created_at": "2024-11-25T11:06:45+00:00", - "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.2\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1856410088", - "repo_full_name": "laravel/framework", - "pr_number": 53648, - "pr_file": ".github/workflows/queues.yml", - "discussion_id": "1856410088", - "commented_code": "@@ -24,9 +24,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: 8.2\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr", - "comment_created_at": "2024-11-25T11:06:45+00:00", - "comment_author": "crynobone", - "comment_body": "```suggestion\r\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1856411136", - "pr_number": 53648, - "pr_file": ".github/workflows/databases.yml", - "created_at": "2024-11-25T11:07:18+00:00", - "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n extensions: dom, curl, libxml, mbstring, zip, pcntl, sqlsrv, pdo, pdo_sqlsrv, odbc, pdo_odbc, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, sqlsrv, pdo, pdo_sqlsrv, odbc, pdo_odbc, xml, xdebug, :php-psr\n tools: composer:v2\n coverage: none\n coverage: xdebug", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1856411136", - "repo_full_name": "laravel/framework", - "pr_number": 53648, - "pr_file": ".github/workflows/databases.yml", - "discussion_id": "1856411136", - "commented_code": "@@ -319,9 +319,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, sqlsrv, pdo, pdo_sqlsrv, odbc, pdo_odbc, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, sqlsrv, pdo, pdo_sqlsrv, odbc, pdo_odbc, xml, xdebug, :php-psr\n tools: composer:v2\n- coverage: none\n+ coverage: xdebug", - "comment_created_at": "2024-11-25T11:07:18+00:00", - "comment_author": "crynobone", - "comment_body": "```suggestion\r\n coverage: 
none\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1856412237", - "pr_number": 53648, - "pr_file": ".github/workflows/databases.yml", - "created_at": "2024-11-25T11:08:08+00:00", - "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_pgsql, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_pgsql, xml, xdebug, :php-psr", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1856412237", - "repo_full_name": "laravel/framework", - "pr_number": 53648, - "pr_file": ".github/workflows/databases.yml", - "discussion_id": "1856412237", - "commented_code": "@@ -224,9 +224,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_pgsql, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_pgsql, xml, xdebug, :php-psr", - "comment_created_at": "2024-11-25T11:08:08+00:00", - "comment_author": "crynobone", - "comment_body": "```suggestion\r\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_pgsql, :php-psr\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1856412446", - "pr_number": 53648, - "pr_file": ".github/workflows/databases.yml", - "created_at": "2024-11-25T11:08:17+00:00", - "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_pgsql, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_pgsql, xml, xdebug, :php-psr\n tools: composer:v2\n coverage: none\n coverage: xdebug", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1856412446", - "repo_full_name": "laravel/framework", - "pr_number": 53648, - "pr_file": ".github/workflows/databases.yml", - "discussion_id": "1856412446", - "commented_code": "@@ -175,9 +175,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_pgsql, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_pgsql, xml, xdebug, :php-psr\n tools: composer:v2\n- coverage: none\n+ coverage: xdebug", - "comment_created_at": "2024-11-25T11:08:17+00:00", - "comment_author": "crynobone", - "comment_body": "```suggestion\r\n coverage: none\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1856413251", - "pr_number": 53648, - "pr_file": ".github/workflows/databases.yml", - "created_at": "2024-11-25T11:08:51+00:00", - "commented_code": "uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr\n tools: composer:v2\n coverage: none\n coverage: xdebug", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1856413251", - "repo_full_name": "laravel/framework", - "pr_number": 53648, - "pr_file": ".github/workflows/databases.yml", - "discussion_id": "1856413251", - "commented_code": "@@ -128,9 +128,9 @@ jobs:\n uses: shivammathur/setup-php@v2\n with:\n php-version: 8.3\n- extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, :php-psr\n+ extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, pdo_mysql, xml, xdebug, :php-psr\n tools: composer:v2\n- coverage: none\n+ coverage: xdebug", - "comment_created_at": 
"2024-11-25T11:08:51+00:00", - "comment_author": "crynobone", - "comment_body": "```suggestion\r\n coverage: none\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/framework-manage-dependencies-wisely.json b/_reviewers/framework-manage-dependencies-wisely.json new file mode 100644 index 0000000..2e11187 --- /dev/null +++ b/_reviewers/framework-manage-dependencies-wisely.json @@ -0,0 +1,196 @@ +[ + { + "discussion_id": "2019945476", + "pr_number": 55203, + "pr_file": "src/Illuminate/Concurrency/composer.json", + "created_at": "2025-03-29T19:04:13+00:00", + "commented_code": "],\n \"require\": {\n \"php\": \"^8.2\",\n \"illuminate/console\": \"^12.0\",\n \"illuminate/contracts\": \"^12.0\",\n \"illuminate/process\": \"^12.0\",", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2019945476", + "repo_full_name": "laravel/framework", + "pr_number": 55203, + "pr_file": "src/Illuminate/Concurrency/composer.json", + "discussion_id": "2019945476", + "commented_code": "@@ -15,9 +15,7 @@\n ],\n \"require\": {\n \"php\": \"^8.2\",\n- \"illuminate/console\": \"^12.0\",\n \"illuminate/contracts\": \"^12.0\",\n- \"illuminate/process\": \"^12.0\",", + "comment_created_at": "2025-03-29T19:04:13+00:00", + "comment_author": "AndrewMast", + "comment_body": "Why are `illuminate/console` and `illuminate/process` removed? I see that `console` is added to the `suggest` field, but what about `process`? Does the rest of the subpackage not need these? If not, it might be best to address the changes in a separate PR.\r\n\r\nI do like the idea of adding `spatie/fork` and the other driver options to `suggest`, though. ", + "pr_file_module": null + }, + { + "comment_id": "2019948808", + "repo_full_name": "laravel/framework", + "pr_number": 55203, + "pr_file": "src/Illuminate/Concurrency/composer.json", + "discussion_id": "2019945476", + "commented_code": "@@ -15,9 +15,7 @@\n ],\n \"require\": {\n \"php\": \"^8.2\",\n- \"illuminate/console\": \"^12.0\",\n \"illuminate/contracts\": \"^12.0\",\n- \"illuminate/process\": \"^12.0\",", + "comment_created_at": "2025-03-29T19:09:46+00:00", + "comment_author": "Amirhf1", + "comment_body": "You are absolutely right. It was my mistake to remove the dependencies for `illuminate/console` and `illuminate/process`. I’ve corrected that and added both dependencies back to the composer.json file. I’ve still kept the suggestions for illuminate/redis and spatie/fork, as they are needed for optional drivers.\r\nThank you for your thorough review.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1934190230", + "pr_number": 54375, + "pr_file": "composer.json", + "created_at": "2025-01-29T16:18:29+00:00", + "commented_code": "\"symfony/uid\": \"^7.0.3\",\n \"symfony/var-dumper\": \"^7.0.3\",\n \"tijsverkoyen/css-to-inline-styles\": \"^2.2.5\",\n \"visus/cuid2\": \"^5.0.0\",", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1934190230", + "repo_full_name": "laravel/framework", + "pr_number": 54375, + "pr_file": "composer.json", + "discussion_id": "1934190230", + "commented_code": "@@ -57,6 +58,7 @@\n \"symfony/uid\": \"^7.0.3\",\n \"symfony/var-dumper\": \"^7.0.3\",\n \"tijsverkoyen/css-to-inline-styles\": \"^2.2.5\",\n+ \"visus/cuid2\": \"^5.0.0\",", + "comment_created_at": "2025-01-29T16:18:29+00:00", + "comment_author": "stevebauman", + "comment_body": "This maintainer isn't very active. 
I'd be very hesitant to base the framework on this package's support.", + "pr_file_module": null + }, + { + "comment_id": "1976438706", + "repo_full_name": "laravel/framework", + "pr_number": 54375, + "pr_file": "composer.json", + "discussion_id": "1934190230", + "commented_code": "@@ -57,6 +58,7 @@\n \"symfony/uid\": \"^7.0.3\",\n \"symfony/var-dumper\": \"^7.0.3\",\n \"tijsverkoyen/css-to-inline-styles\": \"^2.2.5\",\n+ \"visus/cuid2\": \"^5.0.0\",", + "comment_created_at": "2025-03-01T15:34:45+00:00", + "comment_author": "xaoseric", + "comment_body": "Something like cuid2 ideally won't need a lot of updates, but if that is a blocker then I can bring over the code for it as an illuminate/cuid2 package so we can update it without relying on that package.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1667427139", + "pr_number": 52039, + "pr_file": "composer.json", + "created_at": "2024-07-06T19:56:55+00:00", + "commented_code": "\"ext-openssl\": \"*\",\n \"ext-session\": \"*\",\n \"ext-tokenizer\": \"*\",\n \"ext-zlib\": \"*\",", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1667427139", + "repo_full_name": "laravel/framework", + "pr_number": 52039, + "pr_file": "composer.json", + "discussion_id": "1667427139", + "commented_code": "@@ -23,6 +23,7 @@\n \"ext-openssl\": \"*\",\n \"ext-session\": \"*\",\n \"ext-tokenizer\": \"*\",\n+ \"ext-zlib\": \"*\",", + "comment_created_at": "2024-07-06T19:56:55+00:00", + "comment_author": "rojtjo", + "comment_body": "Adding an additional extension in a patch release is a breaking change. This should either only be in the `suggest` section (with an `extension_loaded` check in the code) or be targeted at 12.x instead.", + "pr_file_module": null + }, + { + "comment_id": "1668174517", + "repo_full_name": "laravel/framework", + "pr_number": 52039, + "pr_file": "composer.json", + "discussion_id": "1667427139", + "commented_code": "@@ -23,6 +23,7 @@\n \"ext-openssl\": \"*\",\n \"ext-session\": \"*\",\n \"ext-tokenizer\": \"*\",\n+ \"ext-zlib\": \"*\",", + "comment_created_at": "2024-07-08T08:07:27+00:00", + "comment_author": "driesvints", + "comment_body": "Just for reference, this isn't a breaking change. Composer will warn you about the missing extension. Adding new dependencies on patch releases is allowed.", + "pr_file_module": null + }, + { + "comment_id": "1668209544", + "repo_full_name": "laravel/framework", + "pr_number": 52039, + "pr_file": "composer.json", + "discussion_id": "1667427139", + "commented_code": "@@ -23,6 +23,7 @@\n \"ext-openssl\": \"*\",\n \"ext-session\": \"*\",\n \"ext-tokenizer\": \"*\",\n+ \"ext-zlib\": \"*\",", + "comment_created_at": "2024-07-08T08:29:38+00:00", + "comment_author": "rodrigopedra", + "comment_body": "@driesvints thanks for clarifying that. I understand the argument for composer dependencies, but does it also applies for extensions?\r\n\r\nThat Composer warns about a missing extension, it does, indeed, all good about that.\r\n\r\nWhat I mean is: if someone is running a Laravel 11 app on a server which they are not allowed to freely install extensions, such as shared hosting, then they wouldn't be able to update their app due to the inability to install a newly required extension.\r\n\r\nDoesn't it count as a breaking change? 
As a requirement change?\r\n\r\nI believe a `suggests` entry would be a better fit in that case.\r\n\r\nIf that case is still allowed, as a requirement, I believe docs should also be updated to reflect the newly required extension on the \"Server Requirements\" section:\r\n\r\nhttps://laravel.com/docs/11.x/deployment#server-requirements", + "pr_file_module": null + }, + { + "comment_id": "1668251380", + "repo_full_name": "laravel/framework", + "pr_number": 52039, + "pr_file": "composer.json", + "discussion_id": "1667427139", + "commented_code": "@@ -23,6 +23,7 @@\n \"ext-openssl\": \"*\",\n \"ext-session\": \"*\",\n \"ext-tokenizer\": \"*\",\n+ \"ext-zlib\": \"*\",", + "comment_created_at": "2024-07-08T08:52:47+00:00", + "comment_author": "driesvints", + "comment_body": "If the extension isn't needed for a core essential of Laravel then it can indeed be added as a suggest. I believe this can be done here since the feature is indeed isolated.\r\n\r\nSemVer doesn't look at things like shared hosting and the ability to not install extensions. It's solely focused on what is a breaking change and adding a dependency on a patch release isn't one. A good dependency manager, like Composer, will warn you about the new requirement and prevent you from moving forward in a deploy/install process until you've fulfilled the requirement. And thus nothing will break.", + "pr_file_module": null + }, + { + "comment_id": "1668706256", + "repo_full_name": "laravel/framework", + "pr_number": 52039, + "pr_file": "composer.json", + "discussion_id": "1667427139", + "commented_code": "@@ -23,6 +23,7 @@\n \"ext-openssl\": \"*\",\n \"ext-session\": \"*\",\n \"ext-tokenizer\": \"*\",\n+ \"ext-zlib\": \"*\",", + "comment_created_at": "2024-07-08T14:06:01+00:00", + "comment_author": "driesvints", + "comment_body": "@rmunate move this to suggest please. 
Also for the `composer.json` of the support package.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1847590909", + "pr_number": 53552, + "pr_file": "composer.json", + "created_at": "2024-11-19T03:33:16+00:00", + "commented_code": "\"guzzlehttp/guzzle\": \"^7.8\",\n \"guzzlehttp/uri-template\": \"^1.0\",\n \"laravel/prompts\": \"^0.1.18|^0.2.0|^0.3.0\",\n \"laravel/serializable-closure\": \"^1.3\",\n \"laravel/serializable-closure\": \"^2.0@dev\",", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1847590909", + "repo_full_name": "laravel/framework", + "pr_number": 53552, + "pr_file": "composer.json", + "discussion_id": "1847590909", + "commented_code": "@@ -32,7 +32,7 @@\n \"guzzlehttp/guzzle\": \"^7.8\",\n \"guzzlehttp/uri-template\": \"^1.0\",\n \"laravel/prompts\": \"^0.1.18|^0.2.0|^0.3.0\",\n- \"laravel/serializable-closure\": \"^1.3\",\n+ \"laravel/serializable-closure\": \"^2.0@dev\",", + "comment_created_at": "2024-11-19T03:33:16+00:00", + "comment_author": "crynobone", + "comment_body": "```suggestion\r\n \"laravel/serializable-closure\": \"^1.3|^2.0\",\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1661941378", + "pr_number": 51983, + "pr_file": "src/Illuminate/Cache/composer.json", + "created_at": "2024-07-02T07:00:36+00:00", + "commented_code": "\"illuminate/collections\": \"^11.0\",\n \"illuminate/contracts\": \"^11.0\",\n \"illuminate/macroable\": \"^11.0\",\n \"illuminate/support\": \"^11.0\"\n \"illuminate/support\": \"^11.0\",\n \"psr/simple-cache\": \"^1.0|^2.0|^3.0\"", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1661941378", + "repo_full_name": "laravel/framework", + "pr_number": 51983, + "pr_file": "src/Illuminate/Cache/composer.json", + "discussion_id": "1661941378", + "commented_code": "@@ -18,7 +18,8 @@\n \"illuminate/collections\": \"^11.0\",\n \"illuminate/contracts\": \"^11.0\",\n \"illuminate/macroable\": \"^11.0\",\n- \"illuminate/support\": \"^11.0\"\n+ \"illuminate/support\": \"^11.0\",\n+ \"psr/simple-cache\": \"^1.0|^2.0|^3.0\"", + "comment_created_at": "2024-07-02T07:00:36+00:00", + "comment_author": "driesvints", + "comment_body": "This isn't correct. This package doesn't use this dependency directly but through a Symfony component. Please revert this.", + "pr_file_module": null + }, + { + "comment_id": "1661979504", + "repo_full_name": "laravel/framework", + "pr_number": 51983, + "pr_file": "src/Illuminate/Cache/composer.json", + "discussion_id": "1661941378", + "commented_code": "@@ -18,7 +18,8 @@\n \"illuminate/collections\": \"^11.0\",\n \"illuminate/contracts\": \"^11.0\",\n \"illuminate/macroable\": \"^11.0\",\n- \"illuminate/support\": \"^11.0\"\n+ \"illuminate/support\": \"^11.0\",\n+ \"psr/simple-cache\": \"^1.0|^2.0|^3.0\"", + "comment_created_at": "2024-07-02T07:27:40+00:00", + "comment_author": "seriquynh", + "comment_body": "Oh, my bad. `illuminate\\cache` doesn't use directly `psr/simple-cache`, it uses through `illuminate/contracts`. I restored this change and updated PR's description. Thank you!\r\nhttps://github.com/laravel/framework/blob/11.x/src/Illuminate/Contracts/Cache/Repository.php\r\n\r\n```php\r\nalgorithm(), [\n 'memory_cost' => $this->memory($options),\n 'time_cost' => $this->time($options),\n 'threads' => $this->threads($options),\n ]);\n\n if (! 
is_string($hash)) {\n try {\n $hash = password_hash($value, $this->algorithm(), [\n 'memory_cost' => $this->memory($options),\n 'time_cost' => $this->time($options),\n 'threads' => $this->threads($options),\n ]);\n } catch (Error) {", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1878116494", + "repo_full_name": "laravel/framework", + "pr_number": 53821, + "pr_file": "src/Illuminate/Hashing/ArgonHasher.php", + "discussion_id": "1878116494", + "commented_code": "@@ -60,13 +61,13 @@ public function __construct(array $options = [])\n */\n public function make(#[\\SensitiveParameter] $value, array $options = [])\n {\n- $hash = @password_hash($value, $this->algorithm(), [\n- 'memory_cost' => $this->memory($options),\n- 'time_cost' => $this->time($options),\n- 'threads' => $this->threads($options),\n- ]);\n-\n- if (! is_string($hash)) {\n+ try {\n+ $hash = password_hash($value, $this->algorithm(), [\n+ 'memory_cost' => $this->memory($options),\n+ 'time_cost' => $this->time($options),\n+ 'threads' => $this->threads($options),\n+ ]);\n+ } catch (Error) {", + "comment_created_at": "2024-12-10T13:40:39+00:00", + "comment_author": "cosmastech", + "comment_body": "Might it be helpful to add this caught Error as the `previous` parameter of the RuntimeException?", + "pr_file_module": null + }, + { + "comment_id": "1878332094", + "repo_full_name": "laravel/framework", + "pr_number": 53821, + "pr_file": "src/Illuminate/Hashing/ArgonHasher.php", + "discussion_id": "1878116494", + "commented_code": "@@ -60,13 +61,13 @@ public function __construct(array $options = [])\n */\n public function make(#[\\SensitiveParameter] $value, array $options = [])\n {\n- $hash = @password_hash($value, $this->algorithm(), [\n- 'memory_cost' => $this->memory($options),\n- 'time_cost' => $this->time($options),\n- 'threads' => $this->threads($options),\n- ]);\n-\n- if (! is_string($hash)) {\n+ try {\n+ $hash = password_hash($value, $this->algorithm(), [\n+ 'memory_cost' => $this->memory($options),\n+ 'time_cost' => $this->time($options),\n+ 'threads' => $this->threads($options),\n+ ]);\n+ } catch (Error) {", + "comment_created_at": "2024-12-10T15:38:01+00:00", + "comment_author": "browner12", + "comment_body": "yah, I was considering that. is there any security implication though, because it could expose info like which algo is being used?", + "pr_file_module": null + }, + { + "comment_id": "1878386477", + "repo_full_name": "laravel/framework", + "pr_number": 53821, + "pr_file": "src/Illuminate/Hashing/ArgonHasher.php", + "discussion_id": "1878116494", + "commented_code": "@@ -60,13 +61,13 @@ public function __construct(array $options = [])\n */\n public function make(#[\\SensitiveParameter] $value, array $options = [])\n {\n- $hash = @password_hash($value, $this->algorithm(), [\n- 'memory_cost' => $this->memory($options),\n- 'time_cost' => $this->time($options),\n- 'threads' => $this->threads($options),\n- ]);\n-\n- if (! 
is_string($hash)) {\n+ try {\n+ $hash = password_hash($value, $this->algorithm(), [\n+ 'memory_cost' => $this->memory($options),\n+ 'time_cost' => $this->time($options),\n+ 'threads' => $this->threads($options),\n+ ]);\n+ } catch (Error) {", + "comment_created_at": "2024-12-10T16:09:01+00:00", + "comment_author": "cosmastech", + "comment_body": "Isn't it already showing the hashing algo in the exception message?\n\nIn theory, these exceptions should not be passed along to the end-user when the app is set to production mode.\n\n(I don't have a strong opinion on it either way, just a thought as I have fallen into the practice of forwarding exceptions when throwing a new one)", + "pr_file_module": null + }, + { + "comment_id": "1878387944", + "repo_full_name": "laravel/framework", + "pr_number": 53821, + "pr_file": "src/Illuminate/Hashing/ArgonHasher.php", + "discussion_id": "1878116494", + "commented_code": "@@ -60,13 +61,13 @@ public function __construct(array $options = [])\n */\n public function make(#[\\SensitiveParameter] $value, array $options = [])\n {\n- $hash = @password_hash($value, $this->algorithm(), [\n- 'memory_cost' => $this->memory($options),\n- 'time_cost' => $this->time($options),\n- 'threads' => $this->threads($options),\n- ]);\n-\n- if (! is_string($hash)) {\n+ try {\n+ $hash = password_hash($value, $this->algorithm(), [\n+ 'memory_cost' => $this->memory($options),\n+ 'time_cost' => $this->time($options),\n+ 'threads' => $this->threads($options),\n+ ]);\n+ } catch (Error) {", + "comment_created_at": "2024-12-10T16:09:58+00:00", + "comment_author": "cosmastech", + "comment_body": "It also catches a generic Error object, which could be caused by any number of things. At least this way you can see what specifically happened.", + "pr_file_module": null + }, + { + "comment_id": "1878417875", + "repo_full_name": "laravel/framework", + "pr_number": 53821, + "pr_file": "src/Illuminate/Hashing/ArgonHasher.php", + "discussion_id": "1878116494", + "commented_code": "@@ -60,13 +61,13 @@ public function __construct(array $options = [])\n */\n public function make(#[\\SensitiveParameter] $value, array $options = [])\n {\n- $hash = @password_hash($value, $this->algorithm(), [\n- 'memory_cost' => $this->memory($options),\n- 'time_cost' => $this->time($options),\n- 'threads' => $this->threads($options),\n- ]);\n-\n- if (! is_string($hash)) {\n+ try {\n+ $hash = password_hash($value, $this->algorithm(), [\n+ 'memory_cost' => $this->memory($options),\n+ 'time_cost' => $this->time($options),\n+ 'threads' => $this->threads($options),\n+ ]);\n+ } catch (Error) {", + "comment_created_at": "2024-12-10T16:24:50+00:00", + "comment_author": "browner12", + "comment_body": "it could also expose other info, like cost factor, etc. \r\n\r\nyou're right, it should never show to users in production.\r\n\r\nI'd say give a PR a shot, and see what other people think.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1837330959", + "pr_number": 53370, + "pr_file": "src/Illuminate/Support/Url.php", + "created_at": "2024-11-12T00:21:55+00:00", + "commented_code": "algorithm(), [\n 'memory_cost' => $this->memory($options),\n 'time_cost' => $this->time($options),\n 'threads' => $this->threads($options),\n ]);\n\n if (! 
is_string($hash)) {\n try {\n $hash = password_hash($value, $this->algorithm(), [\n 'memory_cost' => $this->memory($options),\n 'time_cost' => $this->time($options),\n 'threads' => $this->threads($options),\n ]);\n } catch (Error) {", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1878116494", - "repo_full_name": "laravel/framework", - "pr_number": 53821, - "pr_file": "src/Illuminate/Hashing/ArgonHasher.php", - "discussion_id": "1878116494", - "commented_code": "@@ -60,13 +61,13 @@ public function __construct(array $options = [])\n */\n public function make(#[\\SensitiveParameter] $value, array $options = [])\n {\n- $hash = @password_hash($value, $this->algorithm(), [\n- 'memory_cost' => $this->memory($options),\n- 'time_cost' => $this->time($options),\n- 'threads' => $this->threads($options),\n- ]);\n-\n- if (! is_string($hash)) {\n+ try {\n+ $hash = password_hash($value, $this->algorithm(), [\n+ 'memory_cost' => $this->memory($options),\n+ 'time_cost' => $this->time($options),\n+ 'threads' => $this->threads($options),\n+ ]);\n+ } catch (Error) {", - "comment_created_at": "2024-12-10T13:40:39+00:00", - "comment_author": "cosmastech", - "comment_body": "Might it be helpful to add this caught Error as the `previous` parameter of the RuntimeException?", - "pr_file_module": null - }, - { - "comment_id": "1878332094", - "repo_full_name": "laravel/framework", - "pr_number": 53821, - "pr_file": "src/Illuminate/Hashing/ArgonHasher.php", - "discussion_id": "1878116494", - "commented_code": "@@ -60,13 +61,13 @@ public function __construct(array $options = [])\n */\n public function make(#[\\SensitiveParameter] $value, array $options = [])\n {\n- $hash = @password_hash($value, $this->algorithm(), [\n- 'memory_cost' => $this->memory($options),\n- 'time_cost' => $this->time($options),\n- 'threads' => $this->threads($options),\n- ]);\n-\n- if (! is_string($hash)) {\n+ try {\n+ $hash = password_hash($value, $this->algorithm(), [\n+ 'memory_cost' => $this->memory($options),\n+ 'time_cost' => $this->time($options),\n+ 'threads' => $this->threads($options),\n+ ]);\n+ } catch (Error) {", - "comment_created_at": "2024-12-10T15:38:01+00:00", - "comment_author": "browner12", - "comment_body": "yah, I was considering that. is there any security implication though, because it could expose info like which algo is being used?", - "pr_file_module": null - }, - { - "comment_id": "1878386477", - "repo_full_name": "laravel/framework", - "pr_number": 53821, - "pr_file": "src/Illuminate/Hashing/ArgonHasher.php", - "discussion_id": "1878116494", - "commented_code": "@@ -60,13 +61,13 @@ public function __construct(array $options = [])\n */\n public function make(#[\\SensitiveParameter] $value, array $options = [])\n {\n- $hash = @password_hash($value, $this->algorithm(), [\n- 'memory_cost' => $this->memory($options),\n- 'time_cost' => $this->time($options),\n- 'threads' => $this->threads($options),\n- ]);\n-\n- if (! 
is_string($hash)) {\n+ try {\n+ $hash = password_hash($value, $this->algorithm(), [\n+ 'memory_cost' => $this->memory($options),\n+ 'time_cost' => $this->time($options),\n+ 'threads' => $this->threads($options),\n+ ]);\n+ } catch (Error) {", - "comment_created_at": "2024-12-10T16:09:01+00:00", - "comment_author": "cosmastech", - "comment_body": "Isn't it already showing the hashing algo in the exception message?\n\nIn theory, these exceptions should not be passed along to the end-user when the app is set to production mode.\n\n(I don't have a strong opinion on it either way, just a thought as I have fallen into the practice of forwarding exceptions when throwing a new one)", - "pr_file_module": null - }, - { - "comment_id": "1878387944", - "repo_full_name": "laravel/framework", - "pr_number": 53821, - "pr_file": "src/Illuminate/Hashing/ArgonHasher.php", - "discussion_id": "1878116494", - "commented_code": "@@ -60,13 +61,13 @@ public function __construct(array $options = [])\n */\n public function make(#[\\SensitiveParameter] $value, array $options = [])\n {\n- $hash = @password_hash($value, $this->algorithm(), [\n- 'memory_cost' => $this->memory($options),\n- 'time_cost' => $this->time($options),\n- 'threads' => $this->threads($options),\n- ]);\n-\n- if (! is_string($hash)) {\n+ try {\n+ $hash = password_hash($value, $this->algorithm(), [\n+ 'memory_cost' => $this->memory($options),\n+ 'time_cost' => $this->time($options),\n+ 'threads' => $this->threads($options),\n+ ]);\n+ } catch (Error) {", - "comment_created_at": "2024-12-10T16:09:58+00:00", - "comment_author": "cosmastech", - "comment_body": "It also catches a generic Error object, which could be caused by any number of things. At least this way you can see what specifically happened.", - "pr_file_module": null - }, - { - "comment_id": "1878417875", - "repo_full_name": "laravel/framework", - "pr_number": 53821, - "pr_file": "src/Illuminate/Hashing/ArgonHasher.php", - "discussion_id": "1878116494", - "commented_code": "@@ -60,13 +61,13 @@ public function __construct(array $options = [])\n */\n public function make(#[\\SensitiveParameter] $value, array $options = [])\n {\n- $hash = @password_hash($value, $this->algorithm(), [\n- 'memory_cost' => $this->memory($options),\n- 'time_cost' => $this->time($options),\n- 'threads' => $this->threads($options),\n- ]);\n-\n- if (! is_string($hash)) {\n+ try {\n+ $hash = password_hash($value, $this->algorithm(), [\n+ 'memory_cost' => $this->memory($options),\n+ 'time_cost' => $this->time($options),\n+ 'threads' => $this->threads($options),\n+ ]);\n+ } catch (Error) {", - "comment_created_at": "2024-12-10T16:24:50+00:00", - "comment_author": "browner12", - "comment_body": "it could also expose other info, like cost factor, etc. 
\r\n\r\nyou're right, it should never show to users in production.\r\n\r\nI'd say give a PR a shot, and see what other people think.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1837330959", - "pr_number": 53370, - "pr_file": "src/Illuminate/Support/Url.php", - "created_at": "2024-11-12T00:21:55+00:00", - "commented_code": "createSchema();\n }\n\n protected function tearDown(): void\n {\n Schema::drop('test_models');\n\n parent::tearDown();\n }\n\n protected function getEnvironmentSetUp($app)\n {\n $app['config']->set('database.default', 'testing');\n }\n\n #[Test]\n public function it_finds_existing_model_when_using_enum_id()\n {\n EloquentBuilderFindOrFailWithEnumTestModel::create(['id' => 1, 'name' => 'one']);\n\n $model = EloquentBuilderFindOrFailWithEnumTestModel::findOrFail(EloquentBuilderFindOrFailWithEnumTestBackedEnum::One);\n\n $this->assertInstanceOf(EloquentBuilderFindOrFailWithEnumTestModel::class, $model);\n $this->assertTrue($model->exists);\n $this->assertEquals(1, $model->id);\n }\n\n #[Test]\n public function it_throws_exception_when_enum_id_does_not_exist()", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2174034334", + "repo_full_name": "laravel/framework", + "pr_number": 56169, + "pr_file": "tests/Database/DatabaseEloquentBuilderFindOrFailWithEnumTest.php", + "discussion_id": "2174034334", + "commented_code": "@@ -0,0 +1,74 @@\n+createSchema();\n+ }\n+\n+ protected function tearDown(): void\n+ {\n+ Schema::drop('test_models');\n+\n+ parent::tearDown();\n+ }\n+\n+ protected function getEnvironmentSetUp($app)\n+ {\n+ $app['config']->set('database.default', 'testing');\n+ }\n+\n+ #[Test]\n+ public function it_finds_existing_model_when_using_enum_id()\n+ {\n+ EloquentBuilderFindOrFailWithEnumTestModel::create(['id' => 1, 'name' => 'one']);\n+\n+ $model = EloquentBuilderFindOrFailWithEnumTestModel::findOrFail(EloquentBuilderFindOrFailWithEnumTestBackedEnum::One);\n+\n+ $this->assertInstanceOf(EloquentBuilderFindOrFailWithEnumTestModel::class, $model);\n+ $this->assertTrue($model->exists);\n+ $this->assertEquals(1, $model->id);\n+ }\n+\n+ #[Test]\n+ public function it_throws_exception_when_enum_id_does_not_exist()", + "comment_created_at": "2025-06-30T00:51:53+00:00", + "comment_author": "crynobone", + "comment_body": "We don't use `Test` attribute in the framework, prefix the method name with `test` and use studly case.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2084631773", + "pr_number": 55715, + "pr_file": "src/Illuminate/Collections/Arr.php", + "created_at": "2025-05-12T13:05:05+00:00", + "commented_code": "return is_array($value) || $value instanceof ArrayAccess;\n }\n\n /**\n * Determine whether the given value is arrayable.\n *\n * @param mixed $value\n * @return bool\n */\n public static function arrayable($value)", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2084631773", + "repo_full_name": "laravel/framework", + "pr_number": 55715, + "pr_file": "src/Illuminate/Collections/Arr.php", + "discussion_id": "2084631773", + "commented_code": "@@ -23,6 +28,21 @@ public static function accessible($value)\n return is_array($value) || $value instanceof ArrayAccess;\n }\n \n+ /**\n+ * Determine whether the given value is arrayable.\n+ *\n+ * @param mixed $value\n+ * @return bool\n+ */\n+ public static function arrayable($value)", + "comment_created_at": "2025-05-12T13:05:05+00:00", + "comment_author": "shaedrich", + "comment_body": "Wouldn't it make 
sense to use a prefix here to indicate the return value? Because, I would expect the method to convert it from array-like values to `Arrayable` instances\r\n```suggestion\r\n public static function isArrayable($value)\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2084658378", + "repo_full_name": "laravel/framework", + "pr_number": 55715, + "pr_file": "src/Illuminate/Collections/Arr.php", + "discussion_id": "2084631773", + "commented_code": "@@ -23,6 +28,21 @@ public static function accessible($value)\n return is_array($value) || $value instanceof ArrayAccess;\n }\n \n+ /**\n+ * Determine whether the given value is arrayable.\n+ *\n+ * @param mixed $value\n+ * @return bool\n+ */\n+ public static function arrayable($value)", + "comment_created_at": "2025-05-12T13:18:12+00:00", + "comment_author": "daniser", + "comment_body": "I followed naming convention used in `Arr::accessible`.", + "pr_file_module": null + }, + { + "comment_id": "2084679790", + "repo_full_name": "laravel/framework", + "pr_number": 55715, + "pr_file": "src/Illuminate/Collections/Arr.php", + "discussion_id": "2084631773", + "commented_code": "@@ -23,6 +28,21 @@ public static function accessible($value)\n return is_array($value) || $value instanceof ArrayAccess;\n }\n \n+ /**\n+ * Determine whether the given value is arrayable.\n+ *\n+ * @param mixed $value\n+ * @return bool\n+ */\n+ public static function arrayable($value)", + "comment_created_at": "2025-05-12T13:28:41+00:00", + "comment_author": "shaedrich", + "comment_body": "Ah, thanks for the explanation 👍🏻 ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1898695749", + "pr_number": 54027, + "pr_file": "src/Illuminate/Support/Number.php", + "created_at": "2024-12-27T20:06:02+00:00", + "commented_code": "return static::$currency;\n }\n\n /**\n * Generate a random number of the given length.", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1898695749", + "repo_full_name": "laravel/framework", + "pr_number": 54027, + "pr_file": "src/Illuminate/Support/Number.php", + "discussion_id": "1898695749", + "commented_code": "@@ -367,6 +367,30 @@ public static function defaultCurrency()\n return static::$currency;\n }\n \n+ /**\n+ * Generate a random number of the given length.", + "comment_created_at": "2024-12-27T20:06:02+00:00", + "comment_author": "shaedrich", + "comment_body": "\"number\" might be misleading here:\r\n```suggestion\r\n * Generate a random integer of the given length.\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1898696764", + "pr_number": 54027, + "pr_file": "src/Illuminate/Support/Number.php", + "created_at": "2024-12-27T20:08:52+00:00", + "commented_code": "return static::$currency;\n }\n\n /**\n * Generate a random number of the given length.\n *\n * @param int $length\n * @return int\n */\n public static function random(int $length = 6): int\n {\n $maxLength = strlen((string) PHP_INT_MAX);\n\n if ($length < 1 || $length > $maxLength) {\n return 0;\n }\n\n $min = 10 ** ($length - 1);\n $max = (10 ** $length) - 1;\n\n if ($length == $maxLength) {", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1898696764", + "repo_full_name": "laravel/framework", + "pr_number": 54027, + "pr_file": "src/Illuminate/Support/Number.php", + "discussion_id": "1898696764", + "commented_code": "@@ -367,6 +367,30 @@ public static function defaultCurrency()\n return static::$currency;\n }\n \n+ /**\n+ * Generate a random number of the given 
length.\n+ *\n+ * @param int $length\n+ * @return int\n+ */\n+ public static function random(int $length = 6): int\n+ {\n+ $maxLength = strlen((string) PHP_INT_MAX);\n+\n+ if ($length < 1 || $length > $maxLength) {\n+ return 0;\n+ }\n+\n+ $min = 10 ** ($length - 1);\n+ $max = (10 ** $length) - 1;\n+\n+ if ($length == $maxLength) {", + "comment_created_at": "2024-12-27T20:08:52+00:00", + "comment_author": "shaedrich", + "comment_body": "Even though, you are using `strlen()` here, I'd say, \"digits\" is the more correct term when dealing with numbers:\r\n```suggestion\r\n * Generate a random number with the given amount of digits.\r\n *\r\n * @param int $digits\r\n * @return int\r\n */\r\n public static function random(int $digits = 6): int\r\n {\r\n $maxDigits = strlen((string) PHP_INT_MAX);\r\n\r\n if ($digits < 1 || $digits > $maxDigits) {\r\n return 0;\r\n }\r\n\r\n $min = 10 ** ($digits - 1);\r\n $max = (10 ** $digits) - 1;\r\n\r\n if ($digits == $maxDigits) {\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1900456982", + "repo_full_name": "laravel/framework", + "pr_number": 54027, + "pr_file": "src/Illuminate/Support/Number.php", + "discussion_id": "1898696764", + "commented_code": "@@ -367,6 +367,30 @@ public static function defaultCurrency()\n return static::$currency;\n }\n \n+ /**\n+ * Generate a random number of the given length.\n+ *\n+ * @param int $length\n+ * @return int\n+ */\n+ public static function random(int $length = 6): int\n+ {\n+ $maxLength = strlen((string) PHP_INT_MAX);\n+\n+ if ($length < 1 || $length > $maxLength) {\n+ return 0;\n+ }\n+\n+ $min = 10 ** ($length - 1);\n+ $max = (10 ** $length) - 1;\n+\n+ if ($length == $maxLength) {", + "comment_created_at": "2025-01-01T20:30:55+00:00", + "comment_author": "dilovanmatini", + "comment_body": "Yes, it is more readable.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1470746887", + "pr_number": 49900, + "pr_file": "src/Illuminate/Translation/Translator.php", + "created_at": "2024-01-30T08:14:28+00:00", + "commented_code": "// the translator was instantiated. Then, we can load the lines and return.\n $locales = $fallback ? $this->localeArray($locale) : [$locale];\n\n foreach ($locales as $locale) {\n foreach ($locales as $_locale) {\n if (! is_null($line = $this->getLine(\n $namespace, $group, $locale, $item, $replace\n $namespace, $group, $_locale, $item, $replace", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1470746887", + "repo_full_name": "laravel/framework", + "pr_number": 49900, + "pr_file": "src/Illuminate/Translation/Translator.php", + "discussion_id": "1470746887", + "commented_code": "@@ -160,9 +160,9 @@ public function get($key, array $replace = [], $locale = null, $fallback = true)\n // the translator was instantiated. Then, we can load the lines and return.\n $locales = $fallback ? $this->localeArray($locale) : [$locale];\n \n- foreach ($locales as $locale) {\n+ foreach ($locales as $_locale) {\n if (! is_null($line = $this->getLine(\n- $namespace, $group, $locale, $item, $replace\n+ $namespace, $group, $_locale, $item, $replace", + "comment_created_at": "2024-01-30T08:14:28+00:00", + "comment_author": "driesvints", + "comment_body": "Use `$languageLineLocale` instead here. 
`$_` isn't a syntax we use anywhere.", + "pr_file_module": null + }, + { + "comment_id": "1470754575", + "repo_full_name": "laravel/framework", + "pr_number": 49900, + "pr_file": "src/Illuminate/Translation/Translator.php", + "discussion_id": "1470746887", + "commented_code": "@@ -160,9 +160,9 @@ public function get($key, array $replace = [], $locale = null, $fallback = true)\n // the translator was instantiated. Then, we can load the lines and return.\n $locales = $fallback ? $this->localeArray($locale) : [$locale];\n \n- foreach ($locales as $locale) {\n+ foreach ($locales as $_locale) {\n if (! is_null($line = $this->getLine(\n- $namespace, $group, $locale, $item, $replace\n+ $namespace, $group, $_locale, $item, $replace", + "comment_created_at": "2024-01-30T08:21:24+00:00", + "comment_author": "VicGUTT", + "comment_body": "Sure. I was wondering about it myself.\r\n\r\nThanks for the feedback; done in: [549de01](https://github.com/laravel/framework/pull/49900/commits/549de01ea9f7fa17b272d9baa04be0fe186b9c26)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1714804417", + "pr_number": 52461, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/PreventsCircularRecursion.php", + "created_at": "2024-08-13T07:26:20+00:00", + "commented_code": ">\n */\n protected static $recursionCache;\n\n /**\n * Get the current recursion cache being used by the model.\n *\n * @return \\WeakMap\n */\n protected static function getRecursionCache()\n {\n return static::$recursionCache ??= new \\WeakMap();\n }\n\n /**\n * Get the current stack of methods being called recursively.\n *\n * @param object $object\n * @return array\n */\n protected static function getRecursiveCallStack($object): array\n {\n return static::getRecursionCache()->offsetExists($object)\n ? static::getRecursionCache()->offsetGet($object)\n : [];\n }\n\n /**\n * Prevent a method from being called multiple times on the same object within the same call stack.\n *\n * @param callable $callback\n * @param mixed $default\n * @return mixed\n */\n protected function once($callback, $default = null)", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1714804417", + "repo_full_name": "laravel/framework", + "pr_number": 52461, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/PreventsCircularRecursion.php", + "discussion_id": "1714804417", + "commented_code": "@@ -0,0 +1,74 @@\n+>\n+ */\n+ protected static $recursionCache;\n+\n+ /**\n+ * Get the current recursion cache being used by the model.\n+ *\n+ * @return \\WeakMap\n+ */\n+ protected static function getRecursionCache()\n+ {\n+ return static::$recursionCache ??= new \\WeakMap();\n+ }\n+\n+ /**\n+ * Get the current stack of methods being called recursively.\n+ *\n+ * @param object $object\n+ * @return array\n+ */\n+ protected static function getRecursiveCallStack($object): array\n+ {\n+ return static::getRecursionCache()->offsetExists($object)\n+ ? 
static::getRecursionCache()->offsetGet($object)\n+ : [];\n+ }\n+\n+ /**\n+ * Prevent a method from being called multiple times on the same object within the same call stack.\n+ *\n+ * @param callable $callback\n+ * @param mixed $default\n+ * @return mixed\n+ */\n+ protected function once($callback, $default = null)", + "comment_created_at": "2024-08-13T07:26:20+00:00", + "comment_author": "Tofandel", + "comment_body": "I do think following laravel's pattern this should be renamed to `withoutInfiniteRecursion` or `withoutCircularRecursion`\r\n\r\nAs you've explained it has a very different behavior than the once helper and could be confusing from a readability standpoint", + "pr_file_module": null + }, + { + "comment_id": "1714811116", + "repo_full_name": "laravel/framework", + "pr_number": 52461, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/PreventsCircularRecursion.php", + "discussion_id": "1714804417", + "commented_code": "@@ -0,0 +1,74 @@\n+>\n+ */\n+ protected static $recursionCache;\n+\n+ /**\n+ * Get the current recursion cache being used by the model.\n+ *\n+ * @return \\WeakMap\n+ */\n+ protected static function getRecursionCache()\n+ {\n+ return static::$recursionCache ??= new \\WeakMap();\n+ }\n+\n+ /**\n+ * Get the current stack of methods being called recursively.\n+ *\n+ * @param object $object\n+ * @return array\n+ */\n+ protected static function getRecursiveCallStack($object): array\n+ {\n+ return static::getRecursionCache()->offsetExists($object)\n+ ? static::getRecursionCache()->offsetGet($object)\n+ : [];\n+ }\n+\n+ /**\n+ * Prevent a method from being called multiple times on the same object within the same call stack.\n+ *\n+ * @param callable $callback\n+ * @param mixed $default\n+ * @return mixed\n+ */\n+ protected function once($callback, $default = null)", + "comment_created_at": "2024-08-13T07:31:08+00:00", + "comment_author": "samlev", + "comment_body": "I had actuall named it `withoutRecursion()` originally then changed it to `once()`", + "pr_file_module": null + }, + { + "comment_id": "1714829338", + "repo_full_name": "laravel/framework", + "pr_number": 52461, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/PreventsCircularRecursion.php", + "discussion_id": "1714804417", + "commented_code": "@@ -0,0 +1,74 @@\n+>\n+ */\n+ protected static $recursionCache;\n+\n+ /**\n+ * Get the current recursion cache being used by the model.\n+ *\n+ * @return \\WeakMap\n+ */\n+ protected static function getRecursionCache()\n+ {\n+ return static::$recursionCache ??= new \\WeakMap();\n+ }\n+\n+ /**\n+ * Get the current stack of methods being called recursively.\n+ *\n+ * @param object $object\n+ * @return array\n+ */\n+ protected static function getRecursiveCallStack($object): array\n+ {\n+ return static::getRecursionCache()->offsetExists($object)\n+ ? 
static::getRecursionCache()->offsetGet($object)\n+ : [];\n+ }\n+\n+ /**\n+ * Prevent a method from being called multiple times on the same object within the same call stack.\n+ *\n+ * @param callable $callback\n+ * @param mixed $default\n+ * @return mixed\n+ */\n+ protected function once($callback, $default = null)", + "comment_created_at": "2024-08-13T07:44:44+00:00", + "comment_author": "Tofandel", + "comment_body": "That also works, as long as it makes the code more readable in `Model` so you can understand without having to take a look inside the function", + "pr_file_module": null + }, + { + "comment_id": "1715001879", + "repo_full_name": "laravel/framework", + "pr_number": 52461, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/PreventsCircularRecursion.php", + "discussion_id": "1714804417", + "commented_code": "@@ -0,0 +1,74 @@\n+>\n+ */\n+ protected static $recursionCache;\n+\n+ /**\n+ * Get the current recursion cache being used by the model.\n+ *\n+ * @return \\WeakMap\n+ */\n+ protected static function getRecursionCache()\n+ {\n+ return static::$recursionCache ??= new \\WeakMap();\n+ }\n+\n+ /**\n+ * Get the current stack of methods being called recursively.\n+ *\n+ * @param object $object\n+ * @return array\n+ */\n+ protected static function getRecursiveCallStack($object): array\n+ {\n+ return static::getRecursionCache()->offsetExists($object)\n+ ? static::getRecursionCache()->offsetGet($object)\n+ : [];\n+ }\n+\n+ /**\n+ * Prevent a method from being called multiple times on the same object within the same call stack.\n+ *\n+ * @param callable $callback\n+ * @param mixed $default\n+ * @return mixed\n+ */\n+ protected function once($callback, $default = null)", + "comment_created_at": "2024-08-13T09:44:11+00:00", + "comment_author": "samlev", + "comment_body": "I've updated this back to `withoutRecursion()`", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/framework-name-indicates-clear-purpose.md b/_reviewers/framework-name-indicates-clear-purpose.md index 4d2f6d8..9fb5cb5 100644 --- a/_reviewers/framework-name-indicates-clear-purpose.md +++ b/_reviewers/framework-name-indicates-clear-purpose.md @@ -34,223 +34,3 @@ public function isArrayable($value) { } // Clear purpose ``` This standard helps prevent confusion, makes code self-documenting, and maintains consistency across the codebase. 
- - -[ - { - "discussion_id": "2174034334", - "pr_number": 56169, - "pr_file": "tests/Database/DatabaseEloquentBuilderFindOrFailWithEnumTest.php", - "created_at": "2025-06-30T00:51:53+00:00", - "commented_code": "createSchema();\n }\n\n protected function tearDown(): void\n {\n Schema::drop('test_models');\n\n parent::tearDown();\n }\n\n protected function getEnvironmentSetUp($app)\n {\n $app['config']->set('database.default', 'testing');\n }\n\n #[Test]\n public function it_finds_existing_model_when_using_enum_id()\n {\n EloquentBuilderFindOrFailWithEnumTestModel::create(['id' => 1, 'name' => 'one']);\n\n $model = EloquentBuilderFindOrFailWithEnumTestModel::findOrFail(EloquentBuilderFindOrFailWithEnumTestBackedEnum::One);\n\n $this->assertInstanceOf(EloquentBuilderFindOrFailWithEnumTestModel::class, $model);\n $this->assertTrue($model->exists);\n $this->assertEquals(1, $model->id);\n }\n\n #[Test]\n public function it_throws_exception_when_enum_id_does_not_exist()", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2174034334", - "repo_full_name": "laravel/framework", - "pr_number": 56169, - "pr_file": "tests/Database/DatabaseEloquentBuilderFindOrFailWithEnumTest.php", - "discussion_id": "2174034334", - "commented_code": "@@ -0,0 +1,74 @@\n+createSchema();\n+ }\n+\n+ protected function tearDown(): void\n+ {\n+ Schema::drop('test_models');\n+\n+ parent::tearDown();\n+ }\n+\n+ protected function getEnvironmentSetUp($app)\n+ {\n+ $app['config']->set('database.default', 'testing');\n+ }\n+\n+ #[Test]\n+ public function it_finds_existing_model_when_using_enum_id()\n+ {\n+ EloquentBuilderFindOrFailWithEnumTestModel::create(['id' => 1, 'name' => 'one']);\n+\n+ $model = EloquentBuilderFindOrFailWithEnumTestModel::findOrFail(EloquentBuilderFindOrFailWithEnumTestBackedEnum::One);\n+\n+ $this->assertInstanceOf(EloquentBuilderFindOrFailWithEnumTestModel::class, $model);\n+ $this->assertTrue($model->exists);\n+ $this->assertEquals(1, $model->id);\n+ }\n+\n+ #[Test]\n+ public function it_throws_exception_when_enum_id_does_not_exist()", - "comment_created_at": "2025-06-30T00:51:53+00:00", - "comment_author": "crynobone", - "comment_body": "We don't use `Test` attribute in the framework, prefix the method name with `test` and use studly case.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2084631773", - "pr_number": 55715, - "pr_file": "src/Illuminate/Collections/Arr.php", - "created_at": "2025-05-12T13:05:05+00:00", - "commented_code": "return is_array($value) || $value instanceof ArrayAccess;\n }\n\n /**\n * Determine whether the given value is arrayable.\n *\n * @param mixed $value\n * @return bool\n */\n public static function arrayable($value)", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2084631773", - "repo_full_name": "laravel/framework", - "pr_number": 55715, - "pr_file": "src/Illuminate/Collections/Arr.php", - "discussion_id": "2084631773", - "commented_code": "@@ -23,6 +28,21 @@ public static function accessible($value)\n return is_array($value) || $value instanceof ArrayAccess;\n }\n \n+ /**\n+ * Determine whether the given value is arrayable.\n+ *\n+ * @param mixed $value\n+ * @return bool\n+ */\n+ public static function arrayable($value)", - "comment_created_at": "2025-05-12T13:05:05+00:00", - "comment_author": "shaedrich", - "comment_body": "Wouldn't it make sense to use a prefix here to indicate the return value? 
Because, I would expect the method to convert it from array-like values to `Arrayable` instances\r\n```suggestion\r\n public static function isArrayable($value)\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2084658378", - "repo_full_name": "laravel/framework", - "pr_number": 55715, - "pr_file": "src/Illuminate/Collections/Arr.php", - "discussion_id": "2084631773", - "commented_code": "@@ -23,6 +28,21 @@ public static function accessible($value)\n return is_array($value) || $value instanceof ArrayAccess;\n }\n \n+ /**\n+ * Determine whether the given value is arrayable.\n+ *\n+ * @param mixed $value\n+ * @return bool\n+ */\n+ public static function arrayable($value)", - "comment_created_at": "2025-05-12T13:18:12+00:00", - "comment_author": "daniser", - "comment_body": "I followed naming convention used in `Arr::accessible`.", - "pr_file_module": null - }, - { - "comment_id": "2084679790", - "repo_full_name": "laravel/framework", - "pr_number": 55715, - "pr_file": "src/Illuminate/Collections/Arr.php", - "discussion_id": "2084631773", - "commented_code": "@@ -23,6 +28,21 @@ public static function accessible($value)\n return is_array($value) || $value instanceof ArrayAccess;\n }\n \n+ /**\n+ * Determine whether the given value is arrayable.\n+ *\n+ * @param mixed $value\n+ * @return bool\n+ */\n+ public static function arrayable($value)", - "comment_created_at": "2025-05-12T13:28:41+00:00", - "comment_author": "shaedrich", - "comment_body": "Ah, thanks for the explanation \ud83d\udc4d\ud83c\udffb ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1898695749", - "pr_number": 54027, - "pr_file": "src/Illuminate/Support/Number.php", - "created_at": "2024-12-27T20:06:02+00:00", - "commented_code": "return static::$currency;\n }\n\n /**\n * Generate a random number of the given length.", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1898695749", - "repo_full_name": "laravel/framework", - "pr_number": 54027, - "pr_file": "src/Illuminate/Support/Number.php", - "discussion_id": "1898695749", - "commented_code": "@@ -367,6 +367,30 @@ public static function defaultCurrency()\n return static::$currency;\n }\n \n+ /**\n+ * Generate a random number of the given length.", - "comment_created_at": "2024-12-27T20:06:02+00:00", - "comment_author": "shaedrich", - "comment_body": "\"number\" might be misleading here:\r\n```suggestion\r\n * Generate a random integer of the given length.\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1898696764", - "pr_number": 54027, - "pr_file": "src/Illuminate/Support/Number.php", - "created_at": "2024-12-27T20:08:52+00:00", - "commented_code": "return static::$currency;\n }\n\n /**\n * Generate a random number of the given length.\n *\n * @param int $length\n * @return int\n */\n public static function random(int $length = 6): int\n {\n $maxLength = strlen((string) PHP_INT_MAX);\n\n if ($length < 1 || $length > $maxLength) {\n return 0;\n }\n\n $min = 10 ** ($length - 1);\n $max = (10 ** $length) - 1;\n\n if ($length == $maxLength) {", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1898696764", - "repo_full_name": "laravel/framework", - "pr_number": 54027, - "pr_file": "src/Illuminate/Support/Number.php", - "discussion_id": "1898696764", - "commented_code": "@@ -367,6 +367,30 @@ public static function defaultCurrency()\n return static::$currency;\n }\n \n+ /**\n+ * Generate a random number of the given length.\n+ *\n+ * @param int $length\n+ * 
@return int\n+ */\n+ public static function random(int $length = 6): int\n+ {\n+ $maxLength = strlen((string) PHP_INT_MAX);\n+\n+ if ($length < 1 || $length > $maxLength) {\n+ return 0;\n+ }\n+\n+ $min = 10 ** ($length - 1);\n+ $max = (10 ** $length) - 1;\n+\n+ if ($length == $maxLength) {", - "comment_created_at": "2024-12-27T20:08:52+00:00", - "comment_author": "shaedrich", - "comment_body": "Even though, you are using `strlen()` here, I'd say, \"digits\" is the more correct term when dealing with numbers:\r\n```suggestion\r\n * Generate a random number with the given amount of digits.\r\n *\r\n * @param int $digits\r\n * @return int\r\n */\r\n public static function random(int $digits = 6): int\r\n {\r\n $maxDigits = strlen((string) PHP_INT_MAX);\r\n\r\n if ($digits < 1 || $digits > $maxDigits) {\r\n return 0;\r\n }\r\n\r\n $min = 10 ** ($digits - 1);\r\n $max = (10 ** $digits) - 1;\r\n\r\n if ($digits == $maxDigits) {\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1900456982", - "repo_full_name": "laravel/framework", - "pr_number": 54027, - "pr_file": "src/Illuminate/Support/Number.php", - "discussion_id": "1898696764", - "commented_code": "@@ -367,6 +367,30 @@ public static function defaultCurrency()\n return static::$currency;\n }\n \n+ /**\n+ * Generate a random number of the given length.\n+ *\n+ * @param int $length\n+ * @return int\n+ */\n+ public static function random(int $length = 6): int\n+ {\n+ $maxLength = strlen((string) PHP_INT_MAX);\n+\n+ if ($length < 1 || $length > $maxLength) {\n+ return 0;\n+ }\n+\n+ $min = 10 ** ($length - 1);\n+ $max = (10 ** $length) - 1;\n+\n+ if ($length == $maxLength) {", - "comment_created_at": "2025-01-01T20:30:55+00:00", - "comment_author": "dilovanmatini", - "comment_body": "Yes, it is more readable.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1470746887", - "pr_number": 49900, - "pr_file": "src/Illuminate/Translation/Translator.php", - "created_at": "2024-01-30T08:14:28+00:00", - "commented_code": "// the translator was instantiated. Then, we can load the lines and return.\n $locales = $fallback ? $this->localeArray($locale) : [$locale];\n\n foreach ($locales as $locale) {\n foreach ($locales as $_locale) {\n if (! is_null($line = $this->getLine(\n $namespace, $group, $locale, $item, $replace\n $namespace, $group, $_locale, $item, $replace", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1470746887", - "repo_full_name": "laravel/framework", - "pr_number": 49900, - "pr_file": "src/Illuminate/Translation/Translator.php", - "discussion_id": "1470746887", - "commented_code": "@@ -160,9 +160,9 @@ public function get($key, array $replace = [], $locale = null, $fallback = true)\n // the translator was instantiated. Then, we can load the lines and return.\n $locales = $fallback ? $this->localeArray($locale) : [$locale];\n \n- foreach ($locales as $locale) {\n+ foreach ($locales as $_locale) {\n if (! is_null($line = $this->getLine(\n- $namespace, $group, $locale, $item, $replace\n+ $namespace, $group, $_locale, $item, $replace", - "comment_created_at": "2024-01-30T08:14:28+00:00", - "comment_author": "driesvints", - "comment_body": "Use `$languageLineLocale` instead here. 
`$_` isn't a syntax we use anywhere.", - "pr_file_module": null - }, - { - "comment_id": "1470754575", - "repo_full_name": "laravel/framework", - "pr_number": 49900, - "pr_file": "src/Illuminate/Translation/Translator.php", - "discussion_id": "1470746887", - "commented_code": "@@ -160,9 +160,9 @@ public function get($key, array $replace = [], $locale = null, $fallback = true)\n // the translator was instantiated. Then, we can load the lines and return.\n $locales = $fallback ? $this->localeArray($locale) : [$locale];\n \n- foreach ($locales as $locale) {\n+ foreach ($locales as $_locale) {\n if (! is_null($line = $this->getLine(\n- $namespace, $group, $locale, $item, $replace\n+ $namespace, $group, $_locale, $item, $replace", - "comment_created_at": "2024-01-30T08:21:24+00:00", - "comment_author": "VicGUTT", - "comment_body": "Sure. I was wondering about it myself.\r\n\r\nThanks for the feedback; done in: [549de01](https://github.com/laravel/framework/pull/49900/commits/549de01ea9f7fa17b272d9baa04be0fe186b9c26)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1714804417", - "pr_number": 52461, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/PreventsCircularRecursion.php", - "created_at": "2024-08-13T07:26:20+00:00", - "commented_code": ">\n */\n protected static $recursionCache;\n\n /**\n * Get the current recursion cache being used by the model.\n *\n * @return \\WeakMap\n */\n protected static function getRecursionCache()\n {\n return static::$recursionCache ??= new \\WeakMap();\n }\n\n /**\n * Get the current stack of methods being called recursively.\n *\n * @param object $object\n * @return array\n */\n protected static function getRecursiveCallStack($object): array\n {\n return static::getRecursionCache()->offsetExists($object)\n ? static::getRecursionCache()->offsetGet($object)\n : [];\n }\n\n /**\n * Prevent a method from being called multiple times on the same object within the same call stack.\n *\n * @param callable $callback\n * @param mixed $default\n * @return mixed\n */\n protected function once($callback, $default = null)", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1714804417", - "repo_full_name": "laravel/framework", - "pr_number": 52461, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/PreventsCircularRecursion.php", - "discussion_id": "1714804417", - "commented_code": "@@ -0,0 +1,74 @@\n+>\n+ */\n+ protected static $recursionCache;\n+\n+ /**\n+ * Get the current recursion cache being used by the model.\n+ *\n+ * @return \\WeakMap\n+ */\n+ protected static function getRecursionCache()\n+ {\n+ return static::$recursionCache ??= new \\WeakMap();\n+ }\n+\n+ /**\n+ * Get the current stack of methods being called recursively.\n+ *\n+ * @param object $object\n+ * @return array\n+ */\n+ protected static function getRecursiveCallStack($object): array\n+ {\n+ return static::getRecursionCache()->offsetExists($object)\n+ ? 
static::getRecursionCache()->offsetGet($object)\n+ : [];\n+ }\n+\n+ /**\n+ * Prevent a method from being called multiple times on the same object within the same call stack.\n+ *\n+ * @param callable $callback\n+ * @param mixed $default\n+ * @return mixed\n+ */\n+ protected function once($callback, $default = null)", - "comment_created_at": "2024-08-13T07:26:20+00:00", - "comment_author": "Tofandel", - "comment_body": "I do think following laravel's pattern this should be renamed to `withoutInfiniteRecursion` or `withoutCircularRecursion`\r\n\r\nAs you've explained it has a very different behavior than the once helper and could be confusing from a readability standpoint", - "pr_file_module": null - }, - { - "comment_id": "1714811116", - "repo_full_name": "laravel/framework", - "pr_number": 52461, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/PreventsCircularRecursion.php", - "discussion_id": "1714804417", - "commented_code": "@@ -0,0 +1,74 @@\n+>\n+ */\n+ protected static $recursionCache;\n+\n+ /**\n+ * Get the current recursion cache being used by the model.\n+ *\n+ * @return \\WeakMap\n+ */\n+ protected static function getRecursionCache()\n+ {\n+ return static::$recursionCache ??= new \\WeakMap();\n+ }\n+\n+ /**\n+ * Get the current stack of methods being called recursively.\n+ *\n+ * @param object $object\n+ * @return array\n+ */\n+ protected static function getRecursiveCallStack($object): array\n+ {\n+ return static::getRecursionCache()->offsetExists($object)\n+ ? static::getRecursionCache()->offsetGet($object)\n+ : [];\n+ }\n+\n+ /**\n+ * Prevent a method from being called multiple times on the same object within the same call stack.\n+ *\n+ * @param callable $callback\n+ * @param mixed $default\n+ * @return mixed\n+ */\n+ protected function once($callback, $default = null)", - "comment_created_at": "2024-08-13T07:31:08+00:00", - "comment_author": "samlev", - "comment_body": "I had actuall named it `withoutRecursion()` originally then changed it to `once()`", - "pr_file_module": null - }, - { - "comment_id": "1714829338", - "repo_full_name": "laravel/framework", - "pr_number": 52461, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/PreventsCircularRecursion.php", - "discussion_id": "1714804417", - "commented_code": "@@ -0,0 +1,74 @@\n+>\n+ */\n+ protected static $recursionCache;\n+\n+ /**\n+ * Get the current recursion cache being used by the model.\n+ *\n+ * @return \\WeakMap\n+ */\n+ protected static function getRecursionCache()\n+ {\n+ return static::$recursionCache ??= new \\WeakMap();\n+ }\n+\n+ /**\n+ * Get the current stack of methods being called recursively.\n+ *\n+ * @param object $object\n+ * @return array\n+ */\n+ protected static function getRecursiveCallStack($object): array\n+ {\n+ return static::getRecursionCache()->offsetExists($object)\n+ ? 
static::getRecursionCache()->offsetGet($object)\n+ : [];\n+ }\n+\n+ /**\n+ * Prevent a method from being called multiple times on the same object within the same call stack.\n+ *\n+ * @param callable $callback\n+ * @param mixed $default\n+ * @return mixed\n+ */\n+ protected function once($callback, $default = null)", - "comment_created_at": "2024-08-13T07:44:44+00:00", - "comment_author": "Tofandel", - "comment_body": "That also works, as long as it makes the code more readable in `Model` so you can understand without having to take a look inside the function", - "pr_file_module": null - }, - { - "comment_id": "1715001879", - "repo_full_name": "laravel/framework", - "pr_number": 52461, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/PreventsCircularRecursion.php", - "discussion_id": "1714804417", - "commented_code": "@@ -0,0 +1,74 @@\n+>\n+ */\n+ protected static $recursionCache;\n+\n+ /**\n+ * Get the current recursion cache being used by the model.\n+ *\n+ * @return \\WeakMap\n+ */\n+ protected static function getRecursionCache()\n+ {\n+ return static::$recursionCache ??= new \\WeakMap();\n+ }\n+\n+ /**\n+ * Get the current stack of methods being called recursively.\n+ *\n+ * @param object $object\n+ * @return array\n+ */\n+ protected static function getRecursiveCallStack($object): array\n+ {\n+ return static::getRecursionCache()->offsetExists($object)\n+ ? static::getRecursionCache()->offsetGet($object)\n+ : [];\n+ }\n+\n+ /**\n+ * Prevent a method from being called multiple times on the same object within the same call stack.\n+ *\n+ * @param callable $callback\n+ * @param mixed $default\n+ * @return mixed\n+ */\n+ protected function once($callback, $default = null)", - "comment_created_at": "2024-08-13T09:44:11+00:00", - "comment_author": "samlev", - "comment_body": "I've updated this back to `withoutRecursion()`", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/framework-optimize-for-code-readability.json b/_reviewers/framework-optimize-for-code-readability.json new file mode 100644 index 0000000..87b6e0f --- /dev/null +++ b/_reviewers/framework-optimize-for-code-readability.json @@ -0,0 +1,258 @@ +[ + { + "discussion_id": "2101199329", + "pr_number": 55810, + "pr_file": "src/Illuminate/Filesystem/FilesystemAdapter.php", + "created_at": "2025-05-21T21:27:36+00:00", + "commented_code": "*/\n public function url($path)\n {\n if (isset($this->config['prefix'])) {\n if (isset($this->config['prefix']) && ! empty($this->config['prefix'])) {\n $path = $this->concatPathToUrl($this->config['prefix'], $path);", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2101199329", + "repo_full_name": "laravel/framework", + "pr_number": 55810, + "pr_file": "src/Illuminate/Filesystem/FilesystemAdapter.php", + "discussion_id": "2101199329", + "commented_code": "@@ -725,7 +725,7 @@ public function writeStream($path, $resource, array $options = [])\n */\n public function url($path)\n {\n- if (isset($this->config['prefix'])) {\n+ if (isset($this->config['prefix']) && ! 
empty($this->config['prefix'])) {\n $path = $this->concatPathToUrl($this->config['prefix'], $path);", + "comment_created_at": "2025-05-21T21:27:36+00:00", + "comment_author": "shaedrich", + "comment_body": "Shouldn't this be handled _inside_ the `concatPathToUrl()` method?", + "pr_file_module": null + }, + { + "comment_id": "2101227158", + "repo_full_name": "laravel/framework", + "pr_number": 55810, + "pr_file": "src/Illuminate/Filesystem/FilesystemAdapter.php", + "discussion_id": "2101199329", + "commented_code": "@@ -725,7 +725,7 @@ public function writeStream($path, $resource, array $options = [])\n */\n public function url($path)\n {\n- if (isset($this->config['prefix'])) {\n+ if (isset($this->config['prefix']) && ! empty($this->config['prefix'])) {\n $path = $this->concatPathToUrl($this->config['prefix'], $path);", + "comment_created_at": "2025-05-21T21:50:07+00:00", + "comment_author": "MrTuffaha", + "comment_body": "good point", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2101260253", + "pr_number": 55810, + "pr_file": "src/Illuminate/Filesystem/FilesystemAdapter.php", + "created_at": "2025-05-21T22:12:45+00:00", + "commented_code": "*/\n protected function concatPathToUrl($url, $path)\n {\n if (empty($url)) {\n return trim($path, '/');\n }\n\n if (empty($path)) {\n return trim($url, '/');\n }\n\n return rtrim($url, '/').'/'.ltrim($path, '/');", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2101260253", + "repo_full_name": "laravel/framework", + "pr_number": 55810, + "pr_file": "src/Illuminate/Filesystem/FilesystemAdapter.php", + "discussion_id": "2101260253", + "commented_code": "@@ -847,6 +847,14 @@ public function temporaryUploadUrl($path, $expiration, array $options = [])\n */\n protected function concatPathToUrl($url, $path)\n {\n+ if (empty($url)) {\n+ return trim($path, '/');\n+ }\n+\n+ if (empty($path)) {\n+ return trim($url, '/');\n+ }\n+\n return rtrim($url, '/').'/'.ltrim($path, '/');", + "comment_created_at": "2025-05-21T22:12:45+00:00", + "comment_author": "shaedrich", + "comment_body": "You could even shorten that, if you want:\r\n```suggestion\r\n return match (true) {\r\n empty($url) => trim($path, '/'),\r\n empty($path) => trim($url, '/'),\r\n default => rtrim($url, '/').'/'.ltrim($path, '/'),\r\n };\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2101264645", + "repo_full_name": "laravel/framework", + "pr_number": 55810, + "pr_file": "src/Illuminate/Filesystem/FilesystemAdapter.php", + "discussion_id": "2101260253", + "commented_code": "@@ -847,6 +847,14 @@ public function temporaryUploadUrl($path, $expiration, array $options = [])\n */\n protected function concatPathToUrl($url, $path)\n {\n+ if (empty($url)) {\n+ return trim($path, '/');\n+ }\n+\n+ if (empty($path)) {\n+ return trim($url, '/');\n+ }\n+\n return rtrim($url, '/').'/'.ltrim($path, '/');", + "comment_created_at": "2025-05-21T22:17:40+00:00", + "comment_author": "MrTuffaha", + "comment_body": "While i do like shorter code, i do belive that my code is easier to inderstand/more readable, at least at first glance.", + "pr_file_module": null + }, + { + "comment_id": "2102187470", + "repo_full_name": "laravel/framework", + "pr_number": 55810, + "pr_file": "src/Illuminate/Filesystem/FilesystemAdapter.php", + "discussion_id": "2101260253", + "commented_code": "@@ -847,6 +847,14 @@ public function temporaryUploadUrl($path, $expiration, array $options = [])\n */\n protected function concatPathToUrl($url, $path)\n {\n+ if (empty($url)) {\n+ 
return trim($path, '/');\n+ }\n+\n+ if (empty($path)) {\n+ return trim($url, '/');\n+ }\n+\n return rtrim($url, '/').'/'.ltrim($path, '/');", + "comment_created_at": "2025-05-22T10:11:49+00:00", + "comment_author": "shaedrich", + "comment_body": "Yeah, no problem 😃 :+1:", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1975059131", + "pr_number": 54845, + "pr_file": "src/Illuminate/Validation/Validator.php", + "created_at": "2025-02-28T09:05:49+00:00", + "commented_code": "*/\n protected function validateUsingCustomRule($attribute, $value, $rule)\n {\n $attribute = $this->replacePlaceholderInString($attribute);\n $originalAttribute = $this->replacePlaceholderInString($attribute);\n\n $attribute = match (true) {\n $rule instanceof Rules\\File => $attribute,\n default => $originalAttribute,\n };", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1975059131", + "repo_full_name": "laravel/framework", + "pr_number": 54845, + "pr_file": "src/Illuminate/Validation/Validator.php", + "discussion_id": "1975059131", + "commented_code": "@@ -872,7 +872,12 @@ protected function hasNotFailedPreviousRuleIfPresenceRule($rule, $attribute)\n */\n protected function validateUsingCustomRule($attribute, $value, $rule)\n {\n- $attribute = $this->replacePlaceholderInString($attribute);\n+ $originalAttribute = $this->replacePlaceholderInString($attribute);\n+\n+ $attribute = match (true) {\n+ $rule instanceof Rules\\File => $attribute,\n+ default => $originalAttribute,\n+ };", + "comment_created_at": "2025-02-28T09:05:49+00:00", + "comment_author": "shaedrich", + "comment_body": "match statements with only two cases kinda defeat the purpose, I'd say\r\n```suggestion\r\n $originalAttribute = $this->replacePlaceholderInString($attribute);\r\n\r\n $attribute = $rule instanceof Rules\\File\r\n ? $attribute\r\n : $this->replacePlaceholderInString($attribute);\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1868256502", + "pr_number": 53748, + "pr_file": "src/Illuminate/Database/Console/PruneCommand.php", + "created_at": "2024-12-03T19:08:18+00:00", + "commented_code": "protected function getPath()\n {\n if (! empty($path = $this->option('path'))) {\n return (new Collection($path))->map(function ($path) {\n return base_path($path);\n })->all();\n return (new Collection($path))\n ->map(fn ($path) => base_path($path))", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1868256502", + "repo_full_name": "laravel/framework", + "pr_number": 53748, + "pr_file": "src/Illuminate/Database/Console/PruneCommand.php", + "discussion_id": "1868256502", + "commented_code": "@@ -157,9 +157,9 @@ protected function models()\n protected function getPath()\n {\n if (! 
empty($path = $this->option('path'))) {\n- return (new Collection($path))->map(function ($path) {\n- return base_path($path);\n- })->all();\n+ return (new Collection($path))\n+ ->map(fn ($path) => base_path($path))", + "comment_created_at": "2024-12-03T19:08:18+00:00", + "comment_author": "shaedrich", + "comment_body": "This can be just\r\n```suggestion\r\n ->map('base_path')\r\n```\r\nor\r\n```suggestion\r\n ->map(base_path(...))\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1868258127", + "pr_number": 53748, + "pr_file": "src/Illuminate/Database/Eloquent/Model.php", + "created_at": "2024-12-03T19:09:48+00:00", + "commented_code": "*/\n public function qualifyColumns($columns)\n {\n return (new BaseCollection($columns))->map(function ($column) {\n return $this->qualifyColumn($column);\n })->all();\n return (new BaseCollection($columns))\n ->map(fn ($column) => $this->qualifyColumn($column))", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1868258127", + "repo_full_name": "laravel/framework", + "pr_number": 53748, + "pr_file": "src/Illuminate/Database/Eloquent/Model.php", + "discussion_id": "1868258127", + "commented_code": "@@ -600,9 +600,9 @@ public function qualifyColumn($column)\n */\n public function qualifyColumns($columns)\n {\n- return (new BaseCollection($columns))->map(function ($column) {\n- return $this->qualifyColumn($column);\n- })->all();\n+ return (new BaseCollection($columns))\n+ ->map(fn ($column) => $this->qualifyColumn($column))", + "comment_created_at": "2024-12-03T19:09:48+00:00", + "comment_author": "shaedrich", + "comment_body": "This can just be\r\n```suggestion\r\n ->map([ $this, 'qualifyColumn' ])\r\n```\r\n\r\nor\r\n```suggestion\r\n ->map($this->qualifyColumn(...))\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1868261023", + "pr_number": 53748, + "pr_file": "src/Illuminate/Database/Query/Grammars/PostgresGrammar.php", + "created_at": "2024-12-03T19:12:24+00:00", + "commented_code": "{\n $quote = func_num_args() === 2 ? func_get_arg(1) : \"'\";\n\n return (new Collection($path))->map(function ($attribute) {\n return $this->parseJsonPathArrayKeys($attribute);\n })->collapse()->map(function ($attribute) use ($quote) {\n return filter_var($attribute, FILTER_VALIDATE_INT) !== false\n ? $attribute\n : $quote.$attribute.$quote;\n })->all();\n return (new Collection($path))\n ->map(fn ($attribute) => $this->parseJsonPathArrayKeys($attribute))", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1868261023", + "repo_full_name": "laravel/framework", + "pr_number": 53748, + "pr_file": "src/Illuminate/Database/Query/Grammars/PostgresGrammar.php", + "discussion_id": "1868261023", + "commented_code": "@@ -734,13 +736,15 @@ protected function wrapJsonPathAttributes($path)\n {\n $quote = func_num_args() === 2 ? func_get_arg(1) : \"'\";\n \n- return (new Collection($path))->map(function ($attribute) {\n- return $this->parseJsonPathArrayKeys($attribute);\n- })->collapse()->map(function ($attribute) use ($quote) {\n- return filter_var($attribute, FILTER_VALIDATE_INT) !== false\n- ? 
$attribute\n- : $quote.$attribute.$quote;\n- })->all();\n+ return (new Collection($path))\n+ ->map(fn ($attribute) => $this->parseJsonPathArrayKeys($attribute))", + "comment_created_at": "2024-12-03T19:12:24+00:00", + "comment_author": "shaedrich", + "comment_body": "This can be simplified to just\r\n```suggestion\r\n ->map([ $this, 'parseJsonPathArrayKeys' ])\r\n```\r\n\r\nor\r\n```suggestion\r\n ->map($this->parseJsonPathArrayKeys(...))\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1977480863", + "pr_number": 54874, + "pr_file": "src/Illuminate/Collections/helpers.php", + "created_at": "2025-03-03T12:59:21+00:00", + "commented_code": "}\n}\n\nif (! function_exists('deepCollect')) {\n /**\n * Recursively convert an array and its nested arrays into Laravel collections.\n *\n * This function takes an array (or any iterable) and transforms it into an instance\n * of Laravel's Collection class. It applies the transformation to all nested arrays,\n * ensuring that every level of the array structure is converted into collections.\n *\n * @param mixed $value The input value, which can be an array, an iterable, or any other data type.\n * @return mixed If the value is an array, a Collection instance is returned with all nested arrays\n * converted. If the value is not an array, it is returned as-is.\n */\n function deepCollect($value)\n {\n // If the value is an array, convert it into a Collection and apply deepCollect recursively\n if (is_array($value)) {\n return collect($value)->map(fn ($item) => deepCollect($item));", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1977480863", + "repo_full_name": "laravel/framework", + "pr_number": 54874, + "pr_file": "src/Illuminate/Collections/helpers.php", + "discussion_id": "1977480863", + "commented_code": "@@ -19,6 +19,30 @@ function collect($value = [])\n }\n }\n \n+if (! function_exists('deepCollect')) {\n+ /**\n+ * Recursively convert an array and its nested arrays into Laravel collections.\n+ *\n+ * This function takes an array (or any iterable) and transforms it into an instance\n+ * of Laravel's Collection class. It applies the transformation to all nested arrays,\n+ * ensuring that every level of the array structure is converted into collections.\n+ *\n+ * @param mixed $value The input value, which can be an array, an iterable, or any other data type.\n+ * @return mixed If the value is an array, a Collection instance is returned with all nested arrays\n+ * converted. If the value is not an array, it is returned as-is.\n+ */\n+ function deepCollect($value)\n+ {\n+ // If the value is an array, convert it into a Collection and apply deepCollect recursively\n+ if (is_array($value)) {\n+ return collect($value)->map(fn ($item) => deepCollect($item));", + "comment_created_at": "2025-03-03T12:59:21+00:00", + "comment_author": "shaedrich", + "comment_body": "You might be able to use first-class callable syntax here:\r\n```suggestion\r\n return collect($value)->map(deepCollect(...));\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1953107378", + "pr_number": 54582, + "pr_file": "src/Illuminate/Database/Eloquent/Filterable.php", + "created_at": "2025-02-12T17:28:02+00:00", + "commented_code": " $methodOrValue) {\n\n $className = $this->filterNamespace . 
ucfirst($class);\n\n if (class_exists($className)) {\n\n if (method_exists($className, $methodOrValue))\n $className::{$methodOrValue}($builder);\n\n if (method_exists($className, 'apply'))\n $className::apply($builder, $methodOrValue);\n }\n }\n }\n }", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1953107378", + "repo_full_name": "laravel/framework", + "pr_number": 54582, + "pr_file": "src/Illuminate/Database/Eloquent/Filterable.php", + "discussion_id": "1953107378", + "commented_code": "@@ -0,0 +1,36 @@\n+ $methodOrValue) {\n+\n+ $className = $this->filterNamespace . ucfirst($class);\n+\n+ if (class_exists($className)) {\n+\n+ if (method_exists($className, $methodOrValue))\n+ $className::{$methodOrValue}($builder);\n+\n+ if (method_exists($className, 'apply'))\n+ $className::apply($builder, $methodOrValue);\n+ }\n+ }\n+ }\n+ }", + "comment_created_at": "2025-02-12T17:28:02+00:00", + "comment_author": "shaedrich", + "comment_body": "You might want to use early returns here:\r\n```suggestion\r\n public function scopeFilter(Builder $builder, array $params): void\r\n {\r\n if (!is_array($params) || $params === []) {\r\n \treturn;\r\n }\r\n\r\n foreach ($params as $class => $methodOrValue) {\r\n $className = $this->filterNamespace . ucfirst($class);\r\n\r\n if (!class_exists($className)) {\r\n\t\t\t\tcontinue;\r\n }\r\n\r\n if (method_exists($className, $methodOrValue)) {\r\n $className::{$methodOrValue}($builder);\r\n }\r\n\r\n if (method_exists($className, 'apply')) {\r\n $className::apply($builder, $methodOrValue);\r\n }\r\n }\r\n }\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1953141984", + "pr_number": 54582, + "pr_file": "src/Illuminate/View/Compilers/Concerns/CompilesApplied.php", + "created_at": "2025-02-12T17:49:06+00:00", + "commented_code": "replace('(', '')\r\n \t->replace(')', '')\r\n \t->explode(',');\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1935718228", + "pr_number": 54417, + "pr_file": "src/Illuminate/Validation/ValidationRuleParser.php", + "created_at": "2025-01-30T14:37:26+00:00", + "commented_code": "return Arr::wrap($this->prepareRule($rule, $attribute));\n }\n\n $rules = [];\n\n foreach ($rule as $value) {\n if ($value instanceof Date) {\n $rules = array_merge($rules, explode('|', (string) $value));\n } else {\n $rules[] = $this->prepareRule($value, $attribute);\n }\n }\n\n return $rules;\n return array_map(\n [$this, 'prepareRule'],", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1935718228", + "repo_full_name": "laravel/framework", + "pr_number": 54417, + "pr_file": "src/Illuminate/Validation/ValidationRuleParser.php", + "discussion_id": "1935718228", + "commented_code": "@@ -96,17 +95,11 @@ protected function explodeExplicitRule($rule, $attribute)\n return Arr::wrap($this->prepareRule($rule, $attribute));\n }\n \n- $rules = [];\n-\n- foreach ($rule as $value) {\n- if ($value instanceof Date) {\n- $rules = array_merge($rules, explode('|', (string) $value));\n- } else {\n- $rules[] = $this->prepareRule($value, $attribute);\n- }\n- }\n-\n- return $rules;\n+ return array_map(\n+ [$this, 'prepareRule'],", + "comment_created_at": "2025-01-30T14:37:26+00:00", + "comment_author": "shaedrich", + "comment_body": "You could use first-class callable syntax here:\r\n```suggestion\r\n $this->prepareRule(...),\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git 
a/_reviewers/framework-optimize-for-code-readability.md b/_reviewers/framework-optimize-for-code-readability.md index fe8c535..2523fc1 100644 --- a/_reviewers/framework-optimize-for-code-readability.md +++ b/_reviewers/framework-optimize-for-code-readability.md @@ -55,263 +55,3 @@ The improved version: - Leverages modern PHP features appropriately Choose simpler constructs when they improve readability, but avoid sacrificing clarity for brevity. The goal is to write code that is easy to understand and maintain. - - -[ - { - "discussion_id": "2101199329", - "pr_number": 55810, - "pr_file": "src/Illuminate/Filesystem/FilesystemAdapter.php", - "created_at": "2025-05-21T21:27:36+00:00", - "commented_code": "*/\n public function url($path)\n {\n if (isset($this->config['prefix'])) {\n if (isset($this->config['prefix']) && ! empty($this->config['prefix'])) {\n $path = $this->concatPathToUrl($this->config['prefix'], $path);", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2101199329", - "repo_full_name": "laravel/framework", - "pr_number": 55810, - "pr_file": "src/Illuminate/Filesystem/FilesystemAdapter.php", - "discussion_id": "2101199329", - "commented_code": "@@ -725,7 +725,7 @@ public function writeStream($path, $resource, array $options = [])\n */\n public function url($path)\n {\n- if (isset($this->config['prefix'])) {\n+ if (isset($this->config['prefix']) && ! empty($this->config['prefix'])) {\n $path = $this->concatPathToUrl($this->config['prefix'], $path);", - "comment_created_at": "2025-05-21T21:27:36+00:00", - "comment_author": "shaedrich", - "comment_body": "Shouldn't this be handled _inside_ the `concatPathToUrl()` method?", - "pr_file_module": null - }, - { - "comment_id": "2101227158", - "repo_full_name": "laravel/framework", - "pr_number": 55810, - "pr_file": "src/Illuminate/Filesystem/FilesystemAdapter.php", - "discussion_id": "2101199329", - "commented_code": "@@ -725,7 +725,7 @@ public function writeStream($path, $resource, array $options = [])\n */\n public function url($path)\n {\n- if (isset($this->config['prefix'])) {\n+ if (isset($this->config['prefix']) && ! 
empty($this->config['prefix'])) {\n $path = $this->concatPathToUrl($this->config['prefix'], $path);", - "comment_created_at": "2025-05-21T21:50:07+00:00", - "comment_author": "MrTuffaha", - "comment_body": "good point", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2101260253", - "pr_number": 55810, - "pr_file": "src/Illuminate/Filesystem/FilesystemAdapter.php", - "created_at": "2025-05-21T22:12:45+00:00", - "commented_code": "*/\n protected function concatPathToUrl($url, $path)\n {\n if (empty($url)) {\n return trim($path, '/');\n }\n\n if (empty($path)) {\n return trim($url, '/');\n }\n\n return rtrim($url, '/').'/'.ltrim($path, '/');", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2101260253", - "repo_full_name": "laravel/framework", - "pr_number": 55810, - "pr_file": "src/Illuminate/Filesystem/FilesystemAdapter.php", - "discussion_id": "2101260253", - "commented_code": "@@ -847,6 +847,14 @@ public function temporaryUploadUrl($path, $expiration, array $options = [])\n */\n protected function concatPathToUrl($url, $path)\n {\n+ if (empty($url)) {\n+ return trim($path, '/');\n+ }\n+\n+ if (empty($path)) {\n+ return trim($url, '/');\n+ }\n+\n return rtrim($url, '/').'/'.ltrim($path, '/');", - "comment_created_at": "2025-05-21T22:12:45+00:00", - "comment_author": "shaedrich", - "comment_body": "You could even shorten that, if you want:\r\n```suggestion\r\n return match (true) {\r\n empty($url) => trim($path, '/'),\r\n empty($path) => trim($url, '/'),\r\n default => rtrim($url, '/').'/'.ltrim($path, '/'),\r\n };\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2101264645", - "repo_full_name": "laravel/framework", - "pr_number": 55810, - "pr_file": "src/Illuminate/Filesystem/FilesystemAdapter.php", - "discussion_id": "2101260253", - "commented_code": "@@ -847,6 +847,14 @@ public function temporaryUploadUrl($path, $expiration, array $options = [])\n */\n protected function concatPathToUrl($url, $path)\n {\n+ if (empty($url)) {\n+ return trim($path, '/');\n+ }\n+\n+ if (empty($path)) {\n+ return trim($url, '/');\n+ }\n+\n return rtrim($url, '/').'/'.ltrim($path, '/');", - "comment_created_at": "2025-05-21T22:17:40+00:00", - "comment_author": "MrTuffaha", - "comment_body": "While i do like shorter code, i do belive that my code is easier to inderstand/more readable, at least at first glance.", - "pr_file_module": null - }, - { - "comment_id": "2102187470", - "repo_full_name": "laravel/framework", - "pr_number": 55810, - "pr_file": "src/Illuminate/Filesystem/FilesystemAdapter.php", - "discussion_id": "2101260253", - "commented_code": "@@ -847,6 +847,14 @@ public function temporaryUploadUrl($path, $expiration, array $options = [])\n */\n protected function concatPathToUrl($url, $path)\n {\n+ if (empty($url)) {\n+ return trim($path, '/');\n+ }\n+\n+ if (empty($path)) {\n+ return trim($url, '/');\n+ }\n+\n return rtrim($url, '/').'/'.ltrim($path, '/');", - "comment_created_at": "2025-05-22T10:11:49+00:00", - "comment_author": "shaedrich", - "comment_body": "Yeah, no problem \ud83d\ude03 :+1:", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1975059131", - "pr_number": 54845, - "pr_file": "src/Illuminate/Validation/Validator.php", - "created_at": "2025-02-28T09:05:49+00:00", - "commented_code": "*/\n protected function validateUsingCustomRule($attribute, $value, $rule)\n {\n $attribute = $this->replacePlaceholderInString($attribute);\n $originalAttribute = $this->replacePlaceholderInString($attribute);\n\n $attribute 
= match (true) {\n $rule instanceof Rules\\File => $attribute,\n default => $originalAttribute,\n };", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1975059131", - "repo_full_name": "laravel/framework", - "pr_number": 54845, - "pr_file": "src/Illuminate/Validation/Validator.php", - "discussion_id": "1975059131", - "commented_code": "@@ -872,7 +872,12 @@ protected function hasNotFailedPreviousRuleIfPresenceRule($rule, $attribute)\n */\n protected function validateUsingCustomRule($attribute, $value, $rule)\n {\n- $attribute = $this->replacePlaceholderInString($attribute);\n+ $originalAttribute = $this->replacePlaceholderInString($attribute);\n+\n+ $attribute = match (true) {\n+ $rule instanceof Rules\\File => $attribute,\n+ default => $originalAttribute,\n+ };", - "comment_created_at": "2025-02-28T09:05:49+00:00", - "comment_author": "shaedrich", - "comment_body": "match statements with only two cases kinda defeat the purpose, I'd say\r\n```suggestion\r\n $originalAttribute = $this->replacePlaceholderInString($attribute);\r\n\r\n $attribute = $rule instanceof Rules\\File\r\n ? $attribute\r\n : $this->replacePlaceholderInString($attribute);\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1868256502", - "pr_number": 53748, - "pr_file": "src/Illuminate/Database/Console/PruneCommand.php", - "created_at": "2024-12-03T19:08:18+00:00", - "commented_code": "protected function getPath()\n {\n if (! empty($path = $this->option('path'))) {\n return (new Collection($path))->map(function ($path) {\n return base_path($path);\n })->all();\n return (new Collection($path))\n ->map(fn ($path) => base_path($path))", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1868256502", - "repo_full_name": "laravel/framework", - "pr_number": 53748, - "pr_file": "src/Illuminate/Database/Console/PruneCommand.php", - "discussion_id": "1868256502", - "commented_code": "@@ -157,9 +157,9 @@ protected function models()\n protected function getPath()\n {\n if (! 
empty($path = $this->option('path'))) {\n- return (new Collection($path))->map(function ($path) {\n- return base_path($path);\n- })->all();\n+ return (new Collection($path))\n+ ->map(fn ($path) => base_path($path))", - "comment_created_at": "2024-12-03T19:08:18+00:00", - "comment_author": "shaedrich", - "comment_body": "This can be just\r\n```suggestion\r\n ->map('base_path')\r\n```\r\nor\r\n```suggestion\r\n ->map(base_path(...))\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1868258127", - "pr_number": 53748, - "pr_file": "src/Illuminate/Database/Eloquent/Model.php", - "created_at": "2024-12-03T19:09:48+00:00", - "commented_code": "*/\n public function qualifyColumns($columns)\n {\n return (new BaseCollection($columns))->map(function ($column) {\n return $this->qualifyColumn($column);\n })->all();\n return (new BaseCollection($columns))\n ->map(fn ($column) => $this->qualifyColumn($column))", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1868258127", - "repo_full_name": "laravel/framework", - "pr_number": 53748, - "pr_file": "src/Illuminate/Database/Eloquent/Model.php", - "discussion_id": "1868258127", - "commented_code": "@@ -600,9 +600,9 @@ public function qualifyColumn($column)\n */\n public function qualifyColumns($columns)\n {\n- return (new BaseCollection($columns))->map(function ($column) {\n- return $this->qualifyColumn($column);\n- })->all();\n+ return (new BaseCollection($columns))\n+ ->map(fn ($column) => $this->qualifyColumn($column))", - "comment_created_at": "2024-12-03T19:09:48+00:00", - "comment_author": "shaedrich", - "comment_body": "This can just be\r\n```suggestion\r\n ->map([ $this, 'qualifyColumn' ])\r\n```\r\n\r\nor\r\n```suggestion\r\n ->map($this->qualifyColumn(...))\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1868261023", - "pr_number": 53748, - "pr_file": "src/Illuminate/Database/Query/Grammars/PostgresGrammar.php", - "created_at": "2024-12-03T19:12:24+00:00", - "commented_code": "{\n $quote = func_num_args() === 2 ? func_get_arg(1) : \"'\";\n\n return (new Collection($path))->map(function ($attribute) {\n return $this->parseJsonPathArrayKeys($attribute);\n })->collapse()->map(function ($attribute) use ($quote) {\n return filter_var($attribute, FILTER_VALIDATE_INT) !== false\n ? $attribute\n : $quote.$attribute.$quote;\n })->all();\n return (new Collection($path))\n ->map(fn ($attribute) => $this->parseJsonPathArrayKeys($attribute))", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1868261023", - "repo_full_name": "laravel/framework", - "pr_number": 53748, - "pr_file": "src/Illuminate/Database/Query/Grammars/PostgresGrammar.php", - "discussion_id": "1868261023", - "commented_code": "@@ -734,13 +736,15 @@ protected function wrapJsonPathAttributes($path)\n {\n $quote = func_num_args() === 2 ? func_get_arg(1) : \"'\";\n \n- return (new Collection($path))->map(function ($attribute) {\n- return $this->parseJsonPathArrayKeys($attribute);\n- })->collapse()->map(function ($attribute) use ($quote) {\n- return filter_var($attribute, FILTER_VALIDATE_INT) !== false\n- ? 
$attribute\n- : $quote.$attribute.$quote;\n- })->all();\n+ return (new Collection($path))\n+ ->map(fn ($attribute) => $this->parseJsonPathArrayKeys($attribute))", - "comment_created_at": "2024-12-03T19:12:24+00:00", - "comment_author": "shaedrich", - "comment_body": "This can be simplified to just\r\n```suggestion\r\n ->map([ $this, 'parseJsonPathArrayKeys' ])\r\n```\r\n\r\nor\r\n```suggestion\r\n ->map($this->parseJsonPathArrayKeys(...))\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1977480863", - "pr_number": 54874, - "pr_file": "src/Illuminate/Collections/helpers.php", - "created_at": "2025-03-03T12:59:21+00:00", - "commented_code": "}\n}\n\nif (! function_exists('deepCollect')) {\n /**\n * Recursively convert an array and its nested arrays into Laravel collections.\n *\n * This function takes an array (or any iterable) and transforms it into an instance\n * of Laravel's Collection class. It applies the transformation to all nested arrays,\n * ensuring that every level of the array structure is converted into collections.\n *\n * @param mixed $value The input value, which can be an array, an iterable, or any other data type.\n * @return mixed If the value is an array, a Collection instance is returned with all nested arrays\n * converted. If the value is not an array, it is returned as-is.\n */\n function deepCollect($value)\n {\n // If the value is an array, convert it into a Collection and apply deepCollect recursively\n if (is_array($value)) {\n return collect($value)->map(fn ($item) => deepCollect($item));", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1977480863", - "repo_full_name": "laravel/framework", - "pr_number": 54874, - "pr_file": "src/Illuminate/Collections/helpers.php", - "discussion_id": "1977480863", - "commented_code": "@@ -19,6 +19,30 @@ function collect($value = [])\n }\n }\n \n+if (! function_exists('deepCollect')) {\n+ /**\n+ * Recursively convert an array and its nested arrays into Laravel collections.\n+ *\n+ * This function takes an array (or any iterable) and transforms it into an instance\n+ * of Laravel's Collection class. It applies the transformation to all nested arrays,\n+ * ensuring that every level of the array structure is converted into collections.\n+ *\n+ * @param mixed $value The input value, which can be an array, an iterable, or any other data type.\n+ * @return mixed If the value is an array, a Collection instance is returned with all nested arrays\n+ * converted. If the value is not an array, it is returned as-is.\n+ */\n+ function deepCollect($value)\n+ {\n+ // If the value is an array, convert it into a Collection and apply deepCollect recursively\n+ if (is_array($value)) {\n+ return collect($value)->map(fn ($item) => deepCollect($item));", - "comment_created_at": "2025-03-03T12:59:21+00:00", - "comment_author": "shaedrich", - "comment_body": "You might be able to use first-class callable syntax here:\r\n```suggestion\r\n return collect($value)->map(deepCollect(...));\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1953107378", - "pr_number": 54582, - "pr_file": "src/Illuminate/Database/Eloquent/Filterable.php", - "created_at": "2025-02-12T17:28:02+00:00", - "commented_code": " $methodOrValue) {\n\n $className = $this->filterNamespace . 
ucfirst($class);\n\n if (class_exists($className)) {\n\n if (method_exists($className, $methodOrValue))\n $className::{$methodOrValue}($builder);\n\n if (method_exists($className, 'apply'))\n $className::apply($builder, $methodOrValue);\n }\n }\n }\n }", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1953107378", - "repo_full_name": "laravel/framework", - "pr_number": 54582, - "pr_file": "src/Illuminate/Database/Eloquent/Filterable.php", - "discussion_id": "1953107378", - "commented_code": "@@ -0,0 +1,36 @@\n+ $methodOrValue) {\n+\n+ $className = $this->filterNamespace . ucfirst($class);\n+\n+ if (class_exists($className)) {\n+\n+ if (method_exists($className, $methodOrValue))\n+ $className::{$methodOrValue}($builder);\n+\n+ if (method_exists($className, 'apply'))\n+ $className::apply($builder, $methodOrValue);\n+ }\n+ }\n+ }\n+ }", - "comment_created_at": "2025-02-12T17:28:02+00:00", - "comment_author": "shaedrich", - "comment_body": "You might want to use early returns here:\r\n```suggestion\r\n public function scopeFilter(Builder $builder, array $params): void\r\n {\r\n if (!is_array($params) || $params === []) {\r\n \treturn;\r\n }\r\n\r\n foreach ($params as $class => $methodOrValue) {\r\n $className = $this->filterNamespace . ucfirst($class);\r\n\r\n if (!class_exists($className)) {\r\n\t\t\t\tcontinue;\r\n }\r\n\r\n if (method_exists($className, $methodOrValue)) {\r\n $className::{$methodOrValue}($builder);\r\n }\r\n\r\n if (method_exists($className, 'apply')) {\r\n $className::apply($builder, $methodOrValue);\r\n }\r\n }\r\n }\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1953141984", - "pr_number": 54582, - "pr_file": "src/Illuminate/View/Compilers/Concerns/CompilesApplied.php", - "created_at": "2025-02-12T17:49:06+00:00", - "commented_code": "replace('(', '')\r\n \t->replace(')', '')\r\n \t->explode(',');\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1935718228", - "pr_number": 54417, - "pr_file": "src/Illuminate/Validation/ValidationRuleParser.php", - "created_at": "2025-01-30T14:37:26+00:00", - "commented_code": "return Arr::wrap($this->prepareRule($rule, $attribute));\n }\n\n $rules = [];\n\n foreach ($rule as $value) {\n if ($value instanceof Date) {\n $rules = array_merge($rules, explode('|', (string) $value));\n } else {\n $rules[] = $this->prepareRule($value, $attribute);\n }\n }\n\n return $rules;\n return array_map(\n [$this, 'prepareRule'],", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1935718228", - "repo_full_name": "laravel/framework", - "pr_number": 54417, - "pr_file": "src/Illuminate/Validation/ValidationRuleParser.php", - "discussion_id": "1935718228", - "commented_code": "@@ -96,17 +95,11 @@ protected function explodeExplicitRule($rule, $attribute)\n return Arr::wrap($this->prepareRule($rule, $attribute));\n }\n \n- $rules = [];\n-\n- foreach ($rule as $value) {\n- if ($value instanceof Date) {\n- $rules = array_merge($rules, explode('|', (string) $value));\n- } else {\n- $rules[] = $this->prepareRule($value, $attribute);\n- }\n- }\n-\n- return $rules;\n+ return array_map(\n+ [$this, 'prepareRule'],", - "comment_created_at": "2025-01-30T14:37:26+00:00", - "comment_author": "shaedrich", - "comment_body": "You could use first-class callable syntax here:\r\n```suggestion\r\n $this->prepareRule(...),\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/framework-optimize-loop-operations.json 
b/_reviewers/framework-optimize-loop-operations.json new file mode 100644 index 0000000..57b0116 --- /dev/null +++ b/_reviewers/framework-optimize-loop-operations.json @@ -0,0 +1,162 @@ +[ + { + "discussion_id": "2101429227", + "pr_number": 55815, + "pr_file": "src/Illuminate/Collections/Arr.php", + "created_at": "2025-05-22T01:05:00+00:00", + "commented_code": "return true;\n }\n\n /**\n * Determine if all keys exist in an array using \"dot\" notation.\n *\n * @param \\ArrayAccess|array $array\n * @param string|array $keys\n * @return bool\n */\n public static function hasAll($array, $keys)\n {\n $keys = (array) $keys;\n\n if (! $array || $keys === []) {\n return false;\n }\n\n $result = true;\n\n foreach ($keys as $key) {\n if (! static::has($array, $key)) {\n $result = false;\n }\n }\n\n return $result;", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2101429227", + "repo_full_name": "laravel/framework", + "pr_number": 55815, + "pr_file": "src/Illuminate/Collections/Arr.php", + "discussion_id": "2101429227", + "commented_code": "@@ -495,6 +495,32 @@ public static function has($array, $keys)\n return true;\n }\n \n+ /**\n+ * Determine if all keys exist in an array using \"dot\" notation.\n+ *\n+ * @param \\ArrayAccess|array $array\n+ * @param string|array $keys\n+ * @return bool\n+ */\n+ public static function hasAll($array, $keys)\n+ {\n+ $keys = (array) $keys;\n+\n+ if (! $array || $keys === []) {\n+ return false;\n+ }\n+\n+ $result = true;\n+\n+ foreach ($keys as $key) {\n+ if (! static::has($array, $key)) {\n+ $result = false;\n+ }\n+ }\n+\n+ return $result;", + "comment_created_at": "2025-05-22T01:05:00+00:00", + "comment_author": "Rizky92", + "comment_body": "Might be better to bail on first failure.\r\n```suggestion\r\n foreach ($keys as $key) {\r\n if (! static::has($array, $key)) {\r\n return false;\r\n }\r\n }\r\n\r\n return true;\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2101461078", + "repo_full_name": "laravel/framework", + "pr_number": 55815, + "pr_file": "src/Illuminate/Collections/Arr.php", + "discussion_id": "2101429227", + "commented_code": "@@ -495,6 +495,32 @@ public static function has($array, $keys)\n return true;\n }\n \n+ /**\n+ * Determine if all keys exist in an array using \"dot\" notation.\n+ *\n+ * @param \\ArrayAccess|array $array\n+ * @param string|array $keys\n+ * @return bool\n+ */\n+ public static function hasAll($array, $keys)\n+ {\n+ $keys = (array) $keys;\n+\n+ if (! $array || $keys === []) {\n+ return false;\n+ }\n+\n+ $result = true;\n+\n+ foreach ($keys as $key) {\n+ if (! static::has($array, $key)) {\n+ $result = false;\n+ }\n+ }\n+\n+ return $result;", + "comment_created_at": "2025-05-22T01:42:28+00:00", + "comment_author": "devajmeireles", + "comment_body": "Thanks!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2051354482", + "pr_number": 55472, + "pr_file": "src/Illuminate/Collections/Arr.php", + "created_at": "2025-04-19T02:42:19+00:00", + "commented_code": "return is_array($value) ? $value : [$value];\n }\n\n /**\n * Group an associative array by a key or callback.\n *\n * @param array $array\n * @param string|int|callable $groupBy\n * @param bool $preserveKeys\n * @return array\n */\n public static function groupBy($array, $groupBy, $preserveKeys = false)\n {\n $result = [];\n\n foreach ($array as $key => $item) {\n $groupKey = is_callable($groupBy) ? 
$groupBy($item, $key) : static::get($item, $groupBy);", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2051354482", + "repo_full_name": "laravel/framework", + "pr_number": 55472, + "pr_file": "src/Illuminate/Collections/Arr.php", + "discussion_id": "2051354482", + "commented_code": "@@ -1021,4 +1021,29 @@ public static function wrap($value)\n \n return is_array($value) ? $value : [$value];\n }\n+\n+ /**\n+ * Group an associative array by a key or callback.\n+ *\n+ * @param array $array\n+ * @param string|int|callable $groupBy\n+ * @param bool $preserveKeys\n+ * @return array\n+ */\n+ public static function groupBy($array, $groupBy, $preserveKeys = false)\n+ {\n+ $result = [];\n+\n+ foreach ($array as $key => $item) {\n+ $groupKey = is_callable($groupBy) ? $groupBy($item, $key) : static::get($item, $groupBy);", + "comment_created_at": "2025-04-19T02:42:19+00:00", + "comment_author": "rodrigopedra", + "comment_body": "I would ensure `$groupBy` is a callback out the loop, so we don't need to check if it is a callback on every item.\r\n\r\nSomething like this:\r\n\r\n```php\r\n$groupBy = is_callable($groupBy) ? $groupBy : fn ($item) => static::get($item, $groupBy);\r\n\r\nforeach ($array as $key => $item) {\r\n $groupKey = $groupBy($item, $key);\r\n\r\n // ...\r\n}\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2051403566", + "repo_full_name": "laravel/framework", + "pr_number": 55472, + "pr_file": "src/Illuminate/Collections/Arr.php", + "discussion_id": "2051354482", + "commented_code": "@@ -1021,4 +1021,29 @@ public static function wrap($value)\n \n return is_array($value) ? $value : [$value];\n }\n+\n+ /**\n+ * Group an associative array by a key or callback.\n+ *\n+ * @param array $array\n+ * @param string|int|callable $groupBy\n+ * @param bool $preserveKeys\n+ * @return array\n+ */\n+ public static function groupBy($array, $groupBy, $preserveKeys = false)\n+ {\n+ $result = [];\n+\n+ foreach ($array as $key => $item) {\n+ $groupKey = is_callable($groupBy) ? $groupBy($item, $key) : static::get($item, $groupBy);", + "comment_created_at": "2025-04-19T05:54:40+00:00", + "comment_author": "vishal2931", + "comment_body": "Thanks for the feedback, but that approach looks more readable than this one. 
🤔", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2096571598", + "pr_number": 55784, + "pr_file": "src/Illuminate/Support/Number.php", + "created_at": "2025-05-19T22:38:42+00:00", + "commented_code": "throw new RuntimeException('The \"intl\" PHP extension is required to use the ['.$method.'] method.');\n }\n }\n\n /**\n * Convert an integer into its Roman numeral representation.\n *\n * @param int $number The number to convert (must be between 1 and 3999)\n * @return string The Roman numeral representation\n *\n * @throws InvalidArgumentException If the number is not between 1 and 3999\n */\n public static function roman(int $number): string\n {\n if ($number <= 0 || $number > 3999) {\n throw new InvalidArgumentException('Number must be between 1 and 3999.');\n }\n\n $map = [\n 'M' => 1000,\n 'CM' => 900,\n 'D' => 500,\n 'CD' => 400,\n 'C' => 100,\n 'XC' => 90,\n 'L' => 50,\n 'XL' => 40,\n 'X' => 10,\n 'IX' => 9,\n 'V' => 5,\n 'IV' => 4,\n 'I' => 1,\n ];\n\n $result = '';\n\n foreach ($map as $roman => $value) {\n while ($number >= $value) {\n $result .= $roman;\n $number -= $value;\n }\n }", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2096571598", + "repo_full_name": "laravel/framework", + "pr_number": 55784, + "pr_file": "src/Illuminate/Support/Number.php", + "discussion_id": "2096571598", + "commented_code": "@@ -427,4 +428,46 @@ protected static function ensureIntlExtensionIsInstalled()\n throw new RuntimeException('The \"intl\" PHP extension is required to use the ['.$method.'] method.');\n }\n }\n+\n+ /**\n+ * Convert an integer into its Roman numeral representation.\n+ *\n+ * @param int $number The number to convert (must be between 1 and 3999)\n+ * @return string The Roman numeral representation\n+ *\n+ * @throws InvalidArgumentException If the number is not between 1 and 3999\n+ */\n+ public static function roman(int $number): string\n+ {\n+ if ($number <= 0 || $number > 3999) {\n+ throw new InvalidArgumentException('Number must be between 1 and 3999.');\n+ }\n+\n+ $map = [\n+ 'M' => 1000,\n+ 'CM' => 900,\n+ 'D' => 500,\n+ 'CD' => 400,\n+ 'C' => 100,\n+ 'XC' => 90,\n+ 'L' => 50,\n+ 'XL' => 40,\n+ 'X' => 10,\n+ 'IX' => 9,\n+ 'V' => 5,\n+ 'IV' => 4,\n+ 'I' => 1,\n+ ];\n+\n+ $result = '';\n+\n+ foreach ($map as $roman => $value) {\n+ while ($number >= $value) {\n+ $result .= $roman;\n+ $number -= $value;\n+ }\n+ }", + "comment_created_at": "2025-05-19T22:38:42+00:00", + "comment_author": "shaedrich", + "comment_body": "You could prevent trailing iterations when calculating the result is done, speeding the process slightly more up:\r\n```suggestion\r\n foreach ($map as $roman => $value) {\r\n while ($number >= $value) {\r\n $result .= $roman;\r\n $number -= $value;\r\n }\r\n \r\n if ($number === 0) {\r\n \treturn $result;\r\n }\r\n }\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1850814073", + "pr_number": 53596, + "pr_file": "src/Illuminate/Bus/Dispatcher.php", + "created_at": "2024-11-20T18:41:45+00:00", + "commented_code": "{\n $uses = class_uses_recursive($command);\n\n if (in_array(InteractsWithQueue::class, $uses) &&\n in_array(Queueable::class, $uses) &&\n ! $command->job) {\n if (isset($uses[InteractsWithQueue::class], $uses[Queueable::class]) && ! 
$command->job) {", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1850814073", + "repo_full_name": "laravel/framework", + "pr_number": 53596, + "pr_file": "src/Illuminate/Bus/Dispatcher.php", + "discussion_id": "1850814073", + "commented_code": "@@ -109,9 +109,7 @@ public function dispatchNow($command, $handler = null)\n {\n $uses = class_uses_recursive($command);\n \n- if (in_array(InteractsWithQueue::class, $uses) &&\n- in_array(Queueable::class, $uses) &&\n- ! $command->job) {\n+ if (isset($uses[InteractsWithQueue::class], $uses[Queueable::class]) && ! $command->job) {", + "comment_created_at": "2024-11-20T18:41:45+00:00", + "comment_author": "GrahamCampbell", + "comment_body": "This is an incorrect refactor. isset looks at the keys. in_array looks at the values.", + "pr_file_module": null + }, + { + "comment_id": "1850820663", + "repo_full_name": "laravel/framework", + "pr_number": 53596, + "pr_file": "src/Illuminate/Bus/Dispatcher.php", + "discussion_id": "1850814073", + "commented_code": "@@ -109,9 +109,7 @@ public function dispatchNow($command, $handler = null)\n {\n $uses = class_uses_recursive($command);\n \n- if (in_array(InteractsWithQueue::class, $uses) &&\n- in_array(Queueable::class, $uses) &&\n- ! $command->job) {\n+ if (isset($uses[InteractsWithQueue::class], $uses[Queueable::class]) && ! $command->job) {", + "comment_created_at": "2024-11-20T18:47:36+00:00", + "comment_author": "ralphjsmit", + "comment_body": "Isn't `class_uses_recursive()` outputting the exact same key as value?\r\n```php\r\n[\r\n SomeConcern::class => SomeConcern::class,\r\n]\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1850845485", + "repo_full_name": "laravel/framework", + "pr_number": 53596, + "pr_file": "src/Illuminate/Bus/Dispatcher.php", + "discussion_id": "1850814073", + "commented_code": "@@ -109,9 +109,7 @@ public function dispatchNow($command, $handler = null)\n {\n $uses = class_uses_recursive($command);\n \n- if (in_array(InteractsWithQueue::class, $uses) &&\n- in_array(Queueable::class, $uses) &&\n- ! $command->job) {\n+ if (isset($uses[InteractsWithQueue::class], $uses[Queueable::class]) && ! $command->job) {", + "comment_created_at": "2024-11-20T19:08:07+00:00", + "comment_author": "dshafik", + "comment_body": "@GrahamCampbell as @ralphjsmit points out, the result of `class_uses_recursive()` is a key-value pair with the trait name for _both_, therefore you can use `isset()` to check for the usage of a trait (`O(1)`) rather than traverse the array till you find the value using `in_array()`. (`O(n)`)", + "pr_file_module": null + }, + { + "comment_id": "1851152975", + "repo_full_name": "laravel/framework", + "pr_number": 53596, + "pr_file": "src/Illuminate/Bus/Dispatcher.php", + "discussion_id": "1850814073", + "commented_code": "@@ -109,9 +109,7 @@ public function dispatchNow($command, $handler = null)\n {\n $uses = class_uses_recursive($command);\n \n- if (in_array(InteractsWithQueue::class, $uses) &&\n- in_array(Queueable::class, $uses) &&\n- ! $command->job) {\n+ if (isset($uses[InteractsWithQueue::class], $uses[Queueable::class]) && ! 
$command->job) {", + "comment_created_at": "2024-11-21T00:24:15+00:00", + "comment_author": "crynobone", + "comment_body": "This should be fine and we use `isset()` as well for https://github.com/laravel/framework/blob/eb625fa6aa083dbae74b4f2cc7dc6dafcce5ff07/src/Illuminate/Foundation/Testing/Concerns/InteractsWithTestCaseLifecycle.php#L196-L220", + "pr_file_module": null + }, + { + "comment_id": "1851217495", + "repo_full_name": "laravel/framework", + "pr_number": 53596, + "pr_file": "src/Illuminate/Bus/Dispatcher.php", + "discussion_id": "1850814073", + "commented_code": "@@ -109,9 +109,7 @@ public function dispatchNow($command, $handler = null)\n {\n $uses = class_uses_recursive($command);\n \n- if (in_array(InteractsWithQueue::class, $uses) &&\n- in_array(Queueable::class, $uses) &&\n- ! $command->job) {\n+ if (isset($uses[InteractsWithQueue::class], $uses[Queueable::class]) && ! $command->job) {", + "comment_created_at": "2024-11-21T02:09:03+00:00", + "comment_author": "dshafik", + "comment_body": "@crynobone the `array_flip()` in that example should probably be removed too… (unrelated to this PR)", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/framework-optimize-loop-operations.md b/_reviewers/framework-optimize-loop-operations.md index 9be534b..5e7c48a 100644 --- a/_reviewers/framework-optimize-loop-operations.md +++ b/_reviewers/framework-optimize-loop-operations.md @@ -80,167 +80,3 @@ foreach ($map as $roman => $value) { ``` These optimizations help reduce computational complexity and unnecessary operations, resulting in more efficient and maintainable code. - - -[ - { - "discussion_id": "2101429227", - "pr_number": 55815, - "pr_file": "src/Illuminate/Collections/Arr.php", - "created_at": "2025-05-22T01:05:00+00:00", - "commented_code": "return true;\n }\n\n /**\n * Determine if all keys exist in an array using \"dot\" notation.\n *\n * @param \\ArrayAccess|array $array\n * @param string|array $keys\n * @return bool\n */\n public static function hasAll($array, $keys)\n {\n $keys = (array) $keys;\n\n if (! $array || $keys === []) {\n return false;\n }\n\n $result = true;\n\n foreach ($keys as $key) {\n if (! static::has($array, $key)) {\n $result = false;\n }\n }\n\n return $result;", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2101429227", - "repo_full_name": "laravel/framework", - "pr_number": 55815, - "pr_file": "src/Illuminate/Collections/Arr.php", - "discussion_id": "2101429227", - "commented_code": "@@ -495,6 +495,32 @@ public static function has($array, $keys)\n return true;\n }\n \n+ /**\n+ * Determine if all keys exist in an array using \"dot\" notation.\n+ *\n+ * @param \\ArrayAccess|array $array\n+ * @param string|array $keys\n+ * @return bool\n+ */\n+ public static function hasAll($array, $keys)\n+ {\n+ $keys = (array) $keys;\n+\n+ if (! $array || $keys === []) {\n+ return false;\n+ }\n+\n+ $result = true;\n+\n+ foreach ($keys as $key) {\n+ if (! static::has($array, $key)) {\n+ $result = false;\n+ }\n+ }\n+\n+ return $result;", - "comment_created_at": "2025-05-22T01:05:00+00:00", - "comment_author": "Rizky92", - "comment_body": "Might be better to bail on first failure.\r\n```suggestion\r\n foreach ($keys as $key) {\r\n if (! 
static::has($array, $key)) {\r\n return false;\r\n }\r\n }\r\n\r\n return true;\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2101461078", - "repo_full_name": "laravel/framework", - "pr_number": 55815, - "pr_file": "src/Illuminate/Collections/Arr.php", - "discussion_id": "2101429227", - "commented_code": "@@ -495,6 +495,32 @@ public static function has($array, $keys)\n return true;\n }\n \n+ /**\n+ * Determine if all keys exist in an array using \"dot\" notation.\n+ *\n+ * @param \\ArrayAccess|array $array\n+ * @param string|array $keys\n+ * @return bool\n+ */\n+ public static function hasAll($array, $keys)\n+ {\n+ $keys = (array) $keys;\n+\n+ if (! $array || $keys === []) {\n+ return false;\n+ }\n+\n+ $result = true;\n+\n+ foreach ($keys as $key) {\n+ if (! static::has($array, $key)) {\n+ $result = false;\n+ }\n+ }\n+\n+ return $result;", - "comment_created_at": "2025-05-22T01:42:28+00:00", - "comment_author": "devajmeireles", - "comment_body": "Thanks!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2051354482", - "pr_number": 55472, - "pr_file": "src/Illuminate/Collections/Arr.php", - "created_at": "2025-04-19T02:42:19+00:00", - "commented_code": "return is_array($value) ? $value : [$value];\n }\n\n /**\n * Group an associative array by a key or callback.\n *\n * @param array $array\n * @param string|int|callable $groupBy\n * @param bool $preserveKeys\n * @return array\n */\n public static function groupBy($array, $groupBy, $preserveKeys = false)\n {\n $result = [];\n\n foreach ($array as $key => $item) {\n $groupKey = is_callable($groupBy) ? $groupBy($item, $key) : static::get($item, $groupBy);", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2051354482", - "repo_full_name": "laravel/framework", - "pr_number": 55472, - "pr_file": "src/Illuminate/Collections/Arr.php", - "discussion_id": "2051354482", - "commented_code": "@@ -1021,4 +1021,29 @@ public static function wrap($value)\n \n return is_array($value) ? $value : [$value];\n }\n+\n+ /**\n+ * Group an associative array by a key or callback.\n+ *\n+ * @param array $array\n+ * @param string|int|callable $groupBy\n+ * @param bool $preserveKeys\n+ * @return array\n+ */\n+ public static function groupBy($array, $groupBy, $preserveKeys = false)\n+ {\n+ $result = [];\n+\n+ foreach ($array as $key => $item) {\n+ $groupKey = is_callable($groupBy) ? $groupBy($item, $key) : static::get($item, $groupBy);", - "comment_created_at": "2025-04-19T02:42:19+00:00", - "comment_author": "rodrigopedra", - "comment_body": "I would ensure `$groupBy` is a callback out the loop, so we don't need to check if it is a callback on every item.\r\n\r\nSomething like this:\r\n\r\n```php\r\n$groupBy = is_callable($groupBy) ? $groupBy : fn ($item) => static::get($item, $groupBy);\r\n\r\nforeach ($array as $key => $item) {\r\n $groupKey = $groupBy($item, $key);\r\n\r\n // ...\r\n}\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2051403566", - "repo_full_name": "laravel/framework", - "pr_number": 55472, - "pr_file": "src/Illuminate/Collections/Arr.php", - "discussion_id": "2051354482", - "commented_code": "@@ -1021,4 +1021,29 @@ public static function wrap($value)\n \n return is_array($value) ? 
$value : [$value];\n }\n+\n+ /**\n+ * Group an associative array by a key or callback.\n+ *\n+ * @param array $array\n+ * @param string|int|callable $groupBy\n+ * @param bool $preserveKeys\n+ * @return array\n+ */\n+ public static function groupBy($array, $groupBy, $preserveKeys = false)\n+ {\n+ $result = [];\n+\n+ foreach ($array as $key => $item) {\n+ $groupKey = is_callable($groupBy) ? $groupBy($item, $key) : static::get($item, $groupBy);", - "comment_created_at": "2025-04-19T05:54:40+00:00", - "comment_author": "vishal2931", - "comment_body": "Thanks for the feedback, but that approach looks more readable than this one. \ud83e\udd14", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2096571598", - "pr_number": 55784, - "pr_file": "src/Illuminate/Support/Number.php", - "created_at": "2025-05-19T22:38:42+00:00", - "commented_code": "throw new RuntimeException('The \"intl\" PHP extension is required to use the ['.$method.'] method.');\n }\n }\n\n /**\n * Convert an integer into its Roman numeral representation.\n *\n * @param int $number The number to convert (must be between 1 and 3999)\n * @return string The Roman numeral representation\n *\n * @throws InvalidArgumentException If the number is not between 1 and 3999\n */\n public static function roman(int $number): string\n {\n if ($number <= 0 || $number > 3999) {\n throw new InvalidArgumentException('Number must be between 1 and 3999.');\n }\n\n $map = [\n 'M' => 1000,\n 'CM' => 900,\n 'D' => 500,\n 'CD' => 400,\n 'C' => 100,\n 'XC' => 90,\n 'L' => 50,\n 'XL' => 40,\n 'X' => 10,\n 'IX' => 9,\n 'V' => 5,\n 'IV' => 4,\n 'I' => 1,\n ];\n\n $result = '';\n\n foreach ($map as $roman => $value) {\n while ($number >= $value) {\n $result .= $roman;\n $number -= $value;\n }\n }", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2096571598", - "repo_full_name": "laravel/framework", - "pr_number": 55784, - "pr_file": "src/Illuminate/Support/Number.php", - "discussion_id": "2096571598", - "commented_code": "@@ -427,4 +428,46 @@ protected static function ensureIntlExtensionIsInstalled()\n throw new RuntimeException('The \"intl\" PHP extension is required to use the ['.$method.'] method.');\n }\n }\n+\n+ /**\n+ * Convert an integer into its Roman numeral representation.\n+ *\n+ * @param int $number The number to convert (must be between 1 and 3999)\n+ * @return string The Roman numeral representation\n+ *\n+ * @throws InvalidArgumentException If the number is not between 1 and 3999\n+ */\n+ public static function roman(int $number): string\n+ {\n+ if ($number <= 0 || $number > 3999) {\n+ throw new InvalidArgumentException('Number must be between 1 and 3999.');\n+ }\n+\n+ $map = [\n+ 'M' => 1000,\n+ 'CM' => 900,\n+ 'D' => 500,\n+ 'CD' => 400,\n+ 'C' => 100,\n+ 'XC' => 90,\n+ 'L' => 50,\n+ 'XL' => 40,\n+ 'X' => 10,\n+ 'IX' => 9,\n+ 'V' => 5,\n+ 'IV' => 4,\n+ 'I' => 1,\n+ ];\n+\n+ $result = '';\n+\n+ foreach ($map as $roman => $value) {\n+ while ($number >= $value) {\n+ $result .= $roman;\n+ $number -= $value;\n+ }\n+ }", - "comment_created_at": "2025-05-19T22:38:42+00:00", - "comment_author": "shaedrich", - "comment_body": "You could prevent trailing iterations when calculating the result is done, speeding the process slightly more up:\r\n```suggestion\r\n foreach ($map as $roman => $value) {\r\n while ($number >= $value) {\r\n $result .= $roman;\r\n $number -= $value;\r\n }\r\n \r\n if ($number === 0) {\r\n \treturn $result;\r\n }\r\n }\r\n```", - "pr_file_module": null - } - ] - }, - { - 
"discussion_id": "1850814073", - "pr_number": 53596, - "pr_file": "src/Illuminate/Bus/Dispatcher.php", - "created_at": "2024-11-20T18:41:45+00:00", - "commented_code": "{\n $uses = class_uses_recursive($command);\n\n if (in_array(InteractsWithQueue::class, $uses) &&\n in_array(Queueable::class, $uses) &&\n ! $command->job) {\n if (isset($uses[InteractsWithQueue::class], $uses[Queueable::class]) && ! $command->job) {", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1850814073", - "repo_full_name": "laravel/framework", - "pr_number": 53596, - "pr_file": "src/Illuminate/Bus/Dispatcher.php", - "discussion_id": "1850814073", - "commented_code": "@@ -109,9 +109,7 @@ public function dispatchNow($command, $handler = null)\n {\n $uses = class_uses_recursive($command);\n \n- if (in_array(InteractsWithQueue::class, $uses) &&\n- in_array(Queueable::class, $uses) &&\n- ! $command->job) {\n+ if (isset($uses[InteractsWithQueue::class], $uses[Queueable::class]) && ! $command->job) {", - "comment_created_at": "2024-11-20T18:41:45+00:00", - "comment_author": "GrahamCampbell", - "comment_body": "This is an incorrect refactor. isset looks at the keys. in_array looks at the values.", - "pr_file_module": null - }, - { - "comment_id": "1850820663", - "repo_full_name": "laravel/framework", - "pr_number": 53596, - "pr_file": "src/Illuminate/Bus/Dispatcher.php", - "discussion_id": "1850814073", - "commented_code": "@@ -109,9 +109,7 @@ public function dispatchNow($command, $handler = null)\n {\n $uses = class_uses_recursive($command);\n \n- if (in_array(InteractsWithQueue::class, $uses) &&\n- in_array(Queueable::class, $uses) &&\n- ! $command->job) {\n+ if (isset($uses[InteractsWithQueue::class], $uses[Queueable::class]) && ! $command->job) {", - "comment_created_at": "2024-11-20T18:47:36+00:00", - "comment_author": "ralphjsmit", - "comment_body": "Isn't `class_uses_recursive()` outputting the exact same key as value?\r\n```php\r\n[\r\n SomeConcern::class => SomeConcern::class,\r\n]\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1850845485", - "repo_full_name": "laravel/framework", - "pr_number": 53596, - "pr_file": "src/Illuminate/Bus/Dispatcher.php", - "discussion_id": "1850814073", - "commented_code": "@@ -109,9 +109,7 @@ public function dispatchNow($command, $handler = null)\n {\n $uses = class_uses_recursive($command);\n \n- if (in_array(InteractsWithQueue::class, $uses) &&\n- in_array(Queueable::class, $uses) &&\n- ! $command->job) {\n+ if (isset($uses[InteractsWithQueue::class], $uses[Queueable::class]) && ! $command->job) {", - "comment_created_at": "2024-11-20T19:08:07+00:00", - "comment_author": "dshafik", - "comment_body": "@GrahamCampbell as @ralphjsmit points out, the result of `class_uses_recursive()` is a key-value pair with the trait name for _both_, therefore you can use `isset()` to check for the usage of a trait (`O(1)`) rather than traverse the array till you find the value using `in_array()`. (`O(n)`)", - "pr_file_module": null - }, - { - "comment_id": "1851152975", - "repo_full_name": "laravel/framework", - "pr_number": 53596, - "pr_file": "src/Illuminate/Bus/Dispatcher.php", - "discussion_id": "1850814073", - "commented_code": "@@ -109,9 +109,7 @@ public function dispatchNow($command, $handler = null)\n {\n $uses = class_uses_recursive($command);\n \n- if (in_array(InteractsWithQueue::class, $uses) &&\n- in_array(Queueable::class, $uses) &&\n- ! $command->job) {\n+ if (isset($uses[InteractsWithQueue::class], $uses[Queueable::class]) && ! 
$command->job) {", - "comment_created_at": "2024-11-21T00:24:15+00:00", - "comment_author": "crynobone", - "comment_body": "This should be fine and we use `isset()` as well for https://github.com/laravel/framework/blob/eb625fa6aa083dbae74b4f2cc7dc6dafcce5ff07/src/Illuminate/Foundation/Testing/Concerns/InteractsWithTestCaseLifecycle.php#L196-L220", - "pr_file_module": null - }, - { - "comment_id": "1851217495", - "repo_full_name": "laravel/framework", - "pr_number": 53596, - "pr_file": "src/Illuminate/Bus/Dispatcher.php", - "discussion_id": "1850814073", - "commented_code": "@@ -109,9 +109,7 @@ public function dispatchNow($command, $handler = null)\n {\n $uses = class_uses_recursive($command);\n \n- if (in_array(InteractsWithQueue::class, $uses) &&\n- in_array(Queueable::class, $uses) &&\n- ! $command->job) {\n+ if (isset($uses[InteractsWithQueue::class], $uses[Queueable::class]) && ! $command->job) {", - "comment_created_at": "2024-11-21T02:09:03+00:00", - "comment_author": "dshafik", - "comment_body": "@crynobone the `array_flip()` in that example should probably be removed too\u2026 (unrelated to this PR)", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/framework-optimize-migration-code.json b/_reviewers/framework-optimize-migration-code.json new file mode 100644 index 0000000..6e86cae --- /dev/null +++ b/_reviewers/framework-optimize-migration-code.json @@ -0,0 +1,102 @@ +[ + { + "discussion_id": "2008500679", + "pr_number": 55011, + "pr_file": "src/Illuminate/Database/Migrations/Migrator.php", + "created_at": "2025-03-22T00:25:12+00:00", + "commented_code": "return $this->pretendToRun($migration, 'up');\n }\n\n $this->write(Task::class, $name, fn () => $this->runMigration($migration, 'up'));\n if ($this->shouldSkipMigration($migration)) {\n $this->write(Task::class, $name, fn () => MigrationResult::Skipped);", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2008500679", + "repo_full_name": "laravel/framework", + "pr_number": 55011, + "pr_file": "src/Illuminate/Database/Migrations/Migrator.php", + "discussion_id": "2008500679", + "commented_code": "@@ -241,12 +254,16 @@ protected function runUp($file, $batch, $pretend)\n return $this->pretendToRun($migration, 'up');\n }\n \n- $this->write(Task::class, $name, fn () => $this->runMigration($migration, 'up'));\n+ if ($this->shouldSkipMigration($migration)) {\n+ $this->write(Task::class, $name, fn () => MigrationResult::Skipped);", + "comment_created_at": "2025-03-22T00:25:12+00:00", + "comment_author": "inmanturbo", + "comment_body": "Shouldn't this return here instead of the else?\r\n\r\nThis will log the migration even though it was never run? 
This means that the migration won't get run later either, after the \"feature\" is \"enabled\" or `Migration::shouldRun()` otherwise returns `true`;\r\n\r\n```php\r\n\r\n if ($this->shouldSkipMigration($migration)) {\r\n $this->write(Task::class, $name, fn () => MigrationResult::Skipped);\r\n return;\r\n }\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2008510104", + "repo_full_name": "laravel/framework", + "pr_number": 55011, + "pr_file": "src/Illuminate/Database/Migrations/Migrator.php", + "discussion_id": "2008500679", + "commented_code": "@@ -241,12 +254,16 @@ protected function runUp($file, $batch, $pretend)\n return $this->pretendToRun($migration, 'up');\n }\n \n- $this->write(Task::class, $name, fn () => $this->runMigration($migration, 'up'));\n+ if ($this->shouldSkipMigration($migration)) {\n+ $this->write(Task::class, $name, fn () => MigrationResult::Skipped);", + "comment_created_at": "2025-03-22T00:32:25+00:00", + "comment_author": "inmanturbo", + "comment_body": "Ahh I see, I was confused, you wrapped the `Migrator::log()` call inside the else block. In that case I would suggest just returning early.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1921142352", + "pr_number": 54250, + "pr_file": "src/Illuminate/Database/Migrations/Migrator.php", + "created_at": "2025-01-18T20:00:20+00:00", + "commented_code": "// We want to pull in the last batch of migrations that ran on the previous\n // migration operation. We'll then reverse those migrations and run each\n // of them \"down\" to reverse the last migration \"operation\" which ran.\n $migrations = $this->getMigrationsForRollback($options);\n $migrations = isset($options['selected']) && count($options['selected']) > 0 ? $options['selected'] : $this->getMigrationsForRollback($options);", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1921142352", + "repo_full_name": "laravel/framework", + "pr_number": 54250, + "pr_file": "src/Illuminate/Database/Migrations/Migrator.php", + "discussion_id": "1921142352", + "commented_code": "@@ -230,7 +231,7 @@ public function rollback($paths = [], array $options = [])\n // We want to pull in the last batch of migrations that ran on the previous\n // migration operation. We'll then reverse those migrations and run each\n // of them \"down\" to reverse the last migration \"operation\" which ran.\n- $migrations = $this->getMigrationsForRollback($options);\n+ $migrations = isset($options['selected']) && count($options['selected']) > 0 ? $options['selected'] : $this->getMigrationsForRollback($options);", + "comment_created_at": "2025-01-18T20:00:20+00:00", + "comment_author": "shaedrich", + "comment_body": "Function call can be avoided like this:\r\n```suggestion\r\n $migrations = isset($options['selected']) && $options['selected'] !== [] ? $options['selected'] : $this->getMigrationsForRollback($options);\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1921142441", + "pr_number": 54250, + "pr_file": "src/Illuminate/Database/Migrations/Migrator.php", + "created_at": "2025-01-18T20:01:08+00:00", + "commented_code": "{\n $this->events?->dispatch($event);\n }\n\n protected function getMigrationBatches($migrations)\n {\n $options = $this->resolveOptions($migrations);\n $selected = $options['selected'] ?? 
[];\n\n if (count($selected) > 0) {", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1921142441", + "repo_full_name": "laravel/framework", + "pr_number": 54250, + "pr_file": "src/Illuminate/Database/Migrations/Migrator.php", + "discussion_id": "1921142441", + "commented_code": "@@ -754,4 +758,32 @@ public function fireMigrationEvent($event)\n {\n $this->events?->dispatch($event);\n }\n+\n+ protected function getMigrationBatches($migrations)\n+ {\n+ $options = $this->resolveOptions($migrations);\n+ $selected = $options['selected'] ?? [];\n+\n+ if (count($selected) > 0) {", + "comment_created_at": "2025-01-18T20:01:08+00:00", + "comment_author": "shaedrich", + "comment_body": "Same here:\r\n```suggestion\r\n if ($selected !== []) {\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1921142809", + "pr_number": 54250, + "pr_file": "src/Illuminate/Database/Migrations/Migrator.php", + "created_at": "2025-01-18T20:03:19+00:00", + "commented_code": "public function getMigrationFiles($paths)\n {\n return (new Collection($paths))\n ->flatMap(fn ($path) => str_ends_with($path, '.php') ? [$path] : $this->files->glob($path.'/*_*.php'))\n ->flatMap(fn($path) => str_ends_with($path, '.php') ? [$path] : $this->files->glob($path . '/*_*.php'))\n ->filter()\n ->values()\n ->keyBy(fn ($file) => $this->getMigrationName($file))\n ->sortBy(fn ($file, $key) => $key)\n ->keyBy(fn($file) => $this->getMigrationName($file))", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1921142809", + "repo_full_name": "laravel/framework", + "pr_number": 54250, + "pr_file": "src/Illuminate/Database/Migrations/Migrator.php", + "discussion_id": "1921142809", + "commented_code": "@@ -537,11 +541,11 @@ protected function getMigrationClass(string $migrationName): string\n public function getMigrationFiles($paths)\n {\n return (new Collection($paths))\n- ->flatMap(fn ($path) => str_ends_with($path, '.php') ? [$path] : $this->files->glob($path.'/*_*.php'))\n+ ->flatMap(fn($path) => str_ends_with($path, '.php') ? [$path] : $this->files->glob($path . '/*_*.php'))\n ->filter()\n ->values()\n- ->keyBy(fn ($file) => $this->getMigrationName($file))\n- ->sortBy(fn ($file, $key) => $key)\n+ ->keyBy(fn($file) => $this->getMigrationName($file))", + "comment_created_at": "2025-01-18T20:03:19+00:00", + "comment_author": "shaedrich", + "comment_body": "You might be able to replace it with this:\r\n```suggestion\r\n ->keyBy($this->getMigrationName(...))\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/framework-optimize-migration-code.md b/_reviewers/framework-optimize-migration-code.md index 09d58c2..24993de 100644 --- a/_reviewers/framework-optimize-migration-code.md +++ b/_reviewers/framework-optimize-migration-code.md @@ -58,107 +58,3 @@ When writing database migration code, prioritize clarity and efficiency to ensur ``` These practices help create more readable and maintainable migration code, reducing the chance of errors during database schema changes. 
- - -[ - { - "discussion_id": "2008500679", - "pr_number": 55011, - "pr_file": "src/Illuminate/Database/Migrations/Migrator.php", - "created_at": "2025-03-22T00:25:12+00:00", - "commented_code": "return $this->pretendToRun($migration, 'up');\n }\n\n $this->write(Task::class, $name, fn () => $this->runMigration($migration, 'up'));\n if ($this->shouldSkipMigration($migration)) {\n $this->write(Task::class, $name, fn () => MigrationResult::Skipped);", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2008500679", - "repo_full_name": "laravel/framework", - "pr_number": 55011, - "pr_file": "src/Illuminate/Database/Migrations/Migrator.php", - "discussion_id": "2008500679", - "commented_code": "@@ -241,12 +254,16 @@ protected function runUp($file, $batch, $pretend)\n return $this->pretendToRun($migration, 'up');\n }\n \n- $this->write(Task::class, $name, fn () => $this->runMigration($migration, 'up'));\n+ if ($this->shouldSkipMigration($migration)) {\n+ $this->write(Task::class, $name, fn () => MigrationResult::Skipped);", - "comment_created_at": "2025-03-22T00:25:12+00:00", - "comment_author": "inmanturbo", - "comment_body": "Shouldn't this return here instead of the else?\r\n\r\nThis will log the migration even though it was never run? This means that the migration won't get run later either, after the \"feature\" is \"enabled\" or `Migration::shouldRun()` otherwise returns `true`;\r\n\r\n```php\r\n\r\n if ($this->shouldSkipMigration($migration)) {\r\n $this->write(Task::class, $name, fn () => MigrationResult::Skipped);\r\n return;\r\n }\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2008510104", - "repo_full_name": "laravel/framework", - "pr_number": 55011, - "pr_file": "src/Illuminate/Database/Migrations/Migrator.php", - "discussion_id": "2008500679", - "commented_code": "@@ -241,12 +254,16 @@ protected function runUp($file, $batch, $pretend)\n return $this->pretendToRun($migration, 'up');\n }\n \n- $this->write(Task::class, $name, fn () => $this->runMigration($migration, 'up'));\n+ if ($this->shouldSkipMigration($migration)) {\n+ $this->write(Task::class, $name, fn () => MigrationResult::Skipped);", - "comment_created_at": "2025-03-22T00:32:25+00:00", - "comment_author": "inmanturbo", - "comment_body": "Ahh I see, I was confused, you wrapped the `Migrator::log()` call inside the else block. In that case I would suggest just returning early.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1921142352", - "pr_number": 54250, - "pr_file": "src/Illuminate/Database/Migrations/Migrator.php", - "created_at": "2025-01-18T20:00:20+00:00", - "commented_code": "// We want to pull in the last batch of migrations that ran on the previous\n // migration operation. We'll then reverse those migrations and run each\n // of them \"down\" to reverse the last migration \"operation\" which ran.\n $migrations = $this->getMigrationsForRollback($options);\n $migrations = isset($options['selected']) && count($options['selected']) > 0 ? 
$options['selected'] : $this->getMigrationsForRollback($options);", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1921142352", - "repo_full_name": "laravel/framework", - "pr_number": 54250, - "pr_file": "src/Illuminate/Database/Migrations/Migrator.php", - "discussion_id": "1921142352", - "commented_code": "@@ -230,7 +231,7 @@ public function rollback($paths = [], array $options = [])\n // We want to pull in the last batch of migrations that ran on the previous\n // migration operation. We'll then reverse those migrations and run each\n // of them \"down\" to reverse the last migration \"operation\" which ran.\n- $migrations = $this->getMigrationsForRollback($options);\n+ $migrations = isset($options['selected']) && count($options['selected']) > 0 ? $options['selected'] : $this->getMigrationsForRollback($options);", - "comment_created_at": "2025-01-18T20:00:20+00:00", - "comment_author": "shaedrich", - "comment_body": "Function call can be avoided like this:\r\n```suggestion\r\n $migrations = isset($options['selected']) && $options['selected'] !== [] ? $options['selected'] : $this->getMigrationsForRollback($options);\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1921142441", - "pr_number": 54250, - "pr_file": "src/Illuminate/Database/Migrations/Migrator.php", - "created_at": "2025-01-18T20:01:08+00:00", - "commented_code": "{\n $this->events?->dispatch($event);\n }\n\n protected function getMigrationBatches($migrations)\n {\n $options = $this->resolveOptions($migrations);\n $selected = $options['selected'] ?? [];\n\n if (count($selected) > 0) {", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1921142441", - "repo_full_name": "laravel/framework", - "pr_number": 54250, - "pr_file": "src/Illuminate/Database/Migrations/Migrator.php", - "discussion_id": "1921142441", - "commented_code": "@@ -754,4 +758,32 @@ public function fireMigrationEvent($event)\n {\n $this->events?->dispatch($event);\n }\n+\n+ protected function getMigrationBatches($migrations)\n+ {\n+ $options = $this->resolveOptions($migrations);\n+ $selected = $options['selected'] ?? [];\n+\n+ if (count($selected) > 0) {", - "comment_created_at": "2025-01-18T20:01:08+00:00", - "comment_author": "shaedrich", - "comment_body": "Same here:\r\n```suggestion\r\n if ($selected !== []) {\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1921142809", - "pr_number": 54250, - "pr_file": "src/Illuminate/Database/Migrations/Migrator.php", - "created_at": "2025-01-18T20:03:19+00:00", - "commented_code": "public function getMigrationFiles($paths)\n {\n return (new Collection($paths))\n ->flatMap(fn ($path) => str_ends_with($path, '.php') ? [$path] : $this->files->glob($path.'/*_*.php'))\n ->flatMap(fn($path) => str_ends_with($path, '.php') ? [$path] : $this->files->glob($path . 
'/*_*.php'))\n ->filter()\n ->values()\n ->keyBy(fn ($file) => $this->getMigrationName($file))\n ->sortBy(fn ($file, $key) => $key)\n ->keyBy(fn($file) => $this->getMigrationName($file))", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1921142809", - "repo_full_name": "laravel/framework", - "pr_number": 54250, - "pr_file": "src/Illuminate/Database/Migrations/Migrator.php", - "discussion_id": "1921142809", - "commented_code": "@@ -537,11 +541,11 @@ protected function getMigrationClass(string $migrationName): string\n public function getMigrationFiles($paths)\n {\n return (new Collection($paths))\n- ->flatMap(fn ($path) => str_ends_with($path, '.php') ? [$path] : $this->files->glob($path.'/*_*.php'))\n+ ->flatMap(fn($path) => str_ends_with($path, '.php') ? [$path] : $this->files->glob($path . '/*_*.php'))\n ->filter()\n ->values()\n- ->keyBy(fn ($file) => $this->getMigrationName($file))\n- ->sortBy(fn ($file, $key) => $key)\n+ ->keyBy(fn($file) => $this->getMigrationName($file))", - "comment_created_at": "2025-01-18T20:03:19+00:00", - "comment_author": "shaedrich", - "comment_body": "You might be able to replace it with this:\r\n```suggestion\r\n ->keyBy($this->getMigrationName(...))\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/framework-precise-testing-dependency-versioning.json b/_reviewers/framework-precise-testing-dependency-versioning.json new file mode 100644 index 0000000..4d4f9cc --- /dev/null +++ b/_reviewers/framework-precise-testing-dependency-versioning.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "1938439791", + "pr_number": 54316, + "pr_file": "composer.json", + "created_at": "2025-02-02T09:02:38+00:00", + "commented_code": "\"league/flysystem-read-only\": \"^3.25.1\",\n \"league/flysystem-sftp-v3\": \"^3.25.1\",\n \"mockery/mockery\": \"^1.6.10\",\n \"orchestra/testbench-core\": \"^9.6\",\n \"orchestra/testbench-core\": \"9.x-dev\",", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1938439791", + "repo_full_name": "laravel/framework", + "pr_number": 54316, + "pr_file": "composer.json", + "discussion_id": "1938439791", + "commented_code": "@@ -111,11 +111,11 @@\n \"league/flysystem-read-only\": \"^3.25.1\",\n \"league/flysystem-sftp-v3\": \"^3.25.1\",\n \"mockery/mockery\": \"^1.6.10\",\n- \"orchestra/testbench-core\": \"^9.6\",\n+ \"orchestra/testbench-core\": \"9.x-dev\",", + "comment_created_at": "2025-02-02T09:02:38+00:00", + "comment_author": "crynobone", + "comment_body": "```suggestion\r\n \"orchestra/testbench-core\": \"^9.9.3\",\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1946329442", + "pr_number": 54316, + "pr_file": "src/Illuminate/Testing/composer.json", + "created_at": "2025-02-07T10:41:38+00:00", + "commented_code": "\"illuminate/database\": \"Required to assert databases (^11.0).\",\n \"illuminate/http\": \"Required to assert responses (^11.0).\",\n \"mockery/mockery\": \"Required to use mocking (^1.6).\",\n \"phpunit/phpunit\": \"Required to use assertions and run tests (^10.5|^11.0).\"\n \"phpunit/phpunit\": \"Required to use assertions and run tests (^10.5.35|^11.3.6|^12.0).\"", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1946329442", + "repo_full_name": "laravel/framework", + "pr_number": 54316, + "pr_file": "src/Illuminate/Testing/composer.json", + "discussion_id": "1946329442", + "commented_code": "@@ -37,7 +37,7 @@\n \"illuminate/database\": \"Required to assert databases 
(^11.0).\",\n \"illuminate/http\": \"Required to assert responses (^11.0).\",\n \"mockery/mockery\": \"Required to use mocking (^1.6).\",\n- \"phpunit/phpunit\": \"Required to use assertions and run tests (^10.5|^11.0).\"\n+ \"phpunit/phpunit\": \"Required to use assertions and run tests (^10.5.35|^11.3.6|^12.0).\"", + "comment_created_at": "2025-02-07T10:41:38+00:00", + "comment_author": "crynobone", + "comment_body": "```suggestion\r\n \"phpunit/phpunit\": \"Required to use assertions and run tests (^10.5.35|^11.3.6|^12.0.1).\"\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/framework-precise-testing-dependency-versioning.md b/_reviewers/framework-precise-testing-dependency-versioning.md index 1c2e5f8..d5e1510 100644 --- a/_reviewers/framework-precise-testing-dependency-versioning.md +++ b/_reviewers/framework-precise-testing-dependency-versioning.md @@ -30,51 +30,3 @@ Example of practices to avoid: ``` This standard helps prevent subtle test failures due to dependency changes and makes test behavior more predictable across all environments. Using precise version constraints is particularly important for testing frameworks as unexpected behavior in these dependencies can lead to false positives or negatives in your test results. - - -[ - { - "discussion_id": "1938439791", - "pr_number": 54316, - "pr_file": "composer.json", - "created_at": "2025-02-02T09:02:38+00:00", - "commented_code": "\"league/flysystem-read-only\": \"^3.25.1\",\n \"league/flysystem-sftp-v3\": \"^3.25.1\",\n \"mockery/mockery\": \"^1.6.10\",\n \"orchestra/testbench-core\": \"^9.6\",\n \"orchestra/testbench-core\": \"9.x-dev\",", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1938439791", - "repo_full_name": "laravel/framework", - "pr_number": 54316, - "pr_file": "composer.json", - "discussion_id": "1938439791", - "commented_code": "@@ -111,11 +111,11 @@\n \"league/flysystem-read-only\": \"^3.25.1\",\n \"league/flysystem-sftp-v3\": \"^3.25.1\",\n \"mockery/mockery\": \"^1.6.10\",\n- \"orchestra/testbench-core\": \"^9.6\",\n+ \"orchestra/testbench-core\": \"9.x-dev\",", - "comment_created_at": "2025-02-02T09:02:38+00:00", - "comment_author": "crynobone", - "comment_body": "```suggestion\r\n \"orchestra/testbench-core\": \"^9.9.3\",\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1946329442", - "pr_number": 54316, - "pr_file": "src/Illuminate/Testing/composer.json", - "created_at": "2025-02-07T10:41:38+00:00", - "commented_code": "\"illuminate/database\": \"Required to assert databases (^11.0).\",\n \"illuminate/http\": \"Required to assert responses (^11.0).\",\n \"mockery/mockery\": \"Required to use mocking (^1.6).\",\n \"phpunit/phpunit\": \"Required to use assertions and run tests (^10.5|^11.0).\"\n \"phpunit/phpunit\": \"Required to use assertions and run tests (^10.5.35|^11.3.6|^12.0).\"", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1946329442", - "repo_full_name": "laravel/framework", - "pr_number": 54316, - "pr_file": "src/Illuminate/Testing/composer.json", - "discussion_id": "1946329442", - "commented_code": "@@ -37,7 +37,7 @@\n \"illuminate/database\": \"Required to assert databases (^11.0).\",\n \"illuminate/http\": \"Required to assert responses (^11.0).\",\n \"mockery/mockery\": \"Required to use mocking (^1.6).\",\n- \"phpunit/phpunit\": \"Required to use assertions and run tests (^10.5|^11.0).\"\n+ \"phpunit/phpunit\": \"Required to use 
assertions and run tests (^10.5.35|^11.3.6|^12.0).\"", - "comment_created_at": "2025-02-07T10:41:38+00:00", - "comment_author": "crynobone", - "comment_body": "```suggestion\r\n \"phpunit/phpunit\": \"Required to use assertions and run tests (^10.5.35|^11.3.6|^12.0.1).\"\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/framework-precise-type-annotations.json b/_reviewers/framework-precise-type-annotations.json new file mode 100644 index 0000000..2fc5131 --- /dev/null +++ b/_reviewers/framework-precise-type-annotations.json @@ -0,0 +1,226 @@ +[ + { + "discussion_id": "2144424524", + "pr_number": 56028, + "pr_file": "src/Illuminate/Cache/MemoizedStore.php", + "created_at": "2025-06-13T07:53:41+00:00", + "commented_code": "/**\n * Store multiple items in the cache for a given number of seconds.\n *\n * @param array $values", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2144424524", + "repo_full_name": "laravel/framework", + "pr_number": 56028, + "pr_file": "src/Illuminate/Cache/MemoizedStore.php", + "discussion_id": "2144424524", + "commented_code": "@@ -108,6 +108,7 @@ public function put($key, $value, $seconds)\n /**\n * Store multiple items in the cache for a given number of seconds.\n *\n+ * @param array $values", + "comment_created_at": "2025-06-13T07:53:41+00:00", + "comment_author": "shaedrich", + "comment_body": "Since `prefix()` expects a `string`, so this can be reflected here:\r\n```suggestion\r\n * @param array $values\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2102803004", + "pr_number": 55821, + "pr_file": "src/Illuminate/Support/Str.php", + "created_at": "2025-05-22T15:08:00+00:00", + "commented_code": "static::$camelCache = [];\n static::$studlyCache = [];\n }\n\n /**\n * Split a string by a given separator.\n *\n * @param string $separator\n * @param string $string\n * @param int $limit\n * @return array", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2102803004", + "repo_full_name": "laravel/framework", + "pr_number": 55821, + "pr_file": "src/Illuminate/Support/Str.php", + "discussion_id": "2102803004", + "commented_code": "@@ -2083,4 +2083,17 @@ public static function flushCache()\n static::$camelCache = [];\n static::$studlyCache = [];\n }\n+\n+ /**\n+ * Split a string by a given separator.\n+ *\n+ * @param string $separator\n+ * @param string $string\n+ * @param int $limit\n+ * @return array", + "comment_created_at": "2025-05-22T15:08:00+00:00", + "comment_author": "shaedrich", + "comment_body": "The `array` will either consist of strings or be empty, so we can narrow this further as well:\r\n```suggestion\r\n * @return string[]\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2102806447", + "repo_full_name": "laravel/framework", + "pr_number": 55821, + "pr_file": "src/Illuminate/Support/Str.php", + "discussion_id": "2102803004", + "commented_code": "@@ -2083,4 +2083,17 @@ public static function flushCache()\n static::$camelCache = [];\n static::$studlyCache = [];\n }\n+\n+ /**\n+ * Split a string by a given separator.\n+ *\n+ * @param string $separator\n+ * @param string $string\n+ * @param int $limit\n+ * @return array", + "comment_created_at": "2025-05-22T15:09:35+00:00", + "comment_author": "shaedrich", + "comment_body": "Ah, damn, sorry, hadn't submitted it and didn't see that it was closed when I hit the 'submit' button an hour later 😅", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2085688431", + 
"pr_number": 55721, + "pr_file": "src/Illuminate/Database/Eloquent/Relations/MorphToMany.php", + "created_at": "2025-05-12T23:36:35+00:00", + "commented_code": "}\n\n /**\n * Get the pivot models that are currently attached.\n * Get the pivot models that are currently attached, filtered by related model keys.\n *\n * @return \\Illuminate\\Support\\Collection\n * @param mixed $ids\n * @return \\Illuminate\\Support\\Collection", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2085688431", + "repo_full_name": "laravel/framework", + "pr_number": 55721, + "pr_file": "src/Illuminate/Database/Eloquent/Relations/MorphToMany.php", + "discussion_id": "2085688431", + "commented_code": "@@ -121,13 +121,14 @@ public function getRelationExistenceQuery(Builder $query, Builder $parentQuery,\n }\n \n /**\n- * Get the pivot models that are currently attached.\n+ * Get the pivot models that are currently attached, filtered by related model keys.\n *\n- * @return \\Illuminate\\Support\\Collection\n+ * @param mixed $ids\n+ * @return \\Illuminate\\Support\\Collection", + "comment_created_at": "2025-05-12T23:36:35+00:00", + "comment_author": "shaedrich", + "comment_body": "Shouldn't this stay\r\n```suggestion\r\n * @return \\Illuminate\\Support\\Collection\r\n```\r\n? If there is trouble with static analysis, the [PHPDoc comment in `InteractsWithPivotTable`](https://github.com/laravel/framework/blob/12.x/src/Illuminate/Database/Eloquent/Relations/Concerns/InteractsWithPivotTable.php#L496) should be adjusted accordingly", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2077010982", + "pr_number": 55663, + "pr_file": "src/Illuminate/Database/Query/Builder.php", + "created_at": "2025-05-07T07:45:32+00:00", + "commented_code": "/**\n * Set the columns to be selected.\n *\n * @param array|mixed $columns\n * @param array|mixed $columns", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2077010982", + "repo_full_name": "laravel/framework", + "pr_number": 55663, + "pr_file": "src/Illuminate/Database/Query/Builder.php", + "discussion_id": "2077010982", + "commented_code": "@@ -278,7 +278,7 @@ public function __construct(\n /**\n * Set the columns to be selected.\n *\n- * @param array|mixed $columns\n+ * @param array|mixed $columns", + "comment_created_at": "2025-05-07T07:45:32+00:00", + "comment_author": "shaedrich", + "comment_body": "This is not primarily to be read by humans but by tools and for them `mixed` and your proposed alternative make a **huge** difference. 
If you want to make it more bite-sized, you can always resort to `@phpstan-type` or the like", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1993214446", + "pr_number": 54992, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", + "created_at": "2025-03-13T10:25:43+00:00", + "commented_code": "return $value;\n }\n\n /**\n * Get JSON casting flags for the specified attribute.\n *\n * @param string $key\n * @return int", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1993214446", + "repo_full_name": "laravel/framework", + "pr_number": 54992, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", + "discussion_id": "1993214446", + "commented_code": "@@ -1324,15 +1326,33 @@ protected function castAttributeAsJson($key, $value)\n return $value;\n }\n \n+ /**\n+ * Get JSON casting flags for the specified attribute.\n+ *\n+ * @param string $key\n+ * @return int", + "comment_created_at": "2025-03-13T10:25:43+00:00", + "comment_author": "shaedrich", + "comment_body": "Therefore, you can narrow the return here as well:\r\n```suggestion\r\n * @return int-mask-of\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1842638038", + "pr_number": 53427, + "pr_file": "src/Illuminate/Console/Scheduling/Schedule.php", + "created_at": "2024-11-14T17:33:19+00:00", + "commented_code": "use Illuminate\\Support\\Traits\\Macroable;\nuse RuntimeException;\n\n/**\n * @mixin ScheduleAttributes", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1842638038", + "repo_full_name": "laravel/framework", + "pr_number": 53427, + "pr_file": "src/Illuminate/Console/Scheduling/Schedule.php", + "discussion_id": "1842638038", + "commented_code": "@@ -17,9 +18,14 @@\n use Illuminate\\Support\\Traits\\Macroable;\n use RuntimeException;\n \n+/**\n+ * @mixin ScheduleAttributes", + "comment_created_at": "2024-11-14T17:33:19+00:00", + "comment_author": "stevebauman", + "comment_body": "This should be the full namespace:\r\n\r\n```suggestion\r\n * @mixin \\Illuminate\\Console\\Scheduling\\ScheduleAttributes\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1910208001", + "pr_number": 54148, + "pr_file": "src/Illuminate/Routing/Controllers/HasMiddleware.php", + "created_at": "2025-01-10T10:57:00+00:00", + "commented_code": "/**\n * Get the middleware that should be assigned to the controller.\n *\n * @return \\Illuminate\\Routing\\Controllers\\Middleware[]\n * @return array", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1910208001", + "repo_full_name": "laravel/framework", + "pr_number": 54148, + "pr_file": "src/Illuminate/Routing/Controllers/HasMiddleware.php", + "discussion_id": "1910208001", + "commented_code": "@@ -7,7 +7,7 @@ interface HasMiddleware\n /**\n * Get the middleware that should be assigned to the controller.\n *\n- * @return \\Illuminate\\Routing\\Controllers\\Middleware[]\n+ * @return array", + "comment_created_at": "2025-01-10T10:57:00+00:00", + "comment_author": "shaedrich", + "comment_body": "Wouldn't this have sufficed?\r\n```suggestion\r\n * @return (\\Illuminate\\Routing\\Controllers\\Middleware|\\Closure|string)[]\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1910211747", + "repo_full_name": "laravel/framework", + "pr_number": 54148, + "pr_file": "src/Illuminate/Routing/Controllers/HasMiddleware.php", + "discussion_id": "1910208001", + "commented_code": "@@ -7,7 +7,7 @@ interface 
HasMiddleware\n /**\n * Get the middleware that should be assigned to the controller.\n *\n- * @return \\Illuminate\\Routing\\Controllers\\Middleware[]\n+ * @return array", + "comment_created_at": "2025-01-10T11:00:12+00:00", + "comment_author": "shaedrich", + "comment_body": "Also, is `string` the closest we can get? Have you tried `callable`?\r\n```suggestion\r\n * @return array\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1910225592", + "repo_full_name": "laravel/framework", + "pr_number": 54148, + "pr_file": "src/Illuminate/Routing/Controllers/HasMiddleware.php", + "discussion_id": "1910208001", + "commented_code": "@@ -7,7 +7,7 @@ interface HasMiddleware\n /**\n * Get the middleware that should be assigned to the controller.\n *\n- * @return \\Illuminate\\Routing\\Controllers\\Middleware[]\n+ * @return array", + "comment_created_at": "2025-01-10T11:11:42+00:00", + "comment_author": "willpower232", + "comment_body": "using `(...)[]` seems to work just as well but I can't say I have seen that syntax before and tend to defer to the error output from phpstan which currently uses `array<...>`\r\n\r\nit does seem I can remove `\\Closure|string` and replace with `callable` so that is a nice improvement\r\n\r\nI have also seen `list<...>` is valid now so technically `@return list<\\Illuminate\\Routing\\Controllers\\Middleware|callable>` is an option in this case\r\n\r\nI'll wait for github to support polls and or others to vote on the best combination and way forward :sweat_smile: ", + "pr_file_module": null + }, + { + "comment_id": "1910258359", + "repo_full_name": "laravel/framework", + "pr_number": 54148, + "pr_file": "src/Illuminate/Routing/Controllers/HasMiddleware.php", + "discussion_id": "1910208001", + "commented_code": "@@ -7,7 +7,7 @@ interface HasMiddleware\n /**\n * Get the middleware that should be assigned to the controller.\n *\n- * @return \\Illuminate\\Routing\\Controllers\\Middleware[]\n+ * @return array", + "comment_created_at": "2025-01-10T11:39:49+00:00", + "comment_author": "shaedrich", + "comment_body": "That would actually be great for such cases 😂👍🏻 ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1898688064", + "pr_number": 54025, + "pr_file": "src/Illuminate/Database/Schema/Blueprint.php", + "created_at": "2024-12-27T19:42:45+00:00", + "commented_code": "return $this->indexCommand('index', $columns, $name, $algorithm);\n }\n\n /**\n * Specify a functional index for the table.\n *\n * This method allows you to create a functional index using MySQL expressions\n * like `LOWER`, `UPPER`, or other supported functions directly on columns.\n *\n * @param string|array $columns The column(s) to include in the index.", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1898688064", + "repo_full_name": "laravel/framework", + "pr_number": 54025, + "pr_file": "src/Illuminate/Database/Schema/Blueprint.php", + "discussion_id": "1898688064", + "commented_code": "@@ -678,6 +678,37 @@ public function index($columns, $name = null, $algorithm = null)\n return $this->indexCommand('index', $columns, $name, $algorithm);\n }\n \n+ /**\n+ * Specify a functional index for the table.\n+ *\n+ * This method allows you to create a functional index using MySQL expressions\n+ * like `LOWER`, `UPPER`, or other supported functions directly on columns.\n+ *\n+ * @param string|array $columns The column(s) to include in the index.", + "comment_created_at": "2024-12-27T19:42:45+00:00", + "comment_author": "shaedrich", + 
"comment_body": "This could be improved to read:\r\n```suggestion\r\n * @param string|string[] $columns The column(s) to include in the index.\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/framework-precise-type-annotations.md b/_reviewers/framework-precise-type-annotations.md index 35bca1c..40e5518 100644 --- a/_reviewers/framework-precise-type-annotations.md +++ b/_reviewers/framework-precise-type-annotations.md @@ -31,231 +31,3 @@ protected function getJsonCastingFlags($key) ``` Precise type annotations help both developers and tools understand your code better, reducing potential errors and improving maintainability. - - -[ - { - "discussion_id": "2144424524", - "pr_number": 56028, - "pr_file": "src/Illuminate/Cache/MemoizedStore.php", - "created_at": "2025-06-13T07:53:41+00:00", - "commented_code": "/**\n * Store multiple items in the cache for a given number of seconds.\n *\n * @param array $values", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2144424524", - "repo_full_name": "laravel/framework", - "pr_number": 56028, - "pr_file": "src/Illuminate/Cache/MemoizedStore.php", - "discussion_id": "2144424524", - "commented_code": "@@ -108,6 +108,7 @@ public function put($key, $value, $seconds)\n /**\n * Store multiple items in the cache for a given number of seconds.\n *\n+ * @param array $values", - "comment_created_at": "2025-06-13T07:53:41+00:00", - "comment_author": "shaedrich", - "comment_body": "Since `prefix()` expects a `string`, so this can be reflected here:\r\n```suggestion\r\n * @param array $values\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2102803004", - "pr_number": 55821, - "pr_file": "src/Illuminate/Support/Str.php", - "created_at": "2025-05-22T15:08:00+00:00", - "commented_code": "static::$camelCache = [];\n static::$studlyCache = [];\n }\n\n /**\n * Split a string by a given separator.\n *\n * @param string $separator\n * @param string $string\n * @param int $limit\n * @return array", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2102803004", - "repo_full_name": "laravel/framework", - "pr_number": 55821, - "pr_file": "src/Illuminate/Support/Str.php", - "discussion_id": "2102803004", - "commented_code": "@@ -2083,4 +2083,17 @@ public static function flushCache()\n static::$camelCache = [];\n static::$studlyCache = [];\n }\n+\n+ /**\n+ * Split a string by a given separator.\n+ *\n+ * @param string $separator\n+ * @param string $string\n+ * @param int $limit\n+ * @return array", - "comment_created_at": "2025-05-22T15:08:00+00:00", - "comment_author": "shaedrich", - "comment_body": "The `array` will either consist of strings or be empty, so we can narrow this further as well:\r\n```suggestion\r\n * @return string[]\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2102806447", - "repo_full_name": "laravel/framework", - "pr_number": 55821, - "pr_file": "src/Illuminate/Support/Str.php", - "discussion_id": "2102803004", - "commented_code": "@@ -2083,4 +2083,17 @@ public static function flushCache()\n static::$camelCache = [];\n static::$studlyCache = [];\n }\n+\n+ /**\n+ * Split a string by a given separator.\n+ *\n+ * @param string $separator\n+ * @param string $string\n+ * @param int $limit\n+ * @return array", - "comment_created_at": "2025-05-22T15:09:35+00:00", - "comment_author": "shaedrich", - "comment_body": "Ah, damn, sorry, hadn't submitted it and didn't see that it was closed when I hit the 
'submit' button an hour later \ud83d\ude05", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2085688431", - "pr_number": 55721, - "pr_file": "src/Illuminate/Database/Eloquent/Relations/MorphToMany.php", - "created_at": "2025-05-12T23:36:35+00:00", - "commented_code": "}\n\n /**\n * Get the pivot models that are currently attached.\n * Get the pivot models that are currently attached, filtered by related model keys.\n *\n * @return \\Illuminate\\Support\\Collection\n * @param mixed $ids\n * @return \\Illuminate\\Support\\Collection", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2085688431", - "repo_full_name": "laravel/framework", - "pr_number": 55721, - "pr_file": "src/Illuminate/Database/Eloquent/Relations/MorphToMany.php", - "discussion_id": "2085688431", - "commented_code": "@@ -121,13 +121,14 @@ public function getRelationExistenceQuery(Builder $query, Builder $parentQuery,\n }\n \n /**\n- * Get the pivot models that are currently attached.\n+ * Get the pivot models that are currently attached, filtered by related model keys.\n *\n- * @return \\Illuminate\\Support\\Collection\n+ * @param mixed $ids\n+ * @return \\Illuminate\\Support\\Collection", - "comment_created_at": "2025-05-12T23:36:35+00:00", - "comment_author": "shaedrich", - "comment_body": "Shouldn't this stay\r\n```suggestion\r\n * @return \\Illuminate\\Support\\Collection\r\n```\r\n? If there is trouble with static analysis, the [PHPDoc comment in `InteractsWithPivotTable`](https://github.com/laravel/framework/blob/12.x/src/Illuminate/Database/Eloquent/Relations/Concerns/InteractsWithPivotTable.php#L496) should be adjusted accordingly", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2077010982", - "pr_number": 55663, - "pr_file": "src/Illuminate/Database/Query/Builder.php", - "created_at": "2025-05-07T07:45:32+00:00", - "commented_code": "/**\n * Set the columns to be selected.\n *\n * @param array|mixed $columns\n * @param array|mixed $columns", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2077010982", - "repo_full_name": "laravel/framework", - "pr_number": 55663, - "pr_file": "src/Illuminate/Database/Query/Builder.php", - "discussion_id": "2077010982", - "commented_code": "@@ -278,7 +278,7 @@ public function __construct(\n /**\n * Set the columns to be selected.\n *\n- * @param array|mixed $columns\n+ * @param array|mixed $columns", - "comment_created_at": "2025-05-07T07:45:32+00:00", - "comment_author": "shaedrich", - "comment_body": "This is not primarily to be read by humans but by tools and for them `mixed` and your proposed alternative make a **huge** difference. 
If you want to make it more bite-sized, you can always resort to `@phpstan-type` or the like", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1993214446", - "pr_number": 54992, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", - "created_at": "2025-03-13T10:25:43+00:00", - "commented_code": "return $value;\n }\n\n /**\n * Get JSON casting flags for the specified attribute.\n *\n * @param string $key\n * @return int", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1993214446", - "repo_full_name": "laravel/framework", - "pr_number": 54992, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasAttributes.php", - "discussion_id": "1993214446", - "commented_code": "@@ -1324,15 +1326,33 @@ protected function castAttributeAsJson($key, $value)\n return $value;\n }\n \n+ /**\n+ * Get JSON casting flags for the specified attribute.\n+ *\n+ * @param string $key\n+ * @return int", - "comment_created_at": "2025-03-13T10:25:43+00:00", - "comment_author": "shaedrich", - "comment_body": "Therefore, you can narrow the return here as well:\r\n```suggestion\r\n * @return int-mask-of\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1842638038", - "pr_number": 53427, - "pr_file": "src/Illuminate/Console/Scheduling/Schedule.php", - "created_at": "2024-11-14T17:33:19+00:00", - "commented_code": "use Illuminate\\Support\\Traits\\Macroable;\nuse RuntimeException;\n\n/**\n * @mixin ScheduleAttributes", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1842638038", - "repo_full_name": "laravel/framework", - "pr_number": 53427, - "pr_file": "src/Illuminate/Console/Scheduling/Schedule.php", - "discussion_id": "1842638038", - "commented_code": "@@ -17,9 +18,14 @@\n use Illuminate\\Support\\Traits\\Macroable;\n use RuntimeException;\n \n+/**\n+ * @mixin ScheduleAttributes", - "comment_created_at": "2024-11-14T17:33:19+00:00", - "comment_author": "stevebauman", - "comment_body": "This should be the full namespace:\r\n\r\n```suggestion\r\n * @mixin \\Illuminate\\Console\\Scheduling\\ScheduleAttributes\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1910208001", - "pr_number": 54148, - "pr_file": "src/Illuminate/Routing/Controllers/HasMiddleware.php", - "created_at": "2025-01-10T10:57:00+00:00", - "commented_code": "/**\n * Get the middleware that should be assigned to the controller.\n *\n * @return \\Illuminate\\Routing\\Controllers\\Middleware[]\n * @return array", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1910208001", - "repo_full_name": "laravel/framework", - "pr_number": 54148, - "pr_file": "src/Illuminate/Routing/Controllers/HasMiddleware.php", - "discussion_id": "1910208001", - "commented_code": "@@ -7,7 +7,7 @@ interface HasMiddleware\n /**\n * Get the middleware that should be assigned to the controller.\n *\n- * @return \\Illuminate\\Routing\\Controllers\\Middleware[]\n+ * @return array", - "comment_created_at": "2025-01-10T10:57:00+00:00", - "comment_author": "shaedrich", - "comment_body": "Wouldn't this have sufficed?\r\n```suggestion\r\n * @return (\\Illuminate\\Routing\\Controllers\\Middleware|\\Closure|string)[]\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1910211747", - "repo_full_name": "laravel/framework", - "pr_number": 54148, - "pr_file": "src/Illuminate/Routing/Controllers/HasMiddleware.php", - "discussion_id": "1910208001", - "commented_code": "@@ -7,7 +7,7 @@ interface 
HasMiddleware\n /**\n * Get the middleware that should be assigned to the controller.\n *\n- * @return \\Illuminate\\Routing\\Controllers\\Middleware[]\n+ * @return array", - "comment_created_at": "2025-01-10T11:00:12+00:00", - "comment_author": "shaedrich", - "comment_body": "Also, is `string` the closest we can get? Have you tried `callable`?\r\n```suggestion\r\n * @return array\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1910225592", - "repo_full_name": "laravel/framework", - "pr_number": 54148, - "pr_file": "src/Illuminate/Routing/Controllers/HasMiddleware.php", - "discussion_id": "1910208001", - "commented_code": "@@ -7,7 +7,7 @@ interface HasMiddleware\n /**\n * Get the middleware that should be assigned to the controller.\n *\n- * @return \\Illuminate\\Routing\\Controllers\\Middleware[]\n+ * @return array", - "comment_created_at": "2025-01-10T11:11:42+00:00", - "comment_author": "willpower232", - "comment_body": "using `(...)[]` seems to work just as well but I can't say I have seen that syntax before and tend to defer to the error output from phpstan which currently uses `array<...>`\r\n\r\nit does seem I can remove `\\Closure|string` and replace with `callable` so that is a nice improvement\r\n\r\nI have also seen `list<...>` is valid now so technically `@return list<\\Illuminate\\Routing\\Controllers\\Middleware|callable>` is an option in this case\r\n\r\nI'll wait for github to support polls and or others to vote on the best combination and way forward :sweat_smile: ", - "pr_file_module": null - }, - { - "comment_id": "1910258359", - "repo_full_name": "laravel/framework", - "pr_number": 54148, - "pr_file": "src/Illuminate/Routing/Controllers/HasMiddleware.php", - "discussion_id": "1910208001", - "commented_code": "@@ -7,7 +7,7 @@ interface HasMiddleware\n /**\n * Get the middleware that should be assigned to the controller.\n *\n- * @return \\Illuminate\\Routing\\Controllers\\Middleware[]\n+ * @return array", - "comment_created_at": "2025-01-10T11:39:49+00:00", - "comment_author": "shaedrich", - "comment_body": "That would actually be great for such cases \ud83d\ude02\ud83d\udc4d\ud83c\udffb ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1898688064", - "pr_number": 54025, - "pr_file": "src/Illuminate/Database/Schema/Blueprint.php", - "created_at": "2024-12-27T19:42:45+00:00", - "commented_code": "return $this->indexCommand('index', $columns, $name, $algorithm);\n }\n\n /**\n * Specify a functional index for the table.\n *\n * This method allows you to create a functional index using MySQL expressions\n * like `LOWER`, `UPPER`, or other supported functions directly on columns.\n *\n * @param string|array $columns The column(s) to include in the index.", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1898688064", - "repo_full_name": "laravel/framework", - "pr_number": 54025, - "pr_file": "src/Illuminate/Database/Schema/Blueprint.php", - "discussion_id": "1898688064", - "commented_code": "@@ -678,6 +678,37 @@ public function index($columns, $name = null, $algorithm = null)\n return $this->indexCommand('index', $columns, $name, $algorithm);\n }\n \n+ /**\n+ * Specify a functional index for the table.\n+ *\n+ * This method allows you to create a functional index using MySQL expressions\n+ * like `LOWER`, `UPPER`, or other supported functions directly on columns.\n+ *\n+ * @param string|array $columns The column(s) to include in the index.", - "comment_created_at": "2024-12-27T19:42:45+00:00", - 
"comment_author": "shaedrich", - "comment_body": "This could be improved to read:\r\n```suggestion\r\n * @param string|string[] $columns The column(s) to include in the index.\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/framework-use-modern-phpunit-attributes.json b/_reviewers/framework-use-modern-phpunit-attributes.json new file mode 100644 index 0000000..97e12fa --- /dev/null +++ b/_reviewers/framework-use-modern-phpunit-attributes.json @@ -0,0 +1,240 @@ +[ + { + "discussion_id": "2174034287", + "pr_number": 56169, + "pr_file": "tests/Database/DatabaseEloquentBuilderFindOrFailWithEnumTest.php", + "created_at": "2025-06-30T00:51:48+00:00", + "commented_code": "createSchema();\n }\n\n protected function tearDown(): void\n {\n Schema::drop('test_models');\n\n parent::tearDown();\n }\n\n protected function getEnvironmentSetUp($app)\n {\n $app['config']->set('database.default', 'testing');\n }\n\n #[Test]\n public function it_finds_existing_model_when_using_enum_id()", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2174034287", + "repo_full_name": "laravel/framework", + "pr_number": 56169, + "pr_file": "tests/Database/DatabaseEloquentBuilderFindOrFailWithEnumTest.php", + "discussion_id": "2174034287", + "commented_code": "@@ -0,0 +1,74 @@\n+createSchema();\n+ }\n+\n+ protected function tearDown(): void\n+ {\n+ Schema::drop('test_models');\n+\n+ parent::tearDown();\n+ }\n+\n+ protected function getEnvironmentSetUp($app)\n+ {\n+ $app['config']->set('database.default', 'testing');\n+ }\n+\n+ #[Test]\n+ public function it_finds_existing_model_when_using_enum_id()", + "comment_created_at": "2025-06-30T00:51:48+00:00", + "comment_author": "crynobone", + "comment_body": "We don't use `Test` attribute in the framework, prefix the method name with `test` and use studly case.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2044226331", + "pr_number": 55412, + "pr_file": "tests/Database/DatabaseEloquentWithPropertyHooksTest.php", + "created_at": "2025-04-15T10:38:23+00:00", + "commented_code": "markTestSkipped(\n 'Property Hooks are not available to test in PHP '.PHP_MAJOR_VERSION.'.'.PHP_MINOR_VERSION,\n );\n }", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2044226331", + "repo_full_name": "laravel/framework", + "pr_number": 55412, + "pr_file": "tests/Database/DatabaseEloquentWithPropertyHooksTest.php", + "discussion_id": "2044226331", + "commented_code": "@@ -0,0 +1,178 @@\n+markTestSkipped(\n+ 'Property Hooks are not available to test in PHP '.PHP_MAJOR_VERSION.'.'.PHP_MINOR_VERSION,\n+ );\n+ }", + "comment_created_at": "2025-04-15T10:38:23+00:00", + "comment_author": "shaedrich", + "comment_body": "You could alternatively use [PHPUnit's attribute annotations](https://docs.phpunit.de/en/10.5/attributes.html#requiresphp) to achieve this if you like:\r\n```suggestion\r\n#[RequiresPhp('8.4.0')]\r\nclass DatabaseEloquentWithPropertyHooksTest extends TestCase\r\n{\r\n protected function setUp(): void\r\n {\r\n parent::setUp();\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2013914054", + "pr_number": 55173, + "pr_file": "tests/Database/DatabaseEloquentModelTest.php", + "created_at": "2025-03-26T11:11:12+00:00", + "commented_code": "$this->assertTrue($model->hasCast('enumAttribute', StringStatus::class));\n }\n\n public function testHasCastsOnBcmathNumber()\n {\n if (! 
$this->isBcMathNumberSupported()) {\n $this->markTestSkipped('Requires BcMath\\Number class.');\n }", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2013914054", + "repo_full_name": "laravel/framework", + "pr_number": 55173, + "pr_file": "tests/Database/DatabaseEloquentModelTest.php", + "discussion_id": "2013914054", + "commented_code": "@@ -533,6 +534,35 @@ public function testHasCastsOnEnumAttribute()\n $this->assertTrue($model->hasCast('enumAttribute', StringStatus::class));\n }\n \n+ public function testHasCastsOnBcmathNumber()\n+ {\n+ if (! $this->isBcMathNumberSupported()) {\n+ $this->markTestSkipped('Requires BcMath\\Number class.');\n+ }", + "comment_created_at": "2025-03-26T11:11:12+00:00", + "comment_author": "shaedrich", + "comment_body": "Alternatively, you could use with attributes:\r\n```suggestion\r\n #[RequiresPhp('^8.4')]\r\n public function testHasCastsOnBcmathNumber()\r\n {\r\n```\r\nor\r\n```suggestion\r\n #[RequiresPhpExtension('bcmath')]\r\n public function testHasCastsOnBcmathNumber()\r\n {\r\n```\r\nor\r\n```suggestion\r\n #[RequiresMethod(Number::class, '__construct')]\r\n public function testHasCastsOnBcmathNumber()\r\n {\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2013957686", + "repo_full_name": "laravel/framework", + "pr_number": 55173, + "pr_file": "tests/Database/DatabaseEloquentModelTest.php", + "discussion_id": "2013914054", + "commented_code": "@@ -533,6 +534,35 @@ public function testHasCastsOnEnumAttribute()\n $this->assertTrue($model->hasCast('enumAttribute', StringStatus::class));\n }\n \n+ public function testHasCastsOnBcmathNumber()\n+ {\n+ if (! $this->isBcMathNumberSupported()) {\n+ $this->markTestSkipped('Requires BcMath\\Number class.');\n+ }", + "comment_created_at": "2025-03-26T11:41:46+00:00", + "comment_author": "KentarouTakeda", + "comment_body": "Thank you for letting me know something I didn't know. That's a good way to write it!\r\n\r\nI considered the possibility of testing using polyfills, so I decided to check with `RequiresMethod`.", + "pr_file_module": null + }, + { + "comment_id": "2014338557", + "repo_full_name": "laravel/framework", + "pr_number": 55173, + "pr_file": "tests/Database/DatabaseEloquentModelTest.php", + "discussion_id": "2013914054", + "commented_code": "@@ -533,6 +534,35 @@ public function testHasCastsOnEnumAttribute()\n $this->assertTrue($model->hasCast('enumAttribute', StringStatus::class));\n }\n \n+ public function testHasCastsOnBcmathNumber()\n+ {\n+ if (! 
$this->isBcMathNumberSupported()) {\n+ $this->markTestSkipped('Requires BcMath\\Number class.');\n+ }", + "comment_created_at": "2025-03-26T14:41:53+00:00", + "comment_author": "shaedrich", + "comment_body": "You're welcome—always happy to introduce someone to something new 👍🏻 ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1923828069", + "pr_number": 54285, + "pr_file": "tests/Support/SupportStrTest.php", + "created_at": "2025-01-21T14:26:55+00:00", + "commented_code": "$this->assertSame($expected, Str::chopEnd($subject, $needle));\n }\n }\n\n public function testSanitize()\n {\n foreach ([\n ['Hello', 'Hello', null],\n [123, '123', null],\n [null, null, null],\n ['', '', null],\n ['Hello', 'Hello', null],\n ['', '', null],\n ['', '', null],\n ['', '', null],\n ['Click me', 'Click me', null],\n ['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->allowRelativeMedias()],\n ['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->forceHttpsUrls()],\n ['', '', (new HtmlSanitizerConfig)->allowElement('span')->allowAttribute('data-attr', '*')],\n ] as $value) {\n [$subject, $expected, $config] = $value;\n\n $this->assertSame($expected, Str::sanitize($subject, $config));\n }\n }", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1923828069", + "repo_full_name": "laravel/framework", + "pr_number": 54285, + "pr_file": "tests/Support/SupportStrTest.php", + "discussion_id": "1923828069", + "commented_code": "@@ -1635,6 +1636,28 @@ public function testChopEnd()\n $this->assertSame($expected, Str::chopEnd($subject, $needle));\n }\n }\n+\n+ public function testSanitize()\n+ {\n+ foreach ([\n+ ['Hello', 'Hello', null],\n+ [123, '123', null],\n+ [null, null, null],\n+ ['', '', null],\n+ ['Hello', 'Hello', null],\n+ ['', '', null],\n+ ['', '', null],\n+ ['', '', null],\n+ ['Click me', 'Click me', null],\n+ ['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->allowRelativeMedias()],\n+ ['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->forceHttpsUrls()],\n+ ['', '', (new HtmlSanitizerConfig)->allowElement('span')->allowAttribute('data-attr', '*')],\n+ ] as $value) {\n+ [$subject, $expected, $config] = $value;\n+\n+ $this->assertSame($expected, Str::sanitize($subject, $config));\n+ }\n+ }", + "comment_created_at": "2025-01-21T14:26:55+00:00", + "comment_author": "shaedrich", + "comment_body": "You can use the [`#[DataProvider]` attribute](https://docs.phpunit.de/en/11.5/attributes.html#data-provider) here\r\n```suggestion\r\n #[DataProvider('provideStrSanatizeTestStrings')\r\n public function testSanitize(?string $subject, ?string $expected, ?HtmlSanitizerConfig $config)\r\n {\r\n $this->assertSame($expected, Str::sanitize($subject, $config));\r\n }\r\n \r\n public static function provideStrSanatizeTestStrings()\r\n {\r\n return [\r\n 'non-empty string is returned as is' => ['Hello', 'Hello', null],\r\n 'stringified number' => [123, '123', null],\r\n 'null is returned as is' => [null, null, null],\r\n 'empty string is returned as is' => ['', '', null],\r\n 'XSS attack in string is sanitized' => ['Hello', 'Hello', null],\r\n 'XSS attack is sanitized to empty string' => ['', '', null],\r\n 'data attribute in HTML element is sanitized' => ['', '', null],\r\n 'event handler in HTML element is sanitized' => ['', '', null],\r\n 'inline JavaScript in HTML hyperlink element is sanitized' => ['Click me', 'Click me', null],\r\n 'HTML image element is returned as is' => ['', '', (new 
HtmlSanitizerConfig)->allowElement('img', 'src')->allowRelativeMedias()],\r\n 'HTML image element src URL is rewritten to use TLS' => ['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->forceHttpsUrls()],\r\n 'data attribute in HTML element is not sanitized' => ['', '', (new HtmlSanitizerConfig)->allowElement('span')->allowAttribute('data-attr', '*')],\r\n ];\r\n }\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1923834352", + "repo_full_name": "laravel/framework", + "pr_number": 54285, + "pr_file": "tests/Support/SupportStrTest.php", + "discussion_id": "1923828069", + "commented_code": "@@ -1635,6 +1636,28 @@ public function testChopEnd()\n $this->assertSame($expected, Str::chopEnd($subject, $needle));\n }\n }\n+\n+ public function testSanitize()\n+ {\n+ foreach ([\n+ ['Hello', 'Hello', null],\n+ [123, '123', null],\n+ [null, null, null],\n+ ['', '', null],\n+ ['Hello', 'Hello', null],\n+ ['', '', null],\n+ ['', '', null],\n+ ['', '', null],\n+ ['Click me', 'Click me', null],\n+ ['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->allowRelativeMedias()],\n+ ['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->forceHttpsUrls()],\n+ ['', '', (new HtmlSanitizerConfig)->allowElement('span')->allowAttribute('data-attr', '*')],\n+ ] as $value) {\n+ [$subject, $expected, $config] = $value;\n+\n+ $this->assertSame($expected, Str::sanitize($subject, $config));\n+ }\n+ }", + "comment_created_at": "2025-01-21T14:30:35+00:00", + "comment_author": "shaedrich", + "comment_body": "Alternatively, you can use the [`[#TestWith]` attribute](https://docs.phpunit.de/en/11.5/attributes.html#testwith):\r\n```suggestion\r\n #[TestWith(['Hello', 'Hello', null])\r\n #[TestWith([123, '123', null])\r\n #[TestWith([null, null, null])\r\n #[TestWith(['', '', null])\r\n #[TestWith(['Hello', 'Hello', null])\r\n #[TestWith(['', '', null])\r\n #[TestWith(['', '', null])\r\n #[TestWith(['', '', null])\r\n #[TestWith(['Click me', 'Click me', null])\r\n #[TestWith(['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->allowRelativeMedias()])\r\n #[TestWith(['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->forceHttpsUrls()])\r\n #[TestWith(['', '', (new HtmlSanitizerConfig)->allowElement('span')->allowAttribute('data-attr', '*')])\r\n public function testSanitize(?string $subject, ?string $expected, ?HtmlSanitizerConfig $config)\r\n {\r\n this->assertSame($expected, Str::sanitize($subject, $config));\r\n }\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1923872921", + "repo_full_name": "laravel/framework", + "pr_number": 54285, + "pr_file": "tests/Support/SupportStrTest.php", + "discussion_id": "1923828069", + "commented_code": "@@ -1635,6 +1636,28 @@ public function testChopEnd()\n $this->assertSame($expected, Str::chopEnd($subject, $needle));\n }\n }\n+\n+ public function testSanitize()\n+ {\n+ foreach ([\n+ ['Hello', 'Hello', null],\n+ [123, '123', null],\n+ [null, null, null],\n+ ['', '', null],\n+ ['Hello', 'Hello', null],\n+ ['', '', null],\n+ ['', '', null],\n+ ['', '', null],\n+ ['Click me', 'Click me', null],\n+ ['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->allowRelativeMedias()],\n+ ['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->forceHttpsUrls()],\n+ ['', '', (new HtmlSanitizerConfig)->allowElement('span')->allowAttribute('data-attr', '*')],\n+ ] as $value) {\n+ [$subject, $expected, $config] = $value;\n+\n+ $this->assertSame($expected, Str::sanitize($subject, $config));\n+ }\n+ }", + 
"comment_created_at": "2025-01-21T14:53:07+00:00", + "comment_author": "jannescb", + "comment_body": "Thanks!\r\n\r\nI added/slightly modified your suggestions 👍 ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1919808012", + "pr_number": 54226, + "pr_file": "tests/Validation/ValidationEmailRuleTest.php", + "created_at": "2025-01-17T09:26:19+00:00", + "commented_code": "$customValidationMessage ? [self::ATTRIBUTE.'.email' => $customValidationMessage] : []\n );\n\n $this->assertSame($expectToPass, $v->passes());\n $this->assertSame($expectToPass, $v->passes(), 'Expected email input '.$value.' to '.($expectToPass ? 'pass' : 'fail').'.');", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1919808012", + "repo_full_name": "laravel/framework", + "pr_number": 54226, + "pr_file": "tests/Validation/ValidationEmailRuleTest.php", + "discussion_id": "1919808012", + "commented_code": "@@ -80,11 +81,12 @@ protected function assertValidationRules($rule, $values, $expectToPass, $expecte\n $customValidationMessage ? [self::ATTRIBUTE.'.email' => $customValidationMessage] : []\n );\n \n- $this->assertSame($expectToPass, $v->passes());\n+ $this->assertSame($expectToPass, $v->passes(), 'Expected email input '.$value.' to '.($expectToPass ? 'pass' : 'fail').'.');", + "comment_created_at": "2025-01-17T09:26:19+00:00", + "comment_author": "shaedrich", + "comment_body": "You might want to use a [`#[DataProvider]`](https://docs.phpunit.de/en/10.5/attributes.html#data-provider) for this", + "pr_file_module": null + }, + { + "comment_id": "1919924762", + "repo_full_name": "laravel/framework", + "pr_number": 54226, + "pr_file": "tests/Validation/ValidationEmailRuleTest.php", + "discussion_id": "1919808012", + "commented_code": "@@ -80,11 +81,12 @@ protected function assertValidationRules($rule, $values, $expectToPass, $expecte\n $customValidationMessage ? [self::ATTRIBUTE.'.email' => $customValidationMessage] : []\n );\n \n- $this->assertSame($expectToPass, $v->passes());\n+ $this->assertSame($expectToPass, $v->passes(), 'Expected email input '.$value.' to '.($expectToPass ? 'pass' : 'fail').'.');", + "comment_created_at": "2025-01-17T10:44:46+00:00", + "comment_author": "SanderMuller", + "comment_body": "Good point, updated to `#[TestWith]`", + "pr_file_module": null + }, + { + "comment_id": "1920378618", + "repo_full_name": "laravel/framework", + "pr_number": 54226, + "pr_file": "tests/Validation/ValidationEmailRuleTest.php", + "discussion_id": "1919808012", + "commented_code": "@@ -80,11 +81,12 @@ protected function assertValidationRules($rule, $values, $expectToPass, $expecte\n $customValidationMessage ? [self::ATTRIBUTE.'.email' => $customValidationMessage] : []\n );\n \n- $this->assertSame($expectToPass, $v->passes());\n+ $this->assertSame($expectToPass, $v->passes(), 'Expected email input '.$value.' to '.($expectToPass ? 
'pass' : 'fail').'.');", + "comment_created_at": "2025-01-17T15:42:51+00:00", + "comment_author": "shaedrich", + "comment_body": "Nice, never used it but looks neat 👍🏻 ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1919809801", + "pr_number": 54226, + "pr_file": "tests/Validation/ValidationEmailRuleTest.php", + "created_at": "2025-01-17T09:27:32+00:00", + "commented_code": "$this->assertValidationRules($rule, $values, true);\n }\n\n public function testStrict()\n public function testRfcCompliantStrict()\n {\n $emailThatFailsBothNonStrictButFailsInStrict = 'username@sub..example.com';\n $emailThatPassesNonStrictButFailsInStrict = '\"has space\"@example.com';\n $emailThatPassesBothNonStrictAndInStrict = 'plainaddress@example.com';\n\n $this->fails(\n (new Email())->rfcCompliant(true),\n 'invalid.@example.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n (new Email())->rfcCompliant(strict: true),\n $emailThatPassesNonStrictButFailsInStrict,\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->fails(\n Rule::email()->rfcCompliant(true),\n 'invalid.@example.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n Rule::email()->rfcCompliant(strict: true),\n $emailThatPassesNonStrictButFailsInStrict,\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->fails(\n (new Email())->rfcCompliant(true),\n 'username@sub..example.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n (new Email())->rfcCompliant(strict: true),\n $emailThatFailsBothNonStrictButFailsInStrict,\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->fails(\n Rule::email()->rfcCompliant(true),\n 'username@sub..example.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n Rule::email()->rfcCompliant(strict: true),\n $emailThatFailsBothNonStrictButFailsInStrict,\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->passes(\n (new Email())->rfcCompliant(true),\n 'plainaddress@example.com',\n (new Email())->rfcCompliant(strict: true),\n $emailThatPassesBothNonStrictAndInStrict\n );\n\n $this->passes(\n Rule::email()->rfcCompliant(true),\n 'plainaddress@example.com',\n Rule::email()->rfcCompliant(strict: true),\n $emailThatPassesBothNonStrictAndInStrict\n );\n }\n\n public function testDns()\n public function testValidateMxRecord()\n {\n $this->fails(\n (new Email())->validateMxRecord(),\n 'plainaddress@example.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->fails(\n Rule::email()->validateMxRecord(),\n 'plainaddress@example.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->passes(\n (new Email())->validateMxRecord(),\n 'taylor@laravel.com',\n 'taylor@laravel.com'\n );\n\n $this->passes(\n Rule::email()->validateMxRecord(),\n 'taylor@laravel.com',\n 'taylor@laravel.com'\n );\n }\n\n public function testSpoof()\n public function testPreventSpoofing()\n {\n $this->fails(\n (new Email())->preventSpoofing(),\n 'admin@examрle.com',// Contains a Cyrillic 'р' (U+0440), not a Latin 'p'\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' 
must be a valid email address.']\n );\n\n $this->fails(\n Rule::email()->preventSpoofing(),\n 'admin@examрle.com',// Contains a Cyrillic 'р' (U+0440), not a Latin 'p'\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $spoofingEmail = 'admin@exam'.\"\\u{0440}\".'le.com';\n $this->fails(\n (new Email())->preventSpoofing(),\n $spoofingEmail,\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->fails(\n Rule::email()->preventSpoofing(),\n $spoofingEmail,\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->passes(\n (new Email())->preventSpoofing(),\n 'admin@example.com',\n 'admin@example.com'\n );\n\n $this->passes(\n Rule::email()->preventSpoofing(),\n 'admin@example.com',\n 'admin@example.com'\n );\n\n $this->passes(\n (new Email())->preventSpoofing(),\n 'test👨‍💻@domain.com',\n 'test👨‍💻@domain.com'\n );\n\n $this->passes(\n Rule::email()->preventSpoofing(),\n 'test👨‍💻@domain.com',\n 'test👨‍💻@domain.com'\n );\n }\n\n public function testFilter()\n public function testWithNativeValidation()\n {\n $this->fails(\n (new Email())->withNativeValidation(),\n 'tést@domain.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->fails(\n Rule::email()->withNativeValidation(),\n 'tést@domain.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->passes(\n (new Email())->withNativeValidation(),\n 'admin@example.com',\n 'admin@example.com'\n );\n\n $this->passes(\n Rule::email()->withNativeValidation(),\n 'admin@example.com',\n 'admin@example.com'\n );\n }\n\n public function testFilterUnicode()\n public function testWithNativeValidationAllowUnicode()\n {\n $this->fails(\n (new Email())->withNativeValidation(true),\n (new Email())->withNativeValidation(allowUnicode: true),\n 'invalid.@example.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->fails(\n Rule::email()->withNativeValidation(true),\n Rule::email()->withNativeValidation(allowUnicode: true),\n 'invalid.@example.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' 
must be a valid email address.']\n );\n\n $this->passes(\n (new Email())->withNativeValidation(true),\n 'tést@domain.com',\n (new Email())->withNativeValidation(allowUnicode: true),\n 'tést@domain.com'\n );\n\n $this->passes(\n Rule::email()->withNativeValidation(true),\n 'tést@domain.com',\n Rule::email()->withNativeValidation(allowUnicode: true),\n 'tést@domain.com'\n );\n\n $this->passes(\n (new Email())->withNativeValidation(true),\n 'admin@example.com',\n (new Email())->withNativeValidation(allowUnicode: true),\n 'admin@example.com'\n );\n\n $this->passes(\n Rule::email()->withNativeValidation(true),\n 'admin@example.com',\n Rule::email()->withNativeValidation(allowUnicode: true),\n 'admin@example.com'\n );\n }\n\n public function testRfc()\n public function testRfcCompliantNonStrict()\n {\n $this->fails(\n (new Email())->rfcCompliant(),\n 'invalid.@example.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->fails(\n Rule::email()->rfcCompliant(),\n 'invalid.@example.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->fails(\n (new Email())->rfcCompliant(),\n 'test👨‍💻@domain.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->fails(\n Rule::email()->rfcCompliant(),\n 'test👨‍💻@domain.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->passes(\n (new Email())->rfcCompliant(),\n 'admin@example.com',\n 'admin@example.com'\n );\n\n $this->passes(\n Rule::email()->rfcCompliant(),\n 'admin@example.com',\n 'admin@example.com'\n );\n\n $this->passes(\n (new Email())->rfcCompliant(),\n 'tést@domain.com',\n 'tést@domain.com'\n );\n\n $this->passes(\n Rule::email()->rfcCompliant(),\n 'tést@domain.com',\n 'tést@domain.com'\n );\n }\n\n public function testPassesRfcCompliantButNotRfcCompliantStrict()\n {\n $emailsThatPassOnRfcCompliantButFailOnStrict = [\n '\"has space\"@example.com', // Quoted local part with space\n 'some(comment)@example.com', // Comment in local part\n 'abc.\"test\"@example.com', // Mixed quoted/unquoted local part\n '\"escaped\\\\\\\"quote\"@example.com', // Escaped quote inside quoted local part\n 'test@example', // Domain without TLD\n 'test@localhost', // Domain without TLD\n 'name@[127.0.0.1]', // Local-part with domain-literal IPv4 address\n 'user@[IPv6:::1]', // Domain-literal with unusual IPv6 short form\n 'a@[IPv6:2001:db8::1]', // Domain-literal with normal IPv6\n 'user@[IPv6:::]', // invalid shorthand IPv6\n '\"ab\\\\(c\"@example.com',\n ];", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1919809801", + "repo_full_name": "laravel/framework", + "pr_number": 54226, + "pr_file": "tests/Validation/ValidationEmailRuleTest.php", + "discussion_id": "1919809801", + "commented_code": "@@ -99,291 +101,692 @@ protected function passes($rule, $values)\n $this->assertValidationRules($rule, $values, true);\n }\n \n- public function testStrict()\n+ public function testRfcCompliantStrict()\n {\n+ $emailThatFailsBothNonStrictButFailsInStrict = 'username@sub..example.com';\n+ $emailThatPassesNonStrictButFailsInStrict = '\"has space\"@example.com';\n+ $emailThatPassesBothNonStrictAndInStrict = 
'plainaddress@example.com';\n+\n $this->fails(\n- (new Email())->rfcCompliant(true),\n- 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ (new Email())->rfcCompliant(strict: true),\n+ $emailThatPassesNonStrictButFailsInStrict,\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n- Rule::email()->rfcCompliant(true),\n- 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ Rule::email()->rfcCompliant(strict: true),\n+ $emailThatPassesNonStrictButFailsInStrict,\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n- (new Email())->rfcCompliant(true),\n- 'username@sub..example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ (new Email())->rfcCompliant(strict: true),\n+ $emailThatFailsBothNonStrictButFailsInStrict,\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n- Rule::email()->rfcCompliant(true),\n- 'username@sub..example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ Rule::email()->rfcCompliant(strict: true),\n+ $emailThatFailsBothNonStrictButFailsInStrict,\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->passes(\n- (new Email())->rfcCompliant(true),\n- 'plainaddress@example.com',\n+ (new Email())->rfcCompliant(strict: true),\n+ $emailThatPassesBothNonStrictAndInStrict\n );\n \n $this->passes(\n- Rule::email()->rfcCompliant(true),\n- 'plainaddress@example.com',\n+ Rule::email()->rfcCompliant(strict: true),\n+ $emailThatPassesBothNonStrictAndInStrict\n );\n }\n \n- public function testDns()\n+ public function testValidateMxRecord()\n {\n $this->fails(\n (new Email())->validateMxRecord(),\n 'plainaddress@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->validateMxRecord(),\n 'plainaddress@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->passes(\n (new Email())->validateMxRecord(),\n- 'taylor@laravel.com',\n+ 'taylor@laravel.com'\n );\n \n $this->passes(\n Rule::email()->validateMxRecord(),\n- 'taylor@laravel.com',\n+ 'taylor@laravel.com'\n );\n }\n \n- public function testSpoof()\n+ public function testPreventSpoofing()\n {\n $this->fails(\n (new Email())->preventSpoofing(),\n 'admin@examрle.com',// Contains a Cyrillic 'р' (U+0440), not a Latin 'p'\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->preventSpoofing(),\n 'admin@examрle.com',// Contains a Cyrillic 'р' (U+0440), not a Latin 'p'\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $spoofingEmail = 'admin@exam'.\"\\u{0440}\".'le.com';\n $this->fails(\n (new Email())->preventSpoofing(),\n $spoofingEmail,\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->preventSpoofing(),\n $spoofingEmail,\n- ['The '.self::ATTRIBUTE_REPLACED.' 
must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->passes(\n (new Email())->preventSpoofing(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n \n $this->passes(\n Rule::email()->preventSpoofing(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n \n $this->passes(\n (new Email())->preventSpoofing(),\n- 'test👨‍💻@domain.com',\n+ 'test👨‍💻@domain.com'\n );\n \n $this->passes(\n Rule::email()->preventSpoofing(),\n- 'test👨‍💻@domain.com',\n+ 'test👨‍💻@domain.com'\n );\n }\n \n- public function testFilter()\n+ public function testWithNativeValidation()\n {\n $this->fails(\n (new Email())->withNativeValidation(),\n 'tést@domain.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->withNativeValidation(),\n 'tést@domain.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->passes(\n (new Email())->withNativeValidation(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n \n $this->passes(\n Rule::email()->withNativeValidation(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n }\n \n- public function testFilterUnicode()\n+ public function testWithNativeValidationAllowUnicode()\n {\n $this->fails(\n- (new Email())->withNativeValidation(true),\n+ (new Email())->withNativeValidation(allowUnicode: true),\n 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n- Rule::email()->withNativeValidation(true),\n+ Rule::email()->withNativeValidation(allowUnicode: true),\n 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->passes(\n- (new Email())->withNativeValidation(true),\n- 'tést@domain.com',\n+ (new Email())->withNativeValidation(allowUnicode: true),\n+ 'tést@domain.com'\n );\n \n $this->passes(\n- Rule::email()->withNativeValidation(true),\n- 'tést@domain.com',\n+ Rule::email()->withNativeValidation(allowUnicode: true),\n+ 'tést@domain.com'\n );\n \n $this->passes(\n- (new Email())->withNativeValidation(true),\n- 'admin@example.com',\n+ (new Email())->withNativeValidation(allowUnicode: true),\n+ 'admin@example.com'\n );\n \n $this->passes(\n- Rule::email()->withNativeValidation(true),\n- 'admin@example.com',\n+ Rule::email()->withNativeValidation(allowUnicode: true),\n+ 'admin@example.com'\n );\n }\n \n- public function testRfc()\n+ public function testRfcCompliantNonStrict()\n {\n $this->fails(\n (new Email())->rfcCompliant(),\n 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->rfcCompliant(),\n 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n (new Email())->rfcCompliant(),\n 'test👨‍💻@domain.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' 
must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->rfcCompliant(),\n 'test👨‍💻@domain.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->passes(\n (new Email())->rfcCompliant(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n \n $this->passes(\n Rule::email()->rfcCompliant(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n \n $this->passes(\n (new Email())->rfcCompliant(),\n- 'tést@domain.com',\n+ 'tést@domain.com'\n );\n \n $this->passes(\n Rule::email()->rfcCompliant(),\n- 'tést@domain.com',\n+ 'tést@domain.com'\n );\n }\n \n+ public function testPassesRfcCompliantButNotRfcCompliantStrict()\n+ {\n+ $emailsThatPassOnRfcCompliantButFailOnStrict = [\n+ '\"has space\"@example.com', // Quoted local part with space\n+ 'some(comment)@example.com', // Comment in local part\n+ 'abc.\"test\"@example.com', // Mixed quoted/unquoted local part\n+ '\"escaped\\\\\\\"quote\"@example.com', // Escaped quote inside quoted local part\n+ 'test@example', // Domain without TLD\n+ 'test@localhost', // Domain without TLD\n+ 'name@[127.0.0.1]', // Local-part with domain-literal IPv4 address\n+ 'user@[IPv6:::1]', // Domain-literal with unusual IPv6 short form\n+ 'a@[IPv6:2001:db8::1]', // Domain-literal with normal IPv6\n+ 'user@[IPv6:::]', // invalid shorthand IPv6\n+ '\"ab\\\\(c\"@example.com',\n+ ];", + "comment_created_at": "2025-01-17T09:27:32+00:00", + "comment_author": "shaedrich", + "comment_body": "same here", + "pr_file_module": null + }, + { + "comment_id": "1919811653", + "repo_full_name": "laravel/framework", + "pr_number": 54226, + "pr_file": "tests/Validation/ValidationEmailRuleTest.php", + "discussion_id": "1919809801", + "commented_code": "@@ -99,291 +101,692 @@ protected function passes($rule, $values)\n $this->assertValidationRules($rule, $values, true);\n }\n \n- public function testStrict()\n+ public function testRfcCompliantStrict()\n {\n+ $emailThatFailsBothNonStrictButFailsInStrict = 'username@sub..example.com';\n+ $emailThatPassesNonStrictButFailsInStrict = '\"has space\"@example.com';\n+ $emailThatPassesBothNonStrictAndInStrict = 'plainaddress@example.com';\n+\n $this->fails(\n- (new Email())->rfcCompliant(true),\n- 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ (new Email())->rfcCompliant(strict: true),\n+ $emailThatPassesNonStrictButFailsInStrict,\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n- Rule::email()->rfcCompliant(true),\n- 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ Rule::email()->rfcCompliant(strict: true),\n+ $emailThatPassesNonStrictButFailsInStrict,\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n- (new Email())->rfcCompliant(true),\n- 'username@sub..example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ (new Email())->rfcCompliant(strict: true),\n+ $emailThatFailsBothNonStrictButFailsInStrict,\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n- Rule::email()->rfcCompliant(true),\n- 'username@sub..example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ Rule::email()->rfcCompliant(strict: true),\n+ $emailThatFailsBothNonStrictButFailsInStrict,\n+ ['The '.self::ATTRIBUTE_REPLACED.' 
must be a valid email address.']\n );\n \n $this->passes(\n- (new Email())->rfcCompliant(true),\n- 'plainaddress@example.com',\n+ (new Email())->rfcCompliant(strict: true),\n+ $emailThatPassesBothNonStrictAndInStrict\n );\n \n $this->passes(\n- Rule::email()->rfcCompliant(true),\n- 'plainaddress@example.com',\n+ Rule::email()->rfcCompliant(strict: true),\n+ $emailThatPassesBothNonStrictAndInStrict\n );\n }\n \n- public function testDns()\n+ public function testValidateMxRecord()\n {\n $this->fails(\n (new Email())->validateMxRecord(),\n 'plainaddress@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->validateMxRecord(),\n 'plainaddress@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->passes(\n (new Email())->validateMxRecord(),\n- 'taylor@laravel.com',\n+ 'taylor@laravel.com'\n );\n \n $this->passes(\n Rule::email()->validateMxRecord(),\n- 'taylor@laravel.com',\n+ 'taylor@laravel.com'\n );\n }\n \n- public function testSpoof()\n+ public function testPreventSpoofing()\n {\n $this->fails(\n (new Email())->preventSpoofing(),\n 'admin@examрle.com',// Contains a Cyrillic 'р' (U+0440), not a Latin 'p'\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->preventSpoofing(),\n 'admin@examрle.com',// Contains a Cyrillic 'р' (U+0440), not a Latin 'p'\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $spoofingEmail = 'admin@exam'.\"\\u{0440}\".'le.com';\n $this->fails(\n (new Email())->preventSpoofing(),\n $spoofingEmail,\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->preventSpoofing(),\n $spoofingEmail,\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->passes(\n (new Email())->preventSpoofing(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n \n $this->passes(\n Rule::email()->preventSpoofing(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n \n $this->passes(\n (new Email())->preventSpoofing(),\n- 'test👨‍💻@domain.com',\n+ 'test👨‍💻@domain.com'\n );\n \n $this->passes(\n Rule::email()->preventSpoofing(),\n- 'test👨‍💻@domain.com',\n+ 'test👨‍💻@domain.com'\n );\n }\n \n- public function testFilter()\n+ public function testWithNativeValidation()\n {\n $this->fails(\n (new Email())->withNativeValidation(),\n 'tést@domain.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->withNativeValidation(),\n 'tést@domain.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' 
must be a valid email address.']\n );\n \n $this->passes(\n (new Email())->withNativeValidation(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n \n $this->passes(\n Rule::email()->withNativeValidation(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n }\n \n- public function testFilterUnicode()\n+ public function testWithNativeValidationAllowUnicode()\n {\n $this->fails(\n- (new Email())->withNativeValidation(true),\n+ (new Email())->withNativeValidation(allowUnicode: true),\n 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n- Rule::email()->withNativeValidation(true),\n+ Rule::email()->withNativeValidation(allowUnicode: true),\n 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->passes(\n- (new Email())->withNativeValidation(true),\n- 'tést@domain.com',\n+ (new Email())->withNativeValidation(allowUnicode: true),\n+ 'tést@domain.com'\n );\n \n $this->passes(\n- Rule::email()->withNativeValidation(true),\n- 'tést@domain.com',\n+ Rule::email()->withNativeValidation(allowUnicode: true),\n+ 'tést@domain.com'\n );\n \n $this->passes(\n- (new Email())->withNativeValidation(true),\n- 'admin@example.com',\n+ (new Email())->withNativeValidation(allowUnicode: true),\n+ 'admin@example.com'\n );\n \n $this->passes(\n- Rule::email()->withNativeValidation(true),\n- 'admin@example.com',\n+ Rule::email()->withNativeValidation(allowUnicode: true),\n+ 'admin@example.com'\n );\n }\n \n- public function testRfc()\n+ public function testRfcCompliantNonStrict()\n {\n $this->fails(\n (new Email())->rfcCompliant(),\n 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->rfcCompliant(),\n 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n (new Email())->rfcCompliant(),\n 'test👨‍💻@domain.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->rfcCompliant(),\n 'test👨‍💻@domain.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' 
must be a valid email address.']\n );\n \n $this->passes(\n (new Email())->rfcCompliant(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n \n $this->passes(\n Rule::email()->rfcCompliant(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n \n $this->passes(\n (new Email())->rfcCompliant(),\n- 'tést@domain.com',\n+ 'tést@domain.com'\n );\n \n $this->passes(\n Rule::email()->rfcCompliant(),\n- 'tést@domain.com',\n+ 'tést@domain.com'\n );\n }\n \n+ public function testPassesRfcCompliantButNotRfcCompliantStrict()\n+ {\n+ $emailsThatPassOnRfcCompliantButFailOnStrict = [\n+ '\"has space\"@example.com', // Quoted local part with space\n+ 'some(comment)@example.com', // Comment in local part\n+ 'abc.\"test\"@example.com', // Mixed quoted/unquoted local part\n+ '\"escaped\\\\\\\"quote\"@example.com', // Escaped quote inside quoted local part\n+ 'test@example', // Domain without TLD\n+ 'test@localhost', // Domain without TLD\n+ 'name@[127.0.0.1]', // Local-part with domain-literal IPv4 address\n+ 'user@[IPv6:::1]', // Domain-literal with unusual IPv6 short form\n+ 'a@[IPv6:2001:db8::1]', // Domain-literal with normal IPv6\n+ 'user@[IPv6:::]', // invalid shorthand IPv6\n+ '\"ab\\\\(c\"@example.com',\n+ ];", + "comment_created_at": "2025-01-17T09:28:34+00:00", + "comment_author": "shaedrich", + "comment_body": "All of these arrays could alternatively be put into fixture files 🤔 ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1639068204", + "pr_number": 51799, + "pr_file": "tests/Http/HttpClientTest.php", + "created_at": "2024-06-14T00:18:24+00:00", + "commented_code": "});\n }\n\n public function testWithBaseUrl()\n /**\n * @dataProvider baseUriWithRefProvider\n */", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1639068204", + "repo_full_name": "laravel/framework", + "pr_number": 51799, + "pr_file": "tests/Http/HttpClientTest.php", + "discussion_id": "1639068204", + "commented_code": "@@ -985,23 +985,62 @@ public function testGetWithArrayQueryParamEncodes()\n });\n }\n \n- public function testWithBaseUrl()\n+ /**\n+ * @dataProvider baseUriWithRefProvider\n+ */", + "comment_created_at": "2024-06-14T00:18:24+00:00", + "comment_author": "crynobone", + "comment_body": "Please use PHPUnit `DataProvider` attribute over `@dataProvider` annotation.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/framework-use-modern-phpunit-attributes.md b/_reviewers/framework-use-modern-phpunit-attributes.md index ebf0437..18575aa 100644 --- a/_reviewers/framework-use-modern-phpunit-attributes.md +++ b/_reviewers/framework-use-modern-phpunit-attributes.md @@ -57,245 +57,3 @@ public function testSanitize(?string $subject, ?string $expected, ?HtmlSanitizer ``` This approach improves code maintainability, provides better type hinting to IDEs, and follows modern PHP testing practices. 
- - -[ - { - "discussion_id": "2174034287", - "pr_number": 56169, - "pr_file": "tests/Database/DatabaseEloquentBuilderFindOrFailWithEnumTest.php", - "created_at": "2025-06-30T00:51:48+00:00", - "commented_code": "createSchema();\n }\n\n protected function tearDown(): void\n {\n Schema::drop('test_models');\n\n parent::tearDown();\n }\n\n protected function getEnvironmentSetUp($app)\n {\n $app['config']->set('database.default', 'testing');\n }\n\n #[Test]\n public function it_finds_existing_model_when_using_enum_id()", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2174034287", - "repo_full_name": "laravel/framework", - "pr_number": 56169, - "pr_file": "tests/Database/DatabaseEloquentBuilderFindOrFailWithEnumTest.php", - "discussion_id": "2174034287", - "commented_code": "@@ -0,0 +1,74 @@\n+createSchema();\n+ }\n+\n+ protected function tearDown(): void\n+ {\n+ Schema::drop('test_models');\n+\n+ parent::tearDown();\n+ }\n+\n+ protected function getEnvironmentSetUp($app)\n+ {\n+ $app['config']->set('database.default', 'testing');\n+ }\n+\n+ #[Test]\n+ public function it_finds_existing_model_when_using_enum_id()", - "comment_created_at": "2025-06-30T00:51:48+00:00", - "comment_author": "crynobone", - "comment_body": "We don't use `Test` attribute in the framework, prefix the method name with `test` and use studly case.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2044226331", - "pr_number": 55412, - "pr_file": "tests/Database/DatabaseEloquentWithPropertyHooksTest.php", - "created_at": "2025-04-15T10:38:23+00:00", - "commented_code": "markTestSkipped(\n 'Property Hooks are not available to test in PHP '.PHP_MAJOR_VERSION.'.'.PHP_MINOR_VERSION,\n );\n }", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2044226331", - "repo_full_name": "laravel/framework", - "pr_number": 55412, - "pr_file": "tests/Database/DatabaseEloquentWithPropertyHooksTest.php", - "discussion_id": "2044226331", - "commented_code": "@@ -0,0 +1,178 @@\n+markTestSkipped(\n+ 'Property Hooks are not available to test in PHP '.PHP_MAJOR_VERSION.'.'.PHP_MINOR_VERSION,\n+ );\n+ }", - "comment_created_at": "2025-04-15T10:38:23+00:00", - "comment_author": "shaedrich", - "comment_body": "You could alternatively use [PHPUnit's attribute annotations](https://docs.phpunit.de/en/10.5/attributes.html#requiresphp) to achieve this if you like:\r\n```suggestion\r\n#[RequiresPhp('8.4.0')]\r\nclass DatabaseEloquentWithPropertyHooksTest extends TestCase\r\n{\r\n protected function setUp(): void\r\n {\r\n parent::setUp();\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2013914054", - "pr_number": 55173, - "pr_file": "tests/Database/DatabaseEloquentModelTest.php", - "created_at": "2025-03-26T11:11:12+00:00", - "commented_code": "$this->assertTrue($model->hasCast('enumAttribute', StringStatus::class));\n }\n\n public function testHasCastsOnBcmathNumber()\n {\n if (! 
$this->isBcMathNumberSupported()) {\n $this->markTestSkipped('Requires BcMath\\Number class.');\n }", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2013914054", - "repo_full_name": "laravel/framework", - "pr_number": 55173, - "pr_file": "tests/Database/DatabaseEloquentModelTest.php", - "discussion_id": "2013914054", - "commented_code": "@@ -533,6 +534,35 @@ public function testHasCastsOnEnumAttribute()\n $this->assertTrue($model->hasCast('enumAttribute', StringStatus::class));\n }\n \n+ public function testHasCastsOnBcmathNumber()\n+ {\n+ if (! $this->isBcMathNumberSupported()) {\n+ $this->markTestSkipped('Requires BcMath\\Number class.');\n+ }", - "comment_created_at": "2025-03-26T11:11:12+00:00", - "comment_author": "shaedrich", - "comment_body": "Alternatively, you could use with attributes:\r\n```suggestion\r\n #[RequiresPhp('^8.4')]\r\n public function testHasCastsOnBcmathNumber()\r\n {\r\n```\r\nor\r\n```suggestion\r\n #[RequiresPhpExtension('bcmath')]\r\n public function testHasCastsOnBcmathNumber()\r\n {\r\n```\r\nor\r\n```suggestion\r\n #[RequiresMethod(Number::class, '__construct')]\r\n public function testHasCastsOnBcmathNumber()\r\n {\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2013957686", - "repo_full_name": "laravel/framework", - "pr_number": 55173, - "pr_file": "tests/Database/DatabaseEloquentModelTest.php", - "discussion_id": "2013914054", - "commented_code": "@@ -533,6 +534,35 @@ public function testHasCastsOnEnumAttribute()\n $this->assertTrue($model->hasCast('enumAttribute', StringStatus::class));\n }\n \n+ public function testHasCastsOnBcmathNumber()\n+ {\n+ if (! $this->isBcMathNumberSupported()) {\n+ $this->markTestSkipped('Requires BcMath\\Number class.');\n+ }", - "comment_created_at": "2025-03-26T11:41:46+00:00", - "comment_author": "KentarouTakeda", - "comment_body": "Thank you for letting me know something I didn't know. That's a good way to write it!\r\n\r\nI considered the possibility of testing using polyfills, so I decided to check with `RequiresMethod`.", - "pr_file_module": null - }, - { - "comment_id": "2014338557", - "repo_full_name": "laravel/framework", - "pr_number": 55173, - "pr_file": "tests/Database/DatabaseEloquentModelTest.php", - "discussion_id": "2013914054", - "commented_code": "@@ -533,6 +534,35 @@ public function testHasCastsOnEnumAttribute()\n $this->assertTrue($model->hasCast('enumAttribute', StringStatus::class));\n }\n \n+ public function testHasCastsOnBcmathNumber()\n+ {\n+ if (! 
$this->isBcMathNumberSupported()) {\n+ $this->markTestSkipped('Requires BcMath\\Number class.');\n+ }", - "comment_created_at": "2025-03-26T14:41:53+00:00", - "comment_author": "shaedrich", - "comment_body": "You're welcome\u2014always happy to introduce someone to something new \ud83d\udc4d\ud83c\udffb ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1923828069", - "pr_number": 54285, - "pr_file": "tests/Support/SupportStrTest.php", - "created_at": "2025-01-21T14:26:55+00:00", - "commented_code": "$this->assertSame($expected, Str::chopEnd($subject, $needle));\n }\n }\n\n public function testSanitize()\n {\n foreach ([\n ['Hello', 'Hello', null],\n [123, '123', null],\n [null, null, null],\n ['', '', null],\n ['Hello', 'Hello', null],\n ['', '', null],\n ['', '', null],\n ['', '', null],\n ['Click me', 'Click me', null],\n ['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->allowRelativeMedias()],\n ['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->forceHttpsUrls()],\n ['', '', (new HtmlSanitizerConfig)->allowElement('span')->allowAttribute('data-attr', '*')],\n ] as $value) {\n [$subject, $expected, $config] = $value;\n\n $this->assertSame($expected, Str::sanitize($subject, $config));\n }\n }", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1923828069", - "repo_full_name": "laravel/framework", - "pr_number": 54285, - "pr_file": "tests/Support/SupportStrTest.php", - "discussion_id": "1923828069", - "commented_code": "@@ -1635,6 +1636,28 @@ public function testChopEnd()\n $this->assertSame($expected, Str::chopEnd($subject, $needle));\n }\n }\n+\n+ public function testSanitize()\n+ {\n+ foreach ([\n+ ['Hello', 'Hello', null],\n+ [123, '123', null],\n+ [null, null, null],\n+ ['', '', null],\n+ ['Hello', 'Hello', null],\n+ ['', '', null],\n+ ['', '', null],\n+ ['', '', null],\n+ ['Click me', 'Click me', null],\n+ ['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->allowRelativeMedias()],\n+ ['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->forceHttpsUrls()],\n+ ['', '', (new HtmlSanitizerConfig)->allowElement('span')->allowAttribute('data-attr', '*')],\n+ ] as $value) {\n+ [$subject, $expected, $config] = $value;\n+\n+ $this->assertSame($expected, Str::sanitize($subject, $config));\n+ }\n+ }", - "comment_created_at": "2025-01-21T14:26:55+00:00", - "comment_author": "shaedrich", - "comment_body": "You can use the [`#[DataProvider]` attribute](https://docs.phpunit.de/en/11.5/attributes.html#data-provider) here\r\n```suggestion\r\n #[DataProvider('provideStrSanatizeTestStrings')\r\n public function testSanitize(?string $subject, ?string $expected, ?HtmlSanitizerConfig $config)\r\n {\r\n $this->assertSame($expected, Str::sanitize($subject, $config));\r\n }\r\n \r\n public static function provideStrSanatizeTestStrings()\r\n {\r\n return [\r\n 'non-empty string is returned as is' => ['Hello', 'Hello', null],\r\n 'stringified number' => [123, '123', null],\r\n 'null is returned as is' => [null, null, null],\r\n 'empty string is returned as is' => ['', '', null],\r\n 'XSS attack in string is sanitized' => ['Hello', 'Hello', null],\r\n 'XSS attack is sanitized to empty string' => ['', '', null],\r\n 'data attribute in HTML element is sanitized' => ['', '', null],\r\n 'event handler in HTML element is sanitized' => ['', '', null],\r\n 'inline JavaScript in HTML hyperlink element is sanitized' => ['Click me', 'Click me', null],\r\n 'HTML image element is returned as is' => ['', '', (new 
HtmlSanitizerConfig)->allowElement('img', 'src')->allowRelativeMedias()],\r\n 'HTML image element src URL is rewritten to use TLS' => ['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->forceHttpsUrls()],\r\n 'data attribute in HTML element is not sanitized' => ['', '', (new HtmlSanitizerConfig)->allowElement('span')->allowAttribute('data-attr', '*')],\r\n ];\r\n }\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1923834352", - "repo_full_name": "laravel/framework", - "pr_number": 54285, - "pr_file": "tests/Support/SupportStrTest.php", - "discussion_id": "1923828069", - "commented_code": "@@ -1635,6 +1636,28 @@ public function testChopEnd()\n $this->assertSame($expected, Str::chopEnd($subject, $needle));\n }\n }\n+\n+ public function testSanitize()\n+ {\n+ foreach ([\n+ ['Hello', 'Hello', null],\n+ [123, '123', null],\n+ [null, null, null],\n+ ['', '', null],\n+ ['Hello', 'Hello', null],\n+ ['', '', null],\n+ ['', '', null],\n+ ['', '', null],\n+ ['Click me', 'Click me', null],\n+ ['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->allowRelativeMedias()],\n+ ['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->forceHttpsUrls()],\n+ ['', '', (new HtmlSanitizerConfig)->allowElement('span')->allowAttribute('data-attr', '*')],\n+ ] as $value) {\n+ [$subject, $expected, $config] = $value;\n+\n+ $this->assertSame($expected, Str::sanitize($subject, $config));\n+ }\n+ }", - "comment_created_at": "2025-01-21T14:30:35+00:00", - "comment_author": "shaedrich", - "comment_body": "Alternatively, you can use the [`[#TestWith]` attribute](https://docs.phpunit.de/en/11.5/attributes.html#testwith):\r\n```suggestion\r\n #[TestWith(['Hello', 'Hello', null])\r\n #[TestWith([123, '123', null])\r\n #[TestWith([null, null, null])\r\n #[TestWith(['', '', null])\r\n #[TestWith(['Hello', 'Hello', null])\r\n #[TestWith(['', '', null])\r\n #[TestWith(['', '', null])\r\n #[TestWith(['', '', null])\r\n #[TestWith(['Click me', 'Click me', null])\r\n #[TestWith(['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->allowRelativeMedias()])\r\n #[TestWith(['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->forceHttpsUrls()])\r\n #[TestWith(['', '', (new HtmlSanitizerConfig)->allowElement('span')->allowAttribute('data-attr', '*')])\r\n public function testSanitize(?string $subject, ?string $expected, ?HtmlSanitizerConfig $config)\r\n {\r\n this->assertSame($expected, Str::sanitize($subject, $config));\r\n }\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1923872921", - "repo_full_name": "laravel/framework", - "pr_number": 54285, - "pr_file": "tests/Support/SupportStrTest.php", - "discussion_id": "1923828069", - "commented_code": "@@ -1635,6 +1636,28 @@ public function testChopEnd()\n $this->assertSame($expected, Str::chopEnd($subject, $needle));\n }\n }\n+\n+ public function testSanitize()\n+ {\n+ foreach ([\n+ ['Hello', 'Hello', null],\n+ [123, '123', null],\n+ [null, null, null],\n+ ['', '', null],\n+ ['Hello', 'Hello', null],\n+ ['', '', null],\n+ ['', '', null],\n+ ['', '', null],\n+ ['Click me', 'Click me', null],\n+ ['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->allowRelativeMedias()],\n+ ['', '', (new HtmlSanitizerConfig)->allowElement('img', 'src')->forceHttpsUrls()],\n+ ['', '', (new HtmlSanitizerConfig)->allowElement('span')->allowAttribute('data-attr', '*')],\n+ ] as $value) {\n+ [$subject, $expected, $config] = $value;\n+\n+ $this->assertSame($expected, Str::sanitize($subject, $config));\n+ }\n+ }", - 
"comment_created_at": "2025-01-21T14:53:07+00:00", - "comment_author": "jannescb", - "comment_body": "Thanks!\r\n\r\nI added/slightly modified your suggestions \ud83d\udc4d ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1919808012", - "pr_number": 54226, - "pr_file": "tests/Validation/ValidationEmailRuleTest.php", - "created_at": "2025-01-17T09:26:19+00:00", - "commented_code": "$customValidationMessage ? [self::ATTRIBUTE.'.email' => $customValidationMessage] : []\n );\n\n $this->assertSame($expectToPass, $v->passes());\n $this->assertSame($expectToPass, $v->passes(), 'Expected email input '.$value.' to '.($expectToPass ? 'pass' : 'fail').'.');", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1919808012", - "repo_full_name": "laravel/framework", - "pr_number": 54226, - "pr_file": "tests/Validation/ValidationEmailRuleTest.php", - "discussion_id": "1919808012", - "commented_code": "@@ -80,11 +81,12 @@ protected function assertValidationRules($rule, $values, $expectToPass, $expecte\n $customValidationMessage ? [self::ATTRIBUTE.'.email' => $customValidationMessage] : []\n );\n \n- $this->assertSame($expectToPass, $v->passes());\n+ $this->assertSame($expectToPass, $v->passes(), 'Expected email input '.$value.' to '.($expectToPass ? 'pass' : 'fail').'.');", - "comment_created_at": "2025-01-17T09:26:19+00:00", - "comment_author": "shaedrich", - "comment_body": "You might want to use a [`#[DataProvider]`](https://docs.phpunit.de/en/10.5/attributes.html#data-provider) for this", - "pr_file_module": null - }, - { - "comment_id": "1919924762", - "repo_full_name": "laravel/framework", - "pr_number": 54226, - "pr_file": "tests/Validation/ValidationEmailRuleTest.php", - "discussion_id": "1919808012", - "commented_code": "@@ -80,11 +81,12 @@ protected function assertValidationRules($rule, $values, $expectToPass, $expecte\n $customValidationMessage ? [self::ATTRIBUTE.'.email' => $customValidationMessage] : []\n );\n \n- $this->assertSame($expectToPass, $v->passes());\n+ $this->assertSame($expectToPass, $v->passes(), 'Expected email input '.$value.' to '.($expectToPass ? 'pass' : 'fail').'.');", - "comment_created_at": "2025-01-17T10:44:46+00:00", - "comment_author": "SanderMuller", - "comment_body": "Good point, updated to `#[TestWith]`", - "pr_file_module": null - }, - { - "comment_id": "1920378618", - "repo_full_name": "laravel/framework", - "pr_number": 54226, - "pr_file": "tests/Validation/ValidationEmailRuleTest.php", - "discussion_id": "1919808012", - "commented_code": "@@ -80,11 +81,12 @@ protected function assertValidationRules($rule, $values, $expectToPass, $expecte\n $customValidationMessage ? [self::ATTRIBUTE.'.email' => $customValidationMessage] : []\n );\n \n- $this->assertSame($expectToPass, $v->passes());\n+ $this->assertSame($expectToPass, $v->passes(), 'Expected email input '.$value.' to '.($expectToPass ? 
'pass' : 'fail').'.');", - "comment_created_at": "2025-01-17T15:42:51+00:00", - "comment_author": "shaedrich", - "comment_body": "Nice, never used it but looks neat \ud83d\udc4d\ud83c\udffb ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1919809801", - "pr_number": 54226, - "pr_file": "tests/Validation/ValidationEmailRuleTest.php", - "created_at": "2025-01-17T09:27:32+00:00", - "commented_code": "$this->assertValidationRules($rule, $values, true);\n }\n\n public function testStrict()\n public function testRfcCompliantStrict()\n {\n $emailThatFailsBothNonStrictButFailsInStrict = 'username@sub..example.com';\n $emailThatPassesNonStrictButFailsInStrict = '\"has space\"@example.com';\n $emailThatPassesBothNonStrictAndInStrict = 'plainaddress@example.com';\n\n $this->fails(\n (new Email())->rfcCompliant(true),\n 'invalid.@example.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n (new Email())->rfcCompliant(strict: true),\n $emailThatPassesNonStrictButFailsInStrict,\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->fails(\n Rule::email()->rfcCompliant(true),\n 'invalid.@example.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n Rule::email()->rfcCompliant(strict: true),\n $emailThatPassesNonStrictButFailsInStrict,\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->fails(\n (new Email())->rfcCompliant(true),\n 'username@sub..example.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n (new Email())->rfcCompliant(strict: true),\n $emailThatFailsBothNonStrictButFailsInStrict,\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->fails(\n Rule::email()->rfcCompliant(true),\n 'username@sub..example.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n Rule::email()->rfcCompliant(strict: true),\n $emailThatFailsBothNonStrictButFailsInStrict,\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->passes(\n (new Email())->rfcCompliant(true),\n 'plainaddress@example.com',\n (new Email())->rfcCompliant(strict: true),\n $emailThatPassesBothNonStrictAndInStrict\n );\n\n $this->passes(\n Rule::email()->rfcCompliant(true),\n 'plainaddress@example.com',\n Rule::email()->rfcCompliant(strict: true),\n $emailThatPassesBothNonStrictAndInStrict\n );\n }\n\n public function testDns()\n public function testValidateMxRecord()\n {\n $this->fails(\n (new Email())->validateMxRecord(),\n 'plainaddress@example.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->fails(\n Rule::email()->validateMxRecord(),\n 'plainaddress@example.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->passes(\n (new Email())->validateMxRecord(),\n 'taylor@laravel.com',\n 'taylor@laravel.com'\n );\n\n $this->passes(\n Rule::email()->validateMxRecord(),\n 'taylor@laravel.com',\n 'taylor@laravel.com'\n );\n }\n\n public function testSpoof()\n public function testPreventSpoofing()\n {\n $this->fails(\n (new Email())->preventSpoofing(),\n 'admin@exam\u0440le.com',// Contains a Cyrillic '\u0440' (U+0440), not a Latin 'p'\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' 
must be a valid email address.']\n );\n\n $this->fails(\n Rule::email()->preventSpoofing(),\n 'admin@exam\u0440le.com',// Contains a Cyrillic '\u0440' (U+0440), not a Latin 'p'\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $spoofingEmail = 'admin@exam'.\"\\u{0440}\".'le.com';\n $this->fails(\n (new Email())->preventSpoofing(),\n $spoofingEmail,\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->fails(\n Rule::email()->preventSpoofing(),\n $spoofingEmail,\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->passes(\n (new Email())->preventSpoofing(),\n 'admin@example.com',\n 'admin@example.com'\n );\n\n $this->passes(\n Rule::email()->preventSpoofing(),\n 'admin@example.com',\n 'admin@example.com'\n );\n\n $this->passes(\n (new Email())->preventSpoofing(),\n 'test\ud83d\udc68\u200d\ud83d\udcbb@domain.com',\n 'test\ud83d\udc68\u200d\ud83d\udcbb@domain.com'\n );\n\n $this->passes(\n Rule::email()->preventSpoofing(),\n 'test\ud83d\udc68\u200d\ud83d\udcbb@domain.com',\n 'test\ud83d\udc68\u200d\ud83d\udcbb@domain.com'\n );\n }\n\n public function testFilter()\n public function testWithNativeValidation()\n {\n $this->fails(\n (new Email())->withNativeValidation(),\n 't\u00e9st@domain.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->fails(\n Rule::email()->withNativeValidation(),\n 't\u00e9st@domain.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->passes(\n (new Email())->withNativeValidation(),\n 'admin@example.com',\n 'admin@example.com'\n );\n\n $this->passes(\n Rule::email()->withNativeValidation(),\n 'admin@example.com',\n 'admin@example.com'\n );\n }\n\n public function testFilterUnicode()\n public function testWithNativeValidationAllowUnicode()\n {\n $this->fails(\n (new Email())->withNativeValidation(true),\n (new Email())->withNativeValidation(allowUnicode: true),\n 'invalid.@example.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->fails(\n Rule::email()->withNativeValidation(true),\n Rule::email()->withNativeValidation(allowUnicode: true),\n 'invalid.@example.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' 
must be a valid email address.']\n );\n\n $this->passes(\n (new Email())->withNativeValidation(true),\n 't\u00e9st@domain.com',\n (new Email())->withNativeValidation(allowUnicode: true),\n 't\u00e9st@domain.com'\n );\n\n $this->passes(\n Rule::email()->withNativeValidation(true),\n 't\u00e9st@domain.com',\n Rule::email()->withNativeValidation(allowUnicode: true),\n 't\u00e9st@domain.com'\n );\n\n $this->passes(\n (new Email())->withNativeValidation(true),\n 'admin@example.com',\n (new Email())->withNativeValidation(allowUnicode: true),\n 'admin@example.com'\n );\n\n $this->passes(\n Rule::email()->withNativeValidation(true),\n 'admin@example.com',\n Rule::email()->withNativeValidation(allowUnicode: true),\n 'admin@example.com'\n );\n }\n\n public function testRfc()\n public function testRfcCompliantNonStrict()\n {\n $this->fails(\n (new Email())->rfcCompliant(),\n 'invalid.@example.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->fails(\n Rule::email()->rfcCompliant(),\n 'invalid.@example.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->fails(\n (new Email())->rfcCompliant(),\n 'test\ud83d\udc68\u200d\ud83d\udcbb@domain.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->fails(\n Rule::email()->rfcCompliant(),\n 'test\ud83d\udc68\u200d\ud83d\udcbb@domain.com',\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n\n $this->passes(\n (new Email())->rfcCompliant(),\n 'admin@example.com',\n 'admin@example.com'\n );\n\n $this->passes(\n Rule::email()->rfcCompliant(),\n 'admin@example.com',\n 'admin@example.com'\n );\n\n $this->passes(\n (new Email())->rfcCompliant(),\n 't\u00e9st@domain.com',\n 't\u00e9st@domain.com'\n );\n\n $this->passes(\n Rule::email()->rfcCompliant(),\n 't\u00e9st@domain.com',\n 't\u00e9st@domain.com'\n );\n }\n\n public function testPassesRfcCompliantButNotRfcCompliantStrict()\n {\n $emailsThatPassOnRfcCompliantButFailOnStrict = [\n '\"has space\"@example.com', // Quoted local part with space\n 'some(comment)@example.com', // Comment in local part\n 'abc.\"test\"@example.com', // Mixed quoted/unquoted local part\n '\"escaped\\\\\\\"quote\"@example.com', // Escaped quote inside quoted local part\n 'test@example', // Domain without TLD\n 'test@localhost', // Domain without TLD\n 'name@[127.0.0.1]', // Local-part with domain-literal IPv4 address\n 'user@[IPv6:::1]', // Domain-literal with unusual IPv6 short form\n 'a@[IPv6:2001:db8::1]', // Domain-literal with normal IPv6\n 'user@[IPv6:::]', // invalid shorthand IPv6\n '\"ab\\\\(c\"@example.com',\n ];", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1919809801", - "repo_full_name": "laravel/framework", - "pr_number": 54226, - "pr_file": "tests/Validation/ValidationEmailRuleTest.php", - "discussion_id": "1919809801", - "commented_code": "@@ -99,291 +101,692 @@ protected function passes($rule, $values)\n $this->assertValidationRules($rule, $values, true);\n }\n \n- public function testStrict()\n+ public function testRfcCompliantStrict()\n {\n+ $emailThatFailsBothNonStrictButFailsInStrict = 'username@sub..example.com';\n+ 
$emailThatPassesNonStrictButFailsInStrict = '\"has space\"@example.com';\n+ $emailThatPassesBothNonStrictAndInStrict = 'plainaddress@example.com';\n+\n $this->fails(\n- (new Email())->rfcCompliant(true),\n- 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ (new Email())->rfcCompliant(strict: true),\n+ $emailThatPassesNonStrictButFailsInStrict,\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n- Rule::email()->rfcCompliant(true),\n- 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ Rule::email()->rfcCompliant(strict: true),\n+ $emailThatPassesNonStrictButFailsInStrict,\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n- (new Email())->rfcCompliant(true),\n- 'username@sub..example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ (new Email())->rfcCompliant(strict: true),\n+ $emailThatFailsBothNonStrictButFailsInStrict,\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n- Rule::email()->rfcCompliant(true),\n- 'username@sub..example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ Rule::email()->rfcCompliant(strict: true),\n+ $emailThatFailsBothNonStrictButFailsInStrict,\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->passes(\n- (new Email())->rfcCompliant(true),\n- 'plainaddress@example.com',\n+ (new Email())->rfcCompliant(strict: true),\n+ $emailThatPassesBothNonStrictAndInStrict\n );\n \n $this->passes(\n- Rule::email()->rfcCompliant(true),\n- 'plainaddress@example.com',\n+ Rule::email()->rfcCompliant(strict: true),\n+ $emailThatPassesBothNonStrictAndInStrict\n );\n }\n \n- public function testDns()\n+ public function testValidateMxRecord()\n {\n $this->fails(\n (new Email())->validateMxRecord(),\n 'plainaddress@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->validateMxRecord(),\n 'plainaddress@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->passes(\n (new Email())->validateMxRecord(),\n- 'taylor@laravel.com',\n+ 'taylor@laravel.com'\n );\n \n $this->passes(\n Rule::email()->validateMxRecord(),\n- 'taylor@laravel.com',\n+ 'taylor@laravel.com'\n );\n }\n \n- public function testSpoof()\n+ public function testPreventSpoofing()\n {\n $this->fails(\n (new Email())->preventSpoofing(),\n 'admin@exam\u0440le.com',// Contains a Cyrillic '\u0440' (U+0440), not a Latin 'p'\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->preventSpoofing(),\n 'admin@exam\u0440le.com',// Contains a Cyrillic '\u0440' (U+0440), not a Latin 'p'\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $spoofingEmail = 'admin@exam'.\"\\u{0440}\".'le.com';\n $this->fails(\n (new Email())->preventSpoofing(),\n $spoofingEmail,\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' 
must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->preventSpoofing(),\n $spoofingEmail,\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->passes(\n (new Email())->preventSpoofing(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n \n $this->passes(\n Rule::email()->preventSpoofing(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n \n $this->passes(\n (new Email())->preventSpoofing(),\n- 'test\ud83d\udc68\u200d\ud83d\udcbb@domain.com',\n+ 'test\ud83d\udc68\u200d\ud83d\udcbb@domain.com'\n );\n \n $this->passes(\n Rule::email()->preventSpoofing(),\n- 'test\ud83d\udc68\u200d\ud83d\udcbb@domain.com',\n+ 'test\ud83d\udc68\u200d\ud83d\udcbb@domain.com'\n );\n }\n \n- public function testFilter()\n+ public function testWithNativeValidation()\n {\n $this->fails(\n (new Email())->withNativeValidation(),\n 't\u00e9st@domain.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->withNativeValidation(),\n 't\u00e9st@domain.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->passes(\n (new Email())->withNativeValidation(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n \n $this->passes(\n Rule::email()->withNativeValidation(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n }\n \n- public function testFilterUnicode()\n+ public function testWithNativeValidationAllowUnicode()\n {\n $this->fails(\n- (new Email())->withNativeValidation(true),\n+ (new Email())->withNativeValidation(allowUnicode: true),\n 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n- Rule::email()->withNativeValidation(true),\n+ Rule::email()->withNativeValidation(allowUnicode: true),\n 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->passes(\n- (new Email())->withNativeValidation(true),\n- 't\u00e9st@domain.com',\n+ (new Email())->withNativeValidation(allowUnicode: true),\n+ 't\u00e9st@domain.com'\n );\n \n $this->passes(\n- Rule::email()->withNativeValidation(true),\n- 't\u00e9st@domain.com',\n+ Rule::email()->withNativeValidation(allowUnicode: true),\n+ 't\u00e9st@domain.com'\n );\n \n $this->passes(\n- (new Email())->withNativeValidation(true),\n- 'admin@example.com',\n+ (new Email())->withNativeValidation(allowUnicode: true),\n+ 'admin@example.com'\n );\n \n $this->passes(\n- Rule::email()->withNativeValidation(true),\n- 'admin@example.com',\n+ Rule::email()->withNativeValidation(allowUnicode: true),\n+ 'admin@example.com'\n );\n }\n \n- public function testRfc()\n+ public function testRfcCompliantNonStrict()\n {\n $this->fails(\n (new Email())->rfcCompliant(),\n 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->rfcCompliant(),\n 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' 
must be a valid email address.']\n );\n \n $this->fails(\n (new Email())->rfcCompliant(),\n 'test\ud83d\udc68\u200d\ud83d\udcbb@domain.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->rfcCompliant(),\n 'test\ud83d\udc68\u200d\ud83d\udcbb@domain.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->passes(\n (new Email())->rfcCompliant(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n \n $this->passes(\n Rule::email()->rfcCompliant(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n \n $this->passes(\n (new Email())->rfcCompliant(),\n- 't\u00e9st@domain.com',\n+ 't\u00e9st@domain.com'\n );\n \n $this->passes(\n Rule::email()->rfcCompliant(),\n- 't\u00e9st@domain.com',\n+ 't\u00e9st@domain.com'\n );\n }\n \n+ public function testPassesRfcCompliantButNotRfcCompliantStrict()\n+ {\n+ $emailsThatPassOnRfcCompliantButFailOnStrict = [\n+ '\"has space\"@example.com', // Quoted local part with space\n+ 'some(comment)@example.com', // Comment in local part\n+ 'abc.\"test\"@example.com', // Mixed quoted/unquoted local part\n+ '\"escaped\\\\\\\"quote\"@example.com', // Escaped quote inside quoted local part\n+ 'test@example', // Domain without TLD\n+ 'test@localhost', // Domain without TLD\n+ 'name@[127.0.0.1]', // Local-part with domain-literal IPv4 address\n+ 'user@[IPv6:::1]', // Domain-literal with unusual IPv6 short form\n+ 'a@[IPv6:2001:db8::1]', // Domain-literal with normal IPv6\n+ 'user@[IPv6:::]', // invalid shorthand IPv6\n+ '\"ab\\\\(c\"@example.com',\n+ ];", - "comment_created_at": "2025-01-17T09:27:32+00:00", - "comment_author": "shaedrich", - "comment_body": "same here", - "pr_file_module": null - }, - { - "comment_id": "1919811653", - "repo_full_name": "laravel/framework", - "pr_number": 54226, - "pr_file": "tests/Validation/ValidationEmailRuleTest.php", - "discussion_id": "1919809801", - "commented_code": "@@ -99,291 +101,692 @@ protected function passes($rule, $values)\n $this->assertValidationRules($rule, $values, true);\n }\n \n- public function testStrict()\n+ public function testRfcCompliantStrict()\n {\n+ $emailThatFailsBothNonStrictButFailsInStrict = 'username@sub..example.com';\n+ $emailThatPassesNonStrictButFailsInStrict = '\"has space\"@example.com';\n+ $emailThatPassesBothNonStrictAndInStrict = 'plainaddress@example.com';\n+\n $this->fails(\n- (new Email())->rfcCompliant(true),\n- 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ (new Email())->rfcCompliant(strict: true),\n+ $emailThatPassesNonStrictButFailsInStrict,\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n- Rule::email()->rfcCompliant(true),\n- 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ Rule::email()->rfcCompliant(strict: true),\n+ $emailThatPassesNonStrictButFailsInStrict,\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n- (new Email())->rfcCompliant(true),\n- 'username@sub..example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ (new Email())->rfcCompliant(strict: true),\n+ $emailThatFailsBothNonStrictButFailsInStrict,\n+ ['The '.self::ATTRIBUTE_REPLACED.' 
must be a valid email address.']\n );\n \n $this->fails(\n- Rule::email()->rfcCompliant(true),\n- 'username@sub..example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ Rule::email()->rfcCompliant(strict: true),\n+ $emailThatFailsBothNonStrictButFailsInStrict,\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->passes(\n- (new Email())->rfcCompliant(true),\n- 'plainaddress@example.com',\n+ (new Email())->rfcCompliant(strict: true),\n+ $emailThatPassesBothNonStrictAndInStrict\n );\n \n $this->passes(\n- Rule::email()->rfcCompliant(true),\n- 'plainaddress@example.com',\n+ Rule::email()->rfcCompliant(strict: true),\n+ $emailThatPassesBothNonStrictAndInStrict\n );\n }\n \n- public function testDns()\n+ public function testValidateMxRecord()\n {\n $this->fails(\n (new Email())->validateMxRecord(),\n 'plainaddress@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->validateMxRecord(),\n 'plainaddress@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->passes(\n (new Email())->validateMxRecord(),\n- 'taylor@laravel.com',\n+ 'taylor@laravel.com'\n );\n \n $this->passes(\n Rule::email()->validateMxRecord(),\n- 'taylor@laravel.com',\n+ 'taylor@laravel.com'\n );\n }\n \n- public function testSpoof()\n+ public function testPreventSpoofing()\n {\n $this->fails(\n (new Email())->preventSpoofing(),\n 'admin@exam\u0440le.com',// Contains a Cyrillic '\u0440' (U+0440), not a Latin 'p'\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->preventSpoofing(),\n 'admin@exam\u0440le.com',// Contains a Cyrillic '\u0440' (U+0440), not a Latin 'p'\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $spoofingEmail = 'admin@exam'.\"\\u{0440}\".'le.com';\n $this->fails(\n (new Email())->preventSpoofing(),\n $spoofingEmail,\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->preventSpoofing(),\n $spoofingEmail,\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->passes(\n (new Email())->preventSpoofing(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n \n $this->passes(\n Rule::email()->preventSpoofing(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n \n $this->passes(\n (new Email())->preventSpoofing(),\n- 'test\ud83d\udc68\u200d\ud83d\udcbb@domain.com',\n+ 'test\ud83d\udc68\u200d\ud83d\udcbb@domain.com'\n );\n \n $this->passes(\n Rule::email()->preventSpoofing(),\n- 'test\ud83d\udc68\u200d\ud83d\udcbb@domain.com',\n+ 'test\ud83d\udc68\u200d\ud83d\udcbb@domain.com'\n );\n }\n \n- public function testFilter()\n+ public function testWithNativeValidation()\n {\n $this->fails(\n (new Email())->withNativeValidation(),\n 't\u00e9st@domain.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' 
must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->withNativeValidation(),\n 't\u00e9st@domain.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->passes(\n (new Email())->withNativeValidation(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n \n $this->passes(\n Rule::email()->withNativeValidation(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n }\n \n- public function testFilterUnicode()\n+ public function testWithNativeValidationAllowUnicode()\n {\n $this->fails(\n- (new Email())->withNativeValidation(true),\n+ (new Email())->withNativeValidation(allowUnicode: true),\n 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n- Rule::email()->withNativeValidation(true),\n+ Rule::email()->withNativeValidation(allowUnicode: true),\n 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->passes(\n- (new Email())->withNativeValidation(true),\n- 't\u00e9st@domain.com',\n+ (new Email())->withNativeValidation(allowUnicode: true),\n+ 't\u00e9st@domain.com'\n );\n \n $this->passes(\n- Rule::email()->withNativeValidation(true),\n- 't\u00e9st@domain.com',\n+ Rule::email()->withNativeValidation(allowUnicode: true),\n+ 't\u00e9st@domain.com'\n );\n \n $this->passes(\n- (new Email())->withNativeValidation(true),\n- 'admin@example.com',\n+ (new Email())->withNativeValidation(allowUnicode: true),\n+ 'admin@example.com'\n );\n \n $this->passes(\n- Rule::email()->withNativeValidation(true),\n- 'admin@example.com',\n+ Rule::email()->withNativeValidation(allowUnicode: true),\n+ 'admin@example.com'\n );\n }\n \n- public function testRfc()\n+ public function testRfcCompliantNonStrict()\n {\n $this->fails(\n (new Email())->rfcCompliant(),\n 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->rfcCompliant(),\n 'invalid.@example.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n (new Email())->rfcCompliant(),\n 'test\ud83d\udc68\u200d\ud83d\udcbb@domain.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.']\n );\n \n $this->fails(\n Rule::email()->rfcCompliant(),\n 'test\ud83d\udc68\u200d\ud83d\udcbb@domain.com',\n- ['The '.self::ATTRIBUTE_REPLACED.' must be a valid email address.'],\n+ ['The '.self::ATTRIBUTE_REPLACED.' 
must be a valid email address.']\n );\n \n $this->passes(\n (new Email())->rfcCompliant(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n \n $this->passes(\n Rule::email()->rfcCompliant(),\n- 'admin@example.com',\n+ 'admin@example.com'\n );\n \n $this->passes(\n (new Email())->rfcCompliant(),\n- 't\u00e9st@domain.com',\n+ 't\u00e9st@domain.com'\n );\n \n $this->passes(\n Rule::email()->rfcCompliant(),\n- 't\u00e9st@domain.com',\n+ 't\u00e9st@domain.com'\n );\n }\n \n+ public function testPassesRfcCompliantButNotRfcCompliantStrict()\n+ {\n+ $emailsThatPassOnRfcCompliantButFailOnStrict = [\n+ '\"has space\"@example.com', // Quoted local part with space\n+ 'some(comment)@example.com', // Comment in local part\n+ 'abc.\"test\"@example.com', // Mixed quoted/unquoted local part\n+ '\"escaped\\\\\\\"quote\"@example.com', // Escaped quote inside quoted local part\n+ 'test@example', // Domain without TLD\n+ 'test@localhost', // Domain without TLD\n+ 'name@[127.0.0.1]', // Local-part with domain-literal IPv4 address\n+ 'user@[IPv6:::1]', // Domain-literal with unusual IPv6 short form\n+ 'a@[IPv6:2001:db8::1]', // Domain-literal with normal IPv6\n+ 'user@[IPv6:::]', // invalid shorthand IPv6\n+ '\"ab\\\\(c\"@example.com',\n+ ];", - "comment_created_at": "2025-01-17T09:28:34+00:00", - "comment_author": "shaedrich", - "comment_body": "All of these arrays could alternatively be put into fixture files \ud83e\udd14 ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1639068204", - "pr_number": 51799, - "pr_file": "tests/Http/HttpClientTest.php", - "created_at": "2024-06-14T00:18:24+00:00", - "commented_code": "});\n }\n\n public function testWithBaseUrl()\n /**\n * @dataProvider baseUriWithRefProvider\n */", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1639068204", - "repo_full_name": "laravel/framework", - "pr_number": 51799, - "pr_file": "tests/Http/HttpClientTest.php", - "discussion_id": "1639068204", - "commented_code": "@@ -985,23 +985,62 @@ public function testGetWithArrayQueryParamEncodes()\n });\n }\n \n- public function testWithBaseUrl()\n+ /**\n+ * @dataProvider baseUriWithRefProvider\n+ */", - "comment_created_at": "2024-06-14T00:18:24+00:00", - "comment_author": "crynobone", - "comment_body": "Please use PHPUnit `DataProvider` attribute over `@dataProvider` annotation.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/framework-use-semantic-exceptions.json b/_reviewers/framework-use-semantic-exceptions.json new file mode 100644 index 0000000..2f69c01 --- /dev/null +++ b/_reviewers/framework-use-semantic-exceptions.json @@ -0,0 +1,310 @@ +[ + { + "discussion_id": "2080817660", + "pr_number": 55685, + "pr_file": "src/Illuminate/Database/Eloquent/Model.php", + "created_at": "2025-05-09T02:51:43+00:00", + "commented_code": "protected function bootIfNotBooted()\n {\n if (! 
isset(static::$booted[static::class])) {\n static::$booted[static::class] = true;\n if (isset(static::$booting[static::class])) {\n throw new Exception('\"'.__METHOD__.'\" cannot be called on the \"'.static::class.'\" class while it is already being booted.');", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2080817660", + "repo_full_name": "laravel/framework", + "pr_number": 55685, + "pr_file": "src/Illuminate/Database/Eloquent/Model.php", + "discussion_id": "2080817660", + "commented_code": "@@ -286,12 +292,20 @@ public function __construct(array $attributes = [])\n protected function bootIfNotBooted()\n {\n if (! isset(static::$booted[static::class])) {\n- static::$booted[static::class] = true;\n+ if (isset(static::$booting[static::class])) {\n+ throw new Exception('\"'.__METHOD__.'\" cannot be called on the \"'.static::class.'\" class while it is already being booted.');", + "comment_created_at": "2025-05-09T02:51:43+00:00", + "comment_author": "rodrigopedra", + "comment_body": "Shouldn't it be a `LogicException`?\r\n\r\n> Exception that represents error in the program logic. This kind of exception should lead directly to a fix in your code. \r\n\r\nhttps://www.php.net/manual/en/class.logicexception.php", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2009314457", + "pr_number": 55107, + "pr_file": "src/Illuminate/Database/Eloquent/Collection.php", + "created_at": "2025-03-24T00:28:42+00:00", + "commented_code": "return $model->newModelQuery()->whereKey($this->modelKeys());\n }\n\n /**\n * Create a new resource collection instance for the given resource.\n *\n * @param class-string|null $resourceClass\n * @return ResourceCollection\n */\n public function toResourceCollection(?string $resourceClass = null): ResourceCollection\n {\n if ($resourceClass === null) {\n return $this->guessResourceCollection();\n }\n\n return $resourceClass::collection($this);\n }\n\n /**\n * Guess the resource collection for the items.\n *\n * @return ResourceCollection\n */\n protected function guessResourceCollection(): ResourceCollection\n {\n if ($this->isEmpty()) {\n return new ResourceCollection($this);\n }\n\n $model = $this->first();\n\n assert(is_object($model), 'Resource collection guesser expects the collection to contain objects.');", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "2009314457", + "repo_full_name": "laravel/framework", + "pr_number": 55107, + "pr_file": "src/Illuminate/Database/Eloquent/Collection.php", + "discussion_id": "2009314457", + "commented_code": "@@ -843,4 +845,44 @@ public function toQuery()\n \n return $model->newModelQuery()->whereKey($this->modelKeys());\n }\n+\n+ /**\n+ * Create a new resource collection instance for the given resource.\n+ *\n+ * @param class-string|null $resourceClass\n+ * @return ResourceCollection\n+ */\n+ public function toResourceCollection(?string $resourceClass = null): ResourceCollection\n+ {\n+ if ($resourceClass === null) {\n+ return $this->guessResourceCollection();\n+ }\n+\n+ return $resourceClass::collection($this);\n+ }\n+\n+ /**\n+ * Guess the resource collection for the items.\n+ *\n+ * @return ResourceCollection\n+ */\n+ protected function guessResourceCollection(): ResourceCollection\n+ {\n+ if ($this->isEmpty()) {\n+ return new ResourceCollection($this);\n+ }\n+\n+ $model = $this->first();\n+\n+ assert(is_object($model), 'Resource collection guesser expects the collection to contain objects.');", + "comment_created_at": 
"2025-03-24T00:28:42+00:00", + "comment_author": "mohammadrasoulasghari", + "comment_body": "Using assertions in production code is risky as they may be disabled in production environments (zend.assertions=-1). When assertions fail in this state, no proper errors are thrown, leading to silent failures or unexpected behavior. Better to use explicit exceptions with meaningful error messages for reliable error handling.\r\n\r\nhttps://www.php.net/manual/en/ini.core.php#ini.zend.assertions\r\n\r\nsee : \\Illuminate\\Database\\Eloquent\\Collection::toQuery", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1973796962", + "pr_number": 54824, + "pr_file": "src/Illuminate/Validation/InvalidSoftDeleteQueryException.php", + "created_at": "2025-02-27T15:13:30+00:00", + "commented_code": " $maxLength) {\n return 0;", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1898695609", + "repo_full_name": "laravel/framework", + "pr_number": 54027, + "pr_file": "src/Illuminate/Support/Number.php", + "discussion_id": "1898695609", + "commented_code": "@@ -367,6 +367,30 @@ public static function defaultCurrency()\n return static::$currency;\n }\n \n+ /**\n+ * Generate a random number of the given length.\n+ *\n+ * @param int $length\n+ * @return int\n+ */\n+ public static function random(int $length = 6): int\n+ {\n+ $maxLength = strlen((string) PHP_INT_MAX);\n+\n+ if ($length < 1 || $length > $maxLength) {\n+ return 0;", + "comment_created_at": "2024-12-27T20:05:40+00:00", + "comment_author": "shaedrich", + "comment_body": "Is `0` helpful here? If I can't generate a _random_ integer, I'd expect to get an exception thrown (maybe [`Random\\RandomException`](https://www.php.net/manual/en/class.random-randomexception.php) or [`Random\\RandomError`](https://www.php.net/manual/en/class.random-randomerror.php) 🤔)", + "pr_file_module": null + }, + { + "comment_id": "1898701582", + "repo_full_name": "laravel/framework", + "pr_number": 54027, + "pr_file": "src/Illuminate/Support/Number.php", + "discussion_id": "1898695609", + "commented_code": "@@ -367,6 +367,30 @@ public static function defaultCurrency()\n return static::$currency;\n }\n \n+ /**\n+ * Generate a random number of the given length.\n+ *\n+ * @param int $length\n+ * @return int\n+ */\n+ public static function random(int $length = 6): int\n+ {\n+ $maxLength = strlen((string) PHP_INT_MAX);\n+\n+ if ($length < 1 || $length > $maxLength) {\n+ return 0;", + "comment_created_at": "2024-12-27T20:24:48+00:00", + "comment_author": "shaedrich", + "comment_body": "Also, doesn't `random_int()` handle validation itself?", + "pr_file_module": null + }, + { + "comment_id": "1900456930", + "repo_full_name": "laravel/framework", + "pr_number": 54027, + "pr_file": "src/Illuminate/Support/Number.php", + "discussion_id": "1898695609", + "commented_code": "@@ -367,6 +367,30 @@ public static function defaultCurrency()\n return static::$currency;\n }\n \n+ /**\n+ * Generate a random number of the given length.\n+ *\n+ * @param int $length\n+ * @return int\n+ */\n+ public static function random(int $length = 6): int\n+ {\n+ $maxLength = strlen((string) PHP_INT_MAX);\n+\n+ if ($length < 1 || $length > $maxLength) {\n+ return 0;", + "comment_created_at": "2025-01-01T20:30:17+00:00", + "comment_author": "dilovanmatini", + "comment_body": "I hesitated to get an exception or just return 0. Then I checked Str:random it uses `random_int()` without throwing any exception. 
An that is why I chose to return 0.", + "pr_file_module": null + }, + { + "comment_id": "1900457686", + "repo_full_name": "laravel/framework", + "pr_number": 54027, + "pr_file": "src/Illuminate/Support/Number.php", + "discussion_id": "1898695609", + "commented_code": "@@ -367,6 +367,30 @@ public static function defaultCurrency()\n return static::$currency;\n }\n \n+ /**\n+ * Generate a random number of the given length.\n+ *\n+ * @param int $length\n+ * @return int\n+ */\n+ public static function random(int $length = 6): int\n+ {\n+ $maxLength = strlen((string) PHP_INT_MAX);\n+\n+ if ($length < 1 || $length > $maxLength) {\n+ return 0;", + "comment_created_at": "2025-01-01T20:36:04+00:00", + "comment_author": "shaedrich", + "comment_body": "If a function doesn't throw an exception, I'd at least expect it to return `null` or `false`, since `0` is a number but it's not random, so this can be quite confusing", + "pr_file_module": null + }, + { + "comment_id": "1900458031", + "repo_full_name": "laravel/framework", + "pr_number": 54027, + "pr_file": "src/Illuminate/Support/Number.php", + "discussion_id": "1898695609", + "commented_code": "@@ -367,6 +367,30 @@ public static function defaultCurrency()\n return static::$currency;\n }\n \n+ /**\n+ * Generate a random number of the given length.\n+ *\n+ * @param int $length\n+ * @return int\n+ */\n+ public static function random(int $length = 6): int\n+ {\n+ $maxLength = strlen((string) PHP_INT_MAX);\n+\n+ if ($length < 1 || $length > $maxLength) {\n+ return 0;", + "comment_created_at": "2025-01-01T20:39:21+00:00", + "comment_author": "dilovanmatini", + "comment_body": "That one is also possible 👍", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1898945139", + "pr_number": 54033, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasUniqueStringIds.php", + "created_at": "2024-12-28T15:52:23+00:00", + "commented_code": "*/\n protected function handleInvalidUniqueId($value, $field)\n {\n throw (new ModelNotFoundException)->setModel(get_class($this), $value);\n throw (new ModelNotFoundException)->setModel(get_class($this), $value)->setStatus(422);", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1898945139", + "repo_full_name": "laravel/framework", + "pr_number": 54033, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasUniqueStringIds.php", + "discussion_id": "1898945139", + "commented_code": "@@ -103,6 +103,6 @@ public function getIncrementing()\n */\n protected function handleInvalidUniqueId($value, $field)\n {\n- throw (new ModelNotFoundException)->setModel(get_class($this), $value);\n+ throw (new ModelNotFoundException)->setModel(get_class($this), $value)->setStatus(422);", + "comment_created_at": "2024-12-28T15:52:23+00:00", + "comment_author": "shaedrich", + "comment_body": "Wouldn't it make more sense to throw a different exception (like [`\\Ramsey\\Uuid\\Exception\\InvalidUuidStringException`](https://github.com/ramsey/uuid/blob/4.x/src/Exception/InvalidUuidStringException.php)) as you suggested?\r\n\r\n> Create an entirely new exception for this case. I'd be happy to pivot to this if it makes more sense for the framework.\r\n\r\nI mean, the whole point is, that it's not about that the model isn't found.\r\n\r\n> Throw a ValidationException instead. Something like ValidationException::withMessages([$field => '???']). 
I'm not sure what the message should be in that case.\r\n\r\nI don't think, we should mix the validation functionality of the framework with the rest of the framework. A dedicated exception like in your first suggestion sounds better", + "pr_file_module": null + }, + { + "comment_id": "1899151726", + "repo_full_name": "laravel/framework", + "pr_number": 54033, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasUniqueStringIds.php", + "discussion_id": "1898945139", + "commented_code": "@@ -103,6 +103,6 @@ public function getIncrementing()\n */\n protected function handleInvalidUniqueId($value, $field)\n {\n- throw (new ModelNotFoundException)->setModel(get_class($this), $value);\n+ throw (new ModelNotFoundException)->setModel(get_class($this), $value)->setStatus(422);", + "comment_created_at": "2024-12-29T15:32:55+00:00", + "comment_author": "cosmastech", + "comment_body": "Sharing ModelNotFoundException doesn't seem like the right approach, I agree. Went with a new exception, `InvalidIdFormatException`.\r\n\r\nThe problem with using `InvalidUuidStringException` is that any time a user requests an endpoint with an invalid UUID, it would be reported (to Sentry, DataDog, et cetera) which isn't desirable.\r\n\r\nThe user (or the framework) could add it to the exception handler's dontReport array, but there may be times where the user does want it reported in parts of the code outside of route model binding.\r\n\r\nIt also would mean that every time a user creates their own unique string ID trait, they would have to cook up their own exception. By creating a more generic exception, it saves users from having to follow the additional steps to avoid eating up their exception reporting limits.", + "pr_file_module": null + }, + { + "comment_id": "1899153828", + "repo_full_name": "laravel/framework", + "pr_number": 54033, + "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasUniqueStringIds.php", + "discussion_id": "1898945139", + "commented_code": "@@ -103,6 +103,6 @@ public function getIncrementing()\n */\n protected function handleInvalidUniqueId($value, $field)\n {\n- throw (new ModelNotFoundException)->setModel(get_class($this), $value);\n+ throw (new ModelNotFoundException)->setModel(get_class($this), $value)->setStatus(422);", + "comment_created_at": "2024-12-29T15:45:57+00:00", + "comment_author": "shaedrich", + "comment_body": "> Went with a new exception, `InvalidIdFormatException`. The problem with using `InvalidUuidStringException` is that any time a user requests an endpoint with an invalid UUID, it would be reported (to Sentry, DataDog, et cetera) which isn't desirable.\r\n\r\nAgreed. A new exception sounds good 👍🏻 Also, this way, we are less dependent on `ramsey/uuid` (I'm remembering what chaos the introduction of the lazy classes into `ramsey/uuid` caused for some who used the package).\r\n\r\n> By creating a more generic exception, it saves users from having to follow the additional steps to avoid eating up their exception reporting limits.\r\n\r\nMy only problem now with this is that the naming is too generic for the implementation. 
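One way to keep a generic-sounding exception informative, roughly what is being asked for in this exchange, is to carry the offending value and a description of the expected format on the exception itself. A hedged Kotlin sketch of that idea (the class, property, and pattern below are hypothetical illustrations, not Laravel's actual `InvalidIdFormatException`):

```kotlin
// Hypothetical dedicated exception: generic name, but each instance still says
// exactly which value failed and what shape was expected.
class InvalidIdFormatException(
    val value: Any?,
    val expectedFormat: String,
) : RuntimeException("ID [$value] does not match the expected format: $expectedFormat")

// Illustrative ULID check built on the exception above.
private val ulidPattern = Regex("[0-7][0-9A-HJKMNP-TV-Z]{25}")

fun requireUlid(value: String): String {
    if (!ulidPattern.matches(value)) {
        throw InvalidIdFormatException(value, "26-character Crockford base32 ULID")
    }
    return value
}
```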
For the implementation to match the level of genericness the name has, it would have to have some kind of property holding the format or a description of it.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1818276038", + "pr_number": 53318, + "pr_file": "src/Illuminate/Support/Number.php", + "created_at": "2024-10-28T01:53:10+00:00", + "commented_code": "*/\n protected static $currency = 'USD';\n\n\n /**\n * @param string $locale\n * @return string\n * @throws \\Exception\n */\n public static function validateLocale(string $locale): string\n {\n $availableLocales = ResourceBundle::getLocales('');\n\n if ( ! in_array($locale, $availableLocales, true)){\n throw new \\Exception(\"Locale is invalid\");", + "repo_full_name": "laravel/framework", + "discussion_comments": [ + { + "comment_id": "1818276038", + "repo_full_name": "laravel/framework", + "pr_number": 53318, + "pr_file": "src/Illuminate/Support/Number.php", + "discussion_id": "1818276038", + "commented_code": "@@ -24,6 +25,23 @@ class Number\n */\n protected static $currency = 'USD';\n \n+\n+ /**\n+ * @param string $locale\n+ * @return string\n+ * @throws \\Exception\n+ */\n+ public static function validateLocale(string $locale): string\n+ {\n+ $availableLocales = ResourceBundle::getLocales('');\n+\n+ if ( ! in_array($locale, $availableLocales, true)){\n+ throw new \\Exception(\"Locale is invalid\");", + "comment_created_at": "2024-10-28T01:53:10+00:00", + "comment_author": "crynobone", + "comment_body": "Should be `InvalidArgumentException`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1681129407", + "pr_number": 52169, + "pr_file": "src/Illuminate/Foundation/ViteFileFromManifestNotFoundException.php", + "created_at": "2024-07-17T14:10:07+00:00", + "commented_code": " Exception that represents error in the program logic. This kind of exception should lead directly to a fix in your code. 
\r\n\r\nhttps://www.php.net/manual/en/class.logicexception.php", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2009314457", - "pr_number": 55107, - "pr_file": "src/Illuminate/Database/Eloquent/Collection.php", - "created_at": "2025-03-24T00:28:42+00:00", - "commented_code": "return $model->newModelQuery()->whereKey($this->modelKeys());\n }\n\n /**\n * Create a new resource collection instance for the given resource.\n *\n * @param class-string|null $resourceClass\n * @return ResourceCollection\n */\n public function toResourceCollection(?string $resourceClass = null): ResourceCollection\n {\n if ($resourceClass === null) {\n return $this->guessResourceCollection();\n }\n\n return $resourceClass::collection($this);\n }\n\n /**\n * Guess the resource collection for the items.\n *\n * @return ResourceCollection\n */\n protected function guessResourceCollection(): ResourceCollection\n {\n if ($this->isEmpty()) {\n return new ResourceCollection($this);\n }\n\n $model = $this->first();\n\n assert(is_object($model), 'Resource collection guesser expects the collection to contain objects.');", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "2009314457", - "repo_full_name": "laravel/framework", - "pr_number": 55107, - "pr_file": "src/Illuminate/Database/Eloquent/Collection.php", - "discussion_id": "2009314457", - "commented_code": "@@ -843,4 +845,44 @@ public function toQuery()\n \n return $model->newModelQuery()->whereKey($this->modelKeys());\n }\n+\n+ /**\n+ * Create a new resource collection instance for the given resource.\n+ *\n+ * @param class-string|null $resourceClass\n+ * @return ResourceCollection\n+ */\n+ public function toResourceCollection(?string $resourceClass = null): ResourceCollection\n+ {\n+ if ($resourceClass === null) {\n+ return $this->guessResourceCollection();\n+ }\n+\n+ return $resourceClass::collection($this);\n+ }\n+\n+ /**\n+ * Guess the resource collection for the items.\n+ *\n+ * @return ResourceCollection\n+ */\n+ protected function guessResourceCollection(): ResourceCollection\n+ {\n+ if ($this->isEmpty()) {\n+ return new ResourceCollection($this);\n+ }\n+\n+ $model = $this->first();\n+\n+ assert(is_object($model), 'Resource collection guesser expects the collection to contain objects.');", - "comment_created_at": "2025-03-24T00:28:42+00:00", - "comment_author": "mohammadrasoulasghari", - "comment_body": "Using assertions in production code is risky as they may be disabled in production environments (zend.assertions=-1). When assertions fail in this state, no proper errors are thrown, leading to silent failures or unexpected behavior. 
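The same caution applies on the JVM: assertion-style checks are only active when explicitly enabled (`java -ea`), much like `zend.assertions` in PHP, whereas explicit precondition helpers always throw. A small Kotlin sketch of the contrast (names are illustrative):

```kotlin
fun firstModelOrFail(models: List<Any?>): Any {
    // Only evaluated when the JVM runs with -ea; silently skipped otherwise,
    // which is the failure mode the comment above warns about.
    assert(models.isNotEmpty()) { "expected a non-empty collection" }

    // Always enforced, with meaningful exception types and messages.
    check(models.isNotEmpty()) { "Expected the collection to contain at least one model." }
    return requireNotNull(models.first()) { "Expected the collection to contain objects." }
}
```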
Better to use explicit exceptions with meaningful error messages for reliable error handling.\r\n\r\nhttps://www.php.net/manual/en/ini.core.php#ini.zend.assertions\r\n\r\nsee : \\Illuminate\\Database\\Eloquent\\Collection::toQuery", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1973796962", - "pr_number": 54824, - "pr_file": "src/Illuminate/Validation/InvalidSoftDeleteQueryException.php", - "created_at": "2025-02-27T15:13:30+00:00", - "commented_code": " $maxLength) {\n return 0;", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1898695609", - "repo_full_name": "laravel/framework", - "pr_number": 54027, - "pr_file": "src/Illuminate/Support/Number.php", - "discussion_id": "1898695609", - "commented_code": "@@ -367,6 +367,30 @@ public static function defaultCurrency()\n return static::$currency;\n }\n \n+ /**\n+ * Generate a random number of the given length.\n+ *\n+ * @param int $length\n+ * @return int\n+ */\n+ public static function random(int $length = 6): int\n+ {\n+ $maxLength = strlen((string) PHP_INT_MAX);\n+\n+ if ($length < 1 || $length > $maxLength) {\n+ return 0;", - "comment_created_at": "2024-12-27T20:05:40+00:00", - "comment_author": "shaedrich", - "comment_body": "Is `0` helpful here? If I can't generate a _random_ integer, I'd expect to get an exception thrown (maybe [`Random\\RandomException`](https://www.php.net/manual/en/class.random-randomexception.php) or [`Random\\RandomError`](https://www.php.net/manual/en/class.random-randomerror.php) \ud83e\udd14)", - "pr_file_module": null - }, - { - "comment_id": "1898701582", - "repo_full_name": "laravel/framework", - "pr_number": 54027, - "pr_file": "src/Illuminate/Support/Number.php", - "discussion_id": "1898695609", - "commented_code": "@@ -367,6 +367,30 @@ public static function defaultCurrency()\n return static::$currency;\n }\n \n+ /**\n+ * Generate a random number of the given length.\n+ *\n+ * @param int $length\n+ * @return int\n+ */\n+ public static function random(int $length = 6): int\n+ {\n+ $maxLength = strlen((string) PHP_INT_MAX);\n+\n+ if ($length < 1 || $length > $maxLength) {\n+ return 0;", - "comment_created_at": "2024-12-27T20:24:48+00:00", - "comment_author": "shaedrich", - "comment_body": "Also, doesn't `random_int()` handle validation itself?", - "pr_file_module": null - }, - { - "comment_id": "1900456930", - "repo_full_name": "laravel/framework", - "pr_number": 54027, - "pr_file": "src/Illuminate/Support/Number.php", - "discussion_id": "1898695609", - "commented_code": "@@ -367,6 +367,30 @@ public static function defaultCurrency()\n return static::$currency;\n }\n \n+ /**\n+ * Generate a random number of the given length.\n+ *\n+ * @param int $length\n+ * @return int\n+ */\n+ public static function random(int $length = 6): int\n+ {\n+ $maxLength = strlen((string) PHP_INT_MAX);\n+\n+ if ($length < 1 || $length > $maxLength) {\n+ return 0;", - "comment_created_at": "2025-01-01T20:30:17+00:00", - "comment_author": "dilovanmatini", - "comment_body": "I hesitated to get an exception or just return 0. Then I checked Str:random it uses `random_int()` without throwing any exception. 
An that is why I chose to return 0.", - "pr_file_module": null - }, - { - "comment_id": "1900457686", - "repo_full_name": "laravel/framework", - "pr_number": 54027, - "pr_file": "src/Illuminate/Support/Number.php", - "discussion_id": "1898695609", - "commented_code": "@@ -367,6 +367,30 @@ public static function defaultCurrency()\n return static::$currency;\n }\n \n+ /**\n+ * Generate a random number of the given length.\n+ *\n+ * @param int $length\n+ * @return int\n+ */\n+ public static function random(int $length = 6): int\n+ {\n+ $maxLength = strlen((string) PHP_INT_MAX);\n+\n+ if ($length < 1 || $length > $maxLength) {\n+ return 0;", - "comment_created_at": "2025-01-01T20:36:04+00:00", - "comment_author": "shaedrich", - "comment_body": "If a function doesn't throw an exception, I'd at least expect it to return `null` or `false`, since `0` is a number but it's not random, so this can be quite confusing", - "pr_file_module": null - }, - { - "comment_id": "1900458031", - "repo_full_name": "laravel/framework", - "pr_number": 54027, - "pr_file": "src/Illuminate/Support/Number.php", - "discussion_id": "1898695609", - "commented_code": "@@ -367,6 +367,30 @@ public static function defaultCurrency()\n return static::$currency;\n }\n \n+ /**\n+ * Generate a random number of the given length.\n+ *\n+ * @param int $length\n+ * @return int\n+ */\n+ public static function random(int $length = 6): int\n+ {\n+ $maxLength = strlen((string) PHP_INT_MAX);\n+\n+ if ($length < 1 || $length > $maxLength) {\n+ return 0;", - "comment_created_at": "2025-01-01T20:39:21+00:00", - "comment_author": "dilovanmatini", - "comment_body": "That one is also possible \ud83d\udc4d", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1898945139", - "pr_number": 54033, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasUniqueStringIds.php", - "created_at": "2024-12-28T15:52:23+00:00", - "commented_code": "*/\n protected function handleInvalidUniqueId($value, $field)\n {\n throw (new ModelNotFoundException)->setModel(get_class($this), $value);\n throw (new ModelNotFoundException)->setModel(get_class($this), $value)->setStatus(422);", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1898945139", - "repo_full_name": "laravel/framework", - "pr_number": 54033, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasUniqueStringIds.php", - "discussion_id": "1898945139", - "commented_code": "@@ -103,6 +103,6 @@ public function getIncrementing()\n */\n protected function handleInvalidUniqueId($value, $field)\n {\n- throw (new ModelNotFoundException)->setModel(get_class($this), $value);\n+ throw (new ModelNotFoundException)->setModel(get_class($this), $value)->setStatus(422);", - "comment_created_at": "2024-12-28T15:52:23+00:00", - "comment_author": "shaedrich", - "comment_body": "Wouldn't it make more sense to throw a different exception (like [`\\Ramsey\\Uuid\\Exception\\InvalidUuidStringException`](https://github.com/ramsey/uuid/blob/4.x/src/Exception/InvalidUuidStringException.php)) as you suggested?\r\n\r\n> Create an entirely new exception for this case. I'd be happy to pivot to this if it makes more sense for the framework.\r\n\r\nI mean, the whole point is, that it's not about that the model isn't found.\r\n\r\n> Throw a ValidationException instead. Something like ValidationException::withMessages([$field => '???']). 
I'm not sure what the message should be in that case.\r\n\r\nI don't think, we should mix the validation functionality of the framework with the rest of the framework. A dedicated exception like in your first suggestion sounds better", - "pr_file_module": null - }, - { - "comment_id": "1899151726", - "repo_full_name": "laravel/framework", - "pr_number": 54033, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasUniqueStringIds.php", - "discussion_id": "1898945139", - "commented_code": "@@ -103,6 +103,6 @@ public function getIncrementing()\n */\n protected function handleInvalidUniqueId($value, $field)\n {\n- throw (new ModelNotFoundException)->setModel(get_class($this), $value);\n+ throw (new ModelNotFoundException)->setModel(get_class($this), $value)->setStatus(422);", - "comment_created_at": "2024-12-29T15:32:55+00:00", - "comment_author": "cosmastech", - "comment_body": "Sharing ModelNotFoundException doesn't seem like the right approach, I agree. Went with a new exception, `InvalidIdFormatException`.\r\n\r\nThe problem with using `InvalidUuidStringException` is that any time a user requests an endpoint with an invalid UUID, it would be reported (to Sentry, DataDog, et cetera) which isn't desirable.\r\n\r\nThe user (or the framework) could add it to the exception handler's dontReport array, but there may be times where the user does want it reported in parts of the code outside of route model binding.\r\n\r\nIt also would mean that every time a user creates their own unique string ID trait, they would have to cook up their own exception. By creating a more generic exception, it saves users from having to follow the additional steps to avoid eating up their exception reporting limits.", - "pr_file_module": null - }, - { - "comment_id": "1899153828", - "repo_full_name": "laravel/framework", - "pr_number": 54033, - "pr_file": "src/Illuminate/Database/Eloquent/Concerns/HasUniqueStringIds.php", - "discussion_id": "1898945139", - "commented_code": "@@ -103,6 +103,6 @@ public function getIncrementing()\n */\n protected function handleInvalidUniqueId($value, $field)\n {\n- throw (new ModelNotFoundException)->setModel(get_class($this), $value);\n+ throw (new ModelNotFoundException)->setModel(get_class($this), $value)->setStatus(422);", - "comment_created_at": "2024-12-29T15:45:57+00:00", - "comment_author": "shaedrich", - "comment_body": "> Went with a new exception, `InvalidIdFormatException`. The problem with using `InvalidUuidStringException` is that any time a user requests an endpoint with an invalid UUID, it would be reported (to Sentry, DataDog, et cetera) which isn't desirable.\r\n\r\nAgreed. A new exception sounds good \ud83d\udc4d\ud83c\udffb Also, this way, we are less dependent on `ramsey/uuid` (I'm remembering what chaos the introduction of the lazy classes into `ramsey/uuid` caused for some who used the package).\r\n\r\n> By creating a more generic exception, it saves users from having to follow the additional steps to avoid eating up their exception reporting limits.\r\n\r\nMy only problem now with this is that the naming is too generic for the implementation. 
For the implementation to match the level of genericness the name has, it would have to have some kind of property holding the format or a description of it.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1818276038", - "pr_number": 53318, - "pr_file": "src/Illuminate/Support/Number.php", - "created_at": "2024-10-28T01:53:10+00:00", - "commented_code": "*/\n protected static $currency = 'USD';\n\n\n /**\n * @param string $locale\n * @return string\n * @throws \\Exception\n */\n public static function validateLocale(string $locale): string\n {\n $availableLocales = ResourceBundle::getLocales('');\n\n if ( ! in_array($locale, $availableLocales, true)){\n throw new \\Exception(\"Locale is invalid\");", - "repo_full_name": "laravel/framework", - "discussion_comments": [ - { - "comment_id": "1818276038", - "repo_full_name": "laravel/framework", - "pr_number": 53318, - "pr_file": "src/Illuminate/Support/Number.php", - "discussion_id": "1818276038", - "commented_code": "@@ -24,6 +25,23 @@ class Number\n */\n protected static $currency = 'USD';\n \n+\n+ /**\n+ * @param string $locale\n+ * @return string\n+ * @throws \\Exception\n+ */\n+ public static function validateLocale(string $locale): string\n+ {\n+ $availableLocales = ResourceBundle::getLocales('');\n+\n+ if ( ! in_array($locale, $availableLocales, true)){\n+ throw new \\Exception(\"Locale is invalid\");", - "comment_created_at": "2024-10-28T01:53:10+00:00", - "comment_author": "crynobone", - "comment_body": "Should be `InvalidArgumentException`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1681129407", - "pr_number": 52169, - "pr_file": "src/Illuminate/Foundation/ViteFileFromManifestNotFoundException.php", - "created_at": "2024-07-17T14:10:07+00:00", - "commented_code": " return addAll(elements)\n 0 -> array = elements.toTypedArray() + array\n else -> array = array.copyOfRange(0, index).asDynamic().concat(elements.toTypedArray(), array.copyOfRange(index, size))\n\n var i = index\n elements.forEach { element ->\n array.asDynamic().splice(i, 0, element)", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1151434877", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5116, + "pr_file": "libraries/stdlib/js/src/kotlin/collections/ArrayList.kt", + "discussion_id": "1151434877", + "commented_code": "@@ -82,10 +83,11 @@ public actual open class ArrayList internal constructor(private var array: Ar\n \n if (index == size) return addAll(elements)\n if (elements.isEmpty()) return false\n- when (index) {\n- size -> return addAll(elements)\n- 0 -> array = elements.toTypedArray() + array\n- else -> array = array.copyOfRange(0, index).asDynamic().concat(elements.toTypedArray(), array.copyOfRange(index, size))\n+\n+ var i = index\n+ elements.forEach { element ->\n+ array.asDynamic().splice(i, 0, element)", + "comment_created_at": "2023-03-29T06:09:39+00:00", + "comment_author": "ilya-g", + "comment_body": "Moving array tail on each element insertion would result in quadratic complexity.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1320287309", + "pr_number": 5174, + "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/storage/LookupMap.kt", + "created_at": "2023-09-08T19:54:43+00:00", + "commented_code": "override fun dumpValue(value: Collection): String = value.toString()\n\n fun add(name: String, scope: String, fileId: Int) {\n storage.append(LookupSymbolKey(name, scope), listOf(fileId))\n storage.append(LookupSymbolKey(name, scope), fileId)\n }\n\n 
fun append(lookup: LookupSymbolKey, fileIds: Collection) {\n storage.append(lookup, fileIds)\n }\n\n operator fun get(key: LookupSymbolKey): Collection? = storage[key]\n override fun get(key: LookupSymbolKey): Collection? = storage[key]?.toSet()", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1320287309", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5174, + "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/storage/LookupMap.kt", + "discussion_id": "1320287309", + "commented_code": "@@ -35,23 +35,13 @@ class LookupMap(\n override fun dumpValue(value: Collection): String = value.toString()\n \n fun add(name: String, scope: String, fileId: Int) {\n- storage.append(LookupSymbolKey(name, scope), listOf(fileId))\n+ storage.append(LookupSymbolKey(name, scope), fileId)\n }\n \n- fun append(lookup: LookupSymbolKey, fileIds: Collection) {\n- storage.append(lookup, fileIds)\n- }\n-\n- operator fun get(key: LookupSymbolKey): Collection? = storage[key]\n+ override fun get(key: LookupSymbolKey): Collection? = storage[key]?.toSet()", + "comment_created_at": "2023-09-08T19:54:43+00:00", + "comment_author": "ALikhachev", + "comment_body": "What do you think about making the map values as `Set` in the first place? Now it's declared to be a map of `LookupSymbolKey -> Collection`, however effectively it is `LookupSymbolKey -> Set`, we deserialize the values as `List` and then convert the list into a `Set`. Perhaps we could avoid that conversion. On the other side, such a change will make the append logic of `InMemoryStorage` more complex", + "pr_file_module": null + }, + { + "comment_id": "1321592368", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5174, + "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/storage/LookupMap.kt", + "discussion_id": "1320287309", + "commented_code": "@@ -35,23 +35,13 @@ class LookupMap(\n override fun dumpValue(value: Collection): String = value.toString()\n \n fun add(name: String, scope: String, fileId: Int) {\n- storage.append(LookupSymbolKey(name, scope), listOf(fileId))\n+ storage.append(LookupSymbolKey(name, scope), fileId)\n }\n \n- fun append(lookup: LookupSymbolKey, fileIds: Collection) {\n- storage.append(lookup, fileIds)\n- }\n-\n- operator fun get(key: LookupSymbolKey): Collection? = storage[key]\n+ override fun get(key: LookupSymbolKey): Collection? = storage[key]?.toSet()", + "comment_created_at": "2023-09-11T13:51:47+00:00", + "comment_author": "hungvietnguyen", + "comment_body": "Yup you’re right. I plan to declare this map as `LookupSymbolKey -> Set` in the follow-up PR. (This PR focuses on the abstract classes; the next PR will clean up the subclasses – e.g., removing all these overriding methods in the subclasses.)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1173660401", + "pr_number": 5123, + "pr_file": "compiler/incremental-compilation-impl/src/org/jetbrains/kotlin/incremental/classpathDiff/ClasspathSnapshotter.kt", + "created_at": "2023-04-21T11:28:08+00:00", + "commented_code": "/** Creates [ClassSnapshot]s of the given classes. 
*/\n fun snapshot(\n classes: List,\n granularity: ClassSnapshotGranularity = CLASS_MEMBER_LEVEL,\n classes: List,\n granularity: ClassSnapshotGranularity,\n metrics: BuildMetricsReporter = DoNothingBuildMetricsReporter\n ): List {\n val classesInfo: List = metrics.measure(BuildTime.READ_CLASSES_BASIC_INFO) {\n classes.map { it.classInfo }\n }\n val inaccessibleClassesInfo: Set = metrics.measure(BuildTime.FIND_INACCESSIBLE_CLASSES) {\n findInaccessibleClasses(classesInfo)\n }\n return classes.map {\n when {\n it.classInfo in inaccessibleClassesInfo -> InaccessibleClassSnapshot\n it.classInfo.isKotlinClass -> metrics.measure(BuildTime.SNAPSHOT_KOTLIN_CLASSES) {\n snapshotKotlinClass(it, granularity)\n val classSnapshots = mutableMapOf()\n val inaccessibleClasses = mutableSetOf()\n\n // Process the classes in descending order of their relative paths so that outer classes are processed first, which is needed for\n // detecting transitively inaccessible classes -- see below.\n // For example, if `com/example/A$B.class` has outer class `com/example/A.class`, we will process `com/example/A.class` first (the\n // string `com/example/A.class` is higher than `com/example/A$B.class` because `.` is higher than `$`).\n classes.sortedByDescending { it.classFile.unixStyleRelativePath }.forEach {\n val clazz = metrics.measure(BuildTime.LOAD_CONTENTS_OF_CLASSES) {\n it.loadContents()\n }\n val classSnapshot = when {\n // We don't need to snapshot classes that are (directly or transitively) inaccessible.\n // A class is transitively inaccessible if its outer class is (directly or transitively) inaccessible.\n // The outer class (if present) should have been processed due to the way we sorted the given classes (see above).\n //\n // In the uncommon case that the outer class is not yet processed (i.e., when the outer class's path is not a prefix of this\n // class's path) or if it can't be found in the given classes (this could happen with faulty jars), then we will not be able\n // to confirm whether this class is transitively inaccessible. If we try to find and process the outer class on the fly, we\n // will potentially need to load many more classes, and we will also have to cache them in memory to avoid loading them\n // twice later, therefore consuming more memory (KT-57757). To keep things simple and avoid KT-57757, we'll just consider\n // that the class is accessible and snapshot it (it's okay to snapshot more than required, but incorrect to snapshot less).\n // Note that this case is unlikely to happen anyway.", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1173660401", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5123, + "pr_file": "compiler/incremental-compilation-impl/src/org/jetbrains/kotlin/incremental/classpathDiff/ClasspathSnapshotter.kt", + "discussion_id": "1173660401", + "commented_code": "@@ -57,26 +63,66 @@ object ClassSnapshotter {\n \n /** Creates [ClassSnapshot]s of the given classes. 
*/\n fun snapshot(\n- classes: List,\n- granularity: ClassSnapshotGranularity = CLASS_MEMBER_LEVEL,\n+ classes: List,\n+ granularity: ClassSnapshotGranularity,\n metrics: BuildMetricsReporter = DoNothingBuildMetricsReporter\n ): List {\n- val classesInfo: List = metrics.measure(BuildTime.READ_CLASSES_BASIC_INFO) {\n- classes.map { it.classInfo }\n- }\n- val inaccessibleClassesInfo: Set = metrics.measure(BuildTime.FIND_INACCESSIBLE_CLASSES) {\n- findInaccessibleClasses(classesInfo)\n- }\n- return classes.map {\n- when {\n- it.classInfo in inaccessibleClassesInfo -> InaccessibleClassSnapshot\n- it.classInfo.isKotlinClass -> metrics.measure(BuildTime.SNAPSHOT_KOTLIN_CLASSES) {\n- snapshotKotlinClass(it, granularity)\n+ val classSnapshots = mutableMapOf()\n+ val inaccessibleClasses = mutableSetOf()\n+\n+ // Process the classes in descending order of their relative paths so that outer classes are processed first, which is needed for\n+ // detecting transitively inaccessible classes -- see below.\n+ // For example, if `com/example/A$B.class` has outer class `com/example/A.class`, we will process `com/example/A.class` first (the\n+ // string `com/example/A.class` is higher than `com/example/A$B.class` because `.` is higher than `$`).\n+ classes.sortedByDescending { it.classFile.unixStyleRelativePath }.forEach {\n+ val clazz = metrics.measure(BuildTime.LOAD_CONTENTS_OF_CLASSES) {\n+ it.loadContents()\n+ }\n+ val classSnapshot = when {\n+ // We don't need to snapshot classes that are (directly or transitively) inaccessible.\n+ // A class is transitively inaccessible if its outer class is (directly or transitively) inaccessible.\n+ // The outer class (if present) should have been processed due to the way we sorted the given classes (see above).\n+ //\n+ // In the uncommon case that the outer class is not yet processed (i.e., when the outer class's path is not a prefix of this\n+ // class's path) or if it can't be found in the given classes (this could happen with faulty jars), then we will not be able\n+ // to confirm whether this class is transitively inaccessible. If we try to find and process the outer class on the fly, we\n+ // will potentially need to load many more classes, and we will also have to cache them in memory to avoid loading them\n+ // twice later, therefore consuming more memory (KT-57757). To keep things simple and avoid KT-57757, we'll just consider\n+ // that the class is accessible and snapshot it (it's okay to snapshot more than required, but incorrect to snapshot less).\n+ // Note that this case is unlikely to happen anyway.", + "comment_created_at": "2023-04-21T11:28:08+00:00", + "comment_author": "gavra0", + "comment_body": "While this seems to address this use-case, I think we should still file a YT issue to switch to on-demand loading of classes. If we need B in order to process A, the class loading mechanism can provide this information. Currently, sorting of entries addresses the most common scenario, but if there are new cross-class requirements, this becomes difficult to maintain.\r\n\r\nAlso, the comment about the memory consumption does not really hold, in order to load A, you'll need to load the outer class parent chain. 
which should not be more than ~10 classes.", + "pr_file_module": null + }, + { + "comment_id": "1173941121", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5123, + "pr_file": "compiler/incremental-compilation-impl/src/org/jetbrains/kotlin/incremental/classpathDiff/ClasspathSnapshotter.kt", + "discussion_id": "1173660401", + "commented_code": "@@ -57,26 +63,66 @@ object ClassSnapshotter {\n \n /** Creates [ClassSnapshot]s of the given classes. */\n fun snapshot(\n- classes: List,\n- granularity: ClassSnapshotGranularity = CLASS_MEMBER_LEVEL,\n+ classes: List,\n+ granularity: ClassSnapshotGranularity,\n metrics: BuildMetricsReporter = DoNothingBuildMetricsReporter\n ): List {\n- val classesInfo: List = metrics.measure(BuildTime.READ_CLASSES_BASIC_INFO) {\n- classes.map { it.classInfo }\n- }\n- val inaccessibleClassesInfo: Set = metrics.measure(BuildTime.FIND_INACCESSIBLE_CLASSES) {\n- findInaccessibleClasses(classesInfo)\n- }\n- return classes.map {\n- when {\n- it.classInfo in inaccessibleClassesInfo -> InaccessibleClassSnapshot\n- it.classInfo.isKotlinClass -> metrics.measure(BuildTime.SNAPSHOT_KOTLIN_CLASSES) {\n- snapshotKotlinClass(it, granularity)\n+ val classSnapshots = mutableMapOf()\n+ val inaccessibleClasses = mutableSetOf()\n+\n+ // Process the classes in descending order of their relative paths so that outer classes are processed first, which is needed for\n+ // detecting transitively inaccessible classes -- see below.\n+ // For example, if `com/example/A$B.class` has outer class `com/example/A.class`, we will process `com/example/A.class` first (the\n+ // string `com/example/A.class` is higher than `com/example/A$B.class` because `.` is higher than `$`).\n+ classes.sortedByDescending { it.classFile.unixStyleRelativePath }.forEach {\n+ val clazz = metrics.measure(BuildTime.LOAD_CONTENTS_OF_CLASSES) {\n+ it.loadContents()\n+ }\n+ val classSnapshot = when {\n+ // We don't need to snapshot classes that are (directly or transitively) inaccessible.\n+ // A class is transitively inaccessible if its outer class is (directly or transitively) inaccessible.\n+ // The outer class (if present) should have been processed due to the way we sorted the given classes (see above).\n+ //\n+ // In the uncommon case that the outer class is not yet processed (i.e., when the outer class's path is not a prefix of this\n+ // class's path) or if it can't be found in the given classes (this could happen with faulty jars), then we will not be able\n+ // to confirm whether this class is transitively inaccessible. If we try to find and process the outer class on the fly, we\n+ // will potentially need to load many more classes, and we will also have to cache them in memory to avoid loading them\n+ // twice later, therefore consuming more memory (KT-57757). To keep things simple and avoid KT-57757, we'll just consider\n+ // that the class is accessible and snapshot it (it's okay to snapshot more than required, but incorrect to snapshot less).\n+ // Note that this case is unlikely to happen anyway.", + "comment_created_at": "2023-04-21T16:01:50+00:00", + "comment_author": "hungvietnguyen", + "comment_body": "> Also, the comment about the memory consumption does not really hold, in order to load A, you'll need to load the outer class parent chain. 
which should not be more than ~10 classes.\r\n\r\nThe key challenge is that we can only get a `ClassID` by reading the `.class` file's contents, not by its path, because the `$` character in the file path is ambiguous (`A$B.class` can either mean inner class `B` of outer class `A`, or it can mean top-level class `A$B` where `$` is part of its name).\r\n\r\nSo to find the outer class we will potentially need to look for + load all entries in the jar.\r\n\r\nI've updated the algorithm to look for outer classes, could you take another look and see if it addresses the issue?\r\n\r\n-----------------------------------\r\n\r\n(Btw, I tested on the 400 MB `ideaIC-2022.1.4/lib/app.jar` and got some numbers:\r\n\r\n- Directly inaccessible classes: 20241\r\n- Transitively inaccessible classes: 0 (we never found a case where a class says it's accessible but its outer class is inaccessible)\r\n- Kotlin classes: 9377\r\n- Java classes: 47854\r\n\r\nSo I'll need to be careful not to increase code complexity too much for detecting \"transitively inaccessible\" classes.)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1320244529", + "pr_number": 5174, + "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/storage/externalizers.kt", + "created_at": "2023-09-08T19:11:20+00:00", + "commented_code": "}\n}\n\nobject StringCollectionExternalizer : CollectionExternalizer>(StringExternalizer, { size -> ArrayList(size) })", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1320244529", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5174, + "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/storage/externalizers.kt", + "discussion_id": "1320244529", + "commented_code": "@@ -384,11 +323,13 @@ abstract class GenericCollectionExternalizer>(\n }\n }\n \n+object StringCollectionExternalizer : CollectionExternalizer>(StringExternalizer, { size -> ArrayList(size) })", + "comment_created_at": "2023-09-08T19:11:20+00:00", + "comment_author": "ALikhachev", + "comment_body": "I'm a bit worried by that change. Previously it was backed by `HashSet`", + "pr_file_module": null + }, + { + "comment_id": "1321591326", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5174, + "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/storage/externalizers.kt", + "discussion_id": "1320244529", + "commented_code": "@@ -384,11 +323,13 @@ abstract class GenericCollectionExternalizer>(\n }\n }\n \n+object StringCollectionExternalizer : CollectionExternalizer>(StringExternalizer, { size -> ArrayList(size) })", + "comment_created_at": "2023-09-11T13:51:07+00:00", + "comment_author": "hungvietnguyen", + "comment_body": "A **Collection**Externalizer must be able to preserve duplicates in the collection, so I think using a List is required for correctness. Using a `HashSet` before was probably a mistake.", + "pr_file_module": null + }, + { + "comment_id": "1322166847", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5174, + "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/storage/externalizers.kt", + "discussion_id": "1320244529", + "commented_code": "@@ -384,11 +323,13 @@ abstract class GenericCollectionExternalizer>(\n }\n }\n \n+object StringCollectionExternalizer : CollectionExternalizer>(StringExternalizer, { size -> ArrayList(size) })", + "comment_created_at": "2023-09-11T23:08:47+00:00", + "comment_author": "ALikhachev", + "comment_body": "Yep, I agree that in general it should be as you have done. 
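The correctness concern in this externalizer thread, that a `Collection` round trip must preserve duplicates while a `HashSet`-backed one silently drops them, can be seen in a few lines of plain Kotlin (a sketch of the semantics only, not the build-common externalizer code):

```kotlin
fun main() {
    val original: Collection<String> = listOf("a", "b", "a")   // duplicates are legitimate here

    // What a HashSet-backed round trip effectively does: the duplicate is gone
    // and iteration order is no longer guaranteed.
    val viaSet = HashSet(original).toList()

    // A List-backed round trip keeps every element and its order.
    val viaList = ArrayList(original)

    println(viaSet)    // e.g. [a, b]
    println(viaList)   // [a, b, a]
}
```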
Since you're going to perform more work on that as you say below, then I'm fine with that", + "pr_file_module": null + }, + { + "comment_id": "1322806157", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5174, + "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/storage/externalizers.kt", + "discussion_id": "1320244529", + "commented_code": "@@ -384,11 +323,13 @@ abstract class GenericCollectionExternalizer>(\n }\n }\n \n+object StringCollectionExternalizer : CollectionExternalizer>(StringExternalizer, { size -> ArrayList(size) })", + "comment_created_at": "2023-09-12T10:13:54+00:00", + "comment_author": "hungvietnguyen", + "comment_body": "Great, thank you for your review! :)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "995659629", + "pr_number": 4985, + "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/IncrementalJvmCache.kt", + "created_at": "2022-10-14T11:31:49+00:00", + "commented_code": "// todo: reuse code with InlineFunctionsMap?\n private inner class ConstantsMap(storageFile: File) :\n BasicStringMap>(storageFile, LinkedHashMapExternalizer(StringExternalizer, ConstantExternalizer)) {\n BasicStringMap>(storageFile, MapExternalizer(StringExternalizer, ConstantValueExternalizer)) {", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "995659629", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4985, + "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/IncrementalJvmCache.kt", + "discussion_id": "995659629", + "commented_code": "@@ -439,7 +432,7 @@ open class IncrementalJvmCache(\n \n // todo: reuse code with InlineFunctionsMap?\n private inner class ConstantsMap(storageFile: File) :\n- BasicStringMap>(storageFile, LinkedHashMapExternalizer(StringExternalizer, ConstantExternalizer)) {\n+ BasicStringMap>(storageFile, MapExternalizer(StringExternalizer, ConstantValueExternalizer)) {", + "comment_created_at": "2022-10-14T11:31:49+00:00", + "comment_author": "gavra0", + "comment_body": "What is the benefit of replacing Map with LinkedHashMap? Previously, it was clear that this was an ordered collection.", + "pr_file_module": null + }, + { + "comment_id": "995877871", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4985, + "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/IncrementalJvmCache.kt", + "discussion_id": "995659629", + "commented_code": "@@ -439,7 +432,7 @@ open class IncrementalJvmCache(\n \n // todo: reuse code with InlineFunctionsMap?\n private inner class ConstantsMap(storageFile: File) :\n- BasicStringMap>(storageFile, LinkedHashMapExternalizer(StringExternalizer, ConstantExternalizer)) {\n+ BasicStringMap>(storageFile, MapExternalizer(StringExternalizer, ConstantValueExternalizer)) {", + "comment_created_at": "2022-10-14T15:20:18+00:00", + "comment_author": "hungvietnguyen", + "comment_body": "Right, I also liked the fact that it was ordered, but I made this change because:\r\n 1. `LinkedHashMap` feels a bit heavy.\r\n 2. All collections in Kotlin are already implicitly ordered.\r\n 3. If we really want to preserver order, having it a `LinkedHashMap` here is not enough. We will have to preserve order all the way from source to sink, and we would want to do that for all other data structures as well because almost everything involved in build should ideally be ordered. 
So we will end up turning all `Map`s and `Set`s throughout the codebase into `LinkedHashMap` and `LinkedHashSet`.\r\n - Actually, when implementing the new IC, I initially wrote new code with `LinkedHashMap` but over time found that it doesn't scale and the code just looks inconsistent if sometimes we use `Map`, sometimes we use `LinkedHashMap`. (Here is our previous discussion on this: https://github.com/google/kotlin/commit/654c4250ba81ce0d02c167c39de96b1212b3c2e7#commitcomment-57900126)\r\n\r\nThat said, I can also revert to `LinkedHashMap` if you think it's still nice to have.", + "pr_file_module": null + }, + { + "comment_id": "999556776", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4985, + "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/IncrementalJvmCache.kt", + "discussion_id": "995659629", + "commented_code": "@@ -439,7 +432,7 @@ open class IncrementalJvmCache(\n \n // todo: reuse code with InlineFunctionsMap?\n private inner class ConstantsMap(storageFile: File) :\n- BasicStringMap>(storageFile, LinkedHashMapExternalizer(StringExternalizer, ConstantExternalizer)) {\n+ BasicStringMap>(storageFile, MapExternalizer(StringExternalizer, ConstantValueExternalizer)) {", + "comment_created_at": "2022-10-19T14:39:06+00:00", + "comment_author": "gavra0", + "comment_body": "kk, let's keep it as-is.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kotlin-consider-operation-time-complexity.md b/_reviewers/kotlin-consider-operation-time-complexity.md index fa4451c..b44b113 100644 --- a/_reviewers/kotlin-consider-operation-time-complexity.md +++ b/_reviewers/kotlin-consider-operation-time-complexity.md @@ -40,201 +40,3 @@ Choose data structures based on your specific use case: - Use LinkedList when you frequently insert/remove from middle - Use HashSet for fast lookups and uniqueness checks - Consider LinkedHashMap only when ordering is critical and worth the overhead - - -[ - { - "discussion_id": "1151434877", - "pr_number": 5116, - "pr_file": "libraries/stdlib/js/src/kotlin/collections/ArrayList.kt", - "created_at": "2023-03-29T06:09:39+00:00", - "commented_code": "if (index == size) return addAll(elements)\n if (elements.isEmpty()) return false\n when (index) {\n size -> return addAll(elements)\n 0 -> array = elements.toTypedArray() + array\n else -> array = array.copyOfRange(0, index).asDynamic().concat(elements.toTypedArray(), array.copyOfRange(index, size))\n\n var i = index\n elements.forEach { element ->\n array.asDynamic().splice(i, 0, element)", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1151434877", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5116, - "pr_file": "libraries/stdlib/js/src/kotlin/collections/ArrayList.kt", - "discussion_id": "1151434877", - "commented_code": "@@ -82,10 +83,11 @@ public actual open class ArrayList internal constructor(private var array: Ar\n \n if (index == size) return addAll(elements)\n if (elements.isEmpty()) return false\n- when (index) {\n- size -> return addAll(elements)\n- 0 -> array = elements.toTypedArray() + array\n- else -> array = array.copyOfRange(0, index).asDynamic().concat(elements.toTypedArray(), array.copyOfRange(index, size))\n+\n+ var i = index\n+ elements.forEach { element ->\n+ array.asDynamic().splice(i, 0, element)", - "comment_created_at": "2023-03-29T06:09:39+00:00", - "comment_author": "ilya-g", - "comment_body": "Moving array tail on each element insertion would result in quadratic complexity.", - 
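The complexity point is easy to make concrete: inserting k elements one by one at a fixed index shifts the list's tail on every call (roughly k·n element moves), while a single bulk insertion shifts it once. A plain-Kotlin sketch of the two shapes (not the JS `ArrayList.kt` code under review):

```kotlin
// Quadratic shape: each add(index, e) moves the tail of the backing array again.
fun <T> insertOneByOne(target: MutableList<T>, index: Int, elements: Collection<T>) {
    var i = index
    for (e in elements) {
        target.add(i++, e)
    }
}

// Linear shape: every existing element is moved at most once, O(n + k).
fun <T> insertBulk(target: List<T>, index: Int, elements: Collection<T>): List<T> =
    buildList(target.size + elements.size) {
        addAll(target.subList(0, index))
        addAll(elements)
        addAll(target.subList(index, target.size))
    }
```

On the JVM, `ArrayList.addAll(index, elements)` already performs the single-shift version, which is why the review pushes back on the per-element `splice` loop.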
"pr_file_module": null - } - ] - }, - { - "discussion_id": "1320287309", - "pr_number": 5174, - "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/storage/LookupMap.kt", - "created_at": "2023-09-08T19:54:43+00:00", - "commented_code": "override fun dumpValue(value: Collection): String = value.toString()\n\n fun add(name: String, scope: String, fileId: Int) {\n storage.append(LookupSymbolKey(name, scope), listOf(fileId))\n storage.append(LookupSymbolKey(name, scope), fileId)\n }\n\n fun append(lookup: LookupSymbolKey, fileIds: Collection) {\n storage.append(lookup, fileIds)\n }\n\n operator fun get(key: LookupSymbolKey): Collection? = storage[key]\n override fun get(key: LookupSymbolKey): Collection? = storage[key]?.toSet()", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1320287309", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5174, - "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/storage/LookupMap.kt", - "discussion_id": "1320287309", - "commented_code": "@@ -35,23 +35,13 @@ class LookupMap(\n override fun dumpValue(value: Collection): String = value.toString()\n \n fun add(name: String, scope: String, fileId: Int) {\n- storage.append(LookupSymbolKey(name, scope), listOf(fileId))\n+ storage.append(LookupSymbolKey(name, scope), fileId)\n }\n \n- fun append(lookup: LookupSymbolKey, fileIds: Collection) {\n- storage.append(lookup, fileIds)\n- }\n-\n- operator fun get(key: LookupSymbolKey): Collection? = storage[key]\n+ override fun get(key: LookupSymbolKey): Collection? = storage[key]?.toSet()", - "comment_created_at": "2023-09-08T19:54:43+00:00", - "comment_author": "ALikhachev", - "comment_body": "What do you think about making the map values as `Set` in the first place? Now it's declared to be a map of `LookupSymbolKey -> Collection`, however effectively it is `LookupSymbolKey -> Set`, we deserialize the values as `List` and then convert the list into a `Set`. Perhaps we could avoid that conversion. On the other side, such a change will make the append logic of `InMemoryStorage` more complex", - "pr_file_module": null - }, - { - "comment_id": "1321592368", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5174, - "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/storage/LookupMap.kt", - "discussion_id": "1320287309", - "commented_code": "@@ -35,23 +35,13 @@ class LookupMap(\n override fun dumpValue(value: Collection): String = value.toString()\n \n fun add(name: String, scope: String, fileId: Int) {\n- storage.append(LookupSymbolKey(name, scope), listOf(fileId))\n+ storage.append(LookupSymbolKey(name, scope), fileId)\n }\n \n- fun append(lookup: LookupSymbolKey, fileIds: Collection) {\n- storage.append(lookup, fileIds)\n- }\n-\n- operator fun get(key: LookupSymbolKey): Collection? = storage[key]\n+ override fun get(key: LookupSymbolKey): Collection? = storage[key]?.toSet()", - "comment_created_at": "2023-09-11T13:51:47+00:00", - "comment_author": "hungvietnguyen", - "comment_body": "Yup you\u2019re right. I plan to declare this map as `LookupSymbolKey -> Set` in the follow-up PR. 
(This PR focuses on the abstract classes; the next PR will clean up the subclasses \u2013 e.g., removing all these overriding methods in the subclasses.)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1173660401", - "pr_number": 5123, - "pr_file": "compiler/incremental-compilation-impl/src/org/jetbrains/kotlin/incremental/classpathDiff/ClasspathSnapshotter.kt", - "created_at": "2023-04-21T11:28:08+00:00", - "commented_code": "/** Creates [ClassSnapshot]s of the given classes. */\n fun snapshot(\n classes: List,\n granularity: ClassSnapshotGranularity = CLASS_MEMBER_LEVEL,\n classes: List,\n granularity: ClassSnapshotGranularity,\n metrics: BuildMetricsReporter = DoNothingBuildMetricsReporter\n ): List {\n val classesInfo: List = metrics.measure(BuildTime.READ_CLASSES_BASIC_INFO) {\n classes.map { it.classInfo }\n }\n val inaccessibleClassesInfo: Set = metrics.measure(BuildTime.FIND_INACCESSIBLE_CLASSES) {\n findInaccessibleClasses(classesInfo)\n }\n return classes.map {\n when {\n it.classInfo in inaccessibleClassesInfo -> InaccessibleClassSnapshot\n it.classInfo.isKotlinClass -> metrics.measure(BuildTime.SNAPSHOT_KOTLIN_CLASSES) {\n snapshotKotlinClass(it, granularity)\n val classSnapshots = mutableMapOf()\n val inaccessibleClasses = mutableSetOf()\n\n // Process the classes in descending order of their relative paths so that outer classes are processed first, which is needed for\n // detecting transitively inaccessible classes -- see below.\n // For example, if `com/example/A$B.class` has outer class `com/example/A.class`, we will process `com/example/A.class` first (the\n // string `com/example/A.class` is higher than `com/example/A$B.class` because `.` is higher than `$`).\n classes.sortedByDescending { it.classFile.unixStyleRelativePath }.forEach {\n val clazz = metrics.measure(BuildTime.LOAD_CONTENTS_OF_CLASSES) {\n it.loadContents()\n }\n val classSnapshot = when {\n // We don't need to snapshot classes that are (directly or transitively) inaccessible.\n // A class is transitively inaccessible if its outer class is (directly or transitively) inaccessible.\n // The outer class (if present) should have been processed due to the way we sorted the given classes (see above).\n //\n // In the uncommon case that the outer class is not yet processed (i.e., when the outer class's path is not a prefix of this\n // class's path) or if it can't be found in the given classes (this could happen with faulty jars), then we will not be able\n // to confirm whether this class is transitively inaccessible. If we try to find and process the outer class on the fly, we\n // will potentially need to load many more classes, and we will also have to cache them in memory to avoid loading them\n // twice later, therefore consuming more memory (KT-57757). To keep things simple and avoid KT-57757, we'll just consider\n // that the class is accessible and snapshot it (it's okay to snapshot more than required, but incorrect to snapshot less).\n // Note that this case is unlikely to happen anyway.", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1173660401", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5123, - "pr_file": "compiler/incremental-compilation-impl/src/org/jetbrains/kotlin/incremental/classpathDiff/ClasspathSnapshotter.kt", - "discussion_id": "1173660401", - "commented_code": "@@ -57,26 +63,66 @@ object ClassSnapshotter {\n \n /** Creates [ClassSnapshot]s of the given classes. 
*/\n fun snapshot(\n- classes: List,\n- granularity: ClassSnapshotGranularity = CLASS_MEMBER_LEVEL,\n+ classes: List,\n+ granularity: ClassSnapshotGranularity,\n metrics: BuildMetricsReporter = DoNothingBuildMetricsReporter\n ): List {\n- val classesInfo: List = metrics.measure(BuildTime.READ_CLASSES_BASIC_INFO) {\n- classes.map { it.classInfo }\n- }\n- val inaccessibleClassesInfo: Set = metrics.measure(BuildTime.FIND_INACCESSIBLE_CLASSES) {\n- findInaccessibleClasses(classesInfo)\n- }\n- return classes.map {\n- when {\n- it.classInfo in inaccessibleClassesInfo -> InaccessibleClassSnapshot\n- it.classInfo.isKotlinClass -> metrics.measure(BuildTime.SNAPSHOT_KOTLIN_CLASSES) {\n- snapshotKotlinClass(it, granularity)\n+ val classSnapshots = mutableMapOf()\n+ val inaccessibleClasses = mutableSetOf()\n+\n+ // Process the classes in descending order of their relative paths so that outer classes are processed first, which is needed for\n+ // detecting transitively inaccessible classes -- see below.\n+ // For example, if `com/example/A$B.class` has outer class `com/example/A.class`, we will process `com/example/A.class` first (the\n+ // string `com/example/A.class` is higher than `com/example/A$B.class` because `.` is higher than `$`).\n+ classes.sortedByDescending { it.classFile.unixStyleRelativePath }.forEach {\n+ val clazz = metrics.measure(BuildTime.LOAD_CONTENTS_OF_CLASSES) {\n+ it.loadContents()\n+ }\n+ val classSnapshot = when {\n+ // We don't need to snapshot classes that are (directly or transitively) inaccessible.\n+ // A class is transitively inaccessible if its outer class is (directly or transitively) inaccessible.\n+ // The outer class (if present) should have been processed due to the way we sorted the given classes (see above).\n+ //\n+ // In the uncommon case that the outer class is not yet processed (i.e., when the outer class's path is not a prefix of this\n+ // class's path) or if it can't be found in the given classes (this could happen with faulty jars), then we will not be able\n+ // to confirm whether this class is transitively inaccessible. If we try to find and process the outer class on the fly, we\n+ // will potentially need to load many more classes, and we will also have to cache them in memory to avoid loading them\n+ // twice later, therefore consuming more memory (KT-57757). To keep things simple and avoid KT-57757, we'll just consider\n+ // that the class is accessible and snapshot it (it's okay to snapshot more than required, but incorrect to snapshot less).\n+ // Note that this case is unlikely to happen anyway.", - "comment_created_at": "2023-04-21T11:28:08+00:00", - "comment_author": "gavra0", - "comment_body": "While this seems to address this use-case, I think we should still file a YT issue to switch to on-demand loading of classes. If we need B in order to process A, the class loading mechanism can provide this information. Currently, sorting of entries addresses the most common scenario, but if there are new cross-class requirements, this becomes difficult to maintain.\r\n\r\nAlso, the comment about the memory consumption does not really hold, in order to load A, you'll need to load the outer class parent chain. 
which should not be more than ~10 classes.", - "pr_file_module": null - }, - { - "comment_id": "1173941121", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5123, - "pr_file": "compiler/incremental-compilation-impl/src/org/jetbrains/kotlin/incremental/classpathDiff/ClasspathSnapshotter.kt", - "discussion_id": "1173660401", - "commented_code": "@@ -57,26 +63,66 @@ object ClassSnapshotter {\n \n /** Creates [ClassSnapshot]s of the given classes. */\n fun snapshot(\n- classes: List,\n- granularity: ClassSnapshotGranularity = CLASS_MEMBER_LEVEL,\n+ classes: List,\n+ granularity: ClassSnapshotGranularity,\n metrics: BuildMetricsReporter = DoNothingBuildMetricsReporter\n ): List {\n- val classesInfo: List = metrics.measure(BuildTime.READ_CLASSES_BASIC_INFO) {\n- classes.map { it.classInfo }\n- }\n- val inaccessibleClassesInfo: Set = metrics.measure(BuildTime.FIND_INACCESSIBLE_CLASSES) {\n- findInaccessibleClasses(classesInfo)\n- }\n- return classes.map {\n- when {\n- it.classInfo in inaccessibleClassesInfo -> InaccessibleClassSnapshot\n- it.classInfo.isKotlinClass -> metrics.measure(BuildTime.SNAPSHOT_KOTLIN_CLASSES) {\n- snapshotKotlinClass(it, granularity)\n+ val classSnapshots = mutableMapOf()\n+ val inaccessibleClasses = mutableSetOf()\n+\n+ // Process the classes in descending order of their relative paths so that outer classes are processed first, which is needed for\n+ // detecting transitively inaccessible classes -- see below.\n+ // For example, if `com/example/A$B.class` has outer class `com/example/A.class`, we will process `com/example/A.class` first (the\n+ // string `com/example/A.class` is higher than `com/example/A$B.class` because `.` is higher than `$`).\n+ classes.sortedByDescending { it.classFile.unixStyleRelativePath }.forEach {\n+ val clazz = metrics.measure(BuildTime.LOAD_CONTENTS_OF_CLASSES) {\n+ it.loadContents()\n+ }\n+ val classSnapshot = when {\n+ // We don't need to snapshot classes that are (directly or transitively) inaccessible.\n+ // A class is transitively inaccessible if its outer class is (directly or transitively) inaccessible.\n+ // The outer class (if present) should have been processed due to the way we sorted the given classes (see above).\n+ //\n+ // In the uncommon case that the outer class is not yet processed (i.e., when the outer class's path is not a prefix of this\n+ // class's path) or if it can't be found in the given classes (this could happen with faulty jars), then we will not be able\n+ // to confirm whether this class is transitively inaccessible. If we try to find and process the outer class on the fly, we\n+ // will potentially need to load many more classes, and we will also have to cache them in memory to avoid loading them\n+ // twice later, therefore consuming more memory (KT-57757). To keep things simple and avoid KT-57757, we'll just consider\n+ // that the class is accessible and snapshot it (it's okay to snapshot more than required, but incorrect to snapshot less).\n+ // Note that this case is unlikely to happen anyway.", - "comment_created_at": "2023-04-21T16:01:50+00:00", - "comment_author": "hungvietnguyen", - "comment_body": "> Also, the comment about the memory consumption does not really hold, in order to load A, you'll need to load the outer class parent chain. 
which should not be more than ~10 classes.\r\n\r\nThe key challenge is that we can only get a `ClassID` by reading the `.class` file's contents, not by its path, because the `$` character in the file path is ambiguous (`A$B.class` can either mean inner class `B` of outer class `A`, or it can mean top-level class `A$B` where `$` is part of its name).\r\n\r\nSo to find the outer class we will potentially need to look for + load all entries in the jar.\r\n\r\nI've updated the algorithm to look for outer classes, could you take another look and see if it addresses the issue?\r\n\r\n-----------------------------------\r\n\r\n(Btw, I tested on the 400 MB `ideaIC-2022.1.4/lib/app.jar` and got some numbers:\r\n\r\n- Directly inaccessible classes: 20241\r\n- Transitively inaccessible classes: 0 (we never found a case where a class says it's accessible but its outer class is inaccessible)\r\n- Kotlin classes: 9377\r\n- Java classes: 47854\r\n\r\nSo I'll need to be careful not to increase code complexity too much for detecting \"transitively inaccessible\" classes.)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1320244529", - "pr_number": 5174, - "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/storage/externalizers.kt", - "created_at": "2023-09-08T19:11:20+00:00", - "commented_code": "}\n}\n\nobject StringCollectionExternalizer : CollectionExternalizer>(StringExternalizer, { size -> ArrayList(size) })", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1320244529", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5174, - "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/storage/externalizers.kt", - "discussion_id": "1320244529", - "commented_code": "@@ -384,11 +323,13 @@ abstract class GenericCollectionExternalizer>(\n }\n }\n \n+object StringCollectionExternalizer : CollectionExternalizer>(StringExternalizer, { size -> ArrayList(size) })", - "comment_created_at": "2023-09-08T19:11:20+00:00", - "comment_author": "ALikhachev", - "comment_body": "I'm a bit worried by that change. Previously it was backed by `HashSet`", - "pr_file_module": null - }, - { - "comment_id": "1321591326", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5174, - "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/storage/externalizers.kt", - "discussion_id": "1320244529", - "commented_code": "@@ -384,11 +323,13 @@ abstract class GenericCollectionExternalizer>(\n }\n }\n \n+object StringCollectionExternalizer : CollectionExternalizer>(StringExternalizer, { size -> ArrayList(size) })", - "comment_created_at": "2023-09-11T13:51:07+00:00", - "comment_author": "hungvietnguyen", - "comment_body": "A **Collection**Externalizer must be able to preserve duplicates in the collection, so I think using a List is required for correctness. Using a `HashSet` before was probably a mistake.", - "pr_file_module": null - }, - { - "comment_id": "1322166847", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5174, - "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/storage/externalizers.kt", - "discussion_id": "1320244529", - "commented_code": "@@ -384,11 +323,13 @@ abstract class GenericCollectionExternalizer>(\n }\n }\n \n+object StringCollectionExternalizer : CollectionExternalizer>(StringExternalizer, { size -> ArrayList(size) })", - "comment_created_at": "2023-09-11T23:08:47+00:00", - "comment_author": "ALikhachev", - "comment_body": "Yep, I agree that in general it should be as you have done. 
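To make the duplicate-preservation argument above concrete, here is a minimal, self-contained Kotlin sketch. It is not the real `CollectionExternalizer`/`StringCollectionExternalizer` from `externalizers.kt`; the save/read helpers and their names are simplified assumptions, but they show why a `HashSet`-backed factory silently drops repeated values while an `ArrayList`-backed one round-trips them.

```kotlin
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.io.DataInputStream
import java.io.DataOutputStream

// Hypothetical, simplified externalizer: write the size first, then each element.
fun saveStrings(values: Collection<String>): ByteArray {
    val bytes = ByteArrayOutputStream()
    DataOutputStream(bytes).use { out ->
        out.writeInt(values.size)
        values.forEach { out.writeUTF(it) }
    }
    return bytes.toByteArray()
}

// The collection factory decides where the deserialized values land,
// mirroring the `{ size -> ArrayList(size) }` argument in the discussion.
fun <C : MutableCollection<String>> readStrings(data: ByteArray, create: (Int) -> C): C =
    DataInputStream(ByteArrayInputStream(data)).use { input ->
        val size = input.readInt()
        val result = create(size)
        repeat(size) { result.add(input.readUTF()) }
        result
    }

fun main() {
    val original = listOf("a", "b", "a") // duplicates are meaningful here
    val saved = saveStrings(original)

    // List-backed factory preserves duplicates: [a, b, a]
    println(readStrings(saved) { size -> ArrayList<String>(size) })

    // Set-backed factory collapses them to [a, b] -- a silent data change.
    println(readStrings(saved) { size -> HashSet<String>(size) })
}
```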
Since you're going to perform more work on that as you say below, then I'm fine with that", - "pr_file_module": null - }, - { - "comment_id": "1322806157", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5174, - "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/storage/externalizers.kt", - "discussion_id": "1320244529", - "commented_code": "@@ -384,11 +323,13 @@ abstract class GenericCollectionExternalizer>(\n }\n }\n \n+object StringCollectionExternalizer : CollectionExternalizer>(StringExternalizer, { size -> ArrayList(size) })", - "comment_created_at": "2023-09-12T10:13:54+00:00", - "comment_author": "hungvietnguyen", - "comment_body": "Great, thank you for your review! :)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "995659629", - "pr_number": 4985, - "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/IncrementalJvmCache.kt", - "created_at": "2022-10-14T11:31:49+00:00", - "commented_code": "// todo: reuse code with InlineFunctionsMap?\n private inner class ConstantsMap(storageFile: File) :\n BasicStringMap>(storageFile, LinkedHashMapExternalizer(StringExternalizer, ConstantExternalizer)) {\n BasicStringMap>(storageFile, MapExternalizer(StringExternalizer, ConstantValueExternalizer)) {", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "995659629", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4985, - "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/IncrementalJvmCache.kt", - "discussion_id": "995659629", - "commented_code": "@@ -439,7 +432,7 @@ open class IncrementalJvmCache(\n \n // todo: reuse code with InlineFunctionsMap?\n private inner class ConstantsMap(storageFile: File) :\n- BasicStringMap>(storageFile, LinkedHashMapExternalizer(StringExternalizer, ConstantExternalizer)) {\n+ BasicStringMap>(storageFile, MapExternalizer(StringExternalizer, ConstantValueExternalizer)) {", - "comment_created_at": "2022-10-14T11:31:49+00:00", - "comment_author": "gavra0", - "comment_body": "What is the benefit of replacing Map with LinkedHashMap? Previously, it was clear that this was an ordered collection.", - "pr_file_module": null - }, - { - "comment_id": "995877871", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4985, - "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/IncrementalJvmCache.kt", - "discussion_id": "995659629", - "commented_code": "@@ -439,7 +432,7 @@ open class IncrementalJvmCache(\n \n // todo: reuse code with InlineFunctionsMap?\n private inner class ConstantsMap(storageFile: File) :\n- BasicStringMap>(storageFile, LinkedHashMapExternalizer(StringExternalizer, ConstantExternalizer)) {\n+ BasicStringMap>(storageFile, MapExternalizer(StringExternalizer, ConstantValueExternalizer)) {", - "comment_created_at": "2022-10-14T15:20:18+00:00", - "comment_author": "hungvietnguyen", - "comment_body": "Right, I also liked the fact that it was ordered, but I made this change because:\r\n 1. `LinkedHashMap` feels a bit heavy.\r\n 2. All collections in Kotlin are already implicitly ordered.\r\n 3. If we really want to preserver order, having it a `LinkedHashMap` here is not enough. We will have to preserve order all the way from source to sink, and we would want to do that for all other data structures as well because almost everything involved in build should ideally be ordered. 
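As a small aside illustrating the "Kotlin collections are already implicitly ordered" point above (a hedged sketch, not code from the incremental caches): the standard `mutableMapOf` builder is documented to preserve entry insertion order, so declaring the type as plain `Map`/`MutableMap` does not by itself throw ordering away; it only stops advertising the guarantee in the type.

```kotlin
fun main() {
    // mutableMapOf() is documented to preserve entry insertion order
    // (it is backed by a LinkedHashMap), even though the declared type
    // is just MutableMap<String, Int>.
    val constants: MutableMap<String, Int> = mutableMapOf()
    constants["gamma"] = 3
    constants["alpha"] = 1
    constants["beta"] = 2

    // Prints gamma=3, alpha=1, beta=2 -- insertion order, not key order.
    println(constants.entries.joinToString())

    // A HashMap behind the same declared type would NOT give this guarantee,
    // which is why ordering has to be ensured where the map is created,
    // not in the declared type.
    val unordered: MutableMap<String, Int> = HashMap(constants)
    println(unordered.entries.joinToString())
}
```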
So we will end up turning all `Map`s and `Set`s throughout the codebase into `LinkedHashMap` and `LinkedHashSet`.\r\n - Actually, when implementing the new IC, I initially wrote new code with `LinkedHashMap` but over time found that it doesn't scale and the code just looks inconsistent if sometimes we use `Map`, sometimes we use `LinkedHashMap`. (Here is our previous discussion on this: https://github.com/google/kotlin/commit/654c4250ba81ce0d02c167c39de96b1212b3c2e7#commitcomment-57900126)\r\n\r\nThat said, I can also revert to `LinkedHashMap` if you think it's still nice to have.", - "pr_file_module": null - }, - { - "comment_id": "999556776", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4985, - "pr_file": "build-common/src/org/jetbrains/kotlin/incremental/IncrementalJvmCache.kt", - "discussion_id": "995659629", - "commented_code": "@@ -439,7 +432,7 @@ open class IncrementalJvmCache(\n \n // todo: reuse code with InlineFunctionsMap?\n private inner class ConstantsMap(storageFile: File) :\n- BasicStringMap>(storageFile, LinkedHashMapExternalizer(StringExternalizer, ConstantExternalizer)) {\n+ BasicStringMap>(storageFile, MapExternalizer(StringExternalizer, ConstantValueExternalizer)) {", - "comment_created_at": "2022-10-19T14:39:06+00:00", - "comment_author": "gavra0", - "comment_body": "kk, let's keep it as-is.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kotlin-copy-external-string-inputs.json b/_reviewers/kotlin-copy-external-string-inputs.json new file mode 100644 index 0000000..4af6e5b --- /dev/null +++ b/_reviewers/kotlin-copy-external-string-inputs.json @@ -0,0 +1,70 @@ +[ + { + "discussion_id": "1581157906", + "pr_number": 5281, + "pr_file": "kotlin-native/runtime/src/launcher/cpp/launcher.cpp", + "created_at": "2024-04-26T14:58:47+00:00", + "commented_code": "//--- Setup args --------------------------------------------------------------//\n\nOBJ_GETTER(setupArgs, int argc, const char** argv) {\n kotlin::programName = argv[0];", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1581157906", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5281, + "pr_file": "kotlin-native/runtime/src/launcher/cpp/launcher.cpp", + "discussion_id": "1581157906", + "commented_code": "@@ -30,6 +30,8 @@ using namespace kotlin;\n //--- Setup args --------------------------------------------------------------//\n \n OBJ_GETTER(setupArgs, int argc, const char** argv) {\n+ kotlin::programName = argv[0];", + "comment_created_at": "2024-04-26T14:58:47+00:00", + "comment_author": "SvyatoslavScherbina", + "comment_body": "Is `argv[0]` guaranteed to live long enough? What will happen if some code accesses `kotlin::programName` after the `main` function finishes?\r\nIs `argv` guaranteed to always have at least one element?", + "pr_file_module": null + }, + { + "comment_id": "1584879531", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5281, + "pr_file": "kotlin-native/runtime/src/launcher/cpp/launcher.cpp", + "discussion_id": "1581157906", + "commented_code": "@@ -30,6 +30,8 @@ using namespace kotlin;\n //--- Setup args --------------------------------------------------------------//\n \n OBJ_GETTER(setupArgs, int argc, const char** argv) {\n+ kotlin::programName = argv[0];", + "comment_created_at": "2024-04-30T13:57:23+00:00", + "comment_author": "vonox7", + "comment_body": "1. programName will now be set to null before argv leaves scope. 
I could not find a definite answer if it would be guaranteed to live long enough, so I opted for the safe choice.\r\n2. Theoretically it is guaranteed by the POSIX standard. However if it has 0 arguments, the code 1 line below would have crashed. I also added a testcase for argc=0, and added support for it via `std::max(0, ...)`. So now kotlin executables don't crash on startup when they are launched in a non posix compatible way.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1620522926", + "pr_number": 5281, + "pr_file": "kotlin-native/runtime/src/launcher/cpp/launcher.cpp", + "created_at": "2024-05-30T11:26:13+00:00", + "commented_code": "//--- Setup args --------------------------------------------------------------//\n\nOBJ_GETTER(setupArgs, int argc, const char** argv) {\n if (argc > 0 && argv[0][0] != '\\0') {\n kotlin::programName = argv[0];", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1620522926", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5281, + "pr_file": "kotlin-native/runtime/src/launcher/cpp/launcher.cpp", + "discussion_id": "1620522926", + "commented_code": "@@ -30,8 +30,12 @@ using namespace kotlin;\n //--- Setup args --------------------------------------------------------------//\n \n OBJ_GETTER(setupArgs, int argc, const char** argv) {\n+ if (argc > 0 && argv[0][0] != '\\0') {\n+ kotlin::programName = argv[0];", + "comment_created_at": "2024-05-30T11:26:13+00:00", + "comment_author": "projedi", + "comment_body": "How about doing `kotlin::programName = strndup(argv[0], 4096)`? No need to `free` the result, and we don't have to worry about lifetimes. `4096` is just some random not-too-small not-too-big number to protect us from `argv[0]` being too large. Technically, the OS has some limits already, but might as well protect ourselves.", + "pr_file_module": null + }, + { + "comment_id": "1629431106", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5281, + "pr_file": "kotlin-native/runtime/src/launcher/cpp/launcher.cpp", + "discussion_id": "1620522926", + "commented_code": "@@ -30,8 +30,12 @@ using namespace kotlin;\n //--- Setup args --------------------------------------------------------------//\n \n OBJ_GETTER(setupArgs, int argc, const char** argv) {\n+ if (argc > 0 && argv[0][0] != '\\0') {\n+ kotlin::programName = argv[0];", + "comment_created_at": "2024-06-06T12:23:11+00:00", + "comment_author": "vonox7", + "comment_body": "Ok, I applied your idea.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kotlin-copy-external-string-inputs.md b/_reviewers/kotlin-copy-external-string-inputs.md index f275ebe..4d276ec 100644 --- a/_reviewers/kotlin-copy-external-string-inputs.md +++ b/_reviewers/kotlin-copy-external-string-inputs.md @@ -30,75 +30,3 @@ if (argc > 0 && argv[0][0] != '\0') { ``` This approach prevents crashes from invalid pointers, protects against potential buffer issues by limiting string length, and eliminates lifetime concerns by managing your own copy of the data. 
- - -[ - { - "discussion_id": "1581157906", - "pr_number": 5281, - "pr_file": "kotlin-native/runtime/src/launcher/cpp/launcher.cpp", - "created_at": "2024-04-26T14:58:47+00:00", - "commented_code": "//--- Setup args --------------------------------------------------------------//\n\nOBJ_GETTER(setupArgs, int argc, const char** argv) {\n kotlin::programName = argv[0];", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1581157906", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5281, - "pr_file": "kotlin-native/runtime/src/launcher/cpp/launcher.cpp", - "discussion_id": "1581157906", - "commented_code": "@@ -30,6 +30,8 @@ using namespace kotlin;\n //--- Setup args --------------------------------------------------------------//\n \n OBJ_GETTER(setupArgs, int argc, const char** argv) {\n+ kotlin::programName = argv[0];", - "comment_created_at": "2024-04-26T14:58:47+00:00", - "comment_author": "SvyatoslavScherbina", - "comment_body": "Is `argv[0]` guaranteed to live long enough? What will happen if some code accesses `kotlin::programName` after the `main` function finishes?\r\nIs `argv` guaranteed to always have at least one element?", - "pr_file_module": null - }, - { - "comment_id": "1584879531", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5281, - "pr_file": "kotlin-native/runtime/src/launcher/cpp/launcher.cpp", - "discussion_id": "1581157906", - "commented_code": "@@ -30,6 +30,8 @@ using namespace kotlin;\n //--- Setup args --------------------------------------------------------------//\n \n OBJ_GETTER(setupArgs, int argc, const char** argv) {\n+ kotlin::programName = argv[0];", - "comment_created_at": "2024-04-30T13:57:23+00:00", - "comment_author": "vonox7", - "comment_body": "1. programName will now be set to null before argv leaves scope. I could not find a definite answer if it would be guaranteed to live long enough, so I opted for the safe choice.\r\n2. Theoretically it is guaranteed by the POSIX standard. However if it has 0 arguments, the code 1 line below would have crashed. I also added a testcase for argc=0, and added support for it via `std::max(0, ...)`. So now kotlin executables don't crash on startup when they are launched in a non posix compatible way.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1620522926", - "pr_number": 5281, - "pr_file": "kotlin-native/runtime/src/launcher/cpp/launcher.cpp", - "created_at": "2024-05-30T11:26:13+00:00", - "commented_code": "//--- Setup args --------------------------------------------------------------//\n\nOBJ_GETTER(setupArgs, int argc, const char** argv) {\n if (argc > 0 && argv[0][0] != '\\0') {\n kotlin::programName = argv[0];", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1620522926", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5281, - "pr_file": "kotlin-native/runtime/src/launcher/cpp/launcher.cpp", - "discussion_id": "1620522926", - "commented_code": "@@ -30,8 +30,12 @@ using namespace kotlin;\n //--- Setup args --------------------------------------------------------------//\n \n OBJ_GETTER(setupArgs, int argc, const char** argv) {\n+ if (argc > 0 && argv[0][0] != '\\0') {\n+ kotlin::programName = argv[0];", - "comment_created_at": "2024-05-30T11:26:13+00:00", - "comment_author": "projedi", - "comment_body": "How about doing `kotlin::programName = strndup(argv[0], 4096)`? No need to `free` the result, and we don't have to worry about lifetimes. 
`4096` is just some random not-too-small not-too-big number to protect us from `argv[0]` being too large. Technically, the OS has some limits already, but might as well protect ourselves.", - "pr_file_module": null - }, - { - "comment_id": "1629431106", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5281, - "pr_file": "kotlin-native/runtime/src/launcher/cpp/launcher.cpp", - "discussion_id": "1620522926", - "commented_code": "@@ -30,8 +30,12 @@ using namespace kotlin;\n //--- Setup args --------------------------------------------------------------//\n \n OBJ_GETTER(setupArgs, int argc, const char** argv) {\n+ if (argc > 0 && argv[0][0] != '\\0') {\n+ kotlin::programName = argv[0];", - "comment_created_at": "2024-06-06T12:23:11+00:00", - "comment_author": "vonox7", - "comment_body": "Ok, I applied your idea.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kotlin-dependency-verification-configuration.json b/_reviewers/kotlin-dependency-verification-configuration.json new file mode 100644 index 0000000..e411a35 --- /dev/null +++ b/_reviewers/kotlin-dependency-verification-configuration.json @@ -0,0 +1,70 @@ +[ + { + "discussion_id": "651452380", + "pr_number": 4454, + "pr_file": "gradle/verification-metadata.xml", + "created_at": "2021-06-15T05:11:07+00:00", + "commented_code": "\n \n \n ", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "651452380", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4454, + "pr_file": "gradle/verification-metadata.xml", + "discussion_id": "651452380", + "commented_code": "@@ -2734,6 +2734,12 @@\n \n \n \n+ ", + "comment_created_at": "2021-06-15T05:11:07+00:00", + "comment_author": "jsjeon", + "comment_body": "I pointed out the same thing: https://github.com/JetBrains/kotlin/pull/4348#discussion_r626823062, but the below two are needed?", + "pr_file_module": null + }, + { + "comment_id": "651521343", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4454, + "pr_file": "gradle/verification-metadata.xml", + "discussion_id": "651452380", + "commented_code": "@@ -2734,6 +2734,12 @@\n \n \n \n+ ", + "comment_created_at": "2021-06-15T07:28:19+00:00", + "comment_author": "punzki", + "comment_body": "Yes, these are needed when you have new dependencies (see `build.gradle.kts` files). Otherwise you will get a build failure.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "626823062", + "pr_number": 4348, + "pr_file": "gradle/verification-metadata.xml", + "created_at": "2021-05-05T19:00:32+00:00", + "commented_code": "\n \n \n ", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "626823062", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4348, + "pr_file": "gradle/verification-metadata.xml", + "discussion_id": "626823062", + "commented_code": "@@ -2667,6 +2667,12 @@\n \n \n \n+ ", + "comment_created_at": "2021-05-05T19:00:32+00:00", + "comment_author": "jsjeon", + "comment_body": "I wonder this addition is the only relevant thing you need to add?", + "pr_file_module": null + }, + { + "comment_id": "626953554", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4348, + "pr_file": "gradle/verification-metadata.xml", + "discussion_id": "626823062", + "commented_code": "@@ -2667,6 +2667,12 @@\n \n \n \n+ ", + "comment_created_at": "2021-05-05T22:11:50+00:00", + "comment_author": "punzki", + "comment_body": "I only need the 3 dependencies in `build.gradle.kts`. 
I ran the following command to generate it: `./gradlew --write-verification-metadata md5,sha256 help` and I'm not sure why it added a bunch more dependencies. But I'll revert the file and add the 3 dependencies manually.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kotlin-dependency-verification-configuration.md b/_reviewers/kotlin-dependency-verification-configuration.md index 3ee5725..70bc3d3 100644 --- a/_reviewers/kotlin-dependency-verification-configuration.md +++ b/_reviewers/kotlin-dependency-verification-configuration.md @@ -29,75 +29,3 @@ dependencies { // Keep only required verification entries ``` - - -[ - { - "discussion_id": "651452380", - "pr_number": 4454, - "pr_file": "gradle/verification-metadata.xml", - "created_at": "2021-06-15T05:11:07+00:00", - "commented_code": "\n \n \n ", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "651452380", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4454, - "pr_file": "gradle/verification-metadata.xml", - "discussion_id": "651452380", - "commented_code": "@@ -2734,6 +2734,12 @@\n \n \n \n+ ", - "comment_created_at": "2021-06-15T05:11:07+00:00", - "comment_author": "jsjeon", - "comment_body": "I pointed out the same thing: https://github.com/JetBrains/kotlin/pull/4348#discussion_r626823062, but the below two are needed?", - "pr_file_module": null - }, - { - "comment_id": "651521343", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4454, - "pr_file": "gradle/verification-metadata.xml", - "discussion_id": "651452380", - "commented_code": "@@ -2734,6 +2734,12 @@\n \n \n \n+ ", - "comment_created_at": "2021-06-15T07:28:19+00:00", - "comment_author": "punzki", - "comment_body": "Yes, these are needed when you have new dependencies (see `build.gradle.kts` files). Otherwise you will get a build failure.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "626823062", - "pr_number": 4348, - "pr_file": "gradle/verification-metadata.xml", - "created_at": "2021-05-05T19:00:32+00:00", - "commented_code": "\n \n \n ", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "626823062", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4348, - "pr_file": "gradle/verification-metadata.xml", - "discussion_id": "626823062", - "commented_code": "@@ -2667,6 +2667,12 @@\n \n \n \n+ ", - "comment_created_at": "2021-05-05T19:00:32+00:00", - "comment_author": "jsjeon", - "comment_body": "I wonder this addition is the only relevant thing you need to add?", - "pr_file_module": null - }, - { - "comment_id": "626953554", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4348, - "pr_file": "gradle/verification-metadata.xml", - "discussion_id": "626823062", - "commented_code": "@@ -2667,6 +2667,12 @@\n \n \n \n+ ", - "comment_created_at": "2021-05-05T22:11:50+00:00", - "comment_author": "punzki", - "comment_body": "I only need the 3 dependencies in `build.gradle.kts`. I ran the following command to generate it: `./gradlew --write-verification-metadata md5,sha256 help` and I'm not sure why it added a bunch more dependencies. 
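For readers following the workflow referenced in these comments, a hedged Gradle Kotlin DSL sketch of the intended pairing (the artifact coordinate below is made up for illustration): add the dependency, regenerate the checksums with the command quoted above, then trim the generated XML back to only the components the change actually needs.

```kotlin
// build.gradle.kts (illustrative module, hypothetical coordinate)
dependencies {
    // Any newly added coordinate must have matching <component> entries in
    // gradle/verification-metadata.xml, otherwise the build fails with a
    // dependency-verification error.
    implementation("com.example:some-new-library:1.2.3")
}

// Regenerate checksums after adding the dependency, then review the diff and
// keep only the entries your change actually requires:
//
//   ./gradlew --write-verification-metadata md5,sha256 help
```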
But I'll revert the file and add the 3 dependencies manually.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kotlin-design-extensible-stable-apis.json b/_reviewers/kotlin-design-extensible-stable-apis.json new file mode 100644 index 0000000..011e456 --- /dev/null +++ b/_reviewers/kotlin-design-extensible-stable-apis.json @@ -0,0 +1,312 @@ +[ + { + "discussion_id": "1438374793", + "pr_number": 5234, + "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiCommandLineProcessor.kt", + "created_at": "2023-12-29T18:59:12+00:00", + "commented_code": "\"sites of inline functions.\",\n false,\n )\n\n val REMOVE_PRIVATE_CLASSES_OPTION: CliOption =\n CliOption(\n \"removePrivateClasses\",\n \"true/false\",\n \"Remove private or effectively private classes from ABI. False by default. Note that if private class is being used \" +", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1438374793", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5234, + "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiCommandLineProcessor.kt", + "discussion_id": "1438374793", + "commented_code": "@@ -32,18 +32,28 @@ class JvmAbiCommandLineProcessor : CommandLineProcessor {\n \"sites of inline functions.\",\n false,\n )\n+\n+ val REMOVE_PRIVATE_CLASSES_OPTION: CliOption =\n+ CliOption(\n+ \"removePrivateClasses\",\n+ \"true/false\",\n+ \"Remove private or effectively private classes from ABI. False by default. Note that if private class is being used \" +", + "comment_created_at": "2023-12-29T18:59:12+00:00", + "comment_author": "artem-zinnatullin", + "comment_body": "```suggestion\r\n \"Remove private or effectively private classes from ABI. False by default due to backwards compatibility. If enabled — private classes will no longer be available from Java.\"\r\n```\r\n\r\nSomething like this maybe? Ideally it should be clear that setting the option to `true` is highly desirable for stable ABI and it is false by default only due to being potentially breaking for rare Java use cases.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1470548786", + "pr_number": 5234, + "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiOutputExtension.kt", + "created_at": "2024-01-30T03:31:48+00:00", + "commented_code": "private val keptMethods = mutableListOf()\n private val innerClassInfos = mutableMapOf()\n\n override fun visit(\n version: Int,\n access: Int,\n name: String?,\n signature: String?,\n superName: String?,\n interfaces: Array?,\n ) {\n val prunedAccess = if (prune) access or Opcodes.ACC_PUBLIC else access", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1470548786", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5234, + "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiOutputExtension.kt", + "discussion_id": "1470548786", + "commented_code": "@@ -92,15 +92,27 @@ class JvmAbiOutputExtension(\n private val keptMethods = mutableListOf()\n private val innerClassInfos = mutableMapOf()\n \n+ override fun visit(\n+ version: Int,\n+ access: Int,\n+ name: String?,\n+ signature: String?,\n+ superName: String?,\n+ interfaces: Array?,\n+ ) {\n+ val prunedAccess = if (prune) access or Opcodes.ACC_PUBLIC else access", + "comment_created_at": "2024-01-30T03:31:48+00:00", + "comment_author": "Tagakov", + "comment_body": "Same as in `@Metadata`. 
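A tiny self-contained Kotlin illustration of the visibility concern raised in this thread (plain access-flag constants from the JVM class-file specification are used instead of ASM's `Opcodes`, and this is a sketch rather than the `JvmAbiOutputExtension` code): forcing `ACC_PUBLIC` into the pruned copy makes the ABI jar advertise a visibility the full jar does not actually have.

```kotlin
// Access-flag value from the JVM class-file specification.
const val ACC_PUBLIC = 0x0001

fun describe(access: Int): String =
    if ((access and ACC_PUBLIC) != 0) "public" else "package-private"

fun main() {
    // A package-private class has no visibility bit set at the class level.
    val realJarAccess = 0

    // Or-ing ACC_PUBLIC into the ABI copy makes the two jars disagree:
    // code compiled against the ABI jar may reference a class that the
    // real jar does not expose.
    val abiJarAccess = realJarAccess or ACC_PUBLIC

    println("full jar: ${describe(realJarAccess)}") // package-private
    println("abi jar:  ${describe(abiJarAccess)}")  // public
}
```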
Not sure if I should fallback to package-private here instead.", + "pr_file_module": null + }, + { + "comment_id": "1471244812", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5234, + "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiOutputExtension.kt", + "discussion_id": "1470548786", + "commented_code": "@@ -92,15 +92,27 @@ class JvmAbiOutputExtension(\n private val keptMethods = mutableListOf()\n private val innerClassInfos = mutableMapOf()\n \n+ override fun visit(\n+ version: Int,\n+ access: Int,\n+ name: String?,\n+ signature: String?,\n+ superName: String?,\n+ interfaces: Array?,\n+ ) {\n+ val prunedAccess = if (prune) access or Opcodes.ACC_PUBLIC else access", + "comment_created_at": "2024-01-30T13:46:30+00:00", + "comment_author": "madsager", + "comment_body": "My 2 cents: I don't think we should ever change the visibility of any element that ends up in the ABI jar. That makes compilation against the ABI jar and the full jar inconsistent.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1190998608", + "pr_number": 5131, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/common/kotlin/org/jetbrains/kotlin/gradle/plugin/ide/IdeMultiplatformImport.kt", + "created_at": "2023-05-11T11:00:21+00:00", + "commented_code": "}\n\n /**\n * Any [IdeDependencyResolver] has to be registered specifying a certain resolution level.\n * Generally, all resolvers registered in a given resolution level will work collaboratively, meaning the dependency resolution\n * result is the aggregation of all resolvers running.\n * Indicating the 'priority' of a given [IdeDependencyResolver] or [IdeAdditionalArtifactResolver]:\n * When resolving dependencies, only the highest priority resolver will be selected and executed.\n * If there are multiple resolvers with the same [Priority] registered, then all of them will be executed\n * and the result will be merged.\n *\n * However, only the resolvers in the highest resolution result will run e.g.\n * If resolvers with level [Overwrite] are found, then only those will contribute to the dependency resolution.\n * Otherwise, all [Default] resolvers will run.\n * By 'default' resolvers are registered using [Priority.normal].\n * The Kotlin Gradle Plugin is not expected to register resolvers outside the [normal] or [high] priorities\n * External Kotlin Target maintainers _(such as Google)_ can therefore use [veryHigh] to overwrite all resolvers\n * from the Kotlin Gradle plugin or [low] to only register a resolver if there is nothing available by default\n * from the Kotlin Gradle plugin.\n */\n @ExternalKotlinTargetApi\n enum class DependencyResolutionLevel {\n Default, Overwrite\n class Priority(val value: Int) : Comparable {", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1190998608", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5131, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/common/kotlin/org/jetbrains/kotlin/gradle/plugin/ide/IdeMultiplatformImport.kt", + "discussion_id": "1190998608", + "commented_code": "@@ -149,31 +152,67 @@ interface IdeMultiplatformImport {\n }\n \n /**\n- * Any [IdeDependencyResolver] has to be registered specifying a certain resolution level.\n- * Generally, all resolvers registered in a given resolution level will work collaboratively, meaning the dependency resolution\n- * result is the aggregation of all resolvers running.\n+ * Indicating the 'priority' of a given [IdeDependencyResolver] or [IdeAdditionalArtifactResolver]:\n+ 
* When resolving dependencies, only the highest priority resolver will be selected and executed.\n+ * If there are multiple resolvers with the same [Priority] registered, then all of them will be executed\n+ * and the result will be merged.\n *\n- * However, only the resolvers in the highest resolution result will run e.g.\n- * If resolvers with level [Overwrite] are found, then only those will contribute to the dependency resolution.\n- * Otherwise, all [Default] resolvers will run.\n+ * By 'default' resolvers are registered using [Priority.normal].\n+ * The Kotlin Gradle Plugin is not expected to register resolvers outside the [normal] or [high] priorities\n+ * External Kotlin Target maintainers _(such as Google)_ can therefore use [veryHigh] to overwrite all resolvers\n+ * from the Kotlin Gradle plugin or [low] to only register a resolver if there is nothing available by default\n+ * from the Kotlin Gradle plugin.\n */\n @ExternalKotlinTargetApi\n- enum class DependencyResolutionLevel {\n- Default, Overwrite\n+ class Priority(val value: Int) : Comparable {", + "comment_created_at": "2023-05-11T11:00:21+00:00", + "comment_author": "AmrAfifiy", + "comment_body": "Is there a reason why this is very fine grained rather than just an enum? I understand the need for having the four phases mentioned below, but why do you want to give such flexibility?", + "pr_file_module": null + }, + { + "comment_id": "1191213754", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5131, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/common/kotlin/org/jetbrains/kotlin/gradle/plugin/ide/IdeMultiplatformImport.kt", + "discussion_id": "1190998608", + "commented_code": "@@ -149,31 +152,67 @@ interface IdeMultiplatformImport {\n }\n \n /**\n- * Any [IdeDependencyResolver] has to be registered specifying a certain resolution level.\n- * Generally, all resolvers registered in a given resolution level will work collaboratively, meaning the dependency resolution\n- * result is the aggregation of all resolvers running.\n+ * Indicating the 'priority' of a given [IdeDependencyResolver] or [IdeAdditionalArtifactResolver]:\n+ * When resolving dependencies, only the highest priority resolver will be selected and executed.\n+ * If there are multiple resolvers with the same [Priority] registered, then all of them will be executed\n+ * and the result will be merged.\n *\n- * However, only the resolvers in the highest resolution result will run e.g.\n- * If resolvers with level [Overwrite] are found, then only those will contribute to the dependency resolution.\n- * Otherwise, all [Default] resolvers will run.\n+ * By 'default' resolvers are registered using [Priority.normal].\n+ * The Kotlin Gradle Plugin is not expected to register resolvers outside the [normal] or [high] priorities\n+ * External Kotlin Target maintainers _(such as Google)_ can therefore use [veryHigh] to overwrite all resolvers\n+ * from the Kotlin Gradle plugin or [low] to only register a resolver if there is nothing available by default\n+ * from the Kotlin Gradle plugin.\n */\n @ExternalKotlinTargetApi\n- enum class DependencyResolutionLevel {\n- Default, Overwrite\n+ class Priority(val value: Int) : Comparable {", + "comment_created_at": "2023-05-11T13:59:29+00:00", + "comment_author": "sellmair", + "comment_body": "Yes! There are two reasons for this change (maybe three)\r\n\r\n1): You see that KGP already also overwrites some resolvers with a constraint and the Level.Overwrite. 
Such overwrites therefore cannot again be overwritten by you folks\r\n\r\n2): The API was not intuitive enough. Everybody who stumbled over it had to ask for how it works. I really hope that Priority is just simpler to understand \r\n\r\n3): Enums in public API are much more complicated. How could we have added a value safely? This could have been a rather complicated process! \r\n", + "pr_file_module": null + }, + { + "comment_id": "1191432325", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5131, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/common/kotlin/org/jetbrains/kotlin/gradle/plugin/ide/IdeMultiplatformImport.kt", + "discussion_id": "1190998608", + "commented_code": "@@ -149,31 +152,67 @@ interface IdeMultiplatformImport {\n }\n \n /**\n- * Any [IdeDependencyResolver] has to be registered specifying a certain resolution level.\n- * Generally, all resolvers registered in a given resolution level will work collaboratively, meaning the dependency resolution\n- * result is the aggregation of all resolvers running.\n+ * Indicating the 'priority' of a given [IdeDependencyResolver] or [IdeAdditionalArtifactResolver]:\n+ * When resolving dependencies, only the highest priority resolver will be selected and executed.\n+ * If there are multiple resolvers with the same [Priority] registered, then all of them will be executed\n+ * and the result will be merged.\n *\n- * However, only the resolvers in the highest resolution result will run e.g.\n- * If resolvers with level [Overwrite] are found, then only those will contribute to the dependency resolution.\n- * Otherwise, all [Default] resolvers will run.\n+ * By 'default' resolvers are registered using [Priority.normal].\n+ * The Kotlin Gradle Plugin is not expected to register resolvers outside the [normal] or [high] priorities\n+ * External Kotlin Target maintainers _(such as Google)_ can therefore use [veryHigh] to overwrite all resolvers\n+ * from the Kotlin Gradle plugin or [low] to only register a resolver if there is nothing available by default\n+ * from the Kotlin Gradle plugin.\n */\n @ExternalKotlinTargetApi\n- enum class DependencyResolutionLevel {\n- Default, Overwrite\n+ class Priority(val value: Int) : Comparable {", + "comment_created_at": "2023-05-11T16:29:39+00:00", + "comment_author": "AmrAfifiy", + "comment_body": "I'm worried about that there is no way for a target to know if its resolvers end up running or not. 
Also if a plugin wants to match the highest priority that currently exists and just contribute to the resolution without completely overriding it.", + "pr_file_module": null + }, + { + "comment_id": "1191453535", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5131, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/common/kotlin/org/jetbrains/kotlin/gradle/plugin/ide/IdeMultiplatformImport.kt", + "discussion_id": "1190998608", + "commented_code": "@@ -149,31 +152,67 @@ interface IdeMultiplatformImport {\n }\n \n /**\n- * Any [IdeDependencyResolver] has to be registered specifying a certain resolution level.\n- * Generally, all resolvers registered in a given resolution level will work collaboratively, meaning the dependency resolution\n- * result is the aggregation of all resolvers running.\n+ * Indicating the 'priority' of a given [IdeDependencyResolver] or [IdeAdditionalArtifactResolver]:\n+ * When resolving dependencies, only the highest priority resolver will be selected and executed.\n+ * If there are multiple resolvers with the same [Priority] registered, then all of them will be executed\n+ * and the result will be merged.\n *\n- * However, only the resolvers in the highest resolution result will run e.g.\n- * If resolvers with level [Overwrite] are found, then only those will contribute to the dependency resolution.\n- * Otherwise, all [Default] resolvers will run.\n+ * By 'default' resolvers are registered using [Priority.normal].\n+ * The Kotlin Gradle Plugin is not expected to register resolvers outside the [normal] or [high] priorities\n+ * External Kotlin Target maintainers _(such as Google)_ can therefore use [veryHigh] to overwrite all resolvers\n+ * from the Kotlin Gradle plugin or [low] to only register a resolver if there is nothing available by default\n+ * from the Kotlin Gradle plugin.\n */\n @ExternalKotlinTargetApi\n- enum class DependencyResolutionLevel {\n- Default, Overwrite\n+ class Priority(val value: Int) : Comparable {", + "comment_created_at": "2023-05-11T16:50:05+00:00", + "comment_author": "sellmair", + "comment_body": "Those are good thoughts. \r\nHowever, I think it will get very complex if you want to make the API support more advanced semantics. \r\nThe previous version seemed already insufficient to handle potential edge cases between AGP and KGP. The new API might be tricky when AGP and some unknown vendor (or build author) tries to contribute some resolvers. 
\r\n\r\nHowever, the new API, whilst rather low level, seems flexible enough to handle known use cases and to evolve!\r\n", + "pr_file_module": null + }, + { + "comment_id": "1191454163", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5131, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/common/kotlin/org/jetbrains/kotlin/gradle/plugin/ide/IdeMultiplatformImport.kt", + "discussion_id": "1190998608", + "commented_code": "@@ -149,31 +152,67 @@ interface IdeMultiplatformImport {\n }\n \n /**\n- * Any [IdeDependencyResolver] has to be registered specifying a certain resolution level.\n- * Generally, all resolvers registered in a given resolution level will work collaboratively, meaning the dependency resolution\n- * result is the aggregation of all resolvers running.\n+ * Indicating the 'priority' of a given [IdeDependencyResolver] or [IdeAdditionalArtifactResolver]:\n+ * When resolving dependencies, only the highest priority resolver will be selected and executed.\n+ * If there are multiple resolvers with the same [Priority] registered, then all of them will be executed\n+ * and the result will be merged.\n *\n- * However, only the resolvers in the highest resolution result will run e.g.\n- * If resolvers with level [Overwrite] are found, then only those will contribute to the dependency resolution.\n- * Otherwise, all [Default] resolvers will run.\n+ * By 'default' resolvers are registered using [Priority.normal].\n+ * The Kotlin Gradle Plugin is not expected to register resolvers outside the [normal] or [high] priorities\n+ * External Kotlin Target maintainers _(such as Google)_ can therefore use [veryHigh] to overwrite all resolvers\n+ * from the Kotlin Gradle plugin or [low] to only register a resolver if there is nothing available by default\n+ * from the Kotlin Gradle plugin.\n */\n @ExternalKotlinTargetApi\n- enum class DependencyResolutionLevel {\n- Default, Overwrite\n+ class Priority(val value: Int) : Comparable {", + "comment_created_at": "2023-05-11T16:50:45+00:00", + "comment_author": "sellmair", + "comment_body": "If we know about uses cases where its hard for a third party to sync with AGP and KGP, then we could start designing new APIs to make this cases easier! 
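To ground the trade-off discussed in this thread, here is a minimal Kotlin model of priority-based registration. The class shape and the `low`/`normal`/`high`/`veryHigh` names mirror the API text above, but the numeric values and the `Resolver`/`selectResolvers` helpers are illustrative assumptions, not the real `IdeMultiplatformImport` implementation.

```kotlin
// Illustrative model only: an int-backed priority can always be extended or
// slotted between existing levels, unlike a fixed two-value enum.
class Priority(val value: Int) : Comparable<Priority> {
    override fun compareTo(other: Priority): Int = value.compareTo(other.value)
    override fun toString(): String = "Priority($value)"

    companion object {
        val low = Priority(-10)
        val normal = Priority(0)
        val high = Priority(10)
        val veryHigh = Priority(20)
    }
}

data class Resolver(val name: String, val priority: Priority)

// Only resolvers registered at the single highest priority run; several
// resolvers sharing that priority are all executed and their results merged.
fun selectResolvers(registered: List<Resolver>): List<Resolver> {
    val top = registered.maxOfOrNull { it.priority } ?: return emptyList()
    return registered.filter { it.priority.compareTo(top) == 0 }
}

fun main() {
    val registered = listOf(
        Resolver("kgp-default", Priority.normal),
        Resolver("kgp-constraint-overwrite", Priority.high),
        // An external target can still go one step further without an API change.
        Resolver("external-target", Priority.veryHigh),
    )
    println(selectResolvers(registered)) // only the external-target resolver runs
}
```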
\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1172544836", + "pr_number": 5121, + "pr_file": "libraries/tools/kotlin-gradle-plugin/build.gradle.kts", + "created_at": "2023-04-20T12:53:02+00:00", + "commented_code": "languageSettings.optIn(\"org.jetbrains.kotlin.compiler.plugin.ExperimentalCompilerApi\")\n}\n\napiValidation {\n publicMarkers.add(\"org.jetbrains.kotlin.gradle.ExternalKotlinTargetApi\")", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1172544836", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5121, + "pr_file": "libraries/tools/kotlin-gradle-plugin/build.gradle.kts", + "discussion_id": "1172544836", + "commented_code": "@@ -26,6 +27,12 @@ kotlin.sourceSets.all {\n languageSettings.optIn(\"org.jetbrains.kotlin.compiler.plugin.ExperimentalCompilerApi\")\n }\n \n+apiValidation {\n+ publicMarkers.add(\"org.jetbrains.kotlin.gradle.ExternalKotlinTargetApi\")", + "comment_created_at": "2023-04-20T12:53:02+00:00", + "comment_author": "AmrAfifiy", + "comment_body": "are all APIs annotated with @ExternalKotlinTargetApi end up in kotlin-gradle-plugin-api artifact?", + "pr_file_module": null + }, + { + "comment_id": "1173565747", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5121, + "pr_file": "libraries/tools/kotlin-gradle-plugin/build.gradle.kts", + "discussion_id": "1172544836", + "commented_code": "@@ -26,6 +27,12 @@ kotlin.sourceSets.all {\n languageSettings.optIn(\"org.jetbrains.kotlin.compiler.plugin.ExperimentalCompilerApi\")\n }\n \n+apiValidation {\n+ publicMarkers.add(\"org.jetbrains.kotlin.gradle.ExternalKotlinTargetApi\")", + "comment_created_at": "2023-04-21T09:39:08+00:00", + "comment_author": "sellmair", + "comment_body": "No, those apis will be part of the `kotlin-gradle-plugin` artifact as those APIs are required to call into internal APIs of the implementation. So: `kotlin-gradle-plugin-api` already has binary compatibility validation, `kotlin-gradle-plugin` has not, but APIs marked with this annotation will have to be kept stable!\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "454254122", + "pr_number": 3544, + "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/resolverNamedOptions.kt", + "created_at": "2020-07-14T10:19:21+00:00", + "commented_code": "/*\n * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage kotlin.script.experimental.dependencies\n\nfun ExternalDependenciesResolver.Options.value(name: DependenciesResolverOptionsName) =\n value(name.key)\n\nfun ExternalDependenciesResolver.Options.flag(name: DependenciesResolverOptionsName) =\n flag(name.key)\n\noperator fun MutableMap.set(key: DependenciesResolverOptionsName, value: String) {\n put(key.key, value)\n}\n\n\nenum class DependenciesResolverOptionsName {", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "454254122", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3544, + "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/resolverNamedOptions.kt", + "discussion_id": "454254122", + "commented_code": "@@ -0,0 +1,31 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.dependencies\n+\n+fun ExternalDependenciesResolver.Options.value(name: DependenciesResolverOptionsName) =\n+ value(name.key)\n+\n+fun ExternalDependenciesResolver.Options.flag(name: DependenciesResolverOptionsName) =\n+ flag(name.key)\n+\n+operator fun MutableMap.set(key: DependenciesResolverOptionsName, value: String) {\n+ put(key.key, value)\n+}\n+\n+\n+enum class DependenciesResolverOptionsName {", + "comment_created_at": "2020-07-14T10:19:21+00:00", + "comment_author": "nerdsupremacist", + "comment_body": "These options seem very specific to the individual dependencies resolvers. Perhaps it would be more appropriate to prefix this with a `Maven*` or `Ivy*` respectively.\r\n\r\nThis API is not implemented by many other resolvers that I know of. But what I want to avoid is someone implementing a dependencies resolver and thinking they:\r\n1. have to support these exact options\r\n1. are only allowed to support these exact options\r\n\r\n@ligee what do you think?", + "pr_file_module": null + }, + { + "comment_id": "454392948", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3544, + "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/resolverNamedOptions.kt", + "discussion_id": "454254122", + "commented_code": "@@ -0,0 +1,31 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.dependencies\n+\n+fun ExternalDependenciesResolver.Options.value(name: DependenciesResolverOptionsName) =\n+ value(name.key)\n+\n+fun ExternalDependenciesResolver.Options.flag(name: DependenciesResolverOptionsName) =\n+ flag(name.key)\n+\n+operator fun MutableMap.set(key: DependenciesResolverOptionsName, value: String) {\n+ put(key.key, value)\n+}\n+\n+\n+enum class DependenciesResolverOptionsName {", + "comment_created_at": "2020-07-14T14:21:38+00:00", + "comment_author": "ligee", + "comment_body": "Thanks, @nerdsupremacist, I think the concern is valid.\r\nBut we can probably get away right now simply by moving it into the `impl` package, signalling that it is a mere implementation detail now.\r\n@ileasile, what do you think about this?", + "pr_file_module": null + }, + { + "comment_id": "454436629", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3544, + "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/resolverNamedOptions.kt", + "discussion_id": "454254122", + "commented_code": "@@ -0,0 +1,31 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.dependencies\n+\n+fun ExternalDependenciesResolver.Options.value(name: DependenciesResolverOptionsName) =\n+ value(name.key)\n+\n+fun ExternalDependenciesResolver.Options.flag(name: DependenciesResolverOptionsName) =\n+ flag(name.key)\n+\n+operator fun MutableMap.set(key: DependenciesResolverOptionsName, value: String) {\n+ put(key.key, value)\n+}\n+\n+\n+enum class DependenciesResolverOptionsName {", + "comment_created_at": "2020-07-14T15:18:59+00:00", + "comment_author": "ileasile", + "comment_body": "I see these options names only as a convenient and safe way to access \"text\" properties. Maybe it's worth adding somewhere in comments that these options are not mandatory? Also, I'm OK with moving it into the `impl` package.", + "pr_file_module": null + }, + { + "comment_id": "454566489", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3544, + "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/resolverNamedOptions.kt", + "discussion_id": "454254122", + "commented_code": "@@ -0,0 +1,31 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.dependencies\n+\n+fun ExternalDependenciesResolver.Options.value(name: DependenciesResolverOptionsName) =\n+ value(name.key)\n+\n+fun ExternalDependenciesResolver.Options.flag(name: DependenciesResolverOptionsName) =\n+ flag(name.key)\n+\n+operator fun MutableMap.set(key: DependenciesResolverOptionsName, value: String) {\n+ put(key.key, value)\n+}\n+\n+\n+enum class DependenciesResolverOptionsName {", + "comment_created_at": "2020-07-14T18:41:52+00:00", + "comment_author": "ligee", + "comment_body": "I understand your intention here, but we don't want to expose it as a generic API. So, please move it to the `impl`.", + "pr_file_module": null + }, + { + "comment_id": "454632609", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3544, + "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/resolverNamedOptions.kt", + "discussion_id": "454254122", + "commented_code": "@@ -0,0 +1,31 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.dependencies\n+\n+fun ExternalDependenciesResolver.Options.value(name: DependenciesResolverOptionsName) =\n+ value(name.key)\n+\n+fun ExternalDependenciesResolver.Options.flag(name: DependenciesResolverOptionsName) =\n+ flag(name.key)\n+\n+operator fun MutableMap.set(key: DependenciesResolverOptionsName, value: String) {\n+ put(key.key, value)\n+}\n+\n+\n+enum class DependenciesResolverOptionsName {", + "comment_created_at": "2020-07-14T20:43:00+00:00", + "comment_author": "ileasile", + "comment_body": "Sure. 
Done ✔️ ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "429148358", + "pr_number": 3415, + "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/TransformerHelpers.kt", + "created_at": "2020-05-22T09:42:26+00:00", + "commented_code": "/*\n * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage org.jetbrains.kotlinx.atomicfu.compiler.extensions\n\nimport org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\nimport org.jetbrains.kotlin.descriptors.ClassConstructorDescriptor\nimport org.jetbrains.kotlin.descriptors.ClassDescriptor\nimport org.jetbrains.kotlin.descriptors.FunctionDescriptor\nimport org.jetbrains.kotlin.incremental.components.NoLookupLocation\nimport org.jetbrains.kotlin.ir.UNDEFINED_OFFSET\nimport org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder\nimport org.jetbrains.kotlin.ir.declarations.*\nimport org.jetbrains.kotlin.ir.expressions.*\nimport org.jetbrains.kotlin.ir.expressions.impl.IrCallImpl\nimport org.jetbrains.kotlin.ir.expressions.impl.IrGetFieldImpl\nimport org.jetbrains.kotlin.ir.expressions.impl.IrSetFieldImpl\nimport org.jetbrains.kotlin.ir.symbols.*\nimport org.jetbrains.kotlin.ir.types.*\nimport org.jetbrains.kotlin.ir.types.impl.IrSimpleTypeImpl\nimport org.jetbrains.kotlin.ir.types.impl.makeTypeProjection\nimport org.jetbrains.kotlin.name.FqName\nimport org.jetbrains.kotlin.name.Name\nimport org.jetbrains.kotlin.resolve.scopes.MemberScope\nimport org.jetbrains.kotlin.types.Variance\nimport kotlin.random.Random\n\ninterface TransformerHelpers {\n\n val context: IrPluginContext\n\n fun buildCall(\n target: IrFunctionSymbol,\n type: IrType? = null,\n origin: IrStatementOrigin,\n typeArguments: List? = null,\n valueArguments: List? = null\n ): IrCall =\n IrCallImpl(\n UNDEFINED_OFFSET,\n UNDEFINED_OFFSET,\n type ?: target.owner.returnType,\n target,\n target.descriptor.typeParametersCount,\n origin\n ).apply {\n typeArguments?.let {\n assert(typeArguments.size == typeArgumentsCount)\n it.withIndex().forEach { (i, t) -> putTypeArgument(i, t) }\n }\n valueArguments?.let {\n assert(valueArguments.size == valueArgumentsCount)\n it.withIndex().forEach { (i, arg) -> putValueArgument(i, arg) }\n }\n }\n\n fun buildSetField(\n symbol: IrFieldSymbol,\n receiver: IrExpression?,\n value: IrExpression,\n superQualifierSymbol: IrClassSymbol? = null\n ) =\n IrSetFieldImpl(\n UNDEFINED_OFFSET,\n UNDEFINED_OFFSET,\n symbol,\n receiver,\n value,\n value.type,\n IrStatementOrigin.INVOKE,\n superQualifierSymbol\n )\n\n fun buildGetField(symbol: IrFieldSymbol, receiver: IrExpression?, superQualifierSymbol: IrClassSymbol? = null, type: IrType? 
= null) =\n IrGetFieldImpl(\n UNDEFINED_OFFSET,\n UNDEFINED_OFFSET,\n symbol,\n type ?: symbol.owner.type,\n receiver,\n IrStatementOrigin.GET_PROPERTY,\n superQualifierSymbol\n )\n\n fun buildFunctionSimpleType(typeParameters: List): IrSimpleType {\n val parametersCount = typeParameters.size - 1\n val classDesc = context.irBuiltIns.builtIns.getFunction(parametersCount)\n val symbol = context.symbolTable.referenceClass(classDesc)\n return IrSimpleTypeImpl(\n classifier = symbol,\n hasQuestionMark = false,\n arguments = typeParameters.map { it.toTypeArgument() },\n annotations = emptyList()\n )\n }\n\n fun getterName(name: String) = \"${Random.nextInt()}\"\n fun setterName(name: String) = \"${Random.nextInt()}\"\n fun Name.getFieldName() = \"\".toRegex().find(asString())?.groupValues?.get(1)\n\n private fun IrType.toTypeArgument(): IrTypeArgument {\n return makeTypeProjection(this, Variance.INVARIANT)\n }\n\n fun IrCall.getValueArguments() = (0 until valueArgumentsCount).map { i ->\n getValueArgument(i)\n }\n\n fun IrValueParameter.capture() = JsIrBuilder.buildGetValue(symbol)\n\n fun referencePackageFunction(", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "429148358", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3415, + "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/TransformerHelpers.kt", + "discussion_id": "429148358", + "commented_code": "@@ -0,0 +1,144 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlinx.atomicfu.compiler.extensions\n+\n+import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\n+import org.jetbrains.kotlin.descriptors.ClassConstructorDescriptor\n+import org.jetbrains.kotlin.descriptors.ClassDescriptor\n+import org.jetbrains.kotlin.descriptors.FunctionDescriptor\n+import org.jetbrains.kotlin.incremental.components.NoLookupLocation\n+import org.jetbrains.kotlin.ir.UNDEFINED_OFFSET\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder\n+import org.jetbrains.kotlin.ir.declarations.*\n+import org.jetbrains.kotlin.ir.expressions.*\n+import org.jetbrains.kotlin.ir.expressions.impl.IrCallImpl\n+import org.jetbrains.kotlin.ir.expressions.impl.IrGetFieldImpl\n+import org.jetbrains.kotlin.ir.expressions.impl.IrSetFieldImpl\n+import org.jetbrains.kotlin.ir.symbols.*\n+import org.jetbrains.kotlin.ir.types.*\n+import org.jetbrains.kotlin.ir.types.impl.IrSimpleTypeImpl\n+import org.jetbrains.kotlin.ir.types.impl.makeTypeProjection\n+import org.jetbrains.kotlin.name.FqName\n+import org.jetbrains.kotlin.name.Name\n+import org.jetbrains.kotlin.resolve.scopes.MemberScope\n+import org.jetbrains.kotlin.types.Variance\n+import kotlin.random.Random\n+\n+interface TransformerHelpers {\n+\n+ val context: IrPluginContext\n+\n+ fun buildCall(\n+ target: IrFunctionSymbol,\n+ type: IrType? = null,\n+ origin: IrStatementOrigin,\n+ typeArguments: List? = null,\n+ valueArguments: List? 
= null\n+ ): IrCall =\n+ IrCallImpl(\n+ UNDEFINED_OFFSET,\n+ UNDEFINED_OFFSET,\n+ type ?: target.owner.returnType,\n+ target,\n+ target.descriptor.typeParametersCount,\n+ origin\n+ ).apply {\n+ typeArguments?.let {\n+ assert(typeArguments.size == typeArgumentsCount)\n+ it.withIndex().forEach { (i, t) -> putTypeArgument(i, t) }\n+ }\n+ valueArguments?.let {\n+ assert(valueArguments.size == valueArgumentsCount)\n+ it.withIndex().forEach { (i, arg) -> putValueArgument(i, arg) }\n+ }\n+ }\n+\n+ fun buildSetField(\n+ symbol: IrFieldSymbol,\n+ receiver: IrExpression?,\n+ value: IrExpression,\n+ superQualifierSymbol: IrClassSymbol? = null\n+ ) =\n+ IrSetFieldImpl(\n+ UNDEFINED_OFFSET,\n+ UNDEFINED_OFFSET,\n+ symbol,\n+ receiver,\n+ value,\n+ value.type,\n+ IrStatementOrigin.INVOKE,\n+ superQualifierSymbol\n+ )\n+\n+ fun buildGetField(symbol: IrFieldSymbol, receiver: IrExpression?, superQualifierSymbol: IrClassSymbol? = null, type: IrType? = null) =\n+ IrGetFieldImpl(\n+ UNDEFINED_OFFSET,\n+ UNDEFINED_OFFSET,\n+ symbol,\n+ type ?: symbol.owner.type,\n+ receiver,\n+ IrStatementOrigin.GET_PROPERTY,\n+ superQualifierSymbol\n+ )\n+\n+ fun buildFunctionSimpleType(typeParameters: List): IrSimpleType {\n+ val parametersCount = typeParameters.size - 1\n+ val classDesc = context.irBuiltIns.builtIns.getFunction(parametersCount)\n+ val symbol = context.symbolTable.referenceClass(classDesc)\n+ return IrSimpleTypeImpl(\n+ classifier = symbol,\n+ hasQuestionMark = false,\n+ arguments = typeParameters.map { it.toTypeArgument() },\n+ annotations = emptyList()\n+ )\n+ }\n+\n+ fun getterName(name: String) = \"${Random.nextInt()}\"\n+ fun setterName(name: String) = \"${Random.nextInt()}\"\n+ fun Name.getFieldName() = \"\".toRegex().find(asString())?.groupValues?.get(1)\n+\n+ private fun IrType.toTypeArgument(): IrTypeArgument {\n+ return makeTypeProjection(this, Variance.INVARIANT)\n+ }\n+\n+ fun IrCall.getValueArguments() = (0 until valueArgumentsCount).map { i ->\n+ getValueArgument(i)\n+ }\n+\n+ fun IrValueParameter.capture() = JsIrBuilder.buildGetValue(symbol)\n+\n+ fun referencePackageFunction(", + "comment_created_at": "2020-05-22T09:42:26+00:00", + "comment_author": "romanart", + "comment_body": "Are the following methods reference external symbol? If so, please use corresponding `IrPluginContext` API", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "429178285", + "pr_number": 3415, + "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/AtomicFUTransformerJsIr.kt", + "created_at": "2020-05-22T10:51:39+00:00", + "commented_code": "/*\n * Copyright 2010-2020 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage org.jetbrains.kotlinx.atomicfu.compiler.extensions\n\nimport org.jetbrains.kotlin.backend.common.deepCopyWithVariables\nimport org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\nimport org.jetbrains.kotlin.builtins.PrimitiveType\nimport org.jetbrains.kotlin.descriptors.*\nimport org.jetbrains.kotlin.descriptors.impl.FunctionDescriptorImpl\nimport org.jetbrains.kotlin.ir.*\nimport org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildValueParameter\nimport org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildFunction\nimport org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildBlockBody\nimport org.jetbrains.kotlin.ir.declarations.*\nimport org.jetbrains.kotlin.ir.declarations.impl.*\nimport org.jetbrains.kotlin.ir.expressions.*\nimport org.jetbrains.kotlin.ir.expressions.impl.*\nimport org.jetbrains.kotlin.ir.symbols.IrSimpleFunctionSymbol\nimport org.jetbrains.kotlin.ir.symbols.IrSymbol\nimport org.jetbrains.kotlin.ir.symbols.impl.*\nimport org.jetbrains.kotlin.ir.types.*\nimport org.jetbrains.kotlin.ir.util.*\nimport org.jetbrains.kotlin.ir.visitors.IrElementTransformerVoid\nimport java.lang.IllegalStateException\n\nprivate const val AFU_PKG = \"kotlinx/atomicfu\"\nprivate const val LOCKS = \"locks\"\nprivate const val ATOMIC_CONSTRUCTOR = \"atomic\"\nprivate const val ATOMICFU_VALUE_TYPE = \"\"\"Atomic(Int|Long|Boolean|Ref)\"\"\"\nprivate const val ATOMIC_ARRAY_TYPE = \"\"\"Atomic(Int|Long|Boolean|)Array\"\"\"\nprivate const val ATOMIC_ARRAY_FACTORY_FUNCTION = \"atomicArrayOfNulls\"\nprivate const val ATOMICFU_RUNTIME_FUNCTION_PREDICATE = \"atomicfu_\"\nprivate const val REENTRANT_LOCK_TYPE = \"ReentrantLock\"\nprivate const val GETTER = \"atomicfu\\$getter\"\nprivate const val SETTER = \"atomicfu\\$setter\"\nprivate const val GET = \"get\"\nprivate const val SET = \"set\"\n\nprivate fun String.prettyStr() = replace('/', '.')\n\nclass AtomicFUTransformer(override val context: IrPluginContext) : IrElementTransformerVoid(), TransformerHelpers {\n\n private val irBuiltIns = context.irBuiltIns\n\n private val AFU_CLASSES: Map = mapOf(\n \"AtomicInt\" to irBuiltIns.intType,\n \"AtomicLong\" to irBuiltIns.longType,\n \"AtomicRef\" to irBuiltIns.anyType,\n \"AtomicBoolean\" to irBuiltIns.booleanType\n )\n\n private val AFU_ARRAY_CLASSES: Map = mapOf(\n \"AtomicIntArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.INT),\n \"AtomicLongArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.LONG),\n \"AtomicBooleanArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.BOOLEAN),\n \"AtomicArray\" to context.builtIns.array\n )\n\n override fun visitFile(irFile: IrFile): IrFile {\n irFile.declarations.map { declaration ->\n declaration.transformAtomicInlineDeclaration()\n }\n return super.visitFile(irFile)\n }\n\n override fun visitClass(irClass: IrClass): IrStatement {\n irClass.declarations.map { declaration ->\n declaration.transformAtomicInlineDeclaration()\n }\n return super.visitClass(irClass)\n }\n\n override fun visitProperty(property: IrProperty): IrStatement {\n if (property.backingField != null) {\n val backingField = property.backingField!!\n if (backingField.initializer != null) {\n val initializer = backingField.initializer!!.expression.transformAtomicValueInitializer()\n property.backingField!!.initializer = 
IrExpressionBodyImpl(initializer)\n }\n }\n return super.visitProperty(property)\n }\n\n override fun visitBlockBody(body: IrBlockBody): IrBody {\n body.statements.forEachIndexed { i, stmt ->\n if (stmt is IrCall) {\n body.statements[i] = stmt.transformAtomicFunctionCall()\n }\n }\n return super.visitBlockBody(body)\n }\n\n override fun visitBlock(block: IrBlock): IrExpression {\n block.statements.forEachIndexed { i, stmt ->\n if (stmt is IrCall) {\n block.statements[i] = stmt.transformAtomicFunctionCall()\n }\n }\n return super.visitBlock(block)\n }\n\n override fun visitReturn(returnExpr: IrReturn): IrExpression {\n returnExpr.value = returnExpr.value.transformAtomicFunctionCall()\n return super.visitReturn(returnExpr)\n }\n\n override fun visitTypeOperator(typeOp: IrTypeOperatorCall): IrExpression {\n typeOp.argument = typeOp.argument.transformAtomicFunctionCall()\n return super.visitTypeOperator(typeOp)\n }\n\n override fun visitSetVariable(setVar: IrSetVariable): IrExpression {\n setVar.value = setVar.value.transformAtomicFunctionCall()\n return super.visitSetVariable(setVar)\n }\n\n override fun visitSetField(setField: IrSetField): IrExpression {\n setField.value = setField.value.transformAtomicFunctionCall()\n return super.visitSetField(setField)\n }\n\n override fun visitVariable(declaration: IrVariable): IrStatement {\n declaration.initializer = declaration.initializer?.transformAtomicFunctionCall()\n return super.visitVariable(declaration)\n }\n\n override fun visitBranch(branch: IrBranch): IrBranch {\n branch.apply {\n condition = condition.transformAtomicFunctionCall()\n result = result.transformAtomicFunctionCall()\n }\n return super.visitBranch(branch)\n }\n\n override fun visitElseBranch(branch: IrElseBranch): IrElseBranch {\n branch.apply {\n condition = condition.transformAtomicFunctionCall()\n result = result.transformAtomicFunctionCall()\n }\n return super.visitElseBranch(branch)\n }\n\n private fun IrExpression.transformAtomicValueInitializer() =\n when {\n type.isAtomicValueType() -> getPureTypeValue()\n type.isAtomicArrayType() -> buildPureTypeArrayConstructor()\n type.isReentrantLockType() -> buildConstNull()\n else -> this\n }\n\n private fun IrDeclaration.transformAtomicInlineDeclaration() {\n if (this is IrFunction &&\n isInline &&\n extensionReceiverParameter != null &&\n extensionReceiverParameter!!.type.isAtomicValueType()\n ) {\n val type = extensionReceiverParameter!!.type\n val valueType = type.atomicToValueType()\n val getterType = buildFunctionSimpleType(listOf(irBuiltIns.unitType, valueType))\n val setterType = buildFunctionSimpleType(listOf(valueType, irBuiltIns.unitType))\n val valueParametersCount = valueParameters.size\n val extendedValueParameters = mutableListOf().apply {\n addAll(valueParameters)\n add(buildValueParameter(GETTER, valueParametersCount, getterType))\n add(buildValueParameter(SETTER, valueParametersCount + 1, setterType))\n }\n this as IrSimpleFunction\n (descriptor as FunctionDescriptorImpl).initialize(", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "429178285", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3415, + "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/AtomicFUTransformerJsIr.kt", + "discussion_id": "429178285", + "commented_code": "@@ -0,0 +1,473 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlinx.atomicfu.compiler.extensions\n+\n+import org.jetbrains.kotlin.backend.common.deepCopyWithVariables\n+import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\n+import org.jetbrains.kotlin.builtins.PrimitiveType\n+import org.jetbrains.kotlin.descriptors.*\n+import org.jetbrains.kotlin.descriptors.impl.FunctionDescriptorImpl\n+import org.jetbrains.kotlin.ir.*\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildValueParameter\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildFunction\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildBlockBody\n+import org.jetbrains.kotlin.ir.declarations.*\n+import org.jetbrains.kotlin.ir.declarations.impl.*\n+import org.jetbrains.kotlin.ir.expressions.*\n+import org.jetbrains.kotlin.ir.expressions.impl.*\n+import org.jetbrains.kotlin.ir.symbols.IrSimpleFunctionSymbol\n+import org.jetbrains.kotlin.ir.symbols.IrSymbol\n+import org.jetbrains.kotlin.ir.symbols.impl.*\n+import org.jetbrains.kotlin.ir.types.*\n+import org.jetbrains.kotlin.ir.util.*\n+import org.jetbrains.kotlin.ir.visitors.IrElementTransformerVoid\n+import java.lang.IllegalStateException\n+\n+private const val AFU_PKG = \"kotlinx/atomicfu\"\n+private const val LOCKS = \"locks\"\n+private const val ATOMIC_CONSTRUCTOR = \"atomic\"\n+private const val ATOMICFU_VALUE_TYPE = \"\"\"Atomic(Int|Long|Boolean|Ref)\"\"\"\n+private const val ATOMIC_ARRAY_TYPE = \"\"\"Atomic(Int|Long|Boolean|)Array\"\"\"\n+private const val ATOMIC_ARRAY_FACTORY_FUNCTION = \"atomicArrayOfNulls\"\n+private const val ATOMICFU_RUNTIME_FUNCTION_PREDICATE = \"atomicfu_\"\n+private const val REENTRANT_LOCK_TYPE = \"ReentrantLock\"\n+private const val GETTER = \"atomicfu\\$getter\"\n+private const val SETTER = \"atomicfu\\$setter\"\n+private const val GET = \"get\"\n+private const val SET = \"set\"\n+\n+private fun String.prettyStr() = replace('/', '.')\n+\n+class AtomicFUTransformer(override val context: IrPluginContext) : IrElementTransformerVoid(), TransformerHelpers {\n+\n+ private val irBuiltIns = context.irBuiltIns\n+\n+ private val AFU_CLASSES: Map = mapOf(\n+ \"AtomicInt\" to irBuiltIns.intType,\n+ \"AtomicLong\" to irBuiltIns.longType,\n+ \"AtomicRef\" to irBuiltIns.anyType,\n+ \"AtomicBoolean\" to irBuiltIns.booleanType\n+ )\n+\n+ private val AFU_ARRAY_CLASSES: Map = mapOf(\n+ \"AtomicIntArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.INT),\n+ \"AtomicLongArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.LONG),\n+ \"AtomicBooleanArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.BOOLEAN),\n+ \"AtomicArray\" to context.builtIns.array\n+ )\n+\n+ override fun visitFile(irFile: IrFile): IrFile {\n+ irFile.declarations.map { declaration ->\n+ declaration.transformAtomicInlineDeclaration()\n+ }\n+ return super.visitFile(irFile)\n+ }\n+\n+ override fun visitClass(irClass: IrClass): IrStatement {\n+ irClass.declarations.map { declaration ->\n+ declaration.transformAtomicInlineDeclaration()\n+ }\n+ return super.visitClass(irClass)\n+ }\n+\n+ override fun visitProperty(property: IrProperty): IrStatement {\n+ if (property.backingField != null) {\n+ val backingField = property.backingField!!\n+ if (backingField.initializer != null) {\n+ val initializer = 
backingField.initializer!!.expression.transformAtomicValueInitializer()\n+ property.backingField!!.initializer = IrExpressionBodyImpl(initializer)\n+ }\n+ }\n+ return super.visitProperty(property)\n+ }\n+\n+ override fun visitBlockBody(body: IrBlockBody): IrBody {\n+ body.statements.forEachIndexed { i, stmt ->\n+ if (stmt is IrCall) {\n+ body.statements[i] = stmt.transformAtomicFunctionCall()\n+ }\n+ }\n+ return super.visitBlockBody(body)\n+ }\n+\n+ override fun visitBlock(block: IrBlock): IrExpression {\n+ block.statements.forEachIndexed { i, stmt ->\n+ if (stmt is IrCall) {\n+ block.statements[i] = stmt.transformAtomicFunctionCall()\n+ }\n+ }\n+ return super.visitBlock(block)\n+ }\n+\n+ override fun visitReturn(returnExpr: IrReturn): IrExpression {\n+ returnExpr.value = returnExpr.value.transformAtomicFunctionCall()\n+ return super.visitReturn(returnExpr)\n+ }\n+\n+ override fun visitTypeOperator(typeOp: IrTypeOperatorCall): IrExpression {\n+ typeOp.argument = typeOp.argument.transformAtomicFunctionCall()\n+ return super.visitTypeOperator(typeOp)\n+ }\n+\n+ override fun visitSetVariable(setVar: IrSetVariable): IrExpression {\n+ setVar.value = setVar.value.transformAtomicFunctionCall()\n+ return super.visitSetVariable(setVar)\n+ }\n+\n+ override fun visitSetField(setField: IrSetField): IrExpression {\n+ setField.value = setField.value.transformAtomicFunctionCall()\n+ return super.visitSetField(setField)\n+ }\n+\n+ override fun visitVariable(declaration: IrVariable): IrStatement {\n+ declaration.initializer = declaration.initializer?.transformAtomicFunctionCall()\n+ return super.visitVariable(declaration)\n+ }\n+\n+ override fun visitBranch(branch: IrBranch): IrBranch {\n+ branch.apply {\n+ condition = condition.transformAtomicFunctionCall()\n+ result = result.transformAtomicFunctionCall()\n+ }\n+ return super.visitBranch(branch)\n+ }\n+\n+ override fun visitElseBranch(branch: IrElseBranch): IrElseBranch {\n+ branch.apply {\n+ condition = condition.transformAtomicFunctionCall()\n+ result = result.transformAtomicFunctionCall()\n+ }\n+ return super.visitElseBranch(branch)\n+ }\n+\n+ private fun IrExpression.transformAtomicValueInitializer() =\n+ when {\n+ type.isAtomicValueType() -> getPureTypeValue()\n+ type.isAtomicArrayType() -> buildPureTypeArrayConstructor()\n+ type.isReentrantLockType() -> buildConstNull()\n+ else -> this\n+ }\n+\n+ private fun IrDeclaration.transformAtomicInlineDeclaration() {\n+ if (this is IrFunction &&\n+ isInline &&\n+ extensionReceiverParameter != null &&\n+ extensionReceiverParameter!!.type.isAtomicValueType()\n+ ) {\n+ val type = extensionReceiverParameter!!.type\n+ val valueType = type.atomicToValueType()\n+ val getterType = buildFunctionSimpleType(listOf(irBuiltIns.unitType, valueType))\n+ val setterType = buildFunctionSimpleType(listOf(valueType, irBuiltIns.unitType))\n+ val valueParametersCount = valueParameters.size\n+ val extendedValueParameters = mutableListOf().apply {\n+ addAll(valueParameters)\n+ add(buildValueParameter(GETTER, valueParametersCount, getterType))\n+ add(buildValueParameter(SETTER, valueParametersCount + 1, setterType))\n+ }\n+ this as IrSimpleFunction\n+ (descriptor as FunctionDescriptorImpl).initialize(", + "comment_created_at": "2020-05-22T10:51:39+00:00", + "comment_author": "romanart", + "comment_body": "Please avoid using of `DeclarationDescriptor` API", + "pr_file_module": null + }, + { + "comment_id": "433370829", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3415, + "pr_file": 
"plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/AtomicFUTransformerJsIr.kt", + "discussion_id": "429178285", + "commented_code": "@@ -0,0 +1,473 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlinx.atomicfu.compiler.extensions\n+\n+import org.jetbrains.kotlin.backend.common.deepCopyWithVariables\n+import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\n+import org.jetbrains.kotlin.builtins.PrimitiveType\n+import org.jetbrains.kotlin.descriptors.*\n+import org.jetbrains.kotlin.descriptors.impl.FunctionDescriptorImpl\n+import org.jetbrains.kotlin.ir.*\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildValueParameter\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildFunction\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildBlockBody\n+import org.jetbrains.kotlin.ir.declarations.*\n+import org.jetbrains.kotlin.ir.declarations.impl.*\n+import org.jetbrains.kotlin.ir.expressions.*\n+import org.jetbrains.kotlin.ir.expressions.impl.*\n+import org.jetbrains.kotlin.ir.symbols.IrSimpleFunctionSymbol\n+import org.jetbrains.kotlin.ir.symbols.IrSymbol\n+import org.jetbrains.kotlin.ir.symbols.impl.*\n+import org.jetbrains.kotlin.ir.types.*\n+import org.jetbrains.kotlin.ir.util.*\n+import org.jetbrains.kotlin.ir.visitors.IrElementTransformerVoid\n+import java.lang.IllegalStateException\n+\n+private const val AFU_PKG = \"kotlinx/atomicfu\"\n+private const val LOCKS = \"locks\"\n+private const val ATOMIC_CONSTRUCTOR = \"atomic\"\n+private const val ATOMICFU_VALUE_TYPE = \"\"\"Atomic(Int|Long|Boolean|Ref)\"\"\"\n+private const val ATOMIC_ARRAY_TYPE = \"\"\"Atomic(Int|Long|Boolean|)Array\"\"\"\n+private const val ATOMIC_ARRAY_FACTORY_FUNCTION = \"atomicArrayOfNulls\"\n+private const val ATOMICFU_RUNTIME_FUNCTION_PREDICATE = \"atomicfu_\"\n+private const val REENTRANT_LOCK_TYPE = \"ReentrantLock\"\n+private const val GETTER = \"atomicfu\\$getter\"\n+private const val SETTER = \"atomicfu\\$setter\"\n+private const val GET = \"get\"\n+private const val SET = \"set\"\n+\n+private fun String.prettyStr() = replace('/', '.')\n+\n+class AtomicFUTransformer(override val context: IrPluginContext) : IrElementTransformerVoid(), TransformerHelpers {\n+\n+ private val irBuiltIns = context.irBuiltIns\n+\n+ private val AFU_CLASSES: Map = mapOf(\n+ \"AtomicInt\" to irBuiltIns.intType,\n+ \"AtomicLong\" to irBuiltIns.longType,\n+ \"AtomicRef\" to irBuiltIns.anyType,\n+ \"AtomicBoolean\" to irBuiltIns.booleanType\n+ )\n+\n+ private val AFU_ARRAY_CLASSES: Map = mapOf(\n+ \"AtomicIntArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.INT),\n+ \"AtomicLongArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.LONG),\n+ \"AtomicBooleanArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.BOOLEAN),\n+ \"AtomicArray\" to context.builtIns.array\n+ )\n+\n+ override fun visitFile(irFile: IrFile): IrFile {\n+ irFile.declarations.map { declaration ->\n+ declaration.transformAtomicInlineDeclaration()\n+ }\n+ return super.visitFile(irFile)\n+ }\n+\n+ override fun visitClass(irClass: IrClass): IrStatement {\n+ irClass.declarations.map { declaration ->\n+ declaration.transformAtomicInlineDeclaration()\n+ }\n+ return super.visitClass(irClass)\n+ }\n+\n+ override fun 
visitProperty(property: IrProperty): IrStatement {\n+ if (property.backingField != null) {\n+ val backingField = property.backingField!!\n+ if (backingField.initializer != null) {\n+ val initializer = backingField.initializer!!.expression.transformAtomicValueInitializer()\n+ property.backingField!!.initializer = IrExpressionBodyImpl(initializer)\n+ }\n+ }\n+ return super.visitProperty(property)\n+ }\n+\n+ override fun visitBlockBody(body: IrBlockBody): IrBody {\n+ body.statements.forEachIndexed { i, stmt ->\n+ if (stmt is IrCall) {\n+ body.statements[i] = stmt.transformAtomicFunctionCall()\n+ }\n+ }\n+ return super.visitBlockBody(body)\n+ }\n+\n+ override fun visitBlock(block: IrBlock): IrExpression {\n+ block.statements.forEachIndexed { i, stmt ->\n+ if (stmt is IrCall) {\n+ block.statements[i] = stmt.transformAtomicFunctionCall()\n+ }\n+ }\n+ return super.visitBlock(block)\n+ }\n+\n+ override fun visitReturn(returnExpr: IrReturn): IrExpression {\n+ returnExpr.value = returnExpr.value.transformAtomicFunctionCall()\n+ return super.visitReturn(returnExpr)\n+ }\n+\n+ override fun visitTypeOperator(typeOp: IrTypeOperatorCall): IrExpression {\n+ typeOp.argument = typeOp.argument.transformAtomicFunctionCall()\n+ return super.visitTypeOperator(typeOp)\n+ }\n+\n+ override fun visitSetVariable(setVar: IrSetVariable): IrExpression {\n+ setVar.value = setVar.value.transformAtomicFunctionCall()\n+ return super.visitSetVariable(setVar)\n+ }\n+\n+ override fun visitSetField(setField: IrSetField): IrExpression {\n+ setField.value = setField.value.transformAtomicFunctionCall()\n+ return super.visitSetField(setField)\n+ }\n+\n+ override fun visitVariable(declaration: IrVariable): IrStatement {\n+ declaration.initializer = declaration.initializer?.transformAtomicFunctionCall()\n+ return super.visitVariable(declaration)\n+ }\n+\n+ override fun visitBranch(branch: IrBranch): IrBranch {\n+ branch.apply {\n+ condition = condition.transformAtomicFunctionCall()\n+ result = result.transformAtomicFunctionCall()\n+ }\n+ return super.visitBranch(branch)\n+ }\n+\n+ override fun visitElseBranch(branch: IrElseBranch): IrElseBranch {\n+ branch.apply {\n+ condition = condition.transformAtomicFunctionCall()\n+ result = result.transformAtomicFunctionCall()\n+ }\n+ return super.visitElseBranch(branch)\n+ }\n+\n+ private fun IrExpression.transformAtomicValueInitializer() =\n+ when {\n+ type.isAtomicValueType() -> getPureTypeValue()\n+ type.isAtomicArrayType() -> buildPureTypeArrayConstructor()\n+ type.isReentrantLockType() -> buildConstNull()\n+ else -> this\n+ }\n+\n+ private fun IrDeclaration.transformAtomicInlineDeclaration() {\n+ if (this is IrFunction &&\n+ isInline &&\n+ extensionReceiverParameter != null &&\n+ extensionReceiverParameter!!.type.isAtomicValueType()\n+ ) {\n+ val type = extensionReceiverParameter!!.type\n+ val valueType = type.atomicToValueType()\n+ val getterType = buildFunctionSimpleType(listOf(irBuiltIns.unitType, valueType))\n+ val setterType = buildFunctionSimpleType(listOf(valueType, irBuiltIns.unitType))\n+ val valueParametersCount = valueParameters.size\n+ val extendedValueParameters = mutableListOf().apply {\n+ addAll(valueParameters)\n+ add(buildValueParameter(GETTER, valueParametersCount, getterType))\n+ add(buildValueParameter(SETTER, valueParametersCount + 1, setterType))\n+ }\n+ this as IrSimpleFunction\n+ (descriptor as FunctionDescriptorImpl).initialize(", + "comment_created_at": "2020-06-01T17:11:41+00:00", + "comment_author": "SokolovaMaria", + "comment_body": "Could you please tell me, 
how can I avoid using DeclarationDescriptor API here? \r\n\r\nThe problem is that if I just change the parameters of the owner `IrFunction` (`extensionReceiver` and `valueParameters`) then `ManglerChecker` fails at the comparison of the actual `Ir` signature and the one declared in the `Descriptor`.", + "pr_file_module": null + }, + { + "comment_id": "433414477", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3415, + "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/AtomicFUTransformerJsIr.kt", + "discussion_id": "429178285", + "commented_code": "@@ -0,0 +1,473 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlinx.atomicfu.compiler.extensions\n+\n+import org.jetbrains.kotlin.backend.common.deepCopyWithVariables\n+import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\n+import org.jetbrains.kotlin.builtins.PrimitiveType\n+import org.jetbrains.kotlin.descriptors.*\n+import org.jetbrains.kotlin.descriptors.impl.FunctionDescriptorImpl\n+import org.jetbrains.kotlin.ir.*\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildValueParameter\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildFunction\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildBlockBody\n+import org.jetbrains.kotlin.ir.declarations.*\n+import org.jetbrains.kotlin.ir.declarations.impl.*\n+import org.jetbrains.kotlin.ir.expressions.*\n+import org.jetbrains.kotlin.ir.expressions.impl.*\n+import org.jetbrains.kotlin.ir.symbols.IrSimpleFunctionSymbol\n+import org.jetbrains.kotlin.ir.symbols.IrSymbol\n+import org.jetbrains.kotlin.ir.symbols.impl.*\n+import org.jetbrains.kotlin.ir.types.*\n+import org.jetbrains.kotlin.ir.util.*\n+import org.jetbrains.kotlin.ir.visitors.IrElementTransformerVoid\n+import java.lang.IllegalStateException\n+\n+private const val AFU_PKG = \"kotlinx/atomicfu\"\n+private const val LOCKS = \"locks\"\n+private const val ATOMIC_CONSTRUCTOR = \"atomic\"\n+private const val ATOMICFU_VALUE_TYPE = \"\"\"Atomic(Int|Long|Boolean|Ref)\"\"\"\n+private const val ATOMIC_ARRAY_TYPE = \"\"\"Atomic(Int|Long|Boolean|)Array\"\"\"\n+private const val ATOMIC_ARRAY_FACTORY_FUNCTION = \"atomicArrayOfNulls\"\n+private const val ATOMICFU_RUNTIME_FUNCTION_PREDICATE = \"atomicfu_\"\n+private const val REENTRANT_LOCK_TYPE = \"ReentrantLock\"\n+private const val GETTER = \"atomicfu\\$getter\"\n+private const val SETTER = \"atomicfu\\$setter\"\n+private const val GET = \"get\"\n+private const val SET = \"set\"\n+\n+private fun String.prettyStr() = replace('/', '.')\n+\n+class AtomicFUTransformer(override val context: IrPluginContext) : IrElementTransformerVoid(), TransformerHelpers {\n+\n+ private val irBuiltIns = context.irBuiltIns\n+\n+ private val AFU_CLASSES: Map = mapOf(\n+ \"AtomicInt\" to irBuiltIns.intType,\n+ \"AtomicLong\" to irBuiltIns.longType,\n+ \"AtomicRef\" to irBuiltIns.anyType,\n+ \"AtomicBoolean\" to irBuiltIns.booleanType\n+ )\n+\n+ private val AFU_ARRAY_CLASSES: Map = mapOf(\n+ \"AtomicIntArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.INT),\n+ \"AtomicLongArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.LONG),\n+ \"AtomicBooleanArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.BOOLEAN),\n+ \"AtomicArray\" to 
context.builtIns.array\n+ )\n+\n+ override fun visitFile(irFile: IrFile): IrFile {\n+ irFile.declarations.map { declaration ->\n+ declaration.transformAtomicInlineDeclaration()\n+ }\n+ return super.visitFile(irFile)\n+ }\n+\n+ override fun visitClass(irClass: IrClass): IrStatement {\n+ irClass.declarations.map { declaration ->\n+ declaration.transformAtomicInlineDeclaration()\n+ }\n+ return super.visitClass(irClass)\n+ }\n+\n+ override fun visitProperty(property: IrProperty): IrStatement {\n+ if (property.backingField != null) {\n+ val backingField = property.backingField!!\n+ if (backingField.initializer != null) {\n+ val initializer = backingField.initializer!!.expression.transformAtomicValueInitializer()\n+ property.backingField!!.initializer = IrExpressionBodyImpl(initializer)\n+ }\n+ }\n+ return super.visitProperty(property)\n+ }\n+\n+ override fun visitBlockBody(body: IrBlockBody): IrBody {\n+ body.statements.forEachIndexed { i, stmt ->\n+ if (stmt is IrCall) {\n+ body.statements[i] = stmt.transformAtomicFunctionCall()\n+ }\n+ }\n+ return super.visitBlockBody(body)\n+ }\n+\n+ override fun visitBlock(block: IrBlock): IrExpression {\n+ block.statements.forEachIndexed { i, stmt ->\n+ if (stmt is IrCall) {\n+ block.statements[i] = stmt.transformAtomicFunctionCall()\n+ }\n+ }\n+ return super.visitBlock(block)\n+ }\n+\n+ override fun visitReturn(returnExpr: IrReturn): IrExpression {\n+ returnExpr.value = returnExpr.value.transformAtomicFunctionCall()\n+ return super.visitReturn(returnExpr)\n+ }\n+\n+ override fun visitTypeOperator(typeOp: IrTypeOperatorCall): IrExpression {\n+ typeOp.argument = typeOp.argument.transformAtomicFunctionCall()\n+ return super.visitTypeOperator(typeOp)\n+ }\n+\n+ override fun visitSetVariable(setVar: IrSetVariable): IrExpression {\n+ setVar.value = setVar.value.transformAtomicFunctionCall()\n+ return super.visitSetVariable(setVar)\n+ }\n+\n+ override fun visitSetField(setField: IrSetField): IrExpression {\n+ setField.value = setField.value.transformAtomicFunctionCall()\n+ return super.visitSetField(setField)\n+ }\n+\n+ override fun visitVariable(declaration: IrVariable): IrStatement {\n+ declaration.initializer = declaration.initializer?.transformAtomicFunctionCall()\n+ return super.visitVariable(declaration)\n+ }\n+\n+ override fun visitBranch(branch: IrBranch): IrBranch {\n+ branch.apply {\n+ condition = condition.transformAtomicFunctionCall()\n+ result = result.transformAtomicFunctionCall()\n+ }\n+ return super.visitBranch(branch)\n+ }\n+\n+ override fun visitElseBranch(branch: IrElseBranch): IrElseBranch {\n+ branch.apply {\n+ condition = condition.transformAtomicFunctionCall()\n+ result = result.transformAtomicFunctionCall()\n+ }\n+ return super.visitElseBranch(branch)\n+ }\n+\n+ private fun IrExpression.transformAtomicValueInitializer() =\n+ when {\n+ type.isAtomicValueType() -> getPureTypeValue()\n+ type.isAtomicArrayType() -> buildPureTypeArrayConstructor()\n+ type.isReentrantLockType() -> buildConstNull()\n+ else -> this\n+ }\n+\n+ private fun IrDeclaration.transformAtomicInlineDeclaration() {\n+ if (this is IrFunction &&\n+ isInline &&\n+ extensionReceiverParameter != null &&\n+ extensionReceiverParameter!!.type.isAtomicValueType()\n+ ) {\n+ val type = extensionReceiverParameter!!.type\n+ val valueType = type.atomicToValueType()\n+ val getterType = buildFunctionSimpleType(listOf(irBuiltIns.unitType, valueType))\n+ val setterType = buildFunctionSimpleType(listOf(valueType, irBuiltIns.unitType))\n+ val valueParametersCount = valueParameters.size\n+ val 
extendedValueParameters = mutableListOf().apply {\n+ addAll(valueParameters)\n+ add(buildValueParameter(GETTER, valueParametersCount, getterType))\n+ add(buildValueParameter(SETTER, valueParametersCount + 1, setterType))\n+ }\n+ this as IrSimpleFunction\n+ (descriptor as FunctionDescriptorImpl).initialize(", + "comment_created_at": "2020-06-01T18:33:53+00:00", + "comment_author": "romanart", + "comment_body": "What kind of failure do you see? I bet it could be easily fixed if you use wrapped descriptor", + "pr_file_module": null + }, + { + "comment_id": "435628343", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3415, + "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/AtomicFUTransformerJsIr.kt", + "discussion_id": "429178285", + "commented_code": "@@ -0,0 +1,473 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlinx.atomicfu.compiler.extensions\n+\n+import org.jetbrains.kotlin.backend.common.deepCopyWithVariables\n+import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\n+import org.jetbrains.kotlin.builtins.PrimitiveType\n+import org.jetbrains.kotlin.descriptors.*\n+import org.jetbrains.kotlin.descriptors.impl.FunctionDescriptorImpl\n+import org.jetbrains.kotlin.ir.*\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildValueParameter\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildFunction\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildBlockBody\n+import org.jetbrains.kotlin.ir.declarations.*\n+import org.jetbrains.kotlin.ir.declarations.impl.*\n+import org.jetbrains.kotlin.ir.expressions.*\n+import org.jetbrains.kotlin.ir.expressions.impl.*\n+import org.jetbrains.kotlin.ir.symbols.IrSimpleFunctionSymbol\n+import org.jetbrains.kotlin.ir.symbols.IrSymbol\n+import org.jetbrains.kotlin.ir.symbols.impl.*\n+import org.jetbrains.kotlin.ir.types.*\n+import org.jetbrains.kotlin.ir.util.*\n+import org.jetbrains.kotlin.ir.visitors.IrElementTransformerVoid\n+import java.lang.IllegalStateException\n+\n+private const val AFU_PKG = \"kotlinx/atomicfu\"\n+private const val LOCKS = \"locks\"\n+private const val ATOMIC_CONSTRUCTOR = \"atomic\"\n+private const val ATOMICFU_VALUE_TYPE = \"\"\"Atomic(Int|Long|Boolean|Ref)\"\"\"\n+private const val ATOMIC_ARRAY_TYPE = \"\"\"Atomic(Int|Long|Boolean|)Array\"\"\"\n+private const val ATOMIC_ARRAY_FACTORY_FUNCTION = \"atomicArrayOfNulls\"\n+private const val ATOMICFU_RUNTIME_FUNCTION_PREDICATE = \"atomicfu_\"\n+private const val REENTRANT_LOCK_TYPE = \"ReentrantLock\"\n+private const val GETTER = \"atomicfu\\$getter\"\n+private const val SETTER = \"atomicfu\\$setter\"\n+private const val GET = \"get\"\n+private const val SET = \"set\"\n+\n+private fun String.prettyStr() = replace('/', '.')\n+\n+class AtomicFUTransformer(override val context: IrPluginContext) : IrElementTransformerVoid(), TransformerHelpers {\n+\n+ private val irBuiltIns = context.irBuiltIns\n+\n+ private val AFU_CLASSES: Map = mapOf(\n+ \"AtomicInt\" to irBuiltIns.intType,\n+ \"AtomicLong\" to irBuiltIns.longType,\n+ \"AtomicRef\" to irBuiltIns.anyType,\n+ \"AtomicBoolean\" to irBuiltIns.booleanType\n+ )\n+\n+ private val AFU_ARRAY_CLASSES: Map = mapOf(\n+ \"AtomicIntArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.INT),\n+ \"AtomicLongArray\" to 
context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.LONG),\n+ \"AtomicBooleanArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.BOOLEAN),\n+ \"AtomicArray\" to context.builtIns.array\n+ )\n+\n+ override fun visitFile(irFile: IrFile): IrFile {\n+ irFile.declarations.map { declaration ->\n+ declaration.transformAtomicInlineDeclaration()\n+ }\n+ return super.visitFile(irFile)\n+ }\n+\n+ override fun visitClass(irClass: IrClass): IrStatement {\n+ irClass.declarations.map { declaration ->\n+ declaration.transformAtomicInlineDeclaration()\n+ }\n+ return super.visitClass(irClass)\n+ }\n+\n+ override fun visitProperty(property: IrProperty): IrStatement {\n+ if (property.backingField != null) {\n+ val backingField = property.backingField!!\n+ if (backingField.initializer != null) {\n+ val initializer = backingField.initializer!!.expression.transformAtomicValueInitializer()\n+ property.backingField!!.initializer = IrExpressionBodyImpl(initializer)\n+ }\n+ }\n+ return super.visitProperty(property)\n+ }\n+\n+ override fun visitBlockBody(body: IrBlockBody): IrBody {\n+ body.statements.forEachIndexed { i, stmt ->\n+ if (stmt is IrCall) {\n+ body.statements[i] = stmt.transformAtomicFunctionCall()\n+ }\n+ }\n+ return super.visitBlockBody(body)\n+ }\n+\n+ override fun visitBlock(block: IrBlock): IrExpression {\n+ block.statements.forEachIndexed { i, stmt ->\n+ if (stmt is IrCall) {\n+ block.statements[i] = stmt.transformAtomicFunctionCall()\n+ }\n+ }\n+ return super.visitBlock(block)\n+ }\n+\n+ override fun visitReturn(returnExpr: IrReturn): IrExpression {\n+ returnExpr.value = returnExpr.value.transformAtomicFunctionCall()\n+ return super.visitReturn(returnExpr)\n+ }\n+\n+ override fun visitTypeOperator(typeOp: IrTypeOperatorCall): IrExpression {\n+ typeOp.argument = typeOp.argument.transformAtomicFunctionCall()\n+ return super.visitTypeOperator(typeOp)\n+ }\n+\n+ override fun visitSetVariable(setVar: IrSetVariable): IrExpression {\n+ setVar.value = setVar.value.transformAtomicFunctionCall()\n+ return super.visitSetVariable(setVar)\n+ }\n+\n+ override fun visitSetField(setField: IrSetField): IrExpression {\n+ setField.value = setField.value.transformAtomicFunctionCall()\n+ return super.visitSetField(setField)\n+ }\n+\n+ override fun visitVariable(declaration: IrVariable): IrStatement {\n+ declaration.initializer = declaration.initializer?.transformAtomicFunctionCall()\n+ return super.visitVariable(declaration)\n+ }\n+\n+ override fun visitBranch(branch: IrBranch): IrBranch {\n+ branch.apply {\n+ condition = condition.transformAtomicFunctionCall()\n+ result = result.transformAtomicFunctionCall()\n+ }\n+ return super.visitBranch(branch)\n+ }\n+\n+ override fun visitElseBranch(branch: IrElseBranch): IrElseBranch {\n+ branch.apply {\n+ condition = condition.transformAtomicFunctionCall()\n+ result = result.transformAtomicFunctionCall()\n+ }\n+ return super.visitElseBranch(branch)\n+ }\n+\n+ private fun IrExpression.transformAtomicValueInitializer() =\n+ when {\n+ type.isAtomicValueType() -> getPureTypeValue()\n+ type.isAtomicArrayType() -> buildPureTypeArrayConstructor()\n+ type.isReentrantLockType() -> buildConstNull()\n+ else -> this\n+ }\n+\n+ private fun IrDeclaration.transformAtomicInlineDeclaration() {\n+ if (this is IrFunction &&\n+ isInline &&\n+ extensionReceiverParameter != null &&\n+ extensionReceiverParameter!!.type.isAtomicValueType()\n+ ) {\n+ val type = extensionReceiverParameter!!.type\n+ val valueType = type.atomicToValueType()\n+ val getterType = 
buildFunctionSimpleType(listOf(irBuiltIns.unitType, valueType))\n+ val setterType = buildFunctionSimpleType(listOf(valueType, irBuiltIns.unitType))\n+ val valueParametersCount = valueParameters.size\n+ val extendedValueParameters = mutableListOf().apply {\n+ addAll(valueParameters)\n+ add(buildValueParameter(GETTER, valueParametersCount, getterType))\n+ add(buildValueParameter(SETTER, valueParametersCount + 1, setterType))\n+ }\n+ this as IrSimpleFunction\n+ (descriptor as FunctionDescriptorImpl).initialize(", + "comment_created_at": "2020-06-05T00:30:16+00:00", + "comment_author": "SokolovaMaria", + "comment_body": "I fixed this by creating new transformed declarations with `WrappedSimpleFunctionDescriptor` (as I can not just set a new descriptor).", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kotlin-design-extensible-stable-apis.md b/_reviewers/kotlin-design-extensible-stable-apis.md index f2ce1de..b485ab7 100644 --- a/_reviewers/kotlin-design-extensible-stable-apis.md +++ b/_reviewers/kotlin-design-extensible-stable-apis.md @@ -44,317 +44,3 @@ This approach: - Allows for future extensibility without breaking changes - Clearly separates public API from implementation details - Enables third-party extensions through well-defined mechanisms - - -[ - { - "discussion_id": "1438374793", - "pr_number": 5234, - "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiCommandLineProcessor.kt", - "created_at": "2023-12-29T18:59:12+00:00", - "commented_code": "\"sites of inline functions.\",\n false,\n )\n\n val REMOVE_PRIVATE_CLASSES_OPTION: CliOption =\n CliOption(\n \"removePrivateClasses\",\n \"true/false\",\n \"Remove private or effectively private classes from ABI. False by default. Note that if private class is being used \" +", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1438374793", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5234, - "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiCommandLineProcessor.kt", - "discussion_id": "1438374793", - "commented_code": "@@ -32,18 +32,28 @@ class JvmAbiCommandLineProcessor : CommandLineProcessor {\n \"sites of inline functions.\",\n false,\n )\n+\n+ val REMOVE_PRIVATE_CLASSES_OPTION: CliOption =\n+ CliOption(\n+ \"removePrivateClasses\",\n+ \"true/false\",\n+ \"Remove private or effectively private classes from ABI. False by default. Note that if private class is being used \" +", - "comment_created_at": "2023-12-29T18:59:12+00:00", - "comment_author": "artem-zinnatullin", - "comment_body": "```suggestion\r\n \"Remove private or effectively private classes from ABI. False by default due to backwards compatibility. If enabled \u2014 private classes will no longer be available from Java.\"\r\n```\r\n\r\nSomething like this maybe? 
Ideally it should be clear that setting the option to `true` is highly desirable for stable ABI and it is false by default only due to being potentially breaking for rare Java use cases.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1470548786", - "pr_number": 5234, - "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiOutputExtension.kt", - "created_at": "2024-01-30T03:31:48+00:00", - "commented_code": "private val keptMethods = mutableListOf()\n private val innerClassInfos = mutableMapOf()\n\n override fun visit(\n version: Int,\n access: Int,\n name: String?,\n signature: String?,\n superName: String?,\n interfaces: Array?,\n ) {\n val prunedAccess = if (prune) access or Opcodes.ACC_PUBLIC else access", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1470548786", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5234, - "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiOutputExtension.kt", - "discussion_id": "1470548786", - "commented_code": "@@ -92,15 +92,27 @@ class JvmAbiOutputExtension(\n private val keptMethods = mutableListOf()\n private val innerClassInfos = mutableMapOf()\n \n+ override fun visit(\n+ version: Int,\n+ access: Int,\n+ name: String?,\n+ signature: String?,\n+ superName: String?,\n+ interfaces: Array?,\n+ ) {\n+ val prunedAccess = if (prune) access or Opcodes.ACC_PUBLIC else access", - "comment_created_at": "2024-01-30T03:31:48+00:00", - "comment_author": "Tagakov", - "comment_body": "Same as in `@Metadata`. Not sure if I should fallback to package-private here instead.", - "pr_file_module": null - }, - { - "comment_id": "1471244812", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5234, - "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiOutputExtension.kt", - "discussion_id": "1470548786", - "commented_code": "@@ -92,15 +92,27 @@ class JvmAbiOutputExtension(\n private val keptMethods = mutableListOf()\n private val innerClassInfos = mutableMapOf()\n \n+ override fun visit(\n+ version: Int,\n+ access: Int,\n+ name: String?,\n+ signature: String?,\n+ superName: String?,\n+ interfaces: Array?,\n+ ) {\n+ val prunedAccess = if (prune) access or Opcodes.ACC_PUBLIC else access", - "comment_created_at": "2024-01-30T13:46:30+00:00", - "comment_author": "madsager", - "comment_body": "My 2 cents: I don't think we should ever change the visibility of any element that ends up in the ABI jar. 
That makes compilation against the ABI jar and the full jar inconsistent.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1190998608", - "pr_number": 5131, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/common/kotlin/org/jetbrains/kotlin/gradle/plugin/ide/IdeMultiplatformImport.kt", - "created_at": "2023-05-11T11:00:21+00:00", - "commented_code": "}\n\n /**\n * Any [IdeDependencyResolver] has to be registered specifying a certain resolution level.\n * Generally, all resolvers registered in a given resolution level will work collaboratively, meaning the dependency resolution\n * result is the aggregation of all resolvers running.\n * Indicating the 'priority' of a given [IdeDependencyResolver] or [IdeAdditionalArtifactResolver]:\n * When resolving dependencies, only the highest priority resolver will be selected and executed.\n * If there are multiple resolvers with the same [Priority] registered, then all of them will be executed\n * and the result will be merged.\n *\n * However, only the resolvers in the highest resolution result will run e.g.\n * If resolvers with level [Overwrite] are found, then only those will contribute to the dependency resolution.\n * Otherwise, all [Default] resolvers will run.\n * By 'default' resolvers are registered using [Priority.normal].\n * The Kotlin Gradle Plugin is not expected to register resolvers outside the [normal] or [high] priorities\n * External Kotlin Target maintainers _(such as Google)_ can therefore use [veryHigh] to overwrite all resolvers\n * from the Kotlin Gradle plugin or [low] to only register a resolver if there is nothing available by default\n * from the Kotlin Gradle plugin.\n */\n @ExternalKotlinTargetApi\n enum class DependencyResolutionLevel {\n Default, Overwrite\n class Priority(val value: Int) : Comparable {", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1190998608", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5131, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/common/kotlin/org/jetbrains/kotlin/gradle/plugin/ide/IdeMultiplatformImport.kt", - "discussion_id": "1190998608", - "commented_code": "@@ -149,31 +152,67 @@ interface IdeMultiplatformImport {\n }\n \n /**\n- * Any [IdeDependencyResolver] has to be registered specifying a certain resolution level.\n- * Generally, all resolvers registered in a given resolution level will work collaboratively, meaning the dependency resolution\n- * result is the aggregation of all resolvers running.\n+ * Indicating the 'priority' of a given [IdeDependencyResolver] or [IdeAdditionalArtifactResolver]:\n+ * When resolving dependencies, only the highest priority resolver will be selected and executed.\n+ * If there are multiple resolvers with the same [Priority] registered, then all of them will be executed\n+ * and the result will be merged.\n *\n- * However, only the resolvers in the highest resolution result will run e.g.\n- * If resolvers with level [Overwrite] are found, then only those will contribute to the dependency resolution.\n- * Otherwise, all [Default] resolvers will run.\n+ * By 'default' resolvers are registered using [Priority.normal].\n+ * The Kotlin Gradle Plugin is not expected to register resolvers outside the [normal] or [high] priorities\n+ * External Kotlin Target maintainers _(such as Google)_ can therefore use [veryHigh] to overwrite all resolvers\n+ * from the Kotlin Gradle plugin or [low] to only register a resolver if there is nothing available by default\n+ * from the Kotlin 
Gradle plugin.\n */\n @ExternalKotlinTargetApi\n- enum class DependencyResolutionLevel {\n- Default, Overwrite\n+ class Priority(val value: Int) : Comparable {", - "comment_created_at": "2023-05-11T11:00:21+00:00", - "comment_author": "AmrAfifiy", - "comment_body": "Is there a reason why this is very fine grained rather than just an enum? I understand the need for having the four phases mentioned below, but why do you want to give such flexibility?", - "pr_file_module": null - }, - { - "comment_id": "1191213754", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5131, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/common/kotlin/org/jetbrains/kotlin/gradle/plugin/ide/IdeMultiplatformImport.kt", - "discussion_id": "1190998608", - "commented_code": "@@ -149,31 +152,67 @@ interface IdeMultiplatformImport {\n }\n \n /**\n- * Any [IdeDependencyResolver] has to be registered specifying a certain resolution level.\n- * Generally, all resolvers registered in a given resolution level will work collaboratively, meaning the dependency resolution\n- * result is the aggregation of all resolvers running.\n+ * Indicating the 'priority' of a given [IdeDependencyResolver] or [IdeAdditionalArtifactResolver]:\n+ * When resolving dependencies, only the highest priority resolver will be selected and executed.\n+ * If there are multiple resolvers with the same [Priority] registered, then all of them will be executed\n+ * and the result will be merged.\n *\n- * However, only the resolvers in the highest resolution result will run e.g.\n- * If resolvers with level [Overwrite] are found, then only those will contribute to the dependency resolution.\n- * Otherwise, all [Default] resolvers will run.\n+ * By 'default' resolvers are registered using [Priority.normal].\n+ * The Kotlin Gradle Plugin is not expected to register resolvers outside the [normal] or [high] priorities\n+ * External Kotlin Target maintainers _(such as Google)_ can therefore use [veryHigh] to overwrite all resolvers\n+ * from the Kotlin Gradle plugin or [low] to only register a resolver if there is nothing available by default\n+ * from the Kotlin Gradle plugin.\n */\n @ExternalKotlinTargetApi\n- enum class DependencyResolutionLevel {\n- Default, Overwrite\n+ class Priority(val value: Int) : Comparable {", - "comment_created_at": "2023-05-11T13:59:29+00:00", - "comment_author": "sellmair", - "comment_body": "Yes! There are two reasons for this change (maybe three)\r\n\r\n1): You see that KGP already also overwrites some resolvers with a constraint and the Level.Overwrite. Such overwrites therefore cannot again be overwritten by you folks\r\n\r\n2): The API was not intuitive enough. Everybody who stumbled over it had to ask for how it works. I really hope that Priority is just simpler to understand \r\n\r\n3): Enums in public API are much more complicated. How could we have added a value safely? This could have been a rather complicated process! 
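A minimal sketch of the comparable-priority pattern discussed in this thread, assuming illustrative constant values (the real constants and their visibility in the Kotlin Gradle Plugin may differ). Unlike a closed enum, an open comparable value lets external target maintainers introduce new levels between or above the built-in ones without a breaking change to the published API.

```kotlin
// Illustrative sketch only; the numeric values are assumptions for the example,
// not the actual constants used by the Kotlin Gradle Plugin.
class Priority(val value: Int) : Comparable<Priority> {
    override fun compareTo(other: Priority): Int = value.compareTo(other.value)
    override fun toString(): String = "Priority($value)"

    companion object {
        val low = Priority(-10)      // runs only if nothing else is registered
        val normal = Priority(0)     // default level for the plugin's own resolvers
        val high = Priority(10)      // plugin-internal overwrites
        val veryHigh = Priority(20)  // external targets overriding everything else
    }
}

// An external plugin can slot in a new level without any change to the published API:
val aboveNormal = Priority(5)
```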
\r\n", - "pr_file_module": null - }, - { - "comment_id": "1191432325", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5131, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/common/kotlin/org/jetbrains/kotlin/gradle/plugin/ide/IdeMultiplatformImport.kt", - "discussion_id": "1190998608", - "commented_code": "@@ -149,31 +152,67 @@ interface IdeMultiplatformImport {\n }\n \n /**\n- * Any [IdeDependencyResolver] has to be registered specifying a certain resolution level.\n- * Generally, all resolvers registered in a given resolution level will work collaboratively, meaning the dependency resolution\n- * result is the aggregation of all resolvers running.\n+ * Indicating the 'priority' of a given [IdeDependencyResolver] or [IdeAdditionalArtifactResolver]:\n+ * When resolving dependencies, only the highest priority resolver will be selected and executed.\n+ * If there are multiple resolvers with the same [Priority] registered, then all of them will be executed\n+ * and the result will be merged.\n *\n- * However, only the resolvers in the highest resolution result will run e.g.\n- * If resolvers with level [Overwrite] are found, then only those will contribute to the dependency resolution.\n- * Otherwise, all [Default] resolvers will run.\n+ * By 'default' resolvers are registered using [Priority.normal].\n+ * The Kotlin Gradle Plugin is not expected to register resolvers outside the [normal] or [high] priorities\n+ * External Kotlin Target maintainers _(such as Google)_ can therefore use [veryHigh] to overwrite all resolvers\n+ * from the Kotlin Gradle plugin or [low] to only register a resolver if there is nothing available by default\n+ * from the Kotlin Gradle plugin.\n */\n @ExternalKotlinTargetApi\n- enum class DependencyResolutionLevel {\n- Default, Overwrite\n+ class Priority(val value: Int) : Comparable {", - "comment_created_at": "2023-05-11T16:29:39+00:00", - "comment_author": "AmrAfifiy", - "comment_body": "I'm worried about that there is no way for a target to know if its resolvers end up running or not. 
Also if a plugin wants to match the highest priority that currently exists and just contribute to the resolution without completely overriding it.", - "pr_file_module": null - }, - { - "comment_id": "1191453535", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5131, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/common/kotlin/org/jetbrains/kotlin/gradle/plugin/ide/IdeMultiplatformImport.kt", - "discussion_id": "1190998608", - "commented_code": "@@ -149,31 +152,67 @@ interface IdeMultiplatformImport {\n }\n \n /**\n- * Any [IdeDependencyResolver] has to be registered specifying a certain resolution level.\n- * Generally, all resolvers registered in a given resolution level will work collaboratively, meaning the dependency resolution\n- * result is the aggregation of all resolvers running.\n+ * Indicating the 'priority' of a given [IdeDependencyResolver] or [IdeAdditionalArtifactResolver]:\n+ * When resolving dependencies, only the highest priority resolver will be selected and executed.\n+ * If there are multiple resolvers with the same [Priority] registered, then all of them will be executed\n+ * and the result will be merged.\n *\n- * However, only the resolvers in the highest resolution result will run e.g.\n- * If resolvers with level [Overwrite] are found, then only those will contribute to the dependency resolution.\n- * Otherwise, all [Default] resolvers will run.\n+ * By 'default' resolvers are registered using [Priority.normal].\n+ * The Kotlin Gradle Plugin is not expected to register resolvers outside the [normal] or [high] priorities\n+ * External Kotlin Target maintainers _(such as Google)_ can therefore use [veryHigh] to overwrite all resolvers\n+ * from the Kotlin Gradle plugin or [low] to only register a resolver if there is nothing available by default\n+ * from the Kotlin Gradle plugin.\n */\n @ExternalKotlinTargetApi\n- enum class DependencyResolutionLevel {\n- Default, Overwrite\n+ class Priority(val value: Int) : Comparable {", - "comment_created_at": "2023-05-11T16:50:05+00:00", - "comment_author": "sellmair", - "comment_body": "Those are good thoughts. \r\nHowever, I think it will get very complex if you want to make the API support more advanced semantics. \r\nThe previous version seemed already insufficient to handle potential edge cases between AGP and KGP. The new API might be tricky when AGP and some unknown vendor (or build author) tries to contribute some resolvers. 
\r\n\r\nHowever, the new API, whilst rather low level, seems flexible enough to handle known use cases and to evolve!\r\n", - "pr_file_module": null - }, - { - "comment_id": "1191454163", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5131, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/common/kotlin/org/jetbrains/kotlin/gradle/plugin/ide/IdeMultiplatformImport.kt", - "discussion_id": "1190998608", - "commented_code": "@@ -149,31 +152,67 @@ interface IdeMultiplatformImport {\n }\n \n /**\n- * Any [IdeDependencyResolver] has to be registered specifying a certain resolution level.\n- * Generally, all resolvers registered in a given resolution level will work collaboratively, meaning the dependency resolution\n- * result is the aggregation of all resolvers running.\n+ * Indicating the 'priority' of a given [IdeDependencyResolver] or [IdeAdditionalArtifactResolver]:\n+ * When resolving dependencies, only the highest priority resolver will be selected and executed.\n+ * If there are multiple resolvers with the same [Priority] registered, then all of them will be executed\n+ * and the result will be merged.\n *\n- * However, only the resolvers in the highest resolution result will run e.g.\n- * If resolvers with level [Overwrite] are found, then only those will contribute to the dependency resolution.\n- * Otherwise, all [Default] resolvers will run.\n+ * By 'default' resolvers are registered using [Priority.normal].\n+ * The Kotlin Gradle Plugin is not expected to register resolvers outside the [normal] or [high] priorities\n+ * External Kotlin Target maintainers _(such as Google)_ can therefore use [veryHigh] to overwrite all resolvers\n+ * from the Kotlin Gradle plugin or [low] to only register a resolver if there is nothing available by default\n+ * from the Kotlin Gradle plugin.\n */\n @ExternalKotlinTargetApi\n- enum class DependencyResolutionLevel {\n- Default, Overwrite\n+ class Priority(val value: Int) : Comparable {", - "comment_created_at": "2023-05-11T16:50:45+00:00", - "comment_author": "sellmair", - "comment_body": "If we know about uses cases where its hard for a third party to sync with AGP and KGP, then we could start designing new APIs to make this cases easier! 
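The "highest priority wins, equal priorities merge" behaviour described in the doc comment above can be sketched as follows; the types and names here are hypothetical stand-ins, with plain integers in place of the Priority class, not the actual KGP implementation.

```kotlin
// Only resolvers registered at the highest priority run; their results are merged.
data class RegisteredResolver<T>(val priority: Int, val resolve: () -> List<T>)

fun <T> resolveAll(resolvers: List<RegisteredResolver<T>>): List<T> {
    val highest = resolvers.maxOfOrNull { it.priority } ?: return emptyList()
    return resolvers
        .filter { it.priority == highest }
        .flatMap { it.resolve() }
}
```

Under this scheme, registering at a strictly higher priority replaces the default resolvers entirely, while registering at the same priority only contributes additional results.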
\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1172544836", - "pr_number": 5121, - "pr_file": "libraries/tools/kotlin-gradle-plugin/build.gradle.kts", - "created_at": "2023-04-20T12:53:02+00:00", - "commented_code": "languageSettings.optIn(\"org.jetbrains.kotlin.compiler.plugin.ExperimentalCompilerApi\")\n}\n\napiValidation {\n publicMarkers.add(\"org.jetbrains.kotlin.gradle.ExternalKotlinTargetApi\")", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1172544836", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5121, - "pr_file": "libraries/tools/kotlin-gradle-plugin/build.gradle.kts", - "discussion_id": "1172544836", - "commented_code": "@@ -26,6 +27,12 @@ kotlin.sourceSets.all {\n languageSettings.optIn(\"org.jetbrains.kotlin.compiler.plugin.ExperimentalCompilerApi\")\n }\n \n+apiValidation {\n+ publicMarkers.add(\"org.jetbrains.kotlin.gradle.ExternalKotlinTargetApi\")", - "comment_created_at": "2023-04-20T12:53:02+00:00", - "comment_author": "AmrAfifiy", - "comment_body": "are all APIs annotated with @ExternalKotlinTargetApi end up in kotlin-gradle-plugin-api artifact?", - "pr_file_module": null - }, - { - "comment_id": "1173565747", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5121, - "pr_file": "libraries/tools/kotlin-gradle-plugin/build.gradle.kts", - "discussion_id": "1172544836", - "commented_code": "@@ -26,6 +27,12 @@ kotlin.sourceSets.all {\n languageSettings.optIn(\"org.jetbrains.kotlin.compiler.plugin.ExperimentalCompilerApi\")\n }\n \n+apiValidation {\n+ publicMarkers.add(\"org.jetbrains.kotlin.gradle.ExternalKotlinTargetApi\")", - "comment_created_at": "2023-04-21T09:39:08+00:00", - "comment_author": "sellmair", - "comment_body": "No, those apis will be part of the `kotlin-gradle-plugin` artifact as those APIs are required to call into internal APIs of the implementation. So: `kotlin-gradle-plugin-api` already has binary compatibility validation, `kotlin-gradle-plugin` has not, but APIs marked with this annotation will have to be kept stable!\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "454254122", - "pr_number": 3544, - "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/resolverNamedOptions.kt", - "created_at": "2020-07-14T10:19:21+00:00", - "commented_code": "/*\n * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage kotlin.script.experimental.dependencies\n\nfun ExternalDependenciesResolver.Options.value(name: DependenciesResolverOptionsName) =\n value(name.key)\n\nfun ExternalDependenciesResolver.Options.flag(name: DependenciesResolverOptionsName) =\n flag(name.key)\n\noperator fun MutableMap.set(key: DependenciesResolverOptionsName, value: String) {\n put(key.key, value)\n}\n\n\nenum class DependenciesResolverOptionsName {", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "454254122", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3544, - "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/resolverNamedOptions.kt", - "discussion_id": "454254122", - "commented_code": "@@ -0,0 +1,31 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.dependencies\n+\n+fun ExternalDependenciesResolver.Options.value(name: DependenciesResolverOptionsName) =\n+ value(name.key)\n+\n+fun ExternalDependenciesResolver.Options.flag(name: DependenciesResolverOptionsName) =\n+ flag(name.key)\n+\n+operator fun MutableMap.set(key: DependenciesResolverOptionsName, value: String) {\n+ put(key.key, value)\n+}\n+\n+\n+enum class DependenciesResolverOptionsName {", - "comment_created_at": "2020-07-14T10:19:21+00:00", - "comment_author": "nerdsupremacist", - "comment_body": "These options seem very specific to the individual dependencies resolvers. Perhaps it would be more appropriate to prefix this with a `Maven*` or `Ivy*` respectively.\r\n\r\nThis API is not implemented by many other resolvers that I know of. But what I want to avoid is someone implementing a dependencies resolver and thinking they:\r\n1. have to support these exact options\r\n1. are only allowed to support these exact options\r\n\r\n@ligee what do you think?", - "pr_file_module": null - }, - { - "comment_id": "454392948", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3544, - "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/resolverNamedOptions.kt", - "discussion_id": "454254122", - "commented_code": "@@ -0,0 +1,31 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.dependencies\n+\n+fun ExternalDependenciesResolver.Options.value(name: DependenciesResolverOptionsName) =\n+ value(name.key)\n+\n+fun ExternalDependenciesResolver.Options.flag(name: DependenciesResolverOptionsName) =\n+ flag(name.key)\n+\n+operator fun MutableMap.set(key: DependenciesResolverOptionsName, value: String) {\n+ put(key.key, value)\n+}\n+\n+\n+enum class DependenciesResolverOptionsName {", - "comment_created_at": "2020-07-14T14:21:38+00:00", - "comment_author": "ligee", - "comment_body": "Thanks, @nerdsupremacist, I think the concern is valid.\r\nBut we can probably get away right now simply by moving it into the `impl` package, signalling that it is a mere implementation detail now.\r\n@ileasile, what do you think about this?", - "pr_file_module": null - }, - { - "comment_id": "454436629", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3544, - "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/resolverNamedOptions.kt", - "discussion_id": "454254122", - "commented_code": "@@ -0,0 +1,31 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.dependencies\n+\n+fun ExternalDependenciesResolver.Options.value(name: DependenciesResolverOptionsName) =\n+ value(name.key)\n+\n+fun ExternalDependenciesResolver.Options.flag(name: DependenciesResolverOptionsName) =\n+ flag(name.key)\n+\n+operator fun MutableMap.set(key: DependenciesResolverOptionsName, value: String) {\n+ put(key.key, value)\n+}\n+\n+\n+enum class DependenciesResolverOptionsName {", - "comment_created_at": "2020-07-14T15:18:59+00:00", - "comment_author": "ileasile", - "comment_body": "I see these options names only as a convenient and safe way to access \"text\" properties. Maybe it's worth adding somewhere in comments that these options are not mandatory? Also, I'm OK with moving it into the `impl` package.", - "pr_file_module": null - }, - { - "comment_id": "454566489", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3544, - "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/resolverNamedOptions.kt", - "discussion_id": "454254122", - "commented_code": "@@ -0,0 +1,31 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.dependencies\n+\n+fun ExternalDependenciesResolver.Options.value(name: DependenciesResolverOptionsName) =\n+ value(name.key)\n+\n+fun ExternalDependenciesResolver.Options.flag(name: DependenciesResolverOptionsName) =\n+ flag(name.key)\n+\n+operator fun MutableMap.set(key: DependenciesResolverOptionsName, value: String) {\n+ put(key.key, value)\n+}\n+\n+\n+enum class DependenciesResolverOptionsName {", - "comment_created_at": "2020-07-14T18:41:52+00:00", - "comment_author": "ligee", - "comment_body": "I understand your intention here, but we don't want to expose it as a generic API. So, please move it to the `impl`.", - "pr_file_module": null - }, - { - "comment_id": "454632609", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3544, - "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/resolverNamedOptions.kt", - "discussion_id": "454254122", - "commented_code": "@@ -0,0 +1,31 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.dependencies\n+\n+fun ExternalDependenciesResolver.Options.value(name: DependenciesResolverOptionsName) =\n+ value(name.key)\n+\n+fun ExternalDependenciesResolver.Options.flag(name: DependenciesResolverOptionsName) =\n+ flag(name.key)\n+\n+operator fun MutableMap.set(key: DependenciesResolverOptionsName, value: String) {\n+ put(key.key, value)\n+}\n+\n+\n+enum class DependenciesResolverOptionsName {", - "comment_created_at": "2020-07-14T20:43:00+00:00", - "comment_author": "ileasile", - "comment_body": "Sure. 
Done \u2714\ufe0f ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "429148358", - "pr_number": 3415, - "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/TransformerHelpers.kt", - "created_at": "2020-05-22T09:42:26+00:00", - "commented_code": "/*\n * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage org.jetbrains.kotlinx.atomicfu.compiler.extensions\n\nimport org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\nimport org.jetbrains.kotlin.descriptors.ClassConstructorDescriptor\nimport org.jetbrains.kotlin.descriptors.ClassDescriptor\nimport org.jetbrains.kotlin.descriptors.FunctionDescriptor\nimport org.jetbrains.kotlin.incremental.components.NoLookupLocation\nimport org.jetbrains.kotlin.ir.UNDEFINED_OFFSET\nimport org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder\nimport org.jetbrains.kotlin.ir.declarations.*\nimport org.jetbrains.kotlin.ir.expressions.*\nimport org.jetbrains.kotlin.ir.expressions.impl.IrCallImpl\nimport org.jetbrains.kotlin.ir.expressions.impl.IrGetFieldImpl\nimport org.jetbrains.kotlin.ir.expressions.impl.IrSetFieldImpl\nimport org.jetbrains.kotlin.ir.symbols.*\nimport org.jetbrains.kotlin.ir.types.*\nimport org.jetbrains.kotlin.ir.types.impl.IrSimpleTypeImpl\nimport org.jetbrains.kotlin.ir.types.impl.makeTypeProjection\nimport org.jetbrains.kotlin.name.FqName\nimport org.jetbrains.kotlin.name.Name\nimport org.jetbrains.kotlin.resolve.scopes.MemberScope\nimport org.jetbrains.kotlin.types.Variance\nimport kotlin.random.Random\n\ninterface TransformerHelpers {\n\n val context: IrPluginContext\n\n fun buildCall(\n target: IrFunctionSymbol,\n type: IrType? = null,\n origin: IrStatementOrigin,\n typeArguments: List? = null,\n valueArguments: List? = null\n ): IrCall =\n IrCallImpl(\n UNDEFINED_OFFSET,\n UNDEFINED_OFFSET,\n type ?: target.owner.returnType,\n target,\n target.descriptor.typeParametersCount,\n origin\n ).apply {\n typeArguments?.let {\n assert(typeArguments.size == typeArgumentsCount)\n it.withIndex().forEach { (i, t) -> putTypeArgument(i, t) }\n }\n valueArguments?.let {\n assert(valueArguments.size == valueArgumentsCount)\n it.withIndex().forEach { (i, arg) -> putValueArgument(i, arg) }\n }\n }\n\n fun buildSetField(\n symbol: IrFieldSymbol,\n receiver: IrExpression?,\n value: IrExpression,\n superQualifierSymbol: IrClassSymbol? = null\n ) =\n IrSetFieldImpl(\n UNDEFINED_OFFSET,\n UNDEFINED_OFFSET,\n symbol,\n receiver,\n value,\n value.type,\n IrStatementOrigin.INVOKE,\n superQualifierSymbol\n )\n\n fun buildGetField(symbol: IrFieldSymbol, receiver: IrExpression?, superQualifierSymbol: IrClassSymbol? = null, type: IrType? 
= null) =\n IrGetFieldImpl(\n UNDEFINED_OFFSET,\n UNDEFINED_OFFSET,\n symbol,\n type ?: symbol.owner.type,\n receiver,\n IrStatementOrigin.GET_PROPERTY,\n superQualifierSymbol\n )\n\n fun buildFunctionSimpleType(typeParameters: List): IrSimpleType {\n val parametersCount = typeParameters.size - 1\n val classDesc = context.irBuiltIns.builtIns.getFunction(parametersCount)\n val symbol = context.symbolTable.referenceClass(classDesc)\n return IrSimpleTypeImpl(\n classifier = symbol,\n hasQuestionMark = false,\n arguments = typeParameters.map { it.toTypeArgument() },\n annotations = emptyList()\n )\n }\n\n fun getterName(name: String) = \"${Random.nextInt()}\"\n fun setterName(name: String) = \"${Random.nextInt()}\"\n fun Name.getFieldName() = \"\".toRegex().find(asString())?.groupValues?.get(1)\n\n private fun IrType.toTypeArgument(): IrTypeArgument {\n return makeTypeProjection(this, Variance.INVARIANT)\n }\n\n fun IrCall.getValueArguments() = (0 until valueArgumentsCount).map { i ->\n getValueArgument(i)\n }\n\n fun IrValueParameter.capture() = JsIrBuilder.buildGetValue(symbol)\n\n fun referencePackageFunction(", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "429148358", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3415, - "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/TransformerHelpers.kt", - "discussion_id": "429148358", - "commented_code": "@@ -0,0 +1,144 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlinx.atomicfu.compiler.extensions\n+\n+import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\n+import org.jetbrains.kotlin.descriptors.ClassConstructorDescriptor\n+import org.jetbrains.kotlin.descriptors.ClassDescriptor\n+import org.jetbrains.kotlin.descriptors.FunctionDescriptor\n+import org.jetbrains.kotlin.incremental.components.NoLookupLocation\n+import org.jetbrains.kotlin.ir.UNDEFINED_OFFSET\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder\n+import org.jetbrains.kotlin.ir.declarations.*\n+import org.jetbrains.kotlin.ir.expressions.*\n+import org.jetbrains.kotlin.ir.expressions.impl.IrCallImpl\n+import org.jetbrains.kotlin.ir.expressions.impl.IrGetFieldImpl\n+import org.jetbrains.kotlin.ir.expressions.impl.IrSetFieldImpl\n+import org.jetbrains.kotlin.ir.symbols.*\n+import org.jetbrains.kotlin.ir.types.*\n+import org.jetbrains.kotlin.ir.types.impl.IrSimpleTypeImpl\n+import org.jetbrains.kotlin.ir.types.impl.makeTypeProjection\n+import org.jetbrains.kotlin.name.FqName\n+import org.jetbrains.kotlin.name.Name\n+import org.jetbrains.kotlin.resolve.scopes.MemberScope\n+import org.jetbrains.kotlin.types.Variance\n+import kotlin.random.Random\n+\n+interface TransformerHelpers {\n+\n+ val context: IrPluginContext\n+\n+ fun buildCall(\n+ target: IrFunctionSymbol,\n+ type: IrType? = null,\n+ origin: IrStatementOrigin,\n+ typeArguments: List? = null,\n+ valueArguments: List? 
= null\n+ ): IrCall =\n+ IrCallImpl(\n+ UNDEFINED_OFFSET,\n+ UNDEFINED_OFFSET,\n+ type ?: target.owner.returnType,\n+ target,\n+ target.descriptor.typeParametersCount,\n+ origin\n+ ).apply {\n+ typeArguments?.let {\n+ assert(typeArguments.size == typeArgumentsCount)\n+ it.withIndex().forEach { (i, t) -> putTypeArgument(i, t) }\n+ }\n+ valueArguments?.let {\n+ assert(valueArguments.size == valueArgumentsCount)\n+ it.withIndex().forEach { (i, arg) -> putValueArgument(i, arg) }\n+ }\n+ }\n+\n+ fun buildSetField(\n+ symbol: IrFieldSymbol,\n+ receiver: IrExpression?,\n+ value: IrExpression,\n+ superQualifierSymbol: IrClassSymbol? = null\n+ ) =\n+ IrSetFieldImpl(\n+ UNDEFINED_OFFSET,\n+ UNDEFINED_OFFSET,\n+ symbol,\n+ receiver,\n+ value,\n+ value.type,\n+ IrStatementOrigin.INVOKE,\n+ superQualifierSymbol\n+ )\n+\n+ fun buildGetField(symbol: IrFieldSymbol, receiver: IrExpression?, superQualifierSymbol: IrClassSymbol? = null, type: IrType? = null) =\n+ IrGetFieldImpl(\n+ UNDEFINED_OFFSET,\n+ UNDEFINED_OFFSET,\n+ symbol,\n+ type ?: symbol.owner.type,\n+ receiver,\n+ IrStatementOrigin.GET_PROPERTY,\n+ superQualifierSymbol\n+ )\n+\n+ fun buildFunctionSimpleType(typeParameters: List): IrSimpleType {\n+ val parametersCount = typeParameters.size - 1\n+ val classDesc = context.irBuiltIns.builtIns.getFunction(parametersCount)\n+ val symbol = context.symbolTable.referenceClass(classDesc)\n+ return IrSimpleTypeImpl(\n+ classifier = symbol,\n+ hasQuestionMark = false,\n+ arguments = typeParameters.map { it.toTypeArgument() },\n+ annotations = emptyList()\n+ )\n+ }\n+\n+ fun getterName(name: String) = \"${Random.nextInt()}\"\n+ fun setterName(name: String) = \"${Random.nextInt()}\"\n+ fun Name.getFieldName() = \"\".toRegex().find(asString())?.groupValues?.get(1)\n+\n+ private fun IrType.toTypeArgument(): IrTypeArgument {\n+ return makeTypeProjection(this, Variance.INVARIANT)\n+ }\n+\n+ fun IrCall.getValueArguments() = (0 until valueArgumentsCount).map { i ->\n+ getValueArgument(i)\n+ }\n+\n+ fun IrValueParameter.capture() = JsIrBuilder.buildGetValue(symbol)\n+\n+ fun referencePackageFunction(", - "comment_created_at": "2020-05-22T09:42:26+00:00", - "comment_author": "romanart", - "comment_body": "Are the following methods reference external symbol? If so, please use corresponding `IrPluginContext` API", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "429178285", - "pr_number": 3415, - "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/AtomicFUTransformerJsIr.kt", - "created_at": "2020-05-22T10:51:39+00:00", - "commented_code": "/*\n * Copyright 2010-2020 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage org.jetbrains.kotlinx.atomicfu.compiler.extensions\n\nimport org.jetbrains.kotlin.backend.common.deepCopyWithVariables\nimport org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\nimport org.jetbrains.kotlin.builtins.PrimitiveType\nimport org.jetbrains.kotlin.descriptors.*\nimport org.jetbrains.kotlin.descriptors.impl.FunctionDescriptorImpl\nimport org.jetbrains.kotlin.ir.*\nimport org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildValueParameter\nimport org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildFunction\nimport org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildBlockBody\nimport org.jetbrains.kotlin.ir.declarations.*\nimport org.jetbrains.kotlin.ir.declarations.impl.*\nimport org.jetbrains.kotlin.ir.expressions.*\nimport org.jetbrains.kotlin.ir.expressions.impl.*\nimport org.jetbrains.kotlin.ir.symbols.IrSimpleFunctionSymbol\nimport org.jetbrains.kotlin.ir.symbols.IrSymbol\nimport org.jetbrains.kotlin.ir.symbols.impl.*\nimport org.jetbrains.kotlin.ir.types.*\nimport org.jetbrains.kotlin.ir.util.*\nimport org.jetbrains.kotlin.ir.visitors.IrElementTransformerVoid\nimport java.lang.IllegalStateException\n\nprivate const val AFU_PKG = \"kotlinx/atomicfu\"\nprivate const val LOCKS = \"locks\"\nprivate const val ATOMIC_CONSTRUCTOR = \"atomic\"\nprivate const val ATOMICFU_VALUE_TYPE = \"\"\"Atomic(Int|Long|Boolean|Ref)\"\"\"\nprivate const val ATOMIC_ARRAY_TYPE = \"\"\"Atomic(Int|Long|Boolean|)Array\"\"\"\nprivate const val ATOMIC_ARRAY_FACTORY_FUNCTION = \"atomicArrayOfNulls\"\nprivate const val ATOMICFU_RUNTIME_FUNCTION_PREDICATE = \"atomicfu_\"\nprivate const val REENTRANT_LOCK_TYPE = \"ReentrantLock\"\nprivate const val GETTER = \"atomicfu\\$getter\"\nprivate const val SETTER = \"atomicfu\\$setter\"\nprivate const val GET = \"get\"\nprivate const val SET = \"set\"\n\nprivate fun String.prettyStr() = replace('/', '.')\n\nclass AtomicFUTransformer(override val context: IrPluginContext) : IrElementTransformerVoid(), TransformerHelpers {\n\n private val irBuiltIns = context.irBuiltIns\n\n private val AFU_CLASSES: Map = mapOf(\n \"AtomicInt\" to irBuiltIns.intType,\n \"AtomicLong\" to irBuiltIns.longType,\n \"AtomicRef\" to irBuiltIns.anyType,\n \"AtomicBoolean\" to irBuiltIns.booleanType\n )\n\n private val AFU_ARRAY_CLASSES: Map = mapOf(\n \"AtomicIntArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.INT),\n \"AtomicLongArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.LONG),\n \"AtomicBooleanArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.BOOLEAN),\n \"AtomicArray\" to context.builtIns.array\n )\n\n override fun visitFile(irFile: IrFile): IrFile {\n irFile.declarations.map { declaration ->\n declaration.transformAtomicInlineDeclaration()\n }\n return super.visitFile(irFile)\n }\n\n override fun visitClass(irClass: IrClass): IrStatement {\n irClass.declarations.map { declaration ->\n declaration.transformAtomicInlineDeclaration()\n }\n return super.visitClass(irClass)\n }\n\n override fun visitProperty(property: IrProperty): IrStatement {\n if (property.backingField != null) {\n val backingField = property.backingField!!\n if (backingField.initializer != null) {\n val initializer = backingField.initializer!!.expression.transformAtomicValueInitializer()\n property.backingField!!.initializer = 
IrExpressionBodyImpl(initializer)\n }\n }\n return super.visitProperty(property)\n }\n\n override fun visitBlockBody(body: IrBlockBody): IrBody {\n body.statements.forEachIndexed { i, stmt ->\n if (stmt is IrCall) {\n body.statements[i] = stmt.transformAtomicFunctionCall()\n }\n }\n return super.visitBlockBody(body)\n }\n\n override fun visitBlock(block: IrBlock): IrExpression {\n block.statements.forEachIndexed { i, stmt ->\n if (stmt is IrCall) {\n block.statements[i] = stmt.transformAtomicFunctionCall()\n }\n }\n return super.visitBlock(block)\n }\n\n override fun visitReturn(returnExpr: IrReturn): IrExpression {\n returnExpr.value = returnExpr.value.transformAtomicFunctionCall()\n return super.visitReturn(returnExpr)\n }\n\n override fun visitTypeOperator(typeOp: IrTypeOperatorCall): IrExpression {\n typeOp.argument = typeOp.argument.transformAtomicFunctionCall()\n return super.visitTypeOperator(typeOp)\n }\n\n override fun visitSetVariable(setVar: IrSetVariable): IrExpression {\n setVar.value = setVar.value.transformAtomicFunctionCall()\n return super.visitSetVariable(setVar)\n }\n\n override fun visitSetField(setField: IrSetField): IrExpression {\n setField.value = setField.value.transformAtomicFunctionCall()\n return super.visitSetField(setField)\n }\n\n override fun visitVariable(declaration: IrVariable): IrStatement {\n declaration.initializer = declaration.initializer?.transformAtomicFunctionCall()\n return super.visitVariable(declaration)\n }\n\n override fun visitBranch(branch: IrBranch): IrBranch {\n branch.apply {\n condition = condition.transformAtomicFunctionCall()\n result = result.transformAtomicFunctionCall()\n }\n return super.visitBranch(branch)\n }\n\n override fun visitElseBranch(branch: IrElseBranch): IrElseBranch {\n branch.apply {\n condition = condition.transformAtomicFunctionCall()\n result = result.transformAtomicFunctionCall()\n }\n return super.visitElseBranch(branch)\n }\n\n private fun IrExpression.transformAtomicValueInitializer() =\n when {\n type.isAtomicValueType() -> getPureTypeValue()\n type.isAtomicArrayType() -> buildPureTypeArrayConstructor()\n type.isReentrantLockType() -> buildConstNull()\n else -> this\n }\n\n private fun IrDeclaration.transformAtomicInlineDeclaration() {\n if (this is IrFunction &&\n isInline &&\n extensionReceiverParameter != null &&\n extensionReceiverParameter!!.type.isAtomicValueType()\n ) {\n val type = extensionReceiverParameter!!.type\n val valueType = type.atomicToValueType()\n val getterType = buildFunctionSimpleType(listOf(irBuiltIns.unitType, valueType))\n val setterType = buildFunctionSimpleType(listOf(valueType, irBuiltIns.unitType))\n val valueParametersCount = valueParameters.size\n val extendedValueParameters = mutableListOf().apply {\n addAll(valueParameters)\n add(buildValueParameter(GETTER, valueParametersCount, getterType))\n add(buildValueParameter(SETTER, valueParametersCount + 1, setterType))\n }\n this as IrSimpleFunction\n (descriptor as FunctionDescriptorImpl).initialize(", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "429178285", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3415, - "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/AtomicFUTransformerJsIr.kt", - "discussion_id": "429178285", - "commented_code": "@@ -0,0 +1,473 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlinx.atomicfu.compiler.extensions\n+\n+import org.jetbrains.kotlin.backend.common.deepCopyWithVariables\n+import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\n+import org.jetbrains.kotlin.builtins.PrimitiveType\n+import org.jetbrains.kotlin.descriptors.*\n+import org.jetbrains.kotlin.descriptors.impl.FunctionDescriptorImpl\n+import org.jetbrains.kotlin.ir.*\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildValueParameter\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildFunction\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildBlockBody\n+import org.jetbrains.kotlin.ir.declarations.*\n+import org.jetbrains.kotlin.ir.declarations.impl.*\n+import org.jetbrains.kotlin.ir.expressions.*\n+import org.jetbrains.kotlin.ir.expressions.impl.*\n+import org.jetbrains.kotlin.ir.symbols.IrSimpleFunctionSymbol\n+import org.jetbrains.kotlin.ir.symbols.IrSymbol\n+import org.jetbrains.kotlin.ir.symbols.impl.*\n+import org.jetbrains.kotlin.ir.types.*\n+import org.jetbrains.kotlin.ir.util.*\n+import org.jetbrains.kotlin.ir.visitors.IrElementTransformerVoid\n+import java.lang.IllegalStateException\n+\n+private const val AFU_PKG = \"kotlinx/atomicfu\"\n+private const val LOCKS = \"locks\"\n+private const val ATOMIC_CONSTRUCTOR = \"atomic\"\n+private const val ATOMICFU_VALUE_TYPE = \"\"\"Atomic(Int|Long|Boolean|Ref)\"\"\"\n+private const val ATOMIC_ARRAY_TYPE = \"\"\"Atomic(Int|Long|Boolean|)Array\"\"\"\n+private const val ATOMIC_ARRAY_FACTORY_FUNCTION = \"atomicArrayOfNulls\"\n+private const val ATOMICFU_RUNTIME_FUNCTION_PREDICATE = \"atomicfu_\"\n+private const val REENTRANT_LOCK_TYPE = \"ReentrantLock\"\n+private const val GETTER = \"atomicfu\\$getter\"\n+private const val SETTER = \"atomicfu\\$setter\"\n+private const val GET = \"get\"\n+private const val SET = \"set\"\n+\n+private fun String.prettyStr() = replace('/', '.')\n+\n+class AtomicFUTransformer(override val context: IrPluginContext) : IrElementTransformerVoid(), TransformerHelpers {\n+\n+ private val irBuiltIns = context.irBuiltIns\n+\n+ private val AFU_CLASSES: Map = mapOf(\n+ \"AtomicInt\" to irBuiltIns.intType,\n+ \"AtomicLong\" to irBuiltIns.longType,\n+ \"AtomicRef\" to irBuiltIns.anyType,\n+ \"AtomicBoolean\" to irBuiltIns.booleanType\n+ )\n+\n+ private val AFU_ARRAY_CLASSES: Map = mapOf(\n+ \"AtomicIntArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.INT),\n+ \"AtomicLongArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.LONG),\n+ \"AtomicBooleanArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.BOOLEAN),\n+ \"AtomicArray\" to context.builtIns.array\n+ )\n+\n+ override fun visitFile(irFile: IrFile): IrFile {\n+ irFile.declarations.map { declaration ->\n+ declaration.transformAtomicInlineDeclaration()\n+ }\n+ return super.visitFile(irFile)\n+ }\n+\n+ override fun visitClass(irClass: IrClass): IrStatement {\n+ irClass.declarations.map { declaration ->\n+ declaration.transformAtomicInlineDeclaration()\n+ }\n+ return super.visitClass(irClass)\n+ }\n+\n+ override fun visitProperty(property: IrProperty): IrStatement {\n+ if (property.backingField != null) {\n+ val backingField = property.backingField!!\n+ if (backingField.initializer != null) {\n+ val initializer = 
backingField.initializer!!.expression.transformAtomicValueInitializer()\n+ property.backingField!!.initializer = IrExpressionBodyImpl(initializer)\n+ }\n+ }\n+ return super.visitProperty(property)\n+ }\n+\n+ override fun visitBlockBody(body: IrBlockBody): IrBody {\n+ body.statements.forEachIndexed { i, stmt ->\n+ if (stmt is IrCall) {\n+ body.statements[i] = stmt.transformAtomicFunctionCall()\n+ }\n+ }\n+ return super.visitBlockBody(body)\n+ }\n+\n+ override fun visitBlock(block: IrBlock): IrExpression {\n+ block.statements.forEachIndexed { i, stmt ->\n+ if (stmt is IrCall) {\n+ block.statements[i] = stmt.transformAtomicFunctionCall()\n+ }\n+ }\n+ return super.visitBlock(block)\n+ }\n+\n+ override fun visitReturn(returnExpr: IrReturn): IrExpression {\n+ returnExpr.value = returnExpr.value.transformAtomicFunctionCall()\n+ return super.visitReturn(returnExpr)\n+ }\n+\n+ override fun visitTypeOperator(typeOp: IrTypeOperatorCall): IrExpression {\n+ typeOp.argument = typeOp.argument.transformAtomicFunctionCall()\n+ return super.visitTypeOperator(typeOp)\n+ }\n+\n+ override fun visitSetVariable(setVar: IrSetVariable): IrExpression {\n+ setVar.value = setVar.value.transformAtomicFunctionCall()\n+ return super.visitSetVariable(setVar)\n+ }\n+\n+ override fun visitSetField(setField: IrSetField): IrExpression {\n+ setField.value = setField.value.transformAtomicFunctionCall()\n+ return super.visitSetField(setField)\n+ }\n+\n+ override fun visitVariable(declaration: IrVariable): IrStatement {\n+ declaration.initializer = declaration.initializer?.transformAtomicFunctionCall()\n+ return super.visitVariable(declaration)\n+ }\n+\n+ override fun visitBranch(branch: IrBranch): IrBranch {\n+ branch.apply {\n+ condition = condition.transformAtomicFunctionCall()\n+ result = result.transformAtomicFunctionCall()\n+ }\n+ return super.visitBranch(branch)\n+ }\n+\n+ override fun visitElseBranch(branch: IrElseBranch): IrElseBranch {\n+ branch.apply {\n+ condition = condition.transformAtomicFunctionCall()\n+ result = result.transformAtomicFunctionCall()\n+ }\n+ return super.visitElseBranch(branch)\n+ }\n+\n+ private fun IrExpression.transformAtomicValueInitializer() =\n+ when {\n+ type.isAtomicValueType() -> getPureTypeValue()\n+ type.isAtomicArrayType() -> buildPureTypeArrayConstructor()\n+ type.isReentrantLockType() -> buildConstNull()\n+ else -> this\n+ }\n+\n+ private fun IrDeclaration.transformAtomicInlineDeclaration() {\n+ if (this is IrFunction &&\n+ isInline &&\n+ extensionReceiverParameter != null &&\n+ extensionReceiverParameter!!.type.isAtomicValueType()\n+ ) {\n+ val type = extensionReceiverParameter!!.type\n+ val valueType = type.atomicToValueType()\n+ val getterType = buildFunctionSimpleType(listOf(irBuiltIns.unitType, valueType))\n+ val setterType = buildFunctionSimpleType(listOf(valueType, irBuiltIns.unitType))\n+ val valueParametersCount = valueParameters.size\n+ val extendedValueParameters = mutableListOf().apply {\n+ addAll(valueParameters)\n+ add(buildValueParameter(GETTER, valueParametersCount, getterType))\n+ add(buildValueParameter(SETTER, valueParametersCount + 1, setterType))\n+ }\n+ this as IrSimpleFunction\n+ (descriptor as FunctionDescriptorImpl).initialize(", - "comment_created_at": "2020-05-22T10:51:39+00:00", - "comment_author": "romanart", - "comment_body": "Please avoid using of `DeclarationDescriptor` API", - "pr_file_module": null - }, - { - "comment_id": "433370829", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3415, - "pr_file": 
"plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/AtomicFUTransformerJsIr.kt", - "discussion_id": "429178285", - "commented_code": "@@ -0,0 +1,473 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlinx.atomicfu.compiler.extensions\n+\n+import org.jetbrains.kotlin.backend.common.deepCopyWithVariables\n+import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\n+import org.jetbrains.kotlin.builtins.PrimitiveType\n+import org.jetbrains.kotlin.descriptors.*\n+import org.jetbrains.kotlin.descriptors.impl.FunctionDescriptorImpl\n+import org.jetbrains.kotlin.ir.*\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildValueParameter\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildFunction\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildBlockBody\n+import org.jetbrains.kotlin.ir.declarations.*\n+import org.jetbrains.kotlin.ir.declarations.impl.*\n+import org.jetbrains.kotlin.ir.expressions.*\n+import org.jetbrains.kotlin.ir.expressions.impl.*\n+import org.jetbrains.kotlin.ir.symbols.IrSimpleFunctionSymbol\n+import org.jetbrains.kotlin.ir.symbols.IrSymbol\n+import org.jetbrains.kotlin.ir.symbols.impl.*\n+import org.jetbrains.kotlin.ir.types.*\n+import org.jetbrains.kotlin.ir.util.*\n+import org.jetbrains.kotlin.ir.visitors.IrElementTransformerVoid\n+import java.lang.IllegalStateException\n+\n+private const val AFU_PKG = \"kotlinx/atomicfu\"\n+private const val LOCKS = \"locks\"\n+private const val ATOMIC_CONSTRUCTOR = \"atomic\"\n+private const val ATOMICFU_VALUE_TYPE = \"\"\"Atomic(Int|Long|Boolean|Ref)\"\"\"\n+private const val ATOMIC_ARRAY_TYPE = \"\"\"Atomic(Int|Long|Boolean|)Array\"\"\"\n+private const val ATOMIC_ARRAY_FACTORY_FUNCTION = \"atomicArrayOfNulls\"\n+private const val ATOMICFU_RUNTIME_FUNCTION_PREDICATE = \"atomicfu_\"\n+private const val REENTRANT_LOCK_TYPE = \"ReentrantLock\"\n+private const val GETTER = \"atomicfu\\$getter\"\n+private const val SETTER = \"atomicfu\\$setter\"\n+private const val GET = \"get\"\n+private const val SET = \"set\"\n+\n+private fun String.prettyStr() = replace('/', '.')\n+\n+class AtomicFUTransformer(override val context: IrPluginContext) : IrElementTransformerVoid(), TransformerHelpers {\n+\n+ private val irBuiltIns = context.irBuiltIns\n+\n+ private val AFU_CLASSES: Map = mapOf(\n+ \"AtomicInt\" to irBuiltIns.intType,\n+ \"AtomicLong\" to irBuiltIns.longType,\n+ \"AtomicRef\" to irBuiltIns.anyType,\n+ \"AtomicBoolean\" to irBuiltIns.booleanType\n+ )\n+\n+ private val AFU_ARRAY_CLASSES: Map = mapOf(\n+ \"AtomicIntArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.INT),\n+ \"AtomicLongArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.LONG),\n+ \"AtomicBooleanArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.BOOLEAN),\n+ \"AtomicArray\" to context.builtIns.array\n+ )\n+\n+ override fun visitFile(irFile: IrFile): IrFile {\n+ irFile.declarations.map { declaration ->\n+ declaration.transformAtomicInlineDeclaration()\n+ }\n+ return super.visitFile(irFile)\n+ }\n+\n+ override fun visitClass(irClass: IrClass): IrStatement {\n+ irClass.declarations.map { declaration ->\n+ declaration.transformAtomicInlineDeclaration()\n+ }\n+ return super.visitClass(irClass)\n+ }\n+\n+ override fun 
visitProperty(property: IrProperty): IrStatement {\n+ if (property.backingField != null) {\n+ val backingField = property.backingField!!\n+ if (backingField.initializer != null) {\n+ val initializer = backingField.initializer!!.expression.transformAtomicValueInitializer()\n+ property.backingField!!.initializer = IrExpressionBodyImpl(initializer)\n+ }\n+ }\n+ return super.visitProperty(property)\n+ }\n+\n+ override fun visitBlockBody(body: IrBlockBody): IrBody {\n+ body.statements.forEachIndexed { i, stmt ->\n+ if (stmt is IrCall) {\n+ body.statements[i] = stmt.transformAtomicFunctionCall()\n+ }\n+ }\n+ return super.visitBlockBody(body)\n+ }\n+\n+ override fun visitBlock(block: IrBlock): IrExpression {\n+ block.statements.forEachIndexed { i, stmt ->\n+ if (stmt is IrCall) {\n+ block.statements[i] = stmt.transformAtomicFunctionCall()\n+ }\n+ }\n+ return super.visitBlock(block)\n+ }\n+\n+ override fun visitReturn(returnExpr: IrReturn): IrExpression {\n+ returnExpr.value = returnExpr.value.transformAtomicFunctionCall()\n+ return super.visitReturn(returnExpr)\n+ }\n+\n+ override fun visitTypeOperator(typeOp: IrTypeOperatorCall): IrExpression {\n+ typeOp.argument = typeOp.argument.transformAtomicFunctionCall()\n+ return super.visitTypeOperator(typeOp)\n+ }\n+\n+ override fun visitSetVariable(setVar: IrSetVariable): IrExpression {\n+ setVar.value = setVar.value.transformAtomicFunctionCall()\n+ return super.visitSetVariable(setVar)\n+ }\n+\n+ override fun visitSetField(setField: IrSetField): IrExpression {\n+ setField.value = setField.value.transformAtomicFunctionCall()\n+ return super.visitSetField(setField)\n+ }\n+\n+ override fun visitVariable(declaration: IrVariable): IrStatement {\n+ declaration.initializer = declaration.initializer?.transformAtomicFunctionCall()\n+ return super.visitVariable(declaration)\n+ }\n+\n+ override fun visitBranch(branch: IrBranch): IrBranch {\n+ branch.apply {\n+ condition = condition.transformAtomicFunctionCall()\n+ result = result.transformAtomicFunctionCall()\n+ }\n+ return super.visitBranch(branch)\n+ }\n+\n+ override fun visitElseBranch(branch: IrElseBranch): IrElseBranch {\n+ branch.apply {\n+ condition = condition.transformAtomicFunctionCall()\n+ result = result.transformAtomicFunctionCall()\n+ }\n+ return super.visitElseBranch(branch)\n+ }\n+\n+ private fun IrExpression.transformAtomicValueInitializer() =\n+ when {\n+ type.isAtomicValueType() -> getPureTypeValue()\n+ type.isAtomicArrayType() -> buildPureTypeArrayConstructor()\n+ type.isReentrantLockType() -> buildConstNull()\n+ else -> this\n+ }\n+\n+ private fun IrDeclaration.transformAtomicInlineDeclaration() {\n+ if (this is IrFunction &&\n+ isInline &&\n+ extensionReceiverParameter != null &&\n+ extensionReceiverParameter!!.type.isAtomicValueType()\n+ ) {\n+ val type = extensionReceiverParameter!!.type\n+ val valueType = type.atomicToValueType()\n+ val getterType = buildFunctionSimpleType(listOf(irBuiltIns.unitType, valueType))\n+ val setterType = buildFunctionSimpleType(listOf(valueType, irBuiltIns.unitType))\n+ val valueParametersCount = valueParameters.size\n+ val extendedValueParameters = mutableListOf().apply {\n+ addAll(valueParameters)\n+ add(buildValueParameter(GETTER, valueParametersCount, getterType))\n+ add(buildValueParameter(SETTER, valueParametersCount + 1, setterType))\n+ }\n+ this as IrSimpleFunction\n+ (descriptor as FunctionDescriptorImpl).initialize(", - "comment_created_at": "2020-06-01T17:11:41+00:00", - "comment_author": "SokolovaMaria", - "comment_body": "Could you please tell me, 
how can I avoid using DeclarationDescriptor API here? \r\n\r\nThe problem is that if I just change the parameters of the owner `IrFunction` (`extensionReceiver` and `valueParameters`) then `ManglerChecker` fails at the comparison of the actual `Ir` signature and the one declared in the `Descriptor`.", - "pr_file_module": null - }, - { - "comment_id": "433414477", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3415, - "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/AtomicFUTransformerJsIr.kt", - "discussion_id": "429178285", - "commented_code": "@@ -0,0 +1,473 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlinx.atomicfu.compiler.extensions\n+\n+import org.jetbrains.kotlin.backend.common.deepCopyWithVariables\n+import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\n+import org.jetbrains.kotlin.builtins.PrimitiveType\n+import org.jetbrains.kotlin.descriptors.*\n+import org.jetbrains.kotlin.descriptors.impl.FunctionDescriptorImpl\n+import org.jetbrains.kotlin.ir.*\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildValueParameter\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildFunction\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildBlockBody\n+import org.jetbrains.kotlin.ir.declarations.*\n+import org.jetbrains.kotlin.ir.declarations.impl.*\n+import org.jetbrains.kotlin.ir.expressions.*\n+import org.jetbrains.kotlin.ir.expressions.impl.*\n+import org.jetbrains.kotlin.ir.symbols.IrSimpleFunctionSymbol\n+import org.jetbrains.kotlin.ir.symbols.IrSymbol\n+import org.jetbrains.kotlin.ir.symbols.impl.*\n+import org.jetbrains.kotlin.ir.types.*\n+import org.jetbrains.kotlin.ir.util.*\n+import org.jetbrains.kotlin.ir.visitors.IrElementTransformerVoid\n+import java.lang.IllegalStateException\n+\n+private const val AFU_PKG = \"kotlinx/atomicfu\"\n+private const val LOCKS = \"locks\"\n+private const val ATOMIC_CONSTRUCTOR = \"atomic\"\n+private const val ATOMICFU_VALUE_TYPE = \"\"\"Atomic(Int|Long|Boolean|Ref)\"\"\"\n+private const val ATOMIC_ARRAY_TYPE = \"\"\"Atomic(Int|Long|Boolean|)Array\"\"\"\n+private const val ATOMIC_ARRAY_FACTORY_FUNCTION = \"atomicArrayOfNulls\"\n+private const val ATOMICFU_RUNTIME_FUNCTION_PREDICATE = \"atomicfu_\"\n+private const val REENTRANT_LOCK_TYPE = \"ReentrantLock\"\n+private const val GETTER = \"atomicfu\\$getter\"\n+private const val SETTER = \"atomicfu\\$setter\"\n+private const val GET = \"get\"\n+private const val SET = \"set\"\n+\n+private fun String.prettyStr() = replace('/', '.')\n+\n+class AtomicFUTransformer(override val context: IrPluginContext) : IrElementTransformerVoid(), TransformerHelpers {\n+\n+ private val irBuiltIns = context.irBuiltIns\n+\n+ private val AFU_CLASSES: Map = mapOf(\n+ \"AtomicInt\" to irBuiltIns.intType,\n+ \"AtomicLong\" to irBuiltIns.longType,\n+ \"AtomicRef\" to irBuiltIns.anyType,\n+ \"AtomicBoolean\" to irBuiltIns.booleanType\n+ )\n+\n+ private val AFU_ARRAY_CLASSES: Map = mapOf(\n+ \"AtomicIntArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.INT),\n+ \"AtomicLongArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.LONG),\n+ \"AtomicBooleanArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.BOOLEAN),\n+ \"AtomicArray\" to 
context.builtIns.array\n+ )\n+\n+ override fun visitFile(irFile: IrFile): IrFile {\n+ irFile.declarations.map { declaration ->\n+ declaration.transformAtomicInlineDeclaration()\n+ }\n+ return super.visitFile(irFile)\n+ }\n+\n+ override fun visitClass(irClass: IrClass): IrStatement {\n+ irClass.declarations.map { declaration ->\n+ declaration.transformAtomicInlineDeclaration()\n+ }\n+ return super.visitClass(irClass)\n+ }\n+\n+ override fun visitProperty(property: IrProperty): IrStatement {\n+ if (property.backingField != null) {\n+ val backingField = property.backingField!!\n+ if (backingField.initializer != null) {\n+ val initializer = backingField.initializer!!.expression.transformAtomicValueInitializer()\n+ property.backingField!!.initializer = IrExpressionBodyImpl(initializer)\n+ }\n+ }\n+ return super.visitProperty(property)\n+ }\n+\n+ override fun visitBlockBody(body: IrBlockBody): IrBody {\n+ body.statements.forEachIndexed { i, stmt ->\n+ if (stmt is IrCall) {\n+ body.statements[i] = stmt.transformAtomicFunctionCall()\n+ }\n+ }\n+ return super.visitBlockBody(body)\n+ }\n+\n+ override fun visitBlock(block: IrBlock): IrExpression {\n+ block.statements.forEachIndexed { i, stmt ->\n+ if (stmt is IrCall) {\n+ block.statements[i] = stmt.transformAtomicFunctionCall()\n+ }\n+ }\n+ return super.visitBlock(block)\n+ }\n+\n+ override fun visitReturn(returnExpr: IrReturn): IrExpression {\n+ returnExpr.value = returnExpr.value.transformAtomicFunctionCall()\n+ return super.visitReturn(returnExpr)\n+ }\n+\n+ override fun visitTypeOperator(typeOp: IrTypeOperatorCall): IrExpression {\n+ typeOp.argument = typeOp.argument.transformAtomicFunctionCall()\n+ return super.visitTypeOperator(typeOp)\n+ }\n+\n+ override fun visitSetVariable(setVar: IrSetVariable): IrExpression {\n+ setVar.value = setVar.value.transformAtomicFunctionCall()\n+ return super.visitSetVariable(setVar)\n+ }\n+\n+ override fun visitSetField(setField: IrSetField): IrExpression {\n+ setField.value = setField.value.transformAtomicFunctionCall()\n+ return super.visitSetField(setField)\n+ }\n+\n+ override fun visitVariable(declaration: IrVariable): IrStatement {\n+ declaration.initializer = declaration.initializer?.transformAtomicFunctionCall()\n+ return super.visitVariable(declaration)\n+ }\n+\n+ override fun visitBranch(branch: IrBranch): IrBranch {\n+ branch.apply {\n+ condition = condition.transformAtomicFunctionCall()\n+ result = result.transformAtomicFunctionCall()\n+ }\n+ return super.visitBranch(branch)\n+ }\n+\n+ override fun visitElseBranch(branch: IrElseBranch): IrElseBranch {\n+ branch.apply {\n+ condition = condition.transformAtomicFunctionCall()\n+ result = result.transformAtomicFunctionCall()\n+ }\n+ return super.visitElseBranch(branch)\n+ }\n+\n+ private fun IrExpression.transformAtomicValueInitializer() =\n+ when {\n+ type.isAtomicValueType() -> getPureTypeValue()\n+ type.isAtomicArrayType() -> buildPureTypeArrayConstructor()\n+ type.isReentrantLockType() -> buildConstNull()\n+ else -> this\n+ }\n+\n+ private fun IrDeclaration.transformAtomicInlineDeclaration() {\n+ if (this is IrFunction &&\n+ isInline &&\n+ extensionReceiverParameter != null &&\n+ extensionReceiverParameter!!.type.isAtomicValueType()\n+ ) {\n+ val type = extensionReceiverParameter!!.type\n+ val valueType = type.atomicToValueType()\n+ val getterType = buildFunctionSimpleType(listOf(irBuiltIns.unitType, valueType))\n+ val setterType = buildFunctionSimpleType(listOf(valueType, irBuiltIns.unitType))\n+ val valueParametersCount = valueParameters.size\n+ val 
extendedValueParameters = mutableListOf().apply {\n+ addAll(valueParameters)\n+ add(buildValueParameter(GETTER, valueParametersCount, getterType))\n+ add(buildValueParameter(SETTER, valueParametersCount + 1, setterType))\n+ }\n+ this as IrSimpleFunction\n+ (descriptor as FunctionDescriptorImpl).initialize(", - "comment_created_at": "2020-06-01T18:33:53+00:00", - "comment_author": "romanart", - "comment_body": "What kind of failure do you see? I bet it could be easily fixed if you use wrapped descriptor", - "pr_file_module": null - }, - { - "comment_id": "435628343", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3415, - "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/AtomicFUTransformerJsIr.kt", - "discussion_id": "429178285", - "commented_code": "@@ -0,0 +1,473 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlinx.atomicfu.compiler.extensions\n+\n+import org.jetbrains.kotlin.backend.common.deepCopyWithVariables\n+import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\n+import org.jetbrains.kotlin.builtins.PrimitiveType\n+import org.jetbrains.kotlin.descriptors.*\n+import org.jetbrains.kotlin.descriptors.impl.FunctionDescriptorImpl\n+import org.jetbrains.kotlin.ir.*\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildValueParameter\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildFunction\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildBlockBody\n+import org.jetbrains.kotlin.ir.declarations.*\n+import org.jetbrains.kotlin.ir.declarations.impl.*\n+import org.jetbrains.kotlin.ir.expressions.*\n+import org.jetbrains.kotlin.ir.expressions.impl.*\n+import org.jetbrains.kotlin.ir.symbols.IrSimpleFunctionSymbol\n+import org.jetbrains.kotlin.ir.symbols.IrSymbol\n+import org.jetbrains.kotlin.ir.symbols.impl.*\n+import org.jetbrains.kotlin.ir.types.*\n+import org.jetbrains.kotlin.ir.util.*\n+import org.jetbrains.kotlin.ir.visitors.IrElementTransformerVoid\n+import java.lang.IllegalStateException\n+\n+private const val AFU_PKG = \"kotlinx/atomicfu\"\n+private const val LOCKS = \"locks\"\n+private const val ATOMIC_CONSTRUCTOR = \"atomic\"\n+private const val ATOMICFU_VALUE_TYPE = \"\"\"Atomic(Int|Long|Boolean|Ref)\"\"\"\n+private const val ATOMIC_ARRAY_TYPE = \"\"\"Atomic(Int|Long|Boolean|)Array\"\"\"\n+private const val ATOMIC_ARRAY_FACTORY_FUNCTION = \"atomicArrayOfNulls\"\n+private const val ATOMICFU_RUNTIME_FUNCTION_PREDICATE = \"atomicfu_\"\n+private const val REENTRANT_LOCK_TYPE = \"ReentrantLock\"\n+private const val GETTER = \"atomicfu\\$getter\"\n+private const val SETTER = \"atomicfu\\$setter\"\n+private const val GET = \"get\"\n+private const val SET = \"set\"\n+\n+private fun String.prettyStr() = replace('/', '.')\n+\n+class AtomicFUTransformer(override val context: IrPluginContext) : IrElementTransformerVoid(), TransformerHelpers {\n+\n+ private val irBuiltIns = context.irBuiltIns\n+\n+ private val AFU_CLASSES: Map = mapOf(\n+ \"AtomicInt\" to irBuiltIns.intType,\n+ \"AtomicLong\" to irBuiltIns.longType,\n+ \"AtomicRef\" to irBuiltIns.anyType,\n+ \"AtomicBoolean\" to irBuiltIns.booleanType\n+ )\n+\n+ private val AFU_ARRAY_CLASSES: Map = mapOf(\n+ \"AtomicIntArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.INT),\n+ \"AtomicLongArray\" to 
context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.LONG),\n+ \"AtomicBooleanArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.BOOLEAN),\n+ \"AtomicArray\" to context.builtIns.array\n+ )\n+\n+ override fun visitFile(irFile: IrFile): IrFile {\n+ irFile.declarations.map { declaration ->\n+ declaration.transformAtomicInlineDeclaration()\n+ }\n+ return super.visitFile(irFile)\n+ }\n+\n+ override fun visitClass(irClass: IrClass): IrStatement {\n+ irClass.declarations.map { declaration ->\n+ declaration.transformAtomicInlineDeclaration()\n+ }\n+ return super.visitClass(irClass)\n+ }\n+\n+ override fun visitProperty(property: IrProperty): IrStatement {\n+ if (property.backingField != null) {\n+ val backingField = property.backingField!!\n+ if (backingField.initializer != null) {\n+ val initializer = backingField.initializer!!.expression.transformAtomicValueInitializer()\n+ property.backingField!!.initializer = IrExpressionBodyImpl(initializer)\n+ }\n+ }\n+ return super.visitProperty(property)\n+ }\n+\n+ override fun visitBlockBody(body: IrBlockBody): IrBody {\n+ body.statements.forEachIndexed { i, stmt ->\n+ if (stmt is IrCall) {\n+ body.statements[i] = stmt.transformAtomicFunctionCall()\n+ }\n+ }\n+ return super.visitBlockBody(body)\n+ }\n+\n+ override fun visitBlock(block: IrBlock): IrExpression {\n+ block.statements.forEachIndexed { i, stmt ->\n+ if (stmt is IrCall) {\n+ block.statements[i] = stmt.transformAtomicFunctionCall()\n+ }\n+ }\n+ return super.visitBlock(block)\n+ }\n+\n+ override fun visitReturn(returnExpr: IrReturn): IrExpression {\n+ returnExpr.value = returnExpr.value.transformAtomicFunctionCall()\n+ return super.visitReturn(returnExpr)\n+ }\n+\n+ override fun visitTypeOperator(typeOp: IrTypeOperatorCall): IrExpression {\n+ typeOp.argument = typeOp.argument.transformAtomicFunctionCall()\n+ return super.visitTypeOperator(typeOp)\n+ }\n+\n+ override fun visitSetVariable(setVar: IrSetVariable): IrExpression {\n+ setVar.value = setVar.value.transformAtomicFunctionCall()\n+ return super.visitSetVariable(setVar)\n+ }\n+\n+ override fun visitSetField(setField: IrSetField): IrExpression {\n+ setField.value = setField.value.transformAtomicFunctionCall()\n+ return super.visitSetField(setField)\n+ }\n+\n+ override fun visitVariable(declaration: IrVariable): IrStatement {\n+ declaration.initializer = declaration.initializer?.transformAtomicFunctionCall()\n+ return super.visitVariable(declaration)\n+ }\n+\n+ override fun visitBranch(branch: IrBranch): IrBranch {\n+ branch.apply {\n+ condition = condition.transformAtomicFunctionCall()\n+ result = result.transformAtomicFunctionCall()\n+ }\n+ return super.visitBranch(branch)\n+ }\n+\n+ override fun visitElseBranch(branch: IrElseBranch): IrElseBranch {\n+ branch.apply {\n+ condition = condition.transformAtomicFunctionCall()\n+ result = result.transformAtomicFunctionCall()\n+ }\n+ return super.visitElseBranch(branch)\n+ }\n+\n+ private fun IrExpression.transformAtomicValueInitializer() =\n+ when {\n+ type.isAtomicValueType() -> getPureTypeValue()\n+ type.isAtomicArrayType() -> buildPureTypeArrayConstructor()\n+ type.isReentrantLockType() -> buildConstNull()\n+ else -> this\n+ }\n+\n+ private fun IrDeclaration.transformAtomicInlineDeclaration() {\n+ if (this is IrFunction &&\n+ isInline &&\n+ extensionReceiverParameter != null &&\n+ extensionReceiverParameter!!.type.isAtomicValueType()\n+ ) {\n+ val type = extensionReceiverParameter!!.type\n+ val valueType = type.atomicToValueType()\n+ val getterType = 
buildFunctionSimpleType(listOf(irBuiltIns.unitType, valueType))\n+ val setterType = buildFunctionSimpleType(listOf(valueType, irBuiltIns.unitType))\n+ val valueParametersCount = valueParameters.size\n+ val extendedValueParameters = mutableListOf().apply {\n+ addAll(valueParameters)\n+ add(buildValueParameter(GETTER, valueParametersCount, getterType))\n+ add(buildValueParameter(SETTER, valueParametersCount + 1, setterType))\n+ }\n+ this as IrSimpleFunction\n+ (descriptor as FunctionDescriptorImpl).initialize(", - "comment_created_at": "2020-06-05T00:30:16+00:00", - "comment_author": "SokolovaMaria", - "comment_body": "I fixed this by creating new transformed declarations with `WrappedSimpleFunctionDescriptor` (as I can not just set a new descriptor).", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kotlin-document-security-implementations.json b/_reviewers/kotlin-document-security-implementations.json new file mode 100644 index 0000000..a745d17 --- /dev/null +++ b/_reviewers/kotlin-document-security-implementations.json @@ -0,0 +1,72 @@ +[ + { + "discussion_id": "632319980", + "pr_number": 4383, + "pr_file": "libraries/scripting/dependencies-maven/src/kotlin/script/experimental/dependencies/maven/MavenDependenciesResolver.kt", + "created_at": "2021-05-14T06:43:17+00:00", + "commented_code": "val url = repositoryCoordinates.toRepositoryUrlOrNull()\n ?: return false.asSuccess()\n val repoId = repositoryCoordinates.string.replace(FORBIDDEN_CHARS, \"_\")\n val repo = RemoteRepository.Builder(repoId, \"default\", url.toString())\n if (repositoryCoordinates is MavenRepositoryCoordinates) {\n val username = repositoryCoordinates.username?.let(::tryResolveEnvironmentVariable)\n val password = repositoryCoordinates.password?.let(::tryResolveEnvironmentVariable)\n if (username != null) {\n val auth = AuthenticationBuilder().apply {\n addUsername(username)\n if (password != null) {\n addPassword(password)\n val repo = RemoteRepository.Builder(repoId, \"default\", url.toString()).apply {\n setAuthentication(\n AuthenticationBuilder().apply {\n with(options) {\n addUsername(username?.let(::tryResolveEnvironmentVariable))", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "632319980", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4383, + "pr_file": "libraries/scripting/dependencies-maven/src/kotlin/script/experimental/dependencies/maven/MavenDependenciesResolver.kt", + "discussion_id": "632319980", + "commented_code": "@@ -89,21 +90,22 @@ class MavenDependenciesResolver : ExternalDependenciesResolver {\n val url = repositoryCoordinates.toRepositoryUrlOrNull()\n ?: return false.asSuccess()\n val repoId = repositoryCoordinates.string.replace(FORBIDDEN_CHARS, \"_\")\n- val repo = RemoteRepository.Builder(repoId, \"default\", url.toString())\n- if (repositoryCoordinates is MavenRepositoryCoordinates) {\n- val username = repositoryCoordinates.username?.let(::tryResolveEnvironmentVariable)\n- val password = repositoryCoordinates.password?.let(::tryResolveEnvironmentVariable)\n- if (username != null) {\n- val auth = AuthenticationBuilder().apply {\n- addUsername(username)\n- if (password != null) {\n- addPassword(password)\n+ val repo = RemoteRepository.Builder(repoId, \"default\", url.toString()).apply {\n+ setAuthentication(\n+ AuthenticationBuilder().apply {\n+ with(options) {\n+ addUsername(username?.let(::tryResolveEnvironmentVariable))", + "comment_created_at": "2021-05-14T06:43:17+00:00", + "comment_author": "ligee", + "comment_body": "I 
suspect that the logic should be similar to the previous variant - first, we check if any option is set and only then add an auth for only one variant. (And if both are set, prefer the key/passphrase).", + "pr_file_module": null + }, + { + "comment_id": "632499749", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4383, + "pr_file": "libraries/scripting/dependencies-maven/src/kotlin/script/experimental/dependencies/maven/MavenDependenciesResolver.kt", + "discussion_id": "632319980", + "commented_code": "@@ -89,21 +90,22 @@ class MavenDependenciesResolver : ExternalDependenciesResolver {\n val url = repositoryCoordinates.toRepositoryUrlOrNull()\n ?: return false.asSuccess()\n val repoId = repositoryCoordinates.string.replace(FORBIDDEN_CHARS, \"_\")\n- val repo = RemoteRepository.Builder(repoId, \"default\", url.toString())\n- if (repositoryCoordinates is MavenRepositoryCoordinates) {\n- val username = repositoryCoordinates.username?.let(::tryResolveEnvironmentVariable)\n- val password = repositoryCoordinates.password?.let(::tryResolveEnvironmentVariable)\n- if (username != null) {\n- val auth = AuthenticationBuilder().apply {\n- addUsername(username)\n- if (password != null) {\n- addPassword(password)\n+ val repo = RemoteRepository.Builder(repoId, \"default\", url.toString()).apply {\n+ setAuthentication(\n+ AuthenticationBuilder().apply {\n+ with(options) {\n+ addUsername(username?.let(::tryResolveEnvironmentVariable))", + "comment_created_at": "2021-05-14T12:41:06+00:00", + "comment_author": "ileasile", + "comment_body": "I think that implemented approach is more in style of `org.eclipse.aether.repository.Authentication` interface: we simply put into the container all that we have, and consumers in the resolver internals decide what they will use. I think that this logic is more clear and more correct. Empty authorization doesn't differ (in logic) from null one, the cost here is insignificant", + "pr_file_module": null + }, + { + "comment_id": "632525443", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4383, + "pr_file": "libraries/scripting/dependencies-maven/src/kotlin/script/experimental/dependencies/maven/MavenDependenciesResolver.kt", + "discussion_id": "632319980", + "commented_code": "@@ -89,21 +90,22 @@ class MavenDependenciesResolver : ExternalDependenciesResolver {\n val url = repositoryCoordinates.toRepositoryUrlOrNull()\n ?: return false.asSuccess()\n val repoId = repositoryCoordinates.string.replace(FORBIDDEN_CHARS, \"_\")\n- val repo = RemoteRepository.Builder(repoId, \"default\", url.toString())\n- if (repositoryCoordinates is MavenRepositoryCoordinates) {\n- val username = repositoryCoordinates.username?.let(::tryResolveEnvironmentVariable)\n- val password = repositoryCoordinates.password?.let(::tryResolveEnvironmentVariable)\n- if (username != null) {\n- val auth = AuthenticationBuilder().apply {\n- addUsername(username)\n- if (password != null) {\n- addPassword(password)\n+ val repo = RemoteRepository.Builder(repoId, \"default\", url.toString()).apply {\n+ setAuthentication(\n+ AuthenticationBuilder().apply {\n+ with(options) {\n+ addUsername(username?.let(::tryResolveEnvironmentVariable))", + "comment_created_at": "2021-05-14T13:25:09+00:00", + "comment_author": "ligee", + "comment_body": "I do not see any \"style\" in the `org.eclipse.aether.repository.Authentication` that would suggest such implementation. Can you please show, what you mean? 
Or maybe it is documented somewhere?\r\nWithout some examples or explicit recommendations, I would consider this approach ambiguous, and therefore not desirable.", + "pr_file_module": null + }, + { + "comment_id": "632598089", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4383, + "pr_file": "libraries/scripting/dependencies-maven/src/kotlin/script/experimental/dependencies/maven/MavenDependenciesResolver.kt", + "discussion_id": "632319980", + "commented_code": "@@ -89,21 +90,22 @@ class MavenDependenciesResolver : ExternalDependenciesResolver {\n val url = repositoryCoordinates.toRepositoryUrlOrNull()\n ?: return false.asSuccess()\n val repoId = repositoryCoordinates.string.replace(FORBIDDEN_CHARS, \"_\")\n- val repo = RemoteRepository.Builder(repoId, \"default\", url.toString())\n- if (repositoryCoordinates is MavenRepositoryCoordinates) {\n- val username = repositoryCoordinates.username?.let(::tryResolveEnvironmentVariable)\n- val password = repositoryCoordinates.password?.let(::tryResolveEnvironmentVariable)\n- if (username != null) {\n- val auth = AuthenticationBuilder().apply {\n- addUsername(username)\n- if (password != null) {\n- addPassword(password)\n+ val repo = RemoteRepository.Builder(repoId, \"default\", url.toString()).apply {\n+ setAuthentication(\n+ AuthenticationBuilder().apply {\n+ with(options) {\n+ addUsername(username?.let(::tryResolveEnvironmentVariable))", + "comment_created_at": "2021-05-14T15:13:14+00:00", + "comment_author": "ileasile", + "comment_body": "Each Authentication populates AuthenticationContext in the `fill` method. `AuthenticationContext` has all the data in the raw map `org.eclipse.aether.repository.AuthenticationContext#authData`, and then each consumer uses it in its own way. I mean that all the logic about nullability and emptiness is on the lower level, these checks will be performed anyway.\r\nFor example, see how authorization is used in wagon transporter: it is first converted to another instance (`org.eclipse.aether.transport.wagon.WagonTransporter#getProxy`) and then these values are used with all the checks performed here: `org.apache.maven.wagon.shared.http.AbstractHttpClientWagon#openConnectionInternal`. 
I don't think we should care about it: some implementations may use password without username or username without password, it should not be solved here", + "pr_file_module": null + }, + { + "comment_id": "633291461", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4383, + "pr_file": "libraries/scripting/dependencies-maven/src/kotlin/script/experimental/dependencies/maven/MavenDependenciesResolver.kt", + "discussion_id": "632319980", + "commented_code": "@@ -89,21 +90,22 @@ class MavenDependenciesResolver : ExternalDependenciesResolver {\n val url = repositoryCoordinates.toRepositoryUrlOrNull()\n ?: return false.asSuccess()\n val repoId = repositoryCoordinates.string.replace(FORBIDDEN_CHARS, \"_\")\n- val repo = RemoteRepository.Builder(repoId, \"default\", url.toString())\n- if (repositoryCoordinates is MavenRepositoryCoordinates) {\n- val username = repositoryCoordinates.username?.let(::tryResolveEnvironmentVariable)\n- val password = repositoryCoordinates.password?.let(::tryResolveEnvironmentVariable)\n- if (username != null) {\n- val auth = AuthenticationBuilder().apply {\n- addUsername(username)\n- if (password != null) {\n- addPassword(password)\n+ val repo = RemoteRepository.Builder(repoId, \"default\", url.toString()).apply {\n+ setAuthentication(\n+ AuthenticationBuilder().apply {\n+ with(options) {\n+ addUsername(username?.let(::tryResolveEnvironmentVariable))", + "comment_created_at": "2021-05-17T07:38:52+00:00", + "comment_author": "ligee", + "comment_body": "Since it is a place that I stumbled upon, it may look suspicious to others in the future too. I'd suggest at least adding an appropriate comment with the link to implementation (and/or documentation) as an explanation.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kotlin-document-security-implementations.md b/_reviewers/kotlin-document-security-implementations.md index 2b6e41b..7f58772 100644 --- a/_reviewers/kotlin-document-security-implementations.md +++ b/_reviewers/kotlin-document-security-implementations.md @@ -29,77 +29,3 @@ setAuthentication( } ) ``` - - -[ - { - "discussion_id": "632319980", - "pr_number": 4383, - "pr_file": "libraries/scripting/dependencies-maven/src/kotlin/script/experimental/dependencies/maven/MavenDependenciesResolver.kt", - "created_at": "2021-05-14T06:43:17+00:00", - "commented_code": "val url = repositoryCoordinates.toRepositoryUrlOrNull()\n ?: return false.asSuccess()\n val repoId = repositoryCoordinates.string.replace(FORBIDDEN_CHARS, \"_\")\n val repo = RemoteRepository.Builder(repoId, \"default\", url.toString())\n if (repositoryCoordinates is MavenRepositoryCoordinates) {\n val username = repositoryCoordinates.username?.let(::tryResolveEnvironmentVariable)\n val password = repositoryCoordinates.password?.let(::tryResolveEnvironmentVariable)\n if (username != null) {\n val auth = AuthenticationBuilder().apply {\n addUsername(username)\n if (password != null) {\n addPassword(password)\n val repo = RemoteRepository.Builder(repoId, \"default\", url.toString()).apply {\n setAuthentication(\n AuthenticationBuilder().apply {\n with(options) {\n addUsername(username?.let(::tryResolveEnvironmentVariable))", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "632319980", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4383, - "pr_file": "libraries/scripting/dependencies-maven/src/kotlin/script/experimental/dependencies/maven/MavenDependenciesResolver.kt", - "discussion_id": "632319980", - 
"commented_code": "@@ -89,21 +90,22 @@ class MavenDependenciesResolver : ExternalDependenciesResolver {\n val url = repositoryCoordinates.toRepositoryUrlOrNull()\n ?: return false.asSuccess()\n val repoId = repositoryCoordinates.string.replace(FORBIDDEN_CHARS, \"_\")\n- val repo = RemoteRepository.Builder(repoId, \"default\", url.toString())\n- if (repositoryCoordinates is MavenRepositoryCoordinates) {\n- val username = repositoryCoordinates.username?.let(::tryResolveEnvironmentVariable)\n- val password = repositoryCoordinates.password?.let(::tryResolveEnvironmentVariable)\n- if (username != null) {\n- val auth = AuthenticationBuilder().apply {\n- addUsername(username)\n- if (password != null) {\n- addPassword(password)\n+ val repo = RemoteRepository.Builder(repoId, \"default\", url.toString()).apply {\n+ setAuthentication(\n+ AuthenticationBuilder().apply {\n+ with(options) {\n+ addUsername(username?.let(::tryResolveEnvironmentVariable))", - "comment_created_at": "2021-05-14T06:43:17+00:00", - "comment_author": "ligee", - "comment_body": "I suspect that the logic should be similar to the previous variant - first, we check if any option is set and only then add an auth for only one variant. (And if both are set, prefer the key/passphrase).", - "pr_file_module": null - }, - { - "comment_id": "632499749", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4383, - "pr_file": "libraries/scripting/dependencies-maven/src/kotlin/script/experimental/dependencies/maven/MavenDependenciesResolver.kt", - "discussion_id": "632319980", - "commented_code": "@@ -89,21 +90,22 @@ class MavenDependenciesResolver : ExternalDependenciesResolver {\n val url = repositoryCoordinates.toRepositoryUrlOrNull()\n ?: return false.asSuccess()\n val repoId = repositoryCoordinates.string.replace(FORBIDDEN_CHARS, \"_\")\n- val repo = RemoteRepository.Builder(repoId, \"default\", url.toString())\n- if (repositoryCoordinates is MavenRepositoryCoordinates) {\n- val username = repositoryCoordinates.username?.let(::tryResolveEnvironmentVariable)\n- val password = repositoryCoordinates.password?.let(::tryResolveEnvironmentVariable)\n- if (username != null) {\n- val auth = AuthenticationBuilder().apply {\n- addUsername(username)\n- if (password != null) {\n- addPassword(password)\n+ val repo = RemoteRepository.Builder(repoId, \"default\", url.toString()).apply {\n+ setAuthentication(\n+ AuthenticationBuilder().apply {\n+ with(options) {\n+ addUsername(username?.let(::tryResolveEnvironmentVariable))", - "comment_created_at": "2021-05-14T12:41:06+00:00", - "comment_author": "ileasile", - "comment_body": "I think that implemented approach is more in style of `org.eclipse.aether.repository.Authentication` interface: we simply put into the container all that we have, and consumers in the resolver internals decide what they will use. I think that this logic is more clear and more correct. 
Empty authorization doesn't differ (in logic) from null one, the cost here is insignificant", - "pr_file_module": null - }, - { - "comment_id": "632525443", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4383, - "pr_file": "libraries/scripting/dependencies-maven/src/kotlin/script/experimental/dependencies/maven/MavenDependenciesResolver.kt", - "discussion_id": "632319980", - "commented_code": "@@ -89,21 +90,22 @@ class MavenDependenciesResolver : ExternalDependenciesResolver {\n val url = repositoryCoordinates.toRepositoryUrlOrNull()\n ?: return false.asSuccess()\n val repoId = repositoryCoordinates.string.replace(FORBIDDEN_CHARS, \"_\")\n- val repo = RemoteRepository.Builder(repoId, \"default\", url.toString())\n- if (repositoryCoordinates is MavenRepositoryCoordinates) {\n- val username = repositoryCoordinates.username?.let(::tryResolveEnvironmentVariable)\n- val password = repositoryCoordinates.password?.let(::tryResolveEnvironmentVariable)\n- if (username != null) {\n- val auth = AuthenticationBuilder().apply {\n- addUsername(username)\n- if (password != null) {\n- addPassword(password)\n+ val repo = RemoteRepository.Builder(repoId, \"default\", url.toString()).apply {\n+ setAuthentication(\n+ AuthenticationBuilder().apply {\n+ with(options) {\n+ addUsername(username?.let(::tryResolveEnvironmentVariable))", - "comment_created_at": "2021-05-14T13:25:09+00:00", - "comment_author": "ligee", - "comment_body": "I do not see any \"style\" in the `org.eclipse.aether.repository.Authentication` that would suggest such implementation. Can you please show, what you mean? Or maybe it is documented somewhere?\r\nWithout some examples or explicit recommendations, I would consider this approach ambiguous, and therefore not desirable.", - "pr_file_module": null - }, - { - "comment_id": "632598089", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4383, - "pr_file": "libraries/scripting/dependencies-maven/src/kotlin/script/experimental/dependencies/maven/MavenDependenciesResolver.kt", - "discussion_id": "632319980", - "commented_code": "@@ -89,21 +90,22 @@ class MavenDependenciesResolver : ExternalDependenciesResolver {\n val url = repositoryCoordinates.toRepositoryUrlOrNull()\n ?: return false.asSuccess()\n val repoId = repositoryCoordinates.string.replace(FORBIDDEN_CHARS, \"_\")\n- val repo = RemoteRepository.Builder(repoId, \"default\", url.toString())\n- if (repositoryCoordinates is MavenRepositoryCoordinates) {\n- val username = repositoryCoordinates.username?.let(::tryResolveEnvironmentVariable)\n- val password = repositoryCoordinates.password?.let(::tryResolveEnvironmentVariable)\n- if (username != null) {\n- val auth = AuthenticationBuilder().apply {\n- addUsername(username)\n- if (password != null) {\n- addPassword(password)\n+ val repo = RemoteRepository.Builder(repoId, \"default\", url.toString()).apply {\n+ setAuthentication(\n+ AuthenticationBuilder().apply {\n+ with(options) {\n+ addUsername(username?.let(::tryResolveEnvironmentVariable))", - "comment_created_at": "2021-05-14T15:13:14+00:00", - "comment_author": "ileasile", - "comment_body": "Each Authentication populates AuthenticationContext in the `fill` method. `AuthenticationContext` has all the data in the raw map `org.eclipse.aether.repository.AuthenticationContext#authData`, and then each consumer uses it in its own way. 
I mean that all the logic about nullability and emptiness is on the lower level, these checks will be performed anyway.\r\nFor example, see how authorization is used in wagon transporter: it is first converted to another instance (`org.eclipse.aether.transport.wagon.WagonTransporter#getProxy`) and then these values are used with all the checks performed here: `org.apache.maven.wagon.shared.http.AbstractHttpClientWagon#openConnectionInternal`. I don't think we should care about it: some implementations may use password without username or username without password, it should not be solved here", - "pr_file_module": null - }, - { - "comment_id": "633291461", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4383, - "pr_file": "libraries/scripting/dependencies-maven/src/kotlin/script/experimental/dependencies/maven/MavenDependenciesResolver.kt", - "discussion_id": "632319980", - "commented_code": "@@ -89,21 +90,22 @@ class MavenDependenciesResolver : ExternalDependenciesResolver {\n val url = repositoryCoordinates.toRepositoryUrlOrNull()\n ?: return false.asSuccess()\n val repoId = repositoryCoordinates.string.replace(FORBIDDEN_CHARS, \"_\")\n- val repo = RemoteRepository.Builder(repoId, \"default\", url.toString())\n- if (repositoryCoordinates is MavenRepositoryCoordinates) {\n- val username = repositoryCoordinates.username?.let(::tryResolveEnvironmentVariable)\n- val password = repositoryCoordinates.password?.let(::tryResolveEnvironmentVariable)\n- if (username != null) {\n- val auth = AuthenticationBuilder().apply {\n- addUsername(username)\n- if (password != null) {\n- addPassword(password)\n+ val repo = RemoteRepository.Builder(repoId, \"default\", url.toString()).apply {\n+ setAuthentication(\n+ AuthenticationBuilder().apply {\n+ with(options) {\n+ addUsername(username?.let(::tryResolveEnvironmentVariable))", - "comment_created_at": "2021-05-17T07:38:52+00:00", - "comment_author": "ligee", - "comment_body": "Since it is a place that I stumbled upon, it may look suspicious to others in the future too. 
I'd suggest at least adding an appropriate comment with the link to implementation (and/or documentation) as an explanation.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kotlin-effective-api-samples.json b/_reviewers/kotlin-effective-api-samples.json new file mode 100644 index 0000000..9b07e9e --- /dev/null +++ b/_reviewers/kotlin-effective-api-samples.json @@ -0,0 +1,214 @@ +[ + { + "discussion_id": "1866663166", + "pr_number": 5368, + "pr_file": "libraries/tools/kotlin-stdlib-gen/src/templates/Elements.kt", + "created_at": "2024-12-02T21:42:42+00:00", + "commented_code": "} builder {\n inline(Inline.Only)\n doc { \"Returns the first ${f.element} matching the given [predicate], or `null` if no such ${f.element} was found.\" }\n sample(\"samples.collections.Collections.Elements.find\")\n specialFor(CharSequences) {\n sample(\"samples.text.Strings.find\")\n }\n specialFor(ArraysOfUnsigned) {", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1866663166", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5368, + "pr_file": "libraries/tools/kotlin-stdlib-gen/src/templates/Elements.kt", + "discussion_id": "1866663166", + "commented_code": "@@ -607,7 +607,12 @@ object Elements : TemplateGroupBase() {\n } builder {\n inline(Inline.Only)\n doc { \"Returns the first ${f.element} matching the given [predicate], or `null` if no such ${f.element} was found.\" }\n- sample(\"samples.collections.Collections.Elements.find\")\n+ specialFor(CharSequences) {\n+ sample(\"samples.text.Strings.find\")\n+ }\n+ specialFor(ArraysOfUnsigned) {", + "comment_created_at": "2024-12-02T21:42:42+00:00", + "comment_author": "fzhinkin", + "comment_body": "The original `sample` call should be left as is, without any additional `specialFor` predicates, and the call generating a sample specialized for char sequences should be placed right after it (wrapped into a predicate):\r\n```\r\nsample(\"samples.collections.Collections.Elements.find\")\r\nspecialFor(CharSequences) {\r\n sample(\"samples.text.Strings.find\")\r\n}\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1870204716", + "pr_number": 3977, + "pr_file": "libraries/stdlib/jvm/src/kotlin/text/StringsJVM.kt", + "created_at": "2024-12-04T19:59:04+00:00", + "commented_code": "* If [ignoreCase] is true, the result of `Char.uppercaseChar().lowercaseChar()` on each character is compared.\n *\n * @param ignoreCase `true` to ignore character case when comparing strings. By default `false`.\n * @sample samples.text.Strings.equals", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1870204716", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3977, + "pr_file": "libraries/stdlib/jvm/src/kotlin/text/StringsJVM.kt", + "discussion_id": "1870204716", + "commented_code": "@@ -48,6 +48,7 @@ internal actual inline fun String.nativeLastIndexOf(str: String, fromIndex: Int)\n * If [ignoreCase] is true, the result of `Char.uppercaseChar().lowercaseChar()` on each character is compared.\n *\n * @param ignoreCase `true` to ignore character case when comparing strings. By default `false`.\n+ * @sample samples.text.Strings.equals", + "comment_created_at": "2024-12-04T19:59:04+00:00", + "comment_author": "fzhinkin", + "comment_body": "For all the actual declarations, samples need to be added to the corresponding expect declaration and all other actualizations.\r\n\r\nCheck `CharSequence?.contentEquals` as an example. 
", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1870237260", + "pr_number": 3977, + "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", + "created_at": "2024-12-04T20:26:49+00:00", + "commented_code": "assertPrints(mixedColor, \"brown&blue\")\n }\n\n @Sample\n fun toPattern() {\n val string = \"this is a regex\"\n val pattern = string.toPattern(1)\n assertPrints(pattern.pattern(), string)\n assertTrue(pattern.flags() == 1)\n }\n\n @Sample\n fun encodeToByteArray() {\n val str = \"Hello\"\n val byteArray = str.encodeToByteArray()\n assertPrints(byteArray.contentToString(), \"[72, 101, 108, 108, 111]\")\n assertPrints(byteArray.toString(Charsets.UTF_8), str)\n\n val byteArrayWithoutFirstLetter = str.encodeToByteArray(startIndex = 1, endIndex = str.length)", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1870237260", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3977, + "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", + "discussion_id": "1870237260", + "commented_code": "@@ -508,6 +508,104 @@ class Strings {\n assertPrints(mixedColor, \"brown&blue\")\n }\n \n+ @Sample\n+ fun toPattern() {\n+ val string = \"this is a regex\"\n+ val pattern = string.toPattern(1)\n+ assertPrints(pattern.pattern(), string)\n+ assertTrue(pattern.flags() == 1)\n+ }\n+\n+ @Sample\n+ fun encodeToByteArray() {\n+ val str = \"Hello\"\n+ val byteArray = str.encodeToByteArray()\n+ assertPrints(byteArray.contentToString(), \"[72, 101, 108, 108, 111]\")\n+ assertPrints(byteArray.toString(Charsets.UTF_8), str)\n+\n+ val byteArrayWithoutFirstLetter = str.encodeToByteArray(startIndex = 1, endIndex = str.length)", + "comment_created_at": "2024-12-04T20:26:49+00:00", + "comment_author": "fzhinkin", + "comment_body": "The sample is used for the different overload.", + "pr_file_module": null + }, + { + "comment_id": "1883936376", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3977, + "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", + "discussion_id": "1870237260", + "commented_code": "@@ -508,6 +508,104 @@ class Strings {\n assertPrints(mixedColor, \"brown&blue\")\n }\n \n+ @Sample\n+ fun toPattern() {\n+ val string = \"this is a regex\"\n+ val pattern = string.toPattern(1)\n+ assertPrints(pattern.pattern(), string)\n+ assertTrue(pattern.flags() == 1)\n+ }\n+\n+ @Sample\n+ fun encodeToByteArray() {\n+ val str = \"Hello\"\n+ val byteArray = str.encodeToByteArray()\n+ assertPrints(byteArray.contentToString(), \"[72, 101, 108, 108, 111]\")\n+ assertPrints(byteArray.toString(Charsets.UTF_8), str)\n+\n+ val byteArrayWithoutFirstLetter = str.encodeToByteArray(startIndex = 1, endIndex = str.length)", + "comment_created_at": "2024-12-13T13:27:15+00:00", + "comment_author": "CharlesLgn", + "comment_body": "I do not understant what you are asking here 😕", + "pr_file_module": null + }, + { + "comment_id": "1883961726", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3977, + "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", + "discussion_id": "1870237260", + "commented_code": "@@ -508,6 +508,104 @@ class Strings {\n assertPrints(mixedColor, \"brown&blue\")\n }\n \n+ @Sample\n+ fun toPattern() {\n+ val string = \"this is a regex\"\n+ val pattern = string.toPattern(1)\n+ assertPrints(pattern.pattern(), string)\n+ assertTrue(pattern.flags() == 1)\n+ }\n+\n+ @Sample\n+ fun encodeToByteArray() {\n+ val str = \"Hello\"\n+ val byteArray = str.encodeToByteArray()\n+ 
assertPrints(byteArray.contentToString(), \"[72, 101, 108, 108, 111]\")\n+ assertPrints(byteArray.toString(Charsets.UTF_8), str)\n+\n+ val byteArrayWithoutFirstLetter = str.encodeToByteArray(startIndex = 1, endIndex = str.length)", + "comment_created_at": "2024-12-13T13:47:00+00:00", + "comment_author": "fzhinkin", + "comment_body": "Sorry for the obscure comment. There are two `String.encodeToByteArray` overloads: one that does not accepts any parameters, and the one that does.\r\nThe sample showcases both these overloads, but the `@sample` tag referring the sample was placed only on one of those overloads.\r\n\r\nSo either the sample should not use the second overload, or the tag should be placed in KDoc for both of these functions.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1870241917", + "pr_number": 3977, + "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", + "created_at": "2024-12-04T20:30:36+00:00", + "commented_code": "assertPrints(mixedColor, \"brown&blue\")\n }\n\n @Sample\n fun toPattern() {\n val string = \"this is a regex\"\n val pattern = string.toPattern(1)\n assertPrints(pattern.pattern(), string)\n assertTrue(pattern.flags() == 1)\n }\n\n @Sample\n fun encodeToByteArray() {\n val str = \"Hello\"\n val byteArray = str.encodeToByteArray()\n assertPrints(byteArray.contentToString(), \"[72, 101, 108, 108, 111]\")\n assertPrints(byteArray.toString(Charsets.UTF_8), str)\n\n val byteArrayWithoutFirstLetter = str.encodeToByteArray(startIndex = 1, endIndex = str.length)\n assertPrints(byteArrayWithoutFirstLetter.contentToString(), \"[101, 108, 108, 111]\")\n assertPrints(byteArrayWithoutFirstLetter.toString(Charsets.UTF_8), \"ello\")\n\n val byteArrayWithoutLastLetter = str.encodeToByteArray(startIndex = 0, endIndex = str.length - 1)\n assertPrints(byteArrayWithoutLastLetter.contentToString(), \"[72, 101, 108, 108]\")\n assertPrints(byteArrayWithoutLastLetter.toString(Charsets.UTF_8), \"Hell\")\n }\n\n @Sample\n fun subString() {\n val str = \"abcde\"\n assertPrints(str.substring(0), \"abcde\")\n assertPrints(str.substring(1), \"bcde\")\n assertFails { str.substring(6) }", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1870241917", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3977, + "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", + "discussion_id": "1870241917", + "commented_code": "@@ -508,6 +508,104 @@ class Strings {\n assertPrints(mixedColor, \"brown&blue\")\n }\n \n+ @Sample\n+ fun toPattern() {\n+ val string = \"this is a regex\"\n+ val pattern = string.toPattern(1)\n+ assertPrints(pattern.pattern(), string)\n+ assertTrue(pattern.flags() == 1)\n+ }\n+\n+ @Sample\n+ fun encodeToByteArray() {\n+ val str = \"Hello\"\n+ val byteArray = str.encodeToByteArray()\n+ assertPrints(byteArray.contentToString(), \"[72, 101, 108, 108, 111]\")\n+ assertPrints(byteArray.toString(Charsets.UTF_8), str)\n+\n+ val byteArrayWithoutFirstLetter = str.encodeToByteArray(startIndex = 1, endIndex = str.length)\n+ assertPrints(byteArrayWithoutFirstLetter.contentToString(), \"[101, 108, 108, 111]\")\n+ assertPrints(byteArrayWithoutFirstLetter.toString(Charsets.UTF_8), \"ello\")\n+\n+ val byteArrayWithoutLastLetter = str.encodeToByteArray(startIndex = 0, endIndex = str.length - 1)\n+ assertPrints(byteArrayWithoutLastLetter.contentToString(), \"[72, 101, 108, 108]\")\n+ assertPrints(byteArrayWithoutLastLetter.toString(Charsets.UTF_8), \"Hell\")\n+ }\n+\n+ @Sample\n+ fun subString() {\n+ val str = 
\"abcde\"\n+ assertPrints(str.substring(0), \"abcde\")\n+ assertPrints(str.substring(1), \"bcde\")\n+ assertFails { str.substring(6) }", + "comment_created_at": "2024-12-04T20:30:36+00:00", + "comment_author": "fzhinkin", + "comment_body": "For all `assertFails` it is better to add a comment describing the reason of the failure. Here, for example, it could be `// index exceeds string length`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1870244939", + "pr_number": 3977, + "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", + "created_at": "2024-12-04T20:33:48+00:00", + "commented_code": "assertPrints(mixedColor, \"brown&blue\")\n }\n\n @Sample\n fun toPattern() {\n val string = \"this is a regex\"\n val pattern = string.toPattern(1)\n assertPrints(pattern.pattern(), string)\n assertTrue(pattern.flags() == 1)\n }\n\n @Sample\n fun encodeToByteArray() {\n val str = \"Hello\"\n val byteArray = str.encodeToByteArray()\n assertPrints(byteArray.contentToString(), \"[72, 101, 108, 108, 111]\")\n assertPrints(byteArray.toString(Charsets.UTF_8), str)\n\n val byteArrayWithoutFirstLetter = str.encodeToByteArray(startIndex = 1, endIndex = str.length)\n assertPrints(byteArrayWithoutFirstLetter.contentToString(), \"[101, 108, 108, 111]\")\n assertPrints(byteArrayWithoutFirstLetter.toString(Charsets.UTF_8), \"ello\")\n\n val byteArrayWithoutLastLetter = str.encodeToByteArray(startIndex = 0, endIndex = str.length - 1)\n assertPrints(byteArrayWithoutLastLetter.contentToString(), \"[72, 101, 108, 108]\")\n assertPrints(byteArrayWithoutLastLetter.toString(Charsets.UTF_8), \"Hell\")\n }\n\n @Sample\n fun subString() {\n val str = \"abcde\"\n assertPrints(str.substring(0), \"abcde\")\n assertPrints(str.substring(1), \"bcde\")\n assertFails { str.substring(6) }\n assertPrints(str.substring(0, 0), \"\")", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1870244939", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3977, + "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", + "discussion_id": "1870244939", + "commented_code": "@@ -508,6 +508,104 @@ class Strings {\n assertPrints(mixedColor, \"brown&blue\")\n }\n \n+ @Sample\n+ fun toPattern() {\n+ val string = \"this is a regex\"\n+ val pattern = string.toPattern(1)\n+ assertPrints(pattern.pattern(), string)\n+ assertTrue(pattern.flags() == 1)\n+ }\n+\n+ @Sample\n+ fun encodeToByteArray() {\n+ val str = \"Hello\"\n+ val byteArray = str.encodeToByteArray()\n+ assertPrints(byteArray.contentToString(), \"[72, 101, 108, 108, 111]\")\n+ assertPrints(byteArray.toString(Charsets.UTF_8), str)\n+\n+ val byteArrayWithoutFirstLetter = str.encodeToByteArray(startIndex = 1, endIndex = str.length)\n+ assertPrints(byteArrayWithoutFirstLetter.contentToString(), \"[101, 108, 108, 111]\")\n+ assertPrints(byteArrayWithoutFirstLetter.toString(Charsets.UTF_8), \"ello\")\n+\n+ val byteArrayWithoutLastLetter = str.encodeToByteArray(startIndex = 0, endIndex = str.length - 1)\n+ assertPrints(byteArrayWithoutLastLetter.contentToString(), \"[72, 101, 108, 108]\")\n+ assertPrints(byteArrayWithoutLastLetter.toString(Charsets.UTF_8), \"Hell\")\n+ }\n+\n+ @Sample\n+ fun subString() {\n+ val str = \"abcde\"\n+ assertPrints(str.substring(0), \"abcde\")\n+ assertPrints(str.substring(1), \"bcde\")\n+ assertFails { str.substring(6) }\n+ assertPrints(str.substring(0, 0), \"\")", + "comment_created_at": "2024-12-04T20:33:48+00:00", + "comment_author": "fzhinkin", + "comment_body": "There are quite a few 
examples here, so it's better to split the function in two and provide samples dedicated to each particular `substring` overload.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1987938136", + "pr_number": 5411, + "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", + "created_at": "2025-03-10T19:59:51+00:00", + "commented_code": "assertPrints(matchDetails(inputString, toFind, 10), \"Searching for 'ever' in 'Never ever give up' starting at position 10: Not found\")\n }\n\n @Sample\n fun lastIndexOf() {\n fun matchDetails(inputString: String, whatToFind: String, startIndex: Int = 0): String {\n val matchIndex = inputString.lastIndexOf(whatToFind, startIndex)\n return \"Searching for '$whatToFind' in '$inputString' starting at position $startIndex: \" +\n if (matchIndex >= 0) \"Found at $matchIndex\" else \"Not found\"\n }\n\n val inputString = \"Never ever give up\"\n val toFind = \"ever\"\n\n assertPrints(matchDetails(inputString, toFind), \"Searching for 'ever' in 'Never ever give up' starting at position 0: Found at 6\")\n assertPrints(matchDetails(inputString, toFind, 10), \"Searching for 'ever' in 'Never ever give up' starting at position 10: Not found\")\n }", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1987938136", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5411, + "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", + "discussion_id": "1987938136", + "commented_code": "@@ -444,6 +444,33 @@ class Strings {\n assertPrints(matchDetails(inputString, toFind, 10), \"Searching for 'ever' in 'Never ever give up' starting at position 10: Not found\")\n }\n \n+ @Sample\n+ fun lastIndexOf() {\n+ fun matchDetails(inputString: String, whatToFind: String, startIndex: Int = 0): String {\n+ val matchIndex = inputString.lastIndexOf(whatToFind, startIndex)\n+ return \"Searching for '$whatToFind' in '$inputString' starting at position $startIndex: \" +\n+ if (matchIndex >= 0) \"Found at $matchIndex\" else \"Not found\"\n+ }\n+\n+ val inputString = \"Never ever give up\"\n+ val toFind = \"ever\"\n+\n+ assertPrints(matchDetails(inputString, toFind), \"Searching for 'ever' in 'Never ever give up' starting at position 0: Found at 6\")\n+ assertPrints(matchDetails(inputString, toFind, 10), \"Searching for 'ever' in 'Never ever give up' starting at position 10: Not found\")\n+ }", + "comment_created_at": "2025-03-10T19:59:51+00:00", + "comment_author": "fzhinkin", + "comment_body": "Could you please also add an example with `startPosition = 5`?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "764231663", + "pr_number": 4565, + "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", + "created_at": "2021-12-07T17:48:04+00:00", + "commented_code": "assertPrints(mixedColor, \"brown&blue\")\n }\n\n @Sample\n fun all() {\n val name = \"fatima\"\n val containsWhiteSpace: Boolean = name.any{\n it.isWhitespace()\n }\n assertFalse(containsWhiteSpace)", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "764231663", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4565, + "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", + "discussion_id": "764231663", + "commented_code": "@@ -507,4 +507,29 @@ class Strings {\n \n assertPrints(mixedColor, \"brown&blue\")\n }\n+\n+ @Sample\n+ fun all() {\n+ val name = \"fatima\"\n+ val containsWhiteSpace: Boolean = name.any{\n+ it.isWhitespace()\n+ }\n+ assertFalse(containsWhiteSpace)", + 
"comment_created_at": "2021-12-07T17:48:04+00:00", + "comment_author": "qurbonzoda", + "comment_body": "In samples we use `assertPrints(value, stringRepresentation)` which is converted to \r\n```\r\nprintln(value) // stringRepresentation\r\n```\r\nin Kotlin website. E.g. the `splitToSequence` sample above is converted to this runnable code in docs website: https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.text/split-to-sequence.html", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1299179286", + "pr_number": 5169, + "pr_file": "libraries/tools/kotlin-stdlib-gen/src/templates/Ranges.kt", + "created_at": "2023-08-19T12:13:11+00:00", + "commented_code": "return TProgression.fromClosedRange(first, last, if (this.step > 0) step else -step)\n \"\"\"\n }\n sample(\"samples.ranges.Ranges.step${primitive!!.stepType}\")", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1299179286", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5169, + "pr_file": "libraries/tools/kotlin-stdlib-gen/src/templates/Ranges.kt", + "discussion_id": "1299179286", + "commented_code": "@@ -76,6 +76,7 @@ object RangeOps : TemplateGroupBase() {\n return TProgression.fromClosedRange(first, last, if (this.step > 0) step else -step)\n \"\"\"\n }\n+ sample(\"samples.ranges.Ranges.step${primitive!!.stepType}\")", + "comment_created_at": "2023-08-19T12:13:11+00:00", + "comment_author": "ilya-g", + "comment_body": "Minor: please place it after `doc { }` block, because a sample is a part of function documentation. ", + "pr_file_module": null + }, + { + "comment_id": "1304693658", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5169, + "pr_file": "libraries/tools/kotlin-stdlib-gen/src/templates/Ranges.kt", + "discussion_id": "1299179286", + "commented_code": "@@ -76,6 +76,7 @@ object RangeOps : TemplateGroupBase() {\n return TProgression.fromClosedRange(first, last, if (this.step > 0) step else -step)\n \"\"\"\n }\n+ sample(\"samples.ranges.Ranges.step${primitive!!.stepType}\")", + "comment_created_at": "2023-08-24T18:07:19+00:00", + "comment_author": "AlexCue987", + "comment_body": "fixed", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kotlin-effective-api-samples.md b/_reviewers/kotlin-effective-api-samples.md index e9d7473..f2e463d 100644 --- a/_reviewers/kotlin-effective-api-samples.md +++ b/_reviewers/kotlin-effective-api-samples.md @@ -72,219 +72,3 @@ Create clear, comprehensive, and properly structured code samples to document AP 5. 
**Ensure samples are properly referenced:** - Add `@sample` tags to all relevant function variants, including expect/actual declarations - For generated code, update samples by running appropriate generation commands - - -[ - { - "discussion_id": "1866663166", - "pr_number": 5368, - "pr_file": "libraries/tools/kotlin-stdlib-gen/src/templates/Elements.kt", - "created_at": "2024-12-02T21:42:42+00:00", - "commented_code": "} builder {\n inline(Inline.Only)\n doc { \"Returns the first ${f.element} matching the given [predicate], or `null` if no such ${f.element} was found.\" }\n sample(\"samples.collections.Collections.Elements.find\")\n specialFor(CharSequences) {\n sample(\"samples.text.Strings.find\")\n }\n specialFor(ArraysOfUnsigned) {", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1866663166", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5368, - "pr_file": "libraries/tools/kotlin-stdlib-gen/src/templates/Elements.kt", - "discussion_id": "1866663166", - "commented_code": "@@ -607,7 +607,12 @@ object Elements : TemplateGroupBase() {\n } builder {\n inline(Inline.Only)\n doc { \"Returns the first ${f.element} matching the given [predicate], or `null` if no such ${f.element} was found.\" }\n- sample(\"samples.collections.Collections.Elements.find\")\n+ specialFor(CharSequences) {\n+ sample(\"samples.text.Strings.find\")\n+ }\n+ specialFor(ArraysOfUnsigned) {", - "comment_created_at": "2024-12-02T21:42:42+00:00", - "comment_author": "fzhinkin", - "comment_body": "The original `sample` call should be left as is, without any additional `specialFor` predicates, and the call generating a sample specialized for char sequences should be placed right after it (wrapped into a predicate):\r\n```\r\nsample(\"samples.collections.Collections.Elements.find\")\r\nspecialFor(CharSequences) {\r\n sample(\"samples.text.Strings.find\")\r\n}\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1870204716", - "pr_number": 3977, - "pr_file": "libraries/stdlib/jvm/src/kotlin/text/StringsJVM.kt", - "created_at": "2024-12-04T19:59:04+00:00", - "commented_code": "* If [ignoreCase] is true, the result of `Char.uppercaseChar().lowercaseChar()` on each character is compared.\n *\n * @param ignoreCase `true` to ignore character case when comparing strings. By default `false`.\n * @sample samples.text.Strings.equals", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1870204716", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3977, - "pr_file": "libraries/stdlib/jvm/src/kotlin/text/StringsJVM.kt", - "discussion_id": "1870204716", - "commented_code": "@@ -48,6 +48,7 @@ internal actual inline fun String.nativeLastIndexOf(str: String, fromIndex: Int)\n * If [ignoreCase] is true, the result of `Char.uppercaseChar().lowercaseChar()` on each character is compared.\n *\n * @param ignoreCase `true` to ignore character case when comparing strings. By default `false`.\n+ * @sample samples.text.Strings.equals", - "comment_created_at": "2024-12-04T19:59:04+00:00", - "comment_author": "fzhinkin", - "comment_body": "For all the actual declarations, samples need to be added to the corresponding expect declaration and all other actualizations.\r\n\r\nCheck `CharSequence?.contentEquals` as an example. 
", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1870237260", - "pr_number": 3977, - "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", - "created_at": "2024-12-04T20:26:49+00:00", - "commented_code": "assertPrints(mixedColor, \"brown&blue\")\n }\n\n @Sample\n fun toPattern() {\n val string = \"this is a regex\"\n val pattern = string.toPattern(1)\n assertPrints(pattern.pattern(), string)\n assertTrue(pattern.flags() == 1)\n }\n\n @Sample\n fun encodeToByteArray() {\n val str = \"Hello\"\n val byteArray = str.encodeToByteArray()\n assertPrints(byteArray.contentToString(), \"[72, 101, 108, 108, 111]\")\n assertPrints(byteArray.toString(Charsets.UTF_8), str)\n\n val byteArrayWithoutFirstLetter = str.encodeToByteArray(startIndex = 1, endIndex = str.length)", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1870237260", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3977, - "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", - "discussion_id": "1870237260", - "commented_code": "@@ -508,6 +508,104 @@ class Strings {\n assertPrints(mixedColor, \"brown&blue\")\n }\n \n+ @Sample\n+ fun toPattern() {\n+ val string = \"this is a regex\"\n+ val pattern = string.toPattern(1)\n+ assertPrints(pattern.pattern(), string)\n+ assertTrue(pattern.flags() == 1)\n+ }\n+\n+ @Sample\n+ fun encodeToByteArray() {\n+ val str = \"Hello\"\n+ val byteArray = str.encodeToByteArray()\n+ assertPrints(byteArray.contentToString(), \"[72, 101, 108, 108, 111]\")\n+ assertPrints(byteArray.toString(Charsets.UTF_8), str)\n+\n+ val byteArrayWithoutFirstLetter = str.encodeToByteArray(startIndex = 1, endIndex = str.length)", - "comment_created_at": "2024-12-04T20:26:49+00:00", - "comment_author": "fzhinkin", - "comment_body": "The sample is used for the different overload.", - "pr_file_module": null - }, - { - "comment_id": "1883936376", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3977, - "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", - "discussion_id": "1870237260", - "commented_code": "@@ -508,6 +508,104 @@ class Strings {\n assertPrints(mixedColor, \"brown&blue\")\n }\n \n+ @Sample\n+ fun toPattern() {\n+ val string = \"this is a regex\"\n+ val pattern = string.toPattern(1)\n+ assertPrints(pattern.pattern(), string)\n+ assertTrue(pattern.flags() == 1)\n+ }\n+\n+ @Sample\n+ fun encodeToByteArray() {\n+ val str = \"Hello\"\n+ val byteArray = str.encodeToByteArray()\n+ assertPrints(byteArray.contentToString(), \"[72, 101, 108, 108, 111]\")\n+ assertPrints(byteArray.toString(Charsets.UTF_8), str)\n+\n+ val byteArrayWithoutFirstLetter = str.encodeToByteArray(startIndex = 1, endIndex = str.length)", - "comment_created_at": "2024-12-13T13:27:15+00:00", - "comment_author": "CharlesLgn", - "comment_body": "I do not understant what you are asking here \ud83d\ude15", - "pr_file_module": null - }, - { - "comment_id": "1883961726", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3977, - "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", - "discussion_id": "1870237260", - "commented_code": "@@ -508,6 +508,104 @@ class Strings {\n assertPrints(mixedColor, \"brown&blue\")\n }\n \n+ @Sample\n+ fun toPattern() {\n+ val string = \"this is a regex\"\n+ val pattern = string.toPattern(1)\n+ assertPrints(pattern.pattern(), string)\n+ assertTrue(pattern.flags() == 1)\n+ }\n+\n+ @Sample\n+ fun encodeToByteArray() {\n+ val str = \"Hello\"\n+ val byteArray = str.encodeToByteArray()\n+ 
assertPrints(byteArray.contentToString(), \"[72, 101, 108, 108, 111]\")\n+ assertPrints(byteArray.toString(Charsets.UTF_8), str)\n+\n+ val byteArrayWithoutFirstLetter = str.encodeToByteArray(startIndex = 1, endIndex = str.length)", - "comment_created_at": "2024-12-13T13:47:00+00:00", - "comment_author": "fzhinkin", - "comment_body": "Sorry for the obscure comment. There are two `String.encodeToByteArray` overloads: one that does not accepts any parameters, and the one that does.\r\nThe sample showcases both these overloads, but the `@sample` tag referring the sample was placed only on one of those overloads.\r\n\r\nSo either the sample should not use the second overload, or the tag should be placed in KDoc for both of these functions.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1870241917", - "pr_number": 3977, - "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", - "created_at": "2024-12-04T20:30:36+00:00", - "commented_code": "assertPrints(mixedColor, \"brown&blue\")\n }\n\n @Sample\n fun toPattern() {\n val string = \"this is a regex\"\n val pattern = string.toPattern(1)\n assertPrints(pattern.pattern(), string)\n assertTrue(pattern.flags() == 1)\n }\n\n @Sample\n fun encodeToByteArray() {\n val str = \"Hello\"\n val byteArray = str.encodeToByteArray()\n assertPrints(byteArray.contentToString(), \"[72, 101, 108, 108, 111]\")\n assertPrints(byteArray.toString(Charsets.UTF_8), str)\n\n val byteArrayWithoutFirstLetter = str.encodeToByteArray(startIndex = 1, endIndex = str.length)\n assertPrints(byteArrayWithoutFirstLetter.contentToString(), \"[101, 108, 108, 111]\")\n assertPrints(byteArrayWithoutFirstLetter.toString(Charsets.UTF_8), \"ello\")\n\n val byteArrayWithoutLastLetter = str.encodeToByteArray(startIndex = 0, endIndex = str.length - 1)\n assertPrints(byteArrayWithoutLastLetter.contentToString(), \"[72, 101, 108, 108]\")\n assertPrints(byteArrayWithoutLastLetter.toString(Charsets.UTF_8), \"Hell\")\n }\n\n @Sample\n fun subString() {\n val str = \"abcde\"\n assertPrints(str.substring(0), \"abcde\")\n assertPrints(str.substring(1), \"bcde\")\n assertFails { str.substring(6) }", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1870241917", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3977, - "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", - "discussion_id": "1870241917", - "commented_code": "@@ -508,6 +508,104 @@ class Strings {\n assertPrints(mixedColor, \"brown&blue\")\n }\n \n+ @Sample\n+ fun toPattern() {\n+ val string = \"this is a regex\"\n+ val pattern = string.toPattern(1)\n+ assertPrints(pattern.pattern(), string)\n+ assertTrue(pattern.flags() == 1)\n+ }\n+\n+ @Sample\n+ fun encodeToByteArray() {\n+ val str = \"Hello\"\n+ val byteArray = str.encodeToByteArray()\n+ assertPrints(byteArray.contentToString(), \"[72, 101, 108, 108, 111]\")\n+ assertPrints(byteArray.toString(Charsets.UTF_8), str)\n+\n+ val byteArrayWithoutFirstLetter = str.encodeToByteArray(startIndex = 1, endIndex = str.length)\n+ assertPrints(byteArrayWithoutFirstLetter.contentToString(), \"[101, 108, 108, 111]\")\n+ assertPrints(byteArrayWithoutFirstLetter.toString(Charsets.UTF_8), \"ello\")\n+\n+ val byteArrayWithoutLastLetter = str.encodeToByteArray(startIndex = 0, endIndex = str.length - 1)\n+ assertPrints(byteArrayWithoutLastLetter.contentToString(), \"[72, 101, 108, 108]\")\n+ assertPrints(byteArrayWithoutLastLetter.toString(Charsets.UTF_8), \"Hell\")\n+ }\n+\n+ @Sample\n+ fun subString() {\n+ val str = 
\"abcde\"\n+ assertPrints(str.substring(0), \"abcde\")\n+ assertPrints(str.substring(1), \"bcde\")\n+ assertFails { str.substring(6) }", - "comment_created_at": "2024-12-04T20:30:36+00:00", - "comment_author": "fzhinkin", - "comment_body": "For all `assertFails` it is better to add a comment describing the reason of the failure. Here, for example, it could be `// index exceeds string length`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1870244939", - "pr_number": 3977, - "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", - "created_at": "2024-12-04T20:33:48+00:00", - "commented_code": "assertPrints(mixedColor, \"brown&blue\")\n }\n\n @Sample\n fun toPattern() {\n val string = \"this is a regex\"\n val pattern = string.toPattern(1)\n assertPrints(pattern.pattern(), string)\n assertTrue(pattern.flags() == 1)\n }\n\n @Sample\n fun encodeToByteArray() {\n val str = \"Hello\"\n val byteArray = str.encodeToByteArray()\n assertPrints(byteArray.contentToString(), \"[72, 101, 108, 108, 111]\")\n assertPrints(byteArray.toString(Charsets.UTF_8), str)\n\n val byteArrayWithoutFirstLetter = str.encodeToByteArray(startIndex = 1, endIndex = str.length)\n assertPrints(byteArrayWithoutFirstLetter.contentToString(), \"[101, 108, 108, 111]\")\n assertPrints(byteArrayWithoutFirstLetter.toString(Charsets.UTF_8), \"ello\")\n\n val byteArrayWithoutLastLetter = str.encodeToByteArray(startIndex = 0, endIndex = str.length - 1)\n assertPrints(byteArrayWithoutLastLetter.contentToString(), \"[72, 101, 108, 108]\")\n assertPrints(byteArrayWithoutLastLetter.toString(Charsets.UTF_8), \"Hell\")\n }\n\n @Sample\n fun subString() {\n val str = \"abcde\"\n assertPrints(str.substring(0), \"abcde\")\n assertPrints(str.substring(1), \"bcde\")\n assertFails { str.substring(6) }\n assertPrints(str.substring(0, 0), \"\")", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1870244939", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3977, - "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", - "discussion_id": "1870244939", - "commented_code": "@@ -508,6 +508,104 @@ class Strings {\n assertPrints(mixedColor, \"brown&blue\")\n }\n \n+ @Sample\n+ fun toPattern() {\n+ val string = \"this is a regex\"\n+ val pattern = string.toPattern(1)\n+ assertPrints(pattern.pattern(), string)\n+ assertTrue(pattern.flags() == 1)\n+ }\n+\n+ @Sample\n+ fun encodeToByteArray() {\n+ val str = \"Hello\"\n+ val byteArray = str.encodeToByteArray()\n+ assertPrints(byteArray.contentToString(), \"[72, 101, 108, 108, 111]\")\n+ assertPrints(byteArray.toString(Charsets.UTF_8), str)\n+\n+ val byteArrayWithoutFirstLetter = str.encodeToByteArray(startIndex = 1, endIndex = str.length)\n+ assertPrints(byteArrayWithoutFirstLetter.contentToString(), \"[101, 108, 108, 111]\")\n+ assertPrints(byteArrayWithoutFirstLetter.toString(Charsets.UTF_8), \"ello\")\n+\n+ val byteArrayWithoutLastLetter = str.encodeToByteArray(startIndex = 0, endIndex = str.length - 1)\n+ assertPrints(byteArrayWithoutLastLetter.contentToString(), \"[72, 101, 108, 108]\")\n+ assertPrints(byteArrayWithoutLastLetter.toString(Charsets.UTF_8), \"Hell\")\n+ }\n+\n+ @Sample\n+ fun subString() {\n+ val str = \"abcde\"\n+ assertPrints(str.substring(0), \"abcde\")\n+ assertPrints(str.substring(1), \"bcde\")\n+ assertFails { str.substring(6) }\n+ assertPrints(str.substring(0, 0), \"\")", - "comment_created_at": "2024-12-04T20:33:48+00:00", - "comment_author": "fzhinkin", - "comment_body": "There are quite a few 
examples here, so it's better to split the function in two and provide samples dedicated to each particular `substring` overload.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1987938136", - "pr_number": 5411, - "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", - "created_at": "2025-03-10T19:59:51+00:00", - "commented_code": "assertPrints(matchDetails(inputString, toFind, 10), \"Searching for 'ever' in 'Never ever give up' starting at position 10: Not found\")\n }\n\n @Sample\n fun lastIndexOf() {\n fun matchDetails(inputString: String, whatToFind: String, startIndex: Int = 0): String {\n val matchIndex = inputString.lastIndexOf(whatToFind, startIndex)\n return \"Searching for '$whatToFind' in '$inputString' starting at position $startIndex: \" +\n if (matchIndex >= 0) \"Found at $matchIndex\" else \"Not found\"\n }\n\n val inputString = \"Never ever give up\"\n val toFind = \"ever\"\n\n assertPrints(matchDetails(inputString, toFind), \"Searching for 'ever' in 'Never ever give up' starting at position 0: Found at 6\")\n assertPrints(matchDetails(inputString, toFind, 10), \"Searching for 'ever' in 'Never ever give up' starting at position 10: Not found\")\n }", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1987938136", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5411, - "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", - "discussion_id": "1987938136", - "commented_code": "@@ -444,6 +444,33 @@ class Strings {\n assertPrints(matchDetails(inputString, toFind, 10), \"Searching for 'ever' in 'Never ever give up' starting at position 10: Not found\")\n }\n \n+ @Sample\n+ fun lastIndexOf() {\n+ fun matchDetails(inputString: String, whatToFind: String, startIndex: Int = 0): String {\n+ val matchIndex = inputString.lastIndexOf(whatToFind, startIndex)\n+ return \"Searching for '$whatToFind' in '$inputString' starting at position $startIndex: \" +\n+ if (matchIndex >= 0) \"Found at $matchIndex\" else \"Not found\"\n+ }\n+\n+ val inputString = \"Never ever give up\"\n+ val toFind = \"ever\"\n+\n+ assertPrints(matchDetails(inputString, toFind), \"Searching for 'ever' in 'Never ever give up' starting at position 0: Found at 6\")\n+ assertPrints(matchDetails(inputString, toFind, 10), \"Searching for 'ever' in 'Never ever give up' starting at position 10: Not found\")\n+ }", - "comment_created_at": "2025-03-10T19:59:51+00:00", - "comment_author": "fzhinkin", - "comment_body": "Could you please also add an example with `startPosition = 5`?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "764231663", - "pr_number": 4565, - "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", - "created_at": "2021-12-07T17:48:04+00:00", - "commented_code": "assertPrints(mixedColor, \"brown&blue\")\n }\n\n @Sample\n fun all() {\n val name = \"fatima\"\n val containsWhiteSpace: Boolean = name.any{\n it.isWhitespace()\n }\n assertFalse(containsWhiteSpace)", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "764231663", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4565, - "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", - "discussion_id": "764231663", - "commented_code": "@@ -507,4 +507,29 @@ class Strings {\n \n assertPrints(mixedColor, \"brown&blue\")\n }\n+\n+ @Sample\n+ fun all() {\n+ val name = \"fatima\"\n+ val containsWhiteSpace: Boolean = name.any{\n+ it.isWhitespace()\n+ }\n+ assertFalse(containsWhiteSpace)", - 
"comment_created_at": "2021-12-07T17:48:04+00:00", - "comment_author": "qurbonzoda", - "comment_body": "In samples we use `assertPrints(value, stringRepresentation)` which is converted to \r\n```\r\nprintln(value) // stringRepresentation\r\n```\r\nin Kotlin website. E.g. the `splitToSequence` sample above is converted to this runnable code in docs website: https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.text/split-to-sequence.html", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1299179286", - "pr_number": 5169, - "pr_file": "libraries/tools/kotlin-stdlib-gen/src/templates/Ranges.kt", - "created_at": "2023-08-19T12:13:11+00:00", - "commented_code": "return TProgression.fromClosedRange(first, last, if (this.step > 0) step else -step)\n \"\"\"\n }\n sample(\"samples.ranges.Ranges.step${primitive!!.stepType}\")", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1299179286", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5169, - "pr_file": "libraries/tools/kotlin-stdlib-gen/src/templates/Ranges.kt", - "discussion_id": "1299179286", - "commented_code": "@@ -76,6 +76,7 @@ object RangeOps : TemplateGroupBase() {\n return TProgression.fromClosedRange(first, last, if (this.step > 0) step else -step)\n \"\"\"\n }\n+ sample(\"samples.ranges.Ranges.step${primitive!!.stepType}\")", - "comment_created_at": "2023-08-19T12:13:11+00:00", - "comment_author": "ilya-g", - "comment_body": "Minor: please place it after `doc { }` block, because a sample is a part of function documentation. ", - "pr_file_module": null - }, - { - "comment_id": "1304693658", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5169, - "pr_file": "libraries/tools/kotlin-stdlib-gen/src/templates/Ranges.kt", - "discussion_id": "1299179286", - "commented_code": "@@ -76,6 +76,7 @@ object RangeOps : TemplateGroupBase() {\n return TProgression.fromClosedRange(first, last, if (this.step > 0) step else -step)\n \"\"\"\n }\n+ sample(\"samples.ranges.Ranges.step${primitive!!.stepType}\")", - "comment_created_at": "2023-08-24T18:07:19+00:00", - "comment_author": "AlexCue987", - "comment_body": "fixed", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kotlin-keep-code-clearly-organized.json b/_reviewers/kotlin-keep-code-clearly-organized.json new file mode 100644 index 0000000..ae6fe24 --- /dev/null +++ b/_reviewers/kotlin-keep-code-clearly-organized.json @@ -0,0 +1,168 @@ +[ + { + "discussion_id": "1809231585", + "pr_number": 5364, + "pr_file": "libraries/stdlib/jvm/src/kotlin/text/StringNumberConversionsJVM.kt", + "created_at": "2024-10-21T17:37:16+00:00", + "commented_code": "public fun String.toBigDecimalOrNull(mathContext: java.math.MathContext): java.math.BigDecimal? =\n screenFloatValue(this) { it.toBigDecimal(mathContext) }\n\n/**\n * Recommended floating point number validation RegEx from the javadoc of `java.lang.Double.valueOf(String)`\n */\nprivate object ScreenFloatValueRegEx {\n @JvmField val value = run {\n val Digits = \"(\\\\p{Digit}+)\"\n val HexDigits = \"(\\\\p{XDigit}+)\"\n val Exp = \"[eE][+-]?$Digits\"\n\n val HexString = \"(0[xX]$HexDigits(\\\\.)?)|\" + // 0[xX] HexDigits ._opt BinaryExponent FloatTypeSuffix_opt\n \"(0[xX]$HexDigits?(\\\\.)$HexDigits)\" // 0[xX] HexDigits_opt . HexDigits BinaryExponent FloatTypeSuffix_opt\n\n val Number = \"($Digits(\\\\.)?($Digits?)($Exp)?)|\" + // Digits ._opt Digits_opt ExponentPart_opt FloatTypeSuffix_opt\n \"(\\\\.($Digits)($Exp)?)|\" + // . 
Digits ExponentPart_opt FloatTypeSuffix_opt\n \"(($HexString)[pP][+-]?$Digits)\" // HexSignificand BinaryExponent\n\n val fpRegex = \"[\\\\x00-\\\\x20]*[+-]?(NaN|Infinity|(($Number)[fFdD]?))[\\\\x00-\\\\x20]*\"\n\n Regex(fpRegex)\n }\n}\n\nprivate inline fun screenFloatValue(str: String, parse: (String) -> T): T? {\n return try {\n if (ScreenFloatValueRegEx.value.matches(str))\n if (isValidFloat(str))\n parse(str)\n else\n null\n } catch (e: NumberFormatException) { // overflow\n } catch (_: NumberFormatException) { // overflow\n null\n }\n}\n\nprivate const val LengthOfNaN = 2 // \"NaN\".length - 1\nprivate const val LengthOfInfinity = 7 // \"Infinity\".length - 1\n\nprivate fun isValidFloat(s: String): Boolean {\n // A float can have one of two representations:\n //\n // 1. Standard:\n // - With an integer part only: 1234\n // - With integer and fractional parts: 1234.4678\n // - With a fractional part only: .4678\n //\n // Optional sign prefix: + or -\n // Optional signed exponent: e or E, followed by optionally signed digits (+12, -12, 12)\n // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n //\n // 2. Hexadecimal:\n // - With an integer part only: 0x12ab\n // - With integer and fractional parts: 0x12ab.CD78\n // - With a fractional part only: 0x.CD78\n //\n // Mandatory signed exponent: p or P, followed by optionally signed digits (+12, -12, 12)\n //\n // Optional sign prefix: + or -\n // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n //\n // Two special cases:\n // \"NaN\" and \"Infinity\" strings, can have an optional sign prefix (+ or -)\n //\n // Implementation notes:\n // - The pattern \"myChar.code or 0x20 == 'x'.code\" is used to perform a case-insensitive\n // comparison of a character. Adding the 0x20 bit turns an upper case ASCII letter into\n // a lower case one.\n\n var start = 0\n var end = s.length - 1\n\n // Skip leading spaces\n while (start <= end && s[start].code <= 0x20) start++\n\n // Empty/whitespace string\n if (start > end) return false\n\n // Skip trailing spaces\n while (end > start && s[end].code <= 0x20) end--\n\n // Number starts with a positive or negative sign\n if (s[start] == '+' || s[start] == '-') start++\n // If we have nothing after the sign, the string is invalid\n if (start > end) return false\n\n var hasIntegerPart: Boolean\n var hasFractionalPart = false\n var isHex = false\n\n // Might be a hex string\n if (s[start] == '0') {\n start++\n // A \"0\" on its own is valid\n if (start > end) return true\n\n // Test for [xX] to see if we truly have a hex string\n if (s[start].code or 0x20 == 'x'.code) {", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1809231585", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5364, + "pr_file": "libraries/stdlib/jvm/src/kotlin/text/StringNumberConversionsJVM.kt", + "discussion_id": "1809231585", + "commented_code": "@@ -239,35 +239,215 @@ public fun String.toBigDecimalOrNull(): java.math.BigDecimal? =\n public fun String.toBigDecimalOrNull(mathContext: java.math.MathContext): java.math.BigDecimal? 
=\n screenFloatValue(this) { it.toBigDecimal(mathContext) }\n \n-/**\n- * Recommended floating point number validation RegEx from the javadoc of `java.lang.Double.valueOf(String)`\n- */\n-private object ScreenFloatValueRegEx {\n- @JvmField val value = run {\n- val Digits = \"(\\\\p{Digit}+)\"\n- val HexDigits = \"(\\\\p{XDigit}+)\"\n- val Exp = \"[eE][+-]?$Digits\"\n-\n- val HexString = \"(0[xX]$HexDigits(\\\\.)?)|\" + // 0[xX] HexDigits ._opt BinaryExponent FloatTypeSuffix_opt\n- \"(0[xX]$HexDigits?(\\\\.)$HexDigits)\" // 0[xX] HexDigits_opt . HexDigits BinaryExponent FloatTypeSuffix_opt\n-\n- val Number = \"($Digits(\\\\.)?($Digits?)($Exp)?)|\" + // Digits ._opt Digits_opt ExponentPart_opt FloatTypeSuffix_opt\n- \"(\\\\.($Digits)($Exp)?)|\" + // . Digits ExponentPart_opt FloatTypeSuffix_opt\n- \"(($HexString)[pP][+-]?$Digits)\" // HexSignificand BinaryExponent\n-\n- val fpRegex = \"[\\\\x00-\\\\x20]*[+-]?(NaN|Infinity|(($Number)[fFdD]?))[\\\\x00-\\\\x20]*\"\n-\n- Regex(fpRegex)\n- }\n-}\n-\n private inline fun screenFloatValue(str: String, parse: (String) -> T): T? {\n return try {\n- if (ScreenFloatValueRegEx.value.matches(str))\n+ if (isValidFloat(str))\n parse(str)\n else\n null\n- } catch (e: NumberFormatException) { // overflow\n+ } catch (_: NumberFormatException) { // overflow\n null\n }\n }\n+\n+private const val LengthOfNaN = 2 // \"NaN\".length - 1\n+private const val LengthOfInfinity = 7 // \"Infinity\".length - 1\n+\n+private fun isValidFloat(s: String): Boolean {\n+ // A float can have one of two representations:\n+ //\n+ // 1. Standard:\n+ // - With an integer part only: 1234\n+ // - With integer and fractional parts: 1234.4678\n+ // - With a fractional part only: .4678\n+ //\n+ // Optional sign prefix: + or -\n+ // Optional signed exponent: e or E, followed by optionally signed digits (+12, -12, 12)\n+ // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n+ //\n+ // 2. Hexadecimal:\n+ // - With an integer part only: 0x12ab\n+ // - With integer and fractional parts: 0x12ab.CD78\n+ // - With a fractional part only: 0x.CD78\n+ //\n+ // Mandatory signed exponent: p or P, followed by optionally signed digits (+12, -12, 12)\n+ //\n+ // Optional sign prefix: + or -\n+ // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n+ //\n+ // Two special cases:\n+ // \"NaN\" and \"Infinity\" strings, can have an optional sign prefix (+ or -)\n+ //\n+ // Implementation notes:\n+ // - The pattern \"myChar.code or 0x20 == 'x'.code\" is used to perform a case-insensitive\n+ // comparison of a character. 
Adding the 0x20 bit turns an upper case ASCII letter into\n+ // a lower case one.\n+\n+ var start = 0\n+ var end = s.length - 1\n+\n+ // Skip leading spaces\n+ while (start <= end && s[start].code <= 0x20) start++\n+\n+ // Empty/whitespace string\n+ if (start > end) return false\n+\n+ // Skip trailing spaces\n+ while (end > start && s[end].code <= 0x20) end--\n+\n+ // Number starts with a positive or negative sign\n+ if (s[start] == '+' || s[start] == '-') start++\n+ // If we have nothing after the sign, the string is invalid\n+ if (start > end) return false\n+\n+ var hasIntegerPart: Boolean\n+ var hasFractionalPart = false\n+ var isHex = false\n+\n+ // Might be a hex string\n+ if (s[start] == '0') {\n+ start++\n+ // A \"0\" on its own is valid\n+ if (start > end) return true\n+\n+ // Test for [xX] to see if we truly have a hex string\n+ if (s[start].code or 0x20 == 'x'.code) {", + "comment_created_at": "2024-10-21T17:37:16+00:00", + "comment_author": "fzhinkin", + "comment_body": "`Char.code or 0x20` deserved its own private function.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1809238149", + "pr_number": 5364, + "pr_file": "libraries/stdlib/jvm/src/kotlin/text/StringNumberConversionsJVM.kt", + "created_at": "2024-10-21T17:42:44+00:00", + "commented_code": "public fun String.toBigDecimalOrNull(mathContext: java.math.MathContext): java.math.BigDecimal? =\n screenFloatValue(this) { it.toBigDecimal(mathContext) }\n\n/**\n * Recommended floating point number validation RegEx from the javadoc of `java.lang.Double.valueOf(String)`\n */\nprivate object ScreenFloatValueRegEx {\n @JvmField val value = run {\n val Digits = \"(\\\\p{Digit}+)\"\n val HexDigits = \"(\\\\p{XDigit}+)\"\n val Exp = \"[eE][+-]?$Digits\"\n\n val HexString = \"(0[xX]$HexDigits(\\\\.)?)|\" + // 0[xX] HexDigits ._opt BinaryExponent FloatTypeSuffix_opt\n \"(0[xX]$HexDigits?(\\\\.)$HexDigits)\" // 0[xX] HexDigits_opt . HexDigits BinaryExponent FloatTypeSuffix_opt\n\n val Number = \"($Digits(\\\\.)?($Digits?)($Exp)?)|\" + // Digits ._opt Digits_opt ExponentPart_opt FloatTypeSuffix_opt\n \"(\\\\.($Digits)($Exp)?)|\" + // . Digits ExponentPart_opt FloatTypeSuffix_opt\n \"(($HexString)[pP][+-]?$Digits)\" // HexSignificand BinaryExponent\n\n val fpRegex = \"[\\\\x00-\\\\x20]*[+-]?(NaN|Infinity|(($Number)[fFdD]?))[\\\\x00-\\\\x20]*\"\n\n Regex(fpRegex)\n }\n}\n\nprivate inline fun screenFloatValue(str: String, parse: (String) -> T): T? {\n return try {\n if (ScreenFloatValueRegEx.value.matches(str))\n if (isValidFloat(str))\n parse(str)\n else\n null\n } catch (e: NumberFormatException) { // overflow\n } catch (_: NumberFormatException) { // overflow\n null\n }\n}\n\nprivate const val LengthOfNaN = 2 // \"NaN\".length - 1\nprivate const val LengthOfInfinity = 7 // \"Infinity\".length - 1\n\nprivate fun isValidFloat(s: String): Boolean {\n // A float can have one of two representations:\n //\n // 1. Standard:\n // - With an integer part only: 1234\n // - With integer and fractional parts: 1234.4678\n // - With a fractional part only: .4678\n //\n // Optional sign prefix: + or -\n // Optional signed exponent: e or E, followed by optionally signed digits (+12, -12, 12)\n // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n //\n // 2. 
Hexadecimal:\n // - With an integer part only: 0x12ab\n // - With integer and fractional parts: 0x12ab.CD78\n // - With a fractional part only: 0x.CD78\n //\n // Mandatory signed exponent: p or P, followed by optionally signed digits (+12, -12, 12)\n //\n // Optional sign prefix: + or -\n // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n //\n // Two special cases:\n // \"NaN\" and \"Infinity\" strings, can have an optional sign prefix (+ or -)\n //\n // Implementation notes:\n // - The pattern \"myChar.code or 0x20 == 'x'.code\" is used to perform a case-insensitive\n // comparison of a character. Adding the 0x20 bit turns an upper case ASCII letter into\n // a lower case one.\n\n var start = 0\n var end = s.length - 1\n\n // Skip leading spaces\n while (start <= end && s[start].code <= 0x20) start++", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1809238149", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5364, + "pr_file": "libraries/stdlib/jvm/src/kotlin/text/StringNumberConversionsJVM.kt", + "discussion_id": "1809238149", + "commented_code": "@@ -239,35 +239,215 @@ public fun String.toBigDecimalOrNull(): java.math.BigDecimal? =\n public fun String.toBigDecimalOrNull(mathContext: java.math.MathContext): java.math.BigDecimal? =\n screenFloatValue(this) { it.toBigDecimal(mathContext) }\n \n-/**\n- * Recommended floating point number validation RegEx from the javadoc of `java.lang.Double.valueOf(String)`\n- */\n-private object ScreenFloatValueRegEx {\n- @JvmField val value = run {\n- val Digits = \"(\\\\p{Digit}+)\"\n- val HexDigits = \"(\\\\p{XDigit}+)\"\n- val Exp = \"[eE][+-]?$Digits\"\n-\n- val HexString = \"(0[xX]$HexDigits(\\\\.)?)|\" + // 0[xX] HexDigits ._opt BinaryExponent FloatTypeSuffix_opt\n- \"(0[xX]$HexDigits?(\\\\.)$HexDigits)\" // 0[xX] HexDigits_opt . HexDigits BinaryExponent FloatTypeSuffix_opt\n-\n- val Number = \"($Digits(\\\\.)?($Digits?)($Exp)?)|\" + // Digits ._opt Digits_opt ExponentPart_opt FloatTypeSuffix_opt\n- \"(\\\\.($Digits)($Exp)?)|\" + // . Digits ExponentPart_opt FloatTypeSuffix_opt\n- \"(($HexString)[pP][+-]?$Digits)\" // HexSignificand BinaryExponent\n-\n- val fpRegex = \"[\\\\x00-\\\\x20]*[+-]?(NaN|Infinity|(($Number)[fFdD]?))[\\\\x00-\\\\x20]*\"\n-\n- Regex(fpRegex)\n- }\n-}\n-\n private inline fun screenFloatValue(str: String, parse: (String) -> T): T? {\n return try {\n- if (ScreenFloatValueRegEx.value.matches(str))\n+ if (isValidFloat(str))\n parse(str)\n else\n null\n- } catch (e: NumberFormatException) { // overflow\n+ } catch (_: NumberFormatException) { // overflow\n null\n }\n }\n+\n+private const val LengthOfNaN = 2 // \"NaN\".length - 1\n+private const val LengthOfInfinity = 7 // \"Infinity\".length - 1\n+\n+private fun isValidFloat(s: String): Boolean {\n+ // A float can have one of two representations:\n+ //\n+ // 1. Standard:\n+ // - With an integer part only: 1234\n+ // - With integer and fractional parts: 1234.4678\n+ // - With a fractional part only: .4678\n+ //\n+ // Optional sign prefix: + or -\n+ // Optional signed exponent: e or E, followed by optionally signed digits (+12, -12, 12)\n+ // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n+ //\n+ // 2. 
Hexadecimal:\n+ // - With an integer part only: 0x12ab\n+ // - With integer and fractional parts: 0x12ab.CD78\n+ // - With a fractional part only: 0x.CD78\n+ //\n+ // Mandatory signed exponent: p or P, followed by optionally signed digits (+12, -12, 12)\n+ //\n+ // Optional sign prefix: + or -\n+ // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n+ //\n+ // Two special cases:\n+ // \"NaN\" and \"Infinity\" strings, can have an optional sign prefix (+ or -)\n+ //\n+ // Implementation notes:\n+ // - The pattern \"myChar.code or 0x20 == 'x'.code\" is used to perform a case-insensitive\n+ // comparison of a character. Adding the 0x20 bit turns an upper case ASCII letter into\n+ // a lower case one.\n+\n+ var start = 0\n+ var end = s.length - 1\n+\n+ // Skip leading spaces\n+ while (start <= end && s[start].code <= 0x20) start++", + "comment_created_at": "2024-10-21T17:42:44+00:00", + "comment_author": "fzhinkin", + "comment_body": "Introduction of something like `inline fun String.advanceWhile(index: Int, endInclusive: Int, predicate: (Char) -> Boolean): Int` will shorten the whole function and will improve its readability:\r\n\r\n- `start = s.advanceWhile(start, end) { it.code <= 20 }`\r\n- `start = s.advanceWhile(start, end) { it.isAsciiDigit() || it.isHexLetter() }`\r\n- `start = s.advanceWhile(start, end) { it.isAsciiDigit() }`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1809255640", + "pr_number": 5364, + "pr_file": "libraries/stdlib/jvm/src/kotlin/text/StringNumberConversionsJVM.kt", + "created_at": "2024-10-21T17:57:37+00:00", + "commented_code": "public fun String.toBigDecimalOrNull(mathContext: java.math.MathContext): java.math.BigDecimal? =\n screenFloatValue(this) { it.toBigDecimal(mathContext) }\n\n/**\n * Recommended floating point number validation RegEx from the javadoc of `java.lang.Double.valueOf(String)`\n */\nprivate object ScreenFloatValueRegEx {\n @JvmField val value = run {\n val Digits = \"(\\\\p{Digit}+)\"\n val HexDigits = \"(\\\\p{XDigit}+)\"\n val Exp = \"[eE][+-]?$Digits\"\n\n val HexString = \"(0[xX]$HexDigits(\\\\.)?)|\" + // 0[xX] HexDigits ._opt BinaryExponent FloatTypeSuffix_opt\n \"(0[xX]$HexDigits?(\\\\.)$HexDigits)\" // 0[xX] HexDigits_opt . HexDigits BinaryExponent FloatTypeSuffix_opt\n\n val Number = \"($Digits(\\\\.)?($Digits?)($Exp)?)|\" + // Digits ._opt Digits_opt ExponentPart_opt FloatTypeSuffix_opt\n \"(\\\\.($Digits)($Exp)?)|\" + // . Digits ExponentPart_opt FloatTypeSuffix_opt\n \"(($HexString)[pP][+-]?$Digits)\" // HexSignificand BinaryExponent\n\n val fpRegex = \"[\\\\x00-\\\\x20]*[+-]?(NaN|Infinity|(($Number)[fFdD]?))[\\\\x00-\\\\x20]*\"\n\n Regex(fpRegex)\n }\n}\n\nprivate inline fun screenFloatValue(str: String, parse: (String) -> T): T? {\n return try {\n if (ScreenFloatValueRegEx.value.matches(str))\n if (isValidFloat(str))\n parse(str)\n else\n null\n } catch (e: NumberFormatException) { // overflow\n } catch (_: NumberFormatException) { // overflow\n null\n }\n}\n\nprivate const val LengthOfNaN = 2 // \"NaN\".length - 1\nprivate const val LengthOfInfinity = 7 // \"Infinity\".length - 1\n\nprivate fun isValidFloat(s: String): Boolean {\n // A float can have one of two representations:\n //\n // 1. 
Standard:\n // - With an integer part only: 1234\n // - With integer and fractional parts: 1234.4678\n // - With a fractional part only: .4678\n //\n // Optional sign prefix: + or -\n // Optional signed exponent: e or E, followed by optionally signed digits (+12, -12, 12)\n // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n //\n // 2. Hexadecimal:\n // - With an integer part only: 0x12ab\n // - With integer and fractional parts: 0x12ab.CD78\n // - With a fractional part only: 0x.CD78\n //\n // Mandatory signed exponent: p or P, followed by optionally signed digits (+12, -12, 12)\n //\n // Optional sign prefix: + or -\n // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n //\n // Two special cases:\n // \"NaN\" and \"Infinity\" strings, can have an optional sign prefix (+ or -)\n //\n // Implementation notes:\n // - The pattern \"myChar.code or 0x20 == 'x'.code\" is used to perform a case-insensitive\n // comparison of a character. Adding the 0x20 bit turns an upper case ASCII letter into\n // a lower case one.\n\n var start = 0\n var end = s.length - 1\n\n // Skip leading spaces\n while (start <= end && s[start].code <= 0x20) start++\n\n // Empty/whitespace string\n if (start > end) return false\n\n // Skip trailing spaces\n while (end > start && s[end].code <= 0x20) end--\n\n // Number starts with a positive or negative sign\n if (s[start] == '+' || s[start] == '-') start++\n // If we have nothing after the sign, the string is invalid\n if (start > end) return false\n\n var hasIntegerPart: Boolean\n var hasFractionalPart = false\n var isHex = false\n\n // Might be a hex string\n if (s[start] == '0') {\n start++\n // A \"0\" on its own is valid\n if (start > end) return true\n\n // Test for [xX] to see if we truly have a hex string\n if (s[start].code or 0x20 == 'x'.code) {\n start++\n\n // Look for hex digits after the 0x prefix\n var checkpoint = start\n while (start <= end) {\n val d = s[start]\n if (d.isAsciiDigit() || d.isHexLetter()) {\n start++\n } else {\n break\n }\n }\n // Check if we found 0x*****, otherwise, the hex number might be of the\n // form 0x.*******\n hasIntegerPart = checkpoint != start\n\n // A hex string must have an exponent, the string is invalid if we only found an\n // integer part\n if (start > end) return false\n\n if (s[start] == '.') {\n start++\n\n // Look for hex digits for the fractional part\n checkpoint = start\n while (start <= end) {\n val d = s[start]\n if (d.isAsciiDigit() || d.isHexLetter()) {\n start++\n } else {\n break\n }\n }\n\n // Did we find a fractional part?\n hasFractionalPart = checkpoint != start\n }\n\n // A string must have an integer part, or a fractional part, or both\n if (!hasIntegerPart && !hasFractionalPart) return false\n\n // A hex string must have an exponent, the string is invalid if we only found an\n // integer and/or fractional part\n if (start > end) return false\n\n isHex = true\n } else {\n // Rewind the 0 we just parsed to make things easier below and try to parse a non-\n // hexadecimal string representation of a float\n start--\n }\n }\n\n // Parse a non-hexadecimal representations\n if (!isHex) {\n // Look for digits before the decimal separator, if any\n var checkpoint = start\n while (start <= end && s[start].isAsciiDigit()) start++\n\n // If there's no integer part, the float might be of the form .1234\n hasIntegerPart = checkpoint != start\n\n // A non-hexadecimal representation only needs an integer part, we can stop here\n if (start > end) return hasIntegerPart\n\n if (s[start] == 
'.') {\n start++\n\n // Look for the fractional part\n checkpoint = start\n while (start <= end && s[start].isAsciiDigit()) start++\n\n // Did we find a fractional part?\n hasFractionalPart = checkpoint != start\n }\n\n // A string must have an integer part, or a fractional part, or both\n if (!hasIntegerPart && !hasFractionalPart) {\n // Special case non-finite constants\n val constant = when (end) {", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1809255640", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5364, + "pr_file": "libraries/stdlib/jvm/src/kotlin/text/StringNumberConversionsJVM.kt", + "discussion_id": "1809255640", + "commented_code": "@@ -239,35 +239,215 @@ public fun String.toBigDecimalOrNull(): java.math.BigDecimal? =\n public fun String.toBigDecimalOrNull(mathContext: java.math.MathContext): java.math.BigDecimal? =\n screenFloatValue(this) { it.toBigDecimal(mathContext) }\n \n-/**\n- * Recommended floating point number validation RegEx from the javadoc of `java.lang.Double.valueOf(String)`\n- */\n-private object ScreenFloatValueRegEx {\n- @JvmField val value = run {\n- val Digits = \"(\\\\p{Digit}+)\"\n- val HexDigits = \"(\\\\p{XDigit}+)\"\n- val Exp = \"[eE][+-]?$Digits\"\n-\n- val HexString = \"(0[xX]$HexDigits(\\\\.)?)|\" + // 0[xX] HexDigits ._opt BinaryExponent FloatTypeSuffix_opt\n- \"(0[xX]$HexDigits?(\\\\.)$HexDigits)\" // 0[xX] HexDigits_opt . HexDigits BinaryExponent FloatTypeSuffix_opt\n-\n- val Number = \"($Digits(\\\\.)?($Digits?)($Exp)?)|\" + // Digits ._opt Digits_opt ExponentPart_opt FloatTypeSuffix_opt\n- \"(\\\\.($Digits)($Exp)?)|\" + // . Digits ExponentPart_opt FloatTypeSuffix_opt\n- \"(($HexString)[pP][+-]?$Digits)\" // HexSignificand BinaryExponent\n-\n- val fpRegex = \"[\\\\x00-\\\\x20]*[+-]?(NaN|Infinity|(($Number)[fFdD]?))[\\\\x00-\\\\x20]*\"\n-\n- Regex(fpRegex)\n- }\n-}\n-\n private inline fun screenFloatValue(str: String, parse: (String) -> T): T? {\n return try {\n- if (ScreenFloatValueRegEx.value.matches(str))\n+ if (isValidFloat(str))\n parse(str)\n else\n null\n- } catch (e: NumberFormatException) { // overflow\n+ } catch (_: NumberFormatException) { // overflow\n null\n }\n }\n+\n+private const val LengthOfNaN = 2 // \"NaN\".length - 1\n+private const val LengthOfInfinity = 7 // \"Infinity\".length - 1\n+\n+private fun isValidFloat(s: String): Boolean {\n+ // A float can have one of two representations:\n+ //\n+ // 1. Standard:\n+ // - With an integer part only: 1234\n+ // - With integer and fractional parts: 1234.4678\n+ // - With a fractional part only: .4678\n+ //\n+ // Optional sign prefix: + or -\n+ // Optional signed exponent: e or E, followed by optionally signed digits (+12, -12, 12)\n+ // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n+ //\n+ // 2. Hexadecimal:\n+ // - With an integer part only: 0x12ab\n+ // - With integer and fractional parts: 0x12ab.CD78\n+ // - With a fractional part only: 0x.CD78\n+ //\n+ // Mandatory signed exponent: p or P, followed by optionally signed digits (+12, -12, 12)\n+ //\n+ // Optional sign prefix: + or -\n+ // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n+ //\n+ // Two special cases:\n+ // \"NaN\" and \"Infinity\" strings, can have an optional sign prefix (+ or -)\n+ //\n+ // Implementation notes:\n+ // - The pattern \"myChar.code or 0x20 == 'x'.code\" is used to perform a case-insensitive\n+ // comparison of a character. 
Adding the 0x20 bit turns an upper case ASCII letter into\n+ // a lower case one.\n+\n+ var start = 0\n+ var end = s.length - 1\n+\n+ // Skip leading spaces\n+ while (start <= end && s[start].code <= 0x20) start++\n+\n+ // Empty/whitespace string\n+ if (start > end) return false\n+\n+ // Skip trailing spaces\n+ while (end > start && s[end].code <= 0x20) end--\n+\n+ // Number starts with a positive or negative sign\n+ if (s[start] == '+' || s[start] == '-') start++\n+ // If we have nothing after the sign, the string is invalid\n+ if (start > end) return false\n+\n+ var hasIntegerPart: Boolean\n+ var hasFractionalPart = false\n+ var isHex = false\n+\n+ // Might be a hex string\n+ if (s[start] == '0') {\n+ start++\n+ // A \"0\" on its own is valid\n+ if (start > end) return true\n+\n+ // Test for [xX] to see if we truly have a hex string\n+ if (s[start].code or 0x20 == 'x'.code) {\n+ start++\n+\n+ // Look for hex digits after the 0x prefix\n+ var checkpoint = start\n+ while (start <= end) {\n+ val d = s[start]\n+ if (d.isAsciiDigit() || d.isHexLetter()) {\n+ start++\n+ } else {\n+ break\n+ }\n+ }\n+ // Check if we found 0x*****, otherwise, the hex number might be of the\n+ // form 0x.*******\n+ hasIntegerPart = checkpoint != start\n+\n+ // A hex string must have an exponent, the string is invalid if we only found an\n+ // integer part\n+ if (start > end) return false\n+\n+ if (s[start] == '.') {\n+ start++\n+\n+ // Look for hex digits for the fractional part\n+ checkpoint = start\n+ while (start <= end) {\n+ val d = s[start]\n+ if (d.isAsciiDigit() || d.isHexLetter()) {\n+ start++\n+ } else {\n+ break\n+ }\n+ }\n+\n+ // Did we find a fractional part?\n+ hasFractionalPart = checkpoint != start\n+ }\n+\n+ // A string must have an integer part, or a fractional part, or both\n+ if (!hasIntegerPart && !hasFractionalPart) return false\n+\n+ // A hex string must have an exponent, the string is invalid if we only found an\n+ // integer and/or fractional part\n+ if (start > end) return false\n+\n+ isHex = true\n+ } else {\n+ // Rewind the 0 we just parsed to make things easier below and try to parse a non-\n+ // hexadecimal string representation of a float\n+ start--\n+ }\n+ }\n+\n+ // Parse a non-hexadecimal representations\n+ if (!isHex) {\n+ // Look for digits before the decimal separator, if any\n+ var checkpoint = start\n+ while (start <= end && s[start].isAsciiDigit()) start++\n+\n+ // If there's no integer part, the float might be of the form .1234\n+ hasIntegerPart = checkpoint != start\n+\n+ // A non-hexadecimal representation only needs an integer part, we can stop here\n+ if (start > end) return hasIntegerPart\n+\n+ if (s[start] == '.') {\n+ start++\n+\n+ // Look for the fractional part\n+ checkpoint = start\n+ while (start <= end && s[start].isAsciiDigit()) start++\n+\n+ // Did we find a fractional part?\n+ hasFractionalPart = checkpoint != start\n+ }\n+\n+ // A string must have an integer part, or a fractional part, or both\n+ if (!hasIntegerPart && !hasFractionalPart) {\n+ // Special case non-finite constants\n+ val constant = when (end) {", + "comment_created_at": "2024-10-21T17:57:37+00:00", + "comment_author": "fzhinkin", + "comment_body": "For the sake of readability, I suggest outlining special-case handling to a separate function as well.\r\n`LengthOfXXX` could then be defined inside the function (They are not lengths, so it would be better to make their scope as narrow as possible).\r\n\r\n> For the sake of readability\r\n\r\nDon't get me wrong: the function is well written, 
there's just too much context to keep in mind while reading the code.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1081109610", + "pr_number": 5050, + "pr_file": "compiler/ir/backend.common/src/org/jetbrains/kotlin/backend/common/lower/inline/FunctionInlining.kt", + "created_at": "2023-01-19T11:02:06+00:00", + "commented_code": "return@forEach\n }\n\n if (argument.isImmutableVariableLoad) {\n substituteMap[argument.parameter] =\n argument.argumentExpression.transform( // Arguments may reference the previous ones - substitute them.\n substitutor,\n data = null\n )\n // Arguments may reference the previous ones - substitute them.\n val variableInitializer = argument.argumentExpression.transform(substitutor, data = null)\n val parameter = argument.parameter\n\n fun substituteParameterViaTemporaryVariable() {\n val newVariable = createTemporaryVariable(parameter, variableInitializer, callee)\n evaluationStatements.add(newVariable)\n substituteMap[parameter] = IrGetValueWithoutLocation(newVariable.symbol)\n }\n\n if (alwaysCreateTemporaryVariablesForArguments && !parameter.isInlineParameter()) {\n substituteParameterViaTemporaryVariable()\n return@forEach\n }\n\n // Arguments may reference the previous ones - substitute them.\n val variableInitializer = argument.argumentExpression.transform(substitutor, data = null)\n if (argument.isImmutableVariableLoad) {\n substituteMap[parameter] = variableInitializer\n return@forEach\n }\n\n val argumentExtracted = !argument.argumentExpression.isPure(false, context = context)", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1081109610", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5050, + "pr_file": "compiler/ir/backend.common/src/org/jetbrains/kotlin/backend/common/lower/inline/FunctionInlining.kt", + "discussion_id": "1081109610", + "commented_code": "@@ -602,43 +603,61 @@ class FunctionInlining(\n return@forEach\n }\n \n- if (argument.isImmutableVariableLoad) {\n- substituteMap[argument.parameter] =\n- argument.argumentExpression.transform( // Arguments may reference the previous ones - substitute them.\n- substitutor,\n- data = null\n- )\n+ // Arguments may reference the previous ones - substitute them.\n+ val variableInitializer = argument.argumentExpression.transform(substitutor, data = null)\n+ val parameter = argument.parameter\n+\n+ fun substituteParameterViaTemporaryVariable() {\n+ val newVariable = createTemporaryVariable(parameter, variableInitializer, callee)\n+ evaluationStatements.add(newVariable)\n+ substituteMap[parameter] = IrGetValueWithoutLocation(newVariable.symbol)\n+ }\n+\n+ if (alwaysCreateTemporaryVariablesForArguments && !parameter.isInlineParameter()) {\n+ substituteParameterViaTemporaryVariable()\n return@forEach\n }\n \n- // Arguments may reference the previous ones - substitute them.\n- val variableInitializer = argument.argumentExpression.transform(substitutor, data = null)\n+ if (argument.isImmutableVariableLoad) {\n+ substituteMap[parameter] = variableInitializer\n+ return@forEach\n+ }\n \n val argumentExtracted = !argument.argumentExpression.isPure(false, context = context)", + "comment_created_at": "2023-01-19T11:02:06+00:00", + "comment_author": "kunyavskiy", + "comment_body": "For now, in fact we have to cases \n1. Directly substitute\n2. Create temporary variable. \n\nFor now logic on when which is happening is distributed all over the function code. It would be nice to collect it in one place. 
Like add a variable \"shouldInlineArgumentValue\", and just check over it and the end, without returns in the middle of function. \n\nMaybe it can be event extracted to a separate function.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "964684049", + "pr_number": 4933, + "pr_file": "plugins/value-container-assignment/value-container-assignment.k2/src/org/jetbrains/kotlin/container/assignment/k2/FirValueContainerAssignmentAltererExtension.kt", + "created_at": "2022-09-07T10:46:41+00:00", + "commented_code": "/*\n * Copyright 2010-2022 JetBrains s.r.o. and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage org.jetbrains.kotlin.container.assignment.k2\n\nimport org.jetbrains.kotlin.KtFakeSourceElementKind\nimport org.jetbrains.kotlin.fakeElement\nimport org.jetbrains.kotlin.fir.FirSession\nimport org.jetbrains.kotlin.fir.expressions.*\nimport org.jetbrains.kotlin.fir.expressions.builder.buildFunctionCall\nimport org.jetbrains.kotlin.fir.expressions.builder.buildPropertyAccessExpression\nimport org.jetbrains.kotlin.fir.extensions.FirAssignExpressionAltererExtension\nimport org.jetbrains.kotlin.fir.references.builder.buildSimpleNamedReference\nimport org.jetbrains.kotlin.fir.resolvedSymbol\nimport org.jetbrains.kotlin.fir.symbols.impl.FirPropertySymbol\nimport org.jetbrains.kotlin.fir.types.toRegularClassSymbol\nimport org.jetbrains.kotlin.name.Name\n\nclass FirValueContainerAssignmentAltererExtension(\n session: FirSession\n) : FirAssignExpressionAltererExtension(session) {\n\n companion object {\n val ASSIGN_METHOD = Name.identifier(\"assign\")", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "964684049", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4933, + "pr_file": "plugins/value-container-assignment/value-container-assignment.k2/src/org/jetbrains/kotlin/container/assignment/k2/FirValueContainerAssignmentAltererExtension.kt", + "discussion_id": "964684049", + "commented_code": "@@ -0,0 +1,72 @@\n+/*\n+ * Copyright 2010-2022 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlin.container.assignment.k2\n+\n+import org.jetbrains.kotlin.KtFakeSourceElementKind\n+import org.jetbrains.kotlin.fakeElement\n+import org.jetbrains.kotlin.fir.FirSession\n+import org.jetbrains.kotlin.fir.expressions.*\n+import org.jetbrains.kotlin.fir.expressions.builder.buildFunctionCall\n+import org.jetbrains.kotlin.fir.expressions.builder.buildPropertyAccessExpression\n+import org.jetbrains.kotlin.fir.extensions.FirAssignExpressionAltererExtension\n+import org.jetbrains.kotlin.fir.references.builder.buildSimpleNamedReference\n+import org.jetbrains.kotlin.fir.resolvedSymbol\n+import org.jetbrains.kotlin.fir.symbols.impl.FirPropertySymbol\n+import org.jetbrains.kotlin.fir.types.toRegularClassSymbol\n+import org.jetbrains.kotlin.name.Name\n+\n+class FirValueContainerAssignmentAltererExtension(\n+ session: FirSession\n+) : FirAssignExpressionAltererExtension(session) {\n+\n+ companion object {\n+ val ASSIGN_METHOD = Name.identifier(\"assign\")", + "comment_created_at": "2022-09-07T10:46:41+00:00", + "comment_author": "demiurg906", + "comment_body": "It's worth to exctract this constant into `.common` module", + "pr_file_module": null + }, + { + "comment_id": "966271057", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4933, + "pr_file": "plugins/value-container-assignment/value-container-assignment.k2/src/org/jetbrains/kotlin/container/assignment/k2/FirValueContainerAssignmentAltererExtension.kt", + "discussion_id": "964684049", + "commented_code": "@@ -0,0 +1,72 @@\n+/*\n+ * Copyright 2010-2022 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlin.container.assignment.k2\n+\n+import org.jetbrains.kotlin.KtFakeSourceElementKind\n+import org.jetbrains.kotlin.fakeElement\n+import org.jetbrains.kotlin.fir.FirSession\n+import org.jetbrains.kotlin.fir.expressions.*\n+import org.jetbrains.kotlin.fir.expressions.builder.buildFunctionCall\n+import org.jetbrains.kotlin.fir.expressions.builder.buildPropertyAccessExpression\n+import org.jetbrains.kotlin.fir.extensions.FirAssignExpressionAltererExtension\n+import org.jetbrains.kotlin.fir.references.builder.buildSimpleNamedReference\n+import org.jetbrains.kotlin.fir.resolvedSymbol\n+import org.jetbrains.kotlin.fir.symbols.impl.FirPropertySymbol\n+import org.jetbrains.kotlin.fir.types.toRegularClassSymbol\n+import org.jetbrains.kotlin.name.Name\n+\n+class FirValueContainerAssignmentAltererExtension(\n+ session: FirSession\n+) : FirAssignExpressionAltererExtension(session) {\n+\n+ companion object {\n+ val ASSIGN_METHOD = Name.identifier(\"assign\")", + "comment_created_at": "2022-09-08T18:06:03+00:00", + "comment_author": "asodja", + "comment_body": "Extracted to .common", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "893088032", + "pr_number": 4835, + "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org/jetbrains/kotlinx/atomicfu/compiler/backend/jvm/AtomicfuJvmIrTransformer.kt", + "created_at": "2022-06-09T05:37:45+00:00", + "commented_code": "/*\n * Copyright 2010-2022 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage org.jetbrains.kotlinx.atomicfu.compiler.backend.jvm\n\nimport org.jetbrains.kotlin.backend.common.extensions.*\nimport org.jetbrains.kotlin.backend.jvm.ir.*\nimport org.jetbrains.kotlin.ir.*\nimport org.jetbrains.kotlin.ir.builders.declarations.*\nimport org.jetbrains.kotlin.ir.builders.*\nimport org.jetbrains.kotlin.ir.declarations.*\nimport org.jetbrains.kotlin.ir.declarations.impl.*\nimport org.jetbrains.kotlin.ir.expressions.*\nimport org.jetbrains.kotlin.ir.expressions.impl.*\nimport org.jetbrains.kotlin.ir.symbols.*\nimport org.jetbrains.kotlin.ir.types.*\nimport org.jetbrains.kotlin.ir.util.*\nimport org.jetbrains.kotlin.ir.visitors.*\nimport org.jetbrains.kotlin.name.*\nimport org.jetbrains.kotlin.util.capitalizeDecapitalize.*\nimport org.jetbrains.kotlinx.atomicfu.compiler.backend.*\nimport kotlin.collections.set\n\nprivate const val AFU_PKG = \"kotlinx.atomicfu\"\nprivate const val TRACE_BASE_TYPE = \"TraceBase\"\nprivate const val ATOMIC_VALUE_FACTORY = \"atomic\"\nprivate const val INVOKE = \"invoke\"\nprivate const val APPEND = \"append\"\nprivate const val GET = \"get\"\nprivate const val ATOMICFU = \"atomicfu\"\nprivate const val ATOMIC_ARRAY_RECEIVER_SUFFIX = \"\\$array\"\nprivate const val DISPATCH_RECEIVER = \"${ATOMICFU}\\$dispatchReceiver\"\nprivate const val ATOMIC_HANDLER = \"${ATOMICFU}\\$handler\"\nprivate const val ACTION = \"${ATOMICFU}\\$action\"\nprivate const val INDEX = \"${ATOMICFU}\\$index\"\nprivate const val VOLATILE_WRAPPER_SUFFIX = \"\\$VolatileWrapper\"\nprivate const val LOOP = \"loop\"\nprivate const val UPDATE = \"update\"\n\nclass AtomicfuJvmIrTransformer(\n private val context: IrPluginContext,\n private val atomicSymbols: AtomicSymbols\n) {\n private val irBuiltIns = context.irBuiltIns\n\n private val AFU_VALUE_TYPES: Map = mapOf(\n \"AtomicInt\" to irBuiltIns.intType,\n \"AtomicLong\" to irBuiltIns.longType,\n \"AtomicBoolean\" to irBuiltIns.booleanType,\n \"AtomicRef\" to irBuiltIns.anyNType\n )\n\n private val ATOMICFU_INLINE_FUNCTIONS = setOf(\"loop\", \"update\", \"getAndUpdate\", \"updateAndGet\")\n protected val ATOMIC_VALUE_TYPES = setOf(\"AtomicInt\", \"AtomicLong\", \"AtomicBoolean\", \"AtomicRef\")\n protected val ATOMIC_ARRAY_TYPES = setOf(\"AtomicIntArray\", \"AtomicLongArray\", \"AtomicBooleanArray\", \"AtomicArray\")\n\n fun transform(moduleFragment: IrModuleFragment) {\n transformAtomicFields(moduleFragment)\n transformAtomicExtensions(moduleFragment)\n transformAtomicfuDeclarations(moduleFragment)\n moduleFragment.files.forEach { irFile ->\n irFile.patchDeclarationParents()\n }\n }\n\n private fun transformAtomicFields(moduleFragment: IrModuleFragment) {\n moduleFragment.files.forEach { irFile ->\n irFile.transform(AtomicHandlerTransformer(), null)\n }\n }\n\n private fun transformAtomicExtensions(moduleFragment: IrModuleFragment) {\n moduleFragment.files.forEach { irFile ->\n irFile.transform(AtomicExtensionTransformer(), null)\n }\n }\n\n private fun transformAtomicfuDeclarations(moduleFragment: IrModuleFragment) {\n moduleFragment.files.forEach { irFile ->\n irFile.transform(AtomicfuTransformer(), null)\n }\n }\n\n private val propertyToAtomicHandler = mutableMapOf()\n\n private inner class AtomicHandlerTransformer : IrElementTransformer {\n override fun visitClass(declaration: IrClass, data: IrFunction?): IrStatement {\n 
declaration.declarations.filter(::fromKotlinxAtomicfu).forEach {\n (it as IrProperty).transformAtomicfuProperty(declaration)\n }\n return super.visitClass(declaration, data)\n }\n\n override fun visitFile(declaration: IrFile, data: IrFunction?): IrFile {\n declaration.declarations.filter(::fromKotlinxAtomicfu).forEach {\n (it as IrProperty).transformAtomicfuProperty(declaration)\n }\n return super.visitFile(declaration, data)\n }\n\n private fun IrProperty.transformAtomicfuProperty(parent: IrDeclarationContainer) {\n val isTopLevel = parent is IrFile\n when {\n isAtomic() -> {\n if (isTopLevel) {\n val parentClass = generateWrapperClass(this, parent)\n transformAtomicProperty(parentClass)\n moveFromFileToClass(parent as IrFile, parentClass)\n } else {\n transformAtomicProperty(parent as IrClass)\n }\n }\n isDelegatedToAtomic() -> transformDelegatedProperty(parent)\n isAtomicArray() -> transformAtomicArrayProperty(parent)\n isTrace() -> parent.declarations.remove(this)\n else -> {}\n }\n }\n\n private fun IrProperty.moveFromFileToClass(\n parentFile: IrFile,\n parentClass: IrClass\n ) {\n parentFile.declarations.remove(this)\n parentClass.declarations.add(this)\n parent = parentClass\n }\n\n private fun IrProperty.transformAtomicProperty(parentClass: IrClass) {\n // Atomic property transformation:\n // 1. replace it's backingField with a volatile property of atomic value type\n // 2. create j.u.c.a.Atomic*FieldUpdater for this volatile property to handle it's value atomically\n // val a = atomic(0) ->\n // volatile var a: Int = 0\n // val a$FU = AtomicIntegerFieldUpdater.newUpdater(parentClass, \"a\")\n //\n // Top-level atomic properties transformation:\n // 1. replace it's backingField with a volatile property of atomic value type\n // 2. wrap this volatile property into the generated class\n // 3. create j.u.c.a.Atomic*FieldUpdater for the volatile property to handle it's value atomically\n // val a = atomic(0) ->\n // class A$ParentFile$VolatileWrapper { volatile var a: Int = 0 }\n // val a$FU = AtomicIntegerFieldUpdater.newUpdater(A$ParentFile$VolatileWrapper::class, \"a\")\n backingField = buildVolatileRawField(this, parentClass)\n // update property accessors\n context.addDefaultGetter(this, parentClass)\n val fieldUpdater = addJucaAFUProperty(this, parentClass)\n registerAtomicHandler(fieldUpdater)\n }\n\n private fun IrProperty.transformAtomicArrayProperty(parent: IrDeclarationContainer) {\n // Replace atomicfu array classes with the corresponding atomic arrays from j.u.c.a.:\n // val intArr = atomicArrayOfNulls(5) ->\n // val intArr = AtomicReferenceArray(5)\n backingField = buildJucaArrayField(this, parent)\n // update property accessors\n context.addDefaultGetter(this, parent)\n registerAtomicHandler(this)\n }\n\n private fun IrProperty.transformDelegatedProperty(parentClass: IrDeclarationContainer) {\n backingField?.let {\n it.initializer?.let {\n val initializer = it.expression as IrCall\n if (initializer.isAtomicFactory()) {\n // Property delegated to atomic factory invocation:\n // 1. replace it's backingField with a volatile property of value type\n // 2. transform getter/setter\n // var a by atomic(0) ->\n // volatile var a: Int = 0\n val volatileField = buildVolatileRawField(this, parentClass)\n backingField = volatileField\n getter?.transformAccessor(volatileField, getter?.dispatchReceiverParameter?.capture())\n setter?.transformAccessor(volatileField, setter?.dispatchReceiverParameter?.capture())\n } else {\n // Property delegated to the atomic property:\n // 1. 
delegate it's accessors to get/set of the backingField of the atomic delegate\n // (that is already transformed to a volatile field of value type)\n // val _a = atomic(0)\n // var a by _a ->\n // volatile var _a: Int = 0\n // var a by _a\n val atomicProperty = initializer.getCorrespondingProperty()\n val volatileField = atomicProperty.backingField!!\n backingField = null\n if (atomicProperty.isTopLevel()) {\n atomicSymbols.createBuilder(symbol).run {\n val parent = getStaticVolatileWrapperInstance(atomicProperty)\n getter?.transformAccessor(volatileField, getProperty(parent, null))\n setter?.transformAccessor(volatileField, getProperty(parent, null))\n }\n } else {\n if (this.parent == atomicProperty.parent) {\n //class A {\n // val _a = atomic()\n // var a by _a\n //}\n getter?.transformAccessor(volatileField, getter?.dispatchReceiverParameter?.capture())\n setter?.transformAccessor(volatileField, setter?.dispatchReceiverParameter?.capture())\n } else {\n //class A {\n // val _a = atomic()\n // inner class B {\n // var a by _a\n // }\n //}\n val thisReceiver = atomicProperty.parentAsClass.thisReceiver\n getter?.transformAccessor(volatileField, thisReceiver?.capture())\n setter?.transformAccessor(volatileField, thisReceiver?.capture())\n }\n }\n }\n }\n }\n }\n\n private fun IrFunction.transformAccessor(volatileField: IrField, parent: IrExpression?) {\n val accessor = this\n atomicSymbols.createBuilder(symbol).run {", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "893088032", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4835, + "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org/jetbrains/kotlinx/atomicfu/compiler/backend/jvm/AtomicfuJvmIrTransformer.kt", + "discussion_id": "893088032", + "commented_code": "@@ -0,0 +1,825 @@\n+/*\n+ * Copyright 2010-2022 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlinx.atomicfu.compiler.backend.jvm\n+\n+import org.jetbrains.kotlin.backend.common.extensions.*\n+import org.jetbrains.kotlin.backend.jvm.ir.*\n+import org.jetbrains.kotlin.ir.*\n+import org.jetbrains.kotlin.ir.builders.declarations.*\n+import org.jetbrains.kotlin.ir.builders.*\n+import org.jetbrains.kotlin.ir.declarations.*\n+import org.jetbrains.kotlin.ir.declarations.impl.*\n+import org.jetbrains.kotlin.ir.expressions.*\n+import org.jetbrains.kotlin.ir.expressions.impl.*\n+import org.jetbrains.kotlin.ir.symbols.*\n+import org.jetbrains.kotlin.ir.types.*\n+import org.jetbrains.kotlin.ir.util.*\n+import org.jetbrains.kotlin.ir.visitors.*\n+import org.jetbrains.kotlin.name.*\n+import org.jetbrains.kotlin.util.capitalizeDecapitalize.*\n+import org.jetbrains.kotlinx.atomicfu.compiler.backend.*\n+import kotlin.collections.set\n+\n+private const val AFU_PKG = \"kotlinx.atomicfu\"\n+private const val TRACE_BASE_TYPE = \"TraceBase\"\n+private const val ATOMIC_VALUE_FACTORY = \"atomic\"\n+private const val INVOKE = \"invoke\"\n+private const val APPEND = \"append\"\n+private const val GET = \"get\"\n+private const val ATOMICFU = \"atomicfu\"\n+private const val ATOMIC_ARRAY_RECEIVER_SUFFIX = \"\\$array\"\n+private const val DISPATCH_RECEIVER = \"${ATOMICFU}\\$dispatchReceiver\"\n+private const val ATOMIC_HANDLER = \"${ATOMICFU}\\$handler\"\n+private const val ACTION = \"${ATOMICFU}\\$action\"\n+private const val INDEX = \"${ATOMICFU}\\$index\"\n+private const val VOLATILE_WRAPPER_SUFFIX = \"\\$VolatileWrapper\"\n+private const val LOOP = \"loop\"\n+private const val UPDATE = \"update\"\n+\n+class AtomicfuJvmIrTransformer(\n+ private val context: IrPluginContext,\n+ private val atomicSymbols: AtomicSymbols\n+) {\n+ private val irBuiltIns = context.irBuiltIns\n+\n+ private val AFU_VALUE_TYPES: Map = mapOf(\n+ \"AtomicInt\" to irBuiltIns.intType,\n+ \"AtomicLong\" to irBuiltIns.longType,\n+ \"AtomicBoolean\" to irBuiltIns.booleanType,\n+ \"AtomicRef\" to irBuiltIns.anyNType\n+ )\n+\n+ private val ATOMICFU_INLINE_FUNCTIONS = setOf(\"loop\", \"update\", \"getAndUpdate\", \"updateAndGet\")\n+ protected val ATOMIC_VALUE_TYPES = setOf(\"AtomicInt\", \"AtomicLong\", \"AtomicBoolean\", \"AtomicRef\")\n+ protected val ATOMIC_ARRAY_TYPES = setOf(\"AtomicIntArray\", \"AtomicLongArray\", \"AtomicBooleanArray\", \"AtomicArray\")\n+\n+ fun transform(moduleFragment: IrModuleFragment) {\n+ transformAtomicFields(moduleFragment)\n+ transformAtomicExtensions(moduleFragment)\n+ transformAtomicfuDeclarations(moduleFragment)\n+ moduleFragment.files.forEach { irFile ->\n+ irFile.patchDeclarationParents()\n+ }\n+ }\n+\n+ private fun transformAtomicFields(moduleFragment: IrModuleFragment) {\n+ moduleFragment.files.forEach { irFile ->\n+ irFile.transform(AtomicHandlerTransformer(), null)\n+ }\n+ }\n+\n+ private fun transformAtomicExtensions(moduleFragment: IrModuleFragment) {\n+ moduleFragment.files.forEach { irFile ->\n+ irFile.transform(AtomicExtensionTransformer(), null)\n+ }\n+ }\n+\n+ private fun transformAtomicfuDeclarations(moduleFragment: IrModuleFragment) {\n+ moduleFragment.files.forEach { irFile ->\n+ irFile.transform(AtomicfuTransformer(), null)\n+ }\n+ }\n+\n+ private val propertyToAtomicHandler = mutableMapOf()\n+\n+ private inner class AtomicHandlerTransformer : IrElementTransformer {\n+ override fun 
visitClass(declaration: IrClass, data: IrFunction?): IrStatement {\n+ declaration.declarations.filter(::fromKotlinxAtomicfu).forEach {\n+ (it as IrProperty).transformAtomicfuProperty(declaration)\n+ }\n+ return super.visitClass(declaration, data)\n+ }\n+\n+ override fun visitFile(declaration: IrFile, data: IrFunction?): IrFile {\n+ declaration.declarations.filter(::fromKotlinxAtomicfu).forEach {\n+ (it as IrProperty).transformAtomicfuProperty(declaration)\n+ }\n+ return super.visitFile(declaration, data)\n+ }\n+\n+ private fun IrProperty.transformAtomicfuProperty(parent: IrDeclarationContainer) {\n+ val isTopLevel = parent is IrFile\n+ when {\n+ isAtomic() -> {\n+ if (isTopLevel) {\n+ val parentClass = generateWrapperClass(this, parent)\n+ transformAtomicProperty(parentClass)\n+ moveFromFileToClass(parent as IrFile, parentClass)\n+ } else {\n+ transformAtomicProperty(parent as IrClass)\n+ }\n+ }\n+ isDelegatedToAtomic() -> transformDelegatedProperty(parent)\n+ isAtomicArray() -> transformAtomicArrayProperty(parent)\n+ isTrace() -> parent.declarations.remove(this)\n+ else -> {}\n+ }\n+ }\n+\n+ private fun IrProperty.moveFromFileToClass(\n+ parentFile: IrFile,\n+ parentClass: IrClass\n+ ) {\n+ parentFile.declarations.remove(this)\n+ parentClass.declarations.add(this)\n+ parent = parentClass\n+ }\n+\n+ private fun IrProperty.transformAtomicProperty(parentClass: IrClass) {\n+ // Atomic property transformation:\n+ // 1. replace it's backingField with a volatile property of atomic value type\n+ // 2. create j.u.c.a.Atomic*FieldUpdater for this volatile property to handle it's value atomically\n+ // val a = atomic(0) ->\n+ // volatile var a: Int = 0\n+ // val a$FU = AtomicIntegerFieldUpdater.newUpdater(parentClass, \"a\")\n+ //\n+ // Top-level atomic properties transformation:\n+ // 1. replace it's backingField with a volatile property of atomic value type\n+ // 2. wrap this volatile property into the generated class\n+ // 3. create j.u.c.a.Atomic*FieldUpdater for the volatile property to handle it's value atomically\n+ // val a = atomic(0) ->\n+ // class A$ParentFile$VolatileWrapper { volatile var a: Int = 0 }\n+ // val a$FU = AtomicIntegerFieldUpdater.newUpdater(A$ParentFile$VolatileWrapper::class, \"a\")\n+ backingField = buildVolatileRawField(this, parentClass)\n+ // update property accessors\n+ context.addDefaultGetter(this, parentClass)\n+ val fieldUpdater = addJucaAFUProperty(this, parentClass)\n+ registerAtomicHandler(fieldUpdater)\n+ }\n+\n+ private fun IrProperty.transformAtomicArrayProperty(parent: IrDeclarationContainer) {\n+ // Replace atomicfu array classes with the corresponding atomic arrays from j.u.c.a.:\n+ // val intArr = atomicArrayOfNulls(5) ->\n+ // val intArr = AtomicReferenceArray(5)\n+ backingField = buildJucaArrayField(this, parent)\n+ // update property accessors\n+ context.addDefaultGetter(this, parent)\n+ registerAtomicHandler(this)\n+ }\n+\n+ private fun IrProperty.transformDelegatedProperty(parentClass: IrDeclarationContainer) {\n+ backingField?.let {\n+ it.initializer?.let {\n+ val initializer = it.expression as IrCall\n+ if (initializer.isAtomicFactory()) {\n+ // Property delegated to atomic factory invocation:\n+ // 1. replace it's backingField with a volatile property of value type\n+ // 2. 
transform getter/setter\n+ // var a by atomic(0) ->\n+ // volatile var a: Int = 0\n+ val volatileField = buildVolatileRawField(this, parentClass)\n+ backingField = volatileField\n+ getter?.transformAccessor(volatileField, getter?.dispatchReceiverParameter?.capture())\n+ setter?.transformAccessor(volatileField, setter?.dispatchReceiverParameter?.capture())\n+ } else {\n+ // Property delegated to the atomic property:\n+ // 1. delegate it's accessors to get/set of the backingField of the atomic delegate\n+ // (that is already transformed to a volatile field of value type)\n+ // val _a = atomic(0)\n+ // var a by _a ->\n+ // volatile var _a: Int = 0\n+ // var a by _a\n+ val atomicProperty = initializer.getCorrespondingProperty()\n+ val volatileField = atomicProperty.backingField!!\n+ backingField = null\n+ if (atomicProperty.isTopLevel()) {\n+ atomicSymbols.createBuilder(symbol).run {\n+ val parent = getStaticVolatileWrapperInstance(atomicProperty)\n+ getter?.transformAccessor(volatileField, getProperty(parent, null))\n+ setter?.transformAccessor(volatileField, getProperty(parent, null))\n+ }\n+ } else {\n+ if (this.parent == atomicProperty.parent) {\n+ //class A {\n+ // val _a = atomic()\n+ // var a by _a\n+ //}\n+ getter?.transformAccessor(volatileField, getter?.dispatchReceiverParameter?.capture())\n+ setter?.transformAccessor(volatileField, setter?.dispatchReceiverParameter?.capture())\n+ } else {\n+ //class A {\n+ // val _a = atomic()\n+ // inner class B {\n+ // var a by _a\n+ // }\n+ //}\n+ val thisReceiver = atomicProperty.parentAsClass.thisReceiver\n+ getter?.transformAccessor(volatileField, thisReceiver?.capture())\n+ setter?.transformAccessor(volatileField, thisReceiver?.capture())\n+ }\n+ }\n+ }\n+ }\n+ }\n+ }\n+\n+ private fun IrFunction.transformAccessor(volatileField: IrField, parent: IrExpression?) {\n+ val accessor = this\n+ atomicSymbols.createBuilder(symbol).run {", + "comment_created_at": "2022-06-09T05:37:45+00:00", + "comment_author": "ilmirus", + "comment_body": "Generally, we use `with(...createBuilder(symbol))` idiom, when we create IR nodes. This helps us spot the places, where IR is create at a glance.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "786266216", + "pr_number": 4560, + "pr_file": "compiler/ir/ir.tree/gen/org/jetbrains/kotlin/ir/expressions/IrGetValue.kt", + "created_at": "2022-01-17T20:02:16+00:00", + "commented_code": "/*\n * Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\n//This file was generated automatically\n//DO NOT MODIFY IT MANUALLY\n\npackage org.jetbrains.kotlin.ir.expressions\n\nimport org.jetbrains.kotlin.ir.visitors.IrElementVisitor\n\n/**\n * A leaf IR tree element.\n * @sample org.jetbrains.kotlin.ir.generator.IrTree.getValue\n */\nabstract class IrGetValue : IrValueAccessExpression() {\n override fun accept(visitor: IrElementVisitor, data: D): R =\n visitor.visitGetValue(this, data)\n\n abstract fun copyWithOffsets(newStartOffset: Int, newEndOffset: Int): IrGetValue", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "786266216", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4560, + "pr_file": "compiler/ir/ir.tree/gen/org/jetbrains/kotlin/ir/expressions/IrGetValue.kt", + "discussion_id": "786266216", + "commented_code": "@@ -0,0 +1,22 @@\n+/*\n+ * Copyright 2010-2021 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+//This file was generated automatically\n+//DO NOT MODIFY IT MANUALLY\n+\n+package org.jetbrains.kotlin.ir.expressions\n+\n+import org.jetbrains.kotlin.ir.visitors.IrElementVisitor\n+\n+/**\n+ * A leaf IR tree element.\n+ * @sample org.jetbrains.kotlin.ir.generator.IrTree.getValue\n+ */\n+abstract class IrGetValue : IrValueAccessExpression() {\n+ override fun accept(visitor: IrElementVisitor, data: D): R =\n+ visitor.visitGetValue(this, data)\n+\n+ abstract fun copyWithOffsets(newStartOffset: Int, newEndOffset: Int): IrGetValue", + "comment_created_at": "2022-01-17T20:02:16+00:00", + "comment_author": "bashor", + "comment_body": "Minor: probably it could be extracted to extension function, same as for IrConst", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kotlin-keep-code-clearly-organized.md b/_reviewers/kotlin-keep-code-clearly-organized.md index 9707750..3c57d2b 100644 --- a/_reviewers/kotlin-keep-code-clearly-organized.md +++ b/_reviewers/kotlin-keep-code-clearly-organized.md @@ -58,173 +58,3 @@ private fun String.parseSign(start: Int): Pair { } } ``` - - -[ - { - "discussion_id": "1809231585", - "pr_number": 5364, - "pr_file": "libraries/stdlib/jvm/src/kotlin/text/StringNumberConversionsJVM.kt", - "created_at": "2024-10-21T17:37:16+00:00", - "commented_code": "public fun String.toBigDecimalOrNull(mathContext: java.math.MathContext): java.math.BigDecimal? =\n screenFloatValue(this) { it.toBigDecimal(mathContext) }\n\n/**\n * Recommended floating point number validation RegEx from the javadoc of `java.lang.Double.valueOf(String)`\n */\nprivate object ScreenFloatValueRegEx {\n @JvmField val value = run {\n val Digits = \"(\\\\p{Digit}+)\"\n val HexDigits = \"(\\\\p{XDigit}+)\"\n val Exp = \"[eE][+-]?$Digits\"\n\n val HexString = \"(0[xX]$HexDigits(\\\\.)?)|\" + // 0[xX] HexDigits ._opt BinaryExponent FloatTypeSuffix_opt\n \"(0[xX]$HexDigits?(\\\\.)$HexDigits)\" // 0[xX] HexDigits_opt . HexDigits BinaryExponent FloatTypeSuffix_opt\n\n val Number = \"($Digits(\\\\.)?($Digits?)($Exp)?)|\" + // Digits ._opt Digits_opt ExponentPart_opt FloatTypeSuffix_opt\n \"(\\\\.($Digits)($Exp)?)|\" + // . Digits ExponentPart_opt FloatTypeSuffix_opt\n \"(($HexString)[pP][+-]?$Digits)\" // HexSignificand BinaryExponent\n\n val fpRegex = \"[\\\\x00-\\\\x20]*[+-]?(NaN|Infinity|(($Number)[fFdD]?))[\\\\x00-\\\\x20]*\"\n\n Regex(fpRegex)\n }\n}\n\nprivate inline fun screenFloatValue(str: String, parse: (String) -> T): T? {\n return try {\n if (ScreenFloatValueRegEx.value.matches(str))\n if (isValidFloat(str))\n parse(str)\n else\n null\n } catch (e: NumberFormatException) { // overflow\n } catch (_: NumberFormatException) { // overflow\n null\n }\n}\n\nprivate const val LengthOfNaN = 2 // \"NaN\".length - 1\nprivate const val LengthOfInfinity = 7 // \"Infinity\".length - 1\n\nprivate fun isValidFloat(s: String): Boolean {\n // A float can have one of two representations:\n //\n // 1. Standard:\n // - With an integer part only: 1234\n // - With integer and fractional parts: 1234.4678\n // - With a fractional part only: .4678\n //\n // Optional sign prefix: + or -\n // Optional signed exponent: e or E, followed by optionally signed digits (+12, -12, 12)\n // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n //\n // 2. 
Hexadecimal:\n // - With an integer part only: 0x12ab\n // - With integer and fractional parts: 0x12ab.CD78\n // - With a fractional part only: 0x.CD78\n //\n // Mandatory signed exponent: p or P, followed by optionally signed digits (+12, -12, 12)\n //\n // Optional sign prefix: + or -\n // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n //\n // Two special cases:\n // \"NaN\" and \"Infinity\" strings, can have an optional sign prefix (+ or -)\n //\n // Implementation notes:\n // - The pattern \"myChar.code or 0x20 == 'x'.code\" is used to perform a case-insensitive\n // comparison of a character. Adding the 0x20 bit turns an upper case ASCII letter into\n // a lower case one.\n\n var start = 0\n var end = s.length - 1\n\n // Skip leading spaces\n while (start <= end && s[start].code <= 0x20) start++\n\n // Empty/whitespace string\n if (start > end) return false\n\n // Skip trailing spaces\n while (end > start && s[end].code <= 0x20) end--\n\n // Number starts with a positive or negative sign\n if (s[start] == '+' || s[start] == '-') start++\n // If we have nothing after the sign, the string is invalid\n if (start > end) return false\n\n var hasIntegerPart: Boolean\n var hasFractionalPart = false\n var isHex = false\n\n // Might be a hex string\n if (s[start] == '0') {\n start++\n // A \"0\" on its own is valid\n if (start > end) return true\n\n // Test for [xX] to see if we truly have a hex string\n if (s[start].code or 0x20 == 'x'.code) {", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1809231585", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5364, - "pr_file": "libraries/stdlib/jvm/src/kotlin/text/StringNumberConversionsJVM.kt", - "discussion_id": "1809231585", - "commented_code": "@@ -239,35 +239,215 @@ public fun String.toBigDecimalOrNull(): java.math.BigDecimal? =\n public fun String.toBigDecimalOrNull(mathContext: java.math.MathContext): java.math.BigDecimal? =\n screenFloatValue(this) { it.toBigDecimal(mathContext) }\n \n-/**\n- * Recommended floating point number validation RegEx from the javadoc of `java.lang.Double.valueOf(String)`\n- */\n-private object ScreenFloatValueRegEx {\n- @JvmField val value = run {\n- val Digits = \"(\\\\p{Digit}+)\"\n- val HexDigits = \"(\\\\p{XDigit}+)\"\n- val Exp = \"[eE][+-]?$Digits\"\n-\n- val HexString = \"(0[xX]$HexDigits(\\\\.)?)|\" + // 0[xX] HexDigits ._opt BinaryExponent FloatTypeSuffix_opt\n- \"(0[xX]$HexDigits?(\\\\.)$HexDigits)\" // 0[xX] HexDigits_opt . HexDigits BinaryExponent FloatTypeSuffix_opt\n-\n- val Number = \"($Digits(\\\\.)?($Digits?)($Exp)?)|\" + // Digits ._opt Digits_opt ExponentPart_opt FloatTypeSuffix_opt\n- \"(\\\\.($Digits)($Exp)?)|\" + // . Digits ExponentPart_opt FloatTypeSuffix_opt\n- \"(($HexString)[pP][+-]?$Digits)\" // HexSignificand BinaryExponent\n-\n- val fpRegex = \"[\\\\x00-\\\\x20]*[+-]?(NaN|Infinity|(($Number)[fFdD]?))[\\\\x00-\\\\x20]*\"\n-\n- Regex(fpRegex)\n- }\n-}\n-\n private inline fun screenFloatValue(str: String, parse: (String) -> T): T? {\n return try {\n- if (ScreenFloatValueRegEx.value.matches(str))\n+ if (isValidFloat(str))\n parse(str)\n else\n null\n- } catch (e: NumberFormatException) { // overflow\n+ } catch (_: NumberFormatException) { // overflow\n null\n }\n }\n+\n+private const val LengthOfNaN = 2 // \"NaN\".length - 1\n+private const val LengthOfInfinity = 7 // \"Infinity\".length - 1\n+\n+private fun isValidFloat(s: String): Boolean {\n+ // A float can have one of two representations:\n+ //\n+ // 1. 
Standard:\n+ // - With an integer part only: 1234\n+ // - With integer and fractional parts: 1234.4678\n+ // - With a fractional part only: .4678\n+ //\n+ // Optional sign prefix: + or -\n+ // Optional signed exponent: e or E, followed by optionally signed digits (+12, -12, 12)\n+ // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n+ //\n+ // 2. Hexadecimal:\n+ // - With an integer part only: 0x12ab\n+ // - With integer and fractional parts: 0x12ab.CD78\n+ // - With a fractional part only: 0x.CD78\n+ //\n+ // Mandatory signed exponent: p or P, followed by optionally signed digits (+12, -12, 12)\n+ //\n+ // Optional sign prefix: + or -\n+ // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n+ //\n+ // Two special cases:\n+ // \"NaN\" and \"Infinity\" strings, can have an optional sign prefix (+ or -)\n+ //\n+ // Implementation notes:\n+ // - The pattern \"myChar.code or 0x20 == 'x'.code\" is used to perform a case-insensitive\n+ // comparison of a character. Adding the 0x20 bit turns an upper case ASCII letter into\n+ // a lower case one.\n+\n+ var start = 0\n+ var end = s.length - 1\n+\n+ // Skip leading spaces\n+ while (start <= end && s[start].code <= 0x20) start++\n+\n+ // Empty/whitespace string\n+ if (start > end) return false\n+\n+ // Skip trailing spaces\n+ while (end > start && s[end].code <= 0x20) end--\n+\n+ // Number starts with a positive or negative sign\n+ if (s[start] == '+' || s[start] == '-') start++\n+ // If we have nothing after the sign, the string is invalid\n+ if (start > end) return false\n+\n+ var hasIntegerPart: Boolean\n+ var hasFractionalPart = false\n+ var isHex = false\n+\n+ // Might be a hex string\n+ if (s[start] == '0') {\n+ start++\n+ // A \"0\" on its own is valid\n+ if (start > end) return true\n+\n+ // Test for [xX] to see if we truly have a hex string\n+ if (s[start].code or 0x20 == 'x'.code) {", - "comment_created_at": "2024-10-21T17:37:16+00:00", - "comment_author": "fzhinkin", - "comment_body": "`Char.code or 0x20` deserved its own private function.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1809238149", - "pr_number": 5364, - "pr_file": "libraries/stdlib/jvm/src/kotlin/text/StringNumberConversionsJVM.kt", - "created_at": "2024-10-21T17:42:44+00:00", - "commented_code": "public fun String.toBigDecimalOrNull(mathContext: java.math.MathContext): java.math.BigDecimal? =\n screenFloatValue(this) { it.toBigDecimal(mathContext) }\n\n/**\n * Recommended floating point number validation RegEx from the javadoc of `java.lang.Double.valueOf(String)`\n */\nprivate object ScreenFloatValueRegEx {\n @JvmField val value = run {\n val Digits = \"(\\\\p{Digit}+)\"\n val HexDigits = \"(\\\\p{XDigit}+)\"\n val Exp = \"[eE][+-]?$Digits\"\n\n val HexString = \"(0[xX]$HexDigits(\\\\.)?)|\" + // 0[xX] HexDigits ._opt BinaryExponent FloatTypeSuffix_opt\n \"(0[xX]$HexDigits?(\\\\.)$HexDigits)\" // 0[xX] HexDigits_opt . HexDigits BinaryExponent FloatTypeSuffix_opt\n\n val Number = \"($Digits(\\\\.)?($Digits?)($Exp)?)|\" + // Digits ._opt Digits_opt ExponentPart_opt FloatTypeSuffix_opt\n \"(\\\\.($Digits)($Exp)?)|\" + // . Digits ExponentPart_opt FloatTypeSuffix_opt\n \"(($HexString)[pP][+-]?$Digits)\" // HexSignificand BinaryExponent\n\n val fpRegex = \"[\\\\x00-\\\\x20]*[+-]?(NaN|Infinity|(($Number)[fFdD]?))[\\\\x00-\\\\x20]*\"\n\n Regex(fpRegex)\n }\n}\n\nprivate inline fun screenFloatValue(str: String, parse: (String) -> T): T? 
{\n return try {\n if (ScreenFloatValueRegEx.value.matches(str))\n if (isValidFloat(str))\n parse(str)\n else\n null\n } catch (e: NumberFormatException) { // overflow\n } catch (_: NumberFormatException) { // overflow\n null\n }\n}\n\nprivate const val LengthOfNaN = 2 // \"NaN\".length - 1\nprivate const val LengthOfInfinity = 7 // \"Infinity\".length - 1\n\nprivate fun isValidFloat(s: String): Boolean {\n // A float can have one of two representations:\n //\n // 1. Standard:\n // - With an integer part only: 1234\n // - With integer and fractional parts: 1234.4678\n // - With a fractional part only: .4678\n //\n // Optional sign prefix: + or -\n // Optional signed exponent: e or E, followed by optionally signed digits (+12, -12, 12)\n // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n //\n // 2. Hexadecimal:\n // - With an integer part only: 0x12ab\n // - With integer and fractional parts: 0x12ab.CD78\n // - With a fractional part only: 0x.CD78\n //\n // Mandatory signed exponent: p or P, followed by optionally signed digits (+12, -12, 12)\n //\n // Optional sign prefix: + or -\n // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n //\n // Two special cases:\n // \"NaN\" and \"Infinity\" strings, can have an optional sign prefix (+ or -)\n //\n // Implementation notes:\n // - The pattern \"myChar.code or 0x20 == 'x'.code\" is used to perform a case-insensitive\n // comparison of a character. Adding the 0x20 bit turns an upper case ASCII letter into\n // a lower case one.\n\n var start = 0\n var end = s.length - 1\n\n // Skip leading spaces\n while (start <= end && s[start].code <= 0x20) start++", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1809238149", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5364, - "pr_file": "libraries/stdlib/jvm/src/kotlin/text/StringNumberConversionsJVM.kt", - "discussion_id": "1809238149", - "commented_code": "@@ -239,35 +239,215 @@ public fun String.toBigDecimalOrNull(): java.math.BigDecimal? =\n public fun String.toBigDecimalOrNull(mathContext: java.math.MathContext): java.math.BigDecimal? =\n screenFloatValue(this) { it.toBigDecimal(mathContext) }\n \n-/**\n- * Recommended floating point number validation RegEx from the javadoc of `java.lang.Double.valueOf(String)`\n- */\n-private object ScreenFloatValueRegEx {\n- @JvmField val value = run {\n- val Digits = \"(\\\\p{Digit}+)\"\n- val HexDigits = \"(\\\\p{XDigit}+)\"\n- val Exp = \"[eE][+-]?$Digits\"\n-\n- val HexString = \"(0[xX]$HexDigits(\\\\.)?)|\" + // 0[xX] HexDigits ._opt BinaryExponent FloatTypeSuffix_opt\n- \"(0[xX]$HexDigits?(\\\\.)$HexDigits)\" // 0[xX] HexDigits_opt . HexDigits BinaryExponent FloatTypeSuffix_opt\n-\n- val Number = \"($Digits(\\\\.)?($Digits?)($Exp)?)|\" + // Digits ._opt Digits_opt ExponentPart_opt FloatTypeSuffix_opt\n- \"(\\\\.($Digits)($Exp)?)|\" + // . Digits ExponentPart_opt FloatTypeSuffix_opt\n- \"(($HexString)[pP][+-]?$Digits)\" // HexSignificand BinaryExponent\n-\n- val fpRegex = \"[\\\\x00-\\\\x20]*[+-]?(NaN|Infinity|(($Number)[fFdD]?))[\\\\x00-\\\\x20]*\"\n-\n- Regex(fpRegex)\n- }\n-}\n-\n private inline fun screenFloatValue(str: String, parse: (String) -> T): T? 
{\n return try {\n- if (ScreenFloatValueRegEx.value.matches(str))\n+ if (isValidFloat(str))\n parse(str)\n else\n null\n- } catch (e: NumberFormatException) { // overflow\n+ } catch (_: NumberFormatException) { // overflow\n null\n }\n }\n+\n+private const val LengthOfNaN = 2 // \"NaN\".length - 1\n+private const val LengthOfInfinity = 7 // \"Infinity\".length - 1\n+\n+private fun isValidFloat(s: String): Boolean {\n+ // A float can have one of two representations:\n+ //\n+ // 1. Standard:\n+ // - With an integer part only: 1234\n+ // - With integer and fractional parts: 1234.4678\n+ // - With a fractional part only: .4678\n+ //\n+ // Optional sign prefix: + or -\n+ // Optional signed exponent: e or E, followed by optionally signed digits (+12, -12, 12)\n+ // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n+ //\n+ // 2. Hexadecimal:\n+ // - With an integer part only: 0x12ab\n+ // - With integer and fractional parts: 0x12ab.CD78\n+ // - With a fractional part only: 0x.CD78\n+ //\n+ // Mandatory signed exponent: p or P, followed by optionally signed digits (+12, -12, 12)\n+ //\n+ // Optional sign prefix: + or -\n+ // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n+ //\n+ // Two special cases:\n+ // \"NaN\" and \"Infinity\" strings, can have an optional sign prefix (+ or -)\n+ //\n+ // Implementation notes:\n+ // - The pattern \"myChar.code or 0x20 == 'x'.code\" is used to perform a case-insensitive\n+ // comparison of a character. Adding the 0x20 bit turns an upper case ASCII letter into\n+ // a lower case one.\n+\n+ var start = 0\n+ var end = s.length - 1\n+\n+ // Skip leading spaces\n+ while (start <= end && s[start].code <= 0x20) start++", - "comment_created_at": "2024-10-21T17:42:44+00:00", - "comment_author": "fzhinkin", - "comment_body": "Introduction of something like `inline fun String.advanceWhile(index: Int, endInclusive: Int, predicate: (Char) -> Boolean): Int` will shorten the whole function and will improve its readability:\r\n\r\n- `start = s.advanceWhile(start, end) { it.code <= 20 }`\r\n- `start = s.advanceWhile(start, end) { it.isAsciiDigit() || it.isHexLetter() }`\r\n- `start = s.advanceWhile(start, end) { it.isAsciiDigit() }`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1809255640", - "pr_number": 5364, - "pr_file": "libraries/stdlib/jvm/src/kotlin/text/StringNumberConversionsJVM.kt", - "created_at": "2024-10-21T17:57:37+00:00", - "commented_code": "public fun String.toBigDecimalOrNull(mathContext: java.math.MathContext): java.math.BigDecimal? =\n screenFloatValue(this) { it.toBigDecimal(mathContext) }\n\n/**\n * Recommended floating point number validation RegEx from the javadoc of `java.lang.Double.valueOf(String)`\n */\nprivate object ScreenFloatValueRegEx {\n @JvmField val value = run {\n val Digits = \"(\\\\p{Digit}+)\"\n val HexDigits = \"(\\\\p{XDigit}+)\"\n val Exp = \"[eE][+-]?$Digits\"\n\n val HexString = \"(0[xX]$HexDigits(\\\\.)?)|\" + // 0[xX] HexDigits ._opt BinaryExponent FloatTypeSuffix_opt\n \"(0[xX]$HexDigits?(\\\\.)$HexDigits)\" // 0[xX] HexDigits_opt . HexDigits BinaryExponent FloatTypeSuffix_opt\n\n val Number = \"($Digits(\\\\.)?($Digits?)($Exp)?)|\" + // Digits ._opt Digits_opt ExponentPart_opt FloatTypeSuffix_opt\n \"(\\\\.($Digits)($Exp)?)|\" + // . 
Digits ExponentPart_opt FloatTypeSuffix_opt\n \"(($HexString)[pP][+-]?$Digits)\" // HexSignificand BinaryExponent\n\n val fpRegex = \"[\\\\x00-\\\\x20]*[+-]?(NaN|Infinity|(($Number)[fFdD]?))[\\\\x00-\\\\x20]*\"\n\n Regex(fpRegex)\n }\n}\n\nprivate inline fun screenFloatValue(str: String, parse: (String) -> T): T? {\n return try {\n if (ScreenFloatValueRegEx.value.matches(str))\n if (isValidFloat(str))\n parse(str)\n else\n null\n } catch (e: NumberFormatException) { // overflow\n } catch (_: NumberFormatException) { // overflow\n null\n }\n}\n\nprivate const val LengthOfNaN = 2 // \"NaN\".length - 1\nprivate const val LengthOfInfinity = 7 // \"Infinity\".length - 1\n\nprivate fun isValidFloat(s: String): Boolean {\n // A float can have one of two representations:\n //\n // 1. Standard:\n // - With an integer part only: 1234\n // - With integer and fractional parts: 1234.4678\n // - With a fractional part only: .4678\n //\n // Optional sign prefix: + or -\n // Optional signed exponent: e or E, followed by optionally signed digits (+12, -12, 12)\n // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n //\n // 2. Hexadecimal:\n // - With an integer part only: 0x12ab\n // - With integer and fractional parts: 0x12ab.CD78\n // - With a fractional part only: 0x.CD78\n //\n // Mandatory signed exponent: p or P, followed by optionally signed digits (+12, -12, 12)\n //\n // Optional sign prefix: + or -\n // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n //\n // Two special cases:\n // \"NaN\" and \"Infinity\" strings, can have an optional sign prefix (+ or -)\n //\n // Implementation notes:\n // - The pattern \"myChar.code or 0x20 == 'x'.code\" is used to perform a case-insensitive\n // comparison of a character. Adding the 0x20 bit turns an upper case ASCII letter into\n // a lower case one.\n\n var start = 0\n var end = s.length - 1\n\n // Skip leading spaces\n while (start <= end && s[start].code <= 0x20) start++\n\n // Empty/whitespace string\n if (start > end) return false\n\n // Skip trailing spaces\n while (end > start && s[end].code <= 0x20) end--\n\n // Number starts with a positive or negative sign\n if (s[start] == '+' || s[start] == '-') start++\n // If we have nothing after the sign, the string is invalid\n if (start > end) return false\n\n var hasIntegerPart: Boolean\n var hasFractionalPart = false\n var isHex = false\n\n // Might be a hex string\n if (s[start] == '0') {\n start++\n // A \"0\" on its own is valid\n if (start > end) return true\n\n // Test for [xX] to see if we truly have a hex string\n if (s[start].code or 0x20 == 'x'.code) {\n start++\n\n // Look for hex digits after the 0x prefix\n var checkpoint = start\n while (start <= end) {\n val d = s[start]\n if (d.isAsciiDigit() || d.isHexLetter()) {\n start++\n } else {\n break\n }\n }\n // Check if we found 0x*****, otherwise, the hex number might be of the\n // form 0x.*******\n hasIntegerPart = checkpoint != start\n\n // A hex string must have an exponent, the string is invalid if we only found an\n // integer part\n if (start > end) return false\n\n if (s[start] == '.') {\n start++\n\n // Look for hex digits for the fractional part\n checkpoint = start\n while (start <= end) {\n val d = s[start]\n if (d.isAsciiDigit() || d.isHexLetter()) {\n start++\n } else {\n break\n }\n }\n\n // Did we find a fractional part?\n hasFractionalPart = checkpoint != start\n }\n\n // A string must have an integer part, or a fractional part, or both\n if (!hasIntegerPart && !hasFractionalPart) return false\n\n // 
A hex string must have an exponent, the string is invalid if we only found an\n // integer and/or fractional part\n if (start > end) return false\n\n isHex = true\n } else {\n // Rewind the 0 we just parsed to make things easier below and try to parse a non-\n // hexadecimal string representation of a float\n start--\n }\n }\n\n // Parse a non-hexadecimal representations\n if (!isHex) {\n // Look for digits before the decimal separator, if any\n var checkpoint = start\n while (start <= end && s[start].isAsciiDigit()) start++\n\n // If there's no integer part, the float might be of the form .1234\n hasIntegerPart = checkpoint != start\n\n // A non-hexadecimal representation only needs an integer part, we can stop here\n if (start > end) return hasIntegerPart\n\n if (s[start] == '.') {\n start++\n\n // Look for the fractional part\n checkpoint = start\n while (start <= end && s[start].isAsciiDigit()) start++\n\n // Did we find a fractional part?\n hasFractionalPart = checkpoint != start\n }\n\n // A string must have an integer part, or a fractional part, or both\n if (!hasIntegerPart && !hasFractionalPart) {\n // Special case non-finite constants\n val constant = when (end) {", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1809255640", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5364, - "pr_file": "libraries/stdlib/jvm/src/kotlin/text/StringNumberConversionsJVM.kt", - "discussion_id": "1809255640", - "commented_code": "@@ -239,35 +239,215 @@ public fun String.toBigDecimalOrNull(): java.math.BigDecimal? =\n public fun String.toBigDecimalOrNull(mathContext: java.math.MathContext): java.math.BigDecimal? =\n screenFloatValue(this) { it.toBigDecimal(mathContext) }\n \n-/**\n- * Recommended floating point number validation RegEx from the javadoc of `java.lang.Double.valueOf(String)`\n- */\n-private object ScreenFloatValueRegEx {\n- @JvmField val value = run {\n- val Digits = \"(\\\\p{Digit}+)\"\n- val HexDigits = \"(\\\\p{XDigit}+)\"\n- val Exp = \"[eE][+-]?$Digits\"\n-\n- val HexString = \"(0[xX]$HexDigits(\\\\.)?)|\" + // 0[xX] HexDigits ._opt BinaryExponent FloatTypeSuffix_opt\n- \"(0[xX]$HexDigits?(\\\\.)$HexDigits)\" // 0[xX] HexDigits_opt . HexDigits BinaryExponent FloatTypeSuffix_opt\n-\n- val Number = \"($Digits(\\\\.)?($Digits?)($Exp)?)|\" + // Digits ._opt Digits_opt ExponentPart_opt FloatTypeSuffix_opt\n- \"(\\\\.($Digits)($Exp)?)|\" + // . Digits ExponentPart_opt FloatTypeSuffix_opt\n- \"(($HexString)[pP][+-]?$Digits)\" // HexSignificand BinaryExponent\n-\n- val fpRegex = \"[\\\\x00-\\\\x20]*[+-]?(NaN|Infinity|(($Number)[fFdD]?))[\\\\x00-\\\\x20]*\"\n-\n- Regex(fpRegex)\n- }\n-}\n-\n private inline fun screenFloatValue(str: String, parse: (String) -> T): T? {\n return try {\n- if (ScreenFloatValueRegEx.value.matches(str))\n+ if (isValidFloat(str))\n parse(str)\n else\n null\n- } catch (e: NumberFormatException) { // overflow\n+ } catch (_: NumberFormatException) { // overflow\n null\n }\n }\n+\n+private const val LengthOfNaN = 2 // \"NaN\".length - 1\n+private const val LengthOfInfinity = 7 // \"Infinity\".length - 1\n+\n+private fun isValidFloat(s: String): Boolean {\n+ // A float can have one of two representations:\n+ //\n+ // 1. 
Standard:\n+ // - With an integer part only: 1234\n+ // - With integer and fractional parts: 1234.4678\n+ // - With a fractional part only: .4678\n+ //\n+ // Optional sign prefix: + or -\n+ // Optional signed exponent: e or E, followed by optionally signed digits (+12, -12, 12)\n+ // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n+ //\n+ // 2. Hexadecimal:\n+ // - With an integer part only: 0x12ab\n+ // - With integer and fractional parts: 0x12ab.CD78\n+ // - With a fractional part only: 0x.CD78\n+ //\n+ // Mandatory signed exponent: p or P, followed by optionally signed digits (+12, -12, 12)\n+ //\n+ // Optional sign prefix: + or -\n+ // Optional suffix: f, F, d, or D (for instance 12.34f or .34D)\n+ //\n+ // Two special cases:\n+ // \"NaN\" and \"Infinity\" strings, can have an optional sign prefix (+ or -)\n+ //\n+ // Implementation notes:\n+ // - The pattern \"myChar.code or 0x20 == 'x'.code\" is used to perform a case-insensitive\n+ // comparison of a character. Adding the 0x20 bit turns an upper case ASCII letter into\n+ // a lower case one.\n+\n+ var start = 0\n+ var end = s.length - 1\n+\n+ // Skip leading spaces\n+ while (start <= end && s[start].code <= 0x20) start++\n+\n+ // Empty/whitespace string\n+ if (start > end) return false\n+\n+ // Skip trailing spaces\n+ while (end > start && s[end].code <= 0x20) end--\n+\n+ // Number starts with a positive or negative sign\n+ if (s[start] == '+' || s[start] == '-') start++\n+ // If we have nothing after the sign, the string is invalid\n+ if (start > end) return false\n+\n+ var hasIntegerPart: Boolean\n+ var hasFractionalPart = false\n+ var isHex = false\n+\n+ // Might be a hex string\n+ if (s[start] == '0') {\n+ start++\n+ // A \"0\" on its own is valid\n+ if (start > end) return true\n+\n+ // Test for [xX] to see if we truly have a hex string\n+ if (s[start].code or 0x20 == 'x'.code) {\n+ start++\n+\n+ // Look for hex digits after the 0x prefix\n+ var checkpoint = start\n+ while (start <= end) {\n+ val d = s[start]\n+ if (d.isAsciiDigit() || d.isHexLetter()) {\n+ start++\n+ } else {\n+ break\n+ }\n+ }\n+ // Check if we found 0x*****, otherwise, the hex number might be of the\n+ // form 0x.*******\n+ hasIntegerPart = checkpoint != start\n+\n+ // A hex string must have an exponent, the string is invalid if we only found an\n+ // integer part\n+ if (start > end) return false\n+\n+ if (s[start] == '.') {\n+ start++\n+\n+ // Look for hex digits for the fractional part\n+ checkpoint = start\n+ while (start <= end) {\n+ val d = s[start]\n+ if (d.isAsciiDigit() || d.isHexLetter()) {\n+ start++\n+ } else {\n+ break\n+ }\n+ }\n+\n+ // Did we find a fractional part?\n+ hasFractionalPart = checkpoint != start\n+ }\n+\n+ // A string must have an integer part, or a fractional part, or both\n+ if (!hasIntegerPart && !hasFractionalPart) return false\n+\n+ // A hex string must have an exponent, the string is invalid if we only found an\n+ // integer and/or fractional part\n+ if (start > end) return false\n+\n+ isHex = true\n+ } else {\n+ // Rewind the 0 we just parsed to make things easier below and try to parse a non-\n+ // hexadecimal string representation of a float\n+ start--\n+ }\n+ }\n+\n+ // Parse a non-hexadecimal representations\n+ if (!isHex) {\n+ // Look for digits before the decimal separator, if any\n+ var checkpoint = start\n+ while (start <= end && s[start].isAsciiDigit()) start++\n+\n+ // If there's no integer part, the float might be of the form .1234\n+ hasIntegerPart = checkpoint != start\n+\n+ // A non-hexadecimal 
representation only needs an integer part, we can stop here\n+ if (start > end) return hasIntegerPart\n+\n+ if (s[start] == '.') {\n+ start++\n+\n+ // Look for the fractional part\n+ checkpoint = start\n+ while (start <= end && s[start].isAsciiDigit()) start++\n+\n+ // Did we find a fractional part?\n+ hasFractionalPart = checkpoint != start\n+ }\n+\n+ // A string must have an integer part, or a fractional part, or both\n+ if (!hasIntegerPart && !hasFractionalPart) {\n+ // Special case non-finite constants\n+ val constant = when (end) {", - "comment_created_at": "2024-10-21T17:57:37+00:00", - "comment_author": "fzhinkin", - "comment_body": "For the sake of readability, I suggest outlining special-case handling to a separate function as well.\r\n`LengthOfXXX` could then be defined inside the function (They are not lengths, so it would be better to make their scope as narrow as possible).\r\n\r\n> For the sake of readability\r\n\r\nDon't get me wrong: the function is well written, there's just too much context to keep in mind while reading the code.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1081109610", - "pr_number": 5050, - "pr_file": "compiler/ir/backend.common/src/org/jetbrains/kotlin/backend/common/lower/inline/FunctionInlining.kt", - "created_at": "2023-01-19T11:02:06+00:00", - "commented_code": "return@forEach\n }\n\n if (argument.isImmutableVariableLoad) {\n substituteMap[argument.parameter] =\n argument.argumentExpression.transform( // Arguments may reference the previous ones - substitute them.\n substitutor,\n data = null\n )\n // Arguments may reference the previous ones - substitute them.\n val variableInitializer = argument.argumentExpression.transform(substitutor, data = null)\n val parameter = argument.parameter\n\n fun substituteParameterViaTemporaryVariable() {\n val newVariable = createTemporaryVariable(parameter, variableInitializer, callee)\n evaluationStatements.add(newVariable)\n substituteMap[parameter] = IrGetValueWithoutLocation(newVariable.symbol)\n }\n\n if (alwaysCreateTemporaryVariablesForArguments && !parameter.isInlineParameter()) {\n substituteParameterViaTemporaryVariable()\n return@forEach\n }\n\n // Arguments may reference the previous ones - substitute them.\n val variableInitializer = argument.argumentExpression.transform(substitutor, data = null)\n if (argument.isImmutableVariableLoad) {\n substituteMap[parameter] = variableInitializer\n return@forEach\n }\n\n val argumentExtracted = !argument.argumentExpression.isPure(false, context = context)", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1081109610", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5050, - "pr_file": "compiler/ir/backend.common/src/org/jetbrains/kotlin/backend/common/lower/inline/FunctionInlining.kt", - "discussion_id": "1081109610", - "commented_code": "@@ -602,43 +603,61 @@ class FunctionInlining(\n return@forEach\n }\n \n- if (argument.isImmutableVariableLoad) {\n- substituteMap[argument.parameter] =\n- argument.argumentExpression.transform( // Arguments may reference the previous ones - substitute them.\n- substitutor,\n- data = null\n- )\n+ // Arguments may reference the previous ones - substitute them.\n+ val variableInitializer = argument.argumentExpression.transform(substitutor, data = null)\n+ val parameter = argument.parameter\n+\n+ fun substituteParameterViaTemporaryVariable() {\n+ val newVariable = createTemporaryVariable(parameter, variableInitializer, callee)\n+ 
evaluationStatements.add(newVariable)\n+ substituteMap[parameter] = IrGetValueWithoutLocation(newVariable.symbol)\n+ }\n+\n+ if (alwaysCreateTemporaryVariablesForArguments && !parameter.isInlineParameter()) {\n+ substituteParameterViaTemporaryVariable()\n return@forEach\n }\n \n- // Arguments may reference the previous ones - substitute them.\n- val variableInitializer = argument.argumentExpression.transform(substitutor, data = null)\n+ if (argument.isImmutableVariableLoad) {\n+ substituteMap[parameter] = variableInitializer\n+ return@forEach\n+ }\n \n val argumentExtracted = !argument.argumentExpression.isPure(false, context = context)", - "comment_created_at": "2023-01-19T11:02:06+00:00", - "comment_author": "kunyavskiy", - "comment_body": "For now, in fact we have to cases \n1. Directly substitute\n2. Create temporary variable. \n\nFor now logic on when which is happening is distributed all over the function code. It would be nice to collect it in one place. Like add a variable \"shouldInlineArgumentValue\", and just check over it and the end, without returns in the middle of function. \n\nMaybe it can be event extracted to a separate function.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "964684049", - "pr_number": 4933, - "pr_file": "plugins/value-container-assignment/value-container-assignment.k2/src/org/jetbrains/kotlin/container/assignment/k2/FirValueContainerAssignmentAltererExtension.kt", - "created_at": "2022-09-07T10:46:41+00:00", - "commented_code": "/*\n * Copyright 2010-2022 JetBrains s.r.o. and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage org.jetbrains.kotlin.container.assignment.k2\n\nimport org.jetbrains.kotlin.KtFakeSourceElementKind\nimport org.jetbrains.kotlin.fakeElement\nimport org.jetbrains.kotlin.fir.FirSession\nimport org.jetbrains.kotlin.fir.expressions.*\nimport org.jetbrains.kotlin.fir.expressions.builder.buildFunctionCall\nimport org.jetbrains.kotlin.fir.expressions.builder.buildPropertyAccessExpression\nimport org.jetbrains.kotlin.fir.extensions.FirAssignExpressionAltererExtension\nimport org.jetbrains.kotlin.fir.references.builder.buildSimpleNamedReference\nimport org.jetbrains.kotlin.fir.resolvedSymbol\nimport org.jetbrains.kotlin.fir.symbols.impl.FirPropertySymbol\nimport org.jetbrains.kotlin.fir.types.toRegularClassSymbol\nimport org.jetbrains.kotlin.name.Name\n\nclass FirValueContainerAssignmentAltererExtension(\n session: FirSession\n) : FirAssignExpressionAltererExtension(session) {\n\n companion object {\n val ASSIGN_METHOD = Name.identifier(\"assign\")", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "964684049", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4933, - "pr_file": "plugins/value-container-assignment/value-container-assignment.k2/src/org/jetbrains/kotlin/container/assignment/k2/FirValueContainerAssignmentAltererExtension.kt", - "discussion_id": "964684049", - "commented_code": "@@ -0,0 +1,72 @@\n+/*\n+ * Copyright 2010-2022 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlin.container.assignment.k2\n+\n+import org.jetbrains.kotlin.KtFakeSourceElementKind\n+import org.jetbrains.kotlin.fakeElement\n+import org.jetbrains.kotlin.fir.FirSession\n+import org.jetbrains.kotlin.fir.expressions.*\n+import org.jetbrains.kotlin.fir.expressions.builder.buildFunctionCall\n+import org.jetbrains.kotlin.fir.expressions.builder.buildPropertyAccessExpression\n+import org.jetbrains.kotlin.fir.extensions.FirAssignExpressionAltererExtension\n+import org.jetbrains.kotlin.fir.references.builder.buildSimpleNamedReference\n+import org.jetbrains.kotlin.fir.resolvedSymbol\n+import org.jetbrains.kotlin.fir.symbols.impl.FirPropertySymbol\n+import org.jetbrains.kotlin.fir.types.toRegularClassSymbol\n+import org.jetbrains.kotlin.name.Name\n+\n+class FirValueContainerAssignmentAltererExtension(\n+ session: FirSession\n+) : FirAssignExpressionAltererExtension(session) {\n+\n+ companion object {\n+ val ASSIGN_METHOD = Name.identifier(\"assign\")", - "comment_created_at": "2022-09-07T10:46:41+00:00", - "comment_author": "demiurg906", - "comment_body": "It's worth to exctract this constant into `.common` module", - "pr_file_module": null - }, - { - "comment_id": "966271057", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4933, - "pr_file": "plugins/value-container-assignment/value-container-assignment.k2/src/org/jetbrains/kotlin/container/assignment/k2/FirValueContainerAssignmentAltererExtension.kt", - "discussion_id": "964684049", - "commented_code": "@@ -0,0 +1,72 @@\n+/*\n+ * Copyright 2010-2022 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlin.container.assignment.k2\n+\n+import org.jetbrains.kotlin.KtFakeSourceElementKind\n+import org.jetbrains.kotlin.fakeElement\n+import org.jetbrains.kotlin.fir.FirSession\n+import org.jetbrains.kotlin.fir.expressions.*\n+import org.jetbrains.kotlin.fir.expressions.builder.buildFunctionCall\n+import org.jetbrains.kotlin.fir.expressions.builder.buildPropertyAccessExpression\n+import org.jetbrains.kotlin.fir.extensions.FirAssignExpressionAltererExtension\n+import org.jetbrains.kotlin.fir.references.builder.buildSimpleNamedReference\n+import org.jetbrains.kotlin.fir.resolvedSymbol\n+import org.jetbrains.kotlin.fir.symbols.impl.FirPropertySymbol\n+import org.jetbrains.kotlin.fir.types.toRegularClassSymbol\n+import org.jetbrains.kotlin.name.Name\n+\n+class FirValueContainerAssignmentAltererExtension(\n+ session: FirSession\n+) : FirAssignExpressionAltererExtension(session) {\n+\n+ companion object {\n+ val ASSIGN_METHOD = Name.identifier(\"assign\")", - "comment_created_at": "2022-09-08T18:06:03+00:00", - "comment_author": "asodja", - "comment_body": "Extracted to .common", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "893088032", - "pr_number": 4835, - "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org/jetbrains/kotlinx/atomicfu/compiler/backend/jvm/AtomicfuJvmIrTransformer.kt", - "created_at": "2022-06-09T05:37:45+00:00", - "commented_code": "/*\n * Copyright 2010-2022 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage org.jetbrains.kotlinx.atomicfu.compiler.backend.jvm\n\nimport org.jetbrains.kotlin.backend.common.extensions.*\nimport org.jetbrains.kotlin.backend.jvm.ir.*\nimport org.jetbrains.kotlin.ir.*\nimport org.jetbrains.kotlin.ir.builders.declarations.*\nimport org.jetbrains.kotlin.ir.builders.*\nimport org.jetbrains.kotlin.ir.declarations.*\nimport org.jetbrains.kotlin.ir.declarations.impl.*\nimport org.jetbrains.kotlin.ir.expressions.*\nimport org.jetbrains.kotlin.ir.expressions.impl.*\nimport org.jetbrains.kotlin.ir.symbols.*\nimport org.jetbrains.kotlin.ir.types.*\nimport org.jetbrains.kotlin.ir.util.*\nimport org.jetbrains.kotlin.ir.visitors.*\nimport org.jetbrains.kotlin.name.*\nimport org.jetbrains.kotlin.util.capitalizeDecapitalize.*\nimport org.jetbrains.kotlinx.atomicfu.compiler.backend.*\nimport kotlin.collections.set\n\nprivate const val AFU_PKG = \"kotlinx.atomicfu\"\nprivate const val TRACE_BASE_TYPE = \"TraceBase\"\nprivate const val ATOMIC_VALUE_FACTORY = \"atomic\"\nprivate const val INVOKE = \"invoke\"\nprivate const val APPEND = \"append\"\nprivate const val GET = \"get\"\nprivate const val ATOMICFU = \"atomicfu\"\nprivate const val ATOMIC_ARRAY_RECEIVER_SUFFIX = \"\\$array\"\nprivate const val DISPATCH_RECEIVER = \"${ATOMICFU}\\$dispatchReceiver\"\nprivate const val ATOMIC_HANDLER = \"${ATOMICFU}\\$handler\"\nprivate const val ACTION = \"${ATOMICFU}\\$action\"\nprivate const val INDEX = \"${ATOMICFU}\\$index\"\nprivate const val VOLATILE_WRAPPER_SUFFIX = \"\\$VolatileWrapper\"\nprivate const val LOOP = \"loop\"\nprivate const val UPDATE = \"update\"\n\nclass AtomicfuJvmIrTransformer(\n private val context: IrPluginContext,\n private val atomicSymbols: AtomicSymbols\n) {\n private val irBuiltIns = context.irBuiltIns\n\n private val AFU_VALUE_TYPES: Map = mapOf(\n \"AtomicInt\" to irBuiltIns.intType,\n \"AtomicLong\" to irBuiltIns.longType,\n \"AtomicBoolean\" to irBuiltIns.booleanType,\n \"AtomicRef\" to irBuiltIns.anyNType\n )\n\n private val ATOMICFU_INLINE_FUNCTIONS = setOf(\"loop\", \"update\", \"getAndUpdate\", \"updateAndGet\")\n protected val ATOMIC_VALUE_TYPES = setOf(\"AtomicInt\", \"AtomicLong\", \"AtomicBoolean\", \"AtomicRef\")\n protected val ATOMIC_ARRAY_TYPES = setOf(\"AtomicIntArray\", \"AtomicLongArray\", \"AtomicBooleanArray\", \"AtomicArray\")\n\n fun transform(moduleFragment: IrModuleFragment) {\n transformAtomicFields(moduleFragment)\n transformAtomicExtensions(moduleFragment)\n transformAtomicfuDeclarations(moduleFragment)\n moduleFragment.files.forEach { irFile ->\n irFile.patchDeclarationParents()\n }\n }\n\n private fun transformAtomicFields(moduleFragment: IrModuleFragment) {\n moduleFragment.files.forEach { irFile ->\n irFile.transform(AtomicHandlerTransformer(), null)\n }\n }\n\n private fun transformAtomicExtensions(moduleFragment: IrModuleFragment) {\n moduleFragment.files.forEach { irFile ->\n irFile.transform(AtomicExtensionTransformer(), null)\n }\n }\n\n private fun transformAtomicfuDeclarations(moduleFragment: IrModuleFragment) {\n moduleFragment.files.forEach { irFile ->\n irFile.transform(AtomicfuTransformer(), null)\n }\n }\n\n private val propertyToAtomicHandler = mutableMapOf()\n\n private inner class AtomicHandlerTransformer : IrElementTransformer {\n override fun visitClass(declaration: IrClass, data: IrFunction?): IrStatement {\n 
declaration.declarations.filter(::fromKotlinxAtomicfu).forEach {\n (it as IrProperty).transformAtomicfuProperty(declaration)\n }\n return super.visitClass(declaration, data)\n }\n\n override fun visitFile(declaration: IrFile, data: IrFunction?): IrFile {\n declaration.declarations.filter(::fromKotlinxAtomicfu).forEach {\n (it as IrProperty).transformAtomicfuProperty(declaration)\n }\n return super.visitFile(declaration, data)\n }\n\n private fun IrProperty.transformAtomicfuProperty(parent: IrDeclarationContainer) {\n val isTopLevel = parent is IrFile\n when {\n isAtomic() -> {\n if (isTopLevel) {\n val parentClass = generateWrapperClass(this, parent)\n transformAtomicProperty(parentClass)\n moveFromFileToClass(parent as IrFile, parentClass)\n } else {\n transformAtomicProperty(parent as IrClass)\n }\n }\n isDelegatedToAtomic() -> transformDelegatedProperty(parent)\n isAtomicArray() -> transformAtomicArrayProperty(parent)\n isTrace() -> parent.declarations.remove(this)\n else -> {}\n }\n }\n\n private fun IrProperty.moveFromFileToClass(\n parentFile: IrFile,\n parentClass: IrClass\n ) {\n parentFile.declarations.remove(this)\n parentClass.declarations.add(this)\n parent = parentClass\n }\n\n private fun IrProperty.transformAtomicProperty(parentClass: IrClass) {\n // Atomic property transformation:\n // 1. replace it's backingField with a volatile property of atomic value type\n // 2. create j.u.c.a.Atomic*FieldUpdater for this volatile property to handle it's value atomically\n // val a = atomic(0) ->\n // volatile var a: Int = 0\n // val a$FU = AtomicIntegerFieldUpdater.newUpdater(parentClass, \"a\")\n //\n // Top-level atomic properties transformation:\n // 1. replace it's backingField with a volatile property of atomic value type\n // 2. wrap this volatile property into the generated class\n // 3. create j.u.c.a.Atomic*FieldUpdater for the volatile property to handle it's value atomically\n // val a = atomic(0) ->\n // class A$ParentFile$VolatileWrapper { volatile var a: Int = 0 }\n // val a$FU = AtomicIntegerFieldUpdater.newUpdater(A$ParentFile$VolatileWrapper::class, \"a\")\n backingField = buildVolatileRawField(this, parentClass)\n // update property accessors\n context.addDefaultGetter(this, parentClass)\n val fieldUpdater = addJucaAFUProperty(this, parentClass)\n registerAtomicHandler(fieldUpdater)\n }\n\n private fun IrProperty.transformAtomicArrayProperty(parent: IrDeclarationContainer) {\n // Replace atomicfu array classes with the corresponding atomic arrays from j.u.c.a.:\n // val intArr = atomicArrayOfNulls(5) ->\n // val intArr = AtomicReferenceArray(5)\n backingField = buildJucaArrayField(this, parent)\n // update property accessors\n context.addDefaultGetter(this, parent)\n registerAtomicHandler(this)\n }\n\n private fun IrProperty.transformDelegatedProperty(parentClass: IrDeclarationContainer) {\n backingField?.let {\n it.initializer?.let {\n val initializer = it.expression as IrCall\n if (initializer.isAtomicFactory()) {\n // Property delegated to atomic factory invocation:\n // 1. replace it's backingField with a volatile property of value type\n // 2. transform getter/setter\n // var a by atomic(0) ->\n // volatile var a: Int = 0\n val volatileField = buildVolatileRawField(this, parentClass)\n backingField = volatileField\n getter?.transformAccessor(volatileField, getter?.dispatchReceiverParameter?.capture())\n setter?.transformAccessor(volatileField, setter?.dispatchReceiverParameter?.capture())\n } else {\n // Property delegated to the atomic property:\n // 1. 
delegate it's accessors to get/set of the backingField of the atomic delegate\n // (that is already transformed to a volatile field of value type)\n // val _a = atomic(0)\n // var a by _a ->\n // volatile var _a: Int = 0\n // var a by _a\n val atomicProperty = initializer.getCorrespondingProperty()\n val volatileField = atomicProperty.backingField!!\n backingField = null\n if (atomicProperty.isTopLevel()) {\n atomicSymbols.createBuilder(symbol).run {\n val parent = getStaticVolatileWrapperInstance(atomicProperty)\n getter?.transformAccessor(volatileField, getProperty(parent, null))\n setter?.transformAccessor(volatileField, getProperty(parent, null))\n }\n } else {\n if (this.parent == atomicProperty.parent) {\n //class A {\n // val _a = atomic()\n // var a by _a\n //}\n getter?.transformAccessor(volatileField, getter?.dispatchReceiverParameter?.capture())\n setter?.transformAccessor(volatileField, setter?.dispatchReceiverParameter?.capture())\n } else {\n //class A {\n // val _a = atomic()\n // inner class B {\n // var a by _a\n // }\n //}\n val thisReceiver = atomicProperty.parentAsClass.thisReceiver\n getter?.transformAccessor(volatileField, thisReceiver?.capture())\n setter?.transformAccessor(volatileField, thisReceiver?.capture())\n }\n }\n }\n }\n }\n }\n\n private fun IrFunction.transformAccessor(volatileField: IrField, parent: IrExpression?) {\n val accessor = this\n atomicSymbols.createBuilder(symbol).run {", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "893088032", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4835, - "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org/jetbrains/kotlinx/atomicfu/compiler/backend/jvm/AtomicfuJvmIrTransformer.kt", - "discussion_id": "893088032", - "commented_code": "@@ -0,0 +1,825 @@\n+/*\n+ * Copyright 2010-2022 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlinx.atomicfu.compiler.backend.jvm\n+\n+import org.jetbrains.kotlin.backend.common.extensions.*\n+import org.jetbrains.kotlin.backend.jvm.ir.*\n+import org.jetbrains.kotlin.ir.*\n+import org.jetbrains.kotlin.ir.builders.declarations.*\n+import org.jetbrains.kotlin.ir.builders.*\n+import org.jetbrains.kotlin.ir.declarations.*\n+import org.jetbrains.kotlin.ir.declarations.impl.*\n+import org.jetbrains.kotlin.ir.expressions.*\n+import org.jetbrains.kotlin.ir.expressions.impl.*\n+import org.jetbrains.kotlin.ir.symbols.*\n+import org.jetbrains.kotlin.ir.types.*\n+import org.jetbrains.kotlin.ir.util.*\n+import org.jetbrains.kotlin.ir.visitors.*\n+import org.jetbrains.kotlin.name.*\n+import org.jetbrains.kotlin.util.capitalizeDecapitalize.*\n+import org.jetbrains.kotlinx.atomicfu.compiler.backend.*\n+import kotlin.collections.set\n+\n+private const val AFU_PKG = \"kotlinx.atomicfu\"\n+private const val TRACE_BASE_TYPE = \"TraceBase\"\n+private const val ATOMIC_VALUE_FACTORY = \"atomic\"\n+private const val INVOKE = \"invoke\"\n+private const val APPEND = \"append\"\n+private const val GET = \"get\"\n+private const val ATOMICFU = \"atomicfu\"\n+private const val ATOMIC_ARRAY_RECEIVER_SUFFIX = \"\\$array\"\n+private const val DISPATCH_RECEIVER = \"${ATOMICFU}\\$dispatchReceiver\"\n+private const val ATOMIC_HANDLER = \"${ATOMICFU}\\$handler\"\n+private const val ACTION = \"${ATOMICFU}\\$action\"\n+private const val INDEX = \"${ATOMICFU}\\$index\"\n+private const val VOLATILE_WRAPPER_SUFFIX = \"\\$VolatileWrapper\"\n+private const val LOOP = \"loop\"\n+private const val UPDATE = \"update\"\n+\n+class AtomicfuJvmIrTransformer(\n+ private val context: IrPluginContext,\n+ private val atomicSymbols: AtomicSymbols\n+) {\n+ private val irBuiltIns = context.irBuiltIns\n+\n+ private val AFU_VALUE_TYPES: Map = mapOf(\n+ \"AtomicInt\" to irBuiltIns.intType,\n+ \"AtomicLong\" to irBuiltIns.longType,\n+ \"AtomicBoolean\" to irBuiltIns.booleanType,\n+ \"AtomicRef\" to irBuiltIns.anyNType\n+ )\n+\n+ private val ATOMICFU_INLINE_FUNCTIONS = setOf(\"loop\", \"update\", \"getAndUpdate\", \"updateAndGet\")\n+ protected val ATOMIC_VALUE_TYPES = setOf(\"AtomicInt\", \"AtomicLong\", \"AtomicBoolean\", \"AtomicRef\")\n+ protected val ATOMIC_ARRAY_TYPES = setOf(\"AtomicIntArray\", \"AtomicLongArray\", \"AtomicBooleanArray\", \"AtomicArray\")\n+\n+ fun transform(moduleFragment: IrModuleFragment) {\n+ transformAtomicFields(moduleFragment)\n+ transformAtomicExtensions(moduleFragment)\n+ transformAtomicfuDeclarations(moduleFragment)\n+ moduleFragment.files.forEach { irFile ->\n+ irFile.patchDeclarationParents()\n+ }\n+ }\n+\n+ private fun transformAtomicFields(moduleFragment: IrModuleFragment) {\n+ moduleFragment.files.forEach { irFile ->\n+ irFile.transform(AtomicHandlerTransformer(), null)\n+ }\n+ }\n+\n+ private fun transformAtomicExtensions(moduleFragment: IrModuleFragment) {\n+ moduleFragment.files.forEach { irFile ->\n+ irFile.transform(AtomicExtensionTransformer(), null)\n+ }\n+ }\n+\n+ private fun transformAtomicfuDeclarations(moduleFragment: IrModuleFragment) {\n+ moduleFragment.files.forEach { irFile ->\n+ irFile.transform(AtomicfuTransformer(), null)\n+ }\n+ }\n+\n+ private val propertyToAtomicHandler = mutableMapOf()\n+\n+ private inner class AtomicHandlerTransformer : IrElementTransformer {\n+ override fun 
visitClass(declaration: IrClass, data: IrFunction?): IrStatement {\n+ declaration.declarations.filter(::fromKotlinxAtomicfu).forEach {\n+ (it as IrProperty).transformAtomicfuProperty(declaration)\n+ }\n+ return super.visitClass(declaration, data)\n+ }\n+\n+ override fun visitFile(declaration: IrFile, data: IrFunction?): IrFile {\n+ declaration.declarations.filter(::fromKotlinxAtomicfu).forEach {\n+ (it as IrProperty).transformAtomicfuProperty(declaration)\n+ }\n+ return super.visitFile(declaration, data)\n+ }\n+\n+ private fun IrProperty.transformAtomicfuProperty(parent: IrDeclarationContainer) {\n+ val isTopLevel = parent is IrFile\n+ when {\n+ isAtomic() -> {\n+ if (isTopLevel) {\n+ val parentClass = generateWrapperClass(this, parent)\n+ transformAtomicProperty(parentClass)\n+ moveFromFileToClass(parent as IrFile, parentClass)\n+ } else {\n+ transformAtomicProperty(parent as IrClass)\n+ }\n+ }\n+ isDelegatedToAtomic() -> transformDelegatedProperty(parent)\n+ isAtomicArray() -> transformAtomicArrayProperty(parent)\n+ isTrace() -> parent.declarations.remove(this)\n+ else -> {}\n+ }\n+ }\n+\n+ private fun IrProperty.moveFromFileToClass(\n+ parentFile: IrFile,\n+ parentClass: IrClass\n+ ) {\n+ parentFile.declarations.remove(this)\n+ parentClass.declarations.add(this)\n+ parent = parentClass\n+ }\n+\n+ private fun IrProperty.transformAtomicProperty(parentClass: IrClass) {\n+ // Atomic property transformation:\n+ // 1. replace it's backingField with a volatile property of atomic value type\n+ // 2. create j.u.c.a.Atomic*FieldUpdater for this volatile property to handle it's value atomically\n+ // val a = atomic(0) ->\n+ // volatile var a: Int = 0\n+ // val a$FU = AtomicIntegerFieldUpdater.newUpdater(parentClass, \"a\")\n+ //\n+ // Top-level atomic properties transformation:\n+ // 1. replace it's backingField with a volatile property of atomic value type\n+ // 2. wrap this volatile property into the generated class\n+ // 3. create j.u.c.a.Atomic*FieldUpdater for the volatile property to handle it's value atomically\n+ // val a = atomic(0) ->\n+ // class A$ParentFile$VolatileWrapper { volatile var a: Int = 0 }\n+ // val a$FU = AtomicIntegerFieldUpdater.newUpdater(A$ParentFile$VolatileWrapper::class, \"a\")\n+ backingField = buildVolatileRawField(this, parentClass)\n+ // update property accessors\n+ context.addDefaultGetter(this, parentClass)\n+ val fieldUpdater = addJucaAFUProperty(this, parentClass)\n+ registerAtomicHandler(fieldUpdater)\n+ }\n+\n+ private fun IrProperty.transformAtomicArrayProperty(parent: IrDeclarationContainer) {\n+ // Replace atomicfu array classes with the corresponding atomic arrays from j.u.c.a.:\n+ // val intArr = atomicArrayOfNulls(5) ->\n+ // val intArr = AtomicReferenceArray(5)\n+ backingField = buildJucaArrayField(this, parent)\n+ // update property accessors\n+ context.addDefaultGetter(this, parent)\n+ registerAtomicHandler(this)\n+ }\n+\n+ private fun IrProperty.transformDelegatedProperty(parentClass: IrDeclarationContainer) {\n+ backingField?.let {\n+ it.initializer?.let {\n+ val initializer = it.expression as IrCall\n+ if (initializer.isAtomicFactory()) {\n+ // Property delegated to atomic factory invocation:\n+ // 1. replace it's backingField with a volatile property of value type\n+ // 2. 
transform getter/setter\n+ // var a by atomic(0) ->\n+ // volatile var a: Int = 0\n+ val volatileField = buildVolatileRawField(this, parentClass)\n+ backingField = volatileField\n+ getter?.transformAccessor(volatileField, getter?.dispatchReceiverParameter?.capture())\n+ setter?.transformAccessor(volatileField, setter?.dispatchReceiverParameter?.capture())\n+ } else {\n+ // Property delegated to the atomic property:\n+ // 1. delegate it's accessors to get/set of the backingField of the atomic delegate\n+ // (that is already transformed to a volatile field of value type)\n+ // val _a = atomic(0)\n+ // var a by _a ->\n+ // volatile var _a: Int = 0\n+ // var a by _a\n+ val atomicProperty = initializer.getCorrespondingProperty()\n+ val volatileField = atomicProperty.backingField!!\n+ backingField = null\n+ if (atomicProperty.isTopLevel()) {\n+ atomicSymbols.createBuilder(symbol).run {\n+ val parent = getStaticVolatileWrapperInstance(atomicProperty)\n+ getter?.transformAccessor(volatileField, getProperty(parent, null))\n+ setter?.transformAccessor(volatileField, getProperty(parent, null))\n+ }\n+ } else {\n+ if (this.parent == atomicProperty.parent) {\n+ //class A {\n+ // val _a = atomic()\n+ // var a by _a\n+ //}\n+ getter?.transformAccessor(volatileField, getter?.dispatchReceiverParameter?.capture())\n+ setter?.transformAccessor(volatileField, setter?.dispatchReceiverParameter?.capture())\n+ } else {\n+ //class A {\n+ // val _a = atomic()\n+ // inner class B {\n+ // var a by _a\n+ // }\n+ //}\n+ val thisReceiver = atomicProperty.parentAsClass.thisReceiver\n+ getter?.transformAccessor(volatileField, thisReceiver?.capture())\n+ setter?.transformAccessor(volatileField, thisReceiver?.capture())\n+ }\n+ }\n+ }\n+ }\n+ }\n+ }\n+\n+ private fun IrFunction.transformAccessor(volatileField: IrField, parent: IrExpression?) {\n+ val accessor = this\n+ atomicSymbols.createBuilder(symbol).run {", - "comment_created_at": "2022-06-09T05:37:45+00:00", - "comment_author": "ilmirus", - "comment_body": "Generally, we use `with(...createBuilder(symbol))` idiom, when we create IR nodes. This helps us spot the places, where IR is create at a glance.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "786266216", - "pr_number": 4560, - "pr_file": "compiler/ir/ir.tree/gen/org/jetbrains/kotlin/ir/expressions/IrGetValue.kt", - "created_at": "2022-01-17T20:02:16+00:00", - "commented_code": "/*\n * Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\n//This file was generated automatically\n//DO NOT MODIFY IT MANUALLY\n\npackage org.jetbrains.kotlin.ir.expressions\n\nimport org.jetbrains.kotlin.ir.visitors.IrElementVisitor\n\n/**\n * A leaf IR tree element.\n * @sample org.jetbrains.kotlin.ir.generator.IrTree.getValue\n */\nabstract class IrGetValue : IrValueAccessExpression() {\n override fun accept(visitor: IrElementVisitor, data: D): R =\n visitor.visitGetValue(this, data)\n\n abstract fun copyWithOffsets(newStartOffset: Int, newEndOffset: Int): IrGetValue", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "786266216", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4560, - "pr_file": "compiler/ir/ir.tree/gen/org/jetbrains/kotlin/ir/expressions/IrGetValue.kt", - "discussion_id": "786266216", - "commented_code": "@@ -0,0 +1,22 @@\n+/*\n+ * Copyright 2010-2021 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+//This file was generated automatically\n+//DO NOT MODIFY IT MANUALLY\n+\n+package org.jetbrains.kotlin.ir.expressions\n+\n+import org.jetbrains.kotlin.ir.visitors.IrElementVisitor\n+\n+/**\n+ * A leaf IR tree element.\n+ * @sample org.jetbrains.kotlin.ir.generator.IrTree.getValue\n+ */\n+abstract class IrGetValue : IrValueAccessExpression() {\n+ override fun accept(visitor: IrElementVisitor, data: D): R =\n+ visitor.visitGetValue(this, data)\n+\n+ abstract fun copyWithOffsets(newStartOffset: Int, newEndOffset: Int): IrGetValue", - "comment_created_at": "2022-01-17T20:02:16+00:00", - "comment_author": "bashor", - "comment_body": "Minor: probably it could be extracted to extension function, same as for IrConst", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kotlin-minimize-not-null-assertions.json b/_reviewers/kotlin-minimize-not-null-assertions.json new file mode 100644 index 0000000..fff0900 --- /dev/null +++ b/_reviewers/kotlin-minimize-not-null-assertions.json @@ -0,0 +1,70 @@ +[ + { + "discussion_id": "1010257", + "pr_number": 80, + "pr_file": "idea/src/org/jetbrains/jet/plugin/quickfix/UnnecessaryNotNullAssertionFix.java", + "created_at": "2012-06-19T15:01:46+00:00", + "commented_code": "/*\n * Copyright 2010-2012 JetBrains s.r.o.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.jetbrains.jet.plugin.quickfix;\n\nimport com.intellij.codeInsight.intention.IntentionAction;\nimport com.intellij.openapi.editor.Editor;\nimport com.intellij.openapi.project.Project;\nimport com.intellij.psi.PsiElement;\nimport com.intellij.psi.PsiFile;\nimport com.intellij.psi.impl.source.tree.LeafPsiElement;\nimport com.intellij.util.IncorrectOperationException;\nimport org.jetbrains.annotations.NotNull;\nimport org.jetbrains.jet.lang.psi.JetFile;\nimport org.jetbrains.jet.lexer.JetTokens;\nimport org.jetbrains.jet.plugin.JetBundle;\n\n/**\n * @author slukjanov aka Frostman\n */\npublic class UnnecessaryNotNullAssertionFix implements IntentionAction {", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1010257", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 80, + "pr_file": "idea/src/org/jetbrains/jet/plugin/quickfix/UnnecessaryNotNullAssertionFix.java", + "discussion_id": "1010257", + "commented_code": "@@ -0,0 +1,73 @@\n+/*\n+ * Copyright 2010-2012 JetBrains s.r.o.\n+ *\n+ * Licensed under the Apache License, Version 2.0 (the \"License\");\n+ * you may not use this file except in compliance with the License.\n+ * You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing, software\n+ * distributed under the License is distributed on an \"AS IS\" BASIS,\n+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+ * See 
the License for the specific language governing permissions and\n+ * limitations under the License.\n+ */\n+\n+package org.jetbrains.jet.plugin.quickfix;\n+\n+import com.intellij.codeInsight.intention.IntentionAction;\n+import com.intellij.openapi.editor.Editor;\n+import com.intellij.openapi.project.Project;\n+import com.intellij.psi.PsiElement;\n+import com.intellij.psi.PsiFile;\n+import com.intellij.psi.impl.source.tree.LeafPsiElement;\n+import com.intellij.util.IncorrectOperationException;\n+import org.jetbrains.annotations.NotNull;\n+import org.jetbrains.jet.lang.psi.JetFile;\n+import org.jetbrains.jet.lexer.JetTokens;\n+import org.jetbrains.jet.plugin.JetBundle;\n+\n+/**\n+ * @author slukjanov aka Frostman\n+ */\n+public class UnnecessaryNotNullAssertionFix implements IntentionAction {", + "comment_created_at": "2012-06-19T15:01:46+00:00", + "comment_author": "goodwinnk", + "comment_body": "I understand why you decided to create another class for that task but still think that there should be another logic in UnnecessaryNotNullAssertionFix and ReplaceCallFix. I propose ReplaceCallFix could also be used for replacing !!. call to something else as it can be used for introducing such calls now. And this class can could be responsible only for introducing and removing a!! postfix operation.\nWhat do you think?\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "915071284", + "pr_number": 4890, + "pr_file": "compiler/frontend/src/org/jetbrains/kotlin/types/expressions/ExpressionTypingVisitorForStatements.java", + "created_at": "2022-07-06T17:03:30+00:00", + "commented_code": "return resultInfo.replaceType(components.dataFlowAnalyzer.checkStatementType(expression, contextWithExpectedType));\n }\n\n private KotlinTypeInfo resolveAssignOperatorOverload(\n BindingContext bindingContext,\n KtBinaryExpression expression,\n ExpressionTypingContext contextWithExpectedType,\n KtExpression leftOperand,\n KtExpression left,\n KotlinTypeInfo leftInfo,\n ExpressionTypingContext context\n ) {\n KotlinType leftType = leftInfo.getType();\n KotlinType assignmentOperationType = null;\n TemporaryTraceAndCache temporaryForAssignmentOperation = null;\n VariableDescriptor descriptor = BindingContextUtils.extractVariableFromResolvedCall(bindingContext, leftOperand);\n OverloadResolutionResults assignmentOperationDescriptors = new NameNotFoundResolutionResult<>();\n if (descriptor != null && !descriptor.isVar()) {", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "915071284", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4890, + "pr_file": "compiler/frontend/src/org/jetbrains/kotlin/types/expressions/ExpressionTypingVisitorForStatements.java", + "discussion_id": "915071284", + "commented_code": "@@ -477,6 +486,41 @@ protected KotlinTypeInfo visitAssignment(KtBinaryExpression expression, Expressi\n return resultInfo.replaceType(components.dataFlowAnalyzer.checkStatementType(expression, contextWithExpectedType));\n }\n \n+ private KotlinTypeInfo resolveAssignOperatorOverload(\n+ BindingContext bindingContext,\n+ KtBinaryExpression expression,\n+ ExpressionTypingContext contextWithExpectedType,\n+ KtExpression leftOperand,\n+ KtExpression left,\n+ KotlinTypeInfo leftInfo,\n+ ExpressionTypingContext context\n+ ) {\n+ KotlinType leftType = leftInfo.getType();\n+ KotlinType assignmentOperationType = null;\n+ TemporaryTraceAndCache temporaryForAssignmentOperation = null;\n+ VariableDescriptor descriptor = 
BindingContextUtils.extractVariableFromResolvedCall(bindingContext, leftOperand);\n+ OverloadResolutionResults assignmentOperationDescriptors = new NameNotFoundResolutionResult<>();\n+ if (descriptor != null && !descriptor.isVar()) {", + "comment_created_at": "2022-07-06T17:03:30+00:00", + "comment_author": "mglukhikh", + "comment_body": "Should we just do early return in case `descriptor.isVar() == true`?", + "pr_file_module": null + }, + { + "comment_id": "915072169", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4890, + "pr_file": "compiler/frontend/src/org/jetbrains/kotlin/types/expressions/ExpressionTypingVisitorForStatements.java", + "discussion_id": "915071284", + "commented_code": "@@ -477,6 +486,41 @@ protected KotlinTypeInfo visitAssignment(KtBinaryExpression expression, Expressi\n return resultInfo.replaceType(components.dataFlowAnalyzer.checkStatementType(expression, contextWithExpectedType));\n }\n \n+ private KotlinTypeInfo resolveAssignOperatorOverload(\n+ BindingContext bindingContext,\n+ KtBinaryExpression expression,\n+ ExpressionTypingContext contextWithExpectedType,\n+ KtExpression leftOperand,\n+ KtExpression left,\n+ KotlinTypeInfo leftInfo,\n+ ExpressionTypingContext context\n+ ) {\n+ KotlinType leftType = leftInfo.getType();\n+ KotlinType assignmentOperationType = null;\n+ TemporaryTraceAndCache temporaryForAssignmentOperation = null;\n+ VariableDescriptor descriptor = BindingContextUtils.extractVariableFromResolvedCall(bindingContext, leftOperand);\n+ OverloadResolutionResults assignmentOperationDescriptors = new NameNotFoundResolutionResult<>();\n+ if (descriptor != null && !descriptor.isVar()) {", + "comment_created_at": "2022-07-06T17:04:38+00:00", + "comment_author": "mglukhikh", + "comment_body": "(and also in case when `descriptor == null`)", + "pr_file_module": null + }, + { + "comment_id": "926654946", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4890, + "pr_file": "compiler/frontend/src/org/jetbrains/kotlin/types/expressions/ExpressionTypingVisitorForStatements.java", + "discussion_id": "915071284", + "commented_code": "@@ -477,6 +486,41 @@ protected KotlinTypeInfo visitAssignment(KtBinaryExpression expression, Expressi\n return resultInfo.replaceType(components.dataFlowAnalyzer.checkStatementType(expression, contextWithExpectedType));\n }\n \n+ private KotlinTypeInfo resolveAssignOperatorOverload(\n+ BindingContext bindingContext,\n+ KtBinaryExpression expression,\n+ ExpressionTypingContext contextWithExpectedType,\n+ KtExpression leftOperand,\n+ KtExpression left,\n+ KotlinTypeInfo leftInfo,\n+ ExpressionTypingContext context\n+ ) {\n+ KotlinType leftType = leftInfo.getType();\n+ KotlinType assignmentOperationType = null;\n+ TemporaryTraceAndCache temporaryForAssignmentOperation = null;\n+ VariableDescriptor descriptor = BindingContextUtils.extractVariableFromResolvedCall(bindingContext, leftOperand);\n+ OverloadResolutionResults assignmentOperationDescriptors = new NameNotFoundResolutionResult<>();\n+ if (descriptor != null && !descriptor.isVar()) {", + "comment_created_at": "2022-07-21T13:09:12+00:00", + "comment_author": "asodja", + "comment_body": "Makes sense, done.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kotlin-minimize-not-null-assertions.md b/_reviewers/kotlin-minimize-not-null-assertions.md index 4e3a703..f8ba31e 100644 --- a/_reviewers/kotlin-minimize-not-null-assertions.md +++ b/_reviewers/kotlin-minimize-not-null-assertions.md @@ -34,75 +34,3 @@ val result = 
nullableValue.doSomething() ``` This pattern reduces the risk of NullPointerExceptions and makes code more robust. When reviewing code, look for unnecessary not-null assertions that could be replaced with safer constructs, as shown in the `UnnecessaryNotNullAssertionFix` class. Additionally, follow the pattern seen in the expression typing code where early returns are used when a descriptor is null. - - -[ - { - "discussion_id": "1010257", - "pr_number": 80, - "pr_file": "idea/src/org/jetbrains/jet/plugin/quickfix/UnnecessaryNotNullAssertionFix.java", - "created_at": "2012-06-19T15:01:46+00:00", - "commented_code": "/*\n * Copyright 2010-2012 JetBrains s.r.o.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.jetbrains.jet.plugin.quickfix;\n\nimport com.intellij.codeInsight.intention.IntentionAction;\nimport com.intellij.openapi.editor.Editor;\nimport com.intellij.openapi.project.Project;\nimport com.intellij.psi.PsiElement;\nimport com.intellij.psi.PsiFile;\nimport com.intellij.psi.impl.source.tree.LeafPsiElement;\nimport com.intellij.util.IncorrectOperationException;\nimport org.jetbrains.annotations.NotNull;\nimport org.jetbrains.jet.lang.psi.JetFile;\nimport org.jetbrains.jet.lexer.JetTokens;\nimport org.jetbrains.jet.plugin.JetBundle;\n\n/**\n * @author slukjanov aka Frostman\n */\npublic class UnnecessaryNotNullAssertionFix implements IntentionAction {", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1010257", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 80, - "pr_file": "idea/src/org/jetbrains/jet/plugin/quickfix/UnnecessaryNotNullAssertionFix.java", - "discussion_id": "1010257", - "commented_code": "@@ -0,0 +1,73 @@\n+/*\n+ * Copyright 2010-2012 JetBrains s.r.o.\n+ *\n+ * Licensed under the Apache License, Version 2.0 (the \"License\");\n+ * you may not use this file except in compliance with the License.\n+ * You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing, software\n+ * distributed under the License is distributed on an \"AS IS\" BASIS,\n+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+ * See the License for the specific language governing permissions and\n+ * limitations under the License.\n+ */\n+\n+package org.jetbrains.jet.plugin.quickfix;\n+\n+import com.intellij.codeInsight.intention.IntentionAction;\n+import com.intellij.openapi.editor.Editor;\n+import com.intellij.openapi.project.Project;\n+import com.intellij.psi.PsiElement;\n+import com.intellij.psi.PsiFile;\n+import com.intellij.psi.impl.source.tree.LeafPsiElement;\n+import com.intellij.util.IncorrectOperationException;\n+import org.jetbrains.annotations.NotNull;\n+import org.jetbrains.jet.lang.psi.JetFile;\n+import org.jetbrains.jet.lexer.JetTokens;\n+import org.jetbrains.jet.plugin.JetBundle;\n+\n+/**\n+ * @author slukjanov aka Frostman\n+ */\n+public class UnnecessaryNotNullAssertionFix implements 
IntentionAction {", - "comment_created_at": "2012-06-19T15:01:46+00:00", - "comment_author": "goodwinnk", - "comment_body": "I understand why you decided to create another class for that task but still think that there should be another logic in UnnecessaryNotNullAssertionFix and ReplaceCallFix. I propose ReplaceCallFix could also be used for replacing !!. call to something else as it can be used for introducing such calls now. And this class can could be responsible only for introducing and removing a!! postfix operation.\nWhat do you think?\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "915071284", - "pr_number": 4890, - "pr_file": "compiler/frontend/src/org/jetbrains/kotlin/types/expressions/ExpressionTypingVisitorForStatements.java", - "created_at": "2022-07-06T17:03:30+00:00", - "commented_code": "return resultInfo.replaceType(components.dataFlowAnalyzer.checkStatementType(expression, contextWithExpectedType));\n }\n\n private KotlinTypeInfo resolveAssignOperatorOverload(\n BindingContext bindingContext,\n KtBinaryExpression expression,\n ExpressionTypingContext contextWithExpectedType,\n KtExpression leftOperand,\n KtExpression left,\n KotlinTypeInfo leftInfo,\n ExpressionTypingContext context\n ) {\n KotlinType leftType = leftInfo.getType();\n KotlinType assignmentOperationType = null;\n TemporaryTraceAndCache temporaryForAssignmentOperation = null;\n VariableDescriptor descriptor = BindingContextUtils.extractVariableFromResolvedCall(bindingContext, leftOperand);\n OverloadResolutionResults assignmentOperationDescriptors = new NameNotFoundResolutionResult<>();\n if (descriptor != null && !descriptor.isVar()) {", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "915071284", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4890, - "pr_file": "compiler/frontend/src/org/jetbrains/kotlin/types/expressions/ExpressionTypingVisitorForStatements.java", - "discussion_id": "915071284", - "commented_code": "@@ -477,6 +486,41 @@ protected KotlinTypeInfo visitAssignment(KtBinaryExpression expression, Expressi\n return resultInfo.replaceType(components.dataFlowAnalyzer.checkStatementType(expression, contextWithExpectedType));\n }\n \n+ private KotlinTypeInfo resolveAssignOperatorOverload(\n+ BindingContext bindingContext,\n+ KtBinaryExpression expression,\n+ ExpressionTypingContext contextWithExpectedType,\n+ KtExpression leftOperand,\n+ KtExpression left,\n+ KotlinTypeInfo leftInfo,\n+ ExpressionTypingContext context\n+ ) {\n+ KotlinType leftType = leftInfo.getType();\n+ KotlinType assignmentOperationType = null;\n+ TemporaryTraceAndCache temporaryForAssignmentOperation = null;\n+ VariableDescriptor descriptor = BindingContextUtils.extractVariableFromResolvedCall(bindingContext, leftOperand);\n+ OverloadResolutionResults assignmentOperationDescriptors = new NameNotFoundResolutionResult<>();\n+ if (descriptor != null && !descriptor.isVar()) {", - "comment_created_at": "2022-07-06T17:03:30+00:00", - "comment_author": "mglukhikh", - "comment_body": "Should we just do early return in case `descriptor.isVar() == true`?", - "pr_file_module": null - }, - { - "comment_id": "915072169", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4890, - "pr_file": "compiler/frontend/src/org/jetbrains/kotlin/types/expressions/ExpressionTypingVisitorForStatements.java", - "discussion_id": "915071284", - "commented_code": "@@ -477,6 +486,41 @@ protected KotlinTypeInfo visitAssignment(KtBinaryExpression expression, Expressi\n return 
resultInfo.replaceType(components.dataFlowAnalyzer.checkStatementType(expression, contextWithExpectedType));\n }\n \n+ private KotlinTypeInfo resolveAssignOperatorOverload(\n+ BindingContext bindingContext,\n+ KtBinaryExpression expression,\n+ ExpressionTypingContext contextWithExpectedType,\n+ KtExpression leftOperand,\n+ KtExpression left,\n+ KotlinTypeInfo leftInfo,\n+ ExpressionTypingContext context\n+ ) {\n+ KotlinType leftType = leftInfo.getType();\n+ KotlinType assignmentOperationType = null;\n+ TemporaryTraceAndCache temporaryForAssignmentOperation = null;\n+ VariableDescriptor descriptor = BindingContextUtils.extractVariableFromResolvedCall(bindingContext, leftOperand);\n+ OverloadResolutionResults assignmentOperationDescriptors = new NameNotFoundResolutionResult<>();\n+ if (descriptor != null && !descriptor.isVar()) {", - "comment_created_at": "2022-07-06T17:04:38+00:00", - "comment_author": "mglukhikh", - "comment_body": "(and also in case when `descriptor == null`)", - "pr_file_module": null - }, - { - "comment_id": "926654946", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4890, - "pr_file": "compiler/frontend/src/org/jetbrains/kotlin/types/expressions/ExpressionTypingVisitorForStatements.java", - "discussion_id": "915071284", - "commented_code": "@@ -477,6 +486,41 @@ protected KotlinTypeInfo visitAssignment(KtBinaryExpression expression, Expressi\n return resultInfo.replaceType(components.dataFlowAnalyzer.checkStatementType(expression, contextWithExpectedType));\n }\n \n+ private KotlinTypeInfo resolveAssignOperatorOverload(\n+ BindingContext bindingContext,\n+ KtBinaryExpression expression,\n+ ExpressionTypingContext contextWithExpectedType,\n+ KtExpression leftOperand,\n+ KtExpression left,\n+ KotlinTypeInfo leftInfo,\n+ ExpressionTypingContext context\n+ ) {\n+ KotlinType leftType = leftInfo.getType();\n+ KotlinType assignmentOperationType = null;\n+ TemporaryTraceAndCache temporaryForAssignmentOperation = null;\n+ VariableDescriptor descriptor = BindingContextUtils.extractVariableFromResolvedCall(bindingContext, leftOperand);\n+ OverloadResolutionResults assignmentOperationDescriptors = new NameNotFoundResolutionResult<>();\n+ if (descriptor != null && !descriptor.isVar()) {", - "comment_created_at": "2022-07-21T13:09:12+00:00", - "comment_author": "asodja", - "comment_body": "Makes sense, done.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kotlin-minimize-unnecessary-work.json b/_reviewers/kotlin-minimize-unnecessary-work.json new file mode 100644 index 0000000..5aeceab --- /dev/null +++ b/_reviewers/kotlin-minimize-unnecessary-work.json @@ -0,0 +1,384 @@ +[ + { + "discussion_id": "2030201923", + "pr_number": 5426, + "pr_file": "libraries/stdlib/common/src/generated/_Arrays.kt", + "created_at": "2025-04-06T16:43:45+00:00", + "commented_code": "* @sample samples.collections.Collections.Transformations.take\n */\npublic inline fun Array.takeWhile(predicate: (T) -> Boolean): List {", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "2030201923", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5426, + "pr_file": "libraries/stdlib/common/src/generated/_Arrays.kt", + "discussion_id": "2030201923", + "commented_code": "@@ -5295,13 +5285,16 @@ public inline fun CharArray.takeLastWhile(predicate: (Char) -> Boolean): List Array.takeWhile(predicate: (T) -> Boolean): List {", + "comment_created_at": "2025-04-06T16:43:45+00:00", + "comment_author": "hoangchungk53qx1", + "comment_body": " it will be easier to 
read, i think, with equivalent performance \r\n```suggestion\r\npublic inline fun Array.takeWhile(predicate: (T) -> Boolean): List {\r\n var i = 0\r\n while (i < size && predicate(this[i])) i++\r\n return if (i == 0) emptyList() else this.copyOfRange(0, i).asList()\r\n}\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2034023407", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5426, + "pr_file": "libraries/stdlib/common/src/generated/_Arrays.kt", + "discussion_id": "2030201923", + "commented_code": "@@ -5295,13 +5285,16 @@ public inline fun CharArray.takeLastWhile(predicate: (Char) -> Boolean): List Array.takeWhile(predicate: (T) -> Boolean): List {", + "comment_created_at": "2025-04-08T20:57:31+00:00", + "comment_author": "alexismanin", + "comment_body": "I've checked the impact of removing the special case handling, and it looks like it is noticeable. On the benchmark result below, your suggestion is the \"takeWhileSuggestion\" case. We see that for array size 0 and 1, it is a little less fast than the proposed optimization : \r\n\r\n![image](https://github.com/user-attachments/assets/6658ed62-7e86-405e-882d-14aeb94e6e7b)\r\n\r\nTherefore, I think we should keep the when as it is now.\r\n\r\nP.S: If you want to double-check this, I've added the suggestions in my benchmark project [on the `test-pr-suggestions` branch](https://github.com/alexismanin/kt-benchmark-array-to-list/tree/test-pr-suggestions)", + "pr_file_module": null + }, + { + "comment_id": "2034521735", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5426, + "pr_file": "libraries/stdlib/common/src/generated/_Arrays.kt", + "discussion_id": "2030201923", + "commented_code": "@@ -5295,13 +5285,16 @@ public inline fun CharArray.takeLastWhile(predicate: (Char) -> Boolean): List Array.takeWhile(predicate: (T) -> Boolean): List {", + "comment_created_at": "2025-04-09T06:16:18+00:00", + "comment_author": "alexismanin", + "comment_body": "@hoangchungk53qx1 : After second thought, your simplification makes sense.\r\n\r\nI realized that it just miss a condition to improve its performance when *output list has only one element* (which is a broader case than my current implementation, which only accounts for cases when *input* array has only one element) : \r\n\r\n```kt\r\nreturn if (i == 0) emptyList()\r\n else if (i == 1) listOf(this[0])\r\n else copyOfRange(0, i).asList()\r\n```\r\n\r\nI've launched a benchmark by modifying your suggestion so that the final return condition accounts for cases where i is 1. It greatly improves performance, for two cases of the benchmark : \r\n\r\n 1. Input array has one element\r\n 2. Output list has only one element (in the benchmark, this is the case where input array is of size 3. 
We only take the first element from it)\r\n\r\nI will soon apply you suggestion with this tweak, and I think we will be good then.\r\n\r\nHere is a glimpse of updated benchmark results:\r\n\r\n![image](https://github.com/user-attachments/assets/93635032-d766-4869-b4ad-748f4f27c63a)\r\n", + "pr_file_module": null + }, + { + "comment_id": "2034602921", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5426, + "pr_file": "libraries/stdlib/common/src/generated/_Arrays.kt", + "discussion_id": "2030201923", + "commented_code": "@@ -5295,13 +5285,16 @@ public inline fun CharArray.takeLastWhile(predicate: (Char) -> Boolean): List Array.takeWhile(predicate: (T) -> Boolean): List {", + "comment_created_at": "2025-04-09T06:55:23+00:00", + "comment_author": "hoangchungk53qx1", + "comment_body": "Nice @alexismanin ", + "pr_file_module": null + }, + { + "comment_id": "2035221456", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5426, + "pr_file": "libraries/stdlib/common/src/generated/_Arrays.kt", + "discussion_id": "2030201923", + "commented_code": "@@ -5295,13 +5285,16 @@ public inline fun CharArray.takeLastWhile(predicate: (Char) -> Boolean): List Array.takeWhile(predicate: (T) -> Boolean): List {", + "comment_created_at": "2025-04-09T12:03:13+00:00", + "comment_author": "alexismanin", + "comment_body": "I've pushed the change to `takeWhile` function.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2030205112", + "pr_number": 5426, + "pr_file": "libraries/stdlib/src/kotlin/collections/Arrays.kt", + "created_at": "2025-04-06T16:57:37+00:00", + "commented_code": "* @sample samples.collections.Arrays.Transformations.flattenArray\n */\npublic fun Array>.flatten(): List {\n val result = ArrayList(sumOf { it.size })\n for (element in this) {\n result.addAll(element)\n return when (size) {", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "2030205112", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5426, + "pr_file": "libraries/stdlib/src/kotlin/collections/Arrays.kt", + "discussion_id": "2030205112", + "commented_code": "@@ -17,11 +17,26 @@ import kotlin.contracts.*\n * @sample samples.collections.Arrays.Transformations.flattenArray\n */\n public fun Array>.flatten(): List {\n- val result = ArrayList(sumOf { it.size })\n- for (element in this) {\n- result.addAll(element)\n+ return when (size) {", + "comment_created_at": "2025-04-06T16:57:37+00:00", + "comment_author": "hoangchungk53qx1", + "comment_body": "can early return \r\n\r\nif (isEmpty()) return emptyList()\r\n\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1437223453", + "pr_number": 5233, + "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiMetadataProcessor.kt", + "created_at": "2023-12-27T19:44:29+00:00", + "commented_code": "}\n\nprivate fun KmDeclarationContainer.removePrivateDeclarations() {\n functions.sortBy(KmFunction::name)\n properties.sortBy(KmProperty::name)\n\n functions.removeIf { it.visibility.isPrivate }\n properties.removeIf { it.visibility.isPrivate }", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1437223453", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5233, + "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiMetadataProcessor.kt", + "discussion_id": "1437223453", + "commented_code": "@@ -163,6 +163,9 @@ private fun KmPackage.removePrivateDeclarations() {\n }\n \n private fun KmDeclarationContainer.removePrivateDeclarations() {\n+ 
functions.sortBy(KmFunction::name)\n+ properties.sortBy(KmProperty::name)\n+\n functions.removeIf { it.visibility.isPrivate }\n properties.removeIf { it.visibility.isPrivate }", + "comment_created_at": "2023-12-27T19:44:29+00:00", + "comment_author": "JakeWharton", + "comment_body": "```suggestion\n functions.removeIf { it.visibility.isPrivate }\n properties.removeIf { it.visibility.isPrivate }\n\n functions.sortBy(KmFunction::name)\n properties.sortBy(KmProperty::name)\n```\nMight as well sort after removal. Otherwise you're wasting time on sorting entries that are destined to be removed anyway.\n", + "pr_file_module": null + }, + { + "comment_id": "1437256954", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5233, + "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiMetadataProcessor.kt", + "discussion_id": "1437223453", + "commented_code": "@@ -163,6 +163,9 @@ private fun KmPackage.removePrivateDeclarations() {\n }\n \n private fun KmDeclarationContainer.removePrivateDeclarations() {\n+ functions.sortBy(KmFunction::name)\n+ properties.sortBy(KmProperty::name)\n+\n functions.removeIf { it.visibility.isPrivate }\n properties.removeIf { it.visibility.isPrivate }", + "comment_created_at": "2023-12-27T21:18:43+00:00", + "comment_author": "Tagakov", + "comment_body": "Thanks for catching it, noticed it after creating the PR but forgot to push.", + "pr_file_module": null + }, + { + "comment_id": "1440951272", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5233, + "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiMetadataProcessor.kt", + "discussion_id": "1437223453", + "commented_code": "@@ -163,6 +163,9 @@ private fun KmPackage.removePrivateDeclarations() {\n }\n \n private fun KmDeclarationContainer.removePrivateDeclarations() {\n+ functions.sortBy(KmFunction::name)\n+ properties.sortBy(KmProperty::name)\n+\n functions.removeIf { it.visibility.isPrivate }\n properties.removeIf { it.visibility.isPrivate }", + "comment_created_at": "2024-01-03T21:07:15+00:00", + "comment_author": "Tagakov", + "comment_body": "My dog ate it! 
I swear!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1263892613", + "pr_number": 5156, + "pr_file": "kotlin-native/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/LlvmModuleSpecificationImpl.kt", + "created_at": "2023-07-14T15:58:00+00:00", + "commented_code": "override fun containsPackageFragment(packageFragment: IrPackageFragment): Boolean =\n packageFragment.konanLibrary.let { it == null || containsLibrary(it) }\n\n override fun containsDeclaration(declaration: IrDeclaration): Boolean =\n declaration.konanLibrary.let { it == null || containsLibrary(it) }\n // When producing per-file caches, some declarations might be generated by the stub generator.\n && declaration.getPackageFragment() !is IrExternalPackageFragment\n private val containsCache = mutableMapOf()\n\n override fun containsDeclaration(declaration: IrDeclaration): Boolean = containsCache.getOrPut(declaration) {", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1263892613", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5156, + "pr_file": "kotlin-native/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/LlvmModuleSpecificationImpl.kt", + "discussion_id": "1263892613", + "commented_code": "@@ -28,10 +29,18 @@ internal abstract class LlvmModuleSpecificationBase(protected val cachedLibrarie\n override fun containsPackageFragment(packageFragment: IrPackageFragment): Boolean =\n packageFragment.konanLibrary.let { it == null || containsLibrary(it) }\n \n- override fun containsDeclaration(declaration: IrDeclaration): Boolean =\n- declaration.konanLibrary.let { it == null || containsLibrary(it) }\n- // When producing per-file caches, some declarations might be generated by the stub generator.\n- && declaration.getPackageFragment() !is IrExternalPackageFragment\n+ private val containsCache = mutableMapOf()\n+\n+ override fun containsDeclaration(declaration: IrDeclaration): Boolean = containsCache.getOrPut(declaration) {", + "comment_created_at": "2023-07-14T15:58:00+00:00", + "comment_author": "homuroll", + "comment_body": "What is the purpose of inlining of `declaration.konanLibrary` here?", + "pr_file_module": null + }, + { + "comment_id": "1264212481", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5156, + "pr_file": "kotlin-native/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/LlvmModuleSpecificationImpl.kt", + "discussion_id": "1263892613", + "commented_code": "@@ -28,10 +29,18 @@ internal abstract class LlvmModuleSpecificationBase(protected val cachedLibrarie\n override fun containsPackageFragment(packageFragment: IrPackageFragment): Boolean =\n packageFragment.konanLibrary.let { it == null || containsLibrary(it) }\n \n- override fun containsDeclaration(declaration: IrDeclaration): Boolean =\n- declaration.konanLibrary.let { it == null || containsLibrary(it) }\n- // When producing per-file caches, some declarations might be generated by the stub generator.\n- && declaration.getPackageFragment() !is IrExternalPackageFragment\n+ private val containsCache = mutableMapOf()\n+\n+ override fun containsDeclaration(declaration: IrDeclaration): Boolean = containsCache.getOrPut(declaration) {", + "comment_created_at": "2023-07-14T22:15:53+00:00", + "comment_author": "MarkCMann", + "comment_body": "To memoize more of the calls, by inlining the konan library recursive calls here we can store the result for the top level class in addition to all nested classes. 
\r\n\r\nOverall this method showed up on the profiler as being very heavy due to the repeated recursive calls in KonanLibrary -- I opted to memoize the data in the llvm module specification class instead of trying to add it to the IrDeclaration or adding a global cache to konanLibrary in IrUtils. ", + "pr_file_module": null + }, + { + "comment_id": "1265352329", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5156, + "pr_file": "kotlin-native/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/LlvmModuleSpecificationImpl.kt", + "discussion_id": "1263892613", + "commented_code": "@@ -28,10 +29,18 @@ internal abstract class LlvmModuleSpecificationBase(protected val cachedLibrarie\n override fun containsPackageFragment(packageFragment: IrPackageFragment): Boolean =\n packageFragment.konanLibrary.let { it == null || containsLibrary(it) }\n \n- override fun containsDeclaration(declaration: IrDeclaration): Boolean =\n- declaration.konanLibrary.let { it == null || containsLibrary(it) }\n- // When producing per-file caches, some declarations might be generated by the stub generator.\n- && declaration.getPackageFragment() !is IrExternalPackageFragment\n+ private val containsCache = mutableMapOf()\n+\n+ override fun containsDeclaration(declaration: IrDeclaration): Boolean = containsCache.getOrPut(declaration) {", + "comment_created_at": "2023-07-17T13:26:09+00:00", + "comment_author": "ddolovov", + "comment_body": "Just a question (I don't know the right answer):\r\n\r\nIs `fun containsDeclaration(declaration: IrDeclaration)` actually called for the same declaration multiple times? If no, then probably we should cache per `KotlinLibrary` or per smth else but not per `IrDeclaration`.", + "pr_file_module": null + }, + { + "comment_id": "1265458783", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5156, + "pr_file": "kotlin-native/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/LlvmModuleSpecificationImpl.kt", + "discussion_id": "1263892613", + "commented_code": "@@ -28,10 +29,18 @@ internal abstract class LlvmModuleSpecificationBase(protected val cachedLibrarie\n override fun containsPackageFragment(packageFragment: IrPackageFragment): Boolean =\n packageFragment.konanLibrary.let { it == null || containsLibrary(it) }\n \n- override fun containsDeclaration(declaration: IrDeclaration): Boolean =\n- declaration.konanLibrary.let { it == null || containsLibrary(it) }\n- // When producing per-file caches, some declarations might be generated by the stub generator.\n- && declaration.getPackageFragment() !is IrExternalPackageFragment\n+ private val containsCache = mutableMapOf()\n+\n+ override fun containsDeclaration(declaration: IrDeclaration): Boolean = containsCache.getOrPut(declaration) {", + "comment_created_at": "2023-07-17T14:32:30+00:00", + "comment_author": "homuroll", + "comment_body": "Got it. 
Please, add a comment here explaining this, and probably some short comment/reference to here from `declaration.konanLibrary` (in case someone decides to change it).", + "pr_file_module": null + }, + { + "comment_id": "1265950218", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5156, + "pr_file": "kotlin-native/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/LlvmModuleSpecificationImpl.kt", + "discussion_id": "1263892613", + "commented_code": "@@ -28,10 +29,18 @@ internal abstract class LlvmModuleSpecificationBase(protected val cachedLibrarie\n override fun containsPackageFragment(packageFragment: IrPackageFragment): Boolean =\n packageFragment.konanLibrary.let { it == null || containsLibrary(it) }\n \n- override fun containsDeclaration(declaration: IrDeclaration): Boolean =\n- declaration.konanLibrary.let { it == null || containsLibrary(it) }\n- // When producing per-file caches, some declarations might be generated by the stub generator.\n- && declaration.getPackageFragment() !is IrExternalPackageFragment\n+ private val containsCache = mutableMapOf()\n+\n+ override fun containsDeclaration(declaration: IrDeclaration): Boolean = containsCache.getOrPut(declaration) {", + "comment_created_at": "2023-07-17T22:10:13+00:00", + "comment_author": "MarkCMann", + "comment_body": "@ddolovov I'm not sure that I understand your concern here. Is this more related to the size of the cache? \r\n\r\nWith this change I've moved the recursive calls into the llvm contains declaration function so now that function should be called multiple times for `IrDeclarations`. ", + "pr_file_module": null + }, + { + "comment_id": "1266570240", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5156, + "pr_file": "kotlin-native/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/LlvmModuleSpecificationImpl.kt", + "discussion_id": "1263892613", + "commented_code": "@@ -28,10 +29,18 @@ internal abstract class LlvmModuleSpecificationBase(protected val cachedLibrarie\n override fun containsPackageFragment(packageFragment: IrPackageFragment): Boolean =\n packageFragment.konanLibrary.let { it == null || containsLibrary(it) }\n \n- override fun containsDeclaration(declaration: IrDeclaration): Boolean =\n- declaration.konanLibrary.let { it == null || containsLibrary(it) }\n- // When producing per-file caches, some declarations might be generated by the stub generator.\n- && declaration.getPackageFragment() !is IrExternalPackageFragment\n+ private val containsCache = mutableMapOf()\n+\n+ override fun containsDeclaration(declaration: IrDeclaration): Boolean = containsCache.getOrPut(declaration) {", + "comment_created_at": "2023-07-18T10:22:19+00:00", + "comment_author": "ddolovov", + "comment_body": "> so now that function should be called multiple times for IrDeclarations.\r\n\r\nYou mean it could be called multiple times for the same instance of `IrDeclaration` (and therefore it makes sense to cache per-declaration), right?", + "pr_file_module": null + }, + { + "comment_id": "1269759652", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5156, + "pr_file": "kotlin-native/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/LlvmModuleSpecificationImpl.kt", + "discussion_id": "1263892613", + "commented_code": "@@ -28,10 +29,18 @@ internal abstract class LlvmModuleSpecificationBase(protected val cachedLibrarie\n override fun containsPackageFragment(packageFragment: IrPackageFragment): Boolean =\n 
packageFragment.konanLibrary.let { it == null || containsLibrary(it) }\n \n- override fun containsDeclaration(declaration: IrDeclaration): Boolean =\n- declaration.konanLibrary.let { it == null || containsLibrary(it) }\n- // When producing per-file caches, some declarations might be generated by the stub generator.\n- && declaration.getPackageFragment() !is IrExternalPackageFragment\n+ private val containsCache = mutableMapOf()\n+\n+ override fun containsDeclaration(declaration: IrDeclaration): Boolean = containsCache.getOrPut(declaration) {", + "comment_created_at": "2023-07-20T17:24:07+00:00", + "comment_author": "MarkCMann", + "comment_body": "That's correct, especially for parent IrDeclarations they would be called repeatedly", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1265278095", + "pr_number": 5156, + "pr_file": "compiler/ir/serialization.common/src/org/jetbrains/kotlin/backend/common/overrides/FakeOverrides.kt", + "created_at": "2023-07-17T12:24:19+00:00", + "commented_code": "}\n\n fun provideFakeOverrides() {\n val entries = fakeOverrideCandidates.entries\n val entries = fakeOverrideCandidates.entries.toMutableList()", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1265278095", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5156, + "pr_file": "compiler/ir/serialization.common/src/org/jetbrains/kotlin/backend/common/overrides/FakeOverrides.kt", + "discussion_id": "1265278095", + "commented_code": "@@ -286,10 +286,9 @@ class FakeOverrideBuilder(\n }\n \n fun provideFakeOverrides() {\n- val entries = fakeOverrideCandidates.entries\n+ val entries = fakeOverrideCandidates.entries.toMutableList()", + "comment_created_at": "2023-07-17T12:24:19+00:00", + "comment_author": "ddolovov", + "comment_body": "The `fakeOverrideCandidates` map should be cleaned at the exit from `provideFakeOverrides()`. Otherwise we would have repeated work each time we deserialize body of inline function: https://github.com/JetBrains/kotlin/blob/883102c4c68d1b73216c6ebebed0b288f0fa7978/kotlin-native/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/serialization/KonanIrlinker.kt#L938", + "pr_file_module": null + }, + { + "comment_id": "1265940209", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5156, + "pr_file": "compiler/ir/serialization.common/src/org/jetbrains/kotlin/backend/common/overrides/FakeOverrides.kt", + "discussion_id": "1265278095", + "commented_code": "@@ -286,10 +286,9 @@ class FakeOverrideBuilder(\n }\n \n fun provideFakeOverrides() {\n- val entries = fakeOverrideCandidates.entries\n+ val entries = fakeOverrideCandidates.entries.toMutableList()", + "comment_created_at": "2023-07-17T21:53:22+00:00", + "comment_author": "MarkCMann", + "comment_body": "Ah okay it wasn't obvious to me if the entry set returned was the actual underlying entryset for the map and could therefore mutate the entries in the map. 
I can add a call to clear the fakeOverrideCandidates here", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1020631690", + "pr_number": 4840, + "pr_file": "core/reflection.jvm/src/kotlin/reflect/jvm/internal/KCallableImpl.kt", + "created_at": "2022-11-12T01:19:53+00:00", + "commented_code": "return if (isAnnotationConstructor) callAnnotationConstructor(args) else callDefaultMethod(args, null)\n }\n\n private val _absentArguments = ReflectProperties.lazySoft {\n val parameterSize = parameters.size + (if (isSuspend) 1 else 0)\n val maskSize = (parameters.size + Integer.SIZE - 1) / Integer.SIZE\n\n // arguments with masks and DefaultConstructorMarker or MethodHandle\n // +1 is argument for DefaultConstructorMarker or MethodHandle\n val arguments = arrayOfNulls(parameterSize + maskSize + 1)\n\n // set absent values\n parameters.forEach { parameter ->\n if (parameter.isOptional && !parameter.type.isInlineClassType) {\n // For inline class types, the javaType refers to the underlying type of the inline class,\n // but we have to pass null in order to mark the argument as absent for InlineClassAwareCaller.\n arguments[parameter.index] = defaultPrimitiveValue(parameter.type.javaType)\n } else if (parameter.isVararg) {\n arguments[parameter.index] = defaultEmptyArray(parameter.type)\n }\n }\n\n for (i in 0 until maskSize) {\n arguments[parameterSize + i] = 0\n }\n\n arguments\n }\n\n private fun getAbsentArguments(): Array = _absentArguments().clone()\n\n // See ArgumentGenerator#generate\n internal fun callDefaultMethod(args: Map, continuationArgument: Continuation<*>?): R {\n val parameters = parameters\n val arguments = ArrayList(parameters.size)\n var mask = 0\n val masks = ArrayList(1)\n var index = 0\n var anyOptional = false\n val parameterSize = parameters.size + (if (isSuspend) 1 else 0)", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1020631690", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4840, + "pr_file": "core/reflection.jvm/src/kotlin/reflect/jvm/internal/KCallableImpl.kt", + "discussion_id": "1020631690", + "commented_code": "@@ -112,65 +112,87 @@ internal abstract class KCallableImpl : KCallable, KTypeParameterOwner\n return if (isAnnotationConstructor) callAnnotationConstructor(args) else callDefaultMethod(args, null)\n }\n \n+ private val _absentArguments = ReflectProperties.lazySoft {\n+ val parameterSize = parameters.size + (if (isSuspend) 1 else 0)\n+ val maskSize = (parameters.size + Integer.SIZE - 1) / Integer.SIZE\n+\n+ // arguments with masks and DefaultConstructorMarker or MethodHandle\n+ // +1 is argument for DefaultConstructorMarker or MethodHandle\n+ val arguments = arrayOfNulls(parameterSize + maskSize + 1)\n+\n+ // set absent values\n+ parameters.forEach { parameter ->\n+ if (parameter.isOptional && !parameter.type.isInlineClassType) {\n+ // For inline class types, the javaType refers to the underlying type of the inline class,\n+ // but we have to pass null in order to mark the argument as absent for InlineClassAwareCaller.\n+ arguments[parameter.index] = defaultPrimitiveValue(parameter.type.javaType)\n+ } else if (parameter.isVararg) {\n+ arguments[parameter.index] = defaultEmptyArray(parameter.type)\n+ }\n+ }\n+\n+ for (i in 0 until maskSize) {\n+ arguments[parameterSize + i] = 0\n+ }\n+\n+ arguments\n+ }\n+\n+ private fun getAbsentArguments(): Array = _absentArguments().clone()\n+\n // See ArgumentGenerator#generate\n internal fun callDefaultMethod(args: Map, continuationArgument: 
Continuation<*>?): R {\n val parameters = parameters\n- val arguments = ArrayList(parameters.size)\n- var mask = 0\n- val masks = ArrayList(1)\n- var index = 0\n- var anyOptional = false\n+ val parameterSize = parameters.size + (if (isSuspend) 1 else 0)", + "comment_created_at": "2022-11-12T01:19:53+00:00", + "comment_author": "k163377", + "comment_body": "The following optimization to no-argument functions was found to be nearly twice as fast as 638d89c.\r\nDoes this optimization look like it should be incorporated?\r\n\r\n```\r\nBenchmark Mode Cnt Score Error Units\r\nMeasurement.zero thrpt 4 22299885.316 ± 295995.533 ops/s\r\nMeasurement.fiveWithDefault thrpt 4 1927106.356 ± 225032.224 ops/s\r\nMeasurement.fiveWithoutDefault thrpt 4 2460623.502 ± 96649.714 ops/s\r\nMeasurement.oneWithDefault thrpt 4 5384826.666 ± 340254.100 ops/s\r\nMeasurement.oneWithoutDefault thrpt 4 5654229.984 ± 5914104.417 ops/s\r\nMeasurement.twentyWithDefault thrpt 4 510341.805 ± 2940.113 ops/s\r\nMeasurement.twentyWithoutDefault thrpt 4 739231.653 ± 21298.002 ops/s\r\n```\r\n\r\n```suggestion\r\n val parameterSize = parameters.size + (if (isSuspend) 1 else 0)\r\n\r\n // Optimization for general no-argument functions. \r\n if (parameters.isEmpty()) {\r\n @Suppress(\"UNCHECKED_CAST\")\r\n return reflectionCall {\r\n caller.call(if (isSuspend) arrayOf(continuationArgument) else emptyArray()) as R\r\n }\r\n } else if (parameters.size == 1) {\r\n parameters.first().takeIf { it.kind != KParameter.Kind.VALUE }?.let { parameter ->\r\n @Suppress(\"UNCHECKED_CAST\")\r\n return reflectionCall {\r\n caller.call(if (isSuspend) arrayOf(args[parameter], continuationArgument) else arrayOf(args[parameter])) as R\r\n }\r\n }\r\n }\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1031902832", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4840, + "pr_file": "core/reflection.jvm/src/kotlin/reflect/jvm/internal/KCallableImpl.kt", + "discussion_id": "1020631690", + "commented_code": "@@ -112,65 +112,87 @@ internal abstract class KCallableImpl : KCallable, KTypeParameterOwner\n return if (isAnnotationConstructor) callAnnotationConstructor(args) else callDefaultMethod(args, null)\n }\n \n+ private val _absentArguments = ReflectProperties.lazySoft {\n+ val parameterSize = parameters.size + (if (isSuspend) 1 else 0)\n+ val maskSize = (parameters.size + Integer.SIZE - 1) / Integer.SIZE\n+\n+ // arguments with masks and DefaultConstructorMarker or MethodHandle\n+ // +1 is argument for DefaultConstructorMarker or MethodHandle\n+ val arguments = arrayOfNulls(parameterSize + maskSize + 1)\n+\n+ // set absent values\n+ parameters.forEach { parameter ->\n+ if (parameter.isOptional && !parameter.type.isInlineClassType) {\n+ // For inline class types, the javaType refers to the underlying type of the inline class,\n+ // but we have to pass null in order to mark the argument as absent for InlineClassAwareCaller.\n+ arguments[parameter.index] = defaultPrimitiveValue(parameter.type.javaType)\n+ } else if (parameter.isVararg) {\n+ arguments[parameter.index] = defaultEmptyArray(parameter.type)\n+ }\n+ }\n+\n+ for (i in 0 until maskSize) {\n+ arguments[parameterSize + i] = 0\n+ }\n+\n+ arguments\n+ }\n+\n+ private fun getAbsentArguments(): Array = _absentArguments().clone()\n+\n // See ArgumentGenerator#generate\n internal fun callDefaultMethod(args: Map, continuationArgument: Continuation<*>?): R {\n val parameters = parameters\n- val arguments = ArrayList(parameters.size)\n- var mask = 0\n- val masks = ArrayList(1)\n- var index = 
0\n- var anyOptional = false\n+ val parameterSize = parameters.size + (if (isSuspend) 1 else 0)", + "comment_created_at": "2022-11-24T23:02:29+00:00", + "comment_author": "udalov", + "comment_body": "For the 0-parameters case, it looks good! But for the 1 parameter, I'm not really sure, the code is getting quite complicated already. Besides, `Measurement.oneWithoutDefault` regressed in your results after this change, if I understand them correctly? I propose to keep the 0-parameter case only.", + "pr_file_module": null + }, + { + "comment_id": "1031954118", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4840, + "pr_file": "core/reflection.jvm/src/kotlin/reflect/jvm/internal/KCallableImpl.kt", + "discussion_id": "1020631690", + "commented_code": "@@ -112,65 +112,87 @@ internal abstract class KCallableImpl : KCallable, KTypeParameterOwner\n return if (isAnnotationConstructor) callAnnotationConstructor(args) else callDefaultMethod(args, null)\n }\n \n+ private val _absentArguments = ReflectProperties.lazySoft {\n+ val parameterSize = parameters.size + (if (isSuspend) 1 else 0)\n+ val maskSize = (parameters.size + Integer.SIZE - 1) / Integer.SIZE\n+\n+ // arguments with masks and DefaultConstructorMarker or MethodHandle\n+ // +1 is argument for DefaultConstructorMarker or MethodHandle\n+ val arguments = arrayOfNulls(parameterSize + maskSize + 1)\n+\n+ // set absent values\n+ parameters.forEach { parameter ->\n+ if (parameter.isOptional && !parameter.type.isInlineClassType) {\n+ // For inline class types, the javaType refers to the underlying type of the inline class,\n+ // but we have to pass null in order to mark the argument as absent for InlineClassAwareCaller.\n+ arguments[parameter.index] = defaultPrimitiveValue(parameter.type.javaType)\n+ } else if (parameter.isVararg) {\n+ arguments[parameter.index] = defaultEmptyArray(parameter.type)\n+ }\n+ }\n+\n+ for (i in 0 until maskSize) {\n+ arguments[parameterSize + i] = 0\n+ }\n+\n+ arguments\n+ }\n+\n+ private fun getAbsentArguments(): Array = _absentArguments().clone()\n+\n // See ArgumentGenerator#generate\n internal fun callDefaultMethod(args: Map, continuationArgument: Continuation<*>?): R {\n val parameters = parameters\n- val arguments = ArrayList(parameters.size)\n- var mask = 0\n- val masks = ArrayList(1)\n- var index = 0\n- var anyOptional = false\n+ val parameterSize = parameters.size + (if (isSuspend) 1 else 0)", + "comment_created_at": "2022-11-25T02:21:59+00:00", + "comment_author": "k163377", + "comment_body": "> Measurement.oneWithoutDefault regressed in your results after this change, if I understand them correctly?\r\n\r\nI ran the benchmark again with almost the same content, but there did not seem to be any regression in performance.\r\nSince the benchmark was run on a local machine, there must have been something going on in the background during this measurement.\r\n\r\n**1e1cd3a**\r\n\r\n```\r\nBenchmark Mode Cnt Score Error Units\r\nMeasurement.fiveWithDefault thrpt 4 1713974.649 ± 807978.785 ops/s\r\nMeasurement.fiveWithoutDefault thrpt 4 2402911.282 ± 116905.232 ops/s\r\nMeasurement.oneWithDefault thrpt 4 5018352.177 ± 3050333.309 ops/s\r\nMeasurement.oneWithoutDefault thrpt 4 6338081.662 ± 384633.083 ops/s\r\nMeasurement.twentyWithDefault thrpt 4 506022.842 ± 28381.148 ops/s\r\nMeasurement.twentyWithoutDefault thrpt 4 721987.591 ± 78852.517 ops/s\r\nMeasurement.zero thrpt 4 12727105.089 ± 188029.864 ops/s\r\n```\r\n\r\n**after-zero-arg-opt2**\r\n\r\n```\r\nBenchmark Mode Cnt Score Error 
Units\r\nMeasurement.fiveWithDefault thrpt 4 1880153.629 ± 232291.891 ops/s\r\nMeasurement.fiveWithoutDefault thrpt 4 2388820.000 ± 97785.718 ops/s\r\nMeasurement.oneWithDefault thrpt 4 5276216.078 ± 327025.056 ops/s\r\nMeasurement.oneWithoutDefault thrpt 4 6251850.821 ± 497046.067 ops/s\r\nMeasurement.twentyWithDefault thrpt 4 516927.278 ± 13448.841 ops/s\r\nMeasurement.twentyWithoutDefault thrpt 4 728619.156 ± 36904.744 ops/s\r\nMeasurement.zero thrpt 4 21774799.243 ± 1534253.147 ops/s\r\n```\r\n\r\n---\r\n\r\nI agree that it is complicated.\r\nI have pushed for a fix.\r\n[68a9741](https://github.com/JetBrains/kotlin/pull/4840/commits/68a9741ee2307eb0d27b2a77c1124c8aa267597a)\r\n\r\nI did not measure benchmark results with the one-parameter branch removed, but at least there is no reason for the score to drop since the target of the measurement is a top-level function.\r\n", + "pr_file_module": null + }, + { + "comment_id": "1031956945", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4840, + "pr_file": "core/reflection.jvm/src/kotlin/reflect/jvm/internal/KCallableImpl.kt", + "discussion_id": "1020631690", + "commented_code": "@@ -112,65 +112,87 @@ internal abstract class KCallableImpl : KCallable, KTypeParameterOwner\n return if (isAnnotationConstructor) callAnnotationConstructor(args) else callDefaultMethod(args, null)\n }\n \n+ private val _absentArguments = ReflectProperties.lazySoft {\n+ val parameterSize = parameters.size + (if (isSuspend) 1 else 0)\n+ val maskSize = (parameters.size + Integer.SIZE - 1) / Integer.SIZE\n+\n+ // arguments with masks and DefaultConstructorMarker or MethodHandle\n+ // +1 is argument for DefaultConstructorMarker or MethodHandle\n+ val arguments = arrayOfNulls(parameterSize + maskSize + 1)\n+\n+ // set absent values\n+ parameters.forEach { parameter ->\n+ if (parameter.isOptional && !parameter.type.isInlineClassType) {\n+ // For inline class types, the javaType refers to the underlying type of the inline class,\n+ // but we have to pass null in order to mark the argument as absent for InlineClassAwareCaller.\n+ arguments[parameter.index] = defaultPrimitiveValue(parameter.type.javaType)\n+ } else if (parameter.isVararg) {\n+ arguments[parameter.index] = defaultEmptyArray(parameter.type)\n+ }\n+ }\n+\n+ for (i in 0 until maskSize) {\n+ arguments[parameterSize + i] = 0\n+ }\n+\n+ arguments\n+ }\n+\n+ private fun getAbsentArguments(): Array = _absentArguments().clone()\n+\n // See ArgumentGenerator#generate\n internal fun callDefaultMethod(args: Map, continuationArgument: Continuation<*>?): R {\n val parameters = parameters\n- val arguments = ArrayList(parameters.size)\n- var mask = 0\n- val masks = ArrayList(1)\n- var index = 0\n- var anyOptional = false\n+ val parameterSize = parameters.size + (if (isSuspend) 1 else 0)", + "comment_created_at": "2022-11-25T02:30:17+00:00", + "comment_author": "k163377", + "comment_body": "As a side note, the reason I proposed the original form is that `getter` is a typical example of a function that takes only instances as arguments.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "994632200", + "pr_number": 4982, + "pr_file": "compiler/fir/providers/src/org/jetbrains/kotlin/fir/scopes/impl/FirTypeIntersectionScopeContext.kt", + "created_at": "2022-10-13T13:16:48+00:00", + "commented_code": "val result = mutableListOf>()\n\n while (allMembersWithScope.size > 1) {\n val maxByVisibility = findMemberWithMaxVisibility(allMembersWithScope)\n val extractBothWaysWithPrivate = 
overrideService.extractBothWaysOverridable(maxByVisibility, allMembersWithScope, overrideChecker)\n val extractedOverrides = extractBothWaysWithPrivate.filterNotTo(mutableListOf()) {\n Visibilities.isPrivate((it.member.fir as FirMemberDeclaration).visibility)\n }.takeIf { it.isNotEmpty() } ?: extractBothWaysWithPrivate\n val baseMembersForIntersection = extractedOverrides.calcBaseMembersForIntersectionOverride()\n if (baseMembersForIntersection.size > 1) {\n val (mostSpecific, scopeForMostSpecific) = overrideService.selectMostSpecificMember(\n baseMembersForIntersection,\n ReturnTypeCalculatorForFullBodyResolve\n )\n val intersectionOverrideContext = ContextForIntersectionOverrideConstruction(\n mostSpecific,\n this,\n extractedOverrides,\n scopeForMostSpecific\n )\n result += ResultOfIntersection.NonTrivial(\n intersectionOverrides,\n intersectionOverrideContext,\n extractedOverrides,\n containingScope = null\n )\n val groupWithPrivate =\n overrideService.extractBothWaysOverridable(allMembersWithScope.maxByVisibility(), allMembersWithScope, overrideChecker)\n val group = groupWithPrivate.filter { !Visibilities.isPrivate(it.member.fir.visibility) }.ifEmpty { groupWithPrivate }\n val singleRealImplementation = group.size == 1 ||\n group.mapTo(mutableSetOf()) { it.member.fir.unwrapSubstitutionOverrides().symbol }.size == 1\n val directOverrides = if (singleRealImplementation) group else group.onlyDirectlyInherited()", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "994632200", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4982, + "pr_file": "compiler/fir/providers/src/org/jetbrains/kotlin/fir/scopes/impl/FirTypeIntersectionScopeContext.kt", + "discussion_id": "994632200", + "commented_code": "@@ -161,32 +156,21 @@ class FirTypeIntersectionScopeContext(\n val result = mutableListOf>()\n \n while (allMembersWithScope.size > 1) {\n- val maxByVisibility = findMemberWithMaxVisibility(allMembersWithScope)\n- val extractBothWaysWithPrivate = overrideService.extractBothWaysOverridable(maxByVisibility, allMembersWithScope, overrideChecker)\n- val extractedOverrides = extractBothWaysWithPrivate.filterNotTo(mutableListOf()) {\n- Visibilities.isPrivate((it.member.fir as FirMemberDeclaration).visibility)\n- }.takeIf { it.isNotEmpty() } ?: extractBothWaysWithPrivate\n- val baseMembersForIntersection = extractedOverrides.calcBaseMembersForIntersectionOverride()\n- if (baseMembersForIntersection.size > 1) {\n- val (mostSpecific, scopeForMostSpecific) = overrideService.selectMostSpecificMember(\n- baseMembersForIntersection,\n- ReturnTypeCalculatorForFullBodyResolve\n- )\n- val intersectionOverrideContext = ContextForIntersectionOverrideConstruction(\n- mostSpecific,\n- this,\n- extractedOverrides,\n- scopeForMostSpecific\n- )\n- result += ResultOfIntersection.NonTrivial(\n- intersectionOverrides,\n- intersectionOverrideContext,\n- extractedOverrides,\n- containingScope = null\n- )\n+ val groupWithPrivate =\n+ overrideService.extractBothWaysOverridable(allMembersWithScope.maxByVisibility(), allMembersWithScope, overrideChecker)\n+ val group = groupWithPrivate.filter { !Visibilities.isPrivate(it.member.fir.visibility) }.ifEmpty { groupWithPrivate }\n+ val singleRealImplementation = group.size == 1 ||\n+ group.mapTo(mutableSetOf()) { it.member.fir.unwrapSubstitutionOverrides().symbol }.size == 1\n+ val directOverrides = if (singleRealImplementation) group else group.onlyDirectlyInherited()", + "comment_created_at": "2022-10-13T13:16:48+00:00", + 
"comment_author": "dzharkov", + "comment_body": "I would add a comment that this `if` looks more like an optimization than a semantically necessary thing.\nOr, probably I would just use `group.onlyDirectlyInherited()` unconditionally.\n", + "pr_file_module": null + }, + { + "comment_id": "994734316", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4982, + "pr_file": "compiler/fir/providers/src/org/jetbrains/kotlin/fir/scopes/impl/FirTypeIntersectionScopeContext.kt", + "discussion_id": "994632200", + "commented_code": "@@ -161,32 +156,21 @@ class FirTypeIntersectionScopeContext(\n val result = mutableListOf>()\n \n while (allMembersWithScope.size > 1) {\n- val maxByVisibility = findMemberWithMaxVisibility(allMembersWithScope)\n- val extractBothWaysWithPrivate = overrideService.extractBothWaysOverridable(maxByVisibility, allMembersWithScope, overrideChecker)\n- val extractedOverrides = extractBothWaysWithPrivate.filterNotTo(mutableListOf()) {\n- Visibilities.isPrivate((it.member.fir as FirMemberDeclaration).visibility)\n- }.takeIf { it.isNotEmpty() } ?: extractBothWaysWithPrivate\n- val baseMembersForIntersection = extractedOverrides.calcBaseMembersForIntersectionOverride()\n- if (baseMembersForIntersection.size > 1) {\n- val (mostSpecific, scopeForMostSpecific) = overrideService.selectMostSpecificMember(\n- baseMembersForIntersection,\n- ReturnTypeCalculatorForFullBodyResolve\n- )\n- val intersectionOverrideContext = ContextForIntersectionOverrideConstruction(\n- mostSpecific,\n- this,\n- extractedOverrides,\n- scopeForMostSpecific\n- )\n- result += ResultOfIntersection.NonTrivial(\n- intersectionOverrides,\n- intersectionOverrideContext,\n- extractedOverrides,\n- containingScope = null\n- )\n+ val groupWithPrivate =\n+ overrideService.extractBothWaysOverridable(allMembersWithScope.maxByVisibility(), allMembersWithScope, overrideChecker)\n+ val group = groupWithPrivate.filter { !Visibilities.isPrivate(it.member.fir.visibility) }.ifEmpty { groupWithPrivate }\n+ val singleRealImplementation = group.size == 1 ||\n+ group.mapTo(mutableSetOf()) { it.member.fir.unwrapSubstitutionOverrides().symbol }.size == 1\n+ val directOverrides = if (singleRealImplementation) group else group.onlyDirectlyInherited()", + "comment_created_at": "2022-10-13T14:39:12+00:00", + "comment_author": "pyos", + "comment_body": "BTW I'm also wondering here if `singleRealImplementation` should be computed *after* removing transitively inherited declarations, but I'm not familiar enough with this code to come up with different test cases.", + "pr_file_module": null + }, + { + "comment_id": "994749406", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4982, + "pr_file": "compiler/fir/providers/src/org/jetbrains/kotlin/fir/scopes/impl/FirTypeIntersectionScopeContext.kt", + "discussion_id": "994632200", + "commented_code": "@@ -161,32 +156,21 @@ class FirTypeIntersectionScopeContext(\n val result = mutableListOf>()\n \n while (allMembersWithScope.size > 1) {\n- val maxByVisibility = findMemberWithMaxVisibility(allMembersWithScope)\n- val extractBothWaysWithPrivate = overrideService.extractBothWaysOverridable(maxByVisibility, allMembersWithScope, overrideChecker)\n- val extractedOverrides = extractBothWaysWithPrivate.filterNotTo(mutableListOf()) {\n- Visibilities.isPrivate((it.member.fir as FirMemberDeclaration).visibility)\n- }.takeIf { it.isNotEmpty() } ?: extractBothWaysWithPrivate\n- val baseMembersForIntersection = extractedOverrides.calcBaseMembersForIntersectionOverride()\n- if 
(baseMembersForIntersection.size > 1) {\n- val (mostSpecific, scopeForMostSpecific) = overrideService.selectMostSpecificMember(\n- baseMembersForIntersection,\n- ReturnTypeCalculatorForFullBodyResolve\n- )\n- val intersectionOverrideContext = ContextForIntersectionOverrideConstruction(\n- mostSpecific,\n- this,\n- extractedOverrides,\n- scopeForMostSpecific\n- )\n- result += ResultOfIntersection.NonTrivial(\n- intersectionOverrides,\n- intersectionOverrideContext,\n- extractedOverrides,\n- containingScope = null\n- )\n+ val groupWithPrivate =\n+ overrideService.extractBothWaysOverridable(allMembersWithScope.maxByVisibility(), allMembersWithScope, overrideChecker)\n+ val group = groupWithPrivate.filter { !Visibilities.isPrivate(it.member.fir.visibility) }.ifEmpty { groupWithPrivate }\n+ val singleRealImplementation = group.size == 1 ||\n+ group.mapTo(mutableSetOf()) { it.member.fir.unwrapSubstitutionOverrides().symbol }.size == 1\n+ val directOverrides = if (singleRealImplementation) group else group.onlyDirectlyInherited()", + "comment_created_at": "2022-10-13T14:48:26+00:00", + "comment_author": "dzharkov", + "comment_body": "TBH I'm not sure here too, so I would move it without a clear reason", + "pr_file_module": null + }, + { + "comment_id": "994798498", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4982, + "pr_file": "compiler/fir/providers/src/org/jetbrains/kotlin/fir/scopes/impl/FirTypeIntersectionScopeContext.kt", + "discussion_id": "994632200", + "commented_code": "@@ -161,32 +156,21 @@ class FirTypeIntersectionScopeContext(\n val result = mutableListOf>()\n \n while (allMembersWithScope.size > 1) {\n- val maxByVisibility = findMemberWithMaxVisibility(allMembersWithScope)\n- val extractBothWaysWithPrivate = overrideService.extractBothWaysOverridable(maxByVisibility, allMembersWithScope, overrideChecker)\n- val extractedOverrides = extractBothWaysWithPrivate.filterNotTo(mutableListOf()) {\n- Visibilities.isPrivate((it.member.fir as FirMemberDeclaration).visibility)\n- }.takeIf { it.isNotEmpty() } ?: extractBothWaysWithPrivate\n- val baseMembersForIntersection = extractedOverrides.calcBaseMembersForIntersectionOverride()\n- if (baseMembersForIntersection.size > 1) {\n- val (mostSpecific, scopeForMostSpecific) = overrideService.selectMostSpecificMember(\n- baseMembersForIntersection,\n- ReturnTypeCalculatorForFullBodyResolve\n- )\n- val intersectionOverrideContext = ContextForIntersectionOverrideConstruction(\n- mostSpecific,\n- this,\n- extractedOverrides,\n- scopeForMostSpecific\n- )\n- result += ResultOfIntersection.NonTrivial(\n- intersectionOverrides,\n- intersectionOverrideContext,\n- extractedOverrides,\n- containingScope = null\n- )\n+ val groupWithPrivate =\n+ overrideService.extractBothWaysOverridable(allMembersWithScope.maxByVisibility(), allMembersWithScope, overrideChecker)\n+ val group = groupWithPrivate.filter { !Visibilities.isPrivate(it.member.fir.visibility) }.ifEmpty { groupWithPrivate }\n+ val singleRealImplementation = group.size == 1 ||\n+ group.mapTo(mutableSetOf()) { it.member.fir.unwrapSubstitutionOverrides().symbol }.size == 1\n+ val directOverrides = if (singleRealImplementation) group else group.onlyDirectlyInherited()", + "comment_created_at": "2022-10-13T15:26:17+00:00", + "comment_author": "pyos", + "comment_body": "done", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kotlin-minimize-unnecessary-work.md b/_reviewers/kotlin-minimize-unnecessary-work.md index 5ce1d3b..5cc7856 100644 --- 
a/_reviewers/kotlin-minimize-unnecessary-work.md +++ b/_reviewers/kotlin-minimize-unnecessary-work.md @@ -51,389 +51,3 @@ if (parameters.isEmpty()) { ``` Always verify your optimizations with benchmarks to ensure they provide measurable benefits in real-world scenarios. - - -[ - { - "discussion_id": "2030201923", - "pr_number": 5426, - "pr_file": "libraries/stdlib/common/src/generated/_Arrays.kt", - "created_at": "2025-04-06T16:43:45+00:00", - "commented_code": "* @sample samples.collections.Collections.Transformations.take\n */\npublic inline fun Array.takeWhile(predicate: (T) -> Boolean): List {", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "2030201923", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5426, - "pr_file": "libraries/stdlib/common/src/generated/_Arrays.kt", - "discussion_id": "2030201923", - "commented_code": "@@ -5295,13 +5285,16 @@ public inline fun CharArray.takeLastWhile(predicate: (Char) -> Boolean): List Array.takeWhile(predicate: (T) -> Boolean): List {", - "comment_created_at": "2025-04-06T16:43:45+00:00", - "comment_author": "hoangchungk53qx1", - "comment_body": " it will be easier to read, i think, with equivalent performance \r\n```suggestion\r\npublic inline fun Array.takeWhile(predicate: (T) -> Boolean): List {\r\n var i = 0\r\n while (i < size && predicate(this[i])) i++\r\n return if (i == 0) emptyList() else this.copyOfRange(0, i).asList()\r\n}\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2034023407", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5426, - "pr_file": "libraries/stdlib/common/src/generated/_Arrays.kt", - "discussion_id": "2030201923", - "commented_code": "@@ -5295,13 +5285,16 @@ public inline fun CharArray.takeLastWhile(predicate: (Char) -> Boolean): List Array.takeWhile(predicate: (T) -> Boolean): List {", - "comment_created_at": "2025-04-08T20:57:31+00:00", - "comment_author": "alexismanin", - "comment_body": "I've checked the impact of removing the special case handling, and it looks like it is noticeable. On the benchmark result below, your suggestion is the \"takeWhileSuggestion\" case. 
We see that for array size 0 and 1, it is a little less fast than the proposed optimization : \r\n\r\n![image](https://github.com/user-attachments/assets/6658ed62-7e86-405e-882d-14aeb94e6e7b)\r\n\r\nTherefore, I think we should keep the when as it is now.\r\n\r\nP.S: If you want to double-check this, I've added the suggestions in my benchmark project [on the `test-pr-suggestions` branch](https://github.com/alexismanin/kt-benchmark-array-to-list/tree/test-pr-suggestions)", - "pr_file_module": null - }, - { - "comment_id": "2034521735", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5426, - "pr_file": "libraries/stdlib/common/src/generated/_Arrays.kt", - "discussion_id": "2030201923", - "commented_code": "@@ -5295,13 +5285,16 @@ public inline fun CharArray.takeLastWhile(predicate: (Char) -> Boolean): List Array.takeWhile(predicate: (T) -> Boolean): List {", - "comment_created_at": "2025-04-09T06:16:18+00:00", - "comment_author": "alexismanin", - "comment_body": "@hoangchungk53qx1 : After second thought, your simplification makes sense.\r\n\r\nI realized that it just miss a condition to improve its performance when *output list has only one element* (which is a broader case than my current implementation, which only accounts for cases when *input* array has only one element) : \r\n\r\n```kt\r\nreturn if (i == 0) emptyList()\r\n else if (i == 1) listOf(this[0])\r\n else copyOfRange(0, i).asList()\r\n```\r\n\r\nI've launched a benchmark by modifying your suggestion so that the final return condition accounts for cases where i is 1. It greatly improves performance, for two cases of the benchmark : \r\n\r\n 1. Input array has one element\r\n 2. Output list has only one element (in the benchmark, this is the case where input array is of size 3. We only take the first element from it)\r\n\r\nI will soon apply you suggestion with this tweak, and I think we will be good then.\r\n\r\nHere is a glimpse of updated benchmark results:\r\n\r\n![image](https://github.com/user-attachments/assets/93635032-d766-4869-b4ad-748f4f27c63a)\r\n", - "pr_file_module": null - }, - { - "comment_id": "2034602921", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5426, - "pr_file": "libraries/stdlib/common/src/generated/_Arrays.kt", - "discussion_id": "2030201923", - "commented_code": "@@ -5295,13 +5285,16 @@ public inline fun CharArray.takeLastWhile(predicate: (Char) -> Boolean): List Array.takeWhile(predicate: (T) -> Boolean): List {", - "comment_created_at": "2025-04-09T06:55:23+00:00", - "comment_author": "hoangchungk53qx1", - "comment_body": "Nice @alexismanin ", - "pr_file_module": null - }, - { - "comment_id": "2035221456", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5426, - "pr_file": "libraries/stdlib/common/src/generated/_Arrays.kt", - "discussion_id": "2030201923", - "commented_code": "@@ -5295,13 +5285,16 @@ public inline fun CharArray.takeLastWhile(predicate: (Char) -> Boolean): List Array.takeWhile(predicate: (T) -> Boolean): List {", - "comment_created_at": "2025-04-09T12:03:13+00:00", - "comment_author": "alexismanin", - "comment_body": "I've pushed the change to `takeWhile` function.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2030205112", - "pr_number": 5426, - "pr_file": "libraries/stdlib/src/kotlin/collections/Arrays.kt", - "created_at": "2025-04-06T16:57:37+00:00", - "commented_code": "* @sample samples.collections.Arrays.Transformations.flattenArray\n */\npublic fun Array>.flatten(): List {\n val result = ArrayList(sumOf { it.size })\n for (element in 
this) {\n result.addAll(element)\n return when (size) {", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "2030205112", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5426, - "pr_file": "libraries/stdlib/src/kotlin/collections/Arrays.kt", - "discussion_id": "2030205112", - "commented_code": "@@ -17,11 +17,26 @@ import kotlin.contracts.*\n * @sample samples.collections.Arrays.Transformations.flattenArray\n */\n public fun Array>.flatten(): List {\n- val result = ArrayList(sumOf { it.size })\n- for (element in this) {\n- result.addAll(element)\n+ return when (size) {", - "comment_created_at": "2025-04-06T16:57:37+00:00", - "comment_author": "hoangchungk53qx1", - "comment_body": "can early return \r\n\r\nif (isEmpty()) return emptyList()\r\n\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1437223453", - "pr_number": 5233, - "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiMetadataProcessor.kt", - "created_at": "2023-12-27T19:44:29+00:00", - "commented_code": "}\n\nprivate fun KmDeclarationContainer.removePrivateDeclarations() {\n functions.sortBy(KmFunction::name)\n properties.sortBy(KmProperty::name)\n\n functions.removeIf { it.visibility.isPrivate }\n properties.removeIf { it.visibility.isPrivate }", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1437223453", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5233, - "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiMetadataProcessor.kt", - "discussion_id": "1437223453", - "commented_code": "@@ -163,6 +163,9 @@ private fun KmPackage.removePrivateDeclarations() {\n }\n \n private fun KmDeclarationContainer.removePrivateDeclarations() {\n+ functions.sortBy(KmFunction::name)\n+ properties.sortBy(KmProperty::name)\n+\n functions.removeIf { it.visibility.isPrivate }\n properties.removeIf { it.visibility.isPrivate }", - "comment_created_at": "2023-12-27T19:44:29+00:00", - "comment_author": "JakeWharton", - "comment_body": "```suggestion\n functions.removeIf { it.visibility.isPrivate }\n properties.removeIf { it.visibility.isPrivate }\n\n functions.sortBy(KmFunction::name)\n properties.sortBy(KmProperty::name)\n```\nMight as well sort after removal. 
Otherwise you're wasting time on sorting entries that are destined to be removed anyway.\n", - "pr_file_module": null - }, - { - "comment_id": "1437256954", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5233, - "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiMetadataProcessor.kt", - "discussion_id": "1437223453", - "commented_code": "@@ -163,6 +163,9 @@ private fun KmPackage.removePrivateDeclarations() {\n }\n \n private fun KmDeclarationContainer.removePrivateDeclarations() {\n+ functions.sortBy(KmFunction::name)\n+ properties.sortBy(KmProperty::name)\n+\n functions.removeIf { it.visibility.isPrivate }\n properties.removeIf { it.visibility.isPrivate }", - "comment_created_at": "2023-12-27T21:18:43+00:00", - "comment_author": "Tagakov", - "comment_body": "Thanks for catching it, noticed it after creating the PR but forgot to push.", - "pr_file_module": null - }, - { - "comment_id": "1440951272", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5233, - "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiMetadataProcessor.kt", - "discussion_id": "1437223453", - "commented_code": "@@ -163,6 +163,9 @@ private fun KmPackage.removePrivateDeclarations() {\n }\n \n private fun KmDeclarationContainer.removePrivateDeclarations() {\n+ functions.sortBy(KmFunction::name)\n+ properties.sortBy(KmProperty::name)\n+\n functions.removeIf { it.visibility.isPrivate }\n properties.removeIf { it.visibility.isPrivate }", - "comment_created_at": "2024-01-03T21:07:15+00:00", - "comment_author": "Tagakov", - "comment_body": "My dog ate it! I swear!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1263892613", - "pr_number": 5156, - "pr_file": "kotlin-native/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/LlvmModuleSpecificationImpl.kt", - "created_at": "2023-07-14T15:58:00+00:00", - "commented_code": "override fun containsPackageFragment(packageFragment: IrPackageFragment): Boolean =\n packageFragment.konanLibrary.let { it == null || containsLibrary(it) }\n\n override fun containsDeclaration(declaration: IrDeclaration): Boolean =\n declaration.konanLibrary.let { it == null || containsLibrary(it) }\n // When producing per-file caches, some declarations might be generated by the stub generator.\n && declaration.getPackageFragment() !is IrExternalPackageFragment\n private val containsCache = mutableMapOf()\n\n override fun containsDeclaration(declaration: IrDeclaration): Boolean = containsCache.getOrPut(declaration) {", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1263892613", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5156, - "pr_file": "kotlin-native/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/LlvmModuleSpecificationImpl.kt", - "discussion_id": "1263892613", - "commented_code": "@@ -28,10 +29,18 @@ internal abstract class LlvmModuleSpecificationBase(protected val cachedLibrarie\n override fun containsPackageFragment(packageFragment: IrPackageFragment): Boolean =\n packageFragment.konanLibrary.let { it == null || containsLibrary(it) }\n \n- override fun containsDeclaration(declaration: IrDeclaration): Boolean =\n- declaration.konanLibrary.let { it == null || containsLibrary(it) }\n- // When producing per-file caches, some declarations might be generated by the stub generator.\n- && declaration.getPackageFragment() !is IrExternalPackageFragment\n+ private val containsCache = mutableMapOf()\n+\n+ override fun 
containsDeclaration(declaration: IrDeclaration): Boolean = containsCache.getOrPut(declaration) {", - "comment_created_at": "2023-07-14T15:58:00+00:00", - "comment_author": "homuroll", - "comment_body": "What is the purpose of inlining of `declaration.konanLibrary` here?", - "pr_file_module": null - }, - { - "comment_id": "1264212481", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5156, - "pr_file": "kotlin-native/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/LlvmModuleSpecificationImpl.kt", - "discussion_id": "1263892613", - "commented_code": "@@ -28,10 +29,18 @@ internal abstract class LlvmModuleSpecificationBase(protected val cachedLibrarie\n override fun containsPackageFragment(packageFragment: IrPackageFragment): Boolean =\n packageFragment.konanLibrary.let { it == null || containsLibrary(it) }\n \n- override fun containsDeclaration(declaration: IrDeclaration): Boolean =\n- declaration.konanLibrary.let { it == null || containsLibrary(it) }\n- // When producing per-file caches, some declarations might be generated by the stub generator.\n- && declaration.getPackageFragment() !is IrExternalPackageFragment\n+ private val containsCache = mutableMapOf()\n+\n+ override fun containsDeclaration(declaration: IrDeclaration): Boolean = containsCache.getOrPut(declaration) {", - "comment_created_at": "2023-07-14T22:15:53+00:00", - "comment_author": "MarkCMann", - "comment_body": "To memoize more of the calls, by inlining the konan library recursive calls here we can store the result for the top level class in addition to all nested classes. \r\n\r\nOverall this method showed up on the profiler as being very heavy due to the repeated recursive calls in KonanLibrary -- I opted to memoize the data in the llvm module specification class instead of trying to add it to the IrDeclaration or adding a global cache to konanLibrary in IrUtils. ", - "pr_file_module": null - }, - { - "comment_id": "1265352329", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5156, - "pr_file": "kotlin-native/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/LlvmModuleSpecificationImpl.kt", - "discussion_id": "1263892613", - "commented_code": "@@ -28,10 +29,18 @@ internal abstract class LlvmModuleSpecificationBase(protected val cachedLibrarie\n override fun containsPackageFragment(packageFragment: IrPackageFragment): Boolean =\n packageFragment.konanLibrary.let { it == null || containsLibrary(it) }\n \n- override fun containsDeclaration(declaration: IrDeclaration): Boolean =\n- declaration.konanLibrary.let { it == null || containsLibrary(it) }\n- // When producing per-file caches, some declarations might be generated by the stub generator.\n- && declaration.getPackageFragment() !is IrExternalPackageFragment\n+ private val containsCache = mutableMapOf()\n+\n+ override fun containsDeclaration(declaration: IrDeclaration): Boolean = containsCache.getOrPut(declaration) {", - "comment_created_at": "2023-07-17T13:26:09+00:00", - "comment_author": "ddolovov", - "comment_body": "Just a question (I don't know the right answer):\r\n\r\nIs `fun containsDeclaration(declaration: IrDeclaration)` actually called for the same declaration multiple times? 
If no, then probably we should cache per `KotlinLibrary` or per smth else but not per `IrDeclaration`.", - "pr_file_module": null - }, - { - "comment_id": "1265458783", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5156, - "pr_file": "kotlin-native/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/LlvmModuleSpecificationImpl.kt", - "discussion_id": "1263892613", - "commented_code": "@@ -28,10 +29,18 @@ internal abstract class LlvmModuleSpecificationBase(protected val cachedLibrarie\n override fun containsPackageFragment(packageFragment: IrPackageFragment): Boolean =\n packageFragment.konanLibrary.let { it == null || containsLibrary(it) }\n \n- override fun containsDeclaration(declaration: IrDeclaration): Boolean =\n- declaration.konanLibrary.let { it == null || containsLibrary(it) }\n- // When producing per-file caches, some declarations might be generated by the stub generator.\n- && declaration.getPackageFragment() !is IrExternalPackageFragment\n+ private val containsCache = mutableMapOf()\n+\n+ override fun containsDeclaration(declaration: IrDeclaration): Boolean = containsCache.getOrPut(declaration) {", - "comment_created_at": "2023-07-17T14:32:30+00:00", - "comment_author": "homuroll", - "comment_body": "Got it. Please, add a comment here explaining this, and probably some short comment/reference to here from `declaration.konanLibrary` (in case someone decides to change it).", - "pr_file_module": null - }, - { - "comment_id": "1265950218", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5156, - "pr_file": "kotlin-native/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/LlvmModuleSpecificationImpl.kt", - "discussion_id": "1263892613", - "commented_code": "@@ -28,10 +29,18 @@ internal abstract class LlvmModuleSpecificationBase(protected val cachedLibrarie\n override fun containsPackageFragment(packageFragment: IrPackageFragment): Boolean =\n packageFragment.konanLibrary.let { it == null || containsLibrary(it) }\n \n- override fun containsDeclaration(declaration: IrDeclaration): Boolean =\n- declaration.konanLibrary.let { it == null || containsLibrary(it) }\n- // When producing per-file caches, some declarations might be generated by the stub generator.\n- && declaration.getPackageFragment() !is IrExternalPackageFragment\n+ private val containsCache = mutableMapOf()\n+\n+ override fun containsDeclaration(declaration: IrDeclaration): Boolean = containsCache.getOrPut(declaration) {", - "comment_created_at": "2023-07-17T22:10:13+00:00", - "comment_author": "MarkCMann", - "comment_body": "@ddolovov I'm not sure that I understand your concern here. Is this more related to the size of the cache? \r\n\r\nWith this change I've moved the recursive calls into the llvm contains declaration function so now that function should be called multiple times for `IrDeclarations`. 
", - "pr_file_module": null - }, - { - "comment_id": "1266570240", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5156, - "pr_file": "kotlin-native/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/LlvmModuleSpecificationImpl.kt", - "discussion_id": "1263892613", - "commented_code": "@@ -28,10 +29,18 @@ internal abstract class LlvmModuleSpecificationBase(protected val cachedLibrarie\n override fun containsPackageFragment(packageFragment: IrPackageFragment): Boolean =\n packageFragment.konanLibrary.let { it == null || containsLibrary(it) }\n \n- override fun containsDeclaration(declaration: IrDeclaration): Boolean =\n- declaration.konanLibrary.let { it == null || containsLibrary(it) }\n- // When producing per-file caches, some declarations might be generated by the stub generator.\n- && declaration.getPackageFragment() !is IrExternalPackageFragment\n+ private val containsCache = mutableMapOf()\n+\n+ override fun containsDeclaration(declaration: IrDeclaration): Boolean = containsCache.getOrPut(declaration) {", - "comment_created_at": "2023-07-18T10:22:19+00:00", - "comment_author": "ddolovov", - "comment_body": "> so now that function should be called multiple times for IrDeclarations.\r\n\r\nYou mean it could be called multiple times for the same instance of `IrDeclaration` (and therefore it makes sense to cache per-declaration), right?", - "pr_file_module": null - }, - { - "comment_id": "1269759652", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5156, - "pr_file": "kotlin-native/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/LlvmModuleSpecificationImpl.kt", - "discussion_id": "1263892613", - "commented_code": "@@ -28,10 +29,18 @@ internal abstract class LlvmModuleSpecificationBase(protected val cachedLibrarie\n override fun containsPackageFragment(packageFragment: IrPackageFragment): Boolean =\n packageFragment.konanLibrary.let { it == null || containsLibrary(it) }\n \n- override fun containsDeclaration(declaration: IrDeclaration): Boolean =\n- declaration.konanLibrary.let { it == null || containsLibrary(it) }\n- // When producing per-file caches, some declarations might be generated by the stub generator.\n- && declaration.getPackageFragment() !is IrExternalPackageFragment\n+ private val containsCache = mutableMapOf()\n+\n+ override fun containsDeclaration(declaration: IrDeclaration): Boolean = containsCache.getOrPut(declaration) {", - "comment_created_at": "2023-07-20T17:24:07+00:00", - "comment_author": "MarkCMann", - "comment_body": "That's correct, especially for parent IrDeclarations they would be called repeatedly", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1265278095", - "pr_number": 5156, - "pr_file": "compiler/ir/serialization.common/src/org/jetbrains/kotlin/backend/common/overrides/FakeOverrides.kt", - "created_at": "2023-07-17T12:24:19+00:00", - "commented_code": "}\n\n fun provideFakeOverrides() {\n val entries = fakeOverrideCandidates.entries\n val entries = fakeOverrideCandidates.entries.toMutableList()", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1265278095", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5156, - "pr_file": "compiler/ir/serialization.common/src/org/jetbrains/kotlin/backend/common/overrides/FakeOverrides.kt", - "discussion_id": "1265278095", - "commented_code": "@@ -286,10 +286,9 @@ class FakeOverrideBuilder(\n }\n \n fun provideFakeOverrides() {\n- val entries = fakeOverrideCandidates.entries\n+ val 
entries = fakeOverrideCandidates.entries.toMutableList()", - "comment_created_at": "2023-07-17T12:24:19+00:00", - "comment_author": "ddolovov", - "comment_body": "The `fakeOverrideCandidates` map should be cleaned at the exit from `provideFakeOverrides()`. Otherwise we would have repeated work each time we deserialize body of inline function: https://github.com/JetBrains/kotlin/blob/883102c4c68d1b73216c6ebebed0b288f0fa7978/kotlin-native/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/serialization/KonanIrlinker.kt#L938", - "pr_file_module": null - }, - { - "comment_id": "1265940209", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5156, - "pr_file": "compiler/ir/serialization.common/src/org/jetbrains/kotlin/backend/common/overrides/FakeOverrides.kt", - "discussion_id": "1265278095", - "commented_code": "@@ -286,10 +286,9 @@ class FakeOverrideBuilder(\n }\n \n fun provideFakeOverrides() {\n- val entries = fakeOverrideCandidates.entries\n+ val entries = fakeOverrideCandidates.entries.toMutableList()", - "comment_created_at": "2023-07-17T21:53:22+00:00", - "comment_author": "MarkCMann", - "comment_body": "Ah okay it wasn't obvious to me if the entry set returned was the actual underlying entryset for the map and could therefore mutate the entries in the map. I can add a call to clear the fakeOverrideCandidates here", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1020631690", - "pr_number": 4840, - "pr_file": "core/reflection.jvm/src/kotlin/reflect/jvm/internal/KCallableImpl.kt", - "created_at": "2022-11-12T01:19:53+00:00", - "commented_code": "return if (isAnnotationConstructor) callAnnotationConstructor(args) else callDefaultMethod(args, null)\n }\n\n private val _absentArguments = ReflectProperties.lazySoft {\n val parameterSize = parameters.size + (if (isSuspend) 1 else 0)\n val maskSize = (parameters.size + Integer.SIZE - 1) / Integer.SIZE\n\n // arguments with masks and DefaultConstructorMarker or MethodHandle\n // +1 is argument for DefaultConstructorMarker or MethodHandle\n val arguments = arrayOfNulls(parameterSize + maskSize + 1)\n\n // set absent values\n parameters.forEach { parameter ->\n if (parameter.isOptional && !parameter.type.isInlineClassType) {\n // For inline class types, the javaType refers to the underlying type of the inline class,\n // but we have to pass null in order to mark the argument as absent for InlineClassAwareCaller.\n arguments[parameter.index] = defaultPrimitiveValue(parameter.type.javaType)\n } else if (parameter.isVararg) {\n arguments[parameter.index] = defaultEmptyArray(parameter.type)\n }\n }\n\n for (i in 0 until maskSize) {\n arguments[parameterSize + i] = 0\n }\n\n arguments\n }\n\n private fun getAbsentArguments(): Array = _absentArguments().clone()\n\n // See ArgumentGenerator#generate\n internal fun callDefaultMethod(args: Map, continuationArgument: Continuation<*>?): R {\n val parameters = parameters\n val arguments = ArrayList(parameters.size)\n var mask = 0\n val masks = ArrayList(1)\n var index = 0\n var anyOptional = false\n val parameterSize = parameters.size + (if (isSuspend) 1 else 0)", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1020631690", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4840, - "pr_file": "core/reflection.jvm/src/kotlin/reflect/jvm/internal/KCallableImpl.kt", - "discussion_id": "1020631690", - "commented_code": "@@ -112,65 +112,87 @@ internal abstract class KCallableImpl : KCallable, KTypeParameterOwner\n return 
if (isAnnotationConstructor) callAnnotationConstructor(args) else callDefaultMethod(args, null)\n }\n \n+ private val _absentArguments = ReflectProperties.lazySoft {\n+ val parameterSize = parameters.size + (if (isSuspend) 1 else 0)\n+ val maskSize = (parameters.size + Integer.SIZE - 1) / Integer.SIZE\n+\n+ // arguments with masks and DefaultConstructorMarker or MethodHandle\n+ // +1 is argument for DefaultConstructorMarker or MethodHandle\n+ val arguments = arrayOfNulls(parameterSize + maskSize + 1)\n+\n+ // set absent values\n+ parameters.forEach { parameter ->\n+ if (parameter.isOptional && !parameter.type.isInlineClassType) {\n+ // For inline class types, the javaType refers to the underlying type of the inline class,\n+ // but we have to pass null in order to mark the argument as absent for InlineClassAwareCaller.\n+ arguments[parameter.index] = defaultPrimitiveValue(parameter.type.javaType)\n+ } else if (parameter.isVararg) {\n+ arguments[parameter.index] = defaultEmptyArray(parameter.type)\n+ }\n+ }\n+\n+ for (i in 0 until maskSize) {\n+ arguments[parameterSize + i] = 0\n+ }\n+\n+ arguments\n+ }\n+\n+ private fun getAbsentArguments(): Array = _absentArguments().clone()\n+\n // See ArgumentGenerator#generate\n internal fun callDefaultMethod(args: Map, continuationArgument: Continuation<*>?): R {\n val parameters = parameters\n- val arguments = ArrayList(parameters.size)\n- var mask = 0\n- val masks = ArrayList(1)\n- var index = 0\n- var anyOptional = false\n+ val parameterSize = parameters.size + (if (isSuspend) 1 else 0)", - "comment_created_at": "2022-11-12T01:19:53+00:00", - "comment_author": "k163377", - "comment_body": "The following optimization to no-argument functions was found to be nearly twice as fast as 638d89c.\r\nDoes this optimization look like it should be incorporated?\r\n\r\n```\r\nBenchmark Mode Cnt Score Error Units\r\nMeasurement.zero thrpt 4 22299885.316 \u00b1 295995.533 ops/s\r\nMeasurement.fiveWithDefault thrpt 4 1927106.356 \u00b1 225032.224 ops/s\r\nMeasurement.fiveWithoutDefault thrpt 4 2460623.502 \u00b1 96649.714 ops/s\r\nMeasurement.oneWithDefault thrpt 4 5384826.666 \u00b1 340254.100 ops/s\r\nMeasurement.oneWithoutDefault thrpt 4 5654229.984 \u00b1 5914104.417 ops/s\r\nMeasurement.twentyWithDefault thrpt 4 510341.805 \u00b1 2940.113 ops/s\r\nMeasurement.twentyWithoutDefault thrpt 4 739231.653 \u00b1 21298.002 ops/s\r\n```\r\n\r\n```suggestion\r\n val parameterSize = parameters.size + (if (isSuspend) 1 else 0)\r\n\r\n // Optimization for general no-argument functions. 
\r\n if (parameters.isEmpty()) {\r\n @Suppress(\"UNCHECKED_CAST\")\r\n return reflectionCall {\r\n caller.call(if (isSuspend) arrayOf(continuationArgument) else emptyArray()) as R\r\n }\r\n } else if (parameters.size == 1) {\r\n parameters.first().takeIf { it.kind != KParameter.Kind.VALUE }?.let { parameter ->\r\n @Suppress(\"UNCHECKED_CAST\")\r\n return reflectionCall {\r\n caller.call(if (isSuspend) arrayOf(args[parameter], continuationArgument) else arrayOf(args[parameter])) as R\r\n }\r\n }\r\n }\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1031902832", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4840, - "pr_file": "core/reflection.jvm/src/kotlin/reflect/jvm/internal/KCallableImpl.kt", - "discussion_id": "1020631690", - "commented_code": "@@ -112,65 +112,87 @@ internal abstract class KCallableImpl : KCallable, KTypeParameterOwner\n return if (isAnnotationConstructor) callAnnotationConstructor(args) else callDefaultMethod(args, null)\n }\n \n+ private val _absentArguments = ReflectProperties.lazySoft {\n+ val parameterSize = parameters.size + (if (isSuspend) 1 else 0)\n+ val maskSize = (parameters.size + Integer.SIZE - 1) / Integer.SIZE\n+\n+ // arguments with masks and DefaultConstructorMarker or MethodHandle\n+ // +1 is argument for DefaultConstructorMarker or MethodHandle\n+ val arguments = arrayOfNulls(parameterSize + maskSize + 1)\n+\n+ // set absent values\n+ parameters.forEach { parameter ->\n+ if (parameter.isOptional && !parameter.type.isInlineClassType) {\n+ // For inline class types, the javaType refers to the underlying type of the inline class,\n+ // but we have to pass null in order to mark the argument as absent for InlineClassAwareCaller.\n+ arguments[parameter.index] = defaultPrimitiveValue(parameter.type.javaType)\n+ } else if (parameter.isVararg) {\n+ arguments[parameter.index] = defaultEmptyArray(parameter.type)\n+ }\n+ }\n+\n+ for (i in 0 until maskSize) {\n+ arguments[parameterSize + i] = 0\n+ }\n+\n+ arguments\n+ }\n+\n+ private fun getAbsentArguments(): Array = _absentArguments().clone()\n+\n // See ArgumentGenerator#generate\n internal fun callDefaultMethod(args: Map, continuationArgument: Continuation<*>?): R {\n val parameters = parameters\n- val arguments = ArrayList(parameters.size)\n- var mask = 0\n- val masks = ArrayList(1)\n- var index = 0\n- var anyOptional = false\n+ val parameterSize = parameters.size + (if (isSuspend) 1 else 0)", - "comment_created_at": "2022-11-24T23:02:29+00:00", - "comment_author": "udalov", - "comment_body": "For the 0-parameters case, it looks good! But for the 1 parameter, I'm not really sure, the code is getting quite complicated already. Besides, `Measurement.oneWithoutDefault` regressed in your results after this change, if I understand them correctly? 
I propose to keep the 0-parameter case only.", - "pr_file_module": null - }, - { - "comment_id": "1031954118", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4840, - "pr_file": "core/reflection.jvm/src/kotlin/reflect/jvm/internal/KCallableImpl.kt", - "discussion_id": "1020631690", - "commented_code": "@@ -112,65 +112,87 @@ internal abstract class KCallableImpl : KCallable, KTypeParameterOwner\n return if (isAnnotationConstructor) callAnnotationConstructor(args) else callDefaultMethod(args, null)\n }\n \n+ private val _absentArguments = ReflectProperties.lazySoft {\n+ val parameterSize = parameters.size + (if (isSuspend) 1 else 0)\n+ val maskSize = (parameters.size + Integer.SIZE - 1) / Integer.SIZE\n+\n+ // arguments with masks and DefaultConstructorMarker or MethodHandle\n+ // +1 is argument for DefaultConstructorMarker or MethodHandle\n+ val arguments = arrayOfNulls(parameterSize + maskSize + 1)\n+\n+ // set absent values\n+ parameters.forEach { parameter ->\n+ if (parameter.isOptional && !parameter.type.isInlineClassType) {\n+ // For inline class types, the javaType refers to the underlying type of the inline class,\n+ // but we have to pass null in order to mark the argument as absent for InlineClassAwareCaller.\n+ arguments[parameter.index] = defaultPrimitiveValue(parameter.type.javaType)\n+ } else if (parameter.isVararg) {\n+ arguments[parameter.index] = defaultEmptyArray(parameter.type)\n+ }\n+ }\n+\n+ for (i in 0 until maskSize) {\n+ arguments[parameterSize + i] = 0\n+ }\n+\n+ arguments\n+ }\n+\n+ private fun getAbsentArguments(): Array = _absentArguments().clone()\n+\n // See ArgumentGenerator#generate\n internal fun callDefaultMethod(args: Map, continuationArgument: Continuation<*>?): R {\n val parameters = parameters\n- val arguments = ArrayList(parameters.size)\n- var mask = 0\n- val masks = ArrayList(1)\n- var index = 0\n- var anyOptional = false\n+ val parameterSize = parameters.size + (if (isSuspend) 1 else 0)", - "comment_created_at": "2022-11-25T02:21:59+00:00", - "comment_author": "k163377", - "comment_body": "> Measurement.oneWithoutDefault regressed in your results after this change, if I understand them correctly?\r\n\r\nI ran the benchmark again with almost the same content, but there did not seem to be any regression in performance.\r\nSince the benchmark was run on a local machine, there must have been something going on in the background during this measurement.\r\n\r\n**1e1cd3a**\r\n\r\n```\r\nBenchmark Mode Cnt Score Error Units\r\nMeasurement.fiveWithDefault thrpt 4 1713974.649 \u00b1 807978.785 ops/s\r\nMeasurement.fiveWithoutDefault thrpt 4 2402911.282 \u00b1 116905.232 ops/s\r\nMeasurement.oneWithDefault thrpt 4 5018352.177 \u00b1 3050333.309 ops/s\r\nMeasurement.oneWithoutDefault thrpt 4 6338081.662 \u00b1 384633.083 ops/s\r\nMeasurement.twentyWithDefault thrpt 4 506022.842 \u00b1 28381.148 ops/s\r\nMeasurement.twentyWithoutDefault thrpt 4 721987.591 \u00b1 78852.517 ops/s\r\nMeasurement.zero thrpt 4 12727105.089 \u00b1 188029.864 ops/s\r\n```\r\n\r\n**after-zero-arg-opt2**\r\n\r\n```\r\nBenchmark Mode Cnt Score Error Units\r\nMeasurement.fiveWithDefault thrpt 4 1880153.629 \u00b1 232291.891 ops/s\r\nMeasurement.fiveWithoutDefault thrpt 4 2388820.000 \u00b1 97785.718 ops/s\r\nMeasurement.oneWithDefault thrpt 4 5276216.078 \u00b1 327025.056 ops/s\r\nMeasurement.oneWithoutDefault thrpt 4 6251850.821 \u00b1 497046.067 ops/s\r\nMeasurement.twentyWithDefault thrpt 4 516927.278 \u00b1 13448.841 ops/s\r\nMeasurement.twentyWithoutDefault thrpt 4 728619.156 
\u00b1 36904.744 ops/s\r\nMeasurement.zero thrpt 4 21774799.243 \u00b1 1534253.147 ops/s\r\n```\r\n\r\n---\r\n\r\nI agree that it is complicated.\r\nI have pushed for a fix.\r\n[68a9741](https://github.com/JetBrains/kotlin/pull/4840/commits/68a9741ee2307eb0d27b2a77c1124c8aa267597a)\r\n\r\nI did not measure benchmark results with the one-parameter branch removed, but at least there is no reason for the score to drop since the target of the measurement is a top-level function.\r\n", - "pr_file_module": null - }, - { - "comment_id": "1031956945", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4840, - "pr_file": "core/reflection.jvm/src/kotlin/reflect/jvm/internal/KCallableImpl.kt", - "discussion_id": "1020631690", - "commented_code": "@@ -112,65 +112,87 @@ internal abstract class KCallableImpl : KCallable, KTypeParameterOwner\n return if (isAnnotationConstructor) callAnnotationConstructor(args) else callDefaultMethod(args, null)\n }\n \n+ private val _absentArguments = ReflectProperties.lazySoft {\n+ val parameterSize = parameters.size + (if (isSuspend) 1 else 0)\n+ val maskSize = (parameters.size + Integer.SIZE - 1) / Integer.SIZE\n+\n+ // arguments with masks and DefaultConstructorMarker or MethodHandle\n+ // +1 is argument for DefaultConstructorMarker or MethodHandle\n+ val arguments = arrayOfNulls(parameterSize + maskSize + 1)\n+\n+ // set absent values\n+ parameters.forEach { parameter ->\n+ if (parameter.isOptional && !parameter.type.isInlineClassType) {\n+ // For inline class types, the javaType refers to the underlying type of the inline class,\n+ // but we have to pass null in order to mark the argument as absent for InlineClassAwareCaller.\n+ arguments[parameter.index] = defaultPrimitiveValue(parameter.type.javaType)\n+ } else if (parameter.isVararg) {\n+ arguments[parameter.index] = defaultEmptyArray(parameter.type)\n+ }\n+ }\n+\n+ for (i in 0 until maskSize) {\n+ arguments[parameterSize + i] = 0\n+ }\n+\n+ arguments\n+ }\n+\n+ private fun getAbsentArguments(): Array = _absentArguments().clone()\n+\n // See ArgumentGenerator#generate\n internal fun callDefaultMethod(args: Map, continuationArgument: Continuation<*>?): R {\n val parameters = parameters\n- val arguments = ArrayList(parameters.size)\n- var mask = 0\n- val masks = ArrayList(1)\n- var index = 0\n- var anyOptional = false\n+ val parameterSize = parameters.size + (if (isSuspend) 1 else 0)", - "comment_created_at": "2022-11-25T02:30:17+00:00", - "comment_author": "k163377", - "comment_body": "As a side note, the reason I proposed the original form is that `getter` is a typical example of a function that takes only instances as arguments.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "994632200", - "pr_number": 4982, - "pr_file": "compiler/fir/providers/src/org/jetbrains/kotlin/fir/scopes/impl/FirTypeIntersectionScopeContext.kt", - "created_at": "2022-10-13T13:16:48+00:00", - "commented_code": "val result = mutableListOf>()\n\n while (allMembersWithScope.size > 1) {\n val maxByVisibility = findMemberWithMaxVisibility(allMembersWithScope)\n val extractBothWaysWithPrivate = overrideService.extractBothWaysOverridable(maxByVisibility, allMembersWithScope, overrideChecker)\n val extractedOverrides = extractBothWaysWithPrivate.filterNotTo(mutableListOf()) {\n Visibilities.isPrivate((it.member.fir as FirMemberDeclaration).visibility)\n }.takeIf { it.isNotEmpty() } ?: extractBothWaysWithPrivate\n val baseMembersForIntersection = extractedOverrides.calcBaseMembersForIntersectionOverride()\n if 
(baseMembersForIntersection.size > 1) {\n val (mostSpecific, scopeForMostSpecific) = overrideService.selectMostSpecificMember(\n baseMembersForIntersection,\n ReturnTypeCalculatorForFullBodyResolve\n )\n val intersectionOverrideContext = ContextForIntersectionOverrideConstruction(\n mostSpecific,\n this,\n extractedOverrides,\n scopeForMostSpecific\n )\n result += ResultOfIntersection.NonTrivial(\n intersectionOverrides,\n intersectionOverrideContext,\n extractedOverrides,\n containingScope = null\n )\n val groupWithPrivate =\n overrideService.extractBothWaysOverridable(allMembersWithScope.maxByVisibility(), allMembersWithScope, overrideChecker)\n val group = groupWithPrivate.filter { !Visibilities.isPrivate(it.member.fir.visibility) }.ifEmpty { groupWithPrivate }\n val singleRealImplementation = group.size == 1 ||\n group.mapTo(mutableSetOf()) { it.member.fir.unwrapSubstitutionOverrides().symbol }.size == 1\n val directOverrides = if (singleRealImplementation) group else group.onlyDirectlyInherited()", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "994632200", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4982, - "pr_file": "compiler/fir/providers/src/org/jetbrains/kotlin/fir/scopes/impl/FirTypeIntersectionScopeContext.kt", - "discussion_id": "994632200", - "commented_code": "@@ -161,32 +156,21 @@ class FirTypeIntersectionScopeContext(\n val result = mutableListOf>()\n \n while (allMembersWithScope.size > 1) {\n- val maxByVisibility = findMemberWithMaxVisibility(allMembersWithScope)\n- val extractBothWaysWithPrivate = overrideService.extractBothWaysOverridable(maxByVisibility, allMembersWithScope, overrideChecker)\n- val extractedOverrides = extractBothWaysWithPrivate.filterNotTo(mutableListOf()) {\n- Visibilities.isPrivate((it.member.fir as FirMemberDeclaration).visibility)\n- }.takeIf { it.isNotEmpty() } ?: extractBothWaysWithPrivate\n- val baseMembersForIntersection = extractedOverrides.calcBaseMembersForIntersectionOverride()\n- if (baseMembersForIntersection.size > 1) {\n- val (mostSpecific, scopeForMostSpecific) = overrideService.selectMostSpecificMember(\n- baseMembersForIntersection,\n- ReturnTypeCalculatorForFullBodyResolve\n- )\n- val intersectionOverrideContext = ContextForIntersectionOverrideConstruction(\n- mostSpecific,\n- this,\n- extractedOverrides,\n- scopeForMostSpecific\n- )\n- result += ResultOfIntersection.NonTrivial(\n- intersectionOverrides,\n- intersectionOverrideContext,\n- extractedOverrides,\n- containingScope = null\n- )\n+ val groupWithPrivate =\n+ overrideService.extractBothWaysOverridable(allMembersWithScope.maxByVisibility(), allMembersWithScope, overrideChecker)\n+ val group = groupWithPrivate.filter { !Visibilities.isPrivate(it.member.fir.visibility) }.ifEmpty { groupWithPrivate }\n+ val singleRealImplementation = group.size == 1 ||\n+ group.mapTo(mutableSetOf()) { it.member.fir.unwrapSubstitutionOverrides().symbol }.size == 1\n+ val directOverrides = if (singleRealImplementation) group else group.onlyDirectlyInherited()", - "comment_created_at": "2022-10-13T13:16:48+00:00", - "comment_author": "dzharkov", - "comment_body": "I would add a comment that this `if` looks more like an optimization than a semantically necessary thing.\nOr, probably I would just use `group.onlyDirectlyInherited()` unconditionally.\n", - "pr_file_module": null - }, - { - "comment_id": "994734316", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4982, - "pr_file": 
"compiler/fir/providers/src/org/jetbrains/kotlin/fir/scopes/impl/FirTypeIntersectionScopeContext.kt", - "discussion_id": "994632200", - "commented_code": "@@ -161,32 +156,21 @@ class FirTypeIntersectionScopeContext(\n val result = mutableListOf>()\n \n while (allMembersWithScope.size > 1) {\n- val maxByVisibility = findMemberWithMaxVisibility(allMembersWithScope)\n- val extractBothWaysWithPrivate = overrideService.extractBothWaysOverridable(maxByVisibility, allMembersWithScope, overrideChecker)\n- val extractedOverrides = extractBothWaysWithPrivate.filterNotTo(mutableListOf()) {\n- Visibilities.isPrivate((it.member.fir as FirMemberDeclaration).visibility)\n- }.takeIf { it.isNotEmpty() } ?: extractBothWaysWithPrivate\n- val baseMembersForIntersection = extractedOverrides.calcBaseMembersForIntersectionOverride()\n- if (baseMembersForIntersection.size > 1) {\n- val (mostSpecific, scopeForMostSpecific) = overrideService.selectMostSpecificMember(\n- baseMembersForIntersection,\n- ReturnTypeCalculatorForFullBodyResolve\n- )\n- val intersectionOverrideContext = ContextForIntersectionOverrideConstruction(\n- mostSpecific,\n- this,\n- extractedOverrides,\n- scopeForMostSpecific\n- )\n- result += ResultOfIntersection.NonTrivial(\n- intersectionOverrides,\n- intersectionOverrideContext,\n- extractedOverrides,\n- containingScope = null\n- )\n+ val groupWithPrivate =\n+ overrideService.extractBothWaysOverridable(allMembersWithScope.maxByVisibility(), allMembersWithScope, overrideChecker)\n+ val group = groupWithPrivate.filter { !Visibilities.isPrivate(it.member.fir.visibility) }.ifEmpty { groupWithPrivate }\n+ val singleRealImplementation = group.size == 1 ||\n+ group.mapTo(mutableSetOf()) { it.member.fir.unwrapSubstitutionOverrides().symbol }.size == 1\n+ val directOverrides = if (singleRealImplementation) group else group.onlyDirectlyInherited()", - "comment_created_at": "2022-10-13T14:39:12+00:00", - "comment_author": "pyos", - "comment_body": "BTW I'm also wondering here if `singleRealImplementation` should be computed *after* removing transitively inherited declarations, but I'm not familiar enough with this code to come up with different test cases.", - "pr_file_module": null - }, - { - "comment_id": "994749406", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4982, - "pr_file": "compiler/fir/providers/src/org/jetbrains/kotlin/fir/scopes/impl/FirTypeIntersectionScopeContext.kt", - "discussion_id": "994632200", - "commented_code": "@@ -161,32 +156,21 @@ class FirTypeIntersectionScopeContext(\n val result = mutableListOf>()\n \n while (allMembersWithScope.size > 1) {\n- val maxByVisibility = findMemberWithMaxVisibility(allMembersWithScope)\n- val extractBothWaysWithPrivate = overrideService.extractBothWaysOverridable(maxByVisibility, allMembersWithScope, overrideChecker)\n- val extractedOverrides = extractBothWaysWithPrivate.filterNotTo(mutableListOf()) {\n- Visibilities.isPrivate((it.member.fir as FirMemberDeclaration).visibility)\n- }.takeIf { it.isNotEmpty() } ?: extractBothWaysWithPrivate\n- val baseMembersForIntersection = extractedOverrides.calcBaseMembersForIntersectionOverride()\n- if (baseMembersForIntersection.size > 1) {\n- val (mostSpecific, scopeForMostSpecific) = overrideService.selectMostSpecificMember(\n- baseMembersForIntersection,\n- ReturnTypeCalculatorForFullBodyResolve\n- )\n- val intersectionOverrideContext = ContextForIntersectionOverrideConstruction(\n- mostSpecific,\n- this,\n- extractedOverrides,\n- scopeForMostSpecific\n- )\n- result += 
ResultOfIntersection.NonTrivial(\n- intersectionOverrides,\n- intersectionOverrideContext,\n- extractedOverrides,\n- containingScope = null\n- )\n+ val groupWithPrivate =\n+ overrideService.extractBothWaysOverridable(allMembersWithScope.maxByVisibility(), allMembersWithScope, overrideChecker)\n+ val group = groupWithPrivate.filter { !Visibilities.isPrivate(it.member.fir.visibility) }.ifEmpty { groupWithPrivate }\n+ val singleRealImplementation = group.size == 1 ||\n+ group.mapTo(mutableSetOf()) { it.member.fir.unwrapSubstitutionOverrides().symbol }.size == 1\n+ val directOverrides = if (singleRealImplementation) group else group.onlyDirectlyInherited()", - "comment_created_at": "2022-10-13T14:48:26+00:00", - "comment_author": "dzharkov", - "comment_body": "TBH I'm not sure here too, so I would move it without a clear reason", - "pr_file_module": null - }, - { - "comment_id": "994798498", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4982, - "pr_file": "compiler/fir/providers/src/org/jetbrains/kotlin/fir/scopes/impl/FirTypeIntersectionScopeContext.kt", - "discussion_id": "994632200", - "commented_code": "@@ -161,32 +156,21 @@ class FirTypeIntersectionScopeContext(\n val result = mutableListOf>()\n \n while (allMembersWithScope.size > 1) {\n- val maxByVisibility = findMemberWithMaxVisibility(allMembersWithScope)\n- val extractBothWaysWithPrivate = overrideService.extractBothWaysOverridable(maxByVisibility, allMembersWithScope, overrideChecker)\n- val extractedOverrides = extractBothWaysWithPrivate.filterNotTo(mutableListOf()) {\n- Visibilities.isPrivate((it.member.fir as FirMemberDeclaration).visibility)\n- }.takeIf { it.isNotEmpty() } ?: extractBothWaysWithPrivate\n- val baseMembersForIntersection = extractedOverrides.calcBaseMembersForIntersectionOverride()\n- if (baseMembersForIntersection.size > 1) {\n- val (mostSpecific, scopeForMostSpecific) = overrideService.selectMostSpecificMember(\n- baseMembersForIntersection,\n- ReturnTypeCalculatorForFullBodyResolve\n- )\n- val intersectionOverrideContext = ContextForIntersectionOverrideConstruction(\n- mostSpecific,\n- this,\n- extractedOverrides,\n- scopeForMostSpecific\n- )\n- result += ResultOfIntersection.NonTrivial(\n- intersectionOverrides,\n- intersectionOverrideContext,\n- extractedOverrides,\n- containingScope = null\n- )\n+ val groupWithPrivate =\n+ overrideService.extractBothWaysOverridable(allMembersWithScope.maxByVisibility(), allMembersWithScope, overrideChecker)\n+ val group = groupWithPrivate.filter { !Visibilities.isPrivate(it.member.fir.visibility) }.ifEmpty { groupWithPrivate }\n+ val singleRealImplementation = group.size == 1 ||\n+ group.mapTo(mutableSetOf()) { it.member.fir.unwrapSubstitutionOverrides().symbol }.size == 1\n+ val directOverrides = if (singleRealImplementation) group else group.onlyDirectlyInherited()", - "comment_created_at": "2022-10-13T15:26:17+00:00", - "comment_author": "pyos", - "comment_body": "done", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kotlin-names-express-clear-intent.json b/_reviewers/kotlin-names-express-clear-intent.json new file mode 100644 index 0000000..809adad --- /dev/null +++ b/_reviewers/kotlin-names-express-clear-intent.json @@ -0,0 +1,400 @@ +[ + { + "discussion_id": "270310587", + "pr_number": 2232, + "pr_file": "libraries/stdlib/jvm/src/kotlin/io/files/FileTreeWalk.kt", + "created_at": "2019-03-29T08:09:17+00:00", + "commented_code": "import java.io.IOException\nimport java.util.ArrayDeque\n\n/**\n * An enumeration to describe possible algorithms to 
traverse.\n * There are two of them: search in breadth (same directory depth)\n */\npublic enum class WalkAlgorithm {\n /**\n * Breadth-first search\n */\n BFS,", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "270310587", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 2232, + "pr_file": "libraries/stdlib/jvm/src/kotlin/io/files/FileTreeWalk.kt", + "discussion_id": "270310587", + "commented_code": "@@ -12,6 +12,20 @@ import java.io.File\n import java.io.IOException\n import java.util.ArrayDeque\n \n+/**\n+ * An enumeration to describe possible algorithms to traverse.\n+ * There are two of them: search in breadth (same directory depth)\n+ */\n+public enum class WalkAlgorithm {\n+ /**\n+ * Breadth-first search\n+ */\n+ BFS,", + "comment_created_at": "2019-03-29T08:09:17+00:00", + "comment_author": "sschuberth", + "comment_body": "I actually believe we should write these out as `BREADTH_FIRST` and `DEPTH_FIRST`, just like we have `TOP_DOWN` instead of `TD` below.", + "pr_file_module": null + }, + { + "comment_id": "272912852", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 2232, + "pr_file": "libraries/stdlib/jvm/src/kotlin/io/files/FileTreeWalk.kt", + "discussion_id": "270310587", + "commented_code": "@@ -12,6 +12,20 @@ import java.io.File\n import java.io.IOException\n import java.util.ArrayDeque\n \n+/**\n+ * An enumeration to describe possible algorithms to traverse.\n+ * There are two of them: search in breadth (same directory depth)\n+ */\n+public enum class WalkAlgorithm {\n+ /**\n+ * Breadth-first search\n+ */\n+ BFS,", + "comment_created_at": "2019-04-08T07:24:14+00:00", + "comment_author": "aivinog1", + "comment_body": "Agree.", + "pr_file_module": null + }, + { + "comment_id": "272967895", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 2232, + "pr_file": "libraries/stdlib/jvm/src/kotlin/io/files/FileTreeWalk.kt", + "discussion_id": "270310587", + "commented_code": "@@ -12,6 +12,20 @@ import java.io.File\n import java.io.IOException\n import java.util.ArrayDeque\n \n+/**\n+ * An enumeration to describe possible algorithms to traverse.\n+ * There are two of them: search in breadth (same directory depth)\n+ */\n+public enum class WalkAlgorithm {\n+ /**\n+ * Breadth-first search\n+ */\n+ BFS,", + "comment_created_at": "2019-04-08T09:55:30+00:00", + "comment_author": "aivinog1", + "comment_body": "> I actually believe we should write these out as `BREADTH_FIRST` and `DEPTH_FIRST`, just like we have `TOP_DOWN` instead of `TD` below.\r\n\r\nDone.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "936958162", + "pr_number": 4903, + "pr_file": "compiler/ir/backend.jvm/src/org/jetbrains/kotlin/backend/jvm/MemoizedInlineClassReplacements.kt", + "created_at": "2022-08-03T17:28:39+00:00", + "commented_code": "parent = irClass\n // We ignore type arguments here, since there is no good way to go from type arguments to types in the IR anyway.\n val typeArgument =", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "936958162", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4903, + "pr_file": "compiler/ir/backend.jvm/src/org/jetbrains/kotlin/backend/jvm/MemoizedInlineClassReplacements.kt", + "discussion_id": "936958162", + "commented_code": "@@ -155,7 +165,10 @@ class MemoizedInlineClassReplacements(\n parent = irClass\n // We ignore type arguments here, since there is no good way to go from type arguments to types in the IR anyway.\n val typeArgument =", + "comment_created_at": 
"2022-08-03T17:28:39+00:00", + "comment_author": "sfs", + "comment_body": "Since you're touching this code anyway, could you please rename this variable to `argumentType`? It's really not a type argument...", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "451431218", + "pr_number": 3516, + "pr_file": "libraries/scripting/type-providers/src/kotlin/script/experimental/typeProviders/generatedCode/GeneratedCode.kt", + "created_at": "2020-07-08T10:07:36+00:00", + "commented_code": "/*\n * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage kotlin.script.experimental.typeProviders.generatedCode\n\nimport kotlin.reflect.KClass\nimport kotlin.script.experimental.typeProviders.generatedCode.impl.*\nimport kotlin.script.experimental.typeProviders.generatedCode.impl.Object\nimport kotlin.script.experimental.typeProviders.generatedCode.impl.writeJoined\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.Generated\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.GeneratedCodeVisitor\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.InternalGeneratedCode\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.visit\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.captureImports\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.write\nimport kotlin.text.StringBuilder\n\n/**\n * Code returned by a Type Provider.\n * [GeneratedCode] is a recursive construction that allows you to build generated code as a combination of multiple types already provided\n *\n * Example:\n *\n * ```kotlin\n * data class MyObject(val name: String, val value: Int) : GeneratedCode {\n * fun body(): GeneratedCode {\n * return GeneratedCode.composite {\n * +makeDataClass(name)\n * .withInterface(MyInterface::class)\n *\n * +makeConstant(\"$nameConstant\", value)\n * }\n * }\n * }\n * ```\n *\n * [GeneratedCode] can contain any of the following:\n * - Data classes. See [makeDataClass].\n * - Objects. See [makeObject].\n * - Constants. See [makeConstant].\n * - Packages. See [makePackage].\n * - Extension functions. 
See [makeExtensionMethod].\n * - etc.\n */\ninterface GeneratedCode {\n /**\n * Generate the code that corresponds to your type\n *\n * @return An instance of [GeneratedCode].\n * Important: don't return an infinite recursion in the form of return `this`\n */\n fun body(): GeneratedCode\n\n /**\n * A builder for a composite of multiple types\n */\n class Builder internal constructor() {\n private val mutableCodeList = mutableListOf()\n\n /**\n * Include code in your compound\n */\n operator fun GeneratedCode.unaryPlus() {\n if (this is CompoundGeneratedCode) {\n mutableCodeList.addAll(codeList)\n } else {\n mutableCodeList.add(this)\n }\n }\n\n operator fun String.unaryPlus() {\n mutableCodeList.add(inlineCode(this))\n }\n\n internal fun build(): GeneratedCode {\n return composite(mutableCodeList)\n }\n }\n\n object Empty : GeneratedCode by EmptyCode()\n\n companion object {\n /**\n * Create a composition of multiple instances of Generated Code\n */\n fun composite(init: Builder.() -> Unit) = Builder().apply(init).build()\n\n /**\n * Create a composition of multiple instances of Generated Code\n */\n fun composite(iterable: Collection): GeneratedCode = if (iterable.count() == 1)\n iterable.first()\n else\n CompoundGeneratedCode(iterable.toList())\n\n /**\n * Create a composition of multiple instances of Generated Code\n */\n fun composite(vararg code: GeneratedCode) = composite(code.toList())\n }\n}\n\nprivate data class CompoundGeneratedCode(val codeList: List) : InternalGeneratedCode() {\n override fun StringBuilder.write(indent: Int) {\n writeJoined(codeList, \"\\n\\n\", indent)\n }\n\n override fun GeneratedCodeVisitor.visit() {\n for (code in codeList) {\n visit(code)\n }\n }\n}\n\n/**\n * References a member that can be used as a type\n */\ninterface IdentifiableMember {\n val name: String\n fun imports(): Set = emptySet()\n\n companion object {\n private data class KClassIdentifiable(val kClass: KClass<*>) : IdentifiableMember {\n override val name: String = kClass.simpleName ?: kClass.qualifiedName!!\n override fun imports() = setOfNotNull(kClass.qualifiedName)\n }\n\n private data class NamedIdentifiable(override val name: String) : IdentifiableMember\n\n operator fun invoke(kClass: KClass<*>): IdentifiableMember = KClassIdentifiable(kClass)\n operator fun invoke(name: String): IdentifiableMember = NamedIdentifiable(name)\n }\n}\n\n/**\n * References an interface that can be implemented\n */\ninterface GeneratedInterface : IdentifiableMember\n\nprivate data class CastedInterfaceMember(private val member: IdentifiableMember) : GeneratedInterface, IdentifiableMember by member\n\nfun IdentifiableMember.asInterface(): GeneratedInterface = CastedInterfaceMember(this)\n\n/**\n * Generated code that is a type that can be used as an Argument\n */\ninterface IdentifiableCode : GeneratedCode, IdentifiableMember\n\n/**\n * Builder for a provided type whose JVM Name can be set\n */\ninterface JVMNameableBuilder {\n fun jvmName(name: String)\n}\n\n/**\n * Builder for a provided type. 
It can implement an interface and have some code inside.\n */\ninterface GeneratedTypeBuilder : IdentifiableMember {\n /**\n * Implement a generated interface\n */\n fun implement(generated: GeneratedInterface)\n\n /**\n * Add code to the namespace of the Type\n */\n fun inner(code: GeneratedCode)", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "451431218", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3516, + "pr_file": "libraries/scripting/type-providers/src/kotlin/script/experimental/typeProviders/generatedCode/GeneratedCode.kt", + "discussion_id": "451431218", + "commented_code": "@@ -0,0 +1,302 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.typeProviders.generatedCode\n+\n+import kotlin.reflect.KClass\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.*\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.Object\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.writeJoined\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.Generated\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.GeneratedCodeVisitor\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.InternalGeneratedCode\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visit\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.captureImports\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.write\n+import kotlin.text.StringBuilder\n+\n+/**\n+ * Code returned by a Type Provider.\n+ * [GeneratedCode] is a recursive construction that allows you to build generated code as a combination of multiple types already provided\n+ *\n+ * Example:\n+ *\n+ * ```kotlin\n+ * data class MyObject(val name: String, val value: Int) : GeneratedCode {\n+ * fun body(): GeneratedCode {\n+ * return GeneratedCode.composite {\n+ * +makeDataClass(name)\n+ * .withInterface(MyInterface::class)\n+ *\n+ * +makeConstant(\"$nameConstant\", value)\n+ * }\n+ * }\n+ * }\n+ * ```\n+ *\n+ * [GeneratedCode] can contain any of the following:\n+ * - Data classes. See [makeDataClass].\n+ * - Objects. See [makeObject].\n+ * - Constants. See [makeConstant].\n+ * - Packages. See [makePackage].\n+ * - Extension functions. 
See [makeExtensionMethod].\n+ * - etc.\n+ */\n+interface GeneratedCode {\n+ /**\n+ * Generate the code that corresponds to your type\n+ *\n+ * @return An instance of [GeneratedCode].\n+ * Important: don't return an infinite recursion in the form of return `this`\n+ */\n+ fun body(): GeneratedCode\n+\n+ /**\n+ * A builder for a composite of multiple types\n+ */\n+ class Builder internal constructor() {\n+ private val mutableCodeList = mutableListOf()\n+\n+ /**\n+ * Include code in your compound\n+ */\n+ operator fun GeneratedCode.unaryPlus() {\n+ if (this is CompoundGeneratedCode) {\n+ mutableCodeList.addAll(codeList)\n+ } else {\n+ mutableCodeList.add(this)\n+ }\n+ }\n+\n+ operator fun String.unaryPlus() {\n+ mutableCodeList.add(inlineCode(this))\n+ }\n+\n+ internal fun build(): GeneratedCode {\n+ return composite(mutableCodeList)\n+ }\n+ }\n+\n+ object Empty : GeneratedCode by EmptyCode()\n+\n+ companion object {\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(init: Builder.() -> Unit) = Builder().apply(init).build()\n+\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(iterable: Collection): GeneratedCode = if (iterable.count() == 1)\n+ iterable.first()\n+ else\n+ CompoundGeneratedCode(iterable.toList())\n+\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(vararg code: GeneratedCode) = composite(code.toList())\n+ }\n+}\n+\n+private data class CompoundGeneratedCode(val codeList: List) : InternalGeneratedCode() {\n+ override fun StringBuilder.write(indent: Int) {\n+ writeJoined(codeList, \"\\n\\n\", indent)\n+ }\n+\n+ override fun GeneratedCodeVisitor.visit() {\n+ for (code in codeList) {\n+ visit(code)\n+ }\n+ }\n+}\n+\n+/**\n+ * References a member that can be used as a type\n+ */\n+interface IdentifiableMember {\n+ val name: String\n+ fun imports(): Set = emptySet()\n+\n+ companion object {\n+ private data class KClassIdentifiable(val kClass: KClass<*>) : IdentifiableMember {\n+ override val name: String = kClass.simpleName ?: kClass.qualifiedName!!\n+ override fun imports() = setOfNotNull(kClass.qualifiedName)\n+ }\n+\n+ private data class NamedIdentifiable(override val name: String) : IdentifiableMember\n+\n+ operator fun invoke(kClass: KClass<*>): IdentifiableMember = KClassIdentifiable(kClass)\n+ operator fun invoke(name: String): IdentifiableMember = NamedIdentifiable(name)\n+ }\n+}\n+\n+/**\n+ * References an interface that can be implemented\n+ */\n+interface GeneratedInterface : IdentifiableMember\n+\n+private data class CastedInterfaceMember(private val member: IdentifiableMember) : GeneratedInterface, IdentifiableMember by member\n+\n+fun IdentifiableMember.asInterface(): GeneratedInterface = CastedInterfaceMember(this)\n+\n+/**\n+ * Generated code that is a type that can be used as an Argument\n+ */\n+interface IdentifiableCode : GeneratedCode, IdentifiableMember\n+\n+/**\n+ * Builder for a provided type whose JVM Name can be set\n+ */\n+interface JVMNameableBuilder {\n+ fun jvmName(name: String)\n+}\n+\n+/**\n+ * Builder for a provided type. 
It can implement an interface and have some code inside.\n+ */\n+interface GeneratedTypeBuilder : IdentifiableMember {\n+ /**\n+ * Implement a generated interface\n+ */\n+ fun implement(generated: GeneratedInterface)\n+\n+ /**\n+ * Add code to the namespace of the Type\n+ */\n+ fun inner(code: GeneratedCode)", + "comment_created_at": "2020-07-08T10:07:36+00:00", + "comment_author": "ligee", + "comment_body": "I think this is a confusing name, the word `inner` has a different meaning in the Kotlin code.\r\nIn the compiler, we're using `child`/`children` in such places.", + "pr_file_module": null + }, + { + "comment_id": "480104468", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3516, + "pr_file": "libraries/scripting/type-providers/src/kotlin/script/experimental/typeProviders/generatedCode/GeneratedCode.kt", + "discussion_id": "451431218", + "commented_code": "@@ -0,0 +1,302 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.typeProviders.generatedCode\n+\n+import kotlin.reflect.KClass\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.*\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.Object\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.writeJoined\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.Generated\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.GeneratedCodeVisitor\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.InternalGeneratedCode\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visit\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.captureImports\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.write\n+import kotlin.text.StringBuilder\n+\n+/**\n+ * Code returned by a Type Provider.\n+ * [GeneratedCode] is a recursive construction that allows you to build generated code as a combination of multiple types already provided\n+ *\n+ * Example:\n+ *\n+ * ```kotlin\n+ * data class MyObject(val name: String, val value: Int) : GeneratedCode {\n+ * fun body(): GeneratedCode {\n+ * return GeneratedCode.composite {\n+ * +makeDataClass(name)\n+ * .withInterface(MyInterface::class)\n+ *\n+ * +makeConstant(\"$nameConstant\", value)\n+ * }\n+ * }\n+ * }\n+ * ```\n+ *\n+ * [GeneratedCode] can contain any of the following:\n+ * - Data classes. See [makeDataClass].\n+ * - Objects. See [makeObject].\n+ * - Constants. See [makeConstant].\n+ * - Packages. See [makePackage].\n+ * - Extension functions. 
See [makeExtensionMethod].\n+ * - etc.\n+ */\n+interface GeneratedCode {\n+ /**\n+ * Generate the code that corresponds to your type\n+ *\n+ * @return An instance of [GeneratedCode].\n+ * Important: don't return an infinite recursion in the form of return `this`\n+ */\n+ fun body(): GeneratedCode\n+\n+ /**\n+ * A builder for a composite of multiple types\n+ */\n+ class Builder internal constructor() {\n+ private val mutableCodeList = mutableListOf()\n+\n+ /**\n+ * Include code in your compound\n+ */\n+ operator fun GeneratedCode.unaryPlus() {\n+ if (this is CompoundGeneratedCode) {\n+ mutableCodeList.addAll(codeList)\n+ } else {\n+ mutableCodeList.add(this)\n+ }\n+ }\n+\n+ operator fun String.unaryPlus() {\n+ mutableCodeList.add(inlineCode(this))\n+ }\n+\n+ internal fun build(): GeneratedCode {\n+ return composite(mutableCodeList)\n+ }\n+ }\n+\n+ object Empty : GeneratedCode by EmptyCode()\n+\n+ companion object {\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(init: Builder.() -> Unit) = Builder().apply(init).build()\n+\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(iterable: Collection): GeneratedCode = if (iterable.count() == 1)\n+ iterable.first()\n+ else\n+ CompoundGeneratedCode(iterable.toList())\n+\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(vararg code: GeneratedCode) = composite(code.toList())\n+ }\n+}\n+\n+private data class CompoundGeneratedCode(val codeList: List) : InternalGeneratedCode() {\n+ override fun StringBuilder.write(indent: Int) {\n+ writeJoined(codeList, \"\\n\\n\", indent)\n+ }\n+\n+ override fun GeneratedCodeVisitor.visit() {\n+ for (code in codeList) {\n+ visit(code)\n+ }\n+ }\n+}\n+\n+/**\n+ * References a member that can be used as a type\n+ */\n+interface IdentifiableMember {\n+ val name: String\n+ fun imports(): Set = emptySet()\n+\n+ companion object {\n+ private data class KClassIdentifiable(val kClass: KClass<*>) : IdentifiableMember {\n+ override val name: String = kClass.simpleName ?: kClass.qualifiedName!!\n+ override fun imports() = setOfNotNull(kClass.qualifiedName)\n+ }\n+\n+ private data class NamedIdentifiable(override val name: String) : IdentifiableMember\n+\n+ operator fun invoke(kClass: KClass<*>): IdentifiableMember = KClassIdentifiable(kClass)\n+ operator fun invoke(name: String): IdentifiableMember = NamedIdentifiable(name)\n+ }\n+}\n+\n+/**\n+ * References an interface that can be implemented\n+ */\n+interface GeneratedInterface : IdentifiableMember\n+\n+private data class CastedInterfaceMember(private val member: IdentifiableMember) : GeneratedInterface, IdentifiableMember by member\n+\n+fun IdentifiableMember.asInterface(): GeneratedInterface = CastedInterfaceMember(this)\n+\n+/**\n+ * Generated code that is a type that can be used as an Argument\n+ */\n+interface IdentifiableCode : GeneratedCode, IdentifiableMember\n+\n+/**\n+ * Builder for a provided type whose JVM Name can be set\n+ */\n+interface JVMNameableBuilder {\n+ fun jvmName(name: String)\n+}\n+\n+/**\n+ * Builder for a provided type. 
It can implement an interface and have some code inside.\n+ */\n+interface GeneratedTypeBuilder : IdentifiableMember {\n+ /**\n+ * Implement a generated interface\n+ */\n+ fun implement(generated: GeneratedInterface)\n+\n+ /**\n+ * Add code to the namespace of the Type\n+ */\n+ fun inner(code: GeneratedCode)", + "comment_created_at": "2020-08-31T12:44:02+00:00", + "comment_author": "nerdsupremacist", + "comment_body": "Got rid of this abstraction by making `ClassLikeBuilder` implement `GeneratedCode.Builder`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "646189172", + "pr_number": 4432, + "pr_file": "libraries/stdlib/src/kotlin/collections/Collections.kt", + "created_at": "2021-06-06T21:19:33+00:00", + "commented_code": "return -(low + 1) // key not found\n}\n\n/**\n * If this list starts with the given [prefix], returns a view of the portion of this list without [prefix].\n * The returned list is backed by this list, so non-structural changes in the returned list are reflected in this list, and vice-versa.\n *\n * Structural changes in the base list make the behavior of the view undefined.\n */\npublic fun List.removePrefix(prefix: List): List {\n return if (this.startWith(prefix)) {\n this.subList(prefix.size, this.size)\n } else {\n this\n }\n}\n\n/**\n * Returns `true` if this list starts with the specified prefix.\n *\n * For empty [prefix] returns true\n */\npublic fun List.startWith(prefix: List): Boolean {", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "646189172", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4432, + "pr_file": "libraries/stdlib/src/kotlin/collections/Collections.kt", + "discussion_id": "646189172", + "commented_code": "@@ -434,6 +434,35 @@ public fun List.binarySearch(fromIndex: Int = 0, toIndex: Int = size, com\n return -(low + 1) // key not found\n }\n \n+/**\n+ * If this list starts with the given [prefix], returns a view of the portion of this list without [prefix].\n+ * The returned list is backed by this list, so non-structural changes in the returned list are reflected in this list, and vice-versa.\n+ *\n+ * Structural changes in the base list make the behavior of the view undefined.\n+ */\n+public fun List.removePrefix(prefix: List): List {\n+ return if (this.startWith(prefix)) {\n+ this.subList(prefix.size, this.size)\n+ } else {\n+ this\n+ }\n+}\n+\n+/**\n+ * Returns `true` if this list starts with the specified prefix.\n+ *\n+ * For empty [prefix] returns true\n+ */\n+public fun List.startWith(prefix: List): Boolean {", + "comment_created_at": "2021-06-06T21:19:33+00:00", + "comment_author": "Serdnad", + "comment_body": "Shouldn't this be \"startsWith\", to be consistent with the current implementation for String?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1471288707", + "pr_number": 5252, + "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiCommandLineProcessor.kt", + "created_at": "2024-01-30T14:09:36+00:00", + "commented_code": "\"outside of the compilation unit where it's declared.\",\n false,\n )\n\n val DO_NOT_SORT_MEMBERS_OPTION: CliOption =", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1471288707", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5252, + "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiCommandLineProcessor.kt", + "discussion_id": "1471288707", + "commented_code": "@@ -42,13 +42,27 @@ class JvmAbiCommandLineProcessor : CommandLineProcessor {\n 
\"outside of the compilation unit where it's declared.\",\n false,\n )\n+\n+ val DO_NOT_SORT_MEMBERS_OPTION: CliOption =", + "comment_created_at": "2024-01-30T14:09:36+00:00", + "comment_author": "madsager", + "comment_body": "I personally have a preference for flag names without negations, so I would vote for `sortMembers` with a default of true and a comment stating that turning that off reduces stability of the ABI jar.", + "pr_file_module": null + }, + { + "comment_id": "1471367973", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5252, + "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiCommandLineProcessor.kt", + "discussion_id": "1471288707", + "commented_code": "@@ -42,13 +42,27 @@ class JvmAbiCommandLineProcessor : CommandLineProcessor {\n \"outside of the compilation unit where it's declared.\",\n false,\n )\n+\n+ val DO_NOT_SORT_MEMBERS_OPTION: CliOption =", + "comment_created_at": "2024-01-30T14:50:27+00:00", + "comment_author": "udalov", + "comment_body": "Oh... My preference is flags which are _false by default_... 🙃 For some reason it's easier for me to get a model of a \"default\" behavior of the system, just looking at the names of the flags, as opposed to names + their default values.", + "pr_file_module": null + }, + { + "comment_id": "1471379505", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5252, + "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiCommandLineProcessor.kt", + "discussion_id": "1471288707", + "commented_code": "@@ -42,13 +42,27 @@ class JvmAbiCommandLineProcessor : CommandLineProcessor {\n \"outside of the compilation unit where it's declared.\",\n false,\n )\n+\n+ val DO_NOT_SORT_MEMBERS_OPTION: CliOption =", + "comment_created_at": "2024-01-30T14:56:48+00:00", + "comment_author": "madsager", + "comment_body": "!doNotShipIt 🤣 ", + "pr_file_module": null + }, + { + "comment_id": "1471380582", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5252, + "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiCommandLineProcessor.kt", + "discussion_id": "1471288707", + "commented_code": "@@ -42,13 +42,27 @@ class JvmAbiCommandLineProcessor : CommandLineProcessor {\n \"outside of the compilation unit where it's declared.\",\n false,\n )\n+\n+ val DO_NOT_SORT_MEMBERS_OPTION: CliOption =", + "comment_created_at": "2024-01-30T14:57:24+00:00", + "comment_author": "madsager", + "comment_body": "More seriously, as long as there is a flag I'm happy! 👍 ", + "pr_file_module": null + }, + { + "comment_id": "1471710838", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5252, + "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiCommandLineProcessor.kt", + "discussion_id": "1471288707", + "commented_code": "@@ -42,13 +42,27 @@ class JvmAbiCommandLineProcessor : CommandLineProcessor {\n \"outside of the compilation unit where it's declared.\",\n false,\n )\n+\n+ val DO_NOT_SORT_MEMBERS_OPTION: CliOption =", + "comment_created_at": "2024-01-30T17:59:51+00:00", + "comment_author": "Tagakov", + "comment_body": "What about `preserveDeclarationsOrder` ? 
I can rename the flag and make it disable sorting for inner classes as well to be consistent.", + "pr_file_module": null + }, + { + "comment_id": "1473159544", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5252, + "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiCommandLineProcessor.kt", + "discussion_id": "1471288707", + "commented_code": "@@ -42,13 +42,27 @@ class JvmAbiCommandLineProcessor : CommandLineProcessor {\n \"outside of the compilation unit where it's declared.\",\n false,\n )\n+\n+ val DO_NOT_SORT_MEMBERS_OPTION: CliOption =", + "comment_created_at": "2024-01-31T16:58:29+00:00", + "comment_author": "udalov", + "comment_body": "`preserveDeclarationsOrder` sounds great. `preserveDeclarationOrder` would be even better.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1440148268", + "pr_number": 5232, + "pr_file": "plugins/parcelize/parcelize-compiler/parcelize.backend/src/org/jetbrains/kotlin/parcelize/AndroidSymbols.kt", + "created_at": "2024-01-03T07:36:17+00:00", + "commented_code": "}\n }.symbol\n\n private val kotlinDuration: IrClassSymbol = createClass(", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1440148268", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5232, + "pr_file": "plugins/parcelize/parcelize-compiler/parcelize.backend/src/org/jetbrains/kotlin/parcelize/AndroidSymbols.kt", + "discussion_id": "1440148268", + "commented_code": "@@ -185,6 +186,12 @@ class AndroidSymbols(\n }\n }.symbol\n \n+ private val kotlinDuration: IrClassSymbol = createClass(", + "comment_created_at": "2024-01-03T07:36:17+00:00", + "comment_author": "madsager", + "comment_body": "Maybe call this `kotlinTimeDuration` to follow other naming such as `androidOsParcel`?", + "pr_file_module": null + }, + { + "comment_id": "1440481155", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5232, + "pr_file": "plugins/parcelize/parcelize-compiler/parcelize.backend/src/org/jetbrains/kotlin/parcelize/AndroidSymbols.kt", + "discussion_id": "1440148268", + "commented_code": "@@ -185,6 +186,12 @@ class AndroidSymbols(\n }\n }.symbol\n \n+ private val kotlinDuration: IrClassSymbol = createClass(", + "comment_created_at": "2024-01-03T13:57:31+00:00", + "comment_author": "gala377", + "comment_body": "Done", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1051654064", + "pr_number": 5046, + "pr_file": "compiler/ir/backend.jvm/codegen/src/org/jetbrains/kotlin/backend/jvm/codegen/irCodegenUtils.kt", + "created_at": "2022-12-18T19:34:39+00:00", + "commented_code": "val property = callee.correspondingPropertySymbol?.owner ?: return false\n return property.isDeprecatedCallable(context)\n}\n\nval IrClass.getReifiedTypeParameters: ReifiedTypeParametersUsages", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1051654064", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5046, + "pr_file": "compiler/ir/backend.jvm/codegen/src/org/jetbrains/kotlin/backend/jvm/codegen/irCodegenUtils.kt", + "discussion_id": "1051654064", + "commented_code": "@@ -320,3 +317,27 @@ private fun IrFunction.isAccessorForDeprecatedJvmStaticProperty(context: JvmBack\n val property = callee.correspondingPropertySymbol?.owner ?: return false\n return property.isDeprecatedCallable(context)\n }\n+\n+val IrClass.getReifiedTypeParameters: ReifiedTypeParametersUsages", + "comment_created_at": "2022-12-18T19:34:39+00:00", + "comment_author": "ilmirus", + "comment_body": "Please, stick to official 
code style - `getReifiedTypeParameters` is a function, if you use a property, use `reifiedTypeParameters`.", + "pr_file_module": null + }, + { + "comment_id": "1051770691", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5046, + "pr_file": "compiler/ir/backend.jvm/codegen/src/org/jetbrains/kotlin/backend/jvm/codegen/irCodegenUtils.kt", + "discussion_id": "1051654064", + "commented_code": "@@ -320,3 +317,27 @@ private fun IrFunction.isAccessorForDeprecatedJvmStaticProperty(context: JvmBack\n val property = callee.correspondingPropertySymbol?.owner ?: return false\n return property.isDeprecatedCallable(context)\n }\n+\n+val IrClass.getReifiedTypeParameters: ReifiedTypeParametersUsages", + "comment_created_at": "2022-12-19T03:51:10+00:00", + "comment_author": "larryxiao625", + "comment_body": "Thanks for you suggestion, I have modified it~. Could you please review again :)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "695778850", + "pr_number": 4535, + "pr_file": "core/compiler.common.jvm/src/org/jetbrains/kotlin/load/java/AbstractAnnotationTypeQualifierResolver.kt", + "created_at": "2021-08-25T13:56:30+00:00", + "commented_code": "/*\n * Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage org.jetbrains.kotlin.load.java\n\nimport org.jetbrains.kotlin.builtins.StandardNames\nimport org.jetbrains.kotlin.descriptors.annotations.KotlinTarget\nimport org.jetbrains.kotlin.name.FqName\nimport java.util.concurrent.ConcurrentHashMap\n\ntypealias TypeQualifierWithApplicability = Pair>", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "695778850", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4535, + "pr_file": "core/compiler.common.jvm/src/org/jetbrains/kotlin/load/java/AbstractAnnotationTypeQualifierResolver.kt", + "discussion_id": "695778850", + "commented_code": "@@ -0,0 +1,113 @@\n+/*\n+ * Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlin.load.java\n+\n+import org.jetbrains.kotlin.builtins.StandardNames\n+import org.jetbrains.kotlin.descriptors.annotations.KotlinTarget\n+import org.jetbrains.kotlin.name.FqName\n+import java.util.concurrent.ConcurrentHashMap\n+\n+typealias TypeQualifierWithApplicability = Pair>", + "comment_created_at": "2021-08-25T13:56:30+00:00", + "comment_author": "dzharkov", + "comment_body": "I don't insist here, but we're trying to use one-capital-letter names for type parameters (e.g. `A` here)\nSame for AbstractAnnotationTypeQualifierResolver\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "454246644", + "pr_number": 3544, + "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/resolverNamedOptions.kt", + "created_at": "2020-07-14T10:05:07+00:00", + "commented_code": "/*\n * Copyright 2010-2020 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage kotlin.script.experimental.dependencies\n\nfun ExternalDependenciesResolver.Options.value(name: DependenciesResolverOptionsName) =\n value(name.key)\n\nfun ExternalDependenciesResolver.Options.flag(name: DependenciesResolverOptionsName) =\n flag(name.key)\n\noperator fun MutableMap.set(key: DependenciesResolverOptionsName, value: String) {\n put(key.key, value)\n}\n\n\nenum class DependenciesResolverOptionsName {\n TRANSITIVE,\n DEPENDENCY_SCOPES;", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "454246644", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3544, + "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/resolverNamedOptions.kt", + "discussion_id": "454246644", + "commented_code": "@@ -0,0 +1,31 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.dependencies\n+\n+fun ExternalDependenciesResolver.Options.value(name: DependenciesResolverOptionsName) =\n+ value(name.key)\n+\n+fun ExternalDependenciesResolver.Options.flag(name: DependenciesResolverOptionsName) =\n+ flag(name.key)\n+\n+operator fun MutableMap.set(key: DependenciesResolverOptionsName, value: String) {\n+ put(key.key, value)\n+}\n+\n+\n+enum class DependenciesResolverOptionsName {\n+ TRANSITIVE,\n+ DEPENDENCY_SCOPES;", + "comment_created_at": "2020-07-14T10:05:07+00:00", + "comment_author": "ligee", + "comment_body": "Using underscores seems unusual for the maven configuration, and in maven this parameter called just `scope`, so I'd rename it, so the people can guess the param name easier.\r\nIf we want to have the naming flexibility - I'd add the string name parameter to the enum (and use `toLowerCase` as a default value).", + "pr_file_module": null + }, + { + "comment_id": "454329232", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3544, + "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/resolverNamedOptions.kt", + "discussion_id": "454246644", + "commented_code": "@@ -0,0 +1,31 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.dependencies\n+\n+fun ExternalDependenciesResolver.Options.value(name: DependenciesResolverOptionsName) =\n+ value(name.key)\n+\n+fun ExternalDependenciesResolver.Options.flag(name: DependenciesResolverOptionsName) =\n+ flag(name.key)\n+\n+operator fun MutableMap.set(key: DependenciesResolverOptionsName, value: String) {\n+ put(key.key, value)\n+}\n+\n+\n+enum class DependenciesResolverOptionsName {\n+ TRANSITIVE,\n+ DEPENDENCY_SCOPES;", + "comment_created_at": "2020-07-14T12:45:42+00:00", + "comment_author": "ileasile", + "comment_body": "Fixed this name and added a parameter", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "632467374", + "pr_number": 4383, + "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/impl/resolverNamedOptions.kt", + "created_at": "2021-05-14T11:33:55+00:00", + "commented_code": "*/\nenum class DependenciesResolverOptionsName(optionName: String? = null) {\n TRANSITIVE,\n SCOPE;\n SCOPE,\n USERNAME,\n PASSWORD,\n PRIVATE_KEY_FILE,", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "632467374", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4383, + "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/impl/resolverNamedOptions.kt", + "discussion_id": "632467374", + "commented_code": "@@ -23,7 +23,11 @@ operator fun MutableMap.set(key: DependenciesResolverOptionsName\n */\n enum class DependenciesResolverOptionsName(optionName: String? = null) {\n TRANSITIVE,\n- SCOPE;\n+ SCOPE,\n+ USERNAME,\n+ PASSWORD,\n+ PRIVATE_KEY_FILE,", + "comment_created_at": "2021-05-14T11:33:55+00:00", + "comment_author": "ligee", + "comment_body": "I don't like the names, taking into account that they will be used (in the lowercased form) in the annotations in the actual scripts. But I don't have much better suggestions either. Maybe `AUTH_KEY_FILE`/`AUTH_KEY_PASSPHRASE` will be a bit better, but I'm not sure. Maybe we should discuss it.", + "pr_file_module": null + }, + { + "comment_id": "632602786", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4383, + "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/impl/resolverNamedOptions.kt", + "discussion_id": "632467374", + "commented_code": "@@ -23,7 +23,11 @@ operator fun MutableMap.set(key: DependenciesResolverOptionsName\n */\n enum class DependenciesResolverOptionsName(optionName: String? = null) {\n TRANSITIVE,\n- SCOPE;\n+ SCOPE,\n+ USERNAME,\n+ PASSWORD,\n+ PRIVATE_KEY_FILE,", + "comment_created_at": "2021-05-14T15:19:36+00:00", + "comment_author": "ileasile", + "comment_body": "I'm ok with the names you suggested, maybe it's even worth getting rid of `AUTH_` prefix", + "pr_file_module": null + }, + { + "comment_id": "633292977", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4383, + "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/impl/resolverNamedOptions.kt", + "discussion_id": "632467374", + "commented_code": "@@ -23,7 +23,11 @@ operator fun MutableMap.set(key: DependenciesResolverOptionsName\n */\n enum class DependenciesResolverOptionsName(optionName: String? 
= null) {\n TRANSITIVE,\n- SCOPE;\n+ SCOPE,\n+ USERNAME,\n+ PASSWORD,\n+ PRIVATE_KEY_FILE,", + "comment_created_at": "2021-05-17T07:41:19+00:00", + "comment_author": "ligee", + "comment_body": "Ok, let it be `KEY_FILE`/`KEY_PASSPHRASE`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "441753939", + "pr_number": 3486, + "pr_file": "compiler/frontend/src/org/jetbrains/kotlin/resolve/extensions/SyntheticResolveExtension.kt", + "created_at": "2020-06-17T18:44:52+00:00", + "commented_code": "fun getSyntheticNestedClassNames(thisDescriptor: ClassDescriptor): List = emptyList()\n\n /**\n * This method should return either superset of what [getSyntheticNestedClassNames] returns,\n * or null in case it needs to run resolution and inference and/or it is very costly.\n * Override this method if resolution started to fail with recursion.\n */\n fun maybeGetSyntheticNestedClassNames(thisDescriptor: ClassDescriptor): List? = getSyntheticNestedClassNames(thisDescriptor)", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "441753939", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3486, + "pr_file": "compiler/frontend/src/org/jetbrains/kotlin/resolve/extensions/SyntheticResolveExtension.kt", + "discussion_id": "441753939", + "commented_code": "@@ -136,6 +144,13 @@ interface SyntheticResolveExtension {\n \n fun getSyntheticNestedClassNames(thisDescriptor: ClassDescriptor): List = emptyList()\n \n+ /**\n+ * This method should return either superset of what [getSyntheticNestedClassNames] returns,\n+ * or null in case it needs to run resolution and inference and/or it is very costly.\n+ * Override this method if resolution started to fail with recursion.\n+ */\n+ fun maybeGetSyntheticNestedClassNames(thisDescriptor: ClassDescriptor): List? 
= getSyntheticNestedClassNames(thisDescriptor)", + "comment_created_at": "2020-06-17T18:44:52+00:00", + "comment_author": "sandwwraith", + "comment_body": "imo, `getPossibleSyntheticNestedClassNames` is a slightly better name for this method", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kotlin-names-express-clear-intent.md b/_reviewers/kotlin-names-express-clear-intent.md index 1089118..2bb834e 100644 --- a/_reviewers/kotlin-names-express-clear-intent.md +++ b/_reviewers/kotlin-names-express-clear-intent.md @@ -34,405 +34,3 @@ enum class WalkAlgorithm { DEPTH_FIRST // Explicit } ``` - - -[ - { - "discussion_id": "270310587", - "pr_number": 2232, - "pr_file": "libraries/stdlib/jvm/src/kotlin/io/files/FileTreeWalk.kt", - "created_at": "2019-03-29T08:09:17+00:00", - "commented_code": "import java.io.IOException\nimport java.util.ArrayDeque\n\n/**\n * An enumeration to describe possible algorithms to traverse.\n * There are two of them: search in breadth (same directory depth)\n */\npublic enum class WalkAlgorithm {\n /**\n * Breadth-first search\n */\n BFS,", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "270310587", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 2232, - "pr_file": "libraries/stdlib/jvm/src/kotlin/io/files/FileTreeWalk.kt", - "discussion_id": "270310587", - "commented_code": "@@ -12,6 +12,20 @@ import java.io.File\n import java.io.IOException\n import java.util.ArrayDeque\n \n+/**\n+ * An enumeration to describe possible algorithms to traverse.\n+ * There are two of them: search in breadth (same directory depth)\n+ */\n+public enum class WalkAlgorithm {\n+ /**\n+ * Breadth-first search\n+ */\n+ BFS,", - "comment_created_at": "2019-03-29T08:09:17+00:00", - "comment_author": "sschuberth", - "comment_body": "I actually believe we should write these out as `BREADTH_FIRST` and `DEPTH_FIRST`, just like we have `TOP_DOWN` instead of `TD` below.", - "pr_file_module": null - }, - { - "comment_id": "272912852", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 2232, - "pr_file": "libraries/stdlib/jvm/src/kotlin/io/files/FileTreeWalk.kt", - "discussion_id": "270310587", - "commented_code": "@@ -12,6 +12,20 @@ import java.io.File\n import java.io.IOException\n import java.util.ArrayDeque\n \n+/**\n+ * An enumeration to describe possible algorithms to traverse.\n+ * There are two of them: search in breadth (same directory depth)\n+ */\n+public enum class WalkAlgorithm {\n+ /**\n+ * Breadth-first search\n+ */\n+ BFS,", - "comment_created_at": "2019-04-08T07:24:14+00:00", - "comment_author": "aivinog1", - "comment_body": "Agree.", - "pr_file_module": null - }, - { - "comment_id": "272967895", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 2232, - "pr_file": "libraries/stdlib/jvm/src/kotlin/io/files/FileTreeWalk.kt", - "discussion_id": "270310587", - "commented_code": "@@ -12,6 +12,20 @@ import java.io.File\n import java.io.IOException\n import java.util.ArrayDeque\n \n+/**\n+ * An enumeration to describe possible algorithms to traverse.\n+ * There are two of them: search in breadth (same directory depth)\n+ */\n+public enum class WalkAlgorithm {\n+ /**\n+ * Breadth-first search\n+ */\n+ BFS,", - "comment_created_at": "2019-04-08T09:55:30+00:00", - "comment_author": "aivinog1", - "comment_body": "> I actually believe we should write these out as `BREADTH_FIRST` and `DEPTH_FIRST`, just like we have `TOP_DOWN` instead of `TD` below.\r\n\r\nDone.", - "pr_file_module": null - } - ] - }, 
- { - "discussion_id": "936958162", - "pr_number": 4903, - "pr_file": "compiler/ir/backend.jvm/src/org/jetbrains/kotlin/backend/jvm/MemoizedInlineClassReplacements.kt", - "created_at": "2022-08-03T17:28:39+00:00", - "commented_code": "parent = irClass\n // We ignore type arguments here, since there is no good way to go from type arguments to types in the IR anyway.\n val typeArgument =", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "936958162", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4903, - "pr_file": "compiler/ir/backend.jvm/src/org/jetbrains/kotlin/backend/jvm/MemoizedInlineClassReplacements.kt", - "discussion_id": "936958162", - "commented_code": "@@ -155,7 +165,10 @@ class MemoizedInlineClassReplacements(\n parent = irClass\n // We ignore type arguments here, since there is no good way to go from type arguments to types in the IR anyway.\n val typeArgument =", - "comment_created_at": "2022-08-03T17:28:39+00:00", - "comment_author": "sfs", - "comment_body": "Since you're touching this code anyway, could you please rename this variable to `argumentType`? It's really not a type argument...", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "451431218", - "pr_number": 3516, - "pr_file": "libraries/scripting/type-providers/src/kotlin/script/experimental/typeProviders/generatedCode/GeneratedCode.kt", - "created_at": "2020-07-08T10:07:36+00:00", - "commented_code": "/*\n * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage kotlin.script.experimental.typeProviders.generatedCode\n\nimport kotlin.reflect.KClass\nimport kotlin.script.experimental.typeProviders.generatedCode.impl.*\nimport kotlin.script.experimental.typeProviders.generatedCode.impl.Object\nimport kotlin.script.experimental.typeProviders.generatedCode.impl.writeJoined\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.Generated\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.GeneratedCodeVisitor\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.InternalGeneratedCode\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.visit\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.captureImports\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.write\nimport kotlin.text.StringBuilder\n\n/**\n * Code returned by a Type Provider.\n * [GeneratedCode] is a recursive construction that allows you to build generated code as a combination of multiple types already provided\n *\n * Example:\n *\n * ```kotlin\n * data class MyObject(val name: String, val value: Int) : GeneratedCode {\n * fun body(): GeneratedCode {\n * return GeneratedCode.composite {\n * +makeDataClass(name)\n * .withInterface(MyInterface::class)\n *\n * +makeConstant(\"$nameConstant\", value)\n * }\n * }\n * }\n * ```\n *\n * [GeneratedCode] can contain any of the following:\n * - Data classes. See [makeDataClass].\n * - Objects. See [makeObject].\n * - Constants. See [makeConstant].\n * - Packages. See [makePackage].\n * - Extension functions. 
See [makeExtensionMethod].\n * - etc.\n */\ninterface GeneratedCode {\n /**\n * Generate the code that corresponds to your type\n *\n * @return An instance of [GeneratedCode].\n * Important: don't return an infinite recursion in the form of return `this`\n */\n fun body(): GeneratedCode\n\n /**\n * A builder for a composite of multiple types\n */\n class Builder internal constructor() {\n private val mutableCodeList = mutableListOf()\n\n /**\n * Include code in your compound\n */\n operator fun GeneratedCode.unaryPlus() {\n if (this is CompoundGeneratedCode) {\n mutableCodeList.addAll(codeList)\n } else {\n mutableCodeList.add(this)\n }\n }\n\n operator fun String.unaryPlus() {\n mutableCodeList.add(inlineCode(this))\n }\n\n internal fun build(): GeneratedCode {\n return composite(mutableCodeList)\n }\n }\n\n object Empty : GeneratedCode by EmptyCode()\n\n companion object {\n /**\n * Create a composition of multiple instances of Generated Code\n */\n fun composite(init: Builder.() -> Unit) = Builder().apply(init).build()\n\n /**\n * Create a composition of multiple instances of Generated Code\n */\n fun composite(iterable: Collection): GeneratedCode = if (iterable.count() == 1)\n iterable.first()\n else\n CompoundGeneratedCode(iterable.toList())\n\n /**\n * Create a composition of multiple instances of Generated Code\n */\n fun composite(vararg code: GeneratedCode) = composite(code.toList())\n }\n}\n\nprivate data class CompoundGeneratedCode(val codeList: List) : InternalGeneratedCode() {\n override fun StringBuilder.write(indent: Int) {\n writeJoined(codeList, \"\\n\\n\", indent)\n }\n\n override fun GeneratedCodeVisitor.visit() {\n for (code in codeList) {\n visit(code)\n }\n }\n}\n\n/**\n * References a member that can be used as a type\n */\ninterface IdentifiableMember {\n val name: String\n fun imports(): Set = emptySet()\n\n companion object {\n private data class KClassIdentifiable(val kClass: KClass<*>) : IdentifiableMember {\n override val name: String = kClass.simpleName ?: kClass.qualifiedName!!\n override fun imports() = setOfNotNull(kClass.qualifiedName)\n }\n\n private data class NamedIdentifiable(override val name: String) : IdentifiableMember\n\n operator fun invoke(kClass: KClass<*>): IdentifiableMember = KClassIdentifiable(kClass)\n operator fun invoke(name: String): IdentifiableMember = NamedIdentifiable(name)\n }\n}\n\n/**\n * References an interface that can be implemented\n */\ninterface GeneratedInterface : IdentifiableMember\n\nprivate data class CastedInterfaceMember(private val member: IdentifiableMember) : GeneratedInterface, IdentifiableMember by member\n\nfun IdentifiableMember.asInterface(): GeneratedInterface = CastedInterfaceMember(this)\n\n/**\n * Generated code that is a type that can be used as an Argument\n */\ninterface IdentifiableCode : GeneratedCode, IdentifiableMember\n\n/**\n * Builder for a provided type whose JVM Name can be set\n */\ninterface JVMNameableBuilder {\n fun jvmName(name: String)\n}\n\n/**\n * Builder for a provided type. 
It can implement an interface and have some code inside.\n */\ninterface GeneratedTypeBuilder : IdentifiableMember {\n /**\n * Implement a generated interface\n */\n fun implement(generated: GeneratedInterface)\n\n /**\n * Add code to the namespace of the Type\n */\n fun inner(code: GeneratedCode)", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "451431218", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3516, - "pr_file": "libraries/scripting/type-providers/src/kotlin/script/experimental/typeProviders/generatedCode/GeneratedCode.kt", - "discussion_id": "451431218", - "commented_code": "@@ -0,0 +1,302 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.typeProviders.generatedCode\n+\n+import kotlin.reflect.KClass\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.*\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.Object\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.writeJoined\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.Generated\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.GeneratedCodeVisitor\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.InternalGeneratedCode\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visit\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.captureImports\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.write\n+import kotlin.text.StringBuilder\n+\n+/**\n+ * Code returned by a Type Provider.\n+ * [GeneratedCode] is a recursive construction that allows you to build generated code as a combination of multiple types already provided\n+ *\n+ * Example:\n+ *\n+ * ```kotlin\n+ * data class MyObject(val name: String, val value: Int) : GeneratedCode {\n+ * fun body(): GeneratedCode {\n+ * return GeneratedCode.composite {\n+ * +makeDataClass(name)\n+ * .withInterface(MyInterface::class)\n+ *\n+ * +makeConstant(\"$nameConstant\", value)\n+ * }\n+ * }\n+ * }\n+ * ```\n+ *\n+ * [GeneratedCode] can contain any of the following:\n+ * - Data classes. See [makeDataClass].\n+ * - Objects. See [makeObject].\n+ * - Constants. See [makeConstant].\n+ * - Packages. See [makePackage].\n+ * - Extension functions. 
See [makeExtensionMethod].\n+ * - etc.\n+ */\n+interface GeneratedCode {\n+ /**\n+ * Generate the code that corresponds to your type\n+ *\n+ * @return An instance of [GeneratedCode].\n+ * Important: don't return an infinite recursion in the form of return `this`\n+ */\n+ fun body(): GeneratedCode\n+\n+ /**\n+ * A builder for a composite of multiple types\n+ */\n+ class Builder internal constructor() {\n+ private val mutableCodeList = mutableListOf()\n+\n+ /**\n+ * Include code in your compound\n+ */\n+ operator fun GeneratedCode.unaryPlus() {\n+ if (this is CompoundGeneratedCode) {\n+ mutableCodeList.addAll(codeList)\n+ } else {\n+ mutableCodeList.add(this)\n+ }\n+ }\n+\n+ operator fun String.unaryPlus() {\n+ mutableCodeList.add(inlineCode(this))\n+ }\n+\n+ internal fun build(): GeneratedCode {\n+ return composite(mutableCodeList)\n+ }\n+ }\n+\n+ object Empty : GeneratedCode by EmptyCode()\n+\n+ companion object {\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(init: Builder.() -> Unit) = Builder().apply(init).build()\n+\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(iterable: Collection): GeneratedCode = if (iterable.count() == 1)\n+ iterable.first()\n+ else\n+ CompoundGeneratedCode(iterable.toList())\n+\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(vararg code: GeneratedCode) = composite(code.toList())\n+ }\n+}\n+\n+private data class CompoundGeneratedCode(val codeList: List) : InternalGeneratedCode() {\n+ override fun StringBuilder.write(indent: Int) {\n+ writeJoined(codeList, \"\\n\\n\", indent)\n+ }\n+\n+ override fun GeneratedCodeVisitor.visit() {\n+ for (code in codeList) {\n+ visit(code)\n+ }\n+ }\n+}\n+\n+/**\n+ * References a member that can be used as a type\n+ */\n+interface IdentifiableMember {\n+ val name: String\n+ fun imports(): Set = emptySet()\n+\n+ companion object {\n+ private data class KClassIdentifiable(val kClass: KClass<*>) : IdentifiableMember {\n+ override val name: String = kClass.simpleName ?: kClass.qualifiedName!!\n+ override fun imports() = setOfNotNull(kClass.qualifiedName)\n+ }\n+\n+ private data class NamedIdentifiable(override val name: String) : IdentifiableMember\n+\n+ operator fun invoke(kClass: KClass<*>): IdentifiableMember = KClassIdentifiable(kClass)\n+ operator fun invoke(name: String): IdentifiableMember = NamedIdentifiable(name)\n+ }\n+}\n+\n+/**\n+ * References an interface that can be implemented\n+ */\n+interface GeneratedInterface : IdentifiableMember\n+\n+private data class CastedInterfaceMember(private val member: IdentifiableMember) : GeneratedInterface, IdentifiableMember by member\n+\n+fun IdentifiableMember.asInterface(): GeneratedInterface = CastedInterfaceMember(this)\n+\n+/**\n+ * Generated code that is a type that can be used as an Argument\n+ */\n+interface IdentifiableCode : GeneratedCode, IdentifiableMember\n+\n+/**\n+ * Builder for a provided type whose JVM Name can be set\n+ */\n+interface JVMNameableBuilder {\n+ fun jvmName(name: String)\n+}\n+\n+/**\n+ * Builder for a provided type. 
It can implement an interface and have some code inside.\n+ */\n+interface GeneratedTypeBuilder : IdentifiableMember {\n+ /**\n+ * Implement a generated interface\n+ */\n+ fun implement(generated: GeneratedInterface)\n+\n+ /**\n+ * Add code to the namespace of the Type\n+ */\n+ fun inner(code: GeneratedCode)", - "comment_created_at": "2020-07-08T10:07:36+00:00", - "comment_author": "ligee", - "comment_body": "I think this is a confusing name, the word `inner` has a different meaning in the Kotlin code.\r\nIn the compiler, we're using `child`/`children` in such places.", - "pr_file_module": null - }, - { - "comment_id": "480104468", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3516, - "pr_file": "libraries/scripting/type-providers/src/kotlin/script/experimental/typeProviders/generatedCode/GeneratedCode.kt", - "discussion_id": "451431218", - "commented_code": "@@ -0,0 +1,302 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.typeProviders.generatedCode\n+\n+import kotlin.reflect.KClass\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.*\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.Object\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.writeJoined\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.Generated\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.GeneratedCodeVisitor\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.InternalGeneratedCode\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visit\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.captureImports\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.write\n+import kotlin.text.StringBuilder\n+\n+/**\n+ * Code returned by a Type Provider.\n+ * [GeneratedCode] is a recursive construction that allows you to build generated code as a combination of multiple types already provided\n+ *\n+ * Example:\n+ *\n+ * ```kotlin\n+ * data class MyObject(val name: String, val value: Int) : GeneratedCode {\n+ * fun body(): GeneratedCode {\n+ * return GeneratedCode.composite {\n+ * +makeDataClass(name)\n+ * .withInterface(MyInterface::class)\n+ *\n+ * +makeConstant(\"$nameConstant\", value)\n+ * }\n+ * }\n+ * }\n+ * ```\n+ *\n+ * [GeneratedCode] can contain any of the following:\n+ * - Data classes. See [makeDataClass].\n+ * - Objects. See [makeObject].\n+ * - Constants. See [makeConstant].\n+ * - Packages. See [makePackage].\n+ * - Extension functions. 
See [makeExtensionMethod].\n+ * - etc.\n+ */\n+interface GeneratedCode {\n+ /**\n+ * Generate the code that corresponds to your type\n+ *\n+ * @return An instance of [GeneratedCode].\n+ * Important: don't return an infinite recursion in the form of return `this`\n+ */\n+ fun body(): GeneratedCode\n+\n+ /**\n+ * A builder for a composite of multiple types\n+ */\n+ class Builder internal constructor() {\n+ private val mutableCodeList = mutableListOf()\n+\n+ /**\n+ * Include code in your compound\n+ */\n+ operator fun GeneratedCode.unaryPlus() {\n+ if (this is CompoundGeneratedCode) {\n+ mutableCodeList.addAll(codeList)\n+ } else {\n+ mutableCodeList.add(this)\n+ }\n+ }\n+\n+ operator fun String.unaryPlus() {\n+ mutableCodeList.add(inlineCode(this))\n+ }\n+\n+ internal fun build(): GeneratedCode {\n+ return composite(mutableCodeList)\n+ }\n+ }\n+\n+ object Empty : GeneratedCode by EmptyCode()\n+\n+ companion object {\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(init: Builder.() -> Unit) = Builder().apply(init).build()\n+\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(iterable: Collection): GeneratedCode = if (iterable.count() == 1)\n+ iterable.first()\n+ else\n+ CompoundGeneratedCode(iterable.toList())\n+\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(vararg code: GeneratedCode) = composite(code.toList())\n+ }\n+}\n+\n+private data class CompoundGeneratedCode(val codeList: List) : InternalGeneratedCode() {\n+ override fun StringBuilder.write(indent: Int) {\n+ writeJoined(codeList, \"\\n\\n\", indent)\n+ }\n+\n+ override fun GeneratedCodeVisitor.visit() {\n+ for (code in codeList) {\n+ visit(code)\n+ }\n+ }\n+}\n+\n+/**\n+ * References a member that can be used as a type\n+ */\n+interface IdentifiableMember {\n+ val name: String\n+ fun imports(): Set = emptySet()\n+\n+ companion object {\n+ private data class KClassIdentifiable(val kClass: KClass<*>) : IdentifiableMember {\n+ override val name: String = kClass.simpleName ?: kClass.qualifiedName!!\n+ override fun imports() = setOfNotNull(kClass.qualifiedName)\n+ }\n+\n+ private data class NamedIdentifiable(override val name: String) : IdentifiableMember\n+\n+ operator fun invoke(kClass: KClass<*>): IdentifiableMember = KClassIdentifiable(kClass)\n+ operator fun invoke(name: String): IdentifiableMember = NamedIdentifiable(name)\n+ }\n+}\n+\n+/**\n+ * References an interface that can be implemented\n+ */\n+interface GeneratedInterface : IdentifiableMember\n+\n+private data class CastedInterfaceMember(private val member: IdentifiableMember) : GeneratedInterface, IdentifiableMember by member\n+\n+fun IdentifiableMember.asInterface(): GeneratedInterface = CastedInterfaceMember(this)\n+\n+/**\n+ * Generated code that is a type that can be used as an Argument\n+ */\n+interface IdentifiableCode : GeneratedCode, IdentifiableMember\n+\n+/**\n+ * Builder for a provided type whose JVM Name can be set\n+ */\n+interface JVMNameableBuilder {\n+ fun jvmName(name: String)\n+}\n+\n+/**\n+ * Builder for a provided type. 
It can implement an interface and have some code inside.\n+ */\n+interface GeneratedTypeBuilder : IdentifiableMember {\n+ /**\n+ * Implement a generated interface\n+ */\n+ fun implement(generated: GeneratedInterface)\n+\n+ /**\n+ * Add code to the namespace of the Type\n+ */\n+ fun inner(code: GeneratedCode)", - "comment_created_at": "2020-08-31T12:44:02+00:00", - "comment_author": "nerdsupremacist", - "comment_body": "Got rid of this abstraction by making `ClassLikeBuilder` implement `GeneratedCode.Builder`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "646189172", - "pr_number": 4432, - "pr_file": "libraries/stdlib/src/kotlin/collections/Collections.kt", - "created_at": "2021-06-06T21:19:33+00:00", - "commented_code": "return -(low + 1) // key not found\n}\n\n/**\n * If this list starts with the given [prefix], returns a view of the portion of this list without [prefix].\n * The returned list is backed by this list, so non-structural changes in the returned list are reflected in this list, and vice-versa.\n *\n * Structural changes in the base list make the behavior of the view undefined.\n */\npublic fun List.removePrefix(prefix: List): List {\n return if (this.startWith(prefix)) {\n this.subList(prefix.size, this.size)\n } else {\n this\n }\n}\n\n/**\n * Returns `true` if this list starts with the specified prefix.\n *\n * For empty [prefix] returns true\n */\npublic fun List.startWith(prefix: List): Boolean {", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "646189172", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4432, - "pr_file": "libraries/stdlib/src/kotlin/collections/Collections.kt", - "discussion_id": "646189172", - "commented_code": "@@ -434,6 +434,35 @@ public fun List.binarySearch(fromIndex: Int = 0, toIndex: Int = size, com\n return -(low + 1) // key not found\n }\n \n+/**\n+ * If this list starts with the given [prefix], returns a view of the portion of this list without [prefix].\n+ * The returned list is backed by this list, so non-structural changes in the returned list are reflected in this list, and vice-versa.\n+ *\n+ * Structural changes in the base list make the behavior of the view undefined.\n+ */\n+public fun List.removePrefix(prefix: List): List {\n+ return if (this.startWith(prefix)) {\n+ this.subList(prefix.size, this.size)\n+ } else {\n+ this\n+ }\n+}\n+\n+/**\n+ * Returns `true` if this list starts with the specified prefix.\n+ *\n+ * For empty [prefix] returns true\n+ */\n+public fun List.startWith(prefix: List): Boolean {", - "comment_created_at": "2021-06-06T21:19:33+00:00", - "comment_author": "Serdnad", - "comment_body": "Shouldn't this be \"startsWith\", to be consistent with the current implementation for String?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1471288707", - "pr_number": 5252, - "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiCommandLineProcessor.kt", - "created_at": "2024-01-30T14:09:36+00:00", - "commented_code": "\"outside of the compilation unit where it's declared.\",\n false,\n )\n\n val DO_NOT_SORT_MEMBERS_OPTION: CliOption =", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1471288707", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5252, - "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiCommandLineProcessor.kt", - "discussion_id": "1471288707", - "commented_code": "@@ -42,13 +42,27 @@ class JvmAbiCommandLineProcessor : CommandLineProcessor {\n 
\"outside of the compilation unit where it's declared.\",\n false,\n )\n+\n+ val DO_NOT_SORT_MEMBERS_OPTION: CliOption =", - "comment_created_at": "2024-01-30T14:09:36+00:00", - "comment_author": "madsager", - "comment_body": "I personally have a preference for flag names without negations, so I would vote for `sortMembers` with a default of true and a comment stating that turning that off reduces stability of the ABI jar.", - "pr_file_module": null - }, - { - "comment_id": "1471367973", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5252, - "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiCommandLineProcessor.kt", - "discussion_id": "1471288707", - "commented_code": "@@ -42,13 +42,27 @@ class JvmAbiCommandLineProcessor : CommandLineProcessor {\n \"outside of the compilation unit where it's declared.\",\n false,\n )\n+\n+ val DO_NOT_SORT_MEMBERS_OPTION: CliOption =", - "comment_created_at": "2024-01-30T14:50:27+00:00", - "comment_author": "udalov", - "comment_body": "Oh... My preference is flags which are _false by default_... \ud83d\ude43 For some reason it's easier for me to get a model of a \"default\" behavior of the system, just looking at the names of the flags, as opposed to names + their default values.", - "pr_file_module": null - }, - { - "comment_id": "1471379505", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5252, - "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiCommandLineProcessor.kt", - "discussion_id": "1471288707", - "commented_code": "@@ -42,13 +42,27 @@ class JvmAbiCommandLineProcessor : CommandLineProcessor {\n \"outside of the compilation unit where it's declared.\",\n false,\n )\n+\n+ val DO_NOT_SORT_MEMBERS_OPTION: CliOption =", - "comment_created_at": "2024-01-30T14:56:48+00:00", - "comment_author": "madsager", - "comment_body": "!doNotShipIt \ud83e\udd23 ", - "pr_file_module": null - }, - { - "comment_id": "1471380582", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5252, - "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiCommandLineProcessor.kt", - "discussion_id": "1471288707", - "commented_code": "@@ -42,13 +42,27 @@ class JvmAbiCommandLineProcessor : CommandLineProcessor {\n \"outside of the compilation unit where it's declared.\",\n false,\n )\n+\n+ val DO_NOT_SORT_MEMBERS_OPTION: CliOption =", - "comment_created_at": "2024-01-30T14:57:24+00:00", - "comment_author": "madsager", - "comment_body": "More seriously, as long as there is a flag I'm happy! \ud83d\udc4d ", - "pr_file_module": null - }, - { - "comment_id": "1471710838", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5252, - "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiCommandLineProcessor.kt", - "discussion_id": "1471288707", - "commented_code": "@@ -42,13 +42,27 @@ class JvmAbiCommandLineProcessor : CommandLineProcessor {\n \"outside of the compilation unit where it's declared.\",\n false,\n )\n+\n+ val DO_NOT_SORT_MEMBERS_OPTION: CliOption =", - "comment_created_at": "2024-01-30T17:59:51+00:00", - "comment_author": "Tagakov", - "comment_body": "What about `preserveDeclarationsOrder` ? 
I can rename the flag and make it disable sorting for inner classes as well to be consistent.", - "pr_file_module": null - }, - { - "comment_id": "1473159544", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5252, - "pr_file": "plugins/jvm-abi-gen/src/org/jetbrains/kotlin/jvm/abi/JvmAbiCommandLineProcessor.kt", - "discussion_id": "1471288707", - "commented_code": "@@ -42,13 +42,27 @@ class JvmAbiCommandLineProcessor : CommandLineProcessor {\n \"outside of the compilation unit where it's declared.\",\n false,\n )\n+\n+ val DO_NOT_SORT_MEMBERS_OPTION: CliOption =", - "comment_created_at": "2024-01-31T16:58:29+00:00", - "comment_author": "udalov", - "comment_body": "`preserveDeclarationsOrder` sounds great. `preserveDeclarationOrder` would be even better.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1440148268", - "pr_number": 5232, - "pr_file": "plugins/parcelize/parcelize-compiler/parcelize.backend/src/org/jetbrains/kotlin/parcelize/AndroidSymbols.kt", - "created_at": "2024-01-03T07:36:17+00:00", - "commented_code": "}\n }.symbol\n\n private val kotlinDuration: IrClassSymbol = createClass(", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1440148268", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5232, - "pr_file": "plugins/parcelize/parcelize-compiler/parcelize.backend/src/org/jetbrains/kotlin/parcelize/AndroidSymbols.kt", - "discussion_id": "1440148268", - "commented_code": "@@ -185,6 +186,12 @@ class AndroidSymbols(\n }\n }.symbol\n \n+ private val kotlinDuration: IrClassSymbol = createClass(", - "comment_created_at": "2024-01-03T07:36:17+00:00", - "comment_author": "madsager", - "comment_body": "Maybe call this `kotlinTimeDuration` to follow other naming such as `androidOsParcel`?", - "pr_file_module": null - }, - { - "comment_id": "1440481155", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5232, - "pr_file": "plugins/parcelize/parcelize-compiler/parcelize.backend/src/org/jetbrains/kotlin/parcelize/AndroidSymbols.kt", - "discussion_id": "1440148268", - "commented_code": "@@ -185,6 +186,12 @@ class AndroidSymbols(\n }\n }.symbol\n \n+ private val kotlinDuration: IrClassSymbol = createClass(", - "comment_created_at": "2024-01-03T13:57:31+00:00", - "comment_author": "gala377", - "comment_body": "Done", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1051654064", - "pr_number": 5046, - "pr_file": "compiler/ir/backend.jvm/codegen/src/org/jetbrains/kotlin/backend/jvm/codegen/irCodegenUtils.kt", - "created_at": "2022-12-18T19:34:39+00:00", - "commented_code": "val property = callee.correspondingPropertySymbol?.owner ?: return false\n return property.isDeprecatedCallable(context)\n}\n\nval IrClass.getReifiedTypeParameters: ReifiedTypeParametersUsages", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1051654064", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5046, - "pr_file": "compiler/ir/backend.jvm/codegen/src/org/jetbrains/kotlin/backend/jvm/codegen/irCodegenUtils.kt", - "discussion_id": "1051654064", - "commented_code": "@@ -320,3 +317,27 @@ private fun IrFunction.isAccessorForDeprecatedJvmStaticProperty(context: JvmBack\n val property = callee.correspondingPropertySymbol?.owner ?: return false\n return property.isDeprecatedCallable(context)\n }\n+\n+val IrClass.getReifiedTypeParameters: ReifiedTypeParametersUsages", - "comment_created_at": "2022-12-18T19:34:39+00:00", - "comment_author": "ilmirus", - "comment_body": "Please, stick to official 
code style - `getReifiedTypeParameters` is a function, if you use a property, use `reifiedTypeParameters`.", - "pr_file_module": null - }, - { - "comment_id": "1051770691", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5046, - "pr_file": "compiler/ir/backend.jvm/codegen/src/org/jetbrains/kotlin/backend/jvm/codegen/irCodegenUtils.kt", - "discussion_id": "1051654064", - "commented_code": "@@ -320,3 +317,27 @@ private fun IrFunction.isAccessorForDeprecatedJvmStaticProperty(context: JvmBack\n val property = callee.correspondingPropertySymbol?.owner ?: return false\n return property.isDeprecatedCallable(context)\n }\n+\n+val IrClass.getReifiedTypeParameters: ReifiedTypeParametersUsages", - "comment_created_at": "2022-12-19T03:51:10+00:00", - "comment_author": "larryxiao625", - "comment_body": "Thanks for you suggestion, I have modified it~. Could you please review again :)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "695778850", - "pr_number": 4535, - "pr_file": "core/compiler.common.jvm/src/org/jetbrains/kotlin/load/java/AbstractAnnotationTypeQualifierResolver.kt", - "created_at": "2021-08-25T13:56:30+00:00", - "commented_code": "/*\n * Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage org.jetbrains.kotlin.load.java\n\nimport org.jetbrains.kotlin.builtins.StandardNames\nimport org.jetbrains.kotlin.descriptors.annotations.KotlinTarget\nimport org.jetbrains.kotlin.name.FqName\nimport java.util.concurrent.ConcurrentHashMap\n\ntypealias TypeQualifierWithApplicability = Pair>", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "695778850", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4535, - "pr_file": "core/compiler.common.jvm/src/org/jetbrains/kotlin/load/java/AbstractAnnotationTypeQualifierResolver.kt", - "discussion_id": "695778850", - "commented_code": "@@ -0,0 +1,113 @@\n+/*\n+ * Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlin.load.java\n+\n+import org.jetbrains.kotlin.builtins.StandardNames\n+import org.jetbrains.kotlin.descriptors.annotations.KotlinTarget\n+import org.jetbrains.kotlin.name.FqName\n+import java.util.concurrent.ConcurrentHashMap\n+\n+typealias TypeQualifierWithApplicability = Pair>", - "comment_created_at": "2021-08-25T13:56:30+00:00", - "comment_author": "dzharkov", - "comment_body": "I don't insist here, but we're trying to use one-capital-letter names for type parameters (e.g. `A` here)\nSame for AbstractAnnotationTypeQualifierResolver\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "454246644", - "pr_number": 3544, - "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/resolverNamedOptions.kt", - "created_at": "2020-07-14T10:05:07+00:00", - "commented_code": "/*\n * Copyright 2010-2020 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage kotlin.script.experimental.dependencies\n\nfun ExternalDependenciesResolver.Options.value(name: DependenciesResolverOptionsName) =\n value(name.key)\n\nfun ExternalDependenciesResolver.Options.flag(name: DependenciesResolverOptionsName) =\n flag(name.key)\n\noperator fun MutableMap.set(key: DependenciesResolverOptionsName, value: String) {\n put(key.key, value)\n}\n\n\nenum class DependenciesResolverOptionsName {\n TRANSITIVE,\n DEPENDENCY_SCOPES;", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "454246644", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3544, - "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/resolverNamedOptions.kt", - "discussion_id": "454246644", - "commented_code": "@@ -0,0 +1,31 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.dependencies\n+\n+fun ExternalDependenciesResolver.Options.value(name: DependenciesResolverOptionsName) =\n+ value(name.key)\n+\n+fun ExternalDependenciesResolver.Options.flag(name: DependenciesResolverOptionsName) =\n+ flag(name.key)\n+\n+operator fun MutableMap.set(key: DependenciesResolverOptionsName, value: String) {\n+ put(key.key, value)\n+}\n+\n+\n+enum class DependenciesResolverOptionsName {\n+ TRANSITIVE,\n+ DEPENDENCY_SCOPES;", - "comment_created_at": "2020-07-14T10:05:07+00:00", - "comment_author": "ligee", - "comment_body": "Using underscores seems unusual for the maven configuration, and in maven this parameter called just `scope`, so I'd rename it, so the people can guess the param name easier.\r\nIf we want to have the naming flexibility - I'd add the string name parameter to the enum (and use `toLowerCase` as a default value).", - "pr_file_module": null - }, - { - "comment_id": "454329232", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3544, - "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/resolverNamedOptions.kt", - "discussion_id": "454246644", - "commented_code": "@@ -0,0 +1,31 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.dependencies\n+\n+fun ExternalDependenciesResolver.Options.value(name: DependenciesResolverOptionsName) =\n+ value(name.key)\n+\n+fun ExternalDependenciesResolver.Options.flag(name: DependenciesResolverOptionsName) =\n+ flag(name.key)\n+\n+operator fun MutableMap.set(key: DependenciesResolverOptionsName, value: String) {\n+ put(key.key, value)\n+}\n+\n+\n+enum class DependenciesResolverOptionsName {\n+ TRANSITIVE,\n+ DEPENDENCY_SCOPES;", - "comment_created_at": "2020-07-14T12:45:42+00:00", - "comment_author": "ileasile", - "comment_body": "Fixed this name and added a parameter", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "632467374", - "pr_number": 4383, - "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/impl/resolverNamedOptions.kt", - "created_at": "2021-05-14T11:33:55+00:00", - "commented_code": "*/\nenum class DependenciesResolverOptionsName(optionName: String? = null) {\n TRANSITIVE,\n SCOPE;\n SCOPE,\n USERNAME,\n PASSWORD,\n PRIVATE_KEY_FILE,", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "632467374", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4383, - "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/impl/resolverNamedOptions.kt", - "discussion_id": "632467374", - "commented_code": "@@ -23,7 +23,11 @@ operator fun MutableMap.set(key: DependenciesResolverOptionsName\n */\n enum class DependenciesResolverOptionsName(optionName: String? = null) {\n TRANSITIVE,\n- SCOPE;\n+ SCOPE,\n+ USERNAME,\n+ PASSWORD,\n+ PRIVATE_KEY_FILE,", - "comment_created_at": "2021-05-14T11:33:55+00:00", - "comment_author": "ligee", - "comment_body": "I don't like the names, taking into account that they will be used (in the lowercased form) in the annotations in the actual scripts. But I don't have much better suggestions either. Maybe `AUTH_KEY_FILE`/`AUTH_KEY_PASSPHRASE` will be a bit better, but I'm not sure. Maybe we should discuss it.", - "pr_file_module": null - }, - { - "comment_id": "632602786", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4383, - "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/impl/resolverNamedOptions.kt", - "discussion_id": "632467374", - "commented_code": "@@ -23,7 +23,11 @@ operator fun MutableMap.set(key: DependenciesResolverOptionsName\n */\n enum class DependenciesResolverOptionsName(optionName: String? = null) {\n TRANSITIVE,\n- SCOPE;\n+ SCOPE,\n+ USERNAME,\n+ PASSWORD,\n+ PRIVATE_KEY_FILE,", - "comment_created_at": "2021-05-14T15:19:36+00:00", - "comment_author": "ileasile", - "comment_body": "I'm ok with the names you suggested, maybe it's even worth getting rid of `AUTH_` prefix", - "pr_file_module": null - }, - { - "comment_id": "633292977", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4383, - "pr_file": "libraries/scripting/dependencies/src/kotlin/script/experimental/dependencies/impl/resolverNamedOptions.kt", - "discussion_id": "632467374", - "commented_code": "@@ -23,7 +23,11 @@ operator fun MutableMap.set(key: DependenciesResolverOptionsName\n */\n enum class DependenciesResolverOptionsName(optionName: String? 
= null) {\n TRANSITIVE,\n- SCOPE;\n+ SCOPE,\n+ USERNAME,\n+ PASSWORD,\n+ PRIVATE_KEY_FILE,", - "comment_created_at": "2021-05-17T07:41:19+00:00", - "comment_author": "ligee", - "comment_body": "Ok, let it be `KEY_FILE`/`KEY_PASSPHRASE`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "441753939", - "pr_number": 3486, - "pr_file": "compiler/frontend/src/org/jetbrains/kotlin/resolve/extensions/SyntheticResolveExtension.kt", - "created_at": "2020-06-17T18:44:52+00:00", - "commented_code": "fun getSyntheticNestedClassNames(thisDescriptor: ClassDescriptor): List = emptyList()\n\n /**\n * This method should return either superset of what [getSyntheticNestedClassNames] returns,\n * or null in case it needs to run resolution and inference and/or it is very costly.\n * Override this method if resolution started to fail with recursion.\n */\n fun maybeGetSyntheticNestedClassNames(thisDescriptor: ClassDescriptor): List? = getSyntheticNestedClassNames(thisDescriptor)", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "441753939", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3486, - "pr_file": "compiler/frontend/src/org/jetbrains/kotlin/resolve/extensions/SyntheticResolveExtension.kt", - "discussion_id": "441753939", - "commented_code": "@@ -136,6 +144,13 @@ interface SyntheticResolveExtension {\n \n fun getSyntheticNestedClassNames(thisDescriptor: ClassDescriptor): List = emptyList()\n \n+ /**\n+ * This method should return either superset of what [getSyntheticNestedClassNames] returns,\n+ * or null in case it needs to run resolution and inference and/or it is very costly.\n+ * Override this method if resolution started to fail with recursion.\n+ */\n+ fun maybeGetSyntheticNestedClassNames(thisDescriptor: ClassDescriptor): List? 
= getSyntheticNestedClassNames(thisDescriptor)", - "comment_created_at": "2020-06-17T18:44:52+00:00", - "comment_author": "sandwwraith", - "comment_body": "imo, `getPossibleSyntheticNestedClassNames` is a slightly better name for this method", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kotlin-prefer-descriptive-errors.json b/_reviewers/kotlin-prefer-descriptive-errors.json new file mode 100644 index 0000000..b56a401 --- /dev/null +++ b/_reviewers/kotlin-prefer-descriptive-errors.json @@ -0,0 +1,206 @@ +[ + { + "discussion_id": "1045585301", + "pr_number": 5044, + "pr_file": "compiler/ir/backend.jvm/src/org/jetbrains/kotlin/backend/jvm/JvmCachedDeclarations.kt", + "created_at": "2022-12-12T09:31:15+00:00", + "commented_code": "}\n }\n\n fun getStaticCompanionReplacementDeclaration(jvmStaticFunction: IrSimpleFunction): IrSimpleFunction =\n staticCompanionReplacementDeclarations.getOrPut(jvmStaticFunction) {\n val companion = jvmStaticFunction.parentAsClass\n assert(companion.isCompanion)", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1045585301", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5044, + "pr_file": "compiler/ir/backend.jvm/src/org/jetbrains/kotlin/backend/jvm/JvmCachedDeclarations.kt", + "discussion_id": "1045585301", + "commented_code": "@@ -182,6 +186,29 @@ class JvmCachedDeclarations(\n }\n }\n \n+ fun getStaticCompanionReplacementDeclaration(jvmStaticFunction: IrSimpleFunction): IrSimpleFunction =\n+ staticCompanionReplacementDeclarations.getOrPut(jvmStaticFunction) {\n+ val companion = jvmStaticFunction.parentAsClass\n+ assert(companion.isCompanion)", + "comment_created_at": "2022-12-12T09:31:15+00:00", + "comment_author": "ilmirus", + "comment_body": "Please, add lambda to `assert` call, otherwise, there will be unhelpful `assertion failed` without further explanation.", + "pr_file_module": null + }, + { + "comment_id": "1046202170", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5044, + "pr_file": "compiler/ir/backend.jvm/src/org/jetbrains/kotlin/backend/jvm/JvmCachedDeclarations.kt", + "discussion_id": "1045585301", + "commented_code": "@@ -182,6 +186,29 @@ class JvmCachedDeclarations(\n }\n }\n \n+ fun getStaticCompanionReplacementDeclaration(jvmStaticFunction: IrSimpleFunction): IrSimpleFunction =\n+ staticCompanionReplacementDeclarations.getOrPut(jvmStaticFunction) {\n+ val companion = jvmStaticFunction.parentAsClass\n+ assert(companion.isCompanion)", + "comment_created_at": "2022-12-12T18:06:55+00:00", + "comment_author": "BomBardyGamer", + "comment_body": "Apologies. This was done this way because this is what the method above it (which gets the static proxy from cache) does. I'll add this.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1665765123", + "pr_number": 5306, + "pr_file": "compiler/cli/cli-base/src/org/jetbrains/kotlin/cli/jvm/modules/CliJavaModuleFinder.kt", + "created_at": "2024-07-04T14:08:01+00:00", + "commented_code": "private fun findSystemModule(moduleRoot: VirtualFile, useSig: Boolean = false): JavaModule.Explicit? 
{\n val file = moduleRoot.findChild(if (useSig) PsiJavaModule.MODULE_INFO_CLASS + \".sig\" else PsiJavaModule.MODULE_INFO_CLS_FILE)\n ?: return null\n val moduleInfo = JavaModuleInfo.read(file, javaFileManager, allScope) ?: return null\n val moduleInfo = try {\n JavaModuleInfo.read(file, javaFileManager, allScope) ?: return null\n } catch (e: IllegalStateException) {\n reportError(e.message!!)\n throw CompilationErrorException()", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1665765123", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5306, + "pr_file": "compiler/cli/cli-base/src/org/jetbrains/kotlin/cli/jvm/modules/CliJavaModuleFinder.kt", + "discussion_id": "1665765123", + "commented_code": "@@ -119,7 +120,13 @@ class CliJavaModuleFinder(\n private fun findSystemModule(moduleRoot: VirtualFile, useSig: Boolean = false): JavaModule.Explicit? {\n val file = moduleRoot.findChild(if (useSig) PsiJavaModule.MODULE_INFO_CLASS + \".sig\" else PsiJavaModule.MODULE_INFO_CLS_FILE)\n ?: return null\n- val moduleInfo = JavaModuleInfo.read(file, javaFileManager, allScope) ?: return null\n+ val moduleInfo = try {\n+ JavaModuleInfo.read(file, javaFileManager, allScope) ?: return null\n+ } catch (e: IllegalStateException) {\n+ reportError(e.message!!)\n+ throw CompilationErrorException()", + "comment_created_at": "2024-07-04T14:08:01+00:00", + "comment_author": "udalov", + "comment_body": "This way we'll lose all important information from `e.cause` that would help us in diagnosing the problem. How about we at least append something like `\"\\nCaused by: ${e.cause::class.java.name}: ${e.cause.message}\"` to the error?", + "pr_file_module": null + }, + { + "comment_id": "1666502069", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5306, + "pr_file": "compiler/cli/cli-base/src/org/jetbrains/kotlin/cli/jvm/modules/CliJavaModuleFinder.kt", + "discussion_id": "1665765123", + "commented_code": "@@ -119,7 +120,13 @@ class CliJavaModuleFinder(\n private fun findSystemModule(moduleRoot: VirtualFile, useSig: Boolean = false): JavaModule.Explicit? {\n val file = moduleRoot.findChild(if (useSig) PsiJavaModule.MODULE_INFO_CLASS + \".sig\" else PsiJavaModule.MODULE_INFO_CLS_FILE)\n ?: return null\n- val moduleInfo = JavaModuleInfo.read(file, javaFileManager, allScope) ?: return null\n+ val moduleInfo = try {\n+ JavaModuleInfo.read(file, javaFileManager, allScope) ?: return null\n+ } catch (e: IllegalStateException) {\n+ reportError(e.message!!)\n+ throw CompilationErrorException()", + "comment_created_at": "2024-07-05T08:25:11+00:00", + "comment_author": "scaventz", + "comment_body": "Agree, updated.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1422626599", + "pr_number": 5220, + "pr_file": "compiler/ir/backend.js/src/org/jetbrains/kotlin/ir/backend/js/transformers/irToJs/JsIntrinsicTransformers.kt", + "created_at": "2023-12-11T15:13:19+00:00", + "commented_code": "}\n\n for ((type, prefix) in intrinsics.primitiveToTypedArrayMap) {\n add(intrinsics.primitiveToSizeConstructor[type]!!) { call, context ->\n add(intrinsics.primitiveToSizeConstructor[type] ?: doError(intrinsics.primitiveToSizeConstructor)) { call, context ->\n JsNew(JsNameRef(\"${prefix}Array\"), translateCallArguments(call, context))\n }\n add(intrinsics.primitiveToLiteralConstructor[type]!!) 
{ call, context ->\n add(intrinsics.primitiveToLiteralConstructor[type] ?: doError(intrinsics.primitiveToSizeConstructor)) { call, context ->", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1422626599", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5220, + "pr_file": "compiler/ir/backend.js/src/org/jetbrains/kotlin/ir/backend/js/transformers/irToJs/JsIntrinsicTransformers.kt", + "discussion_id": "1422626599", + "commented_code": "@@ -167,17 +170,17 @@ class JsIntrinsicTransformers(backendContext: JsIrBackendContext) {\n }\n \n for ((type, prefix) in intrinsics.primitiveToTypedArrayMap) {\n- add(intrinsics.primitiveToSizeConstructor[type]!!) { call, context ->\n+ add(intrinsics.primitiveToSizeConstructor[type] ?: doError(intrinsics.primitiveToSizeConstructor)) { call, context ->\n JsNew(JsNameRef(\"${prefix}Array\"), translateCallArguments(call, context))\n }\n- add(intrinsics.primitiveToLiteralConstructor[type]!!) { call, context ->\n+ add(intrinsics.primitiveToLiteralConstructor[type] ?: doError(intrinsics.primitiveToSizeConstructor)) { call, context ->", + "comment_created_at": "2023-12-11T15:13:19+00:00", + "comment_author": "udalov", + "comment_body": "I don't think these will ever fail", + "pr_file_module": null + }, + { + "comment_id": "1422935649", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5220, + "pr_file": "compiler/ir/backend.js/src/org/jetbrains/kotlin/ir/backend/js/transformers/irToJs/JsIntrinsicTransformers.kt", + "discussion_id": "1422626599", + "commented_code": "@@ -167,17 +170,17 @@ class JsIntrinsicTransformers(backendContext: JsIrBackendContext) {\n }\n \n for ((type, prefix) in intrinsics.primitiveToTypedArrayMap) {\n- add(intrinsics.primitiveToSizeConstructor[type]!!) { call, context ->\n+ add(intrinsics.primitiveToSizeConstructor[type] ?: doError(intrinsics.primitiveToSizeConstructor)) { call, context ->\n JsNew(JsNameRef(\"${prefix}Array\"), translateCallArguments(call, context))\n }\n- add(intrinsics.primitiveToLiteralConstructor[type]!!) { call, context ->\n+ add(intrinsics.primitiveToLiteralConstructor[type] ?: doError(intrinsics.primitiveToSizeConstructor)) { call, context ->", + "comment_created_at": "2023-12-11T18:24:39+00:00", + "comment_author": "InsanusMokrassar", + "comment_body": "I think it is related to a lot of lines in this file, but I already faced some issues in compiler without adequate message or environment info before several times", + "pr_file_module": null + }, + { + "comment_id": "1423852356", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5220, + "pr_file": "compiler/ir/backend.js/src/org/jetbrains/kotlin/ir/backend/js/transformers/irToJs/JsIntrinsicTransformers.kt", + "discussion_id": "1422626599", + "commented_code": "@@ -167,17 +170,17 @@ class JsIntrinsicTransformers(backendContext: JsIrBackendContext) {\n }\n \n for ((type, prefix) in intrinsics.primitiveToTypedArrayMap) {\n- add(intrinsics.primitiveToSizeConstructor[type]!!) { call, context ->\n+ add(intrinsics.primitiveToSizeConstructor[type] ?: doError(intrinsics.primitiveToSizeConstructor)) { call, context ->\n JsNew(JsNameRef(\"${prefix}Array\"), translateCallArguments(call, context))\n }\n- add(intrinsics.primitiveToLiteralConstructor[type]!!) { call, context ->\n+ add(intrinsics.primitiveToLiteralConstructor[type] ?: doError(intrinsics.primitiveToSizeConstructor)) { call, context ->", + "comment_created_at": "2023-12-12T11:21:58+00:00", + "comment_author": "udalov", + "comment_body": "I'm not convinced. 
I don't see a reason to add effectively dead code.", + "pr_file_module": null + }, + { + "comment_id": "1424189222", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5220, + "pr_file": "compiler/ir/backend.js/src/org/jetbrains/kotlin/ir/backend/js/transformers/irToJs/JsIntrinsicTransformers.kt", + "discussion_id": "1422626599", + "commented_code": "@@ -167,17 +170,17 @@ class JsIntrinsicTransformers(backendContext: JsIrBackendContext) {\n }\n \n for ((type, prefix) in intrinsics.primitiveToTypedArrayMap) {\n- add(intrinsics.primitiveToSizeConstructor[type]!!) { call, context ->\n+ add(intrinsics.primitiveToSizeConstructor[type] ?: doError(intrinsics.primitiveToSizeConstructor)) { call, context ->\n JsNew(JsNameRef(\"${prefix}Array\"), translateCallArguments(call, context))\n }\n- add(intrinsics.primitiveToLiteralConstructor[type]!!) { call, context ->\n+ add(intrinsics.primitiveToLiteralConstructor[type] ?: doError(intrinsics.primitiveToSizeConstructor)) { call, context ->", + "comment_created_at": "2023-12-12T15:34:57+00:00", + "comment_author": "InsanusMokrassar", + "comment_body": "So, may you guarantee somehow that this code will not be used? :) As I have said, in the whole file were a lot of `!!` which means `it is potential thrown error without any useful info`. I just have added some explanation to these things. In case you have some suggestion better than use `!!` or `?: doError`, I would be glad to see it and even realize, I think", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1265437800", + "pr_number": 5158, + "pr_file": "compiler/ir/backend.jvm/codegen/src/org/jetbrains/kotlin/backend/jvm/codegen/AnnotationCodegen.kt", + "created_at": "2023-07-17T14:17:40+00:00", + "commented_code": "else if (param.defaultValue != null)\n continue // Default value will be supplied by JVM at runtime.\n else if (context.state.classBuilderMode.generateBodies) //skip error for KAPT\n error(\"No value for annotation parameter $param\")\n error(\"No value for annotation parameter ${param.dumpKotlinLike()}\")", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1265437800", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5158, + "pr_file": "compiler/ir/backend.jvm/codegen/src/org/jetbrains/kotlin/backend/jvm/codegen/AnnotationCodegen.kt", + "discussion_id": "1265437800", + "commented_code": "@@ -216,7 +216,7 @@ abstract class AnnotationCodegen(\n else if (param.defaultValue != null)\n continue // Default value will be supplied by JVM at runtime.\n else if (context.state.classBuilderMode.generateBodies) //skip error for KAPT\n- error(\"No value for annotation parameter $param\")\n+ error(\"No value for annotation parameter ${param.dumpKotlinLike()}\")", + "comment_created_at": "2023-07-17T14:17:40+00:00", + "comment_author": "udalov", + "comment_body": "Please use `render()` instead, as it's more resilient to errors and more tested (you wouldn't want another exception to happen when you're rendering an exception message)", + "pr_file_module": null + }, + { + "comment_id": "1265478745", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5158, + "pr_file": "compiler/ir/backend.jvm/codegen/src/org/jetbrains/kotlin/backend/jvm/codegen/AnnotationCodegen.kt", + "discussion_id": "1265437800", + "commented_code": "@@ -216,7 +216,7 @@ abstract class AnnotationCodegen(\n else if (param.defaultValue != null)\n continue // Default value will be supplied by JVM at runtime.\n else if (context.state.classBuilderMode.generateBodies) //skip error 
for KAPT\n- error(\"No value for annotation parameter $param\")\n+ error(\"No value for annotation parameter ${param.dumpKotlinLike()}\")", + "comment_created_at": "2023-07-17T14:45:23+00:00", + "comment_author": "JavierSegoviaCordoba", + "comment_body": "Done!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1035132877", + "pr_number": 5030, + "pr_file": "compiler/fir/semantics/src/org/jetbrains/kotlin/fir/resolve/dfa/PersistentLogicSystem.kt", + "created_at": "2022-11-29T18:36:32+00:00", + "commented_code": "override fun getTypeStatement(variable: RealVariable): TypeStatement? =\n approvedTypeStatements[unwrapVariable(variable)]?.copy(variable = variable)\n\n fun lowestCommonAncestor(other: PersistentFlow): PersistentFlow? {\n var left = this\n var right = other\n while (left.level > right.level) {\n left = left.previousFlow ?: return null\n }\n while (right.level > left.level) {\n right = right.previousFlow ?: return null\n }\n while (left != right) {\n left = left.previousFlow ?: return null\n right = right.previousFlow ?: return null\n }\n return left\n }\n\n fun fork(): MutableFlow = MutableFlow(\n this,\n approvedTypeStatements.builder(),\n logicStatements.builder(),\n assignmentIndex.builder(),\n directAliasMap.builder(),\n backwardsAliasMap.builder(),\n )\n}\n\nabstract class PersistentLogicSystem(context: ConeInferenceContext) : LogicSystem(context) {\n abstract val variableStorage: VariableStorageImpl\nclass MutableFlow internal constructor(\n private val previousFlow: PersistentFlow?,\n internal val approvedTypeStatements: PersistentMap.Builder,\n internal val logicStatements: PersistentMap.Builder>,\n internal val assignmentIndex: PersistentMap.Builder,\n internal val directAliasMap: PersistentMap.Builder,\n internal val backwardsAliasMap: PersistentMap.Builder>,\n) : Flow {\n constructor() : this(\n null,\n emptyPersistentHashMapBuilder(),\n emptyPersistentHashMapBuilder(),\n emptyPersistentHashMapBuilder(),\n emptyPersistentHashMapBuilder(),\n emptyPersistentHashMapBuilder(),\n )\n\n override fun createEmptyFlow(): PersistentFlow =\n PersistentFlow()\n override val knownVariables: Set\n get() = approvedTypeStatements.keys + directAliasMap.keys\n\n override fun forkFlow(flow: PersistentFlow): PersistentFlow =\n PersistentFlow(flow)\n override fun unwrapVariable(variable: RealVariable): RealVariable =\n directAliasMap[variable] ?: variable\n\n override fun copyAllInformation(from: PersistentFlow, to: PersistentFlow) {\n to.approvedTypeStatements = from.approvedTypeStatements\n to.logicStatements = from.logicStatements\n to.directAliasMap = from.directAliasMap\n to.backwardsAliasMap = from.backwardsAliasMap\n to.assignmentIndex = from.assignmentIndex\n }\n override fun getTypeStatement(variable: RealVariable): TypeStatement? 
=\n approvedTypeStatements[unwrapVariable(variable)]?.copy(variable = variable)\n\n override fun joinFlow(flows: Collection): PersistentFlow =\n foldFlow(flows, allExecute = false)\n fun freeze(): PersistentFlow = PersistentFlow(\n previousFlow,\n approvedTypeStatements.build(),\n logicStatements.build(),\n assignmentIndex.build(),\n directAliasMap.build(),\n backwardsAliasMap.build(),\n )\n}\n\n override fun unionFlow(flows: Collection): PersistentFlow =\n foldFlow(flows, allExecute = true)\nprivate fun emptyPersistentHashMapBuilder(): PersistentMap.Builder =\n persistentHashMapOf().builder()\n\n private fun foldFlow(flows: Collection, allExecute: Boolean): PersistentFlow {\nabstract class PersistentLogicSystem(context: ConeInferenceContext) : LogicSystem(context) {\n abstract val variableStorage: VariableStorageImpl\n\n override fun joinFlow(flows: Collection, union: Boolean): MutableFlow {\n when (flows.size) {\n 0 -> return createEmptyFlow()\n 1 -> return forkFlow(flows.first())\n 0 -> return MutableFlow()\n 1 -> return flows.first().fork()\n }\n\n val commonFlow = flows.reduce(::lowestCommonFlow)\n val result = forkFlow(commonFlow)\n val commonFlow = flows.reduce { a, b ->\n a.lowestCommonAncestor(b) ?: throw AssertionError(\"no common ancestor in $a, $b\")", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1035132877", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5030, + "pr_file": "compiler/fir/semantics/src/org/jetbrains/kotlin/fir/resolve/dfa/PersistentLogicSystem.kt", + "discussion_id": "1035132877", + "commented_code": "@@ -68,45 +41,91 @@ class PersistentFlow : Flow {\n \n override fun getTypeStatement(variable: RealVariable): TypeStatement? =\n approvedTypeStatements[unwrapVariable(variable)]?.copy(variable = variable)\n+\n+ fun lowestCommonAncestor(other: PersistentFlow): PersistentFlow? 
{\n+ var left = this\n+ var right = other\n+ while (left.level > right.level) {\n+ left = left.previousFlow ?: return null\n+ }\n+ while (right.level > left.level) {\n+ right = right.previousFlow ?: return null\n+ }\n+ while (left != right) {\n+ left = left.previousFlow ?: return null\n+ right = right.previousFlow ?: return null\n+ }\n+ return left\n+ }\n+\n+ fun fork(): MutableFlow = MutableFlow(\n+ this,\n+ approvedTypeStatements.builder(),\n+ logicStatements.builder(),\n+ assignmentIndex.builder(),\n+ directAliasMap.builder(),\n+ backwardsAliasMap.builder(),\n+ )\n }\n \n-abstract class PersistentLogicSystem(context: ConeInferenceContext) : LogicSystem(context) {\n- abstract val variableStorage: VariableStorageImpl\n+class MutableFlow internal constructor(\n+ private val previousFlow: PersistentFlow?,\n+ internal val approvedTypeStatements: PersistentMap.Builder,\n+ internal val logicStatements: PersistentMap.Builder>,\n+ internal val assignmentIndex: PersistentMap.Builder,\n+ internal val directAliasMap: PersistentMap.Builder,\n+ internal val backwardsAliasMap: PersistentMap.Builder>,\n+) : Flow {\n+ constructor() : this(\n+ null,\n+ emptyPersistentHashMapBuilder(),\n+ emptyPersistentHashMapBuilder(),\n+ emptyPersistentHashMapBuilder(),\n+ emptyPersistentHashMapBuilder(),\n+ emptyPersistentHashMapBuilder(),\n+ )\n \n- override fun createEmptyFlow(): PersistentFlow =\n- PersistentFlow()\n+ override val knownVariables: Set\n+ get() = approvedTypeStatements.keys + directAliasMap.keys\n \n- override fun forkFlow(flow: PersistentFlow): PersistentFlow =\n- PersistentFlow(flow)\n+ override fun unwrapVariable(variable: RealVariable): RealVariable =\n+ directAliasMap[variable] ?: variable\n \n- override fun copyAllInformation(from: PersistentFlow, to: PersistentFlow) {\n- to.approvedTypeStatements = from.approvedTypeStatements\n- to.logicStatements = from.logicStatements\n- to.directAliasMap = from.directAliasMap\n- to.backwardsAliasMap = from.backwardsAliasMap\n- to.assignmentIndex = from.assignmentIndex\n- }\n+ override fun getTypeStatement(variable: RealVariable): TypeStatement? 
=\n+ approvedTypeStatements[unwrapVariable(variable)]?.copy(variable = variable)\n \n- override fun joinFlow(flows: Collection): PersistentFlow =\n- foldFlow(flows, allExecute = false)\n+ fun freeze(): PersistentFlow = PersistentFlow(\n+ previousFlow,\n+ approvedTypeStatements.build(),\n+ logicStatements.build(),\n+ assignmentIndex.build(),\n+ directAliasMap.build(),\n+ backwardsAliasMap.build(),\n+ )\n+}\n \n- override fun unionFlow(flows: Collection): PersistentFlow =\n- foldFlow(flows, allExecute = true)\n+private fun emptyPersistentHashMapBuilder(): PersistentMap.Builder =\n+ persistentHashMapOf().builder()\n \n- private fun foldFlow(flows: Collection, allExecute: Boolean): PersistentFlow {\n+abstract class PersistentLogicSystem(context: ConeInferenceContext) : LogicSystem(context) {\n+ abstract val variableStorage: VariableStorageImpl\n+\n+ override fun joinFlow(flows: Collection, union: Boolean): MutableFlow {\n when (flows.size) {\n- 0 -> return createEmptyFlow()\n- 1 -> return forkFlow(flows.first())\n+ 0 -> return MutableFlow()\n+ 1 -> return flows.first().fork()\n }\n \n- val commonFlow = flows.reduce(::lowestCommonFlow)\n- val result = forkFlow(commonFlow)\n+ val commonFlow = flows.reduce { a, b ->\n+ a.lowestCommonAncestor(b) ?: throw AssertionError(\"no common ancestor in $a, $b\")", + "comment_created_at": "2022-11-29T18:36:32+00:00", + "comment_author": "demiurg906", + "comment_body": "It's better to use `error(\"...\")` function from stdlib instead of `AssertionError`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "429178775", + "pr_number": 3415, + "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/AtomicFUTransformerJsIr.kt", + "created_at": "2020-05-22T10:52:48+00:00", + "commented_code": "/*\n * Copyright 2010-2020 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage org.jetbrains.kotlinx.atomicfu.compiler.extensions\n\nimport org.jetbrains.kotlin.backend.common.deepCopyWithVariables\nimport org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\nimport org.jetbrains.kotlin.builtins.PrimitiveType\nimport org.jetbrains.kotlin.descriptors.*\nimport org.jetbrains.kotlin.descriptors.impl.FunctionDescriptorImpl\nimport org.jetbrains.kotlin.ir.*\nimport org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildValueParameter\nimport org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildFunction\nimport org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildBlockBody\nimport org.jetbrains.kotlin.ir.declarations.*\nimport org.jetbrains.kotlin.ir.declarations.impl.*\nimport org.jetbrains.kotlin.ir.expressions.*\nimport org.jetbrains.kotlin.ir.expressions.impl.*\nimport org.jetbrains.kotlin.ir.symbols.IrSimpleFunctionSymbol\nimport org.jetbrains.kotlin.ir.symbols.IrSymbol\nimport org.jetbrains.kotlin.ir.symbols.impl.*\nimport org.jetbrains.kotlin.ir.types.*\nimport org.jetbrains.kotlin.ir.util.*\nimport org.jetbrains.kotlin.ir.visitors.IrElementTransformerVoid\nimport java.lang.IllegalStateException\n\nprivate const val AFU_PKG = \"kotlinx/atomicfu\"\nprivate const val LOCKS = \"locks\"\nprivate const val ATOMIC_CONSTRUCTOR = \"atomic\"\nprivate const val ATOMICFU_VALUE_TYPE = \"\"\"Atomic(Int|Long|Boolean|Ref)\"\"\"\nprivate const val ATOMIC_ARRAY_TYPE = \"\"\"Atomic(Int|Long|Boolean|)Array\"\"\"\nprivate const val ATOMIC_ARRAY_FACTORY_FUNCTION = \"atomicArrayOfNulls\"\nprivate const val ATOMICFU_RUNTIME_FUNCTION_PREDICATE = \"atomicfu_\"\nprivate const val REENTRANT_LOCK_TYPE = \"ReentrantLock\"\nprivate const val GETTER = \"atomicfu\\$getter\"\nprivate const val SETTER = \"atomicfu\\$setter\"\nprivate const val GET = \"get\"\nprivate const val SET = \"set\"\n\nprivate fun String.prettyStr() = replace('/', '.')\n\nclass AtomicFUTransformer(override val context: IrPluginContext) : IrElementTransformerVoid(), TransformerHelpers {\n\n private val irBuiltIns = context.irBuiltIns\n\n private val AFU_CLASSES: Map = mapOf(\n \"AtomicInt\" to irBuiltIns.intType,\n \"AtomicLong\" to irBuiltIns.longType,\n \"AtomicRef\" to irBuiltIns.anyType,\n \"AtomicBoolean\" to irBuiltIns.booleanType\n )\n\n private val AFU_ARRAY_CLASSES: Map = mapOf(\n \"AtomicIntArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.INT),\n \"AtomicLongArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.LONG),\n \"AtomicBooleanArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.BOOLEAN),\n \"AtomicArray\" to context.builtIns.array\n )\n\n override fun visitFile(irFile: IrFile): IrFile {\n irFile.declarations.map { declaration ->\n declaration.transformAtomicInlineDeclaration()\n }\n return super.visitFile(irFile)\n }\n\n override fun visitClass(irClass: IrClass): IrStatement {\n irClass.declarations.map { declaration ->\n declaration.transformAtomicInlineDeclaration()\n }\n return super.visitClass(irClass)\n }\n\n override fun visitProperty(property: IrProperty): IrStatement {\n if (property.backingField != null) {\n val backingField = property.backingField!!\n if (backingField.initializer != null) {\n val initializer = backingField.initializer!!.expression.transformAtomicValueInitializer()\n property.backingField!!.initializer = 
IrExpressionBodyImpl(initializer)\n }\n }\n return super.visitProperty(property)\n }\n\n override fun visitBlockBody(body: IrBlockBody): IrBody {\n body.statements.forEachIndexed { i, stmt ->\n if (stmt is IrCall) {\n body.statements[i] = stmt.transformAtomicFunctionCall()\n }\n }\n return super.visitBlockBody(body)\n }\n\n override fun visitBlock(block: IrBlock): IrExpression {\n block.statements.forEachIndexed { i, stmt ->\n if (stmt is IrCall) {\n block.statements[i] = stmt.transformAtomicFunctionCall()\n }\n }\n return super.visitBlock(block)\n }\n\n override fun visitReturn(returnExpr: IrReturn): IrExpression {\n returnExpr.value = returnExpr.value.transformAtomicFunctionCall()\n return super.visitReturn(returnExpr)\n }\n\n override fun visitTypeOperator(typeOp: IrTypeOperatorCall): IrExpression {\n typeOp.argument = typeOp.argument.transformAtomicFunctionCall()\n return super.visitTypeOperator(typeOp)\n }\n\n override fun visitSetVariable(setVar: IrSetVariable): IrExpression {\n setVar.value = setVar.value.transformAtomicFunctionCall()\n return super.visitSetVariable(setVar)\n }\n\n override fun visitSetField(setField: IrSetField): IrExpression {\n setField.value = setField.value.transformAtomicFunctionCall()\n return super.visitSetField(setField)\n }\n\n override fun visitVariable(declaration: IrVariable): IrStatement {\n declaration.initializer = declaration.initializer?.transformAtomicFunctionCall()\n return super.visitVariable(declaration)\n }\n\n override fun visitBranch(branch: IrBranch): IrBranch {\n branch.apply {\n condition = condition.transformAtomicFunctionCall()\n result = result.transformAtomicFunctionCall()\n }\n return super.visitBranch(branch)\n }\n\n override fun visitElseBranch(branch: IrElseBranch): IrElseBranch {\n branch.apply {\n condition = condition.transformAtomicFunctionCall()\n result = result.transformAtomicFunctionCall()\n }\n return super.visitElseBranch(branch)\n }\n\n private fun IrExpression.transformAtomicValueInitializer() =\n when {\n type.isAtomicValueType() -> getPureTypeValue()\n type.isAtomicArrayType() -> buildPureTypeArrayConstructor()\n type.isReentrantLockType() -> buildConstNull()\n else -> this\n }\n\n private fun IrDeclaration.transformAtomicInlineDeclaration() {\n if (this is IrFunction &&\n isInline &&\n extensionReceiverParameter != null &&\n extensionReceiverParameter!!.type.isAtomicValueType()\n ) {\n val type = extensionReceiverParameter!!.type\n val valueType = type.atomicToValueType()\n val getterType = buildFunctionSimpleType(listOf(irBuiltIns.unitType, valueType))\n val setterType = buildFunctionSimpleType(listOf(valueType, irBuiltIns.unitType))\n val valueParametersCount = valueParameters.size\n val extendedValueParameters = mutableListOf().apply {\n addAll(valueParameters)\n add(buildValueParameter(GETTER, valueParametersCount, getterType))\n add(buildValueParameter(SETTER, valueParametersCount + 1, setterType))\n }\n this as IrSimpleFunction\n (descriptor as FunctionDescriptorImpl).initialize(\n null,\n descriptor.dispatchReceiverParameter,\n descriptor.typeParameters,\n extendedValueParameters.map { it.descriptor as ValueParameterDescriptor },\n descriptor.returnType,\n descriptor.modality,\n descriptor.visibility\n )\n extensionReceiverParameter = null\n valueParameters = extendedValueParameters\n }\n }\n\n private fun IrExpression.getPureTypeValue(): IrExpression {\n require(this is IrCall && isAtomicFactoryFunction()) { \"Illegal initializer for the atomic property $this\" }\n return 
getValueArgument(0)!!.transformAtomicFunctionCall()\n }\n\n private fun IrExpression.buildPureTypeArrayConstructor() =\n when (this) {\n is IrConstructorCall -> {\n require(isAtomicArrayConstructor())\n val arrayConstructorSymbol = referenceBuiltInConstructor(type.getArrayClassDescriptor()) { it.valueParameters.size == 1 }\n val size = getValueArgument(0)\n IrConstructorCallImpl(\n UNDEFINED_OFFSET, UNDEFINED_OFFSET,\n irBuiltIns.unitType, arrayConstructorSymbol,\n 0, 0, 1\n ).apply {\n putValueArgument(0, size)\n }\n }\n is IrCall -> {\n require(isAtomicArrayFactoryFunction()) { \"Unsupported atomic array factory function $this\" }\n val arrayFactorySymbol = referencePackageFunction(\"kotlin\", \"arrayOfNulls\")\n val arrayElementType = getTypeArgument(0)!!\n val size = getValueArgument(0)\n buildCall(\n target = arrayFactorySymbol,\n type = type,\n origin = IrStatementOrigin.INVOKE,\n typeArguments = listOf(arrayElementType),\n valueArguments = listOf(size)\n )\n }\n else -> throw IllegalStateException(\"Illegal type of atomic array initializer\")", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "429178775", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3415, + "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/AtomicFUTransformerJsIr.kt", + "discussion_id": "429178775", + "commented_code": "@@ -0,0 +1,473 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlinx.atomicfu.compiler.extensions\n+\n+import org.jetbrains.kotlin.backend.common.deepCopyWithVariables\n+import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\n+import org.jetbrains.kotlin.builtins.PrimitiveType\n+import org.jetbrains.kotlin.descriptors.*\n+import org.jetbrains.kotlin.descriptors.impl.FunctionDescriptorImpl\n+import org.jetbrains.kotlin.ir.*\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildValueParameter\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildFunction\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildBlockBody\n+import org.jetbrains.kotlin.ir.declarations.*\n+import org.jetbrains.kotlin.ir.declarations.impl.*\n+import org.jetbrains.kotlin.ir.expressions.*\n+import org.jetbrains.kotlin.ir.expressions.impl.*\n+import org.jetbrains.kotlin.ir.symbols.IrSimpleFunctionSymbol\n+import org.jetbrains.kotlin.ir.symbols.IrSymbol\n+import org.jetbrains.kotlin.ir.symbols.impl.*\n+import org.jetbrains.kotlin.ir.types.*\n+import org.jetbrains.kotlin.ir.util.*\n+import org.jetbrains.kotlin.ir.visitors.IrElementTransformerVoid\n+import java.lang.IllegalStateException\n+\n+private const val AFU_PKG = \"kotlinx/atomicfu\"\n+private const val LOCKS = \"locks\"\n+private const val ATOMIC_CONSTRUCTOR = \"atomic\"\n+private const val ATOMICFU_VALUE_TYPE = \"\"\"Atomic(Int|Long|Boolean|Ref)\"\"\"\n+private const val ATOMIC_ARRAY_TYPE = \"\"\"Atomic(Int|Long|Boolean|)Array\"\"\"\n+private const val ATOMIC_ARRAY_FACTORY_FUNCTION = \"atomicArrayOfNulls\"\n+private const val ATOMICFU_RUNTIME_FUNCTION_PREDICATE = \"atomicfu_\"\n+private const val REENTRANT_LOCK_TYPE = \"ReentrantLock\"\n+private const val GETTER = \"atomicfu\\$getter\"\n+private const val SETTER = \"atomicfu\\$setter\"\n+private const val GET = \"get\"\n+private const val SET = \"set\"\n+\n+private fun 
String.prettyStr() = replace('/', '.')\n+\n+class AtomicFUTransformer(override val context: IrPluginContext) : IrElementTransformerVoid(), TransformerHelpers {\n+\n+ private val irBuiltIns = context.irBuiltIns\n+\n+ private val AFU_CLASSES: Map = mapOf(\n+ \"AtomicInt\" to irBuiltIns.intType,\n+ \"AtomicLong\" to irBuiltIns.longType,\n+ \"AtomicRef\" to irBuiltIns.anyType,\n+ \"AtomicBoolean\" to irBuiltIns.booleanType\n+ )\n+\n+ private val AFU_ARRAY_CLASSES: Map = mapOf(\n+ \"AtomicIntArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.INT),\n+ \"AtomicLongArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.LONG),\n+ \"AtomicBooleanArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.BOOLEAN),\n+ \"AtomicArray\" to context.builtIns.array\n+ )\n+\n+ override fun visitFile(irFile: IrFile): IrFile {\n+ irFile.declarations.map { declaration ->\n+ declaration.transformAtomicInlineDeclaration()\n+ }\n+ return super.visitFile(irFile)\n+ }\n+\n+ override fun visitClass(irClass: IrClass): IrStatement {\n+ irClass.declarations.map { declaration ->\n+ declaration.transformAtomicInlineDeclaration()\n+ }\n+ return super.visitClass(irClass)\n+ }\n+\n+ override fun visitProperty(property: IrProperty): IrStatement {\n+ if (property.backingField != null) {\n+ val backingField = property.backingField!!\n+ if (backingField.initializer != null) {\n+ val initializer = backingField.initializer!!.expression.transformAtomicValueInitializer()\n+ property.backingField!!.initializer = IrExpressionBodyImpl(initializer)\n+ }\n+ }\n+ return super.visitProperty(property)\n+ }\n+\n+ override fun visitBlockBody(body: IrBlockBody): IrBody {\n+ body.statements.forEachIndexed { i, stmt ->\n+ if (stmt is IrCall) {\n+ body.statements[i] = stmt.transformAtomicFunctionCall()\n+ }\n+ }\n+ return super.visitBlockBody(body)\n+ }\n+\n+ override fun visitBlock(block: IrBlock): IrExpression {\n+ block.statements.forEachIndexed { i, stmt ->\n+ if (stmt is IrCall) {\n+ block.statements[i] = stmt.transformAtomicFunctionCall()\n+ }\n+ }\n+ return super.visitBlock(block)\n+ }\n+\n+ override fun visitReturn(returnExpr: IrReturn): IrExpression {\n+ returnExpr.value = returnExpr.value.transformAtomicFunctionCall()\n+ return super.visitReturn(returnExpr)\n+ }\n+\n+ override fun visitTypeOperator(typeOp: IrTypeOperatorCall): IrExpression {\n+ typeOp.argument = typeOp.argument.transformAtomicFunctionCall()\n+ return super.visitTypeOperator(typeOp)\n+ }\n+\n+ override fun visitSetVariable(setVar: IrSetVariable): IrExpression {\n+ setVar.value = setVar.value.transformAtomicFunctionCall()\n+ return super.visitSetVariable(setVar)\n+ }\n+\n+ override fun visitSetField(setField: IrSetField): IrExpression {\n+ setField.value = setField.value.transformAtomicFunctionCall()\n+ return super.visitSetField(setField)\n+ }\n+\n+ override fun visitVariable(declaration: IrVariable): IrStatement {\n+ declaration.initializer = declaration.initializer?.transformAtomicFunctionCall()\n+ return super.visitVariable(declaration)\n+ }\n+\n+ override fun visitBranch(branch: IrBranch): IrBranch {\n+ branch.apply {\n+ condition = condition.transformAtomicFunctionCall()\n+ result = result.transformAtomicFunctionCall()\n+ }\n+ return super.visitBranch(branch)\n+ }\n+\n+ override fun visitElseBranch(branch: IrElseBranch): IrElseBranch {\n+ branch.apply {\n+ condition = condition.transformAtomicFunctionCall()\n+ result = result.transformAtomicFunctionCall()\n+ }\n+ return super.visitElseBranch(branch)\n+ 
}\n+\n+ private fun IrExpression.transformAtomicValueInitializer() =\n+ when {\n+ type.isAtomicValueType() -> getPureTypeValue()\n+ type.isAtomicArrayType() -> buildPureTypeArrayConstructor()\n+ type.isReentrantLockType() -> buildConstNull()\n+ else -> this\n+ }\n+\n+ private fun IrDeclaration.transformAtomicInlineDeclaration() {\n+ if (this is IrFunction &&\n+ isInline &&\n+ extensionReceiverParameter != null &&\n+ extensionReceiverParameter!!.type.isAtomicValueType()\n+ ) {\n+ val type = extensionReceiverParameter!!.type\n+ val valueType = type.atomicToValueType()\n+ val getterType = buildFunctionSimpleType(listOf(irBuiltIns.unitType, valueType))\n+ val setterType = buildFunctionSimpleType(listOf(valueType, irBuiltIns.unitType))\n+ val valueParametersCount = valueParameters.size\n+ val extendedValueParameters = mutableListOf().apply {\n+ addAll(valueParameters)\n+ add(buildValueParameter(GETTER, valueParametersCount, getterType))\n+ add(buildValueParameter(SETTER, valueParametersCount + 1, setterType))\n+ }\n+ this as IrSimpleFunction\n+ (descriptor as FunctionDescriptorImpl).initialize(\n+ null,\n+ descriptor.dispatchReceiverParameter,\n+ descriptor.typeParameters,\n+ extendedValueParameters.map { it.descriptor as ValueParameterDescriptor },\n+ descriptor.returnType,\n+ descriptor.modality,\n+ descriptor.visibility\n+ )\n+ extensionReceiverParameter = null\n+ valueParameters = extendedValueParameters\n+ }\n+ }\n+\n+ private fun IrExpression.getPureTypeValue(): IrExpression {\n+ require(this is IrCall && isAtomicFactoryFunction()) { \"Illegal initializer for the atomic property $this\" }\n+ return getValueArgument(0)!!.transformAtomicFunctionCall()\n+ }\n+\n+ private fun IrExpression.buildPureTypeArrayConstructor() =\n+ when (this) {\n+ is IrConstructorCall -> {\n+ require(isAtomicArrayConstructor())\n+ val arrayConstructorSymbol = referenceBuiltInConstructor(type.getArrayClassDescriptor()) { it.valueParameters.size == 1 }\n+ val size = getValueArgument(0)\n+ IrConstructorCallImpl(\n+ UNDEFINED_OFFSET, UNDEFINED_OFFSET,\n+ irBuiltIns.unitType, arrayConstructorSymbol,\n+ 0, 0, 1\n+ ).apply {\n+ putValueArgument(0, size)\n+ }\n+ }\n+ is IrCall -> {\n+ require(isAtomicArrayFactoryFunction()) { \"Unsupported atomic array factory function $this\" }\n+ val arrayFactorySymbol = referencePackageFunction(\"kotlin\", \"arrayOfNulls\")\n+ val arrayElementType = getTypeArgument(0)!!\n+ val size = getValueArgument(0)\n+ buildCall(\n+ target = arrayFactorySymbol,\n+ type = type,\n+ origin = IrStatementOrigin.INVOKE,\n+ typeArguments = listOf(arrayElementType),\n+ valueArguments = listOf(size)\n+ )\n+ }\n+ else -> throw IllegalStateException(\"Illegal type of atomic array initializer\")", + "comment_created_at": "2020-05-22T10:52:48+00:00", + "comment_author": "romanart", + "comment_body": "Use kotlin equivalent `error(..)` instead", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kotlin-prefer-descriptive-errors.md b/_reviewers/kotlin-prefer-descriptive-errors.md index e4b1339..cb805b2 100644 --- a/_reviewers/kotlin-prefer-descriptive-errors.md +++ b/_reviewers/kotlin-prefer-descriptive-errors.md @@ -60,211 +60,3 @@ assert(companion.isCompanion) { "Expected $companion to be a companion object" } ``` By following these practices, you'll make your code more maintainable and debugging much easier when errors inevitably occur. 
- - -[ - { - "discussion_id": "1045585301", - "pr_number": 5044, - "pr_file": "compiler/ir/backend.jvm/src/org/jetbrains/kotlin/backend/jvm/JvmCachedDeclarations.kt", - "created_at": "2022-12-12T09:31:15+00:00", - "commented_code": "}\n }\n\n fun getStaticCompanionReplacementDeclaration(jvmStaticFunction: IrSimpleFunction): IrSimpleFunction =\n staticCompanionReplacementDeclarations.getOrPut(jvmStaticFunction) {\n val companion = jvmStaticFunction.parentAsClass\n assert(companion.isCompanion)", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1045585301", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5044, - "pr_file": "compiler/ir/backend.jvm/src/org/jetbrains/kotlin/backend/jvm/JvmCachedDeclarations.kt", - "discussion_id": "1045585301", - "commented_code": "@@ -182,6 +186,29 @@ class JvmCachedDeclarations(\n }\n }\n \n+ fun getStaticCompanionReplacementDeclaration(jvmStaticFunction: IrSimpleFunction): IrSimpleFunction =\n+ staticCompanionReplacementDeclarations.getOrPut(jvmStaticFunction) {\n+ val companion = jvmStaticFunction.parentAsClass\n+ assert(companion.isCompanion)", - "comment_created_at": "2022-12-12T09:31:15+00:00", - "comment_author": "ilmirus", - "comment_body": "Please, add lambda to `assert` call, otherwise, there will be unhelpful `assertion failed` without further explanation.", - "pr_file_module": null - }, - { - "comment_id": "1046202170", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5044, - "pr_file": "compiler/ir/backend.jvm/src/org/jetbrains/kotlin/backend/jvm/JvmCachedDeclarations.kt", - "discussion_id": "1045585301", - "commented_code": "@@ -182,6 +186,29 @@ class JvmCachedDeclarations(\n }\n }\n \n+ fun getStaticCompanionReplacementDeclaration(jvmStaticFunction: IrSimpleFunction): IrSimpleFunction =\n+ staticCompanionReplacementDeclarations.getOrPut(jvmStaticFunction) {\n+ val companion = jvmStaticFunction.parentAsClass\n+ assert(companion.isCompanion)", - "comment_created_at": "2022-12-12T18:06:55+00:00", - "comment_author": "BomBardyGamer", - "comment_body": "Apologies. This was done this way because this is what the method above it (which gets the static proxy from cache) does. I'll add this.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1665765123", - "pr_number": 5306, - "pr_file": "compiler/cli/cli-base/src/org/jetbrains/kotlin/cli/jvm/modules/CliJavaModuleFinder.kt", - "created_at": "2024-07-04T14:08:01+00:00", - "commented_code": "private fun findSystemModule(moduleRoot: VirtualFile, useSig: Boolean = false): JavaModule.Explicit? {\n val file = moduleRoot.findChild(if (useSig) PsiJavaModule.MODULE_INFO_CLASS + \".sig\" else PsiJavaModule.MODULE_INFO_CLS_FILE)\n ?: return null\n val moduleInfo = JavaModuleInfo.read(file, javaFileManager, allScope) ?: return null\n val moduleInfo = try {\n JavaModuleInfo.read(file, javaFileManager, allScope) ?: return null\n } catch (e: IllegalStateException) {\n reportError(e.message!!)\n throw CompilationErrorException()", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1665765123", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5306, - "pr_file": "compiler/cli/cli-base/src/org/jetbrains/kotlin/cli/jvm/modules/CliJavaModuleFinder.kt", - "discussion_id": "1665765123", - "commented_code": "@@ -119,7 +120,13 @@ class CliJavaModuleFinder(\n private fun findSystemModule(moduleRoot: VirtualFile, useSig: Boolean = false): JavaModule.Explicit? 
{\n val file = moduleRoot.findChild(if (useSig) PsiJavaModule.MODULE_INFO_CLASS + \".sig\" else PsiJavaModule.MODULE_INFO_CLS_FILE)\n ?: return null\n- val moduleInfo = JavaModuleInfo.read(file, javaFileManager, allScope) ?: return null\n+ val moduleInfo = try {\n+ JavaModuleInfo.read(file, javaFileManager, allScope) ?: return null\n+ } catch (e: IllegalStateException) {\n+ reportError(e.message!!)\n+ throw CompilationErrorException()", - "comment_created_at": "2024-07-04T14:08:01+00:00", - "comment_author": "udalov", - "comment_body": "This way we'll lose all important information from `e.cause` that would help us in diagnosing the problem. How about we at least append something like `\"\\nCaused by: ${e.cause::class.java.name}: ${e.cause.message}\"` to the error?", - "pr_file_module": null - }, - { - "comment_id": "1666502069", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5306, - "pr_file": "compiler/cli/cli-base/src/org/jetbrains/kotlin/cli/jvm/modules/CliJavaModuleFinder.kt", - "discussion_id": "1665765123", - "commented_code": "@@ -119,7 +120,13 @@ class CliJavaModuleFinder(\n private fun findSystemModule(moduleRoot: VirtualFile, useSig: Boolean = false): JavaModule.Explicit? {\n val file = moduleRoot.findChild(if (useSig) PsiJavaModule.MODULE_INFO_CLASS + \".sig\" else PsiJavaModule.MODULE_INFO_CLS_FILE)\n ?: return null\n- val moduleInfo = JavaModuleInfo.read(file, javaFileManager, allScope) ?: return null\n+ val moduleInfo = try {\n+ JavaModuleInfo.read(file, javaFileManager, allScope) ?: return null\n+ } catch (e: IllegalStateException) {\n+ reportError(e.message!!)\n+ throw CompilationErrorException()", - "comment_created_at": "2024-07-05T08:25:11+00:00", - "comment_author": "scaventz", - "comment_body": "Agree, updated.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1422626599", - "pr_number": 5220, - "pr_file": "compiler/ir/backend.js/src/org/jetbrains/kotlin/ir/backend/js/transformers/irToJs/JsIntrinsicTransformers.kt", - "created_at": "2023-12-11T15:13:19+00:00", - "commented_code": "}\n\n for ((type, prefix) in intrinsics.primitiveToTypedArrayMap) {\n add(intrinsics.primitiveToSizeConstructor[type]!!) { call, context ->\n add(intrinsics.primitiveToSizeConstructor[type] ?: doError(intrinsics.primitiveToSizeConstructor)) { call, context ->\n JsNew(JsNameRef(\"${prefix}Array\"), translateCallArguments(call, context))\n }\n add(intrinsics.primitiveToLiteralConstructor[type]!!) { call, context ->\n add(intrinsics.primitiveToLiteralConstructor[type] ?: doError(intrinsics.primitiveToSizeConstructor)) { call, context ->", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1422626599", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5220, - "pr_file": "compiler/ir/backend.js/src/org/jetbrains/kotlin/ir/backend/js/transformers/irToJs/JsIntrinsicTransformers.kt", - "discussion_id": "1422626599", - "commented_code": "@@ -167,17 +170,17 @@ class JsIntrinsicTransformers(backendContext: JsIrBackendContext) {\n }\n \n for ((type, prefix) in intrinsics.primitiveToTypedArrayMap) {\n- add(intrinsics.primitiveToSizeConstructor[type]!!) { call, context ->\n+ add(intrinsics.primitiveToSizeConstructor[type] ?: doError(intrinsics.primitiveToSizeConstructor)) { call, context ->\n JsNew(JsNameRef(\"${prefix}Array\"), translateCallArguments(call, context))\n }\n- add(intrinsics.primitiveToLiteralConstructor[type]!!) 
{ call, context ->\n+ add(intrinsics.primitiveToLiteralConstructor[type] ?: doError(intrinsics.primitiveToSizeConstructor)) { call, context ->", - "comment_created_at": "2023-12-11T15:13:19+00:00", - "comment_author": "udalov", - "comment_body": "I don't think these will ever fail", - "pr_file_module": null - }, - { - "comment_id": "1422935649", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5220, - "pr_file": "compiler/ir/backend.js/src/org/jetbrains/kotlin/ir/backend/js/transformers/irToJs/JsIntrinsicTransformers.kt", - "discussion_id": "1422626599", - "commented_code": "@@ -167,17 +170,17 @@ class JsIntrinsicTransformers(backendContext: JsIrBackendContext) {\n }\n \n for ((type, prefix) in intrinsics.primitiveToTypedArrayMap) {\n- add(intrinsics.primitiveToSizeConstructor[type]!!) { call, context ->\n+ add(intrinsics.primitiveToSizeConstructor[type] ?: doError(intrinsics.primitiveToSizeConstructor)) { call, context ->\n JsNew(JsNameRef(\"${prefix}Array\"), translateCallArguments(call, context))\n }\n- add(intrinsics.primitiveToLiteralConstructor[type]!!) { call, context ->\n+ add(intrinsics.primitiveToLiteralConstructor[type] ?: doError(intrinsics.primitiveToSizeConstructor)) { call, context ->", - "comment_created_at": "2023-12-11T18:24:39+00:00", - "comment_author": "InsanusMokrassar", - "comment_body": "I think it is related to a lot of lines in this file, but I already faced some issues in compiler without adequate message or environment info before several times", - "pr_file_module": null - }, - { - "comment_id": "1423852356", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5220, - "pr_file": "compiler/ir/backend.js/src/org/jetbrains/kotlin/ir/backend/js/transformers/irToJs/JsIntrinsicTransformers.kt", - "discussion_id": "1422626599", - "commented_code": "@@ -167,17 +170,17 @@ class JsIntrinsicTransformers(backendContext: JsIrBackendContext) {\n }\n \n for ((type, prefix) in intrinsics.primitiveToTypedArrayMap) {\n- add(intrinsics.primitiveToSizeConstructor[type]!!) { call, context ->\n+ add(intrinsics.primitiveToSizeConstructor[type] ?: doError(intrinsics.primitiveToSizeConstructor)) { call, context ->\n JsNew(JsNameRef(\"${prefix}Array\"), translateCallArguments(call, context))\n }\n- add(intrinsics.primitiveToLiteralConstructor[type]!!) { call, context ->\n+ add(intrinsics.primitiveToLiteralConstructor[type] ?: doError(intrinsics.primitiveToSizeConstructor)) { call, context ->", - "comment_created_at": "2023-12-12T11:21:58+00:00", - "comment_author": "udalov", - "comment_body": "I'm not convinced. I don't see a reason to add effectively dead code.", - "pr_file_module": null - }, - { - "comment_id": "1424189222", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5220, - "pr_file": "compiler/ir/backend.js/src/org/jetbrains/kotlin/ir/backend/js/transformers/irToJs/JsIntrinsicTransformers.kt", - "discussion_id": "1422626599", - "commented_code": "@@ -167,17 +170,17 @@ class JsIntrinsicTransformers(backendContext: JsIrBackendContext) {\n }\n \n for ((type, prefix) in intrinsics.primitiveToTypedArrayMap) {\n- add(intrinsics.primitiveToSizeConstructor[type]!!) { call, context ->\n+ add(intrinsics.primitiveToSizeConstructor[type] ?: doError(intrinsics.primitiveToSizeConstructor)) { call, context ->\n JsNew(JsNameRef(\"${prefix}Array\"), translateCallArguments(call, context))\n }\n- add(intrinsics.primitiveToLiteralConstructor[type]!!) 
{ call, context ->\n+ add(intrinsics.primitiveToLiteralConstructor[type] ?: doError(intrinsics.primitiveToSizeConstructor)) { call, context ->", - "comment_created_at": "2023-12-12T15:34:57+00:00", - "comment_author": "InsanusMokrassar", - "comment_body": "So, may you guarantee somehow that this code will not be used? :) As I have said, in the whole file were a lot of `!!` which means `it is potential thrown error without any useful info`. I just have added some explanation to these things. In case you have some suggestion better than use `!!` or `?: doError`, I would be glad to see it and even realize, I think", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1265437800", - "pr_number": 5158, - "pr_file": "compiler/ir/backend.jvm/codegen/src/org/jetbrains/kotlin/backend/jvm/codegen/AnnotationCodegen.kt", - "created_at": "2023-07-17T14:17:40+00:00", - "commented_code": "else if (param.defaultValue != null)\n continue // Default value will be supplied by JVM at runtime.\n else if (context.state.classBuilderMode.generateBodies) //skip error for KAPT\n error(\"No value for annotation parameter $param\")\n error(\"No value for annotation parameter ${param.dumpKotlinLike()}\")", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1265437800", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5158, - "pr_file": "compiler/ir/backend.jvm/codegen/src/org/jetbrains/kotlin/backend/jvm/codegen/AnnotationCodegen.kt", - "discussion_id": "1265437800", - "commented_code": "@@ -216,7 +216,7 @@ abstract class AnnotationCodegen(\n else if (param.defaultValue != null)\n continue // Default value will be supplied by JVM at runtime.\n else if (context.state.classBuilderMode.generateBodies) //skip error for KAPT\n- error(\"No value for annotation parameter $param\")\n+ error(\"No value for annotation parameter ${param.dumpKotlinLike()}\")", - "comment_created_at": "2023-07-17T14:17:40+00:00", - "comment_author": "udalov", - "comment_body": "Please use `render()` instead, as it's more resilient to errors and more tested (you wouldn't want another exception to happen when you're rendering an exception message)", - "pr_file_module": null - }, - { - "comment_id": "1265478745", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5158, - "pr_file": "compiler/ir/backend.jvm/codegen/src/org/jetbrains/kotlin/backend/jvm/codegen/AnnotationCodegen.kt", - "discussion_id": "1265437800", - "commented_code": "@@ -216,7 +216,7 @@ abstract class AnnotationCodegen(\n else if (param.defaultValue != null)\n continue // Default value will be supplied by JVM at runtime.\n else if (context.state.classBuilderMode.generateBodies) //skip error for KAPT\n- error(\"No value for annotation parameter $param\")\n+ error(\"No value for annotation parameter ${param.dumpKotlinLike()}\")", - "comment_created_at": "2023-07-17T14:45:23+00:00", - "comment_author": "JavierSegoviaCordoba", - "comment_body": "Done!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1035132877", - "pr_number": 5030, - "pr_file": "compiler/fir/semantics/src/org/jetbrains/kotlin/fir/resolve/dfa/PersistentLogicSystem.kt", - "created_at": "2022-11-29T18:36:32+00:00", - "commented_code": "override fun getTypeStatement(variable: RealVariable): TypeStatement? =\n approvedTypeStatements[unwrapVariable(variable)]?.copy(variable = variable)\n\n fun lowestCommonAncestor(other: PersistentFlow): PersistentFlow? 
{\n var left = this\n var right = other\n while (left.level > right.level) {\n left = left.previousFlow ?: return null\n }\n while (right.level > left.level) {\n right = right.previousFlow ?: return null\n }\n while (left != right) {\n left = left.previousFlow ?: return null\n right = right.previousFlow ?: return null\n }\n return left\n }\n\n fun fork(): MutableFlow = MutableFlow(\n this,\n approvedTypeStatements.builder(),\n logicStatements.builder(),\n assignmentIndex.builder(),\n directAliasMap.builder(),\n backwardsAliasMap.builder(),\n )\n}\n\nabstract class PersistentLogicSystem(context: ConeInferenceContext) : LogicSystem(context) {\n abstract val variableStorage: VariableStorageImpl\nclass MutableFlow internal constructor(\n private val previousFlow: PersistentFlow?,\n internal val approvedTypeStatements: PersistentMap.Builder,\n internal val logicStatements: PersistentMap.Builder>,\n internal val assignmentIndex: PersistentMap.Builder,\n internal val directAliasMap: PersistentMap.Builder,\n internal val backwardsAliasMap: PersistentMap.Builder>,\n) : Flow {\n constructor() : this(\n null,\n emptyPersistentHashMapBuilder(),\n emptyPersistentHashMapBuilder(),\n emptyPersistentHashMapBuilder(),\n emptyPersistentHashMapBuilder(),\n emptyPersistentHashMapBuilder(),\n )\n\n override fun createEmptyFlow(): PersistentFlow =\n PersistentFlow()\n override val knownVariables: Set\n get() = approvedTypeStatements.keys + directAliasMap.keys\n\n override fun forkFlow(flow: PersistentFlow): PersistentFlow =\n PersistentFlow(flow)\n override fun unwrapVariable(variable: RealVariable): RealVariable =\n directAliasMap[variable] ?: variable\n\n override fun copyAllInformation(from: PersistentFlow, to: PersistentFlow) {\n to.approvedTypeStatements = from.approvedTypeStatements\n to.logicStatements = from.logicStatements\n to.directAliasMap = from.directAliasMap\n to.backwardsAliasMap = from.backwardsAliasMap\n to.assignmentIndex = from.assignmentIndex\n }\n override fun getTypeStatement(variable: RealVariable): TypeStatement? 
=\n approvedTypeStatements[unwrapVariable(variable)]?.copy(variable = variable)\n\n override fun joinFlow(flows: Collection): PersistentFlow =\n foldFlow(flows, allExecute = false)\n fun freeze(): PersistentFlow = PersistentFlow(\n previousFlow,\n approvedTypeStatements.build(),\n logicStatements.build(),\n assignmentIndex.build(),\n directAliasMap.build(),\n backwardsAliasMap.build(),\n )\n}\n\n override fun unionFlow(flows: Collection): PersistentFlow =\n foldFlow(flows, allExecute = true)\nprivate fun emptyPersistentHashMapBuilder(): PersistentMap.Builder =\n persistentHashMapOf().builder()\n\n private fun foldFlow(flows: Collection, allExecute: Boolean): PersistentFlow {\nabstract class PersistentLogicSystem(context: ConeInferenceContext) : LogicSystem(context) {\n abstract val variableStorage: VariableStorageImpl\n\n override fun joinFlow(flows: Collection, union: Boolean): MutableFlow {\n when (flows.size) {\n 0 -> return createEmptyFlow()\n 1 -> return forkFlow(flows.first())\n 0 -> return MutableFlow()\n 1 -> return flows.first().fork()\n }\n\n val commonFlow = flows.reduce(::lowestCommonFlow)\n val result = forkFlow(commonFlow)\n val commonFlow = flows.reduce { a, b ->\n a.lowestCommonAncestor(b) ?: throw AssertionError(\"no common ancestor in $a, $b\")", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1035132877", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5030, - "pr_file": "compiler/fir/semantics/src/org/jetbrains/kotlin/fir/resolve/dfa/PersistentLogicSystem.kt", - "discussion_id": "1035132877", - "commented_code": "@@ -68,45 +41,91 @@ class PersistentFlow : Flow {\n \n override fun getTypeStatement(variable: RealVariable): TypeStatement? =\n approvedTypeStatements[unwrapVariable(variable)]?.copy(variable = variable)\n+\n+ fun lowestCommonAncestor(other: PersistentFlow): PersistentFlow? 
{\n+ var left = this\n+ var right = other\n+ while (left.level > right.level) {\n+ left = left.previousFlow ?: return null\n+ }\n+ while (right.level > left.level) {\n+ right = right.previousFlow ?: return null\n+ }\n+ while (left != right) {\n+ left = left.previousFlow ?: return null\n+ right = right.previousFlow ?: return null\n+ }\n+ return left\n+ }\n+\n+ fun fork(): MutableFlow = MutableFlow(\n+ this,\n+ approvedTypeStatements.builder(),\n+ logicStatements.builder(),\n+ assignmentIndex.builder(),\n+ directAliasMap.builder(),\n+ backwardsAliasMap.builder(),\n+ )\n }\n \n-abstract class PersistentLogicSystem(context: ConeInferenceContext) : LogicSystem(context) {\n- abstract val variableStorage: VariableStorageImpl\n+class MutableFlow internal constructor(\n+ private val previousFlow: PersistentFlow?,\n+ internal val approvedTypeStatements: PersistentMap.Builder,\n+ internal val logicStatements: PersistentMap.Builder>,\n+ internal val assignmentIndex: PersistentMap.Builder,\n+ internal val directAliasMap: PersistentMap.Builder,\n+ internal val backwardsAliasMap: PersistentMap.Builder>,\n+) : Flow {\n+ constructor() : this(\n+ null,\n+ emptyPersistentHashMapBuilder(),\n+ emptyPersistentHashMapBuilder(),\n+ emptyPersistentHashMapBuilder(),\n+ emptyPersistentHashMapBuilder(),\n+ emptyPersistentHashMapBuilder(),\n+ )\n \n- override fun createEmptyFlow(): PersistentFlow =\n- PersistentFlow()\n+ override val knownVariables: Set\n+ get() = approvedTypeStatements.keys + directAliasMap.keys\n \n- override fun forkFlow(flow: PersistentFlow): PersistentFlow =\n- PersistentFlow(flow)\n+ override fun unwrapVariable(variable: RealVariable): RealVariable =\n+ directAliasMap[variable] ?: variable\n \n- override fun copyAllInformation(from: PersistentFlow, to: PersistentFlow) {\n- to.approvedTypeStatements = from.approvedTypeStatements\n- to.logicStatements = from.logicStatements\n- to.directAliasMap = from.directAliasMap\n- to.backwardsAliasMap = from.backwardsAliasMap\n- to.assignmentIndex = from.assignmentIndex\n- }\n+ override fun getTypeStatement(variable: RealVariable): TypeStatement? 
=\n+ approvedTypeStatements[unwrapVariable(variable)]?.copy(variable = variable)\n \n- override fun joinFlow(flows: Collection): PersistentFlow =\n- foldFlow(flows, allExecute = false)\n+ fun freeze(): PersistentFlow = PersistentFlow(\n+ previousFlow,\n+ approvedTypeStatements.build(),\n+ logicStatements.build(),\n+ assignmentIndex.build(),\n+ directAliasMap.build(),\n+ backwardsAliasMap.build(),\n+ )\n+}\n \n- override fun unionFlow(flows: Collection): PersistentFlow =\n- foldFlow(flows, allExecute = true)\n+private fun emptyPersistentHashMapBuilder(): PersistentMap.Builder =\n+ persistentHashMapOf().builder()\n \n- private fun foldFlow(flows: Collection, allExecute: Boolean): PersistentFlow {\n+abstract class PersistentLogicSystem(context: ConeInferenceContext) : LogicSystem(context) {\n+ abstract val variableStorage: VariableStorageImpl\n+\n+ override fun joinFlow(flows: Collection, union: Boolean): MutableFlow {\n when (flows.size) {\n- 0 -> return createEmptyFlow()\n- 1 -> return forkFlow(flows.first())\n+ 0 -> return MutableFlow()\n+ 1 -> return flows.first().fork()\n }\n \n- val commonFlow = flows.reduce(::lowestCommonFlow)\n- val result = forkFlow(commonFlow)\n+ val commonFlow = flows.reduce { a, b ->\n+ a.lowestCommonAncestor(b) ?: throw AssertionError(\"no common ancestor in $a, $b\")", - "comment_created_at": "2022-11-29T18:36:32+00:00", - "comment_author": "demiurg906", - "comment_body": "It's better to use `error(\"...\")` function from stdlib instead of `AssertionError`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "429178775", - "pr_number": 3415, - "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/AtomicFUTransformerJsIr.kt", - "created_at": "2020-05-22T10:52:48+00:00", - "commented_code": "/*\n * Copyright 2010-2020 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage org.jetbrains.kotlinx.atomicfu.compiler.extensions\n\nimport org.jetbrains.kotlin.backend.common.deepCopyWithVariables\nimport org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\nimport org.jetbrains.kotlin.builtins.PrimitiveType\nimport org.jetbrains.kotlin.descriptors.*\nimport org.jetbrains.kotlin.descriptors.impl.FunctionDescriptorImpl\nimport org.jetbrains.kotlin.ir.*\nimport org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildValueParameter\nimport org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildFunction\nimport org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildBlockBody\nimport org.jetbrains.kotlin.ir.declarations.*\nimport org.jetbrains.kotlin.ir.declarations.impl.*\nimport org.jetbrains.kotlin.ir.expressions.*\nimport org.jetbrains.kotlin.ir.expressions.impl.*\nimport org.jetbrains.kotlin.ir.symbols.IrSimpleFunctionSymbol\nimport org.jetbrains.kotlin.ir.symbols.IrSymbol\nimport org.jetbrains.kotlin.ir.symbols.impl.*\nimport org.jetbrains.kotlin.ir.types.*\nimport org.jetbrains.kotlin.ir.util.*\nimport org.jetbrains.kotlin.ir.visitors.IrElementTransformerVoid\nimport java.lang.IllegalStateException\n\nprivate const val AFU_PKG = \"kotlinx/atomicfu\"\nprivate const val LOCKS = \"locks\"\nprivate const val ATOMIC_CONSTRUCTOR = \"atomic\"\nprivate const val ATOMICFU_VALUE_TYPE = \"\"\"Atomic(Int|Long|Boolean|Ref)\"\"\"\nprivate const val ATOMIC_ARRAY_TYPE = \"\"\"Atomic(Int|Long|Boolean|)Array\"\"\"\nprivate const val ATOMIC_ARRAY_FACTORY_FUNCTION = \"atomicArrayOfNulls\"\nprivate const val ATOMICFU_RUNTIME_FUNCTION_PREDICATE = \"atomicfu_\"\nprivate const val REENTRANT_LOCK_TYPE = \"ReentrantLock\"\nprivate const val GETTER = \"atomicfu\\$getter\"\nprivate const val SETTER = \"atomicfu\\$setter\"\nprivate const val GET = \"get\"\nprivate const val SET = \"set\"\n\nprivate fun String.prettyStr() = replace('/', '.')\n\nclass AtomicFUTransformer(override val context: IrPluginContext) : IrElementTransformerVoid(), TransformerHelpers {\n\n private val irBuiltIns = context.irBuiltIns\n\n private val AFU_CLASSES: Map = mapOf(\n \"AtomicInt\" to irBuiltIns.intType,\n \"AtomicLong\" to irBuiltIns.longType,\n \"AtomicRef\" to irBuiltIns.anyType,\n \"AtomicBoolean\" to irBuiltIns.booleanType\n )\n\n private val AFU_ARRAY_CLASSES: Map = mapOf(\n \"AtomicIntArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.INT),\n \"AtomicLongArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.LONG),\n \"AtomicBooleanArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.BOOLEAN),\n \"AtomicArray\" to context.builtIns.array\n )\n\n override fun visitFile(irFile: IrFile): IrFile {\n irFile.declarations.map { declaration ->\n declaration.transformAtomicInlineDeclaration()\n }\n return super.visitFile(irFile)\n }\n\n override fun visitClass(irClass: IrClass): IrStatement {\n irClass.declarations.map { declaration ->\n declaration.transformAtomicInlineDeclaration()\n }\n return super.visitClass(irClass)\n }\n\n override fun visitProperty(property: IrProperty): IrStatement {\n if (property.backingField != null) {\n val backingField = property.backingField!!\n if (backingField.initializer != null) {\n val initializer = backingField.initializer!!.expression.transformAtomicValueInitializer()\n property.backingField!!.initializer = 
IrExpressionBodyImpl(initializer)\n }\n }\n return super.visitProperty(property)\n }\n\n override fun visitBlockBody(body: IrBlockBody): IrBody {\n body.statements.forEachIndexed { i, stmt ->\n if (stmt is IrCall) {\n body.statements[i] = stmt.transformAtomicFunctionCall()\n }\n }\n return super.visitBlockBody(body)\n }\n\n override fun visitBlock(block: IrBlock): IrExpression {\n block.statements.forEachIndexed { i, stmt ->\n if (stmt is IrCall) {\n block.statements[i] = stmt.transformAtomicFunctionCall()\n }\n }\n return super.visitBlock(block)\n }\n\n override fun visitReturn(returnExpr: IrReturn): IrExpression {\n returnExpr.value = returnExpr.value.transformAtomicFunctionCall()\n return super.visitReturn(returnExpr)\n }\n\n override fun visitTypeOperator(typeOp: IrTypeOperatorCall): IrExpression {\n typeOp.argument = typeOp.argument.transformAtomicFunctionCall()\n return super.visitTypeOperator(typeOp)\n }\n\n override fun visitSetVariable(setVar: IrSetVariable): IrExpression {\n setVar.value = setVar.value.transformAtomicFunctionCall()\n return super.visitSetVariable(setVar)\n }\n\n override fun visitSetField(setField: IrSetField): IrExpression {\n setField.value = setField.value.transformAtomicFunctionCall()\n return super.visitSetField(setField)\n }\n\n override fun visitVariable(declaration: IrVariable): IrStatement {\n declaration.initializer = declaration.initializer?.transformAtomicFunctionCall()\n return super.visitVariable(declaration)\n }\n\n override fun visitBranch(branch: IrBranch): IrBranch {\n branch.apply {\n condition = condition.transformAtomicFunctionCall()\n result = result.transformAtomicFunctionCall()\n }\n return super.visitBranch(branch)\n }\n\n override fun visitElseBranch(branch: IrElseBranch): IrElseBranch {\n branch.apply {\n condition = condition.transformAtomicFunctionCall()\n result = result.transformAtomicFunctionCall()\n }\n return super.visitElseBranch(branch)\n }\n\n private fun IrExpression.transformAtomicValueInitializer() =\n when {\n type.isAtomicValueType() -> getPureTypeValue()\n type.isAtomicArrayType() -> buildPureTypeArrayConstructor()\n type.isReentrantLockType() -> buildConstNull()\n else -> this\n }\n\n private fun IrDeclaration.transformAtomicInlineDeclaration() {\n if (this is IrFunction &&\n isInline &&\n extensionReceiverParameter != null &&\n extensionReceiverParameter!!.type.isAtomicValueType()\n ) {\n val type = extensionReceiverParameter!!.type\n val valueType = type.atomicToValueType()\n val getterType = buildFunctionSimpleType(listOf(irBuiltIns.unitType, valueType))\n val setterType = buildFunctionSimpleType(listOf(valueType, irBuiltIns.unitType))\n val valueParametersCount = valueParameters.size\n val extendedValueParameters = mutableListOf().apply {\n addAll(valueParameters)\n add(buildValueParameter(GETTER, valueParametersCount, getterType))\n add(buildValueParameter(SETTER, valueParametersCount + 1, setterType))\n }\n this as IrSimpleFunction\n (descriptor as FunctionDescriptorImpl).initialize(\n null,\n descriptor.dispatchReceiverParameter,\n descriptor.typeParameters,\n extendedValueParameters.map { it.descriptor as ValueParameterDescriptor },\n descriptor.returnType,\n descriptor.modality,\n descriptor.visibility\n )\n extensionReceiverParameter = null\n valueParameters = extendedValueParameters\n }\n }\n\n private fun IrExpression.getPureTypeValue(): IrExpression {\n require(this is IrCall && isAtomicFactoryFunction()) { \"Illegal initializer for the atomic property $this\" }\n return 
getValueArgument(0)!!.transformAtomicFunctionCall()\n }\n\n private fun IrExpression.buildPureTypeArrayConstructor() =\n when (this) {\n is IrConstructorCall -> {\n require(isAtomicArrayConstructor())\n val arrayConstructorSymbol = referenceBuiltInConstructor(type.getArrayClassDescriptor()) { it.valueParameters.size == 1 }\n val size = getValueArgument(0)\n IrConstructorCallImpl(\n UNDEFINED_OFFSET, UNDEFINED_OFFSET,\n irBuiltIns.unitType, arrayConstructorSymbol,\n 0, 0, 1\n ).apply {\n putValueArgument(0, size)\n }\n }\n is IrCall -> {\n require(isAtomicArrayFactoryFunction()) { \"Unsupported atomic array factory function $this\" }\n val arrayFactorySymbol = referencePackageFunction(\"kotlin\", \"arrayOfNulls\")\n val arrayElementType = getTypeArgument(0)!!\n val size = getValueArgument(0)\n buildCall(\n target = arrayFactorySymbol,\n type = type,\n origin = IrStatementOrigin.INVOKE,\n typeArguments = listOf(arrayElementType),\n valueArguments = listOf(size)\n )\n }\n else -> throw IllegalStateException(\"Illegal type of atomic array initializer\")", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "429178775", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3415, - "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/AtomicFUTransformerJsIr.kt", - "discussion_id": "429178775", - "commented_code": "@@ -0,0 +1,473 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlinx.atomicfu.compiler.extensions\n+\n+import org.jetbrains.kotlin.backend.common.deepCopyWithVariables\n+import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\n+import org.jetbrains.kotlin.builtins.PrimitiveType\n+import org.jetbrains.kotlin.descriptors.*\n+import org.jetbrains.kotlin.descriptors.impl.FunctionDescriptorImpl\n+import org.jetbrains.kotlin.ir.*\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildValueParameter\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildFunction\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildBlockBody\n+import org.jetbrains.kotlin.ir.declarations.*\n+import org.jetbrains.kotlin.ir.declarations.impl.*\n+import org.jetbrains.kotlin.ir.expressions.*\n+import org.jetbrains.kotlin.ir.expressions.impl.*\n+import org.jetbrains.kotlin.ir.symbols.IrSimpleFunctionSymbol\n+import org.jetbrains.kotlin.ir.symbols.IrSymbol\n+import org.jetbrains.kotlin.ir.symbols.impl.*\n+import org.jetbrains.kotlin.ir.types.*\n+import org.jetbrains.kotlin.ir.util.*\n+import org.jetbrains.kotlin.ir.visitors.IrElementTransformerVoid\n+import java.lang.IllegalStateException\n+\n+private const val AFU_PKG = \"kotlinx/atomicfu\"\n+private const val LOCKS = \"locks\"\n+private const val ATOMIC_CONSTRUCTOR = \"atomic\"\n+private const val ATOMICFU_VALUE_TYPE = \"\"\"Atomic(Int|Long|Boolean|Ref)\"\"\"\n+private const val ATOMIC_ARRAY_TYPE = \"\"\"Atomic(Int|Long|Boolean|)Array\"\"\"\n+private const val ATOMIC_ARRAY_FACTORY_FUNCTION = \"atomicArrayOfNulls\"\n+private const val ATOMICFU_RUNTIME_FUNCTION_PREDICATE = \"atomicfu_\"\n+private const val REENTRANT_LOCK_TYPE = \"ReentrantLock\"\n+private const val GETTER = \"atomicfu\\$getter\"\n+private const val SETTER = \"atomicfu\\$setter\"\n+private const val GET = \"get\"\n+private const val SET = \"set\"\n+\n+private fun 
String.prettyStr() = replace('/', '.')\n+\n+class AtomicFUTransformer(override val context: IrPluginContext) : IrElementTransformerVoid(), TransformerHelpers {\n+\n+ private val irBuiltIns = context.irBuiltIns\n+\n+ private val AFU_CLASSES: Map = mapOf(\n+ \"AtomicInt\" to irBuiltIns.intType,\n+ \"AtomicLong\" to irBuiltIns.longType,\n+ \"AtomicRef\" to irBuiltIns.anyType,\n+ \"AtomicBoolean\" to irBuiltIns.booleanType\n+ )\n+\n+ private val AFU_ARRAY_CLASSES: Map = mapOf(\n+ \"AtomicIntArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.INT),\n+ \"AtomicLongArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.LONG),\n+ \"AtomicBooleanArray\" to context.builtIns.getPrimitiveArrayClassDescriptor(PrimitiveType.BOOLEAN),\n+ \"AtomicArray\" to context.builtIns.array\n+ )\n+\n+ override fun visitFile(irFile: IrFile): IrFile {\n+ irFile.declarations.map { declaration ->\n+ declaration.transformAtomicInlineDeclaration()\n+ }\n+ return super.visitFile(irFile)\n+ }\n+\n+ override fun visitClass(irClass: IrClass): IrStatement {\n+ irClass.declarations.map { declaration ->\n+ declaration.transformAtomicInlineDeclaration()\n+ }\n+ return super.visitClass(irClass)\n+ }\n+\n+ override fun visitProperty(property: IrProperty): IrStatement {\n+ if (property.backingField != null) {\n+ val backingField = property.backingField!!\n+ if (backingField.initializer != null) {\n+ val initializer = backingField.initializer!!.expression.transformAtomicValueInitializer()\n+ property.backingField!!.initializer = IrExpressionBodyImpl(initializer)\n+ }\n+ }\n+ return super.visitProperty(property)\n+ }\n+\n+ override fun visitBlockBody(body: IrBlockBody): IrBody {\n+ body.statements.forEachIndexed { i, stmt ->\n+ if (stmt is IrCall) {\n+ body.statements[i] = stmt.transformAtomicFunctionCall()\n+ }\n+ }\n+ return super.visitBlockBody(body)\n+ }\n+\n+ override fun visitBlock(block: IrBlock): IrExpression {\n+ block.statements.forEachIndexed { i, stmt ->\n+ if (stmt is IrCall) {\n+ block.statements[i] = stmt.transformAtomicFunctionCall()\n+ }\n+ }\n+ return super.visitBlock(block)\n+ }\n+\n+ override fun visitReturn(returnExpr: IrReturn): IrExpression {\n+ returnExpr.value = returnExpr.value.transformAtomicFunctionCall()\n+ return super.visitReturn(returnExpr)\n+ }\n+\n+ override fun visitTypeOperator(typeOp: IrTypeOperatorCall): IrExpression {\n+ typeOp.argument = typeOp.argument.transformAtomicFunctionCall()\n+ return super.visitTypeOperator(typeOp)\n+ }\n+\n+ override fun visitSetVariable(setVar: IrSetVariable): IrExpression {\n+ setVar.value = setVar.value.transformAtomicFunctionCall()\n+ return super.visitSetVariable(setVar)\n+ }\n+\n+ override fun visitSetField(setField: IrSetField): IrExpression {\n+ setField.value = setField.value.transformAtomicFunctionCall()\n+ return super.visitSetField(setField)\n+ }\n+\n+ override fun visitVariable(declaration: IrVariable): IrStatement {\n+ declaration.initializer = declaration.initializer?.transformAtomicFunctionCall()\n+ return super.visitVariable(declaration)\n+ }\n+\n+ override fun visitBranch(branch: IrBranch): IrBranch {\n+ branch.apply {\n+ condition = condition.transformAtomicFunctionCall()\n+ result = result.transformAtomicFunctionCall()\n+ }\n+ return super.visitBranch(branch)\n+ }\n+\n+ override fun visitElseBranch(branch: IrElseBranch): IrElseBranch {\n+ branch.apply {\n+ condition = condition.transformAtomicFunctionCall()\n+ result = result.transformAtomicFunctionCall()\n+ }\n+ return super.visitElseBranch(branch)\n+ 
}\n+\n+ private fun IrExpression.transformAtomicValueInitializer() =\n+ when {\n+ type.isAtomicValueType() -> getPureTypeValue()\n+ type.isAtomicArrayType() -> buildPureTypeArrayConstructor()\n+ type.isReentrantLockType() -> buildConstNull()\n+ else -> this\n+ }\n+\n+ private fun IrDeclaration.transformAtomicInlineDeclaration() {\n+ if (this is IrFunction &&\n+ isInline &&\n+ extensionReceiverParameter != null &&\n+ extensionReceiverParameter!!.type.isAtomicValueType()\n+ ) {\n+ val type = extensionReceiverParameter!!.type\n+ val valueType = type.atomicToValueType()\n+ val getterType = buildFunctionSimpleType(listOf(irBuiltIns.unitType, valueType))\n+ val setterType = buildFunctionSimpleType(listOf(valueType, irBuiltIns.unitType))\n+ val valueParametersCount = valueParameters.size\n+ val extendedValueParameters = mutableListOf().apply {\n+ addAll(valueParameters)\n+ add(buildValueParameter(GETTER, valueParametersCount, getterType))\n+ add(buildValueParameter(SETTER, valueParametersCount + 1, setterType))\n+ }\n+ this as IrSimpleFunction\n+ (descriptor as FunctionDescriptorImpl).initialize(\n+ null,\n+ descriptor.dispatchReceiverParameter,\n+ descriptor.typeParameters,\n+ extendedValueParameters.map { it.descriptor as ValueParameterDescriptor },\n+ descriptor.returnType,\n+ descriptor.modality,\n+ descriptor.visibility\n+ )\n+ extensionReceiverParameter = null\n+ valueParameters = extendedValueParameters\n+ }\n+ }\n+\n+ private fun IrExpression.getPureTypeValue(): IrExpression {\n+ require(this is IrCall && isAtomicFactoryFunction()) { \"Illegal initializer for the atomic property $this\" }\n+ return getValueArgument(0)!!.transformAtomicFunctionCall()\n+ }\n+\n+ private fun IrExpression.buildPureTypeArrayConstructor() =\n+ when (this) {\n+ is IrConstructorCall -> {\n+ require(isAtomicArrayConstructor())\n+ val arrayConstructorSymbol = referenceBuiltInConstructor(type.getArrayClassDescriptor()) { it.valueParameters.size == 1 }\n+ val size = getValueArgument(0)\n+ IrConstructorCallImpl(\n+ UNDEFINED_OFFSET, UNDEFINED_OFFSET,\n+ irBuiltIns.unitType, arrayConstructorSymbol,\n+ 0, 0, 1\n+ ).apply {\n+ putValueArgument(0, size)\n+ }\n+ }\n+ is IrCall -> {\n+ require(isAtomicArrayFactoryFunction()) { \"Unsupported atomic array factory function $this\" }\n+ val arrayFactorySymbol = referencePackageFunction(\"kotlin\", \"arrayOfNulls\")\n+ val arrayElementType = getTypeArgument(0)!!\n+ val size = getValueArgument(0)\n+ buildCall(\n+ target = arrayFactorySymbol,\n+ type = type,\n+ origin = IrStatementOrigin.INVOKE,\n+ typeArguments = listOf(arrayElementType),\n+ valueArguments = listOf(size)\n+ )\n+ }\n+ else -> throw IllegalStateException(\"Illegal type of atomic array initializer\")", - "comment_created_at": "2020-05-22T10:52:48+00:00", - "comment_author": "romanart", - "comment_body": "Use kotlin equivalent `error(..)` instead", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kotlin-standardize-build-configurations.json b/_reviewers/kotlin-standardize-build-configurations.json new file mode 100644 index 0000000..625dec3 --- /dev/null +++ b/_reviewers/kotlin-standardize-build-configurations.json @@ -0,0 +1,80 @@ +[ + { + "discussion_id": "1094239825", + "pr_number": 5088, + "pr_file": "libraries/stdlib/jvm/build.gradle", + "created_at": "2023-02-02T09:22:04+00:00", + "commented_code": "testApi project(':kotlin-test:kotlin-test-junit')\n\n builtins project(':core:builtins')\n\n testImplementation(\"com.google.code.gson:gson:2.8.9\")", + "repo_full_name": "JetBrains/kotlin", + 
"discussion_comments": [ + { + "comment_id": "1094239825", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5088, + "pr_file": "libraries/stdlib/jvm/build.gradle", + "discussion_id": "1094239825", + "commented_code": "@@ -79,6 +79,8 @@ dependencies {\n testApi project(':kotlin-test:kotlin-test-junit')\n \n builtins project(':core:builtins')\n+\n+ testImplementation(\"com.google.code.gson:gson:2.8.9\")", + "comment_created_at": "2023-02-02T09:22:04+00:00", + "comment_author": "Badya", + "comment_body": "In order to have the same version of dependency across all the modules, it is better to declare it this way:\r\n```\r\ntestImplementation(commonDependency(\"com.google.code.gson:gson\"))\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1094461867", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5088, + "pr_file": "libraries/stdlib/jvm/build.gradle", + "discussion_id": "1094239825", + "commented_code": "@@ -79,6 +79,8 @@ dependencies {\n testApi project(':kotlin-test:kotlin-test-junit')\n \n builtins project(':core:builtins')\n+\n+ testImplementation(\"com.google.code.gson:gson:2.8.9\")", + "comment_created_at": "2023-02-02T12:33:13+00:00", + "comment_author": "nikita-nazarov", + "comment_body": "Thanks for the comment! Unfortunately `libraries/stdlib/jvm/build.gradle` is a groovy script and you can't use the `commonDependency(...)` function there (as it is defined in `DependenciesKt#commonDependency` and probably can't be directly imported). What would be the best way to approach this issue?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "441420936", + "pr_number": 3415, + "pr_file": "libraries/tools/atomicfu/build.gradle", + "created_at": "2020-06-17T09:44:55+00:00", + "commented_code": "repositories {\n mavenCentral()\n}\n\napply plugin: 'kotlin'\napply plugin: 'jps-compatible'\n\nconfigurePublishing(project)\n\npill {\n variant = 'FULL'\n}\n\ncompileJava {", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "441420936", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3415, + "pr_file": "libraries/tools/atomicfu/build.gradle", + "discussion_id": "441420936", + "commented_code": "@@ -0,0 +1,45 @@\n+repositories {\n+ mavenCentral()\n+}\n+\n+apply plugin: 'kotlin'\n+apply plugin: 'jps-compatible'\n+\n+configurePublishing(project)\n+\n+pill {\n+ variant = 'FULL'\n+}\n+\n+compileJava {", + "comment_created_at": "2020-06-17T09:44:55+00:00", + "comment_author": "4u7", + "comment_body": "Looks like this code duplicates configuration done for all project in the root project\r\nhttps://github.com/JetBrains/kotlin/blob/master/build.gradle.kts#L948", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "441421656", + "pr_number": 3415, + "pr_file": "libraries/tools/atomicfu/build.gradle", + "created_at": "2020-06-17T09:45:58+00:00", + "commented_code": "repositories {", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "441421656", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3415, + "pr_file": "libraries/tools/atomicfu/build.gradle", + "discussion_id": "441421656", + "commented_code": "@@ -0,0 +1,45 @@\n+repositories {", + "comment_created_at": "2020-06-17T09:45:58+00:00", + "comment_author": "4u7", + "comment_body": "Please prefer adding new modules with `.gradle.kts` scripts, it's not an issue tho", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kotlin-standardize-build-configurations.md 
b/_reviewers/kotlin-standardize-build-configurations.md index 2206b2b..5a3c5ad 100644 --- a/_reviewers/kotlin-standardize-build-configurations.md +++ b/_reviewers/kotlin-standardize-build-configurations.md @@ -31,85 +31,3 @@ Maintain consistent and standardized build configurations across the project to 3. **Prefer Kotlin DSL** - For new modules, use `.gradle.kts` (Kotlin) scripts rather than `.gradle` (Groovy) scripts to benefit from type safety, better IDE support, and consistency with the broader codebase. By following these practices, you'll reduce configuration drift, make updates more manageable, and maintain a more consistent build environment. - - -[ - { - "discussion_id": "1094239825", - "pr_number": 5088, - "pr_file": "libraries/stdlib/jvm/build.gradle", - "created_at": "2023-02-02T09:22:04+00:00", - "commented_code": "testApi project(':kotlin-test:kotlin-test-junit')\n\n builtins project(':core:builtins')\n\n testImplementation(\"com.google.code.gson:gson:2.8.9\")", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1094239825", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5088, - "pr_file": "libraries/stdlib/jvm/build.gradle", - "discussion_id": "1094239825", - "commented_code": "@@ -79,6 +79,8 @@ dependencies {\n testApi project(':kotlin-test:kotlin-test-junit')\n \n builtins project(':core:builtins')\n+\n+ testImplementation(\"com.google.code.gson:gson:2.8.9\")", - "comment_created_at": "2023-02-02T09:22:04+00:00", - "comment_author": "Badya", - "comment_body": "In order to have the same version of dependency across all the modules, it is better to declare it this way:\r\n```\r\ntestImplementation(commonDependency(\"com.google.code.gson:gson\"))\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1094461867", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5088, - "pr_file": "libraries/stdlib/jvm/build.gradle", - "discussion_id": "1094239825", - "commented_code": "@@ -79,6 +79,8 @@ dependencies {\n testApi project(':kotlin-test:kotlin-test-junit')\n \n builtins project(':core:builtins')\n+\n+ testImplementation(\"com.google.code.gson:gson:2.8.9\")", - "comment_created_at": "2023-02-02T12:33:13+00:00", - "comment_author": "nikita-nazarov", - "comment_body": "Thanks for the comment! Unfortunately `libraries/stdlib/jvm/build.gradle` is a groovy script and you can't use the `commonDependency(...)` function there (as it is defined in `DependenciesKt#commonDependency` and probably can't be directly imported). 
What would be the best way to approach this issue?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "441420936", - "pr_number": 3415, - "pr_file": "libraries/tools/atomicfu/build.gradle", - "created_at": "2020-06-17T09:44:55+00:00", - "commented_code": "repositories {\n mavenCentral()\n}\n\napply plugin: 'kotlin'\napply plugin: 'jps-compatible'\n\nconfigurePublishing(project)\n\npill {\n variant = 'FULL'\n}\n\ncompileJava {", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "441420936", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3415, - "pr_file": "libraries/tools/atomicfu/build.gradle", - "discussion_id": "441420936", - "commented_code": "@@ -0,0 +1,45 @@\n+repositories {\n+ mavenCentral()\n+}\n+\n+apply plugin: 'kotlin'\n+apply plugin: 'jps-compatible'\n+\n+configurePublishing(project)\n+\n+pill {\n+ variant = 'FULL'\n+}\n+\n+compileJava {", - "comment_created_at": "2020-06-17T09:44:55+00:00", - "comment_author": "4u7", - "comment_body": "Looks like this code duplicates configuration done for all project in the root project\r\nhttps://github.com/JetBrains/kotlin/blob/master/build.gradle.kts#L948", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "441421656", - "pr_number": 3415, - "pr_file": "libraries/tools/atomicfu/build.gradle", - "created_at": "2020-06-17T09:45:58+00:00", - "commented_code": "repositories {", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "441421656", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3415, - "pr_file": "libraries/tools/atomicfu/build.gradle", - "discussion_id": "441421656", - "commented_code": "@@ -0,0 +1,45 @@\n+repositories {", - "comment_created_at": "2020-06-17T09:45:58+00:00", - "comment_author": "4u7", - "comment_body": "Please prefer adding new modules with `.gradle.kts` scripts, it's not an issue tho", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kotlin-test-edge-cases.json b/_reviewers/kotlin-test-edge-cases.json new file mode 100644 index 0000000..1278e83 --- /dev/null +++ b/_reviewers/kotlin-test-edge-cases.json @@ -0,0 +1,264 @@ +[ + { + "discussion_id": "1987917791", + "pr_number": 5411, + "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", + "created_at": "2025-03-10T19:44:10+00:00", + "commented_code": "assertPrints(matchDetails(inputString, toFind, 10), \"Searching for 'ever' in 'Never ever give up' starting at position 10: Not found\")\n }\n\n @Sample\n fun lastIndexOf() {\n fun matchDetails(inputString: String, whatToFind: String, startIndex: Int = 0): String {\n val matchIndex = inputString.lastIndexOf(whatToFind, startIndex)\n return \"Searching for '$whatToFind' in '$inputString' starting at position $startIndex: \" +\n if (matchIndex >= 0) \"Found at $matchIndex\" else \"Not found\"\n }\n\n val inputString = \"Never ever give up\"\n val toFind = \"ever\"\n\n assertPrints(matchDetails(inputString, toFind), \"Searching for 'ever' in 'Never ever give up' starting at position 0: Found at 6\")\n assertPrints(matchDetails(inputString, toFind, 10), \"Searching for 'ever' in 'Never ever give up' starting at position 10: Not found\")\n }\n\n @Sample\n fun contains() {\n val string = \"Kotlin 1.4.0\"\n assertTrue(string.contains(\"K\"))\n assertFalse(string.contains(\"k\"))\n\n assertTrue(string.contains(\"1.4\"))\n assertFalse(string.contains(\"Z\"))", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1987917791", + "repo_full_name": 
"JetBrains/kotlin", + "pr_number": 5411, + "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", + "discussion_id": "1987917791", + "commented_code": "@@ -444,6 +444,33 @@ class Strings {\n assertPrints(matchDetails(inputString, toFind, 10), \"Searching for 'ever' in 'Never ever give up' starting at position 10: Not found\")\n }\n \n+ @Sample\n+ fun lastIndexOf() {\n+ fun matchDetails(inputString: String, whatToFind: String, startIndex: Int = 0): String {\n+ val matchIndex = inputString.lastIndexOf(whatToFind, startIndex)\n+ return \"Searching for '$whatToFind' in '$inputString' starting at position $startIndex: \" +\n+ if (matchIndex >= 0) \"Found at $matchIndex\" else \"Not found\"\n+ }\n+\n+ val inputString = \"Never ever give up\"\n+ val toFind = \"ever\"\n+\n+ assertPrints(matchDetails(inputString, toFind), \"Searching for 'ever' in 'Never ever give up' starting at position 0: Found at 6\")\n+ assertPrints(matchDetails(inputString, toFind, 10), \"Searching for 'ever' in 'Never ever give up' starting at position 10: Not found\")\n+ }\n+\n+ @Sample\n+ fun contains() {\n+ val string = \"Kotlin 1.4.0\"\n+ assertTrue(string.contains(\"K\"))\n+ assertFalse(string.contains(\"k\"))\n+\n+ assertTrue(string.contains(\"1.4\"))\n+ assertFalse(string.contains(\"Z\"))", + "comment_created_at": "2025-03-10T19:44:10+00:00", + "comment_author": "fzhinkin", + "comment_body": "I don't think these two examples adds a lot of value, two examples in the beginning of the sample shows the same: `contains` return true when a string contains a corresponding sub-sequences, and false otherwise.\r\n\r\nInstead, it would be nice to show how `contains` handles edge-cases:\r\n- when the receiver is not empty, but the parameter is empty;\r\n- when both the receiver and the parameter are the same char sequence (i.e., \"Kotlin\" in \"Kotlin\")\r\n- when both the receiver and the parameter are empty strings;\r\n- when the receiver is a sub-sequence of the parameter (i.e. \"Kotlin 2.2.0\" in \"Kotlin\").", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "451430040", + "pr_number": 3516, + "pr_file": "libraries/scripting/type-providers/src/kotlin/script/experimental/typeProviders/generatedCode/GeneratedCode.kt", + "created_at": "2020-07-08T10:05:21+00:00", + "commented_code": "/*\n * Copyright 2010-2020 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage kotlin.script.experimental.typeProviders.generatedCode\n\nimport kotlin.reflect.KClass\nimport kotlin.script.experimental.typeProviders.generatedCode.impl.*\nimport kotlin.script.experimental.typeProviders.generatedCode.impl.Object\nimport kotlin.script.experimental.typeProviders.generatedCode.impl.writeJoined\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.Generated\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.GeneratedCodeVisitor\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.InternalGeneratedCode\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.visit\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.captureImports\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.write\nimport kotlin.text.StringBuilder\n\n/**\n * Code returned by a Type Provider.\n * [GeneratedCode] is a recursive construction that allows you to build generated code as a combination of multiple types already provided\n *\n * Example:\n *\n * ```kotlin\n * data class MyObject(val name: String, val value: Int) : GeneratedCode {\n * fun body(): GeneratedCode {\n * return GeneratedCode.composite {\n * +makeDataClass(name)\n * .withInterface(MyInterface::class)\n *\n * +makeConstant(\"$nameConstant\", value)\n * }\n * }\n * }\n * ```\n *\n * [GeneratedCode] can contain any of the following:\n * - Data classes. See [makeDataClass].\n * - Objects. See [makeObject].\n * - Constants. See [makeConstant].\n * - Packages. See [makePackage].\n * - Extension functions. 
See [makeExtensionMethod].\n * - etc.\n */\ninterface GeneratedCode {\n /**\n * Generate the code that corresponds to your type\n *\n * @return An instance of [GeneratedCode].\n * Important: don't return an infinite recursion in the form of return `this`\n */\n fun body(): GeneratedCode\n\n /**\n * A builder for a composite of multiple types\n */\n class Builder internal constructor() {\n private val mutableCodeList = mutableListOf()\n\n /**\n * Include code in your compound\n */\n operator fun GeneratedCode.unaryPlus() {\n if (this is CompoundGeneratedCode) {\n mutableCodeList.addAll(codeList)\n } else {\n mutableCodeList.add(this)\n }\n }\n\n operator fun String.unaryPlus() {\n mutableCodeList.add(inlineCode(this))\n }\n\n internal fun build(): GeneratedCode {\n return composite(mutableCodeList)\n }\n }\n\n object Empty : GeneratedCode by EmptyCode()\n\n companion object {\n /**\n * Create a composition of multiple instances of Generated Code\n */\n fun composite(init: Builder.() -> Unit) = Builder().apply(init).build()\n\n /**\n * Create a composition of multiple instances of Generated Code\n */\n fun composite(iterable: Collection): GeneratedCode = if (iterable.count() == 1)\n iterable.first()\n else\n CompoundGeneratedCode(iterable.toList())\n\n /**\n * Create a composition of multiple instances of Generated Code\n */\n fun composite(vararg code: GeneratedCode) = composite(code.toList())\n }\n}\n\nprivate data class CompoundGeneratedCode(val codeList: List) : InternalGeneratedCode() {\n override fun StringBuilder.write(indent: Int) {\n writeJoined(codeList, \"\\n\\n\", indent)\n }\n\n override fun GeneratedCodeVisitor.visit() {\n for (code in codeList) {\n visit(code)\n }\n }\n}\n\n/**\n * References a member that can be used as a type\n */\ninterface IdentifiableMember {\n val name: String\n fun imports(): Set = emptySet()\n\n companion object {\n private data class KClassIdentifiable(val kClass: KClass<*>) : IdentifiableMember {\n override val name: String = kClass.simpleName ?: kClass.qualifiedName!!\n override fun imports() = setOfNotNull(kClass.qualifiedName)\n }\n\n private data class NamedIdentifiable(override val name: String) : IdentifiableMember\n\n operator fun invoke(kClass: KClass<*>): IdentifiableMember = KClassIdentifiable(kClass)\n operator fun invoke(name: String): IdentifiableMember = NamedIdentifiable(name)\n }\n}\n\n/**\n * References an interface that can be implemented\n */\ninterface GeneratedInterface : IdentifiableMember\n\nprivate data class CastedInterfaceMember(private val member: IdentifiableMember) : GeneratedInterface, IdentifiableMember by member\n\nfun IdentifiableMember.asInterface(): GeneratedInterface = CastedInterfaceMember(this)\n\n/**\n * Generated code that is a type that can be used as an Argument\n */\ninterface IdentifiableCode : GeneratedCode, IdentifiableMember\n\n/**\n * Builder for a provided type whose JVM Name can be set\n */\ninterface JVMNameableBuilder {\n fun jvmName(name: String)\n}\n\n/**\n * Builder for a provided type. It can implement an interface and have some code inside.\n */\ninterface GeneratedTypeBuilder : IdentifiableMember {\n /**\n * Implement a generated interface\n */\n fun implement(generated: GeneratedInterface)\n\n /**\n * Add code to the namespace of the Type\n */\n fun inner(code: GeneratedCode)\n}\n\n/**\n * Builder for a provided data class. 
It can implement interfaces, have code inside and have a number of properties\n */\ninterface DataClassBuilder : JVMNameableBuilder, GeneratedTypeBuilder {\n fun property(name: String, type: IdentifiableMember, default: GeneratedCode? = null)\n}\n\n/**\n * Builder for a provided object. It can implement interfaces, have code inside and have a number of properties.\n */\ninterface ObjectBuilder : JVMNameableBuilder, GeneratedTypeBuilder\n\n/**\n * Add multiple instances of generated code inside of the type\n */\nfun GeneratedTypeBuilder.inner(init: GeneratedCode.Builder.() -> Unit) {\n inner(GeneratedCode.composite(init))\n}\n\n/**\n * Implement the interface.\n */\nfun GeneratedTypeBuilder.implement(kClass: KClass<*>) {\n require(kClass.java.isInterface) { \"Provided type is not an interface\" }\n implement(IdentifiableMember(kClass).asInterface())\n}\n\n/**\n * Add a companion object to the generated type\n */\nfun GeneratedTypeBuilder.companionObject(name: String? = null, init: ObjectBuilder.() -> Unit) {", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "451430040", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3516, + "pr_file": "libraries/scripting/type-providers/src/kotlin/script/experimental/typeProviders/generatedCode/GeneratedCode.kt", + "discussion_id": "451430040", + "commented_code": "@@ -0,0 +1,302 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.typeProviders.generatedCode\n+\n+import kotlin.reflect.KClass\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.*\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.Object\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.writeJoined\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.Generated\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.GeneratedCodeVisitor\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.InternalGeneratedCode\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visit\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.captureImports\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.write\n+import kotlin.text.StringBuilder\n+\n+/**\n+ * Code returned by a Type Provider.\n+ * [GeneratedCode] is a recursive construction that allows you to build generated code as a combination of multiple types already provided\n+ *\n+ * Example:\n+ *\n+ * ```kotlin\n+ * data class MyObject(val name: String, val value: Int) : GeneratedCode {\n+ * fun body(): GeneratedCode {\n+ * return GeneratedCode.composite {\n+ * +makeDataClass(name)\n+ * .withInterface(MyInterface::class)\n+ *\n+ * +makeConstant(\"$nameConstant\", value)\n+ * }\n+ * }\n+ * }\n+ * ```\n+ *\n+ * [GeneratedCode] can contain any of the following:\n+ * - Data classes. See [makeDataClass].\n+ * - Objects. See [makeObject].\n+ * - Constants. See [makeConstant].\n+ * - Packages. See [makePackage].\n+ * - Extension functions. 
See [makeExtensionMethod].\n+ * - etc.\n+ */\n+interface GeneratedCode {\n+ /**\n+ * Generate the code that corresponds to your type\n+ *\n+ * @return An instance of [GeneratedCode].\n+ * Important: don't return an infinite recursion in the form of return `this`\n+ */\n+ fun body(): GeneratedCode\n+\n+ /**\n+ * A builder for a composite of multiple types\n+ */\n+ class Builder internal constructor() {\n+ private val mutableCodeList = mutableListOf()\n+\n+ /**\n+ * Include code in your compound\n+ */\n+ operator fun GeneratedCode.unaryPlus() {\n+ if (this is CompoundGeneratedCode) {\n+ mutableCodeList.addAll(codeList)\n+ } else {\n+ mutableCodeList.add(this)\n+ }\n+ }\n+\n+ operator fun String.unaryPlus() {\n+ mutableCodeList.add(inlineCode(this))\n+ }\n+\n+ internal fun build(): GeneratedCode {\n+ return composite(mutableCodeList)\n+ }\n+ }\n+\n+ object Empty : GeneratedCode by EmptyCode()\n+\n+ companion object {\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(init: Builder.() -> Unit) = Builder().apply(init).build()\n+\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(iterable: Collection): GeneratedCode = if (iterable.count() == 1)\n+ iterable.first()\n+ else\n+ CompoundGeneratedCode(iterable.toList())\n+\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(vararg code: GeneratedCode) = composite(code.toList())\n+ }\n+}\n+\n+private data class CompoundGeneratedCode(val codeList: List) : InternalGeneratedCode() {\n+ override fun StringBuilder.write(indent: Int) {\n+ writeJoined(codeList, \"\\n\\n\", indent)\n+ }\n+\n+ override fun GeneratedCodeVisitor.visit() {\n+ for (code in codeList) {\n+ visit(code)\n+ }\n+ }\n+}\n+\n+/**\n+ * References a member that can be used as a type\n+ */\n+interface IdentifiableMember {\n+ val name: String\n+ fun imports(): Set = emptySet()\n+\n+ companion object {\n+ private data class KClassIdentifiable(val kClass: KClass<*>) : IdentifiableMember {\n+ override val name: String = kClass.simpleName ?: kClass.qualifiedName!!\n+ override fun imports() = setOfNotNull(kClass.qualifiedName)\n+ }\n+\n+ private data class NamedIdentifiable(override val name: String) : IdentifiableMember\n+\n+ operator fun invoke(kClass: KClass<*>): IdentifiableMember = KClassIdentifiable(kClass)\n+ operator fun invoke(name: String): IdentifiableMember = NamedIdentifiable(name)\n+ }\n+}\n+\n+/**\n+ * References an interface that can be implemented\n+ */\n+interface GeneratedInterface : IdentifiableMember\n+\n+private data class CastedInterfaceMember(private val member: IdentifiableMember) : GeneratedInterface, IdentifiableMember by member\n+\n+fun IdentifiableMember.asInterface(): GeneratedInterface = CastedInterfaceMember(this)\n+\n+/**\n+ * Generated code that is a type that can be used as an Argument\n+ */\n+interface IdentifiableCode : GeneratedCode, IdentifiableMember\n+\n+/**\n+ * Builder for a provided type whose JVM Name can be set\n+ */\n+interface JVMNameableBuilder {\n+ fun jvmName(name: String)\n+}\n+\n+/**\n+ * Builder for a provided type. It can implement an interface and have some code inside.\n+ */\n+interface GeneratedTypeBuilder : IdentifiableMember {\n+ /**\n+ * Implement a generated interface\n+ */\n+ fun implement(generated: GeneratedInterface)\n+\n+ /**\n+ * Add code to the namespace of the Type\n+ */\n+ fun inner(code: GeneratedCode)\n+}\n+\n+/**\n+ * Builder for a provided data class. 
It can implement interfaces, have code inside and have a number of properties\n+ */\n+interface DataClassBuilder : JVMNameableBuilder, GeneratedTypeBuilder {\n+ fun property(name: String, type: IdentifiableMember, default: GeneratedCode? = null)\n+}\n+\n+/**\n+ * Builder for a provided object. It can implement interfaces, have code inside and have a number of properties.\n+ */\n+interface ObjectBuilder : JVMNameableBuilder, GeneratedTypeBuilder\n+\n+/**\n+ * Add multiple instances of generated code inside of the type\n+ */\n+fun GeneratedTypeBuilder.inner(init: GeneratedCode.Builder.() -> Unit) {\n+ inner(GeneratedCode.composite(init))\n+}\n+\n+/**\n+ * Implement the interface.\n+ */\n+fun GeneratedTypeBuilder.implement(kClass: KClass<*>) {\n+ require(kClass.java.isInterface) { \"Provided type is not an interface\" }\n+ implement(IdentifiableMember(kClass).asInterface())\n+}\n+\n+/**\n+ * Add a companion object to the generated type\n+ */\n+fun GeneratedTypeBuilder.companionObject(name: String? = null, init: ObjectBuilder.() -> Unit) {", + "comment_created_at": "2020-07-08T10:05:21+00:00", + "comment_author": "ligee", + "comment_body": "It would be nice to have tests that use all functions from the DSL, otherwise, they may rot over time and we may discover it too late.", + "pr_file_module": null + }, + { + "comment_id": "480104173", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3516, + "pr_file": "libraries/scripting/type-providers/src/kotlin/script/experimental/typeProviders/generatedCode/GeneratedCode.kt", + "discussion_id": "451430040", + "commented_code": "@@ -0,0 +1,302 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.typeProviders.generatedCode\n+\n+import kotlin.reflect.KClass\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.*\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.Object\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.writeJoined\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.Generated\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.GeneratedCodeVisitor\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.InternalGeneratedCode\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visit\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.captureImports\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.write\n+import kotlin.text.StringBuilder\n+\n+/**\n+ * Code returned by a Type Provider.\n+ * [GeneratedCode] is a recursive construction that allows you to build generated code as a combination of multiple types already provided\n+ *\n+ * Example:\n+ *\n+ * ```kotlin\n+ * data class MyObject(val name: String, val value: Int) : GeneratedCode {\n+ * fun body(): GeneratedCode {\n+ * return GeneratedCode.composite {\n+ * +makeDataClass(name)\n+ * .withInterface(MyInterface::class)\n+ *\n+ * +makeConstant(\"$nameConstant\", value)\n+ * }\n+ * }\n+ * }\n+ * ```\n+ *\n+ * [GeneratedCode] can contain any of the following:\n+ * - Data classes. See [makeDataClass].\n+ * - Objects. See [makeObject].\n+ * - Constants. See [makeConstant].\n+ * - Packages. See [makePackage].\n+ * - Extension functions. 
See [makeExtensionMethod].\n+ * - etc.\n+ */\n+interface GeneratedCode {\n+ /**\n+ * Generate the code that corresponds to your type\n+ *\n+ * @return An instance of [GeneratedCode].\n+ * Important: don't return an infinite recursion in the form of return `this`\n+ */\n+ fun body(): GeneratedCode\n+\n+ /**\n+ * A builder for a composite of multiple types\n+ */\n+ class Builder internal constructor() {\n+ private val mutableCodeList = mutableListOf()\n+\n+ /**\n+ * Include code in your compound\n+ */\n+ operator fun GeneratedCode.unaryPlus() {\n+ if (this is CompoundGeneratedCode) {\n+ mutableCodeList.addAll(codeList)\n+ } else {\n+ mutableCodeList.add(this)\n+ }\n+ }\n+\n+ operator fun String.unaryPlus() {\n+ mutableCodeList.add(inlineCode(this))\n+ }\n+\n+ internal fun build(): GeneratedCode {\n+ return composite(mutableCodeList)\n+ }\n+ }\n+\n+ object Empty : GeneratedCode by EmptyCode()\n+\n+ companion object {\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(init: Builder.() -> Unit) = Builder().apply(init).build()\n+\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(iterable: Collection): GeneratedCode = if (iterable.count() == 1)\n+ iterable.first()\n+ else\n+ CompoundGeneratedCode(iterable.toList())\n+\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(vararg code: GeneratedCode) = composite(code.toList())\n+ }\n+}\n+\n+private data class CompoundGeneratedCode(val codeList: List) : InternalGeneratedCode() {\n+ override fun StringBuilder.write(indent: Int) {\n+ writeJoined(codeList, \"\\n\\n\", indent)\n+ }\n+\n+ override fun GeneratedCodeVisitor.visit() {\n+ for (code in codeList) {\n+ visit(code)\n+ }\n+ }\n+}\n+\n+/**\n+ * References a member that can be used as a type\n+ */\n+interface IdentifiableMember {\n+ val name: String\n+ fun imports(): Set = emptySet()\n+\n+ companion object {\n+ private data class KClassIdentifiable(val kClass: KClass<*>) : IdentifiableMember {\n+ override val name: String = kClass.simpleName ?: kClass.qualifiedName!!\n+ override fun imports() = setOfNotNull(kClass.qualifiedName)\n+ }\n+\n+ private data class NamedIdentifiable(override val name: String) : IdentifiableMember\n+\n+ operator fun invoke(kClass: KClass<*>): IdentifiableMember = KClassIdentifiable(kClass)\n+ operator fun invoke(name: String): IdentifiableMember = NamedIdentifiable(name)\n+ }\n+}\n+\n+/**\n+ * References an interface that can be implemented\n+ */\n+interface GeneratedInterface : IdentifiableMember\n+\n+private data class CastedInterfaceMember(private val member: IdentifiableMember) : GeneratedInterface, IdentifiableMember by member\n+\n+fun IdentifiableMember.asInterface(): GeneratedInterface = CastedInterfaceMember(this)\n+\n+/**\n+ * Generated code that is a type that can be used as an Argument\n+ */\n+interface IdentifiableCode : GeneratedCode, IdentifiableMember\n+\n+/**\n+ * Builder for a provided type whose JVM Name can be set\n+ */\n+interface JVMNameableBuilder {\n+ fun jvmName(name: String)\n+}\n+\n+/**\n+ * Builder for a provided type. It can implement an interface and have some code inside.\n+ */\n+interface GeneratedTypeBuilder : IdentifiableMember {\n+ /**\n+ * Implement a generated interface\n+ */\n+ fun implement(generated: GeneratedInterface)\n+\n+ /**\n+ * Add code to the namespace of the Type\n+ */\n+ fun inner(code: GeneratedCode)\n+}\n+\n+/**\n+ * Builder for a provided data class. 
It can implement interfaces, have code inside and have a number of properties\n+ */\n+interface DataClassBuilder : JVMNameableBuilder, GeneratedTypeBuilder {\n+ fun property(name: String, type: IdentifiableMember, default: GeneratedCode? = null)\n+}\n+\n+/**\n+ * Builder for a provided object. It can implement interfaces, have code inside and have a number of properties.\n+ */\n+interface ObjectBuilder : JVMNameableBuilder, GeneratedTypeBuilder\n+\n+/**\n+ * Add multiple instances of generated code inside of the type\n+ */\n+fun GeneratedTypeBuilder.inner(init: GeneratedCode.Builder.() -> Unit) {\n+ inner(GeneratedCode.composite(init))\n+}\n+\n+/**\n+ * Implement the interface.\n+ */\n+fun GeneratedTypeBuilder.implement(kClass: KClass<*>) {\n+ require(kClass.java.isInterface) { \"Provided type is not an interface\" }\n+ implement(IdentifiableMember(kClass).asInterface())\n+}\n+\n+/**\n+ * Add a companion object to the generated type\n+ */\n+fun GeneratedTypeBuilder.companionObject(name: String? = null, init: ObjectBuilder.() -> Unit) {", + "comment_created_at": "2020-08-31T12:43:27+00:00", + "comment_author": "nerdsupremacist", + "comment_body": "Added a bunch of tests. It should be just shy of 70% coverage now", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1881708047", + "pr_number": 5385, + "pr_file": "native/native.tests/testData/codegen/cinterop/kt73565.kt", + "created_at": "2024-12-12T09:32:06+00:00", + "commented_code": "// TARGET_BACKEND: NATIVE\n@file:OptIn(kotlinx.cinterop.ExperimentalForeignApi::class)\n\nimport kotlin.test.*\nimport kotlinx.cinterop.*\n\nfun box(): String {", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1881708047", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5385, + "pr_file": "native/native.tests/testData/codegen/cinterop/kt73565.kt", + "discussion_id": "1881708047", + "commented_code": "@@ -0,0 +1,53 @@\n+// TARGET_BACKEND: NATIVE\n+@file:OptIn(kotlinx.cinterop.ExperimentalForeignApi::class)\n+\n+import kotlin.test.*\n+import kotlinx.cinterop.*\n+\n+fun box(): String {", + "comment_created_at": "2024-12-12T09:32:06+00:00", + "comment_author": "sbogolepov", + "comment_body": "Please add tests for top-level functions as well.", + "pr_file_module": null + }, + { + "comment_id": "1881822457", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5385, + "pr_file": "native/native.tests/testData/codegen/cinterop/kt73565.kt", + "discussion_id": "1881708047", + "commented_code": "@@ -0,0 +1,53 @@\n+// TARGET_BACKEND: NATIVE\n+@file:OptIn(kotlinx.cinterop.ExperimentalForeignApi::class)\n+\n+import kotlin.test.*\n+import kotlinx.cinterop.*\n+\n+fun box(): String {", + "comment_created_at": "2024-12-12T10:41:58+00:00", + "comment_author": "KitsuneAlex", + "comment_body": "Good point, don't know why i didn't think of that, will add.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1870271493", + "pr_number": 4871, + "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", + "created_at": "2024-12-04T20:59:23+00:00", + "commented_code": "assertPrints(textWithoutDigits, \"abcde\")\n }\n\n @Sample\n fun findLast() {\n val text = \"a1b2c3d4e5\"\n\n val lastNumberInText = text.findLast { it.isDigit() }\n val lastUpperCaseInText = text.findLast { it.isUpperCase() }\n\n assertPrints(lastNumberInText, \"5\")\n assertPrints(lastUpperCaseInText, \"null\")\n }", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1870271493", + "repo_full_name": 
"JetBrains/kotlin", + "pr_number": 4871, + "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", + "discussion_id": "1870271493", + "commented_code": "@@ -109,6 +109,17 @@ class Strings {\n assertPrints(textWithoutDigits, \"abcde\")\n }\n \n+ @Sample\n+ fun findLast() {\n+ val text = \"a1b2c3d4e5\"\n+\n+ val lastNumberInText = text.findLast { it.isDigit() }\n+ val lastUpperCaseInText = text.findLast { it.isUpperCase() }\n+\n+ assertPrints(lastNumberInText, \"5\")\n+ assertPrints(lastUpperCaseInText, \"null\")\n+ }", + "comment_created_at": "2024-12-04T20:59:23+00:00", + "comment_author": "fzhinkin", + "comment_body": "It would be nice to also showcase how the function works when invoked on an empty string: `assertPrints(\"\".findLast { true }, \"null\")`", + "pr_file_module": null + }, + { + "comment_id": "1874614782", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4871, + "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", + "discussion_id": "1870271493", + "commented_code": "@@ -109,6 +109,17 @@ class Strings {\n assertPrints(textWithoutDigits, \"abcde\")\n }\n \n+ @Sample\n+ fun findLast() {\n+ val text = \"a1b2c3d4e5\"\n+\n+ val lastNumberInText = text.findLast { it.isDigit() }\n+ val lastUpperCaseInText = text.findLast { it.isUpperCase() }\n+\n+ assertPrints(lastNumberInText, \"5\")\n+ assertPrints(lastUpperCaseInText, \"null\")\n+ }", + "comment_created_at": "2024-12-08T05:00:24+00:00", + "comment_author": "simonNozaki", + "comment_body": "Added: https://github.com/JetBrains/kotlin/pull/4871/commits/bce231a8be4dc97e07424b3635b708a11cf25c85\r\n\r\nAnd, as you advised me, I updates all assertions to inline calling.", + "pr_file_module": null + }, + { + "comment_id": "1876523186", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4871, + "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", + "discussion_id": "1870271493", + "commented_code": "@@ -109,6 +109,17 @@ class Strings {\n assertPrints(textWithoutDigits, \"abcde\")\n }\n \n+ @Sample\n+ fun findLast() {\n+ val text = \"a1b2c3d4e5\"\n+\n+ val lastNumberInText = text.findLast { it.isDigit() }\n+ val lastUpperCaseInText = text.findLast { it.isUpperCase() }\n+\n+ assertPrints(lastNumberInText, \"5\")\n+ assertPrints(lastUpperCaseInText, \"null\")\n+ }", + "comment_created_at": "2024-12-09T19:00:50+00:00", + "comment_author": "fzhinkin", + "comment_body": "Thank you!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1581162687", + "pr_number": 5281, + "pr_file": "kotlin-native/backend.native/tests/runtime/program_name/runtime_program_name.kt", + "created_at": "2024-04-26T15:02:50+00:00", + "commented_code": "@file:OptIn(kotlin.experimental.ExperimentalNativeApi::class)\n\nimport kotlin.test.*\nimport kotlin.native.Platform\n\nfun main(args: Array) {\n // Remove path and extension (.kexe or .exe)\n val programFileName = Platform.programName.substringAfterLast(\"/\").substringBeforeLast(\".\")\n\n assertEquals(\"program_name\", programFileName)\n}", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1581162687", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5281, + "pr_file": "kotlin-native/backend.native/tests/runtime/program_name/runtime_program_name.kt", + "discussion_id": "1581162687", + "commented_code": "@@ -0,0 +1,11 @@\n+@file:OptIn(kotlin.experimental.ExperimentalNativeApi::class)\n+\n+import kotlin.test.*\n+import kotlin.native.Platform\n+\n+fun main(args: Array) {\n+ // Remove path and extension (.kexe or 
.exe)\n+ val programFileName = Platform.programName.substringAfterLast(\"/\").substringBeforeLast(\".\")\n+\n+ assertEquals(\"program_name\", programFileName)\n+}", + "comment_created_at": "2024-04-26T15:02:50+00:00", + "comment_author": "SvyatoslavScherbina", + "comment_body": "More tests are necessary. For example, the original issue mentions a nice use case:\r\n> Build multi-call binaries. For example, busybox is a single binary that contains many tools, and decides which tool to run based on how the symbolic link is called: https://busybox.net/downloads/BusyBox.html#usage (that's how it can be so small)\r\n\r\nSo checking a case with symbolic link would also be useful.", + "pr_file_module": null + }, + { + "comment_id": "1584883309", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5281, + "pr_file": "kotlin-native/backend.native/tests/runtime/program_name/runtime_program_name.kt", + "discussion_id": "1581162687", + "commented_code": "@@ -0,0 +1,11 @@\n+@file:OptIn(kotlin.experimental.ExperimentalNativeApi::class)\n+\n+import kotlin.test.*\n+import kotlin.native.Platform\n+\n+fun main(args: Array) {\n+ // Remove path and extension (.kexe or .exe)\n+ val programFileName = Platform.programName.substringAfterLast(\"/\").substringBeforeLast(\".\")\n+\n+ assertEquals(\"program_name\", programFileName)\n+}", + "comment_created_at": "2024-04-30T13:59:47+00:00", + "comment_author": "vonox7", + "comment_body": "I added test for that by calling `execv()` in the C code. This allowed to test even more use-cases (no programName at all), while still covering the use-case of renaming/linking a binary.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1560651387", + "pr_number": 5270, + "pr_file": "native/native.tests/testData/codegen/cinterop/objc/kt65260.kt", + "created_at": "2024-04-11T08:39:21+00:00", + "commented_code": "/*\n * Copyright 2010-2024 JetBrains s.r.o. and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n// TARGET_BACKEND: NATIVE\n// DISABLE_NATIVE: isAppleTarget=false\n\n// MODULE: cinterop\n// FILE: conversion.def\nlanguage = Objective-C\nheaders = conversion.h\nheaderFilter = conversion.h\n\n// FILE: conversion.h\n#import \n\nNS_ASSUME_NONNULL_BEGIN\n\n__attribute__((objc_runtime_name(\"Foo\")))\n@interface A : NSObject\n@end\n\n__attribute__((objc_runtime_name(\"Bar\")))\n@interface B : A\n@end\n\nNS_ASSUME_NONNULL_END\n\n// FILE: conversion.m\n#import \"conversion.h\"\n\n@implementation A\n@end\n\n@implementation B\n@end\n\n// MODULE: lib(cinterop)\n// FILE: lib.kt\n@file:OptIn(kotlinx.cinterop.ExperimentalForeignApi::class)\nimport conversion.*\nimport platform.darwin.*\nimport kotlinx.cinterop.*\nimport kotlin.test.*\n\nclass ANativeHeir : A() {\n companion object\n}\n\nclass BNativeHeir : B() {\n companion object\n}\n\nfun testExternalObjCMetaClassCast() {\n try {\n BNativeHeir as A.Companion\n } catch (e: Exception) {\n assertTrue(e is ClassCastException)\n }", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1560651387", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5270, + "pr_file": "native/native.tests/testData/codegen/cinterop/objc/kt65260.kt", + "discussion_id": "1560651387", + "commented_code": "@@ -0,0 +1,138 @@\n+/*\n+ * Copyright 2010-2024 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+// TARGET_BACKEND: NATIVE\n+// DISABLE_NATIVE: isAppleTarget=false\n+\n+// MODULE: cinterop\n+// FILE: conversion.def\n+language = Objective-C\n+headers = conversion.h\n+headerFilter = conversion.h\n+\n+// FILE: conversion.h\n+#import \n+\n+NS_ASSUME_NONNULL_BEGIN\n+\n+__attribute__((objc_runtime_name(\"Foo\")))\n+@interface A : NSObject\n+@end\n+\n+__attribute__((objc_runtime_name(\"Bar\")))\n+@interface B : A\n+@end\n+\n+NS_ASSUME_NONNULL_END\n+\n+// FILE: conversion.m\n+#import \"conversion.h\"\n+\n+@implementation A\n+@end\n+\n+@implementation B\n+@end\n+\n+// MODULE: lib(cinterop)\n+// FILE: lib.kt\n+@file:OptIn(kotlinx.cinterop.ExperimentalForeignApi::class)\n+import conversion.*\n+import platform.darwin.*\n+import kotlinx.cinterop.*\n+import kotlin.test.*\n+\n+class ANativeHeir : A() {\n+ companion object\n+}\n+\n+class BNativeHeir : B() {\n+ companion object\n+}\n+\n+fun testExternalObjCMetaClassCast() {\n+ try {\n+ BNativeHeir as A.Companion\n+ } catch (e: Exception) {\n+ assertTrue(e is ClassCastException)\n+ }", + "comment_created_at": "2024-04-11T08:39:21+00:00", + "comment_author": "SvyatoslavScherbina", + "comment_body": "This test doesn't actually check that the cast fails. It checks that *if it fails, then it fails with ClassCastException*. In other words, if the cast doesn't fail, the test will still pass.", + "pr_file_module": null + }, + { + "comment_id": "1561088794", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5270, + "pr_file": "native/native.tests/testData/codegen/cinterop/objc/kt65260.kt", + "discussion_id": "1560651387", + "commented_code": "@@ -0,0 +1,138 @@\n+/*\n+ * Copyright 2010-2024 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+// TARGET_BACKEND: NATIVE\n+// DISABLE_NATIVE: isAppleTarget=false\n+\n+// MODULE: cinterop\n+// FILE: conversion.def\n+language = Objective-C\n+headers = conversion.h\n+headerFilter = conversion.h\n+\n+// FILE: conversion.h\n+#import \n+\n+NS_ASSUME_NONNULL_BEGIN\n+\n+__attribute__((objc_runtime_name(\"Foo\")))\n+@interface A : NSObject\n+@end\n+\n+__attribute__((objc_runtime_name(\"Bar\")))\n+@interface B : A\n+@end\n+\n+NS_ASSUME_NONNULL_END\n+\n+// FILE: conversion.m\n+#import \"conversion.h\"\n+\n+@implementation A\n+@end\n+\n+@implementation B\n+@end\n+\n+// MODULE: lib(cinterop)\n+// FILE: lib.kt\n+@file:OptIn(kotlinx.cinterop.ExperimentalForeignApi::class)\n+import conversion.*\n+import platform.darwin.*\n+import kotlinx.cinterop.*\n+import kotlin.test.*\n+\n+class ANativeHeir : A() {\n+ companion object\n+}\n+\n+class BNativeHeir : B() {\n+ companion object\n+}\n+\n+fun testExternalObjCMetaClassCast() {\n+ try {\n+ BNativeHeir as A.Companion\n+ } catch (e: Exception) {\n+ assertTrue(e is ClassCastException)\n+ }", + "comment_created_at": "2024-04-11T14:09:39+00:00", + "comment_author": "edisongz", + "comment_body": "Actually the cast will always fail, because `BNativeHeir` isn't equal to `A`,\r\n`try-catch` is for test infra runner execute normally.\r\n", + "pr_file_module": null + }, + { + "comment_id": "1608507573", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5270, + "pr_file": "native/native.tests/testData/codegen/cinterop/objc/kt65260.kt", + "discussion_id": "1560651387", + "commented_code": "@@ -0,0 +1,138 @@\n+/*\n+ * Copyright 2010-2024 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+// TARGET_BACKEND: NATIVE\n+// DISABLE_NATIVE: isAppleTarget=false\n+\n+// MODULE: cinterop\n+// FILE: conversion.def\n+language = Objective-C\n+headers = conversion.h\n+headerFilter = conversion.h\n+\n+// FILE: conversion.h\n+#import \n+\n+NS_ASSUME_NONNULL_BEGIN\n+\n+__attribute__((objc_runtime_name(\"Foo\")))\n+@interface A : NSObject\n+@end\n+\n+__attribute__((objc_runtime_name(\"Bar\")))\n+@interface B : A\n+@end\n+\n+NS_ASSUME_NONNULL_END\n+\n+// FILE: conversion.m\n+#import \"conversion.h\"\n+\n+@implementation A\n+@end\n+\n+@implementation B\n+@end\n+\n+// MODULE: lib(cinterop)\n+// FILE: lib.kt\n+@file:OptIn(kotlinx.cinterop.ExperimentalForeignApi::class)\n+import conversion.*\n+import platform.darwin.*\n+import kotlinx.cinterop.*\n+import kotlin.test.*\n+\n+class ANativeHeir : A() {\n+ companion object\n+}\n+\n+class BNativeHeir : B() {\n+ companion object\n+}\n+\n+fun testExternalObjCMetaClassCast() {\n+ try {\n+ BNativeHeir as A.Companion\n+ } catch (e: Exception) {\n+ assertTrue(e is ClassCastException)\n+ }", + "comment_created_at": "2024-05-21T15:12:30+00:00", + "comment_author": "SvyatoslavScherbina", + "comment_body": "The purpose of a test is to anticipate and prevent bugs. If the test is inteded to check that the cast fails, then it does that wrong, see my first comment.\r\n\r\nIn other words, consider there is a bug (e.g. a regression) in the compiler, and the cast no longer fails. Well, the test won't report this then.\r\n\r\nPlease either use `assertFailsWith`, or check `BNativeHeir is A.Companion` instead. 
\r\n\r\nSame in many places below.", + "pr_file_module": null + }, + { + "comment_id": "1608735483", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5270, + "pr_file": "native/native.tests/testData/codegen/cinterop/objc/kt65260.kt", + "discussion_id": "1560651387", + "commented_code": "@@ -0,0 +1,138 @@\n+/*\n+ * Copyright 2010-2024 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+// TARGET_BACKEND: NATIVE\n+// DISABLE_NATIVE: isAppleTarget=false\n+\n+// MODULE: cinterop\n+// FILE: conversion.def\n+language = Objective-C\n+headers = conversion.h\n+headerFilter = conversion.h\n+\n+// FILE: conversion.h\n+#import \n+\n+NS_ASSUME_NONNULL_BEGIN\n+\n+__attribute__((objc_runtime_name(\"Foo\")))\n+@interface A : NSObject\n+@end\n+\n+__attribute__((objc_runtime_name(\"Bar\")))\n+@interface B : A\n+@end\n+\n+NS_ASSUME_NONNULL_END\n+\n+// FILE: conversion.m\n+#import \"conversion.h\"\n+\n+@implementation A\n+@end\n+\n+@implementation B\n+@end\n+\n+// MODULE: lib(cinterop)\n+// FILE: lib.kt\n+@file:OptIn(kotlinx.cinterop.ExperimentalForeignApi::class)\n+import conversion.*\n+import platform.darwin.*\n+import kotlinx.cinterop.*\n+import kotlin.test.*\n+\n+class ANativeHeir : A() {\n+ companion object\n+}\n+\n+class BNativeHeir : B() {\n+ companion object\n+}\n+\n+fun testExternalObjCMetaClassCast() {\n+ try {\n+ BNativeHeir as A.Companion\n+ } catch (e: Exception) {\n+ assertTrue(e is ClassCastException)\n+ }", + "comment_created_at": "2024-05-21T18:06:36+00:00", + "comment_author": "edisongz", + "comment_body": "Use `assertFailsWith` instead.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "964724921", + "pr_number": 4933, + "pr_file": "plugins/value-container-assignment/testData/diagnostics/incorrectUsageTest.kt", + "created_at": "2022-09-07T11:25:20+00:00", + "commented_code": "// !DIAGNOSTICS: -UNUSED_PARAMETER,-UNUSED_VARIABLE, -ASSIGNED_BUT_NEVER_ACCESSED_VARIABLE\n// !RENDER_DIAGNOSTICS_FULL_TEXT\n\nannotation class ValueContainer\n\n@ValueContainer\ndata class StringProperty(var v: String) {\n fun assign(v: String) {\n this.v = v\n }\n fun assign(v: StringProperty) {\n this.v = v.get()\n }\n fun get(): String = v\n}\n\ndata class Task(val input: StringProperty)\n\nfun test() {\n run {\n // Should report error if type doesn't match\n val task = Task(StringProperty(\"Fail\"))\n task.input = 42\n }\n\n run {\n // Should report error if type doesn't match with apply\n val task = Task(StringProperty(\"Fail\"))\n task.apply {\n input = 42\n }\n }\n\n run {\n // obj[i] = v should not be translated to obj.get(i).assign(v)\n operator fun Task.get(i: Int) = this.input\n val task = Task(StringProperty(\"Fail\"))\n task[0] = StringProperty(\"Fail\")", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "964724921", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4933, + "pr_file": "plugins/value-container-assignment/testData/diagnostics/incorrectUsageTest.kt", + "discussion_id": "964724921", + "commented_code": "@@ -0,0 +1,41 @@\n+// !DIAGNOSTICS: -UNUSED_PARAMETER,-UNUSED_VARIABLE, -ASSIGNED_BUT_NEVER_ACCESSED_VARIABLE\n+// !RENDER_DIAGNOSTICS_FULL_TEXT\n+\n+annotation class ValueContainer\n+\n+@ValueContainer\n+data class StringProperty(var v: String) {\n+ fun assign(v: String) {\n+ this.v = v\n+ }\n+ fun assign(v: StringProperty) {\n+ this.v = v.get()\n+ }\n+ fun get(): String = v\n+}\n+\n+data class 
Task(val input: StringProperty)\n+\n+fun test() {\n+ run {\n+ // Should report error if type doesn't match\n+ val task = Task(StringProperty(\"Fail\"))\n+ task.input = 42\n+ }\n+\n+ run {\n+ // Should report error if type doesn't match with apply\n+ val task = Task(StringProperty(\"Fail\"))\n+ task.apply {\n+ input = 42\n+ }\n+ }\n+\n+ run {\n+ // obj[i] = v should not be translated to obj.get(i).assign(v)\n+ operator fun Task.get(i: Int) = this.input\n+ val task = Task(StringProperty(\"Fail\"))\n+ task[0] = StringProperty(\"Fail\")", + "comment_created_at": "2022-09-07T11:25:20+00:00", + "comment_author": "demiurg906", + "comment_body": "Please also add tests for other operators with assignment\n- `a.b += c`\n- `a[b] += c`\n- `a?.b[c] = d`\n- `a?.b[c] += d`\n- etc", + "pr_file_module": null + }, + { + "comment_id": "966271356", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4933, + "pr_file": "plugins/value-container-assignment/testData/diagnostics/incorrectUsageTest.kt", + "discussion_id": "964724921", + "commented_code": "@@ -0,0 +1,41 @@\n+// !DIAGNOSTICS: -UNUSED_PARAMETER,-UNUSED_VARIABLE, -ASSIGNED_BUT_NEVER_ACCESSED_VARIABLE\n+// !RENDER_DIAGNOSTICS_FULL_TEXT\n+\n+annotation class ValueContainer\n+\n+@ValueContainer\n+data class StringProperty(var v: String) {\n+ fun assign(v: String) {\n+ this.v = v\n+ }\n+ fun assign(v: StringProperty) {\n+ this.v = v.get()\n+ }\n+ fun get(): String = v\n+}\n+\n+data class Task(val input: StringProperty)\n+\n+fun test() {\n+ run {\n+ // Should report error if type doesn't match\n+ val task = Task(StringProperty(\"Fail\"))\n+ task.input = 42\n+ }\n+\n+ run {\n+ // Should report error if type doesn't match with apply\n+ val task = Task(StringProperty(\"Fail\"))\n+ task.apply {\n+ input = 42\n+ }\n+ }\n+\n+ run {\n+ // obj[i] = v should not be translated to obj.get(i).assign(v)\n+ operator fun Task.get(i: Int) = this.input\n+ val task = Task(StringProperty(\"Fail\"))\n+ task[0] = StringProperty(\"Fail\")", + "comment_created_at": "2022-09-08T18:06:24+00:00", + "comment_author": "asodja", + "comment_body": "Added `otherOperators.kt` tests for diagnostics and box tests. Let me know if I missed some cases.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kotlin-test-edge-cases.md b/_reviewers/kotlin-test-edge-cases.md index 4a590af..3f72f0e 100644 --- a/_reviewers/kotlin-test-edge-cases.md +++ b/_reviewers/kotlin-test-edge-cases.md @@ -54,269 +54,3 @@ assertFailsWith { ``` Comprehensive testing prevents code from "rotting" and ensures reliability across all usage scenarios. 
- - -[ - { - "discussion_id": "1987917791", - "pr_number": 5411, - "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", - "created_at": "2025-03-10T19:44:10+00:00", - "commented_code": "assertPrints(matchDetails(inputString, toFind, 10), \"Searching for 'ever' in 'Never ever give up' starting at position 10: Not found\")\n }\n\n @Sample\n fun lastIndexOf() {\n fun matchDetails(inputString: String, whatToFind: String, startIndex: Int = 0): String {\n val matchIndex = inputString.lastIndexOf(whatToFind, startIndex)\n return \"Searching for '$whatToFind' in '$inputString' starting at position $startIndex: \" +\n if (matchIndex >= 0) \"Found at $matchIndex\" else \"Not found\"\n }\n\n val inputString = \"Never ever give up\"\n val toFind = \"ever\"\n\n assertPrints(matchDetails(inputString, toFind), \"Searching for 'ever' in 'Never ever give up' starting at position 0: Found at 6\")\n assertPrints(matchDetails(inputString, toFind, 10), \"Searching for 'ever' in 'Never ever give up' starting at position 10: Not found\")\n }\n\n @Sample\n fun contains() {\n val string = \"Kotlin 1.4.0\"\n assertTrue(string.contains(\"K\"))\n assertFalse(string.contains(\"k\"))\n\n assertTrue(string.contains(\"1.4\"))\n assertFalse(string.contains(\"Z\"))", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1987917791", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5411, - "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", - "discussion_id": "1987917791", - "commented_code": "@@ -444,6 +444,33 @@ class Strings {\n assertPrints(matchDetails(inputString, toFind, 10), \"Searching for 'ever' in 'Never ever give up' starting at position 10: Not found\")\n }\n \n+ @Sample\n+ fun lastIndexOf() {\n+ fun matchDetails(inputString: String, whatToFind: String, startIndex: Int = 0): String {\n+ val matchIndex = inputString.lastIndexOf(whatToFind, startIndex)\n+ return \"Searching for '$whatToFind' in '$inputString' starting at position $startIndex: \" +\n+ if (matchIndex >= 0) \"Found at $matchIndex\" else \"Not found\"\n+ }\n+\n+ val inputString = \"Never ever give up\"\n+ val toFind = \"ever\"\n+\n+ assertPrints(matchDetails(inputString, toFind), \"Searching for 'ever' in 'Never ever give up' starting at position 0: Found at 6\")\n+ assertPrints(matchDetails(inputString, toFind, 10), \"Searching for 'ever' in 'Never ever give up' starting at position 10: Not found\")\n+ }\n+\n+ @Sample\n+ fun contains() {\n+ val string = \"Kotlin 1.4.0\"\n+ assertTrue(string.contains(\"K\"))\n+ assertFalse(string.contains(\"k\"))\n+\n+ assertTrue(string.contains(\"1.4\"))\n+ assertFalse(string.contains(\"Z\"))", - "comment_created_at": "2025-03-10T19:44:10+00:00", - "comment_author": "fzhinkin", - "comment_body": "I don't think these two examples adds a lot of value, two examples in the beginning of the sample shows the same: `contains` return true when a string contains a corresponding sub-sequences, and false otherwise.\r\n\r\nInstead, it would be nice to show how `contains` handles edge-cases:\r\n- when the receiver is not empty, but the parameter is empty;\r\n- when both the receiver and the parameter are the same char sequence (i.e., \"Kotlin\" in \"Kotlin\")\r\n- when both the receiver and the parameter are empty strings;\r\n- when the receiver is a sub-sequence of the parameter (i.e. 
\"Kotlin 2.2.0\" in \"Kotlin\").", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "451430040", - "pr_number": 3516, - "pr_file": "libraries/scripting/type-providers/src/kotlin/script/experimental/typeProviders/generatedCode/GeneratedCode.kt", - "created_at": "2020-07-08T10:05:21+00:00", - "commented_code": "/*\n * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage kotlin.script.experimental.typeProviders.generatedCode\n\nimport kotlin.reflect.KClass\nimport kotlin.script.experimental.typeProviders.generatedCode.impl.*\nimport kotlin.script.experimental.typeProviders.generatedCode.impl.Object\nimport kotlin.script.experimental.typeProviders.generatedCode.impl.writeJoined\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.Generated\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.GeneratedCodeVisitor\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.InternalGeneratedCode\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.visit\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.captureImports\nimport kotlin.script.experimental.typeProviders.generatedCode.internal.write\nimport kotlin.text.StringBuilder\n\n/**\n * Code returned by a Type Provider.\n * [GeneratedCode] is a recursive construction that allows you to build generated code as a combination of multiple types already provided\n *\n * Example:\n *\n * ```kotlin\n * data class MyObject(val name: String, val value: Int) : GeneratedCode {\n * fun body(): GeneratedCode {\n * return GeneratedCode.composite {\n * +makeDataClass(name)\n * .withInterface(MyInterface::class)\n *\n * +makeConstant(\"$nameConstant\", value)\n * }\n * }\n * }\n * ```\n *\n * [GeneratedCode] can contain any of the following:\n * - Data classes. See [makeDataClass].\n * - Objects. See [makeObject].\n * - Constants. See [makeConstant].\n * - Packages. See [makePackage].\n * - Extension functions. 
See [makeExtensionMethod].\n * - etc.\n */\ninterface GeneratedCode {\n /**\n * Generate the code that corresponds to your type\n *\n * @return An instance of [GeneratedCode].\n * Important: don't return an infinite recursion in the form of return `this`\n */\n fun body(): GeneratedCode\n\n /**\n * A builder for a composite of multiple types\n */\n class Builder internal constructor() {\n private val mutableCodeList = mutableListOf()\n\n /**\n * Include code in your compound\n */\n operator fun GeneratedCode.unaryPlus() {\n if (this is CompoundGeneratedCode) {\n mutableCodeList.addAll(codeList)\n } else {\n mutableCodeList.add(this)\n }\n }\n\n operator fun String.unaryPlus() {\n mutableCodeList.add(inlineCode(this))\n }\n\n internal fun build(): GeneratedCode {\n return composite(mutableCodeList)\n }\n }\n\n object Empty : GeneratedCode by EmptyCode()\n\n companion object {\n /**\n * Create a composition of multiple instances of Generated Code\n */\n fun composite(init: Builder.() -> Unit) = Builder().apply(init).build()\n\n /**\n * Create a composition of multiple instances of Generated Code\n */\n fun composite(iterable: Collection): GeneratedCode = if (iterable.count() == 1)\n iterable.first()\n else\n CompoundGeneratedCode(iterable.toList())\n\n /**\n * Create a composition of multiple instances of Generated Code\n */\n fun composite(vararg code: GeneratedCode) = composite(code.toList())\n }\n}\n\nprivate data class CompoundGeneratedCode(val codeList: List) : InternalGeneratedCode() {\n override fun StringBuilder.write(indent: Int) {\n writeJoined(codeList, \"\\n\\n\", indent)\n }\n\n override fun GeneratedCodeVisitor.visit() {\n for (code in codeList) {\n visit(code)\n }\n }\n}\n\n/**\n * References a member that can be used as a type\n */\ninterface IdentifiableMember {\n val name: String\n fun imports(): Set = emptySet()\n\n companion object {\n private data class KClassIdentifiable(val kClass: KClass<*>) : IdentifiableMember {\n override val name: String = kClass.simpleName ?: kClass.qualifiedName!!\n override fun imports() = setOfNotNull(kClass.qualifiedName)\n }\n\n private data class NamedIdentifiable(override val name: String) : IdentifiableMember\n\n operator fun invoke(kClass: KClass<*>): IdentifiableMember = KClassIdentifiable(kClass)\n operator fun invoke(name: String): IdentifiableMember = NamedIdentifiable(name)\n }\n}\n\n/**\n * References an interface that can be implemented\n */\ninterface GeneratedInterface : IdentifiableMember\n\nprivate data class CastedInterfaceMember(private val member: IdentifiableMember) : GeneratedInterface, IdentifiableMember by member\n\nfun IdentifiableMember.asInterface(): GeneratedInterface = CastedInterfaceMember(this)\n\n/**\n * Generated code that is a type that can be used as an Argument\n */\ninterface IdentifiableCode : GeneratedCode, IdentifiableMember\n\n/**\n * Builder for a provided type whose JVM Name can be set\n */\ninterface JVMNameableBuilder {\n fun jvmName(name: String)\n}\n\n/**\n * Builder for a provided type. It can implement an interface and have some code inside.\n */\ninterface GeneratedTypeBuilder : IdentifiableMember {\n /**\n * Implement a generated interface\n */\n fun implement(generated: GeneratedInterface)\n\n /**\n * Add code to the namespace of the Type\n */\n fun inner(code: GeneratedCode)\n}\n\n/**\n * Builder for a provided data class. 
It can implement interfaces, have code inside and have a number of properties\n */\ninterface DataClassBuilder : JVMNameableBuilder, GeneratedTypeBuilder {\n fun property(name: String, type: IdentifiableMember, default: GeneratedCode? = null)\n}\n\n/**\n * Builder for a provided object. It can implement interfaces, have code inside and have a number of properties.\n */\ninterface ObjectBuilder : JVMNameableBuilder, GeneratedTypeBuilder\n\n/**\n * Add multiple instances of generated code inside of the type\n */\nfun GeneratedTypeBuilder.inner(init: GeneratedCode.Builder.() -> Unit) {\n inner(GeneratedCode.composite(init))\n}\n\n/**\n * Implement the interface.\n */\nfun GeneratedTypeBuilder.implement(kClass: KClass<*>) {\n require(kClass.java.isInterface) { \"Provided type is not an interface\" }\n implement(IdentifiableMember(kClass).asInterface())\n}\n\n/**\n * Add a companion object to the generated type\n */\nfun GeneratedTypeBuilder.companionObject(name: String? = null, init: ObjectBuilder.() -> Unit) {", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "451430040", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3516, - "pr_file": "libraries/scripting/type-providers/src/kotlin/script/experimental/typeProviders/generatedCode/GeneratedCode.kt", - "discussion_id": "451430040", - "commented_code": "@@ -0,0 +1,302 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.typeProviders.generatedCode\n+\n+import kotlin.reflect.KClass\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.*\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.Object\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.writeJoined\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.Generated\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.GeneratedCodeVisitor\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.InternalGeneratedCode\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visit\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.captureImports\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.write\n+import kotlin.text.StringBuilder\n+\n+/**\n+ * Code returned by a Type Provider.\n+ * [GeneratedCode] is a recursive construction that allows you to build generated code as a combination of multiple types already provided\n+ *\n+ * Example:\n+ *\n+ * ```kotlin\n+ * data class MyObject(val name: String, val value: Int) : GeneratedCode {\n+ * fun body(): GeneratedCode {\n+ * return GeneratedCode.composite {\n+ * +makeDataClass(name)\n+ * .withInterface(MyInterface::class)\n+ *\n+ * +makeConstant(\"$nameConstant\", value)\n+ * }\n+ * }\n+ * }\n+ * ```\n+ *\n+ * [GeneratedCode] can contain any of the following:\n+ * - Data classes. See [makeDataClass].\n+ * - Objects. See [makeObject].\n+ * - Constants. See [makeConstant].\n+ * - Packages. See [makePackage].\n+ * - Extension functions. 
See [makeExtensionMethod].\n+ * - etc.\n+ */\n+interface GeneratedCode {\n+ /**\n+ * Generate the code that corresponds to your type\n+ *\n+ * @return An instance of [GeneratedCode].\n+ * Important: don't return an infinite recursion in the form of return `this`\n+ */\n+ fun body(): GeneratedCode\n+\n+ /**\n+ * A builder for a composite of multiple types\n+ */\n+ class Builder internal constructor() {\n+ private val mutableCodeList = mutableListOf()\n+\n+ /**\n+ * Include code in your compound\n+ */\n+ operator fun GeneratedCode.unaryPlus() {\n+ if (this is CompoundGeneratedCode) {\n+ mutableCodeList.addAll(codeList)\n+ } else {\n+ mutableCodeList.add(this)\n+ }\n+ }\n+\n+ operator fun String.unaryPlus() {\n+ mutableCodeList.add(inlineCode(this))\n+ }\n+\n+ internal fun build(): GeneratedCode {\n+ return composite(mutableCodeList)\n+ }\n+ }\n+\n+ object Empty : GeneratedCode by EmptyCode()\n+\n+ companion object {\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(init: Builder.() -> Unit) = Builder().apply(init).build()\n+\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(iterable: Collection): GeneratedCode = if (iterable.count() == 1)\n+ iterable.first()\n+ else\n+ CompoundGeneratedCode(iterable.toList())\n+\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(vararg code: GeneratedCode) = composite(code.toList())\n+ }\n+}\n+\n+private data class CompoundGeneratedCode(val codeList: List) : InternalGeneratedCode() {\n+ override fun StringBuilder.write(indent: Int) {\n+ writeJoined(codeList, \"\\n\\n\", indent)\n+ }\n+\n+ override fun GeneratedCodeVisitor.visit() {\n+ for (code in codeList) {\n+ visit(code)\n+ }\n+ }\n+}\n+\n+/**\n+ * References a member that can be used as a type\n+ */\n+interface IdentifiableMember {\n+ val name: String\n+ fun imports(): Set = emptySet()\n+\n+ companion object {\n+ private data class KClassIdentifiable(val kClass: KClass<*>) : IdentifiableMember {\n+ override val name: String = kClass.simpleName ?: kClass.qualifiedName!!\n+ override fun imports() = setOfNotNull(kClass.qualifiedName)\n+ }\n+\n+ private data class NamedIdentifiable(override val name: String) : IdentifiableMember\n+\n+ operator fun invoke(kClass: KClass<*>): IdentifiableMember = KClassIdentifiable(kClass)\n+ operator fun invoke(name: String): IdentifiableMember = NamedIdentifiable(name)\n+ }\n+}\n+\n+/**\n+ * References an interface that can be implemented\n+ */\n+interface GeneratedInterface : IdentifiableMember\n+\n+private data class CastedInterfaceMember(private val member: IdentifiableMember) : GeneratedInterface, IdentifiableMember by member\n+\n+fun IdentifiableMember.asInterface(): GeneratedInterface = CastedInterfaceMember(this)\n+\n+/**\n+ * Generated code that is a type that can be used as an Argument\n+ */\n+interface IdentifiableCode : GeneratedCode, IdentifiableMember\n+\n+/**\n+ * Builder for a provided type whose JVM Name can be set\n+ */\n+interface JVMNameableBuilder {\n+ fun jvmName(name: String)\n+}\n+\n+/**\n+ * Builder for a provided type. It can implement an interface and have some code inside.\n+ */\n+interface GeneratedTypeBuilder : IdentifiableMember {\n+ /**\n+ * Implement a generated interface\n+ */\n+ fun implement(generated: GeneratedInterface)\n+\n+ /**\n+ * Add code to the namespace of the Type\n+ */\n+ fun inner(code: GeneratedCode)\n+}\n+\n+/**\n+ * Builder for a provided data class. 
It can implement interfaces, have code inside and have a number of properties\n+ */\n+interface DataClassBuilder : JVMNameableBuilder, GeneratedTypeBuilder {\n+ fun property(name: String, type: IdentifiableMember, default: GeneratedCode? = null)\n+}\n+\n+/**\n+ * Builder for a provided object. It can implement interfaces, have code inside and have a number of properties.\n+ */\n+interface ObjectBuilder : JVMNameableBuilder, GeneratedTypeBuilder\n+\n+/**\n+ * Add multiple instances of generated code inside of the type\n+ */\n+fun GeneratedTypeBuilder.inner(init: GeneratedCode.Builder.() -> Unit) {\n+ inner(GeneratedCode.composite(init))\n+}\n+\n+/**\n+ * Implement the interface.\n+ */\n+fun GeneratedTypeBuilder.implement(kClass: KClass<*>) {\n+ require(kClass.java.isInterface) { \"Provided type is not an interface\" }\n+ implement(IdentifiableMember(kClass).asInterface())\n+}\n+\n+/**\n+ * Add a companion object to the generated type\n+ */\n+fun GeneratedTypeBuilder.companionObject(name: String? = null, init: ObjectBuilder.() -> Unit) {", - "comment_created_at": "2020-07-08T10:05:21+00:00", - "comment_author": "ligee", - "comment_body": "It would be nice to have tests that use all functions from the DSL, otherwise, they may rot over time and we may discover it too late.", - "pr_file_module": null - }, - { - "comment_id": "480104173", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3516, - "pr_file": "libraries/scripting/type-providers/src/kotlin/script/experimental/typeProviders/generatedCode/GeneratedCode.kt", - "discussion_id": "451430040", - "commented_code": "@@ -0,0 +1,302 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package kotlin.script.experimental.typeProviders.generatedCode\n+\n+import kotlin.reflect.KClass\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.*\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.Object\n+import kotlin.script.experimental.typeProviders.generatedCode.impl.writeJoined\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.Generated\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.GeneratedCodeVisitor\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.InternalGeneratedCode\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visit\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.visitor.captureImports\n+import kotlin.script.experimental.typeProviders.generatedCode.internal.write\n+import kotlin.text.StringBuilder\n+\n+/**\n+ * Code returned by a Type Provider.\n+ * [GeneratedCode] is a recursive construction that allows you to build generated code as a combination of multiple types already provided\n+ *\n+ * Example:\n+ *\n+ * ```kotlin\n+ * data class MyObject(val name: String, val value: Int) : GeneratedCode {\n+ * fun body(): GeneratedCode {\n+ * return GeneratedCode.composite {\n+ * +makeDataClass(name)\n+ * .withInterface(MyInterface::class)\n+ *\n+ * +makeConstant(\"$nameConstant\", value)\n+ * }\n+ * }\n+ * }\n+ * ```\n+ *\n+ * [GeneratedCode] can contain any of the following:\n+ * - Data classes. See [makeDataClass].\n+ * - Objects. See [makeObject].\n+ * - Constants. See [makeConstant].\n+ * - Packages. See [makePackage].\n+ * - Extension functions. 
See [makeExtensionMethod].\n+ * - etc.\n+ */\n+interface GeneratedCode {\n+ /**\n+ * Generate the code that corresponds to your type\n+ *\n+ * @return An instance of [GeneratedCode].\n+ * Important: don't return an infinite recursion in the form of return `this`\n+ */\n+ fun body(): GeneratedCode\n+\n+ /**\n+ * A builder for a composite of multiple types\n+ */\n+ class Builder internal constructor() {\n+ private val mutableCodeList = mutableListOf()\n+\n+ /**\n+ * Include code in your compound\n+ */\n+ operator fun GeneratedCode.unaryPlus() {\n+ if (this is CompoundGeneratedCode) {\n+ mutableCodeList.addAll(codeList)\n+ } else {\n+ mutableCodeList.add(this)\n+ }\n+ }\n+\n+ operator fun String.unaryPlus() {\n+ mutableCodeList.add(inlineCode(this))\n+ }\n+\n+ internal fun build(): GeneratedCode {\n+ return composite(mutableCodeList)\n+ }\n+ }\n+\n+ object Empty : GeneratedCode by EmptyCode()\n+\n+ companion object {\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(init: Builder.() -> Unit) = Builder().apply(init).build()\n+\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(iterable: Collection): GeneratedCode = if (iterable.count() == 1)\n+ iterable.first()\n+ else\n+ CompoundGeneratedCode(iterable.toList())\n+\n+ /**\n+ * Create a composition of multiple instances of Generated Code\n+ */\n+ fun composite(vararg code: GeneratedCode) = composite(code.toList())\n+ }\n+}\n+\n+private data class CompoundGeneratedCode(val codeList: List) : InternalGeneratedCode() {\n+ override fun StringBuilder.write(indent: Int) {\n+ writeJoined(codeList, \"\\n\\n\", indent)\n+ }\n+\n+ override fun GeneratedCodeVisitor.visit() {\n+ for (code in codeList) {\n+ visit(code)\n+ }\n+ }\n+}\n+\n+/**\n+ * References a member that can be used as a type\n+ */\n+interface IdentifiableMember {\n+ val name: String\n+ fun imports(): Set = emptySet()\n+\n+ companion object {\n+ private data class KClassIdentifiable(val kClass: KClass<*>) : IdentifiableMember {\n+ override val name: String = kClass.simpleName ?: kClass.qualifiedName!!\n+ override fun imports() = setOfNotNull(kClass.qualifiedName)\n+ }\n+\n+ private data class NamedIdentifiable(override val name: String) : IdentifiableMember\n+\n+ operator fun invoke(kClass: KClass<*>): IdentifiableMember = KClassIdentifiable(kClass)\n+ operator fun invoke(name: String): IdentifiableMember = NamedIdentifiable(name)\n+ }\n+}\n+\n+/**\n+ * References an interface that can be implemented\n+ */\n+interface GeneratedInterface : IdentifiableMember\n+\n+private data class CastedInterfaceMember(private val member: IdentifiableMember) : GeneratedInterface, IdentifiableMember by member\n+\n+fun IdentifiableMember.asInterface(): GeneratedInterface = CastedInterfaceMember(this)\n+\n+/**\n+ * Generated code that is a type that can be used as an Argument\n+ */\n+interface IdentifiableCode : GeneratedCode, IdentifiableMember\n+\n+/**\n+ * Builder for a provided type whose JVM Name can be set\n+ */\n+interface JVMNameableBuilder {\n+ fun jvmName(name: String)\n+}\n+\n+/**\n+ * Builder for a provided type. It can implement an interface and have some code inside.\n+ */\n+interface GeneratedTypeBuilder : IdentifiableMember {\n+ /**\n+ * Implement a generated interface\n+ */\n+ fun implement(generated: GeneratedInterface)\n+\n+ /**\n+ * Add code to the namespace of the Type\n+ */\n+ fun inner(code: GeneratedCode)\n+}\n+\n+/**\n+ * Builder for a provided data class. 
It can implement interfaces, have code inside and have a number of properties\n+ */\n+interface DataClassBuilder : JVMNameableBuilder, GeneratedTypeBuilder {\n+ fun property(name: String, type: IdentifiableMember, default: GeneratedCode? = null)\n+}\n+\n+/**\n+ * Builder for a provided object. It can implement interfaces, have code inside and have a number of properties.\n+ */\n+interface ObjectBuilder : JVMNameableBuilder, GeneratedTypeBuilder\n+\n+/**\n+ * Add multiple instances of generated code inside of the type\n+ */\n+fun GeneratedTypeBuilder.inner(init: GeneratedCode.Builder.() -> Unit) {\n+ inner(GeneratedCode.composite(init))\n+}\n+\n+/**\n+ * Implement the interface.\n+ */\n+fun GeneratedTypeBuilder.implement(kClass: KClass<*>) {\n+ require(kClass.java.isInterface) { \"Provided type is not an interface\" }\n+ implement(IdentifiableMember(kClass).asInterface())\n+}\n+\n+/**\n+ * Add a companion object to the generated type\n+ */\n+fun GeneratedTypeBuilder.companionObject(name: String? = null, init: ObjectBuilder.() -> Unit) {", - "comment_created_at": "2020-08-31T12:43:27+00:00", - "comment_author": "nerdsupremacist", - "comment_body": "Added a bunch of tests. It should be just shy of 70% coverage now", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1881708047", - "pr_number": 5385, - "pr_file": "native/native.tests/testData/codegen/cinterop/kt73565.kt", - "created_at": "2024-12-12T09:32:06+00:00", - "commented_code": "// TARGET_BACKEND: NATIVE\n@file:OptIn(kotlinx.cinterop.ExperimentalForeignApi::class)\n\nimport kotlin.test.*\nimport kotlinx.cinterop.*\n\nfun box(): String {", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1881708047", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5385, - "pr_file": "native/native.tests/testData/codegen/cinterop/kt73565.kt", - "discussion_id": "1881708047", - "commented_code": "@@ -0,0 +1,53 @@\n+// TARGET_BACKEND: NATIVE\n+@file:OptIn(kotlinx.cinterop.ExperimentalForeignApi::class)\n+\n+import kotlin.test.*\n+import kotlinx.cinterop.*\n+\n+fun box(): String {", - "comment_created_at": "2024-12-12T09:32:06+00:00", - "comment_author": "sbogolepov", - "comment_body": "Please add tests for top-level functions as well.", - "pr_file_module": null - }, - { - "comment_id": "1881822457", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5385, - "pr_file": "native/native.tests/testData/codegen/cinterop/kt73565.kt", - "discussion_id": "1881708047", - "commented_code": "@@ -0,0 +1,53 @@\n+// TARGET_BACKEND: NATIVE\n+@file:OptIn(kotlinx.cinterop.ExperimentalForeignApi::class)\n+\n+import kotlin.test.*\n+import kotlinx.cinterop.*\n+\n+fun box(): String {", - "comment_created_at": "2024-12-12T10:41:58+00:00", - "comment_author": "KitsuneAlex", - "comment_body": "Good point, don't know why i didn't think of that, will add.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1870271493", - "pr_number": 4871, - "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", - "created_at": "2024-12-04T20:59:23+00:00", - "commented_code": "assertPrints(textWithoutDigits, \"abcde\")\n }\n\n @Sample\n fun findLast() {\n val text = \"a1b2c3d4e5\"\n\n val lastNumberInText = text.findLast { it.isDigit() }\n val lastUpperCaseInText = text.findLast { it.isUpperCase() }\n\n assertPrints(lastNumberInText, \"5\")\n assertPrints(lastUpperCaseInText, \"null\")\n }", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1870271493", - "repo_full_name": 
"JetBrains/kotlin", - "pr_number": 4871, - "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", - "discussion_id": "1870271493", - "commented_code": "@@ -109,6 +109,17 @@ class Strings {\n assertPrints(textWithoutDigits, \"abcde\")\n }\n \n+ @Sample\n+ fun findLast() {\n+ val text = \"a1b2c3d4e5\"\n+\n+ val lastNumberInText = text.findLast { it.isDigit() }\n+ val lastUpperCaseInText = text.findLast { it.isUpperCase() }\n+\n+ assertPrints(lastNumberInText, \"5\")\n+ assertPrints(lastUpperCaseInText, \"null\")\n+ }", - "comment_created_at": "2024-12-04T20:59:23+00:00", - "comment_author": "fzhinkin", - "comment_body": "It would be nice to also showcase how the function works when invoked on an empty string: `assertPrints(\"\".findLast { true }, \"null\")`", - "pr_file_module": null - }, - { - "comment_id": "1874614782", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4871, - "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", - "discussion_id": "1870271493", - "commented_code": "@@ -109,6 +109,17 @@ class Strings {\n assertPrints(textWithoutDigits, \"abcde\")\n }\n \n+ @Sample\n+ fun findLast() {\n+ val text = \"a1b2c3d4e5\"\n+\n+ val lastNumberInText = text.findLast { it.isDigit() }\n+ val lastUpperCaseInText = text.findLast { it.isUpperCase() }\n+\n+ assertPrints(lastNumberInText, \"5\")\n+ assertPrints(lastUpperCaseInText, \"null\")\n+ }", - "comment_created_at": "2024-12-08T05:00:24+00:00", - "comment_author": "simonNozaki", - "comment_body": "Added: https://github.com/JetBrains/kotlin/pull/4871/commits/bce231a8be4dc97e07424b3635b708a11cf25c85\r\n\r\nAnd, as you advised me, I updates all assertions to inline calling.", - "pr_file_module": null - }, - { - "comment_id": "1876523186", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4871, - "pr_file": "libraries/stdlib/samples/test/samples/text/strings.kt", - "discussion_id": "1870271493", - "commented_code": "@@ -109,6 +109,17 @@ class Strings {\n assertPrints(textWithoutDigits, \"abcde\")\n }\n \n+ @Sample\n+ fun findLast() {\n+ val text = \"a1b2c3d4e5\"\n+\n+ val lastNumberInText = text.findLast { it.isDigit() }\n+ val lastUpperCaseInText = text.findLast { it.isUpperCase() }\n+\n+ assertPrints(lastNumberInText, \"5\")\n+ assertPrints(lastUpperCaseInText, \"null\")\n+ }", - "comment_created_at": "2024-12-09T19:00:50+00:00", - "comment_author": "fzhinkin", - "comment_body": "Thank you!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1581162687", - "pr_number": 5281, - "pr_file": "kotlin-native/backend.native/tests/runtime/program_name/runtime_program_name.kt", - "created_at": "2024-04-26T15:02:50+00:00", - "commented_code": "@file:OptIn(kotlin.experimental.ExperimentalNativeApi::class)\n\nimport kotlin.test.*\nimport kotlin.native.Platform\n\nfun main(args: Array) {\n // Remove path and extension (.kexe or .exe)\n val programFileName = Platform.programName.substringAfterLast(\"/\").substringBeforeLast(\".\")\n\n assertEquals(\"program_name\", programFileName)\n}", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1581162687", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5281, - "pr_file": "kotlin-native/backend.native/tests/runtime/program_name/runtime_program_name.kt", - "discussion_id": "1581162687", - "commented_code": "@@ -0,0 +1,11 @@\n+@file:OptIn(kotlin.experimental.ExperimentalNativeApi::class)\n+\n+import kotlin.test.*\n+import kotlin.native.Platform\n+\n+fun main(args: Array) {\n+ // Remove path and extension (.kexe or 
.exe)\n+ val programFileName = Platform.programName.substringAfterLast(\"/\").substringBeforeLast(\".\")\n+\n+ assertEquals(\"program_name\", programFileName)\n+}", - "comment_created_at": "2024-04-26T15:02:50+00:00", - "comment_author": "SvyatoslavScherbina", - "comment_body": "More tests are necessary. For example, the original issue mentions a nice use case:\r\n> Build multi-call binaries. For example, busybox is a single binary that contains many tools, and decides which tool to run based on how the symbolic link is called: https://busybox.net/downloads/BusyBox.html#usage (that's how it can be so small)\r\n\r\nSo checking a case with symbolic link would also be useful.", - "pr_file_module": null - }, - { - "comment_id": "1584883309", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5281, - "pr_file": "kotlin-native/backend.native/tests/runtime/program_name/runtime_program_name.kt", - "discussion_id": "1581162687", - "commented_code": "@@ -0,0 +1,11 @@\n+@file:OptIn(kotlin.experimental.ExperimentalNativeApi::class)\n+\n+import kotlin.test.*\n+import kotlin.native.Platform\n+\n+fun main(args: Array) {\n+ // Remove path and extension (.kexe or .exe)\n+ val programFileName = Platform.programName.substringAfterLast(\"/\").substringBeforeLast(\".\")\n+\n+ assertEquals(\"program_name\", programFileName)\n+}", - "comment_created_at": "2024-04-30T13:59:47+00:00", - "comment_author": "vonox7", - "comment_body": "I added test for that by calling `execv()` in the C code. This allowed to test even more use-cases (no programName at all), while still covering the use-case of renaming/linking a binary.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1560651387", - "pr_number": 5270, - "pr_file": "native/native.tests/testData/codegen/cinterop/objc/kt65260.kt", - "created_at": "2024-04-11T08:39:21+00:00", - "commented_code": "/*\n * Copyright 2010-2024 JetBrains s.r.o. and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n// TARGET_BACKEND: NATIVE\n// DISABLE_NATIVE: isAppleTarget=false\n\n// MODULE: cinterop\n// FILE: conversion.def\nlanguage = Objective-C\nheaders = conversion.h\nheaderFilter = conversion.h\n\n// FILE: conversion.h\n#import \n\nNS_ASSUME_NONNULL_BEGIN\n\n__attribute__((objc_runtime_name(\"Foo\")))\n@interface A : NSObject\n@end\n\n__attribute__((objc_runtime_name(\"Bar\")))\n@interface B : A\n@end\n\nNS_ASSUME_NONNULL_END\n\n// FILE: conversion.m\n#import \"conversion.h\"\n\n@implementation A\n@end\n\n@implementation B\n@end\n\n// MODULE: lib(cinterop)\n// FILE: lib.kt\n@file:OptIn(kotlinx.cinterop.ExperimentalForeignApi::class)\nimport conversion.*\nimport platform.darwin.*\nimport kotlinx.cinterop.*\nimport kotlin.test.*\n\nclass ANativeHeir : A() {\n companion object\n}\n\nclass BNativeHeir : B() {\n companion object\n}\n\nfun testExternalObjCMetaClassCast() {\n try {\n BNativeHeir as A.Companion\n } catch (e: Exception) {\n assertTrue(e is ClassCastException)\n }", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1560651387", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5270, - "pr_file": "native/native.tests/testData/codegen/cinterop/objc/kt65260.kt", - "discussion_id": "1560651387", - "commented_code": "@@ -0,0 +1,138 @@\n+/*\n+ * Copyright 2010-2024 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+// TARGET_BACKEND: NATIVE\n+// DISABLE_NATIVE: isAppleTarget=false\n+\n+// MODULE: cinterop\n+// FILE: conversion.def\n+language = Objective-C\n+headers = conversion.h\n+headerFilter = conversion.h\n+\n+// FILE: conversion.h\n+#import \n+\n+NS_ASSUME_NONNULL_BEGIN\n+\n+__attribute__((objc_runtime_name(\"Foo\")))\n+@interface A : NSObject\n+@end\n+\n+__attribute__((objc_runtime_name(\"Bar\")))\n+@interface B : A\n+@end\n+\n+NS_ASSUME_NONNULL_END\n+\n+// FILE: conversion.m\n+#import \"conversion.h\"\n+\n+@implementation A\n+@end\n+\n+@implementation B\n+@end\n+\n+// MODULE: lib(cinterop)\n+// FILE: lib.kt\n+@file:OptIn(kotlinx.cinterop.ExperimentalForeignApi::class)\n+import conversion.*\n+import platform.darwin.*\n+import kotlinx.cinterop.*\n+import kotlin.test.*\n+\n+class ANativeHeir : A() {\n+ companion object\n+}\n+\n+class BNativeHeir : B() {\n+ companion object\n+}\n+\n+fun testExternalObjCMetaClassCast() {\n+ try {\n+ BNativeHeir as A.Companion\n+ } catch (e: Exception) {\n+ assertTrue(e is ClassCastException)\n+ }", - "comment_created_at": "2024-04-11T08:39:21+00:00", - "comment_author": "SvyatoslavScherbina", - "comment_body": "This test doesn't actually check that the cast fails. It checks that *if it fails, then it fails with ClassCastException*. In other words, if the cast doesn't fail, the test will still pass.", - "pr_file_module": null - }, - { - "comment_id": "1561088794", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5270, - "pr_file": "native/native.tests/testData/codegen/cinterop/objc/kt65260.kt", - "discussion_id": "1560651387", - "commented_code": "@@ -0,0 +1,138 @@\n+/*\n+ * Copyright 2010-2024 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+// TARGET_BACKEND: NATIVE\n+// DISABLE_NATIVE: isAppleTarget=false\n+\n+// MODULE: cinterop\n+// FILE: conversion.def\n+language = Objective-C\n+headers = conversion.h\n+headerFilter = conversion.h\n+\n+// FILE: conversion.h\n+#import \n+\n+NS_ASSUME_NONNULL_BEGIN\n+\n+__attribute__((objc_runtime_name(\"Foo\")))\n+@interface A : NSObject\n+@end\n+\n+__attribute__((objc_runtime_name(\"Bar\")))\n+@interface B : A\n+@end\n+\n+NS_ASSUME_NONNULL_END\n+\n+// FILE: conversion.m\n+#import \"conversion.h\"\n+\n+@implementation A\n+@end\n+\n+@implementation B\n+@end\n+\n+// MODULE: lib(cinterop)\n+// FILE: lib.kt\n+@file:OptIn(kotlinx.cinterop.ExperimentalForeignApi::class)\n+import conversion.*\n+import platform.darwin.*\n+import kotlinx.cinterop.*\n+import kotlin.test.*\n+\n+class ANativeHeir : A() {\n+ companion object\n+}\n+\n+class BNativeHeir : B() {\n+ companion object\n+}\n+\n+fun testExternalObjCMetaClassCast() {\n+ try {\n+ BNativeHeir as A.Companion\n+ } catch (e: Exception) {\n+ assertTrue(e is ClassCastException)\n+ }", - "comment_created_at": "2024-04-11T14:09:39+00:00", - "comment_author": "edisongz", - "comment_body": "Actually the cast will always fail, because `BNativeHeir` isn't equal to `A`,\r\n`try-catch` is for test infra runner execute normally.\r\n", - "pr_file_module": null - }, - { - "comment_id": "1608507573", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5270, - "pr_file": "native/native.tests/testData/codegen/cinterop/objc/kt65260.kt", - "discussion_id": "1560651387", - "commented_code": "@@ -0,0 +1,138 @@\n+/*\n+ * Copyright 2010-2024 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+// TARGET_BACKEND: NATIVE\n+// DISABLE_NATIVE: isAppleTarget=false\n+\n+// MODULE: cinterop\n+// FILE: conversion.def\n+language = Objective-C\n+headers = conversion.h\n+headerFilter = conversion.h\n+\n+// FILE: conversion.h\n+#import \n+\n+NS_ASSUME_NONNULL_BEGIN\n+\n+__attribute__((objc_runtime_name(\"Foo\")))\n+@interface A : NSObject\n+@end\n+\n+__attribute__((objc_runtime_name(\"Bar\")))\n+@interface B : A\n+@end\n+\n+NS_ASSUME_NONNULL_END\n+\n+// FILE: conversion.m\n+#import \"conversion.h\"\n+\n+@implementation A\n+@end\n+\n+@implementation B\n+@end\n+\n+// MODULE: lib(cinterop)\n+// FILE: lib.kt\n+@file:OptIn(kotlinx.cinterop.ExperimentalForeignApi::class)\n+import conversion.*\n+import platform.darwin.*\n+import kotlinx.cinterop.*\n+import kotlin.test.*\n+\n+class ANativeHeir : A() {\n+ companion object\n+}\n+\n+class BNativeHeir : B() {\n+ companion object\n+}\n+\n+fun testExternalObjCMetaClassCast() {\n+ try {\n+ BNativeHeir as A.Companion\n+ } catch (e: Exception) {\n+ assertTrue(e is ClassCastException)\n+ }", - "comment_created_at": "2024-05-21T15:12:30+00:00", - "comment_author": "SvyatoslavScherbina", - "comment_body": "The purpose of a test is to anticipate and prevent bugs. If the test is inteded to check that the cast fails, then it does that wrong, see my first comment.\r\n\r\nIn other words, consider there is a bug (e.g. a regression) in the compiler, and the cast no longer fails. Well, the test won't report this then.\r\n\r\nPlease either use `assertFailsWith`, or check `BNativeHeir is A.Companion` instead. 
\r\n\r\nSame in many places below.", - "pr_file_module": null - }, - { - "comment_id": "1608735483", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5270, - "pr_file": "native/native.tests/testData/codegen/cinterop/objc/kt65260.kt", - "discussion_id": "1560651387", - "commented_code": "@@ -0,0 +1,138 @@\n+/*\n+ * Copyright 2010-2024 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+// TARGET_BACKEND: NATIVE\n+// DISABLE_NATIVE: isAppleTarget=false\n+\n+// MODULE: cinterop\n+// FILE: conversion.def\n+language = Objective-C\n+headers = conversion.h\n+headerFilter = conversion.h\n+\n+// FILE: conversion.h\n+#import \n+\n+NS_ASSUME_NONNULL_BEGIN\n+\n+__attribute__((objc_runtime_name(\"Foo\")))\n+@interface A : NSObject\n+@end\n+\n+__attribute__((objc_runtime_name(\"Bar\")))\n+@interface B : A\n+@end\n+\n+NS_ASSUME_NONNULL_END\n+\n+// FILE: conversion.m\n+#import \"conversion.h\"\n+\n+@implementation A\n+@end\n+\n+@implementation B\n+@end\n+\n+// MODULE: lib(cinterop)\n+// FILE: lib.kt\n+@file:OptIn(kotlinx.cinterop.ExperimentalForeignApi::class)\n+import conversion.*\n+import platform.darwin.*\n+import kotlinx.cinterop.*\n+import kotlin.test.*\n+\n+class ANativeHeir : A() {\n+ companion object\n+}\n+\n+class BNativeHeir : B() {\n+ companion object\n+}\n+\n+fun testExternalObjCMetaClassCast() {\n+ try {\n+ BNativeHeir as A.Companion\n+ } catch (e: Exception) {\n+ assertTrue(e is ClassCastException)\n+ }", - "comment_created_at": "2024-05-21T18:06:36+00:00", - "comment_author": "edisongz", - "comment_body": "Use `assertFailsWith` instead.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "964724921", - "pr_number": 4933, - "pr_file": "plugins/value-container-assignment/testData/diagnostics/incorrectUsageTest.kt", - "created_at": "2022-09-07T11:25:20+00:00", - "commented_code": "// !DIAGNOSTICS: -UNUSED_PARAMETER,-UNUSED_VARIABLE, -ASSIGNED_BUT_NEVER_ACCESSED_VARIABLE\n// !RENDER_DIAGNOSTICS_FULL_TEXT\n\nannotation class ValueContainer\n\n@ValueContainer\ndata class StringProperty(var v: String) {\n fun assign(v: String) {\n this.v = v\n }\n fun assign(v: StringProperty) {\n this.v = v.get()\n }\n fun get(): String = v\n}\n\ndata class Task(val input: StringProperty)\n\nfun test() {\n run {\n // Should report error if type doesn't match\n val task = Task(StringProperty(\"Fail\"))\n task.input = 42\n }\n\n run {\n // Should report error if type doesn't match with apply\n val task = Task(StringProperty(\"Fail\"))\n task.apply {\n input = 42\n }\n }\n\n run {\n // obj[i] = v should not be translated to obj.get(i).assign(v)\n operator fun Task.get(i: Int) = this.input\n val task = Task(StringProperty(\"Fail\"))\n task[0] = StringProperty(\"Fail\")", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "964724921", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4933, - "pr_file": "plugins/value-container-assignment/testData/diagnostics/incorrectUsageTest.kt", - "discussion_id": "964724921", - "commented_code": "@@ -0,0 +1,41 @@\n+// !DIAGNOSTICS: -UNUSED_PARAMETER,-UNUSED_VARIABLE, -ASSIGNED_BUT_NEVER_ACCESSED_VARIABLE\n+// !RENDER_DIAGNOSTICS_FULL_TEXT\n+\n+annotation class ValueContainer\n+\n+@ValueContainer\n+data class StringProperty(var v: String) {\n+ fun assign(v: String) {\n+ this.v = v\n+ }\n+ fun assign(v: StringProperty) {\n+ this.v = v.get()\n+ }\n+ fun get(): String = v\n+}\n+\n+data class 
Task(val input: StringProperty)\n+\n+fun test() {\n+ run {\n+ // Should report error if type doesn't match\n+ val task = Task(StringProperty(\"Fail\"))\n+ task.input = 42\n+ }\n+\n+ run {\n+ // Should report error if type doesn't match with apply\n+ val task = Task(StringProperty(\"Fail\"))\n+ task.apply {\n+ input = 42\n+ }\n+ }\n+\n+ run {\n+ // obj[i] = v should not be translated to obj.get(i).assign(v)\n+ operator fun Task.get(i: Int) = this.input\n+ val task = Task(StringProperty(\"Fail\"))\n+ task[0] = StringProperty(\"Fail\")", - "comment_created_at": "2022-09-07T11:25:20+00:00", - "comment_author": "demiurg906", - "comment_body": "Please also add tests for other operators with assignment\n- `a.b += c`\n- `a[b] += c`\n- `a?.b[c] = d`\n- `a?.b[c] += d`\n- etc", - "pr_file_module": null - }, - { - "comment_id": "966271356", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4933, - "pr_file": "plugins/value-container-assignment/testData/diagnostics/incorrectUsageTest.kt", - "discussion_id": "964724921", - "commented_code": "@@ -0,0 +1,41 @@\n+// !DIAGNOSTICS: -UNUSED_PARAMETER,-UNUSED_VARIABLE, -ASSIGNED_BUT_NEVER_ACCESSED_VARIABLE\n+// !RENDER_DIAGNOSTICS_FULL_TEXT\n+\n+annotation class ValueContainer\n+\n+@ValueContainer\n+data class StringProperty(var v: String) {\n+ fun assign(v: String) {\n+ this.v = v\n+ }\n+ fun assign(v: StringProperty) {\n+ this.v = v.get()\n+ }\n+ fun get(): String = v\n+}\n+\n+data class Task(val input: StringProperty)\n+\n+fun test() {\n+ run {\n+ // Should report error if type doesn't match\n+ val task = Task(StringProperty(\"Fail\"))\n+ task.input = 42\n+ }\n+\n+ run {\n+ // Should report error if type doesn't match with apply\n+ val task = Task(StringProperty(\"Fail\"))\n+ task.apply {\n+ input = 42\n+ }\n+ }\n+\n+ run {\n+ // obj[i] = v should not be translated to obj.get(i).assign(v)\n+ operator fun Task.get(i: Int) = this.input\n+ val task = Task(StringProperty(\"Fail\"))\n+ task[0] = StringProperty(\"Fail\")", - "comment_created_at": "2022-09-08T18:06:24+00:00", - "comment_author": "asodja", - "comment_body": "Added `otherOperators.kt` tests for diagnostics and box tests. 
Let me know if I missed some cases.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kotlin-use-configuration-property-providers.json b/_reviewers/kotlin-use-configuration-property-providers.json new file mode 100644 index 0000000..c93f7c9 --- /dev/null +++ b/_reviewers/kotlin-use-configuration-property-providers.json @@ -0,0 +1,204 @@ +[ + { + "discussion_id": "449662023", + "pr_number": 3521, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/internal/kapt/KaptGenerateStubsTask.kt", + "created_at": "2020-07-03T17:06:24+00:00", + "commented_code": "error(\"KaptGenerateStubsTask.useModuleDetection setter should not be called!\")\n }\n\n @get:Input\n val verbose = (project.hasProperty(\"kapt.verbose\") && project.property(\"kapt.verbose\").toString().toBoolean() == true)", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "449662023", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3521, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/internal/kapt/KaptGenerateStubsTask.kt", + "discussion_id": "449662023", + "commented_code": "@@ -69,6 +69,9 @@ open class KaptGenerateStubsTask : KotlinCompile() {\n error(\"KaptGenerateStubsTask.useModuleDetection setter should not be called!\")\n }\n \n+ @get:Input\n+ val verbose = (project.hasProperty(\"kapt.verbose\") && project.property(\"kapt.verbose\").toString().toBoolean() == true)", + "comment_created_at": "2020-07-03T17:06:24+00:00", + "comment_author": "eskatos", + "comment_body": ":o: This could be modelled as a `Provider` to account for later changes to that property:\r\n\r\n```kotlin\r\nproject.providers.gradleProperty(\"kapt.verbose\").map { it.toBoolean() }.orElse(false)\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "449666668", + "pr_number": 3521, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/BuildEventsListenerRegistryHolder.kt", + "created_at": "2020-07-03T17:29:12+00:00", + "commented_code": "/*\n * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage org.jetbrains.kotlin.gradle.plugin\n\nimport org.gradle.api.Project\nimport org.gradle.build.event.BuildEventsListenerRegistry\nimport org.jetbrains.kotlin.gradle.utils.isConfigurationCacheAvailable\nimport javax.inject.Inject\n\nopen class BuildEventsListenerRegistryHolder @Inject constructor(val listenerRegistry: BuildEventsListenerRegistry?) {\n companion object {\n fun getInstance(project: Project) = run {\n if (isConfigurationCacheAvailable(project.gradle)) {", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "449666668", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3521, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/BuildEventsListenerRegistryHolder.kt", + "discussion_id": "449666668", + "commented_code": "@@ -0,0 +1,23 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlin.gradle.plugin\n+\n+import org.gradle.api.Project\n+import org.gradle.build.event.BuildEventsListenerRegistry\n+import org.jetbrains.kotlin.gradle.utils.isConfigurationCacheAvailable\n+import javax.inject.Inject\n+\n+open class BuildEventsListenerRegistryHolder @Inject constructor(val listenerRegistry: BuildEventsListenerRegistry?) {\n+ companion object {\n+ fun getInstance(project: Project) = run {\n+ if (isConfigurationCacheAvailable(project.gradle)) {", + "comment_created_at": "2020-07-03T17:29:12+00:00", + "comment_author": "eskatos", + "comment_body": ":x: the check should be `gradleVersion >= 6.1` instead where `BuildEventsListenerRegistry` is available", + "pr_file_module": null + }, + { + "comment_id": "454990923", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3521, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/BuildEventsListenerRegistryHolder.kt", + "discussion_id": "449666668", + "commented_code": "@@ -0,0 +1,23 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlin.gradle.plugin\n+\n+import org.gradle.api.Project\n+import org.gradle.build.event.BuildEventsListenerRegistry\n+import org.jetbrains.kotlin.gradle.utils.isConfigurationCacheAvailable\n+import javax.inject.Inject\n+\n+open class BuildEventsListenerRegistryHolder @Inject constructor(val listenerRegistry: BuildEventsListenerRegistry?) {\n+ companion object {\n+ fun getInstance(project: Project) = run {\n+ if (isConfigurationCacheAvailable(project.gradle)) {", + "comment_created_at": "2020-07-15T11:47:18+00:00", + "comment_author": "nav-nav", + "comment_body": "New listener is not ready for configuration cache", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "449666709", + "pr_number": 3521, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/KotlinGradleBuildServices.kt", + "created_at": "2020-07-03T17:29:28+00:00", + "commented_code": "val ALREADY_INITIALIZED_MESSAGE = \"$CLASS_NAME is already initialized\"\n\n @field:Volatile\n private var instance: KotlinGradleBuildServices? = null\n internal var instance: KotlinGradleBuildServices? 
= null\n\n// @JvmStatic\n// @Synchronized\n// fun getInstance(gradle: Gradle): KotlinGradleBuildServices {\n// val log = Logging.getLogger(KotlinGradleBuildServices::class.java)\n//\n// if (instance != null) {\n// log.kotlinDebug(ALREADY_INITIALIZED_MESSAGE)\n// return instance!!\n// }\n//\n// val services = KotlinGradleBuildServices(gradle)\n// instance = services\n// if (!isGradleVersionAtLeast(6,1)) {\n// gradle.addBuildListener(services)\n// log.kotlinDebug(INIT_MESSAGE)\n// } else {\n// BuildEventsListenerRegistry.\n// }\n//\n// services.buildStarted()\n// return services\n// }\n\n\n @JvmStatic\n @Synchronized\n fun getInstance(gradle: Gradle): KotlinGradleBuildServices {\n fun getInstance(project: Project, listenerRegistryHolder: BuildEventsListenerRegistryHolder): KotlinGradleBuildServices {\n val log = Logging.getLogger(KotlinGradleBuildServices::class.java)\n val kotlinGradleListenerProvider: org.gradle.api.provider.Provider = project.provider {\n KotlinGradleBuildListener(KotlinGradleFinishBuildHandler())\n }\n\n if (instance != null) {\n log.kotlinDebug(ALREADY_INITIALIZED_MESSAGE)\n return instance!!\n }\n\n val gradle = project.gradle\n val services = KotlinGradleBuildServices(gradle)\n gradle.addBuildListener(services)\n instance = services\n log.kotlinDebug(INIT_MESSAGE)\n if (isConfigurationCacheAvailable(gradle)) {", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "449666709", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3521, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/KotlinGradleBuildServices.kt", + "discussion_id": "449666709", + "commented_code": "@@ -34,36 +32,69 @@ internal class KotlinGradleBuildServices private constructor(\n val ALREADY_INITIALIZED_MESSAGE = \"$CLASS_NAME is already initialized\"\n \n @field:Volatile\n- private var instance: KotlinGradleBuildServices? = null\n+ internal var instance: KotlinGradleBuildServices? 
= null\n+\n+// @JvmStatic\n+// @Synchronized\n+// fun getInstance(gradle: Gradle): KotlinGradleBuildServices {\n+// val log = Logging.getLogger(KotlinGradleBuildServices::class.java)\n+//\n+// if (instance != null) {\n+// log.kotlinDebug(ALREADY_INITIALIZED_MESSAGE)\n+// return instance!!\n+// }\n+//\n+// val services = KotlinGradleBuildServices(gradle)\n+// instance = services\n+// if (!isGradleVersionAtLeast(6,1)) {\n+// gradle.addBuildListener(services)\n+// log.kotlinDebug(INIT_MESSAGE)\n+// } else {\n+// BuildEventsListenerRegistry.\n+// }\n+//\n+// services.buildStarted()\n+// return services\n+// }\n+\n \n @JvmStatic\n @Synchronized\n- fun getInstance(gradle: Gradle): KotlinGradleBuildServices {\n+ fun getInstance(project: Project, listenerRegistryHolder: BuildEventsListenerRegistryHolder): KotlinGradleBuildServices {\n val log = Logging.getLogger(KotlinGradleBuildServices::class.java)\n+ val kotlinGradleListenerProvider: org.gradle.api.provider.Provider = project.provider {\n+ KotlinGradleBuildListener(KotlinGradleFinishBuildHandler())\n+ }\n \n if (instance != null) {\n log.kotlinDebug(ALREADY_INITIALIZED_MESSAGE)\n return instance!!\n }\n \n+ val gradle = project.gradle\n val services = KotlinGradleBuildServices(gradle)\n- gradle.addBuildListener(services)\n- instance = services\n- log.kotlinDebug(INIT_MESSAGE)\n+ if (isConfigurationCacheAvailable(gradle)) {", + "comment_created_at": "2020-07-03T17:29:28+00:00", + "comment_author": "eskatos", + "comment_body": ":x: the check should be `gradleVersion >= 6.1` instead where `BuildEventsListenerRegistry` is available", + "pr_file_module": null + }, + { + "comment_id": "454991396", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3521, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/KotlinGradleBuildServices.kt", + "discussion_id": "449666709", + "commented_code": "@@ -34,36 +32,69 @@ internal class KotlinGradleBuildServices private constructor(\n val ALREADY_INITIALIZED_MESSAGE = \"$CLASS_NAME is already initialized\"\n \n @field:Volatile\n- private var instance: KotlinGradleBuildServices? = null\n+ internal var instance: KotlinGradleBuildServices? 
= null\n+\n+// @JvmStatic\n+// @Synchronized\n+// fun getInstance(gradle: Gradle): KotlinGradleBuildServices {\n+// val log = Logging.getLogger(KotlinGradleBuildServices::class.java)\n+//\n+// if (instance != null) {\n+// log.kotlinDebug(ALREADY_INITIALIZED_MESSAGE)\n+// return instance!!\n+// }\n+//\n+// val services = KotlinGradleBuildServices(gradle)\n+// instance = services\n+// if (!isGradleVersionAtLeast(6,1)) {\n+// gradle.addBuildListener(services)\n+// log.kotlinDebug(INIT_MESSAGE)\n+// } else {\n+// BuildEventsListenerRegistry.\n+// }\n+//\n+// services.buildStarted()\n+// return services\n+// }\n+\n \n @JvmStatic\n @Synchronized\n- fun getInstance(gradle: Gradle): KotlinGradleBuildServices {\n+ fun getInstance(project: Project, listenerRegistryHolder: BuildEventsListenerRegistryHolder): KotlinGradleBuildServices {\n val log = Logging.getLogger(KotlinGradleBuildServices::class.java)\n+ val kotlinGradleListenerProvider: org.gradle.api.provider.Provider = project.provider {\n+ KotlinGradleBuildListener(KotlinGradleFinishBuildHandler())\n+ }\n \n if (instance != null) {\n log.kotlinDebug(ALREADY_INITIALIZED_MESSAGE)\n return instance!!\n }\n \n+ val gradle = project.gradle\n val services = KotlinGradleBuildServices(gradle)\n- gradle.addBuildListener(services)\n- instance = services\n- log.kotlinDebug(INIT_MESSAGE)\n+ if (isConfigurationCacheAvailable(gradle)) {", + "comment_created_at": "2020-07-15T11:48:11+00:00", + "comment_author": "nav-nav", + "comment_body": "New listener is not ready for configuration cache ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "449666750", + "pr_number": 3521, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/statistics/KotlinBuildStatsService.kt", + "created_at": "2020-07-03T17:29:43+00:00", + "commented_code": ")\n instance = JMXKotlinBuildStatsService(mbs, beanName)\n } else {\n val kotlinBuildStatProvider = project.provider{ KotlinBuildStatListener(beanName) }\n val newInstance = DefaultKotlinBuildStatsService(gradle, beanName)\n\n if (isConfigurationCacheAvailable(gradle)) {", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "449666750", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3521, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/statistics/KotlinBuildStatsService.kt", + "discussion_id": "449666750", + "commented_code": "@@ -106,21 +105,26 @@ internal abstract class KotlinBuildStatsService internal constructor() : BuildAd\n )\n instance = JMXKotlinBuildStatsService(mbs, beanName)\n } else {\n+ val kotlinBuildStatProvider = project.provider{ KotlinBuildStatListener(beanName) }\n val newInstance = DefaultKotlinBuildStatsService(gradle, beanName)\n+\n+ if (isConfigurationCacheAvailable(gradle)) {", + "comment_created_at": "2020-07-03T17:29:43+00:00", + "comment_author": "eskatos", + "comment_body": ":x: the check should be `gradleVersion >= 6.1` instead where `BuildEventsListenerRegistry` is available", + "pr_file_module": null + }, + { + "comment_id": "454991473", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3521, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/statistics/KotlinBuildStatsService.kt", + "discussion_id": "449666750", + "commented_code": "@@ -106,21 +105,26 @@ internal abstract class KotlinBuildStatsService internal constructor() : BuildAd\n )\n instance = JMXKotlinBuildStatsService(mbs, 
beanName)\n } else {\n+ val kotlinBuildStatProvider = project.provider{ KotlinBuildStatListener(beanName) }\n val newInstance = DefaultKotlinBuildStatsService(gradle, beanName)\n+\n+ if (isConfigurationCacheAvailable(gradle)) {", + "comment_created_at": "2020-07-15T11:48:19+00:00", + "comment_author": "nav-nav", + "comment_body": "New listener is not ready for configuration cache", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "450245646", + "pr_number": 3521, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/KotlinGradleFinishBuildHandler.kt", + "created_at": "2020-07-06T14:07:53+00:00", + "commented_code": "/*\n * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage org.jetbrains.kotlin.gradle.plugin\n\nimport org.gradle.api.invocation.Gradle\nimport org.gradle.api.logging.Logging\nimport org.jetbrains.kotlin.compilerRunner.DELETED_SESSION_FILE_PREFIX\nimport org.jetbrains.kotlin.compilerRunner.GradleCompilerRunner\nimport org.jetbrains.kotlin.gradle.logging.kotlinDebug\nimport org.jetbrains.kotlin.gradle.plugin.internal.state.TaskExecutionResults\nimport org.jetbrains.kotlin.gradle.plugin.internal.state.TaskLoggers\nimport org.jetbrains.kotlin.gradle.utils.relativeToRoot\nimport org.jetbrains.kotlin.utils.addToStdlib.sumByLong\nimport java.lang.management.ManagementFactory\nimport kotlin.math.max\n\n// move to KotlinGradleBuildListener when min supported version is 6.1\nclass KotlinGradleFinishBuildHandler {\n\n private val log = Logging.getLogger(this.javaClass)\n private var startMemory: Long? = null\n private val shouldReportMemoryUsage = System.getProperty(KotlinGradleBuildServices.SHOULD_REPORT_MEMORY_USAGE_PROPERTY) != null", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "450245646", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3521, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/KotlinGradleFinishBuildHandler.kt", + "discussion_id": "450245646", + "commented_code": "@@ -0,0 +1,77 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlin.gradle.plugin\n+\n+import org.gradle.api.invocation.Gradle\n+import org.gradle.api.logging.Logging\n+import org.jetbrains.kotlin.compilerRunner.DELETED_SESSION_FILE_PREFIX\n+import org.jetbrains.kotlin.compilerRunner.GradleCompilerRunner\n+import org.jetbrains.kotlin.gradle.logging.kotlinDebug\n+import org.jetbrains.kotlin.gradle.plugin.internal.state.TaskExecutionResults\n+import org.jetbrains.kotlin.gradle.plugin.internal.state.TaskLoggers\n+import org.jetbrains.kotlin.gradle.utils.relativeToRoot\n+import org.jetbrains.kotlin.utils.addToStdlib.sumByLong\n+import java.lang.management.ManagementFactory\n+import kotlin.math.max\n+\n+// move to KotlinGradleBuildListener when min supported version is 6.1\n+class KotlinGradleFinishBuildHandler {\n+\n+ private val log = Logging.getLogger(this.javaClass)\n+ private var startMemory: Long? 
= null\n+ private val shouldReportMemoryUsage = System.getProperty(KotlinGradleBuildServices.SHOULD_REPORT_MEMORY_USAGE_PROPERTY) != null", + "comment_created_at": "2020-07-06T14:07:53+00:00", + "comment_author": "gavra0", + "comment_body": "Use `ObjectFactory.systemProperty` to get the value to allow reconfiguration if it changes.", + "pr_file_module": null + }, + { + "comment_id": "450265691", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3521, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/KotlinGradleFinishBuildHandler.kt", + "discussion_id": "450245646", + "commented_code": "@@ -0,0 +1,77 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlin.gradle.plugin\n+\n+import org.gradle.api.invocation.Gradle\n+import org.gradle.api.logging.Logging\n+import org.jetbrains.kotlin.compilerRunner.DELETED_SESSION_FILE_PREFIX\n+import org.jetbrains.kotlin.compilerRunner.GradleCompilerRunner\n+import org.jetbrains.kotlin.gradle.logging.kotlinDebug\n+import org.jetbrains.kotlin.gradle.plugin.internal.state.TaskExecutionResults\n+import org.jetbrains.kotlin.gradle.plugin.internal.state.TaskLoggers\n+import org.jetbrains.kotlin.gradle.utils.relativeToRoot\n+import org.jetbrains.kotlin.utils.addToStdlib.sumByLong\n+import java.lang.management.ManagementFactory\n+import kotlin.math.max\n+\n+// move to KotlinGradleBuildListener when min supported version is 6.1\n+class KotlinGradleFinishBuildHandler {\n+\n+ private val log = Logging.getLogger(this.javaClass)\n+ private var startMemory: Long? = null\n+ private val shouldReportMemoryUsage = System.getProperty(KotlinGradleBuildServices.SHOULD_REPORT_MEMORY_USAGE_PROPERTY) != null", + "comment_created_at": "2020-07-06T14:35:54+00:00", + "comment_author": "eskatos", + "comment_body": "Should be `ProviderFactory.systemProperty(name)`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "450262095", + "pr_number": 3521, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/mpp/TransformKotlinGranularMetadata.kt", + "created_at": "2020-07-06T14:30:50+00:00", + "commented_code": "import org.jetbrains.kotlin.gradle.plugin.sources.getSourceSetHierarchy\nimport org.jetbrains.kotlin.gradle.targets.metadata.ALL_COMPILE_METADATA_CONFIGURATION_NAME\nimport org.jetbrains.kotlin.gradle.targets.metadata.KotlinMetadataTargetConfigurator\nimport org.jetbrains.kotlin.gradle.utils.getValue\nimport java.io.File\nimport javax.inject.Inject\n\nopen class TransformKotlinGranularMetadata\n@Inject constructor(\n @get:Internal\n @field:Transient\n val kotlinSourceSet: KotlinSourceSet\n) : DefaultTask() {\n\n @get:OutputDirectory\n val outputsDir: File = project.buildDir.resolve(\"kotlinSourceSetMetadata/${kotlinSourceSet.name}\")\n val outputsDir: File by project.provider {\n project.buildDir.resolve(\"kotlinSourceSetMetadata/${kotlinSourceSet.name}\")", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "450262095", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3521, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/mpp/TransformKotlinGranularMetadata.kt", + "discussion_id": "450262095", + "commented_code": "@@ -15,22 +15,28 @@ import 
org.jetbrains.kotlin.gradle.plugin.sources.KotlinDependencyScope.*\n import org.jetbrains.kotlin.gradle.plugin.sources.getSourceSetHierarchy\n import org.jetbrains.kotlin.gradle.targets.metadata.ALL_COMPILE_METADATA_CONFIGURATION_NAME\n import org.jetbrains.kotlin.gradle.targets.metadata.KotlinMetadataTargetConfigurator\n+import org.jetbrains.kotlin.gradle.utils.getValue\n import java.io.File\n import javax.inject.Inject\n \n open class TransformKotlinGranularMetadata\n @Inject constructor(\n @get:Internal\n+ @field:Transient\n val kotlinSourceSet: KotlinSourceSet\n ) : DefaultTask() {\n \n @get:OutputDirectory\n- val outputsDir: File = project.buildDir.resolve(\"kotlinSourceSetMetadata/${kotlinSourceSet.name}\")\n+ val outputsDir: File by project.provider {\n+ project.buildDir.resolve(\"kotlinSourceSetMetadata/${kotlinSourceSet.name}\")", + "comment_created_at": "2020-07-06T14:30:50+00:00", + "comment_author": "gavra0", + "comment_body": "Use `ProjectLayout` methods to compute this. E.g. this will not detect changes if buildDir changes.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "456298937", + "pr_number": 3571, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/internal/kapt/KaptGenerateStubsTask.kt", + "created_at": "2020-07-17T08:29:57+00:00", + "commented_code": "error(\"KaptGenerateStubsTask.useModuleDetection setter should not be called!\")\n }\n\n @get:Input\n val verbose = (project.hasProperty(\"kapt.verbose\") && project.property(\"kapt.verbose\").toString().toBoolean() == true)", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "456298937", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 3571, + "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/internal/kapt/KaptGenerateStubsTask.kt", + "discussion_id": "456298937", + "commented_code": "@@ -69,6 +69,9 @@ open class KaptGenerateStubsTask : KotlinCompile() {\n error(\"KaptGenerateStubsTask.useModuleDetection setter should not be called!\")\n }\n \n+ @get:Input\n+ val verbose = (project.hasProperty(\"kapt.verbose\") && project.property(\"kapt.verbose\").toString().toBoolean() == true)", + "comment_created_at": "2020-07-17T08:29:57+00:00", + "comment_author": "eskatos", + "comment_body": ":o: This could be modelled as a `Provider` to account for later changes to that property:\r\n\r\n```kotlin\r\nproject.providers.gradleProperty(\"kapt.verbose\").map { it.toBoolean() }.orElse(false)\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kotlin-use-configuration-property-providers.md b/_reviewers/kotlin-use-configuration-property-providers.md index b0f79b9..da255c9 100644 --- a/_reviewers/kotlin-use-configuration-property-providers.md +++ b/_reviewers/kotlin-use-configuration-property-providers.md @@ -42,209 +42,3 @@ When dealing with file paths or build directory locations, use ProjectLayout: val layout = project.layout val outputFile = layout.buildDirectory.file("output/file.txt") ``` - - -[ - { - "discussion_id": "449662023", - "pr_number": 3521, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/internal/kapt/KaptGenerateStubsTask.kt", - "created_at": "2020-07-03T17:06:24+00:00", - "commented_code": "error(\"KaptGenerateStubsTask.useModuleDetection setter should not be called!\")\n }\n\n @get:Input\n val verbose = (project.hasProperty(\"kapt.verbose\") && 
project.property(\"kapt.verbose\").toString().toBoolean() == true)", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "449662023", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3521, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/internal/kapt/KaptGenerateStubsTask.kt", - "discussion_id": "449662023", - "commented_code": "@@ -69,6 +69,9 @@ open class KaptGenerateStubsTask : KotlinCompile() {\n error(\"KaptGenerateStubsTask.useModuleDetection setter should not be called!\")\n }\n \n+ @get:Input\n+ val verbose = (project.hasProperty(\"kapt.verbose\") && project.property(\"kapt.verbose\").toString().toBoolean() == true)", - "comment_created_at": "2020-07-03T17:06:24+00:00", - "comment_author": "eskatos", - "comment_body": ":o: This could be modelled as a `Provider` to account for later changes to that property:\r\n\r\n```kotlin\r\nproject.providers.gradleProperty(\"kapt.verbose\").map { it.toBoolean() }.orElse(false)\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "449666668", - "pr_number": 3521, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/BuildEventsListenerRegistryHolder.kt", - "created_at": "2020-07-03T17:29:12+00:00", - "commented_code": "/*\n * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage org.jetbrains.kotlin.gradle.plugin\n\nimport org.gradle.api.Project\nimport org.gradle.build.event.BuildEventsListenerRegistry\nimport org.jetbrains.kotlin.gradle.utils.isConfigurationCacheAvailable\nimport javax.inject.Inject\n\nopen class BuildEventsListenerRegistryHolder @Inject constructor(val listenerRegistry: BuildEventsListenerRegistry?) {\n companion object {\n fun getInstance(project: Project) = run {\n if (isConfigurationCacheAvailable(project.gradle)) {", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "449666668", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3521, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/BuildEventsListenerRegistryHolder.kt", - "discussion_id": "449666668", - "commented_code": "@@ -0,0 +1,23 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlin.gradle.plugin\n+\n+import org.gradle.api.Project\n+import org.gradle.build.event.BuildEventsListenerRegistry\n+import org.jetbrains.kotlin.gradle.utils.isConfigurationCacheAvailable\n+import javax.inject.Inject\n+\n+open class BuildEventsListenerRegistryHolder @Inject constructor(val listenerRegistry: BuildEventsListenerRegistry?) 
{\n+ companion object {\n+ fun getInstance(project: Project) = run {\n+ if (isConfigurationCacheAvailable(project.gradle)) {", - "comment_created_at": "2020-07-03T17:29:12+00:00", - "comment_author": "eskatos", - "comment_body": ":x: the check should be `gradleVersion >= 6.1` instead where `BuildEventsListenerRegistry` is available", - "pr_file_module": null - }, - { - "comment_id": "454990923", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3521, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/BuildEventsListenerRegistryHolder.kt", - "discussion_id": "449666668", - "commented_code": "@@ -0,0 +1,23 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlin.gradle.plugin\n+\n+import org.gradle.api.Project\n+import org.gradle.build.event.BuildEventsListenerRegistry\n+import org.jetbrains.kotlin.gradle.utils.isConfigurationCacheAvailable\n+import javax.inject.Inject\n+\n+open class BuildEventsListenerRegistryHolder @Inject constructor(val listenerRegistry: BuildEventsListenerRegistry?) {\n+ companion object {\n+ fun getInstance(project: Project) = run {\n+ if (isConfigurationCacheAvailable(project.gradle)) {", - "comment_created_at": "2020-07-15T11:47:18+00:00", - "comment_author": "nav-nav", - "comment_body": "New listener is not ready for configuration cache", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "449666709", - "pr_number": 3521, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/KotlinGradleBuildServices.kt", - "created_at": "2020-07-03T17:29:28+00:00", - "commented_code": "val ALREADY_INITIALIZED_MESSAGE = \"$CLASS_NAME is already initialized\"\n\n @field:Volatile\n private var instance: KotlinGradleBuildServices? = null\n internal var instance: KotlinGradleBuildServices? 
= null\n\n// @JvmStatic\n// @Synchronized\n// fun getInstance(gradle: Gradle): KotlinGradleBuildServices {\n// val log = Logging.getLogger(KotlinGradleBuildServices::class.java)\n//\n// if (instance != null) {\n// log.kotlinDebug(ALREADY_INITIALIZED_MESSAGE)\n// return instance!!\n// }\n//\n// val services = KotlinGradleBuildServices(gradle)\n// instance = services\n// if (!isGradleVersionAtLeast(6,1)) {\n// gradle.addBuildListener(services)\n// log.kotlinDebug(INIT_MESSAGE)\n// } else {\n// BuildEventsListenerRegistry.\n// }\n//\n// services.buildStarted()\n// return services\n// }\n\n\n @JvmStatic\n @Synchronized\n fun getInstance(gradle: Gradle): KotlinGradleBuildServices {\n fun getInstance(project: Project, listenerRegistryHolder: BuildEventsListenerRegistryHolder): KotlinGradleBuildServices {\n val log = Logging.getLogger(KotlinGradleBuildServices::class.java)\n val kotlinGradleListenerProvider: org.gradle.api.provider.Provider = project.provider {\n KotlinGradleBuildListener(KotlinGradleFinishBuildHandler())\n }\n\n if (instance != null) {\n log.kotlinDebug(ALREADY_INITIALIZED_MESSAGE)\n return instance!!\n }\n\n val gradle = project.gradle\n val services = KotlinGradleBuildServices(gradle)\n gradle.addBuildListener(services)\n instance = services\n log.kotlinDebug(INIT_MESSAGE)\n if (isConfigurationCacheAvailable(gradle)) {", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "449666709", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3521, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/KotlinGradleBuildServices.kt", - "discussion_id": "449666709", - "commented_code": "@@ -34,36 +32,69 @@ internal class KotlinGradleBuildServices private constructor(\n val ALREADY_INITIALIZED_MESSAGE = \"$CLASS_NAME is already initialized\"\n \n @field:Volatile\n- private var instance: KotlinGradleBuildServices? = null\n+ internal var instance: KotlinGradleBuildServices? 
= null\n+\n+// @JvmStatic\n+// @Synchronized\n+// fun getInstance(gradle: Gradle): KotlinGradleBuildServices {\n+// val log = Logging.getLogger(KotlinGradleBuildServices::class.java)\n+//\n+// if (instance != null) {\n+// log.kotlinDebug(ALREADY_INITIALIZED_MESSAGE)\n+// return instance!!\n+// }\n+//\n+// val services = KotlinGradleBuildServices(gradle)\n+// instance = services\n+// if (!isGradleVersionAtLeast(6,1)) {\n+// gradle.addBuildListener(services)\n+// log.kotlinDebug(INIT_MESSAGE)\n+// } else {\n+// BuildEventsListenerRegistry.\n+// }\n+//\n+// services.buildStarted()\n+// return services\n+// }\n+\n \n @JvmStatic\n @Synchronized\n- fun getInstance(gradle: Gradle): KotlinGradleBuildServices {\n+ fun getInstance(project: Project, listenerRegistryHolder: BuildEventsListenerRegistryHolder): KotlinGradleBuildServices {\n val log = Logging.getLogger(KotlinGradleBuildServices::class.java)\n+ val kotlinGradleListenerProvider: org.gradle.api.provider.Provider = project.provider {\n+ KotlinGradleBuildListener(KotlinGradleFinishBuildHandler())\n+ }\n \n if (instance != null) {\n log.kotlinDebug(ALREADY_INITIALIZED_MESSAGE)\n return instance!!\n }\n \n+ val gradle = project.gradle\n val services = KotlinGradleBuildServices(gradle)\n- gradle.addBuildListener(services)\n- instance = services\n- log.kotlinDebug(INIT_MESSAGE)\n+ if (isConfigurationCacheAvailable(gradle)) {", - "comment_created_at": "2020-07-03T17:29:28+00:00", - "comment_author": "eskatos", - "comment_body": ":x: the check should be `gradleVersion >= 6.1` instead where `BuildEventsListenerRegistry` is available", - "pr_file_module": null - }, - { - "comment_id": "454991396", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3521, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/KotlinGradleBuildServices.kt", - "discussion_id": "449666709", - "commented_code": "@@ -34,36 +32,69 @@ internal class KotlinGradleBuildServices private constructor(\n val ALREADY_INITIALIZED_MESSAGE = \"$CLASS_NAME is already initialized\"\n \n @field:Volatile\n- private var instance: KotlinGradleBuildServices? = null\n+ internal var instance: KotlinGradleBuildServices? 
= null\n+\n+// @JvmStatic\n+// @Synchronized\n+// fun getInstance(gradle: Gradle): KotlinGradleBuildServices {\n+// val log = Logging.getLogger(KotlinGradleBuildServices::class.java)\n+//\n+// if (instance != null) {\n+// log.kotlinDebug(ALREADY_INITIALIZED_MESSAGE)\n+// return instance!!\n+// }\n+//\n+// val services = KotlinGradleBuildServices(gradle)\n+// instance = services\n+// if (!isGradleVersionAtLeast(6,1)) {\n+// gradle.addBuildListener(services)\n+// log.kotlinDebug(INIT_MESSAGE)\n+// } else {\n+// BuildEventsListenerRegistry.\n+// }\n+//\n+// services.buildStarted()\n+// return services\n+// }\n+\n \n @JvmStatic\n @Synchronized\n- fun getInstance(gradle: Gradle): KotlinGradleBuildServices {\n+ fun getInstance(project: Project, listenerRegistryHolder: BuildEventsListenerRegistryHolder): KotlinGradleBuildServices {\n val log = Logging.getLogger(KotlinGradleBuildServices::class.java)\n+ val kotlinGradleListenerProvider: org.gradle.api.provider.Provider = project.provider {\n+ KotlinGradleBuildListener(KotlinGradleFinishBuildHandler())\n+ }\n \n if (instance != null) {\n log.kotlinDebug(ALREADY_INITIALIZED_MESSAGE)\n return instance!!\n }\n \n+ val gradle = project.gradle\n val services = KotlinGradleBuildServices(gradle)\n- gradle.addBuildListener(services)\n- instance = services\n- log.kotlinDebug(INIT_MESSAGE)\n+ if (isConfigurationCacheAvailable(gradle)) {", - "comment_created_at": "2020-07-15T11:48:11+00:00", - "comment_author": "nav-nav", - "comment_body": "New listener is not ready for configuration cache ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "449666750", - "pr_number": 3521, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/statistics/KotlinBuildStatsService.kt", - "created_at": "2020-07-03T17:29:43+00:00", - "commented_code": ")\n instance = JMXKotlinBuildStatsService(mbs, beanName)\n } else {\n val kotlinBuildStatProvider = project.provider{ KotlinBuildStatListener(beanName) }\n val newInstance = DefaultKotlinBuildStatsService(gradle, beanName)\n\n if (isConfigurationCacheAvailable(gradle)) {", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "449666750", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3521, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/statistics/KotlinBuildStatsService.kt", - "discussion_id": "449666750", - "commented_code": "@@ -106,21 +105,26 @@ internal abstract class KotlinBuildStatsService internal constructor() : BuildAd\n )\n instance = JMXKotlinBuildStatsService(mbs, beanName)\n } else {\n+ val kotlinBuildStatProvider = project.provider{ KotlinBuildStatListener(beanName) }\n val newInstance = DefaultKotlinBuildStatsService(gradle, beanName)\n+\n+ if (isConfigurationCacheAvailable(gradle)) {", - "comment_created_at": "2020-07-03T17:29:43+00:00", - "comment_author": "eskatos", - "comment_body": ":x: the check should be `gradleVersion >= 6.1` instead where `BuildEventsListenerRegistry` is available", - "pr_file_module": null - }, - { - "comment_id": "454991473", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3521, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/statistics/KotlinBuildStatsService.kt", - "discussion_id": "449666750", - "commented_code": "@@ -106,21 +105,26 @@ internal abstract class KotlinBuildStatsService internal constructor() : BuildAd\n )\n instance = JMXKotlinBuildStatsService(mbs, 
beanName)\n } else {\n+ val kotlinBuildStatProvider = project.provider{ KotlinBuildStatListener(beanName) }\n val newInstance = DefaultKotlinBuildStatsService(gradle, beanName)\n+\n+ if (isConfigurationCacheAvailable(gradle)) {", - "comment_created_at": "2020-07-15T11:48:19+00:00", - "comment_author": "nav-nav", - "comment_body": "New listener is not ready for configuration cache", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "450245646", - "pr_number": 3521, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/KotlinGradleFinishBuildHandler.kt", - "created_at": "2020-07-06T14:07:53+00:00", - "commented_code": "/*\n * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage org.jetbrains.kotlin.gradle.plugin\n\nimport org.gradle.api.invocation.Gradle\nimport org.gradle.api.logging.Logging\nimport org.jetbrains.kotlin.compilerRunner.DELETED_SESSION_FILE_PREFIX\nimport org.jetbrains.kotlin.compilerRunner.GradleCompilerRunner\nimport org.jetbrains.kotlin.gradle.logging.kotlinDebug\nimport org.jetbrains.kotlin.gradle.plugin.internal.state.TaskExecutionResults\nimport org.jetbrains.kotlin.gradle.plugin.internal.state.TaskLoggers\nimport org.jetbrains.kotlin.gradle.utils.relativeToRoot\nimport org.jetbrains.kotlin.utils.addToStdlib.sumByLong\nimport java.lang.management.ManagementFactory\nimport kotlin.math.max\n\n// move to KotlinGradleBuildListener when min supported version is 6.1\nclass KotlinGradleFinishBuildHandler {\n\n private val log = Logging.getLogger(this.javaClass)\n private var startMemory: Long? = null\n private val shouldReportMemoryUsage = System.getProperty(KotlinGradleBuildServices.SHOULD_REPORT_MEMORY_USAGE_PROPERTY) != null", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "450245646", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3521, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/KotlinGradleFinishBuildHandler.kt", - "discussion_id": "450245646", - "commented_code": "@@ -0,0 +1,77 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlin.gradle.plugin\n+\n+import org.gradle.api.invocation.Gradle\n+import org.gradle.api.logging.Logging\n+import org.jetbrains.kotlin.compilerRunner.DELETED_SESSION_FILE_PREFIX\n+import org.jetbrains.kotlin.compilerRunner.GradleCompilerRunner\n+import org.jetbrains.kotlin.gradle.logging.kotlinDebug\n+import org.jetbrains.kotlin.gradle.plugin.internal.state.TaskExecutionResults\n+import org.jetbrains.kotlin.gradle.plugin.internal.state.TaskLoggers\n+import org.jetbrains.kotlin.gradle.utils.relativeToRoot\n+import org.jetbrains.kotlin.utils.addToStdlib.sumByLong\n+import java.lang.management.ManagementFactory\n+import kotlin.math.max\n+\n+// move to KotlinGradleBuildListener when min supported version is 6.1\n+class KotlinGradleFinishBuildHandler {\n+\n+ private val log = Logging.getLogger(this.javaClass)\n+ private var startMemory: Long? 
= null\n+ private val shouldReportMemoryUsage = System.getProperty(KotlinGradleBuildServices.SHOULD_REPORT_MEMORY_USAGE_PROPERTY) != null", - "comment_created_at": "2020-07-06T14:07:53+00:00", - "comment_author": "gavra0", - "comment_body": "Use `ObjectFactory.systemProperty` to get the value to allow reconfiguration if it changes.", - "pr_file_module": null - }, - { - "comment_id": "450265691", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3521, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/KotlinGradleFinishBuildHandler.kt", - "discussion_id": "450245646", - "commented_code": "@@ -0,0 +1,77 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlin.gradle.plugin\n+\n+import org.gradle.api.invocation.Gradle\n+import org.gradle.api.logging.Logging\n+import org.jetbrains.kotlin.compilerRunner.DELETED_SESSION_FILE_PREFIX\n+import org.jetbrains.kotlin.compilerRunner.GradleCompilerRunner\n+import org.jetbrains.kotlin.gradle.logging.kotlinDebug\n+import org.jetbrains.kotlin.gradle.plugin.internal.state.TaskExecutionResults\n+import org.jetbrains.kotlin.gradle.plugin.internal.state.TaskLoggers\n+import org.jetbrains.kotlin.gradle.utils.relativeToRoot\n+import org.jetbrains.kotlin.utils.addToStdlib.sumByLong\n+import java.lang.management.ManagementFactory\n+import kotlin.math.max\n+\n+// move to KotlinGradleBuildListener when min supported version is 6.1\n+class KotlinGradleFinishBuildHandler {\n+\n+ private val log = Logging.getLogger(this.javaClass)\n+ private var startMemory: Long? = null\n+ private val shouldReportMemoryUsage = System.getProperty(KotlinGradleBuildServices.SHOULD_REPORT_MEMORY_USAGE_PROPERTY) != null", - "comment_created_at": "2020-07-06T14:35:54+00:00", - "comment_author": "eskatos", - "comment_body": "Should be `ProviderFactory.systemProperty(name)`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "450262095", - "pr_number": 3521, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/mpp/TransformKotlinGranularMetadata.kt", - "created_at": "2020-07-06T14:30:50+00:00", - "commented_code": "import org.jetbrains.kotlin.gradle.plugin.sources.getSourceSetHierarchy\nimport org.jetbrains.kotlin.gradle.targets.metadata.ALL_COMPILE_METADATA_CONFIGURATION_NAME\nimport org.jetbrains.kotlin.gradle.targets.metadata.KotlinMetadataTargetConfigurator\nimport org.jetbrains.kotlin.gradle.utils.getValue\nimport java.io.File\nimport javax.inject.Inject\n\nopen class TransformKotlinGranularMetadata\n@Inject constructor(\n @get:Internal\n @field:Transient\n val kotlinSourceSet: KotlinSourceSet\n) : DefaultTask() {\n\n @get:OutputDirectory\n val outputsDir: File = project.buildDir.resolve(\"kotlinSourceSetMetadata/${kotlinSourceSet.name}\")\n val outputsDir: File by project.provider {\n project.buildDir.resolve(\"kotlinSourceSetMetadata/${kotlinSourceSet.name}\")", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "450262095", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3521, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/plugin/mpp/TransformKotlinGranularMetadata.kt", - "discussion_id": "450262095", - "commented_code": "@@ -15,22 +15,28 @@ import 
org.jetbrains.kotlin.gradle.plugin.sources.KotlinDependencyScope.*\n import org.jetbrains.kotlin.gradle.plugin.sources.getSourceSetHierarchy\n import org.jetbrains.kotlin.gradle.targets.metadata.ALL_COMPILE_METADATA_CONFIGURATION_NAME\n import org.jetbrains.kotlin.gradle.targets.metadata.KotlinMetadataTargetConfigurator\n+import org.jetbrains.kotlin.gradle.utils.getValue\n import java.io.File\n import javax.inject.Inject\n \n open class TransformKotlinGranularMetadata\n @Inject constructor(\n @get:Internal\n+ @field:Transient\n val kotlinSourceSet: KotlinSourceSet\n ) : DefaultTask() {\n \n @get:OutputDirectory\n- val outputsDir: File = project.buildDir.resolve(\"kotlinSourceSetMetadata/${kotlinSourceSet.name}\")\n+ val outputsDir: File by project.provider {\n+ project.buildDir.resolve(\"kotlinSourceSetMetadata/${kotlinSourceSet.name}\")", - "comment_created_at": "2020-07-06T14:30:50+00:00", - "comment_author": "gavra0", - "comment_body": "Use `ProjectLayout` methods to compute this. E.g. this will not detect changes if buildDir changes.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "456298937", - "pr_number": 3571, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/internal/kapt/KaptGenerateStubsTask.kt", - "created_at": "2020-07-17T08:29:57+00:00", - "commented_code": "error(\"KaptGenerateStubsTask.useModuleDetection setter should not be called!\")\n }\n\n @get:Input\n val verbose = (project.hasProperty(\"kapt.verbose\") && project.property(\"kapt.verbose\").toString().toBoolean() == true)", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "456298937", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 3571, - "pr_file": "libraries/tools/kotlin-gradle-plugin/src/main/kotlin/org/jetbrains/kotlin/gradle/internal/kapt/KaptGenerateStubsTask.kt", - "discussion_id": "456298937", - "commented_code": "@@ -69,6 +69,9 @@ open class KaptGenerateStubsTask : KotlinCompile() {\n error(\"KaptGenerateStubsTask.useModuleDetection setter should not be called!\")\n }\n \n+ @get:Input\n+ val verbose = (project.hasProperty(\"kapt.verbose\") && project.property(\"kapt.verbose\").toString().toBoolean() == true)", - "comment_created_at": "2020-07-17T08:29:57+00:00", - "comment_author": "eskatos", - "comment_body": ":o: This could be modelled as a `Provider` to account for later changes to that property:\r\n\r\n```kotlin\r\nproject.providers.gradleProperty(\"kapt.verbose\").map { it.toBoolean() }.orElse(false)\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kotlin-use-defensive-null-checks.json b/_reviewers/kotlin-use-defensive-null-checks.json new file mode 100644 index 0000000..0f9f318 --- /dev/null +++ b/_reviewers/kotlin-use-defensive-null-checks.json @@ -0,0 +1,174 @@ +[ + { + "discussion_id": "1717528372", + "pr_number": 5311, + "pr_file": "compiler/fir/raw-fir/light-tree2fir/src/org/jetbrains/kotlin/fir/lightTree/converter/LightTreeRawFirExpressionBuilder.kt", + "created_at": "2024-08-14T20:43:41+00:00", + "commented_code": "}\n }\n\n val result = firExpression ?: buildErrorExpression(null, ConeNotAnnotationContainer(\"???\"))\n val result = firExpression ?: buildErrorExpression(annotatedExpression.toFirSourceElement(), ConeNotAnnotationContainer(\"???\"))", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1717528372", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5311, + "pr_file": 
"compiler/fir/raw-fir/light-tree2fir/src/org/jetbrains/kotlin/fir/lightTree/converter/LightTreeRawFirExpressionBuilder.kt", + "discussion_id": "1717528372", + "commented_code": "@@ -498,7 +498,7 @@ class LightTreeRawFirExpressionBuilder(\n }\n }\n \n- val result = firExpression ?: buildErrorExpression(null, ConeNotAnnotationContainer(\"???\"))\n+ val result = firExpression ?: buildErrorExpression(annotatedExpression.toFirSourceElement(), ConeNotAnnotationContainer(\"???\"))", + "comment_created_at": "2024-08-14T20:43:41+00:00", + "comment_author": "isuckatcs", + "comment_body": "This must have a location, otherwise we get a crash in `FirContractChecker`.\r\n\r\n```kotlin\r\nobject FirContractChecker {\r\n ...\r\n private fun checkUnresolvedEffects(\r\n ...\r\n ) {\r\n ...\r\n reporter.reportOn(unresolvedEffect.source, ...)\r\n // ^ source is asserted to be non-null inside reportOn()\r\n ...\r\n }\r\n ...\r\n}\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1721465179", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5311, + "pr_file": "compiler/fir/raw-fir/light-tree2fir/src/org/jetbrains/kotlin/fir/lightTree/converter/LightTreeRawFirExpressionBuilder.kt", + "discussion_id": "1717528372", + "commented_code": "@@ -498,7 +498,7 @@ class LightTreeRawFirExpressionBuilder(\n }\n }\n \n- val result = firExpression ?: buildErrorExpression(null, ConeNotAnnotationContainer(\"???\"))\n+ val result = firExpression ?: buildErrorExpression(annotatedExpression.toFirSourceElement(), ConeNotAnnotationContainer(\"???\"))", + "comment_created_at": "2024-08-19T09:13:38+00:00", + "comment_author": "demiurg906", + "comment_body": "Yep, this is a correct fix. Could you please check that the corresponding place in `PsiRawFirBuilder` also correctly sets the source? ", + "pr_file_module": null + }, + { + "comment_id": "1721651171", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5311, + "pr_file": "compiler/fir/raw-fir/light-tree2fir/src/org/jetbrains/kotlin/fir/lightTree/converter/LightTreeRawFirExpressionBuilder.kt", + "discussion_id": "1717528372", + "commented_code": "@@ -498,7 +498,7 @@ class LightTreeRawFirExpressionBuilder(\n }\n }\n \n- val result = firExpression ?: buildErrorExpression(null, ConeNotAnnotationContainer(\"???\"))\n+ val result = firExpression ?: buildErrorExpression(annotatedExpression.toFirSourceElement(), ConeNotAnnotationContainer(\"???\"))", + "comment_created_at": "2024-08-19T11:34:50+00:00", + "comment_author": "isuckatcs", + "comment_body": "Yes, it sets it. In fact I checked that first and that was my leading clue that this one should be set too.\r\n\r\n```kotlin\r\nval result = rawResult as? 
FirAnnotationContainer\r\n ?: buildErrorExpression(\r\n expression.toFirSourceElement(),\r\n ConeNotAnnotationContainer(rawResult?.render() ?: \"???\")\r\n )\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1327002233", + "pr_number": 5184, + "pr_file": "plugins/parcelize/parcelize-compiler/parcelize.backend/src/org/jetbrains/kotlin/parcelize/AndroidSymbols.kt", + "created_at": "2023-09-15T08:50:43+00:00", + "commented_code": "isStatic = true\n }.symbol\n\n private val kotlinxCollectionsImmutable = FqName(kotlinxImmutable())\n private val kotlinCollections = FqName(\"kotlin.collections\")\n private val kotlinIterable: FqName = kotlinCollections.child(Name.identifier(\"Iterable\"))\n private val kotlinMap: FqName = kotlinCollections.child(Name.identifier(\"Map\"))\n\n private fun findKotlinxImmutableCollectionExtensionFunction(\n receiver: FqName,\n functionName: String,\n ): IrSimpleFunctionSymbol {\n val functionSymbol =\n pluginContext\n .referenceFunctions(CallableId(kotlinxCollectionsImmutable, Name.identifier(functionName)))\n .firstOrNull { it.owner.extensionReceiverParameter!!.type.classFqName == receiver }", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1327002233", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5184, + "pr_file": "plugins/parcelize/parcelize-compiler/parcelize.backend/src/org/jetbrains/kotlin/parcelize/AndroidSymbols.kt", + "discussion_id": "1327002233", + "commented_code": "@@ -499,6 +502,39 @@ class AndroidSymbols(\n isStatic = true\n }.symbol\n \n+ private val kotlinxCollectionsImmutable = FqName(kotlinxImmutable())\n+ private val kotlinCollections = FqName(\"kotlin.collections\")\n+ private val kotlinIterable: FqName = kotlinCollections.child(Name.identifier(\"Iterable\"))\n+ private val kotlinMap: FqName = kotlinCollections.child(Name.identifier(\"Map\"))\n+\n+ private fun findKotlinxImmutableCollectionExtensionFunction(\n+ receiver: FqName,\n+ functionName: String,\n+ ): IrSimpleFunctionSymbol {\n+ val functionSymbol =\n+ pluginContext\n+ .referenceFunctions(CallableId(kotlinxCollectionsImmutable, Name.identifier(functionName)))\n+ .firstOrNull { it.owner.extensionReceiverParameter!!.type.classFqName == receiver }", + "comment_created_at": "2023-09-15T08:50:43+00:00", + "comment_author": "madsager", + "comment_body": "Just to be safe, maybe it would be better to use null safe operations:\r\n\r\n`it.owner.extensionReceiverParameter?.type?.classFqName == receiver`\r\n\r\nThat way, if users put their own code with these package and function names (but where the function is not an extension function) on the classpath they will get the error message below instead of a null pointer exception.", + "pr_file_module": null + }, + { + "comment_id": "1327143301", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5184, + "pr_file": "plugins/parcelize/parcelize-compiler/parcelize.backend/src/org/jetbrains/kotlin/parcelize/AndroidSymbols.kt", + "discussion_id": "1327002233", + "commented_code": "@@ -499,6 +502,39 @@ class AndroidSymbols(\n isStatic = true\n }.symbol\n \n+ private val kotlinxCollectionsImmutable = FqName(kotlinxImmutable())\n+ private val kotlinCollections = FqName(\"kotlin.collections\")\n+ private val kotlinIterable: FqName = kotlinCollections.child(Name.identifier(\"Iterable\"))\n+ private val kotlinMap: FqName = kotlinCollections.child(Name.identifier(\"Map\"))\n+\n+ private fun findKotlinxImmutableCollectionExtensionFunction(\n+ receiver: FqName,\n+ functionName: String,\n+ ): 
IrSimpleFunctionSymbol {\n+ val functionSymbol =\n+ pluginContext\n+ .referenceFunctions(CallableId(kotlinxCollectionsImmutable, Name.identifier(functionName)))\n+ .firstOrNull { it.owner.extensionReceiverParameter!!.type.classFqName == receiver }", + "comment_created_at": "2023-09-15T11:08:09+00:00", + "comment_author": "IlyaGulya", + "comment_body": "Yeah, sure, I've just forgot about this one, thanks!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "713622898", + "pr_number": 4581, + "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/AtomicFUTransformer.kt", + "created_at": "2021-09-22T06:18:07+00:00", + "commented_code": "/*\n * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage org.jetbrains.kotlinx.atomicfu.compiler.extensions\n\nimport org.jetbrains.kotlin.backend.common.deepCopyWithVariables\nimport org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\nimport org.jetbrains.kotlin.descriptors.DescriptorVisibilities\nimport org.jetbrains.kotlin.ir.*\nimport org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildValueParameter\nimport org.jetbrains.kotlin.ir.expressions.impl.*\nimport org.jetbrains.kotlin.ir.declarations.*\nimport org.jetbrains.kotlin.ir.expressions.*\nimport org.jetbrains.kotlin.ir.symbols.*\nimport org.jetbrains.kotlin.ir.types.*\nimport org.jetbrains.kotlin.ir.util.*\nimport org.jetbrains.kotlin.ir.visitors.IrElementTransformerVoid\nimport org.jetbrains.kotlin.ir.visitors.IrElementVisitorVoid\nimport org.jetbrains.kotlin.ir.visitors.acceptChildrenVoid\nimport org.jetbrains.kotlin.name.*\n\nprivate const val KOTLIN = \"kotlin\"\nprivate const val AFU_PKG = \"kotlinx/atomicfu\"\nprivate const val LOCKS = \"locks\"\nprivate const val ATOMIC_CONSTRUCTOR = \"atomic\"\nprivate const val ATOMICFU_VALUE_TYPE = \"\"\"Atomic(Int|Long|Boolean|Ref)\"\"\"\nprivate const val ATOMIC_ARRAY_TYPE = \"\"\"Atomic(Int|Long|Boolean|)Array\"\"\"\nprivate const val ATOMIC_ARRAY_FACTORY_FUNCTION = \"atomicArrayOfNulls\"\nprivate const val ATOMICFU_RUNTIME_FUNCTION_PREDICATE = \"atomicfu_\"\nprivate const val REENTRANT_LOCK_TYPE = \"ReentrantLock\"\nprivate const val GETTER = \"atomicfu\\$getter\"\nprivate const val SETTER = \"atomicfu\\$setter\"\nprivate const val GET = \"get\"\nprivate const val SET = \"set\"\nprivate const val ATOMICFU_INLINE_FUNCTION = \"\"\"atomicfu_(loop|update|getAndUpdate|updateAndGet)\"\"\"\n\nprivate fun String.prettyStr() = replace('/', '.')\n\nclass AtomicFUTransformer(override val context: IrPluginContext) : IrElementTransformerVoid(), TransformerHelpers {\n\n private val irBuiltIns = context.irBuiltIns\n\n private val AFU_CLASSES: Map = mapOf(\n \"AtomicInt\" to irBuiltIns.intType,\n \"AtomicLong\" to irBuiltIns.longType,\n \"AtomicRef\" to irBuiltIns.anyType,\n \"AtomicBoolean\" to irBuiltIns.booleanType\n )\n\n private val AFU_ARRAY_CLASSES: Map = mapOf(\n \"AtomicIntArray\" to \"IntArray\",\n \"AtomicLongArray\" to \"LongArray\",\n \"AtomicBooleanArray\" to \"BooleanArray\",\n \"AtomicArray\" to \"Array\"\n )\n\n override fun visitFile(declaration: IrFile): IrFile {\n val transformedDeclarations = mutableListOf()\n declaration.declarations.forEach { irDeclaration ->\n irDeclaration.transformInlineAtomicExtension()?.let { transformedDeclarations.add(it) }\n }\n declaration.declarations.addAll(transformedDeclarations)\n 
return super.visitFile(declaration)\n }\n\n override fun visitClass(declaration: IrClass): IrStatement {\n val transformedDeclarations = mutableListOf()\n declaration.declarations.forEach { irDeclaration ->\n irDeclaration.transformInlineAtomicExtension()?.let { transformedDeclarations.add(it) }\n }\n declaration.declarations.addAll(transformedDeclarations)\n return super.visitClass(declaration)\n }\n\n override fun visitProperty(declaration: IrProperty): IrStatement {\n if (declaration.backingField != null) {\n val backingField = declaration.backingField!!\n if (backingField.initializer != null) {\n val initializer = backingField.initializer!!.expression.eraseAtomicConstructor(backingField)\n declaration.backingField!!.initializer = context.irFactory.createExpressionBody(initializer)\n }\n }\n return super.visitProperty(declaration)\n }\n\n override fun visitFunction(declaration: IrFunction): IrStatement {\n transformDeclarationBody(declaration.body, declaration)\n return super.visitFunction(declaration)\n }\n\n override fun visitAnonymousInitializer(declaration: IrAnonymousInitializer): IrStatement {\n transformDeclarationBody(declaration.body, declaration)\n return super.visitAnonymousInitializer(declaration)\n }\n\n private fun transformDeclarationBody(body: IrBody?, parent: IrDeclaration, lambda: IrFunction? = null) {\n if (body is IrBlockBody) {\n body.statements.forEachIndexed { i, stmt ->\n val transformedStmt = stmt.transformStatement(parent, lambda)\n body.statements[i] = transformedStmt\n }\n }\n }\n\n private fun IrExpression.eraseAtomicConstructor(parentDeclaration: IrDeclaration, lambda: IrFunction? = null) =\n when {\n type.isAtomicValueType() -> getPureTypeValue().transformAtomicFunctionCall(parentDeclaration, lambda)\n type.isAtomicArrayType() -> buildPureTypeArrayConstructor()\n type.isReentrantLockType() -> buildConstNull()\n else -> this\n }\n\n private fun IrDeclaration.transformInlineAtomicExtension(): IrDeclaration? {\n // transform the signature of the inline Atomic* extension declaration\n // inline fun AtomicRef.foo(args) { ... 
} -> inline fun foo(args, getter: () -> T, setter: (T) -> Unit)\n if (this is IrFunction &&\n isInline &&\n extensionReceiverParameter != null &&\n extensionReceiverParameter!!.type.isAtomicValueType()\n ) {\n val newDeclaration = this.deepCopyWithSymbols(parent)\n val oldValueParameters = valueParameters.map { it.deepCopyWithSymbols(newDeclaration) }\n val valueParametersCount = valueParameters.size\n val receiverValueType = extensionReceiverParameter!!.type.atomicToValueType() // T\n val getterType = buildGetterType(receiverValueType)\n val setterType = buildSetterType(receiverValueType)\n newDeclaration.valueParameters = oldValueParameters + listOf(\n buildValueParameter(newDeclaration, GETTER, valueParametersCount, getterType),\n buildValueParameter(newDeclaration, SETTER, valueParametersCount + 1, setterType)\n )\n newDeclaration.extensionReceiverParameter = null\n return newDeclaration\n }\n return null\n }\n\n private fun IrExpression.getPureTypeValue(): IrExpression {\n require(this is IrCall && isAtomicFactory()) { \"Illegal initializer for the atomic property $this\" }\n return getValueArgument(0)!!\n }\n\n private fun IrExpression.buildPureTypeArrayConstructor() =\n when (this) {\n is IrConstructorCall -> {\n require(isAtomicArrayConstructor()) { \"Unsupported atomic array constructor $this\" }\n val arrayConstructorSymbol = type.getArrayConstructorSymbol { it.owner.valueParameters.size == 1 }\n val size = getValueArgument(0)\n IrConstructorCallImpl(\n UNDEFINED_OFFSET, UNDEFINED_OFFSET,\n irBuiltIns.unitType, arrayConstructorSymbol,\n 0, 0, 1\n ).apply {\n putValueArgument(0, size)\n }\n }\n is IrCall -> {\n require(isAtomicArrayFactory()) { \"Unsupported atomic array factory function $this\" }\n val arrayFactorySymbol = referencePackageFunction(\"kotlin\", \"arrayOfNulls\")\n val arrayElementType = getTypeArgument(0)!!\n val size = getValueArgument(0)\n buildCall(\n target = arrayFactorySymbol,\n type = type,\n typeArguments = listOf(arrayElementType),\n valueArguments = listOf(size)\n )\n }\n else -> error(\"Illegal type of atomic array initializer\")\n }\n\n private fun IrCall.inlineAtomicFunction(atomicType: IrType, accessors: List): IrCall {\n val valueArguments = getValueArguments()\n val functionName = getAtomicFunctionName()\n val runtimeFunction = getRuntimeFunctionSymbol(functionName, atomicType)\n return buildCall(\n target = runtimeFunction,\n type = type,\n origin = IrStatementOrigin.INVOKE,\n typeArguments = if (runtimeFunction.owner.typeParameters.size == 1) listOf(atomicType) else emptyList(),\n valueArguments = valueArguments + accessors\n )\n }\n\n private fun IrStatement.transformStatement(parentDeclaration: IrDeclaration, lambda: IrFunction? = null) =\n when (this) {\n is IrExpression -> transformAtomicFunctionCall(parentDeclaration, lambda)\n is IrVariable -> {\n apply { initializer = initializer?.transformAtomicFunctionCall(parentDeclaration, lambda) }\n }\n else -> this\n }\n\n private fun IrExpression.transformAtomicFunctionCall(parentDeclaration: IrDeclaration, lambda: IrFunction? 
= null): IrExpression {\n // erase unchecked cast to the Atomic* type\n if (this is IrTypeOperatorCall && operator == IrTypeOperator.CAST && typeOperand.isAtomicValueType()) {\n return argument\n }\n if (isAtomicValueInitializer()) {\n return eraseAtomicConstructor(parentDeclaration, lambda)\n }\n when (this) {\n is IrTypeOperatorCall -> {\n return apply { argument = argument.transformAtomicFunctionCall(parentDeclaration, lambda) }\n }\n is IrStringConcatenationImpl -> {\n return apply {\n arguments.forEachIndexed { i, arg ->\n arguments[i] = arg.transformAtomicFunctionCall(parentDeclaration, lambda)\n }\n }\n }\n is IrReturn -> {\n if (parentDeclaration.isTransformedAtomicExtensionFunction() && (parentDeclaration as IrFunction).isInline &&\n returnTargetSymbol !== parentDeclaration.symbol) {\n return IrReturnImpl(\n startOffset,\n endOffset,\n type,\n parentDeclaration.symbol,\n value.transformAtomicFunctionCall(parentDeclaration, lambda)\n )\n }\n return apply { value = value.transformAtomicFunctionCall(parentDeclaration, lambda) }\n }\n is IrSetValue -> {\n return apply { value = value.transformAtomicFunctionCall(parentDeclaration, lambda) }\n }\n is IrSetField -> {\n return apply { value = value.transformAtomicFunctionCall(parentDeclaration, lambda) }\n }\n is IrIfThenElseImpl -> {\n return apply {\n branches.forEachIndexed { i, branch ->\n branches[i] = branch.apply {\n condition = condition.transformAtomicFunctionCall(parentDeclaration, lambda)\n result = result.transformAtomicFunctionCall(parentDeclaration, lambda)\n }\n }\n }\n }\n is IrWhenImpl -> {\n return apply {\n branches.forEachIndexed { i, branch ->\n branches[i] = branch.apply {\n condition = condition.transformAtomicFunctionCall(parentDeclaration, lambda)\n result = result.transformAtomicFunctionCall(parentDeclaration, lambda)\n }\n }\n }\n }\n is IrTry -> {\n return apply {\n tryResult = tryResult.transformAtomicFunctionCall(parentDeclaration, lambda)\n catches.forEach {\n it.result = it.result.transformAtomicFunctionCall(parentDeclaration, lambda)\n }\n finallyExpression = finallyExpression?.transformAtomicFunctionCall(parentDeclaration, lambda)\n }\n }\n is IrBlock -> {\n return apply {\n statements.forEachIndexed { i, stmt ->\n statements[i] = stmt.transformStatement(parentDeclaration, lambda)\n }\n }\n }\n is IrGetValue -> {\n if (lambda != null && symbol.owner.parent == lambda) return this\n if (symbol is IrValueParameterSymbol && parentDeclaration.isTransformedAtomicExtensionFunction()) {\n // replace use site of the value parameter with it's copy from the transformed declaration\n val index = (symbol.owner as IrValueParameter).index\n if (index >= 0) { // index == -1 for `this` parameter\n val transformedValueParameter = (parentDeclaration as IrFunction).valueParameters[index]\n return buildGetValue(transformedValueParameter.symbol)\n }\n }\n }\n is IrCall -> {\n dispatchReceiver?.let { dispatchReceiver = it.transformAtomicFunctionCall(parentDeclaration, lambda) }\n extensionReceiver?.let { extensionReceiver = it.transformAtomicFunctionCall(parentDeclaration, lambda) }\n getValueArguments().forEachIndexed { i, arg ->\n putValueArgument(i, arg?.transformAtomicFunctionCall(parentDeclaration, lambda))\n }\n val isInline = symbol.owner.isInline\n val receiver = extensionReceiver ?: dispatchReceiver ?: return this\n // Invocation of atomic functions on atomic receivers\n // are substituted with the corresponding inline declarations from `kotlinx-atomicfu-runtime`\n if (symbol.isKotlinxAtomicfuPackage() && 
receiver.type.isAtomicValueType()) {\n // 1. Receiver is the atomic field:\n // a.incrementAndGet() -> atomicfu_incrementAndGet(get_a {..}, set_a {..})\n if (receiver is IrCall) { // property accessor \n val accessors = receiver.getAccessors(lambda ?: parentDeclaration)\n return inlineAtomicFunction(receiver.type.atomicToValueType(), accessors).apply {\n transformAtomicfuLoopBody(parentDeclaration)\n }\n }\n // 2. Atomic extension receiver `this`, the reciver of the parent function.\n // Parent function is the inline Atomic* extension function already transformed with `transformAtomicInlineDeclaration`\n // inline fun foo(getter: () -> T, setter: (T) -> Unit) { incrementAndGet() } ->\n // inline fun foo(getter: () -> T, setter: (T) -> Unit) { atomicfu_incrementAndGet(getter, setter) }\n if (receiver is IrGetValue && parentDeclaration.isTransformedAtomicExtensionFunction()) {\n val accessorParameters = (parentDeclaration as IrFunction).valueParameters.takeLast(2).map { it.capture() }\n return inlineAtomicFunction(receiver.type.atomicToValueType(), accessorParameters).apply {\n transformAtomicfuLoopBody(parentDeclaration)\n }\n }\n } else {\n // 3. transform invocation of an inline Atomic* extension function call: a.foo()\n if (isInline && receiver is IrCall && receiver.type.isAtomicValueType()) {\n val accessors = receiver.getAccessors(lambda ?: parentDeclaration)\n val dispatch = dispatchReceiver\n val args = getValueArguments()\n val transformedTarget = symbol.owner.getDeclarationWithAccessorParameters()\n return buildCall(\n target = transformedTarget.symbol,\n type = type,\n origin = IrStatementOrigin.INVOKE,\n valueArguments = args + accessors\n ).apply {\n dispatchReceiver = dispatch\n }\n }\n }\n }\n is IrConstructorCall -> {\n getValueArguments().forEachIndexed { i, arg ->\n putValueArgument(i, arg?.transformAtomicFunctionCall(parentDeclaration, lambda))\n }\n }\n }\n return this\n }\n\n private fun IrCall.transformAtomicfuLoopBody(parent: IrDeclaration) {\n if (symbol.owner.name.asString().matches(ATOMICFU_INLINE_FUNCTION.toRegex())) {\n val lambdaLoop = (getValueArgument(0) as IrFunctionExpression).function\n transformDeclarationBody(lambdaLoop.body, parent, lambdaLoop)\n }\n }\n\n private fun IrFunction.hasReceiverAccessorParameters(): Boolean {\n if (valueParameters.size < 2) return false\n val params = valueParameters.takeLast(2)\n return params[0].name.asString() == GETTER && params[1].name.asString() == SETTER\n }\n\n private fun IrDeclaration.isTransformedAtomicExtensionFunction(): Boolean =\n this is IrFunction && this.hasReceiverAccessorParameters()\n\n private fun IrFunction.getDeclarationWithAccessorParameters(): IrSimpleFunction {\n val parent = parent as IrDeclarationContainer\n val params = valueParameters.map { it.type }\n val extensionType = extensionReceiverParameter?.type?.atomicToValueType()\n return try {\n parent.declarations.single {\n it is IrSimpleFunction &&\n it.name == symbol.owner.name &&\n it.valueParameters.size == params.size + 2 &&\n it.valueParameters.dropLast(2).withIndex().all { p -> p.value.type.classifierOrNull?.owner == params[p.index].classifierOrNull?.owner } &&\n it.getGetterReturnType()?.classifierOrNull?.owner == extensionType?.classifierOrNull?.owner\n } as IrSimpleFunction\n } catch (e: RuntimeException) {\n error(\"Exception while looking for the declaration with accessor parameters: ${e.message}\")\n }\n }\n\n private fun IrExpression.isAtomicValueInitializer() =\n (this is IrCall && (this.isAtomicFactory() || 
this.isAtomicArrayFactory())) ||\n (this is IrConstructorCall && this.isAtomicArrayConstructor()) ||\n type.isReentrantLockType()\n\n private fun IrCall.isArrayElementGetter() =\n dispatchReceiver != null &&\n dispatchReceiver!!.type.isAtomicArrayType() &&\n symbol.owner.name.asString() == GET\n\n private fun IrCall.getBackingField(): IrField {\n val correspondingPropertySymbol = symbol.owner.correspondingPropertySymbol!!\n return correspondingPropertySymbol.owner.backingField!!\n }\n\n private fun IrCall.buildAccessorLambda(\n isSetter: Boolean,\n parentDeclaration: IrDeclaration\n ): IrFunctionExpression {\n val isArrayElement = isArrayElementGetter()\n val getterCall = if (isArrayElement) dispatchReceiver as IrCall else this\n val valueType = type.atomicToValueType()\n val type = if (isSetter) buildSetterType(valueType) else buildGetterType(valueType)\n val name = if (isSetter) setterName(getterCall.symbol.owner.name.getFieldName())\n else getterName(getterCall.symbol.owner.name.getFieldName())\n val returnType = if (isSetter) context.irBuiltIns.unitType else valueType\n val accessorFunction = buildFunction(\n parent = parentDeclaration as IrDeclarationParent,\n origin = IrDeclarationOrigin.DEFAULT_PROPERTY_ACCESSOR,\n name = Name.identifier(name),\n visibility = DescriptorVisibilities.LOCAL,\n isInline = true,\n returnType = returnType\n ).apply {\n val valueParameter = buildValueParameter(this, name, 0, valueType)\n this.valueParameters = if (isSetter) listOf(valueParameter) else emptyList()\n val body = if (isSetter) {\n if (isArrayElement) {\n val setSymbol = referenceFunction(getterCall.type.referenceClass(), SET)\n val elementIndex = getValueArgument(0)!!.deepCopyWithVariables()\n buildCall(\n target = setSymbol,\n type = context.irBuiltIns.unitType,\n origin = IrStatementOrigin.LAMBDA,\n valueArguments = listOf(elementIndex, valueParameter.capture())\n ).apply {\n dispatchReceiver = getterCall\n }\n } else {\n buildSetField(getterCall.getBackingField(), getterCall.dispatchReceiver, valueParameter.capture())\n }\n } else {\n val getField = buildGetField(getterCall.getBackingField(), getterCall.dispatchReceiver)\n if (isArrayElement) {\n val getSymbol = referenceFunction(getterCall.type.referenceClass(), GET)\n val elementIndex = getValueArgument(0)!!.deepCopyWithVariables()\n buildCall(\n target = getSymbol,\n type = valueType,\n origin = IrStatementOrigin.LAMBDA,\n valueArguments = listOf(elementIndex)\n ).apply {\n dispatchReceiver = getField.deepCopyWithVariables()\n }\n } else {\n getField.deepCopyWithVariables()\n }\n }\n this.body = buildBlockBody(listOf(body))\n origin = IrDeclarationOrigin.DEFAULT_PROPERTY_ACCESSOR\n }\n return IrFunctionExpressionImpl(\n UNDEFINED_OFFSET, UNDEFINED_OFFSET,\n type,\n accessorFunction,\n IrStatementOrigin.LAMBDA\n )\n }\n\n private fun buildSetField(backingField: IrField, ownerClass: IrExpression?, value: IrGetValue): IrSetField {\n val receiver = if (ownerClass is IrTypeOperatorCall) ownerClass.argument as IrGetValue else ownerClass\n val fieldSymbol = backingField.symbol\n return buildSetField(\n symbol = fieldSymbol,\n receiver = receiver,\n value = value\n )\n }\n\n private fun buildGetField(backingField: IrField, ownerClass: IrExpression?): IrGetField {\n val receiver = if (ownerClass is IrTypeOperatorCall) ownerClass.argument as IrGetValue else ownerClass\n return buildGetField(\n symbol = backingField.symbol,\n receiver = receiver\n )\n }\n\n private fun IrCall.getAccessors(parentDeclaration: IrDeclaration): List =\n 
listOf(buildAccessorLambda(isSetter = false, parentDeclaration = parentDeclaration),\n buildAccessorLambda(isSetter = true, parentDeclaration = parentDeclaration))\n\n private fun getRuntimeFunctionSymbol(name: String, type: IrType): IrSimpleFunctionSymbol {\n val functionName = when (name) {\n \"value.\" -> \"getValue\"\n \"value.\" -> \"setValue\"\n else -> name\n }\n return referencePackageFunction(\"kotlinx.atomicfu\", \"$ATOMICFU_RUNTIME_FUNCTION_PREDICATE$functionName\") {\n val typeArg = it.owner.getGetterReturnType()\n !(typeArg as IrType).isPrimitiveType() || typeArg == type\n }\n }\n\n private fun IrFunction.getGetterReturnType() = (valueParameters[valueParameters.lastIndex - 1].type as IrSimpleType).arguments.first().typeOrNull\n\n private fun IrCall.isAtomicFactory(): Boolean {\n val name = symbol.owner.name\n return !name.isSpecial && name.identifier == ATOMIC_CONSTRUCTOR\n }\n\n private fun IrCall.isAtomicArrayFactory(): Boolean {\n val name = symbol.owner.name\n return !name.isSpecial && name.identifier == ATOMIC_ARRAY_FACTORY_FUNCTION\n }\n\n private fun IrConstructorCall.isAtomicArrayConstructor() = (type as IrSimpleType).isAtomicArrayType()\n\n private fun IrSymbol.isKotlinxAtomicfuPackage() =\n this.isPublicApi && signature?.packageFqName()?.asString() == AFU_PKG.prettyStr()\n\n private fun IrType.isAtomicValueType() = belongsTo(ATOMICFU_VALUE_TYPE)\n private fun IrType.isAtomicArrayType() = belongsTo(ATOMIC_ARRAY_TYPE)\n private fun IrType.isReentrantLockType() = belongsTo(\"$AFU_PKG/$LOCKS\", REENTRANT_LOCK_TYPE)\n\n private fun IrType.belongsTo(typeName: String) = belongsTo(AFU_PKG, typeName)\n\n private fun IrType.belongsTo(packageName: String, typeName: String): Boolean {", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "713622898", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4581, + "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/AtomicFUTransformer.kt", + "discussion_id": "713622898", + "commented_code": "@@ -0,0 +1,595 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlinx.atomicfu.compiler.extensions\n+\n+import org.jetbrains.kotlin.backend.common.deepCopyWithVariables\n+import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\n+import org.jetbrains.kotlin.descriptors.DescriptorVisibilities\n+import org.jetbrains.kotlin.ir.*\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildValueParameter\n+import org.jetbrains.kotlin.ir.expressions.impl.*\n+import org.jetbrains.kotlin.ir.declarations.*\n+import org.jetbrains.kotlin.ir.expressions.*\n+import org.jetbrains.kotlin.ir.symbols.*\n+import org.jetbrains.kotlin.ir.types.*\n+import org.jetbrains.kotlin.ir.util.*\n+import org.jetbrains.kotlin.ir.visitors.IrElementTransformerVoid\n+import org.jetbrains.kotlin.ir.visitors.IrElementVisitorVoid\n+import org.jetbrains.kotlin.ir.visitors.acceptChildrenVoid\n+import org.jetbrains.kotlin.name.*\n+\n+private const val KOTLIN = \"kotlin\"\n+private const val AFU_PKG = \"kotlinx/atomicfu\"\n+private const val LOCKS = \"locks\"\n+private const val ATOMIC_CONSTRUCTOR = \"atomic\"\n+private const val ATOMICFU_VALUE_TYPE = \"\"\"Atomic(Int|Long|Boolean|Ref)\"\"\"\n+private const val ATOMIC_ARRAY_TYPE = \"\"\"Atomic(Int|Long|Boolean|)Array\"\"\"\n+private const val ATOMIC_ARRAY_FACTORY_FUNCTION = \"atomicArrayOfNulls\"\n+private const val ATOMICFU_RUNTIME_FUNCTION_PREDICATE = \"atomicfu_\"\n+private const val REENTRANT_LOCK_TYPE = \"ReentrantLock\"\n+private const val GETTER = \"atomicfu\\$getter\"\n+private const val SETTER = \"atomicfu\\$setter\"\n+private const val GET = \"get\"\n+private const val SET = \"set\"\n+private const val ATOMICFU_INLINE_FUNCTION = \"\"\"atomicfu_(loop|update|getAndUpdate|updateAndGet)\"\"\"\n+\n+private fun String.prettyStr() = replace('/', '.')\n+\n+class AtomicFUTransformer(override val context: IrPluginContext) : IrElementTransformerVoid(), TransformerHelpers {\n+\n+ private val irBuiltIns = context.irBuiltIns\n+\n+ private val AFU_CLASSES: Map = mapOf(\n+ \"AtomicInt\" to irBuiltIns.intType,\n+ \"AtomicLong\" to irBuiltIns.longType,\n+ \"AtomicRef\" to irBuiltIns.anyType,\n+ \"AtomicBoolean\" to irBuiltIns.booleanType\n+ )\n+\n+ private val AFU_ARRAY_CLASSES: Map = mapOf(\n+ \"AtomicIntArray\" to \"IntArray\",\n+ \"AtomicLongArray\" to \"LongArray\",\n+ \"AtomicBooleanArray\" to \"BooleanArray\",\n+ \"AtomicArray\" to \"Array\"\n+ )\n+\n+ override fun visitFile(declaration: IrFile): IrFile {\n+ val transformedDeclarations = mutableListOf()\n+ declaration.declarations.forEach { irDeclaration ->\n+ irDeclaration.transformInlineAtomicExtension()?.let { transformedDeclarations.add(it) }\n+ }\n+ declaration.declarations.addAll(transformedDeclarations)\n+ return super.visitFile(declaration)\n+ }\n+\n+ override fun visitClass(declaration: IrClass): IrStatement {\n+ val transformedDeclarations = mutableListOf()\n+ declaration.declarations.forEach { irDeclaration ->\n+ irDeclaration.transformInlineAtomicExtension()?.let { transformedDeclarations.add(it) }\n+ }\n+ declaration.declarations.addAll(transformedDeclarations)\n+ return super.visitClass(declaration)\n+ }\n+\n+ override fun visitProperty(declaration: IrProperty): IrStatement {\n+ if (declaration.backingField != null) {\n+ val backingField = declaration.backingField!!\n+ if (backingField.initializer != null) {\n+ val initializer = 
backingField.initializer!!.expression.eraseAtomicConstructor(backingField)\n+ declaration.backingField!!.initializer = context.irFactory.createExpressionBody(initializer)\n+ }\n+ }\n+ return super.visitProperty(declaration)\n+ }\n+\n+ override fun visitFunction(declaration: IrFunction): IrStatement {\n+ transformDeclarationBody(declaration.body, declaration)\n+ return super.visitFunction(declaration)\n+ }\n+\n+ override fun visitAnonymousInitializer(declaration: IrAnonymousInitializer): IrStatement {\n+ transformDeclarationBody(declaration.body, declaration)\n+ return super.visitAnonymousInitializer(declaration)\n+ }\n+\n+ private fun transformDeclarationBody(body: IrBody?, parent: IrDeclaration, lambda: IrFunction? = null) {\n+ if (body is IrBlockBody) {\n+ body.statements.forEachIndexed { i, stmt ->\n+ val transformedStmt = stmt.transformStatement(parent, lambda)\n+ body.statements[i] = transformedStmt\n+ }\n+ }\n+ }\n+\n+ private fun IrExpression.eraseAtomicConstructor(parentDeclaration: IrDeclaration, lambda: IrFunction? = null) =\n+ when {\n+ type.isAtomicValueType() -> getPureTypeValue().transformAtomicFunctionCall(parentDeclaration, lambda)\n+ type.isAtomicArrayType() -> buildPureTypeArrayConstructor()\n+ type.isReentrantLockType() -> buildConstNull()\n+ else -> this\n+ }\n+\n+ private fun IrDeclaration.transformInlineAtomicExtension(): IrDeclaration? {\n+ // transform the signature of the inline Atomic* extension declaration\n+ // inline fun AtomicRef.foo(args) { ... } -> inline fun foo(args, getter: () -> T, setter: (T) -> Unit)\n+ if (this is IrFunction &&\n+ isInline &&\n+ extensionReceiverParameter != null &&\n+ extensionReceiverParameter!!.type.isAtomicValueType()\n+ ) {\n+ val newDeclaration = this.deepCopyWithSymbols(parent)\n+ val oldValueParameters = valueParameters.map { it.deepCopyWithSymbols(newDeclaration) }\n+ val valueParametersCount = valueParameters.size\n+ val receiverValueType = extensionReceiverParameter!!.type.atomicToValueType() // T\n+ val getterType = buildGetterType(receiverValueType)\n+ val setterType = buildSetterType(receiverValueType)\n+ newDeclaration.valueParameters = oldValueParameters + listOf(\n+ buildValueParameter(newDeclaration, GETTER, valueParametersCount, getterType),\n+ buildValueParameter(newDeclaration, SETTER, valueParametersCount + 1, setterType)\n+ )\n+ newDeclaration.extensionReceiverParameter = null\n+ return newDeclaration\n+ }\n+ return null\n+ }\n+\n+ private fun IrExpression.getPureTypeValue(): IrExpression {\n+ require(this is IrCall && isAtomicFactory()) { \"Illegal initializer for the atomic property $this\" }\n+ return getValueArgument(0)!!\n+ }\n+\n+ private fun IrExpression.buildPureTypeArrayConstructor() =\n+ when (this) {\n+ is IrConstructorCall -> {\n+ require(isAtomicArrayConstructor()) { \"Unsupported atomic array constructor $this\" }\n+ val arrayConstructorSymbol = type.getArrayConstructorSymbol { it.owner.valueParameters.size == 1 }\n+ val size = getValueArgument(0)\n+ IrConstructorCallImpl(\n+ UNDEFINED_OFFSET, UNDEFINED_OFFSET,\n+ irBuiltIns.unitType, arrayConstructorSymbol,\n+ 0, 0, 1\n+ ).apply {\n+ putValueArgument(0, size)\n+ }\n+ }\n+ is IrCall -> {\n+ require(isAtomicArrayFactory()) { \"Unsupported atomic array factory function $this\" }\n+ val arrayFactorySymbol = referencePackageFunction(\"kotlin\", \"arrayOfNulls\")\n+ val arrayElementType = getTypeArgument(0)!!\n+ val size = getValueArgument(0)\n+ buildCall(\n+ target = arrayFactorySymbol,\n+ type = type,\n+ typeArguments = listOf(arrayElementType),\n+ 
valueArguments = listOf(size)\n+ )\n+ }\n+ else -> error(\"Illegal type of atomic array initializer\")\n+ }\n+\n+ private fun IrCall.inlineAtomicFunction(atomicType: IrType, accessors: List): IrCall {\n+ val valueArguments = getValueArguments()\n+ val functionName = getAtomicFunctionName()\n+ val runtimeFunction = getRuntimeFunctionSymbol(functionName, atomicType)\n+ return buildCall(\n+ target = runtimeFunction,\n+ type = type,\n+ origin = IrStatementOrigin.INVOKE,\n+ typeArguments = if (runtimeFunction.owner.typeParameters.size == 1) listOf(atomicType) else emptyList(),\n+ valueArguments = valueArguments + accessors\n+ )\n+ }\n+\n+ private fun IrStatement.transformStatement(parentDeclaration: IrDeclaration, lambda: IrFunction? = null) =\n+ when (this) {\n+ is IrExpression -> transformAtomicFunctionCall(parentDeclaration, lambda)\n+ is IrVariable -> {\n+ apply { initializer = initializer?.transformAtomicFunctionCall(parentDeclaration, lambda) }\n+ }\n+ else -> this\n+ }\n+\n+ private fun IrExpression.transformAtomicFunctionCall(parentDeclaration: IrDeclaration, lambda: IrFunction? = null): IrExpression {\n+ // erase unchecked cast to the Atomic* type\n+ if (this is IrTypeOperatorCall && operator == IrTypeOperator.CAST && typeOperand.isAtomicValueType()) {\n+ return argument\n+ }\n+ if (isAtomicValueInitializer()) {\n+ return eraseAtomicConstructor(parentDeclaration, lambda)\n+ }\n+ when (this) {\n+ is IrTypeOperatorCall -> {\n+ return apply { argument = argument.transformAtomicFunctionCall(parentDeclaration, lambda) }\n+ }\n+ is IrStringConcatenationImpl -> {\n+ return apply {\n+ arguments.forEachIndexed { i, arg ->\n+ arguments[i] = arg.transformAtomicFunctionCall(parentDeclaration, lambda)\n+ }\n+ }\n+ }\n+ is IrReturn -> {\n+ if (parentDeclaration.isTransformedAtomicExtensionFunction() && (parentDeclaration as IrFunction).isInline &&\n+ returnTargetSymbol !== parentDeclaration.symbol) {\n+ return IrReturnImpl(\n+ startOffset,\n+ endOffset,\n+ type,\n+ parentDeclaration.symbol,\n+ value.transformAtomicFunctionCall(parentDeclaration, lambda)\n+ )\n+ }\n+ return apply { value = value.transformAtomicFunctionCall(parentDeclaration, lambda) }\n+ }\n+ is IrSetValue -> {\n+ return apply { value = value.transformAtomicFunctionCall(parentDeclaration, lambda) }\n+ }\n+ is IrSetField -> {\n+ return apply { value = value.transformAtomicFunctionCall(parentDeclaration, lambda) }\n+ }\n+ is IrIfThenElseImpl -> {\n+ return apply {\n+ branches.forEachIndexed { i, branch ->\n+ branches[i] = branch.apply {\n+ condition = condition.transformAtomicFunctionCall(parentDeclaration, lambda)\n+ result = result.transformAtomicFunctionCall(parentDeclaration, lambda)\n+ }\n+ }\n+ }\n+ }\n+ is IrWhenImpl -> {\n+ return apply {\n+ branches.forEachIndexed { i, branch ->\n+ branches[i] = branch.apply {\n+ condition = condition.transformAtomicFunctionCall(parentDeclaration, lambda)\n+ result = result.transformAtomicFunctionCall(parentDeclaration, lambda)\n+ }\n+ }\n+ }\n+ }\n+ is IrTry -> {\n+ return apply {\n+ tryResult = tryResult.transformAtomicFunctionCall(parentDeclaration, lambda)\n+ catches.forEach {\n+ it.result = it.result.transformAtomicFunctionCall(parentDeclaration, lambda)\n+ }\n+ finallyExpression = finallyExpression?.transformAtomicFunctionCall(parentDeclaration, lambda)\n+ }\n+ }\n+ is IrBlock -> {\n+ return apply {\n+ statements.forEachIndexed { i, stmt ->\n+ statements[i] = stmt.transformStatement(parentDeclaration, lambda)\n+ }\n+ }\n+ }\n+ is IrGetValue -> {\n+ if (lambda != null && 
symbol.owner.parent == lambda) return this\n+ if (symbol is IrValueParameterSymbol && parentDeclaration.isTransformedAtomicExtensionFunction()) {\n+ // replace use site of the value parameter with it's copy from the transformed declaration\n+ val index = (symbol.owner as IrValueParameter).index\n+ if (index >= 0) { // index == -1 for `this` parameter\n+ val transformedValueParameter = (parentDeclaration as IrFunction).valueParameters[index]\n+ return buildGetValue(transformedValueParameter.symbol)\n+ }\n+ }\n+ }\n+ is IrCall -> {\n+ dispatchReceiver?.let { dispatchReceiver = it.transformAtomicFunctionCall(parentDeclaration, lambda) }\n+ extensionReceiver?.let { extensionReceiver = it.transformAtomicFunctionCall(parentDeclaration, lambda) }\n+ getValueArguments().forEachIndexed { i, arg ->\n+ putValueArgument(i, arg?.transformAtomicFunctionCall(parentDeclaration, lambda))\n+ }\n+ val isInline = symbol.owner.isInline\n+ val receiver = extensionReceiver ?: dispatchReceiver ?: return this\n+ // Invocation of atomic functions on atomic receivers\n+ // are substituted with the corresponding inline declarations from `kotlinx-atomicfu-runtime`\n+ if (symbol.isKotlinxAtomicfuPackage() && receiver.type.isAtomicValueType()) {\n+ // 1. Receiver is the atomic field:\n+ // a.incrementAndGet() -> atomicfu_incrementAndGet(get_a {..}, set_a {..})\n+ if (receiver is IrCall) { // property accessor \n+ val accessors = receiver.getAccessors(lambda ?: parentDeclaration)\n+ return inlineAtomicFunction(receiver.type.atomicToValueType(), accessors).apply {\n+ transformAtomicfuLoopBody(parentDeclaration)\n+ }\n+ }\n+ // 2. Atomic extension receiver `this`, the reciver of the parent function.\n+ // Parent function is the inline Atomic* extension function already transformed with `transformAtomicInlineDeclaration`\n+ // inline fun foo(getter: () -> T, setter: (T) -> Unit) { incrementAndGet() } ->\n+ // inline fun foo(getter: () -> T, setter: (T) -> Unit) { atomicfu_incrementAndGet(getter, setter) }\n+ if (receiver is IrGetValue && parentDeclaration.isTransformedAtomicExtensionFunction()) {\n+ val accessorParameters = (parentDeclaration as IrFunction).valueParameters.takeLast(2).map { it.capture() }\n+ return inlineAtomicFunction(receiver.type.atomicToValueType(), accessorParameters).apply {\n+ transformAtomicfuLoopBody(parentDeclaration)\n+ }\n+ }\n+ } else {\n+ // 3. 
transform invocation of an inline Atomic* extension function call: a.foo()\n+ if (isInline && receiver is IrCall && receiver.type.isAtomicValueType()) {\n+ val accessors = receiver.getAccessors(lambda ?: parentDeclaration)\n+ val dispatch = dispatchReceiver\n+ val args = getValueArguments()\n+ val transformedTarget = symbol.owner.getDeclarationWithAccessorParameters()\n+ return buildCall(\n+ target = transformedTarget.symbol,\n+ type = type,\n+ origin = IrStatementOrigin.INVOKE,\n+ valueArguments = args + accessors\n+ ).apply {\n+ dispatchReceiver = dispatch\n+ }\n+ }\n+ }\n+ }\n+ is IrConstructorCall -> {\n+ getValueArguments().forEachIndexed { i, arg ->\n+ putValueArgument(i, arg?.transformAtomicFunctionCall(parentDeclaration, lambda))\n+ }\n+ }\n+ }\n+ return this\n+ }\n+\n+ private fun IrCall.transformAtomicfuLoopBody(parent: IrDeclaration) {\n+ if (symbol.owner.name.asString().matches(ATOMICFU_INLINE_FUNCTION.toRegex())) {\n+ val lambdaLoop = (getValueArgument(0) as IrFunctionExpression).function\n+ transformDeclarationBody(lambdaLoop.body, parent, lambdaLoop)\n+ }\n+ }\n+\n+ private fun IrFunction.hasReceiverAccessorParameters(): Boolean {\n+ if (valueParameters.size < 2) return false\n+ val params = valueParameters.takeLast(2)\n+ return params[0].name.asString() == GETTER && params[1].name.asString() == SETTER\n+ }\n+\n+ private fun IrDeclaration.isTransformedAtomicExtensionFunction(): Boolean =\n+ this is IrFunction && this.hasReceiverAccessorParameters()\n+\n+ private fun IrFunction.getDeclarationWithAccessorParameters(): IrSimpleFunction {\n+ val parent = parent as IrDeclarationContainer\n+ val params = valueParameters.map { it.type }\n+ val extensionType = extensionReceiverParameter?.type?.atomicToValueType()\n+ return try {\n+ parent.declarations.single {\n+ it is IrSimpleFunction &&\n+ it.name == symbol.owner.name &&\n+ it.valueParameters.size == params.size + 2 &&\n+ it.valueParameters.dropLast(2).withIndex().all { p -> p.value.type.classifierOrNull?.owner == params[p.index].classifierOrNull?.owner } &&\n+ it.getGetterReturnType()?.classifierOrNull?.owner == extensionType?.classifierOrNull?.owner\n+ } as IrSimpleFunction\n+ } catch (e: RuntimeException) {\n+ error(\"Exception while looking for the declaration with accessor parameters: ${e.message}\")\n+ }\n+ }\n+\n+ private fun IrExpression.isAtomicValueInitializer() =\n+ (this is IrCall && (this.isAtomicFactory() || this.isAtomicArrayFactory())) ||\n+ (this is IrConstructorCall && this.isAtomicArrayConstructor()) ||\n+ type.isReentrantLockType()\n+\n+ private fun IrCall.isArrayElementGetter() =\n+ dispatchReceiver != null &&\n+ dispatchReceiver!!.type.isAtomicArrayType() &&\n+ symbol.owner.name.asString() == GET\n+\n+ private fun IrCall.getBackingField(): IrField {\n+ val correspondingPropertySymbol = symbol.owner.correspondingPropertySymbol!!\n+ return correspondingPropertySymbol.owner.backingField!!\n+ }\n+\n+ private fun IrCall.buildAccessorLambda(\n+ isSetter: Boolean,\n+ parentDeclaration: IrDeclaration\n+ ): IrFunctionExpression {\n+ val isArrayElement = isArrayElementGetter()\n+ val getterCall = if (isArrayElement) dispatchReceiver as IrCall else this\n+ val valueType = type.atomicToValueType()\n+ val type = if (isSetter) buildSetterType(valueType) else buildGetterType(valueType)\n+ val name = if (isSetter) setterName(getterCall.symbol.owner.name.getFieldName())\n+ else getterName(getterCall.symbol.owner.name.getFieldName())\n+ val returnType = if (isSetter) context.irBuiltIns.unitType else valueType\n+ val 
accessorFunction = buildFunction(\n+ parent = parentDeclaration as IrDeclarationParent,\n+ origin = IrDeclarationOrigin.DEFAULT_PROPERTY_ACCESSOR,\n+ name = Name.identifier(name),\n+ visibility = DescriptorVisibilities.LOCAL,\n+ isInline = true,\n+ returnType = returnType\n+ ).apply {\n+ val valueParameter = buildValueParameter(this, name, 0, valueType)\n+ this.valueParameters = if (isSetter) listOf(valueParameter) else emptyList()\n+ val body = if (isSetter) {\n+ if (isArrayElement) {\n+ val setSymbol = referenceFunction(getterCall.type.referenceClass(), SET)\n+ val elementIndex = getValueArgument(0)!!.deepCopyWithVariables()\n+ buildCall(\n+ target = setSymbol,\n+ type = context.irBuiltIns.unitType,\n+ origin = IrStatementOrigin.LAMBDA,\n+ valueArguments = listOf(elementIndex, valueParameter.capture())\n+ ).apply {\n+ dispatchReceiver = getterCall\n+ }\n+ } else {\n+ buildSetField(getterCall.getBackingField(), getterCall.dispatchReceiver, valueParameter.capture())\n+ }\n+ } else {\n+ val getField = buildGetField(getterCall.getBackingField(), getterCall.dispatchReceiver)\n+ if (isArrayElement) {\n+ val getSymbol = referenceFunction(getterCall.type.referenceClass(), GET)\n+ val elementIndex = getValueArgument(0)!!.deepCopyWithVariables()\n+ buildCall(\n+ target = getSymbol,\n+ type = valueType,\n+ origin = IrStatementOrigin.LAMBDA,\n+ valueArguments = listOf(elementIndex)\n+ ).apply {\n+ dispatchReceiver = getField.deepCopyWithVariables()\n+ }\n+ } else {\n+ getField.deepCopyWithVariables()\n+ }\n+ }\n+ this.body = buildBlockBody(listOf(body))\n+ origin = IrDeclarationOrigin.DEFAULT_PROPERTY_ACCESSOR\n+ }\n+ return IrFunctionExpressionImpl(\n+ UNDEFINED_OFFSET, UNDEFINED_OFFSET,\n+ type,\n+ accessorFunction,\n+ IrStatementOrigin.LAMBDA\n+ )\n+ }\n+\n+ private fun buildSetField(backingField: IrField, ownerClass: IrExpression?, value: IrGetValue): IrSetField {\n+ val receiver = if (ownerClass is IrTypeOperatorCall) ownerClass.argument as IrGetValue else ownerClass\n+ val fieldSymbol = backingField.symbol\n+ return buildSetField(\n+ symbol = fieldSymbol,\n+ receiver = receiver,\n+ value = value\n+ )\n+ }\n+\n+ private fun buildGetField(backingField: IrField, ownerClass: IrExpression?): IrGetField {\n+ val receiver = if (ownerClass is IrTypeOperatorCall) ownerClass.argument as IrGetValue else ownerClass\n+ return buildGetField(\n+ symbol = backingField.symbol,\n+ receiver = receiver\n+ )\n+ }\n+\n+ private fun IrCall.getAccessors(parentDeclaration: IrDeclaration): List =\n+ listOf(buildAccessorLambda(isSetter = false, parentDeclaration = parentDeclaration),\n+ buildAccessorLambda(isSetter = true, parentDeclaration = parentDeclaration))\n+\n+ private fun getRuntimeFunctionSymbol(name: String, type: IrType): IrSimpleFunctionSymbol {\n+ val functionName = when (name) {\n+ \"value.\" -> \"getValue\"\n+ \"value.\" -> \"setValue\"\n+ else -> name\n+ }\n+ return referencePackageFunction(\"kotlinx.atomicfu\", \"$ATOMICFU_RUNTIME_FUNCTION_PREDICATE$functionName\") {\n+ val typeArg = it.owner.getGetterReturnType()\n+ !(typeArg as IrType).isPrimitiveType() || typeArg == type\n+ }\n+ }\n+\n+ private fun IrFunction.getGetterReturnType() = (valueParameters[valueParameters.lastIndex - 1].type as IrSimpleType).arguments.first().typeOrNull\n+\n+ private fun IrCall.isAtomicFactory(): Boolean {\n+ val name = symbol.owner.name\n+ return !name.isSpecial && name.identifier == ATOMIC_CONSTRUCTOR\n+ }\n+\n+ private fun IrCall.isAtomicArrayFactory(): Boolean {\n+ val name = symbol.owner.name\n+ return !name.isSpecial 
&& name.identifier == ATOMIC_ARRAY_FACTORY_FUNCTION\n+ }\n+\n+ private fun IrConstructorCall.isAtomicArrayConstructor() = (type as IrSimpleType).isAtomicArrayType()\n+\n+ private fun IrSymbol.isKotlinxAtomicfuPackage() =\n+ this.isPublicApi && signature?.packageFqName()?.asString() == AFU_PKG.prettyStr()\n+\n+ private fun IrType.isAtomicValueType() = belongsTo(ATOMICFU_VALUE_TYPE)\n+ private fun IrType.isAtomicArrayType() = belongsTo(ATOMIC_ARRAY_TYPE)\n+ private fun IrType.isReentrantLockType() = belongsTo(\"$AFU_PKG/$LOCKS\", REENTRANT_LOCK_TYPE)\n+\n+ private fun IrType.belongsTo(typeName: String) = belongsTo(AFU_PKG, typeName)\n+\n+ private fun IrType.belongsTo(packageName: String, typeName: String): Boolean {", + "comment_created_at": "2021-09-22T06:18:07+00:00", + "comment_author": "romanart", + "comment_body": "```\r\n return classOrNull?.let { \r\n it.signature?.asPublic()?.let { sig -> \r\n sig.packageFqName == packageName && sig.declarationFqName.matches(typeNameReg)\r\n }\r\n } ?: false\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "630924715", + "pr_number": 4363, + "pr_file": "libraries/scripting/jvm/src/kotlin/script/experimental/jvm/impl/KJvmCompiledScript.kt", + "created_at": "2021-05-12T10:37:02+00:00", + "commented_code": "val processedClasspathElements = mutableSetOf()\n fun recursiveClassPath(res: Sequence, classLoader: ClassLoader?): Sequence =\n when (classLoader) {\n null -> res\n null, baseClassLoader -> res", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "630924715", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4363, + "pr_file": "libraries/scripting/jvm/src/kotlin/script/experimental/jvm/impl/KJvmCompiledScript.kt", + "discussion_id": "630924715", + "commented_code": "@@ -139,17 +140,17 @@ private fun CompiledScript.makeClassLoaderFromDependencies(baseClassLoader: Clas\n val processedClasspathElements = mutableSetOf()\n fun recursiveClassPath(res: Sequence, classLoader: ClassLoader?): Sequence =\n when (classLoader) {\n- null -> res\n+ null, baseClassLoader -> res", + "comment_created_at": "2021-05-12T10:37:02+00:00", + "comment_author": "ligee", + "comment_body": "I would still process the `null` case, e.g. 
generating an error if we've got null.", + "pr_file_module": null + }, + { + "comment_id": "630928510", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4363, + "pr_file": "libraries/scripting/jvm/src/kotlin/script/experimental/jvm/impl/KJvmCompiledScript.kt", + "discussion_id": "630924715", + "commented_code": "@@ -139,17 +140,17 @@ private fun CompiledScript.makeClassLoaderFromDependencies(baseClassLoader: Clas\n val processedClasspathElements = mutableSetOf()\n fun recursiveClassPath(res: Sequence, classLoader: ClassLoader?): Sequence =\n when (classLoader) {\n- null -> res\n+ null, baseClassLoader -> res", + "comment_created_at": "2021-05-12T10:43:07+00:00", + "comment_author": "ligee", + "comment_body": "But it should be after `baseClassLoader` check, since it could be `null`, and it is a valid situation.", + "pr_file_module": null + }, + { + "comment_id": "630949276", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4363, + "pr_file": "libraries/scripting/jvm/src/kotlin/script/experimental/jvm/impl/KJvmCompiledScript.kt", + "discussion_id": "630924715", + "commented_code": "@@ -139,17 +140,17 @@ private fun CompiledScript.makeClassLoaderFromDependencies(baseClassLoader: Clas\n val processedClasspathElements = mutableSetOf()\n fun recursiveClassPath(res: Sequence, classLoader: ClassLoader?): Sequence =\n when (classLoader) {\n- null -> res\n+ null, baseClassLoader -> res", + "comment_created_at": "2021-05-12T11:17:27+00:00", + "comment_author": "ileasile", + "comment_body": "@ligee I'm not sure that fallbackClassloader of DualClassLoader should necessarily have base classloader as a parent. Is it really so?", + "pr_file_module": null + }, + { + "comment_id": "630952983", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4363, + "pr_file": "libraries/scripting/jvm/src/kotlin/script/experimental/jvm/impl/KJvmCompiledScript.kt", + "discussion_id": "630924715", + "commented_code": "@@ -139,17 +140,17 @@ private fun CompiledScript.makeClassLoaderFromDependencies(baseClassLoader: Clas\n val processedClasspathElements = mutableSetOf()\n fun recursiveClassPath(res: Sequence, classLoader: ClassLoader?): Sequence =\n when (classLoader) {\n- null -> res\n+ null, baseClassLoader -> res", + "comment_created_at": "2021-05-12T11:23:39+00:00", + "comment_author": "ileasile", + "comment_body": "As I can see from usages, it is always a thread context classloader that is a base classloader at the same time, so there is no appropriate test", + "pr_file_module": null + }, + { + "comment_id": "630967327", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 4363, + "pr_file": "libraries/scripting/jvm/src/kotlin/script/experimental/jvm/impl/KJvmCompiledScript.kt", + "discussion_id": "630924715", + "commented_code": "@@ -139,17 +140,17 @@ private fun CompiledScript.makeClassLoaderFromDependencies(baseClassLoader: Clas\n val processedClasspathElements = mutableSetOf()\n fun recursiveClassPath(res: Sequence, classLoader: ClassLoader?): Sequence =\n when (classLoader) {\n- null -> res\n+ null, baseClassLoader -> res", + "comment_created_at": "2021-05-12T11:47:21+00:00", + "comment_author": "ligee", + "comment_body": "I'd say it should convert to the baseClassLoader, but since it is possible to replace it, it is safer to leave the code as you wrote it.\r\nAbout the tests - there are some tests with baseClassLoader set to null, and that's probably it.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kotlin-use-defensive-null-checks.md 
b/_reviewers/kotlin-use-defensive-null-checks.md index a630cfe..fe742d2 100644 --- a/_reviewers/kotlin-use-defensive-null-checks.md +++ b/_reviewers/kotlin-use-defensive-null-checks.md @@ -45,179 +45,3 @@ This approach is especially important when: - Creating code that others will depend on Remember that source locations, error reporting systems, and other infrastructure often assume non-null values. Adding defensive checks prevents crashes even when the underlying problem may be elsewhere. - - -[ - { - "discussion_id": "1717528372", - "pr_number": 5311, - "pr_file": "compiler/fir/raw-fir/light-tree2fir/src/org/jetbrains/kotlin/fir/lightTree/converter/LightTreeRawFirExpressionBuilder.kt", - "created_at": "2024-08-14T20:43:41+00:00", - "commented_code": "}\n }\n\n val result = firExpression ?: buildErrorExpression(null, ConeNotAnnotationContainer(\"???\"))\n val result = firExpression ?: buildErrorExpression(annotatedExpression.toFirSourceElement(), ConeNotAnnotationContainer(\"???\"))", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1717528372", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5311, - "pr_file": "compiler/fir/raw-fir/light-tree2fir/src/org/jetbrains/kotlin/fir/lightTree/converter/LightTreeRawFirExpressionBuilder.kt", - "discussion_id": "1717528372", - "commented_code": "@@ -498,7 +498,7 @@ class LightTreeRawFirExpressionBuilder(\n }\n }\n \n- val result = firExpression ?: buildErrorExpression(null, ConeNotAnnotationContainer(\"???\"))\n+ val result = firExpression ?: buildErrorExpression(annotatedExpression.toFirSourceElement(), ConeNotAnnotationContainer(\"???\"))", - "comment_created_at": "2024-08-14T20:43:41+00:00", - "comment_author": "isuckatcs", - "comment_body": "This must have a location, otherwise we get a crash in `FirContractChecker`.\r\n\r\n```kotlin\r\nobject FirContractChecker {\r\n ...\r\n private fun checkUnresolvedEffects(\r\n ...\r\n ) {\r\n ...\r\n reporter.reportOn(unresolvedEffect.source, ...)\r\n // ^ source is asserted to be non-null inside reportOn()\r\n ...\r\n }\r\n ...\r\n}\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1721465179", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5311, - "pr_file": "compiler/fir/raw-fir/light-tree2fir/src/org/jetbrains/kotlin/fir/lightTree/converter/LightTreeRawFirExpressionBuilder.kt", - "discussion_id": "1717528372", - "commented_code": "@@ -498,7 +498,7 @@ class LightTreeRawFirExpressionBuilder(\n }\n }\n \n- val result = firExpression ?: buildErrorExpression(null, ConeNotAnnotationContainer(\"???\"))\n+ val result = firExpression ?: buildErrorExpression(annotatedExpression.toFirSourceElement(), ConeNotAnnotationContainer(\"???\"))", - "comment_created_at": "2024-08-19T09:13:38+00:00", - "comment_author": "demiurg906", - "comment_body": "Yep, this is a correct fix. Could you please check that the corresponding place in `PsiRawFirBuilder` also correctly sets the source? 
", - "pr_file_module": null - }, - { - "comment_id": "1721651171", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5311, - "pr_file": "compiler/fir/raw-fir/light-tree2fir/src/org/jetbrains/kotlin/fir/lightTree/converter/LightTreeRawFirExpressionBuilder.kt", - "discussion_id": "1717528372", - "commented_code": "@@ -498,7 +498,7 @@ class LightTreeRawFirExpressionBuilder(\n }\n }\n \n- val result = firExpression ?: buildErrorExpression(null, ConeNotAnnotationContainer(\"???\"))\n+ val result = firExpression ?: buildErrorExpression(annotatedExpression.toFirSourceElement(), ConeNotAnnotationContainer(\"???\"))", - "comment_created_at": "2024-08-19T11:34:50+00:00", - "comment_author": "isuckatcs", - "comment_body": "Yes, it sets it. In fact I checked that first and that was my leading clue that this one should be set too.\r\n\r\n```kotlin\r\nval result = rawResult as? FirAnnotationContainer\r\n ?: buildErrorExpression(\r\n expression.toFirSourceElement(),\r\n ConeNotAnnotationContainer(rawResult?.render() ?: \"???\")\r\n )\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1327002233", - "pr_number": 5184, - "pr_file": "plugins/parcelize/parcelize-compiler/parcelize.backend/src/org/jetbrains/kotlin/parcelize/AndroidSymbols.kt", - "created_at": "2023-09-15T08:50:43+00:00", - "commented_code": "isStatic = true\n }.symbol\n\n private val kotlinxCollectionsImmutable = FqName(kotlinxImmutable())\n private val kotlinCollections = FqName(\"kotlin.collections\")\n private val kotlinIterable: FqName = kotlinCollections.child(Name.identifier(\"Iterable\"))\n private val kotlinMap: FqName = kotlinCollections.child(Name.identifier(\"Map\"))\n\n private fun findKotlinxImmutableCollectionExtensionFunction(\n receiver: FqName,\n functionName: String,\n ): IrSimpleFunctionSymbol {\n val functionSymbol =\n pluginContext\n .referenceFunctions(CallableId(kotlinxCollectionsImmutable, Name.identifier(functionName)))\n .firstOrNull { it.owner.extensionReceiverParameter!!.type.classFqName == receiver }", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1327002233", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5184, - "pr_file": "plugins/parcelize/parcelize-compiler/parcelize.backend/src/org/jetbrains/kotlin/parcelize/AndroidSymbols.kt", - "discussion_id": "1327002233", - "commented_code": "@@ -499,6 +502,39 @@ class AndroidSymbols(\n isStatic = true\n }.symbol\n \n+ private val kotlinxCollectionsImmutable = FqName(kotlinxImmutable())\n+ private val kotlinCollections = FqName(\"kotlin.collections\")\n+ private val kotlinIterable: FqName = kotlinCollections.child(Name.identifier(\"Iterable\"))\n+ private val kotlinMap: FqName = kotlinCollections.child(Name.identifier(\"Map\"))\n+\n+ private fun findKotlinxImmutableCollectionExtensionFunction(\n+ receiver: FqName,\n+ functionName: String,\n+ ): IrSimpleFunctionSymbol {\n+ val functionSymbol =\n+ pluginContext\n+ .referenceFunctions(CallableId(kotlinxCollectionsImmutable, Name.identifier(functionName)))\n+ .firstOrNull { it.owner.extensionReceiverParameter!!.type.classFqName == receiver }", - "comment_created_at": "2023-09-15T08:50:43+00:00", - "comment_author": "madsager", - "comment_body": "Just to be safe, maybe it would be better to use null safe operations:\r\n\r\n`it.owner.extensionReceiverParameter?.type?.classFqName == receiver`\r\n\r\nThat way, if users put their own code with these package and function names (but where the function is not an extension function) on the classpath they will 
get the error message below instead of a null pointer exception.", - "pr_file_module": null - }, - { - "comment_id": "1327143301", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5184, - "pr_file": "plugins/parcelize/parcelize-compiler/parcelize.backend/src/org/jetbrains/kotlin/parcelize/AndroidSymbols.kt", - "discussion_id": "1327002233", - "commented_code": "@@ -499,6 +502,39 @@ class AndroidSymbols(\n isStatic = true\n }.symbol\n \n+ private val kotlinxCollectionsImmutable = FqName(kotlinxImmutable())\n+ private val kotlinCollections = FqName(\"kotlin.collections\")\n+ private val kotlinIterable: FqName = kotlinCollections.child(Name.identifier(\"Iterable\"))\n+ private val kotlinMap: FqName = kotlinCollections.child(Name.identifier(\"Map\"))\n+\n+ private fun findKotlinxImmutableCollectionExtensionFunction(\n+ receiver: FqName,\n+ functionName: String,\n+ ): IrSimpleFunctionSymbol {\n+ val functionSymbol =\n+ pluginContext\n+ .referenceFunctions(CallableId(kotlinxCollectionsImmutable, Name.identifier(functionName)))\n+ .firstOrNull { it.owner.extensionReceiverParameter!!.type.classFqName == receiver }", - "comment_created_at": "2023-09-15T11:08:09+00:00", - "comment_author": "IlyaGulya", - "comment_body": "Yeah, sure, I've just forgot about this one, thanks!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "713622898", - "pr_number": 4581, - "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/AtomicFUTransformer.kt", - "created_at": "2021-09-22T06:18:07+00:00", - "commented_code": "/*\n * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.\n * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n */\n\npackage org.jetbrains.kotlinx.atomicfu.compiler.extensions\n\nimport org.jetbrains.kotlin.backend.common.deepCopyWithVariables\nimport org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\nimport org.jetbrains.kotlin.descriptors.DescriptorVisibilities\nimport org.jetbrains.kotlin.ir.*\nimport org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildValueParameter\nimport org.jetbrains.kotlin.ir.expressions.impl.*\nimport org.jetbrains.kotlin.ir.declarations.*\nimport org.jetbrains.kotlin.ir.expressions.*\nimport org.jetbrains.kotlin.ir.symbols.*\nimport org.jetbrains.kotlin.ir.types.*\nimport org.jetbrains.kotlin.ir.util.*\nimport org.jetbrains.kotlin.ir.visitors.IrElementTransformerVoid\nimport org.jetbrains.kotlin.ir.visitors.IrElementVisitorVoid\nimport org.jetbrains.kotlin.ir.visitors.acceptChildrenVoid\nimport org.jetbrains.kotlin.name.*\n\nprivate const val KOTLIN = \"kotlin\"\nprivate const val AFU_PKG = \"kotlinx/atomicfu\"\nprivate const val LOCKS = \"locks\"\nprivate const val ATOMIC_CONSTRUCTOR = \"atomic\"\nprivate const val ATOMICFU_VALUE_TYPE = \"\"\"Atomic(Int|Long|Boolean|Ref)\"\"\"\nprivate const val ATOMIC_ARRAY_TYPE = \"\"\"Atomic(Int|Long|Boolean|)Array\"\"\"\nprivate const val ATOMIC_ARRAY_FACTORY_FUNCTION = \"atomicArrayOfNulls\"\nprivate const val ATOMICFU_RUNTIME_FUNCTION_PREDICATE = \"atomicfu_\"\nprivate const val REENTRANT_LOCK_TYPE = \"ReentrantLock\"\nprivate const val GETTER = \"atomicfu\\$getter\"\nprivate const val SETTER = \"atomicfu\\$setter\"\nprivate const val GET = \"get\"\nprivate const val SET = \"set\"\nprivate const val ATOMICFU_INLINE_FUNCTION = \"\"\"atomicfu_(loop|update|getAndUpdate|updateAndGet)\"\"\"\n\nprivate fun String.prettyStr() = replace('/', '.')\n\nclass 
AtomicFUTransformer(override val context: IrPluginContext) : IrElementTransformerVoid(), TransformerHelpers {\n\n private val irBuiltIns = context.irBuiltIns\n\n private val AFU_CLASSES: Map = mapOf(\n \"AtomicInt\" to irBuiltIns.intType,\n \"AtomicLong\" to irBuiltIns.longType,\n \"AtomicRef\" to irBuiltIns.anyType,\n \"AtomicBoolean\" to irBuiltIns.booleanType\n )\n\n private val AFU_ARRAY_CLASSES: Map = mapOf(\n \"AtomicIntArray\" to \"IntArray\",\n \"AtomicLongArray\" to \"LongArray\",\n \"AtomicBooleanArray\" to \"BooleanArray\",\n \"AtomicArray\" to \"Array\"\n )\n\n override fun visitFile(declaration: IrFile): IrFile {\n val transformedDeclarations = mutableListOf()\n declaration.declarations.forEach { irDeclaration ->\n irDeclaration.transformInlineAtomicExtension()?.let { transformedDeclarations.add(it) }\n }\n declaration.declarations.addAll(transformedDeclarations)\n return super.visitFile(declaration)\n }\n\n override fun visitClass(declaration: IrClass): IrStatement {\n val transformedDeclarations = mutableListOf()\n declaration.declarations.forEach { irDeclaration ->\n irDeclaration.transformInlineAtomicExtension()?.let { transformedDeclarations.add(it) }\n }\n declaration.declarations.addAll(transformedDeclarations)\n return super.visitClass(declaration)\n }\n\n override fun visitProperty(declaration: IrProperty): IrStatement {\n if (declaration.backingField != null) {\n val backingField = declaration.backingField!!\n if (backingField.initializer != null) {\n val initializer = backingField.initializer!!.expression.eraseAtomicConstructor(backingField)\n declaration.backingField!!.initializer = context.irFactory.createExpressionBody(initializer)\n }\n }\n return super.visitProperty(declaration)\n }\n\n override fun visitFunction(declaration: IrFunction): IrStatement {\n transformDeclarationBody(declaration.body, declaration)\n return super.visitFunction(declaration)\n }\n\n override fun visitAnonymousInitializer(declaration: IrAnonymousInitializer): IrStatement {\n transformDeclarationBody(declaration.body, declaration)\n return super.visitAnonymousInitializer(declaration)\n }\n\n private fun transformDeclarationBody(body: IrBody?, parent: IrDeclaration, lambda: IrFunction? = null) {\n if (body is IrBlockBody) {\n body.statements.forEachIndexed { i, stmt ->\n val transformedStmt = stmt.transformStatement(parent, lambda)\n body.statements[i] = transformedStmt\n }\n }\n }\n\n private fun IrExpression.eraseAtomicConstructor(parentDeclaration: IrDeclaration, lambda: IrFunction? = null) =\n when {\n type.isAtomicValueType() -> getPureTypeValue().transformAtomicFunctionCall(parentDeclaration, lambda)\n type.isAtomicArrayType() -> buildPureTypeArrayConstructor()\n type.isReentrantLockType() -> buildConstNull()\n else -> this\n }\n\n private fun IrDeclaration.transformInlineAtomicExtension(): IrDeclaration? {\n // transform the signature of the inline Atomic* extension declaration\n // inline fun AtomicRef.foo(args) { ... 
} -> inline fun foo(args, getter: () -> T, setter: (T) -> Unit)\n if (this is IrFunction &&\n isInline &&\n extensionReceiverParameter != null &&\n extensionReceiverParameter!!.type.isAtomicValueType()\n ) {\n val newDeclaration = this.deepCopyWithSymbols(parent)\n val oldValueParameters = valueParameters.map { it.deepCopyWithSymbols(newDeclaration) }\n val valueParametersCount = valueParameters.size\n val receiverValueType = extensionReceiverParameter!!.type.atomicToValueType() // T\n val getterType = buildGetterType(receiverValueType)\n val setterType = buildSetterType(receiverValueType)\n newDeclaration.valueParameters = oldValueParameters + listOf(\n buildValueParameter(newDeclaration, GETTER, valueParametersCount, getterType),\n buildValueParameter(newDeclaration, SETTER, valueParametersCount + 1, setterType)\n )\n newDeclaration.extensionReceiverParameter = null\n return newDeclaration\n }\n return null\n }\n\n private fun IrExpression.getPureTypeValue(): IrExpression {\n require(this is IrCall && isAtomicFactory()) { \"Illegal initializer for the atomic property $this\" }\n return getValueArgument(0)!!\n }\n\n private fun IrExpression.buildPureTypeArrayConstructor() =\n when (this) {\n is IrConstructorCall -> {\n require(isAtomicArrayConstructor()) { \"Unsupported atomic array constructor $this\" }\n val arrayConstructorSymbol = type.getArrayConstructorSymbol { it.owner.valueParameters.size == 1 }\n val size = getValueArgument(0)\n IrConstructorCallImpl(\n UNDEFINED_OFFSET, UNDEFINED_OFFSET,\n irBuiltIns.unitType, arrayConstructorSymbol,\n 0, 0, 1\n ).apply {\n putValueArgument(0, size)\n }\n }\n is IrCall -> {\n require(isAtomicArrayFactory()) { \"Unsupported atomic array factory function $this\" }\n val arrayFactorySymbol = referencePackageFunction(\"kotlin\", \"arrayOfNulls\")\n val arrayElementType = getTypeArgument(0)!!\n val size = getValueArgument(0)\n buildCall(\n target = arrayFactorySymbol,\n type = type,\n typeArguments = listOf(arrayElementType),\n valueArguments = listOf(size)\n )\n }\n else -> error(\"Illegal type of atomic array initializer\")\n }\n\n private fun IrCall.inlineAtomicFunction(atomicType: IrType, accessors: List): IrCall {\n val valueArguments = getValueArguments()\n val functionName = getAtomicFunctionName()\n val runtimeFunction = getRuntimeFunctionSymbol(functionName, atomicType)\n return buildCall(\n target = runtimeFunction,\n type = type,\n origin = IrStatementOrigin.INVOKE,\n typeArguments = if (runtimeFunction.owner.typeParameters.size == 1) listOf(atomicType) else emptyList(),\n valueArguments = valueArguments + accessors\n )\n }\n\n private fun IrStatement.transformStatement(parentDeclaration: IrDeclaration, lambda: IrFunction? = null) =\n when (this) {\n is IrExpression -> transformAtomicFunctionCall(parentDeclaration, lambda)\n is IrVariable -> {\n apply { initializer = initializer?.transformAtomicFunctionCall(parentDeclaration, lambda) }\n }\n else -> this\n }\n\n private fun IrExpression.transformAtomicFunctionCall(parentDeclaration: IrDeclaration, lambda: IrFunction? 
= null): IrExpression {\n // erase unchecked cast to the Atomic* type\n if (this is IrTypeOperatorCall && operator == IrTypeOperator.CAST && typeOperand.isAtomicValueType()) {\n return argument\n }\n if (isAtomicValueInitializer()) {\n return eraseAtomicConstructor(parentDeclaration, lambda)\n }\n when (this) {\n is IrTypeOperatorCall -> {\n return apply { argument = argument.transformAtomicFunctionCall(parentDeclaration, lambda) }\n }\n is IrStringConcatenationImpl -> {\n return apply {\n arguments.forEachIndexed { i, arg ->\n arguments[i] = arg.transformAtomicFunctionCall(parentDeclaration, lambda)\n }\n }\n }\n is IrReturn -> {\n if (parentDeclaration.isTransformedAtomicExtensionFunction() && (parentDeclaration as IrFunction).isInline &&\n returnTargetSymbol !== parentDeclaration.symbol) {\n return IrReturnImpl(\n startOffset,\n endOffset,\n type,\n parentDeclaration.symbol,\n value.transformAtomicFunctionCall(parentDeclaration, lambda)\n )\n }\n return apply { value = value.transformAtomicFunctionCall(parentDeclaration, lambda) }\n }\n is IrSetValue -> {\n return apply { value = value.transformAtomicFunctionCall(parentDeclaration, lambda) }\n }\n is IrSetField -> {\n return apply { value = value.transformAtomicFunctionCall(parentDeclaration, lambda) }\n }\n is IrIfThenElseImpl -> {\n return apply {\n branches.forEachIndexed { i, branch ->\n branches[i] = branch.apply {\n condition = condition.transformAtomicFunctionCall(parentDeclaration, lambda)\n result = result.transformAtomicFunctionCall(parentDeclaration, lambda)\n }\n }\n }\n }\n is IrWhenImpl -> {\n return apply {\n branches.forEachIndexed { i, branch ->\n branches[i] = branch.apply {\n condition = condition.transformAtomicFunctionCall(parentDeclaration, lambda)\n result = result.transformAtomicFunctionCall(parentDeclaration, lambda)\n }\n }\n }\n }\n is IrTry -> {\n return apply {\n tryResult = tryResult.transformAtomicFunctionCall(parentDeclaration, lambda)\n catches.forEach {\n it.result = it.result.transformAtomicFunctionCall(parentDeclaration, lambda)\n }\n finallyExpression = finallyExpression?.transformAtomicFunctionCall(parentDeclaration, lambda)\n }\n }\n is IrBlock -> {\n return apply {\n statements.forEachIndexed { i, stmt ->\n statements[i] = stmt.transformStatement(parentDeclaration, lambda)\n }\n }\n }\n is IrGetValue -> {\n if (lambda != null && symbol.owner.parent == lambda) return this\n if (symbol is IrValueParameterSymbol && parentDeclaration.isTransformedAtomicExtensionFunction()) {\n // replace use site of the value parameter with it's copy from the transformed declaration\n val index = (symbol.owner as IrValueParameter).index\n if (index >= 0) { // index == -1 for `this` parameter\n val transformedValueParameter = (parentDeclaration as IrFunction).valueParameters[index]\n return buildGetValue(transformedValueParameter.symbol)\n }\n }\n }\n is IrCall -> {\n dispatchReceiver?.let { dispatchReceiver = it.transformAtomicFunctionCall(parentDeclaration, lambda) }\n extensionReceiver?.let { extensionReceiver = it.transformAtomicFunctionCall(parentDeclaration, lambda) }\n getValueArguments().forEachIndexed { i, arg ->\n putValueArgument(i, arg?.transformAtomicFunctionCall(parentDeclaration, lambda))\n }\n val isInline = symbol.owner.isInline\n val receiver = extensionReceiver ?: dispatchReceiver ?: return this\n // Invocation of atomic functions on atomic receivers\n // are substituted with the corresponding inline declarations from `kotlinx-atomicfu-runtime`\n if (symbol.isKotlinxAtomicfuPackage() && 
receiver.type.isAtomicValueType()) {\n // 1. Receiver is the atomic field:\n // a.incrementAndGet() -> atomicfu_incrementAndGet(get_a {..}, set_a {..})\n if (receiver is IrCall) { // property accessor \n val accessors = receiver.getAccessors(lambda ?: parentDeclaration)\n return inlineAtomicFunction(receiver.type.atomicToValueType(), accessors).apply {\n transformAtomicfuLoopBody(parentDeclaration)\n }\n }\n // 2. Atomic extension receiver `this`, the reciver of the parent function.\n // Parent function is the inline Atomic* extension function already transformed with `transformAtomicInlineDeclaration`\n // inline fun foo(getter: () -> T, setter: (T) -> Unit) { incrementAndGet() } ->\n // inline fun foo(getter: () -> T, setter: (T) -> Unit) { atomicfu_incrementAndGet(getter, setter) }\n if (receiver is IrGetValue && parentDeclaration.isTransformedAtomicExtensionFunction()) {\n val accessorParameters = (parentDeclaration as IrFunction).valueParameters.takeLast(2).map { it.capture() }\n return inlineAtomicFunction(receiver.type.atomicToValueType(), accessorParameters).apply {\n transformAtomicfuLoopBody(parentDeclaration)\n }\n }\n } else {\n // 3. transform invocation of an inline Atomic* extension function call: a.foo()\n if (isInline && receiver is IrCall && receiver.type.isAtomicValueType()) {\n val accessors = receiver.getAccessors(lambda ?: parentDeclaration)\n val dispatch = dispatchReceiver\n val args = getValueArguments()\n val transformedTarget = symbol.owner.getDeclarationWithAccessorParameters()\n return buildCall(\n target = transformedTarget.symbol,\n type = type,\n origin = IrStatementOrigin.INVOKE,\n valueArguments = args + accessors\n ).apply {\n dispatchReceiver = dispatch\n }\n }\n }\n }\n is IrConstructorCall -> {\n getValueArguments().forEachIndexed { i, arg ->\n putValueArgument(i, arg?.transformAtomicFunctionCall(parentDeclaration, lambda))\n }\n }\n }\n return this\n }\n\n private fun IrCall.transformAtomicfuLoopBody(parent: IrDeclaration) {\n if (symbol.owner.name.asString().matches(ATOMICFU_INLINE_FUNCTION.toRegex())) {\n val lambdaLoop = (getValueArgument(0) as IrFunctionExpression).function\n transformDeclarationBody(lambdaLoop.body, parent, lambdaLoop)\n }\n }\n\n private fun IrFunction.hasReceiverAccessorParameters(): Boolean {\n if (valueParameters.size < 2) return false\n val params = valueParameters.takeLast(2)\n return params[0].name.asString() == GETTER && params[1].name.asString() == SETTER\n }\n\n private fun IrDeclaration.isTransformedAtomicExtensionFunction(): Boolean =\n this is IrFunction && this.hasReceiverAccessorParameters()\n\n private fun IrFunction.getDeclarationWithAccessorParameters(): IrSimpleFunction {\n val parent = parent as IrDeclarationContainer\n val params = valueParameters.map { it.type }\n val extensionType = extensionReceiverParameter?.type?.atomicToValueType()\n return try {\n parent.declarations.single {\n it is IrSimpleFunction &&\n it.name == symbol.owner.name &&\n it.valueParameters.size == params.size + 2 &&\n it.valueParameters.dropLast(2).withIndex().all { p -> p.value.type.classifierOrNull?.owner == params[p.index].classifierOrNull?.owner } &&\n it.getGetterReturnType()?.classifierOrNull?.owner == extensionType?.classifierOrNull?.owner\n } as IrSimpleFunction\n } catch (e: RuntimeException) {\n error(\"Exception while looking for the declaration with accessor parameters: ${e.message}\")\n }\n }\n\n private fun IrExpression.isAtomicValueInitializer() =\n (this is IrCall && (this.isAtomicFactory() || 
this.isAtomicArrayFactory())) ||\n (this is IrConstructorCall && this.isAtomicArrayConstructor()) ||\n type.isReentrantLockType()\n\n private fun IrCall.isArrayElementGetter() =\n dispatchReceiver != null &&\n dispatchReceiver!!.type.isAtomicArrayType() &&\n symbol.owner.name.asString() == GET\n\n private fun IrCall.getBackingField(): IrField {\n val correspondingPropertySymbol = symbol.owner.correspondingPropertySymbol!!\n return correspondingPropertySymbol.owner.backingField!!\n }\n\n private fun IrCall.buildAccessorLambda(\n isSetter: Boolean,\n parentDeclaration: IrDeclaration\n ): IrFunctionExpression {\n val isArrayElement = isArrayElementGetter()\n val getterCall = if (isArrayElement) dispatchReceiver as IrCall else this\n val valueType = type.atomicToValueType()\n val type = if (isSetter) buildSetterType(valueType) else buildGetterType(valueType)\n val name = if (isSetter) setterName(getterCall.symbol.owner.name.getFieldName())\n else getterName(getterCall.symbol.owner.name.getFieldName())\n val returnType = if (isSetter) context.irBuiltIns.unitType else valueType\n val accessorFunction = buildFunction(\n parent = parentDeclaration as IrDeclarationParent,\n origin = IrDeclarationOrigin.DEFAULT_PROPERTY_ACCESSOR,\n name = Name.identifier(name),\n visibility = DescriptorVisibilities.LOCAL,\n isInline = true,\n returnType = returnType\n ).apply {\n val valueParameter = buildValueParameter(this, name, 0, valueType)\n this.valueParameters = if (isSetter) listOf(valueParameter) else emptyList()\n val body = if (isSetter) {\n if (isArrayElement) {\n val setSymbol = referenceFunction(getterCall.type.referenceClass(), SET)\n val elementIndex = getValueArgument(0)!!.deepCopyWithVariables()\n buildCall(\n target = setSymbol,\n type = context.irBuiltIns.unitType,\n origin = IrStatementOrigin.LAMBDA,\n valueArguments = listOf(elementIndex, valueParameter.capture())\n ).apply {\n dispatchReceiver = getterCall\n }\n } else {\n buildSetField(getterCall.getBackingField(), getterCall.dispatchReceiver, valueParameter.capture())\n }\n } else {\n val getField = buildGetField(getterCall.getBackingField(), getterCall.dispatchReceiver)\n if (isArrayElement) {\n val getSymbol = referenceFunction(getterCall.type.referenceClass(), GET)\n val elementIndex = getValueArgument(0)!!.deepCopyWithVariables()\n buildCall(\n target = getSymbol,\n type = valueType,\n origin = IrStatementOrigin.LAMBDA,\n valueArguments = listOf(elementIndex)\n ).apply {\n dispatchReceiver = getField.deepCopyWithVariables()\n }\n } else {\n getField.deepCopyWithVariables()\n }\n }\n this.body = buildBlockBody(listOf(body))\n origin = IrDeclarationOrigin.DEFAULT_PROPERTY_ACCESSOR\n }\n return IrFunctionExpressionImpl(\n UNDEFINED_OFFSET, UNDEFINED_OFFSET,\n type,\n accessorFunction,\n IrStatementOrigin.LAMBDA\n )\n }\n\n private fun buildSetField(backingField: IrField, ownerClass: IrExpression?, value: IrGetValue): IrSetField {\n val receiver = if (ownerClass is IrTypeOperatorCall) ownerClass.argument as IrGetValue else ownerClass\n val fieldSymbol = backingField.symbol\n return buildSetField(\n symbol = fieldSymbol,\n receiver = receiver,\n value = value\n )\n }\n\n private fun buildGetField(backingField: IrField, ownerClass: IrExpression?): IrGetField {\n val receiver = if (ownerClass is IrTypeOperatorCall) ownerClass.argument as IrGetValue else ownerClass\n return buildGetField(\n symbol = backingField.symbol,\n receiver = receiver\n )\n }\n\n private fun IrCall.getAccessors(parentDeclaration: IrDeclaration): List =\n 
listOf(buildAccessorLambda(isSetter = false, parentDeclaration = parentDeclaration),\n buildAccessorLambda(isSetter = true, parentDeclaration = parentDeclaration))\n\n private fun getRuntimeFunctionSymbol(name: String, type: IrType): IrSimpleFunctionSymbol {\n val functionName = when (name) {\n \"value.\" -> \"getValue\"\n \"value.\" -> \"setValue\"\n else -> name\n }\n return referencePackageFunction(\"kotlinx.atomicfu\", \"$ATOMICFU_RUNTIME_FUNCTION_PREDICATE$functionName\") {\n val typeArg = it.owner.getGetterReturnType()\n !(typeArg as IrType).isPrimitiveType() || typeArg == type\n }\n }\n\n private fun IrFunction.getGetterReturnType() = (valueParameters[valueParameters.lastIndex - 1].type as IrSimpleType).arguments.first().typeOrNull\n\n private fun IrCall.isAtomicFactory(): Boolean {\n val name = symbol.owner.name\n return !name.isSpecial && name.identifier == ATOMIC_CONSTRUCTOR\n }\n\n private fun IrCall.isAtomicArrayFactory(): Boolean {\n val name = symbol.owner.name\n return !name.isSpecial && name.identifier == ATOMIC_ARRAY_FACTORY_FUNCTION\n }\n\n private fun IrConstructorCall.isAtomicArrayConstructor() = (type as IrSimpleType).isAtomicArrayType()\n\n private fun IrSymbol.isKotlinxAtomicfuPackage() =\n this.isPublicApi && signature?.packageFqName()?.asString() == AFU_PKG.prettyStr()\n\n private fun IrType.isAtomicValueType() = belongsTo(ATOMICFU_VALUE_TYPE)\n private fun IrType.isAtomicArrayType() = belongsTo(ATOMIC_ARRAY_TYPE)\n private fun IrType.isReentrantLockType() = belongsTo(\"$AFU_PKG/$LOCKS\", REENTRANT_LOCK_TYPE)\n\n private fun IrType.belongsTo(typeName: String) = belongsTo(AFU_PKG, typeName)\n\n private fun IrType.belongsTo(packageName: String, typeName: String): Boolean {", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "713622898", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4581, - "pr_file": "plugins/atomicfu/atomicfu-compiler/src/org.jetbrains.kotlinx.atomicfu.compiler/extensions/AtomicFUTransformer.kt", - "discussion_id": "713622898", - "commented_code": "@@ -0,0 +1,595 @@\n+/*\n+ * Copyright 2010-2020 JetBrains s.r.o. 
and Kotlin Programming Language contributors.\n+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.\n+ */\n+\n+package org.jetbrains.kotlinx.atomicfu.compiler.extensions\n+\n+import org.jetbrains.kotlin.backend.common.deepCopyWithVariables\n+import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext\n+import org.jetbrains.kotlin.descriptors.DescriptorVisibilities\n+import org.jetbrains.kotlin.ir.*\n+import org.jetbrains.kotlin.ir.backend.js.ir.JsIrBuilder.buildValueParameter\n+import org.jetbrains.kotlin.ir.expressions.impl.*\n+import org.jetbrains.kotlin.ir.declarations.*\n+import org.jetbrains.kotlin.ir.expressions.*\n+import org.jetbrains.kotlin.ir.symbols.*\n+import org.jetbrains.kotlin.ir.types.*\n+import org.jetbrains.kotlin.ir.util.*\n+import org.jetbrains.kotlin.ir.visitors.IrElementTransformerVoid\n+import org.jetbrains.kotlin.ir.visitors.IrElementVisitorVoid\n+import org.jetbrains.kotlin.ir.visitors.acceptChildrenVoid\n+import org.jetbrains.kotlin.name.*\n+\n+private const val KOTLIN = \"kotlin\"\n+private const val AFU_PKG = \"kotlinx/atomicfu\"\n+private const val LOCKS = \"locks\"\n+private const val ATOMIC_CONSTRUCTOR = \"atomic\"\n+private const val ATOMICFU_VALUE_TYPE = \"\"\"Atomic(Int|Long|Boolean|Ref)\"\"\"\n+private const val ATOMIC_ARRAY_TYPE = \"\"\"Atomic(Int|Long|Boolean|)Array\"\"\"\n+private const val ATOMIC_ARRAY_FACTORY_FUNCTION = \"atomicArrayOfNulls\"\n+private const val ATOMICFU_RUNTIME_FUNCTION_PREDICATE = \"atomicfu_\"\n+private const val REENTRANT_LOCK_TYPE = \"ReentrantLock\"\n+private const val GETTER = \"atomicfu\\$getter\"\n+private const val SETTER = \"atomicfu\\$setter\"\n+private const val GET = \"get\"\n+private const val SET = \"set\"\n+private const val ATOMICFU_INLINE_FUNCTION = \"\"\"atomicfu_(loop|update|getAndUpdate|updateAndGet)\"\"\"\n+\n+private fun String.prettyStr() = replace('/', '.')\n+\n+class AtomicFUTransformer(override val context: IrPluginContext) : IrElementTransformerVoid(), TransformerHelpers {\n+\n+ private val irBuiltIns = context.irBuiltIns\n+\n+ private val AFU_CLASSES: Map = mapOf(\n+ \"AtomicInt\" to irBuiltIns.intType,\n+ \"AtomicLong\" to irBuiltIns.longType,\n+ \"AtomicRef\" to irBuiltIns.anyType,\n+ \"AtomicBoolean\" to irBuiltIns.booleanType\n+ )\n+\n+ private val AFU_ARRAY_CLASSES: Map = mapOf(\n+ \"AtomicIntArray\" to \"IntArray\",\n+ \"AtomicLongArray\" to \"LongArray\",\n+ \"AtomicBooleanArray\" to \"BooleanArray\",\n+ \"AtomicArray\" to \"Array\"\n+ )\n+\n+ override fun visitFile(declaration: IrFile): IrFile {\n+ val transformedDeclarations = mutableListOf()\n+ declaration.declarations.forEach { irDeclaration ->\n+ irDeclaration.transformInlineAtomicExtension()?.let { transformedDeclarations.add(it) }\n+ }\n+ declaration.declarations.addAll(transformedDeclarations)\n+ return super.visitFile(declaration)\n+ }\n+\n+ override fun visitClass(declaration: IrClass): IrStatement {\n+ val transformedDeclarations = mutableListOf()\n+ declaration.declarations.forEach { irDeclaration ->\n+ irDeclaration.transformInlineAtomicExtension()?.let { transformedDeclarations.add(it) }\n+ }\n+ declaration.declarations.addAll(transformedDeclarations)\n+ return super.visitClass(declaration)\n+ }\n+\n+ override fun visitProperty(declaration: IrProperty): IrStatement {\n+ if (declaration.backingField != null) {\n+ val backingField = declaration.backingField!!\n+ if (backingField.initializer != null) {\n+ val initializer = 
backingField.initializer!!.expression.eraseAtomicConstructor(backingField)\n+ declaration.backingField!!.initializer = context.irFactory.createExpressionBody(initializer)\n+ }\n+ }\n+ return super.visitProperty(declaration)\n+ }\n+\n+ override fun visitFunction(declaration: IrFunction): IrStatement {\n+ transformDeclarationBody(declaration.body, declaration)\n+ return super.visitFunction(declaration)\n+ }\n+\n+ override fun visitAnonymousInitializer(declaration: IrAnonymousInitializer): IrStatement {\n+ transformDeclarationBody(declaration.body, declaration)\n+ return super.visitAnonymousInitializer(declaration)\n+ }\n+\n+ private fun transformDeclarationBody(body: IrBody?, parent: IrDeclaration, lambda: IrFunction? = null) {\n+ if (body is IrBlockBody) {\n+ body.statements.forEachIndexed { i, stmt ->\n+ val transformedStmt = stmt.transformStatement(parent, lambda)\n+ body.statements[i] = transformedStmt\n+ }\n+ }\n+ }\n+\n+ private fun IrExpression.eraseAtomicConstructor(parentDeclaration: IrDeclaration, lambda: IrFunction? = null) =\n+ when {\n+ type.isAtomicValueType() -> getPureTypeValue().transformAtomicFunctionCall(parentDeclaration, lambda)\n+ type.isAtomicArrayType() -> buildPureTypeArrayConstructor()\n+ type.isReentrantLockType() -> buildConstNull()\n+ else -> this\n+ }\n+\n+ private fun IrDeclaration.transformInlineAtomicExtension(): IrDeclaration? {\n+ // transform the signature of the inline Atomic* extension declaration\n+ // inline fun AtomicRef.foo(args) { ... } -> inline fun foo(args, getter: () -> T, setter: (T) -> Unit)\n+ if (this is IrFunction &&\n+ isInline &&\n+ extensionReceiverParameter != null &&\n+ extensionReceiverParameter!!.type.isAtomicValueType()\n+ ) {\n+ val newDeclaration = this.deepCopyWithSymbols(parent)\n+ val oldValueParameters = valueParameters.map { it.deepCopyWithSymbols(newDeclaration) }\n+ val valueParametersCount = valueParameters.size\n+ val receiverValueType = extensionReceiverParameter!!.type.atomicToValueType() // T\n+ val getterType = buildGetterType(receiverValueType)\n+ val setterType = buildSetterType(receiverValueType)\n+ newDeclaration.valueParameters = oldValueParameters + listOf(\n+ buildValueParameter(newDeclaration, GETTER, valueParametersCount, getterType),\n+ buildValueParameter(newDeclaration, SETTER, valueParametersCount + 1, setterType)\n+ )\n+ newDeclaration.extensionReceiverParameter = null\n+ return newDeclaration\n+ }\n+ return null\n+ }\n+\n+ private fun IrExpression.getPureTypeValue(): IrExpression {\n+ require(this is IrCall && isAtomicFactory()) { \"Illegal initializer for the atomic property $this\" }\n+ return getValueArgument(0)!!\n+ }\n+\n+ private fun IrExpression.buildPureTypeArrayConstructor() =\n+ when (this) {\n+ is IrConstructorCall -> {\n+ require(isAtomicArrayConstructor()) { \"Unsupported atomic array constructor $this\" }\n+ val arrayConstructorSymbol = type.getArrayConstructorSymbol { it.owner.valueParameters.size == 1 }\n+ val size = getValueArgument(0)\n+ IrConstructorCallImpl(\n+ UNDEFINED_OFFSET, UNDEFINED_OFFSET,\n+ irBuiltIns.unitType, arrayConstructorSymbol,\n+ 0, 0, 1\n+ ).apply {\n+ putValueArgument(0, size)\n+ }\n+ }\n+ is IrCall -> {\n+ require(isAtomicArrayFactory()) { \"Unsupported atomic array factory function $this\" }\n+ val arrayFactorySymbol = referencePackageFunction(\"kotlin\", \"arrayOfNulls\")\n+ val arrayElementType = getTypeArgument(0)!!\n+ val size = getValueArgument(0)\n+ buildCall(\n+ target = arrayFactorySymbol,\n+ type = type,\n+ typeArguments = listOf(arrayElementType),\n+ 
valueArguments = listOf(size)\n+ )\n+ }\n+ else -> error(\"Illegal type of atomic array initializer\")\n+ }\n+\n+ private fun IrCall.inlineAtomicFunction(atomicType: IrType, accessors: List): IrCall {\n+ val valueArguments = getValueArguments()\n+ val functionName = getAtomicFunctionName()\n+ val runtimeFunction = getRuntimeFunctionSymbol(functionName, atomicType)\n+ return buildCall(\n+ target = runtimeFunction,\n+ type = type,\n+ origin = IrStatementOrigin.INVOKE,\n+ typeArguments = if (runtimeFunction.owner.typeParameters.size == 1) listOf(atomicType) else emptyList(),\n+ valueArguments = valueArguments + accessors\n+ )\n+ }\n+\n+ private fun IrStatement.transformStatement(parentDeclaration: IrDeclaration, lambda: IrFunction? = null) =\n+ when (this) {\n+ is IrExpression -> transformAtomicFunctionCall(parentDeclaration, lambda)\n+ is IrVariable -> {\n+ apply { initializer = initializer?.transformAtomicFunctionCall(parentDeclaration, lambda) }\n+ }\n+ else -> this\n+ }\n+\n+ private fun IrExpression.transformAtomicFunctionCall(parentDeclaration: IrDeclaration, lambda: IrFunction? = null): IrExpression {\n+ // erase unchecked cast to the Atomic* type\n+ if (this is IrTypeOperatorCall && operator == IrTypeOperator.CAST && typeOperand.isAtomicValueType()) {\n+ return argument\n+ }\n+ if (isAtomicValueInitializer()) {\n+ return eraseAtomicConstructor(parentDeclaration, lambda)\n+ }\n+ when (this) {\n+ is IrTypeOperatorCall -> {\n+ return apply { argument = argument.transformAtomicFunctionCall(parentDeclaration, lambda) }\n+ }\n+ is IrStringConcatenationImpl -> {\n+ return apply {\n+ arguments.forEachIndexed { i, arg ->\n+ arguments[i] = arg.transformAtomicFunctionCall(parentDeclaration, lambda)\n+ }\n+ }\n+ }\n+ is IrReturn -> {\n+ if (parentDeclaration.isTransformedAtomicExtensionFunction() && (parentDeclaration as IrFunction).isInline &&\n+ returnTargetSymbol !== parentDeclaration.symbol) {\n+ return IrReturnImpl(\n+ startOffset,\n+ endOffset,\n+ type,\n+ parentDeclaration.symbol,\n+ value.transformAtomicFunctionCall(parentDeclaration, lambda)\n+ )\n+ }\n+ return apply { value = value.transformAtomicFunctionCall(parentDeclaration, lambda) }\n+ }\n+ is IrSetValue -> {\n+ return apply { value = value.transformAtomicFunctionCall(parentDeclaration, lambda) }\n+ }\n+ is IrSetField -> {\n+ return apply { value = value.transformAtomicFunctionCall(parentDeclaration, lambda) }\n+ }\n+ is IrIfThenElseImpl -> {\n+ return apply {\n+ branches.forEachIndexed { i, branch ->\n+ branches[i] = branch.apply {\n+ condition = condition.transformAtomicFunctionCall(parentDeclaration, lambda)\n+ result = result.transformAtomicFunctionCall(parentDeclaration, lambda)\n+ }\n+ }\n+ }\n+ }\n+ is IrWhenImpl -> {\n+ return apply {\n+ branches.forEachIndexed { i, branch ->\n+ branches[i] = branch.apply {\n+ condition = condition.transformAtomicFunctionCall(parentDeclaration, lambda)\n+ result = result.transformAtomicFunctionCall(parentDeclaration, lambda)\n+ }\n+ }\n+ }\n+ }\n+ is IrTry -> {\n+ return apply {\n+ tryResult = tryResult.transformAtomicFunctionCall(parentDeclaration, lambda)\n+ catches.forEach {\n+ it.result = it.result.transformAtomicFunctionCall(parentDeclaration, lambda)\n+ }\n+ finallyExpression = finallyExpression?.transformAtomicFunctionCall(parentDeclaration, lambda)\n+ }\n+ }\n+ is IrBlock -> {\n+ return apply {\n+ statements.forEachIndexed { i, stmt ->\n+ statements[i] = stmt.transformStatement(parentDeclaration, lambda)\n+ }\n+ }\n+ }\n+ is IrGetValue -> {\n+ if (lambda != null && 
symbol.owner.parent == lambda) return this\n+ if (symbol is IrValueParameterSymbol && parentDeclaration.isTransformedAtomicExtensionFunction()) {\n+ // replace use site of the value parameter with it's copy from the transformed declaration\n+ val index = (symbol.owner as IrValueParameter).index\n+ if (index >= 0) { // index == -1 for `this` parameter\n+ val transformedValueParameter = (parentDeclaration as IrFunction).valueParameters[index]\n+ return buildGetValue(transformedValueParameter.symbol)\n+ }\n+ }\n+ }\n+ is IrCall -> {\n+ dispatchReceiver?.let { dispatchReceiver = it.transformAtomicFunctionCall(parentDeclaration, lambda) }\n+ extensionReceiver?.let { extensionReceiver = it.transformAtomicFunctionCall(parentDeclaration, lambda) }\n+ getValueArguments().forEachIndexed { i, arg ->\n+ putValueArgument(i, arg?.transformAtomicFunctionCall(parentDeclaration, lambda))\n+ }\n+ val isInline = symbol.owner.isInline\n+ val receiver = extensionReceiver ?: dispatchReceiver ?: return this\n+ // Invocation of atomic functions on atomic receivers\n+ // are substituted with the corresponding inline declarations from `kotlinx-atomicfu-runtime`\n+ if (symbol.isKotlinxAtomicfuPackage() && receiver.type.isAtomicValueType()) {\n+ // 1. Receiver is the atomic field:\n+ // a.incrementAndGet() -> atomicfu_incrementAndGet(get_a {..}, set_a {..})\n+ if (receiver is IrCall) { // property accessor \n+ val accessors = receiver.getAccessors(lambda ?: parentDeclaration)\n+ return inlineAtomicFunction(receiver.type.atomicToValueType(), accessors).apply {\n+ transformAtomicfuLoopBody(parentDeclaration)\n+ }\n+ }\n+ // 2. Atomic extension receiver `this`, the reciver of the parent function.\n+ // Parent function is the inline Atomic* extension function already transformed with `transformAtomicInlineDeclaration`\n+ // inline fun foo(getter: () -> T, setter: (T) -> Unit) { incrementAndGet() } ->\n+ // inline fun foo(getter: () -> T, setter: (T) -> Unit) { atomicfu_incrementAndGet(getter, setter) }\n+ if (receiver is IrGetValue && parentDeclaration.isTransformedAtomicExtensionFunction()) {\n+ val accessorParameters = (parentDeclaration as IrFunction).valueParameters.takeLast(2).map { it.capture() }\n+ return inlineAtomicFunction(receiver.type.atomicToValueType(), accessorParameters).apply {\n+ transformAtomicfuLoopBody(parentDeclaration)\n+ }\n+ }\n+ } else {\n+ // 3. 
transform invocation of an inline Atomic* extension function call: a.foo()\n+ if (isInline && receiver is IrCall && receiver.type.isAtomicValueType()) {\n+ val accessors = receiver.getAccessors(lambda ?: parentDeclaration)\n+ val dispatch = dispatchReceiver\n+ val args = getValueArguments()\n+ val transformedTarget = symbol.owner.getDeclarationWithAccessorParameters()\n+ return buildCall(\n+ target = transformedTarget.symbol,\n+ type = type,\n+ origin = IrStatementOrigin.INVOKE,\n+ valueArguments = args + accessors\n+ ).apply {\n+ dispatchReceiver = dispatch\n+ }\n+ }\n+ }\n+ }\n+ is IrConstructorCall -> {\n+ getValueArguments().forEachIndexed { i, arg ->\n+ putValueArgument(i, arg?.transformAtomicFunctionCall(parentDeclaration, lambda))\n+ }\n+ }\n+ }\n+ return this\n+ }\n+\n+ private fun IrCall.transformAtomicfuLoopBody(parent: IrDeclaration) {\n+ if (symbol.owner.name.asString().matches(ATOMICFU_INLINE_FUNCTION.toRegex())) {\n+ val lambdaLoop = (getValueArgument(0) as IrFunctionExpression).function\n+ transformDeclarationBody(lambdaLoop.body, parent, lambdaLoop)\n+ }\n+ }\n+\n+ private fun IrFunction.hasReceiverAccessorParameters(): Boolean {\n+ if (valueParameters.size < 2) return false\n+ val params = valueParameters.takeLast(2)\n+ return params[0].name.asString() == GETTER && params[1].name.asString() == SETTER\n+ }\n+\n+ private fun IrDeclaration.isTransformedAtomicExtensionFunction(): Boolean =\n+ this is IrFunction && this.hasReceiverAccessorParameters()\n+\n+ private fun IrFunction.getDeclarationWithAccessorParameters(): IrSimpleFunction {\n+ val parent = parent as IrDeclarationContainer\n+ val params = valueParameters.map { it.type }\n+ val extensionType = extensionReceiverParameter?.type?.atomicToValueType()\n+ return try {\n+ parent.declarations.single {\n+ it is IrSimpleFunction &&\n+ it.name == symbol.owner.name &&\n+ it.valueParameters.size == params.size + 2 &&\n+ it.valueParameters.dropLast(2).withIndex().all { p -> p.value.type.classifierOrNull?.owner == params[p.index].classifierOrNull?.owner } &&\n+ it.getGetterReturnType()?.classifierOrNull?.owner == extensionType?.classifierOrNull?.owner\n+ } as IrSimpleFunction\n+ } catch (e: RuntimeException) {\n+ error(\"Exception while looking for the declaration with accessor parameters: ${e.message}\")\n+ }\n+ }\n+\n+ private fun IrExpression.isAtomicValueInitializer() =\n+ (this is IrCall && (this.isAtomicFactory() || this.isAtomicArrayFactory())) ||\n+ (this is IrConstructorCall && this.isAtomicArrayConstructor()) ||\n+ type.isReentrantLockType()\n+\n+ private fun IrCall.isArrayElementGetter() =\n+ dispatchReceiver != null &&\n+ dispatchReceiver!!.type.isAtomicArrayType() &&\n+ symbol.owner.name.asString() == GET\n+\n+ private fun IrCall.getBackingField(): IrField {\n+ val correspondingPropertySymbol = symbol.owner.correspondingPropertySymbol!!\n+ return correspondingPropertySymbol.owner.backingField!!\n+ }\n+\n+ private fun IrCall.buildAccessorLambda(\n+ isSetter: Boolean,\n+ parentDeclaration: IrDeclaration\n+ ): IrFunctionExpression {\n+ val isArrayElement = isArrayElementGetter()\n+ val getterCall = if (isArrayElement) dispatchReceiver as IrCall else this\n+ val valueType = type.atomicToValueType()\n+ val type = if (isSetter) buildSetterType(valueType) else buildGetterType(valueType)\n+ val name = if (isSetter) setterName(getterCall.symbol.owner.name.getFieldName())\n+ else getterName(getterCall.symbol.owner.name.getFieldName())\n+ val returnType = if (isSetter) context.irBuiltIns.unitType else valueType\n+ val 
accessorFunction = buildFunction(\n+ parent = parentDeclaration as IrDeclarationParent,\n+ origin = IrDeclarationOrigin.DEFAULT_PROPERTY_ACCESSOR,\n+ name = Name.identifier(name),\n+ visibility = DescriptorVisibilities.LOCAL,\n+ isInline = true,\n+ returnType = returnType\n+ ).apply {\n+ val valueParameter = buildValueParameter(this, name, 0, valueType)\n+ this.valueParameters = if (isSetter) listOf(valueParameter) else emptyList()\n+ val body = if (isSetter) {\n+ if (isArrayElement) {\n+ val setSymbol = referenceFunction(getterCall.type.referenceClass(), SET)\n+ val elementIndex = getValueArgument(0)!!.deepCopyWithVariables()\n+ buildCall(\n+ target = setSymbol,\n+ type = context.irBuiltIns.unitType,\n+ origin = IrStatementOrigin.LAMBDA,\n+ valueArguments = listOf(elementIndex, valueParameter.capture())\n+ ).apply {\n+ dispatchReceiver = getterCall\n+ }\n+ } else {\n+ buildSetField(getterCall.getBackingField(), getterCall.dispatchReceiver, valueParameter.capture())\n+ }\n+ } else {\n+ val getField = buildGetField(getterCall.getBackingField(), getterCall.dispatchReceiver)\n+ if (isArrayElement) {\n+ val getSymbol = referenceFunction(getterCall.type.referenceClass(), GET)\n+ val elementIndex = getValueArgument(0)!!.deepCopyWithVariables()\n+ buildCall(\n+ target = getSymbol,\n+ type = valueType,\n+ origin = IrStatementOrigin.LAMBDA,\n+ valueArguments = listOf(elementIndex)\n+ ).apply {\n+ dispatchReceiver = getField.deepCopyWithVariables()\n+ }\n+ } else {\n+ getField.deepCopyWithVariables()\n+ }\n+ }\n+ this.body = buildBlockBody(listOf(body))\n+ origin = IrDeclarationOrigin.DEFAULT_PROPERTY_ACCESSOR\n+ }\n+ return IrFunctionExpressionImpl(\n+ UNDEFINED_OFFSET, UNDEFINED_OFFSET,\n+ type,\n+ accessorFunction,\n+ IrStatementOrigin.LAMBDA\n+ )\n+ }\n+\n+ private fun buildSetField(backingField: IrField, ownerClass: IrExpression?, value: IrGetValue): IrSetField {\n+ val receiver = if (ownerClass is IrTypeOperatorCall) ownerClass.argument as IrGetValue else ownerClass\n+ val fieldSymbol = backingField.symbol\n+ return buildSetField(\n+ symbol = fieldSymbol,\n+ receiver = receiver,\n+ value = value\n+ )\n+ }\n+\n+ private fun buildGetField(backingField: IrField, ownerClass: IrExpression?): IrGetField {\n+ val receiver = if (ownerClass is IrTypeOperatorCall) ownerClass.argument as IrGetValue else ownerClass\n+ return buildGetField(\n+ symbol = backingField.symbol,\n+ receiver = receiver\n+ )\n+ }\n+\n+ private fun IrCall.getAccessors(parentDeclaration: IrDeclaration): List =\n+ listOf(buildAccessorLambda(isSetter = false, parentDeclaration = parentDeclaration),\n+ buildAccessorLambda(isSetter = true, parentDeclaration = parentDeclaration))\n+\n+ private fun getRuntimeFunctionSymbol(name: String, type: IrType): IrSimpleFunctionSymbol {\n+ val functionName = when (name) {\n+ \"value.\" -> \"getValue\"\n+ \"value.\" -> \"setValue\"\n+ else -> name\n+ }\n+ return referencePackageFunction(\"kotlinx.atomicfu\", \"$ATOMICFU_RUNTIME_FUNCTION_PREDICATE$functionName\") {\n+ val typeArg = it.owner.getGetterReturnType()\n+ !(typeArg as IrType).isPrimitiveType() || typeArg == type\n+ }\n+ }\n+\n+ private fun IrFunction.getGetterReturnType() = (valueParameters[valueParameters.lastIndex - 1].type as IrSimpleType).arguments.first().typeOrNull\n+\n+ private fun IrCall.isAtomicFactory(): Boolean {\n+ val name = symbol.owner.name\n+ return !name.isSpecial && name.identifier == ATOMIC_CONSTRUCTOR\n+ }\n+\n+ private fun IrCall.isAtomicArrayFactory(): Boolean {\n+ val name = symbol.owner.name\n+ return !name.isSpecial 
&& name.identifier == ATOMIC_ARRAY_FACTORY_FUNCTION\n+ }\n+\n+ private fun IrConstructorCall.isAtomicArrayConstructor() = (type as IrSimpleType).isAtomicArrayType()\n+\n+ private fun IrSymbol.isKotlinxAtomicfuPackage() =\n+ this.isPublicApi && signature?.packageFqName()?.asString() == AFU_PKG.prettyStr()\n+\n+ private fun IrType.isAtomicValueType() = belongsTo(ATOMICFU_VALUE_TYPE)\n+ private fun IrType.isAtomicArrayType() = belongsTo(ATOMIC_ARRAY_TYPE)\n+ private fun IrType.isReentrantLockType() = belongsTo(\"$AFU_PKG/$LOCKS\", REENTRANT_LOCK_TYPE)\n+\n+ private fun IrType.belongsTo(typeName: String) = belongsTo(AFU_PKG, typeName)\n+\n+ private fun IrType.belongsTo(packageName: String, typeName: String): Boolean {", - "comment_created_at": "2021-09-22T06:18:07+00:00", - "comment_author": "romanart", - "comment_body": "```\r\n return classOrNull?.let { \r\n it.signature?.asPublic()?.let { sig -> \r\n sig.packageFqName == packageName && sig.declarationFqName.matches(typeNameReg)\r\n }\r\n } ?: false\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "630924715", - "pr_number": 4363, - "pr_file": "libraries/scripting/jvm/src/kotlin/script/experimental/jvm/impl/KJvmCompiledScript.kt", - "created_at": "2021-05-12T10:37:02+00:00", - "commented_code": "val processedClasspathElements = mutableSetOf()\n fun recursiveClassPath(res: Sequence, classLoader: ClassLoader?): Sequence =\n when (classLoader) {\n null -> res\n null, baseClassLoader -> res", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "630924715", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4363, - "pr_file": "libraries/scripting/jvm/src/kotlin/script/experimental/jvm/impl/KJvmCompiledScript.kt", - "discussion_id": "630924715", - "commented_code": "@@ -139,17 +140,17 @@ private fun CompiledScript.makeClassLoaderFromDependencies(baseClassLoader: Clas\n val processedClasspathElements = mutableSetOf()\n fun recursiveClassPath(res: Sequence, classLoader: ClassLoader?): Sequence =\n when (classLoader) {\n- null -> res\n+ null, baseClassLoader -> res", - "comment_created_at": "2021-05-12T10:37:02+00:00", - "comment_author": "ligee", - "comment_body": "I would still process the `null` case, e.g. 
generating an error if we've got null.", - "pr_file_module": null - }, - { - "comment_id": "630928510", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4363, - "pr_file": "libraries/scripting/jvm/src/kotlin/script/experimental/jvm/impl/KJvmCompiledScript.kt", - "discussion_id": "630924715", - "commented_code": "@@ -139,17 +140,17 @@ private fun CompiledScript.makeClassLoaderFromDependencies(baseClassLoader: Clas\n val processedClasspathElements = mutableSetOf()\n fun recursiveClassPath(res: Sequence, classLoader: ClassLoader?): Sequence =\n when (classLoader) {\n- null -> res\n+ null, baseClassLoader -> res", - "comment_created_at": "2021-05-12T10:43:07+00:00", - "comment_author": "ligee", - "comment_body": "But it should be after `baseClassLoader` check, since it could be `null`, and it is a valid situation.", - "pr_file_module": null - }, - { - "comment_id": "630949276", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4363, - "pr_file": "libraries/scripting/jvm/src/kotlin/script/experimental/jvm/impl/KJvmCompiledScript.kt", - "discussion_id": "630924715", - "commented_code": "@@ -139,17 +140,17 @@ private fun CompiledScript.makeClassLoaderFromDependencies(baseClassLoader: Clas\n val processedClasspathElements = mutableSetOf()\n fun recursiveClassPath(res: Sequence, classLoader: ClassLoader?): Sequence =\n when (classLoader) {\n- null -> res\n+ null, baseClassLoader -> res", - "comment_created_at": "2021-05-12T11:17:27+00:00", - "comment_author": "ileasile", - "comment_body": "@ligee I'm not sure that fallbackClassloader of DualClassLoader should necessarily have base classloader as a parent. Is it really so?", - "pr_file_module": null - }, - { - "comment_id": "630952983", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4363, - "pr_file": "libraries/scripting/jvm/src/kotlin/script/experimental/jvm/impl/KJvmCompiledScript.kt", - "discussion_id": "630924715", - "commented_code": "@@ -139,17 +140,17 @@ private fun CompiledScript.makeClassLoaderFromDependencies(baseClassLoader: Clas\n val processedClasspathElements = mutableSetOf()\n fun recursiveClassPath(res: Sequence, classLoader: ClassLoader?): Sequence =\n when (classLoader) {\n- null -> res\n+ null, baseClassLoader -> res", - "comment_created_at": "2021-05-12T11:23:39+00:00", - "comment_author": "ileasile", - "comment_body": "As I can see from usages, it is always a thread context classloader that is a base classloader at the same time, so there is no appropriate test", - "pr_file_module": null - }, - { - "comment_id": "630967327", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 4363, - "pr_file": "libraries/scripting/jvm/src/kotlin/script/experimental/jvm/impl/KJvmCompiledScript.kt", - "discussion_id": "630924715", - "commented_code": "@@ -139,17 +140,17 @@ private fun CompiledScript.makeClassLoaderFromDependencies(baseClassLoader: Clas\n val processedClasspathElements = mutableSetOf()\n fun recursiveClassPath(res: Sequence, classLoader: ClassLoader?): Sequence =\n when (classLoader) {\n- null -> res\n+ null, baseClassLoader -> res", - "comment_created_at": "2021-05-12T11:47:21+00:00", - "comment_author": "ligee", - "comment_body": "I'd say it should convert to the baseClassLoader, but since it is possible to replace it, it is safer to leave the code as you wrote it.\r\nAbout the tests - there are some tests with baseClassLoader set to null, and that's probably it.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kotlin-use-modern-test-infrastructure.json 
b/_reviewers/kotlin-use-modern-test-infrastructure.json new file mode 100644 index 0000000..880632a --- /dev/null +++ b/_reviewers/kotlin-use-modern-test-infrastructure.json @@ -0,0 +1,58 @@ +[ + { + "discussion_id": "1581150497", + "pr_number": 5281, + "pr_file": "kotlin-native/backend.native/tests/build.gradle", + "created_at": "2024-04-26T14:53:10+00:00", + "commented_code": "source = \"runtime/exceptions/throw_from_except_constr.kt\"\n}\n\nstandaloneTest(\"program_name\") {\n source = \"runtime/program_name/runtime_program_name.kt\"\n}", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1581150497", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5281, + "pr_file": "kotlin-native/backend.native/tests/build.gradle", + "discussion_id": "1581150497", + "commented_code": "@@ -380,6 +380,10 @@ standaloneTest(\"throw_from_except_constr\") {\n source = \"runtime/exceptions/throw_from_except_constr.kt\"\n }\n \n+standaloneTest(\"program_name\") {\n+ source = \"runtime/program_name/runtime_program_name.kt\"\n+}", + "comment_created_at": "2024-04-26T14:53:10+00:00", + "comment_author": "SvyatoslavScherbina", + "comment_body": "This file represents the legacy test infrastructure that we are actively trying to get rid of.\r\n\r\nPlease create a proper test class in\r\nhttps://github.com/JetBrains/kotlin/tree/master/native/native.tests/tests/org/jetbrains/kotlin/konan/test/blackbox instead.", + "pr_file_module": null + }, + { + "comment_id": "1584875172", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5281, + "pr_file": "kotlin-native/backend.native/tests/build.gradle", + "discussion_id": "1581150497", + "commented_code": "@@ -380,6 +380,10 @@ standaloneTest(\"throw_from_except_constr\") {\n source = \"runtime/exceptions/throw_from_except_constr.kt\"\n }\n \n+standaloneTest(\"program_name\") {\n+ source = \"runtime/program_name/runtime_program_name.kt\"\n+}", + "comment_created_at": "2024-04-30T13:54:43+00:00", + "comment_author": "vonox7", + "comment_body": "I ported the testcase to the new infrastructure.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1533445212", + "pr_number": 5270, + "pr_file": "kotlin-native/backend.native/tests/build.gradle", + "created_at": "2024-03-21T08:47:02+00:00", + "commented_code": "UtilsKt.dependsOnPlatformLibs(it)\n }\n\n standaloneTest(\"interop_objc_kt65260\") {\n source = \"interop/objc/kt65260.kt\"\n UtilsKt.dependsOnPlatformLibs(it)\n }", + "repo_full_name": "JetBrains/kotlin", + "discussion_comments": [ + { + "comment_id": "1533445212", + "repo_full_name": "JetBrains/kotlin", + "pr_number": 5270, + "pr_file": "kotlin-native/backend.native/tests/build.gradle", + "discussion_id": "1533445212", + "commented_code": "@@ -1103,6 +1103,11 @@ if (PlatformInfo.isAppleTarget(project)) {\n UtilsKt.dependsOnPlatformLibs(it)\n }\n \n+ standaloneTest(\"interop_objc_kt65260\") {\n+ source = \"interop/objc/kt65260.kt\"\n+ UtilsKt.dependsOnPlatformLibs(it)\n+ }", + "comment_created_at": "2024-03-21T08:47:02+00:00", + "comment_author": "SvyatoslavScherbina", + "comment_body": "This file is the legacy test infrastructure. We are currently migrating all tests from here.\r\nPlease add the new test there instead: https://github.com/JetBrains/kotlin/tree/master/native/native.tests/testData/codegen/cinterop/objc\r\nAfter creating a file, you will need to run `generateTests` Gradle task (or `Generate Tests` run configuration in IDEA). 
This will generate JUnit tests for the new testdata file.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kotlin-use-modern-test-infrastructure.md b/_reviewers/kotlin-use-modern-test-infrastructure.md index 6910b6f..4ff59a9 100644 --- a/_reviewers/kotlin-use-modern-test-infrastructure.md +++ b/_reviewers/kotlin-use-modern-test-infrastructure.md @@ -35,63 +35,3 @@ class NewFeatureTest : AbstractNativeTest() { ``` For some test types, you may need to run generation tasks after adding new test data files. - - -[ - { - "discussion_id": "1581150497", - "pr_number": 5281, - "pr_file": "kotlin-native/backend.native/tests/build.gradle", - "created_at": "2024-04-26T14:53:10+00:00", - "commented_code": "source = \"runtime/exceptions/throw_from_except_constr.kt\"\n}\n\nstandaloneTest(\"program_name\") {\n source = \"runtime/program_name/runtime_program_name.kt\"\n}", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1581150497", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5281, - "pr_file": "kotlin-native/backend.native/tests/build.gradle", - "discussion_id": "1581150497", - "commented_code": "@@ -380,6 +380,10 @@ standaloneTest(\"throw_from_except_constr\") {\n source = \"runtime/exceptions/throw_from_except_constr.kt\"\n }\n \n+standaloneTest(\"program_name\") {\n+ source = \"runtime/program_name/runtime_program_name.kt\"\n+}", - "comment_created_at": "2024-04-26T14:53:10+00:00", - "comment_author": "SvyatoslavScherbina", - "comment_body": "This file represents the legacy test infrastructure that we are actively trying to get rid of.\r\n\r\nPlease create a proper test class in\r\nhttps://github.com/JetBrains/kotlin/tree/master/native/native.tests/tests/org/jetbrains/kotlin/konan/test/blackbox instead.", - "pr_file_module": null - }, - { - "comment_id": "1584875172", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5281, - "pr_file": "kotlin-native/backend.native/tests/build.gradle", - "discussion_id": "1581150497", - "commented_code": "@@ -380,6 +380,10 @@ standaloneTest(\"throw_from_except_constr\") {\n source = \"runtime/exceptions/throw_from_except_constr.kt\"\n }\n \n+standaloneTest(\"program_name\") {\n+ source = \"runtime/program_name/runtime_program_name.kt\"\n+}", - "comment_created_at": "2024-04-30T13:54:43+00:00", - "comment_author": "vonox7", - "comment_body": "I ported the testcase to the new infrastructure.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1533445212", - "pr_number": 5270, - "pr_file": "kotlin-native/backend.native/tests/build.gradle", - "created_at": "2024-03-21T08:47:02+00:00", - "commented_code": "UtilsKt.dependsOnPlatformLibs(it)\n }\n\n standaloneTest(\"interop_objc_kt65260\") {\n source = \"interop/objc/kt65260.kt\"\n UtilsKt.dependsOnPlatformLibs(it)\n }", - "repo_full_name": "JetBrains/kotlin", - "discussion_comments": [ - { - "comment_id": "1533445212", - "repo_full_name": "JetBrains/kotlin", - "pr_number": 5270, - "pr_file": "kotlin-native/backend.native/tests/build.gradle", - "discussion_id": "1533445212", - "commented_code": "@@ -1103,6 +1103,11 @@ if (PlatformInfo.isAppleTarget(project)) {\n UtilsKt.dependsOnPlatformLibs(it)\n }\n \n+ standaloneTest(\"interop_objc_kt65260\") {\n+ source = \"interop/objc/kt65260.kt\"\n+ UtilsKt.dependsOnPlatformLibs(it)\n+ }", - "comment_created_at": "2024-03-21T08:47:02+00:00", - "comment_author": "SvyatoslavScherbina", - "comment_body": "This file is the legacy test infrastructure. 
We are currently migrating all tests from here.\r\nPlease add the new test there instead: https://github.com/JetBrains/kotlin/tree/master/native/native.tests/testData/codegen/cinterop/objc\r\nAfter creating a file, you will need to run `generateTests` Gradle task (or `Generate Tests` run configuration in IDEA). This will generate JUnit tests for the new testdata file.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-api-structure-balance.json b/_reviewers/kubeflow-api-structure-balance.json new file mode 100644 index 0000000..0a52765 --- /dev/null +++ b/_reviewers/kubeflow-api-structure-balance.json @@ -0,0 +1,70 @@ +[ + { + "discussion_id": "1135817898", + "pr_number": 7019, + "pr_file": "components/crud-web-apps/common/backend/kubeflow/kubeflow/crud_backend/api/isvc.py", + "created_at": "2023-03-14T16:16:13+00:00", + "commented_code": "from . import utils, events\n\n\ndef list_inference_service_events(name, namespace):", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1135817898", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7019, + "pr_file": "components/crud-web-apps/common/backend/kubeflow/kubeflow/crud_backend/api/isvc.py", + "discussion_id": "1135817898", + "commented_code": "@@ -0,0 +1,8 @@\n+from . import utils, events\n+\n+\n+def list_inference_service_events(name, namespace):", + "comment_created_at": "2023-03-14T16:16:13+00:00", + "comment_author": "kimwnasptd", + "comment_body": "I'd propose to avoid introducing a handler for the InferenceServices in the common backend code.\r\n\r\nThis would mean we'd need to add more handlers for list/delete/get/logs as well, which as a result will make the MWA depend even more in this common code. I'd propose to instead keep on using the `custom_resource.py` file's handlers for the ISVC logic in the MWA", + "pr_file_module": null + }, + { + "comment_id": "1136863197", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7019, + "pr_file": "components/crud-web-apps/common/backend/kubeflow/kubeflow/crud_backend/api/isvc.py", + "discussion_id": "1135817898", + "commented_code": "@@ -0,0 +1,8 @@\n+from . import utils, events\n+\n+\n+def list_inference_service_events(name, namespace):", + "comment_created_at": "2023-03-15T10:48:54+00:00", + "comment_author": "elenzio9", + "comment_body": "ACK! 
I've just removed that part and will add the changes to the follow-up PR, which will introduce the `EVENTS` tab, as they'll relate to the MWA backend code.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "467145237", + "pr_number": 5180, + "pr_file": "components/crud-web-apps/tensorboards/backend/app/utils.py", + "created_at": "2020-08-07T16:30:12+00:00", + "commented_code": "from kubeflow.kubeflow.crud_backend import helpers\n\n\ndef parse_tensorboard(tensorboard):\n \"\"\"\n Process the Tensorboard object and format it as the UI expects it.\n \"\"\"\n\n parsed_tensorboard = {\n \"name\": tensorboard['metadata']['name'],\n \"namespace\": tensorboard['metadata']['namespace'],\n \"logspath\": tensorboard['spec']['logspath'],\n \"age\": {", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "467145237", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5180, + "pr_file": "components/crud-web-apps/tensorboards/backend/app/utils.py", + "discussion_id": "467145237", + "commented_code": "@@ -0,0 +1,39 @@\n+from kubeflow.kubeflow.crud_backend import helpers\n+\n+\n+def parse_tensorboard(tensorboard):\n+ \"\"\"\n+ Process the Tensorboard object and format it as the UI expects it.\n+ \"\"\"\n+\n+ parsed_tensorboard = {\n+ \"name\": tensorboard['metadata']['name'],\n+ \"namespace\": tensorboard['metadata']['namespace'],\n+ \"logspath\": tensorboard['spec']['logspath'],\n+ \"age\": {", + "comment_created_at": "2020-08-07T16:30:12+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Could you add a `get_age(k8s_object)` in our common [helpers.py](https://github.com/kubeflow/kubeflow/blob/e99b5e18697a15088abea12543bd4e3f180ff984/components/crud-web-apps/common/backend/kubeflow/kubeflow/crud_backend/helpers.py) file that would return a dictionary with the `uptime` and `timestamp` values? Since other web apps will want to use this convention, for returning age information to their frontends, it would be a good idea to add this logic into a common place.\r\n\r\nFor now it could only work with dict objects, like CRs, and later one we could extend it to work with class objects as well, such as PVCs for example.", + "pr_file_module": null + }, + { + "comment_id": "467804614", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5180, + "pr_file": "components/crud-web-apps/tensorboards/backend/app/utils.py", + "discussion_id": "467145237", + "commented_code": "@@ -0,0 +1,39 @@\n+from kubeflow.kubeflow.crud_backend import helpers\n+\n+\n+def parse_tensorboard(tensorboard):\n+ \"\"\"\n+ Process the Tensorboard object and format it as the UI expects it.\n+ \"\"\"\n+\n+ parsed_tensorboard = {\n+ \"name\": tensorboard['metadata']['name'],\n+ \"namespace\": tensorboard['metadata']['namespace'],\n+ \"logspath\": tensorboard['spec']['logspath'],\n+ \"age\": {", + "comment_created_at": "2020-08-10T10:06:02+00:00", + "comment_author": "kandrio", + "comment_body": "Thanks for the comment. 
I totally agree, so I just added a new commit that adds `get_age(k8s_object)` to the common code.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-api-structure-balance.md b/_reviewers/kubeflow-api-structure-balance.md index 17f1865..f41ce6f 100644 --- a/_reviewers/kubeflow-api-structure-balance.md +++ b/_reviewers/kubeflow-api-structure-balance.md @@ -35,75 +35,3 @@ def list_inference_services(namespace): ``` This approach prevents common code from accumulating resource-specific dependencies while still promoting reuse of utility functions that apply across multiple API endpoints. When designing APIs, always evaluate whether functionality belongs in shared utilities or should remain in resource-specific implementations. - - -[ - { - "discussion_id": "1135817898", - "pr_number": 7019, - "pr_file": "components/crud-web-apps/common/backend/kubeflow/kubeflow/crud_backend/api/isvc.py", - "created_at": "2023-03-14T16:16:13+00:00", - "commented_code": "from . import utils, events\n\n\ndef list_inference_service_events(name, namespace):", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1135817898", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7019, - "pr_file": "components/crud-web-apps/common/backend/kubeflow/kubeflow/crud_backend/api/isvc.py", - "discussion_id": "1135817898", - "commented_code": "@@ -0,0 +1,8 @@\n+from . import utils, events\n+\n+\n+def list_inference_service_events(name, namespace):", - "comment_created_at": "2023-03-14T16:16:13+00:00", - "comment_author": "kimwnasptd", - "comment_body": "I'd propose to avoid introducing a handler for the InferenceServices in the common backend code.\r\n\r\nThis would mean we'd need to add more handlers for list/delete/get/logs as well, which as a result will make the MWA depend even more in this common code. I'd propose to instead keep on using the `custom_resource.py` file's handlers for the ISVC logic in the MWA", - "pr_file_module": null - }, - { - "comment_id": "1136863197", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7019, - "pr_file": "components/crud-web-apps/common/backend/kubeflow/kubeflow/crud_backend/api/isvc.py", - "discussion_id": "1135817898", - "commented_code": "@@ -0,0 +1,8 @@\n+from . import utils, events\n+\n+\n+def list_inference_service_events(name, namespace):", - "comment_created_at": "2023-03-15T10:48:54+00:00", - "comment_author": "elenzio9", - "comment_body": "ACK! 
I've just removed that part and will add the changes to the follow-up PR, which will introduce the `EVENTS` tab, as they'll relate to the MWA backend code.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "467145237", - "pr_number": 5180, - "pr_file": "components/crud-web-apps/tensorboards/backend/app/utils.py", - "created_at": "2020-08-07T16:30:12+00:00", - "commented_code": "from kubeflow.kubeflow.crud_backend import helpers\n\n\ndef parse_tensorboard(tensorboard):\n \"\"\"\n Process the Tensorboard object and format it as the UI expects it.\n \"\"\"\n\n parsed_tensorboard = {\n \"name\": tensorboard['metadata']['name'],\n \"namespace\": tensorboard['metadata']['namespace'],\n \"logspath\": tensorboard['spec']['logspath'],\n \"age\": {", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "467145237", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5180, - "pr_file": "components/crud-web-apps/tensorboards/backend/app/utils.py", - "discussion_id": "467145237", - "commented_code": "@@ -0,0 +1,39 @@\n+from kubeflow.kubeflow.crud_backend import helpers\n+\n+\n+def parse_tensorboard(tensorboard):\n+ \"\"\"\n+ Process the Tensorboard object and format it as the UI expects it.\n+ \"\"\"\n+\n+ parsed_tensorboard = {\n+ \"name\": tensorboard['metadata']['name'],\n+ \"namespace\": tensorboard['metadata']['namespace'],\n+ \"logspath\": tensorboard['spec']['logspath'],\n+ \"age\": {", - "comment_created_at": "2020-08-07T16:30:12+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Could you add a `get_age(k8s_object)` in our common [helpers.py](https://github.com/kubeflow/kubeflow/blob/e99b5e18697a15088abea12543bd4e3f180ff984/components/crud-web-apps/common/backend/kubeflow/kubeflow/crud_backend/helpers.py) file that would return a dictionary with the `uptime` and `timestamp` values? Since other web apps will want to use this convention, for returning age information to their frontends, it would be a good idea to add this logic into a common place.\r\n\r\nFor now it could only work with dict objects, like CRs, and later one we could extend it to work with class objects as well, such as PVCs for example.", - "pr_file_module": null - }, - { - "comment_id": "467804614", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5180, - "pr_file": "components/crud-web-apps/tensorboards/backend/app/utils.py", - "discussion_id": "467145237", - "commented_code": "@@ -0,0 +1,39 @@\n+from kubeflow.kubeflow.crud_backend import helpers\n+\n+\n+def parse_tensorboard(tensorboard):\n+ \"\"\"\n+ Process the Tensorboard object and format it as the UI expects it.\n+ \"\"\"\n+\n+ parsed_tensorboard = {\n+ \"name\": tensorboard['metadata']['name'],\n+ \"namespace\": tensorboard['metadata']['namespace'],\n+ \"logspath\": tensorboard['spec']['logspath'],\n+ \"age\": {", - "comment_created_at": "2020-08-10T10:06:02+00:00", - "comment_author": "kandrio", - "comment_body": "Thanks for the comment. 
I totally agree, so I just added a new commit that adds `get_age(k8s_object)` to the common code.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-automate-style-enforcement.json b/_reviewers/kubeflow-automate-style-enforcement.json new file mode 100644 index 0000000..ea3e997 --- /dev/null +++ b/_reviewers/kubeflow-automate-style-enforcement.json @@ -0,0 +1,58 @@ +[ + { + "discussion_id": "1039529505", + "pr_number": 6786, + "pr_file": ".github/workflows/centraldb_angular_backend_check.yaml", + "created_at": "2022-12-05T12:28:44+00:00", + "commented_code": "name: CentralDashboard-angular Backend check\non:\n pull_request:\n paths:\n - components/centraldashboard-angular/backend/**\n\njobs:\n run-backend-unittests:\n name: Unit tests\n runs-on: ubuntu-latest\n steps:\n - uses: actions/checkout@v3\n - uses: actions/setup-node@v3\n with:\n node-version: 12\n - name: Run unit tests\n run: |\n cd components/centraldashboard-angular/backend/\n npm i\n npm run test\n\n run-backend-tslint:", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1039529505", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6786, + "pr_file": ".github/workflows/centraldb_angular_backend_check.yaml", + "discussion_id": "1039529505", + "commented_code": "@@ -0,0 +1,34 @@\n+name: CentralDashboard-angular Backend check\n+on:\n+ pull_request:\n+ paths:\n+ - components/centraldashboard-angular/backend/**\n+\n+jobs:\n+ run-backend-unittests:\n+ name: Unit tests\n+ runs-on: ubuntu-latest\n+ steps:\n+ - uses: actions/checkout@v3\n+ - uses: actions/setup-node@v3\n+ with:\n+ node-version: 12\n+ - name: Run unit tests\n+ run: |\n+ cd components/centraldashboard-angular/backend/\n+ npm i\n+ npm run test\n+\n+ run-backend-tslint:", + "comment_created_at": "2022-12-05T12:28:44+00:00", + "comment_author": "kimwnasptd", + "comment_body": "We should look into moving away from `tslint` in the backend as well, now that we've decoupled the frontend and backend code", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "618186426", + "pr_number": 5761, + "pr_file": "components/profile-controller/config/overlays/kubeflow/namespace-labels.yaml", + "created_at": "2021-04-22T08:21:05+00:00", + "commented_code": "# Below is a list of labels to be set by default.\n#\n# To add a namespace label, use `key: 'value'`, for example:\n# istio.io/rev: 'asm-191-1'\n#\n# To remove a namespace label, use `key: ''` to remove key, for example:\n# istio-injection: ''\n# \n# Profile controller will not replace value if namespace label already exists.\n# But profile controller keeps removing namespace label if removing is specified here.\n# In order to change this enforcement:\n# 1. If your profile already has this label:\n# First, remove this label by using `key: ''` and deploy.\n# Second, add this label by using `key: 'value'` and deploy.\n# 2. 
If your profile doesn't have this label:\n# you can add label and value to this file and deploy.\n# Reason:\n# Profile controller will enforce flag removal, but not enforce value replacement.\n# \nkatib-metricscollector-injection: 'enabled'\nserving.kubeflow.org/inferenceservice: 'enabled'\npipelines.kubeflow.org/enabled: 'true'\napp.kubernetes.io/part-of: 'kubeflow-profile'", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "618186426", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5761, + "pr_file": "components/profile-controller/config/overlays/kubeflow/namespace-labels.yaml", + "discussion_id": "618186426", + "commented_code": "@@ -0,0 +1,23 @@\n+# Below is a list of labels to be set by default.\n+#\n+# To add a namespace label, use `key: 'value'`, for example:\n+# istio.io/rev: 'asm-191-1'\n+#\n+# To remove a namespace label, use `key: ''` to remove key, for example:\n+# istio-injection: ''\n+# \n+# Profile controller will not replace value if namespace label already exists.\n+# But profile controller keeps removing namespace label if removing is specified here.\n+# In order to change this enforcement:\n+# 1. If your profile already has this label:\n+# First, remove this label by using `key: ''` and deploy.\n+# Second, add this label by using `key: 'value'` and deploy.\n+# 2. If your profile doesn't have this label:\n+# you can add label and value to this file and deploy.\n+# Reason:\n+# Profile controller will enforce flag removal, but not enforce value replacement.\n+# \n+katib-metricscollector-injection: 'enabled'\n+serving.kubeflow.org/inferenceservice: 'enabled'\n+pipelines.kubeflow.org/enabled: 'true'\n+app.kubernetes.io/part-of: 'kubeflow-profile'", + "comment_created_at": "2021-04-22T08:21:05+00:00", + "comment_author": "Bobgy", + "comment_body": "nit: generally, always add a newline at end of file. Same for other files\r\n\r\nYou can configure your IDE to automatically do that.", + "pr_file_module": null + }, + { + "comment_id": "618731511", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5761, + "pr_file": "components/profile-controller/config/overlays/kubeflow/namespace-labels.yaml", + "discussion_id": "618186426", + "commented_code": "@@ -0,0 +1,23 @@\n+# Below is a list of labels to be set by default.\n+#\n+# To add a namespace label, use `key: 'value'`, for example:\n+# istio.io/rev: 'asm-191-1'\n+#\n+# To remove a namespace label, use `key: ''` to remove key, for example:\n+# istio-injection: ''\n+# \n+# Profile controller will not replace value if namespace label already exists.\n+# But profile controller keeps removing namespace label if removing is specified here.\n+# In order to change this enforcement:\n+# 1. If your profile already has this label:\n+# First, remove this label by using `key: ''` and deploy.\n+# Second, add this label by using `key: 'value'` and deploy.\n+# 2. If your profile doesn't have this label:\n+# you can add label and value to this file and deploy.\n+# Reason:\n+# Profile controller will enforce flag removal, but not enforce value replacement.\n+# \n+katib-metricscollector-injection: 'enabled'\n+serving.kubeflow.org/inferenceservice: 'enabled'\n+pipelines.kubeflow.org/enabled: 'true'\n+app.kubernetes.io/part-of: 'kubeflow-profile'", + "comment_created_at": "2021-04-22T20:53:58+00:00", + "comment_author": "zijianjoy", + "comment_body": "Thank you Yuan! 
I updated my IDE for keeping newline at the end of file.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-automate-style-enforcement.md b/_reviewers/kubeflow-automate-style-enforcement.md index 19d91f8..df4115d 100644 --- a/_reviewers/kubeflow-automate-style-enforcement.md +++ b/_reviewers/kubeflow-automate-style-enforcement.md @@ -25,63 +25,3 @@ Example IDE configuration for VSCode (settings.json): ``` This approach reduces style-related comments in code reviews, ensures consistency across the codebase, and allows reviewers to focus on more substantive issues. - - -[ - { - "discussion_id": "1039529505", - "pr_number": 6786, - "pr_file": ".github/workflows/centraldb_angular_backend_check.yaml", - "created_at": "2022-12-05T12:28:44+00:00", - "commented_code": "name: CentralDashboard-angular Backend check\non:\n pull_request:\n paths:\n - components/centraldashboard-angular/backend/**\n\njobs:\n run-backend-unittests:\n name: Unit tests\n runs-on: ubuntu-latest\n steps:\n - uses: actions/checkout@v3\n - uses: actions/setup-node@v3\n with:\n node-version: 12\n - name: Run unit tests\n run: |\n cd components/centraldashboard-angular/backend/\n npm i\n npm run test\n\n run-backend-tslint:", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1039529505", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6786, - "pr_file": ".github/workflows/centraldb_angular_backend_check.yaml", - "discussion_id": "1039529505", - "commented_code": "@@ -0,0 +1,34 @@\n+name: CentralDashboard-angular Backend check\n+on:\n+ pull_request:\n+ paths:\n+ - components/centraldashboard-angular/backend/**\n+\n+jobs:\n+ run-backend-unittests:\n+ name: Unit tests\n+ runs-on: ubuntu-latest\n+ steps:\n+ - uses: actions/checkout@v3\n+ - uses: actions/setup-node@v3\n+ with:\n+ node-version: 12\n+ - name: Run unit tests\n+ run: |\n+ cd components/centraldashboard-angular/backend/\n+ npm i\n+ npm run test\n+\n+ run-backend-tslint:", - "comment_created_at": "2022-12-05T12:28:44+00:00", - "comment_author": "kimwnasptd", - "comment_body": "We should look into moving away from `tslint` in the backend as well, now that we've decoupled the frontend and backend code", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "618186426", - "pr_number": 5761, - "pr_file": "components/profile-controller/config/overlays/kubeflow/namespace-labels.yaml", - "created_at": "2021-04-22T08:21:05+00:00", - "commented_code": "# Below is a list of labels to be set by default.\n#\n# To add a namespace label, use `key: 'value'`, for example:\n# istio.io/rev: 'asm-191-1'\n#\n# To remove a namespace label, use `key: ''` to remove key, for example:\n# istio-injection: ''\n# \n# Profile controller will not replace value if namespace label already exists.\n# But profile controller keeps removing namespace label if removing is specified here.\n# In order to change this enforcement:\n# 1. If your profile already has this label:\n# First, remove this label by using `key: ''` and deploy.\n# Second, add this label by using `key: 'value'` and deploy.\n# 2. 
If your profile doesn't have this label:\n# you can add label and value to this file and deploy.\n# Reason:\n# Profile controller will enforce flag removal, but not enforce value replacement.\n# \nkatib-metricscollector-injection: 'enabled'\nserving.kubeflow.org/inferenceservice: 'enabled'\npipelines.kubeflow.org/enabled: 'true'\napp.kubernetes.io/part-of: 'kubeflow-profile'", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "618186426", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5761, - "pr_file": "components/profile-controller/config/overlays/kubeflow/namespace-labels.yaml", - "discussion_id": "618186426", - "commented_code": "@@ -0,0 +1,23 @@\n+# Below is a list of labels to be set by default.\n+#\n+# To add a namespace label, use `key: 'value'`, for example:\n+# istio.io/rev: 'asm-191-1'\n+#\n+# To remove a namespace label, use `key: ''` to remove key, for example:\n+# istio-injection: ''\n+# \n+# Profile controller will not replace value if namespace label already exists.\n+# But profile controller keeps removing namespace label if removing is specified here.\n+# In order to change this enforcement:\n+# 1. If your profile already has this label:\n+# First, remove this label by using `key: ''` and deploy.\n+# Second, add this label by using `key: 'value'` and deploy.\n+# 2. If your profile doesn't have this label:\n+# you can add label and value to this file and deploy.\n+# Reason:\n+# Profile controller will enforce flag removal, but not enforce value replacement.\n+# \n+katib-metricscollector-injection: 'enabled'\n+serving.kubeflow.org/inferenceservice: 'enabled'\n+pipelines.kubeflow.org/enabled: 'true'\n+app.kubernetes.io/part-of: 'kubeflow-profile'", - "comment_created_at": "2021-04-22T08:21:05+00:00", - "comment_author": "Bobgy", - "comment_body": "nit: generally, always add a newline at end of file. Same for other files\r\n\r\nYou can configure your IDE to automatically do that.", - "pr_file_module": null - }, - { - "comment_id": "618731511", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5761, - "pr_file": "components/profile-controller/config/overlays/kubeflow/namespace-labels.yaml", - "discussion_id": "618186426", - "commented_code": "@@ -0,0 +1,23 @@\n+# Below is a list of labels to be set by default.\n+#\n+# To add a namespace label, use `key: 'value'`, for example:\n+# istio.io/rev: 'asm-191-1'\n+#\n+# To remove a namespace label, use `key: ''` to remove key, for example:\n+# istio-injection: ''\n+# \n+# Profile controller will not replace value if namespace label already exists.\n+# But profile controller keeps removing namespace label if removing is specified here.\n+# In order to change this enforcement:\n+# 1. If your profile already has this label:\n+# First, remove this label by using `key: ''` and deploy.\n+# Second, add this label by using `key: 'value'` and deploy.\n+# 2. If your profile doesn't have this label:\n+# you can add label and value to this file and deploy.\n+# Reason:\n+# Profile controller will enforce flag removal, but not enforce value replacement.\n+# \n+katib-metricscollector-injection: 'enabled'\n+serving.kubeflow.org/inferenceservice: 'enabled'\n+pipelines.kubeflow.org/enabled: 'true'\n+app.kubernetes.io/part-of: 'kubeflow-profile'", - "comment_created_at": "2021-04-22T20:53:58+00:00", - "comment_author": "zijianjoy", - "comment_body": "Thank you Yuan! 
I updated my IDE for keeping newline at the end of file.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-centralize-configuration-constants.json b/_reviewers/kubeflow-centralize-configuration-constants.json new file mode 100644 index 0000000..f40eb20 --- /dev/null +++ b/_reviewers/kubeflow-centralize-configuration-constants.json @@ -0,0 +1,186 @@ +[ + { + "discussion_id": "1740935864", + "pr_number": 7634, + "pr_file": "components/crud-web-apps/volumes/backend/entrypoint.py", + "created_at": "2024-09-02T13:33:53+00:00", + "commented_code": "log = logging.getLogger(__name__)\n\n\ndef get_config(mode):\n \"\"\"Return a config based on the selected mode.\"\"\"\n config_classes = {\n config.BackendMode.DEVELOPMENT.value: config.DevConfig,\n config.BackendMode.DEVELOPMENT_FULL.value: config.DevConfig,\n config.BackendMode.PRODUCTION.value: config.ProdConfig,\n config.BackendMode.PRODUCTION_FULL.value: config.ProdConfig,\n }\n cfg_class = config_classes.get(mode)\n if not cfg_class:\n raise RuntimeError(\"Backend mode '%s' is not implemented. Choose one\"\n \" of %s\" % (mode, list(config_classes.keys())))\n return cfg_class()\n\n\nAPP_NAME = os.environ.get(\"APP_NAME\", \"Volumes Web App\")\nBACKEND_MODE = os.environ.get(\"BACKEND_MODE\",\n config.BackendMode.PRODUCTION.value)\nUI_FLAVOR = os.environ.get(\"UI_FLAVOR\", \"default\")\nPREFIX = os.environ.get(\"APP_PREFIX\", \"/\")\nMETRICS = bool(os.environ.get(\"METRICS\", False))", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1740935864", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7634, + "pr_file": "components/crud-web-apps/volumes/backend/entrypoint.py", + "discussion_id": "1740935864", + "commented_code": "@@ -7,29 +7,16 @@\n log = logging.getLogger(__name__)\n \n \n-def get_config(mode):\n- \"\"\"Return a config based on the selected mode.\"\"\"\n- config_classes = {\n- config.BackendMode.DEVELOPMENT.value: config.DevConfig,\n- config.BackendMode.DEVELOPMENT_FULL.value: config.DevConfig,\n- config.BackendMode.PRODUCTION.value: config.ProdConfig,\n- config.BackendMode.PRODUCTION_FULL.value: config.ProdConfig,\n- }\n- cfg_class = config_classes.get(mode)\n- if not cfg_class:\n- raise RuntimeError(\"Backend mode '%s' is not implemented. Choose one\"\n- \" of %s\" % (mode, list(config_classes.keys())))\n- return cfg_class()\n-\n-\n APP_NAME = os.environ.get(\"APP_NAME\", \"Volumes Web App\")\n BACKEND_MODE = os.environ.get(\"BACKEND_MODE\",\n config.BackendMode.PRODUCTION.value)\n UI_FLAVOR = os.environ.get(\"UI_FLAVOR\", \"default\")\n PREFIX = os.environ.get(\"APP_PREFIX\", \"/\")\n+METRICS = bool(os.environ.get(\"METRICS\", False))", + "comment_created_at": "2024-09-02T13:33:53+00:00", + "comment_author": "orfeas-k", + "comment_body": "Do we need to set a default value both in each specific app and in the config? 
Maybe we should do it like `backend_mode`\r\n```\r\nBACKEND_MODE = os.environ.get(\"BACKEND_MODE\",\r\n config.BackendMode.PRODUCTION.value)\r\n```\r\n\r\n", + "pr_file_module": null + }, + { + "comment_id": "1741232798", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7634, + "pr_file": "components/crud-web-apps/volumes/backend/entrypoint.py", + "discussion_id": "1740935864", + "commented_code": "@@ -7,29 +7,16 @@\n log = logging.getLogger(__name__)\n \n \n-def get_config(mode):\n- \"\"\"Return a config based on the selected mode.\"\"\"\n- config_classes = {\n- config.BackendMode.DEVELOPMENT.value: config.DevConfig,\n- config.BackendMode.DEVELOPMENT_FULL.value: config.DevConfig,\n- config.BackendMode.PRODUCTION.value: config.ProdConfig,\n- config.BackendMode.PRODUCTION_FULL.value: config.ProdConfig,\n- }\n- cfg_class = config_classes.get(mode)\n- if not cfg_class:\n- raise RuntimeError(\"Backend mode '%s' is not implemented. Choose one\"\n- \" of %s\" % (mode, list(config_classes.keys())))\n- return cfg_class()\n-\n-\n APP_NAME = os.environ.get(\"APP_NAME\", \"Volumes Web App\")\n BACKEND_MODE = os.environ.get(\"BACKEND_MODE\",\n config.BackendMode.PRODUCTION.value)\n UI_FLAVOR = os.environ.get(\"UI_FLAVOR\", \"default\")\n PREFIX = os.environ.get(\"APP_PREFIX\", \"/\")\n+METRICS = bool(os.environ.get(\"METRICS\", False))", + "comment_created_at": "2024-09-02T20:20:00+00:00", + "comment_author": "rgildein", + "comment_body": "Actually I moved the `self.METRICS = bool(os.environ.get(\"METRICS\", True))` to config __init__, since the logic is for all apps sames (check if env is set to 1, true, ...). WDYT about it?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "588562507", + "pr_number": 5663, + "pr_file": "py/kubeflow/kubeflow/cd/jwa.py", + "created_at": "2021-03-05T18:30:21+00:00", + "commented_code": "\"\"\"\"Argo Workflow for building Jupyter web app's OCI image using Kaniko\"\"\"\nimport os\n\nfrom kubeflow.kubeflow import ci\nfrom kubeflow.testing import argo_build_util\n\nTAG = os.getenv(\"PULL_BASE_SHA\", \"kaniko-local-test\")\nAWS_REGISTRY = \"public.ecr.aws/j1r0q0g6/jupyter-web-app\"", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "588562507", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5663, + "pr_file": "py/kubeflow/kubeflow/cd/jwa.py", + "discussion_id": "588562507", + "commented_code": "@@ -0,0 +1,48 @@\n+\"\"\"\"Argo Workflow for building Jupyter web app's OCI image using Kaniko\"\"\"\n+import os\n+\n+from kubeflow.kubeflow import ci\n+from kubeflow.testing import argo_build_util\n+\n+TAG = os.getenv(\"PULL_BASE_SHA\", \"kaniko-local-test\")\n+AWS_REGISTRY = \"public.ecr.aws/j1r0q0g6/jupyter-web-app\"", + "comment_created_at": "2021-03-05T18:30:21+00:00", + "comment_author": "PatrickXYS", + "comment_body": "NIT: since WG-notebooks have multiple ECR registries (7 now, maybe more in the future), I would recommend you maintain a static configmap or mapping in other python files.\r\n\r\nE.g, py/kubeflow/kubeflow/cd/config.py", + "pr_file_module": null + }, + { + "comment_id": "588581750", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5663, + "pr_file": "py/kubeflow/kubeflow/cd/jwa.py", + "discussion_id": "588562507", + "commented_code": "@@ -0,0 +1,48 @@\n+\"\"\"\"Argo Workflow for building Jupyter web app's OCI image using Kaniko\"\"\"\n+import os\n+\n+from kubeflow.kubeflow import ci\n+from kubeflow.testing import argo_build_util\n+\n+TAG = os.getenv(\"PULL_BASE_SHA\", 
\"kaniko-local-test\")\n+AWS_REGISTRY = \"public.ecr.aws/j1r0q0g6/jupyter-web-app\"", + "comment_created_at": "2021-03-05T18:51:30+00:00", + "comment_author": "kimwnasptd", + "comment_body": "That's a good idea, it will help us have all the names organized in one place.\r\nAdding it", + "pr_file_module": null + }, + { + "comment_id": "588603635", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5663, + "pr_file": "py/kubeflow/kubeflow/cd/jwa.py", + "discussion_id": "588562507", + "commented_code": "@@ -0,0 +1,48 @@\n+\"\"\"\"Argo Workflow for building Jupyter web app's OCI image using Kaniko\"\"\"\n+import os\n+\n+from kubeflow.kubeflow import ci\n+from kubeflow.testing import argo_build_util\n+\n+TAG = os.getenv(\"PULL_BASE_SHA\", \"kaniko-local-test\")\n+AWS_REGISTRY = \"public.ecr.aws/j1r0q0g6/jupyter-web-app\"", + "comment_created_at": "2021-03-05T19:14:27+00:00", + "comment_author": "kimwnasptd", + "comment_body": "@PatrickXYS I added another commit that introduces this `config.py` file.\r\nPTAL and if you are OK I can resolve this conversation", + "pr_file_module": null + }, + { + "comment_id": "588604805", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5663, + "pr_file": "py/kubeflow/kubeflow/cd/jwa.py", + "discussion_id": "588562507", + "commented_code": "@@ -0,0 +1,48 @@\n+\"\"\"\"Argo Workflow for building Jupyter web app's OCI image using Kaniko\"\"\"\n+import os\n+\n+from kubeflow.kubeflow import ci\n+from kubeflow.testing import argo_build_util\n+\n+TAG = os.getenv(\"PULL_BASE_SHA\", \"kaniko-local-test\")\n+AWS_REGISTRY = \"public.ecr.aws/j1r0q0g6/jupyter-web-app\"", + "comment_created_at": "2021-03-05T19:15:40+00:00", + "comment_author": "PatrickXYS", + "comment_body": "LGTM feel free to resolve", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "570412551", + "pr_number": 5577, + "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", + "created_at": "2021-02-04T17:32:50+00:00", + "commented_code": "import logging\nimport os\n\nfrom kubeflow.testing import argo_build_util\n\n# The name of the NFS volume claim to use for test files.\nNFS_VOLUME_CLAIM = \"nfs-external\"\n# The name to use for the volume to use to contain test data\nDATA_VOLUME = \"kubeflow-test-volume\"\n\nE2E_DAG_NAME = \"e2e\"\nEXIT_DAG_NAME = \"exit-handler\"\n\n\nLOCAL_TESTING = os.getenv(\"LOCAL_TESTING\", \"False\")\nCREDENTIALS_VOLUME = {\"name\": \"aws-secret\",\n \"secret\": {\"secretName\": \"aws-secret\"}}\nCREDENTIALS_MOUNT = {\"mountPath\": \"/root/.aws/\",\n \"name\": \"aws-secret\"}\n\nAWS_WORKER_IMAGE = (\"527798164940.dkr.ecr.us-west-2.amazonaws.com/\"\n \"aws-kubeflow-ci/test-worker:latest\")\nPUBLIC_WORKER_IMAGE = \"gcr.io/kubeflow-ci/test-worker:latest\"\n\n\nclass ArgoTestBuilder:\n def __init__(self, name=None, namespace=None, bucket=None,\n test_target_name=None,\n **kwargs):\n self.name = name\n self.namespace = namespace\n self.bucket = bucket\n self.template_label = \"argo_test\"\n self.test_target_name = test_target_name\n self.mkdir_task_name = \"make-artifacts-dir\"\n\n # *********************************************************************\n #\n # Define directory locations\n #\n # *********************************************************************\n\n # mount_path is the directory where the volume to store the test data\n # should be mounted.\n self.mount_path = \"/mnt/\" + \"test-data-volume\"\n # test_dir is the root directory for all data for a particular test\n # run.\n self.test_dir = self.mount_path + \"/\" + self.name\n # output_dir is the 
directory to sync to GCS to contain the output for\n # this job.\n self.output_dir = self.test_dir + \"/output\"\n\n self.artifacts_dir = \"%s/artifacts/junit_%s\" % (self.output_dir, name)\n\n # source directory where all repos should be checked out\n self.src_root_dir = \"%s/src\" % self.test_dir\n # The directory containing the kubeflow/kubeflow repo\n self.src_dir = \"%s/kubeflow/kubeflow\" % self.src_root_dir\n\n # Root of testing repo.\n self.testing_src_dir = os.path.join(self.src_root_dir,\n \"kubeflow/testing\")\n\n # Top level directories for python code\n self.kubeflow_py = self.src_dir\n\n # The directory within the kubeflow_testing submodule containing\n # py scripts to use.\n self.kubeflow_testing_py = \"%s/kubeflow/testing/py\" % self.src_root_dir\n\n self.go_path = self.test_dir\n\n def _build_workflow(self):\n \"\"\"Create a scaffolding CR for the Argo workflow\"\"\"\n volumes = [{\n \"name\": DATA_VOLUME,\n \"persistentVolumeClaim\": {\n \"claimName\": NFS_VOLUME_CLAIM\n },\n }]\n if LOCAL_TESTING == \"False\":\n volumes.append(CREDENTIALS_VOLUME)\n\n workflow = {\n \"apiVersion\": \"argoproj.io/v1alpha1\",\n \"kind\": \"Workflow\",\n \"metadata\": {\n \"name\": self.name,\n \"namespace\": self.namespace,\n \"labels\": argo_build_util.add_dicts([\n {\n \"workflow\": self.name,\n \"workflow_template\": self.template_label,\n },\n argo_build_util.get_prow_labels()\n ]),\n },\n \"spec\": {\n \"entrypoint\": E2E_DAG_NAME,\n # Have argo garbage collect old workflows otherwise we overload\n # the API server.\n \"ttlSecondsAfterFinished\": 7 * 24 * 60 * 60,", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "570412551", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5577, + "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", + "discussion_id": "570412551", + "commented_code": "@@ -0,0 +1,249 @@\n+import logging\n+import os\n+\n+from kubeflow.testing import argo_build_util\n+\n+# The name of the NFS volume claim to use for test files.\n+NFS_VOLUME_CLAIM = \"nfs-external\"\n+# The name to use for the volume to use to contain test data\n+DATA_VOLUME = \"kubeflow-test-volume\"\n+\n+E2E_DAG_NAME = \"e2e\"\n+EXIT_DAG_NAME = \"exit-handler\"\n+\n+\n+LOCAL_TESTING = os.getenv(\"LOCAL_TESTING\", \"False\")\n+CREDENTIALS_VOLUME = {\"name\": \"aws-secret\",\n+ \"secret\": {\"secretName\": \"aws-secret\"}}\n+CREDENTIALS_MOUNT = {\"mountPath\": \"/root/.aws/\",\n+ \"name\": \"aws-secret\"}\n+\n+AWS_WORKER_IMAGE = (\"527798164940.dkr.ecr.us-west-2.amazonaws.com/\"\n+ \"aws-kubeflow-ci/test-worker:latest\")\n+PUBLIC_WORKER_IMAGE = \"gcr.io/kubeflow-ci/test-worker:latest\"\n+\n+\n+class ArgoTestBuilder:\n+ def __init__(self, name=None, namespace=None, bucket=None,\n+ test_target_name=None,\n+ **kwargs):\n+ self.name = name\n+ self.namespace = namespace\n+ self.bucket = bucket\n+ self.template_label = \"argo_test\"\n+ self.test_target_name = test_target_name\n+ self.mkdir_task_name = \"make-artifacts-dir\"\n+\n+ # *********************************************************************\n+ #\n+ # Define directory locations\n+ #\n+ # *********************************************************************\n+\n+ # mount_path is the directory where the volume to store the test data\n+ # should be mounted.\n+ self.mount_path = \"/mnt/\" + \"test-data-volume\"\n+ # test_dir is the root directory for all data for a particular test\n+ # run.\n+ self.test_dir = self.mount_path + \"/\" + self.name\n+ # output_dir is the directory to sync to GCS to contain the 
output for\n+ # this job.\n+ self.output_dir = self.test_dir + \"/output\"\n+\n+ self.artifacts_dir = \"%s/artifacts/junit_%s\" % (self.output_dir, name)\n+\n+ # source directory where all repos should be checked out\n+ self.src_root_dir = \"%s/src\" % self.test_dir\n+ # The directory containing the kubeflow/kubeflow repo\n+ self.src_dir = \"%s/kubeflow/kubeflow\" % self.src_root_dir\n+\n+ # Root of testing repo.\n+ self.testing_src_dir = os.path.join(self.src_root_dir,\n+ \"kubeflow/testing\")\n+\n+ # Top level directories for python code\n+ self.kubeflow_py = self.src_dir\n+\n+ # The directory within the kubeflow_testing submodule containing\n+ # py scripts to use.\n+ self.kubeflow_testing_py = \"%s/kubeflow/testing/py\" % self.src_root_dir\n+\n+ self.go_path = self.test_dir\n+\n+ def _build_workflow(self):\n+ \"\"\"Create a scaffolding CR for the Argo workflow\"\"\"\n+ volumes = [{\n+ \"name\": DATA_VOLUME,\n+ \"persistentVolumeClaim\": {\n+ \"claimName\": NFS_VOLUME_CLAIM\n+ },\n+ }]\n+ if LOCAL_TESTING == \"False\":\n+ volumes.append(CREDENTIALS_VOLUME)\n+\n+ workflow = {\n+ \"apiVersion\": \"argoproj.io/v1alpha1\",\n+ \"kind\": \"Workflow\",\n+ \"metadata\": {\n+ \"name\": self.name,\n+ \"namespace\": self.namespace,\n+ \"labels\": argo_build_util.add_dicts([\n+ {\n+ \"workflow\": self.name,\n+ \"workflow_template\": self.template_label,\n+ },\n+ argo_build_util.get_prow_labels()\n+ ]),\n+ },\n+ \"spec\": {\n+ \"entrypoint\": E2E_DAG_NAME,\n+ # Have argo garbage collect old workflows otherwise we overload\n+ # the API server.\n+ \"ttlSecondsAfterFinished\": 7 * 24 * 60 * 60,", + "comment_created_at": "2021-02-04T17:32:50+00:00", + "comment_author": "PatrickXYS", + "comment_body": "In new argo cluster, we installed v2.12.3 argo workflow, this parameter has been deprecated. 
\r\n\r\nhttps://github.com/argoproj/argo/blob/master/examples/gc-ttl.yaml\r\n\r\nYou can instead use ttlStrategy", + "pr_file_module": null + }, + { + "comment_id": "570425003", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5577, + "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", + "discussion_id": "570412551", + "commented_code": "@@ -0,0 +1,249 @@\n+import logging\n+import os\n+\n+from kubeflow.testing import argo_build_util\n+\n+# The name of the NFS volume claim to use for test files.\n+NFS_VOLUME_CLAIM = \"nfs-external\"\n+# The name to use for the volume to use to contain test data\n+DATA_VOLUME = \"kubeflow-test-volume\"\n+\n+E2E_DAG_NAME = \"e2e\"\n+EXIT_DAG_NAME = \"exit-handler\"\n+\n+\n+LOCAL_TESTING = os.getenv(\"LOCAL_TESTING\", \"False\")\n+CREDENTIALS_VOLUME = {\"name\": \"aws-secret\",\n+ \"secret\": {\"secretName\": \"aws-secret\"}}\n+CREDENTIALS_MOUNT = {\"mountPath\": \"/root/.aws/\",\n+ \"name\": \"aws-secret\"}\n+\n+AWS_WORKER_IMAGE = (\"527798164940.dkr.ecr.us-west-2.amazonaws.com/\"\n+ \"aws-kubeflow-ci/test-worker:latest\")\n+PUBLIC_WORKER_IMAGE = \"gcr.io/kubeflow-ci/test-worker:latest\"\n+\n+\n+class ArgoTestBuilder:\n+ def __init__(self, name=None, namespace=None, bucket=None,\n+ test_target_name=None,\n+ **kwargs):\n+ self.name = name\n+ self.namespace = namespace\n+ self.bucket = bucket\n+ self.template_label = \"argo_test\"\n+ self.test_target_name = test_target_name\n+ self.mkdir_task_name = \"make-artifacts-dir\"\n+\n+ # *********************************************************************\n+ #\n+ # Define directory locations\n+ #\n+ # *********************************************************************\n+\n+ # mount_path is the directory where the volume to store the test data\n+ # should be mounted.\n+ self.mount_path = \"/mnt/\" + \"test-data-volume\"\n+ # test_dir is the root directory for all data for a particular test\n+ # run.\n+ self.test_dir = self.mount_path + \"/\" + self.name\n+ # output_dir is the directory to sync to GCS to contain the output for\n+ # this job.\n+ self.output_dir = self.test_dir + \"/output\"\n+\n+ self.artifacts_dir = \"%s/artifacts/junit_%s\" % (self.output_dir, name)\n+\n+ # source directory where all repos should be checked out\n+ self.src_root_dir = \"%s/src\" % self.test_dir\n+ # The directory containing the kubeflow/kubeflow repo\n+ self.src_dir = \"%s/kubeflow/kubeflow\" % self.src_root_dir\n+\n+ # Root of testing repo.\n+ self.testing_src_dir = os.path.join(self.src_root_dir,\n+ \"kubeflow/testing\")\n+\n+ # Top level directories for python code\n+ self.kubeflow_py = self.src_dir\n+\n+ # The directory within the kubeflow_testing submodule containing\n+ # py scripts to use.\n+ self.kubeflow_testing_py = \"%s/kubeflow/testing/py\" % self.src_root_dir\n+\n+ self.go_path = self.test_dir\n+\n+ def _build_workflow(self):\n+ \"\"\"Create a scaffolding CR for the Argo workflow\"\"\"\n+ volumes = [{\n+ \"name\": DATA_VOLUME,\n+ \"persistentVolumeClaim\": {\n+ \"claimName\": NFS_VOLUME_CLAIM\n+ },\n+ }]\n+ if LOCAL_TESTING == \"False\":\n+ volumes.append(CREDENTIALS_VOLUME)\n+\n+ workflow = {\n+ \"apiVersion\": \"argoproj.io/v1alpha1\",\n+ \"kind\": \"Workflow\",\n+ \"metadata\": {\n+ \"name\": self.name,\n+ \"namespace\": self.namespace,\n+ \"labels\": argo_build_util.add_dicts([\n+ {\n+ \"workflow\": self.name,\n+ \"workflow_template\": self.template_label,\n+ },\n+ argo_build_util.get_prow_labels()\n+ ]),\n+ },\n+ \"spec\": {\n+ \"entrypoint\": E2E_DAG_NAME,\n+ # Have argo garbage collect old workflows 
otherwise we overload\n+ # the API server.\n+ \"ttlSecondsAfterFinished\": 7 * 24 * 60 * 60,", + "comment_created_at": "2021-02-04T17:50:25+00:00", + "comment_author": "PatrickXYS", + "comment_body": "Or make it simpler, you don't need to set it up, we have already configured default workflow spec \r\n\r\n```\r\nconfigmap -o yaml\r\napiVersion: v1\r\ndata:\r\n workflowDefaults: |\r\n metadata:\r\n annotations:\r\n argo: workflows\r\n spec:\r\n ttlStrategy:\r\n secondsAfterSuccess: 604800\r\n secondsAfterCompletion: 604800\r\n secondsAfterFailure: 604800\r\n activeDeadlineSeconds: 86400\r\n...\r\n```", + "pr_file_module": null + }, + { + "comment_id": "570511192", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5577, + "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", + "discussion_id": "570412551", + "commented_code": "@@ -0,0 +1,249 @@\n+import logging\n+import os\n+\n+from kubeflow.testing import argo_build_util\n+\n+# The name of the NFS volume claim to use for test files.\n+NFS_VOLUME_CLAIM = \"nfs-external\"\n+# The name to use for the volume to use to contain test data\n+DATA_VOLUME = \"kubeflow-test-volume\"\n+\n+E2E_DAG_NAME = \"e2e\"\n+EXIT_DAG_NAME = \"exit-handler\"\n+\n+\n+LOCAL_TESTING = os.getenv(\"LOCAL_TESTING\", \"False\")\n+CREDENTIALS_VOLUME = {\"name\": \"aws-secret\",\n+ \"secret\": {\"secretName\": \"aws-secret\"}}\n+CREDENTIALS_MOUNT = {\"mountPath\": \"/root/.aws/\",\n+ \"name\": \"aws-secret\"}\n+\n+AWS_WORKER_IMAGE = (\"527798164940.dkr.ecr.us-west-2.amazonaws.com/\"\n+ \"aws-kubeflow-ci/test-worker:latest\")\n+PUBLIC_WORKER_IMAGE = \"gcr.io/kubeflow-ci/test-worker:latest\"\n+\n+\n+class ArgoTestBuilder:\n+ def __init__(self, name=None, namespace=None, bucket=None,\n+ test_target_name=None,\n+ **kwargs):\n+ self.name = name\n+ self.namespace = namespace\n+ self.bucket = bucket\n+ self.template_label = \"argo_test\"\n+ self.test_target_name = test_target_name\n+ self.mkdir_task_name = \"make-artifacts-dir\"\n+\n+ # *********************************************************************\n+ #\n+ # Define directory locations\n+ #\n+ # *********************************************************************\n+\n+ # mount_path is the directory where the volume to store the test data\n+ # should be mounted.\n+ self.mount_path = \"/mnt/\" + \"test-data-volume\"\n+ # test_dir is the root directory for all data for a particular test\n+ # run.\n+ self.test_dir = self.mount_path + \"/\" + self.name\n+ # output_dir is the directory to sync to GCS to contain the output for\n+ # this job.\n+ self.output_dir = self.test_dir + \"/output\"\n+\n+ self.artifacts_dir = \"%s/artifacts/junit_%s\" % (self.output_dir, name)\n+\n+ # source directory where all repos should be checked out\n+ self.src_root_dir = \"%s/src\" % self.test_dir\n+ # The directory containing the kubeflow/kubeflow repo\n+ self.src_dir = \"%s/kubeflow/kubeflow\" % self.src_root_dir\n+\n+ # Root of testing repo.\n+ self.testing_src_dir = os.path.join(self.src_root_dir,\n+ \"kubeflow/testing\")\n+\n+ # Top level directories for python code\n+ self.kubeflow_py = self.src_dir\n+\n+ # The directory within the kubeflow_testing submodule containing\n+ # py scripts to use.\n+ self.kubeflow_testing_py = \"%s/kubeflow/testing/py\" % self.src_root_dir\n+\n+ self.go_path = self.test_dir\n+\n+ def _build_workflow(self):\n+ \"\"\"Create a scaffolding CR for the Argo workflow\"\"\"\n+ volumes = [{\n+ \"name\": DATA_VOLUME,\n+ \"persistentVolumeClaim\": {\n+ \"claimName\": NFS_VOLUME_CLAIM\n+ },\n+ }]\n+ if LOCAL_TESTING == 
\"False\":\n+ volumes.append(CREDENTIALS_VOLUME)\n+\n+ workflow = {\n+ \"apiVersion\": \"argoproj.io/v1alpha1\",\n+ \"kind\": \"Workflow\",\n+ \"metadata\": {\n+ \"name\": self.name,\n+ \"namespace\": self.namespace,\n+ \"labels\": argo_build_util.add_dicts([\n+ {\n+ \"workflow\": self.name,\n+ \"workflow_template\": self.template_label,\n+ },\n+ argo_build_util.get_prow_labels()\n+ ]),\n+ },\n+ \"spec\": {\n+ \"entrypoint\": E2E_DAG_NAME,\n+ # Have argo garbage collect old workflows otherwise we overload\n+ # the API server.\n+ \"ttlSecondsAfterFinished\": 7 * 24 * 60 * 60,", + "comment_created_at": "2021-02-04T20:08:03+00:00", + "comment_author": "kimwnasptd", + "comment_body": "OK, I'll just omit this field entirely. There no issue by completely omitting the field and the configmap's value will be used right?", + "pr_file_module": null + }, + { + "comment_id": "570516493", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5577, + "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", + "discussion_id": "570412551", + "commented_code": "@@ -0,0 +1,249 @@\n+import logging\n+import os\n+\n+from kubeflow.testing import argo_build_util\n+\n+# The name of the NFS volume claim to use for test files.\n+NFS_VOLUME_CLAIM = \"nfs-external\"\n+# The name to use for the volume to use to contain test data\n+DATA_VOLUME = \"kubeflow-test-volume\"\n+\n+E2E_DAG_NAME = \"e2e\"\n+EXIT_DAG_NAME = \"exit-handler\"\n+\n+\n+LOCAL_TESTING = os.getenv(\"LOCAL_TESTING\", \"False\")\n+CREDENTIALS_VOLUME = {\"name\": \"aws-secret\",\n+ \"secret\": {\"secretName\": \"aws-secret\"}}\n+CREDENTIALS_MOUNT = {\"mountPath\": \"/root/.aws/\",\n+ \"name\": \"aws-secret\"}\n+\n+AWS_WORKER_IMAGE = (\"527798164940.dkr.ecr.us-west-2.amazonaws.com/\"\n+ \"aws-kubeflow-ci/test-worker:latest\")\n+PUBLIC_WORKER_IMAGE = \"gcr.io/kubeflow-ci/test-worker:latest\"\n+\n+\n+class ArgoTestBuilder:\n+ def __init__(self, name=None, namespace=None, bucket=None,\n+ test_target_name=None,\n+ **kwargs):\n+ self.name = name\n+ self.namespace = namespace\n+ self.bucket = bucket\n+ self.template_label = \"argo_test\"\n+ self.test_target_name = test_target_name\n+ self.mkdir_task_name = \"make-artifacts-dir\"\n+\n+ # *********************************************************************\n+ #\n+ # Define directory locations\n+ #\n+ # *********************************************************************\n+\n+ # mount_path is the directory where the volume to store the test data\n+ # should be mounted.\n+ self.mount_path = \"/mnt/\" + \"test-data-volume\"\n+ # test_dir is the root directory for all data for a particular test\n+ # run.\n+ self.test_dir = self.mount_path + \"/\" + self.name\n+ # output_dir is the directory to sync to GCS to contain the output for\n+ # this job.\n+ self.output_dir = self.test_dir + \"/output\"\n+\n+ self.artifacts_dir = \"%s/artifacts/junit_%s\" % (self.output_dir, name)\n+\n+ # source directory where all repos should be checked out\n+ self.src_root_dir = \"%s/src\" % self.test_dir\n+ # The directory containing the kubeflow/kubeflow repo\n+ self.src_dir = \"%s/kubeflow/kubeflow\" % self.src_root_dir\n+\n+ # Root of testing repo.\n+ self.testing_src_dir = os.path.join(self.src_root_dir,\n+ \"kubeflow/testing\")\n+\n+ # Top level directories for python code\n+ self.kubeflow_py = self.src_dir\n+\n+ # The directory within the kubeflow_testing submodule containing\n+ # py scripts to use.\n+ self.kubeflow_testing_py = \"%s/kubeflow/testing/py\" % self.src_root_dir\n+\n+ self.go_path = self.test_dir\n+\n+ def 
_build_workflow(self):\n+ \"\"\"Create a scaffolding CR for the Argo workflow\"\"\"\n+ volumes = [{\n+ \"name\": DATA_VOLUME,\n+ \"persistentVolumeClaim\": {\n+ \"claimName\": NFS_VOLUME_CLAIM\n+ },\n+ }]\n+ if LOCAL_TESTING == \"False\":\n+ volumes.append(CREDENTIALS_VOLUME)\n+\n+ workflow = {\n+ \"apiVersion\": \"argoproj.io/v1alpha1\",\n+ \"kind\": \"Workflow\",\n+ \"metadata\": {\n+ \"name\": self.name,\n+ \"namespace\": self.namespace,\n+ \"labels\": argo_build_util.add_dicts([\n+ {\n+ \"workflow\": self.name,\n+ \"workflow_template\": self.template_label,\n+ },\n+ argo_build_util.get_prow_labels()\n+ ]),\n+ },\n+ \"spec\": {\n+ \"entrypoint\": E2E_DAG_NAME,\n+ # Have argo garbage collect old workflows otherwise we overload\n+ # the API server.\n+ \"ttlSecondsAfterFinished\": 7 * 24 * 60 * 60,", + "comment_created_at": "2021-02-04T20:17:21+00:00", + "comment_author": "PatrickXYS", + "comment_body": "Yeah default workflow should handle it.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1732661929", + "pr_number": 7634, + "pr_file": "components/crud-web-apps/common/backend/setup.py", + "created_at": "2024-08-27T11:26:07+00:00", + "commented_code": "\"Werkzeug >= 0.16.0\",\n \"Flask-Cors >= 3.0.8\",\n \"gevent\",\n \"prometheus-flask-exporter >= 0.23.1\",\n \"importlib-metadata >= 1.0;python_version<'3.8'\",\n]", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1732661929", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7634, + "pr_file": "components/crud-web-apps/common/backend/setup.py", + "discussion_id": "1732661929", + "commented_code": "@@ -9,6 +9,8 @@\n \"Werkzeug >= 0.16.0\",\n \"Flask-Cors >= 3.0.8\",\n \"gevent\",\n+ \"prometheus-flask-exporter >= 0.23.1\",\n+ \"importlib-metadata >= 1.0;python_version<'3.8'\",\n ]", + "comment_created_at": "2024-08-27T11:26:07+00:00", + "comment_author": "orfeas-k", + "comment_body": "Why do we need to set `python_version` here?", + "pr_file_module": null + }, + { + "comment_id": "1734095817", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7634, + "pr_file": "components/crud-web-apps/common/backend/setup.py", + "discussion_id": "1732661929", + "commented_code": "@@ -9,6 +9,8 @@\n \"Werkzeug >= 0.16.0\",\n \"Flask-Cors >= 3.0.8\",\n \"gevent\",\n+ \"prometheus-flask-exporter >= 0.23.1\",\n+ \"importlib-metadata >= 1.0;python_version<'3.8'\",\n ]", + "comment_created_at": "2024-08-28T06:54:41+00:00", + "comment_author": "rgildein", + "comment_body": "From python 3.8 the `importlib.metadata` is build in package. You can see it [here](https://docs.python.org/3.8/library/importlib.metadata.html).", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-centralize-configuration-constants.md b/_reviewers/kubeflow-centralize-configuration-constants.md index f6200a3..b6b68ae 100644 --- a/_reviewers/kubeflow-centralize-configuration-constants.md +++ b/_reviewers/kubeflow-centralize-configuration-constants.md @@ -38,191 +38,3 @@ registry = AWS_REGISTRIES["jupyter"] ``` When system-wide defaults are available (like in configmaps), prefer those over hardcoded values to ensure consistency across components. 
- - -[ - { - "discussion_id": "1740935864", - "pr_number": 7634, - "pr_file": "components/crud-web-apps/volumes/backend/entrypoint.py", - "created_at": "2024-09-02T13:33:53+00:00", - "commented_code": "log = logging.getLogger(__name__)\n\n\ndef get_config(mode):\n \"\"\"Return a config based on the selected mode.\"\"\"\n config_classes = {\n config.BackendMode.DEVELOPMENT.value: config.DevConfig,\n config.BackendMode.DEVELOPMENT_FULL.value: config.DevConfig,\n config.BackendMode.PRODUCTION.value: config.ProdConfig,\n config.BackendMode.PRODUCTION_FULL.value: config.ProdConfig,\n }\n cfg_class = config_classes.get(mode)\n if not cfg_class:\n raise RuntimeError(\"Backend mode '%s' is not implemented. Choose one\"\n \" of %s\" % (mode, list(config_classes.keys())))\n return cfg_class()\n\n\nAPP_NAME = os.environ.get(\"APP_NAME\", \"Volumes Web App\")\nBACKEND_MODE = os.environ.get(\"BACKEND_MODE\",\n config.BackendMode.PRODUCTION.value)\nUI_FLAVOR = os.environ.get(\"UI_FLAVOR\", \"default\")\nPREFIX = os.environ.get(\"APP_PREFIX\", \"/\")\nMETRICS = bool(os.environ.get(\"METRICS\", False))", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1740935864", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7634, - "pr_file": "components/crud-web-apps/volumes/backend/entrypoint.py", - "discussion_id": "1740935864", - "commented_code": "@@ -7,29 +7,16 @@\n log = logging.getLogger(__name__)\n \n \n-def get_config(mode):\n- \"\"\"Return a config based on the selected mode.\"\"\"\n- config_classes = {\n- config.BackendMode.DEVELOPMENT.value: config.DevConfig,\n- config.BackendMode.DEVELOPMENT_FULL.value: config.DevConfig,\n- config.BackendMode.PRODUCTION.value: config.ProdConfig,\n- config.BackendMode.PRODUCTION_FULL.value: config.ProdConfig,\n- }\n- cfg_class = config_classes.get(mode)\n- if not cfg_class:\n- raise RuntimeError(\"Backend mode '%s' is not implemented. Choose one\"\n- \" of %s\" % (mode, list(config_classes.keys())))\n- return cfg_class()\n-\n-\n APP_NAME = os.environ.get(\"APP_NAME\", \"Volumes Web App\")\n BACKEND_MODE = os.environ.get(\"BACKEND_MODE\",\n config.BackendMode.PRODUCTION.value)\n UI_FLAVOR = os.environ.get(\"UI_FLAVOR\", \"default\")\n PREFIX = os.environ.get(\"APP_PREFIX\", \"/\")\n+METRICS = bool(os.environ.get(\"METRICS\", False))", - "comment_created_at": "2024-09-02T13:33:53+00:00", - "comment_author": "orfeas-k", - "comment_body": "Do we need to set a default value both in each specific app and in the config? Maybe we should do it like `backend_mode`\r\n```\r\nBACKEND_MODE = os.environ.get(\"BACKEND_MODE\",\r\n config.BackendMode.PRODUCTION.value)\r\n```\r\n\r\n", - "pr_file_module": null - }, - { - "comment_id": "1741232798", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7634, - "pr_file": "components/crud-web-apps/volumes/backend/entrypoint.py", - "discussion_id": "1740935864", - "commented_code": "@@ -7,29 +7,16 @@\n log = logging.getLogger(__name__)\n \n \n-def get_config(mode):\n- \"\"\"Return a config based on the selected mode.\"\"\"\n- config_classes = {\n- config.BackendMode.DEVELOPMENT.value: config.DevConfig,\n- config.BackendMode.DEVELOPMENT_FULL.value: config.DevConfig,\n- config.BackendMode.PRODUCTION.value: config.ProdConfig,\n- config.BackendMode.PRODUCTION_FULL.value: config.ProdConfig,\n- }\n- cfg_class = config_classes.get(mode)\n- if not cfg_class:\n- raise RuntimeError(\"Backend mode '%s' is not implemented. 
Choose one\"\n- \" of %s\" % (mode, list(config_classes.keys())))\n- return cfg_class()\n-\n-\n APP_NAME = os.environ.get(\"APP_NAME\", \"Volumes Web App\")\n BACKEND_MODE = os.environ.get(\"BACKEND_MODE\",\n config.BackendMode.PRODUCTION.value)\n UI_FLAVOR = os.environ.get(\"UI_FLAVOR\", \"default\")\n PREFIX = os.environ.get(\"APP_PREFIX\", \"/\")\n+METRICS = bool(os.environ.get(\"METRICS\", False))", - "comment_created_at": "2024-09-02T20:20:00+00:00", - "comment_author": "rgildein", - "comment_body": "Actually I moved the `self.METRICS = bool(os.environ.get(\"METRICS\", True))` to config __init__, since the logic is for all apps sames (check if env is set to 1, true, ...). WDYT about it?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "588562507", - "pr_number": 5663, - "pr_file": "py/kubeflow/kubeflow/cd/jwa.py", - "created_at": "2021-03-05T18:30:21+00:00", - "commented_code": "\"\"\"\"Argo Workflow for building Jupyter web app's OCI image using Kaniko\"\"\"\nimport os\n\nfrom kubeflow.kubeflow import ci\nfrom kubeflow.testing import argo_build_util\n\nTAG = os.getenv(\"PULL_BASE_SHA\", \"kaniko-local-test\")\nAWS_REGISTRY = \"public.ecr.aws/j1r0q0g6/jupyter-web-app\"", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "588562507", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5663, - "pr_file": "py/kubeflow/kubeflow/cd/jwa.py", - "discussion_id": "588562507", - "commented_code": "@@ -0,0 +1,48 @@\n+\"\"\"\"Argo Workflow for building Jupyter web app's OCI image using Kaniko\"\"\"\n+import os\n+\n+from kubeflow.kubeflow import ci\n+from kubeflow.testing import argo_build_util\n+\n+TAG = os.getenv(\"PULL_BASE_SHA\", \"kaniko-local-test\")\n+AWS_REGISTRY = \"public.ecr.aws/j1r0q0g6/jupyter-web-app\"", - "comment_created_at": "2021-03-05T18:30:21+00:00", - "comment_author": "PatrickXYS", - "comment_body": "NIT: since WG-notebooks have multiple ECR registries (7 now, maybe more in the future), I would recommend you maintain a static configmap or mapping in other python files.\r\n\r\nE.g, py/kubeflow/kubeflow/cd/config.py", - "pr_file_module": null - }, - { - "comment_id": "588581750", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5663, - "pr_file": "py/kubeflow/kubeflow/cd/jwa.py", - "discussion_id": "588562507", - "commented_code": "@@ -0,0 +1,48 @@\n+\"\"\"\"Argo Workflow for building Jupyter web app's OCI image using Kaniko\"\"\"\n+import os\n+\n+from kubeflow.kubeflow import ci\n+from kubeflow.testing import argo_build_util\n+\n+TAG = os.getenv(\"PULL_BASE_SHA\", \"kaniko-local-test\")\n+AWS_REGISTRY = \"public.ecr.aws/j1r0q0g6/jupyter-web-app\"", - "comment_created_at": "2021-03-05T18:51:30+00:00", - "comment_author": "kimwnasptd", - "comment_body": "That's a good idea, it will help us have all the names organized in one place.\r\nAdding it", - "pr_file_module": null - }, - { - "comment_id": "588603635", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5663, - "pr_file": "py/kubeflow/kubeflow/cd/jwa.py", - "discussion_id": "588562507", - "commented_code": "@@ -0,0 +1,48 @@\n+\"\"\"\"Argo Workflow for building Jupyter web app's OCI image using Kaniko\"\"\"\n+import os\n+\n+from kubeflow.kubeflow import ci\n+from kubeflow.testing import argo_build_util\n+\n+TAG = os.getenv(\"PULL_BASE_SHA\", \"kaniko-local-test\")\n+AWS_REGISTRY = \"public.ecr.aws/j1r0q0g6/jupyter-web-app\"", - "comment_created_at": "2021-03-05T19:14:27+00:00", - "comment_author": "kimwnasptd", - "comment_body": "@PatrickXYS I added another 
commit that introduces this `config.py` file.\r\nPTAL and if you are OK I can resolve this conversation", - "pr_file_module": null - }, - { - "comment_id": "588604805", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5663, - "pr_file": "py/kubeflow/kubeflow/cd/jwa.py", - "discussion_id": "588562507", - "commented_code": "@@ -0,0 +1,48 @@\n+\"\"\"\"Argo Workflow for building Jupyter web app's OCI image using Kaniko\"\"\"\n+import os\n+\n+from kubeflow.kubeflow import ci\n+from kubeflow.testing import argo_build_util\n+\n+TAG = os.getenv(\"PULL_BASE_SHA\", \"kaniko-local-test\")\n+AWS_REGISTRY = \"public.ecr.aws/j1r0q0g6/jupyter-web-app\"", - "comment_created_at": "2021-03-05T19:15:40+00:00", - "comment_author": "PatrickXYS", - "comment_body": "LGTM feel free to resolve", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "570412551", - "pr_number": 5577, - "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", - "created_at": "2021-02-04T17:32:50+00:00", - "commented_code": "import logging\nimport os\n\nfrom kubeflow.testing import argo_build_util\n\n# The name of the NFS volume claim to use for test files.\nNFS_VOLUME_CLAIM = \"nfs-external\"\n# The name to use for the volume to use to contain test data\nDATA_VOLUME = \"kubeflow-test-volume\"\n\nE2E_DAG_NAME = \"e2e\"\nEXIT_DAG_NAME = \"exit-handler\"\n\n\nLOCAL_TESTING = os.getenv(\"LOCAL_TESTING\", \"False\")\nCREDENTIALS_VOLUME = {\"name\": \"aws-secret\",\n \"secret\": {\"secretName\": \"aws-secret\"}}\nCREDENTIALS_MOUNT = {\"mountPath\": \"/root/.aws/\",\n \"name\": \"aws-secret\"}\n\nAWS_WORKER_IMAGE = (\"527798164940.dkr.ecr.us-west-2.amazonaws.com/\"\n \"aws-kubeflow-ci/test-worker:latest\")\nPUBLIC_WORKER_IMAGE = \"gcr.io/kubeflow-ci/test-worker:latest\"\n\n\nclass ArgoTestBuilder:\n def __init__(self, name=None, namespace=None, bucket=None,\n test_target_name=None,\n **kwargs):\n self.name = name\n self.namespace = namespace\n self.bucket = bucket\n self.template_label = \"argo_test\"\n self.test_target_name = test_target_name\n self.mkdir_task_name = \"make-artifacts-dir\"\n\n # *********************************************************************\n #\n # Define directory locations\n #\n # *********************************************************************\n\n # mount_path is the directory where the volume to store the test data\n # should be mounted.\n self.mount_path = \"/mnt/\" + \"test-data-volume\"\n # test_dir is the root directory for all data for a particular test\n # run.\n self.test_dir = self.mount_path + \"/\" + self.name\n # output_dir is the directory to sync to GCS to contain the output for\n # this job.\n self.output_dir = self.test_dir + \"/output\"\n\n self.artifacts_dir = \"%s/artifacts/junit_%s\" % (self.output_dir, name)\n\n # source directory where all repos should be checked out\n self.src_root_dir = \"%s/src\" % self.test_dir\n # The directory containing the kubeflow/kubeflow repo\n self.src_dir = \"%s/kubeflow/kubeflow\" % self.src_root_dir\n\n # Root of testing repo.\n self.testing_src_dir = os.path.join(self.src_root_dir,\n \"kubeflow/testing\")\n\n # Top level directories for python code\n self.kubeflow_py = self.src_dir\n\n # The directory within the kubeflow_testing submodule containing\n # py scripts to use.\n self.kubeflow_testing_py = \"%s/kubeflow/testing/py\" % self.src_root_dir\n\n self.go_path = self.test_dir\n\n def _build_workflow(self):\n \"\"\"Create a scaffolding CR for the Argo workflow\"\"\"\n volumes = [{\n \"name\": DATA_VOLUME,\n \"persistentVolumeClaim\": {\n 
\"claimName\": NFS_VOLUME_CLAIM\n },\n }]\n if LOCAL_TESTING == \"False\":\n volumes.append(CREDENTIALS_VOLUME)\n\n workflow = {\n \"apiVersion\": \"argoproj.io/v1alpha1\",\n \"kind\": \"Workflow\",\n \"metadata\": {\n \"name\": self.name,\n \"namespace\": self.namespace,\n \"labels\": argo_build_util.add_dicts([\n {\n \"workflow\": self.name,\n \"workflow_template\": self.template_label,\n },\n argo_build_util.get_prow_labels()\n ]),\n },\n \"spec\": {\n \"entrypoint\": E2E_DAG_NAME,\n # Have argo garbage collect old workflows otherwise we overload\n # the API server.\n \"ttlSecondsAfterFinished\": 7 * 24 * 60 * 60,", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "570412551", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5577, - "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", - "discussion_id": "570412551", - "commented_code": "@@ -0,0 +1,249 @@\n+import logging\n+import os\n+\n+from kubeflow.testing import argo_build_util\n+\n+# The name of the NFS volume claim to use for test files.\n+NFS_VOLUME_CLAIM = \"nfs-external\"\n+# The name to use for the volume to use to contain test data\n+DATA_VOLUME = \"kubeflow-test-volume\"\n+\n+E2E_DAG_NAME = \"e2e\"\n+EXIT_DAG_NAME = \"exit-handler\"\n+\n+\n+LOCAL_TESTING = os.getenv(\"LOCAL_TESTING\", \"False\")\n+CREDENTIALS_VOLUME = {\"name\": \"aws-secret\",\n+ \"secret\": {\"secretName\": \"aws-secret\"}}\n+CREDENTIALS_MOUNT = {\"mountPath\": \"/root/.aws/\",\n+ \"name\": \"aws-secret\"}\n+\n+AWS_WORKER_IMAGE = (\"527798164940.dkr.ecr.us-west-2.amazonaws.com/\"\n+ \"aws-kubeflow-ci/test-worker:latest\")\n+PUBLIC_WORKER_IMAGE = \"gcr.io/kubeflow-ci/test-worker:latest\"\n+\n+\n+class ArgoTestBuilder:\n+ def __init__(self, name=None, namespace=None, bucket=None,\n+ test_target_name=None,\n+ **kwargs):\n+ self.name = name\n+ self.namespace = namespace\n+ self.bucket = bucket\n+ self.template_label = \"argo_test\"\n+ self.test_target_name = test_target_name\n+ self.mkdir_task_name = \"make-artifacts-dir\"\n+\n+ # *********************************************************************\n+ #\n+ # Define directory locations\n+ #\n+ # *********************************************************************\n+\n+ # mount_path is the directory where the volume to store the test data\n+ # should be mounted.\n+ self.mount_path = \"/mnt/\" + \"test-data-volume\"\n+ # test_dir is the root directory for all data for a particular test\n+ # run.\n+ self.test_dir = self.mount_path + \"/\" + self.name\n+ # output_dir is the directory to sync to GCS to contain the output for\n+ # this job.\n+ self.output_dir = self.test_dir + \"/output\"\n+\n+ self.artifacts_dir = \"%s/artifacts/junit_%s\" % (self.output_dir, name)\n+\n+ # source directory where all repos should be checked out\n+ self.src_root_dir = \"%s/src\" % self.test_dir\n+ # The directory containing the kubeflow/kubeflow repo\n+ self.src_dir = \"%s/kubeflow/kubeflow\" % self.src_root_dir\n+\n+ # Root of testing repo.\n+ self.testing_src_dir = os.path.join(self.src_root_dir,\n+ \"kubeflow/testing\")\n+\n+ # Top level directories for python code\n+ self.kubeflow_py = self.src_dir\n+\n+ # The directory within the kubeflow_testing submodule containing\n+ # py scripts to use.\n+ self.kubeflow_testing_py = \"%s/kubeflow/testing/py\" % self.src_root_dir\n+\n+ self.go_path = self.test_dir\n+\n+ def _build_workflow(self):\n+ \"\"\"Create a scaffolding CR for the Argo workflow\"\"\"\n+ volumes = [{\n+ \"name\": DATA_VOLUME,\n+ \"persistentVolumeClaim\": {\n+ 
\"claimName\": NFS_VOLUME_CLAIM\n+ },\n+ }]\n+ if LOCAL_TESTING == \"False\":\n+ volumes.append(CREDENTIALS_VOLUME)\n+\n+ workflow = {\n+ \"apiVersion\": \"argoproj.io/v1alpha1\",\n+ \"kind\": \"Workflow\",\n+ \"metadata\": {\n+ \"name\": self.name,\n+ \"namespace\": self.namespace,\n+ \"labels\": argo_build_util.add_dicts([\n+ {\n+ \"workflow\": self.name,\n+ \"workflow_template\": self.template_label,\n+ },\n+ argo_build_util.get_prow_labels()\n+ ]),\n+ },\n+ \"spec\": {\n+ \"entrypoint\": E2E_DAG_NAME,\n+ # Have argo garbage collect old workflows otherwise we overload\n+ # the API server.\n+ \"ttlSecondsAfterFinished\": 7 * 24 * 60 * 60,", - "comment_created_at": "2021-02-04T17:32:50+00:00", - "comment_author": "PatrickXYS", - "comment_body": "In new argo cluster, we installed v2.12.3 argo workflow, this parameter has been deprecated. \r\n\r\nhttps://github.com/argoproj/argo/blob/master/examples/gc-ttl.yaml\r\n\r\nYou can instead use ttlStrategy", - "pr_file_module": null - }, - { - "comment_id": "570425003", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5577, - "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", - "discussion_id": "570412551", - "commented_code": "@@ -0,0 +1,249 @@\n+import logging\n+import os\n+\n+from kubeflow.testing import argo_build_util\n+\n+# The name of the NFS volume claim to use for test files.\n+NFS_VOLUME_CLAIM = \"nfs-external\"\n+# The name to use for the volume to use to contain test data\n+DATA_VOLUME = \"kubeflow-test-volume\"\n+\n+E2E_DAG_NAME = \"e2e\"\n+EXIT_DAG_NAME = \"exit-handler\"\n+\n+\n+LOCAL_TESTING = os.getenv(\"LOCAL_TESTING\", \"False\")\n+CREDENTIALS_VOLUME = {\"name\": \"aws-secret\",\n+ \"secret\": {\"secretName\": \"aws-secret\"}}\n+CREDENTIALS_MOUNT = {\"mountPath\": \"/root/.aws/\",\n+ \"name\": \"aws-secret\"}\n+\n+AWS_WORKER_IMAGE = (\"527798164940.dkr.ecr.us-west-2.amazonaws.com/\"\n+ \"aws-kubeflow-ci/test-worker:latest\")\n+PUBLIC_WORKER_IMAGE = \"gcr.io/kubeflow-ci/test-worker:latest\"\n+\n+\n+class ArgoTestBuilder:\n+ def __init__(self, name=None, namespace=None, bucket=None,\n+ test_target_name=None,\n+ **kwargs):\n+ self.name = name\n+ self.namespace = namespace\n+ self.bucket = bucket\n+ self.template_label = \"argo_test\"\n+ self.test_target_name = test_target_name\n+ self.mkdir_task_name = \"make-artifacts-dir\"\n+\n+ # *********************************************************************\n+ #\n+ # Define directory locations\n+ #\n+ # *********************************************************************\n+\n+ # mount_path is the directory where the volume to store the test data\n+ # should be mounted.\n+ self.mount_path = \"/mnt/\" + \"test-data-volume\"\n+ # test_dir is the root directory for all data for a particular test\n+ # run.\n+ self.test_dir = self.mount_path + \"/\" + self.name\n+ # output_dir is the directory to sync to GCS to contain the output for\n+ # this job.\n+ self.output_dir = self.test_dir + \"/output\"\n+\n+ self.artifacts_dir = \"%s/artifacts/junit_%s\" % (self.output_dir, name)\n+\n+ # source directory where all repos should be checked out\n+ self.src_root_dir = \"%s/src\" % self.test_dir\n+ # The directory containing the kubeflow/kubeflow repo\n+ self.src_dir = \"%s/kubeflow/kubeflow\" % self.src_root_dir\n+\n+ # Root of testing repo.\n+ self.testing_src_dir = os.path.join(self.src_root_dir,\n+ \"kubeflow/testing\")\n+\n+ # Top level directories for python code\n+ self.kubeflow_py = self.src_dir\n+\n+ # The directory within the kubeflow_testing submodule containing\n+ # py scripts to 
use.\n+ self.kubeflow_testing_py = \"%s/kubeflow/testing/py\" % self.src_root_dir\n+\n+ self.go_path = self.test_dir\n+\n+ def _build_workflow(self):\n+ \"\"\"Create a scaffolding CR for the Argo workflow\"\"\"\n+ volumes = [{\n+ \"name\": DATA_VOLUME,\n+ \"persistentVolumeClaim\": {\n+ \"claimName\": NFS_VOLUME_CLAIM\n+ },\n+ }]\n+ if LOCAL_TESTING == \"False\":\n+ volumes.append(CREDENTIALS_VOLUME)\n+\n+ workflow = {\n+ \"apiVersion\": \"argoproj.io/v1alpha1\",\n+ \"kind\": \"Workflow\",\n+ \"metadata\": {\n+ \"name\": self.name,\n+ \"namespace\": self.namespace,\n+ \"labels\": argo_build_util.add_dicts([\n+ {\n+ \"workflow\": self.name,\n+ \"workflow_template\": self.template_label,\n+ },\n+ argo_build_util.get_prow_labels()\n+ ]),\n+ },\n+ \"spec\": {\n+ \"entrypoint\": E2E_DAG_NAME,\n+ # Have argo garbage collect old workflows otherwise we overload\n+ # the API server.\n+ \"ttlSecondsAfterFinished\": 7 * 24 * 60 * 60,", - "comment_created_at": "2021-02-04T17:50:25+00:00", - "comment_author": "PatrickXYS", - "comment_body": "Or make it simpler, you don't need to set it up, we have already configured default workflow spec \r\n\r\n```\r\nconfigmap -o yaml\r\napiVersion: v1\r\ndata:\r\n workflowDefaults: |\r\n metadata:\r\n annotations:\r\n argo: workflows\r\n spec:\r\n ttlStrategy:\r\n secondsAfterSuccess: 604800\r\n secondsAfterCompletion: 604800\r\n secondsAfterFailure: 604800\r\n activeDeadlineSeconds: 86400\r\n...\r\n```", - "pr_file_module": null - }, - { - "comment_id": "570511192", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5577, - "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", - "discussion_id": "570412551", - "commented_code": "@@ -0,0 +1,249 @@\n+import logging\n+import os\n+\n+from kubeflow.testing import argo_build_util\n+\n+# The name of the NFS volume claim to use for test files.\n+NFS_VOLUME_CLAIM = \"nfs-external\"\n+# The name to use for the volume to use to contain test data\n+DATA_VOLUME = \"kubeflow-test-volume\"\n+\n+E2E_DAG_NAME = \"e2e\"\n+EXIT_DAG_NAME = \"exit-handler\"\n+\n+\n+LOCAL_TESTING = os.getenv(\"LOCAL_TESTING\", \"False\")\n+CREDENTIALS_VOLUME = {\"name\": \"aws-secret\",\n+ \"secret\": {\"secretName\": \"aws-secret\"}}\n+CREDENTIALS_MOUNT = {\"mountPath\": \"/root/.aws/\",\n+ \"name\": \"aws-secret\"}\n+\n+AWS_WORKER_IMAGE = (\"527798164940.dkr.ecr.us-west-2.amazonaws.com/\"\n+ \"aws-kubeflow-ci/test-worker:latest\")\n+PUBLIC_WORKER_IMAGE = \"gcr.io/kubeflow-ci/test-worker:latest\"\n+\n+\n+class ArgoTestBuilder:\n+ def __init__(self, name=None, namespace=None, bucket=None,\n+ test_target_name=None,\n+ **kwargs):\n+ self.name = name\n+ self.namespace = namespace\n+ self.bucket = bucket\n+ self.template_label = \"argo_test\"\n+ self.test_target_name = test_target_name\n+ self.mkdir_task_name = \"make-artifacts-dir\"\n+\n+ # *********************************************************************\n+ #\n+ # Define directory locations\n+ #\n+ # *********************************************************************\n+\n+ # mount_path is the directory where the volume to store the test data\n+ # should be mounted.\n+ self.mount_path = \"/mnt/\" + \"test-data-volume\"\n+ # test_dir is the root directory for all data for a particular test\n+ # run.\n+ self.test_dir = self.mount_path + \"/\" + self.name\n+ # output_dir is the directory to sync to GCS to contain the output for\n+ # this job.\n+ self.output_dir = self.test_dir + \"/output\"\n+\n+ self.artifacts_dir = \"%s/artifacts/junit_%s\" % (self.output_dir, name)\n+\n+ # source directory 
where all repos should be checked out\n+ self.src_root_dir = \"%s/src\" % self.test_dir\n+ # The directory containing the kubeflow/kubeflow repo\n+ self.src_dir = \"%s/kubeflow/kubeflow\" % self.src_root_dir\n+\n+ # Root of testing repo.\n+ self.testing_src_dir = os.path.join(self.src_root_dir,\n+ \"kubeflow/testing\")\n+\n+ # Top level directories for python code\n+ self.kubeflow_py = self.src_dir\n+\n+ # The directory within the kubeflow_testing submodule containing\n+ # py scripts to use.\n+ self.kubeflow_testing_py = \"%s/kubeflow/testing/py\" % self.src_root_dir\n+\n+ self.go_path = self.test_dir\n+\n+ def _build_workflow(self):\n+ \"\"\"Create a scaffolding CR for the Argo workflow\"\"\"\n+ volumes = [{\n+ \"name\": DATA_VOLUME,\n+ \"persistentVolumeClaim\": {\n+ \"claimName\": NFS_VOLUME_CLAIM\n+ },\n+ }]\n+ if LOCAL_TESTING == \"False\":\n+ volumes.append(CREDENTIALS_VOLUME)\n+\n+ workflow = {\n+ \"apiVersion\": \"argoproj.io/v1alpha1\",\n+ \"kind\": \"Workflow\",\n+ \"metadata\": {\n+ \"name\": self.name,\n+ \"namespace\": self.namespace,\n+ \"labels\": argo_build_util.add_dicts([\n+ {\n+ \"workflow\": self.name,\n+ \"workflow_template\": self.template_label,\n+ },\n+ argo_build_util.get_prow_labels()\n+ ]),\n+ },\n+ \"spec\": {\n+ \"entrypoint\": E2E_DAG_NAME,\n+ # Have argo garbage collect old workflows otherwise we overload\n+ # the API server.\n+ \"ttlSecondsAfterFinished\": 7 * 24 * 60 * 60,", - "comment_created_at": "2021-02-04T20:08:03+00:00", - "comment_author": "kimwnasptd", - "comment_body": "OK, I'll just omit this field entirely. There no issue by completely omitting the field and the configmap's value will be used right?", - "pr_file_module": null - }, - { - "comment_id": "570516493", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5577, - "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", - "discussion_id": "570412551", - "commented_code": "@@ -0,0 +1,249 @@\n+import logging\n+import os\n+\n+from kubeflow.testing import argo_build_util\n+\n+# The name of the NFS volume claim to use for test files.\n+NFS_VOLUME_CLAIM = \"nfs-external\"\n+# The name to use for the volume to use to contain test data\n+DATA_VOLUME = \"kubeflow-test-volume\"\n+\n+E2E_DAG_NAME = \"e2e\"\n+EXIT_DAG_NAME = \"exit-handler\"\n+\n+\n+LOCAL_TESTING = os.getenv(\"LOCAL_TESTING\", \"False\")\n+CREDENTIALS_VOLUME = {\"name\": \"aws-secret\",\n+ \"secret\": {\"secretName\": \"aws-secret\"}}\n+CREDENTIALS_MOUNT = {\"mountPath\": \"/root/.aws/\",\n+ \"name\": \"aws-secret\"}\n+\n+AWS_WORKER_IMAGE = (\"527798164940.dkr.ecr.us-west-2.amazonaws.com/\"\n+ \"aws-kubeflow-ci/test-worker:latest\")\n+PUBLIC_WORKER_IMAGE = \"gcr.io/kubeflow-ci/test-worker:latest\"\n+\n+\n+class ArgoTestBuilder:\n+ def __init__(self, name=None, namespace=None, bucket=None,\n+ test_target_name=None,\n+ **kwargs):\n+ self.name = name\n+ self.namespace = namespace\n+ self.bucket = bucket\n+ self.template_label = \"argo_test\"\n+ self.test_target_name = test_target_name\n+ self.mkdir_task_name = \"make-artifacts-dir\"\n+\n+ # *********************************************************************\n+ #\n+ # Define directory locations\n+ #\n+ # *********************************************************************\n+\n+ # mount_path is the directory where the volume to store the test data\n+ # should be mounted.\n+ self.mount_path = \"/mnt/\" + \"test-data-volume\"\n+ # test_dir is the root directory for all data for a particular test\n+ # run.\n+ self.test_dir = self.mount_path + \"/\" + self.name\n+ # output_dir is the 
directory to sync to GCS to contain the output for\n+ # this job.\n+ self.output_dir = self.test_dir + \"/output\"\n+\n+ self.artifacts_dir = \"%s/artifacts/junit_%s\" % (self.output_dir, name)\n+\n+ # source directory where all repos should be checked out\n+ self.src_root_dir = \"%s/src\" % self.test_dir\n+ # The directory containing the kubeflow/kubeflow repo\n+ self.src_dir = \"%s/kubeflow/kubeflow\" % self.src_root_dir\n+\n+ # Root of testing repo.\n+ self.testing_src_dir = os.path.join(self.src_root_dir,\n+ \"kubeflow/testing\")\n+\n+ # Top level directories for python code\n+ self.kubeflow_py = self.src_dir\n+\n+ # The directory within the kubeflow_testing submodule containing\n+ # py scripts to use.\n+ self.kubeflow_testing_py = \"%s/kubeflow/testing/py\" % self.src_root_dir\n+\n+ self.go_path = self.test_dir\n+\n+ def _build_workflow(self):\n+ \"\"\"Create a scaffolding CR for the Argo workflow\"\"\"\n+ volumes = [{\n+ \"name\": DATA_VOLUME,\n+ \"persistentVolumeClaim\": {\n+ \"claimName\": NFS_VOLUME_CLAIM\n+ },\n+ }]\n+ if LOCAL_TESTING == \"False\":\n+ volumes.append(CREDENTIALS_VOLUME)\n+\n+ workflow = {\n+ \"apiVersion\": \"argoproj.io/v1alpha1\",\n+ \"kind\": \"Workflow\",\n+ \"metadata\": {\n+ \"name\": self.name,\n+ \"namespace\": self.namespace,\n+ \"labels\": argo_build_util.add_dicts([\n+ {\n+ \"workflow\": self.name,\n+ \"workflow_template\": self.template_label,\n+ },\n+ argo_build_util.get_prow_labels()\n+ ]),\n+ },\n+ \"spec\": {\n+ \"entrypoint\": E2E_DAG_NAME,\n+ # Have argo garbage collect old workflows otherwise we overload\n+ # the API server.\n+ \"ttlSecondsAfterFinished\": 7 * 24 * 60 * 60,", - "comment_created_at": "2021-02-04T20:17:21+00:00", - "comment_author": "PatrickXYS", - "comment_body": "Yeah default workflow should handle it.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1732661929", - "pr_number": 7634, - "pr_file": "components/crud-web-apps/common/backend/setup.py", - "created_at": "2024-08-27T11:26:07+00:00", - "commented_code": "\"Werkzeug >= 0.16.0\",\n \"Flask-Cors >= 3.0.8\",\n \"gevent\",\n \"prometheus-flask-exporter >= 0.23.1\",\n \"importlib-metadata >= 1.0;python_version<'3.8'\",\n]", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1732661929", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7634, - "pr_file": "components/crud-web-apps/common/backend/setup.py", - "discussion_id": "1732661929", - "commented_code": "@@ -9,6 +9,8 @@\n \"Werkzeug >= 0.16.0\",\n \"Flask-Cors >= 3.0.8\",\n \"gevent\",\n+ \"prometheus-flask-exporter >= 0.23.1\",\n+ \"importlib-metadata >= 1.0;python_version<'3.8'\",\n ]", - "comment_created_at": "2024-08-27T11:26:07+00:00", - "comment_author": "orfeas-k", - "comment_body": "Why do we need to set `python_version` here?", - "pr_file_module": null - }, - { - "comment_id": "1734095817", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7634, - "pr_file": "components/crud-web-apps/common/backend/setup.py", - "discussion_id": "1732661929", - "commented_code": "@@ -9,6 +9,8 @@\n \"Werkzeug >= 0.16.0\",\n \"Flask-Cors >= 3.0.8\",\n \"gevent\",\n+ \"prometheus-flask-exporter >= 0.23.1\",\n+ \"importlib-metadata >= 1.0;python_version<'3.8'\",\n ]", - "comment_created_at": "2024-08-28T06:54:41+00:00", - "comment_author": "rgildein", - "comment_body": "From python 3.8 the `importlib.metadata` is build in package. 
You can see it [here](https://docs.python.org/3.8/library/importlib.metadata.html).", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-centralize-configuration-values.json b/_reviewers/kubeflow-centralize-configuration-values.json new file mode 100644 index 0000000..3464e49 --- /dev/null +++ b/_reviewers/kubeflow-centralize-configuration-values.json @@ -0,0 +1,208 @@ +[ + { + "discussion_id": "651645749", + "pr_number": 5880, + "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/app.component.ts", + "created_at": "2021-06-15T10:08:27+00:00", + "commented_code": "import { Component } from '@angular/core';\nimport { TranslateService } from '@ngx-translate/core';\n\n@Component({\n selector: 'app-root',\n templateUrl: './app.component.html',\n styleUrls: ['./app.component.scss'],\n})\nexport class AppComponent {\n constructor(private translate: TranslateService) {\n const currentLanguage = this.translate.getBrowserLang();\n const lang = currentLanguage.match(/en|fr/) ? currentLanguage : 'en';", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "651645749", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5880, + "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/app.component.ts", + "discussion_id": "651645749", + "commented_code": "@@ -1,10 +1,16 @@\n import { Component } from '@angular/core';\n+import { TranslateService } from '@ngx-translate/core';\n \n @Component({\n selector: 'app-root',\n templateUrl: './app.component.html',\n styleUrls: ['./app.component.scss'],\n })\n export class AppComponent {\n+ constructor(private translate: TranslateService) {\n+ const currentLanguage = this.translate.getBrowserLang();\n+ const lang = currentLanguage.match(/en|fr/) ? currentLanguage : 'en';", + "comment_created_at": "2021-06-15T10:08:27+00:00", + "comment_author": "davidspek", + "comment_body": "Will new languages need to be added to the `currentLanguage.match(/en|fr/)` section once the JSON files are added?", + "pr_file_module": null + }, + { + "comment_id": "651832266", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5880, + "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/app.component.ts", + "discussion_id": "651645749", + "commented_code": "@@ -1,10 +1,16 @@\n import { Component } from '@angular/core';\n+import { TranslateService } from '@ngx-translate/core';\n \n @Component({\n selector: 'app-root',\n templateUrl: './app.component.html',\n styleUrls: ['./app.component.scss'],\n })\n export class AppComponent {\n+ constructor(private translate: TranslateService) {\n+ const currentLanguage = this.translate.getBrowserLang();\n+ const lang = currentLanguage.match(/en|fr/) ? currentLanguage : 'en';", + "comment_created_at": "2021-06-15T14:12:05+00:00", + "comment_author": "Jose-Matsuda", + "comment_body": "That is correct. 
Once a language's JSON file is added the corresponding `app.component.ts` will need to be updated as well", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "596260211", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.ts", + "created_at": "2021-03-17T17:57:25+00:00", + "commented_code": "export class FormImageComponent implements OnInit, OnDestroy {\n @Input() parentForm: FormGroup;\n @Input() images: string[];\n @Input() readonly: boolean;\n @Input() jupyterReadonly: boolean;\n @Input() imagesVSCode: string[];\n @Input() vscodeReadonly: boolean;\n @Input() imagesRStudio: string[];\n @Input() rstudioReadonly: boolean;\n\n subs = new Subscription();\n\n constructor() {}\n constructor(iconRegistry: MatIconRegistry, sanitizer: DomSanitizer) {\n iconRegistry.addSvgIcon('jupyterlab', sanitizer.bypassSecurityTrustResourceUrl('static/assets/jupyterlab-wordmark.svg'));", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "596260211", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.ts", + "discussion_id": "596260211", + "commented_code": "@@ -10,11 +12,19 @@ import { Subscription } from 'rxjs';\n export class FormImageComponent implements OnInit, OnDestroy {\n @Input() parentForm: FormGroup;\n @Input() images: string[];\n- @Input() readonly: boolean;\n+ @Input() jupyterReadonly: boolean;\n+ @Input() imagesVSCode: string[];\n+ @Input() vscodeReadonly: boolean;\n+ @Input() imagesRStudio: string[];\n+ @Input() rstudioReadonly: boolean;\n \n subs = new Subscription();\n \n- constructor() {}\n+ constructor(iconRegistry: MatIconRegistry, sanitizer: DomSanitizer) {\n+ iconRegistry.addSvgIcon('jupyterlab', sanitizer.bypassSecurityTrustResourceUrl('static/assets/jupyterlab-wordmark.svg'));", + "comment_created_at": "2021-03-17T17:57:25+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Could you add the urls for the icons into the environment files instead? 
https://github.com/kubeflow/kubeflow/tree/master/components/crud-web-apps/jupyter/frontend/src/environments\r\n\r\nLets keep these links in a central place, where we might also add other links in the future like the svgs for the main page", + "pr_file_module": null + }, + { + "comment_id": "597052809", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.ts", + "discussion_id": "596260211", + "commented_code": "@@ -10,11 +12,19 @@ import { Subscription } from 'rxjs';\n export class FormImageComponent implements OnInit, OnDestroy {\n @Input() parentForm: FormGroup;\n @Input() images: string[];\n- @Input() readonly: boolean;\n+ @Input() jupyterReadonly: boolean;\n+ @Input() imagesVSCode: string[];\n+ @Input() vscodeReadonly: boolean;\n+ @Input() imagesRStudio: string[];\n+ @Input() rstudioReadonly: boolean;\n \n subs = new Subscription();\n \n- constructor() {}\n+ constructor(iconRegistry: MatIconRegistry, sanitizer: DomSanitizer) {\n+ iconRegistry.addSvgIcon('jupyterlab', sanitizer.bypassSecurityTrustResourceUrl('static/assets/jupyterlab-wordmark.svg'));", + "comment_created_at": "2021-03-18T16:37:25+00:00", + "comment_author": "davidspek", + "comment_body": "Just to be clear from what was discussed above, should the URL in the environment be were the SVG is hosted publicly, or should that first be downloaded by the backend and placed in the `static/assets/` directory, or have we decided to just add the SVGs in the static folder by default? Using the public URL directly means I'll need to deal with CORS. ", + "pr_file_module": null + }, + { + "comment_id": "597147456", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.ts", + "discussion_id": "596260211", + "commented_code": "@@ -10,11 +12,19 @@ import { Subscription } from 'rxjs';\n export class FormImageComponent implements OnInit, OnDestroy {\n @Input() parentForm: FormGroup;\n @Input() images: string[];\n- @Input() readonly: boolean;\n+ @Input() jupyterReadonly: boolean;\n+ @Input() imagesVSCode: string[];\n+ @Input() vscodeReadonly: boolean;\n+ @Input() imagesRStudio: string[];\n+ @Input() rstudioReadonly: boolean;\n \n subs = new Subscription();\n \n- constructor() {}\n+ constructor(iconRegistry: MatIconRegistry, sanitizer: DomSanitizer) {\n+ iconRegistry.addSvgIcon('jupyterlab', sanitizer.bypassSecurityTrustResourceUrl('static/assets/jupyterlab-wordmark.svg'));", + "comment_created_at": "2021-03-18T18:42:36+00:00", + "comment_author": "davidspek", + "comment_body": "@kimwnasptd Friendly ping on this in case you missed it, so I know what steps I need to take to get this resolved. 
", + "pr_file_module": null + }, + { + "comment_id": "597157251", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.ts", + "discussion_id": "596260211", + "commented_code": "@@ -10,11 +12,19 @@ import { Subscription } from 'rxjs';\n export class FormImageComponent implements OnInit, OnDestroy {\n @Input() parentForm: FormGroup;\n @Input() images: string[];\n- @Input() readonly: boolean;\n+ @Input() jupyterReadonly: boolean;\n+ @Input() imagesVSCode: string[];\n+ @Input() vscodeReadonly: boolean;\n+ @Input() imagesRStudio: string[];\n+ @Input() rstudioReadonly: boolean;\n \n subs = new Subscription();\n \n- constructor() {}\n+ constructor(iconRegistry: MatIconRegistry, sanitizer: DomSanitizer) {\n+ iconRegistry.addSvgIcon('jupyterlab', sanitizer.bypassSecurityTrustResourceUrl('static/assets/jupyterlab-wordmark.svg'));", + "comment_created_at": "2021-03-18T18:57:18+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Thanks for the ping! \r\n\r\nGood catch on the CORS issue. Lets dump the SVGs in the `static/assets` folder. And also define the links that the frontend will use in the `environment` files as mentioned above.", + "pr_file_module": null + }, + { + "comment_id": "597174913", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.ts", + "discussion_id": "596260211", + "commented_code": "@@ -10,11 +12,19 @@ import { Subscription } from 'rxjs';\n export class FormImageComponent implements OnInit, OnDestroy {\n @Input() parentForm: FormGroup;\n @Input() images: string[];\n- @Input() readonly: boolean;\n+ @Input() jupyterReadonly: boolean;\n+ @Input() imagesVSCode: string[];\n+ @Input() vscodeReadonly: boolean;\n+ @Input() imagesRStudio: string[];\n+ @Input() rstudioReadonly: boolean;\n \n subs = new Subscription();\n \n- constructor() {}\n+ constructor(iconRegistry: MatIconRegistry, sanitizer: DomSanitizer) {\n+ iconRegistry.addSvgIcon('jupyterlab', sanitizer.bypassSecurityTrustResourceUrl('static/assets/jupyterlab-wordmark.svg'));", + "comment_created_at": "2021-03-18T19:23:55+00:00", + "comment_author": "davidspek", + "comment_body": "Cool! This shouldn't take long to implement then.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "306063156", + "pr_number": 3599, + "pr_file": "components/gcp-click-to-deploy/src/Utils.ts", + "created_at": "2019-07-22T22:42:07+00:00", + "commented_code": "import * as bcrypt from 'bcryptjs';\n\nconst GITHUB_BASE_URL =\n 'https://raw.githubusercontent.com/kubeflow/kubeflow/master/';", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "306063156", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 3599, + "pr_file": "components/gcp-click-to-deploy/src/Utils.ts", + "discussion_id": "306063156", + "commented_code": "@@ -1,5 +1,8 @@\n import * as bcrypt from 'bcryptjs';\n \n+const GITHUB_BASE_URL =\n+ 'https://raw.githubusercontent.com/kubeflow/kubeflow/master/';\n+", + "comment_created_at": "2019-07-22T22:42:07+00:00", + "comment_author": "kunmingg", + "comment_body": "Config file on kubeflow master (or other branch) might not compatible with kfctl backend.\r\nShall we bake v0.5 & v0.6 config files into image during build? 
", + "pr_file_module": null + }, + { + "comment_id": "306085992", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 3599, + "pr_file": "components/gcp-click-to-deploy/src/Utils.ts", + "discussion_id": "306063156", + "commented_code": "@@ -1,5 +1,8 @@\n import * as bcrypt from 'bcryptjs';\n \n+const GITHUB_BASE_URL =\n+ 'https://raw.githubusercontent.com/kubeflow/kubeflow/master/';\n+", + "comment_created_at": "2019-07-23T00:28:03+00:00", + "comment_author": "prodonjs", + "comment_body": "Currently, that's how it's done for the 0.5.0 compatible deployment but Jeremy had a comment to try to fetch it from GitHub. Pulling from master is probably problematic but we can easily point to a branch for each version to ensure that we get a consistent file that should be compatible with the version in question.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1746876292", + "pr_number": 7639, + "pr_file": "components/centraldashboard/app/server.ts", + "created_at": "2024-09-06T10:07:03+00:00", + "commented_code": "REGISTRATION_FLOW = \"true\",\n PROMETHEUS_URL = undefined,\n METRICS_DASHBOARD = undefined,\n METRICS_PORT = 9100,", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1746876292", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7639, + "pr_file": "components/centraldashboard/app/server.ts", + "discussion_id": "1746876292", + "commented_code": "@@ -33,6 +39,7 @@ const {\n REGISTRATION_FLOW = \"true\",\n PROMETHEUS_URL = undefined,\n METRICS_DASHBOARD = undefined,\n+ METRICS_PORT = 9100,", + "comment_created_at": "2024-09-06T10:07:03+00:00", + "comment_author": "orfeas-k", + "comment_body": "Should we expose this also in the [Dockerfile](https://github.com/rgildein/kubeflow/blob/94995d6a519183fc19a25ba36a5f23cd6b78f681/components/centraldashboard/Dockerfile#L30-L31)?", + "pr_file_module": null + }, + { + "comment_id": "1746910272", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7639, + "pr_file": "components/centraldashboard/app/server.ts", + "discussion_id": "1746876292", + "commented_code": "@@ -33,6 +39,7 @@ const {\n REGISTRATION_FLOW = \"true\",\n PROMETHEUS_URL = undefined,\n METRICS_DASHBOARD = undefined,\n+ METRICS_PORT = 9100,", + "comment_created_at": "2024-09-06T10:35:09+00:00", + "comment_author": "orfeas-k", + "comment_body": "and probably manifests too?\r\nhttps://github.com/kubeflow/kubeflow/blob/54201e3d28e4673d465e47e464e86aa698ca0dfa/components/centraldashboard/manifests/base/deployment.yaml#L29-L31\r\nhttps://github.com/kubeflow/kubeflow/blob/master/components/centraldashboard/manifests/base/service.yaml#L9-L11", + "pr_file_module": null + }, + { + "comment_id": "1746930746", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7639, + "pr_file": "components/centraldashboard/app/server.ts", + "discussion_id": "1746876292", + "commented_code": "@@ -33,6 +39,7 @@ const {\n REGISTRATION_FLOW = \"true\",\n PROMETHEUS_URL = undefined,\n METRICS_DASHBOARD = undefined,\n+ METRICS_PORT = 9100,", + "comment_created_at": "2024-09-06T10:55:37+00:00", + "comment_author": "rgildein", + "comment_body": "What do you say if we change this to be only `METRICS`, which will enable or disable the metrics and port will be always 9100? I don't think there is much need to change the port, but we can use both `METRICS` and `METRICS PORT`. 
WDYT?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1746905300", + "pr_number": 7639, + "pr_file": "components/centraldashboard/app/server.ts", + "created_at": "2024-09-06T10:30:11+00:00", + "commented_code": "console.info(`Using Profiles service at ${profilesServiceUrl}`);\n const profilesService = new DefaultApi(profilesServiceUrl);\n const metricsPort: number = Number(METRICS_PORT);\n\n // Custom metrics configuration\n app.use(\n responseTime((req: Request, res: Response, time: number) => {\n restHttpRequestTotal.labels({ method: req.method, status: res.statusCode }).inc();\n restHttpRequestDuration.labels(\n { method: req.method, path: req.baseUrl, status: res.statusCode }).observe(time);\n }),\n );", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1746905300", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7639, + "pr_file": "components/centraldashboard/app/server.ts", + "discussion_id": "1746905300", + "commented_code": "@@ -52,6 +59,16 @@ async function main() {\n \n console.info(`Using Profiles service at ${profilesServiceUrl}`);\n const profilesService = new DefaultApi(profilesServiceUrl);\n+ const metricsPort: number = Number(METRICS_PORT);\n+\n+ // Custom metrics configuration\n+ app.use(\n+ responseTime((req: Request, res: Response, time: number) => {\n+ restHttpRequestTotal.labels({ method: req.method, status: res.statusCode }).inc();\n+ restHttpRequestDuration.labels(\n+ { method: req.method, path: req.baseUrl, status: res.statusCode }).observe(time);\n+ }),\n+ );", + "comment_created_at": "2024-09-06T10:30:11+00:00", + "comment_author": "orfeas-k", + "comment_body": "Would it make sense to enable/disable the feature according to env variables, similar to the crud-web-apps? And then add the required environment variable in this deployment", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-centralize-configuration-values.md b/_reviewers/kubeflow-centralize-configuration-values.md index 2251ea2..bf194ba 100644 --- a/_reviewers/kubeflow-centralize-configuration-values.md +++ b/_reviewers/kubeflow-centralize-configuration-values.md @@ -57,213 +57,3 @@ constructor(iconRegistry: MatIconRegistry, sanitizer: DomSanitizer) { ``` This approach makes it easier to add new languages or assets without modifying component code, and keeps configuration organized in central locations. - - -[ - { - "discussion_id": "651645749", - "pr_number": 5880, - "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/app.component.ts", - "created_at": "2021-06-15T10:08:27+00:00", - "commented_code": "import { Component } from '@angular/core';\nimport { TranslateService } from '@ngx-translate/core';\n\n@Component({\n selector: 'app-root',\n templateUrl: './app.component.html',\n styleUrls: ['./app.component.scss'],\n})\nexport class AppComponent {\n constructor(private translate: TranslateService) {\n const currentLanguage = this.translate.getBrowserLang();\n const lang = currentLanguage.match(/en|fr/) ? 
currentLanguage : 'en';", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "651645749", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5880, - "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/app.component.ts", - "discussion_id": "651645749", - "commented_code": "@@ -1,10 +1,16 @@\n import { Component } from '@angular/core';\n+import { TranslateService } from '@ngx-translate/core';\n \n @Component({\n selector: 'app-root',\n templateUrl: './app.component.html',\n styleUrls: ['./app.component.scss'],\n })\n export class AppComponent {\n+ constructor(private translate: TranslateService) {\n+ const currentLanguage = this.translate.getBrowserLang();\n+ const lang = currentLanguage.match(/en|fr/) ? currentLanguage : 'en';", - "comment_created_at": "2021-06-15T10:08:27+00:00", - "comment_author": "davidspek", - "comment_body": "Will new languages need to be added to the `currentLanguage.match(/en|fr/)` section once the JSON files are added?", - "pr_file_module": null - }, - { - "comment_id": "651832266", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5880, - "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/app.component.ts", - "discussion_id": "651645749", - "commented_code": "@@ -1,10 +1,16 @@\n import { Component } from '@angular/core';\n+import { TranslateService } from '@ngx-translate/core';\n \n @Component({\n selector: 'app-root',\n templateUrl: './app.component.html',\n styleUrls: ['./app.component.scss'],\n })\n export class AppComponent {\n+ constructor(private translate: TranslateService) {\n+ const currentLanguage = this.translate.getBrowserLang();\n+ const lang = currentLanguage.match(/en|fr/) ? currentLanguage : 'en';", - "comment_created_at": "2021-06-15T14:12:05+00:00", - "comment_author": "Jose-Matsuda", - "comment_body": "That is correct. 
Once a language's JSON file is added the corresponding `app.component.ts` will need to be updated as well", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "596260211", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.ts", - "created_at": "2021-03-17T17:57:25+00:00", - "commented_code": "export class FormImageComponent implements OnInit, OnDestroy {\n @Input() parentForm: FormGroup;\n @Input() images: string[];\n @Input() readonly: boolean;\n @Input() jupyterReadonly: boolean;\n @Input() imagesVSCode: string[];\n @Input() vscodeReadonly: boolean;\n @Input() imagesRStudio: string[];\n @Input() rstudioReadonly: boolean;\n\n subs = new Subscription();\n\n constructor() {}\n constructor(iconRegistry: MatIconRegistry, sanitizer: DomSanitizer) {\n iconRegistry.addSvgIcon('jupyterlab', sanitizer.bypassSecurityTrustResourceUrl('static/assets/jupyterlab-wordmark.svg'));", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "596260211", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.ts", - "discussion_id": "596260211", - "commented_code": "@@ -10,11 +12,19 @@ import { Subscription } from 'rxjs';\n export class FormImageComponent implements OnInit, OnDestroy {\n @Input() parentForm: FormGroup;\n @Input() images: string[];\n- @Input() readonly: boolean;\n+ @Input() jupyterReadonly: boolean;\n+ @Input() imagesVSCode: string[];\n+ @Input() vscodeReadonly: boolean;\n+ @Input() imagesRStudio: string[];\n+ @Input() rstudioReadonly: boolean;\n \n subs = new Subscription();\n \n- constructor() {}\n+ constructor(iconRegistry: MatIconRegistry, sanitizer: DomSanitizer) {\n+ iconRegistry.addSvgIcon('jupyterlab', sanitizer.bypassSecurityTrustResourceUrl('static/assets/jupyterlab-wordmark.svg'));", - "comment_created_at": "2021-03-17T17:57:25+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Could you add the urls for the icons into the environment files instead? 
https://github.com/kubeflow/kubeflow/tree/master/components/crud-web-apps/jupyter/frontend/src/environments\r\n\r\nLets keep these links in a central place, where we might also add other links in the future like the svgs for the main page", - "pr_file_module": null - }, - { - "comment_id": "597052809", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.ts", - "discussion_id": "596260211", - "commented_code": "@@ -10,11 +12,19 @@ import { Subscription } from 'rxjs';\n export class FormImageComponent implements OnInit, OnDestroy {\n @Input() parentForm: FormGroup;\n @Input() images: string[];\n- @Input() readonly: boolean;\n+ @Input() jupyterReadonly: boolean;\n+ @Input() imagesVSCode: string[];\n+ @Input() vscodeReadonly: boolean;\n+ @Input() imagesRStudio: string[];\n+ @Input() rstudioReadonly: boolean;\n \n subs = new Subscription();\n \n- constructor() {}\n+ constructor(iconRegistry: MatIconRegistry, sanitizer: DomSanitizer) {\n+ iconRegistry.addSvgIcon('jupyterlab', sanitizer.bypassSecurityTrustResourceUrl('static/assets/jupyterlab-wordmark.svg'));", - "comment_created_at": "2021-03-18T16:37:25+00:00", - "comment_author": "davidspek", - "comment_body": "Just to be clear from what was discussed above, should the URL in the environment be were the SVG is hosted publicly, or should that first be downloaded by the backend and placed in the `static/assets/` directory, or have we decided to just add the SVGs in the static folder by default? Using the public URL directly means I'll need to deal with CORS. ", - "pr_file_module": null - }, - { - "comment_id": "597147456", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.ts", - "discussion_id": "596260211", - "commented_code": "@@ -10,11 +12,19 @@ import { Subscription } from 'rxjs';\n export class FormImageComponent implements OnInit, OnDestroy {\n @Input() parentForm: FormGroup;\n @Input() images: string[];\n- @Input() readonly: boolean;\n+ @Input() jupyterReadonly: boolean;\n+ @Input() imagesVSCode: string[];\n+ @Input() vscodeReadonly: boolean;\n+ @Input() imagesRStudio: string[];\n+ @Input() rstudioReadonly: boolean;\n \n subs = new Subscription();\n \n- constructor() {}\n+ constructor(iconRegistry: MatIconRegistry, sanitizer: DomSanitizer) {\n+ iconRegistry.addSvgIcon('jupyterlab', sanitizer.bypassSecurityTrustResourceUrl('static/assets/jupyterlab-wordmark.svg'));", - "comment_created_at": "2021-03-18T18:42:36+00:00", - "comment_author": "davidspek", - "comment_body": "@kimwnasptd Friendly ping on this in case you missed it, so I know what steps I need to take to get this resolved. 
", - "pr_file_module": null - }, - { - "comment_id": "597157251", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.ts", - "discussion_id": "596260211", - "commented_code": "@@ -10,11 +12,19 @@ import { Subscription } from 'rxjs';\n export class FormImageComponent implements OnInit, OnDestroy {\n @Input() parentForm: FormGroup;\n @Input() images: string[];\n- @Input() readonly: boolean;\n+ @Input() jupyterReadonly: boolean;\n+ @Input() imagesVSCode: string[];\n+ @Input() vscodeReadonly: boolean;\n+ @Input() imagesRStudio: string[];\n+ @Input() rstudioReadonly: boolean;\n \n subs = new Subscription();\n \n- constructor() {}\n+ constructor(iconRegistry: MatIconRegistry, sanitizer: DomSanitizer) {\n+ iconRegistry.addSvgIcon('jupyterlab', sanitizer.bypassSecurityTrustResourceUrl('static/assets/jupyterlab-wordmark.svg'));", - "comment_created_at": "2021-03-18T18:57:18+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Thanks for the ping! \r\n\r\nGood catch on the CORS issue. Lets dump the SVGs in the `static/assets` folder. And also define the links that the frontend will use in the `environment` files as mentioned above.", - "pr_file_module": null - }, - { - "comment_id": "597174913", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.ts", - "discussion_id": "596260211", - "commented_code": "@@ -10,11 +12,19 @@ import { Subscription } from 'rxjs';\n export class FormImageComponent implements OnInit, OnDestroy {\n @Input() parentForm: FormGroup;\n @Input() images: string[];\n- @Input() readonly: boolean;\n+ @Input() jupyterReadonly: boolean;\n+ @Input() imagesVSCode: string[];\n+ @Input() vscodeReadonly: boolean;\n+ @Input() imagesRStudio: string[];\n+ @Input() rstudioReadonly: boolean;\n \n subs = new Subscription();\n \n- constructor() {}\n+ constructor(iconRegistry: MatIconRegistry, sanitizer: DomSanitizer) {\n+ iconRegistry.addSvgIcon('jupyterlab', sanitizer.bypassSecurityTrustResourceUrl('static/assets/jupyterlab-wordmark.svg'));", - "comment_created_at": "2021-03-18T19:23:55+00:00", - "comment_author": "davidspek", - "comment_body": "Cool! This shouldn't take long to implement then.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "306063156", - "pr_number": 3599, - "pr_file": "components/gcp-click-to-deploy/src/Utils.ts", - "created_at": "2019-07-22T22:42:07+00:00", - "commented_code": "import * as bcrypt from 'bcryptjs';\n\nconst GITHUB_BASE_URL =\n 'https://raw.githubusercontent.com/kubeflow/kubeflow/master/';", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "306063156", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 3599, - "pr_file": "components/gcp-click-to-deploy/src/Utils.ts", - "discussion_id": "306063156", - "commented_code": "@@ -1,5 +1,8 @@\n import * as bcrypt from 'bcryptjs';\n \n+const GITHUB_BASE_URL =\n+ 'https://raw.githubusercontent.com/kubeflow/kubeflow/master/';\n+", - "comment_created_at": "2019-07-22T22:42:07+00:00", - "comment_author": "kunmingg", - "comment_body": "Config file on kubeflow master (or other branch) might not compatible with kfctl backend.\r\nShall we bake v0.5 & v0.6 config files into image during build? 
", - "pr_file_module": null - }, - { - "comment_id": "306085992", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 3599, - "pr_file": "components/gcp-click-to-deploy/src/Utils.ts", - "discussion_id": "306063156", - "commented_code": "@@ -1,5 +1,8 @@\n import * as bcrypt from 'bcryptjs';\n \n+const GITHUB_BASE_URL =\n+ 'https://raw.githubusercontent.com/kubeflow/kubeflow/master/';\n+", - "comment_created_at": "2019-07-23T00:28:03+00:00", - "comment_author": "prodonjs", - "comment_body": "Currently, that's how it's done for the 0.5.0 compatible deployment but Jeremy had a comment to try to fetch it from GitHub. Pulling from master is probably problematic but we can easily point to a branch for each version to ensure that we get a consistent file that should be compatible with the version in question.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1746876292", - "pr_number": 7639, - "pr_file": "components/centraldashboard/app/server.ts", - "created_at": "2024-09-06T10:07:03+00:00", - "commented_code": "REGISTRATION_FLOW = \"true\",\n PROMETHEUS_URL = undefined,\n METRICS_DASHBOARD = undefined,\n METRICS_PORT = 9100,", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1746876292", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7639, - "pr_file": "components/centraldashboard/app/server.ts", - "discussion_id": "1746876292", - "commented_code": "@@ -33,6 +39,7 @@ const {\n REGISTRATION_FLOW = \"true\",\n PROMETHEUS_URL = undefined,\n METRICS_DASHBOARD = undefined,\n+ METRICS_PORT = 9100,", - "comment_created_at": "2024-09-06T10:07:03+00:00", - "comment_author": "orfeas-k", - "comment_body": "Should we expose this also in the [Dockerfile](https://github.com/rgildein/kubeflow/blob/94995d6a519183fc19a25ba36a5f23cd6b78f681/components/centraldashboard/Dockerfile#L30-L31)?", - "pr_file_module": null - }, - { - "comment_id": "1746910272", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7639, - "pr_file": "components/centraldashboard/app/server.ts", - "discussion_id": "1746876292", - "commented_code": "@@ -33,6 +39,7 @@ const {\n REGISTRATION_FLOW = \"true\",\n PROMETHEUS_URL = undefined,\n METRICS_DASHBOARD = undefined,\n+ METRICS_PORT = 9100,", - "comment_created_at": "2024-09-06T10:35:09+00:00", - "comment_author": "orfeas-k", - "comment_body": "and probably manifests too?\r\nhttps://github.com/kubeflow/kubeflow/blob/54201e3d28e4673d465e47e464e86aa698ca0dfa/components/centraldashboard/manifests/base/deployment.yaml#L29-L31\r\nhttps://github.com/kubeflow/kubeflow/blob/master/components/centraldashboard/manifests/base/service.yaml#L9-L11", - "pr_file_module": null - }, - { - "comment_id": "1746930746", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7639, - "pr_file": "components/centraldashboard/app/server.ts", - "discussion_id": "1746876292", - "commented_code": "@@ -33,6 +39,7 @@ const {\n REGISTRATION_FLOW = \"true\",\n PROMETHEUS_URL = undefined,\n METRICS_DASHBOARD = undefined,\n+ METRICS_PORT = 9100,", - "comment_created_at": "2024-09-06T10:55:37+00:00", - "comment_author": "rgildein", - "comment_body": "What do you say if we change this to be only `METRICS`, which will enable or disable the metrics and port will be always 9100? I don't think there is much need to change the port, but we can use both `METRICS` and `METRICS PORT`. 
WDYT?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1746905300", - "pr_number": 7639, - "pr_file": "components/centraldashboard/app/server.ts", - "created_at": "2024-09-06T10:30:11+00:00", - "commented_code": "console.info(`Using Profiles service at ${profilesServiceUrl}`);\n const profilesService = new DefaultApi(profilesServiceUrl);\n const metricsPort: number = Number(METRICS_PORT);\n\n // Custom metrics configuration\n app.use(\n responseTime((req: Request, res: Response, time: number) => {\n restHttpRequestTotal.labels({ method: req.method, status: res.statusCode }).inc();\n restHttpRequestDuration.labels(\n { method: req.method, path: req.baseUrl, status: res.statusCode }).observe(time);\n }),\n );", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1746905300", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7639, - "pr_file": "components/centraldashboard/app/server.ts", - "discussion_id": "1746905300", - "commented_code": "@@ -52,6 +59,16 @@ async function main() {\n \n console.info(`Using Profiles service at ${profilesServiceUrl}`);\n const profilesService = new DefaultApi(profilesServiceUrl);\n+ const metricsPort: number = Number(METRICS_PORT);\n+\n+ // Custom metrics configuration\n+ app.use(\n+ responseTime((req: Request, res: Response, time: number) => {\n+ restHttpRequestTotal.labels({ method: req.method, status: res.statusCode }).inc();\n+ restHttpRequestDuration.labels(\n+ { method: req.method, path: req.baseUrl, status: res.statusCode }).observe(time);\n+ }),\n+ );", - "comment_created_at": "2024-09-06T10:30:11+00:00", - "comment_author": "orfeas-k", - "comment_body": "Would it make sense to enable/disable the feature according to env variables, similar to the crud-web-apps? 
And then add the required environment variable in this deployment", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-centralize-dependency-configurations.json b/_reviewers/kubeflow-centralize-dependency-configurations.json new file mode 100644 index 0000000..45126ed --- /dev/null +++ b/_reviewers/kubeflow-centralize-dependency-configurations.json @@ -0,0 +1,82 @@ +[ + { + "discussion_id": "1356977840", + "pr_number": 7303, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-full/requirements.txt", + "created_at": "2023-10-12T15:11:47+00:00", + "commented_code": "# kubeflow packages\nkfp==1.6.3\nkfp-server-api==1.6.0\nkfserving==0.5.1\nkfp==2.3.0\nkfp-server-api==2.0.1\nkfserving==0.6.1", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1356977840", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7303, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-full/requirements.txt", + "discussion_id": "1356977840", + "commented_code": "@@ -1,23 +1,23 @@\n # kubeflow packages\n-kfp==1.6.3\n-kfp-server-api==1.6.0\n-kfserving==0.5.1\n+kfp==2.3.0\n+kfp-server-api==2.0.1\n+kfserving==0.6.1", + "comment_created_at": "2023-10-12T15:11:47+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Should we drop this package and install the corresponding kserve package instead?", + "pr_file_module": null + }, + { + "comment_id": "1360925751", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7303, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-full/requirements.txt", + "discussion_id": "1356977840", + "commented_code": "@@ -1,23 +1,23 @@\n # kubeflow packages\n-kfp==1.6.3\n-kfp-server-api==1.6.0\n-kfserving==0.5.1\n+kfp==2.3.0\n+kfp-server-api==2.0.1\n+kfserving==0.6.1", + "comment_created_at": "2023-10-16T16:20:55+00:00", + "comment_author": "juliusvonkohout", + "comment_body": "yes drop kfserving and install 0.11.1 from here https://pypi.org/project/kserve/", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "850324947", + "pr_number": 6422, + "pr_file": "components/crud-web-apps/jupyter/backend/requirements.txt", + "created_at": "2022-04-14T10:50:25+00:00", + "commented_code": "kubernetes==v22.6.0", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "850324947", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6422, + "pr_file": "components/crud-web-apps/jupyter/backend/requirements.txt", + "discussion_id": "850324947", + "commented_code": "@@ -1 +1,2 @@\n+kubernetes==v22.6.0", + "comment_created_at": "2022-04-14T10:50:25+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Could you instead make this change into the common code, so that it takes effect for all the web apps?\r\n\r\nhttps://github.com/kubeflow/kubeflow/blob/master/components/crud-web-apps/common/backend/setup.py#L6", + "pr_file_module": null + }, + { + "comment_id": "851208458", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6422, + "pr_file": "components/crud-web-apps/jupyter/backend/requirements.txt", + "discussion_id": "850324947", + "commented_code": "@@ -1 +1,2 @@\n+kubernetes==v22.6.0", + "comment_created_at": "2022-04-15T10:48:38+00:00", + "comment_author": "benjamintanweihao", + "comment_body": "> @benjamintanweihao could you verify that deleting objects [i.e. 
Notebooks] also works as expected?\r\n> \r\n> I remember that there was a change in the newer version of the library on how to pass the deletion-policy, but I might be wrong.\r\n\r\nJust tried deleting the notebook and it's working :D. Sure thing, I'll also make the change everywhere.", + "pr_file_module": null + }, + { + "comment_id": "851995813", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6422, + "pr_file": "components/crud-web-apps/jupyter/backend/requirements.txt", + "discussion_id": "850324947", + "commented_code": "@@ -1 +1,2 @@\n+kubernetes==v22.6.0", + "comment_created_at": "2022-04-18T09:17:19+00:00", + "comment_author": "benjamintanweihao", + "comment_body": "Done. :) ", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-centralize-dependency-configurations.md b/_reviewers/kubeflow-centralize-dependency-configurations.md index 136b6fc..dc3e701 100644 --- a/_reviewers/kubeflow-centralize-dependency-configurations.md +++ b/_reviewers/kubeflow-centralize-dependency-configurations.md @@ -36,87 +36,3 @@ kfserving==0.5.1 # deprecated package # Do: kserve==0.11.1 # recommended successor ``` - - -[ - { - "discussion_id": "1356977840", - "pr_number": 7303, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-full/requirements.txt", - "created_at": "2023-10-12T15:11:47+00:00", - "commented_code": "# kubeflow packages\nkfp==1.6.3\nkfp-server-api==1.6.0\nkfserving==0.5.1\nkfp==2.3.0\nkfp-server-api==2.0.1\nkfserving==0.6.1", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1356977840", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7303, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-full/requirements.txt", - "discussion_id": "1356977840", - "commented_code": "@@ -1,23 +1,23 @@\n # kubeflow packages\n-kfp==1.6.3\n-kfp-server-api==1.6.0\n-kfserving==0.5.1\n+kfp==2.3.0\n+kfp-server-api==2.0.1\n+kfserving==0.6.1", - "comment_created_at": "2023-10-12T15:11:47+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Should we drop this package and install the corresponding kserve package instead?", - "pr_file_module": null - }, - { - "comment_id": "1360925751", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7303, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-full/requirements.txt", - "discussion_id": "1356977840", - "commented_code": "@@ -1,23 +1,23 @@\n # kubeflow packages\n-kfp==1.6.3\n-kfp-server-api==1.6.0\n-kfserving==0.5.1\n+kfp==2.3.0\n+kfp-server-api==2.0.1\n+kfserving==0.6.1", - "comment_created_at": "2023-10-16T16:20:55+00:00", - "comment_author": "juliusvonkohout", - "comment_body": "yes drop kfserving and install 0.11.1 from here https://pypi.org/project/kserve/", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "850324947", - "pr_number": 6422, - "pr_file": "components/crud-web-apps/jupyter/backend/requirements.txt", - "created_at": "2022-04-14T10:50:25+00:00", - "commented_code": "kubernetes==v22.6.0", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "850324947", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6422, - "pr_file": "components/crud-web-apps/jupyter/backend/requirements.txt", - "discussion_id": "850324947", - "commented_code": "@@ -1 +1,2 @@\n+kubernetes==v22.6.0", - "comment_created_at": "2022-04-14T10:50:25+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Could you instead make this change into the common code, so that it 
takes effect for all the web apps?\r\n\r\nhttps://github.com/kubeflow/kubeflow/blob/master/components/crud-web-apps/common/backend/setup.py#L6", - "pr_file_module": null - }, - { - "comment_id": "851208458", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6422, - "pr_file": "components/crud-web-apps/jupyter/backend/requirements.txt", - "discussion_id": "850324947", - "commented_code": "@@ -1 +1,2 @@\n+kubernetes==v22.6.0", - "comment_created_at": "2022-04-15T10:48:38+00:00", - "comment_author": "benjamintanweihao", - "comment_body": "> @benjamintanweihao could you verify that deleting objects [i.e. Notebooks] also works as expected?\r\n> \r\n> I remember that there was a change in the newer version of the library on how to pass the deletion-policy, but I might be wrong.\r\n\r\nJust tried deleting the notebook and it's working :D. Sure thing, I'll also make the change everywhere.", - "pr_file_module": null - }, - { - "comment_id": "851995813", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6422, - "pr_file": "components/crud-web-apps/jupyter/backend/requirements.txt", - "discussion_id": "850324947", - "commented_code": "@@ -1 +1,2 @@\n+kubernetes==v22.6.0", - "comment_created_at": "2022-04-18T09:17:19+00:00", - "comment_author": "benjamintanweihao", - "comment_body": "Done. :) ", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-check-before-use.json b/_reviewers/kubeflow-check-before-use.json new file mode 100644 index 0000000..a0ba586 --- /dev/null +++ b/_reviewers/kubeflow-check-before-use.json @@ -0,0 +1,80 @@ +[ + { + "discussion_id": "956151854", + "pr_number": 6628, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "created_at": "2022-08-26T15:16:18+00:00", + "commented_code": "return ctrl.Result{RequeueAfter: culler.GetRequeueTime()}, nil\n}\n\nfunc updateNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,\n\tsts *appsv1.StatefulSet, req ctrl.Request) (bool, error) {\n\n\tlog := r.Log.WithValues(\"notebook\", req.NamespacedName)\n\tctx := context.Background()\n\n\tstatusUpdate := false\n\t// Initialize Notebook CR Status\n\tif nb.Status.Conditions == nil {\n\t\tlog.Info(\"Initializing Notebook CR Status\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n\t\tnewStatus := initializeStatus()\n\t\tnb.Status = newStatus\n\t\tstatusUpdate = true\n\t}\n\n\t// Update the Notebook CR status.readyReplicas if the status is changed\n\tif sts.Status.ReadyReplicas != nb.Status.ReadyReplicas {\n\t\tlog.Info(\"Updating status.ReadyReplicas\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n\t\tnb.Status.ReadyReplicas = sts.Status.ReadyReplicas\n\t\tstatusUpdate = true\n\t}\n\n\t// Check the pod status\n\tpod := &corev1.Pod{}\n\tpodFound := false\n\terr := r.Get(ctx, types.NamespacedName{Name: sts.Name + \"-0\", Namespace: sts.Namespace}, pod)", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "956151854", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6628, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "956151854", + "commented_code": "@@ -324,6 +269,102 @@ func (r *NotebookReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c\n \treturn ctrl.Result{RequeueAfter: culler.GetRequeueTime()}, nil\n }\n \n+func updateNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,\n+\tsts *appsv1.StatefulSet, req ctrl.Request) (bool, error) {\n+\n+\tlog := r.Log.WithValues(\"notebook\", req.NamespacedName)\n+\tctx 
:= context.Background()\n+\n+\tstatusUpdate := false\n+\t// Initialize Notebook CR Status\n+\tif nb.Status.Conditions == nil {\n+\t\tlog.Info(\"Initializing Notebook CR Status\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n+\t\tnewStatus := initializeStatus()\n+\t\tnb.Status = newStatus\n+\t\tstatusUpdate = true\n+\t}\n+\n+\t// Update the Notebook CR status.readyReplicas if the status is changed\n+\tif sts.Status.ReadyReplicas != nb.Status.ReadyReplicas {\n+\t\tlog.Info(\"Updating status.ReadyReplicas\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n+\t\tnb.Status.ReadyReplicas = sts.Status.ReadyReplicas\n+\t\tstatusUpdate = true\n+\t}\n+\n+\t// Check the pod status\n+\tpod := &corev1.Pod{}\n+\tpodFound := false\n+\terr := r.Get(ctx, types.NamespacedName{Name: sts.Name + \"-0\", Namespace: sts.Namespace}, pod)", + "comment_created_at": "2022-08-26T15:16:18+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Let's move this part completely outside of this function and treat the Pod as the StatefulSet. \r\n\r\nThe main reconciliation logic is responsible for finding the objects (Pod, StatefulSet) and our function will need to check if they are `nil` or not", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "956155664", + "pr_number": 6628, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "created_at": "2022-08-26T15:20:25+00:00", + "commented_code": "}\n\t}\n\n\t// Update the readyReplicas if the status is changed\n\tif foundStateful.Status.ReadyReplicas != instance.Status.ReadyReplicas {\n\t\tlog.Info(\"Updating Status\", \"namespace\", instance.Namespace, \"name\", instance.Name)\n\t\tinstance.Status.ReadyReplicas = foundStateful.Status.ReadyReplicas\n\t\terr = r.Status().Update(ctx, instance)\n\t\tif err != nil {\n\t\t\treturn ctrl.Result{}, err\n\t\t}\n\t}\n\n\t// Check the pod status\n\tpod := &corev1.Pod{}\n\tpodFound := false\n\terr = r.Get(ctx, types.NamespacedName{Name: ss.Name + \"-0\", Namespace: ss.Namespace}, pod)\n\tif err != nil && apierrs.IsNotFound(err) {\n\t\t// This should be reconciled by the StatefulSet\n\t\tlog.Info(\"Pod not found...\")\n\t} else if err != nil {\n\t// Update Notebook CR status\n\tpodFound, err := updateNotebookStatus(r, instance, foundStateful, req)\n\tif err != nil {\n\t\treturn ctrl.Result{}, err\n\t} else {", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "956155664", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6628, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "956155664", + "commented_code": "@@ -206,65 +206,10 @@ func (r *NotebookReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c\n \t\t}\n \t}\n \n-\t// Update the readyReplicas if the status is changed\n-\tif foundStateful.Status.ReadyReplicas != instance.Status.ReadyReplicas {\n-\t\tlog.Info(\"Updating Status\", \"namespace\", instance.Namespace, \"name\", instance.Name)\n-\t\tinstance.Status.ReadyReplicas = foundStateful.Status.ReadyReplicas\n-\t\terr = r.Status().Update(ctx, instance)\n-\t\tif err != nil {\n-\t\t\treturn ctrl.Result{}, err\n-\t\t}\n-\t}\n-\n-\t// Check the pod status\n-\tpod := &corev1.Pod{}\n-\tpodFound := false\n-\terr = r.Get(ctx, types.NamespacedName{Name: ss.Name + \"-0\", Namespace: ss.Namespace}, pod)\n-\tif err != nil && apierrs.IsNotFound(err) {\n-\t\t// This should be reconciled by the StatefulSet\n-\t\tlog.Info(\"Pod not found...\")\n-\t} else if err != nil {\n+\t// Update Notebook 
CR status\n+\tpodFound, err := updateNotebookStatus(r, instance, foundStateful, req)\n+\tif err != nil {\n \t\treturn ctrl.Result{}, err\n-\t} else {", + "comment_created_at": "2022-08-26T15:20:25+00:00", + "comment_author": "kimwnasptd", + "comment_body": "This nesting level can be simplified by checking the incoming `pod` object:\r\n\r\n```golang\r\nif pod == nil {\r\n log.Info(\"No pod found. Won't update notebook conditions and containerState\")\r\n return status, nil\r\n}\r\n\r\n```", + "pr_file_module": null + }, + { + "comment_id": "957340345", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6628, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "956155664", + "commented_code": "@@ -206,65 +206,10 @@ func (r *NotebookReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c\n \t\t}\n \t}\n \n-\t// Update the readyReplicas if the status is changed\n-\tif foundStateful.Status.ReadyReplicas != instance.Status.ReadyReplicas {\n-\t\tlog.Info(\"Updating Status\", \"namespace\", instance.Namespace, \"name\", instance.Name)\n-\t\tinstance.Status.ReadyReplicas = foundStateful.Status.ReadyReplicas\n-\t\terr = r.Status().Update(ctx, instance)\n-\t\tif err != nil {\n-\t\t\treturn ctrl.Result{}, err\n-\t\t}\n-\t}\n-\n-\t// Check the pod status\n-\tpod := &corev1.Pod{}\n-\tpodFound := false\n-\terr = r.Get(ctx, types.NamespacedName{Name: ss.Name + \"-0\", Namespace: ss.Namespace}, pod)\n-\tif err != nil && apierrs.IsNotFound(err) {\n-\t\t// This should be reconciled by the StatefulSet\n-\t\tlog.Info(\"Pod not found...\")\n-\t} else if err != nil {\n+\t// Update Notebook CR status\n+\tpodFound, err := updateNotebookStatus(r, instance, foundStateful, req)\n+\tif err != nil {\n \t\treturn ctrl.Result{}, err\n-\t} else {", + "comment_created_at": "2022-08-29T13:31:02+00:00", + "comment_author": "apo-ger", + "comment_body": "Fixed!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "335224658", + "pr_number": 4309, + "pr_file": "bootstrap/pkg/kfapp/gcp/gcp.go", + "created_at": "2019-10-15T23:46:15+00:00", + "commented_code": "return nil\n}\n\n// createK8sServiceAccount creates k8s servicea account with annotation\n// createOrUpdateK8sServiceAccount creates or updates k8s service account with annotation\n// iam.gke.io/gcp-service-account=gsa\n// TODO(lunkai): Ideally the k8s service account should be specified by kustomize.\nfunc createK8sServiceAccount(k8sClientset *clientset.Clientset, namespace string, name string, gsa string) error {\nfunc createOrUpdateK8sServiceAccount(k8sClientset *clientset.Clientset, namespace string, name string, gsa string) error {\n\tlog.Infof(\"Creating service account %v in namespace %v\", name, namespace)\n\t_, err := k8sClientset.CoreV1().ServiceAccounts(namespace).Get(name, metav1.GetOptions{})\n\tcurrSA, err := k8sClientset.CoreV1().ServiceAccounts(namespace).Get(name, metav1.GetOptions{})\n\tif err == nil {\n\t\tlog.Infof(\"Service account already exists...\")\n\t\tif currSA.Annotations == nil {\n\t\t\tcurrSA.Annotations = map[string]string{", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "335224658", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4309, + "pr_file": "bootstrap/pkg/kfapp/gcp/gcp.go", + "discussion_id": "335224658", + "commented_code": "@@ -1675,14 +1709,28 @@ func (gcp *Gcp) setupWorkloadIdentity(namespace string) error {\n \treturn nil\n }\n \n-// createK8sServiceAccount creates k8s servicea account with annotation\n+// 
createOrUpdateK8sServiceAccount creates or updates k8s service account with annotation\n // iam.gke.io/gcp-service-account=gsa\n // TODO(lunkai): Ideally the k8s service account should be specified by kustomize.\n-func createK8sServiceAccount(k8sClientset *clientset.Clientset, namespace string, name string, gsa string) error {\n+func createOrUpdateK8sServiceAccount(k8sClientset *clientset.Clientset, namespace string, name string, gsa string) error {\n \tlog.Infof(\"Creating service account %v in namespace %v\", name, namespace)\n-\t_, err := k8sClientset.CoreV1().ServiceAccounts(namespace).Get(name, metav1.GetOptions{})\n+\tcurrSA, err := k8sClientset.CoreV1().ServiceAccounts(namespace).Get(name, metav1.GetOptions{})\n \tif err == nil {\n \t\tlog.Infof(\"Service account already exists...\")\n+\t\tif currSA.Annotations == nil {\n+\t\t\tcurrSA.Annotations = map[string]string{", + "comment_created_at": "2019-10-15T23:46:15+00:00", + "comment_author": "jlewi", + "comment_body": "nit: The code would be more robust as\r\n\r\nif currSa.Annotation == nil {\r\n curSA.Annotations = map[string]string{}\r\n}\r\n\r\ncurrSa.Annotations[...] =....\r\n\r\nThis way you only end up writing this once.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-check-before-use.md b/_reviewers/kubeflow-check-before-use.md index 1085c50..e0cef78 100644 --- a/_reviewers/kubeflow-check-before-use.md +++ b/_reviewers/kubeflow-check-before-use.md @@ -32,85 +32,3 @@ currSA.Annotations[key] = value ``` This pattern reduces nesting levels, makes error conditions explicit, and prevents the most common source of runtime panics in Go. When functions receive objects from external sources or other functions, always check their validity before proceeding with operations on them. 
- - -[ - { - "discussion_id": "956151854", - "pr_number": 6628, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "created_at": "2022-08-26T15:16:18+00:00", - "commented_code": "return ctrl.Result{RequeueAfter: culler.GetRequeueTime()}, nil\n}\n\nfunc updateNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,\n\tsts *appsv1.StatefulSet, req ctrl.Request) (bool, error) {\n\n\tlog := r.Log.WithValues(\"notebook\", req.NamespacedName)\n\tctx := context.Background()\n\n\tstatusUpdate := false\n\t// Initialize Notebook CR Status\n\tif nb.Status.Conditions == nil {\n\t\tlog.Info(\"Initializing Notebook CR Status\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n\t\tnewStatus := initializeStatus()\n\t\tnb.Status = newStatus\n\t\tstatusUpdate = true\n\t}\n\n\t// Update the Notebook CR status.readyReplicas if the status is changed\n\tif sts.Status.ReadyReplicas != nb.Status.ReadyReplicas {\n\t\tlog.Info(\"Updating status.ReadyReplicas\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n\t\tnb.Status.ReadyReplicas = sts.Status.ReadyReplicas\n\t\tstatusUpdate = true\n\t}\n\n\t// Check the pod status\n\tpod := &corev1.Pod{}\n\tpodFound := false\n\terr := r.Get(ctx, types.NamespacedName{Name: sts.Name + \"-0\", Namespace: sts.Namespace}, pod)", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "956151854", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6628, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "956151854", - "commented_code": "@@ -324,6 +269,102 @@ func (r *NotebookReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c\n \treturn ctrl.Result{RequeueAfter: culler.GetRequeueTime()}, nil\n }\n \n+func updateNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,\n+\tsts *appsv1.StatefulSet, req ctrl.Request) (bool, error) {\n+\n+\tlog := r.Log.WithValues(\"notebook\", req.NamespacedName)\n+\tctx := context.Background()\n+\n+\tstatusUpdate := false\n+\t// Initialize Notebook CR Status\n+\tif nb.Status.Conditions == nil {\n+\t\tlog.Info(\"Initializing Notebook CR Status\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n+\t\tnewStatus := initializeStatus()\n+\t\tnb.Status = newStatus\n+\t\tstatusUpdate = true\n+\t}\n+\n+\t// Update the Notebook CR status.readyReplicas if the status is changed\n+\tif sts.Status.ReadyReplicas != nb.Status.ReadyReplicas {\n+\t\tlog.Info(\"Updating status.ReadyReplicas\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n+\t\tnb.Status.ReadyReplicas = sts.Status.ReadyReplicas\n+\t\tstatusUpdate = true\n+\t}\n+\n+\t// Check the pod status\n+\tpod := &corev1.Pod{}\n+\tpodFound := false\n+\terr := r.Get(ctx, types.NamespacedName{Name: sts.Name + \"-0\", Namespace: sts.Namespace}, pod)", - "comment_created_at": "2022-08-26T15:16:18+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Let's move this part completely outside of this function and treat the Pod as the StatefulSet. 
\r\n\r\nThe main reconciliation logic is responsible for finding the objects (Pod, StatefulSet) and our function will need to check if they are `nil` or not", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "956155664", - "pr_number": 6628, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "created_at": "2022-08-26T15:20:25+00:00", - "commented_code": "}\n\t}\n\n\t// Update the readyReplicas if the status is changed\n\tif foundStateful.Status.ReadyReplicas != instance.Status.ReadyReplicas {\n\t\tlog.Info(\"Updating Status\", \"namespace\", instance.Namespace, \"name\", instance.Name)\n\t\tinstance.Status.ReadyReplicas = foundStateful.Status.ReadyReplicas\n\t\terr = r.Status().Update(ctx, instance)\n\t\tif err != nil {\n\t\t\treturn ctrl.Result{}, err\n\t\t}\n\t}\n\n\t// Check the pod status\n\tpod := &corev1.Pod{}\n\tpodFound := false\n\terr = r.Get(ctx, types.NamespacedName{Name: ss.Name + \"-0\", Namespace: ss.Namespace}, pod)\n\tif err != nil && apierrs.IsNotFound(err) {\n\t\t// This should be reconciled by the StatefulSet\n\t\tlog.Info(\"Pod not found...\")\n\t} else if err != nil {\n\t// Update Notebook CR status\n\tpodFound, err := updateNotebookStatus(r, instance, foundStateful, req)\n\tif err != nil {\n\t\treturn ctrl.Result{}, err\n\t} else {", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "956155664", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6628, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "956155664", - "commented_code": "@@ -206,65 +206,10 @@ func (r *NotebookReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c\n \t\t}\n \t}\n \n-\t// Update the readyReplicas if the status is changed\n-\tif foundStateful.Status.ReadyReplicas != instance.Status.ReadyReplicas {\n-\t\tlog.Info(\"Updating Status\", \"namespace\", instance.Namespace, \"name\", instance.Name)\n-\t\tinstance.Status.ReadyReplicas = foundStateful.Status.ReadyReplicas\n-\t\terr = r.Status().Update(ctx, instance)\n-\t\tif err != nil {\n-\t\t\treturn ctrl.Result{}, err\n-\t\t}\n-\t}\n-\n-\t// Check the pod status\n-\tpod := &corev1.Pod{}\n-\tpodFound := false\n-\terr = r.Get(ctx, types.NamespacedName{Name: ss.Name + \"-0\", Namespace: ss.Namespace}, pod)\n-\tif err != nil && apierrs.IsNotFound(err) {\n-\t\t// This should be reconciled by the StatefulSet\n-\t\tlog.Info(\"Pod not found...\")\n-\t} else if err != nil {\n+\t// Update Notebook CR status\n+\tpodFound, err := updateNotebookStatus(r, instance, foundStateful, req)\n+\tif err != nil {\n \t\treturn ctrl.Result{}, err\n-\t} else {", - "comment_created_at": "2022-08-26T15:20:25+00:00", - "comment_author": "kimwnasptd", - "comment_body": "This nesting level can be simplified by checking the incoming `pod` object:\r\n\r\n```golang\r\nif pod == nil {\r\n log.Info(\"No pod found. 
Won't update notebook conditions and containerState\")\r\n return status, nil\r\n}\r\n\r\n```", - "pr_file_module": null - }, - { - "comment_id": "957340345", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6628, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "956155664", - "commented_code": "@@ -206,65 +206,10 @@ func (r *NotebookReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c\n \t\t}\n \t}\n \n-\t// Update the readyReplicas if the status is changed\n-\tif foundStateful.Status.ReadyReplicas != instance.Status.ReadyReplicas {\n-\t\tlog.Info(\"Updating Status\", \"namespace\", instance.Namespace, \"name\", instance.Name)\n-\t\tinstance.Status.ReadyReplicas = foundStateful.Status.ReadyReplicas\n-\t\terr = r.Status().Update(ctx, instance)\n-\t\tif err != nil {\n-\t\t\treturn ctrl.Result{}, err\n-\t\t}\n-\t}\n-\n-\t// Check the pod status\n-\tpod := &corev1.Pod{}\n-\tpodFound := false\n-\terr = r.Get(ctx, types.NamespacedName{Name: ss.Name + \"-0\", Namespace: ss.Namespace}, pod)\n-\tif err != nil && apierrs.IsNotFound(err) {\n-\t\t// This should be reconciled by the StatefulSet\n-\t\tlog.Info(\"Pod not found...\")\n-\t} else if err != nil {\n+\t// Update Notebook CR status\n+\tpodFound, err := updateNotebookStatus(r, instance, foundStateful, req)\n+\tif err != nil {\n \t\treturn ctrl.Result{}, err\n-\t} else {", - "comment_created_at": "2022-08-29T13:31:02+00:00", - "comment_author": "apo-ger", - "comment_body": "Fixed!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "335224658", - "pr_number": 4309, - "pr_file": "bootstrap/pkg/kfapp/gcp/gcp.go", - "created_at": "2019-10-15T23:46:15+00:00", - "commented_code": "return nil\n}\n\n// createK8sServiceAccount creates k8s servicea account with annotation\n// createOrUpdateK8sServiceAccount creates or updates k8s service account with annotation\n// iam.gke.io/gcp-service-account=gsa\n// TODO(lunkai): Ideally the k8s service account should be specified by kustomize.\nfunc createK8sServiceAccount(k8sClientset *clientset.Clientset, namespace string, name string, gsa string) error {\nfunc createOrUpdateK8sServiceAccount(k8sClientset *clientset.Clientset, namespace string, name string, gsa string) error {\n\tlog.Infof(\"Creating service account %v in namespace %v\", name, namespace)\n\t_, err := k8sClientset.CoreV1().ServiceAccounts(namespace).Get(name, metav1.GetOptions{})\n\tcurrSA, err := k8sClientset.CoreV1().ServiceAccounts(namespace).Get(name, metav1.GetOptions{})\n\tif err == nil {\n\t\tlog.Infof(\"Service account already exists...\")\n\t\tif currSA.Annotations == nil {\n\t\t\tcurrSA.Annotations = map[string]string{", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "335224658", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4309, - "pr_file": "bootstrap/pkg/kfapp/gcp/gcp.go", - "discussion_id": "335224658", - "commented_code": "@@ -1675,14 +1709,28 @@ func (gcp *Gcp) setupWorkloadIdentity(namespace string) error {\n \treturn nil\n }\n \n-// createK8sServiceAccount creates k8s servicea account with annotation\n+// createOrUpdateK8sServiceAccount creates or updates k8s service account with annotation\n // iam.gke.io/gcp-service-account=gsa\n // TODO(lunkai): Ideally the k8s service account should be specified by kustomize.\n-func createK8sServiceAccount(k8sClientset *clientset.Clientset, namespace string, name string, gsa string) error {\n+func createOrUpdateK8sServiceAccount(k8sClientset *clientset.Clientset, 
namespace string, name string, gsa string) error {\n \tlog.Infof(\"Creating service account %v in namespace %v\", name, namespace)\n-\t_, err := k8sClientset.CoreV1().ServiceAccounts(namespace).Get(name, metav1.GetOptions{})\n+\tcurrSA, err := k8sClientset.CoreV1().ServiceAccounts(namespace).Get(name, metav1.GetOptions{})\n \tif err == nil {\n \t\tlog.Infof(\"Service account already exists...\")\n+\t\tif currSA.Annotations == nil {\n+\t\t\tcurrSA.Annotations = map[string]string{", - "comment_created_at": "2019-10-15T23:46:15+00:00", - "comment_author": "jlewi", - "comment_body": "nit: The code would be more robust as\r\n\r\nif currSa.Annotation == nil {\r\n curSA.Annotations = map[string]string{}\r\n}\r\n\r\ncurrSa.Annotations[...] =....\r\n\r\nThis way you only end up writing this once.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-component-agnostic-styling-principles.json b/_reviewers/kubeflow-component-agnostic-styling-principles.json new file mode 100644 index 0000000..581a97a --- /dev/null +++ b/_reviewers/kubeflow-component-agnostic-styling-principles.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "1132106601", + "pr_number": 6952, + "pr_file": "components/crud-web-apps/common/frontend/kubeflow-common-lib/projects/kubeflow/src/lib/status-info/status-info.component.scss", + "created_at": "2023-03-10T08:55:50+00:00", + "commented_code": ".panel-body {\n background-color: rgba(0, 0, 0, 0.05);\n border-radius: 4px;\n padding: 1px;\n display: flex;\n font-size: 14px;\n margin-top: 5px;", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1132106601", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6952, + "pr_file": "components/crud-web-apps/common/frontend/kubeflow-common-lib/projects/kubeflow/src/lib/status-info/status-info.component.scss", + "discussion_id": "1132106601", + "commented_code": "@@ -0,0 +1,13 @@\n+.panel-body {\n+ background-color: rgba(0, 0, 0, 0.05);\n+ border-radius: 4px;\n+ padding: 1px;\n+ display: flex;\n+ font-size: 14px;\n+ margin-top: 5px;", + "comment_created_at": "2023-03-10T08:55:50+00:00", + "comment_author": "tasos-ale", + "comment_body": "You have created a component that we might use in other WAs so it is better not to apply any styles that change its position. 
I suggest moving this margin in the component that is using the `status-info`.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1153302280", + "pr_number": 7071, + "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/namespace-needed-page/namespace-needed-page.component.scss", + "created_at": "2023-03-30T13:54:06+00:00", + "commented_code": ":host {\n flex: 1;", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1153302280", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7071, + "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/namespace-needed-page/namespace-needed-page.component.scss", + "discussion_id": "1153302280", + "commented_code": "@@ -0,0 +1,3 @@\n+:host {\n+ flex: 1;", + "comment_created_at": "2023-03-30T13:54:06+00:00", + "comment_author": "tasos-ale", + "comment_body": "```suggestion\r\n height: 100%;\r\n```\r\nSince we can have the same effect with `height` we can avoid assuming that component's parent has `display:flex`.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-component-agnostic-styling-principles.md b/_reviewers/kubeflow-component-agnostic-styling-principles.md index 5cd9f5f..1a5ced3 100644 --- a/_reviewers/kubeflow-component-agnostic-styling-principles.md +++ b/_reviewers/kubeflow-component-agnostic-styling-principles.md @@ -37,51 +37,3 @@ Create reusable components with styles that don't make assumptions about parent ``` This approach ensures components remain flexible and can be reused in different contexts without layout issues. - - -[ - { - "discussion_id": "1132106601", - "pr_number": 6952, - "pr_file": "components/crud-web-apps/common/frontend/kubeflow-common-lib/projects/kubeflow/src/lib/status-info/status-info.component.scss", - "created_at": "2023-03-10T08:55:50+00:00", - "commented_code": ".panel-body {\n background-color: rgba(0, 0, 0, 0.05);\n border-radius: 4px;\n padding: 1px;\n display: flex;\n font-size: 14px;\n margin-top: 5px;", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1132106601", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6952, - "pr_file": "components/crud-web-apps/common/frontend/kubeflow-common-lib/projects/kubeflow/src/lib/status-info/status-info.component.scss", - "discussion_id": "1132106601", - "commented_code": "@@ -0,0 +1,13 @@\n+.panel-body {\n+ background-color: rgba(0, 0, 0, 0.05);\n+ border-radius: 4px;\n+ padding: 1px;\n+ display: flex;\n+ font-size: 14px;\n+ margin-top: 5px;", - "comment_created_at": "2023-03-10T08:55:50+00:00", - "comment_author": "tasos-ale", - "comment_body": "You have created a component that we might use in other WAs so it is better not to apply any styles that change its position. 
I suggest moving this margin in the component that is using the `status-info`.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1153302280", - "pr_number": 7071, - "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/namespace-needed-page/namespace-needed-page.component.scss", - "created_at": "2023-03-30T13:54:06+00:00", - "commented_code": ":host {\n flex: 1;", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1153302280", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7071, - "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/namespace-needed-page/namespace-needed-page.component.scss", - "discussion_id": "1153302280", - "commented_code": "@@ -0,0 +1,3 @@\n+:host {\n+ flex: 1;", - "comment_created_at": "2023-03-30T13:54:06+00:00", - "comment_author": "tasos-ale", - "comment_body": "```suggestion\r\n height: 100%;\r\n```\r\nSince we can have the same effect with `height` we can avoid assuming that component's parent has `display:flex`.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-configurable-security-with-defaults.json b/_reviewers/kubeflow-configurable-security-with-defaults.json new file mode 100644 index 0000000..0557c87 --- /dev/null +++ b/_reviewers/kubeflow-configurable-security-with-defaults.json @@ -0,0 +1,72 @@ +[ + { + "discussion_id": "547897084", + "pr_number": 5472, + "pr_file": "components/crud-web-apps/common/backend/kubeflow/kubeflow/crud_backend/csrf.py", + "created_at": "2020-12-23T10:49:30+00:00", + "commented_code": "\"\"\"\nCross Site Request Forgery Blueprint.\n\nThis module provides a Flask blueprint that implements protection against\nrequest forgeries from other sites. Currently, it is only meant to be used with\nan AJAX frontend, not with server-side rendered forms.\n\nThe module implements the following protecting measures against CSRF:\n- Double Submit Cookie.\n- Custom HTTP Headers.\n- SameSite cookie attribute.\n\nTo elaborate, the `Double Submit Cookie` procedure looks like the following:\n1. Browser requests `index.html`, which contains the compiled Javascript.\n2. Backend sets the `CSRF_COOKIE` by calling `set_cookie`. If the cookie\n already exists, `set_cookie` overrides it with a new one. The cookie\n contains a random value.\n3. Frontend (`index.html`) is loaded and starts making requests to the backend.\n For every request, the frontend reads the `CSRF_COOKIE` value and adds a\n `CSRF_HEADER` with the same value.\n4. Backend checks that the value of `CSRF_COOKIE` matches the value of\n `CSRF_HEADER`. All endpoints are checked, except the index endpoint and\n endpoints with safe methods (GET, HEAD, OPTIONS, TRACE).\n\nCustom Headers (`CSRF_HEADER`) provide an extra layer of protection, as\ncross-origin requests cannot include custom headers (assuming CORS is not\nmisconfigured) because of the Same-Origin policy.\n\nThe SameSite cookie attribute provides another layer of protection, but may\nimpede usability so it is configurable. 
This attribute controls whether a", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "547897084", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5472, + "pr_file": "components/crud-web-apps/common/backend/kubeflow/kubeflow/crud_backend/csrf.py", + "discussion_id": "547897084", + "commented_code": "@@ -0,0 +1,108 @@\n+\"\"\"\n+Cross Site Request Forgery Blueprint.\n+\n+This module provides a Flask blueprint that implements protection against\n+request forgeries from other sites. Currently, it is only meant to be used with\n+an AJAX frontend, not with server-side rendered forms.\n+\n+The module implements the following protecting measures against CSRF:\n+- Double Submit Cookie.\n+- Custom HTTP Headers.\n+- SameSite cookie attribute.\n+\n+To elaborate, the `Double Submit Cookie` procedure looks like the following:\n+1. Browser requests `index.html`, which contains the compiled Javascript.\n+2. Backend sets the `CSRF_COOKIE` by calling `set_cookie`. If the cookie\n+ already exists, `set_cookie` overrides it with a new one. The cookie\n+ contains a random value.\n+3. Frontend (`index.html`) is loaded and starts making requests to the backend.\n+ For every request, the frontend reads the `CSRF_COOKIE` value and adds a\n+ `CSRF_HEADER` with the same value.\n+4. Backend checks that the value of `CSRF_COOKIE` matches the value of\n+ `CSRF_HEADER`. All endpoints are checked, except the index endpoint and\n+ endpoints with safe methods (GET, HEAD, OPTIONS, TRACE).\n+\n+Custom Headers (`CSRF_HEADER`) provide an extra layer of protection, as\n+cross-origin requests cannot include custom headers (assuming CORS is not\n+misconfigured) because of the Same-Origin policy.\n+\n+The SameSite cookie attribute provides another layer of protection, but may\n+impede usability so it is configurable. This attribute controls whether a", + "comment_created_at": "2020-12-23T10:49:30+00:00", + "comment_author": "yanniszark", + "comment_body": "The SameSite attribute is currently not configurable. Should we change the docstring to reflect that? I see that you've added a note that we may need to make it configurable in the future.", + "pr_file_module": null + }, + { + "comment_id": "547931071", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5472, + "pr_file": "components/crud-web-apps/common/backend/kubeflow/kubeflow/crud_backend/csrf.py", + "discussion_id": "547897084", + "commented_code": "@@ -0,0 +1,108 @@\n+\"\"\"\n+Cross Site Request Forgery Blueprint.\n+\n+This module provides a Flask blueprint that implements protection against\n+request forgeries from other sites. Currently, it is only meant to be used with\n+an AJAX frontend, not with server-side rendered forms.\n+\n+The module implements the following protecting measures against CSRF:\n+- Double Submit Cookie.\n+- Custom HTTP Headers.\n+- SameSite cookie attribute.\n+\n+To elaborate, the `Double Submit Cookie` procedure looks like the following:\n+1. Browser requests `index.html`, which contains the compiled Javascript.\n+2. Backend sets the `CSRF_COOKIE` by calling `set_cookie`. If the cookie\n+ already exists, `set_cookie` overrides it with a new one. The cookie\n+ contains a random value.\n+3. Frontend (`index.html`) is loaded and starts making requests to the backend.\n+ For every request, the frontend reads the `CSRF_COOKIE` value and adds a\n+ `CSRF_HEADER` with the same value.\n+4. Backend checks that the value of `CSRF_COOKIE` matches the value of\n+ `CSRF_HEADER`. 
All endpoints are checked, except the index endpoint and\n+ endpoints with safe methods (GET, HEAD, OPTIONS, TRACE).\n+\n+Custom Headers (`CSRF_HEADER`) provide an extra layer of protection, as\n+cross-origin requests cannot include custom headers (assuming CORS is not\n+misconfigured) because of the Same-Origin policy.\n+\n+The SameSite cookie attribute provides another layer of protection, but may\n+impede usability so it is configurable. This attribute controls whether a", + "comment_created_at": "2020-12-23T12:17:56+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Lets make it configurable then, it should be a small change.\r\n\r\nI'll make it look for a `CSRF_SAMESITE` env var with a default value to `Strict`. If the value provided by the user is not `Strict`, `Lax` or `None` then again it will default to `Strict`\r\nhttps://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie#SameSite", + "pr_file_module": null + }, + { + "comment_id": "547936642", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5472, + "pr_file": "components/crud-web-apps/common/backend/kubeflow/kubeflow/crud_backend/csrf.py", + "discussion_id": "547897084", + "commented_code": "@@ -0,0 +1,108 @@\n+\"\"\"\n+Cross Site Request Forgery Blueprint.\n+\n+This module provides a Flask blueprint that implements protection against\n+request forgeries from other sites. Currently, it is only meant to be used with\n+an AJAX frontend, not with server-side rendered forms.\n+\n+The module implements the following protecting measures against CSRF:\n+- Double Submit Cookie.\n+- Custom HTTP Headers.\n+- SameSite cookie attribute.\n+\n+To elaborate, the `Double Submit Cookie` procedure looks like the following:\n+1. Browser requests `index.html`, which contains the compiled Javascript.\n+2. Backend sets the `CSRF_COOKIE` by calling `set_cookie`. If the cookie\n+ already exists, `set_cookie` overrides it with a new one. The cookie\n+ contains a random value.\n+3. Frontend (`index.html`) is loaded and starts making requests to the backend.\n+ For every request, the frontend reads the `CSRF_COOKIE` value and adds a\n+ `CSRF_HEADER` with the same value.\n+4. Backend checks that the value of `CSRF_COOKIE` matches the value of\n+ `CSRF_HEADER`. All endpoints are checked, except the index endpoint and\n+ endpoints with safe methods (GET, HEAD, OPTIONS, TRACE).\n+\n+Custom Headers (`CSRF_HEADER`) provide an extra layer of protection, as\n+cross-origin requests cannot include custom headers (assuming CORS is not\n+misconfigured) because of the Same-Origin policy.\n+\n+The SameSite cookie attribute provides another layer of protection, but may\n+impede usability so it is configurable. This attribute controls whether a", + "comment_created_at": "2020-12-23T12:32:18+00:00", + "comment_author": "kimwnasptd", + "comment_body": "force pushed. The PR should only contain one commit responsible for the CSRF functionality.\r\nI also made the `SameSite` attribute of the cookie configurable via the `CSRF_SAMESITE` variable.\r\n\r\nI also think a good next step would be to document the ENV Vars that each web app uses in the respective READMEs. 
This will make clear to the users how they can configure each app.", + "pr_file_module": null + }, + { + "comment_id": "548001562", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5472, + "pr_file": "components/crud-web-apps/common/backend/kubeflow/kubeflow/crud_backend/csrf.py", + "discussion_id": "547897084", + "commented_code": "@@ -0,0 +1,108 @@\n+\"\"\"\n+Cross Site Request Forgery Blueprint.\n+\n+This module provides a Flask blueprint that implements protection against\n+request forgeries from other sites. Currently, it is only meant to be used with\n+an AJAX frontend, not with server-side rendered forms.\n+\n+The module implements the following protecting measures against CSRF:\n+- Double Submit Cookie.\n+- Custom HTTP Headers.\n+- SameSite cookie attribute.\n+\n+To elaborate, the `Double Submit Cookie` procedure looks like the following:\n+1. Browser requests `index.html`, which contains the compiled Javascript.\n+2. Backend sets the `CSRF_COOKIE` by calling `set_cookie`. If the cookie\n+ already exists, `set_cookie` overrides it with a new one. The cookie\n+ contains a random value.\n+3. Frontend (`index.html`) is loaded and starts making requests to the backend.\n+ For every request, the frontend reads the `CSRF_COOKIE` value and adds a\n+ `CSRF_HEADER` with the same value.\n+4. Backend checks that the value of `CSRF_COOKIE` matches the value of\n+ `CSRF_HEADER`. All endpoints are checked, except the index endpoint and\n+ endpoints with safe methods (GET, HEAD, OPTIONS, TRACE).\n+\n+Custom Headers (`CSRF_HEADER`) provide an extra layer of protection, as\n+cross-origin requests cannot include custom headers (assuming CORS is not\n+misconfigured) because of the Same-Origin policy.\n+\n+The SameSite cookie attribute provides another layer of protection, but may\n+impede usability so it is configurable. This attribute controls whether a", + "comment_created_at": "2020-12-23T15:04:14+00:00", + "comment_author": "yanniszark", + "comment_body": "That's great! One question: maybe it's better to start the env var section in the README now and document this specific settings with this PR, which introduces it. You can add a note that the section is a work in-progress so that readers don't get confused. What do you think?\r\n", + "pr_file_module": null + }, + { + "comment_id": "548016953", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5472, + "pr_file": "components/crud-web-apps/common/backend/kubeflow/kubeflow/crud_backend/csrf.py", + "discussion_id": "547897084", + "commented_code": "@@ -0,0 +1,108 @@\n+\"\"\"\n+Cross Site Request Forgery Blueprint.\n+\n+This module provides a Flask blueprint that implements protection against\n+request forgeries from other sites. Currently, it is only meant to be used with\n+an AJAX frontend, not with server-side rendered forms.\n+\n+The module implements the following protecting measures against CSRF:\n+- Double Submit Cookie.\n+- Custom HTTP Headers.\n+- SameSite cookie attribute.\n+\n+To elaborate, the `Double Submit Cookie` procedure looks like the following:\n+1. Browser requests `index.html`, which contains the compiled Javascript.\n+2. Backend sets the `CSRF_COOKIE` by calling `set_cookie`. If the cookie\n+ already exists, `set_cookie` overrides it with a new one. The cookie\n+ contains a random value.\n+3. Frontend (`index.html`) is loaded and starts making requests to the backend.\n+ For every request, the frontend reads the `CSRF_COOKIE` value and adds a\n+ `CSRF_HEADER` with the same value.\n+4. 
Backend checks that the value of `CSRF_COOKIE` matches the value of\n+ `CSRF_HEADER`. All endpoints are checked, except the index endpoint and\n+ endpoints with safe methods (GET, HEAD, OPTIONS, TRACE).\n+\n+Custom Headers (`CSRF_HEADER`) provide an extra layer of protection, as\n+cross-origin requests cannot include custom headers (assuming CORS is not\n+misconfigured) because of the Same-Origin policy.\n+\n+The SameSite cookie attribute provides another layer of protection, but may\n+impede usability so it is configurable. This attribute controls whether a", + "comment_created_at": "2020-12-23T15:36:34+00:00", + "comment_author": "kimwnasptd", + "comment_body": "ACK! I modified the README of the common code.\r\nI also created #5483 as an umbrella issue for the web apps", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-configurable-security-with-defaults.md b/_reviewers/kubeflow-configurable-security-with-defaults.md index 93e7270..6349185 100644 --- a/_reviewers/kubeflow-configurable-security-with-defaults.md +++ b/_reviewers/kubeflow-configurable-security-with-defaults.md @@ -47,77 +47,3 @@ Document this configuration in your README: ``` This approach balances security with flexibility, allowing secure operation in various environments while maintaining strong defaults. - - -[ - { - "discussion_id": "547897084", - "pr_number": 5472, - "pr_file": "components/crud-web-apps/common/backend/kubeflow/kubeflow/crud_backend/csrf.py", - "created_at": "2020-12-23T10:49:30+00:00", - "commented_code": "\"\"\"\nCross Site Request Forgery Blueprint.\n\nThis module provides a Flask blueprint that implements protection against\nrequest forgeries from other sites. Currently, it is only meant to be used with\nan AJAX frontend, not with server-side rendered forms.\n\nThe module implements the following protecting measures against CSRF:\n- Double Submit Cookie.\n- Custom HTTP Headers.\n- SameSite cookie attribute.\n\nTo elaborate, the `Double Submit Cookie` procedure looks like the following:\n1. Browser requests `index.html`, which contains the compiled Javascript.\n2. Backend sets the `CSRF_COOKIE` by calling `set_cookie`. If the cookie\n already exists, `set_cookie` overrides it with a new one. The cookie\n contains a random value.\n3. Frontend (`index.html`) is loaded and starts making requests to the backend.\n For every request, the frontend reads the `CSRF_COOKIE` value and adds a\n `CSRF_HEADER` with the same value.\n4. Backend checks that the value of `CSRF_COOKIE` matches the value of\n `CSRF_HEADER`. All endpoints are checked, except the index endpoint and\n endpoints with safe methods (GET, HEAD, OPTIONS, TRACE).\n\nCustom Headers (`CSRF_HEADER`) provide an extra layer of protection, as\ncross-origin requests cannot include custom headers (assuming CORS is not\nmisconfigured) because of the Same-Origin policy.\n\nThe SameSite cookie attribute provides another layer of protection, but may\nimpede usability so it is configurable. 
This attribute controls whether a", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "547897084", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5472, - "pr_file": "components/crud-web-apps/common/backend/kubeflow/kubeflow/crud_backend/csrf.py", - "discussion_id": "547897084", - "commented_code": "@@ -0,0 +1,108 @@\n+\"\"\"\n+Cross Site Request Forgery Blueprint.\n+\n+This module provides a Flask blueprint that implements protection against\n+request forgeries from other sites. Currently, it is only meant to be used with\n+an AJAX frontend, not with server-side rendered forms.\n+\n+The module implements the following protecting measures against CSRF:\n+- Double Submit Cookie.\n+- Custom HTTP Headers.\n+- SameSite cookie attribute.\n+\n+To elaborate, the `Double Submit Cookie` procedure looks like the following:\n+1. Browser requests `index.html`, which contains the compiled Javascript.\n+2. Backend sets the `CSRF_COOKIE` by calling `set_cookie`. If the cookie\n+ already exists, `set_cookie` overrides it with a new one. The cookie\n+ contains a random value.\n+3. Frontend (`index.html`) is loaded and starts making requests to the backend.\n+ For every request, the frontend reads the `CSRF_COOKIE` value and adds a\n+ `CSRF_HEADER` with the same value.\n+4. Backend checks that the value of `CSRF_COOKIE` matches the value of\n+ `CSRF_HEADER`. All endpoints are checked, except the index endpoint and\n+ endpoints with safe methods (GET, HEAD, OPTIONS, TRACE).\n+\n+Custom Headers (`CSRF_HEADER`) provide an extra layer of protection, as\n+cross-origin requests cannot include custom headers (assuming CORS is not\n+misconfigured) because of the Same-Origin policy.\n+\n+The SameSite cookie attribute provides another layer of protection, but may\n+impede usability so it is configurable. This attribute controls whether a", - "comment_created_at": "2020-12-23T10:49:30+00:00", - "comment_author": "yanniszark", - "comment_body": "The SameSite attribute is currently not configurable. Should we change the docstring to reflect that? I see that you've added a note that we may need to make it configurable in the future.", - "pr_file_module": null - }, - { - "comment_id": "547931071", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5472, - "pr_file": "components/crud-web-apps/common/backend/kubeflow/kubeflow/crud_backend/csrf.py", - "discussion_id": "547897084", - "commented_code": "@@ -0,0 +1,108 @@\n+\"\"\"\n+Cross Site Request Forgery Blueprint.\n+\n+This module provides a Flask blueprint that implements protection against\n+request forgeries from other sites. Currently, it is only meant to be used with\n+an AJAX frontend, not with server-side rendered forms.\n+\n+The module implements the following protecting measures against CSRF:\n+- Double Submit Cookie.\n+- Custom HTTP Headers.\n+- SameSite cookie attribute.\n+\n+To elaborate, the `Double Submit Cookie` procedure looks like the following:\n+1. Browser requests `index.html`, which contains the compiled Javascript.\n+2. Backend sets the `CSRF_COOKIE` by calling `set_cookie`. If the cookie\n+ already exists, `set_cookie` overrides it with a new one. The cookie\n+ contains a random value.\n+3. Frontend (`index.html`) is loaded and starts making requests to the backend.\n+ For every request, the frontend reads the `CSRF_COOKIE` value and adds a\n+ `CSRF_HEADER` with the same value.\n+4. Backend checks that the value of `CSRF_COOKIE` matches the value of\n+ `CSRF_HEADER`. 
All endpoints are checked, except the index endpoint and\n+ endpoints with safe methods (GET, HEAD, OPTIONS, TRACE).\n+\n+Custom Headers (`CSRF_HEADER`) provide an extra layer of protection, as\n+cross-origin requests cannot include custom headers (assuming CORS is not\n+misconfigured) because of the Same-Origin policy.\n+\n+The SameSite cookie attribute provides another layer of protection, but may\n+impede usability so it is configurable. This attribute controls whether a", - "comment_created_at": "2020-12-23T12:17:56+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Lets make it configurable then, it should be a small change.\r\n\r\nI'll make it look for a `CSRF_SAMESITE` env var with a default value to `Strict`. If the value provided by the user is not `Strict`, `Lax` or `None` then again it will default to `Strict`\r\nhttps://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie#SameSite", - "pr_file_module": null - }, - { - "comment_id": "547936642", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5472, - "pr_file": "components/crud-web-apps/common/backend/kubeflow/kubeflow/crud_backend/csrf.py", - "discussion_id": "547897084", - "commented_code": "@@ -0,0 +1,108 @@\n+\"\"\"\n+Cross Site Request Forgery Blueprint.\n+\n+This module provides a Flask blueprint that implements protection against\n+request forgeries from other sites. Currently, it is only meant to be used with\n+an AJAX frontend, not with server-side rendered forms.\n+\n+The module implements the following protecting measures against CSRF:\n+- Double Submit Cookie.\n+- Custom HTTP Headers.\n+- SameSite cookie attribute.\n+\n+To elaborate, the `Double Submit Cookie` procedure looks like the following:\n+1. Browser requests `index.html`, which contains the compiled Javascript.\n+2. Backend sets the `CSRF_COOKIE` by calling `set_cookie`. If the cookie\n+ already exists, `set_cookie` overrides it with a new one. The cookie\n+ contains a random value.\n+3. Frontend (`index.html`) is loaded and starts making requests to the backend.\n+ For every request, the frontend reads the `CSRF_COOKIE` value and adds a\n+ `CSRF_HEADER` with the same value.\n+4. Backend checks that the value of `CSRF_COOKIE` matches the value of\n+ `CSRF_HEADER`. All endpoints are checked, except the index endpoint and\n+ endpoints with safe methods (GET, HEAD, OPTIONS, TRACE).\n+\n+Custom Headers (`CSRF_HEADER`) provide an extra layer of protection, as\n+cross-origin requests cannot include custom headers (assuming CORS is not\n+misconfigured) because of the Same-Origin policy.\n+\n+The SameSite cookie attribute provides another layer of protection, but may\n+impede usability so it is configurable. This attribute controls whether a", - "comment_created_at": "2020-12-23T12:32:18+00:00", - "comment_author": "kimwnasptd", - "comment_body": "force pushed. The PR should only contain one commit responsible for the CSRF functionality.\r\nI also made the `SameSite` attribute of the cookie configurable via the `CSRF_SAMESITE` variable.\r\n\r\nI also think a good next step would be to document the ENV Vars that each web app uses in the respective READMEs. 
This will make clear to the users how they can configure each app.", - "pr_file_module": null - }, - { - "comment_id": "548001562", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5472, - "pr_file": "components/crud-web-apps/common/backend/kubeflow/kubeflow/crud_backend/csrf.py", - "discussion_id": "547897084", - "commented_code": "@@ -0,0 +1,108 @@\n+\"\"\"\n+Cross Site Request Forgery Blueprint.\n+\n+This module provides a Flask blueprint that implements protection against\n+request forgeries from other sites. Currently, it is only meant to be used with\n+an AJAX frontend, not with server-side rendered forms.\n+\n+The module implements the following protecting measures against CSRF:\n+- Double Submit Cookie.\n+- Custom HTTP Headers.\n+- SameSite cookie attribute.\n+\n+To elaborate, the `Double Submit Cookie` procedure looks like the following:\n+1. Browser requests `index.html`, which contains the compiled Javascript.\n+2. Backend sets the `CSRF_COOKIE` by calling `set_cookie`. If the cookie\n+ already exists, `set_cookie` overrides it with a new one. The cookie\n+ contains a random value.\n+3. Frontend (`index.html`) is loaded and starts making requests to the backend.\n+ For every request, the frontend reads the `CSRF_COOKIE` value and adds a\n+ `CSRF_HEADER` with the same value.\n+4. Backend checks that the value of `CSRF_COOKIE` matches the value of\n+ `CSRF_HEADER`. All endpoints are checked, except the index endpoint and\n+ endpoints with safe methods (GET, HEAD, OPTIONS, TRACE).\n+\n+Custom Headers (`CSRF_HEADER`) provide an extra layer of protection, as\n+cross-origin requests cannot include custom headers (assuming CORS is not\n+misconfigured) because of the Same-Origin policy.\n+\n+The SameSite cookie attribute provides another layer of protection, but may\n+impede usability so it is configurable. This attribute controls whether a", - "comment_created_at": "2020-12-23T15:04:14+00:00", - "comment_author": "yanniszark", - "comment_body": "That's great! One question: maybe it's better to start the env var section in the README now and document this specific settings with this PR, which introduces it. You can add a note that the section is a work in-progress so that readers don't get confused. What do you think?\r\n", - "pr_file_module": null - }, - { - "comment_id": "548016953", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5472, - "pr_file": "components/crud-web-apps/common/backend/kubeflow/kubeflow/crud_backend/csrf.py", - "discussion_id": "547897084", - "commented_code": "@@ -0,0 +1,108 @@\n+\"\"\"\n+Cross Site Request Forgery Blueprint.\n+\n+This module provides a Flask blueprint that implements protection against\n+request forgeries from other sites. Currently, it is only meant to be used with\n+an AJAX frontend, not with server-side rendered forms.\n+\n+The module implements the following protecting measures against CSRF:\n+- Double Submit Cookie.\n+- Custom HTTP Headers.\n+- SameSite cookie attribute.\n+\n+To elaborate, the `Double Submit Cookie` procedure looks like the following:\n+1. Browser requests `index.html`, which contains the compiled Javascript.\n+2. Backend sets the `CSRF_COOKIE` by calling `set_cookie`. If the cookie\n+ already exists, `set_cookie` overrides it with a new one. The cookie\n+ contains a random value.\n+3. Frontend (`index.html`) is loaded and starts making requests to the backend.\n+ For every request, the frontend reads the `CSRF_COOKIE` value and adds a\n+ `CSRF_HEADER` with the same value.\n+4. 
Backend checks that the value of `CSRF_COOKIE` matches the value of\n+ `CSRF_HEADER`. All endpoints are checked, except the index endpoint and\n+ endpoints with safe methods (GET, HEAD, OPTIONS, TRACE).\n+\n+Custom Headers (`CSRF_HEADER`) provide an extra layer of protection, as\n+cross-origin requests cannot include custom headers (assuming CORS is not\n+misconfigured) because of the Same-Origin policy.\n+\n+The SameSite cookie attribute provides another layer of protection, but may\n+impede usability so it is configurable. This attribute controls whether a", - "comment_created_at": "2020-12-23T15:36:34+00:00", - "comment_author": "kimwnasptd", - "comment_body": "ACK! I modified the README of the common code.\r\nI also created #5483 as an umbrella issue for the web apps", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-consistent-descriptive-naming.json b/_reviewers/kubeflow-consistent-descriptive-naming.json new file mode 100644 index 0000000..e7708d0 --- /dev/null +++ b/_reviewers/kubeflow-consistent-descriptive-naming.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "1216666765", + "pr_number": 6876, + "pr_file": "components/pvc-viewer/config/default/kustomization.yaml", + "created_at": "2023-06-04T12:51:45+00:00", + "commented_code": "# Adds namespace to all resources.\n# Note: the namespace name needs to be changed here and cannot be changed in an overlay\n# This is because certmanager and webhook use kustomize names to configure resources.\n# Setting another namespace here as in the overlay will lead to e.g. wrong DNSNames being generated.\nnamespace: kubeflow\n\n# Value of this field is prepended to the\n# names of all resources, e.g. a deployment named\n# \"wordpress\" becomes \"alices-wordpress\".\n# Note that it should also match with the prefix (text before '-') of the namespace\n# field above.\nnamePrefix: pvc-viewer-", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1216666765", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6876, + "pr_file": "components/pvc-viewer/config/default/kustomization.yaml", + "discussion_id": "1216666765", + "commented_code": "@@ -0,0 +1,135 @@\n+# Adds namespace to all resources.\n+# Note: the namespace name needs to be changed here and cannot be changed in an overlay\n+# This is because certmanager and webhook use kustomize names to configure resources.\n+# Setting another namespace here as in the overlay will lead to e.g. wrong DNSNames being generated.\n+namespace: kubeflow\n+\n+# Value of this field is prepended to the\n+# names of all resources, e.g. a deployment named\n+# \"wordpress\" becomes \"alices-wordpress\".\n+# Note that it should also match with the prefix (text before '-') of the namespace\n+# field above.\n+namePrefix: pvc-viewer-", + "comment_created_at": "2023-06-04T12:51:45+00:00", + "comment_author": "kimwnasptd", + "comment_body": "nit: can we make this `pvcviewer-` instead? 
In the labels as well we do `pvcviewers` so let's be uniform", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1039531607", + "pr_number": 6786, + "pr_file": ".github/workflows/centraldb_angular_frontend_check.yaml", + "created_at": "2022-12-05T12:30:57+00:00", + "commented_code": "name: CentralDashboard-angular Frontend check", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1039531607", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6786, + "pr_file": ".github/workflows/centraldb_angular_frontend_check.yaml", + "discussion_id": "1039531607", + "commented_code": "@@ -0,0 +1,34 @@\n+name: CentralDashboard-angular Frontend check", + "comment_created_at": "2022-12-05T12:30:57+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Let's rename this to `CentralDashboard-angular Frontend Tests` and also the file to `centraldb_angular_frontend_test.yaml`", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-consistent-descriptive-naming.md b/_reviewers/kubeflow-consistent-descriptive-naming.md index 134474c..053fa36 100644 --- a/_reviewers/kubeflow-consistent-descriptive-naming.md +++ b/_reviewers/kubeflow-consistent-descriptive-naming.md @@ -37,51 +37,3 @@ name: CentralDashboard-angular Frontend Tests name: CentralDashboard-angular Frontend check # filename: centraldb_angular_frontend_check.yaml ``` - - -[ - { - "discussion_id": "1216666765", - "pr_number": 6876, - "pr_file": "components/pvc-viewer/config/default/kustomization.yaml", - "created_at": "2023-06-04T12:51:45+00:00", - "commented_code": "# Adds namespace to all resources.\n# Note: the namespace name needs to be changed here and cannot be changed in an overlay\n# This is because certmanager and webhook use kustomize names to configure resources.\n# Setting another namespace here as in the overlay will lead to e.g. wrong DNSNames being generated.\nnamespace: kubeflow\n\n# Value of this field is prepended to the\n# names of all resources, e.g. a deployment named\n# \"wordpress\" becomes \"alices-wordpress\".\n# Note that it should also match with the prefix (text before '-') of the namespace\n# field above.\nnamePrefix: pvc-viewer-", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1216666765", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6876, - "pr_file": "components/pvc-viewer/config/default/kustomization.yaml", - "discussion_id": "1216666765", - "commented_code": "@@ -0,0 +1,135 @@\n+# Adds namespace to all resources.\n+# Note: the namespace name needs to be changed here and cannot be changed in an overlay\n+# This is because certmanager and webhook use kustomize names to configure resources.\n+# Setting another namespace here as in the overlay will lead to e.g. wrong DNSNames being generated.\n+namespace: kubeflow\n+\n+# Value of this field is prepended to the\n+# names of all resources, e.g. a deployment named\n+# \"wordpress\" becomes \"alices-wordpress\".\n+# Note that it should also match with the prefix (text before '-') of the namespace\n+# field above.\n+namePrefix: pvc-viewer-", - "comment_created_at": "2023-06-04T12:51:45+00:00", - "comment_author": "kimwnasptd", - "comment_body": "nit: can we make this `pvcviewer-` instead? 
In the labels as well we do `pvcviewers` so let's be uniform", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1039531607", - "pr_number": 6786, - "pr_file": ".github/workflows/centraldb_angular_frontend_check.yaml", - "created_at": "2022-12-05T12:30:57+00:00", - "commented_code": "name: CentralDashboard-angular Frontend check", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1039531607", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6786, - "pr_file": ".github/workflows/centraldb_angular_frontend_check.yaml", - "discussion_id": "1039531607", - "commented_code": "@@ -0,0 +1,34 @@\n+name: CentralDashboard-angular Frontend check", - "comment_created_at": "2022-12-05T12:30:57+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Let's rename this to `CentralDashboard-angular Frontend Tests` and also the file to `centraldb_angular_frontend_test.yaml`", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-consistent-separator-conventions.json b/_reviewers/kubeflow-consistent-separator-conventions.json new file mode 100644 index 0000000..c61364a --- /dev/null +++ b/_reviewers/kubeflow-consistent-separator-conventions.json @@ -0,0 +1,82 @@ +[ + { + "discussion_id": "540654806", + "pr_number": 5430, + "pr_file": "README.md", + "created_at": "2020-12-11T03:01:05+00:00", + "commented_code": "\nKubeflow is a Cloud Native platform for machine learning based on Google’s internal machine learning pipelines.\nKubeflow the cloud-native platform for machine learning operations - pipelines, training and deployment.\n\n---\nPlease refer to the official docs at [kubeflow.org](http://kubeflow.org).\n\n## Working Groups\nThe Kubeflow community is organized into working groups (WGs) with associated repositories, that focus on specific pieces of the ML platform. \n\n* [AutoML](https://github.com/kubeflow/community/tree/master/wg-automl)\n* [Deployment](https://github.com/kubeflow/community/tree/master/wgdeployment)\n* [Manifests](https://github.com/kubeflow/community/tree/master/wg-manifests)", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "540654806", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5430, + "pr_file": "README.md", + "discussion_id": "540654806", + "commented_code": "@@ -1,9 +1,19 @@\n \n-Kubeflow is a Cloud Native platform for machine learning based on Google’s internal machine learning pipelines.\n+Kubeflow the cloud-native platform for machine learning operations - pipelines, training and deployment.\n \n ---\n Please refer to the official docs at [kubeflow.org](http://kubeflow.org).\n \n+## Working Groups\n+The Kubeflow community is organized into working groups (WGs) with associated repositories, that focus on specific pieces of the ML platform. 
\n+\n+* [AutoML](https://github.com/kubeflow/community/tree/master/wg-automl)\n+* [Deployment](https://github.com/kubeflow/community/tree/master/wgdeployment)\n+* [Manifests](https://github.com/kubeflow/community/tree/master/wg-manifests)", + "comment_created_at": "2020-12-11T03:01:05+00:00", + "comment_author": "PatrickXYS", + "comment_body": "We need to update those links to make sure they can persist for a long time without going back and forth to fix it.", + "pr_file_module": null + }, + { + "comment_id": "542447900", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5430, + "pr_file": "README.md", + "discussion_id": "540654806", + "commented_code": "@@ -1,9 +1,19 @@\n \n-Kubeflow is a Cloud Native platform for machine learning based on Google’s internal machine learning pipelines.\n+Kubeflow the cloud-native platform for machine learning operations - pipelines, training and deployment.\n \n ---\n Please refer to the official docs at [kubeflow.org](http://kubeflow.org).\n \n+## Working Groups\n+The Kubeflow community is organized into working groups (WGs) with associated repositories, that focus on specific pieces of the ML platform. \n+\n+* [AutoML](https://github.com/kubeflow/community/tree/master/wg-automl)\n+* [Deployment](https://github.com/kubeflow/community/tree/master/wgdeployment)\n+* [Manifests](https://github.com/kubeflow/community/tree/master/wg-manifests)", + "comment_created_at": "2020-12-14T14:56:32+00:00", + "comment_author": "rui-vas", + "comment_body": "That is a good idea! What would be a good way to do it? ", + "pr_file_module": null + }, + { + "comment_id": "543062959", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5430, + "pr_file": "README.md", + "discussion_id": "540654806", + "commented_code": "@@ -1,9 +1,19 @@\n \n-Kubeflow is a Cloud Native platform for machine learning based on Google’s internal machine learning pipelines.\n+Kubeflow the cloud-native platform for machine learning operations - pipelines, training and deployment.\n \n ---\n Please refer to the official docs at [kubeflow.org](http://kubeflow.org).\n \n+## Working Groups\n+The Kubeflow community is organized into working groups (WGs) with associated repositories, that focus on specific pieces of the ML platform. \n+\n+* [AutoML](https://github.com/kubeflow/community/tree/master/wg-automl)\n+* [Deployment](https://github.com/kubeflow/community/tree/master/wgdeployment)\n+* [Manifests](https://github.com/kubeflow/community/tree/master/wg-manifests)", + "comment_created_at": "2020-12-15T05:44:38+00:00", + "comment_author": "Bobgy", + "comment_body": "The current links look right. kubeflow/community/master is the long term location", + "pr_file_module": null + }, + { + "comment_id": "551420552", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5430, + "pr_file": "README.md", + "discussion_id": "540654806", + "commented_code": "@@ -1,9 +1,19 @@\n \n-Kubeflow is a Cloud Native platform for machine learning based on Google’s internal machine learning pipelines.\n+Kubeflow the cloud-native platform for machine learning operations - pipelines, training and deployment.\n \n ---\n Please refer to the official docs at [kubeflow.org](http://kubeflow.org).\n \n+## Working Groups\n+The Kubeflow community is organized into working groups (WGs) with associated repositories, that focus on specific pieces of the ML platform. 
\n+\n+* [AutoML](https://github.com/kubeflow/community/tree/master/wg-automl)\n+* [Deployment](https://github.com/kubeflow/community/tree/master/wgdeployment)\n+* [Manifests](https://github.com/kubeflow/community/tree/master/wg-manifests)", + "comment_created_at": "2021-01-04T16:23:47+00:00", + "comment_author": "PatrickXYS", + "comment_body": "> * [Deployment] (https://github.com/kubeflow/community/tree/master/wgdeployment)\r\n\r\nThis is not correct, can we make it with a dash? `wg-deployment`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "375242639", + "pr_number": 4717, + "pr_file": "docs_dev/releasing.md", + "created_at": "2020-02-05T13:05:26+00:00", + "commented_code": "```\n * Set the tag to be the correct version for the tag.\n\n## Version the website\n# Version the website\n\nThe main Kubeflow website at [www.kubeflow.org](www.kubeflow.org) points to the\n**master** branch of the `kubeflow/website` repo. Similarly, \n[master.kubeflow.org](https://master.kubeflow.org/) also points to the master\nbranch.\nversion of the website we want users to see.\n\n* Most of the time this will correspond to the **master** branch of the `kubeflow/website` repo\n\n* However, leading up to a minor release (0.Y) [www.kubeflow.org](www.kubeflow.org)", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "375242639", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4717, + "pr_file": "docs_dev/releasing.md", + "discussion_id": "375242639", + "commented_code": "@@ -179,27 +185,47 @@ Alternatively you can use the UI.\n ```\n * Set the tag to be the correct version for the tag.\n \n-## Version the website\n+# Version the website\n \n The main Kubeflow website at [www.kubeflow.org](www.kubeflow.org) points to the\n-**master** branch of the `kubeflow/website` repo. Similarly, \n-[master.kubeflow.org](https://master.kubeflow.org/) also points to the master\n-branch.\n+version of the website we want users to see.\n+\n+* Most of the time this will correspond to the **master** branch of the `kubeflow/website` repo\n+\n+* However, leading up to a minor release (0.Y) [www.kubeflow.org](www.kubeflow.org) ", + "comment_created_at": "2020-02-05T13:05:26+00:00", + "comment_author": "sarahmaddox", + "comment_body": "Should we change `0.Y` to `vX.Y`? (Because soon the `0` will not mean much, as we've moving to v1.) Also, `vX.Y` is consistent with the section \"Creating and publishing a website branch for vX.Y\".)", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-consistent-separator-conventions.md b/_reviewers/kubeflow-consistent-separator-conventions.md index 72e08f8..e85e509 100644 --- a/_reviewers/kubeflow-consistent-separator-conventions.md +++ b/_reviewers/kubeflow-consistent-separator-conventions.md @@ -24,87 +24,3 @@ Use appropriate separators in compound identifiers to improve readability and en - Avoid: `0.Y`, `1.0` (without v prefix) This practice makes identifiers more readable, maintains consistency with standard conventions, and helps ensure references remain valid over time as the codebase evolves. Consistent naming patterns reduce the need for future refactoring and make documentation more predictable and easier to navigate. 
- - -[ - { - "discussion_id": "540654806", - "pr_number": 5430, - "pr_file": "README.md", - "created_at": "2020-12-11T03:01:05+00:00", - "commented_code": "\nKubeflow is a Cloud Native platform for machine learning based on Google\u2019s internal machine learning pipelines.\nKubeflow the cloud-native platform for machine learning operations - pipelines, training and deployment.\n\n---\nPlease refer to the official docs at [kubeflow.org](http://kubeflow.org).\n\n## Working Groups\nThe Kubeflow community is organized into working groups (WGs) with associated repositories, that focus on specific pieces of the ML platform. \n\n* [AutoML](https://github.com/kubeflow/community/tree/master/wg-automl)\n* [Deployment](https://github.com/kubeflow/community/tree/master/wgdeployment)\n* [Manifests](https://github.com/kubeflow/community/tree/master/wg-manifests)", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "540654806", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5430, - "pr_file": "README.md", - "discussion_id": "540654806", - "commented_code": "@@ -1,9 +1,19 @@\n \n-Kubeflow is a Cloud Native platform for machine learning based on Google\u2019s internal machine learning pipelines.\n+Kubeflow the cloud-native platform for machine learning operations - pipelines, training and deployment.\n \n ---\n Please refer to the official docs at [kubeflow.org](http://kubeflow.org).\n \n+## Working Groups\n+The Kubeflow community is organized into working groups (WGs) with associated repositories, that focus on specific pieces of the ML platform. \n+\n+* [AutoML](https://github.com/kubeflow/community/tree/master/wg-automl)\n+* [Deployment](https://github.com/kubeflow/community/tree/master/wgdeployment)\n+* [Manifests](https://github.com/kubeflow/community/tree/master/wg-manifests)", - "comment_created_at": "2020-12-11T03:01:05+00:00", - "comment_author": "PatrickXYS", - "comment_body": "We need to update those links to make sure they can persist for a long time without going back and forth to fix it.", - "pr_file_module": null - }, - { - "comment_id": "542447900", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5430, - "pr_file": "README.md", - "discussion_id": "540654806", - "commented_code": "@@ -1,9 +1,19 @@\n \n-Kubeflow is a Cloud Native platform for machine learning based on Google\u2019s internal machine learning pipelines.\n+Kubeflow the cloud-native platform for machine learning operations - pipelines, training and deployment.\n \n ---\n Please refer to the official docs at [kubeflow.org](http://kubeflow.org).\n \n+## Working Groups\n+The Kubeflow community is organized into working groups (WGs) with associated repositories, that focus on specific pieces of the ML platform. \n+\n+* [AutoML](https://github.com/kubeflow/community/tree/master/wg-automl)\n+* [Deployment](https://github.com/kubeflow/community/tree/master/wgdeployment)\n+* [Manifests](https://github.com/kubeflow/community/tree/master/wg-manifests)", - "comment_created_at": "2020-12-14T14:56:32+00:00", - "comment_author": "rui-vas", - "comment_body": "That is a good idea! What would be a good way to do it? 
", - "pr_file_module": null - }, - { - "comment_id": "543062959", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5430, - "pr_file": "README.md", - "discussion_id": "540654806", - "commented_code": "@@ -1,9 +1,19 @@\n \n-Kubeflow is a Cloud Native platform for machine learning based on Google\u2019s internal machine learning pipelines.\n+Kubeflow the cloud-native platform for machine learning operations - pipelines, training and deployment.\n \n ---\n Please refer to the official docs at [kubeflow.org](http://kubeflow.org).\n \n+## Working Groups\n+The Kubeflow community is organized into working groups (WGs) with associated repositories, that focus on specific pieces of the ML platform. \n+\n+* [AutoML](https://github.com/kubeflow/community/tree/master/wg-automl)\n+* [Deployment](https://github.com/kubeflow/community/tree/master/wgdeployment)\n+* [Manifests](https://github.com/kubeflow/community/tree/master/wg-manifests)", - "comment_created_at": "2020-12-15T05:44:38+00:00", - "comment_author": "Bobgy", - "comment_body": "The current links look right. kubeflow/community/master is the long term location", - "pr_file_module": null - }, - { - "comment_id": "551420552", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5430, - "pr_file": "README.md", - "discussion_id": "540654806", - "commented_code": "@@ -1,9 +1,19 @@\n \n-Kubeflow is a Cloud Native platform for machine learning based on Google\u2019s internal machine learning pipelines.\n+Kubeflow the cloud-native platform for machine learning operations - pipelines, training and deployment.\n \n ---\n Please refer to the official docs at [kubeflow.org](http://kubeflow.org).\n \n+## Working Groups\n+The Kubeflow community is organized into working groups (WGs) with associated repositories, that focus on specific pieces of the ML platform. \n+\n+* [AutoML](https://github.com/kubeflow/community/tree/master/wg-automl)\n+* [Deployment](https://github.com/kubeflow/community/tree/master/wgdeployment)\n+* [Manifests](https://github.com/kubeflow/community/tree/master/wg-manifests)", - "comment_created_at": "2021-01-04T16:23:47+00:00", - "comment_author": "PatrickXYS", - "comment_body": "> * [Deployment] (https://github.com/kubeflow/community/tree/master/wgdeployment)\r\n\r\nThis is not correct, can we make it with a dash? `wg-deployment`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "375242639", - "pr_number": 4717, - "pr_file": "docs_dev/releasing.md", - "created_at": "2020-02-05T13:05:26+00:00", - "commented_code": "```\n * Set the tag to be the correct version for the tag.\n\n## Version the website\n# Version the website\n\nThe main Kubeflow website at [www.kubeflow.org](www.kubeflow.org) points to the\n**master** branch of the `kubeflow/website` repo. 
Similarly, \n[master.kubeflow.org](https://master.kubeflow.org/) also points to the master\nbranch.\nversion of the website we want users to see.\n\n* Most of the time this will correspond to the **master** branch of the `kubeflow/website` repo\n\n* However, leading up to a minor release (0.Y) [www.kubeflow.org](www.kubeflow.org)", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "375242639", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4717, - "pr_file": "docs_dev/releasing.md", - "discussion_id": "375242639", - "commented_code": "@@ -179,27 +185,47 @@ Alternatively you can use the UI.\n ```\n * Set the tag to be the correct version for the tag.\n \n-## Version the website\n+# Version the website\n \n The main Kubeflow website at [www.kubeflow.org](www.kubeflow.org) points to the\n-**master** branch of the `kubeflow/website` repo. Similarly, \n-[master.kubeflow.org](https://master.kubeflow.org/) also points to the master\n-branch.\n+version of the website we want users to see.\n+\n+* Most of the time this will correspond to the **master** branch of the `kubeflow/website` repo\n+\n+* However, leading up to a minor release (0.Y) [www.kubeflow.org](www.kubeflow.org) ", - "comment_created_at": "2020-02-05T13:05:26+00:00", - "comment_author": "sarahmaddox", - "comment_body": "Should we change `0.Y` to `vX.Y`? (Because soon the `0` will not mean much, as we've moving to v1.) Also, `vX.Y` is consistent with the section \"Creating and publishing a website branch for vX.Y\".)", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-contextualize-and-classify-errors.json b/_reviewers/kubeflow-contextualize-and-classify-errors.json new file mode 100644 index 0000000..ba08ed1 --- /dev/null +++ b/_reviewers/kubeflow-contextualize-and-classify-errors.json @@ -0,0 +1,102 @@ +[ + { + "discussion_id": "369260124", + "pr_number": 4674, + "pr_file": "components/access-management/kfam/api_default.go", + "created_at": "2020-01-21T21:46:33+00:00", + "commented_code": "if c.isOwnerOrAdmin(useremail, binding.ReferredNamespace) {\n\t\terr := c.bindingClient.Create(&binding, c.userIdHeader, c.userIdPrefix)\n\t\tif err == nil {\n\t\t\tIncRequestCounter(\"\", useremail, action, r.URL.Path)\n\t\t\tw.WriteHeader(http.StatusOK)\n\t\t} else {\n\t\t\tIncRequestErrorCounter(err.Error(), useremail, action, r.URL.Path,\n\t\t\t\tSEVERITY_MAJOR)\n\t\t\tw.WriteHeader(http.StatusForbidden)\n\t\t\tw.Write([]byte(err.Error()))\n\t\t\tif _, err := w.Write([]byte(err.Error())); err != nil {\n\t\t\t\tlog.Error(err)\n\t\t\t}\n\t\t}\n\t} else {\n\t\tIncRequestErrorCounter(\"forbidden\", useremail, action, r.URL.Path,\n\t\t\tSEVERITY_MINOR)\n\t\tw.WriteHeader(http.StatusForbidden)\n\t}\n}\n\nfunc (c *KfamV1Alpha1Client) CreateProfile(w http.ResponseWriter, r *http.Request) {\n\tw.Header().Set(\"Content-Type\", \"application/json; charset=UTF-8\")\n\tconst action = \"create\"\n\tvar profile profilev1beta1.Profile\n\tif err := json.NewDecoder(r.Body).Decode(&profile); err != nil {\n\t\tjson.NewEncoder(w).Encode(err)\n\t\tIncRequestErrorCounter(\"decode error\", \"\", action, r.URL.Path,\n\t\t\tSEVERITY_MAJOR)\n\t\tif err := json.NewEncoder(w).Encode(err); err != nil {\n\t\t\tlog.Error(err)\n\t\t}\n\t\tw.WriteHeader(http.StatusForbidden)\n\t\treturn\n\t}\n\t_, err := c.profileClient.Create(&profile)\n\tif err == nil {", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "369260124", + "repo_full_name": "kubeflow/kubeflow", + 
"pr_number": 4674, + "pr_file": "components/access-management/kfam/api_default.go", + "discussion_id": "369260124", + "commented_code": "@@ -102,38 +108,60 @@ func (c *KfamV1Alpha1Client) CreateBinding(w http.ResponseWriter, r *http.Reques\n \tif c.isOwnerOrAdmin(useremail, binding.ReferredNamespace) {\n \t\terr := c.bindingClient.Create(&binding, c.userIdHeader, c.userIdPrefix)\n \t\tif err == nil {\n+\t\t\tIncRequestCounter(\"\", useremail, action, r.URL.Path)\n \t\t\tw.WriteHeader(http.StatusOK)\n \t\t} else {\n+\t\t\tIncRequestErrorCounter(err.Error(), useremail, action, r.URL.Path,\n+\t\t\t\tSEVERITY_MAJOR)\n \t\t\tw.WriteHeader(http.StatusForbidden)\n-\t\t\tw.Write([]byte(err.Error()))\n+\t\t\tif _, err := w.Write([]byte(err.Error())); err != nil {\n+\t\t\t\tlog.Error(err)\n+\t\t\t}\n \t\t}\n \t} else {\n+\t\tIncRequestErrorCounter(\"forbidden\", useremail, action, r.URL.Path,\n+\t\t\tSEVERITY_MINOR)\n \t\tw.WriteHeader(http.StatusForbidden)\n \t}\n }\n \n func (c *KfamV1Alpha1Client) CreateProfile(w http.ResponseWriter, r *http.Request) {\n \tw.Header().Set(\"Content-Type\", \"application/json; charset=UTF-8\")\n+\tconst action = \"create\"\n \tvar profile profilev1beta1.Profile\n \tif err := json.NewDecoder(r.Body).Decode(&profile); err != nil {\n-\t\tjson.NewEncoder(w).Encode(err)\n+\t\tIncRequestErrorCounter(\"decode error\", \"\", action, r.URL.Path,\n+\t\t\tSEVERITY_MAJOR)\n+\t\tif err := json.NewEncoder(w).Encode(err); err != nil {\n+\t\t\tlog.Error(err)\n+\t\t}\n \t\tw.WriteHeader(http.StatusForbidden)\n \t\treturn\n \t}\n \t_, err := c.profileClient.Create(&profile)\n \tif err == nil {", + "comment_created_at": "2020-01-21T21:46:33+00:00", + "comment_author": "zhenghuiwang", + "comment_body": "This if-else now becomes more complex code. 
I think it is time to consider following the Go error handling pattern to increase readability:\r\n\r\n```\r\nif err != nil {\r\n // log error and return response.\r\n return\r\n}\r\n// handle the err = nil case\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "369265640", + "pr_number": 4674, + "pr_file": "components/access-management/kfam/api_default.go", + "created_at": "2020-01-21T21:58:53+00:00", + "commented_code": "if c.isOwnerOrAdmin(useremail, binding.ReferredNamespace) {\n\t\terr := c.bindingClient.Create(&binding, c.userIdHeader, c.userIdPrefix)\n\t\tif err == nil {\n\t\t\tIncRequestCounter(\"\", useremail, action, r.URL.Path)\n\t\t\tw.WriteHeader(http.StatusOK)\n\t\t} else {\n\t\t\tIncRequestErrorCounter(err.Error(), useremail, action, r.URL.Path,\n\t\t\t\tSEVERITY_MAJOR)\n\t\t\tw.WriteHeader(http.StatusForbidden)\n\t\t\tw.Write([]byte(err.Error()))\n\t\t\tif _, err := w.Write([]byte(err.Error())); err != nil {\n\t\t\t\tlog.Error(err)\n\t\t\t}\n\t\t}\n\t} else {\n\t\tIncRequestErrorCounter(\"forbidden\", useremail, action, r.URL.Path,\n\t\t\tSEVERITY_MINOR)\n\t\tw.WriteHeader(http.StatusForbidden)\n\t}\n}\n\nfunc (c *KfamV1Alpha1Client) CreateProfile(w http.ResponseWriter, r *http.Request) {\n\tw.Header().Set(\"Content-Type\", \"application/json; charset=UTF-8\")\n\tconst action = \"create\"\n\tvar profile profilev1beta1.Profile\n\tif err := json.NewDecoder(r.Body).Decode(&profile); err != nil {\n\t\tjson.NewEncoder(w).Encode(err)\n\t\tIncRequestErrorCounter(\"decode error\", \"\", action, r.URL.Path,\n\t\t\tSEVERITY_MAJOR)\n\t\tif err := json.NewEncoder(w).Encode(err); err != nil {\n\t\t\tlog.Error(err)\n\t\t}\n\t\tw.WriteHeader(http.StatusForbidden)\n\t\treturn\n\t}\n\t_, err := c.profileClient.Create(&profile)\n\tif err == nil {\n\t\tIncRequestCounter(\"\", \"\", action, r.URL.Path)\n\t\tw.WriteHeader(http.StatusOK)\n\t} else {\n\t\tIncRequestErrorCounter(err.Error(), \"\", action, r.URL.Path,\n\t\t\tSEVERITY_MAJOR)\n\t\tw.WriteHeader(http.StatusForbidden)\n\t\tw.Write([]byte(err.Error()))\n\t\tif _, err := w.Write([]byte(err.Error())); err != nil {\n\t\t\tlog.Error(err)", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "369265640", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4674, + "pr_file": "components/access-management/kfam/api_default.go", + "discussion_id": "369265640", + "commented_code": "@@ -102,38 +108,60 @@ func (c *KfamV1Alpha1Client) CreateBinding(w http.ResponseWriter, r *http.Reques\n \tif c.isOwnerOrAdmin(useremail, binding.ReferredNamespace) {\n \t\terr := c.bindingClient.Create(&binding, c.userIdHeader, c.userIdPrefix)\n \t\tif err == nil {\n+\t\t\tIncRequestCounter(\"\", useremail, action, r.URL.Path)\n \t\t\tw.WriteHeader(http.StatusOK)\n \t\t} else {\n+\t\t\tIncRequestErrorCounter(err.Error(), useremail, action, r.URL.Path,\n+\t\t\t\tSEVERITY_MAJOR)\n \t\t\tw.WriteHeader(http.StatusForbidden)\n-\t\t\tw.Write([]byte(err.Error()))\n+\t\t\tif _, err := w.Write([]byte(err.Error())); err != nil {\n+\t\t\t\tlog.Error(err)\n+\t\t\t}\n \t\t}\n \t} else {\n+\t\tIncRequestErrorCounter(\"forbidden\", useremail, action, r.URL.Path,\n+\t\t\tSEVERITY_MINOR)\n \t\tw.WriteHeader(http.StatusForbidden)\n \t}\n }\n \n func (c *KfamV1Alpha1Client) CreateProfile(w http.ResponseWriter, r *http.Request) {\n \tw.Header().Set(\"Content-Type\", \"application/json; charset=UTF-8\")\n+\tconst action = \"create\"\n \tvar profile profilev1beta1.Profile\n \tif err := json.NewDecoder(r.Body).Decode(&profile); err 
!= nil {\n-\t\tjson.NewEncoder(w).Encode(err)\n+\t\tIncRequestErrorCounter(\"decode error\", \"\", action, r.URL.Path,\n+\t\t\tSEVERITY_MAJOR)\n+\t\tif err := json.NewEncoder(w).Encode(err); err != nil {\n+\t\t\tlog.Error(err)\n+\t\t}\n \t\tw.WriteHeader(http.StatusForbidden)\n \t\treturn\n \t}\n \t_, err := c.profileClient.Create(&profile)\n \tif err == nil {\n+\t\tIncRequestCounter(\"\", \"\", action, r.URL.Path)\n \t\tw.WriteHeader(http.StatusOK)\n \t} else {\n+\t\tIncRequestErrorCounter(err.Error(), \"\", action, r.URL.Path,\n+\t\t\tSEVERITY_MAJOR)\n \t\tw.WriteHeader(http.StatusForbidden)\n-\t\tw.Write([]byte(err.Error()))\n+\t\tif _, err := w.Write([]byte(err.Error())); err != nil {\n+\t\t\tlog.Error(err)", + "comment_created_at": "2020-01-21T21:58:53+00:00", + "comment_author": "zhenghuiwang", + "comment_body": "Couple of suggestions for error handling in this case and other similar cases:\r\n\r\n1. There are two variables both named `err` here with different scopes. I would suggest using two different names to make it clear which error is used, like `err` & `e`.\r\n\r\n2. The `log.Error(err)` refers to the error of failing to write the response bytes. Besides this, the error message of status forbidden(non-OK response) also worth logging on server side.\r\n\r\n3. Consider wrapping the context of errors for future debugging, such as\r\n```\r\nlog.Error(fmt.Errorf(\"Request for profile %s is forbidden: %v\", action, err))\r\nlog.Error(fmt.Errorf(\"Internal server error: failed to write response bytes: %v\", e))\r\n```\r\nThere are libraries for wrapping error in more structured way.\r\n ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "320266099", + "pr_number": 4040, + "pr_file": "bootstrap/cmd/kfctl/cmd/apply.go", + "created_at": "2019-09-03T13:18:13+00:00", + "commented_code": "if resourceErr != nil {\n\t\t\treturn fmt.Errorf(\"invalid resource: %v\", resourceErr)", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "320266099", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4040, + "pr_file": "bootstrap/cmd/kfctl/cmd/apply.go", + "discussion_id": "320266099", + "commented_code": "@@ -39,7 +45,19 @@ var applyCmd = &cobra.Command{\n \t\tif resourceErr != nil {\n \t\t\treturn fmt.Errorf(\"invalid resource: %v\", resourceErr)", + "comment_created_at": "2019-09-03T13:18:13+00:00", + "comment_author": "yanniszark", + "comment_body": "I believe we want to return a KFError everywhere.\r\nCheck out how the coordinator does it: https://github.com/kubeflow/kubeflow/blob/7d976050933d642069591b72138eb3c152224d08/bootstrap/pkg/kfapp/coordinator/coordinator.go#L79\r\n\r\nIdeally, we could make a helper function `NewInternalKFError(e error, msg string)` in `pkg/apis/kferrors.go` to reduce boilerplate.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "322854742", + "pr_number": 4040, + "pr_file": "bootstrap/cmd/kfctl/cmd/apply/apply.go", + "created_at": "2019-09-10T16:53:36+00:00", + "commented_code": "package apply\n\nimport (\n\t\"errors\"\n\t\"fmt\"\n\t\"io\"\n\t\"os\"\n\n\tkfapis \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis\"\n\tkftypes \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n\tkfdefsv3 \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n\tlog \"github.com/sirupsen/logrus\"\n)\n\n// Set default app name to kf-app\nconst appName = \"kf-app\"\n\nvar kfErr = &kfapis.KfError{\n\tCode: 
int(kfapis.INTERNAL_ERROR),", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "322854742", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4040, + "pr_file": "bootstrap/cmd/kfctl/cmd/apply/apply.go", + "discussion_id": "322854742", + "commented_code": "@@ -0,0 +1,112 @@\n+package apply\n+\n+import (\n+\t\"errors\"\n+\t\"fmt\"\n+\t\"io\"\n+\t\"os\"\n+\n+\tkfapis \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis\"\n+\tkftypes \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n+\tkfdefsv3 \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n+\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n+\tlog \"github.com/sirupsen/logrus\"\n+)\n+\n+// Set default app name to kf-app\n+const appName = \"kf-app\"\n+\n+var kfErr = &kfapis.KfError{\n+\tCode: int(kfapis.INTERNAL_ERROR),", + "comment_created_at": "2019-09-10T16:53:36+00:00", + "comment_author": "gabrielwen", + "comment_body": "are all errors internal errors? internal errors mean there is something wrong with `kfctl` and we need to fix it. shouldn't part of errors be invalid arguments?", + "pr_file_module": null + }, + { + "comment_id": "323031180", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4040, + "pr_file": "bootstrap/cmd/kfctl/cmd/apply/apply.go", + "discussion_id": "322854742", + "commented_code": "@@ -0,0 +1,112 @@\n+package apply\n+\n+import (\n+\t\"errors\"\n+\t\"fmt\"\n+\t\"io\"\n+\t\"os\"\n+\n+\tkfapis \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis\"\n+\tkftypes \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n+\tkfdefsv3 \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n+\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n+\tlog \"github.com/sirupsen/logrus\"\n+)\n+\n+// Set default app name to kf-app\n+const appName = \"kf-app\"\n+\n+var kfErr = &kfapis.KfError{\n+\tCode: int(kfapis.INTERNAL_ERROR),", + "comment_created_at": "2019-09-11T01:57:05+00:00", + "comment_author": "swiftdiaries", + "comment_body": "Added a bunch of Invalid argument errors for when appropriate.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-contextualize-and-classify-errors.md b/_reviewers/kubeflow-contextualize-and-classify-errors.md index 2661b66..bd9a4b6 100644 --- a/_reviewers/kubeflow-contextualize-and-classify-errors.md +++ b/_reviewers/kubeflow-contextualize-and-classify-errors.md @@ -70,107 +70,3 @@ func NewInvalidArgumentError(err error, msg string) *kfapis.KfError { } } ``` - - -[ - { - "discussion_id": "369260124", - "pr_number": 4674, - "pr_file": "components/access-management/kfam/api_default.go", - "created_at": "2020-01-21T21:46:33+00:00", - "commented_code": "if c.isOwnerOrAdmin(useremail, binding.ReferredNamespace) {\n\t\terr := c.bindingClient.Create(&binding, c.userIdHeader, c.userIdPrefix)\n\t\tif err == nil {\n\t\t\tIncRequestCounter(\"\", useremail, action, r.URL.Path)\n\t\t\tw.WriteHeader(http.StatusOK)\n\t\t} else {\n\t\t\tIncRequestErrorCounter(err.Error(), useremail, action, r.URL.Path,\n\t\t\t\tSEVERITY_MAJOR)\n\t\t\tw.WriteHeader(http.StatusForbidden)\n\t\t\tw.Write([]byte(err.Error()))\n\t\t\tif _, err := w.Write([]byte(err.Error())); err != nil {\n\t\t\t\tlog.Error(err)\n\t\t\t}\n\t\t}\n\t} else {\n\t\tIncRequestErrorCounter(\"forbidden\", useremail, action, r.URL.Path,\n\t\t\tSEVERITY_MINOR)\n\t\tw.WriteHeader(http.StatusForbidden)\n\t}\n}\n\nfunc (c *KfamV1Alpha1Client) CreateProfile(w 
http.ResponseWriter, r *http.Request) {\n\tw.Header().Set(\"Content-Type\", \"application/json; charset=UTF-8\")\n\tconst action = \"create\"\n\tvar profile profilev1beta1.Profile\n\tif err := json.NewDecoder(r.Body).Decode(&profile); err != nil {\n\t\tjson.NewEncoder(w).Encode(err)\n\t\tIncRequestErrorCounter(\"decode error\", \"\", action, r.URL.Path,\n\t\t\tSEVERITY_MAJOR)\n\t\tif err := json.NewEncoder(w).Encode(err); err != nil {\n\t\t\tlog.Error(err)\n\t\t}\n\t\tw.WriteHeader(http.StatusForbidden)\n\t\treturn\n\t}\n\t_, err := c.profileClient.Create(&profile)\n\tif err == nil {", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "369260124", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4674, - "pr_file": "components/access-management/kfam/api_default.go", - "discussion_id": "369260124", - "commented_code": "@@ -102,38 +108,60 @@ func (c *KfamV1Alpha1Client) CreateBinding(w http.ResponseWriter, r *http.Reques\n \tif c.isOwnerOrAdmin(useremail, binding.ReferredNamespace) {\n \t\terr := c.bindingClient.Create(&binding, c.userIdHeader, c.userIdPrefix)\n \t\tif err == nil {\n+\t\t\tIncRequestCounter(\"\", useremail, action, r.URL.Path)\n \t\t\tw.WriteHeader(http.StatusOK)\n \t\t} else {\n+\t\t\tIncRequestErrorCounter(err.Error(), useremail, action, r.URL.Path,\n+\t\t\t\tSEVERITY_MAJOR)\n \t\t\tw.WriteHeader(http.StatusForbidden)\n-\t\t\tw.Write([]byte(err.Error()))\n+\t\t\tif _, err := w.Write([]byte(err.Error())); err != nil {\n+\t\t\t\tlog.Error(err)\n+\t\t\t}\n \t\t}\n \t} else {\n+\t\tIncRequestErrorCounter(\"forbidden\", useremail, action, r.URL.Path,\n+\t\t\tSEVERITY_MINOR)\n \t\tw.WriteHeader(http.StatusForbidden)\n \t}\n }\n \n func (c *KfamV1Alpha1Client) CreateProfile(w http.ResponseWriter, r *http.Request) {\n \tw.Header().Set(\"Content-Type\", \"application/json; charset=UTF-8\")\n+\tconst action = \"create\"\n \tvar profile profilev1beta1.Profile\n \tif err := json.NewDecoder(r.Body).Decode(&profile); err != nil {\n-\t\tjson.NewEncoder(w).Encode(err)\n+\t\tIncRequestErrorCounter(\"decode error\", \"\", action, r.URL.Path,\n+\t\t\tSEVERITY_MAJOR)\n+\t\tif err := json.NewEncoder(w).Encode(err); err != nil {\n+\t\t\tlog.Error(err)\n+\t\t}\n \t\tw.WriteHeader(http.StatusForbidden)\n \t\treturn\n \t}\n \t_, err := c.profileClient.Create(&profile)\n \tif err == nil {", - "comment_created_at": "2020-01-21T21:46:33+00:00", - "comment_author": "zhenghuiwang", - "comment_body": "This if-else now becomes more complex code. 
I think it is time to consider following the Go error handling pattern to increase readability:\r\n\r\n```\r\nif err != nil {\r\n // log error and return response.\r\n return\r\n}\r\n// handle the err = nil case\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "369265640", - "pr_number": 4674, - "pr_file": "components/access-management/kfam/api_default.go", - "created_at": "2020-01-21T21:58:53+00:00", - "commented_code": "if c.isOwnerOrAdmin(useremail, binding.ReferredNamespace) {\n\t\terr := c.bindingClient.Create(&binding, c.userIdHeader, c.userIdPrefix)\n\t\tif err == nil {\n\t\t\tIncRequestCounter(\"\", useremail, action, r.URL.Path)\n\t\t\tw.WriteHeader(http.StatusOK)\n\t\t} else {\n\t\t\tIncRequestErrorCounter(err.Error(), useremail, action, r.URL.Path,\n\t\t\t\tSEVERITY_MAJOR)\n\t\t\tw.WriteHeader(http.StatusForbidden)\n\t\t\tw.Write([]byte(err.Error()))\n\t\t\tif _, err := w.Write([]byte(err.Error())); err != nil {\n\t\t\t\tlog.Error(err)\n\t\t\t}\n\t\t}\n\t} else {\n\t\tIncRequestErrorCounter(\"forbidden\", useremail, action, r.URL.Path,\n\t\t\tSEVERITY_MINOR)\n\t\tw.WriteHeader(http.StatusForbidden)\n\t}\n}\n\nfunc (c *KfamV1Alpha1Client) CreateProfile(w http.ResponseWriter, r *http.Request) {\n\tw.Header().Set(\"Content-Type\", \"application/json; charset=UTF-8\")\n\tconst action = \"create\"\n\tvar profile profilev1beta1.Profile\n\tif err := json.NewDecoder(r.Body).Decode(&profile); err != nil {\n\t\tjson.NewEncoder(w).Encode(err)\n\t\tIncRequestErrorCounter(\"decode error\", \"\", action, r.URL.Path,\n\t\t\tSEVERITY_MAJOR)\n\t\tif err := json.NewEncoder(w).Encode(err); err != nil {\n\t\t\tlog.Error(err)\n\t\t}\n\t\tw.WriteHeader(http.StatusForbidden)\n\t\treturn\n\t}\n\t_, err := c.profileClient.Create(&profile)\n\tif err == nil {\n\t\tIncRequestCounter(\"\", \"\", action, r.URL.Path)\n\t\tw.WriteHeader(http.StatusOK)\n\t} else {\n\t\tIncRequestErrorCounter(err.Error(), \"\", action, r.URL.Path,\n\t\t\tSEVERITY_MAJOR)\n\t\tw.WriteHeader(http.StatusForbidden)\n\t\tw.Write([]byte(err.Error()))\n\t\tif _, err := w.Write([]byte(err.Error())); err != nil {\n\t\t\tlog.Error(err)", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "369265640", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4674, - "pr_file": "components/access-management/kfam/api_default.go", - "discussion_id": "369265640", - "commented_code": "@@ -102,38 +108,60 @@ func (c *KfamV1Alpha1Client) CreateBinding(w http.ResponseWriter, r *http.Reques\n \tif c.isOwnerOrAdmin(useremail, binding.ReferredNamespace) {\n \t\terr := c.bindingClient.Create(&binding, c.userIdHeader, c.userIdPrefix)\n \t\tif err == nil {\n+\t\t\tIncRequestCounter(\"\", useremail, action, r.URL.Path)\n \t\t\tw.WriteHeader(http.StatusOK)\n \t\t} else {\n+\t\t\tIncRequestErrorCounter(err.Error(), useremail, action, r.URL.Path,\n+\t\t\t\tSEVERITY_MAJOR)\n \t\t\tw.WriteHeader(http.StatusForbidden)\n-\t\t\tw.Write([]byte(err.Error()))\n+\t\t\tif _, err := w.Write([]byte(err.Error())); err != nil {\n+\t\t\t\tlog.Error(err)\n+\t\t\t}\n \t\t}\n \t} else {\n+\t\tIncRequestErrorCounter(\"forbidden\", useremail, action, r.URL.Path,\n+\t\t\tSEVERITY_MINOR)\n \t\tw.WriteHeader(http.StatusForbidden)\n \t}\n }\n \n func (c *KfamV1Alpha1Client) CreateProfile(w http.ResponseWriter, r *http.Request) {\n \tw.Header().Set(\"Content-Type\", \"application/json; charset=UTF-8\")\n+\tconst action = \"create\"\n \tvar profile profilev1beta1.Profile\n \tif err := json.NewDecoder(r.Body).Decode(&profile); err 
!= nil {\n-\t\tjson.NewEncoder(w).Encode(err)\n+\t\tIncRequestErrorCounter(\"decode error\", \"\", action, r.URL.Path,\n+\t\t\tSEVERITY_MAJOR)\n+\t\tif err := json.NewEncoder(w).Encode(err); err != nil {\n+\t\t\tlog.Error(err)\n+\t\t}\n \t\tw.WriteHeader(http.StatusForbidden)\n \t\treturn\n \t}\n \t_, err := c.profileClient.Create(&profile)\n \tif err == nil {\n+\t\tIncRequestCounter(\"\", \"\", action, r.URL.Path)\n \t\tw.WriteHeader(http.StatusOK)\n \t} else {\n+\t\tIncRequestErrorCounter(err.Error(), \"\", action, r.URL.Path,\n+\t\t\tSEVERITY_MAJOR)\n \t\tw.WriteHeader(http.StatusForbidden)\n-\t\tw.Write([]byte(err.Error()))\n+\t\tif _, err := w.Write([]byte(err.Error())); err != nil {\n+\t\t\tlog.Error(err)", - "comment_created_at": "2020-01-21T21:58:53+00:00", - "comment_author": "zhenghuiwang", - "comment_body": "Couple of suggestions for error handling in this case and other similar cases:\r\n\r\n1. There are two variables both named `err` here with different scopes. I would suggest using two different names to make it clear which error is used, like `err` & `e`.\r\n\r\n2. The `log.Error(err)` refers to the error of failing to write the response bytes. Besides this, the error message of status forbidden(non-OK response) also worth logging on server side.\r\n\r\n3. Consider wrapping the context of errors for future debugging, such as\r\n```\r\nlog.Error(fmt.Errorf(\"Request for profile %s is forbidden: %v\", action, err))\r\nlog.Error(fmt.Errorf(\"Internal server error: failed to write response bytes: %v\", e))\r\n```\r\nThere are libraries for wrapping error in more structured way.\r\n ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "320266099", - "pr_number": 4040, - "pr_file": "bootstrap/cmd/kfctl/cmd/apply.go", - "created_at": "2019-09-03T13:18:13+00:00", - "commented_code": "if resourceErr != nil {\n\t\t\treturn fmt.Errorf(\"invalid resource: %v\", resourceErr)", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "320266099", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4040, - "pr_file": "bootstrap/cmd/kfctl/cmd/apply.go", - "discussion_id": "320266099", - "commented_code": "@@ -39,7 +45,19 @@ var applyCmd = &cobra.Command{\n \t\tif resourceErr != nil {\n \t\t\treturn fmt.Errorf(\"invalid resource: %v\", resourceErr)", - "comment_created_at": "2019-09-03T13:18:13+00:00", - "comment_author": "yanniszark", - "comment_body": "I believe we want to return a KFError everywhere.\r\nCheck out how the coordinator does it: https://github.com/kubeflow/kubeflow/blob/7d976050933d642069591b72138eb3c152224d08/bootstrap/pkg/kfapp/coordinator/coordinator.go#L79\r\n\r\nIdeally, we could make a helper function `NewInternalKFError(e error, msg string)` in `pkg/apis/kferrors.go` to reduce boilerplate.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "322854742", - "pr_number": 4040, - "pr_file": "bootstrap/cmd/kfctl/cmd/apply/apply.go", - "created_at": "2019-09-10T16:53:36+00:00", - "commented_code": "package apply\n\nimport (\n\t\"errors\"\n\t\"fmt\"\n\t\"io\"\n\t\"os\"\n\n\tkfapis \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis\"\n\tkftypes \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n\tkfdefsv3 \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n\tlog \"github.com/sirupsen/logrus\"\n)\n\n// Set default app name to kf-app\nconst appName = \"kf-app\"\n\nvar kfErr = &kfapis.KfError{\n\tCode: 
int(kfapis.INTERNAL_ERROR),", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "322854742", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4040, - "pr_file": "bootstrap/cmd/kfctl/cmd/apply/apply.go", - "discussion_id": "322854742", - "commented_code": "@@ -0,0 +1,112 @@\n+package apply\n+\n+import (\n+\t\"errors\"\n+\t\"fmt\"\n+\t\"io\"\n+\t\"os\"\n+\n+\tkfapis \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis\"\n+\tkftypes \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n+\tkfdefsv3 \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n+\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n+\tlog \"github.com/sirupsen/logrus\"\n+)\n+\n+// Set default app name to kf-app\n+const appName = \"kf-app\"\n+\n+var kfErr = &kfapis.KfError{\n+\tCode: int(kfapis.INTERNAL_ERROR),", - "comment_created_at": "2019-09-10T16:53:36+00:00", - "comment_author": "gabrielwen", - "comment_body": "are all errors internal errors? internal errors mean there is something wrong with `kfctl` and we need to fix it. shouldn't part of errors be invalid arguments?", - "pr_file_module": null - }, - { - "comment_id": "323031180", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4040, - "pr_file": "bootstrap/cmd/kfctl/cmd/apply/apply.go", - "discussion_id": "322854742", - "commented_code": "@@ -0,0 +1,112 @@\n+package apply\n+\n+import (\n+\t\"errors\"\n+\t\"fmt\"\n+\t\"io\"\n+\t\"os\"\n+\n+\tkfapis \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis\"\n+\tkftypes \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n+\tkfdefsv3 \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n+\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n+\tlog \"github.com/sirupsen/logrus\"\n+)\n+\n+// Set default app name to kf-app\n+const appName = \"kf-app\"\n+\n+var kfErr = &kfapis.KfError{\n+\tCode: int(kfapis.INTERNAL_ERROR),", - "comment_created_at": "2019-09-11T01:57:05+00:00", - "comment_author": "swiftdiaries", - "comment_body": "Added a bunch of Invalid argument errors for when appropriate.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-control-header-modification.json b/_reviewers/kubeflow-control-header-modification.json new file mode 100644 index 0000000..55e7c42 --- /dev/null +++ b/_reviewers/kubeflow-control-header-modification.json @@ -0,0 +1,180 @@ +[ + { + "discussion_id": "591695608", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "created_at": "2021-03-10T16:53:30+00:00", + "commented_code": "\"timeout\": \"300s\",\n\t\t},\n\t}\n\n\t// Create a template map for the headers section\n\t// of the Istio VirtualService\n\theaders := map[string]interface{}{\n\t\t\"request\": map[string]interface{}{\n\t\t\t\"set\": map[string]interface{}{},\n\t\t},\n\t}\n\n\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n\t// split the literal string into keys and values and add them\n\t// to the headers variable. 
Finally, add the headers section to\n\t// the variable with the http spec.\n\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n\t\tfor _, kv := range requestHeaders {\n\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n\t\t\t\tk := strings.Split(kv, \": \")[0]\n\t\t\t\tv := strings.Split(kv, \": \")[1]\n\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "591695608", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "591695608", + "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", + "comment_created_at": "2021-03-10T16:53:30+00:00", + "comment_author": "kimwnasptd", + "comment_body": "I think we should not let users arbitrarily add headers to the requests that pass from the ingress gateway. For example they could even override the ISTIO `userid-header`, the `X-Forwarded-Proto` so that the server assumes http instead of https etc.\r\n\r\nI think we should reconsider the design we take here\r\nalso cc @yanniszark who has a lot of experience on security", + "pr_file_module": null + }, + { + "comment_id": "591699285", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "591695608", + "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. 
Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", + "comment_created_at": "2021-03-10T16:57:41+00:00", + "comment_author": "davidspek", + "comment_body": "I believe if we change the method to `add` instead of `set` the header only gets added if it is not already present. ", + "pr_file_module": null + }, + { + "comment_id": "591773485", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "591695608", + "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", + "comment_created_at": "2021-03-10T18:36:49+00:00", + "comment_author": "davidspek", + "comment_body": "If this is a security issue, I think https://github.com/kubeflow/kubeflow/issues/5588 is a much larger problem. ", + "pr_file_module": null + }, + { + "comment_id": "591823260", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "591695608", + "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. 
Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", + "comment_created_at": "2021-03-10T19:48:04+00:00", + "comment_author": "davidspek", + "comment_body": "@kimwnasptd I don't think this is a big problem, as the rest of the virtual service is not touched. So all the user can do is mess up the requests going to the service he already has access to, I don't think this would actually enable somebody to be able to access a notebook in another namespace (unlike the issue linked above). But I am all about learning so I'd like to know the actual dangers/problems this could have. \r\n\r\nDon't forget, users are already able to can already create any virtual service spec they want in their namespace. If there is a security issue with any of this, allowing users to create or edit virtual services would be the actual issue. ", + "pr_file_module": null + }, + { + "comment_id": "591882365", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "591695608", + "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", + "comment_created_at": "2021-03-10T21:24:50+00:00", + "comment_author": "davidspek", + "comment_body": "@kimwnasptd To continue on your last comment below, there are a large amount of header configurations that can be used with RStudio. `X-RStudio-Root-Path` is one of course, `X-RStudio-Request` is another. However, much more can be set with headers: https://docs.rstudio.com/connect/admin/appendix/configuration/#ProxyAuth.Settings. I could be mistaken, but I thought you could also configure or use custom headers with JupyterLab as well. 
I will need to look at it further tomorrow morning, however, I'm sure there are other applications for which setting headers to configure them is supported and would be useful. I think with the URI rewriting, the configurable request headers, and allowing to set the application port (not included here but I do have a PR open for this), the notebook controller will be compatible with most web-applications a user could want to deploy. \r\n\r\nHowever, seeing as the issue described here is an issue that is already present, I do not think this is a blocker. Fixing the underlying issue will be a larger task and not something that will be done before the 1.3 release. ", + "pr_file_module": null + }, + { + "comment_id": "591927042", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "591695608", + "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", + "comment_created_at": "2021-03-10T22:44:53+00:00", + "comment_author": "thesuperzapper", + "comment_body": "@kimwnasptd There is a very simple solution here, just reject the annotation if it sets a restricted header name.\r\n\r\nThis should be easy to implement quickly, and I think we can start with a blacklist (which can be extended in future if issues are found).\r\n\r\nSo @kimwnasptd what headers should be blacklisted?", + "pr_file_module": null + }, + { + "comment_id": "592021956", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "591695608", + "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. 
Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", + "comment_created_at": "2021-03-11T02:27:36+00:00", + "comment_author": "davidspek", + "comment_body": "I added a blacklist, so I believe this issue is now resolved.", + "pr_file_module": null + }, + { + "comment_id": "592145006", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "591695608", + "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", + "comment_created_at": "2021-03-11T08:11:53+00:00", + "comment_author": "kimwnasptd", + "comment_body": "I took a more extensive look at the docs and I propose the following solution:\r\n\r\nThe headers that are used to configure parts of R-Studio are all non-standard headers, starting with `X-*`. Such example headers are\r\n* `X-RStudio-Request`\r\n* `X-RStudio-Root-Path`\r\n* `X-Forwarded-Host`\r\n* `X-Forwarded-Proto`\r\n* `X-Auth-Token`\r\n\r\nSo I propose to go the other way around from blacklisting specific headers. Let's only allow the users to edit/set non-standard headers that start with `X-*`. 
This way we provide all the necessary configuration for R-Studio and at the same time don't allow the users to edit any header arbitrarily.\r\n\r\nAnd if in the future we find users that need to specifically set well defined headers for a Notebook then we can discuss based on the specific use case on how to proceed and allow users to add more headers.\r\n\r\nThis is the case for the docs I looked up to now\r\nhttps://docs.rstudio.com/ide/server-pro/access-and-security.htmlhttps://docs.rstudio.com/connect/admin/authentication/proxied/\r\nhttps://docs.rstudio.com/connect/admin/appendix/configuration/#ProxyAuth.Settings\r\n", + "pr_file_module": null + }, + { + "comment_id": "592156403", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "591695608", + "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", + "comment_created_at": "2021-03-11T08:30:30+00:00", + "comment_author": "kimwnasptd", + "comment_body": "And lastly, the controller will set accordingly the `X-RStudio-Root-Path` header if the `server-type` is `rstudio`. But the user could still be able to override this by manually specifying a different value.\r\n\r\nThis way in order for a CR to support RStuido it will only need the `notebooks.kubeflow.org/server-type` annotation, which provides a good default mechanism and extensibility to the users.", + "pr_file_module": null + }, + { + "comment_id": "592225937", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "591695608", + "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. 
Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", + "comment_created_at": "2021-03-11T10:06:45+00:00", + "comment_author": "davidspek", + "comment_body": "@kimwnasptd A white-list rather than a blacklist would indeed be a possibility. However, by backing this into the controller administrators cannot easily customize these values. Also, using `X-*` for headers seems to have been deprecated int 2012: see https://tools.ietf.org/html/rfc6648. There might be a way to make this configurable, but I'm not sure this can be done on time. \r\n\r\nRegarding your last comment, are you suggesting that the current mechanisms added to the controller stay as they are, but the controller itself adds the annotations to the CR when `notebooks.kubeflow.org/server-type: rstudio` is present? Because I don't think this would work, as the controller will forcefully change the annotations if they are edited. I think what you are proposing, is essentially already what is being done except for that it is done in the frontend/backend rather than the controller. Users even looking into this feature are probably already aware, or capable of, figuring out how to change the virtual service spec. ", + "pr_file_module": null + }, + { + "comment_id": "592245141", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "591695608", + "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. 
Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", + "comment_created_at": "2021-03-11T10:33:12+00:00", + "comment_author": "kimwnasptd", + "comment_body": "> Also, using X-* for headers seems to have been deprecated int 2012: see https://tools.ietf.org/html/rfc6648\r\n\r\nThis is a very good argument to make to the folks maintaining the R-Studio code :) \r\n\r\n> However, by backing this into the controller administrators cannot easily customize these values.\r\n\r\nWhat extra headers do you think they would need to set aside, from the `X-*` headers R-Studio knows/expects?\r\n\r\n> Regarding your last comment, are you suggesting that the current mechanisms added to the controller stay as they are, but the controller itself adds the annotations to the CR when notebooks.kubeflow.org/server-type: rstudio is present?\r\n\r\nNo, the controller will not add any extra annotations if the `server-type` is present. It will only configure the vsvc accordingly.\r\nLet me try to clarify with two examples:\r\n\r\nCR 1:\r\n```yaml\r\nmetadata:\r\n annotations:\r\n notebooks.kubeflow.org/server-type: rstudio\r\n```\r\n\r\nIn this case the controller will see that the Notebook is for R-Studio so it will set the `/` url rewrite in the vsvc and also set the `X-RStudio-Root-Path` header in the vsvc for the prefix\r\n\r\nCR 2:\r\n```yaml\r\nmetadata:\r\n annotations:\r\n notebooks.kubeflow.org/server-type: rstudio\r\n notebooks.kubeflow.org/http-rewrite-uri: /some/path\r\n```\r\n\r\nIn this case the controller will first do exactly what it would for CR 1. But then it would also detect the `notebooks.kubeflow.org/http-rewrite-uri` and use this value for the Istio rewrite, instead of the default it previously put. \r\n\r\nAlso the same approach will be used with the `notebooks.kubeflow.org/http-headers-request-set`. If this annotation is present then the controller will use the `X-*` headers mentioned in this annotations instead", + "pr_file_module": null + }, + { + "comment_id": "592282634", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "591695608", + "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. 
Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", + "comment_created_at": "2021-03-11T11:28:19+00:00", + "comment_author": "davidspek", + "comment_body": "@kimwnasptd This is now implemented, as well as a whitelist instead of a blacklist", + "pr_file_module": null + }, + { + "comment_id": "592287637", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "591695608", + "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", + "comment_created_at": "2021-03-11T11:36:33+00:00", + "comment_author": "thesuperzapper", + "comment_body": "I think with the blacklist there is very little additional risk from this PR, and I think it's good to merge.\r\n\r\nRegarding the server-type thing, we can do that in the future but its not necessary right now. 
\r\n\r\nAdding a whitelist would only act to reduce the usefulness of this feature for users why might have their own container images.", + "pr_file_module": null + }, + { + "comment_id": "592294749", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "591695608", + "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", + "comment_created_at": "2021-03-11T11:48:07+00:00", + "comment_author": "kimwnasptd", + "comment_body": "> Regarding the server-type thing, we can do that in the future but its not necessary right now.\r\n\r\n@thesuperzapper I can understand your urge to have this effort merged as soon as possible, but this is not a big feature so lets get it right from the beginning and not worry about it in the future. The functionality I described is almost there.\r\n\r\n> Adding a whitelist would only act to reduce the usefulness of this feature for users why might have their own container images.\r\n\r\nCould you elaborate on this? What use cases do you think we limit? With the mention mechanism, of allowing the user to set any `X-*` header they want, we still support all of the configuration options from R-Studio.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-control-header-modification.md b/_reviewers/kubeflow-control-header-modification.md index 094e86e..3b148cb 100644 --- a/_reviewers/kubeflow-control-header-modification.md +++ b/_reviewers/kubeflow-control-header-modification.md @@ -37,185 +37,3 @@ if _, ok := annotations["app.kubeflow.org/http-headers-request-set"]; ok { ``` This prevents attackers from overriding critical headers like authentication tokens (e.g., `userid-header`), protocol information (e.g., `X-Forwarded-Proto`), or other security mechanisms. For applications with specific header requirements, maintain a carefully curated whitelist rather than allowing arbitrary header manipulation. 
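For illustration, a minimal Go sketch of how such a prefix-based whitelist could be enforced before user-supplied headers reach the generated VirtualService spec; the annotation format, the `X-` prefix rule, and the helper name are assumptions for the example, not the controller's exact implementation:

```go
package main

import (
	"fmt"
	"strings"
)

// filterRequestHeaders parses an annotation value of the form
// "Header-Name: value" (one pair per line) and keeps only headers
// matching an allow-listed, non-standard prefix (here "X-").
func filterRequestHeaders(annotation string) map[string]string {
	allowed := map[string]string{}
	for _, kv := range strings.Split(annotation, "\n") {
		parts := strings.SplitN(kv, ": ", 2)
		if len(parts) != 2 {
			continue
		}
		// Accept only non-standard headers such as X-RStudio-Root-Path;
		// standard headers (Authorization, Host, X omitted) are dropped.
		if strings.HasPrefix(strings.ToLower(parts[0]), "x-") {
			allowed[parts[0]] = parts[1]
		}
	}
	return allowed
}

func main() {
	annotation := "X-RStudio-Root-Path: /notebook/ns/name\nAuthorization: forged-token"
	// Only the X-RStudio-Root-Path entry survives the filter.
	fmt.Println(filterRequestHeaders(annotation))
}
```

Only entries that pass the filter would ever be copied into the `headers.request.set` map, so a forged `Authorization` or `userid-header` value in the annotation is silently ignored rather than forwarded to the backend.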
- - -[ - { - "discussion_id": "591695608", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "created_at": "2021-03-10T16:53:30+00:00", - "commented_code": "\"timeout\": \"300s\",\n\t\t},\n\t}\n\n\t// Create a template map for the headers section\n\t// of the Istio VirtualService\n\theaders := map[string]interface{}{\n\t\t\"request\": map[string]interface{}{\n\t\t\t\"set\": map[string]interface{}{},\n\t\t},\n\t}\n\n\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n\t// split the literal string into keys and values and add them\n\t// to the headers variable. Finally, add the headers section to\n\t// the variable with the http spec.\n\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n\t\tfor _, kv := range requestHeaders {\n\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n\t\t\t\tk := strings.Split(kv, \": \")[0]\n\t\t\t\tv := strings.Split(kv, \": \")[1]\n\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "591695608", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "591695608", - "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", - "comment_created_at": "2021-03-10T16:53:30+00:00", - "comment_author": "kimwnasptd", - "comment_body": "I think we should not let users arbitrarily add headers to the requests that pass from the ingress gateway. 
For example they could even override the ISTIO `userid-header`, the `X-Forwarded-Proto` so that the server assumes http instead of https etc.\r\n\r\nI think we should reconsider the design we take here\r\nalso cc @yanniszark who has a lot of experience on security", - "pr_file_module": null - }, - { - "comment_id": "591699285", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "591695608", - "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", - "comment_created_at": "2021-03-10T16:57:41+00:00", - "comment_author": "davidspek", - "comment_body": "I believe if we change the method to `add` instead of `set` the header only gets added if it is not already present. ", - "pr_file_module": null - }, - { - "comment_id": "591773485", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "591695608", - "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. 
Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", - "comment_created_at": "2021-03-10T18:36:49+00:00", - "comment_author": "davidspek", - "comment_body": "If this is a security issue, I think https://github.com/kubeflow/kubeflow/issues/5588 is a much larger problem. ", - "pr_file_module": null - }, - { - "comment_id": "591823260", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "591695608", - "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", - "comment_created_at": "2021-03-10T19:48:04+00:00", - "comment_author": "davidspek", - "comment_body": "@kimwnasptd I don't think this is a big problem, as the rest of the virtual service is not touched. So all the user can do is mess up the requests going to the service he already has access to, I don't think this would actually enable somebody to be able to access a notebook in another namespace (unlike the issue linked above). But I am all about learning so I'd like to know the actual dangers/problems this could have. \r\n\r\nDon't forget, users are already able to can already create any virtual service spec they want in their namespace. If there is a security issue with any of this, allowing users to create or edit virtual services would be the actual issue. 
", - "pr_file_module": null - }, - { - "comment_id": "591882365", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "591695608", - "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", - "comment_created_at": "2021-03-10T21:24:50+00:00", - "comment_author": "davidspek", - "comment_body": "@kimwnasptd To continue on your last comment below, there are a large amount of header configurations that can be used with RStudio. `X-RStudio-Root-Path` is one of course, `X-RStudio-Request` is another. However, much more can be set with headers: https://docs.rstudio.com/connect/admin/appendix/configuration/#ProxyAuth.Settings. I could be mistaken, but I thought you could also configure or use custom headers with JupyterLab as well. I will need to look at it further tomorrow morning, however, I'm sure there are other applications for which setting headers to configure them is supported and would be useful. I think with the URI rewriting, the configurable request headers, and allowing to set the application port (not included here but I do have a PR open for this), the notebook controller will be compatible with most web-applications a user could want to deploy. \r\n\r\nHowever, seeing as the issue described here is an issue that is already present, I do not think this is a blocker. Fixing the underlying issue will be a larger task and not something that will be done before the 1.3 release. ", - "pr_file_module": null - }, - { - "comment_id": "591927042", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "591695608", - "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. 
Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", - "comment_created_at": "2021-03-10T22:44:53+00:00", - "comment_author": "thesuperzapper", - "comment_body": "@kimwnasptd There is a very simple solution here, just reject the annotation if it sets a restricted header name.\r\n\r\nThis should be easy to implement quickly, and I think we can start with a blacklist (which can be extended in future if issues are found).\r\n\r\nSo @kimwnasptd what headers should be blacklisted?", - "pr_file_module": null - }, - { - "comment_id": "592021956", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "591695608", - "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", - "comment_created_at": "2021-03-11T02:27:36+00:00", - "comment_author": "davidspek", - "comment_body": "I added a blacklist, so I believe this issue is now resolved.", - "pr_file_module": null - }, - { - "comment_id": "592145006", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "591695608", - "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. 
Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", - "comment_created_at": "2021-03-11T08:11:53+00:00", - "comment_author": "kimwnasptd", - "comment_body": "I took a more extensive look at the docs and I propose the following solution:\r\n\r\nThe headers that are used to configure parts of R-Studio are all non-standard headers, starting with `X-*`. Such example headers are\r\n* `X-RStudio-Request`\r\n* `X-RStudio-Root-Path`\r\n* `X-Forwarded-Host`\r\n* `X-Forwarded-Proto`\r\n* `X-Auth-Token`\r\n\r\nSo I propose to go the other way around from blacklisting specific headers. Let's only allow the users to edit/set non-standard headers that start with `X-*`. This way we provide all the necessary configuration for R-Studio and at the same time don't allow the users to edit any header arbitrarily.\r\n\r\nAnd if in the future we find users that need to specifically set well defined headers for a Notebook then we can discuss based on the specific use case on how to proceed and allow users to add more headers.\r\n\r\nThis is the case for the docs I looked up to now\r\nhttps://docs.rstudio.com/ide/server-pro/access-and-security.htmlhttps://docs.rstudio.com/connect/admin/authentication/proxied/\r\nhttps://docs.rstudio.com/connect/admin/appendix/configuration/#ProxyAuth.Settings\r\n", - "pr_file_module": null - }, - { - "comment_id": "592156403", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "591695608", - "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. 
Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", - "comment_created_at": "2021-03-11T08:30:30+00:00", - "comment_author": "kimwnasptd", - "comment_body": "And lastly, the controller will set accordingly the `X-RStudio-Root-Path` header if the `server-type` is `rstudio`. But the user could still be able to override this by manually specifying a different value.\r\n\r\nThis way in order for a CR to support RStuido it will only need the `notebooks.kubeflow.org/server-type` annotation, which provides a good default mechanism and extensibility to the users.", - "pr_file_module": null - }, - { - "comment_id": "592225937", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "591695608", - "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", - "comment_created_at": "2021-03-11T10:06:45+00:00", - "comment_author": "davidspek", - "comment_body": "@kimwnasptd A white-list rather than a blacklist would indeed be a possibility. However, by backing this into the controller administrators cannot easily customize these values. Also, using `X-*` for headers seems to have been deprecated int 2012: see https://tools.ietf.org/html/rfc6648. There might be a way to make this configurable, but I'm not sure this can be done on time. \r\n\r\nRegarding your last comment, are you suggesting that the current mechanisms added to the controller stay as they are, but the controller itself adds the annotations to the CR when `notebooks.kubeflow.org/server-type: rstudio` is present? Because I don't think this would work, as the controller will forcefully change the annotations if they are edited. 
I think what you are proposing, is essentially already what is being done except for that it is done in the frontend/backend rather than the controller. Users even looking into this feature are probably already aware, or capable of, figuring out how to change the virtual service spec. ", - "pr_file_module": null - }, - { - "comment_id": "592245141", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "591695608", - "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", - "comment_created_at": "2021-03-11T10:33:12+00:00", - "comment_author": "kimwnasptd", - "comment_body": "> Also, using X-* for headers seems to have been deprecated int 2012: see https://tools.ietf.org/html/rfc6648\r\n\r\nThis is a very good argument to make to the folks maintaining the R-Studio code :) \r\n\r\n> However, by backing this into the controller administrators cannot easily customize these values.\r\n\r\nWhat extra headers do you think they would need to set aside, from the `X-*` headers R-Studio knows/expects?\r\n\r\n> Regarding your last comment, are you suggesting that the current mechanisms added to the controller stay as they are, but the controller itself adds the annotations to the CR when notebooks.kubeflow.org/server-type: rstudio is present?\r\n\r\nNo, the controller will not add any extra annotations if the `server-type` is present. It will only configure the vsvc accordingly.\r\nLet me try to clarify with two examples:\r\n\r\nCR 1:\r\n```yaml\r\nmetadata:\r\n annotations:\r\n notebooks.kubeflow.org/server-type: rstudio\r\n```\r\n\r\nIn this case the controller will see that the Notebook is for R-Studio so it will set the `/` url rewrite in the vsvc and also set the `X-RStudio-Root-Path` header in the vsvc for the prefix\r\n\r\nCR 2:\r\n```yaml\r\nmetadata:\r\n annotations:\r\n notebooks.kubeflow.org/server-type: rstudio\r\n notebooks.kubeflow.org/http-rewrite-uri: /some/path\r\n```\r\n\r\nIn this case the controller will first do exactly what it would for CR 1. But then it would also detect the `notebooks.kubeflow.org/http-rewrite-uri` and use this value for the Istio rewrite, instead of the default it previously put. \r\n\r\nAlso the same approach will be used with the `notebooks.kubeflow.org/http-headers-request-set`. 
If this annotation is present then the controller will use the `X-*` headers mentioned in this annotations instead", - "pr_file_module": null - }, - { - "comment_id": "592282634", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "591695608", - "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", - "comment_created_at": "2021-03-11T11:28:19+00:00", - "comment_author": "davidspek", - "comment_body": "@kimwnasptd This is now implemented, as well as a whitelist instead of a blacklist", - "pr_file_module": null - }, - { - "comment_id": "592287637", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "591695608", - "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. 
Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", - "comment_created_at": "2021-03-11T11:36:33+00:00", - "comment_author": "thesuperzapper", - "comment_body": "I think with the blacklist there is very little additional risk from this PR, and I think it's good to merge.\r\n\r\nRegarding the server-type thing, we can do that in the future but its not necessary right now. \r\n\r\nAdding a whitelist would only act to reduce the usefulness of this feature for users why might have their own container images.", - "pr_file_module": null - }, - { - "comment_id": "592294749", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "591695608", - "commented_code": "@@ -449,6 +464,32 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]\n+\t\t\t\theaders[\"request\"].(map[string]interface{})[\"set\"].(map[string]interface{})[k] = v", - "comment_created_at": "2021-03-11T11:48:07+00:00", - "comment_author": "kimwnasptd", - "comment_body": "> Regarding the server-type thing, we can do that in the future but its not necessary right now.\r\n\r\n@thesuperzapper I can understand your urge to have this effort merged as soon as possible, but this is not a big feature so lets get it right from the beginning and not worry about it in the future. The functionality I described is almost there.\r\n\r\n> Adding a whitelist would only act to reduce the usefulness of this feature for users why might have their own container images.\r\n\r\nCould you elaborate on this? What use cases do you think we limit? 
With the mention mechanism, of allowing the user to set any `X-*` header they want, we still support all of the configuration options from R-Studio.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-descriptive-consistent-naming.json b/_reviewers/kubeflow-descriptive-consistent-naming.json new file mode 100644 index 0000000..4aebc12 --- /dev/null +++ b/_reviewers/kubeflow-descriptive-consistent-naming.json @@ -0,0 +1,70 @@ +[ + { + "discussion_id": "1035716130", + "pr_number": 6781, + "pr_file": "components/centraldashboard-angular/Makefile", + "created_at": "2022-11-30T09:18:48+00:00", + "commented_code": "IMG ?= centraldashboard", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1035716130", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6781, + "pr_file": "components/centraldashboard-angular/Makefile", + "discussion_id": "1035716130", + "commented_code": "@@ -0,0 +1,14 @@\n+IMG ?= centraldashboard", + "comment_created_at": "2022-11-30T09:18:48+00:00", + "comment_author": "kimwnasptd", + "comment_body": "nit: Let's use `centraldashboard-angular` here as well, to make sure we use the same value everywhere.\r\n\r\nWe can revert to `centraldashboard` once we are confident with switching the web apps", + "pr_file_module": null + }, + { + "comment_id": "1035726127", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6781, + "pr_file": "components/centraldashboard-angular/Makefile", + "discussion_id": "1035716130", + "commented_code": "@@ -0,0 +1,14 @@\n+IMG ?= centraldashboard", + "comment_created_at": "2022-11-30T09:27:36+00:00", + "comment_author": "orfeas-k", + "comment_body": "@kimwnasptd Fixed it and pushed again!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "600884787", + "pr_number": 5746, + "pr_file": "releasing/update-manifests-images", + "created_at": "2021-03-24T21:25:28+00:00", + "commented_code": "#!/usr/bin/env python3\n\nimport sys\nimport logging\nimport argparse\nimport ruamel.yaml\n\n\nlog = logging.getLogger(__name__)\n\n\nclass YAMLEmitterNoVersionDirective(ruamel.yaml.emitter.Emitter):\n \"\"\"YAML Emitter that doesn't emit the YAML version directive.\"\"\"\n\n def write_version_directive(self, version_text):\n \"\"\"Disable emitting version directive, i.e., %YAML 1.1.\"\"\"\n pass\n\n def expect_document_start(self, first=False):\n \"\"\"Do not print '---' at the beginning.\"\"\"\n if not isinstance(self.event, ruamel.yaml.events.DocumentStartEvent):\n return super(YAMLEmitterNoVersionDirective, self).\\\n expect_document_start(first=first)\n\n version = self.event.version\n self.event.version = None\n ret = super(YAMLEmitterNoVersionDirective, self).\\\n expect_document_start(first=first)\n self.event.version = version\n return ret\n\n\nclass YAML(ruamel.yaml.YAML):\n \"\"\"Wrapper of the ruamel.yaml.YAML class with our custom settings.\"\"\"\n\n def __init__(self, *args, **kwargs):\n super(YAML, self).__init__(*args, **kwargs)\n # XXX: Explicitly set version for producing K8s compatible manifests.\n # https://yaml.readthedocs.io/en/latest/detail.html#document-version-support\n self.version = (1, 1)\n # XXX: Do not emit version directive since tools might fail to\n # parse manifests.\n self.Emitter = YAMLEmitterNoVersionDirective\n # Preserve original quotes\n self.preserve_quotes = True\n\n\nyaml = YAML()\n\napps = [\n {\n \"name\": \"Admission Webhook\",\n \"kustomization\": \"components/admission-webhook/manifests/base/kustomization.yaml\",\n \"images\": [\n {\n \"name\": 
\"public.ecr.aws/j1r0q0g6/notebooks/admission-webhook\",\n \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/admission-webhook\",\n },\n ],\n },\n {\n \"name\": \"Central Dashboard\",\n \"kustomization\": \"components/centraldashboard/manifests/base/kustomization.yaml\",\n \"images\": [\n {\n \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/central-dashboard\",\n \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/central-dashboard\",\n },\n ],\n },\n {\n \"name\": \"Jupyter Web App\",\n \"kustomization\": \"components/crud-web-apps/jupyter/manifests/base/kustomization.yaml\",\n \"images\": [\n {\n \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/jupyter-web-app\",\n \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/jupyter-web-app\",\n },\n ],\n },\n {\n \"name\": \"Tensorboard Web App\",\n \"kustomization\": \"components/crud-web-apps/tensorboards/manifests/base/kustomization.yaml\",\n \"images\": [\n {\n \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboards-web-app\",\n \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboards-web-app\",\n },\n ],\n },\n {\n \"name\": \"Volumes Web App\",\n \"kustomization\": \"components/crud-web-apps/volumes/manifests/base/kustomization.yaml\",\n \"images\": [\n {\n \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/volumes-web-app\",\n \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/volumes-web-app\",\n },\n ],\n },\n {\n \"name\": \"Notebook Controller\",\n \"kustomization\": \"components/notebook-controller/config/base/kustomization.yaml\",\n \"images\": [\n {\n \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\",\n \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\",\n },\n ],\n },\n {\n \"name\": \"Tensorboard Controller\",\n \"kustomization\": \"components/tensorboard-controller/config/base/kustomization.yaml\",\n \"images\": [\n {\n \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboard-controller\",\n \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboard-controller\",\n },\n ],\n },\n {\n \"name\": \"Profile Controller\",\n \"kustomization\": \"components/profile-controller/config/base/kustomization.yaml\",\n \"images\": [\n {\n \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/profile-controller\",\n \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/profile-controller\",\n },\n ],\n },\n {\n \"name\": \"KFAM\",", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "600884787", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5746, + "pr_file": "releasing/update-manifests-images", + "discussion_id": "600884787", + "commented_code": "@@ -0,0 +1,200 @@\n+#!/usr/bin/env python3\n+\n+import sys\n+import logging\n+import argparse\n+import ruamel.yaml\n+\n+\n+log = logging.getLogger(__name__)\n+\n+\n+class YAMLEmitterNoVersionDirective(ruamel.yaml.emitter.Emitter):\n+ \"\"\"YAML Emitter that doesn't emit the YAML version directive.\"\"\"\n+\n+ def write_version_directive(self, version_text):\n+ \"\"\"Disable emitting version directive, i.e., %YAML 1.1.\"\"\"\n+ pass\n+\n+ def expect_document_start(self, first=False):\n+ \"\"\"Do not print '---' at the beginning.\"\"\"\n+ if not isinstance(self.event, ruamel.yaml.events.DocumentStartEvent):\n+ return super(YAMLEmitterNoVersionDirective, self).\\\n+ expect_document_start(first=first)\n+\n+ version = self.event.version\n+ self.event.version = None\n+ ret = super(YAMLEmitterNoVersionDirective, self).\\\n+ expect_document_start(first=first)\n+ self.event.version = version\n+ return ret\n+\n+\n+class YAML(ruamel.yaml.YAML):\n+ 
\"\"\"Wrapper of the ruamel.yaml.YAML class with our custom settings.\"\"\"\n+\n+ def __init__(self, *args, **kwargs):\n+ super(YAML, self).__init__(*args, **kwargs)\n+ # XXX: Explicitly set version for producing K8s compatible manifests.\n+ # https://yaml.readthedocs.io/en/latest/detail.html#document-version-support\n+ self.version = (1, 1)\n+ # XXX: Do not emit version directive since tools might fail to\n+ # parse manifests.\n+ self.Emitter = YAMLEmitterNoVersionDirective\n+ # Preserve original quotes\n+ self.preserve_quotes = True\n+\n+\n+yaml = YAML()\n+\n+apps = [\n+ {\n+ \"name\": \"Admission Webhook\",\n+ \"kustomization\": \"components/admission-webhook/manifests/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/admission-webhook\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/admission-webhook\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Central Dashboard\",\n+ \"kustomization\": \"components/centraldashboard/manifests/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/central-dashboard\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/central-dashboard\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Jupyter Web App\",\n+ \"kustomization\": \"components/crud-web-apps/jupyter/manifests/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/jupyter-web-app\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/jupyter-web-app\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Tensorboard Web App\",\n+ \"kustomization\": \"components/crud-web-apps/tensorboards/manifests/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboards-web-app\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboards-web-app\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Volumes Web App\",\n+ \"kustomization\": \"components/crud-web-apps/volumes/manifests/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/volumes-web-app\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/volumes-web-app\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Notebook Controller\",\n+ \"kustomization\": \"components/notebook-controller/config/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Tensorboard Controller\",\n+ \"kustomization\": \"components/tensorboard-controller/config/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboard-controller\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboard-controller\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Profile Controller\",\n+ \"kustomization\": \"components/profile-controller/config/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/profile-controller\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/profile-controller\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"KFAM\",", + "comment_created_at": "2021-03-24T21:25:28+00:00", + "comment_author": "davidspek", + "comment_body": "Small nit, this name stands out compared to the rest. People not familiar with the code might now know what `KFAM` means. Not a blocker for the PR but `Access Management` might be better to use. 
", + "pr_file_module": null + }, + { + "comment_id": "600885034", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5746, + "pr_file": "releasing/update-manifests-images", + "discussion_id": "600884787", + "commented_code": "@@ -0,0 +1,200 @@\n+#!/usr/bin/env python3\n+\n+import sys\n+import logging\n+import argparse\n+import ruamel.yaml\n+\n+\n+log = logging.getLogger(__name__)\n+\n+\n+class YAMLEmitterNoVersionDirective(ruamel.yaml.emitter.Emitter):\n+ \"\"\"YAML Emitter that doesn't emit the YAML version directive.\"\"\"\n+\n+ def write_version_directive(self, version_text):\n+ \"\"\"Disable emitting version directive, i.e., %YAML 1.1.\"\"\"\n+ pass\n+\n+ def expect_document_start(self, first=False):\n+ \"\"\"Do not print '---' at the beginning.\"\"\"\n+ if not isinstance(self.event, ruamel.yaml.events.DocumentStartEvent):\n+ return super(YAMLEmitterNoVersionDirective, self).\\\n+ expect_document_start(first=first)\n+\n+ version = self.event.version\n+ self.event.version = None\n+ ret = super(YAMLEmitterNoVersionDirective, self).\\\n+ expect_document_start(first=first)\n+ self.event.version = version\n+ return ret\n+\n+\n+class YAML(ruamel.yaml.YAML):\n+ \"\"\"Wrapper of the ruamel.yaml.YAML class with our custom settings.\"\"\"\n+\n+ def __init__(self, *args, **kwargs):\n+ super(YAML, self).__init__(*args, **kwargs)\n+ # XXX: Explicitly set version for producing K8s compatible manifests.\n+ # https://yaml.readthedocs.io/en/latest/detail.html#document-version-support\n+ self.version = (1, 1)\n+ # XXX: Do not emit version directive since tools might fail to\n+ # parse manifests.\n+ self.Emitter = YAMLEmitterNoVersionDirective\n+ # Preserve original quotes\n+ self.preserve_quotes = True\n+\n+\n+yaml = YAML()\n+\n+apps = [\n+ {\n+ \"name\": \"Admission Webhook\",\n+ \"kustomization\": \"components/admission-webhook/manifests/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/admission-webhook\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/admission-webhook\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Central Dashboard\",\n+ \"kustomization\": \"components/centraldashboard/manifests/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/central-dashboard\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/central-dashboard\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Jupyter Web App\",\n+ \"kustomization\": \"components/crud-web-apps/jupyter/manifests/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/jupyter-web-app\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/jupyter-web-app\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Tensorboard Web App\",\n+ \"kustomization\": \"components/crud-web-apps/tensorboards/manifests/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboards-web-app\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboards-web-app\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Volumes Web App\",\n+ \"kustomization\": \"components/crud-web-apps/volumes/manifests/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/volumes-web-app\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/volumes-web-app\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Notebook Controller\",\n+ \"kustomization\": \"components/notebook-controller/config/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": 
\"public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Tensorboard Controller\",\n+ \"kustomization\": \"components/tensorboard-controller/config/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboard-controller\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboard-controller\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Profile Controller\",\n+ \"kustomization\": \"components/profile-controller/config/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/profile-controller\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/profile-controller\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"KFAM\",", + "comment_created_at": "2021-03-24T21:25:51+00:00", + "comment_author": "yanniszark", + "comment_body": "Sounds reasonable", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-descriptive-consistent-naming.md b/_reviewers/kubeflow-descriptive-consistent-naming.md index 86a75f2..3c6981e 100644 --- a/_reviewers/kubeflow-descriptive-consistent-naming.md +++ b/_reviewers/kubeflow-descriptive-consistent-naming.md @@ -18,75 +18,3 @@ Examples: - Use `Access Management` instead of `KFAM` to clearly indicate the component's purpose When introducing new components or refactoring existing ones, ensure the same naming pattern is followed in all references to maintain consistency and avoid confusion. This includes Makefiles, configuration files, documentation, and code comments. - - -[ - { - "discussion_id": "1035716130", - "pr_number": 6781, - "pr_file": "components/centraldashboard-angular/Makefile", - "created_at": "2022-11-30T09:18:48+00:00", - "commented_code": "IMG ?= centraldashboard", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1035716130", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6781, - "pr_file": "components/centraldashboard-angular/Makefile", - "discussion_id": "1035716130", - "commented_code": "@@ -0,0 +1,14 @@\n+IMG ?= centraldashboard", - "comment_created_at": "2022-11-30T09:18:48+00:00", - "comment_author": "kimwnasptd", - "comment_body": "nit: Let's use `centraldashboard-angular` here as well, to make sure we use the same value everywhere.\r\n\r\nWe can revert to `centraldashboard` once we are confident with switching the web apps", - "pr_file_module": null - }, - { - "comment_id": "1035726127", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6781, - "pr_file": "components/centraldashboard-angular/Makefile", - "discussion_id": "1035716130", - "commented_code": "@@ -0,0 +1,14 @@\n+IMG ?= centraldashboard", - "comment_created_at": "2022-11-30T09:27:36+00:00", - "comment_author": "orfeas-k", - "comment_body": "@kimwnasptd Fixed it and pushed again!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "600884787", - "pr_number": 5746, - "pr_file": "releasing/update-manifests-images", - "created_at": "2021-03-24T21:25:28+00:00", - "commented_code": "#!/usr/bin/env python3\n\nimport sys\nimport logging\nimport argparse\nimport ruamel.yaml\n\n\nlog = logging.getLogger(__name__)\n\n\nclass YAMLEmitterNoVersionDirective(ruamel.yaml.emitter.Emitter):\n \"\"\"YAML Emitter that doesn't emit the YAML version directive.\"\"\"\n\n def write_version_directive(self, version_text):\n \"\"\"Disable emitting version directive, i.e., %YAML 1.1.\"\"\"\n pass\n\n def 
expect_document_start(self, first=False):\n \"\"\"Do not print '---' at the beginning.\"\"\"\n if not isinstance(self.event, ruamel.yaml.events.DocumentStartEvent):\n return super(YAMLEmitterNoVersionDirective, self).\\\n expect_document_start(first=first)\n\n version = self.event.version\n self.event.version = None\n ret = super(YAMLEmitterNoVersionDirective, self).\\\n expect_document_start(first=first)\n self.event.version = version\n return ret\n\n\nclass YAML(ruamel.yaml.YAML):\n \"\"\"Wrapper of the ruamel.yaml.YAML class with our custom settings.\"\"\"\n\n def __init__(self, *args, **kwargs):\n super(YAML, self).__init__(*args, **kwargs)\n # XXX: Explicitly set version for producing K8s compatible manifests.\n # https://yaml.readthedocs.io/en/latest/detail.html#document-version-support\n self.version = (1, 1)\n # XXX: Do not emit version directive since tools might fail to\n # parse manifests.\n self.Emitter = YAMLEmitterNoVersionDirective\n # Preserve original quotes\n self.preserve_quotes = True\n\n\nyaml = YAML()\n\napps = [\n {\n \"name\": \"Admission Webhook\",\n \"kustomization\": \"components/admission-webhook/manifests/base/kustomization.yaml\",\n \"images\": [\n {\n \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/admission-webhook\",\n \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/admission-webhook\",\n },\n ],\n },\n {\n \"name\": \"Central Dashboard\",\n \"kustomization\": \"components/centraldashboard/manifests/base/kustomization.yaml\",\n \"images\": [\n {\n \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/central-dashboard\",\n \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/central-dashboard\",\n },\n ],\n },\n {\n \"name\": \"Jupyter Web App\",\n \"kustomization\": \"components/crud-web-apps/jupyter/manifests/base/kustomization.yaml\",\n \"images\": [\n {\n \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/jupyter-web-app\",\n \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/jupyter-web-app\",\n },\n ],\n },\n {\n \"name\": \"Tensorboard Web App\",\n \"kustomization\": \"components/crud-web-apps/tensorboards/manifests/base/kustomization.yaml\",\n \"images\": [\n {\n \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboards-web-app\",\n \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboards-web-app\",\n },\n ],\n },\n {\n \"name\": \"Volumes Web App\",\n \"kustomization\": \"components/crud-web-apps/volumes/manifests/base/kustomization.yaml\",\n \"images\": [\n {\n \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/volumes-web-app\",\n \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/volumes-web-app\",\n },\n ],\n },\n {\n \"name\": \"Notebook Controller\",\n \"kustomization\": \"components/notebook-controller/config/base/kustomization.yaml\",\n \"images\": [\n {\n \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\",\n \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\",\n },\n ],\n },\n {\n \"name\": \"Tensorboard Controller\",\n \"kustomization\": \"components/tensorboard-controller/config/base/kustomization.yaml\",\n \"images\": [\n {\n \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboard-controller\",\n \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboard-controller\",\n },\n ],\n },\n {\n \"name\": \"Profile Controller\",\n \"kustomization\": \"components/profile-controller/config/base/kustomization.yaml\",\n \"images\": [\n {\n \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/profile-controller\",\n \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/profile-controller\",\n },\n ],\n },\n {\n \"name\": \"KFAM\",", - 
"repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "600884787", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5746, - "pr_file": "releasing/update-manifests-images", - "discussion_id": "600884787", - "commented_code": "@@ -0,0 +1,200 @@\n+#!/usr/bin/env python3\n+\n+import sys\n+import logging\n+import argparse\n+import ruamel.yaml\n+\n+\n+log = logging.getLogger(__name__)\n+\n+\n+class YAMLEmitterNoVersionDirective(ruamel.yaml.emitter.Emitter):\n+ \"\"\"YAML Emitter that doesn't emit the YAML version directive.\"\"\"\n+\n+ def write_version_directive(self, version_text):\n+ \"\"\"Disable emitting version directive, i.e., %YAML 1.1.\"\"\"\n+ pass\n+\n+ def expect_document_start(self, first=False):\n+ \"\"\"Do not print '---' at the beginning.\"\"\"\n+ if not isinstance(self.event, ruamel.yaml.events.DocumentStartEvent):\n+ return super(YAMLEmitterNoVersionDirective, self).\\\n+ expect_document_start(first=first)\n+\n+ version = self.event.version\n+ self.event.version = None\n+ ret = super(YAMLEmitterNoVersionDirective, self).\\\n+ expect_document_start(first=first)\n+ self.event.version = version\n+ return ret\n+\n+\n+class YAML(ruamel.yaml.YAML):\n+ \"\"\"Wrapper of the ruamel.yaml.YAML class with our custom settings.\"\"\"\n+\n+ def __init__(self, *args, **kwargs):\n+ super(YAML, self).__init__(*args, **kwargs)\n+ # XXX: Explicitly set version for producing K8s compatible manifests.\n+ # https://yaml.readthedocs.io/en/latest/detail.html#document-version-support\n+ self.version = (1, 1)\n+ # XXX: Do not emit version directive since tools might fail to\n+ # parse manifests.\n+ self.Emitter = YAMLEmitterNoVersionDirective\n+ # Preserve original quotes\n+ self.preserve_quotes = True\n+\n+\n+yaml = YAML()\n+\n+apps = [\n+ {\n+ \"name\": \"Admission Webhook\",\n+ \"kustomization\": \"components/admission-webhook/manifests/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/admission-webhook\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/admission-webhook\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Central Dashboard\",\n+ \"kustomization\": \"components/centraldashboard/manifests/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/central-dashboard\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/central-dashboard\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Jupyter Web App\",\n+ \"kustomization\": \"components/crud-web-apps/jupyter/manifests/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/jupyter-web-app\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/jupyter-web-app\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Tensorboard Web App\",\n+ \"kustomization\": \"components/crud-web-apps/tensorboards/manifests/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboards-web-app\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboards-web-app\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Volumes Web App\",\n+ \"kustomization\": \"components/crud-web-apps/volumes/manifests/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/volumes-web-app\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/volumes-web-app\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Notebook Controller\",\n+ \"kustomization\": \"components/notebook-controller/config/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": 
\"public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Tensorboard Controller\",\n+ \"kustomization\": \"components/tensorboard-controller/config/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboard-controller\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboard-controller\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Profile Controller\",\n+ \"kustomization\": \"components/profile-controller/config/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/profile-controller\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/profile-controller\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"KFAM\",", - "comment_created_at": "2021-03-24T21:25:28+00:00", - "comment_author": "davidspek", - "comment_body": "Small nit, this name stands out compared to the rest. People not familiar with the code might now know what `KFAM` means. Not a blocker for the PR but `Access Management` might be better to use. ", - "pr_file_module": null - }, - { - "comment_id": "600885034", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5746, - "pr_file": "releasing/update-manifests-images", - "discussion_id": "600884787", - "commented_code": "@@ -0,0 +1,200 @@\n+#!/usr/bin/env python3\n+\n+import sys\n+import logging\n+import argparse\n+import ruamel.yaml\n+\n+\n+log = logging.getLogger(__name__)\n+\n+\n+class YAMLEmitterNoVersionDirective(ruamel.yaml.emitter.Emitter):\n+ \"\"\"YAML Emitter that doesn't emit the YAML version directive.\"\"\"\n+\n+ def write_version_directive(self, version_text):\n+ \"\"\"Disable emitting version directive, i.e., %YAML 1.1.\"\"\"\n+ pass\n+\n+ def expect_document_start(self, first=False):\n+ \"\"\"Do not print '---' at the beginning.\"\"\"\n+ if not isinstance(self.event, ruamel.yaml.events.DocumentStartEvent):\n+ return super(YAMLEmitterNoVersionDirective, self).\\\n+ expect_document_start(first=first)\n+\n+ version = self.event.version\n+ self.event.version = None\n+ ret = super(YAMLEmitterNoVersionDirective, self).\\\n+ expect_document_start(first=first)\n+ self.event.version = version\n+ return ret\n+\n+\n+class YAML(ruamel.yaml.YAML):\n+ \"\"\"Wrapper of the ruamel.yaml.YAML class with our custom settings.\"\"\"\n+\n+ def __init__(self, *args, **kwargs):\n+ super(YAML, self).__init__(*args, **kwargs)\n+ # XXX: Explicitly set version for producing K8s compatible manifests.\n+ # https://yaml.readthedocs.io/en/latest/detail.html#document-version-support\n+ self.version = (1, 1)\n+ # XXX: Do not emit version directive since tools might fail to\n+ # parse manifests.\n+ self.Emitter = YAMLEmitterNoVersionDirective\n+ # Preserve original quotes\n+ self.preserve_quotes = True\n+\n+\n+yaml = YAML()\n+\n+apps = [\n+ {\n+ \"name\": \"Admission Webhook\",\n+ \"kustomization\": \"components/admission-webhook/manifests/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/admission-webhook\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/admission-webhook\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Central Dashboard\",\n+ \"kustomization\": \"components/centraldashboard/manifests/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/central-dashboard\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/central-dashboard\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Jupyter Web 
App\",\n+ \"kustomization\": \"components/crud-web-apps/jupyter/manifests/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/jupyter-web-app\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/jupyter-web-app\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Tensorboard Web App\",\n+ \"kustomization\": \"components/crud-web-apps/tensorboards/manifests/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboards-web-app\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboards-web-app\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Volumes Web App\",\n+ \"kustomization\": \"components/crud-web-apps/volumes/manifests/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/volumes-web-app\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/volumes-web-app\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Notebook Controller\",\n+ \"kustomization\": \"components/notebook-controller/config/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Tensorboard Controller\",\n+ \"kustomization\": \"components/tensorboard-controller/config/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboard-controller\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/tensorboard-controller\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"Profile Controller\",\n+ \"kustomization\": \"components/profile-controller/config/base/kustomization.yaml\",\n+ \"images\": [\n+ {\n+ \"name\": \"public.ecr.aws/j1r0q0g6/notebooks/profile-controller\",\n+ \"newName\": \"public.ecr.aws/j1r0q0g6/notebooks/profile-controller\",\n+ },\n+ ],\n+ },\n+ {\n+ \"name\": \"KFAM\",", - "comment_created_at": "2021-03-24T21:25:51+00:00", - "comment_author": "yanniszark", - "comment_body": "Sounds reasonable", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-document-code-thoroughly.json b/_reviewers/kubeflow-document-code-thoroughly.json new file mode 100644 index 0000000..a3a8b37 --- /dev/null +++ b/_reviewers/kubeflow-document-code-thoroughly.json @@ -0,0 +1,90 @@ +[ + { + "discussion_id": "491874831", + "pr_number": 5314, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "created_at": "2020-09-21T08:38:29+00:00", + "commented_code": "} else {\n\t\t// Got the pod\n\t\tpodFound = true\n\t\tif len(pod.Status.ContainerStatuses) > 0 &&\n\t\t\tpod.Status.ContainerStatuses[0].State != instance.Status.ContainerState {\n\t\t\tlog.Info(\"Updating container state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n\t\t\tcs := pod.Status.ContainerStatuses[0].State\n\t\t\tinstance.Status.ContainerState = cs\n\t\t\toldConditions := instance.Status.Conditions\n\t\t\tnewCondition := getNextCondition(cs)\n\t\t\t// Append new condition\n\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n\t\t\t\toldConditions[0].Message != newCondition.Message {\n\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, 
oldConditions...)\n\n\t\tif len(pod.Status.ContainerStatuses) > 0 {\n\t\t\tnotebookContainerFound := false\n\t\t\tfor i := range pod.Status.ContainerStatuses {", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "491874831", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5314, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "491874831", + "commented_code": "@@ -206,23 +206,34 @@ func (r *NotebookReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {\n \t} else {\n \t\t// Got the pod\n \t\tpodFound = true\n-\t\tif len(pod.Status.ContainerStatuses) > 0 &&\n-\t\t\tpod.Status.ContainerStatuses[0].State != instance.Status.ContainerState {\n-\t\t\tlog.Info(\"Updating container state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n-\t\t\tcs := pod.Status.ContainerStatuses[0].State\n-\t\t\tinstance.Status.ContainerState = cs\n-\t\t\toldConditions := instance.Status.Conditions\n-\t\t\tnewCondition := getNextCondition(cs)\n-\t\t\t// Append new condition\n-\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n-\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n-\t\t\t\toldConditions[0].Message != newCondition.Message {\n-\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n-\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n+\n+\t\tif len(pod.Status.ContainerStatuses) > 0 {\n+\t\t\tnotebookContainerFound := false\n+\t\t\tfor i := range pod.Status.ContainerStatuses {", + "comment_created_at": "2020-09-21T08:38:29+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Could you add a comment here to document that the status of the CR will be updated based on the `ContainerState` of the container that has the same name with the CR?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "332759147", + "pr_number": 4251, + "pr_file": "components/kf-monitoring/MetricsExporter.go", + "created_at": "2019-10-08T22:22:03+00:00", + "commented_code": "package monitoring_util\n\nimport (\n\t\"fmt\"\n\t\"time\"\n\n\t\"github.com/prometheus/client_golang/prometheus\"\n\t\"github.com/prometheus/client_golang/prometheus/promhttp\"\n\t\"github.com/prometheus/common/log\"\n\t\"net\"\n\t\"net/http\"\n)\n\n// Common label keys for all metrics signals\n// >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n// COMPONENT: name of Component outputing the metrics, eg. \"tf-operator\"\nconst COMPONENT = \"Component\"\n// KIND: each componenet can label their metrics with custom tag \"kind\". Suggest keeping \"kind\" value to be CONSTANT PER METRICS.\nconst KIND = \"kind\"\nconst NAMESPACE = \"namespace\"\n// ACTION: request action type of the metrics, eg. 
\"CRUD\"\nconst ACTION = \"action\"\n// SEVERITY: level of importance, used to filter alerts\nconst SEVERITY = \"severity\"\n// <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n// Alerting metrics severity levels\n// >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\nconst SEVERITY_MINOR = \"Minor\"\nconst SEVERITY_MAJOR = \"Major\"\nconst SEVERITY_CRITICAL = \"Critical\"\n// <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n// Util const values\n// >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\nconst METRICSPORT = 8079\nconst METRICSPATH = \"/metrics\"\n// <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\nvar (\n\t// Counter metrics\n\t// num of requests counter vec\n\trequestCounter = prometheus.NewCounterVec(\n\t\tprometheus.CounterOpts{\n\t\t\tName: \"request_counter\",\n\t\t\tHelp: \"Number of request_counter\",\n\t\t},\n\t\t[]string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY},\n\t)\n\t// Counter metrics for failed requests\n\trequestFailureCounter = prometheus.NewCounterVec(prometheus.CounterOpts{\n\t\tName: \"request_failure_counter\",\n\t\tHelp: \"Number of request_failure_counter\",\n\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n\n\t// Gauge metrics\n\trequestGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{\n\t\tName: \"requests_gauge\",\n\t\tHelp: \"Number of requests_gauge\",\n\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n\n\t// Gauge metrics for failed requests\n\trequestFailureGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{\n\t\tName: \"requests_failure_gauge\",\n\t\tHelp: \"Number of requests_failure_gauge\",\n\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n\n\t// Linear latencies\n\trequestLinearLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{\n\t\tName: \"request_linear_latency\",\n\t\tHelp: \"A histogram of request_linear_latency\",\n\t\tBuckets: prometheus.LinearBuckets(1, 1, 15),\n\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n\n\t// Exponential latencies\n\trequestExponentialLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{\n\t\tName: \"request_exponential_latency\",\n\t\tHelp: \"A histogram of request_exponential_latency\",\n\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n\n\tserviceHeartbeat = prometheus.NewCounterVec(prometheus.CounterOpts{\n\t\tName: \"service_heartbeat\",\n\t\tHelp: \"Heartbeat signal every 10 seconds indicating pods are alive.\",\n\t}, []string{COMPONENT, SEVERITY})\n)\n\nfunc init() {\n\t// Register prometheus counters\n\tprometheus.MustRegister(requestCounter)\n\tprometheus.MustRegister(requestFailureCounter)\n\tprometheus.MustRegister(requestGauge)\n\tprometheus.MustRegister(requestFailureGauge)\n\tprometheus.MustRegister(requestLinearLatency)\n\tprometheus.MustRegister(requestExponentialLatency)\n\tprometheus.MustRegister(serviceHeartbeat)\n}\n\ntype MetricsExporter struct {", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "332759147", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4251, + "pr_file": "components/kf-monitoring/MetricsExporter.go", + "discussion_id": "332759147", + "commented_code": "@@ -0,0 +1,161 @@\n+package monitoring_util\n+\n+import (\n+\t\"fmt\"\n+\t\"time\"\n+\n+\t\"github.com/prometheus/client_golang/prometheus\"\n+\t\"github.com/prometheus/client_golang/prometheus/promhttp\"\n+\t\"github.com/prometheus/common/log\"\n+\t\"net\"\n+\t\"net/http\"\n+)\n+\n+// Common label keys for all metrics signals\n+// >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n+// COMPONENT: name of Component outputing the 
metrics, eg. \"tf-operator\"\n+const COMPONENT = \"Component\"\n+// KIND: each componenet can label their metrics with custom tag \"kind\". Suggest keeping \"kind\" value to be CONSTANT PER METRICS.\n+const KIND = \"kind\"\n+const NAMESPACE = \"namespace\"\n+// ACTION: request action type of the metrics, eg. \"CRUD\"\n+const ACTION = \"action\"\n+// SEVERITY: level of importance, used to filter alerts\n+const SEVERITY = \"severity\"\n+// <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n+\n+// Alerting metrics severity levels\n+// >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n+const SEVERITY_MINOR = \"Minor\"\n+const SEVERITY_MAJOR = \"Major\"\n+const SEVERITY_CRITICAL = \"Critical\"\n+// <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n+\n+// Util const values\n+// >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n+const METRICSPORT = 8079\n+const METRICSPATH = \"/metrics\"\n+// <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n+\n+var (\n+\t// Counter metrics\n+\t// num of requests counter vec\n+\trequestCounter = prometheus.NewCounterVec(\n+\t\tprometheus.CounterOpts{\n+\t\t\tName: \"request_counter\",\n+\t\t\tHelp: \"Number of request_counter\",\n+\t\t},\n+\t\t[]string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY},\n+\t)\n+\t// Counter metrics for failed requests\n+\trequestFailureCounter = prometheus.NewCounterVec(prometheus.CounterOpts{\n+\t\tName: \"request_failure_counter\",\n+\t\tHelp: \"Number of request_failure_counter\",\n+\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n+\n+\t// Gauge metrics\n+\trequestGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{\n+\t\tName: \"requests_gauge\",\n+\t\tHelp: \"Number of requests_gauge\",\n+\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n+\n+\t// Gauge metrics for failed requests\n+\trequestFailureGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{\n+\t\tName: \"requests_failure_gauge\",\n+\t\tHelp: \"Number of requests_failure_gauge\",\n+\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n+\n+\t// Linear latencies\n+\trequestLinearLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{\n+\t\tName: \"request_linear_latency\",\n+\t\tHelp: \"A histogram of request_linear_latency\",\n+\t\tBuckets: prometheus.LinearBuckets(1, 1, 15),\n+\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n+\n+\t// Exponential latencies\n+\trequestExponentialLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{\n+\t\tName: \"request_exponential_latency\",\n+\t\tHelp: \"A histogram of request_exponential_latency\",\n+\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n+\n+\tserviceHeartbeat = prometheus.NewCounterVec(prometheus.CounterOpts{\n+\t\tName: \"service_heartbeat\",\n+\t\tHelp: \"Heartbeat signal every 10 seconds indicating pods are alive.\",\n+\t}, []string{COMPONENT, SEVERITY})\n+)\n+\n+func init() {\n+\t// Register prometheus counters\n+\tprometheus.MustRegister(requestCounter)\n+\tprometheus.MustRegister(requestFailureCounter)\n+\tprometheus.MustRegister(requestGauge)\n+\tprometheus.MustRegister(requestFailureGauge)\n+\tprometheus.MustRegister(requestLinearLatency)\n+\tprometheus.MustRegister(requestExponentialLatency)\n+\tprometheus.MustRegister(serviceHeartbeat)\n+}\n+\n+type MetricsExporter struct {", + "comment_created_at": "2019-10-08T22:22:03+00:00", + "comment_author": "zhenghuiwang", + "comment_body": "A public struct (and method) should have a comment like \"// MetricsExporter ...\".\r\nCan you run `go vet ./...`?\r\n\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "369340848", + 
"pr_number": 4676, + "pr_file": "components/profile-controller/controllers/moniotring.go", + "created_at": "2020-01-22T02:14:40+00:00", + "commented_code": "package controllers\n\nimport (\n\t\"time\"\n\n\t\"github.com/prometheus/client_golang/prometheus\"\n\tlog \"github.com/sirupsen/logrus\"\n)\n\nconst PROFILE = \"profile_controller\"\nconst COMPONENT = \"component\"\nconst KIND = \"kind\"\n// User that make the request\nconst REQUSER = \"user\"\nconst ACTION = \"action\"\nconst PATH = \"path\"\nconst SEVERITY = \"severity\"\nconst SEVERITY_MINOR = \"minor\"\nconst SEVERITY_MAJOR = \"major\"\nconst SEVERITY_CRITICAL = \"critical\"\nconst MAX_TAG_LEN = 30\n\nvar (\n\t// Counter metrics\n\t// num of requests counter vec\n\trequestCounter = prometheus.NewCounterVec(\n\t\tprometheus.CounterOpts{\n\t\t\tName: \"request_kf\",\n\t\t\tHelp: \"Number of request_counter\",\n\t\t},\n\t\t[]string{COMPONENT, KIND, REQUSER, ACTION, PATH},\n\t)\n\t// Counter metrics for failed requests\n\trequestErrorCounter = prometheus.NewCounterVec(prometheus.CounterOpts{\n\t\tName: \"request_kf_failure\",\n\t\tHelp: \"Number of request_failure_counter\",\n\t}, []string{COMPONENT, KIND, REQUSER, ACTION, PATH, SEVERITY})\n\n\tserviceHeartbeat = prometheus.NewCounterVec(prometheus.CounterOpts{\n\t\tName: \"service_heartbeat\",\n\t\tHelp: \"Heartbeat signal every 10 seconds indicating pods are alive.\",\n\t}, []string{COMPONENT, SEVERITY})\n)\n\nfunc init() {\n\t// Register prometheus counters\n\tprometheus.MustRegister(requestCounter)\n\tprometheus.MustRegister(requestErrorCounter)\n\tprometheus.MustRegister(serviceHeartbeat)\n\t// Count heartbeat\n\tgo func() {\n\t\tlabels := prometheus.Labels{COMPONENT: PROFILE, SEVERITY: SEVERITY_CRITICAL}\n\t\tfor {\n\t\t\ttime.Sleep(10 * time.Second)\n\t\t\tserviceHeartbeat.With(labels).Inc()\n\t\t}\n\t}()\n}\n\nfunc IncRequestCounter(kind string) {", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "369340848", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4676, + "pr_file": "components/profile-controller/controllers/moniotring.go", + "discussion_id": "369340848", + "commented_code": "@@ -0,0 +1,75 @@\n+package controllers\n+\n+import (\n+\t\"time\"\n+\n+\t\"github.com/prometheus/client_golang/prometheus\"\n+\tlog \"github.com/sirupsen/logrus\"\n+)\n+\n+const PROFILE = \"profile_controller\"\n+const COMPONENT = \"component\"\n+const KIND = \"kind\"\n+// User that make the request\n+const REQUSER = \"user\"\n+const ACTION = \"action\"\n+const PATH = \"path\"\n+const SEVERITY = \"severity\"\n+const SEVERITY_MINOR = \"minor\"\n+const SEVERITY_MAJOR = \"major\"\n+const SEVERITY_CRITICAL = \"critical\"\n+const MAX_TAG_LEN = 30\n+\n+var (\n+\t// Counter metrics\n+\t// num of requests counter vec\n+\trequestCounter = prometheus.NewCounterVec(\n+\t\tprometheus.CounterOpts{\n+\t\t\tName: \"request_kf\",\n+\t\t\tHelp: \"Number of request_counter\",\n+\t\t},\n+\t\t[]string{COMPONENT, KIND, REQUSER, ACTION, PATH},\n+\t)\n+\t// Counter metrics for failed requests\n+\trequestErrorCounter = prometheus.NewCounterVec(prometheus.CounterOpts{\n+\t\tName: \"request_kf_failure\",\n+\t\tHelp: \"Number of request_failure_counter\",\n+\t}, []string{COMPONENT, KIND, REQUSER, ACTION, PATH, SEVERITY})\n+\n+\tserviceHeartbeat = prometheus.NewCounterVec(prometheus.CounterOpts{\n+\t\tName: \"service_heartbeat\",\n+\t\tHelp: \"Heartbeat signal every 10 seconds indicating pods are alive.\",\n+\t}, []string{COMPONENT, SEVERITY})\n+)\n+\n+func init() {\n+\t// 
Register prometheus counters\n+\tprometheus.MustRegister(requestCounter)\n+\tprometheus.MustRegister(requestErrorCounter)\n+\tprometheus.MustRegister(serviceHeartbeat)\n+\t// Count heartbeat\n+\tgo func() {\n+\t\tlabels := prometheus.Labels{COMPONENT: PROFILE, SEVERITY: SEVERITY_CRITICAL}\n+\t\tfor {\n+\t\t\ttime.Sleep(10 * time.Second)\n+\t\t\tserviceHeartbeat.With(labels).Inc()\n+\t\t}\n+\t}()\n+}\n+\n+func IncRequestCounter(kind string) {", + "comment_created_at": "2020-01-22T02:14:40+00:00", + "comment_author": "jlewi", + "comment_body": "Can you add go doc strings please for these public methods?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "339181399", + "pr_number": 4399, + "pr_file": "bootstrap/cmd/bootstrap/app/router.go", + "created_at": "2019-10-25T18:21:59+00:00", + "commented_code": "// Continue request process in separate thread.\n\tgo c.CreateDeployment(context.Background(), req)\n\treturn &req, nil\n}\n\n// GetLatestKfdef returns latest kfdef status.\nfunc (r *kfctlRouter) GetLatestKfdef(req kfdefs.KfDef) (*kfdefs.KfDef, error) {\n\tname, err := k8sName(req.Name, req.Spec.Project)\n\tif err != nil {\n\t\tlog.Errorf(\"Could not generate the name; error %v\", err)\n\t\treturn nil, err\n\t}\n\taddress := fmt.Sprintf(\"http://%v.%v.svc.cluster.local:80\", name, r.namespace)\n\tlog.Infof(\"Creating client for %v\", address)\n\tc, err := NewKfctlClient(address)\n\treturn c.GetLatestKfdef(req)\n\t// TODO\n\treturn &kfdefs.KfDef{}, nil\n}\n//\n//// GetLatestKfdef returns latest kfdef status.", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "339181399", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4399, + "pr_file": "bootstrap/cmd/bootstrap/app/router.go", + "discussion_id": "339181399", + "commented_code": "@@ -439,18 +468,19 @@ func (r *kfctlRouter) CreateDeployment(ctx context.Context, req kfdefs.KfDef) (*\n \n \t// Continue request process in separate thread.\n \tgo c.CreateDeployment(context.Background(), req)\n-\treturn &req, nil\n-}\n-\n-// GetLatestKfdef returns latest kfdef status.\n-func (r *kfctlRouter) GetLatestKfdef(req kfdefs.KfDef) (*kfdefs.KfDef, error) {\n-\tname, err := k8sName(req.Name, req.Spec.Project)\n-\tif err != nil {\n-\t\tlog.Errorf(\"Could not generate the name; error %v\", err)\n-\t\treturn nil, err\n-\t}\n-\taddress := fmt.Sprintf(\"http://%v.%v.svc.cluster.local:80\", name, r.namespace)\n-\tlog.Infof(\"Creating client for %v\", address)\n-\tc, err := NewKfctlClient(address)\n-\treturn c.GetLatestKfdef(req)\n+\t// TODO\n+\treturn &kfdefs.KfDef{}, nil\n }\n+//\n+//// GetLatestKfdef returns latest kfdef status.", + "comment_created_at": "2019-10-25T18:21:59+00:00", + "comment_author": "jlewi", + "comment_body": "Please add a TODO explaining why this is commented out.\r\n\r\nThe best thing would be to link to an issue.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-document-code-thoroughly.md b/_reviewers/kubeflow-document-code-thoroughly.md index 07261aa..03bef62 100644 --- a/_reviewers/kubeflow-document-code-thoroughly.md +++ b/_reviewers/kubeflow-document-code-thoroughly.md @@ -47,95 +47,3 @@ Always include appropriate documentation in your code to improve readability, ma ``` Following these practices ensures code is understandable to new team members, facilitates easier maintenance, and helps API consumers use your code correctly. Run tools like `go vet ./...` regularly to catch missing documentation on public elements. 
- - -[ - { - "discussion_id": "491874831", - "pr_number": 5314, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "created_at": "2020-09-21T08:38:29+00:00", - "commented_code": "} else {\n\t\t// Got the pod\n\t\tpodFound = true\n\t\tif len(pod.Status.ContainerStatuses) > 0 &&\n\t\t\tpod.Status.ContainerStatuses[0].State != instance.Status.ContainerState {\n\t\t\tlog.Info(\"Updating container state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n\t\t\tcs := pod.Status.ContainerStatuses[0].State\n\t\t\tinstance.Status.ContainerState = cs\n\t\t\toldConditions := instance.Status.Conditions\n\t\t\tnewCondition := getNextCondition(cs)\n\t\t\t// Append new condition\n\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n\t\t\t\toldConditions[0].Message != newCondition.Message {\n\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n\n\t\tif len(pod.Status.ContainerStatuses) > 0 {\n\t\t\tnotebookContainerFound := false\n\t\t\tfor i := range pod.Status.ContainerStatuses {", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "491874831", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5314, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "491874831", - "commented_code": "@@ -206,23 +206,34 @@ func (r *NotebookReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {\n \t} else {\n \t\t// Got the pod\n \t\tpodFound = true\n-\t\tif len(pod.Status.ContainerStatuses) > 0 &&\n-\t\t\tpod.Status.ContainerStatuses[0].State != instance.Status.ContainerState {\n-\t\t\tlog.Info(\"Updating container state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n-\t\t\tcs := pod.Status.ContainerStatuses[0].State\n-\t\t\tinstance.Status.ContainerState = cs\n-\t\t\toldConditions := instance.Status.Conditions\n-\t\t\tnewCondition := getNextCondition(cs)\n-\t\t\t// Append new condition\n-\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n-\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n-\t\t\t\toldConditions[0].Message != newCondition.Message {\n-\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n-\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n+\n+\t\tif len(pod.Status.ContainerStatuses) > 0 {\n+\t\t\tnotebookContainerFound := false\n+\t\t\tfor i := range pod.Status.ContainerStatuses {", - "comment_created_at": "2020-09-21T08:38:29+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Could you add a comment here to document that the status of the CR will be updated based on the `ContainerState` of the container that has the same name with the CR?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "332759147", - "pr_number": 4251, - "pr_file": "components/kf-monitoring/MetricsExporter.go", - "created_at": "2019-10-08T22:22:03+00:00", - "commented_code": "package monitoring_util\n\nimport 
(\n\t\"fmt\"\n\t\"time\"\n\n\t\"github.com/prometheus/client_golang/prometheus\"\n\t\"github.com/prometheus/client_golang/prometheus/promhttp\"\n\t\"github.com/prometheus/common/log\"\n\t\"net\"\n\t\"net/http\"\n)\n\n// Common label keys for all metrics signals\n// >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n// COMPONENT: name of Component outputing the metrics, eg. \"tf-operator\"\nconst COMPONENT = \"Component\"\n// KIND: each componenet can label their metrics with custom tag \"kind\". Suggest keeping \"kind\" value to be CONSTANT PER METRICS.\nconst KIND = \"kind\"\nconst NAMESPACE = \"namespace\"\n// ACTION: request action type of the metrics, eg. \"CRUD\"\nconst ACTION = \"action\"\n// SEVERITY: level of importance, used to filter alerts\nconst SEVERITY = \"severity\"\n// <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n// Alerting metrics severity levels\n// >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\nconst SEVERITY_MINOR = \"Minor\"\nconst SEVERITY_MAJOR = \"Major\"\nconst SEVERITY_CRITICAL = \"Critical\"\n// <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n// Util const values\n// >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\nconst METRICSPORT = 8079\nconst METRICSPATH = \"/metrics\"\n// <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\nvar (\n\t// Counter metrics\n\t// num of requests counter vec\n\trequestCounter = prometheus.NewCounterVec(\n\t\tprometheus.CounterOpts{\n\t\t\tName: \"request_counter\",\n\t\t\tHelp: \"Number of request_counter\",\n\t\t},\n\t\t[]string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY},\n\t)\n\t// Counter metrics for failed requests\n\trequestFailureCounter = prometheus.NewCounterVec(prometheus.CounterOpts{\n\t\tName: \"request_failure_counter\",\n\t\tHelp: \"Number of request_failure_counter\",\n\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n\n\t// Gauge metrics\n\trequestGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{\n\t\tName: \"requests_gauge\",\n\t\tHelp: \"Number of requests_gauge\",\n\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n\n\t// Gauge metrics for failed requests\n\trequestFailureGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{\n\t\tName: \"requests_failure_gauge\",\n\t\tHelp: \"Number of requests_failure_gauge\",\n\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n\n\t// Linear latencies\n\trequestLinearLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{\n\t\tName: \"request_linear_latency\",\n\t\tHelp: \"A histogram of request_linear_latency\",\n\t\tBuckets: prometheus.LinearBuckets(1, 1, 15),\n\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n\n\t// Exponential latencies\n\trequestExponentialLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{\n\t\tName: \"request_exponential_latency\",\n\t\tHelp: \"A histogram of request_exponential_latency\",\n\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n\n\tserviceHeartbeat = prometheus.NewCounterVec(prometheus.CounterOpts{\n\t\tName: \"service_heartbeat\",\n\t\tHelp: \"Heartbeat signal every 10 seconds indicating pods are alive.\",\n\t}, []string{COMPONENT, SEVERITY})\n)\n\nfunc init() {\n\t// Register prometheus counters\n\tprometheus.MustRegister(requestCounter)\n\tprometheus.MustRegister(requestFailureCounter)\n\tprometheus.MustRegister(requestGauge)\n\tprometheus.MustRegister(requestFailureGauge)\n\tprometheus.MustRegister(requestLinearLatency)\n\tprometheus.MustRegister(requestExponentialLatency)\n\tprometheus.MustRegister(serviceHeartbeat)\n}\n\ntype MetricsExporter struct {", - "repo_full_name": "kubeflow/kubeflow", 
- "discussion_comments": [ - { - "comment_id": "332759147", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4251, - "pr_file": "components/kf-monitoring/MetricsExporter.go", - "discussion_id": "332759147", - "commented_code": "@@ -0,0 +1,161 @@\n+package monitoring_util\n+\n+import (\n+\t\"fmt\"\n+\t\"time\"\n+\n+\t\"github.com/prometheus/client_golang/prometheus\"\n+\t\"github.com/prometheus/client_golang/prometheus/promhttp\"\n+\t\"github.com/prometheus/common/log\"\n+\t\"net\"\n+\t\"net/http\"\n+)\n+\n+// Common label keys for all metrics signals\n+// >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n+// COMPONENT: name of Component outputing the metrics, eg. \"tf-operator\"\n+const COMPONENT = \"Component\"\n+// KIND: each componenet can label their metrics with custom tag \"kind\". Suggest keeping \"kind\" value to be CONSTANT PER METRICS.\n+const KIND = \"kind\"\n+const NAMESPACE = \"namespace\"\n+// ACTION: request action type of the metrics, eg. \"CRUD\"\n+const ACTION = \"action\"\n+// SEVERITY: level of importance, used to filter alerts\n+const SEVERITY = \"severity\"\n+// <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n+\n+// Alerting metrics severity levels\n+// >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n+const SEVERITY_MINOR = \"Minor\"\n+const SEVERITY_MAJOR = \"Major\"\n+const SEVERITY_CRITICAL = \"Critical\"\n+// <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n+\n+// Util const values\n+// >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n+const METRICSPORT = 8079\n+const METRICSPATH = \"/metrics\"\n+// <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n+\n+var (\n+\t// Counter metrics\n+\t// num of requests counter vec\n+\trequestCounter = prometheus.NewCounterVec(\n+\t\tprometheus.CounterOpts{\n+\t\t\tName: \"request_counter\",\n+\t\t\tHelp: \"Number of request_counter\",\n+\t\t},\n+\t\t[]string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY},\n+\t)\n+\t// Counter metrics for failed requests\n+\trequestFailureCounter = prometheus.NewCounterVec(prometheus.CounterOpts{\n+\t\tName: \"request_failure_counter\",\n+\t\tHelp: \"Number of request_failure_counter\",\n+\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n+\n+\t// Gauge metrics\n+\trequestGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{\n+\t\tName: \"requests_gauge\",\n+\t\tHelp: \"Number of requests_gauge\",\n+\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n+\n+\t// Gauge metrics for failed requests\n+\trequestFailureGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{\n+\t\tName: \"requests_failure_gauge\",\n+\t\tHelp: \"Number of requests_failure_gauge\",\n+\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n+\n+\t// Linear latencies\n+\trequestLinearLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{\n+\t\tName: \"request_linear_latency\",\n+\t\tHelp: \"A histogram of request_linear_latency\",\n+\t\tBuckets: prometheus.LinearBuckets(1, 1, 15),\n+\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n+\n+\t// Exponential latencies\n+\trequestExponentialLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{\n+\t\tName: \"request_exponential_latency\",\n+\t\tHelp: \"A histogram of request_exponential_latency\",\n+\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n+\n+\tserviceHeartbeat = prometheus.NewCounterVec(prometheus.CounterOpts{\n+\t\tName: \"service_heartbeat\",\n+\t\tHelp: \"Heartbeat signal every 10 seconds indicating pods are alive.\",\n+\t}, []string{COMPONENT, SEVERITY})\n+)\n+\n+func init() {\n+\t// Register prometheus 
counters\n+\tprometheus.MustRegister(requestCounter)\n+\tprometheus.MustRegister(requestFailureCounter)\n+\tprometheus.MustRegister(requestGauge)\n+\tprometheus.MustRegister(requestFailureGauge)\n+\tprometheus.MustRegister(requestLinearLatency)\n+\tprometheus.MustRegister(requestExponentialLatency)\n+\tprometheus.MustRegister(serviceHeartbeat)\n+}\n+\n+type MetricsExporter struct {", - "comment_created_at": "2019-10-08T22:22:03+00:00", - "comment_author": "zhenghuiwang", - "comment_body": "A public struct (and method) should have a comment like \"// MetricsExporter ...\".\r\nCan you run `go vet ./...`?\r\n\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "369340848", - "pr_number": 4676, - "pr_file": "components/profile-controller/controllers/moniotring.go", - "created_at": "2020-01-22T02:14:40+00:00", - "commented_code": "package controllers\n\nimport (\n\t\"time\"\n\n\t\"github.com/prometheus/client_golang/prometheus\"\n\tlog \"github.com/sirupsen/logrus\"\n)\n\nconst PROFILE = \"profile_controller\"\nconst COMPONENT = \"component\"\nconst KIND = \"kind\"\n// User that make the request\nconst REQUSER = \"user\"\nconst ACTION = \"action\"\nconst PATH = \"path\"\nconst SEVERITY = \"severity\"\nconst SEVERITY_MINOR = \"minor\"\nconst SEVERITY_MAJOR = \"major\"\nconst SEVERITY_CRITICAL = \"critical\"\nconst MAX_TAG_LEN = 30\n\nvar (\n\t// Counter metrics\n\t// num of requests counter vec\n\trequestCounter = prometheus.NewCounterVec(\n\t\tprometheus.CounterOpts{\n\t\t\tName: \"request_kf\",\n\t\t\tHelp: \"Number of request_counter\",\n\t\t},\n\t\t[]string{COMPONENT, KIND, REQUSER, ACTION, PATH},\n\t)\n\t// Counter metrics for failed requests\n\trequestErrorCounter = prometheus.NewCounterVec(prometheus.CounterOpts{\n\t\tName: \"request_kf_failure\",\n\t\tHelp: \"Number of request_failure_counter\",\n\t}, []string{COMPONENT, KIND, REQUSER, ACTION, PATH, SEVERITY})\n\n\tserviceHeartbeat = prometheus.NewCounterVec(prometheus.CounterOpts{\n\t\tName: \"service_heartbeat\",\n\t\tHelp: \"Heartbeat signal every 10 seconds indicating pods are alive.\",\n\t}, []string{COMPONENT, SEVERITY})\n)\n\nfunc init() {\n\t// Register prometheus counters\n\tprometheus.MustRegister(requestCounter)\n\tprometheus.MustRegister(requestErrorCounter)\n\tprometheus.MustRegister(serviceHeartbeat)\n\t// Count heartbeat\n\tgo func() {\n\t\tlabels := prometheus.Labels{COMPONENT: PROFILE, SEVERITY: SEVERITY_CRITICAL}\n\t\tfor {\n\t\t\ttime.Sleep(10 * time.Second)\n\t\t\tserviceHeartbeat.With(labels).Inc()\n\t\t}\n\t}()\n}\n\nfunc IncRequestCounter(kind string) {", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "369340848", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4676, - "pr_file": "components/profile-controller/controllers/moniotring.go", - "discussion_id": "369340848", - "commented_code": "@@ -0,0 +1,75 @@\n+package controllers\n+\n+import (\n+\t\"time\"\n+\n+\t\"github.com/prometheus/client_golang/prometheus\"\n+\tlog \"github.com/sirupsen/logrus\"\n+)\n+\n+const PROFILE = \"profile_controller\"\n+const COMPONENT = \"component\"\n+const KIND = \"kind\"\n+// User that make the request\n+const REQUSER = \"user\"\n+const ACTION = \"action\"\n+const PATH = \"path\"\n+const SEVERITY = \"severity\"\n+const SEVERITY_MINOR = \"minor\"\n+const SEVERITY_MAJOR = \"major\"\n+const SEVERITY_CRITICAL = \"critical\"\n+const MAX_TAG_LEN = 30\n+\n+var (\n+\t// Counter metrics\n+\t// num of requests counter vec\n+\trequestCounter = 
prometheus.NewCounterVec(\n+\t\tprometheus.CounterOpts{\n+\t\t\tName: \"request_kf\",\n+\t\t\tHelp: \"Number of request_counter\",\n+\t\t},\n+\t\t[]string{COMPONENT, KIND, REQUSER, ACTION, PATH},\n+\t)\n+\t// Counter metrics for failed requests\n+\trequestErrorCounter = prometheus.NewCounterVec(prometheus.CounterOpts{\n+\t\tName: \"request_kf_failure\",\n+\t\tHelp: \"Number of request_failure_counter\",\n+\t}, []string{COMPONENT, KIND, REQUSER, ACTION, PATH, SEVERITY})\n+\n+\tserviceHeartbeat = prometheus.NewCounterVec(prometheus.CounterOpts{\n+\t\tName: \"service_heartbeat\",\n+\t\tHelp: \"Heartbeat signal every 10 seconds indicating pods are alive.\",\n+\t}, []string{COMPONENT, SEVERITY})\n+)\n+\n+func init() {\n+\t// Register prometheus counters\n+\tprometheus.MustRegister(requestCounter)\n+\tprometheus.MustRegister(requestErrorCounter)\n+\tprometheus.MustRegister(serviceHeartbeat)\n+\t// Count heartbeat\n+\tgo func() {\n+\t\tlabels := prometheus.Labels{COMPONENT: PROFILE, SEVERITY: SEVERITY_CRITICAL}\n+\t\tfor {\n+\t\t\ttime.Sleep(10 * time.Second)\n+\t\t\tserviceHeartbeat.With(labels).Inc()\n+\t\t}\n+\t}()\n+}\n+\n+func IncRequestCounter(kind string) {", - "comment_created_at": "2020-01-22T02:14:40+00:00", - "comment_author": "jlewi", - "comment_body": "Can you add go doc strings please for these public methods?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "339181399", - "pr_number": 4399, - "pr_file": "bootstrap/cmd/bootstrap/app/router.go", - "created_at": "2019-10-25T18:21:59+00:00", - "commented_code": "// Continue request process in separate thread.\n\tgo c.CreateDeployment(context.Background(), req)\n\treturn &req, nil\n}\n\n// GetLatestKfdef returns latest kfdef status.\nfunc (r *kfctlRouter) GetLatestKfdef(req kfdefs.KfDef) (*kfdefs.KfDef, error) {\n\tname, err := k8sName(req.Name, req.Spec.Project)\n\tif err != nil {\n\t\tlog.Errorf(\"Could not generate the name; error %v\", err)\n\t\treturn nil, err\n\t}\n\taddress := fmt.Sprintf(\"http://%v.%v.svc.cluster.local:80\", name, r.namespace)\n\tlog.Infof(\"Creating client for %v\", address)\n\tc, err := NewKfctlClient(address)\n\treturn c.GetLatestKfdef(req)\n\t// TODO\n\treturn &kfdefs.KfDef{}, nil\n}\n//\n//// GetLatestKfdef returns latest kfdef status.", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "339181399", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4399, - "pr_file": "bootstrap/cmd/bootstrap/app/router.go", - "discussion_id": "339181399", - "commented_code": "@@ -439,18 +468,19 @@ func (r *kfctlRouter) CreateDeployment(ctx context.Context, req kfdefs.KfDef) (*\n \n \t// Continue request process in separate thread.\n \tgo c.CreateDeployment(context.Background(), req)\n-\treturn &req, nil\n-}\n-\n-// GetLatestKfdef returns latest kfdef status.\n-func (r *kfctlRouter) GetLatestKfdef(req kfdefs.KfDef) (*kfdefs.KfDef, error) {\n-\tname, err := k8sName(req.Name, req.Spec.Project)\n-\tif err != nil {\n-\t\tlog.Errorf(\"Could not generate the name; error %v\", err)\n-\t\treturn nil, err\n-\t}\n-\taddress := fmt.Sprintf(\"http://%v.%v.svc.cluster.local:80\", name, r.namespace)\n-\tlog.Infof(\"Creating client for %v\", address)\n-\tc, err := NewKfctlClient(address)\n-\treturn c.GetLatestKfdef(req)\n+\t// TODO\n+\treturn &kfdefs.KfDef{}, nil\n }\n+//\n+//// GetLatestKfdef returns latest kfdef status.", - "comment_created_at": "2019-10-25T18:21:59+00:00", - "comment_author": "jlewi", - "comment_body": "Please add a TODO explaining why this is commented 
out.\r\n\r\nThe best thing would be to link to an issue.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-document-migration-paths.json b/_reviewers/kubeflow-document-migration-paths.json new file mode 100644 index 0000000..9cdf63a --- /dev/null +++ b/_reviewers/kubeflow-document-migration-paths.json @@ -0,0 +1,70 @@ +[ + { + "discussion_id": "1194039704", + "pr_number": 7132, + "pr_file": "ROADMAP.md", + "created_at": "2023-05-15T15:55:13+00:00", + "commented_code": "# Kubeflow Roadmap\n\n## Kubeflow 1.8 Release, Planned: May 2023\nThe Kubeflow Community plans to deliver its v1.8 release in Oct 2023 per this timeline(pending link). The high level deliverables are tracked in the [v1.8 Release](https://github.com/orgs/kubeflow/projects/58/) Github project board. The v1.8 release process will be managed by the v1.8 [release team](https://github.com/kubeflow/community/blob/a956b3f6f15c49f928e37eaafec40d7f73ee1d5b/releases/release-team.md) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n\n### Themes\n* Kubernetes 1.26 and 1.27 support\n* Kubeflow Pipelines v2.0 release\n* Kubeflow security (TBD)\n* Kubeflow upgrade process (TBD)", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1194039704", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7132, + "pr_file": "ROADMAP.md", + "discussion_id": "1194039704", + "commented_code": "@@ -1,5 +1,38 @@\n # Kubeflow Roadmap\n \n+## Kubeflow 1.8 Release, Planned: May 2023\n+The Kubeflow Community plans to deliver its v1.8 release in Oct 2023 per this timeline(pending link). The high level deliverables are tracked in the [v1.8 Release](https://github.com/orgs/kubeflow/projects/58/) Github project board. The v1.8 release process will be managed by the v1.8 [release team](https://github.com/kubeflow/community/blob/a956b3f6f15c49f928e37eaafec40d7f73ee1d5b/releases/release-team.md) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n+\n+### Themes\n+* Kubernetes 1.26 and 1.27 support\n+* Kubeflow Pipelines v2.0 release\n+* Kubeflow security (TBD)\n+* Kubeflow upgrade process (TBD)", + "comment_created_at": "2023-05-15T15:55:13+00:00", + "comment_author": "jbottum", + "comment_body": "We have listed upgrades on the roadmap before and have not delivered. I am cautious to list upgrades without qualifier (stretch goal) or some well defined step towards upgrade i.e. list breaking changes, identify to/from dependencies and statements for upgrading a limited part of KF. The documentation for an upgrade could be extensive as there are potentially a lot of choices.", + "pr_file_module": null + }, + { + "comment_id": "1209421928", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7132, + "pr_file": "ROADMAP.md", + "discussion_id": "1194039704", + "commented_code": "@@ -1,5 +1,38 @@\n # Kubeflow Roadmap\n \n+## Kubeflow 1.8 Release, Planned: May 2023\n+The Kubeflow Community plans to deliver its v1.8 release in Oct 2023 per this timeline(pending link). The high level deliverables are tracked in the [v1.8 Release](https://github.com/orgs/kubeflow/projects/58/) Github project board. 
The v1.8 release process will be managed by the v1.8 [release team](https://github.com/kubeflow/community/blob/a956b3f6f15c49f928e37eaafec40d7f73ee1d5b/releases/release-team.md) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n+\n+### Themes\n+* Kubernetes 1.26 and 1.27 support\n+* Kubeflow Pipelines v2.0 release\n+* Kubeflow security (TBD)\n+* Kubeflow upgrade process (TBD)", + "comment_created_at": "2023-05-29T15:36:50+00:00", + "comment_author": "DnPlas", + "comment_body": "Agree, after some discussion with the release team, I think I can narrow this down. I'll update this.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "334699694", + "pr_number": 4298, + "pr_file": "bootstrap/cmd/kfdefConverter/README.md", + "created_at": "2019-10-14T23:28:37+00:00", + "commented_code": "# KfDef version converter\n\n## Overview\n\nThis is a simple helper CLI that converts KfDef between versions.\n\n## Usage\n\n```\nA simple CLI to convert KfDef from v1alpha1 to v1beta1\n\nUsage:\n kfdef-converter [command]\n\nAvailable Commands:\n help Help about any command\n tov1beta1 Convert a KfDef config in v1alpha1 into v1beta1 format.", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "334699694", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4298, + "pr_file": "bootstrap/cmd/kfdefConverter/README.md", + "discussion_id": "334699694", + "commented_code": "@@ -0,0 +1,23 @@\n+# KfDef version converter\n+\n+## Overview\n+\n+This is a simple helper CLI that converts KfDef between versions.\n+\n+## Usage\n+\n+```\n+A simple CLI to convert KfDef from v1alpha1 to v1beta1\n+\n+Usage:\n+ kfdef-converter [command]\n+\n+Available Commands:\n+ help Help about any command\n+ tov1beta1 Convert a KfDef config in v1alpha1 into v1beta1 format.\n+", + "comment_created_at": "2019-10-14T23:28:37+00:00", + "comment_author": "lluunn", + "comment_body": "probably add an example usage of kfdef-converter tov1beta1", + "pr_file_module": null + }, + { + "comment_id": "334743542", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4298, + "pr_file": "bootstrap/cmd/kfdefConverter/README.md", + "discussion_id": "334699694", + "commented_code": "@@ -0,0 +1,23 @@\n+# KfDef version converter\n+\n+## Overview\n+\n+This is a simple helper CLI that converts KfDef between versions.\n+\n+## Usage\n+\n+```\n+A simple CLI to convert KfDef from v1alpha1 to v1beta1\n+\n+Usage:\n+ kfdef-converter [command]\n+\n+Available Commands:\n+ help Help about any command\n+ tov1beta1 Convert a KfDef config in v1alpha1 into v1beta1 format.\n+", + "comment_created_at": "2019-10-15T03:59:01+00:00", + "comment_author": "gabrielwen", + "comment_body": "Added.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-document-migration-paths.md b/_reviewers/kubeflow-document-migration-paths.md index a4938b6..77183a5 100644 --- a/_reviewers/kubeflow-document-migration-paths.md +++ b/_reviewers/kubeflow-document-migration-paths.md @@ -40,75 +40,3 @@ kfdef-converter tov1beta1 --input=/path/to/old-config.yaml --output=/path/to/new ``` When planning roadmaps that include migrations, be specific about upgrade capabilities rather than making general promises. If complete migration support isn't feasible, consider defining limited scope migrations or marking them as stretch goals. 
- - -[ - { - "discussion_id": "1194039704", - "pr_number": 7132, - "pr_file": "ROADMAP.md", - "created_at": "2023-05-15T15:55:13+00:00", - "commented_code": "# Kubeflow Roadmap\n\n## Kubeflow 1.8 Release, Planned: May 2023\nThe Kubeflow Community plans to deliver its v1.8 release in Oct 2023 per this timeline(pending link). The high level deliverables are tracked in the [v1.8 Release](https://github.com/orgs/kubeflow/projects/58/) Github project board. The v1.8 release process will be managed by the v1.8 [release team](https://github.com/kubeflow/community/blob/a956b3f6f15c49f928e37eaafec40d7f73ee1d5b/releases/release-team.md) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n\n### Themes\n* Kubernetes 1.26 and 1.27 support\n* Kubeflow Pipelines v2.0 release\n* Kubeflow security (TBD)\n* Kubeflow upgrade process (TBD)", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1194039704", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7132, - "pr_file": "ROADMAP.md", - "discussion_id": "1194039704", - "commented_code": "@@ -1,5 +1,38 @@\n # Kubeflow Roadmap\n \n+## Kubeflow 1.8 Release, Planned: May 2023\n+The Kubeflow Community plans to deliver its v1.8 release in Oct 2023 per this timeline(pending link). The high level deliverables are tracked in the [v1.8 Release](https://github.com/orgs/kubeflow/projects/58/) Github project board. The v1.8 release process will be managed by the v1.8 [release team](https://github.com/kubeflow/community/blob/a956b3f6f15c49f928e37eaafec40d7f73ee1d5b/releases/release-team.md) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n+\n+### Themes\n+* Kubernetes 1.26 and 1.27 support\n+* Kubeflow Pipelines v2.0 release\n+* Kubeflow security (TBD)\n+* Kubeflow upgrade process (TBD)", - "comment_created_at": "2023-05-15T15:55:13+00:00", - "comment_author": "jbottum", - "comment_body": "We have listed upgrades on the roadmap before and have not delivered. I am cautious to list upgrades without qualifier (stretch goal) or some well defined step towards upgrade i.e. list breaking changes, identify to/from dependencies and statements for upgrading a limited part of KF. The documentation for an upgrade could be extensive as there are potentially a lot of choices.", - "pr_file_module": null - }, - { - "comment_id": "1209421928", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7132, - "pr_file": "ROADMAP.md", - "discussion_id": "1194039704", - "commented_code": "@@ -1,5 +1,38 @@\n # Kubeflow Roadmap\n \n+## Kubeflow 1.8 Release, Planned: May 2023\n+The Kubeflow Community plans to deliver its v1.8 release in Oct 2023 per this timeline(pending link). The high level deliverables are tracked in the [v1.8 Release](https://github.com/orgs/kubeflow/projects/58/) Github project board. 
The v1.8 release process will be managed by the v1.8 [release team](https://github.com/kubeflow/community/blob/a956b3f6f15c49f928e37eaafec40d7f73ee1d5b/releases/release-team.md) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n+\n+### Themes\n+* Kubernetes 1.26 and 1.27 support\n+* Kubeflow Pipelines v2.0 release\n+* Kubeflow security (TBD)\n+* Kubeflow upgrade process (TBD)", - "comment_created_at": "2023-05-29T15:36:50+00:00", - "comment_author": "DnPlas", - "comment_body": "Agree, after some discussion with the release team, I think I can narrow this down. I'll update this.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "334699694", - "pr_number": 4298, - "pr_file": "bootstrap/cmd/kfdefConverter/README.md", - "created_at": "2019-10-14T23:28:37+00:00", - "commented_code": "# KfDef version converter\n\n## Overview\n\nThis is a simple helper CLI that converts KfDef between versions.\n\n## Usage\n\n```\nA simple CLI to convert KfDef from v1alpha1 to v1beta1\n\nUsage:\n kfdef-converter [command]\n\nAvailable Commands:\n help Help about any command\n tov1beta1 Convert a KfDef config in v1alpha1 into v1beta1 format.", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "334699694", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4298, - "pr_file": "bootstrap/cmd/kfdefConverter/README.md", - "discussion_id": "334699694", - "commented_code": "@@ -0,0 +1,23 @@\n+# KfDef version converter\n+\n+## Overview\n+\n+This is a simple helper CLI that converts KfDef between versions.\n+\n+## Usage\n+\n+```\n+A simple CLI to convert KfDef from v1alpha1 to v1beta1\n+\n+Usage:\n+ kfdef-converter [command]\n+\n+Available Commands:\n+ help Help about any command\n+ tov1beta1 Convert a KfDef config in v1alpha1 into v1beta1 format.\n+", - "comment_created_at": "2019-10-14T23:28:37+00:00", - "comment_author": "lluunn", - "comment_body": "probably add an example usage of kfdef-converter tov1beta1", - "pr_file_module": null - }, - { - "comment_id": "334743542", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4298, - "pr_file": "bootstrap/cmd/kfdefConverter/README.md", - "discussion_id": "334699694", - "commented_code": "@@ -0,0 +1,23 @@\n+# KfDef version converter\n+\n+## Overview\n+\n+This is a simple helper CLI that converts KfDef between versions.\n+\n+## Usage\n+\n+```\n+A simple CLI to convert KfDef from v1alpha1 to v1beta1\n+\n+Usage:\n+ kfdef-converter [command]\n+\n+Available Commands:\n+ help Help about any command\n+ tov1beta1 Convert a KfDef config in v1alpha1 into v1beta1 format.\n+", - "comment_created_at": "2019-10-15T03:59:01+00:00", - "comment_author": "gabrielwen", - "comment_body": "Added.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-document-networking-annotations.json b/_reviewers/kubeflow-document-networking-annotations.json new file mode 100644 index 0000000..c90b3e2 --- /dev/null +++ b/_reviewers/kubeflow-document-networking-annotations.json @@ -0,0 +1,94 @@ +[ + { + "discussion_id": "612290343", + "pr_number": 5823, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", + "created_at": "2021-04-13T09:36:07+00:00", + "commented_code": "- public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-1831e436\n - 
public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-1831e436\n imageVSCode:\n # The container Image for the user's VS-Code Server\n imageGroupOne:\n # The container Image for the user's Group One Server", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "612290343", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5823, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", + "discussion_id": "612290343", + "commented_code": "@@ -25,14 +25,14 @@ spawnerFormDefaults:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-1831e436\n- imageVSCode:\n- # The container Image for the user's VS-Code Server\n+ imageGroupOne:\n+ # The container Image for the user's Group One Server", + "comment_created_at": "2021-04-13T09:36:07+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Can you add a comment here that for images in this group the backend will be:\r\n1. Adding the `notebooks.kubeflow.org/http-rewrite-uri` annotation to the CR, to rewrite the path that ends up in the running container to be `/`\r\n\r\nThis will help exposing to the users the constraints and logic that will be applied to images that belong to that group", + "pr_file_module": null + }, + { + "comment_id": "612319265", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5823, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", + "discussion_id": "612290343", + "commented_code": "@@ -25,14 +25,14 @@ spawnerFormDefaults:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-1831e436\n- imageVSCode:\n- # The container Image for the user's VS-Code Server\n+ imageGroupOne:\n+ # The container Image for the user's Group One Server", + "comment_created_at": "2021-04-13T10:19:40+00:00", + "comment_author": "davidspek", + "comment_body": "How about:\r\n```yaml\r\n# The annotation `notebooks.kubeflow.org/http-rewrite-uri: /`\r\n# is applied to notebook in this group, configuring\r\n# the Istio rewrite for containers that host their web UI at `/`\r\n```", + "pr_file_module": null + }, + { + "comment_id": "612323552", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5823, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", + "discussion_id": "612290343", + "commented_code": "@@ -25,14 +25,14 @@ spawnerFormDefaults:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-1831e436\n- imageVSCode:\n- # The container Image for the user's VS-Code Server\n+ imageGroupOne:\n+ # The container Image for the user's Group One Server", + "comment_created_at": "2021-04-13T10:26:48+00:00", + "comment_author": "kimwnasptd", + "comment_body": "LGTM", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "612290516", + "pr_number": 5823, + "pr_file": 
"components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", + "created_at": "2021-04-13T09:36:24+00:00", + "commented_code": "- public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-1831e436\n imageVSCode:\n # The container Image for the user's VS-Code Server\n imageGroupOne:\n # The container Image for the user's Group One Server\n value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-1831e436\n # The list of available standard container Images\n options:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-1831e436\n imageRStudio:\n # The container Image for the user's RStudio Server\n imageGroupTwo:\n # The container Image for the user's Group Two Server", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "612290516", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5823, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", + "discussion_id": "612290516", + "commented_code": "@@ -25,14 +25,14 @@ spawnerFormDefaults:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-1831e436\n- imageVSCode:\n- # The container Image for the user's VS-Code Server\n+ imageGroupOne:\n+ # The container Image for the user's Group One Server\n value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-1831e436\n # The list of available standard container Images\n options:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-1831e436\n- imageRStudio:\n- # The container Image for the user's RStudio Server\n+ imageGroupTwo:\n+ # The container Image for the user's Group Two Server", + "comment_created_at": "2021-04-13T09:36:24+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Can you add a comment here that for images in this group the backend will be:\r\n1. Adding the `notebooks.kubeflow.org/http-headers-request-set` annotation to the CR, for setting the `X-RStudio-Root-Path: ${ISTIO_PREFIX}` header to each request to the running container\r\n2. 
Adding the `notebooks.kubeflow.org/http-rewrite-uri` annotation to the CR, to rewrite the path that ends up in the running container to be `/`\r\n\r\nThis will help exposing to the users the constraints and logic that will be applied to images that belong to that group", + "pr_file_module": null + }, + { + "comment_id": "612319636", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5823, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", + "discussion_id": "612290516", + "commented_code": "@@ -25,14 +25,14 @@ spawnerFormDefaults:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-1831e436\n- imageVSCode:\n- # The container Image for the user's VS-Code Server\n+ imageGroupOne:\n+ # The container Image for the user's Group One Server\n value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-1831e436\n # The list of available standard container Images\n options:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-1831e436\n- imageRStudio:\n- # The container Image for the user's RStudio Server\n+ imageGroupTwo:\n+ # The container Image for the user's Group Two Server", + "comment_created_at": "2021-04-13T10:20:15+00:00", + "comment_author": "davidspek", + "comment_body": "How about:\r\n```yaml\r\n# The annotation `notebooks.kubeflow.org/http-rewrite-uri: /`\r\n# is applied to notebook in this group, configuring\r\n# the Istio rewrite for containers that host their web UI at `/`\r\n# The annotation `notebooks.kubeflow.org/http-headers-request-set`\r\n# is applied to notebook in this group, configuring Istio\r\n# to add the `X-RStudio-Root-Path` header to requests\r\n```", + "pr_file_module": null + }, + { + "comment_id": "612323612", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5823, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", + "discussion_id": "612290516", + "commented_code": "@@ -25,14 +25,14 @@ spawnerFormDefaults:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-1831e436\n- imageVSCode:\n- # The container Image for the user's VS-Code Server\n+ imageGroupOne:\n+ # The container Image for the user's Group One Server\n value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-1831e436\n # The list of available standard container Images\n options:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-1831e436\n- imageRStudio:\n- # The container Image for the user's RStudio Server\n+ imageGroupTwo:\n+ # The container Image for the user's Group Two Server", + "comment_created_at": "2021-04-13T10:26:53+00:00", + "comment_author": "kimwnasptd", + "comment_body": "LGTM", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-document-networking-annotations.md b/_reviewers/kubeflow-document-networking-annotations.md index 9256c2d..107e83e 100644 --- a/_reviewers/kubeflow-document-networking-annotations.md +++ b/_reviewers/kubeflow-document-networking-annotations.md @@ -37,99 
+37,3 @@ imageGroupTwo: ``` This practice ensures that network routing behaviors are explicit, making the system more maintainable and reducing confusion when debugging networking issues. - - -[ - { - "discussion_id": "612290343", - "pr_number": 5823, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", - "created_at": "2021-04-13T09:36:07+00:00", - "commented_code": "- public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-1831e436\n imageVSCode:\n # The container Image for the user's VS-Code Server\n imageGroupOne:\n # The container Image for the user's Group One Server", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "612290343", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5823, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", - "discussion_id": "612290343", - "commented_code": "@@ -25,14 +25,14 @@ spawnerFormDefaults:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-1831e436\n- imageVSCode:\n- # The container Image for the user's VS-Code Server\n+ imageGroupOne:\n+ # The container Image for the user's Group One Server", - "comment_created_at": "2021-04-13T09:36:07+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Can you add a comment here that for images in this group the backend will be:\r\n1. 
Adding the `notebooks.kubeflow.org/http-rewrite-uri` annotation to the CR, to rewrite the path that ends up in the running container to be `/`\r\n\r\nThis will help exposing to the users the constraints and logic that will be applied to images that belong to that group", - "pr_file_module": null - }, - { - "comment_id": "612319265", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5823, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", - "discussion_id": "612290343", - "commented_code": "@@ -25,14 +25,14 @@ spawnerFormDefaults:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-1831e436\n- imageVSCode:\n- # The container Image for the user's VS-Code Server\n+ imageGroupOne:\n+ # The container Image for the user's Group One Server", - "comment_created_at": "2021-04-13T10:19:40+00:00", - "comment_author": "davidspek", - "comment_body": "How about:\r\n```yaml\r\n# The annotation `notebooks.kubeflow.org/http-rewrite-uri: /`\r\n# is applied to notebook in this group, configuring\r\n# the Istio rewrite for containers that host their web UI at `/`\r\n```", - "pr_file_module": null - }, - { - "comment_id": "612323552", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5823, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", - "discussion_id": "612290343", - "commented_code": "@@ -25,14 +25,14 @@ spawnerFormDefaults:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-1831e436\n- imageVSCode:\n- # The container Image for the user's VS-Code Server\n+ imageGroupOne:\n+ # The container Image for the user's Group One Server", - "comment_created_at": "2021-04-13T10:26:48+00:00", - "comment_author": "kimwnasptd", - "comment_body": "LGTM", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "612290516", - "pr_number": 5823, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", - "created_at": "2021-04-13T09:36:24+00:00", - "commented_code": "- public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-1831e436\n imageVSCode:\n # The container Image for the user's VS-Code Server\n imageGroupOne:\n # The container Image for the user's Group One Server\n value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-1831e436\n # The list of available standard container Images\n options:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-1831e436\n imageRStudio:\n # The container Image for the user's RStudio Server\n imageGroupTwo:\n # The container Image for the user's Group Two Server", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "612290516", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5823, - "pr_file": 
"components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", - "discussion_id": "612290516", - "commented_code": "@@ -25,14 +25,14 @@ spawnerFormDefaults:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-1831e436\n- imageVSCode:\n- # The container Image for the user's VS-Code Server\n+ imageGroupOne:\n+ # The container Image for the user's Group One Server\n value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-1831e436\n # The list of available standard container Images\n options:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-1831e436\n- imageRStudio:\n- # The container Image for the user's RStudio Server\n+ imageGroupTwo:\n+ # The container Image for the user's Group Two Server", - "comment_created_at": "2021-04-13T09:36:24+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Can you add a comment here that for images in this group the backend will be:\r\n1. Adding the `notebooks.kubeflow.org/http-headers-request-set` annotation to the CR, for setting the `X-RStudio-Root-Path: ${ISTIO_PREFIX}` header to each request to the running container\r\n2. Adding the `notebooks.kubeflow.org/http-rewrite-uri` annotation to the CR, to rewrite the path that ends up in the running container to be `/`\r\n\r\nThis will help exposing to the users the constraints and logic that will be applied to images that belong to that group", - "pr_file_module": null - }, - { - "comment_id": "612319636", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5823, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", - "discussion_id": "612290516", - "commented_code": "@@ -25,14 +25,14 @@ spawnerFormDefaults:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-1831e436\n- imageVSCode:\n- # The container Image for the user's VS-Code Server\n+ imageGroupOne:\n+ # The container Image for the user's Group One Server\n value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-1831e436\n # The list of available standard container Images\n options:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-1831e436\n- imageRStudio:\n- # The container Image for the user's RStudio Server\n+ imageGroupTwo:\n+ # The container Image for the user's Group Two Server", - "comment_created_at": "2021-04-13T10:20:15+00:00", - "comment_author": "davidspek", - "comment_body": "How about:\r\n```yaml\r\n# The annotation `notebooks.kubeflow.org/http-rewrite-uri: /`\r\n# is applied to notebook in this group, configuring\r\n# the Istio rewrite for containers that host their web UI at `/`\r\n# The annotation `notebooks.kubeflow.org/http-headers-request-set`\r\n# is applied to notebook in this group, configuring Istio\r\n# to add the `X-RStudio-Root-Path` header to requests\r\n```", - "pr_file_module": null - }, - { - "comment_id": "612323612", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5823, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", - 
"discussion_id": "612290516", - "commented_code": "@@ -25,14 +25,14 @@ spawnerFormDefaults:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-1831e436\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-1831e436\n- imageVSCode:\n- # The container Image for the user's VS-Code Server\n+ imageGroupOne:\n+ # The container Image for the user's Group One Server\n value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-1831e436\n # The list of available standard container Images\n options:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-1831e436\n- imageRStudio:\n- # The container Image for the user's RStudio Server\n+ imageGroupTwo:\n+ # The container Image for the user's Group Two Server", - "comment_created_at": "2021-04-13T10:26:53+00:00", - "comment_author": "kimwnasptd", - "comment_body": "LGTM", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-document-with-precision.json b/_reviewers/kubeflow-document-with-precision.json new file mode 100644 index 0000000..2c64443 --- /dev/null +++ b/_reviewers/kubeflow-document-with-precision.json @@ -0,0 +1,68 @@ +[ + { + "discussion_id": "1720450533", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "created_at": "2024-08-16T23:15:07+00:00", + "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1720450533", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "discussion_id": "1720450533", + "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch", + "comment_created_at": "2024-08-16T23:15:07+00:00", + "comment_author": "thesuperzapper", + "comment_body": "Can we update these links to be to a specific version, and file, e.g.:\r\n\r\n- https://github.com/HabanaAI/Setup_and_Install/blob/1.17.0/dockerfiles/base/Dockerfile.ubuntu22.04\r\n- https://github.com/HabanaAI/Setup_and_Install/blob/1.17.0/dockerfiles/pytorch/Dockerfile.ubuntu\r\n\r\nPlease also say \"based on\" because the content is not the same.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1720480710", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "created_at": "2024-08-17T00:02:29+00:00", + "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n\n# version details\nARG 
ARTIFACTORY_URL=vault.habana.ai\nARG VERSION=1.16.2\nARG REVISION=2\nARG PYTORCH_VERSION=2.2.2\nARG OS_NUMBER=2204\n\n# runtime variables\nENV DEBIAN_FRONTEND=noninteractive\nENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\nENV HABANA_LOGS=/var/log/habana_logs/\nENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\nENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\nENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\nENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n\n# There is no need to store pip installation files inside docker image\nENV PIP_NO_CACHE_DIR=on\nENV PIP_DISABLE_PIP_VERSION_CHECK=1\n\n# libfabric and scaling variables\nENV LIBFABRIC_VERSION=\"1.20.0\"\nENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\nENV MPI_ROOT=/opt/amazon/openmpi\nENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\nENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\nENV MPICC=${MPI_ROOT}/bin/mpicc\nENV OPAL_PREFIX=${MPI_ROOT}\nENV RDMAV_FORK_SAFE=1\nENV FI_EFA_USE_DEVICE_RDMA=1\nENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\nENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib\n\n# Gaudi stack doesn't (yet) support 3.11, thus\n# downgrade python from 3.11.x to 3.10.14\nRUN source /opt/conda/etc/profile.d/conda.sh \\\n && conda activate base \\\n && conda install -y -q --no-pin python=3.10.14 \\\n && sed -i 's/python ==.*/python ==3.10.14/' /opt/conda/conda-meta/pinned \\\n && conda clean -a -f -y\n\nUSER root\n\n# install various support libraries\nRUN apt-get update \\\n && apt-get install -y --no-install-recommends \\\n apt-utils \\\n bc \\\n build-essential \\\n graphviz \\\n iproute2 \\\n libcairo2-dev \\\n libgl1 \\\n libglib2.0-dev \\\n libgnutls30 \\\n libgoogle-glog0v5 \\\n libgoogle-perftools-dev \\\n libhdf5-dev \\\n libjemalloc2 \\\n libjpeg-dev \\\n liblapack-dev \\\n libmkl-dev \\\n libnuma-dev \\\n libopenblas-dev \\\n libpcre2-dev \\\n libpq-dev \\\n libselinux1-dev \\\n lsof \\\n moreutils \\\n numactl \\\n protobuf-compiler \\\n && apt-get autoremove \\\n && apt-get clean \\\n && rm -rf /var/lib/apt/lists/*\n\n# install elastic fabric adapter\nRUN apt-get update && export EFA_VERSION=1.29.0 && mkdir /tmp/efa \\\n && wget -q -O- https://efa-installer.amazonaws.com/aws-efa-installer-$EFA_VERSION.tar.gz | tar xz -C /tmp/efa \\\n && cd /tmp/efa/aws-efa-installer && ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify \\\n && rm -rf /tmp/efa && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh \\\n && apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/*\n\n# install habana packages\nRUN wget -O- https://${ARTIFACTORY_URL}/artifactory/api/gpg/key/public | gpg --dearmor -o /usr/share/keyrings/habana-artifactory.gpg \\\n && chown root:root /usr/share/keyrings/habana-artifactory.gpg \\\n && chmod 644 /usr/share/keyrings/habana-artifactory.gpg \\\n && echo \"deb [signed-by=/usr/share/keyrings/habana-artifactory.gpg] https://${ARTIFACTORY_URL}/artifactory/debian jammy main\" | tee -a /etc/apt/sources.list \\\n && apt-get update \\\n && apt-get install -y --no-install-recommends \\\n habanalabs-rdma-core=\"$VERSION\"-\"$REVISION\" \\\n habanalabs-thunk=\"$VERSION\"-\"$REVISION\" \\\n habanalabs-firmware-tools=\"$VERSION\"-\"$REVISION\" \\\n habanalabs-graph=\"$VERSION\"-\"$REVISION\" \\\n && apt-get autoremove --yes && apt-get clean && rm -rf /var/lib/apt/lists/* \\\n && sed --in-place \"/$ARTIFACTORY_URL/d\" /etc/apt/sources.list\n\n# install libfabric\nRUN wget -nv -O 
/tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 https://github.com/ofiwg/libfabric/releases/download/v${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \\\n && cd /tmp/ && tar xf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \\\n && cd /tmp/libfabric-${LIBFABRIC_VERSION} \\\n && ./configure --prefix=$LIBFABRIC_ROOT --enable-psm3-verbs --enable-verbs=yes --with-synapseai=/usr \\\n && make -j && make install && cd / && rm -rf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 /tmp/libfabric-${LIBFABRIC_VERSION}\n\n# install hccl wrapper for ofi\nRUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/heads/main.zip \\\n && unzip /tmp/main.zip -d /tmp \\\n && cd /tmp/hccl_ofi_wrapper-main \\\n && make \\\n && cp -f libhccl_ofi_wrapper.so /usr/lib/habanalabs/libhccl_ofi_wrapper.so \\\n && cd / \\\n && rm -rf /tmp/main.zip /tmp/hccl_ofi_wrapper-main\n\nRUN /sbin/ldconfig\n\nUSER $NB_UID\n\n# install habana pytorch and media python libraries\nRUN python3 -m pip install habana_media_loader==\"${VERSION}\".\"${REVISION}\"\n\nRUN mkdir /tmp/gaudipt \\", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1720480710", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "discussion_id": "1720480710", + "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n+\n+# There is no need to store pip installation files inside docker image\n+ENV PIP_NO_CACHE_DIR=on\n+ENV PIP_DISABLE_PIP_VERSION_CHECK=1\n+\n+# libfabric and scaling variables\n+ENV LIBFABRIC_VERSION=\"1.20.0\"\n+ENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\n+ENV MPI_ROOT=/opt/amazon/openmpi\n+ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\n+ENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\n+ENV MPICC=${MPI_ROOT}/bin/mpicc\n+ENV OPAL_PREFIX=${MPI_ROOT}\n+ENV RDMAV_FORK_SAFE=1\n+ENV FI_EFA_USE_DEVICE_RDMA=1\n+ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\n+ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib\n+\n+# Gaudi stack doesn't (yet) support 3.11, thus\n+# downgrade python from 3.11.x to 3.10.14\n+RUN source /opt/conda/etc/profile.d/conda.sh \\\n+ && conda activate base \\\n+ && conda install -y -q --no-pin python=3.10.14 \\\n+ && sed -i 's/python ==.*/python ==3.10.14/' /opt/conda/conda-meta/pinned \\\n+ && conda clean -a -f -y\n+\n+USER root\n+\n+# install various support libraries\n+RUN apt-get update \\\n+ && apt-get install -y --no-install-recommends \\\n+ apt-utils \\\n+ bc \\\n+ build-essential \\\n+ graphviz \\\n+ iproute2 \\\n+ libcairo2-dev \\\n+ libgl1 \\\n+ libglib2.0-dev 
\\\n+ libgnutls30 \\\n+ libgoogle-glog0v5 \\\n+ libgoogle-perftools-dev \\\n+ libhdf5-dev \\\n+ libjemalloc2 \\\n+ libjpeg-dev \\\n+ liblapack-dev \\\n+ libmkl-dev \\\n+ libnuma-dev \\\n+ libopenblas-dev \\\n+ libpcre2-dev \\\n+ libpq-dev \\\n+ libselinux1-dev \\\n+ lsof \\\n+ moreutils \\\n+ numactl \\\n+ protobuf-compiler \\\n+ && apt-get autoremove \\\n+ && apt-get clean \\\n+ && rm -rf /var/lib/apt/lists/*\n+\n+# install elastic fabric adapter\n+RUN apt-get update && export EFA_VERSION=1.29.0 && mkdir /tmp/efa \\\n+ && wget -q -O- https://efa-installer.amazonaws.com/aws-efa-installer-$EFA_VERSION.tar.gz | tar xz -C /tmp/efa \\\n+ && cd /tmp/efa/aws-efa-installer && ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify \\\n+ && rm -rf /tmp/efa && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh \\\n+ && apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/*\n+\n+# install habana packages\n+RUN wget -O- https://${ARTIFACTORY_URL}/artifactory/api/gpg/key/public | gpg --dearmor -o /usr/share/keyrings/habana-artifactory.gpg \\\n+ && chown root:root /usr/share/keyrings/habana-artifactory.gpg \\\n+ && chmod 644 /usr/share/keyrings/habana-artifactory.gpg \\\n+ && echo \"deb [signed-by=/usr/share/keyrings/habana-artifactory.gpg] https://${ARTIFACTORY_URL}/artifactory/debian jammy main\" | tee -a /etc/apt/sources.list \\\n+ && apt-get update \\\n+ && apt-get install -y --no-install-recommends \\\n+ habanalabs-rdma-core=\"$VERSION\"-\"$REVISION\" \\\n+ habanalabs-thunk=\"$VERSION\"-\"$REVISION\" \\\n+ habanalabs-firmware-tools=\"$VERSION\"-\"$REVISION\" \\\n+ habanalabs-graph=\"$VERSION\"-\"$REVISION\" \\\n+ && apt-get autoremove --yes && apt-get clean && rm -rf /var/lib/apt/lists/* \\\n+ && sed --in-place \"/$ARTIFACTORY_URL/d\" /etc/apt/sources.list\n+\n+# install libfabric\n+RUN wget -nv -O /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 https://github.com/ofiwg/libfabric/releases/download/v${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \\\n+ && cd /tmp/ && tar xf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \\\n+ && cd /tmp/libfabric-${LIBFABRIC_VERSION} \\\n+ && ./configure --prefix=$LIBFABRIC_ROOT --enable-psm3-verbs --enable-verbs=yes --with-synapseai=/usr \\\n+ && make -j && make install && cd / && rm -rf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 /tmp/libfabric-${LIBFABRIC_VERSION}\n+\n+# install hccl wrapper for ofi\n+RUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/heads/main.zip \\\n+ && unzip /tmp/main.zip -d /tmp \\\n+ && cd /tmp/hccl_ofi_wrapper-main \\\n+ && make \\\n+ && cp -f libhccl_ofi_wrapper.so /usr/lib/habanalabs/libhccl_ofi_wrapper.so \\\n+ && cd / \\\n+ && rm -rf /tmp/main.zip /tmp/hccl_ofi_wrapper-main\n+\n+RUN /sbin/ldconfig\n+\n+USER $NB_UID\n+\n+# install habana pytorch and media python libraries\n+RUN python3 -m pip install habana_media_loader==\"${VERSION}\".\"${REVISION}\"\n+\n+RUN mkdir /tmp/gaudipt \\", + "comment_created_at": "2024-08-17T00:02:29+00:00", + "comment_author": "thesuperzapper", + "comment_body": "Please add a breif `#` comment explaining what is being installed.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1720488570", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "created_at": "2024-08-17T00:25:48+00:00", + "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# 
https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n\n# version details", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1720488570", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "discussion_id": "1720488570", + "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details", + "comment_created_at": "2024-08-17T00:25:48+00:00", + "comment_author": "thesuperzapper", + "comment_body": "For consistency with other images:\r\n\r\n```suggestion\r\n# args - software versions\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-document-with-precision.md b/_reviewers/kubeflow-document-with-precision.md index dbd7878..088a80d 100644 --- a/_reviewers/kubeflow-document-with-precision.md +++ b/_reviewers/kubeflow-document-with-precision.md @@ -52,73 +52,3 @@ RUN python3 -m pip install habana_media_loader=="${VERSION}"."${REVISION}" ``` Well-documented code improves comprehension, facilitates maintenance, and enhances collaboration among team members. - - -[ - { - "discussion_id": "1720450533", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "created_at": "2024-08-16T23:15:07+00:00", - "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1720450533", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "discussion_id": "1720450533", - "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch", - "comment_created_at": "2024-08-16T23:15:07+00:00", - "comment_author": "thesuperzapper", - "comment_body": "Can we update these links to be to a specific version, and file, e.g.:\r\n\r\n- https://github.com/HabanaAI/Setup_and_Install/blob/1.17.0/dockerfiles/base/Dockerfile.ubuntu22.04\r\n- https://github.com/HabanaAI/Setup_and_Install/blob/1.17.0/dockerfiles/pytorch/Dockerfile.ubuntu\r\n\r\nPlease also say \"based on\" because the content is not the same.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1720480710", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "created_at": "2024-08-17T00:02:29+00:00", - "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG 
BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n\n# version details\nARG ARTIFACTORY_URL=vault.habana.ai\nARG VERSION=1.16.2\nARG REVISION=2\nARG PYTORCH_VERSION=2.2.2\nARG OS_NUMBER=2204\n\n# runtime variables\nENV DEBIAN_FRONTEND=noninteractive\nENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\nENV HABANA_LOGS=/var/log/habana_logs/\nENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\nENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\nENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\nENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n\n# There is no need to store pip installation files inside docker image\nENV PIP_NO_CACHE_DIR=on\nENV PIP_DISABLE_PIP_VERSION_CHECK=1\n\n# libfabric and scaling variables\nENV LIBFABRIC_VERSION=\"1.20.0\"\nENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\nENV MPI_ROOT=/opt/amazon/openmpi\nENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\nENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\nENV MPICC=${MPI_ROOT}/bin/mpicc\nENV OPAL_PREFIX=${MPI_ROOT}\nENV RDMAV_FORK_SAFE=1\nENV FI_EFA_USE_DEVICE_RDMA=1\nENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\nENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib\n\n# Gaudi stack doesn't (yet) support 3.11, thus\n# downgrade python from 3.11.x to 3.10.14\nRUN source /opt/conda/etc/profile.d/conda.sh \\\n && conda activate base \\\n && conda install -y -q --no-pin python=3.10.14 \\\n && sed -i 's/python ==.*/python ==3.10.14/' /opt/conda/conda-meta/pinned \\\n && conda clean -a -f -y\n\nUSER root\n\n# install various support libraries\nRUN apt-get update \\\n && apt-get install -y --no-install-recommends \\\n apt-utils \\\n bc \\\n build-essential \\\n graphviz \\\n iproute2 \\\n libcairo2-dev \\\n libgl1 \\\n libglib2.0-dev \\\n libgnutls30 \\\n libgoogle-glog0v5 \\\n libgoogle-perftools-dev \\\n libhdf5-dev \\\n libjemalloc2 \\\n libjpeg-dev \\\n liblapack-dev \\\n libmkl-dev \\\n libnuma-dev \\\n libopenblas-dev \\\n libpcre2-dev \\\n libpq-dev \\\n libselinux1-dev \\\n lsof \\\n moreutils \\\n numactl \\\n protobuf-compiler \\\n && apt-get autoremove \\\n && apt-get clean \\\n && rm -rf /var/lib/apt/lists/*\n\n# install elastic fabric adapter\nRUN apt-get update && export EFA_VERSION=1.29.0 && mkdir /tmp/efa \\\n && wget -q -O- https://efa-installer.amazonaws.com/aws-efa-installer-$EFA_VERSION.tar.gz | tar xz -C /tmp/efa \\\n && cd /tmp/efa/aws-efa-installer && ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify \\\n && rm -rf /tmp/efa && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh \\\n && apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/*\n\n# install habana packages\nRUN wget -O- https://${ARTIFACTORY_URL}/artifactory/api/gpg/key/public | gpg --dearmor -o /usr/share/keyrings/habana-artifactory.gpg \\\n && chown root:root /usr/share/keyrings/habana-artifactory.gpg \\\n && chmod 644 /usr/share/keyrings/habana-artifactory.gpg \\\n && echo \"deb [signed-by=/usr/share/keyrings/habana-artifactory.gpg] https://${ARTIFACTORY_URL}/artifactory/debian jammy main\" | tee -a /etc/apt/sources.list \\\n && apt-get update \\\n && apt-get install -y --no-install-recommends \\\n habanalabs-rdma-core=\"$VERSION\"-\"$REVISION\" \\\n habanalabs-thunk=\"$VERSION\"-\"$REVISION\" \\\n 
habanalabs-firmware-tools=\"$VERSION\"-\"$REVISION\" \\\n habanalabs-graph=\"$VERSION\"-\"$REVISION\" \\\n && apt-get autoremove --yes && apt-get clean && rm -rf /var/lib/apt/lists/* \\\n && sed --in-place \"/$ARTIFACTORY_URL/d\" /etc/apt/sources.list\n\n# install libfabric\nRUN wget -nv -O /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 https://github.com/ofiwg/libfabric/releases/download/v${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \\\n && cd /tmp/ && tar xf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \\\n && cd /tmp/libfabric-${LIBFABRIC_VERSION} \\\n && ./configure --prefix=$LIBFABRIC_ROOT --enable-psm3-verbs --enable-verbs=yes --with-synapseai=/usr \\\n && make -j && make install && cd / && rm -rf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 /tmp/libfabric-${LIBFABRIC_VERSION}\n\n# install hccl wrapper for ofi\nRUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/heads/main.zip \\\n && unzip /tmp/main.zip -d /tmp \\\n && cd /tmp/hccl_ofi_wrapper-main \\\n && make \\\n && cp -f libhccl_ofi_wrapper.so /usr/lib/habanalabs/libhccl_ofi_wrapper.so \\\n && cd / \\\n && rm -rf /tmp/main.zip /tmp/hccl_ofi_wrapper-main\n\nRUN /sbin/ldconfig\n\nUSER $NB_UID\n\n# install habana pytorch and media python libraries\nRUN python3 -m pip install habana_media_loader==\"${VERSION}\".\"${REVISION}\"\n\nRUN mkdir /tmp/gaudipt \\", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1720480710", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "discussion_id": "1720480710", - "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n+\n+# There is no need to store pip installation files inside docker image\n+ENV PIP_NO_CACHE_DIR=on\n+ENV PIP_DISABLE_PIP_VERSION_CHECK=1\n+\n+# libfabric and scaling variables\n+ENV LIBFABRIC_VERSION=\"1.20.0\"\n+ENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\n+ENV MPI_ROOT=/opt/amazon/openmpi\n+ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\n+ENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\n+ENV MPICC=${MPI_ROOT}/bin/mpicc\n+ENV OPAL_PREFIX=${MPI_ROOT}\n+ENV RDMAV_FORK_SAFE=1\n+ENV FI_EFA_USE_DEVICE_RDMA=1\n+ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\n+ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib\n+\n+# Gaudi stack doesn't (yet) support 3.11, thus\n+# downgrade python from 3.11.x to 3.10.14\n+RUN source /opt/conda/etc/profile.d/conda.sh \\\n+ && conda activate base \\\n+ && conda install -y -q --no-pin python=3.10.14 \\\n+ && sed -i 's/python ==.*/python ==3.10.14/' /opt/conda/conda-meta/pinned 
\\\n+ && conda clean -a -f -y\n+\n+USER root\n+\n+# install various support libraries\n+RUN apt-get update \\\n+ && apt-get install -y --no-install-recommends \\\n+ apt-utils \\\n+ bc \\\n+ build-essential \\\n+ graphviz \\\n+ iproute2 \\\n+ libcairo2-dev \\\n+ libgl1 \\\n+ libglib2.0-dev \\\n+ libgnutls30 \\\n+ libgoogle-glog0v5 \\\n+ libgoogle-perftools-dev \\\n+ libhdf5-dev \\\n+ libjemalloc2 \\\n+ libjpeg-dev \\\n+ liblapack-dev \\\n+ libmkl-dev \\\n+ libnuma-dev \\\n+ libopenblas-dev \\\n+ libpcre2-dev \\\n+ libpq-dev \\\n+ libselinux1-dev \\\n+ lsof \\\n+ moreutils \\\n+ numactl \\\n+ protobuf-compiler \\\n+ && apt-get autoremove \\\n+ && apt-get clean \\\n+ && rm -rf /var/lib/apt/lists/*\n+\n+# install elastic fabric adapter\n+RUN apt-get update && export EFA_VERSION=1.29.0 && mkdir /tmp/efa \\\n+ && wget -q -O- https://efa-installer.amazonaws.com/aws-efa-installer-$EFA_VERSION.tar.gz | tar xz -C /tmp/efa \\\n+ && cd /tmp/efa/aws-efa-installer && ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify \\\n+ && rm -rf /tmp/efa && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh \\\n+ && apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/*\n+\n+# install habana packages\n+RUN wget -O- https://${ARTIFACTORY_URL}/artifactory/api/gpg/key/public | gpg --dearmor -o /usr/share/keyrings/habana-artifactory.gpg \\\n+ && chown root:root /usr/share/keyrings/habana-artifactory.gpg \\\n+ && chmod 644 /usr/share/keyrings/habana-artifactory.gpg \\\n+ && echo \"deb [signed-by=/usr/share/keyrings/habana-artifactory.gpg] https://${ARTIFACTORY_URL}/artifactory/debian jammy main\" | tee -a /etc/apt/sources.list \\\n+ && apt-get update \\\n+ && apt-get install -y --no-install-recommends \\\n+ habanalabs-rdma-core=\"$VERSION\"-\"$REVISION\" \\\n+ habanalabs-thunk=\"$VERSION\"-\"$REVISION\" \\\n+ habanalabs-firmware-tools=\"$VERSION\"-\"$REVISION\" \\\n+ habanalabs-graph=\"$VERSION\"-\"$REVISION\" \\\n+ && apt-get autoremove --yes && apt-get clean && rm -rf /var/lib/apt/lists/* \\\n+ && sed --in-place \"/$ARTIFACTORY_URL/d\" /etc/apt/sources.list\n+\n+# install libfabric\n+RUN wget -nv -O /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 https://github.com/ofiwg/libfabric/releases/download/v${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \\\n+ && cd /tmp/ && tar xf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \\\n+ && cd /tmp/libfabric-${LIBFABRIC_VERSION} \\\n+ && ./configure --prefix=$LIBFABRIC_ROOT --enable-psm3-verbs --enable-verbs=yes --with-synapseai=/usr \\\n+ && make -j && make install && cd / && rm -rf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 /tmp/libfabric-${LIBFABRIC_VERSION}\n+\n+# install hccl wrapper for ofi\n+RUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/heads/main.zip \\\n+ && unzip /tmp/main.zip -d /tmp \\\n+ && cd /tmp/hccl_ofi_wrapper-main \\\n+ && make \\\n+ && cp -f libhccl_ofi_wrapper.so /usr/lib/habanalabs/libhccl_ofi_wrapper.so \\\n+ && cd / \\\n+ && rm -rf /tmp/main.zip /tmp/hccl_ofi_wrapper-main\n+\n+RUN /sbin/ldconfig\n+\n+USER $NB_UID\n+\n+# install habana pytorch and media python libraries\n+RUN python3 -m pip install habana_media_loader==\"${VERSION}\".\"${REVISION}\"\n+\n+RUN mkdir /tmp/gaudipt \\", - "comment_created_at": "2024-08-17T00:02:29+00:00", - "comment_author": "thesuperzapper", - "comment_body": "Please add a breif `#` comment explaining what is being installed.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1720488570", - "pr_number": 7635, - "pr_file": 
"components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "created_at": "2024-08-17T00:25:48+00:00", - "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n\n# version details", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1720488570", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "discussion_id": "1720488570", - "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details", - "comment_created_at": "2024-08-17T00:25:48+00:00", - "comment_author": "thesuperzapper", - "comment_body": "For consistency with other images:\r\n\r\n```suggestion\r\n# args - software versions\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-enforce-https-protocol.json b/_reviewers/kubeflow-enforce-https-protocol.json new file mode 100644 index 0000000..c065cf5 --- /dev/null +++ b/_reviewers/kubeflow-enforce-https-protocol.json @@ -0,0 +1,36 @@ +[ + { + "discussion_id": "382068193", + "pr_number": 4780, + "pr_file": "components/centraldashboard/test/e2e.test.ts", + "created_at": "2020-02-20T15:21:29+00:00", + "commented_code": "import 'jasmine';\n\nimport {Credentials, JWT} from 'google-auth-library';\nimport {launch, Page} from 'puppeteer';\nimport request from 'request';\n\n\n/**\n * Look here to see setup instructions for Environment Setup for Cluster Auth:\n * https://cloud.google.com/kubernetes-engine/docs/tutorials/authenticating-to-cloud-platform\n * \n * Use Scope: Owner\n */ \nconst ORIGINAL_TIMEOUT = jasmine.DEFAULT_TIMEOUT_INTERVAL;\nconst {\n KF_HOST, // The url to your cluster, usually: https://.endpoints..cloud.goog/\n CLIENT_ID, // This is the CLIENT_ID token used to create the cluster (used in http://deploy.kubeflow.cloud/)\n SERVICE_ACCOUNT_EMAIL, // The email address for the service account you created @.iam.gserviceaccount.com\n SERVICE_ACCOUNT_KEY, // Should be a JSON file downloaded from cloud console service accounts\n} = process.env;\n\n['KF_HOST', 'CLIENT_ID', 'SERVICE_ACCOUNT_EMAIL', 'SERVICE_ACCOUNT_KEY'].forEach(envVar => {\n if (!process.env[envVar]) {console.log(`${envVar} environment variable must be set`);process.exit(1);}\n});\nif (!/^https?:\\/\\/\\S+/.test(KF_HOST)) {console.log('Invalid HOST url provided, must be like http*://*');process.exit(1);}", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "382068193", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4780, + "pr_file": "components/centraldashboard/test/e2e.test.ts", + "discussion_id": "382068193", + "commented_code": "@@ -0,0 +1,144 @@\n+import 'jasmine';\n+\n+import {Credentials, JWT} from 'google-auth-library';\n+import {launch, Page} from 'puppeteer';\n+import request from 'request';\n+\n+\n+/**\n+ * Look here to see setup instructions for Environment Setup for Cluster Auth:\n+ * 
https://cloud.google.com/kubernetes-engine/docs/tutorials/authenticating-to-cloud-platform\n+ * \n+ * Use Scope: Owner\n+ */ \n+const ORIGINAL_TIMEOUT = jasmine.DEFAULT_TIMEOUT_INTERVAL;\n+const {\n+ KF_HOST, // The url to your cluster, usually: https://.endpoints..cloud.goog/\n+ CLIENT_ID, // This is the CLIENT_ID token used to create the cluster (used in http://deploy.kubeflow.cloud/)\n+ SERVICE_ACCOUNT_EMAIL, // The email address for the service account you created @.iam.gserviceaccount.com\n+ SERVICE_ACCOUNT_KEY, // Should be a JSON file downloaded from cloud console service accounts\n+} = process.env;\n+\n+['KF_HOST', 'CLIENT_ID', 'SERVICE_ACCOUNT_EMAIL', 'SERVICE_ACCOUNT_KEY'].forEach(envVar => {\n+ if (!process.env[envVar]) {console.log(`${envVar} environment variable must be set`);process.exit(1);}\n+});\n+if (!/^https?:\\/\\/\\S+/.test(KF_HOST)) {console.log('Invalid HOST url provided, must be like http*://*');process.exit(1);}", + "comment_created_at": "2020-02-20T15:21:29+00:00", + "comment_author": "kimwnasptd", + "comment_body": "nit: `Invalid HOST url provided, must be like https*://*` (the protocol should be https in the end)", + "pr_file_module": null + }, + { + "comment_id": "382317648", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4780, + "pr_file": "components/centraldashboard/test/e2e.test.ts", + "discussion_id": "382068193", + "commented_code": "@@ -0,0 +1,144 @@\n+import 'jasmine';\n+\n+import {Credentials, JWT} from 'google-auth-library';\n+import {launch, Page} from 'puppeteer';\n+import request from 'request';\n+\n+\n+/**\n+ * Look here to see setup instructions for Environment Setup for Cluster Auth:\n+ * https://cloud.google.com/kubernetes-engine/docs/tutorials/authenticating-to-cloud-platform\n+ * \n+ * Use Scope: Owner\n+ */ \n+const ORIGINAL_TIMEOUT = jasmine.DEFAULT_TIMEOUT_INTERVAL;\n+const {\n+ KF_HOST, // The url to your cluster, usually: https://.endpoints..cloud.goog/\n+ CLIENT_ID, // This is the CLIENT_ID token used to create the cluster (used in http://deploy.kubeflow.cloud/)\n+ SERVICE_ACCOUNT_EMAIL, // The email address for the service account you created @.iam.gserviceaccount.com\n+ SERVICE_ACCOUNT_KEY, // Should be a JSON file downloaded from cloud console service accounts\n+} = process.env;\n+\n+['KF_HOST', 'CLIENT_ID', 'SERVICE_ACCOUNT_EMAIL', 'SERVICE_ACCOUNT_KEY'].forEach(envVar => {\n+ if (!process.env[envVar]) {console.log(`${envVar} environment variable must be set`);process.exit(1);}\n+});\n+if (!/^https?:\\/\\/\\S+/.test(KF_HOST)) {console.log('Invalid HOST url provided, must be like http*://*');process.exit(1);}", + "comment_created_at": "2020-02-20T23:31:51+00:00", + "comment_author": "avdaredevil", + "comment_body": "Right, usually clusters redirect, so I allowed users to pass in `http` or `https`, but I've changed the log statement AND regex to simply only allow `https`", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-enforce-https-protocol.md b/_reviewers/kubeflow-enforce-https-protocol.md index 10ba991..f12ea6e 100644 --- a/_reviewers/kubeflow-enforce-https-protocol.md +++ b/_reviewers/kubeflow-enforce-https-protocol.md @@ -29,41 +29,3 @@ if (!/^https:\/\/\S+/.test(url)) { ``` This helps ensure all communications are encrypted and prevents security vulnerabilities that can arise from initial insecure connections. 
- - -[ - { - "discussion_id": "382068193", - "pr_number": 4780, - "pr_file": "components/centraldashboard/test/e2e.test.ts", - "created_at": "2020-02-20T15:21:29+00:00", - "commented_code": "import 'jasmine';\n\nimport {Credentials, JWT} from 'google-auth-library';\nimport {launch, Page} from 'puppeteer';\nimport request from 'request';\n\n\n/**\n * Look here to see setup instructions for Environment Setup for Cluster Auth:\n * https://cloud.google.com/kubernetes-engine/docs/tutorials/authenticating-to-cloud-platform\n * \n * Use Scope: Owner\n */ \nconst ORIGINAL_TIMEOUT = jasmine.DEFAULT_TIMEOUT_INTERVAL;\nconst {\n KF_HOST, // The url to your cluster, usually: https://.endpoints..cloud.goog/\n CLIENT_ID, // This is the CLIENT_ID token used to create the cluster (used in http://deploy.kubeflow.cloud/)\n SERVICE_ACCOUNT_EMAIL, // The email address for the service account you created @.iam.gserviceaccount.com\n SERVICE_ACCOUNT_KEY, // Should be a JSON file downloaded from cloud console service accounts\n} = process.env;\n\n['KF_HOST', 'CLIENT_ID', 'SERVICE_ACCOUNT_EMAIL', 'SERVICE_ACCOUNT_KEY'].forEach(envVar => {\n if (!process.env[envVar]) {console.log(`${envVar} environment variable must be set`);process.exit(1);}\n});\nif (!/^https?:\\/\\/\\S+/.test(KF_HOST)) {console.log('Invalid HOST url provided, must be like http*://*');process.exit(1);}", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "382068193", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4780, - "pr_file": "components/centraldashboard/test/e2e.test.ts", - "discussion_id": "382068193", - "commented_code": "@@ -0,0 +1,144 @@\n+import 'jasmine';\n+\n+import {Credentials, JWT} from 'google-auth-library';\n+import {launch, Page} from 'puppeteer';\n+import request from 'request';\n+\n+\n+/**\n+ * Look here to see setup instructions for Environment Setup for Cluster Auth:\n+ * https://cloud.google.com/kubernetes-engine/docs/tutorials/authenticating-to-cloud-platform\n+ * \n+ * Use Scope: Owner\n+ */ \n+const ORIGINAL_TIMEOUT = jasmine.DEFAULT_TIMEOUT_INTERVAL;\n+const {\n+ KF_HOST, // The url to your cluster, usually: https://.endpoints..cloud.goog/\n+ CLIENT_ID, // This is the CLIENT_ID token used to create the cluster (used in http://deploy.kubeflow.cloud/)\n+ SERVICE_ACCOUNT_EMAIL, // The email address for the service account you created @.iam.gserviceaccount.com\n+ SERVICE_ACCOUNT_KEY, // Should be a JSON file downloaded from cloud console service accounts\n+} = process.env;\n+\n+['KF_HOST', 'CLIENT_ID', 'SERVICE_ACCOUNT_EMAIL', 'SERVICE_ACCOUNT_KEY'].forEach(envVar => {\n+ if (!process.env[envVar]) {console.log(`${envVar} environment variable must be set`);process.exit(1);}\n+});\n+if (!/^https?:\\/\\/\\S+/.test(KF_HOST)) {console.log('Invalid HOST url provided, must be like http*://*');process.exit(1);}", - "comment_created_at": "2020-02-20T15:21:29+00:00", - "comment_author": "kimwnasptd", - "comment_body": "nit: `Invalid HOST url provided, must be like https*://*` (the protocol should be https in the end)", - "pr_file_module": null - }, - { - "comment_id": "382317648", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4780, - "pr_file": "components/centraldashboard/test/e2e.test.ts", - "discussion_id": "382068193", - "commented_code": "@@ -0,0 +1,144 @@\n+import 'jasmine';\n+\n+import {Credentials, JWT} from 'google-auth-library';\n+import {launch, Page} from 'puppeteer';\n+import request from 'request';\n+\n+\n+/**\n+ * Look here to see setup instructions for 
Environment Setup for Cluster Auth:\n+ * https://cloud.google.com/kubernetes-engine/docs/tutorials/authenticating-to-cloud-platform\n+ * \n+ * Use Scope: Owner\n+ */ \n+const ORIGINAL_TIMEOUT = jasmine.DEFAULT_TIMEOUT_INTERVAL;\n+const {\n+ KF_HOST, // The url to your cluster, usually: https://.endpoints..cloud.goog/\n+ CLIENT_ID, // This is the CLIENT_ID token used to create the cluster (used in http://deploy.kubeflow.cloud/)\n+ SERVICE_ACCOUNT_EMAIL, // The email address for the service account you created @.iam.gserviceaccount.com\n+ SERVICE_ACCOUNT_KEY, // Should be a JSON file downloaded from cloud console service accounts\n+} = process.env;\n+\n+['KF_HOST', 'CLIENT_ID', 'SERVICE_ACCOUNT_EMAIL', 'SERVICE_ACCOUNT_KEY'].forEach(envVar => {\n+ if (!process.env[envVar]) {console.log(`${envVar} environment variable must be set`);process.exit(1);}\n+});\n+if (!/^https?:\\/\\/\\S+/.test(KF_HOST)) {console.log('Invalid HOST url provided, must be like http*://*');process.exit(1);}", - "comment_created_at": "2020-02-20T23:31:51+00:00", - "comment_author": "avdaredevil", - "comment_body": "Right, usually clusters redirect, so I allowed users to pass in `http` or `https`, but I've changed the log statement AND regex to simply only allow `https`", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-enforce-least-privilege.json b/_reviewers/kubeflow-enforce-least-privilege.json new file mode 100644 index 0000000..5e9cc48 --- /dev/null +++ b/_reviewers/kubeflow-enforce-least-privilege.json @@ -0,0 +1,130 @@ +[ + { + "discussion_id": "2031568051", + "pr_number": 7710, + "pr_file": "components/admission-webhook/manifests/base/deployment.yaml", + "created_at": "2025-04-07T16:04:48+00:00", + "commented_code": "ports:\n - name: https-webhook\n containerPort: 4443\n securityContext:\n runAsNonRoot: true\n allowPrivilegeEscalation: false\n runAsUser: 1000", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "2031568051", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7710, + "pr_file": "components/admission-webhook/manifests/base/deployment.yaml", + "discussion_id": "2031568051", + "commented_code": "@@ -18,6 +18,15 @@ spec:\n ports:\n - name: https-webhook\n containerPort: 4443\n+ securityContext:\n+ runAsNonRoot: true\n+ allowPrivilegeEscalation: false\n+ runAsUser: 1000", + "comment_created_at": "2025-04-07T16:04:48+00:00", + "comment_author": "thesuperzapper", + "comment_body": "Is there any particular reason that `UID: 1000` was chosen, I think its not correct?\r\n\r\nThe Docker image should run as `65532` in most cases:\r\n- https://github.com/kubeflow/kubeflow/blob/v1.10.0/components/admission-webhook/Dockerfile#L22\r\n\r\nAlso, perhaps we should not specify a `runAsUser` in the first place, as we already have `runAsNonRoot`.", + "pr_file_module": null + }, + { + "comment_id": "2032627919", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7710, + "pr_file": "components/admission-webhook/manifests/base/deployment.yaml", + "discussion_id": "2031568051", + "commented_code": "@@ -18,6 +18,15 @@ spec:\n ports:\n - name: https-webhook\n containerPort: 4443\n+ securityContext:\n+ runAsNonRoot: true\n+ allowPrivilegeEscalation: false\n+ runAsUser: 1000", + "comment_created_at": "2025-04-08T08:01:15+00:00", + "comment_author": "juliusvonkohout", + "comment_body": "So far 1000 works in our tests in https://github.com/kubeflow/manifests/pull/3050. 
I mean we can of course change it to 65532 if it works and you like that more, but having a proper securitycontext first is a massive improvement and then in a follow up PR we can do refinements.", + "pr_file_module": null + }, + { + "comment_id": "2032772944", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7710, + "pr_file": "components/admission-webhook/manifests/base/deployment.yaml", + "discussion_id": "2031568051", + "commented_code": "@@ -18,6 +18,15 @@ spec:\n ports:\n - name: https-webhook\n containerPort: 4443\n+ securityContext:\n+ runAsNonRoot: true\n+ allowPrivilegeEscalation: false\n+ runAsUser: 1000", + "comment_created_at": "2025-04-08T09:21:29+00:00", + "comment_author": "juliusvonkohout", + "comment_body": "@akagami-harsh can you check whether 65532 works as well?", + "pr_file_module": null + }, + { + "comment_id": "2032847785", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7710, + "pr_file": "components/admission-webhook/manifests/base/deployment.yaml", + "discussion_id": "2031568051", + "commented_code": "@@ -18,6 +18,15 @@ spec:\n ports:\n - name: https-webhook\n containerPort: 4443\n+ securityContext:\n+ runAsNonRoot: true\n+ allowPrivilegeEscalation: false\n+ runAsUser: 1000", + "comment_created_at": "2025-04-08T10:03:05+00:00", + "comment_author": "akagami-harsh", + "comment_body": "Yes it works, so changed user to 65532", + "pr_file_module": null + }, + { + "comment_id": "2032891985", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7710, + "pr_file": "components/admission-webhook/manifests/base/deployment.yaml", + "discussion_id": "2031568051", + "commented_code": "@@ -18,6 +18,15 @@ spec:\n ports:\n - name: https-webhook\n containerPort: 4443\n+ securityContext:\n+ runAsNonRoot: true\n+ allowPrivilegeEscalation: false\n+ runAsUser: 1000", + "comment_created_at": "2025-04-08T10:28:41+00:00", + "comment_author": "akagami-harsh", + "comment_body": "https://github.com/kubeflow/manifests/actions/runs/14330485734/job/40165118566?pr=3050", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "798028619", + "pr_number": 6241, + "pr_file": "components/crud-web-apps/jupyter/manifests/base/cluster-role.yaml", + "created_at": "2022-02-02T21:30:29+00:00", + "commented_code": "metadata:\n name: cluster-role\nrules:\n- apiGroups:\n - \"\"\n resources:\n - namespaces\n verbs:\n - get\n - list", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "798028619", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6241, + "pr_file": "components/crud-web-apps/jupyter/manifests/base/cluster-role.yaml", + "discussion_id": "798028619", + "commented_code": "@@ -3,15 +3,6 @@ kind: ClusterRole\n metadata:\n name: cluster-role\n rules:\n-- apiGroups:\n- - \"\"\n- resources:\n- - namespaces\n- verbs:\n- - get\n- - list", + "comment_created_at": "2022-02-02T21:30:29+00:00", + "comment_author": "thesuperzapper", + "comment_body": "@juliusvonkohout Can you confirm that the jupyter-web-app still functions correctly without \"get\" and \"list\" on namespaces?", + "pr_file_module": null + }, + { + "comment_id": "799020047", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6241, + "pr_file": "components/crud-web-apps/jupyter/manifests/base/cluster-role.yaml", + "discussion_id": "798028619", + "commented_code": "@@ -3,15 +3,6 @@ kind: ClusterRole\n metadata:\n name: cluster-role\n rules:\n-- apiGroups:\n- - \"\"\n- resources:\n- - namespaces\n- verbs:\n- - get\n- - list", + "comment_created_at": 
"2022-02-03T22:25:50+00:00", + "comment_author": "juliusvonkohout", + "comment_body": "That seems to be the case. I just updated the roles, restarted jupyter-web-app and notebook-controller and was able to start, stop, create, delete jupyterlabs. There are also no errors in the logs.\r\n\r\n[jupyter-web-app-deployment-7f7f55f9d8-slb8k-jupyter-web-app.log](https://github.com/kubeflow/kubeflow/files/7998674/jupyter-web-app-deployment-7f7f55f9d8-slb8k-jupyter-web-app.log)\r\n\r\n", + "pr_file_module": null + }, + { + "comment_id": "799021186", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6241, + "pr_file": "components/crud-web-apps/jupyter/manifests/base/cluster-role.yaml", + "discussion_id": "798028619", + "commented_code": "@@ -3,15 +3,6 @@ kind: ClusterRole\n metadata:\n name: cluster-role\n rules:\n-- apiGroups:\n- - \"\"\n- resources:\n- - namespaces\n- verbs:\n- - get\n- - list", + "comment_created_at": "2022-02-03T22:27:42+00:00", + "comment_author": "juliusvonkohout", + "comment_body": "Is there more to test or it is ready to be merged?", + "pr_file_module": null + }, + { + "comment_id": "799369886", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6241, + "pr_file": "components/crud-web-apps/jupyter/manifests/base/cluster-role.yaml", + "discussion_id": "798028619", + "commented_code": "@@ -3,15 +3,6 @@ kind: ClusterRole\n metadata:\n name: cluster-role\n rules:\n-- apiGroups:\n- - \"\"\n- resources:\n- - namespaces\n- verbs:\n- - get\n- - list", + "comment_created_at": "2022-02-04T11:05:40+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Should be good to go. It would also be a bug from the current code if it couldn't operate without listing namespaces, since it shouldn't need these permissions in the first place.\r\n\r\nNote that we might need to re-introduce them though, in a future iteration when we would want the apps to be self-standing. But we need to discuss other items for this effort. But I'm also mentioning this context to keep in mind", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-enforce-least-privilege.md b/_reviewers/kubeflow-enforce-least-privilege.md index 3673580..3349630 100644 --- a/_reviewers/kubeflow-enforce-least-privilege.md +++ b/_reviewers/kubeflow-enforce-least-privilege.md @@ -20,135 +20,3 @@ securityContext: ``` For RBAC permissions, regularly audit and remove unnecessary permissions. Test thoroughly after removing permissions to ensure the application still functions correctly. When introducing new permissions, document their purpose and limit their scope as much as possible. Following this principle reduces attack surface and minimizes potential damage from compromised components. 
- - -[ - { - "discussion_id": "2031568051", - "pr_number": 7710, - "pr_file": "components/admission-webhook/manifests/base/deployment.yaml", - "created_at": "2025-04-07T16:04:48+00:00", - "commented_code": "ports:\n - name: https-webhook\n containerPort: 4443\n securityContext:\n runAsNonRoot: true\n allowPrivilegeEscalation: false\n runAsUser: 1000", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "2031568051", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7710, - "pr_file": "components/admission-webhook/manifests/base/deployment.yaml", - "discussion_id": "2031568051", - "commented_code": "@@ -18,6 +18,15 @@ spec:\n ports:\n - name: https-webhook\n containerPort: 4443\n+ securityContext:\n+ runAsNonRoot: true\n+ allowPrivilegeEscalation: false\n+ runAsUser: 1000", - "comment_created_at": "2025-04-07T16:04:48+00:00", - "comment_author": "thesuperzapper", - "comment_body": "Is there any particular reason that `UID: 1000` was chosen, I think its not correct?\r\n\r\nThe Docker image should run as `65532` in most cases:\r\n- https://github.com/kubeflow/kubeflow/blob/v1.10.0/components/admission-webhook/Dockerfile#L22\r\n\r\nAlso, perhaps we should not specify a `runAsUser` in the first place, as we already have `runAsNonRoot`.", - "pr_file_module": null - }, - { - "comment_id": "2032627919", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7710, - "pr_file": "components/admission-webhook/manifests/base/deployment.yaml", - "discussion_id": "2031568051", - "commented_code": "@@ -18,6 +18,15 @@ spec:\n ports:\n - name: https-webhook\n containerPort: 4443\n+ securityContext:\n+ runAsNonRoot: true\n+ allowPrivilegeEscalation: false\n+ runAsUser: 1000", - "comment_created_at": "2025-04-08T08:01:15+00:00", - "comment_author": "juliusvonkohout", - "comment_body": "So far 1000 works in our tests in https://github.com/kubeflow/manifests/pull/3050. 
I mean we can of course change it to 65532 if it works and you like that more, but having a proper securitycontext first is a massive improvement and then in a follow up PR we can do refinements.", - "pr_file_module": null - }, - { - "comment_id": "2032772944", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7710, - "pr_file": "components/admission-webhook/manifests/base/deployment.yaml", - "discussion_id": "2031568051", - "commented_code": "@@ -18,6 +18,15 @@ spec:\n ports:\n - name: https-webhook\n containerPort: 4443\n+ securityContext:\n+ runAsNonRoot: true\n+ allowPrivilegeEscalation: false\n+ runAsUser: 1000", - "comment_created_at": "2025-04-08T09:21:29+00:00", - "comment_author": "juliusvonkohout", - "comment_body": "@akagami-harsh can you check whether 65532 works as well?", - "pr_file_module": null - }, - { - "comment_id": "2032847785", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7710, - "pr_file": "components/admission-webhook/manifests/base/deployment.yaml", - "discussion_id": "2031568051", - "commented_code": "@@ -18,6 +18,15 @@ spec:\n ports:\n - name: https-webhook\n containerPort: 4443\n+ securityContext:\n+ runAsNonRoot: true\n+ allowPrivilegeEscalation: false\n+ runAsUser: 1000", - "comment_created_at": "2025-04-08T10:03:05+00:00", - "comment_author": "akagami-harsh", - "comment_body": "Yes it works, so changed user to 65532", - "pr_file_module": null - }, - { - "comment_id": "2032891985", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7710, - "pr_file": "components/admission-webhook/manifests/base/deployment.yaml", - "discussion_id": "2031568051", - "commented_code": "@@ -18,6 +18,15 @@ spec:\n ports:\n - name: https-webhook\n containerPort: 4443\n+ securityContext:\n+ runAsNonRoot: true\n+ allowPrivilegeEscalation: false\n+ runAsUser: 1000", - "comment_created_at": "2025-04-08T10:28:41+00:00", - "comment_author": "akagami-harsh", - "comment_body": "https://github.com/kubeflow/manifests/actions/runs/14330485734/job/40165118566?pr=3050", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "798028619", - "pr_number": 6241, - "pr_file": "components/crud-web-apps/jupyter/manifests/base/cluster-role.yaml", - "created_at": "2022-02-02T21:30:29+00:00", - "commented_code": "metadata:\n name: cluster-role\nrules:\n- apiGroups:\n - \"\"\n resources:\n - namespaces\n verbs:\n - get\n - list", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "798028619", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6241, - "pr_file": "components/crud-web-apps/jupyter/manifests/base/cluster-role.yaml", - "discussion_id": "798028619", - "commented_code": "@@ -3,15 +3,6 @@ kind: ClusterRole\n metadata:\n name: cluster-role\n rules:\n-- apiGroups:\n- - \"\"\n- resources:\n- - namespaces\n- verbs:\n- - get\n- - list", - "comment_created_at": "2022-02-02T21:30:29+00:00", - "comment_author": "thesuperzapper", - "comment_body": "@juliusvonkohout Can you confirm that the jupyter-web-app still functions correctly without \"get\" and \"list\" on namespaces?", - "pr_file_module": null - }, - { - "comment_id": "799020047", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6241, - "pr_file": "components/crud-web-apps/jupyter/manifests/base/cluster-role.yaml", - "discussion_id": "798028619", - "commented_code": "@@ -3,15 +3,6 @@ kind: ClusterRole\n metadata:\n name: cluster-role\n rules:\n-- apiGroups:\n- - \"\"\n- resources:\n- - namespaces\n- verbs:\n- - get\n- - list", - "comment_created_at": 
"2022-02-03T22:25:50+00:00", - "comment_author": "juliusvonkohout", - "comment_body": "That seems to be the case. I just updated the roles, restarted jupyter-web-app and notebook-controller and was able to start, stop, create, delete jupyterlabs. There are also no errors in the logs.\r\n\r\n[jupyter-web-app-deployment-7f7f55f9d8-slb8k-jupyter-web-app.log](https://github.com/kubeflow/kubeflow/files/7998674/jupyter-web-app-deployment-7f7f55f9d8-slb8k-jupyter-web-app.log)\r\n\r\n", - "pr_file_module": null - }, - { - "comment_id": "799021186", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6241, - "pr_file": "components/crud-web-apps/jupyter/manifests/base/cluster-role.yaml", - "discussion_id": "798028619", - "commented_code": "@@ -3,15 +3,6 @@ kind: ClusterRole\n metadata:\n name: cluster-role\n rules:\n-- apiGroups:\n- - \"\"\n- resources:\n- - namespaces\n- verbs:\n- - get\n- - list", - "comment_created_at": "2022-02-03T22:27:42+00:00", - "comment_author": "juliusvonkohout", - "comment_body": "Is there more to test or it is ready to be merged?", - "pr_file_module": null - }, - { - "comment_id": "799369886", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6241, - "pr_file": "components/crud-web-apps/jupyter/manifests/base/cluster-role.yaml", - "discussion_id": "798028619", - "commented_code": "@@ -3,15 +3,6 @@ kind: ClusterRole\n metadata:\n name: cluster-role\n rules:\n-- apiGroups:\n- - \"\"\n- resources:\n- - namespaces\n- verbs:\n- - get\n- - list", - "comment_created_at": "2022-02-04T11:05:40+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Should be good to go. It would also be a bug from the current code if it couldn't operate without listing namespaces, since it shouldn't need these permissions in the first place.\r\n\r\nNote that we might need to re-introduce them though, in a future iteration when we would want the apps to be self-standing. But we need to discuss other items for this effort. 
But I'm also mentioning this context to keep in mind", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-environment-aware-configuration-design.json b/_reviewers/kubeflow-environment-aware-configuration-design.json new file mode 100644 index 0000000..fbfce9a --- /dev/null +++ b/_reviewers/kubeflow-environment-aware-configuration-design.json @@ -0,0 +1,212 @@ +[ + { + "discussion_id": "1313599914", + "pr_number": 7252, + "pr_file": ".github/workflows/centraldb_intergration_test.yaml", + "created_at": "2023-09-01T22:33:28+00:00", + "commented_code": "- name: Build CentralDashboard Image\n run: |\n TAG=${{env.TAG}}", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1313599914", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7252, + "pr_file": ".github/workflows/centraldb_intergration_test.yaml", + "discussion_id": "1313599914", + "commented_code": "@@ -26,6 +30,7 @@ jobs:\n \n - name: Build CentralDashboard Image\n run: |\n+ TAG=${{env.TAG}}", + "comment_created_at": "2023-09-01T22:33:28+00:00", + "comment_author": "thesuperzapper", + "comment_body": "I don't understand what this is doing.", + "pr_file_module": null + }, + { + "comment_id": "1313938139", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7252, + "pr_file": ".github/workflows/centraldb_intergration_test.yaml", + "discussion_id": "1313599914", + "commented_code": "@@ -26,6 +30,7 @@ jobs:\n \n - name: Build CentralDashboard Image\n run: |\n+ TAG=${{env.TAG}}", + "comment_created_at": "2023-09-02T18:25:18+00:00", + "comment_author": "kimwnasptd", + "comment_body": "It makes sure the image build, and stored in docker, will use the TAG defined in the `env` and not the default one which is\r\nhttps://github.com/kubeflow/kubeflow/blob/master/components/centraldashboard/Makefile#L2", + "pr_file_module": null + }, + { + "comment_id": "1316535596", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7252, + "pr_file": ".github/workflows/centraldb_intergration_test.yaml", + "discussion_id": "1313599914", + "commented_code": "@@ -26,6 +30,7 @@ jobs:\n \n - name: Build CentralDashboard Image\n run: |\n+ TAG=${{env.TAG}}", + "comment_created_at": "2023-09-06T00:01:47+00:00", + "comment_author": "thesuperzapper", + "comment_body": "I meant that this line is redundant because all job steps get all the global `env` exported into them.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1313601818", + "pr_number": 7252, + "pr_file": ".github/workflows/centraldb_intergration_test.yaml", + "created_at": "2023-09-01T22:34:49+00:00", + "commented_code": "cd components/centraldashboard/manifests\n kubectl create ns kubeflow\n\n export CURRENT_CENTRALDB_IMG=docker.io/kubeflownotebookswg/centraldashboard:latest\n export PR_CENTRALDB_IMG=${{env.IMG}}:${{env.TAG}}\n kustomize build overlays/kserve | kubectl apply -f -\n\n DEPLOYMENT=centraldashboard\n CONTAINER=centraldashboard\n\n IMG=${{env.IMG}}:${{env.TAG}}\n kubectl patch deployment $DEPLOYMENT -n kubeflow --patch \\\n '{\"spec\": {\"template\": {\"spec\": {\"containers\": [{\"name\": \"'\"$CONTAINER\"'\",\"image\": \"'\"$IMG\"'\"}]}}}}'", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1313601818", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7252, + "pr_file": ".github/workflows/centraldb_intergration_test.yaml", + "discussion_id": "1313601818", + "commented_code": "@@ -51,8 +56,13 @@ jobs:\n cd components/centraldashboard/manifests\n kubectl create ns 
kubeflow\n \n- export CURRENT_CENTRALDB_IMG=docker.io/kubeflownotebookswg/centraldashboard:latest\n- export PR_CENTRALDB_IMG=${{env.IMG}}:${{env.TAG}}\n+ kustomize build overlays/kserve | kubectl apply -f -\n+\n+ DEPLOYMENT=centraldashboard\n+ CONTAINER=centraldashboard\n+\n+ IMG=${{env.IMG}}:${{env.TAG}}\n+ kubectl patch deployment $DEPLOYMENT -n kubeflow --patch \\\n+ '{\"spec\": {\"template\": {\"spec\": {\"containers\": [{\"name\": \"'\"$CONTAINER\"'\",\"image\": \"'\"$IMG\"'\"}]}}}}'", + "comment_created_at": "2023-09-01T22:34:49+00:00", + "comment_author": "thesuperzapper", + "comment_body": "@kimwnasptd I don't like that this applies the manifests and then patches them after, this will result in a generation of the pod that fails to run, quickly followed by one with the correct image.\r\n\r\nI don't understand what was not working with the old `sed` approach, which modified the manifests before applying them?", + "pr_file_module": null + }, + { + "comment_id": "1313941985", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7252, + "pr_file": ".github/workflows/centraldb_intergration_test.yaml", + "discussion_id": "1313601818", + "commented_code": "@@ -51,8 +56,13 @@ jobs:\n cd components/centraldashboard/manifests\n kubectl create ns kubeflow\n \n- export CURRENT_CENTRALDB_IMG=docker.io/kubeflownotebookswg/centraldashboard:latest\n- export PR_CENTRALDB_IMG=${{env.IMG}}:${{env.TAG}}\n+ kustomize build overlays/kserve | kubectl apply -f -\n+\n+ DEPLOYMENT=centraldashboard\n+ CONTAINER=centraldashboard\n+\n+ IMG=${{env.IMG}}:${{env.TAG}}\n+ kubectl patch deployment $DEPLOYMENT -n kubeflow --patch \\\n+ '{\"spec\": {\"template\": {\"spec\": {\"containers\": [{\"name\": \"'\"$CONTAINER\"'\",\"image\": \"'\"$IMG\"'\"}]}}}}'", + "comment_created_at": "2023-09-02T18:35:54+00:00", + "comment_author": "kimwnasptd", + "comment_body": "> @kimwnasptd I don't like that this applies the manifests and then patches them after, this will result in a generation of the pod that fails to run, quickly followed by one with the correct image.\r\n\r\nWhy is this a hard problem? The deployment will be patched immediately, and we wait for the Deployment object either way\r\n\r\n> I don't understand what was not working with the old sed approach, which modified the manifests before applying them?\r\n\r\nPreviously the flow was:\r\n1. build the manifests\r\n 1. These manifests in `master` would have `latest` tag in images\r\n 2. In a release branch we update the tag, so it would be something like `v1.8.0-rc.0`\r\n3. `sed` was used change the image with tag `latest`, to `${{env.TAG}}`\r\n4. 
The above worked for `master`, which has tag `latest` but doesn't for release branches\r\n\r\nSo the script works for `master`, because `latest` image tag is used in manifests, but fails in release branches which use a different tag in manifests.", + "pr_file_module": null + }, + { + "comment_id": "1316526987", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7252, + "pr_file": ".github/workflows/centraldb_intergration_test.yaml", + "discussion_id": "1313601818", + "commented_code": "@@ -51,8 +56,13 @@ jobs:\n cd components/centraldashboard/manifests\n kubectl create ns kubeflow\n \n- export CURRENT_CENTRALDB_IMG=docker.io/kubeflownotebookswg/centraldashboard:latest\n- export PR_CENTRALDB_IMG=${{env.IMG}}:${{env.TAG}}\n+ kustomize build overlays/kserve | kubectl apply -f -\n+\n+ DEPLOYMENT=centraldashboard\n+ CONTAINER=centraldashboard\n+\n+ IMG=${{env.IMG}}:${{env.TAG}}\n+ kubectl patch deployment $DEPLOYMENT -n kubeflow --patch \\\n+ '{\"spec\": {\"template\": {\"spec\": {\"containers\": [{\"name\": \"'\"$CONTAINER\"'\",\"image\": \"'\"$IMG\"'\"}]}}}}'", + "comment_created_at": "2023-09-05T23:50:34+00:00", + "comment_author": "thesuperzapper", + "comment_body": "See https://github.com/kubeflow/kubeflow/pull/7252#issuecomment-1707359973", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "596258697", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", + "created_at": "2021-03-17T17:55:31+00:00", + "commented_code": "image:\n # The container Image for the user's Jupyter Notebook\n # If readonly, this value must be a member of the list below\n value: gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n # The list of available standard container Images\n options:\n - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-cpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-gpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-cpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-gpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-cpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-gpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-gpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-gpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-cpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-gpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-cpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-gpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-cpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-gpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-gpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-cpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-gpu:v0.5.0\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-full:master-28af7716\n - 
public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-28af7716\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-28af7716\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-28af7716\n # By default, custom container Images are allowed\n # Uncomment the following line to only enable standard container Images\n readOnly: false\n imageVSCode:\n # The container Image for the user's VS-Code Server\n # If readonly, this value must be a member of the list below\n value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n # The list of available standard container Images\n options:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n logo: ''\n # By default, custom container Images are allowed\n # Uncomment the following line to only enable standard container Images\n readOnly: false\n imageRStudio:\n # The container Image for the user's RStudio Server\n # If readonly, this value must be a member of the list below\n value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n # The list of available standard container Images\n options:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n logo: |-", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "596258697", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", + "discussion_id": "596258697", + "commented_code": "@@ -18,29 +18,106 @@ spawnerFormDefaults:\n image:\n # The container Image for the user's Jupyter Notebook\n # If readonly, this value must be a member of the list below\n- value: gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n # The list of available standard container Images\n options:\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-gpu:v0.5.0\n+ - 
public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-28af7716\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageVSCode:\n+ # The container Image for the user's VS-Code Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ logo: ''\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageRStudio:\n+ # The container Image for the user's RStudio Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ logo: |-", + "comment_created_at": "2021-03-17T17:55:31+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Lets remove the `logo` sections in the ConfigMap. As mentioned above they add a lot of boilerplate code that most of the users won't need to configure.", + "pr_file_module": null + }, + { + "comment_id": "596305952", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", + "discussion_id": "596258697", + "commented_code": "@@ -18,29 +18,106 @@ spawnerFormDefaults:\n image:\n # The container Image for the user's Jupyter Notebook\n # If readonly, this value must be a member of the list below\n- value: gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n # The list of available standard container Images\n options:\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-gpu:v0.5.0\n- - 
gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-gpu:v0.5.0\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-28af7716\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageVSCode:\n+ # The container Image for the user's VS-Code Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ logo: ''\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageRStudio:\n+ # The container Image for the user's RStudio Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ logo: |-", + "comment_created_at": "2021-03-17T19:04:33+00:00", + "comment_author": "kimwnasptd", + "comment_body": "@DavidSpek taking your comment https://github.com/kubeflow/kubeflow/pull/5646#issuecomment-801308346 here to have everything coordinated in this place\r\n\r\n> @kimwnasptd Could we do some sort of combination of the things you describe and what is being done now? More specifically, allow the user to set the URLs for the icons in the config map. \r\n\r\nThis is the initial idea I had https://github.com/kubeflow/kubeflow/pull/5646#issuecomment-797521345, although thinking about it more I believe it will be more involved to get it working. In this case we will need to first fetch the yaml [ and the request could fail ] and then inject the urls and use them.\r\n\r\n> Due to the use of trademarks and the applicable guidelines I think it is import to make it very easy for people to remove the logos (preferably without needing to build their own image).\r\n\r\nCould you elaborate a little bit more on this? 
How does the trademarks and applicable guidelines fit into the picture?\r\n\r\nI agree that we should make it easy for people to swap the logos they prefer, but these users are the minority and most probably won't have a hard time to rebuild the image.", + "pr_file_module": null + }, + { + "comment_id": "596313222", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", + "discussion_id": "596258697", + "commented_code": "@@ -18,29 +18,106 @@ spawnerFormDefaults:\n image:\n # The container Image for the user's Jupyter Notebook\n # If readonly, this value must be a member of the list below\n- value: gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n # The list of available standard container Images\n options:\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-gpu:v0.5.0\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-28af7716\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageVSCode:\n+ # The container Image for the user's VS-Code Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ logo: ''\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ 
imageRStudio:\n+ # The container Image for the user's RStudio Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ logo: |-", + "comment_created_at": "2021-03-17T19:16:09+00:00", + "comment_author": "davidspek", + "comment_body": "This was very briefly discussed on above. However, the RStudio guidelines for using their trademark are very strict (Jupyter is the complete opposite and definitely not a problem, VSCode is still unknown). For RStudio, the only permitted use that could apply is: \"Your project deploys RStudio products in unmodified form supplied by RStudio, PBC **for internal use only**;\". In other words, EKF can probably not use the RStudio logo (you build your own images anyways, but still), and any vendor that offers Kubeflow-as-a-Service (StatCan, though it might pass as internal for now) or provides a distribution. These uses would all need written approval from RStudio to use their logo in the UI. And VSCode is still unknown at this point. \r\nThis is the reason why I would like to have a solution that doesn't require building a new image, but rather just changing a configmap or something similar. This logo issue is something that will also be faced if the Spawner UI becomes more configurable in the future (the admin can create whatever groups of images he wants, and the logos will need to come from somewhere). ", + "pr_file_module": null + }, + { + "comment_id": "596676032", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", + "discussion_id": "596258697", + "commented_code": "@@ -18,29 +18,106 @@ spawnerFormDefaults:\n image:\n # The container Image for the user's Jupyter Notebook\n # If readonly, this value must be a member of the list below\n- value: gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n # The list of available standard container Images\n options:\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n- - 
gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-gpu:v0.5.0\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-28af7716\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageVSCode:\n+ # The container Image for the user's VS-Code Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ logo: ''\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageRStudio:\n+ # The container Image for the user's RStudio Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ logo: |-", + "comment_created_at": "2021-03-18T09:15:59+00:00", + "comment_author": "davidspek", + "comment_body": "@kimwnasptd I think I just thought of the solution for this. If we use links for the logos that direct to the SVGs hosted in this repository, we can swap the SVGs for something else if using the logos turns out to be a problem. I believe if this would the propagate to all running installations as well (if the cache of the original SVG is cleared). 
Does this make sense?", + "pr_file_module": null + }, + { + "comment_id": "596792020", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", + "discussion_id": "596258697", + "commented_code": "@@ -18,29 +18,106 @@ spawnerFormDefaults:\n image:\n # The container Image for the user's Jupyter Notebook\n # If readonly, this value must be a member of the list below\n- value: gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n # The list of available standard container Images\n options:\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-gpu:v0.5.0\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-28af7716\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageVSCode:\n+ # The container Image for the user's VS-Code Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ logo: ''\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageRStudio:\n+ # The container Image for the user's RStudio Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ # The list of 
available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ logo: |-", + "comment_created_at": "2021-03-18T11:47:18+00:00", + "comment_author": "kimwnasptd", + "comment_body": "From what I understand the issue is not how the icon will appear to the UI [ via the backend directly, fetched from somewhere public etc ] but the fact that we use this logo in the software. With that I mean that if someone would use the app he would end up seeing the logo, which could imply that this is endorsed from RStudio etc.\r\n\r\nAlso an issue with fetching resources in the UI directly from the public internet is that the app unable will not work in air-gaped environments, where the users would only have access to specific IPs.\r\n\r\n", + "pr_file_module": null + }, + { + "comment_id": "596796051", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", + "discussion_id": "596258697", + "commented_code": "@@ -18,29 +18,106 @@ spawnerFormDefaults:\n image:\n # The container Image for the user's Jupyter Notebook\n # If readonly, this value must be a member of the list below\n- value: gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n # The list of available standard container Images\n options:\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-gpu:v0.5.0\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-28af7716\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ 
imageVSCode:\n+ # The container Image for the user's VS-Code Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ logo: ''\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageRStudio:\n+ # The container Image for the user's RStudio Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ logo: |-", + "comment_created_at": "2021-03-18T11:51:13+00:00", + "comment_author": "kimwnasptd", + "comment_body": "So judging from our current situation, where we are tied by time constraints and the trademark situation would need some communication and tip toeing into what it's acceptable I would propose the following solution to unblock ourselves:\r\n\r\n1. Don't use the logos in this PR. We could use the [group options](https://v8.material.angular.io/components/select/overview#creating-groups-of-options) from Angular material, or just use buttons with text [ would prefer the first approach tbh ] to show the images for the different servers\r\n2. Open an issue about it and involve @castrojo in this. And once we figure out what we are allowed to do with the rstudio trademark we can open a new PR and add this functionality. We've done it once here so this won't be difficult to do in the future\r\n\r\nWDYT?", + "pr_file_module": null + }, + { + "comment_id": "596809742", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", + "discussion_id": "596258697", + "commented_code": "@@ -18,29 +18,106 @@ spawnerFormDefaults:\n image:\n # The container Image for the user's Jupyter Notebook\n # If readonly, this value must be a member of the list below\n- value: gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n # The list of available standard container Images\n options:\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-gpu:v0.5.0\n- - 
gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-gpu:v0.5.0\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-28af7716\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageVSCode:\n+ # The container Image for the user's VS-Code Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ logo: ''\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageRStudio:\n+ # The container Image for the user's RStudio Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ logo: |-", + "comment_created_at": "2021-03-18T12:10:39+00:00", + "comment_author": "davidspek", + "comment_body": "Pulling it from the repo wouldn't solve your first point, but it does blur the distribution and way the logo is implemented in the software a bit. It could even directly link to the original source of the RStudio SVG. Indeed, for airgaped environment this might pose a slight problem. However, I think that people in that situation could build their own image (and might already be doing so as they would have a private registry as well). Also, I don't think the functionality of the JWA would actually break, the button would just be empty. \n\nI think the other problem with this is decision is who is responsible for the decision to include the logos or not and who would be responsible if RStudio isn't happy. If I'm the person responsible since it is my PR that adds their logo, I'm fine with that (and would prefer this actually). If I can be the person responsible for this, I'd go for adding the logos and remove them if there is a complaint. I think an argument can be made that the logo is being used according to the guidelines, if internal use is interpreted as \"the kubeflow/wg-notebooks developers deploying unmodified RStudio binaries supplied by RStudio to test their functionality in Kubeflow during development\". 
", + "pr_file_module": null + }, + { + "comment_id": "596957490", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", + "discussion_id": "596258697", + "commented_code": "@@ -18,29 +18,106 @@ spawnerFormDefaults:\n image:\n # The container Image for the user's Jupyter Notebook\n # If readonly, this value must be a member of the list below\n- value: gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n # The list of available standard container Images\n options:\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-gpu:v0.5.0\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-28af7716\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageVSCode:\n+ # The container Image for the user's VS-Code Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ logo: ''\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageRStudio:\n+ # The container Image for the user's RStudio Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ # The list of available standard 
container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ logo: |-", + "comment_created_at": "2021-03-18T15:04:52+00:00", + "comment_author": "davidspek", + "comment_body": "I only just saw your second comment. Personally, I would then be more in favor of using the button toggle that is there now, but using an SVG with text in it for VSCode and RStudio. This still makes it rather easy for users to customize if they want the actual logos (which they might be allowed to use in their situation). It also allows us to keep using the Jupyter logo (as we can definitely use that one), and works together with https://github.com/kubeflow/kubeflow/pull/5646#pullrequestreview-614539090 as well (assuming I get this implemented quickly). ", + "pr_file_module": null + }, + { + "comment_id": "596958460", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", + "discussion_id": "596258697", + "commented_code": "@@ -18,29 +18,106 @@ spawnerFormDefaults:\n image:\n # The container Image for the user's Jupyter Notebook\n # If readonly, this value must be a member of the list below\n- value: gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n # The list of available standard container Images\n options:\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-gpu:v0.5.0\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-28af7716\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ 
imageVSCode:\n+ # The container Image for the user's VS-Code Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ logo: ''\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageRStudio:\n+ # The container Image for the user's RStudio Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ logo: |-", + "comment_created_at": "2021-03-18T15:05:54+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Let's go with using the svgs. We'll use links to fetch them from where they publicly host them. Distributions could make a small change and add their svgs to the app and simply change these links, which is something that we were OK with from the beginning. Lets just add a small section for this in the app's README.\r\n\r\nRegarding the responsibility part, I'm not a layer on this so not an expert but we are a team and all of us move forward together. We are not going to point fingers to people if things go south. And also since I'm a reviewer and agreed to move forward with this so I'm also responsible.\r\n\r\nBut if indeed people complain about the usage then we will sincerely apologize and just change this to something like we discussed above. 
", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-environment-aware-configuration-design.md b/_reviewers/kubeflow-environment-aware-configuration-design.md index 7f347fd..772deaf 100644 --- a/_reviewers/kubeflow-environment-aware-configuration-design.md +++ b/_reviewers/kubeflow-environment-aware-configuration-design.md @@ -42,217 +42,3 @@ imageOptions: - source: internal # For air-gapped environments - source: external # For internet-connected environments ``` - - -[ - { - "discussion_id": "1313599914", - "pr_number": 7252, - "pr_file": ".github/workflows/centraldb_intergration_test.yaml", - "created_at": "2023-09-01T22:33:28+00:00", - "commented_code": "- name: Build CentralDashboard Image\n run: |\n TAG=${{env.TAG}}", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1313599914", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7252, - "pr_file": ".github/workflows/centraldb_intergration_test.yaml", - "discussion_id": "1313599914", - "commented_code": "@@ -26,6 +30,7 @@ jobs:\n \n - name: Build CentralDashboard Image\n run: |\n+ TAG=${{env.TAG}}", - "comment_created_at": "2023-09-01T22:33:28+00:00", - "comment_author": "thesuperzapper", - "comment_body": "I don't understand what this is doing.", - "pr_file_module": null - }, - { - "comment_id": "1313938139", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7252, - "pr_file": ".github/workflows/centraldb_intergration_test.yaml", - "discussion_id": "1313599914", - "commented_code": "@@ -26,6 +30,7 @@ jobs:\n \n - name: Build CentralDashboard Image\n run: |\n+ TAG=${{env.TAG}}", - "comment_created_at": "2023-09-02T18:25:18+00:00", - "comment_author": "kimwnasptd", - "comment_body": "It makes sure the image build, and stored in docker, will use the TAG defined in the `env` and not the default one which is\r\nhttps://github.com/kubeflow/kubeflow/blob/master/components/centraldashboard/Makefile#L2", - "pr_file_module": null - }, - { - "comment_id": "1316535596", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7252, - "pr_file": ".github/workflows/centraldb_intergration_test.yaml", - "discussion_id": "1313599914", - "commented_code": "@@ -26,6 +30,7 @@ jobs:\n \n - name: Build CentralDashboard Image\n run: |\n+ TAG=${{env.TAG}}", - "comment_created_at": "2023-09-06T00:01:47+00:00", - "comment_author": "thesuperzapper", - "comment_body": "I meant that this line is redundant because all job steps get all the global `env` exported into them.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1313601818", - "pr_number": 7252, - "pr_file": ".github/workflows/centraldb_intergration_test.yaml", - "created_at": "2023-09-01T22:34:49+00:00", - "commented_code": "cd components/centraldashboard/manifests\n kubectl create ns kubeflow\n\n export CURRENT_CENTRALDB_IMG=docker.io/kubeflownotebookswg/centraldashboard:latest\n export PR_CENTRALDB_IMG=${{env.IMG}}:${{env.TAG}}\n kustomize build overlays/kserve | kubectl apply -f -\n\n DEPLOYMENT=centraldashboard\n CONTAINER=centraldashboard\n\n IMG=${{env.IMG}}:${{env.TAG}}\n kubectl patch deployment $DEPLOYMENT -n kubeflow --patch \\\n '{\"spec\": {\"template\": {\"spec\": {\"containers\": [{\"name\": \"'\"$CONTAINER\"'\",\"image\": \"'\"$IMG\"'\"}]}}}}'", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1313601818", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7252, - "pr_file": ".github/workflows/centraldb_intergration_test.yaml", - 
"discussion_id": "1313601818", - "commented_code": "@@ -51,8 +56,13 @@ jobs:\n cd components/centraldashboard/manifests\n kubectl create ns kubeflow\n \n- export CURRENT_CENTRALDB_IMG=docker.io/kubeflownotebookswg/centraldashboard:latest\n- export PR_CENTRALDB_IMG=${{env.IMG}}:${{env.TAG}}\n+ kustomize build overlays/kserve | kubectl apply -f -\n+\n+ DEPLOYMENT=centraldashboard\n+ CONTAINER=centraldashboard\n+\n+ IMG=${{env.IMG}}:${{env.TAG}}\n+ kubectl patch deployment $DEPLOYMENT -n kubeflow --patch \\\n+ '{\"spec\": {\"template\": {\"spec\": {\"containers\": [{\"name\": \"'\"$CONTAINER\"'\",\"image\": \"'\"$IMG\"'\"}]}}}}'", - "comment_created_at": "2023-09-01T22:34:49+00:00", - "comment_author": "thesuperzapper", - "comment_body": "@kimwnasptd I don't like that this applies the manifests and then patches them after, this will result in a generation of the pod that fails to run, quickly followed by one with the correct image.\r\n\r\nI don't understand what was not working with the old `sed` approach, which modified the manifests before applying them?", - "pr_file_module": null - }, - { - "comment_id": "1313941985", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7252, - "pr_file": ".github/workflows/centraldb_intergration_test.yaml", - "discussion_id": "1313601818", - "commented_code": "@@ -51,8 +56,13 @@ jobs:\n cd components/centraldashboard/manifests\n kubectl create ns kubeflow\n \n- export CURRENT_CENTRALDB_IMG=docker.io/kubeflownotebookswg/centraldashboard:latest\n- export PR_CENTRALDB_IMG=${{env.IMG}}:${{env.TAG}}\n+ kustomize build overlays/kserve | kubectl apply -f -\n+\n+ DEPLOYMENT=centraldashboard\n+ CONTAINER=centraldashboard\n+\n+ IMG=${{env.IMG}}:${{env.TAG}}\n+ kubectl patch deployment $DEPLOYMENT -n kubeflow --patch \\\n+ '{\"spec\": {\"template\": {\"spec\": {\"containers\": [{\"name\": \"'\"$CONTAINER\"'\",\"image\": \"'\"$IMG\"'\"}]}}}}'", - "comment_created_at": "2023-09-02T18:35:54+00:00", - "comment_author": "kimwnasptd", - "comment_body": "> @kimwnasptd I don't like that this applies the manifests and then patches them after, this will result in a generation of the pod that fails to run, quickly followed by one with the correct image.\r\n\r\nWhy is this a hard problem? The deployment will be patched immediately, and we wait for the Deployment object either way\r\n\r\n> I don't understand what was not working with the old sed approach, which modified the manifests before applying them?\r\n\r\nPreviously the flow was:\r\n1. build the manifests\r\n 1. These manifests in `master` would have `latest` tag in images\r\n 2. In a release branch we update the tag, so it would be something like `v1.8.0-rc.0`\r\n3. `sed` was used change the image with tag `latest`, to `${{env.TAG}}`\r\n4. 
The above worked for `master`, which has tag `latest` but doesn't for release branches\r\n\r\nSo the script works for `master`, because `latest` image tag is used in manifests, but fails in release branches which use a different tag in manifests.", - "pr_file_module": null - }, - { - "comment_id": "1316526987", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7252, - "pr_file": ".github/workflows/centraldb_intergration_test.yaml", - "discussion_id": "1313601818", - "commented_code": "@@ -51,8 +56,13 @@ jobs:\n cd components/centraldashboard/manifests\n kubectl create ns kubeflow\n \n- export CURRENT_CENTRALDB_IMG=docker.io/kubeflownotebookswg/centraldashboard:latest\n- export PR_CENTRALDB_IMG=${{env.IMG}}:${{env.TAG}}\n+ kustomize build overlays/kserve | kubectl apply -f -\n+\n+ DEPLOYMENT=centraldashboard\n+ CONTAINER=centraldashboard\n+\n+ IMG=${{env.IMG}}:${{env.TAG}}\n+ kubectl patch deployment $DEPLOYMENT -n kubeflow --patch \\\n+ '{\"spec\": {\"template\": {\"spec\": {\"containers\": [{\"name\": \"'\"$CONTAINER\"'\",\"image\": \"'\"$IMG\"'\"}]}}}}'", - "comment_created_at": "2023-09-05T23:50:34+00:00", - "comment_author": "thesuperzapper", - "comment_body": "See https://github.com/kubeflow/kubeflow/pull/7252#issuecomment-1707359973", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "596258697", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", - "created_at": "2021-03-17T17:55:31+00:00", - "commented_code": "image:\n # The container Image for the user's Jupyter Notebook\n # If readonly, this value must be a member of the list below\n value: gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n # The list of available standard container Images\n options:\n - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-cpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-gpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-cpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-gpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-cpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-gpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-gpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-gpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-cpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-gpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-cpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-gpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-cpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-gpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-gpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-cpu:v0.5.0\n - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-gpu:v0.5.0\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-full:master-28af7716\n - 
public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-28af7716\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-28af7716\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-28af7716\n # By default, custom container Images are allowed\n # Uncomment the following line to only enable standard container Images\n readOnly: false\n imageVSCode:\n # The container Image for the user's VS-Code Server\n # If readonly, this value must be a member of the list below\n value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n # The list of available standard container Images\n options:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n logo: ''\n # By default, custom container Images are allowed\n # Uncomment the following line to only enable standard container Images\n readOnly: false\n imageRStudio:\n # The container Image for the user's RStudio Server\n # If readonly, this value must be a member of the list below\n value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n # The list of available standard container Images\n options:\n - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n logo: |-", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "596258697", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", - "discussion_id": "596258697", - "commented_code": "@@ -18,29 +18,106 @@ spawnerFormDefaults:\n image:\n # The container Image for the user's Jupyter Notebook\n # If readonly, this value must be a member of the list below\n- value: gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n # The list of available standard container Images\n options:\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-gpu:v0.5.0\n+ - 
public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-28af7716\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageVSCode:\n+ # The container Image for the user's VS-Code Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ logo: ''\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageRStudio:\n+ # The container Image for the user's RStudio Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ logo: |-", - "comment_created_at": "2021-03-17T17:55:31+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Lets remove the `logo` sections in the ConfigMap. As mentioned above they add a lot of boilerplate code that most of the users won't need to configure.", - "pr_file_module": null - }, - { - "comment_id": "596305952", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", - "discussion_id": "596258697", - "commented_code": "@@ -18,29 +18,106 @@ spawnerFormDefaults:\n image:\n # The container Image for the user's Jupyter Notebook\n # If readonly, this value must be a member of the list below\n- value: gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n # The list of available standard container Images\n options:\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-gpu:v0.5.0\n- - 
gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-gpu:v0.5.0\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-28af7716\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageVSCode:\n+ # The container Image for the user's VS-Code Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ logo: ''\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageRStudio:\n+ # The container Image for the user's RStudio Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ logo: |-", - "comment_created_at": "2021-03-17T19:04:33+00:00", - "comment_author": "kimwnasptd", - "comment_body": "@DavidSpek taking your comment https://github.com/kubeflow/kubeflow/pull/5646#issuecomment-801308346 here to have everything coordinated in this place\r\n\r\n> @kimwnasptd Could we do some sort of combination of the things you describe and what is being done now? More specifically, allow the user to set the URLs for the icons in the config map. \r\n\r\nThis is the initial idea I had https://github.com/kubeflow/kubeflow/pull/5646#issuecomment-797521345, although thinking about it more I believe it will be more involved to get it working. In this case we will need to first fetch the yaml [ and the request could fail ] and then inject the urls and use them.\r\n\r\n> Due to the use of trademarks and the applicable guidelines I think it is import to make it very easy for people to remove the logos (preferably without needing to build their own image).\r\n\r\nCould you elaborate a little bit more on this? 
How does the trademarks and applicable guidelines fit into the picture?\r\n\r\nI agree that we should make it easy for people to swap the logos they prefer, but these users are the minority and most probably won't have a hard time to rebuild the image.", - "pr_file_module": null - }, - { - "comment_id": "596313222", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", - "discussion_id": "596258697", - "commented_code": "@@ -18,29 +18,106 @@ spawnerFormDefaults:\n image:\n # The container Image for the user's Jupyter Notebook\n # If readonly, this value must be a member of the list below\n- value: gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n # The list of available standard container Images\n options:\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-gpu:v0.5.0\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-28af7716\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageVSCode:\n+ # The container Image for the user's VS-Code Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ logo: ''\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ 
imageRStudio:\n+ # The container Image for the user's RStudio Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ logo: |-", - "comment_created_at": "2021-03-17T19:16:09+00:00", - "comment_author": "davidspek", - "comment_body": "This was very briefly discussed on above. However, the RStudio guidelines for using their trademark are very strict (Jupyter is the complete opposite and definitely not a problem, VSCode is still unknown). For RStudio, the only permitted use that could apply is: \"Your project deploys RStudio products in unmodified form supplied by RStudio, PBC **for internal use only**;\". In other words, EKF can probably not use the RStudio logo (you build your own images anyways, but still), and any vendor that offers Kubeflow-as-a-Service (StatCan, though it might pass as internal for now) or provides a distribution. These uses would all need written approval from RStudio to use their logo in the UI. And VSCode is still unknown at this point. \r\nThis is the reason why I would like to have a solution that doesn't require building a new image, but rather just changing a configmap or something similar. This logo issue is something that will also be faced if the Spawner UI becomes more configurable in the future (the admin can create whatever groups of images he wants, and the logos will need to come from somewhere). ", - "pr_file_module": null - }, - { - "comment_id": "596676032", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", - "discussion_id": "596258697", - "commented_code": "@@ -18,29 +18,106 @@ spawnerFormDefaults:\n image:\n # The container Image for the user's Jupyter Notebook\n # If readonly, this value must be a member of the list below\n- value: gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n # The list of available standard container Images\n options:\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n- - 
gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-gpu:v0.5.0\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-28af7716\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageVSCode:\n+ # The container Image for the user's VS-Code Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ logo: ''\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageRStudio:\n+ # The container Image for the user's RStudio Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ logo: |-", - "comment_created_at": "2021-03-18T09:15:59+00:00", - "comment_author": "davidspek", - "comment_body": "@kimwnasptd I think I just thought of the solution for this. If we use links for the logos that direct to the SVGs hosted in this repository, we can swap the SVGs for something else if using the logos turns out to be a problem. I believe if this would the propagate to all running installations as well (if the cache of the original SVG is cleared). 
Does this make sense?", - "pr_file_module": null - }, - { - "comment_id": "596792020", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", - "discussion_id": "596258697", - "commented_code": "@@ -18,29 +18,106 @@ spawnerFormDefaults:\n image:\n # The container Image for the user's Jupyter Notebook\n # If readonly, this value must be a member of the list below\n- value: gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n # The list of available standard container Images\n options:\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-gpu:v0.5.0\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-28af7716\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageVSCode:\n+ # The container Image for the user's VS-Code Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ logo: ''\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageRStudio:\n+ # The container Image for the user's RStudio Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ # The list of 
available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ logo: |-", - "comment_created_at": "2021-03-18T11:47:18+00:00", - "comment_author": "kimwnasptd", - "comment_body": "From what I understand the issue is not how the icon will appear to the UI [ via the backend directly, fetched from somewhere public etc ] but the fact that we use this logo in the software. With that I mean that if someone would use the app he would end up seeing the logo, which could imply that this is endorsed from RStudio etc.\r\n\r\nAlso an issue with fetching resources in the UI directly from the public internet is that the app unable will not work in air-gaped environments, where the users would only have access to specific IPs.\r\n\r\n", - "pr_file_module": null - }, - { - "comment_id": "596796051", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", - "discussion_id": "596258697", - "commented_code": "@@ -18,29 +18,106 @@ spawnerFormDefaults:\n image:\n # The container Image for the user's Jupyter Notebook\n # If readonly, this value must be a member of the list below\n- value: gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n # The list of available standard container Images\n options:\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-gpu:v0.5.0\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-28af7716\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ 
imageVSCode:\n+ # The container Image for the user's VS-Code Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ logo: ''\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageRStudio:\n+ # The container Image for the user's RStudio Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ logo: |-", - "comment_created_at": "2021-03-18T11:51:13+00:00", - "comment_author": "kimwnasptd", - "comment_body": "So judging from our current situation, where we are tied by time constraints and the trademark situation would need some communication and tip toeing into what it's acceptable I would propose the following solution to unblock ourselves:\r\n\r\n1. Don't use the logos in this PR. We could use the [group options](https://v8.material.angular.io/components/select/overview#creating-groups-of-options) from Angular material, or just use buttons with text [ would prefer the first approach tbh ] to show the images for the different servers\r\n2. Open an issue about it and involve @castrojo in this. And once we figure out what we are allowed to do with the rstudio trademark we can open a new PR and add this functionality. We've done it once here so this won't be difficult to do in the future\r\n\r\nWDYT?", - "pr_file_module": null - }, - { - "comment_id": "596809742", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", - "discussion_id": "596258697", - "commented_code": "@@ -18,29 +18,106 @@ spawnerFormDefaults:\n image:\n # The container Image for the user's Jupyter Notebook\n # If readonly, this value must be a member of the list below\n- value: gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n # The list of available standard container Images\n options:\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-gpu:v0.5.0\n- - 
gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-gpu:v0.5.0\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-28af7716\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageVSCode:\n+ # The container Image for the user's VS-Code Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ logo: ''\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageRStudio:\n+ # The container Image for the user's RStudio Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ logo: |-", - "comment_created_at": "2021-03-18T12:10:39+00:00", - "comment_author": "davidspek", - "comment_body": "Pulling it from the repo wouldn't solve your first point, but it does blur the distribution and way the logo is implemented in the software a bit. It could even directly link to the original source of the RStudio SVG. Indeed, for airgaped environment this might pose a slight problem. However, I think that people in that situation could build their own image (and might already be doing so as they would have a private registry as well). Also, I don't think the functionality of the JWA would actually break, the button would just be empty. \n\nI think the other problem with this is decision is who is responsible for the decision to include the logos or not and who would be responsible if RStudio isn't happy. If I'm the person responsible since it is my PR that adds their logo, I'm fine with that (and would prefer this actually). If I can be the person responsible for this, I'd go for adding the logos and remove them if there is a complaint. I think an argument can be made that the logo is being used according to the guidelines, if internal use is interpreted as \"the kubeflow/wg-notebooks developers deploying unmodified RStudio binaries supplied by RStudio to test their functionality in Kubeflow during development\". 
", - "pr_file_module": null - }, - { - "comment_id": "596957490", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", - "discussion_id": "596258697", - "commented_code": "@@ -18,29 +18,106 @@ spawnerFormDefaults:\n image:\n # The container Image for the user's Jupyter Notebook\n # If readonly, this value must be a member of the list below\n- value: gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n # The list of available standard container Images\n options:\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-gpu:v0.5.0\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-28af7716\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageVSCode:\n+ # The container Image for the user's VS-Code Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ logo: ''\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageRStudio:\n+ # The container Image for the user's RStudio Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ # The list of available standard 
container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ logo: |-", - "comment_created_at": "2021-03-18T15:04:52+00:00", - "comment_author": "davidspek", - "comment_body": "I only just saw your second comment. Personally, I would then be more in favor of using the button toggle that is there now, but using an SVG with text in it for VSCode and RStudio. This still makes it rather easy for users to customize if they want the actual logos (which they might be allowed to use in their situation). It also allows us to keep using the Jupyter logo (as we can definitely use that one), and works together with https://github.com/kubeflow/kubeflow/pull/5646#pullrequestreview-614539090 as well (assuming I get this implemented quickly). ", - "pr_file_module": null - }, - { - "comment_id": "596958460", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/yaml/spawner_ui_config.yaml", - "discussion_id": "596258697", - "commented_code": "@@ -18,29 +18,106 @@ spawnerFormDefaults:\n image:\n # The container Image for the user's Jupyter Notebook\n # If readonly, this value must be a member of the list below\n- value: gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n # The list of available standard container Images\n options:\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.5.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.6.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.7.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.10.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.11.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-gpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-cpu:v0.5.0\n- - gcr.io/kubeflow-images-public/tensorflow-2.0.0a-notebook-gpu:v0.5.0\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:master-28af7716\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:master-28af7716\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ 
imageVSCode:\n+ # The container Image for the user's VS-Code Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:master-28af7716\n+ logo: ''\n+ # By default, custom container Images are allowed\n+ # Uncomment the following line to only enable standard container Images\n+ readOnly: false\n+ imageRStudio:\n+ # The container Image for the user's RStudio Server\n+ # If readonly, this value must be a member of the list below\n+ value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ # The list of available standard container Images\n+ options:\n+ - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716\n+ logo: |-", - "comment_created_at": "2021-03-18T15:05:54+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Let's go with using the svgs. We'll use links to fetch them from where they publicly host them. Distributions could make a small change and add their svgs to the app and simply change these links, which is something that we were OK with from the beginning. Lets just add a small section for this in the app's README.\r\n\r\nRegarding the responsibility part, I'm not a layer on this so not an expert but we are a team and all of us move forward together. We are not going to point fingers to people if things go south. And also since I'm a reviewer and agreed to move forward with this so I'm also responsible.\r\n\r\nBut if indeed people complain about the usage then we will sincerely apologize and just change this to something like we discussed above. 
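A minimal sketch of what the decision above (fetching the logos as links from where they are publicly hosted) could look like for the RStudio entry in `spawner_ui_config.yaml`; the image URI and field names come from the config quoted in this thread, while the logo URL is a hypothetical placeholder that a distribution could swap for its own hosted asset without rebuilding any image:

```yaml
imageRStudio:
  # The container Image for the user's RStudio Server
  value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716
  options:
    - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio:master-28af7716
  # instead of embedding the SVG data with `logo: |-`, reference a hosted asset;
  # the URL below is illustrative only and can be changed via config
  logo: https://example.com/logos/rstudio.svg
  readOnly: false
```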
", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-environment-variable-management.json b/_reviewers/kubeflow-environment-variable-management.json new file mode 100644 index 0000000..e8ef9de --- /dev/null +++ b/_reviewers/kubeflow-environment-variable-management.json @@ -0,0 +1,184 @@ +[ + { + "discussion_id": "1720455821", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "created_at": "2024-08-16T23:18:57+00:00", + "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n\n# version details\nARG ARTIFACTORY_URL=vault.habana.ai\nARG VERSION=1.16.2\nARG REVISION=2\nARG PYTORCH_VERSION=2.2.2\nARG OS_NUMBER=2204\n\n# runtime variables\nENV DEBIAN_FRONTEND=noninteractive", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1720455821", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "discussion_id": "1720455821", + "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive", + "comment_created_at": "2024-08-16T23:18:57+00:00", + "comment_author": "thesuperzapper", + "comment_body": "Is there a particular reason `DEBIAN_FRONTEND` is being set on the overall container, rather than within the steps which need it? Because the container is sort of \"interactive\" once the notebook is actually running.\r\n\r\nRight now, we only do this on the base image [during the `apt-get` commands](https://github.com/kubeflow/kubeflow/blob/f6563b293808c771db05023bccdd793528c53745/components/example-notebook-servers/base/Dockerfile#L47), but you might also need to do it on your install script steps, if they check for this variable.\r\n\r\nI think we should either:\r\n\r\n1. Decide that all images need this, and set it in the base image as an `ENV`\r\n2. 
Set it only on RUN commands which need it.", + "pr_file_module": null + }, + { + "comment_id": "1724980190", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "discussion_id": "1720455821", + "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive", + "comment_created_at": "2024-08-21T12:38:01+00:00", + "comment_author": "tkatila", + "comment_body": "I'll move the env variable inside the RUNs that run apt-get.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1720483506", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "created_at": "2024-08-17T00:10:54+00:00", + "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n\n# version details\nARG ARTIFACTORY_URL=vault.habana.ai\nARG VERSION=1.16.2\nARG REVISION=2\nARG PYTORCH_VERSION=2.2.2\nARG OS_NUMBER=2204\n\n# runtime variables\nENV DEBIAN_FRONTEND=noninteractive\nENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\nENV HABANA_LOGS=/var/log/habana_logs/\nENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\nENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\nENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\nENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n\n# There is no need to store pip installation files inside docker image\nENV PIP_NO_CACHE_DIR=on\nENV PIP_DISABLE_PIP_VERSION_CHECK=1\n\n# libfabric and scaling variables\nENV LIBFABRIC_VERSION=\"1.20.0\"\nENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\nENV MPI_ROOT=/opt/amazon/openmpi\nENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\nENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\nENV MPICC=${MPI_ROOT}/bin/mpicc\nENV OPAL_PREFIX=${MPI_ROOT}\nENV RDMAV_FORK_SAFE=1\nENV FI_EFA_USE_DEVICE_RDMA=1\nENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\nENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1720483506", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "discussion_id": "1720483506", + "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG 
PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n+\n+# There is no need to store pip installation files inside docker image\n+ENV PIP_NO_CACHE_DIR=on\n+ENV PIP_DISABLE_PIP_VERSION_CHECK=1\n+\n+# libfabric and scaling variables\n+ENV LIBFABRIC_VERSION=\"1.20.0\"\n+ENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\n+ENV MPI_ROOT=/opt/amazon/openmpi\n+ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\n+ENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\n+ENV MPICC=${MPI_ROOT}/bin/mpicc\n+ENV OPAL_PREFIX=${MPI_ROOT}\n+ENV RDMAV_FORK_SAFE=1\n+ENV FI_EFA_USE_DEVICE_RDMA=1\n+ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\n+ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib", + "comment_created_at": "2024-08-17T00:10:54+00:00", + "comment_author": "thesuperzapper", + "comment_body": "As there are a lot of envs being set here, some suggestions:\r\n\r\n1. If they are doing things like updating the PATH or setting configs, please place them above/below the `RUN` command which is adding that package (rather than in a common section at the top of the file).\r\n2. For the ones which are left (e.g. not related to installing a package), please group them logically underneath `#` comments which explain why they are needed and if necessary link to some external docs.", + "pr_file_module": null + }, + { + "comment_id": "1724994936", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "discussion_id": "1720483506", + "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n+\n+# There is no need to store pip installation files inside docker image\n+ENV PIP_NO_CACHE_DIR=on\n+ENV PIP_DISABLE_PIP_VERSION_CHECK=1\n+\n+# libfabric and scaling variables\n+ENV LIBFABRIC_VERSION=\"1.20.0\"\n+ENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\n+ENV MPI_ROOT=/opt/amazon/openmpi\n+ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\n+ENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\n+ENV MPICC=${MPI_ROOT}/bin/mpicc\n+ENV OPAL_PREFIX=${MPI_ROOT}\n+ENV RDMAV_FORK_SAFE=1\n+ENV FI_EFA_USE_DEVICE_RDMA=1\n+ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\n+ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib", + 
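Putting the two suggestions above together, that is, scoping `DEBIAN_FRONTEND` to the `RUN` steps that need it (option 2) and keeping each package's `ENV` lines next to the `RUN` that installs it under a short comment, might look roughly like the sketch below. It reuses the `BASE_IMG` build argument and the libfabric and version values quoted in these snippets, while the apt package list and the `install_libfabric.sh` helper are hypothetical placeholders, not part of the real image.

```dockerfile
ARG BASE_IMG=
FROM $BASE_IMG

# args - version pins are build-time only, so ARG rather than ENV
ARG ARTIFACTORY_URL=vault.habana.ai
ARG VERSION=1.16.2
ARG LIBFABRIC_VERSION=1.20.0

# install - OS packages
# DEBIAN_FRONTEND is scoped to this RUN so it is not left set in the
# (interactive) notebook container at runtime
RUN export DEBIAN_FRONTEND=noninteractive \
 && apt-get -yq update \
 && apt-get -yq install --no-install-recommends \
    build-essential \
    wget \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*

# install - libfabric
# (the real image fetches it from ${ARTIFACTORY_URL}; the helper script here
#  is a hypothetical placeholder)
COPY install_libfabric.sh /tmp/install_libfabric.sh
RUN export DEBIAN_FRONTEND=noninteractive \
 && bash /tmp/install_libfabric.sh "${LIBFABRIC_VERSION}" \
 && rm -f /tmp/install_libfabric.sh

# configs - libfabric (kept right next to the install step above)
ENV LIBFABRIC_ROOT="/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}"
ENV PATH="${LIBFABRIC_ROOT}/bin:${PATH}"
ENV LD_LIBRARY_PATH="${LIBFABRIC_ROOT}/lib:${LD_LIBRARY_PATH}"
```

If an install script checks `DEBIAN_FRONTEND` itself, exporting it inside that `RUN` (as above) covers it without promoting it to a container-wide `ENV`.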
"comment_created_at": "2024-08-21T12:48:30+00:00", + "comment_author": "tkatila", + "comment_body": "sure, I'll move these so that they are next to the relevant RUNs", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1720484378", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "created_at": "2024-08-17T00:13:54+00:00", + "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n\n# version details\nARG ARTIFACTORY_URL=vault.habana.ai\nARG VERSION=1.16.2\nARG REVISION=2\nARG PYTORCH_VERSION=2.2.2\nARG OS_NUMBER=2204\n\n# runtime variables\nENV DEBIAN_FRONTEND=noninteractive\nENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\nENV HABANA_LOGS=/var/log/habana_logs/\nENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\nENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\nENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\nENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n\n# There is no need to store pip installation files inside docker image\nENV PIP_NO_CACHE_DIR=on\nENV PIP_DISABLE_PIP_VERSION_CHECK=1\n\n# libfabric and scaling variables\nENV LIBFABRIC_VERSION=\"1.20.0\"", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1720484378", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "discussion_id": "1720484378", + "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n+\n+# There is no need to store pip installation files inside docker image\n+ENV PIP_NO_CACHE_DIR=on\n+ENV PIP_DISABLE_PIP_VERSION_CHECK=1\n+\n+# libfabric and scaling variables\n+ENV LIBFABRIC_VERSION=\"1.20.0\"", + "comment_created_at": "2024-08-17T00:13:54+00:00", + "comment_author": "thesuperzapper", + "comment_body": "Is it possible to factor this into an `ARG` rather than an `ENV`, as we do for all other versions?", + "pr_file_module": null + }, + { + "comment_id": "1724977244", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "discussion_id": "1720484378", + "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# 
https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n+\n+# There is no need to store pip installation files inside docker image\n+ENV PIP_NO_CACHE_DIR=on\n+ENV PIP_DISABLE_PIP_VERSION_CHECK=1\n+\n+# libfabric and scaling variables\n+ENV LIBFABRIC_VERSION=\"1.20.0\"", + "comment_created_at": "2024-08-21T12:35:53+00:00", + "comment_author": "tkatila", + "comment_body": "yes, I'll change it.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1720486507", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "created_at": "2024-08-17T00:18:49+00:00", + "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n\n# version details\nARG ARTIFACTORY_URL=vault.habana.ai\nARG VERSION=1.16.2\nARG REVISION=2\nARG PYTORCH_VERSION=2.2.2\nARG OS_NUMBER=2204\n\n# runtime variables\nENV DEBIAN_FRONTEND=noninteractive\nENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\nENV HABANA_LOGS=/var/log/habana_logs/\nENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\nENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\nENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\nENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1720486507", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "discussion_id": "1720486507", + "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/", + "comment_created_at": "2024-08-17T00:18:49+00:00", + "comment_author": "thesuperzapper", + "comment_body": "Is setting `PYTHONPATH` still necessary when we are sourcing `/etc/profile.d/habanalabs.sh` in bashrc/profile?\r\n\r\nI only ask because having `PYTHONPATH` set 
can break things for end users in unexpected ways.\r\n\r\nIf it turns out to still be necessary, please move this it closer to the `RUN` where we install the files under `/usr/lib/habanalabs/`.", + "pr_file_module": null + }, + { + "comment_id": "1724976985", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "discussion_id": "1720486507", + "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/", + "comment_created_at": "2024-08-21T12:35:42+00:00", + "comment_author": "tkatila", + "comment_body": "PYTHONPATH is not set in the `habanalabs.sh` but I'll need to double check if it's really needed.", + "pr_file_module": null + }, + { + "comment_id": "1728862104", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "discussion_id": "1720486507", + "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/", + "comment_created_at": "2024-08-23T12:04:00+00:00", + "comment_author": "tkatila", + "comment_body": "PYTHONPATH was necessary for normal functionality.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1720488961", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "created_at": "2024-08-17T00:27:41+00:00", + "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n\n# version details\nARG ARTIFACTORY_URL=vault.habana.ai\nARG VERSION=1.16.2\nARG REVISION=2\nARG PYTORCH_VERSION=2.2.2\nARG OS_NUMBER=2204\n\n# runtime 
variables\nENV DEBIAN_FRONTEND=noninteractive\nENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\nENV HABANA_LOGS=/var/log/habana_logs/\nENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\nENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\nENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1720488961", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "discussion_id": "1720488961", + "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768", + "comment_created_at": "2024-08-17T00:27:41+00:00", + "comment_author": "thesuperzapper", + "comment_body": "These should probably be under a `# configs - habana` section.\r\n\r\nAlso, feel like `TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD` needs an explanation of what it's for and why it's necessary in that comment.", + "pr_file_module": null + }, + { + "comment_id": "1724974865", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "discussion_id": "1720488961", + "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768", + "comment_created_at": "2024-08-21T12:34:15+00:00", + "comment_author": "tkatila", + "comment_body": "Top four variables are actually set in habanalab.sh which is sourced, so I'll drop them from the Dockerfile.\r\nThe TCMALLOC env variable is obsolete and I'll remove it.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-environment-variable-management.md b/_reviewers/kubeflow-environment-variable-management.md index ec9322f..6ce1e93 100644 --- a/_reviewers/kubeflow-environment-variable-management.md +++ b/_reviewers/kubeflow-environment-variable-management.md @@ -50,189 +50,3 @@ Manage environment variables in Docker configurations with appropriate 
scope, pl 4. **Clean up unnecessary variables**: - Remove obsolete or unused environment variables - Consider using dedicated configuration files under /etc/ instead of environment variables when appropriate - - -[ - { - "discussion_id": "1720455821", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "created_at": "2024-08-16T23:18:57+00:00", - "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n\n# version details\nARG ARTIFACTORY_URL=vault.habana.ai\nARG VERSION=1.16.2\nARG REVISION=2\nARG PYTORCH_VERSION=2.2.2\nARG OS_NUMBER=2204\n\n# runtime variables\nENV DEBIAN_FRONTEND=noninteractive", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1720455821", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "discussion_id": "1720455821", - "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive", - "comment_created_at": "2024-08-16T23:18:57+00:00", - "comment_author": "thesuperzapper", - "comment_body": "Is there a particular reason `DEBIAN_FRONTEND` is being set on the overall container, rather than within the steps which need it? Because the container is sort of \"interactive\" once the notebook is actually running.\r\n\r\nRight now, we only do this on the base image [during the `apt-get` commands](https://github.com/kubeflow/kubeflow/blob/f6563b293808c771db05023bccdd793528c53745/components/example-notebook-servers/base/Dockerfile#L47), but you might also need to do it on your install script steps, if they check for this variable.\r\n\r\nI think we should either:\r\n\r\n1. Decide that all images need this, and set it in the base image as an `ENV`\r\n2. 
Set it only on RUN commands which need it.", - "pr_file_module": null - }, - { - "comment_id": "1724980190", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "discussion_id": "1720455821", - "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive", - "comment_created_at": "2024-08-21T12:38:01+00:00", - "comment_author": "tkatila", - "comment_body": "I'll move the env variable inside the RUNs that run apt-get.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1720483506", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "created_at": "2024-08-17T00:10:54+00:00", - "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n\n# version details\nARG ARTIFACTORY_URL=vault.habana.ai\nARG VERSION=1.16.2\nARG REVISION=2\nARG PYTORCH_VERSION=2.2.2\nARG OS_NUMBER=2204\n\n# runtime variables\nENV DEBIAN_FRONTEND=noninteractive\nENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\nENV HABANA_LOGS=/var/log/habana_logs/\nENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\nENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\nENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\nENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n\n# There is no need to store pip installation files inside docker image\nENV PIP_NO_CACHE_DIR=on\nENV PIP_DISABLE_PIP_VERSION_CHECK=1\n\n# libfabric and scaling variables\nENV LIBFABRIC_VERSION=\"1.20.0\"\nENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\nENV MPI_ROOT=/opt/amazon/openmpi\nENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\nENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\nENV MPICC=${MPI_ROOT}/bin/mpicc\nENV OPAL_PREFIX=${MPI_ROOT}\nENV RDMAV_FORK_SAFE=1\nENV FI_EFA_USE_DEVICE_RDMA=1\nENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\nENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1720483506", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "discussion_id": "1720483506", - "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG 
PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n+\n+# There is no need to store pip installation files inside docker image\n+ENV PIP_NO_CACHE_DIR=on\n+ENV PIP_DISABLE_PIP_VERSION_CHECK=1\n+\n+# libfabric and scaling variables\n+ENV LIBFABRIC_VERSION=\"1.20.0\"\n+ENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\n+ENV MPI_ROOT=/opt/amazon/openmpi\n+ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\n+ENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\n+ENV MPICC=${MPI_ROOT}/bin/mpicc\n+ENV OPAL_PREFIX=${MPI_ROOT}\n+ENV RDMAV_FORK_SAFE=1\n+ENV FI_EFA_USE_DEVICE_RDMA=1\n+ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\n+ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib", - "comment_created_at": "2024-08-17T00:10:54+00:00", - "comment_author": "thesuperzapper", - "comment_body": "As there are a lot of envs being set here, some suggestions:\r\n\r\n1. If they are doing things like updating the PATH or setting configs, please place them above/below the `RUN` command which is adding that package (rather than in a common section at the top of the file).\r\n2. For the ones which are left (e.g. not related to installing a package), please group them logically underneath `#` comments which explain why they are needed and if necessary link to some external docs.", - "pr_file_module": null - }, - { - "comment_id": "1724994936", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "discussion_id": "1720483506", - "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n+\n+# There is no need to store pip installation files inside docker image\n+ENV PIP_NO_CACHE_DIR=on\n+ENV PIP_DISABLE_PIP_VERSION_CHECK=1\n+\n+# libfabric and scaling variables\n+ENV LIBFABRIC_VERSION=\"1.20.0\"\n+ENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\n+ENV MPI_ROOT=/opt/amazon/openmpi\n+ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\n+ENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\n+ENV MPICC=${MPI_ROOT}/bin/mpicc\n+ENV OPAL_PREFIX=${MPI_ROOT}\n+ENV RDMAV_FORK_SAFE=1\n+ENV FI_EFA_USE_DEVICE_RDMA=1\n+ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\n+ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib", - 
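Since this part of the deleted markdown file restates the same review thread, a second sketch may help illustrate the remaining practices the guideline above lists: leaning on a sourced config file instead of duplicated `ENV`s, documenting the variables that stay, and dropping obsolete ones. It assumes the Habana base image provides `/etc/profile.d/habanalabs.sh`, as the PYTHONPATH discussion earlier in this patch notes; the exact lines are illustrative rather than the real image definition.

```dockerfile
ARG BASE_IMG=
FROM $BASE_IMG

# configs - habana
# most Habana variables are already exported by /etc/profile.d/habanalabs.sh,
# so source it for interactive shells instead of repeating each value as a
# container-wide ENV
RUN echo 'source /etc/profile.d/habanalabs.sh' >> /etc/bash.bashrc

# configs - pip
# there is no need to store pip installation files inside the image
ENV PIP_NO_CACHE_DIR=on
ENV PIP_DISABLE_PIP_VERSION_CHECK=1

# configs - python
# a global PYTHONPATH can break user environments in unexpected ways; it is
# kept only because the Habana Python bindings under /usr/lib/habanalabs/
# need it, and that reason is recorded here
ENV PYTHONPATH=/usr/lib/habanalabs/

# obsolete knobs (e.g. TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD) are removed
# rather than carried forward
```

Anything an admin may want to tune later is usually better placed in a file under `/etc/` than hard-coded as an `ENV`, in line with the cleanup point of the guideline above.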
"comment_created_at": "2024-08-21T12:48:30+00:00", - "comment_author": "tkatila", - "comment_body": "sure, I'll move these so that they are next to the relevant RUNs", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1720484378", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "created_at": "2024-08-17T00:13:54+00:00", - "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n\n# version details\nARG ARTIFACTORY_URL=vault.habana.ai\nARG VERSION=1.16.2\nARG REVISION=2\nARG PYTORCH_VERSION=2.2.2\nARG OS_NUMBER=2204\n\n# runtime variables\nENV DEBIAN_FRONTEND=noninteractive\nENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\nENV HABANA_LOGS=/var/log/habana_logs/\nENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\nENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\nENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\nENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n\n# There is no need to store pip installation files inside docker image\nENV PIP_NO_CACHE_DIR=on\nENV PIP_DISABLE_PIP_VERSION_CHECK=1\n\n# libfabric and scaling variables\nENV LIBFABRIC_VERSION=\"1.20.0\"", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1720484378", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "discussion_id": "1720484378", - "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n+\n+# There is no need to store pip installation files inside docker image\n+ENV PIP_NO_CACHE_DIR=on\n+ENV PIP_DISABLE_PIP_VERSION_CHECK=1\n+\n+# libfabric and scaling variables\n+ENV LIBFABRIC_VERSION=\"1.20.0\"", - "comment_created_at": "2024-08-17T00:13:54+00:00", - "comment_author": "thesuperzapper", - "comment_body": "Is it possible to factor this into an `ARG` rather than an `ENV`, as we do for all other versions?", - "pr_file_module": null - }, - { - "comment_id": "1724977244", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "discussion_id": "1720484378", - "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# 
https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n+\n+# There is no need to store pip installation files inside docker image\n+ENV PIP_NO_CACHE_DIR=on\n+ENV PIP_DISABLE_PIP_VERSION_CHECK=1\n+\n+# libfabric and scaling variables\n+ENV LIBFABRIC_VERSION=\"1.20.0\"", - "comment_created_at": "2024-08-21T12:35:53+00:00", - "comment_author": "tkatila", - "comment_body": "yes, I'll change it.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1720486507", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "created_at": "2024-08-17T00:18:49+00:00", - "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n\n# version details\nARG ARTIFACTORY_URL=vault.habana.ai\nARG VERSION=1.16.2\nARG REVISION=2\nARG PYTORCH_VERSION=2.2.2\nARG OS_NUMBER=2204\n\n# runtime variables\nENV DEBIAN_FRONTEND=noninteractive\nENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\nENV HABANA_LOGS=/var/log/habana_logs/\nENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\nENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\nENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\nENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1720486507", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "discussion_id": "1720486507", - "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/", - "comment_created_at": "2024-08-17T00:18:49+00:00", - "comment_author": "thesuperzapper", - "comment_body": "Is setting `PYTHONPATH` still necessary when we are sourcing `/etc/profile.d/habanalabs.sh` in bashrc/profile?\r\n\r\nI only ask because having `PYTHONPATH` set 
can break things for end users in unexpected ways.\r\n\r\nIf it turns out to still be necessary, please move this it closer to the `RUN` where we install the files under `/usr/lib/habanalabs/`.", - "pr_file_module": null - }, - { - "comment_id": "1724976985", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "discussion_id": "1720486507", - "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/", - "comment_created_at": "2024-08-21T12:35:42+00:00", - "comment_author": "tkatila", - "comment_body": "PYTHONPATH is not set in the `habanalabs.sh` but I'll need to double check if it's really needed.", - "pr_file_module": null - }, - { - "comment_id": "1728862104", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "discussion_id": "1720486507", - "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/", - "comment_created_at": "2024-08-23T12:04:00+00:00", - "comment_author": "tkatila", - "comment_body": "PYTHONPATH was necessary for normal functionality.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1720488961", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "created_at": "2024-08-17T00:27:41+00:00", - "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n\n# version details\nARG ARTIFACTORY_URL=vault.habana.ai\nARG VERSION=1.16.2\nARG REVISION=2\nARG PYTORCH_VERSION=2.2.2\nARG OS_NUMBER=2204\n\n# runtime 
variables\nENV DEBIAN_FRONTEND=noninteractive\nENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\nENV HABANA_LOGS=/var/log/habana_logs/\nENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\nENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\nENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1720488961", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "discussion_id": "1720488961", - "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768", - "comment_created_at": "2024-08-17T00:27:41+00:00", - "comment_author": "thesuperzapper", - "comment_body": "These should probably be under a `# configs - habana` section.\r\n\r\nAlso, feel like `TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD` needs an explanation of what it's for and why it's necessary in that comment.", - "pr_file_module": null - }, - { - "comment_id": "1724974865", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "discussion_id": "1720488961", - "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768", - "comment_created_at": "2024-08-21T12:34:15+00:00", - "comment_author": "tkatila", - "comment_body": "Top four variables are actually set in habanalab.sh which is sourced, so I'll drop them from the Dockerfile.\r\nThe TCMALLOC env variable is obsolete and I'll remove it.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-externalize-configuration-parameters.json b/_reviewers/kubeflow-externalize-configuration-parameters.json new file mode 100644 index 0000000..1951baf --- /dev/null +++ b/_reviewers/kubeflow-externalize-configuration-parameters.json @@ -0,0 +1,150 @@ +[ + { + "discussion_id": "1373281958", + "pr_number": 7371, + "pr_file": 
"components/proposals/20231023-profile-controller-custom-resources.md", + "created_at": "2023-10-26T14:33:23+00:00", + "commented_code": "# Profile Controller Custom Resources\n\n**Authors**: Omri Shiv ([@OmriShiv](https://github.com/OmriShiv))\n\n## Goal\n\nFirst and foremost, the goal of creating these custom resources is to ease the burden of creating common Kubeflow\ncomponents. Users should not need to manually create `RoleBinding`s, Istio `AuthorizationPolicy`s, and others when the\ngoal is to give a user access to a namespace. By creating higher level resources, one small, purposeful resource can\ncreate the Kubernetes objects needed to support the task. This purpose enables some sub-goals:\n\nBy creating simple custom resources, these files can be checked into a GitOps system for automated deploying and\nreconciliation. There is auditability over who create a resource and a source of truth.\n\nAdditionally, this facilitates a security goal of allowing much easier clean up of resources. By adding ownership to the\nsub-resources, deleting the parents object will cascade delete the created resources. It can also serve the task of\nregenerating secrets in the event that a user has left the company and secrets need to be rotated. This can also be\nextended to trigger an external hook that may clean up resources in other systems.\n\n## Proposal\n\nThe proposed solution is to add a small set of Custom Resources (described below) to enable the creation of the needed\nsub-resources to facilitate each purpose\n\n## Resources\n\n`profile.yaml`\n\n```yaml\napiVersion: kubeflow.org/v2\nkind: Profile\nmetadata:\n name: ml\nspec:\n resourceQuotaSpec:\n hard:\n cpu: \"2\"\n memory: 2Gi\n requests.nvidia.com/gpu: \"1\"\n persistentvolumeclaims: \"1\"\n requests.storage: \"5Gi\"\n```\n\nA slightly simplified version of the current Kubeflow Profile. `resourceQuotaSpec` is optional. This will create\na `kubeflow-ml` profile, but should be able to look up the Kubeflow namespace `prefix` and adjust based on that (for\ninstance `kf-ml`)\n\n`service_account.yaml`\n\n```yaml\napiVersion: kubeflow.org/v2\nkind: ServiceAccount\nmetadata:\n name: omri-sa\n namespace: kubeflow-ml\nspec:\n role: edit\n```\n\nThis will create a `omri-sa` service account in the `kubeflow-ml` namespace with `edit` role. Internally, this will\ncreate:\n\n- ServiceAccount\n- Secret (for the service account)\n- Role (To get a token)\n- RoleBinding (For the token role)\n- ClusterRoleBinding (For the Kubeflow role)\n\n`user_permission.yaml`\n\n```yaml\napiVersion: kubeflow.org/v2\nkind: Permission", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1373281958", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7371, + "pr_file": "components/proposals/20231023-profile-controller-custom-resources.md", + "discussion_id": "1373281958", + "commented_code": "@@ -0,0 +1,110 @@\n+# Profile Controller Custom Resources\n+\n+**Authors**: Omri Shiv ([@OmriShiv](https://github.com/OmriShiv))\n+\n+## Goal\n+\n+First and foremost, the goal of creating these custom resources is to ease the burden of creating common Kubeflow\n+components. Users should not need to manually create `RoleBinding`s, Istio `AuthorizationPolicy`s, and others when the\n+goal is to give a user access to a namespace. By creating higher level resources, one small, purposeful resource can\n+create the Kubernetes objects needed to support the task. 
This purpose enables some sub-goals:\n+\n+By creating simple custom resources, these files can be checked into a GitOps system for automated deploying and\n+reconciliation. There is auditability over who create a resource and a source of truth.\n+\n+Additionally, this facilitates a security goal of allowing much easier clean up of resources. By adding ownership to the\n+sub-resources, deleting the parents object will cascade delete the created resources. It can also serve the task of\n+regenerating secrets in the event that a user has left the company and secrets need to be rotated. This can also be\n+extended to trigger an external hook that may clean up resources in other systems.\n+\n+## Proposal\n+\n+The proposed solution is to add a small set of Custom Resources (described below) to enable the creation of the needed\n+sub-resources to facilitate each purpose\n+\n+## Resources\n+\n+`profile.yaml`\n+\n+```yaml\n+apiVersion: kubeflow.org/v2\n+kind: Profile\n+metadata:\n+ name: ml\n+spec:\n+ resourceQuotaSpec:\n+ hard:\n+ cpu: \"2\"\n+ memory: 2Gi\n+ requests.nvidia.com/gpu: \"1\"\n+ persistentvolumeclaims: \"1\"\n+ requests.storage: \"5Gi\"\n+```\n+\n+A slightly simplified version of the current Kubeflow Profile. `resourceQuotaSpec` is optional. This will create\n+a `kubeflow-ml` profile, but should be able to look up the Kubeflow namespace `prefix` and adjust based on that (for\n+instance `kf-ml`)\n+\n+`service_account.yaml`\n+\n+```yaml\n+apiVersion: kubeflow.org/v2\n+kind: ServiceAccount\n+metadata:\n+ name: omri-sa\n+ namespace: kubeflow-ml\n+spec:\n+ role: edit\n+```\n+\n+This will create a `omri-sa` service account in the `kubeflow-ml` namespace with `edit` role. Internally, this will\n+create:\n+\n+- ServiceAccount\n+- Secret (for the service account)\n+- Role (To get a token)\n+- RoleBinding (For the token role)\n+- ClusterRoleBinding (For the Kubeflow role)\n+\n+`user_permission.yaml`\n+\n+```yaml\n+apiVersion: kubeflow.org/v2\n+kind: Permission", + "comment_created_at": "2023-10-26T14:33:23+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Also, ultimately we could use these CRs then to list who the contributors in the namespace are from the CentralDashboard?", + "pr_file_module": null + }, + { + "comment_id": "1373319340", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7371, + "pr_file": "components/proposals/20231023-profile-controller-custom-resources.md", + "discussion_id": "1373281958", + "commented_code": "@@ -0,0 +1,110 @@\n+# Profile Controller Custom Resources\n+\n+**Authors**: Omri Shiv ([@OmriShiv](https://github.com/OmriShiv))\n+\n+## Goal\n+\n+First and foremost, the goal of creating these custom resources is to ease the burden of creating common Kubeflow\n+components. Users should not need to manually create `RoleBinding`s, Istio `AuthorizationPolicy`s, and others when the\n+goal is to give a user access to a namespace. By creating higher level resources, one small, purposeful resource can\n+create the Kubernetes objects needed to support the task. This purpose enables some sub-goals:\n+\n+By creating simple custom resources, these files can be checked into a GitOps system for automated deploying and\n+reconciliation. There is auditability over who create a resource and a source of truth.\n+\n+Additionally, this facilitates a security goal of allowing much easier clean up of resources. By adding ownership to the\n+sub-resources, deleting the parents object will cascade delete the created resources. 
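The cascade deletion described above relies on standard Kubernetes owner references. A minimal sketch of what the controller could set on a generated child object is shown below; the parent kind, names, and UID are placeholders for illustration:

```yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: omri-edit                  # illustrative child object
  namespace: kubeflow-ml
  ownerReferences:
    - apiVersion: kubeflow.org/v2
      kind: Permission             # the proposed parent resource
      name: omri
      uid: 00000000-0000-0000-0000-000000000000   # set by the controller at creation time
      controller: true
      blockOwnerDeletion: true
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kubeflow-edit              # assumes the standard Kubeflow ClusterRole
subjects:
  - apiGroup: rbac.authorization.k8s.io
    kind: User
    name: user@example.com
```

Deleting the parent object then lets the Kubernetes garbage collector remove the RoleBinding (and any other generated children) automatically.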
It can also serve the task of\n+regenerating secrets in the event that a user has left the company and secrets need to be rotated. This can also be\n+extended to trigger an external hook that may clean up resources in other systems.\n+\n+## Proposal\n+\n+The proposed solution is to add a small set of Custom Resources (described below) to enable the creation of the needed\n+sub-resources to facilitate each purpose\n+\n+## Resources\n+\n+`profile.yaml`\n+\n+```yaml\n+apiVersion: kubeflow.org/v2\n+kind: Profile\n+metadata:\n+ name: ml\n+spec:\n+ resourceQuotaSpec:\n+ hard:\n+ cpu: \"2\"\n+ memory: 2Gi\n+ requests.nvidia.com/gpu: \"1\"\n+ persistentvolumeclaims: \"1\"\n+ requests.storage: \"5Gi\"\n+```\n+\n+A slightly simplified version of the current Kubeflow Profile. `resourceQuotaSpec` is optional. This will create\n+a `kubeflow-ml` profile, but should be able to look up the Kubeflow namespace `prefix` and adjust based on that (for\n+instance `kf-ml`)\n+\n+`service_account.yaml`\n+\n+```yaml\n+apiVersion: kubeflow.org/v2\n+kind: ServiceAccount\n+metadata:\n+ name: omri-sa\n+ namespace: kubeflow-ml\n+spec:\n+ role: edit\n+```\n+\n+This will create a `omri-sa` service account in the `kubeflow-ml` namespace with `edit` role. Internally, this will\n+create:\n+\n+- ServiceAccount\n+- Secret (for the service account)\n+- Role (To get a token)\n+- RoleBinding (For the token role)\n+- ClusterRoleBinding (For the Kubeflow role)\n+\n+`user_permission.yaml`\n+\n+```yaml\n+apiVersion: kubeflow.org/v2\n+kind: Permission", + "comment_created_at": "2023-10-26T14:57:54+00:00", + "comment_author": "omrishiv", + "comment_body": "That would be a really nice extension. Maybe a facility to use the UI to ultimately create these, if the admin allows it. One other thing we haven't talked about is the `admin` role, which today, I have a Kyverno policy I manually apply to each admin to create `Permission`s in each `kubeflow-` namespace. I would love to see if we can handle this properly without kyverno. The admin role could create this policy as a first step, but it would be nice to be properly handled internally. ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1373376622", + "pr_number": 7371, + "pr_file": "components/proposals/20231023-profile-controller-custom-resources.md", + "created_at": "2023-10-26T15:38:54+00:00", + "commented_code": "# Profile Controller Custom Resources\n\n**Authors**: Omri Shiv ([@OmriShiv](https://github.com/OmriShiv))\n\n## Goal\n\nFirst and foremost, the goal of creating these custom resources is to ease the burden of creating common Kubeflow\ncomponents. Users should not need to manually create `RoleBinding`s, Istio `AuthorizationPolicy`s, and others when the\ngoal is to give a user access to a namespace. By creating higher level resources, one small, purposeful resource can\ncreate the Kubernetes objects needed to support the task. This purpose enables some sub-goals:\n\nBy creating simple custom resources, these files can be checked into a GitOps system for automated deploying and\nreconciliation. There is auditability over who create a resource and a source of truth.\n\nAdditionally, this facilitates a security goal of allowing much easier clean up of resources. By adding ownership to the\nsub-resources, deleting the parents object will cascade delete the created resources. It can also serve the task of\nregenerating secrets in the event that a user has left the company and secrets need to be rotated. 
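The Kyverno workaround mentioned above is not spelled out in the thread; a rough sketch of the kind of generate policy it describes is below. Field names follow Kyverno v1 and the `kubeflow-admin` ClusterRole is assumed; both may differ in a real deployment:

```yaml
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
  name: grant-admin-in-profile-namespaces
spec:
  rules:
    - name: generate-admin-rolebinding
      match:
        any:
          - resources:
              kinds: ["Namespace"]
              names: ["kubeflow-*"]      # every profile namespace
      generate:
        apiVersion: rbac.authorization.k8s.io/v1
        kind: RoleBinding
        name: cluster-admin-access       # illustrative name
        namespace: "{{request.object.metadata.name}}"
        synchronize: true
        data:
          roleRef:
            apiGroup: rbac.authorization.k8s.io
            kind: ClusterRole
            name: kubeflow-admin
          subjects:
            - apiGroup: rbac.authorization.k8s.io
              kind: User
              name: admin@example.com
```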
This can also be\nextended to trigger an external hook that may clean up resources in other systems.\n\n## Proposal\n\nThe proposed solution is to add a small set of Custom Resources (described below) to enable the creation of the needed\nsub-resources to facilitate each purpose\n\n## Resources\n\n`profile.yaml`\n\n```yaml\napiVersion: kubeflow.org/v2\nkind: Profile\nmetadata:\n name: ml\nspec:\n resourceQuotaSpec:\n hard:\n cpu: \"2\"\n memory: 2Gi\n requests.nvidia.com/gpu: \"1\"\n persistentvolumeclaims: \"1\"\n requests.storage: \"5Gi\"\n```\n\nA slightly simplified version of the current Kubeflow Profile. `resourceQuotaSpec` is optional. This will create\na `kubeflow-ml` profile, but should be able to look up the Kubeflow namespace `prefix` and adjust based on that (for\ninstance `kf-ml`)", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1373376622", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7371, + "pr_file": "components/proposals/20231023-profile-controller-custom-resources.md", + "discussion_id": "1373376622", + "commented_code": "@@ -0,0 +1,110 @@\n+# Profile Controller Custom Resources\n+\n+**Authors**: Omri Shiv ([@OmriShiv](https://github.com/OmriShiv))\n+\n+## Goal\n+\n+First and foremost, the goal of creating these custom resources is to ease the burden of creating common Kubeflow\n+components. Users should not need to manually create `RoleBinding`s, Istio `AuthorizationPolicy`s, and others when the\n+goal is to give a user access to a namespace. By creating higher level resources, one small, purposeful resource can\n+create the Kubernetes objects needed to support the task. This purpose enables some sub-goals:\n+\n+By creating simple custom resources, these files can be checked into a GitOps system for automated deploying and\n+reconciliation. There is auditability over who create a resource and a source of truth.\n+\n+Additionally, this facilitates a security goal of allowing much easier clean up of resources. By adding ownership to the\n+sub-resources, deleting the parents object will cascade delete the created resources. It can also serve the task of\n+regenerating secrets in the event that a user has left the company and secrets need to be rotated. This can also be\n+extended to trigger an external hook that may clean up resources in other systems.\n+\n+## Proposal\n+\n+The proposed solution is to add a small set of Custom Resources (described below) to enable the creation of the needed\n+sub-resources to facilitate each purpose\n+\n+## Resources\n+\n+`profile.yaml`\n+\n+```yaml\n+apiVersion: kubeflow.org/v2\n+kind: Profile\n+metadata:\n+ name: ml\n+spec:\n+ resourceQuotaSpec:\n+ hard:\n+ cpu: \"2\"\n+ memory: 2Gi\n+ requests.nvidia.com/gpu: \"1\"\n+ persistentvolumeclaims: \"1\"\n+ requests.storage: \"5Gi\"\n+```\n+\n+A slightly simplified version of the current Kubeflow Profile. `resourceQuotaSpec` is optional. 
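For reference, the `resourceQuotaSpec` shown above maps directly onto a plain `ResourceQuota` that the controller would create in the profile namespace; a sketch with an illustrative object name:

```yaml
apiVersion: v1
kind: ResourceQuota
metadata:
  name: profile-quota            # illustrative name
  namespace: kubeflow-ml
spec:
  hard:
    cpu: "2"
    memory: 2Gi
    requests.nvidia.com/gpu: "1"
    persistentvolumeclaims: "1"
    requests.storage: 5Gi
```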
This will create\n+a `kubeflow-ml` profile, but should be able to look up the Kubeflow namespace `prefix` and adjust based on that (for\n+instance `kf-ml`)\n+", + "comment_created_at": "2023-10-26T15:38:54+00:00", + "comment_author": "TobiasGoerke", + "comment_body": "While we're extending the profile CRD: what do you think about adding an option for a LimitRange besides the ResourceQuota?", + "pr_file_module": null + }, + { + "comment_id": "1373610658", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7371, + "pr_file": "components/proposals/20231023-profile-controller-custom-resources.md", + "discussion_id": "1373376622", + "commented_code": "@@ -0,0 +1,110 @@\n+# Profile Controller Custom Resources\n+\n+**Authors**: Omri Shiv ([@OmriShiv](https://github.com/OmriShiv))\n+\n+## Goal\n+\n+First and foremost, the goal of creating these custom resources is to ease the burden of creating common Kubeflow\n+components. Users should not need to manually create `RoleBinding`s, Istio `AuthorizationPolicy`s, and others when the\n+goal is to give a user access to a namespace. By creating higher level resources, one small, purposeful resource can\n+create the Kubernetes objects needed to support the task. This purpose enables some sub-goals:\n+\n+By creating simple custom resources, these files can be checked into a GitOps system for automated deploying and\n+reconciliation. There is auditability over who create a resource and a source of truth.\n+\n+Additionally, this facilitates a security goal of allowing much easier clean up of resources. By adding ownership to the\n+sub-resources, deleting the parents object will cascade delete the created resources. It can also serve the task of\n+regenerating secrets in the event that a user has left the company and secrets need to be rotated. This can also be\n+extended to trigger an external hook that may clean up resources in other systems.\n+\n+## Proposal\n+\n+The proposed solution is to add a small set of Custom Resources (described below) to enable the creation of the needed\n+sub-resources to facilitate each purpose\n+\n+## Resources\n+\n+`profile.yaml`\n+\n+```yaml\n+apiVersion: kubeflow.org/v2\n+kind: Profile\n+metadata:\n+ name: ml\n+spec:\n+ resourceQuotaSpec:\n+ hard:\n+ cpu: \"2\"\n+ memory: 2Gi\n+ requests.nvidia.com/gpu: \"1\"\n+ persistentvolumeclaims: \"1\"\n+ requests.storage: \"5Gi\"\n+```\n+\n+A slightly simplified version of the current Kubeflow Profile. `resourceQuotaSpec` is optional. This will create\n+a `kubeflow-ml` profile, but should be able to look up the Kubeflow namespace `prefix` and adjust based on that (for\n+instance `kf-ml`)\n+", + "comment_created_at": "2023-10-26T18:32:42+00:00", + "comment_author": "omrishiv", + "comment_body": "This is a great addition. 
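The LimitRange option suggested above would sit alongside the ResourceQuota; a sketch of the object such a field might generate, where the Profile field name and the default values are hypothetical:

```yaml
apiVersion: v1
kind: LimitRange
metadata:
  name: profile-limit-range      # illustrative name
  namespace: kubeflow-ml
spec:
  limits:
    - type: Container
      default:                   # limits applied when a container sets none
        cpu: 500m
        memory: 512Mi
      defaultRequest:            # requests applied when a container sets none
        cpu: 250m
        memory: 256Mi
```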
We currently do it using Kyverno, but would be nice to be handled internally", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1185621732", + "pr_number": 7123, + "pr_file": "conformance/1.7/README.md", + "created_at": "2023-05-05T00:34:25+00:00", + "commented_code": "# Kubeflow conformance program (WIP)\n\nBefore running the conformance tests, you need to configure kubectl default context to point to the k8s cluster that is hosting Kubeflow.\n\nTODO: Make the kubeflow namespace configurable.", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1185621732", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7123, + "pr_file": "conformance/1.7/README.md", + "discussion_id": "1185621732", + "commented_code": "@@ -0,0 +1,27 @@\n+# Kubeflow conformance program (WIP)\n+\n+Before running the conformance tests, you need to configure kubectl default context to point to the k8s cluster that is hosting Kubeflow.\n+\n+TODO: Make the kubeflow namespace configurable.", + "comment_created_at": "2023-05-05T00:34:25+00:00", + "comment_author": "gkcalat", + "comment_body": "NIT: move this out of `README.md`. `Makefile`, maybe?", + "pr_file_module": null + }, + { + "comment_id": "1186507328", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7123, + "pr_file": "conformance/1.7/README.md", + "discussion_id": "1185621732", + "commented_code": "@@ -0,0 +1,27 @@\n+# Kubeflow conformance program (WIP)\n+\n+Before running the conformance tests, you need to configure kubectl default context to point to the k8s cluster that is hosting Kubeflow.\n+\n+TODO: Make the kubeflow namespace configurable.", + "comment_created_at": "2023-05-05T21:18:38+00:00", + "comment_author": "james-jwu", + "comment_body": "Removed. The KF service namespace was made configurable.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "596656576", + "pr_number": 5694, + "pr_file": "components/example-notebook-servers/README.md", + "created_at": "2021-03-18T08:49:21+00:00", + "commented_code": "# Example Notebook Servers\n\n**DISCLAIMER:** The Notebooks Working Group provides these Dockerfiles and\ntheir images as examples only, they are not tested with our CI/CD pipelines\nand are not certified to work in every situation. As such, issues related to\nthese images will be dealt with in a best efforts approach. The Notebooks\nWorking Group will work on officially supporting some of these Notebook\nServer images in the near future. 
If you do encounter a problem in one of\nthese images, contributions and issue reports are greatly appreciated.\n\n## Introduction\n\nThis directory contains Dockerfiles that are intended to show users how to\ncreate images for various types of Notebook Servers that can run on Kubeflow.\nThere is a common base images that starts from Ubuntu 20.04, adds some\ndependencies and common utilities from APT, installs `kubectl`, creates the\nuser account for the downstream images and adds the\n[S6-overlay](https://github.com/just-containers/s6-overlay) for the init system.\nFrom this base image, images for Jupyter, RStudio and Visual Studio Code\n(through [code-server]([https://github.com/cdr/code-server)) are created.\nFrom the Jupyter image, images are created that show users how to install\nTensorFlow and PyTorch both with and without support for NVIDIA GPUs.\nIn turn, these images are extended with an opinionated set of common tools\nand packages to help new users to quickly get started with Kubeflow.\n\n**Lineage of the provided example Notebook Server Images:**\n\n![Flow Chart of example Notebook Servers](./kubeflow-example-notebook-servers.png)\n\n## Creating custom Notebook Server images\n\nBecause the needs of users varies greatly, the provided images were created to\nbe easily extended by using the [S6-overlay](https://github.com/just-containers/s6-overlay)\ninit system and including [Conda](https://github.com/conda-forge/miniforge).\n\n### Installing packages\n\nThe most common customization to Notebook Server images will be to install\na selection of packages specific that will be needed in the Notebook Server.\nUsers can install packages in a running Notebook Server using `pip install ...`\nor `conda install ...`. However, packages installed in this manner will not\npersist if the Notebook Server is stopped or the Pod is restarted.\nWhile it is possible to install Python packages in the user's home directory\nby using `pip install --user ...`, this can cause unexpected behaviour if the\nworkspace volume is later used in a different Notebook Server, as the packages\npresent in the home directory might be used rather than the ones present in the\nNotebook Server's image. For this reason it is recommended to create custom", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "596656576", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5694, + "pr_file": "components/example-notebook-servers/README.md", + "discussion_id": "596656576", + "commented_code": "@@ -0,0 +1,139 @@\n+# Example Notebook Servers\n+\n+**DISCLAIMER:** The Notebooks Working Group provides these Dockerfiles and\n+their images as examples only, they are not tested with our CI/CD pipelines\n+and are not certified to work in every situation. As such, issues related to\n+these images will be dealt with in a best efforts approach. The Notebooks\n+Working Group will work on officially supporting some of these Notebook\n+Server images in the near future. 
If you do encounter a problem in one of\n+these images, contributions and issue reports are greatly appreciated.\n+\n+## Introduction\n+\n+This directory contains Dockerfiles that are intended to show users how to\n+create images for various types of Notebook Servers that can run on Kubeflow.\n+There is a common base images that starts from Ubuntu 20.04, adds some\n+dependencies and common utilities from APT, installs `kubectl`, creates the\n+user account for the downstream images and adds the\n+[S6-overlay](https://github.com/just-containers/s6-overlay) for the init system.\n+From this base image, images for Jupyter, RStudio and Visual Studio Code\n+(through [code-server]([https://github.com/cdr/code-server)) are created.\n+From the Jupyter image, images are created that show users how to install\n+TensorFlow and PyTorch both with and without support for NVIDIA GPUs.\n+In turn, these images are extended with an opinionated set of common tools\n+and packages to help new users to quickly get started with Kubeflow.\n+\n+**Lineage of the provided example Notebook Server Images:**\n+\n+![Flow Chart of example Notebook Servers](./kubeflow-example-notebook-servers.png)\n+\n+## Creating custom Notebook Server images\n+\n+Because the needs of users varies greatly, the provided images were created to\n+be easily extended by using the [S6-overlay](https://github.com/just-containers/s6-overlay)\n+init system and including [Conda](https://github.com/conda-forge/miniforge).\n+\n+### Installing packages\n+\n+The most common customization to Notebook Server images will be to install\n+a selection of packages specific that will be needed in the Notebook Server.\n+Users can install packages in a running Notebook Server using `pip install ...`\n+or `conda install ...`. However, packages installed in this manner will not\n+persist if the Notebook Server is stopped or the Pod is restarted.\n+While it is possible to install Python packages in the user's home directory\n+by using `pip install --user ...`, this can cause unexpected behaviour if the\n+workspace volume is later used in a different Notebook Server, as the packages\n+present in the home directory might be used rather than the ones present in the\n+Notebook Server's image. For this reason it is recommended to create custom", + "comment_created_at": "2021-03-18T08:49:21+00:00", + "comment_author": "StefanoFioravanzo", + "comment_body": "I am not sure the discussion about the lifecycle of packages and PVCs is relevant to this document. I would remove it", + "pr_file_module": null + }, + { + "comment_id": "596686760", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5694, + "pr_file": "components/example-notebook-servers/README.md", + "discussion_id": "596656576", + "commented_code": "@@ -0,0 +1,139 @@\n+# Example Notebook Servers\n+\n+**DISCLAIMER:** The Notebooks Working Group provides these Dockerfiles and\n+their images as examples only, they are not tested with our CI/CD pipelines\n+and are not certified to work in every situation. As such, issues related to\n+these images will be dealt with in a best efforts approach. The Notebooks\n+Working Group will work on officially supporting some of these Notebook\n+Server images in the near future. 
If you do encounter a problem in one of\n+these images, contributions and issue reports are greatly appreciated.\n+\n+## Introduction\n+\n+This directory contains Dockerfiles that are intended to show users how to\n+create images for various types of Notebook Servers that can run on Kubeflow.\n+There is a common base images that starts from Ubuntu 20.04, adds some\n+dependencies and common utilities from APT, installs `kubectl`, creates the\n+user account for the downstream images and adds the\n+[S6-overlay](https://github.com/just-containers/s6-overlay) for the init system.\n+From this base image, images for Jupyter, RStudio and Visual Studio Code\n+(through [code-server]([https://github.com/cdr/code-server)) are created.\n+From the Jupyter image, images are created that show users how to install\n+TensorFlow and PyTorch both with and without support for NVIDIA GPUs.\n+In turn, these images are extended with an opinionated set of common tools\n+and packages to help new users to quickly get started with Kubeflow.\n+\n+**Lineage of the provided example Notebook Server Images:**\n+\n+![Flow Chart of example Notebook Servers](./kubeflow-example-notebook-servers.png)\n+\n+## Creating custom Notebook Server images\n+\n+Because the needs of users varies greatly, the provided images were created to\n+be easily extended by using the [S6-overlay](https://github.com/just-containers/s6-overlay)\n+init system and including [Conda](https://github.com/conda-forge/miniforge).\n+\n+### Installing packages\n+\n+The most common customization to Notebook Server images will be to install\n+a selection of packages specific that will be needed in the Notebook Server.\n+Users can install packages in a running Notebook Server using `pip install ...`\n+or `conda install ...`. However, packages installed in this manner will not\n+persist if the Notebook Server is stopped or the Pod is restarted.\n+While it is possible to install Python packages in the user's home directory\n+by using `pip install --user ...`, this can cause unexpected behaviour if the\n+workspace volume is later used in a different Notebook Server, as the packages\n+present in the home directory might be used rather than the ones present in the\n+Notebook Server's image. For this reason it is recommended to create custom", + "comment_created_at": "2021-03-18T09:29:57+00:00", + "comment_author": "davidspek", + "comment_body": "While not directly relevant to this document, this very situation has been the cause of issues in the past so I do think this should be stated somewhere. I agree, it is a bit out of place here. I'll leave it in here for the moment until while I think of what to do with it. If needed, it can quickly be removed before merging. \r\n\r\nI'm thinking a `Best practices and considerations` section might be on option (in this README, as well as on the website, which I'm guessing will be mostly copy/paste from here with some formatting changes). ", + "pr_file_module": null + }, + { + "comment_id": "608237698", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5694, + "pr_file": "components/example-notebook-servers/README.md", + "discussion_id": "596656576", + "commented_code": "@@ -0,0 +1,139 @@\n+# Example Notebook Servers\n+\n+**DISCLAIMER:** The Notebooks Working Group provides these Dockerfiles and\n+their images as examples only, they are not tested with our CI/CD pipelines\n+and are not certified to work in every situation. As such, issues related to\n+these images will be dealt with in a best efforts approach. 
The Notebooks\n+Working Group will work on officially supporting some of these Notebook\n+Server images in the near future. If you do encounter a problem in one of\n+these images, contributions and issue reports are greatly appreciated.\n+\n+## Introduction\n+\n+This directory contains Dockerfiles that are intended to show users how to\n+create images for various types of Notebook Servers that can run on Kubeflow.\n+There is a common base images that starts from Ubuntu 20.04, adds some\n+dependencies and common utilities from APT, installs `kubectl`, creates the\n+user account for the downstream images and adds the\n+[S6-overlay](https://github.com/just-containers/s6-overlay) for the init system.\n+From this base image, images for Jupyter, RStudio and Visual Studio Code\n+(through [code-server]([https://github.com/cdr/code-server)) are created.\n+From the Jupyter image, images are created that show users how to install\n+TensorFlow and PyTorch both with and without support for NVIDIA GPUs.\n+In turn, these images are extended with an opinionated set of common tools\n+and packages to help new users to quickly get started with Kubeflow.\n+\n+**Lineage of the provided example Notebook Server Images:**\n+\n+![Flow Chart of example Notebook Servers](./kubeflow-example-notebook-servers.png)\n+\n+## Creating custom Notebook Server images\n+\n+Because the needs of users varies greatly, the provided images were created to\n+be easily extended by using the [S6-overlay](https://github.com/just-containers/s6-overlay)\n+init system and including [Conda](https://github.com/conda-forge/miniforge).\n+\n+### Installing packages\n+\n+The most common customization to Notebook Server images will be to install\n+a selection of packages specific that will be needed in the Notebook Server.\n+Users can install packages in a running Notebook Server using `pip install ...`\n+or `conda install ...`. However, packages installed in this manner will not\n+persist if the Notebook Server is stopped or the Pod is restarted.\n+While it is possible to install Python packages in the user's home directory\n+by using `pip install --user ...`, this can cause unexpected behaviour if the\n+workspace volume is later used in a different Notebook Server, as the packages\n+present in the home directory might be used rather than the ones present in the\n+Notebook Server's image. For this reason it is recommended to create custom", + "comment_created_at": "2021-04-06T23:15:38+00:00", + "comment_author": "thesuperzapper", + "comment_body": "While I think this wording can be simplified, we should definitely discuss the implications of how kubeflow mounts PVCs to the home directory for `pip install`", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-externalize-configuration-parameters.md b/_reviewers/kubeflow-externalize-configuration-parameters.md index bbcbe37..79b85a1 100644 --- a/_reviewers/kubeflow-externalize-configuration-parameters.md +++ b/_reviewers/kubeflow-externalize-configuration-parameters.md @@ -41,155 +41,3 @@ Key practices: - Add ownership relationships to resources created from configurations to enable proper cleanup When handling package installation in environments like notebooks, be aware of how configuration choices affect persistence across restarts and different environments. 
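The persistence concern above is usually addressed by baking packages into a custom image rather than running `pip install --user` into the PVC-mounted home directory; a minimal sketch, with an illustrative base image tag and package pins:

```dockerfile
# extend a Kubeflow Jupyter base image so packages live in the image,
# survive pod restarts, and never shadow anything on the workspace volume
FROM kubeflownotebookswg/jupyter-scipy:latest

# install into the image's environment, not the user's home directory
RUN python3 -m pip install --no-cache-dir \
      pandas==2.2.2 \
      scikit-learn==1.5.0
```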
- - -[ - { - "discussion_id": "1373281958", - "pr_number": 7371, - "pr_file": "components/proposals/20231023-profile-controller-custom-resources.md", - "created_at": "2023-10-26T14:33:23+00:00", - "commented_code": "# Profile Controller Custom Resources\n\n**Authors**: Omri Shiv ([@OmriShiv](https://github.com/OmriShiv))\n\n## Goal\n\nFirst and foremost, the goal of creating these custom resources is to ease the burden of creating common Kubeflow\ncomponents. Users should not need to manually create `RoleBinding`s, Istio `AuthorizationPolicy`s, and others when the\ngoal is to give a user access to a namespace. By creating higher level resources, one small, purposeful resource can\ncreate the Kubernetes objects needed to support the task. This purpose enables some sub-goals:\n\nBy creating simple custom resources, these files can be checked into a GitOps system for automated deploying and\nreconciliation. There is auditability over who create a resource and a source of truth.\n\nAdditionally, this facilitates a security goal of allowing much easier clean up of resources. By adding ownership to the\nsub-resources, deleting the parents object will cascade delete the created resources. It can also serve the task of\nregenerating secrets in the event that a user has left the company and secrets need to be rotated. This can also be\nextended to trigger an external hook that may clean up resources in other systems.\n\n## Proposal\n\nThe proposed solution is to add a small set of Custom Resources (described below) to enable the creation of the needed\nsub-resources to facilitate each purpose\n\n## Resources\n\n`profile.yaml`\n\n```yaml\napiVersion: kubeflow.org/v2\nkind: Profile\nmetadata:\n name: ml\nspec:\n resourceQuotaSpec:\n hard:\n cpu: \"2\"\n memory: 2Gi\n requests.nvidia.com/gpu: \"1\"\n persistentvolumeclaims: \"1\"\n requests.storage: \"5Gi\"\n```\n\nA slightly simplified version of the current Kubeflow Profile. `resourceQuotaSpec` is optional. This will create\na `kubeflow-ml` profile, but should be able to look up the Kubeflow namespace `prefix` and adjust based on that (for\ninstance `kf-ml`)\n\n`service_account.yaml`\n\n```yaml\napiVersion: kubeflow.org/v2\nkind: ServiceAccount\nmetadata:\n name: omri-sa\n namespace: kubeflow-ml\nspec:\n role: edit\n```\n\nThis will create a `omri-sa` service account in the `kubeflow-ml` namespace with `edit` role. Internally, this will\ncreate:\n\n- ServiceAccount\n- Secret (for the service account)\n- Role (To get a token)\n- RoleBinding (For the token role)\n- ClusterRoleBinding (For the Kubeflow role)\n\n`user_permission.yaml`\n\n```yaml\napiVersion: kubeflow.org/v2\nkind: Permission", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1373281958", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7371, - "pr_file": "components/proposals/20231023-profile-controller-custom-resources.md", - "discussion_id": "1373281958", - "commented_code": "@@ -0,0 +1,110 @@\n+# Profile Controller Custom Resources\n+\n+**Authors**: Omri Shiv ([@OmriShiv](https://github.com/OmriShiv))\n+\n+## Goal\n+\n+First and foremost, the goal of creating these custom resources is to ease the burden of creating common Kubeflow\n+components. Users should not need to manually create `RoleBinding`s, Istio `AuthorizationPolicy`s, and others when the\n+goal is to give a user access to a namespace. By creating higher level resources, one small, purposeful resource can\n+create the Kubernetes objects needed to support the task. 
This purpose enables some sub-goals:\n+\n+By creating simple custom resources, these files can be checked into a GitOps system for automated deploying and\n+reconciliation. There is auditability over who create a resource and a source of truth.\n+\n+Additionally, this facilitates a security goal of allowing much easier clean up of resources. By adding ownership to the\n+sub-resources, deleting the parents object will cascade delete the created resources. It can also serve the task of\n+regenerating secrets in the event that a user has left the company and secrets need to be rotated. This can also be\n+extended to trigger an external hook that may clean up resources in other systems.\n+\n+## Proposal\n+\n+The proposed solution is to add a small set of Custom Resources (described below) to enable the creation of the needed\n+sub-resources to facilitate each purpose\n+\n+## Resources\n+\n+`profile.yaml`\n+\n+```yaml\n+apiVersion: kubeflow.org/v2\n+kind: Profile\n+metadata:\n+ name: ml\n+spec:\n+ resourceQuotaSpec:\n+ hard:\n+ cpu: \"2\"\n+ memory: 2Gi\n+ requests.nvidia.com/gpu: \"1\"\n+ persistentvolumeclaims: \"1\"\n+ requests.storage: \"5Gi\"\n+```\n+\n+A slightly simplified version of the current Kubeflow Profile. `resourceQuotaSpec` is optional. This will create\n+a `kubeflow-ml` profile, but should be able to look up the Kubeflow namespace `prefix` and adjust based on that (for\n+instance `kf-ml`)\n+\n+`service_account.yaml`\n+\n+```yaml\n+apiVersion: kubeflow.org/v2\n+kind: ServiceAccount\n+metadata:\n+ name: omri-sa\n+ namespace: kubeflow-ml\n+spec:\n+ role: edit\n+```\n+\n+This will create a `omri-sa` service account in the `kubeflow-ml` namespace with `edit` role. Internally, this will\n+create:\n+\n+- ServiceAccount\n+- Secret (for the service account)\n+- Role (To get a token)\n+- RoleBinding (For the token role)\n+- ClusterRoleBinding (For the Kubeflow role)\n+\n+`user_permission.yaml`\n+\n+```yaml\n+apiVersion: kubeflow.org/v2\n+kind: Permission", - "comment_created_at": "2023-10-26T14:33:23+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Also, ultimately we could use these CRs then to list who the contributors in the namespace are from the CentralDashboard?", - "pr_file_module": null - }, - { - "comment_id": "1373319340", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7371, - "pr_file": "components/proposals/20231023-profile-controller-custom-resources.md", - "discussion_id": "1373281958", - "commented_code": "@@ -0,0 +1,110 @@\n+# Profile Controller Custom Resources\n+\n+**Authors**: Omri Shiv ([@OmriShiv](https://github.com/OmriShiv))\n+\n+## Goal\n+\n+First and foremost, the goal of creating these custom resources is to ease the burden of creating common Kubeflow\n+components. Users should not need to manually create `RoleBinding`s, Istio `AuthorizationPolicy`s, and others when the\n+goal is to give a user access to a namespace. By creating higher level resources, one small, purposeful resource can\n+create the Kubernetes objects needed to support the task. This purpose enables some sub-goals:\n+\n+By creating simple custom resources, these files can be checked into a GitOps system for automated deploying and\n+reconciliation. There is auditability over who create a resource and a source of truth.\n+\n+Additionally, this facilitates a security goal of allowing much easier clean up of resources. By adding ownership to the\n+sub-resources, deleting the parents object will cascade delete the created resources. 
It can also serve the task of\n+regenerating secrets in the event that a user has left the company and secrets need to be rotated. This can also be\n+extended to trigger an external hook that may clean up resources in other systems.\n+\n+## Proposal\n+\n+The proposed solution is to add a small set of Custom Resources (described below) to enable the creation of the needed\n+sub-resources to facilitate each purpose\n+\n+## Resources\n+\n+`profile.yaml`\n+\n+```yaml\n+apiVersion: kubeflow.org/v2\n+kind: Profile\n+metadata:\n+ name: ml\n+spec:\n+ resourceQuotaSpec:\n+ hard:\n+ cpu: \"2\"\n+ memory: 2Gi\n+ requests.nvidia.com/gpu: \"1\"\n+ persistentvolumeclaims: \"1\"\n+ requests.storage: \"5Gi\"\n+```\n+\n+A slightly simplified version of the current Kubeflow Profile. `resourceQuotaSpec` is optional. This will create\n+a `kubeflow-ml` profile, but should be able to look up the Kubeflow namespace `prefix` and adjust based on that (for\n+instance `kf-ml`)\n+\n+`service_account.yaml`\n+\n+```yaml\n+apiVersion: kubeflow.org/v2\n+kind: ServiceAccount\n+metadata:\n+ name: omri-sa\n+ namespace: kubeflow-ml\n+spec:\n+ role: edit\n+```\n+\n+This will create a `omri-sa` service account in the `kubeflow-ml` namespace with `edit` role. Internally, this will\n+create:\n+\n+- ServiceAccount\n+- Secret (for the service account)\n+- Role (To get a token)\n+- RoleBinding (For the token role)\n+- ClusterRoleBinding (For the Kubeflow role)\n+\n+`user_permission.yaml`\n+\n+```yaml\n+apiVersion: kubeflow.org/v2\n+kind: Permission", - "comment_created_at": "2023-10-26T14:57:54+00:00", - "comment_author": "omrishiv", - "comment_body": "That would be a really nice extension. Maybe a facility to use the UI to ultimately create these, if the admin allows it. One other thing we haven't talked about is the `admin` role, which today, I have a Kyverno policy I manually apply to each admin to create `Permission`s in each `kubeflow-` namespace. I would love to see if we can handle this properly without kyverno. The admin role could create this policy as a first step, but it would be nice to be properly handled internally. ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1373376622", - "pr_number": 7371, - "pr_file": "components/proposals/20231023-profile-controller-custom-resources.md", - "created_at": "2023-10-26T15:38:54+00:00", - "commented_code": "# Profile Controller Custom Resources\n\n**Authors**: Omri Shiv ([@OmriShiv](https://github.com/OmriShiv))\n\n## Goal\n\nFirst and foremost, the goal of creating these custom resources is to ease the burden of creating common Kubeflow\ncomponents. Users should not need to manually create `RoleBinding`s, Istio `AuthorizationPolicy`s, and others when the\ngoal is to give a user access to a namespace. By creating higher level resources, one small, purposeful resource can\ncreate the Kubernetes objects needed to support the task. This purpose enables some sub-goals:\n\nBy creating simple custom resources, these files can be checked into a GitOps system for automated deploying and\nreconciliation. There is auditability over who create a resource and a source of truth.\n\nAdditionally, this facilitates a security goal of allowing much easier clean up of resources. By adding ownership to the\nsub-resources, deleting the parents object will cascade delete the created resources. It can also serve the task of\nregenerating secrets in the event that a user has left the company and secrets need to be rotated. 
This can also be\nextended to trigger an external hook that may clean up resources in other systems.\n\n## Proposal\n\nThe proposed solution is to add a small set of Custom Resources (described below) to enable the creation of the needed\nsub-resources to facilitate each purpose\n\n## Resources\n\n`profile.yaml`\n\n```yaml\napiVersion: kubeflow.org/v2\nkind: Profile\nmetadata:\n name: ml\nspec:\n resourceQuotaSpec:\n hard:\n cpu: \"2\"\n memory: 2Gi\n requests.nvidia.com/gpu: \"1\"\n persistentvolumeclaims: \"1\"\n requests.storage: \"5Gi\"\n```\n\nA slightly simplified version of the current Kubeflow Profile. `resourceQuotaSpec` is optional. This will create\na `kubeflow-ml` profile, but should be able to look up the Kubeflow namespace `prefix` and adjust based on that (for\ninstance `kf-ml`)", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1373376622", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7371, - "pr_file": "components/proposals/20231023-profile-controller-custom-resources.md", - "discussion_id": "1373376622", - "commented_code": "@@ -0,0 +1,110 @@\n+# Profile Controller Custom Resources\n+\n+**Authors**: Omri Shiv ([@OmriShiv](https://github.com/OmriShiv))\n+\n+## Goal\n+\n+First and foremost, the goal of creating these custom resources is to ease the burden of creating common Kubeflow\n+components. Users should not need to manually create `RoleBinding`s, Istio `AuthorizationPolicy`s, and others when the\n+goal is to give a user access to a namespace. By creating higher level resources, one small, purposeful resource can\n+create the Kubernetes objects needed to support the task. This purpose enables some sub-goals:\n+\n+By creating simple custom resources, these files can be checked into a GitOps system for automated deploying and\n+reconciliation. There is auditability over who create a resource and a source of truth.\n+\n+Additionally, this facilitates a security goal of allowing much easier clean up of resources. By adding ownership to the\n+sub-resources, deleting the parents object will cascade delete the created resources. It can also serve the task of\n+regenerating secrets in the event that a user has left the company and secrets need to be rotated. This can also be\n+extended to trigger an external hook that may clean up resources in other systems.\n+\n+## Proposal\n+\n+The proposed solution is to add a small set of Custom Resources (described below) to enable the creation of the needed\n+sub-resources to facilitate each purpose\n+\n+## Resources\n+\n+`profile.yaml`\n+\n+```yaml\n+apiVersion: kubeflow.org/v2\n+kind: Profile\n+metadata:\n+ name: ml\n+spec:\n+ resourceQuotaSpec:\n+ hard:\n+ cpu: \"2\"\n+ memory: 2Gi\n+ requests.nvidia.com/gpu: \"1\"\n+ persistentvolumeclaims: \"1\"\n+ requests.storage: \"5Gi\"\n+```\n+\n+A slightly simplified version of the current Kubeflow Profile. `resourceQuotaSpec` is optional. 
This will create\n+a `kubeflow-ml` profile, but should be able to look up the Kubeflow namespace `prefix` and adjust based on that (for\n+instance `kf-ml`)\n+", - "comment_created_at": "2023-10-26T15:38:54+00:00", - "comment_author": "TobiasGoerke", - "comment_body": "While we're extending the profile CRD: what do you think about adding an option for a LimitRange besides the ResourceQuota?", - "pr_file_module": null - }, - { - "comment_id": "1373610658", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7371, - "pr_file": "components/proposals/20231023-profile-controller-custom-resources.md", - "discussion_id": "1373376622", - "commented_code": "@@ -0,0 +1,110 @@\n+# Profile Controller Custom Resources\n+\n+**Authors**: Omri Shiv ([@OmriShiv](https://github.com/OmriShiv))\n+\n+## Goal\n+\n+First and foremost, the goal of creating these custom resources is to ease the burden of creating common Kubeflow\n+components. Users should not need to manually create `RoleBinding`s, Istio `AuthorizationPolicy`s, and others when the\n+goal is to give a user access to a namespace. By creating higher level resources, one small, purposeful resource can\n+create the Kubernetes objects needed to support the task. This purpose enables some sub-goals:\n+\n+By creating simple custom resources, these files can be checked into a GitOps system for automated deploying and\n+reconciliation. There is auditability over who create a resource and a source of truth.\n+\n+Additionally, this facilitates a security goal of allowing much easier clean up of resources. By adding ownership to the\n+sub-resources, deleting the parents object will cascade delete the created resources. It can also serve the task of\n+regenerating secrets in the event that a user has left the company and secrets need to be rotated. This can also be\n+extended to trigger an external hook that may clean up resources in other systems.\n+\n+## Proposal\n+\n+The proposed solution is to add a small set of Custom Resources (described below) to enable the creation of the needed\n+sub-resources to facilitate each purpose\n+\n+## Resources\n+\n+`profile.yaml`\n+\n+```yaml\n+apiVersion: kubeflow.org/v2\n+kind: Profile\n+metadata:\n+ name: ml\n+spec:\n+ resourceQuotaSpec:\n+ hard:\n+ cpu: \"2\"\n+ memory: 2Gi\n+ requests.nvidia.com/gpu: \"1\"\n+ persistentvolumeclaims: \"1\"\n+ requests.storage: \"5Gi\"\n+```\n+\n+A slightly simplified version of the current Kubeflow Profile. `resourceQuotaSpec` is optional. This will create\n+a `kubeflow-ml` profile, but should be able to look up the Kubeflow namespace `prefix` and adjust based on that (for\n+instance `kf-ml`)\n+", - "comment_created_at": "2023-10-26T18:32:42+00:00", - "comment_author": "omrishiv", - "comment_body": "This is a great addition. 
We currently do it using Kyverno, but would be nice to be handled internally", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1185621732", - "pr_number": 7123, - "pr_file": "conformance/1.7/README.md", - "created_at": "2023-05-05T00:34:25+00:00", - "commented_code": "# Kubeflow conformance program (WIP)\n\nBefore running the conformance tests, you need to configure kubectl default context to point to the k8s cluster that is hosting Kubeflow.\n\nTODO: Make the kubeflow namespace configurable.", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1185621732", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7123, - "pr_file": "conformance/1.7/README.md", - "discussion_id": "1185621732", - "commented_code": "@@ -0,0 +1,27 @@\n+# Kubeflow conformance program (WIP)\n+\n+Before running the conformance tests, you need to configure kubectl default context to point to the k8s cluster that is hosting Kubeflow.\n+\n+TODO: Make the kubeflow namespace configurable.", - "comment_created_at": "2023-05-05T00:34:25+00:00", - "comment_author": "gkcalat", - "comment_body": "NIT: move this out of `README.md`. `Makefile`, maybe?", - "pr_file_module": null - }, - { - "comment_id": "1186507328", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7123, - "pr_file": "conformance/1.7/README.md", - "discussion_id": "1185621732", - "commented_code": "@@ -0,0 +1,27 @@\n+# Kubeflow conformance program (WIP)\n+\n+Before running the conformance tests, you need to configure kubectl default context to point to the k8s cluster that is hosting Kubeflow.\n+\n+TODO: Make the kubeflow namespace configurable.", - "comment_created_at": "2023-05-05T21:18:38+00:00", - "comment_author": "james-jwu", - "comment_body": "Removed. The KF service namespace was made configurable.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "596656576", - "pr_number": 5694, - "pr_file": "components/example-notebook-servers/README.md", - "created_at": "2021-03-18T08:49:21+00:00", - "commented_code": "# Example Notebook Servers\n\n**DISCLAIMER:** The Notebooks Working Group provides these Dockerfiles and\ntheir images as examples only, they are not tested with our CI/CD pipelines\nand are not certified to work in every situation. As such, issues related to\nthese images will be dealt with in a best efforts approach. The Notebooks\nWorking Group will work on officially supporting some of these Notebook\nServer images in the near future. 
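Picking up the LimitRange suggestion from the Profile thread above: a hedged Go sketch of the object a profile controller could create next to the ResourceQuota it already manages. The `newLimitRange` helper, the `-limits` naming, and every default value are assumptions for illustration only, not part of the proposal.

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// newLimitRange sketches a LimitRange a profile controller could create in the
// profile namespace, complementing the ResourceQuota from resourceQuotaSpec.
// The per-container defaults below are illustrative values only.
func newLimitRange(profileName, namespace string) *corev1.LimitRange {
	return &corev1.LimitRange{
		ObjectMeta: metav1.ObjectMeta{
			Name:      profileName + "-limits",
			Namespace: namespace,
		},
		Spec: corev1.LimitRangeSpec{
			Limits: []corev1.LimitRangeItem{
				{
					Type: corev1.LimitTypeContainer,
					// Applied when a container omits resources.limits.
					Default: corev1.ResourceList{
						corev1.ResourceCPU:    resource.MustParse("500m"),
						corev1.ResourceMemory: resource.MustParse("512Mi"),
					},
					// Applied when a container omits resources.requests.
					DefaultRequest: corev1.ResourceList{
						corev1.ResourceCPU:    resource.MustParse("100m"),
						corev1.ResourceMemory: resource.MustParse("128Mi"),
					},
				},
			},
		},
	}
}

func main() {
	lr := newLimitRange("ml", "kubeflow-ml")
	fmt.Println(lr.Name, len(lr.Spec.Limits))
}
```

Handling this inside the controller (rather than with an external policy engine such as Kyverno) would keep the quota and the per-container defaults reconciled from the same Profile object.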
If you do encounter a problem in one of\nthese images, contributions and issue reports are greatly appreciated.\n\n## Introduction\n\nThis directory contains Dockerfiles that are intended to show users how to\ncreate images for various types of Notebook Servers that can run on Kubeflow.\nThere is a common base images that starts from Ubuntu 20.04, adds some\ndependencies and common utilities from APT, installs `kubectl`, creates the\nuser account for the downstream images and adds the\n[S6-overlay](https://github.com/just-containers/s6-overlay) for the init system.\nFrom this base image, images for Jupyter, RStudio and Visual Studio Code\n(through [code-server]([https://github.com/cdr/code-server)) are created.\nFrom the Jupyter image, images are created that show users how to install\nTensorFlow and PyTorch both with and without support for NVIDIA GPUs.\nIn turn, these images are extended with an opinionated set of common tools\nand packages to help new users to quickly get started with Kubeflow.\n\n**Lineage of the provided example Notebook Server Images:**\n\n![Flow Chart of example Notebook Servers](./kubeflow-example-notebook-servers.png)\n\n## Creating custom Notebook Server images\n\nBecause the needs of users varies greatly, the provided images were created to\nbe easily extended by using the [S6-overlay](https://github.com/just-containers/s6-overlay)\ninit system and including [Conda](https://github.com/conda-forge/miniforge).\n\n### Installing packages\n\nThe most common customization to Notebook Server images will be to install\na selection of packages specific that will be needed in the Notebook Server.\nUsers can install packages in a running Notebook Server using `pip install ...`\nor `conda install ...`. However, packages installed in this manner will not\npersist if the Notebook Server is stopped or the Pod is restarted.\nWhile it is possible to install Python packages in the user's home directory\nby using `pip install --user ...`, this can cause unexpected behaviour if the\nworkspace volume is later used in a different Notebook Server, as the packages\npresent in the home directory might be used rather than the ones present in the\nNotebook Server's image. For this reason it is recommended to create custom", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "596656576", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5694, - "pr_file": "components/example-notebook-servers/README.md", - "discussion_id": "596656576", - "commented_code": "@@ -0,0 +1,139 @@\n+# Example Notebook Servers\n+\n+**DISCLAIMER:** The Notebooks Working Group provides these Dockerfiles and\n+their images as examples only, they are not tested with our CI/CD pipelines\n+and are not certified to work in every situation. As such, issues related to\n+these images will be dealt with in a best efforts approach. The Notebooks\n+Working Group will work on officially supporting some of these Notebook\n+Server images in the near future. 
If you do encounter a problem in one of\n+these images, contributions and issue reports are greatly appreciated.\n+\n+## Introduction\n+\n+This directory contains Dockerfiles that are intended to show users how to\n+create images for various types of Notebook Servers that can run on Kubeflow.\n+There is a common base images that starts from Ubuntu 20.04, adds some\n+dependencies and common utilities from APT, installs `kubectl`, creates the\n+user account for the downstream images and adds the\n+[S6-overlay](https://github.com/just-containers/s6-overlay) for the init system.\n+From this base image, images for Jupyter, RStudio and Visual Studio Code\n+(through [code-server]([https://github.com/cdr/code-server)) are created.\n+From the Jupyter image, images are created that show users how to install\n+TensorFlow and PyTorch both with and without support for NVIDIA GPUs.\n+In turn, these images are extended with an opinionated set of common tools\n+and packages to help new users to quickly get started with Kubeflow.\n+\n+**Lineage of the provided example Notebook Server Images:**\n+\n+![Flow Chart of example Notebook Servers](./kubeflow-example-notebook-servers.png)\n+\n+## Creating custom Notebook Server images\n+\n+Because the needs of users varies greatly, the provided images were created to\n+be easily extended by using the [S6-overlay](https://github.com/just-containers/s6-overlay)\n+init system and including [Conda](https://github.com/conda-forge/miniforge).\n+\n+### Installing packages\n+\n+The most common customization to Notebook Server images will be to install\n+a selection of packages specific that will be needed in the Notebook Server.\n+Users can install packages in a running Notebook Server using `pip install ...`\n+or `conda install ...`. However, packages installed in this manner will not\n+persist if the Notebook Server is stopped or the Pod is restarted.\n+While it is possible to install Python packages in the user's home directory\n+by using `pip install --user ...`, this can cause unexpected behaviour if the\n+workspace volume is later used in a different Notebook Server, as the packages\n+present in the home directory might be used rather than the ones present in the\n+Notebook Server's image. For this reason it is recommended to create custom", - "comment_created_at": "2021-03-18T08:49:21+00:00", - "comment_author": "StefanoFioravanzo", - "comment_body": "I am not sure the discussion about the lifecycle of packages and PVCs is relevant to this document. I would remove it", - "pr_file_module": null - }, - { - "comment_id": "596686760", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5694, - "pr_file": "components/example-notebook-servers/README.md", - "discussion_id": "596656576", - "commented_code": "@@ -0,0 +1,139 @@\n+# Example Notebook Servers\n+\n+**DISCLAIMER:** The Notebooks Working Group provides these Dockerfiles and\n+their images as examples only, they are not tested with our CI/CD pipelines\n+and are not certified to work in every situation. As such, issues related to\n+these images will be dealt with in a best efforts approach. The Notebooks\n+Working Group will work on officially supporting some of these Notebook\n+Server images in the near future. 
If you do encounter a problem in one of\n+these images, contributions and issue reports are greatly appreciated.\n+\n+## Introduction\n+\n+This directory contains Dockerfiles that are intended to show users how to\n+create images for various types of Notebook Servers that can run on Kubeflow.\n+There is a common base images that starts from Ubuntu 20.04, adds some\n+dependencies and common utilities from APT, installs `kubectl`, creates the\n+user account for the downstream images and adds the\n+[S6-overlay](https://github.com/just-containers/s6-overlay) for the init system.\n+From this base image, images for Jupyter, RStudio and Visual Studio Code\n+(through [code-server]([https://github.com/cdr/code-server)) are created.\n+From the Jupyter image, images are created that show users how to install\n+TensorFlow and PyTorch both with and without support for NVIDIA GPUs.\n+In turn, these images are extended with an opinionated set of common tools\n+and packages to help new users to quickly get started with Kubeflow.\n+\n+**Lineage of the provided example Notebook Server Images:**\n+\n+![Flow Chart of example Notebook Servers](./kubeflow-example-notebook-servers.png)\n+\n+## Creating custom Notebook Server images\n+\n+Because the needs of users varies greatly, the provided images were created to\n+be easily extended by using the [S6-overlay](https://github.com/just-containers/s6-overlay)\n+init system and including [Conda](https://github.com/conda-forge/miniforge).\n+\n+### Installing packages\n+\n+The most common customization to Notebook Server images will be to install\n+a selection of packages specific that will be needed in the Notebook Server.\n+Users can install packages in a running Notebook Server using `pip install ...`\n+or `conda install ...`. However, packages installed in this manner will not\n+persist if the Notebook Server is stopped or the Pod is restarted.\n+While it is possible to install Python packages in the user's home directory\n+by using `pip install --user ...`, this can cause unexpected behaviour if the\n+workspace volume is later used in a different Notebook Server, as the packages\n+present in the home directory might be used rather than the ones present in the\n+Notebook Server's image. For this reason it is recommended to create custom", - "comment_created_at": "2021-03-18T09:29:57+00:00", - "comment_author": "davidspek", - "comment_body": "While not directly relevant to this document, this very situation has been the cause of issues in the past so I do think this should be stated somewhere. I agree, it is a bit out of place here. I'll leave it in here for the moment until while I think of what to do with it. If needed, it can quickly be removed before merging. \r\n\r\nI'm thinking a `Best practices and considerations` section might be on option (in this README, as well as on the website, which I'm guessing will be mostly copy/paste from here with some formatting changes). ", - "pr_file_module": null - }, - { - "comment_id": "608237698", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5694, - "pr_file": "components/example-notebook-servers/README.md", - "discussion_id": "596656576", - "commented_code": "@@ -0,0 +1,139 @@\n+# Example Notebook Servers\n+\n+**DISCLAIMER:** The Notebooks Working Group provides these Dockerfiles and\n+their images as examples only, they are not tested with our CI/CD pipelines\n+and are not certified to work in every situation. As such, issues related to\n+these images will be dealt with in a best efforts approach. 
The Notebooks\n+Working Group will work on officially supporting some of these Notebook\n+Server images in the near future. If you do encounter a problem in one of\n+these images, contributions and issue reports are greatly appreciated.\n+\n+## Introduction\n+\n+This directory contains Dockerfiles that are intended to show users how to\n+create images for various types of Notebook Servers that can run on Kubeflow.\n+There is a common base images that starts from Ubuntu 20.04, adds some\n+dependencies and common utilities from APT, installs `kubectl`, creates the\n+user account for the downstream images and adds the\n+[S6-overlay](https://github.com/just-containers/s6-overlay) for the init system.\n+From this base image, images for Jupyter, RStudio and Visual Studio Code\n+(through [code-server]([https://github.com/cdr/code-server)) are created.\n+From the Jupyter image, images are created that show users how to install\n+TensorFlow and PyTorch both with and without support for NVIDIA GPUs.\n+In turn, these images are extended with an opinionated set of common tools\n+and packages to help new users to quickly get started with Kubeflow.\n+\n+**Lineage of the provided example Notebook Server Images:**\n+\n+![Flow Chart of example Notebook Servers](./kubeflow-example-notebook-servers.png)\n+\n+## Creating custom Notebook Server images\n+\n+Because the needs of users varies greatly, the provided images were created to\n+be easily extended by using the [S6-overlay](https://github.com/just-containers/s6-overlay)\n+init system and including [Conda](https://github.com/conda-forge/miniforge).\n+\n+### Installing packages\n+\n+The most common customization to Notebook Server images will be to install\n+a selection of packages specific that will be needed in the Notebook Server.\n+Users can install packages in a running Notebook Server using `pip install ...`\n+or `conda install ...`. However, packages installed in this manner will not\n+persist if the Notebook Server is stopped or the Pod is restarted.\n+While it is possible to install Python packages in the user's home directory\n+by using `pip install --user ...`, this can cause unexpected behaviour if the\n+workspace volume is later used in a different Notebook Server, as the packages\n+present in the home directory might be used rather than the ones present in the\n+Notebook Server's image. 
For this reason it is recommended to create custom", - "comment_created_at": "2021-04-06T23:15:38+00:00", - "comment_author": "thesuperzapper", - "comment_body": "While I think this wording can be simplified, we should definitely discuss the implications of how kubeflow mounts PVCs to the home directory for `pip install`", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-follow-api-conventions.json b/_reviewers/kubeflow-follow-api-conventions.json new file mode 100644 index 0000000..79cdd50 --- /dev/null +++ b/_reviewers/kubeflow-follow-api-conventions.json @@ -0,0 +1,258 @@ +[ + { + "discussion_id": "592260994", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "created_at": "2021-03-11T10:55:40+00:00", + "commented_code": "\"timeout\": \"300s\",\n\t\t},\n\t}\n\n\t// Create a template map for the headers section\n\t// of the Istio VirtualService\n\theaders := map[string]interface{}{\n\t\t\"request\": map[string]interface{}{\n\t\t\t\"set\": map[string]interface{}{},\n\t\t},\n\t}\n\n\t// Blacklist for request headers that are not allowed to be set\n\tblacklist := []string{\"userid-header\", \"X-Forwarded-Proto\"}\n\n\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n\t// split the literal string into keys and values and add them\n\t// to the headers variable. Finally, add the headers section to\n\t// the variable with the http spec.\n\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n\t\tfor _, kv := range requestHeaders {\n\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n\t\t\t\tk := strings.Split(kv, \": \")[0]\n\t\t\t\tv := strings.Split(kv, \": \")[1]", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "592260994", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "592260994", + "commented_code": "@@ -449,6 +473,39 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// Blacklist for request headers that are not allowed to be set\n+\tblacklist := []string{\"userid-header\", \"X-Forwarded-Proto\"}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. 
Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]", + "comment_created_at": "2021-03-11T10:55:40+00:00", + "comment_author": "yanniszark", + "comment_body": "I believe we shouldn't use our own serialization format, but instead take a page from Kubernetes' book.\r\nKubernetes uses annotations for alpha features. How are those annotations written? With JSON. For example, the `scheduler.alpha.kubernetes.io/affinity` annotation.\r\n\r\nUsing JSON allows us to leverage existing battle-tested parsers and not implement our own. What do you think?", + "pr_file_module": null + }, + { + "comment_id": "592267539", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "592260994", + "commented_code": "@@ -449,6 +473,39 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// Blacklist for request headers that are not allowed to be set\n+\tblacklist := []string{\"userid-header\", \"X-Forwarded-Proto\"}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]", + "comment_created_at": "2021-03-11T11:05:32+00:00", + "comment_author": "davidspek", + "comment_body": "I have no idea how to start implementing this, and I don't believe there is enough time to do so anymore either. I'm mainly following what is already being done in the code elsewhere. If you want to improve the code, I'm happy to include it in the PR. 
", + "pr_file_module": null + }, + { + "comment_id": "592284821", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "592260994", + "commented_code": "@@ -449,6 +473,39 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// Blacklist for request headers that are not allowed to be set\n+\tblacklist := []string{\"userid-header\", \"X-Forwarded-Proto\"}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]", + "comment_created_at": "2021-03-11T11:31:55+00:00", + "comment_author": "yanniszark", + "comment_body": "@DavidSpek it's a small change. Here is an example you can follow to unmarshal a json map (the use-case we have here):\r\nhttps://play.golang.org/p/TR-6kdk6s91", + "pr_file_module": null + }, + { + "comment_id": "593057363", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "592260994", + "commented_code": "@@ -449,6 +473,39 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// Blacklist for request headers that are not allowed to be set\n+\tblacklist := []string{\"userid-header\", \"X-Forwarded-Proto\"}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]", + "comment_created_at": "2021-03-12T10:11:24+00:00", + "comment_author": "kimwnasptd", + "comment_body": "@DavidSpek show your comment below about the error with Unmarshal https://github.com/kubeflow/kubeflow/pull/5660#issuecomment-797378906. 
Writing it here to keep it in one place.\r\n\r\nI'm not sure how you used the Unmarshal function but I believe you could also try and catch the error and not let it panic. Here's a very similar code I've seen https://github.com/kubeflow/katib/blob/master/pkg/new-ui/v1beta1/backend.go#L73", + "pr_file_module": null + }, + { + "comment_id": "593072068", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "592260994", + "commented_code": "@@ -449,6 +473,39 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// Blacklist for request headers that are not allowed to be set\n+\tblacklist := []string{\"userid-header\", \"X-Forwarded-Proto\"}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]", + "comment_created_at": "2021-03-12T10:34:35+00:00", + "comment_author": "yanniszark", + "comment_body": "@DavidSpek in the example I gave you, I omitted the error handling. Let me give you an example with error handling. This way you can catch the bad case in the controller, log it and emit an event:\r\nhttps://play.golang.org/p/nxmAh3VaNZr", + "pr_file_module": null + }, + { + "comment_id": "593074840", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "592260994", + "commented_code": "@@ -449,6 +473,39 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// Blacklist for request headers that are not allowed to be set\n+\tblacklist := []string{\"userid-header\", \"X-Forwarded-Proto\"}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. 
Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]", + "comment_created_at": "2021-03-12T10:38:47+00:00", + "comment_author": "davidspek", + "comment_body": "@yanniszark Thanks for the example. I just noticed the actual latest changes were not pushed yesterday. I just pushed them and I believe the error handling from you example is included in there. However, the controller crashing is still a problem. ", + "pr_file_module": null + }, + { + "comment_id": "593087493", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "592260994", + "commented_code": "@@ -449,6 +473,39 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// Blacklist for request headers that are not allowed to be set\n+\tblacklist := []string{\"userid-header\", \"X-Forwarded-Proto\"}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]", + "comment_created_at": "2021-03-12T10:59:39+00:00", + "comment_author": "yanniszark", + "comment_body": "Then we need to find the cause. 
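This part of the thread is about not letting a malformed annotation crash the controller. A small sketch of the suggested pattern — catch the `Unmarshal` error and emit a warning event instead of panicking — using a fake recorder and a Pod as a stand-in object so it runs on its own; the reason string and the wiring are assumptions, not the controller's actual code.

```go
package main

import (
	"encoding/json"
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/client-go/tools/record"
)

// handleHeaderAnnotation shows the "log and emit an event, don't panic"
// pattern for a malformed annotation value.
func handleHeaderAnnotation(recorder record.EventRecorder, obj *corev1.Pod, raw string) map[string]string {
	headers := map[string]string{}
	if err := json.Unmarshal([]byte(raw), &headers); err != nil {
		// Surface the problem to the user instead of crashing the controller.
		recorder.Event(obj, corev1.EventTypeWarning, "InvalidHeaderAnnotation", err.Error())
		return nil
	}
	return headers
}

func main() {
	recorder := record.NewFakeRecorder(10)
	pod := &corev1.Pod{} // stand-in for the Notebook object in the real controller
	handleHeaderAnnotation(recorder, pod, `{not valid json`)
	fmt.Println(<-recorder.Events) // the warning event that was recorded
}
```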
Can you point to the exact line in your code that crashes?\r\nWhat have you tried / what do you think is wrong?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "352725761", + "pr_number": 4539, + "pr_file": "components/admission-webhook/pkg/apis/settings/v1alpha1/poddefault_types.go", + "created_at": "2019-12-02T17:23:05+00:00", + "commented_code": "// VolumeMounts defines the collection of VolumeMount to inject into containers.\n\t// +optional\n\tVolumeMounts []v1.VolumeMount `json:\"volumeMounts,omitempty\"`\n\n\t// ObjectMeta defines the metadata to inject labels and annotations from into the metadata field of a pod.\n\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "352725761", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4539, + "pr_file": "components/admission-webhook/pkg/apis/settings/v1alpha1/poddefault_types.go", + "discussion_id": "352725761", + "commented_code": "@@ -53,6 +53,9 @@ type PodDefaultSpec struct {\n \t// VolumeMounts defines the collection of VolumeMount to inject into containers.\n \t// +optional\n \tVolumeMounts []v1.VolumeMount `json:\"volumeMounts,omitempty\"`\n+\n+\t// ObjectMeta defines the metadata to inject labels and annotations from into the metadata field of a pod.\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`", + "comment_created_at": "2019-12-02T17:23:05+00:00", + "comment_author": "jlewi", + "comment_body": "Did you consider having separate fields for labels & annotations rather than a metadata field?\r\nmetadata has a bunch of fields\r\nhttps://kubernetes.io/docs/reference/generated/kubernetes-api/v1.15/#objectmeta-v1-meta\r\n\r\nIf we include ObjectMeta does that create confusion about what happens for other fields (e.g. 
name, namespace, ownerReferences, finalizers etc...)?\r\n\r\n", + "pr_file_module": null + }, + { + "comment_id": "352733846", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4539, + "pr_file": "components/admission-webhook/pkg/apis/settings/v1alpha1/poddefault_types.go", + "discussion_id": "352725761", + "commented_code": "@@ -53,6 +53,9 @@ type PodDefaultSpec struct {\n \t// VolumeMounts defines the collection of VolumeMount to inject into containers.\n \t// +optional\n \tVolumeMounts []v1.VolumeMount `json:\"volumeMounts,omitempty\"`\n+\n+\t// ObjectMeta defines the metadata to inject labels and annotations from into the metadata field of a pod.\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`", + "comment_created_at": "2019-12-02T17:39:44+00:00", + "comment_author": "yanniszark", + "comment_body": "How about introducing the whole field but only use those certain parts?\r\neg StatefulSet uses PersistentVolumeClaim as the type for volumeClaimTemplates, even if it doesn't use all the fields.\r\n\r\nWe could also maybe use some of them in the future?", + "pr_file_module": null + }, + { + "comment_id": "352749580", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4539, + "pr_file": "components/admission-webhook/pkg/apis/settings/v1alpha1/poddefault_types.go", + "discussion_id": "352725761", + "commented_code": "@@ -53,6 +53,9 @@ type PodDefaultSpec struct {\n \t// VolumeMounts defines the collection of VolumeMount to inject into containers.\n \t// +optional\n \tVolumeMounts []v1.VolumeMount `json:\"volumeMounts,omitempty\"`\n+\n+\t// ObjectMeta defines the metadata to inject labels and annotations from into the metadata field of a pod.\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`", + "comment_created_at": "2019-12-02T18:13:37+00:00", + "comment_author": "jlewi", + "comment_body": "The downside of using ObjectMeta is that the API is no longer self-specifying and well defined. A user has to read the docs in order to understand the behavior; i.e. which fields are copied and which are ignored. Whereas if we have fields labels and annotations that are copied then it is arguably more self explanatory.\r\n\r\nFurthermore we are in effect saying that the behavior for other fields e.g \"finalizers, name, namespace, ownerReferences\" are undefined. Its not clear whether these fields are intentionally excluded or they are excluded only because we did a partial implementation in the need of expediency.\r\n\r\nDo we have a strong reason to expect that we will want to include these fields in the future? finalizers seems like the only other field that we might need to set in the future. Even then I'm not sure if setting finalizers on pods as opposed to the owning object is common.\r\n\r\nFor all the other fields (name, uid, ownerReferences) etc... 
setting them in podDefaults seems incorrect.\r\n\r\n\r\n", + "pr_file_module": null + }, + { + "comment_id": "352861812", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4539, + "pr_file": "components/admission-webhook/pkg/apis/settings/v1alpha1/poddefault_types.go", + "discussion_id": "352725761", + "commented_code": "@@ -53,6 +53,9 @@ type PodDefaultSpec struct {\n \t// VolumeMounts defines the collection of VolumeMount to inject into containers.\n \t// +optional\n \tVolumeMounts []v1.VolumeMount `json:\"volumeMounts,omitempty\"`\n+\n+\t// ObjectMeta defines the metadata to inject labels and annotations from into the metadata field of a pod.\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`", + "comment_created_at": "2019-12-02T21:24:50+00:00", + "comment_author": "discordianfish", + "comment_body": "Yeah I wasn't sure either about using ObjectMeta but with only the annotations/labels fields (what I'm doing here and what @yanniszark describes unless I'm mistaken), or add annotations/labels as separate fields. I've went with using ObjectMeta because having a struct for metadata as oppose to just two map[string]string seemed nicer, but not feeling strongly that way.\r\nI'll update the PR to use Annotations and Labels fields instead.", + "pr_file_module": null + }, + { + "comment_id": "353298581", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4539, + "pr_file": "components/admission-webhook/pkg/apis/settings/v1alpha1/poddefault_types.go", + "discussion_id": "352725761", + "commented_code": "@@ -53,6 +53,9 @@ type PodDefaultSpec struct {\n \t// VolumeMounts defines the collection of VolumeMount to inject into containers.\n \t// +optional\n \tVolumeMounts []v1.VolumeMount `json:\"volumeMounts,omitempty\"`\n+\n+\t// ObjectMeta defines the metadata to inject labels and annotations from into the metadata field of a pod.\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`", + "comment_created_at": "2019-12-03T16:50:46+00:00", + "comment_author": "jlewi", + "comment_body": "Thanks. I think this is better. 
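To make the outcome of the PodDefault metadata discussion concrete, here is a hedged sketch of the spec shape the reviewers converge on: explicit `labels` and `annotations` fields rather than an embedded `ObjectMeta`. Only these two fields are shown; the rest of `PodDefaultSpec` is omitted and the sample values are invented.

```go
package main

import "fmt"

// PodDefaultSpec (abridged) with self-describing metadata fields instead of
// an embedded metav1.ObjectMeta. Only labels and annotations are copied to
// pods, so only those are exposed; fields like name, uid or ownerReferences
// never become part of the API surface.
type PodDefaultSpec struct {
	// Labels to inject into the pod's metadata.
	Labels map[string]string `json:"labels,omitempty"`
	// Annotations to inject into the pod's metadata.
	Annotations map[string]string `json:"annotations,omitempty"`
}

func main() {
	spec := PodDefaultSpec{
		Labels:      map[string]string{"team": "ml"},
		Annotations: map[string]string{"sidecar.istio.io/inject": "false"},
	}
	fmt.Printf("%+v\n", spec)
}
```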
Expanding an API is always easier than shrinking it.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "326742755", + "pr_number": 4155, + "pr_file": "components/profile-controller/pkg/apis/kubeflow/v1beta1/profile_types.go", + "created_at": "2019-09-20T18:04:45+00:00", + "commented_code": "/*\nCopyright 2019 The Kubeflow Authors.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n*/\n\npackage v1beta1\n\nimport (\n\t\"k8s.io/api/core/v1\"\n\trbacv1 \"k8s.io/api/rbac/v1\"\n\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n\t\"k8s.io/apimachinery/pkg/runtime\"\n)\n\n// Plugin is for customize actions on different platform.\ntype Plugin struct {\n\tmetav1.TypeMeta `json:\",inline\"`\n\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`\n\tSpec *runtime.RawExtension `json:\"spec,omitempty\"`\n}\n\n// ProfileSpec defines the desired state of Profile\ntype ProfileSpec struct {\n\t// The profile owner\n\tOwner rbacv1.Subject `json:\"owner,omitempty\"`\n\tPlugins []Plugin `json:\"plugins,omitempty\"`\n\t// Resourcequota that will be applied to target namespace\n\tResourceQuotaSpec v1.ResourceQuotaSpec `json:\"resourcequotaspec,omitempty\"`\n}\n\ntype ProfileState string", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "326742755", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4155, + "pr_file": "components/profile-controller/pkg/apis/kubeflow/v1beta1/profile_types.go", + "discussion_id": "326742755", + "commented_code": "@@ -0,0 +1,83 @@\n+/*\n+Copyright 2019 The Kubeflow Authors.\n+\n+Licensed under the Apache License, Version 2.0 (the \"License\");\n+you may not use this file except in compliance with the License.\n+You may obtain a copy of the License at\n+\n+ http://www.apache.org/licenses/LICENSE-2.0\n+\n+Unless required by applicable law or agreed to in writing, software\n+distributed under the License is distributed on an \"AS IS\" BASIS,\n+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+See the License for the specific language governing permissions and\n+limitations under the License.\n+*/\n+\n+package v1beta1\n+\n+import (\n+\t\"k8s.io/api/core/v1\"\n+\trbacv1 \"k8s.io/api/rbac/v1\"\n+\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n+\t\"k8s.io/apimachinery/pkg/runtime\"\n+)\n+\n+// Plugin is for customize actions on different platform.\n+type Plugin struct {\n+\tmetav1.TypeMeta `json:\",inline\"`\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`\n+\tSpec *runtime.RawExtension `json:\"spec,omitempty\"`\n+}\n+\n+// ProfileSpec defines the desired state of Profile\n+type ProfileSpec struct {\n+\t// The profile owner\n+\tOwner rbacv1.Subject `json:\"owner,omitempty\"`\n+\tPlugins []Plugin `json:\"plugins,omitempty\"`\n+\t// Resourcequota that will be applied to target namespace\n+\tResourceQuotaSpec v1.ResourceQuotaSpec `json:\"resourcequotaspec,omitempty\"`\n+}\n+\n+type ProfileState string", + "comment_created_at": "2019-09-20T18:04:45+00:00", + "comment_author": "yanniszark", + "comment_body": "I would suggest we 
avoid using enum fields in status, as they break if we add new possible values.\r\nInstead, the K8s api convention advises one to use Conditions.\r\nhttps://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md", + "pr_file_module": null + }, + { + "comment_id": "326834486", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4155, + "pr_file": "components/profile-controller/pkg/apis/kubeflow/v1beta1/profile_types.go", + "discussion_id": "326742755", + "commented_code": "@@ -0,0 +1,83 @@\n+/*\n+Copyright 2019 The Kubeflow Authors.\n+\n+Licensed under the Apache License, Version 2.0 (the \"License\");\n+you may not use this file except in compliance with the License.\n+You may obtain a copy of the License at\n+\n+ http://www.apache.org/licenses/LICENSE-2.0\n+\n+Unless required by applicable law or agreed to in writing, software\n+distributed under the License is distributed on an \"AS IS\" BASIS,\n+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+See the License for the specific language governing permissions and\n+limitations under the License.\n+*/\n+\n+package v1beta1\n+\n+import (\n+\t\"k8s.io/api/core/v1\"\n+\trbacv1 \"k8s.io/api/rbac/v1\"\n+\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n+\t\"k8s.io/apimachinery/pkg/runtime\"\n+)\n+\n+// Plugin is for customize actions on different platform.\n+type Plugin struct {\n+\tmetav1.TypeMeta `json:\",inline\"`\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`\n+\tSpec *runtime.RawExtension `json:\"spec,omitempty\"`\n+}\n+\n+// ProfileSpec defines the desired state of Profile\n+type ProfileSpec struct {\n+\t// The profile owner\n+\tOwner rbacv1.Subject `json:\"owner,omitempty\"`\n+\tPlugins []Plugin `json:\"plugins,omitempty\"`\n+\t// Resourcequota that will be applied to target namespace\n+\tResourceQuotaSpec v1.ResourceQuotaSpec `json:\"resourcequotaspec,omitempty\"`\n+}\n+\n+type ProfileState string", + "comment_created_at": "2019-09-20T23:50:32+00:00", + "comment_author": "kunmingg", + "comment_body": "ProfileState is saved as plain string, adding new values should not change existing messages.\r\nWe can use conditions in followup PRs if needed.", + "pr_file_module": null + }, + { + "comment_id": "326863827", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4155, + "pr_file": "components/profile-controller/pkg/apis/kubeflow/v1beta1/profile_types.go", + "discussion_id": "326742755", + "commented_code": "@@ -0,0 +1,83 @@\n+/*\n+Copyright 2019 The Kubeflow Authors.\n+\n+Licensed under the Apache License, Version 2.0 (the \"License\");\n+you may not use this file except in compliance with the License.\n+You may obtain a copy of the License at\n+\n+ http://www.apache.org/licenses/LICENSE-2.0\n+\n+Unless required by applicable law or agreed to in writing, software\n+distributed under the License is distributed on an \"AS IS\" BASIS,\n+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+See the License for the specific language governing permissions and\n+limitations under the License.\n+*/\n+\n+package v1beta1\n+\n+import (\n+\t\"k8s.io/api/core/v1\"\n+\trbacv1 \"k8s.io/api/rbac/v1\"\n+\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n+\t\"k8s.io/apimachinery/pkg/runtime\"\n+)\n+\n+// Plugin is for customize actions on different platform.\n+type Plugin struct {\n+\tmetav1.TypeMeta `json:\",inline\"`\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`\n+\tSpec *runtime.RawExtension `json:\"spec,omitempty\"`\n+}\n+\n+// ProfileSpec 
defines the desired state of Profile\n+type ProfileSpec struct {\n+\t// The profile owner\n+\tOwner rbacv1.Subject `json:\"owner,omitempty\"`\n+\tPlugins []Plugin `json:\"plugins,omitempty\"`\n+\t// Resourcequota that will be applied to target namespace\n+\tResourceQuotaSpec v1.ResourceQuotaSpec `json:\"resourcequotaspec,omitempty\"`\n+}\n+\n+type ProfileState string", + "comment_created_at": "2019-09-21T15:28:41+00:00", + "comment_author": "yanniszark", + "comment_body": "@kunmingg I'm a bit hesitant to go forward with a Phase enum-type status, because this is the pattern that Kubernetes used and then decided explicitly against it, in favor of conditions.\r\nSee [this section](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties) for more information: \r\n\r\n> pattern of using phase is deprecated. Newer API types should use conditions instead. Phase was essentially a state-machine enumeration field, that contradicted system-design principles and hampered evolution, since adding new enum values breaks backward compatibility. \r\n\r\n\r\n/cc @jlewi @kkasravi ", + "pr_file_module": null + }, + { + "comment_id": "328205201", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4155, + "pr_file": "components/profile-controller/pkg/apis/kubeflow/v1beta1/profile_types.go", + "discussion_id": "326742755", + "commented_code": "@@ -0,0 +1,83 @@\n+/*\n+Copyright 2019 The Kubeflow Authors.\n+\n+Licensed under the Apache License, Version 2.0 (the \"License\");\n+you may not use this file except in compliance with the License.\n+You may obtain a copy of the License at\n+\n+ http://www.apache.org/licenses/LICENSE-2.0\n+\n+Unless required by applicable law or agreed to in writing, software\n+distributed under the License is distributed on an \"AS IS\" BASIS,\n+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+See the License for the specific language governing permissions and\n+limitations under the License.\n+*/\n+\n+package v1beta1\n+\n+import (\n+\t\"k8s.io/api/core/v1\"\n+\trbacv1 \"k8s.io/api/rbac/v1\"\n+\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n+\t\"k8s.io/apimachinery/pkg/runtime\"\n+)\n+\n+// Plugin is for customize actions on different platform.\n+type Plugin struct {\n+\tmetav1.TypeMeta `json:\",inline\"`\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`\n+\tSpec *runtime.RawExtension `json:\"spec,omitempty\"`\n+}\n+\n+// ProfileSpec defines the desired state of Profile\n+type ProfileSpec struct {\n+\t// The profile owner\n+\tOwner rbacv1.Subject `json:\"owner,omitempty\"`\n+\tPlugins []Plugin `json:\"plugins,omitempty\"`\n+\t// Resourcequota that will be applied to target namespace\n+\tResourceQuotaSpec v1.ResourceQuotaSpec `json:\"resourcequotaspec,omitempty\"`\n+}\n+\n+type ProfileState string", + "comment_created_at": "2019-09-25T15:54:59+00:00", + "comment_author": "jlewi", + "comment_body": "+1 to using conditions. 
\r\n\r\n@kunmingg are there problems with using conditions rather than enums?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "328288394", + "pr_number": 4155, + "pr_file": "components/profile-controller/pkg/apis/kubeflow/v1beta1/profile_types.go", + "created_at": "2019-09-25T18:58:07+00:00", + "commented_code": "/*\nCopyright 2019 The Kubeflow Authors.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n*/\n\npackage v1beta1\n\nimport (\n\t\"k8s.io/api/core/v1\"\n\trbacv1 \"k8s.io/api/rbac/v1\"\n\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n\t\"k8s.io/apimachinery/pkg/runtime\"\n)\n\n// Plugin is for customize actions on different platform.\ntype Plugin struct {\n\tmetav1.TypeMeta `json:\",inline\"`\n\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`\n\tSpec\t*runtime.RawExtension `json:\"spec,omitempty\"`\n}\n\ntype ProfileCondition struct {\n\tMessage\tstring `json:\"message,omitempty\"`\n\tStatus\tstring `json:\"status,omitempty\"`", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "328288394", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4155, + "pr_file": "components/profile-controller/pkg/apis/kubeflow/v1beta1/profile_types.go", + "discussion_id": "328288394", + "commented_code": "@@ -0,0 +1,83 @@\n+/*\n+Copyright 2019 The Kubeflow Authors.\n+\n+Licensed under the Apache License, Version 2.0 (the \"License\");\n+you may not use this file except in compliance with the License.\n+You may obtain a copy of the License at\n+\n+ http://www.apache.org/licenses/LICENSE-2.0\n+\n+Unless required by applicable law or agreed to in writing, software\n+distributed under the License is distributed on an \"AS IS\" BASIS,\n+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+See the License for the specific language governing permissions and\n+limitations under the License.\n+*/\n+\n+package v1beta1\n+\n+import (\n+\t\"k8s.io/api/core/v1\"\n+\trbacv1 \"k8s.io/api/rbac/v1\"\n+\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n+\t\"k8s.io/apimachinery/pkg/runtime\"\n+)\n+\n+// Plugin is for customize actions on different platform.\n+type Plugin struct {\n+\tmetav1.TypeMeta `json:\",inline\"`\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`\n+\tSpec\t*runtime.RawExtension `json:\"spec,omitempty\"`\n+}\n+\n+type ProfileCondition struct {\n+\tMessage\tstring `json:\"message,omitempty\"`\n+\tStatus\tstring `json:\"status,omitempty\"`", + "comment_created_at": "2019-09-25T18:58:07+00:00", + "comment_author": "yanniszark", + "comment_body": "I believe it's better to follow the standard condition fields here as well.\r\nhttps://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties\r\n\r\n/cc @jlewi ", + "pr_file_module": null + }, + { + "comment_id": "328377372", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4155, + "pr_file": "components/profile-controller/pkg/apis/kubeflow/v1beta1/profile_types.go", + "discussion_id": "328288394", + "commented_code": "@@ 
-0,0 +1,83 @@\n+/*\n+Copyright 2019 The Kubeflow Authors.\n+\n+Licensed under the Apache License, Version 2.0 (the \"License\");\n+you may not use this file except in compliance with the License.\n+You may obtain a copy of the License at\n+\n+ http://www.apache.org/licenses/LICENSE-2.0\n+\n+Unless required by applicable law or agreed to in writing, software\n+distributed under the License is distributed on an \"AS IS\" BASIS,\n+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+See the License for the specific language governing permissions and\n+limitations under the License.\n+*/\n+\n+package v1beta1\n+\n+import (\n+\t\"k8s.io/api/core/v1\"\n+\trbacv1 \"k8s.io/api/rbac/v1\"\n+\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n+\t\"k8s.io/apimachinery/pkg/runtime\"\n+)\n+\n+// Plugin is for customize actions on different platform.\n+type Plugin struct {\n+\tmetav1.TypeMeta `json:\",inline\"`\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`\n+\tSpec\t*runtime.RawExtension `json:\"spec,omitempty\"`\n+}\n+\n+type ProfileCondition struct {\n+\tMessage\tstring `json:\"message,omitempty\"`\n+\tStatus\tstring `json:\"status,omitempty\"`", + "comment_created_at": "2019-09-25T23:12:50+00:00", + "comment_author": "jlewi", + "comment_body": "+1", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-follow-api-conventions.md b/_reviewers/kubeflow-follow-api-conventions.md index 24180d9..dde8b37 100644 --- a/_reviewers/kubeflow-follow-api-conventions.md +++ b/_reviewers/kubeflow-follow-api-conventions.md @@ -59,263 +59,3 @@ Key practices: ``` Following these conventions enhances API clarity, avoids reinventing the wheel, and ensures your APIs evolve gracefully as requirements change. - - -[ - { - "discussion_id": "592260994", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "created_at": "2021-03-11T10:55:40+00:00", - "commented_code": "\"timeout\": \"300s\",\n\t\t},\n\t}\n\n\t// Create a template map for the headers section\n\t// of the Istio VirtualService\n\theaders := map[string]interface{}{\n\t\t\"request\": map[string]interface{}{\n\t\t\t\"set\": map[string]interface{}{},\n\t\t},\n\t}\n\n\t// Blacklist for request headers that are not allowed to be set\n\tblacklist := []string{\"userid-header\", \"X-Forwarded-Proto\"}\n\n\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n\t// split the literal string into keys and values and add them\n\t// to the headers variable. 
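The two Profile threads above settle on conditions with the standard Kubernetes fields rather than a phase-style `ProfileState` enum or a bare message/status pair. A rough sketch of what such a status could look like, assuming `metav1.Condition` is used; the condition type and reason strings are illustrative, not the controller's actual values.

```go
package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// ProfileStatus uses the standard condition shape (type, status, reason,
// message, lastTransitionTime) instead of a single enum-like state field,
// so new condition types can be added without breaking existing clients.
type ProfileStatus struct {
	Conditions []metav1.Condition `json:"conditions,omitempty"`
}

func main() {
	status := ProfileStatus{
		Conditions: []metav1.Condition{
			{
				Type:               "Ready", // illustrative condition type
				Status:             metav1.ConditionTrue,
				Reason:             "NamespaceProvisioned", // illustrative reason
				Message:            "namespace and RBAC created",
				LastTransitionTime: metav1.Now(),
			},
		},
	}
	fmt.Printf("%+v\n", status.Conditions[0])
}
```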
Finally, add the headers section to\n\t// the variable with the http spec.\n\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n\t\tfor _, kv := range requestHeaders {\n\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n\t\t\t\tk := strings.Split(kv, \": \")[0]\n\t\t\t\tv := strings.Split(kv, \": \")[1]", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "592260994", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "592260994", - "commented_code": "@@ -449,6 +473,39 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// Blacklist for request headers that are not allowed to be set\n+\tblacklist := []string{\"userid-header\", \"X-Forwarded-Proto\"}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]", - "comment_created_at": "2021-03-11T10:55:40+00:00", - "comment_author": "yanniszark", - "comment_body": "I believe we shouldn't use our own serialization format, but instead take a page from Kubernetes' book.\r\nKubernetes uses annotations for alpha features. How are those annotations written? With JSON. For example, the `scheduler.alpha.kubernetes.io/affinity` annotation.\r\n\r\nUsing JSON allows us to leverage existing battle-tested parsers and not implement our own. 
What do you think?", - "pr_file_module": null - }, - { - "comment_id": "592267539", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "592260994", - "commented_code": "@@ -449,6 +473,39 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// Blacklist for request headers that are not allowed to be set\n+\tblacklist := []string{\"userid-header\", \"X-Forwarded-Proto\"}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]", - "comment_created_at": "2021-03-11T11:05:32+00:00", - "comment_author": "davidspek", - "comment_body": "I have no idea how to start implementing this, and I don't believe there is enough time to do so anymore either. I'm mainly following what is already being done in the code elsewhere. If you want to improve the code, I'm happy to include it in the PR. ", - "pr_file_module": null - }, - { - "comment_id": "592284821", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "592260994", - "commented_code": "@@ -449,6 +473,39 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// Blacklist for request headers that are not allowed to be set\n+\tblacklist := []string{\"userid-header\", \"X-Forwarded-Proto\"}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]", - "comment_created_at": "2021-03-11T11:31:55+00:00", - "comment_author": "yanniszark", - "comment_body": "@DavidSpek it's a small change. 
Here is an example you can follow to unmarshal a json map (the use-case we have here):\r\nhttps://play.golang.org/p/TR-6kdk6s91", - "pr_file_module": null - }, - { - "comment_id": "593057363", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "592260994", - "commented_code": "@@ -449,6 +473,39 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// Blacklist for request headers that are not allowed to be set\n+\tblacklist := []string{\"userid-header\", \"X-Forwarded-Proto\"}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]", - "comment_created_at": "2021-03-12T10:11:24+00:00", - "comment_author": "kimwnasptd", - "comment_body": "@DavidSpek show your comment below about the error with Unmarshal https://github.com/kubeflow/kubeflow/pull/5660#issuecomment-797378906. Writing it here to keep it in one place.\r\n\r\nI'm not sure how you used the Unmarshal function but I believe you could also try and catch the error and not let it panic. Here's a very similar code I've seen https://github.com/kubeflow/katib/blob/master/pkg/new-ui/v1beta1/backend.go#L73", - "pr_file_module": null - }, - { - "comment_id": "593072068", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "592260994", - "commented_code": "@@ -449,6 +473,39 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// Blacklist for request headers that are not allowed to be set\n+\tblacklist := []string{\"userid-header\", \"X-Forwarded-Proto\"}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. 
Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]", - "comment_created_at": "2021-03-12T10:34:35+00:00", - "comment_author": "yanniszark", - "comment_body": "@DavidSpek in the example I gave you, I omitted the error handling. Let me give you an example with error handling. This way you can catch the bad case in the controller, log it and emit an event:\r\nhttps://play.golang.org/p/nxmAh3VaNZr", - "pr_file_module": null - }, - { - "comment_id": "593074840", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "592260994", - "commented_code": "@@ -449,6 +473,39 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// Blacklist for request headers that are not allowed to be set\n+\tblacklist := []string{\"userid-header\", \"X-Forwarded-Proto\"}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]", - "comment_created_at": "2021-03-12T10:38:47+00:00", - "comment_author": "davidspek", - "comment_body": "@yanniszark Thanks for the example. I just noticed the actual latest changes were not pushed yesterday. I just pushed them and I believe the error handling from you example is included in there. However, the controller crashing is still a problem. 
", - "pr_file_module": null - }, - { - "comment_id": "593087493", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "592260994", - "commented_code": "@@ -449,6 +473,39 @@ func generateVirtualService(instance *v1beta1.Notebook) (*unstructured.Unstructu\n \t\t\t\"timeout\": \"300s\",\n \t\t},\n \t}\n+\n+\t// Create a template map for the headers section\n+\t// of the Istio VirtualService\n+\theaders := map[string]interface{}{\n+\t\t\"request\": map[string]interface{}{\n+\t\t\t\"set\": map[string]interface{}{},\n+\t\t},\n+\t}\n+\n+\t// Blacklist for request headers that are not allowed to be set\n+\tblacklist := []string{\"userid-header\", \"X-Forwarded-Proto\"}\n+\n+\t// If key \"notebooks.kubeflow.org/http-headers-request-set\" is present on the notebook resource,\n+\t// split the literal string into keys and values and add them\n+\t// to the headers variable. Finally, add the headers section to\n+\t// the variable with the http spec.\n+\tif _, ok := annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]; ok && len(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"]) > 0 {\n+\t\trequestHeaders := strings.Split(annotations[\"notebooks.kubeflow.org/http-headers-request-set\"], \"\\n\")\n+\t\tfor _, kv := range requestHeaders {\n+\t\t\tif len(strings.Split(kv, \": \")) == 2 {\n+\t\t\t\tk := strings.Split(kv, \": \")[0]\n+\t\t\t\tv := strings.Split(kv, \": \")[1]", - "comment_created_at": "2021-03-12T10:59:39+00:00", - "comment_author": "yanniszark", - "comment_body": "Then we need to find the cause. Can you point to the exact line in your code that crashes?\r\nWhat have you tried / what do you think is wrong?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "352725761", - "pr_number": 4539, - "pr_file": "components/admission-webhook/pkg/apis/settings/v1alpha1/poddefault_types.go", - "created_at": "2019-12-02T17:23:05+00:00", - "commented_code": "// VolumeMounts defines the collection of VolumeMount to inject into containers.\n\t// +optional\n\tVolumeMounts []v1.VolumeMount `json:\"volumeMounts,omitempty\"`\n\n\t// ObjectMeta defines the metadata to inject labels and annotations from into the metadata field of a pod.\n\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "352725761", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4539, - "pr_file": "components/admission-webhook/pkg/apis/settings/v1alpha1/poddefault_types.go", - "discussion_id": "352725761", - "commented_code": "@@ -53,6 +53,9 @@ type PodDefaultSpec struct {\n \t// VolumeMounts defines the collection of VolumeMount to inject into containers.\n \t// +optional\n \tVolumeMounts []v1.VolumeMount `json:\"volumeMounts,omitempty\"`\n+\n+\t// ObjectMeta defines the metadata to inject labels and annotations from into the metadata field of a pod.\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`", - "comment_created_at": "2019-12-02T17:23:05+00:00", - "comment_author": "jlewi", - "comment_body": "Did you consider having separate fields for labels & annotations rather than a metadata field?\r\nmetadata has a bunch of fields\r\nhttps://kubernetes.io/docs/reference/generated/kubernetes-api/v1.15/#objectmeta-v1-meta\r\n\r\nIf we include ObjectMeta does that create confusion about what happens for other fields (e.g. 
name, namespace, ownerReferences, finalizers etc...)?\r\n\r\n", - "pr_file_module": null - }, - { - "comment_id": "352733846", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4539, - "pr_file": "components/admission-webhook/pkg/apis/settings/v1alpha1/poddefault_types.go", - "discussion_id": "352725761", - "commented_code": "@@ -53,6 +53,9 @@ type PodDefaultSpec struct {\n \t// VolumeMounts defines the collection of VolumeMount to inject into containers.\n \t// +optional\n \tVolumeMounts []v1.VolumeMount `json:\"volumeMounts,omitempty\"`\n+\n+\t// ObjectMeta defines the metadata to inject labels and annotations from into the metadata field of a pod.\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`", - "comment_created_at": "2019-12-02T17:39:44+00:00", - "comment_author": "yanniszark", - "comment_body": "How about introducing the whole field but only use those certain parts?\r\neg StatefulSet uses PersistentVolumeClaim as the type for volumeClaimTemplates, even if it doesn't use all the fields.\r\n\r\nWe could also maybe use some of them in the future?", - "pr_file_module": null - }, - { - "comment_id": "352749580", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4539, - "pr_file": "components/admission-webhook/pkg/apis/settings/v1alpha1/poddefault_types.go", - "discussion_id": "352725761", - "commented_code": "@@ -53,6 +53,9 @@ type PodDefaultSpec struct {\n \t// VolumeMounts defines the collection of VolumeMount to inject into containers.\n \t// +optional\n \tVolumeMounts []v1.VolumeMount `json:\"volumeMounts,omitempty\"`\n+\n+\t// ObjectMeta defines the metadata to inject labels and annotations from into the metadata field of a pod.\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`", - "comment_created_at": "2019-12-02T18:13:37+00:00", - "comment_author": "jlewi", - "comment_body": "The downside of using ObjectMeta is that the API is no longer self-specifying and well defined. A user has to read the docs in order to understand the behavior; i.e. which fields are copied and which are ignored. Whereas if we have fields labels and annotations that are copied then it is arguably more self explanatory.\r\n\r\nFurthermore we are in effect saying that the behavior for other fields e.g \"finalizers, name, namespace, ownerReferences\" are undefined. Its not clear whether these fields are intentionally excluded or they are excluded only because we did a partial implementation in the need of expediency.\r\n\r\nDo we have a strong reason to expect that we will want to include these fields in the future? finalizers seems like the only other field that we might need to set in the future. Even then I'm not sure if setting finalizers on pods as opposed to the owning object is common.\r\n\r\nFor all the other fields (name, uid, ownerReferences) etc... 
setting them in podDefaults seems incorrect.\r\n\r\n\r\n", - "pr_file_module": null - }, - { - "comment_id": "352861812", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4539, - "pr_file": "components/admission-webhook/pkg/apis/settings/v1alpha1/poddefault_types.go", - "discussion_id": "352725761", - "commented_code": "@@ -53,6 +53,9 @@ type PodDefaultSpec struct {\n \t// VolumeMounts defines the collection of VolumeMount to inject into containers.\n \t// +optional\n \tVolumeMounts []v1.VolumeMount `json:\"volumeMounts,omitempty\"`\n+\n+\t// ObjectMeta defines the metadata to inject labels and annotations from into the metadata field of a pod.\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`", - "comment_created_at": "2019-12-02T21:24:50+00:00", - "comment_author": "discordianfish", - "comment_body": "Yeah I wasn't sure either about using ObjectMeta but with only the annotations/labels fields (what I'm doing here and what @yanniszark describes unless I'm mistaken), or add annotations/labels as separate fields. I've went with using ObjectMeta because having a struct for metadata as oppose to just two map[string]string seemed nicer, but not feeling strongly that way.\r\nI'll update the PR to use Annotations and Labels fields instead.", - "pr_file_module": null - }, - { - "comment_id": "353298581", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4539, - "pr_file": "components/admission-webhook/pkg/apis/settings/v1alpha1/poddefault_types.go", - "discussion_id": "352725761", - "commented_code": "@@ -53,6 +53,9 @@ type PodDefaultSpec struct {\n \t// VolumeMounts defines the collection of VolumeMount to inject into containers.\n \t// +optional\n \tVolumeMounts []v1.VolumeMount `json:\"volumeMounts,omitempty\"`\n+\n+\t// ObjectMeta defines the metadata to inject labels and annotations from into the metadata field of a pod.\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`", - "comment_created_at": "2019-12-03T16:50:46+00:00", - "comment_author": "jlewi", - "comment_body": "Thanks. I think this is better. 
Expanding an API is always easier than shrinking it.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "326742755", - "pr_number": 4155, - "pr_file": "components/profile-controller/pkg/apis/kubeflow/v1beta1/profile_types.go", - "created_at": "2019-09-20T18:04:45+00:00", - "commented_code": "/*\nCopyright 2019 The Kubeflow Authors.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n*/\n\npackage v1beta1\n\nimport (\n\t\"k8s.io/api/core/v1\"\n\trbacv1 \"k8s.io/api/rbac/v1\"\n\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n\t\"k8s.io/apimachinery/pkg/runtime\"\n)\n\n// Plugin is for customize actions on different platform.\ntype Plugin struct {\n\tmetav1.TypeMeta `json:\",inline\"`\n\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`\n\tSpec *runtime.RawExtension `json:\"spec,omitempty\"`\n}\n\n// ProfileSpec defines the desired state of Profile\ntype ProfileSpec struct {\n\t// The profile owner\n\tOwner rbacv1.Subject `json:\"owner,omitempty\"`\n\tPlugins []Plugin `json:\"plugins,omitempty\"`\n\t// Resourcequota that will be applied to target namespace\n\tResourceQuotaSpec v1.ResourceQuotaSpec `json:\"resourcequotaspec,omitempty\"`\n}\n\ntype ProfileState string", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "326742755", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4155, - "pr_file": "components/profile-controller/pkg/apis/kubeflow/v1beta1/profile_types.go", - "discussion_id": "326742755", - "commented_code": "@@ -0,0 +1,83 @@\n+/*\n+Copyright 2019 The Kubeflow Authors.\n+\n+Licensed under the Apache License, Version 2.0 (the \"License\");\n+you may not use this file except in compliance with the License.\n+You may obtain a copy of the License at\n+\n+ http://www.apache.org/licenses/LICENSE-2.0\n+\n+Unless required by applicable law or agreed to in writing, software\n+distributed under the License is distributed on an \"AS IS\" BASIS,\n+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+See the License for the specific language governing permissions and\n+limitations under the License.\n+*/\n+\n+package v1beta1\n+\n+import (\n+\t\"k8s.io/api/core/v1\"\n+\trbacv1 \"k8s.io/api/rbac/v1\"\n+\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n+\t\"k8s.io/apimachinery/pkg/runtime\"\n+)\n+\n+// Plugin is for customize actions on different platform.\n+type Plugin struct {\n+\tmetav1.TypeMeta `json:\",inline\"`\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`\n+\tSpec *runtime.RawExtension `json:\"spec,omitempty\"`\n+}\n+\n+// ProfileSpec defines the desired state of Profile\n+type ProfileSpec struct {\n+\t// The profile owner\n+\tOwner rbacv1.Subject `json:\"owner,omitempty\"`\n+\tPlugins []Plugin `json:\"plugins,omitempty\"`\n+\t// Resourcequota that will be applied to target namespace\n+\tResourceQuotaSpec v1.ResourceQuotaSpec `json:\"resourcequotaspec,omitempty\"`\n+}\n+\n+type ProfileState string", - "comment_created_at": "2019-09-20T18:04:45+00:00", - "comment_author": "yanniszark", - "comment_body": "I would suggest we 
avoid using enum fields in status, as they break if we add new possible values.\r\nInstead, the K8s api convention advises one to use Conditions.\r\nhttps://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md", - "pr_file_module": null - }, - { - "comment_id": "326834486", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4155, - "pr_file": "components/profile-controller/pkg/apis/kubeflow/v1beta1/profile_types.go", - "discussion_id": "326742755", - "commented_code": "@@ -0,0 +1,83 @@\n+/*\n+Copyright 2019 The Kubeflow Authors.\n+\n+Licensed under the Apache License, Version 2.0 (the \"License\");\n+you may not use this file except in compliance with the License.\n+You may obtain a copy of the License at\n+\n+ http://www.apache.org/licenses/LICENSE-2.0\n+\n+Unless required by applicable law or agreed to in writing, software\n+distributed under the License is distributed on an \"AS IS\" BASIS,\n+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+See the License for the specific language governing permissions and\n+limitations under the License.\n+*/\n+\n+package v1beta1\n+\n+import (\n+\t\"k8s.io/api/core/v1\"\n+\trbacv1 \"k8s.io/api/rbac/v1\"\n+\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n+\t\"k8s.io/apimachinery/pkg/runtime\"\n+)\n+\n+// Plugin is for customize actions on different platform.\n+type Plugin struct {\n+\tmetav1.TypeMeta `json:\",inline\"`\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`\n+\tSpec *runtime.RawExtension `json:\"spec,omitempty\"`\n+}\n+\n+// ProfileSpec defines the desired state of Profile\n+type ProfileSpec struct {\n+\t// The profile owner\n+\tOwner rbacv1.Subject `json:\"owner,omitempty\"`\n+\tPlugins []Plugin `json:\"plugins,omitempty\"`\n+\t// Resourcequota that will be applied to target namespace\n+\tResourceQuotaSpec v1.ResourceQuotaSpec `json:\"resourcequotaspec,omitempty\"`\n+}\n+\n+type ProfileState string", - "comment_created_at": "2019-09-20T23:50:32+00:00", - "comment_author": "kunmingg", - "comment_body": "ProfileState is saved as plain string, adding new values should not change existing messages.\r\nWe can use conditions in followup PRs if needed.", - "pr_file_module": null - }, - { - "comment_id": "326863827", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4155, - "pr_file": "components/profile-controller/pkg/apis/kubeflow/v1beta1/profile_types.go", - "discussion_id": "326742755", - "commented_code": "@@ -0,0 +1,83 @@\n+/*\n+Copyright 2019 The Kubeflow Authors.\n+\n+Licensed under the Apache License, Version 2.0 (the \"License\");\n+you may not use this file except in compliance with the License.\n+You may obtain a copy of the License at\n+\n+ http://www.apache.org/licenses/LICENSE-2.0\n+\n+Unless required by applicable law or agreed to in writing, software\n+distributed under the License is distributed on an \"AS IS\" BASIS,\n+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+See the License for the specific language governing permissions and\n+limitations under the License.\n+*/\n+\n+package v1beta1\n+\n+import (\n+\t\"k8s.io/api/core/v1\"\n+\trbacv1 \"k8s.io/api/rbac/v1\"\n+\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n+\t\"k8s.io/apimachinery/pkg/runtime\"\n+)\n+\n+// Plugin is for customize actions on different platform.\n+type Plugin struct {\n+\tmetav1.TypeMeta `json:\",inline\"`\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`\n+\tSpec *runtime.RawExtension `json:\"spec,omitempty\"`\n+}\n+\n+// ProfileSpec 
defines the desired state of Profile\n+type ProfileSpec struct {\n+\t// The profile owner\n+\tOwner rbacv1.Subject `json:\"owner,omitempty\"`\n+\tPlugins []Plugin `json:\"plugins,omitempty\"`\n+\t// Resourcequota that will be applied to target namespace\n+\tResourceQuotaSpec v1.ResourceQuotaSpec `json:\"resourcequotaspec,omitempty\"`\n+}\n+\n+type ProfileState string", - "comment_created_at": "2019-09-21T15:28:41+00:00", - "comment_author": "yanniszark", - "comment_body": "@kunmingg I'm a bit hesitant to go forward with a Phase enum-type status, because this is the pattern that Kubernetes used and then decided explicitly against it, in favor of conditions.\r\nSee [this section](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties) for more information: \r\n\r\n> pattern of using phase is deprecated. Newer API types should use conditions instead. Phase was essentially a state-machine enumeration field, that contradicted system-design principles and hampered evolution, since adding new enum values breaks backward compatibility. \r\n\r\n\r\n/cc @jlewi @kkasravi ", - "pr_file_module": null - }, - { - "comment_id": "328205201", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4155, - "pr_file": "components/profile-controller/pkg/apis/kubeflow/v1beta1/profile_types.go", - "discussion_id": "326742755", - "commented_code": "@@ -0,0 +1,83 @@\n+/*\n+Copyright 2019 The Kubeflow Authors.\n+\n+Licensed under the Apache License, Version 2.0 (the \"License\");\n+you may not use this file except in compliance with the License.\n+You may obtain a copy of the License at\n+\n+ http://www.apache.org/licenses/LICENSE-2.0\n+\n+Unless required by applicable law or agreed to in writing, software\n+distributed under the License is distributed on an \"AS IS\" BASIS,\n+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+See the License for the specific language governing permissions and\n+limitations under the License.\n+*/\n+\n+package v1beta1\n+\n+import (\n+\t\"k8s.io/api/core/v1\"\n+\trbacv1 \"k8s.io/api/rbac/v1\"\n+\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n+\t\"k8s.io/apimachinery/pkg/runtime\"\n+)\n+\n+// Plugin is for customize actions on different platform.\n+type Plugin struct {\n+\tmetav1.TypeMeta `json:\",inline\"`\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`\n+\tSpec *runtime.RawExtension `json:\"spec,omitempty\"`\n+}\n+\n+// ProfileSpec defines the desired state of Profile\n+type ProfileSpec struct {\n+\t// The profile owner\n+\tOwner rbacv1.Subject `json:\"owner,omitempty\"`\n+\tPlugins []Plugin `json:\"plugins,omitempty\"`\n+\t// Resourcequota that will be applied to target namespace\n+\tResourceQuotaSpec v1.ResourceQuotaSpec `json:\"resourcequotaspec,omitempty\"`\n+}\n+\n+type ProfileState string", - "comment_created_at": "2019-09-25T15:54:59+00:00", - "comment_author": "jlewi", - "comment_body": "+1 to using conditions. 
\r\n\r\n@kunmingg are there problems with using conditions rather than enums?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "328288394", - "pr_number": 4155, - "pr_file": "components/profile-controller/pkg/apis/kubeflow/v1beta1/profile_types.go", - "created_at": "2019-09-25T18:58:07+00:00", - "commented_code": "/*\nCopyright 2019 The Kubeflow Authors.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n*/\n\npackage v1beta1\n\nimport (\n\t\"k8s.io/api/core/v1\"\n\trbacv1 \"k8s.io/api/rbac/v1\"\n\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n\t\"k8s.io/apimachinery/pkg/runtime\"\n)\n\n// Plugin is for customize actions on different platform.\ntype Plugin struct {\n\tmetav1.TypeMeta `json:\",inline\"`\n\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`\n\tSpec\t*runtime.RawExtension `json:\"spec,omitempty\"`\n}\n\ntype ProfileCondition struct {\n\tMessage\tstring `json:\"message,omitempty\"`\n\tStatus\tstring `json:\"status,omitempty\"`", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "328288394", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4155, - "pr_file": "components/profile-controller/pkg/apis/kubeflow/v1beta1/profile_types.go", - "discussion_id": "328288394", - "commented_code": "@@ -0,0 +1,83 @@\n+/*\n+Copyright 2019 The Kubeflow Authors.\n+\n+Licensed under the Apache License, Version 2.0 (the \"License\");\n+you may not use this file except in compliance with the License.\n+You may obtain a copy of the License at\n+\n+ http://www.apache.org/licenses/LICENSE-2.0\n+\n+Unless required by applicable law or agreed to in writing, software\n+distributed under the License is distributed on an \"AS IS\" BASIS,\n+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+See the License for the specific language governing permissions and\n+limitations under the License.\n+*/\n+\n+package v1beta1\n+\n+import (\n+\t\"k8s.io/api/core/v1\"\n+\trbacv1 \"k8s.io/api/rbac/v1\"\n+\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n+\t\"k8s.io/apimachinery/pkg/runtime\"\n+)\n+\n+// Plugin is for customize actions on different platform.\n+type Plugin struct {\n+\tmetav1.TypeMeta `json:\",inline\"`\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`\n+\tSpec\t*runtime.RawExtension `json:\"spec,omitempty\"`\n+}\n+\n+type ProfileCondition struct {\n+\tMessage\tstring `json:\"message,omitempty\"`\n+\tStatus\tstring `json:\"status,omitempty\"`", - "comment_created_at": "2019-09-25T18:58:07+00:00", - "comment_author": "yanniszark", - "comment_body": "I believe it's better to follow the standard condition fields here as well.\r\nhttps://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties\r\n\r\n/cc @jlewi ", - "pr_file_module": null - }, - { - "comment_id": "328377372", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4155, - "pr_file": "components/profile-controller/pkg/apis/kubeflow/v1beta1/profile_types.go", - "discussion_id": "328288394", - "commented_code": "@@ 
-0,0 +1,83 @@\n+/*\n+Copyright 2019 The Kubeflow Authors.\n+\n+Licensed under the Apache License, Version 2.0 (the \"License\");\n+you may not use this file except in compliance with the License.\n+You may obtain a copy of the License at\n+\n+ http://www.apache.org/licenses/LICENSE-2.0\n+\n+Unless required by applicable law or agreed to in writing, software\n+distributed under the License is distributed on an \"AS IS\" BASIS,\n+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+See the License for the specific language governing permissions and\n+limitations under the License.\n+*/\n+\n+package v1beta1\n+\n+import (\n+\t\"k8s.io/api/core/v1\"\n+\trbacv1 \"k8s.io/api/rbac/v1\"\n+\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n+\t\"k8s.io/apimachinery/pkg/runtime\"\n+)\n+\n+// Plugin is for customize actions on different platform.\n+type Plugin struct {\n+\tmetav1.TypeMeta `json:\",inline\"`\n+\tmetav1.ObjectMeta `json:\"metadata,omitempty\"`\n+\tSpec\t*runtime.RawExtension `json:\"spec,omitempty\"`\n+}\n+\n+type ProfileCondition struct {\n+\tMessage\tstring `json:\"message,omitempty\"`\n+\tStatus\tstring `json:\"status,omitempty\"`", - "comment_created_at": "2019-09-25T23:12:50+00:00", - "comment_author": "jlewi", - "comment_body": "+1", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-go-export-naming-conventions.json b/_reviewers/kubeflow-go-export-naming-conventions.json new file mode 100644 index 0000000..9bb23b2 --- /dev/null +++ b/_reviewers/kubeflow-go-export-naming-conventions.json @@ -0,0 +1,216 @@ +[ + { + "discussion_id": "1373286709", + "pr_number": 7365, + "pr_file": "components/notebook-controller/controllers/culling_controller.go", + "created_at": "2023-10-26T14:36:41+00:00", + "commented_code": "return false\n}\n\nfunc DisableCullingAnnotationIsSet(meta metav1.ObjectMeta) bool {", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1373286709", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7365, + "pr_file": "components/notebook-controller/controllers/culling_controller.go", + "discussion_id": "1373286709", + "commented_code": "@@ -381,6 +388,16 @@ func StopAnnotationIsSet(meta metav1.ObjectMeta) bool {\n \treturn false\n }\n \n+func DisableCullingAnnotationIsSet(meta metav1.ObjectMeta) bool {", + "comment_created_at": "2023-10-26T14:36:41+00:00", + "comment_author": "kimwnasptd", + "comment_body": "small nit, shouldn't this function start from lowercase since it's not meant to be used from outside this go module?", + "pr_file_module": null + }, + { + "comment_id": "1373310284", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7365, + "pr_file": "components/notebook-controller/controllers/culling_controller.go", + "discussion_id": "1373286709", + "commented_code": "@@ -381,6 +388,16 @@ func StopAnnotationIsSet(meta metav1.ObjectMeta) bool {\n \treturn false\n }\n \n+func DisableCullingAnnotationIsSet(meta metav1.ObjectMeta) bool {", + "comment_created_at": "2023-10-26T14:51:59+00:00", + "comment_author": "omrishiv", + "comment_body": "I was following the style of the current code using `StopAnnotationIsSet` above. 
Agreed it should be lowercase", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "592236690", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "created_at": "2021-03-11T10:21:18+00:00", + "commented_code": "return fmt.Sprintf(\"notebook-%s-%s\", namespace, kfName)\n}\n\n// Function for the VirtualService header blacklist\nfunc Find(slice []string, val string) (int, bool) {", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "592236690", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "592236690", + "commented_code": "@@ -395,12 +395,35 @@ func virtualServiceName(kfName string, namespace string) string {\n \treturn fmt.Sprintf(\"notebook-%s-%s\", namespace, kfName)\n }\n \n+// Function for the VirtualService header blacklist\n+func Find(slice []string, val string) (int, bool) {", + "comment_created_at": "2021-03-11T10:21:18+00:00", + "comment_author": "yanniszark", + "comment_body": "Two comments here:\r\n1. nit: Let's use lowercase function names for functions that we do not export (uppercase functions are exported and become part of the package api in Go)\r\n2. The function is called `Find`, but you only use its second argument. I think what you want is a function that checks for existence, cause that's what you check. So how about renaming this to `exists` and returning only a bool? ", + "pr_file_module": null + }, + { + "comment_id": "592269369", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "592236690", + "commented_code": "@@ -395,12 +395,35 @@ func virtualServiceName(kfName string, namespace string) string {\n \treturn fmt.Sprintf(\"notebook-%s-%s\", namespace, kfName)\n }\n \n+// Function for the VirtualService header blacklist\n+func Find(slice []string, val string) (int, bool) {", + "comment_created_at": "2021-03-11T11:08:19+00:00", + "comment_author": "davidspek", + "comment_body": "How do you want it to return only the bool?", + "pr_file_module": null + }, + { + "comment_id": "592283008", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5660, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "592236690", + "commented_code": "@@ -395,12 +395,35 @@ func virtualServiceName(kfName string, namespace string) string {\n \treturn fmt.Sprintf(\"notebook-%s-%s\", namespace, kfName)\n }\n \n+// Function for the VirtualService header blacklist\n+func Find(slice []string, val string) (int, bool) {", + "comment_created_at": "2021-03-11T11:28:56+00:00", + "comment_author": "yanniszark", + "comment_body": "This is what I mean:\r\n```go\r\nfunc exists(slice []string, val string) bool {\r\n\tfor i, item := range slice {\r\n\t\tif item == val {\r\n\t\t\treturn true\r\n\t\t}\r\n\t}\r\n\treturn false\r\n}\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "332287666", + "pr_number": 4251, + "pr_file": "components/kf-monitoring/MetricsExporter.go", + "created_at": "2019-10-08T00:01:00+00:00", + "commented_code": "package monitoring_util\n\nimport (\n\t\"fmt\"\n\t\"time\"\n\n\t\"github.com/prometheus/client_golang/prometheus\"\n\t\"github.com/prometheus/client_golang/prometheus/promhttp\"\n\t\"github.com/prometheus/common/log\"\n\t\"net\"\n\t\"net/http\"\n)\n\n// Common label keys for all 
metrics signals\n// COMPONENT: name of component outputing the metrics, eg. \"tf-operator\"\nconst COMPONENT = \"component\"\n// KIND: each componenet can label their metrics with custom tag \"kind\". Suggest keeping \"kind\" value to be CONSTANT PER METRICS.\nconst KIND = \"kind\"\nconst NAMESPACE = \"namespace\"\n// ACTION: request action type of the metrics, eg. \"CRUD\"\nconst ACTION = \"action\"\n// SEVERITY: level of importance, used to filter alerts\nconst SEVERITY = \"severity\"\n\nconst SEVERITY_MINOR = \"Minor\"\nconst SEVERITY_MAJOR = \"Major\"\nconst SEVERITY_CRITICAL = \"Critical\"\n\nconst METRICSPORT = 8079\nconst METRICSPATH = \"/metrics\"\n\nvar (\n\t// Counter metrics\n\t// num of requests counter vec\n\trequestCounter = prometheus.NewCounterVec(\n\t\tprometheus.CounterOpts{\n\t\t\tName: \"request_counter\",\n\t\t\tHelp: \"Number of request_counter\",\n\t\t},\n\t\t[]string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY},\n\t)\n\t// Counter metrics for failed requests\n\trequestFailureCounter = prometheus.NewCounterVec(prometheus.CounterOpts{\n\t\tName: \"request_failure_counter\",\n\t\tHelp: \"Number of request_failure_counter\",\n\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n\n\t// Gauge metrics\n\trequestGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{\n\t\tName: \"requests_gauge\",\n\t\tHelp: \"Number of requests_gauge\",\n\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n\n\t// Gauge metrics for failed requests\n\trequestFailureGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{\n\t\tName: \"requests_failure_gauge\",\n\t\tHelp: \"Number of requests_failure_gauge\",\n\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n\n\t// Linear latencies\n\trequestLinearLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{\n\t\tName: \"request_linear_latency\",\n\t\tHelp: \"A histogram of request_linear_latency\",\n\t\tBuckets: prometheus.LinearBuckets(1, 1, 15),\n\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n\n\t// Exponential latencies\n\trequestExponentialLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{\n\t\tName: \"request_exponential_latency\",\n\t\tHelp: \"A histogram of request_exponential_latency\",\n\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n\n\tserviceHeartbeat = prometheus.NewCounter(prometheus.CounterOpts{\n\t\tName: \"service_heartbeat\",\n\t\tHelp: \"Heartbeat signal every 10 seconds indicating pods are alive.\",\n\t})\n)\n\nfunc init() {\n\t// Register prometheus counters\n\tprometheus.MustRegister(requestCounter)\n\tprometheus.MustRegister(requestFailureCounter)\n\tprometheus.MustRegister(requestGauge)\n\tprometheus.MustRegister(requestFailureGauge)\n\tprometheus.MustRegister(requestLinearLatency)\n\tprometheus.MustRegister(requestExponentialLatency)\n\tprometheus.MustRegister(serviceHeartbeat)\n}\n\ntype MetricsExporter struct {\n\tcomponent\tstring\n\t// Add error channel so that kf_monitor user can get notified when monitoring thread is down.\n\tErrChan chan error\n\tmetricsPort int\n}\n\nfunc NewMetricsExporter(component string) *MetricsExporter {\n\treturn &MetricsExporter{\n\t\tcomponent: component,\n\t\tErrChan: make(chan error),\n\t\tmetricsPort: METRICSPORT,\n\t}\n}\n\nfunc (me *MetricsExporter) SetPort(port int) *MetricsExporter {", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "332287666", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4251, + "pr_file": "components/kf-monitoring/MetricsExporter.go", + 
"discussion_id": "332287666", + "commented_code": "@@ -0,0 +1,161 @@\n+package monitoring_util\n+\n+import (\n+\t\"fmt\"\n+\t\"time\"\n+\n+\t\"github.com/prometheus/client_golang/prometheus\"\n+\t\"github.com/prometheus/client_golang/prometheus/promhttp\"\n+\t\"github.com/prometheus/common/log\"\n+\t\"net\"\n+\t\"net/http\"\n+)\n+\n+// Common label keys for all metrics signals\n+// COMPONENT: name of component outputing the metrics, eg. \"tf-operator\"\n+const COMPONENT = \"component\"\n+// KIND: each componenet can label their metrics with custom tag \"kind\". Suggest keeping \"kind\" value to be CONSTANT PER METRICS.\n+const KIND = \"kind\"\n+const NAMESPACE = \"namespace\"\n+// ACTION: request action type of the metrics, eg. \"CRUD\"\n+const ACTION = \"action\"\n+// SEVERITY: level of importance, used to filter alerts\n+const SEVERITY = \"severity\"\n+\n+const SEVERITY_MINOR = \"Minor\"\n+const SEVERITY_MAJOR = \"Major\"\n+const SEVERITY_CRITICAL = \"Critical\"\n+\n+const METRICSPORT = 8079\n+const METRICSPATH = \"/metrics\"\n+\n+var (\n+\t// Counter metrics\n+\t// num of requests counter vec\n+\trequestCounter = prometheus.NewCounterVec(\n+\t\tprometheus.CounterOpts{\n+\t\t\tName: \"request_counter\",\n+\t\t\tHelp: \"Number of request_counter\",\n+\t\t},\n+\t\t[]string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY},\n+\t)\n+\t// Counter metrics for failed requests\n+\trequestFailureCounter = prometheus.NewCounterVec(prometheus.CounterOpts{\n+\t\tName: \"request_failure_counter\",\n+\t\tHelp: \"Number of request_failure_counter\",\n+\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n+\n+\t// Gauge metrics\n+\trequestGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{\n+\t\tName: \"requests_gauge\",\n+\t\tHelp: \"Number of requests_gauge\",\n+\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n+\n+\t// Gauge metrics for failed requests\n+\trequestFailureGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{\n+\t\tName: \"requests_failure_gauge\",\n+\t\tHelp: \"Number of requests_failure_gauge\",\n+\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n+\n+\t// Linear latencies\n+\trequestLinearLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{\n+\t\tName: \"request_linear_latency\",\n+\t\tHelp: \"A histogram of request_linear_latency\",\n+\t\tBuckets: prometheus.LinearBuckets(1, 1, 15),\n+\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n+\n+\t// Exponential latencies\n+\trequestExponentialLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{\n+\t\tName: \"request_exponential_latency\",\n+\t\tHelp: \"A histogram of request_exponential_latency\",\n+\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n+\n+\tserviceHeartbeat = prometheus.NewCounter(prometheus.CounterOpts{\n+\t\tName: \"service_heartbeat\",\n+\t\tHelp: \"Heartbeat signal every 10 seconds indicating pods are alive.\",\n+\t})\n+)\n+\n+func init() {\n+\t// Register prometheus counters\n+\tprometheus.MustRegister(requestCounter)\n+\tprometheus.MustRegister(requestFailureCounter)\n+\tprometheus.MustRegister(requestGauge)\n+\tprometheus.MustRegister(requestFailureGauge)\n+\tprometheus.MustRegister(requestLinearLatency)\n+\tprometheus.MustRegister(requestExponentialLatency)\n+\tprometheus.MustRegister(serviceHeartbeat)\n+}\n+\n+type MetricsExporter struct {\n+\tcomponent\tstring\n+\t// Add error channel so that kf_monitor user can get notified when monitoring thread is down.\n+\tErrChan chan error\n+\tmetricsPort int\n+}\n+\n+func NewMetricsExporter(component 
string) *MetricsExporter {\n+\treturn &MetricsExporter{\n+\t\tcomponent: component,\n+\t\tErrChan: make(chan error),\n+\t\tmetricsPort: METRICSPORT,\n+\t}\n+}\n+\n+func (me *MetricsExporter) SetPort(port int) *MetricsExporter {", + "comment_created_at": "2019-10-08T00:01:00+00:00", + "comment_author": "zhenghuiwang", + "comment_body": "If port can be changed, why not simply export the field as `Port`?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "320269639", + "pr_number": 4040, + "pr_file": "bootstrap/pkg/kfapp/apply/apply.go", + "created_at": "2019-09-03T13:25:22+00:00", + "commented_code": "package apply\n\nimport (\n\t\"errors\"\n\t\"fmt\"\n\t\"io\"\n\t\"os\"\n\n\tkftypes \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n\tkfdefsv3 \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n\tlog \"github.com/sirupsen/logrus\"\n)\n\n// BootstrapKubeflow is used by the kfctl apply sub-command to take in a configfile\n// as a flag and boostrap a KfApp and deploy it\nfunc BootstrapKubeflow(configFilePath string, kfResource kftypes.ResourceEnum) error {\n\t// Set default app name to kf-app\n\tappName := \"kf-app\"\n\n\t// Construct KfDef from the configFilePath provided\n\tkfDef := &kfdefsv3.KfDef{}\n\tkfDef, err := kfdefsv3.LoadKFDefFromURI(configFilePath)\n\tif err != nil {\n\t\tlog.Printf(\"Unable to create KfDef from config file: %v\", err)\n\t}\n\tif kfDef.Name != \"\" {\n\t\tlog.Warnf(\"Overriding KfDef.Spec.Name; old value %v; new value %v\", kfDef.Name, appName)\n\t}\n\tkfDef.Name = appName\n\tisValid, msg := kfDef.IsValid()\n\tif !isValid {\n\t\tlog.Printf(\"Invalid kfdef: %v\", isValid)\n\t\tlog.Printf(\"Error validating generated KfDef, please check config file validity: %v\", msg)\n\t}\n\tcwd, err := os.Getwd()\n\tif err != nil {\n\t\tlog.Printf(\"Error getting current working directory: %v\", err)\n\t}\n\tfmt.Println(\"Present working directory is: %v\", cwd)\n\t// Check if current directory is empty and if it is error out\n\tisEmpty, err := IsCwdEmpty(cwd)\n\tif !isEmpty && err != nil {\n\t\treturn fmt.Errorf(\"Current directory is not empty, please try again in an empty directory: %v\", err)\n\t}\n\tkfDef.Spec.AppDir = cwd\n\tif kfDef.Spec.AppDir == \"\" {\n\t\treturn errors.New(\"kfDef App Dir not set\")\n\t}\n\tlog.Warnf(\"App directory name: %v\", kfDef.Spec.AppDir)\n\tcfgFilePath, err := coordinator.CreateKfAppCfgFile(kfDef)\n\tif err != nil {\n\t\tlog.Errorf(\"Error creating app.yaml from KfDef: %v\", err)\n\t\treturn err\n\t}\n\n\tlog.Printf(\"Syncing Cache\")\n\terr = kfDef.SyncCache()\n\tif err != nil {\n\t\tlog.Errorf(\"Failed to synchronize the cache; error: %v\", err)\n\t\treturn err\n\t}\n\t// Save app.yaml because we need to preserve information about the cache.\n\tif err := kfDef.WriteToFile(cfgFilePath); err != nil {\n\t\tlog.Errorf(\"Failed to save KfDef to %v; error %v\", cfgFilePath, err)\n\t\treturn err\n\t}\n\tlog.Infof(\"Saved configfile as kfdef in path: %v\", cfgFilePath)\n\n\t// Load KfApp for Generate and Apply\n\tKfApp, KfErr := coordinator.LoadKfAppCfgFile(cfgFilePath)\n\tif KfErr != nil {\n\t\tlog.Printf(\"Error loading KfApp from configfilepath: %v\", KfErr)\n\t}\n\t// Once init is done, we generate and apply subsequently\n\tlog.Println(\"Kubeflow Generate...\")\n\tgenerateErr := KfApp.Generate(kfResource)\n\tif generateErr != nil {\n\t\tlog.Println(\"Unable to generate resources for KfApp\", generateErr)\n\t\treturn 
generateErr\n\t}\n\tlog.Println(\"Kubeflow Apply...\")\n\tapplyErr := KfApp.Apply(kfResource)\n\tif applyErr != nil {\n\t\tlog.Println(\"Unable to apply resources for KfApp\", applyErr)\n\t}\n\treturn nil\n}\n\n// IsCwdEmpty - quick check to determine if the working directory is empty\nfunc IsCwdEmpty(name string) (bool, error) {", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "320269639", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4040, + "pr_file": "bootstrap/pkg/kfapp/apply/apply.go", + "discussion_id": "320269639", + "commented_code": "@@ -0,0 +1,103 @@\n+package apply\n+\n+import (\n+\t\"errors\"\n+\t\"fmt\"\n+\t\"io\"\n+\t\"os\"\n+\n+\tkftypes \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n+\tkfdefsv3 \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n+\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n+\tlog \"github.com/sirupsen/logrus\"\n+)\n+\n+// BootstrapKubeflow is used by the kfctl apply sub-command to take in a configfile\n+// as a flag and boostrap a KfApp and deploy it\n+func BootstrapKubeflow(configFilePath string, kfResource kftypes.ResourceEnum) error {\n+\t// Set default app name to kf-app\n+\tappName := \"kf-app\"\n+\n+\t// Construct KfDef from the configFilePath provided\n+\tkfDef := &kfdefsv3.KfDef{}\n+\tkfDef, err := kfdefsv3.LoadKFDefFromURI(configFilePath)\n+\tif err != nil {\n+\t\tlog.Printf(\"Unable to create KfDef from config file: %v\", err)\n+\t}\n+\tif kfDef.Name != \"\" {\n+\t\tlog.Warnf(\"Overriding KfDef.Spec.Name; old value %v; new value %v\", kfDef.Name, appName)\n+\t}\n+\tkfDef.Name = appName\n+\tisValid, msg := kfDef.IsValid()\n+\tif !isValid {\n+\t\tlog.Printf(\"Invalid kfdef: %v\", isValid)\n+\t\tlog.Printf(\"Error validating generated KfDef, please check config file validity: %v\", msg)\n+\t}\n+\tcwd, err := os.Getwd()\n+\tif err != nil {\n+\t\tlog.Printf(\"Error getting current working directory: %v\", err)\n+\t}\n+\tfmt.Println(\"Present working directory is: %v\", cwd)\n+\t// Check if current directory is empty and if it is error out\n+\tisEmpty, err := IsCwdEmpty(cwd)\n+\tif !isEmpty && err != nil {\n+\t\treturn fmt.Errorf(\"Current directory is not empty, please try again in an empty directory: %v\", err)\n+\t}\n+\tkfDef.Spec.AppDir = cwd\n+\tif kfDef.Spec.AppDir == \"\" {\n+\t\treturn errors.New(\"kfDef App Dir not set\")\n+\t}\n+\tlog.Warnf(\"App directory name: %v\", kfDef.Spec.AppDir)\n+\tcfgFilePath, err := coordinator.CreateKfAppCfgFile(kfDef)\n+\tif err != nil {\n+\t\tlog.Errorf(\"Error creating app.yaml from KfDef: %v\", err)\n+\t\treturn err\n+\t}\n+\n+\tlog.Printf(\"Syncing Cache\")\n+\terr = kfDef.SyncCache()\n+\tif err != nil {\n+\t\tlog.Errorf(\"Failed to synchronize the cache; error: %v\", err)\n+\t\treturn err\n+\t}\n+\t// Save app.yaml because we need to preserve information about the cache.\n+\tif err := kfDef.WriteToFile(cfgFilePath); err != nil {\n+\t\tlog.Errorf(\"Failed to save KfDef to %v; error %v\", cfgFilePath, err)\n+\t\treturn err\n+\t}\n+\tlog.Infof(\"Saved configfile as kfdef in path: %v\", cfgFilePath)\n+\n+\t// Load KfApp for Generate and Apply\n+\tKfApp, KfErr := coordinator.LoadKfAppCfgFile(cfgFilePath)\n+\tif KfErr != nil {\n+\t\tlog.Printf(\"Error loading KfApp from configfilepath: %v\", KfErr)\n+\t}\n+\t// Once init is done, we generate and apply subsequently\n+\tlog.Println(\"Kubeflow Generate...\")\n+\tgenerateErr := KfApp.Generate(kfResource)\n+\tif generateErr != nil 
{\n+\t\tlog.Println(\"Unable to generate resources for KfApp\", generateErr)\n+\t\treturn generateErr\n+\t}\n+\tlog.Println(\"Kubeflow Apply...\")\n+\tapplyErr := KfApp.Apply(kfResource)\n+\tif applyErr != nil {\n+\t\tlog.Println(\"Unable to apply resources for KfApp\", applyErr)\n+\t}\n+\treturn nil\n+}\n+\n+// IsCwdEmpty - quick check to determine if the working directory is empty\n+func IsCwdEmpty(name string) (bool, error) {", + "comment_created_at": "2019-09-03T13:25:22+00:00", + "comment_author": "yanniszark", + "comment_body": "Can you make this a non-exported function? (non-capital first letter: isCwdEmpty)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "618194134", + "pr_number": 5761, + "pr_file": "components/profile-controller/controllers/profile_controller.go", + "created_at": "2021-04-22T08:31:24+00:00", + "commented_code": "return\n}\n\nfunc updateNamespaceLabels(ns *corev1.Namespace) bool {\n\tupdated := false\nfunc setNamespaceLabelsFromConfig(ns *corev1.Namespace) {", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "618194134", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5761, + "pr_file": "components/profile-controller/controllers/profile_controller.go", + "discussion_id": "618194134", + "commented_code": "@@ -617,16 +621,23 @@ func removeString(slice []string, s string) (result []string) {\n \treturn\n }\n \n-func updateNamespaceLabels(ns *corev1.Namespace) bool {\n-\tupdated := false\n+func setNamespaceLabelsFromConfig(ns *corev1.Namespace) {", + "comment_created_at": "2021-04-22T08:31:24+00:00", + "comment_author": "Bobgy", + "comment_body": "nit: maybe we should rename now? because we no longer read config here", + "pr_file_module": null + }, + { + "comment_id": "618735512", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5761, + "pr_file": "components/profile-controller/controllers/profile_controller.go", + "discussion_id": "618194134", + "commented_code": "@@ -617,16 +621,23 @@ func removeString(slice []string, s string) (result []string) {\n \treturn\n }\n \n-func updateNamespaceLabels(ns *corev1.Namespace) bool {\n-\tupdated := false\n+func setNamespaceLabelsFromConfig(ns *corev1.Namespace) {", + "comment_created_at": "2021-04-22T21:00:44+00:00", + "comment_author": "zijianjoy", + "comment_body": "Sounds good, I have renamed this function.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "426264174", + "pr_number": 4979, + "pr_file": "components/access-management/kfam/bindings_test.go", + "created_at": "2020-05-17T13:52:27+00:00", + "commented_code": "package kfam\n\nimport (\n\t\"testing\"\n\n\trbacv1 \"k8s.io/api/rbac/v1\"\n)\n\n// Building Binding Object from k8s.io/api/rbac/v1\nfunc getBindingObject(binding string) *Binding {\n\n\treturn &Binding{\n\t\tUser: &rbacv1.Subject{\n\t\t\tKind: \"user\",\n\t\t\tName: binding,\n\t\t},\n\t\tRoleRef: &rbacv1.RoleRef{\n\t\t\tKind: \"clusterrole\",\n\t\t\tName: \"edit\",\n\t\t},\n\t}\n\n}\n\n//Table driven tests\nvar tests = []struct {\n\tin *Binding\n\tout string\n\thasError bool\n}{\n\t{getBindingObject(\"lalith.vaka@zq.msds.kp.org\"), \"user-lalith-vaka-zq-msds-kp-org-clusterrole-edit\", false},\n\t{getBindingObject(\"397401@zq.msds.kp.org\"), \"user-397401-zq-msds-kp-org-clusterrole-edit\", false},\n\t{getBindingObject(\"lalith.397401@zq.msds.kp.org\"), \"user-lalith-397401-zq-msds-kp-org-clusterrole-edit\", false},\n\t{getBindingObject(\"397401.vaka@zq.msds.kp.org\"), \"user-397401-vaka-zq-msds-kp-org-clusterrole-edit\", 
false},\n\t{getBindingObject(\"i397401@zq.msds.kp.org\"), \"user-i397401-zq-msds-kp-org-clusterrole-edit\", false},\n}\n\nfunc TestGetBindingName(t *testing.T) {\n\n\t/* Read the test data from a file if needed\n\n\t// create a testdata folder under this package and add files as needed\n\tfile, err := os.Open(\"./testdata/file.go\")\n\tif err != nil {\n\t\tlog.Fatal(err)\n\t}\n\n\t*/\n\t//format := \"--- %s: %s (%s)\\n\"\n\tfor _, tt := range tests {\n\n\t\ts, error := getBindingName(tt.in)", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "426264174", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4979, + "pr_file": "components/access-management/kfam/bindings_test.go", + "discussion_id": "426264174", + "commented_code": "@@ -0,0 +1,72 @@\n+package kfam\n+\n+import (\n+\t\"testing\"\n+\n+\trbacv1 \"k8s.io/api/rbac/v1\"\n+)\n+\n+// Building Binding Object from k8s.io/api/rbac/v1\n+func getBindingObject(binding string) *Binding {\n+\n+\treturn &Binding{\n+\t\tUser: &rbacv1.Subject{\n+\t\t\tKind: \"user\",\n+\t\t\tName: binding,\n+\t\t},\n+\t\tRoleRef: &rbacv1.RoleRef{\n+\t\t\tKind: \"clusterrole\",\n+\t\t\tName: \"edit\",\n+\t\t},\n+\t}\n+\n+}\n+\n+//Table driven tests\n+var tests = []struct {\n+\tin *Binding\n+\tout string\n+\thasError bool\n+}{\n+\t{getBindingObject(\"lalith.vaka@zq.msds.kp.org\"), \"user-lalith-vaka-zq-msds-kp-org-clusterrole-edit\", false},\n+\t{getBindingObject(\"397401@zq.msds.kp.org\"), \"user-397401-zq-msds-kp-org-clusterrole-edit\", false},\n+\t{getBindingObject(\"lalith.397401@zq.msds.kp.org\"), \"user-lalith-397401-zq-msds-kp-org-clusterrole-edit\", false},\n+\t{getBindingObject(\"397401.vaka@zq.msds.kp.org\"), \"user-397401-vaka-zq-msds-kp-org-clusterrole-edit\", false},\n+\t{getBindingObject(\"i397401@zq.msds.kp.org\"), \"user-i397401-zq-msds-kp-org-clusterrole-edit\", false},\n+}\n+\n+func TestGetBindingName(t *testing.T) {\n+\n+\t/* Read the test data from a file if needed\n+\n+\t// create a testdata folder under this package and add files as needed\n+\tfile, err := os.Open(\"./testdata/file.go\")\n+\tif err != nil {\n+\t\tlog.Fatal(err)\n+\t}\n+\n+\t*/\n+\t//format := \"--- %s: %s (%s)\\n\"\n+\tfor _, tt := range tests {\n+\n+\t\ts, error := getBindingName(tt.in)", + "comment_created_at": "2020-05-17T13:52:27+00:00", + "comment_author": "yanniszark", + "comment_body": "Since `error` is an already declared interface type, let's use a different name for this variable.\r\nA common name for error variables in Go is `err`.", + "pr_file_module": null + }, + { + "comment_id": "426272904", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4979, + "pr_file": "components/access-management/kfam/bindings_test.go", + "discussion_id": "426264174", + "commented_code": "@@ -0,0 +1,72 @@\n+package kfam\n+\n+import (\n+\t\"testing\"\n+\n+\trbacv1 \"k8s.io/api/rbac/v1\"\n+)\n+\n+// Building Binding Object from k8s.io/api/rbac/v1\n+func getBindingObject(binding string) *Binding {\n+\n+\treturn &Binding{\n+\t\tUser: &rbacv1.Subject{\n+\t\t\tKind: \"user\",\n+\t\t\tName: binding,\n+\t\t},\n+\t\tRoleRef: &rbacv1.RoleRef{\n+\t\t\tKind: \"clusterrole\",\n+\t\t\tName: \"edit\",\n+\t\t},\n+\t}\n+\n+}\n+\n+//Table driven tests\n+var tests = []struct {\n+\tin *Binding\n+\tout string\n+\thasError bool\n+}{\n+\t{getBindingObject(\"lalith.vaka@zq.msds.kp.org\"), \"user-lalith-vaka-zq-msds-kp-org-clusterrole-edit\", false},\n+\t{getBindingObject(\"397401@zq.msds.kp.org\"), \"user-397401-zq-msds-kp-org-clusterrole-edit\", 
false},\n+\t{getBindingObject(\"lalith.397401@zq.msds.kp.org\"), \"user-lalith-397401-zq-msds-kp-org-clusterrole-edit\", false},\n+\t{getBindingObject(\"397401.vaka@zq.msds.kp.org\"), \"user-397401-vaka-zq-msds-kp-org-clusterrole-edit\", false},\n+\t{getBindingObject(\"i397401@zq.msds.kp.org\"), \"user-i397401-zq-msds-kp-org-clusterrole-edit\", false},\n+}\n+\n+func TestGetBindingName(t *testing.T) {\n+\n+\t/* Read the test data from a file if needed\n+\n+\t// create a testdata folder under this package and add files as needed\n+\tfile, err := os.Open(\"./testdata/file.go\")\n+\tif err != nil {\n+\t\tlog.Fatal(err)\n+\t}\n+\n+\t*/\n+\t//format := \"--- %s: %s (%s)\\n\"\n+\tfor _, tt := range tests {\n+\n+\t\ts, error := getBindingName(tt.in)", + "comment_created_at": "2020-05-17T15:18:42+00:00", + "comment_author": "lalithvaka", + "comment_body": "Changed the error name", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "320268163", + "pr_number": 4040, + "pr_file": "bootstrap/pkg/kfapp/apply/apply.go", + "created_at": "2019-09-03T13:22:23+00:00", + "commented_code": "package apply\n\nimport (\n\t\"errors\"\n\t\"fmt\"\n\t\"io\"\n\t\"os\"\n\n\tkftypes \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n\tkfdefsv3 \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n\tlog \"github.com/sirupsen/logrus\"\n)\n\n// BootstrapKubeflow is used by the kfctl apply sub-command to take in a configfile\n// as a flag and boostrap a KfApp and deploy it\nfunc BootstrapKubeflow(configFilePath string, kfResource kftypes.ResourceEnum) error {\n\t// Set default app name to kf-app\n\tappName := \"kf-app\"\n\n\t// Construct KfDef from the configFilePath provided\n\tkfDef := &kfdefsv3.KfDef{}\n\tkfDef, err := kfdefsv3.LoadKFDefFromURI(configFilePath)\n\tif err != nil {\n\t\tlog.Printf(\"Unable to create KfDef from config file: %v\", err)\n\t}\n\tif kfDef.Name != \"\" {\n\t\tlog.Warnf(\"Overriding KfDef.Spec.Name; old value %v; new value %v\", kfDef.Name, appName)\n\t}\n\tkfDef.Name = appName\n\tisValid, msg := kfDef.IsValid()\n\tif !isValid {\n\t\tlog.Printf(\"Invalid kfdef: %v\", isValid)\n\t\tlog.Printf(\"Error validating generated KfDef, please check config file validity: %v\", msg)\n\t}\n\tcwd, err := os.Getwd()\n\tif err != nil {\n\t\tlog.Printf(\"Error getting current working directory: %v\", err)\n\t}\n\tfmt.Println(\"Present working directory is: %v\", cwd)\n\t// Check if current directory is empty and if it is error out\n\tisEmpty, err := IsCwdEmpty(cwd)\n\tif !isEmpty && err != nil {\n\t\treturn fmt.Errorf(\"Current directory is not empty, please try again in an empty directory: %v\", err)\n\t}\n\tkfDef.Spec.AppDir = cwd\n\tif kfDef.Spec.AppDir == \"\" {\n\t\treturn errors.New(\"kfDef App Dir not set\")\n\t}\n\tlog.Warnf(\"App directory name: %v\", kfDef.Spec.AppDir)\n\tcfgFilePath, err := coordinator.CreateKfAppCfgFile(kfDef)\n\tif err != nil {\n\t\tlog.Errorf(\"Error creating app.yaml from KfDef: %v\", err)\n\t\treturn err\n\t}\n\n\tlog.Printf(\"Syncing Cache\")\n\terr = kfDef.SyncCache()\n\tif err != nil {\n\t\tlog.Errorf(\"Failed to synchronize the cache; error: %v\", err)\n\t\treturn err\n\t}\n\t// Save app.yaml because we need to preserve information about the cache.\n\tif err := kfDef.WriteToFile(cfgFilePath); err != nil {\n\t\tlog.Errorf(\"Failed to save KfDef to %v; error %v\", cfgFilePath, err)\n\t\treturn err\n\t}\n\tlog.Infof(\"Saved configfile as kfdef in path: %v\", 
cfgFilePath)\n\n\t// Load KfApp for Generate and Apply\n\tKfApp, KfErr := coordinator.LoadKfAppCfgFile(cfgFilePath)\n\tif KfErr != nil {", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "320268163", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4040, + "pr_file": "bootstrap/pkg/kfapp/apply/apply.go", + "discussion_id": "320268163", + "commented_code": "@@ -0,0 +1,103 @@\n+package apply\n+\n+import (\n+\t\"errors\"\n+\t\"fmt\"\n+\t\"io\"\n+\t\"os\"\n+\n+\tkftypes \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n+\tkfdefsv3 \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n+\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n+\tlog \"github.com/sirupsen/logrus\"\n+)\n+\n+// BootstrapKubeflow is used by the kfctl apply sub-command to take in a configfile\n+// as a flag and boostrap a KfApp and deploy it\n+func BootstrapKubeflow(configFilePath string, kfResource kftypes.ResourceEnum) error {\n+\t// Set default app name to kf-app\n+\tappName := \"kf-app\"\n+\n+\t// Construct KfDef from the configFilePath provided\n+\tkfDef := &kfdefsv3.KfDef{}\n+\tkfDef, err := kfdefsv3.LoadKFDefFromURI(configFilePath)\n+\tif err != nil {\n+\t\tlog.Printf(\"Unable to create KfDef from config file: %v\", err)\n+\t}\n+\tif kfDef.Name != \"\" {\n+\t\tlog.Warnf(\"Overriding KfDef.Spec.Name; old value %v; new value %v\", kfDef.Name, appName)\n+\t}\n+\tkfDef.Name = appName\n+\tisValid, msg := kfDef.IsValid()\n+\tif !isValid {\n+\t\tlog.Printf(\"Invalid kfdef: %v\", isValid)\n+\t\tlog.Printf(\"Error validating generated KfDef, please check config file validity: %v\", msg)\n+\t}\n+\tcwd, err := os.Getwd()\n+\tif err != nil {\n+\t\tlog.Printf(\"Error getting current working directory: %v\", err)\n+\t}\n+\tfmt.Println(\"Present working directory is: %v\", cwd)\n+\t// Check if current directory is empty and if it is error out\n+\tisEmpty, err := IsCwdEmpty(cwd)\n+\tif !isEmpty && err != nil {\n+\t\treturn fmt.Errorf(\"Current directory is not empty, please try again in an empty directory: %v\", err)\n+\t}\n+\tkfDef.Spec.AppDir = cwd\n+\tif kfDef.Spec.AppDir == \"\" {\n+\t\treturn errors.New(\"kfDef App Dir not set\")\n+\t}\n+\tlog.Warnf(\"App directory name: %v\", kfDef.Spec.AppDir)\n+\tcfgFilePath, err := coordinator.CreateKfAppCfgFile(kfDef)\n+\tif err != nil {\n+\t\tlog.Errorf(\"Error creating app.yaml from KfDef: %v\", err)\n+\t\treturn err\n+\t}\n+\n+\tlog.Printf(\"Syncing Cache\")\n+\terr = kfDef.SyncCache()\n+\tif err != nil {\n+\t\tlog.Errorf(\"Failed to synchronize the cache; error: %v\", err)\n+\t\treturn err\n+\t}\n+\t// Save app.yaml because we need to preserve information about the cache.\n+\tif err := kfDef.WriteToFile(cfgFilePath); err != nil {\n+\t\tlog.Errorf(\"Failed to save KfDef to %v; error %v\", cfgFilePath, err)\n+\t\treturn err\n+\t}\n+\tlog.Infof(\"Saved configfile as kfdef in path: %v\", cfgFilePath)\n+\n+\t// Load KfApp for Generate and Apply\n+\tKfApp, KfErr := coordinator.LoadKfAppCfgFile(cfgFilePath)\n+\tif KfErr != nil {", + "comment_created_at": "2019-09-03T13:22:23+00:00", + "comment_author": "yanniszark", + "comment_body": "Can you change all of these to just err? 
(kfErr, generateErr, applyErr, ...)", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-go-export-naming-conventions.md b/_reviewers/kubeflow-go-export-naming-conventions.md index 60da1f1..663485c 100644 --- a/_reviewers/kubeflow-go-export-naming-conventions.md +++ b/_reviewers/kubeflow-go-export-naming-conventions.md @@ -54,221 +54,3 @@ type MetricsExporter struct { - Use consistent error variable names, typically `err` rather than custom names like `kfErr`, `generateErr`, etc. This convention is a critical part of Go's visibility system and module design - it's not just a style preference but affects how your code can be used by others. - - -[ - { - "discussion_id": "1373286709", - "pr_number": 7365, - "pr_file": "components/notebook-controller/controllers/culling_controller.go", - "created_at": "2023-10-26T14:36:41+00:00", - "commented_code": "return false\n}\n\nfunc DisableCullingAnnotationIsSet(meta metav1.ObjectMeta) bool {", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1373286709", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7365, - "pr_file": "components/notebook-controller/controllers/culling_controller.go", - "discussion_id": "1373286709", - "commented_code": "@@ -381,6 +388,16 @@ func StopAnnotationIsSet(meta metav1.ObjectMeta) bool {\n \treturn false\n }\n \n+func DisableCullingAnnotationIsSet(meta metav1.ObjectMeta) bool {", - "comment_created_at": "2023-10-26T14:36:41+00:00", - "comment_author": "kimwnasptd", - "comment_body": "small nit, shouldn't this function start from lowercase since it's not meant to be used from outside this go module?", - "pr_file_module": null - }, - { - "comment_id": "1373310284", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7365, - "pr_file": "components/notebook-controller/controllers/culling_controller.go", - "discussion_id": "1373286709", - "commented_code": "@@ -381,6 +388,16 @@ func StopAnnotationIsSet(meta metav1.ObjectMeta) bool {\n \treturn false\n }\n \n+func DisableCullingAnnotationIsSet(meta metav1.ObjectMeta) bool {", - "comment_created_at": "2023-10-26T14:51:59+00:00", - "comment_author": "omrishiv", - "comment_body": "I was following the style of the current code using `StopAnnotationIsSet` above. Agreed it should be lowercase", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "592236690", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "created_at": "2021-03-11T10:21:18+00:00", - "commented_code": "return fmt.Sprintf(\"notebook-%s-%s\", namespace, kfName)\n}\n\n// Function for the VirtualService header blacklist\nfunc Find(slice []string, val string) (int, bool) {", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "592236690", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "592236690", - "commented_code": "@@ -395,12 +395,35 @@ func virtualServiceName(kfName string, namespace string) string {\n \treturn fmt.Sprintf(\"notebook-%s-%s\", namespace, kfName)\n }\n \n+// Function for the VirtualService header blacklist\n+func Find(slice []string, val string) (int, bool) {", - "comment_created_at": "2021-03-11T10:21:18+00:00", - "comment_author": "yanniszark", - "comment_body": "Two comments here:\r\n1. 
nit: Let's use lowercase function names for functions that we do not export (uppercase functions are exported and become part of the package api in Go)\r\n2. The function is called `Find`, but you only use its second argument. I think what you want is a function that checks for existence, cause that's what you check. So how about renaming this to `exists` and returning only a bool? ", - "pr_file_module": null - }, - { - "comment_id": "592269369", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "592236690", - "commented_code": "@@ -395,12 +395,35 @@ func virtualServiceName(kfName string, namespace string) string {\n \treturn fmt.Sprintf(\"notebook-%s-%s\", namespace, kfName)\n }\n \n+// Function for the VirtualService header blacklist\n+func Find(slice []string, val string) (int, bool) {", - "comment_created_at": "2021-03-11T11:08:19+00:00", - "comment_author": "davidspek", - "comment_body": "How do you want it to return only the bool?", - "pr_file_module": null - }, - { - "comment_id": "592283008", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5660, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "592236690", - "commented_code": "@@ -395,12 +395,35 @@ func virtualServiceName(kfName string, namespace string) string {\n \treturn fmt.Sprintf(\"notebook-%s-%s\", namespace, kfName)\n }\n \n+// Function for the VirtualService header blacklist\n+func Find(slice []string, val string) (int, bool) {", - "comment_created_at": "2021-03-11T11:28:56+00:00", - "comment_author": "yanniszark", - "comment_body": "This is what I mean:\r\n```go\r\nfunc exists(slice []string, val string) bool {\r\n\tfor i, item := range slice {\r\n\t\tif item == val {\r\n\t\t\treturn true\r\n\t\t}\r\n\t}\r\n\treturn false\r\n}\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "332287666", - "pr_number": 4251, - "pr_file": "components/kf-monitoring/MetricsExporter.go", - "created_at": "2019-10-08T00:01:00+00:00", - "commented_code": "package monitoring_util\n\nimport (\n\t\"fmt\"\n\t\"time\"\n\n\t\"github.com/prometheus/client_golang/prometheus\"\n\t\"github.com/prometheus/client_golang/prometheus/promhttp\"\n\t\"github.com/prometheus/common/log\"\n\t\"net\"\n\t\"net/http\"\n)\n\n// Common label keys for all metrics signals\n// COMPONENT: name of component outputing the metrics, eg. \"tf-operator\"\nconst COMPONENT = \"component\"\n// KIND: each componenet can label their metrics with custom tag \"kind\". Suggest keeping \"kind\" value to be CONSTANT PER METRICS.\nconst KIND = \"kind\"\nconst NAMESPACE = \"namespace\"\n// ACTION: request action type of the metrics, eg. 
\"CRUD\"\nconst ACTION = \"action\"\n// SEVERITY: level of importance, used to filter alerts\nconst SEVERITY = \"severity\"\n\nconst SEVERITY_MINOR = \"Minor\"\nconst SEVERITY_MAJOR = \"Major\"\nconst SEVERITY_CRITICAL = \"Critical\"\n\nconst METRICSPORT = 8079\nconst METRICSPATH = \"/metrics\"\n\nvar (\n\t// Counter metrics\n\t// num of requests counter vec\n\trequestCounter = prometheus.NewCounterVec(\n\t\tprometheus.CounterOpts{\n\t\t\tName: \"request_counter\",\n\t\t\tHelp: \"Number of request_counter\",\n\t\t},\n\t\t[]string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY},\n\t)\n\t// Counter metrics for failed requests\n\trequestFailureCounter = prometheus.NewCounterVec(prometheus.CounterOpts{\n\t\tName: \"request_failure_counter\",\n\t\tHelp: \"Number of request_failure_counter\",\n\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n\n\t// Gauge metrics\n\trequestGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{\n\t\tName: \"requests_gauge\",\n\t\tHelp: \"Number of requests_gauge\",\n\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n\n\t// Gauge metrics for failed requests\n\trequestFailureGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{\n\t\tName: \"requests_failure_gauge\",\n\t\tHelp: \"Number of requests_failure_gauge\",\n\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n\n\t// Linear latencies\n\trequestLinearLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{\n\t\tName: \"request_linear_latency\",\n\t\tHelp: \"A histogram of request_linear_latency\",\n\t\tBuckets: prometheus.LinearBuckets(1, 1, 15),\n\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n\n\t// Exponential latencies\n\trequestExponentialLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{\n\t\tName: \"request_exponential_latency\",\n\t\tHelp: \"A histogram of request_exponential_latency\",\n\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n\n\tserviceHeartbeat = prometheus.NewCounter(prometheus.CounterOpts{\n\t\tName: \"service_heartbeat\",\n\t\tHelp: \"Heartbeat signal every 10 seconds indicating pods are alive.\",\n\t})\n)\n\nfunc init() {\n\t// Register prometheus counters\n\tprometheus.MustRegister(requestCounter)\n\tprometheus.MustRegister(requestFailureCounter)\n\tprometheus.MustRegister(requestGauge)\n\tprometheus.MustRegister(requestFailureGauge)\n\tprometheus.MustRegister(requestLinearLatency)\n\tprometheus.MustRegister(requestExponentialLatency)\n\tprometheus.MustRegister(serviceHeartbeat)\n}\n\ntype MetricsExporter struct {\n\tcomponent\tstring\n\t// Add error channel so that kf_monitor user can get notified when monitoring thread is down.\n\tErrChan chan error\n\tmetricsPort int\n}\n\nfunc NewMetricsExporter(component string) *MetricsExporter {\n\treturn &MetricsExporter{\n\t\tcomponent: component,\n\t\tErrChan: make(chan error),\n\t\tmetricsPort: METRICSPORT,\n\t}\n}\n\nfunc (me *MetricsExporter) SetPort(port int) *MetricsExporter {", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "332287666", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4251, - "pr_file": "components/kf-monitoring/MetricsExporter.go", - "discussion_id": "332287666", - "commented_code": "@@ -0,0 +1,161 @@\n+package monitoring_util\n+\n+import (\n+\t\"fmt\"\n+\t\"time\"\n+\n+\t\"github.com/prometheus/client_golang/prometheus\"\n+\t\"github.com/prometheus/client_golang/prometheus/promhttp\"\n+\t\"github.com/prometheus/common/log\"\n+\t\"net\"\n+\t\"net/http\"\n+)\n+\n+// Common label keys for all metrics 
signals\n+// COMPONENT: name of component outputing the metrics, eg. \"tf-operator\"\n+const COMPONENT = \"component\"\n+// KIND: each componenet can label their metrics with custom tag \"kind\". Suggest keeping \"kind\" value to be CONSTANT PER METRICS.\n+const KIND = \"kind\"\n+const NAMESPACE = \"namespace\"\n+// ACTION: request action type of the metrics, eg. \"CRUD\"\n+const ACTION = \"action\"\n+// SEVERITY: level of importance, used to filter alerts\n+const SEVERITY = \"severity\"\n+\n+const SEVERITY_MINOR = \"Minor\"\n+const SEVERITY_MAJOR = \"Major\"\n+const SEVERITY_CRITICAL = \"Critical\"\n+\n+const METRICSPORT = 8079\n+const METRICSPATH = \"/metrics\"\n+\n+var (\n+\t// Counter metrics\n+\t// num of requests counter vec\n+\trequestCounter = prometheus.NewCounterVec(\n+\t\tprometheus.CounterOpts{\n+\t\t\tName: \"request_counter\",\n+\t\t\tHelp: \"Number of request_counter\",\n+\t\t},\n+\t\t[]string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY},\n+\t)\n+\t// Counter metrics for failed requests\n+\trequestFailureCounter = prometheus.NewCounterVec(prometheus.CounterOpts{\n+\t\tName: \"request_failure_counter\",\n+\t\tHelp: \"Number of request_failure_counter\",\n+\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n+\n+\t// Gauge metrics\n+\trequestGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{\n+\t\tName: \"requests_gauge\",\n+\t\tHelp: \"Number of requests_gauge\",\n+\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n+\n+\t// Gauge metrics for failed requests\n+\trequestFailureGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{\n+\t\tName: \"requests_failure_gauge\",\n+\t\tHelp: \"Number of requests_failure_gauge\",\n+\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n+\n+\t// Linear latencies\n+\trequestLinearLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{\n+\t\tName: \"request_linear_latency\",\n+\t\tHelp: \"A histogram of request_linear_latency\",\n+\t\tBuckets: prometheus.LinearBuckets(1, 1, 15),\n+\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n+\n+\t// Exponential latencies\n+\trequestExponentialLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{\n+\t\tName: \"request_exponential_latency\",\n+\t\tHelp: \"A histogram of request_exponential_latency\",\n+\t}, []string{COMPONENT, KIND, NAMESPACE, ACTION, SEVERITY})\n+\n+\tserviceHeartbeat = prometheus.NewCounter(prometheus.CounterOpts{\n+\t\tName: \"service_heartbeat\",\n+\t\tHelp: \"Heartbeat signal every 10 seconds indicating pods are alive.\",\n+\t})\n+)\n+\n+func init() {\n+\t// Register prometheus counters\n+\tprometheus.MustRegister(requestCounter)\n+\tprometheus.MustRegister(requestFailureCounter)\n+\tprometheus.MustRegister(requestGauge)\n+\tprometheus.MustRegister(requestFailureGauge)\n+\tprometheus.MustRegister(requestLinearLatency)\n+\tprometheus.MustRegister(requestExponentialLatency)\n+\tprometheus.MustRegister(serviceHeartbeat)\n+}\n+\n+type MetricsExporter struct {\n+\tcomponent\tstring\n+\t// Add error channel so that kf_monitor user can get notified when monitoring thread is down.\n+\tErrChan chan error\n+\tmetricsPort int\n+}\n+\n+func NewMetricsExporter(component string) *MetricsExporter {\n+\treturn &MetricsExporter{\n+\t\tcomponent: component,\n+\t\tErrChan: make(chan error),\n+\t\tmetricsPort: METRICSPORT,\n+\t}\n+}\n+\n+func (me *MetricsExporter) SetPort(port int) *MetricsExporter {", - "comment_created_at": "2019-10-08T00:01:00+00:00", - "comment_author": "zhenghuiwang", - "comment_body": "If port can be changed, why not simply 
export the field as `Port`?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "320269639", - "pr_number": 4040, - "pr_file": "bootstrap/pkg/kfapp/apply/apply.go", - "created_at": "2019-09-03T13:25:22+00:00", - "commented_code": "package apply\n\nimport (\n\t\"errors\"\n\t\"fmt\"\n\t\"io\"\n\t\"os\"\n\n\tkftypes \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n\tkfdefsv3 \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n\tlog \"github.com/sirupsen/logrus\"\n)\n\n// BootstrapKubeflow is used by the kfctl apply sub-command to take in a configfile\n// as a flag and boostrap a KfApp and deploy it\nfunc BootstrapKubeflow(configFilePath string, kfResource kftypes.ResourceEnum) error {\n\t// Set default app name to kf-app\n\tappName := \"kf-app\"\n\n\t// Construct KfDef from the configFilePath provided\n\tkfDef := &kfdefsv3.KfDef{}\n\tkfDef, err := kfdefsv3.LoadKFDefFromURI(configFilePath)\n\tif err != nil {\n\t\tlog.Printf(\"Unable to create KfDef from config file: %v\", err)\n\t}\n\tif kfDef.Name != \"\" {\n\t\tlog.Warnf(\"Overriding KfDef.Spec.Name; old value %v; new value %v\", kfDef.Name, appName)\n\t}\n\tkfDef.Name = appName\n\tisValid, msg := kfDef.IsValid()\n\tif !isValid {\n\t\tlog.Printf(\"Invalid kfdef: %v\", isValid)\n\t\tlog.Printf(\"Error validating generated KfDef, please check config file validity: %v\", msg)\n\t}\n\tcwd, err := os.Getwd()\n\tif err != nil {\n\t\tlog.Printf(\"Error getting current working directory: %v\", err)\n\t}\n\tfmt.Println(\"Present working directory is: %v\", cwd)\n\t// Check if current directory is empty and if it is error out\n\tisEmpty, err := IsCwdEmpty(cwd)\n\tif !isEmpty && err != nil {\n\t\treturn fmt.Errorf(\"Current directory is not empty, please try again in an empty directory: %v\", err)\n\t}\n\tkfDef.Spec.AppDir = cwd\n\tif kfDef.Spec.AppDir == \"\" {\n\t\treturn errors.New(\"kfDef App Dir not set\")\n\t}\n\tlog.Warnf(\"App directory name: %v\", kfDef.Spec.AppDir)\n\tcfgFilePath, err := coordinator.CreateKfAppCfgFile(kfDef)\n\tif err != nil {\n\t\tlog.Errorf(\"Error creating app.yaml from KfDef: %v\", err)\n\t\treturn err\n\t}\n\n\tlog.Printf(\"Syncing Cache\")\n\terr = kfDef.SyncCache()\n\tif err != nil {\n\t\tlog.Errorf(\"Failed to synchronize the cache; error: %v\", err)\n\t\treturn err\n\t}\n\t// Save app.yaml because we need to preserve information about the cache.\n\tif err := kfDef.WriteToFile(cfgFilePath); err != nil {\n\t\tlog.Errorf(\"Failed to save KfDef to %v; error %v\", cfgFilePath, err)\n\t\treturn err\n\t}\n\tlog.Infof(\"Saved configfile as kfdef in path: %v\", cfgFilePath)\n\n\t// Load KfApp for Generate and Apply\n\tKfApp, KfErr := coordinator.LoadKfAppCfgFile(cfgFilePath)\n\tif KfErr != nil {\n\t\tlog.Printf(\"Error loading KfApp from configfilepath: %v\", KfErr)\n\t}\n\t// Once init is done, we generate and apply subsequently\n\tlog.Println(\"Kubeflow Generate...\")\n\tgenerateErr := KfApp.Generate(kfResource)\n\tif generateErr != nil {\n\t\tlog.Println(\"Unable to generate resources for KfApp\", generateErr)\n\t\treturn generateErr\n\t}\n\tlog.Println(\"Kubeflow Apply...\")\n\tapplyErr := KfApp.Apply(kfResource)\n\tif applyErr != nil {\n\t\tlog.Println(\"Unable to apply resources for KfApp\", applyErr)\n\t}\n\treturn nil\n}\n\n// IsCwdEmpty - quick check to determine if the working directory is empty\nfunc IsCwdEmpty(name string) (bool, error) {", - "repo_full_name": "kubeflow/kubeflow", - 
"discussion_comments": [ - { - "comment_id": "320269639", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4040, - "pr_file": "bootstrap/pkg/kfapp/apply/apply.go", - "discussion_id": "320269639", - "commented_code": "@@ -0,0 +1,103 @@\n+package apply\n+\n+import (\n+\t\"errors\"\n+\t\"fmt\"\n+\t\"io\"\n+\t\"os\"\n+\n+\tkftypes \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n+\tkfdefsv3 \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n+\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n+\tlog \"github.com/sirupsen/logrus\"\n+)\n+\n+// BootstrapKubeflow is used by the kfctl apply sub-command to take in a configfile\n+// as a flag and boostrap a KfApp and deploy it\n+func BootstrapKubeflow(configFilePath string, kfResource kftypes.ResourceEnum) error {\n+\t// Set default app name to kf-app\n+\tappName := \"kf-app\"\n+\n+\t// Construct KfDef from the configFilePath provided\n+\tkfDef := &kfdefsv3.KfDef{}\n+\tkfDef, err := kfdefsv3.LoadKFDefFromURI(configFilePath)\n+\tif err != nil {\n+\t\tlog.Printf(\"Unable to create KfDef from config file: %v\", err)\n+\t}\n+\tif kfDef.Name != \"\" {\n+\t\tlog.Warnf(\"Overriding KfDef.Spec.Name; old value %v; new value %v\", kfDef.Name, appName)\n+\t}\n+\tkfDef.Name = appName\n+\tisValid, msg := kfDef.IsValid()\n+\tif !isValid {\n+\t\tlog.Printf(\"Invalid kfdef: %v\", isValid)\n+\t\tlog.Printf(\"Error validating generated KfDef, please check config file validity: %v\", msg)\n+\t}\n+\tcwd, err := os.Getwd()\n+\tif err != nil {\n+\t\tlog.Printf(\"Error getting current working directory: %v\", err)\n+\t}\n+\tfmt.Println(\"Present working directory is: %v\", cwd)\n+\t// Check if current directory is empty and if it is error out\n+\tisEmpty, err := IsCwdEmpty(cwd)\n+\tif !isEmpty && err != nil {\n+\t\treturn fmt.Errorf(\"Current directory is not empty, please try again in an empty directory: %v\", err)\n+\t}\n+\tkfDef.Spec.AppDir = cwd\n+\tif kfDef.Spec.AppDir == \"\" {\n+\t\treturn errors.New(\"kfDef App Dir not set\")\n+\t}\n+\tlog.Warnf(\"App directory name: %v\", kfDef.Spec.AppDir)\n+\tcfgFilePath, err := coordinator.CreateKfAppCfgFile(kfDef)\n+\tif err != nil {\n+\t\tlog.Errorf(\"Error creating app.yaml from KfDef: %v\", err)\n+\t\treturn err\n+\t}\n+\n+\tlog.Printf(\"Syncing Cache\")\n+\terr = kfDef.SyncCache()\n+\tif err != nil {\n+\t\tlog.Errorf(\"Failed to synchronize the cache; error: %v\", err)\n+\t\treturn err\n+\t}\n+\t// Save app.yaml because we need to preserve information about the cache.\n+\tif err := kfDef.WriteToFile(cfgFilePath); err != nil {\n+\t\tlog.Errorf(\"Failed to save KfDef to %v; error %v\", cfgFilePath, err)\n+\t\treturn err\n+\t}\n+\tlog.Infof(\"Saved configfile as kfdef in path: %v\", cfgFilePath)\n+\n+\t// Load KfApp for Generate and Apply\n+\tKfApp, KfErr := coordinator.LoadKfAppCfgFile(cfgFilePath)\n+\tif KfErr != nil {\n+\t\tlog.Printf(\"Error loading KfApp from configfilepath: %v\", KfErr)\n+\t}\n+\t// Once init is done, we generate and apply subsequently\n+\tlog.Println(\"Kubeflow Generate...\")\n+\tgenerateErr := KfApp.Generate(kfResource)\n+\tif generateErr != nil {\n+\t\tlog.Println(\"Unable to generate resources for KfApp\", generateErr)\n+\t\treturn generateErr\n+\t}\n+\tlog.Println(\"Kubeflow Apply...\")\n+\tapplyErr := KfApp.Apply(kfResource)\n+\tif applyErr != nil {\n+\t\tlog.Println(\"Unable to apply resources for KfApp\", applyErr)\n+\t}\n+\treturn nil\n+}\n+\n+// IsCwdEmpty - quick check to determine if the working directory is empty\n+func 
IsCwdEmpty(name string) (bool, error) {", - "comment_created_at": "2019-09-03T13:25:22+00:00", - "comment_author": "yanniszark", - "comment_body": "Can you make this a non-exported function? (non-capital first letter: isCwdEmpty)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "618194134", - "pr_number": 5761, - "pr_file": "components/profile-controller/controllers/profile_controller.go", - "created_at": "2021-04-22T08:31:24+00:00", - "commented_code": "return\n}\n\nfunc updateNamespaceLabels(ns *corev1.Namespace) bool {\n\tupdated := false\nfunc setNamespaceLabelsFromConfig(ns *corev1.Namespace) {", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "618194134", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5761, - "pr_file": "components/profile-controller/controllers/profile_controller.go", - "discussion_id": "618194134", - "commented_code": "@@ -617,16 +621,23 @@ func removeString(slice []string, s string) (result []string) {\n \treturn\n }\n \n-func updateNamespaceLabels(ns *corev1.Namespace) bool {\n-\tupdated := false\n+func setNamespaceLabelsFromConfig(ns *corev1.Namespace) {", - "comment_created_at": "2021-04-22T08:31:24+00:00", - "comment_author": "Bobgy", - "comment_body": "nit: maybe we should rename now? because we no longer read config here", - "pr_file_module": null - }, - { - "comment_id": "618735512", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5761, - "pr_file": "components/profile-controller/controllers/profile_controller.go", - "discussion_id": "618194134", - "commented_code": "@@ -617,16 +621,23 @@ func removeString(slice []string, s string) (result []string) {\n \treturn\n }\n \n-func updateNamespaceLabels(ns *corev1.Namespace) bool {\n-\tupdated := false\n+func setNamespaceLabelsFromConfig(ns *corev1.Namespace) {", - "comment_created_at": "2021-04-22T21:00:44+00:00", - "comment_author": "zijianjoy", - "comment_body": "Sounds good, I have renamed this function.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "426264174", - "pr_number": 4979, - "pr_file": "components/access-management/kfam/bindings_test.go", - "created_at": "2020-05-17T13:52:27+00:00", - "commented_code": "package kfam\n\nimport (\n\t\"testing\"\n\n\trbacv1 \"k8s.io/api/rbac/v1\"\n)\n\n// Building Binding Object from k8s.io/api/rbac/v1\nfunc getBindingObject(binding string) *Binding {\n\n\treturn &Binding{\n\t\tUser: &rbacv1.Subject{\n\t\t\tKind: \"user\",\n\t\t\tName: binding,\n\t\t},\n\t\tRoleRef: &rbacv1.RoleRef{\n\t\t\tKind: \"clusterrole\",\n\t\t\tName: \"edit\",\n\t\t},\n\t}\n\n}\n\n//Table driven tests\nvar tests = []struct {\n\tin *Binding\n\tout string\n\thasError bool\n}{\n\t{getBindingObject(\"lalith.vaka@zq.msds.kp.org\"), \"user-lalith-vaka-zq-msds-kp-org-clusterrole-edit\", false},\n\t{getBindingObject(\"397401@zq.msds.kp.org\"), \"user-397401-zq-msds-kp-org-clusterrole-edit\", false},\n\t{getBindingObject(\"lalith.397401@zq.msds.kp.org\"), \"user-lalith-397401-zq-msds-kp-org-clusterrole-edit\", false},\n\t{getBindingObject(\"397401.vaka@zq.msds.kp.org\"), \"user-397401-vaka-zq-msds-kp-org-clusterrole-edit\", false},\n\t{getBindingObject(\"i397401@zq.msds.kp.org\"), \"user-i397401-zq-msds-kp-org-clusterrole-edit\", false},\n}\n\nfunc TestGetBindingName(t *testing.T) {\n\n\t/* Read the test data from a file if needed\n\n\t// create a testdata folder under this package and add files as needed\n\tfile, err := os.Open(\"./testdata/file.go\")\n\tif err != nil {\n\t\tlog.Fatal(err)\n\t}\n\n\t*/\n\t//format 
:= \"--- %s: %s (%s)\\n\"\n\tfor _, tt := range tests {\n\n\t\ts, error := getBindingName(tt.in)", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "426264174", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4979, - "pr_file": "components/access-management/kfam/bindings_test.go", - "discussion_id": "426264174", - "commented_code": "@@ -0,0 +1,72 @@\n+package kfam\n+\n+import (\n+\t\"testing\"\n+\n+\trbacv1 \"k8s.io/api/rbac/v1\"\n+)\n+\n+// Building Binding Object from k8s.io/api/rbac/v1\n+func getBindingObject(binding string) *Binding {\n+\n+\treturn &Binding{\n+\t\tUser: &rbacv1.Subject{\n+\t\t\tKind: \"user\",\n+\t\t\tName: binding,\n+\t\t},\n+\t\tRoleRef: &rbacv1.RoleRef{\n+\t\t\tKind: \"clusterrole\",\n+\t\t\tName: \"edit\",\n+\t\t},\n+\t}\n+\n+}\n+\n+//Table driven tests\n+var tests = []struct {\n+\tin *Binding\n+\tout string\n+\thasError bool\n+}{\n+\t{getBindingObject(\"lalith.vaka@zq.msds.kp.org\"), \"user-lalith-vaka-zq-msds-kp-org-clusterrole-edit\", false},\n+\t{getBindingObject(\"397401@zq.msds.kp.org\"), \"user-397401-zq-msds-kp-org-clusterrole-edit\", false},\n+\t{getBindingObject(\"lalith.397401@zq.msds.kp.org\"), \"user-lalith-397401-zq-msds-kp-org-clusterrole-edit\", false},\n+\t{getBindingObject(\"397401.vaka@zq.msds.kp.org\"), \"user-397401-vaka-zq-msds-kp-org-clusterrole-edit\", false},\n+\t{getBindingObject(\"i397401@zq.msds.kp.org\"), \"user-i397401-zq-msds-kp-org-clusterrole-edit\", false},\n+}\n+\n+func TestGetBindingName(t *testing.T) {\n+\n+\t/* Read the test data from a file if needed\n+\n+\t// create a testdata folder under this package and add files as needed\n+\tfile, err := os.Open(\"./testdata/file.go\")\n+\tif err != nil {\n+\t\tlog.Fatal(err)\n+\t}\n+\n+\t*/\n+\t//format := \"--- %s: %s (%s)\\n\"\n+\tfor _, tt := range tests {\n+\n+\t\ts, error := getBindingName(tt.in)", - "comment_created_at": "2020-05-17T13:52:27+00:00", - "comment_author": "yanniszark", - "comment_body": "Since `error` is an already declared interface type, let's use a different name for this variable.\r\nA common name for error variables in Go is `err`.", - "pr_file_module": null - }, - { - "comment_id": "426272904", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4979, - "pr_file": "components/access-management/kfam/bindings_test.go", - "discussion_id": "426264174", - "commented_code": "@@ -0,0 +1,72 @@\n+package kfam\n+\n+import (\n+\t\"testing\"\n+\n+\trbacv1 \"k8s.io/api/rbac/v1\"\n+)\n+\n+// Building Binding Object from k8s.io/api/rbac/v1\n+func getBindingObject(binding string) *Binding {\n+\n+\treturn &Binding{\n+\t\tUser: &rbacv1.Subject{\n+\t\t\tKind: \"user\",\n+\t\t\tName: binding,\n+\t\t},\n+\t\tRoleRef: &rbacv1.RoleRef{\n+\t\t\tKind: \"clusterrole\",\n+\t\t\tName: \"edit\",\n+\t\t},\n+\t}\n+\n+}\n+\n+//Table driven tests\n+var tests = []struct {\n+\tin *Binding\n+\tout string\n+\thasError bool\n+}{\n+\t{getBindingObject(\"lalith.vaka@zq.msds.kp.org\"), \"user-lalith-vaka-zq-msds-kp-org-clusterrole-edit\", false},\n+\t{getBindingObject(\"397401@zq.msds.kp.org\"), \"user-397401-zq-msds-kp-org-clusterrole-edit\", false},\n+\t{getBindingObject(\"lalith.397401@zq.msds.kp.org\"), \"user-lalith-397401-zq-msds-kp-org-clusterrole-edit\", false},\n+\t{getBindingObject(\"397401.vaka@zq.msds.kp.org\"), \"user-397401-vaka-zq-msds-kp-org-clusterrole-edit\", false},\n+\t{getBindingObject(\"i397401@zq.msds.kp.org\"), \"user-i397401-zq-msds-kp-org-clusterrole-edit\", false},\n+}\n+\n+func TestGetBindingName(t *testing.T) {\n+\n+\t/* 
Read the test data from a file if needed\n+\n+\t// create a testdata folder under this package and add files as needed\n+\tfile, err := os.Open(\"./testdata/file.go\")\n+\tif err != nil {\n+\t\tlog.Fatal(err)\n+\t}\n+\n+\t*/\n+\t//format := \"--- %s: %s (%s)\\n\"\n+\tfor _, tt := range tests {\n+\n+\t\ts, error := getBindingName(tt.in)", - "comment_created_at": "2020-05-17T15:18:42+00:00", - "comment_author": "lalithvaka", - "comment_body": "Changed the error name", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "320268163", - "pr_number": 4040, - "pr_file": "bootstrap/pkg/kfapp/apply/apply.go", - "created_at": "2019-09-03T13:22:23+00:00", - "commented_code": "package apply\n\nimport (\n\t\"errors\"\n\t\"fmt\"\n\t\"io\"\n\t\"os\"\n\n\tkftypes \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n\tkfdefsv3 \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n\tlog \"github.com/sirupsen/logrus\"\n)\n\n// BootstrapKubeflow is used by the kfctl apply sub-command to take in a configfile\n// as a flag and boostrap a KfApp and deploy it\nfunc BootstrapKubeflow(configFilePath string, kfResource kftypes.ResourceEnum) error {\n\t// Set default app name to kf-app\n\tappName := \"kf-app\"\n\n\t// Construct KfDef from the configFilePath provided\n\tkfDef := &kfdefsv3.KfDef{}\n\tkfDef, err := kfdefsv3.LoadKFDefFromURI(configFilePath)\n\tif err != nil {\n\t\tlog.Printf(\"Unable to create KfDef from config file: %v\", err)\n\t}\n\tif kfDef.Name != \"\" {\n\t\tlog.Warnf(\"Overriding KfDef.Spec.Name; old value %v; new value %v\", kfDef.Name, appName)\n\t}\n\tkfDef.Name = appName\n\tisValid, msg := kfDef.IsValid()\n\tif !isValid {\n\t\tlog.Printf(\"Invalid kfdef: %v\", isValid)\n\t\tlog.Printf(\"Error validating generated KfDef, please check config file validity: %v\", msg)\n\t}\n\tcwd, err := os.Getwd()\n\tif err != nil {\n\t\tlog.Printf(\"Error getting current working directory: %v\", err)\n\t}\n\tfmt.Println(\"Present working directory is: %v\", cwd)\n\t// Check if current directory is empty and if it is error out\n\tisEmpty, err := IsCwdEmpty(cwd)\n\tif !isEmpty && err != nil {\n\t\treturn fmt.Errorf(\"Current directory is not empty, please try again in an empty directory: %v\", err)\n\t}\n\tkfDef.Spec.AppDir = cwd\n\tif kfDef.Spec.AppDir == \"\" {\n\t\treturn errors.New(\"kfDef App Dir not set\")\n\t}\n\tlog.Warnf(\"App directory name: %v\", kfDef.Spec.AppDir)\n\tcfgFilePath, err := coordinator.CreateKfAppCfgFile(kfDef)\n\tif err != nil {\n\t\tlog.Errorf(\"Error creating app.yaml from KfDef: %v\", err)\n\t\treturn err\n\t}\n\n\tlog.Printf(\"Syncing Cache\")\n\terr = kfDef.SyncCache()\n\tif err != nil {\n\t\tlog.Errorf(\"Failed to synchronize the cache; error: %v\", err)\n\t\treturn err\n\t}\n\t// Save app.yaml because we need to preserve information about the cache.\n\tif err := kfDef.WriteToFile(cfgFilePath); err != nil {\n\t\tlog.Errorf(\"Failed to save KfDef to %v; error %v\", cfgFilePath, err)\n\t\treturn err\n\t}\n\tlog.Infof(\"Saved configfile as kfdef in path: %v\", cfgFilePath)\n\n\t// Load KfApp for Generate and Apply\n\tKfApp, KfErr := coordinator.LoadKfAppCfgFile(cfgFilePath)\n\tif KfErr != nil {", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "320268163", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4040, - "pr_file": "bootstrap/pkg/kfapp/apply/apply.go", - "discussion_id": "320268163", - "commented_code": "@@ -0,0 +1,103 
@@\n+package apply\n+\n+import (\n+\t\"errors\"\n+\t\"fmt\"\n+\t\"io\"\n+\t\"os\"\n+\n+\tkftypes \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n+\tkfdefsv3 \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n+\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n+\tlog \"github.com/sirupsen/logrus\"\n+)\n+\n+// BootstrapKubeflow is used by the kfctl apply sub-command to take in a configfile\n+// as a flag and boostrap a KfApp and deploy it\n+func BootstrapKubeflow(configFilePath string, kfResource kftypes.ResourceEnum) error {\n+\t// Set default app name to kf-app\n+\tappName := \"kf-app\"\n+\n+\t// Construct KfDef from the configFilePath provided\n+\tkfDef := &kfdefsv3.KfDef{}\n+\tkfDef, err := kfdefsv3.LoadKFDefFromURI(configFilePath)\n+\tif err != nil {\n+\t\tlog.Printf(\"Unable to create KfDef from config file: %v\", err)\n+\t}\n+\tif kfDef.Name != \"\" {\n+\t\tlog.Warnf(\"Overriding KfDef.Spec.Name; old value %v; new value %v\", kfDef.Name, appName)\n+\t}\n+\tkfDef.Name = appName\n+\tisValid, msg := kfDef.IsValid()\n+\tif !isValid {\n+\t\tlog.Printf(\"Invalid kfdef: %v\", isValid)\n+\t\tlog.Printf(\"Error validating generated KfDef, please check config file validity: %v\", msg)\n+\t}\n+\tcwd, err := os.Getwd()\n+\tif err != nil {\n+\t\tlog.Printf(\"Error getting current working directory: %v\", err)\n+\t}\n+\tfmt.Println(\"Present working directory is: %v\", cwd)\n+\t// Check if current directory is empty and if it is error out\n+\tisEmpty, err := IsCwdEmpty(cwd)\n+\tif !isEmpty && err != nil {\n+\t\treturn fmt.Errorf(\"Current directory is not empty, please try again in an empty directory: %v\", err)\n+\t}\n+\tkfDef.Spec.AppDir = cwd\n+\tif kfDef.Spec.AppDir == \"\" {\n+\t\treturn errors.New(\"kfDef App Dir not set\")\n+\t}\n+\tlog.Warnf(\"App directory name: %v\", kfDef.Spec.AppDir)\n+\tcfgFilePath, err := coordinator.CreateKfAppCfgFile(kfDef)\n+\tif err != nil {\n+\t\tlog.Errorf(\"Error creating app.yaml from KfDef: %v\", err)\n+\t\treturn err\n+\t}\n+\n+\tlog.Printf(\"Syncing Cache\")\n+\terr = kfDef.SyncCache()\n+\tif err != nil {\n+\t\tlog.Errorf(\"Failed to synchronize the cache; error: %v\", err)\n+\t\treturn err\n+\t}\n+\t// Save app.yaml because we need to preserve information about the cache.\n+\tif err := kfDef.WriteToFile(cfgFilePath); err != nil {\n+\t\tlog.Errorf(\"Failed to save KfDef to %v; error %v\", cfgFilePath, err)\n+\t\treturn err\n+\t}\n+\tlog.Infof(\"Saved configfile as kfdef in path: %v\", cfgFilePath)\n+\n+\t// Load KfApp for Generate and Apply\n+\tKfApp, KfErr := coordinator.LoadKfAppCfgFile(cfgFilePath)\n+\tif KfErr != nil {", - "comment_created_at": "2019-09-03T13:22:23+00:00", - "comment_author": "yanniszark", - "comment_body": "Can you change all of these to just err? 
(kfErr, generateErr, applyErr, ...)", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-harden-container-security.json b/_reviewers/kubeflow-harden-container-security.json new file mode 100644 index 0000000..1f56a91 --- /dev/null +++ b/_reviewers/kubeflow-harden-container-security.json @@ -0,0 +1,36 @@ +[ + { + "discussion_id": "1660234226", + "pr_number": 7622, + "pr_file": "components/example-notebook-servers/base/Dockerfile", + "created_at": "2024-06-30T18:56:09+00:00", + "commented_code": "# common environemnt variables\nENV NB_USER jovyan", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1660234226", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7622, + "pr_file": "components/example-notebook-servers/base/Dockerfile", + "discussion_id": "1660234226", + "commented_code": "@@ -10,6 +10,11 @@ ARG TARGETARCH\n # common environemnt variables\n ENV NB_USER jovyan", + "comment_created_at": "2024-06-30T18:56:09+00:00", + "comment_author": "jiridanek", + "comment_body": "I recently had https://issues.redhat.com/browse/RHOAIENG-72 in my hands, and there the problem was using a symbolic username in the `USER jovyan` statement at the end of the Dockerfile.\r\n\r\nThese images already used the uid even before this PR, so this is good, and therefore \".spec.template.spec.securityContext.runAsNonRoot\" works fine for the images. The runAsNonRoot is something that's normally not set on OpenShifts (where we have SCCs that do the random-uid-0-gid thing), but it's good to be ready for it, and other Kubernetes is more likely to use this.", + "pr_file_module": null + }, + { + "comment_id": "1660306770", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7622, + "pr_file": "components/example-notebook-servers/base/Dockerfile", + "discussion_id": "1660234226", + "commented_code": "@@ -10,6 +10,11 @@ ARG TARGETARCH\n # common environemnt variables\n ENV NB_USER jovyan", + "comment_created_at": "2024-06-30T20:54:34+00:00", + "comment_author": "thesuperzapper", + "comment_body": "@jiridanek To be clear, the issue we are fixing by this PR is mainly about the following `securityContext` configs, which previously the images did not work with:\r\n\r\n```\r\n allowPrivilegeEscalation: false\r\n capabilities:\r\n drop:\r\n - ALL\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-harden-container-security.md b/_reviewers/kubeflow-harden-container-security.md index 0e3b6cc..d839a12 100644 --- a/_reviewers/kubeflow-harden-container-security.md +++ b/_reviewers/kubeflow-harden-container-security.md @@ -22,41 +22,3 @@ securityContext: ``` This approach prevents privilege escalation attacks and follows the principle of least privilege. For example, in a Dockerfile, prefer `USER 1000` over `USER jovyan` to ensure compatibility with security contexts that enforce non-root execution. This practice enhances security in various Kubernetes environments, including those that don't automatically apply these restrictions. 
- - -[ - { - "discussion_id": "1660234226", - "pr_number": 7622, - "pr_file": "components/example-notebook-servers/base/Dockerfile", - "created_at": "2024-06-30T18:56:09+00:00", - "commented_code": "# common environemnt variables\nENV NB_USER jovyan", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1660234226", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7622, - "pr_file": "components/example-notebook-servers/base/Dockerfile", - "discussion_id": "1660234226", - "commented_code": "@@ -10,6 +10,11 @@ ARG TARGETARCH\n # common environemnt variables\n ENV NB_USER jovyan", - "comment_created_at": "2024-06-30T18:56:09+00:00", - "comment_author": "jiridanek", - "comment_body": "I recently had https://issues.redhat.com/browse/RHOAIENG-72 in my hands, and there the problem was using a symbolic username in the `USER jovyan` statement at the end of the Dockerfile.\r\n\r\nThese images already used the uid even before this PR, so this is good, and therefore \".spec.template.spec.securityContext.runAsNonRoot\" works fine for the images. The runAsNonRoot is something that's normally not set on OpenShifts (where we have SCCs that do the random-uid-0-gid thing), but it's good to be ready for it, and other Kubernetes is more likely to use this.", - "pr_file_module": null - }, - { - "comment_id": "1660306770", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7622, - "pr_file": "components/example-notebook-servers/base/Dockerfile", - "discussion_id": "1660234226", - "commented_code": "@@ -10,6 +10,11 @@ ARG TARGETARCH\n # common environemnt variables\n ENV NB_USER jovyan", - "comment_created_at": "2024-06-30T20:54:34+00:00", - "comment_author": "thesuperzapper", - "comment_body": "@jiridanek To be clear, the issue we are fixing by this PR is mainly about the following `securityContext` configs, which previously the images did not work with:\r\n\r\n```\r\n allowPrivilegeEscalation: false\r\n capabilities:\r\n drop:\r\n - ALL\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-isolate-test-cases.json b/_reviewers/kubeflow-isolate-test-cases.json new file mode 100644 index 0000000..ad15ca6 --- /dev/null +++ b/_reviewers/kubeflow-isolate-test-cases.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "336700782", + "pr_number": 4350, + "pr_file": "testing/kfctl/kf_is_ready_test.py", + "created_at": "2019-10-18T22:51:34+00:00", + "commented_code": "logging.info(\"Verifying that deployment %s started...\", deployment_name)\n util.wait_for_deployment(api_client, knative_namespace, deployment_name, 10)\n\n if platform == \"gcp\":", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "336700782", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4350, + "pr_file": "testing/kfctl/kf_is_ready_test.py", + "discussion_id": "336700782", + "commented_code": "@@ -145,6 +145,34 @@ def test_kf_is_ready(namespace, use_basic_auth, use_istio, app_path):\n logging.info(\"Verifying that deployment %s started...\", deployment_name)\n util.wait_for_deployment(api_client, knative_namespace, deployment_name, 10)\n \n+ if platform == \"gcp\":", + "comment_created_at": "2019-10-18T22:51:34+00:00", + "comment_author": "jlewi", + "comment_body": "This should be a new test function in this module (a pytest module can have multiple test functions)\r\n\r\ne.g.\r\n\r\n```\r\ntest_workload_identity\r\n```\r\n\r\nWe want separate functions to make it easier to distinguish failures with workload identity versus other 
tests.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "336700925", + "pr_number": 4350, + "pr_file": "testing/kfctl/kf_is_ready_test.py", + "created_at": "2019-10-18T22:52:22+00:00", + "commented_code": "logging.info(\"Verifying that deployment %s started...\", deployment_name)\n util.wait_for_deployment(api_client, knative_namespace, deployment_name, 10)\n\n if platform == \"gcp\":", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "336700925", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4350, + "pr_file": "testing/kfctl/kf_is_ready_test.py", + "discussion_id": "336700925", + "commented_code": "@@ -145,6 +145,34 @@ def test_kf_is_ready(namespace, use_basic_auth, use_istio, app_path):\n logging.info(\"Verifying that deployment %s started...\", deployment_name)\n util.wait_for_deployment(api_client, knative_namespace, deployment_name, 10)\n \n+ if platform == \"gcp\":", + "comment_created_at": "2019-10-18T22:52:22+00:00", + "comment_author": "jlewi", + "comment_body": "Use pytest.Skip to skip the test if we aren't running on GCP\r\nhttp://doc.pytest.org/en/latest/skipping.html", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-isolate-test-cases.md b/_reviewers/kubeflow-isolate-test-cases.md index fbe225e..8ca50fc 100644 --- a/_reviewers/kubeflow-isolate-test-cases.md +++ b/_reviewers/kubeflow-isolate-test-cases.md @@ -39,51 +39,3 @@ def test_workload_identity(): ``` This organization makes it immediately clear which specific feature is failing when tests don't pass, simplifies debugging, and enables more effective test filtering when running targeted test suites. - - -[ - { - "discussion_id": "336700782", - "pr_number": 4350, - "pr_file": "testing/kfctl/kf_is_ready_test.py", - "created_at": "2019-10-18T22:51:34+00:00", - "commented_code": "logging.info(\"Verifying that deployment %s started...\", deployment_name)\n util.wait_for_deployment(api_client, knative_namespace, deployment_name, 10)\n\n if platform == \"gcp\":", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "336700782", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4350, - "pr_file": "testing/kfctl/kf_is_ready_test.py", - "discussion_id": "336700782", - "commented_code": "@@ -145,6 +145,34 @@ def test_kf_is_ready(namespace, use_basic_auth, use_istio, app_path):\n logging.info(\"Verifying that deployment %s started...\", deployment_name)\n util.wait_for_deployment(api_client, knative_namespace, deployment_name, 10)\n \n+ if platform == \"gcp\":", - "comment_created_at": "2019-10-18T22:51:34+00:00", - "comment_author": "jlewi", - "comment_body": "This should be a new test function in this module (a pytest module can have multiple test functions)\r\n\r\ne.g.\r\n\r\n```\r\ntest_workload_identity\r\n```\r\n\r\nWe want separate functions to make it easier to distinguish failures with workload identity versus other tests.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "336700925", - "pr_number": 4350, - "pr_file": "testing/kfctl/kf_is_ready_test.py", - "created_at": "2019-10-18T22:52:22+00:00", - "commented_code": "logging.info(\"Verifying that deployment %s started...\", deployment_name)\n util.wait_for_deployment(api_client, knative_namespace, deployment_name, 10)\n\n if platform == \"gcp\":", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "336700925", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4350, 
- "pr_file": "testing/kfctl/kf_is_ready_test.py", - "discussion_id": "336700925", - "commented_code": "@@ -145,6 +145,34 @@ def test_kf_is_ready(namespace, use_basic_auth, use_istio, app_path):\n logging.info(\"Verifying that deployment %s started...\", deployment_name)\n util.wait_for_deployment(api_client, knative_namespace, deployment_name, 10)\n \n+ if platform == \"gcp\":", - "comment_created_at": "2019-10-18T22:52:22+00:00", - "comment_author": "jlewi", - "comment_body": "Use pytest.Skip to skip the test if we aren't running on GCP\r\nhttp://doc.pytest.org/en/latest/skipping.html", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-load-configurations-efficiently.json b/_reviewers/kubeflow-load-configurations-efficiently.json new file mode 100644 index 0000000..828516e --- /dev/null +++ b/_reviewers/kubeflow-load-configurations-efficiently.json @@ -0,0 +1,174 @@ +[ + { + "discussion_id": "600238600", + "pr_number": 5761, + "pr_file": "components/profile-controller/controllers/profile_controller.go", + "created_at": "2021-03-24T07:44:10+00:00", + "commented_code": "return\n}\n\nfunc mergeLabelPropertiesToMap(logger logr.Logger) {\n\tfile, err := os.Open(NAMESPACE_LABELS_PROPERTIES)\n\tif err != nil {\n\t\tlogger.Info(\"namespace labels properties file doesn't exist, using default value\")\n\t}\n\tdefer file.Close()\n\n\tscanner := bufio.NewScanner(file)\n\tfor scanner.Scan() {\n\t\tlabel := scanner.Text()\n\t\tarr := strings.Split(label, \"=\")\n\t\tif len(arr) > 2 || len(arr) == 0 {\n\t\t\tlogger.Info(\"label config format incorrect, should be {key}={value}\", \"label\", label)\n\t\t} else {\n\t\t\tif arr[0] == \"\" {\n\t\t\t\tlogger.Info(\"label key should not be empty\", \"label\", label)\n\t\t\t} else if len(arr) == 2 {\n\t\t\t\t// Add or overwrite label map if value exists.\n\t\t\t\tkubeflowNamespaceLabels[arr[0]] = arr[1]\n\t\t\t} else {\n\t\t\t\t// Set value to be empty if nothing exists after \"=\".\n\t\t\t\t// It means this label needs to be removed.\n\t\t\t\tkubeflowNamespaceLabels[arr[0]] = \"\"\n\t\t\t}\n\t\t}\n\t}", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "600238600", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5761, + "pr_file": "components/profile-controller/controllers/profile_controller.go", + "discussion_id": "600238600", + "commented_code": "@@ -616,15 +624,59 @@ func removeString(slice []string, s string) (result []string) {\n \treturn\n }\n \n+func mergeLabelPropertiesToMap(logger logr.Logger) {\n+\tfile, err := os.Open(NAMESPACE_LABELS_PROPERTIES)\n+\tif err != nil {\n+\t\tlogger.Info(\"namespace labels properties file doesn't exist, using default value\")\n+\t}\n+\tdefer file.Close()\n+\n+\tscanner := bufio.NewScanner(file)\n+\tfor scanner.Scan() {\n+\t\tlabel := scanner.Text()\n+\t\tarr := strings.Split(label, \"=\")\n+\t\tif len(arr) > 2 || len(arr) == 0 {\n+\t\t\tlogger.Info(\"label config format incorrect, should be {key}={value}\", \"label\", label)\n+\t\t} else {\n+\t\t\tif arr[0] == \"\" {\n+\t\t\t\tlogger.Info(\"label key should not be empty\", \"label\", label)\n+\t\t\t} else if len(arr) == 2 {\n+\t\t\t\t// Add or overwrite label map if value exists.\n+\t\t\t\tkubeflowNamespaceLabels[arr[0]] = arr[1]\n+\t\t\t} else {\n+\t\t\t\t// Set value to be empty if nothing exists after \"=\".\n+\t\t\t\t// It means this label needs to be removed.\n+\t\t\t\tkubeflowNamespaceLabels[arr[0]] = \"\"\n+\t\t\t}\n+\t\t}\n+\t}", + "comment_created_at": "2021-03-24T07:44:10+00:00", + 
"comment_author": "Bobgy", + "comment_body": "It's a nice implementation of the syntax you are proposing.\r\n\r\nHowever, it seems a little unnecessary if we can just use YAML format as the config format, so people do not need to reinvent the wheels and consumers won't guess about how the format is parsed. There are standard libraries for parsing YAML in go: https://github.com/go-yaml/yaml/tree/v2.\r\n\r\nUsing the standards mean that e.g. it's very easy to insert comments in the configfile, without you needing to implement logic to ignore comments.", + "pr_file_module": null + }, + { + "comment_id": "600794348", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5761, + "pr_file": "components/profile-controller/controllers/profile_controller.go", + "discussion_id": "600238600", + "commented_code": "@@ -616,15 +624,59 @@ func removeString(slice []string, s string) (result []string) {\n \treturn\n }\n \n+func mergeLabelPropertiesToMap(logger logr.Logger) {\n+\tfile, err := os.Open(NAMESPACE_LABELS_PROPERTIES)\n+\tif err != nil {\n+\t\tlogger.Info(\"namespace labels properties file doesn't exist, using default value\")\n+\t}\n+\tdefer file.Close()\n+\n+\tscanner := bufio.NewScanner(file)\n+\tfor scanner.Scan() {\n+\t\tlabel := scanner.Text()\n+\t\tarr := strings.Split(label, \"=\")\n+\t\tif len(arr) > 2 || len(arr) == 0 {\n+\t\t\tlogger.Info(\"label config format incorrect, should be {key}={value}\", \"label\", label)\n+\t\t} else {\n+\t\t\tif arr[0] == \"\" {\n+\t\t\t\tlogger.Info(\"label key should not be empty\", \"label\", label)\n+\t\t\t} else if len(arr) == 2 {\n+\t\t\t\t// Add or overwrite label map if value exists.\n+\t\t\t\tkubeflowNamespaceLabels[arr[0]] = arr[1]\n+\t\t\t} else {\n+\t\t\t\t// Set value to be empty if nothing exists after \"=\".\n+\t\t\t\t// It means this label needs to be removed.\n+\t\t\t\tkubeflowNamespaceLabels[arr[0]] = \"\"\n+\t\t\t}\n+\t\t}\n+\t}", + "comment_created_at": "2021-03-24T19:08:28+00:00", + "comment_author": "zijianjoy", + "comment_body": "Thank you for the suggestion! 
I have switched to yaml format in new commit.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "600239852", + "pr_number": 5761, + "pr_file": "components/profile-controller/controllers/profile_controller.go", + "created_at": "2021-03-24T07:46:37+00:00", + "commented_code": "return\n}\n\nfunc mergeLabelPropertiesToMap(logger logr.Logger) {\n\tfile, err := os.Open(NAMESPACE_LABELS_PROPERTIES)", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "600239852", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5761, + "pr_file": "components/profile-controller/controllers/profile_controller.go", + "discussion_id": "600239852", + "commented_code": "@@ -616,15 +624,59 @@ func removeString(slice []string, s string) (result []string) {\n \treturn\n }\n \n+func mergeLabelPropertiesToMap(logger logr.Logger) {\n+\tfile, err := os.Open(NAMESPACE_LABELS_PROPERTIES)", + "comment_created_at": "2021-03-24T07:46:37+00:00", + "comment_author": "Bobgy", + "comment_body": "A controller runs in reconcile loops, this method is called very very often, so it'd be better to load the config file at controller startup and fail the controller if any errors show up.", + "pr_file_module": null + }, + { + "comment_id": "600795933", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5761, + "pr_file": "components/profile-controller/controllers/profile_controller.go", + "discussion_id": "600239852", + "commented_code": "@@ -616,15 +624,59 @@ func removeString(slice []string, s string) (result []string) {\n \treturn\n }\n \n+func mergeLabelPropertiesToMap(logger logr.Logger) {\n+\tfile, err := os.Open(NAMESPACE_LABELS_PROPERTIES)", + "comment_created_at": "2021-03-24T19:10:02+00:00", + "comment_author": "zijianjoy", + "comment_body": "If the configmap is updated, we want to read from the latest data instead of cache for label configuration. So we will need to read from VolumeMount. Is it very resource intense to read a file? If yes, what is your suggestion of reading new ConfigMap data?", + "pr_file_module": null + }, + { + "comment_id": "600933543", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5761, + "pr_file": "components/profile-controller/controllers/profile_controller.go", + "discussion_id": "600239852", + "commented_code": "@@ -616,15 +624,59 @@ func removeString(slice []string, s string) (result []string) {\n \treturn\n }\n \n+func mergeLabelPropertiesToMap(logger logr.Logger) {\n+\tfile, err := os.Open(NAMESPACE_LABELS_PROPERTIES)", + "comment_created_at": "2021-03-24T23:11:05+00:00", + "comment_author": "Bobgy", + "comment_body": "Note this is a controller, not a service. It's not a big deal to restart an instance to update configs.\n\nRegarding live configmap refresh, I believe there is a config library -- viper which KFP is using too, it can be set-up to watch file/configmap changes, however I think that's not worth the complexity.", + "pr_file_module": null + }, + { + "comment_id": "604343502", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5761, + "pr_file": "components/profile-controller/controllers/profile_controller.go", + "discussion_id": "600239852", + "commented_code": "@@ -616,15 +624,59 @@ func removeString(slice []string, s string) (result []string) {\n \treturn\n }\n \n+func mergeLabelPropertiesToMap(logger logr.Logger) {\n+\tfile, err := os.Open(NAMESPACE_LABELS_PROPERTIES)", + "comment_created_at": "2021-03-30T18:38:31+00:00", + "comment_author": "zijianjoy", + "comment_body": "Thank you for confirming! 
I think it makes sense and updated the code to reflect one-time file read. I would love to learn about viper later on.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "600240614", + "pr_number": 5761, + "pr_file": "components/profile-controller/controllers/profile_controller.go", + "created_at": "2021-03-24T07:48:03+00:00", + "commented_code": "const ROLE = \"role\"\nconst ADMIN = \"admin\"\n\n// External input for labels to be added to namespace.\nconst NAMESPACE_LABELS_PROPERTIES = \"/etc/config/labels/namespace-labels.properties\"", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "600240614", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5761, + "pr_file": "components/profile-controller/controllers/profile_controller.go", + "discussion_id": "600240614", + "commented_code": "@@ -55,6 +58,9 @@ const USER = \"user\"\n const ROLE = \"role\"\n const ADMIN = \"admin\"\n \n+// External input for labels to be added to namespace.\n+const NAMESPACE_LABELS_PROPERTIES = \"/etc/config/labels/namespace-labels.properties\"", + "comment_created_at": "2021-03-24T07:48:03+00:00", + "comment_author": "Bobgy", + "comment_body": "it's not a good practice to hard-code config file location, it's reasonable to specify a default location, but the default location should typically be overridable from a command line argument.", + "pr_file_module": null + }, + { + "comment_id": "600796354", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5761, + "pr_file": "components/profile-controller/controllers/profile_controller.go", + "discussion_id": "600240614", + "commented_code": "@@ -55,6 +58,9 @@ const USER = \"user\"\n const ROLE = \"role\"\n const ADMIN = \"admin\"\n \n+// External input for labels to be added to namespace.\n+const NAMESPACE_LABELS_PROPERTIES = \"/etc/config/labels/namespace-labels.properties\"", + "comment_created_at": "2021-03-24T19:10:29+00:00", + "comment_author": "zijianjoy", + "comment_body": "Make sense, I changed it to a CLI argument with default value. Thank you!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "332201398", + "pr_number": 4244, + "pr_file": "bootstrap/pkg/kfapp/coordinator/coordinator.go", + "created_at": "2019-10-07T19:42:49+00:00", + "commented_code": "// kftypesv3.LoadKfApp which will try and dynamically load a .so\n//\nfunc getPackageManager(kfdef *kfdefsv3.KfDef) (kftypesv3.KfApp, error) {\n\tswitch kfdef.Spec.PackageManager {\n\n\tpackageManager := kfdef.Spec.PackageManager", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "332201398", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4244, + "pr_file": "bootstrap/pkg/kfapp/coordinator/coordinator.go", + "discussion_id": "332201398", + "commented_code": "@@ -142,7 +142,13 @@ func (coord *coordinator) getPackageManagers(kfdef *kfdefsv3.KfDef) *map[string]\n // kftypesv3.LoadKfApp which will try and dynamically load a .so\n //\n func getPackageManager(kfdef *kfdefsv3.KfDef) (kftypesv3.KfApp, error) {\n-\tswitch kfdef.Spec.PackageManager {\n+\n+\tpackageManager := kfdef.Spec.PackageManager", + "comment_created_at": "2019-10-07T19:42:49+00:00", + "comment_author": "jlewi", + "comment_body": "Shouldn't the defaults be filled in before we call getPackageManager? 
Specifically when we call NewKfApp\r\nhttps://github.com/kubeflow/kubeflow/blob/2395c5fc0d384cf6dd4d7167bbe9010cd1315b54/bootstrap/pkg/kfapp/coordinator/coordinator.go#L468\r\n\r\nIsn't that where any initialization code that sets default values should happen? So that's where we could set PackageManager to Kustomize.", + "pr_file_module": null + }, + { + "comment_id": "332471183", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4244, + "pr_file": "bootstrap/pkg/kfapp/coordinator/coordinator.go", + "discussion_id": "332201398", + "commented_code": "@@ -142,7 +142,13 @@ func (coord *coordinator) getPackageManagers(kfdef *kfdefsv3.KfDef) *map[string]\n // kftypesv3.LoadKfApp which will try and dynamically load a .so\n //\n func getPackageManager(kfdef *kfdefsv3.KfDef) (kftypesv3.KfApp, error) {\n-\tswitch kfdef.Spec.PackageManager {\n+\n+\tpackageManager := kfdef.Spec.PackageManager", + "comment_created_at": "2019-10-08T11:58:12+00:00", + "comment_author": "yanniszark", + "comment_body": "@jlewi `NewKfApp` isn't used anymore.\r\nI don't think we have a clear way of doing defaulting right now.\r\nRight now we have the following call chains:\r\n\r\nbuild: `BuildKfAppFromURI` -> `NewLoadKfAppFromURI` which loads a KfDef (`LoadKFDefFromURI`), persists it as app.yaml (`CreateKfAppCfgFile`) and then loads it (`LoadKfAppCfgFile`).\r\napply: `LoadKfAppCfgFile`\r\n\r\nIMO it makes most sense to have it inside `NewLoadKfAppFromURI`, I see that there are also some GCP checks in there.", + "pr_file_module": null + }, + { + "comment_id": "332499138", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4244, + "pr_file": "bootstrap/pkg/kfapp/coordinator/coordinator.go", + "discussion_id": "332201398", + "commented_code": "@@ -142,7 +142,13 @@ func (coord *coordinator) getPackageManagers(kfdef *kfdefsv3.KfDef) *map[string]\n // kftypesv3.LoadKfApp which will try and dynamically load a .so\n //\n func getPackageManager(kfdef *kfdefsv3.KfDef) (kftypesv3.KfApp, error) {\n-\tswitch kfdef.Spec.PackageManager {\n+\n+\tpackageManager := kfdef.Spec.PackageManager", + "comment_created_at": "2019-10-08T13:05:21+00:00", + "comment_author": "jlewi", + "comment_body": "Doing it in the new functions seems better. Since we have multiple entry points I might suggest creating a new function \"initDefaults\" or something like that, that can be called from multiple entry points if needed.\r\n\r\n", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-load-configurations-efficiently.md b/_reviewers/kubeflow-load-configurations-efficiently.md index 1282286..74f26dd 100644 --- a/_reviewers/kubeflow-load-configurations-efficiently.md +++ b/_reviewers/kubeflow-load-configurations-efficiently.md @@ -74,179 +74,3 @@ func main() { 4. **Set sensible defaults** for configuration values but provide clear ways to override them through explicit configuration. These practices help avoid reinventing parsing logic, improve performance by reducing unnecessary I/O operations, and make your components more configurable across different environments. 
- - -[ - { - "discussion_id": "600238600", - "pr_number": 5761, - "pr_file": "components/profile-controller/controllers/profile_controller.go", - "created_at": "2021-03-24T07:44:10+00:00", - "commented_code": "return\n}\n\nfunc mergeLabelPropertiesToMap(logger logr.Logger) {\n\tfile, err := os.Open(NAMESPACE_LABELS_PROPERTIES)\n\tif err != nil {\n\t\tlogger.Info(\"namespace labels properties file doesn't exist, using default value\")\n\t}\n\tdefer file.Close()\n\n\tscanner := bufio.NewScanner(file)\n\tfor scanner.Scan() {\n\t\tlabel := scanner.Text()\n\t\tarr := strings.Split(label, \"=\")\n\t\tif len(arr) > 2 || len(arr) == 0 {\n\t\t\tlogger.Info(\"label config format incorrect, should be {key}={value}\", \"label\", label)\n\t\t} else {\n\t\t\tif arr[0] == \"\" {\n\t\t\t\tlogger.Info(\"label key should not be empty\", \"label\", label)\n\t\t\t} else if len(arr) == 2 {\n\t\t\t\t// Add or overwrite label map if value exists.\n\t\t\t\tkubeflowNamespaceLabels[arr[0]] = arr[1]\n\t\t\t} else {\n\t\t\t\t// Set value to be empty if nothing exists after \"=\".\n\t\t\t\t// It means this label needs to be removed.\n\t\t\t\tkubeflowNamespaceLabels[arr[0]] = \"\"\n\t\t\t}\n\t\t}\n\t}", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "600238600", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5761, - "pr_file": "components/profile-controller/controllers/profile_controller.go", - "discussion_id": "600238600", - "commented_code": "@@ -616,15 +624,59 @@ func removeString(slice []string, s string) (result []string) {\n \treturn\n }\n \n+func mergeLabelPropertiesToMap(logger logr.Logger) {\n+\tfile, err := os.Open(NAMESPACE_LABELS_PROPERTIES)\n+\tif err != nil {\n+\t\tlogger.Info(\"namespace labels properties file doesn't exist, using default value\")\n+\t}\n+\tdefer file.Close()\n+\n+\tscanner := bufio.NewScanner(file)\n+\tfor scanner.Scan() {\n+\t\tlabel := scanner.Text()\n+\t\tarr := strings.Split(label, \"=\")\n+\t\tif len(arr) > 2 || len(arr) == 0 {\n+\t\t\tlogger.Info(\"label config format incorrect, should be {key}={value}\", \"label\", label)\n+\t\t} else {\n+\t\t\tif arr[0] == \"\" {\n+\t\t\t\tlogger.Info(\"label key should not be empty\", \"label\", label)\n+\t\t\t} else if len(arr) == 2 {\n+\t\t\t\t// Add or overwrite label map if value exists.\n+\t\t\t\tkubeflowNamespaceLabels[arr[0]] = arr[1]\n+\t\t\t} else {\n+\t\t\t\t// Set value to be empty if nothing exists after \"=\".\n+\t\t\t\t// It means this label needs to be removed.\n+\t\t\t\tkubeflowNamespaceLabels[arr[0]] = \"\"\n+\t\t\t}\n+\t\t}\n+\t}", - "comment_created_at": "2021-03-24T07:44:10+00:00", - "comment_author": "Bobgy", - "comment_body": "It's a nice implementation of the syntax you are proposing.\r\n\r\nHowever, it seems a little unnecessary if we can just use YAML format as the config format, so people do not need to reinvent the wheels and consumers won't guess about how the format is parsed. There are standard libraries for parsing YAML in go: https://github.com/go-yaml/yaml/tree/v2.\r\n\r\nUsing the standards mean that e.g. 
it's very easy to insert comments in the configfile, without you needing to implement logic to ignore comments.", - "pr_file_module": null - }, - { - "comment_id": "600794348", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5761, - "pr_file": "components/profile-controller/controllers/profile_controller.go", - "discussion_id": "600238600", - "commented_code": "@@ -616,15 +624,59 @@ func removeString(slice []string, s string) (result []string) {\n \treturn\n }\n \n+func mergeLabelPropertiesToMap(logger logr.Logger) {\n+\tfile, err := os.Open(NAMESPACE_LABELS_PROPERTIES)\n+\tif err != nil {\n+\t\tlogger.Info(\"namespace labels properties file doesn't exist, using default value\")\n+\t}\n+\tdefer file.Close()\n+\n+\tscanner := bufio.NewScanner(file)\n+\tfor scanner.Scan() {\n+\t\tlabel := scanner.Text()\n+\t\tarr := strings.Split(label, \"=\")\n+\t\tif len(arr) > 2 || len(arr) == 0 {\n+\t\t\tlogger.Info(\"label config format incorrect, should be {key}={value}\", \"label\", label)\n+\t\t} else {\n+\t\t\tif arr[0] == \"\" {\n+\t\t\t\tlogger.Info(\"label key should not be empty\", \"label\", label)\n+\t\t\t} else if len(arr) == 2 {\n+\t\t\t\t// Add or overwrite label map if value exists.\n+\t\t\t\tkubeflowNamespaceLabels[arr[0]] = arr[1]\n+\t\t\t} else {\n+\t\t\t\t// Set value to be empty if nothing exists after \"=\".\n+\t\t\t\t// It means this label needs to be removed.\n+\t\t\t\tkubeflowNamespaceLabels[arr[0]] = \"\"\n+\t\t\t}\n+\t\t}\n+\t}", - "comment_created_at": "2021-03-24T19:08:28+00:00", - "comment_author": "zijianjoy", - "comment_body": "Thank you for the suggestion! I have switched to yaml format in new commit.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "600239852", - "pr_number": 5761, - "pr_file": "components/profile-controller/controllers/profile_controller.go", - "created_at": "2021-03-24T07:46:37+00:00", - "commented_code": "return\n}\n\nfunc mergeLabelPropertiesToMap(logger logr.Logger) {\n\tfile, err := os.Open(NAMESPACE_LABELS_PROPERTIES)", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "600239852", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5761, - "pr_file": "components/profile-controller/controllers/profile_controller.go", - "discussion_id": "600239852", - "commented_code": "@@ -616,15 +624,59 @@ func removeString(slice []string, s string) (result []string) {\n \treturn\n }\n \n+func mergeLabelPropertiesToMap(logger logr.Logger) {\n+\tfile, err := os.Open(NAMESPACE_LABELS_PROPERTIES)", - "comment_created_at": "2021-03-24T07:46:37+00:00", - "comment_author": "Bobgy", - "comment_body": "A controller runs in reconcile loops, this method is called very very often, so it'd be better to load the config file at controller startup and fail the controller if any errors show up.", - "pr_file_module": null - }, - { - "comment_id": "600795933", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5761, - "pr_file": "components/profile-controller/controllers/profile_controller.go", - "discussion_id": "600239852", - "commented_code": "@@ -616,15 +624,59 @@ func removeString(slice []string, s string) (result []string) {\n \treturn\n }\n \n+func mergeLabelPropertiesToMap(logger logr.Logger) {\n+\tfile, err := os.Open(NAMESPACE_LABELS_PROPERTIES)", - "comment_created_at": "2021-03-24T19:10:02+00:00", - "comment_author": "zijianjoy", - "comment_body": "If the configmap is updated, we want to read from the latest data instead of cache for label configuration. So we will need to read from VolumeMount. 
Is it very resource intense to read a file? If yes, what is your suggestion of reading new ConfigMap data?", - "pr_file_module": null - }, - { - "comment_id": "600933543", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5761, - "pr_file": "components/profile-controller/controllers/profile_controller.go", - "discussion_id": "600239852", - "commented_code": "@@ -616,15 +624,59 @@ func removeString(slice []string, s string) (result []string) {\n \treturn\n }\n \n+func mergeLabelPropertiesToMap(logger logr.Logger) {\n+\tfile, err := os.Open(NAMESPACE_LABELS_PROPERTIES)", - "comment_created_at": "2021-03-24T23:11:05+00:00", - "comment_author": "Bobgy", - "comment_body": "Note this is a controller, not a service. It's not a big deal to restart an instance to update configs.\n\nRegarding live configmap refresh, I believe there is a config library -- viper which KFP is using too, it can be set-up to watch file/configmap changes, however I think that's not worth the complexity.", - "pr_file_module": null - }, - { - "comment_id": "604343502", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5761, - "pr_file": "components/profile-controller/controllers/profile_controller.go", - "discussion_id": "600239852", - "commented_code": "@@ -616,15 +624,59 @@ func removeString(slice []string, s string) (result []string) {\n \treturn\n }\n \n+func mergeLabelPropertiesToMap(logger logr.Logger) {\n+\tfile, err := os.Open(NAMESPACE_LABELS_PROPERTIES)", - "comment_created_at": "2021-03-30T18:38:31+00:00", - "comment_author": "zijianjoy", - "comment_body": "Thank you for confirming! I think it makes sense and updated the code to reflect one-time file read. I would love to learn about viper later on.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "600240614", - "pr_number": 5761, - "pr_file": "components/profile-controller/controllers/profile_controller.go", - "created_at": "2021-03-24T07:48:03+00:00", - "commented_code": "const ROLE = \"role\"\nconst ADMIN = \"admin\"\n\n// External input for labels to be added to namespace.\nconst NAMESPACE_LABELS_PROPERTIES = \"/etc/config/labels/namespace-labels.properties\"", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "600240614", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5761, - "pr_file": "components/profile-controller/controllers/profile_controller.go", - "discussion_id": "600240614", - "commented_code": "@@ -55,6 +58,9 @@ const USER = \"user\"\n const ROLE = \"role\"\n const ADMIN = \"admin\"\n \n+// External input for labels to be added to namespace.\n+const NAMESPACE_LABELS_PROPERTIES = \"/etc/config/labels/namespace-labels.properties\"", - "comment_created_at": "2021-03-24T07:48:03+00:00", - "comment_author": "Bobgy", - "comment_body": "it's not a good practice to hard-code config file location, it's reasonable to specify a default location, but the default location should typically be overridable from a command line argument.", - "pr_file_module": null - }, - { - "comment_id": "600796354", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5761, - "pr_file": "components/profile-controller/controllers/profile_controller.go", - "discussion_id": "600240614", - "commented_code": "@@ -55,6 +58,9 @@ const USER = \"user\"\n const ROLE = \"role\"\n const ADMIN = \"admin\"\n \n+// External input for labels to be added to namespace.\n+const NAMESPACE_LABELS_PROPERTIES = \"/etc/config/labels/namespace-labels.properties\"", - "comment_created_at": "2021-03-24T19:10:29+00:00", - 
"comment_author": "zijianjoy", - "comment_body": "Make sense, I changed it to a CLI argument with default value. Thank you!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "332201398", - "pr_number": 4244, - "pr_file": "bootstrap/pkg/kfapp/coordinator/coordinator.go", - "created_at": "2019-10-07T19:42:49+00:00", - "commented_code": "// kftypesv3.LoadKfApp which will try and dynamically load a .so\n//\nfunc getPackageManager(kfdef *kfdefsv3.KfDef) (kftypesv3.KfApp, error) {\n\tswitch kfdef.Spec.PackageManager {\n\n\tpackageManager := kfdef.Spec.PackageManager", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "332201398", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4244, - "pr_file": "bootstrap/pkg/kfapp/coordinator/coordinator.go", - "discussion_id": "332201398", - "commented_code": "@@ -142,7 +142,13 @@ func (coord *coordinator) getPackageManagers(kfdef *kfdefsv3.KfDef) *map[string]\n // kftypesv3.LoadKfApp which will try and dynamically load a .so\n //\n func getPackageManager(kfdef *kfdefsv3.KfDef) (kftypesv3.KfApp, error) {\n-\tswitch kfdef.Spec.PackageManager {\n+\n+\tpackageManager := kfdef.Spec.PackageManager", - "comment_created_at": "2019-10-07T19:42:49+00:00", - "comment_author": "jlewi", - "comment_body": "Shouldn't the defaults be filled in before we call getPackageManager? Specifically when we call NewKfApp\r\nhttps://github.com/kubeflow/kubeflow/blob/2395c5fc0d384cf6dd4d7167bbe9010cd1315b54/bootstrap/pkg/kfapp/coordinator/coordinator.go#L468\r\n\r\nIsn't that where any initialization code that sets default values should happen? So that's where we could set PackageManager to Kustomize.", - "pr_file_module": null - }, - { - "comment_id": "332471183", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4244, - "pr_file": "bootstrap/pkg/kfapp/coordinator/coordinator.go", - "discussion_id": "332201398", - "commented_code": "@@ -142,7 +142,13 @@ func (coord *coordinator) getPackageManagers(kfdef *kfdefsv3.KfDef) *map[string]\n // kftypesv3.LoadKfApp which will try and dynamically load a .so\n //\n func getPackageManager(kfdef *kfdefsv3.KfDef) (kftypesv3.KfApp, error) {\n-\tswitch kfdef.Spec.PackageManager {\n+\n+\tpackageManager := kfdef.Spec.PackageManager", - "comment_created_at": "2019-10-08T11:58:12+00:00", - "comment_author": "yanniszark", - "comment_body": "@jlewi `NewKfApp` isn't used anymore.\r\nI don't think we have a clear way of doing defaulting right now.\r\nRight now we have the following call chains:\r\n\r\nbuild: `BuildKfAppFromURI` -> `NewLoadKfAppFromURI` which loads a KfDef (`LoadKFDefFromURI`), persists it as app.yaml (`CreateKfAppCfgFile`) and then loads it (`LoadKfAppCfgFile`).\r\napply: `LoadKfAppCfgFile`\r\n\r\nIMO it makes most sense to have it inside `NewLoadKfAppFromURI`, I see that there are also some GCP checks in there.", - "pr_file_module": null - }, - { - "comment_id": "332499138", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4244, - "pr_file": "bootstrap/pkg/kfapp/coordinator/coordinator.go", - "discussion_id": "332201398", - "commented_code": "@@ -142,7 +142,13 @@ func (coord *coordinator) getPackageManagers(kfdef *kfdefsv3.KfDef) *map[string]\n // kftypesv3.LoadKfApp which will try and dynamically load a .so\n //\n func getPackageManager(kfdef *kfdefsv3.KfDef) (kftypesv3.KfApp, error) {\n-\tswitch kfdef.Spec.PackageManager {\n+\n+\tpackageManager := kfdef.Spec.PackageManager", - "comment_created_at": "2019-10-08T13:05:21+00:00", - "comment_author": "jlewi", - 
"comment_body": "Doing it in the new functions seems better. Since we have multiple entry points I might suggest creating a new function \"initDefaults\" or something like that, that can be called from multiple entry points if needed.\r\n\r\n", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-manage-configuration-changes.json b/_reviewers/kubeflow-manage-configuration-changes.json new file mode 100644 index 0000000..d624f84 --- /dev/null +++ b/_reviewers/kubeflow-manage-configuration-changes.json @@ -0,0 +1,116 @@ +[ + { + "discussion_id": "774008435", + "pr_number": 6258, + "pr_file": "components/centraldashboard/tsconfig.json", + "created_at": "2021-12-22T16:15:56+00:00", + "commented_code": "\"compilerOptions\":{\n \"module\":\"commonjs\",\n \"esModuleInterop\":true,\n \"target\":\"es6\",\n \"target\":\"es2020\",\n \"noImplicitAny\":true,", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "774008435", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6258, + "pr_file": "components/centraldashboard/tsconfig.json", + "discussion_id": "774008435", + "commented_code": "@@ -2,7 +2,7 @@\n \"compilerOptions\":{\n \"module\":\"commonjs\",\n \"esModuleInterop\":true,\n- \"target\":\"es6\",\n+ \"target\":\"es2020\",\n \"noImplicitAny\":true,", + "comment_created_at": "2021-12-22T16:15:56+00:00", + "comment_author": "haoxins", + "comment_body": "Change to `es2020` because of the latest K8s Node.js client used `BigInt` (Node.js 12 supported).", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "593452634", + "pr_number": 5693, + "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", + "created_at": "2021-03-12T21:11:39+00:00", + "commented_code": "\"scripts\": {\n \"ng\": \"ng\",\n \"start\": \"npm run copyLibAssets && ng serve --base-href /tensorboard/ --deploy-url /tensorboard/\",\n \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboard/ --deploy-url /tensorboard/static/\",\n \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboards/ --deploy-url static/\",", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "593452634", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5693, + "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", + "discussion_id": "593452634", + "commented_code": "@@ -4,7 +4,7 @@\n \"scripts\": {\n \"ng\": \"ng\",\n \"start\": \"npm run copyLibAssets && ng serve --base-href /tensorboard/ --deploy-url /tensorboard/\",\n- \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboard/ --deploy-url /tensorboard/static/\",\n+ \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboards/ --deploy-url static/\",", + "comment_created_at": "2021-03-12T21:11:39+00:00", + "comment_author": "davidspek", + "comment_body": "In the line above the base-href is `/tensorboard/` instead of `/tensorboards/` which it is here. 
", + "pr_file_module": null + }, + { + "comment_id": "595241172", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5693, + "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", + "discussion_id": "593452634", + "commented_code": "@@ -4,7 +4,7 @@\n \"scripts\": {\n \"ng\": \"ng\",\n \"start\": \"npm run copyLibAssets && ng serve --base-href /tensorboard/ --deploy-url /tensorboard/\",\n- \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboard/ --deploy-url /tensorboard/static/\",\n+ \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboards/ --deploy-url static/\",", + "comment_created_at": "2021-03-16T14:48:02+00:00", + "comment_author": "kimwnasptd", + "comment_body": "I'm not sure I understand what the question or suggested change is here. Could you rephrase this?", + "pr_file_module": null + }, + { + "comment_id": "595266057", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5693, + "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", + "discussion_id": "593452634", + "commented_code": "@@ -4,7 +4,7 @@\n \"scripts\": {\n \"ng\": \"ng\",\n \"start\": \"npm run copyLibAssets && ng serve --base-href /tensorboard/ --deploy-url /tensorboard/\",\n- \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboard/ --deploy-url /tensorboard/static/\",\n+ \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboards/ --deploy-url static/\",", + "comment_created_at": "2021-03-16T15:12:21+00:00", + "comment_author": "davidspek", + "comment_body": "There is an inconsistency in the usage of `--base-href /tensorboard/ --deploy-url /tensorboard/\"` on Line 6 and `--base-href /tensorboards/` on Line 7.", + "pr_file_module": null + }, + { + "comment_id": "595303549", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5693, + "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", + "discussion_id": "593452634", + "commented_code": "@@ -4,7 +4,7 @@\n \"scripts\": {\n \"ng\": \"ng\",\n \"start\": \"npm run copyLibAssets && ng serve --base-href /tensorboard/ --deploy-url /tensorboard/\",\n- \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboard/ --deploy-url /tensorboard/static/\",\n+ \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboards/ --deploy-url static/\",", + "comment_created_at": "2021-03-16T15:51:35+00:00", + "comment_author": "kimwnasptd", + "comment_body": "I see what you mean. 
The `start` script should be completely removed, since the users should run the UI locally with `build:watch` for now.\r\n\r\nI'll push a commit to remove this script", + "pr_file_module": null + }, + { + "comment_id": "595369242", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5693, + "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", + "discussion_id": "593452634", + "commented_code": "@@ -4,7 +4,7 @@\n \"scripts\": {\n \"ng\": \"ng\",\n \"start\": \"npm run copyLibAssets && ng serve --base-href /tensorboard/ --deploy-url /tensorboard/\",\n- \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboard/ --deploy-url /tensorboard/static/\",\n+ \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboards/ --deploy-url static/\",", + "comment_created_at": "2021-03-16T17:02:35+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Added a commit that removes the unused npm script", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "612292983", + "pr_number": 5823, + "pr_file": "components/crud-web-apps/jupyter/frontend/package.json", + "created_at": "2021-04-13T09:39:46+00:00", + "commented_code": "\"@fortawesome/free-brands-svg-icons\": \"^5.12.0\",\n \"@fortawesome/free-solid-svg-icons\": \"^5.12.0\",\n \"@kubernetes/client-node\": \"^0.12.2\",\n \"hammerjs\": \"^2.0.8\",\n \"date-fns\": \"^1.29.0\",\n \"lodash-es\": \"^4.17.11\",", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "612292983", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5823, + "pr_file": "components/crud-web-apps/jupyter/frontend/package.json", + "discussion_id": "612292983", + "commented_code": "@@ -32,10 +32,10 @@\n \"@fortawesome/free-brands-svg-icons\": \"^5.12.0\",\n \"@fortawesome/free-solid-svg-icons\": \"^5.12.0\",\n \"@kubernetes/client-node\": \"^0.12.2\",\n- \"hammerjs\": \"^2.0.8\",\n \"date-fns\": \"^1.29.0\",\n- \"lodash-es\": \"^4.17.11\",", + "comment_created_at": "2021-04-13T09:39:46+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Let's exclude this change from this PR and keep the touched lines to the minimum. 
This will also remove the 15.000 modified lines to the `package-lock.json` because of this change.\r\n\r\nAlthough I'd like to progressively replace the use of `lodash` with `lodash-es`, but until I get to it we can remove this from the `package.json`, but lets just do it in another PR.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-manage-configuration-changes.md b/_reviewers/kubeflow-manage-configuration-changes.md index 554a98d..6e1b797 100644 --- a/_reviewers/kubeflow-manage-configuration-changes.md +++ b/_reviewers/kubeflow-manage-configuration-changes.md @@ -24,121 +24,3 @@ Carefully manage configuration file changes to ensure consistency and minimize u ``` 3) Isolate high-impact configuration changes (like dependency updates that affect package-lock.json) into separate PRs to maintain reviewability and minimize unintended side effects - - -[ - { - "discussion_id": "774008435", - "pr_number": 6258, - "pr_file": "components/centraldashboard/tsconfig.json", - "created_at": "2021-12-22T16:15:56+00:00", - "commented_code": "\"compilerOptions\":{\n \"module\":\"commonjs\",\n \"esModuleInterop\":true,\n \"target\":\"es6\",\n \"target\":\"es2020\",\n \"noImplicitAny\":true,", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "774008435", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6258, - "pr_file": "components/centraldashboard/tsconfig.json", - "discussion_id": "774008435", - "commented_code": "@@ -2,7 +2,7 @@\n \"compilerOptions\":{\n \"module\":\"commonjs\",\n \"esModuleInterop\":true,\n- \"target\":\"es6\",\n+ \"target\":\"es2020\",\n \"noImplicitAny\":true,", - "comment_created_at": "2021-12-22T16:15:56+00:00", - "comment_author": "haoxins", - "comment_body": "Change to `es2020` because of the latest K8s Node.js client used `BigInt` (Node.js 12 supported).", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "593452634", - "pr_number": 5693, - "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", - "created_at": "2021-03-12T21:11:39+00:00", - "commented_code": "\"scripts\": {\n \"ng\": \"ng\",\n \"start\": \"npm run copyLibAssets && ng serve --base-href /tensorboard/ --deploy-url /tensorboard/\",\n \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboard/ --deploy-url /tensorboard/static/\",\n \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboards/ --deploy-url static/\",", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "593452634", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5693, - "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", - "discussion_id": "593452634", - "commented_code": "@@ -4,7 +4,7 @@\n \"scripts\": {\n \"ng\": \"ng\",\n \"start\": \"npm run copyLibAssets && ng serve --base-href /tensorboard/ --deploy-url /tensorboard/\",\n- \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboard/ --deploy-url /tensorboard/static/\",\n+ \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboards/ --deploy-url static/\",", - "comment_created_at": "2021-03-12T21:11:39+00:00", - "comment_author": "davidspek", - "comment_body": "In the line above the base-href is `/tensorboard/` instead of `/tensorboards/` which it is here. 
", - "pr_file_module": null - }, - { - "comment_id": "595241172", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5693, - "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", - "discussion_id": "593452634", - "commented_code": "@@ -4,7 +4,7 @@\n \"scripts\": {\n \"ng\": \"ng\",\n \"start\": \"npm run copyLibAssets && ng serve --base-href /tensorboard/ --deploy-url /tensorboard/\",\n- \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboard/ --deploy-url /tensorboard/static/\",\n+ \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboards/ --deploy-url static/\",", - "comment_created_at": "2021-03-16T14:48:02+00:00", - "comment_author": "kimwnasptd", - "comment_body": "I'm not sure I understand what the question or suggested change is here. Could you rephrase this?", - "pr_file_module": null - }, - { - "comment_id": "595266057", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5693, - "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", - "discussion_id": "593452634", - "commented_code": "@@ -4,7 +4,7 @@\n \"scripts\": {\n \"ng\": \"ng\",\n \"start\": \"npm run copyLibAssets && ng serve --base-href /tensorboard/ --deploy-url /tensorboard/\",\n- \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboard/ --deploy-url /tensorboard/static/\",\n+ \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboards/ --deploy-url static/\",", - "comment_created_at": "2021-03-16T15:12:21+00:00", - "comment_author": "davidspek", - "comment_body": "There is an inconsistency in the usage of `--base-href /tensorboard/ --deploy-url /tensorboard/\"` on Line 6 and `--base-href /tensorboards/` on Line 7.", - "pr_file_module": null - }, - { - "comment_id": "595303549", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5693, - "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", - "discussion_id": "593452634", - "commented_code": "@@ -4,7 +4,7 @@\n \"scripts\": {\n \"ng\": \"ng\",\n \"start\": \"npm run copyLibAssets && ng serve --base-href /tensorboard/ --deploy-url /tensorboard/\",\n- \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboard/ --deploy-url /tensorboard/static/\",\n+ \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboards/ --deploy-url static/\",", - "comment_created_at": "2021-03-16T15:51:35+00:00", - "comment_author": "kimwnasptd", - "comment_body": "I see what you mean. 
The `start` script should be completely removed, since the users should run the UI locally with `build:watch` for now.\r\n\r\nI'll push a commit to remove this script", - "pr_file_module": null - }, - { - "comment_id": "595369242", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5693, - "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", - "discussion_id": "593452634", - "commented_code": "@@ -4,7 +4,7 @@\n \"scripts\": {\n \"ng\": \"ng\",\n \"start\": \"npm run copyLibAssets && ng serve --base-href /tensorboard/ --deploy-url /tensorboard/\",\n- \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboard/ --deploy-url /tensorboard/static/\",\n+ \"build\": \"npm run copyLibAssets && ng build --prod --base-href /tensorboards/ --deploy-url static/\",", - "comment_created_at": "2021-03-16T17:02:35+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Added a commit that removes the unused npm script", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "612292983", - "pr_number": 5823, - "pr_file": "components/crud-web-apps/jupyter/frontend/package.json", - "created_at": "2021-04-13T09:39:46+00:00", - "commented_code": "\"@fortawesome/free-brands-svg-icons\": \"^5.12.0\",\n \"@fortawesome/free-solid-svg-icons\": \"^5.12.0\",\n \"@kubernetes/client-node\": \"^0.12.2\",\n \"hammerjs\": \"^2.0.8\",\n \"date-fns\": \"^1.29.0\",\n \"lodash-es\": \"^4.17.11\",", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "612292983", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5823, - "pr_file": "components/crud-web-apps/jupyter/frontend/package.json", - "discussion_id": "612292983", - "commented_code": "@@ -32,10 +32,10 @@\n \"@fortawesome/free-brands-svg-icons\": \"^5.12.0\",\n \"@fortawesome/free-solid-svg-icons\": \"^5.12.0\",\n \"@kubernetes/client-node\": \"^0.12.2\",\n- \"hammerjs\": \"^2.0.8\",\n \"date-fns\": \"^1.29.0\",\n- \"lodash-es\": \"^4.17.11\",", - "comment_created_at": "2021-04-13T09:39:46+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Let's exclude this change from this PR and keep the touched lines to the minimum. 
This will also remove the 15.000 modified lines to the `package-lock.json` because of this change.\r\n\r\nAlthough I'd like to progressively replace the use of `lodash` with `lodash-es`, but until I get to it we can remove this from the `package.json`, but lets just do it in another PR.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-mark-ui-text-i18n.json b/_reviewers/kubeflow-mark-ui-text-i18n.json new file mode 100644 index 0000000..a69551c --- /dev/null +++ b/_reviewers/kubeflow-mark-ui-text-i18n.json @@ -0,0 +1,104 @@ +[ + { + "discussion_id": "687156585", + "pr_number": 6065, + "pr_file": "components/crud-web-apps/common/frontend/kubeflow-common-lib/projects/kubeflow/src/lib/form/name-namespace-inputs/name-input/name-input.component.html", + "created_at": "2021-08-11T20:17:17+00:00", + "commented_code": "\n {{ 'common.name' | translate }}\n \n {{ nameError().key | translate: nameError().params }}\n Name\n ", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "687156585", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6065, + "pr_file": "components/crud-web-apps/common/frontend/kubeflow-common-lib/projects/kubeflow/src/lib/form/name-namespace-inputs/name-input/name-input.component.html", + "discussion_id": "687156585", + "commented_code": "@@ -1,9 +1,5 @@\n \n- {{ 'common.name' | translate }}\n- \n- {{ nameError().key | translate: nameError().params }}\n+ Name\n+ ", + "comment_created_at": "2021-08-11T20:17:17+00:00", + "comment_author": "tasos-ale", + "comment_body": "You forgot to mark the placeholder for translation here.", + "pr_file_module": null + }, + { + "comment_id": "687546076", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6065, + "pr_file": "components/crud-web-apps/common/frontend/kubeflow-common-lib/projects/kubeflow/src/lib/form/name-namespace-inputs/name-input/name-input.component.html", + "discussion_id": "687156585", + "commented_code": "@@ -1,9 +1,5 @@\n \n- {{ 'common.name' | translate }}\n- \n- {{ nameError().key | translate: nameError().params }}\n+ Name\n+ ", + "comment_created_at": "2021-08-12T09:28:59+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Pushed a commit", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "687158053", + "pr_number": 6065, + "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.html", + "created_at": "2021-08-11T20:18:46+00:00", + "commented_code": "{{ imageDisplayName(img) }}\n \n \n \n {{ 'jupyter.formImage.errorImageRequired' | translate }}\n \n Please provide an Image to use\n \n\n \n {{ 'jupyter.formImage.customImage' | translate }}\n Custom Image\n \n \n- \n- {{ 'jupyter.formImage.errorImageRequired' | translate }}\n- \n+ Please provide an Image to use\n \n \n \n- {{ 'jupyter.formImage.customImage' | translate }}\n+ Custom Image\n \n \n- \n- {{ 'jupyter.formImage.errorImageRequired' | translate }}\n- \n+ Please provide an Image to use\n \n \n \n- {{ 'jupyter.formImage.customImage' | translate }}\n+ Custom Image\n \n {{ 'common.name' | translate }}\n \n {{ nameError().key | translate: nameError().params }}\n Name\n ", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "687156585", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6065, - "pr_file": "components/crud-web-apps/common/frontend/kubeflow-common-lib/projects/kubeflow/src/lib/form/name-namespace-inputs/name-input/name-input.component.html", - "discussion_id": 
"687156585", - "commented_code": "@@ -1,9 +1,5 @@\n \n- {{ 'common.name' | translate }}\n- \n- {{ nameError().key | translate: nameError().params }}\n+ Name\n+ ", - "comment_created_at": "2021-08-11T20:17:17+00:00", - "comment_author": "tasos-ale", - "comment_body": "You forgot to mark the placeholder for translation here.", - "pr_file_module": null - }, - { - "comment_id": "687546076", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6065, - "pr_file": "components/crud-web-apps/common/frontend/kubeflow-common-lib/projects/kubeflow/src/lib/form/name-namespace-inputs/name-input/name-input.component.html", - "discussion_id": "687156585", - "commented_code": "@@ -1,9 +1,5 @@\n \n- {{ 'common.name' | translate }}\n- \n- {{ nameError().key | translate: nameError().params }}\n+ Name\n+ ", - "comment_created_at": "2021-08-12T09:28:59+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Pushed a commit", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "687158053", - "pr_number": 6065, - "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.html", - "created_at": "2021-08-11T20:18:46+00:00", - "commented_code": "{{ imageDisplayName(img) }}\n \n \n \n {{ 'jupyter.formImage.errorImageRequired' | translate }}\n \n Please provide an Image to use\n \n\n \n {{ 'jupyter.formImage.customImage' | translate }}\n Custom Image\n \n \n- \n- {{ 'jupyter.formImage.errorImageRequired' | translate }}\n- \n+ Please provide an Image to use\n \n \n \n- {{ 'jupyter.formImage.customImage' | translate }}\n+ Custom Image\n \n \n- \n- {{ 'jupyter.formImage.errorImageRequired' | translate }}\n- \n+ Please provide an Image to use\n \n \n \n- {{ 'jupyter.formImage.customImage' | translate }}\n+ Custom Image\n \n\nimport { equalUrlPaths, removePrefixFrom } from 'src/app/shared/utils';\n\nCypress.Commands.add('getIframeBody', () => {\n cy.log('getIframeBody');\n // Cypress yields jQuery element, which has the real\n // DOM element under property \"0\".\n // From the real DOM iframe element we can get\n // the \"document\" element, it is stored in \"contentDocument\" property\n // Cypress \"its\" command can access deep properties using dot notation\n // https://on.cypress.io/its\n\n // get the iframe > document > body\n // and retry until the body element is not empty\n return (\n cy\n .get('iframe', { log: false })\n .its('0.contentDocument.body', { log: false })\n .should('not.be.empty')\n // wraps \"body\" DOM element to allow\n // chaining more Cypress commands, like \".find(...)\"\n // https://on.cypress.io/wrap\n .then(body => cy.wrap(body, { log: false }))\n );\n});\n\nCypress.Commands.add('getIframeUrl', () => {\n cy.log('getIframeLocation');\n cy.get('iframe', { log: false })\n .its('0.contentWindow.location', { log: false })\n .then(iframeLocation => {\n let iframeUrl: string = iframeLocation.pathname + iframeLocation.search;\n cy.wrap(iframeUrl, { log: false });\n });\n});\n\nCypress.Commands.add('equalUrls', () => {\n cy.log('equalUrls command');\n\n let iframeUrl: string;\n\n cy.getIframeUrl().then(url => {\n iframeUrl = url;\n cy.location({ log: false }).should(browserLocation => {\n let browserUrl = browserLocation.pathname + browserLocation.search;\n browserUrl = removePrefixFrom(browserUrl);\n\n expect(equalUrlPaths(browserUrl, iframeUrl)).to.be.true;", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1090491700", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6895, + 
"pr_file": "components/centraldashboard-angular/frontend/cypress/support/commands.ts", + "discussion_id": "1090491700", + "commented_code": "@@ -0,0 +1,91 @@\n+/// \n+\n+import { equalUrlPaths, removePrefixFrom } from 'src/app/shared/utils';\n+\n+Cypress.Commands.add('getIframeBody', () => {\n+ cy.log('getIframeBody');\n+ // Cypress yields jQuery element, which has the real\n+ // DOM element under property \"0\".\n+ // From the real DOM iframe element we can get\n+ // the \"document\" element, it is stored in \"contentDocument\" property\n+ // Cypress \"its\" command can access deep properties using dot notation\n+ // https://on.cypress.io/its\n+\n+ // get the iframe > document > body\n+ // and retry until the body element is not empty\n+ return (\n+ cy\n+ .get('iframe', { log: false })\n+ .its('0.contentDocument.body', { log: false })\n+ .should('not.be.empty')\n+ // wraps \"body\" DOM element to allow\n+ // chaining more Cypress commands, like \".find(...)\"\n+ // https://on.cypress.io/wrap\n+ .then(body => cy.wrap(body, { log: false }))\n+ );\n+});\n+\n+Cypress.Commands.add('getIframeUrl', () => {\n+ cy.log('getIframeLocation');\n+ cy.get('iframe', { log: false })\n+ .its('0.contentWindow.location', { log: false })\n+ .then(iframeLocation => {\n+ let iframeUrl: string = iframeLocation.pathname + iframeLocation.search;\n+ cy.wrap(iframeUrl, { log: false });\n+ });\n+});\n+\n+Cypress.Commands.add('equalUrls', () => {\n+ cy.log('equalUrls command');\n+\n+ let iframeUrl: string;\n+\n+ cy.getIframeUrl().then(url => {\n+ iframeUrl = url;\n+ cy.location({ log: false }).should(browserLocation => {\n+ let browserUrl = browserLocation.pathname + browserLocation.search;\n+ browserUrl = removePrefixFrom(browserUrl);\n+\n+ expect(equalUrlPaths(browserUrl, iframeUrl)).to.be.true;", + "comment_created_at": "2023-01-30T11:17:32+00:00", + "comment_author": "tasos-ale", + "comment_body": "```suggestion\r\n cy.getIframeUrl().then(url => {\r\n cy.location({ log: false }).should(browserLocation => {\r\n let browserUrl = browserLocation.pathname + browserLocation.search;\r\n browserUrl = removePrefixFrom(browserUrl);\\\r\n expect(equalUrlPaths(browserUrl, url)).to.be.true;\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1065779821", + "pr_number": 6856, + "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/iframe-wrapper/iframe-wrapper.component.ts", + "created_at": "2023-01-10T13:23:29+00:00", + "commented_code": "import {\n AfterViewInit,\n Component,\n ElementRef,\n OnDestroy,\n ViewChild,\n} from '@angular/core';\nimport { NavigationEnd, Router } from '@angular/router';\nimport { Subscription } from 'rxjs';\n\n@Component({\n selector: 'app-iframe-wrapper',\n templateUrl: './iframe-wrapper.component.html',\n styleUrls: ['./iframe-wrapper.component.scss'],\n})\nexport class IframeWrapperComponent implements AfterViewInit, OnDestroy {\n @ViewChild('iframe') iframe: ElementRef;\n\n public prvSrcPath: string;\n get srcPath(): string {\n return this.prvSrcPath;\n }\n set srcPath(src: string) {\n /*\n * The following hacky logic ensures that the Iframe will reload,\n * even when it receives values for src that are the same with the\n * one it already has. 
This is because, in order to avoid the iframe\n * reloading constantly, its src is not updated on each navigation\n * a user does.\n */\n if (window.location.origin) {\n if (!this.prvSrcPath?.includes(window.location.origin)) {\n src = window.location.origin + src;\n }\n }\n\n this.prvSrcPath = src;\n // Some KF distributions need a trailing slash \"/\" in order to resolve paths", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1065779821", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6856, + "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/iframe-wrapper/iframe-wrapper.component.ts", + "discussion_id": "1065779821", + "commented_code": "@@ -0,0 +1,121 @@\n+import {\n+ AfterViewInit,\n+ Component,\n+ ElementRef,\n+ OnDestroy,\n+ ViewChild,\n+} from '@angular/core';\n+import { NavigationEnd, Router } from '@angular/router';\n+import { Subscription } from 'rxjs';\n+\n+@Component({\n+ selector: 'app-iframe-wrapper',\n+ templateUrl: './iframe-wrapper.component.html',\n+ styleUrls: ['./iframe-wrapper.component.scss'],\n+})\n+export class IframeWrapperComponent implements AfterViewInit, OnDestroy {\n+ @ViewChild('iframe') iframe: ElementRef;\n+\n+ public prvSrcPath: string;\n+ get srcPath(): string {\n+ return this.prvSrcPath;\n+ }\n+ set srcPath(src: string) {\n+ /*\n+ * The following hacky logic ensures that the Iframe will reload,\n+ * even when it receives values for src that are the same with the\n+ * one it already has. This is because, in order to avoid the iframe\n+ * reloading constantly, its src is not updated on each navigation\n+ * a user does.\n+ */\n+ if (window.location.origin) {\n+ if (!this.prvSrcPath?.includes(window.location.origin)) {\n+ src = window.location.origin + src;\n+ }\n+ }\n+\n+ this.prvSrcPath = src;\n+ // Some KF distributions need a trailing slash \"/\" in order to resolve paths", + "comment_created_at": "2023-01-10T13:23:29+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Let's rephrase this to something like the following:\r\n\r\nWhen Istio exports Services it always expects a `/` at the end. SO we'll need to make sure the links propagated to the iframe end with a `/`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1065862401", + "pr_number": 6856, + "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/iframe-wrapper/iframe-wrapper.component.ts", + "created_at": "2023-01-10T14:37:43+00:00", + "commented_code": "import {\n AfterViewInit,\n Component,\n ElementRef,\n OnDestroy,\n ViewChild,\n} from '@angular/core';\nimport { NavigationEnd, Router } from '@angular/router';\nimport { Subscription } from 'rxjs';\n\n@Component({\n selector: 'app-iframe-wrapper',\n templateUrl: './iframe-wrapper.component.html',\n styleUrls: ['./iframe-wrapper.component.scss'],\n})\nexport class IframeWrapperComponent implements AfterViewInit, OnDestroy {\n @ViewChild('iframe') iframe: ElementRef;\n\n public prvSrcPath: string;\n get srcPath(): string {\n return this.prvSrcPath;\n }\n set srcPath(src: string) {\n /*\n * The following hacky logic ensures that the Iframe will reload,\n * even when it receives values for src that are the same with the\n * one it already has. 
This is because, in order to avoid the iframe\n * reloading constantly, its src is not updated on each navigation\n * a user does.\n */\n if (window.location.origin) {\n if (!this.prvSrcPath?.includes(window.location.origin)) {\n src = window.location.origin + src;\n }\n }\n\n this.prvSrcPath = src;\n // Some KF distributions need a trailing slash \"/\" in order to resolve paths\n this.prvSrcPath += this.prvSrcPath?.endsWith('/') ? '' : '/';\n }\n public iframeLocation: string | undefined = 'about:blank';\n private urlSub: Subscription;\n private interval: any;\n\n constructor(private router: Router) {\n this.urlSub = this.router.events.subscribe(event => {\n if (!(event instanceof NavigationEnd)) {\n return;\n }\n\n const iframeWindow = this.iframe?.nativeElement?.contentWindow;\n let iframeUrl = iframeWindow?.location.pathname;\n if (iframeUrl) {\n // Include URL's query parameters\n iframeUrl += iframeWindow?.location.search;\n }\n\n if (!this.equalUrlPaths(event.url, iframeUrl)) {\n this.srcPath = event.url;\n }\n });\n }\n\n equalUrlPaths(firstUrl: string, secondUrl: string | undefined) {\n if (!firstUrl && !secondUrl) {", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1065862401", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6856, + "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/iframe-wrapper/iframe-wrapper.component.ts", + "discussion_id": "1065862401", + "commented_code": "@@ -0,0 +1,121 @@\n+import {\n+ AfterViewInit,\n+ Component,\n+ ElementRef,\n+ OnDestroy,\n+ ViewChild,\n+} from '@angular/core';\n+import { NavigationEnd, Router } from '@angular/router';\n+import { Subscription } from 'rxjs';\n+\n+@Component({\n+ selector: 'app-iframe-wrapper',\n+ templateUrl: './iframe-wrapper.component.html',\n+ styleUrls: ['./iframe-wrapper.component.scss'],\n+})\n+export class IframeWrapperComponent implements AfterViewInit, OnDestroy {\n+ @ViewChild('iframe') iframe: ElementRef;\n+\n+ public prvSrcPath: string;\n+ get srcPath(): string {\n+ return this.prvSrcPath;\n+ }\n+ set srcPath(src: string) {\n+ /*\n+ * The following hacky logic ensures that the Iframe will reload,\n+ * even when it receives values for src that are the same with the\n+ * one it already has. This is because, in order to avoid the iframe\n+ * reloading constantly, its src is not updated on each navigation\n+ * a user does.\n+ */\n+ if (window.location.origin) {\n+ if (!this.prvSrcPath?.includes(window.location.origin)) {\n+ src = window.location.origin + src;\n+ }\n+ }\n+\n+ this.prvSrcPath = src;\n+ // Some KF distributions need a trailing slash \"/\" in order to resolve paths\n+ this.prvSrcPath += this.prvSrcPath?.endsWith('/') ? 
'' : '/';\n+ }\n+ public iframeLocation: string | undefined = 'about:blank';\n+ private urlSub: Subscription;\n+ private interval: any;\n+\n+ constructor(private router: Router) {\n+ this.urlSub = this.router.events.subscribe(event => {\n+ if (!(event instanceof NavigationEnd)) {\n+ return;\n+ }\n+\n+ const iframeWindow = this.iframe?.nativeElement?.contentWindow;\n+ let iframeUrl = iframeWindow?.location.pathname;\n+ if (iframeUrl) {\n+ // Include URL's query parameters\n+ iframeUrl += iframeWindow?.location.search;\n+ }\n+\n+ if (!this.equalUrlPaths(event.url, iframeUrl)) {\n+ this.srcPath = event.url;\n+ }\n+ });\n+ }\n+\n+ equalUrlPaths(firstUrl: string, secondUrl: string | undefined) {\n+ if (!firstUrl && !secondUrl) {", + "comment_created_at": "2023-01-10T14:37:43+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Let's add a comment here to explain why we need this function, which is to handle the sometimes missing `/` from some urls", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-normalize-url-paths.md b/_reviewers/kubeflow-normalize-url-paths.md index f50d168..1cf89bc 100644 --- a/_reviewers/kubeflow-normalize-url-paths.md +++ b/_reviewers/kubeflow-normalize-url-paths.md @@ -33,73 +33,3 @@ function equalUrlPaths(firstUrl: string, secondUrl: string): boolean { return normalizedFirst === normalizedSecond; } ``` - - -[ - { - "discussion_id": "1090491700", - "pr_number": 6895, - "pr_file": "components/centraldashboard-angular/frontend/cypress/support/commands.ts", - "created_at": "2023-01-30T11:17:32+00:00", - "commented_code": "/// \n\nimport { equalUrlPaths, removePrefixFrom } from 'src/app/shared/utils';\n\nCypress.Commands.add('getIframeBody', () => {\n cy.log('getIframeBody');\n // Cypress yields jQuery element, which has the real\n // DOM element under property \"0\".\n // From the real DOM iframe element we can get\n // the \"document\" element, it is stored in \"contentDocument\" property\n // Cypress \"its\" command can access deep properties using dot notation\n // https://on.cypress.io/its\n\n // get the iframe > document > body\n // and retry until the body element is not empty\n return (\n cy\n .get('iframe', { log: false })\n .its('0.contentDocument.body', { log: false })\n .should('not.be.empty')\n // wraps \"body\" DOM element to allow\n // chaining more Cypress commands, like \".find(...)\"\n // https://on.cypress.io/wrap\n .then(body => cy.wrap(body, { log: false }))\n );\n});\n\nCypress.Commands.add('getIframeUrl', () => {\n cy.log('getIframeLocation');\n cy.get('iframe', { log: false })\n .its('0.contentWindow.location', { log: false })\n .then(iframeLocation => {\n let iframeUrl: string = iframeLocation.pathname + iframeLocation.search;\n cy.wrap(iframeUrl, { log: false });\n });\n});\n\nCypress.Commands.add('equalUrls', () => {\n cy.log('equalUrls command');\n\n let iframeUrl: string;\n\n cy.getIframeUrl().then(url => {\n iframeUrl = url;\n cy.location({ log: false }).should(browserLocation => {\n let browserUrl = browserLocation.pathname + browserLocation.search;\n browserUrl = removePrefixFrom(browserUrl);\n\n expect(equalUrlPaths(browserUrl, iframeUrl)).to.be.true;", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1090491700", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6895, - "pr_file": "components/centraldashboard-angular/frontend/cypress/support/commands.ts", - "discussion_id": "1090491700", - "commented_code": "@@ -0,0 +1,91 @@\n+/// 
\n+\n+import { equalUrlPaths, removePrefixFrom } from 'src/app/shared/utils';\n+\n+Cypress.Commands.add('getIframeBody', () => {\n+ cy.log('getIframeBody');\n+ // Cypress yields jQuery element, which has the real\n+ // DOM element under property \"0\".\n+ // From the real DOM iframe element we can get\n+ // the \"document\" element, it is stored in \"contentDocument\" property\n+ // Cypress \"its\" command can access deep properties using dot notation\n+ // https://on.cypress.io/its\n+\n+ // get the iframe > document > body\n+ // and retry until the body element is not empty\n+ return (\n+ cy\n+ .get('iframe', { log: false })\n+ .its('0.contentDocument.body', { log: false })\n+ .should('not.be.empty')\n+ // wraps \"body\" DOM element to allow\n+ // chaining more Cypress commands, like \".find(...)\"\n+ // https://on.cypress.io/wrap\n+ .then(body => cy.wrap(body, { log: false }))\n+ );\n+});\n+\n+Cypress.Commands.add('getIframeUrl', () => {\n+ cy.log('getIframeLocation');\n+ cy.get('iframe', { log: false })\n+ .its('0.contentWindow.location', { log: false })\n+ .then(iframeLocation => {\n+ let iframeUrl: string = iframeLocation.pathname + iframeLocation.search;\n+ cy.wrap(iframeUrl, { log: false });\n+ });\n+});\n+\n+Cypress.Commands.add('equalUrls', () => {\n+ cy.log('equalUrls command');\n+\n+ let iframeUrl: string;\n+\n+ cy.getIframeUrl().then(url => {\n+ iframeUrl = url;\n+ cy.location({ log: false }).should(browserLocation => {\n+ let browserUrl = browserLocation.pathname + browserLocation.search;\n+ browserUrl = removePrefixFrom(browserUrl);\n+\n+ expect(equalUrlPaths(browserUrl, iframeUrl)).to.be.true;", - "comment_created_at": "2023-01-30T11:17:32+00:00", - "comment_author": "tasos-ale", - "comment_body": "```suggestion\r\n cy.getIframeUrl().then(url => {\r\n cy.location({ log: false }).should(browserLocation => {\r\n let browserUrl = browserLocation.pathname + browserLocation.search;\r\n browserUrl = removePrefixFrom(browserUrl);\\\r\n expect(equalUrlPaths(browserUrl, url)).to.be.true;\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1065779821", - "pr_number": 6856, - "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/iframe-wrapper/iframe-wrapper.component.ts", - "created_at": "2023-01-10T13:23:29+00:00", - "commented_code": "import {\n AfterViewInit,\n Component,\n ElementRef,\n OnDestroy,\n ViewChild,\n} from '@angular/core';\nimport { NavigationEnd, Router } from '@angular/router';\nimport { Subscription } from 'rxjs';\n\n@Component({\n selector: 'app-iframe-wrapper',\n templateUrl: './iframe-wrapper.component.html',\n styleUrls: ['./iframe-wrapper.component.scss'],\n})\nexport class IframeWrapperComponent implements AfterViewInit, OnDestroy {\n @ViewChild('iframe') iframe: ElementRef;\n\n public prvSrcPath: string;\n get srcPath(): string {\n return this.prvSrcPath;\n }\n set srcPath(src: string) {\n /*\n * The following hacky logic ensures that the Iframe will reload,\n * even when it receives values for src that are the same with the\n * one it already has. 
This is because, in order to avoid the iframe\n * reloading constantly, its src is not updated on each navigation\n * a user does.\n */\n if (window.location.origin) {\n if (!this.prvSrcPath?.includes(window.location.origin)) {\n src = window.location.origin + src;\n }\n }\n\n this.prvSrcPath = src;\n // Some KF distributions need a trailing slash \"/\" in order to resolve paths", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1065779821", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6856, - "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/iframe-wrapper/iframe-wrapper.component.ts", - "discussion_id": "1065779821", - "commented_code": "@@ -0,0 +1,121 @@\n+import {\n+ AfterViewInit,\n+ Component,\n+ ElementRef,\n+ OnDestroy,\n+ ViewChild,\n+} from '@angular/core';\n+import { NavigationEnd, Router } from '@angular/router';\n+import { Subscription } from 'rxjs';\n+\n+@Component({\n+ selector: 'app-iframe-wrapper',\n+ templateUrl: './iframe-wrapper.component.html',\n+ styleUrls: ['./iframe-wrapper.component.scss'],\n+})\n+export class IframeWrapperComponent implements AfterViewInit, OnDestroy {\n+ @ViewChild('iframe') iframe: ElementRef;\n+\n+ public prvSrcPath: string;\n+ get srcPath(): string {\n+ return this.prvSrcPath;\n+ }\n+ set srcPath(src: string) {\n+ /*\n+ * The following hacky logic ensures that the Iframe will reload,\n+ * even when it receives values for src that are the same with the\n+ * one it already has. This is because, in order to avoid the iframe\n+ * reloading constantly, its src is not updated on each navigation\n+ * a user does.\n+ */\n+ if (window.location.origin) {\n+ if (!this.prvSrcPath?.includes(window.location.origin)) {\n+ src = window.location.origin + src;\n+ }\n+ }\n+\n+ this.prvSrcPath = src;\n+ // Some KF distributions need a trailing slash \"/\" in order to resolve paths", - "comment_created_at": "2023-01-10T13:23:29+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Let's rephrase this to something like the following:\r\n\r\nWhen Istio exports Services it always expects a `/` at the end. SO we'll need to make sure the links propagated to the iframe end with a `/`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1065862401", - "pr_number": 6856, - "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/iframe-wrapper/iframe-wrapper.component.ts", - "created_at": "2023-01-10T14:37:43+00:00", - "commented_code": "import {\n AfterViewInit,\n Component,\n ElementRef,\n OnDestroy,\n ViewChild,\n} from '@angular/core';\nimport { NavigationEnd, Router } from '@angular/router';\nimport { Subscription } from 'rxjs';\n\n@Component({\n selector: 'app-iframe-wrapper',\n templateUrl: './iframe-wrapper.component.html',\n styleUrls: ['./iframe-wrapper.component.scss'],\n})\nexport class IframeWrapperComponent implements AfterViewInit, OnDestroy {\n @ViewChild('iframe') iframe: ElementRef;\n\n public prvSrcPath: string;\n get srcPath(): string {\n return this.prvSrcPath;\n }\n set srcPath(src: string) {\n /*\n * The following hacky logic ensures that the Iframe will reload,\n * even when it receives values for src that are the same with the\n * one it already has. 
This is because, in order to avoid the iframe\n * reloading constantly, its src is not updated on each navigation\n * a user does.\n */\n if (window.location.origin) {\n if (!this.prvSrcPath?.includes(window.location.origin)) {\n src = window.location.origin + src;\n }\n }\n\n this.prvSrcPath = src;\n // Some KF distributions need a trailing slash \"/\" in order to resolve paths\n this.prvSrcPath += this.prvSrcPath?.endsWith('/') ? '' : '/';\n }\n public iframeLocation: string | undefined = 'about:blank';\n private urlSub: Subscription;\n private interval: any;\n\n constructor(private router: Router) {\n this.urlSub = this.router.events.subscribe(event => {\n if (!(event instanceof NavigationEnd)) {\n return;\n }\n\n const iframeWindow = this.iframe?.nativeElement?.contentWindow;\n let iframeUrl = iframeWindow?.location.pathname;\n if (iframeUrl) {\n // Include URL's query parameters\n iframeUrl += iframeWindow?.location.search;\n }\n\n if (!this.equalUrlPaths(event.url, iframeUrl)) {\n this.srcPath = event.url;\n }\n });\n }\n\n equalUrlPaths(firstUrl: string, secondUrl: string | undefined) {\n if (!firstUrl && !secondUrl) {", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1065862401", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6856, - "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/iframe-wrapper/iframe-wrapper.component.ts", - "discussion_id": "1065862401", - "commented_code": "@@ -0,0 +1,121 @@\n+import {\n+ AfterViewInit,\n+ Component,\n+ ElementRef,\n+ OnDestroy,\n+ ViewChild,\n+} from '@angular/core';\n+import { NavigationEnd, Router } from '@angular/router';\n+import { Subscription } from 'rxjs';\n+\n+@Component({\n+ selector: 'app-iframe-wrapper',\n+ templateUrl: './iframe-wrapper.component.html',\n+ styleUrls: ['./iframe-wrapper.component.scss'],\n+})\n+export class IframeWrapperComponent implements AfterViewInit, OnDestroy {\n+ @ViewChild('iframe') iframe: ElementRef;\n+\n+ public prvSrcPath: string;\n+ get srcPath(): string {\n+ return this.prvSrcPath;\n+ }\n+ set srcPath(src: string) {\n+ /*\n+ * The following hacky logic ensures that the Iframe will reload,\n+ * even when it receives values for src that are the same with the\n+ * one it already has. This is because, in order to avoid the iframe\n+ * reloading constantly, its src is not updated on each navigation\n+ * a user does.\n+ */\n+ if (window.location.origin) {\n+ if (!this.prvSrcPath?.includes(window.location.origin)) {\n+ src = window.location.origin + src;\n+ }\n+ }\n+\n+ this.prvSrcPath = src;\n+ // Some KF distributions need a trailing slash \"/\" in order to resolve paths\n+ this.prvSrcPath += this.prvSrcPath?.endsWith('/') ? 
'' : '/';\n+ }\n+ public iframeLocation: string | undefined = 'about:blank';\n+ private urlSub: Subscription;\n+ private interval: any;\n+\n+ constructor(private router: Router) {\n+ this.urlSub = this.router.events.subscribe(event => {\n+ if (!(event instanceof NavigationEnd)) {\n+ return;\n+ }\n+\n+ const iframeWindow = this.iframe?.nativeElement?.contentWindow;\n+ let iframeUrl = iframeWindow?.location.pathname;\n+ if (iframeUrl) {\n+ // Include URL's query parameters\n+ iframeUrl += iframeWindow?.location.search;\n+ }\n+\n+ if (!this.equalUrlPaths(event.url, iframeUrl)) {\n+ this.srcPath = event.url;\n+ }\n+ });\n+ }\n+\n+ equalUrlPaths(firstUrl: string, secondUrl: string | undefined) {\n+ if (!firstUrl && !secondUrl) {", - "comment_created_at": "2023-01-10T14:37:43+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Let's add a comment here to explain why we need this function, which is to handle the sometimes missing `/` from some urls", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-optimize-container-build-configurations.json b/_reviewers/kubeflow-optimize-container-build-configurations.json new file mode 100644 index 0000000..9cb5a04 --- /dev/null +++ b/_reviewers/kubeflow-optimize-container-build-configurations.json @@ -0,0 +1,118 @@ +[ + { + "discussion_id": "1029353841", + "pr_number": 6650, + "pr_file": "components/admission-webhook/Dockerfile", + "created_at": "2022-11-22T13:51:09+00:00", + "commented_code": "ENV GO111MODULE=on\n\n# Build\nRUN if [ \"$(uname -m)\" = \"aarch64\" ]; then \\\n CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -o webhook -a . ; \\\n else \\\n CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o webhook -a . ; \\\n fi\n\nRUN CGO_ENABLED=0 GOOS=linux go build -o webhook -a . ;", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1029353841", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6650, + "pr_file": "components/admission-webhook/Dockerfile", + "discussion_id": "1029353841", + "commented_code": "@@ -10,12 +10,8 @@ COPY . .\n ENV GO111MODULE=on\n \n # Build\n-RUN if [ \"$(uname -m)\" = \"aarch64\" ]; then \\\n- CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -o webhook -a . ; \\\n- else \\\n- CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o webhook -a . ; \\\n- fi\n-\n+RUN CGO_ENABLED=0 GOOS=linux go build -o webhook -a . ;", + "comment_created_at": "2022-11-22T13:51:09+00:00", + "comment_author": "kimwnasptd", + "comment_body": "I understand that by omitting the GOARCH env var we let golang decide on the architecture based on the node environment (machine). \r\n\r\nI'm just trying to wrap my mind around the next step. Is it going to be the process described in https://docs.docker.com/build/building/multi-platform/ to build images that have manifests for different platforms?", + "pr_file_module": null + }, + { + "comment_id": "1029586906", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6650, + "pr_file": "components/admission-webhook/Dockerfile", + "discussion_id": "1029353841", + "commented_code": "@@ -10,12 +10,8 @@ COPY . .\n ENV GO111MODULE=on\n \n # Build\n-RUN if [ \"$(uname -m)\" = \"aarch64\" ]; then \\\n- CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -o webhook -a . ; \\\n- else \\\n- CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o webhook -a . ; \\\n- fi\n-\n+RUN CGO_ENABLED=0 GOOS=linux go build -o webhook -a . 
;", + "comment_created_at": "2022-11-22T16:41:41+00:00", + "comment_author": "lehrig", + "comment_body": "Yes, it's good to let GO determine the arch, so we don't have to maintain an arch list explicitly here or wrap around boiler-plate code with arch-specific if/else statements.\r\n\r\nInstead, we now shift the control which arch is actually build to the build system. If doing nothing special, the arch of the build machine is simply used (and as Kubeflow is currently build on amd64, we are backwards-compatible to the current behavior).\r\n\r\nIn further PRs, we will additionally modify docker-based builds using buildx, where you can, for instance, do something like this:\r\n`docker buildx build --platform linux/amd64,linux/ppc64le ... `\r\n\r\nHere, docker will automatically run actually 2 builds: one for amd64 and one for ppc64le. When coming to above GO-code, GO will acknowledge the external platform configuration and build it correctly. In case no native hardware is available for the given platform, docker will emulate the architecture using QEMU - so you can also build for different archs on amd64.\r\n\r\nThe final outcome is a single multi-arch image with support of all archs listed in Docker's platform statement.", + "pr_file_module": null + }, + { + "comment_id": "1030447960", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6650, + "pr_file": "components/admission-webhook/Dockerfile", + "discussion_id": "1029353841", + "commented_code": "@@ -10,12 +10,8 @@ COPY . .\n ENV GO111MODULE=on\n \n # Build\n-RUN if [ \"$(uname -m)\" = \"aarch64\" ]; then \\\n- CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -o webhook -a . ; \\\n- else \\\n- CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o webhook -a . ; \\\n- fi\n-\n+RUN CGO_ENABLED=0 GOOS=linux go build -o webhook -a . ;", + "comment_created_at": "2022-11-23T13:32:16+00:00", + "comment_author": "kimwnasptd", + "comment_body": "@lehrig this was a *very* thorough explanation of the suggested approach! 
I'd also suggest to cross post it in the umbrella issue, so that it's readily available for anyone wondering how to e2e approach is going to be.\r\n\r\nAlso LGTM", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "382505839", + "pr_number": 4775, + "pr_file": "components/admission-webhook/Dockerfile", + "created_at": "2020-02-21T10:25:50+00:00", + "commented_code": "RUN go build -o webhook .", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "382505839", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4775, + "pr_file": "components/admission-webhook/Dockerfile", + "discussion_id": "382505839", + "commented_code": "@@ -13,7 +13,7 @@ ENV GO111MODULE=on\n RUN go build -o webhook .", + "comment_created_at": "2020-02-21T10:25:50+00:00", + "comment_author": "swiftdiaries", + "comment_body": "I think this part is what prevents us from using the static image.\r\n\r\n`CGO_ENABLED=0 GOOS=linux go build -o webook -ldflags \"-w\" -a .`\r\nCould you please try this instead?\r\nWithout `CGO` we should be able to build with static", + "pr_file_module": null + }, + { + "comment_id": "382575357", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4775, + "pr_file": "components/admission-webhook/Dockerfile", + "discussion_id": "382505839", + "commented_code": "@@ -13,7 +13,7 @@ ENV GO111MODULE=on\n RUN go build -o webhook .", + "comment_created_at": "2020-02-21T13:16:47+00:00", + "comment_author": "jtfogarty", + "comment_body": "ok, I will test this", + "pr_file_module": null + }, + { + "comment_id": "382592936", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4775, + "pr_file": "components/admission-webhook/Dockerfile", + "discussion_id": "382505839", + "commented_code": "@@ -13,7 +13,7 @@ ENV GO111MODULE=on\n RUN go build -o webhook .", + "comment_created_at": "2020-02-21T13:55:51+00:00", + "comment_author": "jtfogarty", + "comment_body": "@swiftdiaries where is that line `CGO_ENABLED=0 GOOS=linux go build -o webook -ldflags \"-w\" -a .` \r\nit's not in the Makefile", + "pr_file_module": null + }, + { + "comment_id": "382685635", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4775, + "pr_file": "components/admission-webhook/Dockerfile", + "discussion_id": "382505839", + "commented_code": "@@ -13,7 +13,7 @@ ENV GO111MODULE=on\n RUN go build -o webhook .", + "comment_created_at": "2020-02-21T16:40:45+00:00", + "comment_author": "jlewi", + "comment_body": "I think @swiftdiaries is suggesting changing the existing go-build line to what he provided. I think setting CGO_ENABLED=0 removes a dependency on some c stuff in the compiled binary which might allow the use of static instead of base. 
", + "pr_file_module": null + }, + { + "comment_id": "383312620", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4775, + "pr_file": "components/admission-webhook/Dockerfile", + "discussion_id": "382505839", + "commented_code": "@@ -13,7 +13,7 @@ ENV GO111MODULE=on\n RUN go build -o webhook .", + "comment_created_at": "2020-02-24T14:56:26+00:00", + "comment_author": "krishnadurai", + "comment_body": "@jeff in the line:\r\n```\r\nRUN CGO_ENABLED=0 GOOS=linux go build -o webook -ldflags \"-w\" -a .\r\n```\r\nWebhook is misspelt.\r\n", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-optimize-container-build-configurations.md b/_reviewers/kubeflow-optimize-container-build-configurations.md index 8bd9a06..9ace54d 100644 --- a/_reviewers/kubeflow-optimize-container-build-configurations.md +++ b/_reviewers/kubeflow-optimize-container-build-configurations.md @@ -30,123 +30,3 @@ RUN CGO_ENABLED=0 GOOS=linux go build -o webhook -ldflags "-w" -a . ``` This approach simplifies maintenance, allows multi-architecture builds through tools like Docker buildx (e.g., `docker buildx build --platform linux/amd64,linux/arm64 ...`), and produces more efficient container images. - - -[ - { - "discussion_id": "1029353841", - "pr_number": 6650, - "pr_file": "components/admission-webhook/Dockerfile", - "created_at": "2022-11-22T13:51:09+00:00", - "commented_code": "ENV GO111MODULE=on\n\n# Build\nRUN if [ \"$(uname -m)\" = \"aarch64\" ]; then \\\n CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -o webhook -a . ; \\\n else \\\n CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o webhook -a . ; \\\n fi\n\nRUN CGO_ENABLED=0 GOOS=linux go build -o webhook -a . ;", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1029353841", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6650, - "pr_file": "components/admission-webhook/Dockerfile", - "discussion_id": "1029353841", - "commented_code": "@@ -10,12 +10,8 @@ COPY . .\n ENV GO111MODULE=on\n \n # Build\n-RUN if [ \"$(uname -m)\" = \"aarch64\" ]; then \\\n- CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -o webhook -a . ; \\\n- else \\\n- CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o webhook -a . ; \\\n- fi\n-\n+RUN CGO_ENABLED=0 GOOS=linux go build -o webhook -a . ;", - "comment_created_at": "2022-11-22T13:51:09+00:00", - "comment_author": "kimwnasptd", - "comment_body": "I understand that by omitting the GOARCH env var we let golang decide on the architecture based on the node environment (machine). \r\n\r\nI'm just trying to wrap my mind around the next step. Is it going to be the process described in https://docs.docker.com/build/building/multi-platform/ to build images that have manifests for different platforms?", - "pr_file_module": null - }, - { - "comment_id": "1029586906", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6650, - "pr_file": "components/admission-webhook/Dockerfile", - "discussion_id": "1029353841", - "commented_code": "@@ -10,12 +10,8 @@ COPY . .\n ENV GO111MODULE=on\n \n # Build\n-RUN if [ \"$(uname -m)\" = \"aarch64\" ]; then \\\n- CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -o webhook -a . ; \\\n- else \\\n- CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o webhook -a . ; \\\n- fi\n-\n+RUN CGO_ENABLED=0 GOOS=linux go build -o webhook -a . 
;", - "comment_created_at": "2022-11-22T16:41:41+00:00", - "comment_author": "lehrig", - "comment_body": "Yes, it's good to let GO determine the arch, so we don't have to maintain an arch list explicitly here or wrap around boiler-plate code with arch-specific if/else statements.\r\n\r\nInstead, we now shift the control which arch is actually build to the build system. If doing nothing special, the arch of the build machine is simply used (and as Kubeflow is currently build on amd64, we are backwards-compatible to the current behavior).\r\n\r\nIn further PRs, we will additionally modify docker-based builds using buildx, where you can, for instance, do something like this:\r\n`docker buildx build --platform linux/amd64,linux/ppc64le ... `\r\n\r\nHere, docker will automatically run actually 2 builds: one for amd64 and one for ppc64le. When coming to above GO-code, GO will acknowledge the external platform configuration and build it correctly. In case no native hardware is available for the given platform, docker will emulate the architecture using QEMU - so you can also build for different archs on amd64.\r\n\r\nThe final outcome is a single multi-arch image with support of all archs listed in Docker's platform statement.", - "pr_file_module": null - }, - { - "comment_id": "1030447960", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6650, - "pr_file": "components/admission-webhook/Dockerfile", - "discussion_id": "1029353841", - "commented_code": "@@ -10,12 +10,8 @@ COPY . .\n ENV GO111MODULE=on\n \n # Build\n-RUN if [ \"$(uname -m)\" = \"aarch64\" ]; then \\\n- CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -o webhook -a . ; \\\n- else \\\n- CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o webhook -a . ; \\\n- fi\n-\n+RUN CGO_ENABLED=0 GOOS=linux go build -o webhook -a . ;", - "comment_created_at": "2022-11-23T13:32:16+00:00", - "comment_author": "kimwnasptd", - "comment_body": "@lehrig this was a *very* thorough explanation of the suggested approach! 
I'd also suggest to cross post it in the umbrella issue, so that it's readily available for anyone wondering how to e2e approach is going to be.\r\n\r\nAlso LGTM", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "382505839", - "pr_number": 4775, - "pr_file": "components/admission-webhook/Dockerfile", - "created_at": "2020-02-21T10:25:50+00:00", - "commented_code": "RUN go build -o webhook .", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "382505839", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4775, - "pr_file": "components/admission-webhook/Dockerfile", - "discussion_id": "382505839", - "commented_code": "@@ -13,7 +13,7 @@ ENV GO111MODULE=on\n RUN go build -o webhook .", - "comment_created_at": "2020-02-21T10:25:50+00:00", - "comment_author": "swiftdiaries", - "comment_body": "I think this part is what prevents us from using the static image.\r\n\r\n`CGO_ENABLED=0 GOOS=linux go build -o webook -ldflags \"-w\" -a .`\r\nCould you please try this instead?\r\nWithout `CGO` we should be able to build with static", - "pr_file_module": null - }, - { - "comment_id": "382575357", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4775, - "pr_file": "components/admission-webhook/Dockerfile", - "discussion_id": "382505839", - "commented_code": "@@ -13,7 +13,7 @@ ENV GO111MODULE=on\n RUN go build -o webhook .", - "comment_created_at": "2020-02-21T13:16:47+00:00", - "comment_author": "jtfogarty", - "comment_body": "ok, I will test this", - "pr_file_module": null - }, - { - "comment_id": "382592936", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4775, - "pr_file": "components/admission-webhook/Dockerfile", - "discussion_id": "382505839", - "commented_code": "@@ -13,7 +13,7 @@ ENV GO111MODULE=on\n RUN go build -o webhook .", - "comment_created_at": "2020-02-21T13:55:51+00:00", - "comment_author": "jtfogarty", - "comment_body": "@swiftdiaries where is that line `CGO_ENABLED=0 GOOS=linux go build -o webook -ldflags \"-w\" -a .` \r\nit's not in the Makefile", - "pr_file_module": null - }, - { - "comment_id": "382685635", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4775, - "pr_file": "components/admission-webhook/Dockerfile", - "discussion_id": "382505839", - "commented_code": "@@ -13,7 +13,7 @@ ENV GO111MODULE=on\n RUN go build -o webhook .", - "comment_created_at": "2020-02-21T16:40:45+00:00", - "comment_author": "jlewi", - "comment_body": "I think @swiftdiaries is suggesting changing the existing go-build line to what he provided. I think setting CGO_ENABLED=0 removes a dependency on some c stuff in the compiled binary which might allow the use of static instead of base. 
", - "pr_file_module": null - }, - { - "comment_id": "383312620", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4775, - "pr_file": "components/admission-webhook/Dockerfile", - "discussion_id": "382505839", - "commented_code": "@@ -13,7 +13,7 @@ ENV GO111MODULE=on\n RUN go build -o webhook .", - "comment_created_at": "2020-02-24T14:56:26+00:00", - "comment_author": "krishnadurai", - "comment_body": "@jeff in the line:\r\n```\r\nRUN CGO_ENABLED=0 GOOS=linux go build -o webook -ldflags \"-w\" -a .\r\n```\r\nWebhook is misspelt.\r\n", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-pin-version-dependencies.json b/_reviewers/kubeflow-pin-version-dependencies.json new file mode 100644 index 0000000..29b9e3a --- /dev/null +++ b/_reviewers/kubeflow-pin-version-dependencies.json @@ -0,0 +1,234 @@ +[ + { + "discussion_id": "920964404", + "pr_number": 6491, + "pr_file": "components/profile-controller/Makefile", + "created_at": "2022-07-14T09:43:16+00:00", + "commented_code": "GOBIN=$(shell go env GOBIN)\nendif\n\nall: manager\n# Setting SHELL to bash allows bash commands to be executed by recipes.\n# This is a requirement for 'setup-envtest.sh' in the test target.\n# Options are set to exit when a recipe line exits non-zero or a piped command fails.\nSHELL = /usr/bin/env bash -o pipefail\n.SHELLFLAGS = -ec\n\n# Run tests\ntest: generate fmt vet\n\tgo test ./... -coverprofile cover.out\n.PHONY: all\nall: build\n\n# Build manager binary\nmanager: generate fmt vet\n\tgo build -o bin/manager main.go\n\n# Run against the configured Kubernetes cluster in ~/.kube/config\nrun: generate fmt vet\n\tgo run ./main.go\n\n# Install CRDs into a cluster\ninstall:\n\tkustomize build config/crd | kubectl apply -f -\n.PHONY: help\nhelp: ## Display this help.\n\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n\n# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\ndeploy: manifests\n\tcd config/manager && kustomize edit set image gcr.io/kubeflow-images-public/profile-controller=${IMG}\n\tkustomize build config/default | kubectl apply -f -\n##@ Development\n\n# Generate manifests e.g. 
CRD, RBAC etc.\nmanifests: controller-gen\n\t$(CONTROLLER_GEN) crd:trivialVersions=true paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n.PHONY: manifests\nmanifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n\t$(CONTROLLER_GEN) crd paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n\t# Uncomment when we remove the permissive ClusterRoleBinding to cluster-admin\n\t# $(CONTROLLER_GEN) rbac:roleName=cluster-role-binding webhook paths=\"./...\"\n\n# Run go fmt against code\nfmt:\n.PHONY: generate\ngenerate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n\t\n\n.PHONY: fmt\nfmt: ## Run go fmt against code.\n\tgo fmt ./...\n\n# Run go vet against code\nvet:\n.PHONY: vet\nvet: ## Run go vet against code.\n\tgo vet ./...\n\n# Generate code\ngenerate: controller-gen\n\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=\"./...\"\n.PHONY: test\ntest: manifests generate fmt vet envtest ## Run tests.\n\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... -coverprofile cover.out\n\n##@ Build\n\n.PHONY: build\nbuild: generate fmt vet ## Build manager binary.\n\tgo build -o bin/manager main.go\n\n.PHONY: run\nrun: manifests generate fmt vet ## Run a controller from your host.\n\tgo run ./main.go -namespace-labels-path ./config/base/namespace-labels.yaml \n\n# Build the docker image\ndocker-build:\n\tdocker build --build-arg GOLANG_VERSION=${GOLANG_VERSION} -t ${IMG}:${TAG} .\n\t@echo Built ${IMG}:${TAG}\n.PHONY: docker-build\ndocker-build: test ## Build docker image with the manager.", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "920964404", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6491, + "pr_file": "components/profile-controller/Makefile", + "discussion_id": "920964404", + "commented_code": "@@ -11,65 +11,136 @@ else\n GOBIN=$(shell go env GOBIN)\n endif\n \n-all: manager\n+# Setting SHELL to bash allows bash commands to be executed by recipes.\n+# This is a requirement for 'setup-envtest.sh' in the test target.\n+# Options are set to exit when a recipe line exits non-zero or a piped command fails.\n+SHELL = /usr/bin/env bash -o pipefail\n+.SHELLFLAGS = -ec\n \n-# Run tests\n-test: generate fmt vet\n-\tgo test ./... -coverprofile cover.out\n+.PHONY: all\n+all: build\n \n-# Build manager binary\n-manager: generate fmt vet\n-\tgo build -o bin/manager main.go\n-\n-# Run against the configured Kubernetes cluster in ~/.kube/config\n-run: generate fmt vet\n-\tgo run ./main.go\n-\n-# Install CRDs into a cluster\n-install:\n-\tkustomize build config/crd | kubectl apply -f -\n+.PHONY: help\n+help: ## Display this help.\n+\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n \n-# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\n-deploy: manifests\n-\tcd config/manager && kustomize edit set image gcr.io/kubeflow-images-public/profile-controller=${IMG}\n-\tkustomize build config/default | kubectl apply -f -\n+##@ Development\n \n-# Generate manifests e.g. 
CRD, RBAC etc.\n-manifests: controller-gen\n-\t$(CONTROLLER_GEN) crd:trivialVersions=true paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n+.PHONY: manifests\n+manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n+\t$(CONTROLLER_GEN) crd paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n \t# Uncomment when we remove the permissive ClusterRoleBinding to cluster-admin\n \t# $(CONTROLLER_GEN) rbac:roleName=cluster-role-binding webhook paths=\"./...\"\n \n-# Run go fmt against code\n-fmt:\n+.PHONY: generate\n+generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n+\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n+\t\n+\n+.PHONY: fmt\n+fmt: ## Run go fmt against code.\n \tgo fmt ./...\n \n-# Run go vet against code\n-vet:\n+.PHONY: vet\n+vet: ## Run go vet against code.\n \tgo vet ./...\n \n-# Generate code\n-generate: controller-gen\n-\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=\"./...\"\n+.PHONY: test\n+test: manifests generate fmt vet envtest ## Run tests.\n+\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... -coverprofile cover.out\n+\n+##@ Build\n+\n+.PHONY: build\n+build: generate fmt vet ## Build manager binary.\n+\tgo build -o bin/manager main.go\n+\n+.PHONY: run\n+run: manifests generate fmt vet ## Run a controller from your host.\n+\tgo run ./main.go -namespace-labels-path ./config/base/namespace-labels.yaml \n \n-# Build the docker image\n-docker-build:\n-\tdocker build --build-arg GOLANG_VERSION=${GOLANG_VERSION} -t ${IMG}:${TAG} .\n-\t@echo Built ${IMG}:${TAG}\n+.PHONY: docker-build\n+docker-build: test ## Build docker image with the manager.", + "comment_created_at": "2022-07-14T09:43:16+00:00", + "comment_author": "kimwnasptd", + "comment_body": "let's remove the dependency on the `test` rule. This would require users to have a configured Go environment to build the controller. \r\n\r\nIf in the future we'd like to run these tests then we should do this inside of the Dockerfile, just like we do for the CentralDashboard", + "pr_file_module": null + }, + { + "comment_id": "920997563", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6491, + "pr_file": "components/profile-controller/Makefile", + "discussion_id": "920964404", + "commented_code": "@@ -11,65 +11,136 @@ else\n GOBIN=$(shell go env GOBIN)\n endif\n \n-all: manager\n+# Setting SHELL to bash allows bash commands to be executed by recipes.\n+# This is a requirement for 'setup-envtest.sh' in the test target.\n+# Options are set to exit when a recipe line exits non-zero or a piped command fails.\n+SHELL = /usr/bin/env bash -o pipefail\n+.SHELLFLAGS = -ec\n \n-# Run tests\n-test: generate fmt vet\n-\tgo test ./... 
-coverprofile cover.out\n+.PHONY: all\n+all: build\n \n-# Build manager binary\n-manager: generate fmt vet\n-\tgo build -o bin/manager main.go\n-\n-# Run against the configured Kubernetes cluster in ~/.kube/config\n-run: generate fmt vet\n-\tgo run ./main.go\n-\n-# Install CRDs into a cluster\n-install:\n-\tkustomize build config/crd | kubectl apply -f -\n+.PHONY: help\n+help: ## Display this help.\n+\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n \n-# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\n-deploy: manifests\n-\tcd config/manager && kustomize edit set image gcr.io/kubeflow-images-public/profile-controller=${IMG}\n-\tkustomize build config/default | kubectl apply -f -\n+##@ Development\n \n-# Generate manifests e.g. CRD, RBAC etc.\n-manifests: controller-gen\n-\t$(CONTROLLER_GEN) crd:trivialVersions=true paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n+.PHONY: manifests\n+manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n+\t$(CONTROLLER_GEN) crd paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n \t# Uncomment when we remove the permissive ClusterRoleBinding to cluster-admin\n \t# $(CONTROLLER_GEN) rbac:roleName=cluster-role-binding webhook paths=\"./...\"\n \n-# Run go fmt against code\n-fmt:\n+.PHONY: generate\n+generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n+\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n+\t\n+\n+.PHONY: fmt\n+fmt: ## Run go fmt against code.\n \tgo fmt ./...\n \n-# Run go vet against code\n-vet:\n+.PHONY: vet\n+vet: ## Run go vet against code.\n \tgo vet ./...\n \n-# Generate code\n-generate: controller-gen\n-\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=\"./...\"\n+.PHONY: test\n+test: manifests generate fmt vet envtest ## Run tests.\n+\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... 
-coverprofile cover.out\n+\n+##@ Build\n+\n+.PHONY: build\n+build: generate fmt vet ## Build manager binary.\n+\tgo build -o bin/manager main.go\n+\n+.PHONY: run\n+run: manifests generate fmt vet ## Run a controller from your host.\n+\tgo run ./main.go -namespace-labels-path ./config/base/namespace-labels.yaml \n \n-# Build the docker image\n-docker-build:\n-\tdocker build --build-arg GOLANG_VERSION=${GOLANG_VERSION} -t ${IMG}:${TAG} .\n-\t@echo Built ${IMG}:${TAG}\n+.PHONY: docker-build\n+docker-build: test ## Build docker image with the manager.", + "comment_created_at": "2022-07-14T10:21:27+00:00", + "comment_author": "apo-ger", + "comment_body": "test dependency is removed", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "839931745", + "pr_number": 6374, + "pr_file": "components/notebook-controller/Makefile", + "created_at": "2022-03-31T19:02:40+00:00", + "commented_code": "# Image URL to use all building/pushing image targets\nIMG ?= gcr.io/kubeflow-images-public/notebook-controller\nIMG ?= public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\nTAG ?= $(shell git describe --tags --always)\nSHELL := /bin/bash\nGOLANG_VERSION ?= 1.15\n\n# Whether to use cached images with GCB\nUSE_IMAGE_CACHE ?= true\n\n# Produce CRDs that work back to Kubernetes 1.11 (no version conversion)\nCRD_OPTIONS ?= \"crd\"", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "839931745", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6374, + "pr_file": "components/notebook-controller/Makefile", + "discussion_id": "839931745", + "commented_code": "@@ -1,18 +1,10 @@\n \n # Image URL to use all building/pushing image targets\n-IMG ?= gcr.io/kubeflow-images-public/notebook-controller\n+IMG ?= public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\n TAG ?= $(shell git describe --tags --always)\n-SHELL := /bin/bash\n-GOLANG_VERSION ?= 1.15\n \n-# Whether to use cached images with GCB\n-USE_IMAGE_CACHE ?= true\n-\n-# Produce CRDs that work back to Kubernetes 1.11 (no version conversion)\n-CRD_OPTIONS ?= \"crd\"", + "comment_created_at": "2022-03-31T19:02:40+00:00", + "comment_author": "kimwnasptd", + "comment_body": "When I tried applying the new CRD on a cluster with the previous CRD I got the following errors back:\r\n```\r\nThe CustomResourceDefinition \"notebooks.kubeflow.org\" is invalid:\r\n* metadata.annotations: Too long: must have at most 262144 bytes\r\n* spec.preserveUnknownFields: Invalid value: true: must be false in order to use defaults in the schema\r\n```\r\n\r\nRegarding the `preserveUnknownFIelds`, could you explicitly set it to `false`?\r\n\r\nThis is also the recommended way in the docs to have compatibility: \r\nhttps://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/#field-pruning\r\nhttps://github.com/kubernetes-sigs/controller-tools/issues/476#issuecomment-691519936\r\n\r\nAlthough with a quick look at the kubebuilder docs I'm not 100% sure whether we should do this with a [marker](https://book.kubebuilder.io/reference/markers/crd-processing.html) or via the [makefile](https://book.kubebuilder.io/reference/generating-crd.html#multiple-versions)", + "pr_file_module": null + }, + { + "comment_id": "840440645", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6374, + "pr_file": "components/notebook-controller/Makefile", + "discussion_id": "839931745", + "commented_code": "@@ -1,18 +1,10 @@\n \n # Image URL to use all building/pushing image targets\n-IMG ?= 
gcr.io/kubeflow-images-public/notebook-controller\n+IMG ?= public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\n TAG ?= $(shell git describe --tags --always)\n-SHELL := /bin/bash\n-GOLANG_VERSION ?= 1.15\n \n-# Whether to use cached images with GCB\n-USE_IMAGE_CACHE ?= true\n-\n-# Produce CRDs that work back to Kubernetes 1.11 (no version conversion)\n-CRD_OPTIONS ?= \"crd\"", + "comment_created_at": "2022-04-01T10:17:54+00:00", + "comment_author": "samuelvl", + "comment_body": "Thanks for the comments! I'm starting to review them.\r\n\r\nIs `kubectl replace` instead of `kubectl apply` an option? It should fix both errors:\r\n\r\n```\r\nkubectl replace -f config/crd/bases/kubeflow.org_notebooks.yaml\r\n```\r\n", + "pr_file_module": null + }, + { + "comment_id": "850313417", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6374, + "pr_file": "components/notebook-controller/Makefile", + "discussion_id": "839931745", + "commented_code": "@@ -1,18 +1,10 @@\n \n # Image URL to use all building/pushing image targets\n-IMG ?= gcr.io/kubeflow-images-public/notebook-controller\n+IMG ?= public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\n TAG ?= $(shell git describe --tags --always)\n-SHELL := /bin/bash\n-GOLANG_VERSION ?= 1.15\n \n-# Whether to use cached images with GCB\n-USE_IMAGE_CACHE ?= true\n-\n-# Produce CRDs that work back to Kubernetes 1.11 (no version conversion)\n-CRD_OPTIONS ?= \"crd\"", + "comment_created_at": "2022-04-14T10:32:50+00:00", + "comment_author": "kimwnasptd", + "comment_body": "After looking at this again I propose that we manually \r\n1. add the `spec.preserveUnknownFields: false` just for this release\r\n2. remove this field in KF 1.7, since it's only needed for the `apiextensions.k8s.io/v1beta1` to `apiextensions.k8s.io/v1` transition.\r\n\r\nThe `controller-gen` does not allow us to explicitly set this to false https://book.kubebuilder.io/reference/markers/crd-processing.html\r\n\r\n", + "pr_file_module": null + }, + { + "comment_id": "853985090", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6374, + "pr_file": "components/notebook-controller/Makefile", + "discussion_id": "839931745", + "commented_code": "@@ -1,18 +1,10 @@\n \n # Image URL to use all building/pushing image targets\n-IMG ?= gcr.io/kubeflow-images-public/notebook-controller\n+IMG ?= public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\n TAG ?= $(shell git describe --tags --always)\n-SHELL := /bin/bash\n-GOLANG_VERSION ?= 1.15\n \n-# Whether to use cached images with GCB\n-USE_IMAGE_CACHE ?= true\n-\n-# Produce CRDs that work back to Kubernetes 1.11 (no version conversion)\n-CRD_OPTIONS ?= \"crd\"", + "comment_created_at": "2022-04-20T10:36:05+00:00", + "comment_author": "samuelvl", + "comment_body": "I have added it to the `config/crd/patches/trivial_conversion_patch.yaml` file instead of using `yq` or `sed` hacks:\r\n\r\n```yaml\r\napiVersion: apiextensions.k8s.io/v1\r\nkind: CustomResourceDefinition\r\nmetadata:\r\n name: notebooks.kubeflow.org\r\nspec:\r\n preserveUnknownFields: false # TODO: Remove in Kubeflow 1.7 release\r\n conversion:\r\n strategy: None\r\n```\r\n", + "pr_file_module": null + }, + { + "comment_id": "862437781", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6374, + "pr_file": "components/notebook-controller/Makefile", + "discussion_id": "839931745", + "commented_code": "@@ -1,18 +1,10 @@\n \n # Image URL to use all building/pushing image targets\n-IMG ?= gcr.io/kubeflow-images-public/notebook-controller\n+IMG ?= 
public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\n TAG ?= $(shell git describe --tags --always)\n-SHELL := /bin/bash\n-GOLANG_VERSION ?= 1.15\n \n-# Whether to use cached images with GCB\n-USE_IMAGE_CACHE ?= true\n-\n-# Produce CRDs that work back to Kubernetes 1.11 (no version conversion)\n-CRD_OPTIONS ?= \"crd\"", + "comment_created_at": "2022-05-01T07:52:29+00:00", + "comment_author": "kimwnasptd", + "comment_body": "This is awesome @samuelvl!\r\n\r\nOne final nit, I see that you've commented out the `crd` part in the manifests, which results in the CRD to not be included when generating `base` or `overlays/kubeflow` \r\n\r\nhttps://github.com/kubeflow/kubeflow/pull/6374/files#diff-28481732533c7c75d2d2ba504e9670d90e72ed98f999115fd92a0f8e8a5aace2R20\r\n\r\nCould you revert it back, so that the CRD is included as well? We should be ready to merge afterwards", + "pr_file_module": null + }, + { + "comment_id": "863698254", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6374, + "pr_file": "components/notebook-controller/Makefile", + "discussion_id": "839931745", + "commented_code": "@@ -1,18 +1,10 @@\n \n # Image URL to use all building/pushing image targets\n-IMG ?= gcr.io/kubeflow-images-public/notebook-controller\n+IMG ?= public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\n TAG ?= $(shell git describe --tags --always)\n-SHELL := /bin/bash\n-GOLANG_VERSION ?= 1.15\n \n-# Whether to use cached images with GCB\n-USE_IMAGE_CACHE ?= true\n-\n-# Produce CRDs that work back to Kubernetes 1.11 (no version conversion)\n-CRD_OPTIONS ?= \"crd\"", + "comment_created_at": "2022-05-03T11:55:24+00:00", + "comment_author": "samuelvl", + "comment_body": "I commented the `crd` part to avoid this issue (https://github.com/prometheus-community/helm-charts/issues/1500 is similar): \r\n\r\n```\r\nThe CustomResourceDefinition \"notebooks.kubeflow.org\" is invalid: metadata.annotations: Too long: must have at most 262144 bytes\r\n``` \r\n\r\nI have reverted the original logic and added the `maxDescLen=0` option when generating the crd to avoid this issue, however we'll miss some information when using the `kubectl explain notebook` command:\r\n\r\n```\r\n$(CONTROLLER_GEN) rbac:roleName=role crd:maxDescLen=0 ...\r\n```", + "pr_file_module": null + }, + { + "comment_id": "863922630", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6374, + "pr_file": "components/notebook-controller/Makefile", + "discussion_id": "839931745", + "commented_code": "@@ -1,18 +1,10 @@\n \n # Image URL to use all building/pushing image targets\n-IMG ?= gcr.io/kubeflow-images-public/notebook-controller\n+IMG ?= public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\n TAG ?= $(shell git describe --tags --always)\n-SHELL := /bin/bash\n-GOLANG_VERSION ?= 1.15\n \n-# Whether to use cached images with GCB\n-USE_IMAGE_CACHE ?= true\n-\n-# Produce CRDs that work back to Kubernetes 1.11 (no version conversion)\n-CRD_OPTIONS ?= \"crd\"", + "comment_created_at": "2022-05-03T15:47:44+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Hmm, indeed in this PR we made the CRD quite bigger since we now actually include the full PodSpec for validation.\r\n\r\nBut the `maxDescLen=0` seems like a good way to cut down significantly the size of it. 
The spec is currently only the PodSpec, for which descriptions are widely available so we are OK.\r\n\r\nGood job @samuelvl!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "839948826", + "pr_number": 6374, + "pr_file": "components/notebook-controller/Makefile", + "created_at": "2022-03-31T19:26:41+00:00", + "commented_code": "GOBIN=$(shell go env GOBIN)\nendif\n\n# Setting SHELL to bash allows bash commands to be executed by recipes.\n# This is a requirement for 'setup-envtest.sh' in the test target.\n# Options are set to exit when a recipe line exits non-zero or a piped command fails.\nSHELL = /usr/bin/env bash -o pipefail\n.SHELLFLAGS = -ec\n\n.PHONY: all\nall: manager\n\n# check license\ncheck-license:\n\t./third_party/check-license.sh\n##@ General\n\n# Run tests\ntest: generate fmt vet manifests\n\tmkdir -p ${ENVTEST_ASSETS_DIR}\n\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/a9bd9117a77a2f84bbc546e28991136fe0000dc0/hack/setup-envtest.sh\n\tsource ${ENVTEST_ASSETS_DIR}/setup-envtest.sh; fetch_envtest_tools $(ENVTEST_ASSETS_DIR); setup_envtest_env $(ENVTEST_ASSETS_DIR);\n\tgo test ./api/... ./controllers/... -coverprofile cover.out\n# The help target prints out all targets with their descriptions organized\n# beneath their categories. The categories are represented by '##@' and the\n# target descriptions by '##'. The awk commands is responsible for reading the\n# entire set of makefiles included in this invocation, looking for lines of the\n# file as xyz: ## something, and then pretty-format the target and help. Then,\n# if there's a line with ##@ something, that gets pretty-printed as a category.\n# More info on the usage of ANSI control characters for terminal formatting:\n# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters\n# More info on the awk command:\n# http://linuxcommand.org/lc3_adv_awk.php\n\n# Build manager binary\nmanager: generate fmt vet\n\tgo build -o bin/manager main.go\n.PHONY: help\nhelp: ## Display this help.\n\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n\n# Run against the configured Kubernetes cluster in ~/.kube/config\nrun: generate fmt vet\n\tgo run ./main.go\n##@ Development\n\n# Install CRDs into a cluster\ninstall: manifests\n\tkubectl apply -f config/crd/bases\n.PHONY: check-license\ncheck-license: ## Check third-party license\n\t./third_party/check-license.sh\n\n# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\ndeploy: manifests\n\tkubectl apply -f config/crd/bases\n\tkustomize build config/default | kubectl apply -f -\n.PHONY: manifests\nmanifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n\t$(CONTROLLER_GEN) rbac:roleName=role crd webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n\n# Generate manifests e.g. 
CRD, RBAC etc.\nmanifests: controller-gen\n\t$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=role webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n.PHONY: generate\ngenerate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n\n# Run go fmt against code\nfmt:\n.PHONY: fmt\nfmt: ## Run go fmt against code.\n\tgo fmt ./...\n\n# Run go vet against code\nvet:\n.PHONY: vet\nvet: ## Run go vet against code.\n\tgo vet ./...\n\n# Generate code\ngenerate: controller-gen\n\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=./api/...\n.PHONY: test\ntest: manifests generate fmt vet envtest ## Run tests.\n\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... -coverprofile cover.out\n\n##@ Build\n\n# Build the docker image\ndocker-build: test\n.PHONY: manager\nmanager: generate fmt vet ## Build manager binary.\n\tgo build -o bin/manager main.go\n\n.PHONY: run\nrun: manifests generate fmt vet ## Run a controller from your host.\n\tgo run ./main.go\n\n.PHONY: docker-build\ndocker-build: test ## Build docker image with the manager.\n\tcd .. && docker build . -t ${IMG}:${TAG} -f ./notebook-controller/Dockerfile\n\t@echo \"updating kustomize image patch file for manager resource\"\n\tsed -i'' -e 's@image: .*@image: '\"${IMG}:${TAG}\"'@' ./config/default/manager_image_patch.yaml\n\n# Push the docker image\ndocker-push:\n.PHONY: docker-push\ndocker-push: ## Push docker image with the manager.\n\tdocker push ${IMG}:${TAG}\n\n# find or download controller-gen\n# download controller-gen if necessary\ncontroller-gen:\nifeq (, $(shell which controller-gen))\n\tgo get sigs.k8s.io/controller-tools/cmd/controller-gen@v0.2.0\nCONTROLLER_GEN=$(GOBIN)/controller-gen\nelse\nCONTROLLER_GEN=$(shell which controller-gen)\nendif\n##@ Deployment\n\n# TODO(jlewi): Can we get rid of this and just use skaffold?\nbuild-gcr: test\n\tdocker build -t $(IMG):$(TAG) .\n\t@echo Built $(IMG):$(TAG)\nifndef ignore-not-found\n ignore-not-found = false\nendif\n\npush-gcr: build-gcr\n\tdocker push $(IMG):$(TAG)\n\t@echo Pushed $(IMG):$(TAG)\n.PHONY: install\ninstall: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config.\n\tkubectl get crd notebooks.kubeflow.org || $(KUSTOMIZE) build config/crd | kubectl create -f -\n\n.PHONY: uninstall\nuninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.\n\t$(KUSTOMIZE) build config/crd | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n\n.PHONY: deploy\ndeploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.\n\tsed -i'' -e 's@newName: .*@newName: '\"${IMG}\"'@' ./config/base/kustomization.yaml\n\tsed -i'' -e 's@newTag: .*@newTag: '\"${TAG}\"'@' ./config/base/kustomization.yaml\n\t$(KUSTOMIZE) build config/base | kubectl apply -f -\n\n.PHONY: undeploy\nundeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. 
Call with ignore-not-found=true to ignore resource not found errors during deletion.\n\t$(KUSTOMIZE) build config/base | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n\nCONTROLLER_GEN = $(shell pwd)/bin/controller-gen\n.PHONY: controller-gen\ncontroller-gen: ## Download controller-gen locally if necessary.\n\t$(call go-get-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen@v0.8.0)\n\nKUSTOMIZE = $(shell pwd)/bin/kustomize\n.PHONY: kustomize\nkustomize: ## Download kustomize locally if necessary.\n\t$(call go-get-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/v3@v3.8.7)", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "839948826", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6374, + "pr_file": "components/notebook-controller/Makefile", + "discussion_id": "839948826", + "commented_code": "@@ -21,77 +13,125 @@ else\n GOBIN=$(shell go env GOBIN)\n endif\n \n+# Setting SHELL to bash allows bash commands to be executed by recipes.\n+# This is a requirement for 'setup-envtest.sh' in the test target.\n+# Options are set to exit when a recipe line exits non-zero or a piped command fails.\n+SHELL = /usr/bin/env bash -o pipefail\n+.SHELLFLAGS = -ec\n+\n+.PHONY: all\n all: manager\n \n-# check license\n-check-license:\n-\t./third_party/check-license.sh\n+##@ General\n \n-# Run tests\n-test: generate fmt vet manifests\n-\tmkdir -p ${ENVTEST_ASSETS_DIR}\n-\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/a9bd9117a77a2f84bbc546e28991136fe0000dc0/hack/setup-envtest.sh\n-\tsource ${ENVTEST_ASSETS_DIR}/setup-envtest.sh; fetch_envtest_tools $(ENVTEST_ASSETS_DIR); setup_envtest_env $(ENVTEST_ASSETS_DIR);\n-\tgo test ./api/... ./controllers/... -coverprofile cover.out\n+# The help target prints out all targets with their descriptions organized\n+# beneath their categories. The categories are represented by '##@' and the\n+# target descriptions by '##'. The awk commands is responsible for reading the\n+# entire set of makefiles included in this invocation, looking for lines of the\n+# file as xyz: ## something, and then pretty-format the target and help. 
Then,\n+# if there's a line with ##@ something, that gets pretty-printed as a category.\n+# More info on the usage of ANSI control characters for terminal formatting:\n+# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters\n+# More info on the awk command:\n+# http://linuxcommand.org/lc3_adv_awk.php\n \n-# Build manager binary\n-manager: generate fmt vet\n-\tgo build -o bin/manager main.go\n+.PHONY: help\n+help: ## Display this help.\n+\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n \n-# Run against the configured Kubernetes cluster in ~/.kube/config\n-run: generate fmt vet\n-\tgo run ./main.go\n+##@ Development\n \n-# Install CRDs into a cluster\n-install: manifests\n-\tkubectl apply -f config/crd/bases\n+.PHONY: check-license\n+check-license: ## Check third-party license\n+\t./third_party/check-license.sh\n \n-# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\n-deploy: manifests\n-\tkubectl apply -f config/crd/bases\n-\tkustomize build config/default | kubectl apply -f -\n+.PHONY: manifests\n+manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n+\t$(CONTROLLER_GEN) rbac:roleName=role crd webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n \n-# Generate manifests e.g. CRD, RBAC etc.\n-manifests: controller-gen\n-\t$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=role webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n+.PHONY: generate\n+generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n+\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n \n-# Run go fmt against code\n-fmt:\n+.PHONY: fmt\n+fmt: ## Run go fmt against code.\n \tgo fmt ./...\n \n-# Run go vet against code\n-vet:\n+.PHONY: vet\n+vet: ## Run go vet against code.\n \tgo vet ./...\n \n-# Generate code\n-generate: controller-gen\n-\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=./api/...\n+.PHONY: test\n+test: manifests generate fmt vet envtest ## Run tests.\n+\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... -coverprofile cover.out\n+\n+##@ Build\n \n-# Build the docker image\n-docker-build: test\n+.PHONY: manager\n+manager: generate fmt vet ## Build manager binary.\n+\tgo build -o bin/manager main.go\n+\n+.PHONY: run\n+run: manifests generate fmt vet ## Run a controller from your host.\n+\tgo run ./main.go\n+\n+.PHONY: docker-build\n+docker-build: test ## Build docker image with the manager.\n \tcd .. && docker build . 
-t ${IMG}:${TAG} -f ./notebook-controller/Dockerfile\n-\t@echo \"updating kustomize image patch file for manager resource\"\n-\tsed -i'' -e 's@image: .*@image: '\"${IMG}:${TAG}\"'@' ./config/default/manager_image_patch.yaml\n \n-# Push the docker image\n-docker-push:\n+.PHONY: docker-push\n+docker-push: ## Push docker image with the manager.\n \tdocker push ${IMG}:${TAG}\n \n-# find or download controller-gen\n-# download controller-gen if necessary\n-controller-gen:\n-ifeq (, $(shell which controller-gen))\n-\tgo get sigs.k8s.io/controller-tools/cmd/controller-gen@v0.2.0\n-CONTROLLER_GEN=$(GOBIN)/controller-gen\n-else\n-CONTROLLER_GEN=$(shell which controller-gen)\n-endif\n+##@ Deployment\n \n-# TODO(jlewi): Can we get rid of this and just use skaffold?\n-build-gcr: test\n-\tdocker build -t $(IMG):$(TAG) .\n-\t@echo Built $(IMG):$(TAG)\n+ifndef ignore-not-found\n+ ignore-not-found = false\n+endif\n \n-push-gcr: build-gcr\n-\tdocker push $(IMG):$(TAG)\n-\t@echo Pushed $(IMG):$(TAG)\n+.PHONY: install\n+install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config.\n+\tkubectl get crd notebooks.kubeflow.org || $(KUSTOMIZE) build config/crd | kubectl create -f -\n+\n+.PHONY: uninstall\n+uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.\n+\t$(KUSTOMIZE) build config/crd | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n+\n+.PHONY: deploy\n+deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.\n+\tsed -i'' -e 's@newName: .*@newName: '\"${IMG}\"'@' ./config/base/kustomization.yaml\n+\tsed -i'' -e 's@newTag: .*@newTag: '\"${TAG}\"'@' ./config/base/kustomization.yaml\n+\t$(KUSTOMIZE) build config/base | kubectl apply -f -\n+\n+.PHONY: undeploy\n+undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.\n+\t$(KUSTOMIZE) build config/base | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n+\n+CONTROLLER_GEN = $(shell pwd)/bin/controller-gen\n+.PHONY: controller-gen\n+controller-gen: ## Download controller-gen locally if necessary.\n+\t$(call go-get-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen@v0.8.0)\n+\n+KUSTOMIZE = $(shell pwd)/bin/kustomize\n+.PHONY: kustomize\n+kustomize: ## Download kustomize locally if necessary.\n+\t$(call go-get-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/v3@v3.8.7)", + "comment_created_at": "2022-03-31T19:26:41+00:00", + "comment_author": "kimwnasptd", + "comment_body": "I tried to `kustomize build config/crd` with a 3.2 version, since this was the one [used from manifests](https://github.com/kubeflow/manifests#prerequisites) but I got the following errors:\r\n```\r\nError: no matches for OriginalId apiextensions.k8s.io_v1beta1_CustomResourceDefinition|~X|notebooks.kubeflow.org; no matches for CurrentId apiextensions.k8s.io_v1beta1_CustomResourceDefinition|~X|notebooks.kubeflow.org; failed to find unique target for patch apiextensions.k8s.io_v1beta1_CustomResourceDefinition|notebooks.kubeflow.org\r\n``` \r\n\r\nDo you know why these occur? They were not shows in the previous iteration of the controller, so maybe something slightly changed in the autogenerated manifests?\r\n\r\nIf I use the 3.8 version from the makefile this error goes away. 
Also it's not shown even with 3.2 when I build the `base`, so it's not that critical. But let's try to understand why this happens. I'll try to look into it within the next days as well", + "pr_file_module": null + }, + { + "comment_id": "840777413", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6374, + "pr_file": "components/notebook-controller/Makefile", + "discussion_id": "839948826", + "commented_code": "@@ -21,77 +13,125 @@ else\n GOBIN=$(shell go env GOBIN)\n endif\n \n+# Setting SHELL to bash allows bash commands to be executed by recipes.\n+# This is a requirement for 'setup-envtest.sh' in the test target.\n+# Options are set to exit when a recipe line exits non-zero or a piped command fails.\n+SHELL = /usr/bin/env bash -o pipefail\n+.SHELLFLAGS = -ec\n+\n+.PHONY: all\n all: manager\n \n-# check license\n-check-license:\n-\t./third_party/check-license.sh\n+##@ General\n \n-# Run tests\n-test: generate fmt vet manifests\n-\tmkdir -p ${ENVTEST_ASSETS_DIR}\n-\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/a9bd9117a77a2f84bbc546e28991136fe0000dc0/hack/setup-envtest.sh\n-\tsource ${ENVTEST_ASSETS_DIR}/setup-envtest.sh; fetch_envtest_tools $(ENVTEST_ASSETS_DIR); setup_envtest_env $(ENVTEST_ASSETS_DIR);\n-\tgo test ./api/... ./controllers/... -coverprofile cover.out\n+# The help target prints out all targets with their descriptions organized\n+# beneath their categories. The categories are represented by '##@' and the\n+# target descriptions by '##'. The awk commands is responsible for reading the\n+# entire set of makefiles included in this invocation, looking for lines of the\n+# file as xyz: ## something, and then pretty-format the target and help. Then,\n+# if there's a line with ##@ something, that gets pretty-printed as a category.\n+# More info on the usage of ANSI control characters for terminal formatting:\n+# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters\n+# More info on the awk command:\n+# http://linuxcommand.org/lc3_adv_awk.php\n \n-# Build manager binary\n-manager: generate fmt vet\n-\tgo build -o bin/manager main.go\n+.PHONY: help\n+help: ## Display this help.\n+\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n \n-# Run against the configured Kubernetes cluster in ~/.kube/config\n-run: generate fmt vet\n-\tgo run ./main.go\n+##@ Development\n \n-# Install CRDs into a cluster\n-install: manifests\n-\tkubectl apply -f config/crd/bases\n+.PHONY: check-license\n+check-license: ## Check third-party license\n+\t./third_party/check-license.sh\n \n-# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\n-deploy: manifests\n-\tkubectl apply -f config/crd/bases\n-\tkustomize build config/default | kubectl apply -f -\n+.PHONY: manifests\n+manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n+\t$(CONTROLLER_GEN) rbac:roleName=role crd webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n \n-# Generate manifests e.g. 
CRD, RBAC etc.\n-manifests: controller-gen\n-\t$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=role webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n+.PHONY: generate\n+generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n+\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n \n-# Run go fmt against code\n-fmt:\n+.PHONY: fmt\n+fmt: ## Run go fmt against code.\n \tgo fmt ./...\n \n-# Run go vet against code\n-vet:\n+.PHONY: vet\n+vet: ## Run go vet against code.\n \tgo vet ./...\n \n-# Generate code\n-generate: controller-gen\n-\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=./api/...\n+.PHONY: test\n+test: manifests generate fmt vet envtest ## Run tests.\n+\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... -coverprofile cover.out\n+\n+##@ Build\n \n-# Build the docker image\n-docker-build: test\n+.PHONY: manager\n+manager: generate fmt vet ## Build manager binary.\n+\tgo build -o bin/manager main.go\n+\n+.PHONY: run\n+run: manifests generate fmt vet ## Run a controller from your host.\n+\tgo run ./main.go\n+\n+.PHONY: docker-build\n+docker-build: test ## Build docker image with the manager.\n \tcd .. && docker build . -t ${IMG}:${TAG} -f ./notebook-controller/Dockerfile\n-\t@echo \"updating kustomize image patch file for manager resource\"\n-\tsed -i'' -e 's@image: .*@image: '\"${IMG}:${TAG}\"'@' ./config/default/manager_image_patch.yaml\n \n-# Push the docker image\n-docker-push:\n+.PHONY: docker-push\n+docker-push: ## Push docker image with the manager.\n \tdocker push ${IMG}:${TAG}\n \n-# find or download controller-gen\n-# download controller-gen if necessary\n-controller-gen:\n-ifeq (, $(shell which controller-gen))\n-\tgo get sigs.k8s.io/controller-tools/cmd/controller-gen@v0.2.0\n-CONTROLLER_GEN=$(GOBIN)/controller-gen\n-else\n-CONTROLLER_GEN=$(shell which controller-gen)\n-endif\n+##@ Deployment\n \n-# TODO(jlewi): Can we get rid of this and just use skaffold?\n-build-gcr: test\n-\tdocker build -t $(IMG):$(TAG) .\n-\t@echo Built $(IMG):$(TAG)\n+ifndef ignore-not-found\n+ ignore-not-found = false\n+endif\n \n-push-gcr: build-gcr\n-\tdocker push $(IMG):$(TAG)\n-\t@echo Pushed $(IMG):$(TAG)\n+.PHONY: install\n+install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config.\n+\tkubectl get crd notebooks.kubeflow.org || $(KUSTOMIZE) build config/crd | kubectl create -f -\n+\n+.PHONY: uninstall\n+uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.\n+\t$(KUSTOMIZE) build config/crd | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n+\n+.PHONY: deploy\n+deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.\n+\tsed -i'' -e 's@newName: .*@newName: '\"${IMG}\"'@' ./config/base/kustomization.yaml\n+\tsed -i'' -e 's@newTag: .*@newTag: '\"${TAG}\"'@' ./config/base/kustomization.yaml\n+\t$(KUSTOMIZE) build config/base | kubectl apply -f -\n+\n+.PHONY: undeploy\n+undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. 
Call with ignore-not-found=true to ignore resource not found errors during deletion.\n+\t$(KUSTOMIZE) build config/base | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n+\n+CONTROLLER_GEN = $(shell pwd)/bin/controller-gen\n+.PHONY: controller-gen\n+controller-gen: ## Download controller-gen locally if necessary.\n+\t$(call go-get-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen@v0.8.0)\n+\n+KUSTOMIZE = $(shell pwd)/bin/kustomize\n+.PHONY: kustomize\n+kustomize: ## Download kustomize locally if necessary.\n+\t$(call go-get-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/v3@v3.8.7)", + "comment_created_at": "2022-04-01T17:14:26+00:00", + "comment_author": "samuelvl", + "comment_body": "It's failing because there is a patch trying to modify the old v1beta1 CRD but there is no match (obviously):\r\n\r\n```\r\npatchesJson6902:\r\n# Remove once the following issue is resolved:\r\n# https://github.com/kubeflow/kubeflow/issues/5722\r\n- path: patches/old_crd.yaml\r\n target:\r\n group: apiextensions.k8s.io\r\n version: v1beta1 # <--------- This should be v1\r\n kind: CustomResourceDefinition\r\n name: notebooks.kubeflow.org\r\n ```\r\n \r\nSince https://github.com/kubeflow/kubeflow/issues/5722 is resolved I think the best solution is to remove the entire patch block, what do you think?\r\n\r\nIf we are using Kustomize 3.2 in manifests I can update the Makefile to download that version to be aligned, that may save some compatibility issues in the future.", + "pr_file_module": null + }, + { + "comment_id": "850178203", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6374, + "pr_file": "components/notebook-controller/Makefile", + "discussion_id": "839948826", + "commented_code": "@@ -21,77 +13,125 @@ else\n GOBIN=$(shell go env GOBIN)\n endif\n \n+# Setting SHELL to bash allows bash commands to be executed by recipes.\n+# This is a requirement for 'setup-envtest.sh' in the test target.\n+# Options are set to exit when a recipe line exits non-zero or a piped command fails.\n+SHELL = /usr/bin/env bash -o pipefail\n+.SHELLFLAGS = -ec\n+\n+.PHONY: all\n all: manager\n \n-# check license\n-check-license:\n-\t./third_party/check-license.sh\n+##@ General\n \n-# Run tests\n-test: generate fmt vet manifests\n-\tmkdir -p ${ENVTEST_ASSETS_DIR}\n-\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/a9bd9117a77a2f84bbc546e28991136fe0000dc0/hack/setup-envtest.sh\n-\tsource ${ENVTEST_ASSETS_DIR}/setup-envtest.sh; fetch_envtest_tools $(ENVTEST_ASSETS_DIR); setup_envtest_env $(ENVTEST_ASSETS_DIR);\n-\tgo test ./api/... ./controllers/... -coverprofile cover.out\n+# The help target prints out all targets with their descriptions organized\n+# beneath their categories. The categories are represented by '##@' and the\n+# target descriptions by '##'. The awk commands is responsible for reading the\n+# entire set of makefiles included in this invocation, looking for lines of the\n+# file as xyz: ## something, and then pretty-format the target and help. 
Then,\n+# if there's a line with ##@ something, that gets pretty-printed as a category.\n+# More info on the usage of ANSI control characters for terminal formatting:\n+# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters\n+# More info on the awk command:\n+# http://linuxcommand.org/lc3_adv_awk.php\n \n-# Build manager binary\n-manager: generate fmt vet\n-\tgo build -o bin/manager main.go\n+.PHONY: help\n+help: ## Display this help.\n+\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n \n-# Run against the configured Kubernetes cluster in ~/.kube/config\n-run: generate fmt vet\n-\tgo run ./main.go\n+##@ Development\n \n-# Install CRDs into a cluster\n-install: manifests\n-\tkubectl apply -f config/crd/bases\n+.PHONY: check-license\n+check-license: ## Check third-party license\n+\t./third_party/check-license.sh\n \n-# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\n-deploy: manifests\n-\tkubectl apply -f config/crd/bases\n-\tkustomize build config/default | kubectl apply -f -\n+.PHONY: manifests\n+manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n+\t$(CONTROLLER_GEN) rbac:roleName=role crd webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n \n-# Generate manifests e.g. CRD, RBAC etc.\n-manifests: controller-gen\n-\t$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=role webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n+.PHONY: generate\n+generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n+\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n \n-# Run go fmt against code\n-fmt:\n+.PHONY: fmt\n+fmt: ## Run go fmt against code.\n \tgo fmt ./...\n \n-# Run go vet against code\n-vet:\n+.PHONY: vet\n+vet: ## Run go vet against code.\n \tgo vet ./...\n \n-# Generate code\n-generate: controller-gen\n-\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=./api/...\n+.PHONY: test\n+test: manifests generate fmt vet envtest ## Run tests.\n+\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... -coverprofile cover.out\n+\n+##@ Build\n \n-# Build the docker image\n-docker-build: test\n+.PHONY: manager\n+manager: generate fmt vet ## Build manager binary.\n+\tgo build -o bin/manager main.go\n+\n+.PHONY: run\n+run: manifests generate fmt vet ## Run a controller from your host.\n+\tgo run ./main.go\n+\n+.PHONY: docker-build\n+docker-build: test ## Build docker image with the manager.\n \tcd .. && docker build . 
-t ${IMG}:${TAG} -f ./notebook-controller/Dockerfile\n-\t@echo \"updating kustomize image patch file for manager resource\"\n-\tsed -i'' -e 's@image: .*@image: '\"${IMG}:${TAG}\"'@' ./config/default/manager_image_patch.yaml\n \n-# Push the docker image\n-docker-push:\n+.PHONY: docker-push\n+docker-push: ## Push docker image with the manager.\n \tdocker push ${IMG}:${TAG}\n \n-# find or download controller-gen\n-# download controller-gen if necessary\n-controller-gen:\n-ifeq (, $(shell which controller-gen))\n-\tgo get sigs.k8s.io/controller-tools/cmd/controller-gen@v0.2.0\n-CONTROLLER_GEN=$(GOBIN)/controller-gen\n-else\n-CONTROLLER_GEN=$(shell which controller-gen)\n-endif\n+##@ Deployment\n \n-# TODO(jlewi): Can we get rid of this and just use skaffold?\n-build-gcr: test\n-\tdocker build -t $(IMG):$(TAG) .\n-\t@echo Built $(IMG):$(TAG)\n+ifndef ignore-not-found\n+ ignore-not-found = false\n+endif\n \n-push-gcr: build-gcr\n-\tdocker push $(IMG):$(TAG)\n-\t@echo Pushed $(IMG):$(TAG)\n+.PHONY: install\n+install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config.\n+\tkubectl get crd notebooks.kubeflow.org || $(KUSTOMIZE) build config/crd | kubectl create -f -\n+\n+.PHONY: uninstall\n+uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.\n+\t$(KUSTOMIZE) build config/crd | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n+\n+.PHONY: deploy\n+deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.\n+\tsed -i'' -e 's@newName: .*@newName: '\"${IMG}\"'@' ./config/base/kustomization.yaml\n+\tsed -i'' -e 's@newTag: .*@newTag: '\"${TAG}\"'@' ./config/base/kustomization.yaml\n+\t$(KUSTOMIZE) build config/base | kubectl apply -f -\n+\n+.PHONY: undeploy\n+undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.\n+\t$(KUSTOMIZE) build config/base | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n+\n+CONTROLLER_GEN = $(shell pwd)/bin/controller-gen\n+.PHONY: controller-gen\n+controller-gen: ## Download controller-gen locally if necessary.\n+\t$(call go-get-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen@v0.8.0)\n+\n+KUSTOMIZE = $(shell pwd)/bin/kustomize\n+.PHONY: kustomize\n+kustomize: ## Download kustomize locally if necessary.\n+\t$(call go-get-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/v3@v3.8.7)", + "comment_created_at": "2022-04-14T07:50:45+00:00", + "comment_author": "kimwnasptd", + "comment_body": "I agree to remove the patch altogether. The patch was making the validation more lax, by removing validation from the CPU/Memory fields.\r\n\r\n@samuelvl can you double check that creating a Notebook with the Jupyter web app also works as expected after this change? 
To make sure the backend is submitting objects that are valid.", + "pr_file_module": null + }, + { + "comment_id": "854062860", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6374, + "pr_file": "components/notebook-controller/Makefile", + "discussion_id": "839948826", + "commented_code": "@@ -21,77 +13,125 @@ else\n GOBIN=$(shell go env GOBIN)\n endif\n \n+# Setting SHELL to bash allows bash commands to be executed by recipes.\n+# This is a requirement for 'setup-envtest.sh' in the test target.\n+# Options are set to exit when a recipe line exits non-zero or a piped command fails.\n+SHELL = /usr/bin/env bash -o pipefail\n+.SHELLFLAGS = -ec\n+\n+.PHONY: all\n all: manager\n \n-# check license\n-check-license:\n-\t./third_party/check-license.sh\n+##@ General\n \n-# Run tests\n-test: generate fmt vet manifests\n-\tmkdir -p ${ENVTEST_ASSETS_DIR}\n-\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/a9bd9117a77a2f84bbc546e28991136fe0000dc0/hack/setup-envtest.sh\n-\tsource ${ENVTEST_ASSETS_DIR}/setup-envtest.sh; fetch_envtest_tools $(ENVTEST_ASSETS_DIR); setup_envtest_env $(ENVTEST_ASSETS_DIR);\n-\tgo test ./api/... ./controllers/... -coverprofile cover.out\n+# The help target prints out all targets with their descriptions organized\n+# beneath their categories. The categories are represented by '##@' and the\n+# target descriptions by '##'. The awk commands is responsible for reading the\n+# entire set of makefiles included in this invocation, looking for lines of the\n+# file as xyz: ## something, and then pretty-format the target and help. Then,\n+# if there's a line with ##@ something, that gets pretty-printed as a category.\n+# More info on the usage of ANSI control characters for terminal formatting:\n+# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters\n+# More info on the awk command:\n+# http://linuxcommand.org/lc3_adv_awk.php\n \n-# Build manager binary\n-manager: generate fmt vet\n-\tgo build -o bin/manager main.go\n+.PHONY: help\n+help: ## Display this help.\n+\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n \n-# Run against the configured Kubernetes cluster in ~/.kube/config\n-run: generate fmt vet\n-\tgo run ./main.go\n+##@ Development\n \n-# Install CRDs into a cluster\n-install: manifests\n-\tkubectl apply -f config/crd/bases\n+.PHONY: check-license\n+check-license: ## Check third-party license\n+\t./third_party/check-license.sh\n \n-# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\n-deploy: manifests\n-\tkubectl apply -f config/crd/bases\n-\tkustomize build config/default | kubectl apply -f -\n+.PHONY: manifests\n+manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n+\t$(CONTROLLER_GEN) rbac:roleName=role crd webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n \n-# Generate manifests e.g. 
CRD, RBAC etc.\n-manifests: controller-gen\n-\t$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=role webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n+.PHONY: generate\n+generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n+\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n \n-# Run go fmt against code\n-fmt:\n+.PHONY: fmt\n+fmt: ## Run go fmt against code.\n \tgo fmt ./...\n \n-# Run go vet against code\n-vet:\n+.PHONY: vet\n+vet: ## Run go vet against code.\n \tgo vet ./...\n \n-# Generate code\n-generate: controller-gen\n-\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=./api/...\n+.PHONY: test\n+test: manifests generate fmt vet envtest ## Run tests.\n+\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... -coverprofile cover.out\n+\n+##@ Build\n \n-# Build the docker image\n-docker-build: test\n+.PHONY: manager\n+manager: generate fmt vet ## Build manager binary.\n+\tgo build -o bin/manager main.go\n+\n+.PHONY: run\n+run: manifests generate fmt vet ## Run a controller from your host.\n+\tgo run ./main.go\n+\n+.PHONY: docker-build\n+docker-build: test ## Build docker image with the manager.\n \tcd .. && docker build . -t ${IMG}:${TAG} -f ./notebook-controller/Dockerfile\n-\t@echo \"updating kustomize image patch file for manager resource\"\n-\tsed -i'' -e 's@image: .*@image: '\"${IMG}:${TAG}\"'@' ./config/default/manager_image_patch.yaml\n \n-# Push the docker image\n-docker-push:\n+.PHONY: docker-push\n+docker-push: ## Push docker image with the manager.\n \tdocker push ${IMG}:${TAG}\n \n-# find or download controller-gen\n-# download controller-gen if necessary\n-controller-gen:\n-ifeq (, $(shell which controller-gen))\n-\tgo get sigs.k8s.io/controller-tools/cmd/controller-gen@v0.2.0\n-CONTROLLER_GEN=$(GOBIN)/controller-gen\n-else\n-CONTROLLER_GEN=$(shell which controller-gen)\n-endif\n+##@ Deployment\n \n-# TODO(jlewi): Can we get rid of this and just use skaffold?\n-build-gcr: test\n-\tdocker build -t $(IMG):$(TAG) .\n-\t@echo Built $(IMG):$(TAG)\n+ifndef ignore-not-found\n+ ignore-not-found = false\n+endif\n \n-push-gcr: build-gcr\n-\tdocker push $(IMG):$(TAG)\n-\t@echo Pushed $(IMG):$(TAG)\n+.PHONY: install\n+install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config.\n+\tkubectl get crd notebooks.kubeflow.org || $(KUSTOMIZE) build config/crd | kubectl create -f -\n+\n+.PHONY: uninstall\n+uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.\n+\t$(KUSTOMIZE) build config/crd | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n+\n+.PHONY: deploy\n+deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.\n+\tsed -i'' -e 's@newName: .*@newName: '\"${IMG}\"'@' ./config/base/kustomization.yaml\n+\tsed -i'' -e 's@newTag: .*@newTag: '\"${TAG}\"'@' ./config/base/kustomization.yaml\n+\t$(KUSTOMIZE) build config/base | kubectl apply -f -\n+\n+.PHONY: undeploy\n+undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. 
Call with ignore-not-found=true to ignore resource not found errors during deletion.\n+\t$(KUSTOMIZE) build config/base | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n+\n+CONTROLLER_GEN = $(shell pwd)/bin/controller-gen\n+.PHONY: controller-gen\n+controller-gen: ## Download controller-gen locally if necessary.\n+\t$(call go-get-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen@v0.8.0)\n+\n+KUSTOMIZE = $(shell pwd)/bin/kustomize\n+.PHONY: kustomize\n+kustomize: ## Download kustomize locally if necessary.\n+\t$(call go-get-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/v3@v3.8.7)", + "comment_created_at": "2022-04-20T12:14:24+00:00", + "comment_author": "samuelvl", + "comment_body": "I do confirm the backend creates the notebook object correctly after removing the patch:\r\n\r\n``` shell\r\n$ kubectl get notebooks\r\nNAME AGE\r\nscipy 13m\r\n\r\n$ kubectl get pods \r\nNAME READY STATUS RESTARTS AGE\r\nscipy-0 1/1 Running 0 10m\r\n```\r\n\r\nThis is the notebook created by the JWA:\r\n\r\n```yaml\r\napiVersion: kubeflow.org/v1\r\nkind: Notebook\r\nmetadata:\r\n annotations:\r\n notebooks.kubeflow.org/last-activity: \"2022-04-20T12:06:44Z\"\r\n notebooks.kubeflow.org/server-type: jupyter\r\n labels:\r\n app: scipy\r\n name: scipy\r\n namespace: kubeflow-user\r\nspec:\r\n template:\r\n spec:\r\n containers:\r\n - image: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:v1.5.0\r\n imagePullPolicy: IfNotPresent\r\n name: scipy\r\n resources:\r\n limits:\r\n cpu: 600m\r\n memory: 1288490188800m\r\n requests:\r\n cpu: 500m\r\n memory: 1Gi\r\n volumeMounts:\r\n - mountPath: /dev/shm\r\n name: dshm\r\n - mountPath: /home/jovyan\r\n name: scipy-volume\r\n serviceAccountName: default-editor\r\n volumes:\r\n - emptyDir:\r\n medium: Memory\r\n name: dshm\r\n - name: scipy-volume\r\n persistentVolumeClaim:\r\n claimName: scipy-volume\r\n``` \r\n\r\n\r\n", + "pr_file_module": null + }, + { + "comment_id": "862436970", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6374, + "pr_file": "components/notebook-controller/Makefile", + "discussion_id": "839948826", + "commented_code": "@@ -21,77 +13,125 @@ else\n GOBIN=$(shell go env GOBIN)\n endif\n \n+# Setting SHELL to bash allows bash commands to be executed by recipes.\n+# This is a requirement for 'setup-envtest.sh' in the test target.\n+# Options are set to exit when a recipe line exits non-zero or a piped command fails.\n+SHELL = /usr/bin/env bash -o pipefail\n+.SHELLFLAGS = -ec\n+\n+.PHONY: all\n all: manager\n \n-# check license\n-check-license:\n-\t./third_party/check-license.sh\n+##@ General\n \n-# Run tests\n-test: generate fmt vet manifests\n-\tmkdir -p ${ENVTEST_ASSETS_DIR}\n-\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/a9bd9117a77a2f84bbc546e28991136fe0000dc0/hack/setup-envtest.sh\n-\tsource ${ENVTEST_ASSETS_DIR}/setup-envtest.sh; fetch_envtest_tools $(ENVTEST_ASSETS_DIR); setup_envtest_env $(ENVTEST_ASSETS_DIR);\n-\tgo test ./api/... ./controllers/... -coverprofile cover.out\n+# The help target prints out all targets with their descriptions organized\n+# beneath their categories. The categories are represented by '##@' and the\n+# target descriptions by '##'. The awk commands is responsible for reading the\n+# entire set of makefiles included in this invocation, looking for lines of the\n+# file as xyz: ## something, and then pretty-format the target and help. 
Then,\n+# if there's a line with ##@ something, that gets pretty-printed as a category.\n+# More info on the usage of ANSI control characters for terminal formatting:\n+# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters\n+# More info on the awk command:\n+# http://linuxcommand.org/lc3_adv_awk.php\n \n-# Build manager binary\n-manager: generate fmt vet\n-\tgo build -o bin/manager main.go\n+.PHONY: help\n+help: ## Display this help.\n+\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n \n-# Run against the configured Kubernetes cluster in ~/.kube/config\n-run: generate fmt vet\n-\tgo run ./main.go\n+##@ Development\n \n-# Install CRDs into a cluster\n-install: manifests\n-\tkubectl apply -f config/crd/bases\n+.PHONY: check-license\n+check-license: ## Check third-party license\n+\t./third_party/check-license.sh\n \n-# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\n-deploy: manifests\n-\tkubectl apply -f config/crd/bases\n-\tkustomize build config/default | kubectl apply -f -\n+.PHONY: manifests\n+manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n+\t$(CONTROLLER_GEN) rbac:roleName=role crd webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n \n-# Generate manifests e.g. CRD, RBAC etc.\n-manifests: controller-gen\n-\t$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=role webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n+.PHONY: generate\n+generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n+\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n \n-# Run go fmt against code\n-fmt:\n+.PHONY: fmt\n+fmt: ## Run go fmt against code.\n \tgo fmt ./...\n \n-# Run go vet against code\n-vet:\n+.PHONY: vet\n+vet: ## Run go vet against code.\n \tgo vet ./...\n \n-# Generate code\n-generate: controller-gen\n-\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=./api/...\n+.PHONY: test\n+test: manifests generate fmt vet envtest ## Run tests.\n+\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... -coverprofile cover.out\n+\n+##@ Build\n \n-# Build the docker image\n-docker-build: test\n+.PHONY: manager\n+manager: generate fmt vet ## Build manager binary.\n+\tgo build -o bin/manager main.go\n+\n+.PHONY: run\n+run: manifests generate fmt vet ## Run a controller from your host.\n+\tgo run ./main.go\n+\n+.PHONY: docker-build\n+docker-build: test ## Build docker image with the manager.\n \tcd .. && docker build . 
-t ${IMG}:${TAG} -f ./notebook-controller/Dockerfile\n-\t@echo \"updating kustomize image patch file for manager resource\"\n-\tsed -i'' -e 's@image: .*@image: '\"${IMG}:${TAG}\"'@' ./config/default/manager_image_patch.yaml\n \n-# Push the docker image\n-docker-push:\n+.PHONY: docker-push\n+docker-push: ## Push docker image with the manager.\n \tdocker push ${IMG}:${TAG}\n \n-# find or download controller-gen\n-# download controller-gen if necessary\n-controller-gen:\n-ifeq (, $(shell which controller-gen))\n-\tgo get sigs.k8s.io/controller-tools/cmd/controller-gen@v0.2.0\n-CONTROLLER_GEN=$(GOBIN)/controller-gen\n-else\n-CONTROLLER_GEN=$(shell which controller-gen)\n-endif\n+##@ Deployment\n \n-# TODO(jlewi): Can we get rid of this and just use skaffold?\n-build-gcr: test\n-\tdocker build -t $(IMG):$(TAG) .\n-\t@echo Built $(IMG):$(TAG)\n+ifndef ignore-not-found\n+ ignore-not-found = false\n+endif\n \n-push-gcr: build-gcr\n-\tdocker push $(IMG):$(TAG)\n-\t@echo Pushed $(IMG):$(TAG)\n+.PHONY: install\n+install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config.\n+\tkubectl get crd notebooks.kubeflow.org || $(KUSTOMIZE) build config/crd | kubectl create -f -\n+\n+.PHONY: uninstall\n+uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.\n+\t$(KUSTOMIZE) build config/crd | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n+\n+.PHONY: deploy\n+deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.\n+\tsed -i'' -e 's@newName: .*@newName: '\"${IMG}\"'@' ./config/base/kustomization.yaml\n+\tsed -i'' -e 's@newTag: .*@newTag: '\"${TAG}\"'@' ./config/base/kustomization.yaml\n+\t$(KUSTOMIZE) build config/base | kubectl apply -f -\n+\n+.PHONY: undeploy\n+undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. 
Call with ignore-not-found=true to ignore resource not found errors during deletion.\n+\t$(KUSTOMIZE) build config/base | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n+\n+CONTROLLER_GEN = $(shell pwd)/bin/controller-gen\n+.PHONY: controller-gen\n+controller-gen: ## Download controller-gen locally if necessary.\n+\t$(call go-get-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen@v0.8.0)\n+\n+KUSTOMIZE = $(shell pwd)/bin/kustomize\n+.PHONY: kustomize\n+kustomize: ## Download kustomize locally if necessary.\n+\t$(call go-get-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/v3@v3.8.7)", + "comment_created_at": "2022-05-01T07:44:23+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Perfect!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "519691721", + "pr_number": 5378, + "pr_file": "components/notebook-controller/Makefile", + "created_at": "2020-11-09T10:10:35+00:00", + "commented_code": "# Run tests\ntest: generate fmt vet manifests check-license\n\tmkdir -p ${ENVTEST_ASSETS_DIR}\n\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/master/hack/setup-envtest.sh", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "519691721", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5378, + "pr_file": "components/notebook-controller/Makefile", + "discussion_id": "519691721", + "commented_code": "@@ -25,6 +28,9 @@ check-license:\n \n # Run tests\n test: generate fmt vet manifests check-license\n+\tmkdir -p ${ENVTEST_ASSETS_DIR}\n+\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/master/hack/setup-envtest.sh", + "comment_created_at": "2020-11-09T10:10:35+00:00", + "comment_author": "yanniszark", + "comment_body": "Can you pin the script at a particular commit?\r\nThis way, we can be sure an update in this external repo doesn't break us.", + "pr_file_module": null + }, + { + "comment_id": "520057276", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5378, + "pr_file": "components/notebook-controller/Makefile", + "discussion_id": "519691721", + "commented_code": "@@ -25,6 +28,9 @@ check-license:\n \n # Run tests\n test: generate fmt vet manifests check-license\n+\tmkdir -p ${ENVTEST_ASSETS_DIR}\n+\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/master/hack/setup-envtest.sh", + "comment_created_at": "2020-11-09T19:12:56+00:00", + "comment_author": "naveensrinivasan", + "comment_body": "Thanks, I will do that. 👍 ", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-pin-version-dependencies.md b/_reviewers/kubeflow-pin-version-dependencies.md index 7dd92df..b8d5f16 100644 --- a/_reviewers/kubeflow-pin-version-dependencies.md +++ b/_reviewers/kubeflow-pin-version-dependencies.md @@ -38,239 +38,3 @@ SHELL = /usr/bin/env bash -o pipefail ``` Regularly clean up unused dependencies with tools like `go mod tidy` to maintain clean configuration files. 
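A minimal sketch of how the pinning practices discussed in these threads can sit together in a Makefile; it reuses the controller-runtime commit SHA and the controller-gen version quoted above, while the target and variable names (`ENVTEST_SETUP_COMMIT`, `CONTROLLER_GEN_VERSION`, `tidy`) are illustrative placeholders rather than names taken from the repository:

```makefile
# Pin a fetched script to a specific commit instead of a moving branch,
# so an upstream change cannot silently break the build.
ENVTEST_SETUP_COMMIT ?= a9bd9117a77a2f84bbc546e28991136fe0000dc0
.PHONY: setup-envtest-script
setup-envtest-script:
	mkdir -p bin
	curl -sSLo bin/setup-envtest.sh \
		https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/$(ENVTEST_SETUP_COMMIT)/hack/setup-envtest.sh

# Pin build tools to exact versions instead of using whatever is on PATH.
CONTROLLER_GEN_VERSION ?= v0.8.0
.PHONY: controller-gen
controller-gen:
	GOBIN=$(shell pwd)/bin go install sigs.k8s.io/controller-tools/cmd/controller-gen@$(CONTROLLER_GEN_VERSION)

# Keep go.mod/go.sum free of unused requirements.
.PHONY: tidy
tidy:
	go mod tidy
```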
- - -[ - { - "discussion_id": "920964404", - "pr_number": 6491, - "pr_file": "components/profile-controller/Makefile", - "created_at": "2022-07-14T09:43:16+00:00", - "commented_code": "GOBIN=$(shell go env GOBIN)\nendif\n\nall: manager\n# Setting SHELL to bash allows bash commands to be executed by recipes.\n# This is a requirement for 'setup-envtest.sh' in the test target.\n# Options are set to exit when a recipe line exits non-zero or a piped command fails.\nSHELL = /usr/bin/env bash -o pipefail\n.SHELLFLAGS = -ec\n\n# Run tests\ntest: generate fmt vet\n\tgo test ./... -coverprofile cover.out\n.PHONY: all\nall: build\n\n# Build manager binary\nmanager: generate fmt vet\n\tgo build -o bin/manager main.go\n\n# Run against the configured Kubernetes cluster in ~/.kube/config\nrun: generate fmt vet\n\tgo run ./main.go\n\n# Install CRDs into a cluster\ninstall:\n\tkustomize build config/crd | kubectl apply -f -\n.PHONY: help\nhelp: ## Display this help.\n\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n\n# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\ndeploy: manifests\n\tcd config/manager && kustomize edit set image gcr.io/kubeflow-images-public/profile-controller=${IMG}\n\tkustomize build config/default | kubectl apply -f -\n##@ Development\n\n# Generate manifests e.g. CRD, RBAC etc.\nmanifests: controller-gen\n\t$(CONTROLLER_GEN) crd:trivialVersions=true paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n.PHONY: manifests\nmanifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n\t$(CONTROLLER_GEN) crd paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n\t# Uncomment when we remove the permissive ClusterRoleBinding to cluster-admin\n\t# $(CONTROLLER_GEN) rbac:roleName=cluster-role-binding webhook paths=\"./...\"\n\n# Run go fmt against code\nfmt:\n.PHONY: generate\ngenerate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n\t\n\n.PHONY: fmt\nfmt: ## Run go fmt against code.\n\tgo fmt ./...\n\n# Run go vet against code\nvet:\n.PHONY: vet\nvet: ## Run go vet against code.\n\tgo vet ./...\n\n# Generate code\ngenerate: controller-gen\n\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=\"./...\"\n.PHONY: test\ntest: manifests generate fmt vet envtest ## Run tests.\n\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... 
-coverprofile cover.out\n\n##@ Build\n\n.PHONY: build\nbuild: generate fmt vet ## Build manager binary.\n\tgo build -o bin/manager main.go\n\n.PHONY: run\nrun: manifests generate fmt vet ## Run a controller from your host.\n\tgo run ./main.go -namespace-labels-path ./config/base/namespace-labels.yaml \n\n# Build the docker image\ndocker-build:\n\tdocker build --build-arg GOLANG_VERSION=${GOLANG_VERSION} -t ${IMG}:${TAG} .\n\t@echo Built ${IMG}:${TAG}\n.PHONY: docker-build\ndocker-build: test ## Build docker image with the manager.", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "920964404", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6491, - "pr_file": "components/profile-controller/Makefile", - "discussion_id": "920964404", - "commented_code": "@@ -11,65 +11,136 @@ else\n GOBIN=$(shell go env GOBIN)\n endif\n \n-all: manager\n+# Setting SHELL to bash allows bash commands to be executed by recipes.\n+# This is a requirement for 'setup-envtest.sh' in the test target.\n+# Options are set to exit when a recipe line exits non-zero or a piped command fails.\n+SHELL = /usr/bin/env bash -o pipefail\n+.SHELLFLAGS = -ec\n \n-# Run tests\n-test: generate fmt vet\n-\tgo test ./... -coverprofile cover.out\n+.PHONY: all\n+all: build\n \n-# Build manager binary\n-manager: generate fmt vet\n-\tgo build -o bin/manager main.go\n-\n-# Run against the configured Kubernetes cluster in ~/.kube/config\n-run: generate fmt vet\n-\tgo run ./main.go\n-\n-# Install CRDs into a cluster\n-install:\n-\tkustomize build config/crd | kubectl apply -f -\n+.PHONY: help\n+help: ## Display this help.\n+\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n \n-# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\n-deploy: manifests\n-\tcd config/manager && kustomize edit set image gcr.io/kubeflow-images-public/profile-controller=${IMG}\n-\tkustomize build config/default | kubectl apply -f -\n+##@ Development\n \n-# Generate manifests e.g. CRD, RBAC etc.\n-manifests: controller-gen\n-\t$(CONTROLLER_GEN) crd:trivialVersions=true paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n+.PHONY: manifests\n+manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n+\t$(CONTROLLER_GEN) crd paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n \t# Uncomment when we remove the permissive ClusterRoleBinding to cluster-admin\n \t# $(CONTROLLER_GEN) rbac:roleName=cluster-role-binding webhook paths=\"./...\"\n \n-# Run go fmt against code\n-fmt:\n+.PHONY: generate\n+generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n+\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n+\t\n+\n+.PHONY: fmt\n+fmt: ## Run go fmt against code.\n \tgo fmt ./...\n \n-# Run go vet against code\n-vet:\n+.PHONY: vet\n+vet: ## Run go vet against code.\n \tgo vet ./...\n \n-# Generate code\n-generate: controller-gen\n-\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=\"./...\"\n+.PHONY: test\n+test: manifests generate fmt vet envtest ## Run tests.\n+\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... 
-coverprofile cover.out\n+\n+##@ Build\n+\n+.PHONY: build\n+build: generate fmt vet ## Build manager binary.\n+\tgo build -o bin/manager main.go\n+\n+.PHONY: run\n+run: manifests generate fmt vet ## Run a controller from your host.\n+\tgo run ./main.go -namespace-labels-path ./config/base/namespace-labels.yaml \n \n-# Build the docker image\n-docker-build:\n-\tdocker build --build-arg GOLANG_VERSION=${GOLANG_VERSION} -t ${IMG}:${TAG} .\n-\t@echo Built ${IMG}:${TAG}\n+.PHONY: docker-build\n+docker-build: test ## Build docker image with the manager.", - "comment_created_at": "2022-07-14T09:43:16+00:00", - "comment_author": "kimwnasptd", - "comment_body": "let's remove the dependency on the `test` rule. This would require users to have a configured Go environment to build the controller. \r\n\r\nIf in the future we'd like to run these tests then we should do this inside of the Dockerfile, just like we do for the CentralDashboard", - "pr_file_module": null - }, - { - "comment_id": "920997563", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6491, - "pr_file": "components/profile-controller/Makefile", - "discussion_id": "920964404", - "commented_code": "@@ -11,65 +11,136 @@ else\n GOBIN=$(shell go env GOBIN)\n endif\n \n-all: manager\n+# Setting SHELL to bash allows bash commands to be executed by recipes.\n+# This is a requirement for 'setup-envtest.sh' in the test target.\n+# Options are set to exit when a recipe line exits non-zero or a piped command fails.\n+SHELL = /usr/bin/env bash -o pipefail\n+.SHELLFLAGS = -ec\n \n-# Run tests\n-test: generate fmt vet\n-\tgo test ./... -coverprofile cover.out\n+.PHONY: all\n+all: build\n \n-# Build manager binary\n-manager: generate fmt vet\n-\tgo build -o bin/manager main.go\n-\n-# Run against the configured Kubernetes cluster in ~/.kube/config\n-run: generate fmt vet\n-\tgo run ./main.go\n-\n-# Install CRDs into a cluster\n-install:\n-\tkustomize build config/crd | kubectl apply -f -\n+.PHONY: help\n+help: ## Display this help.\n+\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n \n-# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\n-deploy: manifests\n-\tcd config/manager && kustomize edit set image gcr.io/kubeflow-images-public/profile-controller=${IMG}\n-\tkustomize build config/default | kubectl apply -f -\n+##@ Development\n \n-# Generate manifests e.g. 
CRD, RBAC etc.\n-manifests: controller-gen\n-\t$(CONTROLLER_GEN) crd:trivialVersions=true paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n+.PHONY: manifests\n+manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n+\t$(CONTROLLER_GEN) crd paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n \t# Uncomment when we remove the permissive ClusterRoleBinding to cluster-admin\n \t# $(CONTROLLER_GEN) rbac:roleName=cluster-role-binding webhook paths=\"./...\"\n \n-# Run go fmt against code\n-fmt:\n+.PHONY: generate\n+generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n+\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n+\t\n+\n+.PHONY: fmt\n+fmt: ## Run go fmt against code.\n \tgo fmt ./...\n \n-# Run go vet against code\n-vet:\n+.PHONY: vet\n+vet: ## Run go vet against code.\n \tgo vet ./...\n \n-# Generate code\n-generate: controller-gen\n-\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=\"./...\"\n+.PHONY: test\n+test: manifests generate fmt vet envtest ## Run tests.\n+\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... -coverprofile cover.out\n+\n+##@ Build\n+\n+.PHONY: build\n+build: generate fmt vet ## Build manager binary.\n+\tgo build -o bin/manager main.go\n+\n+.PHONY: run\n+run: manifests generate fmt vet ## Run a controller from your host.\n+\tgo run ./main.go -namespace-labels-path ./config/base/namespace-labels.yaml \n \n-# Build the docker image\n-docker-build:\n-\tdocker build --build-arg GOLANG_VERSION=${GOLANG_VERSION} -t ${IMG}:${TAG} .\n-\t@echo Built ${IMG}:${TAG}\n+.PHONY: docker-build\n+docker-build: test ## Build docker image with the manager.", - "comment_created_at": "2022-07-14T10:21:27+00:00", - "comment_author": "apo-ger", - "comment_body": "test dependency is removed", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "839931745", - "pr_number": 6374, - "pr_file": "components/notebook-controller/Makefile", - "created_at": "2022-03-31T19:02:40+00:00", - "commented_code": "# Image URL to use all building/pushing image targets\nIMG ?= gcr.io/kubeflow-images-public/notebook-controller\nIMG ?= public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\nTAG ?= $(shell git describe --tags --always)\nSHELL := /bin/bash\nGOLANG_VERSION ?= 1.15\n\n# Whether to use cached images with GCB\nUSE_IMAGE_CACHE ?= true\n\n# Produce CRDs that work back to Kubernetes 1.11 (no version conversion)\nCRD_OPTIONS ?= \"crd\"", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "839931745", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6374, - "pr_file": "components/notebook-controller/Makefile", - "discussion_id": "839931745", - "commented_code": "@@ -1,18 +1,10 @@\n \n # Image URL to use all building/pushing image targets\n-IMG ?= gcr.io/kubeflow-images-public/notebook-controller\n+IMG ?= public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\n TAG ?= $(shell git describe --tags --always)\n-SHELL := /bin/bash\n-GOLANG_VERSION ?= 1.15\n \n-# Whether to use cached images with GCB\n-USE_IMAGE_CACHE ?= true\n-\n-# Produce CRDs that work back to Kubernetes 1.11 (no version conversion)\n-CRD_OPTIONS ?= \"crd\"", - "comment_created_at": "2022-03-31T19:02:40+00:00", - "comment_author": "kimwnasptd", - "comment_body": "When I tried applying the new CRD on a cluster with the previous CRD I got the following errors 
back:\r\n```\r\nThe CustomResourceDefinition \"notebooks.kubeflow.org\" is invalid:\r\n* metadata.annotations: Too long: must have at most 262144 bytes\r\n* spec.preserveUnknownFields: Invalid value: true: must be false in order to use defaults in the schema\r\n```\r\n\r\nRegarding the `preserveUnknownFIelds`, could you explicitly set it to `false`?\r\n\r\nThis is also the recommended way in the docs to have compatibility: \r\nhttps://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/#field-pruning\r\nhttps://github.com/kubernetes-sigs/controller-tools/issues/476#issuecomment-691519936\r\n\r\nAlthough with a quick look at the kubebuilder docs I'm not 100% sure whether we should do this with a [marker](https://book.kubebuilder.io/reference/markers/crd-processing.html) or via the [makefile](https://book.kubebuilder.io/reference/generating-crd.html#multiple-versions)", - "pr_file_module": null - }, - { - "comment_id": "840440645", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6374, - "pr_file": "components/notebook-controller/Makefile", - "discussion_id": "839931745", - "commented_code": "@@ -1,18 +1,10 @@\n \n # Image URL to use all building/pushing image targets\n-IMG ?= gcr.io/kubeflow-images-public/notebook-controller\n+IMG ?= public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\n TAG ?= $(shell git describe --tags --always)\n-SHELL := /bin/bash\n-GOLANG_VERSION ?= 1.15\n \n-# Whether to use cached images with GCB\n-USE_IMAGE_CACHE ?= true\n-\n-# Produce CRDs that work back to Kubernetes 1.11 (no version conversion)\n-CRD_OPTIONS ?= \"crd\"", - "comment_created_at": "2022-04-01T10:17:54+00:00", - "comment_author": "samuelvl", - "comment_body": "Thanks for the comments! I'm starting to review them.\r\n\r\nIs `kubectl replace` instead of `kubectl apply` an option? It should fix both errors:\r\n\r\n```\r\nkubectl replace -f config/crd/bases/kubeflow.org_notebooks.yaml\r\n```\r\n", - "pr_file_module": null - }, - { - "comment_id": "850313417", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6374, - "pr_file": "components/notebook-controller/Makefile", - "discussion_id": "839931745", - "commented_code": "@@ -1,18 +1,10 @@\n \n # Image URL to use all building/pushing image targets\n-IMG ?= gcr.io/kubeflow-images-public/notebook-controller\n+IMG ?= public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\n TAG ?= $(shell git describe --tags --always)\n-SHELL := /bin/bash\n-GOLANG_VERSION ?= 1.15\n \n-# Whether to use cached images with GCB\n-USE_IMAGE_CACHE ?= true\n-\n-# Produce CRDs that work back to Kubernetes 1.11 (no version conversion)\n-CRD_OPTIONS ?= \"crd\"", - "comment_created_at": "2022-04-14T10:32:50+00:00", - "comment_author": "kimwnasptd", - "comment_body": "After looking at this again I propose that we manually \r\n1. add the `spec.preserveUnknownFields: false` just for this release\r\n2. 
remove this field in KF 1.7, since it's only needed for the `apiextensions.k8s.io/v1beta1` to `apiextensions.k8s.io/v1` transition.\r\n\r\nThe `controller-gen` does not allow us to explicitly set this to false https://book.kubebuilder.io/reference/markers/crd-processing.html\r\n\r\n", - "pr_file_module": null - }, - { - "comment_id": "853985090", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6374, - "pr_file": "components/notebook-controller/Makefile", - "discussion_id": "839931745", - "commented_code": "@@ -1,18 +1,10 @@\n \n # Image URL to use all building/pushing image targets\n-IMG ?= gcr.io/kubeflow-images-public/notebook-controller\n+IMG ?= public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\n TAG ?= $(shell git describe --tags --always)\n-SHELL := /bin/bash\n-GOLANG_VERSION ?= 1.15\n \n-# Whether to use cached images with GCB\n-USE_IMAGE_CACHE ?= true\n-\n-# Produce CRDs that work back to Kubernetes 1.11 (no version conversion)\n-CRD_OPTIONS ?= \"crd\"", - "comment_created_at": "2022-04-20T10:36:05+00:00", - "comment_author": "samuelvl", - "comment_body": "I have added it to the `config/crd/patches/trivial_conversion_patch.yaml` file instead of using `yq` or `sed` hacks:\r\n\r\n```yaml\r\napiVersion: apiextensions.k8s.io/v1\r\nkind: CustomResourceDefinition\r\nmetadata:\r\n name: notebooks.kubeflow.org\r\nspec:\r\n preserveUnknownFields: false # TODO: Remove in Kubeflow 1.7 release\r\n conversion:\r\n strategy: None\r\n```\r\n", - "pr_file_module": null - }, - { - "comment_id": "862437781", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6374, - "pr_file": "components/notebook-controller/Makefile", - "discussion_id": "839931745", - "commented_code": "@@ -1,18 +1,10 @@\n \n # Image URL to use all building/pushing image targets\n-IMG ?= gcr.io/kubeflow-images-public/notebook-controller\n+IMG ?= public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\n TAG ?= $(shell git describe --tags --always)\n-SHELL := /bin/bash\n-GOLANG_VERSION ?= 1.15\n \n-# Whether to use cached images with GCB\n-USE_IMAGE_CACHE ?= true\n-\n-# Produce CRDs that work back to Kubernetes 1.11 (no version conversion)\n-CRD_OPTIONS ?= \"crd\"", - "comment_created_at": "2022-05-01T07:52:29+00:00", - "comment_author": "kimwnasptd", - "comment_body": "This is awesome @samuelvl!\r\n\r\nOne final nit, I see that you've commented out the `crd` part in the manifests, which results in the CRD to not be included when generating `base` or `overlays/kubeflow` \r\n\r\nhttps://github.com/kubeflow/kubeflow/pull/6374/files#diff-28481732533c7c75d2d2ba504e9670d90e72ed98f999115fd92a0f8e8a5aace2R20\r\n\r\nCould you revert it back, so that the CRD is included as well? 
We should be ready to merge afterwards", - "pr_file_module": null - }, - { - "comment_id": "863698254", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6374, - "pr_file": "components/notebook-controller/Makefile", - "discussion_id": "839931745", - "commented_code": "@@ -1,18 +1,10 @@\n \n # Image URL to use all building/pushing image targets\n-IMG ?= gcr.io/kubeflow-images-public/notebook-controller\n+IMG ?= public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\n TAG ?= $(shell git describe --tags --always)\n-SHELL := /bin/bash\n-GOLANG_VERSION ?= 1.15\n \n-# Whether to use cached images with GCB\n-USE_IMAGE_CACHE ?= true\n-\n-# Produce CRDs that work back to Kubernetes 1.11 (no version conversion)\n-CRD_OPTIONS ?= \"crd\"", - "comment_created_at": "2022-05-03T11:55:24+00:00", - "comment_author": "samuelvl", - "comment_body": "I commented the `crd` part to avoid this issue (https://github.com/prometheus-community/helm-charts/issues/1500 is similar): \r\n\r\n```\r\nThe CustomResourceDefinition \"notebooks.kubeflow.org\" is invalid: metadata.annotations: Too long: must have at most 262144 bytes\r\n``` \r\n\r\nI have reverted the original logic and added the `maxDescLen=0` option when generating the crd to avoid this issue, however we'll miss some information when using the `kubectl explain notebook` command:\r\n\r\n```\r\n$(CONTROLLER_GEN) rbac:roleName=role crd:maxDescLen=0 ...\r\n```", - "pr_file_module": null - }, - { - "comment_id": "863922630", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6374, - "pr_file": "components/notebook-controller/Makefile", - "discussion_id": "839931745", - "commented_code": "@@ -1,18 +1,10 @@\n \n # Image URL to use all building/pushing image targets\n-IMG ?= gcr.io/kubeflow-images-public/notebook-controller\n+IMG ?= public.ecr.aws/j1r0q0g6/notebooks/notebook-controller\n TAG ?= $(shell git describe --tags --always)\n-SHELL := /bin/bash\n-GOLANG_VERSION ?= 1.15\n \n-# Whether to use cached images with GCB\n-USE_IMAGE_CACHE ?= true\n-\n-# Produce CRDs that work back to Kubernetes 1.11 (no version conversion)\n-CRD_OPTIONS ?= \"crd\"", - "comment_created_at": "2022-05-03T15:47:44+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Hmm, indeed in this PR we made the CRD quite bigger since we now actually include the full PodSpec for validation.\r\n\r\nBut the `maxDescLen=0` seems like a good way to cut down significantly the size of it. 
The spec is currently only the PodSpec, for which descriptions are widely available so we are OK.\r\n\r\nGood job @samuelvl!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "839948826", - "pr_number": 6374, - "pr_file": "components/notebook-controller/Makefile", - "created_at": "2022-03-31T19:26:41+00:00", - "commented_code": "GOBIN=$(shell go env GOBIN)\nendif\n\n# Setting SHELL to bash allows bash commands to be executed by recipes.\n# This is a requirement for 'setup-envtest.sh' in the test target.\n# Options are set to exit when a recipe line exits non-zero or a piped command fails.\nSHELL = /usr/bin/env bash -o pipefail\n.SHELLFLAGS = -ec\n\n.PHONY: all\nall: manager\n\n# check license\ncheck-license:\n\t./third_party/check-license.sh\n##@ General\n\n# Run tests\ntest: generate fmt vet manifests\n\tmkdir -p ${ENVTEST_ASSETS_DIR}\n\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/a9bd9117a77a2f84bbc546e28991136fe0000dc0/hack/setup-envtest.sh\n\tsource ${ENVTEST_ASSETS_DIR}/setup-envtest.sh; fetch_envtest_tools $(ENVTEST_ASSETS_DIR); setup_envtest_env $(ENVTEST_ASSETS_DIR);\n\tgo test ./api/... ./controllers/... -coverprofile cover.out\n# The help target prints out all targets with their descriptions organized\n# beneath their categories. The categories are represented by '##@' and the\n# target descriptions by '##'. The awk commands is responsible for reading the\n# entire set of makefiles included in this invocation, looking for lines of the\n# file as xyz: ## something, and then pretty-format the target and help. Then,\n# if there's a line with ##@ something, that gets pretty-printed as a category.\n# More info on the usage of ANSI control characters for terminal formatting:\n# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters\n# More info on the awk command:\n# http://linuxcommand.org/lc3_adv_awk.php\n\n# Build manager binary\nmanager: generate fmt vet\n\tgo build -o bin/manager main.go\n.PHONY: help\nhelp: ## Display this help.\n\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n\n# Run against the configured Kubernetes cluster in ~/.kube/config\nrun: generate fmt vet\n\tgo run ./main.go\n##@ Development\n\n# Install CRDs into a cluster\ninstall: manifests\n\tkubectl apply -f config/crd/bases\n.PHONY: check-license\ncheck-license: ## Check third-party license\n\t./third_party/check-license.sh\n\n# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\ndeploy: manifests\n\tkubectl apply -f config/crd/bases\n\tkustomize build config/default | kubectl apply -f -\n.PHONY: manifests\nmanifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n\t$(CONTROLLER_GEN) rbac:roleName=role crd webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n\n# Generate manifests e.g. 
CRD, RBAC etc.\nmanifests: controller-gen\n\t$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=role webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n.PHONY: generate\ngenerate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n\n# Run go fmt against code\nfmt:\n.PHONY: fmt\nfmt: ## Run go fmt against code.\n\tgo fmt ./...\n\n# Run go vet against code\nvet:\n.PHONY: vet\nvet: ## Run go vet against code.\n\tgo vet ./...\n\n# Generate code\ngenerate: controller-gen\n\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=./api/...\n.PHONY: test\ntest: manifests generate fmt vet envtest ## Run tests.\n\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... -coverprofile cover.out\n\n##@ Build\n\n# Build the docker image\ndocker-build: test\n.PHONY: manager\nmanager: generate fmt vet ## Build manager binary.\n\tgo build -o bin/manager main.go\n\n.PHONY: run\nrun: manifests generate fmt vet ## Run a controller from your host.\n\tgo run ./main.go\n\n.PHONY: docker-build\ndocker-build: test ## Build docker image with the manager.\n\tcd .. && docker build . -t ${IMG}:${TAG} -f ./notebook-controller/Dockerfile\n\t@echo \"updating kustomize image patch file for manager resource\"\n\tsed -i'' -e 's@image: .*@image: '\"${IMG}:${TAG}\"'@' ./config/default/manager_image_patch.yaml\n\n# Push the docker image\ndocker-push:\n.PHONY: docker-push\ndocker-push: ## Push docker image with the manager.\n\tdocker push ${IMG}:${TAG}\n\n# find or download controller-gen\n# download controller-gen if necessary\ncontroller-gen:\nifeq (, $(shell which controller-gen))\n\tgo get sigs.k8s.io/controller-tools/cmd/controller-gen@v0.2.0\nCONTROLLER_GEN=$(GOBIN)/controller-gen\nelse\nCONTROLLER_GEN=$(shell which controller-gen)\nendif\n##@ Deployment\n\n# TODO(jlewi): Can we get rid of this and just use skaffold?\nbuild-gcr: test\n\tdocker build -t $(IMG):$(TAG) .\n\t@echo Built $(IMG):$(TAG)\nifndef ignore-not-found\n ignore-not-found = false\nendif\n\npush-gcr: build-gcr\n\tdocker push $(IMG):$(TAG)\n\t@echo Pushed $(IMG):$(TAG)\n.PHONY: install\ninstall: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config.\n\tkubectl get crd notebooks.kubeflow.org || $(KUSTOMIZE) build config/crd | kubectl create -f -\n\n.PHONY: uninstall\nuninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.\n\t$(KUSTOMIZE) build config/crd | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n\n.PHONY: deploy\ndeploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.\n\tsed -i'' -e 's@newName: .*@newName: '\"${IMG}\"'@' ./config/base/kustomization.yaml\n\tsed -i'' -e 's@newTag: .*@newTag: '\"${TAG}\"'@' ./config/base/kustomization.yaml\n\t$(KUSTOMIZE) build config/base | kubectl apply -f -\n\n.PHONY: undeploy\nundeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. 
Call with ignore-not-found=true to ignore resource not found errors during deletion.\n\t$(KUSTOMIZE) build config/base | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n\nCONTROLLER_GEN = $(shell pwd)/bin/controller-gen\n.PHONY: controller-gen\ncontroller-gen: ## Download controller-gen locally if necessary.\n\t$(call go-get-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen@v0.8.0)\n\nKUSTOMIZE = $(shell pwd)/bin/kustomize\n.PHONY: kustomize\nkustomize: ## Download kustomize locally if necessary.\n\t$(call go-get-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/v3@v3.8.7)", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "839948826", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6374, - "pr_file": "components/notebook-controller/Makefile", - "discussion_id": "839948826", - "commented_code": "@@ -21,77 +13,125 @@ else\n GOBIN=$(shell go env GOBIN)\n endif\n \n+# Setting SHELL to bash allows bash commands to be executed by recipes.\n+# This is a requirement for 'setup-envtest.sh' in the test target.\n+# Options are set to exit when a recipe line exits non-zero or a piped command fails.\n+SHELL = /usr/bin/env bash -o pipefail\n+.SHELLFLAGS = -ec\n+\n+.PHONY: all\n all: manager\n \n-# check license\n-check-license:\n-\t./third_party/check-license.sh\n+##@ General\n \n-# Run tests\n-test: generate fmt vet manifests\n-\tmkdir -p ${ENVTEST_ASSETS_DIR}\n-\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/a9bd9117a77a2f84bbc546e28991136fe0000dc0/hack/setup-envtest.sh\n-\tsource ${ENVTEST_ASSETS_DIR}/setup-envtest.sh; fetch_envtest_tools $(ENVTEST_ASSETS_DIR); setup_envtest_env $(ENVTEST_ASSETS_DIR);\n-\tgo test ./api/... ./controllers/... -coverprofile cover.out\n+# The help target prints out all targets with their descriptions organized\n+# beneath their categories. The categories are represented by '##@' and the\n+# target descriptions by '##'. The awk commands is responsible for reading the\n+# entire set of makefiles included in this invocation, looking for lines of the\n+# file as xyz: ## something, and then pretty-format the target and help. 
Then,\n+# if there's a line with ##@ something, that gets pretty-printed as a category.\n+# More info on the usage of ANSI control characters for terminal formatting:\n+# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters\n+# More info on the awk command:\n+# http://linuxcommand.org/lc3_adv_awk.php\n \n-# Build manager binary\n-manager: generate fmt vet\n-\tgo build -o bin/manager main.go\n+.PHONY: help\n+help: ## Display this help.\n+\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n \n-# Run against the configured Kubernetes cluster in ~/.kube/config\n-run: generate fmt vet\n-\tgo run ./main.go\n+##@ Development\n \n-# Install CRDs into a cluster\n-install: manifests\n-\tkubectl apply -f config/crd/bases\n+.PHONY: check-license\n+check-license: ## Check third-party license\n+\t./third_party/check-license.sh\n \n-# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\n-deploy: manifests\n-\tkubectl apply -f config/crd/bases\n-\tkustomize build config/default | kubectl apply -f -\n+.PHONY: manifests\n+manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n+\t$(CONTROLLER_GEN) rbac:roleName=role crd webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n \n-# Generate manifests e.g. CRD, RBAC etc.\n-manifests: controller-gen\n-\t$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=role webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n+.PHONY: generate\n+generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n+\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n \n-# Run go fmt against code\n-fmt:\n+.PHONY: fmt\n+fmt: ## Run go fmt against code.\n \tgo fmt ./...\n \n-# Run go vet against code\n-vet:\n+.PHONY: vet\n+vet: ## Run go vet against code.\n \tgo vet ./...\n \n-# Generate code\n-generate: controller-gen\n-\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=./api/...\n+.PHONY: test\n+test: manifests generate fmt vet envtest ## Run tests.\n+\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... -coverprofile cover.out\n+\n+##@ Build\n \n-# Build the docker image\n-docker-build: test\n+.PHONY: manager\n+manager: generate fmt vet ## Build manager binary.\n+\tgo build -o bin/manager main.go\n+\n+.PHONY: run\n+run: manifests generate fmt vet ## Run a controller from your host.\n+\tgo run ./main.go\n+\n+.PHONY: docker-build\n+docker-build: test ## Build docker image with the manager.\n \tcd .. && docker build . 
-t ${IMG}:${TAG} -f ./notebook-controller/Dockerfile\n-\t@echo \"updating kustomize image patch file for manager resource\"\n-\tsed -i'' -e 's@image: .*@image: '\"${IMG}:${TAG}\"'@' ./config/default/manager_image_patch.yaml\n \n-# Push the docker image\n-docker-push:\n+.PHONY: docker-push\n+docker-push: ## Push docker image with the manager.\n \tdocker push ${IMG}:${TAG}\n \n-# find or download controller-gen\n-# download controller-gen if necessary\n-controller-gen:\n-ifeq (, $(shell which controller-gen))\n-\tgo get sigs.k8s.io/controller-tools/cmd/controller-gen@v0.2.0\n-CONTROLLER_GEN=$(GOBIN)/controller-gen\n-else\n-CONTROLLER_GEN=$(shell which controller-gen)\n-endif\n+##@ Deployment\n \n-# TODO(jlewi): Can we get rid of this and just use skaffold?\n-build-gcr: test\n-\tdocker build -t $(IMG):$(TAG) .\n-\t@echo Built $(IMG):$(TAG)\n+ifndef ignore-not-found\n+ ignore-not-found = false\n+endif\n \n-push-gcr: build-gcr\n-\tdocker push $(IMG):$(TAG)\n-\t@echo Pushed $(IMG):$(TAG)\n+.PHONY: install\n+install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config.\n+\tkubectl get crd notebooks.kubeflow.org || $(KUSTOMIZE) build config/crd | kubectl create -f -\n+\n+.PHONY: uninstall\n+uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.\n+\t$(KUSTOMIZE) build config/crd | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n+\n+.PHONY: deploy\n+deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.\n+\tsed -i'' -e 's@newName: .*@newName: '\"${IMG}\"'@' ./config/base/kustomization.yaml\n+\tsed -i'' -e 's@newTag: .*@newTag: '\"${TAG}\"'@' ./config/base/kustomization.yaml\n+\t$(KUSTOMIZE) build config/base | kubectl apply -f -\n+\n+.PHONY: undeploy\n+undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.\n+\t$(KUSTOMIZE) build config/base | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n+\n+CONTROLLER_GEN = $(shell pwd)/bin/controller-gen\n+.PHONY: controller-gen\n+controller-gen: ## Download controller-gen locally if necessary.\n+\t$(call go-get-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen@v0.8.0)\n+\n+KUSTOMIZE = $(shell pwd)/bin/kustomize\n+.PHONY: kustomize\n+kustomize: ## Download kustomize locally if necessary.\n+\t$(call go-get-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/v3@v3.8.7)", - "comment_created_at": "2022-03-31T19:26:41+00:00", - "comment_author": "kimwnasptd", - "comment_body": "I tried to `kustomize build config/crd` with a 3.2 version, since this was the one [used from manifests](https://github.com/kubeflow/manifests#prerequisites) but I got the following errors:\r\n```\r\nError: no matches for OriginalId apiextensions.k8s.io_v1beta1_CustomResourceDefinition|~X|notebooks.kubeflow.org; no matches for CurrentId apiextensions.k8s.io_v1beta1_CustomResourceDefinition|~X|notebooks.kubeflow.org; failed to find unique target for patch apiextensions.k8s.io_v1beta1_CustomResourceDefinition|notebooks.kubeflow.org\r\n``` \r\n\r\nDo you know why these occur? They were not shows in the previous iteration of the controller, so maybe something slightly changed in the autogenerated manifests?\r\n\r\nIf I use the 3.8 version from the makefile this error goes away. 
Also it's not shown even with 3.2 when I build the `base`, so it's not that critical. But let's try to understand why this happens. I'll try to look into it within the next days as well", - "pr_file_module": null - }, - { - "comment_id": "840777413", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6374, - "pr_file": "components/notebook-controller/Makefile", - "discussion_id": "839948826", - "commented_code": "@@ -21,77 +13,125 @@ else\n GOBIN=$(shell go env GOBIN)\n endif\n \n+# Setting SHELL to bash allows bash commands to be executed by recipes.\n+# This is a requirement for 'setup-envtest.sh' in the test target.\n+# Options are set to exit when a recipe line exits non-zero or a piped command fails.\n+SHELL = /usr/bin/env bash -o pipefail\n+.SHELLFLAGS = -ec\n+\n+.PHONY: all\n all: manager\n \n-# check license\n-check-license:\n-\t./third_party/check-license.sh\n+##@ General\n \n-# Run tests\n-test: generate fmt vet manifests\n-\tmkdir -p ${ENVTEST_ASSETS_DIR}\n-\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/a9bd9117a77a2f84bbc546e28991136fe0000dc0/hack/setup-envtest.sh\n-\tsource ${ENVTEST_ASSETS_DIR}/setup-envtest.sh; fetch_envtest_tools $(ENVTEST_ASSETS_DIR); setup_envtest_env $(ENVTEST_ASSETS_DIR);\n-\tgo test ./api/... ./controllers/... -coverprofile cover.out\n+# The help target prints out all targets with their descriptions organized\n+# beneath their categories. The categories are represented by '##@' and the\n+# target descriptions by '##'. The awk commands is responsible for reading the\n+# entire set of makefiles included in this invocation, looking for lines of the\n+# file as xyz: ## something, and then pretty-format the target and help. Then,\n+# if there's a line with ##@ something, that gets pretty-printed as a category.\n+# More info on the usage of ANSI control characters for terminal formatting:\n+# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters\n+# More info on the awk command:\n+# http://linuxcommand.org/lc3_adv_awk.php\n \n-# Build manager binary\n-manager: generate fmt vet\n-\tgo build -o bin/manager main.go\n+.PHONY: help\n+help: ## Display this help.\n+\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n \n-# Run against the configured Kubernetes cluster in ~/.kube/config\n-run: generate fmt vet\n-\tgo run ./main.go\n+##@ Development\n \n-# Install CRDs into a cluster\n-install: manifests\n-\tkubectl apply -f config/crd/bases\n+.PHONY: check-license\n+check-license: ## Check third-party license\n+\t./third_party/check-license.sh\n \n-# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\n-deploy: manifests\n-\tkubectl apply -f config/crd/bases\n-\tkustomize build config/default | kubectl apply -f -\n+.PHONY: manifests\n+manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n+\t$(CONTROLLER_GEN) rbac:roleName=role crd webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n \n-# Generate manifests e.g. 
CRD, RBAC etc.\n-manifests: controller-gen\n-\t$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=role webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n+.PHONY: generate\n+generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n+\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n \n-# Run go fmt against code\n-fmt:\n+.PHONY: fmt\n+fmt: ## Run go fmt against code.\n \tgo fmt ./...\n \n-# Run go vet against code\n-vet:\n+.PHONY: vet\n+vet: ## Run go vet against code.\n \tgo vet ./...\n \n-# Generate code\n-generate: controller-gen\n-\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=./api/...\n+.PHONY: test\n+test: manifests generate fmt vet envtest ## Run tests.\n+\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... -coverprofile cover.out\n+\n+##@ Build\n \n-# Build the docker image\n-docker-build: test\n+.PHONY: manager\n+manager: generate fmt vet ## Build manager binary.\n+\tgo build -o bin/manager main.go\n+\n+.PHONY: run\n+run: manifests generate fmt vet ## Run a controller from your host.\n+\tgo run ./main.go\n+\n+.PHONY: docker-build\n+docker-build: test ## Build docker image with the manager.\n \tcd .. && docker build . -t ${IMG}:${TAG} -f ./notebook-controller/Dockerfile\n-\t@echo \"updating kustomize image patch file for manager resource\"\n-\tsed -i'' -e 's@image: .*@image: '\"${IMG}:${TAG}\"'@' ./config/default/manager_image_patch.yaml\n \n-# Push the docker image\n-docker-push:\n+.PHONY: docker-push\n+docker-push: ## Push docker image with the manager.\n \tdocker push ${IMG}:${TAG}\n \n-# find or download controller-gen\n-# download controller-gen if necessary\n-controller-gen:\n-ifeq (, $(shell which controller-gen))\n-\tgo get sigs.k8s.io/controller-tools/cmd/controller-gen@v0.2.0\n-CONTROLLER_GEN=$(GOBIN)/controller-gen\n-else\n-CONTROLLER_GEN=$(shell which controller-gen)\n-endif\n+##@ Deployment\n \n-# TODO(jlewi): Can we get rid of this and just use skaffold?\n-build-gcr: test\n-\tdocker build -t $(IMG):$(TAG) .\n-\t@echo Built $(IMG):$(TAG)\n+ifndef ignore-not-found\n+ ignore-not-found = false\n+endif\n \n-push-gcr: build-gcr\n-\tdocker push $(IMG):$(TAG)\n-\t@echo Pushed $(IMG):$(TAG)\n+.PHONY: install\n+install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config.\n+\tkubectl get crd notebooks.kubeflow.org || $(KUSTOMIZE) build config/crd | kubectl create -f -\n+\n+.PHONY: uninstall\n+uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.\n+\t$(KUSTOMIZE) build config/crd | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n+\n+.PHONY: deploy\n+deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.\n+\tsed -i'' -e 's@newName: .*@newName: '\"${IMG}\"'@' ./config/base/kustomization.yaml\n+\tsed -i'' -e 's@newTag: .*@newTag: '\"${TAG}\"'@' ./config/base/kustomization.yaml\n+\t$(KUSTOMIZE) build config/base | kubectl apply -f -\n+\n+.PHONY: undeploy\n+undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. 
Call with ignore-not-found=true to ignore resource not found errors during deletion.\n+\t$(KUSTOMIZE) build config/base | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n+\n+CONTROLLER_GEN = $(shell pwd)/bin/controller-gen\n+.PHONY: controller-gen\n+controller-gen: ## Download controller-gen locally if necessary.\n+\t$(call go-get-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen@v0.8.0)\n+\n+KUSTOMIZE = $(shell pwd)/bin/kustomize\n+.PHONY: kustomize\n+kustomize: ## Download kustomize locally if necessary.\n+\t$(call go-get-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/v3@v3.8.7)", - "comment_created_at": "2022-04-01T17:14:26+00:00", - "comment_author": "samuelvl", - "comment_body": "It's failing because there is a patch trying to modify the old v1beta1 CRD but there is no match (obviously):\r\n\r\n```\r\npatchesJson6902:\r\n# Remove once the following issue is resolved:\r\n# https://github.com/kubeflow/kubeflow/issues/5722\r\n- path: patches/old_crd.yaml\r\n target:\r\n group: apiextensions.k8s.io\r\n version: v1beta1 # <--------- This should be v1\r\n kind: CustomResourceDefinition\r\n name: notebooks.kubeflow.org\r\n ```\r\n \r\nSince https://github.com/kubeflow/kubeflow/issues/5722 is resolved I think the best solution is to remove the entire patch block, what do you think?\r\n\r\nIf we are using Kustomize 3.2 in manifests I can update the Makefile to download that version to be aligned, that may save some compatibility issues in the future.", - "pr_file_module": null - }, - { - "comment_id": "850178203", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6374, - "pr_file": "components/notebook-controller/Makefile", - "discussion_id": "839948826", - "commented_code": "@@ -21,77 +13,125 @@ else\n GOBIN=$(shell go env GOBIN)\n endif\n \n+# Setting SHELL to bash allows bash commands to be executed by recipes.\n+# This is a requirement for 'setup-envtest.sh' in the test target.\n+# Options are set to exit when a recipe line exits non-zero or a piped command fails.\n+SHELL = /usr/bin/env bash -o pipefail\n+.SHELLFLAGS = -ec\n+\n+.PHONY: all\n all: manager\n \n-# check license\n-check-license:\n-\t./third_party/check-license.sh\n+##@ General\n \n-# Run tests\n-test: generate fmt vet manifests\n-\tmkdir -p ${ENVTEST_ASSETS_DIR}\n-\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/a9bd9117a77a2f84bbc546e28991136fe0000dc0/hack/setup-envtest.sh\n-\tsource ${ENVTEST_ASSETS_DIR}/setup-envtest.sh; fetch_envtest_tools $(ENVTEST_ASSETS_DIR); setup_envtest_env $(ENVTEST_ASSETS_DIR);\n-\tgo test ./api/... ./controllers/... -coverprofile cover.out\n+# The help target prints out all targets with their descriptions organized\n+# beneath their categories. The categories are represented by '##@' and the\n+# target descriptions by '##'. The awk commands is responsible for reading the\n+# entire set of makefiles included in this invocation, looking for lines of the\n+# file as xyz: ## something, and then pretty-format the target and help. 
Then,\n+# if there's a line with ##@ something, that gets pretty-printed as a category.\n+# More info on the usage of ANSI control characters for terminal formatting:\n+# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters\n+# More info on the awk command:\n+# http://linuxcommand.org/lc3_adv_awk.php\n \n-# Build manager binary\n-manager: generate fmt vet\n-\tgo build -o bin/manager main.go\n+.PHONY: help\n+help: ## Display this help.\n+\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n \n-# Run against the configured Kubernetes cluster in ~/.kube/config\n-run: generate fmt vet\n-\tgo run ./main.go\n+##@ Development\n \n-# Install CRDs into a cluster\n-install: manifests\n-\tkubectl apply -f config/crd/bases\n+.PHONY: check-license\n+check-license: ## Check third-party license\n+\t./third_party/check-license.sh\n \n-# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\n-deploy: manifests\n-\tkubectl apply -f config/crd/bases\n-\tkustomize build config/default | kubectl apply -f -\n+.PHONY: manifests\n+manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n+\t$(CONTROLLER_GEN) rbac:roleName=role crd webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n \n-# Generate manifests e.g. CRD, RBAC etc.\n-manifests: controller-gen\n-\t$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=role webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n+.PHONY: generate\n+generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n+\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n \n-# Run go fmt against code\n-fmt:\n+.PHONY: fmt\n+fmt: ## Run go fmt against code.\n \tgo fmt ./...\n \n-# Run go vet against code\n-vet:\n+.PHONY: vet\n+vet: ## Run go vet against code.\n \tgo vet ./...\n \n-# Generate code\n-generate: controller-gen\n-\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=./api/...\n+.PHONY: test\n+test: manifests generate fmt vet envtest ## Run tests.\n+\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... -coverprofile cover.out\n+\n+##@ Build\n \n-# Build the docker image\n-docker-build: test\n+.PHONY: manager\n+manager: generate fmt vet ## Build manager binary.\n+\tgo build -o bin/manager main.go\n+\n+.PHONY: run\n+run: manifests generate fmt vet ## Run a controller from your host.\n+\tgo run ./main.go\n+\n+.PHONY: docker-build\n+docker-build: test ## Build docker image with the manager.\n \tcd .. && docker build . 
-t ${IMG}:${TAG} -f ./notebook-controller/Dockerfile\n-\t@echo \"updating kustomize image patch file for manager resource\"\n-\tsed -i'' -e 's@image: .*@image: '\"${IMG}:${TAG}\"'@' ./config/default/manager_image_patch.yaml\n \n-# Push the docker image\n-docker-push:\n+.PHONY: docker-push\n+docker-push: ## Push docker image with the manager.\n \tdocker push ${IMG}:${TAG}\n \n-# find or download controller-gen\n-# download controller-gen if necessary\n-controller-gen:\n-ifeq (, $(shell which controller-gen))\n-\tgo get sigs.k8s.io/controller-tools/cmd/controller-gen@v0.2.0\n-CONTROLLER_GEN=$(GOBIN)/controller-gen\n-else\n-CONTROLLER_GEN=$(shell which controller-gen)\n-endif\n+##@ Deployment\n \n-# TODO(jlewi): Can we get rid of this and just use skaffold?\n-build-gcr: test\n-\tdocker build -t $(IMG):$(TAG) .\n-\t@echo Built $(IMG):$(TAG)\n+ifndef ignore-not-found\n+ ignore-not-found = false\n+endif\n \n-push-gcr: build-gcr\n-\tdocker push $(IMG):$(TAG)\n-\t@echo Pushed $(IMG):$(TAG)\n+.PHONY: install\n+install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config.\n+\tkubectl get crd notebooks.kubeflow.org || $(KUSTOMIZE) build config/crd | kubectl create -f -\n+\n+.PHONY: uninstall\n+uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.\n+\t$(KUSTOMIZE) build config/crd | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n+\n+.PHONY: deploy\n+deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.\n+\tsed -i'' -e 's@newName: .*@newName: '\"${IMG}\"'@' ./config/base/kustomization.yaml\n+\tsed -i'' -e 's@newTag: .*@newTag: '\"${TAG}\"'@' ./config/base/kustomization.yaml\n+\t$(KUSTOMIZE) build config/base | kubectl apply -f -\n+\n+.PHONY: undeploy\n+undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.\n+\t$(KUSTOMIZE) build config/base | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n+\n+CONTROLLER_GEN = $(shell pwd)/bin/controller-gen\n+.PHONY: controller-gen\n+controller-gen: ## Download controller-gen locally if necessary.\n+\t$(call go-get-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen@v0.8.0)\n+\n+KUSTOMIZE = $(shell pwd)/bin/kustomize\n+.PHONY: kustomize\n+kustomize: ## Download kustomize locally if necessary.\n+\t$(call go-get-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/v3@v3.8.7)", - "comment_created_at": "2022-04-14T07:50:45+00:00", - "comment_author": "kimwnasptd", - "comment_body": "I agree to remove the patch altogether. The patch was making the validation more lax, by removing validation from the CPU/Memory fields.\r\n\r\n@samuelvl can you double check that creating a Notebook with the Jupyter web app also works as expected after this change? 
To make sure the backend is submitting objects that are valid.", - "pr_file_module": null - }, - { - "comment_id": "854062860", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6374, - "pr_file": "components/notebook-controller/Makefile", - "discussion_id": "839948826", - "commented_code": "@@ -21,77 +13,125 @@ else\n GOBIN=$(shell go env GOBIN)\n endif\n \n+# Setting SHELL to bash allows bash commands to be executed by recipes.\n+# This is a requirement for 'setup-envtest.sh' in the test target.\n+# Options are set to exit when a recipe line exits non-zero or a piped command fails.\n+SHELL = /usr/bin/env bash -o pipefail\n+.SHELLFLAGS = -ec\n+\n+.PHONY: all\n all: manager\n \n-# check license\n-check-license:\n-\t./third_party/check-license.sh\n+##@ General\n \n-# Run tests\n-test: generate fmt vet manifests\n-\tmkdir -p ${ENVTEST_ASSETS_DIR}\n-\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/a9bd9117a77a2f84bbc546e28991136fe0000dc0/hack/setup-envtest.sh\n-\tsource ${ENVTEST_ASSETS_DIR}/setup-envtest.sh; fetch_envtest_tools $(ENVTEST_ASSETS_DIR); setup_envtest_env $(ENVTEST_ASSETS_DIR);\n-\tgo test ./api/... ./controllers/... -coverprofile cover.out\n+# The help target prints out all targets with their descriptions organized\n+# beneath their categories. The categories are represented by '##@' and the\n+# target descriptions by '##'. The awk commands is responsible for reading the\n+# entire set of makefiles included in this invocation, looking for lines of the\n+# file as xyz: ## something, and then pretty-format the target and help. Then,\n+# if there's a line with ##@ something, that gets pretty-printed as a category.\n+# More info on the usage of ANSI control characters for terminal formatting:\n+# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters\n+# More info on the awk command:\n+# http://linuxcommand.org/lc3_adv_awk.php\n \n-# Build manager binary\n-manager: generate fmt vet\n-\tgo build -o bin/manager main.go\n+.PHONY: help\n+help: ## Display this help.\n+\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n \n-# Run against the configured Kubernetes cluster in ~/.kube/config\n-run: generate fmt vet\n-\tgo run ./main.go\n+##@ Development\n \n-# Install CRDs into a cluster\n-install: manifests\n-\tkubectl apply -f config/crd/bases\n+.PHONY: check-license\n+check-license: ## Check third-party license\n+\t./third_party/check-license.sh\n \n-# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\n-deploy: manifests\n-\tkubectl apply -f config/crd/bases\n-\tkustomize build config/default | kubectl apply -f -\n+.PHONY: manifests\n+manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n+\t$(CONTROLLER_GEN) rbac:roleName=role crd webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n \n-# Generate manifests e.g. 
CRD, RBAC etc.\n-manifests: controller-gen\n-\t$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=role webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n+.PHONY: generate\n+generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n+\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n \n-# Run go fmt against code\n-fmt:\n+.PHONY: fmt\n+fmt: ## Run go fmt against code.\n \tgo fmt ./...\n \n-# Run go vet against code\n-vet:\n+.PHONY: vet\n+vet: ## Run go vet against code.\n \tgo vet ./...\n \n-# Generate code\n-generate: controller-gen\n-\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=./api/...\n+.PHONY: test\n+test: manifests generate fmt vet envtest ## Run tests.\n+\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... -coverprofile cover.out\n+\n+##@ Build\n \n-# Build the docker image\n-docker-build: test\n+.PHONY: manager\n+manager: generate fmt vet ## Build manager binary.\n+\tgo build -o bin/manager main.go\n+\n+.PHONY: run\n+run: manifests generate fmt vet ## Run a controller from your host.\n+\tgo run ./main.go\n+\n+.PHONY: docker-build\n+docker-build: test ## Build docker image with the manager.\n \tcd .. && docker build . -t ${IMG}:${TAG} -f ./notebook-controller/Dockerfile\n-\t@echo \"updating kustomize image patch file for manager resource\"\n-\tsed -i'' -e 's@image: .*@image: '\"${IMG}:${TAG}\"'@' ./config/default/manager_image_patch.yaml\n \n-# Push the docker image\n-docker-push:\n+.PHONY: docker-push\n+docker-push: ## Push docker image with the manager.\n \tdocker push ${IMG}:${TAG}\n \n-# find or download controller-gen\n-# download controller-gen if necessary\n-controller-gen:\n-ifeq (, $(shell which controller-gen))\n-\tgo get sigs.k8s.io/controller-tools/cmd/controller-gen@v0.2.0\n-CONTROLLER_GEN=$(GOBIN)/controller-gen\n-else\n-CONTROLLER_GEN=$(shell which controller-gen)\n-endif\n+##@ Deployment\n \n-# TODO(jlewi): Can we get rid of this and just use skaffold?\n-build-gcr: test\n-\tdocker build -t $(IMG):$(TAG) .\n-\t@echo Built $(IMG):$(TAG)\n+ifndef ignore-not-found\n+ ignore-not-found = false\n+endif\n \n-push-gcr: build-gcr\n-\tdocker push $(IMG):$(TAG)\n-\t@echo Pushed $(IMG):$(TAG)\n+.PHONY: install\n+install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config.\n+\tkubectl get crd notebooks.kubeflow.org || $(KUSTOMIZE) build config/crd | kubectl create -f -\n+\n+.PHONY: uninstall\n+uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.\n+\t$(KUSTOMIZE) build config/crd | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n+\n+.PHONY: deploy\n+deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.\n+\tsed -i'' -e 's@newName: .*@newName: '\"${IMG}\"'@' ./config/base/kustomization.yaml\n+\tsed -i'' -e 's@newTag: .*@newTag: '\"${TAG}\"'@' ./config/base/kustomization.yaml\n+\t$(KUSTOMIZE) build config/base | kubectl apply -f -\n+\n+.PHONY: undeploy\n+undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. 
Call with ignore-not-found=true to ignore resource not found errors during deletion.\n+\t$(KUSTOMIZE) build config/base | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n+\n+CONTROLLER_GEN = $(shell pwd)/bin/controller-gen\n+.PHONY: controller-gen\n+controller-gen: ## Download controller-gen locally if necessary.\n+\t$(call go-get-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen@v0.8.0)\n+\n+KUSTOMIZE = $(shell pwd)/bin/kustomize\n+.PHONY: kustomize\n+kustomize: ## Download kustomize locally if necessary.\n+\t$(call go-get-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/v3@v3.8.7)", - "comment_created_at": "2022-04-20T12:14:24+00:00", - "comment_author": "samuelvl", - "comment_body": "I do confirm the backend creates the notebook object correctly after removing the patch:\r\n\r\n``` shell\r\n$ kubectl get notebooks\r\nNAME AGE\r\nscipy 13m\r\n\r\n$ kubectl get pods \r\nNAME READY STATUS RESTARTS AGE\r\nscipy-0 1/1 Running 0 10m\r\n```\r\n\r\nThis is the notebook created by the JWA:\r\n\r\n```yaml\r\napiVersion: kubeflow.org/v1\r\nkind: Notebook\r\nmetadata:\r\n annotations:\r\n notebooks.kubeflow.org/last-activity: \"2022-04-20T12:06:44Z\"\r\n notebooks.kubeflow.org/server-type: jupyter\r\n labels:\r\n app: scipy\r\n name: scipy\r\n namespace: kubeflow-user\r\nspec:\r\n template:\r\n spec:\r\n containers:\r\n - image: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:v1.5.0\r\n imagePullPolicy: IfNotPresent\r\n name: scipy\r\n resources:\r\n limits:\r\n cpu: 600m\r\n memory: 1288490188800m\r\n requests:\r\n cpu: 500m\r\n memory: 1Gi\r\n volumeMounts:\r\n - mountPath: /dev/shm\r\n name: dshm\r\n - mountPath: /home/jovyan\r\n name: scipy-volume\r\n serviceAccountName: default-editor\r\n volumes:\r\n - emptyDir:\r\n medium: Memory\r\n name: dshm\r\n - name: scipy-volume\r\n persistentVolumeClaim:\r\n claimName: scipy-volume\r\n``` \r\n\r\n\r\n", - "pr_file_module": null - }, - { - "comment_id": "862436970", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6374, - "pr_file": "components/notebook-controller/Makefile", - "discussion_id": "839948826", - "commented_code": "@@ -21,77 +13,125 @@ else\n GOBIN=$(shell go env GOBIN)\n endif\n \n+# Setting SHELL to bash allows bash commands to be executed by recipes.\n+# This is a requirement for 'setup-envtest.sh' in the test target.\n+# Options are set to exit when a recipe line exits non-zero or a piped command fails.\n+SHELL = /usr/bin/env bash -o pipefail\n+.SHELLFLAGS = -ec\n+\n+.PHONY: all\n all: manager\n \n-# check license\n-check-license:\n-\t./third_party/check-license.sh\n+##@ General\n \n-# Run tests\n-test: generate fmt vet manifests\n-\tmkdir -p ${ENVTEST_ASSETS_DIR}\n-\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/a9bd9117a77a2f84bbc546e28991136fe0000dc0/hack/setup-envtest.sh\n-\tsource ${ENVTEST_ASSETS_DIR}/setup-envtest.sh; fetch_envtest_tools $(ENVTEST_ASSETS_DIR); setup_envtest_env $(ENVTEST_ASSETS_DIR);\n-\tgo test ./api/... ./controllers/... -coverprofile cover.out\n+# The help target prints out all targets with their descriptions organized\n+# beneath their categories. The categories are represented by '##@' and the\n+# target descriptions by '##'. The awk commands is responsible for reading the\n+# entire set of makefiles included in this invocation, looking for lines of the\n+# file as xyz: ## something, and then pretty-format the target and help. 
Then,\n+# if there's a line with ##@ something, that gets pretty-printed as a category.\n+# More info on the usage of ANSI control characters for terminal formatting:\n+# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters\n+# More info on the awk command:\n+# http://linuxcommand.org/lc3_adv_awk.php\n \n-# Build manager binary\n-manager: generate fmt vet\n-\tgo build -o bin/manager main.go\n+.PHONY: help\n+help: ## Display this help.\n+\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n \n-# Run against the configured Kubernetes cluster in ~/.kube/config\n-run: generate fmt vet\n-\tgo run ./main.go\n+##@ Development\n \n-# Install CRDs into a cluster\n-install: manifests\n-\tkubectl apply -f config/crd/bases\n+.PHONY: check-license\n+check-license: ## Check third-party license\n+\t./third_party/check-license.sh\n \n-# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\n-deploy: manifests\n-\tkubectl apply -f config/crd/bases\n-\tkustomize build config/default | kubectl apply -f -\n+.PHONY: manifests\n+manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n+\t$(CONTROLLER_GEN) rbac:roleName=role crd webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n \n-# Generate manifests e.g. CRD, RBAC etc.\n-manifests: controller-gen\n-\t$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=role webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n+.PHONY: generate\n+generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n+\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n \n-# Run go fmt against code\n-fmt:\n+.PHONY: fmt\n+fmt: ## Run go fmt against code.\n \tgo fmt ./...\n \n-# Run go vet against code\n-vet:\n+.PHONY: vet\n+vet: ## Run go vet against code.\n \tgo vet ./...\n \n-# Generate code\n-generate: controller-gen\n-\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=./api/...\n+.PHONY: test\n+test: manifests generate fmt vet envtest ## Run tests.\n+\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... -coverprofile cover.out\n+\n+##@ Build\n \n-# Build the docker image\n-docker-build: test\n+.PHONY: manager\n+manager: generate fmt vet ## Build manager binary.\n+\tgo build -o bin/manager main.go\n+\n+.PHONY: run\n+run: manifests generate fmt vet ## Run a controller from your host.\n+\tgo run ./main.go\n+\n+.PHONY: docker-build\n+docker-build: test ## Build docker image with the manager.\n \tcd .. && docker build . 
-t ${IMG}:${TAG} -f ./notebook-controller/Dockerfile\n-\t@echo \"updating kustomize image patch file for manager resource\"\n-\tsed -i'' -e 's@image: .*@image: '\"${IMG}:${TAG}\"'@' ./config/default/manager_image_patch.yaml\n \n-# Push the docker image\n-docker-push:\n+.PHONY: docker-push\n+docker-push: ## Push docker image with the manager.\n \tdocker push ${IMG}:${TAG}\n \n-# find or download controller-gen\n-# download controller-gen if necessary\n-controller-gen:\n-ifeq (, $(shell which controller-gen))\n-\tgo get sigs.k8s.io/controller-tools/cmd/controller-gen@v0.2.0\n-CONTROLLER_GEN=$(GOBIN)/controller-gen\n-else\n-CONTROLLER_GEN=$(shell which controller-gen)\n-endif\n+##@ Deployment\n \n-# TODO(jlewi): Can we get rid of this and just use skaffold?\n-build-gcr: test\n-\tdocker build -t $(IMG):$(TAG) .\n-\t@echo Built $(IMG):$(TAG)\n+ifndef ignore-not-found\n+ ignore-not-found = false\n+endif\n \n-push-gcr: build-gcr\n-\tdocker push $(IMG):$(TAG)\n-\t@echo Pushed $(IMG):$(TAG)\n+.PHONY: install\n+install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config.\n+\tkubectl get crd notebooks.kubeflow.org || $(KUSTOMIZE) build config/crd | kubectl create -f -\n+\n+.PHONY: uninstall\n+uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.\n+\t$(KUSTOMIZE) build config/crd | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n+\n+.PHONY: deploy\n+deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.\n+\tsed -i'' -e 's@newName: .*@newName: '\"${IMG}\"'@' ./config/base/kustomization.yaml\n+\tsed -i'' -e 's@newTag: .*@newTag: '\"${TAG}\"'@' ./config/base/kustomization.yaml\n+\t$(KUSTOMIZE) build config/base | kubectl apply -f -\n+\n+.PHONY: undeploy\n+undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. 
Call with ignore-not-found=true to ignore resource not found errors during deletion.\n+\t$(KUSTOMIZE) build config/base | kubectl delete --ignore-not-found=$(ignore-not-found) -f -\n+\n+CONTROLLER_GEN = $(shell pwd)/bin/controller-gen\n+.PHONY: controller-gen\n+controller-gen: ## Download controller-gen locally if necessary.\n+\t$(call go-get-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen@v0.8.0)\n+\n+KUSTOMIZE = $(shell pwd)/bin/kustomize\n+.PHONY: kustomize\n+kustomize: ## Download kustomize locally if necessary.\n+\t$(call go-get-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/v3@v3.8.7)", - "comment_created_at": "2022-05-01T07:44:23+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Perfect!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "519691721", - "pr_number": 5378, - "pr_file": "components/notebook-controller/Makefile", - "created_at": "2020-11-09T10:10:35+00:00", - "commented_code": "# Run tests\ntest: generate fmt vet manifests check-license\n\tmkdir -p ${ENVTEST_ASSETS_DIR}\n\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/master/hack/setup-envtest.sh", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "519691721", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5378, - "pr_file": "components/notebook-controller/Makefile", - "discussion_id": "519691721", - "commented_code": "@@ -25,6 +28,9 @@ check-license:\n \n # Run tests\n test: generate fmt vet manifests check-license\n+\tmkdir -p ${ENVTEST_ASSETS_DIR}\n+\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/master/hack/setup-envtest.sh", - "comment_created_at": "2020-11-09T10:10:35+00:00", - "comment_author": "yanniszark", - "comment_body": "Can you pin the script at a particular commit?\r\nThis way, we can be sure an update in this external repo doesn't break us.", - "pr_file_module": null - }, - { - "comment_id": "520057276", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5378, - "pr_file": "components/notebook-controller/Makefile", - "discussion_id": "519691721", - "commented_code": "@@ -25,6 +28,9 @@ check-license:\n \n # Run tests\n test: generate fmt vet manifests check-license\n+\tmkdir -p ${ENVTEST_ASSETS_DIR}\n+\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/master/hack/setup-envtest.sh", - "comment_created_at": "2020-11-09T19:12:56+00:00", - "comment_author": "naveensrinivasan", - "comment_body": "Thanks, I will do that. 
\ud83d\udc4d ", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-precise-workflow-triggers.json b/_reviewers/kubeflow-precise-workflow-triggers.json new file mode 100644 index 0000000..03e0cde --- /dev/null +++ b/_reviewers/kubeflow-precise-workflow-triggers.json @@ -0,0 +1,276 @@ +[ + { + "discussion_id": "1053079365", + "pr_number": 6854, + "pr_file": ".github/workflows/centraldb_angular_docker_publish.yaml", + "created_at": "2022-12-20T09:16:52+00:00", + "commented_code": "id: version\n if: steps.filter.outputs.version == 'true'\n run: |\n docker tag ${{env.IMG}}:${TAG} ${{env.IMG}}:${VERSION_TAG}\n export VERSION_TAG=$(cat releasing/version/VERSION)\n docker tag ${{env.IMG}}:${{env.TAG}} ${{env.IMG}}:${VERSION_TAG}\n docker push ${{env.IMG}}:${VERSION_TAG}", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1053079365", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6854, + "pr_file": ".github/workflows/centraldb_angular_docker_publish.yaml", + "discussion_id": "1053079365", + "commented_code": "@@ -49,8 +50,6 @@ jobs:\n id: version\n if: steps.filter.outputs.version == 'true'\n run: |\n- docker tag ${{env.IMG}}:${TAG} ${{env.IMG}}:${VERSION_TAG}\n+ export VERSION_TAG=$(cat releasing/version/VERSION)\n+ docker tag ${{env.IMG}}:${{env.TAG}} ${{env.IMG}}:${VERSION_TAG}\n docker push ${{env.IMG}}:${VERSION_TAG}", + "comment_created_at": "2022-12-20T09:16:52+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Let's re-use the makefile rules here, to avoid duplication of code in makefiles and GH Actions. The layers are already cached so rebuilding the image should be very fast\r\n```bash\r\nexport TAG=$(cat releasing/version/VERSION)\r\ncd components/centraldashboard-angular\r\nmake docker-build docker-push\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1053177606", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6854, + "pr_file": ".github/workflows/centraldb_angular_docker_publish.yaml", + "discussion_id": "1053079365", + "commented_code": "@@ -49,8 +50,6 @@ jobs:\n id: version\n if: steps.filter.outputs.version == 'true'\n run: |\n- docker tag ${{env.IMG}}:${TAG} ${{env.IMG}}:${VERSION_TAG}\n+ export VERSION_TAG=$(cat releasing/version/VERSION)\n+ docker tag ${{env.IMG}}:${{env.TAG}} ${{env.IMG}}:${VERSION_TAG}\n docker push ${{env.IMG}}:${VERSION_TAG}", + "comment_created_at": "2022-12-20T10:48:13+00:00", + "comment_author": "apo-ger", + "comment_body": "Sure! However, we can't avoid duplication of code for building/pushing the notebook server images. This is because we don't have an `IMG` env var in the Makefiles (e.g [codeserver-python Makefile](https://github.com/kubeflow/kubeflow/blob/658f5b8a76e3b19438678decaf21a9c2fdfd9af1/components/example-notebook-servers/codeserver-python/Makefile#L1)) to set the `registry` there. 
Therefore with the current Makefiles we have to use:\r\n- `make docker-build`\r\n- `docker tag` to add the registry\r\n- and finally `docker push`\r\n\r\nFor example:\r\n```yaml\r\n - name: Build and push Notebook Server images\r\n run: |\r\n export TAG=$(shell git describe --tags --always --dirty)\r\n cd components/example-notebook-servers/\r\n make docker-build -C codeserver-python TAG=${TAG}\r\n docker tag codeserver-python:${TAG} ${{env.REGISTRY}}/codeserver-python:${TAG}\r\n docker push ${{env.REGISTRY}}/codeserver-python:${TAG}\r\n\r\n - name: Build and push latest Notebook Server images\r\n if: github.ref == 'refs/heads/master'\r\n run: | \r\n export TAG=latest\r\n cd components/example-notebook-servers/\r\n make docker-build -C codeserver-python TAG=${TAG}\r\n docker tag codeserver-python:${TAG} ${{env.REGISTRY}}/codeserver-python:${TAG}\r\n docker push ${{env.REGISTRY}}/codeserver-python:${TAG}\r\n\r\n - name: Build and push Notebook Server images on Version change\r\n id: version\r\n if: steps.filter.outputs.version == 'true'\r\n run: |\r\n export TAG=$(cat releasing/version/VERSION)\r\n cd components/example-notebook-servers/\r\n make docker-build -C codeserver-python TAG=${TAG}\r\n docker tag codeserver-python:${TAG} ${{env.REGISTRY}}/codeserver-python:${TAG}\r\n docker push ${{env.REGISTRY}}/codeserver-python:${TAG}\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1053230770", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6854, + "pr_file": ".github/workflows/centraldb_angular_docker_publish.yaml", + "discussion_id": "1053079365", + "commented_code": "@@ -49,8 +50,6 @@ jobs:\n id: version\n if: steps.filter.outputs.version == 'true'\n run: |\n- docker tag ${{env.IMG}}:${TAG} ${{env.IMG}}:${VERSION_TAG}\n+ export VERSION_TAG=$(cat releasing/version/VERSION)\n+ docker tag ${{env.IMG}}:${{env.TAG}} ${{env.IMG}}:${VERSION_TAG}\n docker push ${{env.IMG}}:${VERSION_TAG}", + "comment_created_at": "2022-12-20T11:49:53+00:00", + "comment_author": "kimwnasptd", + "comment_body": "@apo-ger let's leave the notebook images completely out for this effort.\r\n\r\nI believe we would be able to use the Makefiles if we'd use a `REGISTRY` env var in each Makefile. 
But let's discuss this in a follow up PR, since this one is already getting big", + "pr_file_module": null + }, + { + "comment_id": "1053247283", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6854, + "pr_file": ".github/workflows/centraldb_angular_docker_publish.yaml", + "discussion_id": "1053079365", + "commented_code": "@@ -49,8 +50,6 @@ jobs:\n id: version\n if: steps.filter.outputs.version == 'true'\n run: |\n- docker tag ${{env.IMG}}:${TAG} ${{env.IMG}}:${VERSION_TAG}\n+ export VERSION_TAG=$(cat releasing/version/VERSION)\n+ docker tag ${{env.IMG}}:${{env.TAG}} ${{env.IMG}}:${VERSION_TAG}\n docker push ${{env.IMG}}:${VERSION_TAG}", + "comment_created_at": "2022-12-20T12:10:38+00:00", + "comment_author": "apo-ger", + "comment_body": "@kimwnasptd \r\n> let's leave the notebook images completely out for this effort.\r\n\r\nShould I remove the logic for building/pushing with latest tag or remove the notebook-server workflows completely and tackle them in the follow up PR (manifests/Makefiles/workflows)?", + "pr_file_module": null + }, + { + "comment_id": "1053260293", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6854, + "pr_file": ".github/workflows/centraldb_angular_docker_publish.yaml", + "discussion_id": "1053079365", + "commented_code": "@@ -49,8 +50,6 @@ jobs:\n id: version\n if: steps.filter.outputs.version == 'true'\n run: |\n- docker tag ${{env.IMG}}:${TAG} ${{env.IMG}}:${VERSION_TAG}\n+ export VERSION_TAG=$(cat releasing/version/VERSION)\n+ docker tag ${{env.IMG}}:${{env.TAG}} ${{env.IMG}}:${VERSION_TAG}\n docker push ${{env.IMG}}:${VERSION_TAG}", + "comment_created_at": "2022-12-20T12:26:10+00:00", + "comment_author": "kimwnasptd", + "comment_body": "just the changes from this PR, and we can send another PR for the latest tag in a separate PR that:\r\n1. Uses a `REGISTRY` env var in all makefiles (let's see if we need to do more)\r\n2. 
Use the Makefiles to build each notebook, but with a different TAG each time like we do for the rest of the components", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "900127527", + "pr_number": 6524, + "pr_file": ".github/workflows/centraldb_manifests_build.yaml", + "created_at": "2022-06-17T13:39:53+00:00", + "commented_code": "name: Build CentralDashboard manifests\non:\n pull_request:\n paths:\n - components/centraldashboard/**", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "900127527", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6524, + "pr_file": ".github/workflows/centraldb_manifests_build.yaml", + "discussion_id": "900127527", + "commented_code": "@@ -0,0 +1,23 @@\n+name: Build CentralDashboard manifests\n+on:\n+ pull_request:\n+ paths:\n+ - components/centraldashboard/**", + "comment_created_at": "2022-06-17T13:39:53+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Since we are testing manifests this time we don't want to trigger the workflows when code changes, but rather when the **manifests** change.\r\n\r\nSo in this case it will be `components/centraldashboard/manifests/**`", + "pr_file_module": null + }, + { + "comment_id": "900190123", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6524, + "pr_file": ".github/workflows/centraldb_manifests_build.yaml", + "discussion_id": "900127527", + "commented_code": "@@ -0,0 +1,23 @@\n+name: Build CentralDashboard manifests\n+on:\n+ pull_request:\n+ paths:\n+ - components/centraldashboard/**", + "comment_created_at": "2022-06-17T14:44:05+00:00", + "comment_author": "NickLoukas", + "comment_body": "I made the change.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "900197491", + "pr_number": 6524, + "pr_file": ".github/workflows/prof_controller_manifests_test.yaml", + "created_at": "2022-06-17T14:52:02+00:00", + "commented_code": "name: Build Profile Controller manifests\non:\n pull_request:\n paths:\n - components/profile-controller/**", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "900197491", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6524, + "pr_file": ".github/workflows/prof_controller_manifests_test.yaml", + "discussion_id": "900197491", + "commented_code": "@@ -0,0 +1,23 @@\n+name: Build Profile Controller manifests\n+on:\n+ pull_request:\n+ paths:\n+ - components/profile-controller/**", + "comment_created_at": "2022-06-17T14:52:02+00:00", + "comment_author": "kimwnasptd", + "comment_body": "I think you missed this one. It should be `components/profile-controller/config/**`", + "pr_file_module": null + }, + { + "comment_id": "900214466", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6524, + "pr_file": ".github/workflows/prof_controller_manifests_test.yaml", + "discussion_id": "900197491", + "commented_code": "@@ -0,0 +1,23 @@\n+name: Build Profile Controller manifests\n+on:\n+ pull_request:\n+ paths:\n+ - components/profile-controller/**", + "comment_created_at": "2022-06-17T15:10:07+00:00", + "comment_author": "NickLoukas", + "comment_body": "Yeap you are correct. 
Fixed it.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "893852939", + "pr_number": 6510, + "pr_file": ".github/workflows/jwa_docker_publish.yaml", + "created_at": "2022-06-09T18:51:06+00:00", + "commented_code": "name: Build & Publish JWA Docker image\non:\n push:\n branches:\n - master\n - v*-branch\n paths:\n - components/crud-web-apps/jupyter/**", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "893852939", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6510, + "pr_file": ".github/workflows/jwa_docker_publish.yaml", + "discussion_id": "893852939", + "commented_code": "@@ -0,0 +1,28 @@\n+name: Build & Publish JWA Docker image\n+on:\n+ push:\n+ branches:\n+ - master\n+ - v*-branch\n+ paths:\n+ - components/crud-web-apps/jupyter/**", + "comment_created_at": "2022-06-09T18:51:06+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Lets add another path here for `components/crud-web-apps/common/**`. We want the web app to be rebuild whenever we also touch the common code.", + "pr_file_module": null + }, + { + "comment_id": "894351947", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6510, + "pr_file": ".github/workflows/jwa_docker_publish.yaml", + "discussion_id": "893852939", + "commented_code": "@@ -0,0 +1,28 @@\n+name: Build & Publish JWA Docker image\n+on:\n+ push:\n+ branches:\n+ - master\n+ - v*-branch\n+ paths:\n+ - components/crud-web-apps/jupyter/**", + "comment_created_at": "2022-06-10T09:45:07+00:00", + "comment_author": "NickLoukas", + "comment_body": "Added that.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "893857912", + "pr_number": 6510, + "pr_file": ".github/workflows/twa_docker_publish.yaml", + "created_at": "2022-06-09T18:57:27+00:00", + "commented_code": "name: Build & Publish TWA Docker image\non:\n push:\n branches:\n - master\n - v*-branch\n paths:\n - components/crud-web-apps/tensorboards/**", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "893857912", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6510, + "pr_file": ".github/workflows/twa_docker_publish.yaml", + "discussion_id": "893857912", + "commented_code": "@@ -0,0 +1,28 @@\n+name: Build & Publish TWA Docker image\n+on:\n+ push:\n+ branches:\n+ - master\n+ - v*-branch\n+ paths:\n+ - components/crud-web-apps/tensorboards/**", + "comment_created_at": "2022-06-09T18:57:27+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Lets add another path here for `components/crud-web-apps/common/**`. 
We want the web app to be rebuild whenever we also touch the common code.", + "pr_file_module": null + }, + { + "comment_id": "894352668", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6510, + "pr_file": ".github/workflows/twa_docker_publish.yaml", + "discussion_id": "893857912", + "commented_code": "@@ -0,0 +1,28 @@\n+name: Build & Publish TWA Docker image\n+on:\n+ push:\n+ branches:\n+ - master\n+ - v*-branch\n+ paths:\n+ - components/crud-web-apps/tensorboards/**", + "comment_created_at": "2022-06-10T09:46:00+00:00", + "comment_author": "NickLoukas", + "comment_body": "Added that.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "893858030", + "pr_number": 6510, + "pr_file": ".github/workflows/vwa_docker_publish.yaml", + "created_at": "2022-06-09T18:57:35+00:00", + "commented_code": "name: Build & Publish VWA Docker image\non:\n push:\n branches:\n - master\n - v*-branch\n paths:\n - components/crud-web-apps/volumes/**", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "893858030", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6510, + "pr_file": ".github/workflows/vwa_docker_publish.yaml", + "discussion_id": "893858030", + "commented_code": "@@ -0,0 +1,28 @@\n+name: Build & Publish VWA Docker image\n+on:\n+ push:\n+ branches:\n+ - master\n+ - v*-branch\n+ paths:\n+ - components/crud-web-apps/volumes/**", + "comment_created_at": "2022-06-09T18:57:35+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Lets add another path here for `components/crud-web-apps/common/**`. We want the web app to be rebuild whenever we also touch the common code.", + "pr_file_module": null + }, + { + "comment_id": "894352798", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6510, + "pr_file": ".github/workflows/vwa_docker_publish.yaml", + "discussion_id": "893858030", + "commented_code": "@@ -0,0 +1,28 @@\n+name: Build & Publish VWA Docker image\n+on:\n+ push:\n+ branches:\n+ - master\n+ - v*-branch\n+ paths:\n+ - components/crud-web-apps/volumes/**", + "comment_created_at": "2022-06-10T09:46:10+00:00", + "comment_author": "NickLoukas", + "comment_body": "Added that.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "893898341", + "pr_number": 6510, + "pr_file": ".github/workflows/nb_controller_docker_publish.yaml", + "created_at": "2022-06-09T19:50:43+00:00", + "commented_code": "name: Build & Publish Notebook Controller Docker image\non:\n push:\n branches:\n - master\n - v*-branch\n paths:\n - components/notebook-controller/**", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "893898341", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6510, + "pr_file": ".github/workflows/nb_controller_docker_publish.yaml", + "discussion_id": "893898341", + "commented_code": "@@ -0,0 +1,30 @@\n+name: Build & Publish Notebook Controller Docker image\n+on:\n+ push:\n+ branches:\n+ - master\n+ - v*-branch\n+ paths:\n+ - components/notebook-controller/**", + "comment_created_at": "2022-06-09T19:50:43+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Let's also trigger this workflow for the `/components/common/**` directory, since it's using common code from there", + "pr_file_module": null + }, + { + "comment_id": "894353026", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6510, + "pr_file": ".github/workflows/nb_controller_docker_publish.yaml", + "discussion_id": "893898341", + "commented_code": "@@ -0,0 +1,30 @@\n+name: Build & Publish Notebook Controller 
Docker image\n+on:\n+ push:\n+ branches:\n+ - master\n+ - v*-branch\n+ paths:\n+ - components/notebook-controller/**", + "comment_created_at": "2022-06-10T09:46:28+00:00", + "comment_author": "NickLoukas", + "comment_body": "Added that.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-precise-workflow-triggers.md b/_reviewers/kubeflow-precise-workflow-triggers.md index 7742451..b4966d2 100644 --- a/_reviewers/kubeflow-precise-workflow-triggers.md +++ b/_reviewers/kubeflow-precise-workflow-triggers.md @@ -42,281 +42,3 @@ on: ``` Centralize build logic in Makefiles instead of duplicating in GitHub Actions. This allows workflows to simply call make targets, making pipelines more maintainable and consistent across environments. - - -[ - { - "discussion_id": "1053079365", - "pr_number": 6854, - "pr_file": ".github/workflows/centraldb_angular_docker_publish.yaml", - "created_at": "2022-12-20T09:16:52+00:00", - "commented_code": "id: version\n if: steps.filter.outputs.version == 'true'\n run: |\n docker tag ${{env.IMG}}:${TAG} ${{env.IMG}}:${VERSION_TAG}\n export VERSION_TAG=$(cat releasing/version/VERSION)\n docker tag ${{env.IMG}}:${{env.TAG}} ${{env.IMG}}:${VERSION_TAG}\n docker push ${{env.IMG}}:${VERSION_TAG}", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1053079365", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6854, - "pr_file": ".github/workflows/centraldb_angular_docker_publish.yaml", - "discussion_id": "1053079365", - "commented_code": "@@ -49,8 +50,6 @@ jobs:\n id: version\n if: steps.filter.outputs.version == 'true'\n run: |\n- docker tag ${{env.IMG}}:${TAG} ${{env.IMG}}:${VERSION_TAG}\n+ export VERSION_TAG=$(cat releasing/version/VERSION)\n+ docker tag ${{env.IMG}}:${{env.TAG}} ${{env.IMG}}:${VERSION_TAG}\n docker push ${{env.IMG}}:${VERSION_TAG}", - "comment_created_at": "2022-12-20T09:16:52+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Let's re-use the makefile rules here, to avoid duplication of code in makefiles and GH Actions. The layers are already cached so rebuilding the image should be very fast\r\n```bash\r\nexport TAG=$(cat releasing/version/VERSION)\r\ncd components/centraldashboard-angular\r\nmake docker-build docker-push\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1053177606", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6854, - "pr_file": ".github/workflows/centraldb_angular_docker_publish.yaml", - "discussion_id": "1053079365", - "commented_code": "@@ -49,8 +50,6 @@ jobs:\n id: version\n if: steps.filter.outputs.version == 'true'\n run: |\n- docker tag ${{env.IMG}}:${TAG} ${{env.IMG}}:${VERSION_TAG}\n+ export VERSION_TAG=$(cat releasing/version/VERSION)\n+ docker tag ${{env.IMG}}:${{env.TAG}} ${{env.IMG}}:${VERSION_TAG}\n docker push ${{env.IMG}}:${VERSION_TAG}", - "comment_created_at": "2022-12-20T10:48:13+00:00", - "comment_author": "apo-ger", - "comment_body": "Sure! However, we can't avoid duplication of code for building/pushing the notebook server images. This is because we don't have an `IMG` env var in the Makefiles (e.g [codeserver-python Makefile](https://github.com/kubeflow/kubeflow/blob/658f5b8a76e3b19438678decaf21a9c2fdfd9af1/components/example-notebook-servers/codeserver-python/Makefile#L1)) to set the `registry` there. 
Therefore with the current Makefiles we have to use:\r\n- `make docker-build`\r\n- `docker tag` to add the registry\r\n- and finally `docker push`\r\n\r\nFor example:\r\n```yaml\r\n - name: Build and push Notebook Server images\r\n run: |\r\n export TAG=$(shell git describe --tags --always --dirty)\r\n cd components/example-notebook-servers/\r\n make docker-build -C codeserver-python TAG=${TAG}\r\n docker tag codeserver-python:${TAG} ${{env.REGISTRY}}/codeserver-python:${TAG}\r\n docker push ${{env.REGISTRY}}/codeserver-python:${TAG}\r\n\r\n - name: Build and push latest Notebook Server images\r\n if: github.ref == 'refs/heads/master'\r\n run: | \r\n export TAG=latest\r\n cd components/example-notebook-servers/\r\n make docker-build -C codeserver-python TAG=${TAG}\r\n docker tag codeserver-python:${TAG} ${{env.REGISTRY}}/codeserver-python:${TAG}\r\n docker push ${{env.REGISTRY}}/codeserver-python:${TAG}\r\n\r\n - name: Build and push Notebook Server images on Version change\r\n id: version\r\n if: steps.filter.outputs.version == 'true'\r\n run: |\r\n export TAG=$(cat releasing/version/VERSION)\r\n cd components/example-notebook-servers/\r\n make docker-build -C codeserver-python TAG=${TAG}\r\n docker tag codeserver-python:${TAG} ${{env.REGISTRY}}/codeserver-python:${TAG}\r\n docker push ${{env.REGISTRY}}/codeserver-python:${TAG}\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1053230770", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6854, - "pr_file": ".github/workflows/centraldb_angular_docker_publish.yaml", - "discussion_id": "1053079365", - "commented_code": "@@ -49,8 +50,6 @@ jobs:\n id: version\n if: steps.filter.outputs.version == 'true'\n run: |\n- docker tag ${{env.IMG}}:${TAG} ${{env.IMG}}:${VERSION_TAG}\n+ export VERSION_TAG=$(cat releasing/version/VERSION)\n+ docker tag ${{env.IMG}}:${{env.TAG}} ${{env.IMG}}:${VERSION_TAG}\n docker push ${{env.IMG}}:${VERSION_TAG}", - "comment_created_at": "2022-12-20T11:49:53+00:00", - "comment_author": "kimwnasptd", - "comment_body": "@apo-ger let's leave the notebook images completely out for this effort.\r\n\r\nI believe we would be able to use the Makefiles if we'd use a `REGISTRY` env var in each Makefile. 
But let's discuss this in a follow up PR, since this one is already getting big", - "pr_file_module": null - }, - { - "comment_id": "1053247283", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6854, - "pr_file": ".github/workflows/centraldb_angular_docker_publish.yaml", - "discussion_id": "1053079365", - "commented_code": "@@ -49,8 +50,6 @@ jobs:\n id: version\n if: steps.filter.outputs.version == 'true'\n run: |\n- docker tag ${{env.IMG}}:${TAG} ${{env.IMG}}:${VERSION_TAG}\n+ export VERSION_TAG=$(cat releasing/version/VERSION)\n+ docker tag ${{env.IMG}}:${{env.TAG}} ${{env.IMG}}:${VERSION_TAG}\n docker push ${{env.IMG}}:${VERSION_TAG}", - "comment_created_at": "2022-12-20T12:10:38+00:00", - "comment_author": "apo-ger", - "comment_body": "@kimwnasptd \r\n> let's leave the notebook images completely out for this effort.\r\n\r\nShould I remove the logic for building/pushing with latest tag or remove the notebook-server workflows completely and tackle them in the follow up PR (manifests/Makefiles/workflows)?", - "pr_file_module": null - }, - { - "comment_id": "1053260293", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6854, - "pr_file": ".github/workflows/centraldb_angular_docker_publish.yaml", - "discussion_id": "1053079365", - "commented_code": "@@ -49,8 +50,6 @@ jobs:\n id: version\n if: steps.filter.outputs.version == 'true'\n run: |\n- docker tag ${{env.IMG}}:${TAG} ${{env.IMG}}:${VERSION_TAG}\n+ export VERSION_TAG=$(cat releasing/version/VERSION)\n+ docker tag ${{env.IMG}}:${{env.TAG}} ${{env.IMG}}:${VERSION_TAG}\n docker push ${{env.IMG}}:${VERSION_TAG}", - "comment_created_at": "2022-12-20T12:26:10+00:00", - "comment_author": "kimwnasptd", - "comment_body": "just the changes from this PR, and we can send another PR for the latest tag in a separate PR that:\r\n1. Uses a `REGISTRY` env var in all makefiles (let's see if we need to do more)\r\n2. 
Use the Makefiles to build each notebook, but with a different TAG each time like we do for the rest of the components", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "900127527", - "pr_number": 6524, - "pr_file": ".github/workflows/centraldb_manifests_build.yaml", - "created_at": "2022-06-17T13:39:53+00:00", - "commented_code": "name: Build CentralDashboard manifests\non:\n pull_request:\n paths:\n - components/centraldashboard/**", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "900127527", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6524, - "pr_file": ".github/workflows/centraldb_manifests_build.yaml", - "discussion_id": "900127527", - "commented_code": "@@ -0,0 +1,23 @@\n+name: Build CentralDashboard manifests\n+on:\n+ pull_request:\n+ paths:\n+ - components/centraldashboard/**", - "comment_created_at": "2022-06-17T13:39:53+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Since we are testing manifests this time we don't want to trigger the workflows when code changes, but rather when the **manifests** change.\r\n\r\nSo in this case it will be `components/centraldashboard/manifests/**`", - "pr_file_module": null - }, - { - "comment_id": "900190123", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6524, - "pr_file": ".github/workflows/centraldb_manifests_build.yaml", - "discussion_id": "900127527", - "commented_code": "@@ -0,0 +1,23 @@\n+name: Build CentralDashboard manifests\n+on:\n+ pull_request:\n+ paths:\n+ - components/centraldashboard/**", - "comment_created_at": "2022-06-17T14:44:05+00:00", - "comment_author": "NickLoukas", - "comment_body": "I made the change.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "900197491", - "pr_number": 6524, - "pr_file": ".github/workflows/prof_controller_manifests_test.yaml", - "created_at": "2022-06-17T14:52:02+00:00", - "commented_code": "name: Build Profile Controller manifests\non:\n pull_request:\n paths:\n - components/profile-controller/**", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "900197491", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6524, - "pr_file": ".github/workflows/prof_controller_manifests_test.yaml", - "discussion_id": "900197491", - "commented_code": "@@ -0,0 +1,23 @@\n+name: Build Profile Controller manifests\n+on:\n+ pull_request:\n+ paths:\n+ - components/profile-controller/**", - "comment_created_at": "2022-06-17T14:52:02+00:00", - "comment_author": "kimwnasptd", - "comment_body": "I think you missed this one. It should be `components/profile-controller/config/**`", - "pr_file_module": null - }, - { - "comment_id": "900214466", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6524, - "pr_file": ".github/workflows/prof_controller_manifests_test.yaml", - "discussion_id": "900197491", - "commented_code": "@@ -0,0 +1,23 @@\n+name: Build Profile Controller manifests\n+on:\n+ pull_request:\n+ paths:\n+ - components/profile-controller/**", - "comment_created_at": "2022-06-17T15:10:07+00:00", - "comment_author": "NickLoukas", - "comment_body": "Yeap you are correct. 
Fixed it.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "893852939", - "pr_number": 6510, - "pr_file": ".github/workflows/jwa_docker_publish.yaml", - "created_at": "2022-06-09T18:51:06+00:00", - "commented_code": "name: Build & Publish JWA Docker image\non:\n push:\n branches:\n - master\n - v*-branch\n paths:\n - components/crud-web-apps/jupyter/**", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "893852939", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6510, - "pr_file": ".github/workflows/jwa_docker_publish.yaml", - "discussion_id": "893852939", - "commented_code": "@@ -0,0 +1,28 @@\n+name: Build & Publish JWA Docker image\n+on:\n+ push:\n+ branches:\n+ - master\n+ - v*-branch\n+ paths:\n+ - components/crud-web-apps/jupyter/**", - "comment_created_at": "2022-06-09T18:51:06+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Lets add another path here for `components/crud-web-apps/common/**`. We want the web app to be rebuild whenever we also touch the common code.", - "pr_file_module": null - }, - { - "comment_id": "894351947", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6510, - "pr_file": ".github/workflows/jwa_docker_publish.yaml", - "discussion_id": "893852939", - "commented_code": "@@ -0,0 +1,28 @@\n+name: Build & Publish JWA Docker image\n+on:\n+ push:\n+ branches:\n+ - master\n+ - v*-branch\n+ paths:\n+ - components/crud-web-apps/jupyter/**", - "comment_created_at": "2022-06-10T09:45:07+00:00", - "comment_author": "NickLoukas", - "comment_body": "Added that.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "893857912", - "pr_number": 6510, - "pr_file": ".github/workflows/twa_docker_publish.yaml", - "created_at": "2022-06-09T18:57:27+00:00", - "commented_code": "name: Build & Publish TWA Docker image\non:\n push:\n branches:\n - master\n - v*-branch\n paths:\n - components/crud-web-apps/tensorboards/**", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "893857912", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6510, - "pr_file": ".github/workflows/twa_docker_publish.yaml", - "discussion_id": "893857912", - "commented_code": "@@ -0,0 +1,28 @@\n+name: Build & Publish TWA Docker image\n+on:\n+ push:\n+ branches:\n+ - master\n+ - v*-branch\n+ paths:\n+ - components/crud-web-apps/tensorboards/**", - "comment_created_at": "2022-06-09T18:57:27+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Lets add another path here for `components/crud-web-apps/common/**`. 
We want the web app to be rebuild whenever we also touch the common code.", - "pr_file_module": null - }, - { - "comment_id": "894352668", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6510, - "pr_file": ".github/workflows/twa_docker_publish.yaml", - "discussion_id": "893857912", - "commented_code": "@@ -0,0 +1,28 @@\n+name: Build & Publish TWA Docker image\n+on:\n+ push:\n+ branches:\n+ - master\n+ - v*-branch\n+ paths:\n+ - components/crud-web-apps/tensorboards/**", - "comment_created_at": "2022-06-10T09:46:00+00:00", - "comment_author": "NickLoukas", - "comment_body": "Added that.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "893858030", - "pr_number": 6510, - "pr_file": ".github/workflows/vwa_docker_publish.yaml", - "created_at": "2022-06-09T18:57:35+00:00", - "commented_code": "name: Build & Publish VWA Docker image\non:\n push:\n branches:\n - master\n - v*-branch\n paths:\n - components/crud-web-apps/volumes/**", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "893858030", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6510, - "pr_file": ".github/workflows/vwa_docker_publish.yaml", - "discussion_id": "893858030", - "commented_code": "@@ -0,0 +1,28 @@\n+name: Build & Publish VWA Docker image\n+on:\n+ push:\n+ branches:\n+ - master\n+ - v*-branch\n+ paths:\n+ - components/crud-web-apps/volumes/**", - "comment_created_at": "2022-06-09T18:57:35+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Lets add another path here for `components/crud-web-apps/common/**`. We want the web app to be rebuild whenever we also touch the common code.", - "pr_file_module": null - }, - { - "comment_id": "894352798", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6510, - "pr_file": ".github/workflows/vwa_docker_publish.yaml", - "discussion_id": "893858030", - "commented_code": "@@ -0,0 +1,28 @@\n+name: Build & Publish VWA Docker image\n+on:\n+ push:\n+ branches:\n+ - master\n+ - v*-branch\n+ paths:\n+ - components/crud-web-apps/volumes/**", - "comment_created_at": "2022-06-10T09:46:10+00:00", - "comment_author": "NickLoukas", - "comment_body": "Added that.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "893898341", - "pr_number": 6510, - "pr_file": ".github/workflows/nb_controller_docker_publish.yaml", - "created_at": "2022-06-09T19:50:43+00:00", - "commented_code": "name: Build & Publish Notebook Controller Docker image\non:\n push:\n branches:\n - master\n - v*-branch\n paths:\n - components/notebook-controller/**", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "893898341", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6510, - "pr_file": ".github/workflows/nb_controller_docker_publish.yaml", - "discussion_id": "893898341", - "commented_code": "@@ -0,0 +1,30 @@\n+name: Build & Publish Notebook Controller Docker image\n+on:\n+ push:\n+ branches:\n+ - master\n+ - v*-branch\n+ paths:\n+ - components/notebook-controller/**", - "comment_created_at": "2022-06-09T19:50:43+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Let's also trigger this workflow for the `/components/common/**` directory, since it's using common code from there", - "pr_file_module": null - }, - { - "comment_id": "894353026", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6510, - "pr_file": ".github/workflows/nb_controller_docker_publish.yaml", - "discussion_id": "893898341", - "commented_code": "@@ -0,0 +1,30 @@\n+name: Build & Publish Notebook Controller 
Docker image\n+on:\n+ push:\n+ branches:\n+ - master\n+ - v*-branch\n+ paths:\n+ - components/notebook-controller/**", - "comment_created_at": "2022-06-10T09:46:28+00:00", - "comment_author": "NickLoukas", - "comment_body": "Added that.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-prefer-external-configuration.json b/_reviewers/kubeflow-prefer-external-configuration.json new file mode 100644 index 0000000..e1a0ae6 --- /dev/null +++ b/_reviewers/kubeflow-prefer-external-configuration.json @@ -0,0 +1,82 @@ +[ + { + "discussion_id": "996970427", + "pr_number": 6674, + "pr_file": "components/centraldashboard/public/components/namespace-selector.js", + "created_at": "2022-10-17T11:58:08+00:00", + "commented_code": "import {html, PolymerElement} from '@polymer/polymer/polymer-element.js';\n\nexport const ALL_NAMESPACES = 'All namespaces';\nexport const ALL_NAMESPACES_ALLOWED_LIST = ['jupyter'];", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "996970427", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6674, + "pr_file": "components/centraldashboard/public/components/namespace-selector.js", + "discussion_id": "996970427", + "commented_code": "@@ -7,6 +7,12 @@ import '@polymer/paper-item/paper-item.js';\n \n import {html, PolymerElement} from '@polymer/polymer/polymer-element.js';\n \n+export const ALL_NAMESPACES = 'All namespaces';\n+export const ALL_NAMESPACES_ALLOWED_LIST = ['jupyter'];", + "comment_created_at": "2022-10-17T11:58:08+00:00", + "comment_author": "kimwnasptd", + "comment_body": "@tasos-ale let's have this as an empty list initially, since we don't have an app that supports it at this point in time", + "pr_file_module": null + }, + { + "comment_id": "996980605", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6674, + "pr_file": "components/centraldashboard/public/components/namespace-selector.js", + "discussion_id": "996970427", + "commented_code": "@@ -7,6 +7,12 @@ import '@polymer/paper-item/paper-item.js';\n \n import {html, PolymerElement} from '@polymer/polymer/polymer-element.js';\n \n+export const ALL_NAMESPACES = 'All namespaces';\n+export const ALL_NAMESPACES_ALLOWED_LIST = ['jupyter'];", + "comment_created_at": "2022-10-17T12:09:38+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Ideally I'd like this to info (which web app supports all namespaces) to be transmitted via the common library, so that the dashboard can dynamically know if an app supports this once it loads it.\r\n\r\nBut we can do with hardcoding the urls for now. 
If in the future we see a bigger need for this we can look into it", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "551857860", + "pr_number": 5474, + "pr_file": "components/centraldashboard/public/components/resources/kubeflow-icons.js", + "created_at": "2021-01-05T10:53:36+00:00", + "commented_code": "*/\nimport '@polymer/iron-icon/iron-icon.js';\nimport '@polymer/iron-iconset-svg/iron-iconset-svg.js';\nimport '@polymer/iron-icons/communication-icons.js';", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "551857860", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5474, + "pr_file": "components/centraldashboard/public/components/resources/kubeflow-icons.js", + "discussion_id": "551857860", + "commented_code": "@@ -4,6 +4,11 @@\n */\n import '@polymer/iron-icon/iron-icon.js';\n import '@polymer/iron-iconset-svg/iron-iconset-svg.js';\n+import '@polymer/iron-icons/communication-icons.js';", + "comment_created_at": "2021-01-05T10:53:36+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Do we need all of these new imports or only a subset of them?", + "pr_file_module": null + }, + { + "comment_id": "553202269", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5474, + "pr_file": "components/centraldashboard/public/components/resources/kubeflow-icons.js", + "discussion_id": "551857860", + "commented_code": "@@ -4,6 +4,11 @@\n */\n import '@polymer/iron-icon/iron-icon.js';\n import '@polymer/iron-iconset-svg/iron-iconset-svg.js';\n+import '@polymer/iron-icons/communication-icons.js';", + "comment_created_at": "2021-01-07T09:19:26+00:00", + "comment_author": "StefanoFioravanzo", + "comment_body": "This is a good point but:\r\n\r\n1. I wouldn't know how to import just specific icons from these files, I had tried but didn't find a way\r\n2. 
Even if it was possible to import just specific icons, I would prefer to import these bundles so that people can then configure the sidebar with whatever icons they like (via ConfigMap) without having to change the source code", + "pr_file_module": null + }, + { + "comment_id": "553287795", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5474, + "pr_file": "components/centraldashboard/public/components/resources/kubeflow-icons.js", + "discussion_id": "551857860", + "commented_code": "@@ -4,6 +4,11 @@\n */\n import '@polymer/iron-icon/iron-icon.js';\n import '@polymer/iron-iconset-svg/iron-iconset-svg.js';\n+import '@polymer/iron-icons/communication-icons.js';", + "comment_created_at": "2021-01-07T12:05:55+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Makes sense, lets keep them then", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-prefer-external-configuration.md b/_reviewers/kubeflow-prefer-external-configuration.md index ffd353f..f8f3d02 100644 --- a/_reviewers/kubeflow-prefer-external-configuration.md +++ b/_reviewers/kubeflow-prefer-external-configuration.md @@ -31,87 +31,3 @@ Similarly, when importing resources to support configurable features, consider i // This allows for configurable icons via ConfigMap without source code changes import '@polymer/iron-icons/communication-icons.js'; ``` - - -[ - { - "discussion_id": "996970427", - "pr_number": 6674, - "pr_file": "components/centraldashboard/public/components/namespace-selector.js", - "created_at": "2022-10-17T11:58:08+00:00", - "commented_code": "import {html, PolymerElement} from '@polymer/polymer/polymer-element.js';\n\nexport const ALL_NAMESPACES = 'All namespaces';\nexport const ALL_NAMESPACES_ALLOWED_LIST = ['jupyter'];", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "996970427", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6674, - "pr_file": "components/centraldashboard/public/components/namespace-selector.js", - "discussion_id": "996970427", - "commented_code": "@@ -7,6 +7,12 @@ import '@polymer/paper-item/paper-item.js';\n \n import {html, PolymerElement} from '@polymer/polymer/polymer-element.js';\n \n+export const ALL_NAMESPACES = 'All namespaces';\n+export const ALL_NAMESPACES_ALLOWED_LIST = ['jupyter'];", - "comment_created_at": "2022-10-17T11:58:08+00:00", - "comment_author": "kimwnasptd", - "comment_body": "@tasos-ale let's have this as an empty list initially, since we don't have an app that supports it at this point in time", - "pr_file_module": null - }, - { - "comment_id": "996980605", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6674, - "pr_file": "components/centraldashboard/public/components/namespace-selector.js", - "discussion_id": "996970427", - "commented_code": "@@ -7,6 +7,12 @@ import '@polymer/paper-item/paper-item.js';\n \n import {html, PolymerElement} from '@polymer/polymer/polymer-element.js';\n \n+export const ALL_NAMESPACES = 'All namespaces';\n+export const ALL_NAMESPACES_ALLOWED_LIST = ['jupyter'];", - "comment_created_at": "2022-10-17T12:09:38+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Ideally I'd like this to info (which web app supports all namespaces) to be transmitted via the common library, so that the dashboard can dynamically know if an app supports this once it loads it.\r\n\r\nBut we can do with hardcoding the urls for now. 
If in the future we see a bigger need for this we can look into it", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "551857860", - "pr_number": 5474, - "pr_file": "components/centraldashboard/public/components/resources/kubeflow-icons.js", - "created_at": "2021-01-05T10:53:36+00:00", - "commented_code": "*/\nimport '@polymer/iron-icon/iron-icon.js';\nimport '@polymer/iron-iconset-svg/iron-iconset-svg.js';\nimport '@polymer/iron-icons/communication-icons.js';", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "551857860", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5474, - "pr_file": "components/centraldashboard/public/components/resources/kubeflow-icons.js", - "discussion_id": "551857860", - "commented_code": "@@ -4,6 +4,11 @@\n */\n import '@polymer/iron-icon/iron-icon.js';\n import '@polymer/iron-iconset-svg/iron-iconset-svg.js';\n+import '@polymer/iron-icons/communication-icons.js';", - "comment_created_at": "2021-01-05T10:53:36+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Do we need all of these new imports or only a subset of them?", - "pr_file_module": null - }, - { - "comment_id": "553202269", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5474, - "pr_file": "components/centraldashboard/public/components/resources/kubeflow-icons.js", - "discussion_id": "551857860", - "commented_code": "@@ -4,6 +4,11 @@\n */\n import '@polymer/iron-icon/iron-icon.js';\n import '@polymer/iron-iconset-svg/iron-iconset-svg.js';\n+import '@polymer/iron-icons/communication-icons.js';", - "comment_created_at": "2021-01-07T09:19:26+00:00", - "comment_author": "StefanoFioravanzo", - "comment_body": "This is a good point but:\r\n\r\n1. I wouldn't know how to import just specific icons from these files, I had tried but didn't find a way\r\n2. 
Even if it was possible to import just specific icons, I would prefer to import these bundles so that people can then configure the sidebar with whatever icons they like (via ConfigMap) without having to change the source code", - "pr_file_module": null - }, - { - "comment_id": "553287795", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5474, - "pr_file": "components/centraldashboard/public/components/resources/kubeflow-icons.js", - "discussion_id": "551857860", - "commented_code": "@@ -4,6 +4,11 @@\n */\n import '@polymer/iron-icon/iron-icon.js';\n import '@polymer/iron-iconset-svg/iron-iconset-svg.js';\n+import '@polymer/iron-icons/communication-icons.js';", - "comment_created_at": "2021-01-07T12:05:55+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Makes sense, lets keep them then", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-prevent-xss-vulnerabilities.json b/_reviewers/kubeflow-prevent-xss-vulnerabilities.json new file mode 100644 index 0000000..370612d --- /dev/null +++ b/_reviewers/kubeflow-prevent-xss-vulnerabilities.json @@ -0,0 +1,24 @@ +[ + { + "discussion_id": "255707918", + "pr_number": 2357, + "pr_file": "components/jupyter-web-app/jupyter/static/js/notebooks.js", + "created_at": "2019-02-11T21:50:16+00:00", + "commented_code": "$(document).ready(function(){\n // Get Notebooks for the ServiceAccount's Namespace\n var ns = new URL(window.location.href).searchParams.get(\"namespace\")\n if (ns) {\n // Load the selected ns\n $(\"#ns-select\").val(ns)\n }\n $(\"#ns-select\").trigger(\"change\");\n});\n\nfunction deleteNotebook(ns, nb) {\n $.getJSON(prefix + \"/delete-notebook\", { namespace:ns, notebook:nb}, function(data, status) {\n var innerHTML = $(\"#error-msgs\").html()\n if(data.success == true) {\n updateNotebooksInNamespace(ns)\n innerHTML = '';\n }\n else {\n innerHTML = '
';\n innerHTML += '×';\n innerHTML += 'Warning! ' + data.log + '
';", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "255707918", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 2357, + "pr_file": "components/jupyter-web-app/jupyter/static/js/notebooks.js", + "discussion_id": "255707918", + "commented_code": "@@ -0,0 +1,149 @@\n+$(document).ready(function(){\n+ // Get Notebooks for the ServiceAccount's Namespace\n+ var ns = new URL(window.location.href).searchParams.get(\"namespace\")\n+ if (ns) {\n+ // Load the selected ns\n+ $(\"#ns-select\").val(ns)\n+ }\n+ $(\"#ns-select\").trigger(\"change\");\n+});\n+\n+function deleteNotebook(ns, nb) {\n+ $.getJSON(prefix + \"/delete-notebook\", { namespace:ns, notebook:nb}, function(data, status) {\n+ var innerHTML = $(\"#error-msgs\").html()\n+ if(data.success == true) {\n+ updateNotebooksInNamespace(ns)\n+ innerHTML = '';\n+ }\n+ else {\n+ innerHTML = '
';\n+ innerHTML += '×';\n+ innerHTML += 'Warning! ' + data.log + '
';", + "comment_created_at": "2019-02-11T21:50:16+00:00", + "comment_author": "avdaredevil", + "comment_body": "This seems to be XSS prone. Maybe consider doing something like:\r\n\r\n```javascript\r\ninnerHTML = `\r\n
\r\n ×\r\n Warning!\r\n
`\r\n```\r\nand on line 24:\r\n\r\n```javascript\r\nconst $e = $(\"#error-msgs\").html(innerHTML)\r\n$('.warning-log', $e).text(data.log) // This will not fail even if your `innerText` is empty\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-prevent-xss-vulnerabilities.md b/_reviewers/kubeflow-prevent-xss-vulnerabilities.md index 2f1c5ac..538c83d 100644 --- a/_reviewers/kubeflow-prevent-xss-vulnerabilities.md +++ b/_reviewers/kubeflow-prevent-xss-vulnerabilities.md @@ -33,29 +33,3 @@ innerHTML = ` const $e = $("#error-msgs").html(innerHTML); $('.warning-log', $e).text(data.log); ``` - - -[ - { - "discussion_id": "255707918", - "pr_number": 2357, - "pr_file": "components/jupyter-web-app/jupyter/static/js/notebooks.js", - "created_at": "2019-02-11T21:50:16+00:00", - "commented_code": "$(document).ready(function(){\n // Get Notebooks for the ServiceAccount's Namespace\n var ns = new URL(window.location.href).searchParams.get(\"namespace\")\n if (ns) {\n // Load the selected ns\n $(\"#ns-select\").val(ns)\n }\n $(\"#ns-select\").trigger(\"change\");\n});\n\nfunction deleteNotebook(ns, nb) {\n $.getJSON(prefix + \"/delete-notebook\", { namespace:ns, notebook:nb}, function(data, status) {\n var innerHTML = $(\"#error-msgs\").html()\n if(data.success == true) {\n updateNotebooksInNamespace(ns)\n innerHTML = '';\n }\n else {\n innerHTML = '
';\n innerHTML += '×';\n innerHTML += 'Warning! ' + data.log + '
';", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "255707918", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 2357, - "pr_file": "components/jupyter-web-app/jupyter/static/js/notebooks.js", - "discussion_id": "255707918", - "commented_code": "@@ -0,0 +1,149 @@\n+$(document).ready(function(){\n+ // Get Notebooks for the ServiceAccount's Namespace\n+ var ns = new URL(window.location.href).searchParams.get(\"namespace\")\n+ if (ns) {\n+ // Load the selected ns\n+ $(\"#ns-select\").val(ns)\n+ }\n+ $(\"#ns-select\").trigger(\"change\");\n+});\n+\n+function deleteNotebook(ns, nb) {\n+ $.getJSON(prefix + \"/delete-notebook\", { namespace:ns, notebook:nb}, function(data, status) {\n+ var innerHTML = $(\"#error-msgs\").html()\n+ if(data.success == true) {\n+ updateNotebooksInNamespace(ns)\n+ innerHTML = '';\n+ }\n+ else {\n+ innerHTML = '
';\n+ innerHTML += '×';\n+ innerHTML += 'Warning! ' + data.log + '
';", - "comment_created_at": "2019-02-11T21:50:16+00:00", - "comment_author": "avdaredevil", - "comment_body": "This seems to be XSS prone. Maybe consider doing something like:\r\n\r\n```javascript\r\ninnerHTML = `\r\n
\r\n ×\r\n Warning!\r\n
`\r\n```\r\nand on line 24:\r\n\r\n```javascript\r\nconst $e = $(\"#error-msgs\").html(innerHTML)\r\n$('.warning-log', $e).text(data.log) // This will not fail even if your `innerText` is empty\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-prioritize-readability-over-brevity.json b/_reviewers/kubeflow-prioritize-readability-over-brevity.json new file mode 100644 index 0000000..954135b --- /dev/null +++ b/_reviewers/kubeflow-prioritize-readability-over-brevity.json @@ -0,0 +1,162 @@ +[ + { + "discussion_id": "493976616", + "pr_number": 5316, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/routes/patch.py", + "created_at": "2020-09-24T00:47:04+00:00", + "commented_code": "import datetime as dt\n\nfrom flask import request\nfrom werkzeug import exceptions\n\nfrom kubeflow.kubeflow.flask_rest_backend import api, decorators, logging\n\nfrom .. import status\nfrom . import bp\n\nlog = logging.getLogger(__name__)\n\nSTOP_ATTR = \"stopped\"\nATTRIBUTES = set([STOP_ATTR])\n\n\n# Routes\n@bp.route(\n \"/api/namespaces//notebooks/\", methods=[\"PATCH\"]\n)\n@decorators.request_is_json_type\ndef patch_notebook(namespace, notebook):\n request_body = request.get_json()\n log.info(\"Got body: %s\", request_body)\n\n if request_body is None:\n raise exceptions.BadRequest(\"Request doesn't have a body.\")\n\n # Ensure request has at least one valid command\n if not any(attr in ATTRIBUTES for attr in request_body.keys()):\n raise exceptions.BadRequest(\n \"Request body must include at least one supported key: %s\"\n % list(ATTRIBUTES)\n )\n\n # start/stop a notebook\n if STOP_ATTR in request_body:\n start_stop_notebook(namespace, notebook, request_body)\n\n return api.success_response()\n\n\n# helper functions\ndef start_stop_notebook(namespace, notebook, request_body):", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "493976616", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5316, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/routes/patch.py", + "discussion_id": "493976616", + "commented_code": "@@ -0,0 +1,80 @@\n+import datetime as dt\n+\n+from flask import request\n+from werkzeug import exceptions\n+\n+from kubeflow.kubeflow.flask_rest_backend import api, decorators, logging\n+\n+from .. import status\n+from . import bp\n+\n+log = logging.getLogger(__name__)\n+\n+STOP_ATTR = \"stopped\"\n+ATTRIBUTES = set([STOP_ATTR])\n+\n+\n+# Routes\n+@bp.route(\n+ \"/api/namespaces//notebooks/\", methods=[\"PATCH\"]\n+)\n+@decorators.request_is_json_type\n+def patch_notebook(namespace, notebook):\n+ request_body = request.get_json()\n+ log.info(\"Got body: %s\", request_body)\n+\n+ if request_body is None:\n+ raise exceptions.BadRequest(\"Request doesn't have a body.\")\n+\n+ # Ensure request has at least one valid command\n+ if not any(attr in ATTRIBUTES for attr in request_body.keys()):\n+ raise exceptions.BadRequest(\n+ \"Request body must include at least one supported key: %s\"\n+ % list(ATTRIBUTES)\n+ )\n+\n+ # start/stop a notebook\n+ if STOP_ATTR in request_body:\n+ start_stop_notebook(namespace, notebook, request_body)\n+\n+ return api.success_response()\n+\n+\n+# helper functions\n+def start_stop_notebook(namespace, notebook, request_body):", + "comment_created_at": "2020-09-24T00:47:04+00:00", + "comment_author": "thesuperzapper", + "comment_body": "Why does this need to be a separate function?\r\n\r\nThe patch function is sufficient? 
", + "pr_file_module": null + }, + { + "comment_id": "494179332", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5316, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/routes/patch.py", + "discussion_id": "493976616", + "commented_code": "@@ -0,0 +1,80 @@\n+import datetime as dt\n+\n+from flask import request\n+from werkzeug import exceptions\n+\n+from kubeflow.kubeflow.flask_rest_backend import api, decorators, logging\n+\n+from .. import status\n+from . import bp\n+\n+log = logging.getLogger(__name__)\n+\n+STOP_ATTR = \"stopped\"\n+ATTRIBUTES = set([STOP_ATTR])\n+\n+\n+# Routes\n+@bp.route(\n+ \"/api/namespaces//notebooks/\", methods=[\"PATCH\"]\n+)\n+@decorators.request_is_json_type\n+def patch_notebook(namespace, notebook):\n+ request_body = request.get_json()\n+ log.info(\"Got body: %s\", request_body)\n+\n+ if request_body is None:\n+ raise exceptions.BadRequest(\"Request doesn't have a body.\")\n+\n+ # Ensure request has at least one valid command\n+ if not any(attr in ATTRIBUTES for attr in request_body.keys()):\n+ raise exceptions.BadRequest(\n+ \"Request body must include at least one supported key: %s\"\n+ % list(ATTRIBUTES)\n+ )\n+\n+ # start/stop a notebook\n+ if STOP_ATTR in request_body:\n+ start_stop_notebook(namespace, notebook, request_body)\n+\n+ return api.success_response()\n+\n+\n+# helper functions\n+def start_stop_notebook(namespace, notebook, request_body):", + "comment_created_at": "2020-09-24T09:41:56+00:00", + "comment_author": "kimwnasptd", + "comment_body": "I was thinking that If in the future we want to handle other cases in the PATCH request, then each one of them should be its own function and not have all the logic unfolded in the PATCH handler", + "pr_file_module": null + }, + { + "comment_id": "494180275", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5316, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/routes/patch.py", + "discussion_id": "493976616", + "commented_code": "@@ -0,0 +1,80 @@\n+import datetime as dt\n+\n+from flask import request\n+from werkzeug import exceptions\n+\n+from kubeflow.kubeflow.flask_rest_backend import api, decorators, logging\n+\n+from .. import status\n+from . 
import bp\n+\n+log = logging.getLogger(__name__)\n+\n+STOP_ATTR = \"stopped\"\n+ATTRIBUTES = set([STOP_ATTR])\n+\n+\n+# Routes\n+@bp.route(\n+ \"/api/namespaces//notebooks/\", methods=[\"PATCH\"]\n+)\n+@decorators.request_is_json_type\n+def patch_notebook(namespace, notebook):\n+ request_body = request.get_json()\n+ log.info(\"Got body: %s\", request_body)\n+\n+ if request_body is None:\n+ raise exceptions.BadRequest(\"Request doesn't have a body.\")\n+\n+ # Ensure request has at least one valid command\n+ if not any(attr in ATTRIBUTES for attr in request_body.keys()):\n+ raise exceptions.BadRequest(\n+ \"Request body must include at least one supported key: %s\"\n+ % list(ATTRIBUTES)\n+ )\n+\n+ # start/stop a notebook\n+ if STOP_ATTR in request_body:\n+ start_stop_notebook(namespace, notebook, request_body)\n+\n+ return api.success_response()\n+\n+\n+# helper functions\n+def start_stop_notebook(namespace, notebook, request_body):", + "comment_created_at": "2020-09-24T09:43:32+00:00", + "comment_author": "kimwnasptd", + "comment_body": "I was thinking that If in the future we want to handle other cases in the PATCH request, then each one of them should be its own function and not have all the logic unfolded in the PATCH handler", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "467112305", + "pr_number": 5180, + "pr_file": "components/crud-web-apps/tensorboards/backend/app/routes/delete.py", + "created_at": "2020-08-07T15:29:25+00:00", + "commented_code": "from kubeflow.kubeflow.crud_backend import api, logging\n\nfrom . import bp\n\nlog = logging.getLogger(__name__)\n\n\n@bp.route(\"/api/namespaces//tensorboards/\", methods=[\"DELETE\"]) # noqa E501", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "467112305", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5180, + "pr_file": "components/crud-web-apps/tensorboards/backend/app/routes/delete.py", + "discussion_id": "467112305", + "commented_code": "@@ -0,0 +1,19 @@\n+from kubeflow.kubeflow.crud_backend import api, logging\n+\n+from . 
import bp\n+\n+log = logging.getLogger(__name__)\n+\n+\n+@bp.route(\"/api/namespaces//tensorboards/\", methods=[\"DELETE\"]) # noqa E501", + "comment_created_at": "2020-08-07T15:29:25+00:00", + "comment_author": "kimwnasptd", + "comment_body": "We should not ignore the E501 warnings.\r\nIf the line is too long you could put each argument in a distinct line.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "447076008", + "pr_number": 5074, + "pr_file": "components/jupyter-web-app/backend/kubeflow_jupyter/common/utils.py", + "created_at": "2020-06-29T15:53:11+00:00", + "commented_code": "# VAR: change this function according to the main resource\n cntr = rsrc[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n status, reason = process_status(rsrc, rsrc_events)\n \n # GPUs may not have been assigned to a pod", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "447076008", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5074, + "pr_file": "components/jupyter-web-app/backend/kubeflow_jupyter/common/utils.py", + "discussion_id": "447076008", + "commented_code": "@@ -263,6 +263,19 @@ def process_resource(rsrc, rsrc_events):\n # VAR: change this function according to the main resource\n cntr = rsrc[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n status, reason = process_status(rsrc, rsrc_events)\n+ \n+ # GPUs may not have been assigned to a pod ", + "comment_created_at": "2020-06-29T15:53:11+00:00", + "comment_author": "kimwnasptd", + "comment_body": "nit: @lalithvaka could you add this logic into a distinct function on its own?", + "pr_file_module": null + }, + { + "comment_id": "447478189", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5074, + "pr_file": "components/jupyter-web-app/backend/kubeflow_jupyter/common/utils.py", + "discussion_id": "447076008", + "commented_code": "@@ -263,6 +263,19 @@ def process_resource(rsrc, rsrc_events):\n # VAR: change this function according to the main resource\n cntr = rsrc[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n status, reason = process_status(rsrc, rsrc_events)\n+ \n+ # GPUs may not have been assigned to a pod ", + "comment_created_at": "2020-06-30T07:44:47+00:00", + "comment_author": "lalithvaka", + "comment_body": "@kimwnasptd submitted the change requested. ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "341452867", + "pr_number": 4447, + "pr_file": "testing/gcp_util.py", + "created_at": "2019-11-01T05:07:38+00:00", + "commented_code": "resp = requests.request(\n \"GET\",\n url,\n headers={\n \"Authorization\":\n \"Bearer {}\".format(google_open_id_connect_token)\n },\n verify=False)\n verify=False) if use_basic_auth else requests.request(", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "341452867", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4447, + "pr_file": "testing/gcp_util.py", + "discussion_id": "341452867", + "commented_code": "@@ -100,19 +102,22 @@ def endpoint_is_ready(url, wait_min=15):\n resp = requests.request(\n \"GET\",\n url,\n- headers={\n- \"Authorization\":\n- \"Bearer {}\".format(google_open_id_connect_token)\n- },\n- verify=False)\n+ verify=False) if use_basic_auth else requests.request(", + "comment_created_at": "2019-11-01T05:07:38+00:00", + "comment_author": "jlewi", + "comment_body": "Is there a typo here? 
This doesn't looks like valid syntax", + "pr_file_module": null + }, + { + "comment_id": "341682418", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4447, + "pr_file": "testing/gcp_util.py", + "discussion_id": "341452867", + "commented_code": "@@ -100,19 +102,22 @@ def endpoint_is_ready(url, wait_min=15):\n resp = requests.request(\n \"GET\",\n url,\n- headers={\n- \"Authorization\":\n- \"Bearer {}\".format(google_open_id_connect_token)\n- },\n- verify=False)\n+ verify=False) if use_basic_auth else requests.request(", + "comment_created_at": "2019-11-01T17:49:02+00:00", + "comment_author": "gabrielwen", + "comment_body": "yes, it is valid - this is like Python version of syntax sugar.", + "pr_file_module": null + }, + { + "comment_id": "341717177", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4447, + "pr_file": "testing/gcp_util.py", + "discussion_id": "341452867", + "commented_code": "@@ -100,19 +102,22 @@ def endpoint_is_ready(url, wait_min=15):\n resp = requests.request(\n \"GET\",\n url,\n- headers={\n- \"Authorization\":\n- \"Bearer {}\".format(google_open_id_connect_token)\n- },\n- verify=False)\n+ verify=False) if use_basic_auth else requests.request(", + "comment_created_at": "2019-11-01T19:19:29+00:00", + "comment_author": "jlewi", + "comment_body": "Google style guide suggests eschewing complex \"list comprehensions\". If it doesn't fit on one line its a good indicator its too complicated.\r\n\r\nCan we use a standard multi-line if/else statement?", + "pr_file_module": null + }, + { + "comment_id": "341742715", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4447, + "pr_file": "testing/gcp_util.py", + "discussion_id": "341452867", + "commented_code": "@@ -100,19 +102,22 @@ def endpoint_is_ready(url, wait_min=15):\n resp = requests.request(\n \"GET\",\n url,\n- headers={\n- \"Authorization\":\n- \"Bearer {}\".format(google_open_id_connect_token)\n- },\n- verify=False)\n+ verify=False) if use_basic_auth else requests.request(", + "comment_created_at": "2019-11-01T20:39:41+00:00", + "comment_author": "gabrielwen", + "comment_body": "done.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-prioritize-readability-over-brevity.md b/_reviewers/kubeflow-prioritize-readability-over-brevity.md index 552d7c2..7c94d95 100644 --- a/_reviewers/kubeflow-prioritize-readability-over-brevity.md +++ b/_reviewers/kubeflow-prioritize-readability-over-brevity.md @@ -65,167 +65,3 @@ else: ``` This approach makes code easier to read, maintain, and debug, improving the overall quality of the codebase. - - -[ - { - "discussion_id": "493976616", - "pr_number": 5316, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/routes/patch.py", - "created_at": "2020-09-24T00:47:04+00:00", - "commented_code": "import datetime as dt\n\nfrom flask import request\nfrom werkzeug import exceptions\n\nfrom kubeflow.kubeflow.flask_rest_backend import api, decorators, logging\n\nfrom .. import status\nfrom . 
import bp\n\nlog = logging.getLogger(__name__)\n\nSTOP_ATTR = \"stopped\"\nATTRIBUTES = set([STOP_ATTR])\n\n\n# Routes\n@bp.route(\n \"/api/namespaces//notebooks/\", methods=[\"PATCH\"]\n)\n@decorators.request_is_json_type\ndef patch_notebook(namespace, notebook):\n request_body = request.get_json()\n log.info(\"Got body: %s\", request_body)\n\n if request_body is None:\n raise exceptions.BadRequest(\"Request doesn't have a body.\")\n\n # Ensure request has at least one valid command\n if not any(attr in ATTRIBUTES for attr in request_body.keys()):\n raise exceptions.BadRequest(\n \"Request body must include at least one supported key: %s\"\n % list(ATTRIBUTES)\n )\n\n # start/stop a notebook\n if STOP_ATTR in request_body:\n start_stop_notebook(namespace, notebook, request_body)\n\n return api.success_response()\n\n\n# helper functions\ndef start_stop_notebook(namespace, notebook, request_body):", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "493976616", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5316, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/routes/patch.py", - "discussion_id": "493976616", - "commented_code": "@@ -0,0 +1,80 @@\n+import datetime as dt\n+\n+from flask import request\n+from werkzeug import exceptions\n+\n+from kubeflow.kubeflow.flask_rest_backend import api, decorators, logging\n+\n+from .. import status\n+from . import bp\n+\n+log = logging.getLogger(__name__)\n+\n+STOP_ATTR = \"stopped\"\n+ATTRIBUTES = set([STOP_ATTR])\n+\n+\n+# Routes\n+@bp.route(\n+ \"/api/namespaces//notebooks/\", methods=[\"PATCH\"]\n+)\n+@decorators.request_is_json_type\n+def patch_notebook(namespace, notebook):\n+ request_body = request.get_json()\n+ log.info(\"Got body: %s\", request_body)\n+\n+ if request_body is None:\n+ raise exceptions.BadRequest(\"Request doesn't have a body.\")\n+\n+ # Ensure request has at least one valid command\n+ if not any(attr in ATTRIBUTES for attr in request_body.keys()):\n+ raise exceptions.BadRequest(\n+ \"Request body must include at least one supported key: %s\"\n+ % list(ATTRIBUTES)\n+ )\n+\n+ # start/stop a notebook\n+ if STOP_ATTR in request_body:\n+ start_stop_notebook(namespace, notebook, request_body)\n+\n+ return api.success_response()\n+\n+\n+# helper functions\n+def start_stop_notebook(namespace, notebook, request_body):", - "comment_created_at": "2020-09-24T00:47:04+00:00", - "comment_author": "thesuperzapper", - "comment_body": "Why does this need to be a separate function?\r\n\r\nThe patch function is sufficient? ", - "pr_file_module": null - }, - { - "comment_id": "494179332", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5316, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/routes/patch.py", - "discussion_id": "493976616", - "commented_code": "@@ -0,0 +1,80 @@\n+import datetime as dt\n+\n+from flask import request\n+from werkzeug import exceptions\n+\n+from kubeflow.kubeflow.flask_rest_backend import api, decorators, logging\n+\n+from .. import status\n+from . 
import bp\n+\n+log = logging.getLogger(__name__)\n+\n+STOP_ATTR = \"stopped\"\n+ATTRIBUTES = set([STOP_ATTR])\n+\n+\n+# Routes\n+@bp.route(\n+ \"/api/namespaces//notebooks/\", methods=[\"PATCH\"]\n+)\n+@decorators.request_is_json_type\n+def patch_notebook(namespace, notebook):\n+ request_body = request.get_json()\n+ log.info(\"Got body: %s\", request_body)\n+\n+ if request_body is None:\n+ raise exceptions.BadRequest(\"Request doesn't have a body.\")\n+\n+ # Ensure request has at least one valid command\n+ if not any(attr in ATTRIBUTES for attr in request_body.keys()):\n+ raise exceptions.BadRequest(\n+ \"Request body must include at least one supported key: %s\"\n+ % list(ATTRIBUTES)\n+ )\n+\n+ # start/stop a notebook\n+ if STOP_ATTR in request_body:\n+ start_stop_notebook(namespace, notebook, request_body)\n+\n+ return api.success_response()\n+\n+\n+# helper functions\n+def start_stop_notebook(namespace, notebook, request_body):", - "comment_created_at": "2020-09-24T09:41:56+00:00", - "comment_author": "kimwnasptd", - "comment_body": "I was thinking that If in the future we want to handle other cases in the PATCH request, then each one of them should be its own function and not have all the logic unfolded in the PATCH handler", - "pr_file_module": null - }, - { - "comment_id": "494180275", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5316, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/routes/patch.py", - "discussion_id": "493976616", - "commented_code": "@@ -0,0 +1,80 @@\n+import datetime as dt\n+\n+from flask import request\n+from werkzeug import exceptions\n+\n+from kubeflow.kubeflow.flask_rest_backend import api, decorators, logging\n+\n+from .. import status\n+from . import bp\n+\n+log = logging.getLogger(__name__)\n+\n+STOP_ATTR = \"stopped\"\n+ATTRIBUTES = set([STOP_ATTR])\n+\n+\n+# Routes\n+@bp.route(\n+ \"/api/namespaces//notebooks/\", methods=[\"PATCH\"]\n+)\n+@decorators.request_is_json_type\n+def patch_notebook(namespace, notebook):\n+ request_body = request.get_json()\n+ log.info(\"Got body: %s\", request_body)\n+\n+ if request_body is None:\n+ raise exceptions.BadRequest(\"Request doesn't have a body.\")\n+\n+ # Ensure request has at least one valid command\n+ if not any(attr in ATTRIBUTES for attr in request_body.keys()):\n+ raise exceptions.BadRequest(\n+ \"Request body must include at least one supported key: %s\"\n+ % list(ATTRIBUTES)\n+ )\n+\n+ # start/stop a notebook\n+ if STOP_ATTR in request_body:\n+ start_stop_notebook(namespace, notebook, request_body)\n+\n+ return api.success_response()\n+\n+\n+# helper functions\n+def start_stop_notebook(namespace, notebook, request_body):", - "comment_created_at": "2020-09-24T09:43:32+00:00", - "comment_author": "kimwnasptd", - "comment_body": "I was thinking that If in the future we want to handle other cases in the PATCH request, then each one of them should be its own function and not have all the logic unfolded in the PATCH handler", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "467112305", - "pr_number": 5180, - "pr_file": "components/crud-web-apps/tensorboards/backend/app/routes/delete.py", - "created_at": "2020-08-07T15:29:25+00:00", - "commented_code": "from kubeflow.kubeflow.crud_backend import api, logging\n\nfrom . 
import bp\n\nlog = logging.getLogger(__name__)\n\n\n@bp.route(\"/api/namespaces//tensorboards/\", methods=[\"DELETE\"]) # noqa E501", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "467112305", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5180, - "pr_file": "components/crud-web-apps/tensorboards/backend/app/routes/delete.py", - "discussion_id": "467112305", - "commented_code": "@@ -0,0 +1,19 @@\n+from kubeflow.kubeflow.crud_backend import api, logging\n+\n+from . import bp\n+\n+log = logging.getLogger(__name__)\n+\n+\n+@bp.route(\"/api/namespaces//tensorboards/\", methods=[\"DELETE\"]) # noqa E501", - "comment_created_at": "2020-08-07T15:29:25+00:00", - "comment_author": "kimwnasptd", - "comment_body": "We should not ignore the E501 warnings.\r\nIf the line is too long you could put each argument in a distinct line.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "447076008", - "pr_number": 5074, - "pr_file": "components/jupyter-web-app/backend/kubeflow_jupyter/common/utils.py", - "created_at": "2020-06-29T15:53:11+00:00", - "commented_code": "# VAR: change this function according to the main resource\n cntr = rsrc[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n status, reason = process_status(rsrc, rsrc_events)\n \n # GPUs may not have been assigned to a pod", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "447076008", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5074, - "pr_file": "components/jupyter-web-app/backend/kubeflow_jupyter/common/utils.py", - "discussion_id": "447076008", - "commented_code": "@@ -263,6 +263,19 @@ def process_resource(rsrc, rsrc_events):\n # VAR: change this function according to the main resource\n cntr = rsrc[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n status, reason = process_status(rsrc, rsrc_events)\n+ \n+ # GPUs may not have been assigned to a pod ", - "comment_created_at": "2020-06-29T15:53:11+00:00", - "comment_author": "kimwnasptd", - "comment_body": "nit: @lalithvaka could you add this logic into a distinct function on its own?", - "pr_file_module": null - }, - { - "comment_id": "447478189", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5074, - "pr_file": "components/jupyter-web-app/backend/kubeflow_jupyter/common/utils.py", - "discussion_id": "447076008", - "commented_code": "@@ -263,6 +263,19 @@ def process_resource(rsrc, rsrc_events):\n # VAR: change this function according to the main resource\n cntr = rsrc[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n status, reason = process_status(rsrc, rsrc_events)\n+ \n+ # GPUs may not have been assigned to a pod ", - "comment_created_at": "2020-06-30T07:44:47+00:00", - "comment_author": "lalithvaka", - "comment_body": "@kimwnasptd submitted the change requested. 
", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "341452867", - "pr_number": 4447, - "pr_file": "testing/gcp_util.py", - "created_at": "2019-11-01T05:07:38+00:00", - "commented_code": "resp = requests.request(\n \"GET\",\n url,\n headers={\n \"Authorization\":\n \"Bearer {}\".format(google_open_id_connect_token)\n },\n verify=False)\n verify=False) if use_basic_auth else requests.request(", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "341452867", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4447, - "pr_file": "testing/gcp_util.py", - "discussion_id": "341452867", - "commented_code": "@@ -100,19 +102,22 @@ def endpoint_is_ready(url, wait_min=15):\n resp = requests.request(\n \"GET\",\n url,\n- headers={\n- \"Authorization\":\n- \"Bearer {}\".format(google_open_id_connect_token)\n- },\n- verify=False)\n+ verify=False) if use_basic_auth else requests.request(", - "comment_created_at": "2019-11-01T05:07:38+00:00", - "comment_author": "jlewi", - "comment_body": "Is there a typo here? This doesn't looks like valid syntax", - "pr_file_module": null - }, - { - "comment_id": "341682418", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4447, - "pr_file": "testing/gcp_util.py", - "discussion_id": "341452867", - "commented_code": "@@ -100,19 +102,22 @@ def endpoint_is_ready(url, wait_min=15):\n resp = requests.request(\n \"GET\",\n url,\n- headers={\n- \"Authorization\":\n- \"Bearer {}\".format(google_open_id_connect_token)\n- },\n- verify=False)\n+ verify=False) if use_basic_auth else requests.request(", - "comment_created_at": "2019-11-01T17:49:02+00:00", - "comment_author": "gabrielwen", - "comment_body": "yes, it is valid - this is like Python version of syntax sugar.", - "pr_file_module": null - }, - { - "comment_id": "341717177", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4447, - "pr_file": "testing/gcp_util.py", - "discussion_id": "341452867", - "commented_code": "@@ -100,19 +102,22 @@ def endpoint_is_ready(url, wait_min=15):\n resp = requests.request(\n \"GET\",\n url,\n- headers={\n- \"Authorization\":\n- \"Bearer {}\".format(google_open_id_connect_token)\n- },\n- verify=False)\n+ verify=False) if use_basic_auth else requests.request(", - "comment_created_at": "2019-11-01T19:19:29+00:00", - "comment_author": "jlewi", - "comment_body": "Google style guide suggests eschewing complex \"list comprehensions\". 
If it doesn't fit on one line its a good indicator its too complicated.\r\n\r\nCan we use a standard multi-line if/else statement?", - "pr_file_module": null - }, - { - "comment_id": "341742715", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4447, - "pr_file": "testing/gcp_util.py", - "discussion_id": "341452867", - "commented_code": "@@ -100,19 +102,22 @@ def endpoint_is_ready(url, wait_min=15):\n resp = requests.request(\n \"GET\",\n url,\n- headers={\n- \"Authorization\":\n- \"Bearer {}\".format(google_open_id_connect_token)\n- },\n- verify=False)\n+ verify=False) if use_basic_auth else requests.request(", - "comment_created_at": "2019-11-01T20:39:41+00:00", - "comment_author": "gabrielwen", - "comment_body": "done.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-private-variable-naming-convention.json b/_reviewers/kubeflow-private-variable-naming-convention.json new file mode 100644 index 0000000..48226ec --- /dev/null +++ b/_reviewers/kubeflow-private-variable-naming-convention.json @@ -0,0 +1,58 @@ +[ + { + "discussion_id": "1147731722", + "pr_number": 7030, + "pr_file": "components/centraldashboard-angular/frontend/src/app/services/namespace.service.ts", + "created_at": "2023-03-24T15:20:07+00:00", + "commented_code": "import { Injectable } from '@angular/core';\nimport { ActivatedRoute, NavigationEnd, Router } from '@angular/router';\nimport { ReplaySubject } from 'rxjs';\nimport { filter, map, switchMap, take } from 'rxjs/operators';\nimport { getQueryParams, getUrlFragment } from '../shared/utils';\nimport { Namespace } from '../types/namespace';\nimport { EnvironmentService } from './environment.service';\nimport { LocalStorageService } from './local-storage.service';\n\n@Injectable({\n providedIn: 'root',\n})\nexport class CDBNamespaceService {\n private _namespacesSubject = new ReplaySubject(1);\n private _currentNamespaceSubject = new ReplaySubject(1);\n\n namespacesSubject = this._namespacesSubject.asObservable();\n currentNamespaceSubject = this._currentNamespaceSubject.asObservable();\n\n private user: string;\n private currentNamespace: Namespace | undefined;", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1147731722", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7030, + "pr_file": "components/centraldashboard-angular/frontend/src/app/services/namespace.service.ts", + "discussion_id": "1147731722", + "commented_code": "@@ -0,0 +1,201 @@\n+import { Injectable } from '@angular/core';\n+import { ActivatedRoute, NavigationEnd, Router } from '@angular/router';\n+import { ReplaySubject } from 'rxjs';\n+import { filter, map, switchMap, take } from 'rxjs/operators';\n+import { getQueryParams, getUrlFragment } from '../shared/utils';\n+import { Namespace } from '../types/namespace';\n+import { EnvironmentService } from './environment.service';\n+import { LocalStorageService } from './local-storage.service';\n+\n+@Injectable({\n+ providedIn: 'root',\n+})\n+export class CDBNamespaceService {\n+ private _namespacesSubject = new ReplaySubject(1);\n+ private _currentNamespaceSubject = new ReplaySubject(1);\n+\n+ namespacesSubject = this._namespacesSubject.asObservable();\n+ currentNamespaceSubject = this._currentNamespaceSubject.asObservable();\n+\n+ private user: string;\n+ private currentNamespace: Namespace | undefined;", + "comment_created_at": "2023-03-24T15:20:07+00:00", + "comment_author": "tasos-ale", + "comment_body": "Maybe we can do this:\r\n```suggestion\r\n namespaces = 
this._namespacesSubject.asObservable();\r\n currentNamespace = this._currentNamespaceSubject.asObservable();\r\n\r\n private _user: string;\r\n private _currentNamespace: Namespace | undefined;\r\n```\r\nwdyt @orfeas-k ?", + "pr_file_module": null + }, + { + "comment_id": "1147753168", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7030, + "pr_file": "components/centraldashboard-angular/frontend/src/app/services/namespace.service.ts", + "discussion_id": "1147731722", + "commented_code": "@@ -0,0 +1,201 @@\n+import { Injectable } from '@angular/core';\n+import { ActivatedRoute, NavigationEnd, Router } from '@angular/router';\n+import { ReplaySubject } from 'rxjs';\n+import { filter, map, switchMap, take } from 'rxjs/operators';\n+import { getQueryParams, getUrlFragment } from '../shared/utils';\n+import { Namespace } from '../types/namespace';\n+import { EnvironmentService } from './environment.service';\n+import { LocalStorageService } from './local-storage.service';\n+\n+@Injectable({\n+ providedIn: 'root',\n+})\n+export class CDBNamespaceService {\n+ private _namespacesSubject = new ReplaySubject(1);\n+ private _currentNamespaceSubject = new ReplaySubject(1);\n+\n+ namespacesSubject = this._namespacesSubject.asObservable();\n+ currentNamespaceSubject = this._currentNamespaceSubject.asObservable();\n+\n+ private user: string;\n+ private currentNamespace: Namespace | undefined;", + "comment_created_at": "2023-03-24T15:35:45+00:00", + "comment_author": "orfeas-k", + "comment_body": "Sure, but I 'll rename them from `_variable` to `prvVariable` because that's what we do the in the rest of KF WAs as well.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1065742934", + "pr_number": 6856, + "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/iframe-wrapper/iframe-wrapper.component.ts", + "created_at": "2023-01-10T12:45:55+00:00", + "commented_code": "import {\n AfterViewInit,\n Component,\n ElementRef,\n OnDestroy,\n ViewChild,\n} from '@angular/core';\nimport { NavigationEnd, Router } from '@angular/router';\nimport { Subscription } from 'rxjs';\n\n@Component({\n selector: 'app-iframe-wrapper',\n templateUrl: './iframe-wrapper.component.html',\n styleUrls: ['./iframe-wrapper.component.scss'],\n})\nexport class IframeWrapperComponent implements AfterViewInit, OnDestroy {\n @ViewChild('iframe') iframe: ElementRef;\n\n public prvSrcPath: string;", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1065742934", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6856, + "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/iframe-wrapper/iframe-wrapper.component.ts", + "discussion_id": "1065742934", + "commented_code": "@@ -0,0 +1,121 @@\n+import {\n+ AfterViewInit,\n+ Component,\n+ ElementRef,\n+ OnDestroy,\n+ ViewChild,\n+} from '@angular/core';\n+import { NavigationEnd, Router } from '@angular/router';\n+import { Subscription } from 'rxjs';\n+\n+@Component({\n+ selector: 'app-iframe-wrapper',\n+ templateUrl: './iframe-wrapper.component.html',\n+ styleUrls: ['./iframe-wrapper.component.scss'],\n+})\n+export class IframeWrapperComponent implements AfterViewInit, OnDestroy {\n+ @ViewChild('iframe') iframe: ElementRef;\n+\n+ public prvSrcPath: string;", + "comment_created_at": "2023-01-10T12:45:55+00:00", + "comment_author": "kimwnasptd", + "comment_body": "let's make this `private`, to comply with the var's name", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git 
a/_reviewers/kubeflow-private-variable-naming-convention.md b/_reviewers/kubeflow-private-variable-naming-convention.md index fc102c4..a5e427c 100644 --- a/_reviewers/kubeflow-private-variable-naming-convention.md +++ b/_reviewers/kubeflow-private-variable-naming-convention.md @@ -36,63 +36,3 @@ export class MyService { data = this.prvDataSubject.asObservable(); } ``` - - -[ - { - "discussion_id": "1147731722", - "pr_number": 7030, - "pr_file": "components/centraldashboard-angular/frontend/src/app/services/namespace.service.ts", - "created_at": "2023-03-24T15:20:07+00:00", - "commented_code": "import { Injectable } from '@angular/core';\nimport { ActivatedRoute, NavigationEnd, Router } from '@angular/router';\nimport { ReplaySubject } from 'rxjs';\nimport { filter, map, switchMap, take } from 'rxjs/operators';\nimport { getQueryParams, getUrlFragment } from '../shared/utils';\nimport { Namespace } from '../types/namespace';\nimport { EnvironmentService } from './environment.service';\nimport { LocalStorageService } from './local-storage.service';\n\n@Injectable({\n providedIn: 'root',\n})\nexport class CDBNamespaceService {\n private _namespacesSubject = new ReplaySubject(1);\n private _currentNamespaceSubject = new ReplaySubject(1);\n\n namespacesSubject = this._namespacesSubject.asObservable();\n currentNamespaceSubject = this._currentNamespaceSubject.asObservable();\n\n private user: string;\n private currentNamespace: Namespace | undefined;", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1147731722", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7030, - "pr_file": "components/centraldashboard-angular/frontend/src/app/services/namespace.service.ts", - "discussion_id": "1147731722", - "commented_code": "@@ -0,0 +1,201 @@\n+import { Injectable } from '@angular/core';\n+import { ActivatedRoute, NavigationEnd, Router } from '@angular/router';\n+import { ReplaySubject } from 'rxjs';\n+import { filter, map, switchMap, take } from 'rxjs/operators';\n+import { getQueryParams, getUrlFragment } from '../shared/utils';\n+import { Namespace } from '../types/namespace';\n+import { EnvironmentService } from './environment.service';\n+import { LocalStorageService } from './local-storage.service';\n+\n+@Injectable({\n+ providedIn: 'root',\n+})\n+export class CDBNamespaceService {\n+ private _namespacesSubject = new ReplaySubject(1);\n+ private _currentNamespaceSubject = new ReplaySubject(1);\n+\n+ namespacesSubject = this._namespacesSubject.asObservable();\n+ currentNamespaceSubject = this._currentNamespaceSubject.asObservable();\n+\n+ private user: string;\n+ private currentNamespace: Namespace | undefined;", - "comment_created_at": "2023-03-24T15:20:07+00:00", - "comment_author": "tasos-ale", - "comment_body": "Maybe we can do this:\r\n```suggestion\r\n namespaces = this._namespacesSubject.asObservable();\r\n currentNamespace = this._currentNamespaceSubject.asObservable();\r\n\r\n private _user: string;\r\n private _currentNamespace: Namespace | undefined;\r\n```\r\nwdyt @orfeas-k ?", - "pr_file_module": null - }, - { - "comment_id": "1147753168", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7030, - "pr_file": "components/centraldashboard-angular/frontend/src/app/services/namespace.service.ts", - "discussion_id": "1147731722", - "commented_code": "@@ -0,0 +1,201 @@\n+import { Injectable } from '@angular/core';\n+import { ActivatedRoute, NavigationEnd, Router } from '@angular/router';\n+import { ReplaySubject } from 'rxjs';\n+import { 
filter, map, switchMap, take } from 'rxjs/operators';\n+import { getQueryParams, getUrlFragment } from '../shared/utils';\n+import { Namespace } from '../types/namespace';\n+import { EnvironmentService } from './environment.service';\n+import { LocalStorageService } from './local-storage.service';\n+\n+@Injectable({\n+ providedIn: 'root',\n+})\n+export class CDBNamespaceService {\n+ private _namespacesSubject = new ReplaySubject(1);\n+ private _currentNamespaceSubject = new ReplaySubject(1);\n+\n+ namespacesSubject = this._namespacesSubject.asObservable();\n+ currentNamespaceSubject = this._currentNamespaceSubject.asObservable();\n+\n+ private user: string;\n+ private currentNamespace: Namespace | undefined;", - "comment_created_at": "2023-03-24T15:35:45+00:00", - "comment_author": "orfeas-k", - "comment_body": "Sure, but I 'll rename them from `_variable` to `prvVariable` because that's what we do the in the rest of KF WAs as well.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1065742934", - "pr_number": 6856, - "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/iframe-wrapper/iframe-wrapper.component.ts", - "created_at": "2023-01-10T12:45:55+00:00", - "commented_code": "import {\n AfterViewInit,\n Component,\n ElementRef,\n OnDestroy,\n ViewChild,\n} from '@angular/core';\nimport { NavigationEnd, Router } from '@angular/router';\nimport { Subscription } from 'rxjs';\n\n@Component({\n selector: 'app-iframe-wrapper',\n templateUrl: './iframe-wrapper.component.html',\n styleUrls: ['./iframe-wrapper.component.scss'],\n})\nexport class IframeWrapperComponent implements AfterViewInit, OnDestroy {\n @ViewChild('iframe') iframe: ElementRef;\n\n public prvSrcPath: string;", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1065742934", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6856, - "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/iframe-wrapper/iframe-wrapper.component.ts", - "discussion_id": "1065742934", - "commented_code": "@@ -0,0 +1,121 @@\n+import {\n+ AfterViewInit,\n+ Component,\n+ ElementRef,\n+ OnDestroy,\n+ ViewChild,\n+} from '@angular/core';\n+import { NavigationEnd, Router } from '@angular/router';\n+import { Subscription } from 'rxjs';\n+\n+@Component({\n+ selector: 'app-iframe-wrapper',\n+ templateUrl: './iframe-wrapper.component.html',\n+ styleUrls: ['./iframe-wrapper.component.scss'],\n+})\n+export class IframeWrapperComponent implements AfterViewInit, OnDestroy {\n+ @ViewChild('iframe') iframe: ElementRef;\n+\n+ public prvSrcPath: string;", - "comment_created_at": "2023-01-10T12:45:55+00:00", - "comment_author": "kimwnasptd", - "comment_body": "let's make this `private`, to comply with the var's name", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-private-vulnerability-reporting.json b/_reviewers/kubeflow-private-vulnerability-reporting.json new file mode 100644 index 0000000..0a097fa --- /dev/null +++ b/_reviewers/kubeflow-private-vulnerability-reporting.json @@ -0,0 +1,48 @@ +[ + { + "discussion_id": "1195398442", + "pr_number": 7043, + "pr_file": "security/policies-and-procedures.md", + "created_at": "2023-05-16T16:10:44+00:00", + "commented_code": "# Policies and Procedures\n\nCurrently the team is working to finalize this work and have it completed for the 1.8 Release and for the transition to the CNCF.\nThese policies and procedures will be based of the [Kubernetes 
Policy](https://kubernetes.io/docs/reference/issues-security/security/)\nWe are working on updating and editing these policies and procedures for the Kubeflow Community. Our very **rough DRAFT and working docs** can be found [here](https://docs.google.com/document/d/1sjWa0G2UqdsP1QROYVEl9iQR6v1x8HD7-F4TQo7lV6M/edit?usp=sharing)\n\n## The first iteration of the Kubeflow Security Policies and Procedures are as follows*:\n\n*Please note we will continue to iterate on this with each release and as we get commitments from our distributions and volunteers from each of the Kubeflow components and working groups (WG). As the team grows we will work to set up roles and responsibilities to bring us closer in line with the Kubernetes policies and procedures.* \n\nThe Kubeflow Security team (listed below as “we”) will take a multi-level, transparent approach to the security of Kubeflow (product/code). At this time we will base this effort to align with each release. As noted above as we get more volunteers to help with these efforts we will continue to iterate on the cadence. \n\n1. We will encourage Kubeflow users to open issues with the respective WG that the vulnerability was found and tag it with the “security” tag. If they don’t know how to do that or don’t feel comfortable doing so, they can also contact us via our #security slack channel. And if a private mailing list/group is needed we will set that up.", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1195398442", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7043, + "pr_file": "security/policies-and-procedures.md", + "discussion_id": "1195398442", + "commented_code": "@@ -0,0 +1,24 @@\n+# Policies and Procedures\n+\n+Currently the team is working to finalize this work and have it completed for the 1.8 Release and for the transition to the CNCF.\n+These policies and procedures will be based of the [Kubernetes Policy](https://kubernetes.io/docs/reference/issues-security/security/)\n+We are working on updating and editing these policies and procedures for the Kubeflow Community. Our very **rough DRAFT and working docs** can be found [here](https://docs.google.com/document/d/1sjWa0G2UqdsP1QROYVEl9iQR6v1x8HD7-F4TQo7lV6M/edit?usp=sharing)\n+\n+## The first iteration of the Kubeflow Security Policies and Procedures are as follows*:\n+\n+*Please note we will continue to iterate on this with each release and as we get commitments from our distributions and volunteers from each of the Kubeflow components and working groups (WG). As the team grows we will work to set up roles and responsibilities to bring us closer in line with the Kubernetes policies and procedures.* \n+\n+The Kubeflow Security team (listed below as “we”) will take a multi-level, transparent approach to the security of Kubeflow (product/code). At this time we will base this effort to align with each release. As noted above as we get more volunteers to help with these efforts we will continue to iterate on the cadence. \n+\n+1. We will encourage Kubeflow users to open issues with the respective WG that the vulnerability was found and tag it with the “security” tag. If they don’t know how to do that or don’t feel comfortable doing so, they can also contact us via our #security slack channel. And if a private mailing list/group is needed we will set that up. 
", + "comment_created_at": "2023-05-16T16:10:44+00:00", + "comment_author": "zijianjoy", + "comment_body": "It is strongly recommended that we don't add a security tag to an open issue. \r\n\r\nYou can find a security template put up by CNCF in https://github.com/cncf/tag-security/tree/main/project-resources.\r\n\r\nReference: https://github.com/cncf/tag-security/blob/main/project-resources/templates/SECURITY.md#reporting-a-vulnerability\r\n\r\nMost CNCF projects right now use email to notify security issue. GitHub just released their private vuln reporting feature (https://docs.github.com/en/code-security/security-advisories/guidance-on-reporting-and-writing/privately-reporting-a-security-vulnerability). Either email shared among the people who will be responsible for vuln response or this new GitHub reporting channel are fine.", + "pr_file_module": null + }, + { + "comment_id": "1195431510", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7043, + "pr_file": "security/policies-and-procedures.md", + "discussion_id": "1195398442", + "commented_code": "@@ -0,0 +1,24 @@\n+# Policies and Procedures\n+\n+Currently the team is working to finalize this work and have it completed for the 1.8 Release and for the transition to the CNCF.\n+These policies and procedures will be based of the [Kubernetes Policy](https://kubernetes.io/docs/reference/issues-security/security/)\n+We are working on updating and editing these policies and procedures for the Kubeflow Community. Our very **rough DRAFT and working docs** can be found [here](https://docs.google.com/document/d/1sjWa0G2UqdsP1QROYVEl9iQR6v1x8HD7-F4TQo7lV6M/edit?usp=sharing)\n+\n+## The first iteration of the Kubeflow Security Policies and Procedures are as follows*:\n+\n+*Please note we will continue to iterate on this with each release and as we get commitments from our distributions and volunteers from each of the Kubeflow components and working groups (WG). As the team grows we will work to set up roles and responsibilities to bring us closer in line with the Kubernetes policies and procedures.* \n+\n+The Kubeflow Security team (listed below as “we”) will take a multi-level, transparent approach to the security of Kubeflow (product/code). At this time we will base this effort to align with each release. As noted above as we get more volunteers to help with these efforts we will continue to iterate on the cadence. \n+\n+1. We will encourage Kubeflow users to open issues with the respective WG that the vulnerability was found and tag it with the “security” tag. If they don’t know how to do that or don’t feel comfortable doing so, they can also contact us via our #security slack channel. And if a private mailing list/group is needed we will set that up. ", + "comment_created_at": "2023-05-16T16:39:37+00:00", + "comment_author": "akgraner", + "comment_body": "@zijianjoy, I agree, but when I asked about getting an email created for the security team there was some push back. Since Google has to create the email address can you all create a security@kubeflow.org email for the team. The Working Groups asked that security issues related respective working groups be filed there so (at the time) without being able to create an official email for the group and the resources we had available we came up with the above process. I'll take a look at the resources you have listed and rework the process and bring it up to the group on the 24th at our next meeting. 
\r\n\r\nIn order to get this PR approved if I change this file to say we are currently working on the process and a link to our meeting notes would this work for sake of approving this?", + "pr_file_module": null + }, + { + "comment_id": "1195545712", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7043, + "pr_file": "security/policies-and-procedures.md", + "discussion_id": "1195398442", + "commented_code": "@@ -0,0 +1,24 @@\n+# Policies and Procedures\n+\n+Currently the team is working to finalize this work and have it completed for the 1.8 Release and for the transition to the CNCF.\n+These policies and procedures will be based of the [Kubernetes Policy](https://kubernetes.io/docs/reference/issues-security/security/)\n+We are working on updating and editing these policies and procedures for the Kubeflow Community. Our very **rough DRAFT and working docs** can be found [here](https://docs.google.com/document/d/1sjWa0G2UqdsP1QROYVEl9iQR6v1x8HD7-F4TQo7lV6M/edit?usp=sharing)\n+\n+## The first iteration of the Kubeflow Security Policies and Procedures are as follows*:\n+\n+*Please note we will continue to iterate on this with each release and as we get commitments from our distributions and volunteers from each of the Kubeflow components and working groups (WG). As the team grows we will work to set up roles and responsibilities to bring us closer in line with the Kubernetes policies and procedures.* \n+\n+The Kubeflow Security team (listed below as “we”) will take a multi-level, transparent approach to the security of Kubeflow (product/code). At this time we will base this effort to align with each release. As noted above as we get more volunteers to help with these efforts we will continue to iterate on the cadence. \n+\n+1. We will encourage Kubeflow users to open issues with the respective WG that the vulnerability was found and tag it with the “security” tag. If they don’t know how to do that or don’t feel comfortable doing so, they can also contact us via our #security slack channel. And if a private mailing list/group is needed we will set that up. ", + "comment_created_at": "2023-05-16T18:26:44+00:00", + "comment_author": "zijianjoy", + "comment_body": " We are in discussion about creating such email address. Make sense to track this effort in another PR. At the meantime, James Wu has LGTMed your PR here. This PR looks good to me too.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-private-vulnerability-reporting.md b/_reviewers/kubeflow-private-vulnerability-reporting.md index d488c33..d8eb105 100644 --- a/_reviewers/kubeflow-private-vulnerability-reporting.md +++ b/_reviewers/kubeflow-private-vulnerability-reporting.md @@ -34,53 +34,3 @@ Include as much information as possible about the vulnerability. The security te ``` This practice helps protect users while vulnerabilities are being addressed and follows security industry best practices. - - -[ - { - "discussion_id": "1195398442", - "pr_number": 7043, - "pr_file": "security/policies-and-procedures.md", - "created_at": "2023-05-16T16:10:44+00:00", - "commented_code": "# Policies and Procedures\n\nCurrently the team is working to finalize this work and have it completed for the 1.8 Release and for the transition to the CNCF.\nThese policies and procedures will be based of the [Kubernetes Policy](https://kubernetes.io/docs/reference/issues-security/security/)\nWe are working on updating and editing these policies and procedures for the Kubeflow Community. 
Our very **rough DRAFT and working docs** can be found [here](https://docs.google.com/document/d/1sjWa0G2UqdsP1QROYVEl9iQR6v1x8HD7-F4TQo7lV6M/edit?usp=sharing)\n\n## The first iteration of the Kubeflow Security Policies and Procedures are as follows*:\n\n*Please note we will continue to iterate on this with each release and as we get commitments from our distributions and volunteers from each of the Kubeflow components and working groups (WG). As the team grows we will work to set up roles and responsibilities to bring us closer in line with the Kubernetes policies and procedures.* \n\nThe Kubeflow Security team (listed below as \u201cwe\u201d) will take a multi-level, transparent approach to the security of Kubeflow (product/code). At this time we will base this effort to align with each release. As noted above as we get more volunteers to help with these efforts we will continue to iterate on the cadence. \n\n1. We will encourage Kubeflow users to open issues with the respective WG that the vulnerability was found and tag it with the \u201csecurity\u201d tag. If they don\u2019t know how to do that or don\u2019t feel comfortable doing so, they can also contact us via our #security slack channel. And if a private mailing list/group is needed we will set that up.", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1195398442", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7043, - "pr_file": "security/policies-and-procedures.md", - "discussion_id": "1195398442", - "commented_code": "@@ -0,0 +1,24 @@\n+# Policies and Procedures\n+\n+Currently the team is working to finalize this work and have it completed for the 1.8 Release and for the transition to the CNCF.\n+These policies and procedures will be based of the [Kubernetes Policy](https://kubernetes.io/docs/reference/issues-security/security/)\n+We are working on updating and editing these policies and procedures for the Kubeflow Community. Our very **rough DRAFT and working docs** can be found [here](https://docs.google.com/document/d/1sjWa0G2UqdsP1QROYVEl9iQR6v1x8HD7-F4TQo7lV6M/edit?usp=sharing)\n+\n+## The first iteration of the Kubeflow Security Policies and Procedures are as follows*:\n+\n+*Please note we will continue to iterate on this with each release and as we get commitments from our distributions and volunteers from each of the Kubeflow components and working groups (WG). As the team grows we will work to set up roles and responsibilities to bring us closer in line with the Kubernetes policies and procedures.* \n+\n+The Kubeflow Security team (listed below as \u201cwe\u201d) will take a multi-level, transparent approach to the security of Kubeflow (product/code). At this time we will base this effort to align with each release. As noted above as we get more volunteers to help with these efforts we will continue to iterate on the cadence. \n+\n+1. We will encourage Kubeflow users to open issues with the respective WG that the vulnerability was found and tag it with the \u201csecurity\u201d tag. If they don\u2019t know how to do that or don\u2019t feel comfortable doing so, they can also contact us via our #security slack channel. And if a private mailing list/group is needed we will set that up. ", - "comment_created_at": "2023-05-16T16:10:44+00:00", - "comment_author": "zijianjoy", - "comment_body": "It is strongly recommended that we don't add a security tag to an open issue. 
\r\n\r\nYou can find a security template put up by CNCF in https://github.com/cncf/tag-security/tree/main/project-resources.\r\n\r\nReference: https://github.com/cncf/tag-security/blob/main/project-resources/templates/SECURITY.md#reporting-a-vulnerability\r\n\r\nMost CNCF projects right now use email to notify security issue. GitHub just released their private vuln reporting feature (https://docs.github.com/en/code-security/security-advisories/guidance-on-reporting-and-writing/privately-reporting-a-security-vulnerability). Either email shared among the people who will be responsible for vuln response or this new GitHub reporting channel are fine.", - "pr_file_module": null - }, - { - "comment_id": "1195431510", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7043, - "pr_file": "security/policies-and-procedures.md", - "discussion_id": "1195398442", - "commented_code": "@@ -0,0 +1,24 @@\n+# Policies and Procedures\n+\n+Currently the team is working to finalize this work and have it completed for the 1.8 Release and for the transition to the CNCF.\n+These policies and procedures will be based of the [Kubernetes Policy](https://kubernetes.io/docs/reference/issues-security/security/)\n+We are working on updating and editing these policies and procedures for the Kubeflow Community. Our very **rough DRAFT and working docs** can be found [here](https://docs.google.com/document/d/1sjWa0G2UqdsP1QROYVEl9iQR6v1x8HD7-F4TQo7lV6M/edit?usp=sharing)\n+\n+## The first iteration of the Kubeflow Security Policies and Procedures are as follows*:\n+\n+*Please note we will continue to iterate on this with each release and as we get commitments from our distributions and volunteers from each of the Kubeflow components and working groups (WG). As the team grows we will work to set up roles and responsibilities to bring us closer in line with the Kubernetes policies and procedures.* \n+\n+The Kubeflow Security team (listed below as \u201cwe\u201d) will take a multi-level, transparent approach to the security of Kubeflow (product/code). At this time we will base this effort to align with each release. As noted above as we get more volunteers to help with these efforts we will continue to iterate on the cadence. \n+\n+1. We will encourage Kubeflow users to open issues with the respective WG that the vulnerability was found and tag it with the \u201csecurity\u201d tag. If they don\u2019t know how to do that or don\u2019t feel comfortable doing so, they can also contact us via our #security slack channel. And if a private mailing list/group is needed we will set that up. ", - "comment_created_at": "2023-05-16T16:39:37+00:00", - "comment_author": "akgraner", - "comment_body": "@zijianjoy, I agree, but when I asked about getting an email created for the security team there was some push back. Since Google has to create the email address can you all create a security@kubeflow.org email for the team. The Working Groups asked that security issues related respective working groups be filed there so (at the time) without being able to create an official email for the group and the resources we had available we came up with the above process. I'll take a look at the resources you have listed and rework the process and bring it up to the group on the 24th at our next meeting. 
\r\n\r\nIn order to get this PR approved if I change this file to say we are currently working on the process and a link to our meeting notes would this work for sake of approving this?", - "pr_file_module": null - }, - { - "comment_id": "1195545712", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7043, - "pr_file": "security/policies-and-procedures.md", - "discussion_id": "1195398442", - "commented_code": "@@ -0,0 +1,24 @@\n+# Policies and Procedures\n+\n+Currently the team is working to finalize this work and have it completed for the 1.8 Release and for the transition to the CNCF.\n+These policies and procedures will be based of the [Kubernetes Policy](https://kubernetes.io/docs/reference/issues-security/security/)\n+We are working on updating and editing these policies and procedures for the Kubeflow Community. Our very **rough DRAFT and working docs** can be found [here](https://docs.google.com/document/d/1sjWa0G2UqdsP1QROYVEl9iQR6v1x8HD7-F4TQo7lV6M/edit?usp=sharing)\n+\n+## The first iteration of the Kubeflow Security Policies and Procedures are as follows*:\n+\n+*Please note we will continue to iterate on this with each release and as we get commitments from our distributions and volunteers from each of the Kubeflow components and working groups (WG). As the team grows we will work to set up roles and responsibilities to bring us closer in line with the Kubernetes policies and procedures.* \n+\n+The Kubeflow Security team (listed below as \u201cwe\u201d) will take a multi-level, transparent approach to the security of Kubeflow (product/code). At this time we will base this effort to align with each release. As noted above as we get more volunteers to help with these efforts we will continue to iterate on the cadence. \n+\n+1. We will encourage Kubeflow users to open issues with the respective WG that the vulnerability was found and tag it with the \u201csecurity\u201d tag. If they don\u2019t know how to do that or don\u2019t feel comfortable doing so, they can also contact us via our #security slack channel. And if a private mailing list/group is needed we will set that up. ", - "comment_created_at": "2023-05-16T18:26:44+00:00", - "comment_author": "zijianjoy", - "comment_body": " We are in discussion about creating such email address. Make sense to track this effort in another PR. At the meantime, James Wu has LGTMed your PR here. 
This PR looks good to me too.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-reduce-nesting-depth.json b/_reviewers/kubeflow-reduce-nesting-depth.json new file mode 100644 index 0000000..3ca6e68 --- /dev/null +++ b/_reviewers/kubeflow-reduce-nesting-depth.json @@ -0,0 +1,70 @@ +[ + { + "discussion_id": "1135937150", + "pr_number": 7030, + "pr_file": "components/centraldashboard-angular/frontend/src/app/services/namespace.service.ts", + "created_at": "2023-03-14T17:18:35+00:00", + "commented_code": "import { Injectable } from '@angular/core';\nimport { ActivatedRoute, NavigationEnd, Params, Router } from '@angular/router';\nimport { ReplaySubject } from 'rxjs';\nimport { getQueryParams, getUrlFragment } from '../shared/utils';\nimport { Namespace } from '../types/namespace';\nimport { EnvironmentService } from './environment.service';\nimport { LocalStorageService } from './local-storage.service';\n\n@Injectable({\n providedIn: 'root',\n})\nexport class CDBNamespaceService {\n public namespacesSubject = new ReplaySubject();\n public currentNamespaceSubject = new ReplaySubject();\n public allNamespacesAllowedSubject = new ReplaySubject();\n\n private namespaces: Namespace[];\n private user: string;\n private allNamespacesAllowed: boolean;\n private currentNamespace: Namespace | undefined;\n\n readonly ALL_NAMESPACES_ALLOWED_LIST = [\n 'jupyter',\n 'volumes',\n 'tensorboards',\n 'katib',\n 'models',\n ];\n readonly ALL_NAMESPACES = 'All namespaces';\n readonly allNamespacesOption: Namespace = {\n namespace: this.ALL_NAMESPACES,\n role: '',\n user: '',\n };\n\n constructor(\n private env: EnvironmentService,\n private localStorage: LocalStorageService,\n private router: Router,\n private route: ActivatedRoute,\n ) {\n this.env.user.subscribe((user: string) => {\n this.user = user;\n });\n\n this.env.namespaces.subscribe((namespaces: Namespace[]) => {\n namespaces = [this.allNamespacesOption, ...namespaces];\n this.namespaces = namespaces;\n this.namespacesSubject.next(namespaces);\n\n this.allNamespacesAllowed = this.calcAllNamespacesOption(this.router.url);\n this.allNamespacesAllowedSubject.next(this.allNamespacesAllowed);\n\n this.setCurrentNamespace(namespaces, this.user);\n });\n\n this.router.events.subscribe(event => {\n if (event instanceof NavigationEnd) {", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1135937150", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7030, + "pr_file": "components/centraldashboard-angular/frontend/src/app/services/namespace.service.ts", + "discussion_id": "1135937150", + "commented_code": "@@ -0,0 +1,199 @@\n+import { Injectable } from '@angular/core';\n+import { ActivatedRoute, NavigationEnd, Params, Router } from '@angular/router';\n+import { ReplaySubject } from 'rxjs';\n+import { getQueryParams, getUrlFragment } from '../shared/utils';\n+import { Namespace } from '../types/namespace';\n+import { EnvironmentService } from './environment.service';\n+import { LocalStorageService } from './local-storage.service';\n+\n+@Injectable({\n+ providedIn: 'root',\n+})\n+export class CDBNamespaceService {\n+ public namespacesSubject = new ReplaySubject();\n+ public currentNamespaceSubject = new ReplaySubject();\n+ public allNamespacesAllowedSubject = new ReplaySubject();\n+\n+ private namespaces: Namespace[];\n+ private user: string;\n+ private allNamespacesAllowed: boolean;\n+ private currentNamespace: Namespace | undefined;\n+\n+ readonly ALL_NAMESPACES_ALLOWED_LIST = [\n+ 
'jupyter',\n+ 'volumes',\n+ 'tensorboards',\n+ 'katib',\n+ 'models',\n+ ];\n+ readonly ALL_NAMESPACES = 'All namespaces';\n+ readonly allNamespacesOption: Namespace = {\n+ namespace: this.ALL_NAMESPACES,\n+ role: '',\n+ user: '',\n+ };\n+\n+ constructor(\n+ private env: EnvironmentService,\n+ private localStorage: LocalStorageService,\n+ private router: Router,\n+ private route: ActivatedRoute,\n+ ) {\n+ this.env.user.subscribe((user: string) => {\n+ this.user = user;\n+ });\n+\n+ this.env.namespaces.subscribe((namespaces: Namespace[]) => {\n+ namespaces = [this.allNamespacesOption, ...namespaces];\n+ this.namespaces = namespaces;\n+ this.namespacesSubject.next(namespaces);\n+\n+ this.allNamespacesAllowed = this.calcAllNamespacesOption(this.router.url);\n+ this.allNamespacesAllowedSubject.next(this.allNamespacesAllowed);\n+\n+ this.setCurrentNamespace(namespaces, this.user);\n+ });\n+\n+ this.router.events.subscribe(event => {\n+ if (event instanceof NavigationEnd) {", + "comment_created_at": "2023-03-14T17:18:35+00:00", + "comment_author": "tasos-ale", + "comment_body": "We can use a `filter()` and remove the `if statement` since we only care for `NavigationEnd` ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "443845848", + "pr_number": 5086, + "pr_file": "components/jupyter-web-app/frontend/src/app/resource-form/volume/volume.component.ts", + "created_at": "2020-06-22T21:50:01+00:00", + "commented_code": "}\n }\n\n// ----- onChange of the New / Existing Volume -----\n selectType(event): void {\n this.typeSelected = event.value;\n if (this.typeSelected === 'New') {\n this.volume.controls.name.setValue(this.currentVolName);\n }", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "443845848", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5086, + "pr_file": "components/jupyter-web-app/frontend/src/app/resource-form/volume/volume.component.ts", + "discussion_id": "443845848", + "commented_code": "@@ -58,6 +63,14 @@ export class VolumeComponent implements OnInit, OnDestroy {\n }\n }\n \n+// ----- onChange of the New / Existing Volume -----\n+ selectType(event): void {\n+ this.typeSelected = event.value;\n+ if (this.typeSelected === 'New') {\n+ this.volume.controls.name.setValue(this.currentVolName);\n+ }", + "comment_created_at": "2020-06-22T21:50:01+00:00", + "comment_author": "avdaredevil", + "comment_body": "*nit*: Prevent unnecessary code depth\r\n```suggestion\r\n if (this.typeSelected !== 'New') return;\r\n this.volume.controls.name.setValue(this.currentVolName);\r\n```", + "pr_file_module": null + }, + { + "comment_id": "443890027", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5086, + "pr_file": "components/jupyter-web-app/frontend/src/app/resource-form/volume/volume.component.ts", + "discussion_id": "443845848", + "commented_code": "@@ -58,6 +63,14 @@ export class VolumeComponent implements OnInit, OnDestroy {\n }\n }\n \n+// ----- onChange of the New / Existing Volume -----\n+ selectType(event): void {\n+ this.typeSelected = event.value;\n+ if (this.typeSelected === 'New') {\n+ this.volume.controls.name.setValue(this.currentVolName);\n+ }", + "comment_created_at": "2020-06-23T00:08:14+00:00", + "comment_author": "lalithvaka", + "comment_body": "Will test this and commit again", + "pr_file_module": null + }, + { + "comment_id": "443902905", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5086, + "pr_file": "components/jupyter-web-app/frontend/src/app/resource-form/volume/volume.component.ts", + 
"discussion_id": "443845848", + "commented_code": "@@ -58,6 +63,14 @@ export class VolumeComponent implements OnInit, OnDestroy {\n }\n }\n \n+// ----- onChange of the New / Existing Volume -----\n+ selectType(event): void {\n+ this.typeSelected = event.value;\n+ if (this.typeSelected === 'New') {\n+ this.volume.controls.name.setValue(this.currentVolName);\n+ }", + "comment_created_at": "2020-06-23T00:57:33+00:00", + "comment_author": "lalithvaka", + "comment_body": "Updated and submitted the commit", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-reduce-nesting-depth.md b/_reviewers/kubeflow-reduce-nesting-depth.md index fea0ef9..b31fb0c 100644 --- a/_reviewers/kubeflow-reduce-nesting-depth.md +++ b/_reviewers/kubeflow-reduce-nesting-depth.md @@ -46,75 +46,3 @@ this.router.events // Do something }); ``` - - -[ - { - "discussion_id": "1135937150", - "pr_number": 7030, - "pr_file": "components/centraldashboard-angular/frontend/src/app/services/namespace.service.ts", - "created_at": "2023-03-14T17:18:35+00:00", - "commented_code": "import { Injectable } from '@angular/core';\nimport { ActivatedRoute, NavigationEnd, Params, Router } from '@angular/router';\nimport { ReplaySubject } from 'rxjs';\nimport { getQueryParams, getUrlFragment } from '../shared/utils';\nimport { Namespace } from '../types/namespace';\nimport { EnvironmentService } from './environment.service';\nimport { LocalStorageService } from './local-storage.service';\n\n@Injectable({\n providedIn: 'root',\n})\nexport class CDBNamespaceService {\n public namespacesSubject = new ReplaySubject();\n public currentNamespaceSubject = new ReplaySubject();\n public allNamespacesAllowedSubject = new ReplaySubject();\n\n private namespaces: Namespace[];\n private user: string;\n private allNamespacesAllowed: boolean;\n private currentNamespace: Namespace | undefined;\n\n readonly ALL_NAMESPACES_ALLOWED_LIST = [\n 'jupyter',\n 'volumes',\n 'tensorboards',\n 'katib',\n 'models',\n ];\n readonly ALL_NAMESPACES = 'All namespaces';\n readonly allNamespacesOption: Namespace = {\n namespace: this.ALL_NAMESPACES,\n role: '',\n user: '',\n };\n\n constructor(\n private env: EnvironmentService,\n private localStorage: LocalStorageService,\n private router: Router,\n private route: ActivatedRoute,\n ) {\n this.env.user.subscribe((user: string) => {\n this.user = user;\n });\n\n this.env.namespaces.subscribe((namespaces: Namespace[]) => {\n namespaces = [this.allNamespacesOption, ...namespaces];\n this.namespaces = namespaces;\n this.namespacesSubject.next(namespaces);\n\n this.allNamespacesAllowed = this.calcAllNamespacesOption(this.router.url);\n this.allNamespacesAllowedSubject.next(this.allNamespacesAllowed);\n\n this.setCurrentNamespace(namespaces, this.user);\n });\n\n this.router.events.subscribe(event => {\n if (event instanceof NavigationEnd) {", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1135937150", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7030, - "pr_file": "components/centraldashboard-angular/frontend/src/app/services/namespace.service.ts", - "discussion_id": "1135937150", - "commented_code": "@@ -0,0 +1,199 @@\n+import { Injectable } from '@angular/core';\n+import { ActivatedRoute, NavigationEnd, Params, Router } from '@angular/router';\n+import { ReplaySubject } from 'rxjs';\n+import { getQueryParams, getUrlFragment } from '../shared/utils';\n+import { Namespace } from '../types/namespace';\n+import { EnvironmentService } 
from './environment.service';\n+import { LocalStorageService } from './local-storage.service';\n+\n+@Injectable({\n+ providedIn: 'root',\n+})\n+export class CDBNamespaceService {\n+ public namespacesSubject = new ReplaySubject();\n+ public currentNamespaceSubject = new ReplaySubject();\n+ public allNamespacesAllowedSubject = new ReplaySubject();\n+\n+ private namespaces: Namespace[];\n+ private user: string;\n+ private allNamespacesAllowed: boolean;\n+ private currentNamespace: Namespace | undefined;\n+\n+ readonly ALL_NAMESPACES_ALLOWED_LIST = [\n+ 'jupyter',\n+ 'volumes',\n+ 'tensorboards',\n+ 'katib',\n+ 'models',\n+ ];\n+ readonly ALL_NAMESPACES = 'All namespaces';\n+ readonly allNamespacesOption: Namespace = {\n+ namespace: this.ALL_NAMESPACES,\n+ role: '',\n+ user: '',\n+ };\n+\n+ constructor(\n+ private env: EnvironmentService,\n+ private localStorage: LocalStorageService,\n+ private router: Router,\n+ private route: ActivatedRoute,\n+ ) {\n+ this.env.user.subscribe((user: string) => {\n+ this.user = user;\n+ });\n+\n+ this.env.namespaces.subscribe((namespaces: Namespace[]) => {\n+ namespaces = [this.allNamespacesOption, ...namespaces];\n+ this.namespaces = namespaces;\n+ this.namespacesSubject.next(namespaces);\n+\n+ this.allNamespacesAllowed = this.calcAllNamespacesOption(this.router.url);\n+ this.allNamespacesAllowedSubject.next(this.allNamespacesAllowed);\n+\n+ this.setCurrentNamespace(namespaces, this.user);\n+ });\n+\n+ this.router.events.subscribe(event => {\n+ if (event instanceof NavigationEnd) {", - "comment_created_at": "2023-03-14T17:18:35+00:00", - "comment_author": "tasos-ale", - "comment_body": "We can use a `filter()` and remove the `if statement` since we only care for `NavigationEnd` ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "443845848", - "pr_number": 5086, - "pr_file": "components/jupyter-web-app/frontend/src/app/resource-form/volume/volume.component.ts", - "created_at": "2020-06-22T21:50:01+00:00", - "commented_code": "}\n }\n\n// ----- onChange of the New / Existing Volume -----\n selectType(event): void {\n this.typeSelected = event.value;\n if (this.typeSelected === 'New') {\n this.volume.controls.name.setValue(this.currentVolName);\n }", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "443845848", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5086, - "pr_file": "components/jupyter-web-app/frontend/src/app/resource-form/volume/volume.component.ts", - "discussion_id": "443845848", - "commented_code": "@@ -58,6 +63,14 @@ export class VolumeComponent implements OnInit, OnDestroy {\n }\n }\n \n+// ----- onChange of the New / Existing Volume -----\n+ selectType(event): void {\n+ this.typeSelected = event.value;\n+ if (this.typeSelected === 'New') {\n+ this.volume.controls.name.setValue(this.currentVolName);\n+ }", - "comment_created_at": "2020-06-22T21:50:01+00:00", - "comment_author": "avdaredevil", - "comment_body": "*nit*: Prevent unnecessary code depth\r\n```suggestion\r\n if (this.typeSelected !== 'New') return;\r\n this.volume.controls.name.setValue(this.currentVolName);\r\n```", - "pr_file_module": null - }, - { - "comment_id": "443890027", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5086, - "pr_file": "components/jupyter-web-app/frontend/src/app/resource-form/volume/volume.component.ts", - "discussion_id": "443845848", - "commented_code": "@@ -58,6 +63,14 @@ export class VolumeComponent implements OnInit, OnDestroy {\n }\n }\n \n+// ----- onChange of the New / Existing 
Volume -----\n+ selectType(event): void {\n+ this.typeSelected = event.value;\n+ if (this.typeSelected === 'New') {\n+ this.volume.controls.name.setValue(this.currentVolName);\n+ }", - "comment_created_at": "2020-06-23T00:08:14+00:00", - "comment_author": "lalithvaka", - "comment_body": "Will test this and commit again", - "pr_file_module": null - }, - { - "comment_id": "443902905", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5086, - "pr_file": "components/jupyter-web-app/frontend/src/app/resource-form/volume/volume.component.ts", - "discussion_id": "443845848", - "commented_code": "@@ -58,6 +63,14 @@ export class VolumeComponent implements OnInit, OnDestroy {\n }\n }\n \n+// ----- onChange of the New / Existing Volume -----\n+ selectType(event): void {\n+ this.typeSelected = event.value;\n+ if (this.typeSelected === 'New') {\n+ this.volume.controls.name.setValue(this.currentVolName);\n+ }", - "comment_created_at": "2020-06-23T00:57:33+00:00", - "comment_author": "lalithvaka", - "comment_body": "Updated and submitted the commit", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-safe-url-operations.json b/_reviewers/kubeflow-safe-url-operations.json new file mode 100644 index 0000000..633074f --- /dev/null +++ b/_reviewers/kubeflow-safe-url-operations.json @@ -0,0 +1,58 @@ +[ + { + "discussion_id": "1116709963", + "pr_number": 6953, + "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/main-page/main-page.component.ts", + "created_at": "2023-02-24T09:20:02+00:00", + "commented_code": "computeBuildValue(label: string, buildValue: string): string {\n return `${label} ${buildValue}`;\n }\n\n handleDashboardLinks(links: DashboardLinks) {\n const { menuLinks, externalLinks, quickLinks, documentationItems } = links;\n this.menuLinks = menuLinks || [];\n this.externalLinks = externalLinks || [];\n this.quickLinks = quickLinks || [];\n this.documentationItems = documentationItems || [];\n }\n\n getUrlPath(url: string) {\n const urlWithoutFragment = url.split('#')[0];\n return this.appendPrefix(urlWithoutFragment);\n }\n\n getUrlFragment(url: string): string {\n const fragment = url.split('#')[1];\n return fragment;\n }\n\n appendPrefix(url: string): string {\n return '/_' + url;\n }\n\n isLinkActive(url: string): boolean {\n let browserUrl = this.router.url;\n browserUrl = appendBackslash(browserUrl);\n url = appendBackslash(url);\n /**\n * We need this condition because Pipelines Web App removes\n * the 's' from its path when navigating inside a Recurring\n * Run's details page\n */\n if (\n browserUrl.includes('pipeline/#/recurringrun') &&\n url.includes('/pipeline/#/recurringruns')\n ) {\n return true;\n }\n\n return browserUrl.includes(url);", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1116709963", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6953, + "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/main-page/main-page.component.ts", + "discussion_id": "1116709963", + "commented_code": "@@ -63,4 +83,45 @@ export class MainPageComponent implements OnInit, OnDestroy {\n computeBuildValue(label: string, buildValue: string): string {\n return `${label} ${buildValue}`;\n }\n+\n+ handleDashboardLinks(links: DashboardLinks) {\n+ const { menuLinks, externalLinks, quickLinks, documentationItems } = links;\n+ this.menuLinks = menuLinks || [];\n+ this.externalLinks = externalLinks || [];\n+ this.quickLinks = quickLinks || [];\n+ this.documentationItems = documentationItems 
|| [];\n+ }\n+\n+ getUrlPath(url: string) {\n+ const urlWithoutFragment = url.split('#')[0];\n+ return this.appendPrefix(urlWithoutFragment);\n+ }\n+\n+ getUrlFragment(url: string): string {\n+ const fragment = url.split('#')[1];\n+ return fragment;\n+ }\n+\n+ appendPrefix(url: string): string {\n+ return '/_' + url;\n+ }\n+\n+ isLinkActive(url: string): boolean {\n+ let browserUrl = this.router.url;\n+ browserUrl = appendBackslash(browserUrl);\n+ url = appendBackslash(url);\n+ /**\n+ * We need this condition because Pipelines Web App removes\n+ * the 's' from its path when navigating inside a Recurring\n+ * Run's details page\n+ */\n+ if (\n+ browserUrl.includes('pipeline/#/recurringrun') &&\n+ url.includes('/pipeline/#/recurringruns')\n+ ) {\n+ return true;\n+ }\n+\n+ return browserUrl.includes(url);", + "comment_created_at": "2023-02-24T09:20:02+00:00", + "comment_author": "tasos-ale", + "comment_body": "It works, but it will be safer to use `startsWith()` in case we end up with a `url` inside the `browserUrl` that we don't want to match ", + "pr_file_module": null + }, + { + "comment_id": "1117244598", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6953, + "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/main-page/main-page.component.ts", + "discussion_id": "1116709963", + "commented_code": "@@ -63,4 +83,45 @@ export class MainPageComponent implements OnInit, OnDestroy {\n computeBuildValue(label: string, buildValue: string): string {\n return `${label} ${buildValue}`;\n }\n+\n+ handleDashboardLinks(links: DashboardLinks) {\n+ const { menuLinks, externalLinks, quickLinks, documentationItems } = links;\n+ this.menuLinks = menuLinks || [];\n+ this.externalLinks = externalLinks || [];\n+ this.quickLinks = quickLinks || [];\n+ this.documentationItems = documentationItems || [];\n+ }\n+\n+ getUrlPath(url: string) {\n+ const urlWithoutFragment = url.split('#')[0];\n+ return this.appendPrefix(urlWithoutFragment);\n+ }\n+\n+ getUrlFragment(url: string): string {\n+ const fragment = url.split('#')[1];\n+ return fragment;\n+ }\n+\n+ appendPrefix(url: string): string {\n+ return '/_' + url;\n+ }\n+\n+ isLinkActive(url: string): boolean {\n+ let browserUrl = this.router.url;\n+ browserUrl = appendBackslash(browserUrl);\n+ url = appendBackslash(url);\n+ /**\n+ * We need this condition because Pipelines Web App removes\n+ * the 's' from its path when navigating inside a Recurring\n+ * Run's details page\n+ */\n+ if (\n+ browserUrl.includes('pipeline/#/recurringrun') &&\n+ url.includes('/pipeline/#/recurringruns')\n+ ) {\n+ return true;\n+ }\n+\n+ return browserUrl.includes(url);", + "comment_created_at": "2023-02-24T16:08:55+00:00", + "comment_author": "orfeas-k", + "comment_body": "Sure, I removed the prefix from browser's URL and changed that to `startsWith()`.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1090461801", + "pr_number": 6895, + "pr_file": "components/centraldashboard-angular/frontend/src/app/shared/utils.ts", + "created_at": "2023-01-30T10:51:33+00:00", + "commented_code": "/**\n * We treat URLs with or without a trailing slash as the same\n * URL. 
Thus, in order to compare URLs, we need to use\n * appendBackslash for both URLS to avoid false statements\n * in cases where they only differ in the trailing slash.\n */\nexport function equalUrlPaths(firstUrl: string, secondUrl: string | undefined) {\n if (!firstUrl && !secondUrl) {\n console.warn(`Got undefined URLs ${firstUrl} and ${secondUrl}`);\n return true;\n }\n if (!firstUrl || !secondUrl) {\n return false;\n }\n firstUrl = appendBackslash(firstUrl);\n secondUrl = appendBackslash(secondUrl);\n return firstUrl === secondUrl;\n}\n\n/**\n * Appends a trailing slash either at the end of the URL\n * or at the end of path, just before query parameters\n */\nexport function appendBackslash(url: string): string {\n const href = window.location.origin + url;\n const urlObject = new URL(href);", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1090461801", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6895, + "pr_file": "components/centraldashboard-angular/frontend/src/app/shared/utils.ts", + "discussion_id": "1090461801", + "commented_code": "@@ -0,0 +1,37 @@\n+/**\n+ * We treat URLs with or without a trailing slash as the same\n+ * URL. Thus, in order to compare URLs, we need to use\n+ * appendBackslash for both URLS to avoid false statements\n+ * in cases where they only differ in the trailing slash.\n+ */\n+export function equalUrlPaths(firstUrl: string, secondUrl: string | undefined) {\n+ if (!firstUrl && !secondUrl) {\n+ console.warn(`Got undefined URLs ${firstUrl} and ${secondUrl}`);\n+ return true;\n+ }\n+ if (!firstUrl || !secondUrl) {\n+ return false;\n+ }\n+ firstUrl = appendBackslash(firstUrl);\n+ secondUrl = appendBackslash(secondUrl);\n+ return firstUrl === secondUrl;\n+}\n+\n+/**\n+ * Appends a trailing slash either at the end of the URL\n+ * or at the end of path, just before query parameters\n+ */\n+export function appendBackslash(url: string): string {\n+ const href = window.location.origin + url;\n+ const urlObject = new URL(href);", + "comment_created_at": "2023-01-30T10:51:33+00:00", + "comment_author": "tasos-ale", + "comment_body": "We can make that change in case `url` does not start with `/`.\r\n```suggestion\r\n const urlObject = new URL(url, window.location.origin);\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-safe-url-operations.md b/_reviewers/kubeflow-safe-url-operations.md index 62a7083..6a53345 100644 --- a/_reviewers/kubeflow-safe-url-operations.md +++ b/_reviewers/kubeflow-safe-url-operations.md @@ -37,63 +37,3 @@ const urlObject = new URL(url, window.location.origin); ``` These practices ensure reliable API interactions and prevent navigation edge cases when handling routes and endpoints. 
- - -[ - { - "discussion_id": "1116709963", - "pr_number": 6953, - "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/main-page/main-page.component.ts", - "created_at": "2023-02-24T09:20:02+00:00", - "commented_code": "computeBuildValue(label: string, buildValue: string): string {\n return `${label} ${buildValue}`;\n }\n\n handleDashboardLinks(links: DashboardLinks) {\n const { menuLinks, externalLinks, quickLinks, documentationItems } = links;\n this.menuLinks = menuLinks || [];\n this.externalLinks = externalLinks || [];\n this.quickLinks = quickLinks || [];\n this.documentationItems = documentationItems || [];\n }\n\n getUrlPath(url: string) {\n const urlWithoutFragment = url.split('#')[0];\n return this.appendPrefix(urlWithoutFragment);\n }\n\n getUrlFragment(url: string): string {\n const fragment = url.split('#')[1];\n return fragment;\n }\n\n appendPrefix(url: string): string {\n return '/_' + url;\n }\n\n isLinkActive(url: string): boolean {\n let browserUrl = this.router.url;\n browserUrl = appendBackslash(browserUrl);\n url = appendBackslash(url);\n /**\n * We need this condition because Pipelines Web App removes\n * the 's' from its path when navigating inside a Recurring\n * Run's details page\n */\n if (\n browserUrl.includes('pipeline/#/recurringrun') &&\n url.includes('/pipeline/#/recurringruns')\n ) {\n return true;\n }\n\n return browserUrl.includes(url);", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1116709963", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6953, - "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/main-page/main-page.component.ts", - "discussion_id": "1116709963", - "commented_code": "@@ -63,4 +83,45 @@ export class MainPageComponent implements OnInit, OnDestroy {\n computeBuildValue(label: string, buildValue: string): string {\n return `${label} ${buildValue}`;\n }\n+\n+ handleDashboardLinks(links: DashboardLinks) {\n+ const { menuLinks, externalLinks, quickLinks, documentationItems } = links;\n+ this.menuLinks = menuLinks || [];\n+ this.externalLinks = externalLinks || [];\n+ this.quickLinks = quickLinks || [];\n+ this.documentationItems = documentationItems || [];\n+ }\n+\n+ getUrlPath(url: string) {\n+ const urlWithoutFragment = url.split('#')[0];\n+ return this.appendPrefix(urlWithoutFragment);\n+ }\n+\n+ getUrlFragment(url: string): string {\n+ const fragment = url.split('#')[1];\n+ return fragment;\n+ }\n+\n+ appendPrefix(url: string): string {\n+ return '/_' + url;\n+ }\n+\n+ isLinkActive(url: string): boolean {\n+ let browserUrl = this.router.url;\n+ browserUrl = appendBackslash(browserUrl);\n+ url = appendBackslash(url);\n+ /**\n+ * We need this condition because Pipelines Web App removes\n+ * the 's' from its path when navigating inside a Recurring\n+ * Run's details page\n+ */\n+ if (\n+ browserUrl.includes('pipeline/#/recurringrun') &&\n+ url.includes('/pipeline/#/recurringruns')\n+ ) {\n+ return true;\n+ }\n+\n+ return browserUrl.includes(url);", - "comment_created_at": "2023-02-24T09:20:02+00:00", - "comment_author": "tasos-ale", - "comment_body": "It works, but it will be safer to use `startsWith()` in case we end up with a `url` inside the `browserUrl` that we don't want to match ", - "pr_file_module": null - }, - { - "comment_id": "1117244598", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6953, - "pr_file": "components/centraldashboard-angular/frontend/src/app/pages/main-page/main-page.component.ts", - "discussion_id": "1116709963", 
- "commented_code": "@@ -63,4 +83,45 @@ export class MainPageComponent implements OnInit, OnDestroy {\n computeBuildValue(label: string, buildValue: string): string {\n return `${label} ${buildValue}`;\n }\n+\n+ handleDashboardLinks(links: DashboardLinks) {\n+ const { menuLinks, externalLinks, quickLinks, documentationItems } = links;\n+ this.menuLinks = menuLinks || [];\n+ this.externalLinks = externalLinks || [];\n+ this.quickLinks = quickLinks || [];\n+ this.documentationItems = documentationItems || [];\n+ }\n+\n+ getUrlPath(url: string) {\n+ const urlWithoutFragment = url.split('#')[0];\n+ return this.appendPrefix(urlWithoutFragment);\n+ }\n+\n+ getUrlFragment(url: string): string {\n+ const fragment = url.split('#')[1];\n+ return fragment;\n+ }\n+\n+ appendPrefix(url: string): string {\n+ return '/_' + url;\n+ }\n+\n+ isLinkActive(url: string): boolean {\n+ let browserUrl = this.router.url;\n+ browserUrl = appendBackslash(browserUrl);\n+ url = appendBackslash(url);\n+ /**\n+ * We need this condition because Pipelines Web App removes\n+ * the 's' from its path when navigating inside a Recurring\n+ * Run's details page\n+ */\n+ if (\n+ browserUrl.includes('pipeline/#/recurringrun') &&\n+ url.includes('/pipeline/#/recurringruns')\n+ ) {\n+ return true;\n+ }\n+\n+ return browserUrl.includes(url);", - "comment_created_at": "2023-02-24T16:08:55+00:00", - "comment_author": "orfeas-k", - "comment_body": "Sure, I removed the prefix from browser's URL and changed that to `startsWith()`.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1090461801", - "pr_number": 6895, - "pr_file": "components/centraldashboard-angular/frontend/src/app/shared/utils.ts", - "created_at": "2023-01-30T10:51:33+00:00", - "commented_code": "/**\n * We treat URLs with or without a trailing slash as the same\n * URL. Thus, in order to compare URLs, we need to use\n * appendBackslash for both URLS to avoid false statements\n * in cases where they only differ in the trailing slash.\n */\nexport function equalUrlPaths(firstUrl: string, secondUrl: string | undefined) {\n if (!firstUrl && !secondUrl) {\n console.warn(`Got undefined URLs ${firstUrl} and ${secondUrl}`);\n return true;\n }\n if (!firstUrl || !secondUrl) {\n return false;\n }\n firstUrl = appendBackslash(firstUrl);\n secondUrl = appendBackslash(secondUrl);\n return firstUrl === secondUrl;\n}\n\n/**\n * Appends a trailing slash either at the end of the URL\n * or at the end of path, just before query parameters\n */\nexport function appendBackslash(url: string): string {\n const href = window.location.origin + url;\n const urlObject = new URL(href);", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1090461801", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6895, - "pr_file": "components/centraldashboard-angular/frontend/src/app/shared/utils.ts", - "discussion_id": "1090461801", - "commented_code": "@@ -0,0 +1,37 @@\n+/**\n+ * We treat URLs with or without a trailing slash as the same\n+ * URL. 
Thus, in order to compare URLs, we need to use\n+ * appendBackslash for both URLS to avoid false statements\n+ * in cases where they only differ in the trailing slash.\n+ */\n+export function equalUrlPaths(firstUrl: string, secondUrl: string | undefined) {\n+ if (!firstUrl && !secondUrl) {\n+ console.warn(`Got undefined URLs ${firstUrl} and ${secondUrl}`);\n+ return true;\n+ }\n+ if (!firstUrl || !secondUrl) {\n+ return false;\n+ }\n+ firstUrl = appendBackslash(firstUrl);\n+ secondUrl = appendBackslash(secondUrl);\n+ return firstUrl === secondUrl;\n+}\n+\n+/**\n+ * Appends a trailing slash either at the end of the URL\n+ * or at the end of path, just before query parameters\n+ */\n+export function appendBackslash(url: string): string {\n+ const href = window.location.origin + url;\n+ const urlObject = new URL(href);", - "comment_created_at": "2023-01-30T10:51:33+00:00", - "comment_author": "tasos-ale", - "comment_body": "We can make that change in case `url` does not start with `/`.\r\n```suggestion\r\n const urlObject = new URL(url, window.location.origin);\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-simplify-code-structure.json b/_reviewers/kubeflow-simplify-code-structure.json new file mode 100644 index 0000000..be57593 --- /dev/null +++ b/_reviewers/kubeflow-simplify-code-structure.json @@ -0,0 +1,192 @@ +[ + { + "discussion_id": "857440867", + "pr_number": 6435, + "pr_file": "components/profile-controller/controllers/profile_controller.go", + "created_at": "2022-04-25T09:39:08+00:00", + "commented_code": "},\n\t\t\t})\n\t\t}\n\t}\n\tif err := r.Update(ctx, profileIns); err != nil {\n\t\treturn err\n\t\tif err := r.Update(ctx, profileIns); err != nil {\n\t\t\treturn err\n\t\t}", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "857440867", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6435, + "pr_file": "components/profile-controller/controllers/profile_controller.go", + "discussion_id": "857440867", + "commented_code": "@@ -710,9 +710,10 @@ func (r *ProfileReconciler) PatchDefaultPluginSpec(ctx context.Context, profileI\n \t\t\t\t},\n \t\t\t})\n \t\t}\n-\t}\n-\tif err := r.Update(ctx, profileIns); err != nil {\n-\t\treturn err\n+\t\tif err := r.Update(ctx, profileIns); err != nil {\n+\t\t\treturn err\n+\t\t}\n+", + "comment_created_at": "2022-04-25T09:39:08+00:00", + "comment_author": "umka1332", + "comment_body": "You should put it even higher - there is no point in updating the profile unless something is changed in this profile. And `profileIns` is changed only in the innermost if statement.", + "pr_file_module": null + }, + { + "comment_id": "881192272", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6435, + "pr_file": "components/profile-controller/controllers/profile_controller.go", + "discussion_id": "857440867", + "commented_code": "@@ -710,9 +710,10 @@ func (r *ProfileReconciler) PatchDefaultPluginSpec(ctx context.Context, profileI\n \t\t\t\t},\n \t\t\t})\n \t\t}\n-\t}\n-\tif err := r.Update(ctx, profileIns); err != nil {\n-\t\treturn err\n+\t\tif err := r.Update(ctx, profileIns); err != nil {\n+\t\t\treturn err\n+\t\t}\n+", + "comment_created_at": "2022-05-25T03:50:59+00:00", + "comment_author": "henrysecond1", + "comment_body": "Nice catch! 
Thank you", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "955983665", + "pr_number": 6628, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "created_at": "2022-08-26T12:17:52+00:00", + "commented_code": "return ctrl.Result{RequeueAfter: culler.GetRequeueTime()}, nil\n}\n\nfunc updateNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,\n\tsts *appsv1.StatefulSet, req ctrl.Request) (bool, error) {\n\n\tlog := r.Log.WithValues(\"notebook\", req.NamespacedName)\n\tctx := context.Background()\n\n\tstatusUpdate := false\n\t// Initialize Notebook CR Status\n\tif nb.Status.Conditions == nil {\n\t\tlog.Info(\"Initializing Notebook CR Status\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n\t\tnewStatus := initializeStatus()\n\t\tnb.Status = newStatus\n\t\tstatusUpdate = true\n\t}\n\n\t// Update the Notebook CR status.readyReplicas if the status is changed\n\tif sts.Status.ReadyReplicas != nb.Status.ReadyReplicas {\n\t\tlog.Info(\"Updating status.ReadyReplicas\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n\t\tnb.Status.ReadyReplicas = sts.Status.ReadyReplicas\n\t\tstatusUpdate = true\n\t}\n\n\t// Check the pod status\n\tpod := &corev1.Pod{}\n\tpodFound := false\n\terr := r.Get(ctx, types.NamespacedName{Name: sts.Name + \"-0\", Namespace: sts.Namespace}, pod)\n\tif err != nil && apierrs.IsNotFound(err) {\n\t\t// This should be reconciled by the StatefulSet\n\t\tlog.Info(\"Pod not found...\")\n\t\treturn podFound, nil\n\t} else if err != nil {\n\t\treturn podFound, err\n\t} else {\n\t\tpodFound = true\n\t\t// Update status of the CR using the ContainerState of\n\t\t// the container that has the same name as the CR.\n\t\t// If no container of same name is found, the state of the CR is not updated.\n\t\tif len(pod.Status.ContainerStatuses) > 0 {\n\t\t\tnotebookContainerFound := false\n\t\t\tfor i := range pod.Status.ContainerStatuses {\n\t\t\t\tif pod.Status.ContainerStatuses[i].Name != nb.Name {\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\t\t\t\tif pod.Status.ContainerStatuses[i].State == nb.Status.ContainerState {\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\t\t\t\t// Update Notebook CR's status.ContainerState\n\t\t\t\tlog.Info(\"Updating Notebook CR state: \", \"namespace\", nb.Namespace, \"name\", nb.Name)\n\t\t\t\tcs := pod.Status.ContainerStatuses[i].State\n\t\t\t\tnb.Status.ContainerState = cs\n\t\t\t\t// Mirroring pod condition\n\t\t\t\tnotebookConditions := []v1beta1.NotebookCondition{}\n\t\t\t\tfor i := range pod.Status.Conditions {\n\t\t\t\t\tcondition := PodCondToNotebookCond(pod.Status.Conditions[i])\n\t\t\t\t\tlog.Info(\"Mirroring pod condition: \", \"namespace\", nb.Namespace, \"name\", nb.Name, \"type\", condition.Type,\n\t\t\t\t\t\t\"status\", condition.Status, \"reason\", condition.Reason, \"message\", condition.Message)\n\t\t\t\t\tnotebookConditions = append(notebookConditions, condition)\n\t\t\t\t}\n\t\t\t\tnb.Status.Conditions = notebookConditions\n\n\t\t\t\tstatusUpdate = true\n\t\t\t\tnotebookContainerFound = true\n\t\t\t\tbreak\n\n\t\t\t}\n\t\t\tif !notebookContainerFound {\n\t\t\t\tlog.Error(nil, \"Could not find the Notebook container, will not update the status of the CR. 
No container has the same name as the CR.\", \"CR name:\", nb.Name)\n\t\t\t}\n\t\t}\n\t}\n\n\tif statusUpdate {\n\t\tlog.Info(\"Updating Notebook CR Status\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n\t\terr = r.Status().Update(ctx, nb)\n\t\tif err != nil {\n\t\t\treturn podFound, err\n\t\t}\n\t}\n\n\treturn podFound, nil\n}\n\nfunc initializeStatus() v1beta1.NotebookStatus {", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "955983665", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6628, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "955983665", + "commented_code": "@@ -324,6 +269,102 @@ func (r *NotebookReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c\n \treturn ctrl.Result{RequeueAfter: culler.GetRequeueTime()}, nil\n }\n \n+func updateNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,\n+\tsts *appsv1.StatefulSet, req ctrl.Request) (bool, error) {\n+\n+\tlog := r.Log.WithValues(\"notebook\", req.NamespacedName)\n+\tctx := context.Background()\n+\n+\tstatusUpdate := false\n+\t// Initialize Notebook CR Status\n+\tif nb.Status.Conditions == nil {\n+\t\tlog.Info(\"Initializing Notebook CR Status\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n+\t\tnewStatus := initializeStatus()\n+\t\tnb.Status = newStatus\n+\t\tstatusUpdate = true\n+\t}\n+\n+\t// Update the Notebook CR status.readyReplicas if the status is changed\n+\tif sts.Status.ReadyReplicas != nb.Status.ReadyReplicas {\n+\t\tlog.Info(\"Updating status.ReadyReplicas\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n+\t\tnb.Status.ReadyReplicas = sts.Status.ReadyReplicas\n+\t\tstatusUpdate = true\n+\t}\n+\n+\t// Check the pod status\n+\tpod := &corev1.Pod{}\n+\tpodFound := false\n+\terr := r.Get(ctx, types.NamespacedName{Name: sts.Name + \"-0\", Namespace: sts.Namespace}, pod)\n+\tif err != nil && apierrs.IsNotFound(err) {\n+\t\t// This should be reconciled by the StatefulSet\n+\t\tlog.Info(\"Pod not found...\")\n+\t\treturn podFound, nil\n+\t} else if err != nil {\n+\t\treturn podFound, err\n+\t} else {\n+\t\tpodFound = true\n+\t\t// Update status of the CR using the ContainerState of\n+\t\t// the container that has the same name as the CR.\n+\t\t// If no container of same name is found, the state of the CR is not updated.\n+\t\tif len(pod.Status.ContainerStatuses) > 0 {\n+\t\t\tnotebookContainerFound := false\n+\t\t\tfor i := range pod.Status.ContainerStatuses {\n+\t\t\t\tif pod.Status.ContainerStatuses[i].Name != nb.Name {\n+\t\t\t\t\tcontinue\n+\t\t\t\t}\n+\t\t\t\tif pod.Status.ContainerStatuses[i].State == nb.Status.ContainerState {\n+\t\t\t\t\tcontinue\n+\t\t\t\t}\n+\t\t\t\t// Update Notebook CR's status.ContainerState\n+\t\t\t\tlog.Info(\"Updating Notebook CR state: \", \"namespace\", nb.Namespace, \"name\", nb.Name)\n+\t\t\t\tcs := pod.Status.ContainerStatuses[i].State\n+\t\t\t\tnb.Status.ContainerState = cs\n+\t\t\t\t// Mirroring pod condition\n+\t\t\t\tnotebookConditions := []v1beta1.NotebookCondition{}\n+\t\t\t\tfor i := range pod.Status.Conditions {\n+\t\t\t\t\tcondition := PodCondToNotebookCond(pod.Status.Conditions[i])\n+\t\t\t\t\tlog.Info(\"Mirroring pod condition: \", \"namespace\", nb.Namespace, \"name\", nb.Name, \"type\", condition.Type,\n+\t\t\t\t\t\t\"status\", condition.Status, \"reason\", condition.Reason, \"message\", condition.Message)\n+\t\t\t\t\tnotebookConditions = append(notebookConditions, condition)\n+\t\t\t\t}\n+\t\t\t\tnb.Status.Conditions = 
notebookConditions\n+\n+\t\t\t\tstatusUpdate = true\n+\t\t\t\tnotebookContainerFound = true\n+\t\t\t\tbreak\n+\n+\t\t\t}\n+\t\t\tif !notebookContainerFound {\n+\t\t\t\tlog.Error(nil, \"Could not find the Notebook container, will not update the status of the CR. No container has the same name as the CR.\", \"CR name:\", nb.Name)\n+\t\t\t}\n+\t\t}\n+\t}\n+\n+\tif statusUpdate {\n+\t\tlog.Info(\"Updating Notebook CR Status\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n+\t\terr = r.Status().Update(ctx, nb)\n+\t\tif err != nil {\n+\t\t\treturn podFound, err\n+\t\t}\n+\t}\n+\n+\treturn podFound, nil\n+}\n+\n+func initializeStatus() v1beta1.NotebookStatus {", + "comment_created_at": "2022-08-26T12:17:52+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Let's remove this function. We only need this initialization in one place and can simplify with something like:\r\n\r\n```golang\r\nstatus := v1beta1.NotebookStatus{\r\n Conditions: make([]v1beta1.NotebookCondition, 0),\r\n ReadyReplicas: sts.Status.ReadyReplicas,\r\n ContainerState: corev1.ContainerState{},\r\n}\r\n```", + "pr_file_module": null + }, + { + "comment_id": "957337073", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6628, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "955983665", + "commented_code": "@@ -324,6 +269,102 @@ func (r *NotebookReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c\n \treturn ctrl.Result{RequeueAfter: culler.GetRequeueTime()}, nil\n }\n \n+func updateNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,\n+\tsts *appsv1.StatefulSet, req ctrl.Request) (bool, error) {\n+\n+\tlog := r.Log.WithValues(\"notebook\", req.NamespacedName)\n+\tctx := context.Background()\n+\n+\tstatusUpdate := false\n+\t// Initialize Notebook CR Status\n+\tif nb.Status.Conditions == nil {\n+\t\tlog.Info(\"Initializing Notebook CR Status\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n+\t\tnewStatus := initializeStatus()\n+\t\tnb.Status = newStatus\n+\t\tstatusUpdate = true\n+\t}\n+\n+\t// Update the Notebook CR status.readyReplicas if the status is changed\n+\tif sts.Status.ReadyReplicas != nb.Status.ReadyReplicas {\n+\t\tlog.Info(\"Updating status.ReadyReplicas\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n+\t\tnb.Status.ReadyReplicas = sts.Status.ReadyReplicas\n+\t\tstatusUpdate = true\n+\t}\n+\n+\t// Check the pod status\n+\tpod := &corev1.Pod{}\n+\tpodFound := false\n+\terr := r.Get(ctx, types.NamespacedName{Name: sts.Name + \"-0\", Namespace: sts.Namespace}, pod)\n+\tif err != nil && apierrs.IsNotFound(err) {\n+\t\t// This should be reconciled by the StatefulSet\n+\t\tlog.Info(\"Pod not found...\")\n+\t\treturn podFound, nil\n+\t} else if err != nil {\n+\t\treturn podFound, err\n+\t} else {\n+\t\tpodFound = true\n+\t\t// Update status of the CR using the ContainerState of\n+\t\t// the container that has the same name as the CR.\n+\t\t// If no container of same name is found, the state of the CR is not updated.\n+\t\tif len(pod.Status.ContainerStatuses) > 0 {\n+\t\t\tnotebookContainerFound := false\n+\t\t\tfor i := range pod.Status.ContainerStatuses {\n+\t\t\t\tif pod.Status.ContainerStatuses[i].Name != nb.Name {\n+\t\t\t\t\tcontinue\n+\t\t\t\t}\n+\t\t\t\tif pod.Status.ContainerStatuses[i].State == nb.Status.ContainerState {\n+\t\t\t\t\tcontinue\n+\t\t\t\t}\n+\t\t\t\t// Update Notebook CR's status.ContainerState\n+\t\t\t\tlog.Info(\"Updating Notebook CR state: \", \"namespace\", nb.Namespace, \"name\", nb.Name)\n+\t\t\t\tcs := 
pod.Status.ContainerStatuses[i].State\n+\t\t\t\tnb.Status.ContainerState = cs\n+\t\t\t\t// Mirroring pod condition\n+\t\t\t\tnotebookConditions := []v1beta1.NotebookCondition{}\n+\t\t\t\tfor i := range pod.Status.Conditions {\n+\t\t\t\t\tcondition := PodCondToNotebookCond(pod.Status.Conditions[i])\n+\t\t\t\t\tlog.Info(\"Mirroring pod condition: \", \"namespace\", nb.Namespace, \"name\", nb.Name, \"type\", condition.Type,\n+\t\t\t\t\t\t\"status\", condition.Status, \"reason\", condition.Reason, \"message\", condition.Message)\n+\t\t\t\t\tnotebookConditions = append(notebookConditions, condition)\n+\t\t\t\t}\n+\t\t\t\tnb.Status.Conditions = notebookConditions\n+\n+\t\t\t\tstatusUpdate = true\n+\t\t\t\tnotebookContainerFound = true\n+\t\t\t\tbreak\n+\n+\t\t\t}\n+\t\t\tif !notebookContainerFound {\n+\t\t\t\tlog.Error(nil, \"Could not find the Notebook container, will not update the status of the CR. No container has the same name as the CR.\", \"CR name:\", nb.Name)\n+\t\t\t}\n+\t\t}\n+\t}\n+\n+\tif statusUpdate {\n+\t\tlog.Info(\"Updating Notebook CR Status\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n+\t\terr = r.Status().Update(ctx, nb)\n+\t\tif err != nil {\n+\t\t\treturn podFound, err\n+\t\t}\n+\t}\n+\n+\treturn podFound, nil\n+}\n+\n+func initializeStatus() v1beta1.NotebookStatus {", + "comment_created_at": "2022-08-29T13:27:57+00:00", + "comment_author": "apo-ger", + "comment_body": "Fixed!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "604789893", + "pr_number": 5761, + "pr_file": "components/profile-controller/controllers/profile_controller.go", + "created_at": "2021-03-31T10:44:12+00:00", + "commented_code": "return\n}\n\nfunc enforceNamespaceLabelsFromConfig(ns *corev1.Namespace, logger logr.Logger) {\n\tif ns.Labels == nil {\n\t\tns.Labels = make(map[string]string)\n\t}\n\n\tfor k, v := range enforcedKubeflowNamespaceLabels {\n\t\texistingValue, ok := ns.Labels[k]\n\t\tif len(v) == 0 {\n\t\t\t// When there is an empty value, k should be removed.\n\t\t\tif ok {\n\t\t\t\tdelete(ns.Labels, k)\n\t\t\t}\n\t\t} else {\n\t\t\tif !ok || existingValue != v {", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "604789893", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5761, + "pr_file": "components/profile-controller/controllers/profile_controller.go", + "discussion_id": "604789893", + "commented_code": "@@ -617,15 +630,48 @@ func removeString(slice []string, s string) (result []string) {\n \treturn\n }\n \n+func enforceNamespaceLabelsFromConfig(ns *corev1.Namespace, logger logr.Logger) {\n+\tif ns.Labels == nil {\n+\t\tns.Labels = make(map[string]string)\n+\t}\n+\n+\tfor k, v := range enforcedKubeflowNamespaceLabels {\n+\t\texistingValue, ok := ns.Labels[k]\n+\t\tif len(v) == 0 {\n+\t\t\t// When there is an empty value, k should be removed.\n+\t\t\tif ok {\n+\t\t\t\tdelete(ns.Labels, k)\n+\t\t\t}\n+\t\t} else {\n+\t\t\tif !ok || existingValue != v {", + "comment_created_at": "2021-03-31T10:44:12+00:00", + "comment_author": "Bobgy", + "comment_body": "The if seems unnecessary, no matter what, you will set this label", + "pr_file_module": null + }, + { + "comment_id": "605286906", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5761, + "pr_file": "components/profile-controller/controllers/profile_controller.go", + "discussion_id": "604789893", + "commented_code": "@@ -617,15 +630,48 @@ func removeString(slice []string, s string) (result []string) {\n \treturn\n }\n \n+func enforceNamespaceLabelsFromConfig(ns 
*corev1.Namespace, logger logr.Logger) {\n+\tif ns.Labels == nil {\n+\t\tns.Labels = make(map[string]string)\n+\t}\n+\n+\tfor k, v := range enforcedKubeflowNamespaceLabels {\n+\t\texistingValue, ok := ns.Labels[k]\n+\t\tif len(v) == 0 {\n+\t\t\t// When there is an empty value, k should be removed.\n+\t\t\tif ok {\n+\t\t\t\tdelete(ns.Labels, k)\n+\t\t\t}\n+\t\t} else {\n+\t\t\tif !ok || existingValue != v {", + "comment_created_at": "2021-03-31T23:38:03+00:00", + "comment_author": "zijianjoy", + "comment_body": "You are right! Removed this if condition.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "547974495", + "pr_number": 5314, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "created_at": "2020-12-23T14:05:47+00:00", + "commented_code": "} else {\n\t\t// Got the pod\n\t\tpodFound = true\n\t\tif len(pod.Status.ContainerStatuses) > 0 &&\n\t\t\tpod.Status.ContainerStatuses[0].State != instance.Status.ContainerState {\n\t\t\tlog.Info(\"Updating container state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n\t\t\tcs := pod.Status.ContainerStatuses[0].State\n\t\t\tinstance.Status.ContainerState = cs\n\t\t\toldConditions := instance.Status.Conditions\n\t\t\tnewCondition := getNextCondition(cs)\n\t\t\t// Append new condition\n\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n\t\t\t\toldConditions[0].Message != newCondition.Message {\n\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n\n\t\t// Update status of the CR using the ContainerState of\n\t\t// the container that has the same name as the CR.\n\t\t// If no container of same name is found, the state of the CR is not updated.\n\t\tif len(pod.Status.ContainerStatuses) > 0 {\n\t\t\tnotebookContainerFound := false\n\t\t\tfor i := range pod.Status.ContainerStatuses {\n\t\t\t\tif pod.Status.ContainerStatuses[i].Name == instance.Name &&", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "547974495", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5314, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "547974495", + "commented_code": "@@ -206,23 +206,37 @@ func (r *NotebookReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {\n \t} else {\n \t\t// Got the pod\n \t\tpodFound = true\n-\t\tif len(pod.Status.ContainerStatuses) > 0 &&\n-\t\t\tpod.Status.ContainerStatuses[0].State != instance.Status.ContainerState {\n-\t\t\tlog.Info(\"Updating container state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n-\t\t\tcs := pod.Status.ContainerStatuses[0].State\n-\t\t\tinstance.Status.ContainerState = cs\n-\t\t\toldConditions := instance.Status.Conditions\n-\t\t\tnewCondition := getNextCondition(cs)\n-\t\t\t// Append new condition\n-\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n-\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n-\t\t\t\toldConditions[0].Message != newCondition.Message {\n-\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", 
newCondition.Message)\n-\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n+\n+\t\t// Update status of the CR using the ContainerState of\n+\t\t// the container that has the same name as the CR.\n+\t\t// If no container of same name is found, the state of the CR is not updated.\n+\t\tif len(pod.Status.ContainerStatuses) > 0 {\n+\t\t\tnotebookContainerFound := false\n+\t\t\tfor i := range pod.Status.ContainerStatuses {\n+\t\t\t\tif pod.Status.ContainerStatuses[i].Name == instance.Name &&", + "comment_created_at": "2020-12-23T14:05:47+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Could you break this if statement to the following ones:\r\n\r\n```golang\r\nif pod.Status.ContainerStatuses[i].Name != instance.Name {\r\n continue\r\n}\r\n\r\nif pod.Status.ContainerStatuses[i].State == instance.Status.ContainerState {\r\n continue\r\n}\r\n\r\nlog.Info(\"Updating Notebook CR state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\r\ncs := pod.Status.ContainerStatuses[i].State\r\n...\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "473846716", + "pr_number": 5218, + "pr_file": "components/tensorboard-controller/controllers/tensorboard_controller.go", + "created_at": "2020-08-20T10:11:05+00:00", + "commented_code": "func isGoogleCloudPath(path string) bool {\n\treturn strings.HasPrefix(path, \"gs://\")\n}\n\nfunc isPVCPath(path string) bool {\n\treturn strings.HasPrefix(path, \"pvc://\")\n}\n\nfunc extractPVCName(path string) string {\n\ttrimmed := strings.TrimPrefix(path, \"pvc://\") //removing \"pvc://\" prefix\n\tending := strings.Index(trimmed, \"/\") //finding ending index of pvc-name string\n\tif ending == -1 {\n\t\treturn trimmed\n\t} else {\n\t\treturn trimmed[0:ending]\n\t}\n}\n\nfunc extractPVCSubPath(path string) string {\n\ttrimmed := strings.TrimPrefix(path, \"pvc://\") //removing \"pvc://\" prefix\n\tstart := strings.Index(trimmed, \"/\") //finding starting index of local path inside PVC\n\tif start == -1 || len(trimmed) == start+1 {\n\t\treturn \"\"\n\t} else {\n\t\treturn trimmed[start+1:]\n\t}\n}\n\n//Searches a corev1.PodList for running pods and returns\n//a running corev1.Pod (if exists)\nfunc findRunningPod(pods *corev1.PodList) corev1.Pod {\n\tfor _, pod := range pods.Items {\n\t\tif pod.Status.Phase == \"Running\" {\n\t\t\treturn pod\n\t\t}\n\t}\n\n\treturn corev1.Pod{}\n}\n\nfunc extractNodeName(pod corev1.Pod) string {\n\treturn pod.Spec.NodeName\n}\n\nfunc generateNodeAffinity(affinity *corev1.Affinity, pvcname string, r *TensorboardReconciler, tb *tensorboardv1alpha1.Tensorboard) error {\n\tvar nodename string\n\tvar pods = &corev1.PodList{}\n\tvar pod corev1.Pod\n\n\t//List all pods that access the PVC that has ClaimName: pvcname.\n\t//NOTE: We use only one custom field selector to filter out pods that don't use this PVC.\n\tif err := r.List(context.Background(), pods, client.InNamespace(tb.Namespace), client.MatchingFields{\"spec.volumes.persistentvolumeclaim.claimname\": pvcname}); err != nil {\n\t\treturn fmt.Errorf(\"List pods error: %v\", err)\n\t}\n\n\t//Find a running pod that uses the PVC.\n\tpod = findRunningPod(pods)\n\n\t//If there is no running pod that uses the PVC, then: nodename == \"\".\n\t//Else, nodename contains the name of the node that the pod is running on.\n\tnodename = extractNodeName(pod)\n\n\t//In this case, there is a running pod that uses the PVC, therefore we create\n\t//a nodeAffinity field so that the Tensorboard server will be 
scheduled (if possible)\n\t//on the same node as the running pod.\n\tif nodename != \"\" {", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "473846716", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5218, + "pr_file": "components/tensorboard-controller/controllers/tensorboard_controller.go", + "discussion_id": "473846716", + "commented_code": "@@ -281,3 +328,102 @@ func isCloudPath(path string) bool {\n func isGoogleCloudPath(path string) bool {\n \treturn strings.HasPrefix(path, \"gs://\")\n }\n+\n+func isPVCPath(path string) bool {\n+\treturn strings.HasPrefix(path, \"pvc://\")\n+}\n+\n+func extractPVCName(path string) string {\n+\ttrimmed := strings.TrimPrefix(path, \"pvc://\") //removing \"pvc://\" prefix\n+\tending := strings.Index(trimmed, \"/\") //finding ending index of pvc-name string\n+\tif ending == -1 {\n+\t\treturn trimmed\n+\t} else {\n+\t\treturn trimmed[0:ending]\n+\t}\n+}\n+\n+func extractPVCSubPath(path string) string {\n+\ttrimmed := strings.TrimPrefix(path, \"pvc://\") //removing \"pvc://\" prefix\n+\tstart := strings.Index(trimmed, \"/\") //finding starting index of local path inside PVC\n+\tif start == -1 || len(trimmed) == start+1 {\n+\t\treturn \"\"\n+\t} else {\n+\t\treturn trimmed[start+1:]\n+\t}\n+}\n+\n+//Searches a corev1.PodList for running pods and returns\n+//a running corev1.Pod (if exists)\n+func findRunningPod(pods *corev1.PodList) corev1.Pod {\n+\tfor _, pod := range pods.Items {\n+\t\tif pod.Status.Phase == \"Running\" {\n+\t\t\treturn pod\n+\t\t}\n+\t}\n+\n+\treturn corev1.Pod{}\n+}\n+\n+func extractNodeName(pod corev1.Pod) string {\n+\treturn pod.Spec.NodeName\n+}\n+\n+func generateNodeAffinity(affinity *corev1.Affinity, pvcname string, r *TensorboardReconciler, tb *tensorboardv1alpha1.Tensorboard) error {\n+\tvar nodename string\n+\tvar pods = &corev1.PodList{}\n+\tvar pod corev1.Pod\n+\n+\t//List all pods that access the PVC that has ClaimName: pvcname.\n+\t//NOTE: We use only one custom field selector to filter out pods that don't use this PVC.\n+\tif err := r.List(context.Background(), pods, client.InNamespace(tb.Namespace), client.MatchingFields{\"spec.volumes.persistentvolumeclaim.claimname\": pvcname}); err != nil {\n+\t\treturn fmt.Errorf(\"List pods error: %v\", err)\n+\t}\n+\n+\t//Find a running pod that uses the PVC.\n+\tpod = findRunningPod(pods)\n+\n+\t//If there is no running pod that uses the PVC, then: nodename == \"\".\n+\t//Else, nodename contains the name of the node that the pod is running on.\n+\tnodename = extractNodeName(pod)\n+\n+\t//In this case, there is a running pod that uses the PVC, therefore we create\n+\t//a nodeAffinity field so that the Tensorboard server will be scheduled (if possible)\n+\t//on the same node as the running pod.\n+\tif nodename != \"\" {", + "comment_created_at": "2020-08-20T10:11:05+00:00", + "comment_author": "kimwnasptd", + "comment_body": "nit: You could have an `if nodename == \"\" {` here and return nil if that holds.\r\n\r\nThen you could move the code below one tab to the left, outside of an if clause, to make it a little bit more simple to read", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "320263186", + "pr_number": 4040, + "pr_file": "bootstrap/pkg/kfapp/apply/apply.go", + "created_at": "2019-09-03T13:12:21+00:00", + "commented_code": "package apply\n\nimport (\n\t\"errors\"\n\t\"fmt\"\n\t\"io\"\n\t\"os\"\n\n\tkftypes \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n\tkfdefsv3 
\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n\tlog \"github.com/sirupsen/logrus\"\n)\n\n// BootstrapKubeflow is used by the kfctl apply sub-command to take in a configfile\n// as a flag and boostrap a KfApp and deploy it\nfunc BootstrapKubeflow(configFilePath string, kfResource kftypes.ResourceEnum) error {\n\t// Set default app name to kf-app\n\tappName := \"kf-app\"\n\n\t// Construct KfDef from the configFilePath provided\n\tkfDef := &kfdefsv3.KfDef{}\n\tkfDef, err := kfdefsv3.LoadKFDefFromURI(configFilePath)\n\tif err != nil {\n\t\tlog.Printf(\"Unable to create KfDef from config file: %v\", err)\n\t}\n\tif kfDef.Name != \"\" {\n\t\tlog.Warnf(\"Overriding KfDef.Spec.Name; old value %v; new value %v\", kfDef.Name, appName)\n\t}\n\tkfDef.Name = appName", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "320263186", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4040, + "pr_file": "bootstrap/pkg/kfapp/apply/apply.go", + "discussion_id": "320263186", + "commented_code": "@@ -0,0 +1,103 @@\n+package apply\n+\n+import (\n+\t\"errors\"\n+\t\"fmt\"\n+\t\"io\"\n+\t\"os\"\n+\n+\tkftypes \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n+\tkfdefsv3 \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n+\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n+\tlog \"github.com/sirupsen/logrus\"\n+)\n+\n+// BootstrapKubeflow is used by the kfctl apply sub-command to take in a configfile\n+// as a flag and boostrap a KfApp and deploy it\n+func BootstrapKubeflow(configFilePath string, kfResource kftypes.ResourceEnum) error {\n+\t// Set default app name to kf-app\n+\tappName := \"kf-app\"\n+\n+\t// Construct KfDef from the configFilePath provided\n+\tkfDef := &kfdefsv3.KfDef{}\n+\tkfDef, err := kfdefsv3.LoadKFDefFromURI(configFilePath)\n+\tif err != nil {\n+\t\tlog.Printf(\"Unable to create KfDef from config file: %v\", err)\n+\t}\n+\tif kfDef.Name != \"\" {\n+\t\tlog.Warnf(\"Overriding KfDef.Spec.Name; old value %v; new value %v\", kfDef.Name, appName)\n+\t}\n+\tkfDef.Name = appName", + "comment_created_at": "2019-09-03T13:12:21+00:00", + "comment_author": "yanniszark", + "comment_body": "This line should be inside the if statement.\r\nNow it always overrides.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "300474899", + "pr_number": 3607, + "pr_file": "bootstrap/v2/pkg/kfapp/existing_arrikto/existing.go", + "created_at": "2019-07-04T17:54:25+00:00", + "commented_code": "}\n\treturn string(b)\n}\n\nfunc initialiseHMACSecretFromEnvOrGen(secEnv string, reqLen int) []byte {\n\tenvContent := os.Getenv(secEnv)\n\n\tif len(envContent) < reqLen {\n\t\tlog.Println(\"WARNING: HMAC secret not provided or secret too short. 
Generating a random one from nonce characters.\")\n\t\treturn []byte(createNonce(reqLen))\n\t}\n\n\treturn []byte(envContent)\n}\n\nfunc createNonce(length int) string {", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "300474899", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 3607, + "pr_file": "bootstrap/v2/pkg/kfapp/existing_arrikto/existing.go", + "discussion_id": "300474899", + "commented_code": "@@ -470,3 +480,24 @@ func genRandomString(length int) string {\n \t}\n \treturn string(b)\n }\n+\n+func initialiseHMACSecretFromEnvOrGen(secEnv string, reqLen int) []byte {\n+\tenvContent := os.Getenv(secEnv)\n+\n+\tif len(envContent) < reqLen {\n+\t\tlog.Println(\"WARNING: HMAC secret not provided or secret too short. Generating a random one from nonce characters.\")\n+\t\treturn []byte(createNonce(reqLen))\n+\t}\n+\n+\treturn []byte(envContent)\n+}\n+\n+func createNonce(length int) string {", + "comment_created_at": "2019-07-04T17:54:25+00:00", + "comment_author": "SachinVarghese", + "comment_body": "You can use the genRandomString function here and the createNonce is not really required.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-simplify-code-structure.md b/_reviewers/kubeflow-simplify-code-structure.md index b3d5f78..1df0711 100644 --- a/_reviewers/kubeflow-simplify-code-structure.md +++ b/_reviewers/kubeflow-simplify-code-structure.md @@ -84,197 +84,3 @@ if nodename == "" { 5. **Reuse existing functions** instead of creating new ones with similar functionality. This makes your code more readable, maintainable, and less error-prone. - - -[ - { - "discussion_id": "857440867", - "pr_number": 6435, - "pr_file": "components/profile-controller/controllers/profile_controller.go", - "created_at": "2022-04-25T09:39:08+00:00", - "commented_code": "},\n\t\t\t})\n\t\t}\n\t}\n\tif err := r.Update(ctx, profileIns); err != nil {\n\t\treturn err\n\t\tif err := r.Update(ctx, profileIns); err != nil {\n\t\t\treturn err\n\t\t}", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "857440867", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6435, - "pr_file": "components/profile-controller/controllers/profile_controller.go", - "discussion_id": "857440867", - "commented_code": "@@ -710,9 +710,10 @@ func (r *ProfileReconciler) PatchDefaultPluginSpec(ctx context.Context, profileI\n \t\t\t\t},\n \t\t\t})\n \t\t}\n-\t}\n-\tif err := r.Update(ctx, profileIns); err != nil {\n-\t\treturn err\n+\t\tif err := r.Update(ctx, profileIns); err != nil {\n+\t\t\treturn err\n+\t\t}\n+", - "comment_created_at": "2022-04-25T09:39:08+00:00", - "comment_author": "umka1332", - "comment_body": "You should put it even higher - there is no point in updating the profile unless something is changed in this profile. 
And `profileIns` is changed only in the innermost if statement.", - "pr_file_module": null - }, - { - "comment_id": "881192272", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6435, - "pr_file": "components/profile-controller/controllers/profile_controller.go", - "discussion_id": "857440867", - "commented_code": "@@ -710,9 +710,10 @@ func (r *ProfileReconciler) PatchDefaultPluginSpec(ctx context.Context, profileI\n \t\t\t\t},\n \t\t\t})\n \t\t}\n-\t}\n-\tif err := r.Update(ctx, profileIns); err != nil {\n-\t\treturn err\n+\t\tif err := r.Update(ctx, profileIns); err != nil {\n+\t\t\treturn err\n+\t\t}\n+", - "comment_created_at": "2022-05-25T03:50:59+00:00", - "comment_author": "henrysecond1", - "comment_body": "Nice catch! Thank you", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "955983665", - "pr_number": 6628, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "created_at": "2022-08-26T12:17:52+00:00", - "commented_code": "return ctrl.Result{RequeueAfter: culler.GetRequeueTime()}, nil\n}\n\nfunc updateNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,\n\tsts *appsv1.StatefulSet, req ctrl.Request) (bool, error) {\n\n\tlog := r.Log.WithValues(\"notebook\", req.NamespacedName)\n\tctx := context.Background()\n\n\tstatusUpdate := false\n\t// Initialize Notebook CR Status\n\tif nb.Status.Conditions == nil {\n\t\tlog.Info(\"Initializing Notebook CR Status\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n\t\tnewStatus := initializeStatus()\n\t\tnb.Status = newStatus\n\t\tstatusUpdate = true\n\t}\n\n\t// Update the Notebook CR status.readyReplicas if the status is changed\n\tif sts.Status.ReadyReplicas != nb.Status.ReadyReplicas {\n\t\tlog.Info(\"Updating status.ReadyReplicas\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n\t\tnb.Status.ReadyReplicas = sts.Status.ReadyReplicas\n\t\tstatusUpdate = true\n\t}\n\n\t// Check the pod status\n\tpod := &corev1.Pod{}\n\tpodFound := false\n\terr := r.Get(ctx, types.NamespacedName{Name: sts.Name + \"-0\", Namespace: sts.Namespace}, pod)\n\tif err != nil && apierrs.IsNotFound(err) {\n\t\t// This should be reconciled by the StatefulSet\n\t\tlog.Info(\"Pod not found...\")\n\t\treturn podFound, nil\n\t} else if err != nil {\n\t\treturn podFound, err\n\t} else {\n\t\tpodFound = true\n\t\t// Update status of the CR using the ContainerState of\n\t\t// the container that has the same name as the CR.\n\t\t// If no container of same name is found, the state of the CR is not updated.\n\t\tif len(pod.Status.ContainerStatuses) > 0 {\n\t\t\tnotebookContainerFound := false\n\t\t\tfor i := range pod.Status.ContainerStatuses {\n\t\t\t\tif pod.Status.ContainerStatuses[i].Name != nb.Name {\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\t\t\t\tif pod.Status.ContainerStatuses[i].State == nb.Status.ContainerState {\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\t\t\t\t// Update Notebook CR's status.ContainerState\n\t\t\t\tlog.Info(\"Updating Notebook CR state: \", \"namespace\", nb.Namespace, \"name\", nb.Name)\n\t\t\t\tcs := pod.Status.ContainerStatuses[i].State\n\t\t\t\tnb.Status.ContainerState = cs\n\t\t\t\t// Mirroring pod condition\n\t\t\t\tnotebookConditions := []v1beta1.NotebookCondition{}\n\t\t\t\tfor i := range pod.Status.Conditions {\n\t\t\t\t\tcondition := PodCondToNotebookCond(pod.Status.Conditions[i])\n\t\t\t\t\tlog.Info(\"Mirroring pod condition: \", \"namespace\", nb.Namespace, \"name\", nb.Name, \"type\", condition.Type,\n\t\t\t\t\t\t\"status\", condition.Status, \"reason\", condition.Reason, \"message\", 
condition.Message)\n\t\t\t\t\tnotebookConditions = append(notebookConditions, condition)\n\t\t\t\t}\n\t\t\t\tnb.Status.Conditions = notebookConditions\n\n\t\t\t\tstatusUpdate = true\n\t\t\t\tnotebookContainerFound = true\n\t\t\t\tbreak\n\n\t\t\t}\n\t\t\tif !notebookContainerFound {\n\t\t\t\tlog.Error(nil, \"Could not find the Notebook container, will not update the status of the CR. No container has the same name as the CR.\", \"CR name:\", nb.Name)\n\t\t\t}\n\t\t}\n\t}\n\n\tif statusUpdate {\n\t\tlog.Info(\"Updating Notebook CR Status\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n\t\terr = r.Status().Update(ctx, nb)\n\t\tif err != nil {\n\t\t\treturn podFound, err\n\t\t}\n\t}\n\n\treturn podFound, nil\n}\n\nfunc initializeStatus() v1beta1.NotebookStatus {", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "955983665", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6628, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "955983665", - "commented_code": "@@ -324,6 +269,102 @@ func (r *NotebookReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c\n \treturn ctrl.Result{RequeueAfter: culler.GetRequeueTime()}, nil\n }\n \n+func updateNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,\n+\tsts *appsv1.StatefulSet, req ctrl.Request) (bool, error) {\n+\n+\tlog := r.Log.WithValues(\"notebook\", req.NamespacedName)\n+\tctx := context.Background()\n+\n+\tstatusUpdate := false\n+\t// Initialize Notebook CR Status\n+\tif nb.Status.Conditions == nil {\n+\t\tlog.Info(\"Initializing Notebook CR Status\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n+\t\tnewStatus := initializeStatus()\n+\t\tnb.Status = newStatus\n+\t\tstatusUpdate = true\n+\t}\n+\n+\t// Update the Notebook CR status.readyReplicas if the status is changed\n+\tif sts.Status.ReadyReplicas != nb.Status.ReadyReplicas {\n+\t\tlog.Info(\"Updating status.ReadyReplicas\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n+\t\tnb.Status.ReadyReplicas = sts.Status.ReadyReplicas\n+\t\tstatusUpdate = true\n+\t}\n+\n+\t// Check the pod status\n+\tpod := &corev1.Pod{}\n+\tpodFound := false\n+\terr := r.Get(ctx, types.NamespacedName{Name: sts.Name + \"-0\", Namespace: sts.Namespace}, pod)\n+\tif err != nil && apierrs.IsNotFound(err) {\n+\t\t// This should be reconciled by the StatefulSet\n+\t\tlog.Info(\"Pod not found...\")\n+\t\treturn podFound, nil\n+\t} else if err != nil {\n+\t\treturn podFound, err\n+\t} else {\n+\t\tpodFound = true\n+\t\t// Update status of the CR using the ContainerState of\n+\t\t// the container that has the same name as the CR.\n+\t\t// If no container of same name is found, the state of the CR is not updated.\n+\t\tif len(pod.Status.ContainerStatuses) > 0 {\n+\t\t\tnotebookContainerFound := false\n+\t\t\tfor i := range pod.Status.ContainerStatuses {\n+\t\t\t\tif pod.Status.ContainerStatuses[i].Name != nb.Name {\n+\t\t\t\t\tcontinue\n+\t\t\t\t}\n+\t\t\t\tif pod.Status.ContainerStatuses[i].State == nb.Status.ContainerState {\n+\t\t\t\t\tcontinue\n+\t\t\t\t}\n+\t\t\t\t// Update Notebook CR's status.ContainerState\n+\t\t\t\tlog.Info(\"Updating Notebook CR state: \", \"namespace\", nb.Namespace, \"name\", nb.Name)\n+\t\t\t\tcs := pod.Status.ContainerStatuses[i].State\n+\t\t\t\tnb.Status.ContainerState = cs\n+\t\t\t\t// Mirroring pod condition\n+\t\t\t\tnotebookConditions := []v1beta1.NotebookCondition{}\n+\t\t\t\tfor i := range pod.Status.Conditions {\n+\t\t\t\t\tcondition := 
PodCondToNotebookCond(pod.Status.Conditions[i])\n+\t\t\t\t\tlog.Info(\"Mirroring pod condition: \", \"namespace\", nb.Namespace, \"name\", nb.Name, \"type\", condition.Type,\n+\t\t\t\t\t\t\"status\", condition.Status, \"reason\", condition.Reason, \"message\", condition.Message)\n+\t\t\t\t\tnotebookConditions = append(notebookConditions, condition)\n+\t\t\t\t}\n+\t\t\t\tnb.Status.Conditions = notebookConditions\n+\n+\t\t\t\tstatusUpdate = true\n+\t\t\t\tnotebookContainerFound = true\n+\t\t\t\tbreak\n+\n+\t\t\t}\n+\t\t\tif !notebookContainerFound {\n+\t\t\t\tlog.Error(nil, \"Could not find the Notebook container, will not update the status of the CR. No container has the same name as the CR.\", \"CR name:\", nb.Name)\n+\t\t\t}\n+\t\t}\n+\t}\n+\n+\tif statusUpdate {\n+\t\tlog.Info(\"Updating Notebook CR Status\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n+\t\terr = r.Status().Update(ctx, nb)\n+\t\tif err != nil {\n+\t\t\treturn podFound, err\n+\t\t}\n+\t}\n+\n+\treturn podFound, nil\n+}\n+\n+func initializeStatus() v1beta1.NotebookStatus {", - "comment_created_at": "2022-08-26T12:17:52+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Let's remove this function. We only need this initialization in one place and can simplify with something like:\r\n\r\n```golang\r\nstatus := v1beta1.NotebookStatus{\r\n Conditions: make([]v1beta1.NotebookCondition, 0),\r\n ReadyReplicas: sts.Status.ReadyReplicas,\r\n ContainerState: corev1.ContainerState{},\r\n}\r\n```", - "pr_file_module": null - }, - { - "comment_id": "957337073", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6628, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "955983665", - "commented_code": "@@ -324,6 +269,102 @@ func (r *NotebookReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c\n \treturn ctrl.Result{RequeueAfter: culler.GetRequeueTime()}, nil\n }\n \n+func updateNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,\n+\tsts *appsv1.StatefulSet, req ctrl.Request) (bool, error) {\n+\n+\tlog := r.Log.WithValues(\"notebook\", req.NamespacedName)\n+\tctx := context.Background()\n+\n+\tstatusUpdate := false\n+\t// Initialize Notebook CR Status\n+\tif nb.Status.Conditions == nil {\n+\t\tlog.Info(\"Initializing Notebook CR Status\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n+\t\tnewStatus := initializeStatus()\n+\t\tnb.Status = newStatus\n+\t\tstatusUpdate = true\n+\t}\n+\n+\t// Update the Notebook CR status.readyReplicas if the status is changed\n+\tif sts.Status.ReadyReplicas != nb.Status.ReadyReplicas {\n+\t\tlog.Info(\"Updating status.ReadyReplicas\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n+\t\tnb.Status.ReadyReplicas = sts.Status.ReadyReplicas\n+\t\tstatusUpdate = true\n+\t}\n+\n+\t// Check the pod status\n+\tpod := &corev1.Pod{}\n+\tpodFound := false\n+\terr := r.Get(ctx, types.NamespacedName{Name: sts.Name + \"-0\", Namespace: sts.Namespace}, pod)\n+\tif err != nil && apierrs.IsNotFound(err) {\n+\t\t// This should be reconciled by the StatefulSet\n+\t\tlog.Info(\"Pod not found...\")\n+\t\treturn podFound, nil\n+\t} else if err != nil {\n+\t\treturn podFound, err\n+\t} else {\n+\t\tpodFound = true\n+\t\t// Update status of the CR using the ContainerState of\n+\t\t// the container that has the same name as the CR.\n+\t\t// If no container of same name is found, the state of the CR is not updated.\n+\t\tif len(pod.Status.ContainerStatuses) > 0 {\n+\t\t\tnotebookContainerFound := false\n+\t\t\tfor i := range 
pod.Status.ContainerStatuses {\n+\t\t\t\tif pod.Status.ContainerStatuses[i].Name != nb.Name {\n+\t\t\t\t\tcontinue\n+\t\t\t\t}\n+\t\t\t\tif pod.Status.ContainerStatuses[i].State == nb.Status.ContainerState {\n+\t\t\t\t\tcontinue\n+\t\t\t\t}\n+\t\t\t\t// Update Notebook CR's status.ContainerState\n+\t\t\t\tlog.Info(\"Updating Notebook CR state: \", \"namespace\", nb.Namespace, \"name\", nb.Name)\n+\t\t\t\tcs := pod.Status.ContainerStatuses[i].State\n+\t\t\t\tnb.Status.ContainerState = cs\n+\t\t\t\t// Mirroring pod condition\n+\t\t\t\tnotebookConditions := []v1beta1.NotebookCondition{}\n+\t\t\t\tfor i := range pod.Status.Conditions {\n+\t\t\t\t\tcondition := PodCondToNotebookCond(pod.Status.Conditions[i])\n+\t\t\t\t\tlog.Info(\"Mirroring pod condition: \", \"namespace\", nb.Namespace, \"name\", nb.Name, \"type\", condition.Type,\n+\t\t\t\t\t\t\"status\", condition.Status, \"reason\", condition.Reason, \"message\", condition.Message)\n+\t\t\t\t\tnotebookConditions = append(notebookConditions, condition)\n+\t\t\t\t}\n+\t\t\t\tnb.Status.Conditions = notebookConditions\n+\n+\t\t\t\tstatusUpdate = true\n+\t\t\t\tnotebookContainerFound = true\n+\t\t\t\tbreak\n+\n+\t\t\t}\n+\t\t\tif !notebookContainerFound {\n+\t\t\t\tlog.Error(nil, \"Could not find the Notebook container, will not update the status of the CR. No container has the same name as the CR.\", \"CR name:\", nb.Name)\n+\t\t\t}\n+\t\t}\n+\t}\n+\n+\tif statusUpdate {\n+\t\tlog.Info(\"Updating Notebook CR Status\", \"namespace\", nb.Namespace, \"name\", nb.Name)\n+\t\terr = r.Status().Update(ctx, nb)\n+\t\tif err != nil {\n+\t\t\treturn podFound, err\n+\t\t}\n+\t}\n+\n+\treturn podFound, nil\n+}\n+\n+func initializeStatus() v1beta1.NotebookStatus {", - "comment_created_at": "2022-08-29T13:27:57+00:00", - "comment_author": "apo-ger", - "comment_body": "Fixed!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "604789893", - "pr_number": 5761, - "pr_file": "components/profile-controller/controllers/profile_controller.go", - "created_at": "2021-03-31T10:44:12+00:00", - "commented_code": "return\n}\n\nfunc enforceNamespaceLabelsFromConfig(ns *corev1.Namespace, logger logr.Logger) {\n\tif ns.Labels == nil {\n\t\tns.Labels = make(map[string]string)\n\t}\n\n\tfor k, v := range enforcedKubeflowNamespaceLabels {\n\t\texistingValue, ok := ns.Labels[k]\n\t\tif len(v) == 0 {\n\t\t\t// When there is an empty value, k should be removed.\n\t\t\tif ok {\n\t\t\t\tdelete(ns.Labels, k)\n\t\t\t}\n\t\t} else {\n\t\t\tif !ok || existingValue != v {", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "604789893", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5761, - "pr_file": "components/profile-controller/controllers/profile_controller.go", - "discussion_id": "604789893", - "commented_code": "@@ -617,15 +630,48 @@ func removeString(slice []string, s string) (result []string) {\n \treturn\n }\n \n+func enforceNamespaceLabelsFromConfig(ns *corev1.Namespace, logger logr.Logger) {\n+\tif ns.Labels == nil {\n+\t\tns.Labels = make(map[string]string)\n+\t}\n+\n+\tfor k, v := range enforcedKubeflowNamespaceLabels {\n+\t\texistingValue, ok := ns.Labels[k]\n+\t\tif len(v) == 0 {\n+\t\t\t// When there is an empty value, k should be removed.\n+\t\t\tif ok {\n+\t\t\t\tdelete(ns.Labels, k)\n+\t\t\t}\n+\t\t} else {\n+\t\t\tif !ok || existingValue != v {", - "comment_created_at": "2021-03-31T10:44:12+00:00", - "comment_author": "Bobgy", - "comment_body": "The if seems unnecessary, no matter what, you will set this 
label", - "pr_file_module": null - }, - { - "comment_id": "605286906", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5761, - "pr_file": "components/profile-controller/controllers/profile_controller.go", - "discussion_id": "604789893", - "commented_code": "@@ -617,15 +630,48 @@ func removeString(slice []string, s string) (result []string) {\n \treturn\n }\n \n+func enforceNamespaceLabelsFromConfig(ns *corev1.Namespace, logger logr.Logger) {\n+\tif ns.Labels == nil {\n+\t\tns.Labels = make(map[string]string)\n+\t}\n+\n+\tfor k, v := range enforcedKubeflowNamespaceLabels {\n+\t\texistingValue, ok := ns.Labels[k]\n+\t\tif len(v) == 0 {\n+\t\t\t// When there is an empty value, k should be removed.\n+\t\t\tif ok {\n+\t\t\t\tdelete(ns.Labels, k)\n+\t\t\t}\n+\t\t} else {\n+\t\t\tif !ok || existingValue != v {", - "comment_created_at": "2021-03-31T23:38:03+00:00", - "comment_author": "zijianjoy", - "comment_body": "You are right! Removed this if condition.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "547974495", - "pr_number": 5314, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "created_at": "2020-12-23T14:05:47+00:00", - "commented_code": "} else {\n\t\t// Got the pod\n\t\tpodFound = true\n\t\tif len(pod.Status.ContainerStatuses) > 0 &&\n\t\t\tpod.Status.ContainerStatuses[0].State != instance.Status.ContainerState {\n\t\t\tlog.Info(\"Updating container state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n\t\t\tcs := pod.Status.ContainerStatuses[0].State\n\t\t\tinstance.Status.ContainerState = cs\n\t\t\toldConditions := instance.Status.Conditions\n\t\t\tnewCondition := getNextCondition(cs)\n\t\t\t// Append new condition\n\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n\t\t\t\toldConditions[0].Message != newCondition.Message {\n\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n\n\t\t// Update status of the CR using the ContainerState of\n\t\t// the container that has the same name as the CR.\n\t\t// If no container of same name is found, the state of the CR is not updated.\n\t\tif len(pod.Status.ContainerStatuses) > 0 {\n\t\t\tnotebookContainerFound := false\n\t\t\tfor i := range pod.Status.ContainerStatuses {\n\t\t\t\tif pod.Status.ContainerStatuses[i].Name == instance.Name &&", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "547974495", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5314, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "547974495", - "commented_code": "@@ -206,23 +206,37 @@ func (r *NotebookReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {\n \t} else {\n \t\t// Got the pod\n \t\tpodFound = true\n-\t\tif len(pod.Status.ContainerStatuses) > 0 &&\n-\t\t\tpod.Status.ContainerStatuses[0].State != instance.Status.ContainerState {\n-\t\t\tlog.Info(\"Updating container state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n-\t\t\tcs := pod.Status.ContainerStatuses[0].State\n-\t\t\tinstance.Status.ContainerState = cs\n-\t\t\toldConditions := instance.Status.Conditions\n-\t\t\tnewCondition := getNextCondition(cs)\n-\t\t\t// 
Append new condition\n-\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n-\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n-\t\t\t\toldConditions[0].Message != newCondition.Message {\n-\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n-\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n+\n+\t\t// Update status of the CR using the ContainerState of\n+\t\t// the container that has the same name as the CR.\n+\t\t// If no container of same name is found, the state of the CR is not updated.\n+\t\tif len(pod.Status.ContainerStatuses) > 0 {\n+\t\t\tnotebookContainerFound := false\n+\t\t\tfor i := range pod.Status.ContainerStatuses {\n+\t\t\t\tif pod.Status.ContainerStatuses[i].Name == instance.Name &&", - "comment_created_at": "2020-12-23T14:05:47+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Could you break this if statement to the following ones:\r\n\r\n```golang\r\nif pod.Status.ContainerStatuses[i].Name != instance.Name {\r\n continue\r\n}\r\n\r\nif pod.Status.ContainerStatuses[i].State == instance.Status.ContainerState {\r\n continue\r\n}\r\n\r\nlog.Info(\"Updating Notebook CR state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\r\ncs := pod.Status.ContainerStatuses[i].State\r\n...\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "473846716", - "pr_number": 5218, - "pr_file": "components/tensorboard-controller/controllers/tensorboard_controller.go", - "created_at": "2020-08-20T10:11:05+00:00", - "commented_code": "func isGoogleCloudPath(path string) bool {\n\treturn strings.HasPrefix(path, \"gs://\")\n}\n\nfunc isPVCPath(path string) bool {\n\treturn strings.HasPrefix(path, \"pvc://\")\n}\n\nfunc extractPVCName(path string) string {\n\ttrimmed := strings.TrimPrefix(path, \"pvc://\") //removing \"pvc://\" prefix\n\tending := strings.Index(trimmed, \"/\") //finding ending index of pvc-name string\n\tif ending == -1 {\n\t\treturn trimmed\n\t} else {\n\t\treturn trimmed[0:ending]\n\t}\n}\n\nfunc extractPVCSubPath(path string) string {\n\ttrimmed := strings.TrimPrefix(path, \"pvc://\") //removing \"pvc://\" prefix\n\tstart := strings.Index(trimmed, \"/\") //finding starting index of local path inside PVC\n\tif start == -1 || len(trimmed) == start+1 {\n\t\treturn \"\"\n\t} else {\n\t\treturn trimmed[start+1:]\n\t}\n}\n\n//Searches a corev1.PodList for running pods and returns\n//a running corev1.Pod (if exists)\nfunc findRunningPod(pods *corev1.PodList) corev1.Pod {\n\tfor _, pod := range pods.Items {\n\t\tif pod.Status.Phase == \"Running\" {\n\t\t\treturn pod\n\t\t}\n\t}\n\n\treturn corev1.Pod{}\n}\n\nfunc extractNodeName(pod corev1.Pod) string {\n\treturn pod.Spec.NodeName\n}\n\nfunc generateNodeAffinity(affinity *corev1.Affinity, pvcname string, r *TensorboardReconciler, tb *tensorboardv1alpha1.Tensorboard) error {\n\tvar nodename string\n\tvar pods = &corev1.PodList{}\n\tvar pod corev1.Pod\n\n\t//List all pods that access the PVC that has ClaimName: pvcname.\n\t//NOTE: We use only one custom field selector to filter out pods that don't use this PVC.\n\tif err := r.List(context.Background(), pods, client.InNamespace(tb.Namespace), client.MatchingFields{\"spec.volumes.persistentvolumeclaim.claimname\": pvcname}); err != nil {\n\t\treturn fmt.Errorf(\"List pods error: %v\", 
err)\n\t}\n\n\t//Find a running pod that uses the PVC.\n\tpod = findRunningPod(pods)\n\n\t//If there is no running pod that uses the PVC, then: nodename == \"\".\n\t//Else, nodename contains the name of the node that the pod is running on.\n\tnodename = extractNodeName(pod)\n\n\t//In this case, there is a running pod that uses the PVC, therefore we create\n\t//a nodeAffinity field so that the Tensorboard server will be scheduled (if possible)\n\t//on the same node as the running pod.\n\tif nodename != \"\" {", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "473846716", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5218, - "pr_file": "components/tensorboard-controller/controllers/tensorboard_controller.go", - "discussion_id": "473846716", - "commented_code": "@@ -281,3 +328,102 @@ func isCloudPath(path string) bool {\n func isGoogleCloudPath(path string) bool {\n \treturn strings.HasPrefix(path, \"gs://\")\n }\n+\n+func isPVCPath(path string) bool {\n+\treturn strings.HasPrefix(path, \"pvc://\")\n+}\n+\n+func extractPVCName(path string) string {\n+\ttrimmed := strings.TrimPrefix(path, \"pvc://\") //removing \"pvc://\" prefix\n+\tending := strings.Index(trimmed, \"/\") //finding ending index of pvc-name string\n+\tif ending == -1 {\n+\t\treturn trimmed\n+\t} else {\n+\t\treturn trimmed[0:ending]\n+\t}\n+}\n+\n+func extractPVCSubPath(path string) string {\n+\ttrimmed := strings.TrimPrefix(path, \"pvc://\") //removing \"pvc://\" prefix\n+\tstart := strings.Index(trimmed, \"/\") //finding starting index of local path inside PVC\n+\tif start == -1 || len(trimmed) == start+1 {\n+\t\treturn \"\"\n+\t} else {\n+\t\treturn trimmed[start+1:]\n+\t}\n+}\n+\n+//Searches a corev1.PodList for running pods and returns\n+//a running corev1.Pod (if exists)\n+func findRunningPod(pods *corev1.PodList) corev1.Pod {\n+\tfor _, pod := range pods.Items {\n+\t\tif pod.Status.Phase == \"Running\" {\n+\t\t\treturn pod\n+\t\t}\n+\t}\n+\n+\treturn corev1.Pod{}\n+}\n+\n+func extractNodeName(pod corev1.Pod) string {\n+\treturn pod.Spec.NodeName\n+}\n+\n+func generateNodeAffinity(affinity *corev1.Affinity, pvcname string, r *TensorboardReconciler, tb *tensorboardv1alpha1.Tensorboard) error {\n+\tvar nodename string\n+\tvar pods = &corev1.PodList{}\n+\tvar pod corev1.Pod\n+\n+\t//List all pods that access the PVC that has ClaimName: pvcname.\n+\t//NOTE: We use only one custom field selector to filter out pods that don't use this PVC.\n+\tif err := r.List(context.Background(), pods, client.InNamespace(tb.Namespace), client.MatchingFields{\"spec.volumes.persistentvolumeclaim.claimname\": pvcname}); err != nil {\n+\t\treturn fmt.Errorf(\"List pods error: %v\", err)\n+\t}\n+\n+\t//Find a running pod that uses the PVC.\n+\tpod = findRunningPod(pods)\n+\n+\t//If there is no running pod that uses the PVC, then: nodename == \"\".\n+\t//Else, nodename contains the name of the node that the pod is running on.\n+\tnodename = extractNodeName(pod)\n+\n+\t//In this case, there is a running pod that uses the PVC, therefore we create\n+\t//a nodeAffinity field so that the Tensorboard server will be scheduled (if possible)\n+\t//on the same node as the running pod.\n+\tif nodename != \"\" {", - "comment_created_at": "2020-08-20T10:11:05+00:00", - "comment_author": "kimwnasptd", - "comment_body": "nit: You could have an `if nodename == \"\" {` here and return nil if that holds.\r\n\r\nThen you could move the code below one tab to the left, outside of an if clause, to make it a little bit 
more simple to read", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "320263186", - "pr_number": 4040, - "pr_file": "bootstrap/pkg/kfapp/apply/apply.go", - "created_at": "2019-09-03T13:12:21+00:00", - "commented_code": "package apply\n\nimport (\n\t\"errors\"\n\t\"fmt\"\n\t\"io\"\n\t\"os\"\n\n\tkftypes \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n\tkfdefsv3 \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n\tlog \"github.com/sirupsen/logrus\"\n)\n\n// BootstrapKubeflow is used by the kfctl apply sub-command to take in a configfile\n// as a flag and boostrap a KfApp and deploy it\nfunc BootstrapKubeflow(configFilePath string, kfResource kftypes.ResourceEnum) error {\n\t// Set default app name to kf-app\n\tappName := \"kf-app\"\n\n\t// Construct KfDef from the configFilePath provided\n\tkfDef := &kfdefsv3.KfDef{}\n\tkfDef, err := kfdefsv3.LoadKFDefFromURI(configFilePath)\n\tif err != nil {\n\t\tlog.Printf(\"Unable to create KfDef from config file: %v\", err)\n\t}\n\tif kfDef.Name != \"\" {\n\t\tlog.Warnf(\"Overriding KfDef.Spec.Name; old value %v; new value %v\", kfDef.Name, appName)\n\t}\n\tkfDef.Name = appName", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "320263186", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4040, - "pr_file": "bootstrap/pkg/kfapp/apply/apply.go", - "discussion_id": "320263186", - "commented_code": "@@ -0,0 +1,103 @@\n+package apply\n+\n+import (\n+\t\"errors\"\n+\t\"fmt\"\n+\t\"io\"\n+\t\"os\"\n+\n+\tkftypes \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n+\tkfdefsv3 \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n+\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n+\tlog \"github.com/sirupsen/logrus\"\n+)\n+\n+// BootstrapKubeflow is used by the kfctl apply sub-command to take in a configfile\n+// as a flag and boostrap a KfApp and deploy it\n+func BootstrapKubeflow(configFilePath string, kfResource kftypes.ResourceEnum) error {\n+\t// Set default app name to kf-app\n+\tappName := \"kf-app\"\n+\n+\t// Construct KfDef from the configFilePath provided\n+\tkfDef := &kfdefsv3.KfDef{}\n+\tkfDef, err := kfdefsv3.LoadKFDefFromURI(configFilePath)\n+\tif err != nil {\n+\t\tlog.Printf(\"Unable to create KfDef from config file: %v\", err)\n+\t}\n+\tif kfDef.Name != \"\" {\n+\t\tlog.Warnf(\"Overriding KfDef.Spec.Name; old value %v; new value %v\", kfDef.Name, appName)\n+\t}\n+\tkfDef.Name = appName", - "comment_created_at": "2019-09-03T13:12:21+00:00", - "comment_author": "yanniszark", - "comment_body": "This line should be inside the if statement.\r\nNow it always overrides.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "300474899", - "pr_number": 3607, - "pr_file": "bootstrap/v2/pkg/kfapp/existing_arrikto/existing.go", - "created_at": "2019-07-04T17:54:25+00:00", - "commented_code": "}\n\treturn string(b)\n}\n\nfunc initialiseHMACSecretFromEnvOrGen(secEnv string, reqLen int) []byte {\n\tenvContent := os.Getenv(secEnv)\n\n\tif len(envContent) < reqLen {\n\t\tlog.Println(\"WARNING: HMAC secret not provided or secret too short. 
Generating a random one from nonce characters.\")\n\t\treturn []byte(createNonce(reqLen))\n\t}\n\n\treturn []byte(envContent)\n}\n\nfunc createNonce(length int) string {", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "300474899", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 3607, - "pr_file": "bootstrap/v2/pkg/kfapp/existing_arrikto/existing.go", - "discussion_id": "300474899", - "commented_code": "@@ -470,3 +480,24 @@ func genRandomString(length int) string {\n \t}\n \treturn string(b)\n }\n+\n+func initialiseHMACSecretFromEnvOrGen(secEnv string, reqLen int) []byte {\n+\tenvContent := os.Getenv(secEnv)\n+\n+\tif len(envContent) < reqLen {\n+\t\tlog.Println(\"WARNING: HMAC secret not provided or secret too short. Generating a random one from nonce characters.\")\n+\t\treturn []byte(createNonce(reqLen))\n+\t}\n+\n+\treturn []byte(envContent)\n+}\n+\n+func createNonce(length int) string {", - "comment_created_at": "2019-07-04T17:54:25+00:00", - "comment_author": "SachinVarghese", - "comment_body": "You can use the genRandomString function here and the createNonce is not really required.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-specific-network-access-documentation.json b/_reviewers/kubeflow-specific-network-access-documentation.json new file mode 100644 index 0000000..21c4571 --- /dev/null +++ b/_reviewers/kubeflow-specific-network-access-documentation.json @@ -0,0 +1,92 @@ +[ + { + "discussion_id": "1030570642", + "pr_number": 6753, + "pr_file": "components/centraldashboard/README.md", + "created_at": "2022-11-23T15:18:19+00:00", + "commented_code": "from the front-end starting with `/api` are proxied to the Express\n server. All other requests are handled by the front-end server which\n mirrors the production configuration.\n4. To access the Kubenetes REST API, run `kubectl proxy --port=8083`.\n - This runs [kubectl proxy](https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#proxy) to create a reverse proxy at http://localhost:8083. Requests from the front-end starting with `/jupyter`, `/metadata`, `/notebook` and `/pipeline` are proxied to the [Kubenetes API server](https://kubernetes.io/docs/tasks/administer-cluster/access-cluster-api/). See the [webpack config file](https://github.com/kubeflow/kubeflow/blob/master/components/centraldashboard/webpack.config.js) for more details.\n4. To access a Kubenetes service, run `kubectl port-forward -n kubeflow svc/ :` e.g. `kubectl port-forward -n kubeflow svc/jupyter-web-app-service 8085:80`.", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1030570642", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6753, + "pr_file": "components/centraldashboard/README.md", + "discussion_id": "1030570642", + "commented_code": "@@ -45,8 +45,8 @@ Make sure you have the latest LTS version of `node` installed along with `npm`.\n from the front-end starting with `/api` are proxied to the Express\n server. All other requests are handled by the front-end server which\n mirrors the production configuration.\n-4. To access the Kubenetes REST API, run `kubectl proxy --port=8083`.\n- - This runs [kubectl proxy](https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#proxy) to create a reverse proxy at http://localhost:8083. 
Requests from the front-end starting with `/jupyter`, `/metadata`, `/notebook` and `/pipeline` are proxied to the [Kubenetes API server](https://kubernetes.io/docs/tasks/administer-cluster/access-cluster-api/). See the [webpack config file](https://github.com/kubeflow/kubeflow/blob/master/components/centraldashboard/webpack.config.js) for more details.\n+4. To access a Kubenetes service, run `kubectl port-forward -n kubeflow svc/ :` e.g. `kubectl port-forward -n kubeflow svc/jupyter-web-app-service 8085:80`.", + "comment_created_at": "2022-11-23T15:18:19+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Could you instead have a list for this numbered step and expose each `kubectl port-forward` command that a user needs to run?\r\n\r\nWe can just have commands for the Services that we proxy in the webpack config", + "pr_file_module": null + }, + { + "comment_id": "1030685930", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6753, + "pr_file": "components/centraldashboard/README.md", + "discussion_id": "1030570642", + "commented_code": "@@ -45,8 +45,8 @@ Make sure you have the latest LTS version of `node` installed along with `npm`.\n from the front-end starting with `/api` are proxied to the Express\n server. All other requests are handled by the front-end server which\n mirrors the production configuration.\n-4. To access the Kubenetes REST API, run `kubectl proxy --port=8083`.\n- - This runs [kubectl proxy](https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#proxy) to create a reverse proxy at http://localhost:8083. Requests from the front-end starting with `/jupyter`, `/metadata`, `/notebook` and `/pipeline` are proxied to the [Kubenetes API server](https://kubernetes.io/docs/tasks/administer-cluster/access-cluster-api/). See the [webpack config file](https://github.com/kubeflow/kubeflow/blob/master/components/centraldashboard/webpack.config.js) for more details.\n+4. To access a Kubenetes service, run `kubectl port-forward -n kubeflow svc/ :` e.g. `kubectl port-forward -n kubeflow svc/jupyter-web-app-service 8085:80`.", + "comment_created_at": "2022-11-23T16:53:33+00:00", + "comment_author": "orfeas-k", + "comment_body": "Pushed a new commit for both of the comments @kimwnasptd. ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1030571442", + "pr_number": 6753, + "pr_file": "components/centraldashboard/README.md", + "created_at": "2022-11-23T15:19:00+00:00", + "commented_code": "from the front-end starting with `/api` are proxied to the Express\n server. All other requests are handled by the front-end server which\n mirrors the production configuration.\n4. To access the Kubenetes REST API, run `kubectl proxy --port=8083`.\n - This runs [kubectl proxy](https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#proxy) to create a reverse proxy at http://localhost:8083. Requests from the front-end starting with `/jupyter`, `/metadata`, `/notebook` and `/pipeline` are proxied to the [Kubenetes API server](https://kubernetes.io/docs/tasks/administer-cluster/access-cluster-api/). See the [webpack config file](https://github.com/kubeflow/kubeflow/blob/master/components/centraldashboard/webpack.config.js) for more details.\n4. To access a Kubenetes service, run `kubectl port-forward -n kubeflow svc/ :` e.g. `kubectl port-forward -n kubeflow svc/jupyter-web-app-service 8085:80`.\n - This forwards requests to Kubernetes services from http://localhost:service-proxy-port. 
Requests from the front-end starting with `/jupyter`, `/notebook` and `/pipeline` are proxied there. See the [webpack config file](https://github.com/kubeflow/kubeflow/blob/master/components/centraldashboard/webpack.config.js) for more details.", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1030571442", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6753, + "pr_file": "components/centraldashboard/README.md", + "discussion_id": "1030571442", + "commented_code": "@@ -45,8 +45,8 @@ Make sure you have the latest LTS version of `node` installed along with `npm`.\n from the front-end starting with `/api` are proxied to the Express\n server. All other requests are handled by the front-end server which\n mirrors the production configuration.\n-4. To access the Kubenetes REST API, run `kubectl proxy --port=8083`.\n- - This runs [kubectl proxy](https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#proxy) to create a reverse proxy at http://localhost:8083. Requests from the front-end starting with `/jupyter`, `/metadata`, `/notebook` and `/pipeline` are proxied to the [Kubenetes API server](https://kubernetes.io/docs/tasks/administer-cluster/access-cluster-api/). See the [webpack config file](https://github.com/kubeflow/kubeflow/blob/master/components/centraldashboard/webpack.config.js) for more details.\n+4. To access a Kubenetes service, run `kubectl port-forward -n kubeflow svc/ :` e.g. `kubectl port-forward -n kubeflow svc/jupyter-web-app-service 8085:80`.\n+ - This forwards requests to Kubernetes services from http://localhost:service-proxy-port. Requests from the front-end starting with `/jupyter`, `/notebook` and `/pipeline` are proxied there. See the [webpack config file](https://github.com/kubeflow/kubeflow/blob/master/components/centraldashboard/webpack.config.js) for more details.", + "comment_created_at": "2022-11-23T15:19:00+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Let's add this explanation at the numbered step, since it explains how the proxying works. And, as mentioned above, have the bullet list for the different proxying commands", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "870246337", + "pr_number": 6459, + "pr_file": "components/admission-webhook/README.md", + "created_at": "2022-05-11T12:34:24+00:00", + "commented_code": "1. When there is a pod being created (see `rules`),\n1. call the webhook service `gcp-cred-webhook.default` at path `/add-cred` (see `clientConfig`)\n\n### admissionregistration.k8s.io/v1 default failurePolicy\nIn adopting `admissionregistration.k8s.io/v1` for the `MutatingWebhookConfiguration` we accept the default value\nfor `failurePolicy` to be `Fail` per [documentation](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#failure-policy). Upon testing this default feature it was discovered if the `AdmissionWebhook`'s `mutating-webhook-configuration` failed to mutate a pod then the pod would fail to start, its associated `Deployment` or `StatefulSet` would continually attempt to create the pod until the process that created the pod was terminated. 
This continuous failure to mutate the target pod may block other target pods from mutation\nuntil the failing process ends.", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "870246337", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6459, + "pr_file": "components/admission-webhook/README.md", + "discussion_id": "870246337", + "commented_code": "@@ -75,10 +75,19 @@ This specifies\n 1. When there is a pod being created (see `rules`),\n 1. call the webhook service `gcp-cred-webhook.default` at path `/add-cred` (see `clientConfig`)\n \n+### admissionregistration.k8s.io/v1 default failurePolicy\n+In adopting `admissionregistration.k8s.io/v1` for the `MutatingWebhookConfiguration` we accept the default value\n+for `failurePolicy` to be `Fail` per [documentation](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#failure-policy). Upon testing this default feature it was discovered if the `AdmissionWebhook`'s `mutating-webhook-configuration` failed to mutate a pod then the pod would fail to start, its associated `Deployment` or `StatefulSet` would continually attempt to create the pod until the process that created the pod was terminated. This continuous failure to mutate the target pod may block other target pods from mutation\n+until the failing process ends.", + "comment_created_at": "2022-05-11T12:34:24+00:00", + "comment_author": "kimwnasptd", + "comment_body": ">This continuous failure to mutate the target pod may block other target pods from mutation\r\nuntil the failing process ends.\r\n\r\nCould you clarify a little bit more what you mean here? By other Pods you are not referring to other unrelated Pods in the cluster right?\r\n\r\nIIUC, as you described, the Deployment/StatefulSet will be retrying to create the Pod which will keep failing. Are there other side effects to other pods?", + "pr_file_module": null + }, + { + "comment_id": "870631231", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6459, + "pr_file": "components/admission-webhook/README.md", + "discussion_id": "870246337", + "commented_code": "@@ -75,10 +75,19 @@ This specifies\n 1. When there is a pod being created (see `rules`),\n 1. call the webhook service `gcp-cred-webhook.default` at path `/add-cred` (see `clientConfig`)\n \n+### admissionregistration.k8s.io/v1 default failurePolicy\n+In adopting `admissionregistration.k8s.io/v1` for the `MutatingWebhookConfiguration` we accept the default value\n+for `failurePolicy` to be `Fail` per [documentation](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#failure-policy). Upon testing this default feature it was discovered if the `AdmissionWebhook`'s `mutating-webhook-configuration` failed to mutate a pod then the pod would fail to start, its associated `Deployment` or `StatefulSet` would continually attempt to create the pod until the process that created the pod was terminated. This continuous failure to mutate the target pod may block other target pods from mutation\n+until the failing process ends.", + "comment_created_at": "2022-05-11T18:33:42+00:00", + "comment_author": "aaron-arellano", + "comment_body": "yep. will clarify. Pretty much any pod that is targeted by the admission webhook for mutation is effected and will fail to start if there are configuration collisions or some other issue. Already running pods are not effected at all, again only pods being created after the policy change. 
Will add this to Readme for clarification.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-specific-network-access-documentation.md b/_reviewers/kubeflow-specific-network-access-documentation.md index 4f0878e..aafc03f 100644 --- a/_reviewers/kubeflow-specific-network-access-documentation.md +++ b/_reviewers/kubeflow-specific-network-access-documentation.md @@ -25,97 +25,3 @@ e.g. `kubectl port-forward -n kubeflow svc/jupyter-web-app-service 8085:80`. ``` Additionally, document how proxying works and what happens if network connections fail, including any potential cascade effects on dependent services. This specificity helps developers understand exactly what networking configurations are needed and prevents confusion when setting up local development environments or troubleshooting connection issues. - - -[ - { - "discussion_id": "1030570642", - "pr_number": 6753, - "pr_file": "components/centraldashboard/README.md", - "created_at": "2022-11-23T15:18:19+00:00", - "commented_code": "from the front-end starting with `/api` are proxied to the Express\n server. All other requests are handled by the front-end server which\n mirrors the production configuration.\n4. To access the Kubenetes REST API, run `kubectl proxy --port=8083`.\n - This runs [kubectl proxy](https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#proxy) to create a reverse proxy at http://localhost:8083. Requests from the front-end starting with `/jupyter`, `/metadata`, `/notebook` and `/pipeline` are proxied to the [Kubenetes API server](https://kubernetes.io/docs/tasks/administer-cluster/access-cluster-api/). See the [webpack config file](https://github.com/kubeflow/kubeflow/blob/master/components/centraldashboard/webpack.config.js) for more details.\n4. To access a Kubenetes service, run `kubectl port-forward -n kubeflow svc/ :` e.g. `kubectl port-forward -n kubeflow svc/jupyter-web-app-service 8085:80`.", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1030570642", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6753, - "pr_file": "components/centraldashboard/README.md", - "discussion_id": "1030570642", - "commented_code": "@@ -45,8 +45,8 @@ Make sure you have the latest LTS version of `node` installed along with `npm`.\n from the front-end starting with `/api` are proxied to the Express\n server. All other requests are handled by the front-end server which\n mirrors the production configuration.\n-4. To access the Kubenetes REST API, run `kubectl proxy --port=8083`.\n- - This runs [kubectl proxy](https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#proxy) to create a reverse proxy at http://localhost:8083. Requests from the front-end starting with `/jupyter`, `/metadata`, `/notebook` and `/pipeline` are proxied to the [Kubenetes API server](https://kubernetes.io/docs/tasks/administer-cluster/access-cluster-api/). See the [webpack config file](https://github.com/kubeflow/kubeflow/blob/master/components/centraldashboard/webpack.config.js) for more details.\n+4. To access a Kubenetes service, run `kubectl port-forward -n kubeflow svc/ :` e.g. 
`kubectl port-forward -n kubeflow svc/jupyter-web-app-service 8085:80`.", - "comment_created_at": "2022-11-23T15:18:19+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Could you instead have a list for this numbered step and expose each `kubectl port-forward` command that a user needs to run?\r\n\r\nWe can just have commands for the Services that we proxy in the webpack config", - "pr_file_module": null - }, - { - "comment_id": "1030685930", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6753, - "pr_file": "components/centraldashboard/README.md", - "discussion_id": "1030570642", - "commented_code": "@@ -45,8 +45,8 @@ Make sure you have the latest LTS version of `node` installed along with `npm`.\n from the front-end starting with `/api` are proxied to the Express\n server. All other requests are handled by the front-end server which\n mirrors the production configuration.\n-4. To access the Kubenetes REST API, run `kubectl proxy --port=8083`.\n- - This runs [kubectl proxy](https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#proxy) to create a reverse proxy at http://localhost:8083. Requests from the front-end starting with `/jupyter`, `/metadata`, `/notebook` and `/pipeline` are proxied to the [Kubenetes API server](https://kubernetes.io/docs/tasks/administer-cluster/access-cluster-api/). See the [webpack config file](https://github.com/kubeflow/kubeflow/blob/master/components/centraldashboard/webpack.config.js) for more details.\n+4. To access a Kubenetes service, run `kubectl port-forward -n kubeflow svc/ :` e.g. `kubectl port-forward -n kubeflow svc/jupyter-web-app-service 8085:80`.", - "comment_created_at": "2022-11-23T16:53:33+00:00", - "comment_author": "orfeas-k", - "comment_body": "Pushed a new commit for both of the comments @kimwnasptd. ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1030571442", - "pr_number": 6753, - "pr_file": "components/centraldashboard/README.md", - "created_at": "2022-11-23T15:19:00+00:00", - "commented_code": "from the front-end starting with `/api` are proxied to the Express\n server. All other requests are handled by the front-end server which\n mirrors the production configuration.\n4. To access the Kubenetes REST API, run `kubectl proxy --port=8083`.\n - This runs [kubectl proxy](https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#proxy) to create a reverse proxy at http://localhost:8083. Requests from the front-end starting with `/jupyter`, `/metadata`, `/notebook` and `/pipeline` are proxied to the [Kubenetes API server](https://kubernetes.io/docs/tasks/administer-cluster/access-cluster-api/). See the [webpack config file](https://github.com/kubeflow/kubeflow/blob/master/components/centraldashboard/webpack.config.js) for more details.\n4. To access a Kubenetes service, run `kubectl port-forward -n kubeflow svc/ :` e.g. `kubectl port-forward -n kubeflow svc/jupyter-web-app-service 8085:80`.\n - This forwards requests to Kubernetes services from http://localhost:service-proxy-port. Requests from the front-end starting with `/jupyter`, `/notebook` and `/pipeline` are proxied there. 
See the [webpack config file](https://github.com/kubeflow/kubeflow/blob/master/components/centraldashboard/webpack.config.js) for more details.", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1030571442", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6753, - "pr_file": "components/centraldashboard/README.md", - "discussion_id": "1030571442", - "commented_code": "@@ -45,8 +45,8 @@ Make sure you have the latest LTS version of `node` installed along with `npm`.\n from the front-end starting with `/api` are proxied to the Express\n server. All other requests are handled by the front-end server which\n mirrors the production configuration.\n-4. To access the Kubenetes REST API, run `kubectl proxy --port=8083`.\n- - This runs [kubectl proxy](https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#proxy) to create a reverse proxy at http://localhost:8083. Requests from the front-end starting with `/jupyter`, `/metadata`, `/notebook` and `/pipeline` are proxied to the [Kubenetes API server](https://kubernetes.io/docs/tasks/administer-cluster/access-cluster-api/). See the [webpack config file](https://github.com/kubeflow/kubeflow/blob/master/components/centraldashboard/webpack.config.js) for more details.\n+4. To access a Kubenetes service, run `kubectl port-forward -n kubeflow svc/ :` e.g. `kubectl port-forward -n kubeflow svc/jupyter-web-app-service 8085:80`.\n+ - This forwards requests to Kubernetes services from http://localhost:service-proxy-port. Requests from the front-end starting with `/jupyter`, `/notebook` and `/pipeline` are proxied there. See the [webpack config file](https://github.com/kubeflow/kubeflow/blob/master/components/centraldashboard/webpack.config.js) for more details.", - "comment_created_at": "2022-11-23T15:19:00+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Let's add this explanation at the numbered step, since it explains how the proxying works. And, as mentioned above, have the bullet list for the different proxying commands", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "870246337", - "pr_number": 6459, - "pr_file": "components/admission-webhook/README.md", - "created_at": "2022-05-11T12:34:24+00:00", - "commented_code": "1. When there is a pod being created (see `rules`),\n1. call the webhook service `gcp-cred-webhook.default` at path `/add-cred` (see `clientConfig`)\n\n### admissionregistration.k8s.io/v1 default failurePolicy\nIn adopting `admissionregistration.k8s.io/v1` for the `MutatingWebhookConfiguration` we accept the default value\nfor `failurePolicy` to be `Fail` per [documentation](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#failure-policy). Upon testing this default feature it was discovered if the `AdmissionWebhook`'s `mutating-webhook-configuration` failed to mutate a pod then the pod would fail to start, its associated `Deployment` or `StatefulSet` would continually attempt to create the pod until the process that created the pod was terminated. This continuous failure to mutate the target pod may block other target pods from mutation\nuntil the failing process ends.", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "870246337", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6459, - "pr_file": "components/admission-webhook/README.md", - "discussion_id": "870246337", - "commented_code": "@@ -75,10 +75,19 @@ This specifies\n 1. 
When there is a pod being created (see `rules`),\n 1. call the webhook service `gcp-cred-webhook.default` at path `/add-cred` (see `clientConfig`)\n \n+### admissionregistration.k8s.io/v1 default failurePolicy\n+In adopting `admissionregistration.k8s.io/v1` for the `MutatingWebhookConfiguration` we accept the default value\n+for `failurePolicy` to be `Fail` per [documentation](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#failure-policy). Upon testing this default feature it was discovered if the `AdmissionWebhook`'s `mutating-webhook-configuration` failed to mutate a pod then the pod would fail to start, its associated `Deployment` or `StatefulSet` would continually attempt to create the pod until the process that created the pod was terminated. This continuous failure to mutate the target pod may block other target pods from mutation\n+until the failing process ends.", - "comment_created_at": "2022-05-11T12:34:24+00:00", - "comment_author": "kimwnasptd", - "comment_body": ">This continuous failure to mutate the target pod may block other target pods from mutation\r\nuntil the failing process ends.\r\n\r\nCould you clarify a little bit more what you mean here? By other Pods you are not referring to other unrelated Pods in the cluster right?\r\n\r\nIIUC, as you described, the Deployment/StatefulSet will be retrying to create the Pod which will keep failing. Are there other side effects to other pods?", - "pr_file_module": null - }, - { - "comment_id": "870631231", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6459, - "pr_file": "components/admission-webhook/README.md", - "discussion_id": "870246337", - "commented_code": "@@ -75,10 +75,19 @@ This specifies\n 1. When there is a pod being created (see `rules`),\n 1. call the webhook service `gcp-cred-webhook.default` at path `/add-cred` (see `clientConfig`)\n \n+### admissionregistration.k8s.io/v1 default failurePolicy\n+In adopting `admissionregistration.k8s.io/v1` for the `MutatingWebhookConfiguration` we accept the default value\n+for `failurePolicy` to be `Fail` per [documentation](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#failure-policy). Upon testing this default feature it was discovered if the `AdmissionWebhook`'s `mutating-webhook-configuration` failed to mutate a pod then the pod would fail to start, its associated `Deployment` or `StatefulSet` would continually attempt to create the pod until the process that created the pod was terminated. This continuous failure to mutate the target pod may block other target pods from mutation\n+until the failing process ends.", - "comment_created_at": "2022-05-11T18:33:42+00:00", - "comment_author": "aaron-arellano", - "comment_body": "yep. will clarify. Pretty much any pod that is targeted by the admission webhook for mutation is effected and will fail to start if there are configuration collisions or some other issue. Already running pods are not effected at all, again only pods being created after the policy change. 
Will add this to Readme for clarification.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-stable-configuration-management.json b/_reviewers/kubeflow-stable-configuration-management.json new file mode 100644 index 0000000..49f0047 --- /dev/null +++ b/_reviewers/kubeflow-stable-configuration-management.json @@ -0,0 +1,70 @@ +[ + { + "discussion_id": "350458713", + "pr_number": 4487, + "pr_file": "components/gcp-click-to-deploy/src/DeployForm.tsx", + "created_at": "2019-11-25T22:34:34+00:00", + "commented_code": "kfctlLib: false,\n kfversion: Version.V06,\n // Version for local test. Staging and Prod with overwrite with their env vars.\n kfversionList: [Version.V06, Version.V05],\n kfversionList: ['v0.7.0', Version.V06],", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "350458713", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4487, + "pr_file": "components/gcp-click-to-deploy/src/DeployForm.tsx", + "discussion_id": "350458713", + "commented_code": "@@ -221,7 +259,7 @@ export default class DeployForm extends React.Component {\n kfctlLib: false,\n kfversion: Version.V06,\n // Version for local test. Staging and Prod with overwrite with their env vars.\n- kfversionList: [Version.V06, Version.V05],\n+ kfversionList: ['v0.7.0', Version.V06],", + "comment_created_at": "2019-11-25T22:34:34+00:00", + "comment_author": "abhi-g", + "comment_body": "why not use the constant defined above for 0.7?", + "pr_file_module": null + }, + { + "comment_id": "350459520", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4487, + "pr_file": "components/gcp-click-to-deploy/src/DeployForm.tsx", + "discussion_id": "350458713", + "commented_code": "@@ -221,7 +259,7 @@ export default class DeployForm extends React.Component {\n kfctlLib: false,\n kfversion: Version.V06,\n // Version for local test. Staging and Prod with overwrite with their env vars.\n- kfversionList: [Version.V06, Version.V05],\n+ kfversionList: ['v0.7.0', Version.V06],", + "comment_created_at": "2019-11-25T22:37:05+00:00", + "comment_author": "kunmingg", + "comment_body": "The version here `v0.7.0` is for local test, version will be over written by env vars.\r\nAbove was a prefix `v0.7` to match v0.7.0 and sub-releases to new backend.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "311760821", + "pr_number": 3599, + "pr_file": "components/gcp-click-to-deploy/src/DeployForm.tsx", + "created_at": "2019-08-07T21:00:18+00:00", + "commented_code": "import * as request from 'request';\n\nimport Gapi from './Gapi';\nimport {encryptPassword, flattenDeploymentOperationError, getFileContentsFromGitHub, log, wait} from './Utils';\nimport {\n encryptPassword, flattenDeploymentOperationError,\n getFileContentsFromGitHub, log, wait\n} from './Utils';\n\n/** Relative paths from the root of the repository. 
*/\nenum ConfigPath {\n V05 = 'v0.5-branch/components/gcp-click-to-deploy/app-config.yaml',\n V06 = 'v0.6-branch/bootstrap/config/kfctl_gcp_iap.0.6.yaml'\n V06 = 'master/bootstrap/config/kfctl_gcp_iap.yaml'", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "311760821", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 3599, + "pr_file": "components/gcp-click-to-deploy/src/DeployForm.tsx", + "discussion_id": "311760821", + "commented_code": "@@ -14,31 +14,33 @@ import * as React from 'react';\n import * as request from 'request';\n \n import Gapi from './Gapi';\n-import {encryptPassword, flattenDeploymentOperationError, getFileContentsFromGitHub, log, wait} from './Utils';\n+import {\n+ encryptPassword, flattenDeploymentOperationError,\n+ getFileContentsFromGitHub, log, wait\n+} from './Utils';\n \n /** Relative paths from the root of the repository. */\n enum ConfigPath {\n V05 = 'v0.5-branch/components/gcp-click-to-deploy/app-config.yaml',\n- V06 = 'v0.6-branch/bootstrap/config/kfctl_gcp_iap.0.6.yaml'\n+ V06 = 'master/bootstrap/config/kfctl_gcp_iap.yaml'", + "comment_created_at": "2019-08-07T21:00:18+00:00", + "comment_author": "kunmingg", + "comment_body": "`v0.6-branch` should be more stable compared to `master`\r\n`master` branch could be broken at any time.", + "pr_file_module": null + }, + { + "comment_id": "312060561", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 3599, + "pr_file": "components/gcp-click-to-deploy/src/DeployForm.tsx", + "discussion_id": "311760821", + "commented_code": "@@ -14,31 +14,33 @@ import * as React from 'react';\n import * as request from 'request';\n \n import Gapi from './Gapi';\n-import {encryptPassword, flattenDeploymentOperationError, getFileContentsFromGitHub, log, wait} from './Utils';\n+import {\n+ encryptPassword, flattenDeploymentOperationError,\n+ getFileContentsFromGitHub, log, wait\n+} from './Utils';\n \n /** Relative paths from the root of the repository. */\n enum ConfigPath {\n V05 = 'v0.5-branch/components/gcp-click-to-deploy/app-config.yaml',\n- V06 = 'v0.6-branch/bootstrap/config/kfctl_gcp_iap.0.6.yaml'\n+ V06 = 'master/bootstrap/config/kfctl_gcp_iap.yaml'", + "comment_created_at": "2019-08-08T14:17:35+00:00", + "comment_author": "prodonjs", + "comment_body": "Yes I agree. But [v0.6-branch/bootstrap/config/kfctl_gcp_iap.0.6.yaml](https://github.com/kubeflow/kubeflow/blob/v0.6-branch/bootstrap/config/kfctl_gcp_iap.0.6.yaml) doesn't have the `applications` section set. [master/bootstrap/config/kfctl_gcp_iap.yaml](https://github.com/kubeflow/kubeflow/blob/master/bootstrap/config/kfctl_gcp_iap.yaml) has `applications` but not `components`, `componentParams`, or `packages`.\r\n\r\nIf possible, we should check a base config file into the v0.6-branch that has the main structure that needs to be in the request body so that the UI doesn't have to build any new elements but simply replace parameter values. 
Alternatively, we could revert to the mechanism used in the current deployment UI where we provide the config file at deployment time, but pulling from GitHub definitely reduces the deployment complexity.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-stable-configuration-management.md b/_reviewers/kubeflow-stable-configuration-management.md index 9026a96..5cee9b8 100644 --- a/_reviewers/kubeflow-stable-configuration-management.md +++ b/_reviewers/kubeflow-stable-configuration-management.md @@ -45,75 +45,3 @@ const versionList = [ Version.V06 ]; ``` - - -[ - { - "discussion_id": "350458713", - "pr_number": 4487, - "pr_file": "components/gcp-click-to-deploy/src/DeployForm.tsx", - "created_at": "2019-11-25T22:34:34+00:00", - "commented_code": "kfctlLib: false,\n kfversion: Version.V06,\n // Version for local test. Staging and Prod with overwrite with their env vars.\n kfversionList: [Version.V06, Version.V05],\n kfversionList: ['v0.7.0', Version.V06],", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "350458713", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4487, - "pr_file": "components/gcp-click-to-deploy/src/DeployForm.tsx", - "discussion_id": "350458713", - "commented_code": "@@ -221,7 +259,7 @@ export default class DeployForm extends React.Component {\n kfctlLib: false,\n kfversion: Version.V06,\n // Version for local test. Staging and Prod with overwrite with their env vars.\n- kfversionList: [Version.V06, Version.V05],\n+ kfversionList: ['v0.7.0', Version.V06],", - "comment_created_at": "2019-11-25T22:34:34+00:00", - "comment_author": "abhi-g", - "comment_body": "why not use the constant defined above for 0.7?", - "pr_file_module": null - }, - { - "comment_id": "350459520", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4487, - "pr_file": "components/gcp-click-to-deploy/src/DeployForm.tsx", - "discussion_id": "350458713", - "commented_code": "@@ -221,7 +259,7 @@ export default class DeployForm extends React.Component {\n kfctlLib: false,\n kfversion: Version.V06,\n // Version for local test. Staging and Prod with overwrite with their env vars.\n- kfversionList: [Version.V06, Version.V05],\n+ kfversionList: ['v0.7.0', Version.V06],", - "comment_created_at": "2019-11-25T22:37:05+00:00", - "comment_author": "kunmingg", - "comment_body": "The version here `v0.7.0` is for local test, version will be over written by env vars.\r\nAbove was a prefix `v0.7` to match v0.7.0 and sub-releases to new backend.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "311760821", - "pr_number": 3599, - "pr_file": "components/gcp-click-to-deploy/src/DeployForm.tsx", - "created_at": "2019-08-07T21:00:18+00:00", - "commented_code": "import * as request from 'request';\n\nimport Gapi from './Gapi';\nimport {encryptPassword, flattenDeploymentOperationError, getFileContentsFromGitHub, log, wait} from './Utils';\nimport {\n encryptPassword, flattenDeploymentOperationError,\n getFileContentsFromGitHub, log, wait\n} from './Utils';\n\n/** Relative paths from the root of the repository. 
*/\nenum ConfigPath {\n V05 = 'v0.5-branch/components/gcp-click-to-deploy/app-config.yaml',\n V06 = 'v0.6-branch/bootstrap/config/kfctl_gcp_iap.0.6.yaml'\n V06 = 'master/bootstrap/config/kfctl_gcp_iap.yaml'", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "311760821", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 3599, - "pr_file": "components/gcp-click-to-deploy/src/DeployForm.tsx", - "discussion_id": "311760821", - "commented_code": "@@ -14,31 +14,33 @@ import * as React from 'react';\n import * as request from 'request';\n \n import Gapi from './Gapi';\n-import {encryptPassword, flattenDeploymentOperationError, getFileContentsFromGitHub, log, wait} from './Utils';\n+import {\n+ encryptPassword, flattenDeploymentOperationError,\n+ getFileContentsFromGitHub, log, wait\n+} from './Utils';\n \n /** Relative paths from the root of the repository. */\n enum ConfigPath {\n V05 = 'v0.5-branch/components/gcp-click-to-deploy/app-config.yaml',\n- V06 = 'v0.6-branch/bootstrap/config/kfctl_gcp_iap.0.6.yaml'\n+ V06 = 'master/bootstrap/config/kfctl_gcp_iap.yaml'", - "comment_created_at": "2019-08-07T21:00:18+00:00", - "comment_author": "kunmingg", - "comment_body": "`v0.6-branch` should be more stable compared to `master`\r\n`master` branch could be broken at any time.", - "pr_file_module": null - }, - { - "comment_id": "312060561", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 3599, - "pr_file": "components/gcp-click-to-deploy/src/DeployForm.tsx", - "discussion_id": "311760821", - "commented_code": "@@ -14,31 +14,33 @@ import * as React from 'react';\n import * as request from 'request';\n \n import Gapi from './Gapi';\n-import {encryptPassword, flattenDeploymentOperationError, getFileContentsFromGitHub, log, wait} from './Utils';\n+import {\n+ encryptPassword, flattenDeploymentOperationError,\n+ getFileContentsFromGitHub, log, wait\n+} from './Utils';\n \n /** Relative paths from the root of the repository. */\n enum ConfigPath {\n V05 = 'v0.5-branch/components/gcp-click-to-deploy/app-config.yaml',\n- V06 = 'v0.6-branch/bootstrap/config/kfctl_gcp_iap.0.6.yaml'\n+ V06 = 'master/bootstrap/config/kfctl_gcp_iap.yaml'", - "comment_created_at": "2019-08-08T14:17:35+00:00", - "comment_author": "prodonjs", - "comment_body": "Yes I agree. But [v0.6-branch/bootstrap/config/kfctl_gcp_iap.0.6.yaml](https://github.com/kubeflow/kubeflow/blob/v0.6-branch/bootstrap/config/kfctl_gcp_iap.0.6.yaml) doesn't have the `applications` section set. [master/bootstrap/config/kfctl_gcp_iap.yaml](https://github.com/kubeflow/kubeflow/blob/master/bootstrap/config/kfctl_gcp_iap.yaml) has `applications` but not `components`, `componentParams`, or `packages`.\r\n\r\nIf possible, we should check a base config file into the v0.6-branch that has the main structure that needs to be in the request body so that the UI doesn't have to build any new elements but simply replace parameter values. 
Alternatively, we could revert to the mechanism used in the current deployment UI where we provide the config file at deployment time, but pulling from GitHub definitely reduces the deployment complexity.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-standardize-build-configurations.json b/_reviewers/kubeflow-standardize-build-configurations.json new file mode 100644 index 0000000..9b93c47 --- /dev/null +++ b/_reviewers/kubeflow-standardize-build-configurations.json @@ -0,0 +1,80 @@ +[ + { + "discussion_id": "559964118", + "pr_number": 5530, + "pr_file": "components/crud-web-apps/README.md", + "created_at": "2021-01-19T07:33:04+00:00", + "commented_code": "# Build instructions for docker images\nTo build the Jupyter and Tensorboards docker images directly (without the Makefile) for development purposes, the following commands can be used from this directory.", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "559964118", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5530, + "pr_file": "components/crud-web-apps/README.md", + "discussion_id": "559964118", + "commented_code": "@@ -0,0 +1,8 @@\n+# Build instructions for docker images\n+To build the Jupyter and Tensorboards docker images directly (without the Makefile) for development purposes, the following commands can be used from this directory.", + "comment_created_at": "2021-01-19T07:33:04+00:00", + "comment_author": "kimwnasptd", + "comment_body": "I don't like the phrasing of this sentence, as it implies that for development purposes we need to run the command directly and can't use the Makefifle.\r\n\r\nWe can use the Makefile to build Jupyter app's image by running\r\n```bash\r\nREGISTRY_PROJECT=my-repo make docker-build\r\n```\r\n\r\n[ Tensorboard's makefile uses slightly different vars but we should iron them out and have the same template when we setup the CI/CD ]", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "595821558", + "pr_number": 5694, + "pr_file": "components/example-notebook-servers/README.md", + "created_at": "2021-03-17T08:54:33+00:00", + "commented_code": "# Kubeflow Example Notebook Servers\n\n**These dockerfiles and the resulting images are provided as examples only. These images do not (yet) undergo automated testing and are only briefly tested manually. As such, these images are not guaranteed to contain no problems. If you do encounter a problem in one of these images, we greatly appreciate contributions to further improve these images.**", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "595821558", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5694, + "pr_file": "components/example-notebook-servers/README.md", + "discussion_id": "595821558", + "commented_code": "@@ -0,0 +1,48 @@\n+# Kubeflow Example Notebook Servers\n+\n+**These dockerfiles and the resulting images are provided as examples only. These images do not (yet) undergo automated testing and are only briefly tested manually. As such, these images are not guaranteed to contain no problems. 
If you do encounter a problem in one of these images, we greatly appreciate contributions to further improve these images.**", + "comment_created_at": "2021-03-17T08:54:33+00:00", + "comment_author": "StefanoFioravanzo", + "comment_body": "I would rephrase as follows:\r\n\r\n```\r\n**NOTE:** The Notebooks Working Group provides these Dockerfiles as examples only, \r\nthey are not part of our official CI/CD pipeline and are not guaranteed to work in every \r\ncase. As such, we are not providing timely SLA. The Notebooks WG will work on officially \r\nsupporting some of these Dockerfiles in the near future. Contributions and issue reports \r\nare greatly appreciated.\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "325432133", + "pr_number": 3968, + "pr_file": "docs_dev/releasing.md", + "created_at": "2019-09-17T23:52:25+00:00", + "commented_code": "gcloud container clusters get-credentials kubeflow-releasing --zone us-central1-a --project kubeflow-releasing\n```\n\nUse [this script](https://github.com/jlewi/kubeflow-dev/blob/master/create_context.sh) to set up your context properly:\n\n```\ncreate_context.sh $(kubectl config current-context) kubeflow-releasing\n```\n## Authorization to Publish a Release\n\nNeed to join [release team](https://github.com/kubeflow/internal-acls/blob/1234654eb219e2c06ed5fdc2c4e662a02fccc740/github-orgs/kubeflow/org.yaml#L388) before you can publish a release.", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "325432133", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 3968, + "pr_file": "docs_dev/releasing.md", + "discussion_id": "325432133", + "commented_code": "@@ -150,12 +151,9 @@ gcloud config set project kubeflow-releasing\n gcloud container clusters get-credentials kubeflow-releasing --zone us-central1-a --project kubeflow-releasing\n ```\n \n-Use [this script](https://github.com/jlewi/kubeflow-dev/blob/master/create_context.sh) to set up your context properly:\n-\n-```\n-create_context.sh $(kubectl config current-context) kubeflow-releasing\n-```\n+## Authorization to Publish a Release\n \n+Need to join [release team](https://github.com/kubeflow/internal-acls/blob/1234654eb219e2c06ed5fdc2c4e662a02fccc740/github-orgs/kubeflow/org.yaml#L388) before you can publish a release.\n ", + "comment_created_at": "2019-09-17T23:52:25+00:00", + "comment_author": "jlewi", + "comment_body": "How about adding a section \r\n\r\nGetting ready to cut a release \r\n\r\nand adding the following\r\n\r\n1. Create a P0 bug of kind process to track the major release.\r\n1. 
Announce the release and link to the issue in the #release channel in kubeflow.slack.com", + "pr_file_module": null + }, + { + "comment_id": "327372828", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 3968, + "pr_file": "docs_dev/releasing.md", + "discussion_id": "325432133", + "commented_code": "@@ -150,12 +151,9 @@ gcloud config set project kubeflow-releasing\n gcloud container clusters get-credentials kubeflow-releasing --zone us-central1-a --project kubeflow-releasing\n ```\n \n-Use [this script](https://github.com/jlewi/kubeflow-dev/blob/master/create_context.sh) to set up your context properly:\n-\n-```\n-create_context.sh $(kubectl config current-context) kubeflow-releasing\n-```\n+## Authorization to Publish a Release\n \n+Need to join [release team](https://github.com/kubeflow/internal-acls/blob/1234654eb219e2c06ed5fdc2c4e662a02fccc740/github-orgs/kubeflow/org.yaml#L388) before you can publish a release.\n ", + "comment_created_at": "2019-09-23T23:31:23+00:00", + "comment_author": "gabrielwen", + "comment_body": "done", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-standardize-build-configurations.md b/_reviewers/kubeflow-standardize-build-configurations.md index 90acb6a..cc1e451 100644 --- a/_reviewers/kubeflow-standardize-build-configurations.md +++ b/_reviewers/kubeflow-standardize-build-configurations.md @@ -23,85 +23,3 @@ Instead of having different variable names for the same concept in different com "Tensorboard's makefile uses slightly different vars but we should iron them out and have the same template" This standardization simplifies CI/CD pipeline maintenance, improves developer experience when working across components, and ensures that all artifacts follow the same build and testing patterns. It also makes it clearer which components are part of the official CI/CD pipeline and which are provided as examples only. 
- - -[ - { - "discussion_id": "559964118", - "pr_number": 5530, - "pr_file": "components/crud-web-apps/README.md", - "created_at": "2021-01-19T07:33:04+00:00", - "commented_code": "# Build instructions for docker images\nTo build the Jupyter and Tensorboards docker images directly (without the Makefile) for development purposes, the following commands can be used from this directory.", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "559964118", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5530, - "pr_file": "components/crud-web-apps/README.md", - "discussion_id": "559964118", - "commented_code": "@@ -0,0 +1,8 @@\n+# Build instructions for docker images\n+To build the Jupyter and Tensorboards docker images directly (without the Makefile) for development purposes, the following commands can be used from this directory.", - "comment_created_at": "2021-01-19T07:33:04+00:00", - "comment_author": "kimwnasptd", - "comment_body": "I don't like the phrasing of this sentence, as it implies that for development purposes we need to run the command directly and can't use the Makefifle.\r\n\r\nWe can use the Makefile to build Jupyter app's image by running\r\n```bash\r\nREGISTRY_PROJECT=my-repo make docker-build\r\n```\r\n\r\n[ Tensorboard's makefile uses slightly different vars but we should iron them out and have the same template when we setup the CI/CD ]", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "595821558", - "pr_number": 5694, - "pr_file": "components/example-notebook-servers/README.md", - "created_at": "2021-03-17T08:54:33+00:00", - "commented_code": "# Kubeflow Example Notebook Servers\n\n**These dockerfiles and the resulting images are provided as examples only. These images do not (yet) undergo automated testing and are only briefly tested manually. As such, these images are not guaranteed to contain no problems. If you do encounter a problem in one of these images, we greatly appreciate contributions to further improve these images.**", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "595821558", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5694, - "pr_file": "components/example-notebook-servers/README.md", - "discussion_id": "595821558", - "commented_code": "@@ -0,0 +1,48 @@\n+# Kubeflow Example Notebook Servers\n+\n+**These dockerfiles and the resulting images are provided as examples only. These images do not (yet) undergo automated testing and are only briefly tested manually. As such, these images are not guaranteed to contain no problems. If you do encounter a problem in one of these images, we greatly appreciate contributions to further improve these images.**", - "comment_created_at": "2021-03-17T08:54:33+00:00", - "comment_author": "StefanoFioravanzo", - "comment_body": "I would rephrase as follows:\r\n\r\n```\r\n**NOTE:** The Notebooks Working Group provides these Dockerfiles as examples only, \r\nthey are not part of our official CI/CD pipeline and are not guaranteed to work in every \r\ncase. As such, we are not providing timely SLA. The Notebooks WG will work on officially \r\nsupporting some of these Dockerfiles in the near future. 
Contributions and issue reports \r\nare greatly appreciated.\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "325432133", - "pr_number": 3968, - "pr_file": "docs_dev/releasing.md", - "created_at": "2019-09-17T23:52:25+00:00", - "commented_code": "gcloud container clusters get-credentials kubeflow-releasing --zone us-central1-a --project kubeflow-releasing\n```\n\nUse [this script](https://github.com/jlewi/kubeflow-dev/blob/master/create_context.sh) to set up your context properly:\n\n```\ncreate_context.sh $(kubectl config current-context) kubeflow-releasing\n```\n## Authorization to Publish a Release\n\nNeed to join [release team](https://github.com/kubeflow/internal-acls/blob/1234654eb219e2c06ed5fdc2c4e662a02fccc740/github-orgs/kubeflow/org.yaml#L388) before you can publish a release.", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "325432133", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 3968, - "pr_file": "docs_dev/releasing.md", - "discussion_id": "325432133", - "commented_code": "@@ -150,12 +151,9 @@ gcloud config set project kubeflow-releasing\n gcloud container clusters get-credentials kubeflow-releasing --zone us-central1-a --project kubeflow-releasing\n ```\n \n-Use [this script](https://github.com/jlewi/kubeflow-dev/blob/master/create_context.sh) to set up your context properly:\n-\n-```\n-create_context.sh $(kubectl config current-context) kubeflow-releasing\n-```\n+## Authorization to Publish a Release\n \n+Need to join [release team](https://github.com/kubeflow/internal-acls/blob/1234654eb219e2c06ed5fdc2c4e662a02fccc740/github-orgs/kubeflow/org.yaml#L388) before you can publish a release.\n ", - "comment_created_at": "2019-09-17T23:52:25+00:00", - "comment_author": "jlewi", - "comment_body": "How about adding a section \r\n\r\nGetting ready to cut a release \r\n\r\nand adding the following\r\n\r\n1. Create a P0 bug of kind process to track the major release.\r\n1. 
Announce the release and link to the issue in the #release channel in kubeflow.slack.com", - "pr_file_module": null - }, - { - "comment_id": "327372828", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 3968, - "pr_file": "docs_dev/releasing.md", - "discussion_id": "325432133", - "commented_code": "@@ -150,12 +151,9 @@ gcloud config set project kubeflow-releasing\n gcloud container clusters get-credentials kubeflow-releasing --zone us-central1-a --project kubeflow-releasing\n ```\n \n-Use [this script](https://github.com/jlewi/kubeflow-dev/blob/master/create_context.sh) to set up your context properly:\n-\n-```\n-create_context.sh $(kubectl config current-context) kubeflow-releasing\n-```\n+## Authorization to Publish a Release\n \n+Need to join [release team](https://github.com/kubeflow/internal-acls/blob/1234654eb219e2c06ed5fdc2c4e662a02fccc740/github-orgs/kubeflow/org.yaml#L388) before you can publish a release.\n ", - "comment_created_at": "2019-09-23T23:31:23+00:00", - "comment_author": "gabrielwen", - "comment_body": "done", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-standardize-makefile-patterns.json b/_reviewers/kubeflow-standardize-makefile-patterns.json new file mode 100644 index 0000000..9baafb0 --- /dev/null +++ b/_reviewers/kubeflow-standardize-makefile-patterns.json @@ -0,0 +1,172 @@ +[ + { + "discussion_id": "911899164", + "pr_number": 6555, + "pr_file": "components/access-management/Makefile", + "created_at": "2022-07-01T12:00:51+00:00", + "commented_code": "REGISTRY_PROJECT ?= kubeflow-dev\nGOLANG_VERSION ?= 1.12\nGCLOUD_PROJECT ?= kubeflow-images-public\nIMG ?= gcr.io/$(GCLOUD_PROJECT)/kfam\nPROJECT ?= kubeflow-dev\nTAG ?= $(eval TAG := $(shell date +v%Y%m%d)-$(shell git describe --tags --always --dirty)-$(shell git diff | shasum -a256 | cut -c -6))$(TAG)\nGO_SWAGGER_URL ?= https://github.com/go-swagger/go-swagger/releases/download/v0.21.0/swagger_linux_amd64\n\nCHANGED_FILES := $(shell git diff-files --relative=components/centraldashboard)\nGIT_VERSION := $(shell git describe --tags --always --dirty)\n\nifeq ($(strip $(CHANGED_FILES)),)\n# Changed files is empty; not dirty\n# Don't include --dirty because it could be dirty if files outside the ones we care\n# about changed.\nGIT_VERSION := $(shell git describe --tags --long)\nelse\nGIT_VERSION := $(shell git describe --tags --long)-dirty-$(shell git diff | shasum -a256 | cut -c -6)\nendif\nIMG ?= kfam\nTAG ?= $(GIT_VERSION)\n\nbuild:\ngenerate-go-client: bin/swagger\n\tbin/swagger generate client -f api/swagger.yaml -t api/go_client\n\nbin/swagger:\n\tmkdir -p bin\n\twget -O bin/swagger '$(GO_SWAGGER_URL)'\n\tchmod +x bin/swagger\n\ndocker-build:\n\tdocker build -t $(IMG):$(TAG) .\n\ndocker-push: \n\tdocker push $(IMG):$(TAG)\n\nimage: docker-build docker-push \n\nbuild-gcb:", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "911899164", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6555, + "pr_file": "components/access-management/Makefile", + "discussion_id": "911899164", + "commented_code": "@@ -1,30 +1,31 @@\n REGISTRY_PROJECT ?= kubeflow-dev\n-GOLANG_VERSION ?= 1.12\n-GCLOUD_PROJECT ?= kubeflow-images-public\n-IMG ?= gcr.io/$(GCLOUD_PROJECT)/kfam\n PROJECT ?= kubeflow-dev\n-TAG ?= $(eval TAG := $(shell date +v%Y%m%d)-$(shell git describe --tags --always --dirty)-$(shell git diff | shasum -a256 | cut -c -6))$(TAG)\n+GO_SWAGGER_URL ?= https://github.com/go-swagger/go-swagger/releases/download/v0.21.0/swagger_linux_amd64\n 
\n-CHANGED_FILES := $(shell git diff-files --relative=components/centraldashboard)\n+GIT_VERSION := $(shell git describe --tags --always --dirty)\n \n-ifeq ($(strip $(CHANGED_FILES)),)\n-# Changed files is empty; not dirty\n-# Don't include --dirty because it could be dirty if files outside the ones we care\n-# about changed.\n-GIT_VERSION := $(shell git describe --tags --long)\n-else\n-GIT_VERSION := $(shell git describe --tags --long)-dirty-$(shell git diff | shasum -a256 | cut -c -6)\n-endif\n+IMG ?= kfam\n+TAG ?= $(GIT_VERSION)\n \n-build:\n+generate-go-client: bin/swagger\n+\tbin/swagger generate client -f api/swagger.yaml -t api/go_client\n+\n+bin/swagger:\n+\tmkdir -p bin\n+\twget -O bin/swagger '$(GO_SWAGGER_URL)'\n+\tchmod +x bin/swagger\n+\n+docker-build:\n \tdocker build -t $(IMG):$(TAG) .\n \n+docker-push: \n+\tdocker push $(IMG):$(TAG)\n+\n+image: docker-build docker-push \n+\n build-gcb:", + "comment_created_at": "2022-07-01T12:00:51+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Can you also remove this rule? We don't use `build-gcb` anywhere in the project.\r\n\r\nIt's a good chance to clean up the project", + "pr_file_module": null + }, + { + "comment_id": "912027678", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6555, + "pr_file": "components/access-management/Makefile", + "discussion_id": "911899164", + "commented_code": "@@ -1,30 +1,31 @@\n REGISTRY_PROJECT ?= kubeflow-dev\n-GOLANG_VERSION ?= 1.12\n-GCLOUD_PROJECT ?= kubeflow-images-public\n-IMG ?= gcr.io/$(GCLOUD_PROJECT)/kfam\n PROJECT ?= kubeflow-dev\n-TAG ?= $(eval TAG := $(shell date +v%Y%m%d)-$(shell git describe --tags --always --dirty)-$(shell git diff | shasum -a256 | cut -c -6))$(TAG)\n+GO_SWAGGER_URL ?= https://github.com/go-swagger/go-swagger/releases/download/v0.21.0/swagger_linux_amd64\n \n-CHANGED_FILES := $(shell git diff-files --relative=components/centraldashboard)\n+GIT_VERSION := $(shell git describe --tags --always --dirty)\n \n-ifeq ($(strip $(CHANGED_FILES)),)\n-# Changed files is empty; not dirty\n-# Don't include --dirty because it could be dirty if files outside the ones we care\n-# about changed.\n-GIT_VERSION := $(shell git describe --tags --long)\n-else\n-GIT_VERSION := $(shell git describe --tags --long)-dirty-$(shell git diff | shasum -a256 | cut -c -6)\n-endif\n+IMG ?= kfam\n+TAG ?= $(GIT_VERSION)\n \n-build:\n+generate-go-client: bin/swagger\n+\tbin/swagger generate client -f api/swagger.yaml -t api/go_client\n+\n+bin/swagger:\n+\tmkdir -p bin\n+\twget -O bin/swagger '$(GO_SWAGGER_URL)'\n+\tchmod +x bin/swagger\n+\n+docker-build:\n \tdocker build -t $(IMG):$(TAG) .\n \n+docker-push: \n+\tdocker push $(IMG):$(TAG)\n+\n+image: docker-build docker-push \n+\n build-gcb:", + "comment_created_at": "2022-07-01T14:45:55+00:00", + "comment_author": "apo-ger", + "comment_body": "Done!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1186255342", + "pr_number": 7123, + "pr_file": "conformance/1.7/Makefile", + "created_at": "2023-05-05T15:53:46+00:00", + "commented_code": "# Copyright 2022 The Kubeflow Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# 
See the License for the specific language governing permissions and\n# limitations under the License.\n\nTEST_PROFILE ?= kf-conformance-test\nKUBEFLOW_NAMESPACE ?= kubeflow\n\nsetup:", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1186255342", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7123, + "pr_file": "conformance/1.7/Makefile", + "discussion_id": "1186255342", + "commented_code": "@@ -0,0 +1,59 @@\n+# Copyright 2022 The Kubeflow Authors.\n+#\n+# Licensed under the Apache License, Version 2.0 (the \"License\");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an \"AS IS\" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+\n+TEST_PROFILE ?= kf-conformance-test\n+KUBEFLOW_NAMESPACE ?= kubeflow\n+\n+setup:", + "comment_created_at": "2023-05-05T15:53:46+00:00", + "comment_author": "gkcalat", + "comment_body": "NIT: Replace here and below:\r\n```\r\n.PHONY: setup\r\nsetup:\r\n...\r\n```\r\n\r\nThis will prevent the `make ` from failing when there is a `` file inside the folder. For example, if someone removes the `.yaml` extension from `setup.yaml`, `make setup` won't behave as expected.", + "pr_file_module": null + }, + { + "comment_id": "1186508559", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7123, + "pr_file": "conformance/1.7/Makefile", + "discussion_id": "1186255342", + "commented_code": "@@ -0,0 +1,59 @@\n+# Copyright 2022 The Kubeflow Authors.\n+#\n+# Licensed under the Apache License, Version 2.0 (the \"License\");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an \"AS IS\" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+\n+TEST_PROFILE ?= kf-conformance-test\n+KUBEFLOW_NAMESPACE ?= kubeflow\n+\n+setup:", + "comment_created_at": "2023-05-05T21:21:04+00:00", + "comment_author": "james-jwu", + "comment_body": "Done", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "897092814", + "pr_number": 6406, + "pr_file": "components/tensorboard-controller/Makefile", + "created_at": "2022-06-14T17:14:22+00:00", + "commented_code": "# Image URL to use all building/pushing image targets\nIMG ?= controller:latest", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "897092814", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6406, + "pr_file": "components/tensorboard-controller/Makefile", + "discussion_id": "897092814", + "commented_code": "@@ -1,8 +1,8 @@\n \n # Image URL to use all building/pushing image targets\n IMG ?= controller:latest", + "comment_created_at": "2022-06-14T17:14:22+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Can you add a `TAG ?= $(shell git describe --tags --always)` here? This is the one we use in the web apps as well. 
Let's standardize on this one", + "pr_file_module": null + }, + { + "comment_id": "897805578", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6406, + "pr_file": "components/tensorboard-controller/Makefile", + "discussion_id": "897092814", + "commented_code": "@@ -1,8 +1,8 @@\n \n # Image URL to use all building/pushing image targets\n IMG ?= controller:latest", + "comment_created_at": "2022-06-15T10:23:02+00:00", + "comment_author": "alembiewski", + "comment_body": "Addressed", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "839946589", + "pr_number": 6374, + "pr_file": "components/notebook-controller/Makefile", + "created_at": "2022-03-31T19:23:29+00:00", + "commented_code": "GOBIN=$(shell go env GOBIN)\nendif\n\n# Setting SHELL to bash allows bash commands to be executed by recipes.\n# This is a requirement for 'setup-envtest.sh' in the test target.\n# Options are set to exit when a recipe line exits non-zero or a piped command fails.\nSHELL = /usr/bin/env bash -o pipefail\n.SHELLFLAGS = -ec\n\n.PHONY: all\nall: manager\n\n# check license\ncheck-license:\n\t./third_party/check-license.sh\n##@ General\n\n# Run tests\ntest: generate fmt vet manifests\n\tmkdir -p ${ENVTEST_ASSETS_DIR}\n\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/a9bd9117a77a2f84bbc546e28991136fe0000dc0/hack/setup-envtest.sh\n\tsource ${ENVTEST_ASSETS_DIR}/setup-envtest.sh; fetch_envtest_tools $(ENVTEST_ASSETS_DIR); setup_envtest_env $(ENVTEST_ASSETS_DIR);\n\tgo test ./api/... ./controllers/... -coverprofile cover.out\n# The help target prints out all targets with their descriptions organized\n# beneath their categories. The categories are represented by '##@' and the\n# target descriptions by '##'. The awk commands is responsible for reading the\n# entire set of makefiles included in this invocation, looking for lines of the\n# file as xyz: ## something, and then pretty-format the target and help. Then,\n# if there's a line with ##@ something, that gets pretty-printed as a category.\n# More info on the usage of ANSI control characters for terminal formatting:\n# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters\n# More info on the awk command:\n# http://linuxcommand.org/lc3_adv_awk.php\n\n# Build manager binary\nmanager: generate fmt vet\n\tgo build -o bin/manager main.go\n.PHONY: help\nhelp: ## Display this help.\n\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n\n# Run against the configured Kubernetes cluster in ~/.kube/config\nrun: generate fmt vet\n\tgo run ./main.go\n##@ Development\n\n# Install CRDs into a cluster\ninstall: manifests\n\tkubectl apply -f config/crd/bases\n.PHONY: check-license\ncheck-license: ## Check third-party license\n\t./third_party/check-license.sh\n\n# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\ndeploy: manifests\n\tkubectl apply -f config/crd/bases\n\tkustomize build config/default | kubectl apply -f -\n.PHONY: manifests\nmanifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n\t$(CONTROLLER_GEN) rbac:roleName=role crd webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n\n# Generate manifests e.g. 
CRD, RBAC etc.\nmanifests: controller-gen\n\t$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=role webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n.PHONY: generate\ngenerate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n\n# Run go fmt against code\nfmt:\n.PHONY: fmt\nfmt: ## Run go fmt against code.\n\tgo fmt ./...\n\n# Run go vet against code\nvet:\n.PHONY: vet\nvet: ## Run go vet against code.\n\tgo vet ./...\n\n# Generate code\ngenerate: controller-gen\n\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=./api/...\n.PHONY: test\ntest: manifests generate fmt vet envtest ## Run tests.\n\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... -coverprofile cover.out\n\n##@ Build\n\n# Build the docker image\ndocker-build: test\n.PHONY: manager\nmanager: generate fmt vet ## Build manager binary.\n\tgo build -o bin/manager main.go\n\n.PHONY: run\nrun: manifests generate fmt vet ## Run a controller from your host.\n\tgo run ./main.go\n\n.PHONY: docker-build\ndocker-build: test ## Build docker image with the manager.\n\tcd .. && docker build . -t ${IMG}:${TAG} -f ./notebook-controller/Dockerfile\n\t@echo \"updating kustomize image patch file for manager resource\"\n\tsed -i'' -e 's@image: .*@image: '\"${IMG}:${TAG}\"'@' ./config/default/manager_image_patch.yaml\n\n# Push the docker image\ndocker-push:\n.PHONY: docker-push\ndocker-push: ## Push docker image with the manager.", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "839946589", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6374, + "pr_file": "components/notebook-controller/Makefile", + "discussion_id": "839946589", + "commented_code": "@@ -21,77 +13,125 @@ else\n GOBIN=$(shell go env GOBIN)\n endif\n \n+# Setting SHELL to bash allows bash commands to be executed by recipes.\n+# This is a requirement for 'setup-envtest.sh' in the test target.\n+# Options are set to exit when a recipe line exits non-zero or a piped command fails.\n+SHELL = /usr/bin/env bash -o pipefail\n+.SHELLFLAGS = -ec\n+\n+.PHONY: all\n all: manager\n \n-# check license\n-check-license:\n-\t./third_party/check-license.sh\n+##@ General\n \n-# Run tests\n-test: generate fmt vet manifests\n-\tmkdir -p ${ENVTEST_ASSETS_DIR}\n-\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/a9bd9117a77a2f84bbc546e28991136fe0000dc0/hack/setup-envtest.sh\n-\tsource ${ENVTEST_ASSETS_DIR}/setup-envtest.sh; fetch_envtest_tools $(ENVTEST_ASSETS_DIR); setup_envtest_env $(ENVTEST_ASSETS_DIR);\n-\tgo test ./api/... ./controllers/... -coverprofile cover.out\n+# The help target prints out all targets with their descriptions organized\n+# beneath their categories. The categories are represented by '##@' and the\n+# target descriptions by '##'. The awk commands is responsible for reading the\n+# entire set of makefiles included in this invocation, looking for lines of the\n+# file as xyz: ## something, and then pretty-format the target and help. 
Then,\n+# if there's a line with ##@ something, that gets pretty-printed as a category.\n+# More info on the usage of ANSI control characters for terminal formatting:\n+# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters\n+# More info on the awk command:\n+# http://linuxcommand.org/lc3_adv_awk.php\n \n-# Build manager binary\n-manager: generate fmt vet\n-\tgo build -o bin/manager main.go\n+.PHONY: help\n+help: ## Display this help.\n+\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n \n-# Run against the configured Kubernetes cluster in ~/.kube/config\n-run: generate fmt vet\n-\tgo run ./main.go\n+##@ Development\n \n-# Install CRDs into a cluster\n-install: manifests\n-\tkubectl apply -f config/crd/bases\n+.PHONY: check-license\n+check-license: ## Check third-party license\n+\t./third_party/check-license.sh\n \n-# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\n-deploy: manifests\n-\tkubectl apply -f config/crd/bases\n-\tkustomize build config/default | kubectl apply -f -\n+.PHONY: manifests\n+manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n+\t$(CONTROLLER_GEN) rbac:roleName=role crd webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n \n-# Generate manifests e.g. CRD, RBAC etc.\n-manifests: controller-gen\n-\t$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=role webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n+.PHONY: generate\n+generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n+\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n \n-# Run go fmt against code\n-fmt:\n+.PHONY: fmt\n+fmt: ## Run go fmt against code.\n \tgo fmt ./...\n \n-# Run go vet against code\n-vet:\n+.PHONY: vet\n+vet: ## Run go vet against code.\n \tgo vet ./...\n \n-# Generate code\n-generate: controller-gen\n-\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=./api/...\n+.PHONY: test\n+test: manifests generate fmt vet envtest ## Run tests.\n+\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... -coverprofile cover.out\n+\n+##@ Build\n \n-# Build the docker image\n-docker-build: test\n+.PHONY: manager\n+manager: generate fmt vet ## Build manager binary.\n+\tgo build -o bin/manager main.go\n+\n+.PHONY: run\n+run: manifests generate fmt vet ## Run a controller from your host.\n+\tgo run ./main.go\n+\n+.PHONY: docker-build\n+docker-build: test ## Build docker image with the manager.\n \tcd .. && docker build . 
-t ${IMG}:${TAG} -f ./notebook-controller/Dockerfile\n-\t@echo \"updating kustomize image patch file for manager resource\"\n-\tsed -i'' -e 's@image: .*@image: '\"${IMG}:${TAG}\"'@' ./config/default/manager_image_patch.yaml\n \n-# Push the docker image\n-docker-push:\n+.PHONY: docker-push\n+docker-push: ## Push docker image with the manager.", + "comment_created_at": "2022-03-31T19:23:29+00:00", + "comment_author": "kimwnasptd", + "comment_body": "nit: Could you also create an `image` rule, which builds and pushes the image?\r\n\r\nWe have this for the web apps as well and would be nice to keep this convention across all components\r\nhttps://github.com/kubeflow/kubeflow/blob/master/components/crud-web-apps/jupyter/Makefile#L11", + "pr_file_module": null + }, + { + "comment_id": "840447074", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6374, + "pr_file": "components/notebook-controller/Makefile", + "discussion_id": "839946589", + "commented_code": "@@ -21,77 +13,125 @@ else\n GOBIN=$(shell go env GOBIN)\n endif\n \n+# Setting SHELL to bash allows bash commands to be executed by recipes.\n+# This is a requirement for 'setup-envtest.sh' in the test target.\n+# Options are set to exit when a recipe line exits non-zero or a piped command fails.\n+SHELL = /usr/bin/env bash -o pipefail\n+.SHELLFLAGS = -ec\n+\n+.PHONY: all\n all: manager\n \n-# check license\n-check-license:\n-\t./third_party/check-license.sh\n+##@ General\n \n-# Run tests\n-test: generate fmt vet manifests\n-\tmkdir -p ${ENVTEST_ASSETS_DIR}\n-\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/a9bd9117a77a2f84bbc546e28991136fe0000dc0/hack/setup-envtest.sh\n-\tsource ${ENVTEST_ASSETS_DIR}/setup-envtest.sh; fetch_envtest_tools $(ENVTEST_ASSETS_DIR); setup_envtest_env $(ENVTEST_ASSETS_DIR);\n-\tgo test ./api/... ./controllers/... -coverprofile cover.out\n+# The help target prints out all targets with their descriptions organized\n+# beneath their categories. The categories are represented by '##@' and the\n+# target descriptions by '##'. The awk commands is responsible for reading the\n+# entire set of makefiles included in this invocation, looking for lines of the\n+# file as xyz: ## something, and then pretty-format the target and help. 
Then,\n+# if there's a line with ##@ something, that gets pretty-printed as a category.\n+# More info on the usage of ANSI control characters for terminal formatting:\n+# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters\n+# More info on the awk command:\n+# http://linuxcommand.org/lc3_adv_awk.php\n \n-# Build manager binary\n-manager: generate fmt vet\n-\tgo build -o bin/manager main.go\n+.PHONY: help\n+help: ## Display this help.\n+\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n \n-# Run against the configured Kubernetes cluster in ~/.kube/config\n-run: generate fmt vet\n-\tgo run ./main.go\n+##@ Development\n \n-# Install CRDs into a cluster\n-install: manifests\n-\tkubectl apply -f config/crd/bases\n+.PHONY: check-license\n+check-license: ## Check third-party license\n+\t./third_party/check-license.sh\n \n-# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\n-deploy: manifests\n-\tkubectl apply -f config/crd/bases\n-\tkustomize build config/default | kubectl apply -f -\n+.PHONY: manifests\n+manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n+\t$(CONTROLLER_GEN) rbac:roleName=role crd webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n \n-# Generate manifests e.g. CRD, RBAC etc.\n-manifests: controller-gen\n-\t$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=role webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n+.PHONY: generate\n+generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n+\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n \n-# Run go fmt against code\n-fmt:\n+.PHONY: fmt\n+fmt: ## Run go fmt against code.\n \tgo fmt ./...\n \n-# Run go vet against code\n-vet:\n+.PHONY: vet\n+vet: ## Run go vet against code.\n \tgo vet ./...\n \n-# Generate code\n-generate: controller-gen\n-\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=./api/...\n+.PHONY: test\n+test: manifests generate fmt vet envtest ## Run tests.\n+\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... -coverprofile cover.out\n+\n+##@ Build\n \n-# Build the docker image\n-docker-build: test\n+.PHONY: manager\n+manager: generate fmt vet ## Build manager binary.\n+\tgo build -o bin/manager main.go\n+\n+.PHONY: run\n+run: manifests generate fmt vet ## Run a controller from your host.\n+\tgo run ./main.go\n+\n+.PHONY: docker-build\n+docker-build: test ## Build docker image with the manager.\n \tcd .. && docker build . 
-t ${IMG}:${TAG} -f ./notebook-controller/Dockerfile\n-\t@echo \"updating kustomize image patch file for manager resource\"\n-\tsed -i'' -e 's@image: .*@image: '\"${IMG}:${TAG}\"'@' ./config/default/manager_image_patch.yaml\n \n-# Push the docker image\n-docker-push:\n+.PHONY: docker-push\n+docker-push: ## Push docker image with the manager.", + "comment_created_at": "2022-04-01T10:26:53+00:00", + "comment_author": "samuelvl", + "comment_body": "Done!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "613160932", + "pr_number": 5820, + "pr_file": "components/crud-web-apps/volumes/Makefile", + "created_at": "2021-04-14T11:23:03+00:00", + "commented_code": "IMG ?= gcr.io/arrikto-playground/kubeflow/volumes-web-app\nTAG ?= $(shell git describe --tags)\nDOCKERFILE=volumes/Dockerfile\nIMG ?= public.ecr.aws/j1r0q0g6/notebooks/volumes-web-app\nTAG ?= $(shell git describe --tags --always)\nDOCKERFILE ?= volumes/Dockerfile\n\ndocker-build:\n\tcp Dockerfile.dockerignore ../.dockerignore\n\t-cd ../ && docker build -t ${IMG}:${TAG} -f ${DOCKERFILE} .\n\trm ../.dockerignore\n\tcd ../ && docker build -t ${IMG}:${TAG} -f ${DOCKERFILE} .", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "613160932", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5820, + "pr_file": "components/crud-web-apps/volumes/Makefile", + "discussion_id": "613160932", + "commented_code": "@@ -1,11 +1,9 @@\n-IMG ?= gcr.io/arrikto-playground/kubeflow/volumes-web-app\n-TAG ?= $(shell git describe --tags)\n-DOCKERFILE=volumes/Dockerfile\n+IMG ?= public.ecr.aws/j1r0q0g6/notebooks/volumes-web-app\n+TAG ?= $(shell git describe --tags --always)\n+DOCKERFILE ?= volumes/Dockerfile\n \n docker-build:\n-\tcp Dockerfile.dockerignore ../.dockerignore\n-\t-cd ../ && docker build -t ${IMG}:${TAG} -f ${DOCKERFILE} .\n-\trm ../.dockerignore\n+\tcd ../ && docker build -t ${IMG}:${TAG} -f ${DOCKERFILE} .", + "comment_created_at": "2021-04-14T11:23:03+00:00", + "comment_author": "elikatsis", + "comment_body": "This part is a bit weird. I mean mentioning paths as if we are in a different directory because we will be using them in a different context (after `cd`ing).\r\n\r\nMay I suggest we\r\n1. Drop the `DOCKERFILE` variable (I think we shouldn't be messing with it) OR make it relative to this directory\r\n I.e.,\r\n ```\r\n DOCKERFILE=Dockerfile\r\n ```\r\n2. Drop the `cd`ing part from the Makefile target and use the parent directory as the path for the Docker building context (https://docs.docker.com/engine/reference/commandline/build/#build-with-path)\r\n This will make the Makefile target as follows\r\n ```Makefile\r\n docker-build:\r\n docker build -t ${IMG}:${TAG} -f Dockerfile .. # Or \"-f ${DOCKERFILE}\" depending on the decision of (1)\r\n ```\r\n\r\nI understand the reason we are `cd`ing to the parent directory is to use a single `.dockerignore` file for all the web apps.\r\nI tested locally that we maintain this behavior with the change I proposed, so there should be no problem. 
Let's also have others verify this as well (you, the CI, other?).\r\n\r\nThen, we can follow up changing this for the other web apps as well (this is not a blocker for the release of course).", + "pr_file_module": null + }, + { + "comment_id": "633478595", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5820, + "pr_file": "components/crud-web-apps/volumes/Makefile", + "discussion_id": "613160932", + "commented_code": "@@ -1,11 +1,9 @@\n-IMG ?= gcr.io/arrikto-playground/kubeflow/volumes-web-app\n-TAG ?= $(shell git describe --tags)\n-DOCKERFILE=volumes/Dockerfile\n+IMG ?= public.ecr.aws/j1r0q0g6/notebooks/volumes-web-app\n+TAG ?= $(shell git describe --tags --always)\n+DOCKERFILE ?= volumes/Dockerfile\n \n docker-build:\n-\tcp Dockerfile.dockerignore ../.dockerignore\n-\t-cd ../ && docker build -t ${IMG}:${TAG} -f ${DOCKERFILE} .\n-\trm ../.dockerignore\n+\tcd ../ && docker build -t ${IMG}:${TAG} -f ${DOCKERFILE} .", + "comment_created_at": "2021-05-17T12:16:43+00:00", + "comment_author": "kimwnasptd", + "comment_body": "@elikatsis ACK, this is a very good suggestion!\r\n\r\nI'll add a commit for Volumes web app in this PR, since it's fixing the formatting for this component. I'll then open up a new issue to keep track of the uniformity in the dockerfiles for the other web apps as well.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-standardize-makefile-patterns.md b/_reviewers/kubeflow-standardize-makefile-patterns.md index dcb0d0b..58329c0 100644 --- a/_reviewers/kubeflow-standardize-makefile-patterns.md +++ b/_reviewers/kubeflow-standardize-makefile-patterns.md @@ -49,177 +49,3 @@ Key practices to follow: 5. Remove unused or deprecated build rules to keep Makefiles maintainable. These standardized patterns make it easier for developers to work across different components, improve CI/CD pipeline reliability, and reduce confusion when building and deploying components. 
- - -[ - { - "discussion_id": "911899164", - "pr_number": 6555, - "pr_file": "components/access-management/Makefile", - "created_at": "2022-07-01T12:00:51+00:00", - "commented_code": "REGISTRY_PROJECT ?= kubeflow-dev\nGOLANG_VERSION ?= 1.12\nGCLOUD_PROJECT ?= kubeflow-images-public\nIMG ?= gcr.io/$(GCLOUD_PROJECT)/kfam\nPROJECT ?= kubeflow-dev\nTAG ?= $(eval TAG := $(shell date +v%Y%m%d)-$(shell git describe --tags --always --dirty)-$(shell git diff | shasum -a256 | cut -c -6))$(TAG)\nGO_SWAGGER_URL ?= https://github.com/go-swagger/go-swagger/releases/download/v0.21.0/swagger_linux_amd64\n\nCHANGED_FILES := $(shell git diff-files --relative=components/centraldashboard)\nGIT_VERSION := $(shell git describe --tags --always --dirty)\n\nifeq ($(strip $(CHANGED_FILES)),)\n# Changed files is empty; not dirty\n# Don't include --dirty because it could be dirty if files outside the ones we care\n# about changed.\nGIT_VERSION := $(shell git describe --tags --long)\nelse\nGIT_VERSION := $(shell git describe --tags --long)-dirty-$(shell git diff | shasum -a256 | cut -c -6)\nendif\nIMG ?= kfam\nTAG ?= $(GIT_VERSION)\n\nbuild:\ngenerate-go-client: bin/swagger\n\tbin/swagger generate client -f api/swagger.yaml -t api/go_client\n\nbin/swagger:\n\tmkdir -p bin\n\twget -O bin/swagger '$(GO_SWAGGER_URL)'\n\tchmod +x bin/swagger\n\ndocker-build:\n\tdocker build -t $(IMG):$(TAG) .\n\ndocker-push: \n\tdocker push $(IMG):$(TAG)\n\nimage: docker-build docker-push \n\nbuild-gcb:", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "911899164", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6555, - "pr_file": "components/access-management/Makefile", - "discussion_id": "911899164", - "commented_code": "@@ -1,30 +1,31 @@\n REGISTRY_PROJECT ?= kubeflow-dev\n-GOLANG_VERSION ?= 1.12\n-GCLOUD_PROJECT ?= kubeflow-images-public\n-IMG ?= gcr.io/$(GCLOUD_PROJECT)/kfam\n PROJECT ?= kubeflow-dev\n-TAG ?= $(eval TAG := $(shell date +v%Y%m%d)-$(shell git describe --tags --always --dirty)-$(shell git diff | shasum -a256 | cut -c -6))$(TAG)\n+GO_SWAGGER_URL ?= https://github.com/go-swagger/go-swagger/releases/download/v0.21.0/swagger_linux_amd64\n \n-CHANGED_FILES := $(shell git diff-files --relative=components/centraldashboard)\n+GIT_VERSION := $(shell git describe --tags --always --dirty)\n \n-ifeq ($(strip $(CHANGED_FILES)),)\n-# Changed files is empty; not dirty\n-# Don't include --dirty because it could be dirty if files outside the ones we care\n-# about changed.\n-GIT_VERSION := $(shell git describe --tags --long)\n-else\n-GIT_VERSION := $(shell git describe --tags --long)-dirty-$(shell git diff | shasum -a256 | cut -c -6)\n-endif\n+IMG ?= kfam\n+TAG ?= $(GIT_VERSION)\n \n-build:\n+generate-go-client: bin/swagger\n+\tbin/swagger generate client -f api/swagger.yaml -t api/go_client\n+\n+bin/swagger:\n+\tmkdir -p bin\n+\twget -O bin/swagger '$(GO_SWAGGER_URL)'\n+\tchmod +x bin/swagger\n+\n+docker-build:\n \tdocker build -t $(IMG):$(TAG) .\n \n+docker-push: \n+\tdocker push $(IMG):$(TAG)\n+\n+image: docker-build docker-push \n+\n build-gcb:", - "comment_created_at": "2022-07-01T12:00:51+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Can you also remove this rule? 
We don't use `build-gcb` anywhere in the project.\r\n\r\nIt's a good chance to clean up the project", - "pr_file_module": null - }, - { - "comment_id": "912027678", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6555, - "pr_file": "components/access-management/Makefile", - "discussion_id": "911899164", - "commented_code": "@@ -1,30 +1,31 @@\n REGISTRY_PROJECT ?= kubeflow-dev\n-GOLANG_VERSION ?= 1.12\n-GCLOUD_PROJECT ?= kubeflow-images-public\n-IMG ?= gcr.io/$(GCLOUD_PROJECT)/kfam\n PROJECT ?= kubeflow-dev\n-TAG ?= $(eval TAG := $(shell date +v%Y%m%d)-$(shell git describe --tags --always --dirty)-$(shell git diff | shasum -a256 | cut -c -6))$(TAG)\n+GO_SWAGGER_URL ?= https://github.com/go-swagger/go-swagger/releases/download/v0.21.0/swagger_linux_amd64\n \n-CHANGED_FILES := $(shell git diff-files --relative=components/centraldashboard)\n+GIT_VERSION := $(shell git describe --tags --always --dirty)\n \n-ifeq ($(strip $(CHANGED_FILES)),)\n-# Changed files is empty; not dirty\n-# Don't include --dirty because it could be dirty if files outside the ones we care\n-# about changed.\n-GIT_VERSION := $(shell git describe --tags --long)\n-else\n-GIT_VERSION := $(shell git describe --tags --long)-dirty-$(shell git diff | shasum -a256 | cut -c -6)\n-endif\n+IMG ?= kfam\n+TAG ?= $(GIT_VERSION)\n \n-build:\n+generate-go-client: bin/swagger\n+\tbin/swagger generate client -f api/swagger.yaml -t api/go_client\n+\n+bin/swagger:\n+\tmkdir -p bin\n+\twget -O bin/swagger '$(GO_SWAGGER_URL)'\n+\tchmod +x bin/swagger\n+\n+docker-build:\n \tdocker build -t $(IMG):$(TAG) .\n \n+docker-push: \n+\tdocker push $(IMG):$(TAG)\n+\n+image: docker-build docker-push \n+\n build-gcb:", - "comment_created_at": "2022-07-01T14:45:55+00:00", - "comment_author": "apo-ger", - "comment_body": "Done!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1186255342", - "pr_number": 7123, - "pr_file": "conformance/1.7/Makefile", - "created_at": "2023-05-05T15:53:46+00:00", - "commented_code": "# Copyright 2022 The Kubeflow Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nTEST_PROFILE ?= kf-conformance-test\nKUBEFLOW_NAMESPACE ?= kubeflow\n\nsetup:", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1186255342", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7123, - "pr_file": "conformance/1.7/Makefile", - "discussion_id": "1186255342", - "commented_code": "@@ -0,0 +1,59 @@\n+# Copyright 2022 The Kubeflow Authors.\n+#\n+# Licensed under the Apache License, Version 2.0 (the \"License\");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an \"AS IS\" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations 
under the License.\n+\n+TEST_PROFILE ?= kf-conformance-test\n+KUBEFLOW_NAMESPACE ?= kubeflow\n+\n+setup:", - "comment_created_at": "2023-05-05T15:53:46+00:00", - "comment_author": "gkcalat", - "comment_body": "NIT: Replace here and below:\r\n```\r\n.PHONY: setup\r\nsetup:\r\n...\r\n```\r\n\r\nThis will prevent the `make ` from failing when there is a `` file inside the folder. For example, if someone removes the `.yaml` extension from `setup.yaml`, `make setup` won't behave as expected.", - "pr_file_module": null - }, - { - "comment_id": "1186508559", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7123, - "pr_file": "conformance/1.7/Makefile", - "discussion_id": "1186255342", - "commented_code": "@@ -0,0 +1,59 @@\n+# Copyright 2022 The Kubeflow Authors.\n+#\n+# Licensed under the Apache License, Version 2.0 (the \"License\");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an \"AS IS\" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+\n+TEST_PROFILE ?= kf-conformance-test\n+KUBEFLOW_NAMESPACE ?= kubeflow\n+\n+setup:", - "comment_created_at": "2023-05-05T21:21:04+00:00", - "comment_author": "james-jwu", - "comment_body": "Done", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "897092814", - "pr_number": 6406, - "pr_file": "components/tensorboard-controller/Makefile", - "created_at": "2022-06-14T17:14:22+00:00", - "commented_code": "# Image URL to use all building/pushing image targets\nIMG ?= controller:latest", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "897092814", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6406, - "pr_file": "components/tensorboard-controller/Makefile", - "discussion_id": "897092814", - "commented_code": "@@ -1,8 +1,8 @@\n \n # Image URL to use all building/pushing image targets\n IMG ?= controller:latest", - "comment_created_at": "2022-06-14T17:14:22+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Can you add a `TAG ?= $(shell git describe --tags --always)` here? This is the one we use in the web apps as well. 
Let's standardize on this one", - "pr_file_module": null - }, - { - "comment_id": "897805578", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6406, - "pr_file": "components/tensorboard-controller/Makefile", - "discussion_id": "897092814", - "commented_code": "@@ -1,8 +1,8 @@\n \n # Image URL to use all building/pushing image targets\n IMG ?= controller:latest", - "comment_created_at": "2022-06-15T10:23:02+00:00", - "comment_author": "alembiewski", - "comment_body": "Addressed", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "839946589", - "pr_number": 6374, - "pr_file": "components/notebook-controller/Makefile", - "created_at": "2022-03-31T19:23:29+00:00", - "commented_code": "GOBIN=$(shell go env GOBIN)\nendif\n\n# Setting SHELL to bash allows bash commands to be executed by recipes.\n# This is a requirement for 'setup-envtest.sh' in the test target.\n# Options are set to exit when a recipe line exits non-zero or a piped command fails.\nSHELL = /usr/bin/env bash -o pipefail\n.SHELLFLAGS = -ec\n\n.PHONY: all\nall: manager\n\n# check license\ncheck-license:\n\t./third_party/check-license.sh\n##@ General\n\n# Run tests\ntest: generate fmt vet manifests\n\tmkdir -p ${ENVTEST_ASSETS_DIR}\n\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/a9bd9117a77a2f84bbc546e28991136fe0000dc0/hack/setup-envtest.sh\n\tsource ${ENVTEST_ASSETS_DIR}/setup-envtest.sh; fetch_envtest_tools $(ENVTEST_ASSETS_DIR); setup_envtest_env $(ENVTEST_ASSETS_DIR);\n\tgo test ./api/... ./controllers/... -coverprofile cover.out\n# The help target prints out all targets with their descriptions organized\n# beneath their categories. The categories are represented by '##@' and the\n# target descriptions by '##'. The awk commands is responsible for reading the\n# entire set of makefiles included in this invocation, looking for lines of the\n# file as xyz: ## something, and then pretty-format the target and help. Then,\n# if there's a line with ##@ something, that gets pretty-printed as a category.\n# More info on the usage of ANSI control characters for terminal formatting:\n# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters\n# More info on the awk command:\n# http://linuxcommand.org/lc3_adv_awk.php\n\n# Build manager binary\nmanager: generate fmt vet\n\tgo build -o bin/manager main.go\n.PHONY: help\nhelp: ## Display this help.\n\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n\n# Run against the configured Kubernetes cluster in ~/.kube/config\nrun: generate fmt vet\n\tgo run ./main.go\n##@ Development\n\n# Install CRDs into a cluster\ninstall: manifests\n\tkubectl apply -f config/crd/bases\n.PHONY: check-license\ncheck-license: ## Check third-party license\n\t./third_party/check-license.sh\n\n# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\ndeploy: manifests\n\tkubectl apply -f config/crd/bases\n\tkustomize build config/default | kubectl apply -f -\n.PHONY: manifests\nmanifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n\t$(CONTROLLER_GEN) rbac:roleName=role crd webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n\n# Generate manifests e.g. 
CRD, RBAC etc.\nmanifests: controller-gen\n\t$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=role webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n.PHONY: generate\ngenerate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n\n# Run go fmt against code\nfmt:\n.PHONY: fmt\nfmt: ## Run go fmt against code.\n\tgo fmt ./...\n\n# Run go vet against code\nvet:\n.PHONY: vet\nvet: ## Run go vet against code.\n\tgo vet ./...\n\n# Generate code\ngenerate: controller-gen\n\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=./api/...\n.PHONY: test\ntest: manifests generate fmt vet envtest ## Run tests.\n\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... -coverprofile cover.out\n\n##@ Build\n\n# Build the docker image\ndocker-build: test\n.PHONY: manager\nmanager: generate fmt vet ## Build manager binary.\n\tgo build -o bin/manager main.go\n\n.PHONY: run\nrun: manifests generate fmt vet ## Run a controller from your host.\n\tgo run ./main.go\n\n.PHONY: docker-build\ndocker-build: test ## Build docker image with the manager.\n\tcd .. && docker build . -t ${IMG}:${TAG} -f ./notebook-controller/Dockerfile\n\t@echo \"updating kustomize image patch file for manager resource\"\n\tsed -i'' -e 's@image: .*@image: '\"${IMG}:${TAG}\"'@' ./config/default/manager_image_patch.yaml\n\n# Push the docker image\ndocker-push:\n.PHONY: docker-push\ndocker-push: ## Push docker image with the manager.", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "839946589", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6374, - "pr_file": "components/notebook-controller/Makefile", - "discussion_id": "839946589", - "commented_code": "@@ -21,77 +13,125 @@ else\n GOBIN=$(shell go env GOBIN)\n endif\n \n+# Setting SHELL to bash allows bash commands to be executed by recipes.\n+# This is a requirement for 'setup-envtest.sh' in the test target.\n+# Options are set to exit when a recipe line exits non-zero or a piped command fails.\n+SHELL = /usr/bin/env bash -o pipefail\n+.SHELLFLAGS = -ec\n+\n+.PHONY: all\n all: manager\n \n-# check license\n-check-license:\n-\t./third_party/check-license.sh\n+##@ General\n \n-# Run tests\n-test: generate fmt vet manifests\n-\tmkdir -p ${ENVTEST_ASSETS_DIR}\n-\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/a9bd9117a77a2f84bbc546e28991136fe0000dc0/hack/setup-envtest.sh\n-\tsource ${ENVTEST_ASSETS_DIR}/setup-envtest.sh; fetch_envtest_tools $(ENVTEST_ASSETS_DIR); setup_envtest_env $(ENVTEST_ASSETS_DIR);\n-\tgo test ./api/... ./controllers/... -coverprofile cover.out\n+# The help target prints out all targets with their descriptions organized\n+# beneath their categories. The categories are represented by '##@' and the\n+# target descriptions by '##'. The awk commands is responsible for reading the\n+# entire set of makefiles included in this invocation, looking for lines of the\n+# file as xyz: ## something, and then pretty-format the target and help. 
Then,\n+# if there's a line with ##@ something, that gets pretty-printed as a category.\n+# More info on the usage of ANSI control characters for terminal formatting:\n+# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters\n+# More info on the awk command:\n+# http://linuxcommand.org/lc3_adv_awk.php\n \n-# Build manager binary\n-manager: generate fmt vet\n-\tgo build -o bin/manager main.go\n+.PHONY: help\n+help: ## Display this help.\n+\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n \n-# Run against the configured Kubernetes cluster in ~/.kube/config\n-run: generate fmt vet\n-\tgo run ./main.go\n+##@ Development\n \n-# Install CRDs into a cluster\n-install: manifests\n-\tkubectl apply -f config/crd/bases\n+.PHONY: check-license\n+check-license: ## Check third-party license\n+\t./third_party/check-license.sh\n \n-# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\n-deploy: manifests\n-\tkubectl apply -f config/crd/bases\n-\tkustomize build config/default | kubectl apply -f -\n+.PHONY: manifests\n+manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n+\t$(CONTROLLER_GEN) rbac:roleName=role crd webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n \n-# Generate manifests e.g. CRD, RBAC etc.\n-manifests: controller-gen\n-\t$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=role webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n+.PHONY: generate\n+generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n+\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n \n-# Run go fmt against code\n-fmt:\n+.PHONY: fmt\n+fmt: ## Run go fmt against code.\n \tgo fmt ./...\n \n-# Run go vet against code\n-vet:\n+.PHONY: vet\n+vet: ## Run go vet against code.\n \tgo vet ./...\n \n-# Generate code\n-generate: controller-gen\n-\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=./api/...\n+.PHONY: test\n+test: manifests generate fmt vet envtest ## Run tests.\n+\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... -coverprofile cover.out\n+\n+##@ Build\n \n-# Build the docker image\n-docker-build: test\n+.PHONY: manager\n+manager: generate fmt vet ## Build manager binary.\n+\tgo build -o bin/manager main.go\n+\n+.PHONY: run\n+run: manifests generate fmt vet ## Run a controller from your host.\n+\tgo run ./main.go\n+\n+.PHONY: docker-build\n+docker-build: test ## Build docker image with the manager.\n \tcd .. && docker build . 
-t ${IMG}:${TAG} -f ./notebook-controller/Dockerfile\n-\t@echo \"updating kustomize image patch file for manager resource\"\n-\tsed -i'' -e 's@image: .*@image: '\"${IMG}:${TAG}\"'@' ./config/default/manager_image_patch.yaml\n \n-# Push the docker image\n-docker-push:\n+.PHONY: docker-push\n+docker-push: ## Push docker image with the manager.", - "comment_created_at": "2022-03-31T19:23:29+00:00", - "comment_author": "kimwnasptd", - "comment_body": "nit: Could you also create an `image` rule, which builds and pushes the image?\r\n\r\nWe have this for the web apps as well and would be nice to keep this convention across all components\r\nhttps://github.com/kubeflow/kubeflow/blob/master/components/crud-web-apps/jupyter/Makefile#L11", - "pr_file_module": null - }, - { - "comment_id": "840447074", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6374, - "pr_file": "components/notebook-controller/Makefile", - "discussion_id": "839946589", - "commented_code": "@@ -21,77 +13,125 @@ else\n GOBIN=$(shell go env GOBIN)\n endif\n \n+# Setting SHELL to bash allows bash commands to be executed by recipes.\n+# This is a requirement for 'setup-envtest.sh' in the test target.\n+# Options are set to exit when a recipe line exits non-zero or a piped command fails.\n+SHELL = /usr/bin/env bash -o pipefail\n+.SHELLFLAGS = -ec\n+\n+.PHONY: all\n all: manager\n \n-# check license\n-check-license:\n-\t./third_party/check-license.sh\n+##@ General\n \n-# Run tests\n-test: generate fmt vet manifests\n-\tmkdir -p ${ENVTEST_ASSETS_DIR}\n-\ttest -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/a9bd9117a77a2f84bbc546e28991136fe0000dc0/hack/setup-envtest.sh\n-\tsource ${ENVTEST_ASSETS_DIR}/setup-envtest.sh; fetch_envtest_tools $(ENVTEST_ASSETS_DIR); setup_envtest_env $(ENVTEST_ASSETS_DIR);\n-\tgo test ./api/... ./controllers/... -coverprofile cover.out\n+# The help target prints out all targets with their descriptions organized\n+# beneath their categories. The categories are represented by '##@' and the\n+# target descriptions by '##'. The awk commands is responsible for reading the\n+# entire set of makefiles included in this invocation, looking for lines of the\n+# file as xyz: ## something, and then pretty-format the target and help. 
Then,\n+# if there's a line with ##@ something, that gets pretty-printed as a category.\n+# More info on the usage of ANSI control characters for terminal formatting:\n+# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters\n+# More info on the awk command:\n+# http://linuxcommand.org/lc3_adv_awk.php\n \n-# Build manager binary\n-manager: generate fmt vet\n-\tgo build -o bin/manager main.go\n+.PHONY: help\n+help: ## Display this help.\n+\t@awk 'BEGIN {FS = \":.*##\"; printf \"\\nUsage:\\n make \\033[36m\\033[0m\\n\"} /^[a-zA-Z_0-9-]+:.*?##/ { printf \" \\033[36m%-15s\\033[0m %s\\n\", $$1, $$2 } /^##@/ { printf \"\\n\\033[1m%s\\033[0m\\n\", substr($$0, 5) } ' $(MAKEFILE_LIST)\n \n-# Run against the configured Kubernetes cluster in ~/.kube/config\n-run: generate fmt vet\n-\tgo run ./main.go\n+##@ Development\n \n-# Install CRDs into a cluster\n-install: manifests\n-\tkubectl apply -f config/crd/bases\n+.PHONY: check-license\n+check-license: ## Check third-party license\n+\t./third_party/check-license.sh\n \n-# Deploy controller in the configured Kubernetes cluster in ~/.kube/config\n-deploy: manifests\n-\tkubectl apply -f config/crd/bases\n-\tkustomize build config/default | kubectl apply -f -\n+.PHONY: manifests\n+manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.\n+\t$(CONTROLLER_GEN) rbac:roleName=role crd webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n \n-# Generate manifests e.g. CRD, RBAC etc.\n-manifests: controller-gen\n-\t$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=role webhook paths=\"./...\" output:crd:artifacts:config=config/crd/bases\n+.PHONY: generate\n+generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.\n+\t$(CONTROLLER_GEN) object:headerFile=\"hack/boilerplate.go.txt\" paths=\"./...\"\n \n-# Run go fmt against code\n-fmt:\n+.PHONY: fmt\n+fmt: ## Run go fmt against code.\n \tgo fmt ./...\n \n-# Run go vet against code\n-vet:\n+.PHONY: vet\n+vet: ## Run go vet against code.\n \tgo vet ./...\n \n-# Generate code\n-generate: controller-gen\n-\t$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths=./api/...\n+.PHONY: test\n+test: manifests generate fmt vet envtest ## Run tests.\n+\tKUBEBUILDER_ASSETS=\"$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)\" go test ./... -coverprofile cover.out\n+\n+##@ Build\n \n-# Build the docker image\n-docker-build: test\n+.PHONY: manager\n+manager: generate fmt vet ## Build manager binary.\n+\tgo build -o bin/manager main.go\n+\n+.PHONY: run\n+run: manifests generate fmt vet ## Run a controller from your host.\n+\tgo run ./main.go\n+\n+.PHONY: docker-build\n+docker-build: test ## Build docker image with the manager.\n \tcd .. && docker build . 
-t ${IMG}:${TAG} -f ./notebook-controller/Dockerfile\n-\t@echo \"updating kustomize image patch file for manager resource\"\n-\tsed -i'' -e 's@image: .*@image: '\"${IMG}:${TAG}\"'@' ./config/default/manager_image_patch.yaml\n \n-# Push the docker image\n-docker-push:\n+.PHONY: docker-push\n+docker-push: ## Push docker image with the manager.", - "comment_created_at": "2022-04-01T10:26:53+00:00", - "comment_author": "samuelvl", - "comment_body": "Done!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "613160932", - "pr_number": 5820, - "pr_file": "components/crud-web-apps/volumes/Makefile", - "created_at": "2021-04-14T11:23:03+00:00", - "commented_code": "IMG ?= gcr.io/arrikto-playground/kubeflow/volumes-web-app\nTAG ?= $(shell git describe --tags)\nDOCKERFILE=volumes/Dockerfile\nIMG ?= public.ecr.aws/j1r0q0g6/notebooks/volumes-web-app\nTAG ?= $(shell git describe --tags --always)\nDOCKERFILE ?= volumes/Dockerfile\n\ndocker-build:\n\tcp Dockerfile.dockerignore ../.dockerignore\n\t-cd ../ && docker build -t ${IMG}:${TAG} -f ${DOCKERFILE} .\n\trm ../.dockerignore\n\tcd ../ && docker build -t ${IMG}:${TAG} -f ${DOCKERFILE} .", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "613160932", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5820, - "pr_file": "components/crud-web-apps/volumes/Makefile", - "discussion_id": "613160932", - "commented_code": "@@ -1,11 +1,9 @@\n-IMG ?= gcr.io/arrikto-playground/kubeflow/volumes-web-app\n-TAG ?= $(shell git describe --tags)\n-DOCKERFILE=volumes/Dockerfile\n+IMG ?= public.ecr.aws/j1r0q0g6/notebooks/volumes-web-app\n+TAG ?= $(shell git describe --tags --always)\n+DOCKERFILE ?= volumes/Dockerfile\n \n docker-build:\n-\tcp Dockerfile.dockerignore ../.dockerignore\n-\t-cd ../ && docker build -t ${IMG}:${TAG} -f ${DOCKERFILE} .\n-\trm ../.dockerignore\n+\tcd ../ && docker build -t ${IMG}:${TAG} -f ${DOCKERFILE} .", - "comment_created_at": "2021-04-14T11:23:03+00:00", - "comment_author": "elikatsis", - "comment_body": "This part is a bit weird. I mean mentioning paths as if we are in a different directory because we will be using them in a different context (after `cd`ing).\r\n\r\nMay I suggest we\r\n1. Drop the `DOCKERFILE` variable (I think we shouldn't be messing with it) OR make it relative to this directory\r\n I.e.,\r\n ```\r\n DOCKERFILE=Dockerfile\r\n ```\r\n2. Drop the `cd`ing part from the Makefile target and use the parent directory as the path for the Docker building context (https://docs.docker.com/engine/reference/commandline/build/#build-with-path)\r\n This will make the Makefile target as follows\r\n ```Makefile\r\n docker-build:\r\n docker build -t ${IMG}:${TAG} -f Dockerfile .. # Or \"-f ${DOCKERFILE}\" depending on the decision of (1)\r\n ```\r\n\r\nI understand the reason we are `cd`ing to the parent directory is to use a single `.dockerignore` file for all the web apps.\r\nI tested locally that we maintain this behavior with the change I proposed, so there should be no problem. 
Let's also have others verify this as well (you, the CI, other?).\r\n\r\nThen, we can follow up changing this for the other web apps as well (this is not a blocker for the release of course).", - "pr_file_module": null - }, - { - "comment_id": "633478595", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5820, - "pr_file": "components/crud-web-apps/volumes/Makefile", - "discussion_id": "613160932", - "commented_code": "@@ -1,11 +1,9 @@\n-IMG ?= gcr.io/arrikto-playground/kubeflow/volumes-web-app\n-TAG ?= $(shell git describe --tags)\n-DOCKERFILE=volumes/Dockerfile\n+IMG ?= public.ecr.aws/j1r0q0g6/notebooks/volumes-web-app\n+TAG ?= $(shell git describe --tags --always)\n+DOCKERFILE ?= volumes/Dockerfile\n \n docker-build:\n-\tcp Dockerfile.dockerignore ../.dockerignore\n-\t-cd ../ && docker build -t ${IMG}:${TAG} -f ${DOCKERFILE} .\n-\trm ../.dockerignore\n+\tcd ../ && docker build -t ${IMG}:${TAG} -f ${DOCKERFILE} .", - "comment_created_at": "2021-05-17T12:16:43+00:00", - "comment_author": "kimwnasptd", - "comment_body": "@elikatsis ACK, this is a very good suggestion!\r\n\r\nI'll add a commit for Volumes web app in this PR, since it's fixing the formatting for this component. I'll then open up a new issue to keep track of the uniformity in the dockerfiles for the other web apps as well.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-standardize-network-tools.json b/_reviewers/kubeflow-standardize-network-tools.json new file mode 100644 index 0000000..5fcbd20 --- /dev/null +++ b/_reviewers/kubeflow-standardize-network-tools.json @@ -0,0 +1,138 @@ +[ + { + "discussion_id": "1720474884", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "created_at": "2024-08-16T23:41:52+00:00", + "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n\n# version details\nARG ARTIFACTORY_URL=vault.habana.ai\nARG VERSION=1.16.2\nARG REVISION=2\nARG PYTORCH_VERSION=2.2.2\nARG OS_NUMBER=2204\n\n# runtime variables\nENV DEBIAN_FRONTEND=noninteractive\nENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\nENV HABANA_LOGS=/var/log/habana_logs/\nENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\nENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\nENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\nENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n\n# There is no need to store pip installation files inside docker image\nENV PIP_NO_CACHE_DIR=on\nENV PIP_DISABLE_PIP_VERSION_CHECK=1\n\n# libfabric and scaling variables\nENV LIBFABRIC_VERSION=\"1.20.0\"\nENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\nENV MPI_ROOT=/opt/amazon/openmpi\nENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\nENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\nENV MPICC=${MPI_ROOT}/bin/mpicc\nENV OPAL_PREFIX=${MPI_ROOT}\nENV RDMAV_FORK_SAFE=1\nENV FI_EFA_USE_DEVICE_RDMA=1\nENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\nENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib\n\n# Gaudi stack doesn't (yet) support 3.11, thus\n# downgrade python from 3.11.x to 3.10.14\nRUN source /opt/conda/etc/profile.d/conda.sh \\\n && conda activate base \\\n && conda install -y -q --no-pin python=3.10.14 \\\n && sed -i 
's/python ==.*/python ==3.10.14/' /opt/conda/conda-meta/pinned \\\n && conda clean -a -f -y\n\nUSER root\n\n# install various support libraries\nRUN apt-get update \\\n && apt-get install -y --no-install-recommends \\\n apt-utils \\\n bc \\\n build-essential \\\n graphviz \\\n iproute2 \\\n libcairo2-dev \\\n libgl1 \\\n libglib2.0-dev \\\n libgnutls30 \\\n libgoogle-glog0v5 \\\n libgoogle-perftools-dev \\\n libhdf5-dev \\\n libjemalloc2 \\\n libjpeg-dev \\\n liblapack-dev \\\n libmkl-dev \\\n libnuma-dev \\\n libopenblas-dev \\\n libpcre2-dev \\\n libpq-dev \\\n libselinux1-dev \\\n lsof \\\n moreutils \\\n numactl \\\n protobuf-compiler \\\n && apt-get autoremove \\\n && apt-get clean \\\n && rm -rf /var/lib/apt/lists/*\n\n# install elastic fabric adapter\nRUN apt-get update && export EFA_VERSION=1.29.0 && mkdir /tmp/efa \\\n && wget -q -O- https://efa-installer.amazonaws.com/aws-efa-installer-$EFA_VERSION.tar.gz | tar xz -C /tmp/efa \\\n && cd /tmp/efa/aws-efa-installer && ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify \\\n && rm -rf /tmp/efa && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh \\\n && apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/*\n\n# install habana packages\nRUN wget -O- https://${ARTIFACTORY_URL}/artifactory/api/gpg/key/public | gpg --dearmor -o /usr/share/keyrings/habana-artifactory.gpg \\", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1720474884", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "discussion_id": "1720474884", + "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n+\n+# There is no need to store pip installation files inside docker image\n+ENV PIP_NO_CACHE_DIR=on\n+ENV PIP_DISABLE_PIP_VERSION_CHECK=1\n+\n+# libfabric and scaling variables\n+ENV LIBFABRIC_VERSION=\"1.20.0\"\n+ENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\n+ENV MPI_ROOT=/opt/amazon/openmpi\n+ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\n+ENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\n+ENV MPICC=${MPI_ROOT}/bin/mpicc\n+ENV OPAL_PREFIX=${MPI_ROOT}\n+ENV RDMAV_FORK_SAFE=1\n+ENV FI_EFA_USE_DEVICE_RDMA=1\n+ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\n+ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib\n+\n+# Gaudi stack doesn't (yet) support 3.11, thus\n+# downgrade python from 3.11.x to 3.10.14\n+RUN source /opt/conda/etc/profile.d/conda.sh \\\n+ && conda activate base \\\n+ && conda install -y -q --no-pin python=3.10.14 \\\n+ && sed -i 's/python ==.*/python ==3.10.14/' /opt/conda/conda-meta/pinned 
\\\n+ && conda clean -a -f -y\n+\n+USER root\n+\n+# install various support libraries\n+RUN apt-get update \\\n+ && apt-get install -y --no-install-recommends \\\n+ apt-utils \\\n+ bc \\\n+ build-essential \\\n+ graphviz \\\n+ iproute2 \\\n+ libcairo2-dev \\\n+ libgl1 \\\n+ libglib2.0-dev \\\n+ libgnutls30 \\\n+ libgoogle-glog0v5 \\\n+ libgoogle-perftools-dev \\\n+ libhdf5-dev \\\n+ libjemalloc2 \\\n+ libjpeg-dev \\\n+ liblapack-dev \\\n+ libmkl-dev \\\n+ libnuma-dev \\\n+ libopenblas-dev \\\n+ libpcre2-dev \\\n+ libpq-dev \\\n+ libselinux1-dev \\\n+ lsof \\\n+ moreutils \\\n+ numactl \\\n+ protobuf-compiler \\\n+ && apt-get autoremove \\\n+ && apt-get clean \\\n+ && rm -rf /var/lib/apt/lists/*\n+\n+# install elastic fabric adapter\n+RUN apt-get update && export EFA_VERSION=1.29.0 && mkdir /tmp/efa \\\n+ && wget -q -O- https://efa-installer.amazonaws.com/aws-efa-installer-$EFA_VERSION.tar.gz | tar xz -C /tmp/efa \\\n+ && cd /tmp/efa/aws-efa-installer && ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify \\\n+ && rm -rf /tmp/efa && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh \\\n+ && apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/*\n+\n+# install habana packages\n+RUN wget -O- https://${ARTIFACTORY_URL}/artifactory/api/gpg/key/public | gpg --dearmor -o /usr/share/keyrings/habana-artifactory.gpg \\", + "comment_created_at": "2024-08-16T23:41:52+00:00", + "comment_author": "thesuperzapper", + "comment_body": "Where possible, please use `curl -fsSL URL -o TARGET` so we don't need to require `wget`.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1720474977", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "created_at": "2024-08-16T23:42:14+00:00", + "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n\n# version details\nARG ARTIFACTORY_URL=vault.habana.ai\nARG VERSION=1.16.2\nARG REVISION=2\nARG PYTORCH_VERSION=2.2.2\nARG OS_NUMBER=2204\n\n# runtime variables\nENV DEBIAN_FRONTEND=noninteractive\nENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\nENV HABANA_LOGS=/var/log/habana_logs/\nENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\nENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\nENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\nENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n\n# There is no need to store pip installation files inside docker image\nENV PIP_NO_CACHE_DIR=on\nENV PIP_DISABLE_PIP_VERSION_CHECK=1\n\n# libfabric and scaling variables\nENV LIBFABRIC_VERSION=\"1.20.0\"\nENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\nENV MPI_ROOT=/opt/amazon/openmpi\nENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\nENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\nENV MPICC=${MPI_ROOT}/bin/mpicc\nENV OPAL_PREFIX=${MPI_ROOT}\nENV RDMAV_FORK_SAFE=1\nENV FI_EFA_USE_DEVICE_RDMA=1\nENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\nENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib\n\n# Gaudi stack doesn't (yet) support 3.11, thus\n# downgrade python from 3.11.x to 3.10.14\nRUN source /opt/conda/etc/profile.d/conda.sh \\\n && conda activate base \\\n && conda install -y -q --no-pin python=3.10.14 \\\n && sed -i 
's/python ==.*/python ==3.10.14/' /opt/conda/conda-meta/pinned \\\n && conda clean -a -f -y\n\nUSER root\n\n# install various support libraries\nRUN apt-get update \\\n && apt-get install -y --no-install-recommends \\\n apt-utils \\\n bc \\\n build-essential \\\n graphviz \\\n iproute2 \\\n libcairo2-dev \\\n libgl1 \\\n libglib2.0-dev \\\n libgnutls30 \\\n libgoogle-glog0v5 \\\n libgoogle-perftools-dev \\\n libhdf5-dev \\\n libjemalloc2 \\\n libjpeg-dev \\\n liblapack-dev \\\n libmkl-dev \\\n libnuma-dev \\\n libopenblas-dev \\\n libpcre2-dev \\\n libpq-dev \\\n libselinux1-dev \\\n lsof \\\n moreutils \\\n numactl \\\n protobuf-compiler \\\n && apt-get autoremove \\\n && apt-get clean \\\n && rm -rf /var/lib/apt/lists/*\n\n# install elastic fabric adapter\nRUN apt-get update && export EFA_VERSION=1.29.0 && mkdir /tmp/efa \\\n && wget -q -O- https://efa-installer.amazonaws.com/aws-efa-installer-$EFA_VERSION.tar.gz | tar xz -C /tmp/efa \\", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1720474977", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "discussion_id": "1720474977", + "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n+\n+# There is no need to store pip installation files inside docker image\n+ENV PIP_NO_CACHE_DIR=on\n+ENV PIP_DISABLE_PIP_VERSION_CHECK=1\n+\n+# libfabric and scaling variables\n+ENV LIBFABRIC_VERSION=\"1.20.0\"\n+ENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\n+ENV MPI_ROOT=/opt/amazon/openmpi\n+ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\n+ENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\n+ENV MPICC=${MPI_ROOT}/bin/mpicc\n+ENV OPAL_PREFIX=${MPI_ROOT}\n+ENV RDMAV_FORK_SAFE=1\n+ENV FI_EFA_USE_DEVICE_RDMA=1\n+ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\n+ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib\n+\n+# Gaudi stack doesn't (yet) support 3.11, thus\n+# downgrade python from 3.11.x to 3.10.14\n+RUN source /opt/conda/etc/profile.d/conda.sh \\\n+ && conda activate base \\\n+ && conda install -y -q --no-pin python=3.10.14 \\\n+ && sed -i 's/python ==.*/python ==3.10.14/' /opt/conda/conda-meta/pinned \\\n+ && conda clean -a -f -y\n+\n+USER root\n+\n+# install various support libraries\n+RUN apt-get update \\\n+ && apt-get install -y --no-install-recommends \\\n+ apt-utils \\\n+ bc \\\n+ build-essential \\\n+ graphviz \\\n+ iproute2 \\\n+ libcairo2-dev \\\n+ libgl1 \\\n+ libglib2.0-dev \\\n+ libgnutls30 \\\n+ libgoogle-glog0v5 \\\n+ libgoogle-perftools-dev \\\n+ libhdf5-dev \\\n+ libjemalloc2 \\\n+ libjpeg-dev \\\n+ 
liblapack-dev \\\n+ libmkl-dev \\\n+ libnuma-dev \\\n+ libopenblas-dev \\\n+ libpcre2-dev \\\n+ libpq-dev \\\n+ libselinux1-dev \\\n+ lsof \\\n+ moreutils \\\n+ numactl \\\n+ protobuf-compiler \\\n+ && apt-get autoremove \\\n+ && apt-get clean \\\n+ && rm -rf /var/lib/apt/lists/*\n+\n+# install elastic fabric adapter\n+RUN apt-get update && export EFA_VERSION=1.29.0 && mkdir /tmp/efa \\\n+ && wget -q -O- https://efa-installer.amazonaws.com/aws-efa-installer-$EFA_VERSION.tar.gz | tar xz -C /tmp/efa \\", + "comment_created_at": "2024-08-16T23:42:14+00:00", + "comment_author": "thesuperzapper", + "comment_body": "Where possible, please use `curl -fsSL URL -o TARGET` so we don't need to require `wget`.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1720475020", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "created_at": "2024-08-16T23:42:24+00:00", + "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n\n# version details\nARG ARTIFACTORY_URL=vault.habana.ai\nARG VERSION=1.16.2\nARG REVISION=2\nARG PYTORCH_VERSION=2.2.2\nARG OS_NUMBER=2204\n\n# runtime variables\nENV DEBIAN_FRONTEND=noninteractive\nENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\nENV HABANA_LOGS=/var/log/habana_logs/\nENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\nENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\nENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\nENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n\n# There is no need to store pip installation files inside docker image\nENV PIP_NO_CACHE_DIR=on\nENV PIP_DISABLE_PIP_VERSION_CHECK=1\n\n# libfabric and scaling variables\nENV LIBFABRIC_VERSION=\"1.20.0\"\nENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\nENV MPI_ROOT=/opt/amazon/openmpi\nENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\nENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\nENV MPICC=${MPI_ROOT}/bin/mpicc\nENV OPAL_PREFIX=${MPI_ROOT}\nENV RDMAV_FORK_SAFE=1\nENV FI_EFA_USE_DEVICE_RDMA=1\nENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\nENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib\n\n# Gaudi stack doesn't (yet) support 3.11, thus\n# downgrade python from 3.11.x to 3.10.14\nRUN source /opt/conda/etc/profile.d/conda.sh \\\n && conda activate base \\\n && conda install -y -q --no-pin python=3.10.14 \\\n && sed -i 's/python ==.*/python ==3.10.14/' /opt/conda/conda-meta/pinned \\\n && conda clean -a -f -y\n\nUSER root\n\n# install various support libraries\nRUN apt-get update \\\n && apt-get install -y --no-install-recommends \\\n apt-utils \\\n bc \\\n build-essential \\\n graphviz \\\n iproute2 \\\n libcairo2-dev \\\n libgl1 \\\n libglib2.0-dev \\\n libgnutls30 \\\n libgoogle-glog0v5 \\\n libgoogle-perftools-dev \\\n libhdf5-dev \\\n libjemalloc2 \\\n libjpeg-dev \\\n liblapack-dev \\\n libmkl-dev \\\n libnuma-dev \\\n libopenblas-dev \\\n libpcre2-dev \\\n libpq-dev \\\n libselinux1-dev \\\n lsof \\\n moreutils \\\n numactl \\\n protobuf-compiler \\\n && apt-get autoremove \\\n && apt-get clean \\\n && rm -rf /var/lib/apt/lists/*\n\n# install elastic fabric adapter\nRUN apt-get update && export EFA_VERSION=1.29.0 && mkdir /tmp/efa \\\n && wget -q -O- 
https://efa-installer.amazonaws.com/aws-efa-installer-$EFA_VERSION.tar.gz | tar xz -C /tmp/efa \\\n && cd /tmp/efa/aws-efa-installer && ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify \\\n && rm -rf /tmp/efa && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh \\\n && apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/*\n\n# install habana packages\nRUN wget -O- https://${ARTIFACTORY_URL}/artifactory/api/gpg/key/public | gpg --dearmor -o /usr/share/keyrings/habana-artifactory.gpg \\\n && chown root:root /usr/share/keyrings/habana-artifactory.gpg \\\n && chmod 644 /usr/share/keyrings/habana-artifactory.gpg \\\n && echo \"deb [signed-by=/usr/share/keyrings/habana-artifactory.gpg] https://${ARTIFACTORY_URL}/artifactory/debian jammy main\" | tee -a /etc/apt/sources.list \\\n && apt-get update \\\n && apt-get install -y --no-install-recommends \\\n habanalabs-rdma-core=\"$VERSION\"-\"$REVISION\" \\\n habanalabs-thunk=\"$VERSION\"-\"$REVISION\" \\\n habanalabs-firmware-tools=\"$VERSION\"-\"$REVISION\" \\\n habanalabs-graph=\"$VERSION\"-\"$REVISION\" \\\n && apt-get autoremove --yes && apt-get clean && rm -rf /var/lib/apt/lists/* \\\n && sed --in-place \"/$ARTIFACTORY_URL/d\" /etc/apt/sources.list\n\n# install libfabric\nRUN wget -nv -O /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 https://github.com/ofiwg/libfabric/releases/download/v${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \\\n && cd /tmp/ && tar xf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \\\n && cd /tmp/libfabric-${LIBFABRIC_VERSION} \\\n && ./configure --prefix=$LIBFABRIC_ROOT --enable-psm3-verbs --enable-verbs=yes --with-synapseai=/usr \\\n && make -j && make install && cd / && rm -rf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 /tmp/libfabric-${LIBFABRIC_VERSION}\n\n# install hccl wrapper for ofi\nRUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/heads/main.zip \\", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1720475020", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "discussion_id": "1720475020", + "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n+\n+# There is no need to store pip installation files inside docker image\n+ENV PIP_NO_CACHE_DIR=on\n+ENV PIP_DISABLE_PIP_VERSION_CHECK=1\n+\n+# libfabric and scaling variables\n+ENV LIBFABRIC_VERSION=\"1.20.0\"\n+ENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\n+ENV MPI_ROOT=/opt/amazon/openmpi\n+ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\n+ENV 
PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\n+ENV MPICC=${MPI_ROOT}/bin/mpicc\n+ENV OPAL_PREFIX=${MPI_ROOT}\n+ENV RDMAV_FORK_SAFE=1\n+ENV FI_EFA_USE_DEVICE_RDMA=1\n+ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\n+ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib\n+\n+# Gaudi stack doesn't (yet) support 3.11, thus\n+# downgrade python from 3.11.x to 3.10.14\n+RUN source /opt/conda/etc/profile.d/conda.sh \\\n+ && conda activate base \\\n+ && conda install -y -q --no-pin python=3.10.14 \\\n+ && sed -i 's/python ==.*/python ==3.10.14/' /opt/conda/conda-meta/pinned \\\n+ && conda clean -a -f -y\n+\n+USER root\n+\n+# install various support libraries\n+RUN apt-get update \\\n+ && apt-get install -y --no-install-recommends \\\n+ apt-utils \\\n+ bc \\\n+ build-essential \\\n+ graphviz \\\n+ iproute2 \\\n+ libcairo2-dev \\\n+ libgl1 \\\n+ libglib2.0-dev \\\n+ libgnutls30 \\\n+ libgoogle-glog0v5 \\\n+ libgoogle-perftools-dev \\\n+ libhdf5-dev \\\n+ libjemalloc2 \\\n+ libjpeg-dev \\\n+ liblapack-dev \\\n+ libmkl-dev \\\n+ libnuma-dev \\\n+ libopenblas-dev \\\n+ libpcre2-dev \\\n+ libpq-dev \\\n+ libselinux1-dev \\\n+ lsof \\\n+ moreutils \\\n+ numactl \\\n+ protobuf-compiler \\\n+ && apt-get autoremove \\\n+ && apt-get clean \\\n+ && rm -rf /var/lib/apt/lists/*\n+\n+# install elastic fabric adapter\n+RUN apt-get update && export EFA_VERSION=1.29.0 && mkdir /tmp/efa \\\n+ && wget -q -O- https://efa-installer.amazonaws.com/aws-efa-installer-$EFA_VERSION.tar.gz | tar xz -C /tmp/efa \\\n+ && cd /tmp/efa/aws-efa-installer && ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify \\\n+ && rm -rf /tmp/efa && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh \\\n+ && apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/*\n+\n+# install habana packages\n+RUN wget -O- https://${ARTIFACTORY_URL}/artifactory/api/gpg/key/public | gpg --dearmor -o /usr/share/keyrings/habana-artifactory.gpg \\\n+ && chown root:root /usr/share/keyrings/habana-artifactory.gpg \\\n+ && chmod 644 /usr/share/keyrings/habana-artifactory.gpg \\\n+ && echo \"deb [signed-by=/usr/share/keyrings/habana-artifactory.gpg] https://${ARTIFACTORY_URL}/artifactory/debian jammy main\" | tee -a /etc/apt/sources.list \\\n+ && apt-get update \\\n+ && apt-get install -y --no-install-recommends \\\n+ habanalabs-rdma-core=\"$VERSION\"-\"$REVISION\" \\\n+ habanalabs-thunk=\"$VERSION\"-\"$REVISION\" \\\n+ habanalabs-firmware-tools=\"$VERSION\"-\"$REVISION\" \\\n+ habanalabs-graph=\"$VERSION\"-\"$REVISION\" \\\n+ && apt-get autoremove --yes && apt-get clean && rm -rf /var/lib/apt/lists/* \\\n+ && sed --in-place \"/$ARTIFACTORY_URL/d\" /etc/apt/sources.list\n+\n+# install libfabric\n+RUN wget -nv -O /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 https://github.com/ofiwg/libfabric/releases/download/v${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \\\n+ && cd /tmp/ && tar xf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \\\n+ && cd /tmp/libfabric-${LIBFABRIC_VERSION} \\\n+ && ./configure --prefix=$LIBFABRIC_ROOT --enable-psm3-verbs --enable-verbs=yes --with-synapseai=/usr \\\n+ && make -j && make install && cd / && rm -rf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 /tmp/libfabric-${LIBFABRIC_VERSION}\n+\n+# install hccl wrapper for ofi\n+RUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/heads/main.zip \\", + "comment_created_at": "2024-08-16T23:42:24+00:00", + "comment_author": "thesuperzapper", + "comment_body": "Where possible, please use `curl -fsSL URL -o TARGET` so we don't need to 
require `wget`.", + "pr_file_module": null + }, + { + "comment_id": "1724998664", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7635, + "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", + "discussion_id": "1720475020", + "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n+\n+# There is no need to store pip installation files inside docker image\n+ENV PIP_NO_CACHE_DIR=on\n+ENV PIP_DISABLE_PIP_VERSION_CHECK=1\n+\n+# libfabric and scaling variables\n+ENV LIBFABRIC_VERSION=\"1.20.0\"\n+ENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\n+ENV MPI_ROOT=/opt/amazon/openmpi\n+ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\n+ENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\n+ENV MPICC=${MPI_ROOT}/bin/mpicc\n+ENV OPAL_PREFIX=${MPI_ROOT}\n+ENV RDMAV_FORK_SAFE=1\n+ENV FI_EFA_USE_DEVICE_RDMA=1\n+ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\n+ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib\n+\n+# Gaudi stack doesn't (yet) support 3.11, thus\n+# downgrade python from 3.11.x to 3.10.14\n+RUN source /opt/conda/etc/profile.d/conda.sh \\\n+ && conda activate base \\\n+ && conda install -y -q --no-pin python=3.10.14 \\\n+ && sed -i 's/python ==.*/python ==3.10.14/' /opt/conda/conda-meta/pinned \\\n+ && conda clean -a -f -y\n+\n+USER root\n+\n+# install various support libraries\n+RUN apt-get update \\\n+ && apt-get install -y --no-install-recommends \\\n+ apt-utils \\\n+ bc \\\n+ build-essential \\\n+ graphviz \\\n+ iproute2 \\\n+ libcairo2-dev \\\n+ libgl1 \\\n+ libglib2.0-dev \\\n+ libgnutls30 \\\n+ libgoogle-glog0v5 \\\n+ libgoogle-perftools-dev \\\n+ libhdf5-dev \\\n+ libjemalloc2 \\\n+ libjpeg-dev \\\n+ liblapack-dev \\\n+ libmkl-dev \\\n+ libnuma-dev \\\n+ libopenblas-dev \\\n+ libpcre2-dev \\\n+ libpq-dev \\\n+ libselinux1-dev \\\n+ lsof \\\n+ moreutils \\\n+ numactl \\\n+ protobuf-compiler \\\n+ && apt-get autoremove \\\n+ && apt-get clean \\\n+ && rm -rf /var/lib/apt/lists/*\n+\n+# install elastic fabric adapter\n+RUN apt-get update && export EFA_VERSION=1.29.0 && mkdir /tmp/efa \\\n+ && wget -q -O- https://efa-installer.amazonaws.com/aws-efa-installer-$EFA_VERSION.tar.gz | tar xz -C /tmp/efa \\\n+ && cd /tmp/efa/aws-efa-installer && ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify \\\n+ && rm -rf /tmp/efa && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh \\\n+ && apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/*\n+\n+# install habana packages\n+RUN wget -O- https://${ARTIFACTORY_URL}/artifactory/api/gpg/key/public | gpg --dearmor -o /usr/share/keyrings/habana-artifactory.gpg \\\n+ && chown root:root 
/usr/share/keyrings/habana-artifactory.gpg \\\n+ && chmod 644 /usr/share/keyrings/habana-artifactory.gpg \\\n+ && echo \"deb [signed-by=/usr/share/keyrings/habana-artifactory.gpg] https://${ARTIFACTORY_URL}/artifactory/debian jammy main\" | tee -a /etc/apt/sources.list \\\n+ && apt-get update \\\n+ && apt-get install -y --no-install-recommends \\\n+ habanalabs-rdma-core=\"$VERSION\"-\"$REVISION\" \\\n+ habanalabs-thunk=\"$VERSION\"-\"$REVISION\" \\\n+ habanalabs-firmware-tools=\"$VERSION\"-\"$REVISION\" \\\n+ habanalabs-graph=\"$VERSION\"-\"$REVISION\" \\\n+ && apt-get autoremove --yes && apt-get clean && rm -rf /var/lib/apt/lists/* \\\n+ && sed --in-place \"/$ARTIFACTORY_URL/d\" /etc/apt/sources.list\n+\n+# install libfabric\n+RUN wget -nv -O /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 https://github.com/ofiwg/libfabric/releases/download/v${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \\\n+ && cd /tmp/ && tar xf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \\\n+ && cd /tmp/libfabric-${LIBFABRIC_VERSION} \\\n+ && ./configure --prefix=$LIBFABRIC_ROOT --enable-psm3-verbs --enable-verbs=yes --with-synapseai=/usr \\\n+ && make -j && make install && cd / && rm -rf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 /tmp/libfabric-${LIBFABRIC_VERSION}\n+\n+# install hccl wrapper for ofi\n+RUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/heads/main.zip \\", + "comment_created_at": "2024-08-21T12:51:02+00:00", + "comment_author": "tkatila", + "comment_body": "roger", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "594100126", + "pr_number": 5693, + "pr_file": "components/crud-web-apps/tensorboards/Dockerfile", + "created_at": "2021-03-15T07:27:00+00:00", + "commented_code": "COPY --from=frontend /src/dist/ /src/app/static/\r\n\r\nENTRYPOINT make run\r\nENTRYPOINT gunicorn -w 3 --bind 0.0.0.0:5000 --access-logfile - entrypoint:app", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "594100126", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5693, + "pr_file": "components/crud-web-apps/tensorboards/Dockerfile", + "discussion_id": "594100126", + "commented_code": "@@ -48,4 +48,4 @@ COPY ./tensorboards/backend/Makefile .\n \r\n COPY --from=frontend /src/dist/ /src/app/static/\r\n \r\n-ENTRYPOINT make run\r\n+ENTRYPOINT gunicorn -w 3 --bind 0.0.0.0:5000 --access-logfile - entrypoint:app\r", + "comment_created_at": "2021-03-15T07:27:00+00:00", + "comment_author": "nrchakradhar", + "comment_body": "Will this have any incompatibilities for IPv6?", + "pr_file_module": null + }, + { + "comment_id": "594201616", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5693, + "pr_file": "components/crud-web-apps/tensorboards/Dockerfile", + "discussion_id": "594100126", + "commented_code": "@@ -48,4 +48,4 @@ COPY ./tensorboards/backend/Makefile .\n \r\n COPY --from=frontend /src/dist/ /src/app/static/\r\n \r\n-ENTRYPOINT make run\r\n+ENTRYPOINT gunicorn -w 3 --bind 0.0.0.0:5000 --access-logfile - entrypoint:app\r", + "comment_created_at": "2021-03-15T10:07:46+00:00", + "comment_author": "davidspek", + "comment_body": "@nrchakradhar Good point. These same entrypoint is being used by the Jupyter-Web-App. 
Do you have suggestions how to make this IPv6 compatible?", + "pr_file_module": null + }, + { + "comment_id": "594277714", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5693, + "pr_file": "components/crud-web-apps/tensorboards/Dockerfile", + "discussion_id": "594100126", + "commented_code": "@@ -48,4 +48,4 @@ COPY ./tensorboards/backend/Makefile .\n \r\n COPY --from=frontend /src/dist/ /src/app/static/\r\n \r\n-ENTRYPOINT make run\r\n+ENTRYPOINT gunicorn -w 3 --bind 0.0.0.0:5000 --access-logfile - entrypoint:app\r", + "comment_created_at": "2021-03-15T12:04:49+00:00", + "comment_author": "nrchakradhar", + "comment_body": "I do not know anything about gunicorn, but from its documentation, the following may work:\r\nhttps://docs.gunicorn.org/en/stable/settings.html\r\nMultiple addresses can be bound. ex.:\r\n\r\n$ gunicorn -b 127.0.0.1:8000 -b [::1]:8000 test:app\r\nwill bind the test:app application on localhost both on ipv6 and ipv4 interfaces.\r\n\r\nI found an interesting observation here(its quite old though)\r\nhttps://github.com/benoitc/gunicorn/issues/1138#issuecomment-158358666\r\n\r\nCan we make this configurable from manifests so that IPv6 support is provided. There are few issues created already for IPv6 support: #5615 #5605 ", + "pr_file_module": null + }, + { + "comment_id": "595368484", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5693, + "pr_file": "components/crud-web-apps/tensorboards/Dockerfile", + "discussion_id": "594100126", + "commented_code": "@@ -48,4 +48,4 @@ COPY ./tensorboards/backend/Makefile .\n \r\n COPY --from=frontend /src/dist/ /src/app/static/\r\n \r\n-ENTRYPOINT make run\r\n+ENTRYPOINT gunicorn -w 3 --bind 0.0.0.0:5000 --access-logfile - entrypoint:app\r", + "comment_created_at": "2021-03-16T17:01:39+00:00", + "comment_author": "kimwnasptd", + "comment_body": "@nrchakradhar thank you for your comments. I'm planning on tackling the IPv6 support for all the web apps after 1.3.\r\n\r\nThe first step is to bind the IPv6 equivalent addresses as well, as you've pointed out in your links. But I want to do a good deep dive on this and test some edge cases for which I don't have the cycles right now. 
", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-standardize-network-tools.md b/_reviewers/kubeflow-standardize-network-tools.md index bb157d4..bfa2d27 100644 --- a/_reviewers/kubeflow-standardize-network-tools.md +++ b/_reviewers/kubeflow-standardize-network-tools.md @@ -32,143 +32,3 @@ gunicorn -w 3 --bind 0.0.0.0:5000 app:app # Use: gunicorn -w 3 --bind 0.0.0.0:5000 --bind [::]:5000 app:app ``` - - -[ - { - "discussion_id": "1720474884", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "created_at": "2024-08-16T23:41:52+00:00", - "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n\n# version details\nARG ARTIFACTORY_URL=vault.habana.ai\nARG VERSION=1.16.2\nARG REVISION=2\nARG PYTORCH_VERSION=2.2.2\nARG OS_NUMBER=2204\n\n# runtime variables\nENV DEBIAN_FRONTEND=noninteractive\nENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\nENV HABANA_LOGS=/var/log/habana_logs/\nENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\nENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\nENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\nENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n\n# There is no need to store pip installation files inside docker image\nENV PIP_NO_CACHE_DIR=on\nENV PIP_DISABLE_PIP_VERSION_CHECK=1\n\n# libfabric and scaling variables\nENV LIBFABRIC_VERSION=\"1.20.0\"\nENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\nENV MPI_ROOT=/opt/amazon/openmpi\nENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\nENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\nENV MPICC=${MPI_ROOT}/bin/mpicc\nENV OPAL_PREFIX=${MPI_ROOT}\nENV RDMAV_FORK_SAFE=1\nENV FI_EFA_USE_DEVICE_RDMA=1\nENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\nENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib\n\n# Gaudi stack doesn't (yet) support 3.11, thus\n# downgrade python from 3.11.x to 3.10.14\nRUN source /opt/conda/etc/profile.d/conda.sh \\\n && conda activate base \\\n && conda install -y -q --no-pin python=3.10.14 \\\n && sed -i 's/python ==.*/python ==3.10.14/' /opt/conda/conda-meta/pinned \\\n && conda clean -a -f -y\n\nUSER root\n\n# install various support libraries\nRUN apt-get update \\\n && apt-get install -y --no-install-recommends \\\n apt-utils \\\n bc \\\n build-essential \\\n graphviz \\\n iproute2 \\\n libcairo2-dev \\\n libgl1 \\\n libglib2.0-dev \\\n libgnutls30 \\\n libgoogle-glog0v5 \\\n libgoogle-perftools-dev \\\n libhdf5-dev \\\n libjemalloc2 \\\n libjpeg-dev \\\n liblapack-dev \\\n libmkl-dev \\\n libnuma-dev \\\n libopenblas-dev \\\n libpcre2-dev \\\n libpq-dev \\\n libselinux1-dev \\\n lsof \\\n moreutils \\\n numactl \\\n protobuf-compiler \\\n && apt-get autoremove \\\n && apt-get clean \\\n && rm -rf /var/lib/apt/lists/*\n\n# install elastic fabric adapter\nRUN apt-get update && export EFA_VERSION=1.29.0 && mkdir /tmp/efa \\\n && wget -q -O- https://efa-installer.amazonaws.com/aws-efa-installer-$EFA_VERSION.tar.gz | tar xz -C /tmp/efa \\\n && cd /tmp/efa/aws-efa-installer && ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify \\\n && rm -rf /tmp/efa && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh \\\n && 
apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/*\n\n# install habana packages\nRUN wget -O- https://${ARTIFACTORY_URL}/artifactory/api/gpg/key/public | gpg --dearmor -o /usr/share/keyrings/habana-artifactory.gpg \\", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1720474884", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "discussion_id": "1720474884", - "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n+\n+# There is no need to store pip installation files inside docker image\n+ENV PIP_NO_CACHE_DIR=on\n+ENV PIP_DISABLE_PIP_VERSION_CHECK=1\n+\n+# libfabric and scaling variables\n+ENV LIBFABRIC_VERSION=\"1.20.0\"\n+ENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\n+ENV MPI_ROOT=/opt/amazon/openmpi\n+ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\n+ENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\n+ENV MPICC=${MPI_ROOT}/bin/mpicc\n+ENV OPAL_PREFIX=${MPI_ROOT}\n+ENV RDMAV_FORK_SAFE=1\n+ENV FI_EFA_USE_DEVICE_RDMA=1\n+ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\n+ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib\n+\n+# Gaudi stack doesn't (yet) support 3.11, thus\n+# downgrade python from 3.11.x to 3.10.14\n+RUN source /opt/conda/etc/profile.d/conda.sh \\\n+ && conda activate base \\\n+ && conda install -y -q --no-pin python=3.10.14 \\\n+ && sed -i 's/python ==.*/python ==3.10.14/' /opt/conda/conda-meta/pinned \\\n+ && conda clean -a -f -y\n+\n+USER root\n+\n+# install various support libraries\n+RUN apt-get update \\\n+ && apt-get install -y --no-install-recommends \\\n+ apt-utils \\\n+ bc \\\n+ build-essential \\\n+ graphviz \\\n+ iproute2 \\\n+ libcairo2-dev \\\n+ libgl1 \\\n+ libglib2.0-dev \\\n+ libgnutls30 \\\n+ libgoogle-glog0v5 \\\n+ libgoogle-perftools-dev \\\n+ libhdf5-dev \\\n+ libjemalloc2 \\\n+ libjpeg-dev \\\n+ liblapack-dev \\\n+ libmkl-dev \\\n+ libnuma-dev \\\n+ libopenblas-dev \\\n+ libpcre2-dev \\\n+ libpq-dev \\\n+ libselinux1-dev \\\n+ lsof \\\n+ moreutils \\\n+ numactl \\\n+ protobuf-compiler \\\n+ && apt-get autoremove \\\n+ && apt-get clean \\\n+ && rm -rf /var/lib/apt/lists/*\n+\n+# install elastic fabric adapter\n+RUN apt-get update && export EFA_VERSION=1.29.0 && mkdir /tmp/efa \\\n+ && wget -q -O- https://efa-installer.amazonaws.com/aws-efa-installer-$EFA_VERSION.tar.gz | tar xz -C /tmp/efa \\\n+ && cd /tmp/efa/aws-efa-installer && ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify \\\n+ && rm -rf /tmp/efa && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh \\\n+ && apt-get autoremove && 
apt-get clean && rm -rf /var/lib/apt/lists/*\n+\n+# install habana packages\n+RUN wget -O- https://${ARTIFACTORY_URL}/artifactory/api/gpg/key/public | gpg --dearmor -o /usr/share/keyrings/habana-artifactory.gpg \\", - "comment_created_at": "2024-08-16T23:41:52+00:00", - "comment_author": "thesuperzapper", - "comment_body": "Where possible, please use `curl -fsSL URL -o TARGET` so we don't need to require `wget`.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1720474977", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "created_at": "2024-08-16T23:42:14+00:00", - "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n\n# version details\nARG ARTIFACTORY_URL=vault.habana.ai\nARG VERSION=1.16.2\nARG REVISION=2\nARG PYTORCH_VERSION=2.2.2\nARG OS_NUMBER=2204\n\n# runtime variables\nENV DEBIAN_FRONTEND=noninteractive\nENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\nENV HABANA_LOGS=/var/log/habana_logs/\nENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\nENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\nENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\nENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n\n# There is no need to store pip installation files inside docker image\nENV PIP_NO_CACHE_DIR=on\nENV PIP_DISABLE_PIP_VERSION_CHECK=1\n\n# libfabric and scaling variables\nENV LIBFABRIC_VERSION=\"1.20.0\"\nENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\nENV MPI_ROOT=/opt/amazon/openmpi\nENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\nENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\nENV MPICC=${MPI_ROOT}/bin/mpicc\nENV OPAL_PREFIX=${MPI_ROOT}\nENV RDMAV_FORK_SAFE=1\nENV FI_EFA_USE_DEVICE_RDMA=1\nENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\nENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib\n\n# Gaudi stack doesn't (yet) support 3.11, thus\n# downgrade python from 3.11.x to 3.10.14\nRUN source /opt/conda/etc/profile.d/conda.sh \\\n && conda activate base \\\n && conda install -y -q --no-pin python=3.10.14 \\\n && sed -i 's/python ==.*/python ==3.10.14/' /opt/conda/conda-meta/pinned \\\n && conda clean -a -f -y\n\nUSER root\n\n# install various support libraries\nRUN apt-get update \\\n && apt-get install -y --no-install-recommends \\\n apt-utils \\\n bc \\\n build-essential \\\n graphviz \\\n iproute2 \\\n libcairo2-dev \\\n libgl1 \\\n libglib2.0-dev \\\n libgnutls30 \\\n libgoogle-glog0v5 \\\n libgoogle-perftools-dev \\\n libhdf5-dev \\\n libjemalloc2 \\\n libjpeg-dev \\\n liblapack-dev \\\n libmkl-dev \\\n libnuma-dev \\\n libopenblas-dev \\\n libpcre2-dev \\\n libpq-dev \\\n libselinux1-dev \\\n lsof \\\n moreutils \\\n numactl \\\n protobuf-compiler \\\n && apt-get autoremove \\\n && apt-get clean \\\n && rm -rf /var/lib/apt/lists/*\n\n# install elastic fabric adapter\nRUN apt-get update && export EFA_VERSION=1.29.0 && mkdir /tmp/efa \\\n && wget -q -O- https://efa-installer.amazonaws.com/aws-efa-installer-$EFA_VERSION.tar.gz | tar xz -C /tmp/efa \\", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1720474977", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7635, - "pr_file": 
"components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "discussion_id": "1720474977", - "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n+\n+# There is no need to store pip installation files inside docker image\n+ENV PIP_NO_CACHE_DIR=on\n+ENV PIP_DISABLE_PIP_VERSION_CHECK=1\n+\n+# libfabric and scaling variables\n+ENV LIBFABRIC_VERSION=\"1.20.0\"\n+ENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\n+ENV MPI_ROOT=/opt/amazon/openmpi\n+ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\n+ENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\n+ENV MPICC=${MPI_ROOT}/bin/mpicc\n+ENV OPAL_PREFIX=${MPI_ROOT}\n+ENV RDMAV_FORK_SAFE=1\n+ENV FI_EFA_USE_DEVICE_RDMA=1\n+ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\n+ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib\n+\n+# Gaudi stack doesn't (yet) support 3.11, thus\n+# downgrade python from 3.11.x to 3.10.14\n+RUN source /opt/conda/etc/profile.d/conda.sh \\\n+ && conda activate base \\\n+ && conda install -y -q --no-pin python=3.10.14 \\\n+ && sed -i 's/python ==.*/python ==3.10.14/' /opt/conda/conda-meta/pinned \\\n+ && conda clean -a -f -y\n+\n+USER root\n+\n+# install various support libraries\n+RUN apt-get update \\\n+ && apt-get install -y --no-install-recommends \\\n+ apt-utils \\\n+ bc \\\n+ build-essential \\\n+ graphviz \\\n+ iproute2 \\\n+ libcairo2-dev \\\n+ libgl1 \\\n+ libglib2.0-dev \\\n+ libgnutls30 \\\n+ libgoogle-glog0v5 \\\n+ libgoogle-perftools-dev \\\n+ libhdf5-dev \\\n+ libjemalloc2 \\\n+ libjpeg-dev \\\n+ liblapack-dev \\\n+ libmkl-dev \\\n+ libnuma-dev \\\n+ libopenblas-dev \\\n+ libpcre2-dev \\\n+ libpq-dev \\\n+ libselinux1-dev \\\n+ lsof \\\n+ moreutils \\\n+ numactl \\\n+ protobuf-compiler \\\n+ && apt-get autoremove \\\n+ && apt-get clean \\\n+ && rm -rf /var/lib/apt/lists/*\n+\n+# install elastic fabric adapter\n+RUN apt-get update && export EFA_VERSION=1.29.0 && mkdir /tmp/efa \\\n+ && wget -q -O- https://efa-installer.amazonaws.com/aws-efa-installer-$EFA_VERSION.tar.gz | tar xz -C /tmp/efa \\", - "comment_created_at": "2024-08-16T23:42:14+00:00", - "comment_author": "thesuperzapper", - "comment_body": "Where possible, please use `curl -fsSL URL -o TARGET` so we don't need to require `wget`.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1720475020", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "created_at": "2024-08-16T23:42:24+00:00", - "commented_code": "#\n# NOTE: Use the Makefiles to build this image correctly.\n#\n\nARG BASE_IMG=\nFROM $BASE_IMG\n\n# Content below is extracted from scripts/Dockerfiles here:\n# 
https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n\n# version details\nARG ARTIFACTORY_URL=vault.habana.ai\nARG VERSION=1.16.2\nARG REVISION=2\nARG PYTORCH_VERSION=2.2.2\nARG OS_NUMBER=2204\n\n# runtime variables\nENV DEBIAN_FRONTEND=noninteractive\nENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\nENV HABANA_LOGS=/var/log/habana_logs/\nENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\nENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\nENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\nENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n\n# There is no need to store pip installation files inside docker image\nENV PIP_NO_CACHE_DIR=on\nENV PIP_DISABLE_PIP_VERSION_CHECK=1\n\n# libfabric and scaling variables\nENV LIBFABRIC_VERSION=\"1.20.0\"\nENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\nENV MPI_ROOT=/opt/amazon/openmpi\nENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\nENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\nENV MPICC=${MPI_ROOT}/bin/mpicc\nENV OPAL_PREFIX=${MPI_ROOT}\nENV RDMAV_FORK_SAFE=1\nENV FI_EFA_USE_DEVICE_RDMA=1\nENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\nENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib\n\n# Gaudi stack doesn't (yet) support 3.11, thus\n# downgrade python from 3.11.x to 3.10.14\nRUN source /opt/conda/etc/profile.d/conda.sh \\\n && conda activate base \\\n && conda install -y -q --no-pin python=3.10.14 \\\n && sed -i 's/python ==.*/python ==3.10.14/' /opt/conda/conda-meta/pinned \\\n && conda clean -a -f -y\n\nUSER root\n\n# install various support libraries\nRUN apt-get update \\\n && apt-get install -y --no-install-recommends \\\n apt-utils \\\n bc \\\n build-essential \\\n graphviz \\\n iproute2 \\\n libcairo2-dev \\\n libgl1 \\\n libglib2.0-dev \\\n libgnutls30 \\\n libgoogle-glog0v5 \\\n libgoogle-perftools-dev \\\n libhdf5-dev \\\n libjemalloc2 \\\n libjpeg-dev \\\n liblapack-dev \\\n libmkl-dev \\\n libnuma-dev \\\n libopenblas-dev \\\n libpcre2-dev \\\n libpq-dev \\\n libselinux1-dev \\\n lsof \\\n moreutils \\\n numactl \\\n protobuf-compiler \\\n && apt-get autoremove \\\n && apt-get clean \\\n && rm -rf /var/lib/apt/lists/*\n\n# install elastic fabric adapter\nRUN apt-get update && export EFA_VERSION=1.29.0 && mkdir /tmp/efa \\\n && wget -q -O- https://efa-installer.amazonaws.com/aws-efa-installer-$EFA_VERSION.tar.gz | tar xz -C /tmp/efa \\\n && cd /tmp/efa/aws-efa-installer && ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify \\\n && rm -rf /tmp/efa && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh \\\n && apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/*\n\n# install habana packages\nRUN wget -O- https://${ARTIFACTORY_URL}/artifactory/api/gpg/key/public | gpg --dearmor -o /usr/share/keyrings/habana-artifactory.gpg \\\n && chown root:root /usr/share/keyrings/habana-artifactory.gpg \\\n && chmod 644 /usr/share/keyrings/habana-artifactory.gpg \\\n && echo \"deb [signed-by=/usr/share/keyrings/habana-artifactory.gpg] https://${ARTIFACTORY_URL}/artifactory/debian jammy main\" | tee -a /etc/apt/sources.list \\\n && apt-get update \\\n && apt-get install -y --no-install-recommends \\\n habanalabs-rdma-core=\"$VERSION\"-\"$REVISION\" \\\n habanalabs-thunk=\"$VERSION\"-\"$REVISION\" \\\n habanalabs-firmware-tools=\"$VERSION\"-\"$REVISION\" \\\n habanalabs-graph=\"$VERSION\"-\"$REVISION\" \\\n && apt-get autoremove 
--yes && apt-get clean && rm -rf /var/lib/apt/lists/* \\\n && sed --in-place \"/$ARTIFACTORY_URL/d\" /etc/apt/sources.list\n\n# install libfabric\nRUN wget -nv -O /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 https://github.com/ofiwg/libfabric/releases/download/v${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \\\n && cd /tmp/ && tar xf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \\\n && cd /tmp/libfabric-${LIBFABRIC_VERSION} \\\n && ./configure --prefix=$LIBFABRIC_ROOT --enable-psm3-verbs --enable-verbs=yes --with-synapseai=/usr \\\n && make -j && make install && cd / && rm -rf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 /tmp/libfabric-${LIBFABRIC_VERSION}\n\n# install hccl wrapper for ofi\nRUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/heads/main.zip \\", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1720475020", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "discussion_id": "1720475020", - "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n+\n+# There is no need to store pip installation files inside docker image\n+ENV PIP_NO_CACHE_DIR=on\n+ENV PIP_DISABLE_PIP_VERSION_CHECK=1\n+\n+# libfabric and scaling variables\n+ENV LIBFABRIC_VERSION=\"1.20.0\"\n+ENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\n+ENV MPI_ROOT=/opt/amazon/openmpi\n+ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\n+ENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\n+ENV MPICC=${MPI_ROOT}/bin/mpicc\n+ENV OPAL_PREFIX=${MPI_ROOT}\n+ENV RDMAV_FORK_SAFE=1\n+ENV FI_EFA_USE_DEVICE_RDMA=1\n+ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\n+ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib\n+\n+# Gaudi stack doesn't (yet) support 3.11, thus\n+# downgrade python from 3.11.x to 3.10.14\n+RUN source /opt/conda/etc/profile.d/conda.sh \\\n+ && conda activate base \\\n+ && conda install -y -q --no-pin python=3.10.14 \\\n+ && sed -i 's/python ==.*/python ==3.10.14/' /opt/conda/conda-meta/pinned \\\n+ && conda clean -a -f -y\n+\n+USER root\n+\n+# install various support libraries\n+RUN apt-get update \\\n+ && apt-get install -y --no-install-recommends \\\n+ apt-utils \\\n+ bc \\\n+ build-essential \\\n+ graphviz \\\n+ iproute2 \\\n+ libcairo2-dev \\\n+ libgl1 \\\n+ libglib2.0-dev \\\n+ libgnutls30 \\\n+ libgoogle-glog0v5 \\\n+ libgoogle-perftools-dev \\\n+ libhdf5-dev \\\n+ libjemalloc2 \\\n+ libjpeg-dev \\\n+ liblapack-dev \\\n+ libmkl-dev \\\n+ libnuma-dev \\\n+ libopenblas-dev \\\n+ libpcre2-dev \\\n+ libpq-dev \\\n+ libselinux1-dev \\\n+ lsof 
\\\n+ moreutils \\\n+ numactl \\\n+ protobuf-compiler \\\n+ && apt-get autoremove \\\n+ && apt-get clean \\\n+ && rm -rf /var/lib/apt/lists/*\n+\n+# install elastic fabric adapter\n+RUN apt-get update && export EFA_VERSION=1.29.0 && mkdir /tmp/efa \\\n+ && wget -q -O- https://efa-installer.amazonaws.com/aws-efa-installer-$EFA_VERSION.tar.gz | tar xz -C /tmp/efa \\\n+ && cd /tmp/efa/aws-efa-installer && ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify \\\n+ && rm -rf /tmp/efa && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh \\\n+ && apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/*\n+\n+# install habana packages\n+RUN wget -O- https://${ARTIFACTORY_URL}/artifactory/api/gpg/key/public | gpg --dearmor -o /usr/share/keyrings/habana-artifactory.gpg \\\n+ && chown root:root /usr/share/keyrings/habana-artifactory.gpg \\\n+ && chmod 644 /usr/share/keyrings/habana-artifactory.gpg \\\n+ && echo \"deb [signed-by=/usr/share/keyrings/habana-artifactory.gpg] https://${ARTIFACTORY_URL}/artifactory/debian jammy main\" | tee -a /etc/apt/sources.list \\\n+ && apt-get update \\\n+ && apt-get install -y --no-install-recommends \\\n+ habanalabs-rdma-core=\"$VERSION\"-\"$REVISION\" \\\n+ habanalabs-thunk=\"$VERSION\"-\"$REVISION\" \\\n+ habanalabs-firmware-tools=\"$VERSION\"-\"$REVISION\" \\\n+ habanalabs-graph=\"$VERSION\"-\"$REVISION\" \\\n+ && apt-get autoremove --yes && apt-get clean && rm -rf /var/lib/apt/lists/* \\\n+ && sed --in-place \"/$ARTIFACTORY_URL/d\" /etc/apt/sources.list\n+\n+# install libfabric\n+RUN wget -nv -O /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 https://github.com/ofiwg/libfabric/releases/download/v${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \\\n+ && cd /tmp/ && tar xf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \\\n+ && cd /tmp/libfabric-${LIBFABRIC_VERSION} \\\n+ && ./configure --prefix=$LIBFABRIC_ROOT --enable-psm3-verbs --enable-verbs=yes --with-synapseai=/usr \\\n+ && make -j && make install && cd / && rm -rf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 /tmp/libfabric-${LIBFABRIC_VERSION}\n+\n+# install hccl wrapper for ofi\n+RUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/heads/main.zip \\", - "comment_created_at": "2024-08-16T23:42:24+00:00", - "comment_author": "thesuperzapper", - "comment_body": "Where possible, please use `curl -fsSL URL -o TARGET` so we don't need to require `wget`.", - "pr_file_module": null - }, - { - "comment_id": "1724998664", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7635, - "pr_file": "components/example-notebook-servers/jupyter-pytorch-gaudi/Dockerfile", - "discussion_id": "1720475020", - "commented_code": "@@ -0,0 +1,143 @@\n+#\n+# NOTE: Use the Makefiles to build this image correctly.\n+#\n+\n+ARG BASE_IMG=\n+FROM $BASE_IMG\n+\n+# Content below is extracted from scripts/Dockerfiles here:\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/base\n+# https://github.com/HabanaAI/Setup_and_Install/tree/main/dockerfiles/pytorch\n+\n+# version details\n+ARG ARTIFACTORY_URL=vault.habana.ai\n+ARG VERSION=1.16.2\n+ARG REVISION=2\n+ARG PYTORCH_VERSION=2.2.2\n+ARG OS_NUMBER=2204\n+\n+# runtime variables\n+ENV DEBIAN_FRONTEND=noninteractive\n+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so\n+ENV HABANA_LOGS=/var/log/habana_logs/\n+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw\n+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins\n+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768\n+ENV 
PYTHONPATH=/home/$NB_UID:/usr/lib/habanalabs/\n+\n+# There is no need to store pip installation files inside docker image\n+ENV PIP_NO_CACHE_DIR=on\n+ENV PIP_DISABLE_PIP_VERSION_CHECK=1\n+\n+# libfabric and scaling variables\n+ENV LIBFABRIC_VERSION=\"1.20.0\"\n+ENV LIBFABRIC_ROOT=\"/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}\"\n+ENV MPI_ROOT=/opt/amazon/openmpi\n+ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH\n+ENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH\n+ENV MPICC=${MPI_ROOT}/bin/mpicc\n+ENV OPAL_PREFIX=${MPI_ROOT}\n+ENV RDMAV_FORK_SAFE=1\n+ENV FI_EFA_USE_DEVICE_RDMA=1\n+ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src\n+ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib\n+\n+# Gaudi stack doesn't (yet) support 3.11, thus\n+# downgrade python from 3.11.x to 3.10.14\n+RUN source /opt/conda/etc/profile.d/conda.sh \\\n+ && conda activate base \\\n+ && conda install -y -q --no-pin python=3.10.14 \\\n+ && sed -i 's/python ==.*/python ==3.10.14/' /opt/conda/conda-meta/pinned \\\n+ && conda clean -a -f -y\n+\n+USER root\n+\n+# install various support libraries\n+RUN apt-get update \\\n+ && apt-get install -y --no-install-recommends \\\n+ apt-utils \\\n+ bc \\\n+ build-essential \\\n+ graphviz \\\n+ iproute2 \\\n+ libcairo2-dev \\\n+ libgl1 \\\n+ libglib2.0-dev \\\n+ libgnutls30 \\\n+ libgoogle-glog0v5 \\\n+ libgoogle-perftools-dev \\\n+ libhdf5-dev \\\n+ libjemalloc2 \\\n+ libjpeg-dev \\\n+ liblapack-dev \\\n+ libmkl-dev \\\n+ libnuma-dev \\\n+ libopenblas-dev \\\n+ libpcre2-dev \\\n+ libpq-dev \\\n+ libselinux1-dev \\\n+ lsof \\\n+ moreutils \\\n+ numactl \\\n+ protobuf-compiler \\\n+ && apt-get autoremove \\\n+ && apt-get clean \\\n+ && rm -rf /var/lib/apt/lists/*\n+\n+# install elastic fabric adapter\n+RUN apt-get update && export EFA_VERSION=1.29.0 && mkdir /tmp/efa \\\n+ && wget -q -O- https://efa-installer.amazonaws.com/aws-efa-installer-$EFA_VERSION.tar.gz | tar xz -C /tmp/efa \\\n+ && cd /tmp/efa/aws-efa-installer && ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify \\\n+ && rm -rf /tmp/efa && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh \\\n+ && apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/*\n+\n+# install habana packages\n+RUN wget -O- https://${ARTIFACTORY_URL}/artifactory/api/gpg/key/public | gpg --dearmor -o /usr/share/keyrings/habana-artifactory.gpg \\\n+ && chown root:root /usr/share/keyrings/habana-artifactory.gpg \\\n+ && chmod 644 /usr/share/keyrings/habana-artifactory.gpg \\\n+ && echo \"deb [signed-by=/usr/share/keyrings/habana-artifactory.gpg] https://${ARTIFACTORY_URL}/artifactory/debian jammy main\" | tee -a /etc/apt/sources.list \\\n+ && apt-get update \\\n+ && apt-get install -y --no-install-recommends \\\n+ habanalabs-rdma-core=\"$VERSION\"-\"$REVISION\" \\\n+ habanalabs-thunk=\"$VERSION\"-\"$REVISION\" \\\n+ habanalabs-firmware-tools=\"$VERSION\"-\"$REVISION\" \\\n+ habanalabs-graph=\"$VERSION\"-\"$REVISION\" \\\n+ && apt-get autoremove --yes && apt-get clean && rm -rf /var/lib/apt/lists/* \\\n+ && sed --in-place \"/$ARTIFACTORY_URL/d\" /etc/apt/sources.list\n+\n+# install libfabric\n+RUN wget -nv -O /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 https://github.com/ofiwg/libfabric/releases/download/v${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \\\n+ && cd /tmp/ && tar xf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \\\n+ && cd /tmp/libfabric-${LIBFABRIC_VERSION} \\\n+ && ./configure --prefix=$LIBFABRIC_ROOT --enable-psm3-verbs --enable-verbs=yes 
--with-synapseai=/usr \\\n+ && make -j && make install && cd / && rm -rf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 /tmp/libfabric-${LIBFABRIC_VERSION}\n+\n+# install hccl wrapper for ofi\n+RUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/heads/main.zip \\", - "comment_created_at": "2024-08-21T12:51:02+00:00", - "comment_author": "tkatila", - "comment_body": "roger", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "594100126", - "pr_number": 5693, - "pr_file": "components/crud-web-apps/tensorboards/Dockerfile", - "created_at": "2021-03-15T07:27:00+00:00", - "commented_code": "COPY --from=frontend /src/dist/ /src/app/static/\r\n\r\nENTRYPOINT make run\r\nENTRYPOINT gunicorn -w 3 --bind 0.0.0.0:5000 --access-logfile - entrypoint:app", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "594100126", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5693, - "pr_file": "components/crud-web-apps/tensorboards/Dockerfile", - "discussion_id": "594100126", - "commented_code": "@@ -48,4 +48,4 @@ COPY ./tensorboards/backend/Makefile .\n \r\n COPY --from=frontend /src/dist/ /src/app/static/\r\n \r\n-ENTRYPOINT make run\r\n+ENTRYPOINT gunicorn -w 3 --bind 0.0.0.0:5000 --access-logfile - entrypoint:app\r", - "comment_created_at": "2021-03-15T07:27:00+00:00", - "comment_author": "nrchakradhar", - "comment_body": "Will this have any incompatibilities for IPv6?", - "pr_file_module": null - }, - { - "comment_id": "594201616", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5693, - "pr_file": "components/crud-web-apps/tensorboards/Dockerfile", - "discussion_id": "594100126", - "commented_code": "@@ -48,4 +48,4 @@ COPY ./tensorboards/backend/Makefile .\n \r\n COPY --from=frontend /src/dist/ /src/app/static/\r\n \r\n-ENTRYPOINT make run\r\n+ENTRYPOINT gunicorn -w 3 --bind 0.0.0.0:5000 --access-logfile - entrypoint:app\r", - "comment_created_at": "2021-03-15T10:07:46+00:00", - "comment_author": "davidspek", - "comment_body": "@nrchakradhar Good point. These same entrypoint is being used by the Jupyter-Web-App. Do you have suggestions how to make this IPv6 compatible?", - "pr_file_module": null - }, - { - "comment_id": "594277714", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5693, - "pr_file": "components/crud-web-apps/tensorboards/Dockerfile", - "discussion_id": "594100126", - "commented_code": "@@ -48,4 +48,4 @@ COPY ./tensorboards/backend/Makefile .\n \r\n COPY --from=frontend /src/dist/ /src/app/static/\r\n \r\n-ENTRYPOINT make run\r\n+ENTRYPOINT gunicorn -w 3 --bind 0.0.0.0:5000 --access-logfile - entrypoint:app\r", - "comment_created_at": "2021-03-15T12:04:49+00:00", - "comment_author": "nrchakradhar", - "comment_body": "I do not know anything about gunicorn, but from its documentation, the following may work:\r\nhttps://docs.gunicorn.org/en/stable/settings.html\r\nMultiple addresses can be bound. ex.:\r\n\r\n$ gunicorn -b 127.0.0.1:8000 -b [::1]:8000 test:app\r\nwill bind the test:app application on localhost both on ipv6 and ipv4 interfaces.\r\n\r\nI found an interesting observation here(its quite old though)\r\nhttps://github.com/benoitc/gunicorn/issues/1138#issuecomment-158358666\r\n\r\nCan we make this configurable from manifests so that IPv6 support is provided. 
There are few issues created already for IPv6 support: #5615 #5605 ", - "pr_file_module": null - }, - { - "comment_id": "595368484", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5693, - "pr_file": "components/crud-web-apps/tensorboards/Dockerfile", - "discussion_id": "594100126", - "commented_code": "@@ -48,4 +48,4 @@ COPY ./tensorboards/backend/Makefile .\n \r\n COPY --from=frontend /src/dist/ /src/app/static/\r\n \r\n-ENTRYPOINT make run\r\n+ENTRYPOINT gunicorn -w 3 --bind 0.0.0.0:5000 --access-logfile - entrypoint:app\r", - "comment_created_at": "2021-03-16T17:01:39+00:00", - "comment_author": "kimwnasptd", - "comment_body": "@nrchakradhar thank you for your comments. I'm planning on tackling the IPv6 support for all the web apps after 1.3.\r\n\r\nThe first step is to bind the IPv6 equivalent addresses as well, as you've pointed out in your links. But I want to do a good deep dive on this and test some edge cases for which I don't have the cycles right now. ", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-standardize-style-scripts.json b/_reviewers/kubeflow-standardize-style-scripts.json new file mode 100644 index 0000000..bc3efda --- /dev/null +++ b/_reviewers/kubeflow-standardize-style-scripts.json @@ -0,0 +1,82 @@ +[ + { + "discussion_id": "1039528459", + "pr_number": 6786, + "pr_file": "components/centraldashboard-angular/frontend/package.json", + "created_at": "2022-12-05T12:27:32+00:00", + "commented_code": "\"start\": \"ng serve --proxy-config proxy.conf.json\",\n \"build\": \"ng build\",\n \"watch\": \"ng build --watch --configuration development\",\n \"test\": \"ng test\"\n \"test\": \"ng test\",\n \"test:prod\": \"ng test --no-watch --browsers ChromeHeadlessNoSandbox\",\n \"tslint\": \"tslint -c tslint.json -p tsconfig.json\",", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1039528459", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6786, + "pr_file": "components/centraldashboard-angular/frontend/package.json", + "discussion_id": "1039528459", + "commented_code": "@@ -6,7 +6,12 @@\n \"start\": \"ng serve --proxy-config proxy.conf.json\",\n \"build\": \"ng build\",\n \"watch\": \"ng build --watch --configuration development\",\n- \"test\": \"ng test\"\n+ \"test\": \"ng test\",\n+ \"test:prod\": \"ng test --no-watch --browsers ChromeHeadlessNoSandbox\",\n+ \"tslint\": \"tslint -c tslint.json -p tsconfig.json\",", + "comment_created_at": "2022-12-05T12:27:32+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Why do we introduce rules for `tslint`, which is getting deprecated?\r\n\r\nLet's instead have the following 2 rules:\r\n```json\r\n \"lint-check\": \"ng lint\",\r\n \"lint\": \"ng lint --fix\",\r\n```\r\nsimilarly to KWA https://github.com/kubeflow/katib/blob/master/pkg/new-ui/v1beta1/frontend/package.json#L12-L13\r\n\r\nWe might also need to make some small changes to the `angular.json`. 
Take a look at the previous effort for this in https://github.com/kubeflow/kubeflow/pull/6464", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1049832416", + "pr_number": 6850, + "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", + "created_at": "2022-12-15T16:02:48+00:00", + "commented_code": "\"test\": \"ng test\",\n \"test:prod\": \"ng test --browsers=ChromeHeadless --watch=false\",\n \"lint-check\": \"ng lint\",\n \"lint\": \"ng lint --fix\"\n \"lint\": \"ng lint --fix\",\n \"format:check\": \"prettier --check 'src/**/*.{js,ts,html,scss,css}' || node scripts/check-format-error.js\",", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1049832416", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6850, + "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", + "discussion_id": "1049832416", + "commented_code": "@@ -12,7 +12,9 @@\n \"test\": \"ng test\",\n \"test:prod\": \"ng test --browsers=ChromeHeadless --watch=false\",\n \"lint-check\": \"ng lint\",\n- \"lint\": \"ng lint --fix\"\n+ \"lint\": \"ng lint --fix\",\n+ \"format:check\": \"prettier --check 'src/**/*.{js,ts,html,scss,css}' || node scripts/check-format-error.js\",", + "comment_created_at": "2022-12-15T16:02:48+00:00", + "comment_author": "kimwnasptd", + "comment_body": "I see that we don't have this script in TWA, like we do in Katib\r\nhttps://github.com/kubeflow/katib/tree/master/pkg/new-ui/v1beta1/frontend/scripts\r\n\r\nYet the action didn't fail. Why is that?", + "pr_file_module": null + }, + { + "comment_id": "1049839951", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6850, + "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", + "discussion_id": "1049832416", + "commented_code": "@@ -12,7 +12,9 @@\n \"test\": \"ng test\",\n \"test:prod\": \"ng test --browsers=ChromeHeadless --watch=false\",\n \"lint-check\": \"ng lint\",\n- \"lint\": \"ng lint --fix\"\n+ \"lint\": \"ng lint --fix\",\n+ \"format:check\": \"prettier --check 'src/**/*.{js,ts,html,scss,css}' || node scripts/check-format-error.js\",", + "comment_created_at": "2022-12-15T16:09:09+00:00", + "comment_author": "elenzio9", + "comment_body": "Oh sorry I missed that. Should I add this script in TWA as well?", + "pr_file_module": null + }, + { + "comment_id": "1049918468", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6850, + "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", + "discussion_id": "1049832416", + "commented_code": "@@ -12,7 +12,9 @@\n \"test\": \"ng test\",\n \"test:prod\": \"ng test --browsers=ChromeHeadless --watch=false\",\n \"lint-check\": \"ng lint\",\n- \"lint\": \"ng lint --fix\"\n+ \"lint\": \"ng lint --fix\",\n+ \"format:check\": \"prettier --check 'src/**/*.{js,ts,html,scss,css}' || node scripts/check-format-error.js\",", + "comment_created_at": "2022-12-15T17:02:25+00:00", + "comment_author": "kimwnasptd", + "comment_body": "OK I see what happened. `||` means that the second command (node script in this case) will NOT be run if the first part was successful. But the node script will run if the prettier check fails.\r\n\r\nThat script is just there to instruct users to run the formatting. 
Let's include it in this PR", + "pr_file_module": null + }, + { + "comment_id": "1049930407", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6850, + "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", + "discussion_id": "1049832416", + "commented_code": "@@ -12,7 +12,9 @@\n \"test\": \"ng test\",\n \"test:prod\": \"ng test --browsers=ChromeHeadless --watch=false\",\n \"lint-check\": \"ng lint\",\n- \"lint\": \"ng lint --fix\"\n+ \"lint\": \"ng lint --fix\",\n+ \"format:check\": \"prettier --check 'src/**/*.{js,ts,html,scss,css}' || node scripts/check-format-error.js\",", + "comment_created_at": "2022-12-15T17:11:24+00:00", + "comment_author": "elenzio9", + "comment_body": "ACK! I just added it!", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-standardize-style-scripts.md b/_reviewers/kubeflow-standardize-style-scripts.md index c785ac5..205a9eb 100644 --- a/_reviewers/kubeflow-standardize-style-scripts.md +++ b/_reviewers/kubeflow-standardize-style-scripts.md @@ -31,87 +31,3 @@ This approach ensures: 4. Alignment with established patterns in other repositories like Katib When adding these scripts, make appropriate configurations in angular.json as needed to support them. - - -[ - { - "discussion_id": "1039528459", - "pr_number": 6786, - "pr_file": "components/centraldashboard-angular/frontend/package.json", - "created_at": "2022-12-05T12:27:32+00:00", - "commented_code": "\"start\": \"ng serve --proxy-config proxy.conf.json\",\n \"build\": \"ng build\",\n \"watch\": \"ng build --watch --configuration development\",\n \"test\": \"ng test\"\n \"test\": \"ng test\",\n \"test:prod\": \"ng test --no-watch --browsers ChromeHeadlessNoSandbox\",\n \"tslint\": \"tslint -c tslint.json -p tsconfig.json\",", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1039528459", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6786, - "pr_file": "components/centraldashboard-angular/frontend/package.json", - "discussion_id": "1039528459", - "commented_code": "@@ -6,7 +6,12 @@\n \"start\": \"ng serve --proxy-config proxy.conf.json\",\n \"build\": \"ng build\",\n \"watch\": \"ng build --watch --configuration development\",\n- \"test\": \"ng test\"\n+ \"test\": \"ng test\",\n+ \"test:prod\": \"ng test --no-watch --browsers ChromeHeadlessNoSandbox\",\n+ \"tslint\": \"tslint -c tslint.json -p tsconfig.json\",", - "comment_created_at": "2022-12-05T12:27:32+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Why do we introduce rules for `tslint`, which is getting deprecated?\r\n\r\nLet's instead have the following 2 rules:\r\n```json\r\n \"lint-check\": \"ng lint\",\r\n \"lint\": \"ng lint --fix\",\r\n```\r\nsimilarly to KWA https://github.com/kubeflow/katib/blob/master/pkg/new-ui/v1beta1/frontend/package.json#L12-L13\r\n\r\nWe might also need to make some small changes to the `angular.json`. 
Take a look at the previous effort for this in https://github.com/kubeflow/kubeflow/pull/6464", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1049832416", - "pr_number": 6850, - "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", - "created_at": "2022-12-15T16:02:48+00:00", - "commented_code": "\"test\": \"ng test\",\n \"test:prod\": \"ng test --browsers=ChromeHeadless --watch=false\",\n \"lint-check\": \"ng lint\",\n \"lint\": \"ng lint --fix\"\n \"lint\": \"ng lint --fix\",\n \"format:check\": \"prettier --check 'src/**/*.{js,ts,html,scss,css}' || node scripts/check-format-error.js\",", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1049832416", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6850, - "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", - "discussion_id": "1049832416", - "commented_code": "@@ -12,7 +12,9 @@\n \"test\": \"ng test\",\n \"test:prod\": \"ng test --browsers=ChromeHeadless --watch=false\",\n \"lint-check\": \"ng lint\",\n- \"lint\": \"ng lint --fix\"\n+ \"lint\": \"ng lint --fix\",\n+ \"format:check\": \"prettier --check 'src/**/*.{js,ts,html,scss,css}' || node scripts/check-format-error.js\",", - "comment_created_at": "2022-12-15T16:02:48+00:00", - "comment_author": "kimwnasptd", - "comment_body": "I see that we don't have this script in TWA, like we do in Katib\r\nhttps://github.com/kubeflow/katib/tree/master/pkg/new-ui/v1beta1/frontend/scripts\r\n\r\nYet the action didn't fail. Why is that?", - "pr_file_module": null - }, - { - "comment_id": "1049839951", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6850, - "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", - "discussion_id": "1049832416", - "commented_code": "@@ -12,7 +12,9 @@\n \"test\": \"ng test\",\n \"test:prod\": \"ng test --browsers=ChromeHeadless --watch=false\",\n \"lint-check\": \"ng lint\",\n- \"lint\": \"ng lint --fix\"\n+ \"lint\": \"ng lint --fix\",\n+ \"format:check\": \"prettier --check 'src/**/*.{js,ts,html,scss,css}' || node scripts/check-format-error.js\",", - "comment_created_at": "2022-12-15T16:09:09+00:00", - "comment_author": "elenzio9", - "comment_body": "Oh sorry I missed that. Should I add this script in TWA as well?", - "pr_file_module": null - }, - { - "comment_id": "1049918468", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6850, - "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", - "discussion_id": "1049832416", - "commented_code": "@@ -12,7 +12,9 @@\n \"test\": \"ng test\",\n \"test:prod\": \"ng test --browsers=ChromeHeadless --watch=false\",\n \"lint-check\": \"ng lint\",\n- \"lint\": \"ng lint --fix\"\n+ \"lint\": \"ng lint --fix\",\n+ \"format:check\": \"prettier --check 'src/**/*.{js,ts,html,scss,css}' || node scripts/check-format-error.js\",", - "comment_created_at": "2022-12-15T17:02:25+00:00", - "comment_author": "kimwnasptd", - "comment_body": "OK I see what happened. `||` means that the second command (node script in this case) will NOT be run if the first part was successful. But the node script will run if the prettier check fails.\r\n\r\nThat script is just there to instruct users to run the formatting. 
Let's include it in this PR", - "pr_file_module": null - }, - { - "comment_id": "1049930407", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6850, - "pr_file": "components/crud-web-apps/tensorboards/frontend/package.json", - "discussion_id": "1049832416", - "commented_code": "@@ -12,7 +12,9 @@\n \"test\": \"ng test\",\n \"test:prod\": \"ng test --browsers=ChromeHeadless --watch=false\",\n \"lint-check\": \"ng lint\",\n- \"lint\": \"ng lint --fix\"\n+ \"lint\": \"ng lint --fix\",\n+ \"format:check\": \"prettier --check 'src/**/*.{js,ts,html,scss,css}' || node scripts/check-format-error.js\",", - "comment_created_at": "2022-12-15T17:11:24+00:00", - "comment_author": "elenzio9", - "comment_body": "ACK! I just added it!", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-structure-for-navigation.json b/_reviewers/kubeflow-structure-for-navigation.json new file mode 100644 index 0000000..364044c --- /dev/null +++ b/_reviewers/kubeflow-structure-for-navigation.json @@ -0,0 +1,106 @@ +[ + { + "discussion_id": "1746922261", + "pr_number": 7642, + "pr_file": ".github/ISSUE_TEMPLATE/config.yml", + "created_at": "2024-09-06T10:46:48+00:00", + "commented_code": "contact_links:\n - name: Kubeflow Documentation\n url: https://www.kubeflow.org/\n about: The official Kubeflow documentation\n - name: Issues related to KServe\n url: https://github.com/kserve/kserve/issues/new/choose\n about: Create issue in kserve/kserve repository\n - name: Issues related to Kubeflow Katib\n url: https://github.com/kubeflow/katib/issues/new/choose\n about: Create issue in kubeflow/katib repository\n - name: Issues related to Kubeflow Model Registry\n url: https://github.com/kubeflow/model-registry/issues/new/choose\n about: Create issue in kubeflow/model-registry repository\n - name: Issues related to Kubeflow MPI Operator\n url: https://github.com/kubeflow/mpi-operator/issues/new/choose\n about: Create issue in kubeflow/mpi-operator repository\n - name: Issues related to Kubeflow Notebooks\n url: https://github.com/kubeflow/notebooks/issues/new/choose\n about: Create issue in kubeflow/notebooks repository\n - name: Issues related to Kubeflow Pipelines\n url: https://github.com/kubeflow/pipelines/issues/new/choose\n about: Create issue in kubeflow/pipelines repository\n - name: Issues related to Kubeflow Spark Operator\n url: https://github.com/kubeflow/spark-operator/issues/new/choose\n about: Create issue in kubeflow/spark-operator repository\n - name: Issues related to Kubeflow Training Operator\n url: https://github.com/kubeflow/training-operator/issues/new/choose\n about: Create issue in kubeflow/training-operator repository\n - name: Issues related to Kubeflow Platform Manifests\n url: https://github.com/kubeflow/manifests/issues/new/choose\n about: Create issue in kubeflow/manifests repository\n - name: Issues related to other Kubeflow Platform components\n url: https://github.com/kubeflow/dashboard/issues/new/choose\n about: Create issue in kubeflow/dashboard repository\n - name: Issues related to Kubeflow community or Kubeflow ecosystem\n url: https://github.com/kubeflow/community/issues/new/choose\n about: Create issue in kubeflow/community repository\n about: Official Kubeflow Documentation\n\n - name: Kubeflow Slack\n url: https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels\n about: Join the Kubeflow Slack\n\n - name: Issues - Community & Governance\n url: https://github.com/kubeflow/community/issues\n about: Please use the `kubeflow/community` repository\n\n - 
name: Issues - KServe\n url: https://github.com/kserve/kserve/issues\n about: Please use the `kserve/kserve` repository\n\n - name: Issues - Kubeflow Katib\n url: https://github.com/kubeflow/katib/issues\n about: Please use the `kubeflow/katib` repository\n\n - name: Issues - Kubeflow Model Registry\n url: https://github.com/kubeflow/model-registry/issues\n about: Please use the `kubeflow/model-registry` repository\n\n - name: Issues - Kubeflow MPI Operator\n url: https://github.com/kubeflow/mpi-operator/issues\n about: Please use the `kubeflow/mpi-operator` repository\n\n - name: Issues - Kubeflow Notebooks\n url: https://github.com/kubeflow/notebooks/issues\n about: Please use the `kubeflow/notebooks` repository\n\n - name: Issues - Kubeflow Pipelines\n url: https://github.com/kubeflow/pipelines/issues\n about: Please use the `kubeflow/pipelines` repository\n\n - name: Issues - Kubeflow Spark Operator\n url: https://github.com/kubeflow/spark-operator/issues\n about: Please use the `kubeflow/spark-operator` repository\n\n - name: Issues - Kubeflow Training Operator\n url: https://github.com/kubeflow/training-operator/issues\n about: Please use the `kubeflow/training-operator` repository\n\n - name: Issues - Kubeflow Platform - Central Dashboard\n url: https://github.com/kubeflow/dashboard/issues\n about: Please use the `kubeflow/dashboard` repository\n\n - name: Issues - Kubeflow Platform - Profile Controller\n url: https://github.com/kubeflow/dashboard/issues\n about: Please use the `kubeflow/dashboard` repository", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1746922261", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": ".github/ISSUE_TEMPLATE/config.yml", + "discussion_id": "1746922261", + "commented_code": "@@ -2,37 +2,56 @@ blank_issues_enabled: false\n contact_links:\n - name: Kubeflow Documentation\n url: https://www.kubeflow.org/\n- about: The official Kubeflow documentation\n- - name: Issues related to KServe\n- url: https://github.com/kserve/kserve/issues/new/choose\n- about: Create issue in kserve/kserve repository\n- - name: Issues related to Kubeflow Katib\n- url: https://github.com/kubeflow/katib/issues/new/choose\n- about: Create issue in kubeflow/katib repository\n- - name: Issues related to Kubeflow Model Registry\n- url: https://github.com/kubeflow/model-registry/issues/new/choose\n- about: Create issue in kubeflow/model-registry repository\n- - name: Issues related to Kubeflow MPI Operator\n- url: https://github.com/kubeflow/mpi-operator/issues/new/choose\n- about: Create issue in kubeflow/mpi-operator repository\n- - name: Issues related to Kubeflow Notebooks\n- url: https://github.com/kubeflow/notebooks/issues/new/choose\n- about: Create issue in kubeflow/notebooks repository\n- - name: Issues related to Kubeflow Pipelines\n- url: https://github.com/kubeflow/pipelines/issues/new/choose\n- about: Create issue in kubeflow/pipelines repository\n- - name: Issues related to Kubeflow Spark Operator\n- url: https://github.com/kubeflow/spark-operator/issues/new/choose\n- about: Create issue in kubeflow/spark-operator repository\n- - name: Issues related to Kubeflow Training Operator\n- url: https://github.com/kubeflow/training-operator/issues/new/choose\n- about: Create issue in kubeflow/training-operator repository\n- - name: Issues related to Kubeflow Platform Manifests\n- url: https://github.com/kubeflow/manifests/issues/new/choose\n- about: Create issue in kubeflow/manifests repository\n- - name: Issues 
related to other Kubeflow Platform components\n- url: https://github.com/kubeflow/dashboard/issues/new/choose\n- about: Create issue in kubeflow/dashboard repository\n- - name: Issues related to Kubeflow community or Kubeflow ecosystem\n- url: https://github.com/kubeflow/community/issues/new/choose\n- about: Create issue in kubeflow/community repository\n+ about: Official Kubeflow Documentation\n+\n+ - name: Kubeflow Slack\n+ url: https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels\n+ about: Join the Kubeflow Slack\n+\n+ - name: Issues - Community & Governance\n+ url: https://github.com/kubeflow/community/issues\n+ about: Please use the `kubeflow/community` repository\n+\n+ - name: Issues - KServe\n+ url: https://github.com/kserve/kserve/issues\n+ about: Please use the `kserve/kserve` repository\n+\n+ - name: Issues - Kubeflow Katib\n+ url: https://github.com/kubeflow/katib/issues\n+ about: Please use the `kubeflow/katib` repository\n+\n+ - name: Issues - Kubeflow Model Registry\n+ url: https://github.com/kubeflow/model-registry/issues\n+ about: Please use the `kubeflow/model-registry` repository\n+\n+ - name: Issues - Kubeflow MPI Operator\n+ url: https://github.com/kubeflow/mpi-operator/issues\n+ about: Please use the `kubeflow/mpi-operator` repository\n+\n+ - name: Issues - Kubeflow Notebooks\n+ url: https://github.com/kubeflow/notebooks/issues\n+ about: Please use the `kubeflow/notebooks` repository\n+\n+ - name: Issues - Kubeflow Pipelines\n+ url: https://github.com/kubeflow/pipelines/issues\n+ about: Please use the `kubeflow/pipelines` repository\n+\n+ - name: Issues - Kubeflow Spark Operator\n+ url: https://github.com/kubeflow/spark-operator/issues\n+ about: Please use the `kubeflow/spark-operator` repository\n+\n+ - name: Issues - Kubeflow Training Operator\n+ url: https://github.com/kubeflow/training-operator/issues\n+ about: Please use the `kubeflow/training-operator` repository\n+\n+ - name: Issues - Kubeflow Platform - Central Dashboard\n+ url: https://github.com/kubeflow/dashboard/issues\n+ about: Please use the `kubeflow/dashboard` repository\n+\n+ - name: Issues - Kubeflow Platform - Profile Controller\n+ url: https://github.com/kubeflow/dashboard/issues\n+ about: Please use the `kubeflow/dashboard` repository", + "comment_created_at": "2024-09-06T10:46:48+00:00", + "comment_author": "andreyvelich", + "comment_body": "Should we combine this to the single entry ?\r\n```\r\n- name: Issues - Kubeflow Platform - Central Dashboard and Profile Controller\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1747524894", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": ".github/ISSUE_TEMPLATE/config.yml", + "discussion_id": "1746922261", + "commented_code": "@@ -2,37 +2,56 @@ blank_issues_enabled: false\n contact_links:\n - name: Kubeflow Documentation\n url: https://www.kubeflow.org/\n- about: The official Kubeflow documentation\n- - name: Issues related to KServe\n- url: https://github.com/kserve/kserve/issues/new/choose\n- about: Create issue in kserve/kserve repository\n- - name: Issues related to Kubeflow Katib\n- url: https://github.com/kubeflow/katib/issues/new/choose\n- about: Create issue in kubeflow/katib repository\n- - name: Issues related to Kubeflow Model Registry\n- url: https://github.com/kubeflow/model-registry/issues/new/choose\n- about: Create issue in kubeflow/model-registry repository\n- - name: Issues related to Kubeflow MPI Operator\n- url: https://github.com/kubeflow/mpi-operator/issues/new/choose\n- about: 
Create issue in kubeflow/mpi-operator repository\n- - name: Issues related to Kubeflow Notebooks\n- url: https://github.com/kubeflow/notebooks/issues/new/choose\n- about: Create issue in kubeflow/notebooks repository\n- - name: Issues related to Kubeflow Pipelines\n- url: https://github.com/kubeflow/pipelines/issues/new/choose\n- about: Create issue in kubeflow/pipelines repository\n- - name: Issues related to Kubeflow Spark Operator\n- url: https://github.com/kubeflow/spark-operator/issues/new/choose\n- about: Create issue in kubeflow/spark-operator repository\n- - name: Issues related to Kubeflow Training Operator\n- url: https://github.com/kubeflow/training-operator/issues/new/choose\n- about: Create issue in kubeflow/training-operator repository\n- - name: Issues related to Kubeflow Platform Manifests\n- url: https://github.com/kubeflow/manifests/issues/new/choose\n- about: Create issue in kubeflow/manifests repository\n- - name: Issues related to other Kubeflow Platform components\n- url: https://github.com/kubeflow/dashboard/issues/new/choose\n- about: Create issue in kubeflow/dashboard repository\n- - name: Issues related to Kubeflow community or Kubeflow ecosystem\n- url: https://github.com/kubeflow/community/issues/new/choose\n- about: Create issue in kubeflow/community repository\n+ about: Official Kubeflow Documentation\n+\n+ - name: Kubeflow Slack\n+ url: https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels\n+ about: Join the Kubeflow Slack\n+\n+ - name: Issues - Community & Governance\n+ url: https://github.com/kubeflow/community/issues\n+ about: Please use the `kubeflow/community` repository\n+\n+ - name: Issues - KServe\n+ url: https://github.com/kserve/kserve/issues\n+ about: Please use the `kserve/kserve` repository\n+\n+ - name: Issues - Kubeflow Katib\n+ url: https://github.com/kubeflow/katib/issues\n+ about: Please use the `kubeflow/katib` repository\n+\n+ - name: Issues - Kubeflow Model Registry\n+ url: https://github.com/kubeflow/model-registry/issues\n+ about: Please use the `kubeflow/model-registry` repository\n+\n+ - name: Issues - Kubeflow MPI Operator\n+ url: https://github.com/kubeflow/mpi-operator/issues\n+ about: Please use the `kubeflow/mpi-operator` repository\n+\n+ - name: Issues - Kubeflow Notebooks\n+ url: https://github.com/kubeflow/notebooks/issues\n+ about: Please use the `kubeflow/notebooks` repository\n+\n+ - name: Issues - Kubeflow Pipelines\n+ url: https://github.com/kubeflow/pipelines/issues\n+ about: Please use the `kubeflow/pipelines` repository\n+\n+ - name: Issues - Kubeflow Spark Operator\n+ url: https://github.com/kubeflow/spark-operator/issues\n+ about: Please use the `kubeflow/spark-operator` repository\n+\n+ - name: Issues - Kubeflow Training Operator\n+ url: https://github.com/kubeflow/training-operator/issues\n+ about: Please use the `kubeflow/training-operator` repository\n+\n+ - name: Issues - Kubeflow Platform - Central Dashboard\n+ url: https://github.com/kubeflow/dashboard/issues\n+ about: Please use the `kubeflow/dashboard` repository\n+\n+ - name: Issues - Kubeflow Platform - Profile Controller\n+ url: https://github.com/kubeflow/dashboard/issues\n+ about: Please use the `kubeflow/dashboard` repository", + "comment_created_at": "2024-09-06T18:02:25+00:00", + "comment_author": "thesuperzapper", + "comment_body": "Personally, I think it's easier for users to skim if everything is at the same approximate width, and it's not a huge deal to have two entries (similar to what I have done in the main README's \"Kubeflow 
Platform\" table).\r\n\r\nThat is, users might only read the first part of the \"Central Dashboard\" and then not see the \"Profile Controller\" part, and not know where to raise the issue.", + "pr_file_module": null + }, + { + "comment_id": "1747585806", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": ".github/ISSUE_TEMPLATE/config.yml", + "discussion_id": "1746922261", + "commented_code": "@@ -2,37 +2,56 @@ blank_issues_enabled: false\n contact_links:\n - name: Kubeflow Documentation\n url: https://www.kubeflow.org/\n- about: The official Kubeflow documentation\n- - name: Issues related to KServe\n- url: https://github.com/kserve/kserve/issues/new/choose\n- about: Create issue in kserve/kserve repository\n- - name: Issues related to Kubeflow Katib\n- url: https://github.com/kubeflow/katib/issues/new/choose\n- about: Create issue in kubeflow/katib repository\n- - name: Issues related to Kubeflow Model Registry\n- url: https://github.com/kubeflow/model-registry/issues/new/choose\n- about: Create issue in kubeflow/model-registry repository\n- - name: Issues related to Kubeflow MPI Operator\n- url: https://github.com/kubeflow/mpi-operator/issues/new/choose\n- about: Create issue in kubeflow/mpi-operator repository\n- - name: Issues related to Kubeflow Notebooks\n- url: https://github.com/kubeflow/notebooks/issues/new/choose\n- about: Create issue in kubeflow/notebooks repository\n- - name: Issues related to Kubeflow Pipelines\n- url: https://github.com/kubeflow/pipelines/issues/new/choose\n- about: Create issue in kubeflow/pipelines repository\n- - name: Issues related to Kubeflow Spark Operator\n- url: https://github.com/kubeflow/spark-operator/issues/new/choose\n- about: Create issue in kubeflow/spark-operator repository\n- - name: Issues related to Kubeflow Training Operator\n- url: https://github.com/kubeflow/training-operator/issues/new/choose\n- about: Create issue in kubeflow/training-operator repository\n- - name: Issues related to Kubeflow Platform Manifests\n- url: https://github.com/kubeflow/manifests/issues/new/choose\n- about: Create issue in kubeflow/manifests repository\n- - name: Issues related to other Kubeflow Platform components\n- url: https://github.com/kubeflow/dashboard/issues/new/choose\n- about: Create issue in kubeflow/dashboard repository\n- - name: Issues related to Kubeflow community or Kubeflow ecosystem\n- url: https://github.com/kubeflow/community/issues/new/choose\n- about: Create issue in kubeflow/community repository\n+ about: Official Kubeflow Documentation\n+\n+ - name: Kubeflow Slack\n+ url: https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels\n+ about: Join the Kubeflow Slack\n+\n+ - name: Issues - Community & Governance\n+ url: https://github.com/kubeflow/community/issues\n+ about: Please use the `kubeflow/community` repository\n+\n+ - name: Issues - KServe\n+ url: https://github.com/kserve/kserve/issues\n+ about: Please use the `kserve/kserve` repository\n+\n+ - name: Issues - Kubeflow Katib\n+ url: https://github.com/kubeflow/katib/issues\n+ about: Please use the `kubeflow/katib` repository\n+\n+ - name: Issues - Kubeflow Model Registry\n+ url: https://github.com/kubeflow/model-registry/issues\n+ about: Please use the `kubeflow/model-registry` repository\n+\n+ - name: Issues - Kubeflow MPI Operator\n+ url: https://github.com/kubeflow/mpi-operator/issues\n+ about: Please use the `kubeflow/mpi-operator` repository\n+\n+ - name: Issues - Kubeflow Notebooks\n+ url: 
https://github.com/kubeflow/notebooks/issues\n+ about: Please use the `kubeflow/notebooks` repository\n+\n+ - name: Issues - Kubeflow Pipelines\n+ url: https://github.com/kubeflow/pipelines/issues\n+ about: Please use the `kubeflow/pipelines` repository\n+\n+ - name: Issues - Kubeflow Spark Operator\n+ url: https://github.com/kubeflow/spark-operator/issues\n+ about: Please use the `kubeflow/spark-operator` repository\n+\n+ - name: Issues - Kubeflow Training Operator\n+ url: https://github.com/kubeflow/training-operator/issues\n+ about: Please use the `kubeflow/training-operator` repository\n+\n+ - name: Issues - Kubeflow Platform - Central Dashboard\n+ url: https://github.com/kubeflow/dashboard/issues\n+ about: Please use the `kubeflow/dashboard` repository\n+\n+ - name: Issues - Kubeflow Platform - Profile Controller\n+ url: https://github.com/kubeflow/dashboard/issues\n+ about: Please use the `kubeflow/dashboard` repository", + "comment_created_at": "2024-09-06T18:55:32+00:00", + "comment_author": "andreyvelich", + "comment_body": "@thesuperzapper How we can explain users if they see issues with KFAM and Admission Webhook, they should use `kubeflow/dashboard` repo ?", + "pr_file_module": null + }, + { + "comment_id": "1747739895", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": ".github/ISSUE_TEMPLATE/config.yml", + "discussion_id": "1746922261", + "commented_code": "@@ -2,37 +2,56 @@ blank_issues_enabled: false\n contact_links:\n - name: Kubeflow Documentation\n url: https://www.kubeflow.org/\n- about: The official Kubeflow documentation\n- - name: Issues related to KServe\n- url: https://github.com/kserve/kserve/issues/new/choose\n- about: Create issue in kserve/kserve repository\n- - name: Issues related to Kubeflow Katib\n- url: https://github.com/kubeflow/katib/issues/new/choose\n- about: Create issue in kubeflow/katib repository\n- - name: Issues related to Kubeflow Model Registry\n- url: https://github.com/kubeflow/model-registry/issues/new/choose\n- about: Create issue in kubeflow/model-registry repository\n- - name: Issues related to Kubeflow MPI Operator\n- url: https://github.com/kubeflow/mpi-operator/issues/new/choose\n- about: Create issue in kubeflow/mpi-operator repository\n- - name: Issues related to Kubeflow Notebooks\n- url: https://github.com/kubeflow/notebooks/issues/new/choose\n- about: Create issue in kubeflow/notebooks repository\n- - name: Issues related to Kubeflow Pipelines\n- url: https://github.com/kubeflow/pipelines/issues/new/choose\n- about: Create issue in kubeflow/pipelines repository\n- - name: Issues related to Kubeflow Spark Operator\n- url: https://github.com/kubeflow/spark-operator/issues/new/choose\n- about: Create issue in kubeflow/spark-operator repository\n- - name: Issues related to Kubeflow Training Operator\n- url: https://github.com/kubeflow/training-operator/issues/new/choose\n- about: Create issue in kubeflow/training-operator repository\n- - name: Issues related to Kubeflow Platform Manifests\n- url: https://github.com/kubeflow/manifests/issues/new/choose\n- about: Create issue in kubeflow/manifests repository\n- - name: Issues related to other Kubeflow Platform components\n- url: https://github.com/kubeflow/dashboard/issues/new/choose\n- about: Create issue in kubeflow/dashboard repository\n- - name: Issues related to Kubeflow community or Kubeflow ecosystem\n- url: https://github.com/kubeflow/community/issues/new/choose\n- about: Create issue in kubeflow/community repository\n+ about: Official Kubeflow 
Documentation\n+\n+ - name: Kubeflow Slack\n+ url: https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels\n+ about: Join the Kubeflow Slack\n+\n+ - name: Issues - Community & Governance\n+ url: https://github.com/kubeflow/community/issues\n+ about: Please use the `kubeflow/community` repository\n+\n+ - name: Issues - KServe\n+ url: https://github.com/kserve/kserve/issues\n+ about: Please use the `kserve/kserve` repository\n+\n+ - name: Issues - Kubeflow Katib\n+ url: https://github.com/kubeflow/katib/issues\n+ about: Please use the `kubeflow/katib` repository\n+\n+ - name: Issues - Kubeflow Model Registry\n+ url: https://github.com/kubeflow/model-registry/issues\n+ about: Please use the `kubeflow/model-registry` repository\n+\n+ - name: Issues - Kubeflow MPI Operator\n+ url: https://github.com/kubeflow/mpi-operator/issues\n+ about: Please use the `kubeflow/mpi-operator` repository\n+\n+ - name: Issues - Kubeflow Notebooks\n+ url: https://github.com/kubeflow/notebooks/issues\n+ about: Please use the `kubeflow/notebooks` repository\n+\n+ - name: Issues - Kubeflow Pipelines\n+ url: https://github.com/kubeflow/pipelines/issues\n+ about: Please use the `kubeflow/pipelines` repository\n+\n+ - name: Issues - Kubeflow Spark Operator\n+ url: https://github.com/kubeflow/spark-operator/issues\n+ about: Please use the `kubeflow/spark-operator` repository\n+\n+ - name: Issues - Kubeflow Training Operator\n+ url: https://github.com/kubeflow/training-operator/issues\n+ about: Please use the `kubeflow/training-operator` repository\n+\n+ - name: Issues - Kubeflow Platform - Central Dashboard\n+ url: https://github.com/kubeflow/dashboard/issues\n+ about: Please use the `kubeflow/dashboard` repository\n+\n+ - name: Issues - Kubeflow Platform - Profile Controller\n+ url: https://github.com/kubeflow/dashboard/issues\n+ about: Please use the `kubeflow/dashboard` repository", + "comment_created_at": "2024-09-06T21:31:53+00:00", + "comment_author": "thesuperzapper", + "comment_body": "I think most people would associate them with the profile controller or dashboard, and those who understand the distinction will already know about the code location (or be smart enough to find the repo).\r\n\r\nI want to avoid including the confusing components here, as its mostly for lost users to find somewhere to get help.", + "pr_file_module": null + }, + { + "comment_id": "1747744318", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": ".github/ISSUE_TEMPLATE/config.yml", + "discussion_id": "1746922261", + "commented_code": "@@ -2,37 +2,56 @@ blank_issues_enabled: false\n contact_links:\n - name: Kubeflow Documentation\n url: https://www.kubeflow.org/\n- about: The official Kubeflow documentation\n- - name: Issues related to KServe\n- url: https://github.com/kserve/kserve/issues/new/choose\n- about: Create issue in kserve/kserve repository\n- - name: Issues related to Kubeflow Katib\n- url: https://github.com/kubeflow/katib/issues/new/choose\n- about: Create issue in kubeflow/katib repository\n- - name: Issues related to Kubeflow Model Registry\n- url: https://github.com/kubeflow/model-registry/issues/new/choose\n- about: Create issue in kubeflow/model-registry repository\n- - name: Issues related to Kubeflow MPI Operator\n- url: https://github.com/kubeflow/mpi-operator/issues/new/choose\n- about: Create issue in kubeflow/mpi-operator repository\n- - name: Issues related to Kubeflow Notebooks\n- url: https://github.com/kubeflow/notebooks/issues/new/choose\n- about: Create issue in 
kubeflow/notebooks repository\n- - name: Issues related to Kubeflow Pipelines\n- url: https://github.com/kubeflow/pipelines/issues/new/choose\n- about: Create issue in kubeflow/pipelines repository\n- - name: Issues related to Kubeflow Spark Operator\n- url: https://github.com/kubeflow/spark-operator/issues/new/choose\n- about: Create issue in kubeflow/spark-operator repository\n- - name: Issues related to Kubeflow Training Operator\n- url: https://github.com/kubeflow/training-operator/issues/new/choose\n- about: Create issue in kubeflow/training-operator repository\n- - name: Issues related to Kubeflow Platform Manifests\n- url: https://github.com/kubeflow/manifests/issues/new/choose\n- about: Create issue in kubeflow/manifests repository\n- - name: Issues related to other Kubeflow Platform components\n- url: https://github.com/kubeflow/dashboard/issues/new/choose\n- about: Create issue in kubeflow/dashboard repository\n- - name: Issues related to Kubeflow community or Kubeflow ecosystem\n- url: https://github.com/kubeflow/community/issues/new/choose\n- about: Create issue in kubeflow/community repository\n+ about: Official Kubeflow Documentation\n+\n+ - name: Kubeflow Slack\n+ url: https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels\n+ about: Join the Kubeflow Slack\n+\n+ - name: Issues - Community & Governance\n+ url: https://github.com/kubeflow/community/issues\n+ about: Please use the `kubeflow/community` repository\n+\n+ - name: Issues - KServe\n+ url: https://github.com/kserve/kserve/issues\n+ about: Please use the `kserve/kserve` repository\n+\n+ - name: Issues - Kubeflow Katib\n+ url: https://github.com/kubeflow/katib/issues\n+ about: Please use the `kubeflow/katib` repository\n+\n+ - name: Issues - Kubeflow Model Registry\n+ url: https://github.com/kubeflow/model-registry/issues\n+ about: Please use the `kubeflow/model-registry` repository\n+\n+ - name: Issues - Kubeflow MPI Operator\n+ url: https://github.com/kubeflow/mpi-operator/issues\n+ about: Please use the `kubeflow/mpi-operator` repository\n+\n+ - name: Issues - Kubeflow Notebooks\n+ url: https://github.com/kubeflow/notebooks/issues\n+ about: Please use the `kubeflow/notebooks` repository\n+\n+ - name: Issues - Kubeflow Pipelines\n+ url: https://github.com/kubeflow/pipelines/issues\n+ about: Please use the `kubeflow/pipelines` repository\n+\n+ - name: Issues - Kubeflow Spark Operator\n+ url: https://github.com/kubeflow/spark-operator/issues\n+ about: Please use the `kubeflow/spark-operator` repository\n+\n+ - name: Issues - Kubeflow Training Operator\n+ url: https://github.com/kubeflow/training-operator/issues\n+ about: Please use the `kubeflow/training-operator` repository\n+\n+ - name: Issues - Kubeflow Platform - Central Dashboard\n+ url: https://github.com/kubeflow/dashboard/issues\n+ about: Please use the `kubeflow/dashboard` repository\n+\n+ - name: Issues - Kubeflow Platform - Profile Controller\n+ url: https://github.com/kubeflow/dashboard/issues\n+ about: Please use the `kubeflow/dashboard` repository", + "comment_created_at": "2024-09-06T21:37:39+00:00", + "comment_author": "andreyvelich", + "comment_body": "Sounds good.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1750425882", + "pr_number": 7642, + "pr_file": ".github/ISSUE_TEMPLATE/config.yml", + "created_at": "2024-09-09T14:56:33+00:00", + "commented_code": null, + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1750425882", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 
7642, + "pr_file": ".github/ISSUE_TEMPLATE/config.yml", + "discussion_id": "1750425882", + "commented_code": null, + "comment_created_at": "2024-09-09T14:56:33+00:00", + "comment_author": "juliusvonkohout", + "comment_body": "Shall we list them alphabetically?", + "pr_file_module": null + }, + { + "comment_id": "1750714061", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": ".github/ISSUE_TEMPLATE/config.yml", + "discussion_id": "1750425882", + "commented_code": null, + "comment_created_at": "2024-09-09T18:09:16+00:00", + "comment_author": "thesuperzapper", + "comment_body": "We already do, but in three \"sub groups\" (which are all sorted alphabetically within themselves):\r\n\r\n1. Community links:\r\n - Docs\r\n - Slack\r\n - Community & Governance\r\n2. Component Docs:\r\n - KServe\r\n - Kubeflow Katib\r\n - ...\r\n3. Platform Docs:\r\n - Central Dashboard\r\n - Profile Controller\r\n - Manifests\r\n\r\nThe goal is to help users find the place to raise an issue, so I think grouping them like this will help users find the appropriate place faster.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-structure-for-navigation.md b/_reviewers/kubeflow-structure-for-navigation.md index 1e350d4..0bed0ef 100644 --- a/_reviewers/kubeflow-structure-for-navigation.md +++ b/_reviewers/kubeflow-structure-for-navigation.md @@ -45,111 +45,3 @@ documentation_links: ``` This organization style makes navigation intuitive and ensures users can quickly find the information they need, rather than getting lost in poorly structured documentation. - - -[ - { - "discussion_id": "1746922261", - "pr_number": 7642, - "pr_file": ".github/ISSUE_TEMPLATE/config.yml", - "created_at": "2024-09-06T10:46:48+00:00", - "commented_code": "contact_links:\n - name: Kubeflow Documentation\n url: https://www.kubeflow.org/\n about: The official Kubeflow documentation\n - name: Issues related to KServe\n url: https://github.com/kserve/kserve/issues/new/choose\n about: Create issue in kserve/kserve repository\n - name: Issues related to Kubeflow Katib\n url: https://github.com/kubeflow/katib/issues/new/choose\n about: Create issue in kubeflow/katib repository\n - name: Issues related to Kubeflow Model Registry\n url: https://github.com/kubeflow/model-registry/issues/new/choose\n about: Create issue in kubeflow/model-registry repository\n - name: Issues related to Kubeflow MPI Operator\n url: https://github.com/kubeflow/mpi-operator/issues/new/choose\n about: Create issue in kubeflow/mpi-operator repository\n - name: Issues related to Kubeflow Notebooks\n url: https://github.com/kubeflow/notebooks/issues/new/choose\n about: Create issue in kubeflow/notebooks repository\n - name: Issues related to Kubeflow Pipelines\n url: https://github.com/kubeflow/pipelines/issues/new/choose\n about: Create issue in kubeflow/pipelines repository\n - name: Issues related to Kubeflow Spark Operator\n url: https://github.com/kubeflow/spark-operator/issues/new/choose\n about: Create issue in kubeflow/spark-operator repository\n - name: Issues related to Kubeflow Training Operator\n url: https://github.com/kubeflow/training-operator/issues/new/choose\n about: Create issue in kubeflow/training-operator repository\n - name: Issues related to Kubeflow Platform Manifests\n url: https://github.com/kubeflow/manifests/issues/new/choose\n about: Create issue in kubeflow/manifests repository\n - name: Issues related to other Kubeflow Platform components\n url: 
https://github.com/kubeflow/dashboard/issues/new/choose\n about: Create issue in kubeflow/dashboard repository\n - name: Issues related to Kubeflow community or Kubeflow ecosystem\n url: https://github.com/kubeflow/community/issues/new/choose\n about: Create issue in kubeflow/community repository\n about: Official Kubeflow Documentation\n\n - name: Kubeflow Slack\n url: https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels\n about: Join the Kubeflow Slack\n\n - name: Issues - Community & Governance\n url: https://github.com/kubeflow/community/issues\n about: Please use the `kubeflow/community` repository\n\n - name: Issues - KServe\n url: https://github.com/kserve/kserve/issues\n about: Please use the `kserve/kserve` repository\n\n - name: Issues - Kubeflow Katib\n url: https://github.com/kubeflow/katib/issues\n about: Please use the `kubeflow/katib` repository\n\n - name: Issues - Kubeflow Model Registry\n url: https://github.com/kubeflow/model-registry/issues\n about: Please use the `kubeflow/model-registry` repository\n\n - name: Issues - Kubeflow MPI Operator\n url: https://github.com/kubeflow/mpi-operator/issues\n about: Please use the `kubeflow/mpi-operator` repository\n\n - name: Issues - Kubeflow Notebooks\n url: https://github.com/kubeflow/notebooks/issues\n about: Please use the `kubeflow/notebooks` repository\n\n - name: Issues - Kubeflow Pipelines\n url: https://github.com/kubeflow/pipelines/issues\n about: Please use the `kubeflow/pipelines` repository\n\n - name: Issues - Kubeflow Spark Operator\n url: https://github.com/kubeflow/spark-operator/issues\n about: Please use the `kubeflow/spark-operator` repository\n\n - name: Issues - Kubeflow Training Operator\n url: https://github.com/kubeflow/training-operator/issues\n about: Please use the `kubeflow/training-operator` repository\n\n - name: Issues - Kubeflow Platform - Central Dashboard\n url: https://github.com/kubeflow/dashboard/issues\n about: Please use the `kubeflow/dashboard` repository\n\n - name: Issues - Kubeflow Platform - Profile Controller\n url: https://github.com/kubeflow/dashboard/issues\n about: Please use the `kubeflow/dashboard` repository", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1746922261", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": ".github/ISSUE_TEMPLATE/config.yml", - "discussion_id": "1746922261", - "commented_code": "@@ -2,37 +2,56 @@ blank_issues_enabled: false\n contact_links:\n - name: Kubeflow Documentation\n url: https://www.kubeflow.org/\n- about: The official Kubeflow documentation\n- - name: Issues related to KServe\n- url: https://github.com/kserve/kserve/issues/new/choose\n- about: Create issue in kserve/kserve repository\n- - name: Issues related to Kubeflow Katib\n- url: https://github.com/kubeflow/katib/issues/new/choose\n- about: Create issue in kubeflow/katib repository\n- - name: Issues related to Kubeflow Model Registry\n- url: https://github.com/kubeflow/model-registry/issues/new/choose\n- about: Create issue in kubeflow/model-registry repository\n- - name: Issues related to Kubeflow MPI Operator\n- url: https://github.com/kubeflow/mpi-operator/issues/new/choose\n- about: Create issue in kubeflow/mpi-operator repository\n- - name: Issues related to Kubeflow Notebooks\n- url: https://github.com/kubeflow/notebooks/issues/new/choose\n- about: Create issue in kubeflow/notebooks repository\n- - name: Issues related to Kubeflow Pipelines\n- url: 
https://github.com/kubeflow/pipelines/issues/new/choose\n- about: Create issue in kubeflow/pipelines repository\n- - name: Issues related to Kubeflow Spark Operator\n- url: https://github.com/kubeflow/spark-operator/issues/new/choose\n- about: Create issue in kubeflow/spark-operator repository\n- - name: Issues related to Kubeflow Training Operator\n- url: https://github.com/kubeflow/training-operator/issues/new/choose\n- about: Create issue in kubeflow/training-operator repository\n- - name: Issues related to Kubeflow Platform Manifests\n- url: https://github.com/kubeflow/manifests/issues/new/choose\n- about: Create issue in kubeflow/manifests repository\n- - name: Issues related to other Kubeflow Platform components\n- url: https://github.com/kubeflow/dashboard/issues/new/choose\n- about: Create issue in kubeflow/dashboard repository\n- - name: Issues related to Kubeflow community or Kubeflow ecosystem\n- url: https://github.com/kubeflow/community/issues/new/choose\n- about: Create issue in kubeflow/community repository\n+ about: Official Kubeflow Documentation\n+\n+ - name: Kubeflow Slack\n+ url: https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels\n+ about: Join the Kubeflow Slack\n+\n+ - name: Issues - Community & Governance\n+ url: https://github.com/kubeflow/community/issues\n+ about: Please use the `kubeflow/community` repository\n+\n+ - name: Issues - KServe\n+ url: https://github.com/kserve/kserve/issues\n+ about: Please use the `kserve/kserve` repository\n+\n+ - name: Issues - Kubeflow Katib\n+ url: https://github.com/kubeflow/katib/issues\n+ about: Please use the `kubeflow/katib` repository\n+\n+ - name: Issues - Kubeflow Model Registry\n+ url: https://github.com/kubeflow/model-registry/issues\n+ about: Please use the `kubeflow/model-registry` repository\n+\n+ - name: Issues - Kubeflow MPI Operator\n+ url: https://github.com/kubeflow/mpi-operator/issues\n+ about: Please use the `kubeflow/mpi-operator` repository\n+\n+ - name: Issues - Kubeflow Notebooks\n+ url: https://github.com/kubeflow/notebooks/issues\n+ about: Please use the `kubeflow/notebooks` repository\n+\n+ - name: Issues - Kubeflow Pipelines\n+ url: https://github.com/kubeflow/pipelines/issues\n+ about: Please use the `kubeflow/pipelines` repository\n+\n+ - name: Issues - Kubeflow Spark Operator\n+ url: https://github.com/kubeflow/spark-operator/issues\n+ about: Please use the `kubeflow/spark-operator` repository\n+\n+ - name: Issues - Kubeflow Training Operator\n+ url: https://github.com/kubeflow/training-operator/issues\n+ about: Please use the `kubeflow/training-operator` repository\n+\n+ - name: Issues - Kubeflow Platform - Central Dashboard\n+ url: https://github.com/kubeflow/dashboard/issues\n+ about: Please use the `kubeflow/dashboard` repository\n+\n+ - name: Issues - Kubeflow Platform - Profile Controller\n+ url: https://github.com/kubeflow/dashboard/issues\n+ about: Please use the `kubeflow/dashboard` repository", - "comment_created_at": "2024-09-06T10:46:48+00:00", - "comment_author": "andreyvelich", - "comment_body": "Should we combine this to the single entry ?\r\n```\r\n- name: Issues - Kubeflow Platform - Central Dashboard and Profile Controller\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1747524894", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": ".github/ISSUE_TEMPLATE/config.yml", - "discussion_id": "1746922261", - "commented_code": "@@ -2,37 +2,56 @@ blank_issues_enabled: false\n contact_links:\n - name: Kubeflow Documentation\n url: 
https://www.kubeflow.org/\n- about: The official Kubeflow documentation\n- - name: Issues related to KServe\n- url: https://github.com/kserve/kserve/issues/new/choose\n- about: Create issue in kserve/kserve repository\n- - name: Issues related to Kubeflow Katib\n- url: https://github.com/kubeflow/katib/issues/new/choose\n- about: Create issue in kubeflow/katib repository\n- - name: Issues related to Kubeflow Model Registry\n- url: https://github.com/kubeflow/model-registry/issues/new/choose\n- about: Create issue in kubeflow/model-registry repository\n- - name: Issues related to Kubeflow MPI Operator\n- url: https://github.com/kubeflow/mpi-operator/issues/new/choose\n- about: Create issue in kubeflow/mpi-operator repository\n- - name: Issues related to Kubeflow Notebooks\n- url: https://github.com/kubeflow/notebooks/issues/new/choose\n- about: Create issue in kubeflow/notebooks repository\n- - name: Issues related to Kubeflow Pipelines\n- url: https://github.com/kubeflow/pipelines/issues/new/choose\n- about: Create issue in kubeflow/pipelines repository\n- - name: Issues related to Kubeflow Spark Operator\n- url: https://github.com/kubeflow/spark-operator/issues/new/choose\n- about: Create issue in kubeflow/spark-operator repository\n- - name: Issues related to Kubeflow Training Operator\n- url: https://github.com/kubeflow/training-operator/issues/new/choose\n- about: Create issue in kubeflow/training-operator repository\n- - name: Issues related to Kubeflow Platform Manifests\n- url: https://github.com/kubeflow/manifests/issues/new/choose\n- about: Create issue in kubeflow/manifests repository\n- - name: Issues related to other Kubeflow Platform components\n- url: https://github.com/kubeflow/dashboard/issues/new/choose\n- about: Create issue in kubeflow/dashboard repository\n- - name: Issues related to Kubeflow community or Kubeflow ecosystem\n- url: https://github.com/kubeflow/community/issues/new/choose\n- about: Create issue in kubeflow/community repository\n+ about: Official Kubeflow Documentation\n+\n+ - name: Kubeflow Slack\n+ url: https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels\n+ about: Join the Kubeflow Slack\n+\n+ - name: Issues - Community & Governance\n+ url: https://github.com/kubeflow/community/issues\n+ about: Please use the `kubeflow/community` repository\n+\n+ - name: Issues - KServe\n+ url: https://github.com/kserve/kserve/issues\n+ about: Please use the `kserve/kserve` repository\n+\n+ - name: Issues - Kubeflow Katib\n+ url: https://github.com/kubeflow/katib/issues\n+ about: Please use the `kubeflow/katib` repository\n+\n+ - name: Issues - Kubeflow Model Registry\n+ url: https://github.com/kubeflow/model-registry/issues\n+ about: Please use the `kubeflow/model-registry` repository\n+\n+ - name: Issues - Kubeflow MPI Operator\n+ url: https://github.com/kubeflow/mpi-operator/issues\n+ about: Please use the `kubeflow/mpi-operator` repository\n+\n+ - name: Issues - Kubeflow Notebooks\n+ url: https://github.com/kubeflow/notebooks/issues\n+ about: Please use the `kubeflow/notebooks` repository\n+\n+ - name: Issues - Kubeflow Pipelines\n+ url: https://github.com/kubeflow/pipelines/issues\n+ about: Please use the `kubeflow/pipelines` repository\n+\n+ - name: Issues - Kubeflow Spark Operator\n+ url: https://github.com/kubeflow/spark-operator/issues\n+ about: Please use the `kubeflow/spark-operator` repository\n+\n+ - name: Issues - Kubeflow Training Operator\n+ url: https://github.com/kubeflow/training-operator/issues\n+ about: Please use the 
`kubeflow/training-operator` repository\n+\n+ - name: Issues - Kubeflow Platform - Central Dashboard\n+ url: https://github.com/kubeflow/dashboard/issues\n+ about: Please use the `kubeflow/dashboard` repository\n+\n+ - name: Issues - Kubeflow Platform - Profile Controller\n+ url: https://github.com/kubeflow/dashboard/issues\n+ about: Please use the `kubeflow/dashboard` repository", - "comment_created_at": "2024-09-06T18:02:25+00:00", - "comment_author": "thesuperzapper", - "comment_body": "Personally, I think it's easier for users to skim if everything is at the same approximate width, and it's not a huge deal to have two entries (similar to what I have done in the main README's \"Kubeflow Platform\" table).\r\n\r\nThat is, users might only read the first part of the \"Central Dashboard\" and then not see the \"Profile Controller\" part, and not know where to raise the issue.", - "pr_file_module": null - }, - { - "comment_id": "1747585806", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": ".github/ISSUE_TEMPLATE/config.yml", - "discussion_id": "1746922261", - "commented_code": "@@ -2,37 +2,56 @@ blank_issues_enabled: false\n contact_links:\n - name: Kubeflow Documentation\n url: https://www.kubeflow.org/\n- about: The official Kubeflow documentation\n- - name: Issues related to KServe\n- url: https://github.com/kserve/kserve/issues/new/choose\n- about: Create issue in kserve/kserve repository\n- - name: Issues related to Kubeflow Katib\n- url: https://github.com/kubeflow/katib/issues/new/choose\n- about: Create issue in kubeflow/katib repository\n- - name: Issues related to Kubeflow Model Registry\n- url: https://github.com/kubeflow/model-registry/issues/new/choose\n- about: Create issue in kubeflow/model-registry repository\n- - name: Issues related to Kubeflow MPI Operator\n- url: https://github.com/kubeflow/mpi-operator/issues/new/choose\n- about: Create issue in kubeflow/mpi-operator repository\n- - name: Issues related to Kubeflow Notebooks\n- url: https://github.com/kubeflow/notebooks/issues/new/choose\n- about: Create issue in kubeflow/notebooks repository\n- - name: Issues related to Kubeflow Pipelines\n- url: https://github.com/kubeflow/pipelines/issues/new/choose\n- about: Create issue in kubeflow/pipelines repository\n- - name: Issues related to Kubeflow Spark Operator\n- url: https://github.com/kubeflow/spark-operator/issues/new/choose\n- about: Create issue in kubeflow/spark-operator repository\n- - name: Issues related to Kubeflow Training Operator\n- url: https://github.com/kubeflow/training-operator/issues/new/choose\n- about: Create issue in kubeflow/training-operator repository\n- - name: Issues related to Kubeflow Platform Manifests\n- url: https://github.com/kubeflow/manifests/issues/new/choose\n- about: Create issue in kubeflow/manifests repository\n- - name: Issues related to other Kubeflow Platform components\n- url: https://github.com/kubeflow/dashboard/issues/new/choose\n- about: Create issue in kubeflow/dashboard repository\n- - name: Issues related to Kubeflow community or Kubeflow ecosystem\n- url: https://github.com/kubeflow/community/issues/new/choose\n- about: Create issue in kubeflow/community repository\n+ about: Official Kubeflow Documentation\n+\n+ - name: Kubeflow Slack\n+ url: https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels\n+ about: Join the Kubeflow Slack\n+\n+ - name: Issues - Community & Governance\n+ url: https://github.com/kubeflow/community/issues\n+ about: Please use the `kubeflow/community` 
repository\n+\n+ - name: Issues - KServe\n+ url: https://github.com/kserve/kserve/issues\n+ about: Please use the `kserve/kserve` repository\n+\n+ - name: Issues - Kubeflow Katib\n+ url: https://github.com/kubeflow/katib/issues\n+ about: Please use the `kubeflow/katib` repository\n+\n+ - name: Issues - Kubeflow Model Registry\n+ url: https://github.com/kubeflow/model-registry/issues\n+ about: Please use the `kubeflow/model-registry` repository\n+\n+ - name: Issues - Kubeflow MPI Operator\n+ url: https://github.com/kubeflow/mpi-operator/issues\n+ about: Please use the `kubeflow/mpi-operator` repository\n+\n+ - name: Issues - Kubeflow Notebooks\n+ url: https://github.com/kubeflow/notebooks/issues\n+ about: Please use the `kubeflow/notebooks` repository\n+\n+ - name: Issues - Kubeflow Pipelines\n+ url: https://github.com/kubeflow/pipelines/issues\n+ about: Please use the `kubeflow/pipelines` repository\n+\n+ - name: Issues - Kubeflow Spark Operator\n+ url: https://github.com/kubeflow/spark-operator/issues\n+ about: Please use the `kubeflow/spark-operator` repository\n+\n+ - name: Issues - Kubeflow Training Operator\n+ url: https://github.com/kubeflow/training-operator/issues\n+ about: Please use the `kubeflow/training-operator` repository\n+\n+ - name: Issues - Kubeflow Platform - Central Dashboard\n+ url: https://github.com/kubeflow/dashboard/issues\n+ about: Please use the `kubeflow/dashboard` repository\n+\n+ - name: Issues - Kubeflow Platform - Profile Controller\n+ url: https://github.com/kubeflow/dashboard/issues\n+ about: Please use the `kubeflow/dashboard` repository", - "comment_created_at": "2024-09-06T18:55:32+00:00", - "comment_author": "andreyvelich", - "comment_body": "@thesuperzapper How we can explain users if they see issues with KFAM and Admission Webhook, they should use `kubeflow/dashboard` repo ?", - "pr_file_module": null - }, - { - "comment_id": "1747739895", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": ".github/ISSUE_TEMPLATE/config.yml", - "discussion_id": "1746922261", - "commented_code": "@@ -2,37 +2,56 @@ blank_issues_enabled: false\n contact_links:\n - name: Kubeflow Documentation\n url: https://www.kubeflow.org/\n- about: The official Kubeflow documentation\n- - name: Issues related to KServe\n- url: https://github.com/kserve/kserve/issues/new/choose\n- about: Create issue in kserve/kserve repository\n- - name: Issues related to Kubeflow Katib\n- url: https://github.com/kubeflow/katib/issues/new/choose\n- about: Create issue in kubeflow/katib repository\n- - name: Issues related to Kubeflow Model Registry\n- url: https://github.com/kubeflow/model-registry/issues/new/choose\n- about: Create issue in kubeflow/model-registry repository\n- - name: Issues related to Kubeflow MPI Operator\n- url: https://github.com/kubeflow/mpi-operator/issues/new/choose\n- about: Create issue in kubeflow/mpi-operator repository\n- - name: Issues related to Kubeflow Notebooks\n- url: https://github.com/kubeflow/notebooks/issues/new/choose\n- about: Create issue in kubeflow/notebooks repository\n- - name: Issues related to Kubeflow Pipelines\n- url: https://github.com/kubeflow/pipelines/issues/new/choose\n- about: Create issue in kubeflow/pipelines repository\n- - name: Issues related to Kubeflow Spark Operator\n- url: https://github.com/kubeflow/spark-operator/issues/new/choose\n- about: Create issue in kubeflow/spark-operator repository\n- - name: Issues related to Kubeflow Training Operator\n- url: 
https://github.com/kubeflow/training-operator/issues/new/choose\n- about: Create issue in kubeflow/training-operator repository\n- - name: Issues related to Kubeflow Platform Manifests\n- url: https://github.com/kubeflow/manifests/issues/new/choose\n- about: Create issue in kubeflow/manifests repository\n- - name: Issues related to other Kubeflow Platform components\n- url: https://github.com/kubeflow/dashboard/issues/new/choose\n- about: Create issue in kubeflow/dashboard repository\n- - name: Issues related to Kubeflow community or Kubeflow ecosystem\n- url: https://github.com/kubeflow/community/issues/new/choose\n- about: Create issue in kubeflow/community repository\n+ about: Official Kubeflow Documentation\n+\n+ - name: Kubeflow Slack\n+ url: https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels\n+ about: Join the Kubeflow Slack\n+\n+ - name: Issues - Community & Governance\n+ url: https://github.com/kubeflow/community/issues\n+ about: Please use the `kubeflow/community` repository\n+\n+ - name: Issues - KServe\n+ url: https://github.com/kserve/kserve/issues\n+ about: Please use the `kserve/kserve` repository\n+\n+ - name: Issues - Kubeflow Katib\n+ url: https://github.com/kubeflow/katib/issues\n+ about: Please use the `kubeflow/katib` repository\n+\n+ - name: Issues - Kubeflow Model Registry\n+ url: https://github.com/kubeflow/model-registry/issues\n+ about: Please use the `kubeflow/model-registry` repository\n+\n+ - name: Issues - Kubeflow MPI Operator\n+ url: https://github.com/kubeflow/mpi-operator/issues\n+ about: Please use the `kubeflow/mpi-operator` repository\n+\n+ - name: Issues - Kubeflow Notebooks\n+ url: https://github.com/kubeflow/notebooks/issues\n+ about: Please use the `kubeflow/notebooks` repository\n+\n+ - name: Issues - Kubeflow Pipelines\n+ url: https://github.com/kubeflow/pipelines/issues\n+ about: Please use the `kubeflow/pipelines` repository\n+\n+ - name: Issues - Kubeflow Spark Operator\n+ url: https://github.com/kubeflow/spark-operator/issues\n+ about: Please use the `kubeflow/spark-operator` repository\n+\n+ - name: Issues - Kubeflow Training Operator\n+ url: https://github.com/kubeflow/training-operator/issues\n+ about: Please use the `kubeflow/training-operator` repository\n+\n+ - name: Issues - Kubeflow Platform - Central Dashboard\n+ url: https://github.com/kubeflow/dashboard/issues\n+ about: Please use the `kubeflow/dashboard` repository\n+\n+ - name: Issues - Kubeflow Platform - Profile Controller\n+ url: https://github.com/kubeflow/dashboard/issues\n+ about: Please use the `kubeflow/dashboard` repository", - "comment_created_at": "2024-09-06T21:31:53+00:00", - "comment_author": "thesuperzapper", - "comment_body": "I think most people would associate them with the profile controller or dashboard, and those who understand the distinction will already know about the code location (or be smart enough to find the repo).\r\n\r\nI want to avoid including the confusing components here, as its mostly for lost users to find somewhere to get help.", - "pr_file_module": null - }, - { - "comment_id": "1747744318", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": ".github/ISSUE_TEMPLATE/config.yml", - "discussion_id": "1746922261", - "commented_code": "@@ -2,37 +2,56 @@ blank_issues_enabled: false\n contact_links:\n - name: Kubeflow Documentation\n url: https://www.kubeflow.org/\n- about: The official Kubeflow documentation\n- - name: Issues related to KServe\n- url: https://github.com/kserve/kserve/issues/new/choose\n- 
about: Create issue in kserve/kserve repository\n- - name: Issues related to Kubeflow Katib\n- url: https://github.com/kubeflow/katib/issues/new/choose\n- about: Create issue in kubeflow/katib repository\n- - name: Issues related to Kubeflow Model Registry\n- url: https://github.com/kubeflow/model-registry/issues/new/choose\n- about: Create issue in kubeflow/model-registry repository\n- - name: Issues related to Kubeflow MPI Operator\n- url: https://github.com/kubeflow/mpi-operator/issues/new/choose\n- about: Create issue in kubeflow/mpi-operator repository\n- - name: Issues related to Kubeflow Notebooks\n- url: https://github.com/kubeflow/notebooks/issues/new/choose\n- about: Create issue in kubeflow/notebooks repository\n- - name: Issues related to Kubeflow Pipelines\n- url: https://github.com/kubeflow/pipelines/issues/new/choose\n- about: Create issue in kubeflow/pipelines repository\n- - name: Issues related to Kubeflow Spark Operator\n- url: https://github.com/kubeflow/spark-operator/issues/new/choose\n- about: Create issue in kubeflow/spark-operator repository\n- - name: Issues related to Kubeflow Training Operator\n- url: https://github.com/kubeflow/training-operator/issues/new/choose\n- about: Create issue in kubeflow/training-operator repository\n- - name: Issues related to Kubeflow Platform Manifests\n- url: https://github.com/kubeflow/manifests/issues/new/choose\n- about: Create issue in kubeflow/manifests repository\n- - name: Issues related to other Kubeflow Platform components\n- url: https://github.com/kubeflow/dashboard/issues/new/choose\n- about: Create issue in kubeflow/dashboard repository\n- - name: Issues related to Kubeflow community or Kubeflow ecosystem\n- url: https://github.com/kubeflow/community/issues/new/choose\n- about: Create issue in kubeflow/community repository\n+ about: Official Kubeflow Documentation\n+\n+ - name: Kubeflow Slack\n+ url: https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels\n+ about: Join the Kubeflow Slack\n+\n+ - name: Issues - Community & Governance\n+ url: https://github.com/kubeflow/community/issues\n+ about: Please use the `kubeflow/community` repository\n+\n+ - name: Issues - KServe\n+ url: https://github.com/kserve/kserve/issues\n+ about: Please use the `kserve/kserve` repository\n+\n+ - name: Issues - Kubeflow Katib\n+ url: https://github.com/kubeflow/katib/issues\n+ about: Please use the `kubeflow/katib` repository\n+\n+ - name: Issues - Kubeflow Model Registry\n+ url: https://github.com/kubeflow/model-registry/issues\n+ about: Please use the `kubeflow/model-registry` repository\n+\n+ - name: Issues - Kubeflow MPI Operator\n+ url: https://github.com/kubeflow/mpi-operator/issues\n+ about: Please use the `kubeflow/mpi-operator` repository\n+\n+ - name: Issues - Kubeflow Notebooks\n+ url: https://github.com/kubeflow/notebooks/issues\n+ about: Please use the `kubeflow/notebooks` repository\n+\n+ - name: Issues - Kubeflow Pipelines\n+ url: https://github.com/kubeflow/pipelines/issues\n+ about: Please use the `kubeflow/pipelines` repository\n+\n+ - name: Issues - Kubeflow Spark Operator\n+ url: https://github.com/kubeflow/spark-operator/issues\n+ about: Please use the `kubeflow/spark-operator` repository\n+\n+ - name: Issues - Kubeflow Training Operator\n+ url: https://github.com/kubeflow/training-operator/issues\n+ about: Please use the `kubeflow/training-operator` repository\n+\n+ - name: Issues - Kubeflow Platform - Central Dashboard\n+ url: https://github.com/kubeflow/dashboard/issues\n+ about: Please use the 
`kubeflow/dashboard` repository\n+\n+ - name: Issues - Kubeflow Platform - Profile Controller\n+ url: https://github.com/kubeflow/dashboard/issues\n+ about: Please use the `kubeflow/dashboard` repository", - "comment_created_at": "2024-09-06T21:37:39+00:00", - "comment_author": "andreyvelich", - "comment_body": "Sounds good.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1750425882", - "pr_number": 7642, - "pr_file": ".github/ISSUE_TEMPLATE/config.yml", - "created_at": "2024-09-09T14:56:33+00:00", - "commented_code": null, - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1750425882", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": ".github/ISSUE_TEMPLATE/config.yml", - "discussion_id": "1750425882", - "commented_code": null, - "comment_created_at": "2024-09-09T14:56:33+00:00", - "comment_author": "juliusvonkohout", - "comment_body": "Shall we list them alphabetically?", - "pr_file_module": null - }, - { - "comment_id": "1750714061", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": ".github/ISSUE_TEMPLATE/config.yml", - "discussion_id": "1750425882", - "commented_code": null, - "comment_created_at": "2024-09-09T18:09:16+00:00", - "comment_author": "thesuperzapper", - "comment_body": "We already do, but in three \"sub groups\" (which are all sorted alphabetically within themselves):\r\n\r\n1. Community links:\r\n - Docs\r\n - Slack\r\n - Community & Governance\r\n2. Component Docs:\r\n - KServe\r\n - Kubeflow Katib\r\n - ...\r\n3. Platform Docs:\r\n - Central Dashboard\r\n - Profile Controller\r\n - Manifests\r\n\r\nThe goal is to help users find the place to raise an issue, so I think grouping them like this will help users find the appropriate place faster.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-structured-documentation-with-examples.json b/_reviewers/kubeflow-structured-documentation-with-examples.json new file mode 100644 index 0000000..0cde482 --- /dev/null +++ b/_reviewers/kubeflow-structured-documentation-with-examples.json @@ -0,0 +1,292 @@ +[ + { + "discussion_id": "1746924443", + "pr_number": 7642, + "pr_file": "CHANGELOG.md", + "created_at": "2024-09-06T10:48:58+00:00", + "commented_code": "# Change Log\n# Changelog\n\nThe [GitHub Releases](https://github.com/kubeflow/kubeflow/releases) page covers newer versions of `kubeflow/kubeflow` components.", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1746924443", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": "CHANGELOG.md", + "discussion_id": "1746924443", + "commented_code": "@@ -1,4 +1,10 @@\n-# Change Log\n+# Changelog\n+\n+The [GitHub Releases](https://github.com/kubeflow/kubeflow/releases) page covers newer versions of `kubeflow/kubeflow` components.", + "comment_created_at": "2024-09-06T10:48:58+00:00", + "comment_author": "andreyvelich", + "comment_body": "Does it make sense to list which components that release page covers (e.g. 
Central Dashboard, Notebooks, etc.)?", + "pr_file_module": null + }, + { + "comment_id": "1747526308", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": "CHANGELOG.md", + "discussion_id": "1746924443", + "commented_code": "@@ -1,4 +1,10 @@\n-# Change Log\n+# Changelog\n+\n+The [GitHub Releases](https://github.com/kubeflow/kubeflow/releases) page covers newer versions of `kubeflow/kubeflow` components.", + "comment_created_at": "2024-09-06T18:03:24+00:00", + "comment_author": "thesuperzapper", + "comment_body": "It covers everything, so that is probably not that useful.\r\n\r\nIt's the overall \"platform version\" so by proxy includes everything.", + "pr_file_module": null + }, + { + "comment_id": "1747580441", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": "CHANGELOG.md", + "discussion_id": "1746924443", + "commented_code": "@@ -1,4 +1,10 @@\n-# Change Log\n+# Changelog\n+\n+The [GitHub Releases](https://github.com/kubeflow/kubeflow/releases) page covers newer versions of `kubeflow/kubeflow` components.", + "comment_created_at": "2024-09-06T18:51:51+00:00", + "comment_author": "andreyvelich", + "comment_body": "@thesuperzapper Are we planing to remove this changelog from `kubeflow/kubeflow` once we split the repos ?", + "pr_file_module": null + }, + { + "comment_id": "1747736618", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": "CHANGELOG.md", + "discussion_id": "1746924443", + "commented_code": "@@ -1,4 +1,10 @@\n-# Change Log\n+# Changelog\n+\n+The [GitHub Releases](https://github.com/kubeflow/kubeflow/releases) page covers newer versions of `kubeflow/kubeflow` components.", + "comment_created_at": "2024-09-06T21:27:38+00:00", + "comment_author": "thesuperzapper", + "comment_body": "This changelog is only here for historical reasons, it only includes stuff from 0.5 and earlier.\r\n\r\nWe may as well leave it once we split the repo, and leave the top section which links to the more modern changelogs. 
", + "pr_file_module": null + }, + { + "comment_id": "1747743581", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": "CHANGELOG.md", + "discussion_id": "1746924443", + "commented_code": "@@ -1,4 +1,10 @@\n-# Change Log\n+# Changelog\n+\n+The [GitHub Releases](https://github.com/kubeflow/kubeflow/releases) page covers newer versions of `kubeflow/kubeflow` components.", + "comment_created_at": "2024-09-06T21:36:36+00:00", + "comment_author": "andreyvelich", + "comment_body": ">We may as well leave it once we split the repo, and leave the top section which links to the more modern changelogs.\r\n\r\nYeah, but in that case you have to constantly update this changelog which links to changelogs of every Kubeflow components after Kubeflow release.\r\nDo we want to do it in the future ?", + "pr_file_module": null + }, + { + "comment_id": "1747744882", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": "CHANGELOG.md", + "discussion_id": "1746924443", + "commented_code": "@@ -1,4 +1,10 @@\n-# Change Log\n+# Changelog\n+\n+The [GitHub Releases](https://github.com/kubeflow/kubeflow/releases) page covers newer versions of `kubeflow/kubeflow` components.", + "comment_created_at": "2024-09-06T21:38:31+00:00", + "comment_author": "thesuperzapper", + "comment_body": "We should 100% have a page under this section of the website which includes those links:\r\n\r\n- https://www.kubeflow.org/docs/releases/\r\n\r\nAnd just link to that page.", + "pr_file_module": null + }, + { + "comment_id": "1747746000", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": "CHANGELOG.md", + "discussion_id": "1746924443", + "commented_code": "@@ -1,4 +1,10 @@\n-# Change Log\n+# Changelog\n+\n+The [GitHub Releases](https://github.com/kubeflow/kubeflow/releases) page covers newer versions of `kubeflow/kubeflow` components.", + "comment_created_at": "2024-09-06T21:40:01+00:00", + "comment_author": "andreyvelich", + "comment_body": "Should we just use section in README for it to minimize number of files in `kubeflow/kubeflow` repo ?", + "pr_file_module": null + }, + { + "comment_id": "1747753171", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": "CHANGELOG.md", + "discussion_id": "1746924443", + "commented_code": "@@ -1,4 +1,10 @@\n-# Change Log\n+# Changelog\n+\n+The [GitHub Releases](https://github.com/kubeflow/kubeflow/releases) page covers newer versions of `kubeflow/kubeflow` components.", + "comment_created_at": "2024-09-06T21:50:17+00:00", + "comment_author": "thesuperzapper", + "comment_body": "As I was saying, google loves CHANGELOG.md files, so even a largely empty file which just links to the website is good.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1746925519", + "pr_number": 7642, + "pr_file": "CONTRIBUTING.md", + "created_at": "2024-09-06T10:50:06+00:00", + "commented_code": "# Kubeflow Contributor Guide", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1746925519", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": "CONTRIBUTING.md", + "discussion_id": "1746925519", + "commented_code": "@@ -1,13 +1,6 @@\n # Kubeflow Contributor Guide", + "comment_created_at": "2024-09-06T10:50:06+00:00", + "comment_author": "andreyvelich", + "comment_body": "@thesuperzapper Would it be easier to remove this file and just add the `## Contributing` section to the README where we link the https://www.kubeflow.org/docs/about/contributing/ page ?", + 
"pr_file_module": null + }, + { + "comment_id": "1747527963", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": "CONTRIBUTING.md", + "discussion_id": "1746925519", + "commented_code": "@@ -1,13 +1,6 @@\n # Kubeflow Contributor Guide", + "comment_created_at": "2024-09-06T18:04:35+00:00", + "comment_author": "thesuperzapper", + "comment_body": "We should probably have both, because google loves to index files like `CONTRIBUTING.md`.", + "pr_file_module": null + }, + { + "comment_id": "1747540602", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": "CONTRIBUTING.md", + "discussion_id": "1746925519", + "commented_code": "@@ -1,13 +1,6 @@\n # Kubeflow Contributor Guide", + "comment_created_at": "2024-09-06T18:15:19+00:00", + "comment_author": "thesuperzapper", + "comment_body": "I added `& Contributing` to the final README heading in https://github.com/kubeflow/kubeflow/pull/7642/commits/5513f05f5a67250aeb152b4154f1bb1ed523b727", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1746931065", + "pr_number": 7642, + "pr_file": "README.md", + "created_at": "2024-09-06T10:55:58+00:00", + "commented_code": "\n# Kubeflow\n\n[![Slack Status](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n[![Join Kubeflow Slack](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n[![CLOMonitor](https://img.shields.io/endpoint?url=https://clomonitor.io/api/projects/cncf/kubeflow/badge)](https://clomonitor.io/projects/cncf/kubeflow)\n[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/2660/badge)](https://www.bestpractices.dev/projects/2660)\n\nThe Kubeflow makes AI/ML on Kubernetes simple, portable and scalable.\n\n\n---\n## About Kubeflow\n\n## Documentation\n[Kubeflow](https://www.kubeflow.org/) makes artificial intelligence and machine learning simple, portable, and scalable.\nWe are an _ecosystem_ of [Kubernetes](https://kubernetes.io/) based components for each stage in the [AI/ML Lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle) with support for best-in-class open source [tools and frameworks](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem).\n\nPlease refer to the official docs at [kubeflow.org](http://kubeflow.org).\nPlease refer to the official [documentation](https://www.kubeflow.org/docs/) for more information.\n\n## Kubeflow Components\n\nThe Kubeflow ecosystem is composed of multiple open-source projects for each stage in\n[the ML lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle).\nThe [Kubeflow Ecosystem](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem) is composed of several projects known as [Kubeflow Components](https://www.kubeflow.org/docs/components/).\n\nLearn more about each project in [the Kubeflow documentation](https://www.kubeflow.org/docs/components/).\nThe following table lists the components and their respective source code repositories:\n\nPlease use the following GitHub repositories to open issues and pull requests for\n[the different Kubeflow components](https://www.kubeflow.org/docs/started/introduction/#what-are-standalone-kubeflow-components):\n| Component | Source Code 
|\n|-------------------------------------------------------------------------------------|-------------------------------------------------------------------------------|\n| [KServe](https://www.kubeflow.org/docs/external-add-ons/kserve/) | [`kserve/kserve`](https://github.com/kserve/kserve) |\n| [Kubeflow Katib](https://www.kubeflow.org/docs/components/katib/) | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n| [Kubeflow Model Registry](https://www.kubeflow.org/docs/components/model-registry/) | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n| [Kubeflow Notebooks](https://www.kubeflow.org/docs/components/notebooks/) | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n| [Kubeflow Pipelines](https://www.kubeflow.org/docs/components/pipelines/) | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n| [Kubeflow Spark Operator](https://www.kubeflow.org/docs/components/spark-operator/) | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n| [Kubeflow Training Operator](https://www.kubeflow.org/docs/components/training/) | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n\n| Component | Source Code |\n| -------------------------- | ----------------------------------------------------------------------------- |\n| KServe | [`kserve/kserve`](https://github.com/kserve/kserve) |\n| Kubeflow Katib | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n| Kubeflow Model Registry | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n| Kubeflow Notebooks | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n| Kubeflow Pipelines | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n| Kubeflow Spark Operator | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n| Kubeflow Training Operator | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n## Kubeflow Platform\n\nIf you want to open issue or pull request for the\n[Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform)\ncomponents:\nThe [Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform) refers to the full suite of Kubeflow Components bundled together with additional integration and management tools.\n\n- Use the [`kubeflow/manifests`](https://github.com/kubeflow/manifests) GitHub repository for\n the Kubeflow Manifests.\n- Use the [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) GitHub repository for\n the Kubeflow Profile Controller, Central Dashboard, CRUD Web Apps, PVC Viewer, PodDefault, and\n Access Management components.\nThe following table lists the platform components and their respective source code repositories:\n\nIf you have questions about Kubeflow community or Kubeflow ecosystem, please use the\n[`kubeflow/community`](https://github.com/kubeflow/community) GitHub repository.\n| Component | Source Code |\n|---------------------------------------------------------------------------------------|---------------------------------------------------------------|\n| [Central Dashboard](https://www.kubeflow.org/docs/components/central-dash/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n| [Profile 
Controller](https://www.kubeflow.org/docs/components/central-dash/profiles/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n| Kubeflow Manifests | [`kubeflow/manifests`](https://github.com/kubeflow/manifests) |", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1746931065", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": "README.md", + "discussion_id": "1746931065", + "commented_code": "@@ -1,52 +1,49 @@\n-\n+# Kubeflow\n \n-[![Slack Status](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n+[![Join Kubeflow Slack](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n [![CLOMonitor](https://img.shields.io/endpoint?url=https://clomonitor.io/api/projects/cncf/kubeflow/badge)](https://clomonitor.io/projects/cncf/kubeflow)\n+[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/2660/badge)](https://www.bestpractices.dev/projects/2660)\n \n-The Kubeflow makes AI/ML on Kubernetes simple, portable and scalable.\n+\n \n----\n+## About Kubeflow\n \n-## Documentation\n+[Kubeflow](https://www.kubeflow.org/) makes artificial intelligence and machine learning simple, portable, and scalable.\n+We are an _ecosystem_ of [Kubernetes](https://kubernetes.io/) based components for each stage in the [AI/ML Lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle) with support for best-in-class open source [tools and frameworks](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem).\n \n-Please refer to the official docs at [kubeflow.org](http://kubeflow.org).\n+Please refer to the official [documentation](https://www.kubeflow.org/docs/) for more information.\n \n ## Kubeflow Components\n \n-The Kubeflow ecosystem is composed of multiple open-source projects for each stage in\n-[the ML lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle).\n+The [Kubeflow Ecosystem](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem) is composed of several projects known as [Kubeflow Components](https://www.kubeflow.org/docs/components/).\n \n-Learn more about each project in [the Kubeflow documentation](https://www.kubeflow.org/docs/components/).\n+The following table lists the components and their respective source code repositories:\n \n-Please use the following GitHub repositories to open issues and pull requests for\n-[the different Kubeflow components](https://www.kubeflow.org/docs/started/introduction/#what-are-standalone-kubeflow-components):\n+| Component | Source Code |\n+|-------------------------------------------------------------------------------------|-------------------------------------------------------------------------------|\n+| [KServe](https://www.kubeflow.org/docs/external-add-ons/kserve/) | [`kserve/kserve`](https://github.com/kserve/kserve) |\n+| [Kubeflow Katib](https://www.kubeflow.org/docs/components/katib/) | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n+| [Kubeflow Model Registry](https://www.kubeflow.org/docs/components/model-registry/) | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n+| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n+| [Kubeflow 
Notebooks](https://www.kubeflow.org/docs/components/notebooks/) | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n+| [Kubeflow Pipelines](https://www.kubeflow.org/docs/components/pipelines/) | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n+| [Kubeflow Spark Operator](https://www.kubeflow.org/docs/components/spark-operator/) | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n+| [Kubeflow Training Operator](https://www.kubeflow.org/docs/components/training/) | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n \n-| Component | Source Code |\n-| -------------------------- | ----------------------------------------------------------------------------- |\n-| KServe | [`kserve/kserve`](https://github.com/kserve/kserve) |\n-| Kubeflow Katib | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n-| Kubeflow Model Registry | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n-| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n-| Kubeflow Notebooks | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n-| Kubeflow Pipelines | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n-| Kubeflow Spark Operator | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n-| Kubeflow Training Operator | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n+## Kubeflow Platform\n \n-If you want to open issue or pull request for the\n-[Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform)\n-components:\n+The [Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform) refers to the full suite of Kubeflow Components bundled together with additional integration and management tools.\n \n-- Use the [`kubeflow/manifests`](https://github.com/kubeflow/manifests) GitHub repository for\n- the Kubeflow Manifests.\n-- Use the [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) GitHub repository for\n- the Kubeflow Profile Controller, Central Dashboard, CRUD Web Apps, PVC Viewer, PodDefault, and\n- Access Management components.\n+The following table lists the platform components and their respective source code repositories:\n \n-If you have questions about Kubeflow community or Kubeflow ecosystem, please use the\n-[`kubeflow/community`](https://github.com/kubeflow/community) GitHub repository.\n+| Component | Source Code |\n+|---------------------------------------------------------------------------------------|---------------------------------------------------------------|\n+| [Central Dashboard](https://www.kubeflow.org/docs/components/central-dash/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n+| [Profile Controller](https://www.kubeflow.org/docs/components/central-dash/profiles/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n+| Kubeflow Manifests | [`kubeflow/manifests`](https://github.com/kubeflow/manifests) |", + "comment_created_at": "2024-09-06T10:55:58+00:00", + "comment_author": "andreyvelich", + "comment_body": "Should we link the README for Kubeflow Manifests: https://github.com/kubeflow/manifests?tab=readme-ov-file#overview-of-the-kubeflow-platform ?", + "pr_file_module": null + }, + { + "comment_id": "1747534477", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": "README.md", + "discussion_id": "1746931065", + 
"commented_code": "@@ -1,52 +1,49 @@\n-\n+# Kubeflow\n \n-[![Slack Status](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n+[![Join Kubeflow Slack](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n [![CLOMonitor](https://img.shields.io/endpoint?url=https://clomonitor.io/api/projects/cncf/kubeflow/badge)](https://clomonitor.io/projects/cncf/kubeflow)\n+[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/2660/badge)](https://www.bestpractices.dev/projects/2660)\n \n-The Kubeflow makes AI/ML on Kubernetes simple, portable and scalable.\n+\n \n----\n+## About Kubeflow\n \n-## Documentation\n+[Kubeflow](https://www.kubeflow.org/) makes artificial intelligence and machine learning simple, portable, and scalable.\n+We are an _ecosystem_ of [Kubernetes](https://kubernetes.io/) based components for each stage in the [AI/ML Lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle) with support for best-in-class open source [tools and frameworks](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem).\n \n-Please refer to the official docs at [kubeflow.org](http://kubeflow.org).\n+Please refer to the official [documentation](https://www.kubeflow.org/docs/) for more information.\n \n ## Kubeflow Components\n \n-The Kubeflow ecosystem is composed of multiple open-source projects for each stage in\n-[the ML lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle).\n+The [Kubeflow Ecosystem](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem) is composed of several projects known as [Kubeflow Components](https://www.kubeflow.org/docs/components/).\n \n-Learn more about each project in [the Kubeflow documentation](https://www.kubeflow.org/docs/components/).\n+The following table lists the components and their respective source code repositories:\n \n-Please use the following GitHub repositories to open issues and pull requests for\n-[the different Kubeflow components](https://www.kubeflow.org/docs/started/introduction/#what-are-standalone-kubeflow-components):\n+| Component | Source Code |\n+|-------------------------------------------------------------------------------------|-------------------------------------------------------------------------------|\n+| [KServe](https://www.kubeflow.org/docs/external-add-ons/kserve/) | [`kserve/kserve`](https://github.com/kserve/kserve) |\n+| [Kubeflow Katib](https://www.kubeflow.org/docs/components/katib/) | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n+| [Kubeflow Model Registry](https://www.kubeflow.org/docs/components/model-registry/) | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n+| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n+| [Kubeflow Notebooks](https://www.kubeflow.org/docs/components/notebooks/) | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n+| [Kubeflow Pipelines](https://www.kubeflow.org/docs/components/pipelines/) | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n+| [Kubeflow Spark Operator](https://www.kubeflow.org/docs/components/spark-operator/) | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n+| [Kubeflow Training 
Operator](https://www.kubeflow.org/docs/components/training/) | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n \n-| Component | Source Code |\n-| -------------------------- | ----------------------------------------------------------------------------- |\n-| KServe | [`kserve/kserve`](https://github.com/kserve/kserve) |\n-| Kubeflow Katib | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n-| Kubeflow Model Registry | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n-| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n-| Kubeflow Notebooks | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n-| Kubeflow Pipelines | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n-| Kubeflow Spark Operator | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n-| Kubeflow Training Operator | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n+## Kubeflow Platform\n \n-If you want to open issue or pull request for the\n-[Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform)\n-components:\n+The [Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform) refers to the full suite of Kubeflow Components bundled together with additional integration and management tools.\n \n-- Use the [`kubeflow/manifests`](https://github.com/kubeflow/manifests) GitHub repository for\n- the Kubeflow Manifests.\n-- Use the [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) GitHub repository for\n- the Kubeflow Profile Controller, Central Dashboard, CRUD Web Apps, PVC Viewer, PodDefault, and\n- Access Management components.\n+The following table lists the platform components and their respective source code repositories:\n \n-If you have questions about Kubeflow community or Kubeflow ecosystem, please use the\n-[`kubeflow/community`](https://github.com/kubeflow/community) GitHub repository.\n+| Component | Source Code |\n+|---------------------------------------------------------------------------------------|---------------------------------------------------------------|\n+| [Central Dashboard](https://www.kubeflow.org/docs/components/central-dash/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n+| [Profile Controller](https://www.kubeflow.org/docs/components/central-dash/profiles/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n+| Kubeflow Manifests | [`kubeflow/manifests`](https://github.com/kubeflow/manifests) |", + "comment_created_at": "2024-09-06T18:09:59+00:00", + "comment_author": "thesuperzapper", + "comment_body": "It's the same link as the repo, also, we need to be careful about linking people directly to the `master` of manifests, as they might try and install from there, rather than an actual version like `v1.9.0` tag.", + "pr_file_module": null + }, + { + "comment_id": "1747578150", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": "README.md", + "discussion_id": "1746931065", + "commented_code": "@@ -1,52 +1,49 @@\n-\n+# Kubeflow\n \n-[![Slack Status](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n+[![Join Kubeflow 
Slack](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n [![CLOMonitor](https://img.shields.io/endpoint?url=https://clomonitor.io/api/projects/cncf/kubeflow/badge)](https://clomonitor.io/projects/cncf/kubeflow)\n+[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/2660/badge)](https://www.bestpractices.dev/projects/2660)\n \n-The Kubeflow makes AI/ML on Kubernetes simple, portable and scalable.\n+\n \n----\n+## About Kubeflow\n \n-## Documentation\n+[Kubeflow](https://www.kubeflow.org/) makes artificial intelligence and machine learning simple, portable, and scalable.\n+We are an _ecosystem_ of [Kubernetes](https://kubernetes.io/) based components for each stage in the [AI/ML Lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle) with support for best-in-class open source [tools and frameworks](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem).\n \n-Please refer to the official docs at [kubeflow.org](http://kubeflow.org).\n+Please refer to the official [documentation](https://www.kubeflow.org/docs/) for more information.\n \n ## Kubeflow Components\n \n-The Kubeflow ecosystem is composed of multiple open-source projects for each stage in\n-[the ML lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle).\n+The [Kubeflow Ecosystem](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem) is composed of several projects known as [Kubeflow Components](https://www.kubeflow.org/docs/components/).\n \n-Learn more about each project in [the Kubeflow documentation](https://www.kubeflow.org/docs/components/).\n+The following table lists the components and their respective source code repositories:\n \n-Please use the following GitHub repositories to open issues and pull requests for\n-[the different Kubeflow components](https://www.kubeflow.org/docs/started/introduction/#what-are-standalone-kubeflow-components):\n+| Component | Source Code |\n+|-------------------------------------------------------------------------------------|-------------------------------------------------------------------------------|\n+| [KServe](https://www.kubeflow.org/docs/external-add-ons/kserve/) | [`kserve/kserve`](https://github.com/kserve/kserve) |\n+| [Kubeflow Katib](https://www.kubeflow.org/docs/components/katib/) | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n+| [Kubeflow Model Registry](https://www.kubeflow.org/docs/components/model-registry/) | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n+| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n+| [Kubeflow Notebooks](https://www.kubeflow.org/docs/components/notebooks/) | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n+| [Kubeflow Pipelines](https://www.kubeflow.org/docs/components/pipelines/) | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n+| [Kubeflow Spark Operator](https://www.kubeflow.org/docs/components/spark-operator/) | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n+| [Kubeflow Training Operator](https://www.kubeflow.org/docs/components/training/) | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n \n-| Component | Source Code |\n-| -------------------------- | 
----------------------------------------------------------------------------- |\n-| KServe | [`kserve/kserve`](https://github.com/kserve/kserve) |\n-| Kubeflow Katib | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n-| Kubeflow Model Registry | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n-| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n-| Kubeflow Notebooks | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n-| Kubeflow Pipelines | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n-| Kubeflow Spark Operator | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n-| Kubeflow Training Operator | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n+## Kubeflow Platform\n \n-If you want to open issue or pull request for the\n-[Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform)\n-components:\n+The [Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform) refers to the full suite of Kubeflow Components bundled together with additional integration and management tools.\n \n-- Use the [`kubeflow/manifests`](https://github.com/kubeflow/manifests) GitHub repository for\n- the Kubeflow Manifests.\n-- Use the [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) GitHub repository for\n- the Kubeflow Profile Controller, Central Dashboard, CRUD Web Apps, PVC Viewer, PodDefault, and\n- Access Management components.\n+The following table lists the platform components and their respective source code repositories:\n \n-If you have questions about Kubeflow community or Kubeflow ecosystem, please use the\n-[`kubeflow/community`](https://github.com/kubeflow/community) GitHub repository.\n+| Component | Source Code |\n+|---------------------------------------------------------------------------------------|---------------------------------------------------------------|\n+| [Central Dashboard](https://www.kubeflow.org/docs/components/central-dash/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n+| [Profile Controller](https://www.kubeflow.org/docs/components/central-dash/profiles/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n+| Kubeflow Manifests | [`kubeflow/manifests`](https://github.com/kubeflow/manifests) |", + "comment_created_at": "2024-09-06T18:50:25+00:00", + "comment_author": "andreyvelich", + "comment_body": "Sure, @kubeflow/wg-manifests-leads any comments there ?\r\nI am fine to make this as a followup change when we have dedicated website section that explains Kubeflow Manifests. 
", + "pr_file_module": null + }, + { + "comment_id": "1747739190", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": "README.md", + "discussion_id": "1746931065", + "commented_code": "@@ -1,52 +1,49 @@\n-\n+# Kubeflow\n \n-[![Slack Status](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n+[![Join Kubeflow Slack](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n [![CLOMonitor](https://img.shields.io/endpoint?url=https://clomonitor.io/api/projects/cncf/kubeflow/badge)](https://clomonitor.io/projects/cncf/kubeflow)\n+[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/2660/badge)](https://www.bestpractices.dev/projects/2660)\n \n-The Kubeflow makes AI/ML on Kubernetes simple, portable and scalable.\n+\n \n----\n+## About Kubeflow\n \n-## Documentation\n+[Kubeflow](https://www.kubeflow.org/) makes artificial intelligence and machine learning simple, portable, and scalable.\n+We are an _ecosystem_ of [Kubernetes](https://kubernetes.io/) based components for each stage in the [AI/ML Lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle) with support for best-in-class open source [tools and frameworks](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem).\n \n-Please refer to the official docs at [kubeflow.org](http://kubeflow.org).\n+Please refer to the official [documentation](https://www.kubeflow.org/docs/) for more information.\n \n ## Kubeflow Components\n \n-The Kubeflow ecosystem is composed of multiple open-source projects for each stage in\n-[the ML lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle).\n+The [Kubeflow Ecosystem](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem) is composed of several projects known as [Kubeflow Components](https://www.kubeflow.org/docs/components/).\n \n-Learn more about each project in [the Kubeflow documentation](https://www.kubeflow.org/docs/components/).\n+The following table lists the components and their respective source code repositories:\n \n-Please use the following GitHub repositories to open issues and pull requests for\n-[the different Kubeflow components](https://www.kubeflow.org/docs/started/introduction/#what-are-standalone-kubeflow-components):\n+| Component | Source Code |\n+|-------------------------------------------------------------------------------------|-------------------------------------------------------------------------------|\n+| [KServe](https://www.kubeflow.org/docs/external-add-ons/kserve/) | [`kserve/kserve`](https://github.com/kserve/kserve) |\n+| [Kubeflow Katib](https://www.kubeflow.org/docs/components/katib/) | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n+| [Kubeflow Model Registry](https://www.kubeflow.org/docs/components/model-registry/) | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n+| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n+| [Kubeflow Notebooks](https://www.kubeflow.org/docs/components/notebooks/) | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n+| [Kubeflow Pipelines](https://www.kubeflow.org/docs/components/pipelines/) | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n+| [Kubeflow Spark 
Operator](https://www.kubeflow.org/docs/components/spark-operator/) | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n+| [Kubeflow Training Operator](https://www.kubeflow.org/docs/components/training/) | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n \n-| Component | Source Code |\n-| -------------------------- | ----------------------------------------------------------------------------- |\n-| KServe | [`kserve/kserve`](https://github.com/kserve/kserve) |\n-| Kubeflow Katib | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n-| Kubeflow Model Registry | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n-| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n-| Kubeflow Notebooks | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n-| Kubeflow Pipelines | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n-| Kubeflow Spark Operator | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n-| Kubeflow Training Operator | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n+## Kubeflow Platform\n \n-If you want to open issue or pull request for the\n-[Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform)\n-components:\n+The [Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform) refers to the full suite of Kubeflow Components bundled together with additional integration and management tools.\n \n-- Use the [`kubeflow/manifests`](https://github.com/kubeflow/manifests) GitHub repository for\n- the Kubeflow Manifests.\n-- Use the [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) GitHub repository for\n- the Kubeflow Profile Controller, Central Dashboard, CRUD Web Apps, PVC Viewer, PodDefault, and\n- Access Management components.\n+The following table lists the platform components and their respective source code repositories:\n \n-If you have questions about Kubeflow community or Kubeflow ecosystem, please use the\n-[`kubeflow/community`](https://github.com/kubeflow/community) GitHub repository.\n+| Component | Source Code |\n+|---------------------------------------------------------------------------------------|---------------------------------------------------------------|\n+| [Central Dashboard](https://www.kubeflow.org/docs/components/central-dash/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n+| [Profile Controller](https://www.kubeflow.org/docs/components/central-dash/profiles/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n+| Kubeflow Manifests | [`kubeflow/manifests`](https://github.com/kubeflow/manifests) |", + "comment_created_at": "2024-09-06T21:30:57+00:00", + "comment_author": "thesuperzapper", + "comment_body": "@andreyvelich I don't think we need more than we already have on the \"Installing Kubeflow\" page: \r\n- https://www.kubeflow.org/docs/started/installing-kubeflow/#kubeflow-manifests\r\n\r\nAs I keep highlighting, the manifests are intended to be used as the base of distribution, and by advanced users wanting to test Kubeflow out.\r\n\r\nFor almost everyone else, they will have a bad experience trying to use the manifests in production. 
We already get a lot of spam issues from people asking for help on issues that are unrelated to our Kubeflow and are more about their specific platform, I don't want to increase the amount of that.", + "pr_file_module": null + }, + { + "comment_id": "1747755050", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7642, + "pr_file": "README.md", + "discussion_id": "1746931065", + "commented_code": "@@ -1,52 +1,49 @@\n-\n+# Kubeflow\n \n-[![Slack Status](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n+[![Join Kubeflow Slack](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n [![CLOMonitor](https://img.shields.io/endpoint?url=https://clomonitor.io/api/projects/cncf/kubeflow/badge)](https://clomonitor.io/projects/cncf/kubeflow)\n+[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/2660/badge)](https://www.bestpractices.dev/projects/2660)\n \n-The Kubeflow makes AI/ML on Kubernetes simple, portable and scalable.\n+\n \n----\n+## About Kubeflow\n \n-## Documentation\n+[Kubeflow](https://www.kubeflow.org/) makes artificial intelligence and machine learning simple, portable, and scalable.\n+We are an _ecosystem_ of [Kubernetes](https://kubernetes.io/) based components for each stage in the [AI/ML Lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle) with support for best-in-class open source [tools and frameworks](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem).\n \n-Please refer to the official docs at [kubeflow.org](http://kubeflow.org).\n+Please refer to the official [documentation](https://www.kubeflow.org/docs/) for more information.\n \n ## Kubeflow Components\n \n-The Kubeflow ecosystem is composed of multiple open-source projects for each stage in\n-[the ML lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle).\n+The [Kubeflow Ecosystem](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem) is composed of several projects known as [Kubeflow Components](https://www.kubeflow.org/docs/components/).\n \n-Learn more about each project in [the Kubeflow documentation](https://www.kubeflow.org/docs/components/).\n+The following table lists the components and their respective source code repositories:\n \n-Please use the following GitHub repositories to open issues and pull requests for\n-[the different Kubeflow components](https://www.kubeflow.org/docs/started/introduction/#what-are-standalone-kubeflow-components):\n+| Component | Source Code |\n+|-------------------------------------------------------------------------------------|-------------------------------------------------------------------------------|\n+| [KServe](https://www.kubeflow.org/docs/external-add-ons/kserve/) | [`kserve/kserve`](https://github.com/kserve/kserve) |\n+| [Kubeflow Katib](https://www.kubeflow.org/docs/components/katib/) | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n+| [Kubeflow Model Registry](https://www.kubeflow.org/docs/components/model-registry/) | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n+| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n+| [Kubeflow Notebooks](https://www.kubeflow.org/docs/components/notebooks/) | 
[`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n+| [Kubeflow Pipelines](https://www.kubeflow.org/docs/components/pipelines/) | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n+| [Kubeflow Spark Operator](https://www.kubeflow.org/docs/components/spark-operator/) | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n+| [Kubeflow Training Operator](https://www.kubeflow.org/docs/components/training/) | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n \n-| Component | Source Code |\n-| -------------------------- | ----------------------------------------------------------------------------- |\n-| KServe | [`kserve/kserve`](https://github.com/kserve/kserve) |\n-| Kubeflow Katib | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n-| Kubeflow Model Registry | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n-| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n-| Kubeflow Notebooks | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n-| Kubeflow Pipelines | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n-| Kubeflow Spark Operator | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n-| Kubeflow Training Operator | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n+## Kubeflow Platform\n \n-If you want to open issue or pull request for the\n-[Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform)\n-components:\n+The [Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform) refers to the full suite of Kubeflow Components bundled together with additional integration and management tools.\n \n-- Use the [`kubeflow/manifests`](https://github.com/kubeflow/manifests) GitHub repository for\n- the Kubeflow Manifests.\n-- Use the [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) GitHub repository for\n- the Kubeflow Profile Controller, Central Dashboard, CRUD Web Apps, PVC Viewer, PodDefault, and\n- Access Management components.\n+The following table lists the platform components and their respective source code repositories:\n \n-If you have questions about Kubeflow community or Kubeflow ecosystem, please use the\n-[`kubeflow/community`](https://github.com/kubeflow/community) GitHub repository.\n+| Component | Source Code |\n+|---------------------------------------------------------------------------------------|---------------------------------------------------------------|\n+| [Central Dashboard](https://www.kubeflow.org/docs/components/central-dash/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n+| [Profile Controller](https://www.kubeflow.org/docs/components/central-dash/profiles/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n+| Kubeflow Manifests | [`kubeflow/manifests`](https://github.com/kubeflow/manifests) |", + "comment_created_at": "2024-09-06T21:53:12+00:00", + "comment_author": "thesuperzapper", + "comment_body": "I have added the link above in https://github.com/kubeflow/kubeflow/pull/7642/commits/7abcd7727c4611076aa22c4d63eced167e14b619", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1186256926", + "pr_number": 7123, + "pr_file": "conformance/1.7/README.md", + "created_at": "2023-05-05T15:55:30+00:00", + "commented_code": "# Kubeflow conformance program (WIP)\n\nBefore running the 
conformance tests, you need to configure kubectl default context to point to the k8s cluster that is hosting Kubeflow.\n\nTODO: Make the kubeflow namespace configurable.\n\nTo run version of the conformance test.\n\n`cd kubeflow/conformance/`\n\n**Run conformance test**\n\n`make run`", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1186256926", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7123, + "pr_file": "conformance/1.7/README.md", + "discussion_id": "1186256926", + "commented_code": "@@ -0,0 +1,27 @@\n+# Kubeflow conformance program (WIP)\n+\n+Before running the conformance tests, you need to configure kubectl default context to point to the k8s cluster that is hosting Kubeflow.\n+\n+TODO: Make the kubeflow namespace configurable.\n+\n+To run version of the conformance test.\n+\n+`cd kubeflow/conformance/`\n+\n+**Run conformance test**\n+\n+`make run`", + "comment_created_at": "2023-05-05T15:55:30+00:00", + "comment_author": "gkcalat", + "comment_body": "NIT: add brief comments on how to run component-specific tests (e.g. `make run-kfp`)", + "pr_file_module": null + }, + { + "comment_id": "1186512090", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7123, + "pr_file": "conformance/1.7/README.md", + "discussion_id": "1186256926", + "commented_code": "@@ -0,0 +1,27 @@\n+# Kubeflow conformance program (WIP)\n+\n+Before running the conformance tests, you need to configure kubectl default context to point to the k8s cluster that is hosting Kubeflow.\n+\n+TODO: Make the kubeflow namespace configurable.\n+\n+To run version of the conformance test.\n+\n+`cd kubeflow/conformance/`\n+\n+**Run conformance test**\n+\n+`make run`", + "comment_created_at": "2023-05-05T21:27:39+00:00", + "comment_author": "james-jwu", + "comment_body": "Done.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "596651177", + "pr_number": 5694, + "pr_file": "components/example-notebook-servers/README.md", + "created_at": "2021-03-18T08:41:34+00:00", + "commented_code": "# Example Notebook Servers\n\n**DISCLAIMER:** The Notebooks Working Group provides these Dockerfiles and\ntheir images as examples only, they are not tested with our CI/CD pipelines\nand are not certified to work in every situation. As such, issues related to\nthese images will be dealt with in a best efforts approach. The Notebooks\nWorking Group will work on officially supporting some of these Notebook\nServer images in the near future. If you do encounter a problem in one of\nthese images, contributions and issue reports are greatly appreciated.\n\n## Introduction", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "596651177", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5694, + "pr_file": "components/example-notebook-servers/README.md", + "discussion_id": "596651177", + "commented_code": "@@ -0,0 +1,139 @@\n+# Example Notebook Servers\n+\n+**DISCLAIMER:** The Notebooks Working Group provides these Dockerfiles and\n+their images as examples only, they are not tested with our CI/CD pipelines\n+and are not certified to work in every situation. As such, issues related to\n+these images will be dealt with in a best efforts approach. The Notebooks\n+Working Group will work on officially supporting some of these Notebook\n+Server images in the near future. 
If you do encounter a problem in one of\n+these images, contributions and issue reports are greatly appreciated.\n+\n+## Introduction", + "comment_created_at": "2021-03-18T08:41:34+00:00", + "comment_author": "StefanoFioravanzo", + "comment_body": "I would suggest trying to remove the passive voice in favour for an active voice. This is a general comment for the entire document.", + "pr_file_module": null + }, + { + "comment_id": "596679316", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5694, + "pr_file": "components/example-notebook-servers/README.md", + "discussion_id": "596651177", + "commented_code": "@@ -0,0 +1,139 @@\n+# Example Notebook Servers\n+\n+**DISCLAIMER:** The Notebooks Working Group provides these Dockerfiles and\n+their images as examples only, they are not tested with our CI/CD pipelines\n+and are not certified to work in every situation. As such, issues related to\n+these images will be dealt with in a best efforts approach. The Notebooks\n+Working Group will work on officially supporting some of these Notebook\n+Server images in the near future. If you do encounter a problem in one of\n+these images, contributions and issue reports are greatly appreciated.\n+\n+## Introduction", + "comment_created_at": "2021-03-18T09:20:21+00:00", + "comment_author": "davidspek", + "comment_body": "Yeah, I will look into this. I'm used to scientific writing so tend to default to the passive voice. ", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-structured-documentation-with-examples.md b/_reviewers/kubeflow-structured-documentation-with-examples.md index 7710f88..3da6bdf 100644 --- a/_reviewers/kubeflow-structured-documentation-with-examples.md +++ b/_reviewers/kubeflow-structured-documentation-with-examples.md @@ -51,297 +51,3 @@ Brief description of the component's purpose. 5. **Links to stable versions** rather than development branches to prevent users from accidentally using unstable code. Following these practices makes documentation more useful, accessible, and maintainable while improving the overall user experience. - - -[ - { - "discussion_id": "1746924443", - "pr_number": 7642, - "pr_file": "CHANGELOG.md", - "created_at": "2024-09-06T10:48:58+00:00", - "commented_code": "# Change Log\n# Changelog\n\nThe [GitHub Releases](https://github.com/kubeflow/kubeflow/releases) page covers newer versions of `kubeflow/kubeflow` components.", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1746924443", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": "CHANGELOG.md", - "discussion_id": "1746924443", - "commented_code": "@@ -1,4 +1,10 @@\n-# Change Log\n+# Changelog\n+\n+The [GitHub Releases](https://github.com/kubeflow/kubeflow/releases) page covers newer versions of `kubeflow/kubeflow` components.", - "comment_created_at": "2024-09-06T10:48:58+00:00", - "comment_author": "andreyvelich", - "comment_body": "Does it make sense to list which components that release page covers (e.g. 
Central Dashboard, Notebooks, etc.)?", - "pr_file_module": null - }, - { - "comment_id": "1747526308", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": "CHANGELOG.md", - "discussion_id": "1746924443", - "commented_code": "@@ -1,4 +1,10 @@\n-# Change Log\n+# Changelog\n+\n+The [GitHub Releases](https://github.com/kubeflow/kubeflow/releases) page covers newer versions of `kubeflow/kubeflow` components.", - "comment_created_at": "2024-09-06T18:03:24+00:00", - "comment_author": "thesuperzapper", - "comment_body": "It covers everything, so that is probably not that useful.\r\n\r\nIt's the overall \"platform version\" so by proxy includes everything.", - "pr_file_module": null - }, - { - "comment_id": "1747580441", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": "CHANGELOG.md", - "discussion_id": "1746924443", - "commented_code": "@@ -1,4 +1,10 @@\n-# Change Log\n+# Changelog\n+\n+The [GitHub Releases](https://github.com/kubeflow/kubeflow/releases) page covers newer versions of `kubeflow/kubeflow` components.", - "comment_created_at": "2024-09-06T18:51:51+00:00", - "comment_author": "andreyvelich", - "comment_body": "@thesuperzapper Are we planing to remove this changelog from `kubeflow/kubeflow` once we split the repos ?", - "pr_file_module": null - }, - { - "comment_id": "1747736618", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": "CHANGELOG.md", - "discussion_id": "1746924443", - "commented_code": "@@ -1,4 +1,10 @@\n-# Change Log\n+# Changelog\n+\n+The [GitHub Releases](https://github.com/kubeflow/kubeflow/releases) page covers newer versions of `kubeflow/kubeflow` components.", - "comment_created_at": "2024-09-06T21:27:38+00:00", - "comment_author": "thesuperzapper", - "comment_body": "This changelog is only here for historical reasons, it only includes stuff from 0.5 and earlier.\r\n\r\nWe may as well leave it once we split the repo, and leave the top section which links to the more modern changelogs. 
", - "pr_file_module": null - }, - { - "comment_id": "1747743581", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": "CHANGELOG.md", - "discussion_id": "1746924443", - "commented_code": "@@ -1,4 +1,10 @@\n-# Change Log\n+# Changelog\n+\n+The [GitHub Releases](https://github.com/kubeflow/kubeflow/releases) page covers newer versions of `kubeflow/kubeflow` components.", - "comment_created_at": "2024-09-06T21:36:36+00:00", - "comment_author": "andreyvelich", - "comment_body": ">We may as well leave it once we split the repo, and leave the top section which links to the more modern changelogs.\r\n\r\nYeah, but in that case you have to constantly update this changelog which links to changelogs of every Kubeflow components after Kubeflow release.\r\nDo we want to do it in the future ?", - "pr_file_module": null - }, - { - "comment_id": "1747744882", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": "CHANGELOG.md", - "discussion_id": "1746924443", - "commented_code": "@@ -1,4 +1,10 @@\n-# Change Log\n+# Changelog\n+\n+The [GitHub Releases](https://github.com/kubeflow/kubeflow/releases) page covers newer versions of `kubeflow/kubeflow` components.", - "comment_created_at": "2024-09-06T21:38:31+00:00", - "comment_author": "thesuperzapper", - "comment_body": "We should 100% have a page under this section of the website which includes those links:\r\n\r\n- https://www.kubeflow.org/docs/releases/\r\n\r\nAnd just link to that page.", - "pr_file_module": null - }, - { - "comment_id": "1747746000", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": "CHANGELOG.md", - "discussion_id": "1746924443", - "commented_code": "@@ -1,4 +1,10 @@\n-# Change Log\n+# Changelog\n+\n+The [GitHub Releases](https://github.com/kubeflow/kubeflow/releases) page covers newer versions of `kubeflow/kubeflow` components.", - "comment_created_at": "2024-09-06T21:40:01+00:00", - "comment_author": "andreyvelich", - "comment_body": "Should we just use section in README for it to minimize number of files in `kubeflow/kubeflow` repo ?", - "pr_file_module": null - }, - { - "comment_id": "1747753171", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": "CHANGELOG.md", - "discussion_id": "1746924443", - "commented_code": "@@ -1,4 +1,10 @@\n-# Change Log\n+# Changelog\n+\n+The [GitHub Releases](https://github.com/kubeflow/kubeflow/releases) page covers newer versions of `kubeflow/kubeflow` components.", - "comment_created_at": "2024-09-06T21:50:17+00:00", - "comment_author": "thesuperzapper", - "comment_body": "As I was saying, google loves CHANGELOG.md files, so even a largely empty file which just links to the website is good.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1746925519", - "pr_number": 7642, - "pr_file": "CONTRIBUTING.md", - "created_at": "2024-09-06T10:50:06+00:00", - "commented_code": "# Kubeflow Contributor Guide", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1746925519", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": "CONTRIBUTING.md", - "discussion_id": "1746925519", - "commented_code": "@@ -1,13 +1,6 @@\n # Kubeflow Contributor Guide", - "comment_created_at": "2024-09-06T10:50:06+00:00", - "comment_author": "andreyvelich", - "comment_body": "@thesuperzapper Would it be easier to remove this file and just add the `## Contributing` section to the README where we link the https://www.kubeflow.org/docs/about/contributing/ page ?", - 
"pr_file_module": null - }, - { - "comment_id": "1747527963", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": "CONTRIBUTING.md", - "discussion_id": "1746925519", - "commented_code": "@@ -1,13 +1,6 @@\n # Kubeflow Contributor Guide", - "comment_created_at": "2024-09-06T18:04:35+00:00", - "comment_author": "thesuperzapper", - "comment_body": "We should probably have both, because google loves to index files like `CONTRIBUTING.md`.", - "pr_file_module": null - }, - { - "comment_id": "1747540602", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": "CONTRIBUTING.md", - "discussion_id": "1746925519", - "commented_code": "@@ -1,13 +1,6 @@\n # Kubeflow Contributor Guide", - "comment_created_at": "2024-09-06T18:15:19+00:00", - "comment_author": "thesuperzapper", - "comment_body": "I added `& Contributing` to the final README heading in https://github.com/kubeflow/kubeflow/pull/7642/commits/5513f05f5a67250aeb152b4154f1bb1ed523b727", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1746931065", - "pr_number": 7642, - "pr_file": "README.md", - "created_at": "2024-09-06T10:55:58+00:00", - "commented_code": "\n# Kubeflow\n\n[![Slack Status](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n[![Join Kubeflow Slack](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n[![CLOMonitor](https://img.shields.io/endpoint?url=https://clomonitor.io/api/projects/cncf/kubeflow/badge)](https://clomonitor.io/projects/cncf/kubeflow)\n[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/2660/badge)](https://www.bestpractices.dev/projects/2660)\n\nThe Kubeflow makes AI/ML on Kubernetes simple, portable and scalable.\n\n\n---\n## About Kubeflow\n\n## Documentation\n[Kubeflow](https://www.kubeflow.org/) makes artificial intelligence and machine learning simple, portable, and scalable.\nWe are an _ecosystem_ of [Kubernetes](https://kubernetes.io/) based components for each stage in the [AI/ML Lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle) with support for best-in-class open source [tools and frameworks](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem).\n\nPlease refer to the official docs at [kubeflow.org](http://kubeflow.org).\nPlease refer to the official [documentation](https://www.kubeflow.org/docs/) for more information.\n\n## Kubeflow Components\n\nThe Kubeflow ecosystem is composed of multiple open-source projects for each stage in\n[the ML lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle).\nThe [Kubeflow Ecosystem](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem) is composed of several projects known as [Kubeflow Components](https://www.kubeflow.org/docs/components/).\n\nLearn more about each project in [the Kubeflow documentation](https://www.kubeflow.org/docs/components/).\nThe following table lists the components and their respective source code repositories:\n\nPlease use the following GitHub repositories to open issues and pull requests for\n[the different Kubeflow components](https://www.kubeflow.org/docs/started/introduction/#what-are-standalone-kubeflow-components):\n| Component | Source Code 
|\n|-------------------------------------------------------------------------------------|-------------------------------------------------------------------------------|\n| [KServe](https://www.kubeflow.org/docs/external-add-ons/kserve/) | [`kserve/kserve`](https://github.com/kserve/kserve) |\n| [Kubeflow Katib](https://www.kubeflow.org/docs/components/katib/) | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n| [Kubeflow Model Registry](https://www.kubeflow.org/docs/components/model-registry/) | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n| [Kubeflow Notebooks](https://www.kubeflow.org/docs/components/notebooks/) | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n| [Kubeflow Pipelines](https://www.kubeflow.org/docs/components/pipelines/) | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n| [Kubeflow Spark Operator](https://www.kubeflow.org/docs/components/spark-operator/) | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n| [Kubeflow Training Operator](https://www.kubeflow.org/docs/components/training/) | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n\n| Component | Source Code |\n| -------------------------- | ----------------------------------------------------------------------------- |\n| KServe | [`kserve/kserve`](https://github.com/kserve/kserve) |\n| Kubeflow Katib | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n| Kubeflow Model Registry | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n| Kubeflow Notebooks | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n| Kubeflow Pipelines | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n| Kubeflow Spark Operator | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n| Kubeflow Training Operator | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n## Kubeflow Platform\n\nIf you want to open issue or pull request for the\n[Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform)\ncomponents:\nThe [Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform) refers to the full suite of Kubeflow Components bundled together with additional integration and management tools.\n\n- Use the [`kubeflow/manifests`](https://github.com/kubeflow/manifests) GitHub repository for\n the Kubeflow Manifests.\n- Use the [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) GitHub repository for\n the Kubeflow Profile Controller, Central Dashboard, CRUD Web Apps, PVC Viewer, PodDefault, and\n Access Management components.\nThe following table lists the platform components and their respective source code repositories:\n\nIf you have questions about Kubeflow community or Kubeflow ecosystem, please use the\n[`kubeflow/community`](https://github.com/kubeflow/community) GitHub repository.\n| Component | Source Code |\n|---------------------------------------------------------------------------------------|---------------------------------------------------------------|\n| [Central Dashboard](https://www.kubeflow.org/docs/components/central-dash/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n| [Profile 
Controller](https://www.kubeflow.org/docs/components/central-dash/profiles/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n| Kubeflow Manifests | [`kubeflow/manifests`](https://github.com/kubeflow/manifests) |", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1746931065", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": "README.md", - "discussion_id": "1746931065", - "commented_code": "@@ -1,52 +1,49 @@\n-\n+# Kubeflow\n \n-[![Slack Status](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n+[![Join Kubeflow Slack](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n [![CLOMonitor](https://img.shields.io/endpoint?url=https://clomonitor.io/api/projects/cncf/kubeflow/badge)](https://clomonitor.io/projects/cncf/kubeflow)\n+[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/2660/badge)](https://www.bestpractices.dev/projects/2660)\n \n-The Kubeflow makes AI/ML on Kubernetes simple, portable and scalable.\n+\n \n----\n+## About Kubeflow\n \n-## Documentation\n+[Kubeflow](https://www.kubeflow.org/) makes artificial intelligence and machine learning simple, portable, and scalable.\n+We are an _ecosystem_ of [Kubernetes](https://kubernetes.io/) based components for each stage in the [AI/ML Lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle) with support for best-in-class open source [tools and frameworks](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem).\n \n-Please refer to the official docs at [kubeflow.org](http://kubeflow.org).\n+Please refer to the official [documentation](https://www.kubeflow.org/docs/) for more information.\n \n ## Kubeflow Components\n \n-The Kubeflow ecosystem is composed of multiple open-source projects for each stage in\n-[the ML lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle).\n+The [Kubeflow Ecosystem](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem) is composed of several projects known as [Kubeflow Components](https://www.kubeflow.org/docs/components/).\n \n-Learn more about each project in [the Kubeflow documentation](https://www.kubeflow.org/docs/components/).\n+The following table lists the components and their respective source code repositories:\n \n-Please use the following GitHub repositories to open issues and pull requests for\n-[the different Kubeflow components](https://www.kubeflow.org/docs/started/introduction/#what-are-standalone-kubeflow-components):\n+| Component | Source Code |\n+|-------------------------------------------------------------------------------------|-------------------------------------------------------------------------------|\n+| [KServe](https://www.kubeflow.org/docs/external-add-ons/kserve/) | [`kserve/kserve`](https://github.com/kserve/kserve) |\n+| [Kubeflow Katib](https://www.kubeflow.org/docs/components/katib/) | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n+| [Kubeflow Model Registry](https://www.kubeflow.org/docs/components/model-registry/) | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n+| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n+| [Kubeflow 
Notebooks](https://www.kubeflow.org/docs/components/notebooks/) | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n+| [Kubeflow Pipelines](https://www.kubeflow.org/docs/components/pipelines/) | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n+| [Kubeflow Spark Operator](https://www.kubeflow.org/docs/components/spark-operator/) | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n+| [Kubeflow Training Operator](https://www.kubeflow.org/docs/components/training/) | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n \n-| Component | Source Code |\n-| -------------------------- | ----------------------------------------------------------------------------- |\n-| KServe | [`kserve/kserve`](https://github.com/kserve/kserve) |\n-| Kubeflow Katib | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n-| Kubeflow Model Registry | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n-| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n-| Kubeflow Notebooks | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n-| Kubeflow Pipelines | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n-| Kubeflow Spark Operator | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n-| Kubeflow Training Operator | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n+## Kubeflow Platform\n \n-If you want to open issue or pull request for the\n-[Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform)\n-components:\n+The [Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform) refers to the full suite of Kubeflow Components bundled together with additional integration and management tools.\n \n-- Use the [`kubeflow/manifests`](https://github.com/kubeflow/manifests) GitHub repository for\n- the Kubeflow Manifests.\n-- Use the [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) GitHub repository for\n- the Kubeflow Profile Controller, Central Dashboard, CRUD Web Apps, PVC Viewer, PodDefault, and\n- Access Management components.\n+The following table lists the platform components and their respective source code repositories:\n \n-If you have questions about Kubeflow community or Kubeflow ecosystem, please use the\n-[`kubeflow/community`](https://github.com/kubeflow/community) GitHub repository.\n+| Component | Source Code |\n+|---------------------------------------------------------------------------------------|---------------------------------------------------------------|\n+| [Central Dashboard](https://www.kubeflow.org/docs/components/central-dash/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n+| [Profile Controller](https://www.kubeflow.org/docs/components/central-dash/profiles/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n+| Kubeflow Manifests | [`kubeflow/manifests`](https://github.com/kubeflow/manifests) |", - "comment_created_at": "2024-09-06T10:55:58+00:00", - "comment_author": "andreyvelich", - "comment_body": "Should we link the README for Kubeflow Manifests: https://github.com/kubeflow/manifests?tab=readme-ov-file#overview-of-the-kubeflow-platform ?", - "pr_file_module": null - }, - { - "comment_id": "1747534477", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": "README.md", - "discussion_id": "1746931065", - 
"commented_code": "@@ -1,52 +1,49 @@\n-\n+# Kubeflow\n \n-[![Slack Status](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n+[![Join Kubeflow Slack](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n [![CLOMonitor](https://img.shields.io/endpoint?url=https://clomonitor.io/api/projects/cncf/kubeflow/badge)](https://clomonitor.io/projects/cncf/kubeflow)\n+[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/2660/badge)](https://www.bestpractices.dev/projects/2660)\n \n-The Kubeflow makes AI/ML on Kubernetes simple, portable and scalable.\n+\n \n----\n+## About Kubeflow\n \n-## Documentation\n+[Kubeflow](https://www.kubeflow.org/) makes artificial intelligence and machine learning simple, portable, and scalable.\n+We are an _ecosystem_ of [Kubernetes](https://kubernetes.io/) based components for each stage in the [AI/ML Lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle) with support for best-in-class open source [tools and frameworks](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem).\n \n-Please refer to the official docs at [kubeflow.org](http://kubeflow.org).\n+Please refer to the official [documentation](https://www.kubeflow.org/docs/) for more information.\n \n ## Kubeflow Components\n \n-The Kubeflow ecosystem is composed of multiple open-source projects for each stage in\n-[the ML lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle).\n+The [Kubeflow Ecosystem](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem) is composed of several projects known as [Kubeflow Components](https://www.kubeflow.org/docs/components/).\n \n-Learn more about each project in [the Kubeflow documentation](https://www.kubeflow.org/docs/components/).\n+The following table lists the components and their respective source code repositories:\n \n-Please use the following GitHub repositories to open issues and pull requests for\n-[the different Kubeflow components](https://www.kubeflow.org/docs/started/introduction/#what-are-standalone-kubeflow-components):\n+| Component | Source Code |\n+|-------------------------------------------------------------------------------------|-------------------------------------------------------------------------------|\n+| [KServe](https://www.kubeflow.org/docs/external-add-ons/kserve/) | [`kserve/kserve`](https://github.com/kserve/kserve) |\n+| [Kubeflow Katib](https://www.kubeflow.org/docs/components/katib/) | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n+| [Kubeflow Model Registry](https://www.kubeflow.org/docs/components/model-registry/) | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n+| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n+| [Kubeflow Notebooks](https://www.kubeflow.org/docs/components/notebooks/) | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n+| [Kubeflow Pipelines](https://www.kubeflow.org/docs/components/pipelines/) | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n+| [Kubeflow Spark Operator](https://www.kubeflow.org/docs/components/spark-operator/) | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n+| [Kubeflow Training 
Operator](https://www.kubeflow.org/docs/components/training/) | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n \n-| Component | Source Code |\n-| -------------------------- | ----------------------------------------------------------------------------- |\n-| KServe | [`kserve/kserve`](https://github.com/kserve/kserve) |\n-| Kubeflow Katib | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n-| Kubeflow Model Registry | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n-| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n-| Kubeflow Notebooks | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n-| Kubeflow Pipelines | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n-| Kubeflow Spark Operator | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n-| Kubeflow Training Operator | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n+## Kubeflow Platform\n \n-If you want to open issue or pull request for the\n-[Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform)\n-components:\n+The [Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform) refers to the full suite of Kubeflow Components bundled together with additional integration and management tools.\n \n-- Use the [`kubeflow/manifests`](https://github.com/kubeflow/manifests) GitHub repository for\n- the Kubeflow Manifests.\n-- Use the [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) GitHub repository for\n- the Kubeflow Profile Controller, Central Dashboard, CRUD Web Apps, PVC Viewer, PodDefault, and\n- Access Management components.\n+The following table lists the platform components and their respective source code repositories:\n \n-If you have questions about Kubeflow community or Kubeflow ecosystem, please use the\n-[`kubeflow/community`](https://github.com/kubeflow/community) GitHub repository.\n+| Component | Source Code |\n+|---------------------------------------------------------------------------------------|---------------------------------------------------------------|\n+| [Central Dashboard](https://www.kubeflow.org/docs/components/central-dash/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n+| [Profile Controller](https://www.kubeflow.org/docs/components/central-dash/profiles/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n+| Kubeflow Manifests | [`kubeflow/manifests`](https://github.com/kubeflow/manifests) |", - "comment_created_at": "2024-09-06T18:09:59+00:00", - "comment_author": "thesuperzapper", - "comment_body": "It's the same link as the repo, also, we need to be careful about linking people directly to the `master` of manifests, as they might try and install from there, rather than an actual version like `v1.9.0` tag.", - "pr_file_module": null - }, - { - "comment_id": "1747578150", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": "README.md", - "discussion_id": "1746931065", - "commented_code": "@@ -1,52 +1,49 @@\n-\n+# Kubeflow\n \n-[![Slack Status](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n+[![Join Kubeflow 
Slack](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n [![CLOMonitor](https://img.shields.io/endpoint?url=https://clomonitor.io/api/projects/cncf/kubeflow/badge)](https://clomonitor.io/projects/cncf/kubeflow)\n+[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/2660/badge)](https://www.bestpractices.dev/projects/2660)\n \n-The Kubeflow makes AI/ML on Kubernetes simple, portable and scalable.\n+\n \n----\n+## About Kubeflow\n \n-## Documentation\n+[Kubeflow](https://www.kubeflow.org/) makes artificial intelligence and machine learning simple, portable, and scalable.\n+We are an _ecosystem_ of [Kubernetes](https://kubernetes.io/) based components for each stage in the [AI/ML Lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle) with support for best-in-class open source [tools and frameworks](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem).\n \n-Please refer to the official docs at [kubeflow.org](http://kubeflow.org).\n+Please refer to the official [documentation](https://www.kubeflow.org/docs/) for more information.\n \n ## Kubeflow Components\n \n-The Kubeflow ecosystem is composed of multiple open-source projects for each stage in\n-[the ML lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle).\n+The [Kubeflow Ecosystem](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem) is composed of several projects known as [Kubeflow Components](https://www.kubeflow.org/docs/components/).\n \n-Learn more about each project in [the Kubeflow documentation](https://www.kubeflow.org/docs/components/).\n+The following table lists the components and their respective source code repositories:\n \n-Please use the following GitHub repositories to open issues and pull requests for\n-[the different Kubeflow components](https://www.kubeflow.org/docs/started/introduction/#what-are-standalone-kubeflow-components):\n+| Component | Source Code |\n+|-------------------------------------------------------------------------------------|-------------------------------------------------------------------------------|\n+| [KServe](https://www.kubeflow.org/docs/external-add-ons/kserve/) | [`kserve/kserve`](https://github.com/kserve/kserve) |\n+| [Kubeflow Katib](https://www.kubeflow.org/docs/components/katib/) | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n+| [Kubeflow Model Registry](https://www.kubeflow.org/docs/components/model-registry/) | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n+| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n+| [Kubeflow Notebooks](https://www.kubeflow.org/docs/components/notebooks/) | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n+| [Kubeflow Pipelines](https://www.kubeflow.org/docs/components/pipelines/) | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n+| [Kubeflow Spark Operator](https://www.kubeflow.org/docs/components/spark-operator/) | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n+| [Kubeflow Training Operator](https://www.kubeflow.org/docs/components/training/) | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n \n-| Component | Source Code |\n-| -------------------------- | 
----------------------------------------------------------------------------- |\n-| KServe | [`kserve/kserve`](https://github.com/kserve/kserve) |\n-| Kubeflow Katib | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n-| Kubeflow Model Registry | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n-| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n-| Kubeflow Notebooks | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n-| Kubeflow Pipelines | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n-| Kubeflow Spark Operator | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n-| Kubeflow Training Operator | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n+## Kubeflow Platform\n \n-If you want to open issue or pull request for the\n-[Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform)\n-components:\n+The [Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform) refers to the full suite of Kubeflow Components bundled together with additional integration and management tools.\n \n-- Use the [`kubeflow/manifests`](https://github.com/kubeflow/manifests) GitHub repository for\n- the Kubeflow Manifests.\n-- Use the [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) GitHub repository for\n- the Kubeflow Profile Controller, Central Dashboard, CRUD Web Apps, PVC Viewer, PodDefault, and\n- Access Management components.\n+The following table lists the platform components and their respective source code repositories:\n \n-If you have questions about Kubeflow community or Kubeflow ecosystem, please use the\n-[`kubeflow/community`](https://github.com/kubeflow/community) GitHub repository.\n+| Component | Source Code |\n+|---------------------------------------------------------------------------------------|---------------------------------------------------------------|\n+| [Central Dashboard](https://www.kubeflow.org/docs/components/central-dash/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n+| [Profile Controller](https://www.kubeflow.org/docs/components/central-dash/profiles/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n+| Kubeflow Manifests | [`kubeflow/manifests`](https://github.com/kubeflow/manifests) |", - "comment_created_at": "2024-09-06T18:50:25+00:00", - "comment_author": "andreyvelich", - "comment_body": "Sure, @kubeflow/wg-manifests-leads any comments there ?\r\nI am fine to make this as a followup change when we have dedicated website section that explains Kubeflow Manifests. 
", - "pr_file_module": null - }, - { - "comment_id": "1747739190", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": "README.md", - "discussion_id": "1746931065", - "commented_code": "@@ -1,52 +1,49 @@\n-\n+# Kubeflow\n \n-[![Slack Status](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n+[![Join Kubeflow Slack](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n [![CLOMonitor](https://img.shields.io/endpoint?url=https://clomonitor.io/api/projects/cncf/kubeflow/badge)](https://clomonitor.io/projects/cncf/kubeflow)\n+[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/2660/badge)](https://www.bestpractices.dev/projects/2660)\n \n-The Kubeflow makes AI/ML on Kubernetes simple, portable and scalable.\n+\n \n----\n+## About Kubeflow\n \n-## Documentation\n+[Kubeflow](https://www.kubeflow.org/) makes artificial intelligence and machine learning simple, portable, and scalable.\n+We are an _ecosystem_ of [Kubernetes](https://kubernetes.io/) based components for each stage in the [AI/ML Lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle) with support for best-in-class open source [tools and frameworks](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem).\n \n-Please refer to the official docs at [kubeflow.org](http://kubeflow.org).\n+Please refer to the official [documentation](https://www.kubeflow.org/docs/) for more information.\n \n ## Kubeflow Components\n \n-The Kubeflow ecosystem is composed of multiple open-source projects for each stage in\n-[the ML lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle).\n+The [Kubeflow Ecosystem](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem) is composed of several projects known as [Kubeflow Components](https://www.kubeflow.org/docs/components/).\n \n-Learn more about each project in [the Kubeflow documentation](https://www.kubeflow.org/docs/components/).\n+The following table lists the components and their respective source code repositories:\n \n-Please use the following GitHub repositories to open issues and pull requests for\n-[the different Kubeflow components](https://www.kubeflow.org/docs/started/introduction/#what-are-standalone-kubeflow-components):\n+| Component | Source Code |\n+|-------------------------------------------------------------------------------------|-------------------------------------------------------------------------------|\n+| [KServe](https://www.kubeflow.org/docs/external-add-ons/kserve/) | [`kserve/kserve`](https://github.com/kserve/kserve) |\n+| [Kubeflow Katib](https://www.kubeflow.org/docs/components/katib/) | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n+| [Kubeflow Model Registry](https://www.kubeflow.org/docs/components/model-registry/) | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n+| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n+| [Kubeflow Notebooks](https://www.kubeflow.org/docs/components/notebooks/) | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n+| [Kubeflow Pipelines](https://www.kubeflow.org/docs/components/pipelines/) | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n+| [Kubeflow Spark 
Operator](https://www.kubeflow.org/docs/components/spark-operator/) | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n+| [Kubeflow Training Operator](https://www.kubeflow.org/docs/components/training/) | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n \n-| Component | Source Code |\n-| -------------------------- | ----------------------------------------------------------------------------- |\n-| KServe | [`kserve/kserve`](https://github.com/kserve/kserve) |\n-| Kubeflow Katib | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n-| Kubeflow Model Registry | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n-| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n-| Kubeflow Notebooks | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n-| Kubeflow Pipelines | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n-| Kubeflow Spark Operator | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n-| Kubeflow Training Operator | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n+## Kubeflow Platform\n \n-If you want to open issue or pull request for the\n-[Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform)\n-components:\n+The [Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform) refers to the full suite of Kubeflow Components bundled together with additional integration and management tools.\n \n-- Use the [`kubeflow/manifests`](https://github.com/kubeflow/manifests) GitHub repository for\n- the Kubeflow Manifests.\n-- Use the [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) GitHub repository for\n- the Kubeflow Profile Controller, Central Dashboard, CRUD Web Apps, PVC Viewer, PodDefault, and\n- Access Management components.\n+The following table lists the platform components and their respective source code repositories:\n \n-If you have questions about Kubeflow community or Kubeflow ecosystem, please use the\n-[`kubeflow/community`](https://github.com/kubeflow/community) GitHub repository.\n+| Component | Source Code |\n+|---------------------------------------------------------------------------------------|---------------------------------------------------------------|\n+| [Central Dashboard](https://www.kubeflow.org/docs/components/central-dash/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n+| [Profile Controller](https://www.kubeflow.org/docs/components/central-dash/profiles/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n+| Kubeflow Manifests | [`kubeflow/manifests`](https://github.com/kubeflow/manifests) |", - "comment_created_at": "2024-09-06T21:30:57+00:00", - "comment_author": "thesuperzapper", - "comment_body": "@andreyvelich I don't think we need more than we already have on the \"Installing Kubeflow\" page: \r\n- https://www.kubeflow.org/docs/started/installing-kubeflow/#kubeflow-manifests\r\n\r\nAs I keep highlighting, the manifests are intended to be used as the base of distribution, and by advanced users wanting to test Kubeflow out.\r\n\r\nFor almost everyone else, they will have a bad experience trying to use the manifests in production. 
We already get a lot of spam issues from people asking for help on issues that are unrelated to our Kubeflow and are more about their specific platform, I don't want to increase the amount of that.", - "pr_file_module": null - }, - { - "comment_id": "1747755050", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7642, - "pr_file": "README.md", - "discussion_id": "1746931065", - "commented_code": "@@ -1,52 +1,49 @@\n-\n+# Kubeflow\n \n-[![Slack Status](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n+[![Join Kubeflow Slack](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels)\n [![CLOMonitor](https://img.shields.io/endpoint?url=https://clomonitor.io/api/projects/cncf/kubeflow/badge)](https://clomonitor.io/projects/cncf/kubeflow)\n+[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/2660/badge)](https://www.bestpractices.dev/projects/2660)\n \n-The Kubeflow makes AI/ML on Kubernetes simple, portable and scalable.\n+\n \n----\n+## About Kubeflow\n \n-## Documentation\n+[Kubeflow](https://www.kubeflow.org/) makes artificial intelligence and machine learning simple, portable, and scalable.\n+We are an _ecosystem_ of [Kubernetes](https://kubernetes.io/) based components for each stage in the [AI/ML Lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle) with support for best-in-class open source [tools and frameworks](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem).\n \n-Please refer to the official docs at [kubeflow.org](http://kubeflow.org).\n+Please refer to the official [documentation](https://www.kubeflow.org/docs/) for more information.\n \n ## Kubeflow Components\n \n-The Kubeflow ecosystem is composed of multiple open-source projects for each stage in\n-[the ML lifecycle](https://www.kubeflow.org/docs/started/architecture/#kubeflow-components-in-the-ml-lifecycle).\n+The [Kubeflow Ecosystem](https://www.kubeflow.org/docs/started/architecture/#kubeflow-ecosystem) is composed of several projects known as [Kubeflow Components](https://www.kubeflow.org/docs/components/).\n \n-Learn more about each project in [the Kubeflow documentation](https://www.kubeflow.org/docs/components/).\n+The following table lists the components and their respective source code repositories:\n \n-Please use the following GitHub repositories to open issues and pull requests for\n-[the different Kubeflow components](https://www.kubeflow.org/docs/started/introduction/#what-are-standalone-kubeflow-components):\n+| Component | Source Code |\n+|-------------------------------------------------------------------------------------|-------------------------------------------------------------------------------|\n+| [KServe](https://www.kubeflow.org/docs/external-add-ons/kserve/) | [`kserve/kserve`](https://github.com/kserve/kserve) |\n+| [Kubeflow Katib](https://www.kubeflow.org/docs/components/katib/) | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n+| [Kubeflow Model Registry](https://www.kubeflow.org/docs/components/model-registry/) | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n+| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n+| [Kubeflow Notebooks](https://www.kubeflow.org/docs/components/notebooks/) | 
[`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n+| [Kubeflow Pipelines](https://www.kubeflow.org/docs/components/pipelines/) | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n+| [Kubeflow Spark Operator](https://www.kubeflow.org/docs/components/spark-operator/) | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n+| [Kubeflow Training Operator](https://www.kubeflow.org/docs/components/training/) | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n \n-| Component | Source Code |\n-| -------------------------- | ----------------------------------------------------------------------------- |\n-| KServe | [`kserve/kserve`](https://github.com/kserve/kserve) |\n-| Kubeflow Katib | [`kubeflow/katib`](https://github.com/kubeflow/katib) |\n-| Kubeflow Model Registry | [`kubeflow/model-registry`](https://github.com/kubeflow/model-registry) |\n-| Kubeflow MPI Operator | [`kubeflow/mpi-operator`](https://github.com/kubeflow/mpi-operator) |\n-| Kubeflow Notebooks | [`kubeflow/notebooks`](https://github.com/kubeflow/notebooks) |\n-| Kubeflow Pipelines | [`kubeflow/pipelines`](https://github.com/kubeflow/pipelines) |\n-| Kubeflow Spark Operator | [`kubeflow/spark-operator`](https://github.com/kubeflow/spark-operator) |\n-| Kubeflow Training Operator | [`kubeflow/training-operator`](https://github.com/kubeflow/training-operator) |\n+## Kubeflow Platform\n \n-If you want to open issue or pull request for the\n-[Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform)\n-components:\n+The [Kubeflow Platform](https://www.kubeflow.org/docs/started/introduction/#what-is-kubeflow-platform) refers to the full suite of Kubeflow Components bundled together with additional integration and management tools.\n \n-- Use the [`kubeflow/manifests`](https://github.com/kubeflow/manifests) GitHub repository for\n- the Kubeflow Manifests.\n-- Use the [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) GitHub repository for\n- the Kubeflow Profile Controller, Central Dashboard, CRUD Web Apps, PVC Viewer, PodDefault, and\n- Access Management components.\n+The following table lists the platform components and their respective source code repositories:\n \n-If you have questions about Kubeflow community or Kubeflow ecosystem, please use the\n-[`kubeflow/community`](https://github.com/kubeflow/community) GitHub repository.\n+| Component | Source Code |\n+|---------------------------------------------------------------------------------------|---------------------------------------------------------------|\n+| [Central Dashboard](https://www.kubeflow.org/docs/components/central-dash/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n+| [Profile Controller](https://www.kubeflow.org/docs/components/central-dash/profiles/) | [`kubeflow/dashboard`](https://github.com/kubeflow/dashboard) |\n+| Kubeflow Manifests | [`kubeflow/manifests`](https://github.com/kubeflow/manifests) |", - "comment_created_at": "2024-09-06T21:53:12+00:00", - "comment_author": "thesuperzapper", - "comment_body": "I have added the link above in https://github.com/kubeflow/kubeflow/pull/7642/commits/7abcd7727c4611076aa22c4d63eced167e14b619", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1186256926", - "pr_number": 7123, - "pr_file": "conformance/1.7/README.md", - "created_at": "2023-05-05T15:55:30+00:00", - "commented_code": "# Kubeflow conformance program (WIP)\n\nBefore running the 
conformance tests, you need to configure kubectl default context to point to the k8s cluster that is hosting Kubeflow.\n\nTODO: Make the kubeflow namespace configurable.\n\nTo run version of the conformance test.\n\n`cd kubeflow/conformance/`\n\n**Run conformance test**\n\n`make run`", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1186256926", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7123, - "pr_file": "conformance/1.7/README.md", - "discussion_id": "1186256926", - "commented_code": "@@ -0,0 +1,27 @@\n+# Kubeflow conformance program (WIP)\n+\n+Before running the conformance tests, you need to configure kubectl default context to point to the k8s cluster that is hosting Kubeflow.\n+\n+TODO: Make the kubeflow namespace configurable.\n+\n+To run version of the conformance test.\n+\n+`cd kubeflow/conformance/`\n+\n+**Run conformance test**\n+\n+`make run`", - "comment_created_at": "2023-05-05T15:55:30+00:00", - "comment_author": "gkcalat", - "comment_body": "NIT: add brief comments on how to run component-specific tests (e.g. `make run-kfp`)", - "pr_file_module": null - }, - { - "comment_id": "1186512090", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7123, - "pr_file": "conformance/1.7/README.md", - "discussion_id": "1186256926", - "commented_code": "@@ -0,0 +1,27 @@\n+# Kubeflow conformance program (WIP)\n+\n+Before running the conformance tests, you need to configure kubectl default context to point to the k8s cluster that is hosting Kubeflow.\n+\n+TODO: Make the kubeflow namespace configurable.\n+\n+To run version of the conformance test.\n+\n+`cd kubeflow/conformance/`\n+\n+**Run conformance test**\n+\n+`make run`", - "comment_created_at": "2023-05-05T21:27:39+00:00", - "comment_author": "james-jwu", - "comment_body": "Done.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "596651177", - "pr_number": 5694, - "pr_file": "components/example-notebook-servers/README.md", - "created_at": "2021-03-18T08:41:34+00:00", - "commented_code": "# Example Notebook Servers\n\n**DISCLAIMER:** The Notebooks Working Group provides these Dockerfiles and\ntheir images as examples only, they are not tested with our CI/CD pipelines\nand are not certified to work in every situation. As such, issues related to\nthese images will be dealt with in a best efforts approach. The Notebooks\nWorking Group will work on officially supporting some of these Notebook\nServer images in the near future. If you do encounter a problem in one of\nthese images, contributions and issue reports are greatly appreciated.\n\n## Introduction", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "596651177", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5694, - "pr_file": "components/example-notebook-servers/README.md", - "discussion_id": "596651177", - "commented_code": "@@ -0,0 +1,139 @@\n+# Example Notebook Servers\n+\n+**DISCLAIMER:** The Notebooks Working Group provides these Dockerfiles and\n+their images as examples only, they are not tested with our CI/CD pipelines\n+and are not certified to work in every situation. As such, issues related to\n+these images will be dealt with in a best efforts approach. The Notebooks\n+Working Group will work on officially supporting some of these Notebook\n+Server images in the near future. 
If you do encounter a problem in one of\n+these images, contributions and issue reports are greatly appreciated.\n+\n+## Introduction", - "comment_created_at": "2021-03-18T08:41:34+00:00", - "comment_author": "StefanoFioravanzo", - "comment_body": "I would suggest trying to remove the passive voice in favour for an active voice. This is a general comment for the entire document.", - "pr_file_module": null - }, - { - "comment_id": "596679316", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5694, - "pr_file": "components/example-notebook-servers/README.md", - "discussion_id": "596651177", - "commented_code": "@@ -0,0 +1,139 @@\n+# Example Notebook Servers\n+\n+**DISCLAIMER:** The Notebooks Working Group provides these Dockerfiles and\n+their images as examples only, they are not tested with our CI/CD pipelines\n+and are not certified to work in every situation. As such, issues related to\n+these images will be dealt with in a best efforts approach. The Notebooks\n+Working Group will work on officially supporting some of these Notebook\n+Server images in the near future. If you do encounter a problem in one of\n+these images, contributions and issue reports are greatly appreciated.\n+\n+## Introduction", - "comment_created_at": "2021-03-18T09:20:21+00:00", - "comment_author": "davidspek", - "comment_body": "Yeah, I will look into this. I'm used to scientific writing so tend to default to the passive voice. ", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-structured-owners-files.json b/_reviewers/kubeflow-structured-owners-files.json new file mode 100644 index 0000000..08e694b --- /dev/null +++ b/_reviewers/kubeflow-structured-owners-files.json @@ -0,0 +1,128 @@ +[ + { + "discussion_id": "1215392226", + "pr_number": 6876, + "pr_file": "components/pvc-viewer/OWNERS", + "created_at": "2023-06-03T09:54:07+00:00", + "commented_code": "approvers:", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1215392226", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6876, + "pr_file": "components/pvc-viewer/OWNERS", + "discussion_id": "1215392226", + "commented_code": "@@ -0,0 +1,7 @@\n+approvers:", + "comment_created_at": "2023-06-03T09:54:07+00:00", + "comment_author": "kimwnasptd", + "comment_body": "@TobiasGoerke why are you not an approver? :) \r\n\r\nWe want the OWNERS file to depict the folks that are driving the component. Considering that you, @apo-ger and I have worked on the proposal of this I'd create the OWNERS file like this:\r\n\r\n```yaml\r\napprovers:\r\n - apo-ger\r\n - kimwnasptd\r\n - TobiasGoerke\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "465339868", + "pr_number": 5184, + "pr_file": "components/web-apps/OWNERS", + "created_at": "2020-08-04T21:26:35+00:00", + "commented_code": "approvers:\n - elikatsis\n - kimwnasptd\n - StefanoFioravanzo\nreviewers:\n - avdaredevil", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "465339868", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5184, + "pr_file": "components/web-apps/OWNERS", + "discussion_id": "465339868", + "commented_code": "@@ -0,0 +1,11 @@\n+approvers:\n+ - elikatsis\n+ - kimwnasptd\n+ - StefanoFioravanzo\n+reviewers:\n+ - avdaredevil", + "comment_created_at": "2020-08-04T21:26:35+00:00", + "comment_author": "jlewi", + "comment_body": "Have @avdaredevil and @prodonjs agreed to participate in this? \r\n\r\nDo not duplicate reviewers and approvers. 
Please refer to\r\nhttps://www.kubeflow.org/docs/about/contributing/#maintaining-owners-files", + "pr_file_module": null + }, + { + "comment_id": "465645242", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5184, + "pr_file": "components/web-apps/OWNERS", + "discussion_id": "465339868", + "commented_code": "@@ -0,0 +1,11 @@\n+approvers:\n+ - elikatsis\n+ - kimwnasptd\n+ - StefanoFioravanzo\n+reviewers:\n+ - avdaredevil", + "comment_created_at": "2020-08-05T11:03:12+00:00", + "comment_author": "kimwnasptd", + "comment_body": "My bad, I just copied the reviewers from the jupyter web app.\r\nLets start with an empty list of reviewers for now", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "444207757", + "pr_number": 5088, + "pr_file": "components/tensorboard-controller/OWNERS", + "created_at": "2020-06-23T13:06:10+00:00", + "commented_code": "approvers:\n - elikatsis\n - kandrio98\n - kimwnasptd\n - quanjielin\nreviewers:", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "444207757", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5088, + "pr_file": "components/tensorboard-controller/OWNERS", + "discussion_id": "444207757", + "commented_code": "@@ -0,0 +1,10 @@\n+approvers:\n+ - elikatsis\n+ - kandrio98\n+ - kimwnasptd\n+ - quanjielin\n+reviewers:", + "comment_created_at": "2020-06-23T13:06:10+00:00", + "comment_author": "jlewi", + "comment_body": "approvers shouldn't be listed as approvers", + "pr_file_module": null + }, + { + "comment_id": "444210570", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5088, + "pr_file": "components/tensorboard-controller/OWNERS", + "discussion_id": "444207757", + "commented_code": "@@ -0,0 +1,10 @@\n+approvers:\n+ - elikatsis\n+ - kandrio98\n+ - kimwnasptd\n+ - quanjielin\n+reviewers:", + "comment_created_at": "2020-06-23T13:10:28+00:00", + "comment_author": "kimwnasptd", + "comment_body": "I think I'm missing something here, where should approvers be listed?", + "pr_file_module": null + }, + { + "comment_id": "444519651", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5088, + "pr_file": "components/tensorboard-controller/OWNERS", + "discussion_id": "444207757", + "commented_code": "@@ -0,0 +1,10 @@\n+approvers:\n+ - elikatsis\n+ - kandrio98\n+ - kimwnasptd\n+ - quanjielin\n+reviewers:", + "comment_created_at": "2020-06-23T21:29:42+00:00", + "comment_author": "jlewi", + "comment_body": "My bad typo; meant to say that approvers should be listed as reviewers.", + "pr_file_module": null + }, + { + "comment_id": "444720070", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5088, + "pr_file": "components/tensorboard-controller/OWNERS", + "discussion_id": "444207757", + "commented_code": "@@ -0,0 +1,10 @@\n+approvers:\n+ - elikatsis\n+ - kandrio98\n+ - kimwnasptd\n+ - quanjielin\n+reviewers:", + "comment_created_at": "2020-06-24T08:10:41+00:00", + "comment_author": "kimwnasptd", + "comment_body": "I see. 
Which people should I assign as reviewers?", + "pr_file_module": null + }, + { + "comment_id": "446243147", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5088, + "pr_file": "components/tensorboard-controller/OWNERS", + "discussion_id": "444207757", + "commented_code": "@@ -0,0 +1,10 @@\n+approvers:\n+ - elikatsis\n+ - kandrio98\n+ - kimwnasptd\n+ - quanjielin\n+reviewers:", + "comment_created_at": "2020-06-26T15:10:01+00:00", + "comment_author": "jlewi", + "comment_body": "@kimwnasptd You can just leave it blank if there is no one that makes sense to add as a reviewer.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-structured-owners-files.md b/_reviewers/kubeflow-structured-owners-files.md index e0949e4..50518f8 100644 --- a/_reviewers/kubeflow-structured-owners-files.md +++ b/_reviewers/kubeflow-structured-owners-files.md @@ -29,133 +29,3 @@ reviewers: ``` If someone should be both an approver and a reviewer, only list them as an approver. This ensures clear documentation of component ownership and streamlines the review process. - - -[ - { - "discussion_id": "1215392226", - "pr_number": 6876, - "pr_file": "components/pvc-viewer/OWNERS", - "created_at": "2023-06-03T09:54:07+00:00", - "commented_code": "approvers:", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1215392226", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6876, - "pr_file": "components/pvc-viewer/OWNERS", - "discussion_id": "1215392226", - "commented_code": "@@ -0,0 +1,7 @@\n+approvers:", - "comment_created_at": "2023-06-03T09:54:07+00:00", - "comment_author": "kimwnasptd", - "comment_body": "@TobiasGoerke why are you not an approver? :) \r\n\r\nWe want the OWNERS file to depict the folks that are driving the component. Considering that you, @apo-ger and I have worked on the proposal of this I'd create the OWNERS file like this:\r\n\r\n```yaml\r\napprovers:\r\n - apo-ger\r\n - kimwnasptd\r\n - TobiasGoerke\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "465339868", - "pr_number": 5184, - "pr_file": "components/web-apps/OWNERS", - "created_at": "2020-08-04T21:26:35+00:00", - "commented_code": "approvers:\n - elikatsis\n - kimwnasptd\n - StefanoFioravanzo\nreviewers:\n - avdaredevil", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "465339868", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5184, - "pr_file": "components/web-apps/OWNERS", - "discussion_id": "465339868", - "commented_code": "@@ -0,0 +1,11 @@\n+approvers:\n+ - elikatsis\n+ - kimwnasptd\n+ - StefanoFioravanzo\n+reviewers:\n+ - avdaredevil", - "comment_created_at": "2020-08-04T21:26:35+00:00", - "comment_author": "jlewi", - "comment_body": "Have @avdaredevil and @prodonjs agreed to participate in this? \r\n\r\nDo not duplicate reviewers and approvers. 
Please refer to\r\nhttps://www.kubeflow.org/docs/about/contributing/#maintaining-owners-files", - "pr_file_module": null - }, - { - "comment_id": "465645242", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5184, - "pr_file": "components/web-apps/OWNERS", - "discussion_id": "465339868", - "commented_code": "@@ -0,0 +1,11 @@\n+approvers:\n+ - elikatsis\n+ - kimwnasptd\n+ - StefanoFioravanzo\n+reviewers:\n+ - avdaredevil", - "comment_created_at": "2020-08-05T11:03:12+00:00", - "comment_author": "kimwnasptd", - "comment_body": "My bad, I just copied the reviewers from the jupyter web app.\r\nLets start with an empty list of reviewers for now", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "444207757", - "pr_number": 5088, - "pr_file": "components/tensorboard-controller/OWNERS", - "created_at": "2020-06-23T13:06:10+00:00", - "commented_code": "approvers:\n - elikatsis\n - kandrio98\n - kimwnasptd\n - quanjielin\nreviewers:", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "444207757", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5088, - "pr_file": "components/tensorboard-controller/OWNERS", - "discussion_id": "444207757", - "commented_code": "@@ -0,0 +1,10 @@\n+approvers:\n+ - elikatsis\n+ - kandrio98\n+ - kimwnasptd\n+ - quanjielin\n+reviewers:", - "comment_created_at": "2020-06-23T13:06:10+00:00", - "comment_author": "jlewi", - "comment_body": "approvers shouldn't be listed as approvers", - "pr_file_module": null - }, - { - "comment_id": "444210570", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5088, - "pr_file": "components/tensorboard-controller/OWNERS", - "discussion_id": "444207757", - "commented_code": "@@ -0,0 +1,10 @@\n+approvers:\n+ - elikatsis\n+ - kandrio98\n+ - kimwnasptd\n+ - quanjielin\n+reviewers:", - "comment_created_at": "2020-06-23T13:10:28+00:00", - "comment_author": "kimwnasptd", - "comment_body": "I think I'm missing something here, where should approvers be listed?", - "pr_file_module": null - }, - { - "comment_id": "444519651", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5088, - "pr_file": "components/tensorboard-controller/OWNERS", - "discussion_id": "444207757", - "commented_code": "@@ -0,0 +1,10 @@\n+approvers:\n+ - elikatsis\n+ - kandrio98\n+ - kimwnasptd\n+ - quanjielin\n+reviewers:", - "comment_created_at": "2020-06-23T21:29:42+00:00", - "comment_author": "jlewi", - "comment_body": "My bad typo; meant to say that approvers should be listed as reviewers.", - "pr_file_module": null - }, - { - "comment_id": "444720070", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5088, - "pr_file": "components/tensorboard-controller/OWNERS", - "discussion_id": "444207757", - "commented_code": "@@ -0,0 +1,10 @@\n+approvers:\n+ - elikatsis\n+ - kandrio98\n+ - kimwnasptd\n+ - quanjielin\n+reviewers:", - "comment_created_at": "2020-06-24T08:10:41+00:00", - "comment_author": "kimwnasptd", - "comment_body": "I see. 
Which people should I assign as reviewers?", - "pr_file_module": null - }, - { - "comment_id": "446243147", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5088, - "pr_file": "components/tensorboard-controller/OWNERS", - "discussion_id": "444207757", - "commented_code": "@@ -0,0 +1,10 @@\n+approvers:\n+ - elikatsis\n+ - kandrio98\n+ - kimwnasptd\n+ - quanjielin\n+reviewers:", - "comment_created_at": "2020-06-26T15:10:01+00:00", - "comment_author": "jlewi", - "comment_body": "@kimwnasptd You can just leave it blank if there is no one that makes sense to add as a reviewer.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-type-appropriate-default-values.json b/_reviewers/kubeflow-type-appropriate-default-values.json new file mode 100644 index 0000000..15771ae --- /dev/null +++ b/_reviewers/kubeflow-type-appropriate-default-values.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "1146054284", + "pr_number": 6952, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/status.py", + "created_at": "2023-03-23T11:38:26+00:00", + "commented_code": "def process_status(notebook):\n \"\"\"\n Return status and reason. Status may be [running|waiting|warning|error]\n Return status and reason. Status may be [running|waiting|warning]\n \"\"\"\n # Check if the Notebook is stopped\n readyReplicas = notebook.get(\"status\", {}).get(\"readyReplicas\", 0)\n metadata = notebook.get(\"metadata\", {})\n annotations = metadata.get(\"annotations\", {})\n\n if STOP_ANNOTATION in annotations:\n if readyReplicas == 0:\n return status.create_status(\n status.STATUS_PHASE.STOPPED,\n \"No Pods are currently running for this Notebook Server.\",\n )\n else:\n return status.create_status(\n status.STATUS_PHASE.TERMINATING, \"Notebook Server is stopping.\"\n )\n creationTimestamp = notebook[\"metadata\"][\"creationTimestamp\"]\n\n # If the Notebook is being deleted, the status will be waiting\n if \"deletionTimestamp\" in metadata:\n return status.create_status(\n status.STATUS_PHASE.TERMINATING, \"Deleting this notebook server\"\n )\n\n # Check the status\n state = notebook.get(\"status\", {}).get(\"containerState\", \"\")\n\n # Use conditions on the Jupyter notebook (i.e., s) to determine overall\n # status. 
If no container state is available, we try to extract information\n # about why the notebook is not starting from the notebook's events\n # (see find_error_event)\n if readyReplicas == 1:\n return status.create_status(status.STATUS_PHASE.READY, \"Running\")\n\n if \"waiting\" in state:\n return status.create_status(\n status.STATUS_PHASE.WAITING, state[\"waiting\"][\"reason\"]\n )\n\n # Provide the user with detailed information (if any) about\n # why the notebook is not starting\n notebook_events = get_notebook_events(notebook)\n status_val, reason = status.STATUS_PHASE.WAITING, \"Scheduling the Pod\"\n status_event, reason_event = find_error_event(notebook_events)\n if status_event is not None:\n status_val, reason = status_event, reason_event\n\n return status.create_status(status_val, reason)\n # Convert a date string of a format to datetime object\n nb_creation_time = dt.datetime.strptime(\n creationTimestamp, \"%Y-%m-%dT%H:%M:%SZ\")\n current_time = dt.datetime.utcnow().replace(microsecond=0)\n delta = (current_time - nb_creation_time)\n\n while delta.total_seconds() <= 10:\n status_val, reason = status.STATUS_PHASE.WAITING, \"Waiting for StatefulSet to create the underlying Pod.\"\n\n # Check if the Notebook is stopped or deleted\n status_phase, status_message = check_stopped_deleted_nb(notebook)\n if status_phase is not None:\n status_val, reason = status_phase, status_message\n\n return status.create_status(status_val, reason)\n else:\n # Check if the Notebook is stopped or deleted\n status_phase, status_message = check_stopped_deleted_nb(notebook)\n if status_phase is not None:\n status_val, reason = status_phase, status_message\n return status.create_status(status_val, reason)\n\n # If the Notebook is running, the status will be ready\n if readyReplicas == 1:\n return status.create_status(status.STATUS_PHASE.READY, \"Running\")\n\n # Otherwise, first check .status.containerState to determine the status\n # reason and message to determine the status\n containerState = notebook.get(\"status\", {}).get(\"containerState\", \"\")\n\n if \"waiting\" in containerState:\n if containerState[\"waiting\"][\"reason\"] == 'PodInitializing':\n return status.create_status(\n status.STATUS_PHASE.WAITING, containerState[\"waiting\"][\"reason\"]\n )\n else:\n return status.create_status(\n status.STATUS_PHASE.WARNING, containerState[\"waiting\"][\"reason\"] +\n ': ' + containerState[\"waiting\"][\"message\"]\n )\n\n # If no containerState is available, check .status.conditions, since they have\n # a reason and a message to determine the status\n conditions = notebook.get(\"status\", {}).get(\"conditions\", \"\")", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1146054284", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6952, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/status.py", + "discussion_id": "1146054284", + "commented_code": "@@ -8,54 +8,75 @@\n \n def process_status(notebook):\n \"\"\"\n- Return status and reason. Status may be [running|waiting|warning|error]\n+ Return status and reason. 
Status may be [running|waiting|warning]\n \"\"\"\n- # Check if the Notebook is stopped\n readyReplicas = notebook.get(\"status\", {}).get(\"readyReplicas\", 0)\n- metadata = notebook.get(\"metadata\", {})\n- annotations = metadata.get(\"annotations\", {})\n-\n- if STOP_ANNOTATION in annotations:\n- if readyReplicas == 0:\n- return status.create_status(\n- status.STATUS_PHASE.STOPPED,\n- \"No Pods are currently running for this Notebook Server.\",\n- )\n- else:\n- return status.create_status(\n- status.STATUS_PHASE.TERMINATING, \"Notebook Server is stopping.\"\n- )\n+ creationTimestamp = notebook[\"metadata\"][\"creationTimestamp\"]\n \n- # If the Notebook is being deleted, the status will be waiting\n- if \"deletionTimestamp\" in metadata:\n- return status.create_status(\n- status.STATUS_PHASE.TERMINATING, \"Deleting this notebook server\"\n- )\n-\n- # Check the status\n- state = notebook.get(\"status\", {}).get(\"containerState\", \"\")\n-\n- # Use conditions on the Jupyter notebook (i.e., s) to determine overall\n- # status. If no container state is available, we try to extract information\n- # about why the notebook is not starting from the notebook's events\n- # (see find_error_event)\n- if readyReplicas == 1:\n- return status.create_status(status.STATUS_PHASE.READY, \"Running\")\n-\n- if \"waiting\" in state:\n- return status.create_status(\n- status.STATUS_PHASE.WAITING, state[\"waiting\"][\"reason\"]\n- )\n-\n- # Provide the user with detailed information (if any) about\n- # why the notebook is not starting\n- notebook_events = get_notebook_events(notebook)\n- status_val, reason = status.STATUS_PHASE.WAITING, \"Scheduling the Pod\"\n- status_event, reason_event = find_error_event(notebook_events)\n- if status_event is not None:\n- status_val, reason = status_event, reason_event\n-\n- return status.create_status(status_val, reason)\n+ # Convert a date string of a format to datetime object\n+ nb_creation_time = dt.datetime.strptime(\n+ creationTimestamp, \"%Y-%m-%dT%H:%M:%SZ\")\n+ current_time = dt.datetime.utcnow().replace(microsecond=0)\n+ delta = (current_time - nb_creation_time)\n+\n+ while delta.total_seconds() <= 10:\n+ status_val, reason = status.STATUS_PHASE.WAITING, \"Waiting for StatefulSet to create the underlying Pod.\"\n+\n+ # Check if the Notebook is stopped or deleted\n+ status_phase, status_message = check_stopped_deleted_nb(notebook)\n+ if status_phase is not None:\n+ status_val, reason = status_phase, status_message\n+\n+ return status.create_status(status_val, reason)\n+ else:\n+ # Check if the Notebook is stopped or deleted\n+ status_phase, status_message = check_stopped_deleted_nb(notebook)\n+ if status_phase is not None:\n+ status_val, reason = status_phase, status_message\n+ return status.create_status(status_val, reason)\n+\n+ # If the Notebook is running, the status will be ready\n+ if readyReplicas == 1:\n+ return status.create_status(status.STATUS_PHASE.READY, \"Running\")\n+\n+ # Otherwise, first check .status.containerState to determine the status\n+ # reason and message to determine the status\n+ containerState = notebook.get(\"status\", {}).get(\"containerState\", \"\")\n+\n+ if \"waiting\" in containerState:\n+ if containerState[\"waiting\"][\"reason\"] == 'PodInitializing':\n+ return status.create_status(\n+ status.STATUS_PHASE.WAITING, containerState[\"waiting\"][\"reason\"]\n+ )\n+ else:\n+ return status.create_status(\n+ status.STATUS_PHASE.WARNING, containerState[\"waiting\"][\"reason\"] +\n+ ': ' + containerState[\"waiting\"][\"message\"]\n+ 
)\n+\n+ # If no containerState is available, check .status.conditions, since they have\n+ # a reason and a message to determine the status\n+ conditions = notebook.get(\"status\", {}).get(\"conditions\", \"\")", + "comment_created_at": "2023-03-23T11:38:26+00:00", + "comment_author": "kimwnasptd", + "comment_body": "In this case the default value for the conditions should be a `[]`, instead of `\"\"` since this var is a list", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "265194749", + "pr_number": 2679, + "pr_file": "components/jupyter-web-app/default/kubeflow/jupyter/server.py", + "created_at": "2019-03-13T15:43:06+00:00", + "commented_code": "return err\n\n\ndef create_workspace_pvc(body):\n \"\"\"If the type is New, then create a new PVC, else use an existing one\"\"\"\n if body[\"ws_type\"] == \"New\":\n pvc = client.V1PersistentVolumeClaim(\n metadata=client.V1ObjectMeta(\n name=body['ws_name'],\n namespace=body['ns']\n ),\n spec=client.V1PersistentVolumeClaimSpec(\n access_modes=[body['ws_access_modes']],\n resources=client.V1ResourceRequirements(\n requests={\n 'storage': body['ws_size'] + 'Gi'\n }\n )\n )\n )\n\n create_pvc(pvc)\n\n return\n\n\ndef create_datavol_pvc(body, i):", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "265194749", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 2679, + "pr_file": "components/jupyter-web-app/default/kubeflow/jupyter/server.py", + "discussion_id": "265194749", + "commented_code": "@@ -26,10 +30,79 @@ def parse_error(e):\n return err\n \n \n+def create_workspace_pvc(body):\n+ \"\"\"If the type is New, then create a new PVC, else use an existing one\"\"\"\n+ if body[\"ws_type\"] == \"New\":\n+ pvc = client.V1PersistentVolumeClaim(\n+ metadata=client.V1ObjectMeta(\n+ name=body['ws_name'],\n+ namespace=body['ns']\n+ ),\n+ spec=client.V1PersistentVolumeClaimSpec(\n+ access_modes=[body['ws_access_modes']],\n+ resources=client.V1ResourceRequirements(\n+ requests={\n+ 'storage': body['ws_size'] + 'Gi'\n+ }\n+ )\n+ )\n+ )\n+\n+ create_pvc(pvc)\n+\n+ return\n+\n+\n+def create_datavol_pvc(body, i):", + "comment_created_at": "2019-03-13T15:43:06+00:00", + "comment_author": "prodonjs", + "comment_body": "Ideally I think there would be some kind of definition for the shape of the body dict being passed in, but since I have almost no context of looking at this I'm not going to be picky.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-type-appropriate-default-values.md b/_reviewers/kubeflow-type-appropriate-default-values.md index 76d000e..debd013 100644 --- a/_reviewers/kubeflow-type-appropriate-default-values.md +++ b/_reviewers/kubeflow-type-appropriate-default-values.md @@ -31,51 +31,3 @@ if some_condition: ``` For complex data structures like dictionaries passed as parameters, consider adding type hints or documentation that clearly defines the expected structure, including which fields are optional vs. required. This helps prevent null reference errors when accessing dictionary keys. - - -[ - { - "discussion_id": "1146054284", - "pr_number": 6952, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/status.py", - "created_at": "2023-03-23T11:38:26+00:00", - "commented_code": "def process_status(notebook):\n \"\"\"\n Return status and reason. Status may be [running|waiting|warning|error]\n Return status and reason. 
Status may be [running|waiting|warning]\n \"\"\"\n # Check if the Notebook is stopped\n readyReplicas = notebook.get(\"status\", {}).get(\"readyReplicas\", 0)\n metadata = notebook.get(\"metadata\", {})\n annotations = metadata.get(\"annotations\", {})\n\n if STOP_ANNOTATION in annotations:\n if readyReplicas == 0:\n return status.create_status(\n status.STATUS_PHASE.STOPPED,\n \"No Pods are currently running for this Notebook Server.\",\n )\n else:\n return status.create_status(\n status.STATUS_PHASE.TERMINATING, \"Notebook Server is stopping.\"\n )\n creationTimestamp = notebook[\"metadata\"][\"creationTimestamp\"]\n\n # If the Notebook is being deleted, the status will be waiting\n if \"deletionTimestamp\" in metadata:\n return status.create_status(\n status.STATUS_PHASE.TERMINATING, \"Deleting this notebook server\"\n )\n\n # Check the status\n state = notebook.get(\"status\", {}).get(\"containerState\", \"\")\n\n # Use conditions on the Jupyter notebook (i.e., s) to determine overall\n # status. If no container state is available, we try to extract information\n # about why the notebook is not starting from the notebook's events\n # (see find_error_event)\n if readyReplicas == 1:\n return status.create_status(status.STATUS_PHASE.READY, \"Running\")\n\n if \"waiting\" in state:\n return status.create_status(\n status.STATUS_PHASE.WAITING, state[\"waiting\"][\"reason\"]\n )\n\n # Provide the user with detailed information (if any) about\n # why the notebook is not starting\n notebook_events = get_notebook_events(notebook)\n status_val, reason = status.STATUS_PHASE.WAITING, \"Scheduling the Pod\"\n status_event, reason_event = find_error_event(notebook_events)\n if status_event is not None:\n status_val, reason = status_event, reason_event\n\n return status.create_status(status_val, reason)\n # Convert a date string of a format to datetime object\n nb_creation_time = dt.datetime.strptime(\n creationTimestamp, \"%Y-%m-%dT%H:%M:%SZ\")\n current_time = dt.datetime.utcnow().replace(microsecond=0)\n delta = (current_time - nb_creation_time)\n\n while delta.total_seconds() <= 10:\n status_val, reason = status.STATUS_PHASE.WAITING, \"Waiting for StatefulSet to create the underlying Pod.\"\n\n # Check if the Notebook is stopped or deleted\n status_phase, status_message = check_stopped_deleted_nb(notebook)\n if status_phase is not None:\n status_val, reason = status_phase, status_message\n\n return status.create_status(status_val, reason)\n else:\n # Check if the Notebook is stopped or deleted\n status_phase, status_message = check_stopped_deleted_nb(notebook)\n if status_phase is not None:\n status_val, reason = status_phase, status_message\n return status.create_status(status_val, reason)\n\n # If the Notebook is running, the status will be ready\n if readyReplicas == 1:\n return status.create_status(status.STATUS_PHASE.READY, \"Running\")\n\n # Otherwise, first check .status.containerState to determine the status\n # reason and message to determine the status\n containerState = notebook.get(\"status\", {}).get(\"containerState\", \"\")\n\n if \"waiting\" in containerState:\n if containerState[\"waiting\"][\"reason\"] == 'PodInitializing':\n return status.create_status(\n status.STATUS_PHASE.WAITING, containerState[\"waiting\"][\"reason\"]\n )\n else:\n return status.create_status(\n status.STATUS_PHASE.WARNING, containerState[\"waiting\"][\"reason\"] +\n ': ' + containerState[\"waiting\"][\"message\"]\n )\n\n # If no containerState is available, check .status.conditions, since they have\n # a 
reason and a message to determine the status\n conditions = notebook.get(\"status\", {}).get(\"conditions\", \"\")", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1146054284", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6952, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/status.py", - "discussion_id": "1146054284", - "commented_code": "@@ -8,54 +8,75 @@\n \n def process_status(notebook):\n \"\"\"\n- Return status and reason. Status may be [running|waiting|warning|error]\n+ Return status and reason. Status may be [running|waiting|warning]\n \"\"\"\n- # Check if the Notebook is stopped\n readyReplicas = notebook.get(\"status\", {}).get(\"readyReplicas\", 0)\n- metadata = notebook.get(\"metadata\", {})\n- annotations = metadata.get(\"annotations\", {})\n-\n- if STOP_ANNOTATION in annotations:\n- if readyReplicas == 0:\n- return status.create_status(\n- status.STATUS_PHASE.STOPPED,\n- \"No Pods are currently running for this Notebook Server.\",\n- )\n- else:\n- return status.create_status(\n- status.STATUS_PHASE.TERMINATING, \"Notebook Server is stopping.\"\n- )\n+ creationTimestamp = notebook[\"metadata\"][\"creationTimestamp\"]\n \n- # If the Notebook is being deleted, the status will be waiting\n- if \"deletionTimestamp\" in metadata:\n- return status.create_status(\n- status.STATUS_PHASE.TERMINATING, \"Deleting this notebook server\"\n- )\n-\n- # Check the status\n- state = notebook.get(\"status\", {}).get(\"containerState\", \"\")\n-\n- # Use conditions on the Jupyter notebook (i.e., s) to determine overall\n- # status. If no container state is available, we try to extract information\n- # about why the notebook is not starting from the notebook's events\n- # (see find_error_event)\n- if readyReplicas == 1:\n- return status.create_status(status.STATUS_PHASE.READY, \"Running\")\n-\n- if \"waiting\" in state:\n- return status.create_status(\n- status.STATUS_PHASE.WAITING, state[\"waiting\"][\"reason\"]\n- )\n-\n- # Provide the user with detailed information (if any) about\n- # why the notebook is not starting\n- notebook_events = get_notebook_events(notebook)\n- status_val, reason = status.STATUS_PHASE.WAITING, \"Scheduling the Pod\"\n- status_event, reason_event = find_error_event(notebook_events)\n- if status_event is not None:\n- status_val, reason = status_event, reason_event\n-\n- return status.create_status(status_val, reason)\n+ # Convert a date string of a format to datetime object\n+ nb_creation_time = dt.datetime.strptime(\n+ creationTimestamp, \"%Y-%m-%dT%H:%M:%SZ\")\n+ current_time = dt.datetime.utcnow().replace(microsecond=0)\n+ delta = (current_time - nb_creation_time)\n+\n+ while delta.total_seconds() <= 10:\n+ status_val, reason = status.STATUS_PHASE.WAITING, \"Waiting for StatefulSet to create the underlying Pod.\"\n+\n+ # Check if the Notebook is stopped or deleted\n+ status_phase, status_message = check_stopped_deleted_nb(notebook)\n+ if status_phase is not None:\n+ status_val, reason = status_phase, status_message\n+\n+ return status.create_status(status_val, reason)\n+ else:\n+ # Check if the Notebook is stopped or deleted\n+ status_phase, status_message = check_stopped_deleted_nb(notebook)\n+ if status_phase is not None:\n+ status_val, reason = status_phase, status_message\n+ return status.create_status(status_val, reason)\n+\n+ # If the Notebook is running, the status will be ready\n+ if readyReplicas == 1:\n+ return status.create_status(status.STATUS_PHASE.READY, \"Running\")\n+\n+ # 
Otherwise, first check .status.containerState to determine the status\n+ # reason and message to determine the status\n+ containerState = notebook.get(\"status\", {}).get(\"containerState\", \"\")\n+\n+ if \"waiting\" in containerState:\n+ if containerState[\"waiting\"][\"reason\"] == 'PodInitializing':\n+ return status.create_status(\n+ status.STATUS_PHASE.WAITING, containerState[\"waiting\"][\"reason\"]\n+ )\n+ else:\n+ return status.create_status(\n+ status.STATUS_PHASE.WARNING, containerState[\"waiting\"][\"reason\"] +\n+ ': ' + containerState[\"waiting\"][\"message\"]\n+ )\n+\n+ # If no containerState is available, check .status.conditions, since they have\n+ # a reason and a message to determine the status\n+ conditions = notebook.get(\"status\", {}).get(\"conditions\", \"\")", - "comment_created_at": "2023-03-23T11:38:26+00:00", - "comment_author": "kimwnasptd", - "comment_body": "In this case the default value for the conditions should be a `[]`, instead of `\"\"` since this var is a list", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "265194749", - "pr_number": 2679, - "pr_file": "components/jupyter-web-app/default/kubeflow/jupyter/server.py", - "created_at": "2019-03-13T15:43:06+00:00", - "commented_code": "return err\n\n\ndef create_workspace_pvc(body):\n \"\"\"If the type is New, then create a new PVC, else use an existing one\"\"\"\n if body[\"ws_type\"] == \"New\":\n pvc = client.V1PersistentVolumeClaim(\n metadata=client.V1ObjectMeta(\n name=body['ws_name'],\n namespace=body['ns']\n ),\n spec=client.V1PersistentVolumeClaimSpec(\n access_modes=[body['ws_access_modes']],\n resources=client.V1ResourceRequirements(\n requests={\n 'storage': body['ws_size'] + 'Gi'\n }\n )\n )\n )\n\n create_pvc(pvc)\n\n return\n\n\ndef create_datavol_pvc(body, i):", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "265194749", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 2679, - "pr_file": "components/jupyter-web-app/default/kubeflow/jupyter/server.py", - "discussion_id": "265194749", - "commented_code": "@@ -26,10 +30,79 @@ def parse_error(e):\n return err\n \n \n+def create_workspace_pvc(body):\n+ \"\"\"If the type is New, then create a new PVC, else use an existing one\"\"\"\n+ if body[\"ws_type\"] == \"New\":\n+ pvc = client.V1PersistentVolumeClaim(\n+ metadata=client.V1ObjectMeta(\n+ name=body['ws_name'],\n+ namespace=body['ns']\n+ ),\n+ spec=client.V1PersistentVolumeClaimSpec(\n+ access_modes=[body['ws_access_modes']],\n+ resources=client.V1ResourceRequirements(\n+ requests={\n+ 'storage': body['ws_size'] + 'Gi'\n+ }\n+ )\n+ )\n+ )\n+\n+ create_pvc(pvc)\n+\n+ return\n+\n+\n+def create_datavol_pvc(body, i):", - "comment_created_at": "2019-03-13T15:43:06+00:00", - "comment_author": "prodonjs", - "comment_body": "Ideally I think there would be some kind of definition for the shape of the body dict being passed in, but since I have almost no context of looking at this I'm not going to be picky.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-unique-workflow-step-names.json b/_reviewers/kubeflow-unique-workflow-step-names.json new file mode 100644 index 0000000..3a5ddf1 --- /dev/null +++ b/_reviewers/kubeflow-unique-workflow-step-names.json @@ -0,0 +1,286 @@ +[ + { + "discussion_id": "593307884", + "pr_number": 5631, + "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", + "created_at": "2021-03-12T16:38:21+00:00", + "commented_code": "extend the code.\n \"\"\"\n kaniko = 
argo_build_util.deep_copy(task_template)\n # for shurt UUID generation\n alphabet = string.ascii_lowercase + string.digits\n\n # append the tag base-commit[0:7]\n if \":\" not in destination:\n sha = os.getenv(\"PULL_BASE_SHA\", \"12341234kanikotest\")\n base = os.getenv(\"PULL_BASE_REF\", \"master\")\n destination += \":%s-%s\" % (base, sha[0:8])\n\n kaniko[\"name\"] = \"kaniko-build-push\"\n # add short UUID to step name to ensure it is unique\n kaniko[\"name\"] = \"kaniko-build-push-\" + ''.join(random.choices(alphabet, k=8))", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "593307884", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5631, + "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", + "discussion_id": "593307884", + "commented_code": "@@ -200,14 +202,17 @@ def create_kaniko_task(self, task_template, dockerfile, context,\n extend the code.\n \"\"\"\n kaniko = argo_build_util.deep_copy(task_template)\n+ # for shurt UUID generation\n+ alphabet = string.ascii_lowercase + string.digits\n \n # append the tag base-commit[0:7]\n if \":\" not in destination:\n sha = os.getenv(\"PULL_BASE_SHA\", \"12341234kanikotest\")\n base = os.getenv(\"PULL_BASE_REF\", \"master\")\n destination += \":%s-%s\" % (base, sha[0:8])\n \n- kaniko[\"name\"] = \"kaniko-build-push\"\n+ # add short UUID to step name to ensure it is unique\n+ kaniko[\"name\"] = \"kaniko-build-push-\" + ''.join(random.choices(alphabet, k=8))", + "comment_created_at": "2021-03-12T16:38:21+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Could you provide some more details as to why this change is needed?", + "pr_file_module": null + }, + { + "comment_id": "593309537", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5631, + "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", + "discussion_id": "593307884", + "commented_code": "@@ -200,14 +202,17 @@ def create_kaniko_task(self, task_template, dockerfile, context,\n extend the code.\n \"\"\"\n kaniko = argo_build_util.deep_copy(task_template)\n+ # for shurt UUID generation\n+ alphabet = string.ascii_lowercase + string.digits\n \n # append the tag base-commit[0:7]\n if \":\" not in destination:\n sha = os.getenv(\"PULL_BASE_SHA\", \"12341234kanikotest\")\n base = os.getenv(\"PULL_BASE_REF\", \"master\")\n destination += \":%s-%s\" % (base, sha[0:8])\n \n- kaniko[\"name\"] = \"kaniko-build-push\"\n+ # add short UUID to step name to ensure it is unique\n+ kaniko[\"name\"] = \"kaniko-build-push-\" + ''.join(random.choices(alphabet, k=8))", + "comment_created_at": "2021-03-12T16:40:39+00:00", + "comment_author": "davidspek", + "comment_body": "Otherwise there are 2 steps in the workflow (this workflow builds 2 images) that are both named `kaniko-build-push`. I wasn't sure if it would like that so I tested it and Argo does not :). 
", + "pr_file_module": null + }, + { + "comment_id": "593311545", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5631, + "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", + "discussion_id": "593307884", + "commented_code": "@@ -200,14 +202,17 @@ def create_kaniko_task(self, task_template, dockerfile, context,\n extend the code.\n \"\"\"\n kaniko = argo_build_util.deep_copy(task_template)\n+ # for shurt UUID generation\n+ alphabet = string.ascii_lowercase + string.digits\n \n # append the tag base-commit[0:7]\n if \":\" not in destination:\n sha = os.getenv(\"PULL_BASE_SHA\", \"12341234kanikotest\")\n base = os.getenv(\"PULL_BASE_REF\", \"master\")\n destination += \":%s-%s\" % (base, sha[0:8])\n \n- kaniko[\"name\"] = \"kaniko-build-push\"\n+ # add short UUID to step name to ensure it is unique\n+ kaniko[\"name\"] = \"kaniko-build-push-\" + ''.join(random.choices(alphabet, k=8))", + "comment_created_at": "2021-03-12T16:43:46+00:00", + "comment_author": "davidspek", + "comment_body": "The same thing is done here BTW: https://github.com/kubeflow/kubeflow/pull/5626/files#diff-a58831ea15b3ba92fe93b2196dc2867189dc87c3836c617833dbdf652ad22fd0R205", + "pr_file_module": null + }, + { + "comment_id": "593312071", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5631, + "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", + "discussion_id": "593307884", + "commented_code": "@@ -200,14 +202,17 @@ def create_kaniko_task(self, task_template, dockerfile, context,\n extend the code.\n \"\"\"\n kaniko = argo_build_util.deep_copy(task_template)\n+ # for shurt UUID generation\n+ alphabet = string.ascii_lowercase + string.digits\n \n # append the tag base-commit[0:7]\n if \":\" not in destination:\n sha = os.getenv(\"PULL_BASE_SHA\", \"12341234kanikotest\")\n base = os.getenv(\"PULL_BASE_REF\", \"master\")\n destination += \":%s-%s\" % (base, sha[0:8])\n \n- kaniko[\"name\"] = \"kaniko-build-push\"\n+ # add short UUID to step name to ensure it is unique\n+ kaniko[\"name\"] = \"kaniko-build-push-\" + ''.join(random.choices(alphabet, k=8))", + "comment_created_at": "2021-03-12T16:44:34+00:00", + "comment_author": "davidspek", + "comment_body": "Here you can see what the workflow I just linked looks like when it runs. 
\r\nhttps://argo.kubeflow-testing.com/workflows/kubeflow-test-infra/kubeflow-kubeflow-presubmit-nb-j-pt-5626-5feb726-6224-63fc?tab=workflow&nodeId=kubeflow-kubeflow-presubmit-nb-j-pt-5626-5feb726-6224-63fc-2134638194", + "pr_file_module": null + }, + { + "comment_id": "593312328", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5631, + "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", + "discussion_id": "593307884", + "commented_code": "@@ -200,14 +202,17 @@ def create_kaniko_task(self, task_template, dockerfile, context,\n extend the code.\n \"\"\"\n kaniko = argo_build_util.deep_copy(task_template)\n+ # for shurt UUID generation\n+ alphabet = string.ascii_lowercase + string.digits\n \n # append the tag base-commit[0:7]\n if \":\" not in destination:\n sha = os.getenv(\"PULL_BASE_SHA\", \"12341234kanikotest\")\n base = os.getenv(\"PULL_BASE_REF\", \"master\")\n destination += \":%s-%s\" % (base, sha[0:8])\n \n- kaniko[\"name\"] = \"kaniko-build-push\"\n+ # add short UUID to step name to ensure it is unique\n+ kaniko[\"name\"] = \"kaniko-build-push-\" + ''.join(random.choices(alphabet, k=8))", + "comment_created_at": "2021-03-12T16:44:55+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Makes sense", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "588900578", + "pr_number": 5640, + "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", + "created_at": "2021-03-06T16:23:14+00:00", + "commented_code": "\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "588900578", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5640, + "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", + "discussion_id": "588900578", + "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", + "comment_created_at": "2021-03-06T16:23:14+00:00", + "comment_author": "PatrickXYS", + "comment_body": "Same herer: \r\n\r\nI feel like you can abstract this Builder python file given you have multiple almost same file here, add it in e.g, py/kubeflow/kubeflow/ci/base_builder.py\r\n\r\nAnd for each Argo Workflow, just reference it should be fine", + "pr_file_module": null + }, + { + "comment_id": "589016392", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5640, + "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", + "discussion_id": "588900578", + "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", + "comment_created_at": "2021-03-07T11:37:14+00:00", + "comment_author": "davidspek", + "comment_body": "I'm not sure it makes sense for these CI files, as I am assuming they will be extended with more tests similar to https://github.com/kubeflow/kubeflow/blob/master/py/kubeflow/kubeflow/ci/common_ui_tests.py", + "pr_file_module": null + }, + { + "comment_id": "589060586", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5640, + "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", + "discussion_id": "588900578", + "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", + "comment_created_at": "2021-03-07T17:17:58+00:00", + "comment_author": "PatrickXYS", + "comment_body": "For extensibility, I would suggest using parent class as base, and child class for patch.", + "pr_file_module": null + }, + { + 
"comment_id": "589383241", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5640, + "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", + "discussion_id": "588900578", + "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", + "comment_created_at": "2021-03-08T12:24:27+00:00", + "comment_author": "davidspek", + "comment_body": "@kimwnasptd What is your thought on this?", + "pr_file_module": null + }, + { + "comment_id": "589383886", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5640, + "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", + "discussion_id": "588900578", + "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", + "comment_created_at": "2021-03-08T12:25:34+00:00", + "comment_author": "kimwnasptd", + "comment_body": "@PatrickXYS to make sure I'm synced with the proposed changes; the argument is to create a common base class for building a workflow that always has a step for kaniko-build, with `--no-push`, since we essentially use it in all of the component CI files?", + "pr_file_module": null + }, + { + "comment_id": "589518357", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5640, + "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", + "discussion_id": "588900578", + "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", + "comment_created_at": "2021-03-08T15:38:17+00:00", + "comment_author": "davidspek", + "comment_body": "@kimwnasptd I'm also not completely clear what the proposed implementation would look like. Either way, I'm not sure if this is a blocker as it testing if images can be built seems like something that should be added sooner rather than later. This way we would have discovered that the admission-webhook was broken before https://github.com/kubeflow/kubeflow/pull/5661.\r\nAlso, https://github.com/kubeflow/kubeflow/issues/5482#issuecomment-791906596 still needs to be resolved. Should the registry be called `access-management` or should it be `kfam`?", + "pr_file_module": null + }, + { + "comment_id": "589576038", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5640, + "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", + "discussion_id": "588900578", + "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", + "comment_created_at": "2021-03-08T16:46:05+00:00", + "comment_author": "PatrickXYS", + "comment_body": "This is only NIT change, if we feel it's urgent, feel free to move forward", + "pr_file_module": null + }, + { + "comment_id": "589721942", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5640, + "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", + "discussion_id": "588900578", + "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", + "comment_created_at": "2021-03-08T20:21:44+00:00", + "comment_author": "davidspek", + "comment_body": "I think this can be dealt with in the future when the tests for each component get expanded. For now, I think it is most important images start getting built and the PRs that are being merged for the release are at least tested if they break the image building. 
", + "pr_file_module": null + }, + { + "comment_id": "590290661", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5640, + "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", + "discussion_id": "588900578", + "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", + "comment_created_at": "2021-03-09T12:00:00+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Regarding the changes for using a different class I agree with @DavidSpek that this isn't a blocker, so let's go with this implementation for now and if in the future we find ourselves repeating too much code then lets go over this argument.\r\nDoes this sound OK to you @PatrickXYS ?\r\n", + "pr_file_module": null + }, + { + "comment_id": "590291700", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5640, + "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", + "discussion_id": "588900578", + "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", + "comment_created_at": "2021-03-09T12:01:03+00:00", + "comment_author": "kimwnasptd", + "comment_body": "> Should the registry be called access-management or should it be kfam?\r\n\r\n@yanniszark what are your thoughts on this? I guess we should stick to `access-management` for now?\r\n\r\nI'll also give a heads up to our tracking issue to add this registry as well ", + "pr_file_module": null + }, + { + "comment_id": "590304574", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5640, + "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", + "discussion_id": "588900578", + "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", + "comment_created_at": "2021-03-09T12:15:29+00:00", + "comment_author": "davidspek", + "comment_body": "@kimwnasptd I actually just ran into an issue where using the common `kaniko_builder.py` actually complicates things slightly. For this situation specifically it is easily fixed, however; for the CI testing I think it could just complicate things with little added value. ", + "pr_file_module": null + }, + { + "comment_id": "590362886", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5640, + "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", + "discussion_id": "588900578", + "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", + "comment_created_at": "2021-03-09T13:21:39+00:00", + "comment_author": "kimwnasptd", + "comment_body": "> @kimwnasptd I actually just ran into an issue where using the common kaniko_builder.py actually complicates things slightly. For this situation specifically it is easily fixed\r\n\r\nCould you provide some more details for this? Just so that we can identify the pattern/pain-point once we see it reoccurring ", + "pr_file_module": null + }, + { + "comment_id": "590369432", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5640, + "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", + "discussion_id": "588900578", + "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", + "comment_created_at": "2021-03-09T13:30:34+00:00", + "comment_author": "davidspek", + "comment_body": "I need to build 2 dockerfiles that exist in the same folder. 
For that, I just added a `second_dockerfile` and `second_destination` to the build function in `kaniko_builder.py`. I also added a small UUID tag for the task name in the workflow in the `create_kaniko_task` function in `workflow_utils.py` so that each kaniko building step has a unique name in the workflow. So for building the images I don't think it is a big deal at this point. ", + "pr_file_module": null + }, + { + "comment_id": "590413497", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5640, + "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", + "discussion_id": "588900578", + "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", + "comment_created_at": "2021-03-09T14:24:57+00:00", + "comment_author": "yanniszark", + "comment_body": "> I guess we should stick to access-management for now?\r\n\r\n@kimwnasptd I would vote for `kfam`, as it is what we usually call the component. What do you think?", + "pr_file_module": null + }, + { + "comment_id": "590416936", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5640, + "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", + "discussion_id": "588900578", + "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", + "comment_created_at": "2021-03-09T14:28:41+00:00", + "comment_author": "davidspek", + "comment_body": "I think it's better to be consistent in the naming scheme and use the folder name just like every other component. So `access-management`. ", + "pr_file_module": null + }, + { + "comment_id": "590420959", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5640, + "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", + "discussion_id": "588900578", + "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", + "comment_created_at": "2021-03-09T14:33:12+00:00", + "comment_author": "thesuperzapper", + "comment_body": "Also kfam actually stands for Kubeflow Access Management (just like we don't call it kubeflow-pipelines, but kubeflow/pipelines, I think we can leave it as `access-management`)", + "pr_file_module": null + }, + { + "comment_id": "590421528", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5640, + "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", + "discussion_id": "588900578", + "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", + "comment_created_at": "2021-03-09T14:33:52+00:00", + "comment_author": "davidspek", + "comment_body": "@yanniszark @kimwnasptd I think this discussion should move to https://github.com/kubeflow/kubeflow/issues/5675. 
Then this PR can be merged and I will update all the registry naming in a new PR depending on the outcome of https://github.com/kubeflow/kubeflow/issues/5675.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-unique-workflow-step-names.md b/_reviewers/kubeflow-unique-workflow-step-names.md index 174b192..e939035 100644 --- a/_reviewers/kubeflow-unique-workflow-step-names.md +++ b/_reviewers/kubeflow-unique-workflow-step-names.md @@ -32,291 +32,3 @@ task["name"] = "base-task-name-" + ''.join(random.choices(alphabet, k=8)) ``` This approach ensures each workflow step has a unique identifier while maintaining readability with a consistent prefix, allowing for reliable execution and easier debugging of CI/CD pipelines. - - -[ - { - "discussion_id": "593307884", - "pr_number": 5631, - "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", - "created_at": "2021-03-12T16:38:21+00:00", - "commented_code": "extend the code.\n \"\"\"\n kaniko = argo_build_util.deep_copy(task_template)\n # for shurt UUID generation\n alphabet = string.ascii_lowercase + string.digits\n\n # append the tag base-commit[0:7]\n if \":\" not in destination:\n sha = os.getenv(\"PULL_BASE_SHA\", \"12341234kanikotest\")\n base = os.getenv(\"PULL_BASE_REF\", \"master\")\n destination += \":%s-%s\" % (base, sha[0:8])\n\n kaniko[\"name\"] = \"kaniko-build-push\"\n # add short UUID to step name to ensure it is unique\n kaniko[\"name\"] = \"kaniko-build-push-\" + ''.join(random.choices(alphabet, k=8))", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "593307884", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5631, - "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", - "discussion_id": "593307884", - "commented_code": "@@ -200,14 +202,17 @@ def create_kaniko_task(self, task_template, dockerfile, context,\n extend the code.\n \"\"\"\n kaniko = argo_build_util.deep_copy(task_template)\n+ # for shurt UUID generation\n+ alphabet = string.ascii_lowercase + string.digits\n \n # append the tag base-commit[0:7]\n if \":\" not in destination:\n sha = os.getenv(\"PULL_BASE_SHA\", \"12341234kanikotest\")\n base = os.getenv(\"PULL_BASE_REF\", \"master\")\n destination += \":%s-%s\" % (base, sha[0:8])\n \n- kaniko[\"name\"] = \"kaniko-build-push\"\n+ # add short UUID to step name to ensure it is unique\n+ kaniko[\"name\"] = \"kaniko-build-push-\" + ''.join(random.choices(alphabet, k=8))", - "comment_created_at": "2021-03-12T16:38:21+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Could you provide some more details as to why this change is needed?", - "pr_file_module": null - }, - { - "comment_id": "593309537", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5631, - "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", - "discussion_id": "593307884", - "commented_code": "@@ -200,14 +202,17 @@ def create_kaniko_task(self, task_template, dockerfile, context,\n extend the code.\n \"\"\"\n kaniko = argo_build_util.deep_copy(task_template)\n+ # for shurt UUID generation\n+ alphabet = string.ascii_lowercase + string.digits\n \n # append the tag base-commit[0:7]\n if \":\" not in destination:\n sha = os.getenv(\"PULL_BASE_SHA\", \"12341234kanikotest\")\n base = os.getenv(\"PULL_BASE_REF\", \"master\")\n destination += \":%s-%s\" % (base, sha[0:8])\n \n- kaniko[\"name\"] = \"kaniko-build-push\"\n+ # add short UUID to step name to ensure it is unique\n+ kaniko[\"name\"] = \"kaniko-build-push-\" + 
''.join(random.choices(alphabet, k=8))", - "comment_created_at": "2021-03-12T16:40:39+00:00", - "comment_author": "davidspek", - "comment_body": "Otherwise there are 2 steps in the workflow (this workflow builds 2 images) that are both named `kaniko-build-push`. I wasn't sure if it would like that so I tested it and Argo does not :). ", - "pr_file_module": null - }, - { - "comment_id": "593311545", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5631, - "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", - "discussion_id": "593307884", - "commented_code": "@@ -200,14 +202,17 @@ def create_kaniko_task(self, task_template, dockerfile, context,\n extend the code.\n \"\"\"\n kaniko = argo_build_util.deep_copy(task_template)\n+ # for shurt UUID generation\n+ alphabet = string.ascii_lowercase + string.digits\n \n # append the tag base-commit[0:7]\n if \":\" not in destination:\n sha = os.getenv(\"PULL_BASE_SHA\", \"12341234kanikotest\")\n base = os.getenv(\"PULL_BASE_REF\", \"master\")\n destination += \":%s-%s\" % (base, sha[0:8])\n \n- kaniko[\"name\"] = \"kaniko-build-push\"\n+ # add short UUID to step name to ensure it is unique\n+ kaniko[\"name\"] = \"kaniko-build-push-\" + ''.join(random.choices(alphabet, k=8))", - "comment_created_at": "2021-03-12T16:43:46+00:00", - "comment_author": "davidspek", - "comment_body": "The same thing is done here BTW: https://github.com/kubeflow/kubeflow/pull/5626/files#diff-a58831ea15b3ba92fe93b2196dc2867189dc87c3836c617833dbdf652ad22fd0R205", - "pr_file_module": null - }, - { - "comment_id": "593312071", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5631, - "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", - "discussion_id": "593307884", - "commented_code": "@@ -200,14 +202,17 @@ def create_kaniko_task(self, task_template, dockerfile, context,\n extend the code.\n \"\"\"\n kaniko = argo_build_util.deep_copy(task_template)\n+ # for shurt UUID generation\n+ alphabet = string.ascii_lowercase + string.digits\n \n # append the tag base-commit[0:7]\n if \":\" not in destination:\n sha = os.getenv(\"PULL_BASE_SHA\", \"12341234kanikotest\")\n base = os.getenv(\"PULL_BASE_REF\", \"master\")\n destination += \":%s-%s\" % (base, sha[0:8])\n \n- kaniko[\"name\"] = \"kaniko-build-push\"\n+ # add short UUID to step name to ensure it is unique\n+ kaniko[\"name\"] = \"kaniko-build-push-\" + ''.join(random.choices(alphabet, k=8))", - "comment_created_at": "2021-03-12T16:44:34+00:00", - "comment_author": "davidspek", - "comment_body": "Here you can see what the workflow I just linked looks like when it runs. 
\r\nhttps://argo.kubeflow-testing.com/workflows/kubeflow-test-infra/kubeflow-kubeflow-presubmit-nb-j-pt-5626-5feb726-6224-63fc?tab=workflow&nodeId=kubeflow-kubeflow-presubmit-nb-j-pt-5626-5feb726-6224-63fc-2134638194", - "pr_file_module": null - }, - { - "comment_id": "593312328", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5631, - "pr_file": "py/kubeflow/kubeflow/ci/workflow_utils.py", - "discussion_id": "593307884", - "commented_code": "@@ -200,14 +202,17 @@ def create_kaniko_task(self, task_template, dockerfile, context,\n extend the code.\n \"\"\"\n kaniko = argo_build_util.deep_copy(task_template)\n+ # for shurt UUID generation\n+ alphabet = string.ascii_lowercase + string.digits\n \n # append the tag base-commit[0:7]\n if \":\" not in destination:\n sha = os.getenv(\"PULL_BASE_SHA\", \"12341234kanikotest\")\n base = os.getenv(\"PULL_BASE_REF\", \"master\")\n destination += \":%s-%s\" % (base, sha[0:8])\n \n- kaniko[\"name\"] = \"kaniko-build-push\"\n+ # add short UUID to step name to ensure it is unique\n+ kaniko[\"name\"] = \"kaniko-build-push-\" + ''.join(random.choices(alphabet, k=8))", - "comment_created_at": "2021-03-12T16:44:55+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Makes sense", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "588900578", - "pr_number": 5640, - "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", - "created_at": "2021-03-06T16:23:14+00:00", - "commented_code": "\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "588900578", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5640, - "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", - "discussion_id": "588900578", - "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", - "comment_created_at": "2021-03-06T16:23:14+00:00", - "comment_author": "PatrickXYS", - "comment_body": "Same herer: \r\n\r\nI feel like you can abstract this Builder python file given you have multiple almost same file here, add it in e.g, py/kubeflow/kubeflow/ci/base_builder.py\r\n\r\nAnd for each Argo Workflow, just reference it should be fine", - "pr_file_module": null - }, - { - "comment_id": "589016392", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5640, - "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", - "discussion_id": "588900578", - "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", - "comment_created_at": "2021-03-07T11:37:14+00:00", - "comment_author": "davidspek", - "comment_body": "I'm not sure it makes sense for these CI files, as I am assuming they will be extended with more tests similar to https://github.com/kubeflow/kubeflow/blob/master/py/kubeflow/kubeflow/ci/common_ui_tests.py", - "pr_file_module": null - }, - { - "comment_id": "589060586", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5640, - "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", - "discussion_id": "588900578", - "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", - "comment_created_at": "2021-03-07T17:17:58+00:00", - "comment_author": "PatrickXYS", - "comment_body": "For extensibility, I would suggest using parent class as base, and child class for patch.", - "pr_file_module": null - }, - { - 
"comment_id": "589383241", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5640, - "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", - "discussion_id": "588900578", - "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", - "comment_created_at": "2021-03-08T12:24:27+00:00", - "comment_author": "davidspek", - "comment_body": "@kimwnasptd What is your thought on this?", - "pr_file_module": null - }, - { - "comment_id": "589383886", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5640, - "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", - "discussion_id": "588900578", - "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", - "comment_created_at": "2021-03-08T12:25:34+00:00", - "comment_author": "kimwnasptd", - "comment_body": "@PatrickXYS to make sure I'm synced with the proposed changes; the argument is to create a common base class for building a workflow that always has a step for kaniko-build, with `--no-push`, since we essentially use it in all of the component CI files?", - "pr_file_module": null - }, - { - "comment_id": "589518357", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5640, - "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", - "discussion_id": "588900578", - "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", - "comment_created_at": "2021-03-08T15:38:17+00:00", - "comment_author": "davidspek", - "comment_body": "@kimwnasptd I'm also not completely clear what the proposed implementation would look like. Either way, I'm not sure if this is a blocker as it testing if images can be built seems like something that should be added sooner rather than later. This way we would have discovered that the admission-webhook was broken before https://github.com/kubeflow/kubeflow/pull/5661.\r\nAlso, https://github.com/kubeflow/kubeflow/issues/5482#issuecomment-791906596 still needs to be resolved. Should the registry be called `access-management` or should it be `kfam`?", - "pr_file_module": null - }, - { - "comment_id": "589576038", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5640, - "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", - "discussion_id": "588900578", - "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", - "comment_created_at": "2021-03-08T16:46:05+00:00", - "comment_author": "PatrickXYS", - "comment_body": "This is only NIT change, if we feel it's urgent, feel free to move forward", - "pr_file_module": null - }, - { - "comment_id": "589721942", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5640, - "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", - "discussion_id": "588900578", - "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", - "comment_created_at": "2021-03-08T20:21:44+00:00", - "comment_author": "davidspek", - "comment_body": "I think this can be dealt with in the future when the tests for each component get expanded. For now, I think it is most important images start getting built and the PRs that are being merged for the release are at least tested if they break the image building. 
", - "pr_file_module": null - }, - { - "comment_id": "590290661", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5640, - "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", - "discussion_id": "588900578", - "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", - "comment_created_at": "2021-03-09T12:00:00+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Regarding the changes for using a different class I agree with @DavidSpek that this isn't a blocker, so let's go with this implementation for now and if in the future we find ourselves repeating too much code then lets go over this argument.\r\nDoes this sound OK to you @PatrickXYS ?\r\n", - "pr_file_module": null - }, - { - "comment_id": "590291700", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5640, - "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", - "discussion_id": "588900578", - "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", - "comment_created_at": "2021-03-09T12:01:03+00:00", - "comment_author": "kimwnasptd", - "comment_body": "> Should the registry be called access-management or should it be kfam?\r\n\r\n@yanniszark what are your thoughts on this? I guess we should stick to `access-management` for now?\r\n\r\nI'll also give a heads up to our tracking issue to add this registry as well ", - "pr_file_module": null - }, - { - "comment_id": "590304574", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5640, - "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", - "discussion_id": "588900578", - "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", - "comment_created_at": "2021-03-09T12:15:29+00:00", - "comment_author": "davidspek", - "comment_body": "@kimwnasptd I actually just ran into an issue where using the common `kaniko_builder.py` actually complicates things slightly. For this situation specifically it is easily fixed, however; for the CI testing I think it could just complicate things with little added value. ", - "pr_file_module": null - }, - { - "comment_id": "590362886", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5640, - "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", - "discussion_id": "588900578", - "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", - "comment_created_at": "2021-03-09T13:21:39+00:00", - "comment_author": "kimwnasptd", - "comment_body": "> @kimwnasptd I actually just ran into an issue where using the common kaniko_builder.py actually complicates things slightly. For this situation specifically it is easily fixed\r\n\r\nCould you provide some more details for this? Just so that we can identify the pattern/pain-point once we see it reoccurring ", - "pr_file_module": null - }, - { - "comment_id": "590369432", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5640, - "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", - "discussion_id": "588900578", - "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", - "comment_created_at": "2021-03-09T13:30:34+00:00", - "comment_author": "davidspek", - "comment_body": "I need to build 2 dockerfiles that exist in the same folder. 
For that, I just added a `second_dockerfile` and `second_destination` to the build function in `kaniko_builder.py`. I also added a small UUID tag for the task name in the workflow in the `create_kaniko_task` function in `workflow_utils.py` so that each kaniko building step has a unique name in the workflow. So for building the images I don't think it is a big deal at this point. ", - "pr_file_module": null - }, - { - "comment_id": "590413497", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5640, - "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", - "discussion_id": "588900578", - "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", - "comment_created_at": "2021-03-09T14:24:57+00:00", - "comment_author": "yanniszark", - "comment_body": "> I guess we should stick to access-management for now?\r\n\r\n@kimwnasptd I would vote for `kfam`, as it is what we usually call the component. What do you think?", - "pr_file_module": null - }, - { - "comment_id": "590416936", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5640, - "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", - "discussion_id": "588900578", - "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", - "comment_created_at": "2021-03-09T14:28:41+00:00", - "comment_author": "davidspek", - "comment_body": "I think it's better to be consistent in the naming scheme and use the folder name just like every other component. So `access-management`. ", - "pr_file_module": null - }, - { - "comment_id": "590420959", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5640, - "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", - "discussion_id": "588900578", - "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", - "comment_created_at": "2021-03-09T14:33:12+00:00", - "comment_author": "thesuperzapper", - "comment_body": "Also kfam actually stands for Kubeflow Access Management (just like we don't call it kubeflow-pipelines, but kubeflow/pipelines, I think we can leave it as `access-management`)", - "pr_file_module": null - }, - { - "comment_id": "590421528", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5640, - "pr_file": "py/kubeflow/kubeflow/ci/access_management_tests.py", - "discussion_id": "588900578", - "commented_code": "@@ -0,0 +1,43 @@\n+\"\"\"\"Argo Workflow for testing Access Management's OCI image using Kaniko\"\"\"", - "comment_created_at": "2021-03-09T14:33:52+00:00", - "comment_author": "davidspek", - "comment_body": "@yanniszark @kimwnasptd I think this discussion should move to https://github.com/kubeflow/kubeflow/issues/5675. 
Then this PR can be merged and I will update all the registry naming in a new PR depending on the outcome of https://github.com/kubeflow/kubeflow/issues/5675.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-use-appropriate-log-levels.json b/_reviewers/kubeflow-use-appropriate-log-levels.json new file mode 100644 index 0000000..5cfa79c --- /dev/null +++ b/_reviewers/kubeflow-use-appropriate-log-levels.json @@ -0,0 +1,92 @@ +[ + { + "discussion_id": "1165424028", + "pr_number": 7091, + "pr_file": "components/profile-controller/controllers/plugin_ad_workload_identity_azure.go", + "created_at": "2023-04-13T12:03:03+00:00", + "commented_code": "/*\nCopyright 2019 The Kubeflow Authors.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n*/\n\npackage controllers\n\nimport (\n\t\"context\"\n\t\"errors\"\n\n\t\"github.com/go-logr/logr\"\n\tprofilev1 \"github.com/kubeflow/kubeflow/components/profile-controller/api/v1\"\n\tcorev1 \"k8s.io/api/core/v1\"\n\t\"k8s.io/apimachinery/pkg/types\"\n)\n\nconst (\n\tKIND_AZURE_AD_WORKLOAD_IDENTITY = \"AzureAdWorkloadIdentity\"\n\tAD_DEFAULT_SERVICE_ACCOUNT = DEFAULT_EDITOR\n\tAZURE_CLIENT_ID_ANNOTATION_KEY = \"azure.workload.identity/client-id\"\n\tAZURE_TENANT_ID_ANNOTATION_KEY = \"azure.workload.identity/tenant-id\"\n\n\tAZURE_SA_TOKEN_EXPIRATION_ANNOTATION_KEY = \"azure.workload.identity/service-account-token-expiration\"\n\n\tAZURE_WORKLOAD_IDENTITY_POD_ANNOTATION = \"azure.workload.identity/use\"\n)\n\n// AzureAdWorkloadIdentity: plugin that setup Azure AD workload identity for target profile namespace\ntype AzureAdWorkloadIdentity struct {\n\tAzureIdentityClientId string `json:\"identityClientId,omitempty\"`\n\tAzureIdentityTenantId string `json:\"identityTenantId,omitempty\"`\n\tAzureServiceAccountTokenExpiration string `json:\"serviceAccountTokenExpiration,omitempty\"`\n}\n\n// ApplyPlugin will grant Azure workload identity to service account DEFAULT_EDITOR\nfunc (azure *AzureAdWorkloadIdentity) ApplyPlugin(r *ProfileReconciler, profile *profilev1.Profile) error {\n\tlogger := r.Log.WithValues(\"profile\", profile.Name)\n\n\tlogger.Info(\"Setting up workload identity\", \"ClientId\", azure.AzureIdentityClientId)", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1165424028", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 7091, + "pr_file": "components/profile-controller/controllers/plugin_ad_workload_identity_azure.go", + "discussion_id": "1165424028", + "commented_code": "@@ -0,0 +1,107 @@\n+/*\n+Copyright 2019 The Kubeflow Authors.\n+\n+Licensed under the Apache License, Version 2.0 (the \"License\");\n+you may not use this file except in compliance with the License.\n+You may obtain a copy of the License at\n+\n+ http://www.apache.org/licenses/LICENSE-2.0\n+\n+Unless required by applicable law or agreed to in writing, software\n+distributed under the License is distributed on an \"AS IS\" BASIS,\n+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+See the License for the 
specific language governing permissions and\n+limitations under the License.\n+*/\n+\n+package controllers\n+\n+import (\n+\t\"context\"\n+\t\"errors\"\n+\n+\t\"github.com/go-logr/logr\"\n+\tprofilev1 \"github.com/kubeflow/kubeflow/components/profile-controller/api/v1\"\n+\tcorev1 \"k8s.io/api/core/v1\"\n+\t\"k8s.io/apimachinery/pkg/types\"\n+)\n+\n+const (\n+\tKIND_AZURE_AD_WORKLOAD_IDENTITY = \"AzureAdWorkloadIdentity\"\n+\tAD_DEFAULT_SERVICE_ACCOUNT = DEFAULT_EDITOR\n+\tAZURE_CLIENT_ID_ANNOTATION_KEY = \"azure.workload.identity/client-id\"\n+\tAZURE_TENANT_ID_ANNOTATION_KEY = \"azure.workload.identity/tenant-id\"\n+\n+\tAZURE_SA_TOKEN_EXPIRATION_ANNOTATION_KEY = \"azure.workload.identity/service-account-token-expiration\"\n+\n+\tAZURE_WORKLOAD_IDENTITY_POD_ANNOTATION = \"azure.workload.identity/use\"\n+)\n+\n+// AzureAdWorkloadIdentity: plugin that setup Azure AD workload identity for target profile namespace\n+type AzureAdWorkloadIdentity struct {\n+\tAzureIdentityClientId string `json:\"identityClientId,omitempty\"`\n+\tAzureIdentityTenantId string `json:\"identityTenantId,omitempty\"`\n+\tAzureServiceAccountTokenExpiration string `json:\"serviceAccountTokenExpiration,omitempty\"`\n+}\n+\n+// ApplyPlugin will grant Azure workload identity to service account DEFAULT_EDITOR\n+func (azure *AzureAdWorkloadIdentity) ApplyPlugin(r *ProfileReconciler, profile *profilev1.Profile) error {\n+\tlogger := r.Log.WithValues(\"profile\", profile.Name)\n+\n+\tlogger.Info(\"Setting up workload identity\", \"ClientId\", azure.AzureIdentityClientId)", + "comment_created_at": "2023-04-13T12:03:03+00:00", + "comment_author": "AhmedGrati", + "comment_body": "I think that, preferably, log messages should start in lowercase.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "491869454", + "pr_number": 5314, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "created_at": "2020-09-21T08:29:05+00:00", + "commented_code": "} else {\n\t\t// Got the pod\n\t\tpodFound = true\n\t\tif len(pod.Status.ContainerStatuses) > 0 &&\n\t\t\tpod.Status.ContainerStatuses[0].State != instance.Status.ContainerState {\n\t\t\tlog.Info(\"Updating container state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n\t\t\tcs := pod.Status.ContainerStatuses[0].State\n\t\t\tinstance.Status.ContainerState = cs\n\t\t\toldConditions := instance.Status.Conditions\n\t\t\tnewCondition := getNextCondition(cs)\n\t\t\t// Append new condition\n\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n\t\t\t\toldConditions[0].Message != newCondition.Message {\n\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n\n\t\tif len(pod.Status.ContainerStatuses) > 0 {\n\t\t\tnotebookContainerFound := false\n\t\t\tfor i := range pod.Status.ContainerStatuses {\n\t\t\t\tif pod.Status.ContainerStatuses[i].Name == instance.Name &&\n\t\t\t\t\tpod.Status.ContainerStatuses[i].State != instance.Status.ContainerState {\n\t\t\t\t\tlog.Info(\"Updating Notebook CR state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n\t\t\t\t\tcs := pod.Status.ContainerStatuses[i].State\n\t\t\t\t\tinstance.Status.ContainerState = cs\n\t\t\t\t\toldConditions := 
instance.Status.Conditions\n\t\t\t\t\tnewCondition := getNextCondition(cs)\n\t\t\t\t\t// Append new condition\n\t\t\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n\t\t\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n\t\t\t\t\t\toldConditions[0].Message != newCondition.Message {\n\t\t\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n\t\t\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n\t\t\t\t\t}\n\t\t\t\t\terr = r.Status().Update(ctx, instance)\n\t\t\t\t\tif err != nil {\n\t\t\t\t\t\treturn ctrl.Result{}, err\n\t\t\t\t\t}\n\t\t\t\t\tnotebookContainerFound = true\n\t\t\t\t\tbreak\n\t\t\t\t}\n\t\t\t}\n\t\t\terr = r.Status().Update(ctx, instance)\n\t\t\tif err != nil {\n\t\t\t\treturn ctrl.Result{}, err\n\t\t\tif !notebookContainerFound {\n\t\t\t\tlog.Info(\"WARNING: Notebook container is not found, so could not update State of Notebook CR\")", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "491869454", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5314, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "491869454", + "commented_code": "@@ -206,23 +206,34 @@ func (r *NotebookReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {\n \t} else {\n \t\t// Got the pod\n \t\tpodFound = true\n-\t\tif len(pod.Status.ContainerStatuses) > 0 &&\n-\t\t\tpod.Status.ContainerStatuses[0].State != instance.Status.ContainerState {\n-\t\t\tlog.Info(\"Updating container state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n-\t\t\tcs := pod.Status.ContainerStatuses[0].State\n-\t\t\tinstance.Status.ContainerState = cs\n-\t\t\toldConditions := instance.Status.Conditions\n-\t\t\tnewCondition := getNextCondition(cs)\n-\t\t\t// Append new condition\n-\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n-\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n-\t\t\t\toldConditions[0].Message != newCondition.Message {\n-\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n-\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n+\n+\t\tif len(pod.Status.ContainerStatuses) > 0 {\n+\t\t\tnotebookContainerFound := false\n+\t\t\tfor i := range pod.Status.ContainerStatuses {\n+\t\t\t\tif pod.Status.ContainerStatuses[i].Name == instance.Name &&\n+\t\t\t\t\tpod.Status.ContainerStatuses[i].State != instance.Status.ContainerState {\n+\t\t\t\t\tlog.Info(\"Updating Notebook CR state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n+\t\t\t\t\tcs := pod.Status.ContainerStatuses[i].State\n+\t\t\t\t\tinstance.Status.ContainerState = cs\n+\t\t\t\t\toldConditions := instance.Status.Conditions\n+\t\t\t\t\tnewCondition := getNextCondition(cs)\n+\t\t\t\t\t// Append new condition\n+\t\t\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n+\t\t\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n+\t\t\t\t\t\toldConditions[0].Message != newCondition.Message {\n+\t\t\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, 
\"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n+\t\t\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n+\t\t\t\t\t}\n+\t\t\t\t\terr = r.Status().Update(ctx, instance)\n+\t\t\t\t\tif err != nil {\n+\t\t\t\t\t\treturn ctrl.Result{}, err\n+\t\t\t\t\t}\n+\t\t\t\t\tnotebookContainerFound = true\n+\t\t\t\t\tbreak\n+\t\t\t\t}\n \t\t\t}\n-\t\t\terr = r.Status().Update(ctx, instance)\n-\t\t\tif err != nil {\n-\t\t\t\treturn ctrl.Result{}, err\n+\t\t\tif !notebookContainerFound {\n+\t\t\t\tlog.Info(\"WARNING: Notebook container is not found, so could not update State of Notebook CR\")", + "comment_created_at": "2020-09-21T08:29:05+00:00", + "comment_author": "yanniszark", + "comment_body": "I don't think that a log.Info should emit a `WARNING` message.\r\nIf the message is indeed a warning, use `log.Warn`.\r\nThis sounds like an error though.", + "pr_file_module": null + }, + { + "comment_id": "491884574", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5314, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "491869454", + "commented_code": "@@ -206,23 +206,34 @@ func (r *NotebookReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {\n \t} else {\n \t\t// Got the pod\n \t\tpodFound = true\n-\t\tif len(pod.Status.ContainerStatuses) > 0 &&\n-\t\t\tpod.Status.ContainerStatuses[0].State != instance.Status.ContainerState {\n-\t\t\tlog.Info(\"Updating container state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n-\t\t\tcs := pod.Status.ContainerStatuses[0].State\n-\t\t\tinstance.Status.ContainerState = cs\n-\t\t\toldConditions := instance.Status.Conditions\n-\t\t\tnewCondition := getNextCondition(cs)\n-\t\t\t// Append new condition\n-\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n-\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n-\t\t\t\toldConditions[0].Message != newCondition.Message {\n-\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n-\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n+\n+\t\tif len(pod.Status.ContainerStatuses) > 0 {\n+\t\t\tnotebookContainerFound := false\n+\t\t\tfor i := range pod.Status.ContainerStatuses {\n+\t\t\t\tif pod.Status.ContainerStatuses[i].Name == instance.Name &&\n+\t\t\t\t\tpod.Status.ContainerStatuses[i].State != instance.Status.ContainerState {\n+\t\t\t\t\tlog.Info(\"Updating Notebook CR state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n+\t\t\t\t\tcs := pod.Status.ContainerStatuses[i].State\n+\t\t\t\t\tinstance.Status.ContainerState = cs\n+\t\t\t\t\toldConditions := instance.Status.Conditions\n+\t\t\t\t\tnewCondition := getNextCondition(cs)\n+\t\t\t\t\t// Append new condition\n+\t\t\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n+\t\t\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n+\t\t\t\t\t\toldConditions[0].Message != newCondition.Message {\n+\t\t\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n+\t\t\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, 
oldConditions...)\n+\t\t\t\t\t}\n+\t\t\t\t\terr = r.Status().Update(ctx, instance)\n+\t\t\t\t\tif err != nil {\n+\t\t\t\t\t\treturn ctrl.Result{}, err\n+\t\t\t\t\t}\n+\t\t\t\t\tnotebookContainerFound = true\n+\t\t\t\t\tbreak\n+\t\t\t\t}\n \t\t\t}\n-\t\t\terr = r.Status().Update(ctx, instance)\n-\t\t\tif err != nil {\n-\t\t\t\treturn ctrl.Result{}, err\n+\t\t\tif !notebookContainerFound {\n+\t\t\t\tlog.Info(\"WARNING: Notebook container is not found, so could not update State of Notebook CR\")", + "comment_created_at": "2020-09-21T08:55:09+00:00", + "comment_author": "kimwnasptd", + "comment_body": "I agree with @yanniszark.\r\nAlthough AFAIK `logr` does not have a [Warning level](https://github.com/go-logr/logr#why-not-more-named-levels-like-warning). So we should do this an Error instead.\r\n\r\n@gilbeckers could you also make the error message more explicit as to why it couldn't find the Notebook Container? Something like: `Could not find the Notebook container. No container has the same name with the CR '{notebook-name}'. Will not update the status of the CR.`", + "pr_file_module": null + }, + { + "comment_id": "492286533", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5314, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "491869454", + "commented_code": "@@ -206,23 +206,34 @@ func (r *NotebookReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {\n \t} else {\n \t\t// Got the pod\n \t\tpodFound = true\n-\t\tif len(pod.Status.ContainerStatuses) > 0 &&\n-\t\t\tpod.Status.ContainerStatuses[0].State != instance.Status.ContainerState {\n-\t\t\tlog.Info(\"Updating container state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n-\t\t\tcs := pod.Status.ContainerStatuses[0].State\n-\t\t\tinstance.Status.ContainerState = cs\n-\t\t\toldConditions := instance.Status.Conditions\n-\t\t\tnewCondition := getNextCondition(cs)\n-\t\t\t// Append new condition\n-\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n-\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n-\t\t\t\toldConditions[0].Message != newCondition.Message {\n-\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n-\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n+\n+\t\tif len(pod.Status.ContainerStatuses) > 0 {\n+\t\t\tnotebookContainerFound := false\n+\t\t\tfor i := range pod.Status.ContainerStatuses {\n+\t\t\t\tif pod.Status.ContainerStatuses[i].Name == instance.Name &&\n+\t\t\t\t\tpod.Status.ContainerStatuses[i].State != instance.Status.ContainerState {\n+\t\t\t\t\tlog.Info(\"Updating Notebook CR state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n+\t\t\t\t\tcs := pod.Status.ContainerStatuses[i].State\n+\t\t\t\t\tinstance.Status.ContainerState = cs\n+\t\t\t\t\toldConditions := instance.Status.Conditions\n+\t\t\t\t\tnewCondition := getNextCondition(cs)\n+\t\t\t\t\t// Append new condition\n+\t\t\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n+\t\t\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n+\t\t\t\t\t\toldConditions[0].Message != newCondition.Message {\n+\t\t\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", 
newCondition.Message)\n+\t\t\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n+\t\t\t\t\t}\n+\t\t\t\t\terr = r.Status().Update(ctx, instance)\n+\t\t\t\t\tif err != nil {\n+\t\t\t\t\t\treturn ctrl.Result{}, err\n+\t\t\t\t\t}\n+\t\t\t\t\tnotebookContainerFound = true\n+\t\t\t\t\tbreak\n+\t\t\t\t}\n \t\t\t}\n-\t\t\terr = r.Status().Update(ctx, instance)\n-\t\t\tif err != nil {\n-\t\t\t\treturn ctrl.Result{}, err\n+\t\t\tif !notebookContainerFound {\n+\t\t\t\tlog.Info(\"WARNING: Notebook container is not found, so could not update State of Notebook CR\")", + "comment_created_at": "2020-09-21T19:11:06+00:00", + "comment_author": "gilbeckers", + "comment_body": "Done :+1: ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "320260335", + "pr_number": 4040, + "pr_file": "bootstrap/pkg/kfapp/apply/apply.go", + "created_at": "2019-09-03T13:06:07+00:00", + "commented_code": "package apply\n\nimport (\n\t\"errors\"\n\t\"fmt\"\n\t\"io\"\n\t\"os\"\n\n\tkftypes \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n\tkfdefsv3 \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n\tlog \"github.com/sirupsen/logrus\"\n)\n\n// BootstrapKubeflow is used by the kfctl apply sub-command to take in a configfile\n// as a flag and boostrap a KfApp and deploy it\nfunc BootstrapKubeflow(configFilePath string, kfResource kftypes.ResourceEnum) error {\n\t// Set default app name to kf-app\n\tappName := \"kf-app\"\n\n\t// Construct KfDef from the configFilePath provided\n\tkfDef := &kfdefsv3.KfDef{}\n\tkfDef, err := kfdefsv3.LoadKFDefFromURI(configFilePath)\n\tif err != nil {\n\t\tlog.Printf(\"Unable to create KfDef from config file: %v\", err)\n\t}\n\tif kfDef.Name != \"\" {\n\t\tlog.Warnf(\"Overriding KfDef.Spec.Name; old value %v; new value %v\", kfDef.Name, appName)\n\t}\n\tkfDef.Name = appName\n\tisValid, msg := kfDef.IsValid()\n\tif !isValid {\n\t\tlog.Printf(\"Invalid kfdef: %v\", isValid)\n\t\tlog.Printf(\"Error validating generated KfDef, please check config file validity: %v\", msg)\n\t}\n\tcwd, err := os.Getwd()\n\tif err != nil {\n\t\tlog.Printf(\"Error getting current working directory: %v\", err)\n\t}\n\tfmt.Println(\"Present working directory is: %v\", cwd)", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "320260335", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4040, + "pr_file": "bootstrap/pkg/kfapp/apply/apply.go", + "discussion_id": "320260335", + "commented_code": "@@ -0,0 +1,103 @@\n+package apply\n+\n+import (\n+\t\"errors\"\n+\t\"fmt\"\n+\t\"io\"\n+\t\"os\"\n+\n+\tkftypes \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n+\tkfdefsv3 \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n+\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n+\tlog \"github.com/sirupsen/logrus\"\n+)\n+\n+// BootstrapKubeflow is used by the kfctl apply sub-command to take in a configfile\n+// as a flag and boostrap a KfApp and deploy it\n+func BootstrapKubeflow(configFilePath string, kfResource kftypes.ResourceEnum) error {\n+\t// Set default app name to kf-app\n+\tappName := \"kf-app\"\n+\n+\t// Construct KfDef from the configFilePath provided\n+\tkfDef := &kfdefsv3.KfDef{}\n+\tkfDef, err := kfdefsv3.LoadKFDefFromURI(configFilePath)\n+\tif err != nil {\n+\t\tlog.Printf(\"Unable to create KfDef from config file: %v\", err)\n+\t}\n+\tif kfDef.Name != 
\"\" {\n+\t\tlog.Warnf(\"Overriding KfDef.Spec.Name; old value %v; new value %v\", kfDef.Name, appName)\n+\t}\n+\tkfDef.Name = appName\n+\tisValid, msg := kfDef.IsValid()\n+\tif !isValid {\n+\t\tlog.Printf(\"Invalid kfdef: %v\", isValid)\n+\t\tlog.Printf(\"Error validating generated KfDef, please check config file validity: %v\", msg)\n+\t}\n+\tcwd, err := os.Getwd()\n+\tif err != nil {\n+\t\tlog.Printf(\"Error getting current working directory: %v\", err)\n+\t}\n+\tfmt.Println(\"Present working directory is: %v\", cwd)", + "comment_created_at": "2019-09-03T13:06:07+00:00", + "comment_author": "yanniszark", + "comment_body": "Can you use log instead of fmt and change all instances of Println to a log-level function? (eg Info, Warn).", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-use-appropriate-log-levels.md b/_reviewers/kubeflow-use-appropriate-log-levels.md index 2e0b98a..adab886 100644 --- a/_reviewers/kubeflow-use-appropriate-log-levels.md +++ b/_reviewers/kubeflow-use-appropriate-log-levels.md @@ -33,97 +33,3 @@ log.Error("could not find the notebook container", "notebook-name", instance.Nam ``` Following these practices helps with log filtering, improves troubleshooting, and creates a consistent logging experience throughout the codebase. - - -[ - { - "discussion_id": "1165424028", - "pr_number": 7091, - "pr_file": "components/profile-controller/controllers/plugin_ad_workload_identity_azure.go", - "created_at": "2023-04-13T12:03:03+00:00", - "commented_code": "/*\nCopyright 2019 The Kubeflow Authors.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n*/\n\npackage controllers\n\nimport (\n\t\"context\"\n\t\"errors\"\n\n\t\"github.com/go-logr/logr\"\n\tprofilev1 \"github.com/kubeflow/kubeflow/components/profile-controller/api/v1\"\n\tcorev1 \"k8s.io/api/core/v1\"\n\t\"k8s.io/apimachinery/pkg/types\"\n)\n\nconst (\n\tKIND_AZURE_AD_WORKLOAD_IDENTITY = \"AzureAdWorkloadIdentity\"\n\tAD_DEFAULT_SERVICE_ACCOUNT = DEFAULT_EDITOR\n\tAZURE_CLIENT_ID_ANNOTATION_KEY = \"azure.workload.identity/client-id\"\n\tAZURE_TENANT_ID_ANNOTATION_KEY = \"azure.workload.identity/tenant-id\"\n\n\tAZURE_SA_TOKEN_EXPIRATION_ANNOTATION_KEY = \"azure.workload.identity/service-account-token-expiration\"\n\n\tAZURE_WORKLOAD_IDENTITY_POD_ANNOTATION = \"azure.workload.identity/use\"\n)\n\n// AzureAdWorkloadIdentity: plugin that setup Azure AD workload identity for target profile namespace\ntype AzureAdWorkloadIdentity struct {\n\tAzureIdentityClientId string `json:\"identityClientId,omitempty\"`\n\tAzureIdentityTenantId string `json:\"identityTenantId,omitempty\"`\n\tAzureServiceAccountTokenExpiration string `json:\"serviceAccountTokenExpiration,omitempty\"`\n}\n\n// ApplyPlugin will grant Azure workload identity to service account DEFAULT_EDITOR\nfunc (azure *AzureAdWorkloadIdentity) ApplyPlugin(r *ProfileReconciler, profile *profilev1.Profile) error {\n\tlogger := r.Log.WithValues(\"profile\", profile.Name)\n\n\tlogger.Info(\"Setting up workload identity\", \"ClientId\", 
azure.AzureIdentityClientId)", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1165424028", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 7091, - "pr_file": "components/profile-controller/controllers/plugin_ad_workload_identity_azure.go", - "discussion_id": "1165424028", - "commented_code": "@@ -0,0 +1,107 @@\n+/*\n+Copyright 2019 The Kubeflow Authors.\n+\n+Licensed under the Apache License, Version 2.0 (the \"License\");\n+you may not use this file except in compliance with the License.\n+You may obtain a copy of the License at\n+\n+ http://www.apache.org/licenses/LICENSE-2.0\n+\n+Unless required by applicable law or agreed to in writing, software\n+distributed under the License is distributed on an \"AS IS\" BASIS,\n+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+See the License for the specific language governing permissions and\n+limitations under the License.\n+*/\n+\n+package controllers\n+\n+import (\n+\t\"context\"\n+\t\"errors\"\n+\n+\t\"github.com/go-logr/logr\"\n+\tprofilev1 \"github.com/kubeflow/kubeflow/components/profile-controller/api/v1\"\n+\tcorev1 \"k8s.io/api/core/v1\"\n+\t\"k8s.io/apimachinery/pkg/types\"\n+)\n+\n+const (\n+\tKIND_AZURE_AD_WORKLOAD_IDENTITY = \"AzureAdWorkloadIdentity\"\n+\tAD_DEFAULT_SERVICE_ACCOUNT = DEFAULT_EDITOR\n+\tAZURE_CLIENT_ID_ANNOTATION_KEY = \"azure.workload.identity/client-id\"\n+\tAZURE_TENANT_ID_ANNOTATION_KEY = \"azure.workload.identity/tenant-id\"\n+\n+\tAZURE_SA_TOKEN_EXPIRATION_ANNOTATION_KEY = \"azure.workload.identity/service-account-token-expiration\"\n+\n+\tAZURE_WORKLOAD_IDENTITY_POD_ANNOTATION = \"azure.workload.identity/use\"\n+)\n+\n+// AzureAdWorkloadIdentity: plugin that setup Azure AD workload identity for target profile namespace\n+type AzureAdWorkloadIdentity struct {\n+\tAzureIdentityClientId string `json:\"identityClientId,omitempty\"`\n+\tAzureIdentityTenantId string `json:\"identityTenantId,omitempty\"`\n+\tAzureServiceAccountTokenExpiration string `json:\"serviceAccountTokenExpiration,omitempty\"`\n+}\n+\n+// ApplyPlugin will grant Azure workload identity to service account DEFAULT_EDITOR\n+func (azure *AzureAdWorkloadIdentity) ApplyPlugin(r *ProfileReconciler, profile *profilev1.Profile) error {\n+\tlogger := r.Log.WithValues(\"profile\", profile.Name)\n+\n+\tlogger.Info(\"Setting up workload identity\", \"ClientId\", azure.AzureIdentityClientId)", - "comment_created_at": "2023-04-13T12:03:03+00:00", - "comment_author": "AhmedGrati", - "comment_body": "I think that, preferably, log messages should start in lowercase.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "491869454", - "pr_number": 5314, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "created_at": "2020-09-21T08:29:05+00:00", - "commented_code": "} else {\n\t\t// Got the pod\n\t\tpodFound = true\n\t\tif len(pod.Status.ContainerStatuses) > 0 &&\n\t\t\tpod.Status.ContainerStatuses[0].State != instance.Status.ContainerState {\n\t\t\tlog.Info(\"Updating container state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n\t\t\tcs := pod.Status.ContainerStatuses[0].State\n\t\t\tinstance.Status.ContainerState = cs\n\t\t\toldConditions := instance.Status.Conditions\n\t\t\tnewCondition := getNextCondition(cs)\n\t\t\t// Append new condition\n\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n\t\t\t\toldConditions[0].Message != 
newCondition.Message {\n\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n\n\t\tif len(pod.Status.ContainerStatuses) > 0 {\n\t\t\tnotebookContainerFound := false\n\t\t\tfor i := range pod.Status.ContainerStatuses {\n\t\t\t\tif pod.Status.ContainerStatuses[i].Name == instance.Name &&\n\t\t\t\t\tpod.Status.ContainerStatuses[i].State != instance.Status.ContainerState {\n\t\t\t\t\tlog.Info(\"Updating Notebook CR state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n\t\t\t\t\tcs := pod.Status.ContainerStatuses[i].State\n\t\t\t\t\tinstance.Status.ContainerState = cs\n\t\t\t\t\toldConditions := instance.Status.Conditions\n\t\t\t\t\tnewCondition := getNextCondition(cs)\n\t\t\t\t\t// Append new condition\n\t\t\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n\t\t\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n\t\t\t\t\t\toldConditions[0].Message != newCondition.Message {\n\t\t\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n\t\t\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n\t\t\t\t\t}\n\t\t\t\t\terr = r.Status().Update(ctx, instance)\n\t\t\t\t\tif err != nil {\n\t\t\t\t\t\treturn ctrl.Result{}, err\n\t\t\t\t\t}\n\t\t\t\t\tnotebookContainerFound = true\n\t\t\t\t\tbreak\n\t\t\t\t}\n\t\t\t}\n\t\t\terr = r.Status().Update(ctx, instance)\n\t\t\tif err != nil {\n\t\t\t\treturn ctrl.Result{}, err\n\t\t\tif !notebookContainerFound {\n\t\t\t\tlog.Info(\"WARNING: Notebook container is not found, so could not update State of Notebook CR\")", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "491869454", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5314, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "491869454", - "commented_code": "@@ -206,23 +206,34 @@ func (r *NotebookReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {\n \t} else {\n \t\t// Got the pod\n \t\tpodFound = true\n-\t\tif len(pod.Status.ContainerStatuses) > 0 &&\n-\t\t\tpod.Status.ContainerStatuses[0].State != instance.Status.ContainerState {\n-\t\t\tlog.Info(\"Updating container state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n-\t\t\tcs := pod.Status.ContainerStatuses[0].State\n-\t\t\tinstance.Status.ContainerState = cs\n-\t\t\toldConditions := instance.Status.Conditions\n-\t\t\tnewCondition := getNextCondition(cs)\n-\t\t\t// Append new condition\n-\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n-\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n-\t\t\t\toldConditions[0].Message != newCondition.Message {\n-\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n-\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n+\n+\t\tif len(pod.Status.ContainerStatuses) > 0 {\n+\t\t\tnotebookContainerFound := false\n+\t\t\tfor i := range 
pod.Status.ContainerStatuses {\n+\t\t\t\tif pod.Status.ContainerStatuses[i].Name == instance.Name &&\n+\t\t\t\t\tpod.Status.ContainerStatuses[i].State != instance.Status.ContainerState {\n+\t\t\t\t\tlog.Info(\"Updating Notebook CR state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n+\t\t\t\t\tcs := pod.Status.ContainerStatuses[i].State\n+\t\t\t\t\tinstance.Status.ContainerState = cs\n+\t\t\t\t\toldConditions := instance.Status.Conditions\n+\t\t\t\t\tnewCondition := getNextCondition(cs)\n+\t\t\t\t\t// Append new condition\n+\t\t\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n+\t\t\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n+\t\t\t\t\t\toldConditions[0].Message != newCondition.Message {\n+\t\t\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n+\t\t\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n+\t\t\t\t\t}\n+\t\t\t\t\terr = r.Status().Update(ctx, instance)\n+\t\t\t\t\tif err != nil {\n+\t\t\t\t\t\treturn ctrl.Result{}, err\n+\t\t\t\t\t}\n+\t\t\t\t\tnotebookContainerFound = true\n+\t\t\t\t\tbreak\n+\t\t\t\t}\n \t\t\t}\n-\t\t\terr = r.Status().Update(ctx, instance)\n-\t\t\tif err != nil {\n-\t\t\t\treturn ctrl.Result{}, err\n+\t\t\tif !notebookContainerFound {\n+\t\t\t\tlog.Info(\"WARNING: Notebook container is not found, so could not update State of Notebook CR\")", - "comment_created_at": "2020-09-21T08:29:05+00:00", - "comment_author": "yanniszark", - "comment_body": "I don't think that a log.Info should emit a `WARNING` message.\r\nIf the message is indeed a warning, use `log.Warn`.\r\nThis sounds like an error though.", - "pr_file_module": null - }, - { - "comment_id": "491884574", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5314, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "491869454", - "commented_code": "@@ -206,23 +206,34 @@ func (r *NotebookReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {\n \t} else {\n \t\t// Got the pod\n \t\tpodFound = true\n-\t\tif len(pod.Status.ContainerStatuses) > 0 &&\n-\t\t\tpod.Status.ContainerStatuses[0].State != instance.Status.ContainerState {\n-\t\t\tlog.Info(\"Updating container state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n-\t\t\tcs := pod.Status.ContainerStatuses[0].State\n-\t\t\tinstance.Status.ContainerState = cs\n-\t\t\toldConditions := instance.Status.Conditions\n-\t\t\tnewCondition := getNextCondition(cs)\n-\t\t\t// Append new condition\n-\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n-\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n-\t\t\t\toldConditions[0].Message != newCondition.Message {\n-\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n-\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n+\n+\t\tif len(pod.Status.ContainerStatuses) > 0 {\n+\t\t\tnotebookContainerFound := false\n+\t\t\tfor i := range pod.Status.ContainerStatuses {\n+\t\t\t\tif pod.Status.ContainerStatuses[i].Name == instance.Name &&\n+\t\t\t\t\tpod.Status.ContainerStatuses[i].State != instance.Status.ContainerState 
{\n+\t\t\t\t\tlog.Info(\"Updating Notebook CR state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n+\t\t\t\t\tcs := pod.Status.ContainerStatuses[i].State\n+\t\t\t\t\tinstance.Status.ContainerState = cs\n+\t\t\t\t\toldConditions := instance.Status.Conditions\n+\t\t\t\t\tnewCondition := getNextCondition(cs)\n+\t\t\t\t\t// Append new condition\n+\t\t\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n+\t\t\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n+\t\t\t\t\t\toldConditions[0].Message != newCondition.Message {\n+\t\t\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n+\t\t\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n+\t\t\t\t\t}\n+\t\t\t\t\terr = r.Status().Update(ctx, instance)\n+\t\t\t\t\tif err != nil {\n+\t\t\t\t\t\treturn ctrl.Result{}, err\n+\t\t\t\t\t}\n+\t\t\t\t\tnotebookContainerFound = true\n+\t\t\t\t\tbreak\n+\t\t\t\t}\n \t\t\t}\n-\t\t\terr = r.Status().Update(ctx, instance)\n-\t\t\tif err != nil {\n-\t\t\t\treturn ctrl.Result{}, err\n+\t\t\tif !notebookContainerFound {\n+\t\t\t\tlog.Info(\"WARNING: Notebook container is not found, so could not update State of Notebook CR\")", - "comment_created_at": "2020-09-21T08:55:09+00:00", - "comment_author": "kimwnasptd", - "comment_body": "I agree with @yanniszark.\r\nAlthough AFAIK `logr` does not have a [Warning level](https://github.com/go-logr/logr#why-not-more-named-levels-like-warning). So we should do this an Error instead.\r\n\r\n@gilbeckers could you also make the error message more explicit as to why it couldn't find the Notebook Container? Something like: `Could not find the Notebook container. No container has the same name with the CR '{notebook-name}'. 
Will not update the status of the CR.`", - "pr_file_module": null - }, - { - "comment_id": "492286533", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5314, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "491869454", - "commented_code": "@@ -206,23 +206,34 @@ func (r *NotebookReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {\n \t} else {\n \t\t// Got the pod\n \t\tpodFound = true\n-\t\tif len(pod.Status.ContainerStatuses) > 0 &&\n-\t\t\tpod.Status.ContainerStatuses[0].State != instance.Status.ContainerState {\n-\t\t\tlog.Info(\"Updating container state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n-\t\t\tcs := pod.Status.ContainerStatuses[0].State\n-\t\t\tinstance.Status.ContainerState = cs\n-\t\t\toldConditions := instance.Status.Conditions\n-\t\t\tnewCondition := getNextCondition(cs)\n-\t\t\t// Append new condition\n-\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n-\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n-\t\t\t\toldConditions[0].Message != newCondition.Message {\n-\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n-\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n+\n+\t\tif len(pod.Status.ContainerStatuses) > 0 {\n+\t\t\tnotebookContainerFound := false\n+\t\t\tfor i := range pod.Status.ContainerStatuses {\n+\t\t\t\tif pod.Status.ContainerStatuses[i].Name == instance.Name &&\n+\t\t\t\t\tpod.Status.ContainerStatuses[i].State != instance.Status.ContainerState {\n+\t\t\t\t\tlog.Info(\"Updating Notebook CR state: \", \"namespace\", instance.Namespace, \"name\", instance.Name)\n+\t\t\t\t\tcs := pod.Status.ContainerStatuses[i].State\n+\t\t\t\t\tinstance.Status.ContainerState = cs\n+\t\t\t\t\toldConditions := instance.Status.Conditions\n+\t\t\t\t\tnewCondition := getNextCondition(cs)\n+\t\t\t\t\t// Append new condition\n+\t\t\t\t\tif len(oldConditions) == 0 || oldConditions[0].Type != newCondition.Type ||\n+\t\t\t\t\t\toldConditions[0].Reason != newCondition.Reason ||\n+\t\t\t\t\t\toldConditions[0].Message != newCondition.Message {\n+\t\t\t\t\t\tlog.Info(\"Appending to conditions: \", \"namespace\", instance.Namespace, \"name\", instance.Name, \"type\", newCondition.Type, \"reason\", newCondition.Reason, \"message\", newCondition.Message)\n+\t\t\t\t\t\tinstance.Status.Conditions = append([]v1beta1.NotebookCondition{newCondition}, oldConditions...)\n+\t\t\t\t\t}\n+\t\t\t\t\terr = r.Status().Update(ctx, instance)\n+\t\t\t\t\tif err != nil {\n+\t\t\t\t\t\treturn ctrl.Result{}, err\n+\t\t\t\t\t}\n+\t\t\t\t\tnotebookContainerFound = true\n+\t\t\t\t\tbreak\n+\t\t\t\t}\n \t\t\t}\n-\t\t\terr = r.Status().Update(ctx, instance)\n-\t\t\tif err != nil {\n-\t\t\t\treturn ctrl.Result{}, err\n+\t\t\tif !notebookContainerFound {\n+\t\t\t\tlog.Info(\"WARNING: Notebook container is not found, so could not update State of Notebook CR\")", - "comment_created_at": "2020-09-21T19:11:06+00:00", - "comment_author": "gilbeckers", - "comment_body": "Done :+1: ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "320260335", - "pr_number": 4040, - "pr_file": "bootstrap/pkg/kfapp/apply/apply.go", - "created_at": "2019-09-03T13:06:07+00:00", - "commented_code": "package apply\n\nimport (\n\t\"errors\"\n\t\"fmt\"\n\t\"io\"\n\t\"os\"\n\n\tkftypes 
\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n\tkfdefsv3 \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n\tlog \"github.com/sirupsen/logrus\"\n)\n\n// BootstrapKubeflow is used by the kfctl apply sub-command to take in a configfile\n// as a flag and boostrap a KfApp and deploy it\nfunc BootstrapKubeflow(configFilePath string, kfResource kftypes.ResourceEnum) error {\n\t// Set default app name to kf-app\n\tappName := \"kf-app\"\n\n\t// Construct KfDef from the configFilePath provided\n\tkfDef := &kfdefsv3.KfDef{}\n\tkfDef, err := kfdefsv3.LoadKFDefFromURI(configFilePath)\n\tif err != nil {\n\t\tlog.Printf(\"Unable to create KfDef from config file: %v\", err)\n\t}\n\tif kfDef.Name != \"\" {\n\t\tlog.Warnf(\"Overriding KfDef.Spec.Name; old value %v; new value %v\", kfDef.Name, appName)\n\t}\n\tkfDef.Name = appName\n\tisValid, msg := kfDef.IsValid()\n\tif !isValid {\n\t\tlog.Printf(\"Invalid kfdef: %v\", isValid)\n\t\tlog.Printf(\"Error validating generated KfDef, please check config file validity: %v\", msg)\n\t}\n\tcwd, err := os.Getwd()\n\tif err != nil {\n\t\tlog.Printf(\"Error getting current working directory: %v\", err)\n\t}\n\tfmt.Println(\"Present working directory is: %v\", cwd)", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "320260335", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4040, - "pr_file": "bootstrap/pkg/kfapp/apply/apply.go", - "discussion_id": "320260335", - "commented_code": "@@ -0,0 +1,103 @@\n+package apply\n+\n+import (\n+\t\"errors\"\n+\t\"fmt\"\n+\t\"io\"\n+\t\"os\"\n+\n+\tkftypes \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps\"\n+\tkfdefsv3 \"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/apis/apps/kfdef/v1alpha1\"\n+\t\"github.com/kubeflow/kubeflow/bootstrap/v3/pkg/kfapp/coordinator\"\n+\tlog \"github.com/sirupsen/logrus\"\n+)\n+\n+// BootstrapKubeflow is used by the kfctl apply sub-command to take in a configfile\n+// as a flag and boostrap a KfApp and deploy it\n+func BootstrapKubeflow(configFilePath string, kfResource kftypes.ResourceEnum) error {\n+\t// Set default app name to kf-app\n+\tappName := \"kf-app\"\n+\n+\t// Construct KfDef from the configFilePath provided\n+\tkfDef := &kfdefsv3.KfDef{}\n+\tkfDef, err := kfdefsv3.LoadKFDefFromURI(configFilePath)\n+\tif err != nil {\n+\t\tlog.Printf(\"Unable to create KfDef from config file: %v\", err)\n+\t}\n+\tif kfDef.Name != \"\" {\n+\t\tlog.Warnf(\"Overriding KfDef.Spec.Name; old value %v; new value %v\", kfDef.Name, appName)\n+\t}\n+\tkfDef.Name = appName\n+\tisValid, msg := kfDef.IsValid()\n+\tif !isValid {\n+\t\tlog.Printf(\"Invalid kfdef: %v\", isValid)\n+\t\tlog.Printf(\"Error validating generated KfDef, please check config file validity: %v\", msg)\n+\t}\n+\tcwd, err := os.Getwd()\n+\tif err != nil {\n+\t\tlog.Printf(\"Error getting current working directory: %v\", err)\n+\t}\n+\tfmt.Println(\"Present working directory is: %v\", cwd)", - "comment_created_at": "2019-09-03T13:06:07+00:00", - "comment_author": "yanniszark", - "comment_body": "Can you use log instead of fmt and change all instances of Println to a log-level function? 
(eg Info, Warn).", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-use-css-classes-properly.json b/_reviewers/kubeflow-use-css-classes-properly.json new file mode 100644 index 0000000..4759605 --- /dev/null +++ b/_reviewers/kubeflow-use-css-classes-properly.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "595474491", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.html", + "created_at": "2021-03-16T19:20:17+00:00", + "commented_code": "ML packages\"\n icon=\"fa:fab:docker\"\n>\n
\n
Choose server type:
\n \n \n ", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "595474491", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.html", + "discussion_id": "595474491", + "commented_code": "@@ -4,8 +4,23 @@\n ML packages\"\n icon=\"fa:fab:docker\"\n >\n+
\n+
Choose server type:
\n+ \n+ \n+ ", + "comment_created_at": "2021-03-16T19:20:17+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Don't modify html elements directly with styles, always use a class instead that wraps what the style changes are. Also try to avoid as much as possible `!important` in CSS. Instead try to use more specific css selectors.\r\n\r\nHere you could introduce and use the following CSS class for each ``\r\n```css\r\n.server-type {\r\n height: 32px;\r\n width: 150px;\r\n}\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "595475080", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.html", + "created_at": "2021-03-16T19:21:12+00:00", + "commented_code": "ML packages\"\n icon=\"fa:fab:docker\"\n>\n
\n
Choose server type:
\n ", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "595475080", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.html", + "discussion_id": "595475080", + "commented_code": "@@ -4,8 +4,23 @@\n ML packages\"\n icon=\"fa:fab:docker\"\n >\n+
\n+
Choose server type:
\n+ ", + "comment_created_at": "2021-03-16T19:21:12+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Add a wrapper class here to give this group a bottom margin\r\n\r\n```css\r\n.server-type-wrapper {\r\n margin-bottom: 1rem;\r\n}", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-use-css-classes-properly.md b/_reviewers/kubeflow-use-css-classes-properly.md index c322ece..6314173 100644 --- a/_reviewers/kubeflow-use-css-classes-properly.md +++ b/_reviewers/kubeflow-use-css-classes-properly.md @@ -34,51 +34,3 @@ Good example: ``` This approach makes your styles more maintainable, easier to debug, and allows for better responsiveness. CSS classes can be reused across components and modified centrally rather than hunting through templates for inline styles. - - -[ - { - "discussion_id": "595474491", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.html", - "created_at": "2021-03-16T19:20:17+00:00", - "commented_code": "ML packages\"\n icon=\"fa:fab:docker\"\n>\n
\n
Choose server type:
\n \n \n ", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "595474491", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.html", - "discussion_id": "595474491", - "commented_code": "@@ -4,8 +4,23 @@\n ML packages\"\n icon=\"fa:fab:docker\"\n >\n+
\n+
Choose server type:
\n+ \n+ \n+ ", - "comment_created_at": "2021-03-16T19:20:17+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Don't modify html elements directly with styles, always use a class instead that wraps what the style changes are. Also try to avoid as much as possible `!important` in CSS. Instead try to use more specific css selectors.\r\n\r\nHere you could introduce and use the following CSS class for each ``\r\n```css\r\n.server-type {\r\n height: 32px;\r\n width: 150px;\r\n}\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "595475080", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.html", - "created_at": "2021-03-16T19:21:12+00:00", - "commented_code": "ML packages\"\n icon=\"fa:fab:docker\"\n>\n
\n
Choose server type:
\n ", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "595475080", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/frontend/src/app/pages/form/form-default/form-image/form-image.component.html", - "discussion_id": "595475080", - "commented_code": "@@ -4,8 +4,23 @@\n ML packages\"\n icon=\"fa:fab:docker\"\n >\n+
\n+
Choose server type:
\n+ ", - "comment_created_at": "2021-03-16T19:21:12+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Add a wrapper class here to give this group a bottom margin\r\n\r\n```css\r\n.server-type-wrapper {\r\n margin-bottom: 1rem;\r\n}", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-use-enums-for-state.json b/_reviewers/kubeflow-use-enums-for-state.json new file mode 100644 index 0000000..0b4403f --- /dev/null +++ b/_reviewers/kubeflow-use-enums-for-state.json @@ -0,0 +1,94 @@ +[ + { + "discussion_id": "1090467817", + "pr_number": 6895, + "pr_file": "components/centraldashboard-angular/frontend/src/app/shared/utils.ts", + "created_at": "2023-01-30T10:57:01+00:00", + "commented_code": "/**\n * We treat URLs with or without a trailing slash as the same\n * URL. Thus, in order to compare URLs, we need to use\n * appendBackslash for both URLS to avoid false statements\n * in cases where they only differ in the trailing slash.\n */\nexport function equalUrlPaths(firstUrl: string, secondUrl: string | undefined) {\n if (!firstUrl && !secondUrl) {\n console.warn(`Got undefined URLs ${firstUrl} and ${secondUrl}`);\n return true;\n }", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1090467817", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6895, + "pr_file": "components/centraldashboard-angular/frontend/src/app/shared/utils.ts", + "discussion_id": "1090467817", + "commented_code": "@@ -0,0 +1,37 @@\n+/**\n+ * We treat URLs with or without a trailing slash as the same\n+ * URL. Thus, in order to compare URLs, we need to use\n+ * appendBackslash for both URLS to avoid false statements\n+ * in cases where they only differ in the trailing slash.\n+ */\n+export function equalUrlPaths(firstUrl: string, secondUrl: string | undefined) {\n+ if (!firstUrl && !secondUrl) {\n+ console.warn(`Got undefined URLs ${firstUrl} and ${secondUrl}`);\n+ return true;\n+ }", + "comment_created_at": "2023-01-30T10:57:01+00:00", + "comment_author": "tasos-ale", + "comment_body": "This is not this PR, but maybe we want to think if we want to consider `undefined` URLs as equals.", + "pr_file_module": null + }, + { + "comment_id": "1091898423", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6895, + "pr_file": "components/centraldashboard-angular/frontend/src/app/shared/utils.ts", + "discussion_id": "1090467817", + "commented_code": "@@ -0,0 +1,37 @@\n+/**\n+ * We treat URLs with or without a trailing slash as the same\n+ * URL. Thus, in order to compare URLs, we need to use\n+ * appendBackslash for both URLS to avoid false statements\n+ * in cases where they only differ in the trailing slash.\n+ */\n+export function equalUrlPaths(firstUrl: string, secondUrl: string | undefined) {\n+ if (!firstUrl && !secondUrl) {\n+ console.warn(`Got undefined URLs ${firstUrl} and ${secondUrl}`);\n+ return true;\n+ }", + "comment_created_at": "2023-01-31T13:06:10+00:00", + "comment_author": "orfeas-k", + "comment_body": "Normally, we don't stumble upon 2 undefined URLs (we did with a previous implementation of URLs syncing). 
Thus, I think we can go ahead and remove this `if` completely @tasos-ale ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "596628238", + "pr_number": 5710, + "pr_file": "components/crud-web-apps/common/frontend/kubeflow-common-lib/projects/kubeflow/src/lib/services/namespace.service.ts", + "created_at": "2021-03-18T08:04:47+00:00", + "commented_code": "// Observable string sources\n private selectedNamespaceSource = new ReplaySubject(1);\n private dashboardConnectedSource = new BehaviorSubject(true);", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "596628238", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5710, + "pr_file": "components/crud-web-apps/common/frontend/kubeflow-common-lib/projects/kubeflow/src/lib/services/namespace.service.ts", + "discussion_id": "596628238", + "commented_code": "@@ -15,9 +15,11 @@ export class NamespaceService {\n \n // Observable string sources\n private selectedNamespaceSource = new ReplaySubject(1);\n+ private dashboardConnectedSource = new BehaviorSubject(true);", + "comment_created_at": "2021-03-18T08:04:47+00:00", + "comment_author": "tasos-ale", + "comment_body": "Hello @kimwnasptd. I think this Observable should not start with `true` as a default value. If someone uses the `dashboardConnected$` property and the `centraldashboard` is not ready, it will wrongly inform the user that it is ready. Maybe you can use `null` as the unknown state or even better use an enum. Something like this:\r\n```\r\nenum DashboardState {\r\n Unknown = 0, // or Connecting\r\n Connected,\r\n Disconnected,\r\n}\r\n```", + "pr_file_module": null + }, + { + "comment_id": "597086580", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5710, + "pr_file": "components/crud-web-apps/common/frontend/kubeflow-common-lib/projects/kubeflow/src/lib/services/namespace.service.ts", + "discussion_id": "596628238", + "commented_code": "@@ -15,9 +15,11 @@ export class NamespaceService {\n \n // Observable string sources\n private selectedNamespaceSource = new ReplaySubject(1);\n+ private dashboardConnectedSource = new BehaviorSubject(true);", + "comment_created_at": "2021-03-18T17:18:56+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Thank you for the feedback @tasos-ale! \r\n\r\nThe reason I used `true` as a default value here is because if it starts as `null` then the app will think that the dashboard is not present and try to make a request to fetch the namespaces. \r\n\r\nSo in this case I take for granted the opposite, which is expect the dashboard to be present and if after the check it's missing then the apps will make the request to fetch the namespaces", + "pr_file_module": null + }, + { + "comment_id": "597087081", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5710, + "pr_file": "components/crud-web-apps/common/frontend/kubeflow-common-lib/projects/kubeflow/src/lib/services/namespace.service.ts", + "discussion_id": "596628238", + "commented_code": "@@ -15,9 +15,11 @@ export class NamespaceService {\n \n // Observable string sources\n private selectedNamespaceSource = new ReplaySubject(1);\n+ private dashboardConnectedSource = new BehaviorSubject(true);", + "comment_created_at": "2021-03-18T17:19:36+00:00", + "comment_author": "kimwnasptd", + "comment_body": "but indeed the enumeration is a better way to express this. 
I'll use this enum instead of a boolean value and the apps will fetch the namespaces only if the state is `Disconnected`", + "pr_file_module": null + }, + { + "comment_id": "597155578", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5710, + "pr_file": "components/crud-web-apps/common/frontend/kubeflow-common-lib/projects/kubeflow/src/lib/services/namespace.service.ts", + "discussion_id": "596628238", + "commented_code": "@@ -15,9 +15,11 @@ export class NamespaceService {\n \n // Observable string sources\n private selectedNamespaceSource = new ReplaySubject(1);\n+ private dashboardConnectedSource = new BehaviorSubject(true);", + "comment_created_at": "2021-03-18T18:54:56+00:00", + "comment_author": "kimwnasptd", + "comment_body": "pushed a commit for the described changes", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-use-enums-for-state.md b/_reviewers/kubeflow-use-enums-for-state.md index 44344a1..a945835 100644 --- a/_reviewers/kubeflow-use-enums-for-state.md +++ b/_reviewers/kubeflow-use-enums-for-state.md @@ -30,99 +30,3 @@ private dashboardStateSource = new BehaviorSubject(DashboardStat ``` This approach prevents misleading default values, improves code readability, and provides better type safety. When making decisions based on state, the enum forces you to handle all possibilities, reducing the likelihood of bugs related to unexpected null or undefined values. - - -[ - { - "discussion_id": "1090467817", - "pr_number": 6895, - "pr_file": "components/centraldashboard-angular/frontend/src/app/shared/utils.ts", - "created_at": "2023-01-30T10:57:01+00:00", - "commented_code": "/**\n * We treat URLs with or without a trailing slash as the same\n * URL. Thus, in order to compare URLs, we need to use\n * appendBackslash for both URLS to avoid false statements\n * in cases where they only differ in the trailing slash.\n */\nexport function equalUrlPaths(firstUrl: string, secondUrl: string | undefined) {\n if (!firstUrl && !secondUrl) {\n console.warn(`Got undefined URLs ${firstUrl} and ${secondUrl}`);\n return true;\n }", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1090467817", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6895, - "pr_file": "components/centraldashboard-angular/frontend/src/app/shared/utils.ts", - "discussion_id": "1090467817", - "commented_code": "@@ -0,0 +1,37 @@\n+/**\n+ * We treat URLs with or without a trailing slash as the same\n+ * URL. Thus, in order to compare URLs, we need to use\n+ * appendBackslash for both URLS to avoid false statements\n+ * in cases where they only differ in the trailing slash.\n+ */\n+export function equalUrlPaths(firstUrl: string, secondUrl: string | undefined) {\n+ if (!firstUrl && !secondUrl) {\n+ console.warn(`Got undefined URLs ${firstUrl} and ${secondUrl}`);\n+ return true;\n+ }", - "comment_created_at": "2023-01-30T10:57:01+00:00", - "comment_author": "tasos-ale", - "comment_body": "This is not this PR, but maybe we want to think if we want to consider `undefined` URLs as equals.", - "pr_file_module": null - }, - { - "comment_id": "1091898423", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6895, - "pr_file": "components/centraldashboard-angular/frontend/src/app/shared/utils.ts", - "discussion_id": "1090467817", - "commented_code": "@@ -0,0 +1,37 @@\n+/**\n+ * We treat URLs with or without a trailing slash as the same\n+ * URL. 
Thus, in order to compare URLs, we need to use\n+ * appendBackslash for both URLS to avoid false statements\n+ * in cases where they only differ in the trailing slash.\n+ */\n+export function equalUrlPaths(firstUrl: string, secondUrl: string | undefined) {\n+ if (!firstUrl && !secondUrl) {\n+ console.warn(`Got undefined URLs ${firstUrl} and ${secondUrl}`);\n+ return true;\n+ }", - "comment_created_at": "2023-01-31T13:06:10+00:00", - "comment_author": "orfeas-k", - "comment_body": "Normally, we don't stumble upon 2 undefined URLs (we did with a previous implementation of URLs syncing). Thus, I think we can go ahead and remove this `if` completely @tasos-ale ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "596628238", - "pr_number": 5710, - "pr_file": "components/crud-web-apps/common/frontend/kubeflow-common-lib/projects/kubeflow/src/lib/services/namespace.service.ts", - "created_at": "2021-03-18T08:04:47+00:00", - "commented_code": "// Observable string sources\n private selectedNamespaceSource = new ReplaySubject(1);\n private dashboardConnectedSource = new BehaviorSubject(true);", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "596628238", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5710, - "pr_file": "components/crud-web-apps/common/frontend/kubeflow-common-lib/projects/kubeflow/src/lib/services/namespace.service.ts", - "discussion_id": "596628238", - "commented_code": "@@ -15,9 +15,11 @@ export class NamespaceService {\n \n // Observable string sources\n private selectedNamespaceSource = new ReplaySubject(1);\n+ private dashboardConnectedSource = new BehaviorSubject(true);", - "comment_created_at": "2021-03-18T08:04:47+00:00", - "comment_author": "tasos-ale", - "comment_body": "Hello @kimwnasptd. I think this Observable should not start with `true` as a default value. If someone uses the `dashboardConnected$` property and the `centraldashboard` is not ready, it will wrongly inform the user that it is ready. Maybe you can use `null` as the unknown state or even better use an enum. Something like this:\r\n```\r\nenum DashboardState {\r\n Unknown = 0, // or Connecting\r\n Connected,\r\n Disconnected,\r\n}\r\n```", - "pr_file_module": null - }, - { - "comment_id": "597086580", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5710, - "pr_file": "components/crud-web-apps/common/frontend/kubeflow-common-lib/projects/kubeflow/src/lib/services/namespace.service.ts", - "discussion_id": "596628238", - "commented_code": "@@ -15,9 +15,11 @@ export class NamespaceService {\n \n // Observable string sources\n private selectedNamespaceSource = new ReplaySubject(1);\n+ private dashboardConnectedSource = new BehaviorSubject(true);", - "comment_created_at": "2021-03-18T17:18:56+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Thank you for the feedback @tasos-ale! \r\n\r\nThe reason I used `true` as a default value here is because if it starts as `null` then the app will think that the dashboard is not present and try to make a request to fetch the namespaces. 
\r\n\r\nSo in this case I take for granted the opposite, which is expect the dashboard to be present and if after the check it's missing then the apps will make the request to fetch the namespaces", - "pr_file_module": null - }, - { - "comment_id": "597087081", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5710, - "pr_file": "components/crud-web-apps/common/frontend/kubeflow-common-lib/projects/kubeflow/src/lib/services/namespace.service.ts", - "discussion_id": "596628238", - "commented_code": "@@ -15,9 +15,11 @@ export class NamespaceService {\n \n // Observable string sources\n private selectedNamespaceSource = new ReplaySubject(1);\n+ private dashboardConnectedSource = new BehaviorSubject(true);", - "comment_created_at": "2021-03-18T17:19:36+00:00", - "comment_author": "kimwnasptd", - "comment_body": "but indeed the enumeration is a better way to express this. I'll use this enum instead of a boolean value and the apps will fetch the namespaces only if the state is `Disconnected`", - "pr_file_module": null - }, - { - "comment_id": "597155578", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5710, - "pr_file": "components/crud-web-apps/common/frontend/kubeflow-common-lib/projects/kubeflow/src/lib/services/namespace.service.ts", - "discussion_id": "596628238", - "commented_code": "@@ -15,9 +15,11 @@ export class NamespaceService {\n \n // Observable string sources\n private selectedNamespaceSource = new ReplaySubject(1);\n+ private dashboardConnectedSource = new BehaviorSubject(true);", - "comment_created_at": "2021-03-18T18:54:56+00:00", - "comment_author": "kimwnasptd", - "comment_body": "pushed a commit for the described changes", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-use-modern-javascript-idioms.json b/_reviewers/kubeflow-use-modern-javascript-idioms.json new file mode 100644 index 0000000..d6ad86b --- /dev/null +++ b/_reviewers/kubeflow-use-modern-javascript-idioms.json @@ -0,0 +1,160 @@ +[ + { + "discussion_id": "640449762", + "pr_number": 5871, + "pr_file": "components/centraldashboard/public/components/main-page.js", + "created_at": "2021-05-27T09:22:44+00:00", + "commented_code": "// A temporary workaround is to use \"this.queryParams\" as an input\n // instead of \"queryParamsChange.base\".\n // const queryParams = queryParamsChange.base;\n const queryParams = this.queryParams;\n if (queryParams) {\n if (queryParams[\"ns\"]) {\n if (queryParams[\"ns\"] !== null){\n return this.buildHref(", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "640449762", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5871, + "pr_file": "components/centraldashboard/public/components/main-page.js", + "discussion_id": "640449762", + "commented_code": "@@ -291,6 +291,18 @@ export class MainPage extends utilitiesMixin(PolymerElement) {\n // A temporary workaround is to use \"this.queryParams\" as an input\n // instead of \"queryParamsChange.base\".\n // const queryParams = queryParamsChange.base;\n+ const queryParams = this.queryParams;\n+ if (queryParams) {\n+ if (queryParams[\"ns\"]) {\n+ if (queryParams[\"ns\"] !== null){\n+ return this.buildHref(", + "comment_created_at": "2021-05-27T09:22:44+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Could you restructure the if statements in order to reduce the nesting levels?\r\n\r\nSomething like:\r\n```javascript\r\nif (!queryParams || !queryParams[\"ns\"]) {\r\n return this.buildHref(href, this.queryParams);\r\n}\r\n\r\nreturn 
this.buildHref(href.replace('{ns}', queryParams[\"ns\"]), queryParams);\r\n```", + "pr_file_module": null + }, + { + "comment_id": "640607721", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5871, + "pr_file": "components/centraldashboard/public/components/main-page.js", + "discussion_id": "640449762", + "commented_code": "@@ -291,6 +291,18 @@ export class MainPage extends utilitiesMixin(PolymerElement) {\n // A temporary workaround is to use \"this.queryParams\" as an input\n // instead of \"queryParamsChange.base\".\n // const queryParams = queryParamsChange.base;\n+ const queryParams = this.queryParams;\n+ if (queryParams) {\n+ if (queryParams[\"ns\"]) {\n+ if (queryParams[\"ns\"] !== null){\n+ return this.buildHref(", + "comment_created_at": "2021-05-27T13:09:59+00:00", + "comment_author": "toshi-k", + "comment_body": "@kimwnasptd Good idea ! I corrected this part, please look at my latest PR.\r\nI inserted a line break not to exceed 80 chars.", + "pr_file_module": null + }, + { + "comment_id": "642539612", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5871, + "pr_file": "components/centraldashboard/public/components/main-page.js", + "discussion_id": "640449762", + "commented_code": "@@ -291,6 +291,18 @@ export class MainPage extends utilitiesMixin(PolymerElement) {\n // A temporary workaround is to use \"this.queryParams\" as an input\n // instead of \"queryParamsChange.base\".\n // const queryParams = queryParamsChange.base;\n+ const queryParams = this.queryParams;\n+ if (queryParams) {\n+ if (queryParams[\"ns\"]) {\n+ if (queryParams[\"ns\"] !== null){\n+ return this.buildHref(", + "comment_created_at": "2021-05-31T14:46:31+00:00", + "comment_author": "kimwnasptd", + "comment_body": "LGTM!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "255709670", + "pr_number": 2357, + "pr_file": "components/jupyter-web-app/jupyter/static/js/notebooks.js", + "created_at": "2019-02-11T21:55:07+00:00", + "commented_code": "$(document).ready(function(){\n // Get Notebooks for the ServiceAccount's Namespace\n var ns = new URL(window.location.href).searchParams.get(\"namespace\")\n if (ns) {\n // Load the selected ns\n $(\"#ns-select\").val(ns)\n }\n $(\"#ns-select\").trigger(\"change\");\n});\n\nfunction deleteNotebook(ns, nb) {\n $.getJSON(prefix + \"/delete-notebook\", { namespace:ns, notebook:nb}, function(data, status) {\n var innerHTML = $(\"#error-msgs\").html()\n if(data.success == true) {\n updateNotebooksInNamespace(ns)\n innerHTML = '';\n }\n else {\n innerHTML = '
';\n innerHTML += '×';\n innerHTML += 'Warning! ' + data.log + '
';\n }\n $(\"#error-msgs\").html(innerHTML);\n });\n};\n\nfunction connectNotebook(ns, nb) {\n window.open(\"/\" + ns + \"/\" + nb, \"_blank\");", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "255709670", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 2357, + "pr_file": "components/jupyter-web-app/jupyter/static/js/notebooks.js", + "discussion_id": "255709670", + "commented_code": "@@ -0,0 +1,149 @@\n+$(document).ready(function(){\n+ // Get Notebooks for the ServiceAccount's Namespace\n+ var ns = new URL(window.location.href).searchParams.get(\"namespace\")\n+ if (ns) {\n+ // Load the selected ns\n+ $(\"#ns-select\").val(ns)\n+ }\n+ $(\"#ns-select\").trigger(\"change\");\n+});\n+\n+function deleteNotebook(ns, nb) {\n+ $.getJSON(prefix + \"/delete-notebook\", { namespace:ns, notebook:nb}, function(data, status) {\n+ var innerHTML = $(\"#error-msgs\").html()\n+ if(data.success == true) {\n+ updateNotebooksInNamespace(ns)\n+ innerHTML = '';\n+ }\n+ else {\n+ innerHTML = '
';\n+ innerHTML += '×';\n+ innerHTML += 'Warning! ' + data.log + '
';\n+ }\n+ $(\"#error-msgs\").html(innerHTML);\n+ });\n+};\n+\n+function connectNotebook(ns, nb) {\n+ window.open(\"/\" + ns + \"/\" + nb, \"_blank\");", + "comment_created_at": "2019-02-11T21:55:07+00:00", + "comment_author": "avdaredevil", + "comment_body": "_**[nitpick]** This could be written as:_\r\n\r\n```js\r\nwindow.open(`/${ns}/${nb}`, \"_blank\");\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "463524341", + "pr_number": 5174, + "pr_file": "components/centraldashboard/public/components/main-page.js", + "created_at": "2020-07-31T10:08:24+00:00", + "commented_code": "import template from './main-page.pug';\nimport logo from '../assets/logo.svg';\nimport '../assets/anon-user.png';", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "463524341", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5174, + "pr_file": "components/centraldashboard/public/components/main-page.js", + "discussion_id": "463524341", + "commented_code": "@@ -30,6 +30,7 @@ import css from './main-page.css';\n import template from './main-page.pug';\n import logo from '../assets/logo.svg';\n import '../assets/anon-user.png';", + "comment_created_at": "2020-07-31T10:08:24+00:00", + "comment_author": "SachinVarghese", + "comment_body": "Remove the import of `anon-user.png` if not being used.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "370910626", + "pr_number": 4684, + "pr_file": "components/centraldashboard/public/components/iframe-container.js", + "created_at": "2020-01-25T03:19:53+00:00", + "commented_code": "// the captured iframe and set the page property which notifies\n const iframe = this.$.iframe;\n iframe.addEventListener('load', () => {\n iframe.contentDocument.addEventListener('click', () => {\n const syncIframePage = () => {\n const iframeLocation = iframe.contentWindow.location;\n const newIframePage = iframeLocation.href.slice(\n iframeLocation.origin.length);\n if (this.page !== newIframePage) {\n this.page = newIframePage;\n }\n });\n };\n iframe.contentDocument.addEventListener('click', syncIframePage);", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "370910626", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4684, + "pr_file": "components/centraldashboard/public/components/iframe-container.js", + "discussion_id": "370910626", + "commented_code": "@@ -51,14 +51,19 @@ export class IframeContainer extends PolymerElement {\n // the captured iframe and set the page property which notifies\n const iframe = this.$.iframe;\n iframe.addEventListener('load', () => {\n- iframe.contentDocument.addEventListener('click', () => {\n+ const syncIframePage = () => {\n const iframeLocation = iframe.contentWindow.location;\n const newIframePage = iframeLocation.href.slice(\n iframeLocation.origin.length);\n if (this.page !== newIframePage) {\n this.page = newIframePage;\n }\n- });\n+ };\n+ iframe.contentDocument.addEventListener('click', syncIframePage);", + "comment_created_at": "2020-01-25T03:19:53+00:00", + "comment_author": "avdaredevil", + "comment_body": "***nit:** small nit for succinctness*\r\n\r\n```suggestion\r\n const {contentDocument} = iframe\r\n contentDocument.addEventListener('click', syncIframePage);\r\n contentDocument.addEventListener('hashchange', syncIframePage);\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "367935667", + "pr_number": 4659, + "pr_file": "components/centraldashboard/public/components/namespace-selector.js", + 
"created_at": "2020-01-17T13:31:41+00:00", + "commented_code": "return selected;\n }\n\n /**\n * Check if role is owner\n */\n validate() {\n const {namespaces} = this;\n if (!namespaces) return;\n const nsSet = new Set(namespaces.map((i) => i.namespace));\n if (nsSet.has(this.selected)) return;\n\n const owned = namespaces.find((n) => n.role == 'owner');\n // eslint-disable-next-line\n console.log({own: (owned||[]).namespace, namespaces, ns: this.selected, yeah: namespaces.includes('kubeflow')});", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "367935667", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4659, + "pr_file": "components/centraldashboard/public/components/namespace-selector.js", + "discussion_id": "367935667", + "commented_code": "@@ -153,6 +154,24 @@ export class NamespaceSelector extends PolymerElement {\n return selected;\n }\n \n+ /**\n+ * Check if role is owner\n+ */\n+ validate() {\n+ const {namespaces} = this;\n+ if (!namespaces) return;\n+ const nsSet = new Set(namespaces.map((i) => i.namespace));\n+ if (nsSet.has(this.selected)) return;\n+\n+ const owned = namespaces.find((n) => n.role == 'owner');\n+ // eslint-disable-next-line\n+ console.log({own: (owned||[]).namespace, namespaces, ns: this.selected, yeah: namespaces.includes('kubeflow')});", + "comment_created_at": "2020-01-17T13:31:41+00:00", + "comment_author": "prodonjs", + "comment_body": "I don't think there are any other `console.log` statements for non-error conditions so I'd suggest removing this line.", + "pr_file_module": null + }, + { + "comment_id": "368110888", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4659, + "pr_file": "components/centraldashboard/public/components/namespace-selector.js", + "discussion_id": "367935667", + "commented_code": "@@ -153,6 +154,24 @@ export class NamespaceSelector extends PolymerElement {\n return selected;\n }\n \n+ /**\n+ * Check if role is owner\n+ */\n+ validate() {\n+ const {namespaces} = this;\n+ if (!namespaces) return;\n+ const nsSet = new Set(namespaces.map((i) => i.namespace));\n+ if (nsSet.has(this.selected)) return;\n+\n+ const owned = namespaces.find((n) => n.role == 'owner');\n+ // eslint-disable-next-line\n+ console.log({own: (owned||[]).namespace, namespaces, ns: this.selected, yeah: namespaces.includes('kubeflow')});", + "comment_created_at": "2020-01-17T19:56:20+00:00", + "comment_author": "avdaredevil", + "comment_body": "Good catch, I forgot to remove this line. Thanks!", + "pr_file_module": null + }, + { + "comment_id": "368111190", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 4659, + "pr_file": "components/centraldashboard/public/components/namespace-selector.js", + "discussion_id": "367935667", + "commented_code": "@@ -153,6 +154,24 @@ export class NamespaceSelector extends PolymerElement {\n return selected;\n }\n \n+ /**\n+ * Check if role is owner\n+ */\n+ validate() {\n+ const {namespaces} = this;\n+ if (!namespaces) return;\n+ const nsSet = new Set(namespaces.map((i) => i.namespace));\n+ if (nsSet.has(this.selected)) return;\n+\n+ const owned = namespaces.find((n) => n.role == 'owner');\n+ // eslint-disable-next-line\n+ console.log({own: (owned||[]).namespace, namespaces, ns: this.selected, yeah: namespaces.includes('kubeflow')});", + "comment_created_at": "2020-01-17T19:57:03+00:00", + "comment_author": "avdaredevil", + "comment_body": "Good catch, I forgot to remove this before submitting the PR. 
Thanks!", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-use-modern-javascript-idioms.md b/_reviewers/kubeflow-use-modern-javascript-idioms.md index bab57f8..606ec83 100644 --- a/_reviewers/kubeflow-use-modern-javascript-idioms.md +++ b/_reviewers/kubeflow-use-modern-javascript-idioms.md @@ -59,165 +59,3 @@ console.log({own: (owned||[]).namespace, namespaces, ns: this.selected}); ``` Maintaining these practices will lead to more consistent, readable, and maintainable code across the codebase. - - -[ - { - "discussion_id": "640449762", - "pr_number": 5871, - "pr_file": "components/centraldashboard/public/components/main-page.js", - "created_at": "2021-05-27T09:22:44+00:00", - "commented_code": "// A temporary workaround is to use \"this.queryParams\" as an input\n // instead of \"queryParamsChange.base\".\n // const queryParams = queryParamsChange.base;\n const queryParams = this.queryParams;\n if (queryParams) {\n if (queryParams[\"ns\"]) {\n if (queryParams[\"ns\"] !== null){\n return this.buildHref(", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "640449762", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5871, - "pr_file": "components/centraldashboard/public/components/main-page.js", - "discussion_id": "640449762", - "commented_code": "@@ -291,6 +291,18 @@ export class MainPage extends utilitiesMixin(PolymerElement) {\n // A temporary workaround is to use \"this.queryParams\" as an input\n // instead of \"queryParamsChange.base\".\n // const queryParams = queryParamsChange.base;\n+ const queryParams = this.queryParams;\n+ if (queryParams) {\n+ if (queryParams[\"ns\"]) {\n+ if (queryParams[\"ns\"] !== null){\n+ return this.buildHref(", - "comment_created_at": "2021-05-27T09:22:44+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Could you restructure the if statements in order to reduce the nesting levels?\r\n\r\nSomething like:\r\n```javascript\r\nif (!queryParams || !queryParams[\"ns\"]) {\r\n return this.buildHref(href, this.queryParams);\r\n}\r\n\r\nreturn this.buildHref(href.replace('{ns}', queryParams[\"ns\"]), queryParams);\r\n```", - "pr_file_module": null - }, - { - "comment_id": "640607721", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5871, - "pr_file": "components/centraldashboard/public/components/main-page.js", - "discussion_id": "640449762", - "commented_code": "@@ -291,6 +291,18 @@ export class MainPage extends utilitiesMixin(PolymerElement) {\n // A temporary workaround is to use \"this.queryParams\" as an input\n // instead of \"queryParamsChange.base\".\n // const queryParams = queryParamsChange.base;\n+ const queryParams = this.queryParams;\n+ if (queryParams) {\n+ if (queryParams[\"ns\"]) {\n+ if (queryParams[\"ns\"] !== null){\n+ return this.buildHref(", - "comment_created_at": "2021-05-27T13:09:59+00:00", - "comment_author": "toshi-k", - "comment_body": "@kimwnasptd Good idea ! 
I corrected this part, please look at my latest PR.\r\nI inserted a line break not to exceed 80 chars.", - "pr_file_module": null - }, - { - "comment_id": "642539612", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5871, - "pr_file": "components/centraldashboard/public/components/main-page.js", - "discussion_id": "640449762", - "commented_code": "@@ -291,6 +291,18 @@ export class MainPage extends utilitiesMixin(PolymerElement) {\n // A temporary workaround is to use \"this.queryParams\" as an input\n // instead of \"queryParamsChange.base\".\n // const queryParams = queryParamsChange.base;\n+ const queryParams = this.queryParams;\n+ if (queryParams) {\n+ if (queryParams[\"ns\"]) {\n+ if (queryParams[\"ns\"] !== null){\n+ return this.buildHref(", - "comment_created_at": "2021-05-31T14:46:31+00:00", - "comment_author": "kimwnasptd", - "comment_body": "LGTM!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "255709670", - "pr_number": 2357, - "pr_file": "components/jupyter-web-app/jupyter/static/js/notebooks.js", - "created_at": "2019-02-11T21:55:07+00:00", - "commented_code": "$(document).ready(function(){\n // Get Notebooks for the ServiceAccount's Namespace\n var ns = new URL(window.location.href).searchParams.get(\"namespace\")\n if (ns) {\n // Load the selected ns\n $(\"#ns-select\").val(ns)\n }\n $(\"#ns-select\").trigger(\"change\");\n});\n\nfunction deleteNotebook(ns, nb) {\n $.getJSON(prefix + \"/delete-notebook\", { namespace:ns, notebook:nb}, function(data, status) {\n var innerHTML = $(\"#error-msgs\").html()\n if(data.success == true) {\n updateNotebooksInNamespace(ns)\n innerHTML = '';\n }\n else {\n innerHTML = '
';\n innerHTML += '×';\n innerHTML += 'Warning! ' + data.log + '
';\n }\n $(\"#error-msgs\").html(innerHTML);\n });\n};\n\nfunction connectNotebook(ns, nb) {\n window.open(\"/\" + ns + \"/\" + nb, \"_blank\");", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "255709670", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 2357, - "pr_file": "components/jupyter-web-app/jupyter/static/js/notebooks.js", - "discussion_id": "255709670", - "commented_code": "@@ -0,0 +1,149 @@\n+$(document).ready(function(){\n+ // Get Notebooks for the ServiceAccount's Namespace\n+ var ns = new URL(window.location.href).searchParams.get(\"namespace\")\n+ if (ns) {\n+ // Load the selected ns\n+ $(\"#ns-select\").val(ns)\n+ }\n+ $(\"#ns-select\").trigger(\"change\");\n+});\n+\n+function deleteNotebook(ns, nb) {\n+ $.getJSON(prefix + \"/delete-notebook\", { namespace:ns, notebook:nb}, function(data, status) {\n+ var innerHTML = $(\"#error-msgs\").html()\n+ if(data.success == true) {\n+ updateNotebooksInNamespace(ns)\n+ innerHTML = '';\n+ }\n+ else {\n+ innerHTML = '
';\n+ innerHTML += '×';\n+ innerHTML += 'Warning! ' + data.log + '
';\n+ }\n+ $(\"#error-msgs\").html(innerHTML);\n+ });\n+};\n+\n+function connectNotebook(ns, nb) {\n+ window.open(\"/\" + ns + \"/\" + nb, \"_blank\");", - "comment_created_at": "2019-02-11T21:55:07+00:00", - "comment_author": "avdaredevil", - "comment_body": "_**[nitpick]** This could be written as:_\r\n\r\n```js\r\nwindow.open(`/${ns}/${nb}`, \"_blank\");\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "463524341", - "pr_number": 5174, - "pr_file": "components/centraldashboard/public/components/main-page.js", - "created_at": "2020-07-31T10:08:24+00:00", - "commented_code": "import template from './main-page.pug';\nimport logo from '../assets/logo.svg';\nimport '../assets/anon-user.png';", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "463524341", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5174, - "pr_file": "components/centraldashboard/public/components/main-page.js", - "discussion_id": "463524341", - "commented_code": "@@ -30,6 +30,7 @@ import css from './main-page.css';\n import template from './main-page.pug';\n import logo from '../assets/logo.svg';\n import '../assets/anon-user.png';", - "comment_created_at": "2020-07-31T10:08:24+00:00", - "comment_author": "SachinVarghese", - "comment_body": "Remove the import of `anon-user.png` if not being used.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "370910626", - "pr_number": 4684, - "pr_file": "components/centraldashboard/public/components/iframe-container.js", - "created_at": "2020-01-25T03:19:53+00:00", - "commented_code": "// the captured iframe and set the page property which notifies\n const iframe = this.$.iframe;\n iframe.addEventListener('load', () => {\n iframe.contentDocument.addEventListener('click', () => {\n const syncIframePage = () => {\n const iframeLocation = iframe.contentWindow.location;\n const newIframePage = iframeLocation.href.slice(\n iframeLocation.origin.length);\n if (this.page !== newIframePage) {\n this.page = newIframePage;\n }\n });\n };\n iframe.contentDocument.addEventListener('click', syncIframePage);", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "370910626", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4684, - "pr_file": "components/centraldashboard/public/components/iframe-container.js", - "discussion_id": "370910626", - "commented_code": "@@ -51,14 +51,19 @@ export class IframeContainer extends PolymerElement {\n // the captured iframe and set the page property which notifies\n const iframe = this.$.iframe;\n iframe.addEventListener('load', () => {\n- iframe.contentDocument.addEventListener('click', () => {\n+ const syncIframePage = () => {\n const iframeLocation = iframe.contentWindow.location;\n const newIframePage = iframeLocation.href.slice(\n iframeLocation.origin.length);\n if (this.page !== newIframePage) {\n this.page = newIframePage;\n }\n- });\n+ };\n+ iframe.contentDocument.addEventListener('click', syncIframePage);", - "comment_created_at": "2020-01-25T03:19:53+00:00", - "comment_author": "avdaredevil", - "comment_body": "***nit:** small nit for succinctness*\r\n\r\n```suggestion\r\n const {contentDocument} = iframe\r\n contentDocument.addEventListener('click', syncIframePage);\r\n contentDocument.addEventListener('hashchange', syncIframePage);\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "367935667", - "pr_number": 4659, - "pr_file": "components/centraldashboard/public/components/namespace-selector.js", - 
"created_at": "2020-01-17T13:31:41+00:00", - "commented_code": "return selected;\n }\n\n /**\n * Check if role is owner\n */\n validate() {\n const {namespaces} = this;\n if (!namespaces) return;\n const nsSet = new Set(namespaces.map((i) => i.namespace));\n if (nsSet.has(this.selected)) return;\n\n const owned = namespaces.find((n) => n.role == 'owner');\n // eslint-disable-next-line\n console.log({own: (owned||[]).namespace, namespaces, ns: this.selected, yeah: namespaces.includes('kubeflow')});", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "367935667", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4659, - "pr_file": "components/centraldashboard/public/components/namespace-selector.js", - "discussion_id": "367935667", - "commented_code": "@@ -153,6 +154,24 @@ export class NamespaceSelector extends PolymerElement {\n return selected;\n }\n \n+ /**\n+ * Check if role is owner\n+ */\n+ validate() {\n+ const {namespaces} = this;\n+ if (!namespaces) return;\n+ const nsSet = new Set(namespaces.map((i) => i.namespace));\n+ if (nsSet.has(this.selected)) return;\n+\n+ const owned = namespaces.find((n) => n.role == 'owner');\n+ // eslint-disable-next-line\n+ console.log({own: (owned||[]).namespace, namespaces, ns: this.selected, yeah: namespaces.includes('kubeflow')});", - "comment_created_at": "2020-01-17T13:31:41+00:00", - "comment_author": "prodonjs", - "comment_body": "I don't think there are any other `console.log` statements for non-error conditions so I'd suggest removing this line.", - "pr_file_module": null - }, - { - "comment_id": "368110888", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4659, - "pr_file": "components/centraldashboard/public/components/namespace-selector.js", - "discussion_id": "367935667", - "commented_code": "@@ -153,6 +154,24 @@ export class NamespaceSelector extends PolymerElement {\n return selected;\n }\n \n+ /**\n+ * Check if role is owner\n+ */\n+ validate() {\n+ const {namespaces} = this;\n+ if (!namespaces) return;\n+ const nsSet = new Set(namespaces.map((i) => i.namespace));\n+ if (nsSet.has(this.selected)) return;\n+\n+ const owned = namespaces.find((n) => n.role == 'owner');\n+ // eslint-disable-next-line\n+ console.log({own: (owned||[]).namespace, namespaces, ns: this.selected, yeah: namespaces.includes('kubeflow')});", - "comment_created_at": "2020-01-17T19:56:20+00:00", - "comment_author": "avdaredevil", - "comment_body": "Good catch, I forgot to remove this line. Thanks!", - "pr_file_module": null - }, - { - "comment_id": "368111190", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 4659, - "pr_file": "components/centraldashboard/public/components/namespace-selector.js", - "discussion_id": "367935667", - "commented_code": "@@ -153,6 +154,24 @@ export class NamespaceSelector extends PolymerElement {\n return selected;\n }\n \n+ /**\n+ * Check if role is owner\n+ */\n+ validate() {\n+ const {namespaces} = this;\n+ if (!namespaces) return;\n+ const nsSet = new Set(namespaces.map((i) => i.namespace));\n+ if (nsSet.has(this.selected)) return;\n+\n+ const owned = namespaces.find((n) => n.role == 'owner');\n+ // eslint-disable-next-line\n+ console.log({own: (owned||[]).namespace, namespaces, ns: this.selected, yeah: namespaces.includes('kubeflow')});", - "comment_created_at": "2020-01-17T19:57:03+00:00", - "comment_author": "avdaredevil", - "comment_body": "Good catch, I forgot to remove this before submitting the PR. 
Thanks!", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-use-snake-case-in-python.json b/_reviewers/kubeflow-use-snake-case-in-python.json new file mode 100644 index 0000000..189ce7f --- /dev/null +++ b/_reviewers/kubeflow-use-snake-case-in-python.json @@ -0,0 +1,80 @@ +[ + { + "discussion_id": "1145091149", + "pr_number": 6952, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/status.py", + "created_at": "2023-03-22T16:18:12+00:00", + "commented_code": "def process_status(notebook):\n \"\"\"\n Return status and reason. Status may be [running|waiting|warning|error]\n Return status and reason. Status may be [running|waiting|warning]\n \"\"\"\n # Check if the Notebook is stopped\n readyReplicas = notebook.get(\"status\", {}).get(\"readyReplicas\", 0)\n metadata = notebook.get(\"metadata\", {})\n annotations = metadata.get(\"annotations\", {})\n\n if STOP_ANNOTATION in annotations:\n if readyReplicas == 0:\n return status.create_status(\n status.STATUS_PHASE.STOPPED,\n \"No Pods are currently running for this Notebook Server.\",\n )\n else:\n return status.create_status(\n status.STATUS_PHASE.TERMINATING, \"Notebook Server is stopping.\"\n )\n creationTimestamp = notebook[\"metadata\"][\"creationTimestamp\"]", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1145091149", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6952, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/status.py", + "discussion_id": "1145091149", + "commented_code": "@@ -8,54 +8,75 @@\n \n def process_status(notebook):\n \"\"\"\n- Return status and reason. Status may be [running|waiting|warning|error]\n+ Return status and reason. Status may be [running|waiting|warning]\n \"\"\"\n- # Check if the Notebook is stopped\n readyReplicas = notebook.get(\"status\", {}).get(\"readyReplicas\", 0)\n- metadata = notebook.get(\"metadata\", {})\n- annotations = metadata.get(\"annotations\", {})\n-\n- if STOP_ANNOTATION in annotations:\n- if readyReplicas == 0:\n- return status.create_status(\n- status.STATUS_PHASE.STOPPED,\n- \"No Pods are currently running for this Notebook Server.\",\n- )\n- else:\n- return status.create_status(\n- status.STATUS_PHASE.TERMINATING, \"Notebook Server is stopping.\"\n- )\n+ creationTimestamp = notebook[\"metadata\"][\"creationTimestamp\"]", + "comment_created_at": "2023-03-22T16:18:12+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Let's rename this var to `creation_timestamp`. 
Python is using snake_case for var names \r\nhttps://peps.python.org/pep-0008/#descriptive-naming-styles", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "612350224", + "pr_number": 5815, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", + "created_at": "2021-04-13T11:11:00+00:00", + "commented_code": "container = notebook[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n\n cpu = get_form_value(body, defaults, \"cpu\")\n cpuLimit = get_form_value(body, defaults, \"cpuLimit\")", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "612350224", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5815, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", + "discussion_id": "612350224", + "commented_code": "@@ -163,6 +163,22 @@ def set_notebook_cpu(notebook, body, defaults):\n container = notebook[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n \n cpu = get_form_value(body, defaults, \"cpu\")\n+ cpuLimit = get_form_value(body, defaults, \"cpuLimit\")", + "comment_created_at": "2021-04-13T11:11:00+00:00", + "comment_author": "kimwnasptd", + "comment_body": "let's not use `camelCase` for Python variables. This should be `cpu_limit`", + "pr_file_module": null + }, + { + "comment_id": "612415438", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5815, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", + "discussion_id": "612350224", + "commented_code": "@@ -163,6 +163,22 @@ def set_notebook_cpu(notebook, body, defaults):\n container = notebook[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n \n cpu = get_form_value(body, defaults, \"cpu\")\n+ cpuLimit = get_form_value(body, defaults, \"cpuLimit\")", + "comment_created_at": "2021-04-13T12:50:06+00:00", + "comment_author": "davidspek", + "comment_body": "My bad, this was not my intention.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "612385678", + "pr_number": 5815, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", + "created_at": "2021-04-13T12:08:36+00:00", + "commented_code": "container = notebook[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n\n memory = get_form_value(body, defaults, \"memory\")\n memoryLimit = get_form_value(body, defaults, \"memoryLimit\")", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "612385678", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5815, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", + "discussion_id": "612385678", + "commented_code": "@@ -171,6 +187,27 @@ def set_notebook_memory(notebook, body, defaults):\n container = notebook[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n \n memory = get_form_value(body, defaults, \"memory\")\n+ memoryLimit = get_form_value(body, defaults, \"memoryLimit\")", + "comment_created_at": "2021-04-13T12:08:36+00:00", + "comment_author": "kimwnasptd", + "comment_body": "same with `camelCase` as above. 
This should be `memory_limit`", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-use-snake-case-in-python.md b/_reviewers/kubeflow-use-snake-case-in-python.md index 47910d9..06f243e 100644 --- a/_reviewers/kubeflow-use-snake-case-in-python.md +++ b/_reviewers/kubeflow-use-snake-case-in-python.md @@ -33,85 +33,3 @@ def set_notebook_memory(notebook, body, defaults): ``` This applies to all variable names, function names, method names, and module names in Python code. Class names should use CapWords (PascalCase) convention. - - -[ - { - "discussion_id": "1145091149", - "pr_number": 6952, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/status.py", - "created_at": "2023-03-22T16:18:12+00:00", - "commented_code": "def process_status(notebook):\n \"\"\"\n Return status and reason. Status may be [running|waiting|warning|error]\n Return status and reason. Status may be [running|waiting|warning]\n \"\"\"\n # Check if the Notebook is stopped\n readyReplicas = notebook.get(\"status\", {}).get(\"readyReplicas\", 0)\n metadata = notebook.get(\"metadata\", {})\n annotations = metadata.get(\"annotations\", {})\n\n if STOP_ANNOTATION in annotations:\n if readyReplicas == 0:\n return status.create_status(\n status.STATUS_PHASE.STOPPED,\n \"No Pods are currently running for this Notebook Server.\",\n )\n else:\n return status.create_status(\n status.STATUS_PHASE.TERMINATING, \"Notebook Server is stopping.\"\n )\n creationTimestamp = notebook[\"metadata\"][\"creationTimestamp\"]", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1145091149", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6952, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/status.py", - "discussion_id": "1145091149", - "commented_code": "@@ -8,54 +8,75 @@\n \n def process_status(notebook):\n \"\"\"\n- Return status and reason. Status may be [running|waiting|warning|error]\n+ Return status and reason. Status may be [running|waiting|warning]\n \"\"\"\n- # Check if the Notebook is stopped\n readyReplicas = notebook.get(\"status\", {}).get(\"readyReplicas\", 0)\n- metadata = notebook.get(\"metadata\", {})\n- annotations = metadata.get(\"annotations\", {})\n-\n- if STOP_ANNOTATION in annotations:\n- if readyReplicas == 0:\n- return status.create_status(\n- status.STATUS_PHASE.STOPPED,\n- \"No Pods are currently running for this Notebook Server.\",\n- )\n- else:\n- return status.create_status(\n- status.STATUS_PHASE.TERMINATING, \"Notebook Server is stopping.\"\n- )\n+ creationTimestamp = notebook[\"metadata\"][\"creationTimestamp\"]", - "comment_created_at": "2023-03-22T16:18:12+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Let's rename this var to `creation_timestamp`. 
Python is using snake_case for var names \r\nhttps://peps.python.org/pep-0008/#descriptive-naming-styles", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "612350224", - "pr_number": 5815, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", - "created_at": "2021-04-13T11:11:00+00:00", - "commented_code": "container = notebook[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n\n cpu = get_form_value(body, defaults, \"cpu\")\n cpuLimit = get_form_value(body, defaults, \"cpuLimit\")", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "612350224", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5815, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", - "discussion_id": "612350224", - "commented_code": "@@ -163,6 +163,22 @@ def set_notebook_cpu(notebook, body, defaults):\n container = notebook[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n \n cpu = get_form_value(body, defaults, \"cpu\")\n+ cpuLimit = get_form_value(body, defaults, \"cpuLimit\")", - "comment_created_at": "2021-04-13T11:11:00+00:00", - "comment_author": "kimwnasptd", - "comment_body": "let's not use `camelCase` for Python variables. This should be `cpu_limit`", - "pr_file_module": null - }, - { - "comment_id": "612415438", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5815, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", - "discussion_id": "612350224", - "commented_code": "@@ -163,6 +163,22 @@ def set_notebook_cpu(notebook, body, defaults):\n container = notebook[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n \n cpu = get_form_value(body, defaults, \"cpu\")\n+ cpuLimit = get_form_value(body, defaults, \"cpuLimit\")", - "comment_created_at": "2021-04-13T12:50:06+00:00", - "comment_author": "davidspek", - "comment_body": "My bad, this was not my intention.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "612385678", - "pr_number": 5815, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", - "created_at": "2021-04-13T12:08:36+00:00", - "commented_code": "container = notebook[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n\n memory = get_form_value(body, defaults, \"memory\")\n memoryLimit = get_form_value(body, defaults, \"memoryLimit\")", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "612385678", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5815, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", - "discussion_id": "612385678", - "commented_code": "@@ -171,6 +187,27 @@ def set_notebook_memory(notebook, body, defaults):\n container = notebook[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n \n memory = get_form_value(body, defaults, \"memory\")\n+ memoryLimit = get_form_value(body, defaults, \"memoryLimit\")", - "comment_created_at": "2021-04-13T12:08:36+00:00", - "comment_author": "kimwnasptd", - "comment_body": "same with `camelCase` as above. 
This should be `memory_limit`", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-use-table-driven-tests.json b/_reviewers/kubeflow-use-table-driven-tests.json new file mode 100644 index 0000000..e97839f --- /dev/null +++ b/_reviewers/kubeflow-use-table-driven-tests.json @@ -0,0 +1,140 @@ +[ + { + "discussion_id": "618190706", + "pr_number": 5761, + "pr_file": "components/profile-controller/controllers/profile_controller_test.go", + "created_at": "2021-04-22T08:26:44+00:00", + "commented_code": "},\n\t}\n\tfor _, test := range tests {\n\t\tupdateNamespaceLabels(test[\"current\"])\n\t\tdefaultKubeflowNamespaceLabels = map[string]string{\n\t\t\t\"katib-metricscollector-injection\": \"enabled\",\n\t\t\t\"serving.kubeflow.org/inferenceservice\": \"enabled\",\n\t\t\t\"pipelines.kubeflow.org/enabled\": \"true\",\n\t\t\t\"app.kubernetes.io/part-of\": \"kubeflow-profile\",\n\t\t}\n\t\tsetNamespaceLabelsFromConfig(test[\"current\"])\n\t\tif !reflect.DeepEqual(test[\"expected\"], test[\"current\"]) {\n\t\t\tt.Errorf(\"Expect:\\n%v; Output:\\n%v\", test[\"current\"], test[\"expected\"])\n\t\t\tt.Errorf(\"Expect:\\n%v; Output:\\n%v\", test[\"expected\"], test[\"current\"])\n\t\t}\n\t}\n}\n\nfunc TestUpdateNamespaceLabels_withLabelRemoval(t *testing.T) {", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "618190706", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5761, + "pr_file": "components/profile-controller/controllers/profile_controller_test.go", + "discussion_id": "618190706", + "commented_code": "@@ -54,9 +54,51 @@ func TestUpdateNamespaceLabels(t *testing.T) {\n \t\t},\n \t}\n \tfor _, test := range tests {\n-\t\tupdateNamespaceLabels(test[\"current\"])\n+\t\tdefaultKubeflowNamespaceLabels = map[string]string{\n+\t\t\t\"katib-metricscollector-injection\": \"enabled\",\n+\t\t\t\"serving.kubeflow.org/inferenceservice\": \"enabled\",\n+\t\t\t\"pipelines.kubeflow.org/enabled\": \"true\",\n+\t\t\t\"app.kubernetes.io/part-of\": \"kubeflow-profile\",\n+\t\t}\n+\t\tsetNamespaceLabelsFromConfig(test[\"current\"])\n \t\tif !reflect.DeepEqual(test[\"expected\"], test[\"current\"]) {\n-\t\t\tt.Errorf(\"Expect:\\n%v; Output:\\n%v\", test[\"current\"], test[\"expected\"])\n+\t\t\tt.Errorf(\"Expect:\\n%v; Output:\\n%v\", test[\"expected\"], test[\"current\"])\n \t\t}\n \t}\n }\n+\n+func TestUpdateNamespaceLabels_withLabelRemoval(t *testing.T) {", + "comment_created_at": "2021-04-22T08:26:44+00:00", + "comment_author": "Bobgy", + "comment_body": "nit: suggest merging this with TestEnforceNamespaceLabelsFromConfig, and allow a test case to specify config yaml.\r\nIn go, we typically prefer table tests than separate test funcs.", + "pr_file_module": null + }, + { + "comment_id": "618735327", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5761, + "pr_file": "components/profile-controller/controllers/profile_controller_test.go", + "discussion_id": "618190706", + "commented_code": "@@ -54,9 +54,51 @@ func TestUpdateNamespaceLabels(t *testing.T) {\n \t\t},\n \t}\n \tfor _, test := range tests {\n-\t\tupdateNamespaceLabels(test[\"current\"])\n+\t\tdefaultKubeflowNamespaceLabels = map[string]string{\n+\t\t\t\"katib-metricscollector-injection\": \"enabled\",\n+\t\t\t\"serving.kubeflow.org/inferenceservice\": \"enabled\",\n+\t\t\t\"pipelines.kubeflow.org/enabled\": \"true\",\n+\t\t\t\"app.kubernetes.io/part-of\": \"kubeflow-profile\",\n+\t\t}\n+\t\tsetNamespaceLabelsFromConfig(test[\"current\"])\n \t\tif 
!reflect.DeepEqual(test[\"expected\"], test[\"current\"]) {\n-\t\t\tt.Errorf(\"Expect:\\n%v; Output:\\n%v\", test[\"current\"], test[\"expected\"])\n+\t\t\tt.Errorf(\"Expect:\\n%v; Output:\\n%v\", test[\"expected\"], test[\"current\"])\n \t\t}\n \t}\n }\n+\n+func TestUpdateNamespaceLabels_withLabelRemoval(t *testing.T) {", + "comment_created_at": "2021-04-22T21:00:22+00:00", + "comment_author": "zijianjoy", + "comment_body": "Updated to use table tests. ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "956005218", + "pr_number": 6628, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "created_at": "2022-08-26T12:45:06+00:00", + "commented_code": "return ctrl.Result{RequeueAfter: culler.GetRequeueTime()}, nil\n}\n\nfunc updateNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "956005218", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6628, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "956005218", + "commented_code": "@@ -324,6 +269,102 @@ func (r *NotebookReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c\n \treturn ctrl.Result{RequeueAfter: culler.GetRequeueTime()}, nil\n }\n \n+func updateNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,", + "comment_created_at": "2022-08-26T12:45:06+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Let's change the name of this function to `createNotebookStatus` and update it's signature to:\r\n```golang\r\nfunc createNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,\r\n\tsts *appsv1.StatefulSet, pod *corev1.Pod, req ctrl.Request) (v1beta1.NotebookStatus, error) {\r\n```\r\n\r\nThis means that:\r\n1. We will be passing all necessary objects, statefulset, pod, as arguments\r\n2. The function will be just calculating the status\r\n\r\nThis will also allow us to write some unit tests to ensure we calculate the status as expected.", + "pr_file_module": null + }, + { + "comment_id": "957338776", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6628, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "956005218", + "commented_code": "@@ -324,6 +269,102 @@ func (r *NotebookReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c\n \treturn ctrl.Result{RequeueAfter: culler.GetRequeueTime()}, nil\n }\n \n+func updateNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,", + "comment_created_at": "2022-08-29T13:29:31+00:00", + "comment_author": "apo-ger", + "comment_body": "Done! I have also added some basic unit tests. Let me know if everything is ok ", + "pr_file_module": null + }, + { + "comment_id": "957500711", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6628, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "956005218", + "commented_code": "@@ -324,6 +269,102 @@ func (r *NotebookReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c\n \treturn ctrl.Result{RequeueAfter: culler.GetRequeueTime()}, nil\n }\n \n+func updateNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,", + "comment_created_at": "2022-08-29T15:41:13+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Nice! 
Let's also include another unit test for the case where the Notebook's Pod is unschedulable", + "pr_file_module": null + }, + { + "comment_id": "958375886", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6628, + "pr_file": "components/notebook-controller/controllers/notebook_controller.go", + "discussion_id": "956005218", + "commented_code": "@@ -324,6 +269,102 @@ func (r *NotebookReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c\n \treturn ctrl.Result{RequeueAfter: culler.GetRequeueTime()}, nil\n }\n \n+func updateNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,", + "comment_created_at": "2022-08-30T11:47:27+00:00", + "comment_author": "apo-ger", + "comment_body": "Done! ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "518703865", + "pr_number": 5378, + "pr_file": "components/notebook-controller/controllers/functional_test.go", + "created_at": "2020-11-06T11:53:15+00:00", + "commented_code": "/*\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n*/\n\npackage controllers\n\nimport (\n\t\"context\"\n\t\"time\"\n\n\t. \"github.com/onsi/ginkgo\"\n\t. \"github.com/onsi/gomega\"\n\tappsv1 \"k8s.io/api/apps/v1\"\n\tv1 \"k8s.io/api/core/v1\"\n\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n\t\"k8s.io/apimachinery/pkg/types\"\n\n\tbeta1 \"github.com/kubeflow/kubeflow/components/notebook-controller/api/v1beta1\"\n)\n\nvar _ = Describe(\"Notebook controller\", func() {\n\n\t// Define utility constants for object names and testing timeouts/durations and intervals.\n\tconst (\n\t\tName = \"test-notebook\"\n\t\tNamespace = \"default\"\n\t\ttimeout = time.Second * 10\n\t\tinterval = time.Millisecond * 250\n\t)\n\n\tContext(\"When validating the notebook controller\", func() {\n\t\tIt(\"Should create replicas\", func() {\n\t\t\tBy(\"By creating a new Notebook\")\n\t\t\tctx := context.Background()\n\t\t\tnotebook := &beta1.Notebook{\n\t\t\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\t\t\tName: Name,\n\t\t\t\t\tNamespace: Namespace,\n\t\t\t\t},\n\t\t\t\tSpec: beta1.NotebookSpec{\n\t\t\t\t\tTemplate: beta1.NotebookTemplateSpec{\n\t\t\t\t\t\tSpec: v1.PodSpec{Containers: []v1.Container{{\n\t\t\t\t\t\t\tName: \"busybox\",\n\t\t\t\t\t\t\tImage: \"busybox\",\n\t\t\t\t\t\t}}}},\n\t\t\t\t}}\n\t\t\tExpect(k8sClient.Create(ctx, notebook)).Should(Succeed())\n\n\t\t\tnotebookLookupKey := types.NamespacedName{Name: Name, Namespace: Namespace}\n\t\t\tcreatedNotebook := &beta1.Notebook{}\n\n\t\t\tEventually(func() bool {\n\t\t\t\terr := k8sClient.Get(ctx, notebookLookupKey, createdNotebook)\n\t\t\t\tif err != nil {\n\t\t\t\t\treturn false\n\t\t\t\t}\n\t\t\t\treturn true\n\t\t\t}, timeout, interval).Should(BeTrue())\n\t\t\t/*\n\t\t\t\tChecking for the underlying statefulset\n\t\t\t*/\n\t\t\tBy(\"By checking that the Notebook has statefulset\")\n\t\t\tEventually(func() (bool, error) {\n\t\t\t\tsts := &appsv1.StatefulSet{ObjectMeta: metav1.ObjectMeta{\n\t\t\t\t\tName: Name,\n\t\t\t\t\tNamespace: Namespace,\n\t\t\t\t}}\n\t\t\t\terr := k8sClient.Get(ctx, notebookLookupKey, sts)\n\t\t\t\tif err != 
nil {\n\t\t\t\t\treturn false, err\n\t\t\t\t}\n\t\t\t\treturn true, nil\n\t\t\t}, timeout, interval).Should(BeTrue())\n\t\t\t/*\n\t\t\t\tChecking for replica count\n\t\t\t*/\n\t\t\tBy(\"By checking that the Notebook is running with at least 1 replica\")\n\t\t\tEventually(func() bool {\n\t\t\t\treturn notebook.Status.ReadyReplicas > 0\n\t\t\t}, timeout, interval).ShouldNot(Equal(0))", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "518703865", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5378, + "pr_file": "components/notebook-controller/controllers/functional_test.go", + "discussion_id": "518703865", + "commented_code": "@@ -0,0 +1,94 @@\n+/*\n+\n+Licensed under the Apache License, Version 2.0 (the \"License\");\n+you may not use this file except in compliance with the License.\n+You may obtain a copy of the License at\n+\n+ http://www.apache.org/licenses/LICENSE-2.0\n+\n+Unless required by applicable law or agreed to in writing, software\n+distributed under the License is distributed on an \"AS IS\" BASIS,\n+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+See the License for the specific language governing permissions and\n+limitations under the License.\n+*/\n+\n+package controllers\n+\n+import (\n+\t\"context\"\n+\t\"time\"\n+\n+\t. \"github.com/onsi/ginkgo\"\n+\t. \"github.com/onsi/gomega\"\n+\tappsv1 \"k8s.io/api/apps/v1\"\n+\tv1 \"k8s.io/api/core/v1\"\n+\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n+\t\"k8s.io/apimachinery/pkg/types\"\n+\n+\tbeta1 \"github.com/kubeflow/kubeflow/components/notebook-controller/api/v1beta1\"\n+)\n+\n+var _ = Describe(\"Notebook controller\", func() {\n+\n+\t// Define utility constants for object names and testing timeouts/durations and intervals.\n+\tconst (\n+\t\tName = \"test-notebook\"\n+\t\tNamespace = \"default\"\n+\t\ttimeout = time.Second * 10\n+\t\tinterval = time.Millisecond * 250\n+\t)\n+\n+\tContext(\"When validating the notebook controller\", func() {\n+\t\tIt(\"Should create replicas\", func() {\n+\t\t\tBy(\"By creating a new Notebook\")\n+\t\t\tctx := context.Background()\n+\t\t\tnotebook := &beta1.Notebook{\n+\t\t\t\tObjectMeta: metav1.ObjectMeta{\n+\t\t\t\t\tName: Name,\n+\t\t\t\t\tNamespace: Namespace,\n+\t\t\t\t},\n+\t\t\t\tSpec: beta1.NotebookSpec{\n+\t\t\t\t\tTemplate: beta1.NotebookTemplateSpec{\n+\t\t\t\t\t\tSpec: v1.PodSpec{Containers: []v1.Container{{\n+\t\t\t\t\t\t\tName: \"busybox\",\n+\t\t\t\t\t\t\tImage: \"busybox\",\n+\t\t\t\t\t\t}}}},\n+\t\t\t\t}}\n+\t\t\tExpect(k8sClient.Create(ctx, notebook)).Should(Succeed())\n+\n+\t\t\tnotebookLookupKey := types.NamespacedName{Name: Name, Namespace: Namespace}\n+\t\t\tcreatedNotebook := &beta1.Notebook{}\n+\n+\t\t\tEventually(func() bool {\n+\t\t\t\terr := k8sClient.Get(ctx, notebookLookupKey, createdNotebook)\n+\t\t\t\tif err != nil {\n+\t\t\t\t\treturn false\n+\t\t\t\t}\n+\t\t\t\treturn true\n+\t\t\t}, timeout, interval).Should(BeTrue())\n+\t\t\t/*\n+\t\t\t\tChecking for the underlying statefulset\n+\t\t\t*/\n+\t\t\tBy(\"By checking that the Notebook has statefulset\")\n+\t\t\tEventually(func() (bool, error) {\n+\t\t\t\tsts := &appsv1.StatefulSet{ObjectMeta: metav1.ObjectMeta{\n+\t\t\t\t\tName: Name,\n+\t\t\t\t\tNamespace: Namespace,\n+\t\t\t\t}}\n+\t\t\t\terr := k8sClient.Get(ctx, notebookLookupKey, sts)\n+\t\t\t\tif err != nil {\n+\t\t\t\t\treturn false, err\n+\t\t\t\t}\n+\t\t\t\treturn true, nil\n+\t\t\t}, timeout, interval).Should(BeTrue())\n+\t\t\t/*\n+\t\t\t\tChecking for replica 
count\n+\t\t\t*/\n+\t\t\tBy(\"By checking that the Notebook is running with at least 1 replica\")\n+\t\t\tEventually(func() bool {\n+\t\t\t\treturn notebook.Status.ReadyReplicas > 0\n+\t\t\t}, timeout, interval).ShouldNot(Equal(0))", + "comment_created_at": "2020-11-06T11:53:15+00:00", + "comment_author": "yanniszark", + "comment_body": "Is this correct? You return a bool but then check if it equal to zero (False).\r\nSo in this case, you actually check if `notebook.Status.ReadyReplicas <= 0`.\r\nI attempted to change this to:\r\n\r\n```go\r\n\t\t\tBy(\"By checking that the Notebook is running with at least 1 replica\")\r\n\t\t\tEventually(func() int32 {\r\n\t\t\t\t// Shouldn't we get the statefulset again?\r\n\t\t\t\treturn notebook.Status.ReadyReplicas\r\n\t\t\t}, timeout, interval).ShouldNot(BeZero())\r\n```\r\nbut the test failed.\r\n\r\nWhich leads me to ask, in controller-runtime's test environment, is the statefulset controller running?\r\nIf it is running, what happens to the Pods it creates? I assume that they can't be scheduled, as there is no Node for them to be scheduled to. Can you dig a bit deeper on what happens with Pods in controller-runtime's test env and if they have any suggestions?\r\n\r\nFinally, you wrapped the replica check in an `Eventually` block, but since the block doesn't get the statefulset again, the retries are meaningless.", + "pr_file_module": null + }, + { + "comment_id": "518888927", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5378, + "pr_file": "components/notebook-controller/controllers/functional_test.go", + "discussion_id": "518703865", + "commented_code": "@@ -0,0 +1,94 @@\n+/*\n+\n+Licensed under the Apache License, Version 2.0 (the \"License\");\n+you may not use this file except in compliance with the License.\n+You may obtain a copy of the License at\n+\n+ http://www.apache.org/licenses/LICENSE-2.0\n+\n+Unless required by applicable law or agreed to in writing, software\n+distributed under the License is distributed on an \"AS IS\" BASIS,\n+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+See the License for the specific language governing permissions and\n+limitations under the License.\n+*/\n+\n+package controllers\n+\n+import (\n+\t\"context\"\n+\t\"time\"\n+\n+\t. \"github.com/onsi/ginkgo\"\n+\t. 
\"github.com/onsi/gomega\"\n+\tappsv1 \"k8s.io/api/apps/v1\"\n+\tv1 \"k8s.io/api/core/v1\"\n+\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n+\t\"k8s.io/apimachinery/pkg/types\"\n+\n+\tbeta1 \"github.com/kubeflow/kubeflow/components/notebook-controller/api/v1beta1\"\n+)\n+\n+var _ = Describe(\"Notebook controller\", func() {\n+\n+\t// Define utility constants for object names and testing timeouts/durations and intervals.\n+\tconst (\n+\t\tName = \"test-notebook\"\n+\t\tNamespace = \"default\"\n+\t\ttimeout = time.Second * 10\n+\t\tinterval = time.Millisecond * 250\n+\t)\n+\n+\tContext(\"When validating the notebook controller\", func() {\n+\t\tIt(\"Should create replicas\", func() {\n+\t\t\tBy(\"By creating a new Notebook\")\n+\t\t\tctx := context.Background()\n+\t\t\tnotebook := &beta1.Notebook{\n+\t\t\t\tObjectMeta: metav1.ObjectMeta{\n+\t\t\t\t\tName: Name,\n+\t\t\t\t\tNamespace: Namespace,\n+\t\t\t\t},\n+\t\t\t\tSpec: beta1.NotebookSpec{\n+\t\t\t\t\tTemplate: beta1.NotebookTemplateSpec{\n+\t\t\t\t\t\tSpec: v1.PodSpec{Containers: []v1.Container{{\n+\t\t\t\t\t\t\tName: \"busybox\",\n+\t\t\t\t\t\t\tImage: \"busybox\",\n+\t\t\t\t\t\t}}}},\n+\t\t\t\t}}\n+\t\t\tExpect(k8sClient.Create(ctx, notebook)).Should(Succeed())\n+\n+\t\t\tnotebookLookupKey := types.NamespacedName{Name: Name, Namespace: Namespace}\n+\t\t\tcreatedNotebook := &beta1.Notebook{}\n+\n+\t\t\tEventually(func() bool {\n+\t\t\t\terr := k8sClient.Get(ctx, notebookLookupKey, createdNotebook)\n+\t\t\t\tif err != nil {\n+\t\t\t\t\treturn false\n+\t\t\t\t}\n+\t\t\t\treturn true\n+\t\t\t}, timeout, interval).Should(BeTrue())\n+\t\t\t/*\n+\t\t\t\tChecking for the underlying statefulset\n+\t\t\t*/\n+\t\t\tBy(\"By checking that the Notebook has statefulset\")\n+\t\t\tEventually(func() (bool, error) {\n+\t\t\t\tsts := &appsv1.StatefulSet{ObjectMeta: metav1.ObjectMeta{\n+\t\t\t\t\tName: Name,\n+\t\t\t\t\tNamespace: Namespace,\n+\t\t\t\t}}\n+\t\t\t\terr := k8sClient.Get(ctx, notebookLookupKey, sts)\n+\t\t\t\tif err != nil {\n+\t\t\t\t\treturn false, err\n+\t\t\t\t}\n+\t\t\t\treturn true, nil\n+\t\t\t}, timeout, interval).Should(BeTrue())\n+\t\t\t/*\n+\t\t\t\tChecking for replica count\n+\t\t\t*/\n+\t\t\tBy(\"By checking that the Notebook is running with at least 1 replica\")\n+\t\t\tEventually(func() bool {\n+\t\t\t\treturn notebook.Status.ReadyReplicas > 0\n+\t\t\t}, timeout, interval).ShouldNot(Equal(0))", + "comment_created_at": "2020-11-06T17:12:22+00:00", + "comment_author": "naveensrinivasan", + "comment_body": "I agree on Pod it creates can't be scheduled without a Node. I will check on that.\r\n\r\nAlso 🤦 on the replica check in the `Eventually` block without getting the state refreshed.\r\nIt should be more like this.\r\n\r\n``` go\r\nBy(\"By checking that the Notebook is running with at least 1 replica\")\r\n\t\t\tEventually(func() int32 {\r\n\t\t\t\terr := k8sClient.Get(ctx, notebookLookupKey, createdNotebook)\r\n\t\t\t\tif err != nil {\r\n\t\t\t\t\treturn 0\r\n\t\t\t\t}\r\n\t\t\t\treturn createdNotebook.Status.ReadyReplicas\r\n\t\t\t}, timeout, interval).ShouldNot(BeZero())\r\n```\r\n\r\nIf the controller runtime does not have an option to create `pod` then probably this test would have to be changed. \r\nBut I am not confident about the controller runtime not having that. 
Let me check and keep you posted.\r\n\r\n", + "pr_file_module": null + }, + { + "comment_id": "518945153", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5378, + "pr_file": "components/notebook-controller/controllers/functional_test.go", + "discussion_id": "518703865", + "commented_code": "@@ -0,0 +1,94 @@\n+/*\n+\n+Licensed under the Apache License, Version 2.0 (the \"License\");\n+you may not use this file except in compliance with the License.\n+You may obtain a copy of the License at\n+\n+ http://www.apache.org/licenses/LICENSE-2.0\n+\n+Unless required by applicable law or agreed to in writing, software\n+distributed under the License is distributed on an \"AS IS\" BASIS,\n+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+See the License for the specific language governing permissions and\n+limitations under the License.\n+*/\n+\n+package controllers\n+\n+import (\n+\t\"context\"\n+\t\"time\"\n+\n+\t. \"github.com/onsi/ginkgo\"\n+\t. \"github.com/onsi/gomega\"\n+\tappsv1 \"k8s.io/api/apps/v1\"\n+\tv1 \"k8s.io/api/core/v1\"\n+\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n+\t\"k8s.io/apimachinery/pkg/types\"\n+\n+\tbeta1 \"github.com/kubeflow/kubeflow/components/notebook-controller/api/v1beta1\"\n+)\n+\n+var _ = Describe(\"Notebook controller\", func() {\n+\n+\t// Define utility constants for object names and testing timeouts/durations and intervals.\n+\tconst (\n+\t\tName = \"test-notebook\"\n+\t\tNamespace = \"default\"\n+\t\ttimeout = time.Second * 10\n+\t\tinterval = time.Millisecond * 250\n+\t)\n+\n+\tContext(\"When validating the notebook controller\", func() {\n+\t\tIt(\"Should create replicas\", func() {\n+\t\t\tBy(\"By creating a new Notebook\")\n+\t\t\tctx := context.Background()\n+\t\t\tnotebook := &beta1.Notebook{\n+\t\t\t\tObjectMeta: metav1.ObjectMeta{\n+\t\t\t\t\tName: Name,\n+\t\t\t\t\tNamespace: Namespace,\n+\t\t\t\t},\n+\t\t\t\tSpec: beta1.NotebookSpec{\n+\t\t\t\t\tTemplate: beta1.NotebookTemplateSpec{\n+\t\t\t\t\t\tSpec: v1.PodSpec{Containers: []v1.Container{{\n+\t\t\t\t\t\t\tName: \"busybox\",\n+\t\t\t\t\t\t\tImage: \"busybox\",\n+\t\t\t\t\t\t}}}},\n+\t\t\t\t}}\n+\t\t\tExpect(k8sClient.Create(ctx, notebook)).Should(Succeed())\n+\n+\t\t\tnotebookLookupKey := types.NamespacedName{Name: Name, Namespace: Namespace}\n+\t\t\tcreatedNotebook := &beta1.Notebook{}\n+\n+\t\t\tEventually(func() bool {\n+\t\t\t\terr := k8sClient.Get(ctx, notebookLookupKey, createdNotebook)\n+\t\t\t\tif err != nil {\n+\t\t\t\t\treturn false\n+\t\t\t\t}\n+\t\t\t\treturn true\n+\t\t\t}, timeout, interval).Should(BeTrue())\n+\t\t\t/*\n+\t\t\t\tChecking for the underlying statefulset\n+\t\t\t*/\n+\t\t\tBy(\"By checking that the Notebook has statefulset\")\n+\t\t\tEventually(func() (bool, error) {\n+\t\t\t\tsts := &appsv1.StatefulSet{ObjectMeta: metav1.ObjectMeta{\n+\t\t\t\t\tName: Name,\n+\t\t\t\t\tNamespace: Namespace,\n+\t\t\t\t}}\n+\t\t\t\terr := k8sClient.Get(ctx, notebookLookupKey, sts)\n+\t\t\t\tif err != nil {\n+\t\t\t\t\treturn false, err\n+\t\t\t\t}\n+\t\t\t\treturn true, nil\n+\t\t\t}, timeout, interval).Should(BeTrue())\n+\t\t\t/*\n+\t\t\t\tChecking for replica count\n+\t\t\t*/\n+\t\t\tBy(\"By checking that the Notebook is running with at least 1 replica\")\n+\t\t\tEventually(func() bool {\n+\t\t\t\treturn notebook.Status.ReadyReplicas > 0\n+\t\t\t}, timeout, interval).ShouldNot(Equal(0))", + "comment_created_at": "2020-11-06T18:59:10+00:00", + "comment_author": "naveensrinivasan", + "comment_body": "@yanniszark I checked on the `envtest` and 
confirmed that the `statefulset` controller isn't running. Only the `api server` itself is run by default. So we cannot check the `pod`. So I am refactoring my tests to check for `statefulset`\r\n", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-use-table-driven-tests.md b/_reviewers/kubeflow-use-table-driven-tests.md index 1ba4008..aaa80be 100644 --- a/_reviewers/kubeflow-use-table-driven-tests.md +++ b/_reviewers/kubeflow-use-table-driven-tests.md @@ -49,145 +49,3 @@ func TestSomething(t *testing.T) { ``` This pattern makes your test suite more maintainable and encourages thorough testing of edge cases by making it trivial to add new scenarios. When reviewing code, ensure new functionality is tested with table-driven tests rather than creating separate test functions for related scenarios. - - -[ - { - "discussion_id": "618190706", - "pr_number": 5761, - "pr_file": "components/profile-controller/controllers/profile_controller_test.go", - "created_at": "2021-04-22T08:26:44+00:00", - "commented_code": "},\n\t}\n\tfor _, test := range tests {\n\t\tupdateNamespaceLabels(test[\"current\"])\n\t\tdefaultKubeflowNamespaceLabels = map[string]string{\n\t\t\t\"katib-metricscollector-injection\": \"enabled\",\n\t\t\t\"serving.kubeflow.org/inferenceservice\": \"enabled\",\n\t\t\t\"pipelines.kubeflow.org/enabled\": \"true\",\n\t\t\t\"app.kubernetes.io/part-of\": \"kubeflow-profile\",\n\t\t}\n\t\tsetNamespaceLabelsFromConfig(test[\"current\"])\n\t\tif !reflect.DeepEqual(test[\"expected\"], test[\"current\"]) {\n\t\t\tt.Errorf(\"Expect:\\n%v; Output:\\n%v\", test[\"current\"], test[\"expected\"])\n\t\t\tt.Errorf(\"Expect:\\n%v; Output:\\n%v\", test[\"expected\"], test[\"current\"])\n\t\t}\n\t}\n}\n\nfunc TestUpdateNamespaceLabels_withLabelRemoval(t *testing.T) {", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "618190706", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5761, - "pr_file": "components/profile-controller/controllers/profile_controller_test.go", - "discussion_id": "618190706", - "commented_code": "@@ -54,9 +54,51 @@ func TestUpdateNamespaceLabels(t *testing.T) {\n \t\t},\n \t}\n \tfor _, test := range tests {\n-\t\tupdateNamespaceLabels(test[\"current\"])\n+\t\tdefaultKubeflowNamespaceLabels = map[string]string{\n+\t\t\t\"katib-metricscollector-injection\": \"enabled\",\n+\t\t\t\"serving.kubeflow.org/inferenceservice\": \"enabled\",\n+\t\t\t\"pipelines.kubeflow.org/enabled\": \"true\",\n+\t\t\t\"app.kubernetes.io/part-of\": \"kubeflow-profile\",\n+\t\t}\n+\t\tsetNamespaceLabelsFromConfig(test[\"current\"])\n \t\tif !reflect.DeepEqual(test[\"expected\"], test[\"current\"]) {\n-\t\t\tt.Errorf(\"Expect:\\n%v; Output:\\n%v\", test[\"current\"], test[\"expected\"])\n+\t\t\tt.Errorf(\"Expect:\\n%v; Output:\\n%v\", test[\"expected\"], test[\"current\"])\n \t\t}\n \t}\n }\n+\n+func TestUpdateNamespaceLabels_withLabelRemoval(t *testing.T) {", - "comment_created_at": "2021-04-22T08:26:44+00:00", - "comment_author": "Bobgy", - "comment_body": "nit: suggest merging this with TestEnforceNamespaceLabelsFromConfig, and allow a test case to specify config yaml.\r\nIn go, we typically prefer table tests than separate test funcs.", - "pr_file_module": null - }, - { - "comment_id": "618735327", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5761, - "pr_file": "components/profile-controller/controllers/profile_controller_test.go", - "discussion_id": "618190706", - "commented_code": "@@ -54,9 +54,51 
@@ func TestUpdateNamespaceLabels(t *testing.T) {\n \t\t},\n \t}\n \tfor _, test := range tests {\n-\t\tupdateNamespaceLabels(test[\"current\"])\n+\t\tdefaultKubeflowNamespaceLabels = map[string]string{\n+\t\t\t\"katib-metricscollector-injection\": \"enabled\",\n+\t\t\t\"serving.kubeflow.org/inferenceservice\": \"enabled\",\n+\t\t\t\"pipelines.kubeflow.org/enabled\": \"true\",\n+\t\t\t\"app.kubernetes.io/part-of\": \"kubeflow-profile\",\n+\t\t}\n+\t\tsetNamespaceLabelsFromConfig(test[\"current\"])\n \t\tif !reflect.DeepEqual(test[\"expected\"], test[\"current\"]) {\n-\t\t\tt.Errorf(\"Expect:\\n%v; Output:\\n%v\", test[\"current\"], test[\"expected\"])\n+\t\t\tt.Errorf(\"Expect:\\n%v; Output:\\n%v\", test[\"expected\"], test[\"current\"])\n \t\t}\n \t}\n }\n+\n+func TestUpdateNamespaceLabels_withLabelRemoval(t *testing.T) {", - "comment_created_at": "2021-04-22T21:00:22+00:00", - "comment_author": "zijianjoy", - "comment_body": "Updated to use table tests. ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "956005218", - "pr_number": 6628, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "created_at": "2022-08-26T12:45:06+00:00", - "commented_code": "return ctrl.Result{RequeueAfter: culler.GetRequeueTime()}, nil\n}\n\nfunc updateNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "956005218", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6628, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "956005218", - "commented_code": "@@ -324,6 +269,102 @@ func (r *NotebookReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c\n \treturn ctrl.Result{RequeueAfter: culler.GetRequeueTime()}, nil\n }\n \n+func updateNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,", - "comment_created_at": "2022-08-26T12:45:06+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Let's change the name of this function to `createNotebookStatus` and update it's signature to:\r\n```golang\r\nfunc createNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,\r\n\tsts *appsv1.StatefulSet, pod *corev1.Pod, req ctrl.Request) (v1beta1.NotebookStatus, error) {\r\n```\r\n\r\nThis means that:\r\n1. We will be passing all necessary objects, statefulset, pod, as arguments\r\n2. The function will be just calculating the status\r\n\r\nThis will also allow us to write some unit tests to ensure we calculate the status as expected.", - "pr_file_module": null - }, - { - "comment_id": "957338776", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6628, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "956005218", - "commented_code": "@@ -324,6 +269,102 @@ func (r *NotebookReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c\n \treturn ctrl.Result{RequeueAfter: culler.GetRequeueTime()}, nil\n }\n \n+func updateNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,", - "comment_created_at": "2022-08-29T13:29:31+00:00", - "comment_author": "apo-ger", - "comment_body": "Done! I have also added some basic unit tests. 
Let me know if everything is ok ", - "pr_file_module": null - }, - { - "comment_id": "957500711", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6628, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "956005218", - "commented_code": "@@ -324,6 +269,102 @@ func (r *NotebookReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c\n \treturn ctrl.Result{RequeueAfter: culler.GetRequeueTime()}, nil\n }\n \n+func updateNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,", - "comment_created_at": "2022-08-29T15:41:13+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Nice! Let's also include another unit test for the case where the Notebook's Pod is unschedulable", - "pr_file_module": null - }, - { - "comment_id": "958375886", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6628, - "pr_file": "components/notebook-controller/controllers/notebook_controller.go", - "discussion_id": "956005218", - "commented_code": "@@ -324,6 +269,102 @@ func (r *NotebookReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c\n \treturn ctrl.Result{RequeueAfter: culler.GetRequeueTime()}, nil\n }\n \n+func updateNotebookStatus(r *NotebookReconciler, nb *v1beta1.Notebook,", - "comment_created_at": "2022-08-30T11:47:27+00:00", - "comment_author": "apo-ger", - "comment_body": "Done! ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "518703865", - "pr_number": 5378, - "pr_file": "components/notebook-controller/controllers/functional_test.go", - "created_at": "2020-11-06T11:53:15+00:00", - "commented_code": "/*\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n*/\n\npackage controllers\n\nimport (\n\t\"context\"\n\t\"time\"\n\n\t. \"github.com/onsi/ginkgo\"\n\t. 
\"github.com/onsi/gomega\"\n\tappsv1 \"k8s.io/api/apps/v1\"\n\tv1 \"k8s.io/api/core/v1\"\n\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n\t\"k8s.io/apimachinery/pkg/types\"\n\n\tbeta1 \"github.com/kubeflow/kubeflow/components/notebook-controller/api/v1beta1\"\n)\n\nvar _ = Describe(\"Notebook controller\", func() {\n\n\t// Define utility constants for object names and testing timeouts/durations and intervals.\n\tconst (\n\t\tName = \"test-notebook\"\n\t\tNamespace = \"default\"\n\t\ttimeout = time.Second * 10\n\t\tinterval = time.Millisecond * 250\n\t)\n\n\tContext(\"When validating the notebook controller\", func() {\n\t\tIt(\"Should create replicas\", func() {\n\t\t\tBy(\"By creating a new Notebook\")\n\t\t\tctx := context.Background()\n\t\t\tnotebook := &beta1.Notebook{\n\t\t\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\t\t\tName: Name,\n\t\t\t\t\tNamespace: Namespace,\n\t\t\t\t},\n\t\t\t\tSpec: beta1.NotebookSpec{\n\t\t\t\t\tTemplate: beta1.NotebookTemplateSpec{\n\t\t\t\t\t\tSpec: v1.PodSpec{Containers: []v1.Container{{\n\t\t\t\t\t\t\tName: \"busybox\",\n\t\t\t\t\t\t\tImage: \"busybox\",\n\t\t\t\t\t\t}}}},\n\t\t\t\t}}\n\t\t\tExpect(k8sClient.Create(ctx, notebook)).Should(Succeed())\n\n\t\t\tnotebookLookupKey := types.NamespacedName{Name: Name, Namespace: Namespace}\n\t\t\tcreatedNotebook := &beta1.Notebook{}\n\n\t\t\tEventually(func() bool {\n\t\t\t\terr := k8sClient.Get(ctx, notebookLookupKey, createdNotebook)\n\t\t\t\tif err != nil {\n\t\t\t\t\treturn false\n\t\t\t\t}\n\t\t\t\treturn true\n\t\t\t}, timeout, interval).Should(BeTrue())\n\t\t\t/*\n\t\t\t\tChecking for the underlying statefulset\n\t\t\t*/\n\t\t\tBy(\"By checking that the Notebook has statefulset\")\n\t\t\tEventually(func() (bool, error) {\n\t\t\t\tsts := &appsv1.StatefulSet{ObjectMeta: metav1.ObjectMeta{\n\t\t\t\t\tName: Name,\n\t\t\t\t\tNamespace: Namespace,\n\t\t\t\t}}\n\t\t\t\terr := k8sClient.Get(ctx, notebookLookupKey, sts)\n\t\t\t\tif err != nil {\n\t\t\t\t\treturn false, err\n\t\t\t\t}\n\t\t\t\treturn true, nil\n\t\t\t}, timeout, interval).Should(BeTrue())\n\t\t\t/*\n\t\t\t\tChecking for replica count\n\t\t\t*/\n\t\t\tBy(\"By checking that the Notebook is running with at least 1 replica\")\n\t\t\tEventually(func() bool {\n\t\t\t\treturn notebook.Status.ReadyReplicas > 0\n\t\t\t}, timeout, interval).ShouldNot(Equal(0))", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "518703865", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5378, - "pr_file": "components/notebook-controller/controllers/functional_test.go", - "discussion_id": "518703865", - "commented_code": "@@ -0,0 +1,94 @@\n+/*\n+\n+Licensed under the Apache License, Version 2.0 (the \"License\");\n+you may not use this file except in compliance with the License.\n+You may obtain a copy of the License at\n+\n+ http://www.apache.org/licenses/LICENSE-2.0\n+\n+Unless required by applicable law or agreed to in writing, software\n+distributed under the License is distributed on an \"AS IS\" BASIS,\n+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+See the License for the specific language governing permissions and\n+limitations under the License.\n+*/\n+\n+package controllers\n+\n+import (\n+\t\"context\"\n+\t\"time\"\n+\n+\t. \"github.com/onsi/ginkgo\"\n+\t. 
\"github.com/onsi/gomega\"\n+\tappsv1 \"k8s.io/api/apps/v1\"\n+\tv1 \"k8s.io/api/core/v1\"\n+\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n+\t\"k8s.io/apimachinery/pkg/types\"\n+\n+\tbeta1 \"github.com/kubeflow/kubeflow/components/notebook-controller/api/v1beta1\"\n+)\n+\n+var _ = Describe(\"Notebook controller\", func() {\n+\n+\t// Define utility constants for object names and testing timeouts/durations and intervals.\n+\tconst (\n+\t\tName = \"test-notebook\"\n+\t\tNamespace = \"default\"\n+\t\ttimeout = time.Second * 10\n+\t\tinterval = time.Millisecond * 250\n+\t)\n+\n+\tContext(\"When validating the notebook controller\", func() {\n+\t\tIt(\"Should create replicas\", func() {\n+\t\t\tBy(\"By creating a new Notebook\")\n+\t\t\tctx := context.Background()\n+\t\t\tnotebook := &beta1.Notebook{\n+\t\t\t\tObjectMeta: metav1.ObjectMeta{\n+\t\t\t\t\tName: Name,\n+\t\t\t\t\tNamespace: Namespace,\n+\t\t\t\t},\n+\t\t\t\tSpec: beta1.NotebookSpec{\n+\t\t\t\t\tTemplate: beta1.NotebookTemplateSpec{\n+\t\t\t\t\t\tSpec: v1.PodSpec{Containers: []v1.Container{{\n+\t\t\t\t\t\t\tName: \"busybox\",\n+\t\t\t\t\t\t\tImage: \"busybox\",\n+\t\t\t\t\t\t}}}},\n+\t\t\t\t}}\n+\t\t\tExpect(k8sClient.Create(ctx, notebook)).Should(Succeed())\n+\n+\t\t\tnotebookLookupKey := types.NamespacedName{Name: Name, Namespace: Namespace}\n+\t\t\tcreatedNotebook := &beta1.Notebook{}\n+\n+\t\t\tEventually(func() bool {\n+\t\t\t\terr := k8sClient.Get(ctx, notebookLookupKey, createdNotebook)\n+\t\t\t\tif err != nil {\n+\t\t\t\t\treturn false\n+\t\t\t\t}\n+\t\t\t\treturn true\n+\t\t\t}, timeout, interval).Should(BeTrue())\n+\t\t\t/*\n+\t\t\t\tChecking for the underlying statefulset\n+\t\t\t*/\n+\t\t\tBy(\"By checking that the Notebook has statefulset\")\n+\t\t\tEventually(func() (bool, error) {\n+\t\t\t\tsts := &appsv1.StatefulSet{ObjectMeta: metav1.ObjectMeta{\n+\t\t\t\t\tName: Name,\n+\t\t\t\t\tNamespace: Namespace,\n+\t\t\t\t}}\n+\t\t\t\terr := k8sClient.Get(ctx, notebookLookupKey, sts)\n+\t\t\t\tif err != nil {\n+\t\t\t\t\treturn false, err\n+\t\t\t\t}\n+\t\t\t\treturn true, nil\n+\t\t\t}, timeout, interval).Should(BeTrue())\n+\t\t\t/*\n+\t\t\t\tChecking for replica count\n+\t\t\t*/\n+\t\t\tBy(\"By checking that the Notebook is running with at least 1 replica\")\n+\t\t\tEventually(func() bool {\n+\t\t\t\treturn notebook.Status.ReadyReplicas > 0\n+\t\t\t}, timeout, interval).ShouldNot(Equal(0))", - "comment_created_at": "2020-11-06T11:53:15+00:00", - "comment_author": "yanniszark", - "comment_body": "Is this correct? You return a bool but then check if it equal to zero (False).\r\nSo in this case, you actually check if `notebook.Status.ReadyReplicas <= 0`.\r\nI attempted to change this to:\r\n\r\n```go\r\n\t\t\tBy(\"By checking that the Notebook is running with at least 1 replica\")\r\n\t\t\tEventually(func() int32 {\r\n\t\t\t\t// Shouldn't we get the statefulset again?\r\n\t\t\t\treturn notebook.Status.ReadyReplicas\r\n\t\t\t}, timeout, interval).ShouldNot(BeZero())\r\n```\r\nbut the test failed.\r\n\r\nWhich leads me to ask, in controller-runtime's test environment, is the statefulset controller running?\r\nIf it is running, what happens to the Pods it creates? I assume that they can't be scheduled, as there is no Node for them to be scheduled to. 
Can you dig a bit deeper on what happens with Pods in controller-runtime's test env and if they have any suggestions?\r\n\r\nFinally, you wrapped the replica check in an `Eventually` block, but since the block doesn't get the statefulset again, the retries are meaningless.", - "pr_file_module": null - }, - { - "comment_id": "518888927", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5378, - "pr_file": "components/notebook-controller/controllers/functional_test.go", - "discussion_id": "518703865", - "commented_code": "@@ -0,0 +1,94 @@\n+/*\n+\n+Licensed under the Apache License, Version 2.0 (the \"License\");\n+you may not use this file except in compliance with the License.\n+You may obtain a copy of the License at\n+\n+ http://www.apache.org/licenses/LICENSE-2.0\n+\n+Unless required by applicable law or agreed to in writing, software\n+distributed under the License is distributed on an \"AS IS\" BASIS,\n+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+See the License for the specific language governing permissions and\n+limitations under the License.\n+*/\n+\n+package controllers\n+\n+import (\n+\t\"context\"\n+\t\"time\"\n+\n+\t. \"github.com/onsi/ginkgo\"\n+\t. \"github.com/onsi/gomega\"\n+\tappsv1 \"k8s.io/api/apps/v1\"\n+\tv1 \"k8s.io/api/core/v1\"\n+\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n+\t\"k8s.io/apimachinery/pkg/types\"\n+\n+\tbeta1 \"github.com/kubeflow/kubeflow/components/notebook-controller/api/v1beta1\"\n+)\n+\n+var _ = Describe(\"Notebook controller\", func() {\n+\n+\t// Define utility constants for object names and testing timeouts/durations and intervals.\n+\tconst (\n+\t\tName = \"test-notebook\"\n+\t\tNamespace = \"default\"\n+\t\ttimeout = time.Second * 10\n+\t\tinterval = time.Millisecond * 250\n+\t)\n+\n+\tContext(\"When validating the notebook controller\", func() {\n+\t\tIt(\"Should create replicas\", func() {\n+\t\t\tBy(\"By creating a new Notebook\")\n+\t\t\tctx := context.Background()\n+\t\t\tnotebook := &beta1.Notebook{\n+\t\t\t\tObjectMeta: metav1.ObjectMeta{\n+\t\t\t\t\tName: Name,\n+\t\t\t\t\tNamespace: Namespace,\n+\t\t\t\t},\n+\t\t\t\tSpec: beta1.NotebookSpec{\n+\t\t\t\t\tTemplate: beta1.NotebookTemplateSpec{\n+\t\t\t\t\t\tSpec: v1.PodSpec{Containers: []v1.Container{{\n+\t\t\t\t\t\t\tName: \"busybox\",\n+\t\t\t\t\t\t\tImage: \"busybox\",\n+\t\t\t\t\t\t}}}},\n+\t\t\t\t}}\n+\t\t\tExpect(k8sClient.Create(ctx, notebook)).Should(Succeed())\n+\n+\t\t\tnotebookLookupKey := types.NamespacedName{Name: Name, Namespace: Namespace}\n+\t\t\tcreatedNotebook := &beta1.Notebook{}\n+\n+\t\t\tEventually(func() bool {\n+\t\t\t\terr := k8sClient.Get(ctx, notebookLookupKey, createdNotebook)\n+\t\t\t\tif err != nil {\n+\t\t\t\t\treturn false\n+\t\t\t\t}\n+\t\t\t\treturn true\n+\t\t\t}, timeout, interval).Should(BeTrue())\n+\t\t\t/*\n+\t\t\t\tChecking for the underlying statefulset\n+\t\t\t*/\n+\t\t\tBy(\"By checking that the Notebook has statefulset\")\n+\t\t\tEventually(func() (bool, error) {\n+\t\t\t\tsts := &appsv1.StatefulSet{ObjectMeta: metav1.ObjectMeta{\n+\t\t\t\t\tName: Name,\n+\t\t\t\t\tNamespace: Namespace,\n+\t\t\t\t}}\n+\t\t\t\terr := k8sClient.Get(ctx, notebookLookupKey, sts)\n+\t\t\t\tif err != nil {\n+\t\t\t\t\treturn false, err\n+\t\t\t\t}\n+\t\t\t\treturn true, nil\n+\t\t\t}, timeout, interval).Should(BeTrue())\n+\t\t\t/*\n+\t\t\t\tChecking for replica count\n+\t\t\t*/\n+\t\t\tBy(\"By checking that the Notebook is running with at least 1 replica\")\n+\t\t\tEventually(func() bool {\n+\t\t\t\treturn 
notebook.Status.ReadyReplicas > 0\n+\t\t\t}, timeout, interval).ShouldNot(Equal(0))", - "comment_created_at": "2020-11-06T17:12:22+00:00", - "comment_author": "naveensrinivasan", - "comment_body": "I agree on Pod it creates can't be scheduled without a Node. I will check on that.\r\n\r\nAlso \ud83e\udd26 on the replica check in the `Eventually` block without getting the state refreshed.\r\nIt should be more like this.\r\n\r\n``` go\r\nBy(\"By checking that the Notebook is running with at least 1 replica\")\r\n\t\t\tEventually(func() int32 {\r\n\t\t\t\terr := k8sClient.Get(ctx, notebookLookupKey, createdNotebook)\r\n\t\t\t\tif err != nil {\r\n\t\t\t\t\treturn 0\r\n\t\t\t\t}\r\n\t\t\t\treturn createdNotebook.Status.ReadyReplicas\r\n\t\t\t}, timeout, interval).ShouldNot(BeZero())\r\n```\r\n\r\nIf the controller runtime does not have an option to create `pod` then probably this test would have to be changed. \r\nBut I am not confident about the controller runtime not having that. Let me check and keep you posted.\r\n\r\n", - "pr_file_module": null - }, - { - "comment_id": "518945153", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5378, - "pr_file": "components/notebook-controller/controllers/functional_test.go", - "discussion_id": "518703865", - "commented_code": "@@ -0,0 +1,94 @@\n+/*\n+\n+Licensed under the Apache License, Version 2.0 (the \"License\");\n+you may not use this file except in compliance with the License.\n+You may obtain a copy of the License at\n+\n+ http://www.apache.org/licenses/LICENSE-2.0\n+\n+Unless required by applicable law or agreed to in writing, software\n+distributed under the License is distributed on an \"AS IS\" BASIS,\n+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+See the License for the specific language governing permissions and\n+limitations under the License.\n+*/\n+\n+package controllers\n+\n+import (\n+\t\"context\"\n+\t\"time\"\n+\n+\t. \"github.com/onsi/ginkgo\"\n+\t. 
\"github.com/onsi/gomega\"\n+\tappsv1 \"k8s.io/api/apps/v1\"\n+\tv1 \"k8s.io/api/core/v1\"\n+\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n+\t\"k8s.io/apimachinery/pkg/types\"\n+\n+\tbeta1 \"github.com/kubeflow/kubeflow/components/notebook-controller/api/v1beta1\"\n+)\n+\n+var _ = Describe(\"Notebook controller\", func() {\n+\n+\t// Define utility constants for object names and testing timeouts/durations and intervals.\n+\tconst (\n+\t\tName = \"test-notebook\"\n+\t\tNamespace = \"default\"\n+\t\ttimeout = time.Second * 10\n+\t\tinterval = time.Millisecond * 250\n+\t)\n+\n+\tContext(\"When validating the notebook controller\", func() {\n+\t\tIt(\"Should create replicas\", func() {\n+\t\t\tBy(\"By creating a new Notebook\")\n+\t\t\tctx := context.Background()\n+\t\t\tnotebook := &beta1.Notebook{\n+\t\t\t\tObjectMeta: metav1.ObjectMeta{\n+\t\t\t\t\tName: Name,\n+\t\t\t\t\tNamespace: Namespace,\n+\t\t\t\t},\n+\t\t\t\tSpec: beta1.NotebookSpec{\n+\t\t\t\t\tTemplate: beta1.NotebookTemplateSpec{\n+\t\t\t\t\t\tSpec: v1.PodSpec{Containers: []v1.Container{{\n+\t\t\t\t\t\t\tName: \"busybox\",\n+\t\t\t\t\t\t\tImage: \"busybox\",\n+\t\t\t\t\t\t}}}},\n+\t\t\t\t}}\n+\t\t\tExpect(k8sClient.Create(ctx, notebook)).Should(Succeed())\n+\n+\t\t\tnotebookLookupKey := types.NamespacedName{Name: Name, Namespace: Namespace}\n+\t\t\tcreatedNotebook := &beta1.Notebook{}\n+\n+\t\t\tEventually(func() bool {\n+\t\t\t\terr := k8sClient.Get(ctx, notebookLookupKey, createdNotebook)\n+\t\t\t\tif err != nil {\n+\t\t\t\t\treturn false\n+\t\t\t\t}\n+\t\t\t\treturn true\n+\t\t\t}, timeout, interval).Should(BeTrue())\n+\t\t\t/*\n+\t\t\t\tChecking for the underlying statefulset\n+\t\t\t*/\n+\t\t\tBy(\"By checking that the Notebook has statefulset\")\n+\t\t\tEventually(func() (bool, error) {\n+\t\t\t\tsts := &appsv1.StatefulSet{ObjectMeta: metav1.ObjectMeta{\n+\t\t\t\t\tName: Name,\n+\t\t\t\t\tNamespace: Namespace,\n+\t\t\t\t}}\n+\t\t\t\terr := k8sClient.Get(ctx, notebookLookupKey, sts)\n+\t\t\t\tif err != nil {\n+\t\t\t\t\treturn false, err\n+\t\t\t\t}\n+\t\t\t\treturn true, nil\n+\t\t\t}, timeout, interval).Should(BeTrue())\n+\t\t\t/*\n+\t\t\t\tChecking for replica count\n+\t\t\t*/\n+\t\t\tBy(\"By checking that the Notebook is running with at least 1 replica\")\n+\t\t\tEventually(func() bool {\n+\t\t\t\treturn notebook.Status.ReadyReplicas > 0\n+\t\t\t}, timeout, interval).ShouldNot(Equal(0))", - "comment_created_at": "2020-11-06T18:59:10+00:00", - "comment_author": "naveensrinivasan", - "comment_body": "@yanniszark I checked on the `envtest` and confirmed that the `statefulset` controller isn't running. Only the `api server` itself is run by default. So we cannot check the `pod`. 
So I am refactoring my tests to check for `statefulset`\r\n", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-validate-inputs-explicitly.json b/_reviewers/kubeflow-validate-inputs-explicitly.json new file mode 100644 index 0000000..6969373 --- /dev/null +++ b/_reviewers/kubeflow-validate-inputs-explicitly.json @@ -0,0 +1,130 @@ +[ + { + "discussion_id": "688421843", + "pr_number": 6092, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", + "created_at": "2021-08-13T10:47:42+00:00", + "commented_code": "container = notebook[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n\n cpu = get_form_value(body, defaults, \"cpu\")\n\n cpu_limit = get_form_value(body, defaults, \"cpuLimit\")\n if cpu_limit == \"NaN\":", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "688421843", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6092, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", + "discussion_id": "688421843", + "commented_code": "@@ -163,7 +163,10 @@ def set_notebook_cpu(notebook, body, defaults):\n container = notebook[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n \n cpu = get_form_value(body, defaults, \"cpu\")\n+\n cpu_limit = get_form_value(body, defaults, \"cpuLimit\")\n+ if cpu_limit == \"NaN\":", + "comment_created_at": "2021-08-13T10:47:42+00:00", + "comment_author": "kimwnasptd", + "comment_body": "let's instead make a check if `NaN` is included in the value of either `cpu` or `cpu_limit`.\r\n\r\nAnd if that's the case then the backend should raise a `BadRequest` werkzeug exception (400)", + "pr_file_module": null + }, + { + "comment_id": "688445484", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6092, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", + "discussion_id": "688421843", + "commented_code": "@@ -163,7 +163,10 @@ def set_notebook_cpu(notebook, body, defaults):\n container = notebook[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n \n cpu = get_form_value(body, defaults, \"cpu\")\n+\n cpu_limit = get_form_value(body, defaults, \"cpuLimit\")\n+ if cpu_limit == \"NaN\":", + "comment_created_at": "2021-08-13T11:34:06+00:00", + "comment_author": "munagekar", + "comment_body": "@kimwnasptd Thank you for the quick review. 
I will merge in changes from https://github.com/kubeflow/kubeflow/pull/6058 and verify the behavior the locally and make changes as per your suggestions.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "596310181", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", + "created_at": "2021-03-17T19:11:08+00:00", + "commented_code": ")\n\n\ndef set_notebook_http_rewrite_uri(notebook, body, defaults):\n notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n http_rewrite_uri = get_form_value(body, defaults, \"httpRewriteURI\")\n if http_rewrite_uri:\n notebook_annotations[\"notebooks.kubeflow.org/http-rewrite-uri\"] = http_rewrite_uri\n\n\ndef set_http_headers_request_set(notebook, body, defaults):\n notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n http_headers_request_set = get_form_value(body, defaults, \"httpHeadersRequestSet\")\n if http_headers_request_set:\n notebook_annotations[\"notebooks.kubeflow.org/http-headers-request-set\"] = http_headers_request_set\n\n\ndef set_server_type(notebook, body, defaults):\n notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n notebook_annotations[\"notebooks.kubeflow.org/server-type\"] = get_form_value(body, defaults, \"serverType\")", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "596310181", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", + "discussion_id": "596310181", + "commented_code": "@@ -135,6 +135,25 @@ def set_notebook_image_pull_policy(notebook, body, defaults):\n )\n \n \n+def set_notebook_http_rewrite_uri(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_rewrite_uri = get_form_value(body, defaults, \"httpRewriteURI\")\n+ if http_rewrite_uri:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-rewrite-uri\"] = http_rewrite_uri\n+\n+\n+def set_http_headers_request_set(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_headers_request_set = get_form_value(body, defaults, \"httpHeadersRequestSet\")\n+ if http_headers_request_set:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-headers-request-set\"] = http_headers_request_set\n+\n+\n+def set_server_type(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ notebook_annotations[\"notebooks.kubeflow.org/server-type\"] = get_form_value(body, defaults, \"serverType\")", + "comment_created_at": "2021-03-17T19:11:08+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Lets also add some logic here that would handle incorrect values.\r\n\r\nThe expected values here, from the request, would be:\r\n* `None`, which means `\"jupyter\"`\r\n* `\"jupyter\"`\r\n* `\"rstudio\"`\r\n* `\"vscode\"`\r\n\r\nIf this field is present in the request but has a different value from the expected ones then the backend should raise a `400` error", + "pr_file_module": null + }, + { + "comment_id": "596695235", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", + "discussion_id": "596310181", + "commented_code": "@@ -135,6 +135,25 @@ def set_notebook_image_pull_policy(notebook, body, defaults):\n )\n \n \n+def set_notebook_http_rewrite_uri(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_rewrite_uri = 
get_form_value(body, defaults, \"httpRewriteURI\")\n+ if http_rewrite_uri:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-rewrite-uri\"] = http_rewrite_uri\n+\n+\n+def set_http_headers_request_set(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_headers_request_set = get_form_value(body, defaults, \"httpHeadersRequestSet\")\n+ if http_headers_request_set:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-headers-request-set\"] = http_headers_request_set\n+\n+\n+def set_server_type(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ notebook_annotations[\"notebooks.kubeflow.org/server-type\"] = get_form_value(body, defaults, \"serverType\")", + "comment_created_at": "2021-03-18T09:41:22+00:00", + "comment_author": "davidspek", + "comment_body": "Does adding error logic make sense here? The frontend doesn't rely on user input so unless there is a MITM change to the request a wrong input cannot happen. Also `none` cannot happen, as one of the toggles is always selected. It's not as if the JWA backend is decoupled from the frontend (they exist in the same container and are built together). \r\nRelated to this, how would I go about causing a 400 error specifically in the backend code?", + "pr_file_module": null + }, + { + "comment_id": "596762071", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", + "discussion_id": "596310181", + "commented_code": "@@ -135,6 +135,25 @@ def set_notebook_image_pull_policy(notebook, body, defaults):\n )\n \n \n+def set_notebook_http_rewrite_uri(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_rewrite_uri = get_form_value(body, defaults, \"httpRewriteURI\")\n+ if http_rewrite_uri:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-rewrite-uri\"] = http_rewrite_uri\n+\n+\n+def set_http_headers_request_set(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_headers_request_set = get_form_value(body, defaults, \"httpHeadersRequestSet\")\n+ if http_headers_request_set:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-headers-request-set\"] = http_headers_request_set\n+\n+\n+def set_server_type(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ notebook_annotations[\"notebooks.kubeflow.org/server-type\"] = get_form_value(body, defaults, \"serverType\")", + "comment_created_at": "2021-03-18T11:16:43+00:00", + "comment_author": "kimwnasptd", + "comment_body": "Indeed, the frontend sends only specific values but it's a good practice for the backend to make it explicit to the user/frontend that the data it received was not formatted as expected.", + "pr_file_module": null + }, + { + "comment_id": "596769984", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", + "discussion_id": "596310181", + "commented_code": "@@ -135,6 +135,25 @@ def set_notebook_image_pull_policy(notebook, body, defaults):\n )\n \n \n+def set_notebook_http_rewrite_uri(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_rewrite_uri = get_form_value(body, defaults, \"httpRewriteURI\")\n+ if http_rewrite_uri:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-rewrite-uri\"] = http_rewrite_uri\n+\n+\n+def 
set_http_headers_request_set(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_headers_request_set = get_form_value(body, defaults, \"httpHeadersRequestSet\")\n+ if http_headers_request_set:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-headers-request-set\"] = http_headers_request_set\n+\n+\n+def set_server_type(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ notebook_annotations[\"notebooks.kubeflow.org/server-type\"] = get_form_value(body, defaults, \"serverType\")", + "comment_created_at": "2021-03-18T11:27:25+00:00", + "comment_author": "davidspek", + "comment_body": "Should I handle the error with `log.error`?", + "pr_file_module": null + }, + { + "comment_id": "596775389", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", + "discussion_id": "596310181", + "commented_code": "@@ -135,6 +135,25 @@ def set_notebook_image_pull_policy(notebook, body, defaults):\n )\n \n \n+def set_notebook_http_rewrite_uri(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_rewrite_uri = get_form_value(body, defaults, \"httpRewriteURI\")\n+ if http_rewrite_uri:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-rewrite-uri\"] = http_rewrite_uri\n+\n+\n+def set_http_headers_request_set(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_headers_request_set = get_form_value(body, defaults, \"httpHeadersRequestSet\")\n+ if http_headers_request_set:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-headers-request-set\"] = http_headers_request_set\n+\n+\n+def set_server_type(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ notebook_annotations[\"notebooks.kubeflow.org/server-type\"] = get_form_value(body, defaults, \"serverType\")", + "comment_created_at": "2021-03-18T11:32:58+00:00", + "comment_author": "kimwnasptd", + "comment_body": "And also you could look at this snippet for how to raise this error\r\n\r\nhttps://github.com/kubeflow/kubeflow/blob/master/components/crud-web-apps/jupyter/backend/apps/common/form.py#L31-L34\r\n\r\nEDIT: was typing it before I saw your comment above ", + "pr_file_module": null + }, + { + "comment_id": "596930230", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", + "discussion_id": "596310181", + "commented_code": "@@ -135,6 +135,25 @@ def set_notebook_image_pull_policy(notebook, body, defaults):\n )\n \n \n+def set_notebook_http_rewrite_uri(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_rewrite_uri = get_form_value(body, defaults, \"httpRewriteURI\")\n+ if http_rewrite_uri:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-rewrite-uri\"] = http_rewrite_uri\n+\n+\n+def set_http_headers_request_set(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_headers_request_set = get_form_value(body, defaults, \"httpHeadersRequestSet\")\n+ if http_headers_request_set:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-headers-request-set\"] = http_headers_request_set\n+\n+\n+def set_server_type(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ notebook_annotations[\"notebooks.kubeflow.org/server-type\"] 
= get_form_value(body, defaults, \"serverType\")", + "comment_created_at": "2021-03-18T14:35:03+00:00", + "comment_author": "davidspek", + "comment_body": "Thanks for the link. Sorry for wasting your time on this, I should have looked at the code a bit better and I would have found it. ", + "pr_file_module": null + }, + { + "comment_id": "596951137", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 5646, + "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", + "discussion_id": "596310181", + "commented_code": "@@ -135,6 +135,25 @@ def set_notebook_image_pull_policy(notebook, body, defaults):\n )\n \n \n+def set_notebook_http_rewrite_uri(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_rewrite_uri = get_form_value(body, defaults, \"httpRewriteURI\")\n+ if http_rewrite_uri:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-rewrite-uri\"] = http_rewrite_uri\n+\n+\n+def set_http_headers_request_set(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_headers_request_set = get_form_value(body, defaults, \"httpHeadersRequestSet\")\n+ if http_headers_request_set:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-headers-request-set\"] = http_headers_request_set\n+\n+\n+def set_server_type(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ notebook_annotations[\"notebooks.kubeflow.org/server-type\"] = get_form_value(body, defaults, \"serverType\")", + "comment_created_at": "2021-03-18T14:57:45+00:00", + "comment_author": "kimwnasptd", + "comment_body": "You are not wasting anyone's time. Your efforts are as important as mine and no releases were delayed because of a question :) ", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-validate-inputs-explicitly.md b/_reviewers/kubeflow-validate-inputs-explicitly.md index 7b616b4..f368653 100644 --- a/_reviewers/kubeflow-validate-inputs-explicitly.md +++ b/_reviewers/kubeflow-validate-inputs-explicitly.md @@ -30,135 +30,3 @@ def set_server_type(notebook, body, defaults): ``` This practice ensures your API is robust against malformed inputs regardless of their source, provides clear error messages for debugging, and maintains a clean separation of concerns between frontend and backend validation. 
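
For the server-type field discussed above, one possible shape of that check is sketched below. This is an illustration only: the allowed values (`None` meaning `"jupyter"`, plus `"jupyter"`, `"rstudio"`, `"vscode"`) and the 400 `BadRequest` response come from the review discussion, `get_form_value` is the helper already used in `apps/common/form.py`, and the constant name and error message are hypothetical.

```python
from werkzeug.exceptions import BadRequest

# Hypothetical constant for illustration; values taken from the review discussion
ALLOWED_SERVER_TYPES = {"jupyter", "rstudio", "vscode"}


def set_server_type(notebook, body, defaults):
    notebook_annotations = notebook["metadata"]["annotations"]
    server_type = get_form_value(body, defaults, "serverType")

    # A missing value is treated as the default "jupyter" server
    if server_type is None:
        server_type = "jupyter"

    # Reject anything outside the expected set with a 400 so the caller
    # gets an explicit error instead of a silently wrong annotation
    if server_type not in ALLOWED_SERVER_TYPES:
        raise BadRequest(
            "serverType must be one of: %s" % ", ".join(sorted(ALLOWED_SERVER_TYPES))
        )

    notebook_annotations["notebooks.kubeflow.org/server-type"] = server_type
```

A similar guard can reject `"NaN"` in the `cpu`/`cpuLimit` form values before they reach the Notebook spec, as suggested in the related discussion below.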
- - -[ - { - "discussion_id": "688421843", - "pr_number": 6092, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", - "created_at": "2021-08-13T10:47:42+00:00", - "commented_code": "container = notebook[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n\n cpu = get_form_value(body, defaults, \"cpu\")\n\n cpu_limit = get_form_value(body, defaults, \"cpuLimit\")\n if cpu_limit == \"NaN\":", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "688421843", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6092, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", - "discussion_id": "688421843", - "commented_code": "@@ -163,7 +163,10 @@ def set_notebook_cpu(notebook, body, defaults):\n container = notebook[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n \n cpu = get_form_value(body, defaults, \"cpu\")\n+\n cpu_limit = get_form_value(body, defaults, \"cpuLimit\")\n+ if cpu_limit == \"NaN\":", - "comment_created_at": "2021-08-13T10:47:42+00:00", - "comment_author": "kimwnasptd", - "comment_body": "let's instead make a check if `NaN` is included in the value of either `cpu` or `cpu_limit`.\r\n\r\nAnd if that's the case then the backend should raise a `BadRequest` werkzeug exception (400)", - "pr_file_module": null - }, - { - "comment_id": "688445484", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6092, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", - "discussion_id": "688421843", - "commented_code": "@@ -163,7 +163,10 @@ def set_notebook_cpu(notebook, body, defaults):\n container = notebook[\"spec\"][\"template\"][\"spec\"][\"containers\"][0]\n \n cpu = get_form_value(body, defaults, \"cpu\")\n+\n cpu_limit = get_form_value(body, defaults, \"cpuLimit\")\n+ if cpu_limit == \"NaN\":", - "comment_created_at": "2021-08-13T11:34:06+00:00", - "comment_author": "munagekar", - "comment_body": "@kimwnasptd Thank you for the quick review. 
I will merge in changes from https://github.com/kubeflow/kubeflow/pull/6058 and verify the behavior the locally and make changes as per your suggestions.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "596310181", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", - "created_at": "2021-03-17T19:11:08+00:00", - "commented_code": ")\n\n\ndef set_notebook_http_rewrite_uri(notebook, body, defaults):\n notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n http_rewrite_uri = get_form_value(body, defaults, \"httpRewriteURI\")\n if http_rewrite_uri:\n notebook_annotations[\"notebooks.kubeflow.org/http-rewrite-uri\"] = http_rewrite_uri\n\n\ndef set_http_headers_request_set(notebook, body, defaults):\n notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n http_headers_request_set = get_form_value(body, defaults, \"httpHeadersRequestSet\")\n if http_headers_request_set:\n notebook_annotations[\"notebooks.kubeflow.org/http-headers-request-set\"] = http_headers_request_set\n\n\ndef set_server_type(notebook, body, defaults):\n notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n notebook_annotations[\"notebooks.kubeflow.org/server-type\"] = get_form_value(body, defaults, \"serverType\")", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "596310181", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", - "discussion_id": "596310181", - "commented_code": "@@ -135,6 +135,25 @@ def set_notebook_image_pull_policy(notebook, body, defaults):\n )\n \n \n+def set_notebook_http_rewrite_uri(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_rewrite_uri = get_form_value(body, defaults, \"httpRewriteURI\")\n+ if http_rewrite_uri:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-rewrite-uri\"] = http_rewrite_uri\n+\n+\n+def set_http_headers_request_set(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_headers_request_set = get_form_value(body, defaults, \"httpHeadersRequestSet\")\n+ if http_headers_request_set:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-headers-request-set\"] = http_headers_request_set\n+\n+\n+def set_server_type(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ notebook_annotations[\"notebooks.kubeflow.org/server-type\"] = get_form_value(body, defaults, \"serverType\")", - "comment_created_at": "2021-03-17T19:11:08+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Lets also add some logic here that would handle incorrect values.\r\n\r\nThe expected values here, from the request, would be:\r\n* `None`, which means `\"jupyter\"`\r\n* `\"jupyter\"`\r\n* `\"rstudio\"`\r\n* `\"vscode\"`\r\n\r\nIf this field is present in the request but has a different value from the expected ones then the backend should raise a `400` error", - "pr_file_module": null - }, - { - "comment_id": "596695235", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", - "discussion_id": "596310181", - "commented_code": "@@ -135,6 +135,25 @@ def set_notebook_image_pull_policy(notebook, body, defaults):\n )\n \n \n+def set_notebook_http_rewrite_uri(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_rewrite_uri = 
get_form_value(body, defaults, \"httpRewriteURI\")\n+ if http_rewrite_uri:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-rewrite-uri\"] = http_rewrite_uri\n+\n+\n+def set_http_headers_request_set(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_headers_request_set = get_form_value(body, defaults, \"httpHeadersRequestSet\")\n+ if http_headers_request_set:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-headers-request-set\"] = http_headers_request_set\n+\n+\n+def set_server_type(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ notebook_annotations[\"notebooks.kubeflow.org/server-type\"] = get_form_value(body, defaults, \"serverType\")", - "comment_created_at": "2021-03-18T09:41:22+00:00", - "comment_author": "davidspek", - "comment_body": "Does adding error logic make sense here? The frontend doesn't rely on user input so unless there is a MITM change to the request a wrong input cannot happen. Also `none` cannot happen, as one of the toggles is always selected. It's not as if the JWA backend is decoupled from the frontend (they exist in the same container and are built together). \r\nRelated to this, how would I go about causing a 400 error specifically in the backend code?", - "pr_file_module": null - }, - { - "comment_id": "596762071", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", - "discussion_id": "596310181", - "commented_code": "@@ -135,6 +135,25 @@ def set_notebook_image_pull_policy(notebook, body, defaults):\n )\n \n \n+def set_notebook_http_rewrite_uri(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_rewrite_uri = get_form_value(body, defaults, \"httpRewriteURI\")\n+ if http_rewrite_uri:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-rewrite-uri\"] = http_rewrite_uri\n+\n+\n+def set_http_headers_request_set(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_headers_request_set = get_form_value(body, defaults, \"httpHeadersRequestSet\")\n+ if http_headers_request_set:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-headers-request-set\"] = http_headers_request_set\n+\n+\n+def set_server_type(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ notebook_annotations[\"notebooks.kubeflow.org/server-type\"] = get_form_value(body, defaults, \"serverType\")", - "comment_created_at": "2021-03-18T11:16:43+00:00", - "comment_author": "kimwnasptd", - "comment_body": "Indeed, the frontend sends only specific values but it's a good practice for the backend to make it explicit to the user/frontend that the data it received was not formatted as expected.", - "pr_file_module": null - }, - { - "comment_id": "596769984", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", - "discussion_id": "596310181", - "commented_code": "@@ -135,6 +135,25 @@ def set_notebook_image_pull_policy(notebook, body, defaults):\n )\n \n \n+def set_notebook_http_rewrite_uri(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_rewrite_uri = get_form_value(body, defaults, \"httpRewriteURI\")\n+ if http_rewrite_uri:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-rewrite-uri\"] = http_rewrite_uri\n+\n+\n+def 
set_http_headers_request_set(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_headers_request_set = get_form_value(body, defaults, \"httpHeadersRequestSet\")\n+ if http_headers_request_set:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-headers-request-set\"] = http_headers_request_set\n+\n+\n+def set_server_type(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ notebook_annotations[\"notebooks.kubeflow.org/server-type\"] = get_form_value(body, defaults, \"serverType\")", - "comment_created_at": "2021-03-18T11:27:25+00:00", - "comment_author": "davidspek", - "comment_body": "Should I handle the error with `log.error`?", - "pr_file_module": null - }, - { - "comment_id": "596775389", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", - "discussion_id": "596310181", - "commented_code": "@@ -135,6 +135,25 @@ def set_notebook_image_pull_policy(notebook, body, defaults):\n )\n \n \n+def set_notebook_http_rewrite_uri(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_rewrite_uri = get_form_value(body, defaults, \"httpRewriteURI\")\n+ if http_rewrite_uri:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-rewrite-uri\"] = http_rewrite_uri\n+\n+\n+def set_http_headers_request_set(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_headers_request_set = get_form_value(body, defaults, \"httpHeadersRequestSet\")\n+ if http_headers_request_set:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-headers-request-set\"] = http_headers_request_set\n+\n+\n+def set_server_type(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ notebook_annotations[\"notebooks.kubeflow.org/server-type\"] = get_form_value(body, defaults, \"serverType\")", - "comment_created_at": "2021-03-18T11:32:58+00:00", - "comment_author": "kimwnasptd", - "comment_body": "And also you could look at this snippet for how to raise this error\r\n\r\nhttps://github.com/kubeflow/kubeflow/blob/master/components/crud-web-apps/jupyter/backend/apps/common/form.py#L31-L34\r\n\r\nEDIT: was typing it before I saw your comment above ", - "pr_file_module": null - }, - { - "comment_id": "596930230", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", - "discussion_id": "596310181", - "commented_code": "@@ -135,6 +135,25 @@ def set_notebook_image_pull_policy(notebook, body, defaults):\n )\n \n \n+def set_notebook_http_rewrite_uri(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_rewrite_uri = get_form_value(body, defaults, \"httpRewriteURI\")\n+ if http_rewrite_uri:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-rewrite-uri\"] = http_rewrite_uri\n+\n+\n+def set_http_headers_request_set(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_headers_request_set = get_form_value(body, defaults, \"httpHeadersRequestSet\")\n+ if http_headers_request_set:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-headers-request-set\"] = http_headers_request_set\n+\n+\n+def set_server_type(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ notebook_annotations[\"notebooks.kubeflow.org/server-type\"] 
= get_form_value(body, defaults, \"serverType\")", - "comment_created_at": "2021-03-18T14:35:03+00:00", - "comment_author": "davidspek", - "comment_body": "Thanks for the link. Sorry for wasting your time on this, I should have looked at the code a bit better and I would have found it. ", - "pr_file_module": null - }, - { - "comment_id": "596951137", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 5646, - "pr_file": "components/crud-web-apps/jupyter/backend/apps/common/form.py", - "discussion_id": "596310181", - "commented_code": "@@ -135,6 +135,25 @@ def set_notebook_image_pull_policy(notebook, body, defaults):\n )\n \n \n+def set_notebook_http_rewrite_uri(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_rewrite_uri = get_form_value(body, defaults, \"httpRewriteURI\")\n+ if http_rewrite_uri:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-rewrite-uri\"] = http_rewrite_uri\n+\n+\n+def set_http_headers_request_set(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ http_headers_request_set = get_form_value(body, defaults, \"httpHeadersRequestSet\")\n+ if http_headers_request_set:\n+ notebook_annotations[\"notebooks.kubeflow.org/http-headers-request-set\"] = http_headers_request_set\n+\n+\n+def set_server_type(notebook, body, defaults):\n+ notebook_annotations = notebook[\"metadata\"][\"annotations\"]\n+ notebook_annotations[\"notebooks.kubeflow.org/server-type\"] = get_form_value(body, defaults, \"serverType\")", - "comment_created_at": "2021-03-18T14:57:45+00:00", - "comment_author": "kimwnasptd", - "comment_body": "You are not wasting anyone's time. Your efforts are as important as mine and no releases were delayed because of a question :) ", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/kubeflow-validate-model-optimization.json b/_reviewers/kubeflow-validate-model-optimization.json new file mode 100644 index 0000000..2bd35bf --- /dev/null +++ b/_reviewers/kubeflow-validate-model-optimization.json @@ -0,0 +1,190 @@ +[ + { + "discussion_id": "794906792", + "pr_number": 6266, + "pr_file": "ROADMAP.md", + "created_at": "2022-01-28T21:40:42+00:00", + "commented_code": "# Kubeflow Roadmap\n\n## Kubeflow 1.5 Release, Due: February 2022\n* Kubeflow 1.5 timeline is tracked in [#538](https://github.com/kubeflow/community/pull/538)\n* Additional details will be posted in early 1Q'22\n## Kubeflow 1.5 Release, Due: March 2022\n* Kubeflow 1.5 [milestones and timeline](https://github.com/kubeflow/community/pull/538)\n\n### Themes\n* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime\n* Improve model accuracy and reduce overfitting, especially with hyper parameter tuning\n* Simplify operations and optimize utilization (including spot instance use cases for distributed training) \n* More consistent user experience - UI appearance, features and naming\n* Improved documentation, tutorials and examples\n* Stretch - Support for K8s 1.22 and associated dependencies (cert mgr, istio)\n\n### Major Features from each Working Group (note: Individual WG versions are independent of Kubeflow's)\n\n#### Kubeflow Pipelines, v1.8\n* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines 
deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime.\n\n#### Katib, v0.13 \n* Hyperparameter leader election for HA operations and faster recovery\n* Validation for Early Stopping algorithm settings improves accuracy, reduces overfitting", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "794906792", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6266, + "pr_file": "ROADMAP.md", + "discussion_id": "794906792", + "commented_code": "@@ -1,10 +1,54 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.5 Release, Due: February 2022\n-* Kubeflow 1.5 timeline is tracked in [#538](https://github.com/kubeflow/community/pull/538)\n-* Additional details will be posted in early 1Q'22\n+## Kubeflow 1.5 Release, Due: March 2022\n+* Kubeflow 1.5 [milestones and timeline](https://github.com/kubeflow/community/pull/538)\n+\n+### Themes\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime\n+* Improve model accuracy and reduce overfitting, especially with hyper parameter tuning\n+* Simplify operations and optimize utilization (including spot instance use cases for distributed training) \n+* More consistent user experience - UI appearance, features and naming\n+* Improved documentation, tutorials and examples\n+* Stretch - Support for K8s 1.22 and associated dependencies (cert mgr, istio)\n+\n+### Major Features from each Working Group (note: Individual WG versions are independent of Kubeflow's)\n+\n+#### Kubeflow Pipelines, v1.8\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime.\n+\n+#### Katib, v0.13 \n+* Hyperparameter leader election for HA operations and faster recovery\n+* Validation for Early Stopping algorithm settings improves accuracy, reduces overfitting", + "comment_created_at": "2022-01-28T21:40:42+00:00", + "comment_author": "andreyvelich", + "comment_body": "Should we modify this @jbottum @tenzen-y @johnugeorge ?\r\n```suggestion\r\n* Validation for Early Stopping algorithm settings helps users to proper reduce model overfitting\r\n```", + "pr_file_module": null + }, + { + "comment_id": "794989244", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6266, + "pr_file": "ROADMAP.md", + "discussion_id": "794906792", + "commented_code": "@@ -1,10 +1,54 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.5 Release, Due: February 2022\n-* Kubeflow 1.5 timeline is tracked in [#538](https://github.com/kubeflow/community/pull/538)\n-* Additional details will be posted in early 1Q'22\n+## Kubeflow 1.5 Release, Due: March 2022\n+* Kubeflow 1.5 [milestones and timeline](https://github.com/kubeflow/community/pull/538)\n+\n+### Themes\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime\n+* Improve model accuracy and reduce overfitting, especially with hyper parameter tuning\n+* Simplify operations and optimize utilization (including spot instance use cases for distributed training) \n+* More consistent user experience - UI 
appearance, features and naming\n+* Improved documentation, tutorials and examples\n+* Stretch - Support for K8s 1.22 and associated dependencies (cert mgr, istio)\n+\n+### Major Features from each Working Group (note: Individual WG versions are independent of Kubeflow's)\n+\n+#### Kubeflow Pipelines, v1.8\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime.\n+\n+#### Katib, v0.13 \n+* Hyperparameter leader election for HA operations and faster recovery\n+* Validation for Early Stopping algorithm settings improves accuracy, reduces overfitting", + "comment_created_at": "2022-01-29T02:59:02+00:00", + "comment_author": "tenzen-y", + "comment_body": "It sounds great to me @andreyvelich!", + "pr_file_module": null + }, + { + "comment_id": "795076103", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6266, + "pr_file": "ROADMAP.md", + "discussion_id": "794906792", + "commented_code": "@@ -1,10 +1,54 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.5 Release, Due: February 2022\n-* Kubeflow 1.5 timeline is tracked in [#538](https://github.com/kubeflow/community/pull/538)\n-* Additional details will be posted in early 1Q'22\n+## Kubeflow 1.5 Release, Due: March 2022\n+* Kubeflow 1.5 [milestones and timeline](https://github.com/kubeflow/community/pull/538)\n+\n+### Themes\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime\n+* Improve model accuracy and reduce overfitting, especially with hyper parameter tuning\n+* Simplify operations and optimize utilization (including spot instance use cases for distributed training) \n+* More consistent user experience - UI appearance, features and naming\n+* Improved documentation, tutorials and examples\n+* Stretch - Support for K8s 1.22 and associated dependencies (cert mgr, istio)\n+\n+### Major Features from each Working Group (note: Individual WG versions are independent of Kubeflow's)\n+\n+#### Kubeflow Pipelines, v1.8\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime.\n+\n+#### Katib, v0.13 \n+* Hyperparameter leader election for HA operations and faster recovery\n+* Validation for Early Stopping algorithm settings improves accuracy, reduces overfitting", + "comment_created_at": "2022-01-29T17:11:39+00:00", + "comment_author": "jbottum", + "comment_body": "@andreyvelich do we need the word \"proper\"", + "pr_file_module": null + }, + { + "comment_id": "795191497", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6266, + "pr_file": "ROADMAP.md", + "discussion_id": "794906792", + "commented_code": "@@ -1,10 +1,54 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.5 Release, Due: February 2022\n-* Kubeflow 1.5 timeline is tracked in [#538](https://github.com/kubeflow/community/pull/538)\n-* Additional details will be posted in early 1Q'22\n+## Kubeflow 1.5 Release, Due: March 2022\n+* Kubeflow 1.5 [milestones and timeline](https://github.com/kubeflow/community/pull/538)\n+\n+### Themes\n+* Switching to [Emissary 
executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime\n+* Improve model accuracy and reduce overfitting, especially with hyper parameter tuning\n+* Simplify operations and optimize utilization (including spot instance use cases for distributed training) \n+* More consistent user experience - UI appearance, features and naming\n+* Improved documentation, tutorials and examples\n+* Stretch - Support for K8s 1.22 and associated dependencies (cert mgr, istio)\n+\n+### Major Features from each Working Group (note: Individual WG versions are independent of Kubeflow's)\n+\n+#### Kubeflow Pipelines, v1.8\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime.\n+\n+#### Katib, v0.13 \n+* Hyperparameter leader election for HA operations and faster recovery\n+* Validation for Early Stopping algorithm settings improves accuracy, reduces overfitting", + "comment_created_at": "2022-01-30T14:38:59+00:00", + "comment_author": "andreyvelich", + "comment_body": "@jbottum Basically, validation helps to avoid user's errors and mistakes in APIs when they use Early Stopping.\r\nThat is why I was thinking about this change.", + "pr_file_module": null + }, + { + "comment_id": "795214523", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6266, + "pr_file": "ROADMAP.md", + "discussion_id": "794906792", + "commented_code": "@@ -1,10 +1,54 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.5 Release, Due: February 2022\n-* Kubeflow 1.5 timeline is tracked in [#538](https://github.com/kubeflow/community/pull/538)\n-* Additional details will be posted in early 1Q'22\n+## Kubeflow 1.5 Release, Due: March 2022\n+* Kubeflow 1.5 [milestones and timeline](https://github.com/kubeflow/community/pull/538)\n+\n+### Themes\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime\n+* Improve model accuracy and reduce overfitting, especially with hyper parameter tuning\n+* Simplify operations and optimize utilization (including spot instance use cases for distributed training) \n+* More consistent user experience - UI appearance, features and naming\n+* Improved documentation, tutorials and examples\n+* Stretch - Support for K8s 1.22 and associated dependencies (cert mgr, istio)\n+\n+### Major Features from each Working Group (note: Individual WG versions are independent of Kubeflow's)\n+\n+#### Kubeflow Pipelines, v1.8\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime.\n+\n+#### Katib, v0.13 \n+* Hyperparameter leader election for HA operations and faster recovery\n+* Validation for Early Stopping algorithm settings improves accuracy, reduces overfitting", + "comment_created_at": "2022-01-30T16:59:53+00:00", + "comment_author": "jbottum", + "comment_body": "@jbottum understand but I am not sure this correct - helps users to **proper** reduce model 
overfitting....shouldn't it read...helps users to reduce model overfitting ?", + "pr_file_module": null + }, + { + "comment_id": "796063923", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6266, + "pr_file": "ROADMAP.md", + "discussion_id": "794906792", + "commented_code": "@@ -1,10 +1,54 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.5 Release, Due: February 2022\n-* Kubeflow 1.5 timeline is tracked in [#538](https://github.com/kubeflow/community/pull/538)\n-* Additional details will be posted in early 1Q'22\n+## Kubeflow 1.5 Release, Due: March 2022\n+* Kubeflow 1.5 [milestones and timeline](https://github.com/kubeflow/community/pull/538)\n+\n+### Themes\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime\n+* Improve model accuracy and reduce overfitting, especially with hyper parameter tuning\n+* Simplify operations and optimize utilization (including spot instance use cases for distributed training) \n+* More consistent user experience - UI appearance, features and naming\n+* Improved documentation, tutorials and examples\n+* Stretch - Support for K8s 1.22 and associated dependencies (cert mgr, istio)\n+\n+### Major Features from each Working Group (note: Individual WG versions are independent of Kubeflow's)\n+\n+#### Kubeflow Pipelines, v1.8\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime.\n+\n+#### Katib, v0.13 \n+* Hyperparameter leader election for HA operations and faster recovery\n+* Validation for Early Stopping algorithm settings improves accuracy, reduces overfitting", + "comment_created_at": "2022-01-31T21:06:12+00:00", + "comment_author": "jbottum", + "comment_body": "@andreyvelich I put this in another commit lower down, need to move this forward.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1022650192", + "pr_number": 6705, + "pr_file": "ROADMAP.md", + "created_at": "2022-11-15T11:02:56+00:00", + "commented_code": "# Kubeflow Roadmap\n\n## Kubeflow 1.7 Release - planning now in process \n* Please join the Kubeflow Community Meetings on Tuesdays for updates and for opportunities to contribute. \n## Kubeflow 1.7 Release, Planned: March 2023 \nThe Kubeflow Community plans to deliver its v1.7 release in March 2023, per this [timeline](https://github.com/kubeflow/community/pull/573). The high level deliveries are being tracked in this [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1). 
The v1.7 release process will be managed by the v1.7 [Release Team](https://github.com/kubeflow/internal-acls/pull/576) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n\nNotable feature candidates in the [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1) are:\n* Support for Kubernetes 1.25\n* Improved user isolation especially for the Kubeflow pipelines user interface, database, and artifacts\n* Update Kubeflow Noteboks naming from Notebooks to Workbenches\n* Delivery of KFP V2 beta with its new front-end, backend and SDK \n* New, time saving workflows to simplify data exchange from Katib to Kubeflow Pipelines", + "repo_full_name": "kubeflow/kubeflow", + "discussion_comments": [ + { + "comment_id": "1022650192", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6705, + "pr_file": "ROADMAP.md", + "discussion_id": "1022650192", + "commented_code": "@@ -1,7 +1,18 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.7 Release - planning now in process \n-* Please join the Kubeflow Community Meetings on Tuesdays for updates and for opportunities to contribute. \n+## Kubeflow 1.7 Release, Planned: March 2023 \n+The Kubeflow Community plans to deliver its v1.7 release in March 2023, per this [timeline](https://github.com/kubeflow/community/pull/573). The high level deliveries are being tracked in this [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1). The v1.7 release process will be managed by the v1.7 [Release Team](https://github.com/kubeflow/internal-acls/pull/576) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n+\n+Notable feature candidates in the [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1) are:\n+* Support for Kubernetes 1.25\n+* Improved user isolation especially for the Kubeflow pipelines user interface, database, and artifacts\n+* Update Kubeflow Noteboks naming from Notebooks to Workbenches\n+* Delivery of KFP V2 beta with its new front-end, backend and SDK \n+* New, time saving workflows to simplify data exchange from Katib to Kubeflow Pipelines", + "comment_created_at": "2022-11-15T11:02:56+00:00", + "comment_author": "andreyvelich", + "comment_body": "I think, we should remove this part for Katib also.\r\n\r\nMaybe we can add the following:\r\n- Simplified creation of Katib and Training Operator experiments using SDKs.\r\n\r\nAny other items that we certainly will deliver in KF 1.7 @kubeflow/wg-training-leads @tenzen-y @anencore94 ?\r\n", + "pr_file_module": null + }, + { + "comment_id": "1022881155", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6705, + "pr_file": "ROADMAP.md", + "discussion_id": "1022650192", + "commented_code": "@@ -1,7 +1,18 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.7 Release - planning now in process \n-* Please join the Kubeflow Community Meetings on Tuesdays for updates and for opportunities to contribute. \n+## Kubeflow 1.7 Release, Planned: March 2023 \n+The Kubeflow Community plans to deliver its v1.7 release in March 2023, per this [timeline](https://github.com/kubeflow/community/pull/573). The high level deliveries are being tracked in this [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1). 
The v1.7 release process will be managed by the v1.7 [Release Team](https://github.com/kubeflow/internal-acls/pull/576) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n+\n+Notable feature candidates in the [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1) are:\n+* Support for Kubernetes 1.25\n+* Improved user isolation especially for the Kubeflow pipelines user interface, database, and artifacts\n+* Update Kubeflow Noteboks naming from Notebooks to Workbenches\n+* Delivery of KFP V2 beta with its new front-end, backend and SDK \n+* New, time saving workflows to simplify data exchange from Katib to Kubeflow Pipelines", + "comment_created_at": "2022-11-15T14:45:50+00:00", + "comment_author": "jbottum", + "comment_body": "@andreyvelich I have incorporated this update.", + "pr_file_module": null + }, + { + "comment_id": "1023583120", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6705, + "pr_file": "ROADMAP.md", + "discussion_id": "1022650192", + "commented_code": "@@ -1,7 +1,18 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.7 Release - planning now in process \n-* Please join the Kubeflow Community Meetings on Tuesdays for updates and for opportunities to contribute. \n+## Kubeflow 1.7 Release, Planned: March 2023 \n+The Kubeflow Community plans to deliver its v1.7 release in March 2023, per this [timeline](https://github.com/kubeflow/community/pull/573). The high level deliveries are being tracked in this [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1). The v1.7 release process will be managed by the v1.7 [Release Team](https://github.com/kubeflow/internal-acls/pull/576) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n+\n+Notable feature candidates in the [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1) are:\n+* Support for Kubernetes 1.25\n+* Improved user isolation especially for the Kubeflow pipelines user interface, database, and artifacts\n+* Update Kubeflow Noteboks naming from Notebooks to Workbenches\n+* Delivery of KFP V2 beta with its new front-end, backend and SDK \n+* New, time saving workflows to simplify data exchange from Katib to Kubeflow Pipelines", + "comment_created_at": "2022-11-16T07:15:52+00:00", + "comment_author": "tenzen-y", + "comment_body": "I have no items.", + "pr_file_module": null + }, + { + "comment_id": "1024303442", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6705, + "pr_file": "ROADMAP.md", + "discussion_id": "1022650192", + "commented_code": "@@ -1,7 +1,18 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.7 Release - planning now in process \n-* Please join the Kubeflow Community Meetings on Tuesdays for updates and for opportunities to contribute. \n+## Kubeflow 1.7 Release, Planned: March 2023 \n+The Kubeflow Community plans to deliver its v1.7 release in March 2023, per this [timeline](https://github.com/kubeflow/community/pull/573). The high level deliveries are being tracked in this [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1). 
The v1.7 release process will be managed by the v1.7 [Release Team](https://github.com/kubeflow/internal-acls/pull/576) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n+\n+Notable feature candidates in the [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1) are:\n+* Support for Kubernetes 1.25\n+* Improved user isolation especially for the Kubeflow pipelines user interface, database, and artifacts\n+* Update Kubeflow Noteboks naming from Notebooks to Workbenches\n+* Delivery of KFP V2 beta with its new front-end, backend and SDK \n+* New, time saving workflows to simplify data exchange from Katib to Kubeflow Pipelines", + "comment_created_at": "2022-11-16T17:31:13+00:00", + "comment_author": "andreyvelich", + "comment_body": "@jbottum We discussed on today's AutoML community meeting that we want to promote a few Katib UI changes as part of Kubeflow 1.7 ROADMAP. \r\nCan we indicate the most significant improvements/changes for the user that we will deliver in 1.7 ?\r\n@kimwnasptd @johnugeorge What Katib UI changes do we want on the ROADMAP that we deliver ?", + "pr_file_module": null + }, + { + "comment_id": "1024383796", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6705, + "pr_file": "ROADMAP.md", + "discussion_id": "1022650192", + "commented_code": "@@ -1,7 +1,18 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.7 Release - planning now in process \n-* Please join the Kubeflow Community Meetings on Tuesdays for updates and for opportunities to contribute. \n+## Kubeflow 1.7 Release, Planned: March 2023 \n+The Kubeflow Community plans to deliver its v1.7 release in March 2023, per this [timeline](https://github.com/kubeflow/community/pull/573). The high level deliveries are being tracked in this [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1). The v1.7 release process will be managed by the v1.7 [Release Team](https://github.com/kubeflow/internal-acls/pull/576) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n+\n+Notable feature candidates in the [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1) are:\n+* Support for Kubernetes 1.25\n+* Improved user isolation especially for the Kubeflow pipelines user interface, database, and artifacts\n+* Update Kubeflow Noteboks naming from Notebooks to Workbenches\n+* Delivery of KFP V2 beta with its new front-end, backend and SDK \n+* New, time saving workflows to simplify data exchange from Katib to Kubeflow Pipelines", + "comment_created_at": "2022-11-16T18:45:55+00:00", + "comment_author": "johnugeorge", + "comment_body": "Expose trial logs(https://github.com/kubeflow/katib/issues/971 )\r\nFiltering, Sorting trial list(https://github.com/kubeflow/katib/issues/1441 )", + "pr_file_module": null + }, + { + "comment_id": "1024558646", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6705, + "pr_file": "ROADMAP.md", + "discussion_id": "1022650192", + "commented_code": "@@ -1,7 +1,18 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.7 Release - planning now in process \n-* Please join the Kubeflow Community Meetings on Tuesdays for updates and for opportunities to contribute. \n+## Kubeflow 1.7 Release, Planned: March 2023 \n+The Kubeflow Community plans to deliver its v1.7 release in March 2023, per this [timeline](https://github.com/kubeflow/community/pull/573). 
The high level deliveries are being tracked in this [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1). The v1.7 release process will be managed by the v1.7 [Release Team](https://github.com/kubeflow/internal-acls/pull/576) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n+\n+Notable feature candidates in the [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1) are:\n+* Support for Kubernetes 1.25\n+* Improved user isolation especially for the Kubeflow pipelines user interface, database, and artifacts\n+* Update Kubeflow Noteboks naming from Notebooks to Workbenches\n+* Delivery of KFP V2 beta with its new front-end, backend and SDK \n+* New, time saving workflows to simplify data exchange from Katib to Kubeflow Pipelines", + "comment_created_at": "2022-11-16T22:14:08+00:00", + "comment_author": "jbottum", + "comment_body": "@andreyvelich @johnugeorge I have added line 13 to include these feature candidates in the roadmap. Please provide looks good or comments. thanks.", + "pr_file_module": null + }, + { + "comment_id": "1025136550", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6705, + "pr_file": "ROADMAP.md", + "discussion_id": "1022650192", + "commented_code": "@@ -1,7 +1,18 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.7 Release - planning now in process \n-* Please join the Kubeflow Community Meetings on Tuesdays for updates and for opportunities to contribute. \n+## Kubeflow 1.7 Release, Planned: March 2023 \n+The Kubeflow Community plans to deliver its v1.7 release in March 2023, per this [timeline](https://github.com/kubeflow/community/pull/573). The high level deliveries are being tracked in this [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1). The v1.7 release process will be managed by the v1.7 [Release Team](https://github.com/kubeflow/internal-acls/pull/576) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n+\n+Notable feature candidates in the [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1) are:\n+* Support for Kubernetes 1.25\n+* Improved user isolation especially for the Kubeflow pipelines user interface, database, and artifacts\n+* Update Kubeflow Noteboks naming from Notebooks to Workbenches\n+* Delivery of KFP V2 beta with its new front-end, backend and SDK \n+* New, time saving workflows to simplify data exchange from Katib to Kubeflow Pipelines", + "comment_created_at": "2022-11-17T12:38:21+00:00", + "comment_author": "andreyvelich", + "comment_body": "Thank you @jbottum. LGTM", + "pr_file_module": null + }, + { + "comment_id": "1025630075", + "repo_full_name": "kubeflow/kubeflow", + "pr_number": 6705, + "pr_file": "ROADMAP.md", + "discussion_id": "1022650192", + "commented_code": "@@ -1,7 +1,18 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.7 Release - planning now in process \n-* Please join the Kubeflow Community Meetings on Tuesdays for updates and for opportunities to contribute. \n+## Kubeflow 1.7 Release, Planned: March 2023 \n+The Kubeflow Community plans to deliver its v1.7 release in March 2023, per this [timeline](https://github.com/kubeflow/community/pull/573). The high level deliveries are being tracked in this [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1). 
The v1.7 release process will be managed by the v1.7 [Release Team](https://github.com/kubeflow/internal-acls/pull/576) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n+\n+Notable feature candidates in the [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1) are:\n+* Support for Kubernetes 1.25\n+* Improved user isolation especially for the Kubeflow pipelines user interface, database, and artifacts\n+* Update Kubeflow Noteboks naming from Notebooks to Workbenches\n+* Delivery of KFP V2 beta with its new front-end, backend and SDK \n+* New, time saving workflows to simplify data exchange from Katib to Kubeflow Pipelines", + "comment_created_at": "2022-11-17T19:42:25+00:00", + "comment_author": "johnugeorge", + "comment_body": "LGTM ", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/kubeflow-validate-model-optimization.md b/_reviewers/kubeflow-validate-model-optimization.md index 8a19034..ce1e295 100644 --- a/_reviewers/kubeflow-validate-model-optimization.md +++ b/_reviewers/kubeflow-validate-model-optimization.md @@ -30,195 +30,3 @@ algorithm: ``` This approach helps prevent common errors in machine learning workflows, reduces debugging time, and improves model quality by ensuring optimization techniques are correctly applied. - - -[ - { - "discussion_id": "794906792", - "pr_number": 6266, - "pr_file": "ROADMAP.md", - "created_at": "2022-01-28T21:40:42+00:00", - "commented_code": "# Kubeflow Roadmap\n\n## Kubeflow 1.5 Release, Due: February 2022\n* Kubeflow 1.5 timeline is tracked in [#538](https://github.com/kubeflow/community/pull/538)\n* Additional details will be posted in early 1Q'22\n## Kubeflow 1.5 Release, Due: March 2022\n* Kubeflow 1.5 [milestones and timeline](https://github.com/kubeflow/community/pull/538)\n\n### Themes\n* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime\n* Improve model accuracy and reduce overfitting, especially with hyper parameter tuning\n* Simplify operations and optimize utilization (including spot instance use cases for distributed training) \n* More consistent user experience - UI appearance, features and naming\n* Improved documentation, tutorials and examples\n* Stretch - Support for K8s 1.22 and associated dependencies (cert mgr, istio)\n\n### Major Features from each Working Group (note: Individual WG versions are independent of Kubeflow's)\n\n#### Kubeflow Pipelines, v1.8\n* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime.\n\n#### Katib, v0.13 \n* Hyperparameter leader election for HA operations and faster recovery\n* Validation for Early Stopping algorithm settings improves accuracy, reduces overfitting", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "794906792", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6266, - "pr_file": "ROADMAP.md", - "discussion_id": "794906792", - "commented_code": "@@ -1,10 +1,54 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.5 Release, Due: February 2022\n-* Kubeflow 1.5 timeline is tracked in 
[#538](https://github.com/kubeflow/community/pull/538)\n-* Additional details will be posted in early 1Q'22\n+## Kubeflow 1.5 Release, Due: March 2022\n+* Kubeflow 1.5 [milestones and timeline](https://github.com/kubeflow/community/pull/538)\n+\n+### Themes\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime\n+* Improve model accuracy and reduce overfitting, especially with hyper parameter tuning\n+* Simplify operations and optimize utilization (including spot instance use cases for distributed training) \n+* More consistent user experience - UI appearance, features and naming\n+* Improved documentation, tutorials and examples\n+* Stretch - Support for K8s 1.22 and associated dependencies (cert mgr, istio)\n+\n+### Major Features from each Working Group (note: Individual WG versions are independent of Kubeflow's)\n+\n+#### Kubeflow Pipelines, v1.8\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime.\n+\n+#### Katib, v0.13 \n+* Hyperparameter leader election for HA operations and faster recovery\n+* Validation for Early Stopping algorithm settings improves accuracy, reduces overfitting", - "comment_created_at": "2022-01-28T21:40:42+00:00", - "comment_author": "andreyvelich", - "comment_body": "Should we modify this @jbottum @tenzen-y @johnugeorge ?\r\n```suggestion\r\n* Validation for Early Stopping algorithm settings helps users to proper reduce model overfitting\r\n```", - "pr_file_module": null - }, - { - "comment_id": "794989244", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6266, - "pr_file": "ROADMAP.md", - "discussion_id": "794906792", - "commented_code": "@@ -1,10 +1,54 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.5 Release, Due: February 2022\n-* Kubeflow 1.5 timeline is tracked in [#538](https://github.com/kubeflow/community/pull/538)\n-* Additional details will be posted in early 1Q'22\n+## Kubeflow 1.5 Release, Due: March 2022\n+* Kubeflow 1.5 [milestones and timeline](https://github.com/kubeflow/community/pull/538)\n+\n+### Themes\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime\n+* Improve model accuracy and reduce overfitting, especially with hyper parameter tuning\n+* Simplify operations and optimize utilization (including spot instance use cases for distributed training) \n+* More consistent user experience - UI appearance, features and naming\n+* Improved documentation, tutorials and examples\n+* Stretch - Support for K8s 1.22 and associated dependencies (cert mgr, istio)\n+\n+### Major Features from each Working Group (note: Individual WG versions are independent of Kubeflow's)\n+\n+#### Kubeflow Pipelines, v1.8\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime.\n+\n+#### Katib, v0.13 \n+* Hyperparameter leader election for HA operations and faster 
recovery\n+* Validation for Early Stopping algorithm settings improves accuracy, reduces overfitting", - "comment_created_at": "2022-01-29T02:59:02+00:00", - "comment_author": "tenzen-y", - "comment_body": "It sounds great to me @andreyvelich!", - "pr_file_module": null - }, - { - "comment_id": "795076103", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6266, - "pr_file": "ROADMAP.md", - "discussion_id": "794906792", - "commented_code": "@@ -1,10 +1,54 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.5 Release, Due: February 2022\n-* Kubeflow 1.5 timeline is tracked in [#538](https://github.com/kubeflow/community/pull/538)\n-* Additional details will be posted in early 1Q'22\n+## Kubeflow 1.5 Release, Due: March 2022\n+* Kubeflow 1.5 [milestones and timeline](https://github.com/kubeflow/community/pull/538)\n+\n+### Themes\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime\n+* Improve model accuracy and reduce overfitting, especially with hyper parameter tuning\n+* Simplify operations and optimize utilization (including spot instance use cases for distributed training) \n+* More consistent user experience - UI appearance, features and naming\n+* Improved documentation, tutorials and examples\n+* Stretch - Support for K8s 1.22 and associated dependencies (cert mgr, istio)\n+\n+### Major Features from each Working Group (note: Individual WG versions are independent of Kubeflow's)\n+\n+#### Kubeflow Pipelines, v1.8\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime.\n+\n+#### Katib, v0.13 \n+* Hyperparameter leader election for HA operations and faster recovery\n+* Validation for Early Stopping algorithm settings improves accuracy, reduces overfitting", - "comment_created_at": "2022-01-29T17:11:39+00:00", - "comment_author": "jbottum", - "comment_body": "@andreyvelich do we need the word \"proper\"", - "pr_file_module": null - }, - { - "comment_id": "795191497", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6266, - "pr_file": "ROADMAP.md", - "discussion_id": "794906792", - "commented_code": "@@ -1,10 +1,54 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.5 Release, Due: February 2022\n-* Kubeflow 1.5 timeline is tracked in [#538](https://github.com/kubeflow/community/pull/538)\n-* Additional details will be posted in early 1Q'22\n+## Kubeflow 1.5 Release, Due: March 2022\n+* Kubeflow 1.5 [milestones and timeline](https://github.com/kubeflow/community/pull/538)\n+\n+### Themes\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime\n+* Improve model accuracy and reduce overfitting, especially with hyper parameter tuning\n+* Simplify operations and optimize utilization (including spot instance use cases for distributed training) \n+* More consistent user experience - UI appearance, features and naming\n+* Improved documentation, tutorials and examples\n+* Stretch - Support for K8s 1.22 and associated dependencies (cert mgr, istio)\n+\n+### Major Features from each Working Group (note: 
Individual WG versions are independent of Kubeflow's)\n+\n+#### Kubeflow Pipelines, v1.8\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime.\n+\n+#### Katib, v0.13 \n+* Hyperparameter leader election for HA operations and faster recovery\n+* Validation for Early Stopping algorithm settings improves accuracy, reduces overfitting", - "comment_created_at": "2022-01-30T14:38:59+00:00", - "comment_author": "andreyvelich", - "comment_body": "@jbottum Basically, validation helps to avoid user's errors and mistakes in APIs when they use Early Stopping.\r\nThat is why I was thinking about this change.", - "pr_file_module": null - }, - { - "comment_id": "795214523", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6266, - "pr_file": "ROADMAP.md", - "discussion_id": "794906792", - "commented_code": "@@ -1,10 +1,54 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.5 Release, Due: February 2022\n-* Kubeflow 1.5 timeline is tracked in [#538](https://github.com/kubeflow/community/pull/538)\n-* Additional details will be posted in early 1Q'22\n+## Kubeflow 1.5 Release, Due: March 2022\n+* Kubeflow 1.5 [milestones and timeline](https://github.com/kubeflow/community/pull/538)\n+\n+### Themes\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime\n+* Improve model accuracy and reduce overfitting, especially with hyper parameter tuning\n+* Simplify operations and optimize utilization (including spot instance use cases for distributed training) \n+* More consistent user experience - UI appearance, features and naming\n+* Improved documentation, tutorials and examples\n+* Stretch - Support for K8s 1.22 and associated dependencies (cert mgr, istio)\n+\n+### Major Features from each Working Group (note: Individual WG versions are independent of Kubeflow's)\n+\n+#### Kubeflow Pipelines, v1.8\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime.\n+\n+#### Katib, v0.13 \n+* Hyperparameter leader election for HA operations and faster recovery\n+* Validation for Early Stopping algorithm settings improves accuracy, reduces overfitting", - "comment_created_at": "2022-01-30T16:59:53+00:00", - "comment_author": "jbottum", - "comment_body": "@jbottum understand but I am not sure this correct - helps users to **proper** reduce model overfitting....shouldn't it read...helps users to reduce model overfitting ?", - "pr_file_module": null - }, - { - "comment_id": "796063923", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6266, - "pr_file": "ROADMAP.md", - "discussion_id": "794906792", - "commented_code": "@@ -1,10 +1,54 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.5 Release, Due: February 2022\n-* Kubeflow 1.5 timeline is tracked in [#538](https://github.com/kubeflow/community/pull/538)\n-* Additional details will be posted in early 1Q'22\n+## Kubeflow 1.5 Release, Due: March 2022\n+* Kubeflow 1.5 [milestones and timeline](https://github.com/kubeflow/community/pull/538)\n+\n+### Themes\n+* Switching to [Emissary 
executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime\n+* Improve model accuracy and reduce overfitting, especially with hyper parameter tuning\n+* Simplify operations and optimize utilization (including spot instance use cases for distributed training) \n+* More consistent user experience - UI appearance, features and naming\n+* Improved documentation, tutorials and examples\n+* Stretch - Support for K8s 1.22 and associated dependencies (cert mgr, istio)\n+\n+### Major Features from each Working Group (note: Individual WG versions are independent of Kubeflow's)\n+\n+#### Kubeflow Pipelines, v1.8\n+* Switching to [Emissary executor](https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/#emissary-executor) enables Kubeflow Pipelines deployment on Kubernetes >= v1.20, which runs on containerd runtime instead of Docker runtime.\n+\n+#### Katib, v0.13 \n+* Hyperparameter leader election for HA operations and faster recovery\n+* Validation for Early Stopping algorithm settings improves accuracy, reduces overfitting", - "comment_created_at": "2022-01-31T21:06:12+00:00", - "comment_author": "jbottum", - "comment_body": "@andreyvelich I put this in another commit lower down, need to move this forward.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1022650192", - "pr_number": 6705, - "pr_file": "ROADMAP.md", - "created_at": "2022-11-15T11:02:56+00:00", - "commented_code": "# Kubeflow Roadmap\n\n## Kubeflow 1.7 Release - planning now in process \n* Please join the Kubeflow Community Meetings on Tuesdays for updates and for opportunities to contribute. \n## Kubeflow 1.7 Release, Planned: March 2023 \nThe Kubeflow Community plans to deliver its v1.7 release in March 2023, per this [timeline](https://github.com/kubeflow/community/pull/573). The high level deliveries are being tracked in this [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1). The v1.7 release process will be managed by the v1.7 [Release Team](https://github.com/kubeflow/internal-acls/pull/576) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n\nNotable feature candidates in the [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1) are:\n* Support for Kubernetes 1.25\n* Improved user isolation especially for the Kubeflow pipelines user interface, database, and artifacts\n* Update Kubeflow Noteboks naming from Notebooks to Workbenches\n* Delivery of KFP V2 beta with its new front-end, backend and SDK \n* New, time saving workflows to simplify data exchange from Katib to Kubeflow Pipelines", - "repo_full_name": "kubeflow/kubeflow", - "discussion_comments": [ - { - "comment_id": "1022650192", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6705, - "pr_file": "ROADMAP.md", - "discussion_id": "1022650192", - "commented_code": "@@ -1,7 +1,18 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.7 Release - planning now in process \n-* Please join the Kubeflow Community Meetings on Tuesdays for updates and for opportunities to contribute. \n+## Kubeflow 1.7 Release, Planned: March 2023 \n+The Kubeflow Community plans to deliver its v1.7 release in March 2023, per this [timeline](https://github.com/kubeflow/community/pull/573). 
The high level deliveries are being tracked in this [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1). The v1.7 release process will be managed by the v1.7 [Release Team](https://github.com/kubeflow/internal-acls/pull/576) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n+\n+Notable feature candidates in the [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1) are:\n+* Support for Kubernetes 1.25\n+* Improved user isolation especially for the Kubeflow pipelines user interface, database, and artifacts\n+* Update Kubeflow Noteboks naming from Notebooks to Workbenches\n+* Delivery of KFP V2 beta with its new front-end, backend and SDK \n+* New, time saving workflows to simplify data exchange from Katib to Kubeflow Pipelines", - "comment_created_at": "2022-11-15T11:02:56+00:00", - "comment_author": "andreyvelich", - "comment_body": "I think, we should remove this part for Katib also.\r\n\r\nMaybe we can add the following:\r\n- Simplified creation of Katib and Training Operator experiments using SDKs.\r\n\r\nAny other items that we certainly will deliver in KF 1.7 @kubeflow/wg-training-leads @tenzen-y @anencore94 ?\r\n", - "pr_file_module": null - }, - { - "comment_id": "1022881155", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6705, - "pr_file": "ROADMAP.md", - "discussion_id": "1022650192", - "commented_code": "@@ -1,7 +1,18 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.7 Release - planning now in process \n-* Please join the Kubeflow Community Meetings on Tuesdays for updates and for opportunities to contribute. \n+## Kubeflow 1.7 Release, Planned: March 2023 \n+The Kubeflow Community plans to deliver its v1.7 release in March 2023, per this [timeline](https://github.com/kubeflow/community/pull/573). The high level deliveries are being tracked in this [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1). The v1.7 release process will be managed by the v1.7 [Release Team](https://github.com/kubeflow/internal-acls/pull/576) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n+\n+Notable feature candidates in the [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1) are:\n+* Support for Kubernetes 1.25\n+* Improved user isolation especially for the Kubeflow pipelines user interface, database, and artifacts\n+* Update Kubeflow Noteboks naming from Notebooks to Workbenches\n+* Delivery of KFP V2 beta with its new front-end, backend and SDK \n+* New, time saving workflows to simplify data exchange from Katib to Kubeflow Pipelines", - "comment_created_at": "2022-11-15T14:45:50+00:00", - "comment_author": "jbottum", - "comment_body": "@andreyvelich I have incorporated this update.", - "pr_file_module": null - }, - { - "comment_id": "1023583120", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6705, - "pr_file": "ROADMAP.md", - "discussion_id": "1022650192", - "commented_code": "@@ -1,7 +1,18 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.7 Release - planning now in process \n-* Please join the Kubeflow Community Meetings on Tuesdays for updates and for opportunities to contribute. \n+## Kubeflow 1.7 Release, Planned: March 2023 \n+The Kubeflow Community plans to deliver its v1.7 release in March 2023, per this [timeline](https://github.com/kubeflow/community/pull/573). 
The high level deliveries are being tracked in this [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1). The v1.7 release process will be managed by the v1.7 [Release Team](https://github.com/kubeflow/internal-acls/pull/576) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n+\n+Notable feature candidates in the [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1) are:\n+* Support for Kubernetes 1.25\n+* Improved user isolation especially for the Kubeflow pipelines user interface, database, and artifacts\n+* Update Kubeflow Noteboks naming from Notebooks to Workbenches\n+* Delivery of KFP V2 beta with its new front-end, backend and SDK \n+* New, time saving workflows to simplify data exchange from Katib to Kubeflow Pipelines", - "comment_created_at": "2022-11-16T07:15:52+00:00", - "comment_author": "tenzen-y", - "comment_body": "I have no items.", - "pr_file_module": null - }, - { - "comment_id": "1024303442", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6705, - "pr_file": "ROADMAP.md", - "discussion_id": "1022650192", - "commented_code": "@@ -1,7 +1,18 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.7 Release - planning now in process \n-* Please join the Kubeflow Community Meetings on Tuesdays for updates and for opportunities to contribute. \n+## Kubeflow 1.7 Release, Planned: March 2023 \n+The Kubeflow Community plans to deliver its v1.7 release in March 2023, per this [timeline](https://github.com/kubeflow/community/pull/573). The high level deliveries are being tracked in this [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1). The v1.7 release process will be managed by the v1.7 [Release Team](https://github.com/kubeflow/internal-acls/pull/576) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n+\n+Notable feature candidates in the [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1) are:\n+* Support for Kubernetes 1.25\n+* Improved user isolation especially for the Kubeflow pipelines user interface, database, and artifacts\n+* Update Kubeflow Noteboks naming from Notebooks to Workbenches\n+* Delivery of KFP V2 beta with its new front-end, backend and SDK \n+* New, time saving workflows to simplify data exchange from Katib to Kubeflow Pipelines", - "comment_created_at": "2022-11-16T17:31:13+00:00", - "comment_author": "andreyvelich", - "comment_body": "@jbottum We discussed on today's AutoML community meeting that we want to promote a few Katib UI changes as part of Kubeflow 1.7 ROADMAP. \r\nCan we indicate the most significant improvements/changes for the user that we will deliver in 1.7 ?\r\n@kimwnasptd @johnugeorge What Katib UI changes do we want on the ROADMAP that we deliver ?", - "pr_file_module": null - }, - { - "comment_id": "1024383796", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6705, - "pr_file": "ROADMAP.md", - "discussion_id": "1022650192", - "commented_code": "@@ -1,7 +1,18 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.7 Release - planning now in process \n-* Please join the Kubeflow Community Meetings on Tuesdays for updates and for opportunities to contribute. \n+## Kubeflow 1.7 Release, Planned: March 2023 \n+The Kubeflow Community plans to deliver its v1.7 release in March 2023, per this [timeline](https://github.com/kubeflow/community/pull/573). 
The high level deliveries are being tracked in this [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1). The v1.7 release process will be managed by the v1.7 [Release Team](https://github.com/kubeflow/internal-acls/pull/576) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n+\n+Notable feature candidates in the [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1) are:\n+* Support for Kubernetes 1.25\n+* Improved user isolation especially for the Kubeflow pipelines user interface, database, and artifacts\n+* Update Kubeflow Noteboks naming from Notebooks to Workbenches\n+* Delivery of KFP V2 beta with its new front-end, backend and SDK \n+* New, time saving workflows to simplify data exchange from Katib to Kubeflow Pipelines", - "comment_created_at": "2022-11-16T18:45:55+00:00", - "comment_author": "johnugeorge", - "comment_body": "Expose trial logs(https://github.com/kubeflow/katib/issues/971 )\r\nFiltering, Sorting trial list(https://github.com/kubeflow/katib/issues/1441 )", - "pr_file_module": null - }, - { - "comment_id": "1024558646", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6705, - "pr_file": "ROADMAP.md", - "discussion_id": "1022650192", - "commented_code": "@@ -1,7 +1,18 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.7 Release - planning now in process \n-* Please join the Kubeflow Community Meetings on Tuesdays for updates and for opportunities to contribute. \n+## Kubeflow 1.7 Release, Planned: March 2023 \n+The Kubeflow Community plans to deliver its v1.7 release in March 2023, per this [timeline](https://github.com/kubeflow/community/pull/573). The high level deliveries are being tracked in this [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1). The v1.7 release process will be managed by the v1.7 [Release Team](https://github.com/kubeflow/internal-acls/pull/576) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n+\n+Notable feature candidates in the [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1) are:\n+* Support for Kubernetes 1.25\n+* Improved user isolation especially for the Kubeflow pipelines user interface, database, and artifacts\n+* Update Kubeflow Noteboks naming from Notebooks to Workbenches\n+* Delivery of KFP V2 beta with its new front-end, backend and SDK \n+* New, time saving workflows to simplify data exchange from Katib to Kubeflow Pipelines", - "comment_created_at": "2022-11-16T22:14:08+00:00", - "comment_author": "jbottum", - "comment_body": "@andreyvelich @johnugeorge I have added line 13 to include these feature candidates in the roadmap. Please provide looks good or comments. thanks.", - "pr_file_module": null - }, - { - "comment_id": "1025136550", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6705, - "pr_file": "ROADMAP.md", - "discussion_id": "1022650192", - "commented_code": "@@ -1,7 +1,18 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.7 Release - planning now in process \n-* Please join the Kubeflow Community Meetings on Tuesdays for updates and for opportunities to contribute. \n+## Kubeflow 1.7 Release, Planned: March 2023 \n+The Kubeflow Community plans to deliver its v1.7 release in March 2023, per this [timeline](https://github.com/kubeflow/community/pull/573). The high level deliveries are being tracked in this [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1). 
The v1.7 release process will be managed by the v1.7 [Release Team](https://github.com/kubeflow/internal-acls/pull/576) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n+\n+Notable feature candidates in the [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1) are:\n+* Support for Kubernetes 1.25\n+* Improved user isolation especially for the Kubeflow pipelines user interface, database, and artifacts\n+* Update Kubeflow Noteboks naming from Notebooks to Workbenches\n+* Delivery of KFP V2 beta with its new front-end, backend and SDK \n+* New, time saving workflows to simplify data exchange from Katib to Kubeflow Pipelines", - "comment_created_at": "2022-11-17T12:38:21+00:00", - "comment_author": "andreyvelich", - "comment_body": "Thank you @jbottum. LGTM", - "pr_file_module": null - }, - { - "comment_id": "1025630075", - "repo_full_name": "kubeflow/kubeflow", - "pr_number": 6705, - "pr_file": "ROADMAP.md", - "discussion_id": "1022650192", - "commented_code": "@@ -1,7 +1,18 @@\n # Kubeflow Roadmap\n \n-## Kubeflow 1.7 Release - planning now in process \n-* Please join the Kubeflow Community Meetings on Tuesdays for updates and for opportunities to contribute. \n+## Kubeflow 1.7 Release, Planned: March 2023 \n+The Kubeflow Community plans to deliver its v1.7 release in March 2023, per this [timeline](https://github.com/kubeflow/community/pull/573). The high level deliveries are being tracked in this [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1). The v1.7 release process will be managed by the v1.7 [Release Team](https://github.com/kubeflow/internal-acls/pull/576) using the best practices in the [Release Handbook](https://github.com/kubeflow/community/blob/master/releases/handbook.md)\n+\n+Notable feature candidates in the [Project Board](https://github.com/orgs/kubeflow/projects/50/views/1) are:\n+* Support for Kubernetes 1.25\n+* Improved user isolation especially for the Kubeflow pipelines user interface, database, and artifacts\n+* Update Kubeflow Noteboks naming from Notebooks to Workbenches\n+* Delivery of KFP V2 beta with its new front-end, backend and SDK \n+* New, time saving workflows to simplify data exchange from Katib to Kubeflow Pipelines", - "comment_created_at": "2022-11-17T19:42:25+00:00", - "comment_author": "johnugeorge", - "comment_body": "LGTM ", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-ai-dependency-management.json b/_reviewers/langchainjs-ai-dependency-management.json new file mode 100644 index 0000000..8b1e554 --- /dev/null +++ b/_reviewers/langchainjs-ai-dependency-management.json @@ -0,0 +1,104 @@ +[ + { + "discussion_id": "1845263088", + "pr_number": 7177, + "pr_file": "libs/langchain-community/package.json", + "created_at": "2024-11-17T00:13:53+00:00", + "commented_code": "\"node-llama-cpp\": \"3.1.1\",\n \"notion-to-md\": \"^3.1.0\",\n \"officeparser\": \"^4.0.4\",\n \"openai\": \"^4.72.0\",", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1845263088", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7177, + "pr_file": "libs/langchain-community/package.json", + "discussion_id": "1845263088", + "commented_code": "@@ -190,6 +192,7 @@\n \"node-llama-cpp\": \"3.1.1\",\n \"notion-to-md\": \"^3.1.0\",\n \"officeparser\": \"^4.0.4\",\n+ \"openai\": \"^4.72.0\",", + "comment_created_at": "2024-11-17T00:13:53+00:00", + "comment_author": "jacoblee93", + 
"comment_body": "Did we need to add this?", + "pr_file_module": null + }, + { + "comment_id": "1850608332", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7177, + "pr_file": "libs/langchain-community/package.json", + "discussion_id": "1845263088", + "commented_code": "@@ -190,6 +192,7 @@\n \"node-llama-cpp\": \"3.1.1\",\n \"notion-to-md\": \"^3.1.0\",\n \"officeparser\": \"^4.0.4\",\n+ \"openai\": \"^4.72.0\",", + "comment_created_at": "2024-11-20T16:13:55+00:00", + "comment_author": "filip-michalsky", + "comment_body": "@jacoblee93 removed, will use `npm install @langchain/openai` instead.", + "pr_file_module": null + }, + { + "comment_id": "1851241728", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7177, + "pr_file": "libs/langchain-community/package.json", + "discussion_id": "1845263088", + "commented_code": "@@ -190,6 +192,7 @@\n \"node-llama-cpp\": \"3.1.1\",\n \"notion-to-md\": \"^3.1.0\",\n \"officeparser\": \"^4.0.4\",\n+ \"openai\": \"^4.72.0\",", + "comment_created_at": "2024-11-21T02:48:51+00:00", + "comment_author": "filip-michalsky", + "comment_body": "note that for this I had to remove this dependency from the stagehand lib itself. It will only work with stagehand version 1.2.0+", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1790892819", + "pr_number": 6916, + "pr_file": "libs/langchain-community/package.json", + "created_at": "2024-10-07T21:22:12+00:00", + "commented_code": "\"author\": \"LangChain\",\n \"license\": \"MIT\",\n \"dependencies\": {\n \"@ibm-cloud/watsonx-ai\": \"1.0.1\",", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1790892819", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6916, + "pr_file": "libs/langchain-community/package.json", + "discussion_id": "1790892819", + "commented_code": "@@ -35,10 +35,12 @@\n \"author\": \"LangChain\",\n \"license\": \"MIT\",\n \"dependencies\": {\n+ \"@ibm-cloud/watsonx-ai\": \"1.0.1\",", + "comment_created_at": "2024-10-07T21:22:12+00:00", + "comment_author": "jacoblee93", + "comment_body": "We don't accept hard dependencies, see:\r\n\r\nhttps://github.com/langchain-ai/langchainjs/blob/main/.github/contributing/INTEGRATIONS.md ", + "pr_file_module": null + }, + { + "comment_id": "1792027774", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6916, + "pr_file": "libs/langchain-community/package.json", + "discussion_id": "1790892819", + "commented_code": "@@ -35,10 +35,12 @@\n \"author\": \"LangChain\",\n \"license\": \"MIT\",\n \"dependencies\": {\n+ \"@ibm-cloud/watsonx-ai\": \"1.0.1\",", + "comment_created_at": "2024-10-08T14:48:10+00:00", + "comment_author": "FilipZmijewski", + "comment_body": "Fixed", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1637150241", + "pr_number": 5737, + "pr_file": "libs/langchain-anthropic/package.json", + "created_at": "2024-06-12T21:53:24+00:00", + "commented_code": "\"author\": \"LangChain\",", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1637150241", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5737, + "pr_file": "libs/langchain-anthropic/package.json", + "discussion_id": "1637150241", + "commented_code": "@@ -35,7 +35,7 @@\n \"author\": \"LangChain\",", + "comment_created_at": "2024-06-12T21:53:24+00:00", + "comment_author": "dosubot[bot]", + "comment_body": "Hey there! 
I noticed the dependency change for \"@anthropic-ai/sdk\" from \"^0.21.0\" to \"^0.22.0\" in the package.json file. This comment is to flag the change for maintainers to review. Great work on the PR!", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-ai-dependency-management.md b/_reviewers/langchainjs-ai-dependency-management.md index 6e4de71..725713c 100644 --- a/_reviewers/langchainjs-ai-dependency-management.md +++ b/_reviewers/langchainjs-ai-dependency-management.md @@ -34,109 +34,3 @@ When integrating AI models and language processing libraries, follow these depen 3. When updating AI SDK versions, clearly flag these changes for maintainers to review, as they may impact compatibility or introduce new features that require additional testing. This approach ensures better maintainability, clearer dependency trees, and more explicit relationships between your project and the AI services it leverages. - - -[ - { - "discussion_id": "1845263088", - "pr_number": 7177, - "pr_file": "libs/langchain-community/package.json", - "created_at": "2024-11-17T00:13:53+00:00", - "commented_code": "\"node-llama-cpp\": \"3.1.1\",\n \"notion-to-md\": \"^3.1.0\",\n \"officeparser\": \"^4.0.4\",\n \"openai\": \"^4.72.0\",", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1845263088", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7177, - "pr_file": "libs/langchain-community/package.json", - "discussion_id": "1845263088", - "commented_code": "@@ -190,6 +192,7 @@\n \"node-llama-cpp\": \"3.1.1\",\n \"notion-to-md\": \"^3.1.0\",\n \"officeparser\": \"^4.0.4\",\n+ \"openai\": \"^4.72.0\",", - "comment_created_at": "2024-11-17T00:13:53+00:00", - "comment_author": "jacoblee93", - "comment_body": "Did we need to add this?", - "pr_file_module": null - }, - { - "comment_id": "1850608332", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7177, - "pr_file": "libs/langchain-community/package.json", - "discussion_id": "1845263088", - "commented_code": "@@ -190,6 +192,7 @@\n \"node-llama-cpp\": \"3.1.1\",\n \"notion-to-md\": \"^3.1.0\",\n \"officeparser\": \"^4.0.4\",\n+ \"openai\": \"^4.72.0\",", - "comment_created_at": "2024-11-20T16:13:55+00:00", - "comment_author": "filip-michalsky", - "comment_body": "@jacoblee93 removed, will use `npm install @langchain/openai` instead.", - "pr_file_module": null - }, - { - "comment_id": "1851241728", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7177, - "pr_file": "libs/langchain-community/package.json", - "discussion_id": "1845263088", - "commented_code": "@@ -190,6 +192,7 @@\n \"node-llama-cpp\": \"3.1.1\",\n \"notion-to-md\": \"^3.1.0\",\n \"officeparser\": \"^4.0.4\",\n+ \"openai\": \"^4.72.0\",", - "comment_created_at": "2024-11-21T02:48:51+00:00", - "comment_author": "filip-michalsky", - "comment_body": "note that for this I had to remove this dependency from the stagehand lib itself. 
It will only work with stagehand version 1.2.0+", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1790892819", - "pr_number": 6916, - "pr_file": "libs/langchain-community/package.json", - "created_at": "2024-10-07T21:22:12+00:00", - "commented_code": "\"author\": \"LangChain\",\n \"license\": \"MIT\",\n \"dependencies\": {\n \"@ibm-cloud/watsonx-ai\": \"1.0.1\",", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1790892819", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6916, - "pr_file": "libs/langchain-community/package.json", - "discussion_id": "1790892819", - "commented_code": "@@ -35,10 +35,12 @@\n \"author\": \"LangChain\",\n \"license\": \"MIT\",\n \"dependencies\": {\n+ \"@ibm-cloud/watsonx-ai\": \"1.0.1\",", - "comment_created_at": "2024-10-07T21:22:12+00:00", - "comment_author": "jacoblee93", - "comment_body": "We don't accept hard dependencies, see:\r\n\r\nhttps://github.com/langchain-ai/langchainjs/blob/main/.github/contributing/INTEGRATIONS.md ", - "pr_file_module": null - }, - { - "comment_id": "1792027774", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6916, - "pr_file": "libs/langchain-community/package.json", - "discussion_id": "1790892819", - "commented_code": "@@ -35,10 +35,12 @@\n \"author\": \"LangChain\",\n \"license\": \"MIT\",\n \"dependencies\": {\n+ \"@ibm-cloud/watsonx-ai\": \"1.0.1\",", - "comment_created_at": "2024-10-08T14:48:10+00:00", - "comment_author": "FilipZmijewski", - "comment_body": "Fixed", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1637150241", - "pr_number": 5737, - "pr_file": "libs/langchain-anthropic/package.json", - "created_at": "2024-06-12T21:53:24+00:00", - "commented_code": "\"author\": \"LangChain\",", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1637150241", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5737, - "pr_file": "libs/langchain-anthropic/package.json", - "discussion_id": "1637150241", - "commented_code": "@@ -35,7 +35,7 @@\n \"author\": \"LangChain\",", - "comment_created_at": "2024-06-12T21:53:24+00:00", - "comment_author": "dosubot[bot]", - "comment_body": "Hey there! I noticed the dependency change for \"@anthropic-ai/sdk\" from \"^0.21.0\" to \"^0.22.0\" in the package.json file. This comment is to flag the change for maintainers to review. 
Great work on the PR!", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-avoid-hardcoded-configurations.json b/_reviewers/langchainjs-avoid-hardcoded-configurations.json new file mode 100644 index 0000000..67ad644 --- /dev/null +++ b/_reviewers/langchainjs-avoid-hardcoded-configurations.json @@ -0,0 +1,162 @@ +[ + { + "discussion_id": "1417967601", + "pr_number": 3465, + "pr_file": "langchain/src/tools/outlook/authFlowREST.ts", + "created_at": "2023-12-06T20:54:51+00:00", + "commented_code": "import * as http from \"http\";\nimport * as url from \"url\";\nimport { AuthFlowBase } from \"./authFlowBase.js\";\nimport { getEnvironmentVariable } from \"../../util/env.js\";\n\ninterface AccessTokenResponse {\n access_token: string;\n refresh_token: string;\n}\n\n// this class uses your clientId, clientSecret and redirectUri\n// to get access token and refresh token, if you already have them,\n// you can use AuthFlowToken or AuthFlowRefresh instead\nexport class AuthFlowREST extends AuthFlowBase {\n private clientSecret: string;\n\n private redirectUri: string;\n\n private port: number;\n\n private pathname: string;\n\n private refreshToken = \"\";\n\n constructor({\n clientId,\n clientSecret,\n redirectUri,\n }: { clientId?: string; clientSecret?: string; redirectUri?: string } = {}) {\n let id = clientId;\n let secret = clientSecret;\n let uri = redirectUri;\n if (!id || !secret || !uri) {\n id = getEnvironmentVariable(\"OUTLOOK_CLIENT_ID\");\n secret = getEnvironmentVariable(\"OUTLOOK_CLIENT_SECRET\");\n uri = getEnvironmentVariable(\"OUTLOOK_REDIRECT_URI\");\n }", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1417967601", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3465, + "pr_file": "langchain/src/tools/outlook/authFlowREST.ts", + "discussion_id": "1417967601", + "commented_code": "@@ -0,0 +1,205 @@\n+import * as http from \"http\";\n+import * as url from \"url\";\n+import { AuthFlowBase } from \"./authFlowBase.js\";\n+import { getEnvironmentVariable } from \"../../util/env.js\";\n+\n+interface AccessTokenResponse {\n+ access_token: string;\n+ refresh_token: string;\n+}\n+\n+// this class uses your clientId, clientSecret and redirectUri\n+// to get access token and refresh token, if you already have them,\n+// you can use AuthFlowToken or AuthFlowRefresh instead\n+export class AuthFlowREST extends AuthFlowBase {\n+ private clientSecret: string;\n+\n+ private redirectUri: string;\n+\n+ private port: number;\n+\n+ private pathname: string;\n+\n+ private refreshToken = \"\";\n+\n+ constructor({\n+ clientId,\n+ clientSecret,\n+ redirectUri,\n+ }: { clientId?: string; clientSecret?: string; redirectUri?: string } = {}) {\n+ let id = clientId;\n+ let secret = clientSecret;\n+ let uri = redirectUri;\n+ if (!id || !secret || !uri) {\n+ id = getEnvironmentVariable(\"OUTLOOK_CLIENT_ID\");\n+ secret = getEnvironmentVariable(\"OUTLOOK_CLIENT_SECRET\");\n+ uri = getEnvironmentVariable(\"OUTLOOK_REDIRECT_URI\");\n+ }", + "comment_created_at": "2023-12-06T20:54:51+00:00", + "comment_author": "bracesproul", + "comment_body": "We shouldn't override all if one isn't passed, instead lets do something like this:\r\n```suggestion\r\n let id = clientId;\r\n let secret = clientSecret;\r\n let uri = redirectUri;\r\n if (!id || !secret || !uri) {\r\n id = id ?? getEnvironmentVariable(\"OUTLOOK_CLIENT_ID\");\r\n secret = secret ?? getEnvironmentVariable(\"OUTLOOK_CLIENT_SECRET\");\r\n uri = uri ?? 
getEnvironmentVariable(\"OUTLOOK_REDIRECT_URI\");\r\n }\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1456404750", + "pr_number": 4044, + "pr_file": "libs/langchain-community/src/vectorstores/azure_aisearch.ts", + "created_at": "2024-01-17T19:56:38+00:00", + "commented_code": "import * as uuid from \"uuid\";\nimport {\n SearchClient,\n SearchIndexClient,\n AzureKeyCredential,\n IndexingResult,\n SearchIndex,\n} from \"@azure/search-documents\";\nimport {\n MaxMarginalRelevanceSearchOptions,\n VectorStore,\n} from \"@langchain/core/vectorstores\";\nimport type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\nimport { Document } from \"@langchain/core/documents\";\nimport { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\nimport { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n\n/**\n * Azure AI Search query type.\n */\nexport const AzureAISearchQueryType = {\n /** Vector search. */\n Similarity: \"similarity\",\n /** Hybrid full text and vector search. */\n SimilarityHybrid: \"similarity_hybrid\",\n /** Hybrid full text and vector search with semantic ranking. */\n SemanticHybrid: \"semantic_hybrid\",\n} as const;\n\n/**\n * Azure AI Search query type.\n */\nexport type AzureAISearchQueryType =\n (typeof AzureAISearchQueryType)[keyof typeof AzureAISearchQueryType];\n\n/**\n * Azure AI Search settings.\n */\nexport interface AzureAISearchQueryOptions {\n readonly type: AzureAISearchQueryType;\n readonly semanticConfigurationName?: string;\n}\n\n/**\n * Configuration options for the `AzureAISearchStore` constructor.\n */\nexport interface AzureAISearchConfig {\n readonly client?: SearchClient;\n readonly indexName?: string;\n readonly endpoint?: string;\n readonly key?: string;\n readonly search: AzureAISearchQueryOptions;\n /**\n * The amount of documents to chunk by when adding vectors.\n * @default 100\n */\n readonly chunkSize?: number;\n /**\n * The amount of documents to embed at once when adding documents.\n * Note that some providers like Azure OpenAI can only embed 16 documents\n * at a time.\n * @default 16\n */\n readonly embeddingBatchSize?: number;\n}\n\n/**\n * Azure AI Search options metadata schema.\n * If yout want to add custom data, use the attributes property.\n */\nexport type AzureAISearchDocumentMetadata = {\n source: string;\n attributes?: Array<{ key: string; value: string }>;\n};\n\n/**\n * Azure AI Search indexed document.\n */\nexport type AzureAISearchDocument = {\n id: string;\n content: string;\n content_vector: number[];\n metadata: AzureAISearchDocumentMetadata;\n};\n\n/**\n * Azure AI Search options for adding documents.\n */\nexport type AzureAISearchAddDocumentsOptions = {\n ids?: string[];\n};\n\nconst DEFAULT_FIELD_ID = \"id\";\nconst DEFAULT_FIELD_CONTENT = \"content\";\nconst DEFAULT_FIELD_CONTENT_VECTOR = \"content_vector\";\nconst DEFAULT_FIELD_METADATA = \"metadata\";\nconst DEFAULT_FIELD_METADATA_SOURCE = \"source\";\nconst DEFAULT_FIELD_METADATA_ATTRS = \"attributes\";\n\n/**\n * Azure AI Search vector store.\n * To use this, you should have:\n * - the `@azure/search-documents` NPM package installed\n * - an endpoint and key to the Azure AI Search instance\n *\n * If you directly provide a `SearchClient` instance, you need to ensure that\n * an index has been created. 
When using and endpoint and key, the index will\n * be created automatically if it does not exist.\n */\nexport class AzureAISearchVectorStore extends VectorStore {\n declare FilterType: string;\n\n get lc_secrets(): { [key: string]: string } {\n return {\n endpoint: \"AZURE_AISEARCH_ENDPOINT\",\n key: \"AZURE_AISEARCH_KEY\",\n };\n }\n\n _vectorstoreType(): string {\n return \"azure_aisearch\";\n }\n\n private readonly initPromise: Promise;\n\n private readonly client: SearchClient;\n\n private readonly indexName: string;\n\n private readonly chunkSize: number;\n\n private readonly embeddingBatchSize: number;\n\n private readonly options: AzureAISearchQueryOptions;\n\n constructor(embeddings: EmbeddingsInterface, config: AzureAISearchConfig) {\n super(embeddings, config);\n\n const endpoint =\n config.endpoint ?? getEnvironmentVariable(\"AZURE_AISEARCH_ENDPOINT\");\n const key = config.key ?? getEnvironmentVariable(\"AZURE_AISEARCH_KEY\");\n\n if (!config.client && !endpoint && !key) {\n throw new Error(\n \"Azure AI Search client or connection string must be set.\"\n );\n }\n\n this.indexName = config.indexName ?? \"vectorsearch\";\n this.chunkSize = config.chunkSize ?? 100;\n this.embeddingBatchSize = config.embeddingBatchSize ?? 16;\n\n if (!config.client) {\n // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n const credential = new AzureKeyCredential(key!);\n // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n this.client = new SearchClient(endpoint!, this.indexName, credential);\n // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n const indexClient = new SearchIndexClient(endpoint!, credential);\n\n // Start initialization, but don't wait for it to finish here\n this.initPromise = this.ensureIndexExists(indexClient).catch((error) => {\n console.error(\n \"Error during Azure AI Search index initialization:\",\n error\n );\n });\n } else {\n this.client = config.client;\n }\n\n this.options = config.search;\n this.embeddings = embeddings;\n }\n\n /**\n * Removes specified documents from the AzureAISearchVectorStore using a filter.\n * @param filter OData filter to find documents to delete.\n * @returns A promise that resolves when the documents have been removed.\n */\n async deleteMany(filter: string): Promise {\n const { results } = await this.client.search(\"*\", {\n filter,\n });\n\n const ids: string[] = [];\n for await (const item of results) {\n ids.push(item.document.id);\n }\n\n const { results: deleteResults } = await this.client.deleteDocuments(\n DEFAULT_FIELD_ID,\n ids\n );\n return deleteResults;\n }\n\n /**\n * Removes specified documents from the AzureAISearchVectorStore.\n * @param ids IDs of the documents to be removed.\n * @returns A promise that resolves when the documents have been removed.\n */\n async deleteById(ids: string | string[]): Promise {\n await this.initPromise;\n const { results } = await this.client.deleteDocuments(\n DEFAULT_FIELD_ID,\n Array.isArray(ids) ? 
ids : [ids]\n );\n return results;\n }\n\n /**\n * Adds documents to the AzureAISearchVectorStore.\n * Documents are chunked into batches of size `embeddingBatchSize` then\n * embedded and added to the AzureAISearchVectorStore.\n * @param documents The documents to add.\n * @param options Options for adding documents.\n * @returns A promise that resolves to the ids of the added documents.\n */\n async addDocuments(\n documents: Document[],\n options?: AzureAISearchAddDocumentsOptions\n ) {\n const texts = documents.map(({ pageContent }) => pageContent);\n const results: string[] = [];\n\n for (let i = 0; i < texts.length; i += this.embeddingBatchSize) {\n const batch = texts.slice(i, i + this.embeddingBatchSize);\n const docsBatch = documents.slice(i, i + this.embeddingBatchSize);\n const batchEmbeddings: number[][] = await this.embeddings.embedDocuments(\n batch\n );\n const batchResult = await this.addVectors(\n batchEmbeddings,\n docsBatch,\n options\n );\n\n results.push(...batchResult);\n }\n\n return results;\n }\n\n /**\n * Adds vectors to the AzureAISearchVectorStore.\n * @param vectors Vectors to be added.\n * @param documents Corresponding documents to be added.\n * @param options Options for adding documents.\n * @returns A promise that resolves to the ids of the added documents.\n */\n async addVectors(\n vectors: number[][],\n documents: Document[],\n options?: AzureAISearchAddDocumentsOptions\n ): Promise {\n const ids = options?.ids ?? documents.map(() => uuid.v4());\n const entities: AzureAISearchDocument[] = documents.map((doc, idx) => ({\n id: ids[idx],\n content: doc.pageContent,\n content_vector: vectors[idx],\n metadata: {\n source: doc.metadata?.source,\n attributes: doc.metadata?.attributes ?? [],\n },\n }));\n\n await this.initPromise;\n for (let i = 0; i < entities.length; i += this.chunkSize) {\n const chunk = entities.slice(i, i + this.chunkSize);\n await this.client.uploadDocuments(chunk, { throwOnAnyFailure: true });\n }\n\n return ids;\n }\n\n /**\n * Performs a similarity search using query type specified in configuration.\n * @param query Query text for the similarity search.\n * @param k=4 Number of nearest neighbors to return.\n * @param filter Optional OData filter for the documents.\n * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n */\n async similaritySearch(\n query: string,\n k = 4,\n filter: this[\"FilterType\"] | undefined = undefined\n ): Promise {\n const results = await this.similaritySearchWithScore(query, k, filter);\n\n return results.map((result) => result[0]);\n }\n\n /**\n * Performs a similarity search using query type specified in configuration.\n * @param query Query text for the similarity search.\n * @param k=4 Number of nearest neighbors to return.\n * @param filter Optional OData filter for the documents.\n * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n */\n async similaritySearchWithScore(\n query: string,\n k = 4,\n filter: this[\"FilterType\"] | undefined = undefined\n ): Promise<[Document, number][]> {\n const searchType = this.options.type;\n\n if (searchType === AzureAISearchQueryType.Similarity) {\n return this.similaritySearchVectorWithScore(\n await this.embeddings.embedQuery(query),\n k,\n filter\n );\n } else if (searchType === AzureAISearchQueryType.SimilarityHybrid) {\n return this.hybridSearchVectorWithScore(\n query,\n await this.embeddings.embedQuery(query),\n k,\n filter\n );\n } else if (searchType === 
AzureAISearchQueryType.SemanticHybrid) {\n return this.semanticHybridSearchVectorWithScore(\n query,\n await this.embeddings.embedQuery(query),\n k,\n filter\n );\n }\n\n throw new Error(`Unrecognized search type '${searchType}'`);\n }\n\n /**\n * Performs a hybrid search using query text.\n * @param query Query text for the similarity search.\n * @param queryVector Query vector for the similarity search.\n * If not provided, the query text will be embedded.\n * @param k=4 Number of nearest neighbors to return.\n * @param filter Optional OData filter for the documents.\n * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n */\n async hybridSearchVectorWithScore(\n query: string,\n queryVector?: number[],\n k = 4,\n filter: string | undefined = undefined\n ): Promise<[Document, number][]> {\n const vector = queryVector ?? (await this.embeddings.embedQuery(query));\n\n await this.initPromise;\n const { results } = await this.client.search(query, {\n vectorSearchOptions: {\n queries: [\n {\n kind: \"vector\",\n vector,\n kNearestNeighborsCount: k,\n fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n },\n ],\n },\n filter,\n top: k,\n });\n\n const docsWithScore: [Document, number][] = [];\n\n for await (const item of results) {\n const document = new Document<\n AzureAISearchDocumentMetadata & { embedding: number[] }\n >({\n pageContent: item.document[DEFAULT_FIELD_CONTENT],\n metadata: {\n ...item.document[DEFAULT_FIELD_METADATA],\n embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n },\n });\n docsWithScore.push([document, item.score]);\n }\n\n return docsWithScore;\n }\n\n /**\n * Performs a hybrid search with semantic reranker using query text.\n * @param query Query text for the similarity search.\n * @param queryVector Query vector for the similarity search.\n * If not provided, the query text will be embedded.\n * @param k=4 Number of nearest neighbors to return.\n * @param filter Optional OData filter for the documents.\n * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n */\n async semanticHybridSearchVectorWithScore(\n query: string,\n queryVector?: number[],\n k = 4,\n filter: string | undefined = undefined\n ): Promise<[Document, number][]> {\n const vector = queryVector ?? 
(await this.embeddings.embedQuery(query));\n\n await this.initPromise;\n const { results } = await this.client.search(query, {\n vectorSearchOptions: {\n queries: [\n {\n kind: \"vector\",\n vector,\n kNearestNeighborsCount: k,\n fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n },\n ],\n },\n filter,\n top: k,\n queryType: \"semantic\",\n semanticSearchOptions: {\n configurationName: \"semantic-search-config\",\n captions: {\n captionType: \"extractive\",\n },\n answers: {\n answerType: \"extractive\",\n },\n },\n });\n\n const docsWithScore: [Document, number][] = [];\n\n for await (const item of results) {\n const document = new Document<\n AzureAISearchDocumentMetadata & { embedding: number[] }\n >({\n pageContent: item.document[DEFAULT_FIELD_CONTENT],\n metadata: {\n ...item.document[DEFAULT_FIELD_METADATA],\n embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n },\n });\n docsWithScore.push([document, item.score]);\n }\n\n return docsWithScore;\n }\n\n /**\n * Performs a similarity search on the vectors stored in the collection.\n * @param queryVector Query vector for the similarity search.\n * @param k=4 Number of nearest neighbors to return.\n * @param filter string OData filter for the documents.\n * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n */\n async similaritySearchVectorWithScore(\n query: number[],\n k: number,\n filter?: string\n ): Promise<[Document, number][]> {\n await this.initPromise;\n\n const { results } = await this.client.search(\"*\", {\n vectorSearchOptions: {\n queries: [\n {\n kind: \"vector\",\n vector: query,\n kNearestNeighborsCount: k,\n fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n },\n ],\n },\n filter,\n });\n\n const docsWithScore: [Document, number][] = [];\n\n for await (const item of results) {\n const document = new Document<\n AzureAISearchDocumentMetadata & { embedding: number[] }\n >({\n pageContent: item.document[DEFAULT_FIELD_CONTENT],\n metadata: {\n ...item.document[DEFAULT_FIELD_METADATA],\n embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n },\n });\n docsWithScore.push([document, item.score]);\n }\n\n return docsWithScore;\n }\n\n /**\n * Return documents selected using the maximal marginal relevance.\n * Maximal marginal relevance optimizes for similarity to the query AND\n * diversity among selected documents.\n * @param query Text to look up documents similar to.\n * @param options.k Number of documents to return.\n * @param options.fetchK=20 Number of documents to fetch before passing to\n * the MMR algorithm.\n * @param options.lambda=0.5 Number between 0 and 1 that determines the\n * degree of diversity among the results, where 0 corresponds to maximum\n * diversity and 1 to minimum diversity.\n * @returns List of documents selected by maximal marginal relevance.\n */\n async maxMarginalRelevanceSearch(\n query: string,\n options: MaxMarginalRelevanceSearchOptions\n ): Promise {\n const { k, fetchK = 20, lambda = 0.5 } = options;\n\n const queryEmbedding = await this.embeddings.embedQuery(query);\n const docs = await this.similaritySearchVectorWithScore(\n queryEmbedding,\n fetchK\n );\n const embeddingList = docs.map((doc) => doc[0].metadata.embedding);\n\n // Re-rank the results using MMR\n const mmrIndexes = maximalMarginalRelevance(\n queryEmbedding,\n embeddingList,\n lambda,\n k\n );\n\n const mmrDocs = mmrIndexes.map((index) => docs[index][0]);\n return mmrDocs;\n }\n\n /**\n * Ensures that an index exists on the AzureAISearchVectorStore.\n * @param indexClient The Azure 
AI Search index client.\n * @returns A promise that resolves when the AzureAISearchVectorStore index has been initialized.\n * @protected\n */\n protected async ensureIndexExists(\n indexClient: SearchIndexClient\n ): Promise {\n try {\n await indexClient.getIndex(this.indexName);\n } catch (e) {\n // Index does not exists, create it\n const searchIndex = this.createSearchIndexDefinition(this.indexName);\n await indexClient.createIndex(searchIndex);\n }\n }\n\n /**\n * Prepares the search index definition for Azure AI Search.\n * @param indexName The name of the index.\n * @returns The SearchIndex object.\n * @protected\n */\n protected createSearchIndexDefinition(indexName: string): SearchIndex {\n return {\n name: indexName,\n vectorSearch: {\n algorithms: [\n {\n name: \"vector-search-algorithm\",\n kind: \"hnsw\",\n parameters: {\n m: 4,\n efSearch: 500,\n metric: \"cosine\",\n efConstruction: 400,\n },\n },\n ],\n profiles: [\n {\n name: \"vector-search-profile\",\n algorithmConfigurationName: \"vector-search-algorithm\",\n },\n ],\n },\n semanticSearch: {\n defaultConfigurationName: \"semantic-search-config\",\n configurations: [\n {\n name: \"semantic-search-config\",\n prioritizedFields: {\n contentFields: [\n {\n name: DEFAULT_FIELD_CONTENT,\n },\n ],\n keywordsFields: [\n {\n name: DEFAULT_FIELD_CONTENT,\n },\n ],\n },\n },\n ],\n },\n fields: [\n {\n name: DEFAULT_FIELD_ID,\n filterable: true,\n key: true,\n type: \"Edm.String\",\n },\n {\n name: DEFAULT_FIELD_CONTENT,\n searchable: true,\n filterable: true,\n type: \"Edm.String\",\n },\n {\n name: DEFAULT_FIELD_CONTENT_VECTOR,\n searchable: true,\n type: \"Collection(Edm.Single)\",\n vectorSearchDimensions: 1536,", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1456404750", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 4044, + "pr_file": "libs/langchain-community/src/vectorstores/azure_aisearch.ts", + "discussion_id": "1456404750", + "commented_code": "@@ -0,0 +1,694 @@\n+import * as uuid from \"uuid\";\n+import {\n+ SearchClient,\n+ SearchIndexClient,\n+ AzureKeyCredential,\n+ IndexingResult,\n+ SearchIndex,\n+} from \"@azure/search-documents\";\n+import {\n+ MaxMarginalRelevanceSearchOptions,\n+ VectorStore,\n+} from \"@langchain/core/vectorstores\";\n+import type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { Document } from \"@langchain/core/documents\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export const AzureAISearchQueryType = {\n+ /** Vector search. */\n+ Similarity: \"similarity\",\n+ /** Hybrid full text and vector search. */\n+ SimilarityHybrid: \"similarity_hybrid\",\n+ /** Hybrid full text and vector search with semantic ranking. 
*/\n+ SemanticHybrid: \"semantic_hybrid\",\n+} as const;\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export type AzureAISearchQueryType =\n+ (typeof AzureAISearchQueryType)[keyof typeof AzureAISearchQueryType];\n+\n+/**\n+ * Azure AI Search settings.\n+ */\n+export interface AzureAISearchQueryOptions {\n+ readonly type: AzureAISearchQueryType;\n+ readonly semanticConfigurationName?: string;\n+}\n+\n+/**\n+ * Configuration options for the `AzureAISearchStore` constructor.\n+ */\n+export interface AzureAISearchConfig {\n+ readonly client?: SearchClient;\n+ readonly indexName?: string;\n+ readonly endpoint?: string;\n+ readonly key?: string;\n+ readonly search: AzureAISearchQueryOptions;\n+ /**\n+ * The amount of documents to chunk by when adding vectors.\n+ * @default 100\n+ */\n+ readonly chunkSize?: number;\n+ /**\n+ * The amount of documents to embed at once when adding documents.\n+ * Note that some providers like Azure OpenAI can only embed 16 documents\n+ * at a time.\n+ * @default 16\n+ */\n+ readonly embeddingBatchSize?: number;\n+}\n+\n+/**\n+ * Azure AI Search options metadata schema.\n+ * If yout want to add custom data, use the attributes property.\n+ */\n+export type AzureAISearchDocumentMetadata = {\n+ source: string;\n+ attributes?: Array<{ key: string; value: string }>;\n+};\n+\n+/**\n+ * Azure AI Search indexed document.\n+ */\n+export type AzureAISearchDocument = {\n+ id: string;\n+ content: string;\n+ content_vector: number[];\n+ metadata: AzureAISearchDocumentMetadata;\n+};\n+\n+/**\n+ * Azure AI Search options for adding documents.\n+ */\n+export type AzureAISearchAddDocumentsOptions = {\n+ ids?: string[];\n+};\n+\n+const DEFAULT_FIELD_ID = \"id\";\n+const DEFAULT_FIELD_CONTENT = \"content\";\n+const DEFAULT_FIELD_CONTENT_VECTOR = \"content_vector\";\n+const DEFAULT_FIELD_METADATA = \"metadata\";\n+const DEFAULT_FIELD_METADATA_SOURCE = \"source\";\n+const DEFAULT_FIELD_METADATA_ATTRS = \"attributes\";\n+\n+/**\n+ * Azure AI Search vector store.\n+ * To use this, you should have:\n+ * - the `@azure/search-documents` NPM package installed\n+ * - an endpoint and key to the Azure AI Search instance\n+ *\n+ * If you directly provide a `SearchClient` instance, you need to ensure that\n+ * an index has been created. When using and endpoint and key, the index will\n+ * be created automatically if it does not exist.\n+ */\n+export class AzureAISearchVectorStore extends VectorStore {\n+ declare FilterType: string;\n+\n+ get lc_secrets(): { [key: string]: string } {\n+ return {\n+ endpoint: \"AZURE_AISEARCH_ENDPOINT\",\n+ key: \"AZURE_AISEARCH_KEY\",\n+ };\n+ }\n+\n+ _vectorstoreType(): string {\n+ return \"azure_aisearch\";\n+ }\n+\n+ private readonly initPromise: Promise;\n+\n+ private readonly client: SearchClient;\n+\n+ private readonly indexName: string;\n+\n+ private readonly chunkSize: number;\n+\n+ private readonly embeddingBatchSize: number;\n+\n+ private readonly options: AzureAISearchQueryOptions;\n+\n+ constructor(embeddings: EmbeddingsInterface, config: AzureAISearchConfig) {\n+ super(embeddings, config);\n+\n+ const endpoint =\n+ config.endpoint ?? getEnvironmentVariable(\"AZURE_AISEARCH_ENDPOINT\");\n+ const key = config.key ?? getEnvironmentVariable(\"AZURE_AISEARCH_KEY\");\n+\n+ if (!config.client && !endpoint && !key) {\n+ throw new Error(\n+ \"Azure AI Search client or connection string must be set.\"\n+ );\n+ }\n+\n+ this.indexName = config.indexName ?? \"vectorsearch\";\n+ this.chunkSize = config.chunkSize ?? 
100;\n+ this.embeddingBatchSize = config.embeddingBatchSize ?? 16;\n+\n+ if (!config.client) {\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ const credential = new AzureKeyCredential(key!);\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ this.client = new SearchClient(endpoint!, this.indexName, credential);\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ const indexClient = new SearchIndexClient(endpoint!, credential);\n+\n+ // Start initialization, but don't wait for it to finish here\n+ this.initPromise = this.ensureIndexExists(indexClient).catch((error) => {\n+ console.error(\n+ \"Error during Azure AI Search index initialization:\",\n+ error\n+ );\n+ });\n+ } else {\n+ this.client = config.client;\n+ }\n+\n+ this.options = config.search;\n+ this.embeddings = embeddings;\n+ }\n+\n+ /**\n+ * Removes specified documents from the AzureAISearchVectorStore using a filter.\n+ * @param filter OData filter to find documents to delete.\n+ * @returns A promise that resolves when the documents have been removed.\n+ */\n+ async deleteMany(filter: string): Promise {\n+ const { results } = await this.client.search(\"*\", {\n+ filter,\n+ });\n+\n+ const ids: string[] = [];\n+ for await (const item of results) {\n+ ids.push(item.document.id);\n+ }\n+\n+ const { results: deleteResults } = await this.client.deleteDocuments(\n+ DEFAULT_FIELD_ID,\n+ ids\n+ );\n+ return deleteResults;\n+ }\n+\n+ /**\n+ * Removes specified documents from the AzureAISearchVectorStore.\n+ * @param ids IDs of the documents to be removed.\n+ * @returns A promise that resolves when the documents have been removed.\n+ */\n+ async deleteById(ids: string | string[]): Promise {\n+ await this.initPromise;\n+ const { results } = await this.client.deleteDocuments(\n+ DEFAULT_FIELD_ID,\n+ Array.isArray(ids) ? ids : [ids]\n+ );\n+ return results;\n+ }\n+\n+ /**\n+ * Adds documents to the AzureAISearchVectorStore.\n+ * Documents are chunked into batches of size `embeddingBatchSize` then\n+ * embedded and added to the AzureAISearchVectorStore.\n+ * @param documents The documents to add.\n+ * @param options Options for adding documents.\n+ * @returns A promise that resolves to the ids of the added documents.\n+ */\n+ async addDocuments(\n+ documents: Document[],\n+ options?: AzureAISearchAddDocumentsOptions\n+ ) {\n+ const texts = documents.map(({ pageContent }) => pageContent);\n+ const results: string[] = [];\n+\n+ for (let i = 0; i < texts.length; i += this.embeddingBatchSize) {\n+ const batch = texts.slice(i, i + this.embeddingBatchSize);\n+ const docsBatch = documents.slice(i, i + this.embeddingBatchSize);\n+ const batchEmbeddings: number[][] = await this.embeddings.embedDocuments(\n+ batch\n+ );\n+ const batchResult = await this.addVectors(\n+ batchEmbeddings,\n+ docsBatch,\n+ options\n+ );\n+\n+ results.push(...batchResult);\n+ }\n+\n+ return results;\n+ }\n+\n+ /**\n+ * Adds vectors to the AzureAISearchVectorStore.\n+ * @param vectors Vectors to be added.\n+ * @param documents Corresponding documents to be added.\n+ * @param options Options for adding documents.\n+ * @returns A promise that resolves to the ids of the added documents.\n+ */\n+ async addVectors(\n+ vectors: number[][],\n+ documents: Document[],\n+ options?: AzureAISearchAddDocumentsOptions\n+ ): Promise {\n+ const ids = options?.ids ?? 
documents.map(() => uuid.v4());\n+ const entities: AzureAISearchDocument[] = documents.map((doc, idx) => ({\n+ id: ids[idx],\n+ content: doc.pageContent,\n+ content_vector: vectors[idx],\n+ metadata: {\n+ source: doc.metadata?.source,\n+ attributes: doc.metadata?.attributes ?? [],\n+ },\n+ }));\n+\n+ await this.initPromise;\n+ for (let i = 0; i < entities.length; i += this.chunkSize) {\n+ const chunk = entities.slice(i, i + this.chunkSize);\n+ await this.client.uploadDocuments(chunk, { throwOnAnyFailure: true });\n+ }\n+\n+ return ids;\n+ }\n+\n+ /**\n+ * Performs a similarity search using query type specified in configuration.\n+ * @param query Query text for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearch(\n+ query: string,\n+ k = 4,\n+ filter: this[\"FilterType\"] | undefined = undefined\n+ ): Promise {\n+ const results = await this.similaritySearchWithScore(query, k, filter);\n+\n+ return results.map((result) => result[0]);\n+ }\n+\n+ /**\n+ * Performs a similarity search using query type specified in configuration.\n+ * @param query Query text for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearchWithScore(\n+ query: string,\n+ k = 4,\n+ filter: this[\"FilterType\"] | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const searchType = this.options.type;\n+\n+ if (searchType === AzureAISearchQueryType.Similarity) {\n+ return this.similaritySearchVectorWithScore(\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ } else if (searchType === AzureAISearchQueryType.SimilarityHybrid) {\n+ return this.hybridSearchVectorWithScore(\n+ query,\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ } else if (searchType === AzureAISearchQueryType.SemanticHybrid) {\n+ return this.semanticHybridSearchVectorWithScore(\n+ query,\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ }\n+\n+ throw new Error(`Unrecognized search type '${searchType}'`);\n+ }\n+\n+ /**\n+ * Performs a hybrid search using query text.\n+ * @param query Query text for the similarity search.\n+ * @param queryVector Query vector for the similarity search.\n+ * If not provided, the query text will be embedded.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async hybridSearchVectorWithScore(\n+ query: string,\n+ queryVector?: number[],\n+ k = 4,\n+ filter: string | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const vector = queryVector ?? 
(await this.embeddings.embedQuery(query));\n+\n+ await this.initPromise;\n+ const { results } = await this.client.search(query, {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ top: k,\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ ...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Performs a hybrid search with semantic reranker using query text.\n+ * @param query Query text for the similarity search.\n+ * @param queryVector Query vector for the similarity search.\n+ * If not provided, the query text will be embedded.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async semanticHybridSearchVectorWithScore(\n+ query: string,\n+ queryVector?: number[],\n+ k = 4,\n+ filter: string | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const vector = queryVector ?? (await this.embeddings.embedQuery(query));\n+\n+ await this.initPromise;\n+ const { results } = await this.client.search(query, {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ top: k,\n+ queryType: \"semantic\",\n+ semanticSearchOptions: {\n+ configurationName: \"semantic-search-config\",\n+ captions: {\n+ captionType: \"extractive\",\n+ },\n+ answers: {\n+ answerType: \"extractive\",\n+ },\n+ },\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ ...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Performs a similarity search on the vectors stored in the collection.\n+ * @param queryVector Query vector for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter string OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearchVectorWithScore(\n+ query: number[],\n+ k: number,\n+ filter?: string\n+ ): Promise<[Document, number][]> {\n+ await this.initPromise;\n+\n+ const { results } = await this.client.search(\"*\", {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector: query,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ 
...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Return documents selected using the maximal marginal relevance.\n+ * Maximal marginal relevance optimizes for similarity to the query AND\n+ * diversity among selected documents.\n+ * @param query Text to look up documents similar to.\n+ * @param options.k Number of documents to return.\n+ * @param options.fetchK=20 Number of documents to fetch before passing to\n+ * the MMR algorithm.\n+ * @param options.lambda=0.5 Number between 0 and 1 that determines the\n+ * degree of diversity among the results, where 0 corresponds to maximum\n+ * diversity and 1 to minimum diversity.\n+ * @returns List of documents selected by maximal marginal relevance.\n+ */\n+ async maxMarginalRelevanceSearch(\n+ query: string,\n+ options: MaxMarginalRelevanceSearchOptions\n+ ): Promise {\n+ const { k, fetchK = 20, lambda = 0.5 } = options;\n+\n+ const queryEmbedding = await this.embeddings.embedQuery(query);\n+ const docs = await this.similaritySearchVectorWithScore(\n+ queryEmbedding,\n+ fetchK\n+ );\n+ const embeddingList = docs.map((doc) => doc[0].metadata.embedding);\n+\n+ // Re-rank the results using MMR\n+ const mmrIndexes = maximalMarginalRelevance(\n+ queryEmbedding,\n+ embeddingList,\n+ lambda,\n+ k\n+ );\n+\n+ const mmrDocs = mmrIndexes.map((index) => docs[index][0]);\n+ return mmrDocs;\n+ }\n+\n+ /**\n+ * Ensures that an index exists on the AzureAISearchVectorStore.\n+ * @param indexClient The Azure AI Search index client.\n+ * @returns A promise that resolves when the AzureAISearchVectorStore index has been initialized.\n+ * @protected\n+ */\n+ protected async ensureIndexExists(\n+ indexClient: SearchIndexClient\n+ ): Promise {\n+ try {\n+ await indexClient.getIndex(this.indexName);\n+ } catch (e) {\n+ // Index does not exists, create it\n+ const searchIndex = this.createSearchIndexDefinition(this.indexName);\n+ await indexClient.createIndex(searchIndex);\n+ }\n+ }\n+\n+ /**\n+ * Prepares the search index definition for Azure AI Search.\n+ * @param indexName The name of the index.\n+ * @returns The SearchIndex object.\n+ * @protected\n+ */\n+ protected createSearchIndexDefinition(indexName: string): SearchIndex {\n+ return {\n+ name: indexName,\n+ vectorSearch: {\n+ algorithms: [\n+ {\n+ name: \"vector-search-algorithm\",\n+ kind: \"hnsw\",\n+ parameters: {\n+ m: 4,\n+ efSearch: 500,\n+ metric: \"cosine\",\n+ efConstruction: 400,\n+ },\n+ },\n+ ],\n+ profiles: [\n+ {\n+ name: \"vector-search-profile\",\n+ algorithmConfigurationName: \"vector-search-algorithm\",\n+ },\n+ ],\n+ },\n+ semanticSearch: {\n+ defaultConfigurationName: \"semantic-search-config\",\n+ configurations: [\n+ {\n+ name: \"semantic-search-config\",\n+ prioritizedFields: {\n+ contentFields: [\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ },\n+ ],\n+ keywordsFields: [\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ },\n+ ],\n+ },\n+ },\n+ ],\n+ },\n+ fields: [\n+ {\n+ name: DEFAULT_FIELD_ID,\n+ filterable: true,\n+ key: true,\n+ type: \"Edm.String\",\n+ },\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ searchable: true,\n+ filterable: true,\n+ type: \"Edm.String\",\n+ },\n+ {\n+ name: DEFAULT_FIELD_CONTENT_VECTOR,\n+ searchable: true,\n+ type: \"Collection(Edm.Single)\",\n+ vectorSearchDimensions: 1536,", + "comment_created_at": "2024-01-17T19:56:38+00:00", + "comment_author": "pablocastro", + "comment_body": "Dimensions hardcoded to ada-002's 
size here, but I'm sure there are many cases where users will use other embedding models. Shouldn't this come from config or by sampling a vector of the ones being used for embeddings?", + "pr_file_module": null + }, + { + "comment_id": "1457272275", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 4044, + "pr_file": "libs/langchain-community/src/vectorstores/azure_aisearch.ts", + "discussion_id": "1456404750", + "commented_code": "@@ -0,0 +1,694 @@\n+import * as uuid from \"uuid\";\n+import {\n+ SearchClient,\n+ SearchIndexClient,\n+ AzureKeyCredential,\n+ IndexingResult,\n+ SearchIndex,\n+} from \"@azure/search-documents\";\n+import {\n+ MaxMarginalRelevanceSearchOptions,\n+ VectorStore,\n+} from \"@langchain/core/vectorstores\";\n+import type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { Document } from \"@langchain/core/documents\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export const AzureAISearchQueryType = {\n+ /** Vector search. */\n+ Similarity: \"similarity\",\n+ /** Hybrid full text and vector search. */\n+ SimilarityHybrid: \"similarity_hybrid\",\n+ /** Hybrid full text and vector search with semantic ranking. */\n+ SemanticHybrid: \"semantic_hybrid\",\n+} as const;\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export type AzureAISearchQueryType =\n+ (typeof AzureAISearchQueryType)[keyof typeof AzureAISearchQueryType];\n+\n+/**\n+ * Azure AI Search settings.\n+ */\n+export interface AzureAISearchQueryOptions {\n+ readonly type: AzureAISearchQueryType;\n+ readonly semanticConfigurationName?: string;\n+}\n+\n+/**\n+ * Configuration options for the `AzureAISearchStore` constructor.\n+ */\n+export interface AzureAISearchConfig {\n+ readonly client?: SearchClient;\n+ readonly indexName?: string;\n+ readonly endpoint?: string;\n+ readonly key?: string;\n+ readonly search: AzureAISearchQueryOptions;\n+ /**\n+ * The amount of documents to chunk by when adding vectors.\n+ * @default 100\n+ */\n+ readonly chunkSize?: number;\n+ /**\n+ * The amount of documents to embed at once when adding documents.\n+ * Note that some providers like Azure OpenAI can only embed 16 documents\n+ * at a time.\n+ * @default 16\n+ */\n+ readonly embeddingBatchSize?: number;\n+}\n+\n+/**\n+ * Azure AI Search options metadata schema.\n+ * If yout want to add custom data, use the attributes property.\n+ */\n+export type AzureAISearchDocumentMetadata = {\n+ source: string;\n+ attributes?: Array<{ key: string; value: string }>;\n+};\n+\n+/**\n+ * Azure AI Search indexed document.\n+ */\n+export type AzureAISearchDocument = {\n+ id: string;\n+ content: string;\n+ content_vector: number[];\n+ metadata: AzureAISearchDocumentMetadata;\n+};\n+\n+/**\n+ * Azure AI Search options for adding documents.\n+ */\n+export type AzureAISearchAddDocumentsOptions = {\n+ ids?: string[];\n+};\n+\n+const DEFAULT_FIELD_ID = \"id\";\n+const DEFAULT_FIELD_CONTENT = \"content\";\n+const DEFAULT_FIELD_CONTENT_VECTOR = \"content_vector\";\n+const DEFAULT_FIELD_METADATA = \"metadata\";\n+const DEFAULT_FIELD_METADATA_SOURCE = \"source\";\n+const DEFAULT_FIELD_METADATA_ATTRS = \"attributes\";\n+\n+/**\n+ * Azure AI Search vector store.\n+ * To use this, you should have:\n+ * - the `@azure/search-documents` NPM package installed\n+ * - an endpoint and key to the Azure AI Search instance\n+ *\n+ * If you directly provide a `SearchClient` instance, 
you need to ensure that\n+ * an index has been created. When using and endpoint and key, the index will\n+ * be created automatically if it does not exist.\n+ */\n+export class AzureAISearchVectorStore extends VectorStore {\n+ declare FilterType: string;\n+\n+ get lc_secrets(): { [key: string]: string } {\n+ return {\n+ endpoint: \"AZURE_AISEARCH_ENDPOINT\",\n+ key: \"AZURE_AISEARCH_KEY\",\n+ };\n+ }\n+\n+ _vectorstoreType(): string {\n+ return \"azure_aisearch\";\n+ }\n+\n+ private readonly initPromise: Promise;\n+\n+ private readonly client: SearchClient;\n+\n+ private readonly indexName: string;\n+\n+ private readonly chunkSize: number;\n+\n+ private readonly embeddingBatchSize: number;\n+\n+ private readonly options: AzureAISearchQueryOptions;\n+\n+ constructor(embeddings: EmbeddingsInterface, config: AzureAISearchConfig) {\n+ super(embeddings, config);\n+\n+ const endpoint =\n+ config.endpoint ?? getEnvironmentVariable(\"AZURE_AISEARCH_ENDPOINT\");\n+ const key = config.key ?? getEnvironmentVariable(\"AZURE_AISEARCH_KEY\");\n+\n+ if (!config.client && !endpoint && !key) {\n+ throw new Error(\n+ \"Azure AI Search client or connection string must be set.\"\n+ );\n+ }\n+\n+ this.indexName = config.indexName ?? \"vectorsearch\";\n+ this.chunkSize = config.chunkSize ?? 100;\n+ this.embeddingBatchSize = config.embeddingBatchSize ?? 16;\n+\n+ if (!config.client) {\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ const credential = new AzureKeyCredential(key!);\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ this.client = new SearchClient(endpoint!, this.indexName, credential);\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ const indexClient = new SearchIndexClient(endpoint!, credential);\n+\n+ // Start initialization, but don't wait for it to finish here\n+ this.initPromise = this.ensureIndexExists(indexClient).catch((error) => {\n+ console.error(\n+ \"Error during Azure AI Search index initialization:\",\n+ error\n+ );\n+ });\n+ } else {\n+ this.client = config.client;\n+ }\n+\n+ this.options = config.search;\n+ this.embeddings = embeddings;\n+ }\n+\n+ /**\n+ * Removes specified documents from the AzureAISearchVectorStore using a filter.\n+ * @param filter OData filter to find documents to delete.\n+ * @returns A promise that resolves when the documents have been removed.\n+ */\n+ async deleteMany(filter: string): Promise {\n+ const { results } = await this.client.search(\"*\", {\n+ filter,\n+ });\n+\n+ const ids: string[] = [];\n+ for await (const item of results) {\n+ ids.push(item.document.id);\n+ }\n+\n+ const { results: deleteResults } = await this.client.deleteDocuments(\n+ DEFAULT_FIELD_ID,\n+ ids\n+ );\n+ return deleteResults;\n+ }\n+\n+ /**\n+ * Removes specified documents from the AzureAISearchVectorStore.\n+ * @param ids IDs of the documents to be removed.\n+ * @returns A promise that resolves when the documents have been removed.\n+ */\n+ async deleteById(ids: string | string[]): Promise {\n+ await this.initPromise;\n+ const { results } = await this.client.deleteDocuments(\n+ DEFAULT_FIELD_ID,\n+ Array.isArray(ids) ? 
ids : [ids]\n+ );\n+ return results;\n+ }\n+\n+ /**\n+ * Adds documents to the AzureAISearchVectorStore.\n+ * Documents are chunked into batches of size `embeddingBatchSize` then\n+ * embedded and added to the AzureAISearchVectorStore.\n+ * @param documents The documents to add.\n+ * @param options Options for adding documents.\n+ * @returns A promise that resolves to the ids of the added documents.\n+ */\n+ async addDocuments(\n+ documents: Document[],\n+ options?: AzureAISearchAddDocumentsOptions\n+ ) {\n+ const texts = documents.map(({ pageContent }) => pageContent);\n+ const results: string[] = [];\n+\n+ for (let i = 0; i < texts.length; i += this.embeddingBatchSize) {\n+ const batch = texts.slice(i, i + this.embeddingBatchSize);\n+ const docsBatch = documents.slice(i, i + this.embeddingBatchSize);\n+ const batchEmbeddings: number[][] = await this.embeddings.embedDocuments(\n+ batch\n+ );\n+ const batchResult = await this.addVectors(\n+ batchEmbeddings,\n+ docsBatch,\n+ options\n+ );\n+\n+ results.push(...batchResult);\n+ }\n+\n+ return results;\n+ }\n+\n+ /**\n+ * Adds vectors to the AzureAISearchVectorStore.\n+ * @param vectors Vectors to be added.\n+ * @param documents Corresponding documents to be added.\n+ * @param options Options for adding documents.\n+ * @returns A promise that resolves to the ids of the added documents.\n+ */\n+ async addVectors(\n+ vectors: number[][],\n+ documents: Document[],\n+ options?: AzureAISearchAddDocumentsOptions\n+ ): Promise {\n+ const ids = options?.ids ?? documents.map(() => uuid.v4());\n+ const entities: AzureAISearchDocument[] = documents.map((doc, idx) => ({\n+ id: ids[idx],\n+ content: doc.pageContent,\n+ content_vector: vectors[idx],\n+ metadata: {\n+ source: doc.metadata?.source,\n+ attributes: doc.metadata?.attributes ?? 
[],\n+ },\n+ }));\n+\n+ await this.initPromise;\n+ for (let i = 0; i < entities.length; i += this.chunkSize) {\n+ const chunk = entities.slice(i, i + this.chunkSize);\n+ await this.client.uploadDocuments(chunk, { throwOnAnyFailure: true });\n+ }\n+\n+ return ids;\n+ }\n+\n+ /**\n+ * Performs a similarity search using query type specified in configuration.\n+ * @param query Query text for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearch(\n+ query: string,\n+ k = 4,\n+ filter: this[\"FilterType\"] | undefined = undefined\n+ ): Promise {\n+ const results = await this.similaritySearchWithScore(query, k, filter);\n+\n+ return results.map((result) => result[0]);\n+ }\n+\n+ /**\n+ * Performs a similarity search using query type specified in configuration.\n+ * @param query Query text for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearchWithScore(\n+ query: string,\n+ k = 4,\n+ filter: this[\"FilterType\"] | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const searchType = this.options.type;\n+\n+ if (searchType === AzureAISearchQueryType.Similarity) {\n+ return this.similaritySearchVectorWithScore(\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ } else if (searchType === AzureAISearchQueryType.SimilarityHybrid) {\n+ return this.hybridSearchVectorWithScore(\n+ query,\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ } else if (searchType === AzureAISearchQueryType.SemanticHybrid) {\n+ return this.semanticHybridSearchVectorWithScore(\n+ query,\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ }\n+\n+ throw new Error(`Unrecognized search type '${searchType}'`);\n+ }\n+\n+ /**\n+ * Performs a hybrid search using query text.\n+ * @param query Query text for the similarity search.\n+ * @param queryVector Query vector for the similarity search.\n+ * If not provided, the query text will be embedded.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async hybridSearchVectorWithScore(\n+ query: string,\n+ queryVector?: number[],\n+ k = 4,\n+ filter: string | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const vector = queryVector ?? 
(await this.embeddings.embedQuery(query));\n+\n+ await this.initPromise;\n+ const { results } = await this.client.search(query, {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ top: k,\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ ...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Performs a hybrid search with semantic reranker using query text.\n+ * @param query Query text for the similarity search.\n+ * @param queryVector Query vector for the similarity search.\n+ * If not provided, the query text will be embedded.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async semanticHybridSearchVectorWithScore(\n+ query: string,\n+ queryVector?: number[],\n+ k = 4,\n+ filter: string | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const vector = queryVector ?? (await this.embeddings.embedQuery(query));\n+\n+ await this.initPromise;\n+ const { results } = await this.client.search(query, {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ top: k,\n+ queryType: \"semantic\",\n+ semanticSearchOptions: {\n+ configurationName: \"semantic-search-config\",\n+ captions: {\n+ captionType: \"extractive\",\n+ },\n+ answers: {\n+ answerType: \"extractive\",\n+ },\n+ },\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ ...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Performs a similarity search on the vectors stored in the collection.\n+ * @param queryVector Query vector for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter string OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearchVectorWithScore(\n+ query: number[],\n+ k: number,\n+ filter?: string\n+ ): Promise<[Document, number][]> {\n+ await this.initPromise;\n+\n+ const { results } = await this.client.search(\"*\", {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector: query,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ 
...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Return documents selected using the maximal marginal relevance.\n+ * Maximal marginal relevance optimizes for similarity to the query AND\n+ * diversity among selected documents.\n+ * @param query Text to look up documents similar to.\n+ * @param options.k Number of documents to return.\n+ * @param options.fetchK=20 Number of documents to fetch before passing to\n+ * the MMR algorithm.\n+ * @param options.lambda=0.5 Number between 0 and 1 that determines the\n+ * degree of diversity among the results, where 0 corresponds to maximum\n+ * diversity and 1 to minimum diversity.\n+ * @returns List of documents selected by maximal marginal relevance.\n+ */\n+ async maxMarginalRelevanceSearch(\n+ query: string,\n+ options: MaxMarginalRelevanceSearchOptions\n+ ): Promise {\n+ const { k, fetchK = 20, lambda = 0.5 } = options;\n+\n+ const queryEmbedding = await this.embeddings.embedQuery(query);\n+ const docs = await this.similaritySearchVectorWithScore(\n+ queryEmbedding,\n+ fetchK\n+ );\n+ const embeddingList = docs.map((doc) => doc[0].metadata.embedding);\n+\n+ // Re-rank the results using MMR\n+ const mmrIndexes = maximalMarginalRelevance(\n+ queryEmbedding,\n+ embeddingList,\n+ lambda,\n+ k\n+ );\n+\n+ const mmrDocs = mmrIndexes.map((index) => docs[index][0]);\n+ return mmrDocs;\n+ }\n+\n+ /**\n+ * Ensures that an index exists on the AzureAISearchVectorStore.\n+ * @param indexClient The Azure AI Search index client.\n+ * @returns A promise that resolves when the AzureAISearchVectorStore index has been initialized.\n+ * @protected\n+ */\n+ protected async ensureIndexExists(\n+ indexClient: SearchIndexClient\n+ ): Promise {\n+ try {\n+ await indexClient.getIndex(this.indexName);\n+ } catch (e) {\n+ // Index does not exists, create it\n+ const searchIndex = this.createSearchIndexDefinition(this.indexName);\n+ await indexClient.createIndex(searchIndex);\n+ }\n+ }\n+\n+ /**\n+ * Prepares the search index definition for Azure AI Search.\n+ * @param indexName The name of the index.\n+ * @returns The SearchIndex object.\n+ * @protected\n+ */\n+ protected createSearchIndexDefinition(indexName: string): SearchIndex {\n+ return {\n+ name: indexName,\n+ vectorSearch: {\n+ algorithms: [\n+ {\n+ name: \"vector-search-algorithm\",\n+ kind: \"hnsw\",\n+ parameters: {\n+ m: 4,\n+ efSearch: 500,\n+ metric: \"cosine\",\n+ efConstruction: 400,\n+ },\n+ },\n+ ],\n+ profiles: [\n+ {\n+ name: \"vector-search-profile\",\n+ algorithmConfigurationName: \"vector-search-algorithm\",\n+ },\n+ ],\n+ },\n+ semanticSearch: {\n+ defaultConfigurationName: \"semantic-search-config\",\n+ configurations: [\n+ {\n+ name: \"semantic-search-config\",\n+ prioritizedFields: {\n+ contentFields: [\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ },\n+ ],\n+ keywordsFields: [\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ },\n+ ],\n+ },\n+ },\n+ ],\n+ },\n+ fields: [\n+ {\n+ name: DEFAULT_FIELD_ID,\n+ filterable: true,\n+ key: true,\n+ type: \"Edm.String\",\n+ },\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ searchable: true,\n+ filterable: true,\n+ type: \"Edm.String\",\n+ },\n+ {\n+ name: DEFAULT_FIELD_CONTENT_VECTOR,\n+ searchable: true,\n+ type: \"Collection(Edm.Single)\",\n+ vectorSearchDimensions: 1536,", + "comment_created_at": "2024-01-18T10:55:02+00:00", + "comment_author": "sinedied", + "comment_body": "Good catch. 
I'll check if we can get it from the LC embedding model directly or add it to the config", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1924384814", + "pr_number": 7561, + "pr_file": "libs/langchain-community/src/tools/tavily_search.ts", + "created_at": "2025-01-21T21:28:07+00:00", + "commented_code": "protected kwargs: Record = {};\n\n protected includeImages = false;\n\n protected includeImageDescriptions = false;\n\n protected includeAnswer = false;\n\n protected includeRawContent = false;\n\n protected includeDomains: string[] = [];\n\n protected excludeDomains: string[] = [];\n\n protected searchDepth: \"basic\" | \"deep\" = \"basic\";", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1924384814", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7561, + "pr_file": "libs/langchain-community/src/tools/tavily_search.ts", + "discussion_id": "1924384814", + "commented_code": "@@ -93,11 +170,41 @@ export class TavilySearchResults extends Tool {\n \n protected kwargs: Record = {};\n \n+ protected includeImages = false;\n+\n+ protected includeImageDescriptions = false;\n+\n+ protected includeAnswer = false;\n+\n+ protected includeRawContent = false;\n+\n+ protected includeDomains: string[] = [];\n+\n+ protected excludeDomains: string[] = [];\n+\n+ protected searchDepth: \"basic\" | \"deep\" = \"basic\";", + "comment_created_at": "2025-01-21T21:28:07+00:00", + "comment_author": "jacoblee93", + "comment_body": "Let's avoid defaults at this level when possible", + "pr_file_module": null + }, + { + "comment_id": "1927795454", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7561, + "pr_file": "libs/langchain-community/src/tools/tavily_search.ts", + "discussion_id": "1924384814", + "commented_code": "@@ -93,11 +170,41 @@ export class TavilySearchResults extends Tool {\n \n protected kwargs: Record = {};\n \n+ protected includeImages = false;\n+\n+ protected includeImageDescriptions = false;\n+\n+ protected includeAnswer = false;\n+\n+ protected includeRawContent = false;\n+\n+ protected includeDomains: string[] = [];\n+\n+ protected excludeDomains: string[] = [];\n+\n+ protected searchDepth: \"basic\" | \"deep\" = \"basic\";", + "comment_created_at": "2025-01-23T22:57:27+00:00", + "comment_author": "hugoleborso", + "comment_body": "Hi @jacoblee93, I did this because these are actually the default settings of the API doc, and also because `maxResults` already had a default value set corresponding to the default value in the doc.\r\n\r\nIf you want me too I can still change it of course, but I think it makes sense as is.\r\n\r\nLet me know what you think!", + "pr_file_module": null + }, + { + "comment_id": "1929361191", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7561, + "pr_file": "libs/langchain-community/src/tools/tavily_search.ts", + "discussion_id": "1924384814", + "commented_code": "@@ -93,11 +170,41 @@ export class TavilySearchResults extends Tool {\n \n protected kwargs: Record = {};\n \n+ protected includeImages = false;\n+\n+ protected includeImageDescriptions = false;\n+\n+ protected includeAnswer = false;\n+\n+ protected includeRawContent = false;\n+\n+ protected includeDomains: string[] = [];\n+\n+ protected excludeDomains: string[] = [];\n+\n+ protected searchDepth: \"basic\" | \"deep\" = \"basic\";", + "comment_created_at": "2025-01-24T23:25:06+00:00", + "comment_author": "jacoblee93", + "comment_body": "We should ideally let the backend set this in case they change best 
practices, would prefer to have things unset if it won't cause issues (which it wasn't before)", + "pr_file_module": null + }, + { + "comment_id": "1931704972", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7561, + "pr_file": "libs/langchain-community/src/tools/tavily_search.ts", + "discussion_id": "1924384814", + "commented_code": "@@ -93,11 +170,41 @@ export class TavilySearchResults extends Tool {\n \n protected kwargs: Record = {};\n \n+ protected includeImages = false;\n+\n+ protected includeImageDescriptions = false;\n+\n+ protected includeAnswer = false;\n+\n+ protected includeRawContent = false;\n+\n+ protected includeDomains: string[] = [];\n+\n+ protected excludeDomains: string[] = [];\n+\n+ protected searchDepth: \"basic\" | \"deep\" = \"basic\";", + "comment_created_at": "2025-01-28T08:16:23+00:00", + "comment_author": "hugoleborso", + "comment_body": "@jacoblee93 done, let me know if this fit your needs, and thanks for all the quick responses !", + "pr_file_module": null + }, + { + "comment_id": "1934820570", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7561, + "pr_file": "libs/langchain-community/src/tools/tavily_search.ts", + "discussion_id": "1924384814", + "commented_code": "@@ -93,11 +170,41 @@ export class TavilySearchResults extends Tool {\n \n protected kwargs: Record = {};\n \n+ protected includeImages = false;\n+\n+ protected includeImageDescriptions = false;\n+\n+ protected includeAnswer = false;\n+\n+ protected includeRawContent = false;\n+\n+ protected includeDomains: string[] = [];\n+\n+ protected excludeDomains: string[] = [];\n+\n+ protected searchDepth: \"basic\" | \"deep\" = \"basic\";", + "comment_created_at": "2025-01-29T23:52:18+00:00", + "comment_author": "jacoblee93", + "comment_body": "Thank you, old defaults have started causing issues for OpenAI so it's top of mind right now", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1829838921", + "pr_number": 6999, + "pr_file": "libs/langchain-google-common/src/connection.ts", + "created_at": "2024-11-05T18:39:30+00:00", + "commented_code": "}\n }\n\n get computedLocation(): string {\n switch (this.apiName) {\n case \"google\":\n return super.computedLocation;\n case \"anthropic\":\n return \"us-east5\";", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1829838921", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6999, + "pr_file": "libs/langchain-google-common/src/connection.ts", + "discussion_id": "1829838921", + "commented_code": "@@ -245,6 +288,19 @@ export abstract class GoogleAIConnection<\n }\n }\n \n+ get computedLocation(): string {\n+ switch (this.apiName) {\n+ case \"google\":\n+ return super.computedLocation;\n+ case \"anthropic\":\n+ return \"us-east5\";", + "comment_created_at": "2024-11-05T18:39:30+00:00", + "comment_author": "jacoblee93", + "comment_body": "Should we be hardcoding this here?\r\n\r\nIf it's only available in one region now, would prefer to have this configurable or even not have a default at all and just have it documented", + "pr_file_module": null + }, + { + "comment_id": "1829905294", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6999, + "pr_file": "libs/langchain-google-common/src/connection.ts", + "discussion_id": "1829838921", + "commented_code": "@@ -245,6 +288,19 @@ export abstract class GoogleAIConnection<\n }\n }\n \n+ get computedLocation(): string {\n+ switch (this.apiName) {\n+ case \"google\":\n+ return super.computedLocation;\n+ case 
\"anthropic\":\n+ return \"us-east5\";", + "comment_created_at": "2024-11-05T19:35:53+00:00", + "comment_author": "afirstenberg", + "comment_body": "See above about making magic values as unnecessary as possible.\r\n\r\nSome of the Anthropic models are available in other regions, but all are available in us-east5. And none, mysteriously, are available in us-central1.\r\n\r\nI'd like to keep it as a default, but clearly have it available to be settable.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-avoid-hardcoded-configurations.md b/_reviewers/langchainjs-avoid-hardcoded-configurations.md index 16a4744..bee454d 100644 --- a/_reviewers/langchainjs-avoid-hardcoded-configurations.md +++ b/_reviewers/langchainjs-avoid-hardcoded-configurations.md @@ -51,167 +51,3 @@ if (this.apiUrl !== undefined) { } const app = new FirecrawlApp(params); ``` - - -[ - { - "discussion_id": "1417967601", - "pr_number": 3465, - "pr_file": "langchain/src/tools/outlook/authFlowREST.ts", - "created_at": "2023-12-06T20:54:51+00:00", - "commented_code": "import * as http from \"http\";\nimport * as url from \"url\";\nimport { AuthFlowBase } from \"./authFlowBase.js\";\nimport { getEnvironmentVariable } from \"../../util/env.js\";\n\ninterface AccessTokenResponse {\n access_token: string;\n refresh_token: string;\n}\n\n// this class uses your clientId, clientSecret and redirectUri\n// to get access token and refresh token, if you already have them,\n// you can use AuthFlowToken or AuthFlowRefresh instead\nexport class AuthFlowREST extends AuthFlowBase {\n private clientSecret: string;\n\n private redirectUri: string;\n\n private port: number;\n\n private pathname: string;\n\n private refreshToken = \"\";\n\n constructor({\n clientId,\n clientSecret,\n redirectUri,\n }: { clientId?: string; clientSecret?: string; redirectUri?: string } = {}) {\n let id = clientId;\n let secret = clientSecret;\n let uri = redirectUri;\n if (!id || !secret || !uri) {\n id = getEnvironmentVariable(\"OUTLOOK_CLIENT_ID\");\n secret = getEnvironmentVariable(\"OUTLOOK_CLIENT_SECRET\");\n uri = getEnvironmentVariable(\"OUTLOOK_REDIRECT_URI\");\n }", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1417967601", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3465, - "pr_file": "langchain/src/tools/outlook/authFlowREST.ts", - "discussion_id": "1417967601", - "commented_code": "@@ -0,0 +1,205 @@\n+import * as http from \"http\";\n+import * as url from \"url\";\n+import { AuthFlowBase } from \"./authFlowBase.js\";\n+import { getEnvironmentVariable } from \"../../util/env.js\";\n+\n+interface AccessTokenResponse {\n+ access_token: string;\n+ refresh_token: string;\n+}\n+\n+// this class uses your clientId, clientSecret and redirectUri\n+// to get access token and refresh token, if you already have them,\n+// you can use AuthFlowToken or AuthFlowRefresh instead\n+export class AuthFlowREST extends AuthFlowBase {\n+ private clientSecret: string;\n+\n+ private redirectUri: string;\n+\n+ private port: number;\n+\n+ private pathname: string;\n+\n+ private refreshToken = \"\";\n+\n+ constructor({\n+ clientId,\n+ clientSecret,\n+ redirectUri,\n+ }: { clientId?: string; clientSecret?: string; redirectUri?: string } = {}) {\n+ let id = clientId;\n+ let secret = clientSecret;\n+ let uri = redirectUri;\n+ if (!id || !secret || !uri) {\n+ id = getEnvironmentVariable(\"OUTLOOK_CLIENT_ID\");\n+ secret = 
getEnvironmentVariable(\"OUTLOOK_CLIENT_SECRET\");\n+ uri = getEnvironmentVariable(\"OUTLOOK_REDIRECT_URI\");\n+ }", - "comment_created_at": "2023-12-06T20:54:51+00:00", - "comment_author": "bracesproul", - "comment_body": "We shouldn't override all if one isn't passed, instead lets do something like this:\r\n```suggestion\r\n let id = clientId;\r\n let secret = clientSecret;\r\n let uri = redirectUri;\r\n if (!id || !secret || !uri) {\r\n id = id ?? getEnvironmentVariable(\"OUTLOOK_CLIENT_ID\");\r\n secret = secret ?? getEnvironmentVariable(\"OUTLOOK_CLIENT_SECRET\");\r\n uri = uri ?? getEnvironmentVariable(\"OUTLOOK_REDIRECT_URI\");\r\n }\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1456404750", - "pr_number": 4044, - "pr_file": "libs/langchain-community/src/vectorstores/azure_aisearch.ts", - "created_at": "2024-01-17T19:56:38+00:00", - "commented_code": "import * as uuid from \"uuid\";\nimport {\n SearchClient,\n SearchIndexClient,\n AzureKeyCredential,\n IndexingResult,\n SearchIndex,\n} from \"@azure/search-documents\";\nimport {\n MaxMarginalRelevanceSearchOptions,\n VectorStore,\n} from \"@langchain/core/vectorstores\";\nimport type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\nimport { Document } from \"@langchain/core/documents\";\nimport { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\nimport { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n\n/**\n * Azure AI Search query type.\n */\nexport const AzureAISearchQueryType = {\n /** Vector search. */\n Similarity: \"similarity\",\n /** Hybrid full text and vector search. */\n SimilarityHybrid: \"similarity_hybrid\",\n /** Hybrid full text and vector search with semantic ranking. */\n SemanticHybrid: \"semantic_hybrid\",\n} as const;\n\n/**\n * Azure AI Search query type.\n */\nexport type AzureAISearchQueryType =\n (typeof AzureAISearchQueryType)[keyof typeof AzureAISearchQueryType];\n\n/**\n * Azure AI Search settings.\n */\nexport interface AzureAISearchQueryOptions {\n readonly type: AzureAISearchQueryType;\n readonly semanticConfigurationName?: string;\n}\n\n/**\n * Configuration options for the `AzureAISearchStore` constructor.\n */\nexport interface AzureAISearchConfig {\n readonly client?: SearchClient;\n readonly indexName?: string;\n readonly endpoint?: string;\n readonly key?: string;\n readonly search: AzureAISearchQueryOptions;\n /**\n * The amount of documents to chunk by when adding vectors.\n * @default 100\n */\n readonly chunkSize?: number;\n /**\n * The amount of documents to embed at once when adding documents.\n * Note that some providers like Azure OpenAI can only embed 16 documents\n * at a time.\n * @default 16\n */\n readonly embeddingBatchSize?: number;\n}\n\n/**\n * Azure AI Search options metadata schema.\n * If yout want to add custom data, use the attributes property.\n */\nexport type AzureAISearchDocumentMetadata = {\n source: string;\n attributes?: Array<{ key: string; value: string }>;\n};\n\n/**\n * Azure AI Search indexed document.\n */\nexport type AzureAISearchDocument = {\n id: string;\n content: string;\n content_vector: number[];\n metadata: AzureAISearchDocumentMetadata;\n};\n\n/**\n * Azure AI Search options for adding documents.\n */\nexport type AzureAISearchAddDocumentsOptions = {\n ids?: string[];\n};\n\nconst DEFAULT_FIELD_ID = \"id\";\nconst DEFAULT_FIELD_CONTENT = \"content\";\nconst DEFAULT_FIELD_CONTENT_VECTOR = \"content_vector\";\nconst DEFAULT_FIELD_METADATA = \"metadata\";\nconst 
DEFAULT_FIELD_METADATA_SOURCE = \"source\";\nconst DEFAULT_FIELD_METADATA_ATTRS = \"attributes\";\n\n/**\n * Azure AI Search vector store.\n * To use this, you should have:\n * - the `@azure/search-documents` NPM package installed\n * - an endpoint and key to the Azure AI Search instance\n *\n * If you directly provide a `SearchClient` instance, you need to ensure that\n * an index has been created. When using and endpoint and key, the index will\n * be created automatically if it does not exist.\n */\nexport class AzureAISearchVectorStore extends VectorStore {\n declare FilterType: string;\n\n get lc_secrets(): { [key: string]: string } {\n return {\n endpoint: \"AZURE_AISEARCH_ENDPOINT\",\n key: \"AZURE_AISEARCH_KEY\",\n };\n }\n\n _vectorstoreType(): string {\n return \"azure_aisearch\";\n }\n\n private readonly initPromise: Promise;\n\n private readonly client: SearchClient;\n\n private readonly indexName: string;\n\n private readonly chunkSize: number;\n\n private readonly embeddingBatchSize: number;\n\n private readonly options: AzureAISearchQueryOptions;\n\n constructor(embeddings: EmbeddingsInterface, config: AzureAISearchConfig) {\n super(embeddings, config);\n\n const endpoint =\n config.endpoint ?? getEnvironmentVariable(\"AZURE_AISEARCH_ENDPOINT\");\n const key = config.key ?? getEnvironmentVariable(\"AZURE_AISEARCH_KEY\");\n\n if (!config.client && !endpoint && !key) {\n throw new Error(\n \"Azure AI Search client or connection string must be set.\"\n );\n }\n\n this.indexName = config.indexName ?? \"vectorsearch\";\n this.chunkSize = config.chunkSize ?? 100;\n this.embeddingBatchSize = config.embeddingBatchSize ?? 16;\n\n if (!config.client) {\n // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n const credential = new AzureKeyCredential(key!);\n // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n this.client = new SearchClient(endpoint!, this.indexName, credential);\n // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n const indexClient = new SearchIndexClient(endpoint!, credential);\n\n // Start initialization, but don't wait for it to finish here\n this.initPromise = this.ensureIndexExists(indexClient).catch((error) => {\n console.error(\n \"Error during Azure AI Search index initialization:\",\n error\n );\n });\n } else {\n this.client = config.client;\n }\n\n this.options = config.search;\n this.embeddings = embeddings;\n }\n\n /**\n * Removes specified documents from the AzureAISearchVectorStore using a filter.\n * @param filter OData filter to find documents to delete.\n * @returns A promise that resolves when the documents have been removed.\n */\n async deleteMany(filter: string): Promise {\n const { results } = await this.client.search(\"*\", {\n filter,\n });\n\n const ids: string[] = [];\n for await (const item of results) {\n ids.push(item.document.id);\n }\n\n const { results: deleteResults } = await this.client.deleteDocuments(\n DEFAULT_FIELD_ID,\n ids\n );\n return deleteResults;\n }\n\n /**\n * Removes specified documents from the AzureAISearchVectorStore.\n * @param ids IDs of the documents to be removed.\n * @returns A promise that resolves when the documents have been removed.\n */\n async deleteById(ids: string | string[]): Promise {\n await this.initPromise;\n const { results } = await this.client.deleteDocuments(\n DEFAULT_FIELD_ID,\n Array.isArray(ids) ? 
ids : [ids]\n );\n return results;\n }\n\n /**\n * Adds documents to the AzureAISearchVectorStore.\n * Documents are chunked into batches of size `embeddingBatchSize` then\n * embedded and added to the AzureAISearchVectorStore.\n * @param documents The documents to add.\n * @param options Options for adding documents.\n * @returns A promise that resolves to the ids of the added documents.\n */\n async addDocuments(\n documents: Document[],\n options?: AzureAISearchAddDocumentsOptions\n ) {\n const texts = documents.map(({ pageContent }) => pageContent);\n const results: string[] = [];\n\n for (let i = 0; i < texts.length; i += this.embeddingBatchSize) {\n const batch = texts.slice(i, i + this.embeddingBatchSize);\n const docsBatch = documents.slice(i, i + this.embeddingBatchSize);\n const batchEmbeddings: number[][] = await this.embeddings.embedDocuments(\n batch\n );\n const batchResult = await this.addVectors(\n batchEmbeddings,\n docsBatch,\n options\n );\n\n results.push(...batchResult);\n }\n\n return results;\n }\n\n /**\n * Adds vectors to the AzureAISearchVectorStore.\n * @param vectors Vectors to be added.\n * @param documents Corresponding documents to be added.\n * @param options Options for adding documents.\n * @returns A promise that resolves to the ids of the added documents.\n */\n async addVectors(\n vectors: number[][],\n documents: Document[],\n options?: AzureAISearchAddDocumentsOptions\n ): Promise {\n const ids = options?.ids ?? documents.map(() => uuid.v4());\n const entities: AzureAISearchDocument[] = documents.map((doc, idx) => ({\n id: ids[idx],\n content: doc.pageContent,\n content_vector: vectors[idx],\n metadata: {\n source: doc.metadata?.source,\n attributes: doc.metadata?.attributes ?? [],\n },\n }));\n\n await this.initPromise;\n for (let i = 0; i < entities.length; i += this.chunkSize) {\n const chunk = entities.slice(i, i + this.chunkSize);\n await this.client.uploadDocuments(chunk, { throwOnAnyFailure: true });\n }\n\n return ids;\n }\n\n /**\n * Performs a similarity search using query type specified in configuration.\n * @param query Query text for the similarity search.\n * @param k=4 Number of nearest neighbors to return.\n * @param filter Optional OData filter for the documents.\n * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n */\n async similaritySearch(\n query: string,\n k = 4,\n filter: this[\"FilterType\"] | undefined = undefined\n ): Promise {\n const results = await this.similaritySearchWithScore(query, k, filter);\n\n return results.map((result) => result[0]);\n }\n\n /**\n * Performs a similarity search using query type specified in configuration.\n * @param query Query text for the similarity search.\n * @param k=4 Number of nearest neighbors to return.\n * @param filter Optional OData filter for the documents.\n * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n */\n async similaritySearchWithScore(\n query: string,\n k = 4,\n filter: this[\"FilterType\"] | undefined = undefined\n ): Promise<[Document, number][]> {\n const searchType = this.options.type;\n\n if (searchType === AzureAISearchQueryType.Similarity) {\n return this.similaritySearchVectorWithScore(\n await this.embeddings.embedQuery(query),\n k,\n filter\n );\n } else if (searchType === AzureAISearchQueryType.SimilarityHybrid) {\n return this.hybridSearchVectorWithScore(\n query,\n await this.embeddings.embedQuery(query),\n k,\n filter\n );\n } else if (searchType === 
AzureAISearchQueryType.SemanticHybrid) {\n return this.semanticHybridSearchVectorWithScore(\n query,\n await this.embeddings.embedQuery(query),\n k,\n filter\n );\n }\n\n throw new Error(`Unrecognized search type '${searchType}'`);\n }\n\n /**\n * Performs a hybrid search using query text.\n * @param query Query text for the similarity search.\n * @param queryVector Query vector for the similarity search.\n * If not provided, the query text will be embedded.\n * @param k=4 Number of nearest neighbors to return.\n * @param filter Optional OData filter for the documents.\n * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n */\n async hybridSearchVectorWithScore(\n query: string,\n queryVector?: number[],\n k = 4,\n filter: string | undefined = undefined\n ): Promise<[Document, number][]> {\n const vector = queryVector ?? (await this.embeddings.embedQuery(query));\n\n await this.initPromise;\n const { results } = await this.client.search(query, {\n vectorSearchOptions: {\n queries: [\n {\n kind: \"vector\",\n vector,\n kNearestNeighborsCount: k,\n fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n },\n ],\n },\n filter,\n top: k,\n });\n\n const docsWithScore: [Document, number][] = [];\n\n for await (const item of results) {\n const document = new Document<\n AzureAISearchDocumentMetadata & { embedding: number[] }\n >({\n pageContent: item.document[DEFAULT_FIELD_CONTENT],\n metadata: {\n ...item.document[DEFAULT_FIELD_METADATA],\n embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n },\n });\n docsWithScore.push([document, item.score]);\n }\n\n return docsWithScore;\n }\n\n /**\n * Performs a hybrid search with semantic reranker using query text.\n * @param query Query text for the similarity search.\n * @param queryVector Query vector for the similarity search.\n * If not provided, the query text will be embedded.\n * @param k=4 Number of nearest neighbors to return.\n * @param filter Optional OData filter for the documents.\n * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n */\n async semanticHybridSearchVectorWithScore(\n query: string,\n queryVector?: number[],\n k = 4,\n filter: string | undefined = undefined\n ): Promise<[Document, number][]> {\n const vector = queryVector ?? 
(await this.embeddings.embedQuery(query));\n\n await this.initPromise;\n const { results } = await this.client.search(query, {\n vectorSearchOptions: {\n queries: [\n {\n kind: \"vector\",\n vector,\n kNearestNeighborsCount: k,\n fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n },\n ],\n },\n filter,\n top: k,\n queryType: \"semantic\",\n semanticSearchOptions: {\n configurationName: \"semantic-search-config\",\n captions: {\n captionType: \"extractive\",\n },\n answers: {\n answerType: \"extractive\",\n },\n },\n });\n\n const docsWithScore: [Document, number][] = [];\n\n for await (const item of results) {\n const document = new Document<\n AzureAISearchDocumentMetadata & { embedding: number[] }\n >({\n pageContent: item.document[DEFAULT_FIELD_CONTENT],\n metadata: {\n ...item.document[DEFAULT_FIELD_METADATA],\n embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n },\n });\n docsWithScore.push([document, item.score]);\n }\n\n return docsWithScore;\n }\n\n /**\n * Performs a similarity search on the vectors stored in the collection.\n * @param queryVector Query vector for the similarity search.\n * @param k=4 Number of nearest neighbors to return.\n * @param filter string OData filter for the documents.\n * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n */\n async similaritySearchVectorWithScore(\n query: number[],\n k: number,\n filter?: string\n ): Promise<[Document, number][]> {\n await this.initPromise;\n\n const { results } = await this.client.search(\"*\", {\n vectorSearchOptions: {\n queries: [\n {\n kind: \"vector\",\n vector: query,\n kNearestNeighborsCount: k,\n fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n },\n ],\n },\n filter,\n });\n\n const docsWithScore: [Document, number][] = [];\n\n for await (const item of results) {\n const document = new Document<\n AzureAISearchDocumentMetadata & { embedding: number[] }\n >({\n pageContent: item.document[DEFAULT_FIELD_CONTENT],\n metadata: {\n ...item.document[DEFAULT_FIELD_METADATA],\n embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n },\n });\n docsWithScore.push([document, item.score]);\n }\n\n return docsWithScore;\n }\n\n /**\n * Return documents selected using the maximal marginal relevance.\n * Maximal marginal relevance optimizes for similarity to the query AND\n * diversity among selected documents.\n * @param query Text to look up documents similar to.\n * @param options.k Number of documents to return.\n * @param options.fetchK=20 Number of documents to fetch before passing to\n * the MMR algorithm.\n * @param options.lambda=0.5 Number between 0 and 1 that determines the\n * degree of diversity among the results, where 0 corresponds to maximum\n * diversity and 1 to minimum diversity.\n * @returns List of documents selected by maximal marginal relevance.\n */\n async maxMarginalRelevanceSearch(\n query: string,\n options: MaxMarginalRelevanceSearchOptions\n ): Promise {\n const { k, fetchK = 20, lambda = 0.5 } = options;\n\n const queryEmbedding = await this.embeddings.embedQuery(query);\n const docs = await this.similaritySearchVectorWithScore(\n queryEmbedding,\n fetchK\n );\n const embeddingList = docs.map((doc) => doc[0].metadata.embedding);\n\n // Re-rank the results using MMR\n const mmrIndexes = maximalMarginalRelevance(\n queryEmbedding,\n embeddingList,\n lambda,\n k\n );\n\n const mmrDocs = mmrIndexes.map((index) => docs[index][0]);\n return mmrDocs;\n }\n\n /**\n * Ensures that an index exists on the AzureAISearchVectorStore.\n * @param indexClient The Azure 
AI Search index client.\n * @returns A promise that resolves when the AzureAISearchVectorStore index has been initialized.\n * @protected\n */\n protected async ensureIndexExists(\n indexClient: SearchIndexClient\n ): Promise {\n try {\n await indexClient.getIndex(this.indexName);\n } catch (e) {\n // Index does not exists, create it\n const searchIndex = this.createSearchIndexDefinition(this.indexName);\n await indexClient.createIndex(searchIndex);\n }\n }\n\n /**\n * Prepares the search index definition for Azure AI Search.\n * @param indexName The name of the index.\n * @returns The SearchIndex object.\n * @protected\n */\n protected createSearchIndexDefinition(indexName: string): SearchIndex {\n return {\n name: indexName,\n vectorSearch: {\n algorithms: [\n {\n name: \"vector-search-algorithm\",\n kind: \"hnsw\",\n parameters: {\n m: 4,\n efSearch: 500,\n metric: \"cosine\",\n efConstruction: 400,\n },\n },\n ],\n profiles: [\n {\n name: \"vector-search-profile\",\n algorithmConfigurationName: \"vector-search-algorithm\",\n },\n ],\n },\n semanticSearch: {\n defaultConfigurationName: \"semantic-search-config\",\n configurations: [\n {\n name: \"semantic-search-config\",\n prioritizedFields: {\n contentFields: [\n {\n name: DEFAULT_FIELD_CONTENT,\n },\n ],\n keywordsFields: [\n {\n name: DEFAULT_FIELD_CONTENT,\n },\n ],\n },\n },\n ],\n },\n fields: [\n {\n name: DEFAULT_FIELD_ID,\n filterable: true,\n key: true,\n type: \"Edm.String\",\n },\n {\n name: DEFAULT_FIELD_CONTENT,\n searchable: true,\n filterable: true,\n type: \"Edm.String\",\n },\n {\n name: DEFAULT_FIELD_CONTENT_VECTOR,\n searchable: true,\n type: \"Collection(Edm.Single)\",\n vectorSearchDimensions: 1536,", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1456404750", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 4044, - "pr_file": "libs/langchain-community/src/vectorstores/azure_aisearch.ts", - "discussion_id": "1456404750", - "commented_code": "@@ -0,0 +1,694 @@\n+import * as uuid from \"uuid\";\n+import {\n+ SearchClient,\n+ SearchIndexClient,\n+ AzureKeyCredential,\n+ IndexingResult,\n+ SearchIndex,\n+} from \"@azure/search-documents\";\n+import {\n+ MaxMarginalRelevanceSearchOptions,\n+ VectorStore,\n+} from \"@langchain/core/vectorstores\";\n+import type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { Document } from \"@langchain/core/documents\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export const AzureAISearchQueryType = {\n+ /** Vector search. */\n+ Similarity: \"similarity\",\n+ /** Hybrid full text and vector search. */\n+ SimilarityHybrid: \"similarity_hybrid\",\n+ /** Hybrid full text and vector search with semantic ranking. 
*/\n+ SemanticHybrid: \"semantic_hybrid\",\n+} as const;\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export type AzureAISearchQueryType =\n+ (typeof AzureAISearchQueryType)[keyof typeof AzureAISearchQueryType];\n+\n+/**\n+ * Azure AI Search settings.\n+ */\n+export interface AzureAISearchQueryOptions {\n+ readonly type: AzureAISearchQueryType;\n+ readonly semanticConfigurationName?: string;\n+}\n+\n+/**\n+ * Configuration options for the `AzureAISearchStore` constructor.\n+ */\n+export interface AzureAISearchConfig {\n+ readonly client?: SearchClient;\n+ readonly indexName?: string;\n+ readonly endpoint?: string;\n+ readonly key?: string;\n+ readonly search: AzureAISearchQueryOptions;\n+ /**\n+ * The amount of documents to chunk by when adding vectors.\n+ * @default 100\n+ */\n+ readonly chunkSize?: number;\n+ /**\n+ * The amount of documents to embed at once when adding documents.\n+ * Note that some providers like Azure OpenAI can only embed 16 documents\n+ * at a time.\n+ * @default 16\n+ */\n+ readonly embeddingBatchSize?: number;\n+}\n+\n+/**\n+ * Azure AI Search options metadata schema.\n+ * If yout want to add custom data, use the attributes property.\n+ */\n+export type AzureAISearchDocumentMetadata = {\n+ source: string;\n+ attributes?: Array<{ key: string; value: string }>;\n+};\n+\n+/**\n+ * Azure AI Search indexed document.\n+ */\n+export type AzureAISearchDocument = {\n+ id: string;\n+ content: string;\n+ content_vector: number[];\n+ metadata: AzureAISearchDocumentMetadata;\n+};\n+\n+/**\n+ * Azure AI Search options for adding documents.\n+ */\n+export type AzureAISearchAddDocumentsOptions = {\n+ ids?: string[];\n+};\n+\n+const DEFAULT_FIELD_ID = \"id\";\n+const DEFAULT_FIELD_CONTENT = \"content\";\n+const DEFAULT_FIELD_CONTENT_VECTOR = \"content_vector\";\n+const DEFAULT_FIELD_METADATA = \"metadata\";\n+const DEFAULT_FIELD_METADATA_SOURCE = \"source\";\n+const DEFAULT_FIELD_METADATA_ATTRS = \"attributes\";\n+\n+/**\n+ * Azure AI Search vector store.\n+ * To use this, you should have:\n+ * - the `@azure/search-documents` NPM package installed\n+ * - an endpoint and key to the Azure AI Search instance\n+ *\n+ * If you directly provide a `SearchClient` instance, you need to ensure that\n+ * an index has been created. When using and endpoint and key, the index will\n+ * be created automatically if it does not exist.\n+ */\n+export class AzureAISearchVectorStore extends VectorStore {\n+ declare FilterType: string;\n+\n+ get lc_secrets(): { [key: string]: string } {\n+ return {\n+ endpoint: \"AZURE_AISEARCH_ENDPOINT\",\n+ key: \"AZURE_AISEARCH_KEY\",\n+ };\n+ }\n+\n+ _vectorstoreType(): string {\n+ return \"azure_aisearch\";\n+ }\n+\n+ private readonly initPromise: Promise;\n+\n+ private readonly client: SearchClient;\n+\n+ private readonly indexName: string;\n+\n+ private readonly chunkSize: number;\n+\n+ private readonly embeddingBatchSize: number;\n+\n+ private readonly options: AzureAISearchQueryOptions;\n+\n+ constructor(embeddings: EmbeddingsInterface, config: AzureAISearchConfig) {\n+ super(embeddings, config);\n+\n+ const endpoint =\n+ config.endpoint ?? getEnvironmentVariable(\"AZURE_AISEARCH_ENDPOINT\");\n+ const key = config.key ?? getEnvironmentVariable(\"AZURE_AISEARCH_KEY\");\n+\n+ if (!config.client && !endpoint && !key) {\n+ throw new Error(\n+ \"Azure AI Search client or connection string must be set.\"\n+ );\n+ }\n+\n+ this.indexName = config.indexName ?? \"vectorsearch\";\n+ this.chunkSize = config.chunkSize ?? 
100;\n+ this.embeddingBatchSize = config.embeddingBatchSize ?? 16;\n+\n+ if (!config.client) {\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ const credential = new AzureKeyCredential(key!);\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ this.client = new SearchClient(endpoint!, this.indexName, credential);\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ const indexClient = new SearchIndexClient(endpoint!, credential);\n+\n+ // Start initialization, but don't wait for it to finish here\n+ this.initPromise = this.ensureIndexExists(indexClient).catch((error) => {\n+ console.error(\n+ \"Error during Azure AI Search index initialization:\",\n+ error\n+ );\n+ });\n+ } else {\n+ this.client = config.client;\n+ }\n+\n+ this.options = config.search;\n+ this.embeddings = embeddings;\n+ }\n+\n+ /**\n+ * Removes specified documents from the AzureAISearchVectorStore using a filter.\n+ * @param filter OData filter to find documents to delete.\n+ * @returns A promise that resolves when the documents have been removed.\n+ */\n+ async deleteMany(filter: string): Promise {\n+ const { results } = await this.client.search(\"*\", {\n+ filter,\n+ });\n+\n+ const ids: string[] = [];\n+ for await (const item of results) {\n+ ids.push(item.document.id);\n+ }\n+\n+ const { results: deleteResults } = await this.client.deleteDocuments(\n+ DEFAULT_FIELD_ID,\n+ ids\n+ );\n+ return deleteResults;\n+ }\n+\n+ /**\n+ * Removes specified documents from the AzureAISearchVectorStore.\n+ * @param ids IDs of the documents to be removed.\n+ * @returns A promise that resolves when the documents have been removed.\n+ */\n+ async deleteById(ids: string | string[]): Promise {\n+ await this.initPromise;\n+ const { results } = await this.client.deleteDocuments(\n+ DEFAULT_FIELD_ID,\n+ Array.isArray(ids) ? ids : [ids]\n+ );\n+ return results;\n+ }\n+\n+ /**\n+ * Adds documents to the AzureAISearchVectorStore.\n+ * Documents are chunked into batches of size `embeddingBatchSize` then\n+ * embedded and added to the AzureAISearchVectorStore.\n+ * @param documents The documents to add.\n+ * @param options Options for adding documents.\n+ * @returns A promise that resolves to the ids of the added documents.\n+ */\n+ async addDocuments(\n+ documents: Document[],\n+ options?: AzureAISearchAddDocumentsOptions\n+ ) {\n+ const texts = documents.map(({ pageContent }) => pageContent);\n+ const results: string[] = [];\n+\n+ for (let i = 0; i < texts.length; i += this.embeddingBatchSize) {\n+ const batch = texts.slice(i, i + this.embeddingBatchSize);\n+ const docsBatch = documents.slice(i, i + this.embeddingBatchSize);\n+ const batchEmbeddings: number[][] = await this.embeddings.embedDocuments(\n+ batch\n+ );\n+ const batchResult = await this.addVectors(\n+ batchEmbeddings,\n+ docsBatch,\n+ options\n+ );\n+\n+ results.push(...batchResult);\n+ }\n+\n+ return results;\n+ }\n+\n+ /**\n+ * Adds vectors to the AzureAISearchVectorStore.\n+ * @param vectors Vectors to be added.\n+ * @param documents Corresponding documents to be added.\n+ * @param options Options for adding documents.\n+ * @returns A promise that resolves to the ids of the added documents.\n+ */\n+ async addVectors(\n+ vectors: number[][],\n+ documents: Document[],\n+ options?: AzureAISearchAddDocumentsOptions\n+ ): Promise {\n+ const ids = options?.ids ?? 
documents.map(() => uuid.v4());\n+ const entities: AzureAISearchDocument[] = documents.map((doc, idx) => ({\n+ id: ids[idx],\n+ content: doc.pageContent,\n+ content_vector: vectors[idx],\n+ metadata: {\n+ source: doc.metadata?.source,\n+ attributes: doc.metadata?.attributes ?? [],\n+ },\n+ }));\n+\n+ await this.initPromise;\n+ for (let i = 0; i < entities.length; i += this.chunkSize) {\n+ const chunk = entities.slice(i, i + this.chunkSize);\n+ await this.client.uploadDocuments(chunk, { throwOnAnyFailure: true });\n+ }\n+\n+ return ids;\n+ }\n+\n+ /**\n+ * Performs a similarity search using query type specified in configuration.\n+ * @param query Query text for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearch(\n+ query: string,\n+ k = 4,\n+ filter: this[\"FilterType\"] | undefined = undefined\n+ ): Promise {\n+ const results = await this.similaritySearchWithScore(query, k, filter);\n+\n+ return results.map((result) => result[0]);\n+ }\n+\n+ /**\n+ * Performs a similarity search using query type specified in configuration.\n+ * @param query Query text for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearchWithScore(\n+ query: string,\n+ k = 4,\n+ filter: this[\"FilterType\"] | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const searchType = this.options.type;\n+\n+ if (searchType === AzureAISearchQueryType.Similarity) {\n+ return this.similaritySearchVectorWithScore(\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ } else if (searchType === AzureAISearchQueryType.SimilarityHybrid) {\n+ return this.hybridSearchVectorWithScore(\n+ query,\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ } else if (searchType === AzureAISearchQueryType.SemanticHybrid) {\n+ return this.semanticHybridSearchVectorWithScore(\n+ query,\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ }\n+\n+ throw new Error(`Unrecognized search type '${searchType}'`);\n+ }\n+\n+ /**\n+ * Performs a hybrid search using query text.\n+ * @param query Query text for the similarity search.\n+ * @param queryVector Query vector for the similarity search.\n+ * If not provided, the query text will be embedded.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async hybridSearchVectorWithScore(\n+ query: string,\n+ queryVector?: number[],\n+ k = 4,\n+ filter: string | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const vector = queryVector ?? 
(await this.embeddings.embedQuery(query));\n+\n+ await this.initPromise;\n+ const { results } = await this.client.search(query, {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ top: k,\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ ...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Performs a hybrid search with semantic reranker using query text.\n+ * @param query Query text for the similarity search.\n+ * @param queryVector Query vector for the similarity search.\n+ * If not provided, the query text will be embedded.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async semanticHybridSearchVectorWithScore(\n+ query: string,\n+ queryVector?: number[],\n+ k = 4,\n+ filter: string | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const vector = queryVector ?? (await this.embeddings.embedQuery(query));\n+\n+ await this.initPromise;\n+ const { results } = await this.client.search(query, {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ top: k,\n+ queryType: \"semantic\",\n+ semanticSearchOptions: {\n+ configurationName: \"semantic-search-config\",\n+ captions: {\n+ captionType: \"extractive\",\n+ },\n+ answers: {\n+ answerType: \"extractive\",\n+ },\n+ },\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ ...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Performs a similarity search on the vectors stored in the collection.\n+ * @param queryVector Query vector for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter string OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearchVectorWithScore(\n+ query: number[],\n+ k: number,\n+ filter?: string\n+ ): Promise<[Document, number][]> {\n+ await this.initPromise;\n+\n+ const { results } = await this.client.search(\"*\", {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector: query,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ 
...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Return documents selected using the maximal marginal relevance.\n+ * Maximal marginal relevance optimizes for similarity to the query AND\n+ * diversity among selected documents.\n+ * @param query Text to look up documents similar to.\n+ * @param options.k Number of documents to return.\n+ * @param options.fetchK=20 Number of documents to fetch before passing to\n+ * the MMR algorithm.\n+ * @param options.lambda=0.5 Number between 0 and 1 that determines the\n+ * degree of diversity among the results, where 0 corresponds to maximum\n+ * diversity and 1 to minimum diversity.\n+ * @returns List of documents selected by maximal marginal relevance.\n+ */\n+ async maxMarginalRelevanceSearch(\n+ query: string,\n+ options: MaxMarginalRelevanceSearchOptions\n+ ): Promise {\n+ const { k, fetchK = 20, lambda = 0.5 } = options;\n+\n+ const queryEmbedding = await this.embeddings.embedQuery(query);\n+ const docs = await this.similaritySearchVectorWithScore(\n+ queryEmbedding,\n+ fetchK\n+ );\n+ const embeddingList = docs.map((doc) => doc[0].metadata.embedding);\n+\n+ // Re-rank the results using MMR\n+ const mmrIndexes = maximalMarginalRelevance(\n+ queryEmbedding,\n+ embeddingList,\n+ lambda,\n+ k\n+ );\n+\n+ const mmrDocs = mmrIndexes.map((index) => docs[index][0]);\n+ return mmrDocs;\n+ }\n+\n+ /**\n+ * Ensures that an index exists on the AzureAISearchVectorStore.\n+ * @param indexClient The Azure AI Search index client.\n+ * @returns A promise that resolves when the AzureAISearchVectorStore index has been initialized.\n+ * @protected\n+ */\n+ protected async ensureIndexExists(\n+ indexClient: SearchIndexClient\n+ ): Promise {\n+ try {\n+ await indexClient.getIndex(this.indexName);\n+ } catch (e) {\n+ // Index does not exists, create it\n+ const searchIndex = this.createSearchIndexDefinition(this.indexName);\n+ await indexClient.createIndex(searchIndex);\n+ }\n+ }\n+\n+ /**\n+ * Prepares the search index definition for Azure AI Search.\n+ * @param indexName The name of the index.\n+ * @returns The SearchIndex object.\n+ * @protected\n+ */\n+ protected createSearchIndexDefinition(indexName: string): SearchIndex {\n+ return {\n+ name: indexName,\n+ vectorSearch: {\n+ algorithms: [\n+ {\n+ name: \"vector-search-algorithm\",\n+ kind: \"hnsw\",\n+ parameters: {\n+ m: 4,\n+ efSearch: 500,\n+ metric: \"cosine\",\n+ efConstruction: 400,\n+ },\n+ },\n+ ],\n+ profiles: [\n+ {\n+ name: \"vector-search-profile\",\n+ algorithmConfigurationName: \"vector-search-algorithm\",\n+ },\n+ ],\n+ },\n+ semanticSearch: {\n+ defaultConfigurationName: \"semantic-search-config\",\n+ configurations: [\n+ {\n+ name: \"semantic-search-config\",\n+ prioritizedFields: {\n+ contentFields: [\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ },\n+ ],\n+ keywordsFields: [\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ },\n+ ],\n+ },\n+ },\n+ ],\n+ },\n+ fields: [\n+ {\n+ name: DEFAULT_FIELD_ID,\n+ filterable: true,\n+ key: true,\n+ type: \"Edm.String\",\n+ },\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ searchable: true,\n+ filterable: true,\n+ type: \"Edm.String\",\n+ },\n+ {\n+ name: DEFAULT_FIELD_CONTENT_VECTOR,\n+ searchable: true,\n+ type: \"Collection(Edm.Single)\",\n+ vectorSearchDimensions: 1536,", - "comment_created_at": "2024-01-17T19:56:38+00:00", - "comment_author": "pablocastro", - "comment_body": "Dimensions hardcoded to ada-002's 
size here, but I'm sure there are many cases where users will use other embedding models. Shouldn't this come from config or by sampling a vector of the ones being used for embeddings?", - "pr_file_module": null - }, - { - "comment_id": "1457272275", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 4044, - "pr_file": "libs/langchain-community/src/vectorstores/azure_aisearch.ts", - "discussion_id": "1456404750", - "commented_code": "@@ -0,0 +1,694 @@\n+import * as uuid from \"uuid\";\n+import {\n+ SearchClient,\n+ SearchIndexClient,\n+ AzureKeyCredential,\n+ IndexingResult,\n+ SearchIndex,\n+} from \"@azure/search-documents\";\n+import {\n+ MaxMarginalRelevanceSearchOptions,\n+ VectorStore,\n+} from \"@langchain/core/vectorstores\";\n+import type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { Document } from \"@langchain/core/documents\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export const AzureAISearchQueryType = {\n+ /** Vector search. */\n+ Similarity: \"similarity\",\n+ /** Hybrid full text and vector search. */\n+ SimilarityHybrid: \"similarity_hybrid\",\n+ /** Hybrid full text and vector search with semantic ranking. */\n+ SemanticHybrid: \"semantic_hybrid\",\n+} as const;\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export type AzureAISearchQueryType =\n+ (typeof AzureAISearchQueryType)[keyof typeof AzureAISearchQueryType];\n+\n+/**\n+ * Azure AI Search settings.\n+ */\n+export interface AzureAISearchQueryOptions {\n+ readonly type: AzureAISearchQueryType;\n+ readonly semanticConfigurationName?: string;\n+}\n+\n+/**\n+ * Configuration options for the `AzureAISearchStore` constructor.\n+ */\n+export interface AzureAISearchConfig {\n+ readonly client?: SearchClient;\n+ readonly indexName?: string;\n+ readonly endpoint?: string;\n+ readonly key?: string;\n+ readonly search: AzureAISearchQueryOptions;\n+ /**\n+ * The amount of documents to chunk by when adding vectors.\n+ * @default 100\n+ */\n+ readonly chunkSize?: number;\n+ /**\n+ * The amount of documents to embed at once when adding documents.\n+ * Note that some providers like Azure OpenAI can only embed 16 documents\n+ * at a time.\n+ * @default 16\n+ */\n+ readonly embeddingBatchSize?: number;\n+}\n+\n+/**\n+ * Azure AI Search options metadata schema.\n+ * If yout want to add custom data, use the attributes property.\n+ */\n+export type AzureAISearchDocumentMetadata = {\n+ source: string;\n+ attributes?: Array<{ key: string; value: string }>;\n+};\n+\n+/**\n+ * Azure AI Search indexed document.\n+ */\n+export type AzureAISearchDocument = {\n+ id: string;\n+ content: string;\n+ content_vector: number[];\n+ metadata: AzureAISearchDocumentMetadata;\n+};\n+\n+/**\n+ * Azure AI Search options for adding documents.\n+ */\n+export type AzureAISearchAddDocumentsOptions = {\n+ ids?: string[];\n+};\n+\n+const DEFAULT_FIELD_ID = \"id\";\n+const DEFAULT_FIELD_CONTENT = \"content\";\n+const DEFAULT_FIELD_CONTENT_VECTOR = \"content_vector\";\n+const DEFAULT_FIELD_METADATA = \"metadata\";\n+const DEFAULT_FIELD_METADATA_SOURCE = \"source\";\n+const DEFAULT_FIELD_METADATA_ATTRS = \"attributes\";\n+\n+/**\n+ * Azure AI Search vector store.\n+ * To use this, you should have:\n+ * - the `@azure/search-documents` NPM package installed\n+ * - an endpoint and key to the Azure AI Search instance\n+ *\n+ * If you directly provide a `SearchClient` instance, 
you need to ensure that\n+ * an index has been created. When using and endpoint and key, the index will\n+ * be created automatically if it does not exist.\n+ */\n+export class AzureAISearchVectorStore extends VectorStore {\n+ declare FilterType: string;\n+\n+ get lc_secrets(): { [key: string]: string } {\n+ return {\n+ endpoint: \"AZURE_AISEARCH_ENDPOINT\",\n+ key: \"AZURE_AISEARCH_KEY\",\n+ };\n+ }\n+\n+ _vectorstoreType(): string {\n+ return \"azure_aisearch\";\n+ }\n+\n+ private readonly initPromise: Promise;\n+\n+ private readonly client: SearchClient;\n+\n+ private readonly indexName: string;\n+\n+ private readonly chunkSize: number;\n+\n+ private readonly embeddingBatchSize: number;\n+\n+ private readonly options: AzureAISearchQueryOptions;\n+\n+ constructor(embeddings: EmbeddingsInterface, config: AzureAISearchConfig) {\n+ super(embeddings, config);\n+\n+ const endpoint =\n+ config.endpoint ?? getEnvironmentVariable(\"AZURE_AISEARCH_ENDPOINT\");\n+ const key = config.key ?? getEnvironmentVariable(\"AZURE_AISEARCH_KEY\");\n+\n+ if (!config.client && !endpoint && !key) {\n+ throw new Error(\n+ \"Azure AI Search client or connection string must be set.\"\n+ );\n+ }\n+\n+ this.indexName = config.indexName ?? \"vectorsearch\";\n+ this.chunkSize = config.chunkSize ?? 100;\n+ this.embeddingBatchSize = config.embeddingBatchSize ?? 16;\n+\n+ if (!config.client) {\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ const credential = new AzureKeyCredential(key!);\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ this.client = new SearchClient(endpoint!, this.indexName, credential);\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ const indexClient = new SearchIndexClient(endpoint!, credential);\n+\n+ // Start initialization, but don't wait for it to finish here\n+ this.initPromise = this.ensureIndexExists(indexClient).catch((error) => {\n+ console.error(\n+ \"Error during Azure AI Search index initialization:\",\n+ error\n+ );\n+ });\n+ } else {\n+ this.client = config.client;\n+ }\n+\n+ this.options = config.search;\n+ this.embeddings = embeddings;\n+ }\n+\n+ /**\n+ * Removes specified documents from the AzureAISearchVectorStore using a filter.\n+ * @param filter OData filter to find documents to delete.\n+ * @returns A promise that resolves when the documents have been removed.\n+ */\n+ async deleteMany(filter: string): Promise {\n+ const { results } = await this.client.search(\"*\", {\n+ filter,\n+ });\n+\n+ const ids: string[] = [];\n+ for await (const item of results) {\n+ ids.push(item.document.id);\n+ }\n+\n+ const { results: deleteResults } = await this.client.deleteDocuments(\n+ DEFAULT_FIELD_ID,\n+ ids\n+ );\n+ return deleteResults;\n+ }\n+\n+ /**\n+ * Removes specified documents from the AzureAISearchVectorStore.\n+ * @param ids IDs of the documents to be removed.\n+ * @returns A promise that resolves when the documents have been removed.\n+ */\n+ async deleteById(ids: string | string[]): Promise {\n+ await this.initPromise;\n+ const { results } = await this.client.deleteDocuments(\n+ DEFAULT_FIELD_ID,\n+ Array.isArray(ids) ? 
ids : [ids]\n+ );\n+ return results;\n+ }\n+\n+ /**\n+ * Adds documents to the AzureAISearchVectorStore.\n+ * Documents are chunked into batches of size `embeddingBatchSize` then\n+ * embedded and added to the AzureAISearchVectorStore.\n+ * @param documents The documents to add.\n+ * @param options Options for adding documents.\n+ * @returns A promise that resolves to the ids of the added documents.\n+ */\n+ async addDocuments(\n+ documents: Document[],\n+ options?: AzureAISearchAddDocumentsOptions\n+ ) {\n+ const texts = documents.map(({ pageContent }) => pageContent);\n+ const results: string[] = [];\n+\n+ for (let i = 0; i < texts.length; i += this.embeddingBatchSize) {\n+ const batch = texts.slice(i, i + this.embeddingBatchSize);\n+ const docsBatch = documents.slice(i, i + this.embeddingBatchSize);\n+ const batchEmbeddings: number[][] = await this.embeddings.embedDocuments(\n+ batch\n+ );\n+ const batchResult = await this.addVectors(\n+ batchEmbeddings,\n+ docsBatch,\n+ options\n+ );\n+\n+ results.push(...batchResult);\n+ }\n+\n+ return results;\n+ }\n+\n+ /**\n+ * Adds vectors to the AzureAISearchVectorStore.\n+ * @param vectors Vectors to be added.\n+ * @param documents Corresponding documents to be added.\n+ * @param options Options for adding documents.\n+ * @returns A promise that resolves to the ids of the added documents.\n+ */\n+ async addVectors(\n+ vectors: number[][],\n+ documents: Document[],\n+ options?: AzureAISearchAddDocumentsOptions\n+ ): Promise {\n+ const ids = options?.ids ?? documents.map(() => uuid.v4());\n+ const entities: AzureAISearchDocument[] = documents.map((doc, idx) => ({\n+ id: ids[idx],\n+ content: doc.pageContent,\n+ content_vector: vectors[idx],\n+ metadata: {\n+ source: doc.metadata?.source,\n+ attributes: doc.metadata?.attributes ?? 
[],\n+ },\n+ }));\n+\n+ await this.initPromise;\n+ for (let i = 0; i < entities.length; i += this.chunkSize) {\n+ const chunk = entities.slice(i, i + this.chunkSize);\n+ await this.client.uploadDocuments(chunk, { throwOnAnyFailure: true });\n+ }\n+\n+ return ids;\n+ }\n+\n+ /**\n+ * Performs a similarity search using query type specified in configuration.\n+ * @param query Query text for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearch(\n+ query: string,\n+ k = 4,\n+ filter: this[\"FilterType\"] | undefined = undefined\n+ ): Promise {\n+ const results = await this.similaritySearchWithScore(query, k, filter);\n+\n+ return results.map((result) => result[0]);\n+ }\n+\n+ /**\n+ * Performs a similarity search using query type specified in configuration.\n+ * @param query Query text for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearchWithScore(\n+ query: string,\n+ k = 4,\n+ filter: this[\"FilterType\"] | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const searchType = this.options.type;\n+\n+ if (searchType === AzureAISearchQueryType.Similarity) {\n+ return this.similaritySearchVectorWithScore(\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ } else if (searchType === AzureAISearchQueryType.SimilarityHybrid) {\n+ return this.hybridSearchVectorWithScore(\n+ query,\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ } else if (searchType === AzureAISearchQueryType.SemanticHybrid) {\n+ return this.semanticHybridSearchVectorWithScore(\n+ query,\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ }\n+\n+ throw new Error(`Unrecognized search type '${searchType}'`);\n+ }\n+\n+ /**\n+ * Performs a hybrid search using query text.\n+ * @param query Query text for the similarity search.\n+ * @param queryVector Query vector for the similarity search.\n+ * If not provided, the query text will be embedded.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async hybridSearchVectorWithScore(\n+ query: string,\n+ queryVector?: number[],\n+ k = 4,\n+ filter: string | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const vector = queryVector ?? 
(await this.embeddings.embedQuery(query));\n+\n+ await this.initPromise;\n+ const { results } = await this.client.search(query, {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ top: k,\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ ...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Performs a hybrid search with semantic reranker using query text.\n+ * @param query Query text for the similarity search.\n+ * @param queryVector Query vector for the similarity search.\n+ * If not provided, the query text will be embedded.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async semanticHybridSearchVectorWithScore(\n+ query: string,\n+ queryVector?: number[],\n+ k = 4,\n+ filter: string | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const vector = queryVector ?? (await this.embeddings.embedQuery(query));\n+\n+ await this.initPromise;\n+ const { results } = await this.client.search(query, {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ top: k,\n+ queryType: \"semantic\",\n+ semanticSearchOptions: {\n+ configurationName: \"semantic-search-config\",\n+ captions: {\n+ captionType: \"extractive\",\n+ },\n+ answers: {\n+ answerType: \"extractive\",\n+ },\n+ },\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ ...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Performs a similarity search on the vectors stored in the collection.\n+ * @param queryVector Query vector for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter string OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearchVectorWithScore(\n+ query: number[],\n+ k: number,\n+ filter?: string\n+ ): Promise<[Document, number][]> {\n+ await this.initPromise;\n+\n+ const { results } = await this.client.search(\"*\", {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector: query,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ 
...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Return documents selected using the maximal marginal relevance.\n+ * Maximal marginal relevance optimizes for similarity to the query AND\n+ * diversity among selected documents.\n+ * @param query Text to look up documents similar to.\n+ * @param options.k Number of documents to return.\n+ * @param options.fetchK=20 Number of documents to fetch before passing to\n+ * the MMR algorithm.\n+ * @param options.lambda=0.5 Number between 0 and 1 that determines the\n+ * degree of diversity among the results, where 0 corresponds to maximum\n+ * diversity and 1 to minimum diversity.\n+ * @returns List of documents selected by maximal marginal relevance.\n+ */\n+ async maxMarginalRelevanceSearch(\n+ query: string,\n+ options: MaxMarginalRelevanceSearchOptions\n+ ): Promise {\n+ const { k, fetchK = 20, lambda = 0.5 } = options;\n+\n+ const queryEmbedding = await this.embeddings.embedQuery(query);\n+ const docs = await this.similaritySearchVectorWithScore(\n+ queryEmbedding,\n+ fetchK\n+ );\n+ const embeddingList = docs.map((doc) => doc[0].metadata.embedding);\n+\n+ // Re-rank the results using MMR\n+ const mmrIndexes = maximalMarginalRelevance(\n+ queryEmbedding,\n+ embeddingList,\n+ lambda,\n+ k\n+ );\n+\n+ const mmrDocs = mmrIndexes.map((index) => docs[index][0]);\n+ return mmrDocs;\n+ }\n+\n+ /**\n+ * Ensures that an index exists on the AzureAISearchVectorStore.\n+ * @param indexClient The Azure AI Search index client.\n+ * @returns A promise that resolves when the AzureAISearchVectorStore index has been initialized.\n+ * @protected\n+ */\n+ protected async ensureIndexExists(\n+ indexClient: SearchIndexClient\n+ ): Promise {\n+ try {\n+ await indexClient.getIndex(this.indexName);\n+ } catch (e) {\n+ // Index does not exists, create it\n+ const searchIndex = this.createSearchIndexDefinition(this.indexName);\n+ await indexClient.createIndex(searchIndex);\n+ }\n+ }\n+\n+ /**\n+ * Prepares the search index definition for Azure AI Search.\n+ * @param indexName The name of the index.\n+ * @returns The SearchIndex object.\n+ * @protected\n+ */\n+ protected createSearchIndexDefinition(indexName: string): SearchIndex {\n+ return {\n+ name: indexName,\n+ vectorSearch: {\n+ algorithms: [\n+ {\n+ name: \"vector-search-algorithm\",\n+ kind: \"hnsw\",\n+ parameters: {\n+ m: 4,\n+ efSearch: 500,\n+ metric: \"cosine\",\n+ efConstruction: 400,\n+ },\n+ },\n+ ],\n+ profiles: [\n+ {\n+ name: \"vector-search-profile\",\n+ algorithmConfigurationName: \"vector-search-algorithm\",\n+ },\n+ ],\n+ },\n+ semanticSearch: {\n+ defaultConfigurationName: \"semantic-search-config\",\n+ configurations: [\n+ {\n+ name: \"semantic-search-config\",\n+ prioritizedFields: {\n+ contentFields: [\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ },\n+ ],\n+ keywordsFields: [\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ },\n+ ],\n+ },\n+ },\n+ ],\n+ },\n+ fields: [\n+ {\n+ name: DEFAULT_FIELD_ID,\n+ filterable: true,\n+ key: true,\n+ type: \"Edm.String\",\n+ },\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ searchable: true,\n+ filterable: true,\n+ type: \"Edm.String\",\n+ },\n+ {\n+ name: DEFAULT_FIELD_CONTENT_VECTOR,\n+ searchable: true,\n+ type: \"Collection(Edm.Single)\",\n+ vectorSearchDimensions: 1536,", - "comment_created_at": "2024-01-18T10:55:02+00:00", - "comment_author": "sinedied", - "comment_body": "Good catch. 
I'll check if we can get it from the LC embedding model directly or add it to the config", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1924384814", - "pr_number": 7561, - "pr_file": "libs/langchain-community/src/tools/tavily_search.ts", - "created_at": "2025-01-21T21:28:07+00:00", - "commented_code": "protected kwargs: Record = {};\n\n protected includeImages = false;\n\n protected includeImageDescriptions = false;\n\n protected includeAnswer = false;\n\n protected includeRawContent = false;\n\n protected includeDomains: string[] = [];\n\n protected excludeDomains: string[] = [];\n\n protected searchDepth: \"basic\" | \"deep\" = \"basic\";", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1924384814", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7561, - "pr_file": "libs/langchain-community/src/tools/tavily_search.ts", - "discussion_id": "1924384814", - "commented_code": "@@ -93,11 +170,41 @@ export class TavilySearchResults extends Tool {\n \n protected kwargs: Record = {};\n \n+ protected includeImages = false;\n+\n+ protected includeImageDescriptions = false;\n+\n+ protected includeAnswer = false;\n+\n+ protected includeRawContent = false;\n+\n+ protected includeDomains: string[] = [];\n+\n+ protected excludeDomains: string[] = [];\n+\n+ protected searchDepth: \"basic\" | \"deep\" = \"basic\";", - "comment_created_at": "2025-01-21T21:28:07+00:00", - "comment_author": "jacoblee93", - "comment_body": "Let's avoid defaults at this level when possible", - "pr_file_module": null - }, - { - "comment_id": "1927795454", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7561, - "pr_file": "libs/langchain-community/src/tools/tavily_search.ts", - "discussion_id": "1924384814", - "commented_code": "@@ -93,11 +170,41 @@ export class TavilySearchResults extends Tool {\n \n protected kwargs: Record = {};\n \n+ protected includeImages = false;\n+\n+ protected includeImageDescriptions = false;\n+\n+ protected includeAnswer = false;\n+\n+ protected includeRawContent = false;\n+\n+ protected includeDomains: string[] = [];\n+\n+ protected excludeDomains: string[] = [];\n+\n+ protected searchDepth: \"basic\" | \"deep\" = \"basic\";", - "comment_created_at": "2025-01-23T22:57:27+00:00", - "comment_author": "hugoleborso", - "comment_body": "Hi @jacoblee93, I did this because these are actually the default settings of the API doc, and also because `maxResults` already had a default value set corresponding to the default value in the doc.\r\n\r\nIf you want me too I can still change it of course, but I think it makes sense as is.\r\n\r\nLet me know what you think!", - "pr_file_module": null - }, - { - "comment_id": "1929361191", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7561, - "pr_file": "libs/langchain-community/src/tools/tavily_search.ts", - "discussion_id": "1924384814", - "commented_code": "@@ -93,11 +170,41 @@ export class TavilySearchResults extends Tool {\n \n protected kwargs: Record = {};\n \n+ protected includeImages = false;\n+\n+ protected includeImageDescriptions = false;\n+\n+ protected includeAnswer = false;\n+\n+ protected includeRawContent = false;\n+\n+ protected includeDomains: string[] = [];\n+\n+ protected excludeDomains: string[] = [];\n+\n+ protected searchDepth: \"basic\" | \"deep\" = \"basic\";", - "comment_created_at": "2025-01-24T23:25:06+00:00", - "comment_author": "jacoblee93", - "comment_body": "We should ideally let the backend set this in case they change best 
practices, would prefer to have things unset if it won't cause issues (which it wasn't before)", - "pr_file_module": null - }, - { - "comment_id": "1931704972", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7561, - "pr_file": "libs/langchain-community/src/tools/tavily_search.ts", - "discussion_id": "1924384814", - "commented_code": "@@ -93,11 +170,41 @@ export class TavilySearchResults extends Tool {\n \n protected kwargs: Record = {};\n \n+ protected includeImages = false;\n+\n+ protected includeImageDescriptions = false;\n+\n+ protected includeAnswer = false;\n+\n+ protected includeRawContent = false;\n+\n+ protected includeDomains: string[] = [];\n+\n+ protected excludeDomains: string[] = [];\n+\n+ protected searchDepth: \"basic\" | \"deep\" = \"basic\";", - "comment_created_at": "2025-01-28T08:16:23+00:00", - "comment_author": "hugoleborso", - "comment_body": "@jacoblee93 done, let me know if this fit your needs, and thanks for all the quick responses !", - "pr_file_module": null - }, - { - "comment_id": "1934820570", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7561, - "pr_file": "libs/langchain-community/src/tools/tavily_search.ts", - "discussion_id": "1924384814", - "commented_code": "@@ -93,11 +170,41 @@ export class TavilySearchResults extends Tool {\n \n protected kwargs: Record = {};\n \n+ protected includeImages = false;\n+\n+ protected includeImageDescriptions = false;\n+\n+ protected includeAnswer = false;\n+\n+ protected includeRawContent = false;\n+\n+ protected includeDomains: string[] = [];\n+\n+ protected excludeDomains: string[] = [];\n+\n+ protected searchDepth: \"basic\" | \"deep\" = \"basic\";", - "comment_created_at": "2025-01-29T23:52:18+00:00", - "comment_author": "jacoblee93", - "comment_body": "Thank you, old defaults have started causing issues for OpenAI so it's top of mind right now", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1829838921", - "pr_number": 6999, - "pr_file": "libs/langchain-google-common/src/connection.ts", - "created_at": "2024-11-05T18:39:30+00:00", - "commented_code": "}\n }\n\n get computedLocation(): string {\n switch (this.apiName) {\n case \"google\":\n return super.computedLocation;\n case \"anthropic\":\n return \"us-east5\";", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1829838921", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6999, - "pr_file": "libs/langchain-google-common/src/connection.ts", - "discussion_id": "1829838921", - "commented_code": "@@ -245,6 +288,19 @@ export abstract class GoogleAIConnection<\n }\n }\n \n+ get computedLocation(): string {\n+ switch (this.apiName) {\n+ case \"google\":\n+ return super.computedLocation;\n+ case \"anthropic\":\n+ return \"us-east5\";", - "comment_created_at": "2024-11-05T18:39:30+00:00", - "comment_author": "jacoblee93", - "comment_body": "Should we be hardcoding this here?\r\n\r\nIf it's only available in one region now, would prefer to have this configurable or even not have a default at all and just have it documented", - "pr_file_module": null - }, - { - "comment_id": "1829905294", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6999, - "pr_file": "libs/langchain-google-common/src/connection.ts", - "discussion_id": "1829838921", - "commented_code": "@@ -245,6 +288,19 @@ export abstract class GoogleAIConnection<\n }\n }\n \n+ get computedLocation(): string {\n+ switch (this.apiName) {\n+ case \"google\":\n+ return super.computedLocation;\n+ case 
\"anthropic\":\n+ return \"us-east5\";", - "comment_created_at": "2024-11-05T19:35:53+00:00", - "comment_author": "afirstenberg", - "comment_body": "See above about making magic values as unnecessary as possible.\r\n\r\nSome of the Anthropic models are available in other regions, but all are available in us-east5. And none, mysteriously, are available in us-central1.\r\n\r\nI'd like to keep it as a default, but clearly have it available to be settable.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-chunked-data-processing.json b/_reviewers/langchainjs-chunked-data-processing.json new file mode 100644 index 0000000..12fc46e --- /dev/null +++ b/_reviewers/langchainjs-chunked-data-processing.json @@ -0,0 +1,104 @@ +[ + { + "discussion_id": "2116365677", + "pr_number": 8282, + "pr_file": "libs/langchain-mcp-adapters/src/types.ts", + "created_at": "2025-05-30T18:08:27+00:00", + "commented_code": "* Zod schema for an individual content item within a CallToolResult.\n * Derived from CallToolResultSchema.\n */\nconst callToolResultContentSchema = CallToolResultSchema.shape.content.element;\n/**\n * Recursively unwrap common Zod wrappers (default, optional, nullable, effects)\n */\nfunction unwrapSchema(schema) {\n const def = schema._def;\n switch (def?.typeName) {\n case ZodDefault.name:\n case ZodOptional.name:\n case ZodNullable.name:\n return unwrapSchema(def.innerType);\n case ZodEffects.name:\n return unwrapSchema(def.schema);\n default:\n return schema;\n }\n}\n\n/**\n * Zod schema for an individual content item within a CallToolResult.\n * Derived from CallToolResultSchema, with wrappers unwrapped.\n */\nconst arraySchema = CallToolResultSchema.shape.content;\nconst callToolResultContentSchema = unwrapSchema(arraySchema).element;", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2116365677", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8282, + "pr_file": "libs/langchain-mcp-adapters/src/types.ts", + "discussion_id": "2116365677", + "commented_code": "@@ -38,7 +42,29 @@ function isZodLiteral(schema: unknown): schema is ZodLiteral {\n * Zod schema for an individual content item within a CallToolResult.\n * Derived from CallToolResultSchema.\n */\n-const callToolResultContentSchema = CallToolResultSchema.shape.content.element;\n+/**\n+ * Recursively unwrap common Zod wrappers (default, optional, nullable, effects)\n+ */\n+function unwrapSchema(schema) {\n+ const def = schema._def;\n+ switch (def?.typeName) {\n+ case ZodDefault.name:\n+ case ZodOptional.name:\n+ case ZodNullable.name:\n+ return unwrapSchema(def.innerType);\n+ case ZodEffects.name:\n+ return unwrapSchema(def.schema);\n+ default:\n+ return schema;\n+ }\n+}\n+\n+/**\n+ * Zod schema for an individual content item within a CallToolResult.\n+ * Derived from CallToolResultSchema, with wrappers unwrapped.\n+ */\n+const arraySchema = CallToolResultSchema.shape.content;\n+const callToolResultContentSchema = unwrapSchema(arraySchema).element;", + "comment_created_at": "2025-05-30T18:08:27+00:00", + "comment_author": "hntrl", + "comment_body": "```suggestion\r\nfunction isZodDefault(\r\n schema: unknown\r\n): schema is ZodDefault {\r\n return (\r\n typeof schema === \"object\" &&\r\n schema !== null &&\r\n \"_def\" in schema &&\r\n (schema as { _def: { typeName: string } })._def.typeName === \"ZodDefault\"\r\n );\r\n}\r\n\r\nfunction unwrapZodArray(\r\n schema: ZodArray | ZodDefault>\r\n): TElement {\r\n if (isZodDefault(schema)) {\r\n return 
schema._def.innerType.element;\r\n }\r\n return schema.element;\r\n}\r\n\r\n/**\r\n * Zod schema for an individual content item within a CallToolResult.\r\n * Derived from CallToolResultSchema, with wrappers unwrapped.\r\n */\r\nconst callToolResultContentSchema = unwrapZodArray(\r\n CallToolResultSchema.shape.content\r\n);\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1667177345", + "pr_number": 5835, + "pr_file": "libs/langchain-google-common/src/experimental/utils/media_core.ts", + "created_at": "2024-07-05T21:46:14+00:00", + "commented_code": "import { v4 as uuidv4 } from \"uuid\";\nimport { BaseStore } from \"@langchain/core/stores\";\nimport { Serializable } from \"@langchain/core/load/serializable\";\n\nexport interface MediaBlobParameters {\n data?: Blob;\n\n metadata?: Record;\n\n path?: string;\n}\n\n/**\n * Represents a chunk of data that can be identified by the path where the\n * data is (or will be) located, along with optional metadata about the data.\n */\nexport class MediaBlob\n extends Serializable // FIXME - I'm not sure this serializes or deserializes correctly\n implements MediaBlobParameters\n{\n lc_serializable = true;\n\n lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n\n data?: Blob;\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n metadata?: Record;\n\n path?: string;\n\n constructor(params?: MediaBlobParameters) {\n super(params);\n this.data = params?.data;\n this.metadata = params?.metadata;\n this.path = params?.path;\n }\n\n get size(): number {\n return this.data?.size ?? 0;\n }\n\n get dataType(): string {\n return this.data?.type ?? \"\";\n }\n\n get encoding(): string {\n const charsetEquals = this.dataType.indexOf(\"charset=\");\n return charsetEquals === -1\n ? \"utf-8\"\n : this.dataType.substring(charsetEquals + 8);\n }\n\n get mimetype(): string {\n const semicolon = this.dataType.indexOf(\";\");\n return semicolon === -1\n ? this.dataType\n : this.dataType.substring(0, semicolon);\n }\n\n async asString(): Promise {\n const data = this.data ?? new Blob([]);\n const dataBuffer = await data.arrayBuffer();\n const dataArray = new Uint8Array(dataBuffer);\n\n // Need to handle the array in smaller chunks to deal with stack size limits\n let ret = \"\";\n const chunkSize = 102400;\n for (let i = 0; i < dataArray.length; i += chunkSize) {\n const chunk = dataArray.subarray(i, i + chunkSize);\n ret += String.fromCharCode(...chunk);\n }\n\n return ret;\n }\n\n async asBase64(): Promise {\n return btoa(await this.asString());\n }\n\n async asDataUrl(): Promise {\n return `data:${this.mimetype};base64,${await this.asBase64()}`;\n }\n\n async asUri(): Promise {\n return this.path ?? (await this.asDataUrl());\n }\n\n async encode(): Promise<{ encoded: string; encoding: string }> {\n const dataUrl = await this.asDataUrl();\n const comma = dataUrl.indexOf(\",\");\n const encoded = dataUrl.substring(comma + 1);\n const encoding: string = dataUrl.indexOf(\"base64\") > -1 ? 
\"base64\" : \"8bit\";\n return {\n encoded,\n encoding,\n };\n }\n}\n\nexport type ActionIfInvalidAction =\n | \"ignore\"\n | \"prefixPath\"\n | \"prefixUuid\"\n | \"removePath\";\n\nexport interface BlobStoreStoreOptions {\n /**\n * If the path is missing or invalid in the blob, how should we create\n * a new path?\n * Subclasses may define their own methods, but the following are supported\n * by default:\n * - Undefined or an emtpy string: Reject the blob\n * - \"ignore\": Attempt to store it anyway (but this may fail)\n * - \"prefixPath\": Use the default prefix for the BlobStore and get the\n * unique portion from the URL. The original path is stored in the metadata\n * - \"prefixUuid\": Use the default prefix for the BlobStore and get the\n * unique portion from a generated UUID. The original path is stored\n * in the metadata\n */\n actionIfInvalid?: ActionIfInvalidAction;\n\n /**\n * The expected prefix for URIs that are stored.\n * This may be used to test if a MediaBlob is valid and used to create a new\n * path if \"prefixPath\" or \"prefixUuid\" is set for actionIfInvalid.\n */\n pathPrefix?: string;\n}\n\nexport type ActionIfBlobMissingAction = \"emptyBlob\";\n\nexport interface BlobStoreFetchOptions {\n /**\n * If the blob is not found when fetching, what should we do?\n * Subclasses may define their own methods, but the following are supported\n * by default:\n * - Undefined or an empty string: return undefined\n * - \"emptyBlob\": return a new MediaBlob that has the path set, but nothing else.\n */\n actionIfBlobMissing?: ActionIfBlobMissingAction;\n}\n\nexport interface BlobStoreOptions {\n defaultStoreOptions?: BlobStoreStoreOptions;\n\n defaultFetchOptions?: BlobStoreFetchOptions;\n}\n\n/**\n * A specialized Store that is designed to handle MediaBlobs and use the\n * key that is included in the blob to determine exactly how it is stored.\n *\n * The full details of a MediaBlob may be changed when it is stored.\n * For example, it may get additional or different Metadata. This should be\n * what is returned when the store() method is called.\n *\n * Although BlobStore extends BaseStore, not all of the methods from\n * BaseStore may be implemented (or even possible). Those that are not\n * implemented should be documented and throw an Error if called.\n */\nexport abstract class BlobStore extends BaseStore {\n lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n\n defaultStoreOptions: BlobStoreStoreOptions;\n\n defaultFetchOptions: BlobStoreFetchOptions;\n\n constructor(opts?: BlobStoreOptions) {\n super(opts);\n this.defaultStoreOptions = opts?.defaultStoreOptions ?? {};\n this.defaultFetchOptions = opts?.defaultFetchOptions ?? {};\n }\n\n protected async _realKey(key: string | MediaBlob): Promise {\n return typeof key === \"string\" ? 
key : await key.asUri();\n }\n\n /**\n * Is the path set in the MediaBlob supported by this BlobStore?\n * Subclasses must implement and evaluate `blob.path` to make this\n * determination.\n *\n * Although this is async, this is expected to be a relatively fast operation\n * (ie - you shouldn't make network calls).\n *\n * The default implementation assumes that undefined blob.paths are invalid\n * and then uses the replacePathPrefix (or an empty string) as an assumed path\n * to start with.\n *\n * @param blob The blob to test\n * @param opts Any options (if needed) that may be used to determine if it is valid\n * @return If the string represented by blob.path is supported.\n */\n protected _hasValidPath(\n blob: MediaBlob,\n opts?: BlobStoreStoreOptions\n ): Promise {\n const path = blob.path ?? \"\";\n const prefix = opts?.pathPrefix ?? \"\";\n const isPrefixed =\n typeof blob.path !== \"undefined\" && path.startsWith(prefix);\n return Promise.resolve(isPrefixed);\n }\n\n protected _blobPathSuffix(blob: MediaBlob): string {\n // Get the path currently set and make sure we treat it as a string\n const blobPath = `${blob.path}`;\n\n // Advance past the first set of /\n let pathStart = blobPath.indexOf(\"/\") + 1;\n while (blobPath.charAt(pathStart) === \"/\") {\n pathStart += 1;\n }\n\n // We will use the rest as the path for a replacement\n return blobPath.substring(pathStart);\n }\n\n protected async _newBlob(\n oldBlob: MediaBlob,\n newPath?: string\n ): Promise {\n const oldPath = oldBlob.path;\n const metadata = oldBlob?.metadata ?? {};\n metadata.langchainOldPath = oldPath;\n const newBlob = new MediaBlob({\n ...oldBlob,\n metadata,\n });\n if (newPath) {\n newBlob.path = newPath;\n } else if (newBlob.path) {\n delete newBlob.path;\n }\n return newBlob;\n }\n\n protected async _validBlobPrefixPath(\n blob: MediaBlob,\n opts?: BlobStoreStoreOptions\n ): Promise {\n const prefix = opts?.pathPrefix ?? \"\";\n const suffix = this._blobPathSuffix(blob);\n const newPath = `${prefix}${suffix}`;\n return this._newBlob(blob, newPath);\n }\n\n protected async _validBlobPrefixUuid(\n blob: MediaBlob,\n opts?: BlobStoreStoreOptions\n ): Promise {\n const prefix = opts?.pathPrefix ?? 
\"\";\n const suffix = uuidv4(); // TODO - option to specify version?\n const newPath = `${prefix}${suffix}`;\n return this._newBlob(blob, newPath);\n }\n\n protected async _validBlobRemovePath(\n blob: MediaBlob,\n _opts?: BlobStoreStoreOptions\n ): Promise {\n return this._newBlob(blob, undefined);\n }\n\n /**\n * Based on the blob and options, return a blob that has a valid path\n * that can be saved.\n * @param blob\n * @param opts\n */\n protected async _validStoreBlob(\n blob: MediaBlob,\n opts?: BlobStoreStoreOptions\n ): Promise {\n if (await this._hasValidPath(blob, opts)) {\n return blob;\n }\n switch (opts?.actionIfInvalid) {\n case \"ignore\":\n return blob;\n case \"prefixPath\":\n return this._validBlobPrefixPath(blob, opts);\n case \"prefixUuid\":\n return this._validBlobPrefixUuid(blob, opts);\n case \"removePath\":\n return this._validBlobRemovePath(blob, opts);\n default:\n return undefined;\n }\n }\n\n async store(\n blob: MediaBlob,\n opts: BlobStoreStoreOptions = {}\n ): Promise {\n const allOpts: BlobStoreStoreOptions = {\n ...this.defaultStoreOptions,\n ...opts,\n };\n const validBlob = await this._validStoreBlob(blob, allOpts);\n if (typeof validBlob !== \"undefined\") {\n const validKey = await validBlob.asUri();\n await this.mset([[validKey, validBlob]]);\n const savedKey = await validBlob.asUri();\n return await this.fetch(savedKey);\n }\n return undefined;\n }\n\n protected async _missingFetchBlobEmpty(\n path: string,\n _opts?: BlobStoreFetchOptions\n ): Promise {\n return new MediaBlob({ path });\n }\n\n protected async _missingFetchBlob(\n path: string,\n opts?: BlobStoreFetchOptions\n ): Promise {\n switch (opts?.actionIfBlobMissing) {\n case \"emptyBlob\":\n return this._missingFetchBlobEmpty(path, opts);\n default:\n return undefined;\n }\n }\n\n async fetch(\n key: string | MediaBlob,\n opts: BlobStoreFetchOptions = {}\n ): Promise {\n const allOpts: BlobStoreFetchOptions = {\n ...this.defaultFetchOptions,\n ...opts,\n };\n const realKey = await this._realKey(key);\n const ret = await this.mget([realKey]);\n return ret?.[0] ?? (await this._missingFetchBlob(realKey, allOpts));\n }\n}\n\nexport interface BackedBlobStoreOptions extends BlobStoreOptions {\n backingStore: BaseStore;\n}\n\nexport class BackedBlobStore extends BlobStore {", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1667177345", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5835, + "pr_file": "libs/langchain-google-common/src/experimental/utils/media_core.ts", + "discussion_id": "1667177345", + "commented_code": "@@ -0,0 +1,504 @@\n+import { v4 as uuidv4 } from \"uuid\";\n+import { BaseStore } from \"@langchain/core/stores\";\n+import { Serializable } from \"@langchain/core/load/serializable\";\n+\n+export interface MediaBlobParameters {\n+ data?: Blob;\n+\n+ metadata?: Record;\n+\n+ path?: string;\n+}\n+\n+/**\n+ * Represents a chunk of data that can be identified by the path where the\n+ * data is (or will be) located, along with optional metadata about the data.\n+ */\n+export class MediaBlob\n+ extends Serializable // FIXME - I'm not sure this serializes or deserializes correctly\n+ implements MediaBlobParameters\n+{\n+ lc_serializable = true;\n+\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? 
And why?\n+\n+ data?: Blob;\n+\n+ // eslint-disable-next-line @typescript-eslint/no-explicit-any\n+ metadata?: Record;\n+\n+ path?: string;\n+\n+ constructor(params?: MediaBlobParameters) {\n+ super(params);\n+ this.data = params?.data;\n+ this.metadata = params?.metadata;\n+ this.path = params?.path;\n+ }\n+\n+ get size(): number {\n+ return this.data?.size ?? 0;\n+ }\n+\n+ get dataType(): string {\n+ return this.data?.type ?? \"\";\n+ }\n+\n+ get encoding(): string {\n+ const charsetEquals = this.dataType.indexOf(\"charset=\");\n+ return charsetEquals === -1\n+ ? \"utf-8\"\n+ : this.dataType.substring(charsetEquals + 8);\n+ }\n+\n+ get mimetype(): string {\n+ const semicolon = this.dataType.indexOf(\";\");\n+ return semicolon === -1\n+ ? this.dataType\n+ : this.dataType.substring(0, semicolon);\n+ }\n+\n+ async asString(): Promise {\n+ const data = this.data ?? new Blob([]);\n+ const dataBuffer = await data.arrayBuffer();\n+ const dataArray = new Uint8Array(dataBuffer);\n+\n+ // Need to handle the array in smaller chunks to deal with stack size limits\n+ let ret = \"\";\n+ const chunkSize = 102400;\n+ for (let i = 0; i < dataArray.length; i += chunkSize) {\n+ const chunk = dataArray.subarray(i, i + chunkSize);\n+ ret += String.fromCharCode(...chunk);\n+ }\n+\n+ return ret;\n+ }\n+\n+ async asBase64(): Promise {\n+ return btoa(await this.asString());\n+ }\n+\n+ async asDataUrl(): Promise {\n+ return `data:${this.mimetype};base64,${await this.asBase64()}`;\n+ }\n+\n+ async asUri(): Promise {\n+ return this.path ?? (await this.asDataUrl());\n+ }\n+\n+ async encode(): Promise<{ encoded: string; encoding: string }> {\n+ const dataUrl = await this.asDataUrl();\n+ const comma = dataUrl.indexOf(\",\");\n+ const encoded = dataUrl.substring(comma + 1);\n+ const encoding: string = dataUrl.indexOf(\"base64\") > -1 ? \"base64\" : \"8bit\";\n+ return {\n+ encoded,\n+ encoding,\n+ };\n+ }\n+}\n+\n+export type ActionIfInvalidAction =\n+ | \"ignore\"\n+ | \"prefixPath\"\n+ | \"prefixUuid\"\n+ | \"removePath\";\n+\n+export interface BlobStoreStoreOptions {\n+ /**\n+ * If the path is missing or invalid in the blob, how should we create\n+ * a new path?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an emtpy string: Reject the blob\n+ * - \"ignore\": Attempt to store it anyway (but this may fail)\n+ * - \"prefixPath\": Use the default prefix for the BlobStore and get the\n+ * unique portion from the URL. The original path is stored in the metadata\n+ * - \"prefixUuid\": Use the default prefix for the BlobStore and get the\n+ * unique portion from a generated UUID. 
The original path is stored\n+ * in the metadata\n+ */\n+ actionIfInvalid?: ActionIfInvalidAction;\n+\n+ /**\n+ * The expected prefix for URIs that are stored.\n+ * This may be used to test if a MediaBlob is valid and used to create a new\n+ * path if \"prefixPath\" or \"prefixUuid\" is set for actionIfInvalid.\n+ */\n+ pathPrefix?: string;\n+}\n+\n+export type ActionIfBlobMissingAction = \"emptyBlob\";\n+\n+export interface BlobStoreFetchOptions {\n+ /**\n+ * If the blob is not found when fetching, what should we do?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an empty string: return undefined\n+ * - \"emptyBlob\": return a new MediaBlob that has the path set, but nothing else.\n+ */\n+ actionIfBlobMissing?: ActionIfBlobMissingAction;\n+}\n+\n+export interface BlobStoreOptions {\n+ defaultStoreOptions?: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions?: BlobStoreFetchOptions;\n+}\n+\n+/**\n+ * A specialized Store that is designed to handle MediaBlobs and use the\n+ * key that is included in the blob to determine exactly how it is stored.\n+ *\n+ * The full details of a MediaBlob may be changed when it is stored.\n+ * For example, it may get additional or different Metadata. This should be\n+ * what is returned when the store() method is called.\n+ *\n+ * Although BlobStore extends BaseStore, not all of the methods from\n+ * BaseStore may be implemented (or even possible). Those that are not\n+ * implemented should be documented and throw an Error if called.\n+ */\n+export abstract class BlobStore extends BaseStore {\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n+\n+ defaultStoreOptions: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions: BlobStoreFetchOptions;\n+\n+ constructor(opts?: BlobStoreOptions) {\n+ super(opts);\n+ this.defaultStoreOptions = opts?.defaultStoreOptions ?? {};\n+ this.defaultFetchOptions = opts?.defaultFetchOptions ?? {};\n+ }\n+\n+ protected async _realKey(key: string | MediaBlob): Promise {\n+ return typeof key === \"string\" ? key : await key.asUri();\n+ }\n+\n+ /**\n+ * Is the path set in the MediaBlob supported by this BlobStore?\n+ * Subclasses must implement and evaluate `blob.path` to make this\n+ * determination.\n+ *\n+ * Although this is async, this is expected to be a relatively fast operation\n+ * (ie - you shouldn't make network calls).\n+ *\n+ * The default implementation assumes that undefined blob.paths are invalid\n+ * and then uses the replacePathPrefix (or an empty string) as an assumed path\n+ * to start with.\n+ *\n+ * @param blob The blob to test\n+ * @param opts Any options (if needed) that may be used to determine if it is valid\n+ * @return If the string represented by blob.path is supported.\n+ */\n+ protected _hasValidPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const path = blob.path ?? \"\";\n+ const prefix = opts?.pathPrefix ?? 
\"\";\n+ const isPrefixed =\n+ typeof blob.path !== \"undefined\" && path.startsWith(prefix);\n+ return Promise.resolve(isPrefixed);\n+ }\n+\n+ protected _blobPathSuffix(blob: MediaBlob): string {\n+ // Get the path currently set and make sure we treat it as a string\n+ const blobPath = `${blob.path}`;\n+\n+ // Advance past the first set of /\n+ let pathStart = blobPath.indexOf(\"/\") + 1;\n+ while (blobPath.charAt(pathStart) === \"/\") {\n+ pathStart += 1;\n+ }\n+\n+ // We will use the rest as the path for a replacement\n+ return blobPath.substring(pathStart);\n+ }\n+\n+ protected async _newBlob(\n+ oldBlob: MediaBlob,\n+ newPath?: string\n+ ): Promise {\n+ const oldPath = oldBlob.path;\n+ const metadata = oldBlob?.metadata ?? {};\n+ metadata.langchainOldPath = oldPath;\n+ const newBlob = new MediaBlob({\n+ ...oldBlob,\n+ metadata,\n+ });\n+ if (newPath) {\n+ newBlob.path = newPath;\n+ } else if (newBlob.path) {\n+ delete newBlob.path;\n+ }\n+ return newBlob;\n+ }\n+\n+ protected async _validBlobPrefixPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const suffix = this._blobPathSuffix(blob);\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobPrefixUuid(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const suffix = uuidv4(); // TODO - option to specify version?\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobRemovePath(\n+ blob: MediaBlob,\n+ _opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ return this._newBlob(blob, undefined);\n+ }\n+\n+ /**\n+ * Based on the blob and options, return a blob that has a valid path\n+ * that can be saved.\n+ * @param blob\n+ * @param opts\n+ */\n+ protected async _validStoreBlob(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ if (await this._hasValidPath(blob, opts)) {\n+ return blob;\n+ }\n+ switch (opts?.actionIfInvalid) {\n+ case \"ignore\":\n+ return blob;\n+ case \"prefixPath\":\n+ return this._validBlobPrefixPath(blob, opts);\n+ case \"prefixUuid\":\n+ return this._validBlobPrefixUuid(blob, opts);\n+ case \"removePath\":\n+ return this._validBlobRemovePath(blob, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async store(\n+ blob: MediaBlob,\n+ opts: BlobStoreStoreOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreStoreOptions = {\n+ ...this.defaultStoreOptions,\n+ ...opts,\n+ };\n+ const validBlob = await this._validStoreBlob(blob, allOpts);\n+ if (typeof validBlob !== \"undefined\") {\n+ const validKey = await validBlob.asUri();\n+ await this.mset([[validKey, validBlob]]);\n+ const savedKey = await validBlob.asUri();\n+ return await this.fetch(savedKey);\n+ }\n+ return undefined;\n+ }\n+\n+ protected async _missingFetchBlobEmpty(\n+ path: string,\n+ _opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ return new MediaBlob({ path });\n+ }\n+\n+ protected async _missingFetchBlob(\n+ path: string,\n+ opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ switch (opts?.actionIfBlobMissing) {\n+ case \"emptyBlob\":\n+ return this._missingFetchBlobEmpty(path, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async fetch(\n+ key: string | MediaBlob,\n+ opts: BlobStoreFetchOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreFetchOptions = {\n+ ...this.defaultFetchOptions,\n+ ...opts,\n+ };\n+ const realKey = await this._realKey(key);\n+ const ret = 
await this.mget([realKey]);\n+ return ret?.[0] ?? (await this._missingFetchBlob(realKey, allOpts));\n+ }\n+}\n+\n+export interface BackedBlobStoreOptions extends BlobStoreOptions {\n+ backingStore: BaseStore;\n+}\n+\n+export class BackedBlobStore extends BlobStore {", + "comment_created_at": "2024-07-05T21:46:14+00:00", + "comment_author": "eyurtsev", + "comment_body": "Is this abstraction used?", + "pr_file_module": null + }, + { + "comment_id": "1669461213", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5835, + "pr_file": "libs/langchain-google-common/src/experimental/utils/media_core.ts", + "discussion_id": "1667177345", + "commented_code": "@@ -0,0 +1,504 @@\n+import { v4 as uuidv4 } from \"uuid\";\n+import { BaseStore } from \"@langchain/core/stores\";\n+import { Serializable } from \"@langchain/core/load/serializable\";\n+\n+export interface MediaBlobParameters {\n+ data?: Blob;\n+\n+ metadata?: Record;\n+\n+ path?: string;\n+}\n+\n+/**\n+ * Represents a chunk of data that can be identified by the path where the\n+ * data is (or will be) located, along with optional metadata about the data.\n+ */\n+export class MediaBlob\n+ extends Serializable // FIXME - I'm not sure this serializes or deserializes correctly\n+ implements MediaBlobParameters\n+{\n+ lc_serializable = true;\n+\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n+\n+ data?: Blob;\n+\n+ // eslint-disable-next-line @typescript-eslint/no-explicit-any\n+ metadata?: Record;\n+\n+ path?: string;\n+\n+ constructor(params?: MediaBlobParameters) {\n+ super(params);\n+ this.data = params?.data;\n+ this.metadata = params?.metadata;\n+ this.path = params?.path;\n+ }\n+\n+ get size(): number {\n+ return this.data?.size ?? 0;\n+ }\n+\n+ get dataType(): string {\n+ return this.data?.type ?? \"\";\n+ }\n+\n+ get encoding(): string {\n+ const charsetEquals = this.dataType.indexOf(\"charset=\");\n+ return charsetEquals === -1\n+ ? \"utf-8\"\n+ : this.dataType.substring(charsetEquals + 8);\n+ }\n+\n+ get mimetype(): string {\n+ const semicolon = this.dataType.indexOf(\";\");\n+ return semicolon === -1\n+ ? this.dataType\n+ : this.dataType.substring(0, semicolon);\n+ }\n+\n+ async asString(): Promise {\n+ const data = this.data ?? new Blob([]);\n+ const dataBuffer = await data.arrayBuffer();\n+ const dataArray = new Uint8Array(dataBuffer);\n+\n+ // Need to handle the array in smaller chunks to deal with stack size limits\n+ let ret = \"\";\n+ const chunkSize = 102400;\n+ for (let i = 0; i < dataArray.length; i += chunkSize) {\n+ const chunk = dataArray.subarray(i, i + chunkSize);\n+ ret += String.fromCharCode(...chunk);\n+ }\n+\n+ return ret;\n+ }\n+\n+ async asBase64(): Promise {\n+ return btoa(await this.asString());\n+ }\n+\n+ async asDataUrl(): Promise {\n+ return `data:${this.mimetype};base64,${await this.asBase64()}`;\n+ }\n+\n+ async asUri(): Promise {\n+ return this.path ?? (await this.asDataUrl());\n+ }\n+\n+ async encode(): Promise<{ encoded: string; encoding: string }> {\n+ const dataUrl = await this.asDataUrl();\n+ const comma = dataUrl.indexOf(\",\");\n+ const encoded = dataUrl.substring(comma + 1);\n+ const encoding: string = dataUrl.indexOf(\"base64\") > -1 ? 
\"base64\" : \"8bit\";\n+ return {\n+ encoded,\n+ encoding,\n+ };\n+ }\n+}\n+\n+export type ActionIfInvalidAction =\n+ | \"ignore\"\n+ | \"prefixPath\"\n+ | \"prefixUuid\"\n+ | \"removePath\";\n+\n+export interface BlobStoreStoreOptions {\n+ /**\n+ * If the path is missing or invalid in the blob, how should we create\n+ * a new path?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an emtpy string: Reject the blob\n+ * - \"ignore\": Attempt to store it anyway (but this may fail)\n+ * - \"prefixPath\": Use the default prefix for the BlobStore and get the\n+ * unique portion from the URL. The original path is stored in the metadata\n+ * - \"prefixUuid\": Use the default prefix for the BlobStore and get the\n+ * unique portion from a generated UUID. The original path is stored\n+ * in the metadata\n+ */\n+ actionIfInvalid?: ActionIfInvalidAction;\n+\n+ /**\n+ * The expected prefix for URIs that are stored.\n+ * This may be used to test if a MediaBlob is valid and used to create a new\n+ * path if \"prefixPath\" or \"prefixUuid\" is set for actionIfInvalid.\n+ */\n+ pathPrefix?: string;\n+}\n+\n+export type ActionIfBlobMissingAction = \"emptyBlob\";\n+\n+export interface BlobStoreFetchOptions {\n+ /**\n+ * If the blob is not found when fetching, what should we do?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an empty string: return undefined\n+ * - \"emptyBlob\": return a new MediaBlob that has the path set, but nothing else.\n+ */\n+ actionIfBlobMissing?: ActionIfBlobMissingAction;\n+}\n+\n+export interface BlobStoreOptions {\n+ defaultStoreOptions?: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions?: BlobStoreFetchOptions;\n+}\n+\n+/**\n+ * A specialized Store that is designed to handle MediaBlobs and use the\n+ * key that is included in the blob to determine exactly how it is stored.\n+ *\n+ * The full details of a MediaBlob may be changed when it is stored.\n+ * For example, it may get additional or different Metadata. This should be\n+ * what is returned when the store() method is called.\n+ *\n+ * Although BlobStore extends BaseStore, not all of the methods from\n+ * BaseStore may be implemented (or even possible). Those that are not\n+ * implemented should be documented and throw an Error if called.\n+ */\n+export abstract class BlobStore extends BaseStore {\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n+\n+ defaultStoreOptions: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions: BlobStoreFetchOptions;\n+\n+ constructor(opts?: BlobStoreOptions) {\n+ super(opts);\n+ this.defaultStoreOptions = opts?.defaultStoreOptions ?? {};\n+ this.defaultFetchOptions = opts?.defaultFetchOptions ?? {};\n+ }\n+\n+ protected async _realKey(key: string | MediaBlob): Promise {\n+ return typeof key === \"string\" ? 
key : await key.asUri();\n+ }\n+\n+ /**\n+ * Is the path set in the MediaBlob supported by this BlobStore?\n+ * Subclasses must implement and evaluate `blob.path` to make this\n+ * determination.\n+ *\n+ * Although this is async, this is expected to be a relatively fast operation\n+ * (ie - you shouldn't make network calls).\n+ *\n+ * The default implementation assumes that undefined blob.paths are invalid\n+ * and then uses the replacePathPrefix (or an empty string) as an assumed path\n+ * to start with.\n+ *\n+ * @param blob The blob to test\n+ * @param opts Any options (if needed) that may be used to determine if it is valid\n+ * @return If the string represented by blob.path is supported.\n+ */\n+ protected _hasValidPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const path = blob.path ?? \"\";\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const isPrefixed =\n+ typeof blob.path !== \"undefined\" && path.startsWith(prefix);\n+ return Promise.resolve(isPrefixed);\n+ }\n+\n+ protected _blobPathSuffix(blob: MediaBlob): string {\n+ // Get the path currently set and make sure we treat it as a string\n+ const blobPath = `${blob.path}`;\n+\n+ // Advance past the first set of /\n+ let pathStart = blobPath.indexOf(\"/\") + 1;\n+ while (blobPath.charAt(pathStart) === \"/\") {\n+ pathStart += 1;\n+ }\n+\n+ // We will use the rest as the path for a replacement\n+ return blobPath.substring(pathStart);\n+ }\n+\n+ protected async _newBlob(\n+ oldBlob: MediaBlob,\n+ newPath?: string\n+ ): Promise {\n+ const oldPath = oldBlob.path;\n+ const metadata = oldBlob?.metadata ?? {};\n+ metadata.langchainOldPath = oldPath;\n+ const newBlob = new MediaBlob({\n+ ...oldBlob,\n+ metadata,\n+ });\n+ if (newPath) {\n+ newBlob.path = newPath;\n+ } else if (newBlob.path) {\n+ delete newBlob.path;\n+ }\n+ return newBlob;\n+ }\n+\n+ protected async _validBlobPrefixPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const suffix = this._blobPathSuffix(blob);\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobPrefixUuid(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? 
\"\";\n+ const suffix = uuidv4(); // TODO - option to specify version?\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobRemovePath(\n+ blob: MediaBlob,\n+ _opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ return this._newBlob(blob, undefined);\n+ }\n+\n+ /**\n+ * Based on the blob and options, return a blob that has a valid path\n+ * that can be saved.\n+ * @param blob\n+ * @param opts\n+ */\n+ protected async _validStoreBlob(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ if (await this._hasValidPath(blob, opts)) {\n+ return blob;\n+ }\n+ switch (opts?.actionIfInvalid) {\n+ case \"ignore\":\n+ return blob;\n+ case \"prefixPath\":\n+ return this._validBlobPrefixPath(blob, opts);\n+ case \"prefixUuid\":\n+ return this._validBlobPrefixUuid(blob, opts);\n+ case \"removePath\":\n+ return this._validBlobRemovePath(blob, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async store(\n+ blob: MediaBlob,\n+ opts: BlobStoreStoreOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreStoreOptions = {\n+ ...this.defaultStoreOptions,\n+ ...opts,\n+ };\n+ const validBlob = await this._validStoreBlob(blob, allOpts);\n+ if (typeof validBlob !== \"undefined\") {\n+ const validKey = await validBlob.asUri();\n+ await this.mset([[validKey, validBlob]]);\n+ const savedKey = await validBlob.asUri();\n+ return await this.fetch(savedKey);\n+ }\n+ return undefined;\n+ }\n+\n+ protected async _missingFetchBlobEmpty(\n+ path: string,\n+ _opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ return new MediaBlob({ path });\n+ }\n+\n+ protected async _missingFetchBlob(\n+ path: string,\n+ opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ switch (opts?.actionIfBlobMissing) {\n+ case \"emptyBlob\":\n+ return this._missingFetchBlobEmpty(path, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async fetch(\n+ key: string | MediaBlob,\n+ opts: BlobStoreFetchOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreFetchOptions = {\n+ ...this.defaultFetchOptions,\n+ ...opts,\n+ };\n+ const realKey = await this._realKey(key);\n+ const ret = await this.mget([realKey]);\n+ return ret?.[0] ?? (await this._missingFetchBlob(realKey, allOpts));\n+ }\n+}\n+\n+export interface BackedBlobStoreOptions extends BlobStoreOptions {\n+ backingStore: BaseStore;\n+}\n+\n+export class BackedBlobStore extends BlobStore {", + "comment_created_at": "2024-07-09T00:02:08+00:00", + "comment_author": "afirstenberg", + "comment_body": "Yes.\r\n\r\nI use it in tests for `MediaManager` for an `aliasStore` that is backed by a memory store.\r\nI can also see it used in general for either the `aliasStore` or a retriever that will be using some other defined store to store things. 
I don't want people to need to re-invent the storage methodology to support the `BlobStore` semantics, but I do want to be able to support them.", + "pr_file_module": null + }, + { + "comment_id": "1699469919", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5835, + "pr_file": "libs/langchain-google-common/src/experimental/utils/media_core.ts", + "discussion_id": "1667177345", + "commented_code": "@@ -0,0 +1,504 @@\n+import { v4 as uuidv4 } from \"uuid\";\n+import { BaseStore } from \"@langchain/core/stores\";\n+import { Serializable } from \"@langchain/core/load/serializable\";\n+\n+export interface MediaBlobParameters {\n+ data?: Blob;\n+\n+ metadata?: Record;\n+\n+ path?: string;\n+}\n+\n+/**\n+ * Represents a chunk of data that can be identified by the path where the\n+ * data is (or will be) located, along with optional metadata about the data.\n+ */\n+export class MediaBlob\n+ extends Serializable // FIXME - I'm not sure this serializes or deserializes correctly\n+ implements MediaBlobParameters\n+{\n+ lc_serializable = true;\n+\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n+\n+ data?: Blob;\n+\n+ // eslint-disable-next-line @typescript-eslint/no-explicit-any\n+ metadata?: Record;\n+\n+ path?: string;\n+\n+ constructor(params?: MediaBlobParameters) {\n+ super(params);\n+ this.data = params?.data;\n+ this.metadata = params?.metadata;\n+ this.path = params?.path;\n+ }\n+\n+ get size(): number {\n+ return this.data?.size ?? 0;\n+ }\n+\n+ get dataType(): string {\n+ return this.data?.type ?? \"\";\n+ }\n+\n+ get encoding(): string {\n+ const charsetEquals = this.dataType.indexOf(\"charset=\");\n+ return charsetEquals === -1\n+ ? \"utf-8\"\n+ : this.dataType.substring(charsetEquals + 8);\n+ }\n+\n+ get mimetype(): string {\n+ const semicolon = this.dataType.indexOf(\";\");\n+ return semicolon === -1\n+ ? this.dataType\n+ : this.dataType.substring(0, semicolon);\n+ }\n+\n+ async asString(): Promise {\n+ const data = this.data ?? new Blob([]);\n+ const dataBuffer = await data.arrayBuffer();\n+ const dataArray = new Uint8Array(dataBuffer);\n+\n+ // Need to handle the array in smaller chunks to deal with stack size limits\n+ let ret = \"\";\n+ const chunkSize = 102400;\n+ for (let i = 0; i < dataArray.length; i += chunkSize) {\n+ const chunk = dataArray.subarray(i, i + chunkSize);\n+ ret += String.fromCharCode(...chunk);\n+ }\n+\n+ return ret;\n+ }\n+\n+ async asBase64(): Promise {\n+ return btoa(await this.asString());\n+ }\n+\n+ async asDataUrl(): Promise {\n+ return `data:${this.mimetype};base64,${await this.asBase64()}`;\n+ }\n+\n+ async asUri(): Promise {\n+ return this.path ?? (await this.asDataUrl());\n+ }\n+\n+ async encode(): Promise<{ encoded: string; encoding: string }> {\n+ const dataUrl = await this.asDataUrl();\n+ const comma = dataUrl.indexOf(\",\");\n+ const encoded = dataUrl.substring(comma + 1);\n+ const encoding: string = dataUrl.indexOf(\"base64\") > -1 ? 
\"base64\" : \"8bit\";\n+ return {\n+ encoded,\n+ encoding,\n+ };\n+ }\n+}\n+\n+export type ActionIfInvalidAction =\n+ | \"ignore\"\n+ | \"prefixPath\"\n+ | \"prefixUuid\"\n+ | \"removePath\";\n+\n+export interface BlobStoreStoreOptions {\n+ /**\n+ * If the path is missing or invalid in the blob, how should we create\n+ * a new path?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an emtpy string: Reject the blob\n+ * - \"ignore\": Attempt to store it anyway (but this may fail)\n+ * - \"prefixPath\": Use the default prefix for the BlobStore and get the\n+ * unique portion from the URL. The original path is stored in the metadata\n+ * - \"prefixUuid\": Use the default prefix for the BlobStore and get the\n+ * unique portion from a generated UUID. The original path is stored\n+ * in the metadata\n+ */\n+ actionIfInvalid?: ActionIfInvalidAction;\n+\n+ /**\n+ * The expected prefix for URIs that are stored.\n+ * This may be used to test if a MediaBlob is valid and used to create a new\n+ * path if \"prefixPath\" or \"prefixUuid\" is set for actionIfInvalid.\n+ */\n+ pathPrefix?: string;\n+}\n+\n+export type ActionIfBlobMissingAction = \"emptyBlob\";\n+\n+export interface BlobStoreFetchOptions {\n+ /**\n+ * If the blob is not found when fetching, what should we do?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an empty string: return undefined\n+ * - \"emptyBlob\": return a new MediaBlob that has the path set, but nothing else.\n+ */\n+ actionIfBlobMissing?: ActionIfBlobMissingAction;\n+}\n+\n+export interface BlobStoreOptions {\n+ defaultStoreOptions?: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions?: BlobStoreFetchOptions;\n+}\n+\n+/**\n+ * A specialized Store that is designed to handle MediaBlobs and use the\n+ * key that is included in the blob to determine exactly how it is stored.\n+ *\n+ * The full details of a MediaBlob may be changed when it is stored.\n+ * For example, it may get additional or different Metadata. This should be\n+ * what is returned when the store() method is called.\n+ *\n+ * Although BlobStore extends BaseStore, not all of the methods from\n+ * BaseStore may be implemented (or even possible). Those that are not\n+ * implemented should be documented and throw an Error if called.\n+ */\n+export abstract class BlobStore extends BaseStore {\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n+\n+ defaultStoreOptions: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions: BlobStoreFetchOptions;\n+\n+ constructor(opts?: BlobStoreOptions) {\n+ super(opts);\n+ this.defaultStoreOptions = opts?.defaultStoreOptions ?? {};\n+ this.defaultFetchOptions = opts?.defaultFetchOptions ?? {};\n+ }\n+\n+ protected async _realKey(key: string | MediaBlob): Promise {\n+ return typeof key === \"string\" ? 
key : await key.asUri();\n+ }\n+\n+ /**\n+ * Is the path set in the MediaBlob supported by this BlobStore?\n+ * Subclasses must implement and evaluate `blob.path` to make this\n+ * determination.\n+ *\n+ * Although this is async, this is expected to be a relatively fast operation\n+ * (ie - you shouldn't make network calls).\n+ *\n+ * The default implementation assumes that undefined blob.paths are invalid\n+ * and then uses the replacePathPrefix (or an empty string) as an assumed path\n+ * to start with.\n+ *\n+ * @param blob The blob to test\n+ * @param opts Any options (if needed) that may be used to determine if it is valid\n+ * @return If the string represented by blob.path is supported.\n+ */\n+ protected _hasValidPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const path = blob.path ?? \"\";\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const isPrefixed =\n+ typeof blob.path !== \"undefined\" && path.startsWith(prefix);\n+ return Promise.resolve(isPrefixed);\n+ }\n+\n+ protected _blobPathSuffix(blob: MediaBlob): string {\n+ // Get the path currently set and make sure we treat it as a string\n+ const blobPath = `${blob.path}`;\n+\n+ // Advance past the first set of /\n+ let pathStart = blobPath.indexOf(\"/\") + 1;\n+ while (blobPath.charAt(pathStart) === \"/\") {\n+ pathStart += 1;\n+ }\n+\n+ // We will use the rest as the path for a replacement\n+ return blobPath.substring(pathStart);\n+ }\n+\n+ protected async _newBlob(\n+ oldBlob: MediaBlob,\n+ newPath?: string\n+ ): Promise {\n+ const oldPath = oldBlob.path;\n+ const metadata = oldBlob?.metadata ?? {};\n+ metadata.langchainOldPath = oldPath;\n+ const newBlob = new MediaBlob({\n+ ...oldBlob,\n+ metadata,\n+ });\n+ if (newPath) {\n+ newBlob.path = newPath;\n+ } else if (newBlob.path) {\n+ delete newBlob.path;\n+ }\n+ return newBlob;\n+ }\n+\n+ protected async _validBlobPrefixPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const suffix = this._blobPathSuffix(blob);\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobPrefixUuid(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? 
\"\";\n+ const suffix = uuidv4(); // TODO - option to specify version?\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobRemovePath(\n+ blob: MediaBlob,\n+ _opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ return this._newBlob(blob, undefined);\n+ }\n+\n+ /**\n+ * Based on the blob and options, return a blob that has a valid path\n+ * that can be saved.\n+ * @param blob\n+ * @param opts\n+ */\n+ protected async _validStoreBlob(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ if (await this._hasValidPath(blob, opts)) {\n+ return blob;\n+ }\n+ switch (opts?.actionIfInvalid) {\n+ case \"ignore\":\n+ return blob;\n+ case \"prefixPath\":\n+ return this._validBlobPrefixPath(blob, opts);\n+ case \"prefixUuid\":\n+ return this._validBlobPrefixUuid(blob, opts);\n+ case \"removePath\":\n+ return this._validBlobRemovePath(blob, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async store(\n+ blob: MediaBlob,\n+ opts: BlobStoreStoreOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreStoreOptions = {\n+ ...this.defaultStoreOptions,\n+ ...opts,\n+ };\n+ const validBlob = await this._validStoreBlob(blob, allOpts);\n+ if (typeof validBlob !== \"undefined\") {\n+ const validKey = await validBlob.asUri();\n+ await this.mset([[validKey, validBlob]]);\n+ const savedKey = await validBlob.asUri();\n+ return await this.fetch(savedKey);\n+ }\n+ return undefined;\n+ }\n+\n+ protected async _missingFetchBlobEmpty(\n+ path: string,\n+ _opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ return new MediaBlob({ path });\n+ }\n+\n+ protected async _missingFetchBlob(\n+ path: string,\n+ opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ switch (opts?.actionIfBlobMissing) {\n+ case \"emptyBlob\":\n+ return this._missingFetchBlobEmpty(path, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async fetch(\n+ key: string | MediaBlob,\n+ opts: BlobStoreFetchOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreFetchOptions = {\n+ ...this.defaultFetchOptions,\n+ ...opts,\n+ };\n+ const realKey = await this._realKey(key);\n+ const ret = await this.mget([realKey]);\n+ return ret?.[0] ?? 
(await this._missingFetchBlob(realKey, allOpts));\n+ }\n+}\n+\n+export interface BackedBlobStoreOptions extends BlobStoreOptions {\n+ backingStore: BaseStore;\n+}\n+\n+export class BackedBlobStore extends BlobStore {", + "comment_created_at": "2024-08-01T05:59:07+00:00", + "comment_author": "jacoblee93", + "comment_body": "Why not just interact with instances of `this.backingStore` directly?\r\n\r\nWould rather just init three different instances of it or just have some kind of `.bind()` semantic if absolutely necessary - there's a lot of inheritance already\r\n\r\nIf just used for tests, put in a `utils/testing` file", + "pr_file_module": null + }, + { + "comment_id": "1701014291", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5835, + "pr_file": "libs/langchain-google-common/src/experimental/utils/media_core.ts", + "discussion_id": "1667177345", + "commented_code": "@@ -0,0 +1,504 @@\n+import { v4 as uuidv4 } from \"uuid\";\n+import { BaseStore } from \"@langchain/core/stores\";\n+import { Serializable } from \"@langchain/core/load/serializable\";\n+\n+export interface MediaBlobParameters {\n+ data?: Blob;\n+\n+ metadata?: Record;\n+\n+ path?: string;\n+}\n+\n+/**\n+ * Represents a chunk of data that can be identified by the path where the\n+ * data is (or will be) located, along with optional metadata about the data.\n+ */\n+export class MediaBlob\n+ extends Serializable // FIXME - I'm not sure this serializes or deserializes correctly\n+ implements MediaBlobParameters\n+{\n+ lc_serializable = true;\n+\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n+\n+ data?: Blob;\n+\n+ // eslint-disable-next-line @typescript-eslint/no-explicit-any\n+ metadata?: Record;\n+\n+ path?: string;\n+\n+ constructor(params?: MediaBlobParameters) {\n+ super(params);\n+ this.data = params?.data;\n+ this.metadata = params?.metadata;\n+ this.path = params?.path;\n+ }\n+\n+ get size(): number {\n+ return this.data?.size ?? 0;\n+ }\n+\n+ get dataType(): string {\n+ return this.data?.type ?? \"\";\n+ }\n+\n+ get encoding(): string {\n+ const charsetEquals = this.dataType.indexOf(\"charset=\");\n+ return charsetEquals === -1\n+ ? \"utf-8\"\n+ : this.dataType.substring(charsetEquals + 8);\n+ }\n+\n+ get mimetype(): string {\n+ const semicolon = this.dataType.indexOf(\";\");\n+ return semicolon === -1\n+ ? this.dataType\n+ : this.dataType.substring(0, semicolon);\n+ }\n+\n+ async asString(): Promise {\n+ const data = this.data ?? new Blob([]);\n+ const dataBuffer = await data.arrayBuffer();\n+ const dataArray = new Uint8Array(dataBuffer);\n+\n+ // Need to handle the array in smaller chunks to deal with stack size limits\n+ let ret = \"\";\n+ const chunkSize = 102400;\n+ for (let i = 0; i < dataArray.length; i += chunkSize) {\n+ const chunk = dataArray.subarray(i, i + chunkSize);\n+ ret += String.fromCharCode(...chunk);\n+ }\n+\n+ return ret;\n+ }\n+\n+ async asBase64(): Promise {\n+ return btoa(await this.asString());\n+ }\n+\n+ async asDataUrl(): Promise {\n+ return `data:${this.mimetype};base64,${await this.asBase64()}`;\n+ }\n+\n+ async asUri(): Promise {\n+ return this.path ?? (await this.asDataUrl());\n+ }\n+\n+ async encode(): Promise<{ encoded: string; encoding: string }> {\n+ const dataUrl = await this.asDataUrl();\n+ const comma = dataUrl.indexOf(\",\");\n+ const encoded = dataUrl.substring(comma + 1);\n+ const encoding: string = dataUrl.indexOf(\"base64\") > -1 ? 
\"base64\" : \"8bit\";\n+ return {\n+ encoded,\n+ encoding,\n+ };\n+ }\n+}\n+\n+export type ActionIfInvalidAction =\n+ | \"ignore\"\n+ | \"prefixPath\"\n+ | \"prefixUuid\"\n+ | \"removePath\";\n+\n+export interface BlobStoreStoreOptions {\n+ /**\n+ * If the path is missing or invalid in the blob, how should we create\n+ * a new path?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an emtpy string: Reject the blob\n+ * - \"ignore\": Attempt to store it anyway (but this may fail)\n+ * - \"prefixPath\": Use the default prefix for the BlobStore and get the\n+ * unique portion from the URL. The original path is stored in the metadata\n+ * - \"prefixUuid\": Use the default prefix for the BlobStore and get the\n+ * unique portion from a generated UUID. The original path is stored\n+ * in the metadata\n+ */\n+ actionIfInvalid?: ActionIfInvalidAction;\n+\n+ /**\n+ * The expected prefix for URIs that are stored.\n+ * This may be used to test if a MediaBlob is valid and used to create a new\n+ * path if \"prefixPath\" or \"prefixUuid\" is set for actionIfInvalid.\n+ */\n+ pathPrefix?: string;\n+}\n+\n+export type ActionIfBlobMissingAction = \"emptyBlob\";\n+\n+export interface BlobStoreFetchOptions {\n+ /**\n+ * If the blob is not found when fetching, what should we do?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an empty string: return undefined\n+ * - \"emptyBlob\": return a new MediaBlob that has the path set, but nothing else.\n+ */\n+ actionIfBlobMissing?: ActionIfBlobMissingAction;\n+}\n+\n+export interface BlobStoreOptions {\n+ defaultStoreOptions?: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions?: BlobStoreFetchOptions;\n+}\n+\n+/**\n+ * A specialized Store that is designed to handle MediaBlobs and use the\n+ * key that is included in the blob to determine exactly how it is stored.\n+ *\n+ * The full details of a MediaBlob may be changed when it is stored.\n+ * For example, it may get additional or different Metadata. This should be\n+ * what is returned when the store() method is called.\n+ *\n+ * Although BlobStore extends BaseStore, not all of the methods from\n+ * BaseStore may be implemented (or even possible). Those that are not\n+ * implemented should be documented and throw an Error if called.\n+ */\n+export abstract class BlobStore extends BaseStore {\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n+\n+ defaultStoreOptions: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions: BlobStoreFetchOptions;\n+\n+ constructor(opts?: BlobStoreOptions) {\n+ super(opts);\n+ this.defaultStoreOptions = opts?.defaultStoreOptions ?? {};\n+ this.defaultFetchOptions = opts?.defaultFetchOptions ?? {};\n+ }\n+\n+ protected async _realKey(key: string | MediaBlob): Promise {\n+ return typeof key === \"string\" ? 
key : await key.asUri();\n+ }\n+\n+ /**\n+ * Is the path set in the MediaBlob supported by this BlobStore?\n+ * Subclasses must implement and evaluate `blob.path` to make this\n+ * determination.\n+ *\n+ * Although this is async, this is expected to be a relatively fast operation\n+ * (ie - you shouldn't make network calls).\n+ *\n+ * The default implementation assumes that undefined blob.paths are invalid\n+ * and then uses the replacePathPrefix (or an empty string) as an assumed path\n+ * to start with.\n+ *\n+ * @param blob The blob to test\n+ * @param opts Any options (if needed) that may be used to determine if it is valid\n+ * @return If the string represented by blob.path is supported.\n+ */\n+ protected _hasValidPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const path = blob.path ?? \"\";\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const isPrefixed =\n+ typeof blob.path !== \"undefined\" && path.startsWith(prefix);\n+ return Promise.resolve(isPrefixed);\n+ }\n+\n+ protected _blobPathSuffix(blob: MediaBlob): string {\n+ // Get the path currently set and make sure we treat it as a string\n+ const blobPath = `${blob.path}`;\n+\n+ // Advance past the first set of /\n+ let pathStart = blobPath.indexOf(\"/\") + 1;\n+ while (blobPath.charAt(pathStart) === \"/\") {\n+ pathStart += 1;\n+ }\n+\n+ // We will use the rest as the path for a replacement\n+ return blobPath.substring(pathStart);\n+ }\n+\n+ protected async _newBlob(\n+ oldBlob: MediaBlob,\n+ newPath?: string\n+ ): Promise {\n+ const oldPath = oldBlob.path;\n+ const metadata = oldBlob?.metadata ?? {};\n+ metadata.langchainOldPath = oldPath;\n+ const newBlob = new MediaBlob({\n+ ...oldBlob,\n+ metadata,\n+ });\n+ if (newPath) {\n+ newBlob.path = newPath;\n+ } else if (newBlob.path) {\n+ delete newBlob.path;\n+ }\n+ return newBlob;\n+ }\n+\n+ protected async _validBlobPrefixPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const suffix = this._blobPathSuffix(blob);\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobPrefixUuid(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? 
\"\";\n+ const suffix = uuidv4(); // TODO - option to specify version?\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobRemovePath(\n+ blob: MediaBlob,\n+ _opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ return this._newBlob(blob, undefined);\n+ }\n+\n+ /**\n+ * Based on the blob and options, return a blob that has a valid path\n+ * that can be saved.\n+ * @param blob\n+ * @param opts\n+ */\n+ protected async _validStoreBlob(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ if (await this._hasValidPath(blob, opts)) {\n+ return blob;\n+ }\n+ switch (opts?.actionIfInvalid) {\n+ case \"ignore\":\n+ return blob;\n+ case \"prefixPath\":\n+ return this._validBlobPrefixPath(blob, opts);\n+ case \"prefixUuid\":\n+ return this._validBlobPrefixUuid(blob, opts);\n+ case \"removePath\":\n+ return this._validBlobRemovePath(blob, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async store(\n+ blob: MediaBlob,\n+ opts: BlobStoreStoreOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreStoreOptions = {\n+ ...this.defaultStoreOptions,\n+ ...opts,\n+ };\n+ const validBlob = await this._validStoreBlob(blob, allOpts);\n+ if (typeof validBlob !== \"undefined\") {\n+ const validKey = await validBlob.asUri();\n+ await this.mset([[validKey, validBlob]]);\n+ const savedKey = await validBlob.asUri();\n+ return await this.fetch(savedKey);\n+ }\n+ return undefined;\n+ }\n+\n+ protected async _missingFetchBlobEmpty(\n+ path: string,\n+ _opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ return new MediaBlob({ path });\n+ }\n+\n+ protected async _missingFetchBlob(\n+ path: string,\n+ opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ switch (opts?.actionIfBlobMissing) {\n+ case \"emptyBlob\":\n+ return this._missingFetchBlobEmpty(path, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async fetch(\n+ key: string | MediaBlob,\n+ opts: BlobStoreFetchOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreFetchOptions = {\n+ ...this.defaultFetchOptions,\n+ ...opts,\n+ };\n+ const realKey = await this._realKey(key);\n+ const ret = await this.mget([realKey]);\n+ return ret?.[0] ?? (await this._missingFetchBlob(realKey, allOpts));\n+ }\n+}\n+\n+export interface BackedBlobStoreOptions extends BlobStoreOptions {\n+ backingStore: BaseStore;\n+}\n+\n+export class BackedBlobStore extends BlobStore {", + "comment_created_at": "2024-08-01T23:41:41+00:00", + "comment_author": "afirstenberg", + "comment_body": "Because the `BlobStore` versions have different semantics for storing an object (they return values from a set while the `BaseStore` versions don't). So rather than reimplementing all of the `BaseStore` implementations (now and in the future), it guarantees those semantics for other `BaseStore` types. 
A memory store being the biggest example, but anything that may save to a database or whatever also makes sense.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1695528343", + "pr_number": 6250, + "pr_file": "libs/langchain-aws/src/common.ts", + "created_at": "2024-07-29T16:32:08+00:00", + "commented_code": ".map((msg) => {\n if (msg._getType() === \"ai\") {\n const castMsg = msg as AIMessage;\n if (typeof castMsg.content === \"string\" && castMsg.content !== \"\") {\n if (castMsg.tool_calls && castMsg.tool_calls.length) {", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1695528343", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6250, + "pr_file": "libs/langchain-aws/src/common.ts", + "discussion_id": "1695528343", + "commented_code": "@@ -82,7 +82,26 @@ export function convertToConverseMessages(messages: BaseMessage[]): {\n .map((msg) => {\n if (msg._getType() === \"ai\") {\n const castMsg = msg as AIMessage;\n- if (typeof castMsg.content === \"string\" && castMsg.content !== \"\") {\n+ if (castMsg.tool_calls && castMsg.tool_calls.length) {", + "comment_created_at": "2024-07-29T16:32:08+00:00", + "comment_author": "bracesproul", + "comment_body": "Instead of mapping over it, calling `.reduce` would likely be a better method, so we can keep all content fields. Could you push up this refactor? If not I can push up in a little", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-chunked-data-processing.md b/_reviewers/langchainjs-chunked-data-processing.md index 3fcb127..0a1b132 100644 --- a/_reviewers/langchainjs-chunked-data-processing.md +++ b/_reviewers/langchainjs-chunked-data-processing.md @@ -41,109 +41,3 @@ async asString(): Promise { ``` Implementing chunked processing improves application stability by preventing stack overflows and can improve performance by managing memory more efficiently. This pattern is especially important when handling user-generated content, large API responses, or binary data operations. 
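The comment above closes the thread on why `BackedBlobStore` exists: it keeps the `BlobStore` store/fetch semantics (returning the stored `MediaBlob`) while delegating raw persistence to any `BaseStore`, such as an in-memory or database-backed store. The quoted diff cuts off at the class declaration, so the sketch below is only an illustration of that delegation pattern — it is not the PR's actual implementation. `BlobStore`, `MediaBlob`, and `BackedBlobStoreOptions` are assumed to be the definitions shown in the quoted `media_core.ts`; the `mget`/`mset`/`mdelete`/`yieldKeys` bodies are assumptions added for illustration.

```typescript
import { BaseStore } from "@langchain/core/stores";
// BlobStore, MediaBlob, and BackedBlobStoreOptions are assumed to be the
// definitions shown in the quoted media_core.ts diff above.

// Illustrative sketch only: the real BackedBlobStore body is not shown in the
// quoted diff, so these delegating method bodies are assumptions.
class BackedBlobStoreSketch extends BlobStore {
  backingStore: BaseStore<string, MediaBlob>;

  constructor(opts: BackedBlobStoreOptions) {
    super(opts);
    this.backingStore = opts.backingStore;
  }

  // Delegate the BaseStore primitives to the wrapped store; store() and
  // fetch() inherited from BlobStore then layer the MediaBlob semantics
  // (path validation, missing-blob handling) on top of any backing store.
  mget(keys: string[]): Promise<(MediaBlob | undefined)[]> {
    return this.backingStore.mget(keys);
  }

  mset(keyValuePairs: [string, MediaBlob][]): Promise<void> {
    return this.backingStore.mset(keyValuePairs);
  }

  mdelete(keys: string[]): Promise<void> {
    return this.backingStore.mdelete(keys);
  }

  yieldKeys(prefix?: string): AsyncGenerator<string> {
    return this.backingStore.yieldKeys(prefix);
  }
}
```

This is the shape the reviewer describes: a memory-backed `BaseStore` (or any database-backed one) can be supplied as `backingStore` without re-implementing the `BlobStore` path-handling and fetch behavior.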
- - -[ - { - "discussion_id": "2116365677", - "pr_number": 8282, - "pr_file": "libs/langchain-mcp-adapters/src/types.ts", - "created_at": "2025-05-30T18:08:27+00:00", - "commented_code": "* Zod schema for an individual content item within a CallToolResult.\n * Derived from CallToolResultSchema.\n */\nconst callToolResultContentSchema = CallToolResultSchema.shape.content.element;\n/**\n * Recursively unwrap common Zod wrappers (default, optional, nullable, effects)\n */\nfunction unwrapSchema(schema) {\n const def = schema._def;\n switch (def?.typeName) {\n case ZodDefault.name:\n case ZodOptional.name:\n case ZodNullable.name:\n return unwrapSchema(def.innerType);\n case ZodEffects.name:\n return unwrapSchema(def.schema);\n default:\n return schema;\n }\n}\n\n/**\n * Zod schema for an individual content item within a CallToolResult.\n * Derived from CallToolResultSchema, with wrappers unwrapped.\n */\nconst arraySchema = CallToolResultSchema.shape.content;\nconst callToolResultContentSchema = unwrapSchema(arraySchema).element;", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2116365677", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8282, - "pr_file": "libs/langchain-mcp-adapters/src/types.ts", - "discussion_id": "2116365677", - "commented_code": "@@ -38,7 +42,29 @@ function isZodLiteral(schema: unknown): schema is ZodLiteral {\n * Zod schema for an individual content item within a CallToolResult.\n * Derived from CallToolResultSchema.\n */\n-const callToolResultContentSchema = CallToolResultSchema.shape.content.element;\n+/**\n+ * Recursively unwrap common Zod wrappers (default, optional, nullable, effects)\n+ */\n+function unwrapSchema(schema) {\n+ const def = schema._def;\n+ switch (def?.typeName) {\n+ case ZodDefault.name:\n+ case ZodOptional.name:\n+ case ZodNullable.name:\n+ return unwrapSchema(def.innerType);\n+ case ZodEffects.name:\n+ return unwrapSchema(def.schema);\n+ default:\n+ return schema;\n+ }\n+}\n+\n+/**\n+ * Zod schema for an individual content item within a CallToolResult.\n+ * Derived from CallToolResultSchema, with wrappers unwrapped.\n+ */\n+const arraySchema = CallToolResultSchema.shape.content;\n+const callToolResultContentSchema = unwrapSchema(arraySchema).element;", - "comment_created_at": "2025-05-30T18:08:27+00:00", - "comment_author": "hntrl", - "comment_body": "```suggestion\r\nfunction isZodDefault(\r\n schema: unknown\r\n): schema is ZodDefault {\r\n return (\r\n typeof schema === \"object\" &&\r\n schema !== null &&\r\n \"_def\" in schema &&\r\n (schema as { _def: { typeName: string } })._def.typeName === \"ZodDefault\"\r\n );\r\n}\r\n\r\nfunction unwrapZodArray(\r\n schema: ZodArray | ZodDefault>\r\n): TElement {\r\n if (isZodDefault(schema)) {\r\n return schema._def.innerType.element;\r\n }\r\n return schema.element;\r\n}\r\n\r\n/**\r\n * Zod schema for an individual content item within a CallToolResult.\r\n * Derived from CallToolResultSchema, with wrappers unwrapped.\r\n */\r\nconst callToolResultContentSchema = unwrapZodArray(\r\n CallToolResultSchema.shape.content\r\n);\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1667177345", - "pr_number": 5835, - "pr_file": "libs/langchain-google-common/src/experimental/utils/media_core.ts", - "created_at": "2024-07-05T21:46:14+00:00", - "commented_code": "import { v4 as uuidv4 } from \"uuid\";\nimport { BaseStore } from \"@langchain/core/stores\";\nimport { Serializable } from 
\"@langchain/core/load/serializable\";\n\nexport interface MediaBlobParameters {\n data?: Blob;\n\n metadata?: Record;\n\n path?: string;\n}\n\n/**\n * Represents a chunk of data that can be identified by the path where the\n * data is (or will be) located, along with optional metadata about the data.\n */\nexport class MediaBlob\n extends Serializable // FIXME - I'm not sure this serializes or deserializes correctly\n implements MediaBlobParameters\n{\n lc_serializable = true;\n\n lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n\n data?: Blob;\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n metadata?: Record;\n\n path?: string;\n\n constructor(params?: MediaBlobParameters) {\n super(params);\n this.data = params?.data;\n this.metadata = params?.metadata;\n this.path = params?.path;\n }\n\n get size(): number {\n return this.data?.size ?? 0;\n }\n\n get dataType(): string {\n return this.data?.type ?? \"\";\n }\n\n get encoding(): string {\n const charsetEquals = this.dataType.indexOf(\"charset=\");\n return charsetEquals === -1\n ? \"utf-8\"\n : this.dataType.substring(charsetEquals + 8);\n }\n\n get mimetype(): string {\n const semicolon = this.dataType.indexOf(\";\");\n return semicolon === -1\n ? this.dataType\n : this.dataType.substring(0, semicolon);\n }\n\n async asString(): Promise {\n const data = this.data ?? new Blob([]);\n const dataBuffer = await data.arrayBuffer();\n const dataArray = new Uint8Array(dataBuffer);\n\n // Need to handle the array in smaller chunks to deal with stack size limits\n let ret = \"\";\n const chunkSize = 102400;\n for (let i = 0; i < dataArray.length; i += chunkSize) {\n const chunk = dataArray.subarray(i, i + chunkSize);\n ret += String.fromCharCode(...chunk);\n }\n\n return ret;\n }\n\n async asBase64(): Promise {\n return btoa(await this.asString());\n }\n\n async asDataUrl(): Promise {\n return `data:${this.mimetype};base64,${await this.asBase64()}`;\n }\n\n async asUri(): Promise {\n return this.path ?? (await this.asDataUrl());\n }\n\n async encode(): Promise<{ encoded: string; encoding: string }> {\n const dataUrl = await this.asDataUrl();\n const comma = dataUrl.indexOf(\",\");\n const encoded = dataUrl.substring(comma + 1);\n const encoding: string = dataUrl.indexOf(\"base64\") > -1 ? \"base64\" : \"8bit\";\n return {\n encoded,\n encoding,\n };\n }\n}\n\nexport type ActionIfInvalidAction =\n | \"ignore\"\n | \"prefixPath\"\n | \"prefixUuid\"\n | \"removePath\";\n\nexport interface BlobStoreStoreOptions {\n /**\n * If the path is missing or invalid in the blob, how should we create\n * a new path?\n * Subclasses may define their own methods, but the following are supported\n * by default:\n * - Undefined or an emtpy string: Reject the blob\n * - \"ignore\": Attempt to store it anyway (but this may fail)\n * - \"prefixPath\": Use the default prefix for the BlobStore and get the\n * unique portion from the URL. The original path is stored in the metadata\n * - \"prefixUuid\": Use the default prefix for the BlobStore and get the\n * unique portion from a generated UUID. 
The original path is stored\n * in the metadata\n */\n actionIfInvalid?: ActionIfInvalidAction;\n\n /**\n * The expected prefix for URIs that are stored.\n * This may be used to test if a MediaBlob is valid and used to create a new\n * path if \"prefixPath\" or \"prefixUuid\" is set for actionIfInvalid.\n */\n pathPrefix?: string;\n}\n\nexport type ActionIfBlobMissingAction = \"emptyBlob\";\n\nexport interface BlobStoreFetchOptions {\n /**\n * If the blob is not found when fetching, what should we do?\n * Subclasses may define their own methods, but the following are supported\n * by default:\n * - Undefined or an empty string: return undefined\n * - \"emptyBlob\": return a new MediaBlob that has the path set, but nothing else.\n */\n actionIfBlobMissing?: ActionIfBlobMissingAction;\n}\n\nexport interface BlobStoreOptions {\n defaultStoreOptions?: BlobStoreStoreOptions;\n\n defaultFetchOptions?: BlobStoreFetchOptions;\n}\n\n/**\n * A specialized Store that is designed to handle MediaBlobs and use the\n * key that is included in the blob to determine exactly how it is stored.\n *\n * The full details of a MediaBlob may be changed when it is stored.\n * For example, it may get additional or different Metadata. This should be\n * what is returned when the store() method is called.\n *\n * Although BlobStore extends BaseStore, not all of the methods from\n * BaseStore may be implemented (or even possible). Those that are not\n * implemented should be documented and throw an Error if called.\n */\nexport abstract class BlobStore extends BaseStore {\n lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n\n defaultStoreOptions: BlobStoreStoreOptions;\n\n defaultFetchOptions: BlobStoreFetchOptions;\n\n constructor(opts?: BlobStoreOptions) {\n super(opts);\n this.defaultStoreOptions = opts?.defaultStoreOptions ?? {};\n this.defaultFetchOptions = opts?.defaultFetchOptions ?? {};\n }\n\n protected async _realKey(key: string | MediaBlob): Promise {\n return typeof key === \"string\" ? key : await key.asUri();\n }\n\n /**\n * Is the path set in the MediaBlob supported by this BlobStore?\n * Subclasses must implement and evaluate `blob.path` to make this\n * determination.\n *\n * Although this is async, this is expected to be a relatively fast operation\n * (ie - you shouldn't make network calls).\n *\n * The default implementation assumes that undefined blob.paths are invalid\n * and then uses the replacePathPrefix (or an empty string) as an assumed path\n * to start with.\n *\n * @param blob The blob to test\n * @param opts Any options (if needed) that may be used to determine if it is valid\n * @return If the string represented by blob.path is supported.\n */\n protected _hasValidPath(\n blob: MediaBlob,\n opts?: BlobStoreStoreOptions\n ): Promise {\n const path = blob.path ?? \"\";\n const prefix = opts?.pathPrefix ?? 
\"\";\n const isPrefixed =\n typeof blob.path !== \"undefined\" && path.startsWith(prefix);\n return Promise.resolve(isPrefixed);\n }\n\n protected _blobPathSuffix(blob: MediaBlob): string {\n // Get the path currently set and make sure we treat it as a string\n const blobPath = `${blob.path}`;\n\n // Advance past the first set of /\n let pathStart = blobPath.indexOf(\"/\") + 1;\n while (blobPath.charAt(pathStart) === \"/\") {\n pathStart += 1;\n }\n\n // We will use the rest as the path for a replacement\n return blobPath.substring(pathStart);\n }\n\n protected async _newBlob(\n oldBlob: MediaBlob,\n newPath?: string\n ): Promise {\n const oldPath = oldBlob.path;\n const metadata = oldBlob?.metadata ?? {};\n metadata.langchainOldPath = oldPath;\n const newBlob = new MediaBlob({\n ...oldBlob,\n metadata,\n });\n if (newPath) {\n newBlob.path = newPath;\n } else if (newBlob.path) {\n delete newBlob.path;\n }\n return newBlob;\n }\n\n protected async _validBlobPrefixPath(\n blob: MediaBlob,\n opts?: BlobStoreStoreOptions\n ): Promise {\n const prefix = opts?.pathPrefix ?? \"\";\n const suffix = this._blobPathSuffix(blob);\n const newPath = `${prefix}${suffix}`;\n return this._newBlob(blob, newPath);\n }\n\n protected async _validBlobPrefixUuid(\n blob: MediaBlob,\n opts?: BlobStoreStoreOptions\n ): Promise {\n const prefix = opts?.pathPrefix ?? \"\";\n const suffix = uuidv4(); // TODO - option to specify version?\n const newPath = `${prefix}${suffix}`;\n return this._newBlob(blob, newPath);\n }\n\n protected async _validBlobRemovePath(\n blob: MediaBlob,\n _opts?: BlobStoreStoreOptions\n ): Promise {\n return this._newBlob(blob, undefined);\n }\n\n /**\n * Based on the blob and options, return a blob that has a valid path\n * that can be saved.\n * @param blob\n * @param opts\n */\n protected async _validStoreBlob(\n blob: MediaBlob,\n opts?: BlobStoreStoreOptions\n ): Promise {\n if (await this._hasValidPath(blob, opts)) {\n return blob;\n }\n switch (opts?.actionIfInvalid) {\n case \"ignore\":\n return blob;\n case \"prefixPath\":\n return this._validBlobPrefixPath(blob, opts);\n case \"prefixUuid\":\n return this._validBlobPrefixUuid(blob, opts);\n case \"removePath\":\n return this._validBlobRemovePath(blob, opts);\n default:\n return undefined;\n }\n }\n\n async store(\n blob: MediaBlob,\n opts: BlobStoreStoreOptions = {}\n ): Promise {\n const allOpts: BlobStoreStoreOptions = {\n ...this.defaultStoreOptions,\n ...opts,\n };\n const validBlob = await this._validStoreBlob(blob, allOpts);\n if (typeof validBlob !== \"undefined\") {\n const validKey = await validBlob.asUri();\n await this.mset([[validKey, validBlob]]);\n const savedKey = await validBlob.asUri();\n return await this.fetch(savedKey);\n }\n return undefined;\n }\n\n protected async _missingFetchBlobEmpty(\n path: string,\n _opts?: BlobStoreFetchOptions\n ): Promise {\n return new MediaBlob({ path });\n }\n\n protected async _missingFetchBlob(\n path: string,\n opts?: BlobStoreFetchOptions\n ): Promise {\n switch (opts?.actionIfBlobMissing) {\n case \"emptyBlob\":\n return this._missingFetchBlobEmpty(path, opts);\n default:\n return undefined;\n }\n }\n\n async fetch(\n key: string | MediaBlob,\n opts: BlobStoreFetchOptions = {}\n ): Promise {\n const allOpts: BlobStoreFetchOptions = {\n ...this.defaultFetchOptions,\n ...opts,\n };\n const realKey = await this._realKey(key);\n const ret = await this.mget([realKey]);\n return ret?.[0] ?? 
(await this._missingFetchBlob(realKey, allOpts));\n }\n}\n\nexport interface BackedBlobStoreOptions extends BlobStoreOptions {\n backingStore: BaseStore;\n}\n\nexport class BackedBlobStore extends BlobStore {", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1667177345", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5835, - "pr_file": "libs/langchain-google-common/src/experimental/utils/media_core.ts", - "discussion_id": "1667177345", - "commented_code": "@@ -0,0 +1,504 @@\n+import { v4 as uuidv4 } from \"uuid\";\n+import { BaseStore } from \"@langchain/core/stores\";\n+import { Serializable } from \"@langchain/core/load/serializable\";\n+\n+export interface MediaBlobParameters {\n+ data?: Blob;\n+\n+ metadata?: Record;\n+\n+ path?: string;\n+}\n+\n+/**\n+ * Represents a chunk of data that can be identified by the path where the\n+ * data is (or will be) located, along with optional metadata about the data.\n+ */\n+export class MediaBlob\n+ extends Serializable // FIXME - I'm not sure this serializes or deserializes correctly\n+ implements MediaBlobParameters\n+{\n+ lc_serializable = true;\n+\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n+\n+ data?: Blob;\n+\n+ // eslint-disable-next-line @typescript-eslint/no-explicit-any\n+ metadata?: Record;\n+\n+ path?: string;\n+\n+ constructor(params?: MediaBlobParameters) {\n+ super(params);\n+ this.data = params?.data;\n+ this.metadata = params?.metadata;\n+ this.path = params?.path;\n+ }\n+\n+ get size(): number {\n+ return this.data?.size ?? 0;\n+ }\n+\n+ get dataType(): string {\n+ return this.data?.type ?? \"\";\n+ }\n+\n+ get encoding(): string {\n+ const charsetEquals = this.dataType.indexOf(\"charset=\");\n+ return charsetEquals === -1\n+ ? \"utf-8\"\n+ : this.dataType.substring(charsetEquals + 8);\n+ }\n+\n+ get mimetype(): string {\n+ const semicolon = this.dataType.indexOf(\";\");\n+ return semicolon === -1\n+ ? this.dataType\n+ : this.dataType.substring(0, semicolon);\n+ }\n+\n+ async asString(): Promise {\n+ const data = this.data ?? new Blob([]);\n+ const dataBuffer = await data.arrayBuffer();\n+ const dataArray = new Uint8Array(dataBuffer);\n+\n+ // Need to handle the array in smaller chunks to deal with stack size limits\n+ let ret = \"\";\n+ const chunkSize = 102400;\n+ for (let i = 0; i < dataArray.length; i += chunkSize) {\n+ const chunk = dataArray.subarray(i, i + chunkSize);\n+ ret += String.fromCharCode(...chunk);\n+ }\n+\n+ return ret;\n+ }\n+\n+ async asBase64(): Promise {\n+ return btoa(await this.asString());\n+ }\n+\n+ async asDataUrl(): Promise {\n+ return `data:${this.mimetype};base64,${await this.asBase64()}`;\n+ }\n+\n+ async asUri(): Promise {\n+ return this.path ?? (await this.asDataUrl());\n+ }\n+\n+ async encode(): Promise<{ encoded: string; encoding: string }> {\n+ const dataUrl = await this.asDataUrl();\n+ const comma = dataUrl.indexOf(\",\");\n+ const encoded = dataUrl.substring(comma + 1);\n+ const encoding: string = dataUrl.indexOf(\"base64\") > -1 ? 
\"base64\" : \"8bit\";\n+ return {\n+ encoded,\n+ encoding,\n+ };\n+ }\n+}\n+\n+export type ActionIfInvalidAction =\n+ | \"ignore\"\n+ | \"prefixPath\"\n+ | \"prefixUuid\"\n+ | \"removePath\";\n+\n+export interface BlobStoreStoreOptions {\n+ /**\n+ * If the path is missing or invalid in the blob, how should we create\n+ * a new path?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an emtpy string: Reject the blob\n+ * - \"ignore\": Attempt to store it anyway (but this may fail)\n+ * - \"prefixPath\": Use the default prefix for the BlobStore and get the\n+ * unique portion from the URL. The original path is stored in the metadata\n+ * - \"prefixUuid\": Use the default prefix for the BlobStore and get the\n+ * unique portion from a generated UUID. The original path is stored\n+ * in the metadata\n+ */\n+ actionIfInvalid?: ActionIfInvalidAction;\n+\n+ /**\n+ * The expected prefix for URIs that are stored.\n+ * This may be used to test if a MediaBlob is valid and used to create a new\n+ * path if \"prefixPath\" or \"prefixUuid\" is set for actionIfInvalid.\n+ */\n+ pathPrefix?: string;\n+}\n+\n+export type ActionIfBlobMissingAction = \"emptyBlob\";\n+\n+export interface BlobStoreFetchOptions {\n+ /**\n+ * If the blob is not found when fetching, what should we do?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an empty string: return undefined\n+ * - \"emptyBlob\": return a new MediaBlob that has the path set, but nothing else.\n+ */\n+ actionIfBlobMissing?: ActionIfBlobMissingAction;\n+}\n+\n+export interface BlobStoreOptions {\n+ defaultStoreOptions?: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions?: BlobStoreFetchOptions;\n+}\n+\n+/**\n+ * A specialized Store that is designed to handle MediaBlobs and use the\n+ * key that is included in the blob to determine exactly how it is stored.\n+ *\n+ * The full details of a MediaBlob may be changed when it is stored.\n+ * For example, it may get additional or different Metadata. This should be\n+ * what is returned when the store() method is called.\n+ *\n+ * Although BlobStore extends BaseStore, not all of the methods from\n+ * BaseStore may be implemented (or even possible). Those that are not\n+ * implemented should be documented and throw an Error if called.\n+ */\n+export abstract class BlobStore extends BaseStore {\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n+\n+ defaultStoreOptions: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions: BlobStoreFetchOptions;\n+\n+ constructor(opts?: BlobStoreOptions) {\n+ super(opts);\n+ this.defaultStoreOptions = opts?.defaultStoreOptions ?? {};\n+ this.defaultFetchOptions = opts?.defaultFetchOptions ?? {};\n+ }\n+\n+ protected async _realKey(key: string | MediaBlob): Promise {\n+ return typeof key === \"string\" ? 
key : await key.asUri();\n+ }\n+\n+ /**\n+ * Is the path set in the MediaBlob supported by this BlobStore?\n+ * Subclasses must implement and evaluate `blob.path` to make this\n+ * determination.\n+ *\n+ * Although this is async, this is expected to be a relatively fast operation\n+ * (ie - you shouldn't make network calls).\n+ *\n+ * The default implementation assumes that undefined blob.paths are invalid\n+ * and then uses the replacePathPrefix (or an empty string) as an assumed path\n+ * to start with.\n+ *\n+ * @param blob The blob to test\n+ * @param opts Any options (if needed) that may be used to determine if it is valid\n+ * @return If the string represented by blob.path is supported.\n+ */\n+ protected _hasValidPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const path = blob.path ?? \"\";\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const isPrefixed =\n+ typeof blob.path !== \"undefined\" && path.startsWith(prefix);\n+ return Promise.resolve(isPrefixed);\n+ }\n+\n+ protected _blobPathSuffix(blob: MediaBlob): string {\n+ // Get the path currently set and make sure we treat it as a string\n+ const blobPath = `${blob.path}`;\n+\n+ // Advance past the first set of /\n+ let pathStart = blobPath.indexOf(\"/\") + 1;\n+ while (blobPath.charAt(pathStart) === \"/\") {\n+ pathStart += 1;\n+ }\n+\n+ // We will use the rest as the path for a replacement\n+ return blobPath.substring(pathStart);\n+ }\n+\n+ protected async _newBlob(\n+ oldBlob: MediaBlob,\n+ newPath?: string\n+ ): Promise {\n+ const oldPath = oldBlob.path;\n+ const metadata = oldBlob?.metadata ?? {};\n+ metadata.langchainOldPath = oldPath;\n+ const newBlob = new MediaBlob({\n+ ...oldBlob,\n+ metadata,\n+ });\n+ if (newPath) {\n+ newBlob.path = newPath;\n+ } else if (newBlob.path) {\n+ delete newBlob.path;\n+ }\n+ return newBlob;\n+ }\n+\n+ protected async _validBlobPrefixPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const suffix = this._blobPathSuffix(blob);\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobPrefixUuid(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? 
\"\";\n+ const suffix = uuidv4(); // TODO - option to specify version?\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobRemovePath(\n+ blob: MediaBlob,\n+ _opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ return this._newBlob(blob, undefined);\n+ }\n+\n+ /**\n+ * Based on the blob and options, return a blob that has a valid path\n+ * that can be saved.\n+ * @param blob\n+ * @param opts\n+ */\n+ protected async _validStoreBlob(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ if (await this._hasValidPath(blob, opts)) {\n+ return blob;\n+ }\n+ switch (opts?.actionIfInvalid) {\n+ case \"ignore\":\n+ return blob;\n+ case \"prefixPath\":\n+ return this._validBlobPrefixPath(blob, opts);\n+ case \"prefixUuid\":\n+ return this._validBlobPrefixUuid(blob, opts);\n+ case \"removePath\":\n+ return this._validBlobRemovePath(blob, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async store(\n+ blob: MediaBlob,\n+ opts: BlobStoreStoreOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreStoreOptions = {\n+ ...this.defaultStoreOptions,\n+ ...opts,\n+ };\n+ const validBlob = await this._validStoreBlob(blob, allOpts);\n+ if (typeof validBlob !== \"undefined\") {\n+ const validKey = await validBlob.asUri();\n+ await this.mset([[validKey, validBlob]]);\n+ const savedKey = await validBlob.asUri();\n+ return await this.fetch(savedKey);\n+ }\n+ return undefined;\n+ }\n+\n+ protected async _missingFetchBlobEmpty(\n+ path: string,\n+ _opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ return new MediaBlob({ path });\n+ }\n+\n+ protected async _missingFetchBlob(\n+ path: string,\n+ opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ switch (opts?.actionIfBlobMissing) {\n+ case \"emptyBlob\":\n+ return this._missingFetchBlobEmpty(path, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async fetch(\n+ key: string | MediaBlob,\n+ opts: BlobStoreFetchOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreFetchOptions = {\n+ ...this.defaultFetchOptions,\n+ ...opts,\n+ };\n+ const realKey = await this._realKey(key);\n+ const ret = await this.mget([realKey]);\n+ return ret?.[0] ?? (await this._missingFetchBlob(realKey, allOpts));\n+ }\n+}\n+\n+export interface BackedBlobStoreOptions extends BlobStoreOptions {\n+ backingStore: BaseStore;\n+}\n+\n+export class BackedBlobStore extends BlobStore {", - "comment_created_at": "2024-07-05T21:46:14+00:00", - "comment_author": "eyurtsev", - "comment_body": "Is this abstraction used?", - "pr_file_module": null - }, - { - "comment_id": "1669461213", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5835, - "pr_file": "libs/langchain-google-common/src/experimental/utils/media_core.ts", - "discussion_id": "1667177345", - "commented_code": "@@ -0,0 +1,504 @@\n+import { v4 as uuidv4 } from \"uuid\";\n+import { BaseStore } from \"@langchain/core/stores\";\n+import { Serializable } from \"@langchain/core/load/serializable\";\n+\n+export interface MediaBlobParameters {\n+ data?: Blob;\n+\n+ metadata?: Record;\n+\n+ path?: string;\n+}\n+\n+/**\n+ * Represents a chunk of data that can be identified by the path where the\n+ * data is (or will be) located, along with optional metadata about the data.\n+ */\n+export class MediaBlob\n+ extends Serializable // FIXME - I'm not sure this serializes or deserializes correctly\n+ implements MediaBlobParameters\n+{\n+ lc_serializable = true;\n+\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? 
And why?\n+\n+ data?: Blob;\n+\n+ // eslint-disable-next-line @typescript-eslint/no-explicit-any\n+ metadata?: Record;\n+\n+ path?: string;\n+\n+ constructor(params?: MediaBlobParameters) {\n+ super(params);\n+ this.data = params?.data;\n+ this.metadata = params?.metadata;\n+ this.path = params?.path;\n+ }\n+\n+ get size(): number {\n+ return this.data?.size ?? 0;\n+ }\n+\n+ get dataType(): string {\n+ return this.data?.type ?? \"\";\n+ }\n+\n+ get encoding(): string {\n+ const charsetEquals = this.dataType.indexOf(\"charset=\");\n+ return charsetEquals === -1\n+ ? \"utf-8\"\n+ : this.dataType.substring(charsetEquals + 8);\n+ }\n+\n+ get mimetype(): string {\n+ const semicolon = this.dataType.indexOf(\";\");\n+ return semicolon === -1\n+ ? this.dataType\n+ : this.dataType.substring(0, semicolon);\n+ }\n+\n+ async asString(): Promise {\n+ const data = this.data ?? new Blob([]);\n+ const dataBuffer = await data.arrayBuffer();\n+ const dataArray = new Uint8Array(dataBuffer);\n+\n+ // Need to handle the array in smaller chunks to deal with stack size limits\n+ let ret = \"\";\n+ const chunkSize = 102400;\n+ for (let i = 0; i < dataArray.length; i += chunkSize) {\n+ const chunk = dataArray.subarray(i, i + chunkSize);\n+ ret += String.fromCharCode(...chunk);\n+ }\n+\n+ return ret;\n+ }\n+\n+ async asBase64(): Promise {\n+ return btoa(await this.asString());\n+ }\n+\n+ async asDataUrl(): Promise {\n+ return `data:${this.mimetype};base64,${await this.asBase64()}`;\n+ }\n+\n+ async asUri(): Promise {\n+ return this.path ?? (await this.asDataUrl());\n+ }\n+\n+ async encode(): Promise<{ encoded: string; encoding: string }> {\n+ const dataUrl = await this.asDataUrl();\n+ const comma = dataUrl.indexOf(\",\");\n+ const encoded = dataUrl.substring(comma + 1);\n+ const encoding: string = dataUrl.indexOf(\"base64\") > -1 ? \"base64\" : \"8bit\";\n+ return {\n+ encoded,\n+ encoding,\n+ };\n+ }\n+}\n+\n+export type ActionIfInvalidAction =\n+ | \"ignore\"\n+ | \"prefixPath\"\n+ | \"prefixUuid\"\n+ | \"removePath\";\n+\n+export interface BlobStoreStoreOptions {\n+ /**\n+ * If the path is missing or invalid in the blob, how should we create\n+ * a new path?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an emtpy string: Reject the blob\n+ * - \"ignore\": Attempt to store it anyway (but this may fail)\n+ * - \"prefixPath\": Use the default prefix for the BlobStore and get the\n+ * unique portion from the URL. The original path is stored in the metadata\n+ * - \"prefixUuid\": Use the default prefix for the BlobStore and get the\n+ * unique portion from a generated UUID. 
The original path is stored\n+ * in the metadata\n+ */\n+ actionIfInvalid?: ActionIfInvalidAction;\n+\n+ /**\n+ * The expected prefix for URIs that are stored.\n+ * This may be used to test if a MediaBlob is valid and used to create a new\n+ * path if \"prefixPath\" or \"prefixUuid\" is set for actionIfInvalid.\n+ */\n+ pathPrefix?: string;\n+}\n+\n+export type ActionIfBlobMissingAction = \"emptyBlob\";\n+\n+export interface BlobStoreFetchOptions {\n+ /**\n+ * If the blob is not found when fetching, what should we do?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an empty string: return undefined\n+ * - \"emptyBlob\": return a new MediaBlob that has the path set, but nothing else.\n+ */\n+ actionIfBlobMissing?: ActionIfBlobMissingAction;\n+}\n+\n+export interface BlobStoreOptions {\n+ defaultStoreOptions?: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions?: BlobStoreFetchOptions;\n+}\n+\n+/**\n+ * A specialized Store that is designed to handle MediaBlobs and use the\n+ * key that is included in the blob to determine exactly how it is stored.\n+ *\n+ * The full details of a MediaBlob may be changed when it is stored.\n+ * For example, it may get additional or different Metadata. This should be\n+ * what is returned when the store() method is called.\n+ *\n+ * Although BlobStore extends BaseStore, not all of the methods from\n+ * BaseStore may be implemented (or even possible). Those that are not\n+ * implemented should be documented and throw an Error if called.\n+ */\n+export abstract class BlobStore extends BaseStore {\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n+\n+ defaultStoreOptions: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions: BlobStoreFetchOptions;\n+\n+ constructor(opts?: BlobStoreOptions) {\n+ super(opts);\n+ this.defaultStoreOptions = opts?.defaultStoreOptions ?? {};\n+ this.defaultFetchOptions = opts?.defaultFetchOptions ?? {};\n+ }\n+\n+ protected async _realKey(key: string | MediaBlob): Promise {\n+ return typeof key === \"string\" ? key : await key.asUri();\n+ }\n+\n+ /**\n+ * Is the path set in the MediaBlob supported by this BlobStore?\n+ * Subclasses must implement and evaluate `blob.path` to make this\n+ * determination.\n+ *\n+ * Although this is async, this is expected to be a relatively fast operation\n+ * (ie - you shouldn't make network calls).\n+ *\n+ * The default implementation assumes that undefined blob.paths are invalid\n+ * and then uses the replacePathPrefix (or an empty string) as an assumed path\n+ * to start with.\n+ *\n+ * @param blob The blob to test\n+ * @param opts Any options (if needed) that may be used to determine if it is valid\n+ * @return If the string represented by blob.path is supported.\n+ */\n+ protected _hasValidPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const path = blob.path ?? \"\";\n+ const prefix = opts?.pathPrefix ?? 
\"\";\n+ const isPrefixed =\n+ typeof blob.path !== \"undefined\" && path.startsWith(prefix);\n+ return Promise.resolve(isPrefixed);\n+ }\n+\n+ protected _blobPathSuffix(blob: MediaBlob): string {\n+ // Get the path currently set and make sure we treat it as a string\n+ const blobPath = `${blob.path}`;\n+\n+ // Advance past the first set of /\n+ let pathStart = blobPath.indexOf(\"/\") + 1;\n+ while (blobPath.charAt(pathStart) === \"/\") {\n+ pathStart += 1;\n+ }\n+\n+ // We will use the rest as the path for a replacement\n+ return blobPath.substring(pathStart);\n+ }\n+\n+ protected async _newBlob(\n+ oldBlob: MediaBlob,\n+ newPath?: string\n+ ): Promise {\n+ const oldPath = oldBlob.path;\n+ const metadata = oldBlob?.metadata ?? {};\n+ metadata.langchainOldPath = oldPath;\n+ const newBlob = new MediaBlob({\n+ ...oldBlob,\n+ metadata,\n+ });\n+ if (newPath) {\n+ newBlob.path = newPath;\n+ } else if (newBlob.path) {\n+ delete newBlob.path;\n+ }\n+ return newBlob;\n+ }\n+\n+ protected async _validBlobPrefixPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const suffix = this._blobPathSuffix(blob);\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobPrefixUuid(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const suffix = uuidv4(); // TODO - option to specify version?\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobRemovePath(\n+ blob: MediaBlob,\n+ _opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ return this._newBlob(blob, undefined);\n+ }\n+\n+ /**\n+ * Based on the blob and options, return a blob that has a valid path\n+ * that can be saved.\n+ * @param blob\n+ * @param opts\n+ */\n+ protected async _validStoreBlob(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ if (await this._hasValidPath(blob, opts)) {\n+ return blob;\n+ }\n+ switch (opts?.actionIfInvalid) {\n+ case \"ignore\":\n+ return blob;\n+ case \"prefixPath\":\n+ return this._validBlobPrefixPath(blob, opts);\n+ case \"prefixUuid\":\n+ return this._validBlobPrefixUuid(blob, opts);\n+ case \"removePath\":\n+ return this._validBlobRemovePath(blob, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async store(\n+ blob: MediaBlob,\n+ opts: BlobStoreStoreOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreStoreOptions = {\n+ ...this.defaultStoreOptions,\n+ ...opts,\n+ };\n+ const validBlob = await this._validStoreBlob(blob, allOpts);\n+ if (typeof validBlob !== \"undefined\") {\n+ const validKey = await validBlob.asUri();\n+ await this.mset([[validKey, validBlob]]);\n+ const savedKey = await validBlob.asUri();\n+ return await this.fetch(savedKey);\n+ }\n+ return undefined;\n+ }\n+\n+ protected async _missingFetchBlobEmpty(\n+ path: string,\n+ _opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ return new MediaBlob({ path });\n+ }\n+\n+ protected async _missingFetchBlob(\n+ path: string,\n+ opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ switch (opts?.actionIfBlobMissing) {\n+ case \"emptyBlob\":\n+ return this._missingFetchBlobEmpty(path, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async fetch(\n+ key: string | MediaBlob,\n+ opts: BlobStoreFetchOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreFetchOptions = {\n+ ...this.defaultFetchOptions,\n+ ...opts,\n+ };\n+ const realKey = await this._realKey(key);\n+ const ret = 
await this.mget([realKey]);\n+ return ret?.[0] ?? (await this._missingFetchBlob(realKey, allOpts));\n+ }\n+}\n+\n+export interface BackedBlobStoreOptions extends BlobStoreOptions {\n+ backingStore: BaseStore;\n+}\n+\n+export class BackedBlobStore extends BlobStore {", - "comment_created_at": "2024-07-09T00:02:08+00:00", - "comment_author": "afirstenberg", - "comment_body": "Yes.\r\n\r\nI use it in tests for `MediaManager` for an `aliasStore` that is backed by a memory store.\r\nI can also see it used in general for either the `aliasStore` or a retriever that will be using some other defined store to store things. I don't want people to need to re-invent the storage methodology to support the `BlobStore` semantics, but I do want to be able to support them.", - "pr_file_module": null - }, - { - "comment_id": "1699469919", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5835, - "pr_file": "libs/langchain-google-common/src/experimental/utils/media_core.ts", - "discussion_id": "1667177345", - "commented_code": "@@ -0,0 +1,504 @@\n+import { v4 as uuidv4 } from \"uuid\";\n+import { BaseStore } from \"@langchain/core/stores\";\n+import { Serializable } from \"@langchain/core/load/serializable\";\n+\n+export interface MediaBlobParameters {\n+ data?: Blob;\n+\n+ metadata?: Record;\n+\n+ path?: string;\n+}\n+\n+/**\n+ * Represents a chunk of data that can be identified by the path where the\n+ * data is (or will be) located, along with optional metadata about the data.\n+ */\n+export class MediaBlob\n+ extends Serializable // FIXME - I'm not sure this serializes or deserializes correctly\n+ implements MediaBlobParameters\n+{\n+ lc_serializable = true;\n+\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n+\n+ data?: Blob;\n+\n+ // eslint-disable-next-line @typescript-eslint/no-explicit-any\n+ metadata?: Record;\n+\n+ path?: string;\n+\n+ constructor(params?: MediaBlobParameters) {\n+ super(params);\n+ this.data = params?.data;\n+ this.metadata = params?.metadata;\n+ this.path = params?.path;\n+ }\n+\n+ get size(): number {\n+ return this.data?.size ?? 0;\n+ }\n+\n+ get dataType(): string {\n+ return this.data?.type ?? \"\";\n+ }\n+\n+ get encoding(): string {\n+ const charsetEquals = this.dataType.indexOf(\"charset=\");\n+ return charsetEquals === -1\n+ ? \"utf-8\"\n+ : this.dataType.substring(charsetEquals + 8);\n+ }\n+\n+ get mimetype(): string {\n+ const semicolon = this.dataType.indexOf(\";\");\n+ return semicolon === -1\n+ ? this.dataType\n+ : this.dataType.substring(0, semicolon);\n+ }\n+\n+ async asString(): Promise {\n+ const data = this.data ?? new Blob([]);\n+ const dataBuffer = await data.arrayBuffer();\n+ const dataArray = new Uint8Array(dataBuffer);\n+\n+ // Need to handle the array in smaller chunks to deal with stack size limits\n+ let ret = \"\";\n+ const chunkSize = 102400;\n+ for (let i = 0; i < dataArray.length; i += chunkSize) {\n+ const chunk = dataArray.subarray(i, i + chunkSize);\n+ ret += String.fromCharCode(...chunk);\n+ }\n+\n+ return ret;\n+ }\n+\n+ async asBase64(): Promise {\n+ return btoa(await this.asString());\n+ }\n+\n+ async asDataUrl(): Promise {\n+ return `data:${this.mimetype};base64,${await this.asBase64()}`;\n+ }\n+\n+ async asUri(): Promise {\n+ return this.path ?? 
(await this.asDataUrl());\n+ }\n+\n+ async encode(): Promise<{ encoded: string; encoding: string }> {\n+ const dataUrl = await this.asDataUrl();\n+ const comma = dataUrl.indexOf(\",\");\n+ const encoded = dataUrl.substring(comma + 1);\n+ const encoding: string = dataUrl.indexOf(\"base64\") > -1 ? \"base64\" : \"8bit\";\n+ return {\n+ encoded,\n+ encoding,\n+ };\n+ }\n+}\n+\n+export type ActionIfInvalidAction =\n+ | \"ignore\"\n+ | \"prefixPath\"\n+ | \"prefixUuid\"\n+ | \"removePath\";\n+\n+export interface BlobStoreStoreOptions {\n+ /**\n+ * If the path is missing or invalid in the blob, how should we create\n+ * a new path?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an emtpy string: Reject the blob\n+ * - \"ignore\": Attempt to store it anyway (but this may fail)\n+ * - \"prefixPath\": Use the default prefix for the BlobStore and get the\n+ * unique portion from the URL. The original path is stored in the metadata\n+ * - \"prefixUuid\": Use the default prefix for the BlobStore and get the\n+ * unique portion from a generated UUID. The original path is stored\n+ * in the metadata\n+ */\n+ actionIfInvalid?: ActionIfInvalidAction;\n+\n+ /**\n+ * The expected prefix for URIs that are stored.\n+ * This may be used to test if a MediaBlob is valid and used to create a new\n+ * path if \"prefixPath\" or \"prefixUuid\" is set for actionIfInvalid.\n+ */\n+ pathPrefix?: string;\n+}\n+\n+export type ActionIfBlobMissingAction = \"emptyBlob\";\n+\n+export interface BlobStoreFetchOptions {\n+ /**\n+ * If the blob is not found when fetching, what should we do?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an empty string: return undefined\n+ * - \"emptyBlob\": return a new MediaBlob that has the path set, but nothing else.\n+ */\n+ actionIfBlobMissing?: ActionIfBlobMissingAction;\n+}\n+\n+export interface BlobStoreOptions {\n+ defaultStoreOptions?: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions?: BlobStoreFetchOptions;\n+}\n+\n+/**\n+ * A specialized Store that is designed to handle MediaBlobs and use the\n+ * key that is included in the blob to determine exactly how it is stored.\n+ *\n+ * The full details of a MediaBlob may be changed when it is stored.\n+ * For example, it may get additional or different Metadata. This should be\n+ * what is returned when the store() method is called.\n+ *\n+ * Although BlobStore extends BaseStore, not all of the methods from\n+ * BaseStore may be implemented (or even possible). Those that are not\n+ * implemented should be documented and throw an Error if called.\n+ */\n+export abstract class BlobStore extends BaseStore {\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n+\n+ defaultStoreOptions: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions: BlobStoreFetchOptions;\n+\n+ constructor(opts?: BlobStoreOptions) {\n+ super(opts);\n+ this.defaultStoreOptions = opts?.defaultStoreOptions ?? {};\n+ this.defaultFetchOptions = opts?.defaultFetchOptions ?? {};\n+ }\n+\n+ protected async _realKey(key: string | MediaBlob): Promise {\n+ return typeof key === \"string\" ? 
key : await key.asUri();\n+ }\n+\n+ /**\n+ * Is the path set in the MediaBlob supported by this BlobStore?\n+ * Subclasses must implement and evaluate `blob.path` to make this\n+ * determination.\n+ *\n+ * Although this is async, this is expected to be a relatively fast operation\n+ * (ie - you shouldn't make network calls).\n+ *\n+ * The default implementation assumes that undefined blob.paths are invalid\n+ * and then uses the replacePathPrefix (or an empty string) as an assumed path\n+ * to start with.\n+ *\n+ * @param blob The blob to test\n+ * @param opts Any options (if needed) that may be used to determine if it is valid\n+ * @return If the string represented by blob.path is supported.\n+ */\n+ protected _hasValidPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const path = blob.path ?? \"\";\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const isPrefixed =\n+ typeof blob.path !== \"undefined\" && path.startsWith(prefix);\n+ return Promise.resolve(isPrefixed);\n+ }\n+\n+ protected _blobPathSuffix(blob: MediaBlob): string {\n+ // Get the path currently set and make sure we treat it as a string\n+ const blobPath = `${blob.path}`;\n+\n+ // Advance past the first set of /\n+ let pathStart = blobPath.indexOf(\"/\") + 1;\n+ while (blobPath.charAt(pathStart) === \"/\") {\n+ pathStart += 1;\n+ }\n+\n+ // We will use the rest as the path for a replacement\n+ return blobPath.substring(pathStart);\n+ }\n+\n+ protected async _newBlob(\n+ oldBlob: MediaBlob,\n+ newPath?: string\n+ ): Promise {\n+ const oldPath = oldBlob.path;\n+ const metadata = oldBlob?.metadata ?? {};\n+ metadata.langchainOldPath = oldPath;\n+ const newBlob = new MediaBlob({\n+ ...oldBlob,\n+ metadata,\n+ });\n+ if (newPath) {\n+ newBlob.path = newPath;\n+ } else if (newBlob.path) {\n+ delete newBlob.path;\n+ }\n+ return newBlob;\n+ }\n+\n+ protected async _validBlobPrefixPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const suffix = this._blobPathSuffix(blob);\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobPrefixUuid(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? 
\"\";\n+ const suffix = uuidv4(); // TODO - option to specify version?\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobRemovePath(\n+ blob: MediaBlob,\n+ _opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ return this._newBlob(blob, undefined);\n+ }\n+\n+ /**\n+ * Based on the blob and options, return a blob that has a valid path\n+ * that can be saved.\n+ * @param blob\n+ * @param opts\n+ */\n+ protected async _validStoreBlob(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ if (await this._hasValidPath(blob, opts)) {\n+ return blob;\n+ }\n+ switch (opts?.actionIfInvalid) {\n+ case \"ignore\":\n+ return blob;\n+ case \"prefixPath\":\n+ return this._validBlobPrefixPath(blob, opts);\n+ case \"prefixUuid\":\n+ return this._validBlobPrefixUuid(blob, opts);\n+ case \"removePath\":\n+ return this._validBlobRemovePath(blob, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async store(\n+ blob: MediaBlob,\n+ opts: BlobStoreStoreOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreStoreOptions = {\n+ ...this.defaultStoreOptions,\n+ ...opts,\n+ };\n+ const validBlob = await this._validStoreBlob(blob, allOpts);\n+ if (typeof validBlob !== \"undefined\") {\n+ const validKey = await validBlob.asUri();\n+ await this.mset([[validKey, validBlob]]);\n+ const savedKey = await validBlob.asUri();\n+ return await this.fetch(savedKey);\n+ }\n+ return undefined;\n+ }\n+\n+ protected async _missingFetchBlobEmpty(\n+ path: string,\n+ _opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ return new MediaBlob({ path });\n+ }\n+\n+ protected async _missingFetchBlob(\n+ path: string,\n+ opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ switch (opts?.actionIfBlobMissing) {\n+ case \"emptyBlob\":\n+ return this._missingFetchBlobEmpty(path, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async fetch(\n+ key: string | MediaBlob,\n+ opts: BlobStoreFetchOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreFetchOptions = {\n+ ...this.defaultFetchOptions,\n+ ...opts,\n+ };\n+ const realKey = await this._realKey(key);\n+ const ret = await this.mget([realKey]);\n+ return ret?.[0] ?? 
(await this._missingFetchBlob(realKey, allOpts));\n+ }\n+}\n+\n+export interface BackedBlobStoreOptions extends BlobStoreOptions {\n+ backingStore: BaseStore;\n+}\n+\n+export class BackedBlobStore extends BlobStore {", - "comment_created_at": "2024-08-01T05:59:07+00:00", - "comment_author": "jacoblee93", - "comment_body": "Why not just interact with instances of `this.backingStore` directly?\r\n\r\nWould rather just init three different instances of it or just have some kind of `.bind()` semantic if absolutely necessary - there's a lot of inheritance already\r\n\r\nIf just used for tests, put in a `utils/testing` file", - "pr_file_module": null - }, - { - "comment_id": "1701014291", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5835, - "pr_file": "libs/langchain-google-common/src/experimental/utils/media_core.ts", - "discussion_id": "1667177345", - "commented_code": "@@ -0,0 +1,504 @@\n+import { v4 as uuidv4 } from \"uuid\";\n+import { BaseStore } from \"@langchain/core/stores\";\n+import { Serializable } from \"@langchain/core/load/serializable\";\n+\n+export interface MediaBlobParameters {\n+ data?: Blob;\n+\n+ metadata?: Record;\n+\n+ path?: string;\n+}\n+\n+/**\n+ * Represents a chunk of data that can be identified by the path where the\n+ * data is (or will be) located, along with optional metadata about the data.\n+ */\n+export class MediaBlob\n+ extends Serializable // FIXME - I'm not sure this serializes or deserializes correctly\n+ implements MediaBlobParameters\n+{\n+ lc_serializable = true;\n+\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n+\n+ data?: Blob;\n+\n+ // eslint-disable-next-line @typescript-eslint/no-explicit-any\n+ metadata?: Record;\n+\n+ path?: string;\n+\n+ constructor(params?: MediaBlobParameters) {\n+ super(params);\n+ this.data = params?.data;\n+ this.metadata = params?.metadata;\n+ this.path = params?.path;\n+ }\n+\n+ get size(): number {\n+ return this.data?.size ?? 0;\n+ }\n+\n+ get dataType(): string {\n+ return this.data?.type ?? \"\";\n+ }\n+\n+ get encoding(): string {\n+ const charsetEquals = this.dataType.indexOf(\"charset=\");\n+ return charsetEquals === -1\n+ ? \"utf-8\"\n+ : this.dataType.substring(charsetEquals + 8);\n+ }\n+\n+ get mimetype(): string {\n+ const semicolon = this.dataType.indexOf(\";\");\n+ return semicolon === -1\n+ ? this.dataType\n+ : this.dataType.substring(0, semicolon);\n+ }\n+\n+ async asString(): Promise {\n+ const data = this.data ?? new Blob([]);\n+ const dataBuffer = await data.arrayBuffer();\n+ const dataArray = new Uint8Array(dataBuffer);\n+\n+ // Need to handle the array in smaller chunks to deal with stack size limits\n+ let ret = \"\";\n+ const chunkSize = 102400;\n+ for (let i = 0; i < dataArray.length; i += chunkSize) {\n+ const chunk = dataArray.subarray(i, i + chunkSize);\n+ ret += String.fromCharCode(...chunk);\n+ }\n+\n+ return ret;\n+ }\n+\n+ async asBase64(): Promise {\n+ return btoa(await this.asString());\n+ }\n+\n+ async asDataUrl(): Promise {\n+ return `data:${this.mimetype};base64,${await this.asBase64()}`;\n+ }\n+\n+ async asUri(): Promise {\n+ return this.path ?? (await this.asDataUrl());\n+ }\n+\n+ async encode(): Promise<{ encoded: string; encoding: string }> {\n+ const dataUrl = await this.asDataUrl();\n+ const comma = dataUrl.indexOf(\",\");\n+ const encoded = dataUrl.substring(comma + 1);\n+ const encoding: string = dataUrl.indexOf(\"base64\") > -1 ? 
\"base64\" : \"8bit\";\n+ return {\n+ encoded,\n+ encoding,\n+ };\n+ }\n+}\n+\n+export type ActionIfInvalidAction =\n+ | \"ignore\"\n+ | \"prefixPath\"\n+ | \"prefixUuid\"\n+ | \"removePath\";\n+\n+export interface BlobStoreStoreOptions {\n+ /**\n+ * If the path is missing or invalid in the blob, how should we create\n+ * a new path?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an emtpy string: Reject the blob\n+ * - \"ignore\": Attempt to store it anyway (but this may fail)\n+ * - \"prefixPath\": Use the default prefix for the BlobStore and get the\n+ * unique portion from the URL. The original path is stored in the metadata\n+ * - \"prefixUuid\": Use the default prefix for the BlobStore and get the\n+ * unique portion from a generated UUID. The original path is stored\n+ * in the metadata\n+ */\n+ actionIfInvalid?: ActionIfInvalidAction;\n+\n+ /**\n+ * The expected prefix for URIs that are stored.\n+ * This may be used to test if a MediaBlob is valid and used to create a new\n+ * path if \"prefixPath\" or \"prefixUuid\" is set for actionIfInvalid.\n+ */\n+ pathPrefix?: string;\n+}\n+\n+export type ActionIfBlobMissingAction = \"emptyBlob\";\n+\n+export interface BlobStoreFetchOptions {\n+ /**\n+ * If the blob is not found when fetching, what should we do?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an empty string: return undefined\n+ * - \"emptyBlob\": return a new MediaBlob that has the path set, but nothing else.\n+ */\n+ actionIfBlobMissing?: ActionIfBlobMissingAction;\n+}\n+\n+export interface BlobStoreOptions {\n+ defaultStoreOptions?: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions?: BlobStoreFetchOptions;\n+}\n+\n+/**\n+ * A specialized Store that is designed to handle MediaBlobs and use the\n+ * key that is included in the blob to determine exactly how it is stored.\n+ *\n+ * The full details of a MediaBlob may be changed when it is stored.\n+ * For example, it may get additional or different Metadata. This should be\n+ * what is returned when the store() method is called.\n+ *\n+ * Although BlobStore extends BaseStore, not all of the methods from\n+ * BaseStore may be implemented (or even possible). Those that are not\n+ * implemented should be documented and throw an Error if called.\n+ */\n+export abstract class BlobStore extends BaseStore {\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n+\n+ defaultStoreOptions: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions: BlobStoreFetchOptions;\n+\n+ constructor(opts?: BlobStoreOptions) {\n+ super(opts);\n+ this.defaultStoreOptions = opts?.defaultStoreOptions ?? {};\n+ this.defaultFetchOptions = opts?.defaultFetchOptions ?? {};\n+ }\n+\n+ protected async _realKey(key: string | MediaBlob): Promise {\n+ return typeof key === \"string\" ? 
key : await key.asUri();\n+ }\n+\n+ /**\n+ * Is the path set in the MediaBlob supported by this BlobStore?\n+ * Subclasses must implement and evaluate `blob.path` to make this\n+ * determination.\n+ *\n+ * Although this is async, this is expected to be a relatively fast operation\n+ * (ie - you shouldn't make network calls).\n+ *\n+ * The default implementation assumes that undefined blob.paths are invalid\n+ * and then uses the replacePathPrefix (or an empty string) as an assumed path\n+ * to start with.\n+ *\n+ * @param blob The blob to test\n+ * @param opts Any options (if needed) that may be used to determine if it is valid\n+ * @return If the string represented by blob.path is supported.\n+ */\n+ protected _hasValidPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const path = blob.path ?? \"\";\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const isPrefixed =\n+ typeof blob.path !== \"undefined\" && path.startsWith(prefix);\n+ return Promise.resolve(isPrefixed);\n+ }\n+\n+ protected _blobPathSuffix(blob: MediaBlob): string {\n+ // Get the path currently set and make sure we treat it as a string\n+ const blobPath = `${blob.path}`;\n+\n+ // Advance past the first set of /\n+ let pathStart = blobPath.indexOf(\"/\") + 1;\n+ while (blobPath.charAt(pathStart) === \"/\") {\n+ pathStart += 1;\n+ }\n+\n+ // We will use the rest as the path for a replacement\n+ return blobPath.substring(pathStart);\n+ }\n+\n+ protected async _newBlob(\n+ oldBlob: MediaBlob,\n+ newPath?: string\n+ ): Promise {\n+ const oldPath = oldBlob.path;\n+ const metadata = oldBlob?.metadata ?? {};\n+ metadata.langchainOldPath = oldPath;\n+ const newBlob = new MediaBlob({\n+ ...oldBlob,\n+ metadata,\n+ });\n+ if (newPath) {\n+ newBlob.path = newPath;\n+ } else if (newBlob.path) {\n+ delete newBlob.path;\n+ }\n+ return newBlob;\n+ }\n+\n+ protected async _validBlobPrefixPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const suffix = this._blobPathSuffix(blob);\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobPrefixUuid(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? 
\"\";\n+ const suffix = uuidv4(); // TODO - option to specify version?\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobRemovePath(\n+ blob: MediaBlob,\n+ _opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ return this._newBlob(blob, undefined);\n+ }\n+\n+ /**\n+ * Based on the blob and options, return a blob that has a valid path\n+ * that can be saved.\n+ * @param blob\n+ * @param opts\n+ */\n+ protected async _validStoreBlob(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ if (await this._hasValidPath(blob, opts)) {\n+ return blob;\n+ }\n+ switch (opts?.actionIfInvalid) {\n+ case \"ignore\":\n+ return blob;\n+ case \"prefixPath\":\n+ return this._validBlobPrefixPath(blob, opts);\n+ case \"prefixUuid\":\n+ return this._validBlobPrefixUuid(blob, opts);\n+ case \"removePath\":\n+ return this._validBlobRemovePath(blob, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async store(\n+ blob: MediaBlob,\n+ opts: BlobStoreStoreOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreStoreOptions = {\n+ ...this.defaultStoreOptions,\n+ ...opts,\n+ };\n+ const validBlob = await this._validStoreBlob(blob, allOpts);\n+ if (typeof validBlob !== \"undefined\") {\n+ const validKey = await validBlob.asUri();\n+ await this.mset([[validKey, validBlob]]);\n+ const savedKey = await validBlob.asUri();\n+ return await this.fetch(savedKey);\n+ }\n+ return undefined;\n+ }\n+\n+ protected async _missingFetchBlobEmpty(\n+ path: string,\n+ _opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ return new MediaBlob({ path });\n+ }\n+\n+ protected async _missingFetchBlob(\n+ path: string,\n+ opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ switch (opts?.actionIfBlobMissing) {\n+ case \"emptyBlob\":\n+ return this._missingFetchBlobEmpty(path, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async fetch(\n+ key: string | MediaBlob,\n+ opts: BlobStoreFetchOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreFetchOptions = {\n+ ...this.defaultFetchOptions,\n+ ...opts,\n+ };\n+ const realKey = await this._realKey(key);\n+ const ret = await this.mget([realKey]);\n+ return ret?.[0] ?? (await this._missingFetchBlob(realKey, allOpts));\n+ }\n+}\n+\n+export interface BackedBlobStoreOptions extends BlobStoreOptions {\n+ backingStore: BaseStore;\n+}\n+\n+export class BackedBlobStore extends BlobStore {", - "comment_created_at": "2024-08-01T23:41:41+00:00", - "comment_author": "afirstenberg", - "comment_body": "Because the `BlobStore` versions have different semantics for storing an object (they return values from a set while the `BaseStore` versions don't). So rather than reimplementing all of the `BaseStore` implementations (now and in the future), it guarantees those semantics for other `BaseStore` types. 
A memory store being the biggest example, but anything that may save to a database or whatever also makes sense.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1695528343", - "pr_number": 6250, - "pr_file": "libs/langchain-aws/src/common.ts", - "created_at": "2024-07-29T16:32:08+00:00", - "commented_code": ".map((msg) => {\n if (msg._getType() === \"ai\") {\n const castMsg = msg as AIMessage;\n if (typeof castMsg.content === \"string\" && castMsg.content !== \"\") {\n if (castMsg.tool_calls && castMsg.tool_calls.length) {", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1695528343", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6250, - "pr_file": "libs/langchain-aws/src/common.ts", - "discussion_id": "1695528343", - "commented_code": "@@ -82,7 +82,26 @@ export function convertToConverseMessages(messages: BaseMessage[]): {\n .map((msg) => {\n if (msg._getType() === \"ai\") {\n const castMsg = msg as AIMessage;\n- if (typeof castMsg.content === \"string\" && castMsg.content !== \"\") {\n+ if (castMsg.tool_calls && castMsg.tool_calls.length) {", - "comment_created_at": "2024-07-29T16:32:08+00:00", - "comment_author": "bracesproul", - "comment_body": "Instead of mapping over it, calling `.reduce` would likely be a better method, so we can keep all content fields. Could you push up this refactor? If not I can push up in a little", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-comprehensive-ai-documentation.json b/_reviewers/langchainjs-comprehensive-ai-documentation.json new file mode 100644 index 0000000..0d2cb21 --- /dev/null +++ b/_reviewers/langchainjs-comprehensive-ai-documentation.json @@ -0,0 +1,102 @@ +[ + { + "discussion_id": "1818682202", + "pr_number": 7033, + "pr_file": "docs/core_docs/docs/integrations/platforms/microsoft.mdx", + "created_at": "2024-10-28T09:23:11+00:00", + "commented_code": "import { AzureCosmosDBMongoDBVectorStore } from \"@langchain/azure-cosmosdb\";\n```\n\n## Semantic Cache\n\n### Azure Cosmos DB NoSQL Semantic Cache\n\n> The Semantic Cache feature is supported with Azure Cosmos DB for NoSQL integration, enabling users to retrieve cached responses based on semantic similarity between the user input and previously cached results. It leverages AzureCosmosDBNoSQLVectorStore, which stores vector embeddings of cached prompts. These embeddings enable similarity-based searches, allowing the system to retrieve relevant cached results.", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1818682202", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7033, + "pr_file": "docs/core_docs/docs/integrations/platforms/microsoft.mdx", + "discussion_id": "1818682202", + "commented_code": "@@ -132,6 +132,24 @@ See a [usage example](/docs/integrations/vectorstores/azure_cosmosdb_mongodb).\n import { AzureCosmosDBMongoDBVectorStore } from \"@langchain/azure-cosmosdb\";\n ```\n \n+## Semantic Cache\n+\n+### Azure Cosmos DB NoSQL Semantic Cache\n+\n+> The Semantic Cache feature is supported with Azure Cosmos DB for NoSQL integration, enabling users to retrieve cached responses based on semantic similarity between the user input and previously cached results. It leverages AzureCosmosDBNoSQLVectorStore, which stores vector embeddings of cached prompts. 
These embeddings enable similarity-based searches, allowing the system to retrieve relevant cached results.", + "comment_created_at": "2024-10-28T09:23:11+00:00", + "comment_author": "sinedied", + "comment_body": "nit: a link to `AzureCosmosDBNoSQLVectorStore` doc would be nice here", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1705945143", + "pr_number": 6414, + "pr_file": "docs/core_docs/docs/integrations/vectorstores/memory.ipynb", + "created_at": "2024-08-06T18:21:09+00:00", + "commented_code": "{\n \"cells\": [\n {\n \"cell_type\": \"raw\",\n \"id\": \"1957f5cb\",\n \"metadata\": {\n \"vscode\": {\n \"languageId\": \"raw\"\n }\n },\n \"source\": [\n \"---\\n\",\n \"sidebar_label: In Memory\\n\",\n \"---\"\n ]\n },\n {\n \"cell_type\": \"markdown\",\n \"id\": \"ef1f0986\",\n \"metadata\": {},\n \"source\": [\n \"# MemoryVectorStore\\n\",\n \"\\n\",\n \"MemoryVectorStore is an in-memory, ephemeral vectorstore that stores embeddings in-memory and does an exact, linear search for the most similar embeddings. The default similarity metric is cosine similarity, but can be changed to any of the similarity metrics supported by [ml-distance](https://mljs.github.io/distance/modules/similarity.html).\\n\"\n ]\n },\n {\n \"cell_type\": \"markdown\",\n \"id\": \"36fdc060\",\n \"metadata\": {},\n \"source\": [\n \"## Setup\\n\",\n \"\\n\",\n \"To access the in memory vector store you'll need to install the `langchain` integration package.\\n\",\n \"\\n\",\n \"```{=mdx}\\n\",\n \"import IntegrationInstallTooltip from \\\"@mdx_components/integration_install_tooltip.mdx\\\";\\n\",\n \"import Npm2Yarn from \\\"@theme/Npm2Yarn\\\";\\n\",\n \"\\n\",\n \"\\n\",\n \"\\n\",\n \"\\n\",\n \" langchain\\n\",\n \"\\n\",\n \"```\\n\",\n \"\\n\",\n \"### Credentials\\n\",\n \"\\n\",\n \"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\\n\",\n \"\\n\",\n \"```typescript\\n\",\n \"// process.env.LANGCHAIN_TRACING_V2=\\\"true\\\"\\n\",\n \"// process.env.LANGCHAIN_API_KEY=\\\"your-api-key\\\"\\n\",\n \"```\"\n ]\n },\n {\n \"cell_type\": \"markdown\",\n \"id\": \"93df377e\",\n \"metadata\": {},\n \"source\": [\n \"## Instantiation\\n\",\n \"\\n\",\n \"In this example we'll use OpenAIEmbeddings, however you can use any embeddings you like.\"\n ]\n },\n {\n \"cell_type\": \"code\",\n \"execution_count\": 2,\n \"id\": \"dc37144c-208d-4ab3-9f3a-0407a69fe052\",\n \"metadata\": {\n \"tags\": []\n },\n \"outputs\": [],\n \"source\": [\n \"import { MemoryVectorStore } from 'langchain/vectorstores/memory'\\n\",\n \"import { OpenAIEmbeddings } from '@langchain/openai'\\n\",\n \"\\n\",\n \"const vectorStore = new MemoryVectorStore(new OpenAIEmbeddings())\"", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1705945143", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6414, + "pr_file": "docs/core_docs/docs/integrations/vectorstores/memory.ipynb", + "discussion_id": "1705945143", + "commented_code": "@@ -0,0 +1,382 @@\n+{\n+ \"cells\": [\n+ {\n+ \"cell_type\": \"raw\",\n+ \"id\": \"1957f5cb\",\n+ \"metadata\": {\n+ \"vscode\": {\n+ \"languageId\": \"raw\"\n+ }\n+ },\n+ \"source\": [\n+ \"---\\n\",\n+ \"sidebar_label: In Memory\\n\",\n+ \"---\"\n+ ]\n+ },\n+ {\n+ \"cell_type\": \"markdown\",\n+ \"id\": \"ef1f0986\",\n+ \"metadata\": {},\n+ \"source\": [\n+ \"# MemoryVectorStore\\n\",\n+ \"\\n\",\n+ \"MemoryVectorStore is an in-memory, ephemeral 
vectorstore that stores embeddings in-memory and does an exact, linear search for the most similar embeddings. The default similarity metric is cosine similarity, but can be changed to any of the similarity metrics supported by [ml-distance](https://mljs.github.io/distance/modules/similarity.html).\\n\"\n+ ]\n+ },\n+ {\n+ \"cell_type\": \"markdown\",\n+ \"id\": \"36fdc060\",\n+ \"metadata\": {},\n+ \"source\": [\n+ \"## Setup\\n\",\n+ \"\\n\",\n+ \"To access the in memory vector store you'll need to install the `langchain` integration package.\\n\",\n+ \"\\n\",\n+ \"```{=mdx}\\n\",\n+ \"import IntegrationInstallTooltip from \\\"@mdx_components/integration_install_tooltip.mdx\\\";\\n\",\n+ \"import Npm2Yarn from \\\"@theme/Npm2Yarn\\\";\\n\",\n+ \"\\n\",\n+ \"\\n\",\n+ \"\\n\",\n+ \"\\n\",\n+ \" langchain\\n\",\n+ \"\\n\",\n+ \"```\\n\",\n+ \"\\n\",\n+ \"### Credentials\\n\",\n+ \"\\n\",\n+ \"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\\n\",\n+ \"\\n\",\n+ \"```typescript\\n\",\n+ \"// process.env.LANGCHAIN_TRACING_V2=\\\"true\\\"\\n\",\n+ \"// process.env.LANGCHAIN_API_KEY=\\\"your-api-key\\\"\\n\",\n+ \"```\"\n+ ]\n+ },\n+ {\n+ \"cell_type\": \"markdown\",\n+ \"id\": \"93df377e\",\n+ \"metadata\": {},\n+ \"source\": [\n+ \"## Instantiation\\n\",\n+ \"\\n\",\n+ \"In this example we'll use OpenAIEmbeddings, however you can use any embeddings you like.\"\n+ ]\n+ },\n+ {\n+ \"cell_type\": \"code\",\n+ \"execution_count\": 2,\n+ \"id\": \"dc37144c-208d-4ab3-9f3a-0407a69fe052\",\n+ \"metadata\": {\n+ \"tags\": []\n+ },\n+ \"outputs\": [],\n+ \"source\": [\n+ \"import { MemoryVectorStore } from 'langchain/vectorstores/memory'\\n\",\n+ \"import { OpenAIEmbeddings } from '@langchain/openai'\\n\",\n+ \"\\n\",\n+ \"const vectorStore = new MemoryVectorStore(new OpenAIEmbeddings())\"", + "comment_created_at": "2024-08-06T18:21:09+00:00", + "comment_author": "jacoblee93", + "comment_body": "I would show how to initialize `.fromDocuments` as well as from an existing store", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1901243077", + "pr_number": 7344, + "pr_file": "docs/core_docs/docs/integrations/retrievers/azion-edgesql.mdx", + "created_at": "2025-01-02T20:39:24+00:00", + "commented_code": "### Azion Edge SQL Retriever", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1901243077", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7344, + "pr_file": "docs/core_docs/docs/integrations/retrievers/azion-edgesql.mdx", + "discussion_id": "1901243077", + "commented_code": "@@ -0,0 +1,53 @@\n+### Azion Edge SQL Retriever", + "comment_created_at": "2025-01-02T20:39:24+00:00", + "comment_author": "jacoblee93", + "comment_body": "Can we have this use the standard retriever template?\r\n\r\nhttps://github.com/langchain-ai/langchainjs/blob/main/libs/langchain-scripts/src/cli/docs/templates/retrievers.ipynb", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1908152475", + "pr_number": 7450, + "pr_file": "docs/core_docs/docs/integrations/text_embedding/bytedance_doubao.mdx", + "created_at": "2025-01-09T04:32:02+00:00", + "commented_code": null, + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1908152475", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7450, + "pr_file": "docs/core_docs/docs/integrations/text_embedding/bytedance_doubao.mdx", + 
"discussion_id": "1908152475", + "commented_code": null, + "comment_created_at": "2025-01-09T04:32:02+00:00", + "comment_author": "jacoblee93", + "comment_body": "Can you use the embeddings docs template?\r\n\r\nhttps://github.com/langchain-ai/langchainjs/blob/main/.github/contributing/INTEGRATIONS.md#documentation-and-integration-tests\r\nhttps://github.com/langchain-ai/langchainjs/blob/main/libs/langchain-scripts/src/cli/docs/templates/text_embedding.ipynb", + "pr_file_module": null + }, + { + "comment_id": "1909012708", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7450, + "pr_file": "docs/core_docs/docs/integrations/text_embedding/bytedance_doubao.mdx", + "discussion_id": "1908152475", + "commented_code": null, + "comment_created_at": "2025-01-09T15:25:11+00:00", + "comment_author": "ucev", + "comment_body": "done. an exquisite design. i know how it works generally now.👍", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-comprehensive-ai-documentation.md b/_reviewers/langchainjs-comprehensive-ai-documentation.md index bcb8c29..6fba2f8 100644 --- a/_reviewers/langchainjs-comprehensive-ai-documentation.md +++ b/_reviewers/langchainjs-comprehensive-ai-documentation.md @@ -34,107 +34,3 @@ const results = await vectorStore.similaritySearch("query", 5); ``` This practice ensures developers can quickly understand and implement AI integrations without needing to search through multiple documentation pages or source code. - - -[ - { - "discussion_id": "1818682202", - "pr_number": 7033, - "pr_file": "docs/core_docs/docs/integrations/platforms/microsoft.mdx", - "created_at": "2024-10-28T09:23:11+00:00", - "commented_code": "import { AzureCosmosDBMongoDBVectorStore } from \"@langchain/azure-cosmosdb\";\n```\n\n## Semantic Cache\n\n### Azure Cosmos DB NoSQL Semantic Cache\n\n> The Semantic Cache feature is supported with Azure Cosmos DB for NoSQL integration, enabling users to retrieve cached responses based on semantic similarity between the user input and previously cached results. It leverages AzureCosmosDBNoSQLVectorStore, which stores vector embeddings of cached prompts. These embeddings enable similarity-based searches, allowing the system to retrieve relevant cached results.", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1818682202", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7033, - "pr_file": "docs/core_docs/docs/integrations/platforms/microsoft.mdx", - "discussion_id": "1818682202", - "commented_code": "@@ -132,6 +132,24 @@ See a [usage example](/docs/integrations/vectorstores/azure_cosmosdb_mongodb).\n import { AzureCosmosDBMongoDBVectorStore } from \"@langchain/azure-cosmosdb\";\n ```\n \n+## Semantic Cache\n+\n+### Azure Cosmos DB NoSQL Semantic Cache\n+\n+> The Semantic Cache feature is supported with Azure Cosmos DB for NoSQL integration, enabling users to retrieve cached responses based on semantic similarity between the user input and previously cached results. It leverages AzureCosmosDBNoSQLVectorStore, which stores vector embeddings of cached prompts. 
These embeddings enable similarity-based searches, allowing the system to retrieve relevant cached results.", - "comment_created_at": "2024-10-28T09:23:11+00:00", - "comment_author": "sinedied", - "comment_body": "nit: a link to `AzureCosmosDBNoSQLVectorStore` doc would be nice here", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1705945143", - "pr_number": 6414, - "pr_file": "docs/core_docs/docs/integrations/vectorstores/memory.ipynb", - "created_at": "2024-08-06T18:21:09+00:00", - "commented_code": "{\n \"cells\": [\n {\n \"cell_type\": \"raw\",\n \"id\": \"1957f5cb\",\n \"metadata\": {\n \"vscode\": {\n \"languageId\": \"raw\"\n }\n },\n \"source\": [\n \"---\\n\",\n \"sidebar_label: In Memory\\n\",\n \"---\"\n ]\n },\n {\n \"cell_type\": \"markdown\",\n \"id\": \"ef1f0986\",\n \"metadata\": {},\n \"source\": [\n \"# MemoryVectorStore\\n\",\n \"\\n\",\n \"MemoryVectorStore is an in-memory, ephemeral vectorstore that stores embeddings in-memory and does an exact, linear search for the most similar embeddings. The default similarity metric is cosine similarity, but can be changed to any of the similarity metrics supported by [ml-distance](https://mljs.github.io/distance/modules/similarity.html).\\n\"\n ]\n },\n {\n \"cell_type\": \"markdown\",\n \"id\": \"36fdc060\",\n \"metadata\": {},\n \"source\": [\n \"## Setup\\n\",\n \"\\n\",\n \"To access the in memory vector store you'll need to install the `langchain` integration package.\\n\",\n \"\\n\",\n \"```{=mdx}\\n\",\n \"import IntegrationInstallTooltip from \\\"@mdx_components/integration_install_tooltip.mdx\\\";\\n\",\n \"import Npm2Yarn from \\\"@theme/Npm2Yarn\\\";\\n\",\n \"\\n\",\n \"\\n\",\n \"\\n\",\n \"\\n\",\n \" langchain\\n\",\n \"\\n\",\n \"```\\n\",\n \"\\n\",\n \"### Credentials\\n\",\n \"\\n\",\n \"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\\n\",\n \"\\n\",\n \"```typescript\\n\",\n \"// process.env.LANGCHAIN_TRACING_V2=\\\"true\\\"\\n\",\n \"// process.env.LANGCHAIN_API_KEY=\\\"your-api-key\\\"\\n\",\n \"```\"\n ]\n },\n {\n \"cell_type\": \"markdown\",\n \"id\": \"93df377e\",\n \"metadata\": {},\n \"source\": [\n \"## Instantiation\\n\",\n \"\\n\",\n \"In this example we'll use OpenAIEmbeddings, however you can use any embeddings you like.\"\n ]\n },\n {\n \"cell_type\": \"code\",\n \"execution_count\": 2,\n \"id\": \"dc37144c-208d-4ab3-9f3a-0407a69fe052\",\n \"metadata\": {\n \"tags\": []\n },\n \"outputs\": [],\n \"source\": [\n \"import { MemoryVectorStore } from 'langchain/vectorstores/memory'\\n\",\n \"import { OpenAIEmbeddings } from '@langchain/openai'\\n\",\n \"\\n\",\n \"const vectorStore = new MemoryVectorStore(new OpenAIEmbeddings())\"", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1705945143", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6414, - "pr_file": "docs/core_docs/docs/integrations/vectorstores/memory.ipynb", - "discussion_id": "1705945143", - "commented_code": "@@ -0,0 +1,382 @@\n+{\n+ \"cells\": [\n+ {\n+ \"cell_type\": \"raw\",\n+ \"id\": \"1957f5cb\",\n+ \"metadata\": {\n+ \"vscode\": {\n+ \"languageId\": \"raw\"\n+ }\n+ },\n+ \"source\": [\n+ \"---\\n\",\n+ \"sidebar_label: In Memory\\n\",\n+ \"---\"\n+ ]\n+ },\n+ {\n+ \"cell_type\": \"markdown\",\n+ \"id\": \"ef1f0986\",\n+ \"metadata\": {},\n+ \"source\": [\n+ \"# MemoryVectorStore\\n\",\n+ \"\\n\",\n+ \"MemoryVectorStore is an in-memory, ephemeral 
vectorstore that stores embeddings in-memory and does an exact, linear search for the most similar embeddings. The default similarity metric is cosine similarity, but can be changed to any of the similarity metrics supported by [ml-distance](https://mljs.github.io/distance/modules/similarity.html).\\n\"\n+ ]\n+ },\n+ {\n+ \"cell_type\": \"markdown\",\n+ \"id\": \"36fdc060\",\n+ \"metadata\": {},\n+ \"source\": [\n+ \"## Setup\\n\",\n+ \"\\n\",\n+ \"To access the in memory vector store you'll need to install the `langchain` integration package.\\n\",\n+ \"\\n\",\n+ \"```{=mdx}\\n\",\n+ \"import IntegrationInstallTooltip from \\\"@mdx_components/integration_install_tooltip.mdx\\\";\\n\",\n+ \"import Npm2Yarn from \\\"@theme/Npm2Yarn\\\";\\n\",\n+ \"\\n\",\n+ \"\\n\",\n+ \"\\n\",\n+ \"\\n\",\n+ \" langchain\\n\",\n+ \"\\n\",\n+ \"```\\n\",\n+ \"\\n\",\n+ \"### Credentials\\n\",\n+ \"\\n\",\n+ \"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\\n\",\n+ \"\\n\",\n+ \"```typescript\\n\",\n+ \"// process.env.LANGCHAIN_TRACING_V2=\\\"true\\\"\\n\",\n+ \"// process.env.LANGCHAIN_API_KEY=\\\"your-api-key\\\"\\n\",\n+ \"```\"\n+ ]\n+ },\n+ {\n+ \"cell_type\": \"markdown\",\n+ \"id\": \"93df377e\",\n+ \"metadata\": {},\n+ \"source\": [\n+ \"## Instantiation\\n\",\n+ \"\\n\",\n+ \"In this example we'll use OpenAIEmbeddings, however you can use any embeddings you like.\"\n+ ]\n+ },\n+ {\n+ \"cell_type\": \"code\",\n+ \"execution_count\": 2,\n+ \"id\": \"dc37144c-208d-4ab3-9f3a-0407a69fe052\",\n+ \"metadata\": {\n+ \"tags\": []\n+ },\n+ \"outputs\": [],\n+ \"source\": [\n+ \"import { MemoryVectorStore } from 'langchain/vectorstores/memory'\\n\",\n+ \"import { OpenAIEmbeddings } from '@langchain/openai'\\n\",\n+ \"\\n\",\n+ \"const vectorStore = new MemoryVectorStore(new OpenAIEmbeddings())\"", - "comment_created_at": "2024-08-06T18:21:09+00:00", - "comment_author": "jacoblee93", - "comment_body": "I would show how to initialize `.fromDocuments` as well as from an existing store", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1901243077", - "pr_number": 7344, - "pr_file": "docs/core_docs/docs/integrations/retrievers/azion-edgesql.mdx", - "created_at": "2025-01-02T20:39:24+00:00", - "commented_code": "### Azion Edge SQL Retriever", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1901243077", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7344, - "pr_file": "docs/core_docs/docs/integrations/retrievers/azion-edgesql.mdx", - "discussion_id": "1901243077", - "commented_code": "@@ -0,0 +1,53 @@\n+### Azion Edge SQL Retriever", - "comment_created_at": "2025-01-02T20:39:24+00:00", - "comment_author": "jacoblee93", - "comment_body": "Can we have this use the standard retriever template?\r\n\r\nhttps://github.com/langchain-ai/langchainjs/blob/main/libs/langchain-scripts/src/cli/docs/templates/retrievers.ipynb", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1908152475", - "pr_number": 7450, - "pr_file": "docs/core_docs/docs/integrations/text_embedding/bytedance_doubao.mdx", - "created_at": "2025-01-09T04:32:02+00:00", - "commented_code": null, - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1908152475", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7450, - "pr_file": "docs/core_docs/docs/integrations/text_embedding/bytedance_doubao.mdx", - 
"discussion_id": "1908152475", - "commented_code": null, - "comment_created_at": "2025-01-09T04:32:02+00:00", - "comment_author": "jacoblee93", - "comment_body": "Can you use the embeddings docs template?\r\n\r\nhttps://github.com/langchain-ai/langchainjs/blob/main/.github/contributing/INTEGRATIONS.md#documentation-and-integration-tests\r\nhttps://github.com/langchain-ai/langchainjs/blob/main/libs/langchain-scripts/src/cli/docs/templates/text_embedding.ipynb", - "pr_file_module": null - }, - { - "comment_id": "1909012708", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7450, - "pr_file": "docs/core_docs/docs/integrations/text_embedding/bytedance_doubao.mdx", - "discussion_id": "1908152475", - "commented_code": null, - "comment_created_at": "2025-01-09T15:25:11+00:00", - "comment_author": "ucev", - "comment_body": "done. an exquisite design. i know how it works generally now.\ud83d\udc4d", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-consistent-ai-naming.json b/_reviewers/langchainjs-consistent-ai-naming.json new file mode 100644 index 0000000..8a0e2ad --- /dev/null +++ b/_reviewers/langchainjs-consistent-ai-naming.json @@ -0,0 +1,68 @@ +[ + { + "discussion_id": "2063484090", + "pr_number": 5624, + "pr_file": "docs/core_docs/src/theme/ChatModelTabs.js", + "created_at": "2025-04-28T11:39:51+00:00", + "commented_code": "mistralParams: `{\\n model: \"mistral-large-latest\",\\n temperature: 0\\n}`,\n groqParams: `{\\n model: \"llama-3.3-70b-versatile\",\\n temperature: 0\\n}`,\n vertexParams: `{\\n model: \"gemini-1.5-flash\",\\n temperature: 0\\n}`,\n azureParams: `{\\n azure_endpoint=os.environ[\"AZURE_OPENAI_ENDPOINT\"],\\n azure_deployment=os.environ[\"AZURE_OPENAI_DEPLOYMENT_NAME\"],\\n openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"]\\n}`,", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2063484090", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5624, + "pr_file": "docs/core_docs/src/theme/ChatModelTabs.js", + "discussion_id": "2063484090", + "commented_code": "@@ -31,6 +31,7 @@ const DEFAULTS = {\n mistralParams: `{\\n model: \"mistral-large-latest\",\\n temperature: 0\\n}`,\n groqParams: `{\\n model: \"llama-3.3-70b-versatile\",\\n temperature: 0\\n}`,\n vertexParams: `{\\n model: \"gemini-1.5-flash\",\\n temperature: 0\\n}`,\n+ azureParams: `{\\n azure_endpoint=os.environ[\"AZURE_OPENAI_ENDPOINT\"],\\n azure_deployment=os.environ[\"AZURE_OPENAI_DEPLOYMENT_NAME\"],\\n openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"]\\n}`,", + "comment_created_at": "2025-04-28T11:39:51+00:00", + "comment_author": "benjamincburns", + "comment_body": "```suggestion\r\n vertexParams: `{\\n model: \"gemini-1.5-flash\",\\n temperature: 0\\n}`,\r\n azureParams: `{\\n azure_endpoint=os.environ[\"AZURE_OPENAI_ENDPOINT\"],\\n azure_deployment=os.environ[\"AZURE_OPENAI_DEPLOYMENT_NAME\"],\\n openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"]\\n}`,\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2063490565", + "pr_number": 5624, + "pr_file": "docs/core_docs/src/theme/ChatModelTabs.js", + "created_at": "2025-04-28T11:44:33+00:00", + "commented_code": "envs: `ANTHROPIC_API_KEY=your-api-key`,\n dependencies: \"@langchain/anthropic\",\n },\n azure: {\n value: \"azure\",\n label: \"Azure\",\n default: false,\n text: `import { AzureChatOpenAI } from \"@langchain/openai\";\\n\\nconst ${llmVarName} = new AzureChatOpenAI(${azureParams});`,", + 
"repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2063490565", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5624, + "pr_file": "docs/core_docs/src/theme/ChatModelTabs.js", + "discussion_id": "2063490565", + "commented_code": "@@ -95,6 +99,14 @@ export default function ChatModelTabs(props) {\n envs: `ANTHROPIC_API_KEY=your-api-key`,\n dependencies: \"@langchain/anthropic\",\n },\n+ azure: {\n+ value: \"azure\",\n+ label: \"Azure\",\n+ default: false,\n+ text: `import { AzureChatOpenAI } from \"@langchain/openai\";\\n\\nconst ${llmVarName} = new AzureChatOpenAI(${azureParams});`,", + "comment_created_at": "2025-04-28T11:44:33+00:00", + "comment_author": "benjamincburns", + "comment_body": "```suggestion\r\n text: `import { AzureChatOpenAI } from \"@langchain/openai\";\\n\\nconst ${llmVarName} = new AzureChatOpenAI(${azureOpenAIParams});`,\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2063494283", + "pr_number": 5624, + "pr_file": "docs/core_docs/src/theme/ChatModelTabs.js", + "created_at": "2025-04-28T11:46:54+00:00", + "commented_code": "*\n * @property {boolean} [hideOpenai] - Whether or not to hide OpenAI chat model.\n * @property {boolean} [hideAnthropic] - Whether or not to hide Anthropic chat model.\n * @property {boolean} [hideAzure] - Whether or not to hide Microsoft Azure OpenAI chat model.", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2063494283", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5624, + "pr_file": "docs/core_docs/src/theme/ChatModelTabs.js", + "discussion_id": "2063494283", + "commented_code": "@@ -46,6 +47,7 @@ const MODELS_WSO = [\"openai\", \"anthropic\", \"mistral\", \"groq\", \"vertex\"];\n *\n * @property {boolean} [hideOpenai] - Whether or not to hide OpenAI chat model.\n * @property {boolean} [hideAnthropic] - Whether or not to hide Anthropic chat model.\n+ * @property {boolean} [hideAzure] - Whether or not to hide Microsoft Azure OpenAI chat model.", + "comment_created_at": "2025-04-28T11:46:54+00:00", + "comment_author": "benjamincburns", + "comment_body": "```suggestion\r\n * @property {boolean} [hideAzureOpenAI] - Whether or not to hide Microsoft Azure OpenAI chat model.\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-consistent-ai-naming.md b/_reviewers/langchainjs-consistent-ai-naming.md index 30351c4..3ba615b 100644 --- a/_reviewers/langchainjs-consistent-ai-naming.md +++ b/_reviewers/langchainjs-consistent-ai-naming.md @@ -38,73 +38,3 @@ const azureOpenAIParams = `{ ``` This practice improves code readability, reduces confusion when working with multiple AI services, and makes maintenance easier, especially as you integrate additional AI providers in the future. 
- - -[ - { - "discussion_id": "2063484090", - "pr_number": 5624, - "pr_file": "docs/core_docs/src/theme/ChatModelTabs.js", - "created_at": "2025-04-28T11:39:51+00:00", - "commented_code": "mistralParams: `{\\n model: \"mistral-large-latest\",\\n temperature: 0\\n}`,\n groqParams: `{\\n model: \"llama-3.3-70b-versatile\",\\n temperature: 0\\n}`,\n vertexParams: `{\\n model: \"gemini-1.5-flash\",\\n temperature: 0\\n}`,\n azureParams: `{\\n azure_endpoint=os.environ[\"AZURE_OPENAI_ENDPOINT\"],\\n azure_deployment=os.environ[\"AZURE_OPENAI_DEPLOYMENT_NAME\"],\\n openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"]\\n}`,", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2063484090", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5624, - "pr_file": "docs/core_docs/src/theme/ChatModelTabs.js", - "discussion_id": "2063484090", - "commented_code": "@@ -31,6 +31,7 @@ const DEFAULTS = {\n mistralParams: `{\\n model: \"mistral-large-latest\",\\n temperature: 0\\n}`,\n groqParams: `{\\n model: \"llama-3.3-70b-versatile\",\\n temperature: 0\\n}`,\n vertexParams: `{\\n model: \"gemini-1.5-flash\",\\n temperature: 0\\n}`,\n+ azureParams: `{\\n azure_endpoint=os.environ[\"AZURE_OPENAI_ENDPOINT\"],\\n azure_deployment=os.environ[\"AZURE_OPENAI_DEPLOYMENT_NAME\"],\\n openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"]\\n}`,", - "comment_created_at": "2025-04-28T11:39:51+00:00", - "comment_author": "benjamincburns", - "comment_body": "```suggestion\r\n vertexParams: `{\\n model: \"gemini-1.5-flash\",\\n temperature: 0\\n}`,\r\n azureParams: `{\\n azure_endpoint=os.environ[\"AZURE_OPENAI_ENDPOINT\"],\\n azure_deployment=os.environ[\"AZURE_OPENAI_DEPLOYMENT_NAME\"],\\n openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"]\\n}`,\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2063490565", - "pr_number": 5624, - "pr_file": "docs/core_docs/src/theme/ChatModelTabs.js", - "created_at": "2025-04-28T11:44:33+00:00", - "commented_code": "envs: `ANTHROPIC_API_KEY=your-api-key`,\n dependencies: \"@langchain/anthropic\",\n },\n azure: {\n value: \"azure\",\n label: \"Azure\",\n default: false,\n text: `import { AzureChatOpenAI } from \"@langchain/openai\";\\n\\nconst ${llmVarName} = new AzureChatOpenAI(${azureParams});`,", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2063490565", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5624, - "pr_file": "docs/core_docs/src/theme/ChatModelTabs.js", - "discussion_id": "2063490565", - "commented_code": "@@ -95,6 +99,14 @@ export default function ChatModelTabs(props) {\n envs: `ANTHROPIC_API_KEY=your-api-key`,\n dependencies: \"@langchain/anthropic\",\n },\n+ azure: {\n+ value: \"azure\",\n+ label: \"Azure\",\n+ default: false,\n+ text: `import { AzureChatOpenAI } from \"@langchain/openai\";\\n\\nconst ${llmVarName} = new AzureChatOpenAI(${azureParams});`,", - "comment_created_at": "2025-04-28T11:44:33+00:00", - "comment_author": "benjamincburns", - "comment_body": "```suggestion\r\n text: `import { AzureChatOpenAI } from \"@langchain/openai\";\\n\\nconst ${llmVarName} = new AzureChatOpenAI(${azureOpenAIParams});`,\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2063494283", - "pr_number": 5624, - "pr_file": "docs/core_docs/src/theme/ChatModelTabs.js", - "created_at": "2025-04-28T11:46:54+00:00", - "commented_code": "*\n * @property {boolean} [hideOpenai] - Whether or not to hide OpenAI 
chat model.\n * @property {boolean} [hideAnthropic] - Whether or not to hide Anthropic chat model.\n * @property {boolean} [hideAzure] - Whether or not to hide Microsoft Azure OpenAI chat model.", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2063494283", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5624, - "pr_file": "docs/core_docs/src/theme/ChatModelTabs.js", - "discussion_id": "2063494283", - "commented_code": "@@ -46,6 +47,7 @@ const MODELS_WSO = [\"openai\", \"anthropic\", \"mistral\", \"groq\", \"vertex\"];\n *\n * @property {boolean} [hideOpenai] - Whether or not to hide OpenAI chat model.\n * @property {boolean} [hideAnthropic] - Whether or not to hide Anthropic chat model.\n+ * @property {boolean} [hideAzure] - Whether or not to hide Microsoft Azure OpenAI chat model.", - "comment_created_at": "2025-04-28T11:46:54+00:00", - "comment_author": "benjamincburns", - "comment_body": "```suggestion\r\n * @property {boolean} [hideAzureOpenAI] - Whether or not to hide Microsoft Azure OpenAI chat model.\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-consistent-naming-conventions.json b/_reviewers/langchainjs-consistent-naming-conventions.json new file mode 100644 index 0000000..fc5b654 --- /dev/null +++ b/_reviewers/langchainjs-consistent-naming-conventions.json @@ -0,0 +1,92 @@ +[ + { + "discussion_id": "1896299341", + "pr_number": 7301, + "pr_file": "docs/core_docs/docs/integrations/document_loaders/web_loaders/dropbox.mdx", + "created_at": "2024-12-24T02:26:33+00:00", + "commented_code": "---\nhide_table_of_contents: true\nsidebar_class_name: node-only\n---\n\n# Dropbox Loader\n\nThe `DropboxLoader` allows you to load documents from Dropbox into your LangChain applications. It retrieves files or directories from your Dropbox account and converts them into documents ready for processing.\n\n## Overview\n\nDropbox is a file hosting service that brings all your files—traditional documents, cloud content, and web shortcuts—together in one place. With the `DropboxLoader`, you can seamlessly integrate Dropbox file retrieval into your projects.\n\n## Setup\n\n1. Create a dropbox app, using the [Dropbox App Console](https://www.dropbox.com/developers/apps/create).\n2. Ensure the app has the `files.metadata.read`, `files.content.read` scope permissions:\n3. Generate the access token from the Dropbox App Console.\n4. To use this loader, you'll need to have Unstructured already set up and ready to use at an available URL endpoint. It can also be configured to run locally.\n See the docs [here](https://www.dropbox.com/developers/apps/create) for information on how to do that.\n5. 
Install the necessary packages:\n\n ```bash npm2yarn\n npm install @langchain/community @langchain/core dropbox\n ```\n\n## Usage\n\n### Loading Specific Files\n\nTo load specific files from Dropbox, specify the file paths:\n\n```typescript\nimport { DropboxLoader } from \"@langchain/community/document_loaders/web/dropbox\";\n\nconst loader = new DropboxLoader({\n clientOptions: {\n accessToken: \"your-dropbox-access-token\",\n },\n unstructuredOptions: {\n apiUrl: \"http://localhost:8000/general/v0/general\", // Replace with your Unstructured API URL", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1896299341", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7301, + "pr_file": "docs/core_docs/docs/integrations/document_loaders/web_loaders/dropbox.mdx", + "discussion_id": "1896299341", + "commented_code": "@@ -0,0 +1,138 @@\n+---\n+hide_table_of_contents: true\n+sidebar_class_name: node-only\n+---\n+\n+# Dropbox Loader\n+\n+The `DropboxLoader` allows you to load documents from Dropbox into your LangChain applications. It retrieves files or directories from your Dropbox account and converts them into documents ready for processing.\n+\n+## Overview\n+\n+Dropbox is a file hosting service that brings all your files—traditional documents, cloud content, and web shortcuts—together in one place. With the `DropboxLoader`, you can seamlessly integrate Dropbox file retrieval into your projects.\n+\n+## Setup\n+\n+1. Create a dropbox app, using the [Dropbox App Console](https://www.dropbox.com/developers/apps/create).\n+2. Ensure the app has the `files.metadata.read`, `files.content.read` scope permissions:\n+3. Generate the access token from the Dropbox App Console.\n+4. To use this loader, you'll need to have Unstructured already set up and ready to use at an available URL endpoint. It can also be configured to run locally.\n+ See the docs [here](https://www.dropbox.com/developers/apps/create) for information on how to do that.\n+5. Install the necessary packages:\n+\n+ ```bash npm2yarn\n+ npm install @langchain/community @langchain/core dropbox\n+ ```\n+\n+## Usage\n+\n+### Loading Specific Files\n+\n+To load specific files from Dropbox, specify the file paths:\n+\n+```typescript\n+import { DropboxLoader } from \"@langchain/community/document_loaders/web/dropbox\";\n+\n+const loader = new DropboxLoader({\n+ clientOptions: {\n+ accessToken: \"your-dropbox-access-token\",\n+ },\n+ unstructuredOptions: {\n+ apiUrl: \"http://localhost:8000/general/v0/general\", // Replace with your Unstructured API URL", + "comment_created_at": "2024-12-24T02:26:33+00:00", + "comment_author": "jacoblee93", + "comment_body": "Let's emphasize somewhere that this wraps Unstructured\r\n\r\nShould we call this `DropboxUnstructuredLoader` instead?", + "pr_file_module": null + }, + { + "comment_id": "1900509302", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7301, + "pr_file": "docs/core_docs/docs/integrations/document_loaders/web_loaders/dropbox.mdx", + "discussion_id": "1896299341", + "commented_code": "@@ -0,0 +1,138 @@\n+---\n+hide_table_of_contents: true\n+sidebar_class_name: node-only\n+---\n+\n+# Dropbox Loader\n+\n+The `DropboxLoader` allows you to load documents from Dropbox into your LangChain applications. 
It retrieves files or directories from your Dropbox account and converts them into documents ready for processing.\n+\n+## Overview\n+\n+Dropbox is a file hosting service that brings all your files—traditional documents, cloud content, and web shortcuts—together in one place. With the `DropboxLoader`, you can seamlessly integrate Dropbox file retrieval into your projects.\n+\n+## Setup\n+\n+1. Create a dropbox app, using the [Dropbox App Console](https://www.dropbox.com/developers/apps/create).\n+2. Ensure the app has the `files.metadata.read`, `files.content.read` scope permissions:\n+3. Generate the access token from the Dropbox App Console.\n+4. To use this loader, you'll need to have Unstructured already set up and ready to use at an available URL endpoint. It can also be configured to run locally.\n+ See the docs [here](https://www.dropbox.com/developers/apps/create) for information on how to do that.\n+5. Install the necessary packages:\n+\n+ ```bash npm2yarn\n+ npm install @langchain/community @langchain/core dropbox\n+ ```\n+\n+## Usage\n+\n+### Loading Specific Files\n+\n+To load specific files from Dropbox, specify the file paths:\n+\n+```typescript\n+import { DropboxLoader } from \"@langchain/community/document_loaders/web/dropbox\";\n+\n+const loader = new DropboxLoader({\n+ clientOptions: {\n+ accessToken: \"your-dropbox-access-token\",\n+ },\n+ unstructuredOptions: {\n+ apiUrl: \"http://localhost:8000/general/v0/general\", // Replace with your Unstructured API URL", + "comment_created_at": "2025-01-02T02:53:15+00:00", + "comment_author": "Ser0n-ath", + "comment_body": "Yes, we can rename the loader class to `DropboxUnstructuredLoader`\r\nI want to confirm if I need to rename the file to say `dropbox_unstructured.ts` as well?\r\n\r\nAlso, I noticed that a few preexisting loaders utilize unstructured as well. 
Would they need to be renamed as well in the future?:\r\n- [s3](https://github.com/langchain-ai/langchainjs/blob/main/libs/langchain-community/src/document_loaders/web/s3.ts)\r\n- [azure_blob_storage](https://github.com/langchain-ai/langchainjs/blob/main/libs/langchain-community/src/document_loaders/web/azure_blob_storage_file.ts)\r\n\r\n\r\n\r\n\r\n\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1690477167", + "pr_number": 6197, + "pr_file": "docs/core_docs/docs/integrations/chat/litellm_proxy.mdx", + "created_at": "2024-07-24T21:29:52+00:00", + "commented_code": "---\nsidebar_label: LiteLLM Proxy\n---\n\nimport CodeBlock from \"@theme/CodeBlock\";\n\n# LiteLLM Proxy Server\n\nUse [LiteLLM Proxy](https://docs.litellm.ai/docs/simple_proxy) for:\n\n- Calling 100+ LLMs OpenAI, Azure, Vertex, Bedrock/etc using OpenAI format\n- Use Virtual Keys to Set Budgets & Track Usage\n\nThis allows you to call 100+ LLMs using `ChatOpenAI`\n\n```js\nimport { ChatOpenAI } from \"@langchain/openai\";\n\nconst model = new ChatOpenAI({\n modelName: \"claude-3-5-sonnet-20240620\", // swap this for \"gpt-4o-mini\", \"gemini-1.5-pro\", \"claude-3-5-sonnet-20240620\"\n openAIApiKey: \"sk-1234\",\n}, {\n basePath: \"http://0.0.0.0:4000\", // set basePath to LiteLLM Proxy server\n});\n\nconst message = await model.invoke(\"Hi there!\");\n\nconsole.log(message);\n```\n\n\n## Step 1 Setup LiteLLM Proxy config.yaml\n\nLiteLLM Requires a config with all your models defined - we can call this file `litellm_config.yaml`\n\n[Detailed docs on how to setup litellm config - here](https://docs.litellm.ai/docs/proxy/configs)\n\n```yaml\nmodel_list:\n # Azure OpenAI\n # https://docs.litellm.ai/docs/providers/azure\n - model_name: gpt-azure\n litellm_params:\n model: azure/gpt-turbo-small-ca\n api_base: https://my-endpoint-canada-berri992.openai.azure.com/\n api_key: \"os.environ/AZURE_API_KEY\"\n\n # OpenAI API\n # https://docs.litellm.ai/docs/providers/openai\n - model_name: gpt-4o-mini\n litellm_params:\n model: openai/gpt-4o-mini\n api_key: \"os.environ/OPENAI_API_KEY\"\n\n\n # Vertex AI models\n # https://docs.litellm.ai/docs/providers/vertex\n - model_name: gemini-pro\n litellm_params:\n model: vertex_ai_beta/gemini-1.5-pro\n vertex_project: \"project-id\"\n vertex_location: \"us-central1\"\n vertex_credentials: \"/path/to/service_account.json\" # [OPTIONAL] Do this OR `!gcloud auth application-default login` - run this to add vertex credentials to your env\n \n # Bedrock Models\n # https://docs.litellm.ai/docs/providers/bedrock\n - model_name: anthropic-claude\n litellm_params: \n model: bedrock/anthropic.claude-instant-v1\n aws_access_key_id: os.environ/CUSTOM_AWS_ACCESS_KEY_ID\n aws_secret_access_key: os.environ/CUSTOM_AWS_SECRET_ACCESS_KEY\n aws_region_name: os.environ/CUSTOM_AWS_REGION_NAME\n \n```\n\n\n## Step 2. Start litellm proxy\n\n```shell\ndocker run \\\n -v $(pwd)/litellm_config.yaml:/app/config.yaml \\\n -p 4000:4000 \\\n -e AZURE_API_KEY \\\n -e OPENAI_API_KEY \\\n -e CUSTOM_AWS_ACCESS_KEY_ID \\\n -e CUSTOM_AWS_SECRET_ACCESS_KEY \\\n -e CUSTOM_AWS_REGION_NAME \\\n ghcr.io/berriai/litellm:main-latest \\\n --config /app/config.yaml --detailed_debug\n```\n\nProxy will start running on `http://0.0.0.0:4000/` You can use this endpoint to make all LLM Requests\n\n## Step 3. 
Make Requests using ChatOpenAI\n\nWe use Langchain JS to make requests to LiteLLM Proxy\n\n```bash npm2yarn\nnpm install @langchain/openai\n```\n\nYou can call all models defined on the `litellm_config.yaml`, all you need to do is change the `modelName`\n\n## Call `\"gpt-azure\"`\n```js\nimport { ChatOpenAI } from \"@langchain/openai\";\n\n\nconst model = new ChatOpenAI({\n modelName: \"gpt-azure\",", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1690477167", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6197, + "pr_file": "docs/core_docs/docs/integrations/chat/litellm_proxy.mdx", + "discussion_id": "1690477167", + "commented_code": "@@ -0,0 +1,157 @@\n+---\n+sidebar_label: LiteLLM Proxy\n+---\n+\n+import CodeBlock from \"@theme/CodeBlock\";\n+\n+# LiteLLM Proxy Server\n+\n+Use [LiteLLM Proxy](https://docs.litellm.ai/docs/simple_proxy) for:\n+\n+- Calling 100+ LLMs OpenAI, Azure, Vertex, Bedrock/etc using OpenAI format\n+- Use Virtual Keys to Set Budgets & Track Usage\n+\n+This allows you to call 100+ LLMs using `ChatOpenAI`\n+\n+```js\n+import { ChatOpenAI } from \"@langchain/openai\";\n+\n+const model = new ChatOpenAI({\n+ modelName: \"claude-3-5-sonnet-20240620\", // swap this for \"gpt-4o-mini\", \"gemini-1.5-pro\", \"claude-3-5-sonnet-20240620\"\n+ openAIApiKey: \"sk-1234\",\n+}, {\n+ basePath: \"http://0.0.0.0:4000\", // set basePath to LiteLLM Proxy server\n+});\n+\n+const message = await model.invoke(\"Hi there!\");\n+\n+console.log(message);\n+```\n+\n+\n+## Step 1 Setup LiteLLM Proxy config.yaml\n+\n+LiteLLM Requires a config with all your models defined - we can call this file `litellm_config.yaml`\n+\n+[Detailed docs on how to setup litellm config - here](https://docs.litellm.ai/docs/proxy/configs)\n+\n+```yaml\n+model_list:\n+ # Azure OpenAI\n+ # https://docs.litellm.ai/docs/providers/azure\n+ - model_name: gpt-azure\n+ litellm_params:\n+ model: azure/gpt-turbo-small-ca\n+ api_base: https://my-endpoint-canada-berri992.openai.azure.com/\n+ api_key: \"os.environ/AZURE_API_KEY\"\n+\n+ # OpenAI API\n+ # https://docs.litellm.ai/docs/providers/openai\n+ - model_name: gpt-4o-mini\n+ litellm_params:\n+ model: openai/gpt-4o-mini\n+ api_key: \"os.environ/OPENAI_API_KEY\"\n+\n+\n+ # Vertex AI models\n+ # https://docs.litellm.ai/docs/providers/vertex\n+ - model_name: gemini-pro\n+ litellm_params:\n+ model: vertex_ai_beta/gemini-1.5-pro\n+ vertex_project: \"project-id\"\n+ vertex_location: \"us-central1\"\n+ vertex_credentials: \"/path/to/service_account.json\" # [OPTIONAL] Do this OR `!gcloud auth application-default login` - run this to add vertex credentials to your env\n+ \n+ # Bedrock Models\n+ # https://docs.litellm.ai/docs/providers/bedrock\n+ - model_name: anthropic-claude\n+ litellm_params: \n+ model: bedrock/anthropic.claude-instant-v1\n+ aws_access_key_id: os.environ/CUSTOM_AWS_ACCESS_KEY_ID\n+ aws_secret_access_key: os.environ/CUSTOM_AWS_SECRET_ACCESS_KEY\n+ aws_region_name: os.environ/CUSTOM_AWS_REGION_NAME\n+ \n+```\n+\n+\n+## Step 2. 
Start litellm proxy\n+\n+```shell\n+docker run \\\n+ -v $(pwd)/litellm_config.yaml:/app/config.yaml \\\n+ -p 4000:4000 \\\n+ -e AZURE_API_KEY \\\n+ -e OPENAI_API_KEY \\\n+ -e CUSTOM_AWS_ACCESS_KEY_ID \\\n+ -e CUSTOM_AWS_SECRET_ACCESS_KEY \\\n+ -e CUSTOM_AWS_REGION_NAME \\\n+ ghcr.io/berriai/litellm:main-latest \\\n+ --config /app/config.yaml --detailed_debug\n+```\n+\n+Proxy will start running on `http://0.0.0.0:4000/` You can use this endpoint to make all LLM Requests\n+\n+## Step 3. Make Requests using ChatOpenAI\n+\n+We use Langchain JS to make requests to LiteLLM Proxy\n+\n+```bash npm2yarn\n+npm install @langchain/openai\n+```\n+\n+You can call all models defined on the `litellm_config.yaml`, all you need to do is change the `modelName`\n+\n+## Call `\"gpt-azure\"`\n+```js\n+import { ChatOpenAI } from \"@langchain/openai\";\n+\n+\n+const model = new ChatOpenAI({\n+ modelName: \"gpt-azure\",", + "comment_created_at": "2024-07-24T21:29:52+00:00", + "comment_author": "bracesproul", + "comment_body": "`modelName` is deprecated, prefer `model`\r\n```suggestion\r\nYou can call all models defined on the `litellm_config.yaml`, all you need to do is change the `model` param.\r\n\r\n## Call `\"gpt-azure\"`\r\n```js\r\nimport { ChatOpenAI } from \"@langchain/openai\";\r\n\r\n\r\nconst model = new ChatOpenAI({\r\n model: \"gpt-azure\",\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1690477573", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6197, + "pr_file": "docs/core_docs/docs/integrations/chat/litellm_proxy.mdx", + "discussion_id": "1690477167", + "commented_code": "@@ -0,0 +1,157 @@\n+---\n+sidebar_label: LiteLLM Proxy\n+---\n+\n+import CodeBlock from \"@theme/CodeBlock\";\n+\n+# LiteLLM Proxy Server\n+\n+Use [LiteLLM Proxy](https://docs.litellm.ai/docs/simple_proxy) for:\n+\n+- Calling 100+ LLMs OpenAI, Azure, Vertex, Bedrock/etc using OpenAI format\n+- Use Virtual Keys to Set Budgets & Track Usage\n+\n+This allows you to call 100+ LLMs using `ChatOpenAI`\n+\n+```js\n+import { ChatOpenAI } from \"@langchain/openai\";\n+\n+const model = new ChatOpenAI({\n+ modelName: \"claude-3-5-sonnet-20240620\", // swap this for \"gpt-4o-mini\", \"gemini-1.5-pro\", \"claude-3-5-sonnet-20240620\"\n+ openAIApiKey: \"sk-1234\",\n+}, {\n+ basePath: \"http://0.0.0.0:4000\", // set basePath to LiteLLM Proxy server\n+});\n+\n+const message = await model.invoke(\"Hi there!\");\n+\n+console.log(message);\n+```\n+\n+\n+## Step 1 Setup LiteLLM Proxy config.yaml\n+\n+LiteLLM Requires a config with all your models defined - we can call this file `litellm_config.yaml`\n+\n+[Detailed docs on how to setup litellm config - here](https://docs.litellm.ai/docs/proxy/configs)\n+\n+```yaml\n+model_list:\n+ # Azure OpenAI\n+ # https://docs.litellm.ai/docs/providers/azure\n+ - model_name: gpt-azure\n+ litellm_params:\n+ model: azure/gpt-turbo-small-ca\n+ api_base: https://my-endpoint-canada-berri992.openai.azure.com/\n+ api_key: \"os.environ/AZURE_API_KEY\"\n+\n+ # OpenAI API\n+ # https://docs.litellm.ai/docs/providers/openai\n+ - model_name: gpt-4o-mini\n+ litellm_params:\n+ model: openai/gpt-4o-mini\n+ api_key: \"os.environ/OPENAI_API_KEY\"\n+\n+\n+ # Vertex AI models\n+ # https://docs.litellm.ai/docs/providers/vertex\n+ - model_name: gemini-pro\n+ litellm_params:\n+ model: vertex_ai_beta/gemini-1.5-pro\n+ vertex_project: \"project-id\"\n+ vertex_location: \"us-central1\"\n+ vertex_credentials: \"/path/to/service_account.json\" # [OPTIONAL] Do this OR `!gcloud auth application-default login` - run 
this to add vertex credentials to your env\n+ \n+ # Bedrock Models\n+ # https://docs.litellm.ai/docs/providers/bedrock\n+ - model_name: anthropic-claude\n+ litellm_params: \n+ model: bedrock/anthropic.claude-instant-v1\n+ aws_access_key_id: os.environ/CUSTOM_AWS_ACCESS_KEY_ID\n+ aws_secret_access_key: os.environ/CUSTOM_AWS_SECRET_ACCESS_KEY\n+ aws_region_name: os.environ/CUSTOM_AWS_REGION_NAME\n+ \n+```\n+\n+\n+## Step 2. Start litellm proxy\n+\n+```shell\n+docker run \\\n+ -v $(pwd)/litellm_config.yaml:/app/config.yaml \\\n+ -p 4000:4000 \\\n+ -e AZURE_API_KEY \\\n+ -e OPENAI_API_KEY \\\n+ -e CUSTOM_AWS_ACCESS_KEY_ID \\\n+ -e CUSTOM_AWS_SECRET_ACCESS_KEY \\\n+ -e CUSTOM_AWS_REGION_NAME \\\n+ ghcr.io/berriai/litellm:main-latest \\\n+ --config /app/config.yaml --detailed_debug\n+```\n+\n+Proxy will start running on `http://0.0.0.0:4000/` You can use this endpoint to make all LLM Requests\n+\n+## Step 3. Make Requests using ChatOpenAI\n+\n+We use Langchain JS to make requests to LiteLLM Proxy\n+\n+```bash npm2yarn\n+npm install @langchain/openai\n+```\n+\n+You can call all models defined on the `litellm_config.yaml`, all you need to do is change the `modelName`\n+\n+## Call `\"gpt-azure\"`\n+```js\n+import { ChatOpenAI } from \"@langchain/openai\";\n+\n+\n+const model = new ChatOpenAI({\n+ modelName: \"gpt-azure\",", + "comment_created_at": "2024-07-24T21:30:12+00:00", + "comment_author": "bracesproul", + "comment_body": "Please update the rest of the `modelName` instances too.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1414383618", + "pr_number": 3474, + "pr_file": "docs/core_docs/docs/integrations/document_loaders/web_loaders/google_drive.mdx", + "created_at": "2023-12-04T19:24:37+00:00", + "commented_code": "# Google document loader\n\n## Getting Credentials \n\nhttps://developers.google.com/workspace/guides/get-started#5_steps_to_get_started\nFollow the 5 steps in the above guide to get the credentials to authenticate your app's end users or service accounts.\n\nThen, after getting the right credentials from google, add the file paths to the .env variables GOOGLE_DRIVE_CREDENTIALSPATH and GOOGLE_DRIVE_TOKENPATH", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1414383618", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3474, + "pr_file": "docs/core_docs/docs/integrations/document_loaders/web_loaders/google_drive.mdx", + "discussion_id": "1414383618", + "commented_code": "@@ -0,0 +1,79 @@\n+# Google document loader\n+\n+## Getting Credentials \n+\n+https://developers.google.com/workspace/guides/get-started#5_steps_to_get_started\n+Follow the 5 steps in the above guide to get the credentials to authenticate your app's end users or service accounts.\n+\n+Then, after getting the right credentials from google, add the file paths to the .env variables GOOGLE_DRIVE_CREDENTIALSPATH and GOOGLE_DRIVE_TOKENPATH", + "comment_created_at": "2023-12-04T19:24:37+00:00", + "comment_author": "jacoblee93", + "comment_body": "Separate words with underscore:\r\n`GOOGLE_DRIVE_CREDENTIALSPATH` -> `GOOGLE_DRIVE_CREDENTIALS_PATH`", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-consistent-naming-conventions.md b/_reviewers/langchainjs-consistent-naming-conventions.md index 05a24b0..e1a283d 100644 --- a/_reviewers/langchainjs-consistent-naming-conventions.md +++ b/_reviewers/langchainjs-consistent-naming-conventions.md @@ -47,97 +47,3 @@ Maintain consistent and explicit 
naming conventions across your codebase that re ``` This consistent approach to naming makes your code more maintainable, reduces confusion for other developers, and provides better clarity about component dependencies and behavior. - - -[ - { - "discussion_id": "1896299341", - "pr_number": 7301, - "pr_file": "docs/core_docs/docs/integrations/document_loaders/web_loaders/dropbox.mdx", - "created_at": "2024-12-24T02:26:33+00:00", - "commented_code": "---\nhide_table_of_contents: true\nsidebar_class_name: node-only\n---\n\n# Dropbox Loader\n\nThe `DropboxLoader` allows you to load documents from Dropbox into your LangChain applications. It retrieves files or directories from your Dropbox account and converts them into documents ready for processing.\n\n## Overview\n\nDropbox is a file hosting service that brings all your files\u2014traditional documents, cloud content, and web shortcuts\u2014together in one place. With the `DropboxLoader`, you can seamlessly integrate Dropbox file retrieval into your projects.\n\n## Setup\n\n1. Create a dropbox app, using the [Dropbox App Console](https://www.dropbox.com/developers/apps/create).\n2. Ensure the app has the `files.metadata.read`, `files.content.read` scope permissions:\n3. Generate the access token from the Dropbox App Console.\n4. To use this loader, you'll need to have Unstructured already set up and ready to use at an available URL endpoint. It can also be configured to run locally.\n See the docs [here](https://www.dropbox.com/developers/apps/create) for information on how to do that.\n5. Install the necessary packages:\n\n ```bash npm2yarn\n npm install @langchain/community @langchain/core dropbox\n ```\n\n## Usage\n\n### Loading Specific Files\n\nTo load specific files from Dropbox, specify the file paths:\n\n```typescript\nimport { DropboxLoader } from \"@langchain/community/document_loaders/web/dropbox\";\n\nconst loader = new DropboxLoader({\n clientOptions: {\n accessToken: \"your-dropbox-access-token\",\n },\n unstructuredOptions: {\n apiUrl: \"http://localhost:8000/general/v0/general\", // Replace with your Unstructured API URL", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1896299341", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7301, - "pr_file": "docs/core_docs/docs/integrations/document_loaders/web_loaders/dropbox.mdx", - "discussion_id": "1896299341", - "commented_code": "@@ -0,0 +1,138 @@\n+---\n+hide_table_of_contents: true\n+sidebar_class_name: node-only\n+---\n+\n+# Dropbox Loader\n+\n+The `DropboxLoader` allows you to load documents from Dropbox into your LangChain applications. It retrieves files or directories from your Dropbox account and converts them into documents ready for processing.\n+\n+## Overview\n+\n+Dropbox is a file hosting service that brings all your files\u2014traditional documents, cloud content, and web shortcuts\u2014together in one place. With the `DropboxLoader`, you can seamlessly integrate Dropbox file retrieval into your projects.\n+\n+## Setup\n+\n+1. Create a dropbox app, using the [Dropbox App Console](https://www.dropbox.com/developers/apps/create).\n+2. Ensure the app has the `files.metadata.read`, `files.content.read` scope permissions:\n+3. Generate the access token from the Dropbox App Console.\n+4. To use this loader, you'll need to have Unstructured already set up and ready to use at an available URL endpoint. 
It can also be configured to run locally.\n+ See the docs [here](https://www.dropbox.com/developers/apps/create) for information on how to do that.\n+5. Install the necessary packages:\n+\n+ ```bash npm2yarn\n+ npm install @langchain/community @langchain/core dropbox\n+ ```\n+\n+## Usage\n+\n+### Loading Specific Files\n+\n+To load specific files from Dropbox, specify the file paths:\n+\n+```typescript\n+import { DropboxLoader } from \"@langchain/community/document_loaders/web/dropbox\";\n+\n+const loader = new DropboxLoader({\n+ clientOptions: {\n+ accessToken: \"your-dropbox-access-token\",\n+ },\n+ unstructuredOptions: {\n+ apiUrl: \"http://localhost:8000/general/v0/general\", // Replace with your Unstructured API URL", - "comment_created_at": "2024-12-24T02:26:33+00:00", - "comment_author": "jacoblee93", - "comment_body": "Let's emphasize somewhere that this wraps Unstructured\r\n\r\nShould we call this `DropboxUnstructuredLoader` instead?", - "pr_file_module": null - }, - { - "comment_id": "1900509302", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7301, - "pr_file": "docs/core_docs/docs/integrations/document_loaders/web_loaders/dropbox.mdx", - "discussion_id": "1896299341", - "commented_code": "@@ -0,0 +1,138 @@\n+---\n+hide_table_of_contents: true\n+sidebar_class_name: node-only\n+---\n+\n+# Dropbox Loader\n+\n+The `DropboxLoader` allows you to load documents from Dropbox into your LangChain applications. It retrieves files or directories from your Dropbox account and converts them into documents ready for processing.\n+\n+## Overview\n+\n+Dropbox is a file hosting service that brings all your files\u2014traditional documents, cloud content, and web shortcuts\u2014together in one place. With the `DropboxLoader`, you can seamlessly integrate Dropbox file retrieval into your projects.\n+\n+## Setup\n+\n+1. Create a dropbox app, using the [Dropbox App Console](https://www.dropbox.com/developers/apps/create).\n+2. Ensure the app has the `files.metadata.read`, `files.content.read` scope permissions:\n+3. Generate the access token from the Dropbox App Console.\n+4. To use this loader, you'll need to have Unstructured already set up and ready to use at an available URL endpoint. It can also be configured to run locally.\n+ See the docs [here](https://www.dropbox.com/developers/apps/create) for information on how to do that.\n+5. Install the necessary packages:\n+\n+ ```bash npm2yarn\n+ npm install @langchain/community @langchain/core dropbox\n+ ```\n+\n+## Usage\n+\n+### Loading Specific Files\n+\n+To load specific files from Dropbox, specify the file paths:\n+\n+```typescript\n+import { DropboxLoader } from \"@langchain/community/document_loaders/web/dropbox\";\n+\n+const loader = new DropboxLoader({\n+ clientOptions: {\n+ accessToken: \"your-dropbox-access-token\",\n+ },\n+ unstructuredOptions: {\n+ apiUrl: \"http://localhost:8000/general/v0/general\", // Replace with your Unstructured API URL", - "comment_created_at": "2025-01-02T02:53:15+00:00", - "comment_author": "Ser0n-ath", - "comment_body": "Yes, we can rename the loader class to `DropboxUnstructuredLoader`\r\nI want to confirm if I need to rename the file to say `dropbox_unstructured.ts` as well?\r\n\r\nAlso, I noticed that a few preexisting loaders utilize unstructured as well. 
Would they need to be renamed as well in the future?:\r\n- [s3](https://github.com/langchain-ai/langchainjs/blob/main/libs/langchain-community/src/document_loaders/web/s3.ts)\r\n- [azure_blob_storage](https://github.com/langchain-ai/langchainjs/blob/main/libs/langchain-community/src/document_loaders/web/azure_blob_storage_file.ts)\r\n\r\n\r\n\r\n\r\n\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1690477167", - "pr_number": 6197, - "pr_file": "docs/core_docs/docs/integrations/chat/litellm_proxy.mdx", - "created_at": "2024-07-24T21:29:52+00:00", - "commented_code": "---\nsidebar_label: LiteLLM Proxy\n---\n\nimport CodeBlock from \"@theme/CodeBlock\";\n\n# LiteLLM Proxy Server\n\nUse [LiteLLM Proxy](https://docs.litellm.ai/docs/simple_proxy) for:\n\n- Calling 100+ LLMs OpenAI, Azure, Vertex, Bedrock/etc using OpenAI format\n- Use Virtual Keys to Set Budgets & Track Usage\n\nThis allows you to call 100+ LLMs using `ChatOpenAI`\n\n```js\nimport { ChatOpenAI } from \"@langchain/openai\";\n\nconst model = new ChatOpenAI({\n modelName: \"claude-3-5-sonnet-20240620\", // swap this for \"gpt-4o-mini\", \"gemini-1.5-pro\", \"claude-3-5-sonnet-20240620\"\n openAIApiKey: \"sk-1234\",\n}, {\n basePath: \"http://0.0.0.0:4000\", // set basePath to LiteLLM Proxy server\n});\n\nconst message = await model.invoke(\"Hi there!\");\n\nconsole.log(message);\n```\n\n\n## Step 1 Setup LiteLLM Proxy config.yaml\n\nLiteLLM Requires a config with all your models defined - we can call this file `litellm_config.yaml`\n\n[Detailed docs on how to setup litellm config - here](https://docs.litellm.ai/docs/proxy/configs)\n\n```yaml\nmodel_list:\n # Azure OpenAI\n # https://docs.litellm.ai/docs/providers/azure\n - model_name: gpt-azure\n litellm_params:\n model: azure/gpt-turbo-small-ca\n api_base: https://my-endpoint-canada-berri992.openai.azure.com/\n api_key: \"os.environ/AZURE_API_KEY\"\n\n # OpenAI API\n # https://docs.litellm.ai/docs/providers/openai\n - model_name: gpt-4o-mini\n litellm_params:\n model: openai/gpt-4o-mini\n api_key: \"os.environ/OPENAI_API_KEY\"\n\n\n # Vertex AI models\n # https://docs.litellm.ai/docs/providers/vertex\n - model_name: gemini-pro\n litellm_params:\n model: vertex_ai_beta/gemini-1.5-pro\n vertex_project: \"project-id\"\n vertex_location: \"us-central1\"\n vertex_credentials: \"/path/to/service_account.json\" # [OPTIONAL] Do this OR `!gcloud auth application-default login` - run this to add vertex credentials to your env\n \n # Bedrock Models\n # https://docs.litellm.ai/docs/providers/bedrock\n - model_name: anthropic-claude\n litellm_params: \n model: bedrock/anthropic.claude-instant-v1\n aws_access_key_id: os.environ/CUSTOM_AWS_ACCESS_KEY_ID\n aws_secret_access_key: os.environ/CUSTOM_AWS_SECRET_ACCESS_KEY\n aws_region_name: os.environ/CUSTOM_AWS_REGION_NAME\n \n```\n\n\n## Step 2. Start litellm proxy\n\n```shell\ndocker run \\\n -v $(pwd)/litellm_config.yaml:/app/config.yaml \\\n -p 4000:4000 \\\n -e AZURE_API_KEY \\\n -e OPENAI_API_KEY \\\n -e CUSTOM_AWS_ACCESS_KEY_ID \\\n -e CUSTOM_AWS_SECRET_ACCESS_KEY \\\n -e CUSTOM_AWS_REGION_NAME \\\n ghcr.io/berriai/litellm:main-latest \\\n --config /app/config.yaml --detailed_debug\n```\n\nProxy will start running on `http://0.0.0.0:4000/` You can use this endpoint to make all LLM Requests\n\n## Step 3. 
Make Requests using ChatOpenAI\n\nWe use Langchain JS to make requests to LiteLLM Proxy\n\n```bash npm2yarn\nnpm install @langchain/openai\n```\n\nYou can call all models defined on the `litellm_config.yaml`, all you need to do is change the `modelName`\n\n## Call `\"gpt-azure\"`\n```js\nimport { ChatOpenAI } from \"@langchain/openai\";\n\n\nconst model = new ChatOpenAI({\n modelName: \"gpt-azure\",", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1690477167", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6197, - "pr_file": "docs/core_docs/docs/integrations/chat/litellm_proxy.mdx", - "discussion_id": "1690477167", - "commented_code": "@@ -0,0 +1,157 @@\n+---\n+sidebar_label: LiteLLM Proxy\n+---\n+\n+import CodeBlock from \"@theme/CodeBlock\";\n+\n+# LiteLLM Proxy Server\n+\n+Use [LiteLLM Proxy](https://docs.litellm.ai/docs/simple_proxy) for:\n+\n+- Calling 100+ LLMs OpenAI, Azure, Vertex, Bedrock/etc using OpenAI format\n+- Use Virtual Keys to Set Budgets & Track Usage\n+\n+This allows you to call 100+ LLMs using `ChatOpenAI`\n+\n+```js\n+import { ChatOpenAI } from \"@langchain/openai\";\n+\n+const model = new ChatOpenAI({\n+ modelName: \"claude-3-5-sonnet-20240620\", // swap this for \"gpt-4o-mini\", \"gemini-1.5-pro\", \"claude-3-5-sonnet-20240620\"\n+ openAIApiKey: \"sk-1234\",\n+}, {\n+ basePath: \"http://0.0.0.0:4000\", // set basePath to LiteLLM Proxy server\n+});\n+\n+const message = await model.invoke(\"Hi there!\");\n+\n+console.log(message);\n+```\n+\n+\n+## Step 1 Setup LiteLLM Proxy config.yaml\n+\n+LiteLLM Requires a config with all your models defined - we can call this file `litellm_config.yaml`\n+\n+[Detailed docs on how to setup litellm config - here](https://docs.litellm.ai/docs/proxy/configs)\n+\n+```yaml\n+model_list:\n+ # Azure OpenAI\n+ # https://docs.litellm.ai/docs/providers/azure\n+ - model_name: gpt-azure\n+ litellm_params:\n+ model: azure/gpt-turbo-small-ca\n+ api_base: https://my-endpoint-canada-berri992.openai.azure.com/\n+ api_key: \"os.environ/AZURE_API_KEY\"\n+\n+ # OpenAI API\n+ # https://docs.litellm.ai/docs/providers/openai\n+ - model_name: gpt-4o-mini\n+ litellm_params:\n+ model: openai/gpt-4o-mini\n+ api_key: \"os.environ/OPENAI_API_KEY\"\n+\n+\n+ # Vertex AI models\n+ # https://docs.litellm.ai/docs/providers/vertex\n+ - model_name: gemini-pro\n+ litellm_params:\n+ model: vertex_ai_beta/gemini-1.5-pro\n+ vertex_project: \"project-id\"\n+ vertex_location: \"us-central1\"\n+ vertex_credentials: \"/path/to/service_account.json\" # [OPTIONAL] Do this OR `!gcloud auth application-default login` - run this to add vertex credentials to your env\n+ \n+ # Bedrock Models\n+ # https://docs.litellm.ai/docs/providers/bedrock\n+ - model_name: anthropic-claude\n+ litellm_params: \n+ model: bedrock/anthropic.claude-instant-v1\n+ aws_access_key_id: os.environ/CUSTOM_AWS_ACCESS_KEY_ID\n+ aws_secret_access_key: os.environ/CUSTOM_AWS_SECRET_ACCESS_KEY\n+ aws_region_name: os.environ/CUSTOM_AWS_REGION_NAME\n+ \n+```\n+\n+\n+## Step 2. 
Start litellm proxy\n+\n+```shell\n+docker run \\\n+ -v $(pwd)/litellm_config.yaml:/app/config.yaml \\\n+ -p 4000:4000 \\\n+ -e AZURE_API_KEY \\\n+ -e OPENAI_API_KEY \\\n+ -e CUSTOM_AWS_ACCESS_KEY_ID \\\n+ -e CUSTOM_AWS_SECRET_ACCESS_KEY \\\n+ -e CUSTOM_AWS_REGION_NAME \\\n+ ghcr.io/berriai/litellm:main-latest \\\n+ --config /app/config.yaml --detailed_debug\n+```\n+\n+Proxy will start running on `http://0.0.0.0:4000/` You can use this endpoint to make all LLM Requests\n+\n+## Step 3. Make Requests using ChatOpenAI\n+\n+We use Langchain JS to make requests to LiteLLM Proxy\n+\n+```bash npm2yarn\n+npm install @langchain/openai\n+```\n+\n+You can call all models defined on the `litellm_config.yaml`, all you need to do is change the `modelName`\n+\n+## Call `\"gpt-azure\"`\n+```js\n+import { ChatOpenAI } from \"@langchain/openai\";\n+\n+\n+const model = new ChatOpenAI({\n+ modelName: \"gpt-azure\",", - "comment_created_at": "2024-07-24T21:29:52+00:00", - "comment_author": "bracesproul", - "comment_body": "`modelName` is deprecated, prefer `model`\r\n```suggestion\r\nYou can call all models defined on the `litellm_config.yaml`, all you need to do is change the `model` param.\r\n\r\n## Call `\"gpt-azure\"`\r\n```js\r\nimport { ChatOpenAI } from \"@langchain/openai\";\r\n\r\n\r\nconst model = new ChatOpenAI({\r\n model: \"gpt-azure\",\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1690477573", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6197, - "pr_file": "docs/core_docs/docs/integrations/chat/litellm_proxy.mdx", - "discussion_id": "1690477167", - "commented_code": "@@ -0,0 +1,157 @@\n+---\n+sidebar_label: LiteLLM Proxy\n+---\n+\n+import CodeBlock from \"@theme/CodeBlock\";\n+\n+# LiteLLM Proxy Server\n+\n+Use [LiteLLM Proxy](https://docs.litellm.ai/docs/simple_proxy) for:\n+\n+- Calling 100+ LLMs OpenAI, Azure, Vertex, Bedrock/etc using OpenAI format\n+- Use Virtual Keys to Set Budgets & Track Usage\n+\n+This allows you to call 100+ LLMs using `ChatOpenAI`\n+\n+```js\n+import { ChatOpenAI } from \"@langchain/openai\";\n+\n+const model = new ChatOpenAI({\n+ modelName: \"claude-3-5-sonnet-20240620\", // swap this for \"gpt-4o-mini\", \"gemini-1.5-pro\", \"claude-3-5-sonnet-20240620\"\n+ openAIApiKey: \"sk-1234\",\n+}, {\n+ basePath: \"http://0.0.0.0:4000\", // set basePath to LiteLLM Proxy server\n+});\n+\n+const message = await model.invoke(\"Hi there!\");\n+\n+console.log(message);\n+```\n+\n+\n+## Step 1 Setup LiteLLM Proxy config.yaml\n+\n+LiteLLM Requires a config with all your models defined - we can call this file `litellm_config.yaml`\n+\n+[Detailed docs on how to setup litellm config - here](https://docs.litellm.ai/docs/proxy/configs)\n+\n+```yaml\n+model_list:\n+ # Azure OpenAI\n+ # https://docs.litellm.ai/docs/providers/azure\n+ - model_name: gpt-azure\n+ litellm_params:\n+ model: azure/gpt-turbo-small-ca\n+ api_base: https://my-endpoint-canada-berri992.openai.azure.com/\n+ api_key: \"os.environ/AZURE_API_KEY\"\n+\n+ # OpenAI API\n+ # https://docs.litellm.ai/docs/providers/openai\n+ - model_name: gpt-4o-mini\n+ litellm_params:\n+ model: openai/gpt-4o-mini\n+ api_key: \"os.environ/OPENAI_API_KEY\"\n+\n+\n+ # Vertex AI models\n+ # https://docs.litellm.ai/docs/providers/vertex\n+ - model_name: gemini-pro\n+ litellm_params:\n+ model: vertex_ai_beta/gemini-1.5-pro\n+ vertex_project: \"project-id\"\n+ vertex_location: \"us-central1\"\n+ vertex_credentials: \"/path/to/service_account.json\" # [OPTIONAL] Do this OR `!gcloud auth application-default login` - run 
this to add vertex credentials to your env\n+ \n+ # Bedrock Models\n+ # https://docs.litellm.ai/docs/providers/bedrock\n+ - model_name: anthropic-claude\n+ litellm_params: \n+ model: bedrock/anthropic.claude-instant-v1\n+ aws_access_key_id: os.environ/CUSTOM_AWS_ACCESS_KEY_ID\n+ aws_secret_access_key: os.environ/CUSTOM_AWS_SECRET_ACCESS_KEY\n+ aws_region_name: os.environ/CUSTOM_AWS_REGION_NAME\n+ \n+```\n+\n+\n+## Step 2. Start litellm proxy\n+\n+```shell\n+docker run \\\n+ -v $(pwd)/litellm_config.yaml:/app/config.yaml \\\n+ -p 4000:4000 \\\n+ -e AZURE_API_KEY \\\n+ -e OPENAI_API_KEY \\\n+ -e CUSTOM_AWS_ACCESS_KEY_ID \\\n+ -e CUSTOM_AWS_SECRET_ACCESS_KEY \\\n+ -e CUSTOM_AWS_REGION_NAME \\\n+ ghcr.io/berriai/litellm:main-latest \\\n+ --config /app/config.yaml --detailed_debug\n+```\n+\n+Proxy will start running on `http://0.0.0.0:4000/` You can use this endpoint to make all LLM Requests\n+\n+## Step 3. Make Requests using ChatOpenAI\n+\n+We use Langchain JS to make requests to LiteLLM Proxy\n+\n+```bash npm2yarn\n+npm install @langchain/openai\n+```\n+\n+You can call all models defined on the `litellm_config.yaml`, all you need to do is change the `modelName`\n+\n+## Call `\"gpt-azure\"`\n+```js\n+import { ChatOpenAI } from \"@langchain/openai\";\n+\n+\n+const model = new ChatOpenAI({\n+ modelName: \"gpt-azure\",", - "comment_created_at": "2024-07-24T21:30:12+00:00", - "comment_author": "bracesproul", - "comment_body": "Please update the rest of the `modelName` instances too.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1414383618", - "pr_number": 3474, - "pr_file": "docs/core_docs/docs/integrations/document_loaders/web_loaders/google_drive.mdx", - "created_at": "2023-12-04T19:24:37+00:00", - "commented_code": "# Google document loader\n\n## Getting Credentials \n\nhttps://developers.google.com/workspace/guides/get-started#5_steps_to_get_started\nFollow the 5 steps in the above guide to get the credentials to authenticate your app's end users or service accounts.\n\nThen, after getting the right credentials from google, add the file paths to the .env variables GOOGLE_DRIVE_CREDENTIALSPATH and GOOGLE_DRIVE_TOKENPATH", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1414383618", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3474, - "pr_file": "docs/core_docs/docs/integrations/document_loaders/web_loaders/google_drive.mdx", - "discussion_id": "1414383618", - "commented_code": "@@ -0,0 +1,79 @@\n+# Google document loader\n+\n+## Getting Credentials \n+\n+https://developers.google.com/workspace/guides/get-started#5_steps_to_get_started\n+Follow the 5 steps in the above guide to get the credentials to authenticate your app's end users or service accounts.\n+\n+Then, after getting the right credentials from google, add the file paths to the .env variables GOOGLE_DRIVE_CREDENTIALSPATH and GOOGLE_DRIVE_TOKENPATH", - "comment_created_at": "2023-12-04T19:24:37+00:00", - "comment_author": "jacoblee93", - "comment_body": "Separate words with underscore:\r\n`GOOGLE_DRIVE_CREDENTIALSPATH` -> `GOOGLE_DRIVE_CREDENTIALS_PATH`", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-constructor-over-setter.json b/_reviewers/langchainjs-constructor-over-setter.json new file mode 100644 index 0000000..1642707 --- /dev/null +++ b/_reviewers/langchainjs-constructor-over-setter.json @@ -0,0 +1,128 @@ +[ + { + "discussion_id": "1414368375", + "pr_number": 3448, + "pr_file": 
"docs/core_docs/docs/integrations/chat/azure_ml.mdx", + "created_at": "2023-12-04T19:16:07+00:00", + "commented_code": "# Azure Machine Learning Chat\n\nYou can deploy models on Azure with the endpointUrl, apiKey, and deploymentName\nwhen creating the AzureMLChatParams to call upon later. Must import a ContentFormatter\nor create your own using the ChatContentFormatter interface.\n\n```typescript\nimport { AzureMLChatParams, LlamaContentFormatter } from \"langchain/chat_models/azure_ml\";\n\nconst model = new AzureMLModel({\n endpointUrl: \"YOUR_ENDPOINT_URL\", \n endpointApiKey: \"YOUR_ENDPOINT_API_KEY\", \n deploymentName: \"YOUR_MODEL_DEPLOYMENT_NAME\",\n contentFormatter: new LlamaContentFormatter()\n});\n\nconst res = model.call([\"Foo\"]);", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1414368375", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3448, + "pr_file": "docs/core_docs/docs/integrations/chat/azure_ml.mdx", + "discussion_id": "1414368375", + "commented_code": "@@ -0,0 +1,19 @@\n+# Azure Machine Learning Chat\n+\n+You can deploy models on Azure with the endpointUrl, apiKey, and deploymentName\n+when creating the AzureMLChatParams to call upon later. Must import a ContentFormatter\n+or create your own using the ChatContentFormatter interface.\n+\n+```typescript\n+import { AzureMLChatParams, LlamaContentFormatter } from \"langchain/chat_models/azure_ml\";\n+\n+const model = new AzureMLModel({\n+ endpointUrl: \"YOUR_ENDPOINT_URL\", \n+ endpointApiKey: \"YOUR_ENDPOINT_API_KEY\", \n+ deploymentName: \"YOUR_MODEL_DEPLOYMENT_NAME\",\n+ contentFormatter: new LlamaContentFormatter()\n+});\n+\n+const res = model.call([\"Foo\"]);", + "comment_created_at": "2023-12-04T19:16:07+00:00", + "comment_author": "jacoblee93", + "comment_body": "Prefer `.invoke()`", + "pr_file_module": null + }, + { + "comment_id": "1414368463", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3448, + "pr_file": "docs/core_docs/docs/integrations/chat/azure_ml.mdx", + "discussion_id": "1414368375", + "commented_code": "@@ -0,0 +1,19 @@\n+# Azure Machine Learning Chat\n+\n+You can deploy models on Azure with the endpointUrl, apiKey, and deploymentName\n+when creating the AzureMLChatParams to call upon later. 
Must import a ContentFormatter\n+or create your own using the ChatContentFormatter interface.\n+\n+```typescript\n+import { AzureMLChatParams, LlamaContentFormatter } from \"langchain/chat_models/azure_ml\";\n+\n+const model = new AzureMLModel({\n+ endpointUrl: \"YOUR_ENDPOINT_URL\", \n+ endpointApiKey: \"YOUR_ENDPOINT_API_KEY\", \n+ deploymentName: \"YOUR_MODEL_DEPLOYMENT_NAME\",\n+ contentFormatter: new LlamaContentFormatter()\n+});\n+\n+const res = model.call([\"Foo\"]);", + "comment_created_at": "2023-12-04T19:16:13+00:00", + "comment_author": "jacoblee93", + "comment_body": "Also make sure this is tested", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1884574750", + "pr_number": 7358, + "pr_file": "docs/core_docs/docs/tutorials/retrievers.ipynb", + "created_at": "2024-12-13T21:43:59+00:00", + "commented_code": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"id\": \"bf37a837-7a6a-447b-8779-38f26c585887\",\n \"metadata\": {},\n \"source\": [\n \"# Build a semantic search engine\\n\",\n \"\\n\",\n \"This tutorial will familiarize you with LangChain's [document loader](/docs/concepts/document_loaders), [embedding](/docs/concepts/embedding_models), and [vector store](/docs/concepts/vectorstores) abstractions. These abstractions are designed to support retrieval of data-- from (vector) databases and other sources-- for integration with LLM workflows. They are important for applications that fetch data to be reasoned over as part of model inference, as in the case of retrieval-augmented generation, or [RAG](/docs/concepts/rag) (see our RAG tutorial [here](/docs/tutorials/rag)).\\n\",\n \"\\n\",\n \"Here we will build a search engine over a PDF document. This will allow us to retrieve passages in the PDF that are similar to an input query.\\n\",\n \"\\n\",\n \"## Concepts\\n\",\n \"\\n\",\n \"This guide focuses on retrieval of text data. We will cover the following concepts:\\n\",\n \"\\n\",\n \"- Documents and document loaders;\\n\",\n \"- Text splitters;\\n\",\n \"- Embeddings;\\n\",\n \"- Vector stores and retrievers.\\n\",\n \"\\n\",\n \"## Setup\\n\",\n \"\\n\",\n \"### Jupyter Notebook\\n\",\n \"\\n\",\n \"This and other tutorials are perhaps most conveniently run in a Jupyter notebook. 
See [here](https://jupyter.org/install) for instructions on how to install.\\n\",\n \"\\n\",\n \"### Installation\\n\",\n \"\\n\",\n \"This guide requires `@langchain/community` and `pdf-parse`:\\n\",\n \"\\n\",\n \"```{=mdx}\\n\",\n \"import Npm2Yarn from '@theme/Npm2Yarn';\\n\",\n \"import TabItem from '@theme/TabItem';\\n\",\n \"import CodeBlock from \\\"@theme/CodeBlock\\\";\\n\",\n \"\\n\",\n \"\\n\",\n \" @langchain/community pdf-parse\\n\",\n \"\\n\",\n \"```\\n\",\n \"\\n\",\n \"For more details, see our [Installation guide](/docs/how_to/installation/).\\n\",\n \"\\n\",\n \"### LangSmith\\n\",\n \"\\n\",\n \"Many of the applications you build with LangChain will contain multiple steps with multiple invocations of LLM calls.\\n\",\n \"As these applications get more and more complex, it becomes crucial to be able to inspect what exactly is going on inside your chain or agent.\\n\",\n \"The best way to do this is with [LangSmith](https://smith.langchain.com).\\n\",\n \"\\n\",\n \"After you sign up at the link above, make sure to set your environment variables to start logging traces:\\n\",\n \"\\n\",\n \"```shell\\n\",\n \"export LANGCHAIN_TRACING_V2=\\\"true\\\"\\n\",\n \"export LANGCHAIN_API_KEY=\\\"...\\\"\\n\",\n \"\\n\",\n \"# Reduce tracing latency if you are not in a serverless environment\\n\",\n \"# export LANGCHAIN_CALLBACKS_BACKGROUND=true\\n\",\n \"```\\n\",\n \"\\n\",\n \"\\n\",\n \"## Documents and Document Loaders\\n\",\n \"\\n\",\n \"LangChain implements a [Document](https://v03.api.js.langchain.com/classes/_langchain_core.documents.Document.html) abstraction, which is intended to represent a unit of text and associated metadata. It has three attributes:\\n\",\n \"\\n\",\n \"- `pageContent`: a string representing the content;\\n\",\n \"- `metadata`: records of arbitrary metadata;\\n\",\n \"- `id`: (optional) a string identifier for the document.\\n\",\n \"\\n\",\n \"The `metadata` attribute can capture information about the source of the document, its relationship to other documents, and other information. Note that an individual `Document` object often represents a chunk of a larger document.\\n\",\n \"\\n\",\n \"We can generate sample documents when desired:\\n\",\n \"```javascript\\n\",\n \"import { Document } from \\\"@langchain/core/documents\\\";\\n\",\n \"\\n\",\n \"const documents = [\\n\",\n \" new Document({\\n\",\n \" pageContent: \\\"Dogs are great companions, known for their loyalty and friendliness.\\\",\\n\",\n \" metadata: {\\\"source\\\": \\\"mammal-pets-doc\\\"},\\n\",\n \" }),\\n\",\n \" new Document({\\n\",\n \" pageContent: \\\"Cats are independent pets that often enjoy their own space.\\\",\\n\",\n \" metadata: {\\\"source\\\": \\\"mammal-pets-doc\\\"},\\n\",\n \" }),\\n\",\n \"]\\n\",\n \"```\"\n ]\n },\n {\n \"cell_type\": \"markdown\",\n \"id\": \"f8593578-5699-4b19-96c4-7c990d37a2ec\",\n \"metadata\": {},\n \"source\": [\n \"However, the LangChain ecosystem implements [document loaders](/docs/concepts/document_loaders) that [integrate with hundreds of common sources](/docs/integrations/document_loaders/). This makes it easy to incorporate data from these sources into your AI application.\\n\",\n \"\\n\",\n \"### Loading documents\\n\",\n \"\\n\",\n \"Let's load a PDF into a sequence of `Document` objects. There is a sample PDF in the LangChain repo [here](https://github.com/langchain-ai/langchainjs/blob/main/docs/core_docs/data/nke-10k-2023.pdf) -- a 10-k filing for Nike from 2023. 
LangChain implements a [PDFLoader](/docs/integrations/document_loaders/file_loaders/pdf/) that we can use to parse the PDF:\"\n ]\n },\n {\n \"cell_type\": \"code\",\n \"execution_count\": 1,\n \"id\": \"67c39f96-b1f0-4610-979f-fe2d4d164e08\",\n \"metadata\": {},\n \"outputs\": [\n {\n \"name\": \"stdout\",\n \"output_type\": \"stream\",\n \"text\": [\n \"\\u001b[33m107\\u001b[39m\\n\"\n ]\n }\n ],\n \"source\": [\n \"import { PDFLoader } from \\\"@langchain/community/document_loaders/fs/pdf\\\";\\n\",\n \"\\n\",\n \"const loader = new PDFLoader(\\\"../../data/nke-10k-2023.pdf\\\");\\n\",\n \"\\n\",\n \"const docs = await loader.load();\\n\",\n \"console.log(docs.length)\"\n ]\n },\n {\n \"cell_type\": \"markdown\",\n \"id\": \"b90f4800-bb82-416b-beba-f42ae88a5c66\",\n \"metadata\": {},\n \"source\": [\n \"```{=mdx}\\n\",\n \":::tip\\n\",\n \"\\n\",\n \"See [this guide](/docs/how_to/document_loader_pdf/) for more detail on PDF document loaders.\\n\",\n \"\\n\",\n \":::\\n\",\n \"```\\n\",\n \"\\n\",\n \"`PyPDFLoader` loads one `Document` object per PDF page. For each, we can easily access:\\n\",", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1884574750", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7358, + "pr_file": "docs/core_docs/docs/tutorials/retrievers.ipynb", + "discussion_id": "1884574750", + "commented_code": "@@ -0,0 +1,709 @@\n+{\n+ \"cells\": [\n+ {\n+ \"cell_type\": \"markdown\",\n+ \"id\": \"bf37a837-7a6a-447b-8779-38f26c585887\",\n+ \"metadata\": {},\n+ \"source\": [\n+ \"# Build a semantic search engine\\n\",\n+ \"\\n\",\n+ \"This tutorial will familiarize you with LangChain's [document loader](/docs/concepts/document_loaders), [embedding](/docs/concepts/embedding_models), and [vector store](/docs/concepts/vectorstores) abstractions. These abstractions are designed to support retrieval of data-- from (vector) databases and other sources-- for integration with LLM workflows. They are important for applications that fetch data to be reasoned over as part of model inference, as in the case of retrieval-augmented generation, or [RAG](/docs/concepts/rag) (see our RAG tutorial [here](/docs/tutorials/rag)).\\n\",\n+ \"\\n\",\n+ \"Here we will build a search engine over a PDF document. This will allow us to retrieve passages in the PDF that are similar to an input query.\\n\",\n+ \"\\n\",\n+ \"## Concepts\\n\",\n+ \"\\n\",\n+ \"This guide focuses on retrieval of text data. We will cover the following concepts:\\n\",\n+ \"\\n\",\n+ \"- Documents and document loaders;\\n\",\n+ \"- Text splitters;\\n\",\n+ \"- Embeddings;\\n\",\n+ \"- Vector stores and retrievers.\\n\",\n+ \"\\n\",\n+ \"## Setup\\n\",\n+ \"\\n\",\n+ \"### Jupyter Notebook\\n\",\n+ \"\\n\",\n+ \"This and other tutorials are perhaps most conveniently run in a Jupyter notebook. 
See [here](https://jupyter.org/install) for instructions on how to install.\\n\",\n+ \"\\n\",\n+ \"### Installation\\n\",\n+ \"\\n\",\n+ \"This guide requires `@langchain/community` and `pdf-parse`:\\n\",\n+ \"\\n\",\n+ \"```{=mdx}\\n\",\n+ \"import Npm2Yarn from '@theme/Npm2Yarn';\\n\",\n+ \"import TabItem from '@theme/TabItem';\\n\",\n+ \"import CodeBlock from \\\"@theme/CodeBlock\\\";\\n\",\n+ \"\\n\",\n+ \"\\n\",\n+ \" @langchain/community pdf-parse\\n\",\n+ \"\\n\",\n+ \"```\\n\",\n+ \"\\n\",\n+ \"For more details, see our [Installation guide](/docs/how_to/installation/).\\n\",\n+ \"\\n\",\n+ \"### LangSmith\\n\",\n+ \"\\n\",\n+ \"Many of the applications you build with LangChain will contain multiple steps with multiple invocations of LLM calls.\\n\",\n+ \"As these applications get more and more complex, it becomes crucial to be able to inspect what exactly is going on inside your chain or agent.\\n\",\n+ \"The best way to do this is with [LangSmith](https://smith.langchain.com).\\n\",\n+ \"\\n\",\n+ \"After you sign up at the link above, make sure to set your environment variables to start logging traces:\\n\",\n+ \"\\n\",\n+ \"```shell\\n\",\n+ \"export LANGCHAIN_TRACING_V2=\\\"true\\\"\\n\",\n+ \"export LANGCHAIN_API_KEY=\\\"...\\\"\\n\",\n+ \"\\n\",\n+ \"# Reduce tracing latency if you are not in a serverless environment\\n\",\n+ \"# export LANGCHAIN_CALLBACKS_BACKGROUND=true\\n\",\n+ \"```\\n\",\n+ \"\\n\",\n+ \"\\n\",\n+ \"## Documents and Document Loaders\\n\",\n+ \"\\n\",\n+ \"LangChain implements a [Document](https://v03.api.js.langchain.com/classes/_langchain_core.documents.Document.html) abstraction, which is intended to represent a unit of text and associated metadata. It has three attributes:\\n\",\n+ \"\\n\",\n+ \"- `pageContent`: a string representing the content;\\n\",\n+ \"- `metadata`: records of arbitrary metadata;\\n\",\n+ \"- `id`: (optional) a string identifier for the document.\\n\",\n+ \"\\n\",\n+ \"The `metadata` attribute can capture information about the source of the document, its relationship to other documents, and other information. Note that an individual `Document` object often represents a chunk of a larger document.\\n\",\n+ \"\\n\",\n+ \"We can generate sample documents when desired:\\n\",\n+ \"```javascript\\n\",\n+ \"import { Document } from \\\"@langchain/core/documents\\\";\\n\",\n+ \"\\n\",\n+ \"const documents = [\\n\",\n+ \" new Document({\\n\",\n+ \" pageContent: \\\"Dogs are great companions, known for their loyalty and friendliness.\\\",\\n\",\n+ \" metadata: {\\\"source\\\": \\\"mammal-pets-doc\\\"},\\n\",\n+ \" }),\\n\",\n+ \" new Document({\\n\",\n+ \" pageContent: \\\"Cats are independent pets that often enjoy their own space.\\\",\\n\",\n+ \" metadata: {\\\"source\\\": \\\"mammal-pets-doc\\\"},\\n\",\n+ \" }),\\n\",\n+ \"]\\n\",\n+ \"```\"\n+ ]\n+ },\n+ {\n+ \"cell_type\": \"markdown\",\n+ \"id\": \"f8593578-5699-4b19-96c4-7c990d37a2ec\",\n+ \"metadata\": {},\n+ \"source\": [\n+ \"However, the LangChain ecosystem implements [document loaders](/docs/concepts/document_loaders) that [integrate with hundreds of common sources](/docs/integrations/document_loaders/). This makes it easy to incorporate data from these sources into your AI application.\\n\",\n+ \"\\n\",\n+ \"### Loading documents\\n\",\n+ \"\\n\",\n+ \"Let's load a PDF into a sequence of `Document` objects. 
There is a sample PDF in the LangChain repo [here](https://github.com/langchain-ai/langchainjs/blob/main/docs/core_docs/data/nke-10k-2023.pdf) -- a 10-k filing for Nike from 2023. LangChain implements a [PDFLoader](/docs/integrations/document_loaders/file_loaders/pdf/) that we can use to parse the PDF:\"\n+ ]\n+ },\n+ {\n+ \"cell_type\": \"code\",\n+ \"execution_count\": 1,\n+ \"id\": \"67c39f96-b1f0-4610-979f-fe2d4d164e08\",\n+ \"metadata\": {},\n+ \"outputs\": [\n+ {\n+ \"name\": \"stdout\",\n+ \"output_type\": \"stream\",\n+ \"text\": [\n+ \"\\u001b[33m107\\u001b[39m\\n\"\n+ ]\n+ }\n+ ],\n+ \"source\": [\n+ \"import { PDFLoader } from \\\"@langchain/community/document_loaders/fs/pdf\\\";\\n\",\n+ \"\\n\",\n+ \"const loader = new PDFLoader(\\\"../../data/nke-10k-2023.pdf\\\");\\n\",\n+ \"\\n\",\n+ \"const docs = await loader.load();\\n\",\n+ \"console.log(docs.length)\"\n+ ]\n+ },\n+ {\n+ \"cell_type\": \"markdown\",\n+ \"id\": \"b90f4800-bb82-416b-beba-f42ae88a5c66\",\n+ \"metadata\": {},\n+ \"source\": [\n+ \"```{=mdx}\\n\",\n+ \":::tip\\n\",\n+ \"\\n\",\n+ \"See [this guide](/docs/how_to/document_loader_pdf/) for more detail on PDF document loaders.\\n\",\n+ \"\\n\",\n+ \":::\\n\",\n+ \"```\\n\",\n+ \"\\n\",\n+ \"`PyPDFLoader` loads one `Document` object per PDF page. For each, we can easily access:\\n\",", + "comment_created_at": "2024-12-13T21:43:59+00:00", + "comment_author": "jacoblee93", + "comment_body": "`PyPDFLoader` isn't in JS", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1435420736", + "pr_number": 3465, + "pr_file": "docs/core_docs/docs/integrations/toolkits/outlook.mdx", + "created_at": "2023-12-23T01:10:04+00:00", + "commented_code": "# Outlook Toolkit\n\n## Overview\n\nUsing this toolkit, you can integrate Microsoft Graph API functionalities into your applications to manage email interactions.\n\n## Learn Microsoft Graph\n\nMicrosoft Graph is a RESTful web API that enables you to access Microsoft Cloud service resources. With Microsoft Graph, you can integrate various Microsoft services and data, including users' emails, calendar, contacts, and more, into your applications.\n\nAfter you register your app and get authentication tokens for a user or service, you can make requests to the Microsoft Graph API.\n\nHow to get authentication tokens for a user: \nhttps://learn.microsoft.com/en-us/graph/auth-v2-user\n\n## App register and set environment variables\nAzure App Registration for Microsoft Outlook API Integration\n\n1. Register an Application:\n - Go to Azure Portal.\n - Navigate to Azure Active Directory -> App registrations.\n - Register a new application, note down the Client ID.\n\n2. Generate Client Secret:\n - In the application settings, go to Certificates & Secrets.\n - Create a new client secret and note down the Client Secret.\n\n3. Configure Redirect URI:\n - In the application settings, go to Authentication.\n - Add a Redirect URI and note down the Redirect URI.\n\n4. Permissions and Admin Consent:\n - In API permissions, grant necessary permissions for Microsoft Graph API or Outlook API.\n - Grant admin consent for the added permissions.\n\n6. 
Environment Variables:\n - Set the following environment variables:\n\n ```env\n OUTLOOK_CLIENT_ID=your_client_id\n OUTLOOK_CLIENT_SECRET=your_client_secret\n OUTLOOK_REDIRECT_URI=your_redirect_uri\n OUTLOOK_ACCESS_TOKEN=your_access_token\n OUTLOOK_REFRESH_TOKEN=your_refresh_token\n\n ```\n\n **Keep these values secure and avoid exposing them in public repositories.**\n\n## Setup\n\nYou only need an `accessToken` to use the Outlook Toolkit, but they expire after an hour.", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1435420736", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3465, + "pr_file": "docs/core_docs/docs/integrations/toolkits/outlook.mdx", + "discussion_id": "1435420736", + "commented_code": "@@ -0,0 +1,143 @@\n+# Outlook Toolkit\n+\n+## Overview\n+\n+Using this toolkit, you can integrate Microsoft Graph API functionalities into your applications to manage email interactions.\n+\n+## Learn Microsoft Graph\n+\n+Microsoft Graph is a RESTful web API that enables you to access Microsoft Cloud service resources. With Microsoft Graph, you can integrate various Microsoft services and data, including users' emails, calendar, contacts, and more, into your applications.\n+\n+After you register your app and get authentication tokens for a user or service, you can make requests to the Microsoft Graph API.\n+\n+How to get authentication tokens for a user: \n+https://learn.microsoft.com/en-us/graph/auth-v2-user\n+\n+## App register and set environment variables\n+Azure App Registration for Microsoft Outlook API Integration\n+\n+1. Register an Application:\n+ - Go to Azure Portal.\n+ - Navigate to Azure Active Directory -> App registrations.\n+ - Register a new application, note down the Client ID.\n+\n+2. Generate Client Secret:\n+ - In the application settings, go to Certificates & Secrets.\n+ - Create a new client secret and note down the Client Secret.\n+\n+3. Configure Redirect URI:\n+ - In the application settings, go to Authentication.\n+ - Add a Redirect URI and note down the Redirect URI.\n+\n+4. Permissions and Admin Consent:\n+ - In API permissions, grant necessary permissions for Microsoft Graph API or Outlook API.\n+ - Grant admin consent for the added permissions.\n+\n+6. Environment Variables:\n+ - Set the following environment variables:\n+\n+ ```env\n+ OUTLOOK_CLIENT_ID=your_client_id\n+ OUTLOOK_CLIENT_SECRET=your_client_secret\n+ OUTLOOK_REDIRECT_URI=your_redirect_uri\n+ OUTLOOK_ACCESS_TOKEN=your_access_token\n+ OUTLOOK_REFRESH_TOKEN=your_refresh_token\n+\n+ ```\n+\n+ **Keep these values secure and avoid exposing them in public repositories.**\n+\n+## Setup\n+\n+You only need an `accessToken` to use the Outlook Toolkit, but they expire after an hour.", + "comment_created_at": "2023-12-23T01:10:04+00:00", + "comment_author": "jacoblee93", + "comment_body": "So you need to click a link every hour to use this integration?", + "pr_file_module": null + }, + { + "comment_id": "1435422188", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3465, + "pr_file": "docs/core_docs/docs/integrations/toolkits/outlook.mdx", + "discussion_id": "1435420736", + "commented_code": "@@ -0,0 +1,143 @@\n+# Outlook Toolkit\n+\n+## Overview\n+\n+Using this toolkit, you can integrate Microsoft Graph API functionalities into your applications to manage email interactions.\n+\n+## Learn Microsoft Graph\n+\n+Microsoft Graph is a RESTful web API that enables you to access Microsoft Cloud service resources. 
With Microsoft Graph, you can integrate various Microsoft services and data, including users' emails, calendar, contacts, and more, into your applications.\n+\n+After you register your app and get authentication tokens for a user or service, you can make requests to the Microsoft Graph API.\n+\n+How to get authentication tokens for a user: \n+https://learn.microsoft.com/en-us/graph/auth-v2-user\n+\n+## App register and set environment variables\n+Azure App Registration for Microsoft Outlook API Integration\n+\n+1. Register an Application:\n+ - Go to Azure Portal.\n+ - Navigate to Azure Active Directory -> App registrations.\n+ - Register a new application, note down the Client ID.\n+\n+2. Generate Client Secret:\n+ - In the application settings, go to Certificates & Secrets.\n+ - Create a new client secret and note down the Client Secret.\n+\n+3. Configure Redirect URI:\n+ - In the application settings, go to Authentication.\n+ - Add a Redirect URI and note down the Redirect URI.\n+\n+4. Permissions and Admin Consent:\n+ - In API permissions, grant necessary permissions for Microsoft Graph API or Outlook API.\n+ - Grant admin consent for the added permissions.\n+\n+6. Environment Variables:\n+ - Set the following environment variables:\n+\n+ ```env\n+ OUTLOOK_CLIENT_ID=your_client_id\n+ OUTLOOK_CLIENT_SECRET=your_client_secret\n+ OUTLOOK_REDIRECT_URI=your_redirect_uri\n+ OUTLOOK_ACCESS_TOKEN=your_access_token\n+ OUTLOOK_REFRESH_TOKEN=your_refresh_token\n+\n+ ```\n+\n+ **Keep these values secure and avoid exposing them in public repositories.**\n+\n+## Setup\n+\n+You only need an `accessToken` to use the Outlook Toolkit, but they expire after an hour.", + "comment_created_at": "2023-12-23T01:16:12+00:00", + "comment_author": "Qi123123Li", + "comment_body": "Typically, with the integration, you don't need to manually click a link every hour. Instead, once you obtain an accessToken, there is usually a corresponding refreshToken provided. The AuthFlowREST class can utilize this refreshToken along with clientId, clientSecret, and redirectUri to refresh the expired accessToken automatically. This eliminates the need for manual intervention and keeps the process secure without the necessity for a continuous login or keeping a server open.", + "pr_file_module": null + }, + { + "comment_id": "1435422366", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3465, + "pr_file": "docs/core_docs/docs/integrations/toolkits/outlook.mdx", + "discussion_id": "1435420736", + "commented_code": "@@ -0,0 +1,143 @@\n+# Outlook Toolkit\n+\n+## Overview\n+\n+Using this toolkit, you can integrate Microsoft Graph API functionalities into your applications to manage email interactions.\n+\n+## Learn Microsoft Graph\n+\n+Microsoft Graph is a RESTful web API that enables you to access Microsoft Cloud service resources. With Microsoft Graph, you can integrate various Microsoft services and data, including users' emails, calendar, contacts, and more, into your applications.\n+\n+After you register your app and get authentication tokens for a user or service, you can make requests to the Microsoft Graph API.\n+\n+How to get authentication tokens for a user: \n+https://learn.microsoft.com/en-us/graph/auth-v2-user\n+\n+## App register and set environment variables\n+Azure App Registration for Microsoft Outlook API Integration\n+\n+1. Register an Application:\n+ - Go to Azure Portal.\n+ - Navigate to Azure Active Directory -> App registrations.\n+ - Register a new application, note down the Client ID.\n+\n+2. 
Generate Client Secret:\n+ - In the application settings, go to Certificates & Secrets.\n+ - Create a new client secret and note down the Client Secret.\n+\n+3. Configure Redirect URI:\n+ - In the application settings, go to Authentication.\n+ - Add a Redirect URI and note down the Redirect URI.\n+\n+4. Permissions and Admin Consent:\n+ - In API permissions, grant necessary permissions for Microsoft Graph API or Outlook API.\n+ - Grant admin consent for the added permissions.\n+\n+6. Environment Variables:\n+ - Set the following environment variables:\n+\n+ ```env\n+ OUTLOOK_CLIENT_ID=your_client_id\n+ OUTLOOK_CLIENT_SECRET=your_client_secret\n+ OUTLOOK_REDIRECT_URI=your_redirect_uri\n+ OUTLOOK_ACCESS_TOKEN=your_access_token\n+ OUTLOOK_REFRESH_TOKEN=your_refresh_token\n+\n+ ```\n+\n+ **Keep these values secure and avoid exposing them in public repositories.**\n+\n+## Setup\n+\n+You only need an `accessToken` to use the Outlook Toolkit, but they expire after an hour.", + "comment_created_at": "2023-12-23T01:16:58+00:00", + "comment_author": "oscarchen178", + "comment_body": "No, I don't need to click the link every hour. If you have refreshToken and other credentials, the authFlowRefresh class will get a new valid token for you without the login step. Or if using authFlowToken, the same instance of the class only need to login once and then it will make use of the refreshToken.", + "pr_file_module": null + }, + { + "comment_id": "1435433693", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3465, + "pr_file": "docs/core_docs/docs/integrations/toolkits/outlook.mdx", + "discussion_id": "1435420736", + "commented_code": "@@ -0,0 +1,143 @@\n+# Outlook Toolkit\n+\n+## Overview\n+\n+Using this toolkit, you can integrate Microsoft Graph API functionalities into your applications to manage email interactions.\n+\n+## Learn Microsoft Graph\n+\n+Microsoft Graph is a RESTful web API that enables you to access Microsoft Cloud service resources. With Microsoft Graph, you can integrate various Microsoft services and data, including users' emails, calendar, contacts, and more, into your applications.\n+\n+After you register your app and get authentication tokens for a user or service, you can make requests to the Microsoft Graph API.\n+\n+How to get authentication tokens for a user: \n+https://learn.microsoft.com/en-us/graph/auth-v2-user\n+\n+## App register and set environment variables\n+Azure App Registration for Microsoft Outlook API Integration\n+\n+1. Register an Application:\n+ - Go to Azure Portal.\n+ - Navigate to Azure Active Directory -> App registrations.\n+ - Register a new application, note down the Client ID.\n+\n+2. Generate Client Secret:\n+ - In the application settings, go to Certificates & Secrets.\n+ - Create a new client secret and note down the Client Secret.\n+\n+3. Configure Redirect URI:\n+ - In the application settings, go to Authentication.\n+ - Add a Redirect URI and note down the Redirect URI.\n+\n+4. Permissions and Admin Consent:\n+ - In API permissions, grant necessary permissions for Microsoft Graph API or Outlook API.\n+ - Grant admin consent for the added permissions.\n+\n+6. 
Environment Variables:\n+ - Set the following environment variables:\n+\n+ ```env\n+ OUTLOOK_CLIENT_ID=your_client_id\n+ OUTLOOK_CLIENT_SECRET=your_client_secret\n+ OUTLOOK_REDIRECT_URI=your_redirect_uri\n+ OUTLOOK_ACCESS_TOKEN=your_access_token\n+ OUTLOOK_REFRESH_TOKEN=your_refresh_token\n+\n+ ```\n+\n+ **Keep these values secure and avoid exposing them in public repositories.**\n+\n+## Setup\n+\n+You only need an `accessToken` to use the Outlook Toolkit, but they expire after an hour.", + "comment_created_at": "2023-12-23T01:59:58+00:00", + "comment_author": "jacoblee93", + "comment_body": "I don't fully understand from the docs - is there a way to just get tht refresh token during setup and just use that?", + "pr_file_module": null + }, + { + "comment_id": "1435445127", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3465, + "pr_file": "docs/core_docs/docs/integrations/toolkits/outlook.mdx", + "discussion_id": "1435420736", + "commented_code": "@@ -0,0 +1,143 @@\n+# Outlook Toolkit\n+\n+## Overview\n+\n+Using this toolkit, you can integrate Microsoft Graph API functionalities into your applications to manage email interactions.\n+\n+## Learn Microsoft Graph\n+\n+Microsoft Graph is a RESTful web API that enables you to access Microsoft Cloud service resources. With Microsoft Graph, you can integrate various Microsoft services and data, including users' emails, calendar, contacts, and more, into your applications.\n+\n+After you register your app and get authentication tokens for a user or service, you can make requests to the Microsoft Graph API.\n+\n+How to get authentication tokens for a user: \n+https://learn.microsoft.com/en-us/graph/auth-v2-user\n+\n+## App register and set environment variables\n+Azure App Registration for Microsoft Outlook API Integration\n+\n+1. Register an Application:\n+ - Go to Azure Portal.\n+ - Navigate to Azure Active Directory -> App registrations.\n+ - Register a new application, note down the Client ID.\n+\n+2. Generate Client Secret:\n+ - In the application settings, go to Certificates & Secrets.\n+ - Create a new client secret and note down the Client Secret.\n+\n+3. Configure Redirect URI:\n+ - In the application settings, go to Authentication.\n+ - Add a Redirect URI and note down the Redirect URI.\n+\n+4. Permissions and Admin Consent:\n+ - In API permissions, grant necessary permissions for Microsoft Graph API or Outlook API.\n+ - Grant admin consent for the added permissions.\n+\n+6. Environment Variables:\n+ - Set the following environment variables:\n+\n+ ```env\n+ OUTLOOK_CLIENT_ID=your_client_id\n+ OUTLOOK_CLIENT_SECRET=your_client_secret\n+ OUTLOOK_REDIRECT_URI=your_redirect_uri\n+ OUTLOOK_ACCESS_TOKEN=your_access_token\n+ OUTLOOK_REFRESH_TOKEN=your_refresh_token\n+\n+ ```\n+\n+ **Keep these values secure and avoid exposing them in public repositories.**\n+\n+## Setup\n+\n+You only need an `accessToken` to use the Outlook Toolkit, but they expire after an hour.", + "comment_created_at": "2023-12-23T02:42:15+00:00", + "comment_author": "oscarchen178", + "comment_body": "Sorry for the confusion, the athentication flow to get the refresh token is to send request with clientId, clientSecret and redirectUri, and then open the link from response and login, after that the server will receive a code, then you send the code with other credentials to request refreshToken and accessToken. The authFlowREST implements this flow, so for this class only need clientId, clientSecret and redirectUri in .env. 
But this class does not store the tokens in env. You can use other ways like postman to get the refreshToken. The default expiry time of refresh token is much longer.\r\nThis video may help, https://www.youtube.com/watch?v=NAtiNpwnivI&t=309s", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-constructor-over-setter.md b/_reviewers/langchainjs-constructor-over-setter.md index 48fd806..778659d 100644 --- a/_reviewers/langchainjs-constructor-over-setter.md +++ b/_reviewers/langchainjs-constructor-over-setter.md @@ -29,133 +29,3 @@ const loader = new GoogleDriveLoader({ ``` Similarly, when adding methods to an API class, prefer descriptive action names like `invoke()` over generic terms like `call()`. This creates a more intuitive API that follows established patterns across the ecosystem. - - -[ - { - "discussion_id": "1414368375", - "pr_number": 3448, - "pr_file": "docs/core_docs/docs/integrations/chat/azure_ml.mdx", - "created_at": "2023-12-04T19:16:07+00:00", - "commented_code": "# Azure Machine Learning Chat\n\nYou can deploy models on Azure with the endpointUrl, apiKey, and deploymentName\nwhen creating the AzureMLChatParams to call upon later. Must import a ContentFormatter\nor create your own using the ChatContentFormatter interface.\n\n```typescript\nimport { AzureMLChatParams, LlamaContentFormatter } from \"langchain/chat_models/azure_ml\";\n\nconst model = new AzureMLModel({\n endpointUrl: \"YOUR_ENDPOINT_URL\", \n endpointApiKey: \"YOUR_ENDPOINT_API_KEY\", \n deploymentName: \"YOUR_MODEL_DEPLOYMENT_NAME\",\n contentFormatter: new LlamaContentFormatter()\n});\n\nconst res = model.call([\"Foo\"]);", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1414368375", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3448, - "pr_file": "docs/core_docs/docs/integrations/chat/azure_ml.mdx", - "discussion_id": "1414368375", - "commented_code": "@@ -0,0 +1,19 @@\n+# Azure Machine Learning Chat\n+\n+You can deploy models on Azure with the endpointUrl, apiKey, and deploymentName\n+when creating the AzureMLChatParams to call upon later. Must import a ContentFormatter\n+or create your own using the ChatContentFormatter interface.\n+\n+```typescript\n+import { AzureMLChatParams, LlamaContentFormatter } from \"langchain/chat_models/azure_ml\";\n+\n+const model = new AzureMLModel({\n+ endpointUrl: \"YOUR_ENDPOINT_URL\", \n+ endpointApiKey: \"YOUR_ENDPOINT_API_KEY\", \n+ deploymentName: \"YOUR_MODEL_DEPLOYMENT_NAME\",\n+ contentFormatter: new LlamaContentFormatter()\n+});\n+\n+const res = model.call([\"Foo\"]);", - "comment_created_at": "2023-12-04T19:16:07+00:00", - "comment_author": "jacoblee93", - "comment_body": "Prefer `.invoke()`", - "pr_file_module": null - }, - { - "comment_id": "1414368463", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3448, - "pr_file": "docs/core_docs/docs/integrations/chat/azure_ml.mdx", - "discussion_id": "1414368375", - "commented_code": "@@ -0,0 +1,19 @@\n+# Azure Machine Learning Chat\n+\n+You can deploy models on Azure with the endpointUrl, apiKey, and deploymentName\n+when creating the AzureMLChatParams to call upon later. 
Must import a ContentFormatter\n+or create your own using the ChatContentFormatter interface.\n+\n+```typescript\n+import { AzureMLChatParams, LlamaContentFormatter } from \"langchain/chat_models/azure_ml\";\n+\n+const model = new AzureMLModel({\n+ endpointUrl: \"YOUR_ENDPOINT_URL\", \n+ endpointApiKey: \"YOUR_ENDPOINT_API_KEY\", \n+ deploymentName: \"YOUR_MODEL_DEPLOYMENT_NAME\",\n+ contentFormatter: new LlamaContentFormatter()\n+});\n+\n+const res = model.call([\"Foo\"]);", - "comment_created_at": "2023-12-04T19:16:13+00:00", - "comment_author": "jacoblee93", - "comment_body": "Also make sure this is tested", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1884574750", - "pr_number": 7358, - "pr_file": "docs/core_docs/docs/tutorials/retrievers.ipynb", - "created_at": "2024-12-13T21:43:59+00:00", - "commented_code": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"id\": \"bf37a837-7a6a-447b-8779-38f26c585887\",\n \"metadata\": {},\n \"source\": [\n \"# Build a semantic search engine\\n\",\n \"\\n\",\n \"This tutorial will familiarize you with LangChain's [document loader](/docs/concepts/document_loaders), [embedding](/docs/concepts/embedding_models), and [vector store](/docs/concepts/vectorstores) abstractions. These abstractions are designed to support retrieval of data-- from (vector) databases and other sources-- for integration with LLM workflows. They are important for applications that fetch data to be reasoned over as part of model inference, as in the case of retrieval-augmented generation, or [RAG](/docs/concepts/rag) (see our RAG tutorial [here](/docs/tutorials/rag)).\\n\",\n \"\\n\",\n \"Here we will build a search engine over a PDF document. This will allow us to retrieve passages in the PDF that are similar to an input query.\\n\",\n \"\\n\",\n \"## Concepts\\n\",\n \"\\n\",\n \"This guide focuses on retrieval of text data. We will cover the following concepts:\\n\",\n \"\\n\",\n \"- Documents and document loaders;\\n\",\n \"- Text splitters;\\n\",\n \"- Embeddings;\\n\",\n \"- Vector stores and retrievers.\\n\",\n \"\\n\",\n \"## Setup\\n\",\n \"\\n\",\n \"### Jupyter Notebook\\n\",\n \"\\n\",\n \"This and other tutorials are perhaps most conveniently run in a Jupyter notebook. 
See [here](https://jupyter.org/install) for instructions on how to install.\\n\",\n \"\\n\",\n \"### Installation\\n\",\n \"\\n\",\n \"This guide requires `@langchain/community` and `pdf-parse`:\\n\",\n \"\\n\",\n \"```{=mdx}\\n\",\n \"import Npm2Yarn from '@theme/Npm2Yarn';\\n\",\n \"import TabItem from '@theme/TabItem';\\n\",\n \"import CodeBlock from \\\"@theme/CodeBlock\\\";\\n\",\n \"\\n\",\n \"\\n\",\n \" @langchain/community pdf-parse\\n\",\n \"\\n\",\n \"```\\n\",\n \"\\n\",\n \"For more details, see our [Installation guide](/docs/how_to/installation/).\\n\",\n \"\\n\",\n \"### LangSmith\\n\",\n \"\\n\",\n \"Many of the applications you build with LangChain will contain multiple steps with multiple invocations of LLM calls.\\n\",\n \"As these applications get more and more complex, it becomes crucial to be able to inspect what exactly is going on inside your chain or agent.\\n\",\n \"The best way to do this is with [LangSmith](https://smith.langchain.com).\\n\",\n \"\\n\",\n \"After you sign up at the link above, make sure to set your environment variables to start logging traces:\\n\",\n \"\\n\",\n \"```shell\\n\",\n \"export LANGCHAIN_TRACING_V2=\\\"true\\\"\\n\",\n \"export LANGCHAIN_API_KEY=\\\"...\\\"\\n\",\n \"\\n\",\n \"# Reduce tracing latency if you are not in a serverless environment\\n\",\n \"# export LANGCHAIN_CALLBACKS_BACKGROUND=true\\n\",\n \"```\\n\",\n \"\\n\",\n \"\\n\",\n \"## Documents and Document Loaders\\n\",\n \"\\n\",\n \"LangChain implements a [Document](https://v03.api.js.langchain.com/classes/_langchain_core.documents.Document.html) abstraction, which is intended to represent a unit of text and associated metadata. It has three attributes:\\n\",\n \"\\n\",\n \"- `pageContent`: a string representing the content;\\n\",\n \"- `metadata`: records of arbitrary metadata;\\n\",\n \"- `id`: (optional) a string identifier for the document.\\n\",\n \"\\n\",\n \"The `metadata` attribute can capture information about the source of the document, its relationship to other documents, and other information. Note that an individual `Document` object often represents a chunk of a larger document.\\n\",\n \"\\n\",\n \"We can generate sample documents when desired:\\n\",\n \"```javascript\\n\",\n \"import { Document } from \\\"@langchain/core/documents\\\";\\n\",\n \"\\n\",\n \"const documents = [\\n\",\n \" new Document({\\n\",\n \" pageContent: \\\"Dogs are great companions, known for their loyalty and friendliness.\\\",\\n\",\n \" metadata: {\\\"source\\\": \\\"mammal-pets-doc\\\"},\\n\",\n \" }),\\n\",\n \" new Document({\\n\",\n \" pageContent: \\\"Cats are independent pets that often enjoy their own space.\\\",\\n\",\n \" metadata: {\\\"source\\\": \\\"mammal-pets-doc\\\"},\\n\",\n \" }),\\n\",\n \"]\\n\",\n \"```\"\n ]\n },\n {\n \"cell_type\": \"markdown\",\n \"id\": \"f8593578-5699-4b19-96c4-7c990d37a2ec\",\n \"metadata\": {},\n \"source\": [\n \"However, the LangChain ecosystem implements [document loaders](/docs/concepts/document_loaders) that [integrate with hundreds of common sources](/docs/integrations/document_loaders/). This makes it easy to incorporate data from these sources into your AI application.\\n\",\n \"\\n\",\n \"### Loading documents\\n\",\n \"\\n\",\n \"Let's load a PDF into a sequence of `Document` objects. There is a sample PDF in the LangChain repo [here](https://github.com/langchain-ai/langchainjs/blob/main/docs/core_docs/data/nke-10k-2023.pdf) -- a 10-k filing for Nike from 2023. 
LangChain implements a [PDFLoader](/docs/integrations/document_loaders/file_loaders/pdf/) that we can use to parse the PDF:\"\n ]\n },\n {\n \"cell_type\": \"code\",\n \"execution_count\": 1,\n \"id\": \"67c39f96-b1f0-4610-979f-fe2d4d164e08\",\n \"metadata\": {},\n \"outputs\": [\n {\n \"name\": \"stdout\",\n \"output_type\": \"stream\",\n \"text\": [\n \"\\u001b[33m107\\u001b[39m\\n\"\n ]\n }\n ],\n \"source\": [\n \"import { PDFLoader } from \\\"@langchain/community/document_loaders/fs/pdf\\\";\\n\",\n \"\\n\",\n \"const loader = new PDFLoader(\\\"../../data/nke-10k-2023.pdf\\\");\\n\",\n \"\\n\",\n \"const docs = await loader.load();\\n\",\n \"console.log(docs.length)\"\n ]\n },\n {\n \"cell_type\": \"markdown\",\n \"id\": \"b90f4800-bb82-416b-beba-f42ae88a5c66\",\n \"metadata\": {},\n \"source\": [\n \"```{=mdx}\\n\",\n \":::tip\\n\",\n \"\\n\",\n \"See [this guide](/docs/how_to/document_loader_pdf/) for more detail on PDF document loaders.\\n\",\n \"\\n\",\n \":::\\n\",\n \"```\\n\",\n \"\\n\",\n \"`PyPDFLoader` loads one `Document` object per PDF page. For each, we can easily access:\\n\",", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1884574750", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7358, - "pr_file": "docs/core_docs/docs/tutorials/retrievers.ipynb", - "discussion_id": "1884574750", - "commented_code": "@@ -0,0 +1,709 @@\n+{\n+ \"cells\": [\n+ {\n+ \"cell_type\": \"markdown\",\n+ \"id\": \"bf37a837-7a6a-447b-8779-38f26c585887\",\n+ \"metadata\": {},\n+ \"source\": [\n+ \"# Build a semantic search engine\\n\",\n+ \"\\n\",\n+ \"This tutorial will familiarize you with LangChain's [document loader](/docs/concepts/document_loaders), [embedding](/docs/concepts/embedding_models), and [vector store](/docs/concepts/vectorstores) abstractions. These abstractions are designed to support retrieval of data-- from (vector) databases and other sources-- for integration with LLM workflows. They are important for applications that fetch data to be reasoned over as part of model inference, as in the case of retrieval-augmented generation, or [RAG](/docs/concepts/rag) (see our RAG tutorial [here](/docs/tutorials/rag)).\\n\",\n+ \"\\n\",\n+ \"Here we will build a search engine over a PDF document. This will allow us to retrieve passages in the PDF that are similar to an input query.\\n\",\n+ \"\\n\",\n+ \"## Concepts\\n\",\n+ \"\\n\",\n+ \"This guide focuses on retrieval of text data. We will cover the following concepts:\\n\",\n+ \"\\n\",\n+ \"- Documents and document loaders;\\n\",\n+ \"- Text splitters;\\n\",\n+ \"- Embeddings;\\n\",\n+ \"- Vector stores and retrievers.\\n\",\n+ \"\\n\",\n+ \"## Setup\\n\",\n+ \"\\n\",\n+ \"### Jupyter Notebook\\n\",\n+ \"\\n\",\n+ \"This and other tutorials are perhaps most conveniently run in a Jupyter notebook. 
See [here](https://jupyter.org/install) for instructions on how to install.\\n\",\n+ \"\\n\",\n+ \"### Installation\\n\",\n+ \"\\n\",\n+ \"This guide requires `@langchain/community` and `pdf-parse`:\\n\",\n+ \"\\n\",\n+ \"```{=mdx}\\n\",\n+ \"import Npm2Yarn from '@theme/Npm2Yarn';\\n\",\n+ \"import TabItem from '@theme/TabItem';\\n\",\n+ \"import CodeBlock from \\\"@theme/CodeBlock\\\";\\n\",\n+ \"\\n\",\n+ \"\\n\",\n+ \" @langchain/community pdf-parse\\n\",\n+ \"\\n\",\n+ \"```\\n\",\n+ \"\\n\",\n+ \"For more details, see our [Installation guide](/docs/how_to/installation/).\\n\",\n+ \"\\n\",\n+ \"### LangSmith\\n\",\n+ \"\\n\",\n+ \"Many of the applications you build with LangChain will contain multiple steps with multiple invocations of LLM calls.\\n\",\n+ \"As these applications get more and more complex, it becomes crucial to be able to inspect what exactly is going on inside your chain or agent.\\n\",\n+ \"The best way to do this is with [LangSmith](https://smith.langchain.com).\\n\",\n+ \"\\n\",\n+ \"After you sign up at the link above, make sure to set your environment variables to start logging traces:\\n\",\n+ \"\\n\",\n+ \"```shell\\n\",\n+ \"export LANGCHAIN_TRACING_V2=\\\"true\\\"\\n\",\n+ \"export LANGCHAIN_API_KEY=\\\"...\\\"\\n\",\n+ \"\\n\",\n+ \"# Reduce tracing latency if you are not in a serverless environment\\n\",\n+ \"# export LANGCHAIN_CALLBACKS_BACKGROUND=true\\n\",\n+ \"```\\n\",\n+ \"\\n\",\n+ \"\\n\",\n+ \"## Documents and Document Loaders\\n\",\n+ \"\\n\",\n+ \"LangChain implements a [Document](https://v03.api.js.langchain.com/classes/_langchain_core.documents.Document.html) abstraction, which is intended to represent a unit of text and associated metadata. It has three attributes:\\n\",\n+ \"\\n\",\n+ \"- `pageContent`: a string representing the content;\\n\",\n+ \"- `metadata`: records of arbitrary metadata;\\n\",\n+ \"- `id`: (optional) a string identifier for the document.\\n\",\n+ \"\\n\",\n+ \"The `metadata` attribute can capture information about the source of the document, its relationship to other documents, and other information. Note that an individual `Document` object often represents a chunk of a larger document.\\n\",\n+ \"\\n\",\n+ \"We can generate sample documents when desired:\\n\",\n+ \"```javascript\\n\",\n+ \"import { Document } from \\\"@langchain/core/documents\\\";\\n\",\n+ \"\\n\",\n+ \"const documents = [\\n\",\n+ \" new Document({\\n\",\n+ \" pageContent: \\\"Dogs are great companions, known for their loyalty and friendliness.\\\",\\n\",\n+ \" metadata: {\\\"source\\\": \\\"mammal-pets-doc\\\"},\\n\",\n+ \" }),\\n\",\n+ \" new Document({\\n\",\n+ \" pageContent: \\\"Cats are independent pets that often enjoy their own space.\\\",\\n\",\n+ \" metadata: {\\\"source\\\": \\\"mammal-pets-doc\\\"},\\n\",\n+ \" }),\\n\",\n+ \"]\\n\",\n+ \"```\"\n+ ]\n+ },\n+ {\n+ \"cell_type\": \"markdown\",\n+ \"id\": \"f8593578-5699-4b19-96c4-7c990d37a2ec\",\n+ \"metadata\": {},\n+ \"source\": [\n+ \"However, the LangChain ecosystem implements [document loaders](/docs/concepts/document_loaders) that [integrate with hundreds of common sources](/docs/integrations/document_loaders/). This makes it easy to incorporate data from these sources into your AI application.\\n\",\n+ \"\\n\",\n+ \"### Loading documents\\n\",\n+ \"\\n\",\n+ \"Let's load a PDF into a sequence of `Document` objects. 
There is a sample PDF in the LangChain repo [here](https://github.com/langchain-ai/langchainjs/blob/main/docs/core_docs/data/nke-10k-2023.pdf) -- a 10-k filing for Nike from 2023. LangChain implements a [PDFLoader](/docs/integrations/document_loaders/file_loaders/pdf/) that we can use to parse the PDF:\"\n+ ]\n+ },\n+ {\n+ \"cell_type\": \"code\",\n+ \"execution_count\": 1,\n+ \"id\": \"67c39f96-b1f0-4610-979f-fe2d4d164e08\",\n+ \"metadata\": {},\n+ \"outputs\": [\n+ {\n+ \"name\": \"stdout\",\n+ \"output_type\": \"stream\",\n+ \"text\": [\n+ \"\\u001b[33m107\\u001b[39m\\n\"\n+ ]\n+ }\n+ ],\n+ \"source\": [\n+ \"import { PDFLoader } from \\\"@langchain/community/document_loaders/fs/pdf\\\";\\n\",\n+ \"\\n\",\n+ \"const loader = new PDFLoader(\\\"../../data/nke-10k-2023.pdf\\\");\\n\",\n+ \"\\n\",\n+ \"const docs = await loader.load();\\n\",\n+ \"console.log(docs.length)\"\n+ ]\n+ },\n+ {\n+ \"cell_type\": \"markdown\",\n+ \"id\": \"b90f4800-bb82-416b-beba-f42ae88a5c66\",\n+ \"metadata\": {},\n+ \"source\": [\n+ \"```{=mdx}\\n\",\n+ \":::tip\\n\",\n+ \"\\n\",\n+ \"See [this guide](/docs/how_to/document_loader_pdf/) for more detail on PDF document loaders.\\n\",\n+ \"\\n\",\n+ \":::\\n\",\n+ \"```\\n\",\n+ \"\\n\",\n+ \"`PyPDFLoader` loads one `Document` object per PDF page. For each, we can easily access:\\n\",", - "comment_created_at": "2024-12-13T21:43:59+00:00", - "comment_author": "jacoblee93", - "comment_body": "`PyPDFLoader` isn't in JS", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1435420736", - "pr_number": 3465, - "pr_file": "docs/core_docs/docs/integrations/toolkits/outlook.mdx", - "created_at": "2023-12-23T01:10:04+00:00", - "commented_code": "# Outlook Toolkit\n\n## Overview\n\nUsing this toolkit, you can integrate Microsoft Graph API functionalities into your applications to manage email interactions.\n\n## Learn Microsoft Graph\n\nMicrosoft Graph is a RESTful web API that enables you to access Microsoft Cloud service resources. With Microsoft Graph, you can integrate various Microsoft services and data, including users' emails, calendar, contacts, and more, into your applications.\n\nAfter you register your app and get authentication tokens for a user or service, you can make requests to the Microsoft Graph API.\n\nHow to get authentication tokens for a user: \nhttps://learn.microsoft.com/en-us/graph/auth-v2-user\n\n## App register and set environment variables\nAzure App Registration for Microsoft Outlook API Integration\n\n1. Register an Application:\n - Go to Azure Portal.\n - Navigate to Azure Active Directory -> App registrations.\n - Register a new application, note down the Client ID.\n\n2. Generate Client Secret:\n - In the application settings, go to Certificates & Secrets.\n - Create a new client secret and note down the Client Secret.\n\n3. Configure Redirect URI:\n - In the application settings, go to Authentication.\n - Add a Redirect URI and note down the Redirect URI.\n\n4. Permissions and Admin Consent:\n - In API permissions, grant necessary permissions for Microsoft Graph API or Outlook API.\n - Grant admin consent for the added permissions.\n\n6. 
Environment Variables:\n - Set the following environment variables:\n\n ```env\n OUTLOOK_CLIENT_ID=your_client_id\n OUTLOOK_CLIENT_SECRET=your_client_secret\n OUTLOOK_REDIRECT_URI=your_redirect_uri\n OUTLOOK_ACCESS_TOKEN=your_access_token\n OUTLOOK_REFRESH_TOKEN=your_refresh_token\n\n ```\n\n **Keep these values secure and avoid exposing them in public repositories.**\n\n## Setup\n\nYou only need an `accessToken` to use the Outlook Toolkit, but they expire after an hour.", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1435420736", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3465, - "pr_file": "docs/core_docs/docs/integrations/toolkits/outlook.mdx", - "discussion_id": "1435420736", - "commented_code": "@@ -0,0 +1,143 @@\n+# Outlook Toolkit\n+\n+## Overview\n+\n+Using this toolkit, you can integrate Microsoft Graph API functionalities into your applications to manage email interactions.\n+\n+## Learn Microsoft Graph\n+\n+Microsoft Graph is a RESTful web API that enables you to access Microsoft Cloud service resources. With Microsoft Graph, you can integrate various Microsoft services and data, including users' emails, calendar, contacts, and more, into your applications.\n+\n+After you register your app and get authentication tokens for a user or service, you can make requests to the Microsoft Graph API.\n+\n+How to get authentication tokens for a user: \n+https://learn.microsoft.com/en-us/graph/auth-v2-user\n+\n+## App register and set environment variables\n+Azure App Registration for Microsoft Outlook API Integration\n+\n+1. Register an Application:\n+ - Go to Azure Portal.\n+ - Navigate to Azure Active Directory -> App registrations.\n+ - Register a new application, note down the Client ID.\n+\n+2. Generate Client Secret:\n+ - In the application settings, go to Certificates & Secrets.\n+ - Create a new client secret and note down the Client Secret.\n+\n+3. Configure Redirect URI:\n+ - In the application settings, go to Authentication.\n+ - Add a Redirect URI and note down the Redirect URI.\n+\n+4. Permissions and Admin Consent:\n+ - In API permissions, grant necessary permissions for Microsoft Graph API or Outlook API.\n+ - Grant admin consent for the added permissions.\n+\n+6. Environment Variables:\n+ - Set the following environment variables:\n+\n+ ```env\n+ OUTLOOK_CLIENT_ID=your_client_id\n+ OUTLOOK_CLIENT_SECRET=your_client_secret\n+ OUTLOOK_REDIRECT_URI=your_redirect_uri\n+ OUTLOOK_ACCESS_TOKEN=your_access_token\n+ OUTLOOK_REFRESH_TOKEN=your_refresh_token\n+\n+ ```\n+\n+ **Keep these values secure and avoid exposing them in public repositories.**\n+\n+## Setup\n+\n+You only need an `accessToken` to use the Outlook Toolkit, but they expire after an hour.", - "comment_created_at": "2023-12-23T01:10:04+00:00", - "comment_author": "jacoblee93", - "comment_body": "So you need to click a link every hour to use this integration?", - "pr_file_module": null - }, - { - "comment_id": "1435422188", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3465, - "pr_file": "docs/core_docs/docs/integrations/toolkits/outlook.mdx", - "discussion_id": "1435420736", - "commented_code": "@@ -0,0 +1,143 @@\n+# Outlook Toolkit\n+\n+## Overview\n+\n+Using this toolkit, you can integrate Microsoft Graph API functionalities into your applications to manage email interactions.\n+\n+## Learn Microsoft Graph\n+\n+Microsoft Graph is a RESTful web API that enables you to access Microsoft Cloud service resources. 
With Microsoft Graph, you can integrate various Microsoft services and data, including users' emails, calendar, contacts, and more, into your applications.\n+\n+After you register your app and get authentication tokens for a user or service, you can make requests to the Microsoft Graph API.\n+\n+How to get authentication tokens for a user: \n+https://learn.microsoft.com/en-us/graph/auth-v2-user\n+\n+## App register and set environment variables\n+Azure App Registration for Microsoft Outlook API Integration\n+\n+1. Register an Application:\n+ - Go to Azure Portal.\n+ - Navigate to Azure Active Directory -> App registrations.\n+ - Register a new application, note down the Client ID.\n+\n+2. Generate Client Secret:\n+ - In the application settings, go to Certificates & Secrets.\n+ - Create a new client secret and note down the Client Secret.\n+\n+3. Configure Redirect URI:\n+ - In the application settings, go to Authentication.\n+ - Add a Redirect URI and note down the Redirect URI.\n+\n+4. Permissions and Admin Consent:\n+ - In API permissions, grant necessary permissions for Microsoft Graph API or Outlook API.\n+ - Grant admin consent for the added permissions.\n+\n+6. Environment Variables:\n+ - Set the following environment variables:\n+\n+ ```env\n+ OUTLOOK_CLIENT_ID=your_client_id\n+ OUTLOOK_CLIENT_SECRET=your_client_secret\n+ OUTLOOK_REDIRECT_URI=your_redirect_uri\n+ OUTLOOK_ACCESS_TOKEN=your_access_token\n+ OUTLOOK_REFRESH_TOKEN=your_refresh_token\n+\n+ ```\n+\n+ **Keep these values secure and avoid exposing them in public repositories.**\n+\n+## Setup\n+\n+You only need an `accessToken` to use the Outlook Toolkit, but they expire after an hour.", - "comment_created_at": "2023-12-23T01:16:12+00:00", - "comment_author": "Qi123123Li", - "comment_body": "Typically, with the integration, you don't need to manually click a link every hour. Instead, once you obtain an accessToken, there is usually a corresponding refreshToken provided. The AuthFlowREST class can utilize this refreshToken along with clientId, clientSecret, and redirectUri to refresh the expired accessToken automatically. This eliminates the need for manual intervention and keeps the process secure without the necessity for a continuous login or keeping a server open.", - "pr_file_module": null - }, - { - "comment_id": "1435422366", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3465, - "pr_file": "docs/core_docs/docs/integrations/toolkits/outlook.mdx", - "discussion_id": "1435420736", - "commented_code": "@@ -0,0 +1,143 @@\n+# Outlook Toolkit\n+\n+## Overview\n+\n+Using this toolkit, you can integrate Microsoft Graph API functionalities into your applications to manage email interactions.\n+\n+## Learn Microsoft Graph\n+\n+Microsoft Graph is a RESTful web API that enables you to access Microsoft Cloud service resources. With Microsoft Graph, you can integrate various Microsoft services and data, including users' emails, calendar, contacts, and more, into your applications.\n+\n+After you register your app and get authentication tokens for a user or service, you can make requests to the Microsoft Graph API.\n+\n+How to get authentication tokens for a user: \n+https://learn.microsoft.com/en-us/graph/auth-v2-user\n+\n+## App register and set environment variables\n+Azure App Registration for Microsoft Outlook API Integration\n+\n+1. Register an Application:\n+ - Go to Azure Portal.\n+ - Navigate to Azure Active Directory -> App registrations.\n+ - Register a new application, note down the Client ID.\n+\n+2. 
Generate Client Secret:\n+ - In the application settings, go to Certificates & Secrets.\n+ - Create a new client secret and note down the Client Secret.\n+\n+3. Configure Redirect URI:\n+ - In the application settings, go to Authentication.\n+ - Add a Redirect URI and note down the Redirect URI.\n+\n+4. Permissions and Admin Consent:\n+ - In API permissions, grant necessary permissions for Microsoft Graph API or Outlook API.\n+ - Grant admin consent for the added permissions.\n+\n+6. Environment Variables:\n+ - Set the following environment variables:\n+\n+ ```env\n+ OUTLOOK_CLIENT_ID=your_client_id\n+ OUTLOOK_CLIENT_SECRET=your_client_secret\n+ OUTLOOK_REDIRECT_URI=your_redirect_uri\n+ OUTLOOK_ACCESS_TOKEN=your_access_token\n+ OUTLOOK_REFRESH_TOKEN=your_refresh_token\n+\n+ ```\n+\n+ **Keep these values secure and avoid exposing them in public repositories.**\n+\n+## Setup\n+\n+You only need an `accessToken` to use the Outlook Toolkit, but they expire after an hour.", - "comment_created_at": "2023-12-23T01:16:58+00:00", - "comment_author": "oscarchen178", - "comment_body": "No, I don't need to click the link every hour. If you have refreshToken and other credentials, the authFlowRefresh class will get a new valid token for you without the login step. Or if using authFlowToken, the same instance of the class only need to login once and then it will make use of the refreshToken.", - "pr_file_module": null - }, - { - "comment_id": "1435433693", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3465, - "pr_file": "docs/core_docs/docs/integrations/toolkits/outlook.mdx", - "discussion_id": "1435420736", - "commented_code": "@@ -0,0 +1,143 @@\n+# Outlook Toolkit\n+\n+## Overview\n+\n+Using this toolkit, you can integrate Microsoft Graph API functionalities into your applications to manage email interactions.\n+\n+## Learn Microsoft Graph\n+\n+Microsoft Graph is a RESTful web API that enables you to access Microsoft Cloud service resources. With Microsoft Graph, you can integrate various Microsoft services and data, including users' emails, calendar, contacts, and more, into your applications.\n+\n+After you register your app and get authentication tokens for a user or service, you can make requests to the Microsoft Graph API.\n+\n+How to get authentication tokens for a user: \n+https://learn.microsoft.com/en-us/graph/auth-v2-user\n+\n+## App register and set environment variables\n+Azure App Registration for Microsoft Outlook API Integration\n+\n+1. Register an Application:\n+ - Go to Azure Portal.\n+ - Navigate to Azure Active Directory -> App registrations.\n+ - Register a new application, note down the Client ID.\n+\n+2. Generate Client Secret:\n+ - In the application settings, go to Certificates & Secrets.\n+ - Create a new client secret and note down the Client Secret.\n+\n+3. Configure Redirect URI:\n+ - In the application settings, go to Authentication.\n+ - Add a Redirect URI and note down the Redirect URI.\n+\n+4. Permissions and Admin Consent:\n+ - In API permissions, grant necessary permissions for Microsoft Graph API or Outlook API.\n+ - Grant admin consent for the added permissions.\n+\n+6. 
Environment Variables:\n+ - Set the following environment variables:\n+\n+ ```env\n+ OUTLOOK_CLIENT_ID=your_client_id\n+ OUTLOOK_CLIENT_SECRET=your_client_secret\n+ OUTLOOK_REDIRECT_URI=your_redirect_uri\n+ OUTLOOK_ACCESS_TOKEN=your_access_token\n+ OUTLOOK_REFRESH_TOKEN=your_refresh_token\n+\n+ ```\n+\n+ **Keep these values secure and avoid exposing them in public repositories.**\n+\n+## Setup\n+\n+You only need an `accessToken` to use the Outlook Toolkit, but they expire after an hour.", - "comment_created_at": "2023-12-23T01:59:58+00:00", - "comment_author": "jacoblee93", - "comment_body": "I don't fully understand from the docs - is there a way to just get tht refresh token during setup and just use that?", - "pr_file_module": null - }, - { - "comment_id": "1435445127", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3465, - "pr_file": "docs/core_docs/docs/integrations/toolkits/outlook.mdx", - "discussion_id": "1435420736", - "commented_code": "@@ -0,0 +1,143 @@\n+# Outlook Toolkit\n+\n+## Overview\n+\n+Using this toolkit, you can integrate Microsoft Graph API functionalities into your applications to manage email interactions.\n+\n+## Learn Microsoft Graph\n+\n+Microsoft Graph is a RESTful web API that enables you to access Microsoft Cloud service resources. With Microsoft Graph, you can integrate various Microsoft services and data, including users' emails, calendar, contacts, and more, into your applications.\n+\n+After you register your app and get authentication tokens for a user or service, you can make requests to the Microsoft Graph API.\n+\n+How to get authentication tokens for a user: \n+https://learn.microsoft.com/en-us/graph/auth-v2-user\n+\n+## App register and set environment variables\n+Azure App Registration for Microsoft Outlook API Integration\n+\n+1. Register an Application:\n+ - Go to Azure Portal.\n+ - Navigate to Azure Active Directory -> App registrations.\n+ - Register a new application, note down the Client ID.\n+\n+2. Generate Client Secret:\n+ - In the application settings, go to Certificates & Secrets.\n+ - Create a new client secret and note down the Client Secret.\n+\n+3. Configure Redirect URI:\n+ - In the application settings, go to Authentication.\n+ - Add a Redirect URI and note down the Redirect URI.\n+\n+4. Permissions and Admin Consent:\n+ - In API permissions, grant necessary permissions for Microsoft Graph API or Outlook API.\n+ - Grant admin consent for the added permissions.\n+\n+6. Environment Variables:\n+ - Set the following environment variables:\n+\n+ ```env\n+ OUTLOOK_CLIENT_ID=your_client_id\n+ OUTLOOK_CLIENT_SECRET=your_client_secret\n+ OUTLOOK_REDIRECT_URI=your_redirect_uri\n+ OUTLOOK_ACCESS_TOKEN=your_access_token\n+ OUTLOOK_REFRESH_TOKEN=your_refresh_token\n+\n+ ```\n+\n+ **Keep these values secure and avoid exposing them in public repositories.**\n+\n+## Setup\n+\n+You only need an `accessToken` to use the Outlook Toolkit, but they expire after an hour.", - "comment_created_at": "2023-12-23T02:42:15+00:00", - "comment_author": "oscarchen178", - "comment_body": "Sorry for the confusion, the athentication flow to get the refresh token is to send request with clientId, clientSecret and redirectUri, and then open the link from response and login, after that the server will receive a code, then you send the code with other credentials to request refreshToken and accessToken. The authFlowREST implements this flow, so for this class only need clientId, clientSecret and redirectUri in .env. 
But this class does not store the tokens in env. You can use other ways like postman to get the refreshToken. The default expiry time of refresh token is much longer.\r\nThis video may help, https://www.youtube.com/watch?v=NAtiNpwnivI&t=309s", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-dependency-classification-standards.json b/_reviewers/langchainjs-dependency-classification-standards.json new file mode 100644 index 0000000..5a2d104 --- /dev/null +++ b/_reviewers/langchainjs-dependency-classification-standards.json @@ -0,0 +1,138 @@ +[ + { + "discussion_id": "1416502611", + "pr_number": 3465, + "pr_file": "langchain/package.json", + "created_at": "2023-12-06T01:12:43+00:00", + "commented_code": "\"openai\": \"^4.19.0\",\n \"openapi-types\": \"^12.1.3\",\n \"p-retry\": \"4\",\n \"url\": \"^0.11.3\",", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1416502611", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3465, + "pr_file": "langchain/package.json", + "discussion_id": "1416502611", + "commented_code": "@@ -1445,6 +1453,7 @@\n \"openai\": \"^4.19.0\",\n \"openapi-types\": \"^12.1.3\",\n \"p-retry\": \"4\",\n+ \"url\": \"^0.11.3\",", + "comment_created_at": "2023-12-06T01:12:43+00:00", + "comment_author": "jacoblee93", + "comment_body": "I don't think this is necessary? There is a web built-in", + "pr_file_module": null + }, + { + "comment_id": "1416506907", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3465, + "pr_file": "langchain/package.json", + "discussion_id": "1416502611", + "commented_code": "@@ -1445,6 +1453,7 @@\n \"openai\": \"^4.19.0\",\n \"openapi-types\": \"^12.1.3\",\n \"p-retry\": \"4\",\n+ \"url\": \"^0.11.3\",", + "comment_created_at": "2023-12-06T01:16:20+00:00", + "comment_author": "oscarchen178", + "comment_body": "yep, no need, I will remove this", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1868462212", + "pr_number": 7253, + "pr_file": "libs/langchain-community/package.json", + "created_at": "2024-12-03T22:47:44+00:00", + "commented_code": "\"author\": \"LangChain\",\n \"license\": \"MIT\",\n \"dependencies\": {\n \"@hashgraph/sdk\": \"^2.53.0\",", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1868462212", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7253, + "pr_file": "libs/langchain-community/package.json", + "discussion_id": "1868462212", + "commented_code": "@@ -35,6 +35,7 @@\n \"author\": \"LangChain\",\n \"license\": \"MIT\",\n \"dependencies\": {\n+ \"@hashgraph/sdk\": \"^2.53.0\",", + "comment_created_at": "2024-12-03T22:47:44+00:00", + "comment_author": "jacoblee93", + "comment_body": "Should be a peer + dev dep, not a direct dependency:\r\n\r\nhttps://github.com/langchain-ai/langchainjs/blob/main/.github/contributing/INTEGRATIONS.md#third-party-dependencies", + "pr_file_module": null + }, + { + "comment_id": "1883296405", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7253, + "pr_file": "libs/langchain-community/package.json", + "discussion_id": "1868462212", + "commented_code": "@@ -35,6 +35,7 @@\n \"author\": \"LangChain\",\n \"license\": \"MIT\",\n \"dependencies\": {\n+ \"@hashgraph/sdk\": \"^2.53.0\",", + "comment_created_at": "2024-12-13T05:03:56+00:00", + "comment_author": "syntaxsec", + "comment_body": "Will be done by EOD Monday. Are there any other changes we need to make? 
", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1788322743", + "pr_number": 6904, + "pr_file": "libs/langchain-community/package.json", + "created_at": "2024-10-04T21:05:08+00:00", + "commented_code": "\"@langchain/scripts\": \">=0.1.0 <0.2.0\",\n \"@langchain/standard-tests\": \"0.0.0\",\n \"@layerup/layerup-security\": \"^1.5.12\",\n \"@libsql/client\": \"^0.14.0\",", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1788322743", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6904, + "pr_file": "libs/langchain-community/package.json", + "discussion_id": "1788322743", + "commented_code": "@@ -82,6 +82,7 @@\n \"@langchain/scripts\": \">=0.1.0 <0.2.0\",\n \"@langchain/standard-tests\": \"0.0.0\",\n \"@layerup/layerup-security\": \"^1.5.12\",\n+ \"@libsql/client\": \"^0.14.0\",", + "comment_created_at": "2024-10-04T21:05:08+00:00", + "comment_author": "jacoblee93", + "comment_body": "This also should be an optional peer dep:\r\n\r\nhttps://js.langchain.com/docs/contributing/code/#adding-an-entrypoint", + "pr_file_module": null + }, + { + "comment_id": "1789879845", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6904, + "pr_file": "libs/langchain-community/package.json", + "discussion_id": "1788322743", + "commented_code": "@@ -82,6 +82,7 @@\n \"@langchain/scripts\": \">=0.1.0 <0.2.0\",\n \"@langchain/standard-tests\": \"0.0.0\",\n \"@layerup/layerup-security\": \"^1.5.12\",\n+ \"@libsql/client\": \"^0.14.0\",", + "comment_created_at": "2024-10-07T09:40:03+00:00", + "comment_author": "notrab", + "comment_body": "Added", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1591312677", + "pr_number": 5287, + "pr_file": "libs/langchain-cloudflare/package.json", + "created_at": "2024-05-06T17:06:25+00:00", + "commented_code": "\"author\": \"LangChain\",\n \"license\": \"MIT\",\n \"dependencies\": {\n \"@cloudflare/ai\": \"^1.0.47\",\n \"@langchain/core\": \"~0.1\",\n \"uuid\": \"^9.0.1\"\n },\n \"devDependencies\": {\n \"@cloudflare/workers-types\": \"^4.20231218.0\",\n \"@cloudflare/workers-types\": \"^4.20240502.0\",", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1591312677", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5287, + "pr_file": "libs/langchain-cloudflare/package.json", + "discussion_id": "1591312677", + "commented_code": "@@ -38,12 +38,11 @@\n \"author\": \"LangChain\",\n \"license\": \"MIT\",\n \"dependencies\": {\n- \"@cloudflare/ai\": \"^1.0.47\",\n \"@langchain/core\": \"~0.1\",\n \"uuid\": \"^9.0.1\"\n },\n \"devDependencies\": {\n- \"@cloudflare/workers-types\": \"^4.20231218.0\",\n+ \"@cloudflare/workers-types\": \"^4.20240502.0\",", + "comment_created_at": "2024-05-06T17:06:25+00:00", + "comment_author": "bracesproul", + "comment_body": "If we're replacing `@cloudflare/ai` with this, it needs to be moved out of dev deps and into standard deps", + "pr_file_module": null + }, + { + "comment_id": "1591350680", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5287, + "pr_file": "libs/langchain-cloudflare/package.json", + "discussion_id": "1591312677", + "commented_code": "@@ -38,12 +38,11 @@\n \"author\": \"LangChain\",\n \"license\": \"MIT\",\n \"dependencies\": {\n- \"@cloudflare/ai\": \"^1.0.47\",\n \"@langchain/core\": \"~0.1\",\n \"uuid\": \"^9.0.1\"\n },\n \"devDependencies\": {\n- \"@cloudflare/workers-types\": \"^4.20231218.0\",\n+ \"@cloudflare/workers-types\": \"^4.20240502.0\",", + 
"comment_created_at": "2024-05-06T17:44:22+00:00", + "comment_author": "Cherry", + "comment_body": "`workers-types` only provides the types for the `Ai` binding - there's nothing runtime that would need this to be a dependency and not a devDependency. The runtime for the AI binding is handled entirely in workerd via wrangler.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-dependency-classification-standards.md b/_reviewers/langchainjs-dependency-classification-standards.md index ba4dc4e..68ddf43 100644 --- a/_reviewers/langchainjs-dependency-classification-standards.md +++ b/_reviewers/langchainjs-dependency-classification-standards.md @@ -35,143 +35,3 @@ Example of correct dependency classification: ``` For third-party integrations, follow the project's integration guidelines to determine the appropriate dependency type. Avoid adding dependencies when equivalent functionality exists natively. - - -[ - { - "discussion_id": "1416502611", - "pr_number": 3465, - "pr_file": "langchain/package.json", - "created_at": "2023-12-06T01:12:43+00:00", - "commented_code": "\"openai\": \"^4.19.0\",\n \"openapi-types\": \"^12.1.3\",\n \"p-retry\": \"4\",\n \"url\": \"^0.11.3\",", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1416502611", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3465, - "pr_file": "langchain/package.json", - "discussion_id": "1416502611", - "commented_code": "@@ -1445,6 +1453,7 @@\n \"openai\": \"^4.19.0\",\n \"openapi-types\": \"^12.1.3\",\n \"p-retry\": \"4\",\n+ \"url\": \"^0.11.3\",", - "comment_created_at": "2023-12-06T01:12:43+00:00", - "comment_author": "jacoblee93", - "comment_body": "I don't think this is necessary? 
There is a web built-in", - "pr_file_module": null - }, - { - "comment_id": "1416506907", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3465, - "pr_file": "langchain/package.json", - "discussion_id": "1416502611", - "commented_code": "@@ -1445,6 +1453,7 @@\n \"openai\": \"^4.19.0\",\n \"openapi-types\": \"^12.1.3\",\n \"p-retry\": \"4\",\n+ \"url\": \"^0.11.3\",", - "comment_created_at": "2023-12-06T01:16:20+00:00", - "comment_author": "oscarchen178", - "comment_body": "yep, no need, I will remove this", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1868462212", - "pr_number": 7253, - "pr_file": "libs/langchain-community/package.json", - "created_at": "2024-12-03T22:47:44+00:00", - "commented_code": "\"author\": \"LangChain\",\n \"license\": \"MIT\",\n \"dependencies\": {\n \"@hashgraph/sdk\": \"^2.53.0\",", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1868462212", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7253, - "pr_file": "libs/langchain-community/package.json", - "discussion_id": "1868462212", - "commented_code": "@@ -35,6 +35,7 @@\n \"author\": \"LangChain\",\n \"license\": \"MIT\",\n \"dependencies\": {\n+ \"@hashgraph/sdk\": \"^2.53.0\",", - "comment_created_at": "2024-12-03T22:47:44+00:00", - "comment_author": "jacoblee93", - "comment_body": "Should be a peer + dev dep, not a direct dependency:\r\n\r\nhttps://github.com/langchain-ai/langchainjs/blob/main/.github/contributing/INTEGRATIONS.md#third-party-dependencies", - "pr_file_module": null - }, - { - "comment_id": "1883296405", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7253, - "pr_file": "libs/langchain-community/package.json", - "discussion_id": "1868462212", - "commented_code": "@@ -35,6 +35,7 @@\n \"author\": \"LangChain\",\n \"license\": \"MIT\",\n \"dependencies\": {\n+ \"@hashgraph/sdk\": \"^2.53.0\",", - "comment_created_at": "2024-12-13T05:03:56+00:00", - "comment_author": "syntaxsec", - "comment_body": "Will be done by EOD Monday. Are there any other changes we need to make? 
", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1788322743", - "pr_number": 6904, - "pr_file": "libs/langchain-community/package.json", - "created_at": "2024-10-04T21:05:08+00:00", - "commented_code": "\"@langchain/scripts\": \">=0.1.0 <0.2.0\",\n \"@langchain/standard-tests\": \"0.0.0\",\n \"@layerup/layerup-security\": \"^1.5.12\",\n \"@libsql/client\": \"^0.14.0\",", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1788322743", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6904, - "pr_file": "libs/langchain-community/package.json", - "discussion_id": "1788322743", - "commented_code": "@@ -82,6 +82,7 @@\n \"@langchain/scripts\": \">=0.1.0 <0.2.0\",\n \"@langchain/standard-tests\": \"0.0.0\",\n \"@layerup/layerup-security\": \"^1.5.12\",\n+ \"@libsql/client\": \"^0.14.0\",", - "comment_created_at": "2024-10-04T21:05:08+00:00", - "comment_author": "jacoblee93", - "comment_body": "This also should be an optional peer dep:\r\n\r\nhttps://js.langchain.com/docs/contributing/code/#adding-an-entrypoint", - "pr_file_module": null - }, - { - "comment_id": "1789879845", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6904, - "pr_file": "libs/langchain-community/package.json", - "discussion_id": "1788322743", - "commented_code": "@@ -82,6 +82,7 @@\n \"@langchain/scripts\": \">=0.1.0 <0.2.0\",\n \"@langchain/standard-tests\": \"0.0.0\",\n \"@layerup/layerup-security\": \"^1.5.12\",\n+ \"@libsql/client\": \"^0.14.0\",", - "comment_created_at": "2024-10-07T09:40:03+00:00", - "comment_author": "notrab", - "comment_body": "Added", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1591312677", - "pr_number": 5287, - "pr_file": "libs/langchain-cloudflare/package.json", - "created_at": "2024-05-06T17:06:25+00:00", - "commented_code": "\"author\": \"LangChain\",\n \"license\": \"MIT\",\n \"dependencies\": {\n \"@cloudflare/ai\": \"^1.0.47\",\n \"@langchain/core\": \"~0.1\",\n \"uuid\": \"^9.0.1\"\n },\n \"devDependencies\": {\n \"@cloudflare/workers-types\": \"^4.20231218.0\",\n \"@cloudflare/workers-types\": \"^4.20240502.0\",", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1591312677", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5287, - "pr_file": "libs/langchain-cloudflare/package.json", - "discussion_id": "1591312677", - "commented_code": "@@ -38,12 +38,11 @@\n \"author\": \"LangChain\",\n \"license\": \"MIT\",\n \"dependencies\": {\n- \"@cloudflare/ai\": \"^1.0.47\",\n \"@langchain/core\": \"~0.1\",\n \"uuid\": \"^9.0.1\"\n },\n \"devDependencies\": {\n- \"@cloudflare/workers-types\": \"^4.20231218.0\",\n+ \"@cloudflare/workers-types\": \"^4.20240502.0\",", - "comment_created_at": "2024-05-06T17:06:25+00:00", - "comment_author": "bracesproul", - "comment_body": "If we're replacing `@cloudflare/ai` with this, it needs to be moved out of dev deps and into standard deps", - "pr_file_module": null - }, - { - "comment_id": "1591350680", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5287, - "pr_file": "libs/langchain-cloudflare/package.json", - "discussion_id": "1591312677", - "commented_code": "@@ -38,12 +38,11 @@\n \"author\": \"LangChain\",\n \"license\": \"MIT\",\n \"dependencies\": {\n- \"@cloudflare/ai\": \"^1.0.47\",\n \"@langchain/core\": \"~0.1\",\n \"uuid\": \"^9.0.1\"\n },\n \"devDependencies\": {\n- \"@cloudflare/workers-types\": \"^4.20231218.0\",\n+ \"@cloudflare/workers-types\": \"^4.20240502.0\",", - 
"comment_created_at": "2024-05-06T17:44:22+00:00", - "comment_author": "Cherry", - "comment_body": "`workers-types` only provides the types for the `Ai` binding - there's nothing runtime that would need this to be a dependency and not a devDependency. The runtime for the AI binding is handled entirely in workerd via wrangler.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-documentation-completeness-check.json b/_reviewers/langchainjs-documentation-completeness-check.json new file mode 100644 index 0000000..fa024e3 --- /dev/null +++ b/_reviewers/langchainjs-documentation-completeness-check.json @@ -0,0 +1,80 @@ +[ + { + "discussion_id": "2092387294", + "pr_number": 8007, + "pr_file": "cookbook/oraclevs.md", + "created_at": "2025-05-16T06:08:25+00:00", + "commented_code": "Oracle AI Vector Search with LangchainJS Integration", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2092387294", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8007, + "pr_file": "cookbook/oraclevs.md", + "discussion_id": "2092387294", + "commented_code": "@@ -0,0 +1,273 @@\n+Oracle AI Vector Search with LangchainJS Integration", + "comment_created_at": "2025-05-16T06:08:25+00:00", + "comment_author": "cjbj", + "comment_body": "Shouldn't headings like this have the appropriate markdown syntax? (There are many occurrences in this file that look odd).", + "pr_file_module": null + }, + { + "comment_id": "2094155633", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8007, + "pr_file": "cookbook/oraclevs.md", + "discussion_id": "2092387294", + "commented_code": "@@ -0,0 +1,273 @@\n+Oracle AI Vector Search with LangchainJS Integration", + "comment_created_at": "2025-05-17T16:01:02+00:00", + "comment_author": "hackerdave", + "comment_body": "Good catch, have added some markdown syntax to fix the headings and bullets", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2099206512", + "pr_number": 8007, + "pr_file": "cookbook/oraclevs.md", + "created_at": "2025-05-21T02:59:16+00:00", + "commented_code": "# Oracle AI Vector Search with LangchainJS Integration\n## Introduction\nOracle AI Vector Search enables semantic search on unstructured data while simultaneously providing relational search capabilities on business data, all within a unified system. This approach eliminates the need for a separate vector database, reducing data fragmentation and improving efficiency.\n\nBy integrating Oracle AI Vector Search with Langchain, you can build a powerful pipeline for Retrieval Augmented Generation (RAG), leveraging Oracle's robust database features.\n\nKey Advantages of Oracle Database\nOracle AI Vector Search is built on top of the Oracle Database, providing several key features:\n\n* Partitioning Support\n* Real Application Clusters (RAC) Scalability\n* Exadata Smart Scans\n* Geographically Distributed Shard Processing\n* Transactional Capabilities\n* Parallel SQL\n* Disaster Recovery\n* Advanced Security\n* Oracle Machine Learning\n* Oracle Graph Database\n* Oracle Spatial and Graph\n* Oracle Blockchain\n* JSON Support\n## Guide Overview\nThis guide demonstrates how to integrate Oracle AI Vector Search with Langchain to create an end-to-end RAG pipeline. 
You'll learn how to:\n\n* Load documents from different sources using OracleDocLoader.\n* Summarize documents inside or outside the database using OracleSummary.\n* Generate embeddings either inside or outside the database using OracleEmbeddings.\n* Chunk documents based on specific needs using OracleTextSplitter.\n* Store, index, and query data using OracleVS.\n## Getting Started\nIf you're new to Oracle Database, consider using the free Oracle 23 AI Database to get started.\n\n## Best Practices\n* User Management: Create dedicated users for your Oracle Database projects instead of using the system user for security and control purposes. See the end-to-end guide for more details.\n* User Privileges: Be sure to manage user privileges effectively to maintain database security. You can find more information in the official Oracle documentation.\n## Prerequisites\nTo get started, install the Oracle JavaScript client driver:\n\n``` typescript\nnpm install oracledb\n```\n\n## Document Preparation\nAssuming you have documents stored in a file system that you want to use with Oracle AI Vector Search and Langchain, these documents need to be instances of langchain/core/documents.\n\nExample: Ingesting JSON Documents\nIn the following TypeScript example, we demonstrate how to ingest documents from JSON files:\n\n```typescript\nprivate createDocument(row: DataRow): Document {\n const metadata = {\n id: row.id, \n link: row.link,\n };\n return new Document({ pageContent: row.text, metadata: metadata });\n}\n\npublic async ingestJson(): Promise {\n try {\n const filePath = `${this.docsDir}${this.filename}`;\n const fileContent = await fs.readFile(filePath, {encoding: 'utf8'});\n const jsonData: DataRow[] = JSON.parse(fileContent);\n return jsonData.map((row) => this.createDocument(row));\n } catch (error) {\n console.error('An error occurred while ingesting JSON:', error);\n throw error; // Rethrow for the calling function to handle\n }\n}\n```\n\n## Langchain and Oracle Integration\nThe Oracle AI Vector Search Langchain library offers a rich set of APIs for document processing, which includes loading, chunking, summarizing, and embedding generation. Here's how to set up a connection and integrate Oracle with Langchain.\n\n## Connecting to Oracle Database\nBelow is an example of how to connect to an Oracle Database using both a direct connection and a connection pool:", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2099206512", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8007, + "pr_file": "cookbook/oraclevs.md", + "discussion_id": "2099206512", + "commented_code": "@@ -0,0 +1,273 @@\n+# Oracle AI Vector Search with LangchainJS Integration\n+## Introduction\n+Oracle AI Vector Search enables semantic search on unstructured data while simultaneously providing relational search capabilities on business data, all within a unified system. 
This approach eliminates the need for a separate vector database, reducing data fragmentation and improving efficiency.\n+\n+By integrating Oracle AI Vector Search with Langchain, you can build a powerful pipeline for Retrieval Augmented Generation (RAG), leveraging Oracle's robust database features.\n+\n+Key Advantages of Oracle Database\n+Oracle AI Vector Search is built on top of the Oracle Database, providing several key features:\n+\n+* Partitioning Support\n+* Real Application Clusters (RAC) Scalability\n+* Exadata Smart Scans\n+* Geographically Distributed Shard Processing\n+* Transactional Capabilities\n+* Parallel SQL\n+* Disaster Recovery\n+* Advanced Security\n+* Oracle Machine Learning\n+* Oracle Graph Database\n+* Oracle Spatial and Graph\n+* Oracle Blockchain\n+* JSON Support\n+## Guide Overview\n+This guide demonstrates how to integrate Oracle AI Vector Search with Langchain to create an end-to-end RAG pipeline. You'll learn how to:\n+\n+* Load documents from different sources using OracleDocLoader.\n+* Summarize documents inside or outside the database using OracleSummary.\n+* Generate embeddings either inside or outside the database using OracleEmbeddings.\n+* Chunk documents based on specific needs using OracleTextSplitter.\n+* Store, index, and query data using OracleVS.\n+## Getting Started\n+If you're new to Oracle Database, consider using the free Oracle 23 AI Database to get started.\n+\n+## Best Practices\n+* User Management: Create dedicated users for your Oracle Database projects instead of using the system user for security and control purposes. See the end-to-end guide for more details.\n+* User Privileges: Be sure to manage user privileges effectively to maintain database security. You can find more information in the official Oracle documentation.\n+## Prerequisites\n+To get started, install the Oracle JavaScript client driver:\n+\n+``` typescript\n+npm install oracledb\n+```\n+\n+## Document Preparation\n+Assuming you have documents stored in a file system that you want to use with Oracle AI Vector Search and Langchain, these documents need to be instances of langchain/core/documents.\n+\n+Example: Ingesting JSON Documents\n+In the following TypeScript example, we demonstrate how to ingest documents from JSON files:\n+\n+```typescript\n+private createDocument(row: DataRow): Document {\n+ const metadata = {\n+ id: row.id, \n+ link: row.link,\n+ };\n+ return new Document({ pageContent: row.text, metadata: metadata });\n+}\n+\n+public async ingestJson(): Promise {\n+ try {\n+ const filePath = `${this.docsDir}${this.filename}`;\n+ const fileContent = await fs.readFile(filePath, {encoding: 'utf8'});\n+ const jsonData: DataRow[] = JSON.parse(fileContent);\n+ return jsonData.map((row) => this.createDocument(row));\n+ } catch (error) {\n+ console.error('An error occurred while ingesting JSON:', error);\n+ throw error; // Rethrow for the calling function to handle\n+ }\n+}\n+```\n+\n+## Langchain and Oracle Integration\n+The Oracle AI Vector Search Langchain library offers a rich set of APIs for document processing, which includes loading, chunking, summarizing, and embedding generation. 
Here's how to set up a connection and integrate Oracle with Langchain.\n+\n+## Connecting to Oracle Database\n+Below is an example of how to connect to an Oracle Database using both a direct connection and a connection pool:", + "comment_created_at": "2025-05-21T02:59:16+00:00", + "comment_author": "cjbj", + "comment_body": "Please link here (or somewhere) to https://node-oracledb.readthedocs.io/en/latest/ so users can get more info.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2103914082", + "pr_number": 8241, + "pr_file": "libs/langchain-mcp-adapters/README.md", + "created_at": "2025-05-23T06:42:58+00:00", + "commented_code": "| `prefixToolNameWithServerName` | boolean | `true` | If true, prefixes all tool names with the server name (e.g., `serverName__toolName`) |\n| `additionalToolNamePrefix` | string | `mcp` | Additional prefix to add to tool names (e.g., `prefix__serverName__toolName`) |\n\n## Tool Timeout Configuration\n\nMCP tools support timeout configuration through LangChain's standard `RunnableConfig` interface. This allows you to set custom timeouts on a per-tool-call basis:\n\n```typescript\nconst client = new MultiServerMCPClient({\n mcpServers: {\n 'data-processor': {\n command: 'python',\n args: ['data_server.py']\n }\n }\n});\n\nconst tools = await client.getTools();\nconst slowTool = tools.find(t => t.name.includes('process_large_dataset'));\n\n// You can use withConfig to set tool-specific timeouts before handing\n// the tool off to a LangGraph ToolNode or some other part of your\n// application\nconst slowToolWithTimeout = slowTool.withConfig({ timeout: 300000 }); // 5 min timeout\n\n// This invocation will respect the 5 minute timeout\nconst result = await slowToolWithTimeout.invoke(\n { dataset: 'huge_file.csv' },\n);\n\n// or you can invoke directly without withConfig\nconst directResult = await slowTool.invoke(\n { dataset: 'huge_file.csv' },\n { timeout: 300000 }\n);\n\n// Quick timeout for fast operations\nconst quickResult = await fastTool.invoke(\n { query: 'simple_lookup' },\n { timeout: 5000 } // 5 seconds\n);\n\n// Default timeout (60 seconds from MCP SDK) when no config provided\nconst normalResult = await tool.invoke({ input: 'normal_processing' });\n```\n\n### Timeout Configuration\n\n| Parameter | Type | Default | Description |\n| --------- | ---- | ------- | ----------- |\n| `timeout` | number | 60000 | Timeout in milliseconds for the tool call |", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2103914082", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8241, + "pr_file": "libs/langchain-mcp-adapters/README.md", + "discussion_id": "2103914082", + "commented_code": "@@ -224,6 +224,57 @@ When loading MCP tools either directly through `loadMcpTools` or via `MultiServe\n | `prefixToolNameWithServerName` | boolean | `true` | If true, prefixes all tool names with the server name (e.g., `serverName__toolName`) |\n | `additionalToolNamePrefix` | string | `mcp` | Additional prefix to add to tool names (e.g., `prefix__serverName__toolName`) |\n \n+## Tool Timeout Configuration\n+\n+MCP tools support timeout configuration through LangChain's standard `RunnableConfig` interface. 
This allows you to set custom timeouts on a per-tool-call basis:\n+\n+```typescript\n+const client = new MultiServerMCPClient({\n+ mcpServers: {\n+ 'data-processor': {\n+ command: 'python',\n+ args: ['data_server.py']\n+ }\n+ }\n+});\n+\n+const tools = await client.getTools();\n+const slowTool = tools.find(t => t.name.includes('process_large_dataset'));\n+\n+// You can use withConfig to set tool-specific timeouts before handing\n+// the tool off to a LangGraph ToolNode or some other part of your\n+// application\n+const slowToolWithTimeout = slowTool.withConfig({ timeout: 300000 }); // 5 min timeout\n+\n+// This invocation will respect the 5 minute timeout\n+const result = await slowToolWithTimeout.invoke(\n+ { dataset: 'huge_file.csv' },\n+);\n+\n+// or you can invoke directly without withConfig\n+const directResult = await slowTool.invoke(\n+ { dataset: 'huge_file.csv' },\n+ { timeout: 300000 }\n+);\n+\n+// Quick timeout for fast operations\n+const quickResult = await fastTool.invoke(\n+ { query: 'simple_lookup' },\n+ { timeout: 5000 } // 5 seconds\n+);\n+\n+// Default timeout (60 seconds from MCP SDK) when no config provided\n+const normalResult = await tool.invoke({ input: 'normal_processing' });\n+```\n+\n+### Timeout Configuration\n+\n+| Parameter | Type | Default | Description |\n+| --------- | ---- | ------- | ----------- |\n+| `timeout` | number | 60000 | Timeout in milliseconds for the tool call |", + "comment_created_at": "2025-05-23T06:42:58+00:00", + "comment_author": "Copilot", + "comment_body": "The table documents only the `timeout` parameter, but `signal` from `RunnableConfig` is also supported and should be listed here for completeness.\n```suggestion\n| `timeout` | number | 60000 | Timeout in milliseconds for the tool call |\n| `signal` | AbortSignal | `undefined` | Optional AbortSignal to cancel the tool call |\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-documentation-completeness-check.md b/_reviewers/langchainjs-documentation-completeness-check.md index 0d0d055..b83ae97 100644 --- a/_reviewers/langchainjs-documentation-completeness-check.md +++ b/_reviewers/langchainjs-documentation-completeness-check.md @@ -46,85 +46,3 @@ Install the [Oracle JavaScript client driver](https://node-oracledb.readthedocs. ``` Complete documentation improves developer experience, reduces questions, and facilitates faster onboarding. - - -[ - { - "discussion_id": "2092387294", - "pr_number": 8007, - "pr_file": "cookbook/oraclevs.md", - "created_at": "2025-05-16T06:08:25+00:00", - "commented_code": "Oracle AI Vector Search with LangchainJS Integration", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2092387294", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8007, - "pr_file": "cookbook/oraclevs.md", - "discussion_id": "2092387294", - "commented_code": "@@ -0,0 +1,273 @@\n+Oracle AI Vector Search with LangchainJS Integration", - "comment_created_at": "2025-05-16T06:08:25+00:00", - "comment_author": "cjbj", - "comment_body": "Shouldn't headings like this have the appropriate markdown syntax? 
(There are many occurrences in this file that look odd).", - "pr_file_module": null - }, - { - "comment_id": "2094155633", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8007, - "pr_file": "cookbook/oraclevs.md", - "discussion_id": "2092387294", - "commented_code": "@@ -0,0 +1,273 @@\n+Oracle AI Vector Search with LangchainJS Integration", - "comment_created_at": "2025-05-17T16:01:02+00:00", - "comment_author": "hackerdave", - "comment_body": "Good catch, have added some markdown syntax to fix the headings and bullets", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2099206512", - "pr_number": 8007, - "pr_file": "cookbook/oraclevs.md", - "created_at": "2025-05-21T02:59:16+00:00", - "commented_code": "# Oracle AI Vector Search with LangchainJS Integration\n## Introduction\nOracle AI Vector Search enables semantic search on unstructured data while simultaneously providing relational search capabilities on business data, all within a unified system. This approach eliminates the need for a separate vector database, reducing data fragmentation and improving efficiency.\n\nBy integrating Oracle AI Vector Search with Langchain, you can build a powerful pipeline for Retrieval Augmented Generation (RAG), leveraging Oracle's robust database features.\n\nKey Advantages of Oracle Database\nOracle AI Vector Search is built on top of the Oracle Database, providing several key features:\n\n* Partitioning Support\n* Real Application Clusters (RAC) Scalability\n* Exadata Smart Scans\n* Geographically Distributed Shard Processing\n* Transactional Capabilities\n* Parallel SQL\n* Disaster Recovery\n* Advanced Security\n* Oracle Machine Learning\n* Oracle Graph Database\n* Oracle Spatial and Graph\n* Oracle Blockchain\n* JSON Support\n## Guide Overview\nThis guide demonstrates how to integrate Oracle AI Vector Search with Langchain to create an end-to-end RAG pipeline. You'll learn how to:\n\n* Load documents from different sources using OracleDocLoader.\n* Summarize documents inside or outside the database using OracleSummary.\n* Generate embeddings either inside or outside the database using OracleEmbeddings.\n* Chunk documents based on specific needs using OracleTextSplitter.\n* Store, index, and query data using OracleVS.\n## Getting Started\nIf you're new to Oracle Database, consider using the free Oracle 23 AI Database to get started.\n\n## Best Practices\n* User Management: Create dedicated users for your Oracle Database projects instead of using the system user for security and control purposes. See the end-to-end guide for more details.\n* User Privileges: Be sure to manage user privileges effectively to maintain database security. 
You can find more information in the official Oracle documentation.\n## Prerequisites\nTo get started, install the Oracle JavaScript client driver:\n\n``` typescript\nnpm install oracledb\n```\n\n## Document Preparation\nAssuming you have documents stored in a file system that you want to use with Oracle AI Vector Search and Langchain, these documents need to be instances of langchain/core/documents.\n\nExample: Ingesting JSON Documents\nIn the following TypeScript example, we demonstrate how to ingest documents from JSON files:\n\n```typescript\nprivate createDocument(row: DataRow): Document {\n const metadata = {\n id: row.id, \n link: row.link,\n };\n return new Document({ pageContent: row.text, metadata: metadata });\n}\n\npublic async ingestJson(): Promise {\n try {\n const filePath = `${this.docsDir}${this.filename}`;\n const fileContent = await fs.readFile(filePath, {encoding: 'utf8'});\n const jsonData: DataRow[] = JSON.parse(fileContent);\n return jsonData.map((row) => this.createDocument(row));\n } catch (error) {\n console.error('An error occurred while ingesting JSON:', error);\n throw error; // Rethrow for the calling function to handle\n }\n}\n```\n\n## Langchain and Oracle Integration\nThe Oracle AI Vector Search Langchain library offers a rich set of APIs for document processing, which includes loading, chunking, summarizing, and embedding generation. Here's how to set up a connection and integrate Oracle with Langchain.\n\n## Connecting to Oracle Database\nBelow is an example of how to connect to an Oracle Database using both a direct connection and a connection pool:", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2099206512", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8007, - "pr_file": "cookbook/oraclevs.md", - "discussion_id": "2099206512", - "commented_code": "@@ -0,0 +1,273 @@\n+# Oracle AI Vector Search with LangchainJS Integration\n+## Introduction\n+Oracle AI Vector Search enables semantic search on unstructured data while simultaneously providing relational search capabilities on business data, all within a unified system. This approach eliminates the need for a separate vector database, reducing data fragmentation and improving efficiency.\n+\n+By integrating Oracle AI Vector Search with Langchain, you can build a powerful pipeline for Retrieval Augmented Generation (RAG), leveraging Oracle's robust database features.\n+\n+Key Advantages of Oracle Database\n+Oracle AI Vector Search is built on top of the Oracle Database, providing several key features:\n+\n+* Partitioning Support\n+* Real Application Clusters (RAC) Scalability\n+* Exadata Smart Scans\n+* Geographically Distributed Shard Processing\n+* Transactional Capabilities\n+* Parallel SQL\n+* Disaster Recovery\n+* Advanced Security\n+* Oracle Machine Learning\n+* Oracle Graph Database\n+* Oracle Spatial and Graph\n+* Oracle Blockchain\n+* JSON Support\n+## Guide Overview\n+This guide demonstrates how to integrate Oracle AI Vector Search with Langchain to create an end-to-end RAG pipeline. 
You'll learn how to:\n+\n+* Load documents from different sources using OracleDocLoader.\n+* Summarize documents inside or outside the database using OracleSummary.\n+* Generate embeddings either inside or outside the database using OracleEmbeddings.\n+* Chunk documents based on specific needs using OracleTextSplitter.\n+* Store, index, and query data using OracleVS.\n+## Getting Started\n+If you're new to Oracle Database, consider using the free Oracle 23 AI Database to get started.\n+\n+## Best Practices\n+* User Management: Create dedicated users for your Oracle Database projects instead of using the system user for security and control purposes. See the end-to-end guide for more details.\n+* User Privileges: Be sure to manage user privileges effectively to maintain database security. You can find more information in the official Oracle documentation.\n+## Prerequisites\n+To get started, install the Oracle JavaScript client driver:\n+\n+``` typescript\n+npm install oracledb\n+```\n+\n+## Document Preparation\n+Assuming you have documents stored in a file system that you want to use with Oracle AI Vector Search and Langchain, these documents need to be instances of langchain/core/documents.\n+\n+Example: Ingesting JSON Documents\n+In the following TypeScript example, we demonstrate how to ingest documents from JSON files:\n+\n+```typescript\n+private createDocument(row: DataRow): Document {\n+ const metadata = {\n+ id: row.id, \n+ link: row.link,\n+ };\n+ return new Document({ pageContent: row.text, metadata: metadata });\n+}\n+\n+public async ingestJson(): Promise {\n+ try {\n+ const filePath = `${this.docsDir}${this.filename}`;\n+ const fileContent = await fs.readFile(filePath, {encoding: 'utf8'});\n+ const jsonData: DataRow[] = JSON.parse(fileContent);\n+ return jsonData.map((row) => this.createDocument(row));\n+ } catch (error) {\n+ console.error('An error occurred while ingesting JSON:', error);\n+ throw error; // Rethrow for the calling function to handle\n+ }\n+}\n+```\n+\n+## Langchain and Oracle Integration\n+The Oracle AI Vector Search Langchain library offers a rich set of APIs for document processing, which includes loading, chunking, summarizing, and embedding generation. Here's how to set up a connection and integrate Oracle with Langchain.\n+\n+## Connecting to Oracle Database\n+Below is an example of how to connect to an Oracle Database using both a direct connection and a connection pool:", - "comment_created_at": "2025-05-21T02:59:16+00:00", - "comment_author": "cjbj", - "comment_body": "Please link here (or somewhere) to https://node-oracledb.readthedocs.io/en/latest/ so users can get more info.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2103914082", - "pr_number": 8241, - "pr_file": "libs/langchain-mcp-adapters/README.md", - "created_at": "2025-05-23T06:42:58+00:00", - "commented_code": "| `prefixToolNameWithServerName` | boolean | `true` | If true, prefixes all tool names with the server name (e.g., `serverName__toolName`) |\n| `additionalToolNamePrefix` | string | `mcp` | Additional prefix to add to tool names (e.g., `prefix__serverName__toolName`) |\n\n## Tool Timeout Configuration\n\nMCP tools support timeout configuration through LangChain's standard `RunnableConfig` interface. 
This allows you to set custom timeouts on a per-tool-call basis:\n\n```typescript\nconst client = new MultiServerMCPClient({\n mcpServers: {\n 'data-processor': {\n command: 'python',\n args: ['data_server.py']\n }\n }\n});\n\nconst tools = await client.getTools();\nconst slowTool = tools.find(t => t.name.includes('process_large_dataset'));\n\n// You can use withConfig to set tool-specific timeouts before handing\n// the tool off to a LangGraph ToolNode or some other part of your\n// application\nconst slowToolWithTimeout = slowTool.withConfig({ timeout: 300000 }); // 5 min timeout\n\n// This invocation will respect the 5 minute timeout\nconst result = await slowToolWithTimeout.invoke(\n { dataset: 'huge_file.csv' },\n);\n\n// or you can invoke directly without withConfig\nconst directResult = await slowTool.invoke(\n { dataset: 'huge_file.csv' },\n { timeout: 300000 }\n);\n\n// Quick timeout for fast operations\nconst quickResult = await fastTool.invoke(\n { query: 'simple_lookup' },\n { timeout: 5000 } // 5 seconds\n);\n\n// Default timeout (60 seconds from MCP SDK) when no config provided\nconst normalResult = await tool.invoke({ input: 'normal_processing' });\n```\n\n### Timeout Configuration\n\n| Parameter | Type | Default | Description |\n| --------- | ---- | ------- | ----------- |\n| `timeout` | number | 60000 | Timeout in milliseconds for the tool call |", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2103914082", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8241, - "pr_file": "libs/langchain-mcp-adapters/README.md", - "discussion_id": "2103914082", - "commented_code": "@@ -224,6 +224,57 @@ When loading MCP tools either directly through `loadMcpTools` or via `MultiServe\n | `prefixToolNameWithServerName` | boolean | `true` | If true, prefixes all tool names with the server name (e.g., `serverName__toolName`) |\n | `additionalToolNamePrefix` | string | `mcp` | Additional prefix to add to tool names (e.g., `prefix__serverName__toolName`) |\n \n+## Tool Timeout Configuration\n+\n+MCP tools support timeout configuration through LangChain's standard `RunnableConfig` interface. 
This allows you to set custom timeouts on a per-tool-call basis:\n+\n+```typescript\n+const client = new MultiServerMCPClient({\n+ mcpServers: {\n+ 'data-processor': {\n+ command: 'python',\n+ args: ['data_server.py']\n+ }\n+ }\n+});\n+\n+const tools = await client.getTools();\n+const slowTool = tools.find(t => t.name.includes('process_large_dataset'));\n+\n+// You can use withConfig to set tool-specific timeouts before handing\n+// the tool off to a LangGraph ToolNode or some other part of your\n+// application\n+const slowToolWithTimeout = slowTool.withConfig({ timeout: 300000 }); // 5 min timeout\n+\n+// This invocation will respect the 5 minute timeout\n+const result = await slowToolWithTimeout.invoke(\n+ { dataset: 'huge_file.csv' },\n+);\n+\n+// or you can invoke directly without withConfig\n+const directResult = await slowTool.invoke(\n+ { dataset: 'huge_file.csv' },\n+ { timeout: 300000 }\n+);\n+\n+// Quick timeout for fast operations\n+const quickResult = await fastTool.invoke(\n+ { query: 'simple_lookup' },\n+ { timeout: 5000 } // 5 seconds\n+);\n+\n+// Default timeout (60 seconds from MCP SDK) when no config provided\n+const normalResult = await tool.invoke({ input: 'normal_processing' });\n+```\n+\n+### Timeout Configuration\n+\n+| Parameter | Type | Default | Description |\n+| --------- | ---- | ------- | ----------- |\n+| `timeout` | number | 60000 | Timeout in milliseconds for the tool call |", - "comment_created_at": "2025-05-23T06:42:58+00:00", - "comment_author": "Copilot", - "comment_body": "The table documents only the `timeout` parameter, but `signal` from `RunnableConfig` is also supported and should be listed here for completeness.\n```suggestion\n| `timeout` | number | 60000 | Timeout in milliseconds for the tool call |\n| `signal` | AbortSignal | `undefined` | Optional AbortSignal to cancel the tool call |\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-eliminate-redundant-code.json b/_reviewers/langchainjs-eliminate-redundant-code.json new file mode 100644 index 0000000..2492068 --- /dev/null +++ b/_reviewers/langchainjs-eliminate-redundant-code.json @@ -0,0 +1,90 @@ +[ + { + "discussion_id": "2106380214", + "pr_number": 8241, + "pr_file": "libs/langchain-mcp-adapters/src/tools.ts", + "created_at": "2025-05-26T01:25:52+00:00", + "commented_code": "* @param client - The MCP client\n * @param toolName - The name of the tool (forwarded to the client)\n * @param args - The arguments to pass to the tool\n * @param config - Optional RunnableConfig with timeout settings\n * @returns A tuple of [textContent, nonTextContent]\n */\nasync function _callTool(\n serverName: string,\n toolName: string,\n client: Client,\n args: Record\n args: Record,\n config?: RunnableConfig\n): Promise<[MessageContent, EmbeddedResource[]]> {\n let result: CallToolResult;\n try {\n getDebugLog()(`INFO: Calling tool ${toolName}(${JSON.stringify(args)})`);\n result = (await client.callTool({\n name: toolName,\n arguments: args,\n })) as CallToolResult;\n\n // Extract timeout from RunnableConfig and pass to MCP SDK\n let requestOptions: RequestOptions | undefined = {\n ...(config?.timeout ? { timeout: config.timeout } : {}),\n ...(config?.signal ? 
{ signal: config.signal } : {}),\n };\n\n if (Object.keys(requestOptions).length === 0) {\n requestOptions = undefined;\n }", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2106380214", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8241, + "pr_file": "libs/langchain-mcp-adapters/src/tools.ts", + "discussion_id": "2106380214", + "commented_code": "@@ -161,21 +163,38 @@ async function _convertCallToolResult(\n * @param client - The MCP client\n * @param toolName - The name of the tool (forwarded to the client)\n * @param args - The arguments to pass to the tool\n+ * @param config - Optional RunnableConfig with timeout settings\n * @returns A tuple of [textContent, nonTextContent]\n */\n async function _callTool(\n serverName: string,\n toolName: string,\n client: Client,\n- args: Record\n+ args: Record,\n+ config?: RunnableConfig\n ): Promise<[MessageContent, EmbeddedResource[]]> {\n let result: CallToolResult;\n try {\n getDebugLog()(`INFO: Calling tool ${toolName}(${JSON.stringify(args)})`);\n- result = (await client.callTool({\n- name: toolName,\n- arguments: args,\n- })) as CallToolResult;\n+\n+ // Extract timeout from RunnableConfig and pass to MCP SDK\n+ let requestOptions: RequestOptions | undefined = {\n+ ...(config?.timeout ? { timeout: config.timeout } : {}),\n+ ...(config?.signal ? { signal: config.signal } : {}),\n+ };\n+\n+ if (Object.keys(requestOptions).length === 0) {\n+ requestOptions = undefined;\n+ }\n+", + "comment_created_at": "2025-05-26T01:25:52+00:00", + "comment_author": "Copilot", + "comment_body": "[nitpick] The logic to build and then drop an empty `requestOptions` object adds cognitive overhead. You could simplify by only constructing `requestOptions` when `config?.timeout` or `config?.signal` is present, e.g.: `const requestOptions = config ? { timeout: config.timeout, signal: config.signal } : undefined;`.\n```suggestion\n const requestOptions: RequestOptions | undefined = config?.timeout || config?.signal\n ? { timeout: config.timeout, signal: config.signal }\n : undefined;\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2106472869", + "pr_number": 8252, + "pr_file": "libs/langchain-mcp-adapters/src/tools.ts", + "created_at": "2025-05-26T04:07:08+00:00", + "commented_code": "}\n\n const result = await client.callTool(...callToolArgs);\n return _convertCallToolResult(serverName, toolName, result, client);\n return await _convertCallToolResult({", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2106472869", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8252, + "pr_file": "libs/langchain-mcp-adapters/src/tools.ts", + "discussion_id": "2106472869", + "commented_code": "@@ -207,7 +289,13 @@ async function _callTool(\n }\n \n const result = await client.callTool(...callToolArgs);\n- return _convertCallToolResult(serverName, toolName, result, client);\n+ return await _convertCallToolResult({", + "comment_created_at": "2025-05-26T04:07:08+00:00", + "comment_author": "Copilot", + "comment_body": "In an async function, 'return await' is generally redundant. 
Consider returning the promise directly to simplify the code.\n```suggestion\n return _convertCallToolResult({\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1583887935", + "pr_number": 5251, + "pr_file": "libs/langchain-azure-openai/src/embeddings.ts", + "created_at": "2024-04-29T23:11:30+00:00", + "commented_code": "const batchRequests = batches.map((batch) => this.getEmbeddings(batch));\n const embeddings = await Promise.all(batchRequests);\n\n return embeddings;\n return embeddings.flat(1);", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1583887935", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5251, + "pr_file": "libs/langchain-azure-openai/src/embeddings.ts", + "discussion_id": "1583887935", + "commented_code": "@@ -135,18 +135,18 @@ export class AzureOpenAIEmbeddings\n \n const batchRequests = batches.map((batch) => this.getEmbeddings(batch));\n const embeddings = await Promise.all(batchRequests);\n-\n- return embeddings;\n+ return embeddings.flat(1);", + "comment_created_at": "2024-04-29T23:11:30+00:00", + "comment_author": "bracesproul", + "comment_body": "Nit, defaults to 1 so this can be dropped.\r\n```suggestion\r\n return embeddings.flat();\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1719005999", + "pr_number": 6519, + "pr_file": "libs/langchain-google-genai/src/chat_models.ts", + "created_at": "2024-08-15T21:16:00+00:00", + "commented_code": "streamUsage = true;\n\n json = false", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1719005999", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6519, + "pr_file": "libs/langchain-google-genai/src/chat_models.ts", + "discussion_id": "1719005999", + "commented_code": "@@ -244,6 +247,8 @@ export class ChatGoogleGenerativeAI\n \n streamUsage = true;\n \n+ json = false", + "comment_created_at": "2024-08-15T21:16:00+00:00", + "comment_author": "bracesproul", + "comment_body": "If this is only accessed inside the class constructor we don't need to make it a class property.\r\n```suggestion\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-eliminate-redundant-code.md b/_reviewers/langchainjs-eliminate-redundant-code.md index 5f320ef..f331fe9 100644 --- a/_reviewers/langchainjs-eliminate-redundant-code.md +++ b/_reviewers/langchainjs-eliminate-redundant-code.md @@ -78,95 +78,3 @@ class MyClass { ``` Eliminating redundancy leads to cleaner, more maintainable code that's easier to review and less prone to errors. 
- - -[ - { - "discussion_id": "2106380214", - "pr_number": 8241, - "pr_file": "libs/langchain-mcp-adapters/src/tools.ts", - "created_at": "2025-05-26T01:25:52+00:00", - "commented_code": "* @param client - The MCP client\n * @param toolName - The name of the tool (forwarded to the client)\n * @param args - The arguments to pass to the tool\n * @param config - Optional RunnableConfig with timeout settings\n * @returns A tuple of [textContent, nonTextContent]\n */\nasync function _callTool(\n serverName: string,\n toolName: string,\n client: Client,\n args: Record\n args: Record,\n config?: RunnableConfig\n): Promise<[MessageContent, EmbeddedResource[]]> {\n let result: CallToolResult;\n try {\n getDebugLog()(`INFO: Calling tool ${toolName}(${JSON.stringify(args)})`);\n result = (await client.callTool({\n name: toolName,\n arguments: args,\n })) as CallToolResult;\n\n // Extract timeout from RunnableConfig and pass to MCP SDK\n let requestOptions: RequestOptions | undefined = {\n ...(config?.timeout ? { timeout: config.timeout } : {}),\n ...(config?.signal ? { signal: config.signal } : {}),\n };\n\n if (Object.keys(requestOptions).length === 0) {\n requestOptions = undefined;\n }", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2106380214", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8241, - "pr_file": "libs/langchain-mcp-adapters/src/tools.ts", - "discussion_id": "2106380214", - "commented_code": "@@ -161,21 +163,38 @@ async function _convertCallToolResult(\n * @param client - The MCP client\n * @param toolName - The name of the tool (forwarded to the client)\n * @param args - The arguments to pass to the tool\n+ * @param config - Optional RunnableConfig with timeout settings\n * @returns A tuple of [textContent, nonTextContent]\n */\n async function _callTool(\n serverName: string,\n toolName: string,\n client: Client,\n- args: Record\n+ args: Record,\n+ config?: RunnableConfig\n ): Promise<[MessageContent, EmbeddedResource[]]> {\n let result: CallToolResult;\n try {\n getDebugLog()(`INFO: Calling tool ${toolName}(${JSON.stringify(args)})`);\n- result = (await client.callTool({\n- name: toolName,\n- arguments: args,\n- })) as CallToolResult;\n+\n+ // Extract timeout from RunnableConfig and pass to MCP SDK\n+ let requestOptions: RequestOptions | undefined = {\n+ ...(config?.timeout ? { timeout: config.timeout } : {}),\n+ ...(config?.signal ? { signal: config.signal } : {}),\n+ };\n+\n+ if (Object.keys(requestOptions).length === 0) {\n+ requestOptions = undefined;\n+ }\n+", - "comment_created_at": "2025-05-26T01:25:52+00:00", - "comment_author": "Copilot", - "comment_body": "[nitpick] The logic to build and then drop an empty `requestOptions` object adds cognitive overhead. You could simplify by only constructing `requestOptions` when `config?.timeout` or `config?.signal` is present, e.g.: `const requestOptions = config ? { timeout: config.timeout, signal: config.signal } : undefined;`.\n```suggestion\n const requestOptions: RequestOptions | undefined = config?.timeout || config?.signal\n ? 
{ timeout: config.timeout, signal: config.signal }\n : undefined;\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2106472869", - "pr_number": 8252, - "pr_file": "libs/langchain-mcp-adapters/src/tools.ts", - "created_at": "2025-05-26T04:07:08+00:00", - "commented_code": "}\n\n const result = await client.callTool(...callToolArgs);\n return _convertCallToolResult(serverName, toolName, result, client);\n return await _convertCallToolResult({", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2106472869", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8252, - "pr_file": "libs/langchain-mcp-adapters/src/tools.ts", - "discussion_id": "2106472869", - "commented_code": "@@ -207,7 +289,13 @@ async function _callTool(\n }\n \n const result = await client.callTool(...callToolArgs);\n- return _convertCallToolResult(serverName, toolName, result, client);\n+ return await _convertCallToolResult({", - "comment_created_at": "2025-05-26T04:07:08+00:00", - "comment_author": "Copilot", - "comment_body": "In an async function, 'return await' is generally redundant. Consider returning the promise directly to simplify the code.\n```suggestion\n return _convertCallToolResult({\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1583887935", - "pr_number": 5251, - "pr_file": "libs/langchain-azure-openai/src/embeddings.ts", - "created_at": "2024-04-29T23:11:30+00:00", - "commented_code": "const batchRequests = batches.map((batch) => this.getEmbeddings(batch));\n const embeddings = await Promise.all(batchRequests);\n\n return embeddings;\n return embeddings.flat(1);", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1583887935", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5251, - "pr_file": "libs/langchain-azure-openai/src/embeddings.ts", - "discussion_id": "1583887935", - "commented_code": "@@ -135,18 +135,18 @@ export class AzureOpenAIEmbeddings\n \n const batchRequests = batches.map((batch) => this.getEmbeddings(batch));\n const embeddings = await Promise.all(batchRequests);\n-\n- return embeddings;\n+ return embeddings.flat(1);", - "comment_created_at": "2024-04-29T23:11:30+00:00", - "comment_author": "bracesproul", - "comment_body": "Nit, defaults to 1 so this can be dropped.\r\n```suggestion\r\n return embeddings.flat();\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1719005999", - "pr_number": 6519, - "pr_file": "libs/langchain-google-genai/src/chat_models.ts", - "created_at": "2024-08-15T21:16:00+00:00", - "commented_code": "streamUsage = true;\n\n json = false", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1719005999", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6519, - "pr_file": "libs/langchain-google-genai/src/chat_models.ts", - "discussion_id": "1719005999", - "commented_code": "@@ -244,6 +247,8 @@ export class ChatGoogleGenerativeAI\n \n streamUsage = true;\n \n+ json = false", - "comment_created_at": "2024-08-15T21:16:00+00:00", - "comment_author": "bracesproul", - "comment_body": "If this is only accessed inside the class constructor we don't need to make it a class property.\r\n```suggestion\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-follow-documentation-standards.json b/_reviewers/langchainjs-follow-documentation-standards.json new file mode 100644 index 0000000..3e3d379 --- /dev/null +++ 
b/_reviewers/langchainjs-follow-documentation-standards.json @@ -0,0 +1,148 @@ +[ + { + "discussion_id": "1818683829", + "pr_number": 7033, + "pr_file": "docs/core_docs/docs/integrations/semantic_caches/azure_cosmosdb_nosql.mdx", + "created_at": "2024-10-28T09:23:41+00:00", + "commented_code": "# Azure Cosmos DB NoSQL Semantic Cache\n\n> The Semantic Cache feature is supported with Azure Cosmos DB for NoSQL integration, enabling users to retrieve cached responses based on semantic similarity between the user input and previously cached results. It leverages AzureCosmosDBNoSQLVectorStore, which stores vector embeddings of cached prompts. These embeddings enable similarity-based searches, allowing the system to retrieve relevant cached results.", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1818683829", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7033, + "pr_file": "docs/core_docs/docs/integrations/semantic_caches/azure_cosmosdb_nosql.mdx", + "discussion_id": "1818683829", + "commented_code": "@@ -0,0 +1,40 @@\n+# Azure Cosmos DB NoSQL Semantic Cache\n+\n+> The Semantic Cache feature is supported with Azure Cosmos DB for NoSQL integration, enabling users to retrieve cached responses based on semantic similarity between the user input and previously cached results. It leverages AzureCosmosDBNoSQLVectorStore, which stores vector embeddings of cached prompts. These embeddings enable similarity-based searches, allowing the system to retrieve relevant cached results.", + "comment_created_at": "2024-10-28T09:23:41+00:00", + "comment_author": "sinedied", + "comment_body": "nit: a link to AzureCosmosDBNoSQLVectorStore doc would be nice here", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1790893773", + "pr_number": 6916, + "pr_file": "docs/core_docs/docs/integrations/llms/ibm.mdx", + "created_at": "2024-10-07T21:23:10+00:00", + "commented_code": "# @langchain/community/llm/ibm", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1790893773", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6916, + "pr_file": "docs/core_docs/docs/integrations/llms/ibm.mdx", + "discussion_id": "1790893773", + "commented_code": "@@ -0,0 +1,166 @@\n+# @langchain/community/llm/ibm", + "comment_created_at": "2024-10-07T21:23:10+00:00", + "comment_author": "jacoblee93", + "comment_body": "Please make the docs pages follow this format:\r\n\r\nhttps://github.com/langchain-ai/langchainjs/blob/main/libs/langchain-scripts/src/cli/docs/templates/llms.ipynb", + "pr_file_module": null + }, + { + "comment_id": "1792028645", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6916, + "pr_file": "docs/core_docs/docs/integrations/llms/ibm.mdx", + "discussion_id": "1790893773", + "commented_code": "@@ -0,0 +1,166 @@\n+# @langchain/community/llm/ibm", + "comment_created_at": "2024-10-08T14:48:40+00:00", + "comment_author": "FilipZmijewski", + "comment_body": "Done", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1788321017", + "pr_number": 6904, + "pr_file": "docs/core_docs/docs/integrations/vectorstores/libsql.mdx", + "created_at": "2024-10-04T21:03:58+00:00", + "commented_code": "# libSQL", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1788321017", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6904, + "pr_file": "docs/core_docs/docs/integrations/vectorstores/libsql.mdx", + "discussion_id": 
"1788321017", + "commented_code": "@@ -0,0 +1,112 @@\n+# libSQL", + "comment_created_at": "2024-10-04T21:03:58+00:00", + "comment_author": "jacoblee93", + "comment_body": "Can we make the docs follow this template?\r\n\r\nhttps://github.com/langchain-ai/langchainjs/blob/main/libs/langchain-scripts/src/cli/docs/templates/vectorstores.ipynb\r\n\r\nI need to update the contributing instructions...", + "pr_file_module": null + }, + { + "comment_id": "1789964364", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6904, + "pr_file": "docs/core_docs/docs/integrations/vectorstores/libsql.mdx", + "discussion_id": "1788321017", + "commented_code": "@@ -0,0 +1,112 @@\n+# libSQL", + "comment_created_at": "2024-10-07T10:35:37+00:00", + "comment_author": "notrab", + "comment_body": "@jacoblee93 I updated the docs to follow more closely your example, however if you have time, please suggest or commit any changes you desire.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1375272196", + "pr_number": 3065, + "pr_file": "docs/docs/modules/chains/popular/structured_output.mdx", + "created_at": "2023-10-28T15:33:49+00:00", + "commented_code": "{FormatExample}\n\n# With chat history memory\n\nIn this example we'll construct a simple chain which uses OpenAI functions, structured output parser and chat history memory.\nWe'll provide the chain with two functions:\n\n- `food_recorder` -> A function to record what food a user likes.\n- `final_response` -> The function that returns a response to the user\n\nAfter each LLM response, we'll take our structured data and save it to memory (with a little formatting too). By doing this, we're enabling our LLM to\nremember what the user has said and use that information to generate a response.\n\nThe first step is to create our functions. For this we'll use `zod` to create the function schemas.\n\nWe're going to create two, one which returns an array of foods with their name, whether or not they're healthy and their color.\nThe second simply returns an object containing the final response.\n\n```typescript\nconst foodZodSchema = z.object({\n foods: z\n .array(\n z.object({\n name: z.string().describe(\"The name of the food item\"),\n healthy: z.boolean().describe(\"Whether the food is good for you\"),\n color: z.string().optional().describe(\"The color of the food\"),\n })\n )\n .describe(\"An array of food items mentioned in the text\"),\n});\n\nconst finalResponseZodSchema = z.object({\n finalResponse: z.string().describe(\"The final response\"),\n});\n```\n\nNext we'll construct the prompt. We have two \"messages\", one from the AI with a simple explanation on when to use the functions, and an input variable for the chat history. The second only contains the users question.\n\n```typescript\nconst prompt = ChatPromptTemplate.fromMessages([\n [\n \"ai\",\n `You are a helpful assistant.\nYou are provided with two functions: one to use to record what food the user likes, and one to use when replying to user questions.\nChat history: {history}`,\n ],\n [\"human\", \"Question: {input}\"],\n]);\n```\n\nThen, define the LLM model and bind the function arguments to it. 
By using `.bind()` we're adding the functions to the model for every call.\n\nWe also must wrap our `zod` schemas with `zodToJsonSchema` to appropriately format them for OpenAI.\n\n```typescript\nconst model = new ChatOpenAI({\n temperature: 0,\n}).bind({\n functions: [\n {\n name: \"food_recorder\",\n description: \"A function to record what food a user likes\",\n parameters: zodToJsonSchema(foodZodSchema),\n },\n {\n name: \"final_response\",\n description: \"The function that returns a response to the user\",\n parameters: zodToJsonSchema(finalResponseZodSchema),\n },\n ],\n});\n```\n\nNext we can define our memory using the `BufferMemory` class, and the output parser for structured data: `JsonOutputFunctionsParser`.", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1375272196", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3065, + "pr_file": "docs/docs/modules/chains/popular/structured_output.mdx", + "discussion_id": "1375272196", + "commented_code": "@@ -34,6 +34,167 @@ import FormatExample from \"@examples/chains/openai_functions_structured_format.t\n \n {FormatExample}\n \n+# With chat history memory\n+\n+In this example we'll construct a simple chain which uses OpenAI functions, structured output parser and chat history memory.\n+We'll provide the chain with two functions:\n+\n+- `food_recorder` -> A function to record what food a user likes.\n+- `final_response` -> The function that returns a response to the user\n+\n+After each LLM response, we'll take our structured data and save it to memory (with a little formatting too). By doing this, we're enabling our LLM to\n+remember what the user has said and use that information to generate a response.\n+\n+The first step is to create our functions. For this we'll use `zod` to create the function schemas.\n+\n+We're going to create two, one which returns an array of foods with their name, whether or not they're healthy and their color.\n+The second simply returns an object containing the final response.\n+\n+```typescript\n+const foodZodSchema = z.object({\n+ foods: z\n+ .array(\n+ z.object({\n+ name: z.string().describe(\"The name of the food item\"),\n+ healthy: z.boolean().describe(\"Whether the food is good for you\"),\n+ color: z.string().optional().describe(\"The color of the food\"),\n+ })\n+ )\n+ .describe(\"An array of food items mentioned in the text\"),\n+});\n+\n+const finalResponseZodSchema = z.object({\n+ finalResponse: z.string().describe(\"The final response\"),\n+});\n+```\n+\n+Next we'll construct the prompt. We have two \"messages\", one from the AI with a simple explanation on when to use the functions, and an input variable for the chat history. The second only contains the users question.\n+\n+```typescript\n+const prompt = ChatPromptTemplate.fromMessages([\n+ [\n+ \"ai\",\n+ `You are a helpful assistant.\n+You are provided with two functions: one to use to record what food the user likes, and one to use when replying to user questions.\n+Chat history: {history}`,\n+ ],\n+ [\"human\", \"Question: {input}\"],\n+]);\n+```\n+\n+Then, define the LLM model and bind the function arguments to it. 
By using `.bind()` we're adding the functions to the model for every call.\n+\n+We also must wrap our `zod` schemas with `zodToJsonSchema` to appropriately format them for OpenAI.\n+\n+```typescript\n+const model = new ChatOpenAI({\n+ temperature: 0,\n+}).bind({\n+ functions: [\n+ {\n+ name: \"food_recorder\",\n+ description: \"A function to record what food a user likes\",\n+ parameters: zodToJsonSchema(foodZodSchema),\n+ },\n+ {\n+ name: \"final_response\",\n+ description: \"The function that returns a response to the user\",\n+ parameters: zodToJsonSchema(finalResponseZodSchema),\n+ },\n+ ],\n+});\n+```\n+\n+Next we can define our memory using the `BufferMemory` class, and the output parser for structured data: `JsonOutputFunctionsParser`.", + "comment_created_at": "2023-10-28T15:33:49+00:00", + "comment_author": "jacoblee93", + "comment_body": "We should definitely add docs for `JsonOutputFunctionsParser`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1705941918", + "pr_number": 6414, + "pr_file": "docs/core_docs/docs/integrations/vectorstores/memory.ipynb", + "created_at": "2024-08-06T18:18:05+00:00", + "commented_code": "{\n \"cells\": [\n {\n \"cell_type\": \"raw\",\n \"id\": \"1957f5cb\",\n \"metadata\": {\n \"vscode\": {\n \"languageId\": \"raw\"\n }\n },\n \"source\": [\n \"---\\n\",\n \"sidebar_label: In Memory\\n\",\n \"---\"\n ]\n },\n {\n \"cell_type\": \"markdown\",\n \"id\": \"ef1f0986\",\n \"metadata\": {},\n \"source\": [\n \"# MemoryVectorStore\\n\",\n \"\\n\",\n \"MemoryVectorStore is an in-memory, ephemeral vectorstore that stores embeddings in-memory and does an exact, linear search for the most similar embeddings. The default similarity metric is cosine similarity, but can be changed to any of the similarity metrics supported by [ml-distance](https://mljs.github.io/distance/modules/similarity.html).\\n\"\n ]", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1705941918", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6414, + "pr_file": "docs/core_docs/docs/integrations/vectorstores/memory.ipynb", + "discussion_id": "1705941918", + "commented_code": "@@ -0,0 +1,382 @@\n+{\n+ \"cells\": [\n+ {\n+ \"cell_type\": \"raw\",\n+ \"id\": \"1957f5cb\",\n+ \"metadata\": {\n+ \"vscode\": {\n+ \"languageId\": \"raw\"\n+ }\n+ },\n+ \"source\": [\n+ \"---\\n\",\n+ \"sidebar_label: In Memory\\n\",\n+ \"---\"\n+ ]\n+ },\n+ {\n+ \"cell_type\": \"markdown\",\n+ \"id\": \"ef1f0986\",\n+ \"metadata\": {},\n+ \"source\": [\n+ \"# MemoryVectorStore\\n\",\n+ \"\\n\",\n+ \"MemoryVectorStore is an in-memory, ephemeral vectorstore that stores embeddings in-memory and does an exact, linear search for the most similar embeddings. 
The default similarity metric is cosine similarity, but can be changed to any of the similarity metrics supported by [ml-distance](https://mljs.github.io/distance/modules/similarity.html).\\n\"\n+ ]", + "comment_created_at": "2024-08-06T18:18:05+00:00", + "comment_author": "jacoblee93", + "comment_body": "Should we have the `## Overview` boilerplate?", + "pr_file_module": null + }, + { + "comment_id": "1705942925", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6414, + "pr_file": "docs/core_docs/docs/integrations/vectorstores/memory.ipynb", + "discussion_id": "1705941918", + "commented_code": "@@ -0,0 +1,382 @@\n+{\n+ \"cells\": [\n+ {\n+ \"cell_type\": \"raw\",\n+ \"id\": \"1957f5cb\",\n+ \"metadata\": {\n+ \"vscode\": {\n+ \"languageId\": \"raw\"\n+ }\n+ },\n+ \"source\": [\n+ \"---\\n\",\n+ \"sidebar_label: In Memory\\n\",\n+ \"---\"\n+ ]\n+ },\n+ {\n+ \"cell_type\": \"markdown\",\n+ \"id\": \"ef1f0986\",\n+ \"metadata\": {},\n+ \"source\": [\n+ \"# MemoryVectorStore\\n\",\n+ \"\\n\",\n+ \"MemoryVectorStore is an in-memory, ephemeral vectorstore that stores embeddings in-memory and does an exact, linear search for the most similar embeddings. The default similarity metric is cosine similarity, but can be changed to any of the similarity metrics supported by [ml-distance](https://mljs.github.io/distance/modules/similarity.html).\\n\"\n+ ]", + "comment_created_at": "2024-08-06T18:19:01+00:00", + "comment_author": "jacoblee93", + "comment_body": "Should at least link to vector store conceptual docs and details table", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-follow-documentation-standards.md b/_reviewers/langchainjs-follow-documentation-standards.md index 6cde009..0af2717 100644 --- a/_reviewers/langchainjs-follow-documentation-standards.md +++ b/_reviewers/langchainjs-follow-documentation-standards.md @@ -27,153 +27,3 @@ When a component is mentioned in documentation text, include a hyperlink to its ``` Complete documentation should exist for all public APIs and referenced components. If documentation for a referenced component is missing (like `JsonOutputFunctionsParser`), create it before finalizing the referencing documentation. - - -[ - { - "discussion_id": "1818683829", - "pr_number": 7033, - "pr_file": "docs/core_docs/docs/integrations/semantic_caches/azure_cosmosdb_nosql.mdx", - "created_at": "2024-10-28T09:23:41+00:00", - "commented_code": "# Azure Cosmos DB NoSQL Semantic Cache\n\n> The Semantic Cache feature is supported with Azure Cosmos DB for NoSQL integration, enabling users to retrieve cached responses based on semantic similarity between the user input and previously cached results. It leverages AzureCosmosDBNoSQLVectorStore, which stores vector embeddings of cached prompts. These embeddings enable similarity-based searches, allowing the system to retrieve relevant cached results.", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1818683829", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7033, - "pr_file": "docs/core_docs/docs/integrations/semantic_caches/azure_cosmosdb_nosql.mdx", - "discussion_id": "1818683829", - "commented_code": "@@ -0,0 +1,40 @@\n+# Azure Cosmos DB NoSQL Semantic Cache\n+\n+> The Semantic Cache feature is supported with Azure Cosmos DB for NoSQL integration, enabling users to retrieve cached responses based on semantic similarity between the user input and previously cached results. 
It leverages AzureCosmosDBNoSQLVectorStore, which stores vector embeddings of cached prompts. These embeddings enable similarity-based searches, allowing the system to retrieve relevant cached results.", - "comment_created_at": "2024-10-28T09:23:41+00:00", - "comment_author": "sinedied", - "comment_body": "nit: a link to AzureCosmosDBNoSQLVectorStore doc would be nice here", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1790893773", - "pr_number": 6916, - "pr_file": "docs/core_docs/docs/integrations/llms/ibm.mdx", - "created_at": "2024-10-07T21:23:10+00:00", - "commented_code": "# @langchain/community/llm/ibm", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1790893773", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6916, - "pr_file": "docs/core_docs/docs/integrations/llms/ibm.mdx", - "discussion_id": "1790893773", - "commented_code": "@@ -0,0 +1,166 @@\n+# @langchain/community/llm/ibm", - "comment_created_at": "2024-10-07T21:23:10+00:00", - "comment_author": "jacoblee93", - "comment_body": "Please make the docs pages follow this format:\r\n\r\nhttps://github.com/langchain-ai/langchainjs/blob/main/libs/langchain-scripts/src/cli/docs/templates/llms.ipynb", - "pr_file_module": null - }, - { - "comment_id": "1792028645", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6916, - "pr_file": "docs/core_docs/docs/integrations/llms/ibm.mdx", - "discussion_id": "1790893773", - "commented_code": "@@ -0,0 +1,166 @@\n+# @langchain/community/llm/ibm", - "comment_created_at": "2024-10-08T14:48:40+00:00", - "comment_author": "FilipZmijewski", - "comment_body": "Done", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1788321017", - "pr_number": 6904, - "pr_file": "docs/core_docs/docs/integrations/vectorstores/libsql.mdx", - "created_at": "2024-10-04T21:03:58+00:00", - "commented_code": "# libSQL", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1788321017", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6904, - "pr_file": "docs/core_docs/docs/integrations/vectorstores/libsql.mdx", - "discussion_id": "1788321017", - "commented_code": "@@ -0,0 +1,112 @@\n+# libSQL", - "comment_created_at": "2024-10-04T21:03:58+00:00", - "comment_author": "jacoblee93", - "comment_body": "Can we make the docs follow this template?\r\n\r\nhttps://github.com/langchain-ai/langchainjs/blob/main/libs/langchain-scripts/src/cli/docs/templates/vectorstores.ipynb\r\n\r\nI need to update the contributing instructions...", - "pr_file_module": null - }, - { - "comment_id": "1789964364", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6904, - "pr_file": "docs/core_docs/docs/integrations/vectorstores/libsql.mdx", - "discussion_id": "1788321017", - "commented_code": "@@ -0,0 +1,112 @@\n+# libSQL", - "comment_created_at": "2024-10-07T10:35:37+00:00", - "comment_author": "notrab", - "comment_body": "@jacoblee93 I updated the docs to follow more closely your example, however if you have time, please suggest or commit any changes you desire.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1375272196", - "pr_number": 3065, - "pr_file": "docs/docs/modules/chains/popular/structured_output.mdx", - "created_at": "2023-10-28T15:33:49+00:00", - "commented_code": "{FormatExample}\n\n# With chat history memory\n\nIn this example we'll construct a simple chain which uses OpenAI functions, structured output parser and chat history memory.\nWe'll provide the 
chain with two functions:\n\n- `food_recorder` -> A function to record what food a user likes.\n- `final_response` -> The function that returns a response to the user\n\nAfter each LLM response, we'll take our structured data and save it to memory (with a little formatting too). By doing this, we're enabling our LLM to\nremember what the user has said and use that information to generate a response.\n\nThe first step is to create our functions. For this we'll use `zod` to create the function schemas.\n\nWe're going to create two, one which returns an array of foods with their name, whether or not they're healthy and their color.\nThe second simply returns an object containing the final response.\n\n```typescript\nconst foodZodSchema = z.object({\n foods: z\n .array(\n z.object({\n name: z.string().describe(\"The name of the food item\"),\n healthy: z.boolean().describe(\"Whether the food is good for you\"),\n color: z.string().optional().describe(\"The color of the food\"),\n })\n )\n .describe(\"An array of food items mentioned in the text\"),\n});\n\nconst finalResponseZodSchema = z.object({\n finalResponse: z.string().describe(\"The final response\"),\n});\n```\n\nNext we'll construct the prompt. We have two \"messages\", one from the AI with a simple explanation on when to use the functions, and an input variable for the chat history. The second only contains the users question.\n\n```typescript\nconst prompt = ChatPromptTemplate.fromMessages([\n [\n \"ai\",\n `You are a helpful assistant.\nYou are provided with two functions: one to use to record what food the user likes, and one to use when replying to user questions.\nChat history: {history}`,\n ],\n [\"human\", \"Question: {input}\"],\n]);\n```\n\nThen, define the LLM model and bind the function arguments to it. By using `.bind()` we're adding the functions to the model for every call.\n\nWe also must wrap our `zod` schemas with `zodToJsonSchema` to appropriately format them for OpenAI.\n\n```typescript\nconst model = new ChatOpenAI({\n temperature: 0,\n}).bind({\n functions: [\n {\n name: \"food_recorder\",\n description: \"A function to record what food a user likes\",\n parameters: zodToJsonSchema(foodZodSchema),\n },\n {\n name: \"final_response\",\n description: \"The function that returns a response to the user\",\n parameters: zodToJsonSchema(finalResponseZodSchema),\n },\n ],\n});\n```\n\nNext we can define our memory using the `BufferMemory` class, and the output parser for structured data: `JsonOutputFunctionsParser`.", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1375272196", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3065, - "pr_file": "docs/docs/modules/chains/popular/structured_output.mdx", - "discussion_id": "1375272196", - "commented_code": "@@ -34,6 +34,167 @@ import FormatExample from \"@examples/chains/openai_functions_structured_format.t\n \n {FormatExample}\n \n+# With chat history memory\n+\n+In this example we'll construct a simple chain which uses OpenAI functions, structured output parser and chat history memory.\n+We'll provide the chain with two functions:\n+\n+- `food_recorder` -> A function to record what food a user likes.\n+- `final_response` -> The function that returns a response to the user\n+\n+After each LLM response, we'll take our structured data and save it to memory (with a little formatting too). 
By doing this, we're enabling our LLM to\n+remember what the user has said and use that information to generate a response.\n+\n+The first step is to create our functions. For this we'll use `zod` to create the function schemas.\n+\n+We're going to create two, one which returns an array of foods with their name, whether or not they're healthy and their color.\n+The second simply returns an object containing the final response.\n+\n+```typescript\n+const foodZodSchema = z.object({\n+ foods: z\n+ .array(\n+ z.object({\n+ name: z.string().describe(\"The name of the food item\"),\n+ healthy: z.boolean().describe(\"Whether the food is good for you\"),\n+ color: z.string().optional().describe(\"The color of the food\"),\n+ })\n+ )\n+ .describe(\"An array of food items mentioned in the text\"),\n+});\n+\n+const finalResponseZodSchema = z.object({\n+ finalResponse: z.string().describe(\"The final response\"),\n+});\n+```\n+\n+Next we'll construct the prompt. We have two \"messages\", one from the AI with a simple explanation on when to use the functions, and an input variable for the chat history. The second only contains the users question.\n+\n+```typescript\n+const prompt = ChatPromptTemplate.fromMessages([\n+ [\n+ \"ai\",\n+ `You are a helpful assistant.\n+You are provided with two functions: one to use to record what food the user likes, and one to use when replying to user questions.\n+Chat history: {history}`,\n+ ],\n+ [\"human\", \"Question: {input}\"],\n+]);\n+```\n+\n+Then, define the LLM model and bind the function arguments to it. By using `.bind()` we're adding the functions to the model for every call.\n+\n+We also must wrap our `zod` schemas with `zodToJsonSchema` to appropriately format them for OpenAI.\n+\n+```typescript\n+const model = new ChatOpenAI({\n+ temperature: 0,\n+}).bind({\n+ functions: [\n+ {\n+ name: \"food_recorder\",\n+ description: \"A function to record what food a user likes\",\n+ parameters: zodToJsonSchema(foodZodSchema),\n+ },\n+ {\n+ name: \"final_response\",\n+ description: \"The function that returns a response to the user\",\n+ parameters: zodToJsonSchema(finalResponseZodSchema),\n+ },\n+ ],\n+});\n+```\n+\n+Next we can define our memory using the `BufferMemory` class, and the output parser for structured data: `JsonOutputFunctionsParser`.", - "comment_created_at": "2023-10-28T15:33:49+00:00", - "comment_author": "jacoblee93", - "comment_body": "We should definitely add docs for `JsonOutputFunctionsParser`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1705941918", - "pr_number": 6414, - "pr_file": "docs/core_docs/docs/integrations/vectorstores/memory.ipynb", - "created_at": "2024-08-06T18:18:05+00:00", - "commented_code": "{\n \"cells\": [\n {\n \"cell_type\": \"raw\",\n \"id\": \"1957f5cb\",\n \"metadata\": {\n \"vscode\": {\n \"languageId\": \"raw\"\n }\n },\n \"source\": [\n \"---\\n\",\n \"sidebar_label: In Memory\\n\",\n \"---\"\n ]\n },\n {\n \"cell_type\": \"markdown\",\n \"id\": \"ef1f0986\",\n \"metadata\": {},\n \"source\": [\n \"# MemoryVectorStore\\n\",\n \"\\n\",\n \"MemoryVectorStore is an in-memory, ephemeral vectorstore that stores embeddings in-memory and does an exact, linear search for the most similar embeddings. 
The default similarity metric is cosine similarity, but can be changed to any of the similarity metrics supported by [ml-distance](https://mljs.github.io/distance/modules/similarity.html).\\n\"\n ]", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1705941918", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6414, - "pr_file": "docs/core_docs/docs/integrations/vectorstores/memory.ipynb", - "discussion_id": "1705941918", - "commented_code": "@@ -0,0 +1,382 @@\n+{\n+ \"cells\": [\n+ {\n+ \"cell_type\": \"raw\",\n+ \"id\": \"1957f5cb\",\n+ \"metadata\": {\n+ \"vscode\": {\n+ \"languageId\": \"raw\"\n+ }\n+ },\n+ \"source\": [\n+ \"---\\n\",\n+ \"sidebar_label: In Memory\\n\",\n+ \"---\"\n+ ]\n+ },\n+ {\n+ \"cell_type\": \"markdown\",\n+ \"id\": \"ef1f0986\",\n+ \"metadata\": {},\n+ \"source\": [\n+ \"# MemoryVectorStore\\n\",\n+ \"\\n\",\n+ \"MemoryVectorStore is an in-memory, ephemeral vectorstore that stores embeddings in-memory and does an exact, linear search for the most similar embeddings. The default similarity metric is cosine similarity, but can be changed to any of the similarity metrics supported by [ml-distance](https://mljs.github.io/distance/modules/similarity.html).\\n\"\n+ ]", - "comment_created_at": "2024-08-06T18:18:05+00:00", - "comment_author": "jacoblee93", - "comment_body": "Should we have the `## Overview` boilerplate?", - "pr_file_module": null - }, - { - "comment_id": "1705942925", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6414, - "pr_file": "docs/core_docs/docs/integrations/vectorstores/memory.ipynb", - "discussion_id": "1705941918", - "commented_code": "@@ -0,0 +1,382 @@\n+{\n+ \"cells\": [\n+ {\n+ \"cell_type\": \"raw\",\n+ \"id\": \"1957f5cb\",\n+ \"metadata\": {\n+ \"vscode\": {\n+ \"languageId\": \"raw\"\n+ }\n+ },\n+ \"source\": [\n+ \"---\\n\",\n+ \"sidebar_label: In Memory\\n\",\n+ \"---\"\n+ ]\n+ },\n+ {\n+ \"cell_type\": \"markdown\",\n+ \"id\": \"ef1f0986\",\n+ \"metadata\": {},\n+ \"source\": [\n+ \"# MemoryVectorStore\\n\",\n+ \"\\n\",\n+ \"MemoryVectorStore is an in-memory, ephemeral vectorstore that stores embeddings in-memory and does an exact, linear search for the most similar embeddings. The default similarity metric is cosine similarity, but can be changed to any of the similarity metrics supported by [ml-distance](https://mljs.github.io/distance/modules/similarity.html).\\n\"\n+ ]", - "comment_created_at": "2024-08-06T18:19:01+00:00", - "comment_author": "jacoblee93", - "comment_body": "Should at least link to vector store conceptual docs and details table", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-follow-standard-naming.json b/_reviewers/langchainjs-follow-standard-naming.json new file mode 100644 index 0000000..6f56be2 --- /dev/null +++ b/_reviewers/langchainjs-follow-standard-naming.json @@ -0,0 +1,94 @@ +[ + { + "discussion_id": "2169419919", + "pr_number": 8416, + "pr_file": "internal/net-mocks/README.md", + "created_at": "2025-06-26T16:10:00+00:00", + "commented_code": "# @langchain/net-mocks\n\nThis is an internal utility used within LangChain to record & mock network activity for use in tests. Here's how it works:\n\n1. **Record:** When running tests for the first time, `net-mocks` intercepts outgoing HTTP requests and records them, along with the corresponding responses, into a `.har` file. 
These files are stored in a `__data__` directory (by default) alongside the tests.", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2169419919", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8416, + "pr_file": "internal/net-mocks/README.md", + "discussion_id": "2169419919", + "commented_code": "@@ -0,0 +1,107 @@\n+# @langchain/net-mocks\n+\n+This is an internal utility used within LangChain to record & mock network activity for use in tests. Here's how it works:\n+\n+1. **Record:** When running tests for the first time, `net-mocks` intercepts outgoing HTTP requests and records them, along with the corresponding responses, into a `.har` file. These files are stored in a `__data__` directory (by default) alongside the tests.", + "comment_created_at": "2025-06-26T16:10:00+00:00", + "comment_author": "dqbd", + "comment_body": "_nit_: should we follow the `__snapshots__` convention instead? ", + "pr_file_module": null + }, + { + "comment_id": "2169436990", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8416, + "pr_file": "internal/net-mocks/README.md", + "discussion_id": "2169419919", + "commented_code": "@@ -0,0 +1,107 @@\n+# @langchain/net-mocks\n+\n+This is an internal utility used within LangChain to record & mock network activity for use in tests. Here's how it works:\n+\n+1. **Record:** When running tests for the first time, `net-mocks` intercepts outgoing HTTP requests and records them, along with the corresponding responses, into a `.har` file. These files are stored in a `__data__` directory (by default) alongside the tests.", + "comment_created_at": "2025-06-26T16:20:09+00:00", + "comment_author": "hntrl", + "comment_body": "~~I called it `__data__` since that's what we call it in the few other places where we have mocking in langchain today (google packages are probably the best example). I won't die on this hill if you think this is better though~~\r\n\r\nnvm I like snapshots more", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2099212550", + "pr_number": 8007, + "pr_file": "cookbook/oraclevs.md", + "created_at": "2025-05-21T03:06:47+00:00", + "commented_code": "# Oracle AI Vector Search with LangchainJS Integration", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2099212550", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8007, + "pr_file": "cookbook/oraclevs.md", + "discussion_id": "2099212550", + "commented_code": "@@ -0,0 +1,273 @@\n+# Oracle AI Vector Search with LangchainJS Integration", + "comment_created_at": "2025-05-21T03:06:47+00:00", + "comment_author": "cjbj", + "comment_body": "It would be nice to use the correct product name ", + "pr_file_module": null + }, + { + "comment_id": "2101482613", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8007, + "pr_file": "cookbook/oraclevs.md", + "discussion_id": "2099212550", + "commented_code": "@@ -0,0 +1,273 @@\n+# Oracle AI Vector Search with LangchainJS Integration", + "comment_created_at": "2025-05-22T02:09:37+00:00", + "comment_author": "hackerdave", + "comment_body": "Not sure if you are referring to Oracle AI Vector Search? This is how the feature is being referred to, e.g., https://www.oracle.com/database/ai-vector-search/ and https://docs.oracle.com/en/database/oracle/oracle-database/23/vecse/overview-ai-vector-search.html. 
However, did change other places where we referred to Oracle as Oracle Database.", + "pr_file_module": null + }, + { + "comment_id": "2101494767", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8007, + "pr_file": "cookbook/oraclevs.md", + "discussion_id": "2099212550", + "commented_code": "@@ -0,0 +1,273 @@\n+# Oracle AI Vector Search with LangchainJS Integration", + "comment_created_at": "2025-05-22T02:21:55+00:00", + "comment_author": "cjbj", + "comment_body": "What is \"LangchainJS\"? Did you mean LangChain or perhaps LangChain.js or something else?", + "pr_file_module": null + }, + { + "comment_id": "2102580670", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8007, + "pr_file": "cookbook/oraclevs.md", + "discussion_id": "2099212550", + "commented_code": "@@ -0,0 +1,273 @@\n+# Oracle AI Vector Search with LangchainJS Integration", + "comment_created_at": "2025-05-22T13:34:36+00:00", + "comment_author": "hackerdave", + "comment_body": "In this case LangChain.js. Changed all to proper case so they should be either LangChain or LangChain.js", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-follow-standard-naming.md b/_reviewers/langchainjs-follow-standard-naming.md index 6796be9..0d70e9a 100644 --- a/_reviewers/langchainjs-follow-standard-naming.md +++ b/_reviewers/langchainjs-follow-standard-naming.md @@ -35,99 +35,3 @@ For product and technology names, use proper capitalization and complete names: ``` Consistent naming improves readability, maintainability, and shows professionalism in your code and documentation. - - -[ - { - "discussion_id": "2169419919", - "pr_number": 8416, - "pr_file": "internal/net-mocks/README.md", - "created_at": "2025-06-26T16:10:00+00:00", - "commented_code": "# @langchain/net-mocks\n\nThis is an internal utility used within LangChain to record & mock network activity for use in tests. Here's how it works:\n\n1. **Record:** When running tests for the first time, `net-mocks` intercepts outgoing HTTP requests and records them, along with the corresponding responses, into a `.har` file. These files are stored in a `__data__` directory (by default) alongside the tests.", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2169419919", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8416, - "pr_file": "internal/net-mocks/README.md", - "discussion_id": "2169419919", - "commented_code": "@@ -0,0 +1,107 @@\n+# @langchain/net-mocks\n+\n+This is an internal utility used within LangChain to record & mock network activity for use in tests. Here's how it works:\n+\n+1. **Record:** When running tests for the first time, `net-mocks` intercepts outgoing HTTP requests and records them, along with the corresponding responses, into a `.har` file. These files are stored in a `__data__` directory (by default) alongside the tests.", - "comment_created_at": "2025-06-26T16:10:00+00:00", - "comment_author": "dqbd", - "comment_body": "_nit_: should we follow the `__snapshots__` convention instead? ", - "pr_file_module": null - }, - { - "comment_id": "2169436990", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8416, - "pr_file": "internal/net-mocks/README.md", - "discussion_id": "2169419919", - "commented_code": "@@ -0,0 +1,107 @@\n+# @langchain/net-mocks\n+\n+This is an internal utility used within LangChain to record & mock network activity for use in tests. Here's how it works:\n+\n+1. 
**Record:** When running tests for the first time, `net-mocks` intercepts outgoing HTTP requests and records them, along with the corresponding responses, into a `.har` file. These files are stored in a `__data__` directory (by default) alongside the tests.", - "comment_created_at": "2025-06-26T16:20:09+00:00", - "comment_author": "hntrl", - "comment_body": "~~I called it `__data__` since that's what we call it in the few other places where we have mocking in langchain today (google packages are probably the best example). I won't die on this hill if you think this is better though~~\r\n\r\nnvm I like snapshots more", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2099212550", - "pr_number": 8007, - "pr_file": "cookbook/oraclevs.md", - "created_at": "2025-05-21T03:06:47+00:00", - "commented_code": "# Oracle AI Vector Search with LangchainJS Integration", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2099212550", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8007, - "pr_file": "cookbook/oraclevs.md", - "discussion_id": "2099212550", - "commented_code": "@@ -0,0 +1,273 @@\n+# Oracle AI Vector Search with LangchainJS Integration", - "comment_created_at": "2025-05-21T03:06:47+00:00", - "comment_author": "cjbj", - "comment_body": "It would be nice to use the correct product name ", - "pr_file_module": null - }, - { - "comment_id": "2101482613", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8007, - "pr_file": "cookbook/oraclevs.md", - "discussion_id": "2099212550", - "commented_code": "@@ -0,0 +1,273 @@\n+# Oracle AI Vector Search with LangchainJS Integration", - "comment_created_at": "2025-05-22T02:09:37+00:00", - "comment_author": "hackerdave", - "comment_body": "Not sure if you are referring to Oracle AI Vector Search? This is how the feature is being referred to, e.g., https://www.oracle.com/database/ai-vector-search/ and https://docs.oracle.com/en/database/oracle/oracle-database/23/vecse/overview-ai-vector-search.html. However, did change other places where we referred to Oracle as Oracle Database.", - "pr_file_module": null - }, - { - "comment_id": "2101494767", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8007, - "pr_file": "cookbook/oraclevs.md", - "discussion_id": "2099212550", - "commented_code": "@@ -0,0 +1,273 @@\n+# Oracle AI Vector Search with LangchainJS Integration", - "comment_created_at": "2025-05-22T02:21:55+00:00", - "comment_author": "cjbj", - "comment_body": "What is \"LangchainJS\"? Did you mean LangChain or perhaps LangChain.js or something else?", - "pr_file_module": null - }, - { - "comment_id": "2102580670", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8007, - "pr_file": "cookbook/oraclevs.md", - "discussion_id": "2099212550", - "commented_code": "@@ -0,0 +1,273 @@\n+# Oracle AI Vector Search with LangchainJS Integration", - "comment_created_at": "2025-05-22T13:34:36+00:00", - "comment_author": "hackerdave", - "comment_body": "In this case LangChain.js. 
Changed all to proper case so they should be either LangChain or LangChain.js", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-match-configuration-documentation.json b/_reviewers/langchainjs-match-configuration-documentation.json new file mode 100644 index 0000000..7805fe4 --- /dev/null +++ b/_reviewers/langchainjs-match-configuration-documentation.json @@ -0,0 +1,58 @@ +[ + { + "discussion_id": "2103914077", + "pr_number": 8241, + "pr_file": "libs/langchain-mcp-adapters/README.md", + "created_at": "2025-05-23T06:42:58+00:00", + "commented_code": "| `prefixToolNameWithServerName` | boolean | `true` | If true, prefixes all tool names with the server name (e.g., `serverName__toolName`) |\n| `additionalToolNamePrefix` | string | `mcp` | Additional prefix to add to tool names (e.g., `prefix__serverName__toolName`) |\n\n## Tool Timeout Configuration\n\nMCP tools support timeout configuration through LangChain's standard `RunnableConfig` interface. This allows you to set custom timeouts on a per-tool-call basis:\n\n```typescript\nconst client = new MultiServerMCPClient({\n mcpServers: {\n 'data-processor': {\n command: 'python',\n args: ['data_server.py']\n }", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2103914077", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8241, + "pr_file": "libs/langchain-mcp-adapters/README.md", + "discussion_id": "2103914077", + "commented_code": "@@ -224,6 +224,57 @@ When loading MCP tools either directly through `loadMcpTools` or via `MultiServe\n | `prefixToolNameWithServerName` | boolean | `true` | If true, prefixes all tool names with the server name (e.g., `serverName__toolName`) |\n | `additionalToolNamePrefix` | string | `mcp` | Additional prefix to add to tool names (e.g., `prefix__serverName__toolName`) |\n \n+## Tool Timeout Configuration\n+\n+MCP tools support timeout configuration through LangChain's standard `RunnableConfig` interface. This allows you to set custom timeouts on a per-tool-call basis:\n+\n+```typescript\n+const client = new MultiServerMCPClient({\n+ mcpServers: {\n+ 'data-processor': {\n+ command: 'python',\n+ args: ['data_server.py']\n+ }", + "comment_created_at": "2025-05-23T06:42:58+00:00", + "comment_author": "Copilot", + "comment_body": "The example uses a `mcpServers` property which doesn’t match the constructor signature shown elsewhere (tests/config expect direct server mappings). It should accept servers directly instead of nesting under `mcpServers`.\n```suggestion\n 'data-processor': {\n command: 'python',\n args: ['data_server.py']\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2106472876", + "pr_number": 8252, + "pr_file": "libs/langchain-mcp-adapters/README.md", + "created_at": "2025-05-26T04:07:09+00:00", + "commented_code": "| `throwOnLoadError` | boolean | `true` | Whether to throw an error if a tool fails to load |\n| `prefixToolNameWithServerName` | boolean | `true` | If true, prefixes all tool names with the server name (e.g., `serverName__toolName`) |\n| `additionalToolNamePrefix` | string | `mcp` | Additional prefix to add to tool names (e.g., `prefix__serverName__toolName`) |\n| `useStandardContentBlocks` | boolean | `false` | If true, uses LangChain's standard multimodal content blocks. 
Defaults to false for backward compatibility; recommended to set true for new applications |", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2106472876", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8252, + "pr_file": "libs/langchain-mcp-adapters/README.md", + "discussion_id": "2106472876", + "commented_code": "@@ -223,6 +223,7 @@ When loading MCP tools either directly through `loadMcpTools` or via `MultiServe\n | `throwOnLoadError` | boolean | `true` | Whether to throw an error if a tool fails to load |\n | `prefixToolNameWithServerName` | boolean | `true` | If true, prefixes all tool names with the server name (e.g., `serverName__toolName`) |\n | `additionalToolNamePrefix` | string | `mcp` | Additional prefix to add to tool names (e.g., `prefix__serverName__toolName`) |\n+| `useStandardContentBlocks` | boolean | `false` | If true, uses LangChain's standard multimodal content blocks. Defaults to false for backward compatibility; recommended to set true for new applications |", + "comment_created_at": "2025-05-26T04:07:09+00:00", + "comment_author": "Copilot", + "comment_body": "Consider emphasizing that audio content is always processed using standard content blocks regardless of the flag, to clarify the behavioral difference between audio and other content types.", + "pr_file_module": null + }, + { + "comment_id": "2106473673", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8252, + "pr_file": "libs/langchain-mcp-adapters/README.md", + "discussion_id": "2106472876", + "commented_code": "@@ -223,6 +223,7 @@ When loading MCP tools either directly through `loadMcpTools` or via `MultiServe\n | `throwOnLoadError` | boolean | `true` | Whether to throw an error if a tool fails to load |\n | `prefixToolNameWithServerName` | boolean | `true` | If true, prefixes all tool names with the server name (e.g., `serverName__toolName`) |\n | `additionalToolNamePrefix` | string | `mcp` | Additional prefix to add to tool names (e.g., `prefix__serverName__toolName`) |\n+| `useStandardContentBlocks` | boolean | `false` | If true, uses LangChain's standard multimodal content blocks. Defaults to false for backward compatibility; recommended to set true for new applications |", + "comment_created_at": "2025-05-26T04:08:19+00:00", + "comment_author": "benjamincburns", + "comment_body": "Covered elsewhere.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-match-configuration-documentation.md b/_reviewers/langchainjs-match-configuration-documentation.md index d9d59d2..c4b528a 100644 --- a/_reviewers/langchainjs-match-configuration-documentation.md +++ b/_reviewers/langchainjs-match-configuration-documentation.md @@ -41,63 +41,3 @@ const client = new MultiServerMCPClient({ ``` When documenting configuration flags like `useStandardContentBlocks`, also include special case behavior (e.g., "audio content always uses standard content blocks regardless of this setting"). 
- - -[ - { - "discussion_id": "2103914077", - "pr_number": 8241, - "pr_file": "libs/langchain-mcp-adapters/README.md", - "created_at": "2025-05-23T06:42:58+00:00", - "commented_code": "| `prefixToolNameWithServerName` | boolean | `true` | If true, prefixes all tool names with the server name (e.g., `serverName__toolName`) |\n| `additionalToolNamePrefix` | string | `mcp` | Additional prefix to add to tool names (e.g., `prefix__serverName__toolName`) |\n\n## Tool Timeout Configuration\n\nMCP tools support timeout configuration through LangChain's standard `RunnableConfig` interface. This allows you to set custom timeouts on a per-tool-call basis:\n\n```typescript\nconst client = new MultiServerMCPClient({\n mcpServers: {\n 'data-processor': {\n command: 'python',\n args: ['data_server.py']\n }", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2103914077", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8241, - "pr_file": "libs/langchain-mcp-adapters/README.md", - "discussion_id": "2103914077", - "commented_code": "@@ -224,6 +224,57 @@ When loading MCP tools either directly through `loadMcpTools` or via `MultiServe\n | `prefixToolNameWithServerName` | boolean | `true` | If true, prefixes all tool names with the server name (e.g., `serverName__toolName`) |\n | `additionalToolNamePrefix` | string | `mcp` | Additional prefix to add to tool names (e.g., `prefix__serverName__toolName`) |\n \n+## Tool Timeout Configuration\n+\n+MCP tools support timeout configuration through LangChain's standard `RunnableConfig` interface. This allows you to set custom timeouts on a per-tool-call basis:\n+\n+```typescript\n+const client = new MultiServerMCPClient({\n+ mcpServers: {\n+ 'data-processor': {\n+ command: 'python',\n+ args: ['data_server.py']\n+ }", - "comment_created_at": "2025-05-23T06:42:58+00:00", - "comment_author": "Copilot", - "comment_body": "The example uses a `mcpServers` property which doesn\u2019t match the constructor signature shown elsewhere (tests/config expect direct server mappings). It should accept servers directly instead of nesting under `mcpServers`.\n```suggestion\n 'data-processor': {\n command: 'python',\n args: ['data_server.py']\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2106472876", - "pr_number": 8252, - "pr_file": "libs/langchain-mcp-adapters/README.md", - "created_at": "2025-05-26T04:07:09+00:00", - "commented_code": "| `throwOnLoadError` | boolean | `true` | Whether to throw an error if a tool fails to load |\n| `prefixToolNameWithServerName` | boolean | `true` | If true, prefixes all tool names with the server name (e.g., `serverName__toolName`) |\n| `additionalToolNamePrefix` | string | `mcp` | Additional prefix to add to tool names (e.g., `prefix__serverName__toolName`) |\n| `useStandardContentBlocks` | boolean | `false` | If true, uses LangChain's standard multimodal content blocks. 
Defaults to false for backward compatibility; recommended to set true for new applications |", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2106472876", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8252, - "pr_file": "libs/langchain-mcp-adapters/README.md", - "discussion_id": "2106472876", - "commented_code": "@@ -223,6 +223,7 @@ When loading MCP tools either directly through `loadMcpTools` or via `MultiServe\n | `throwOnLoadError` | boolean | `true` | Whether to throw an error if a tool fails to load |\n | `prefixToolNameWithServerName` | boolean | `true` | If true, prefixes all tool names with the server name (e.g., `serverName__toolName`) |\n | `additionalToolNamePrefix` | string | `mcp` | Additional prefix to add to tool names (e.g., `prefix__serverName__toolName`) |\n+| `useStandardContentBlocks` | boolean | `false` | If true, uses LangChain's standard multimodal content blocks. Defaults to false for backward compatibility; recommended to set true for new applications |", - "comment_created_at": "2025-05-26T04:07:09+00:00", - "comment_author": "Copilot", - "comment_body": "Consider emphasizing that audio content is always processed using standard content blocks regardless of the flag, to clarify the behavioral difference between audio and other content types.", - "pr_file_module": null - }, - { - "comment_id": "2106473673", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8252, - "pr_file": "libs/langchain-mcp-adapters/README.md", - "discussion_id": "2106472876", - "commented_code": "@@ -223,6 +223,7 @@ When loading MCP tools either directly through `loadMcpTools` or via `MultiServe\n | `throwOnLoadError` | boolean | `true` | Whether to throw an error if a tool fails to load |\n | `prefixToolNameWithServerName` | boolean | `true` | If true, prefixes all tool names with the server name (e.g., `serverName__toolName`) |\n | `additionalToolNamePrefix` | string | `mcp` | Additional prefix to add to tool names (e.g., `prefix__serverName__toolName`) |\n+| `useStandardContentBlocks` | boolean | `false` | If true, uses LangChain's standard multimodal content blocks. 
Defaults to false for backward compatibility; recommended to set true for new applications |", - "comment_created_at": "2025-05-26T04:08:19+00:00", - "comment_author": "benjamincburns", - "comment_body": "Covered elsewhere.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-platform-appropriate-environment-variables.json b/_reviewers/langchainjs-platform-appropriate-environment-variables.json new file mode 100644 index 0000000..c9dac64 --- /dev/null +++ b/_reviewers/langchainjs-platform-appropriate-environment-variables.json @@ -0,0 +1,58 @@ +[ + { + "discussion_id": "1653253310", + "pr_number": 5624, + "pr_file": "docs/core_docs/src/theme/ChatModelTabs.js", + "created_at": "2024-06-25T17:27:15+00:00", + "commented_code": "mistralParams: `{\\n model: \"mistral-large-latest\",\\n temperature: 0\\n}`,\n groqParams: `{\\n model: \"mixtral-8x7b-32768\",\\n temperature: 0\\n}`,\n vertexParams: `{\\n model: \"gemini-1.5-pro\",\\n temperature: 0\\n}`,\n azureParams: `{\\n azure_endpoint=os.environ[\"AZURE_OPENAI_ENDPOINT\"],\\n azure_deployment=os.environ[\"AZURE_OPENAI_DEPLOYMENT_NAME\"],\\n openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"]\\n}`,", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1653253310", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5624, + "pr_file": "docs/core_docs/src/theme/ChatModelTabs.js", + "discussion_id": "1653253310", + "commented_code": "@@ -31,6 +31,7 @@ const DEFAULTS = {\n mistralParams: `{\\n model: \"mistral-large-latest\",\\n temperature: 0\\n}`,\n groqParams: `{\\n model: \"mixtral-8x7b-32768\",\\n temperature: 0\\n}`,\n vertexParams: `{\\n model: \"gemini-1.5-pro\",\\n temperature: 0\\n}`,\n+ azureParams: `{\\n azure_endpoint=os.environ[\"AZURE_OPENAI_ENDPOINT\"],\\n azure_deployment=os.environ[\"AZURE_OPENAI_DEPLOYMENT_NAME\"],\\n openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"]\\n}`,", + "comment_created_at": "2024-06-25T17:27:15+00:00", + "comment_author": "jacoblee93", + "comment_body": "JavaScript uses `process.env`", + "pr_file_module": null + }, + { + "comment_id": "2063486552", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5624, + "pr_file": "docs/core_docs/src/theme/ChatModelTabs.js", + "discussion_id": "1653253310", + "commented_code": "@@ -31,6 +31,7 @@ const DEFAULTS = {\n mistralParams: `{\\n model: \"mistral-large-latest\",\\n temperature: 0\\n}`,\n groqParams: `{\\n model: \"mixtral-8x7b-32768\",\\n temperature: 0\\n}`,\n vertexParams: `{\\n model: \"gemini-1.5-pro\",\\n temperature: 0\\n}`,\n+ azureParams: `{\\n azure_endpoint=os.environ[\"AZURE_OPENAI_ENDPOINT\"],\\n azure_deployment=os.environ[\"AZURE_OPENAI_DEPLOYMENT_NAME\"],\\n openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"]\\n}`,", + "comment_created_at": "2025-04-28T11:41:37+00:00", + "comment_author": "benjamincburns", + "comment_body": "```suggestion\r\n azureParams: `{\\n azure_endpoint=process.env.AZURE_OPENAI_ENDPOINT,\\n azure_deployment=process.env.AZURE_OPENAI_DEPLOYMENT_NAME,\\n openai_api_version=process.env.AZURE_OPENAI_API_VERSION\\n}`,\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1438352901", + "pr_number": 3794, + "pr_file": "docs/core_docs/scripts/notebook_init_utils.js", + "created_at": "2023-12-29T17:46:14+00:00", + "commented_code": "/* eslint-disable */", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1438352901", + "repo_full_name": 
"langchain-ai/langchainjs", + "pr_number": 3794, + "pr_file": "docs/core_docs/scripts/notebook_init_utils.js", + "discussion_id": "1438352901", + "commented_code": "@@ -0,0 +1,11 @@\n+/* eslint-disable */", + "comment_created_at": "2023-12-29T17:46:14+00:00", + "comment_author": "jacoblee93", + "comment_body": "Yeah let's cut these if we can't find an automatic way to import them. If we really need something Node specific we can just use `.mdx`\r\n\r\nI can also reach out to the Deno team to see if they have suggestions, we have a channel with them.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-platform-appropriate-environment-variables.md b/_reviewers/langchainjs-platform-appropriate-environment-variables.md index 583aa1a..9ccf055 100644 --- a/_reviewers/langchainjs-platform-appropriate-environment-variables.md +++ b/_reviewers/langchainjs-platform-appropriate-environment-variables.md @@ -30,63 +30,3 @@ const config = { ``` For platform-specific configurations, consider using appropriate file formats (like .mdx for Node-specific code) or implement automatic import methods that work across environments to improve compatibility. - - -[ - { - "discussion_id": "1653253310", - "pr_number": 5624, - "pr_file": "docs/core_docs/src/theme/ChatModelTabs.js", - "created_at": "2024-06-25T17:27:15+00:00", - "commented_code": "mistralParams: `{\\n model: \"mistral-large-latest\",\\n temperature: 0\\n}`,\n groqParams: `{\\n model: \"mixtral-8x7b-32768\",\\n temperature: 0\\n}`,\n vertexParams: `{\\n model: \"gemini-1.5-pro\",\\n temperature: 0\\n}`,\n azureParams: `{\\n azure_endpoint=os.environ[\"AZURE_OPENAI_ENDPOINT\"],\\n azure_deployment=os.environ[\"AZURE_OPENAI_DEPLOYMENT_NAME\"],\\n openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"]\\n}`,", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1653253310", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5624, - "pr_file": "docs/core_docs/src/theme/ChatModelTabs.js", - "discussion_id": "1653253310", - "commented_code": "@@ -31,6 +31,7 @@ const DEFAULTS = {\n mistralParams: `{\\n model: \"mistral-large-latest\",\\n temperature: 0\\n}`,\n groqParams: `{\\n model: \"mixtral-8x7b-32768\",\\n temperature: 0\\n}`,\n vertexParams: `{\\n model: \"gemini-1.5-pro\",\\n temperature: 0\\n}`,\n+ azureParams: `{\\n azure_endpoint=os.environ[\"AZURE_OPENAI_ENDPOINT\"],\\n azure_deployment=os.environ[\"AZURE_OPENAI_DEPLOYMENT_NAME\"],\\n openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"]\\n}`,", - "comment_created_at": "2024-06-25T17:27:15+00:00", - "comment_author": "jacoblee93", - "comment_body": "JavaScript uses `process.env`", - "pr_file_module": null - }, - { - "comment_id": "2063486552", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5624, - "pr_file": "docs/core_docs/src/theme/ChatModelTabs.js", - "discussion_id": "1653253310", - "commented_code": "@@ -31,6 +31,7 @@ const DEFAULTS = {\n mistralParams: `{\\n model: \"mistral-large-latest\",\\n temperature: 0\\n}`,\n groqParams: `{\\n model: \"mixtral-8x7b-32768\",\\n temperature: 0\\n}`,\n vertexParams: `{\\n model: \"gemini-1.5-pro\",\\n temperature: 0\\n}`,\n+ azureParams: `{\\n azure_endpoint=os.environ[\"AZURE_OPENAI_ENDPOINT\"],\\n azure_deployment=os.environ[\"AZURE_OPENAI_DEPLOYMENT_NAME\"],\\n openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"]\\n}`,", - "comment_created_at": "2025-04-28T11:41:37+00:00", - "comment_author": "benjamincburns", - 
"comment_body": "```suggestion\r\n azureParams: `{\\n azure_endpoint=process.env.AZURE_OPENAI_ENDPOINT,\\n azure_deployment=process.env.AZURE_OPENAI_DEPLOYMENT_NAME,\\n openai_api_version=process.env.AZURE_OPENAI_API_VERSION\\n}`,\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1438352901", - "pr_number": 3794, - "pr_file": "docs/core_docs/scripts/notebook_init_utils.js", - "created_at": "2023-12-29T17:46:14+00:00", - "commented_code": "/* eslint-disable */", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1438352901", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3794, - "pr_file": "docs/core_docs/scripts/notebook_init_utils.js", - "discussion_id": "1438352901", - "commented_code": "@@ -0,0 +1,11 @@\n+/* eslint-disable */", - "comment_created_at": "2023-12-29T17:46:14+00:00", - "comment_author": "jacoblee93", - "comment_body": "Yeah let's cut these if we can't find an automatic way to import them. If we really need something Node specific we can just use `.mdx`\r\n\r\nI can also reach out to the Deno team to see if they have suggestions, we have a channel with them.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-prefer-nullish-coalescing.json b/_reviewers/langchainjs-prefer-nullish-coalescing.json new file mode 100644 index 0000000..9789264 --- /dev/null +++ b/_reviewers/langchainjs-prefer-nullish-coalescing.json @@ -0,0 +1,194 @@ +[ + { + "discussion_id": "2093795865", + "pr_number": 8141, + "pr_file": "libs/langchain-anthropic/src/utils/message_inputs.ts", + "created_at": "2025-05-17T00:38:48+00:00", + "commented_code": "content: [\n {\n type: \"tool_result\",\n content: _formatContent(message.content),\n // rare case: message.content could be undefined\n ...(message.content != null ? { content: _formatContent(message.content) } : {}),", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2093795865", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8141, + "pr_file": "libs/langchain-anthropic/src/utils/message_inputs.ts", + "discussion_id": "2093795865", + "commented_code": "@@ -116,7 +116,8 @@ function _ensureMessageContents(\n content: [\n {\n type: \"tool_result\",\n- content: _formatContent(message.content),\n+ // rare case: message.content could be undefined\n+ ...(message.content != null ? { content: _formatContent(message.content) } : {}),", + "comment_created_at": "2025-05-17T00:38:48+00:00", + "comment_author": "benjamincburns", + "comment_body": "Slightly modified from the original submission. Anthropic's API reference shows `content` here as being an _optional_ (but not nullable) field of type `string | object[]`. 
The changes in `core` will make this case no longer possible, but this should handle it for older versions that weren't coalescing `undefined` tool output to `\"\"`.\r\n\r\n![image](https://github.com/user-attachments/assets/5e282e7d-5766-47b7-8d5b-39231a4fbc8c)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1433347005", + "pr_number": 3448, + "pr_file": "langchain/src/chat_models/azure_ml.ts", + "created_at": "2023-12-21T01:37:48+00:00", + "commented_code": "import { SimpleChatModel, BaseChatModelParams } from \"./base.js\";\nimport { AzureMLHttpClient } from \"../llms/azure_ml.js\";\nimport { getEnvironmentVariable } from \"../util/env.js\";\nimport { BaseMessage } from \"../schema/index.js\";\n\nexport interface ChatContentFormatter {\n /**\n * Formats the request payload for the AzureML endpoint. It takes a\n * prompt and a dictionary of model arguments as input and returns a\n * string representing the formatted request payload.\n * @param messages A list of messages for the chat so far.\n * @param modelArgs A dictionary of model arguments.\n * @returns A string representing the formatted request payload.\n */\n formatRequestPayload: (\n messages: BaseMessage[],\n modelArgs: Record\n ) => string;\n /**\n * Formats the response payload from the AzureML endpoint. It takes a\n * response payload as input and returns a string representing the\n * formatted response.\n * @param responsePayload The response payload from the AzureML endpoint.\n * @returns A string representing the formatted response.\n */\n formatResponsePayload: (output: string) => string;\n}\n\nexport class LlamaContentFormatter implements ChatContentFormatter {\n _convertMessageToRecord(message: BaseMessage): Record {\n if (message._getType() === \"human\") {\n return { role: \"user\", content: message.content };\n } else if (message._getType() === \"ai\") {\n return { role: \"assistant\", content: message.content };\n } else {\n return { role: message._getType(), content: message.content };\n }\n }\n\n formatRequestPayload(\n messages: BaseMessage[],\n modelArgs: Record\n ): string {\n let msgs = messages.map((message) => {\n this._convertMessageToRecord(message);\n });\n return JSON.stringify({\n input_data: {\n input_string: msgs,\n parameters: modelArgs,\n },\n });\n }\n\n formatResponsePayload(responsePayload: string) {\n const response = JSON.parse(responsePayload);\n return response.output;\n }\n}\n\n/**\n * Type definition for the input parameters of the AzureMLChatOnlineEndpoint class.\n */\nexport interface AzureMLChatParams extends BaseChatModelParams {\n endpointUrl?: string;\n endpointApiKey?: string;\n modelArgs?: Record;\n contentFormatter?: ChatContentFormatter;\n}\n\n/**\n * Class that represents the chat model. It extends the SimpleChatModel class and implements the AzureMLChatInput interface.\n */\nexport class AzureMLChatOnlineEndpoint\n extends SimpleChatModel\n implements AzureMLChatParams\n{\n static lc_name() {\n return \"AzureMLChatOnlineEndpoint\";\n }\n static lc_description() {\n return \"A class for interacting with AzureML Chat models.\";\n }\n endpointUrl: string;\n endpointApiKey: string;\n modelArgs?: Record;\n contentFormatter: ChatContentFormatter;\n httpClient: AzureMLHttpClient;\n\n constructor(fields: AzureMLChatParams) {\n super(fields ?? 
{});\n if (!fields?.endpointUrl && !getEnvironmentVariable(\"AZUREML_URL\")) {\n throw new Error(\"No Azure ML Url found.\");\n }\n if (!fields?.endpointApiKey && !getEnvironmentVariable(\"AZUREML_API_KEY\")) {\n throw new Error(\"No Azure ML ApiKey found.\");\n }\n if (!fields?.contentFormatter) {\n throw new Error(\"No Content Formatter provided.\");\n }\n\n this.endpointUrl =\n fields.endpointUrl || getEnvironmentVariable(\"AZUREML_URL\") + \"\";", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1433347005", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3448, + "pr_file": "langchain/src/chat_models/azure_ml.ts", + "discussion_id": "1433347005", + "commented_code": "@@ -0,0 +1,142 @@\n+import { SimpleChatModel, BaseChatModelParams } from \"./base.js\";\n+import { AzureMLHttpClient } from \"../llms/azure_ml.js\";\n+import { getEnvironmentVariable } from \"../util/env.js\";\n+import { BaseMessage } from \"../schema/index.js\";\n+\n+export interface ChatContentFormatter {\n+ /**\n+ * Formats the request payload for the AzureML endpoint. It takes a\n+ * prompt and a dictionary of model arguments as input and returns a\n+ * string representing the formatted request payload.\n+ * @param messages A list of messages for the chat so far.\n+ * @param modelArgs A dictionary of model arguments.\n+ * @returns A string representing the formatted request payload.\n+ */\n+ formatRequestPayload: (\n+ messages: BaseMessage[],\n+ modelArgs: Record\n+ ) => string;\n+ /**\n+ * Formats the response payload from the AzureML endpoint. It takes a\n+ * response payload as input and returns a string representing the\n+ * formatted response.\n+ * @param responsePayload The response payload from the AzureML endpoint.\n+ * @returns A string representing the formatted response.\n+ */\n+ formatResponsePayload: (output: string) => string;\n+}\n+\n+export class LlamaContentFormatter implements ChatContentFormatter {\n+ _convertMessageToRecord(message: BaseMessage): Record {\n+ if (message._getType() === \"human\") {\n+ return { role: \"user\", content: message.content };\n+ } else if (message._getType() === \"ai\") {\n+ return { role: \"assistant\", content: message.content };\n+ } else {\n+ return { role: message._getType(), content: message.content };\n+ }\n+ }\n+\n+ formatRequestPayload(\n+ messages: BaseMessage[],\n+ modelArgs: Record\n+ ): string {\n+ let msgs = messages.map((message) => {\n+ this._convertMessageToRecord(message);\n+ });\n+ return JSON.stringify({\n+ input_data: {\n+ input_string: msgs,\n+ parameters: modelArgs,\n+ },\n+ });\n+ }\n+\n+ formatResponsePayload(responsePayload: string) {\n+ const response = JSON.parse(responsePayload);\n+ return response.output;\n+ }\n+}\n+\n+/**\n+ * Type definition for the input parameters of the AzureMLChatOnlineEndpoint class.\n+ */\n+export interface AzureMLChatParams extends BaseChatModelParams {\n+ endpointUrl?: string;\n+ endpointApiKey?: string;\n+ modelArgs?: Record;\n+ contentFormatter?: ChatContentFormatter;\n+}\n+\n+/**\n+ * Class that represents the chat model. 
It extends the SimpleChatModel class and implements the AzureMLChatInput interface.\n+ */\n+export class AzureMLChatOnlineEndpoint\n+ extends SimpleChatModel\n+ implements AzureMLChatParams\n+{\n+ static lc_name() {\n+ return \"AzureMLChatOnlineEndpoint\";\n+ }\n+ static lc_description() {\n+ return \"A class for interacting with AzureML Chat models.\";\n+ }\n+ endpointUrl: string;\n+ endpointApiKey: string;\n+ modelArgs?: Record;\n+ contentFormatter: ChatContentFormatter;\n+ httpClient: AzureMLHttpClient;\n+\n+ constructor(fields: AzureMLChatParams) {\n+ super(fields ?? {});\n+ if (!fields?.endpointUrl && !getEnvironmentVariable(\"AZUREML_URL\")) {\n+ throw new Error(\"No Azure ML Url found.\");\n+ }\n+ if (!fields?.endpointApiKey && !getEnvironmentVariable(\"AZUREML_API_KEY\")) {\n+ throw new Error(\"No Azure ML ApiKey found.\");\n+ }\n+ if (!fields?.contentFormatter) {\n+ throw new Error(\"No Content Formatter provided.\");\n+ }\n+\n+ this.endpointUrl =\n+ fields.endpointUrl || getEnvironmentVariable(\"AZUREML_URL\") + \"\";", + "comment_created_at": "2023-12-21T01:37:48+00:00", + "comment_author": "bracesproul", + "comment_body": "Prefer `??`. Please change for the rest. Also why is there `+ \"\"`?\r\n```suggestion\r\n fields.endpointUrl ?? getEnvironmentVariable(\"AZUREML_URL\") + \"\";\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1933082744", + "pr_number": 7616, + "pr_file": "libs/langchain-openai/src/chat_models.ts", + "created_at": "2025-01-29T01:27:02+00:00", + "commented_code": "this.reasoningEffort = fields?.reasoningEffort;\n\n if (this.model === \"o1\") {\n this.disableStreaming = true;\n this.disableStreaming =\n fields?.disableStreaming != null ? fields?.disableStreaming : true;", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1933082744", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7616, + "pr_file": "libs/langchain-openai/src/chat_models.ts", + "discussion_id": "1933082744", + "commented_code": "@@ -1002,7 +1002,8 @@ export class ChatOpenAI<\n this.reasoningEffort = fields?.reasoningEffort;\n \n if (this.model === \"o1\") {\n- this.disableStreaming = true;\n+ this.disableStreaming =\n+ fields?.disableStreaming != null ? fields?.disableStreaming : true;", + "comment_created_at": "2025-01-29T01:27:02+00:00", + "comment_author": "bracesproul", + "comment_body": "this is the one area which is technically breaking", + "pr_file_module": null + }, + { + "comment_id": "1933085046", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7616, + "pr_file": "libs/langchain-openai/src/chat_models.ts", + "discussion_id": "1933082744", + "commented_code": "@@ -1002,7 +1002,8 @@ export class ChatOpenAI<\n this.reasoningEffort = fields?.reasoningEffort;\n \n if (this.model === \"o1\") {\n- this.disableStreaming = true;\n+ this.disableStreaming =\n+ fields?.disableStreaming != null ? fields?.disableStreaming : true;", + "comment_created_at": "2025-01-29T01:29:26+00:00", + "comment_author": "jacoblee93", + "comment_body": "Only if someone's passing `disableStreaming: false` right? 
I think fine", + "pr_file_module": null + }, + { + "comment_id": "1933085495", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7616, + "pr_file": "libs/langchain-openai/src/chat_models.ts", + "discussion_id": "1933082744", + "commented_code": "@@ -1002,7 +1002,8 @@ export class ChatOpenAI<\n this.reasoningEffort = fields?.reasoningEffort;\n \n if (this.model === \"o1\") {\n- this.disableStreaming = true;\n+ this.disableStreaming =\n+ fields?.disableStreaming != null ? fields?.disableStreaming : true;", + "comment_created_at": "2025-01-29T01:29:45+00:00", + "comment_author": "jacoblee93", + "comment_body": "You can just do `fields?.disableStreaming ?? true`", + "pr_file_module": null + }, + { + "comment_id": "1933086166", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7616, + "pr_file": "libs/langchain-openai/src/chat_models.ts", + "discussion_id": "1933082744", + "commented_code": "@@ -1002,7 +1002,8 @@ export class ChatOpenAI<\n this.reasoningEffort = fields?.reasoningEffort;\n \n if (this.model === \"o1\") {\n- this.disableStreaming = true;\n+ this.disableStreaming =\n+ fields?.disableStreaming != null ? fields?.disableStreaming : true;", + "comment_created_at": "2025-01-29T01:30:43+00:00", + "comment_author": "bracesproul", + "comment_body": "ya `??` would work", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1456374070", + "pr_number": 4044, + "pr_file": "libs/langchain-community/src/vectorstores/azure_aisearch.ts", + "created_at": "2024-01-17T19:33:07+00:00", + "commented_code": "import * as uuid from \"uuid\";\nimport {\n SearchClient,\n SearchIndexClient,\n AzureKeyCredential,\n IndexingResult,\n SearchIndex,\n} from \"@azure/search-documents\";\nimport {\n MaxMarginalRelevanceSearchOptions,\n VectorStore,\n} from \"@langchain/core/vectorstores\";\nimport type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\nimport { Document } from \"@langchain/core/documents\";\nimport { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\nimport { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n\n/**\n * Azure AI Search query type.\n */\nexport const AzureAISearchQueryType = {\n /** Vector search. */\n Similarity: \"similarity\",\n /** Hybrid full text and vector search. */\n SimilarityHybrid: \"similarity_hybrid\",\n /** Hybrid full text and vector search with semantic ranking. 
*/\n SemanticHybrid: \"semantic_hybrid\",\n} as const;\n\n/**\n * Azure AI Search query type.\n */\nexport type AzureAISearchQueryType =\n (typeof AzureAISearchQueryType)[keyof typeof AzureAISearchQueryType];\n\n/**\n * Azure AI Search settings.\n */\nexport interface AzureAISearchQueryOptions {\n readonly type: AzureAISearchQueryType;\n readonly semanticConfigurationName?: string;\n}\n\n/**\n * Configuration options for the `AzureAISearchStore` constructor.\n */\nexport interface AzureAISearchConfig {\n readonly client?: SearchClient;\n readonly indexName?: string;\n readonly endpoint?: string;\n readonly key?: string;\n readonly search: AzureAISearchQueryOptions;\n /**\n * The amount of documents to chunk by when adding vectors.\n * @default 100\n */\n readonly chunkSize?: number;\n /**\n * The amount of documents to embed at once when adding documents.\n * Note that some providers like Azure OpenAI can only embed 16 documents\n * at a time.\n * @default 16\n */\n readonly embeddingBatchSize?: number;\n}\n\n/**\n * Azure AI Search options metadata schema.\n * If yout want to add custom data, use the attributes property.\n */\nexport type AzureAISearchDocumentMetadata = {\n source: string;\n attributes?: Array<{ key: string; value: string }>;\n};\n\n/**\n * Azure AI Search indexed document.\n */\nexport type AzureAISearchDocument = {\n id: string;\n content: string;\n content_vector: number[];\n metadata: AzureAISearchDocumentMetadata;\n};\n\n/**\n * Azure AI Search options for adding documents.\n */\nexport type AzureAISearchAddDocumentsOptions = {\n ids?: string[];\n};\n\nconst DEFAULT_FIELD_ID = \"id\";\nconst DEFAULT_FIELD_CONTENT = \"content\";\nconst DEFAULT_FIELD_CONTENT_VECTOR = \"content_vector\";\nconst DEFAULT_FIELD_METADATA = \"metadata\";\nconst DEFAULT_FIELD_METADATA_SOURCE = \"source\";\nconst DEFAULT_FIELD_METADATA_ATTRS = \"attributes\";\n\n/**\n * Azure AI Search vector store.\n * To use this, you should have:\n * - the `@azure/search-documents` NPM package installed\n * - an endpoint and key to the Azure AI Search instance\n *\n * If you directly provide a `SearchClient` instance, you need to ensure that\n * an index has been created. When using and endpoint and key, the index will\n * be created automatically if it does not exist.\n */\nexport class AzureAISearchVectorStore extends VectorStore {\n declare FilterType: string;\n\n get lc_secrets(): { [key: string]: string } {\n return {\n endpoint: \"AZURE_AISEARCH_ENDPOINT\",\n key: \"AZURE_AISEARCH_KEY\",\n };\n }\n\n _vectorstoreType(): string {\n return \"azure_aisearch\";\n }\n\n private readonly initPromise: Promise;\n\n private readonly client: SearchClient;\n\n private readonly indexName: string;\n\n private readonly chunkSize: number;\n\n private readonly embeddingBatchSize: number;\n\n private readonly options: AzureAISearchQueryOptions;\n\n constructor(embeddings: EmbeddingsInterface, config: AzureAISearchConfig) {\n super(embeddings, config);\n\n const endpoint =\n config.endpoint ?? getEnvironmentVariable(\"AZURE_AISEARCH_ENDPOINT\");\n const key = config.key ?? 
getEnvironmentVariable(\"AZURE_AISEARCH_KEY\");\n\n if (!config.client && !endpoint && !key) {", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1456374070", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 4044, + "pr_file": "libs/langchain-community/src/vectorstores/azure_aisearch.ts", + "discussion_id": "1456374070", + "commented_code": "@@ -0,0 +1,694 @@\n+import * as uuid from \"uuid\";\n+import {\n+ SearchClient,\n+ SearchIndexClient,\n+ AzureKeyCredential,\n+ IndexingResult,\n+ SearchIndex,\n+} from \"@azure/search-documents\";\n+import {\n+ MaxMarginalRelevanceSearchOptions,\n+ VectorStore,\n+} from \"@langchain/core/vectorstores\";\n+import type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { Document } from \"@langchain/core/documents\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export const AzureAISearchQueryType = {\n+ /** Vector search. */\n+ Similarity: \"similarity\",\n+ /** Hybrid full text and vector search. */\n+ SimilarityHybrid: \"similarity_hybrid\",\n+ /** Hybrid full text and vector search with semantic ranking. */\n+ SemanticHybrid: \"semantic_hybrid\",\n+} as const;\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export type AzureAISearchQueryType =\n+ (typeof AzureAISearchQueryType)[keyof typeof AzureAISearchQueryType];\n+\n+/**\n+ * Azure AI Search settings.\n+ */\n+export interface AzureAISearchQueryOptions {\n+ readonly type: AzureAISearchQueryType;\n+ readonly semanticConfigurationName?: string;\n+}\n+\n+/**\n+ * Configuration options for the `AzureAISearchStore` constructor.\n+ */\n+export interface AzureAISearchConfig {\n+ readonly client?: SearchClient;\n+ readonly indexName?: string;\n+ readonly endpoint?: string;\n+ readonly key?: string;\n+ readonly search: AzureAISearchQueryOptions;\n+ /**\n+ * The amount of documents to chunk by when adding vectors.\n+ * @default 100\n+ */\n+ readonly chunkSize?: number;\n+ /**\n+ * The amount of documents to embed at once when adding documents.\n+ * Note that some providers like Azure OpenAI can only embed 16 documents\n+ * at a time.\n+ * @default 16\n+ */\n+ readonly embeddingBatchSize?: number;\n+}\n+\n+/**\n+ * Azure AI Search options metadata schema.\n+ * If yout want to add custom data, use the attributes property.\n+ */\n+export type AzureAISearchDocumentMetadata = {\n+ source: string;\n+ attributes?: Array<{ key: string; value: string }>;\n+};\n+\n+/**\n+ * Azure AI Search indexed document.\n+ */\n+export type AzureAISearchDocument = {\n+ id: string;\n+ content: string;\n+ content_vector: number[];\n+ metadata: AzureAISearchDocumentMetadata;\n+};\n+\n+/**\n+ * Azure AI Search options for adding documents.\n+ */\n+export type AzureAISearchAddDocumentsOptions = {\n+ ids?: string[];\n+};\n+\n+const DEFAULT_FIELD_ID = \"id\";\n+const DEFAULT_FIELD_CONTENT = \"content\";\n+const DEFAULT_FIELD_CONTENT_VECTOR = \"content_vector\";\n+const DEFAULT_FIELD_METADATA = \"metadata\";\n+const DEFAULT_FIELD_METADATA_SOURCE = \"source\";\n+const DEFAULT_FIELD_METADATA_ATTRS = \"attributes\";\n+\n+/**\n+ * Azure AI Search vector store.\n+ * To use this, you should have:\n+ * - the `@azure/search-documents` NPM package installed\n+ * - an endpoint and key to the Azure AI Search instance\n+ *\n+ * If you directly provide a `SearchClient` instance, you need to ensure that\n+ * an index has 
been created. When using and endpoint and key, the index will\n+ * be created automatically if it does not exist.\n+ */\n+export class AzureAISearchVectorStore extends VectorStore {\n+ declare FilterType: string;\n+\n+ get lc_secrets(): { [key: string]: string } {\n+ return {\n+ endpoint: \"AZURE_AISEARCH_ENDPOINT\",\n+ key: \"AZURE_AISEARCH_KEY\",\n+ };\n+ }\n+\n+ _vectorstoreType(): string {\n+ return \"azure_aisearch\";\n+ }\n+\n+ private readonly initPromise: Promise;\n+\n+ private readonly client: SearchClient;\n+\n+ private readonly indexName: string;\n+\n+ private readonly chunkSize: number;\n+\n+ private readonly embeddingBatchSize: number;\n+\n+ private readonly options: AzureAISearchQueryOptions;\n+\n+ constructor(embeddings: EmbeddingsInterface, config: AzureAISearchConfig) {\n+ super(embeddings, config);\n+\n+ const endpoint =\n+ config.endpoint ?? getEnvironmentVariable(\"AZURE_AISEARCH_ENDPOINT\");\n+ const key = config.key ?? getEnvironmentVariable(\"AZURE_AISEARCH_KEY\");\n+\n+ if (!config.client && !endpoint && !key) {", + "comment_created_at": "2024-01-17T19:33:07+00:00", + "comment_author": "pablocastro", + "comment_body": "Wouldn't you want `(!config.client && (!endpoint || !key))` so we also raise an error if either the endpoint or the key are missing when not providing a pre-created client?", + "pr_file_module": null + }, + { + "comment_id": "1457265765", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 4044, + "pr_file": "libs/langchain-community/src/vectorstores/azure_aisearch.ts", + "discussion_id": "1456374070", + "commented_code": "@@ -0,0 +1,694 @@\n+import * as uuid from \"uuid\";\n+import {\n+ SearchClient,\n+ SearchIndexClient,\n+ AzureKeyCredential,\n+ IndexingResult,\n+ SearchIndex,\n+} from \"@azure/search-documents\";\n+import {\n+ MaxMarginalRelevanceSearchOptions,\n+ VectorStore,\n+} from \"@langchain/core/vectorstores\";\n+import type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { Document } from \"@langchain/core/documents\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export const AzureAISearchQueryType = {\n+ /** Vector search. */\n+ Similarity: \"similarity\",\n+ /** Hybrid full text and vector search. */\n+ SimilarityHybrid: \"similarity_hybrid\",\n+ /** Hybrid full text and vector search with semantic ranking. 
*/\n+ SemanticHybrid: \"semantic_hybrid\",\n+} as const;\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export type AzureAISearchQueryType =\n+ (typeof AzureAISearchQueryType)[keyof typeof AzureAISearchQueryType];\n+\n+/**\n+ * Azure AI Search settings.\n+ */\n+export interface AzureAISearchQueryOptions {\n+ readonly type: AzureAISearchQueryType;\n+ readonly semanticConfigurationName?: string;\n+}\n+\n+/**\n+ * Configuration options for the `AzureAISearchStore` constructor.\n+ */\n+export interface AzureAISearchConfig {\n+ readonly client?: SearchClient;\n+ readonly indexName?: string;\n+ readonly endpoint?: string;\n+ readonly key?: string;\n+ readonly search: AzureAISearchQueryOptions;\n+ /**\n+ * The amount of documents to chunk by when adding vectors.\n+ * @default 100\n+ */\n+ readonly chunkSize?: number;\n+ /**\n+ * The amount of documents to embed at once when adding documents.\n+ * Note that some providers like Azure OpenAI can only embed 16 documents\n+ * at a time.\n+ * @default 16\n+ */\n+ readonly embeddingBatchSize?: number;\n+}\n+\n+/**\n+ * Azure AI Search options metadata schema.\n+ * If yout want to add custom data, use the attributes property.\n+ */\n+export type AzureAISearchDocumentMetadata = {\n+ source: string;\n+ attributes?: Array<{ key: string; value: string }>;\n+};\n+\n+/**\n+ * Azure AI Search indexed document.\n+ */\n+export type AzureAISearchDocument = {\n+ id: string;\n+ content: string;\n+ content_vector: number[];\n+ metadata: AzureAISearchDocumentMetadata;\n+};\n+\n+/**\n+ * Azure AI Search options for adding documents.\n+ */\n+export type AzureAISearchAddDocumentsOptions = {\n+ ids?: string[];\n+};\n+\n+const DEFAULT_FIELD_ID = \"id\";\n+const DEFAULT_FIELD_CONTENT = \"content\";\n+const DEFAULT_FIELD_CONTENT_VECTOR = \"content_vector\";\n+const DEFAULT_FIELD_METADATA = \"metadata\";\n+const DEFAULT_FIELD_METADATA_SOURCE = \"source\";\n+const DEFAULT_FIELD_METADATA_ATTRS = \"attributes\";\n+\n+/**\n+ * Azure AI Search vector store.\n+ * To use this, you should have:\n+ * - the `@azure/search-documents` NPM package installed\n+ * - an endpoint and key to the Azure AI Search instance\n+ *\n+ * If you directly provide a `SearchClient` instance, you need to ensure that\n+ * an index has been created. When using and endpoint and key, the index will\n+ * be created automatically if it does not exist.\n+ */\n+export class AzureAISearchVectorStore extends VectorStore {\n+ declare FilterType: string;\n+\n+ get lc_secrets(): { [key: string]: string } {\n+ return {\n+ endpoint: \"AZURE_AISEARCH_ENDPOINT\",\n+ key: \"AZURE_AISEARCH_KEY\",\n+ };\n+ }\n+\n+ _vectorstoreType(): string {\n+ return \"azure_aisearch\";\n+ }\n+\n+ private readonly initPromise: Promise;\n+\n+ private readonly client: SearchClient;\n+\n+ private readonly indexName: string;\n+\n+ private readonly chunkSize: number;\n+\n+ private readonly embeddingBatchSize: number;\n+\n+ private readonly options: AzureAISearchQueryOptions;\n+\n+ constructor(embeddings: EmbeddingsInterface, config: AzureAISearchConfig) {\n+ super(embeddings, config);\n+\n+ const endpoint =\n+ config.endpoint ?? getEnvironmentVariable(\"AZURE_AISEARCH_ENDPOINT\");\n+ const key = config.key ?? 
getEnvironmentVariable(\"AZURE_AISEARCH_KEY\");\n+\n+ if (!config.client && !endpoint && !key) {", + "comment_created_at": "2024-01-18T10:49:33+00:00", + "comment_author": "sinedied", + "comment_body": "Thanks, fixed", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1887551586", + "pr_number": 7372, + "pr_file": "langchain/src/chains/combine_documents/base.ts", + "created_at": "2024-12-16T20:32:22+00:00", + "commented_code": "documents: Document[];\n config?: RunnableConfig;\n}) {\n if (documents == null || documents.length == 0) {", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1887551586", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7372, + "pr_file": "langchain/src/chains/combine_documents/base.ts", + "discussion_id": "1887551586", + "commented_code": "@@ -21,6 +21,9 @@ export async function formatDocuments({\n documents: Document[];\n config?: RunnableConfig;\n }) {\n+ if (documents == null || documents.length == 0) {", + "comment_created_at": "2024-12-16T20:32:22+00:00", + "comment_author": "jacoblee93", + "comment_body": "Don't think we need the `.length` check\r\n\r\nAlso should use triple equals if at all", + "pr_file_module": null + }, + { + "comment_id": "1887691735", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7372, + "pr_file": "langchain/src/chains/combine_documents/base.ts", + "discussion_id": "1887551586", + "commented_code": "@@ -21,6 +21,9 @@ export async function formatDocuments({\n documents: Document[];\n config?: RunnableConfig;\n }) {\n+ if (documents == null || documents.length == 0) {", + "comment_created_at": "2024-12-16T23:09:03+00:00", + "comment_author": "ahm750", + "comment_body": "Triple equals added", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1805321749", + "pr_number": 7012, + "pr_file": "libs/langchain-openai/src/chat_models.ts", + "created_at": "2024-10-17T19:38:23+00:00", + "commented_code": "seed: options?.seed,\n ...streamOptionsConfig,\n parallel_tool_calls: options?.parallel_tool_calls,\n audio: this.audio,", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1805321749", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7012, + "pr_file": "libs/langchain-openai/src/chat_models.ts", + "discussion_id": "1805321749", + "commented_code": "@@ -1190,6 +1207,8 @@ export class ChatOpenAI<\n seed: options?.seed,\n ...streamOptionsConfig,\n parallel_tool_calls: options?.parallel_tool_calls,\n+ audio: this.audio,", + "comment_created_at": "2024-10-17T19:38:23+00:00", + "comment_author": "jacoblee93", + "comment_body": "May be slightly safer to add these fields only if they are present - not sure how Azure or other model proxies will react to adding `audio: undefined`\r\n\r\n", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-prefer-nullish-coalescing.md b/_reviewers/langchainjs-prefer-nullish-coalescing.md index 3a7592f..c662783 100644 --- a/_reviewers/langchainjs-prefer-nullish-coalescing.md +++ b/_reviewers/langchainjs-prefer-nullish-coalescing.md @@ -64,199 +64,3 @@ if (!config.client && !endpoint && !key) { ``` These patterns help prevent subtle bugs and make your code's intent clearer to other developers. 
- - -[ - { - "discussion_id": "2093795865", - "pr_number": 8141, - "pr_file": "libs/langchain-anthropic/src/utils/message_inputs.ts", - "created_at": "2025-05-17T00:38:48+00:00", - "commented_code": "content: [\n {\n type: \"tool_result\",\n content: _formatContent(message.content),\n // rare case: message.content could be undefined\n ...(message.content != null ? { content: _formatContent(message.content) } : {}),", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2093795865", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8141, - "pr_file": "libs/langchain-anthropic/src/utils/message_inputs.ts", - "discussion_id": "2093795865", - "commented_code": "@@ -116,7 +116,8 @@ function _ensureMessageContents(\n content: [\n {\n type: \"tool_result\",\n- content: _formatContent(message.content),\n+ // rare case: message.content could be undefined\n+ ...(message.content != null ? { content: _formatContent(message.content) } : {}),", - "comment_created_at": "2025-05-17T00:38:48+00:00", - "comment_author": "benjamincburns", - "comment_body": "Slightly modified from the original submission. Anthropic's API reference shows `content` here as being an _optional_ (but not nullable) field of type `string | object[]`. The changes in `core` will make this case no longer possible, but this should handle it for older versions that weren't coalescing `undefined` tool output to `\"\"`.\r\n\r\n![image](https://github.com/user-attachments/assets/5e282e7d-5766-47b7-8d5b-39231a4fbc8c)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1433347005", - "pr_number": 3448, - "pr_file": "langchain/src/chat_models/azure_ml.ts", - "created_at": "2023-12-21T01:37:48+00:00", - "commented_code": "import { SimpleChatModel, BaseChatModelParams } from \"./base.js\";\nimport { AzureMLHttpClient } from \"../llms/azure_ml.js\";\nimport { getEnvironmentVariable } from \"../util/env.js\";\nimport { BaseMessage } from \"../schema/index.js\";\n\nexport interface ChatContentFormatter {\n /**\n * Formats the request payload for the AzureML endpoint. It takes a\n * prompt and a dictionary of model arguments as input and returns a\n * string representing the formatted request payload.\n * @param messages A list of messages for the chat so far.\n * @param modelArgs A dictionary of model arguments.\n * @returns A string representing the formatted request payload.\n */\n formatRequestPayload: (\n messages: BaseMessage[],\n modelArgs: Record\n ) => string;\n /**\n * Formats the response payload from the AzureML endpoint. 
It takes a\n * response payload as input and returns a string representing the\n * formatted response.\n * @param responsePayload The response payload from the AzureML endpoint.\n * @returns A string representing the formatted response.\n */\n formatResponsePayload: (output: string) => string;\n}\n\nexport class LlamaContentFormatter implements ChatContentFormatter {\n _convertMessageToRecord(message: BaseMessage): Record {\n if (message._getType() === \"human\") {\n return { role: \"user\", content: message.content };\n } else if (message._getType() === \"ai\") {\n return { role: \"assistant\", content: message.content };\n } else {\n return { role: message._getType(), content: message.content };\n }\n }\n\n formatRequestPayload(\n messages: BaseMessage[],\n modelArgs: Record\n ): string {\n let msgs = messages.map((message) => {\n this._convertMessageToRecord(message);\n });\n return JSON.stringify({\n input_data: {\n input_string: msgs,\n parameters: modelArgs,\n },\n });\n }\n\n formatResponsePayload(responsePayload: string) {\n const response = JSON.parse(responsePayload);\n return response.output;\n }\n}\n\n/**\n * Type definition for the input parameters of the AzureMLChatOnlineEndpoint class.\n */\nexport interface AzureMLChatParams extends BaseChatModelParams {\n endpointUrl?: string;\n endpointApiKey?: string;\n modelArgs?: Record;\n contentFormatter?: ChatContentFormatter;\n}\n\n/**\n * Class that represents the chat model. It extends the SimpleChatModel class and implements the AzureMLChatInput interface.\n */\nexport class AzureMLChatOnlineEndpoint\n extends SimpleChatModel\n implements AzureMLChatParams\n{\n static lc_name() {\n return \"AzureMLChatOnlineEndpoint\";\n }\n static lc_description() {\n return \"A class for interacting with AzureML Chat models.\";\n }\n endpointUrl: string;\n endpointApiKey: string;\n modelArgs?: Record;\n contentFormatter: ChatContentFormatter;\n httpClient: AzureMLHttpClient;\n\n constructor(fields: AzureMLChatParams) {\n super(fields ?? {});\n if (!fields?.endpointUrl && !getEnvironmentVariable(\"AZUREML_URL\")) {\n throw new Error(\"No Azure ML Url found.\");\n }\n if (!fields?.endpointApiKey && !getEnvironmentVariable(\"AZUREML_API_KEY\")) {\n throw new Error(\"No Azure ML ApiKey found.\");\n }\n if (!fields?.contentFormatter) {\n throw new Error(\"No Content Formatter provided.\");\n }\n\n this.endpointUrl =\n fields.endpointUrl || getEnvironmentVariable(\"AZUREML_URL\") + \"\";", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1433347005", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3448, - "pr_file": "langchain/src/chat_models/azure_ml.ts", - "discussion_id": "1433347005", - "commented_code": "@@ -0,0 +1,142 @@\n+import { SimpleChatModel, BaseChatModelParams } from \"./base.js\";\n+import { AzureMLHttpClient } from \"../llms/azure_ml.js\";\n+import { getEnvironmentVariable } from \"../util/env.js\";\n+import { BaseMessage } from \"../schema/index.js\";\n+\n+export interface ChatContentFormatter {\n+ /**\n+ * Formats the request payload for the AzureML endpoint. 
It takes a\n+ * prompt and a dictionary of model arguments as input and returns a\n+ * string representing the formatted request payload.\n+ * @param messages A list of messages for the chat so far.\n+ * @param modelArgs A dictionary of model arguments.\n+ * @returns A string representing the formatted request payload.\n+ */\n+ formatRequestPayload: (\n+ messages: BaseMessage[],\n+ modelArgs: Record\n+ ) => string;\n+ /**\n+ * Formats the response payload from the AzureML endpoint. It takes a\n+ * response payload as input and returns a string representing the\n+ * formatted response.\n+ * @param responsePayload The response payload from the AzureML endpoint.\n+ * @returns A string representing the formatted response.\n+ */\n+ formatResponsePayload: (output: string) => string;\n+}\n+\n+export class LlamaContentFormatter implements ChatContentFormatter {\n+ _convertMessageToRecord(message: BaseMessage): Record {\n+ if (message._getType() === \"human\") {\n+ return { role: \"user\", content: message.content };\n+ } else if (message._getType() === \"ai\") {\n+ return { role: \"assistant\", content: message.content };\n+ } else {\n+ return { role: message._getType(), content: message.content };\n+ }\n+ }\n+\n+ formatRequestPayload(\n+ messages: BaseMessage[],\n+ modelArgs: Record\n+ ): string {\n+ let msgs = messages.map((message) => {\n+ this._convertMessageToRecord(message);\n+ });\n+ return JSON.stringify({\n+ input_data: {\n+ input_string: msgs,\n+ parameters: modelArgs,\n+ },\n+ });\n+ }\n+\n+ formatResponsePayload(responsePayload: string) {\n+ const response = JSON.parse(responsePayload);\n+ return response.output;\n+ }\n+}\n+\n+/**\n+ * Type definition for the input parameters of the AzureMLChatOnlineEndpoint class.\n+ */\n+export interface AzureMLChatParams extends BaseChatModelParams {\n+ endpointUrl?: string;\n+ endpointApiKey?: string;\n+ modelArgs?: Record;\n+ contentFormatter?: ChatContentFormatter;\n+}\n+\n+/**\n+ * Class that represents the chat model. It extends the SimpleChatModel class and implements the AzureMLChatInput interface.\n+ */\n+export class AzureMLChatOnlineEndpoint\n+ extends SimpleChatModel\n+ implements AzureMLChatParams\n+{\n+ static lc_name() {\n+ return \"AzureMLChatOnlineEndpoint\";\n+ }\n+ static lc_description() {\n+ return \"A class for interacting with AzureML Chat models.\";\n+ }\n+ endpointUrl: string;\n+ endpointApiKey: string;\n+ modelArgs?: Record;\n+ contentFormatter: ChatContentFormatter;\n+ httpClient: AzureMLHttpClient;\n+\n+ constructor(fields: AzureMLChatParams) {\n+ super(fields ?? {});\n+ if (!fields?.endpointUrl && !getEnvironmentVariable(\"AZUREML_URL\")) {\n+ throw new Error(\"No Azure ML Url found.\");\n+ }\n+ if (!fields?.endpointApiKey && !getEnvironmentVariable(\"AZUREML_API_KEY\")) {\n+ throw new Error(\"No Azure ML ApiKey found.\");\n+ }\n+ if (!fields?.contentFormatter) {\n+ throw new Error(\"No Content Formatter provided.\");\n+ }\n+\n+ this.endpointUrl =\n+ fields.endpointUrl || getEnvironmentVariable(\"AZUREML_URL\") + \"\";", - "comment_created_at": "2023-12-21T01:37:48+00:00", - "comment_author": "bracesproul", - "comment_body": "Prefer `??`. Please change for the rest. Also why is there `+ \"\"`?\r\n```suggestion\r\n fields.endpointUrl ?? 
getEnvironmentVariable(\"AZUREML_URL\") + \"\";\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1933082744", - "pr_number": 7616, - "pr_file": "libs/langchain-openai/src/chat_models.ts", - "created_at": "2025-01-29T01:27:02+00:00", - "commented_code": "this.reasoningEffort = fields?.reasoningEffort;\n\n if (this.model === \"o1\") {\n this.disableStreaming = true;\n this.disableStreaming =\n fields?.disableStreaming != null ? fields?.disableStreaming : true;", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1933082744", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7616, - "pr_file": "libs/langchain-openai/src/chat_models.ts", - "discussion_id": "1933082744", - "commented_code": "@@ -1002,7 +1002,8 @@ export class ChatOpenAI<\n this.reasoningEffort = fields?.reasoningEffort;\n \n if (this.model === \"o1\") {\n- this.disableStreaming = true;\n+ this.disableStreaming =\n+ fields?.disableStreaming != null ? fields?.disableStreaming : true;", - "comment_created_at": "2025-01-29T01:27:02+00:00", - "comment_author": "bracesproul", - "comment_body": "this is the one area which is technically breaking", - "pr_file_module": null - }, - { - "comment_id": "1933085046", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7616, - "pr_file": "libs/langchain-openai/src/chat_models.ts", - "discussion_id": "1933082744", - "commented_code": "@@ -1002,7 +1002,8 @@ export class ChatOpenAI<\n this.reasoningEffort = fields?.reasoningEffort;\n \n if (this.model === \"o1\") {\n- this.disableStreaming = true;\n+ this.disableStreaming =\n+ fields?.disableStreaming != null ? fields?.disableStreaming : true;", - "comment_created_at": "2025-01-29T01:29:26+00:00", - "comment_author": "jacoblee93", - "comment_body": "Only if someone's passing `disableStreaming: false` right? I think fine", - "pr_file_module": null - }, - { - "comment_id": "1933085495", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7616, - "pr_file": "libs/langchain-openai/src/chat_models.ts", - "discussion_id": "1933082744", - "commented_code": "@@ -1002,7 +1002,8 @@ export class ChatOpenAI<\n this.reasoningEffort = fields?.reasoningEffort;\n \n if (this.model === \"o1\") {\n- this.disableStreaming = true;\n+ this.disableStreaming =\n+ fields?.disableStreaming != null ? fields?.disableStreaming : true;", - "comment_created_at": "2025-01-29T01:29:45+00:00", - "comment_author": "jacoblee93", - "comment_body": "You can just do `fields?.disableStreaming ?? true`", - "pr_file_module": null - }, - { - "comment_id": "1933086166", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7616, - "pr_file": "libs/langchain-openai/src/chat_models.ts", - "discussion_id": "1933082744", - "commented_code": "@@ -1002,7 +1002,8 @@ export class ChatOpenAI<\n this.reasoningEffort = fields?.reasoningEffort;\n \n if (this.model === \"o1\") {\n- this.disableStreaming = true;\n+ this.disableStreaming =\n+ fields?.disableStreaming != null ? 
fields?.disableStreaming : true;", - "comment_created_at": "2025-01-29T01:30:43+00:00", - "comment_author": "bracesproul", - "comment_body": "ya `??` would work", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1456374070", - "pr_number": 4044, - "pr_file": "libs/langchain-community/src/vectorstores/azure_aisearch.ts", - "created_at": "2024-01-17T19:33:07+00:00", - "commented_code": "import * as uuid from \"uuid\";\nimport {\n SearchClient,\n SearchIndexClient,\n AzureKeyCredential,\n IndexingResult,\n SearchIndex,\n} from \"@azure/search-documents\";\nimport {\n MaxMarginalRelevanceSearchOptions,\n VectorStore,\n} from \"@langchain/core/vectorstores\";\nimport type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\nimport { Document } from \"@langchain/core/documents\";\nimport { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\nimport { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n\n/**\n * Azure AI Search query type.\n */\nexport const AzureAISearchQueryType = {\n /** Vector search. */\n Similarity: \"similarity\",\n /** Hybrid full text and vector search. */\n SimilarityHybrid: \"similarity_hybrid\",\n /** Hybrid full text and vector search with semantic ranking. */\n SemanticHybrid: \"semantic_hybrid\",\n} as const;\n\n/**\n * Azure AI Search query type.\n */\nexport type AzureAISearchQueryType =\n (typeof AzureAISearchQueryType)[keyof typeof AzureAISearchQueryType];\n\n/**\n * Azure AI Search settings.\n */\nexport interface AzureAISearchQueryOptions {\n readonly type: AzureAISearchQueryType;\n readonly semanticConfigurationName?: string;\n}\n\n/**\n * Configuration options for the `AzureAISearchStore` constructor.\n */\nexport interface AzureAISearchConfig {\n readonly client?: SearchClient;\n readonly indexName?: string;\n readonly endpoint?: string;\n readonly key?: string;\n readonly search: AzureAISearchQueryOptions;\n /**\n * The amount of documents to chunk by when adding vectors.\n * @default 100\n */\n readonly chunkSize?: number;\n /**\n * The amount of documents to embed at once when adding documents.\n * Note that some providers like Azure OpenAI can only embed 16 documents\n * at a time.\n * @default 16\n */\n readonly embeddingBatchSize?: number;\n}\n\n/**\n * Azure AI Search options metadata schema.\n * If yout want to add custom data, use the attributes property.\n */\nexport type AzureAISearchDocumentMetadata = {\n source: string;\n attributes?: Array<{ key: string; value: string }>;\n};\n\n/**\n * Azure AI Search indexed document.\n */\nexport type AzureAISearchDocument = {\n id: string;\n content: string;\n content_vector: number[];\n metadata: AzureAISearchDocumentMetadata;\n};\n\n/**\n * Azure AI Search options for adding documents.\n */\nexport type AzureAISearchAddDocumentsOptions = {\n ids?: string[];\n};\n\nconst DEFAULT_FIELD_ID = \"id\";\nconst DEFAULT_FIELD_CONTENT = \"content\";\nconst DEFAULT_FIELD_CONTENT_VECTOR = \"content_vector\";\nconst DEFAULT_FIELD_METADATA = \"metadata\";\nconst DEFAULT_FIELD_METADATA_SOURCE = \"source\";\nconst DEFAULT_FIELD_METADATA_ATTRS = \"attributes\";\n\n/**\n * Azure AI Search vector store.\n * To use this, you should have:\n * - the `@azure/search-documents` NPM package installed\n * - an endpoint and key to the Azure AI Search instance\n *\n * If you directly provide a `SearchClient` instance, you need to ensure that\n * an index has been created. 
When using and endpoint and key, the index will\n * be created automatically if it does not exist.\n */\nexport class AzureAISearchVectorStore extends VectorStore {\n declare FilterType: string;\n\n get lc_secrets(): { [key: string]: string } {\n return {\n endpoint: \"AZURE_AISEARCH_ENDPOINT\",\n key: \"AZURE_AISEARCH_KEY\",\n };\n }\n\n _vectorstoreType(): string {\n return \"azure_aisearch\";\n }\n\n private readonly initPromise: Promise;\n\n private readonly client: SearchClient;\n\n private readonly indexName: string;\n\n private readonly chunkSize: number;\n\n private readonly embeddingBatchSize: number;\n\n private readonly options: AzureAISearchQueryOptions;\n\n constructor(embeddings: EmbeddingsInterface, config: AzureAISearchConfig) {\n super(embeddings, config);\n\n const endpoint =\n config.endpoint ?? getEnvironmentVariable(\"AZURE_AISEARCH_ENDPOINT\");\n const key = config.key ?? getEnvironmentVariable(\"AZURE_AISEARCH_KEY\");\n\n if (!config.client && !endpoint && !key) {", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1456374070", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 4044, - "pr_file": "libs/langchain-community/src/vectorstores/azure_aisearch.ts", - "discussion_id": "1456374070", - "commented_code": "@@ -0,0 +1,694 @@\n+import * as uuid from \"uuid\";\n+import {\n+ SearchClient,\n+ SearchIndexClient,\n+ AzureKeyCredential,\n+ IndexingResult,\n+ SearchIndex,\n+} from \"@azure/search-documents\";\n+import {\n+ MaxMarginalRelevanceSearchOptions,\n+ VectorStore,\n+} from \"@langchain/core/vectorstores\";\n+import type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { Document } from \"@langchain/core/documents\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export const AzureAISearchQueryType = {\n+ /** Vector search. */\n+ Similarity: \"similarity\",\n+ /** Hybrid full text and vector search. */\n+ SimilarityHybrid: \"similarity_hybrid\",\n+ /** Hybrid full text and vector search with semantic ranking. 
*/\n+ SemanticHybrid: \"semantic_hybrid\",\n+} as const;\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export type AzureAISearchQueryType =\n+ (typeof AzureAISearchQueryType)[keyof typeof AzureAISearchQueryType];\n+\n+/**\n+ * Azure AI Search settings.\n+ */\n+export interface AzureAISearchQueryOptions {\n+ readonly type: AzureAISearchQueryType;\n+ readonly semanticConfigurationName?: string;\n+}\n+\n+/**\n+ * Configuration options for the `AzureAISearchStore` constructor.\n+ */\n+export interface AzureAISearchConfig {\n+ readonly client?: SearchClient;\n+ readonly indexName?: string;\n+ readonly endpoint?: string;\n+ readonly key?: string;\n+ readonly search: AzureAISearchQueryOptions;\n+ /**\n+ * The amount of documents to chunk by when adding vectors.\n+ * @default 100\n+ */\n+ readonly chunkSize?: number;\n+ /**\n+ * The amount of documents to embed at once when adding documents.\n+ * Note that some providers like Azure OpenAI can only embed 16 documents\n+ * at a time.\n+ * @default 16\n+ */\n+ readonly embeddingBatchSize?: number;\n+}\n+\n+/**\n+ * Azure AI Search options metadata schema.\n+ * If yout want to add custom data, use the attributes property.\n+ */\n+export type AzureAISearchDocumentMetadata = {\n+ source: string;\n+ attributes?: Array<{ key: string; value: string }>;\n+};\n+\n+/**\n+ * Azure AI Search indexed document.\n+ */\n+export type AzureAISearchDocument = {\n+ id: string;\n+ content: string;\n+ content_vector: number[];\n+ metadata: AzureAISearchDocumentMetadata;\n+};\n+\n+/**\n+ * Azure AI Search options for adding documents.\n+ */\n+export type AzureAISearchAddDocumentsOptions = {\n+ ids?: string[];\n+};\n+\n+const DEFAULT_FIELD_ID = \"id\";\n+const DEFAULT_FIELD_CONTENT = \"content\";\n+const DEFAULT_FIELD_CONTENT_VECTOR = \"content_vector\";\n+const DEFAULT_FIELD_METADATA = \"metadata\";\n+const DEFAULT_FIELD_METADATA_SOURCE = \"source\";\n+const DEFAULT_FIELD_METADATA_ATTRS = \"attributes\";\n+\n+/**\n+ * Azure AI Search vector store.\n+ * To use this, you should have:\n+ * - the `@azure/search-documents` NPM package installed\n+ * - an endpoint and key to the Azure AI Search instance\n+ *\n+ * If you directly provide a `SearchClient` instance, you need to ensure that\n+ * an index has been created. When using and endpoint and key, the index will\n+ * be created automatically if it does not exist.\n+ */\n+export class AzureAISearchVectorStore extends VectorStore {\n+ declare FilterType: string;\n+\n+ get lc_secrets(): { [key: string]: string } {\n+ return {\n+ endpoint: \"AZURE_AISEARCH_ENDPOINT\",\n+ key: \"AZURE_AISEARCH_KEY\",\n+ };\n+ }\n+\n+ _vectorstoreType(): string {\n+ return \"azure_aisearch\";\n+ }\n+\n+ private readonly initPromise: Promise;\n+\n+ private readonly client: SearchClient;\n+\n+ private readonly indexName: string;\n+\n+ private readonly chunkSize: number;\n+\n+ private readonly embeddingBatchSize: number;\n+\n+ private readonly options: AzureAISearchQueryOptions;\n+\n+ constructor(embeddings: EmbeddingsInterface, config: AzureAISearchConfig) {\n+ super(embeddings, config);\n+\n+ const endpoint =\n+ config.endpoint ?? getEnvironmentVariable(\"AZURE_AISEARCH_ENDPOINT\");\n+ const key = config.key ?? 
getEnvironmentVariable(\"AZURE_AISEARCH_KEY\");\n+\n+ if (!config.client && !endpoint && !key) {", - "comment_created_at": "2024-01-17T19:33:07+00:00", - "comment_author": "pablocastro", - "comment_body": "Wouldn't you want `(!config.client && (!endpoint || !key))` so we also raise an error if either the endpoint or the key are missing when not providing a pre-created client?", - "pr_file_module": null - }, - { - "comment_id": "1457265765", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 4044, - "pr_file": "libs/langchain-community/src/vectorstores/azure_aisearch.ts", - "discussion_id": "1456374070", - "commented_code": "@@ -0,0 +1,694 @@\n+import * as uuid from \"uuid\";\n+import {\n+ SearchClient,\n+ SearchIndexClient,\n+ AzureKeyCredential,\n+ IndexingResult,\n+ SearchIndex,\n+} from \"@azure/search-documents\";\n+import {\n+ MaxMarginalRelevanceSearchOptions,\n+ VectorStore,\n+} from \"@langchain/core/vectorstores\";\n+import type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { Document } from \"@langchain/core/documents\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export const AzureAISearchQueryType = {\n+ /** Vector search. */\n+ Similarity: \"similarity\",\n+ /** Hybrid full text and vector search. */\n+ SimilarityHybrid: \"similarity_hybrid\",\n+ /** Hybrid full text and vector search with semantic ranking. */\n+ SemanticHybrid: \"semantic_hybrid\",\n+} as const;\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export type AzureAISearchQueryType =\n+ (typeof AzureAISearchQueryType)[keyof typeof AzureAISearchQueryType];\n+\n+/**\n+ * Azure AI Search settings.\n+ */\n+export interface AzureAISearchQueryOptions {\n+ readonly type: AzureAISearchQueryType;\n+ readonly semanticConfigurationName?: string;\n+}\n+\n+/**\n+ * Configuration options for the `AzureAISearchStore` constructor.\n+ */\n+export interface AzureAISearchConfig {\n+ readonly client?: SearchClient;\n+ readonly indexName?: string;\n+ readonly endpoint?: string;\n+ readonly key?: string;\n+ readonly search: AzureAISearchQueryOptions;\n+ /**\n+ * The amount of documents to chunk by when adding vectors.\n+ * @default 100\n+ */\n+ readonly chunkSize?: number;\n+ /**\n+ * The amount of documents to embed at once when adding documents.\n+ * Note that some providers like Azure OpenAI can only embed 16 documents\n+ * at a time.\n+ * @default 16\n+ */\n+ readonly embeddingBatchSize?: number;\n+}\n+\n+/**\n+ * Azure AI Search options metadata schema.\n+ * If yout want to add custom data, use the attributes property.\n+ */\n+export type AzureAISearchDocumentMetadata = {\n+ source: string;\n+ attributes?: Array<{ key: string; value: string }>;\n+};\n+\n+/**\n+ * Azure AI Search indexed document.\n+ */\n+export type AzureAISearchDocument = {\n+ id: string;\n+ content: string;\n+ content_vector: number[];\n+ metadata: AzureAISearchDocumentMetadata;\n+};\n+\n+/**\n+ * Azure AI Search options for adding documents.\n+ */\n+export type AzureAISearchAddDocumentsOptions = {\n+ ids?: string[];\n+};\n+\n+const DEFAULT_FIELD_ID = \"id\";\n+const DEFAULT_FIELD_CONTENT = \"content\";\n+const DEFAULT_FIELD_CONTENT_VECTOR = \"content_vector\";\n+const DEFAULT_FIELD_METADATA = \"metadata\";\n+const DEFAULT_FIELD_METADATA_SOURCE = \"source\";\n+const DEFAULT_FIELD_METADATA_ATTRS = \"attributes\";\n+\n+/**\n+ * Azure AI Search vector store.\n+ * To use this, 
you should have:\n+ * - the `@azure/search-documents` NPM package installed\n+ * - an endpoint and key to the Azure AI Search instance\n+ *\n+ * If you directly provide a `SearchClient` instance, you need to ensure that\n+ * an index has been created. When using and endpoint and key, the index will\n+ * be created automatically if it does not exist.\n+ */\n+export class AzureAISearchVectorStore extends VectorStore {\n+ declare FilterType: string;\n+\n+ get lc_secrets(): { [key: string]: string } {\n+ return {\n+ endpoint: \"AZURE_AISEARCH_ENDPOINT\",\n+ key: \"AZURE_AISEARCH_KEY\",\n+ };\n+ }\n+\n+ _vectorstoreType(): string {\n+ return \"azure_aisearch\";\n+ }\n+\n+ private readonly initPromise: Promise;\n+\n+ private readonly client: SearchClient;\n+\n+ private readonly indexName: string;\n+\n+ private readonly chunkSize: number;\n+\n+ private readonly embeddingBatchSize: number;\n+\n+ private readonly options: AzureAISearchQueryOptions;\n+\n+ constructor(embeddings: EmbeddingsInterface, config: AzureAISearchConfig) {\n+ super(embeddings, config);\n+\n+ const endpoint =\n+ config.endpoint ?? getEnvironmentVariable(\"AZURE_AISEARCH_ENDPOINT\");\n+ const key = config.key ?? getEnvironmentVariable(\"AZURE_AISEARCH_KEY\");\n+\n+ if (!config.client && !endpoint && !key) {", - "comment_created_at": "2024-01-18T10:49:33+00:00", - "comment_author": "sinedied", - "comment_body": "Thanks, fixed", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1887551586", - "pr_number": 7372, - "pr_file": "langchain/src/chains/combine_documents/base.ts", - "created_at": "2024-12-16T20:32:22+00:00", - "commented_code": "documents: Document[];\n config?: RunnableConfig;\n}) {\n if (documents == null || documents.length == 0) {", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1887551586", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7372, - "pr_file": "langchain/src/chains/combine_documents/base.ts", - "discussion_id": "1887551586", - "commented_code": "@@ -21,6 +21,9 @@ export async function formatDocuments({\n documents: Document[];\n config?: RunnableConfig;\n }) {\n+ if (documents == null || documents.length == 0) {", - "comment_created_at": "2024-12-16T20:32:22+00:00", - "comment_author": "jacoblee93", - "comment_body": "Don't think we need the `.length` check\r\n\r\nAlso should use triple equals if at all", - "pr_file_module": null - }, - { - "comment_id": "1887691735", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7372, - "pr_file": "langchain/src/chains/combine_documents/base.ts", - "discussion_id": "1887551586", - "commented_code": "@@ -21,6 +21,9 @@ export async function formatDocuments({\n documents: Document[];\n config?: RunnableConfig;\n }) {\n+ if (documents == null || documents.length == 0) {", - "comment_created_at": "2024-12-16T23:09:03+00:00", - "comment_author": "ahm750", - "comment_body": "Triple equals added", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1805321749", - "pr_number": 7012, - "pr_file": "libs/langchain-openai/src/chat_models.ts", - "created_at": "2024-10-17T19:38:23+00:00", - "commented_code": "seed: options?.seed,\n ...streamOptionsConfig,\n parallel_tool_calls: options?.parallel_tool_calls,\n audio: this.audio,", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1805321749", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7012, - "pr_file": "libs/langchain-openai/src/chat_models.ts", - "discussion_id": 
"1805321749", - "commented_code": "@@ -1190,6 +1207,8 @@ export class ChatOpenAI<\n seed: options?.seed,\n ...streamOptionsConfig,\n parallel_tool_calls: options?.parallel_tool_calls,\n+ audio: this.audio,", - "comment_created_at": "2024-10-17T19:38:23+00:00", - "comment_author": "jacoblee93", - "comment_body": "May be slightly safer to add these fields only if they are present - not sure how Azure or other model proxies will react to adding `audio: undefined`\r\n\r\n", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-preserve-api-backward-compatibility.json b/_reviewers/langchainjs-preserve-api-backward-compatibility.json new file mode 100644 index 0000000..328f680 --- /dev/null +++ b/_reviewers/langchainjs-preserve-api-backward-compatibility.json @@ -0,0 +1,240 @@ +[ + { + "discussion_id": "1433346407", + "pr_number": 3448, + "pr_file": "langchain/src/chat_models/azure_ml.ts", + "created_at": "2023-12-21T01:36:53+00:00", + "commented_code": "import { SimpleChatModel, BaseChatModelParams } from \"./base.js\";\nimport { AzureMLHttpClient } from \"../llms/azure_ml.js\";\nimport { getEnvironmentVariable } from \"../util/env.js\";\nimport { BaseMessage } from \"../schema/index.js\";\n\nexport interface ChatContentFormatter {\n /**\n * Formats the request payload for the AzureML endpoint. It takes a\n * prompt and a dictionary of model arguments as input and returns a\n * string representing the formatted request payload.\n * @param messages A list of messages for the chat so far.\n * @param modelArgs A dictionary of model arguments.\n * @returns A string representing the formatted request payload.\n */\n formatRequestPayload: (\n messages: BaseMessage[],\n modelArgs: Record\n ) => string;\n /**\n * Formats the response payload from the AzureML endpoint. It takes a\n * response payload as input and returns a string representing the\n * formatted response.\n * @param responsePayload The response payload from the AzureML endpoint.\n * @returns A string representing the formatted response.\n */\n formatResponsePayload: (output: string) => string;\n}\n\nexport class LlamaContentFormatter implements ChatContentFormatter {\n _convertMessageToRecord(message: BaseMessage): Record {\n if (message._getType() === \"human\") {\n return { role: \"user\", content: message.content };\n } else if (message._getType() === \"ai\") {\n return { role: \"assistant\", content: message.content };\n } else {\n return { role: message._getType(), content: message.content };\n }\n }\n\n formatRequestPayload(\n messages: BaseMessage[],\n modelArgs: Record\n ): string {\n let msgs = messages.map((message) => {\n this._convertMessageToRecord(message);\n });\n return JSON.stringify({\n input_data: {\n input_string: msgs,\n parameters: modelArgs,\n },\n });\n }\n\n formatResponsePayload(responsePayload: string) {\n const response = JSON.parse(responsePayload);\n return response.output;\n }\n}\n\n/**\n * Type definition for the input parameters of the AzureMLChatOnlineEndpoint class.\n */\nexport interface AzureMLChatParams extends BaseChatModelParams {\n endpointUrl?: string;\n endpointApiKey?: string;\n modelArgs?: Record;\n contentFormatter?: ChatContentFormatter;\n}\n\n/**\n * Class that represents the chat model. 
It extends the SimpleChatModel class and implements the AzureMLChatInput interface.\n */\nexport class AzureMLChatOnlineEndpoint\n extends SimpleChatModel\n implements AzureMLChatParams\n{\n static lc_name() {\n return \"AzureMLChatOnlineEndpoint\";\n }\n static lc_description() {\n return \"A class for interacting with AzureML Chat models.\";\n }\n endpointUrl: string;\n endpointApiKey: string;\n modelArgs?: Record;\n contentFormatter: ChatContentFormatter;\n httpClient: AzureMLHttpClient;\n\n constructor(fields: AzureMLChatParams) {\n super(fields ?? {});\n if (!fields?.endpointUrl && !getEnvironmentVariable(\"AZUREML_URL\")) {\n throw new Error(\"No Azure ML Url found.\");\n }\n if (!fields?.endpointApiKey && !getEnvironmentVariable(\"AZUREML_API_KEY\")) {\n throw new Error(\"No Azure ML ApiKey found.\");\n }\n if (!fields?.contentFormatter) {\n throw new Error(\"No Content Formatter provided.\");", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1433346407", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3448, + "pr_file": "langchain/src/chat_models/azure_ml.ts", + "discussion_id": "1433346407", + "commented_code": "@@ -0,0 +1,142 @@\n+import { SimpleChatModel, BaseChatModelParams } from \"./base.js\";\n+import { AzureMLHttpClient } from \"../llms/azure_ml.js\";\n+import { getEnvironmentVariable } from \"../util/env.js\";\n+import { BaseMessage } from \"../schema/index.js\";\n+\n+export interface ChatContentFormatter {\n+ /**\n+ * Formats the request payload for the AzureML endpoint. It takes a\n+ * prompt and a dictionary of model arguments as input and returns a\n+ * string representing the formatted request payload.\n+ * @param messages A list of messages for the chat so far.\n+ * @param modelArgs A dictionary of model arguments.\n+ * @returns A string representing the formatted request payload.\n+ */\n+ formatRequestPayload: (\n+ messages: BaseMessage[],\n+ modelArgs: Record\n+ ) => string;\n+ /**\n+ * Formats the response payload from the AzureML endpoint. 
It takes a\n+ * response payload as input and returns a string representing the\n+ * formatted response.\n+ * @param responsePayload The response payload from the AzureML endpoint.\n+ * @returns A string representing the formatted response.\n+ */\n+ formatResponsePayload: (output: string) => string;\n+}\n+\n+export class LlamaContentFormatter implements ChatContentFormatter {\n+ _convertMessageToRecord(message: BaseMessage): Record {\n+ if (message._getType() === \"human\") {\n+ return { role: \"user\", content: message.content };\n+ } else if (message._getType() === \"ai\") {\n+ return { role: \"assistant\", content: message.content };\n+ } else {\n+ return { role: message._getType(), content: message.content };\n+ }\n+ }\n+\n+ formatRequestPayload(\n+ messages: BaseMessage[],\n+ modelArgs: Record\n+ ): string {\n+ let msgs = messages.map((message) => {\n+ this._convertMessageToRecord(message);\n+ });\n+ return JSON.stringify({\n+ input_data: {\n+ input_string: msgs,\n+ parameters: modelArgs,\n+ },\n+ });\n+ }\n+\n+ formatResponsePayload(responsePayload: string) {\n+ const response = JSON.parse(responsePayload);\n+ return response.output;\n+ }\n+}\n+\n+/**\n+ * Type definition for the input parameters of the AzureMLChatOnlineEndpoint class.\n+ */\n+export interface AzureMLChatParams extends BaseChatModelParams {\n+ endpointUrl?: string;\n+ endpointApiKey?: string;\n+ modelArgs?: Record;\n+ contentFormatter?: ChatContentFormatter;\n+}\n+\n+/**\n+ * Class that represents the chat model. It extends the SimpleChatModel class and implements the AzureMLChatInput interface.\n+ */\n+export class AzureMLChatOnlineEndpoint\n+ extends SimpleChatModel\n+ implements AzureMLChatParams\n+{\n+ static lc_name() {\n+ return \"AzureMLChatOnlineEndpoint\";\n+ }\n+ static lc_description() {\n+ return \"A class for interacting with AzureML Chat models.\";\n+ }\n+ endpointUrl: string;\n+ endpointApiKey: string;\n+ modelArgs?: Record;\n+ contentFormatter: ChatContentFormatter;\n+ httpClient: AzureMLHttpClient;\n+\n+ constructor(fields: AzureMLChatParams) {\n+ super(fields ?? {});\n+ if (!fields?.endpointUrl && !getEnvironmentVariable(\"AZUREML_URL\")) {\n+ throw new Error(\"No Azure ML Url found.\");\n+ }\n+ if (!fields?.endpointApiKey && !getEnvironmentVariable(\"AZUREML_API_KEY\")) {\n+ throw new Error(\"No Azure ML ApiKey found.\");\n+ }\n+ if (!fields?.contentFormatter) {\n+ throw new Error(\"No Content Formatter provided.\");", + "comment_created_at": "2023-12-21T01:36:53+00:00", + "comment_author": "bracesproul", + "comment_body": "Instead of this check it should be a required param in the `AzureMLChatParams` interface. But why are we requiring users to pass this in? 
Can we instead keep it optional and have a default formatter that gets used?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2057154690", + "pr_number": 8056, + "pr_file": "libs/langchain-aws/src/tests/chat_models.int.test.ts", + "created_at": "2025-04-24T01:17:37+00:00", + "commented_code": "expect(typeof result.tool_calls![0].args.location).toBe(\"string\");\n expect(result.tool_calls![0].args.location.length).toBeGreaterThan(0);\n\n messages.push(\n new ToolMessage({\n tool_call_id: result.tool_calls![0].id!,\n content: await tools[0].invoke(result.tool_calls![0]),\n })\n );\n messages.push(await tools[0].invoke(result.tool_calls![0]));", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2057154690", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8056, + "pr_file": "libs/langchain-aws/src/tests/chat_models.int.test.ts", + "discussion_id": "2057154690", + "commented_code": "@@ -513,12 +513,7 @@ test(\"Test ChatBedrockConverse can respond to tool invocations with thinking ena\n expect(typeof result.tool_calls![0].args.location).toBe(\"string\");\n expect(result.tool_calls![0].args.location.length).toBeGreaterThan(0);\n \n- messages.push(\n- new ToolMessage({\n- tool_call_id: result.tool_calls![0].id!,\n- content: await tools[0].invoke(result.tool_calls![0]),\n- })\n- );\n+ messages.push(await tools[0].invoke(result.tool_calls![0]));", + "comment_created_at": "2025-04-24T01:17:37+00:00", + "comment_author": "hntrl", + "comment_body": "StructuredTool.invoke will return a tool message when a tool call is passed, so we don't have to create a new tool message here", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1424769745", + "pr_number": 3577, + "pr_file": "libs/langchain-community/src/llms/watsonx_ai.ts", + "created_at": "2023-12-13T02:13:17+00:00", + "commented_code": "};\n }\n\n endpoint: string;\n\n region = \"us-south\";\n\n version = \"2023-05-29\";\n\n modelId = \"meta-llama/llama-2-70b-chat\";\n\n modelKwargs?: Record;\n\n ibmCloudApiKey?: string;\n\n ibmCloudToken?: string;\n projectId!: string;\n\n ibmCloudTokenExpiresAt?: number;\n modelParameters?: WatsonModelParameters;\n\n projectId?: string;\n\n modelParameters?: Record;\n private readonly watsonApiClient: WatsonApiClient;\n\n constructor(fields: WatsonxAIParams) {\n super(fields);\n\n this.region = fields?.region ?? this.region;", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1424769745", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3577, + "pr_file": "libs/langchain-community/src/llms/watsonx_ai.ts", + "discussion_id": "1424769745", + "commented_code": "@@ -69,48 +30,43 @@ export class WatsonxAI extends LLM {\n };\n }\n \n- endpoint: string;\n-\n- region = \"us-south\";\n-\n- version = \"2023-05-29\";\n-\n modelId = \"meta-llama/llama-2-70b-chat\";\n \n modelKwargs?: Record;\n \n- ibmCloudApiKey?: string;\n-\n- ibmCloudToken?: string;\n+ projectId!: string;\n \n- ibmCloudTokenExpiresAt?: number;\n+ modelParameters?: WatsonModelParameters;\n \n- projectId?: string;\n-\n- modelParameters?: Record;\n+ private readonly watsonApiClient: WatsonApiClient;\n \n constructor(fields: WatsonxAIParams) {\n super(fields);\n \n- this.region = fields?.region ?? 
this.region;", + "comment_created_at": "2023-12-13T02:13:17+00:00", + "comment_author": "jacoblee93", + "comment_body": "Isn't removing all of these a breaking change?", + "pr_file_module": null + }, + { + "comment_id": "1424977554", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3577, + "pr_file": "libs/langchain-community/src/llms/watsonx_ai.ts", + "discussion_id": "1424769745", + "commented_code": "@@ -69,48 +30,43 @@ export class WatsonxAI extends LLM {\n };\n }\n \n- endpoint: string;\n-\n- region = \"us-south\";\n-\n- version = \"2023-05-29\";\n-\n modelId = \"meta-llama/llama-2-70b-chat\";\n \n modelKwargs?: Record;\n \n- ibmCloudApiKey?: string;\n-\n- ibmCloudToken?: string;\n+ projectId!: string;\n \n- ibmCloudTokenExpiresAt?: number;\n+ modelParameters?: WatsonModelParameters;\n \n- projectId?: string;\n-\n- modelParameters?: Record;\n+ private readonly watsonApiClient: WatsonApiClient;\n \n constructor(fields: WatsonxAIParams) {\n super(fields);\n \n- this.region = fields?.region ?? this.region;", + "comment_created_at": "2023-12-13T08:00:44+00:00", + "comment_author": "faileon", + "comment_body": "I moved a few attributes to the WatsonClient as it's something I also needed in the ChatModel. I did remove `lc_serializable = true;` by accident and will add it back in.\r\n\r\nI tried running the provided example at `examples/src/llms/watsonx_ai.ts` and it works like before.", + "pr_file_module": null + }, + { + "comment_id": "1425542720", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3577, + "pr_file": "libs/langchain-community/src/llms/watsonx_ai.ts", + "discussion_id": "1424769745", + "commented_code": "@@ -69,48 +30,43 @@ export class WatsonxAI extends LLM {\n };\n }\n \n- endpoint: string;\n-\n- region = \"us-south\";\n-\n- version = \"2023-05-29\";\n-\n modelId = \"meta-llama/llama-2-70b-chat\";\n \n modelKwargs?: Record;\n \n- ibmCloudApiKey?: string;\n-\n- ibmCloudToken?: string;\n+ projectId!: string;\n \n- ibmCloudTokenExpiresAt?: number;\n+ modelParameters?: WatsonModelParameters;\n \n- projectId?: string;\n-\n- modelParameters?: Record;\n+ private readonly watsonApiClient: WatsonApiClient;\n \n constructor(fields: WatsonxAIParams) {\n super(fields);\n \n- this.region = fields?.region ?? 
this.region;", + "comment_created_at": "2023-12-13T15:44:14+00:00", + "comment_author": "chasemcdo", + "comment_body": "It is breaking in the sense that if someone is dependent on the old schema they'll need to update it, but most of the functionality is there, but in a now more reusable format.\r\n\r\nOne thing that I am noticing missing is there was originally an [endpoint](https://github.com/langchain-ai/langchainjs/blob/06a39117aae61fcbf26a537de4bad8a156bbb9ab/libs/langchain-community/src/llms/watsonx_ai.ts#L14-L17) parameter which would allow the end user to overwrite the entire url if they so desired.\r\n\r\nI had included this since the production endpoint is still labeled as \"beta\", so this would allow someone to migrate to a different (potentially more stable) endpoint as soon as it is released without the need to submit a PR to LangChain.\r\n\r\nThoughts on reintroducing that @faileon ?", + "pr_file_module": null + }, + { + "comment_id": "1425755461", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3577, + "pr_file": "libs/langchain-community/src/llms/watsonx_ai.ts", + "discussion_id": "1424769745", + "commented_code": "@@ -69,48 +30,43 @@ export class WatsonxAI extends LLM {\n };\n }\n \n- endpoint: string;\n-\n- region = \"us-south\";\n-\n- version = \"2023-05-29\";\n-\n modelId = \"meta-llama/llama-2-70b-chat\";\n \n modelKwargs?: Record;\n \n- ibmCloudApiKey?: string;\n-\n- ibmCloudToken?: string;\n+ projectId!: string;\n \n- ibmCloudTokenExpiresAt?: number;\n+ modelParameters?: WatsonModelParameters;\n \n- projectId?: string;\n-\n- modelParameters?: Record;\n+ private readonly watsonApiClient: WatsonApiClient;\n \n constructor(fields: WatsonxAIParams) {\n super(fields);\n \n- this.region = fields?.region ?? this.region;", + "comment_created_at": "2023-12-13T18:48:59+00:00", + "comment_author": "jacoblee93", + "comment_body": "Not sure what current usage is, but would love a shim for principle's sake. I can try to have a look later if you don't have time.", + "pr_file_module": null + }, + { + "comment_id": "1426735975", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3577, + "pr_file": "libs/langchain-community/src/llms/watsonx_ai.ts", + "discussion_id": "1424769745", + "commented_code": "@@ -69,48 +30,43 @@ export class WatsonxAI extends LLM {\n };\n }\n \n- endpoint: string;\n-\n- region = \"us-south\";\n-\n- version = \"2023-05-29\";\n-\n modelId = \"meta-llama/llama-2-70b-chat\";\n \n modelKwargs?: Record;\n \n- ibmCloudApiKey?: string;\n-\n- ibmCloudToken?: string;\n+ projectId!: string;\n \n- ibmCloudTokenExpiresAt?: number;\n+ modelParameters?: WatsonModelParameters;\n \n- projectId?: string;\n-\n- modelParameters?: Record;\n+ private readonly watsonApiClient: WatsonApiClient;\n \n constructor(fields: WatsonxAIParams) {\n super(fields);\n \n- this.region = fields?.region ?? this.region;", + "comment_created_at": "2023-12-14T13:42:44+00:00", + "comment_author": "faileon", + "comment_body": "Ah I get it now, yes this would be breaking to someone using the old schema. 
I can revert it and make it backwards compatible.\r\n\r\nRegarding the endpoint parameter that's a good catch, I agree it should be configurable, as it will most likely change once IBM releases non beta version.\r\n\r\nI have some free time during this weekend again to work on it.", + "pr_file_module": null + }, + { + "comment_id": "1427152346", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3577, + "pr_file": "libs/langchain-community/src/llms/watsonx_ai.ts", + "discussion_id": "1424769745", + "commented_code": "@@ -69,48 +30,43 @@ export class WatsonxAI extends LLM {\n };\n }\n \n- endpoint: string;\n-\n- region = \"us-south\";\n-\n- version = \"2023-05-29\";\n-\n modelId = \"meta-llama/llama-2-70b-chat\";\n \n modelKwargs?: Record;\n \n- ibmCloudApiKey?: string;\n-\n- ibmCloudToken?: string;\n+ projectId!: string;\n \n- ibmCloudTokenExpiresAt?: number;\n+ modelParameters?: WatsonModelParameters;\n \n- projectId?: string;\n-\n- modelParameters?: Record;\n+ private readonly watsonApiClient: WatsonApiClient;\n \n constructor(fields: WatsonxAIParams) {\n super(fields);\n \n- this.region = fields?.region ?? this.region;", + "comment_created_at": "2023-12-14T19:01:09+00:00", + "comment_author": "jacoblee93", + "comment_body": "Thank you! This is a very recent integration, but we really try to not have any breaking changes in patches unless they are completely unavoidable.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2048025111", + "pr_number": 8020, + "pr_file": "langchain-core/src/messages/index.ts", + "created_at": "2025-04-17T01:04:27+00:00", + "commented_code": "type InvalidToolCall,\n isToolMessage,\n isToolMessageChunk,\n type ToolCall,", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2048025111", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8020, + "pr_file": "langchain-core/src/messages/index.ts", + "discussion_id": "2048025111", + "commented_code": "@@ -16,4 +16,5 @@ export {\n type InvalidToolCall,\n isToolMessage,\n isToolMessageChunk,\n+ type ToolCall,", + "comment_created_at": "2025-04-17T01:04:27+00:00", + "comment_author": "jacoblee93", + "comment_body": "This conflicts with the `ToolCall` declared in `base.ts` - you should import it like this:\r\n\r\n```ts\r\nimport { type ToolCall } from \"@langchain/core/messages/tool\";\r\n```\r\n\r\nWe will remove the old type on next breaking change.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1708007834", + "pr_number": 6133, + "pr_file": "libs/langchain-azure-cosmosdb/src/azure_cosmosdb_mongodb.ts", + "created_at": "2024-08-07T22:15:25+00:00", + "commented_code": "import {\n ObjectId,\n Collection,\n Document as MongoDBDocument,\n MongoClient,\n Db,\n Filter,\n} from \"mongodb\";\nimport type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\nimport {\n MaxMarginalRelevanceSearchOptions,\n VectorStore,\n} from \"@langchain/core/vectorstores\";\nimport { Document, DocumentInterface } from \"@langchain/core/documents\";\nimport { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\nimport { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n\n/** Azure Cosmos DB for MongoDB vCore Similarity type. */\nexport const AzureCosmosDBMongoDBSimilarityType = {\n /** Cosine similarity */\n COS: \"COS\",\n /** Inner - product */\n IP: \"IP\",\n /** Euclidian distance */\n L2: \"L2\",\n} as const;\n\n/** Azure Cosmos DB for MongoDB vCore Similarity type. 
*/\nexport type AzureCosmosDBMongoDBSimilarityType =\n (typeof AzureCosmosDBMongoDBSimilarityType)[keyof typeof AzureCosmosDBMongoDBSimilarityType];\n\n/** Azure Cosmos DB for MongoDB vCore Index Options. */\nexport type AzureCosmosDBMongoDBIndexOptions = {\n /** Skips automatic index creation. */\n readonly skipCreate?: boolean;\n /** Number of clusters that the inverted file (IVF) index uses to group the vector data. */\n readonly numLists?: number;\n /** Number of dimensions for vector similarity. */\n readonly dimensions?: number;\n /** Similarity metric to use with the IVF index. */\n readonly similarity?: AzureCosmosDBMongoDBSimilarityType;\n};\n\n/** Azure Cosmos DB for MongoDB vCore delete Parameters. */\nexport type AzureCosmosDBMongoDBDeleteParams = {\n /** List of IDs for the documents to be removed. */\n readonly ids?: string | string[];\n /** MongoDB filter object or list of IDs for the documents to be removed. */\n readonly filter?: Filter;\n};\n\n/** Configuration options for the `AzureCosmosDBMongoDBVectorStore` constructor. */\nexport interface AzureCosmosDBMongoDBConfig {\n readonly client?: MongoClient;\n readonly connectionString?: string;\n readonly databaseName?: string;\n readonly collectionName?: string;\n readonly indexName?: string;\n readonly textKey?: string;\n readonly embeddingKey?: string;\n readonly indexOptions?: AzureCosmosDBMongoDBIndexOptions;\n}\n\n/**\n * Azure Cosmos DB for MongoDB vCore vector store.\n * To use this, you should have both:\n * - the `mongodb` NPM package installed\n * - a connection string associated with a MongoDB VCore Cluster\n *\n * You do not need to create a database or collection, it will be created\n * automatically.\n *\n * You also need an index on the collection, which is by default be created\n * automatically using the `createIndex` method.\n */\nexport class AzureCosmosDBMongoDBVectorStore extends VectorStore {\n get lc_secrets(): { [key: string]: string } {\n return {\n connectionString: \"AZURE_COSMOSDB_MONGODB_CONNECTION_STRING\",\n };\n }\n\n private connectPromise: Promise;\n\n private initPromise: Promise;\n\n private readonly client: MongoClient | undefined;\n\n private database: Db;\n\n private collection: Collection;\n\n readonly indexName: string;\n\n readonly textKey: string;\n\n readonly embeddingKey: string;\n\n private readonly indexOptions: AzureCosmosDBMongoDBIndexOptions;\n\n /**\n * Initializes the AzureCosmosDBMongoDBVectorStore.\n * Connect the client to the database and create the container, creating them if needed.\n * @returns A promise that resolves when the AzureCosmosDBMongoDBVectorStore has been initialized.\n */\n initialize: () => Promise;\n\n _vectorstoreType(): string {\n return \"azure_cosmosdb_mongodb\";\n }\n\n constructor(\n embeddings: EmbeddingsInterface,\n dbConfig: AzureCosmosDBMongoDBConfig\n ) {\n super(embeddings, dbConfig);\n\n const connectionString =\n dbConfig.connectionString ??\n getEnvironmentVariable(\"AZURE_COSMOSDB_MONGODB_CONNECTION_STRING\");\n\n if (!dbConfig.client && !connectionString) {\n throw new Error(\n \"AzureCosmosDBMongoDBVectorStore client or connection string must be set.\"\n );\n }\n\n if (!dbConfig.client) {\n // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n this.client = new MongoClient(connectionString!, {\n appName: \"langchainjs\",\n });\n }\n\n // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n const client = dbConfig.client || this.client!;\n const databaseName = dbConfig.databaseName ?? 
\"documentsDB\";\n const collectionName = dbConfig.collectionName ?? \"documents\";\n this.indexName = dbConfig.indexName ?? \"vectorSearchIndex\";\n this.textKey = dbConfig.textKey ?? \"textContent\";\n this.embeddingKey = dbConfig.embeddingKey ?? \"vectorContent\";\n this.indexOptions = dbConfig.indexOptions ?? {};\n\n // Deferring initialization to the first call to `initialize`\n this.initialize = () => {", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1708007834", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6133, + "pr_file": "libs/langchain-azure-cosmosdb/src/azure_cosmosdb_mongodb.ts", + "discussion_id": "1708007834", + "commented_code": "@@ -0,0 +1,496 @@\n+import {\n+ ObjectId,\n+ Collection,\n+ Document as MongoDBDocument,\n+ MongoClient,\n+ Db,\n+ Filter,\n+} from \"mongodb\";\n+import type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import {\n+ MaxMarginalRelevanceSearchOptions,\n+ VectorStore,\n+} from \"@langchain/core/vectorstores\";\n+import { Document, DocumentInterface } from \"@langchain/core/documents\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+\n+/** Azure Cosmos DB for MongoDB vCore Similarity type. */\n+export const AzureCosmosDBMongoDBSimilarityType = {\n+ /** Cosine similarity */\n+ COS: \"COS\",\n+ /** Inner - product */\n+ IP: \"IP\",\n+ /** Euclidian distance */\n+ L2: \"L2\",\n+} as const;\n+\n+/** Azure Cosmos DB for MongoDB vCore Similarity type. */\n+export type AzureCosmosDBMongoDBSimilarityType =\n+ (typeof AzureCosmosDBMongoDBSimilarityType)[keyof typeof AzureCosmosDBMongoDBSimilarityType];\n+\n+/** Azure Cosmos DB for MongoDB vCore Index Options. */\n+export type AzureCosmosDBMongoDBIndexOptions = {\n+ /** Skips automatic index creation. */\n+ readonly skipCreate?: boolean;\n+ /** Number of clusters that the inverted file (IVF) index uses to group the vector data. */\n+ readonly numLists?: number;\n+ /** Number of dimensions for vector similarity. */\n+ readonly dimensions?: number;\n+ /** Similarity metric to use with the IVF index. */\n+ readonly similarity?: AzureCosmosDBMongoDBSimilarityType;\n+};\n+\n+/** Azure Cosmos DB for MongoDB vCore delete Parameters. */\n+export type AzureCosmosDBMongoDBDeleteParams = {\n+ /** List of IDs for the documents to be removed. */\n+ readonly ids?: string | string[];\n+ /** MongoDB filter object or list of IDs for the documents to be removed. */\n+ readonly filter?: Filter;\n+};\n+\n+/** Configuration options for the `AzureCosmosDBMongoDBVectorStore` constructor. 
*/\n+export interface AzureCosmosDBMongoDBConfig {\n+ readonly client?: MongoClient;\n+ readonly connectionString?: string;\n+ readonly databaseName?: string;\n+ readonly collectionName?: string;\n+ readonly indexName?: string;\n+ readonly textKey?: string;\n+ readonly embeddingKey?: string;\n+ readonly indexOptions?: AzureCosmosDBMongoDBIndexOptions;\n+}\n+\n+/**\n+ * Azure Cosmos DB for MongoDB vCore vector store.\n+ * To use this, you should have both:\n+ * - the `mongodb` NPM package installed\n+ * - a connection string associated with a MongoDB VCore Cluster\n+ *\n+ * You do not need to create a database or collection, it will be created\n+ * automatically.\n+ *\n+ * You also need an index on the collection, which is by default be created\n+ * automatically using the `createIndex` method.\n+ */\n+export class AzureCosmosDBMongoDBVectorStore extends VectorStore {\n+ get lc_secrets(): { [key: string]: string } {\n+ return {\n+ connectionString: \"AZURE_COSMOSDB_MONGODB_CONNECTION_STRING\",\n+ };\n+ }\n+\n+ private connectPromise: Promise;\n+\n+ private initPromise: Promise;\n+\n+ private readonly client: MongoClient | undefined;\n+\n+ private database: Db;\n+\n+ private collection: Collection;\n+\n+ readonly indexName: string;\n+\n+ readonly textKey: string;\n+\n+ readonly embeddingKey: string;\n+\n+ private readonly indexOptions: AzureCosmosDBMongoDBIndexOptions;\n+\n+ /**\n+ * Initializes the AzureCosmosDBMongoDBVectorStore.\n+ * Connect the client to the database and create the container, creating them if needed.\n+ * @returns A promise that resolves when the AzureCosmosDBMongoDBVectorStore has been initialized.\n+ */\n+ initialize: () => Promise;\n+\n+ _vectorstoreType(): string {\n+ return \"azure_cosmosdb_mongodb\";\n+ }\n+\n+ constructor(\n+ embeddings: EmbeddingsInterface,\n+ dbConfig: AzureCosmosDBMongoDBConfig\n+ ) {\n+ super(embeddings, dbConfig);\n+\n+ const connectionString =\n+ dbConfig.connectionString ??\n+ getEnvironmentVariable(\"AZURE_COSMOSDB_MONGODB_CONNECTION_STRING\");\n+\n+ if (!dbConfig.client && !connectionString) {\n+ throw new Error(\n+ \"AzureCosmosDBMongoDBVectorStore client or connection string must be set.\"\n+ );\n+ }\n+\n+ if (!dbConfig.client) {\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ this.client = new MongoClient(connectionString!, {\n+ appName: \"langchainjs\",\n+ });\n+ }\n+\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ const client = dbConfig.client || this.client!;\n+ const databaseName = dbConfig.databaseName ?? \"documentsDB\";\n+ const collectionName = dbConfig.collectionName ?? \"documents\";\n+ this.indexName = dbConfig.indexName ?? \"vectorSearchIndex\";\n+ this.textKey = dbConfig.textKey ?? \"textContent\";\n+ this.embeddingKey = dbConfig.embeddingKey ?? \"vectorContent\";\n+ this.indexOptions = dbConfig.indexOptions ?? 
{};\n+\n+ // Deferring initialization to the first call to `initialize`\n+ this.initialize = () => {", + "comment_created_at": "2024-08-07T22:15:25+00:00", + "comment_author": "jacoblee93", + "comment_body": "Would prefer this as a static method like this:\r\n\r\nhttps://v02.api.js.langchain.com/classes/langchain_community_vectorstores_pgvector.PGVectorStore.html#initialize\r\n\r\nSince I'm not sure this will show up well in API refs, but won't block on it", + "pr_file_module": null + }, + { + "comment_id": "1708897351", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6133, + "pr_file": "libs/langchain-azure-cosmosdb/src/azure_cosmosdb_mongodb.ts", + "discussion_id": "1708007834", + "commented_code": "@@ -0,0 +1,496 @@\n+import {\n+ ObjectId,\n+ Collection,\n+ Document as MongoDBDocument,\n+ MongoClient,\n+ Db,\n+ Filter,\n+} from \"mongodb\";\n+import type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import {\n+ MaxMarginalRelevanceSearchOptions,\n+ VectorStore,\n+} from \"@langchain/core/vectorstores\";\n+import { Document, DocumentInterface } from \"@langchain/core/documents\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+\n+/** Azure Cosmos DB for MongoDB vCore Similarity type. */\n+export const AzureCosmosDBMongoDBSimilarityType = {\n+ /** Cosine similarity */\n+ COS: \"COS\",\n+ /** Inner - product */\n+ IP: \"IP\",\n+ /** Euclidian distance */\n+ L2: \"L2\",\n+} as const;\n+\n+/** Azure Cosmos DB for MongoDB vCore Similarity type. */\n+export type AzureCosmosDBMongoDBSimilarityType =\n+ (typeof AzureCosmosDBMongoDBSimilarityType)[keyof typeof AzureCosmosDBMongoDBSimilarityType];\n+\n+/** Azure Cosmos DB for MongoDB vCore Index Options. */\n+export type AzureCosmosDBMongoDBIndexOptions = {\n+ /** Skips automatic index creation. */\n+ readonly skipCreate?: boolean;\n+ /** Number of clusters that the inverted file (IVF) index uses to group the vector data. */\n+ readonly numLists?: number;\n+ /** Number of dimensions for vector similarity. */\n+ readonly dimensions?: number;\n+ /** Similarity metric to use with the IVF index. */\n+ readonly similarity?: AzureCosmosDBMongoDBSimilarityType;\n+};\n+\n+/** Azure Cosmos DB for MongoDB vCore delete Parameters. */\n+export type AzureCosmosDBMongoDBDeleteParams = {\n+ /** List of IDs for the documents to be removed. */\n+ readonly ids?: string | string[];\n+ /** MongoDB filter object or list of IDs for the documents to be removed. */\n+ readonly filter?: Filter;\n+};\n+\n+/** Configuration options for the `AzureCosmosDBMongoDBVectorStore` constructor. 
*/\n+export interface AzureCosmosDBMongoDBConfig {\n+ readonly client?: MongoClient;\n+ readonly connectionString?: string;\n+ readonly databaseName?: string;\n+ readonly collectionName?: string;\n+ readonly indexName?: string;\n+ readonly textKey?: string;\n+ readonly embeddingKey?: string;\n+ readonly indexOptions?: AzureCosmosDBMongoDBIndexOptions;\n+}\n+\n+/**\n+ * Azure Cosmos DB for MongoDB vCore vector store.\n+ * To use this, you should have both:\n+ * - the `mongodb` NPM package installed\n+ * - a connection string associated with a MongoDB VCore Cluster\n+ *\n+ * You do not need to create a database or collection, it will be created\n+ * automatically.\n+ *\n+ * You also need an index on the collection, which is by default be created\n+ * automatically using the `createIndex` method.\n+ */\n+export class AzureCosmosDBMongoDBVectorStore extends VectorStore {\n+ get lc_secrets(): { [key: string]: string } {\n+ return {\n+ connectionString: \"AZURE_COSMOSDB_MONGODB_CONNECTION_STRING\",\n+ };\n+ }\n+\n+ private connectPromise: Promise;\n+\n+ private initPromise: Promise;\n+\n+ private readonly client: MongoClient | undefined;\n+\n+ private database: Db;\n+\n+ private collection: Collection;\n+\n+ readonly indexName: string;\n+\n+ readonly textKey: string;\n+\n+ readonly embeddingKey: string;\n+\n+ private readonly indexOptions: AzureCosmosDBMongoDBIndexOptions;\n+\n+ /**\n+ * Initializes the AzureCosmosDBMongoDBVectorStore.\n+ * Connect the client to the database and create the container, creating them if needed.\n+ * @returns A promise that resolves when the AzureCosmosDBMongoDBVectorStore has been initialized.\n+ */\n+ initialize: () => Promise;\n+\n+ _vectorstoreType(): string {\n+ return \"azure_cosmosdb_mongodb\";\n+ }\n+\n+ constructor(\n+ embeddings: EmbeddingsInterface,\n+ dbConfig: AzureCosmosDBMongoDBConfig\n+ ) {\n+ super(embeddings, dbConfig);\n+\n+ const connectionString =\n+ dbConfig.connectionString ??\n+ getEnvironmentVariable(\"AZURE_COSMOSDB_MONGODB_CONNECTION_STRING\");\n+\n+ if (!dbConfig.client && !connectionString) {\n+ throw new Error(\n+ \"AzureCosmosDBMongoDBVectorStore client or connection string must be set.\"\n+ );\n+ }\n+\n+ if (!dbConfig.client) {\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ this.client = new MongoClient(connectionString!, {\n+ appName: \"langchainjs\",\n+ });\n+ }\n+\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ const client = dbConfig.client || this.client!;\n+ const databaseName = dbConfig.databaseName ?? \"documentsDB\";\n+ const collectionName = dbConfig.collectionName ?? \"documents\";\n+ this.indexName = dbConfig.indexName ?? \"vectorSearchIndex\";\n+ this.textKey = dbConfig.textKey ?? \"textContent\";\n+ this.embeddingKey = dbConfig.embeddingKey ?? \"vectorContent\";\n+ this.indexOptions = dbConfig.indexOptions ?? 
{};\n+\n+ // Deferring initialization to the first call to `initialize`\n+ this.initialize = () => {", + "comment_created_at": "2024-08-08T08:04:50+00:00", + "comment_author": "sinedied", + "comment_body": "Would you be ok with a middle ground approach?\r\n- make the current `initialize` method I made internal\r\n- add a new static `initialize` method the calls constructor + init/connect the db connection\r\n- keep the \"automatic\" init when calling any VS method in case it wasn't done before (for compatibility / simplicity)\r\n- explain how the init works in the docs\r\n\r\nMy reasoning: I'd like to make the migration from the previous implementation painless (ie keep the auto init) and also make it work for folks who skim quickly through docs and just call the constructor without using the `initialize` method", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1581599504", + "pr_number": 5197, + "pr_file": "libs/langchain-community/src/vectorstores/azure_cosmosdb.ts", + "created_at": "2024-04-26T23:02:14+00:00", + "commented_code": "/**\n * Removes specified documents from the AzureCosmosDBVectorStore.\n * @param ids IDs of the documents to be removed. If no IDs are specified,\n * all documents will be removed.\n * If no IDs or filter are specified, all documents will be removed.\n * @param ids A list of IDs for the documents to be removed.\n * @param filter A MongoDB filter object or list of IDs for the documents to be removed.\n * @returns A promise that resolves when the documents have been removed.\n */\n async delete(ids?: string[]): Promise {\n async delete(\n ids?: string | string[],", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1581599504", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5197, + "pr_file": "libs/langchain-community/src/vectorstores/azure_cosmosdb.ts", + "discussion_id": "1581599504", + "commented_code": "@@ -205,19 +233,27 @@ export class AzureCosmosDBVectorStore extends VectorStore {\n \n /**\n * Removes specified documents from the AzureCosmosDBVectorStore.\n- * @param ids IDs of the documents to be removed. If no IDs are specified,\n- * all documents will be removed.\n+ * If no IDs or filter are specified, all documents will be removed.\n+ * @param ids A list of IDs for the documents to be removed.\n+ * @param filter A MongoDB filter object or list of IDs for the documents to be removed.\n * @returns A promise that resolves when the documents have been removed.\n */\n- async delete(ids?: string[]): Promise {\n+ async delete(\n+ ids?: string | string[],", + "comment_created_at": "2024-04-26T23:02:14+00:00", + "comment_author": "jacoblee93", + "comment_body": "Hey sorry, no I mean a single param like this so as not to break the interface:\r\n\r\nhttps://github.com/langchain-ai/langchainjs/blob/main/libs/langchain-community/src/vectorstores/pinecone.ts#L174", + "pr_file_module": null + }, + { + "comment_id": "1583227571", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5197, + "pr_file": "libs/langchain-community/src/vectorstores/azure_cosmosdb.ts", + "discussion_id": "1581599504", + "commented_code": "@@ -205,19 +233,27 @@ export class AzureCosmosDBVectorStore extends VectorStore {\n \n /**\n * Removes specified documents from the AzureCosmosDBVectorStore.\n- * @param ids IDs of the documents to be removed. 
If no IDs are specified,\n- * all documents will be removed.\n+ * If no IDs or filter are specified, all documents will be removed.\n+ * @param ids A list of IDs for the documents to be removed.\n+ * @param filter A MongoDB filter object or list of IDs for the documents to be removed.\n * @returns A promise that resolves when the documents have been removed.\n */\n- async delete(ids?: string[]): Promise {\n+ async delete(\n+ ids?: string | string[],", + "comment_created_at": "2024-04-29T14:55:22+00:00", + "comment_author": "sinedied", + "comment_body": "Done, I updated and rebased the PR", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1583971678", + "pr_number": 5197, + "pr_file": "libs/langchain-community/src/vectorstores/azure_cosmosdb.ts", + "created_at": "2024-04-30T00:17:08+00:00", + "commented_code": "/**\n * Removes specified documents from the AzureCosmosDBVectorStore.\n * @param ids IDs of the documents to be removed. If no IDs are specified,\n * all documents will be removed.\n * If no IDs or filter are specified, all documents will be removed.\n * @param params Parameters for the delete operation.\n * @returns A promise that resolves when the documents have been removed.\n */\n async delete(ids?: string[]): Promise {\n async delete(params: AzureCosmosDBDeleteParams = {}): Promise {", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1583971678", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5197, + "pr_file": "libs/langchain-community/src/vectorstores/azure_cosmosdb.ts", + "discussion_id": "1583971678", + "commented_code": "@@ -205,19 +241,24 @@ export class AzureCosmosDBVectorStore extends VectorStore {\n \n /**\n * Removes specified documents from the AzureCosmosDBVectorStore.\n- * @param ids IDs of the documents to be removed. If no IDs are specified,\n- * all documents will be removed.\n+ * If no IDs or filter are specified, all documents will be removed.\n+ * @param params Parameters for the delete operation.\n * @returns A promise that resolves when the documents have been removed.\n */\n- async delete(ids?: string[]): Promise {\n+ async delete(params: AzureCosmosDBDeleteParams = {}): Promise {", + "comment_created_at": "2024-04-30T00:17:08+00:00", + "comment_author": "bracesproul", + "comment_body": "This will be a breaking change. Instead we should overload this func so both input types are allowed, and we can narrow the type inside of the method.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-preserve-api-backward-compatibility.md b/_reviewers/langchainjs-preserve-api-backward-compatibility.md index 8aa6087..0cf99ae 100644 --- a/_reviewers/langchainjs-preserve-api-backward-compatibility.md +++ b/_reviewers/langchainjs-preserve-api-backward-compatibility.md @@ -42,245 +42,3 @@ class Store { ``` This approach allows gradual migration to new patterns while maintaining existing integrations. When deprecating functionality, provide clear migration paths and timeline in documentation. 
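As a complementary sketch of the deprecation path (the option names here are illustrative, not taken from the codebase), a renamed configuration field can keep a deprecated alias that forwards to the new name, so existing callers keep working until the next major release:

```typescript
interface ClientConfig {
  /** @deprecated Use `endpoint` instead; slated for removal in the next major version. */
  url?: string;
  endpoint?: string;
}

class Client {
  private readonly endpoint?: string;

  constructor(config: ClientConfig = {}) {
    // Prefer the new field, but honor the legacy alias for backward compatibility.
    this.endpoint = config.endpoint ?? config.url;
  }
}
```

Pairing the alias with an `@deprecated` JSDoc tag surfaces the migration hint directly in editors, so the shim can be removed cleanly at the next planned breaking release.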
- - -[ - { - "discussion_id": "1433346407", - "pr_number": 3448, - "pr_file": "langchain/src/chat_models/azure_ml.ts", - "created_at": "2023-12-21T01:36:53+00:00", - "commented_code": "import { SimpleChatModel, BaseChatModelParams } from \"./base.js\";\nimport { AzureMLHttpClient } from \"../llms/azure_ml.js\";\nimport { getEnvironmentVariable } from \"../util/env.js\";\nimport { BaseMessage } from \"../schema/index.js\";\n\nexport interface ChatContentFormatter {\n /**\n * Formats the request payload for the AzureML endpoint. It takes a\n * prompt and a dictionary of model arguments as input and returns a\n * string representing the formatted request payload.\n * @param messages A list of messages for the chat so far.\n * @param modelArgs A dictionary of model arguments.\n * @returns A string representing the formatted request payload.\n */\n formatRequestPayload: (\n messages: BaseMessage[],\n modelArgs: Record\n ) => string;\n /**\n * Formats the response payload from the AzureML endpoint. It takes a\n * response payload as input and returns a string representing the\n * formatted response.\n * @param responsePayload The response payload from the AzureML endpoint.\n * @returns A string representing the formatted response.\n */\n formatResponsePayload: (output: string) => string;\n}\n\nexport class LlamaContentFormatter implements ChatContentFormatter {\n _convertMessageToRecord(message: BaseMessage): Record {\n if (message._getType() === \"human\") {\n return { role: \"user\", content: message.content };\n } else if (message._getType() === \"ai\") {\n return { role: \"assistant\", content: message.content };\n } else {\n return { role: message._getType(), content: message.content };\n }\n }\n\n formatRequestPayload(\n messages: BaseMessage[],\n modelArgs: Record\n ): string {\n let msgs = messages.map((message) => {\n this._convertMessageToRecord(message);\n });\n return JSON.stringify({\n input_data: {\n input_string: msgs,\n parameters: modelArgs,\n },\n });\n }\n\n formatResponsePayload(responsePayload: string) {\n const response = JSON.parse(responsePayload);\n return response.output;\n }\n}\n\n/**\n * Type definition for the input parameters of the AzureMLChatOnlineEndpoint class.\n */\nexport interface AzureMLChatParams extends BaseChatModelParams {\n endpointUrl?: string;\n endpointApiKey?: string;\n modelArgs?: Record;\n contentFormatter?: ChatContentFormatter;\n}\n\n/**\n * Class that represents the chat model. It extends the SimpleChatModel class and implements the AzureMLChatInput interface.\n */\nexport class AzureMLChatOnlineEndpoint\n extends SimpleChatModel\n implements AzureMLChatParams\n{\n static lc_name() {\n return \"AzureMLChatOnlineEndpoint\";\n }\n static lc_description() {\n return \"A class for interacting with AzureML Chat models.\";\n }\n endpointUrl: string;\n endpointApiKey: string;\n modelArgs?: Record;\n contentFormatter: ChatContentFormatter;\n httpClient: AzureMLHttpClient;\n\n constructor(fields: AzureMLChatParams) {\n super(fields ?? 
{});\n if (!fields?.endpointUrl && !getEnvironmentVariable(\"AZUREML_URL\")) {\n throw new Error(\"No Azure ML Url found.\");\n }\n if (!fields?.endpointApiKey && !getEnvironmentVariable(\"AZUREML_API_KEY\")) {\n throw new Error(\"No Azure ML ApiKey found.\");\n }\n if (!fields?.contentFormatter) {\n throw new Error(\"No Content Formatter provided.\");", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1433346407", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3448, - "pr_file": "langchain/src/chat_models/azure_ml.ts", - "discussion_id": "1433346407", - "commented_code": "@@ -0,0 +1,142 @@\n+import { SimpleChatModel, BaseChatModelParams } from \"./base.js\";\n+import { AzureMLHttpClient } from \"../llms/azure_ml.js\";\n+import { getEnvironmentVariable } from \"../util/env.js\";\n+import { BaseMessage } from \"../schema/index.js\";\n+\n+export interface ChatContentFormatter {\n+ /**\n+ * Formats the request payload for the AzureML endpoint. It takes a\n+ * prompt and a dictionary of model arguments as input and returns a\n+ * string representing the formatted request payload.\n+ * @param messages A list of messages for the chat so far.\n+ * @param modelArgs A dictionary of model arguments.\n+ * @returns A string representing the formatted request payload.\n+ */\n+ formatRequestPayload: (\n+ messages: BaseMessage[],\n+ modelArgs: Record\n+ ) => string;\n+ /**\n+ * Formats the response payload from the AzureML endpoint. It takes a\n+ * response payload as input and returns a string representing the\n+ * formatted response.\n+ * @param responsePayload The response payload from the AzureML endpoint.\n+ * @returns A string representing the formatted response.\n+ */\n+ formatResponsePayload: (output: string) => string;\n+}\n+\n+export class LlamaContentFormatter implements ChatContentFormatter {\n+ _convertMessageToRecord(message: BaseMessage): Record {\n+ if (message._getType() === \"human\") {\n+ return { role: \"user\", content: message.content };\n+ } else if (message._getType() === \"ai\") {\n+ return { role: \"assistant\", content: message.content };\n+ } else {\n+ return { role: message._getType(), content: message.content };\n+ }\n+ }\n+\n+ formatRequestPayload(\n+ messages: BaseMessage[],\n+ modelArgs: Record\n+ ): string {\n+ let msgs = messages.map((message) => {\n+ this._convertMessageToRecord(message);\n+ });\n+ return JSON.stringify({\n+ input_data: {\n+ input_string: msgs,\n+ parameters: modelArgs,\n+ },\n+ });\n+ }\n+\n+ formatResponsePayload(responsePayload: string) {\n+ const response = JSON.parse(responsePayload);\n+ return response.output;\n+ }\n+}\n+\n+/**\n+ * Type definition for the input parameters of the AzureMLChatOnlineEndpoint class.\n+ */\n+export interface AzureMLChatParams extends BaseChatModelParams {\n+ endpointUrl?: string;\n+ endpointApiKey?: string;\n+ modelArgs?: Record;\n+ contentFormatter?: ChatContentFormatter;\n+}\n+\n+/**\n+ * Class that represents the chat model. 
It extends the SimpleChatModel class and implements the AzureMLChatInput interface.\n+ */\n+export class AzureMLChatOnlineEndpoint\n+ extends SimpleChatModel\n+ implements AzureMLChatParams\n+{\n+ static lc_name() {\n+ return \"AzureMLChatOnlineEndpoint\";\n+ }\n+ static lc_description() {\n+ return \"A class for interacting with AzureML Chat models.\";\n+ }\n+ endpointUrl: string;\n+ endpointApiKey: string;\n+ modelArgs?: Record;\n+ contentFormatter: ChatContentFormatter;\n+ httpClient: AzureMLHttpClient;\n+\n+ constructor(fields: AzureMLChatParams) {\n+ super(fields ?? {});\n+ if (!fields?.endpointUrl && !getEnvironmentVariable(\"AZUREML_URL\")) {\n+ throw new Error(\"No Azure ML Url found.\");\n+ }\n+ if (!fields?.endpointApiKey && !getEnvironmentVariable(\"AZUREML_API_KEY\")) {\n+ throw new Error(\"No Azure ML ApiKey found.\");\n+ }\n+ if (!fields?.contentFormatter) {\n+ throw new Error(\"No Content Formatter provided.\");", - "comment_created_at": "2023-12-21T01:36:53+00:00", - "comment_author": "bracesproul", - "comment_body": "Instead of this check it should be a required param in the `AzureMLChatParams` interface. But why are we requiring users to pass this in? Can we instead keep it optional and have a default formatter that gets used?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2057154690", - "pr_number": 8056, - "pr_file": "libs/langchain-aws/src/tests/chat_models.int.test.ts", - "created_at": "2025-04-24T01:17:37+00:00", - "commented_code": "expect(typeof result.tool_calls![0].args.location).toBe(\"string\");\n expect(result.tool_calls![0].args.location.length).toBeGreaterThan(0);\n\n messages.push(\n new ToolMessage({\n tool_call_id: result.tool_calls![0].id!,\n content: await tools[0].invoke(result.tool_calls![0]),\n })\n );\n messages.push(await tools[0].invoke(result.tool_calls![0]));", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2057154690", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8056, - "pr_file": "libs/langchain-aws/src/tests/chat_models.int.test.ts", - "discussion_id": "2057154690", - "commented_code": "@@ -513,12 +513,7 @@ test(\"Test ChatBedrockConverse can respond to tool invocations with thinking ena\n expect(typeof result.tool_calls![0].args.location).toBe(\"string\");\n expect(result.tool_calls![0].args.location.length).toBeGreaterThan(0);\n \n- messages.push(\n- new ToolMessage({\n- tool_call_id: result.tool_calls![0].id!,\n- content: await tools[0].invoke(result.tool_calls![0]),\n- })\n- );\n+ messages.push(await tools[0].invoke(result.tool_calls![0]));", - "comment_created_at": "2025-04-24T01:17:37+00:00", - "comment_author": "hntrl", - "comment_body": "StructuredTool.invoke will return a tool message when a tool call is passed, so we don't have to create a new tool message here", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1424769745", - "pr_number": 3577, - "pr_file": "libs/langchain-community/src/llms/watsonx_ai.ts", - "created_at": "2023-12-13T02:13:17+00:00", - "commented_code": "};\n }\n\n endpoint: string;\n\n region = \"us-south\";\n\n version = \"2023-05-29\";\n\n modelId = \"meta-llama/llama-2-70b-chat\";\n\n modelKwargs?: Record;\n\n ibmCloudApiKey?: string;\n\n ibmCloudToken?: string;\n projectId!: string;\n\n ibmCloudTokenExpiresAt?: number;\n modelParameters?: WatsonModelParameters;\n\n projectId?: string;\n\n modelParameters?: Record;\n private readonly watsonApiClient: WatsonApiClient;\n\n constructor(fields: WatsonxAIParams) {\n 
super(fields);\n\n this.region = fields?.region ?? this.region;", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1424769745", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3577, - "pr_file": "libs/langchain-community/src/llms/watsonx_ai.ts", - "discussion_id": "1424769745", - "commented_code": "@@ -69,48 +30,43 @@ export class WatsonxAI extends LLM {\n };\n }\n \n- endpoint: string;\n-\n- region = \"us-south\";\n-\n- version = \"2023-05-29\";\n-\n modelId = \"meta-llama/llama-2-70b-chat\";\n \n modelKwargs?: Record;\n \n- ibmCloudApiKey?: string;\n-\n- ibmCloudToken?: string;\n+ projectId!: string;\n \n- ibmCloudTokenExpiresAt?: number;\n+ modelParameters?: WatsonModelParameters;\n \n- projectId?: string;\n-\n- modelParameters?: Record;\n+ private readonly watsonApiClient: WatsonApiClient;\n \n constructor(fields: WatsonxAIParams) {\n super(fields);\n \n- this.region = fields?.region ?? this.region;", - "comment_created_at": "2023-12-13T02:13:17+00:00", - "comment_author": "jacoblee93", - "comment_body": "Isn't removing all of these a breaking change?", - "pr_file_module": null - }, - { - "comment_id": "1424977554", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3577, - "pr_file": "libs/langchain-community/src/llms/watsonx_ai.ts", - "discussion_id": "1424769745", - "commented_code": "@@ -69,48 +30,43 @@ export class WatsonxAI extends LLM {\n };\n }\n \n- endpoint: string;\n-\n- region = \"us-south\";\n-\n- version = \"2023-05-29\";\n-\n modelId = \"meta-llama/llama-2-70b-chat\";\n \n modelKwargs?: Record;\n \n- ibmCloudApiKey?: string;\n-\n- ibmCloudToken?: string;\n+ projectId!: string;\n \n- ibmCloudTokenExpiresAt?: number;\n+ modelParameters?: WatsonModelParameters;\n \n- projectId?: string;\n-\n- modelParameters?: Record;\n+ private readonly watsonApiClient: WatsonApiClient;\n \n constructor(fields: WatsonxAIParams) {\n super(fields);\n \n- this.region = fields?.region ?? this.region;", - "comment_created_at": "2023-12-13T08:00:44+00:00", - "comment_author": "faileon", - "comment_body": "I moved a few attributes to the WatsonClient as it's something I also needed in the ChatModel. I did remove `lc_serializable = true;` by accident and will add it back in.\r\n\r\nI tried running the provided example at `examples/src/llms/watsonx_ai.ts` and it works like before.", - "pr_file_module": null - }, - { - "comment_id": "1425542720", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3577, - "pr_file": "libs/langchain-community/src/llms/watsonx_ai.ts", - "discussion_id": "1424769745", - "commented_code": "@@ -69,48 +30,43 @@ export class WatsonxAI extends LLM {\n };\n }\n \n- endpoint: string;\n-\n- region = \"us-south\";\n-\n- version = \"2023-05-29\";\n-\n modelId = \"meta-llama/llama-2-70b-chat\";\n \n modelKwargs?: Record;\n \n- ibmCloudApiKey?: string;\n-\n- ibmCloudToken?: string;\n+ projectId!: string;\n \n- ibmCloudTokenExpiresAt?: number;\n+ modelParameters?: WatsonModelParameters;\n \n- projectId?: string;\n-\n- modelParameters?: Record;\n+ private readonly watsonApiClient: WatsonApiClient;\n \n constructor(fields: WatsonxAIParams) {\n super(fields);\n \n- this.region = fields?.region ?? 
this.region;", - "comment_created_at": "2023-12-13T15:44:14+00:00", - "comment_author": "chasemcdo", - "comment_body": "It is breaking in the sense that if someone is dependent on the old schema they'll need to update it, but most of the functionality is there, but in a now more reusable format.\r\n\r\nOne thing that I am noticing missing is there was originally an [endpoint](https://github.com/langchain-ai/langchainjs/blob/06a39117aae61fcbf26a537de4bad8a156bbb9ab/libs/langchain-community/src/llms/watsonx_ai.ts#L14-L17) parameter which would allow the end user to overwrite the entire url if they so desired.\r\n\r\nI had included this since the production endpoint is still labeled as \"beta\", so this would allow someone to migrate to a different (potentially more stable) endpoint as soon as it is released without the need to submit a PR to LangChain.\r\n\r\nThoughts on reintroducing that @faileon ?", - "pr_file_module": null - }, - { - "comment_id": "1425755461", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3577, - "pr_file": "libs/langchain-community/src/llms/watsonx_ai.ts", - "discussion_id": "1424769745", - "commented_code": "@@ -69,48 +30,43 @@ export class WatsonxAI extends LLM {\n };\n }\n \n- endpoint: string;\n-\n- region = \"us-south\";\n-\n- version = \"2023-05-29\";\n-\n modelId = \"meta-llama/llama-2-70b-chat\";\n \n modelKwargs?: Record;\n \n- ibmCloudApiKey?: string;\n-\n- ibmCloudToken?: string;\n+ projectId!: string;\n \n- ibmCloudTokenExpiresAt?: number;\n+ modelParameters?: WatsonModelParameters;\n \n- projectId?: string;\n-\n- modelParameters?: Record;\n+ private readonly watsonApiClient: WatsonApiClient;\n \n constructor(fields: WatsonxAIParams) {\n super(fields);\n \n- this.region = fields?.region ?? this.region;", - "comment_created_at": "2023-12-13T18:48:59+00:00", - "comment_author": "jacoblee93", - "comment_body": "Not sure what current usage is, but would love a shim for principle's sake. I can try to have a look later if you don't have time.", - "pr_file_module": null - }, - { - "comment_id": "1426735975", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3577, - "pr_file": "libs/langchain-community/src/llms/watsonx_ai.ts", - "discussion_id": "1424769745", - "commented_code": "@@ -69,48 +30,43 @@ export class WatsonxAI extends LLM {\n };\n }\n \n- endpoint: string;\n-\n- region = \"us-south\";\n-\n- version = \"2023-05-29\";\n-\n modelId = \"meta-llama/llama-2-70b-chat\";\n \n modelKwargs?: Record;\n \n- ibmCloudApiKey?: string;\n-\n- ibmCloudToken?: string;\n+ projectId!: string;\n \n- ibmCloudTokenExpiresAt?: number;\n+ modelParameters?: WatsonModelParameters;\n \n- projectId?: string;\n-\n- modelParameters?: Record;\n+ private readonly watsonApiClient: WatsonApiClient;\n \n constructor(fields: WatsonxAIParams) {\n super(fields);\n \n- this.region = fields?.region ?? this.region;", - "comment_created_at": "2023-12-14T13:42:44+00:00", - "comment_author": "faileon", - "comment_body": "Ah I get it now, yes this would be breaking to someone using the old schema. 
I can revert it and make it backwards compatible.\r\n\r\nRegarding the endpoint parameter that's a good catch, I agree it should be configurable, as it will most likely change once IBM releases non beta version.\r\n\r\nI have some free time during this weekend again to work on it.", - "pr_file_module": null - }, - { - "comment_id": "1427152346", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3577, - "pr_file": "libs/langchain-community/src/llms/watsonx_ai.ts", - "discussion_id": "1424769745", - "commented_code": "@@ -69,48 +30,43 @@ export class WatsonxAI extends LLM {\n };\n }\n \n- endpoint: string;\n-\n- region = \"us-south\";\n-\n- version = \"2023-05-29\";\n-\n modelId = \"meta-llama/llama-2-70b-chat\";\n \n modelKwargs?: Record;\n \n- ibmCloudApiKey?: string;\n-\n- ibmCloudToken?: string;\n+ projectId!: string;\n \n- ibmCloudTokenExpiresAt?: number;\n+ modelParameters?: WatsonModelParameters;\n \n- projectId?: string;\n-\n- modelParameters?: Record;\n+ private readonly watsonApiClient: WatsonApiClient;\n \n constructor(fields: WatsonxAIParams) {\n super(fields);\n \n- this.region = fields?.region ?? this.region;", - "comment_created_at": "2023-12-14T19:01:09+00:00", - "comment_author": "jacoblee93", - "comment_body": "Thank you! This is a very recent integration, but we really try to not have any breaking changes in patches unless they are completely unavoidable.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2048025111", - "pr_number": 8020, - "pr_file": "langchain-core/src/messages/index.ts", - "created_at": "2025-04-17T01:04:27+00:00", - "commented_code": "type InvalidToolCall,\n isToolMessage,\n isToolMessageChunk,\n type ToolCall,", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2048025111", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8020, - "pr_file": "langchain-core/src/messages/index.ts", - "discussion_id": "2048025111", - "commented_code": "@@ -16,4 +16,5 @@ export {\n type InvalidToolCall,\n isToolMessage,\n isToolMessageChunk,\n+ type ToolCall,", - "comment_created_at": "2025-04-17T01:04:27+00:00", - "comment_author": "jacoblee93", - "comment_body": "This conflicts with the `ToolCall` declared in `base.ts` - you should import it like this:\r\n\r\n```ts\r\nimport { type ToolCall } from \"@langchain/core/messages/tool\";\r\n```\r\n\r\nWe will remove the old type on next breaking change.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1708007834", - "pr_number": 6133, - "pr_file": "libs/langchain-azure-cosmosdb/src/azure_cosmosdb_mongodb.ts", - "created_at": "2024-08-07T22:15:25+00:00", - "commented_code": "import {\n ObjectId,\n Collection,\n Document as MongoDBDocument,\n MongoClient,\n Db,\n Filter,\n} from \"mongodb\";\nimport type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\nimport {\n MaxMarginalRelevanceSearchOptions,\n VectorStore,\n} from \"@langchain/core/vectorstores\";\nimport { Document, DocumentInterface } from \"@langchain/core/documents\";\nimport { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\nimport { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n\n/** Azure Cosmos DB for MongoDB vCore Similarity type. */\nexport const AzureCosmosDBMongoDBSimilarityType = {\n /** Cosine similarity */\n COS: \"COS\",\n /** Inner - product */\n IP: \"IP\",\n /** Euclidian distance */\n L2: \"L2\",\n} as const;\n\n/** Azure Cosmos DB for MongoDB vCore Similarity type. 
*/\nexport type AzureCosmosDBMongoDBSimilarityType =\n (typeof AzureCosmosDBMongoDBSimilarityType)[keyof typeof AzureCosmosDBMongoDBSimilarityType];\n\n/** Azure Cosmos DB for MongoDB vCore Index Options. */\nexport type AzureCosmosDBMongoDBIndexOptions = {\n /** Skips automatic index creation. */\n readonly skipCreate?: boolean;\n /** Number of clusters that the inverted file (IVF) index uses to group the vector data. */\n readonly numLists?: number;\n /** Number of dimensions for vector similarity. */\n readonly dimensions?: number;\n /** Similarity metric to use with the IVF index. */\n readonly similarity?: AzureCosmosDBMongoDBSimilarityType;\n};\n\n/** Azure Cosmos DB for MongoDB vCore delete Parameters. */\nexport type AzureCosmosDBMongoDBDeleteParams = {\n /** List of IDs for the documents to be removed. */\n readonly ids?: string | string[];\n /** MongoDB filter object or list of IDs for the documents to be removed. */\n readonly filter?: Filter;\n};\n\n/** Configuration options for the `AzureCosmosDBMongoDBVectorStore` constructor. */\nexport interface AzureCosmosDBMongoDBConfig {\n readonly client?: MongoClient;\n readonly connectionString?: string;\n readonly databaseName?: string;\n readonly collectionName?: string;\n readonly indexName?: string;\n readonly textKey?: string;\n readonly embeddingKey?: string;\n readonly indexOptions?: AzureCosmosDBMongoDBIndexOptions;\n}\n\n/**\n * Azure Cosmos DB for MongoDB vCore vector store.\n * To use this, you should have both:\n * - the `mongodb` NPM package installed\n * - a connection string associated with a MongoDB VCore Cluster\n *\n * You do not need to create a database or collection, it will be created\n * automatically.\n *\n * You also need an index on the collection, which is by default be created\n * automatically using the `createIndex` method.\n */\nexport class AzureCosmosDBMongoDBVectorStore extends VectorStore {\n get lc_secrets(): { [key: string]: string } {\n return {\n connectionString: \"AZURE_COSMOSDB_MONGODB_CONNECTION_STRING\",\n };\n }\n\n private connectPromise: Promise;\n\n private initPromise: Promise;\n\n private readonly client: MongoClient | undefined;\n\n private database: Db;\n\n private collection: Collection;\n\n readonly indexName: string;\n\n readonly textKey: string;\n\n readonly embeddingKey: string;\n\n private readonly indexOptions: AzureCosmosDBMongoDBIndexOptions;\n\n /**\n * Initializes the AzureCosmosDBMongoDBVectorStore.\n * Connect the client to the database and create the container, creating them if needed.\n * @returns A promise that resolves when the AzureCosmosDBMongoDBVectorStore has been initialized.\n */\n initialize: () => Promise;\n\n _vectorstoreType(): string {\n return \"azure_cosmosdb_mongodb\";\n }\n\n constructor(\n embeddings: EmbeddingsInterface,\n dbConfig: AzureCosmosDBMongoDBConfig\n ) {\n super(embeddings, dbConfig);\n\n const connectionString =\n dbConfig.connectionString ??\n getEnvironmentVariable(\"AZURE_COSMOSDB_MONGODB_CONNECTION_STRING\");\n\n if (!dbConfig.client && !connectionString) {\n throw new Error(\n \"AzureCosmosDBMongoDBVectorStore client or connection string must be set.\"\n );\n }\n\n if (!dbConfig.client) {\n // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n this.client = new MongoClient(connectionString!, {\n appName: \"langchainjs\",\n });\n }\n\n // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n const client = dbConfig.client || this.client!;\n const databaseName = dbConfig.databaseName ?? 
\"documentsDB\";\n const collectionName = dbConfig.collectionName ?? \"documents\";\n this.indexName = dbConfig.indexName ?? \"vectorSearchIndex\";\n this.textKey = dbConfig.textKey ?? \"textContent\";\n this.embeddingKey = dbConfig.embeddingKey ?? \"vectorContent\";\n this.indexOptions = dbConfig.indexOptions ?? {};\n\n // Deferring initialization to the first call to `initialize`\n this.initialize = () => {", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1708007834", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6133, - "pr_file": "libs/langchain-azure-cosmosdb/src/azure_cosmosdb_mongodb.ts", - "discussion_id": "1708007834", - "commented_code": "@@ -0,0 +1,496 @@\n+import {\n+ ObjectId,\n+ Collection,\n+ Document as MongoDBDocument,\n+ MongoClient,\n+ Db,\n+ Filter,\n+} from \"mongodb\";\n+import type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import {\n+ MaxMarginalRelevanceSearchOptions,\n+ VectorStore,\n+} from \"@langchain/core/vectorstores\";\n+import { Document, DocumentInterface } from \"@langchain/core/documents\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+\n+/** Azure Cosmos DB for MongoDB vCore Similarity type. */\n+export const AzureCosmosDBMongoDBSimilarityType = {\n+ /** Cosine similarity */\n+ COS: \"COS\",\n+ /** Inner - product */\n+ IP: \"IP\",\n+ /** Euclidian distance */\n+ L2: \"L2\",\n+} as const;\n+\n+/** Azure Cosmos DB for MongoDB vCore Similarity type. */\n+export type AzureCosmosDBMongoDBSimilarityType =\n+ (typeof AzureCosmosDBMongoDBSimilarityType)[keyof typeof AzureCosmosDBMongoDBSimilarityType];\n+\n+/** Azure Cosmos DB for MongoDB vCore Index Options. */\n+export type AzureCosmosDBMongoDBIndexOptions = {\n+ /** Skips automatic index creation. */\n+ readonly skipCreate?: boolean;\n+ /** Number of clusters that the inverted file (IVF) index uses to group the vector data. */\n+ readonly numLists?: number;\n+ /** Number of dimensions for vector similarity. */\n+ readonly dimensions?: number;\n+ /** Similarity metric to use with the IVF index. */\n+ readonly similarity?: AzureCosmosDBMongoDBSimilarityType;\n+};\n+\n+/** Azure Cosmos DB for MongoDB vCore delete Parameters. */\n+export type AzureCosmosDBMongoDBDeleteParams = {\n+ /** List of IDs for the documents to be removed. */\n+ readonly ids?: string | string[];\n+ /** MongoDB filter object or list of IDs for the documents to be removed. */\n+ readonly filter?: Filter;\n+};\n+\n+/** Configuration options for the `AzureCosmosDBMongoDBVectorStore` constructor. 
*/\n+export interface AzureCosmosDBMongoDBConfig {\n+ readonly client?: MongoClient;\n+ readonly connectionString?: string;\n+ readonly databaseName?: string;\n+ readonly collectionName?: string;\n+ readonly indexName?: string;\n+ readonly textKey?: string;\n+ readonly embeddingKey?: string;\n+ readonly indexOptions?: AzureCosmosDBMongoDBIndexOptions;\n+}\n+\n+/**\n+ * Azure Cosmos DB for MongoDB vCore vector store.\n+ * To use this, you should have both:\n+ * - the `mongodb` NPM package installed\n+ * - a connection string associated with a MongoDB VCore Cluster\n+ *\n+ * You do not need to create a database or collection, it will be created\n+ * automatically.\n+ *\n+ * You also need an index on the collection, which is by default be created\n+ * automatically using the `createIndex` method.\n+ */\n+export class AzureCosmosDBMongoDBVectorStore extends VectorStore {\n+ get lc_secrets(): { [key: string]: string } {\n+ return {\n+ connectionString: \"AZURE_COSMOSDB_MONGODB_CONNECTION_STRING\",\n+ };\n+ }\n+\n+ private connectPromise: Promise;\n+\n+ private initPromise: Promise;\n+\n+ private readonly client: MongoClient | undefined;\n+\n+ private database: Db;\n+\n+ private collection: Collection;\n+\n+ readonly indexName: string;\n+\n+ readonly textKey: string;\n+\n+ readonly embeddingKey: string;\n+\n+ private readonly indexOptions: AzureCosmosDBMongoDBIndexOptions;\n+\n+ /**\n+ * Initializes the AzureCosmosDBMongoDBVectorStore.\n+ * Connect the client to the database and create the container, creating them if needed.\n+ * @returns A promise that resolves when the AzureCosmosDBMongoDBVectorStore has been initialized.\n+ */\n+ initialize: () => Promise;\n+\n+ _vectorstoreType(): string {\n+ return \"azure_cosmosdb_mongodb\";\n+ }\n+\n+ constructor(\n+ embeddings: EmbeddingsInterface,\n+ dbConfig: AzureCosmosDBMongoDBConfig\n+ ) {\n+ super(embeddings, dbConfig);\n+\n+ const connectionString =\n+ dbConfig.connectionString ??\n+ getEnvironmentVariable(\"AZURE_COSMOSDB_MONGODB_CONNECTION_STRING\");\n+\n+ if (!dbConfig.client && !connectionString) {\n+ throw new Error(\n+ \"AzureCosmosDBMongoDBVectorStore client or connection string must be set.\"\n+ );\n+ }\n+\n+ if (!dbConfig.client) {\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ this.client = new MongoClient(connectionString!, {\n+ appName: \"langchainjs\",\n+ });\n+ }\n+\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ const client = dbConfig.client || this.client!;\n+ const databaseName = dbConfig.databaseName ?? \"documentsDB\";\n+ const collectionName = dbConfig.collectionName ?? \"documents\";\n+ this.indexName = dbConfig.indexName ?? \"vectorSearchIndex\";\n+ this.textKey = dbConfig.textKey ?? \"textContent\";\n+ this.embeddingKey = dbConfig.embeddingKey ?? \"vectorContent\";\n+ this.indexOptions = dbConfig.indexOptions ?? 
{};\n+\n+ // Deferring initialization to the first call to `initialize`\n+ this.initialize = () => {", - "comment_created_at": "2024-08-07T22:15:25+00:00", - "comment_author": "jacoblee93", - "comment_body": "Would prefer this as a static method like this:\r\n\r\nhttps://v02.api.js.langchain.com/classes/langchain_community_vectorstores_pgvector.PGVectorStore.html#initialize\r\n\r\nSince I'm not sure this will show up well in API refs, but won't block on it", - "pr_file_module": null - }, - { - "comment_id": "1708897351", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6133, - "pr_file": "libs/langchain-azure-cosmosdb/src/azure_cosmosdb_mongodb.ts", - "discussion_id": "1708007834", - "commented_code": "@@ -0,0 +1,496 @@\n+import {\n+ ObjectId,\n+ Collection,\n+ Document as MongoDBDocument,\n+ MongoClient,\n+ Db,\n+ Filter,\n+} from \"mongodb\";\n+import type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import {\n+ MaxMarginalRelevanceSearchOptions,\n+ VectorStore,\n+} from \"@langchain/core/vectorstores\";\n+import { Document, DocumentInterface } from \"@langchain/core/documents\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+\n+/** Azure Cosmos DB for MongoDB vCore Similarity type. */\n+export const AzureCosmosDBMongoDBSimilarityType = {\n+ /** Cosine similarity */\n+ COS: \"COS\",\n+ /** Inner - product */\n+ IP: \"IP\",\n+ /** Euclidian distance */\n+ L2: \"L2\",\n+} as const;\n+\n+/** Azure Cosmos DB for MongoDB vCore Similarity type. */\n+export type AzureCosmosDBMongoDBSimilarityType =\n+ (typeof AzureCosmosDBMongoDBSimilarityType)[keyof typeof AzureCosmosDBMongoDBSimilarityType];\n+\n+/** Azure Cosmos DB for MongoDB vCore Index Options. */\n+export type AzureCosmosDBMongoDBIndexOptions = {\n+ /** Skips automatic index creation. */\n+ readonly skipCreate?: boolean;\n+ /** Number of clusters that the inverted file (IVF) index uses to group the vector data. */\n+ readonly numLists?: number;\n+ /** Number of dimensions for vector similarity. */\n+ readonly dimensions?: number;\n+ /** Similarity metric to use with the IVF index. */\n+ readonly similarity?: AzureCosmosDBMongoDBSimilarityType;\n+};\n+\n+/** Azure Cosmos DB for MongoDB vCore delete Parameters. */\n+export type AzureCosmosDBMongoDBDeleteParams = {\n+ /** List of IDs for the documents to be removed. */\n+ readonly ids?: string | string[];\n+ /** MongoDB filter object or list of IDs for the documents to be removed. */\n+ readonly filter?: Filter;\n+};\n+\n+/** Configuration options for the `AzureCosmosDBMongoDBVectorStore` constructor. 
*/\n+export interface AzureCosmosDBMongoDBConfig {\n+ readonly client?: MongoClient;\n+ readonly connectionString?: string;\n+ readonly databaseName?: string;\n+ readonly collectionName?: string;\n+ readonly indexName?: string;\n+ readonly textKey?: string;\n+ readonly embeddingKey?: string;\n+ readonly indexOptions?: AzureCosmosDBMongoDBIndexOptions;\n+}\n+\n+/**\n+ * Azure Cosmos DB for MongoDB vCore vector store.\n+ * To use this, you should have both:\n+ * - the `mongodb` NPM package installed\n+ * - a connection string associated with a MongoDB VCore Cluster\n+ *\n+ * You do not need to create a database or collection, it will be created\n+ * automatically.\n+ *\n+ * You also need an index on the collection, which is by default be created\n+ * automatically using the `createIndex` method.\n+ */\n+export class AzureCosmosDBMongoDBVectorStore extends VectorStore {\n+ get lc_secrets(): { [key: string]: string } {\n+ return {\n+ connectionString: \"AZURE_COSMOSDB_MONGODB_CONNECTION_STRING\",\n+ };\n+ }\n+\n+ private connectPromise: Promise;\n+\n+ private initPromise: Promise;\n+\n+ private readonly client: MongoClient | undefined;\n+\n+ private database: Db;\n+\n+ private collection: Collection;\n+\n+ readonly indexName: string;\n+\n+ readonly textKey: string;\n+\n+ readonly embeddingKey: string;\n+\n+ private readonly indexOptions: AzureCosmosDBMongoDBIndexOptions;\n+\n+ /**\n+ * Initializes the AzureCosmosDBMongoDBVectorStore.\n+ * Connect the client to the database and create the container, creating them if needed.\n+ * @returns A promise that resolves when the AzureCosmosDBMongoDBVectorStore has been initialized.\n+ */\n+ initialize: () => Promise;\n+\n+ _vectorstoreType(): string {\n+ return \"azure_cosmosdb_mongodb\";\n+ }\n+\n+ constructor(\n+ embeddings: EmbeddingsInterface,\n+ dbConfig: AzureCosmosDBMongoDBConfig\n+ ) {\n+ super(embeddings, dbConfig);\n+\n+ const connectionString =\n+ dbConfig.connectionString ??\n+ getEnvironmentVariable(\"AZURE_COSMOSDB_MONGODB_CONNECTION_STRING\");\n+\n+ if (!dbConfig.client && !connectionString) {\n+ throw new Error(\n+ \"AzureCosmosDBMongoDBVectorStore client or connection string must be set.\"\n+ );\n+ }\n+\n+ if (!dbConfig.client) {\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ this.client = new MongoClient(connectionString!, {\n+ appName: \"langchainjs\",\n+ });\n+ }\n+\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ const client = dbConfig.client || this.client!;\n+ const databaseName = dbConfig.databaseName ?? \"documentsDB\";\n+ const collectionName = dbConfig.collectionName ?? \"documents\";\n+ this.indexName = dbConfig.indexName ?? \"vectorSearchIndex\";\n+ this.textKey = dbConfig.textKey ?? \"textContent\";\n+ this.embeddingKey = dbConfig.embeddingKey ?? \"vectorContent\";\n+ this.indexOptions = dbConfig.indexOptions ?? 
{};\n+\n+ // Deferring initialization to the first call to `initialize`\n+ this.initialize = () => {", - "comment_created_at": "2024-08-08T08:04:50+00:00", - "comment_author": "sinedied", - "comment_body": "Would you be ok with a middle ground approach?\r\n- make the current `initialize` method I made internal\r\n- add a new static `initialize` method the calls constructor + init/connect the db connection\r\n- keep the \"automatic\" init when calling any VS method in case it wasn't done before (for compatibility / simplicity)\r\n- explain how the init works in the docs\r\n\r\nMy reasoning: I'd like to make the migration from the previous implementation painless (ie keep the auto init) and also make it work for folks who skim quickly through docs and just call the constructor without using the `initialize` method", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1581599504", - "pr_number": 5197, - "pr_file": "libs/langchain-community/src/vectorstores/azure_cosmosdb.ts", - "created_at": "2024-04-26T23:02:14+00:00", - "commented_code": "/**\n * Removes specified documents from the AzureCosmosDBVectorStore.\n * @param ids IDs of the documents to be removed. If no IDs are specified,\n * all documents will be removed.\n * If no IDs or filter are specified, all documents will be removed.\n * @param ids A list of IDs for the documents to be removed.\n * @param filter A MongoDB filter object or list of IDs for the documents to be removed.\n * @returns A promise that resolves when the documents have been removed.\n */\n async delete(ids?: string[]): Promise {\n async delete(\n ids?: string | string[],", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1581599504", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5197, - "pr_file": "libs/langchain-community/src/vectorstores/azure_cosmosdb.ts", - "discussion_id": "1581599504", - "commented_code": "@@ -205,19 +233,27 @@ export class AzureCosmosDBVectorStore extends VectorStore {\n \n /**\n * Removes specified documents from the AzureCosmosDBVectorStore.\n- * @param ids IDs of the documents to be removed. If no IDs are specified,\n- * all documents will be removed.\n+ * If no IDs or filter are specified, all documents will be removed.\n+ * @param ids A list of IDs for the documents to be removed.\n+ * @param filter A MongoDB filter object or list of IDs for the documents to be removed.\n * @returns A promise that resolves when the documents have been removed.\n */\n- async delete(ids?: string[]): Promise {\n+ async delete(\n+ ids?: string | string[],", - "comment_created_at": "2024-04-26T23:02:14+00:00", - "comment_author": "jacoblee93", - "comment_body": "Hey sorry, no I mean a single param like this so as not to break the interface:\r\n\r\nhttps://github.com/langchain-ai/langchainjs/blob/main/libs/langchain-community/src/vectorstores/pinecone.ts#L174", - "pr_file_module": null - }, - { - "comment_id": "1583227571", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5197, - "pr_file": "libs/langchain-community/src/vectorstores/azure_cosmosdb.ts", - "discussion_id": "1581599504", - "commented_code": "@@ -205,19 +233,27 @@ export class AzureCosmosDBVectorStore extends VectorStore {\n \n /**\n * Removes specified documents from the AzureCosmosDBVectorStore.\n- * @param ids IDs of the documents to be removed. 
If no IDs are specified,\n- * all documents will be removed.\n+ * If no IDs or filter are specified, all documents will be removed.\n+ * @param ids A list of IDs for the documents to be removed.\n+ * @param filter A MongoDB filter object or list of IDs for the documents to be removed.\n * @returns A promise that resolves when the documents have been removed.\n */\n- async delete(ids?: string[]): Promise {\n+ async delete(\n+ ids?: string | string[],", - "comment_created_at": "2024-04-29T14:55:22+00:00", - "comment_author": "sinedied", - "comment_body": "Done, I updated and rebased the PR", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1583971678", - "pr_number": 5197, - "pr_file": "libs/langchain-community/src/vectorstores/azure_cosmosdb.ts", - "created_at": "2024-04-30T00:17:08+00:00", - "commented_code": "/**\n * Removes specified documents from the AzureCosmosDBVectorStore.\n * @param ids IDs of the documents to be removed. If no IDs are specified,\n * all documents will be removed.\n * If no IDs or filter are specified, all documents will be removed.\n * @param params Parameters for the delete operation.\n * @returns A promise that resolves when the documents have been removed.\n */\n async delete(ids?: string[]): Promise {\n async delete(params: AzureCosmosDBDeleteParams = {}): Promise {", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1583971678", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5197, - "pr_file": "libs/langchain-community/src/vectorstores/azure_cosmosdb.ts", - "discussion_id": "1583971678", - "commented_code": "@@ -205,19 +241,24 @@ export class AzureCosmosDBVectorStore extends VectorStore {\n \n /**\n * Removes specified documents from the AzureCosmosDBVectorStore.\n- * @param ids IDs of the documents to be removed. If no IDs are specified,\n- * all documents will be removed.\n+ * If no IDs or filter are specified, all documents will be removed.\n+ * @param params Parameters for the delete operation.\n * @returns A promise that resolves when the documents have been removed.\n */\n- async delete(ids?: string[]): Promise {\n+ async delete(params: AzureCosmosDBDeleteParams = {}): Promise {", - "comment_created_at": "2024-04-30T00:17:08+00:00", - "comment_author": "bracesproul", - "comment_body": "This will be a breaking change. 
Instead we should overload this func so both input types are allowed, and we can narrow the type inside of the method.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-simplify-code-organization.json b/_reviewers/langchainjs-simplify-code-organization.json new file mode 100644 index 0000000..ce284d8 --- /dev/null +++ b/_reviewers/langchainjs-simplify-code-organization.json @@ -0,0 +1,82 @@ +[ + { + "discussion_id": "1881043372", + "pr_number": 7347, + "pr_file": "docs/core_docs/src/theme/EmbeddingTabs.js", + "created_at": "2024-12-11T21:44:10+00:00", + "commented_code": "/* eslint-disable react/jsx-props-no-spreading, react/destructuring-assignment */\nimport React from \"react\";\nimport Tabs from \"@theme/Tabs\";\nimport TabItem from \"@theme/TabItem\";\nimport CodeBlock from \"@theme-original/CodeBlock\";\nimport Npm2Yarn from \"@theme/Npm2Yarn\";\n\nfunction InstallationInfo({ children }) {\n return {children};\n}", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1881043372", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7347, + "pr_file": "docs/core_docs/src/theme/EmbeddingTabs.js", + "discussion_id": "1881043372", + "commented_code": "@@ -0,0 +1,131 @@\n+/* eslint-disable react/jsx-props-no-spreading, react/destructuring-assignment */\n+import React from \"react\";\n+import Tabs from \"@theme/Tabs\";\n+import TabItem from \"@theme/TabItem\";\n+import CodeBlock from \"@theme-original/CodeBlock\";\n+import Npm2Yarn from \"@theme/Npm2Yarn\";\n+\n+function InstallationInfo({ children }) {\n+ return {children};\n+}", + "comment_created_at": "2024-12-11T21:44:10+00:00", + "comment_author": "bracesproul", + "comment_body": "Whats the point of wrapping this and not just calling `Npm2Yarn` inside the component?", + "pr_file_module": null + }, + { + "comment_id": "1882257407", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7347, + "pr_file": "docs/core_docs/src/theme/EmbeddingTabs.js", + "discussion_id": "1881043372", + "commented_code": "@@ -0,0 +1,131 @@\n+/* eslint-disable react/jsx-props-no-spreading, react/destructuring-assignment */\n+import React from \"react\";\n+import Tabs from \"@theme/Tabs\";\n+import TabItem from \"@theme/TabItem\";\n+import CodeBlock from \"@theme-original/CodeBlock\";\n+import Npm2Yarn from \"@theme/Npm2Yarn\";\n+\n+function InstallationInfo({ children }) {\n+ return {children};\n+}", + "comment_created_at": "2024-12-12T14:33:14+00:00", + "comment_author": "ccurme", + "comment_body": "Thanks, updated this. 
I had copied `InstallationInfo` from chat model tabs and then deleted some stuff.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1637267576", + "pr_number": 5737, + "pr_file": "langchain-core/langchain.config.js", + "created_at": "2024-06-12T23:54:00+00:00", + "commented_code": "\"utils/testing\": \"utils/testing/index\",\n \"utils/tiktoken\": \"utils/tiktoken\",\n \"utils/types\": \"utils/types/index\",\n \"utils/is_openai_tool\": \"utils/is_openai_tool\",", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1637267576", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5737, + "pr_file": "langchain-core/langchain.config.js", + "discussion_id": "1637267576", + "commented_code": "@@ -66,6 +66,7 @@ export const config = {\n \"utils/testing\": \"utils/testing/index\",\n \"utils/tiktoken\": \"utils/tiktoken\",\n \"utils/types\": \"utils/types/index\",\n+ \"utils/is_openai_tool\": \"utils/is_openai_tool\",", + "comment_created_at": "2024-06-12T23:54:00+00:00", + "comment_author": "jacoblee93", + "comment_body": "Can we put this in an existing entrypoint? `types` maybe?", + "pr_file_module": null + }, + { + "comment_id": "1637268177", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5737, + "pr_file": "langchain-core/langchain.config.js", + "discussion_id": "1637267576", + "commented_code": "@@ -66,6 +66,7 @@ export const config = {\n \"utils/testing\": \"utils/testing/index\",\n \"utils/tiktoken\": \"utils/tiktoken\",\n \"utils/types\": \"utils/types/index\",\n+ \"utils/is_openai_tool\": \"utils/is_openai_tool\",", + "comment_created_at": "2024-06-12T23:54:11+00:00", + "comment_author": "jacoblee93", + "comment_body": "Want to avoid fragmentation", + "pr_file_module": null + }, + { + "comment_id": "1637345929", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5737, + "pr_file": "langchain-core/langchain.config.js", + "discussion_id": "1637267576", + "commented_code": "@@ -66,6 +66,7 @@ export const config = {\n \"utils/testing\": \"utils/testing/index\",\n \"utils/tiktoken\": \"utils/tiktoken\",\n \"utils/types\": \"utils/types/index\",\n+ \"utils/is_openai_tool\": \"utils/is_openai_tool\",", + "comment_created_at": "2024-06-13T01:21:49+00:00", + "comment_author": "bracesproul", + "comment_body": "Moved to `@langchain/core/language_models/base` so it's exported from the same place as `ToolDefinition`", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-simplify-code-organization.md b/_reviewers/langchainjs-simplify-code-organization.md index 90a2a69..5d9610c 100644 --- a/_reviewers/langchainjs-simplify-code-organization.md +++ b/_reviewers/langchainjs-simplify-code-organization.md @@ -33,87 +33,3 @@ Maintain clean code organization by eliminating unnecessary abstractions and pre ``` This approach reduces code complexity, improves maintainability, and makes the codebase easier to navigate and understand. 
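As a further illustration of the entrypoint point, here is a minimal sketch (the package, module, and function names are hypothetical, not taken from the langchain packages): the new helper is re-exported from the semantically related entrypoint that already exists, so consumers keep a single import path instead of learning a new one-off entrypoint.

```typescript
// utils/is_widget_tool.ts — a small helper that logically belongs with the
// existing "tools" entrypoint rather than getting its own top-level export.
export interface WidgetToolDefinition {
  name: string;
  schema: Record<string, unknown>;
}

export function isWidgetTool(value: unknown): value is WidgetToolDefinition {
  return (
    typeof value === "object" &&
    value !== null &&
    typeof (value as WidgetToolDefinition).name === "string"
  );
}

// tools/index.ts — instead of registering "utils/is_widget_tool" as a new
// package entrypoint, re-export the helper next to the types it guards.
export {
  isWidgetTool,
  type WidgetToolDefinition,
} from "../utils/is_widget_tool.js";

// Consumers then import everything tool-related from one place:
// import { isWidgetTool, type WidgetToolDefinition } from "my-pkg/tools";
```

Keeping related exports together also means a later rename or deprecation touches one entrypoint instead of several scattered ones.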
- - -[ - { - "discussion_id": "1881043372", - "pr_number": 7347, - "pr_file": "docs/core_docs/src/theme/EmbeddingTabs.js", - "created_at": "2024-12-11T21:44:10+00:00", - "commented_code": "/* eslint-disable react/jsx-props-no-spreading, react/destructuring-assignment */\nimport React from \"react\";\nimport Tabs from \"@theme/Tabs\";\nimport TabItem from \"@theme/TabItem\";\nimport CodeBlock from \"@theme-original/CodeBlock\";\nimport Npm2Yarn from \"@theme/Npm2Yarn\";\n\nfunction InstallationInfo({ children }) {\n return {children};\n}", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1881043372", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7347, - "pr_file": "docs/core_docs/src/theme/EmbeddingTabs.js", - "discussion_id": "1881043372", - "commented_code": "@@ -0,0 +1,131 @@\n+/* eslint-disable react/jsx-props-no-spreading, react/destructuring-assignment */\n+import React from \"react\";\n+import Tabs from \"@theme/Tabs\";\n+import TabItem from \"@theme/TabItem\";\n+import CodeBlock from \"@theme-original/CodeBlock\";\n+import Npm2Yarn from \"@theme/Npm2Yarn\";\n+\n+function InstallationInfo({ children }) {\n+ return {children};\n+}", - "comment_created_at": "2024-12-11T21:44:10+00:00", - "comment_author": "bracesproul", - "comment_body": "Whats the point of wrapping this and not just calling `Npm2Yarn` inside the component?", - "pr_file_module": null - }, - { - "comment_id": "1882257407", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7347, - "pr_file": "docs/core_docs/src/theme/EmbeddingTabs.js", - "discussion_id": "1881043372", - "commented_code": "@@ -0,0 +1,131 @@\n+/* eslint-disable react/jsx-props-no-spreading, react/destructuring-assignment */\n+import React from \"react\";\n+import Tabs from \"@theme/Tabs\";\n+import TabItem from \"@theme/TabItem\";\n+import CodeBlock from \"@theme-original/CodeBlock\";\n+import Npm2Yarn from \"@theme/Npm2Yarn\";\n+\n+function InstallationInfo({ children }) {\n+ return {children};\n+}", - "comment_created_at": "2024-12-12T14:33:14+00:00", - "comment_author": "ccurme", - "comment_body": "Thanks, updated this. I had copied `InstallationInfo` from chat model tabs and then deleted some stuff.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1637267576", - "pr_number": 5737, - "pr_file": "langchain-core/langchain.config.js", - "created_at": "2024-06-12T23:54:00+00:00", - "commented_code": "\"utils/testing\": \"utils/testing/index\",\n \"utils/tiktoken\": \"utils/tiktoken\",\n \"utils/types\": \"utils/types/index\",\n \"utils/is_openai_tool\": \"utils/is_openai_tool\",", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1637267576", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5737, - "pr_file": "langchain-core/langchain.config.js", - "discussion_id": "1637267576", - "commented_code": "@@ -66,6 +66,7 @@ export const config = {\n \"utils/testing\": \"utils/testing/index\",\n \"utils/tiktoken\": \"utils/tiktoken\",\n \"utils/types\": \"utils/types/index\",\n+ \"utils/is_openai_tool\": \"utils/is_openai_tool\",", - "comment_created_at": "2024-06-12T23:54:00+00:00", - "comment_author": "jacoblee93", - "comment_body": "Can we put this in an existing entrypoint? 
`types` maybe?", - "pr_file_module": null - }, - { - "comment_id": "1637268177", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5737, - "pr_file": "langchain-core/langchain.config.js", - "discussion_id": "1637267576", - "commented_code": "@@ -66,6 +66,7 @@ export const config = {\n \"utils/testing\": \"utils/testing/index\",\n \"utils/tiktoken\": \"utils/tiktoken\",\n \"utils/types\": \"utils/types/index\",\n+ \"utils/is_openai_tool\": \"utils/is_openai_tool\",", - "comment_created_at": "2024-06-12T23:54:11+00:00", - "comment_author": "jacoblee93", - "comment_body": "Want to avoid fragmentation", - "pr_file_module": null - }, - { - "comment_id": "1637345929", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5737, - "pr_file": "langchain-core/langchain.config.js", - "discussion_id": "1637267576", - "commented_code": "@@ -66,6 +66,7 @@ export const config = {\n \"utils/testing\": \"utils/testing/index\",\n \"utils/tiktoken\": \"utils/tiktoken\",\n \"utils/types\": \"utils/types/index\",\n+ \"utils/is_openai_tool\": \"utils/is_openai_tool\",", - "comment_created_at": "2024-06-13T01:21:49+00:00", - "comment_author": "bracesproul", - "comment_body": "Moved to `@langchain/core/language_models/base` so it's exported from the same place as `ToolDefinition`", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-throw-meaningful-errors.json b/_reviewers/langchainjs-throw-meaningful-errors.json new file mode 100644 index 0000000..a4ddec3 --- /dev/null +++ b/_reviewers/langchainjs-throw-meaningful-errors.json @@ -0,0 +1,136 @@ +[ + { + "discussion_id": "1899693765", + "pr_number": 7300, + "pr_file": "libs/langchain-community/src/utils/reddit.ts", + "created_at": "2024-12-30T17:32:20+00:00", + "commented_code": "import dotenv from \"dotenv\";\nimport { AsyncCaller } from \"@langchain/core/utils/async_caller\";\n\ndotenv.config();\n\nexport interface RedditAPIConfig {\n clientId: string;\n clientSecret: string;\n userAgent: string;\n}\n\nexport interface RedditPost {\n title: string;\n selftext: string;\n subreddit_name_prefixed: string;\n score: number;\n id: string;\n url: string;\n author: string;\n}\n\nexport class RedditAPIWrapper {\n private clientId: string;\n\n private clientSecret: string;\n\n private userAgent: string;\n\n private token: string | null = null;\n\n private baseUrl = \"https://oauth.reddit.com\";\n\n private asyncCaller: AsyncCaller; // Using AsyncCaller for requests\n\n constructor(config: RedditAPIConfig) {\n this.clientId = config.clientId;\n this.clientSecret = config.clientSecret;\n this.userAgent = config.userAgent;\n this.asyncCaller = new AsyncCaller({\n maxConcurrency: 5,\n maxRetries: 3,\n onFailedAttempt: (error) => {\n console.error(\"Attempt failed:\", error.message);\n },\n });\n }\n\n private async authenticate() {\n if (this.token) return;\n\n const authString = btoa(`${this.clientId}:${this.clientSecret}`);\n\n try {\n const response = await fetch(\n \"https://www.reddit.com/api/v1/access_token\",\n {\n method: \"POST\",\n headers: {\n Authorization: `Basic ${authString}`,\n \"User-Agent\": this.userAgent,\n \"Content-Type\": \"application/x-www-form-urlencoded\",\n },\n body: \"grant_type=client_credentials\",\n }\n );\n\n if (!response.ok) {\n throw new Error(\n `Error authenticating with Reddit: ${response.statusText}`\n );\n }\n\n const data = await response.json();\n this.token = data.access_token;\n } catch (error) {\n console.error(\"Error authenticating with Reddit:\", error);\n }", + "repo_full_name": 
"langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1899693765", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7300, + "pr_file": "libs/langchain-community/src/utils/reddit.ts", + "discussion_id": "1899693765", + "commented_code": "@@ -0,0 +1,145 @@\n+import dotenv from \"dotenv\";\n+import { AsyncCaller } from \"@langchain/core/utils/async_caller\";\n+\n+dotenv.config();\n+\n+export interface RedditAPIConfig {\n+ clientId: string;\n+ clientSecret: string;\n+ userAgent: string;\n+}\n+\n+export interface RedditPost {\n+ title: string;\n+ selftext: string;\n+ subreddit_name_prefixed: string;\n+ score: number;\n+ id: string;\n+ url: string;\n+ author: string;\n+}\n+\n+export class RedditAPIWrapper {\n+ private clientId: string;\n+\n+ private clientSecret: string;\n+\n+ private userAgent: string;\n+\n+ private token: string | null = null;\n+\n+ private baseUrl = \"https://oauth.reddit.com\";\n+\n+ private asyncCaller: AsyncCaller; // Using AsyncCaller for requests\n+\n+ constructor(config: RedditAPIConfig) {\n+ this.clientId = config.clientId;\n+ this.clientSecret = config.clientSecret;\n+ this.userAgent = config.userAgent;\n+ this.asyncCaller = new AsyncCaller({\n+ maxConcurrency: 5,\n+ maxRetries: 3,\n+ onFailedAttempt: (error) => {\n+ console.error(\"Attempt failed:\", error.message);\n+ },\n+ });\n+ }\n+\n+ private async authenticate() {\n+ if (this.token) return;\n+\n+ const authString = btoa(`${this.clientId}:${this.clientSecret}`);\n+\n+ try {\n+ const response = await fetch(\n+ \"https://www.reddit.com/api/v1/access_token\",\n+ {\n+ method: \"POST\",\n+ headers: {\n+ Authorization: `Basic ${authString}`,\n+ \"User-Agent\": this.userAgent,\n+ \"Content-Type\": \"application/x-www-form-urlencoded\",\n+ },\n+ body: \"grant_type=client_credentials\",\n+ }\n+ );\n+\n+ if (!response.ok) {\n+ throw new Error(\n+ `Error authenticating with Reddit: ${response.statusText}`\n+ );\n+ }\n+\n+ const data = await response.json();\n+ this.token = data.access_token;\n+ } catch (error) {\n+ console.error(\"Error authenticating with Reddit:\", error);\n+ }", + "comment_created_at": "2024-12-30T17:32:20+00:00", + "comment_author": "nick-w-nick", + "comment_body": "If I am reading this correctly, if it encounters an issue, it will throw an error with the text \r\n\r\n`Error authenticating with Reddit: ${response.statusText}`\r\n\r\nbut will then do `console.error(\"Error authenticating with Reddit:\", error);`, which will output something like:\r\n```\r\nError authenticating with Reddit:\r\n Error authenticating with Reddit: \"invalid auth token\"\r\n```\r\n\r\nI'd suggest just throwing the status text from Reddit instead and then logging out the authentication error in the catch instead.\r\n\r\n```suggestion\r\n if (!response.ok) {\r\n throw new Error(response.statusText);\r\n }\r\n\r\n const data = await response.json();\r\n this.token = data.access_token;\r\n } catch (error) {\r\n console.error(\"Error authenticating with Reddit:\", error);\r\n }\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1433348408", + "pr_number": 3448, + "pr_file": "langchain/src/chat_models/azure_ml.ts", + "created_at": "2023-12-21T01:39:37+00:00", + "commented_code": "import { SimpleChatModel, BaseChatModelParams } from \"./base.js\";\nimport { AzureMLHttpClient } from \"../llms/azure_ml.js\";\nimport { getEnvironmentVariable } from \"../util/env.js\";\nimport { BaseMessage } from \"../schema/index.js\";\n\nexport interface ChatContentFormatter {\n /**\n * Formats 
the request payload for the AzureML endpoint. It takes a\n * prompt and a dictionary of model arguments as input and returns a\n * string representing the formatted request payload.\n * @param messages A list of messages for the chat so far.\n * @param modelArgs A dictionary of model arguments.\n * @returns A string representing the formatted request payload.\n */\n formatRequestPayload: (\n messages: BaseMessage[],\n modelArgs: Record\n ) => string;\n /**\n * Formats the response payload from the AzureML endpoint. It takes a\n * response payload as input and returns a string representing the\n * formatted response.\n * @param responsePayload The response payload from the AzureML endpoint.\n * @returns A string representing the formatted response.\n */\n formatResponsePayload: (output: string) => string;\n}\n\nexport class LlamaContentFormatter implements ChatContentFormatter {\n _convertMessageToRecord(message: BaseMessage): Record {\n if (message._getType() === \"human\") {\n return { role: \"user\", content: message.content };\n } else if (message._getType() === \"ai\") {\n return { role: \"assistant\", content: message.content };\n } else {\n return { role: message._getType(), content: message.content };\n }\n }\n\n formatRequestPayload(\n messages: BaseMessage[],\n modelArgs: Record\n ): string {\n let msgs = messages.map((message) => {\n this._convertMessageToRecord(message);\n });\n return JSON.stringify({\n input_data: {\n input_string: msgs,\n parameters: modelArgs,\n },\n });\n }\n\n formatResponsePayload(responsePayload: string) {\n const response = JSON.parse(responsePayload);\n return response.output;\n }\n}\n\n/**\n * Type definition for the input parameters of the AzureMLChatOnlineEndpoint class.\n */\nexport interface AzureMLChatParams extends BaseChatModelParams {\n endpointUrl?: string;\n endpointApiKey?: string;\n modelArgs?: Record;\n contentFormatter?: ChatContentFormatter;\n}\n\n/**\n * Class that represents the chat model. It extends the SimpleChatModel class and implements the AzureMLChatInput interface.\n */\nexport class AzureMLChatOnlineEndpoint\n extends SimpleChatModel\n implements AzureMLChatParams\n{\n static lc_name() {\n return \"AzureMLChatOnlineEndpoint\";\n }\n static lc_description() {\n return \"A class for interacting with AzureML Chat models.\";\n }\n endpointUrl: string;\n endpointApiKey: string;\n modelArgs?: Record;\n contentFormatter: ChatContentFormatter;\n httpClient: AzureMLHttpClient;\n\n constructor(fields: AzureMLChatParams) {\n super(fields ?? 
{});\n if (!fields?.endpointUrl && !getEnvironmentVariable(\"AZUREML_URL\")) {\n throw new Error(\"No Azure ML Url found.\");\n }\n if (!fields?.endpointApiKey && !getEnvironmentVariable(\"AZUREML_API_KEY\")) {\n throw new Error(\"No Azure ML ApiKey found.\");\n }\n if (!fields?.contentFormatter) {\n throw new Error(\"No Content Formatter provided.\");\n }\n\n this.endpointUrl =\n fields.endpointUrl || getEnvironmentVariable(\"AZUREML_URL\") + \"\";\n this.endpointApiKey =\n fields.endpointApiKey || getEnvironmentVariable(\"AZUREML_API_KEY\") + \"\";\n this.httpClient = new AzureMLHttpClient(\n this.endpointUrl,\n this.endpointApiKey\n );\n this.contentFormatter = fields.contentFormatter;\n this.modelArgs = fields?.modelArgs;\n }\n get _identifying_params() {\n const modelKwargs = this.modelArgs || {};\n return {\n ...super._identifyingParams,\n modelKwargs,\n };\n }\n\n _llmType() {\n return \"azureml_chat\";\n }\n\n _combineLLMOutput(): Record | undefined {\n return [];", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1433348408", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3448, + "pr_file": "langchain/src/chat_models/azure_ml.ts", + "discussion_id": "1433348408", + "commented_code": "@@ -0,0 +1,142 @@\n+import { SimpleChatModel, BaseChatModelParams } from \"./base.js\";\n+import { AzureMLHttpClient } from \"../llms/azure_ml.js\";\n+import { getEnvironmentVariable } from \"../util/env.js\";\n+import { BaseMessage } from \"../schema/index.js\";\n+\n+export interface ChatContentFormatter {\n+ /**\n+ * Formats the request payload for the AzureML endpoint. It takes a\n+ * prompt and a dictionary of model arguments as input and returns a\n+ * string representing the formatted request payload.\n+ * @param messages A list of messages for the chat so far.\n+ * @param modelArgs A dictionary of model arguments.\n+ * @returns A string representing the formatted request payload.\n+ */\n+ formatRequestPayload: (\n+ messages: BaseMessage[],\n+ modelArgs: Record\n+ ) => string;\n+ /**\n+ * Formats the response payload from the AzureML endpoint. 
It takes a\n+ * response payload as input and returns a string representing the\n+ * formatted response.\n+ * @param responsePayload The response payload from the AzureML endpoint.\n+ * @returns A string representing the formatted response.\n+ */\n+ formatResponsePayload: (output: string) => string;\n+}\n+\n+export class LlamaContentFormatter implements ChatContentFormatter {\n+ _convertMessageToRecord(message: BaseMessage): Record {\n+ if (message._getType() === \"human\") {\n+ return { role: \"user\", content: message.content };\n+ } else if (message._getType() === \"ai\") {\n+ return { role: \"assistant\", content: message.content };\n+ } else {\n+ return { role: message._getType(), content: message.content };\n+ }\n+ }\n+\n+ formatRequestPayload(\n+ messages: BaseMessage[],\n+ modelArgs: Record\n+ ): string {\n+ let msgs = messages.map((message) => {\n+ this._convertMessageToRecord(message);\n+ });\n+ return JSON.stringify({\n+ input_data: {\n+ input_string: msgs,\n+ parameters: modelArgs,\n+ },\n+ });\n+ }\n+\n+ formatResponsePayload(responsePayload: string) {\n+ const response = JSON.parse(responsePayload);\n+ return response.output;\n+ }\n+}\n+\n+/**\n+ * Type definition for the input parameters of the AzureMLChatOnlineEndpoint class.\n+ */\n+export interface AzureMLChatParams extends BaseChatModelParams {\n+ endpointUrl?: string;\n+ endpointApiKey?: string;\n+ modelArgs?: Record;\n+ contentFormatter?: ChatContentFormatter;\n+}\n+\n+/**\n+ * Class that represents the chat model. It extends the SimpleChatModel class and implements the AzureMLChatInput interface.\n+ */\n+export class AzureMLChatOnlineEndpoint\n+ extends SimpleChatModel\n+ implements AzureMLChatParams\n+{\n+ static lc_name() {\n+ return \"AzureMLChatOnlineEndpoint\";\n+ }\n+ static lc_description() {\n+ return \"A class for interacting with AzureML Chat models.\";\n+ }\n+ endpointUrl: string;\n+ endpointApiKey: string;\n+ modelArgs?: Record;\n+ contentFormatter: ChatContentFormatter;\n+ httpClient: AzureMLHttpClient;\n+\n+ constructor(fields: AzureMLChatParams) {\n+ super(fields ?? 
{});\n+ if (!fields?.endpointUrl && !getEnvironmentVariable(\"AZUREML_URL\")) {\n+ throw new Error(\"No Azure ML Url found.\");\n+ }\n+ if (!fields?.endpointApiKey && !getEnvironmentVariable(\"AZUREML_API_KEY\")) {\n+ throw new Error(\"No Azure ML ApiKey found.\");\n+ }\n+ if (!fields?.contentFormatter) {\n+ throw new Error(\"No Content Formatter provided.\");\n+ }\n+\n+ this.endpointUrl =\n+ fields.endpointUrl || getEnvironmentVariable(\"AZUREML_URL\") + \"\";\n+ this.endpointApiKey =\n+ fields.endpointApiKey || getEnvironmentVariable(\"AZUREML_API_KEY\") + \"\";\n+ this.httpClient = new AzureMLHttpClient(\n+ this.endpointUrl,\n+ this.endpointApiKey\n+ );\n+ this.contentFormatter = fields.contentFormatter;\n+ this.modelArgs = fields?.modelArgs;\n+ }\n+ get _identifying_params() {\n+ const modelKwargs = this.modelArgs || {};\n+ return {\n+ ...super._identifyingParams,\n+ modelKwargs,\n+ };\n+ }\n+\n+ _llmType() {\n+ return \"azureml_chat\";\n+ }\n+\n+ _combineLLMOutput(): Record | undefined {\n+ return [];", + "comment_created_at": "2023-12-21T01:39:37+00:00", + "comment_author": "bracesproul", + "comment_body": "If it's not implemented, it should throw a not implemented error.\r\n```suggestion\r\n throw new Error(\"AzureMLChatOnlineEndpoint._combineLLMOutput called, but is not implemented.\")\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1901634206", + "pr_number": 7436, + "pr_file": "libs/langchain-azure-cosmosdb/src/chat_histories/mongodb.ts", + "created_at": "2025-01-03T10:04:10+00:00", + "commented_code": "await this.collection.deleteOne({ [ID_KEY]: this.sessionId });\n }\n\n async getAllSessions(): Promise {\n await this.initialize();\n const documents = await this.collection.find().toArray();\n\n const chatSessions: ChatSession[] = documents.map((doc) => ({\n id: doc[ID_KEY],\n context: doc.context || {},\n }));\n\n return chatSessions;\n }\n\n async clearAllSessions() {\n await this.initialize();\n try {\n await this.collection.deleteMany({});\n } catch (error) {\n console.log(\"Error clearing sessions:\", error);\n }\n }\n\n async getContext(): Promise> {\n await this.initialize();\n\n const document = await this.collection.findOne({\n [ID_KEY]: this.sessionId,\n });\n this.context = document?.context || this.context;\n return this.context;\n }\n\n async setContext(context: Record): Promise {\n await this.initialize();\n\n try {\n await this.collection.updateOne(\n { [ID_KEY]: this.sessionId },\n {\n $set: { context },\n },\n { upsert: true }\n );\n } catch (error) {\n console.log(\"Error setting context\", error);", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1901634206", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7436, + "pr_file": "libs/langchain-azure-cosmosdb/src/chat_histories/mongodb.ts", + "discussion_id": "1901634206", + "commented_code": "@@ -152,4 +161,51 @@ export class AzureCosmosDBMongoChatMessageHistory extends BaseListChatMessageHis\n \n await this.collection.deleteOne({ [ID_KEY]: this.sessionId });\n }\n+\n+ async getAllSessions(): Promise {\n+ await this.initialize();\n+ const documents = await this.collection.find().toArray();\n+\n+ const chatSessions: ChatSession[] = documents.map((doc) => ({\n+ id: doc[ID_KEY],\n+ context: doc.context || {},\n+ }));\n+\n+ return chatSessions;\n+ }\n+\n+ async clearAllSessions() {\n+ await this.initialize();\n+ try {\n+ await this.collection.deleteMany({});\n+ } catch (error) {\n+ console.log(\"Error clearing sessions:\", error);\n+ 
}\n+ }\n+\n+ async getContext(): Promise> {\n+ await this.initialize();\n+\n+ const document = await this.collection.findOne({\n+ [ID_KEY]: this.sessionId,\n+ });\n+ this.context = document?.context || this.context;\n+ return this.context;\n+ }\n+\n+ async setContext(context: Record): Promise {\n+ await this.initialize();\n+\n+ try {\n+ await this.collection.updateOne(\n+ { [ID_KEY]: this.sessionId },\n+ {\n+ $set: { context },\n+ },\n+ { upsert: true }\n+ );\n+ } catch (error) {\n+ console.log(\"Error setting context\", error);", + "comment_created_at": "2025-01-03T10:04:10+00:00", + "comment_author": "sinedied", + "comment_body": "Would be better to throw so these errors can be identified", + "pr_file_module": null + }, + { + "comment_id": "1912473317", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7436, + "pr_file": "libs/langchain-azure-cosmosdb/src/chat_histories/mongodb.ts", + "discussion_id": "1901634206", + "commented_code": "@@ -152,4 +161,51 @@ export class AzureCosmosDBMongoChatMessageHistory extends BaseListChatMessageHis\n \n await this.collection.deleteOne({ [ID_KEY]: this.sessionId });\n }\n+\n+ async getAllSessions(): Promise {\n+ await this.initialize();\n+ const documents = await this.collection.find().toArray();\n+\n+ const chatSessions: ChatSession[] = documents.map((doc) => ({\n+ id: doc[ID_KEY],\n+ context: doc.context || {},\n+ }));\n+\n+ return chatSessions;\n+ }\n+\n+ async clearAllSessions() {\n+ await this.initialize();\n+ try {\n+ await this.collection.deleteMany({});\n+ } catch (error) {\n+ console.log(\"Error clearing sessions:\", error);\n+ }\n+ }\n+\n+ async getContext(): Promise> {\n+ await this.initialize();\n+\n+ const document = await this.collection.findOne({\n+ [ID_KEY]: this.sessionId,\n+ });\n+ this.context = document?.context || this.context;\n+ return this.context;\n+ }\n+\n+ async setContext(context: Record): Promise {\n+ await this.initialize();\n+\n+ try {\n+ await this.collection.updateOne(\n+ { [ID_KEY]: this.sessionId },\n+ {\n+ $set: { context },\n+ },\n+ { upsert: true }\n+ );\n+ } catch (error) {\n+ console.log(\"Error setting context\", error);", + "comment_created_at": "2025-01-12T15:12:16+00:00", + "comment_author": "crisjy", + "comment_body": "throw errors for this", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1901634285", + "pr_number": 7436, + "pr_file": "libs/langchain-azure-cosmosdb/src/chat_histories/mongodb.ts", + "created_at": "2025-01-03T10:04:15+00:00", + "commented_code": "await this.collection.deleteOne({ [ID_KEY]: this.sessionId });\n }\n\n async getAllSessions(): Promise {\n await this.initialize();\n const documents = await this.collection.find().toArray();\n\n const chatSessions: ChatSession[] = documents.map((doc) => ({\n id: doc[ID_KEY],\n context: doc.context || {},\n }));\n\n return chatSessions;\n }\n\n async clearAllSessions() {\n await this.initialize();\n try {\n await this.collection.deleteMany({});\n } catch (error) {\n console.log(\"Error clearing sessions:\", error);", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1901634285", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7436, + "pr_file": "libs/langchain-azure-cosmosdb/src/chat_histories/mongodb.ts", + "discussion_id": "1901634285", + "commented_code": "@@ -152,4 +161,51 @@ export class AzureCosmosDBMongoChatMessageHistory extends BaseListChatMessageHis\n \n await this.collection.deleteOne({ [ID_KEY]: this.sessionId });\n }\n+\n+ async getAllSessions(): 
Promise {\n+ await this.initialize();\n+ const documents = await this.collection.find().toArray();\n+\n+ const chatSessions: ChatSession[] = documents.map((doc) => ({\n+ id: doc[ID_KEY],\n+ context: doc.context || {},\n+ }));\n+\n+ return chatSessions;\n+ }\n+\n+ async clearAllSessions() {\n+ await this.initialize();\n+ try {\n+ await this.collection.deleteMany({});\n+ } catch (error) {\n+ console.log(\"Error clearing sessions:\", error);", + "comment_created_at": "2025-01-03T10:04:15+00:00", + "comment_author": "sinedied", + "comment_body": "Would be better to throw so these errors can be identified", + "pr_file_module": null + }, + { + "comment_id": "1912473333", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7436, + "pr_file": "libs/langchain-azure-cosmosdb/src/chat_histories/mongodb.ts", + "discussion_id": "1901634285", + "commented_code": "@@ -152,4 +161,51 @@ export class AzureCosmosDBMongoChatMessageHistory extends BaseListChatMessageHis\n \n await this.collection.deleteOne({ [ID_KEY]: this.sessionId });\n }\n+\n+ async getAllSessions(): Promise {\n+ await this.initialize();\n+ const documents = await this.collection.find().toArray();\n+\n+ const chatSessions: ChatSession[] = documents.map((doc) => ({\n+ id: doc[ID_KEY],\n+ context: doc.context || {},\n+ }));\n+\n+ return chatSessions;\n+ }\n+\n+ async clearAllSessions() {\n+ await this.initialize();\n+ try {\n+ await this.collection.deleteMany({});\n+ } catch (error) {\n+ console.log(\"Error clearing sessions:\", error);", + "comment_created_at": "2025-01-12T15:12:23+00:00", + "comment_author": "crisjy", + "comment_body": "throw errors for this", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1661598255", + "pr_number": 5835, + "pr_file": "libs/langchain-google-common/src/auth.ts", + "created_at": "2024-07-01T22:27:33+00:00", + "commented_code": "`Google request failed with status code ${res.status}: ${resText}`\n );\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n (error as any).response = res;\n (error as any).details = {", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1661598255", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5835, + "pr_file": "libs/langchain-google-common/src/auth.ts", + "discussion_id": "1661598255", + "commented_code": "@@ -58,15 +73,18 @@ export abstract class GoogleAbstractedFetchClient\n `Google request failed with status code ${res.status}: ${resText}`\n );\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n- (error as any).response = res;\n+ (error as any).details = {", + "comment_created_at": "2024-07-01T22:27:33+00:00", + "comment_author": "jacoblee93", + "comment_body": "Good to leave `response` directly on the error object - `AsyncCaller` uses fields on it to decide retries.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-throw-meaningful-errors.md b/_reviewers/langchainjs-throw-meaningful-errors.md index f6bbe8e..01b5f8d 100644 --- a/_reviewers/langchainjs-throw-meaningful-errors.md +++ b/_reviewers/langchainjs-throw-meaningful-errors.md @@ -68,141 +68,3 @@ try { ``` Well-designed error handling improves debugging efficiency and creates a more robust application. 
- - -[ - { - "discussion_id": "1899693765", - "pr_number": 7300, - "pr_file": "libs/langchain-community/src/utils/reddit.ts", - "created_at": "2024-12-30T17:32:20+00:00", - "commented_code": "import dotenv from \"dotenv\";\nimport { AsyncCaller } from \"@langchain/core/utils/async_caller\";\n\ndotenv.config();\n\nexport interface RedditAPIConfig {\n clientId: string;\n clientSecret: string;\n userAgent: string;\n}\n\nexport interface RedditPost {\n title: string;\n selftext: string;\n subreddit_name_prefixed: string;\n score: number;\n id: string;\n url: string;\n author: string;\n}\n\nexport class RedditAPIWrapper {\n private clientId: string;\n\n private clientSecret: string;\n\n private userAgent: string;\n\n private token: string | null = null;\n\n private baseUrl = \"https://oauth.reddit.com\";\n\n private asyncCaller: AsyncCaller; // Using AsyncCaller for requests\n\n constructor(config: RedditAPIConfig) {\n this.clientId = config.clientId;\n this.clientSecret = config.clientSecret;\n this.userAgent = config.userAgent;\n this.asyncCaller = new AsyncCaller({\n maxConcurrency: 5,\n maxRetries: 3,\n onFailedAttempt: (error) => {\n console.error(\"Attempt failed:\", error.message);\n },\n });\n }\n\n private async authenticate() {\n if (this.token) return;\n\n const authString = btoa(`${this.clientId}:${this.clientSecret}`);\n\n try {\n const response = await fetch(\n \"https://www.reddit.com/api/v1/access_token\",\n {\n method: \"POST\",\n headers: {\n Authorization: `Basic ${authString}`,\n \"User-Agent\": this.userAgent,\n \"Content-Type\": \"application/x-www-form-urlencoded\",\n },\n body: \"grant_type=client_credentials\",\n }\n );\n\n if (!response.ok) {\n throw new Error(\n `Error authenticating with Reddit: ${response.statusText}`\n );\n }\n\n const data = await response.json();\n this.token = data.access_token;\n } catch (error) {\n console.error(\"Error authenticating with Reddit:\", error);\n }", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1899693765", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7300, - "pr_file": "libs/langchain-community/src/utils/reddit.ts", - "discussion_id": "1899693765", - "commented_code": "@@ -0,0 +1,145 @@\n+import dotenv from \"dotenv\";\n+import { AsyncCaller } from \"@langchain/core/utils/async_caller\";\n+\n+dotenv.config();\n+\n+export interface RedditAPIConfig {\n+ clientId: string;\n+ clientSecret: string;\n+ userAgent: string;\n+}\n+\n+export interface RedditPost {\n+ title: string;\n+ selftext: string;\n+ subreddit_name_prefixed: string;\n+ score: number;\n+ id: string;\n+ url: string;\n+ author: string;\n+}\n+\n+export class RedditAPIWrapper {\n+ private clientId: string;\n+\n+ private clientSecret: string;\n+\n+ private userAgent: string;\n+\n+ private token: string | null = null;\n+\n+ private baseUrl = \"https://oauth.reddit.com\";\n+\n+ private asyncCaller: AsyncCaller; // Using AsyncCaller for requests\n+\n+ constructor(config: RedditAPIConfig) {\n+ this.clientId = config.clientId;\n+ this.clientSecret = config.clientSecret;\n+ this.userAgent = config.userAgent;\n+ this.asyncCaller = new AsyncCaller({\n+ maxConcurrency: 5,\n+ maxRetries: 3,\n+ onFailedAttempt: (error) => {\n+ console.error(\"Attempt failed:\", error.message);\n+ },\n+ });\n+ }\n+\n+ private async authenticate() {\n+ if (this.token) return;\n+\n+ const authString = btoa(`${this.clientId}:${this.clientSecret}`);\n+\n+ try {\n+ const response = await fetch(\n+ 
\"https://www.reddit.com/api/v1/access_token\",\n+ {\n+ method: \"POST\",\n+ headers: {\n+ Authorization: `Basic ${authString}`,\n+ \"User-Agent\": this.userAgent,\n+ \"Content-Type\": \"application/x-www-form-urlencoded\",\n+ },\n+ body: \"grant_type=client_credentials\",\n+ }\n+ );\n+\n+ if (!response.ok) {\n+ throw new Error(\n+ `Error authenticating with Reddit: ${response.statusText}`\n+ );\n+ }\n+\n+ const data = await response.json();\n+ this.token = data.access_token;\n+ } catch (error) {\n+ console.error(\"Error authenticating with Reddit:\", error);\n+ }", - "comment_created_at": "2024-12-30T17:32:20+00:00", - "comment_author": "nick-w-nick", - "comment_body": "If I am reading this correctly, if it encounters an issue, it will throw an error with the text \r\n\r\n`Error authenticating with Reddit: ${response.statusText}`\r\n\r\nbut will then do `console.error(\"Error authenticating with Reddit:\", error);`, which will output something like:\r\n```\r\nError authenticating with Reddit:\r\n Error authenticating with Reddit: \"invalid auth token\"\r\n```\r\n\r\nI'd suggest just throwing the status text from Reddit instead and then logging out the authentication error in the catch instead.\r\n\r\n```suggestion\r\n if (!response.ok) {\r\n throw new Error(response.statusText);\r\n }\r\n\r\n const data = await response.json();\r\n this.token = data.access_token;\r\n } catch (error) {\r\n console.error(\"Error authenticating with Reddit:\", error);\r\n }\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1433348408", - "pr_number": 3448, - "pr_file": "langchain/src/chat_models/azure_ml.ts", - "created_at": "2023-12-21T01:39:37+00:00", - "commented_code": "import { SimpleChatModel, BaseChatModelParams } from \"./base.js\";\nimport { AzureMLHttpClient } from \"../llms/azure_ml.js\";\nimport { getEnvironmentVariable } from \"../util/env.js\";\nimport { BaseMessage } from \"../schema/index.js\";\n\nexport interface ChatContentFormatter {\n /**\n * Formats the request payload for the AzureML endpoint. It takes a\n * prompt and a dictionary of model arguments as input and returns a\n * string representing the formatted request payload.\n * @param messages A list of messages for the chat so far.\n * @param modelArgs A dictionary of model arguments.\n * @returns A string representing the formatted request payload.\n */\n formatRequestPayload: (\n messages: BaseMessage[],\n modelArgs: Record\n ) => string;\n /**\n * Formats the response payload from the AzureML endpoint. 
It takes a\n * response payload as input and returns a string representing the\n * formatted response.\n * @param responsePayload The response payload from the AzureML endpoint.\n * @returns A string representing the formatted response.\n */\n formatResponsePayload: (output: string) => string;\n}\n\nexport class LlamaContentFormatter implements ChatContentFormatter {\n _convertMessageToRecord(message: BaseMessage): Record {\n if (message._getType() === \"human\") {\n return { role: \"user\", content: message.content };\n } else if (message._getType() === \"ai\") {\n return { role: \"assistant\", content: message.content };\n } else {\n return { role: message._getType(), content: message.content };\n }\n }\n\n formatRequestPayload(\n messages: BaseMessage[],\n modelArgs: Record\n ): string {\n let msgs = messages.map((message) => {\n this._convertMessageToRecord(message);\n });\n return JSON.stringify({\n input_data: {\n input_string: msgs,\n parameters: modelArgs,\n },\n });\n }\n\n formatResponsePayload(responsePayload: string) {\n const response = JSON.parse(responsePayload);\n return response.output;\n }\n}\n\n/**\n * Type definition for the input parameters of the AzureMLChatOnlineEndpoint class.\n */\nexport interface AzureMLChatParams extends BaseChatModelParams {\n endpointUrl?: string;\n endpointApiKey?: string;\n modelArgs?: Record;\n contentFormatter?: ChatContentFormatter;\n}\n\n/**\n * Class that represents the chat model. It extends the SimpleChatModel class and implements the AzureMLChatInput interface.\n */\nexport class AzureMLChatOnlineEndpoint\n extends SimpleChatModel\n implements AzureMLChatParams\n{\n static lc_name() {\n return \"AzureMLChatOnlineEndpoint\";\n }\n static lc_description() {\n return \"A class for interacting with AzureML Chat models.\";\n }\n endpointUrl: string;\n endpointApiKey: string;\n modelArgs?: Record;\n contentFormatter: ChatContentFormatter;\n httpClient: AzureMLHttpClient;\n\n constructor(fields: AzureMLChatParams) {\n super(fields ?? 
{});\n if (!fields?.endpointUrl && !getEnvironmentVariable(\"AZUREML_URL\")) {\n throw new Error(\"No Azure ML Url found.\");\n }\n if (!fields?.endpointApiKey && !getEnvironmentVariable(\"AZUREML_API_KEY\")) {\n throw new Error(\"No Azure ML ApiKey found.\");\n }\n if (!fields?.contentFormatter) {\n throw new Error(\"No Content Formatter provided.\");\n }\n\n this.endpointUrl =\n fields.endpointUrl || getEnvironmentVariable(\"AZUREML_URL\") + \"\";\n this.endpointApiKey =\n fields.endpointApiKey || getEnvironmentVariable(\"AZUREML_API_KEY\") + \"\";\n this.httpClient = new AzureMLHttpClient(\n this.endpointUrl,\n this.endpointApiKey\n );\n this.contentFormatter = fields.contentFormatter;\n this.modelArgs = fields?.modelArgs;\n }\n get _identifying_params() {\n const modelKwargs = this.modelArgs || {};\n return {\n ...super._identifyingParams,\n modelKwargs,\n };\n }\n\n _llmType() {\n return \"azureml_chat\";\n }\n\n _combineLLMOutput(): Record | undefined {\n return [];", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1433348408", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3448, - "pr_file": "langchain/src/chat_models/azure_ml.ts", - "discussion_id": "1433348408", - "commented_code": "@@ -0,0 +1,142 @@\n+import { SimpleChatModel, BaseChatModelParams } from \"./base.js\";\n+import { AzureMLHttpClient } from \"../llms/azure_ml.js\";\n+import { getEnvironmentVariable } from \"../util/env.js\";\n+import { BaseMessage } from \"../schema/index.js\";\n+\n+export interface ChatContentFormatter {\n+ /**\n+ * Formats the request payload for the AzureML endpoint. It takes a\n+ * prompt and a dictionary of model arguments as input and returns a\n+ * string representing the formatted request payload.\n+ * @param messages A list of messages for the chat so far.\n+ * @param modelArgs A dictionary of model arguments.\n+ * @returns A string representing the formatted request payload.\n+ */\n+ formatRequestPayload: (\n+ messages: BaseMessage[],\n+ modelArgs: Record\n+ ) => string;\n+ /**\n+ * Formats the response payload from the AzureML endpoint. 
It takes a\n+ * response payload as input and returns a string representing the\n+ * formatted response.\n+ * @param responsePayload The response payload from the AzureML endpoint.\n+ * @returns A string representing the formatted response.\n+ */\n+ formatResponsePayload: (output: string) => string;\n+}\n+\n+export class LlamaContentFormatter implements ChatContentFormatter {\n+ _convertMessageToRecord(message: BaseMessage): Record {\n+ if (message._getType() === \"human\") {\n+ return { role: \"user\", content: message.content };\n+ } else if (message._getType() === \"ai\") {\n+ return { role: \"assistant\", content: message.content };\n+ } else {\n+ return { role: message._getType(), content: message.content };\n+ }\n+ }\n+\n+ formatRequestPayload(\n+ messages: BaseMessage[],\n+ modelArgs: Record\n+ ): string {\n+ let msgs = messages.map((message) => {\n+ this._convertMessageToRecord(message);\n+ });\n+ return JSON.stringify({\n+ input_data: {\n+ input_string: msgs,\n+ parameters: modelArgs,\n+ },\n+ });\n+ }\n+\n+ formatResponsePayload(responsePayload: string) {\n+ const response = JSON.parse(responsePayload);\n+ return response.output;\n+ }\n+}\n+\n+/**\n+ * Type definition for the input parameters of the AzureMLChatOnlineEndpoint class.\n+ */\n+export interface AzureMLChatParams extends BaseChatModelParams {\n+ endpointUrl?: string;\n+ endpointApiKey?: string;\n+ modelArgs?: Record;\n+ contentFormatter?: ChatContentFormatter;\n+}\n+\n+/**\n+ * Class that represents the chat model. It extends the SimpleChatModel class and implements the AzureMLChatInput interface.\n+ */\n+export class AzureMLChatOnlineEndpoint\n+ extends SimpleChatModel\n+ implements AzureMLChatParams\n+{\n+ static lc_name() {\n+ return \"AzureMLChatOnlineEndpoint\";\n+ }\n+ static lc_description() {\n+ return \"A class for interacting with AzureML Chat models.\";\n+ }\n+ endpointUrl: string;\n+ endpointApiKey: string;\n+ modelArgs?: Record;\n+ contentFormatter: ChatContentFormatter;\n+ httpClient: AzureMLHttpClient;\n+\n+ constructor(fields: AzureMLChatParams) {\n+ super(fields ?? 
{});\n+ if (!fields?.endpointUrl && !getEnvironmentVariable(\"AZUREML_URL\")) {\n+ throw new Error(\"No Azure ML Url found.\");\n+ }\n+ if (!fields?.endpointApiKey && !getEnvironmentVariable(\"AZUREML_API_KEY\")) {\n+ throw new Error(\"No Azure ML ApiKey found.\");\n+ }\n+ if (!fields?.contentFormatter) {\n+ throw new Error(\"No Content Formatter provided.\");\n+ }\n+\n+ this.endpointUrl =\n+ fields.endpointUrl || getEnvironmentVariable(\"AZUREML_URL\") + \"\";\n+ this.endpointApiKey =\n+ fields.endpointApiKey || getEnvironmentVariable(\"AZUREML_API_KEY\") + \"\";\n+ this.httpClient = new AzureMLHttpClient(\n+ this.endpointUrl,\n+ this.endpointApiKey\n+ );\n+ this.contentFormatter = fields.contentFormatter;\n+ this.modelArgs = fields?.modelArgs;\n+ }\n+ get _identifying_params() {\n+ const modelKwargs = this.modelArgs || {};\n+ return {\n+ ...super._identifyingParams,\n+ modelKwargs,\n+ };\n+ }\n+\n+ _llmType() {\n+ return \"azureml_chat\";\n+ }\n+\n+ _combineLLMOutput(): Record | undefined {\n+ return [];", - "comment_created_at": "2023-12-21T01:39:37+00:00", - "comment_author": "bracesproul", - "comment_body": "If it's not implemented, it should throw a not implemented error.\r\n```suggestion\r\n throw new Error(\"AzureMLChatOnlineEndpoint._combineLLMOutput called, but is not implemented.\")\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1901634206", - "pr_number": 7436, - "pr_file": "libs/langchain-azure-cosmosdb/src/chat_histories/mongodb.ts", - "created_at": "2025-01-03T10:04:10+00:00", - "commented_code": "await this.collection.deleteOne({ [ID_KEY]: this.sessionId });\n }\n\n async getAllSessions(): Promise {\n await this.initialize();\n const documents = await this.collection.find().toArray();\n\n const chatSessions: ChatSession[] = documents.map((doc) => ({\n id: doc[ID_KEY],\n context: doc.context || {},\n }));\n\n return chatSessions;\n }\n\n async clearAllSessions() {\n await this.initialize();\n try {\n await this.collection.deleteMany({});\n } catch (error) {\n console.log(\"Error clearing sessions:\", error);\n }\n }\n\n async getContext(): Promise> {\n await this.initialize();\n\n const document = await this.collection.findOne({\n [ID_KEY]: this.sessionId,\n });\n this.context = document?.context || this.context;\n return this.context;\n }\n\n async setContext(context: Record): Promise {\n await this.initialize();\n\n try {\n await this.collection.updateOne(\n { [ID_KEY]: this.sessionId },\n {\n $set: { context },\n },\n { upsert: true }\n );\n } catch (error) {\n console.log(\"Error setting context\", error);", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1901634206", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7436, - "pr_file": "libs/langchain-azure-cosmosdb/src/chat_histories/mongodb.ts", - "discussion_id": "1901634206", - "commented_code": "@@ -152,4 +161,51 @@ export class AzureCosmosDBMongoChatMessageHistory extends BaseListChatMessageHis\n \n await this.collection.deleteOne({ [ID_KEY]: this.sessionId });\n }\n+\n+ async getAllSessions(): Promise {\n+ await this.initialize();\n+ const documents = await this.collection.find().toArray();\n+\n+ const chatSessions: ChatSession[] = documents.map((doc) => ({\n+ id: doc[ID_KEY],\n+ context: doc.context || {},\n+ }));\n+\n+ return chatSessions;\n+ }\n+\n+ async clearAllSessions() {\n+ await this.initialize();\n+ try {\n+ await this.collection.deleteMany({});\n+ } catch (error) {\n+ console.log(\"Error clearing sessions:\", error);\n+ 
}\n+ }\n+\n+ async getContext(): Promise> {\n+ await this.initialize();\n+\n+ const document = await this.collection.findOne({\n+ [ID_KEY]: this.sessionId,\n+ });\n+ this.context = document?.context || this.context;\n+ return this.context;\n+ }\n+\n+ async setContext(context: Record): Promise {\n+ await this.initialize();\n+\n+ try {\n+ await this.collection.updateOne(\n+ { [ID_KEY]: this.sessionId },\n+ {\n+ $set: { context },\n+ },\n+ { upsert: true }\n+ );\n+ } catch (error) {\n+ console.log(\"Error setting context\", error);", - "comment_created_at": "2025-01-03T10:04:10+00:00", - "comment_author": "sinedied", - "comment_body": "Would be better to throw so these errors can be identified", - "pr_file_module": null - }, - { - "comment_id": "1912473317", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7436, - "pr_file": "libs/langchain-azure-cosmosdb/src/chat_histories/mongodb.ts", - "discussion_id": "1901634206", - "commented_code": "@@ -152,4 +161,51 @@ export class AzureCosmosDBMongoChatMessageHistory extends BaseListChatMessageHis\n \n await this.collection.deleteOne({ [ID_KEY]: this.sessionId });\n }\n+\n+ async getAllSessions(): Promise {\n+ await this.initialize();\n+ const documents = await this.collection.find().toArray();\n+\n+ const chatSessions: ChatSession[] = documents.map((doc) => ({\n+ id: doc[ID_KEY],\n+ context: doc.context || {},\n+ }));\n+\n+ return chatSessions;\n+ }\n+\n+ async clearAllSessions() {\n+ await this.initialize();\n+ try {\n+ await this.collection.deleteMany({});\n+ } catch (error) {\n+ console.log(\"Error clearing sessions:\", error);\n+ }\n+ }\n+\n+ async getContext(): Promise> {\n+ await this.initialize();\n+\n+ const document = await this.collection.findOne({\n+ [ID_KEY]: this.sessionId,\n+ });\n+ this.context = document?.context || this.context;\n+ return this.context;\n+ }\n+\n+ async setContext(context: Record): Promise {\n+ await this.initialize();\n+\n+ try {\n+ await this.collection.updateOne(\n+ { [ID_KEY]: this.sessionId },\n+ {\n+ $set: { context },\n+ },\n+ { upsert: true }\n+ );\n+ } catch (error) {\n+ console.log(\"Error setting context\", error);", - "comment_created_at": "2025-01-12T15:12:16+00:00", - "comment_author": "crisjy", - "comment_body": "throw errors for this", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1901634285", - "pr_number": 7436, - "pr_file": "libs/langchain-azure-cosmosdb/src/chat_histories/mongodb.ts", - "created_at": "2025-01-03T10:04:15+00:00", - "commented_code": "await this.collection.deleteOne({ [ID_KEY]: this.sessionId });\n }\n\n async getAllSessions(): Promise {\n await this.initialize();\n const documents = await this.collection.find().toArray();\n\n const chatSessions: ChatSession[] = documents.map((doc) => ({\n id: doc[ID_KEY],\n context: doc.context || {},\n }));\n\n return chatSessions;\n }\n\n async clearAllSessions() {\n await this.initialize();\n try {\n await this.collection.deleteMany({});\n } catch (error) {\n console.log(\"Error clearing sessions:\", error);", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1901634285", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7436, - "pr_file": "libs/langchain-azure-cosmosdb/src/chat_histories/mongodb.ts", - "discussion_id": "1901634285", - "commented_code": "@@ -152,4 +161,51 @@ export class AzureCosmosDBMongoChatMessageHistory extends BaseListChatMessageHis\n \n await this.collection.deleteOne({ [ID_KEY]: this.sessionId });\n }\n+\n+ async getAllSessions(): 
Promise {\n+ await this.initialize();\n+ const documents = await this.collection.find().toArray();\n+\n+ const chatSessions: ChatSession[] = documents.map((doc) => ({\n+ id: doc[ID_KEY],\n+ context: doc.context || {},\n+ }));\n+\n+ return chatSessions;\n+ }\n+\n+ async clearAllSessions() {\n+ await this.initialize();\n+ try {\n+ await this.collection.deleteMany({});\n+ } catch (error) {\n+ console.log(\"Error clearing sessions:\", error);", - "comment_created_at": "2025-01-03T10:04:15+00:00", - "comment_author": "sinedied", - "comment_body": "Would be better to throw so these errors can be identified", - "pr_file_module": null - }, - { - "comment_id": "1912473333", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7436, - "pr_file": "libs/langchain-azure-cosmosdb/src/chat_histories/mongodb.ts", - "discussion_id": "1901634285", - "commented_code": "@@ -152,4 +161,51 @@ export class AzureCosmosDBMongoChatMessageHistory extends BaseListChatMessageHis\n \n await this.collection.deleteOne({ [ID_KEY]: this.sessionId });\n }\n+\n+ async getAllSessions(): Promise {\n+ await this.initialize();\n+ const documents = await this.collection.find().toArray();\n+\n+ const chatSessions: ChatSession[] = documents.map((doc) => ({\n+ id: doc[ID_KEY],\n+ context: doc.context || {},\n+ }));\n+\n+ return chatSessions;\n+ }\n+\n+ async clearAllSessions() {\n+ await this.initialize();\n+ try {\n+ await this.collection.deleteMany({});\n+ } catch (error) {\n+ console.log(\"Error clearing sessions:\", error);", - "comment_created_at": "2025-01-12T15:12:23+00:00", - "comment_author": "crisjy", - "comment_body": "throw errors for this", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1661598255", - "pr_number": 5835, - "pr_file": "libs/langchain-google-common/src/auth.ts", - "created_at": "2024-07-01T22:27:33+00:00", - "commented_code": "`Google request failed with status code ${res.status}: ${resText}`\n );\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n (error as any).response = res;\n (error as any).details = {", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1661598255", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5835, - "pr_file": "libs/langchain-google-common/src/auth.ts", - "discussion_id": "1661598255", - "commented_code": "@@ -58,15 +73,18 @@ export abstract class GoogleAbstractedFetchClient\n `Google request failed with status code ${res.status}: ${resText}`\n );\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n- (error as any).response = res;\n+ (error as any).details = {", - "comment_created_at": "2024-07-01T22:27:33+00:00", - "comment_author": "jacoblee93", - "comment_body": "Good to leave `response` directly on the error object - `AsyncCaller` uses fields on it to decide retries.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-typescript-naming-standards.json b/_reviewers/langchainjs-typescript-naming-standards.json new file mode 100644 index 0000000..dc16c0b --- /dev/null +++ b/_reviewers/langchainjs-typescript-naming-standards.json @@ -0,0 +1,270 @@ +[ + { + "discussion_id": "2106342464", + "pr_number": 8248, + "pr_file": "libs/langchain-anthropic/src/chat_models.ts", + "created_at": "2025-05-25T23:52:50+00:00", + "commented_code": "return \"input_schema\" in tool;\n}\n\n// eslint-disable-next-line @typescript-eslint/no-explicit-any\nfunction isBuiltinTool(\n tool: any", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ 
+ { + "comment_id": "2106342464", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8248, + "pr_file": "libs/langchain-anthropic/src/chat_models.ts", + "discussion_id": "2106342464", + "commented_code": "@@ -100,6 +100,26 @@ function isAnthropicTool(tool: any): tool is Anthropic.Messages.Tool {\n return \"input_schema\" in tool;\n }\n \n+// eslint-disable-next-line @typescript-eslint/no-explicit-any\n+function isBuiltinTool(\n+ tool: any", + "comment_created_at": "2025-05-25T23:52:50+00:00", + "comment_author": "benjamincburns", + "comment_body": "nit: use `unknown` - that's why we have the ESLint error", + "pr_file_module": null + }, + { + "comment_id": "2106392558", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8248, + "pr_file": "libs/langchain-anthropic/src/chat_models.ts", + "discussion_id": "2106342464", + "commented_code": "@@ -100,6 +100,26 @@ function isAnthropicTool(tool: any): tool is Anthropic.Messages.Tool {\n return \"input_schema\" in tool;\n }\n \n+// eslint-disable-next-line @typescript-eslint/no-explicit-any\n+function isBuiltinTool(\n+ tool: any", + "comment_created_at": "2025-05-26T01:49:53+00:00", + "comment_author": "bleafman", + "comment_body": "Done!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1899705386", + "pr_number": 7300, + "pr_file": "libs/langchain-community/src/document_loaders/web/reddit.ts", + "created_at": "2024-12-30T17:55:31+00:00", + "commented_code": "import { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\nimport { Document } from \"@langchain/core/documents\";\nimport { getEnvironmentVariable } from \"@langchain/core/utils/env\";\nimport {\n RedditAPIWrapper,\n RedditPost,\n RedditAPIConfig,\n} from \"../../utils/reddit.js\";\n\n/**\n * Class representing a document loader for loading Reddit posts. 
It extends\n * the BaseDocumentLoader and implements the RedditAPIConfig interface.\n * @example\n * ```typescript\n * const loader = new RedditPostsLoader({\n * clientId: \"REDDIT_CLIENT_ID\",\n * clientSecret: \"REDDIT_CLIENT_SECRET\",\n * userAgent: \"REDDIT_USER_AGENT\",\n * searchQueries: [\"LangChain\", \"Langchaindev\"],\n * mode: \"subreddit\",\n * categories: [\"hot\", \"new\"],\n * numberPosts: 5\n * });\n * const docs = await loader.load();\n * ```\n */\nexport class RedditPostsLoader\n extends BaseDocumentLoader\n implements RedditAPIConfig\n{\n public clientId: string;\n\n public clientSecret: string;\n\n public userAgent: string;\n\n private redditApiWrapper: RedditAPIWrapper;\n\n private searchQueries: string[];\n\n private mode: string;\n\n private categories: string[];\n\n private numberPosts: number;\n\n constructor({\n clientId = getEnvironmentVariable(\"REDDIT_CLIENT_ID\") as string,\n clientSecret = getEnvironmentVariable(\"REDDIT_CLIENT_SECRET\") as string,\n userAgent = getEnvironmentVariable(\"REDDIT_USER_AGENT\") as string,\n searchQueries,\n mode,\n categories = [\"new\"],\n numberPosts = 10,\n }: RedditAPIConfig & {\n searchQueries: string[];\n mode: string;\n categories?: string[];\n numberPosts?: number;\n }) {\n super();\n this.clientId = clientId;\n this.clientSecret = clientSecret;\n this.userAgent = userAgent;\n this.redditApiWrapper = new RedditAPIWrapper({\n clientId: this.clientId,\n clientSecret: this.clientSecret,\n userAgent: this.userAgent,\n });\n this.searchQueries = searchQueries;\n this.mode = mode;", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1899705386", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7300, + "pr_file": "libs/langchain-community/src/document_loaders/web/reddit.ts", + "discussion_id": "1899705386", + "commented_code": "@@ -0,0 +1,133 @@\n+import { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\n+import { Document } from \"@langchain/core/documents\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+import {\n+ RedditAPIWrapper,\n+ RedditPost,\n+ RedditAPIConfig,\n+} from \"../../utils/reddit.js\";\n+\n+/**\n+ * Class representing a document loader for loading Reddit posts. 
It extends\n+ * the BaseDocumentLoader and implements the RedditAPIConfig interface.\n+ * @example\n+ * ```typescript\n+ * const loader = new RedditPostsLoader({\n+ * clientId: \"REDDIT_CLIENT_ID\",\n+ * clientSecret: \"REDDIT_CLIENT_SECRET\",\n+ * userAgent: \"REDDIT_USER_AGENT\",\n+ * searchQueries: [\"LangChain\", \"Langchaindev\"],\n+ * mode: \"subreddit\",\n+ * categories: [\"hot\", \"new\"],\n+ * numberPosts: 5\n+ * });\n+ * const docs = await loader.load();\n+ * ```\n+ */\n+export class RedditPostsLoader\n+ extends BaseDocumentLoader\n+ implements RedditAPIConfig\n+{\n+ public clientId: string;\n+\n+ public clientSecret: string;\n+\n+ public userAgent: string;\n+\n+ private redditApiWrapper: RedditAPIWrapper;\n+\n+ private searchQueries: string[];\n+\n+ private mode: string;\n+\n+ private categories: string[];\n+\n+ private numberPosts: number;\n+\n+ constructor({\n+ clientId = getEnvironmentVariable(\"REDDIT_CLIENT_ID\") as string,\n+ clientSecret = getEnvironmentVariable(\"REDDIT_CLIENT_SECRET\") as string,\n+ userAgent = getEnvironmentVariable(\"REDDIT_USER_AGENT\") as string,\n+ searchQueries,\n+ mode,\n+ categories = [\"new\"],\n+ numberPosts = 10,\n+ }: RedditAPIConfig & {\n+ searchQueries: string[];\n+ mode: string;\n+ categories?: string[];\n+ numberPosts?: number;\n+ }) {\n+ super();\n+ this.clientId = clientId;\n+ this.clientSecret = clientSecret;\n+ this.userAgent = userAgent;\n+ this.redditApiWrapper = new RedditAPIWrapper({\n+ clientId: this.clientId,\n+ clientSecret: this.clientSecret,\n+ userAgent: this.userAgent,\n+ });\n+ this.searchQueries = searchQueries;\n+ this.mode = mode;", + "comment_created_at": "2024-12-30T17:55:31+00:00", + "comment_author": "nick-w-nick", + "comment_body": "Would be a lot nicer if the \"mode\" parameter was typed instead since different values cause errors to be thrown, like so:\r\n\r\n```suggestion\r\n\r\nexport type SearchMode = \"subreddit\" | \"username\";\r\n\r\n/**\r\n * Class representing a document loader for loading Reddit posts. 
It extends\r\n * the BaseDocumentLoader and implements the RedditAPIConfig interface.\r\n * @example\r\n * ```typescript\r\n * const loader = new RedditPostsLoader({\r\n * clientId: \"REDDIT_CLIENT_ID\",\r\n * clientSecret: \"REDDIT_CLIENT_SECRET\",\r\n * userAgent: \"REDDIT_USER_AGENT\",\r\n * searchQueries: [\"LangChain\", \"Langchaindev\"],\r\n * mode: \"subreddit\",\r\n * categories: [\"hot\", \"new\"],\r\n * numberPosts: 5\r\n * });\r\n * const docs = await loader.load();\r\n * ```\r\n */\r\nexport class RedditPostsLoader\r\n extends BaseDocumentLoader\r\n implements RedditAPIConfig\r\n{\r\n public clientId: string;\r\n\r\n public clientSecret: string;\r\n\r\n public userAgent: string;\r\n\r\n private redditApiWrapper: RedditAPIWrapper;\r\n\r\n private searchQueries: string[];\r\n\r\n private mode: SearchMode;\r\n\r\n private categories: string[];\r\n\r\n private numberPosts: number;\r\n\r\n constructor({\r\n clientId = getEnvironmentVariable(\"REDDIT_CLIENT_ID\") as string,\r\n clientSecret = getEnvironmentVariable(\"REDDIT_CLIENT_SECRET\") as string,\r\n userAgent = getEnvironmentVariable(\"REDDIT_USER_AGENT\") as string,\r\n searchQueries,\r\n mode,\r\n categories = [\"new\"],\r\n numberPosts = 10,\r\n }: RedditAPIConfig & {\r\n searchQueries: string[];\r\n mode: SearchMode;\r\n categories?: string[];\r\n numberPosts?: number;\r\n }) {\r\n super();\r\n this.clientId = clientId;\r\n this.clientSecret = clientSecret;\r\n this.userAgent = userAgent;\r\n this.redditApiWrapper = new RedditAPIWrapper({\r\n clientId: this.clientId,\r\n clientSecret: this.clientSecret,\r\n userAgent: this.userAgent,\r\n });\r\n this.searchQueries = searchQueries;\r\n this.mode = mode;\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1992603122", + "pr_number": 7817, + "pr_file": "libs/langchain-community/src/chat_models/perplexity.ts", + "created_at": "2025-03-13T02:58:54+00:00", + "commented_code": "import {\n AIMessage,\n AIMessageChunk,\n BaseMessage,\n BaseMessageChunk,\n ChatMessage,\n ChatMessageChunk,\n HumanMessage,\n HumanMessageChunk,\n SystemMessage,\n SystemMessageChunk,\n} from \"@langchain/core/messages\";\nimport {\n BaseChatModel,\n BaseChatModelParams,\n} from \"@langchain/core/language_models/chat_models\";\nimport { CallbackManagerForLLMRun } from \"@langchain/core/callbacks/manager\";\nimport {\n ChatGeneration,\n ChatGenerationChunk,\n ChatResult,\n} from \"@langchain/core/outputs\";\nimport { getEnvironmentVariable } from \"@langchain/core/utils/env\";\nimport OpenAI from \"openai\";\n\n/**\n * Type representing the role of a message in the Perplexity chat model.\n */\nexport type PerplexityRole = \"system\" | \"user\" | \"assistant\";\n\n/**\n * Interface defining the parameters for the Perplexity chat model.\n */\nexport interface PerplexityChatInput {\n /** Model name to use */\n modelName?: string;\n\n /** Maximum number of tokens to generate */\n maxTokens?: number;\n\n /** Temperature parameter between 0 and 2 */\n temperature?: number;\n\n /** Top P parameter between 0 and 1 */\n topP?: number;\n\n /** Search domain filter - limit the citations used by the online model to URLs from the specified domains. */\n searchDomainFilter?: any[];\n\n /** Whether to return images */\n returnImages?: boolean;\n\n /** Determines whether or not a request to an online model should return related questions. */\n returnRelatedQuestions?: boolean;\n\n /** Returns search results within the specified time interval - does not apply to images. 
Values include month, week, day, hour. */\n searchRecencyFilter?: string;\n\n /** Top K parameter between 1 and 2048 */\n topK?: number;\n\n /** Presence penalty between -2 and 2 */\n presencePenalty?: number;\n\n /** Frequency penalty greater than 0 */\n frequencyPenalty?: number;\n\n /** API key for Perplexity. Defaults to the value of\n * PERPLEXITY_API_KEY environment variable.\n */\n apiKey?: string;\n\n /** Whether to stream the results or not */\n streaming?: boolean;\n\n /** Timeout for requests to Perplexity */\n timeout?: number;\n}\n\n/**\n * Wrapper around Perplexity large language models that use the Chat endpoint.\n */\nexport class ChatPerplexity\n extends BaseChatModel\n implements PerplexityChatInput\n{\n static lc_name() {\n return \"ChatPerplexity\";\n }\n\n modelName = \"llama-3.1-sonar-small-128k-online\";", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1992603122", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7817, + "pr_file": "libs/langchain-community/src/chat_models/perplexity.ts", + "discussion_id": "1992603122", + "commented_code": "@@ -0,0 +1,290 @@\n+import {\n+ AIMessage,\n+ AIMessageChunk,\n+ BaseMessage,\n+ BaseMessageChunk,\n+ ChatMessage,\n+ ChatMessageChunk,\n+ HumanMessage,\n+ HumanMessageChunk,\n+ SystemMessage,\n+ SystemMessageChunk,\n+} from \"@langchain/core/messages\";\n+import {\n+ BaseChatModel,\n+ BaseChatModelParams,\n+} from \"@langchain/core/language_models/chat_models\";\n+import { CallbackManagerForLLMRun } from \"@langchain/core/callbacks/manager\";\n+import {\n+ ChatGeneration,\n+ ChatGenerationChunk,\n+ ChatResult,\n+} from \"@langchain/core/outputs\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+import OpenAI from \"openai\";\n+\n+/**\n+ * Type representing the role of a message in the Perplexity chat model.\n+ */\n+export type PerplexityRole = \"system\" | \"user\" | \"assistant\";\n+\n+/**\n+ * Interface defining the parameters for the Perplexity chat model.\n+ */\n+export interface PerplexityChatInput {\n+ /** Model name to use */\n+ modelName?: string;\n+\n+ /** Maximum number of tokens to generate */\n+ maxTokens?: number;\n+\n+ /** Temperature parameter between 0 and 2 */\n+ temperature?: number;\n+\n+ /** Top P parameter between 0 and 1 */\n+ topP?: number;\n+\n+ /** Search domain filter - limit the citations used by the online model to URLs from the specified domains. */\n+ searchDomainFilter?: any[];\n+\n+ /** Whether to return images */\n+ returnImages?: boolean;\n+\n+ /** Determines whether or not a request to an online model should return related questions. */\n+ returnRelatedQuestions?: boolean;\n+\n+ /** Returns search results within the specified time interval - does not apply to images. Values include month, week, day, hour. */\n+ searchRecencyFilter?: string;\n+\n+ /** Top K parameter between 1 and 2048 */\n+ topK?: number;\n+\n+ /** Presence penalty between -2 and 2 */\n+ presencePenalty?: number;\n+\n+ /** Frequency penalty greater than 0 */\n+ frequencyPenalty?: number;\n+\n+ /** API key for Perplexity. 
Defaults to the value of\n+ * PERPLEXITY_API_KEY environment variable.\n+ */\n+ apiKey?: string;\n+\n+ /** Whether to stream the results or not */\n+ streaming?: boolean;\n+\n+ /** Timeout for requests to Perplexity */\n+ timeout?: number;\n+}\n+\n+/**\n+ * Wrapper around Perplexity large language models that use the Chat endpoint.\n+ */\n+export class ChatPerplexity\n+ extends BaseChatModel\n+ implements PerplexityChatInput\n+{\n+ static lc_name() {\n+ return \"ChatPerplexity\";\n+ }\n+\n+ modelName = \"llama-3.1-sonar-small-128k-online\";", + "comment_created_at": "2025-03-13T02:58:54+00:00", + "comment_author": "jacoblee93", + "comment_body": "Let's not take a default here\r\n\r\nAlso, we are standardizing on `model` over `modelName`", + "pr_file_module": null + }, + { + "comment_id": "1996233446", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7817, + "pr_file": "libs/langchain-community/src/chat_models/perplexity.ts", + "discussion_id": "1992603122", + "commented_code": "@@ -0,0 +1,290 @@\n+import {\n+ AIMessage,\n+ AIMessageChunk,\n+ BaseMessage,\n+ BaseMessageChunk,\n+ ChatMessage,\n+ ChatMessageChunk,\n+ HumanMessage,\n+ HumanMessageChunk,\n+ SystemMessage,\n+ SystemMessageChunk,\n+} from \"@langchain/core/messages\";\n+import {\n+ BaseChatModel,\n+ BaseChatModelParams,\n+} from \"@langchain/core/language_models/chat_models\";\n+import { CallbackManagerForLLMRun } from \"@langchain/core/callbacks/manager\";\n+import {\n+ ChatGeneration,\n+ ChatGenerationChunk,\n+ ChatResult,\n+} from \"@langchain/core/outputs\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+import OpenAI from \"openai\";\n+\n+/**\n+ * Type representing the role of a message in the Perplexity chat model.\n+ */\n+export type PerplexityRole = \"system\" | \"user\" | \"assistant\";\n+\n+/**\n+ * Interface defining the parameters for the Perplexity chat model.\n+ */\n+export interface PerplexityChatInput {\n+ /** Model name to use */\n+ modelName?: string;\n+\n+ /** Maximum number of tokens to generate */\n+ maxTokens?: number;\n+\n+ /** Temperature parameter between 0 and 2 */\n+ temperature?: number;\n+\n+ /** Top P parameter between 0 and 1 */\n+ topP?: number;\n+\n+ /** Search domain filter - limit the citations used by the online model to URLs from the specified domains. */\n+ searchDomainFilter?: any[];\n+\n+ /** Whether to return images */\n+ returnImages?: boolean;\n+\n+ /** Determines whether or not a request to an online model should return related questions. */\n+ returnRelatedQuestions?: boolean;\n+\n+ /** Returns search results within the specified time interval - does not apply to images. Values include month, week, day, hour. */\n+ searchRecencyFilter?: string;\n+\n+ /** Top K parameter between 1 and 2048 */\n+ topK?: number;\n+\n+ /** Presence penalty between -2 and 2 */\n+ presencePenalty?: number;\n+\n+ /** Frequency penalty greater than 0 */\n+ frequencyPenalty?: number;\n+\n+ /** API key for Perplexity. 
Defaults to the value of\n+ * PERPLEXITY_API_KEY environment variable.\n+ */\n+ apiKey?: string;\n+\n+ /** Whether to stream the results or not */\n+ streaming?: boolean;\n+\n+ /** Timeout for requests to Perplexity */\n+ timeout?: number;\n+}\n+\n+/**\n+ * Wrapper around Perplexity large language models that use the Chat endpoint.\n+ */\n+export class ChatPerplexity\n+ extends BaseChatModel\n+ implements PerplexityChatInput\n+{\n+ static lc_name() {\n+ return \"ChatPerplexity\";\n+ }\n+\n+ modelName = \"llama-3.1-sonar-small-128k-online\";", + "comment_created_at": "2025-03-14T20:25:36+00:00", + "comment_author": "anadi45", + "comment_body": "changed", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1434581965", + "pr_number": 3577, + "pr_file": "libs/langchain-community/src/chat_models/watsonx_ai.ts", + "created_at": "2023-12-21T23:11:45+00:00", + "commented_code": "import { CallbackManagerForLLMRun } from \"@langchain/core/callbacks/manager\";\nimport {\n type BaseChatModelParams,\n SimpleChatModel,\n} from \"@langchain/core/language_models/chat_models\";\nimport {\n AIMessageChunk,\n BaseMessage,\n ChatMessage,\n} from \"@langchain/core/messages\";\nimport { ChatGenerationChunk } from \"@langchain/core/outputs\";\nimport { getEnvironmentVariable } from \"@langchain/core/utils/env\";\nimport type {\n WatsonModelParameters,\n WatsonxAIParams,\n} from \"../types/watsonx-types.js\";\nimport { WatsonApiClient } from \"../utils/watsonx-client.js\";\n\nexport class WatsonxAIChat extends SimpleChatModel {\n private readonly watsonApiClient: WatsonApiClient;\n\n readonly modelId!: string;\n\n readonly modelParameters?: WatsonModelParameters;\n\n readonly projectId!: string;\n\n constructor(fields: WatsonxAIParams & BaseChatModelParams) {\n super(fields);\n\n const {\n clientConfig = {},\n modelId = \"meta-llama/llama-2-70b-chat\",\n modelParameters,\n projectId = getEnvironmentVariable(\"WATSONX_PROJECT_ID\") ?? 
\"\",\n } = fields;\n\n this.modelId = modelId;\n this.modelParameters = modelParameters;\n this.projectId = projectId;\n\n const {\n apiKey = getEnvironmentVariable(\"IBM_CLOUD_API_KEY\"),\n apiVersion = \"2023-05-29\",\n region = \"us-south\",\n } = clientConfig;\n\n if (!apiKey) {\n throw new Error(\"Missing IBM Cloud API Key\");\n }\n\n if (!this.projectId) {\n throw new Error(\"Missing WatsonX AI Project ID\");\n }\n\n this.watsonApiClient = new WatsonApiClient({\n apiKey,\n apiVersion,\n region,\n });\n }\n\n protected _formatMessagesAsPrompt(messages: BaseMessage[]): string {\n return messages\n .map((message) => {\n let messageText;\n if (message._getType() === \"human\") {\n messageText = `[INST] ${message.content} [/INST]`;\n } else if (message._getType() === \"ai\") {\n messageText = message.content;\n } else if (message._getType() === \"system\") {\n messageText = `<> ${message.content} <>`;\n } else if (ChatMessage.isInstance(message)) {\n messageText = `\\n\\n${message.role[0].toUpperCase()}${message.role.slice(\n 1\n )}: ${message.content}`;\n } else {\n console.warn(\n `Unsupported message type passed to Watson: \"${message._getType()}\"`\n );\n messageText = \"\";\n }\n return messageText;\n })\n .join(\"\\n\");\n }\n\n _combineLLMOutput() {\n return {};\n }\n\n async _call(\n messages: BaseMessage[],\n options: this[\"ParsedCallOptions\"],\n runManager: CallbackManagerForLLMRun | undefined\n ): Promise {\n const chunks = [];\n const stream = this._streamResponseChunks(messages, options, runManager);\n for await (const chunk of stream) {\n chunks.push(chunk.message.content);\n }\n return chunks.join(\"\");\n }\n\n override async *_streamResponseChunks(\n _messages: BaseMessage[],\n _options: this[\"ParsedCallOptions\"],\n _runManager?: CallbackManagerForLLMRun", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1434581965", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3577, + "pr_file": "libs/langchain-community/src/chat_models/watsonx_ai.ts", + "discussion_id": "1434581965", + "commented_code": "@@ -0,0 +1,157 @@\n+import { CallbackManagerForLLMRun } from \"@langchain/core/callbacks/manager\";\n+import {\n+ type BaseChatModelParams,\n+ SimpleChatModel,\n+} from \"@langchain/core/language_models/chat_models\";\n+import {\n+ AIMessageChunk,\n+ BaseMessage,\n+ ChatMessage,\n+} from \"@langchain/core/messages\";\n+import { ChatGenerationChunk } from \"@langchain/core/outputs\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+import type {\n+ WatsonModelParameters,\n+ WatsonxAIParams,\n+} from \"../types/watsonx-types.js\";\n+import { WatsonApiClient } from \"../utils/watsonx-client.js\";\n+\n+export class WatsonxAIChat extends SimpleChatModel {\n+ private readonly watsonApiClient: WatsonApiClient;\n+\n+ readonly modelId!: string;\n+\n+ readonly modelParameters?: WatsonModelParameters;\n+\n+ readonly projectId!: string;\n+\n+ constructor(fields: WatsonxAIParams & BaseChatModelParams) {\n+ super(fields);\n+\n+ const {\n+ clientConfig = {},\n+ modelId = \"meta-llama/llama-2-70b-chat\",\n+ modelParameters,\n+ projectId = getEnvironmentVariable(\"WATSONX_PROJECT_ID\") ?? 
\"\",\n+ } = fields;\n+\n+ this.modelId = modelId;\n+ this.modelParameters = modelParameters;\n+ this.projectId = projectId;\n+\n+ const {\n+ apiKey = getEnvironmentVariable(\"IBM_CLOUD_API_KEY\"),\n+ apiVersion = \"2023-05-29\",\n+ region = \"us-south\",\n+ } = clientConfig;\n+\n+ if (!apiKey) {\n+ throw new Error(\"Missing IBM Cloud API Key\");\n+ }\n+\n+ if (!this.projectId) {\n+ throw new Error(\"Missing WatsonX AI Project ID\");\n+ }\n+\n+ this.watsonApiClient = new WatsonApiClient({\n+ apiKey,\n+ apiVersion,\n+ region,\n+ });\n+ }\n+\n+ protected _formatMessagesAsPrompt(messages: BaseMessage[]): string {\n+ return messages\n+ .map((message) => {\n+ let messageText;\n+ if (message._getType() === \"human\") {\n+ messageText = `[INST] ${message.content} [/INST]`;\n+ } else if (message._getType() === \"ai\") {\n+ messageText = message.content;\n+ } else if (message._getType() === \"system\") {\n+ messageText = `<> ${message.content} <>`;\n+ } else if (ChatMessage.isInstance(message)) {\n+ messageText = `\\n\\n${message.role[0].toUpperCase()}${message.role.slice(\n+ 1\n+ )}: ${message.content}`;\n+ } else {\n+ console.warn(\n+ `Unsupported message type passed to Watson: \"${message._getType()}\"`\n+ );\n+ messageText = \"\";\n+ }\n+ return messageText;\n+ })\n+ .join(\"\\n\");\n+ }\n+\n+ _combineLLMOutput() {\n+ return {};\n+ }\n+\n+ async _call(\n+ messages: BaseMessage[],\n+ options: this[\"ParsedCallOptions\"],\n+ runManager: CallbackManagerForLLMRun | undefined\n+ ): Promise {\n+ const chunks = [];\n+ const stream = this._streamResponseChunks(messages, options, runManager);\n+ for await (const chunk of stream) {\n+ chunks.push(chunk.message.content);\n+ }\n+ return chunks.join(\"\");\n+ }\n+\n+ override async *_streamResponseChunks(\n+ _messages: BaseMessage[],\n+ _options: this[\"ParsedCallOptions\"],\n+ _runManager?: CallbackManagerForLLMRun", + "comment_created_at": "2023-12-21T23:11:45+00:00", + "comment_author": "bracesproul", + "comment_body": "Drop the underscore if they're being used. 
Typically, variables prefixed with an underscore are unused, and the underscore is used to bypass a lint rule for no-unused-variables", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1418032747", + "pr_number": 3091, + "pr_file": "langchain/src/experimental/tools/graphql.ts", + "created_at": "2023-12-06T22:09:04+00:00", + "commented_code": "import { z } from \"zod\";\nimport { StructuredTool } from \"../../tools/base.js\";\n\nexport class GraphQLClientTool extends StructuredTool {\n static lc_name() {\n return \"GraphQLClientTool\";\n }\n\n name = \"gql_client\";\n\n description = `You can make a GraphQL request with this tool`;\n\n _endpoint: string;\n\n _headers: HeadersInit | undefined;", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1418032747", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3091, + "pr_file": "langchain/src/experimental/tools/graphql.ts", + "discussion_id": "1418032747", + "commented_code": "@@ -0,0 +1,47 @@\n+import { z } from \"zod\";\n+import { StructuredTool } from \"../../tools/base.js\";\n+\n+export class GraphQLClientTool extends StructuredTool {\n+ static lc_name() {\n+ return \"GraphQLClientTool\";\n+ }\n+\n+ name = \"gql_client\";\n+\n+ description = `You can make a GraphQL request with this tool`;\n+\n+ _endpoint: string;\n+\n+ _headers: HeadersInit | undefined;", + "comment_created_at": "2023-12-06T22:09:04+00:00", + "comment_author": "bracesproul", + "comment_body": "Instead of prefixing with `_` lets just make them `private`/`protected`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1908162540", + "pr_number": 7344, + "pr_file": "libs/langchain-community/src/vectorstores/azion_edgesql.ts", + "created_at": "2025-01-09T04:50:04+00:00", + "commented_code": "import { VectorStore } from '@langchain/core/vectorstores';\nimport { useQuery, useExecute, getDatabases, createDatabase, getTables, type AzionDatabaseResponse, QueryResult, AzionDatabaseQueryResponse } from 'azion/sql';\nimport type { EmbeddingsInterface } from '@langchain/core/embeddings';\nimport { Document } from '@langchain/core/documents';\n\n/**\n * Represents a filter condition for querying the Azion database\n * @property operator - The comparison operator to use (e.g. 
=, !=, >, <, etc)\n * @property column - The database column to filter on\n * @property value - The value to compare against\n */\nexport type AzionFilter = {operator: Operator, column: Column, value: string};\n\n/**\n * Represents a database column name\n */\nexport type Column = string;\n\n/**\n * Valid SQL operators that can be used in filter conditions\n */\nexport type Operator = \n | '=' | '!=' | '>' | '<>' | '<' // Basic comparison operators\n | '>=' | '<=' // Range operators\n | 'LIKE' | 'NOT LIKE' // Pattern matching\n | 'IN' | 'NOT IN' // Set membership\n | 'IS NULL' | 'IS NOT NULL'; // NULL checks\n\n\n/**\n * Interface for configuring the Azion vector store setup\n * @property {string[]} columns - Additional columns to create in the database table beyond the required ones\n * @property {\"vector\" | \"hybrid\"} mode - The search mode to enable:\n * \"vector\" - Only vector similarity search\n * \"hybrid\" - Both vector and full-text search capabilities\n */\ninterface AzionSetupOptions {\n columns: string[],\n mode: \"vector\" | \"hybrid\"\n}\n\n/**\n * Interface representing the structure of a row in the vector store\n * @property content - The text content of the document\n * @property embedding - The vector embedding of the content as an array of numbers\n * @property metadata - Additional metadata associated with the document as key-value pairs\n */\ninterface rowsInterface {\n content: string;\n embedding: number[];\n metadata: Record;\n}\n\nexport type AzionMetadata = Record;\n\n/**\n * Interface for the response returned when searching embeddings.\n */\ninterface SearchEmbeddingsResponse {\n id: number;\n content: string;\n similarity: number;\n metadata: {\n searchtype: string;\n [key: string]: any;\n };\n}\n\n/**\n * Interface for configuring hybrid search options that combines vector and full-text search\n * @property {number} kfts - Number of results to return from full-text search\n * @property {number} kvector - Number of results to return from vector similarity search\n * @property {AzionFilter[]} [filter] - Optional array of filters to apply to search results\n * @property {string[]} [metadataItems] - Optional array of metadata fields to include in results\n */\ninterface hybridSearchOptions {", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1908162540", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7344, + "pr_file": "libs/langchain-community/src/vectorstores/azion_edgesql.ts", + "discussion_id": "1908162540", + "commented_code": "@@ -0,0 +1,924 @@\n+import { VectorStore } from '@langchain/core/vectorstores';\n+import { useQuery, useExecute, getDatabases, createDatabase, getTables, type AzionDatabaseResponse, QueryResult, AzionDatabaseQueryResponse } from 'azion/sql';\n+import type { EmbeddingsInterface } from '@langchain/core/embeddings';\n+import { Document } from '@langchain/core/documents';\n+\n+/**\n+ * Represents a filter condition for querying the Azion database\n+ * @property operator - The comparison operator to use (e.g. 
=, !=, >, <, etc)\n+ * @property column - The database column to filter on\n+ * @property value - The value to compare against\n+ */\n+export type AzionFilter = {operator: Operator, column: Column, value: string};\n+\n+/**\n+ * Represents a database column name\n+ */\n+export type Column = string;\n+\n+/**\n+ * Valid SQL operators that can be used in filter conditions\n+ */\n+export type Operator = \n+ | '=' | '!=' | '>' | '<>' | '<' // Basic comparison operators\n+ | '>=' | '<=' // Range operators\n+ | 'LIKE' | 'NOT LIKE' // Pattern matching\n+ | 'IN' | 'NOT IN' // Set membership\n+ | 'IS NULL' | 'IS NOT NULL'; // NULL checks\n+\n+\n+/**\n+ * Interface for configuring the Azion vector store setup\n+ * @property {string[]} columns - Additional columns to create in the database table beyond the required ones\n+ * @property {\"vector\" | \"hybrid\"} mode - The search mode to enable:\n+ * \"vector\" - Only vector similarity search\n+ * \"hybrid\" - Both vector and full-text search capabilities\n+ */\n+interface AzionSetupOptions {\n+ columns: string[],\n+ mode: \"vector\" | \"hybrid\"\n+}\n+\n+/**\n+ * Interface representing the structure of a row in the vector store\n+ * @property content - The text content of the document\n+ * @property embedding - The vector embedding of the content as an array of numbers\n+ * @property metadata - Additional metadata associated with the document as key-value pairs\n+ */\n+interface rowsInterface {\n+ content: string;\n+ embedding: number[];\n+ metadata: Record;\n+}\n+\n+export type AzionMetadata = Record;\n+\n+/**\n+ * Interface for the response returned when searching embeddings.\n+ */\n+interface SearchEmbeddingsResponse {\n+ id: number;\n+ content: string;\n+ similarity: number;\n+ metadata: {\n+ searchtype: string;\n+ [key: string]: any;\n+ };\n+}\n+\n+/**\n+ * Interface for configuring hybrid search options that combines vector and full-text search\n+ * @property {number} kfts - Number of results to return from full-text search\n+ * @property {number} kvector - Number of results to return from vector similarity search\n+ * @property {AzionFilter[]} [filter] - Optional array of filters to apply to search results\n+ * @property {string[]} [metadataItems] - Optional array of metadata fields to include in results\n+ */\n+interface hybridSearchOptions {", + "comment_created_at": "2025-01-09T04:50:04+00:00", + "comment_author": "jacoblee93", + "comment_body": "By convention we capitalize types/interfaces", + "pr_file_module": null + }, + { + "comment_id": "1941033662", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7344, + "pr_file": "libs/langchain-community/src/vectorstores/azion_edgesql.ts", + "discussion_id": "1908162540", + "commented_code": "@@ -0,0 +1,924 @@\n+import { VectorStore } from '@langchain/core/vectorstores';\n+import { useQuery, useExecute, getDatabases, createDatabase, getTables, type AzionDatabaseResponse, QueryResult, AzionDatabaseQueryResponse } from 'azion/sql';\n+import type { EmbeddingsInterface } from '@langchain/core/embeddings';\n+import { Document } from '@langchain/core/documents';\n+\n+/**\n+ * Represents a filter condition for querying the Azion database\n+ * @property operator - The comparison operator to use (e.g. 
=, !=, >, <, etc)\n+ * @property column - The database column to filter on\n+ * @property value - The value to compare against\n+ */\n+export type AzionFilter = {operator: Operator, column: Column, value: string};\n+\n+/**\n+ * Represents a database column name\n+ */\n+export type Column = string;\n+\n+/**\n+ * Valid SQL operators that can be used in filter conditions\n+ */\n+export type Operator = \n+ | '=' | '!=' | '>' | '<>' | '<' // Basic comparison operators\n+ | '>=' | '<=' // Range operators\n+ | 'LIKE' | 'NOT LIKE' // Pattern matching\n+ | 'IN' | 'NOT IN' // Set membership\n+ | 'IS NULL' | 'IS NOT NULL'; // NULL checks\n+\n+\n+/**\n+ * Interface for configuring the Azion vector store setup\n+ * @property {string[]} columns - Additional columns to create in the database table beyond the required ones\n+ * @property {\"vector\" | \"hybrid\"} mode - The search mode to enable:\n+ * \"vector\" - Only vector similarity search\n+ * \"hybrid\" - Both vector and full-text search capabilities\n+ */\n+interface AzionSetupOptions {\n+ columns: string[],\n+ mode: \"vector\" | \"hybrid\"\n+}\n+\n+/**\n+ * Interface representing the structure of a row in the vector store\n+ * @property content - The text content of the document\n+ * @property embedding - The vector embedding of the content as an array of numbers\n+ * @property metadata - Additional metadata associated with the document as key-value pairs\n+ */\n+interface rowsInterface {\n+ content: string;\n+ embedding: number[];\n+ metadata: Record;\n+}\n+\n+export type AzionMetadata = Record;\n+\n+/**\n+ * Interface for the response returned when searching embeddings.\n+ */\n+interface SearchEmbeddingsResponse {\n+ id: number;\n+ content: string;\n+ similarity: number;\n+ metadata: {\n+ searchtype: string;\n+ [key: string]: any;\n+ };\n+}\n+\n+/**\n+ * Interface for configuring hybrid search options that combines vector and full-text search\n+ * @property {number} kfts - Number of results to return from full-text search\n+ * @property {number} kvector - Number of results to return from vector similarity search\n+ * @property {AzionFilter[]} [filter] - Optional array of filters to apply to search results\n+ * @property {string[]} [metadataItems] - Optional array of metadata fields to include in results\n+ */\n+interface hybridSearchOptions {", + "comment_created_at": "2025-02-04T11:57:24+00:00", + "comment_author": "PedroMiolaSilva", + "comment_body": "Fixed!\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1908152860", + "pr_number": 7450, + "pr_file": "libs/langchain-community/src/embeddings/bytedance_doubao.ts", + "created_at": "2025-01-09T04:32:48+00:00", + "commented_code": "import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\nimport { Embeddings, type EmbeddingsParams } from \"@langchain/core/embeddings\";\nimport { chunkArray } from \"@langchain/core/utils/chunk_array\";\n\nexport interface ByteDanceDoubaoEmbeddingsParams extends EmbeddingsParams {\n /** Model name to use */\n modelName: string;", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1908152860", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7450, + "pr_file": "libs/langchain-community/src/embeddings/bytedance_doubao.ts", + "discussion_id": "1908152860", + "commented_code": "@@ -0,0 +1,175 @@\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+import { Embeddings, type EmbeddingsParams } from \"@langchain/core/embeddings\";\n+import { chunkArray } from 
\"@langchain/core/utils/chunk_array\";\n+\n+export interface ByteDanceDoubaoEmbeddingsParams extends EmbeddingsParams {\n+ /** Model name to use */\n+ modelName: string;", + "comment_created_at": "2025-01-09T04:32:48+00:00", + "comment_author": "jacoblee93", + "comment_body": "We are standardizing as `model` instead of `modelName`", + "pr_file_module": null + }, + { + "comment_id": "1908213190", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7450, + "pr_file": "libs/langchain-community/src/embeddings/bytedance_doubao.ts", + "discussion_id": "1908152860", + "commented_code": "@@ -0,0 +1,175 @@\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+import { Embeddings, type EmbeddingsParams } from \"@langchain/core/embeddings\";\n+import { chunkArray } from \"@langchain/core/utils/chunk_array\";\n+\n+export interface ByteDanceDoubaoEmbeddingsParams extends EmbeddingsParams {\n+ /** Model name to use */\n+ modelName: string;", + "comment_created_at": "2025-01-09T06:06:56+00:00", + "comment_author": "ucev", + "comment_body": "done", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1118002128", + "pr_number": 139, + "pr_file": "langchain/chains/question_answering/load.ts", + "created_at": "2023-02-26T01:12:56+00:00", + "commented_code": "DEFAULT_COMBINE_QA_PROMPT,\n} from \"./map_reduce_prompts\";\n\ninterface qaChainParams {\n prompt?: PromptTemplate;\n combineMapPrompt?: PromptTemplate;\n combinePrompt?: PromptTemplate;\n type?: string;\n}\nexport const loadQAChain = (llm: BaseLLM, params: qaChainParams = {}) => {\ntype chainTypeName = \"stuff\" | \"map_reduce\";", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1118002128", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 139, + "pr_file": "langchain/chains/question_answering/load.ts", + "discussion_id": "1118002128", + "commented_code": "@@ -11,13 +11,23 @@ import {\n DEFAULT_COMBINE_QA_PROMPT,\n } from \"./map_reduce_prompts\";\n \n-interface qaChainParams {\n- prompt?: PromptTemplate;\n- combineMapPrompt?: PromptTemplate;\n- combinePrompt?: PromptTemplate;\n- type?: string;\n-}\n-export const loadQAChain = (llm: BaseLLM, params: qaChainParams = {}) => {\n+type chainTypeName = \"stuff\" | \"map_reduce\";", + "comment_created_at": "2023-02-26T01:12:56+00:00", + "comment_author": "atbe", + "comment_body": "I would give this type a more strict name and follow upper casing for it too (given that its a type):\r\n\r\n`QuestionAnsweringChainType` for example\r\n\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1119986832", + "pr_number": 139, + "pr_file": "langchain/chains/question_answering/load.ts", + "created_at": "2023-02-28T12:24:23+00:00", + "commented_code": "DEFAULT_COMBINE_QA_PROMPT,\n} from \"./map_reduce_prompts\";\n\ninterface qaChainParams {\n prompt?: PromptTemplate;\n combineMapPrompt?: PromptTemplate;\n combinePrompt?: PromptTemplate;\n type?: string;\n}\nexport const loadQAChain = (llm: BaseLLM, params: qaChainParams = {}) => {\ntype chainTypeName = \"stuff\" | \"map_reduce\";\n\ntype chainType = T extends \"stuff\"", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1119986832", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 139, + "pr_file": "langchain/chains/question_answering/load.ts", + "discussion_id": "1119986832", + "commented_code": "@@ -11,13 +11,23 @@ import {\n DEFAULT_COMBINE_QA_PROMPT,\n } from \"./map_reduce_prompts\";\n 
\n-interface qaChainParams {\n- prompt?: PromptTemplate;\n- combineMapPrompt?: PromptTemplate;\n- combinePrompt?: PromptTemplate;\n- type?: string;\n-}\n-export const loadQAChain = (llm: BaseLLM, params: qaChainParams = {}) => {\n+type chainTypeName = \"stuff\" | \"map_reduce\";\n+\n+type chainType = T extends \"stuff\"", + "comment_created_at": "2023-02-28T12:24:23+00:00", + "comment_author": "nfcampos", + "comment_body": "Types should start with an uppercase letter", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1183234558", + "pr_number": 1096, + "pr_file": "langchain/src/document_loaders/fs/directory.ts", + "created_at": "2023-05-03T04:47:50+00:00", + "commented_code": "constructor(\n public directoryPath: string,\n public loaders: {\n [extension: string]: (filePath: string) => BaseDocumentLoader;\n [extension: `.${string}`]: (filePath: string) => BaseDocumentLoader;", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1183234558", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 1096, + "pr_file": "langchain/src/document_loaders/fs/directory.ts", + "discussion_id": "1183234558", + "commented_code": "@@ -19,7 +19,7 @@ export class DirectoryLoader extends BaseDocumentLoader {\n constructor(\n public directoryPath: string,\n public loaders: {\n- [extension: string]: (filePath: string) => BaseDocumentLoader;\n+ [extension: `.${string}`]: (filePath: string) => BaseDocumentLoader;", + "comment_created_at": "2023-05-03T04:47:50+00:00", + "comment_author": "rpidanny", + "comment_body": "Since we're always expecting the extension to start with a `.`, this should help TS users to have better DX. wdyt?", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-typescript-naming-standards.md b/_reviewers/langchainjs-typescript-naming-standards.md index c6b4bfd..d4c0ee0 100644 --- a/_reviewers/langchainjs-typescript-naming-standards.md +++ b/_reviewers/langchainjs-typescript-naming-standards.md @@ -72,275 +72,3 @@ Follow consistent naming conventions in TypeScript to improve code clarity, type // More precise loaders: { [extension: `.${string}`]: (path: string) => Loader } ``` - - -[ - { - "discussion_id": "2106342464", - "pr_number": 8248, - "pr_file": "libs/langchain-anthropic/src/chat_models.ts", - "created_at": "2025-05-25T23:52:50+00:00", - "commented_code": "return \"input_schema\" in tool;\n}\n\n// eslint-disable-next-line @typescript-eslint/no-explicit-any\nfunction isBuiltinTool(\n tool: any", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2106342464", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8248, - "pr_file": "libs/langchain-anthropic/src/chat_models.ts", - "discussion_id": "2106342464", - "commented_code": "@@ -100,6 +100,26 @@ function isAnthropicTool(tool: any): tool is Anthropic.Messages.Tool {\n return \"input_schema\" in tool;\n }\n \n+// eslint-disable-next-line @typescript-eslint/no-explicit-any\n+function isBuiltinTool(\n+ tool: any", - "comment_created_at": "2025-05-25T23:52:50+00:00", - "comment_author": "benjamincburns", - "comment_body": "nit: use `unknown` - that's why we have the ESLint error", - "pr_file_module": null - }, - { - "comment_id": "2106392558", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8248, - "pr_file": "libs/langchain-anthropic/src/chat_models.ts", - "discussion_id": "2106342464", - "commented_code": "@@ -100,6 +100,26 @@ function 
isAnthropicTool(tool: any): tool is Anthropic.Messages.Tool {\n return \"input_schema\" in tool;\n }\n \n+// eslint-disable-next-line @typescript-eslint/no-explicit-any\n+function isBuiltinTool(\n+ tool: any", - "comment_created_at": "2025-05-26T01:49:53+00:00", - "comment_author": "bleafman", - "comment_body": "Done!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1899705386", - "pr_number": 7300, - "pr_file": "libs/langchain-community/src/document_loaders/web/reddit.ts", - "created_at": "2024-12-30T17:55:31+00:00", - "commented_code": "import { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\nimport { Document } from \"@langchain/core/documents\";\nimport { getEnvironmentVariable } from \"@langchain/core/utils/env\";\nimport {\n RedditAPIWrapper,\n RedditPost,\n RedditAPIConfig,\n} from \"../../utils/reddit.js\";\n\n/**\n * Class representing a document loader for loading Reddit posts. It extends\n * the BaseDocumentLoader and implements the RedditAPIConfig interface.\n * @example\n * ```typescript\n * const loader = new RedditPostsLoader({\n * clientId: \"REDDIT_CLIENT_ID\",\n * clientSecret: \"REDDIT_CLIENT_SECRET\",\n * userAgent: \"REDDIT_USER_AGENT\",\n * searchQueries: [\"LangChain\", \"Langchaindev\"],\n * mode: \"subreddit\",\n * categories: [\"hot\", \"new\"],\n * numberPosts: 5\n * });\n * const docs = await loader.load();\n * ```\n */\nexport class RedditPostsLoader\n extends BaseDocumentLoader\n implements RedditAPIConfig\n{\n public clientId: string;\n\n public clientSecret: string;\n\n public userAgent: string;\n\n private redditApiWrapper: RedditAPIWrapper;\n\n private searchQueries: string[];\n\n private mode: string;\n\n private categories: string[];\n\n private numberPosts: number;\n\n constructor({\n clientId = getEnvironmentVariable(\"REDDIT_CLIENT_ID\") as string,\n clientSecret = getEnvironmentVariable(\"REDDIT_CLIENT_SECRET\") as string,\n userAgent = getEnvironmentVariable(\"REDDIT_USER_AGENT\") as string,\n searchQueries,\n mode,\n categories = [\"new\"],\n numberPosts = 10,\n }: RedditAPIConfig & {\n searchQueries: string[];\n mode: string;\n categories?: string[];\n numberPosts?: number;\n }) {\n super();\n this.clientId = clientId;\n this.clientSecret = clientSecret;\n this.userAgent = userAgent;\n this.redditApiWrapper = new RedditAPIWrapper({\n clientId: this.clientId,\n clientSecret: this.clientSecret,\n userAgent: this.userAgent,\n });\n this.searchQueries = searchQueries;\n this.mode = mode;", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1899705386", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7300, - "pr_file": "libs/langchain-community/src/document_loaders/web/reddit.ts", - "discussion_id": "1899705386", - "commented_code": "@@ -0,0 +1,133 @@\n+import { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\n+import { Document } from \"@langchain/core/documents\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+import {\n+ RedditAPIWrapper,\n+ RedditPost,\n+ RedditAPIConfig,\n+} from \"../../utils/reddit.js\";\n+\n+/**\n+ * Class representing a document loader for loading Reddit posts. 
It extends\n+ * the BaseDocumentLoader and implements the RedditAPIConfig interface.\n+ * @example\n+ * ```typescript\n+ * const loader = new RedditPostsLoader({\n+ * clientId: \"REDDIT_CLIENT_ID\",\n+ * clientSecret: \"REDDIT_CLIENT_SECRET\",\n+ * userAgent: \"REDDIT_USER_AGENT\",\n+ * searchQueries: [\"LangChain\", \"Langchaindev\"],\n+ * mode: \"subreddit\",\n+ * categories: [\"hot\", \"new\"],\n+ * numberPosts: 5\n+ * });\n+ * const docs = await loader.load();\n+ * ```\n+ */\n+export class RedditPostsLoader\n+ extends BaseDocumentLoader\n+ implements RedditAPIConfig\n+{\n+ public clientId: string;\n+\n+ public clientSecret: string;\n+\n+ public userAgent: string;\n+\n+ private redditApiWrapper: RedditAPIWrapper;\n+\n+ private searchQueries: string[];\n+\n+ private mode: string;\n+\n+ private categories: string[];\n+\n+ private numberPosts: number;\n+\n+ constructor({\n+ clientId = getEnvironmentVariable(\"REDDIT_CLIENT_ID\") as string,\n+ clientSecret = getEnvironmentVariable(\"REDDIT_CLIENT_SECRET\") as string,\n+ userAgent = getEnvironmentVariable(\"REDDIT_USER_AGENT\") as string,\n+ searchQueries,\n+ mode,\n+ categories = [\"new\"],\n+ numberPosts = 10,\n+ }: RedditAPIConfig & {\n+ searchQueries: string[];\n+ mode: string;\n+ categories?: string[];\n+ numberPosts?: number;\n+ }) {\n+ super();\n+ this.clientId = clientId;\n+ this.clientSecret = clientSecret;\n+ this.userAgent = userAgent;\n+ this.redditApiWrapper = new RedditAPIWrapper({\n+ clientId: this.clientId,\n+ clientSecret: this.clientSecret,\n+ userAgent: this.userAgent,\n+ });\n+ this.searchQueries = searchQueries;\n+ this.mode = mode;", - "comment_created_at": "2024-12-30T17:55:31+00:00", - "comment_author": "nick-w-nick", - "comment_body": "Would be a lot nicer if the \"mode\" parameter was typed instead since different values cause errors to be thrown, like so:\r\n\r\n```suggestion\r\n\r\nexport type SearchMode = \"subreddit\" | \"username\";\r\n\r\n/**\r\n * Class representing a document loader for loading Reddit posts. 
It extends\r\n * the BaseDocumentLoader and implements the RedditAPIConfig interface.\r\n * @example\r\n * ```typescript\r\n * const loader = new RedditPostsLoader({\r\n * clientId: \"REDDIT_CLIENT_ID\",\r\n * clientSecret: \"REDDIT_CLIENT_SECRET\",\r\n * userAgent: \"REDDIT_USER_AGENT\",\r\n * searchQueries: [\"LangChain\", \"Langchaindev\"],\r\n * mode: \"subreddit\",\r\n * categories: [\"hot\", \"new\"],\r\n * numberPosts: 5\r\n * });\r\n * const docs = await loader.load();\r\n * ```\r\n */\r\nexport class RedditPostsLoader\r\n extends BaseDocumentLoader\r\n implements RedditAPIConfig\r\n{\r\n public clientId: string;\r\n\r\n public clientSecret: string;\r\n\r\n public userAgent: string;\r\n\r\n private redditApiWrapper: RedditAPIWrapper;\r\n\r\n private searchQueries: string[];\r\n\r\n private mode: SearchMode;\r\n\r\n private categories: string[];\r\n\r\n private numberPosts: number;\r\n\r\n constructor({\r\n clientId = getEnvironmentVariable(\"REDDIT_CLIENT_ID\") as string,\r\n clientSecret = getEnvironmentVariable(\"REDDIT_CLIENT_SECRET\") as string,\r\n userAgent = getEnvironmentVariable(\"REDDIT_USER_AGENT\") as string,\r\n searchQueries,\r\n mode,\r\n categories = [\"new\"],\r\n numberPosts = 10,\r\n }: RedditAPIConfig & {\r\n searchQueries: string[];\r\n mode: SearchMode;\r\n categories?: string[];\r\n numberPosts?: number;\r\n }) {\r\n super();\r\n this.clientId = clientId;\r\n this.clientSecret = clientSecret;\r\n this.userAgent = userAgent;\r\n this.redditApiWrapper = new RedditAPIWrapper({\r\n clientId: this.clientId,\r\n clientSecret: this.clientSecret,\r\n userAgent: this.userAgent,\r\n });\r\n this.searchQueries = searchQueries;\r\n this.mode = mode;\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1992603122", - "pr_number": 7817, - "pr_file": "libs/langchain-community/src/chat_models/perplexity.ts", - "created_at": "2025-03-13T02:58:54+00:00", - "commented_code": "import {\n AIMessage,\n AIMessageChunk,\n BaseMessage,\n BaseMessageChunk,\n ChatMessage,\n ChatMessageChunk,\n HumanMessage,\n HumanMessageChunk,\n SystemMessage,\n SystemMessageChunk,\n} from \"@langchain/core/messages\";\nimport {\n BaseChatModel,\n BaseChatModelParams,\n} from \"@langchain/core/language_models/chat_models\";\nimport { CallbackManagerForLLMRun } from \"@langchain/core/callbacks/manager\";\nimport {\n ChatGeneration,\n ChatGenerationChunk,\n ChatResult,\n} from \"@langchain/core/outputs\";\nimport { getEnvironmentVariable } from \"@langchain/core/utils/env\";\nimport OpenAI from \"openai\";\n\n/**\n * Type representing the role of a message in the Perplexity chat model.\n */\nexport type PerplexityRole = \"system\" | \"user\" | \"assistant\";\n\n/**\n * Interface defining the parameters for the Perplexity chat model.\n */\nexport interface PerplexityChatInput {\n /** Model name to use */\n modelName?: string;\n\n /** Maximum number of tokens to generate */\n maxTokens?: number;\n\n /** Temperature parameter between 0 and 2 */\n temperature?: number;\n\n /** Top P parameter between 0 and 1 */\n topP?: number;\n\n /** Search domain filter - limit the citations used by the online model to URLs from the specified domains. */\n searchDomainFilter?: any[];\n\n /** Whether to return images */\n returnImages?: boolean;\n\n /** Determines whether or not a request to an online model should return related questions. */\n returnRelatedQuestions?: boolean;\n\n /** Returns search results within the specified time interval - does not apply to images. 
Values include month, week, day, hour. */\n searchRecencyFilter?: string;\n\n /** Top K parameter between 1 and 2048 */\n topK?: number;\n\n /** Presence penalty between -2 and 2 */\n presencePenalty?: number;\n\n /** Frequency penalty greater than 0 */\n frequencyPenalty?: number;\n\n /** API key for Perplexity. Defaults to the value of\n * PERPLEXITY_API_KEY environment variable.\n */\n apiKey?: string;\n\n /** Whether to stream the results or not */\n streaming?: boolean;\n\n /** Timeout for requests to Perplexity */\n timeout?: number;\n}\n\n/**\n * Wrapper around Perplexity large language models that use the Chat endpoint.\n */\nexport class ChatPerplexity\n extends BaseChatModel\n implements PerplexityChatInput\n{\n static lc_name() {\n return \"ChatPerplexity\";\n }\n\n modelName = \"llama-3.1-sonar-small-128k-online\";", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1992603122", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7817, - "pr_file": "libs/langchain-community/src/chat_models/perplexity.ts", - "discussion_id": "1992603122", - "commented_code": "@@ -0,0 +1,290 @@\n+import {\n+ AIMessage,\n+ AIMessageChunk,\n+ BaseMessage,\n+ BaseMessageChunk,\n+ ChatMessage,\n+ ChatMessageChunk,\n+ HumanMessage,\n+ HumanMessageChunk,\n+ SystemMessage,\n+ SystemMessageChunk,\n+} from \"@langchain/core/messages\";\n+import {\n+ BaseChatModel,\n+ BaseChatModelParams,\n+} from \"@langchain/core/language_models/chat_models\";\n+import { CallbackManagerForLLMRun } from \"@langchain/core/callbacks/manager\";\n+import {\n+ ChatGeneration,\n+ ChatGenerationChunk,\n+ ChatResult,\n+} from \"@langchain/core/outputs\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+import OpenAI from \"openai\";\n+\n+/**\n+ * Type representing the role of a message in the Perplexity chat model.\n+ */\n+export type PerplexityRole = \"system\" | \"user\" | \"assistant\";\n+\n+/**\n+ * Interface defining the parameters for the Perplexity chat model.\n+ */\n+export interface PerplexityChatInput {\n+ /** Model name to use */\n+ modelName?: string;\n+\n+ /** Maximum number of tokens to generate */\n+ maxTokens?: number;\n+\n+ /** Temperature parameter between 0 and 2 */\n+ temperature?: number;\n+\n+ /** Top P parameter between 0 and 1 */\n+ topP?: number;\n+\n+ /** Search domain filter - limit the citations used by the online model to URLs from the specified domains. */\n+ searchDomainFilter?: any[];\n+\n+ /** Whether to return images */\n+ returnImages?: boolean;\n+\n+ /** Determines whether or not a request to an online model should return related questions. */\n+ returnRelatedQuestions?: boolean;\n+\n+ /** Returns search results within the specified time interval - does not apply to images. Values include month, week, day, hour. */\n+ searchRecencyFilter?: string;\n+\n+ /** Top K parameter between 1 and 2048 */\n+ topK?: number;\n+\n+ /** Presence penalty between -2 and 2 */\n+ presencePenalty?: number;\n+\n+ /** Frequency penalty greater than 0 */\n+ frequencyPenalty?: number;\n+\n+ /** API key for Perplexity. 
Defaults to the value of\n+ * PERPLEXITY_API_KEY environment variable.\n+ */\n+ apiKey?: string;\n+\n+ /** Whether to stream the results or not */\n+ streaming?: boolean;\n+\n+ /** Timeout for requests to Perplexity */\n+ timeout?: number;\n+}\n+\n+/**\n+ * Wrapper around Perplexity large language models that use the Chat endpoint.\n+ */\n+export class ChatPerplexity\n+ extends BaseChatModel\n+ implements PerplexityChatInput\n+{\n+ static lc_name() {\n+ return \"ChatPerplexity\";\n+ }\n+\n+ modelName = \"llama-3.1-sonar-small-128k-online\";", - "comment_created_at": "2025-03-13T02:58:54+00:00", - "comment_author": "jacoblee93", - "comment_body": "Let's not take a default here\r\n\r\nAlso, we are standardizing on `model` over `modelName`", - "pr_file_module": null - }, - { - "comment_id": "1996233446", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7817, - "pr_file": "libs/langchain-community/src/chat_models/perplexity.ts", - "discussion_id": "1992603122", - "commented_code": "@@ -0,0 +1,290 @@\n+import {\n+ AIMessage,\n+ AIMessageChunk,\n+ BaseMessage,\n+ BaseMessageChunk,\n+ ChatMessage,\n+ ChatMessageChunk,\n+ HumanMessage,\n+ HumanMessageChunk,\n+ SystemMessage,\n+ SystemMessageChunk,\n+} from \"@langchain/core/messages\";\n+import {\n+ BaseChatModel,\n+ BaseChatModelParams,\n+} from \"@langchain/core/language_models/chat_models\";\n+import { CallbackManagerForLLMRun } from \"@langchain/core/callbacks/manager\";\n+import {\n+ ChatGeneration,\n+ ChatGenerationChunk,\n+ ChatResult,\n+} from \"@langchain/core/outputs\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+import OpenAI from \"openai\";\n+\n+/**\n+ * Type representing the role of a message in the Perplexity chat model.\n+ */\n+export type PerplexityRole = \"system\" | \"user\" | \"assistant\";\n+\n+/**\n+ * Interface defining the parameters for the Perplexity chat model.\n+ */\n+export interface PerplexityChatInput {\n+ /** Model name to use */\n+ modelName?: string;\n+\n+ /** Maximum number of tokens to generate */\n+ maxTokens?: number;\n+\n+ /** Temperature parameter between 0 and 2 */\n+ temperature?: number;\n+\n+ /** Top P parameter between 0 and 1 */\n+ topP?: number;\n+\n+ /** Search domain filter - limit the citations used by the online model to URLs from the specified domains. */\n+ searchDomainFilter?: any[];\n+\n+ /** Whether to return images */\n+ returnImages?: boolean;\n+\n+ /** Determines whether or not a request to an online model should return related questions. */\n+ returnRelatedQuestions?: boolean;\n+\n+ /** Returns search results within the specified time interval - does not apply to images. Values include month, week, day, hour. */\n+ searchRecencyFilter?: string;\n+\n+ /** Top K parameter between 1 and 2048 */\n+ topK?: number;\n+\n+ /** Presence penalty between -2 and 2 */\n+ presencePenalty?: number;\n+\n+ /** Frequency penalty greater than 0 */\n+ frequencyPenalty?: number;\n+\n+ /** API key for Perplexity. 
Defaults to the value of\n+ * PERPLEXITY_API_KEY environment variable.\n+ */\n+ apiKey?: string;\n+\n+ /** Whether to stream the results or not */\n+ streaming?: boolean;\n+\n+ /** Timeout for requests to Perplexity */\n+ timeout?: number;\n+}\n+\n+/**\n+ * Wrapper around Perplexity large language models that use the Chat endpoint.\n+ */\n+export class ChatPerplexity\n+ extends BaseChatModel\n+ implements PerplexityChatInput\n+{\n+ static lc_name() {\n+ return \"ChatPerplexity\";\n+ }\n+\n+ modelName = \"llama-3.1-sonar-small-128k-online\";", - "comment_created_at": "2025-03-14T20:25:36+00:00", - "comment_author": "anadi45", - "comment_body": "changed", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1434581965", - "pr_number": 3577, - "pr_file": "libs/langchain-community/src/chat_models/watsonx_ai.ts", - "created_at": "2023-12-21T23:11:45+00:00", - "commented_code": "import { CallbackManagerForLLMRun } from \"@langchain/core/callbacks/manager\";\nimport {\n type BaseChatModelParams,\n SimpleChatModel,\n} from \"@langchain/core/language_models/chat_models\";\nimport {\n AIMessageChunk,\n BaseMessage,\n ChatMessage,\n} from \"@langchain/core/messages\";\nimport { ChatGenerationChunk } from \"@langchain/core/outputs\";\nimport { getEnvironmentVariable } from \"@langchain/core/utils/env\";\nimport type {\n WatsonModelParameters,\n WatsonxAIParams,\n} from \"../types/watsonx-types.js\";\nimport { WatsonApiClient } from \"../utils/watsonx-client.js\";\n\nexport class WatsonxAIChat extends SimpleChatModel {\n private readonly watsonApiClient: WatsonApiClient;\n\n readonly modelId!: string;\n\n readonly modelParameters?: WatsonModelParameters;\n\n readonly projectId!: string;\n\n constructor(fields: WatsonxAIParams & BaseChatModelParams) {\n super(fields);\n\n const {\n clientConfig = {},\n modelId = \"meta-llama/llama-2-70b-chat\",\n modelParameters,\n projectId = getEnvironmentVariable(\"WATSONX_PROJECT_ID\") ?? 
\"\",\n } = fields;\n\n this.modelId = modelId;\n this.modelParameters = modelParameters;\n this.projectId = projectId;\n\n const {\n apiKey = getEnvironmentVariable(\"IBM_CLOUD_API_KEY\"),\n apiVersion = \"2023-05-29\",\n region = \"us-south\",\n } = clientConfig;\n\n if (!apiKey) {\n throw new Error(\"Missing IBM Cloud API Key\");\n }\n\n if (!this.projectId) {\n throw new Error(\"Missing WatsonX AI Project ID\");\n }\n\n this.watsonApiClient = new WatsonApiClient({\n apiKey,\n apiVersion,\n region,\n });\n }\n\n protected _formatMessagesAsPrompt(messages: BaseMessage[]): string {\n return messages\n .map((message) => {\n let messageText;\n if (message._getType() === \"human\") {\n messageText = `[INST] ${message.content} [/INST]`;\n } else if (message._getType() === \"ai\") {\n messageText = message.content;\n } else if (message._getType() === \"system\") {\n messageText = `<> ${message.content} <>`;\n } else if (ChatMessage.isInstance(message)) {\n messageText = `\\n\\n${message.role[0].toUpperCase()}${message.role.slice(\n 1\n )}: ${message.content}`;\n } else {\n console.warn(\n `Unsupported message type passed to Watson: \"${message._getType()}\"`\n );\n messageText = \"\";\n }\n return messageText;\n })\n .join(\"\\n\");\n }\n\n _combineLLMOutput() {\n return {};\n }\n\n async _call(\n messages: BaseMessage[],\n options: this[\"ParsedCallOptions\"],\n runManager: CallbackManagerForLLMRun | undefined\n ): Promise {\n const chunks = [];\n const stream = this._streamResponseChunks(messages, options, runManager);\n for await (const chunk of stream) {\n chunks.push(chunk.message.content);\n }\n return chunks.join(\"\");\n }\n\n override async *_streamResponseChunks(\n _messages: BaseMessage[],\n _options: this[\"ParsedCallOptions\"],\n _runManager?: CallbackManagerForLLMRun", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1434581965", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3577, - "pr_file": "libs/langchain-community/src/chat_models/watsonx_ai.ts", - "discussion_id": "1434581965", - "commented_code": "@@ -0,0 +1,157 @@\n+import { CallbackManagerForLLMRun } from \"@langchain/core/callbacks/manager\";\n+import {\n+ type BaseChatModelParams,\n+ SimpleChatModel,\n+} from \"@langchain/core/language_models/chat_models\";\n+import {\n+ AIMessageChunk,\n+ BaseMessage,\n+ ChatMessage,\n+} from \"@langchain/core/messages\";\n+import { ChatGenerationChunk } from \"@langchain/core/outputs\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+import type {\n+ WatsonModelParameters,\n+ WatsonxAIParams,\n+} from \"../types/watsonx-types.js\";\n+import { WatsonApiClient } from \"../utils/watsonx-client.js\";\n+\n+export class WatsonxAIChat extends SimpleChatModel {\n+ private readonly watsonApiClient: WatsonApiClient;\n+\n+ readonly modelId!: string;\n+\n+ readonly modelParameters?: WatsonModelParameters;\n+\n+ readonly projectId!: string;\n+\n+ constructor(fields: WatsonxAIParams & BaseChatModelParams) {\n+ super(fields);\n+\n+ const {\n+ clientConfig = {},\n+ modelId = \"meta-llama/llama-2-70b-chat\",\n+ modelParameters,\n+ projectId = getEnvironmentVariable(\"WATSONX_PROJECT_ID\") ?? 
\"\",\n+ } = fields;\n+\n+ this.modelId = modelId;\n+ this.modelParameters = modelParameters;\n+ this.projectId = projectId;\n+\n+ const {\n+ apiKey = getEnvironmentVariable(\"IBM_CLOUD_API_KEY\"),\n+ apiVersion = \"2023-05-29\",\n+ region = \"us-south\",\n+ } = clientConfig;\n+\n+ if (!apiKey) {\n+ throw new Error(\"Missing IBM Cloud API Key\");\n+ }\n+\n+ if (!this.projectId) {\n+ throw new Error(\"Missing WatsonX AI Project ID\");\n+ }\n+\n+ this.watsonApiClient = new WatsonApiClient({\n+ apiKey,\n+ apiVersion,\n+ region,\n+ });\n+ }\n+\n+ protected _formatMessagesAsPrompt(messages: BaseMessage[]): string {\n+ return messages\n+ .map((message) => {\n+ let messageText;\n+ if (message._getType() === \"human\") {\n+ messageText = `[INST] ${message.content} [/INST]`;\n+ } else if (message._getType() === \"ai\") {\n+ messageText = message.content;\n+ } else if (message._getType() === \"system\") {\n+ messageText = `<> ${message.content} <>`;\n+ } else if (ChatMessage.isInstance(message)) {\n+ messageText = `\\n\\n${message.role[0].toUpperCase()}${message.role.slice(\n+ 1\n+ )}: ${message.content}`;\n+ } else {\n+ console.warn(\n+ `Unsupported message type passed to Watson: \"${message._getType()}\"`\n+ );\n+ messageText = \"\";\n+ }\n+ return messageText;\n+ })\n+ .join(\"\\n\");\n+ }\n+\n+ _combineLLMOutput() {\n+ return {};\n+ }\n+\n+ async _call(\n+ messages: BaseMessage[],\n+ options: this[\"ParsedCallOptions\"],\n+ runManager: CallbackManagerForLLMRun | undefined\n+ ): Promise {\n+ const chunks = [];\n+ const stream = this._streamResponseChunks(messages, options, runManager);\n+ for await (const chunk of stream) {\n+ chunks.push(chunk.message.content);\n+ }\n+ return chunks.join(\"\");\n+ }\n+\n+ override async *_streamResponseChunks(\n+ _messages: BaseMessage[],\n+ _options: this[\"ParsedCallOptions\"],\n+ _runManager?: CallbackManagerForLLMRun", - "comment_created_at": "2023-12-21T23:11:45+00:00", - "comment_author": "bracesproul", - "comment_body": "Drop the underscore if they're being used. 
Typically, variables prefixed with an underscore are unused, and the underscore is used to bypass a lint rule for no-unused-variables", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1418032747", - "pr_number": 3091, - "pr_file": "langchain/src/experimental/tools/graphql.ts", - "created_at": "2023-12-06T22:09:04+00:00", - "commented_code": "import { z } from \"zod\";\nimport { StructuredTool } from \"../../tools/base.js\";\n\nexport class GraphQLClientTool extends StructuredTool {\n static lc_name() {\n return \"GraphQLClientTool\";\n }\n\n name = \"gql_client\";\n\n description = `You can make a GraphQL request with this tool`;\n\n _endpoint: string;\n\n _headers: HeadersInit | undefined;", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1418032747", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3091, - "pr_file": "langchain/src/experimental/tools/graphql.ts", - "discussion_id": "1418032747", - "commented_code": "@@ -0,0 +1,47 @@\n+import { z } from \"zod\";\n+import { StructuredTool } from \"../../tools/base.js\";\n+\n+export class GraphQLClientTool extends StructuredTool {\n+ static lc_name() {\n+ return \"GraphQLClientTool\";\n+ }\n+\n+ name = \"gql_client\";\n+\n+ description = `You can make a GraphQL request with this tool`;\n+\n+ _endpoint: string;\n+\n+ _headers: HeadersInit | undefined;", - "comment_created_at": "2023-12-06T22:09:04+00:00", - "comment_author": "bracesproul", - "comment_body": "Instead of prefixing with `_` lets just make them `private`/`protected`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1908162540", - "pr_number": 7344, - "pr_file": "libs/langchain-community/src/vectorstores/azion_edgesql.ts", - "created_at": "2025-01-09T04:50:04+00:00", - "commented_code": "import { VectorStore } from '@langchain/core/vectorstores';\nimport { useQuery, useExecute, getDatabases, createDatabase, getTables, type AzionDatabaseResponse, QueryResult, AzionDatabaseQueryResponse } from 'azion/sql';\nimport type { EmbeddingsInterface } from '@langchain/core/embeddings';\nimport { Document } from '@langchain/core/documents';\n\n/**\n * Represents a filter condition for querying the Azion database\n * @property operator - The comparison operator to use (e.g. 
=, !=, >, <, etc)\n * @property column - The database column to filter on\n * @property value - The value to compare against\n */\nexport type AzionFilter = {operator: Operator, column: Column, value: string};\n\n/**\n * Represents a database column name\n */\nexport type Column = string;\n\n/**\n * Valid SQL operators that can be used in filter conditions\n */\nexport type Operator = \n | '=' | '!=' | '>' | '<>' | '<' // Basic comparison operators\n | '>=' | '<=' // Range operators\n | 'LIKE' | 'NOT LIKE' // Pattern matching\n | 'IN' | 'NOT IN' // Set membership\n | 'IS NULL' | 'IS NOT NULL'; // NULL checks\n\n\n/**\n * Interface for configuring the Azion vector store setup\n * @property {string[]} columns - Additional columns to create in the database table beyond the required ones\n * @property {\"vector\" | \"hybrid\"} mode - The search mode to enable:\n * \"vector\" - Only vector similarity search\n * \"hybrid\" - Both vector and full-text search capabilities\n */\ninterface AzionSetupOptions {\n columns: string[],\n mode: \"vector\" | \"hybrid\"\n}\n\n/**\n * Interface representing the structure of a row in the vector store\n * @property content - The text content of the document\n * @property embedding - The vector embedding of the content as an array of numbers\n * @property metadata - Additional metadata associated with the document as key-value pairs\n */\ninterface rowsInterface {\n content: string;\n embedding: number[];\n metadata: Record;\n}\n\nexport type AzionMetadata = Record;\n\n/**\n * Interface for the response returned when searching embeddings.\n */\ninterface SearchEmbeddingsResponse {\n id: number;\n content: string;\n similarity: number;\n metadata: {\n searchtype: string;\n [key: string]: any;\n };\n}\n\n/**\n * Interface for configuring hybrid search options that combines vector and full-text search\n * @property {number} kfts - Number of results to return from full-text search\n * @property {number} kvector - Number of results to return from vector similarity search\n * @property {AzionFilter[]} [filter] - Optional array of filters to apply to search results\n * @property {string[]} [metadataItems] - Optional array of metadata fields to include in results\n */\ninterface hybridSearchOptions {", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1908162540", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7344, - "pr_file": "libs/langchain-community/src/vectorstores/azion_edgesql.ts", - "discussion_id": "1908162540", - "commented_code": "@@ -0,0 +1,924 @@\n+import { VectorStore } from '@langchain/core/vectorstores';\n+import { useQuery, useExecute, getDatabases, createDatabase, getTables, type AzionDatabaseResponse, QueryResult, AzionDatabaseQueryResponse } from 'azion/sql';\n+import type { EmbeddingsInterface } from '@langchain/core/embeddings';\n+import { Document } from '@langchain/core/documents';\n+\n+/**\n+ * Represents a filter condition for querying the Azion database\n+ * @property operator - The comparison operator to use (e.g. 
=, !=, >, <, etc)\n+ * @property column - The database column to filter on\n+ * @property value - The value to compare against\n+ */\n+export type AzionFilter = {operator: Operator, column: Column, value: string};\n+\n+/**\n+ * Represents a database column name\n+ */\n+export type Column = string;\n+\n+/**\n+ * Valid SQL operators that can be used in filter conditions\n+ */\n+export type Operator = \n+ | '=' | '!=' | '>' | '<>' | '<' // Basic comparison operators\n+ | '>=' | '<=' // Range operators\n+ | 'LIKE' | 'NOT LIKE' // Pattern matching\n+ | 'IN' | 'NOT IN' // Set membership\n+ | 'IS NULL' | 'IS NOT NULL'; // NULL checks\n+\n+\n+/**\n+ * Interface for configuring the Azion vector store setup\n+ * @property {string[]} columns - Additional columns to create in the database table beyond the required ones\n+ * @property {\"vector\" | \"hybrid\"} mode - The search mode to enable:\n+ * \"vector\" - Only vector similarity search\n+ * \"hybrid\" - Both vector and full-text search capabilities\n+ */\n+interface AzionSetupOptions {\n+ columns: string[],\n+ mode: \"vector\" | \"hybrid\"\n+}\n+\n+/**\n+ * Interface representing the structure of a row in the vector store\n+ * @property content - The text content of the document\n+ * @property embedding - The vector embedding of the content as an array of numbers\n+ * @property metadata - Additional metadata associated with the document as key-value pairs\n+ */\n+interface rowsInterface {\n+ content: string;\n+ embedding: number[];\n+ metadata: Record;\n+}\n+\n+export type AzionMetadata = Record;\n+\n+/**\n+ * Interface for the response returned when searching embeddings.\n+ */\n+interface SearchEmbeddingsResponse {\n+ id: number;\n+ content: string;\n+ similarity: number;\n+ metadata: {\n+ searchtype: string;\n+ [key: string]: any;\n+ };\n+}\n+\n+/**\n+ * Interface for configuring hybrid search options that combines vector and full-text search\n+ * @property {number} kfts - Number of results to return from full-text search\n+ * @property {number} kvector - Number of results to return from vector similarity search\n+ * @property {AzionFilter[]} [filter] - Optional array of filters to apply to search results\n+ * @property {string[]} [metadataItems] - Optional array of metadata fields to include in results\n+ */\n+interface hybridSearchOptions {", - "comment_created_at": "2025-01-09T04:50:04+00:00", - "comment_author": "jacoblee93", - "comment_body": "By convention we capitalize types/interfaces", - "pr_file_module": null - }, - { - "comment_id": "1941033662", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7344, - "pr_file": "libs/langchain-community/src/vectorstores/azion_edgesql.ts", - "discussion_id": "1908162540", - "commented_code": "@@ -0,0 +1,924 @@\n+import { VectorStore } from '@langchain/core/vectorstores';\n+import { useQuery, useExecute, getDatabases, createDatabase, getTables, type AzionDatabaseResponse, QueryResult, AzionDatabaseQueryResponse } from 'azion/sql';\n+import type { EmbeddingsInterface } from '@langchain/core/embeddings';\n+import { Document } from '@langchain/core/documents';\n+\n+/**\n+ * Represents a filter condition for querying the Azion database\n+ * @property operator - The comparison operator to use (e.g. 
=, !=, >, <, etc)\n+ * @property column - The database column to filter on\n+ * @property value - The value to compare against\n+ */\n+export type AzionFilter = {operator: Operator, column: Column, value: string};\n+\n+/**\n+ * Represents a database column name\n+ */\n+export type Column = string;\n+\n+/**\n+ * Valid SQL operators that can be used in filter conditions\n+ */\n+export type Operator = \n+ | '=' | '!=' | '>' | '<>' | '<' // Basic comparison operators\n+ | '>=' | '<=' // Range operators\n+ | 'LIKE' | 'NOT LIKE' // Pattern matching\n+ | 'IN' | 'NOT IN' // Set membership\n+ | 'IS NULL' | 'IS NOT NULL'; // NULL checks\n+\n+\n+/**\n+ * Interface for configuring the Azion vector store setup\n+ * @property {string[]} columns - Additional columns to create in the database table beyond the required ones\n+ * @property {\"vector\" | \"hybrid\"} mode - The search mode to enable:\n+ * \"vector\" - Only vector similarity search\n+ * \"hybrid\" - Both vector and full-text search capabilities\n+ */\n+interface AzionSetupOptions {\n+ columns: string[],\n+ mode: \"vector\" | \"hybrid\"\n+}\n+\n+/**\n+ * Interface representing the structure of a row in the vector store\n+ * @property content - The text content of the document\n+ * @property embedding - The vector embedding of the content as an array of numbers\n+ * @property metadata - Additional metadata associated with the document as key-value pairs\n+ */\n+interface rowsInterface {\n+ content: string;\n+ embedding: number[];\n+ metadata: Record;\n+}\n+\n+export type AzionMetadata = Record;\n+\n+/**\n+ * Interface for the response returned when searching embeddings.\n+ */\n+interface SearchEmbeddingsResponse {\n+ id: number;\n+ content: string;\n+ similarity: number;\n+ metadata: {\n+ searchtype: string;\n+ [key: string]: any;\n+ };\n+}\n+\n+/**\n+ * Interface for configuring hybrid search options that combines vector and full-text search\n+ * @property {number} kfts - Number of results to return from full-text search\n+ * @property {number} kvector - Number of results to return from vector similarity search\n+ * @property {AzionFilter[]} [filter] - Optional array of filters to apply to search results\n+ * @property {string[]} [metadataItems] - Optional array of metadata fields to include in results\n+ */\n+interface hybridSearchOptions {", - "comment_created_at": "2025-02-04T11:57:24+00:00", - "comment_author": "PedroMiolaSilva", - "comment_body": "Fixed!\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1908152860", - "pr_number": 7450, - "pr_file": "libs/langchain-community/src/embeddings/bytedance_doubao.ts", - "created_at": "2025-01-09T04:32:48+00:00", - "commented_code": "import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\nimport { Embeddings, type EmbeddingsParams } from \"@langchain/core/embeddings\";\nimport { chunkArray } from \"@langchain/core/utils/chunk_array\";\n\nexport interface ByteDanceDoubaoEmbeddingsParams extends EmbeddingsParams {\n /** Model name to use */\n modelName: string;", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1908152860", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7450, - "pr_file": "libs/langchain-community/src/embeddings/bytedance_doubao.ts", - "discussion_id": "1908152860", - "commented_code": "@@ -0,0 +1,175 @@\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+import { Embeddings, type EmbeddingsParams } from \"@langchain/core/embeddings\";\n+import { chunkArray } from 
\"@langchain/core/utils/chunk_array\";\n+\n+export interface ByteDanceDoubaoEmbeddingsParams extends EmbeddingsParams {\n+ /** Model name to use */\n+ modelName: string;", - "comment_created_at": "2025-01-09T04:32:48+00:00", - "comment_author": "jacoblee93", - "comment_body": "We are standardizing as `model` instead of `modelName`", - "pr_file_module": null - }, - { - "comment_id": "1908213190", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7450, - "pr_file": "libs/langchain-community/src/embeddings/bytedance_doubao.ts", - "discussion_id": "1908152860", - "commented_code": "@@ -0,0 +1,175 @@\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+import { Embeddings, type EmbeddingsParams } from \"@langchain/core/embeddings\";\n+import { chunkArray } from \"@langchain/core/utils/chunk_array\";\n+\n+export interface ByteDanceDoubaoEmbeddingsParams extends EmbeddingsParams {\n+ /** Model name to use */\n+ modelName: string;", - "comment_created_at": "2025-01-09T06:06:56+00:00", - "comment_author": "ucev", - "comment_body": "done", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1118002128", - "pr_number": 139, - "pr_file": "langchain/chains/question_answering/load.ts", - "created_at": "2023-02-26T01:12:56+00:00", - "commented_code": "DEFAULT_COMBINE_QA_PROMPT,\n} from \"./map_reduce_prompts\";\n\ninterface qaChainParams {\n prompt?: PromptTemplate;\n combineMapPrompt?: PromptTemplate;\n combinePrompt?: PromptTemplate;\n type?: string;\n}\nexport const loadQAChain = (llm: BaseLLM, params: qaChainParams = {}) => {\ntype chainTypeName = \"stuff\" | \"map_reduce\";", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1118002128", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 139, - "pr_file": "langchain/chains/question_answering/load.ts", - "discussion_id": "1118002128", - "commented_code": "@@ -11,13 +11,23 @@ import {\n DEFAULT_COMBINE_QA_PROMPT,\n } from \"./map_reduce_prompts\";\n \n-interface qaChainParams {\n- prompt?: PromptTemplate;\n- combineMapPrompt?: PromptTemplate;\n- combinePrompt?: PromptTemplate;\n- type?: string;\n-}\n-export const loadQAChain = (llm: BaseLLM, params: qaChainParams = {}) => {\n+type chainTypeName = \"stuff\" | \"map_reduce\";", - "comment_created_at": "2023-02-26T01:12:56+00:00", - "comment_author": "atbe", - "comment_body": "I would give this type a more strict name and follow upper casing for it too (given that its a type):\r\n\r\n`QuestionAnsweringChainType` for example\r\n\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1119986832", - "pr_number": 139, - "pr_file": "langchain/chains/question_answering/load.ts", - "created_at": "2023-02-28T12:24:23+00:00", - "commented_code": "DEFAULT_COMBINE_QA_PROMPT,\n} from \"./map_reduce_prompts\";\n\ninterface qaChainParams {\n prompt?: PromptTemplate;\n combineMapPrompt?: PromptTemplate;\n combinePrompt?: PromptTemplate;\n type?: string;\n}\nexport const loadQAChain = (llm: BaseLLM, params: qaChainParams = {}) => {\ntype chainTypeName = \"stuff\" | \"map_reduce\";\n\ntype chainType = T extends \"stuff\"", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1119986832", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 139, - "pr_file": "langchain/chains/question_answering/load.ts", - "discussion_id": "1119986832", - "commented_code": "@@ -11,13 +11,23 @@ import {\n DEFAULT_COMBINE_QA_PROMPT,\n } from \"./map_reduce_prompts\";\n 
\n-interface qaChainParams {\n- prompt?: PromptTemplate;\n- combineMapPrompt?: PromptTemplate;\n- combinePrompt?: PromptTemplate;\n- type?: string;\n-}\n-export const loadQAChain = (llm: BaseLLM, params: qaChainParams = {}) => {\n+type chainTypeName = \"stuff\" | \"map_reduce\";\n+\n+type chainType = T extends \"stuff\"", - "comment_created_at": "2023-02-28T12:24:23+00:00", - "comment_author": "nfcampos", - "comment_body": "Types should start with an uppercase letter", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1183234558", - "pr_number": 1096, - "pr_file": "langchain/src/document_loaders/fs/directory.ts", - "created_at": "2023-05-03T04:47:50+00:00", - "commented_code": "constructor(\n public directoryPath: string,\n public loaders: {\n [extension: string]: (filePath: string) => BaseDocumentLoader;\n [extension: `.${string}`]: (filePath: string) => BaseDocumentLoader;", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1183234558", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 1096, - "pr_file": "langchain/src/document_loaders/fs/directory.ts", - "discussion_id": "1183234558", - "commented_code": "@@ -19,7 +19,7 @@ export class DirectoryLoader extends BaseDocumentLoader {\n constructor(\n public directoryPath: string,\n public loaders: {\n- [extension: string]: (filePath: string) => BaseDocumentLoader;\n+ [extension: `.${string}`]: (filePath: string) => BaseDocumentLoader;", - "comment_created_at": "2023-05-03T04:47:50+00:00", - "comment_author": "rpidanny", - "comment_body": "Since we're always expecting the extension to start with a `.`, this should help TS users to have better DX. wdyt?", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-update-documentation-configuration.json b/_reviewers/langchainjs-update-documentation-configuration.json new file mode 100644 index 0000000..d73798d --- /dev/null +++ b/_reviewers/langchainjs-update-documentation-configuration.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "1605436319", + "pr_number": 5418, + "pr_file": "langchain/package.json", + "created_at": "2024-05-17T19:01:11+00:00", + "commented_code": null, + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1605436319", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5418, + "pr_file": "langchain/package.json", + "discussion_id": "1605436319", + "commented_code": null, + "comment_created_at": "2024-05-17T19:01:11+00:00", + "comment_author": "bracesproul", + "comment_body": "The langchain entrypoint is deprecated, so we shouldn't be adding more features to it. 
Only add to the community integration", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1189846834", + "pr_number": 1192, + "pr_file": "langchain/package.json", + "created_at": "2023-05-10T12:37:04+00:00", + "commented_code": "\"document_loaders/fs/csv.cjs\",\n \"document_loaders/fs/csv.js\",\n \"document_loaders/fs/csv.d.ts\",\n \"document_loaders/fs/md.cjs\",", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1189846834", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 1192, + "pr_file": "langchain/package.json", + "discussion_id": "1189846834", + "commented_code": "@@ -199,6 +199,9 @@\n \"document_loaders/fs/csv.cjs\",\n \"document_loaders/fs/csv.js\",\n \"document_loaders/fs/csv.d.ts\",\n+ \"document_loaders/fs/md.cjs\",", + "comment_created_at": "2023-05-10T12:37:04+00:00", + "comment_author": "dqbd", + "comment_body": "Consider adding `document_loaders/fs/md.ts` in `typedocOptions` found in `tsconfig.json`", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-update-documentation-configuration.md b/_reviewers/langchainjs-update-documentation-configuration.md index 66a88c9..d80fa9e 100644 --- a/_reviewers/langchainjs-update-documentation-configuration.md +++ b/_reviewers/langchainjs-update-documentation-configuration.md @@ -34,51 +34,3 @@ Example: ``` This practice ensures comprehensive documentation coverage and provides clear guidance for developers, preventing them from extending deprecated features and helping them locate the proper integration points for new functionality. - - -[ - { - "discussion_id": "1605436319", - "pr_number": 5418, - "pr_file": "langchain/package.json", - "created_at": "2024-05-17T19:01:11+00:00", - "commented_code": null, - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1605436319", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5418, - "pr_file": "langchain/package.json", - "discussion_id": "1605436319", - "commented_code": null, - "comment_created_at": "2024-05-17T19:01:11+00:00", - "comment_author": "bracesproul", - "comment_body": "The langchain entrypoint is deprecated, so we shouldn't be adding more features to it. 
Only add to the community integration", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1189846834", - "pr_number": 1192, - "pr_file": "langchain/package.json", - "created_at": "2023-05-10T12:37:04+00:00", - "commented_code": "\"document_loaders/fs/csv.cjs\",\n \"document_loaders/fs/csv.js\",\n \"document_loaders/fs/csv.d.ts\",\n \"document_loaders/fs/md.cjs\",", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1189846834", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 1192, - "pr_file": "langchain/package.json", - "discussion_id": "1189846834", - "commented_code": "@@ -199,6 +199,9 @@\n \"document_loaders/fs/csv.cjs\",\n \"document_loaders/fs/csv.js\",\n \"document_loaders/fs/csv.d.ts\",\n+ \"document_loaders/fs/md.cjs\",", - "comment_created_at": "2023-05-10T12:37:04+00:00", - "comment_author": "dqbd", - "comment_body": "Consider adding `document_loaders/fs/md.ts` in `typedocOptions` found in `tsconfig.json`", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-use-comprehensive-jsdoc.json b/_reviewers/langchainjs-use-comprehensive-jsdoc.json new file mode 100644 index 0000000..8749166 --- /dev/null +++ b/_reviewers/langchainjs-use-comprehensive-jsdoc.json @@ -0,0 +1,252 @@ +[ + { + "discussion_id": "1417964024", + "pr_number": 3465, + "pr_file": "langchain/src/tools/outlook/authFlowREST.ts", + "created_at": "2023-12-06T20:52:16+00:00", + "commented_code": "import * as http from \"http\";\nimport * as url from \"url\";\nimport { AuthFlowBase } from \"./authFlowBase.js\";\nimport { getEnvironmentVariable } from \"../../util/env.js\";\n\ninterface AccessTokenResponse {\n access_token: string;\n refresh_token: string;\n}\n\n// this class uses your clientId, clientSecret and redirectUri\n// to get access token and refresh token, if you already have them,\n// you can use AuthFlowToken or AuthFlowRefresh instead", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1417964024", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3465, + "pr_file": "langchain/src/tools/outlook/authFlowREST.ts", + "discussion_id": "1417964024", + "commented_code": "@@ -0,0 +1,205 @@\n+import * as http from \"http\";\n+import * as url from \"url\";\n+import { AuthFlowBase } from \"./authFlowBase.js\";\n+import { getEnvironmentVariable } from \"../../util/env.js\";\n+\n+interface AccessTokenResponse {\n+ access_token: string;\n+ refresh_token: string;\n+}\n+\n+// this class uses your clientId, clientSecret and redirectUri\n+// to get access token and refresh token, if you already have them,\n+// you can use AuthFlowToken or AuthFlowRefresh instead", + "comment_created_at": "2023-12-06T20:52:16+00:00", + "comment_author": "bracesproul", + "comment_body": "This should be a JSDoc so devs & our api refs can get the description properly", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1418002088", + "pr_number": 3465, + "pr_file": "langchain/src/tools/outlook/authFlowToken.ts", + "created_at": "2023-12-06T21:33:05+00:00", + "commented_code": "import { AuthFlowBase } from \"./authFlowBase.js\";\nimport { getEnvironmentVariable } from \"../../util/env.js\";\n\ninterface AccessTokenResponse {\n access_token: string;\n refresh_token: string;\n}\n\n// if you have the token, and no need to refresh it, warning: token expires in 1 hour", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1418002088", + 
"repo_full_name": "langchain-ai/langchainjs", + "pr_number": 3465, + "pr_file": "langchain/src/tools/outlook/authFlowToken.ts", + "discussion_id": "1418002088", + "commented_code": "@@ -0,0 +1,103 @@\n+import { AuthFlowBase } from \"./authFlowBase.js\";\n+import { getEnvironmentVariable } from \"../../util/env.js\";\n+\n+interface AccessTokenResponse {\n+ access_token: string;\n+ refresh_token: string;\n+}\n+\n+// if you have the token, and no need to refresh it, warning: token expires in 1 hour", + "comment_created_at": "2023-12-06T21:33:05+00:00", + "comment_author": "bracesproul", + "comment_body": "Convert to JSDoc and give more of an overview as to what this class can be used for", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2152847821", + "pr_number": 8379, + "pr_file": "langchain-core/src/utils/js-sha1/hash.ts", + "created_at": "2025-06-17T17:54:53+00:00", + "commented_code": "return buffer;\n};\n\n/**\n * @deprecated Use `makeDefaultKeyEncoder()` to create a custom key encoder.\n * This function will be removed in a future version.\n */\nexport const insecureHash = (message) => {\n console.warn(", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2152847821", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8379, + "pr_file": "langchain-core/src/utils/js-sha1/hash.ts", + "discussion_id": "2152847821", + "commented_code": "@@ -412,6 +412,19 @@ Sha1.prototype.arrayBuffer = function () {\n return buffer;\n };\n \n+/**\n+ * @deprecated Use `makeDefaultKeyEncoder()` to create a custom key encoder.\n+ * This function will be removed in a future version.\n+ */\n export const insecureHash = (message) => {\n+ console.warn(", + "comment_created_at": "2025-06-17T17:54:53+00:00", + "comment_author": "jacoblee93", + "comment_body": "Let's just log this once on initial call to avoid flooding the console and add some example code", + "pr_file_module": null + }, + { + "comment_id": "2152875547", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8379, + "pr_file": "langchain-core/src/utils/js-sha1/hash.ts", + "discussion_id": "2152847821", + "commented_code": "@@ -412,6 +412,19 @@ Sha1.prototype.arrayBuffer = function () {\n return buffer;\n };\n \n+/**\n+ * @deprecated Use `makeDefaultKeyEncoder()` to create a custom key encoder.\n+ * This function will be removed in a future version.\n+ */\n export const insecureHash = (message) => {\n+ console.warn(", + "comment_created_at": "2025-06-17T18:10:07+00:00", + "comment_author": "manekinekko", + "comment_body": "Fixed.", + "pr_file_module": null + }, + { + "comment_id": "2152877086", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8379, + "pr_file": "langchain-core/src/utils/js-sha1/hash.ts", + "discussion_id": "2152847821", + "commented_code": "@@ -412,6 +412,19 @@ Sha1.prototype.arrayBuffer = function () {\n return buffer;\n };\n \n+/**\n+ * @deprecated Use `makeDefaultKeyEncoder()` to create a custom key encoder.\n+ * This function will be removed in a future version.\n+ */\n export const insecureHash = (message) => {\n+ console.warn(", + "comment_created_at": "2025-06-17T18:11:06+00:00", + "comment_author": "manekinekko", + "comment_body": "> add some example code\r\n\r\nCan you elaborate on which example code you'd like to be added?", + "pr_file_module": null + }, + { + "comment_id": "2152879610", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8379, + "pr_file": "langchain-core/src/utils/js-sha1/hash.ts", + "discussion_id": 
"2152847821", + "commented_code": "@@ -412,6 +412,19 @@ Sha1.prototype.arrayBuffer = function () {\n return buffer;\n };\n \n+/**\n+ * @deprecated Use `makeDefaultKeyEncoder()` to create a custom key encoder.\n+ * This function will be removed in a future version.\n+ */\n export const insecureHash = (message) => {\n+ console.warn(", + "comment_created_at": "2025-06-17T18:12:34+00:00", + "comment_author": "manekinekko", + "comment_body": "@jacoblee93 can we maybe link to some docs?", + "pr_file_module": null + }, + { + "comment_id": "2155188698", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8379, + "pr_file": "langchain-core/src/utils/js-sha1/hash.ts", + "discussion_id": "2152847821", + "commented_code": "@@ -412,6 +412,19 @@ Sha1.prototype.arrayBuffer = function () {\n return buffer;\n };\n \n+/**\n+ * @deprecated Use `makeDefaultKeyEncoder()` to create a custom key encoder.\n+ * This function will be removed in a future version.\n+ */\n export const insecureHash = (message) => {\n+ console.warn(", + "comment_created_at": "2025-06-18T17:54:07+00:00", + "comment_author": "jacoblee93", + "comment_body": "Ah right... this is used in multiple places\r\n\r\n@hntrl maybe let's make a small docs page with agnostic info here?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2065677698", + "pr_number": 8090, + "pr_file": "libs/langchain-openai/src/chat_models.ts", + "created_at": "2025-04-29T07:17:40+00:00", + "commented_code": "/**\n * Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high.\n * Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.\n *\n * @deprecated Use `reasoning` object instead.", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2065677698", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8090, + "pr_file": "libs/langchain-openai/src/chat_models.ts", + "discussion_id": "2065677698", + "commented_code": "@@ -1024,9 +1050,19 @@ export interface ChatOpenAICallOptions\n /**\n * Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high.\n * Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.\n+ *\n+ * @deprecated Use `reasoning` object instead.", + "comment_created_at": "2025-04-29T07:17:40+00:00", + "comment_author": "benjamincburns", + "comment_body": "use `{@link ... 
}` syntax", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2050989297", + "pr_number": 8042, + "pr_file": "langchain-core/src/language_models/chat_models.ts", + "created_at": "2025-04-18T18:45:37+00:00", + "commented_code": "});\n}\n\n/**\n * \ndef _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:\n \"\"\"Format messages for tracing in on_chat_model_start.\n For backward compatibility, we update image content blocks to OpenAI Chat\n Completions format.\n Args:\n messages: List of messages to format.\n Returns:\n List of messages formatted for tracing.\n \"\"\"\n messages_to_trace = []\n for message in messages:\n message_to_trace = message\n if isinstance(message.content, list):\n for idx, block in enumerate(message.content):\n if (\n isinstance(block, dict)\n and block.get(\"type\") == \"image\"\n and is_data_content_block(block)\n ):\n if message_to_trace is message:\n message_to_trace = message.model_copy()\n # Also shallow-copy content\n message_to_trace.content = list(message_to_trace.content)\n\n message_to_trace.content[idx] = ( # type: ignore[index] # mypy confused by .model_copy\n convert_to_openai_image_block(block)\n )\n messages_to_trace.append(message_to_trace)\n\n return messages_to_trace\n */\n\nfunction _formatForTracing(messages: BaseMessage[]): BaseMessage[] {", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2050989297", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8042, + "pr_file": "langchain-core/src/language_models/chat_models.ts", + "discussion_id": "2050989297", + "commented_code": "@@ -130,6 +132,73 @@ export function createChatMessageChunkEncoderStream() {\n });\n }\n \n+/**\n+ * \n+def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:\n+ \"\"\"Format messages for tracing in on_chat_model_start.\n+ For backward compatibility, we update image content blocks to OpenAI Chat\n+ Completions format.\n+ Args:\n+ messages: List of messages to format.\n+ Returns:\n+ List of messages formatted for tracing.\n+ \"\"\"\n+ messages_to_trace = []\n+ for message in messages:\n+ message_to_trace = message\n+ if isinstance(message.content, list):\n+ for idx, block in enumerate(message.content):\n+ if (\n+ isinstance(block, dict)\n+ and block.get(\"type\") == \"image\"\n+ and is_data_content_block(block)\n+ ):\n+ if message_to_trace is message:\n+ message_to_trace = message.model_copy()\n+ # Also shallow-copy content\n+ message_to_trace.content = list(message_to_trace.content)\n+\n+ message_to_trace.content[idx] = ( # type: ignore[index] # mypy confused by .model_copy\n+ convert_to_openai_image_block(block)\n+ )\n+ messages_to_trace.append(message_to_trace)\n+\n+ return messages_to_trace\n+ */\n+\n+function _formatForTracing(messages: BaseMessage[]): BaseMessage[] {", + "comment_created_at": "2025-04-18T18:45:37+00:00", + "comment_author": "bracesproul", + "comment_body": "jsdoc would be nice explaining what it's doing", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2054972084", + "pr_number": 8053, + "pr_file": "langchain/src/chat_models/universal.ts", + "created_at": "2025-04-22T22:21:27+00:00", + "commented_code": "queuedMethodOperations?: Record;\n}\n\nclass _ConfigurableModel<\n/**\n * Internal class used to create chat models.", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2054972084", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8053, + "pr_file": 
"langchain/src/chat_models/universal.ts", + "discussion_id": "2054972084", + "commented_code": "@@ -238,7 +238,10 @@ interface ConfigurableModelFields extends BaseChatModelParams {\n queuedMethodOperations?: Record;\n }\n \n-class _ConfigurableModel<\n+/**\n+ * Internal class used to create chat models.", + "comment_created_at": "2025-04-22T22:21:27+00:00", + "comment_author": "benjamincburns", + "comment_body": "Not sure if this is set up for langchain yet, but it's good practice to use the `@internal` tsdoc tag on internal structures, that way we don't need to treat them as public for the purpose of breaking changes.", + "pr_file_module": null + }, + { + "comment_id": "2054972335", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8053, + "pr_file": "langchain/src/chat_models/universal.ts", + "discussion_id": "2054972084", + "commented_code": "@@ -238,7 +238,10 @@ interface ConfigurableModelFields extends BaseChatModelParams {\n queuedMethodOperations?: Record;\n }\n \n-class _ConfigurableModel<\n+/**\n+ * Internal class used to create chat models.", + "comment_created_at": "2025-04-22T22:21:43+00:00", + "comment_author": "benjamincburns", + "comment_body": "```suggestion\r\n * Internal class used to create chat models.\r\n *\r\n * @internal\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2004654421", + "pr_number": 7852, + "pr_file": "libs/langchain-google-cloud-sql-pg/src/engine.ts", + "created_at": "2025-03-20T01:54:51+00:00", + "commented_code": "import { AuthTypes, Connector, IpAddressTypes } from \"@google-cloud/cloud-sql-connector\";\nimport { GoogleAuth } from \"google-auth-library\";\nimport knex from \"knex\";\nimport { getIAMPrincipalEmail } from \"./utils/utils.js\";\n\nexport interface PostgresEngineArgs {\n ipType?: IpAddressTypes,\n user?: string,\n password?: string,\n iamAccountEmail?: string,\n}\n\nexport interface VectorStoreTableArgs {\n schemaName?: string,\n contentColumn?: string,\n embeddingColumn?: string,\n metadataColumns?: Column[],\n metadataJsonColumn?: string,\n idColumn?: string | Column,\n overwriteExisting?: boolean,\n storeMetadata?: boolean\n}\n\nexport class Column {\n name: string;\n\n dataType: string;\n\n nullable: boolean;\n\n constructor(name: string, dataType: string, nullable: boolean = true) {\n this.name = name;\n this.dataType = dataType;\n this.nullable = nullable;\n\n this.postInitilization()\n }\n\n private postInitilization() {\n if (typeof this.name !== \"string\") {\n throw Error(\"Column name must be type string\");\n }\n\n if (typeof this.dataType !== \"string\") {\n throw Error(\"Column data_type must be type string\");\n }\n\n }\n}\n\nconst USER_AGENT = \"langchain-google-cloud-sql-pg-js\";\n\nexport class PostgresEngine {", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2004654421", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7852, + "pr_file": "libs/langchain-google-cloud-sql-pg/src/engine.ts", + "discussion_id": "2004654421", + "commented_code": "@@ -0,0 +1,271 @@\n+import { AuthTypes, Connector, IpAddressTypes } from \"@google-cloud/cloud-sql-connector\";\n+import { GoogleAuth } from \"google-auth-library\";\n+import knex from \"knex\";\n+import { getIAMPrincipalEmail } from \"./utils/utils.js\";\n+\n+export interface PostgresEngineArgs {\n+ ipType?: IpAddressTypes,\n+ user?: string,\n+ password?: string,\n+ iamAccountEmail?: string,\n+}\n+\n+export interface VectorStoreTableArgs {\n+ schemaName?: string,\n+ contentColumn?: 
string,\n+ embeddingColumn?: string,\n+ metadataColumns?: Column[],\n+ metadataJsonColumn?: string,\n+ idColumn?: string | Column,\n+ overwriteExisting?: boolean,\n+ storeMetadata?: boolean\n+}\n+\n+export class Column {\n+ name: string;\n+\n+ dataType: string;\n+\n+ nullable: boolean;\n+\n+ constructor(name: string, dataType: string, nullable: boolean = true) {\n+ this.name = name;\n+ this.dataType = dataType;\n+ this.nullable = nullable;\n+\n+ this.postInitilization()\n+ }\n+\n+ private postInitilization() {\n+ if (typeof this.name !== \"string\") {\n+ throw Error(\"Column name must be type string\");\n+ }\n+\n+ if (typeof this.dataType !== \"string\") {\n+ throw Error(\"Column data_type must be type string\");\n+ }\n+\n+ }\n+}\n+\n+const USER_AGENT = \"langchain-google-cloud-sql-pg-js\";\n+\n+export class PostgresEngine {", + "comment_created_at": "2025-03-20T01:54:51+00:00", + "comment_author": "jacoblee93", + "comment_body": "nit: Add docstring with usage example", + "pr_file_module": null + }, + { + "comment_id": "2004658794", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7852, + "pr_file": "libs/langchain-google-cloud-sql-pg/src/engine.ts", + "discussion_id": "2004654421", + "commented_code": "@@ -0,0 +1,271 @@\n+import { AuthTypes, Connector, IpAddressTypes } from \"@google-cloud/cloud-sql-connector\";\n+import { GoogleAuth } from \"google-auth-library\";\n+import knex from \"knex\";\n+import { getIAMPrincipalEmail } from \"./utils/utils.js\";\n+\n+export interface PostgresEngineArgs {\n+ ipType?: IpAddressTypes,\n+ user?: string,\n+ password?: string,\n+ iamAccountEmail?: string,\n+}\n+\n+export interface VectorStoreTableArgs {\n+ schemaName?: string,\n+ contentColumn?: string,\n+ embeddingColumn?: string,\n+ metadataColumns?: Column[],\n+ metadataJsonColumn?: string,\n+ idColumn?: string | Column,\n+ overwriteExisting?: boolean,\n+ storeMetadata?: boolean\n+}\n+\n+export class Column {\n+ name: string;\n+\n+ dataType: string;\n+\n+ nullable: boolean;\n+\n+ constructor(name: string, dataType: string, nullable: boolean = true) {\n+ this.name = name;\n+ this.dataType = dataType;\n+ this.nullable = nullable;\n+\n+ this.postInitilization()\n+ }\n+\n+ private postInitilization() {\n+ if (typeof this.name !== \"string\") {\n+ throw Error(\"Column name must be type string\");\n+ }\n+\n+ if (typeof this.dataType !== \"string\") {\n+ throw Error(\"Column data_type must be type string\");\n+ }\n+\n+ }\n+}\n+\n+const USER_AGENT = \"langchain-google-cloud-sql-pg-js\";\n+\n+export class PostgresEngine {", + "comment_created_at": "2025-03-20T01:58:56+00:00", + "comment_author": "jacoblee93", + "comment_body": "Might be nice to have chat history/vectorstore classes just accept params to instantiate an engine on init in addition to taking a full engine class for connection reuse so that the user has one less import/abstraction to worry about", + "pr_file_module": null + }, + { + "comment_id": "2008312551", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7852, + "pr_file": "libs/langchain-google-cloud-sql-pg/src/engine.ts", + "discussion_id": "2004654421", + "commented_code": "@@ -0,0 +1,271 @@\n+import { AuthTypes, Connector, IpAddressTypes } from \"@google-cloud/cloud-sql-connector\";\n+import { GoogleAuth } from \"google-auth-library\";\n+import knex from \"knex\";\n+import { getIAMPrincipalEmail } from \"./utils/utils.js\";\n+\n+export interface PostgresEngineArgs {\n+ ipType?: IpAddressTypes,\n+ user?: string,\n+ password?: string,\n+ iamAccountEmail?: 
string,\n+}\n+\n+export interface VectorStoreTableArgs {\n+ schemaName?: string,\n+ contentColumn?: string,\n+ embeddingColumn?: string,\n+ metadataColumns?: Column[],\n+ metadataJsonColumn?: string,\n+ idColumn?: string | Column,\n+ overwriteExisting?: boolean,\n+ storeMetadata?: boolean\n+}\n+\n+export class Column {\n+ name: string;\n+\n+ dataType: string;\n+\n+ nullable: boolean;\n+\n+ constructor(name: string, dataType: string, nullable: boolean = true) {\n+ this.name = name;\n+ this.dataType = dataType;\n+ this.nullable = nullable;\n+\n+ this.postInitilization()\n+ }\n+\n+ private postInitilization() {\n+ if (typeof this.name !== \"string\") {\n+ throw Error(\"Column name must be type string\");\n+ }\n+\n+ if (typeof this.dataType !== \"string\") {\n+ throw Error(\"Column data_type must be type string\");\n+ }\n+\n+ }\n+}\n+\n+const USER_AGENT = \"langchain-google-cloud-sql-pg-js\";\n+\n+export class PostgresEngine {", + "comment_created_at": "2025-03-21T20:48:26+00:00", + "comment_author": "averikitsch", + "comment_body": "I will add docstrings. The engine is a shared object so the connection pool is shared. This is a postgres best practice.", + "pr_file_module": null + }, + { + "comment_id": "2009501988", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7852, + "pr_file": "libs/langchain-google-cloud-sql-pg/src/engine.ts", + "discussion_id": "2004654421", + "commented_code": "@@ -0,0 +1,271 @@\n+import { AuthTypes, Connector, IpAddressTypes } from \"@google-cloud/cloud-sql-connector\";\n+import { GoogleAuth } from \"google-auth-library\";\n+import knex from \"knex\";\n+import { getIAMPrincipalEmail } from \"./utils/utils.js\";\n+\n+export interface PostgresEngineArgs {\n+ ipType?: IpAddressTypes,\n+ user?: string,\n+ password?: string,\n+ iamAccountEmail?: string,\n+}\n+\n+export interface VectorStoreTableArgs {\n+ schemaName?: string,\n+ contentColumn?: string,\n+ embeddingColumn?: string,\n+ metadataColumns?: Column[],\n+ metadataJsonColumn?: string,\n+ idColumn?: string | Column,\n+ overwriteExisting?: boolean,\n+ storeMetadata?: boolean\n+}\n+\n+export class Column {\n+ name: string;\n+\n+ dataType: string;\n+\n+ nullable: boolean;\n+\n+ constructor(name: string, dataType: string, nullable: boolean = true) {\n+ this.name = name;\n+ this.dataType = dataType;\n+ this.nullable = nullable;\n+\n+ this.postInitilization()\n+ }\n+\n+ private postInitilization() {\n+ if (typeof this.name !== \"string\") {\n+ throw Error(\"Column name must be type string\");\n+ }\n+\n+ if (typeof this.dataType !== \"string\") {\n+ throw Error(\"Column data_type must be type string\");\n+ }\n+\n+ }\n+}\n+\n+const USER_AGENT = \"langchain-google-cloud-sql-pg-js\";\n+\n+export class PostgresEngine {", + "comment_created_at": "2025-03-24T05:52:27+00:00", + "comment_author": "jacoblee93", + "comment_body": "Ok, sounds good. Yeah definitely better to do it this way for prod, just more pieces and classes to set up for getting started", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-use-comprehensive-jsdoc.md b/_reviewers/langchainjs-use-comprehensive-jsdoc.md index 3410e0c..2a68b64 100644 --- a/_reviewers/langchainjs-use-comprehensive-jsdoc.md +++ b/_reviewers/langchainjs-use-comprehensive-jsdoc.md @@ -51,257 +51,3 @@ export class AuthFlow { // ... 
} ``` - - -[ - { - "discussion_id": "1417964024", - "pr_number": 3465, - "pr_file": "langchain/src/tools/outlook/authFlowREST.ts", - "created_at": "2023-12-06T20:52:16+00:00", - "commented_code": "import * as http from \"http\";\nimport * as url from \"url\";\nimport { AuthFlowBase } from \"./authFlowBase.js\";\nimport { getEnvironmentVariable } from \"../../util/env.js\";\n\ninterface AccessTokenResponse {\n access_token: string;\n refresh_token: string;\n}\n\n// this class uses your clientId, clientSecret and redirectUri\n// to get access token and refresh token, if you already have them,\n// you can use AuthFlowToken or AuthFlowRefresh instead", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1417964024", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3465, - "pr_file": "langchain/src/tools/outlook/authFlowREST.ts", - "discussion_id": "1417964024", - "commented_code": "@@ -0,0 +1,205 @@\n+import * as http from \"http\";\n+import * as url from \"url\";\n+import { AuthFlowBase } from \"./authFlowBase.js\";\n+import { getEnvironmentVariable } from \"../../util/env.js\";\n+\n+interface AccessTokenResponse {\n+ access_token: string;\n+ refresh_token: string;\n+}\n+\n+// this class uses your clientId, clientSecret and redirectUri\n+// to get access token and refresh token, if you already have them,\n+// you can use AuthFlowToken or AuthFlowRefresh instead", - "comment_created_at": "2023-12-06T20:52:16+00:00", - "comment_author": "bracesproul", - "comment_body": "This should be a JSDoc so devs & our api refs can get the description properly", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1418002088", - "pr_number": 3465, - "pr_file": "langchain/src/tools/outlook/authFlowToken.ts", - "created_at": "2023-12-06T21:33:05+00:00", - "commented_code": "import { AuthFlowBase } from \"./authFlowBase.js\";\nimport { getEnvironmentVariable } from \"../../util/env.js\";\n\ninterface AccessTokenResponse {\n access_token: string;\n refresh_token: string;\n}\n\n// if you have the token, and no need to refresh it, warning: token expires in 1 hour", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1418002088", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 3465, - "pr_file": "langchain/src/tools/outlook/authFlowToken.ts", - "discussion_id": "1418002088", - "commented_code": "@@ -0,0 +1,103 @@\n+import { AuthFlowBase } from \"./authFlowBase.js\";\n+import { getEnvironmentVariable } from \"../../util/env.js\";\n+\n+interface AccessTokenResponse {\n+ access_token: string;\n+ refresh_token: string;\n+}\n+\n+// if you have the token, and no need to refresh it, warning: token expires in 1 hour", - "comment_created_at": "2023-12-06T21:33:05+00:00", - "comment_author": "bracesproul", - "comment_body": "Convert to JSDoc and give more of an overview as to what this class can be used for", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2152847821", - "pr_number": 8379, - "pr_file": "langchain-core/src/utils/js-sha1/hash.ts", - "created_at": "2025-06-17T17:54:53+00:00", - "commented_code": "return buffer;\n};\n\n/**\n * @deprecated Use `makeDefaultKeyEncoder()` to create a custom key encoder.\n * This function will be removed in a future version.\n */\nexport const insecureHash = (message) => {\n console.warn(", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2152847821", - "repo_full_name": "langchain-ai/langchainjs", 
- "pr_number": 8379, - "pr_file": "langchain-core/src/utils/js-sha1/hash.ts", - "discussion_id": "2152847821", - "commented_code": "@@ -412,6 +412,19 @@ Sha1.prototype.arrayBuffer = function () {\n return buffer;\n };\n \n+/**\n+ * @deprecated Use `makeDefaultKeyEncoder()` to create a custom key encoder.\n+ * This function will be removed in a future version.\n+ */\n export const insecureHash = (message) => {\n+ console.warn(", - "comment_created_at": "2025-06-17T17:54:53+00:00", - "comment_author": "jacoblee93", - "comment_body": "Let's just log this once on initial call to avoid flooding the console and add some example code", - "pr_file_module": null - }, - { - "comment_id": "2152875547", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8379, - "pr_file": "langchain-core/src/utils/js-sha1/hash.ts", - "discussion_id": "2152847821", - "commented_code": "@@ -412,6 +412,19 @@ Sha1.prototype.arrayBuffer = function () {\n return buffer;\n };\n \n+/**\n+ * @deprecated Use `makeDefaultKeyEncoder()` to create a custom key encoder.\n+ * This function will be removed in a future version.\n+ */\n export const insecureHash = (message) => {\n+ console.warn(", - "comment_created_at": "2025-06-17T18:10:07+00:00", - "comment_author": "manekinekko", - "comment_body": "Fixed.", - "pr_file_module": null - }, - { - "comment_id": "2152877086", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8379, - "pr_file": "langchain-core/src/utils/js-sha1/hash.ts", - "discussion_id": "2152847821", - "commented_code": "@@ -412,6 +412,19 @@ Sha1.prototype.arrayBuffer = function () {\n return buffer;\n };\n \n+/**\n+ * @deprecated Use `makeDefaultKeyEncoder()` to create a custom key encoder.\n+ * This function will be removed in a future version.\n+ */\n export const insecureHash = (message) => {\n+ console.warn(", - "comment_created_at": "2025-06-17T18:11:06+00:00", - "comment_author": "manekinekko", - "comment_body": "> add some example code\r\n\r\nCan you elaborate on which example code you'd like to be added?", - "pr_file_module": null - }, - { - "comment_id": "2152879610", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8379, - "pr_file": "langchain-core/src/utils/js-sha1/hash.ts", - "discussion_id": "2152847821", - "commented_code": "@@ -412,6 +412,19 @@ Sha1.prototype.arrayBuffer = function () {\n return buffer;\n };\n \n+/**\n+ * @deprecated Use `makeDefaultKeyEncoder()` to create a custom key encoder.\n+ * This function will be removed in a future version.\n+ */\n export const insecureHash = (message) => {\n+ console.warn(", - "comment_created_at": "2025-06-17T18:12:34+00:00", - "comment_author": "manekinekko", - "comment_body": "@jacoblee93 can we maybe link to some docs?", - "pr_file_module": null - }, - { - "comment_id": "2155188698", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8379, - "pr_file": "langchain-core/src/utils/js-sha1/hash.ts", - "discussion_id": "2152847821", - "commented_code": "@@ -412,6 +412,19 @@ Sha1.prototype.arrayBuffer = function () {\n return buffer;\n };\n \n+/**\n+ * @deprecated Use `makeDefaultKeyEncoder()` to create a custom key encoder.\n+ * This function will be removed in a future version.\n+ */\n export const insecureHash = (message) => {\n+ console.warn(", - "comment_created_at": "2025-06-18T17:54:07+00:00", - "comment_author": "jacoblee93", - "comment_body": "Ah right... 
this is used in multiple places\r\n\r\n@hntrl maybe let's make a small docs page with agnostic info here?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2065677698", - "pr_number": 8090, - "pr_file": "libs/langchain-openai/src/chat_models.ts", - "created_at": "2025-04-29T07:17:40+00:00", - "commented_code": "/**\n * Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high.\n * Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.\n *\n * @deprecated Use `reasoning` object instead.", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2065677698", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8090, - "pr_file": "libs/langchain-openai/src/chat_models.ts", - "discussion_id": "2065677698", - "commented_code": "@@ -1024,9 +1050,19 @@ export interface ChatOpenAICallOptions\n /**\n * Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high.\n * Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.\n+ *\n+ * @deprecated Use `reasoning` object instead.", - "comment_created_at": "2025-04-29T07:17:40+00:00", - "comment_author": "benjamincburns", - "comment_body": "use `{@link ... }` syntax", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2050989297", - "pr_number": 8042, - "pr_file": "langchain-core/src/language_models/chat_models.ts", - "created_at": "2025-04-18T18:45:37+00:00", - "commented_code": "});\n}\n\n/**\n * \ndef _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:\n \"\"\"Format messages for tracing in on_chat_model_start.\n For backward compatibility, we update image content blocks to OpenAI Chat\n Completions format.\n Args:\n messages: List of messages to format.\n Returns:\n List of messages formatted for tracing.\n \"\"\"\n messages_to_trace = []\n for message in messages:\n message_to_trace = message\n if isinstance(message.content, list):\n for idx, block in enumerate(message.content):\n if (\n isinstance(block, dict)\n and block.get(\"type\") == \"image\"\n and is_data_content_block(block)\n ):\n if message_to_trace is message:\n message_to_trace = message.model_copy()\n # Also shallow-copy content\n message_to_trace.content = list(message_to_trace.content)\n\n message_to_trace.content[idx] = ( # type: ignore[index] # mypy confused by .model_copy\n convert_to_openai_image_block(block)\n )\n messages_to_trace.append(message_to_trace)\n\n return messages_to_trace\n */\n\nfunction _formatForTracing(messages: BaseMessage[]): BaseMessage[] {", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2050989297", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8042, - "pr_file": "langchain-core/src/language_models/chat_models.ts", - "discussion_id": "2050989297", - "commented_code": "@@ -130,6 +132,73 @@ export function createChatMessageChunkEncoderStream() {\n });\n }\n \n+/**\n+ * \n+def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:\n+ \"\"\"Format messages for tracing in on_chat_model_start.\n+ For backward compatibility, we update image content blocks to OpenAI Chat\n+ Completions format.\n+ Args:\n+ messages: List of messages to format.\n+ Returns:\n+ List of messages formatted for tracing.\n+ \"\"\"\n+ messages_to_trace = []\n+ for message in messages:\n+ message_to_trace = message\n+ if 
isinstance(message.content, list):\n+ for idx, block in enumerate(message.content):\n+ if (\n+ isinstance(block, dict)\n+ and block.get(\"type\") == \"image\"\n+ and is_data_content_block(block)\n+ ):\n+ if message_to_trace is message:\n+ message_to_trace = message.model_copy()\n+ # Also shallow-copy content\n+ message_to_trace.content = list(message_to_trace.content)\n+\n+ message_to_trace.content[idx] = ( # type: ignore[index] # mypy confused by .model_copy\n+ convert_to_openai_image_block(block)\n+ )\n+ messages_to_trace.append(message_to_trace)\n+\n+ return messages_to_trace\n+ */\n+\n+function _formatForTracing(messages: BaseMessage[]): BaseMessage[] {", - "comment_created_at": "2025-04-18T18:45:37+00:00", - "comment_author": "bracesproul", - "comment_body": "jsdoc would be nice explaining what it's doing", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2054972084", - "pr_number": 8053, - "pr_file": "langchain/src/chat_models/universal.ts", - "created_at": "2025-04-22T22:21:27+00:00", - "commented_code": "queuedMethodOperations?: Record;\n}\n\nclass _ConfigurableModel<\n/**\n * Internal class used to create chat models.", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2054972084", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8053, - "pr_file": "langchain/src/chat_models/universal.ts", - "discussion_id": "2054972084", - "commented_code": "@@ -238,7 +238,10 @@ interface ConfigurableModelFields extends BaseChatModelParams {\n queuedMethodOperations?: Record;\n }\n \n-class _ConfigurableModel<\n+/**\n+ * Internal class used to create chat models.", - "comment_created_at": "2025-04-22T22:21:27+00:00", - "comment_author": "benjamincburns", - "comment_body": "Not sure if this is set up for langchain yet, but it's good practice to use the `@internal` tsdoc tag on internal structures, that way we don't need to treat them as public for the purpose of breaking changes.", - "pr_file_module": null - }, - { - "comment_id": "2054972335", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8053, - "pr_file": "langchain/src/chat_models/universal.ts", - "discussion_id": "2054972084", - "commented_code": "@@ -238,7 +238,10 @@ interface ConfigurableModelFields extends BaseChatModelParams {\n queuedMethodOperations?: Record;\n }\n \n-class _ConfigurableModel<\n+/**\n+ * Internal class used to create chat models.", - "comment_created_at": "2025-04-22T22:21:43+00:00", - "comment_author": "benjamincburns", - "comment_body": "```suggestion\r\n * Internal class used to create chat models.\r\n *\r\n * @internal\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2004654421", - "pr_number": 7852, - "pr_file": "libs/langchain-google-cloud-sql-pg/src/engine.ts", - "created_at": "2025-03-20T01:54:51+00:00", - "commented_code": "import { AuthTypes, Connector, IpAddressTypes } from \"@google-cloud/cloud-sql-connector\";\nimport { GoogleAuth } from \"google-auth-library\";\nimport knex from \"knex\";\nimport { getIAMPrincipalEmail } from \"./utils/utils.js\";\n\nexport interface PostgresEngineArgs {\n ipType?: IpAddressTypes,\n user?: string,\n password?: string,\n iamAccountEmail?: string,\n}\n\nexport interface VectorStoreTableArgs {\n schemaName?: string,\n contentColumn?: string,\n embeddingColumn?: string,\n metadataColumns?: Column[],\n metadataJsonColumn?: string,\n idColumn?: string | Column,\n overwriteExisting?: boolean,\n storeMetadata?: boolean\n}\n\nexport class Column {\n name: string;\n\n 
dataType: string;\n\n nullable: boolean;\n\n constructor(name: string, dataType: string, nullable: boolean = true) {\n this.name = name;\n this.dataType = dataType;\n this.nullable = nullable;\n\n this.postInitilization()\n }\n\n private postInitilization() {\n if (typeof this.name !== \"string\") {\n throw Error(\"Column name must be type string\");\n }\n\n if (typeof this.dataType !== \"string\") {\n throw Error(\"Column data_type must be type string\");\n }\n\n }\n}\n\nconst USER_AGENT = \"langchain-google-cloud-sql-pg-js\";\n\nexport class PostgresEngine {", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2004654421", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7852, - "pr_file": "libs/langchain-google-cloud-sql-pg/src/engine.ts", - "discussion_id": "2004654421", - "commented_code": "@@ -0,0 +1,271 @@\n+import { AuthTypes, Connector, IpAddressTypes } from \"@google-cloud/cloud-sql-connector\";\n+import { GoogleAuth } from \"google-auth-library\";\n+import knex from \"knex\";\n+import { getIAMPrincipalEmail } from \"./utils/utils.js\";\n+\n+export interface PostgresEngineArgs {\n+ ipType?: IpAddressTypes,\n+ user?: string,\n+ password?: string,\n+ iamAccountEmail?: string,\n+}\n+\n+export interface VectorStoreTableArgs {\n+ schemaName?: string,\n+ contentColumn?: string,\n+ embeddingColumn?: string,\n+ metadataColumns?: Column[],\n+ metadataJsonColumn?: string,\n+ idColumn?: string | Column,\n+ overwriteExisting?: boolean,\n+ storeMetadata?: boolean\n+}\n+\n+export class Column {\n+ name: string;\n+\n+ dataType: string;\n+\n+ nullable: boolean;\n+\n+ constructor(name: string, dataType: string, nullable: boolean = true) {\n+ this.name = name;\n+ this.dataType = dataType;\n+ this.nullable = nullable;\n+\n+ this.postInitilization()\n+ }\n+\n+ private postInitilization() {\n+ if (typeof this.name !== \"string\") {\n+ throw Error(\"Column name must be type string\");\n+ }\n+\n+ if (typeof this.dataType !== \"string\") {\n+ throw Error(\"Column data_type must be type string\");\n+ }\n+\n+ }\n+}\n+\n+const USER_AGENT = \"langchain-google-cloud-sql-pg-js\";\n+\n+export class PostgresEngine {", - "comment_created_at": "2025-03-20T01:54:51+00:00", - "comment_author": "jacoblee93", - "comment_body": "nit: Add docstring with usage example", - "pr_file_module": null - }, - { - "comment_id": "2004658794", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7852, - "pr_file": "libs/langchain-google-cloud-sql-pg/src/engine.ts", - "discussion_id": "2004654421", - "commented_code": "@@ -0,0 +1,271 @@\n+import { AuthTypes, Connector, IpAddressTypes } from \"@google-cloud/cloud-sql-connector\";\n+import { GoogleAuth } from \"google-auth-library\";\n+import knex from \"knex\";\n+import { getIAMPrincipalEmail } from \"./utils/utils.js\";\n+\n+export interface PostgresEngineArgs {\n+ ipType?: IpAddressTypes,\n+ user?: string,\n+ password?: string,\n+ iamAccountEmail?: string,\n+}\n+\n+export interface VectorStoreTableArgs {\n+ schemaName?: string,\n+ contentColumn?: string,\n+ embeddingColumn?: string,\n+ metadataColumns?: Column[],\n+ metadataJsonColumn?: string,\n+ idColumn?: string | Column,\n+ overwriteExisting?: boolean,\n+ storeMetadata?: boolean\n+}\n+\n+export class Column {\n+ name: string;\n+\n+ dataType: string;\n+\n+ nullable: boolean;\n+\n+ constructor(name: string, dataType: string, nullable: boolean = true) {\n+ this.name = name;\n+ this.dataType = dataType;\n+ this.nullable = nullable;\n+\n+ 
this.postInitilization()\n+ }\n+\n+ private postInitilization() {\n+ if (typeof this.name !== \"string\") {\n+ throw Error(\"Column name must be type string\");\n+ }\n+\n+ if (typeof this.dataType !== \"string\") {\n+ throw Error(\"Column data_type must be type string\");\n+ }\n+\n+ }\n+}\n+\n+const USER_AGENT = \"langchain-google-cloud-sql-pg-js\";\n+\n+export class PostgresEngine {", - "comment_created_at": "2025-03-20T01:58:56+00:00", - "comment_author": "jacoblee93", - "comment_body": "Might be nice to have chat history/vectorstore classes just accept params to instantiate an engine on init in addition to taking a full engine class for connection reuse so that the user has one less import/abstraction to worry about", - "pr_file_module": null - }, - { - "comment_id": "2008312551", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7852, - "pr_file": "libs/langchain-google-cloud-sql-pg/src/engine.ts", - "discussion_id": "2004654421", - "commented_code": "@@ -0,0 +1,271 @@\n+import { AuthTypes, Connector, IpAddressTypes } from \"@google-cloud/cloud-sql-connector\";\n+import { GoogleAuth } from \"google-auth-library\";\n+import knex from \"knex\";\n+import { getIAMPrincipalEmail } from \"./utils/utils.js\";\n+\n+export interface PostgresEngineArgs {\n+ ipType?: IpAddressTypes,\n+ user?: string,\n+ password?: string,\n+ iamAccountEmail?: string,\n+}\n+\n+export interface VectorStoreTableArgs {\n+ schemaName?: string,\n+ contentColumn?: string,\n+ embeddingColumn?: string,\n+ metadataColumns?: Column[],\n+ metadataJsonColumn?: string,\n+ idColumn?: string | Column,\n+ overwriteExisting?: boolean,\n+ storeMetadata?: boolean\n+}\n+\n+export class Column {\n+ name: string;\n+\n+ dataType: string;\n+\n+ nullable: boolean;\n+\n+ constructor(name: string, dataType: string, nullable: boolean = true) {\n+ this.name = name;\n+ this.dataType = dataType;\n+ this.nullable = nullable;\n+\n+ this.postInitilization()\n+ }\n+\n+ private postInitilization() {\n+ if (typeof this.name !== \"string\") {\n+ throw Error(\"Column name must be type string\");\n+ }\n+\n+ if (typeof this.dataType !== \"string\") {\n+ throw Error(\"Column data_type must be type string\");\n+ }\n+\n+ }\n+}\n+\n+const USER_AGENT = \"langchain-google-cloud-sql-pg-js\";\n+\n+export class PostgresEngine {", - "comment_created_at": "2025-03-21T20:48:26+00:00", - "comment_author": "averikitsch", - "comment_body": "I will add docstrings. The engine is a shared object so the connection pool is shared. 
This is a postgres best practice.", - "pr_file_module": null - }, - { - "comment_id": "2009501988", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7852, - "pr_file": "libs/langchain-google-cloud-sql-pg/src/engine.ts", - "discussion_id": "2004654421", - "commented_code": "@@ -0,0 +1,271 @@\n+import { AuthTypes, Connector, IpAddressTypes } from \"@google-cloud/cloud-sql-connector\";\n+import { GoogleAuth } from \"google-auth-library\";\n+import knex from \"knex\";\n+import { getIAMPrincipalEmail } from \"./utils/utils.js\";\n+\n+export interface PostgresEngineArgs {\n+ ipType?: IpAddressTypes,\n+ user?: string,\n+ password?: string,\n+ iamAccountEmail?: string,\n+}\n+\n+export interface VectorStoreTableArgs {\n+ schemaName?: string,\n+ contentColumn?: string,\n+ embeddingColumn?: string,\n+ metadataColumns?: Column[],\n+ metadataJsonColumn?: string,\n+ idColumn?: string | Column,\n+ overwriteExisting?: boolean,\n+ storeMetadata?: boolean\n+}\n+\n+export class Column {\n+ name: string;\n+\n+ dataType: string;\n+\n+ nullable: boolean;\n+\n+ constructor(name: string, dataType: string, nullable: boolean = true) {\n+ this.name = name;\n+ this.dataType = dataType;\n+ this.nullable = nullable;\n+\n+ this.postInitilization()\n+ }\n+\n+ private postInitilization() {\n+ if (typeof this.name !== \"string\") {\n+ throw Error(\"Column name must be type string\");\n+ }\n+\n+ if (typeof this.dataType !== \"string\") {\n+ throw Error(\"Column data_type must be type string\");\n+ }\n+\n+ }\n+}\n+\n+const USER_AGENT = \"langchain-google-cloud-sql-pg-js\";\n+\n+export class PostgresEngine {", - "comment_created_at": "2025-03-24T05:52:27+00:00", - "comment_author": "jacoblee93", - "comment_body": "Ok, sounds good. Yeah definitely better to do it this way for prod, just more pieces and classes to set up for getting started", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-use-database-native-types.json b/_reviewers/langchainjs-use-database-native-types.json new file mode 100644 index 0000000..28be9ae --- /dev/null +++ b/_reviewers/langchainjs-use-database-native-types.json @@ -0,0 +1,174 @@ +[ + { + "discussion_id": "2099231081", + "pr_number": 8007, + "pr_file": "libs/langchain-textsplitters/src/oracle_text_splitter.ts", + "created_at": "2025-05-21T03:31:55+00:00", + "commented_code": "import oracledb from \"oracledb\";\nimport { TextSplitter, TextSplitterParams } from \"./text_splitter.js\";\n\n/**\n * Split text into smaller pieces\n * @example\n * ```typescript\n * const splitter = new OracleTextSplitter(conn, params);\n * let chunks = await splitter.splitText(doc.pageContent);\n * ```\n */\nexport class OracleTextSplitter extends TextSplitter {\n protected conn: oracledb.Connection;\n\n protected pref: Record;\n\n static lc_name() {\n return \"OracleTextSplitter\";\n }\n\n constructor(\n conn: oracledb.Connection,\n pref: Record,\n fields?: TextSplitterParams\n ) {\n super(fields);\n this.conn = conn;\n this.pref = pref;\n }\n\n async splitText(text: string) {\n const chunks: string[] = [];\n\n const result = await this.conn.execute(\n (\n `select t.column_value as data from dbms_vector_chain.utl_to_chunks(:content, json(:pref)) t`\n ),\n {\n content: { val: text, dir: oracledb.BIND_IN, type: oracledb.CLOB },\n pref: JSON.stringify(this.pref),", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2099231081", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8007, + "pr_file": 
"libs/langchain-textsplitters/src/oracle_text_splitter.ts", + "discussion_id": "2099231081", + "commented_code": "@@ -0,0 +1,56 @@\n+import oracledb from \"oracledb\";\n+import { TextSplitter, TextSplitterParams } from \"./text_splitter.js\";\n+\n+/**\n+ * Split text into smaller pieces\n+ * @example\n+ * ```typescript\n+ * const splitter = new OracleTextSplitter(conn, params);\n+ * let chunks = await splitter.splitText(doc.pageContent);\n+ * ```\n+ */\n+export class OracleTextSplitter extends TextSplitter {\n+ protected conn: oracledb.Connection;\n+\n+ protected pref: Record;\n+\n+ static lc_name() {\n+ return \"OracleTextSplitter\";\n+ }\n+\n+ constructor(\n+ conn: oracledb.Connection,\n+ pref: Record,\n+ fields?: TextSplitterParams\n+ ) {\n+ super(fields);\n+ this.conn = conn;\n+ this.pref = pref;\n+ }\n+\n+ async splitText(text: string) {\n+ const chunks: string[] = [];\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select t.column_value as data from dbms_vector_chain.utl_to_chunks(:content, json(:pref)) t`\n+ ),\n+ {\n+ content: { val: text, dir: oracledb.BIND_IN, type: oracledb.CLOB },\n+ pref: JSON.stringify(this.pref),", + "comment_created_at": "2025-05-21T03:31:55+00:00", + "comment_author": "cjbj", + "comment_body": "What DBs are being targeted? Can you bind JSON as DB_TYPE_JSON? DItto other places JSON is used.", + "pr_file_module": null + }, + { + "comment_id": "2100349110", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8007, + "pr_file": "libs/langchain-textsplitters/src/oracle_text_splitter.ts", + "discussion_id": "2099231081", + "commented_code": "@@ -0,0 +1,56 @@\n+import oracledb from \"oracledb\";\n+import { TextSplitter, TextSplitterParams } from \"./text_splitter.js\";\n+\n+/**\n+ * Split text into smaller pieces\n+ * @example\n+ * ```typescript\n+ * const splitter = new OracleTextSplitter(conn, params);\n+ * let chunks = await splitter.splitText(doc.pageContent);\n+ * ```\n+ */\n+export class OracleTextSplitter extends TextSplitter {\n+ protected conn: oracledb.Connection;\n+\n+ protected pref: Record;\n+\n+ static lc_name() {\n+ return \"OracleTextSplitter\";\n+ }\n+\n+ constructor(\n+ conn: oracledb.Connection,\n+ pref: Record,\n+ fields?: TextSplitterParams\n+ ) {\n+ super(fields);\n+ this.conn = conn;\n+ this.pref = pref;\n+ }\n+\n+ async splitText(text: string) {\n+ const chunks: string[] = [];\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select t.column_value as data from dbms_vector_chain.utl_to_chunks(:content, json(:pref)) t`\n+ ),\n+ {\n+ content: { val: text, dir: oracledb.BIND_IN, type: oracledb.CLOB },\n+ pref: JSON.stringify(this.pref),", + "comment_created_at": "2025-05-21T13:47:56+00:00", + "comment_author": "hackerdave", + "comment_body": "I'm not sure if we were clear in the docs but the langchain integration is targeting Oracle Database 23ai which is the latest version. The vector features will only work on 23. 
Yes we could have used DB_TYPE_JSON which will work from 21 but in this case we are passing as a string.", + "pr_file_module": null + }, + { + "comment_id": "2101324757", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8007, + "pr_file": "libs/langchain-textsplitters/src/oracle_text_splitter.ts", + "discussion_id": "2099231081", + "commented_code": "@@ -0,0 +1,56 @@\n+import oracledb from \"oracledb\";\n+import { TextSplitter, TextSplitterParams } from \"./text_splitter.js\";\n+\n+/**\n+ * Split text into smaller pieces\n+ * @example\n+ * ```typescript\n+ * const splitter = new OracleTextSplitter(conn, params);\n+ * let chunks = await splitter.splitText(doc.pageContent);\n+ * ```\n+ */\n+export class OracleTextSplitter extends TextSplitter {\n+ protected conn: oracledb.Connection;\n+\n+ protected pref: Record;\n+\n+ static lc_name() {\n+ return \"OracleTextSplitter\";\n+ }\n+\n+ constructor(\n+ conn: oracledb.Connection,\n+ pref: Record,\n+ fields?: TextSplitterParams\n+ ) {\n+ super(fields);\n+ this.conn = conn;\n+ this.pref = pref;\n+ }\n+\n+ async splitText(text: string) {\n+ const chunks: string[] = [];\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select t.column_value as data from dbms_vector_chain.utl_to_chunks(:content, json(:pref)) t`\n+ ),\n+ {\n+ content: { val: text, dir: oracledb.BIND_IN, type: oracledb.CLOB },\n+ pref: JSON.stringify(this.pref),", + "comment_created_at": "2025-05-21T23:02:07+00:00", + "comment_author": "cjbj", + "comment_body": "Binding as DB_TYPE_JSON where possible will be more efficient.", + "pr_file_module": null + }, + { + "comment_id": "2101476268", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8007, + "pr_file": "libs/langchain-textsplitters/src/oracle_text_splitter.ts", + "discussion_id": "2099231081", + "commented_code": "@@ -0,0 +1,56 @@\n+import oracledb from \"oracledb\";\n+import { TextSplitter, TextSplitterParams } from \"./text_splitter.js\";\n+\n+/**\n+ * Split text into smaller pieces\n+ * @example\n+ * ```typescript\n+ * const splitter = new OracleTextSplitter(conn, params);\n+ * let chunks = await splitter.splitText(doc.pageContent);\n+ * ```\n+ */\n+export class OracleTextSplitter extends TextSplitter {\n+ protected conn: oracledb.Connection;\n+\n+ protected pref: Record;\n+\n+ static lc_name() {\n+ return \"OracleTextSplitter\";\n+ }\n+\n+ constructor(\n+ conn: oracledb.Connection,\n+ pref: Record,\n+ fields?: TextSplitterParams\n+ ) {\n+ super(fields);\n+ this.conn = conn;\n+ this.pref = pref;\n+ }\n+\n+ async splitText(text: string) {\n+ const chunks: string[] = [];\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select t.column_value as data from dbms_vector_chain.utl_to_chunks(:content, json(:pref)) t`\n+ ),\n+ {\n+ content: { val: text, dir: oracledb.BIND_IN, type: oracledb.CLOB },\n+ pref: JSON.stringify(this.pref),", + "comment_created_at": "2025-05-22T02:00:59+00:00", + "comment_author": "hackerdave", + "comment_body": "Switched to DB_TYPE_JSON", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1752684909", + "pr_number": 6676, + "pr_file": "langchain/src/retrievers/recency_ranked.ts", + "created_at": "2024-09-10T20:31:26+00:00", + "commented_code": "import { BaseRetriever } from \"@langchain/core/retrievers\";\nimport { VectorStoreInterface } from \"@langchain/core/vectorstores\";\nimport { Document } from \"@langchain/core/documents\";\n\nexport interface RecencyRankedRetrieverConfig {\n vectorStore: VectorStoreInterface;\n k: number;\n topK?: number;\n recencyWeight: 
number;\n}\n\nexport class RecencyRankedRetriever extends BaseRetriever {\n static lc_name() {\n return \"RecencyRankedRetriever\";\n }\n\n lc_namespace = [\"langchain\", \"retrievers\", \"recency_ranked\"];\n \n private vectorStore: VectorStoreInterface;\n\n private k: number;\n\n private topK: number;\n\n private recencyWeight: number;\n\n constructor(config: RecencyRankedRetrieverConfig) {\n super();\n this.vectorStore = config.vectorStore;\n this.k = config.k;\n this.topK = config.topK ?? config.k;\n this.recencyWeight = config.recencyWeight;\n }\n\n async _getRelevantDocuments(query: string): Promise {\n const relevantDocs = await this.vectorStore.similaritySearchWithScore(query, this.k);\n const rerankedDocs = this.recentDocumentRanker(relevantDocs, this.topK, this.recencyWeight);\n return rerankedDocs.map(([doc, _]) => doc);\n }\n\n async invoke(query: string): Promise {\n return this._getRelevantDocuments(query);\n }\n\n private recentDocumentRanker(\n documents: [Document, number][],\n topK: number,\n recencyWeight: number\n ): [Document, number][] {\n if (documents.length === 0) return [];\n\n if (!documents.every(([doc, _]) => doc.metadata.date instanceof Date)) {", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1752684909", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6676, + "pr_file": "langchain/src/retrievers/recency_ranked.ts", + "discussion_id": "1752684909", + "commented_code": "@@ -0,0 +1,78 @@\n+import { BaseRetriever } from \"@langchain/core/retrievers\";\n+import { VectorStoreInterface } from \"@langchain/core/vectorstores\";\n+import { Document } from \"@langchain/core/documents\";\n+\n+export interface RecencyRankedRetrieverConfig {\n+ vectorStore: VectorStoreInterface;\n+ k: number;\n+ topK?: number;\n+ recencyWeight: number;\n+}\n+\n+export class RecencyRankedRetriever extends BaseRetriever {\n+ static lc_name() {\n+ return \"RecencyRankedRetriever\";\n+ }\n+\n+ lc_namespace = [\"langchain\", \"retrievers\", \"recency_ranked\"];\n+ \n+ private vectorStore: VectorStoreInterface;\n+\n+ private k: number;\n+\n+ private topK: number;\n+\n+ private recencyWeight: number;\n+\n+ constructor(config: RecencyRankedRetrieverConfig) {\n+ super();\n+ this.vectorStore = config.vectorStore;\n+ this.k = config.k;\n+ this.topK = config.topK ?? config.k;\n+ this.recencyWeight = config.recencyWeight;\n+ }\n+\n+ async _getRelevantDocuments(query: string): Promise {\n+ const relevantDocs = await this.vectorStore.similaritySearchWithScore(query, this.k);\n+ const rerankedDocs = this.recentDocumentRanker(relevantDocs, this.topK, this.recencyWeight);\n+ return rerankedDocs.map(([doc, _]) => doc);\n+ }\n+\n+ async invoke(query: string): Promise {\n+ return this._getRelevantDocuments(query);\n+ }\n+\n+ private recentDocumentRanker(\n+ documents: [Document, number][],\n+ topK: number,\n+ recencyWeight: number\n+ ): [Document, number][] {\n+ if (documents.length === 0) return [];\n+\n+ if (!documents.every(([doc, _]) => doc.metadata.date instanceof Date)) {", + "comment_created_at": "2024-09-10T20:31:26+00:00", + "comment_author": "jacoblee93", + "comment_body": "Would suggest storing this as an ISO string - not all (most?) 
vector stores will not deserialize dates", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2004667909", + "pr_number": 7852, + "pr_file": "libs/langchain-google-cloud-sql-pg/src/vectorStore.ts", + "created_at": "2025-03-20T02:08:43+00:00", + "commented_code": "import { EmbeddingsInterface } from \"@langchain/core/embeddings\";\nimport { MaxMarginalRelevanceSearchOptions, VectorStore } from \"@langchain/core/vectorstores\";\nimport { Document } from \"@langchain/core/documents\";\nimport { v4 as uuidv4 } from \"uuid\";\nimport { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\nimport { DEFAULT_DISTANCE_STRATEGY, DistanceStrategy, QueryOptions } from \"./indexes.js\";\nimport PostgresEngine from \"./engine.js\";\nimport { customZip } from \"./utils/utils.js\";\n\nexport interface PostgresVectorStoreArgs {\n schemaName?: string,\n contentColumn?: string,\n embeddingColumn?: string,\n metadataColumns?: Array,\n idColumn?: string,\n distanceStrategy?: DistanceStrategy,\n k?: number,\n fetchK?: number,\n lambdaMult?: number,\n ignoreMetadataColumns?: Array,\n metadataJsonColumn?: string,\n indexQueryOptions?: QueryOptions\n}\n\nexport interface dbConfigArgs {\n engine: PostgresEngine;\n tableName: string;\n dbConfig?: PostgresVectorStoreArgs;\n}\n\nexport class PostgresVectorStore extends VectorStore {\n declare FilterType: string;\n\n engine: PostgresEngine;\n\n embeddings: EmbeddingsInterface;\n\n tableName: string;\n\n schemaName: string;\n\n contentColumn: string;\n\n embeddingColumn: string;\n\n metadataColumns: Array;\n\n ignoreMetadataColumns: Array;\n\n idColumn: string;\n\n metadataJsonColumn: string;\n\n distanceStrategy: DistanceStrategy;\n\n k: number;\n\n fetchK: number;\n\n lambdaMult: number;\n\n indexQueryOptions: QueryOptions;\n\n /**\n * Initializes a new vector store with embeddings and database configuration.\n *\n * @param embeddings - Instance of `EmbeddingsInterface` used to embed queries.\n * @param dbConfig - Configuration settings for the database or storage system.\n */\n constructor(embeddings: EmbeddingsInterface, dbConfig: Record) {\n super(embeddings, dbConfig);\n this.embeddings = embeddings;\n this.engine = dbConfig.engine;\n this.tableName = dbConfig.tableName;\n this.schemaName = dbConfig.schemaName;\n this.contentColumn = dbConfig.contentColumn;\n this.embeddingColumn = dbConfig.embeddingColumn;\n this.metadataColumns = dbConfig.metadataColumns ? dbConfig.metadataColumns : [];\n this.ignoreMetadataColumns = dbConfig.ignoreMetadataColumns;\n this.idColumn = dbConfig.idColumn;\n this.metadataJsonColumn = dbConfig.metadataJsonColumn;\n this.distanceStrategy = dbConfig.distanceStrategy;\n this.k = dbConfig.k;\n this.fetchK = dbConfig.fetchK;\n this.lambdaMult = dbConfig.lambdaMult;\n this.indexQueryOptions = dbConfig.indexQueryOptions;\n }\n\n /**\n * Create a new PostgresVectorStore instance.\n * @param {PostgresEngine} engine Required - Connection pool engine for managing connections to Cloud SQL for PostgreSQL database.\n * @param {Embeddings} embeddings Required - Text embedding model to use.\n * @param {string} tableName Required - Name of an existing table or table to be created.\n * @param {string} schemaName Database schema name of the table. Defaults to \"public\".\n * @param {string} contentColumn Column that represent a Document's page_content. Defaults to \"content\".\n * @param {string} embeddingColumn Column for embedding vectors. The embedding is generated from the document value. 
Defaults to \"embedding\".\n * @param {Array} metadataColumns Column(s) that represent a document's metadata.\n * @param {Array} ignoreMetadataColumns Optional - Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns.\n * @param {string} idColumn Column that represents the Document's id. Defaults to \"langchain_id\".\n * @param {string} metadataJsonColumn Optional - Column to store metadata as JSON. Defaults to \"langchain_metadata\".\n * @param {DistanceStrategy} distanceStrategy Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.\n * @param {number} k Number of Documents to return from search. Defaults to 4.\n * @param {number} fetchK Number of Documents to fetch to pass to MMR algorithm.\n * @param {number} lambdaMult Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.\n * @param {QueryOptions} indexQueryOptions Optional - Index query option.\n * @returns PostgresVectorStore instance.\n */\n static async create(\n engine: PostgresEngine,\n embeddings: EmbeddingsInterface,\n tableName: string,\n {\n schemaName = \"public\",\n contentColumn = \"content\",\n embeddingColumn = \"embedding\",\n metadataColumns = [],\n ignoreMetadataColumns,\n idColumn = \"langchain_id\",\n metadataJsonColumn = \"langchain_metadata\",\n distanceStrategy = DEFAULT_DISTANCE_STRATEGY,\n k = 4,\n fetchK = 20,\n lambdaMult = 0.5,\n indexQueryOptions\n }: PostgresVectorStoreArgs = {}\n ): Promise {\n\n if (metadataColumns !== undefined && ignoreMetadataColumns !== undefined) {\n throw Error(\"Can not use both metadata_columns and ignore_metadata_columns.\");\n }\n\n const { rows } = await engine.pool.raw(`SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '${tableName}' AND table_schema = '${schemaName}'`);\n const columns: { [key: string] } = {};\n\n for (const index in rows) {\n if (rows[index]) {\n const row = rows[index];\n columns[row.column_name] = row.data_type\n }\n }\n\n if (!Object.prototype.hasOwnProperty.call(columns, idColumn)) {\n throw Error(`Id column: ${idColumn}, does not exist.`);\n }\n\n if (!Object.prototype.hasOwnProperty.call(columns, contentColumn)) {\n throw Error(`Content column: ${contentColumn}, does not exist.`);\n }\n\n const contentType = columns[contentColumn];\n\n if (contentType !== \"text\" && !contentType.includes(\"char\")) {\n throw Error(`Content column: ${contentColumn}, is type: ${contentType}. It must be a type of character string.`);\n }\n\n if (!Object.prototype.hasOwnProperty.call(columns, embeddingColumn)) {\n throw Error(`Embedding column: ${embeddingColumn}, does not exist.`);\n }\n\n if (columns[embeddingColumn] !== \"USER-DEFINED\") {\n throw Error(`Embedding column: ${embeddingColumn} is not of type Vector.`);\n }\n\n const jsonColumn = Object.prototype.hasOwnProperty.call(columns, metadataJsonColumn) ? 
metadataJsonColumn : \"\";\n\n for (const column of metadataColumns) {\n if (!Object.prototype.hasOwnProperty.call(columns, column)) {\n throw Error(`Metadata column: ${column}, does not exist.`);\n }\n }\n\n const allColumns = columns;\n let allMetadataColumns: string[];\n if (ignoreMetadataColumns !== undefined && ignoreMetadataColumns.length > 0) {\n for (const column of ignoreMetadataColumns) {\n delete allColumns[column];\n }\n\n delete allColumns[idColumn];\n delete allColumns[contentColumn];\n delete allColumns[embeddingColumn];\n allMetadataColumns = Object.keys(allColumns);\n }\n return new PostgresVectorStore(\n embeddings,\n {\n engine,\n tableName,\n schemaName,\n contentColumn,\n embeddingColumn,\n metadataColumns: allMetadataColumns,\n ignoreMetadataColumns,\n idColumn,\n metadataJsonColumn: jsonColumn,\n distanceStrategy,\n k,\n fetchK,\n lambdaMult,\n indexQueryOptions\n }\n )\n }\n\n static async fromTexts(texts: string[], metadatas: object[] | object, embeddings: EmbeddingsInterface, dbConfig: dbConfigArgs): Promise {\n const documents: Document[] = [];\n\n for (let i = 0; i < texts.length; i += 1) {\n const doc = new Document({\n pageContent: texts[i],\n metadata: Array.isArray(metadatas) ? metadatas[i] : metadatas\n })\n documents.push(doc);\n }\n\n return PostgresVectorStore.fromDocuments(documents, embeddings, dbConfig);\n }\n\n static async fromDocuments(docs: Document[], embeddings: EmbeddingsInterface, dbConfig: dbConfigArgs): Promise {\n const { engine } = dbConfig;\n const { tableName } = dbConfig;\n const config = dbConfig.dbConfig;\n const vectorStore = await this.create(engine, embeddings, tableName, config);\n\n await vectorStore.addDocuments(docs)\n\n return vectorStore;\n }\n\n async addVectors(vectors: number[][], documents: Document[], options?: { ids?: string[] }): Promise {\n let ids: string[] = [];\n const metadatas: Record[] = []\n\n if (vectors.length !== documents.length) {\n throw new Error(\"The number of vectors must match the number of documents provided.\");\n }\n\n if (options?.ids) {\n ids = options.ids;\n } else {\n documents.forEach(document => {\n if (document.id !== undefined) {\n ids.push(document.id);\n } else {\n ids.push(uuidv4());\n }\n });\n }\n\n if (options && options.ids && options.ids.length !== documents.length) {\n throw new Error(\"The number of ids must match the number of documents provided.\");\n }\n\n documents.forEach(document => {\n metadatas.push(document.metadata)\n });\n\n const tuples = customZip(ids, documents, vectors, metadatas);\n\n // Insert embeddings\n for (const [id, document, embedding, metadata] of tuples) {\n const metadataColNames = this.metadataColumns.length > 0 ? `, \"${this.metadataColumns.join(\"\\\",\\\"\")}\"` : \"\";\n\n let stmt = `INSERT INTO \"${this.schemaName}\".\"${this.tableName}\"(\"${this.idColumn}\", \"${this.contentColumn}\", \"${this.embeddingColumn}\" ${metadataColNames}`\n const values: { [key: string] } = {\n id,\n content: document.pageContent,\n embedding: `[${embedding.toString()}]`\n }\n let valuesStmt = \" VALUES (:id, :content, :embedding\";\n\n // Add metadata\n const extra = metadata;\n for (const metadataColumn of this.metadataColumns) {\n if (Object.prototype.hasOwnProperty.call(metadata, metadataColumn)) {\n valuesStmt += `, :${metadataColumn}`;\n values[metadataColumn] = metadata[metadataColumn]\n delete extra[metadataColumn]\n } else {\n valuesStmt += \" ,null\"\n }\n }\n\n // Add JSON column and/or close statement\n stmt += this.metadataJsonColumn ? 
`, ${this.metadataJsonColumn})` : \")\";", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2004667909", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7852, + "pr_file": "libs/langchain-google-cloud-sql-pg/src/vectorStore.ts", + "discussion_id": "2004667909", + "commented_code": "@@ -0,0 +1,423 @@\n+import { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { MaxMarginalRelevanceSearchOptions, VectorStore } from \"@langchain/core/vectorstores\";\n+import { Document } from \"@langchain/core/documents\";\n+import { v4 as uuidv4 } from \"uuid\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { DEFAULT_DISTANCE_STRATEGY, DistanceStrategy, QueryOptions } from \"./indexes.js\";\n+import PostgresEngine from \"./engine.js\";\n+import { customZip } from \"./utils/utils.js\";\n+\n+export interface PostgresVectorStoreArgs {\n+ schemaName?: string,\n+ contentColumn?: string,\n+ embeddingColumn?: string,\n+ metadataColumns?: Array,\n+ idColumn?: string,\n+ distanceStrategy?: DistanceStrategy,\n+ k?: number,\n+ fetchK?: number,\n+ lambdaMult?: number,\n+ ignoreMetadataColumns?: Array,\n+ metadataJsonColumn?: string,\n+ indexQueryOptions?: QueryOptions\n+}\n+\n+export interface dbConfigArgs {\n+ engine: PostgresEngine;\n+ tableName: string;\n+ dbConfig?: PostgresVectorStoreArgs;\n+}\n+\n+export class PostgresVectorStore extends VectorStore {\n+ declare FilterType: string;\n+\n+ engine: PostgresEngine;\n+\n+ embeddings: EmbeddingsInterface;\n+\n+ tableName: string;\n+\n+ schemaName: string;\n+\n+ contentColumn: string;\n+\n+ embeddingColumn: string;\n+\n+ metadataColumns: Array;\n+\n+ ignoreMetadataColumns: Array;\n+\n+ idColumn: string;\n+\n+ metadataJsonColumn: string;\n+\n+ distanceStrategy: DistanceStrategy;\n+\n+ k: number;\n+\n+ fetchK: number;\n+\n+ lambdaMult: number;\n+\n+ indexQueryOptions: QueryOptions;\n+\n+ /**\n+ * Initializes a new vector store with embeddings and database configuration.\n+ *\n+ * @param embeddings - Instance of `EmbeddingsInterface` used to embed queries.\n+ * @param dbConfig - Configuration settings for the database or storage system.\n+ */\n+ constructor(embeddings: EmbeddingsInterface, dbConfig: Record) {\n+ super(embeddings, dbConfig);\n+ this.embeddings = embeddings;\n+ this.engine = dbConfig.engine;\n+ this.tableName = dbConfig.tableName;\n+ this.schemaName = dbConfig.schemaName;\n+ this.contentColumn = dbConfig.contentColumn;\n+ this.embeddingColumn = dbConfig.embeddingColumn;\n+ this.metadataColumns = dbConfig.metadataColumns ? dbConfig.metadataColumns : [];\n+ this.ignoreMetadataColumns = dbConfig.ignoreMetadataColumns;\n+ this.idColumn = dbConfig.idColumn;\n+ this.metadataJsonColumn = dbConfig.metadataJsonColumn;\n+ this.distanceStrategy = dbConfig.distanceStrategy;\n+ this.k = dbConfig.k;\n+ this.fetchK = dbConfig.fetchK;\n+ this.lambdaMult = dbConfig.lambdaMult;\n+ this.indexQueryOptions = dbConfig.indexQueryOptions;\n+ }\n+\n+ /**\n+ * Create a new PostgresVectorStore instance.\n+ * @param {PostgresEngine} engine Required - Connection pool engine for managing connections to Cloud SQL for PostgreSQL database.\n+ * @param {Embeddings} embeddings Required - Text embedding model to use.\n+ * @param {string} tableName Required - Name of an existing table or table to be created.\n+ * @param {string} schemaName Database schema name of the table. 
Defaults to \"public\".\n+ * @param {string} contentColumn Column that represent a Document's page_content. Defaults to \"content\".\n+ * @param {string} embeddingColumn Column for embedding vectors. The embedding is generated from the document value. Defaults to \"embedding\".\n+ * @param {Array} metadataColumns Column(s) that represent a document's metadata.\n+ * @param {Array} ignoreMetadataColumns Optional - Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns.\n+ * @param {string} idColumn Column that represents the Document's id. Defaults to \"langchain_id\".\n+ * @param {string} metadataJsonColumn Optional - Column to store metadata as JSON. Defaults to \"langchain_metadata\".\n+ * @param {DistanceStrategy} distanceStrategy Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.\n+ * @param {number} k Number of Documents to return from search. Defaults to 4.\n+ * @param {number} fetchK Number of Documents to fetch to pass to MMR algorithm.\n+ * @param {number} lambdaMult Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.\n+ * @param {QueryOptions} indexQueryOptions Optional - Index query option.\n+ * @returns PostgresVectorStore instance.\n+ */\n+ static async create(\n+ engine: PostgresEngine,\n+ embeddings: EmbeddingsInterface,\n+ tableName: string,\n+ {\n+ schemaName = \"public\",\n+ contentColumn = \"content\",\n+ embeddingColumn = \"embedding\",\n+ metadataColumns = [],\n+ ignoreMetadataColumns,\n+ idColumn = \"langchain_id\",\n+ metadataJsonColumn = \"langchain_metadata\",\n+ distanceStrategy = DEFAULT_DISTANCE_STRATEGY,\n+ k = 4,\n+ fetchK = 20,\n+ lambdaMult = 0.5,\n+ indexQueryOptions\n+ }: PostgresVectorStoreArgs = {}\n+ ): Promise {\n+\n+ if (metadataColumns !== undefined && ignoreMetadataColumns !== undefined) {\n+ throw Error(\"Can not use both metadata_columns and ignore_metadata_columns.\");\n+ }\n+\n+ const { rows } = await engine.pool.raw(`SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '${tableName}' AND table_schema = '${schemaName}'`);\n+ const columns: { [key: string] } = {};\n+\n+ for (const index in rows) {\n+ if (rows[index]) {\n+ const row = rows[index];\n+ columns[row.column_name] = row.data_type\n+ }\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, idColumn)) {\n+ throw Error(`Id column: ${idColumn}, does not exist.`);\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, contentColumn)) {\n+ throw Error(`Content column: ${contentColumn}, does not exist.`);\n+ }\n+\n+ const contentType = columns[contentColumn];\n+\n+ if (contentType !== \"text\" && !contentType.includes(\"char\")) {\n+ throw Error(`Content column: ${contentColumn}, is type: ${contentType}. It must be a type of character string.`);\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, embeddingColumn)) {\n+ throw Error(`Embedding column: ${embeddingColumn}, does not exist.`);\n+ }\n+\n+ if (columns[embeddingColumn] !== \"USER-DEFINED\") {\n+ throw Error(`Embedding column: ${embeddingColumn} is not of type Vector.`);\n+ }\n+\n+ const jsonColumn = Object.prototype.hasOwnProperty.call(columns, metadataJsonColumn) ? 
metadataJsonColumn : \"\";\n+\n+ for (const column of metadataColumns) {\n+ if (!Object.prototype.hasOwnProperty.call(columns, column)) {\n+ throw Error(`Metadata column: ${column}, does not exist.`);\n+ }\n+ }\n+\n+ const allColumns = columns;\n+ let allMetadataColumns: string[];\n+ if (ignoreMetadataColumns !== undefined && ignoreMetadataColumns.length > 0) {\n+ for (const column of ignoreMetadataColumns) {\n+ delete allColumns[column];\n+ }\n+\n+ delete allColumns[idColumn];\n+ delete allColumns[contentColumn];\n+ delete allColumns[embeddingColumn];\n+ allMetadataColumns = Object.keys(allColumns);\n+ }\n+ return new PostgresVectorStore(\n+ embeddings,\n+ {\n+ engine,\n+ tableName,\n+ schemaName,\n+ contentColumn,\n+ embeddingColumn,\n+ metadataColumns: allMetadataColumns,\n+ ignoreMetadataColumns,\n+ idColumn,\n+ metadataJsonColumn: jsonColumn,\n+ distanceStrategy,\n+ k,\n+ fetchK,\n+ lambdaMult,\n+ indexQueryOptions\n+ }\n+ )\n+ }\n+\n+ static async fromTexts(texts: string[], metadatas: object[] | object, embeddings: EmbeddingsInterface, dbConfig: dbConfigArgs): Promise {\n+ const documents: Document[] = [];\n+\n+ for (let i = 0; i < texts.length; i += 1) {\n+ const doc = new Document({\n+ pageContent: texts[i],\n+ metadata: Array.isArray(metadatas) ? metadatas[i] : metadatas\n+ })\n+ documents.push(doc);\n+ }\n+\n+ return PostgresVectorStore.fromDocuments(documents, embeddings, dbConfig);\n+ }\n+\n+ static async fromDocuments(docs: Document[], embeddings: EmbeddingsInterface, dbConfig: dbConfigArgs): Promise {\n+ const { engine } = dbConfig;\n+ const { tableName } = dbConfig;\n+ const config = dbConfig.dbConfig;\n+ const vectorStore = await this.create(engine, embeddings, tableName, config);\n+\n+ await vectorStore.addDocuments(docs)\n+\n+ return vectorStore;\n+ }\n+\n+ async addVectors(vectors: number[][], documents: Document[], options?: { ids?: string[] }): Promise {\n+ let ids: string[] = [];\n+ const metadatas: Record[] = []\n+\n+ if (vectors.length !== documents.length) {\n+ throw new Error(\"The number of vectors must match the number of documents provided.\");\n+ }\n+\n+ if (options?.ids) {\n+ ids = options.ids;\n+ } else {\n+ documents.forEach(document => {\n+ if (document.id !== undefined) {\n+ ids.push(document.id);\n+ } else {\n+ ids.push(uuidv4());\n+ }\n+ });\n+ }\n+\n+ if (options && options.ids && options.ids.length !== documents.length) {\n+ throw new Error(\"The number of ids must match the number of documents provided.\");\n+ }\n+\n+ documents.forEach(document => {\n+ metadatas.push(document.metadata)\n+ });\n+\n+ const tuples = customZip(ids, documents, vectors, metadatas);\n+\n+ // Insert embeddings\n+ for (const [id, document, embedding, metadata] of tuples) {\n+ const metadataColNames = this.metadataColumns.length > 0 ? 
`, \"${this.metadataColumns.join(\"\\\",\\\"\")}\"` : \"\";\n+\n+ let stmt = `INSERT INTO \"${this.schemaName}\".\"${this.tableName}\"(\"${this.idColumn}\", \"${this.contentColumn}\", \"${this.embeddingColumn}\" ${metadataColNames}`\n+ const values: { [key: string] } = {\n+ id,\n+ content: document.pageContent,\n+ embedding: `[${embedding.toString()}]`\n+ }\n+ let valuesStmt = \" VALUES (:id, :content, :embedding\";\n+\n+ // Add metadata\n+ const extra = metadata;\n+ for (const metadataColumn of this.metadataColumns) {\n+ if (Object.prototype.hasOwnProperty.call(metadata, metadataColumn)) {\n+ valuesStmt += `, :${metadataColumn}`;\n+ values[metadataColumn] = metadata[metadataColumn]\n+ delete extra[metadataColumn]\n+ } else {\n+ valuesStmt += \" ,null\"\n+ }\n+ }\n+\n+ // Add JSON column and/or close statement\n+ stmt += this.metadataJsonColumn ? `, ${this.metadataJsonColumn})` : \")\";", + "comment_created_at": "2025-03-20T02:08:43+00:00", + "comment_author": "jacoblee93", + "comment_body": "OOC why not just support `metadataJsonColumn` vs spreading metadata into other columns?", + "pr_file_module": null + }, + { + "comment_id": "2008274415", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7852, + "pr_file": "libs/langchain-google-cloud-sql-pg/src/vectorStore.ts", + "discussion_id": "2004667909", + "commented_code": "@@ -0,0 +1,423 @@\n+import { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { MaxMarginalRelevanceSearchOptions, VectorStore } from \"@langchain/core/vectorstores\";\n+import { Document } from \"@langchain/core/documents\";\n+import { v4 as uuidv4 } from \"uuid\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { DEFAULT_DISTANCE_STRATEGY, DistanceStrategy, QueryOptions } from \"./indexes.js\";\n+import PostgresEngine from \"./engine.js\";\n+import { customZip } from \"./utils/utils.js\";\n+\n+export interface PostgresVectorStoreArgs {\n+ schemaName?: string,\n+ contentColumn?: string,\n+ embeddingColumn?: string,\n+ metadataColumns?: Array,\n+ idColumn?: string,\n+ distanceStrategy?: DistanceStrategy,\n+ k?: number,\n+ fetchK?: number,\n+ lambdaMult?: number,\n+ ignoreMetadataColumns?: Array,\n+ metadataJsonColumn?: string,\n+ indexQueryOptions?: QueryOptions\n+}\n+\n+export interface dbConfigArgs {\n+ engine: PostgresEngine;\n+ tableName: string;\n+ dbConfig?: PostgresVectorStoreArgs;\n+}\n+\n+export class PostgresVectorStore extends VectorStore {\n+ declare FilterType: string;\n+\n+ engine: PostgresEngine;\n+\n+ embeddings: EmbeddingsInterface;\n+\n+ tableName: string;\n+\n+ schemaName: string;\n+\n+ contentColumn: string;\n+\n+ embeddingColumn: string;\n+\n+ metadataColumns: Array;\n+\n+ ignoreMetadataColumns: Array;\n+\n+ idColumn: string;\n+\n+ metadataJsonColumn: string;\n+\n+ distanceStrategy: DistanceStrategy;\n+\n+ k: number;\n+\n+ fetchK: number;\n+\n+ lambdaMult: number;\n+\n+ indexQueryOptions: QueryOptions;\n+\n+ /**\n+ * Initializes a new vector store with embeddings and database configuration.\n+ *\n+ * @param embeddings - Instance of `EmbeddingsInterface` used to embed queries.\n+ * @param dbConfig - Configuration settings for the database or storage system.\n+ */\n+ constructor(embeddings: EmbeddingsInterface, dbConfig: Record) {\n+ super(embeddings, dbConfig);\n+ this.embeddings = embeddings;\n+ this.engine = dbConfig.engine;\n+ this.tableName = dbConfig.tableName;\n+ this.schemaName = dbConfig.schemaName;\n+ this.contentColumn = dbConfig.contentColumn;\n+ this.embeddingColumn = 
dbConfig.embeddingColumn;\n+ this.metadataColumns = dbConfig.metadataColumns ? dbConfig.metadataColumns : [];\n+ this.ignoreMetadataColumns = dbConfig.ignoreMetadataColumns;\n+ this.idColumn = dbConfig.idColumn;\n+ this.metadataJsonColumn = dbConfig.metadataJsonColumn;\n+ this.distanceStrategy = dbConfig.distanceStrategy;\n+ this.k = dbConfig.k;\n+ this.fetchK = dbConfig.fetchK;\n+ this.lambdaMult = dbConfig.lambdaMult;\n+ this.indexQueryOptions = dbConfig.indexQueryOptions;\n+ }\n+\n+ /**\n+ * Create a new PostgresVectorStore instance.\n+ * @param {PostgresEngine} engine Required - Connection pool engine for managing connections to Cloud SQL for PostgreSQL database.\n+ * @param {Embeddings} embeddings Required - Text embedding model to use.\n+ * @param {string} tableName Required - Name of an existing table or table to be created.\n+ * @param {string} schemaName Database schema name of the table. Defaults to \"public\".\n+ * @param {string} contentColumn Column that represent a Document's page_content. Defaults to \"content\".\n+ * @param {string} embeddingColumn Column for embedding vectors. The embedding is generated from the document value. Defaults to \"embedding\".\n+ * @param {Array} metadataColumns Column(s) that represent a document's metadata.\n+ * @param {Array} ignoreMetadataColumns Optional - Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns.\n+ * @param {string} idColumn Column that represents the Document's id. Defaults to \"langchain_id\".\n+ * @param {string} metadataJsonColumn Optional - Column to store metadata as JSON. Defaults to \"langchain_metadata\".\n+ * @param {DistanceStrategy} distanceStrategy Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.\n+ * @param {number} k Number of Documents to return from search. Defaults to 4.\n+ * @param {number} fetchK Number of Documents to fetch to pass to MMR algorithm.\n+ * @param {number} lambdaMult Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. 
Defaults to 0.5.\n+ * @param {QueryOptions} indexQueryOptions Optional - Index query option.\n+ * @returns PostgresVectorStore instance.\n+ */\n+ static async create(\n+ engine: PostgresEngine,\n+ embeddings: EmbeddingsInterface,\n+ tableName: string,\n+ {\n+ schemaName = \"public\",\n+ contentColumn = \"content\",\n+ embeddingColumn = \"embedding\",\n+ metadataColumns = [],\n+ ignoreMetadataColumns,\n+ idColumn = \"langchain_id\",\n+ metadataJsonColumn = \"langchain_metadata\",\n+ distanceStrategy = DEFAULT_DISTANCE_STRATEGY,\n+ k = 4,\n+ fetchK = 20,\n+ lambdaMult = 0.5,\n+ indexQueryOptions\n+ }: PostgresVectorStoreArgs = {}\n+ ): Promise {\n+\n+ if (metadataColumns !== undefined && ignoreMetadataColumns !== undefined) {\n+ throw Error(\"Can not use both metadata_columns and ignore_metadata_columns.\");\n+ }\n+\n+ const { rows } = await engine.pool.raw(`SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '${tableName}' AND table_schema = '${schemaName}'`);\n+ const columns: { [key: string] } = {};\n+\n+ for (const index in rows) {\n+ if (rows[index]) {\n+ const row = rows[index];\n+ columns[row.column_name] = row.data_type\n+ }\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, idColumn)) {\n+ throw Error(`Id column: ${idColumn}, does not exist.`);\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, contentColumn)) {\n+ throw Error(`Content column: ${contentColumn}, does not exist.`);\n+ }\n+\n+ const contentType = columns[contentColumn];\n+\n+ if (contentType !== \"text\" && !contentType.includes(\"char\")) {\n+ throw Error(`Content column: ${contentColumn}, is type: ${contentType}. It must be a type of character string.`);\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, embeddingColumn)) {\n+ throw Error(`Embedding column: ${embeddingColumn}, does not exist.`);\n+ }\n+\n+ if (columns[embeddingColumn] !== \"USER-DEFINED\") {\n+ throw Error(`Embedding column: ${embeddingColumn} is not of type Vector.`);\n+ }\n+\n+ const jsonColumn = Object.prototype.hasOwnProperty.call(columns, metadataJsonColumn) ? metadataJsonColumn : \"\";\n+\n+ for (const column of metadataColumns) {\n+ if (!Object.prototype.hasOwnProperty.call(columns, column)) {\n+ throw Error(`Metadata column: ${column}, does not exist.`);\n+ }\n+ }\n+\n+ const allColumns = columns;\n+ let allMetadataColumns: string[];\n+ if (ignoreMetadataColumns !== undefined && ignoreMetadataColumns.length > 0) {\n+ for (const column of ignoreMetadataColumns) {\n+ delete allColumns[column];\n+ }\n+\n+ delete allColumns[idColumn];\n+ delete allColumns[contentColumn];\n+ delete allColumns[embeddingColumn];\n+ allMetadataColumns = Object.keys(allColumns);\n+ }\n+ return new PostgresVectorStore(\n+ embeddings,\n+ {\n+ engine,\n+ tableName,\n+ schemaName,\n+ contentColumn,\n+ embeddingColumn,\n+ metadataColumns: allMetadataColumns,\n+ ignoreMetadataColumns,\n+ idColumn,\n+ metadataJsonColumn: jsonColumn,\n+ distanceStrategy,\n+ k,\n+ fetchK,\n+ lambdaMult,\n+ indexQueryOptions\n+ }\n+ )\n+ }\n+\n+ static async fromTexts(texts: string[], metadatas: object[] | object, embeddings: EmbeddingsInterface, dbConfig: dbConfigArgs): Promise {\n+ const documents: Document[] = [];\n+\n+ for (let i = 0; i < texts.length; i += 1) {\n+ const doc = new Document({\n+ pageContent: texts[i],\n+ metadata: Array.isArray(metadatas) ? 
metadatas[i] : metadatas\n+ })\n+ documents.push(doc);\n+ }\n+\n+ return PostgresVectorStore.fromDocuments(documents, embeddings, dbConfig);\n+ }\n+\n+ static async fromDocuments(docs: Document[], embeddings: EmbeddingsInterface, dbConfig: dbConfigArgs): Promise {\n+ const { engine } = dbConfig;\n+ const { tableName } = dbConfig;\n+ const config = dbConfig.dbConfig;\n+ const vectorStore = await this.create(engine, embeddings, tableName, config);\n+\n+ await vectorStore.addDocuments(docs)\n+\n+ return vectorStore;\n+ }\n+\n+ async addVectors(vectors: number[][], documents: Document[], options?: { ids?: string[] }): Promise {\n+ let ids: string[] = [];\n+ const metadatas: Record[] = []\n+\n+ if (vectors.length !== documents.length) {\n+ throw new Error(\"The number of vectors must match the number of documents provided.\");\n+ }\n+\n+ if (options?.ids) {\n+ ids = options.ids;\n+ } else {\n+ documents.forEach(document => {\n+ if (document.id !== undefined) {\n+ ids.push(document.id);\n+ } else {\n+ ids.push(uuidv4());\n+ }\n+ });\n+ }\n+\n+ if (options && options.ids && options.ids.length !== documents.length) {\n+ throw new Error(\"The number of ids must match the number of documents provided.\");\n+ }\n+\n+ documents.forEach(document => {\n+ metadatas.push(document.metadata)\n+ });\n+\n+ const tuples = customZip(ids, documents, vectors, metadatas);\n+\n+ // Insert embeddings\n+ for (const [id, document, embedding, metadata] of tuples) {\n+ const metadataColNames = this.metadataColumns.length > 0 ? `, \"${this.metadataColumns.join(\"\\\",\\\"\")}\"` : \"\";\n+\n+ let stmt = `INSERT INTO \"${this.schemaName}\".\"${this.tableName}\"(\"${this.idColumn}\", \"${this.contentColumn}\", \"${this.embeddingColumn}\" ${metadataColNames}`\n+ const values: { [key: string] } = {\n+ id,\n+ content: document.pageContent,\n+ embedding: `[${embedding.toString()}]`\n+ }\n+ let valuesStmt = \" VALUES (:id, :content, :embedding\";\n+\n+ // Add metadata\n+ const extra = metadata;\n+ for (const metadataColumn of this.metadataColumns) {\n+ if (Object.prototype.hasOwnProperty.call(metadata, metadataColumn)) {\n+ valuesStmt += `, :${metadataColumn}`;\n+ values[metadataColumn] = metadata[metadataColumn]\n+ delete extra[metadataColumn]\n+ } else {\n+ valuesStmt += \" ,null\"\n+ }\n+ }\n+\n+ // Add JSON column and/or close statement\n+ stmt += this.metadataJsonColumn ? `, ${this.metadataJsonColumn})` : \")\";", + "comment_created_at": "2025-03-21T20:14:26+00:00", + "comment_author": "averikitsch", + "comment_body": "This pattern follows database best practices for relational databases by using individual columns versus all data in a JSON column. This allows for low latency filtering and type checking. 
", + "pr_file_module": null + }, + { + "comment_id": "2009500129", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7852, + "pr_file": "libs/langchain-google-cloud-sql-pg/src/vectorStore.ts", + "discussion_id": "2004667909", + "commented_code": "@@ -0,0 +1,423 @@\n+import { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { MaxMarginalRelevanceSearchOptions, VectorStore } from \"@langchain/core/vectorstores\";\n+import { Document } from \"@langchain/core/documents\";\n+import { v4 as uuidv4 } from \"uuid\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { DEFAULT_DISTANCE_STRATEGY, DistanceStrategy, QueryOptions } from \"./indexes.js\";\n+import PostgresEngine from \"./engine.js\";\n+import { customZip } from \"./utils/utils.js\";\n+\n+export interface PostgresVectorStoreArgs {\n+ schemaName?: string,\n+ contentColumn?: string,\n+ embeddingColumn?: string,\n+ metadataColumns?: Array,\n+ idColumn?: string,\n+ distanceStrategy?: DistanceStrategy,\n+ k?: number,\n+ fetchK?: number,\n+ lambdaMult?: number,\n+ ignoreMetadataColumns?: Array,\n+ metadataJsonColumn?: string,\n+ indexQueryOptions?: QueryOptions\n+}\n+\n+export interface dbConfigArgs {\n+ engine: PostgresEngine;\n+ tableName: string;\n+ dbConfig?: PostgresVectorStoreArgs;\n+}\n+\n+export class PostgresVectorStore extends VectorStore {\n+ declare FilterType: string;\n+\n+ engine: PostgresEngine;\n+\n+ embeddings: EmbeddingsInterface;\n+\n+ tableName: string;\n+\n+ schemaName: string;\n+\n+ contentColumn: string;\n+\n+ embeddingColumn: string;\n+\n+ metadataColumns: Array;\n+\n+ ignoreMetadataColumns: Array;\n+\n+ idColumn: string;\n+\n+ metadataJsonColumn: string;\n+\n+ distanceStrategy: DistanceStrategy;\n+\n+ k: number;\n+\n+ fetchK: number;\n+\n+ lambdaMult: number;\n+\n+ indexQueryOptions: QueryOptions;\n+\n+ /**\n+ * Initializes a new vector store with embeddings and database configuration.\n+ *\n+ * @param embeddings - Instance of `EmbeddingsInterface` used to embed queries.\n+ * @param dbConfig - Configuration settings for the database or storage system.\n+ */\n+ constructor(embeddings: EmbeddingsInterface, dbConfig: Record) {\n+ super(embeddings, dbConfig);\n+ this.embeddings = embeddings;\n+ this.engine = dbConfig.engine;\n+ this.tableName = dbConfig.tableName;\n+ this.schemaName = dbConfig.schemaName;\n+ this.contentColumn = dbConfig.contentColumn;\n+ this.embeddingColumn = dbConfig.embeddingColumn;\n+ this.metadataColumns = dbConfig.metadataColumns ? dbConfig.metadataColumns : [];\n+ this.ignoreMetadataColumns = dbConfig.ignoreMetadataColumns;\n+ this.idColumn = dbConfig.idColumn;\n+ this.metadataJsonColumn = dbConfig.metadataJsonColumn;\n+ this.distanceStrategy = dbConfig.distanceStrategy;\n+ this.k = dbConfig.k;\n+ this.fetchK = dbConfig.fetchK;\n+ this.lambdaMult = dbConfig.lambdaMult;\n+ this.indexQueryOptions = dbConfig.indexQueryOptions;\n+ }\n+\n+ /**\n+ * Create a new PostgresVectorStore instance.\n+ * @param {PostgresEngine} engine Required - Connection pool engine for managing connections to Cloud SQL for PostgreSQL database.\n+ * @param {Embeddings} embeddings Required - Text embedding model to use.\n+ * @param {string} tableName Required - Name of an existing table or table to be created.\n+ * @param {string} schemaName Database schema name of the table. Defaults to \"public\".\n+ * @param {string} contentColumn Column that represent a Document's page_content. 
Defaults to \"content\".\n+ * @param {string} embeddingColumn Column for embedding vectors. The embedding is generated from the document value. Defaults to \"embedding\".\n+ * @param {Array} metadataColumns Column(s) that represent a document's metadata.\n+ * @param {Array} ignoreMetadataColumns Optional - Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns.\n+ * @param {string} idColumn Column that represents the Document's id. Defaults to \"langchain_id\".\n+ * @param {string} metadataJsonColumn Optional - Column to store metadata as JSON. Defaults to \"langchain_metadata\".\n+ * @param {DistanceStrategy} distanceStrategy Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.\n+ * @param {number} k Number of Documents to return from search. Defaults to 4.\n+ * @param {number} fetchK Number of Documents to fetch to pass to MMR algorithm.\n+ * @param {number} lambdaMult Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.\n+ * @param {QueryOptions} indexQueryOptions Optional - Index query option.\n+ * @returns PostgresVectorStore instance.\n+ */\n+ static async create(\n+ engine: PostgresEngine,\n+ embeddings: EmbeddingsInterface,\n+ tableName: string,\n+ {\n+ schemaName = \"public\",\n+ contentColumn = \"content\",\n+ embeddingColumn = \"embedding\",\n+ metadataColumns = [],\n+ ignoreMetadataColumns,\n+ idColumn = \"langchain_id\",\n+ metadataJsonColumn = \"langchain_metadata\",\n+ distanceStrategy = DEFAULT_DISTANCE_STRATEGY,\n+ k = 4,\n+ fetchK = 20,\n+ lambdaMult = 0.5,\n+ indexQueryOptions\n+ }: PostgresVectorStoreArgs = {}\n+ ): Promise {\n+\n+ if (metadataColumns !== undefined && ignoreMetadataColumns !== undefined) {\n+ throw Error(\"Can not use both metadata_columns and ignore_metadata_columns.\");\n+ }\n+\n+ const { rows } = await engine.pool.raw(`SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '${tableName}' AND table_schema = '${schemaName}'`);\n+ const columns: { [key: string] } = {};\n+\n+ for (const index in rows) {\n+ if (rows[index]) {\n+ const row = rows[index];\n+ columns[row.column_name] = row.data_type\n+ }\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, idColumn)) {\n+ throw Error(`Id column: ${idColumn}, does not exist.`);\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, contentColumn)) {\n+ throw Error(`Content column: ${contentColumn}, does not exist.`);\n+ }\n+\n+ const contentType = columns[contentColumn];\n+\n+ if (contentType !== \"text\" && !contentType.includes(\"char\")) {\n+ throw Error(`Content column: ${contentColumn}, is type: ${contentType}. It must be a type of character string.`);\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, embeddingColumn)) {\n+ throw Error(`Embedding column: ${embeddingColumn}, does not exist.`);\n+ }\n+\n+ if (columns[embeddingColumn] !== \"USER-DEFINED\") {\n+ throw Error(`Embedding column: ${embeddingColumn} is not of type Vector.`);\n+ }\n+\n+ const jsonColumn = Object.prototype.hasOwnProperty.call(columns, metadataJsonColumn) ? 
metadataJsonColumn : \"\";\n+\n+ for (const column of metadataColumns) {\n+ if (!Object.prototype.hasOwnProperty.call(columns, column)) {\n+ throw Error(`Metadata column: ${column}, does not exist.`);\n+ }\n+ }\n+\n+ const allColumns = columns;\n+ let allMetadataColumns: string[];\n+ if (ignoreMetadataColumns !== undefined && ignoreMetadataColumns.length > 0) {\n+ for (const column of ignoreMetadataColumns) {\n+ delete allColumns[column];\n+ }\n+\n+ delete allColumns[idColumn];\n+ delete allColumns[contentColumn];\n+ delete allColumns[embeddingColumn];\n+ allMetadataColumns = Object.keys(allColumns);\n+ }\n+ return new PostgresVectorStore(\n+ embeddings,\n+ {\n+ engine,\n+ tableName,\n+ schemaName,\n+ contentColumn,\n+ embeddingColumn,\n+ metadataColumns: allMetadataColumns,\n+ ignoreMetadataColumns,\n+ idColumn,\n+ metadataJsonColumn: jsonColumn,\n+ distanceStrategy,\n+ k,\n+ fetchK,\n+ lambdaMult,\n+ indexQueryOptions\n+ }\n+ )\n+ }\n+\n+ static async fromTexts(texts: string[], metadatas: object[] | object, embeddings: EmbeddingsInterface, dbConfig: dbConfigArgs): Promise {\n+ const documents: Document[] = [];\n+\n+ for (let i = 0; i < texts.length; i += 1) {\n+ const doc = new Document({\n+ pageContent: texts[i],\n+ metadata: Array.isArray(metadatas) ? metadatas[i] : metadatas\n+ })\n+ documents.push(doc);\n+ }\n+\n+ return PostgresVectorStore.fromDocuments(documents, embeddings, dbConfig);\n+ }\n+\n+ static async fromDocuments(docs: Document[], embeddings: EmbeddingsInterface, dbConfig: dbConfigArgs): Promise {\n+ const { engine } = dbConfig;\n+ const { tableName } = dbConfig;\n+ const config = dbConfig.dbConfig;\n+ const vectorStore = await this.create(engine, embeddings, tableName, config);\n+\n+ await vectorStore.addDocuments(docs)\n+\n+ return vectorStore;\n+ }\n+\n+ async addVectors(vectors: number[][], documents: Document[], options?: { ids?: string[] }): Promise {\n+ let ids: string[] = [];\n+ const metadatas: Record[] = []\n+\n+ if (vectors.length !== documents.length) {\n+ throw new Error(\"The number of vectors must match the number of documents provided.\");\n+ }\n+\n+ if (options?.ids) {\n+ ids = options.ids;\n+ } else {\n+ documents.forEach(document => {\n+ if (document.id !== undefined) {\n+ ids.push(document.id);\n+ } else {\n+ ids.push(uuidv4());\n+ }\n+ });\n+ }\n+\n+ if (options && options.ids && options.ids.length !== documents.length) {\n+ throw new Error(\"The number of ids must match the number of documents provided.\");\n+ }\n+\n+ documents.forEach(document => {\n+ metadatas.push(document.metadata)\n+ });\n+\n+ const tuples = customZip(ids, documents, vectors, metadatas);\n+\n+ // Insert embeddings\n+ for (const [id, document, embedding, metadata] of tuples) {\n+ const metadataColNames = this.metadataColumns.length > 0 ? 
`, \"${this.metadataColumns.join(\"\\\",\\\"\")}\"` : \"\";\n+\n+ let stmt = `INSERT INTO \"${this.schemaName}\".\"${this.tableName}\"(\"${this.idColumn}\", \"${this.contentColumn}\", \"${this.embeddingColumn}\" ${metadataColNames}`\n+ const values: { [key: string] } = {\n+ id,\n+ content: document.pageContent,\n+ embedding: `[${embedding.toString()}]`\n+ }\n+ let valuesStmt = \" VALUES (:id, :content, :embedding\";\n+\n+ // Add metadata\n+ const extra = metadata;\n+ for (const metadataColumn of this.metadataColumns) {\n+ if (Object.prototype.hasOwnProperty.call(metadata, metadataColumn)) {\n+ valuesStmt += `, :${metadataColumn}`;\n+ values[metadataColumn] = metadata[metadataColumn]\n+ delete extra[metadataColumn]\n+ } else {\n+ valuesStmt += \" ,null\"\n+ }\n+ }\n+\n+ // Add JSON column and/or close statement\n+ stmt += this.metadataJsonColumn ? `, ${this.metadataJsonColumn})` : \")\";", + "comment_created_at": "2025-03-24T05:49:51+00:00", + "comment_author": "jacoblee93", + "comment_body": "And the JSON column is just to make onboarding easier?", + "pr_file_module": null + }, + { + "comment_id": "2017533960", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7852, + "pr_file": "libs/langchain-google-cloud-sql-pg/src/vectorStore.ts", + "discussion_id": "2004667909", + "commented_code": "@@ -0,0 +1,423 @@\n+import { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { MaxMarginalRelevanceSearchOptions, VectorStore } from \"@langchain/core/vectorstores\";\n+import { Document } from \"@langchain/core/documents\";\n+import { v4 as uuidv4 } from \"uuid\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { DEFAULT_DISTANCE_STRATEGY, DistanceStrategy, QueryOptions } from \"./indexes.js\";\n+import PostgresEngine from \"./engine.js\";\n+import { customZip } from \"./utils/utils.js\";\n+\n+export interface PostgresVectorStoreArgs {\n+ schemaName?: string,\n+ contentColumn?: string,\n+ embeddingColumn?: string,\n+ metadataColumns?: Array,\n+ idColumn?: string,\n+ distanceStrategy?: DistanceStrategy,\n+ k?: number,\n+ fetchK?: number,\n+ lambdaMult?: number,\n+ ignoreMetadataColumns?: Array,\n+ metadataJsonColumn?: string,\n+ indexQueryOptions?: QueryOptions\n+}\n+\n+export interface dbConfigArgs {\n+ engine: PostgresEngine;\n+ tableName: string;\n+ dbConfig?: PostgresVectorStoreArgs;\n+}\n+\n+export class PostgresVectorStore extends VectorStore {\n+ declare FilterType: string;\n+\n+ engine: PostgresEngine;\n+\n+ embeddings: EmbeddingsInterface;\n+\n+ tableName: string;\n+\n+ schemaName: string;\n+\n+ contentColumn: string;\n+\n+ embeddingColumn: string;\n+\n+ metadataColumns: Array;\n+\n+ ignoreMetadataColumns: Array;\n+\n+ idColumn: string;\n+\n+ metadataJsonColumn: string;\n+\n+ distanceStrategy: DistanceStrategy;\n+\n+ k: number;\n+\n+ fetchK: number;\n+\n+ lambdaMult: number;\n+\n+ indexQueryOptions: QueryOptions;\n+\n+ /**\n+ * Initializes a new vector store with embeddings and database configuration.\n+ *\n+ * @param embeddings - Instance of `EmbeddingsInterface` used to embed queries.\n+ * @param dbConfig - Configuration settings for the database or storage system.\n+ */\n+ constructor(embeddings: EmbeddingsInterface, dbConfig: Record) {\n+ super(embeddings, dbConfig);\n+ this.embeddings = embeddings;\n+ this.engine = dbConfig.engine;\n+ this.tableName = dbConfig.tableName;\n+ this.schemaName = dbConfig.schemaName;\n+ this.contentColumn = dbConfig.contentColumn;\n+ this.embeddingColumn = dbConfig.embeddingColumn;\n+ 
this.metadataColumns = dbConfig.metadataColumns ? dbConfig.metadataColumns : [];\n+ this.ignoreMetadataColumns = dbConfig.ignoreMetadataColumns;\n+ this.idColumn = dbConfig.idColumn;\n+ this.metadataJsonColumn = dbConfig.metadataJsonColumn;\n+ this.distanceStrategy = dbConfig.distanceStrategy;\n+ this.k = dbConfig.k;\n+ this.fetchK = dbConfig.fetchK;\n+ this.lambdaMult = dbConfig.lambdaMult;\n+ this.indexQueryOptions = dbConfig.indexQueryOptions;\n+ }\n+\n+ /**\n+ * Create a new PostgresVectorStore instance.\n+ * @param {PostgresEngine} engine Required - Connection pool engine for managing connections to Cloud SQL for PostgreSQL database.\n+ * @param {Embeddings} embeddings Required - Text embedding model to use.\n+ * @param {string} tableName Required - Name of an existing table or table to be created.\n+ * @param {string} schemaName Database schema name of the table. Defaults to \"public\".\n+ * @param {string} contentColumn Column that represent a Document's page_content. Defaults to \"content\".\n+ * @param {string} embeddingColumn Column for embedding vectors. The embedding is generated from the document value. Defaults to \"embedding\".\n+ * @param {Array} metadataColumns Column(s) that represent a document's metadata.\n+ * @param {Array} ignoreMetadataColumns Optional - Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns.\n+ * @param {string} idColumn Column that represents the Document's id. Defaults to \"langchain_id\".\n+ * @param {string} metadataJsonColumn Optional - Column to store metadata as JSON. Defaults to \"langchain_metadata\".\n+ * @param {DistanceStrategy} distanceStrategy Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.\n+ * @param {number} k Number of Documents to return from search. Defaults to 4.\n+ * @param {number} fetchK Number of Documents to fetch to pass to MMR algorithm.\n+ * @param {number} lambdaMult Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. 
Defaults to 0.5.\n+ * @param {QueryOptions} indexQueryOptions Optional - Index query option.\n+ * @returns PostgresVectorStore instance.\n+ */\n+ static async create(\n+ engine: PostgresEngine,\n+ embeddings: EmbeddingsInterface,\n+ tableName: string,\n+ {\n+ schemaName = \"public\",\n+ contentColumn = \"content\",\n+ embeddingColumn = \"embedding\",\n+ metadataColumns = [],\n+ ignoreMetadataColumns,\n+ idColumn = \"langchain_id\",\n+ metadataJsonColumn = \"langchain_metadata\",\n+ distanceStrategy = DEFAULT_DISTANCE_STRATEGY,\n+ k = 4,\n+ fetchK = 20,\n+ lambdaMult = 0.5,\n+ indexQueryOptions\n+ }: PostgresVectorStoreArgs = {}\n+ ): Promise {\n+\n+ if (metadataColumns !== undefined && ignoreMetadataColumns !== undefined) {\n+ throw Error(\"Can not use both metadata_columns and ignore_metadata_columns.\");\n+ }\n+\n+ const { rows } = await engine.pool.raw(`SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '${tableName}' AND table_schema = '${schemaName}'`);\n+ const columns: { [key: string] } = {};\n+\n+ for (const index in rows) {\n+ if (rows[index]) {\n+ const row = rows[index];\n+ columns[row.column_name] = row.data_type\n+ }\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, idColumn)) {\n+ throw Error(`Id column: ${idColumn}, does not exist.`);\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, contentColumn)) {\n+ throw Error(`Content column: ${contentColumn}, does not exist.`);\n+ }\n+\n+ const contentType = columns[contentColumn];\n+\n+ if (contentType !== \"text\" && !contentType.includes(\"char\")) {\n+ throw Error(`Content column: ${contentColumn}, is type: ${contentType}. It must be a type of character string.`);\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, embeddingColumn)) {\n+ throw Error(`Embedding column: ${embeddingColumn}, does not exist.`);\n+ }\n+\n+ if (columns[embeddingColumn] !== \"USER-DEFINED\") {\n+ throw Error(`Embedding column: ${embeddingColumn} is not of type Vector.`);\n+ }\n+\n+ const jsonColumn = Object.prototype.hasOwnProperty.call(columns, metadataJsonColumn) ? metadataJsonColumn : \"\";\n+\n+ for (const column of metadataColumns) {\n+ if (!Object.prototype.hasOwnProperty.call(columns, column)) {\n+ throw Error(`Metadata column: ${column}, does not exist.`);\n+ }\n+ }\n+\n+ const allColumns = columns;\n+ let allMetadataColumns: string[];\n+ if (ignoreMetadataColumns !== undefined && ignoreMetadataColumns.length > 0) {\n+ for (const column of ignoreMetadataColumns) {\n+ delete allColumns[column];\n+ }\n+\n+ delete allColumns[idColumn];\n+ delete allColumns[contentColumn];\n+ delete allColumns[embeddingColumn];\n+ allMetadataColumns = Object.keys(allColumns);\n+ }\n+ return new PostgresVectorStore(\n+ embeddings,\n+ {\n+ engine,\n+ tableName,\n+ schemaName,\n+ contentColumn,\n+ embeddingColumn,\n+ metadataColumns: allMetadataColumns,\n+ ignoreMetadataColumns,\n+ idColumn,\n+ metadataJsonColumn: jsonColumn,\n+ distanceStrategy,\n+ k,\n+ fetchK,\n+ lambdaMult,\n+ indexQueryOptions\n+ }\n+ )\n+ }\n+\n+ static async fromTexts(texts: string[], metadatas: object[] | object, embeddings: EmbeddingsInterface, dbConfig: dbConfigArgs): Promise {\n+ const documents: Document[] = [];\n+\n+ for (let i = 0; i < texts.length; i += 1) {\n+ const doc = new Document({\n+ pageContent: texts[i],\n+ metadata: Array.isArray(metadatas) ? 
metadatas[i] : metadatas\n+ })\n+ documents.push(doc);\n+ }\n+\n+ return PostgresVectorStore.fromDocuments(documents, embeddings, dbConfig);\n+ }\n+\n+ static async fromDocuments(docs: Document[], embeddings: EmbeddingsInterface, dbConfig: dbConfigArgs): Promise {\n+ const { engine } = dbConfig;\n+ const { tableName } = dbConfig;\n+ const config = dbConfig.dbConfig;\n+ const vectorStore = await this.create(engine, embeddings, tableName, config);\n+\n+ await vectorStore.addDocuments(docs)\n+\n+ return vectorStore;\n+ }\n+\n+ async addVectors(vectors: number[][], documents: Document[], options?: { ids?: string[] }): Promise {\n+ let ids: string[] = [];\n+ const metadatas: Record[] = []\n+\n+ if (vectors.length !== documents.length) {\n+ throw new Error(\"The number of vectors must match the number of documents provided.\");\n+ }\n+\n+ if (options?.ids) {\n+ ids = options.ids;\n+ } else {\n+ documents.forEach(document => {\n+ if (document.id !== undefined) {\n+ ids.push(document.id);\n+ } else {\n+ ids.push(uuidv4());\n+ }\n+ });\n+ }\n+\n+ if (options && options.ids && options.ids.length !== documents.length) {\n+ throw new Error(\"The number of ids must match the number of documents provided.\");\n+ }\n+\n+ documents.forEach(document => {\n+ metadatas.push(document.metadata)\n+ });\n+\n+ const tuples = customZip(ids, documents, vectors, metadatas);\n+\n+ // Insert embeddings\n+ for (const [id, document, embedding, metadata] of tuples) {\n+ const metadataColNames = this.metadataColumns.length > 0 ? `, \"${this.metadataColumns.join(\"\\\",\\\"\")}\"` : \"\";\n+\n+ let stmt = `INSERT INTO \"${this.schemaName}\".\"${this.tableName}\"(\"${this.idColumn}\", \"${this.contentColumn}\", \"${this.embeddingColumn}\" ${metadataColNames}`\n+ const values: { [key: string] } = {\n+ id,\n+ content: document.pageContent,\n+ embedding: `[${embedding.toString()}]`\n+ }\n+ let valuesStmt = \" VALUES (:id, :content, :embedding\";\n+\n+ // Add metadata\n+ const extra = metadata;\n+ for (const metadataColumn of this.metadataColumns) {\n+ if (Object.prototype.hasOwnProperty.call(metadata, metadataColumn)) {\n+ valuesStmt += `, :${metadataColumn}`;\n+ values[metadataColumn] = metadata[metadataColumn]\n+ delete extra[metadataColumn]\n+ } else {\n+ valuesStmt += \" ,null\"\n+ }\n+ }\n+\n+ // Add JSON column and/or close statement\n+ stmt += this.metadataJsonColumn ? `, ${this.metadataJsonColumn})` : \")\";", + "comment_created_at": "2025-03-27T20:07:53+00:00", + "comment_author": "averikitsch", + "comment_body": "Yes and we don't want users to lose metadata if not specifically defined ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1456406645", + "pr_number": 4044, + "pr_file": "libs/langchain-community/src/vectorstores/azure_aisearch.ts", + "created_at": "2024-01-17T19:58:12+00:00", + "commented_code": "import * as uuid from \"uuid\";\nimport {\n SearchClient,\n SearchIndexClient,\n AzureKeyCredential,\n IndexingResult,\n SearchIndex,\n} from \"@azure/search-documents\";\nimport {\n MaxMarginalRelevanceSearchOptions,\n VectorStore,\n} from \"@langchain/core/vectorstores\";\nimport type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\nimport { Document } from \"@langchain/core/documents\";\nimport { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\nimport { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n\n/**\n * Azure AI Search query type.\n */\nexport const AzureAISearchQueryType = {\n /** Vector search. 
*/\n Similarity: \"similarity\",\n /** Hybrid full text and vector search. */\n SimilarityHybrid: \"similarity_hybrid\",\n /** Hybrid full text and vector search with semantic ranking. */\n SemanticHybrid: \"semantic_hybrid\",\n} as const;\n\n/**\n * Azure AI Search query type.\n */\nexport type AzureAISearchQueryType =\n (typeof AzureAISearchQueryType)[keyof typeof AzureAISearchQueryType];\n\n/**\n * Azure AI Search settings.\n */\nexport interface AzureAISearchQueryOptions {\n readonly type: AzureAISearchQueryType;\n readonly semanticConfigurationName?: string;\n}\n\n/**\n * Configuration options for the `AzureAISearchStore` constructor.\n */\nexport interface AzureAISearchConfig {\n readonly client?: SearchClient;\n readonly indexName?: string;\n readonly endpoint?: string;\n readonly key?: string;\n readonly search: AzureAISearchQueryOptions;\n /**\n * The amount of documents to chunk by when adding vectors.\n * @default 100\n */\n readonly chunkSize?: number;\n /**\n * The amount of documents to embed at once when adding documents.\n * Note that some providers like Azure OpenAI can only embed 16 documents\n * at a time.\n * @default 16\n */\n readonly embeddingBatchSize?: number;\n}\n\n/**\n * Azure AI Search options metadata schema.\n * If yout want to add custom data, use the attributes property.\n */\nexport type AzureAISearchDocumentMetadata = {\n source: string;\n attributes?: Array<{ key: string; value: string }>;\n};\n\n/**\n * Azure AI Search indexed document.\n */\nexport type AzureAISearchDocument = {\n id: string;\n content: string;\n content_vector: number[];\n metadata: AzureAISearchDocumentMetadata;\n};\n\n/**\n * Azure AI Search options for adding documents.\n */\nexport type AzureAISearchAddDocumentsOptions = {\n ids?: string[];\n};\n\nconst DEFAULT_FIELD_ID = \"id\";\nconst DEFAULT_FIELD_CONTENT = \"content\";\nconst DEFAULT_FIELD_CONTENT_VECTOR = \"content_vector\";\nconst DEFAULT_FIELD_METADATA = \"metadata\";\nconst DEFAULT_FIELD_METADATA_SOURCE = \"source\";\nconst DEFAULT_FIELD_METADATA_ATTRS = \"attributes\";\n\n/**\n * Azure AI Search vector store.\n * To use this, you should have:\n * - the `@azure/search-documents` NPM package installed\n * - an endpoint and key to the Azure AI Search instance\n *\n * If you directly provide a `SearchClient` instance, you need to ensure that\n * an index has been created. When using and endpoint and key, the index will\n * be created automatically if it does not exist.\n */\nexport class AzureAISearchVectorStore extends VectorStore {\n declare FilterType: string;\n\n get lc_secrets(): { [key: string]: string } {\n return {\n endpoint: \"AZURE_AISEARCH_ENDPOINT\",\n key: \"AZURE_AISEARCH_KEY\",\n };\n }\n\n _vectorstoreType(): string {\n return \"azure_aisearch\";\n }\n\n private readonly initPromise: Promise;\n\n private readonly client: SearchClient;\n\n private readonly indexName: string;\n\n private readonly chunkSize: number;\n\n private readonly embeddingBatchSize: number;\n\n private readonly options: AzureAISearchQueryOptions;\n\n constructor(embeddings: EmbeddingsInterface, config: AzureAISearchConfig) {\n super(embeddings, config);\n\n const endpoint =\n config.endpoint ?? getEnvironmentVariable(\"AZURE_AISEARCH_ENDPOINT\");\n const key = config.key ?? getEnvironmentVariable(\"AZURE_AISEARCH_KEY\");\n\n if (!config.client && !endpoint && !key) {\n throw new Error(\n \"Azure AI Search client or connection string must be set.\"\n );\n }\n\n this.indexName = config.indexName ?? 
\"vectorsearch\";\n this.chunkSize = config.chunkSize ?? 100;\n this.embeddingBatchSize = config.embeddingBatchSize ?? 16;\n\n if (!config.client) {\n // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n const credential = new AzureKeyCredential(key!);\n // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n this.client = new SearchClient(endpoint!, this.indexName, credential);\n // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n const indexClient = new SearchIndexClient(endpoint!, credential);\n\n // Start initialization, but don't wait for it to finish here\n this.initPromise = this.ensureIndexExists(indexClient).catch((error) => {\n console.error(\n \"Error during Azure AI Search index initialization:\",\n error\n );\n });\n } else {\n this.client = config.client;\n }\n\n this.options = config.search;\n this.embeddings = embeddings;\n }\n\n /**\n * Removes specified documents from the AzureAISearchVectorStore using a filter.\n * @param filter OData filter to find documents to delete.\n * @returns A promise that resolves when the documents have been removed.\n */\n async deleteMany(filter: string): Promise {\n const { results } = await this.client.search(\"*\", {\n filter,\n });\n\n const ids: string[] = [];\n for await (const item of results) {\n ids.push(item.document.id);\n }\n\n const { results: deleteResults } = await this.client.deleteDocuments(\n DEFAULT_FIELD_ID,\n ids\n );\n return deleteResults;\n }\n\n /**\n * Removes specified documents from the AzureAISearchVectorStore.\n * @param ids IDs of the documents to be removed.\n * @returns A promise that resolves when the documents have been removed.\n */\n async deleteById(ids: string | string[]): Promise {\n await this.initPromise;\n const { results } = await this.client.deleteDocuments(\n DEFAULT_FIELD_ID,\n Array.isArray(ids) ? ids : [ids]\n );\n return results;\n }\n\n /**\n * Adds documents to the AzureAISearchVectorStore.\n * Documents are chunked into batches of size `embeddingBatchSize` then\n * embedded and added to the AzureAISearchVectorStore.\n * @param documents The documents to add.\n * @param options Options for adding documents.\n * @returns A promise that resolves to the ids of the added documents.\n */\n async addDocuments(\n documents: Document[],\n options?: AzureAISearchAddDocumentsOptions\n ) {\n const texts = documents.map(({ pageContent }) => pageContent);\n const results: string[] = [];\n\n for (let i = 0; i < texts.length; i += this.embeddingBatchSize) {\n const batch = texts.slice(i, i + this.embeddingBatchSize);\n const docsBatch = documents.slice(i, i + this.embeddingBatchSize);\n const batchEmbeddings: number[][] = await this.embeddings.embedDocuments(\n batch\n );\n const batchResult = await this.addVectors(\n batchEmbeddings,\n docsBatch,\n options\n );\n\n results.push(...batchResult);\n }\n\n return results;\n }\n\n /**\n * Adds vectors to the AzureAISearchVectorStore.\n * @param vectors Vectors to be added.\n * @param documents Corresponding documents to be added.\n * @param options Options for adding documents.\n * @returns A promise that resolves to the ids of the added documents.\n */\n async addVectors(\n vectors: number[][],\n documents: Document[],\n options?: AzureAISearchAddDocumentsOptions\n ): Promise {\n const ids = options?.ids ?? 
documents.map(() => uuid.v4());\n const entities: AzureAISearchDocument[] = documents.map((doc, idx) => ({\n id: ids[idx],\n content: doc.pageContent,\n content_vector: vectors[idx],\n metadata: {\n source: doc.metadata?.source,\n attributes: doc.metadata?.attributes ?? [],\n },\n }));\n\n await this.initPromise;\n for (let i = 0; i < entities.length; i += this.chunkSize) {\n const chunk = entities.slice(i, i + this.chunkSize);\n await this.client.uploadDocuments(chunk, { throwOnAnyFailure: true });\n }\n\n return ids;\n }\n\n /**\n * Performs a similarity search using query type specified in configuration.\n * @param query Query text for the similarity search.\n * @param k=4 Number of nearest neighbors to return.\n * @param filter Optional OData filter for the documents.\n * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n */\n async similaritySearch(\n query: string,\n k = 4,\n filter: this[\"FilterType\"] | undefined = undefined\n ): Promise {\n const results = await this.similaritySearchWithScore(query, k, filter);\n\n return results.map((result) => result[0]);\n }\n\n /**\n * Performs a similarity search using query type specified in configuration.\n * @param query Query text for the similarity search.\n * @param k=4 Number of nearest neighbors to return.\n * @param filter Optional OData filter for the documents.\n * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n */\n async similaritySearchWithScore(\n query: string,\n k = 4,\n filter: this[\"FilterType\"] | undefined = undefined\n ): Promise<[Document, number][]> {\n const searchType = this.options.type;\n\n if (searchType === AzureAISearchQueryType.Similarity) {\n return this.similaritySearchVectorWithScore(\n await this.embeddings.embedQuery(query),\n k,\n filter\n );\n } else if (searchType === AzureAISearchQueryType.SimilarityHybrid) {\n return this.hybridSearchVectorWithScore(\n query,\n await this.embeddings.embedQuery(query),\n k,\n filter\n );\n } else if (searchType === AzureAISearchQueryType.SemanticHybrid) {\n return this.semanticHybridSearchVectorWithScore(\n query,\n await this.embeddings.embedQuery(query),\n k,\n filter\n );\n }\n\n throw new Error(`Unrecognized search type '${searchType}'`);\n }\n\n /**\n * Performs a hybrid search using query text.\n * @param query Query text for the similarity search.\n * @param queryVector Query vector for the similarity search.\n * If not provided, the query text will be embedded.\n * @param k=4 Number of nearest neighbors to return.\n * @param filter Optional OData filter for the documents.\n * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n */\n async hybridSearchVectorWithScore(\n query: string,\n queryVector?: number[],\n k = 4,\n filter: string | undefined = undefined\n ): Promise<[Document, number][]> {\n const vector = queryVector ?? 
(await this.embeddings.embedQuery(query));\n\n await this.initPromise;\n const { results } = await this.client.search(query, {\n vectorSearchOptions: {\n queries: [\n {\n kind: \"vector\",\n vector,\n kNearestNeighborsCount: k,\n fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n },\n ],\n },\n filter,\n top: k,\n });\n\n const docsWithScore: [Document, number][] = [];\n\n for await (const item of results) {\n const document = new Document<\n AzureAISearchDocumentMetadata & { embedding: number[] }\n >({\n pageContent: item.document[DEFAULT_FIELD_CONTENT],\n metadata: {\n ...item.document[DEFAULT_FIELD_METADATA],\n embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n },\n });\n docsWithScore.push([document, item.score]);\n }\n\n return docsWithScore;\n }\n\n /**\n * Performs a hybrid search with semantic reranker using query text.\n * @param query Query text for the similarity search.\n * @param queryVector Query vector for the similarity search.\n * If not provided, the query text will be embedded.\n * @param k=4 Number of nearest neighbors to return.\n * @param filter Optional OData filter for the documents.\n * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n */\n async semanticHybridSearchVectorWithScore(\n query: string,\n queryVector?: number[],\n k = 4,\n filter: string | undefined = undefined\n ): Promise<[Document, number][]> {\n const vector = queryVector ?? (await this.embeddings.embedQuery(query));\n\n await this.initPromise;\n const { results } = await this.client.search(query, {\n vectorSearchOptions: {\n queries: [\n {\n kind: \"vector\",\n vector,\n kNearestNeighborsCount: k,\n fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n },\n ],\n },\n filter,\n top: k,\n queryType: \"semantic\",\n semanticSearchOptions: {\n configurationName: \"semantic-search-config\",\n captions: {\n captionType: \"extractive\",\n },\n answers: {\n answerType: \"extractive\",\n },\n },\n });\n\n const docsWithScore: [Document, number][] = [];\n\n for await (const item of results) {\n const document = new Document<\n AzureAISearchDocumentMetadata & { embedding: number[] }\n >({\n pageContent: item.document[DEFAULT_FIELD_CONTENT],\n metadata: {\n ...item.document[DEFAULT_FIELD_METADATA],\n embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n },\n });\n docsWithScore.push([document, item.score]);\n }\n\n return docsWithScore;\n }\n\n /**\n * Performs a similarity search on the vectors stored in the collection.\n * @param queryVector Query vector for the similarity search.\n * @param k=4 Number of nearest neighbors to return.\n * @param filter string OData filter for the documents.\n * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n */\n async similaritySearchVectorWithScore(\n query: number[],\n k: number,\n filter?: string\n ): Promise<[Document, number][]> {\n await this.initPromise;\n\n const { results } = await this.client.search(\"*\", {\n vectorSearchOptions: {\n queries: [\n {\n kind: \"vector\",\n vector: query,\n kNearestNeighborsCount: k,\n fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n },\n ],\n },\n filter,\n });\n\n const docsWithScore: [Document, number][] = [];\n\n for await (const item of results) {\n const document = new Document<\n AzureAISearchDocumentMetadata & { embedding: number[] }\n >({\n pageContent: item.document[DEFAULT_FIELD_CONTENT],\n metadata: {\n ...item.document[DEFAULT_FIELD_METADATA],\n embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n },\n });\n 
docsWithScore.push([document, item.score]);\n }\n\n return docsWithScore;\n }\n\n /**\n * Return documents selected using the maximal marginal relevance.\n * Maximal marginal relevance optimizes for similarity to the query AND\n * diversity among selected documents.\n * @param query Text to look up documents similar to.\n * @param options.k Number of documents to return.\n * @param options.fetchK=20 Number of documents to fetch before passing to\n * the MMR algorithm.\n * @param options.lambda=0.5 Number between 0 and 1 that determines the\n * degree of diversity among the results, where 0 corresponds to maximum\n * diversity and 1 to minimum diversity.\n * @returns List of documents selected by maximal marginal relevance.\n */\n async maxMarginalRelevanceSearch(\n query: string,\n options: MaxMarginalRelevanceSearchOptions\n ): Promise {\n const { k, fetchK = 20, lambda = 0.5 } = options;\n\n const queryEmbedding = await this.embeddings.embedQuery(query);\n const docs = await this.similaritySearchVectorWithScore(\n queryEmbedding,\n fetchK\n );\n const embeddingList = docs.map((doc) => doc[0].metadata.embedding);\n\n // Re-rank the results using MMR\n const mmrIndexes = maximalMarginalRelevance(\n queryEmbedding,\n embeddingList,\n lambda,\n k\n );\n\n const mmrDocs = mmrIndexes.map((index) => docs[index][0]);\n return mmrDocs;\n }\n\n /**\n * Ensures that an index exists on the AzureAISearchVectorStore.\n * @param indexClient The Azure AI Search index client.\n * @returns A promise that resolves when the AzureAISearchVectorStore index has been initialized.\n * @protected\n */\n protected async ensureIndexExists(\n indexClient: SearchIndexClient\n ): Promise {\n try {\n await indexClient.getIndex(this.indexName);\n } catch (e) {\n // Index does not exists, create it\n const searchIndex = this.createSearchIndexDefinition(this.indexName);\n await indexClient.createIndex(searchIndex);\n }\n }\n\n /**\n * Prepares the search index definition for Azure AI Search.\n * @param indexName The name of the index.\n * @returns The SearchIndex object.\n * @protected\n */\n protected createSearchIndexDefinition(indexName: string): SearchIndex {\n return {\n name: indexName,\n vectorSearch: {\n algorithms: [\n {\n name: \"vector-search-algorithm\",\n kind: \"hnsw\",\n parameters: {\n m: 4,\n efSearch: 500,\n metric: \"cosine\",\n efConstruction: 400,\n },\n },\n ],\n profiles: [\n {\n name: \"vector-search-profile\",\n algorithmConfigurationName: \"vector-search-algorithm\",\n },\n ],\n },\n semanticSearch: {\n defaultConfigurationName: \"semantic-search-config\",\n configurations: [\n {\n name: \"semantic-search-config\",\n prioritizedFields: {\n contentFields: [\n {\n name: DEFAULT_FIELD_CONTENT,\n },\n ],\n keywordsFields: [\n {\n name: DEFAULT_FIELD_CONTENT,\n },\n ],\n },\n },\n ],\n },\n fields: [\n {\n name: DEFAULT_FIELD_ID,\n filterable: true,\n key: true,\n type: \"Edm.String\",\n },\n {\n name: DEFAULT_FIELD_CONTENT,\n searchable: true,\n filterable: true,\n type: \"Edm.String\",\n },\n {\n name: DEFAULT_FIELD_CONTENT_VECTOR,\n searchable: true,\n type: \"Collection(Edm.Single)\",\n vectorSearchDimensions: 1536,\n vectorSearchProfileName: \"vector-search-profile\",\n },\n {\n name: DEFAULT_FIELD_METADATA,\n type: \"Edm.ComplexType\",", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1456406645", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 4044, + "pr_file": "libs/langchain-community/src/vectorstores/azure_aisearch.ts", + 
"discussion_id": "1456406645", + "commented_code": "@@ -0,0 +1,694 @@\n+import * as uuid from \"uuid\";\n+import {\n+ SearchClient,\n+ SearchIndexClient,\n+ AzureKeyCredential,\n+ IndexingResult,\n+ SearchIndex,\n+} from \"@azure/search-documents\";\n+import {\n+ MaxMarginalRelevanceSearchOptions,\n+ VectorStore,\n+} from \"@langchain/core/vectorstores\";\n+import type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { Document } from \"@langchain/core/documents\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export const AzureAISearchQueryType = {\n+ /** Vector search. */\n+ Similarity: \"similarity\",\n+ /** Hybrid full text and vector search. */\n+ SimilarityHybrid: \"similarity_hybrid\",\n+ /** Hybrid full text and vector search with semantic ranking. */\n+ SemanticHybrid: \"semantic_hybrid\",\n+} as const;\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export type AzureAISearchQueryType =\n+ (typeof AzureAISearchQueryType)[keyof typeof AzureAISearchQueryType];\n+\n+/**\n+ * Azure AI Search settings.\n+ */\n+export interface AzureAISearchQueryOptions {\n+ readonly type: AzureAISearchQueryType;\n+ readonly semanticConfigurationName?: string;\n+}\n+\n+/**\n+ * Configuration options for the `AzureAISearchStore` constructor.\n+ */\n+export interface AzureAISearchConfig {\n+ readonly client?: SearchClient;\n+ readonly indexName?: string;\n+ readonly endpoint?: string;\n+ readonly key?: string;\n+ readonly search: AzureAISearchQueryOptions;\n+ /**\n+ * The amount of documents to chunk by when adding vectors.\n+ * @default 100\n+ */\n+ readonly chunkSize?: number;\n+ /**\n+ * The amount of documents to embed at once when adding documents.\n+ * Note that some providers like Azure OpenAI can only embed 16 documents\n+ * at a time.\n+ * @default 16\n+ */\n+ readonly embeddingBatchSize?: number;\n+}\n+\n+/**\n+ * Azure AI Search options metadata schema.\n+ * If yout want to add custom data, use the attributes property.\n+ */\n+export type AzureAISearchDocumentMetadata = {\n+ source: string;\n+ attributes?: Array<{ key: string; value: string }>;\n+};\n+\n+/**\n+ * Azure AI Search indexed document.\n+ */\n+export type AzureAISearchDocument = {\n+ id: string;\n+ content: string;\n+ content_vector: number[];\n+ metadata: AzureAISearchDocumentMetadata;\n+};\n+\n+/**\n+ * Azure AI Search options for adding documents.\n+ */\n+export type AzureAISearchAddDocumentsOptions = {\n+ ids?: string[];\n+};\n+\n+const DEFAULT_FIELD_ID = \"id\";\n+const DEFAULT_FIELD_CONTENT = \"content\";\n+const DEFAULT_FIELD_CONTENT_VECTOR = \"content_vector\";\n+const DEFAULT_FIELD_METADATA = \"metadata\";\n+const DEFAULT_FIELD_METADATA_SOURCE = \"source\";\n+const DEFAULT_FIELD_METADATA_ATTRS = \"attributes\";\n+\n+/**\n+ * Azure AI Search vector store.\n+ * To use this, you should have:\n+ * - the `@azure/search-documents` NPM package installed\n+ * - an endpoint and key to the Azure AI Search instance\n+ *\n+ * If you directly provide a `SearchClient` instance, you need to ensure that\n+ * an index has been created. 
When using and endpoint and key, the index will\n+ * be created automatically if it does not exist.\n+ */\n+export class AzureAISearchVectorStore extends VectorStore {\n+ declare FilterType: string;\n+\n+ get lc_secrets(): { [key: string]: string } {\n+ return {\n+ endpoint: \"AZURE_AISEARCH_ENDPOINT\",\n+ key: \"AZURE_AISEARCH_KEY\",\n+ };\n+ }\n+\n+ _vectorstoreType(): string {\n+ return \"azure_aisearch\";\n+ }\n+\n+ private readonly initPromise: Promise;\n+\n+ private readonly client: SearchClient;\n+\n+ private readonly indexName: string;\n+\n+ private readonly chunkSize: number;\n+\n+ private readonly embeddingBatchSize: number;\n+\n+ private readonly options: AzureAISearchQueryOptions;\n+\n+ constructor(embeddings: EmbeddingsInterface, config: AzureAISearchConfig) {\n+ super(embeddings, config);\n+\n+ const endpoint =\n+ config.endpoint ?? getEnvironmentVariable(\"AZURE_AISEARCH_ENDPOINT\");\n+ const key = config.key ?? getEnvironmentVariable(\"AZURE_AISEARCH_KEY\");\n+\n+ if (!config.client && !endpoint && !key) {\n+ throw new Error(\n+ \"Azure AI Search client or connection string must be set.\"\n+ );\n+ }\n+\n+ this.indexName = config.indexName ?? \"vectorsearch\";\n+ this.chunkSize = config.chunkSize ?? 100;\n+ this.embeddingBatchSize = config.embeddingBatchSize ?? 16;\n+\n+ if (!config.client) {\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ const credential = new AzureKeyCredential(key!);\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ this.client = new SearchClient(endpoint!, this.indexName, credential);\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ const indexClient = new SearchIndexClient(endpoint!, credential);\n+\n+ // Start initialization, but don't wait for it to finish here\n+ this.initPromise = this.ensureIndexExists(indexClient).catch((error) => {\n+ console.error(\n+ \"Error during Azure AI Search index initialization:\",\n+ error\n+ );\n+ });\n+ } else {\n+ this.client = config.client;\n+ }\n+\n+ this.options = config.search;\n+ this.embeddings = embeddings;\n+ }\n+\n+ /**\n+ * Removes specified documents from the AzureAISearchVectorStore using a filter.\n+ * @param filter OData filter to find documents to delete.\n+ * @returns A promise that resolves when the documents have been removed.\n+ */\n+ async deleteMany(filter: string): Promise {\n+ const { results } = await this.client.search(\"*\", {\n+ filter,\n+ });\n+\n+ const ids: string[] = [];\n+ for await (const item of results) {\n+ ids.push(item.document.id);\n+ }\n+\n+ const { results: deleteResults } = await this.client.deleteDocuments(\n+ DEFAULT_FIELD_ID,\n+ ids\n+ );\n+ return deleteResults;\n+ }\n+\n+ /**\n+ * Removes specified documents from the AzureAISearchVectorStore.\n+ * @param ids IDs of the documents to be removed.\n+ * @returns A promise that resolves when the documents have been removed.\n+ */\n+ async deleteById(ids: string | string[]): Promise {\n+ await this.initPromise;\n+ const { results } = await this.client.deleteDocuments(\n+ DEFAULT_FIELD_ID,\n+ Array.isArray(ids) ? 
ids : [ids]\n+ );\n+ return results;\n+ }\n+\n+ /**\n+ * Adds documents to the AzureAISearchVectorStore.\n+ * Documents are chunked into batches of size `embeddingBatchSize` then\n+ * embedded and added to the AzureAISearchVectorStore.\n+ * @param documents The documents to add.\n+ * @param options Options for adding documents.\n+ * @returns A promise that resolves to the ids of the added documents.\n+ */\n+ async addDocuments(\n+ documents: Document[],\n+ options?: AzureAISearchAddDocumentsOptions\n+ ) {\n+ const texts = documents.map(({ pageContent }) => pageContent);\n+ const results: string[] = [];\n+\n+ for (let i = 0; i < texts.length; i += this.embeddingBatchSize) {\n+ const batch = texts.slice(i, i + this.embeddingBatchSize);\n+ const docsBatch = documents.slice(i, i + this.embeddingBatchSize);\n+ const batchEmbeddings: number[][] = await this.embeddings.embedDocuments(\n+ batch\n+ );\n+ const batchResult = await this.addVectors(\n+ batchEmbeddings,\n+ docsBatch,\n+ options\n+ );\n+\n+ results.push(...batchResult);\n+ }\n+\n+ return results;\n+ }\n+\n+ /**\n+ * Adds vectors to the AzureAISearchVectorStore.\n+ * @param vectors Vectors to be added.\n+ * @param documents Corresponding documents to be added.\n+ * @param options Options for adding documents.\n+ * @returns A promise that resolves to the ids of the added documents.\n+ */\n+ async addVectors(\n+ vectors: number[][],\n+ documents: Document[],\n+ options?: AzureAISearchAddDocumentsOptions\n+ ): Promise {\n+ const ids = options?.ids ?? documents.map(() => uuid.v4());\n+ const entities: AzureAISearchDocument[] = documents.map((doc, idx) => ({\n+ id: ids[idx],\n+ content: doc.pageContent,\n+ content_vector: vectors[idx],\n+ metadata: {\n+ source: doc.metadata?.source,\n+ attributes: doc.metadata?.attributes ?? 
[],\n+ },\n+ }));\n+\n+ await this.initPromise;\n+ for (let i = 0; i < entities.length; i += this.chunkSize) {\n+ const chunk = entities.slice(i, i + this.chunkSize);\n+ await this.client.uploadDocuments(chunk, { throwOnAnyFailure: true });\n+ }\n+\n+ return ids;\n+ }\n+\n+ /**\n+ * Performs a similarity search using query type specified in configuration.\n+ * @param query Query text for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearch(\n+ query: string,\n+ k = 4,\n+ filter: this[\"FilterType\"] | undefined = undefined\n+ ): Promise {\n+ const results = await this.similaritySearchWithScore(query, k, filter);\n+\n+ return results.map((result) => result[0]);\n+ }\n+\n+ /**\n+ * Performs a similarity search using query type specified in configuration.\n+ * @param query Query text for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearchWithScore(\n+ query: string,\n+ k = 4,\n+ filter: this[\"FilterType\"] | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const searchType = this.options.type;\n+\n+ if (searchType === AzureAISearchQueryType.Similarity) {\n+ return this.similaritySearchVectorWithScore(\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ } else if (searchType === AzureAISearchQueryType.SimilarityHybrid) {\n+ return this.hybridSearchVectorWithScore(\n+ query,\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ } else if (searchType === AzureAISearchQueryType.SemanticHybrid) {\n+ return this.semanticHybridSearchVectorWithScore(\n+ query,\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ }\n+\n+ throw new Error(`Unrecognized search type '${searchType}'`);\n+ }\n+\n+ /**\n+ * Performs a hybrid search using query text.\n+ * @param query Query text for the similarity search.\n+ * @param queryVector Query vector for the similarity search.\n+ * If not provided, the query text will be embedded.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async hybridSearchVectorWithScore(\n+ query: string,\n+ queryVector?: number[],\n+ k = 4,\n+ filter: string | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const vector = queryVector ?? 
(await this.embeddings.embedQuery(query));\n+\n+ await this.initPromise;\n+ const { results } = await this.client.search(query, {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ top: k,\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ ...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Performs a hybrid search with semantic reranker using query text.\n+ * @param query Query text for the similarity search.\n+ * @param queryVector Query vector for the similarity search.\n+ * If not provided, the query text will be embedded.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async semanticHybridSearchVectorWithScore(\n+ query: string,\n+ queryVector?: number[],\n+ k = 4,\n+ filter: string | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const vector = queryVector ?? (await this.embeddings.embedQuery(query));\n+\n+ await this.initPromise;\n+ const { results } = await this.client.search(query, {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ top: k,\n+ queryType: \"semantic\",\n+ semanticSearchOptions: {\n+ configurationName: \"semantic-search-config\",\n+ captions: {\n+ captionType: \"extractive\",\n+ },\n+ answers: {\n+ answerType: \"extractive\",\n+ },\n+ },\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ ...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Performs a similarity search on the vectors stored in the collection.\n+ * @param queryVector Query vector for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter string OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearchVectorWithScore(\n+ query: number[],\n+ k: number,\n+ filter?: string\n+ ): Promise<[Document, number][]> {\n+ await this.initPromise;\n+\n+ const { results } = await this.client.search(\"*\", {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector: query,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ 
...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Return documents selected using the maximal marginal relevance.\n+ * Maximal marginal relevance optimizes for similarity to the query AND\n+ * diversity among selected documents.\n+ * @param query Text to look up documents similar to.\n+ * @param options.k Number of documents to return.\n+ * @param options.fetchK=20 Number of documents to fetch before passing to\n+ * the MMR algorithm.\n+ * @param options.lambda=0.5 Number between 0 and 1 that determines the\n+ * degree of diversity among the results, where 0 corresponds to maximum\n+ * diversity and 1 to minimum diversity.\n+ * @returns List of documents selected by maximal marginal relevance.\n+ */\n+ async maxMarginalRelevanceSearch(\n+ query: string,\n+ options: MaxMarginalRelevanceSearchOptions\n+ ): Promise {\n+ const { k, fetchK = 20, lambda = 0.5 } = options;\n+\n+ const queryEmbedding = await this.embeddings.embedQuery(query);\n+ const docs = await this.similaritySearchVectorWithScore(\n+ queryEmbedding,\n+ fetchK\n+ );\n+ const embeddingList = docs.map((doc) => doc[0].metadata.embedding);\n+\n+ // Re-rank the results using MMR\n+ const mmrIndexes = maximalMarginalRelevance(\n+ queryEmbedding,\n+ embeddingList,\n+ lambda,\n+ k\n+ );\n+\n+ const mmrDocs = mmrIndexes.map((index) => docs[index][0]);\n+ return mmrDocs;\n+ }\n+\n+ /**\n+ * Ensures that an index exists on the AzureAISearchVectorStore.\n+ * @param indexClient The Azure AI Search index client.\n+ * @returns A promise that resolves when the AzureAISearchVectorStore index has been initialized.\n+ * @protected\n+ */\n+ protected async ensureIndexExists(\n+ indexClient: SearchIndexClient\n+ ): Promise {\n+ try {\n+ await indexClient.getIndex(this.indexName);\n+ } catch (e) {\n+ // Index does not exists, create it\n+ const searchIndex = this.createSearchIndexDefinition(this.indexName);\n+ await indexClient.createIndex(searchIndex);\n+ }\n+ }\n+\n+ /**\n+ * Prepares the search index definition for Azure AI Search.\n+ * @param indexName The name of the index.\n+ * @returns The SearchIndex object.\n+ * @protected\n+ */\n+ protected createSearchIndexDefinition(indexName: string): SearchIndex {\n+ return {\n+ name: indexName,\n+ vectorSearch: {\n+ algorithms: [\n+ {\n+ name: \"vector-search-algorithm\",\n+ kind: \"hnsw\",\n+ parameters: {\n+ m: 4,\n+ efSearch: 500,\n+ metric: \"cosine\",\n+ efConstruction: 400,\n+ },\n+ },\n+ ],\n+ profiles: [\n+ {\n+ name: \"vector-search-profile\",\n+ algorithmConfigurationName: \"vector-search-algorithm\",\n+ },\n+ ],\n+ },\n+ semanticSearch: {\n+ defaultConfigurationName: \"semantic-search-config\",\n+ configurations: [\n+ {\n+ name: \"semantic-search-config\",\n+ prioritizedFields: {\n+ contentFields: [\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ },\n+ ],\n+ keywordsFields: [\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ },\n+ ],\n+ },\n+ },\n+ ],\n+ },\n+ fields: [\n+ {\n+ name: DEFAULT_FIELD_ID,\n+ filterable: true,\n+ key: true,\n+ type: \"Edm.String\",\n+ },\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ searchable: true,\n+ filterable: true,\n+ type: \"Edm.String\",\n+ },\n+ {\n+ name: DEFAULT_FIELD_CONTENT_VECTOR,\n+ searchable: true,\n+ type: \"Collection(Edm.Single)\",\n+ vectorSearchDimensions: 1536,\n+ vectorSearchProfileName: \"vector-search-profile\",\n+ },\n+ {\n+ name: DEFAULT_FIELD_METADATA,\n+ type: \"Edm.ComplexType\",", + 
"comment_created_at": "2024-01-17T19:58:12+00:00", + "comment_author": "pablocastro", + "comment_body": "Are the metadata fields known at index time? If they were, creating actual filterable fields of the right types instead of a dictionary-style complex type would be better, and you'd get more type fidelity (e.g. numbers, dates, etc.) and filter expressions would be richer (e.g. numeric/date range comparisons).", + "pr_file_module": null + }, + { + "comment_id": "1457277213", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 4044, + "pr_file": "libs/langchain-community/src/vectorstores/azure_aisearch.ts", + "discussion_id": "1456406645", + "commented_code": "@@ -0,0 +1,694 @@\n+import * as uuid from \"uuid\";\n+import {\n+ SearchClient,\n+ SearchIndexClient,\n+ AzureKeyCredential,\n+ IndexingResult,\n+ SearchIndex,\n+} from \"@azure/search-documents\";\n+import {\n+ MaxMarginalRelevanceSearchOptions,\n+ VectorStore,\n+} from \"@langchain/core/vectorstores\";\n+import type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { Document } from \"@langchain/core/documents\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export const AzureAISearchQueryType = {\n+ /** Vector search. */\n+ Similarity: \"similarity\",\n+ /** Hybrid full text and vector search. */\n+ SimilarityHybrid: \"similarity_hybrid\",\n+ /** Hybrid full text and vector search with semantic ranking. */\n+ SemanticHybrid: \"semantic_hybrid\",\n+} as const;\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export type AzureAISearchQueryType =\n+ (typeof AzureAISearchQueryType)[keyof typeof AzureAISearchQueryType];\n+\n+/**\n+ * Azure AI Search settings.\n+ */\n+export interface AzureAISearchQueryOptions {\n+ readonly type: AzureAISearchQueryType;\n+ readonly semanticConfigurationName?: string;\n+}\n+\n+/**\n+ * Configuration options for the `AzureAISearchStore` constructor.\n+ */\n+export interface AzureAISearchConfig {\n+ readonly client?: SearchClient;\n+ readonly indexName?: string;\n+ readonly endpoint?: string;\n+ readonly key?: string;\n+ readonly search: AzureAISearchQueryOptions;\n+ /**\n+ * The amount of documents to chunk by when adding vectors.\n+ * @default 100\n+ */\n+ readonly chunkSize?: number;\n+ /**\n+ * The amount of documents to embed at once when adding documents.\n+ * Note that some providers like Azure OpenAI can only embed 16 documents\n+ * at a time.\n+ * @default 16\n+ */\n+ readonly embeddingBatchSize?: number;\n+}\n+\n+/**\n+ * Azure AI Search options metadata schema.\n+ * If yout want to add custom data, use the attributes property.\n+ */\n+export type AzureAISearchDocumentMetadata = {\n+ source: string;\n+ attributes?: Array<{ key: string; value: string }>;\n+};\n+\n+/**\n+ * Azure AI Search indexed document.\n+ */\n+export type AzureAISearchDocument = {\n+ id: string;\n+ content: string;\n+ content_vector: number[];\n+ metadata: AzureAISearchDocumentMetadata;\n+};\n+\n+/**\n+ * Azure AI Search options for adding documents.\n+ */\n+export type AzureAISearchAddDocumentsOptions = {\n+ ids?: string[];\n+};\n+\n+const DEFAULT_FIELD_ID = \"id\";\n+const DEFAULT_FIELD_CONTENT = \"content\";\n+const DEFAULT_FIELD_CONTENT_VECTOR = \"content_vector\";\n+const DEFAULT_FIELD_METADATA = \"metadata\";\n+const DEFAULT_FIELD_METADATA_SOURCE = \"source\";\n+const DEFAULT_FIELD_METADATA_ATTRS = \"attributes\";\n+\n+/**\n+ * Azure AI 
Search vector store.\n+ * To use this, you should have:\n+ * - the `@azure/search-documents` NPM package installed\n+ * - an endpoint and key to the Azure AI Search instance\n+ *\n+ * If you directly provide a `SearchClient` instance, you need to ensure that\n+ * an index has been created. When using and endpoint and key, the index will\n+ * be created automatically if it does not exist.\n+ */\n+export class AzureAISearchVectorStore extends VectorStore {\n+ declare FilterType: string;\n+\n+ get lc_secrets(): { [key: string]: string } {\n+ return {\n+ endpoint: \"AZURE_AISEARCH_ENDPOINT\",\n+ key: \"AZURE_AISEARCH_KEY\",\n+ };\n+ }\n+\n+ _vectorstoreType(): string {\n+ return \"azure_aisearch\";\n+ }\n+\n+ private readonly initPromise: Promise;\n+\n+ private readonly client: SearchClient;\n+\n+ private readonly indexName: string;\n+\n+ private readonly chunkSize: number;\n+\n+ private readonly embeddingBatchSize: number;\n+\n+ private readonly options: AzureAISearchQueryOptions;\n+\n+ constructor(embeddings: EmbeddingsInterface, config: AzureAISearchConfig) {\n+ super(embeddings, config);\n+\n+ const endpoint =\n+ config.endpoint ?? getEnvironmentVariable(\"AZURE_AISEARCH_ENDPOINT\");\n+ const key = config.key ?? getEnvironmentVariable(\"AZURE_AISEARCH_KEY\");\n+\n+ if (!config.client && !endpoint && !key) {\n+ throw new Error(\n+ \"Azure AI Search client or connection string must be set.\"\n+ );\n+ }\n+\n+ this.indexName = config.indexName ?? \"vectorsearch\";\n+ this.chunkSize = config.chunkSize ?? 100;\n+ this.embeddingBatchSize = config.embeddingBatchSize ?? 16;\n+\n+ if (!config.client) {\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ const credential = new AzureKeyCredential(key!);\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ this.client = new SearchClient(endpoint!, this.indexName, credential);\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ const indexClient = new SearchIndexClient(endpoint!, credential);\n+\n+ // Start initialization, but don't wait for it to finish here\n+ this.initPromise = this.ensureIndexExists(indexClient).catch((error) => {\n+ console.error(\n+ \"Error during Azure AI Search index initialization:\",\n+ error\n+ );\n+ });\n+ } else {\n+ this.client = config.client;\n+ }\n+\n+ this.options = config.search;\n+ this.embeddings = embeddings;\n+ }\n+\n+ /**\n+ * Removes specified documents from the AzureAISearchVectorStore using a filter.\n+ * @param filter OData filter to find documents to delete.\n+ * @returns A promise that resolves when the documents have been removed.\n+ */\n+ async deleteMany(filter: string): Promise {\n+ const { results } = await this.client.search(\"*\", {\n+ filter,\n+ });\n+\n+ const ids: string[] = [];\n+ for await (const item of results) {\n+ ids.push(item.document.id);\n+ }\n+\n+ const { results: deleteResults } = await this.client.deleteDocuments(\n+ DEFAULT_FIELD_ID,\n+ ids\n+ );\n+ return deleteResults;\n+ }\n+\n+ /**\n+ * Removes specified documents from the AzureAISearchVectorStore.\n+ * @param ids IDs of the documents to be removed.\n+ * @returns A promise that resolves when the documents have been removed.\n+ */\n+ async deleteById(ids: string | string[]): Promise {\n+ await this.initPromise;\n+ const { results } = await this.client.deleteDocuments(\n+ DEFAULT_FIELD_ID,\n+ Array.isArray(ids) ? 
ids : [ids]\n+ );\n+ return results;\n+ }\n+\n+ /**\n+ * Adds documents to the AzureAISearchVectorStore.\n+ * Documents are chunked into batches of size `embeddingBatchSize` then\n+ * embedded and added to the AzureAISearchVectorStore.\n+ * @param documents The documents to add.\n+ * @param options Options for adding documents.\n+ * @returns A promise that resolves to the ids of the added documents.\n+ */\n+ async addDocuments(\n+ documents: Document[],\n+ options?: AzureAISearchAddDocumentsOptions\n+ ) {\n+ const texts = documents.map(({ pageContent }) => pageContent);\n+ const results: string[] = [];\n+\n+ for (let i = 0; i < texts.length; i += this.embeddingBatchSize) {\n+ const batch = texts.slice(i, i + this.embeddingBatchSize);\n+ const docsBatch = documents.slice(i, i + this.embeddingBatchSize);\n+ const batchEmbeddings: number[][] = await this.embeddings.embedDocuments(\n+ batch\n+ );\n+ const batchResult = await this.addVectors(\n+ batchEmbeddings,\n+ docsBatch,\n+ options\n+ );\n+\n+ results.push(...batchResult);\n+ }\n+\n+ return results;\n+ }\n+\n+ /**\n+ * Adds vectors to the AzureAISearchVectorStore.\n+ * @param vectors Vectors to be added.\n+ * @param documents Corresponding documents to be added.\n+ * @param options Options for adding documents.\n+ * @returns A promise that resolves to the ids of the added documents.\n+ */\n+ async addVectors(\n+ vectors: number[][],\n+ documents: Document[],\n+ options?: AzureAISearchAddDocumentsOptions\n+ ): Promise {\n+ const ids = options?.ids ?? documents.map(() => uuid.v4());\n+ const entities: AzureAISearchDocument[] = documents.map((doc, idx) => ({\n+ id: ids[idx],\n+ content: doc.pageContent,\n+ content_vector: vectors[idx],\n+ metadata: {\n+ source: doc.metadata?.source,\n+ attributes: doc.metadata?.attributes ?? 
[],\n+ },\n+ }));\n+\n+ await this.initPromise;\n+ for (let i = 0; i < entities.length; i += this.chunkSize) {\n+ const chunk = entities.slice(i, i + this.chunkSize);\n+ await this.client.uploadDocuments(chunk, { throwOnAnyFailure: true });\n+ }\n+\n+ return ids;\n+ }\n+\n+ /**\n+ * Performs a similarity search using query type specified in configuration.\n+ * @param query Query text for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearch(\n+ query: string,\n+ k = 4,\n+ filter: this[\"FilterType\"] | undefined = undefined\n+ ): Promise {\n+ const results = await this.similaritySearchWithScore(query, k, filter);\n+\n+ return results.map((result) => result[0]);\n+ }\n+\n+ /**\n+ * Performs a similarity search using query type specified in configuration.\n+ * @param query Query text for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearchWithScore(\n+ query: string,\n+ k = 4,\n+ filter: this[\"FilterType\"] | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const searchType = this.options.type;\n+\n+ if (searchType === AzureAISearchQueryType.Similarity) {\n+ return this.similaritySearchVectorWithScore(\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ } else if (searchType === AzureAISearchQueryType.SimilarityHybrid) {\n+ return this.hybridSearchVectorWithScore(\n+ query,\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ } else if (searchType === AzureAISearchQueryType.SemanticHybrid) {\n+ return this.semanticHybridSearchVectorWithScore(\n+ query,\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ }\n+\n+ throw new Error(`Unrecognized search type '${searchType}'`);\n+ }\n+\n+ /**\n+ * Performs a hybrid search using query text.\n+ * @param query Query text for the similarity search.\n+ * @param queryVector Query vector for the similarity search.\n+ * If not provided, the query text will be embedded.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async hybridSearchVectorWithScore(\n+ query: string,\n+ queryVector?: number[],\n+ k = 4,\n+ filter: string | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const vector = queryVector ?? 
(await this.embeddings.embedQuery(query));\n+\n+ await this.initPromise;\n+ const { results } = await this.client.search(query, {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ top: k,\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ ...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Performs a hybrid search with semantic reranker using query text.\n+ * @param query Query text for the similarity search.\n+ * @param queryVector Query vector for the similarity search.\n+ * If not provided, the query text will be embedded.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async semanticHybridSearchVectorWithScore(\n+ query: string,\n+ queryVector?: number[],\n+ k = 4,\n+ filter: string | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const vector = queryVector ?? (await this.embeddings.embedQuery(query));\n+\n+ await this.initPromise;\n+ const { results } = await this.client.search(query, {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ top: k,\n+ queryType: \"semantic\",\n+ semanticSearchOptions: {\n+ configurationName: \"semantic-search-config\",\n+ captions: {\n+ captionType: \"extractive\",\n+ },\n+ answers: {\n+ answerType: \"extractive\",\n+ },\n+ },\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ ...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Performs a similarity search on the vectors stored in the collection.\n+ * @param queryVector Query vector for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter string OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearchVectorWithScore(\n+ query: number[],\n+ k: number,\n+ filter?: string\n+ ): Promise<[Document, number][]> {\n+ await this.initPromise;\n+\n+ const { results } = await this.client.search(\"*\", {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector: query,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ 
...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Return documents selected using the maximal marginal relevance.\n+ * Maximal marginal relevance optimizes for similarity to the query AND\n+ * diversity among selected documents.\n+ * @param query Text to look up documents similar to.\n+ * @param options.k Number of documents to return.\n+ * @param options.fetchK=20 Number of documents to fetch before passing to\n+ * the MMR algorithm.\n+ * @param options.lambda=0.5 Number between 0 and 1 that determines the\n+ * degree of diversity among the results, where 0 corresponds to maximum\n+ * diversity and 1 to minimum diversity.\n+ * @returns List of documents selected by maximal marginal relevance.\n+ */\n+ async maxMarginalRelevanceSearch(\n+ query: string,\n+ options: MaxMarginalRelevanceSearchOptions\n+ ): Promise {\n+ const { k, fetchK = 20, lambda = 0.5 } = options;\n+\n+ const queryEmbedding = await this.embeddings.embedQuery(query);\n+ const docs = await this.similaritySearchVectorWithScore(\n+ queryEmbedding,\n+ fetchK\n+ );\n+ const embeddingList = docs.map((doc) => doc[0].metadata.embedding);\n+\n+ // Re-rank the results using MMR\n+ const mmrIndexes = maximalMarginalRelevance(\n+ queryEmbedding,\n+ embeddingList,\n+ lambda,\n+ k\n+ );\n+\n+ const mmrDocs = mmrIndexes.map((index) => docs[index][0]);\n+ return mmrDocs;\n+ }\n+\n+ /**\n+ * Ensures that an index exists on the AzureAISearchVectorStore.\n+ * @param indexClient The Azure AI Search index client.\n+ * @returns A promise that resolves when the AzureAISearchVectorStore index has been initialized.\n+ * @protected\n+ */\n+ protected async ensureIndexExists(\n+ indexClient: SearchIndexClient\n+ ): Promise {\n+ try {\n+ await indexClient.getIndex(this.indexName);\n+ } catch (e) {\n+ // Index does not exists, create it\n+ const searchIndex = this.createSearchIndexDefinition(this.indexName);\n+ await indexClient.createIndex(searchIndex);\n+ }\n+ }\n+\n+ /**\n+ * Prepares the search index definition for Azure AI Search.\n+ * @param indexName The name of the index.\n+ * @returns The SearchIndex object.\n+ * @protected\n+ */\n+ protected createSearchIndexDefinition(indexName: string): SearchIndex {\n+ return {\n+ name: indexName,\n+ vectorSearch: {\n+ algorithms: [\n+ {\n+ name: \"vector-search-algorithm\",\n+ kind: \"hnsw\",\n+ parameters: {\n+ m: 4,\n+ efSearch: 500,\n+ metric: \"cosine\",\n+ efConstruction: 400,\n+ },\n+ },\n+ ],\n+ profiles: [\n+ {\n+ name: \"vector-search-profile\",\n+ algorithmConfigurationName: \"vector-search-algorithm\",\n+ },\n+ ],\n+ },\n+ semanticSearch: {\n+ defaultConfigurationName: \"semantic-search-config\",\n+ configurations: [\n+ {\n+ name: \"semantic-search-config\",\n+ prioritizedFields: {\n+ contentFields: [\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ },\n+ ],\n+ keywordsFields: [\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ },\n+ ],\n+ },\n+ },\n+ ],\n+ },\n+ fields: [\n+ {\n+ name: DEFAULT_FIELD_ID,\n+ filterable: true,\n+ key: true,\n+ type: \"Edm.String\",\n+ },\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ searchable: true,\n+ filterable: true,\n+ type: \"Edm.String\",\n+ },\n+ {\n+ name: DEFAULT_FIELD_CONTENT_VECTOR,\n+ searchable: true,\n+ type: \"Collection(Edm.Single)\",\n+ vectorSearchDimensions: 1536,\n+ vectorSearchProfileName: \"vector-search-profile\",\n+ },\n+ {\n+ name: DEFAULT_FIELD_METADATA,\n+ type: \"Edm.ComplexType\",", + 
"comment_created_at": "2024-01-18T10:59:17+00:00", + "comment_author": "sinedied", + "comment_body": "Yes, I'm not quite happy with that one and it comes from the base implementation I started from. \r\nBut I thought as a default as least it makes it easier for the users who do not want to bother with the schema, as LC allows for arbitrary metadata.\r\n\r\nThat's also why I said I wand to provide more customizations options in another PR to allow for custom index schema for the users who wants better control over the data and filters.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-use-database-native-types.md b/_reviewers/langchainjs-use-database-native-types.md index 52ed306..4c5e14e 100644 --- a/_reviewers/langchainjs-use-database-native-types.md +++ b/_reviewers/langchainjs-use-database-native-types.md @@ -45,179 +45,3 @@ Always leverage database-native data types and appropriate schema design to maxi ``` This approach enables more efficient querying, better indexing capabilities, type validation at the database level, and allows you to leverage database-specific optimizations. Creating typed fields instead of dictionary-style complex types provides better type fidelity and enables richer filtering expressions like numeric/date range comparisons. - - -[ - { - "discussion_id": "2099231081", - "pr_number": 8007, - "pr_file": "libs/langchain-textsplitters/src/oracle_text_splitter.ts", - "created_at": "2025-05-21T03:31:55+00:00", - "commented_code": "import oracledb from \"oracledb\";\nimport { TextSplitter, TextSplitterParams } from \"./text_splitter.js\";\n\n/**\n * Split text into smaller pieces\n * @example\n * ```typescript\n * const splitter = new OracleTextSplitter(conn, params);\n * let chunks = await splitter.splitText(doc.pageContent);\n * ```\n */\nexport class OracleTextSplitter extends TextSplitter {\n protected conn: oracledb.Connection;\n\n protected pref: Record;\n\n static lc_name() {\n return \"OracleTextSplitter\";\n }\n\n constructor(\n conn: oracledb.Connection,\n pref: Record,\n fields?: TextSplitterParams\n ) {\n super(fields);\n this.conn = conn;\n this.pref = pref;\n }\n\n async splitText(text: string) {\n const chunks: string[] = [];\n\n const result = await this.conn.execute(\n (\n `select t.column_value as data from dbms_vector_chain.utl_to_chunks(:content, json(:pref)) t`\n ),\n {\n content: { val: text, dir: oracledb.BIND_IN, type: oracledb.CLOB },\n pref: JSON.stringify(this.pref),", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2099231081", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8007, - "pr_file": "libs/langchain-textsplitters/src/oracle_text_splitter.ts", - "discussion_id": "2099231081", - "commented_code": "@@ -0,0 +1,56 @@\n+import oracledb from \"oracledb\";\n+import { TextSplitter, TextSplitterParams } from \"./text_splitter.js\";\n+\n+/**\n+ * Split text into smaller pieces\n+ * @example\n+ * ```typescript\n+ * const splitter = new OracleTextSplitter(conn, params);\n+ * let chunks = await splitter.splitText(doc.pageContent);\n+ * ```\n+ */\n+export class OracleTextSplitter extends TextSplitter {\n+ protected conn: oracledb.Connection;\n+\n+ protected pref: Record;\n+\n+ static lc_name() {\n+ return \"OracleTextSplitter\";\n+ }\n+\n+ constructor(\n+ conn: oracledb.Connection,\n+ pref: Record,\n+ fields?: TextSplitterParams\n+ ) {\n+ super(fields);\n+ this.conn = conn;\n+ this.pref = pref;\n+ }\n+\n+ async splitText(text: 
string) {\n+ const chunks: string[] = [];\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select t.column_value as data from dbms_vector_chain.utl_to_chunks(:content, json(:pref)) t`\n+ ),\n+ {\n+ content: { val: text, dir: oracledb.BIND_IN, type: oracledb.CLOB },\n+ pref: JSON.stringify(this.pref),", - "comment_created_at": "2025-05-21T03:31:55+00:00", - "comment_author": "cjbj", - "comment_body": "What DBs are being targeted? Can you bind JSON as DB_TYPE_JSON? DItto other places JSON is used.", - "pr_file_module": null - }, - { - "comment_id": "2100349110", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8007, - "pr_file": "libs/langchain-textsplitters/src/oracle_text_splitter.ts", - "discussion_id": "2099231081", - "commented_code": "@@ -0,0 +1,56 @@\n+import oracledb from \"oracledb\";\n+import { TextSplitter, TextSplitterParams } from \"./text_splitter.js\";\n+\n+/**\n+ * Split text into smaller pieces\n+ * @example\n+ * ```typescript\n+ * const splitter = new OracleTextSplitter(conn, params);\n+ * let chunks = await splitter.splitText(doc.pageContent);\n+ * ```\n+ */\n+export class OracleTextSplitter extends TextSplitter {\n+ protected conn: oracledb.Connection;\n+\n+ protected pref: Record;\n+\n+ static lc_name() {\n+ return \"OracleTextSplitter\";\n+ }\n+\n+ constructor(\n+ conn: oracledb.Connection,\n+ pref: Record,\n+ fields?: TextSplitterParams\n+ ) {\n+ super(fields);\n+ this.conn = conn;\n+ this.pref = pref;\n+ }\n+\n+ async splitText(text: string) {\n+ const chunks: string[] = [];\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select t.column_value as data from dbms_vector_chain.utl_to_chunks(:content, json(:pref)) t`\n+ ),\n+ {\n+ content: { val: text, dir: oracledb.BIND_IN, type: oracledb.CLOB },\n+ pref: JSON.stringify(this.pref),", - "comment_created_at": "2025-05-21T13:47:56+00:00", - "comment_author": "hackerdave", - "comment_body": "I'm not sure if we were clear in the docs but the langchain integration is targeting Oracle Database 23ai which is the latest version. The vector features will only work on 23. 
Yes we could have used DB_TYPE_JSON which will work from 21 but in this case we are passing as a string.", - "pr_file_module": null - }, - { - "comment_id": "2101324757", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8007, - "pr_file": "libs/langchain-textsplitters/src/oracle_text_splitter.ts", - "discussion_id": "2099231081", - "commented_code": "@@ -0,0 +1,56 @@\n+import oracledb from \"oracledb\";\n+import { TextSplitter, TextSplitterParams } from \"./text_splitter.js\";\n+\n+/**\n+ * Split text into smaller pieces\n+ * @example\n+ * ```typescript\n+ * const splitter = new OracleTextSplitter(conn, params);\n+ * let chunks = await splitter.splitText(doc.pageContent);\n+ * ```\n+ */\n+export class OracleTextSplitter extends TextSplitter {\n+ protected conn: oracledb.Connection;\n+\n+ protected pref: Record;\n+\n+ static lc_name() {\n+ return \"OracleTextSplitter\";\n+ }\n+\n+ constructor(\n+ conn: oracledb.Connection,\n+ pref: Record,\n+ fields?: TextSplitterParams\n+ ) {\n+ super(fields);\n+ this.conn = conn;\n+ this.pref = pref;\n+ }\n+\n+ async splitText(text: string) {\n+ const chunks: string[] = [];\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select t.column_value as data from dbms_vector_chain.utl_to_chunks(:content, json(:pref)) t`\n+ ),\n+ {\n+ content: { val: text, dir: oracledb.BIND_IN, type: oracledb.CLOB },\n+ pref: JSON.stringify(this.pref),", - "comment_created_at": "2025-05-21T23:02:07+00:00", - "comment_author": "cjbj", - "comment_body": "Binding as DB_TYPE_JSON where possible will be more efficient.", - "pr_file_module": null - }, - { - "comment_id": "2101476268", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8007, - "pr_file": "libs/langchain-textsplitters/src/oracle_text_splitter.ts", - "discussion_id": "2099231081", - "commented_code": "@@ -0,0 +1,56 @@\n+import oracledb from \"oracledb\";\n+import { TextSplitter, TextSplitterParams } from \"./text_splitter.js\";\n+\n+/**\n+ * Split text into smaller pieces\n+ * @example\n+ * ```typescript\n+ * const splitter = new OracleTextSplitter(conn, params);\n+ * let chunks = await splitter.splitText(doc.pageContent);\n+ * ```\n+ */\n+export class OracleTextSplitter extends TextSplitter {\n+ protected conn: oracledb.Connection;\n+\n+ protected pref: Record;\n+\n+ static lc_name() {\n+ return \"OracleTextSplitter\";\n+ }\n+\n+ constructor(\n+ conn: oracledb.Connection,\n+ pref: Record,\n+ fields?: TextSplitterParams\n+ ) {\n+ super(fields);\n+ this.conn = conn;\n+ this.pref = pref;\n+ }\n+\n+ async splitText(text: string) {\n+ const chunks: string[] = [];\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select t.column_value as data from dbms_vector_chain.utl_to_chunks(:content, json(:pref)) t`\n+ ),\n+ {\n+ content: { val: text, dir: oracledb.BIND_IN, type: oracledb.CLOB },\n+ pref: JSON.stringify(this.pref),", - "comment_created_at": "2025-05-22T02:00:59+00:00", - "comment_author": "hackerdave", - "comment_body": "Switched to DB_TYPE_JSON", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1752684909", - "pr_number": 6676, - "pr_file": "langchain/src/retrievers/recency_ranked.ts", - "created_at": "2024-09-10T20:31:26+00:00", - "commented_code": "import { BaseRetriever } from \"@langchain/core/retrievers\";\nimport { VectorStoreInterface } from \"@langchain/core/vectorstores\";\nimport { Document } from \"@langchain/core/documents\";\n\nexport interface RecencyRankedRetrieverConfig {\n vectorStore: VectorStoreInterface;\n k: number;\n topK?: number;\n recencyWeight: 
number;\n}\n\nexport class RecencyRankedRetriever extends BaseRetriever {\n static lc_name() {\n return \"RecencyRankedRetriever\";\n }\n\n lc_namespace = [\"langchain\", \"retrievers\", \"recency_ranked\"];\n \n private vectorStore: VectorStoreInterface;\n\n private k: number;\n\n private topK: number;\n\n private recencyWeight: number;\n\n constructor(config: RecencyRankedRetrieverConfig) {\n super();\n this.vectorStore = config.vectorStore;\n this.k = config.k;\n this.topK = config.topK ?? config.k;\n this.recencyWeight = config.recencyWeight;\n }\n\n async _getRelevantDocuments(query: string): Promise {\n const relevantDocs = await this.vectorStore.similaritySearchWithScore(query, this.k);\n const rerankedDocs = this.recentDocumentRanker(relevantDocs, this.topK, this.recencyWeight);\n return rerankedDocs.map(([doc, _]) => doc);\n }\n\n async invoke(query: string): Promise {\n return this._getRelevantDocuments(query);\n }\n\n private recentDocumentRanker(\n documents: [Document, number][],\n topK: number,\n recencyWeight: number\n ): [Document, number][] {\n if (documents.length === 0) return [];\n\n if (!documents.every(([doc, _]) => doc.metadata.date instanceof Date)) {", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1752684909", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6676, - "pr_file": "langchain/src/retrievers/recency_ranked.ts", - "discussion_id": "1752684909", - "commented_code": "@@ -0,0 +1,78 @@\n+import { BaseRetriever } from \"@langchain/core/retrievers\";\n+import { VectorStoreInterface } from \"@langchain/core/vectorstores\";\n+import { Document } from \"@langchain/core/documents\";\n+\n+export interface RecencyRankedRetrieverConfig {\n+ vectorStore: VectorStoreInterface;\n+ k: number;\n+ topK?: number;\n+ recencyWeight: number;\n+}\n+\n+export class RecencyRankedRetriever extends BaseRetriever {\n+ static lc_name() {\n+ return \"RecencyRankedRetriever\";\n+ }\n+\n+ lc_namespace = [\"langchain\", \"retrievers\", \"recency_ranked\"];\n+ \n+ private vectorStore: VectorStoreInterface;\n+\n+ private k: number;\n+\n+ private topK: number;\n+\n+ private recencyWeight: number;\n+\n+ constructor(config: RecencyRankedRetrieverConfig) {\n+ super();\n+ this.vectorStore = config.vectorStore;\n+ this.k = config.k;\n+ this.topK = config.topK ?? config.k;\n+ this.recencyWeight = config.recencyWeight;\n+ }\n+\n+ async _getRelevantDocuments(query: string): Promise {\n+ const relevantDocs = await this.vectorStore.similaritySearchWithScore(query, this.k);\n+ const rerankedDocs = this.recentDocumentRanker(relevantDocs, this.topK, this.recencyWeight);\n+ return rerankedDocs.map(([doc, _]) => doc);\n+ }\n+\n+ async invoke(query: string): Promise {\n+ return this._getRelevantDocuments(query);\n+ }\n+\n+ private recentDocumentRanker(\n+ documents: [Document, number][],\n+ topK: number,\n+ recencyWeight: number\n+ ): [Document, number][] {\n+ if (documents.length === 0) return [];\n+\n+ if (!documents.every(([doc, _]) => doc.metadata.date instanceof Date)) {", - "comment_created_at": "2024-09-10T20:31:26+00:00", - "comment_author": "jacoblee93", - "comment_body": "Would suggest storing this as an ISO string - not all (most?) 
vector stores will not deserialize dates", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2004667909", - "pr_number": 7852, - "pr_file": "libs/langchain-google-cloud-sql-pg/src/vectorStore.ts", - "created_at": "2025-03-20T02:08:43+00:00", - "commented_code": "import { EmbeddingsInterface } from \"@langchain/core/embeddings\";\nimport { MaxMarginalRelevanceSearchOptions, VectorStore } from \"@langchain/core/vectorstores\";\nimport { Document } from \"@langchain/core/documents\";\nimport { v4 as uuidv4 } from \"uuid\";\nimport { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\nimport { DEFAULT_DISTANCE_STRATEGY, DistanceStrategy, QueryOptions } from \"./indexes.js\";\nimport PostgresEngine from \"./engine.js\";\nimport { customZip } from \"./utils/utils.js\";\n\nexport interface PostgresVectorStoreArgs {\n schemaName?: string,\n contentColumn?: string,\n embeddingColumn?: string,\n metadataColumns?: Array,\n idColumn?: string,\n distanceStrategy?: DistanceStrategy,\n k?: number,\n fetchK?: number,\n lambdaMult?: number,\n ignoreMetadataColumns?: Array,\n metadataJsonColumn?: string,\n indexQueryOptions?: QueryOptions\n}\n\nexport interface dbConfigArgs {\n engine: PostgresEngine;\n tableName: string;\n dbConfig?: PostgresVectorStoreArgs;\n}\n\nexport class PostgresVectorStore extends VectorStore {\n declare FilterType: string;\n\n engine: PostgresEngine;\n\n embeddings: EmbeddingsInterface;\n\n tableName: string;\n\n schemaName: string;\n\n contentColumn: string;\n\n embeddingColumn: string;\n\n metadataColumns: Array;\n\n ignoreMetadataColumns: Array;\n\n idColumn: string;\n\n metadataJsonColumn: string;\n\n distanceStrategy: DistanceStrategy;\n\n k: number;\n\n fetchK: number;\n\n lambdaMult: number;\n\n indexQueryOptions: QueryOptions;\n\n /**\n * Initializes a new vector store with embeddings and database configuration.\n *\n * @param embeddings - Instance of `EmbeddingsInterface` used to embed queries.\n * @param dbConfig - Configuration settings for the database or storage system.\n */\n constructor(embeddings: EmbeddingsInterface, dbConfig: Record) {\n super(embeddings, dbConfig);\n this.embeddings = embeddings;\n this.engine = dbConfig.engine;\n this.tableName = dbConfig.tableName;\n this.schemaName = dbConfig.schemaName;\n this.contentColumn = dbConfig.contentColumn;\n this.embeddingColumn = dbConfig.embeddingColumn;\n this.metadataColumns = dbConfig.metadataColumns ? dbConfig.metadataColumns : [];\n this.ignoreMetadataColumns = dbConfig.ignoreMetadataColumns;\n this.idColumn = dbConfig.idColumn;\n this.metadataJsonColumn = dbConfig.metadataJsonColumn;\n this.distanceStrategy = dbConfig.distanceStrategy;\n this.k = dbConfig.k;\n this.fetchK = dbConfig.fetchK;\n this.lambdaMult = dbConfig.lambdaMult;\n this.indexQueryOptions = dbConfig.indexQueryOptions;\n }\n\n /**\n * Create a new PostgresVectorStore instance.\n * @param {PostgresEngine} engine Required - Connection pool engine for managing connections to Cloud SQL for PostgreSQL database.\n * @param {Embeddings} embeddings Required - Text embedding model to use.\n * @param {string} tableName Required - Name of an existing table or table to be created.\n * @param {string} schemaName Database schema name of the table. Defaults to \"public\".\n * @param {string} contentColumn Column that represent a Document's page_content. Defaults to \"content\".\n * @param {string} embeddingColumn Column for embedding vectors. The embedding is generated from the document value. 
Defaults to \"embedding\".\n * @param {Array} metadataColumns Column(s) that represent a document's metadata.\n * @param {Array} ignoreMetadataColumns Optional - Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns.\n * @param {string} idColumn Column that represents the Document's id. Defaults to \"langchain_id\".\n * @param {string} metadataJsonColumn Optional - Column to store metadata as JSON. Defaults to \"langchain_metadata\".\n * @param {DistanceStrategy} distanceStrategy Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.\n * @param {number} k Number of Documents to return from search. Defaults to 4.\n * @param {number} fetchK Number of Documents to fetch to pass to MMR algorithm.\n * @param {number} lambdaMult Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.\n * @param {QueryOptions} indexQueryOptions Optional - Index query option.\n * @returns PostgresVectorStore instance.\n */\n static async create(\n engine: PostgresEngine,\n embeddings: EmbeddingsInterface,\n tableName: string,\n {\n schemaName = \"public\",\n contentColumn = \"content\",\n embeddingColumn = \"embedding\",\n metadataColumns = [],\n ignoreMetadataColumns,\n idColumn = \"langchain_id\",\n metadataJsonColumn = \"langchain_metadata\",\n distanceStrategy = DEFAULT_DISTANCE_STRATEGY,\n k = 4,\n fetchK = 20,\n lambdaMult = 0.5,\n indexQueryOptions\n }: PostgresVectorStoreArgs = {}\n ): Promise {\n\n if (metadataColumns !== undefined && ignoreMetadataColumns !== undefined) {\n throw Error(\"Can not use both metadata_columns and ignore_metadata_columns.\");\n }\n\n const { rows } = await engine.pool.raw(`SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '${tableName}' AND table_schema = '${schemaName}'`);\n const columns: { [key: string] } = {};\n\n for (const index in rows) {\n if (rows[index]) {\n const row = rows[index];\n columns[row.column_name] = row.data_type\n }\n }\n\n if (!Object.prototype.hasOwnProperty.call(columns, idColumn)) {\n throw Error(`Id column: ${idColumn}, does not exist.`);\n }\n\n if (!Object.prototype.hasOwnProperty.call(columns, contentColumn)) {\n throw Error(`Content column: ${contentColumn}, does not exist.`);\n }\n\n const contentType = columns[contentColumn];\n\n if (contentType !== \"text\" && !contentType.includes(\"char\")) {\n throw Error(`Content column: ${contentColumn}, is type: ${contentType}. It must be a type of character string.`);\n }\n\n if (!Object.prototype.hasOwnProperty.call(columns, embeddingColumn)) {\n throw Error(`Embedding column: ${embeddingColumn}, does not exist.`);\n }\n\n if (columns[embeddingColumn] !== \"USER-DEFINED\") {\n throw Error(`Embedding column: ${embeddingColumn} is not of type Vector.`);\n }\n\n const jsonColumn = Object.prototype.hasOwnProperty.call(columns, metadataJsonColumn) ? 
metadataJsonColumn : \"\";\n\n for (const column of metadataColumns) {\n if (!Object.prototype.hasOwnProperty.call(columns, column)) {\n throw Error(`Metadata column: ${column}, does not exist.`);\n }\n }\n\n const allColumns = columns;\n let allMetadataColumns: string[];\n if (ignoreMetadataColumns !== undefined && ignoreMetadataColumns.length > 0) {\n for (const column of ignoreMetadataColumns) {\n delete allColumns[column];\n }\n\n delete allColumns[idColumn];\n delete allColumns[contentColumn];\n delete allColumns[embeddingColumn];\n allMetadataColumns = Object.keys(allColumns);\n }\n return new PostgresVectorStore(\n embeddings,\n {\n engine,\n tableName,\n schemaName,\n contentColumn,\n embeddingColumn,\n metadataColumns: allMetadataColumns,\n ignoreMetadataColumns,\n idColumn,\n metadataJsonColumn: jsonColumn,\n distanceStrategy,\n k,\n fetchK,\n lambdaMult,\n indexQueryOptions\n }\n )\n }\n\n static async fromTexts(texts: string[], metadatas: object[] | object, embeddings: EmbeddingsInterface, dbConfig: dbConfigArgs): Promise {\n const documents: Document[] = [];\n\n for (let i = 0; i < texts.length; i += 1) {\n const doc = new Document({\n pageContent: texts[i],\n metadata: Array.isArray(metadatas) ? metadatas[i] : metadatas\n })\n documents.push(doc);\n }\n\n return PostgresVectorStore.fromDocuments(documents, embeddings, dbConfig);\n }\n\n static async fromDocuments(docs: Document[], embeddings: EmbeddingsInterface, dbConfig: dbConfigArgs): Promise {\n const { engine } = dbConfig;\n const { tableName } = dbConfig;\n const config = dbConfig.dbConfig;\n const vectorStore = await this.create(engine, embeddings, tableName, config);\n\n await vectorStore.addDocuments(docs)\n\n return vectorStore;\n }\n\n async addVectors(vectors: number[][], documents: Document[], options?: { ids?: string[] }): Promise {\n let ids: string[] = [];\n const metadatas: Record[] = []\n\n if (vectors.length !== documents.length) {\n throw new Error(\"The number of vectors must match the number of documents provided.\");\n }\n\n if (options?.ids) {\n ids = options.ids;\n } else {\n documents.forEach(document => {\n if (document.id !== undefined) {\n ids.push(document.id);\n } else {\n ids.push(uuidv4());\n }\n });\n }\n\n if (options && options.ids && options.ids.length !== documents.length) {\n throw new Error(\"The number of ids must match the number of documents provided.\");\n }\n\n documents.forEach(document => {\n metadatas.push(document.metadata)\n });\n\n const tuples = customZip(ids, documents, vectors, metadatas);\n\n // Insert embeddings\n for (const [id, document, embedding, metadata] of tuples) {\n const metadataColNames = this.metadataColumns.length > 0 ? `, \"${this.metadataColumns.join(\"\\\",\\\"\")}\"` : \"\";\n\n let stmt = `INSERT INTO \"${this.schemaName}\".\"${this.tableName}\"(\"${this.idColumn}\", \"${this.contentColumn}\", \"${this.embeddingColumn}\" ${metadataColNames}`\n const values: { [key: string] } = {\n id,\n content: document.pageContent,\n embedding: `[${embedding.toString()}]`\n }\n let valuesStmt = \" VALUES (:id, :content, :embedding\";\n\n // Add metadata\n const extra = metadata;\n for (const metadataColumn of this.metadataColumns) {\n if (Object.prototype.hasOwnProperty.call(metadata, metadataColumn)) {\n valuesStmt += `, :${metadataColumn}`;\n values[metadataColumn] = metadata[metadataColumn]\n delete extra[metadataColumn]\n } else {\n valuesStmt += \" ,null\"\n }\n }\n\n // Add JSON column and/or close statement\n stmt += this.metadataJsonColumn ? 
`, ${this.metadataJsonColumn})` : \")\";", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2004667909", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7852, - "pr_file": "libs/langchain-google-cloud-sql-pg/src/vectorStore.ts", - "discussion_id": "2004667909", - "commented_code": "@@ -0,0 +1,423 @@\n+import { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { MaxMarginalRelevanceSearchOptions, VectorStore } from \"@langchain/core/vectorstores\";\n+import { Document } from \"@langchain/core/documents\";\n+import { v4 as uuidv4 } from \"uuid\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { DEFAULT_DISTANCE_STRATEGY, DistanceStrategy, QueryOptions } from \"./indexes.js\";\n+import PostgresEngine from \"./engine.js\";\n+import { customZip } from \"./utils/utils.js\";\n+\n+export interface PostgresVectorStoreArgs {\n+ schemaName?: string,\n+ contentColumn?: string,\n+ embeddingColumn?: string,\n+ metadataColumns?: Array,\n+ idColumn?: string,\n+ distanceStrategy?: DistanceStrategy,\n+ k?: number,\n+ fetchK?: number,\n+ lambdaMult?: number,\n+ ignoreMetadataColumns?: Array,\n+ metadataJsonColumn?: string,\n+ indexQueryOptions?: QueryOptions\n+}\n+\n+export interface dbConfigArgs {\n+ engine: PostgresEngine;\n+ tableName: string;\n+ dbConfig?: PostgresVectorStoreArgs;\n+}\n+\n+export class PostgresVectorStore extends VectorStore {\n+ declare FilterType: string;\n+\n+ engine: PostgresEngine;\n+\n+ embeddings: EmbeddingsInterface;\n+\n+ tableName: string;\n+\n+ schemaName: string;\n+\n+ contentColumn: string;\n+\n+ embeddingColumn: string;\n+\n+ metadataColumns: Array;\n+\n+ ignoreMetadataColumns: Array;\n+\n+ idColumn: string;\n+\n+ metadataJsonColumn: string;\n+\n+ distanceStrategy: DistanceStrategy;\n+\n+ k: number;\n+\n+ fetchK: number;\n+\n+ lambdaMult: number;\n+\n+ indexQueryOptions: QueryOptions;\n+\n+ /**\n+ * Initializes a new vector store with embeddings and database configuration.\n+ *\n+ * @param embeddings - Instance of `EmbeddingsInterface` used to embed queries.\n+ * @param dbConfig - Configuration settings for the database or storage system.\n+ */\n+ constructor(embeddings: EmbeddingsInterface, dbConfig: Record) {\n+ super(embeddings, dbConfig);\n+ this.embeddings = embeddings;\n+ this.engine = dbConfig.engine;\n+ this.tableName = dbConfig.tableName;\n+ this.schemaName = dbConfig.schemaName;\n+ this.contentColumn = dbConfig.contentColumn;\n+ this.embeddingColumn = dbConfig.embeddingColumn;\n+ this.metadataColumns = dbConfig.metadataColumns ? dbConfig.metadataColumns : [];\n+ this.ignoreMetadataColumns = dbConfig.ignoreMetadataColumns;\n+ this.idColumn = dbConfig.idColumn;\n+ this.metadataJsonColumn = dbConfig.metadataJsonColumn;\n+ this.distanceStrategy = dbConfig.distanceStrategy;\n+ this.k = dbConfig.k;\n+ this.fetchK = dbConfig.fetchK;\n+ this.lambdaMult = dbConfig.lambdaMult;\n+ this.indexQueryOptions = dbConfig.indexQueryOptions;\n+ }\n+\n+ /**\n+ * Create a new PostgresVectorStore instance.\n+ * @param {PostgresEngine} engine Required - Connection pool engine for managing connections to Cloud SQL for PostgreSQL database.\n+ * @param {Embeddings} embeddings Required - Text embedding model to use.\n+ * @param {string} tableName Required - Name of an existing table or table to be created.\n+ * @param {string} schemaName Database schema name of the table. 
Defaults to \"public\".\n+ * @param {string} contentColumn Column that represent a Document's page_content. Defaults to \"content\".\n+ * @param {string} embeddingColumn Column for embedding vectors. The embedding is generated from the document value. Defaults to \"embedding\".\n+ * @param {Array} metadataColumns Column(s) that represent a document's metadata.\n+ * @param {Array} ignoreMetadataColumns Optional - Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns.\n+ * @param {string} idColumn Column that represents the Document's id. Defaults to \"langchain_id\".\n+ * @param {string} metadataJsonColumn Optional - Column to store metadata as JSON. Defaults to \"langchain_metadata\".\n+ * @param {DistanceStrategy} distanceStrategy Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.\n+ * @param {number} k Number of Documents to return from search. Defaults to 4.\n+ * @param {number} fetchK Number of Documents to fetch to pass to MMR algorithm.\n+ * @param {number} lambdaMult Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.\n+ * @param {QueryOptions} indexQueryOptions Optional - Index query option.\n+ * @returns PostgresVectorStore instance.\n+ */\n+ static async create(\n+ engine: PostgresEngine,\n+ embeddings: EmbeddingsInterface,\n+ tableName: string,\n+ {\n+ schemaName = \"public\",\n+ contentColumn = \"content\",\n+ embeddingColumn = \"embedding\",\n+ metadataColumns = [],\n+ ignoreMetadataColumns,\n+ idColumn = \"langchain_id\",\n+ metadataJsonColumn = \"langchain_metadata\",\n+ distanceStrategy = DEFAULT_DISTANCE_STRATEGY,\n+ k = 4,\n+ fetchK = 20,\n+ lambdaMult = 0.5,\n+ indexQueryOptions\n+ }: PostgresVectorStoreArgs = {}\n+ ): Promise {\n+\n+ if (metadataColumns !== undefined && ignoreMetadataColumns !== undefined) {\n+ throw Error(\"Can not use both metadata_columns and ignore_metadata_columns.\");\n+ }\n+\n+ const { rows } = await engine.pool.raw(`SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '${tableName}' AND table_schema = '${schemaName}'`);\n+ const columns: { [key: string] } = {};\n+\n+ for (const index in rows) {\n+ if (rows[index]) {\n+ const row = rows[index];\n+ columns[row.column_name] = row.data_type\n+ }\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, idColumn)) {\n+ throw Error(`Id column: ${idColumn}, does not exist.`);\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, contentColumn)) {\n+ throw Error(`Content column: ${contentColumn}, does not exist.`);\n+ }\n+\n+ const contentType = columns[contentColumn];\n+\n+ if (contentType !== \"text\" && !contentType.includes(\"char\")) {\n+ throw Error(`Content column: ${contentColumn}, is type: ${contentType}. It must be a type of character string.`);\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, embeddingColumn)) {\n+ throw Error(`Embedding column: ${embeddingColumn}, does not exist.`);\n+ }\n+\n+ if (columns[embeddingColumn] !== \"USER-DEFINED\") {\n+ throw Error(`Embedding column: ${embeddingColumn} is not of type Vector.`);\n+ }\n+\n+ const jsonColumn = Object.prototype.hasOwnProperty.call(columns, metadataJsonColumn) ? 
metadataJsonColumn : \"\";\n+\n+ for (const column of metadataColumns) {\n+ if (!Object.prototype.hasOwnProperty.call(columns, column)) {\n+ throw Error(`Metadata column: ${column}, does not exist.`);\n+ }\n+ }\n+\n+ const allColumns = columns;\n+ let allMetadataColumns: string[];\n+ if (ignoreMetadataColumns !== undefined && ignoreMetadataColumns.length > 0) {\n+ for (const column of ignoreMetadataColumns) {\n+ delete allColumns[column];\n+ }\n+\n+ delete allColumns[idColumn];\n+ delete allColumns[contentColumn];\n+ delete allColumns[embeddingColumn];\n+ allMetadataColumns = Object.keys(allColumns);\n+ }\n+ return new PostgresVectorStore(\n+ embeddings,\n+ {\n+ engine,\n+ tableName,\n+ schemaName,\n+ contentColumn,\n+ embeddingColumn,\n+ metadataColumns: allMetadataColumns,\n+ ignoreMetadataColumns,\n+ idColumn,\n+ metadataJsonColumn: jsonColumn,\n+ distanceStrategy,\n+ k,\n+ fetchK,\n+ lambdaMult,\n+ indexQueryOptions\n+ }\n+ )\n+ }\n+\n+ static async fromTexts(texts: string[], metadatas: object[] | object, embeddings: EmbeddingsInterface, dbConfig: dbConfigArgs): Promise {\n+ const documents: Document[] = [];\n+\n+ for (let i = 0; i < texts.length; i += 1) {\n+ const doc = new Document({\n+ pageContent: texts[i],\n+ metadata: Array.isArray(metadatas) ? metadatas[i] : metadatas\n+ })\n+ documents.push(doc);\n+ }\n+\n+ return PostgresVectorStore.fromDocuments(documents, embeddings, dbConfig);\n+ }\n+\n+ static async fromDocuments(docs: Document[], embeddings: EmbeddingsInterface, dbConfig: dbConfigArgs): Promise {\n+ const { engine } = dbConfig;\n+ const { tableName } = dbConfig;\n+ const config = dbConfig.dbConfig;\n+ const vectorStore = await this.create(engine, embeddings, tableName, config);\n+\n+ await vectorStore.addDocuments(docs)\n+\n+ return vectorStore;\n+ }\n+\n+ async addVectors(vectors: number[][], documents: Document[], options?: { ids?: string[] }): Promise {\n+ let ids: string[] = [];\n+ const metadatas: Record[] = []\n+\n+ if (vectors.length !== documents.length) {\n+ throw new Error(\"The number of vectors must match the number of documents provided.\");\n+ }\n+\n+ if (options?.ids) {\n+ ids = options.ids;\n+ } else {\n+ documents.forEach(document => {\n+ if (document.id !== undefined) {\n+ ids.push(document.id);\n+ } else {\n+ ids.push(uuidv4());\n+ }\n+ });\n+ }\n+\n+ if (options && options.ids && options.ids.length !== documents.length) {\n+ throw new Error(\"The number of ids must match the number of documents provided.\");\n+ }\n+\n+ documents.forEach(document => {\n+ metadatas.push(document.metadata)\n+ });\n+\n+ const tuples = customZip(ids, documents, vectors, metadatas);\n+\n+ // Insert embeddings\n+ for (const [id, document, embedding, metadata] of tuples) {\n+ const metadataColNames = this.metadataColumns.length > 0 ? 
`, \"${this.metadataColumns.join(\"\\\",\\\"\")}\"` : \"\";\n+\n+ let stmt = `INSERT INTO \"${this.schemaName}\".\"${this.tableName}\"(\"${this.idColumn}\", \"${this.contentColumn}\", \"${this.embeddingColumn}\" ${metadataColNames}`\n+ const values: { [key: string] } = {\n+ id,\n+ content: document.pageContent,\n+ embedding: `[${embedding.toString()}]`\n+ }\n+ let valuesStmt = \" VALUES (:id, :content, :embedding\";\n+\n+ // Add metadata\n+ const extra = metadata;\n+ for (const metadataColumn of this.metadataColumns) {\n+ if (Object.prototype.hasOwnProperty.call(metadata, metadataColumn)) {\n+ valuesStmt += `, :${metadataColumn}`;\n+ values[metadataColumn] = metadata[metadataColumn]\n+ delete extra[metadataColumn]\n+ } else {\n+ valuesStmt += \" ,null\"\n+ }\n+ }\n+\n+ // Add JSON column and/or close statement\n+ stmt += this.metadataJsonColumn ? `, ${this.metadataJsonColumn})` : \")\";", - "comment_created_at": "2025-03-20T02:08:43+00:00", - "comment_author": "jacoblee93", - "comment_body": "OOC why not just support `metadataJsonColumn` vs spreading metadata into other columns?", - "pr_file_module": null - }, - { - "comment_id": "2008274415", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7852, - "pr_file": "libs/langchain-google-cloud-sql-pg/src/vectorStore.ts", - "discussion_id": "2004667909", - "commented_code": "@@ -0,0 +1,423 @@\n+import { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { MaxMarginalRelevanceSearchOptions, VectorStore } from \"@langchain/core/vectorstores\";\n+import { Document } from \"@langchain/core/documents\";\n+import { v4 as uuidv4 } from \"uuid\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { DEFAULT_DISTANCE_STRATEGY, DistanceStrategy, QueryOptions } from \"./indexes.js\";\n+import PostgresEngine from \"./engine.js\";\n+import { customZip } from \"./utils/utils.js\";\n+\n+export interface PostgresVectorStoreArgs {\n+ schemaName?: string,\n+ contentColumn?: string,\n+ embeddingColumn?: string,\n+ metadataColumns?: Array,\n+ idColumn?: string,\n+ distanceStrategy?: DistanceStrategy,\n+ k?: number,\n+ fetchK?: number,\n+ lambdaMult?: number,\n+ ignoreMetadataColumns?: Array,\n+ metadataJsonColumn?: string,\n+ indexQueryOptions?: QueryOptions\n+}\n+\n+export interface dbConfigArgs {\n+ engine: PostgresEngine;\n+ tableName: string;\n+ dbConfig?: PostgresVectorStoreArgs;\n+}\n+\n+export class PostgresVectorStore extends VectorStore {\n+ declare FilterType: string;\n+\n+ engine: PostgresEngine;\n+\n+ embeddings: EmbeddingsInterface;\n+\n+ tableName: string;\n+\n+ schemaName: string;\n+\n+ contentColumn: string;\n+\n+ embeddingColumn: string;\n+\n+ metadataColumns: Array;\n+\n+ ignoreMetadataColumns: Array;\n+\n+ idColumn: string;\n+\n+ metadataJsonColumn: string;\n+\n+ distanceStrategy: DistanceStrategy;\n+\n+ k: number;\n+\n+ fetchK: number;\n+\n+ lambdaMult: number;\n+\n+ indexQueryOptions: QueryOptions;\n+\n+ /**\n+ * Initializes a new vector store with embeddings and database configuration.\n+ *\n+ * @param embeddings - Instance of `EmbeddingsInterface` used to embed queries.\n+ * @param dbConfig - Configuration settings for the database or storage system.\n+ */\n+ constructor(embeddings: EmbeddingsInterface, dbConfig: Record) {\n+ super(embeddings, dbConfig);\n+ this.embeddings = embeddings;\n+ this.engine = dbConfig.engine;\n+ this.tableName = dbConfig.tableName;\n+ this.schemaName = dbConfig.schemaName;\n+ this.contentColumn = dbConfig.contentColumn;\n+ this.embeddingColumn = 
dbConfig.embeddingColumn;\n+ this.metadataColumns = dbConfig.metadataColumns ? dbConfig.metadataColumns : [];\n+ this.ignoreMetadataColumns = dbConfig.ignoreMetadataColumns;\n+ this.idColumn = dbConfig.idColumn;\n+ this.metadataJsonColumn = dbConfig.metadataJsonColumn;\n+ this.distanceStrategy = dbConfig.distanceStrategy;\n+ this.k = dbConfig.k;\n+ this.fetchK = dbConfig.fetchK;\n+ this.lambdaMult = dbConfig.lambdaMult;\n+ this.indexQueryOptions = dbConfig.indexQueryOptions;\n+ }\n+\n+ /**\n+ * Create a new PostgresVectorStore instance.\n+ * @param {PostgresEngine} engine Required - Connection pool engine for managing connections to Cloud SQL for PostgreSQL database.\n+ * @param {Embeddings} embeddings Required - Text embedding model to use.\n+ * @param {string} tableName Required - Name of an existing table or table to be created.\n+ * @param {string} schemaName Database schema name of the table. Defaults to \"public\".\n+ * @param {string} contentColumn Column that represent a Document's page_content. Defaults to \"content\".\n+ * @param {string} embeddingColumn Column for embedding vectors. The embedding is generated from the document value. Defaults to \"embedding\".\n+ * @param {Array} metadataColumns Column(s) that represent a document's metadata.\n+ * @param {Array} ignoreMetadataColumns Optional - Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns.\n+ * @param {string} idColumn Column that represents the Document's id. Defaults to \"langchain_id\".\n+ * @param {string} metadataJsonColumn Optional - Column to store metadata as JSON. Defaults to \"langchain_metadata\".\n+ * @param {DistanceStrategy} distanceStrategy Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.\n+ * @param {number} k Number of Documents to return from search. Defaults to 4.\n+ * @param {number} fetchK Number of Documents to fetch to pass to MMR algorithm.\n+ * @param {number} lambdaMult Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. 
Defaults to 0.5.\n+ * @param {QueryOptions} indexQueryOptions Optional - Index query option.\n+ * @returns PostgresVectorStore instance.\n+ */\n+ static async create(\n+ engine: PostgresEngine,\n+ embeddings: EmbeddingsInterface,\n+ tableName: string,\n+ {\n+ schemaName = \"public\",\n+ contentColumn = \"content\",\n+ embeddingColumn = \"embedding\",\n+ metadataColumns = [],\n+ ignoreMetadataColumns,\n+ idColumn = \"langchain_id\",\n+ metadataJsonColumn = \"langchain_metadata\",\n+ distanceStrategy = DEFAULT_DISTANCE_STRATEGY,\n+ k = 4,\n+ fetchK = 20,\n+ lambdaMult = 0.5,\n+ indexQueryOptions\n+ }: PostgresVectorStoreArgs = {}\n+ ): Promise {\n+\n+ if (metadataColumns !== undefined && ignoreMetadataColumns !== undefined) {\n+ throw Error(\"Can not use both metadata_columns and ignore_metadata_columns.\");\n+ }\n+\n+ const { rows } = await engine.pool.raw(`SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '${tableName}' AND table_schema = '${schemaName}'`);\n+ const columns: { [key: string] } = {};\n+\n+ for (const index in rows) {\n+ if (rows[index]) {\n+ const row = rows[index];\n+ columns[row.column_name] = row.data_type\n+ }\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, idColumn)) {\n+ throw Error(`Id column: ${idColumn}, does not exist.`);\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, contentColumn)) {\n+ throw Error(`Content column: ${contentColumn}, does not exist.`);\n+ }\n+\n+ const contentType = columns[contentColumn];\n+\n+ if (contentType !== \"text\" && !contentType.includes(\"char\")) {\n+ throw Error(`Content column: ${contentColumn}, is type: ${contentType}. It must be a type of character string.`);\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, embeddingColumn)) {\n+ throw Error(`Embedding column: ${embeddingColumn}, does not exist.`);\n+ }\n+\n+ if (columns[embeddingColumn] !== \"USER-DEFINED\") {\n+ throw Error(`Embedding column: ${embeddingColumn} is not of type Vector.`);\n+ }\n+\n+ const jsonColumn = Object.prototype.hasOwnProperty.call(columns, metadataJsonColumn) ? metadataJsonColumn : \"\";\n+\n+ for (const column of metadataColumns) {\n+ if (!Object.prototype.hasOwnProperty.call(columns, column)) {\n+ throw Error(`Metadata column: ${column}, does not exist.`);\n+ }\n+ }\n+\n+ const allColumns = columns;\n+ let allMetadataColumns: string[];\n+ if (ignoreMetadataColumns !== undefined && ignoreMetadataColumns.length > 0) {\n+ for (const column of ignoreMetadataColumns) {\n+ delete allColumns[column];\n+ }\n+\n+ delete allColumns[idColumn];\n+ delete allColumns[contentColumn];\n+ delete allColumns[embeddingColumn];\n+ allMetadataColumns = Object.keys(allColumns);\n+ }\n+ return new PostgresVectorStore(\n+ embeddings,\n+ {\n+ engine,\n+ tableName,\n+ schemaName,\n+ contentColumn,\n+ embeddingColumn,\n+ metadataColumns: allMetadataColumns,\n+ ignoreMetadataColumns,\n+ idColumn,\n+ metadataJsonColumn: jsonColumn,\n+ distanceStrategy,\n+ k,\n+ fetchK,\n+ lambdaMult,\n+ indexQueryOptions\n+ }\n+ )\n+ }\n+\n+ static async fromTexts(texts: string[], metadatas: object[] | object, embeddings: EmbeddingsInterface, dbConfig: dbConfigArgs): Promise {\n+ const documents: Document[] = [];\n+\n+ for (let i = 0; i < texts.length; i += 1) {\n+ const doc = new Document({\n+ pageContent: texts[i],\n+ metadata: Array.isArray(metadatas) ? 
metadatas[i] : metadatas\n+ })\n+ documents.push(doc);\n+ }\n+\n+ return PostgresVectorStore.fromDocuments(documents, embeddings, dbConfig);\n+ }\n+\n+ static async fromDocuments(docs: Document[], embeddings: EmbeddingsInterface, dbConfig: dbConfigArgs): Promise {\n+ const { engine } = dbConfig;\n+ const { tableName } = dbConfig;\n+ const config = dbConfig.dbConfig;\n+ const vectorStore = await this.create(engine, embeddings, tableName, config);\n+\n+ await vectorStore.addDocuments(docs)\n+\n+ return vectorStore;\n+ }\n+\n+ async addVectors(vectors: number[][], documents: Document[], options?: { ids?: string[] }): Promise {\n+ let ids: string[] = [];\n+ const metadatas: Record[] = []\n+\n+ if (vectors.length !== documents.length) {\n+ throw new Error(\"The number of vectors must match the number of documents provided.\");\n+ }\n+\n+ if (options?.ids) {\n+ ids = options.ids;\n+ } else {\n+ documents.forEach(document => {\n+ if (document.id !== undefined) {\n+ ids.push(document.id);\n+ } else {\n+ ids.push(uuidv4());\n+ }\n+ });\n+ }\n+\n+ if (options && options.ids && options.ids.length !== documents.length) {\n+ throw new Error(\"The number of ids must match the number of documents provided.\");\n+ }\n+\n+ documents.forEach(document => {\n+ metadatas.push(document.metadata)\n+ });\n+\n+ const tuples = customZip(ids, documents, vectors, metadatas);\n+\n+ // Insert embeddings\n+ for (const [id, document, embedding, metadata] of tuples) {\n+ const metadataColNames = this.metadataColumns.length > 0 ? `, \"${this.metadataColumns.join(\"\\\",\\\"\")}\"` : \"\";\n+\n+ let stmt = `INSERT INTO \"${this.schemaName}\".\"${this.tableName}\"(\"${this.idColumn}\", \"${this.contentColumn}\", \"${this.embeddingColumn}\" ${metadataColNames}`\n+ const values: { [key: string] } = {\n+ id,\n+ content: document.pageContent,\n+ embedding: `[${embedding.toString()}]`\n+ }\n+ let valuesStmt = \" VALUES (:id, :content, :embedding\";\n+\n+ // Add metadata\n+ const extra = metadata;\n+ for (const metadataColumn of this.metadataColumns) {\n+ if (Object.prototype.hasOwnProperty.call(metadata, metadataColumn)) {\n+ valuesStmt += `, :${metadataColumn}`;\n+ values[metadataColumn] = metadata[metadataColumn]\n+ delete extra[metadataColumn]\n+ } else {\n+ valuesStmt += \" ,null\"\n+ }\n+ }\n+\n+ // Add JSON column and/or close statement\n+ stmt += this.metadataJsonColumn ? `, ${this.metadataJsonColumn})` : \")\";", - "comment_created_at": "2025-03-21T20:14:26+00:00", - "comment_author": "averikitsch", - "comment_body": "This pattern follows database best practices for relational databases by using individual columns versus all data in a JSON column. This allows for low latency filtering and type checking. 
", - "pr_file_module": null - }, - { - "comment_id": "2009500129", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7852, - "pr_file": "libs/langchain-google-cloud-sql-pg/src/vectorStore.ts", - "discussion_id": "2004667909", - "commented_code": "@@ -0,0 +1,423 @@\n+import { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { MaxMarginalRelevanceSearchOptions, VectorStore } from \"@langchain/core/vectorstores\";\n+import { Document } from \"@langchain/core/documents\";\n+import { v4 as uuidv4 } from \"uuid\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { DEFAULT_DISTANCE_STRATEGY, DistanceStrategy, QueryOptions } from \"./indexes.js\";\n+import PostgresEngine from \"./engine.js\";\n+import { customZip } from \"./utils/utils.js\";\n+\n+export interface PostgresVectorStoreArgs {\n+ schemaName?: string,\n+ contentColumn?: string,\n+ embeddingColumn?: string,\n+ metadataColumns?: Array,\n+ idColumn?: string,\n+ distanceStrategy?: DistanceStrategy,\n+ k?: number,\n+ fetchK?: number,\n+ lambdaMult?: number,\n+ ignoreMetadataColumns?: Array,\n+ metadataJsonColumn?: string,\n+ indexQueryOptions?: QueryOptions\n+}\n+\n+export interface dbConfigArgs {\n+ engine: PostgresEngine;\n+ tableName: string;\n+ dbConfig?: PostgresVectorStoreArgs;\n+}\n+\n+export class PostgresVectorStore extends VectorStore {\n+ declare FilterType: string;\n+\n+ engine: PostgresEngine;\n+\n+ embeddings: EmbeddingsInterface;\n+\n+ tableName: string;\n+\n+ schemaName: string;\n+\n+ contentColumn: string;\n+\n+ embeddingColumn: string;\n+\n+ metadataColumns: Array;\n+\n+ ignoreMetadataColumns: Array;\n+\n+ idColumn: string;\n+\n+ metadataJsonColumn: string;\n+\n+ distanceStrategy: DistanceStrategy;\n+\n+ k: number;\n+\n+ fetchK: number;\n+\n+ lambdaMult: number;\n+\n+ indexQueryOptions: QueryOptions;\n+\n+ /**\n+ * Initializes a new vector store with embeddings and database configuration.\n+ *\n+ * @param embeddings - Instance of `EmbeddingsInterface` used to embed queries.\n+ * @param dbConfig - Configuration settings for the database or storage system.\n+ */\n+ constructor(embeddings: EmbeddingsInterface, dbConfig: Record) {\n+ super(embeddings, dbConfig);\n+ this.embeddings = embeddings;\n+ this.engine = dbConfig.engine;\n+ this.tableName = dbConfig.tableName;\n+ this.schemaName = dbConfig.schemaName;\n+ this.contentColumn = dbConfig.contentColumn;\n+ this.embeddingColumn = dbConfig.embeddingColumn;\n+ this.metadataColumns = dbConfig.metadataColumns ? dbConfig.metadataColumns : [];\n+ this.ignoreMetadataColumns = dbConfig.ignoreMetadataColumns;\n+ this.idColumn = dbConfig.idColumn;\n+ this.metadataJsonColumn = dbConfig.metadataJsonColumn;\n+ this.distanceStrategy = dbConfig.distanceStrategy;\n+ this.k = dbConfig.k;\n+ this.fetchK = dbConfig.fetchK;\n+ this.lambdaMult = dbConfig.lambdaMult;\n+ this.indexQueryOptions = dbConfig.indexQueryOptions;\n+ }\n+\n+ /**\n+ * Create a new PostgresVectorStore instance.\n+ * @param {PostgresEngine} engine Required - Connection pool engine for managing connections to Cloud SQL for PostgreSQL database.\n+ * @param {Embeddings} embeddings Required - Text embedding model to use.\n+ * @param {string} tableName Required - Name of an existing table or table to be created.\n+ * @param {string} schemaName Database schema name of the table. Defaults to \"public\".\n+ * @param {string} contentColumn Column that represent a Document's page_content. 
Defaults to \"content\".\n+ * @param {string} embeddingColumn Column for embedding vectors. The embedding is generated from the document value. Defaults to \"embedding\".\n+ * @param {Array} metadataColumns Column(s) that represent a document's metadata.\n+ * @param {Array} ignoreMetadataColumns Optional - Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns.\n+ * @param {string} idColumn Column that represents the Document's id. Defaults to \"langchain_id\".\n+ * @param {string} metadataJsonColumn Optional - Column to store metadata as JSON. Defaults to \"langchain_metadata\".\n+ * @param {DistanceStrategy} distanceStrategy Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.\n+ * @param {number} k Number of Documents to return from search. Defaults to 4.\n+ * @param {number} fetchK Number of Documents to fetch to pass to MMR algorithm.\n+ * @param {number} lambdaMult Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.\n+ * @param {QueryOptions} indexQueryOptions Optional - Index query option.\n+ * @returns PostgresVectorStore instance.\n+ */\n+ static async create(\n+ engine: PostgresEngine,\n+ embeddings: EmbeddingsInterface,\n+ tableName: string,\n+ {\n+ schemaName = \"public\",\n+ contentColumn = \"content\",\n+ embeddingColumn = \"embedding\",\n+ metadataColumns = [],\n+ ignoreMetadataColumns,\n+ idColumn = \"langchain_id\",\n+ metadataJsonColumn = \"langchain_metadata\",\n+ distanceStrategy = DEFAULT_DISTANCE_STRATEGY,\n+ k = 4,\n+ fetchK = 20,\n+ lambdaMult = 0.5,\n+ indexQueryOptions\n+ }: PostgresVectorStoreArgs = {}\n+ ): Promise {\n+\n+ if (metadataColumns !== undefined && ignoreMetadataColumns !== undefined) {\n+ throw Error(\"Can not use both metadata_columns and ignore_metadata_columns.\");\n+ }\n+\n+ const { rows } = await engine.pool.raw(`SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '${tableName}' AND table_schema = '${schemaName}'`);\n+ const columns: { [key: string] } = {};\n+\n+ for (const index in rows) {\n+ if (rows[index]) {\n+ const row = rows[index];\n+ columns[row.column_name] = row.data_type\n+ }\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, idColumn)) {\n+ throw Error(`Id column: ${idColumn}, does not exist.`);\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, contentColumn)) {\n+ throw Error(`Content column: ${contentColumn}, does not exist.`);\n+ }\n+\n+ const contentType = columns[contentColumn];\n+\n+ if (contentType !== \"text\" && !contentType.includes(\"char\")) {\n+ throw Error(`Content column: ${contentColumn}, is type: ${contentType}. It must be a type of character string.`);\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, embeddingColumn)) {\n+ throw Error(`Embedding column: ${embeddingColumn}, does not exist.`);\n+ }\n+\n+ if (columns[embeddingColumn] !== \"USER-DEFINED\") {\n+ throw Error(`Embedding column: ${embeddingColumn} is not of type Vector.`);\n+ }\n+\n+ const jsonColumn = Object.prototype.hasOwnProperty.call(columns, metadataJsonColumn) ? 
metadataJsonColumn : \"\";\n+\n+ for (const column of metadataColumns) {\n+ if (!Object.prototype.hasOwnProperty.call(columns, column)) {\n+ throw Error(`Metadata column: ${column}, does not exist.`);\n+ }\n+ }\n+\n+ const allColumns = columns;\n+ let allMetadataColumns: string[];\n+ if (ignoreMetadataColumns !== undefined && ignoreMetadataColumns.length > 0) {\n+ for (const column of ignoreMetadataColumns) {\n+ delete allColumns[column];\n+ }\n+\n+ delete allColumns[idColumn];\n+ delete allColumns[contentColumn];\n+ delete allColumns[embeddingColumn];\n+ allMetadataColumns = Object.keys(allColumns);\n+ }\n+ return new PostgresVectorStore(\n+ embeddings,\n+ {\n+ engine,\n+ tableName,\n+ schemaName,\n+ contentColumn,\n+ embeddingColumn,\n+ metadataColumns: allMetadataColumns,\n+ ignoreMetadataColumns,\n+ idColumn,\n+ metadataJsonColumn: jsonColumn,\n+ distanceStrategy,\n+ k,\n+ fetchK,\n+ lambdaMult,\n+ indexQueryOptions\n+ }\n+ )\n+ }\n+\n+ static async fromTexts(texts: string[], metadatas: object[] | object, embeddings: EmbeddingsInterface, dbConfig: dbConfigArgs): Promise {\n+ const documents: Document[] = [];\n+\n+ for (let i = 0; i < texts.length; i += 1) {\n+ const doc = new Document({\n+ pageContent: texts[i],\n+ metadata: Array.isArray(metadatas) ? metadatas[i] : metadatas\n+ })\n+ documents.push(doc);\n+ }\n+\n+ return PostgresVectorStore.fromDocuments(documents, embeddings, dbConfig);\n+ }\n+\n+ static async fromDocuments(docs: Document[], embeddings: EmbeddingsInterface, dbConfig: dbConfigArgs): Promise {\n+ const { engine } = dbConfig;\n+ const { tableName } = dbConfig;\n+ const config = dbConfig.dbConfig;\n+ const vectorStore = await this.create(engine, embeddings, tableName, config);\n+\n+ await vectorStore.addDocuments(docs)\n+\n+ return vectorStore;\n+ }\n+\n+ async addVectors(vectors: number[][], documents: Document[], options?: { ids?: string[] }): Promise {\n+ let ids: string[] = [];\n+ const metadatas: Record[] = []\n+\n+ if (vectors.length !== documents.length) {\n+ throw new Error(\"The number of vectors must match the number of documents provided.\");\n+ }\n+\n+ if (options?.ids) {\n+ ids = options.ids;\n+ } else {\n+ documents.forEach(document => {\n+ if (document.id !== undefined) {\n+ ids.push(document.id);\n+ } else {\n+ ids.push(uuidv4());\n+ }\n+ });\n+ }\n+\n+ if (options && options.ids && options.ids.length !== documents.length) {\n+ throw new Error(\"The number of ids must match the number of documents provided.\");\n+ }\n+\n+ documents.forEach(document => {\n+ metadatas.push(document.metadata)\n+ });\n+\n+ const tuples = customZip(ids, documents, vectors, metadatas);\n+\n+ // Insert embeddings\n+ for (const [id, document, embedding, metadata] of tuples) {\n+ const metadataColNames = this.metadataColumns.length > 0 ? 
`, \"${this.metadataColumns.join(\"\\\",\\\"\")}\"` : \"\";\n+\n+ let stmt = `INSERT INTO \"${this.schemaName}\".\"${this.tableName}\"(\"${this.idColumn}\", \"${this.contentColumn}\", \"${this.embeddingColumn}\" ${metadataColNames}`\n+ const values: { [key: string] } = {\n+ id,\n+ content: document.pageContent,\n+ embedding: `[${embedding.toString()}]`\n+ }\n+ let valuesStmt = \" VALUES (:id, :content, :embedding\";\n+\n+ // Add metadata\n+ const extra = metadata;\n+ for (const metadataColumn of this.metadataColumns) {\n+ if (Object.prototype.hasOwnProperty.call(metadata, metadataColumn)) {\n+ valuesStmt += `, :${metadataColumn}`;\n+ values[metadataColumn] = metadata[metadataColumn]\n+ delete extra[metadataColumn]\n+ } else {\n+ valuesStmt += \" ,null\"\n+ }\n+ }\n+\n+ // Add JSON column and/or close statement\n+ stmt += this.metadataJsonColumn ? `, ${this.metadataJsonColumn})` : \")\";", - "comment_created_at": "2025-03-24T05:49:51+00:00", - "comment_author": "jacoblee93", - "comment_body": "And the JSON column is just to make onboarding easier?", - "pr_file_module": null - }, - { - "comment_id": "2017533960", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7852, - "pr_file": "libs/langchain-google-cloud-sql-pg/src/vectorStore.ts", - "discussion_id": "2004667909", - "commented_code": "@@ -0,0 +1,423 @@\n+import { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { MaxMarginalRelevanceSearchOptions, VectorStore } from \"@langchain/core/vectorstores\";\n+import { Document } from \"@langchain/core/documents\";\n+import { v4 as uuidv4 } from \"uuid\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { DEFAULT_DISTANCE_STRATEGY, DistanceStrategy, QueryOptions } from \"./indexes.js\";\n+import PostgresEngine from \"./engine.js\";\n+import { customZip } from \"./utils/utils.js\";\n+\n+export interface PostgresVectorStoreArgs {\n+ schemaName?: string,\n+ contentColumn?: string,\n+ embeddingColumn?: string,\n+ metadataColumns?: Array,\n+ idColumn?: string,\n+ distanceStrategy?: DistanceStrategy,\n+ k?: number,\n+ fetchK?: number,\n+ lambdaMult?: number,\n+ ignoreMetadataColumns?: Array,\n+ metadataJsonColumn?: string,\n+ indexQueryOptions?: QueryOptions\n+}\n+\n+export interface dbConfigArgs {\n+ engine: PostgresEngine;\n+ tableName: string;\n+ dbConfig?: PostgresVectorStoreArgs;\n+}\n+\n+export class PostgresVectorStore extends VectorStore {\n+ declare FilterType: string;\n+\n+ engine: PostgresEngine;\n+\n+ embeddings: EmbeddingsInterface;\n+\n+ tableName: string;\n+\n+ schemaName: string;\n+\n+ contentColumn: string;\n+\n+ embeddingColumn: string;\n+\n+ metadataColumns: Array;\n+\n+ ignoreMetadataColumns: Array;\n+\n+ idColumn: string;\n+\n+ metadataJsonColumn: string;\n+\n+ distanceStrategy: DistanceStrategy;\n+\n+ k: number;\n+\n+ fetchK: number;\n+\n+ lambdaMult: number;\n+\n+ indexQueryOptions: QueryOptions;\n+\n+ /**\n+ * Initializes a new vector store with embeddings and database configuration.\n+ *\n+ * @param embeddings - Instance of `EmbeddingsInterface` used to embed queries.\n+ * @param dbConfig - Configuration settings for the database or storage system.\n+ */\n+ constructor(embeddings: EmbeddingsInterface, dbConfig: Record) {\n+ super(embeddings, dbConfig);\n+ this.embeddings = embeddings;\n+ this.engine = dbConfig.engine;\n+ this.tableName = dbConfig.tableName;\n+ this.schemaName = dbConfig.schemaName;\n+ this.contentColumn = dbConfig.contentColumn;\n+ this.embeddingColumn = dbConfig.embeddingColumn;\n+ 
this.metadataColumns = dbConfig.metadataColumns ? dbConfig.metadataColumns : [];\n+ this.ignoreMetadataColumns = dbConfig.ignoreMetadataColumns;\n+ this.idColumn = dbConfig.idColumn;\n+ this.metadataJsonColumn = dbConfig.metadataJsonColumn;\n+ this.distanceStrategy = dbConfig.distanceStrategy;\n+ this.k = dbConfig.k;\n+ this.fetchK = dbConfig.fetchK;\n+ this.lambdaMult = dbConfig.lambdaMult;\n+ this.indexQueryOptions = dbConfig.indexQueryOptions;\n+ }\n+\n+ /**\n+ * Create a new PostgresVectorStore instance.\n+ * @param {PostgresEngine} engine Required - Connection pool engine for managing connections to Cloud SQL for PostgreSQL database.\n+ * @param {Embeddings} embeddings Required - Text embedding model to use.\n+ * @param {string} tableName Required - Name of an existing table or table to be created.\n+ * @param {string} schemaName Database schema name of the table. Defaults to \"public\".\n+ * @param {string} contentColumn Column that represent a Document's page_content. Defaults to \"content\".\n+ * @param {string} embeddingColumn Column for embedding vectors. The embedding is generated from the document value. Defaults to \"embedding\".\n+ * @param {Array} metadataColumns Column(s) that represent a document's metadata.\n+ * @param {Array} ignoreMetadataColumns Optional - Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns.\n+ * @param {string} idColumn Column that represents the Document's id. Defaults to \"langchain_id\".\n+ * @param {string} metadataJsonColumn Optional - Column to store metadata as JSON. Defaults to \"langchain_metadata\".\n+ * @param {DistanceStrategy} distanceStrategy Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.\n+ * @param {number} k Number of Documents to return from search. Defaults to 4.\n+ * @param {number} fetchK Number of Documents to fetch to pass to MMR algorithm.\n+ * @param {number} lambdaMult Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. 
Defaults to 0.5.\n+ * @param {QueryOptions} indexQueryOptions Optional - Index query option.\n+ * @returns PostgresVectorStore instance.\n+ */\n+ static async create(\n+ engine: PostgresEngine,\n+ embeddings: EmbeddingsInterface,\n+ tableName: string,\n+ {\n+ schemaName = \"public\",\n+ contentColumn = \"content\",\n+ embeddingColumn = \"embedding\",\n+ metadataColumns = [],\n+ ignoreMetadataColumns,\n+ idColumn = \"langchain_id\",\n+ metadataJsonColumn = \"langchain_metadata\",\n+ distanceStrategy = DEFAULT_DISTANCE_STRATEGY,\n+ k = 4,\n+ fetchK = 20,\n+ lambdaMult = 0.5,\n+ indexQueryOptions\n+ }: PostgresVectorStoreArgs = {}\n+ ): Promise {\n+\n+ if (metadataColumns !== undefined && ignoreMetadataColumns !== undefined) {\n+ throw Error(\"Can not use both metadata_columns and ignore_metadata_columns.\");\n+ }\n+\n+ const { rows } = await engine.pool.raw(`SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '${tableName}' AND table_schema = '${schemaName}'`);\n+ const columns: { [key: string] } = {};\n+\n+ for (const index in rows) {\n+ if (rows[index]) {\n+ const row = rows[index];\n+ columns[row.column_name] = row.data_type\n+ }\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, idColumn)) {\n+ throw Error(`Id column: ${idColumn}, does not exist.`);\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, contentColumn)) {\n+ throw Error(`Content column: ${contentColumn}, does not exist.`);\n+ }\n+\n+ const contentType = columns[contentColumn];\n+\n+ if (contentType !== \"text\" && !contentType.includes(\"char\")) {\n+ throw Error(`Content column: ${contentColumn}, is type: ${contentType}. It must be a type of character string.`);\n+ }\n+\n+ if (!Object.prototype.hasOwnProperty.call(columns, embeddingColumn)) {\n+ throw Error(`Embedding column: ${embeddingColumn}, does not exist.`);\n+ }\n+\n+ if (columns[embeddingColumn] !== \"USER-DEFINED\") {\n+ throw Error(`Embedding column: ${embeddingColumn} is not of type Vector.`);\n+ }\n+\n+ const jsonColumn = Object.prototype.hasOwnProperty.call(columns, metadataJsonColumn) ? metadataJsonColumn : \"\";\n+\n+ for (const column of metadataColumns) {\n+ if (!Object.prototype.hasOwnProperty.call(columns, column)) {\n+ throw Error(`Metadata column: ${column}, does not exist.`);\n+ }\n+ }\n+\n+ const allColumns = columns;\n+ let allMetadataColumns: string[];\n+ if (ignoreMetadataColumns !== undefined && ignoreMetadataColumns.length > 0) {\n+ for (const column of ignoreMetadataColumns) {\n+ delete allColumns[column];\n+ }\n+\n+ delete allColumns[idColumn];\n+ delete allColumns[contentColumn];\n+ delete allColumns[embeddingColumn];\n+ allMetadataColumns = Object.keys(allColumns);\n+ }\n+ return new PostgresVectorStore(\n+ embeddings,\n+ {\n+ engine,\n+ tableName,\n+ schemaName,\n+ contentColumn,\n+ embeddingColumn,\n+ metadataColumns: allMetadataColumns,\n+ ignoreMetadataColumns,\n+ idColumn,\n+ metadataJsonColumn: jsonColumn,\n+ distanceStrategy,\n+ k,\n+ fetchK,\n+ lambdaMult,\n+ indexQueryOptions\n+ }\n+ )\n+ }\n+\n+ static async fromTexts(texts: string[], metadatas: object[] | object, embeddings: EmbeddingsInterface, dbConfig: dbConfigArgs): Promise {\n+ const documents: Document[] = [];\n+\n+ for (let i = 0; i < texts.length; i += 1) {\n+ const doc = new Document({\n+ pageContent: texts[i],\n+ metadata: Array.isArray(metadatas) ? 
metadatas[i] : metadatas\n+ })\n+ documents.push(doc);\n+ }\n+\n+ return PostgresVectorStore.fromDocuments(documents, embeddings, dbConfig);\n+ }\n+\n+ static async fromDocuments(docs: Document[], embeddings: EmbeddingsInterface, dbConfig: dbConfigArgs): Promise {\n+ const { engine } = dbConfig;\n+ const { tableName } = dbConfig;\n+ const config = dbConfig.dbConfig;\n+ const vectorStore = await this.create(engine, embeddings, tableName, config);\n+\n+ await vectorStore.addDocuments(docs)\n+\n+ return vectorStore;\n+ }\n+\n+ async addVectors(vectors: number[][], documents: Document[], options?: { ids?: string[] }): Promise {\n+ let ids: string[] = [];\n+ const metadatas: Record[] = []\n+\n+ if (vectors.length !== documents.length) {\n+ throw new Error(\"The number of vectors must match the number of documents provided.\");\n+ }\n+\n+ if (options?.ids) {\n+ ids = options.ids;\n+ } else {\n+ documents.forEach(document => {\n+ if (document.id !== undefined) {\n+ ids.push(document.id);\n+ } else {\n+ ids.push(uuidv4());\n+ }\n+ });\n+ }\n+\n+ if (options && options.ids && options.ids.length !== documents.length) {\n+ throw new Error(\"The number of ids must match the number of documents provided.\");\n+ }\n+\n+ documents.forEach(document => {\n+ metadatas.push(document.metadata)\n+ });\n+\n+ const tuples = customZip(ids, documents, vectors, metadatas);\n+\n+ // Insert embeddings\n+ for (const [id, document, embedding, metadata] of tuples) {\n+ const metadataColNames = this.metadataColumns.length > 0 ? `, \"${this.metadataColumns.join(\"\\\",\\\"\")}\"` : \"\";\n+\n+ let stmt = `INSERT INTO \"${this.schemaName}\".\"${this.tableName}\"(\"${this.idColumn}\", \"${this.contentColumn}\", \"${this.embeddingColumn}\" ${metadataColNames}`\n+ const values: { [key: string] } = {\n+ id,\n+ content: document.pageContent,\n+ embedding: `[${embedding.toString()}]`\n+ }\n+ let valuesStmt = \" VALUES (:id, :content, :embedding\";\n+\n+ // Add metadata\n+ const extra = metadata;\n+ for (const metadataColumn of this.metadataColumns) {\n+ if (Object.prototype.hasOwnProperty.call(metadata, metadataColumn)) {\n+ valuesStmt += `, :${metadataColumn}`;\n+ values[metadataColumn] = metadata[metadataColumn]\n+ delete extra[metadataColumn]\n+ } else {\n+ valuesStmt += \" ,null\"\n+ }\n+ }\n+\n+ // Add JSON column and/or close statement\n+ stmt += this.metadataJsonColumn ? `, ${this.metadataJsonColumn})` : \")\";", - "comment_created_at": "2025-03-27T20:07:53+00:00", - "comment_author": "averikitsch", - "comment_body": "Yes and we don't want users to lose metadata if not specifically defined ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1456406645", - "pr_number": 4044, - "pr_file": "libs/langchain-community/src/vectorstores/azure_aisearch.ts", - "created_at": "2024-01-17T19:58:12+00:00", - "commented_code": "import * as uuid from \"uuid\";\nimport {\n SearchClient,\n SearchIndexClient,\n AzureKeyCredential,\n IndexingResult,\n SearchIndex,\n} from \"@azure/search-documents\";\nimport {\n MaxMarginalRelevanceSearchOptions,\n VectorStore,\n} from \"@langchain/core/vectorstores\";\nimport type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\nimport { Document } from \"@langchain/core/documents\";\nimport { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\nimport { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n\n/**\n * Azure AI Search query type.\n */\nexport const AzureAISearchQueryType = {\n /** Vector search. 
*/\n Similarity: \"similarity\",\n /** Hybrid full text and vector search. */\n SimilarityHybrid: \"similarity_hybrid\",\n /** Hybrid full text and vector search with semantic ranking. */\n SemanticHybrid: \"semantic_hybrid\",\n} as const;\n\n/**\n * Azure AI Search query type.\n */\nexport type AzureAISearchQueryType =\n (typeof AzureAISearchQueryType)[keyof typeof AzureAISearchQueryType];\n\n/**\n * Azure AI Search settings.\n */\nexport interface AzureAISearchQueryOptions {\n readonly type: AzureAISearchQueryType;\n readonly semanticConfigurationName?: string;\n}\n\n/**\n * Configuration options for the `AzureAISearchStore` constructor.\n */\nexport interface AzureAISearchConfig {\n readonly client?: SearchClient;\n readonly indexName?: string;\n readonly endpoint?: string;\n readonly key?: string;\n readonly search: AzureAISearchQueryOptions;\n /**\n * The amount of documents to chunk by when adding vectors.\n * @default 100\n */\n readonly chunkSize?: number;\n /**\n * The amount of documents to embed at once when adding documents.\n * Note that some providers like Azure OpenAI can only embed 16 documents\n * at a time.\n * @default 16\n */\n readonly embeddingBatchSize?: number;\n}\n\n/**\n * Azure AI Search options metadata schema.\n * If yout want to add custom data, use the attributes property.\n */\nexport type AzureAISearchDocumentMetadata = {\n source: string;\n attributes?: Array<{ key: string; value: string }>;\n};\n\n/**\n * Azure AI Search indexed document.\n */\nexport type AzureAISearchDocument = {\n id: string;\n content: string;\n content_vector: number[];\n metadata: AzureAISearchDocumentMetadata;\n};\n\n/**\n * Azure AI Search options for adding documents.\n */\nexport type AzureAISearchAddDocumentsOptions = {\n ids?: string[];\n};\n\nconst DEFAULT_FIELD_ID = \"id\";\nconst DEFAULT_FIELD_CONTENT = \"content\";\nconst DEFAULT_FIELD_CONTENT_VECTOR = \"content_vector\";\nconst DEFAULT_FIELD_METADATA = \"metadata\";\nconst DEFAULT_FIELD_METADATA_SOURCE = \"source\";\nconst DEFAULT_FIELD_METADATA_ATTRS = \"attributes\";\n\n/**\n * Azure AI Search vector store.\n * To use this, you should have:\n * - the `@azure/search-documents` NPM package installed\n * - an endpoint and key to the Azure AI Search instance\n *\n * If you directly provide a `SearchClient` instance, you need to ensure that\n * an index has been created. When using and endpoint and key, the index will\n * be created automatically if it does not exist.\n */\nexport class AzureAISearchVectorStore extends VectorStore {\n declare FilterType: string;\n\n get lc_secrets(): { [key: string]: string } {\n return {\n endpoint: \"AZURE_AISEARCH_ENDPOINT\",\n key: \"AZURE_AISEARCH_KEY\",\n };\n }\n\n _vectorstoreType(): string {\n return \"azure_aisearch\";\n }\n\n private readonly initPromise: Promise;\n\n private readonly client: SearchClient;\n\n private readonly indexName: string;\n\n private readonly chunkSize: number;\n\n private readonly embeddingBatchSize: number;\n\n private readonly options: AzureAISearchQueryOptions;\n\n constructor(embeddings: EmbeddingsInterface, config: AzureAISearchConfig) {\n super(embeddings, config);\n\n const endpoint =\n config.endpoint ?? getEnvironmentVariable(\"AZURE_AISEARCH_ENDPOINT\");\n const key = config.key ?? getEnvironmentVariable(\"AZURE_AISEARCH_KEY\");\n\n if (!config.client && !endpoint && !key) {\n throw new Error(\n \"Azure AI Search client or connection string must be set.\"\n );\n }\n\n this.indexName = config.indexName ?? 
\"vectorsearch\";\n this.chunkSize = config.chunkSize ?? 100;\n this.embeddingBatchSize = config.embeddingBatchSize ?? 16;\n\n if (!config.client) {\n // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n const credential = new AzureKeyCredential(key!);\n // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n this.client = new SearchClient(endpoint!, this.indexName, credential);\n // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n const indexClient = new SearchIndexClient(endpoint!, credential);\n\n // Start initialization, but don't wait for it to finish here\n this.initPromise = this.ensureIndexExists(indexClient).catch((error) => {\n console.error(\n \"Error during Azure AI Search index initialization:\",\n error\n );\n });\n } else {\n this.client = config.client;\n }\n\n this.options = config.search;\n this.embeddings = embeddings;\n }\n\n /**\n * Removes specified documents from the AzureAISearchVectorStore using a filter.\n * @param filter OData filter to find documents to delete.\n * @returns A promise that resolves when the documents have been removed.\n */\n async deleteMany(filter: string): Promise {\n const { results } = await this.client.search(\"*\", {\n filter,\n });\n\n const ids: string[] = [];\n for await (const item of results) {\n ids.push(item.document.id);\n }\n\n const { results: deleteResults } = await this.client.deleteDocuments(\n DEFAULT_FIELD_ID,\n ids\n );\n return deleteResults;\n }\n\n /**\n * Removes specified documents from the AzureAISearchVectorStore.\n * @param ids IDs of the documents to be removed.\n * @returns A promise that resolves when the documents have been removed.\n */\n async deleteById(ids: string | string[]): Promise {\n await this.initPromise;\n const { results } = await this.client.deleteDocuments(\n DEFAULT_FIELD_ID,\n Array.isArray(ids) ? ids : [ids]\n );\n return results;\n }\n\n /**\n * Adds documents to the AzureAISearchVectorStore.\n * Documents are chunked into batches of size `embeddingBatchSize` then\n * embedded and added to the AzureAISearchVectorStore.\n * @param documents The documents to add.\n * @param options Options for adding documents.\n * @returns A promise that resolves to the ids of the added documents.\n */\n async addDocuments(\n documents: Document[],\n options?: AzureAISearchAddDocumentsOptions\n ) {\n const texts = documents.map(({ pageContent }) => pageContent);\n const results: string[] = [];\n\n for (let i = 0; i < texts.length; i += this.embeddingBatchSize) {\n const batch = texts.slice(i, i + this.embeddingBatchSize);\n const docsBatch = documents.slice(i, i + this.embeddingBatchSize);\n const batchEmbeddings: number[][] = await this.embeddings.embedDocuments(\n batch\n );\n const batchResult = await this.addVectors(\n batchEmbeddings,\n docsBatch,\n options\n );\n\n results.push(...batchResult);\n }\n\n return results;\n }\n\n /**\n * Adds vectors to the AzureAISearchVectorStore.\n * @param vectors Vectors to be added.\n * @param documents Corresponding documents to be added.\n * @param options Options for adding documents.\n * @returns A promise that resolves to the ids of the added documents.\n */\n async addVectors(\n vectors: number[][],\n documents: Document[],\n options?: AzureAISearchAddDocumentsOptions\n ): Promise {\n const ids = options?.ids ?? 
documents.map(() => uuid.v4());\n const entities: AzureAISearchDocument[] = documents.map((doc, idx) => ({\n id: ids[idx],\n content: doc.pageContent,\n content_vector: vectors[idx],\n metadata: {\n source: doc.metadata?.source,\n attributes: doc.metadata?.attributes ?? [],\n },\n }));\n\n await this.initPromise;\n for (let i = 0; i < entities.length; i += this.chunkSize) {\n const chunk = entities.slice(i, i + this.chunkSize);\n await this.client.uploadDocuments(chunk, { throwOnAnyFailure: true });\n }\n\n return ids;\n }\n\n /**\n * Performs a similarity search using query type specified in configuration.\n * @param query Query text for the similarity search.\n * @param k=4 Number of nearest neighbors to return.\n * @param filter Optional OData filter for the documents.\n * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n */\n async similaritySearch(\n query: string,\n k = 4,\n filter: this[\"FilterType\"] | undefined = undefined\n ): Promise {\n const results = await this.similaritySearchWithScore(query, k, filter);\n\n return results.map((result) => result[0]);\n }\n\n /**\n * Performs a similarity search using query type specified in configuration.\n * @param query Query text for the similarity search.\n * @param k=4 Number of nearest neighbors to return.\n * @param filter Optional OData filter for the documents.\n * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n */\n async similaritySearchWithScore(\n query: string,\n k = 4,\n filter: this[\"FilterType\"] | undefined = undefined\n ): Promise<[Document, number][]> {\n const searchType = this.options.type;\n\n if (searchType === AzureAISearchQueryType.Similarity) {\n return this.similaritySearchVectorWithScore(\n await this.embeddings.embedQuery(query),\n k,\n filter\n );\n } else if (searchType === AzureAISearchQueryType.SimilarityHybrid) {\n return this.hybridSearchVectorWithScore(\n query,\n await this.embeddings.embedQuery(query),\n k,\n filter\n );\n } else if (searchType === AzureAISearchQueryType.SemanticHybrid) {\n return this.semanticHybridSearchVectorWithScore(\n query,\n await this.embeddings.embedQuery(query),\n k,\n filter\n );\n }\n\n throw new Error(`Unrecognized search type '${searchType}'`);\n }\n\n /**\n * Performs a hybrid search using query text.\n * @param query Query text for the similarity search.\n * @param queryVector Query vector for the similarity search.\n * If not provided, the query text will be embedded.\n * @param k=4 Number of nearest neighbors to return.\n * @param filter Optional OData filter for the documents.\n * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n */\n async hybridSearchVectorWithScore(\n query: string,\n queryVector?: number[],\n k = 4,\n filter: string | undefined = undefined\n ): Promise<[Document, number][]> {\n const vector = queryVector ?? 
(await this.embeddings.embedQuery(query));\n\n await this.initPromise;\n const { results } = await this.client.search(query, {\n vectorSearchOptions: {\n queries: [\n {\n kind: \"vector\",\n vector,\n kNearestNeighborsCount: k,\n fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n },\n ],\n },\n filter,\n top: k,\n });\n\n const docsWithScore: [Document, number][] = [];\n\n for await (const item of results) {\n const document = new Document<\n AzureAISearchDocumentMetadata & { embedding: number[] }\n >({\n pageContent: item.document[DEFAULT_FIELD_CONTENT],\n metadata: {\n ...item.document[DEFAULT_FIELD_METADATA],\n embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n },\n });\n docsWithScore.push([document, item.score]);\n }\n\n return docsWithScore;\n }\n\n /**\n * Performs a hybrid search with semantic reranker using query text.\n * @param query Query text for the similarity search.\n * @param queryVector Query vector for the similarity search.\n * If not provided, the query text will be embedded.\n * @param k=4 Number of nearest neighbors to return.\n * @param filter Optional OData filter for the documents.\n * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n */\n async semanticHybridSearchVectorWithScore(\n query: string,\n queryVector?: number[],\n k = 4,\n filter: string | undefined = undefined\n ): Promise<[Document, number][]> {\n const vector = queryVector ?? (await this.embeddings.embedQuery(query));\n\n await this.initPromise;\n const { results } = await this.client.search(query, {\n vectorSearchOptions: {\n queries: [\n {\n kind: \"vector\",\n vector,\n kNearestNeighborsCount: k,\n fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n },\n ],\n },\n filter,\n top: k,\n queryType: \"semantic\",\n semanticSearchOptions: {\n configurationName: \"semantic-search-config\",\n captions: {\n captionType: \"extractive\",\n },\n answers: {\n answerType: \"extractive\",\n },\n },\n });\n\n const docsWithScore: [Document, number][] = [];\n\n for await (const item of results) {\n const document = new Document<\n AzureAISearchDocumentMetadata & { embedding: number[] }\n >({\n pageContent: item.document[DEFAULT_FIELD_CONTENT],\n metadata: {\n ...item.document[DEFAULT_FIELD_METADATA],\n embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n },\n });\n docsWithScore.push([document, item.score]);\n }\n\n return docsWithScore;\n }\n\n /**\n * Performs a similarity search on the vectors stored in the collection.\n * @param queryVector Query vector for the similarity search.\n * @param k=4 Number of nearest neighbors to return.\n * @param filter string OData filter for the documents.\n * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n */\n async similaritySearchVectorWithScore(\n query: number[],\n k: number,\n filter?: string\n ): Promise<[Document, number][]> {\n await this.initPromise;\n\n const { results } = await this.client.search(\"*\", {\n vectorSearchOptions: {\n queries: [\n {\n kind: \"vector\",\n vector: query,\n kNearestNeighborsCount: k,\n fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n },\n ],\n },\n filter,\n });\n\n const docsWithScore: [Document, number][] = [];\n\n for await (const item of results) {\n const document = new Document<\n AzureAISearchDocumentMetadata & { embedding: number[] }\n >({\n pageContent: item.document[DEFAULT_FIELD_CONTENT],\n metadata: {\n ...item.document[DEFAULT_FIELD_METADATA],\n embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n },\n });\n 
docsWithScore.push([document, item.score]);\n }\n\n return docsWithScore;\n }\n\n /**\n * Return documents selected using the maximal marginal relevance.\n * Maximal marginal relevance optimizes for similarity to the query AND\n * diversity among selected documents.\n * @param query Text to look up documents similar to.\n * @param options.k Number of documents to return.\n * @param options.fetchK=20 Number of documents to fetch before passing to\n * the MMR algorithm.\n * @param options.lambda=0.5 Number between 0 and 1 that determines the\n * degree of diversity among the results, where 0 corresponds to maximum\n * diversity and 1 to minimum diversity.\n * @returns List of documents selected by maximal marginal relevance.\n */\n async maxMarginalRelevanceSearch(\n query: string,\n options: MaxMarginalRelevanceSearchOptions\n ): Promise {\n const { k, fetchK = 20, lambda = 0.5 } = options;\n\n const queryEmbedding = await this.embeddings.embedQuery(query);\n const docs = await this.similaritySearchVectorWithScore(\n queryEmbedding,\n fetchK\n );\n const embeddingList = docs.map((doc) => doc[0].metadata.embedding);\n\n // Re-rank the results using MMR\n const mmrIndexes = maximalMarginalRelevance(\n queryEmbedding,\n embeddingList,\n lambda,\n k\n );\n\n const mmrDocs = mmrIndexes.map((index) => docs[index][0]);\n return mmrDocs;\n }\n\n /**\n * Ensures that an index exists on the AzureAISearchVectorStore.\n * @param indexClient The Azure AI Search index client.\n * @returns A promise that resolves when the AzureAISearchVectorStore index has been initialized.\n * @protected\n */\n protected async ensureIndexExists(\n indexClient: SearchIndexClient\n ): Promise {\n try {\n await indexClient.getIndex(this.indexName);\n } catch (e) {\n // Index does not exists, create it\n const searchIndex = this.createSearchIndexDefinition(this.indexName);\n await indexClient.createIndex(searchIndex);\n }\n }\n\n /**\n * Prepares the search index definition for Azure AI Search.\n * @param indexName The name of the index.\n * @returns The SearchIndex object.\n * @protected\n */\n protected createSearchIndexDefinition(indexName: string): SearchIndex {\n return {\n name: indexName,\n vectorSearch: {\n algorithms: [\n {\n name: \"vector-search-algorithm\",\n kind: \"hnsw\",\n parameters: {\n m: 4,\n efSearch: 500,\n metric: \"cosine\",\n efConstruction: 400,\n },\n },\n ],\n profiles: [\n {\n name: \"vector-search-profile\",\n algorithmConfigurationName: \"vector-search-algorithm\",\n },\n ],\n },\n semanticSearch: {\n defaultConfigurationName: \"semantic-search-config\",\n configurations: [\n {\n name: \"semantic-search-config\",\n prioritizedFields: {\n contentFields: [\n {\n name: DEFAULT_FIELD_CONTENT,\n },\n ],\n keywordsFields: [\n {\n name: DEFAULT_FIELD_CONTENT,\n },\n ],\n },\n },\n ],\n },\n fields: [\n {\n name: DEFAULT_FIELD_ID,\n filterable: true,\n key: true,\n type: \"Edm.String\",\n },\n {\n name: DEFAULT_FIELD_CONTENT,\n searchable: true,\n filterable: true,\n type: \"Edm.String\",\n },\n {\n name: DEFAULT_FIELD_CONTENT_VECTOR,\n searchable: true,\n type: \"Collection(Edm.Single)\",\n vectorSearchDimensions: 1536,\n vectorSearchProfileName: \"vector-search-profile\",\n },\n {\n name: DEFAULT_FIELD_METADATA,\n type: \"Edm.ComplexType\",", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1456406645", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 4044, - "pr_file": "libs/langchain-community/src/vectorstores/azure_aisearch.ts", - 
"discussion_id": "1456406645", - "commented_code": "@@ -0,0 +1,694 @@\n+import * as uuid from \"uuid\";\n+import {\n+ SearchClient,\n+ SearchIndexClient,\n+ AzureKeyCredential,\n+ IndexingResult,\n+ SearchIndex,\n+} from \"@azure/search-documents\";\n+import {\n+ MaxMarginalRelevanceSearchOptions,\n+ VectorStore,\n+} from \"@langchain/core/vectorstores\";\n+import type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { Document } from \"@langchain/core/documents\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export const AzureAISearchQueryType = {\n+ /** Vector search. */\n+ Similarity: \"similarity\",\n+ /** Hybrid full text and vector search. */\n+ SimilarityHybrid: \"similarity_hybrid\",\n+ /** Hybrid full text and vector search with semantic ranking. */\n+ SemanticHybrid: \"semantic_hybrid\",\n+} as const;\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export type AzureAISearchQueryType =\n+ (typeof AzureAISearchQueryType)[keyof typeof AzureAISearchQueryType];\n+\n+/**\n+ * Azure AI Search settings.\n+ */\n+export interface AzureAISearchQueryOptions {\n+ readonly type: AzureAISearchQueryType;\n+ readonly semanticConfigurationName?: string;\n+}\n+\n+/**\n+ * Configuration options for the `AzureAISearchStore` constructor.\n+ */\n+export interface AzureAISearchConfig {\n+ readonly client?: SearchClient;\n+ readonly indexName?: string;\n+ readonly endpoint?: string;\n+ readonly key?: string;\n+ readonly search: AzureAISearchQueryOptions;\n+ /**\n+ * The amount of documents to chunk by when adding vectors.\n+ * @default 100\n+ */\n+ readonly chunkSize?: number;\n+ /**\n+ * The amount of documents to embed at once when adding documents.\n+ * Note that some providers like Azure OpenAI can only embed 16 documents\n+ * at a time.\n+ * @default 16\n+ */\n+ readonly embeddingBatchSize?: number;\n+}\n+\n+/**\n+ * Azure AI Search options metadata schema.\n+ * If yout want to add custom data, use the attributes property.\n+ */\n+export type AzureAISearchDocumentMetadata = {\n+ source: string;\n+ attributes?: Array<{ key: string; value: string }>;\n+};\n+\n+/**\n+ * Azure AI Search indexed document.\n+ */\n+export type AzureAISearchDocument = {\n+ id: string;\n+ content: string;\n+ content_vector: number[];\n+ metadata: AzureAISearchDocumentMetadata;\n+};\n+\n+/**\n+ * Azure AI Search options for adding documents.\n+ */\n+export type AzureAISearchAddDocumentsOptions = {\n+ ids?: string[];\n+};\n+\n+const DEFAULT_FIELD_ID = \"id\";\n+const DEFAULT_FIELD_CONTENT = \"content\";\n+const DEFAULT_FIELD_CONTENT_VECTOR = \"content_vector\";\n+const DEFAULT_FIELD_METADATA = \"metadata\";\n+const DEFAULT_FIELD_METADATA_SOURCE = \"source\";\n+const DEFAULT_FIELD_METADATA_ATTRS = \"attributes\";\n+\n+/**\n+ * Azure AI Search vector store.\n+ * To use this, you should have:\n+ * - the `@azure/search-documents` NPM package installed\n+ * - an endpoint and key to the Azure AI Search instance\n+ *\n+ * If you directly provide a `SearchClient` instance, you need to ensure that\n+ * an index has been created. 
When using and endpoint and key, the index will\n+ * be created automatically if it does not exist.\n+ */\n+export class AzureAISearchVectorStore extends VectorStore {\n+ declare FilterType: string;\n+\n+ get lc_secrets(): { [key: string]: string } {\n+ return {\n+ endpoint: \"AZURE_AISEARCH_ENDPOINT\",\n+ key: \"AZURE_AISEARCH_KEY\",\n+ };\n+ }\n+\n+ _vectorstoreType(): string {\n+ return \"azure_aisearch\";\n+ }\n+\n+ private readonly initPromise: Promise;\n+\n+ private readonly client: SearchClient;\n+\n+ private readonly indexName: string;\n+\n+ private readonly chunkSize: number;\n+\n+ private readonly embeddingBatchSize: number;\n+\n+ private readonly options: AzureAISearchQueryOptions;\n+\n+ constructor(embeddings: EmbeddingsInterface, config: AzureAISearchConfig) {\n+ super(embeddings, config);\n+\n+ const endpoint =\n+ config.endpoint ?? getEnvironmentVariable(\"AZURE_AISEARCH_ENDPOINT\");\n+ const key = config.key ?? getEnvironmentVariable(\"AZURE_AISEARCH_KEY\");\n+\n+ if (!config.client && !endpoint && !key) {\n+ throw new Error(\n+ \"Azure AI Search client or connection string must be set.\"\n+ );\n+ }\n+\n+ this.indexName = config.indexName ?? \"vectorsearch\";\n+ this.chunkSize = config.chunkSize ?? 100;\n+ this.embeddingBatchSize = config.embeddingBatchSize ?? 16;\n+\n+ if (!config.client) {\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ const credential = new AzureKeyCredential(key!);\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ this.client = new SearchClient(endpoint!, this.indexName, credential);\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ const indexClient = new SearchIndexClient(endpoint!, credential);\n+\n+ // Start initialization, but don't wait for it to finish here\n+ this.initPromise = this.ensureIndexExists(indexClient).catch((error) => {\n+ console.error(\n+ \"Error during Azure AI Search index initialization:\",\n+ error\n+ );\n+ });\n+ } else {\n+ this.client = config.client;\n+ }\n+\n+ this.options = config.search;\n+ this.embeddings = embeddings;\n+ }\n+\n+ /**\n+ * Removes specified documents from the AzureAISearchVectorStore using a filter.\n+ * @param filter OData filter to find documents to delete.\n+ * @returns A promise that resolves when the documents have been removed.\n+ */\n+ async deleteMany(filter: string): Promise {\n+ const { results } = await this.client.search(\"*\", {\n+ filter,\n+ });\n+\n+ const ids: string[] = [];\n+ for await (const item of results) {\n+ ids.push(item.document.id);\n+ }\n+\n+ const { results: deleteResults } = await this.client.deleteDocuments(\n+ DEFAULT_FIELD_ID,\n+ ids\n+ );\n+ return deleteResults;\n+ }\n+\n+ /**\n+ * Removes specified documents from the AzureAISearchVectorStore.\n+ * @param ids IDs of the documents to be removed.\n+ * @returns A promise that resolves when the documents have been removed.\n+ */\n+ async deleteById(ids: string | string[]): Promise {\n+ await this.initPromise;\n+ const { results } = await this.client.deleteDocuments(\n+ DEFAULT_FIELD_ID,\n+ Array.isArray(ids) ? 
ids : [ids]\n+ );\n+ return results;\n+ }\n+\n+ /**\n+ * Adds documents to the AzureAISearchVectorStore.\n+ * Documents are chunked into batches of size `embeddingBatchSize` then\n+ * embedded and added to the AzureAISearchVectorStore.\n+ * @param documents The documents to add.\n+ * @param options Options for adding documents.\n+ * @returns A promise that resolves to the ids of the added documents.\n+ */\n+ async addDocuments(\n+ documents: Document[],\n+ options?: AzureAISearchAddDocumentsOptions\n+ ) {\n+ const texts = documents.map(({ pageContent }) => pageContent);\n+ const results: string[] = [];\n+\n+ for (let i = 0; i < texts.length; i += this.embeddingBatchSize) {\n+ const batch = texts.slice(i, i + this.embeddingBatchSize);\n+ const docsBatch = documents.slice(i, i + this.embeddingBatchSize);\n+ const batchEmbeddings: number[][] = await this.embeddings.embedDocuments(\n+ batch\n+ );\n+ const batchResult = await this.addVectors(\n+ batchEmbeddings,\n+ docsBatch,\n+ options\n+ );\n+\n+ results.push(...batchResult);\n+ }\n+\n+ return results;\n+ }\n+\n+ /**\n+ * Adds vectors to the AzureAISearchVectorStore.\n+ * @param vectors Vectors to be added.\n+ * @param documents Corresponding documents to be added.\n+ * @param options Options for adding documents.\n+ * @returns A promise that resolves to the ids of the added documents.\n+ */\n+ async addVectors(\n+ vectors: number[][],\n+ documents: Document[],\n+ options?: AzureAISearchAddDocumentsOptions\n+ ): Promise {\n+ const ids = options?.ids ?? documents.map(() => uuid.v4());\n+ const entities: AzureAISearchDocument[] = documents.map((doc, idx) => ({\n+ id: ids[idx],\n+ content: doc.pageContent,\n+ content_vector: vectors[idx],\n+ metadata: {\n+ source: doc.metadata?.source,\n+ attributes: doc.metadata?.attributes ?? 
[],\n+ },\n+ }));\n+\n+ await this.initPromise;\n+ for (let i = 0; i < entities.length; i += this.chunkSize) {\n+ const chunk = entities.slice(i, i + this.chunkSize);\n+ await this.client.uploadDocuments(chunk, { throwOnAnyFailure: true });\n+ }\n+\n+ return ids;\n+ }\n+\n+ /**\n+ * Performs a similarity search using query type specified in configuration.\n+ * @param query Query text for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearch(\n+ query: string,\n+ k = 4,\n+ filter: this[\"FilterType\"] | undefined = undefined\n+ ): Promise {\n+ const results = await this.similaritySearchWithScore(query, k, filter);\n+\n+ return results.map((result) => result[0]);\n+ }\n+\n+ /**\n+ * Performs a similarity search using query type specified in configuration.\n+ * @param query Query text for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearchWithScore(\n+ query: string,\n+ k = 4,\n+ filter: this[\"FilterType\"] | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const searchType = this.options.type;\n+\n+ if (searchType === AzureAISearchQueryType.Similarity) {\n+ return this.similaritySearchVectorWithScore(\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ } else if (searchType === AzureAISearchQueryType.SimilarityHybrid) {\n+ return this.hybridSearchVectorWithScore(\n+ query,\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ } else if (searchType === AzureAISearchQueryType.SemanticHybrid) {\n+ return this.semanticHybridSearchVectorWithScore(\n+ query,\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ }\n+\n+ throw new Error(`Unrecognized search type '${searchType}'`);\n+ }\n+\n+ /**\n+ * Performs a hybrid search using query text.\n+ * @param query Query text for the similarity search.\n+ * @param queryVector Query vector for the similarity search.\n+ * If not provided, the query text will be embedded.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async hybridSearchVectorWithScore(\n+ query: string,\n+ queryVector?: number[],\n+ k = 4,\n+ filter: string | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const vector = queryVector ?? 
(await this.embeddings.embedQuery(query));\n+\n+ await this.initPromise;\n+ const { results } = await this.client.search(query, {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ top: k,\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ ...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Performs a hybrid search with semantic reranker using query text.\n+ * @param query Query text for the similarity search.\n+ * @param queryVector Query vector for the similarity search.\n+ * If not provided, the query text will be embedded.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async semanticHybridSearchVectorWithScore(\n+ query: string,\n+ queryVector?: number[],\n+ k = 4,\n+ filter: string | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const vector = queryVector ?? (await this.embeddings.embedQuery(query));\n+\n+ await this.initPromise;\n+ const { results } = await this.client.search(query, {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ top: k,\n+ queryType: \"semantic\",\n+ semanticSearchOptions: {\n+ configurationName: \"semantic-search-config\",\n+ captions: {\n+ captionType: \"extractive\",\n+ },\n+ answers: {\n+ answerType: \"extractive\",\n+ },\n+ },\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ ...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Performs a similarity search on the vectors stored in the collection.\n+ * @param queryVector Query vector for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter string OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearchVectorWithScore(\n+ query: number[],\n+ k: number,\n+ filter?: string\n+ ): Promise<[Document, number][]> {\n+ await this.initPromise;\n+\n+ const { results } = await this.client.search(\"*\", {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector: query,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ 
...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Return documents selected using the maximal marginal relevance.\n+ * Maximal marginal relevance optimizes for similarity to the query AND\n+ * diversity among selected documents.\n+ * @param query Text to look up documents similar to.\n+ * @param options.k Number of documents to return.\n+ * @param options.fetchK=20 Number of documents to fetch before passing to\n+ * the MMR algorithm.\n+ * @param options.lambda=0.5 Number between 0 and 1 that determines the\n+ * degree of diversity among the results, where 0 corresponds to maximum\n+ * diversity and 1 to minimum diversity.\n+ * @returns List of documents selected by maximal marginal relevance.\n+ */\n+ async maxMarginalRelevanceSearch(\n+ query: string,\n+ options: MaxMarginalRelevanceSearchOptions\n+ ): Promise {\n+ const { k, fetchK = 20, lambda = 0.5 } = options;\n+\n+ const queryEmbedding = await this.embeddings.embedQuery(query);\n+ const docs = await this.similaritySearchVectorWithScore(\n+ queryEmbedding,\n+ fetchK\n+ );\n+ const embeddingList = docs.map((doc) => doc[0].metadata.embedding);\n+\n+ // Re-rank the results using MMR\n+ const mmrIndexes = maximalMarginalRelevance(\n+ queryEmbedding,\n+ embeddingList,\n+ lambda,\n+ k\n+ );\n+\n+ const mmrDocs = mmrIndexes.map((index) => docs[index][0]);\n+ return mmrDocs;\n+ }\n+\n+ /**\n+ * Ensures that an index exists on the AzureAISearchVectorStore.\n+ * @param indexClient The Azure AI Search index client.\n+ * @returns A promise that resolves when the AzureAISearchVectorStore index has been initialized.\n+ * @protected\n+ */\n+ protected async ensureIndexExists(\n+ indexClient: SearchIndexClient\n+ ): Promise {\n+ try {\n+ await indexClient.getIndex(this.indexName);\n+ } catch (e) {\n+ // Index does not exists, create it\n+ const searchIndex = this.createSearchIndexDefinition(this.indexName);\n+ await indexClient.createIndex(searchIndex);\n+ }\n+ }\n+\n+ /**\n+ * Prepares the search index definition for Azure AI Search.\n+ * @param indexName The name of the index.\n+ * @returns The SearchIndex object.\n+ * @protected\n+ */\n+ protected createSearchIndexDefinition(indexName: string): SearchIndex {\n+ return {\n+ name: indexName,\n+ vectorSearch: {\n+ algorithms: [\n+ {\n+ name: \"vector-search-algorithm\",\n+ kind: \"hnsw\",\n+ parameters: {\n+ m: 4,\n+ efSearch: 500,\n+ metric: \"cosine\",\n+ efConstruction: 400,\n+ },\n+ },\n+ ],\n+ profiles: [\n+ {\n+ name: \"vector-search-profile\",\n+ algorithmConfigurationName: \"vector-search-algorithm\",\n+ },\n+ ],\n+ },\n+ semanticSearch: {\n+ defaultConfigurationName: \"semantic-search-config\",\n+ configurations: [\n+ {\n+ name: \"semantic-search-config\",\n+ prioritizedFields: {\n+ contentFields: [\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ },\n+ ],\n+ keywordsFields: [\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ },\n+ ],\n+ },\n+ },\n+ ],\n+ },\n+ fields: [\n+ {\n+ name: DEFAULT_FIELD_ID,\n+ filterable: true,\n+ key: true,\n+ type: \"Edm.String\",\n+ },\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ searchable: true,\n+ filterable: true,\n+ type: \"Edm.String\",\n+ },\n+ {\n+ name: DEFAULT_FIELD_CONTENT_VECTOR,\n+ searchable: true,\n+ type: \"Collection(Edm.Single)\",\n+ vectorSearchDimensions: 1536,\n+ vectorSearchProfileName: \"vector-search-profile\",\n+ },\n+ {\n+ name: DEFAULT_FIELD_METADATA,\n+ type: \"Edm.ComplexType\",", - 
"comment_created_at": "2024-01-17T19:58:12+00:00", - "comment_author": "pablocastro", - "comment_body": "Are the metadata fields known at index time? If they were, creating actual filterable fields of the right types instead of a dictionary-style complex type would be better, and you'd get more type fidelity (e.g. numbers, dates, etc.) and filter expressions would be richer (e.g. numeric/date range comparisons).", - "pr_file_module": null - }, - { - "comment_id": "1457277213", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 4044, - "pr_file": "libs/langchain-community/src/vectorstores/azure_aisearch.ts", - "discussion_id": "1456406645", - "commented_code": "@@ -0,0 +1,694 @@\n+import * as uuid from \"uuid\";\n+import {\n+ SearchClient,\n+ SearchIndexClient,\n+ AzureKeyCredential,\n+ IndexingResult,\n+ SearchIndex,\n+} from \"@azure/search-documents\";\n+import {\n+ MaxMarginalRelevanceSearchOptions,\n+ VectorStore,\n+} from \"@langchain/core/vectorstores\";\n+import type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { Document } from \"@langchain/core/documents\";\n+import { maximalMarginalRelevance } from \"@langchain/core/utils/math\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export const AzureAISearchQueryType = {\n+ /** Vector search. */\n+ Similarity: \"similarity\",\n+ /** Hybrid full text and vector search. */\n+ SimilarityHybrid: \"similarity_hybrid\",\n+ /** Hybrid full text and vector search with semantic ranking. */\n+ SemanticHybrid: \"semantic_hybrid\",\n+} as const;\n+\n+/**\n+ * Azure AI Search query type.\n+ */\n+export type AzureAISearchQueryType =\n+ (typeof AzureAISearchQueryType)[keyof typeof AzureAISearchQueryType];\n+\n+/**\n+ * Azure AI Search settings.\n+ */\n+export interface AzureAISearchQueryOptions {\n+ readonly type: AzureAISearchQueryType;\n+ readonly semanticConfigurationName?: string;\n+}\n+\n+/**\n+ * Configuration options for the `AzureAISearchStore` constructor.\n+ */\n+export interface AzureAISearchConfig {\n+ readonly client?: SearchClient;\n+ readonly indexName?: string;\n+ readonly endpoint?: string;\n+ readonly key?: string;\n+ readonly search: AzureAISearchQueryOptions;\n+ /**\n+ * The amount of documents to chunk by when adding vectors.\n+ * @default 100\n+ */\n+ readonly chunkSize?: number;\n+ /**\n+ * The amount of documents to embed at once when adding documents.\n+ * Note that some providers like Azure OpenAI can only embed 16 documents\n+ * at a time.\n+ * @default 16\n+ */\n+ readonly embeddingBatchSize?: number;\n+}\n+\n+/**\n+ * Azure AI Search options metadata schema.\n+ * If yout want to add custom data, use the attributes property.\n+ */\n+export type AzureAISearchDocumentMetadata = {\n+ source: string;\n+ attributes?: Array<{ key: string; value: string }>;\n+};\n+\n+/**\n+ * Azure AI Search indexed document.\n+ */\n+export type AzureAISearchDocument = {\n+ id: string;\n+ content: string;\n+ content_vector: number[];\n+ metadata: AzureAISearchDocumentMetadata;\n+};\n+\n+/**\n+ * Azure AI Search options for adding documents.\n+ */\n+export type AzureAISearchAddDocumentsOptions = {\n+ ids?: string[];\n+};\n+\n+const DEFAULT_FIELD_ID = \"id\";\n+const DEFAULT_FIELD_CONTENT = \"content\";\n+const DEFAULT_FIELD_CONTENT_VECTOR = \"content_vector\";\n+const DEFAULT_FIELD_METADATA = \"metadata\";\n+const DEFAULT_FIELD_METADATA_SOURCE = \"source\";\n+const DEFAULT_FIELD_METADATA_ATTRS = \"attributes\";\n+\n+/**\n+ * Azure AI 
Search vector store.\n+ * To use this, you should have:\n+ * - the `@azure/search-documents` NPM package installed\n+ * - an endpoint and key to the Azure AI Search instance\n+ *\n+ * If you directly provide a `SearchClient` instance, you need to ensure that\n+ * an index has been created. When using and endpoint and key, the index will\n+ * be created automatically if it does not exist.\n+ */\n+export class AzureAISearchVectorStore extends VectorStore {\n+ declare FilterType: string;\n+\n+ get lc_secrets(): { [key: string]: string } {\n+ return {\n+ endpoint: \"AZURE_AISEARCH_ENDPOINT\",\n+ key: \"AZURE_AISEARCH_KEY\",\n+ };\n+ }\n+\n+ _vectorstoreType(): string {\n+ return \"azure_aisearch\";\n+ }\n+\n+ private readonly initPromise: Promise;\n+\n+ private readonly client: SearchClient;\n+\n+ private readonly indexName: string;\n+\n+ private readonly chunkSize: number;\n+\n+ private readonly embeddingBatchSize: number;\n+\n+ private readonly options: AzureAISearchQueryOptions;\n+\n+ constructor(embeddings: EmbeddingsInterface, config: AzureAISearchConfig) {\n+ super(embeddings, config);\n+\n+ const endpoint =\n+ config.endpoint ?? getEnvironmentVariable(\"AZURE_AISEARCH_ENDPOINT\");\n+ const key = config.key ?? getEnvironmentVariable(\"AZURE_AISEARCH_KEY\");\n+\n+ if (!config.client && !endpoint && !key) {\n+ throw new Error(\n+ \"Azure AI Search client or connection string must be set.\"\n+ );\n+ }\n+\n+ this.indexName = config.indexName ?? \"vectorsearch\";\n+ this.chunkSize = config.chunkSize ?? 100;\n+ this.embeddingBatchSize = config.embeddingBatchSize ?? 16;\n+\n+ if (!config.client) {\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ const credential = new AzureKeyCredential(key!);\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ this.client = new SearchClient(endpoint!, this.indexName, credential);\n+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion\n+ const indexClient = new SearchIndexClient(endpoint!, credential);\n+\n+ // Start initialization, but don't wait for it to finish here\n+ this.initPromise = this.ensureIndexExists(indexClient).catch((error) => {\n+ console.error(\n+ \"Error during Azure AI Search index initialization:\",\n+ error\n+ );\n+ });\n+ } else {\n+ this.client = config.client;\n+ }\n+\n+ this.options = config.search;\n+ this.embeddings = embeddings;\n+ }\n+\n+ /**\n+ * Removes specified documents from the AzureAISearchVectorStore using a filter.\n+ * @param filter OData filter to find documents to delete.\n+ * @returns A promise that resolves when the documents have been removed.\n+ */\n+ async deleteMany(filter: string): Promise {\n+ const { results } = await this.client.search(\"*\", {\n+ filter,\n+ });\n+\n+ const ids: string[] = [];\n+ for await (const item of results) {\n+ ids.push(item.document.id);\n+ }\n+\n+ const { results: deleteResults } = await this.client.deleteDocuments(\n+ DEFAULT_FIELD_ID,\n+ ids\n+ );\n+ return deleteResults;\n+ }\n+\n+ /**\n+ * Removes specified documents from the AzureAISearchVectorStore.\n+ * @param ids IDs of the documents to be removed.\n+ * @returns A promise that resolves when the documents have been removed.\n+ */\n+ async deleteById(ids: string | string[]): Promise {\n+ await this.initPromise;\n+ const { results } = await this.client.deleteDocuments(\n+ DEFAULT_FIELD_ID,\n+ Array.isArray(ids) ? 
ids : [ids]\n+ );\n+ return results;\n+ }\n+\n+ /**\n+ * Adds documents to the AzureAISearchVectorStore.\n+ * Documents are chunked into batches of size `embeddingBatchSize` then\n+ * embedded and added to the AzureAISearchVectorStore.\n+ * @param documents The documents to add.\n+ * @param options Options for adding documents.\n+ * @returns A promise that resolves to the ids of the added documents.\n+ */\n+ async addDocuments(\n+ documents: Document[],\n+ options?: AzureAISearchAddDocumentsOptions\n+ ) {\n+ const texts = documents.map(({ pageContent }) => pageContent);\n+ const results: string[] = [];\n+\n+ for (let i = 0; i < texts.length; i += this.embeddingBatchSize) {\n+ const batch = texts.slice(i, i + this.embeddingBatchSize);\n+ const docsBatch = documents.slice(i, i + this.embeddingBatchSize);\n+ const batchEmbeddings: number[][] = await this.embeddings.embedDocuments(\n+ batch\n+ );\n+ const batchResult = await this.addVectors(\n+ batchEmbeddings,\n+ docsBatch,\n+ options\n+ );\n+\n+ results.push(...batchResult);\n+ }\n+\n+ return results;\n+ }\n+\n+ /**\n+ * Adds vectors to the AzureAISearchVectorStore.\n+ * @param vectors Vectors to be added.\n+ * @param documents Corresponding documents to be added.\n+ * @param options Options for adding documents.\n+ * @returns A promise that resolves to the ids of the added documents.\n+ */\n+ async addVectors(\n+ vectors: number[][],\n+ documents: Document[],\n+ options?: AzureAISearchAddDocumentsOptions\n+ ): Promise {\n+ const ids = options?.ids ?? documents.map(() => uuid.v4());\n+ const entities: AzureAISearchDocument[] = documents.map((doc, idx) => ({\n+ id: ids[idx],\n+ content: doc.pageContent,\n+ content_vector: vectors[idx],\n+ metadata: {\n+ source: doc.metadata?.source,\n+ attributes: doc.metadata?.attributes ?? 
[],\n+ },\n+ }));\n+\n+ await this.initPromise;\n+ for (let i = 0; i < entities.length; i += this.chunkSize) {\n+ const chunk = entities.slice(i, i + this.chunkSize);\n+ await this.client.uploadDocuments(chunk, { throwOnAnyFailure: true });\n+ }\n+\n+ return ids;\n+ }\n+\n+ /**\n+ * Performs a similarity search using query type specified in configuration.\n+ * @param query Query text for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearch(\n+ query: string,\n+ k = 4,\n+ filter: this[\"FilterType\"] | undefined = undefined\n+ ): Promise {\n+ const results = await this.similaritySearchWithScore(query, k, filter);\n+\n+ return results.map((result) => result[0]);\n+ }\n+\n+ /**\n+ * Performs a similarity search using query type specified in configuration.\n+ * @param query Query text for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearchWithScore(\n+ query: string,\n+ k = 4,\n+ filter: this[\"FilterType\"] | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const searchType = this.options.type;\n+\n+ if (searchType === AzureAISearchQueryType.Similarity) {\n+ return this.similaritySearchVectorWithScore(\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ } else if (searchType === AzureAISearchQueryType.SimilarityHybrid) {\n+ return this.hybridSearchVectorWithScore(\n+ query,\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ } else if (searchType === AzureAISearchQueryType.SemanticHybrid) {\n+ return this.semanticHybridSearchVectorWithScore(\n+ query,\n+ await this.embeddings.embedQuery(query),\n+ k,\n+ filter\n+ );\n+ }\n+\n+ throw new Error(`Unrecognized search type '${searchType}'`);\n+ }\n+\n+ /**\n+ * Performs a hybrid search using query text.\n+ * @param query Query text for the similarity search.\n+ * @param queryVector Query vector for the similarity search.\n+ * If not provided, the query text will be embedded.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async hybridSearchVectorWithScore(\n+ query: string,\n+ queryVector?: number[],\n+ k = 4,\n+ filter: string | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const vector = queryVector ?? 
(await this.embeddings.embedQuery(query));\n+\n+ await this.initPromise;\n+ const { results } = await this.client.search(query, {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ top: k,\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ ...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Performs a hybrid search with semantic reranker using query text.\n+ * @param query Query text for the similarity search.\n+ * @param queryVector Query vector for the similarity search.\n+ * If not provided, the query text will be embedded.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter Optional OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async semanticHybridSearchVectorWithScore(\n+ query: string,\n+ queryVector?: number[],\n+ k = 4,\n+ filter: string | undefined = undefined\n+ ): Promise<[Document, number][]> {\n+ const vector = queryVector ?? (await this.embeddings.embedQuery(query));\n+\n+ await this.initPromise;\n+ const { results } = await this.client.search(query, {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ top: k,\n+ queryType: \"semantic\",\n+ semanticSearchOptions: {\n+ configurationName: \"semantic-search-config\",\n+ captions: {\n+ captionType: \"extractive\",\n+ },\n+ answers: {\n+ answerType: \"extractive\",\n+ },\n+ },\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ ...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Performs a similarity search on the vectors stored in the collection.\n+ * @param queryVector Query vector for the similarity search.\n+ * @param k=4 Number of nearest neighbors to return.\n+ * @param filter string OData filter for the documents.\n+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.\n+ */\n+ async similaritySearchVectorWithScore(\n+ query: number[],\n+ k: number,\n+ filter?: string\n+ ): Promise<[Document, number][]> {\n+ await this.initPromise;\n+\n+ const { results } = await this.client.search(\"*\", {\n+ vectorSearchOptions: {\n+ queries: [\n+ {\n+ kind: \"vector\",\n+ vector: query,\n+ kNearestNeighborsCount: k,\n+ fields: [DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ ],\n+ },\n+ filter,\n+ });\n+\n+ const docsWithScore: [Document, number][] = [];\n+\n+ for await (const item of results) {\n+ const document = new Document<\n+ AzureAISearchDocumentMetadata & { embedding: number[] }\n+ >({\n+ pageContent: item.document[DEFAULT_FIELD_CONTENT],\n+ metadata: {\n+ 
...item.document[DEFAULT_FIELD_METADATA],\n+ embedding: item.document[DEFAULT_FIELD_CONTENT_VECTOR],\n+ },\n+ });\n+ docsWithScore.push([document, item.score]);\n+ }\n+\n+ return docsWithScore;\n+ }\n+\n+ /**\n+ * Return documents selected using the maximal marginal relevance.\n+ * Maximal marginal relevance optimizes for similarity to the query AND\n+ * diversity among selected documents.\n+ * @param query Text to look up documents similar to.\n+ * @param options.k Number of documents to return.\n+ * @param options.fetchK=20 Number of documents to fetch before passing to\n+ * the MMR algorithm.\n+ * @param options.lambda=0.5 Number between 0 and 1 that determines the\n+ * degree of diversity among the results, where 0 corresponds to maximum\n+ * diversity and 1 to minimum diversity.\n+ * @returns List of documents selected by maximal marginal relevance.\n+ */\n+ async maxMarginalRelevanceSearch(\n+ query: string,\n+ options: MaxMarginalRelevanceSearchOptions\n+ ): Promise {\n+ const { k, fetchK = 20, lambda = 0.5 } = options;\n+\n+ const queryEmbedding = await this.embeddings.embedQuery(query);\n+ const docs = await this.similaritySearchVectorWithScore(\n+ queryEmbedding,\n+ fetchK\n+ );\n+ const embeddingList = docs.map((doc) => doc[0].metadata.embedding);\n+\n+ // Re-rank the results using MMR\n+ const mmrIndexes = maximalMarginalRelevance(\n+ queryEmbedding,\n+ embeddingList,\n+ lambda,\n+ k\n+ );\n+\n+ const mmrDocs = mmrIndexes.map((index) => docs[index][0]);\n+ return mmrDocs;\n+ }\n+\n+ /**\n+ * Ensures that an index exists on the AzureAISearchVectorStore.\n+ * @param indexClient The Azure AI Search index client.\n+ * @returns A promise that resolves when the AzureAISearchVectorStore index has been initialized.\n+ * @protected\n+ */\n+ protected async ensureIndexExists(\n+ indexClient: SearchIndexClient\n+ ): Promise {\n+ try {\n+ await indexClient.getIndex(this.indexName);\n+ } catch (e) {\n+ // Index does not exists, create it\n+ const searchIndex = this.createSearchIndexDefinition(this.indexName);\n+ await indexClient.createIndex(searchIndex);\n+ }\n+ }\n+\n+ /**\n+ * Prepares the search index definition for Azure AI Search.\n+ * @param indexName The name of the index.\n+ * @returns The SearchIndex object.\n+ * @protected\n+ */\n+ protected createSearchIndexDefinition(indexName: string): SearchIndex {\n+ return {\n+ name: indexName,\n+ vectorSearch: {\n+ algorithms: [\n+ {\n+ name: \"vector-search-algorithm\",\n+ kind: \"hnsw\",\n+ parameters: {\n+ m: 4,\n+ efSearch: 500,\n+ metric: \"cosine\",\n+ efConstruction: 400,\n+ },\n+ },\n+ ],\n+ profiles: [\n+ {\n+ name: \"vector-search-profile\",\n+ algorithmConfigurationName: \"vector-search-algorithm\",\n+ },\n+ ],\n+ },\n+ semanticSearch: {\n+ defaultConfigurationName: \"semantic-search-config\",\n+ configurations: [\n+ {\n+ name: \"semantic-search-config\",\n+ prioritizedFields: {\n+ contentFields: [\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ },\n+ ],\n+ keywordsFields: [\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ },\n+ ],\n+ },\n+ },\n+ ],\n+ },\n+ fields: [\n+ {\n+ name: DEFAULT_FIELD_ID,\n+ filterable: true,\n+ key: true,\n+ type: \"Edm.String\",\n+ },\n+ {\n+ name: DEFAULT_FIELD_CONTENT,\n+ searchable: true,\n+ filterable: true,\n+ type: \"Edm.String\",\n+ },\n+ {\n+ name: DEFAULT_FIELD_CONTENT_VECTOR,\n+ searchable: true,\n+ type: \"Collection(Edm.Single)\",\n+ vectorSearchDimensions: 1536,\n+ vectorSearchProfileName: \"vector-search-profile\",\n+ },\n+ {\n+ name: DEFAULT_FIELD_METADATA,\n+ type: \"Edm.ComplexType\",", - 
"comment_created_at": "2024-01-18T10:59:17+00:00", - "comment_author": "sinedied", - "comment_body": "Yes, I'm not quite happy with that one and it comes from the base implementation I started from. \r\nBut I thought as a default as least it makes it easier for the users who do not want to bother with the schema, as LC allows for arbitrary metadata.\r\n\r\nThat's also why I said I wand to provide more customizations options in another PR to allow for custom index schema for the users who wants better control over the data and filters.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/langchainjs-validate-untrusted-input.json b/_reviewers/langchainjs-validate-untrusted-input.json new file mode 100644 index 0000000..8a2c08e --- /dev/null +++ b/_reviewers/langchainjs-validate-untrusted-input.json @@ -0,0 +1,184 @@ +[ + { + "discussion_id": "2101515141", + "pr_number": 8007, + "pr_file": "libs/langchain-community/src/document_loaders/fs/oracle.ts", + "created_at": "2025-05-22T02:44:48+00:00", + "commented_code": "import { Document } from \"@langchain/core/documents\";\nimport fs from \"node:fs\";\nimport path from \"node:path\";\nimport oracledb from \"oracledb\";\nimport * as htmlparser2 from \"htmlparser2\";\nimport { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\n\nfunction* listDir(dir: string): Generator {\n const files = fs.readdirSync(dir, { withFileTypes: true });\n\n for (const file of files) {\n if (file.isDirectory()) {\n yield* listDir(path.join(dir, file.name));\n } else {\n yield path.join(dir, file.name);\n }\n }\n}\n\n/**\n * Load documents from a file, a directory, or a table\n * If the document isn't a plain text file such as a PDF,\n * a plain text version will be extracted using utl_to_text\n * @example\n * ```typescript\n * const loader = new OracleDocLoader(conn, params);\n * const docs = await loader.load();\n * ```\n */\nexport class OracleDocLoader extends BaseDocumentLoader {\n protected conn: oracledb.Connection;\n\n protected pref: Record;\n\n constructor(conn: oracledb.Connection, pref: Record) {\n super();\n this.conn = conn;\n this.pref = pref;\n }\n\n /**\n * A method that loads the text file or blob and returns a promise that\n * resolves to an array of `Document` instances. It reads the text from\n * the file or blob using the `readFile` function from the\n * `node:fs/promises` module or the `text()` method of the blob. It then\n * parses the text using the `parse()` method and creates a `Document`\n * instance for each parsed page. 
The metadata includes the source of the\n * text (file path or blob) and, if there are multiple pages, the line\n * number of each page.\n * @returns A promise that resolves to an array of `Document` instances.\n */\n async load() {\n const docs = [];\n\n if (\"file\" in this.pref) {\n // don't specify an encoding to use binary\n const data = fs.readFileSync(this.pref.file);\n\n const result = await this.conn.execute(\n (\n `select dbms_vector_chain.utl_to_text(:content, :pref) text, dbms_vector_chain.utl_to_text(:content, json('{\"plaintext\": \"false\"}')) metadata from dual`\n ),\n {\n content: { val: data, dir: oracledb.BIND_IN, type: oracledb.BLOB },\n pref: { val: this.pref, type: oracledb.DB_TYPE_JSON },\n },\n ({\n resultSet: true, // return a ResultSet (default is false)\n fetchInfo: {\n TEXT: { type: oracledb.STRING },\n METADATA: { type: oracledb.STRING },\n },\n })\n );\n\n const rs = result.resultSet;\n let row;\n if (rs != null) {\n while ((row = await rs.getRow())) {\n const [plain_text, metadata] = await this._extract(row);\n docs.push(\n new Document({\n pageContent: plain_text,\n metadata: >metadata,\n })\n );\n }\n await rs.close();\n }\n } else if (\"dir\" in this.pref) {\n for (const file of listDir(this.pref.dir)) {\n // don't specify an encoding to use binary\n const data = fs.readFileSync(file);\n\n const result = await this.conn.execute(\n (\n `select dbms_vector_chain.utl_to_text(:content, :pref) text, dbms_vector_chain.utl_to_text(:content, json('{\"plaintext\": \"false\"}')) metadata from dual`\n ),\n {\n content: { val: data, dir: oracledb.BIND_IN, type: oracledb.BLOB },\n pref: { val: this.pref, type: oracledb.DB_TYPE_JSON },\n },\n ({\n resultSet: true, // return a ResultSet (default is false)\n fetchInfo: {\n TEXT: { type: oracledb.STRING },\n METADATA: { type: oracledb.STRING },\n },\n })\n );\n\n const rs = result.resultSet;\n let row;\n if (rs != null) {\n while ((row = await rs.getRow())) {\n const [plain_text, metadata] = await this._extract(row);\n docs.push(\n new Document({\n pageContent: plain_text,\n metadata: >metadata,\n })\n );\n }\n await rs.close();\n }\n }\n } else if (\"tablename\" in this.pref) {\n if (!(\"owner\" in this.pref) || !(\"colname\" in this.pref)) {\n throw new Error(`Invalid preferences: missing owner or colname`);\n }\n // SQL doesn't accept backslash to escape a double quote (\\\"). 
If string contains \\\" change to \"\n if (\n this.pref.tablename.startsWith('\\\\\"') &&\n this.pref.tablename.endsWith('\\\\\"')\n ) {\n this.pref.tablename = this.pref.tablename.replaceAll(\"\\\\\", \"\");\n }\n const result = await this.conn.execute(\n (\n `select dbms_vector_chain.utl_to_text(t.${this.pref.colname}, :pref) text, dbms_vector_chain.utl_to_text(t.${this.pref.colname}, json('{\"plaintext\": \"false\"}')) metadata from ${this.pref.owner}.${this.pref.tablename} t`", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2101515141", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8007, + "pr_file": "libs/langchain-community/src/document_loaders/fs/oracle.ts", + "discussion_id": "2101515141", + "commented_code": "@@ -0,0 +1,197 @@\n+import { Document } from \"@langchain/core/documents\";\n+import fs from \"node:fs\";\n+import path from \"node:path\";\n+import oracledb from \"oracledb\";\n+import * as htmlparser2 from \"htmlparser2\";\n+import { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\n+\n+function* listDir(dir: string): Generator {\n+ const files = fs.readdirSync(dir, { withFileTypes: true });\n+\n+ for (const file of files) {\n+ if (file.isDirectory()) {\n+ yield* listDir(path.join(dir, file.name));\n+ } else {\n+ yield path.join(dir, file.name);\n+ }\n+ }\n+}\n+\n+/**\n+ * Load documents from a file, a directory, or a table\n+ * If the document isn't a plain text file such as a PDF,\n+ * a plain text version will be extracted using utl_to_text\n+ * @example\n+ * ```typescript\n+ * const loader = new OracleDocLoader(conn, params);\n+ * const docs = await loader.load();\n+ * ```\n+ */\n+export class OracleDocLoader extends BaseDocumentLoader {\n+ protected conn: oracledb.Connection;\n+\n+ protected pref: Record;\n+\n+ constructor(conn: oracledb.Connection, pref: Record) {\n+ super();\n+ this.conn = conn;\n+ this.pref = pref;\n+ }\n+\n+ /**\n+ * A method that loads the text file or blob and returns a promise that\n+ * resolves to an array of `Document` instances. It reads the text from\n+ * the file or blob using the `readFile` function from the\n+ * `node:fs/promises` module or the `text()` method of the blob. It then\n+ * parses the text using the `parse()` method and creates a `Document`\n+ * instance for each parsed page. 
The metadata includes the source of the\n+ * text (file path or blob) and, if there are multiple pages, the line\n+ * number of each page.\n+ * @returns A promise that resolves to an array of `Document` instances.\n+ */\n+ async load() {\n+ const docs = [];\n+\n+ if (\"file\" in this.pref) {\n+ // don't specify an encoding to use binary\n+ const data = fs.readFileSync(this.pref.file);\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(:content, :pref) text, dbms_vector_chain.utl_to_text(:content, json('{\"plaintext\": \"false\"}')) metadata from dual`\n+ ),\n+ {\n+ content: { val: data, dir: oracledb.BIND_IN, type: oracledb.BLOB },\n+ pref: { val: this.pref, type: oracledb.DB_TYPE_JSON },\n+ },\n+ ({\n+ resultSet: true, // return a ResultSet (default is false)\n+ fetchInfo: {\n+ TEXT: { type: oracledb.STRING },\n+ METADATA: { type: oracledb.STRING },\n+ },\n+ })\n+ );\n+\n+ const rs = result.resultSet;\n+ let row;\n+ if (rs != null) {\n+ while ((row = await rs.getRow())) {\n+ const [plain_text, metadata] = await this._extract(row);\n+ docs.push(\n+ new Document({\n+ pageContent: plain_text,\n+ metadata: >metadata,\n+ })\n+ );\n+ }\n+ await rs.close();\n+ }\n+ } else if (\"dir\" in this.pref) {\n+ for (const file of listDir(this.pref.dir)) {\n+ // don't specify an encoding to use binary\n+ const data = fs.readFileSync(file);\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(:content, :pref) text, dbms_vector_chain.utl_to_text(:content, json('{\"plaintext\": \"false\"}')) metadata from dual`\n+ ),\n+ {\n+ content: { val: data, dir: oracledb.BIND_IN, type: oracledb.BLOB },\n+ pref: { val: this.pref, type: oracledb.DB_TYPE_JSON },\n+ },\n+ ({\n+ resultSet: true, // return a ResultSet (default is false)\n+ fetchInfo: {\n+ TEXT: { type: oracledb.STRING },\n+ METADATA: { type: oracledb.STRING },\n+ },\n+ })\n+ );\n+\n+ const rs = result.resultSet;\n+ let row;\n+ if (rs != null) {\n+ while ((row = await rs.getRow())) {\n+ const [plain_text, metadata] = await this._extract(row);\n+ docs.push(\n+ new Document({\n+ pageContent: plain_text,\n+ metadata: >metadata,\n+ })\n+ );\n+ }\n+ await rs.close();\n+ }\n+ }\n+ } else if (\"tablename\" in this.pref) {\n+ if (!(\"owner\" in this.pref) || !(\"colname\" in this.pref)) {\n+ throw new Error(`Invalid preferences: missing owner or colname`);\n+ }\n+ // SQL doesn't accept backslash to escape a double quote (\\\"). 
If string contains \\\" change to \"\n+ if (\n+ this.pref.tablename.startsWith('\\\\\"') &&\n+ this.pref.tablename.endsWith('\\\\\"')\n+ ) {\n+ this.pref.tablename = this.pref.tablename.replaceAll(\"\\\\\", \"\");\n+ }\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(t.${this.pref.colname}, :pref) text, dbms_vector_chain.utl_to_text(t.${this.pref.colname}, json('{\"plaintext\": \"false\"}')) metadata from ${this.pref.owner}.${this.pref.tablename} t`", + "comment_created_at": "2025-05-22T02:44:48+00:00", + "comment_author": "cjbj", + "comment_body": "Have the substituted values been filtered to avoid any SQL Injection issues?", + "pr_file_module": null + }, + { + "comment_id": "2103199788", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8007, + "pr_file": "libs/langchain-community/src/document_loaders/fs/oracle.ts", + "discussion_id": "2101515141", + "commented_code": "@@ -0,0 +1,197 @@\n+import { Document } from \"@langchain/core/documents\";\n+import fs from \"node:fs\";\n+import path from \"node:path\";\n+import oracledb from \"oracledb\";\n+import * as htmlparser2 from \"htmlparser2\";\n+import { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\n+\n+function* listDir(dir: string): Generator {\n+ const files = fs.readdirSync(dir, { withFileTypes: true });\n+\n+ for (const file of files) {\n+ if (file.isDirectory()) {\n+ yield* listDir(path.join(dir, file.name));\n+ } else {\n+ yield path.join(dir, file.name);\n+ }\n+ }\n+}\n+\n+/**\n+ * Load documents from a file, a directory, or a table\n+ * If the document isn't a plain text file such as a PDF,\n+ * a plain text version will be extracted using utl_to_text\n+ * @example\n+ * ```typescript\n+ * const loader = new OracleDocLoader(conn, params);\n+ * const docs = await loader.load();\n+ * ```\n+ */\n+export class OracleDocLoader extends BaseDocumentLoader {\n+ protected conn: oracledb.Connection;\n+\n+ protected pref: Record;\n+\n+ constructor(conn: oracledb.Connection, pref: Record) {\n+ super();\n+ this.conn = conn;\n+ this.pref = pref;\n+ }\n+\n+ /**\n+ * A method that loads the text file or blob and returns a promise that\n+ * resolves to an array of `Document` instances. It reads the text from\n+ * the file or blob using the `readFile` function from the\n+ * `node:fs/promises` module or the `text()` method of the blob. It then\n+ * parses the text using the `parse()` method and creates a `Document`\n+ * instance for each parsed page. 
The metadata includes the source of the\n+ * text (file path or blob) and, if there are multiple pages, the line\n+ * number of each page.\n+ * @returns A promise that resolves to an array of `Document` instances.\n+ */\n+ async load() {\n+ const docs = [];\n+\n+ if (\"file\" in this.pref) {\n+ // don't specify an encoding to use binary\n+ const data = fs.readFileSync(this.pref.file);\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(:content, :pref) text, dbms_vector_chain.utl_to_text(:content, json('{\"plaintext\": \"false\"}')) metadata from dual`\n+ ),\n+ {\n+ content: { val: data, dir: oracledb.BIND_IN, type: oracledb.BLOB },\n+ pref: { val: this.pref, type: oracledb.DB_TYPE_JSON },\n+ },\n+ ({\n+ resultSet: true, // return a ResultSet (default is false)\n+ fetchInfo: {\n+ TEXT: { type: oracledb.STRING },\n+ METADATA: { type: oracledb.STRING },\n+ },\n+ })\n+ );\n+\n+ const rs = result.resultSet;\n+ let row;\n+ if (rs != null) {\n+ while ((row = await rs.getRow())) {\n+ const [plain_text, metadata] = await this._extract(row);\n+ docs.push(\n+ new Document({\n+ pageContent: plain_text,\n+ metadata: >metadata,\n+ })\n+ );\n+ }\n+ await rs.close();\n+ }\n+ } else if (\"dir\" in this.pref) {\n+ for (const file of listDir(this.pref.dir)) {\n+ // don't specify an encoding to use binary\n+ const data = fs.readFileSync(file);\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(:content, :pref) text, dbms_vector_chain.utl_to_text(:content, json('{\"plaintext\": \"false\"}')) metadata from dual`\n+ ),\n+ {\n+ content: { val: data, dir: oracledb.BIND_IN, type: oracledb.BLOB },\n+ pref: { val: this.pref, type: oracledb.DB_TYPE_JSON },\n+ },\n+ ({\n+ resultSet: true, // return a ResultSet (default is false)\n+ fetchInfo: {\n+ TEXT: { type: oracledb.STRING },\n+ METADATA: { type: oracledb.STRING },\n+ },\n+ })\n+ );\n+\n+ const rs = result.resultSet;\n+ let row;\n+ if (rs != null) {\n+ while ((row = await rs.getRow())) {\n+ const [plain_text, metadata] = await this._extract(row);\n+ docs.push(\n+ new Document({\n+ pageContent: plain_text,\n+ metadata: >metadata,\n+ })\n+ );\n+ }\n+ await rs.close();\n+ }\n+ }\n+ } else if (\"tablename\" in this.pref) {\n+ if (!(\"owner\" in this.pref) || !(\"colname\" in this.pref)) {\n+ throw new Error(`Invalid preferences: missing owner or colname`);\n+ }\n+ // SQL doesn't accept backslash to escape a double quote (\\\"). 
If string contains \\\" change to \"\n+ if (\n+ this.pref.tablename.startsWith('\\\\\"') &&\n+ this.pref.tablename.endsWith('\\\\\"')\n+ ) {\n+ this.pref.tablename = this.pref.tablename.replaceAll(\"\\\\\", \"\");\n+ }\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(t.${this.pref.colname}, :pref) text, dbms_vector_chain.utl_to_text(t.${this.pref.colname}, json('{\"plaintext\": \"false\"}')) metadata from ${this.pref.owner}.${this.pref.tablename} t`", + "comment_created_at": "2025-05-22T18:46:39+00:00", + "comment_author": "hackerdave", + "comment_body": "Have added a check", + "pr_file_module": null + }, + { + "comment_id": "2103462930", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8007, + "pr_file": "libs/langchain-community/src/document_loaders/fs/oracle.ts", + "discussion_id": "2101515141", + "commented_code": "@@ -0,0 +1,197 @@\n+import { Document } from \"@langchain/core/documents\";\n+import fs from \"node:fs\";\n+import path from \"node:path\";\n+import oracledb from \"oracledb\";\n+import * as htmlparser2 from \"htmlparser2\";\n+import { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\n+\n+function* listDir(dir: string): Generator {\n+ const files = fs.readdirSync(dir, { withFileTypes: true });\n+\n+ for (const file of files) {\n+ if (file.isDirectory()) {\n+ yield* listDir(path.join(dir, file.name));\n+ } else {\n+ yield path.join(dir, file.name);\n+ }\n+ }\n+}\n+\n+/**\n+ * Load documents from a file, a directory, or a table\n+ * If the document isn't a plain text file such as a PDF,\n+ * a plain text version will be extracted using utl_to_text\n+ * @example\n+ * ```typescript\n+ * const loader = new OracleDocLoader(conn, params);\n+ * const docs = await loader.load();\n+ * ```\n+ */\n+export class OracleDocLoader extends BaseDocumentLoader {\n+ protected conn: oracledb.Connection;\n+\n+ protected pref: Record;\n+\n+ constructor(conn: oracledb.Connection, pref: Record) {\n+ super();\n+ this.conn = conn;\n+ this.pref = pref;\n+ }\n+\n+ /**\n+ * A method that loads the text file or blob and returns a promise that\n+ * resolves to an array of `Document` instances. It reads the text from\n+ * the file or blob using the `readFile` function from the\n+ * `node:fs/promises` module or the `text()` method of the blob. It then\n+ * parses the text using the `parse()` method and creates a `Document`\n+ * instance for each parsed page. 
The metadata includes the source of the\n+ * text (file path or blob) and, if there are multiple pages, the line\n+ * number of each page.\n+ * @returns A promise that resolves to an array of `Document` instances.\n+ */\n+ async load() {\n+ const docs = [];\n+\n+ if (\"file\" in this.pref) {\n+ // don't specify an encoding to use binary\n+ const data = fs.readFileSync(this.pref.file);\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(:content, :pref) text, dbms_vector_chain.utl_to_text(:content, json('{\"plaintext\": \"false\"}')) metadata from dual`\n+ ),\n+ {\n+ content: { val: data, dir: oracledb.BIND_IN, type: oracledb.BLOB },\n+ pref: { val: this.pref, type: oracledb.DB_TYPE_JSON },\n+ },\n+ ({\n+ resultSet: true, // return a ResultSet (default is false)\n+ fetchInfo: {\n+ TEXT: { type: oracledb.STRING },\n+ METADATA: { type: oracledb.STRING },\n+ },\n+ })\n+ );\n+\n+ const rs = result.resultSet;\n+ let row;\n+ if (rs != null) {\n+ while ((row = await rs.getRow())) {\n+ const [plain_text, metadata] = await this._extract(row);\n+ docs.push(\n+ new Document({\n+ pageContent: plain_text,\n+ metadata: >metadata,\n+ })\n+ );\n+ }\n+ await rs.close();\n+ }\n+ } else if (\"dir\" in this.pref) {\n+ for (const file of listDir(this.pref.dir)) {\n+ // don't specify an encoding to use binary\n+ const data = fs.readFileSync(file);\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(:content, :pref) text, dbms_vector_chain.utl_to_text(:content, json('{\"plaintext\": \"false\"}')) metadata from dual`\n+ ),\n+ {\n+ content: { val: data, dir: oracledb.BIND_IN, type: oracledb.BLOB },\n+ pref: { val: this.pref, type: oracledb.DB_TYPE_JSON },\n+ },\n+ ({\n+ resultSet: true, // return a ResultSet (default is false)\n+ fetchInfo: {\n+ TEXT: { type: oracledb.STRING },\n+ METADATA: { type: oracledb.STRING },\n+ },\n+ })\n+ );\n+\n+ const rs = result.resultSet;\n+ let row;\n+ if (rs != null) {\n+ while ((row = await rs.getRow())) {\n+ const [plain_text, metadata] = await this._extract(row);\n+ docs.push(\n+ new Document({\n+ pageContent: plain_text,\n+ metadata: >metadata,\n+ })\n+ );\n+ }\n+ await rs.close();\n+ }\n+ }\n+ } else if (\"tablename\" in this.pref) {\n+ if (!(\"owner\" in this.pref) || !(\"colname\" in this.pref)) {\n+ throw new Error(`Invalid preferences: missing owner or colname`);\n+ }\n+ // SQL doesn't accept backslash to escape a double quote (\\\"). 
If string contains \\\" change to \"\n+ if (\n+ this.pref.tablename.startsWith('\\\\\"') &&\n+ this.pref.tablename.endsWith('\\\\\"')\n+ ) {\n+ this.pref.tablename = this.pref.tablename.replaceAll(\"\\\\\", \"\");\n+ }\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(t.${this.pref.colname}, :pref) text, dbms_vector_chain.utl_to_text(t.${this.pref.colname}, json('{\"plaintext\": \"false\"}')) metadata from ${this.pref.owner}.${this.pref.tablename} t`", + "comment_created_at": "2025-05-22T22:07:14+00:00", + "comment_author": "cjbj", + "comment_body": "- You need to bind those values !\r\n- Would it be nice to throw a custom error message if the check fails?\r\n\r\n```\r\n const cn = 'empno';\r\n const ow = 'scott';\r\n const tn = 'emp';\r\n const qn = `${ow}.${tn}`;\r\n const sql = `select sys.dbms_assert.simple_sql_name(:sn), sys.dbms_assert.qualified_sql_name(:qn) from dual`;\r\n const binds = [cn, qn];\r\n await connection.execute(sql, binds);\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2110532871", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 8007, + "pr_file": "libs/langchain-community/src/document_loaders/fs/oracle.ts", + "discussion_id": "2101515141", + "commented_code": "@@ -0,0 +1,197 @@\n+import { Document } from \"@langchain/core/documents\";\n+import fs from \"node:fs\";\n+import path from \"node:path\";\n+import oracledb from \"oracledb\";\n+import * as htmlparser2 from \"htmlparser2\";\n+import { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\n+\n+function* listDir(dir: string): Generator {\n+ const files = fs.readdirSync(dir, { withFileTypes: true });\n+\n+ for (const file of files) {\n+ if (file.isDirectory()) {\n+ yield* listDir(path.join(dir, file.name));\n+ } else {\n+ yield path.join(dir, file.name);\n+ }\n+ }\n+}\n+\n+/**\n+ * Load documents from a file, a directory, or a table\n+ * If the document isn't a plain text file such as a PDF,\n+ * a plain text version will be extracted using utl_to_text\n+ * @example\n+ * ```typescript\n+ * const loader = new OracleDocLoader(conn, params);\n+ * const docs = await loader.load();\n+ * ```\n+ */\n+export class OracleDocLoader extends BaseDocumentLoader {\n+ protected conn: oracledb.Connection;\n+\n+ protected pref: Record;\n+\n+ constructor(conn: oracledb.Connection, pref: Record) {\n+ super();\n+ this.conn = conn;\n+ this.pref = pref;\n+ }\n+\n+ /**\n+ * A method that loads the text file or blob and returns a promise that\n+ * resolves to an array of `Document` instances. It reads the text from\n+ * the file or blob using the `readFile` function from the\n+ * `node:fs/promises` module or the `text()` method of the blob. It then\n+ * parses the text using the `parse()` method and creates a `Document`\n+ * instance for each parsed page. 
The metadata includes the source of the\n+ * text (file path or blob) and, if there are multiple pages, the line\n+ * number of each page.\n+ * @returns A promise that resolves to an array of `Document` instances.\n+ */\n+ async load() {\n+ const docs = [];\n+\n+ if (\"file\" in this.pref) {\n+ // don't specify an encoding to use binary\n+ const data = fs.readFileSync(this.pref.file);\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(:content, :pref) text, dbms_vector_chain.utl_to_text(:content, json('{\"plaintext\": \"false\"}')) metadata from dual`\n+ ),\n+ {\n+ content: { val: data, dir: oracledb.BIND_IN, type: oracledb.BLOB },\n+ pref: { val: this.pref, type: oracledb.DB_TYPE_JSON },\n+ },\n+ ({\n+ resultSet: true, // return a ResultSet (default is false)\n+ fetchInfo: {\n+ TEXT: { type: oracledb.STRING },\n+ METADATA: { type: oracledb.STRING },\n+ },\n+ })\n+ );\n+\n+ const rs = result.resultSet;\n+ let row;\n+ if (rs != null) {\n+ while ((row = await rs.getRow())) {\n+ const [plain_text, metadata] = await this._extract(row);\n+ docs.push(\n+ new Document({\n+ pageContent: plain_text,\n+ metadata: >metadata,\n+ })\n+ );\n+ }\n+ await rs.close();\n+ }\n+ } else if (\"dir\" in this.pref) {\n+ for (const file of listDir(this.pref.dir)) {\n+ // don't specify an encoding to use binary\n+ const data = fs.readFileSync(file);\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(:content, :pref) text, dbms_vector_chain.utl_to_text(:content, json('{\"plaintext\": \"false\"}')) metadata from dual`\n+ ),\n+ {\n+ content: { val: data, dir: oracledb.BIND_IN, type: oracledb.BLOB },\n+ pref: { val: this.pref, type: oracledb.DB_TYPE_JSON },\n+ },\n+ ({\n+ resultSet: true, // return a ResultSet (default is false)\n+ fetchInfo: {\n+ TEXT: { type: oracledb.STRING },\n+ METADATA: { type: oracledb.STRING },\n+ },\n+ })\n+ );\n+\n+ const rs = result.resultSet;\n+ let row;\n+ if (rs != null) {\n+ while ((row = await rs.getRow())) {\n+ const [plain_text, metadata] = await this._extract(row);\n+ docs.push(\n+ new Document({\n+ pageContent: plain_text,\n+ metadata: >metadata,\n+ })\n+ );\n+ }\n+ await rs.close();\n+ }\n+ }\n+ } else if (\"tablename\" in this.pref) {\n+ if (!(\"owner\" in this.pref) || !(\"colname\" in this.pref)) {\n+ throw new Error(`Invalid preferences: missing owner or colname`);\n+ }\n+ // SQL doesn't accept backslash to escape a double quote (\\\"). 
If string contains \\\" change to \"\n+ if (\n+ this.pref.tablename.startsWith('\\\\\"') &&\n+ this.pref.tablename.endsWith('\\\\\"')\n+ ) {\n+ this.pref.tablename = this.pref.tablename.replaceAll(\"\\\\\", \"\");\n+ }\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(t.${this.pref.colname}, :pref) text, dbms_vector_chain.utl_to_text(t.${this.pref.colname}, json('{\"plaintext\": \"false\"}')) metadata from ${this.pref.owner}.${this.pref.tablename} t`", + "comment_created_at": "2025-05-27T23:37:33+00:00", + "comment_author": "hackerdave", + "comment_body": "Changed to bind values and added a custom error message as suggested", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2004662752", + "pr_number": 7852, + "pr_file": "libs/langchain-google-cloud-sql-pg/src/chatMessageHistory.ts", + "created_at": "2025-03-20T02:03:37+00:00", + "commented_code": "import { BaseChatMessageHistory } from \"@langchain/core/chat_history\";\nimport { AIMessage, BaseMessage, HumanMessage, StoredMessage, mapStoredMessagesToChatMessages } from \"@langchain/core/messages\";\nimport PostgresEngine from \"./engine.js\";\n\nexport interface PostgresChatMessageHistoryInput {\n engine: PostgresEngine;\n sessionId: string;\n tableName: string;\n schemaName: string;\n}\n\nexport class PostgresChatMessageHistory extends BaseChatMessageHistory {\n lc_namespace: string[] = [\"langchain\", \"stores\", \"message\", \"google-cloud-sql-pg\"];\n\n engine: PostgresEngine;\n\n sessionId: string;\n\n tableName: string;\n\n schemaName: string;\n\n constructor({ engine, sessionId, tableName, schemaName = \"public\" }: PostgresChatMessageHistoryInput) {\n super();\n this.engine = engine;\n this.sessionId = sessionId;\n this.tableName = tableName;\n this.schemaName = schemaName;\n }\n\n /**\n * Create a new PostgresChatMessageHistory instance.\n *\n * @param {PostgresEngine} engine Postgres engine instance to use.\n * @param {string} sessionId Retrieve the table content witht this session ID.\n * @param {string} tableName Table name that stores that chat message history.\n * @param {string} schemaName Schema name for the chat message history table. 
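The thread above converges on two fixes for the table branch of `OracleDocLoader`: keep the preference object as a `:pref` bind, and validate the caller-supplied `owner`, `tablename`, and `colname` with `DBMS_ASSERT` before they are interpolated into the statement. The helper below is only a sketch of that check, not code from the PR; the function name and the error message are assumptions.

```typescript
import oracledb from "oracledb";

/**
 * Sketch of the identifier check suggested in the review: let DBMS_ASSERT
 * decide whether the owner, table, and column names are legal SQL names.
 * The values reach the database only as bind variables; DBMS_ASSERT raises
 * an error (e.g. ORA-44003 "invalid SQL name") for anything else, which is
 * re-thrown with a clearer message.
 */
async function assertSafeIdentifiers(
  conn: oracledb.Connection,
  owner: string,
  tablename: string,
  colname: string
): Promise<void> {
  const sql = `select sys.dbms_assert.simple_sql_name(:cn),
                      sys.dbms_assert.qualified_sql_name(:qn)
                 from dual`;
  try {
    // Nothing user-controlled is concatenated into the SQL text here.
    await conn.execute(sql, { cn: colname, qn: `${owner}.${tablename}` });
  } catch (err) {
    throw new Error(
      `Invalid owner/table/column preference for OracleDocLoader: ${owner}.${tablename} (${colname})`
    );
  }
}
```

Once this check passes, interpolating the names into the `utl_to_text` query is no longer injectable, and the preference JSON itself stays a `:pref` bind as in the file and directory branches.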
Default: \"public\".\n * @returns PostgresChatMessageHistory instance.\n */\n static async create(\n engine: PostgresEngine,\n sessionId: string,\n tableName: string,\n schemaName: string = \"public\"\n ) {\n const query = `SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '${tableName}' AND table_schema = '${schemaName}'`;", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "2004662752", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7852, + "pr_file": "libs/langchain-google-cloud-sql-pg/src/chatMessageHistory.ts", + "discussion_id": "2004662752", + "commented_code": "@@ -0,0 +1,143 @@\n+import { BaseChatMessageHistory } from \"@langchain/core/chat_history\";\n+import { AIMessage, BaseMessage, HumanMessage, StoredMessage, mapStoredMessagesToChatMessages } from \"@langchain/core/messages\";\n+import PostgresEngine from \"./engine.js\";\n+\n+export interface PostgresChatMessageHistoryInput {\n+ engine: PostgresEngine;\n+ sessionId: string;\n+ tableName: string;\n+ schemaName: string;\n+}\n+\n+export class PostgresChatMessageHistory extends BaseChatMessageHistory {\n+ lc_namespace: string[] = [\"langchain\", \"stores\", \"message\", \"google-cloud-sql-pg\"];\n+\n+ engine: PostgresEngine;\n+\n+ sessionId: string;\n+\n+ tableName: string;\n+\n+ schemaName: string;\n+\n+ constructor({ engine, sessionId, tableName, schemaName = \"public\" }: PostgresChatMessageHistoryInput) {\n+ super();\n+ this.engine = engine;\n+ this.sessionId = sessionId;\n+ this.tableName = tableName;\n+ this.schemaName = schemaName;\n+ }\n+\n+ /**\n+ * Create a new PostgresChatMessageHistory instance.\n+ *\n+ * @param {PostgresEngine} engine Postgres engine instance to use.\n+ * @param {string} sessionId Retrieve the table content witht this session ID.\n+ * @param {string} tableName Table name that stores that chat message history.\n+ * @param {string} schemaName Schema name for the chat message history table. 
Default: \"public\".\n+ * @returns PostgresChatMessageHistory instance.\n+ */\n+ static async create(\n+ engine: PostgresEngine,\n+ sessionId: string,\n+ tableName: string,\n+ schemaName: string = \"public\"\n+ ) {\n+ const query = `SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '${tableName}' AND table_schema = '${schemaName}'`;", + "comment_created_at": "2025-03-20T02:03:37+00:00", + "comment_author": "jacoblee93", + "comment_body": "May be worth a small note in docs emphasizing that `tableName` and `schemaName` in these files are not escaped and to not pass in end-user input here", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1901267827", + "pr_number": 7360, + "pr_file": "libs/langchain-community/src/vectorstores/mariadb.ts", + "created_at": "2025-01-02T21:19:03+00:00", + "commented_code": "import mariadb, { type Pool, type PoolConfig } from \"mariadb\";\nimport { VectorStore } from \"@langchain/core/vectorstores\";\nimport type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\nimport { Document } from \"@langchain/core/documents\";\nimport { getEnvironmentVariable } from \"@langchain/core/utils/env\";\nimport {\n FilterExpressionConverter,\n StringBuilder,\n BaseFilterExpressionConverter,\n Expression,\n Value,\n Key,\n Group,\n} from \"@langchain/core/filter\";\n\ntype Metadata = Record;\n\nexport type DistanceStrategy = \"COSINE\" | \"EUCLIDEAN\";\n\n/**\n * Converts Expression into JSON metadata filter expression format\n * for MariaDB\n */\nclass MariaDBFilterExpressionConverter\n extends BaseFilterExpressionConverter\n implements FilterExpressionConverter\n{\n private metadataFieldName: string;\n\n constructor(metadataFieldName: string) {\n super();\n this.metadataFieldName = metadataFieldName;\n }\n\n convertExpressionToContext(\n expression: Expression,\n context: StringBuilder\n ): void {\n super.convertOperandToContext(expression.left, context);\n super.convertSymbolToContext(expression, context);\n if (expression.right) {\n super.convertOperandToContext(expression.right, context);\n }\n }\n\n convertKeyToContext(key: Key, context: StringBuilder): void {\n context.append(`JSON_VALUE(${this.metadataFieldName}, '$.${key.key}')`);\n }\n\n writeValueRangeStart(_listValue: Value, context: StringBuilder): void {\n context.append(\"(\");\n }\n\n writeValueRangeEnd(_listValue: Value, context: StringBuilder): void {\n context.append(\")\");\n }\n\n writeGroupStart(_group: Group, context: StringBuilder): void {\n context.append(\"(\");\n }\n\n writeGroupEnd(_group: Group, context: StringBuilder): void {\n context.append(\")\");\n }\n}\n\n/**\n * Interface that defines the arguments required to create a\n * `MariaDBStore` instance. 
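The comment above asks for a documentation note because `tableName` and `schemaName` are interpolated into the query unescaped. A complementary option is to reject anything that is not a plain identifier before the query is built; the guard below is a hypothetical sketch, not part of the PR, and the helper name is an assumption.

```typescript
/**
 * Hypothetical guard: only accept simple, unquoted Postgres identifiers so
 * that values interpolated into information_schema queries cannot smuggle in
 * SQL. End-user input, quoted names, dots, and spaces are all rejected.
 */
function assertSimpleIdentifier(name: string, label: string): void {
  // Letters, digits, underscores; must not start with a digit; Postgres
  // identifiers are limited to 63 bytes anyway.
  if (!/^[a-zA-Z_][a-zA-Z0-9_]{0,62}$/.test(name)) {
    throw new Error(
      `Invalid ${label} "${name}": only unquoted identifiers are supported; do not pass end-user input here.`
    );
  }
}

// Possible usage in a create()-style factory:
// assertSimpleIdentifier(tableName, "tableName");
// assertSimpleIdentifier(schemaName, "schemaName");
```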
It includes MariaDB connection options,\n * table name and verbosity level.\n */\nexport interface MariaDBStoreArgs {\n connectionOptions?: PoolConfig;\n pool?: Pool;\n tableName?: string;\n collectionTableName?: string;\n collectionName?: string;\n collectionMetadata?: Metadata | null;\n schemaName?: string | null;\n columns?: {\n idColumnName?: string;\n vectorColumnName?: string;\n contentColumnName?: string;\n metadataColumnName?: string;\n };\n verbose?: boolean;\n /**\n * The amount of documents to chunk by when\n * adding vectors.\n * @default 500\n */\n chunkSize?: number;\n ids?: string[];\n distanceStrategy?: DistanceStrategy;\n}\n\n/**\n * MariaDB vector store integration.\n *\n * Setup:\n * Install `@langchain/community` and `mariadb`.\n *\n * If you wish to generate ids, you should also install the `uuid` package.\n *\n * ```bash\n * npm install @langchain/community mariadb uuid\n * ```\n *\n * ## [Constructor args](https://api.js.langchain.com/classes/_langchain_community.vectorstores_mariadb.MariaDB.html#constructor)\n *\n *
\n * Instantiate\n *\n * ```typescript\n * import {\n * MariaDBStore,\n * DistanceStrategy,\n * } from \"@langchain/community/vectorstores/mariadb\";\n *\n * // Or other embeddings\n * import { OpenAIEmbeddings } from \"@langchain/openai\";\n * import { PoolConfig } from \"mariadb\";\n *\n * const embeddings = new OpenAIEmbeddings({\n * model: \"text-embedding-3-small\",\n * });\n *\n * // Sample config\n * const config = {\n * connectionOptions: {\n * host: \"127.0.0.1\",\n * port: 3306,\n * user: \"myuser\",\n * password: \"ChangeMe\",\n * database: \"api\",\n * } as PoolConfig,\n * tableName: \"testlangchainjs\",\n * columns: {\n * idColumnName: \"id\",\n * vectorColumnName: \"vector\",\n * contentColumnName: \"content\",\n * metadataColumnName: \"metadata\",\n * },\n * // supported distance strategies: COSINE (default) or EUCLIDEAN\n * distanceStrategy: \"COSINE\" as DistanceStrategy,\n * };\n *\n * const vectorStore = await MariaDBStore.initialize(embeddings, config);\n * ```\n *
\n *\n *
\n *\n *
\n * Add documents\n *\n * ```typescript\n * import type { Document } from '@langchain/core/documents';\n *\n * const document1 = { pageContent: \"foo\", metadata: { baz: \"bar\" } };\n * const document2 = { pageContent: \"thud\", metadata: { bar: \"baz\" } };\n * const document3 = { pageContent: \"i will be deleted :(\", metadata: {} };\n *\n * const documents: Document[] = [document1, document2, document3];\n * const ids = [\"1\", \"2\", \"3\"];\n * await vectorStore.addDocuments(documents, { ids });\n * ```\n *
\n *\n *
\n *\n *
\n * Delete documents\n *\n * ```typescript\n * await vectorStore.delete({ ids: [\"3\"] });\n * ```\n *
\n *\n *
\n *\n *
\n * Similarity search\n *\n * ```typescript\n * const results = await vectorStore.similaritySearch(\"thud\", 1);\n * for (const doc of results) {\n * console.log(`* ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n * }\n * // Output: * thud [{\"baz\":\"bar\"}]\n * ```\n *
\n *\n *
\n *\n *\n *
\n * Similarity search with filter\n *\n * ```typescript\n * const resultsWithFilter = await vectorStore.similaritySearch(\"thud\", 1, new Expression( ExpressionType.EQ, new Key(\"country\"), new Value(\"BG\")));\n *\n * for (const doc of resultsWithFilter) {\n * console.log(`* ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n * }\n * // Output: * foo [{\"baz\":\"bar\"}]\n * ```\n *
\n *\n *
\n *\n *\n *
\n * Similarity search with score\n *\n * ```typescript\n * const resultsWithScore = await vectorStore.similaritySearchWithScore(\"qux\", 1);\n * for (const [doc, score] of resultsWithScore) {\n * console.log(`* [SIM=${score.toFixed(6)}] ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n * }\n * // Output: * [SIM=0.000000] qux [{\"bar\":\"baz\",\"baz\":\"bar\"}]\n * ```\n *
\n *\n *
\n *\n *
\n * As a retriever\n *\n * ```typescript\n * const retriever = vectorStore.asRetriever({\n * searchType: \"mmr\", // Leave blank for standard similarity search\n * k: 1,\n * });\n * const resultAsRetriever = await retriever.invoke(\"thud\");\n * console.log(resultAsRetriever);\n *\n * // Output: [Document({ metadata: { \"baz\":\"bar\" }, pageContent: \"thud\" })]\n * ```\n *
\n *\n *
\n */\nexport class MariaDBStore extends VectorStore {\n tableName: string;\n\n collectionTableName?: string;\n\n collectionName = \"langchain\";\n\n collectionId?: string;\n\n collectionMetadata: Metadata | null;\n\n schemaName: string | null;\n\n idColumnName: string;\n\n vectorColumnName: string;\n\n contentColumnName: string;\n\n metadataColumnName: string;\n\n _verbose?: boolean;\n\n pool: Pool;\n\n chunkSize = 500;\n\n distanceStrategy: DistanceStrategy;\n\n expressionConverter: MariaDBFilterExpressionConverter;\n\n constructor(embeddings: EmbeddingsInterface, config: MariaDBStoreArgs) {\n super(embeddings, config);\n this.tableName = this.escapeId(config.tableName ?? \"langchain\", false);\n if (\n config.collectionName !== undefined &&\n config.collectionTableName === undefined\n ) {\n throw new Error(\n `If supplying a \"collectionName\", you must also supply a \"collectionTableName\".`\n );\n }\n\n this.collectionTableName = config.collectionTableName\n ? this.escapeId(config.collectionTableName, false)\n : undefined;\n\n this.collectionName = config.collectionName\n ? this.escapeId(config.collectionName, false)\n : \"langchaincol\";\n\n this.collectionMetadata = config.collectionMetadata ?? null;\n this.schemaName = config.schemaName\n ? this.escapeId(config.schemaName, false)\n : null;\n\n this.vectorColumnName = this.escapeId(\n config.columns?.vectorColumnName ?? \"embedding\",\n false\n );\n this.contentColumnName = this.escapeId(\n config.columns?.contentColumnName ?? \"text\",\n false\n );\n this.idColumnName = this.escapeId(\n config.columns?.idColumnName ?? \"id\",\n false\n );\n this.metadataColumnName = this.escapeId(\n config.columns?.metadataColumnName ?? \"metadata\",\n false\n );\n this.expressionConverter = new MariaDBFilterExpressionConverter(\n this.metadataColumnName\n );\n\n if (!config.connectionOptions && !config.pool) {\n throw new Error(\n \"You must provide either a `connectionOptions` object or a `pool` instance.\"\n );\n }\n\n const langchainVerbose = getEnvironmentVariable(\"LANGCHAIN_VERBOSE\");\n\n if (langchainVerbose === \"true\") {\n this._verbose = true;\n } else if (langchainVerbose === \"false\") {\n this._verbose = false;\n } else {\n this._verbose = config.verbose;\n }\n\n if (config.pool) {\n this.pool = config.pool;\n } else {\n const poolConf = { ...config.connectionOptions, rowsAsArray: true };\n // add query to log if verbose\n if (this._verbose) poolConf.logger = { query: console.log };\n this.pool = mariadb.createPool(poolConf);\n }\n this.chunkSize = config.chunkSize ?? 500;\n\n this.distanceStrategy =\n config.distanceStrategy ?? (\"COSINE\" as DistanceStrategy);\n }\n\n get computedTableName() {\n return this.schemaName == null\n ? this.tableName\n : `${this.schemaName}.${this.tableName}`;\n }\n\n get computedCollectionTableName() {\n return this.schemaName == null\n ? 
`${this.collectionTableName}`\n : `\"${this.schemaName}\".\"${this.collectionTableName}\"`;\n }\n\n /**\n * Escape identifier\n *\n * @param identifier identifier value\n * @param alwaysQuote must identifier be quoted if not required\n */\n private escapeId(identifier: string, alwaysQuote: boolean): string {\n if (!identifier || identifier === \"\")\n throw new Error(\"Identifier is required\");\n\n const len = identifier.length;\n const simpleIdentifier = /^[0-9a-zA-Z$_]*$/;\n if (simpleIdentifier.test(identifier)) {\n if (len < 1 || len > 64) {\n throw new Error(\"Invalid identifier length\");\n }\n if (alwaysQuote) return `\\`${identifier}\\``;\n\n // Identifier names may begin with a numeral, but can't only contain numerals unless quoted.\n if (/^\\d+$/.test(identifier)) {\n // identifier containing only numerals must be quoted\n return `\\`${identifier}\\``;\n }\n // identifier containing only numerals must be quoted\n return identifier;\n } else {\n if (identifier.includes(\"\\u0000\")) {\n throw new Error(\"Invalid name - containing u0000 character\");\n }\n let ident = identifier;\n if (/^`.+`$/.test(identifier)) {\n ident = identifier.substring(1, identifier.length - 1);\n }\n if (len < 1 || len > 64) {\n throw new Error(\"Invalid identifier length\");\n }\n return `\\`${ident.replace(/`/g, \"``\")}\\``;\n }\n }\n\n private printable(definition: string): string {\n return definition.replaceAll(/[^0-9a-zA-Z_]/g, \"\");\n }\n\n /**\n * Static method to create a new `MariaDBStore` instance from a\n * connection. It creates a table if one does not exist, and calls\n * `connect` to return a new instance of `MariaDBStore`.\n *\n * @param embeddings - Embeddings instance.\n * @param fields - `MariaDBStoreArgs` instance\n * @param fields.dimensions Number of dimensions in your vector data type. default to 1536.\n * @returns A new instance of `MariaDBStore`.\n */\n static async initialize(\n embeddings: EmbeddingsInterface,\n config: MariaDBStoreArgs & { dimensions?: number }\n ): Promise {\n const { dimensions, ...rest } = config;\n const mariadbStore = new MariaDBStore(embeddings, rest);\n await mariadbStore.ensureTableInDatabase(dimensions);\n await mariadbStore.ensureCollectionTableInDatabase();\n await mariadbStore.loadCollectionId();\n\n return mariadbStore;\n }\n\n /**\n * Static method to create a new `MariaDBStore` instance from an\n * array of texts and their metadata. It converts the texts into\n * `Document` instances and adds them to the store.\n *\n * @param texts - Array of texts.\n * @param metadatas - Array of metadata objects or a single metadata object.\n * @param embeddings - Embeddings instance.\n * @param dbConfig - `MariaDBStoreArgs` instance.\n * @returns Promise that resolves with a new instance of `MariaDBStore`.\n */\n static async fromTexts(\n texts: string[],\n metadatas: object[] | object,\n embeddings: EmbeddingsInterface,\n dbConfig: MariaDBStoreArgs & { dimensions?: number }\n ): Promise {\n const docs = [];\n for (let i = 0; i < texts.length; i += 1) {\n const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;\n const newDoc = new Document({\n pageContent: texts[i],\n metadata,\n });\n docs.push(newDoc);\n }\n\n return MariaDBStore.fromDocuments(docs, embeddings, dbConfig);\n }\n\n /**\n * Static method to create a new `MariaDBStore` instance from an\n * array of `Document` instances. 
It adds the documents to the store.\n *\n * @param docs - Array of `Document` instances.\n * @param embeddings - Embeddings instance.\n * @param dbConfig - `MariaDBStoreArgs` instance.\n * @returns Promise that resolves with a new instance of `MariaDBStore`.\n */\n static async fromDocuments(\n docs: Document[],\n embeddings: EmbeddingsInterface,\n dbConfig: MariaDBStoreArgs & { dimensions?: number }\n ): Promise {\n const instance = await MariaDBStore.initialize(embeddings, dbConfig);\n await instance.addDocuments(docs, { ids: dbConfig.ids });\n return instance;\n }\n\n _vectorstoreType(): string {\n return \"mariadb\";\n }\n\n /**\n * Method to add documents to the vector store. It converts the documents into\n * vectors, and adds them to the store.\n *\n * @param documents - Array of `Document` instances.\n * @param options - Optional arguments for adding documents\n * @returns Promise that resolves when the documents have been added.\n */\n async addDocuments(\n documents: Document[],\n options?: { ids?: string[] }\n ): Promise {\n const texts = documents.map(({ pageContent }) => pageContent);\n\n return this.addVectors(\n await this.embeddings.embedDocuments(texts),\n documents,\n options\n );\n }\n\n /**\n * Inserts a row for the collectionName provided at initialization if it does not\n * exist and set the collectionId.\n */\n private async loadCollectionId(): Promise {\n if (this.collectionId) {\n return;\n }\n\n if (this.collectionTableName) {\n const queryResult = await this.pool.query(\n {\n sql: `SELECT uuid from ${this.computedCollectionTableName} WHERE label = ?`,\n rowsAsArray: true,\n },\n [this.collectionName]\n );\n if (queryResult.length > 0) {\n this.collectionId = queryResult[0][0];\n } else {\n const insertString = `INSERT INTO ${this.computedCollectionTableName}(label, cmetadata) VALUES (?, ?) RETURNING uuid`;\n const insertResult = await this.pool.query(\n { sql: insertString, rowsAsArray: true },\n [this.collectionName, this.collectionMetadata]\n );\n this.collectionId = insertResult[0][0];\n }\n }\n }\n\n /**\n * Method to add vectors to the vector store. 
It converts the vectors into\n * rows and inserts them into the database.\n *\n * @param vectors - Array of vectors.\n * @param documents - Array of `Document` instances.\n * @param options - Optional arguments for adding documents\n * @returns Promise that resolves when the vectors have been added.\n */\n async addVectors(\n vectors: number[][],\n documents: Document[],\n options?: { ids?: string[] }\n ): Promise {\n const ids = options?.ids;\n\n // Either all documents have ids or none of them do to avoid confusion.\n if (ids !== undefined && ids.length !== vectors.length) {\n throw new Error(\n \"The number of ids must match the number of vectors provided.\"\n );\n }\n await this.loadCollectionId();\n\n const insertQuery = `INSERT INTO ${this.computedTableName}(${", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1901267827", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7360, + "pr_file": "libs/langchain-community/src/vectorstores/mariadb.ts", + "discussion_id": "1901267827", + "commented_code": "@@ -0,0 +1,760 @@\n+import mariadb, { type Pool, type PoolConfig } from \"mariadb\";\n+import { VectorStore } from \"@langchain/core/vectorstores\";\n+import type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { Document } from \"@langchain/core/documents\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+import {\n+ FilterExpressionConverter,\n+ StringBuilder,\n+ BaseFilterExpressionConverter,\n+ Expression,\n+ Value,\n+ Key,\n+ Group,\n+} from \"@langchain/core/filter\";\n+\n+type Metadata = Record;\n+\n+export type DistanceStrategy = \"COSINE\" | \"EUCLIDEAN\";\n+\n+/**\n+ * Converts Expression into JSON metadata filter expression format\n+ * for MariaDB\n+ */\n+class MariaDBFilterExpressionConverter\n+ extends BaseFilterExpressionConverter\n+ implements FilterExpressionConverter\n+{\n+ private metadataFieldName: string;\n+\n+ constructor(metadataFieldName: string) {\n+ super();\n+ this.metadataFieldName = metadataFieldName;\n+ }\n+\n+ convertExpressionToContext(\n+ expression: Expression,\n+ context: StringBuilder\n+ ): void {\n+ super.convertOperandToContext(expression.left, context);\n+ super.convertSymbolToContext(expression, context);\n+ if (expression.right) {\n+ super.convertOperandToContext(expression.right, context);\n+ }\n+ }\n+\n+ convertKeyToContext(key: Key, context: StringBuilder): void {\n+ context.append(`JSON_VALUE(${this.metadataFieldName}, '$.${key.key}')`);\n+ }\n+\n+ writeValueRangeStart(_listValue: Value, context: StringBuilder): void {\n+ context.append(\"(\");\n+ }\n+\n+ writeValueRangeEnd(_listValue: Value, context: StringBuilder): void {\n+ context.append(\")\");\n+ }\n+\n+ writeGroupStart(_group: Group, context: StringBuilder): void {\n+ context.append(\"(\");\n+ }\n+\n+ writeGroupEnd(_group: Group, context: StringBuilder): void {\n+ context.append(\")\");\n+ }\n+}\n+\n+/**\n+ * Interface that defines the arguments required to create a\n+ * `MariaDBStore` instance. 
It includes MariaDB connection options,\n+ * table name and verbosity level.\n+ */\n+export interface MariaDBStoreArgs {\n+ connectionOptions?: PoolConfig;\n+ pool?: Pool;\n+ tableName?: string;\n+ collectionTableName?: string;\n+ collectionName?: string;\n+ collectionMetadata?: Metadata | null;\n+ schemaName?: string | null;\n+ columns?: {\n+ idColumnName?: string;\n+ vectorColumnName?: string;\n+ contentColumnName?: string;\n+ metadataColumnName?: string;\n+ };\n+ verbose?: boolean;\n+ /**\n+ * The amount of documents to chunk by when\n+ * adding vectors.\n+ * @default 500\n+ */\n+ chunkSize?: number;\n+ ids?: string[];\n+ distanceStrategy?: DistanceStrategy;\n+}\n+\n+/**\n+ * MariaDB vector store integration.\n+ *\n+ * Setup:\n+ * Install `@langchain/community` and `mariadb`.\n+ *\n+ * If you wish to generate ids, you should also install the `uuid` package.\n+ *\n+ * ```bash\n+ * npm install @langchain/community mariadb uuid\n+ * ```\n+ *\n+ * ## [Constructor args](https://api.js.langchain.com/classes/_langchain_community.vectorstores_mariadb.MariaDB.html#constructor)\n+ *\n+ *
\n+ * Instantiate\n+ *\n+ * ```typescript\n+ * import {\n+ * MariaDBStore,\n+ * DistanceStrategy,\n+ * } from \"@langchain/community/vectorstores/mariadb\";\n+ *\n+ * // Or other embeddings\n+ * import { OpenAIEmbeddings } from \"@langchain/openai\";\n+ * import { PoolConfig } from \"mariadb\";\n+ *\n+ * const embeddings = new OpenAIEmbeddings({\n+ * model: \"text-embedding-3-small\",\n+ * });\n+ *\n+ * // Sample config\n+ * const config = {\n+ * connectionOptions: {\n+ * host: \"127.0.0.1\",\n+ * port: 3306,\n+ * user: \"myuser\",\n+ * password: \"ChangeMe\",\n+ * database: \"api\",\n+ * } as PoolConfig,\n+ * tableName: \"testlangchainjs\",\n+ * columns: {\n+ * idColumnName: \"id\",\n+ * vectorColumnName: \"vector\",\n+ * contentColumnName: \"content\",\n+ * metadataColumnName: \"metadata\",\n+ * },\n+ * // supported distance strategies: COSINE (default) or EUCLIDEAN\n+ * distanceStrategy: \"COSINE\" as DistanceStrategy,\n+ * };\n+ *\n+ * const vectorStore = await MariaDBStore.initialize(embeddings, config);\n+ * ```\n+ *
\n+ *\n+ *
\n+ *\n+ *
\n+ * Add documents\n+ *\n+ * ```typescript\n+ * import type { Document } from '@langchain/core/documents';\n+ *\n+ * const document1 = { pageContent: \"foo\", metadata: { baz: \"bar\" } };\n+ * const document2 = { pageContent: \"thud\", metadata: { bar: \"baz\" } };\n+ * const document3 = { pageContent: \"i will be deleted :(\", metadata: {} };\n+ *\n+ * const documents: Document[] = [document1, document2, document3];\n+ * const ids = [\"1\", \"2\", \"3\"];\n+ * await vectorStore.addDocuments(documents, { ids });\n+ * ```\n+ *
\n+ *\n+ *
\n+ *\n+ *
\n+ * Delete documents\n+ *\n+ * ```typescript\n+ * await vectorStore.delete({ ids: [\"3\"] });\n+ * ```\n+ *
\n+ *\n+ *
\n+ *\n+ *
\n+ * Similarity search\n+ *\n+ * ```typescript\n+ * const results = await vectorStore.similaritySearch(\"thud\", 1);\n+ * for (const doc of results) {\n+ * console.log(`* ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n+ * }\n+ * // Output: * thud [{\"baz\":\"bar\"}]\n+ * ```\n+ *
\n+ *\n+ *
\n+ *\n+ *\n+ *
\n+ * Similarity search with filter\n+ *\n+ * ```typescript\n+ * const resultsWithFilter = await vectorStore.similaritySearch(\"thud\", 1, new Expression( ExpressionType.EQ, new Key(\"country\"), new Value(\"BG\")));\n+ *\n+ * for (const doc of resultsWithFilter) {\n+ * console.log(`* ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n+ * }\n+ * // Output: * foo [{\"baz\":\"bar\"}]\n+ * ```\n+ *
\n+ *\n+ *
\n+ *\n+ *\n+ *
\n+ * Similarity search with score\n+ *\n+ * ```typescript\n+ * const resultsWithScore = await vectorStore.similaritySearchWithScore(\"qux\", 1);\n+ * for (const [doc, score] of resultsWithScore) {\n+ * console.log(`* [SIM=${score.toFixed(6)}] ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n+ * }\n+ * // Output: * [SIM=0.000000] qux [{\"bar\":\"baz\",\"baz\":\"bar\"}]\n+ * ```\n+ *
\n+ *\n+ *
\n+ *\n+ *
\n+ * As a retriever\n+ *\n+ * ```typescript\n+ * const retriever = vectorStore.asRetriever({\n+ * searchType: \"mmr\", // Leave blank for standard similarity search\n+ * k: 1,\n+ * });\n+ * const resultAsRetriever = await retriever.invoke(\"thud\");\n+ * console.log(resultAsRetriever);\n+ *\n+ * // Output: [Document({ metadata: { \"baz\":\"bar\" }, pageContent: \"thud\" })]\n+ * ```\n+ *
\n+ *\n+ *
\n+ */\n+export class MariaDBStore extends VectorStore {\n+ tableName: string;\n+\n+ collectionTableName?: string;\n+\n+ collectionName = \"langchain\";\n+\n+ collectionId?: string;\n+\n+ collectionMetadata: Metadata | null;\n+\n+ schemaName: string | null;\n+\n+ idColumnName: string;\n+\n+ vectorColumnName: string;\n+\n+ contentColumnName: string;\n+\n+ metadataColumnName: string;\n+\n+ _verbose?: boolean;\n+\n+ pool: Pool;\n+\n+ chunkSize = 500;\n+\n+ distanceStrategy: DistanceStrategy;\n+\n+ expressionConverter: MariaDBFilterExpressionConverter;\n+\n+ constructor(embeddings: EmbeddingsInterface, config: MariaDBStoreArgs) {\n+ super(embeddings, config);\n+ this.tableName = this.escapeId(config.tableName ?? \"langchain\", false);\n+ if (\n+ config.collectionName !== undefined &&\n+ config.collectionTableName === undefined\n+ ) {\n+ throw new Error(\n+ `If supplying a \"collectionName\", you must also supply a \"collectionTableName\".`\n+ );\n+ }\n+\n+ this.collectionTableName = config.collectionTableName\n+ ? this.escapeId(config.collectionTableName, false)\n+ : undefined;\n+\n+ this.collectionName = config.collectionName\n+ ? this.escapeId(config.collectionName, false)\n+ : \"langchaincol\";\n+\n+ this.collectionMetadata = config.collectionMetadata ?? null;\n+ this.schemaName = config.schemaName\n+ ? this.escapeId(config.schemaName, false)\n+ : null;\n+\n+ this.vectorColumnName = this.escapeId(\n+ config.columns?.vectorColumnName ?? \"embedding\",\n+ false\n+ );\n+ this.contentColumnName = this.escapeId(\n+ config.columns?.contentColumnName ?? \"text\",\n+ false\n+ );\n+ this.idColumnName = this.escapeId(\n+ config.columns?.idColumnName ?? \"id\",\n+ false\n+ );\n+ this.metadataColumnName = this.escapeId(\n+ config.columns?.metadataColumnName ?? \"metadata\",\n+ false\n+ );\n+ this.expressionConverter = new MariaDBFilterExpressionConverter(\n+ this.metadataColumnName\n+ );\n+\n+ if (!config.connectionOptions && !config.pool) {\n+ throw new Error(\n+ \"You must provide either a `connectionOptions` object or a `pool` instance.\"\n+ );\n+ }\n+\n+ const langchainVerbose = getEnvironmentVariable(\"LANGCHAIN_VERBOSE\");\n+\n+ if (langchainVerbose === \"true\") {\n+ this._verbose = true;\n+ } else if (langchainVerbose === \"false\") {\n+ this._verbose = false;\n+ } else {\n+ this._verbose = config.verbose;\n+ }\n+\n+ if (config.pool) {\n+ this.pool = config.pool;\n+ } else {\n+ const poolConf = { ...config.connectionOptions, rowsAsArray: true };\n+ // add query to log if verbose\n+ if (this._verbose) poolConf.logger = { query: console.log };\n+ this.pool = mariadb.createPool(poolConf);\n+ }\n+ this.chunkSize = config.chunkSize ?? 500;\n+\n+ this.distanceStrategy =\n+ config.distanceStrategy ?? (\"COSINE\" as DistanceStrategy);\n+ }\n+\n+ get computedTableName() {\n+ return this.schemaName == null\n+ ? this.tableName\n+ : `${this.schemaName}.${this.tableName}`;\n+ }\n+\n+ get computedCollectionTableName() {\n+ return this.schemaName == null\n+ ? 
`${this.collectionTableName}`\n+ : `\"${this.schemaName}\".\"${this.collectionTableName}\"`;\n+ }\n+\n+ /**\n+ * Escape identifier\n+ *\n+ * @param identifier identifier value\n+ * @param alwaysQuote must identifier be quoted if not required\n+ */\n+ private escapeId(identifier: string, alwaysQuote: boolean): string {\n+ if (!identifier || identifier === \"\")\n+ throw new Error(\"Identifier is required\");\n+\n+ const len = identifier.length;\n+ const simpleIdentifier = /^[0-9a-zA-Z$_]*$/;\n+ if (simpleIdentifier.test(identifier)) {\n+ if (len < 1 || len > 64) {\n+ throw new Error(\"Invalid identifier length\");\n+ }\n+ if (alwaysQuote) return `\\`${identifier}\\``;\n+\n+ // Identifier names may begin with a numeral, but can't only contain numerals unless quoted.\n+ if (/^\\d+$/.test(identifier)) {\n+ // identifier containing only numerals must be quoted\n+ return `\\`${identifier}\\``;\n+ }\n+ // identifier containing only numerals must be quoted\n+ return identifier;\n+ } else {\n+ if (identifier.includes(\"\\u0000\")) {\n+ throw new Error(\"Invalid name - containing u0000 character\");\n+ }\n+ let ident = identifier;\n+ if (/^`.+`$/.test(identifier)) {\n+ ident = identifier.substring(1, identifier.length - 1);\n+ }\n+ if (len < 1 || len > 64) {\n+ throw new Error(\"Invalid identifier length\");\n+ }\n+ return `\\`${ident.replace(/`/g, \"``\")}\\``;\n+ }\n+ }\n+\n+ private printable(definition: string): string {\n+ return definition.replaceAll(/[^0-9a-zA-Z_]/g, \"\");\n+ }\n+\n+ /**\n+ * Static method to create a new `MariaDBStore` instance from a\n+ * connection. It creates a table if one does not exist, and calls\n+ * `connect` to return a new instance of `MariaDBStore`.\n+ *\n+ * @param embeddings - Embeddings instance.\n+ * @param fields - `MariaDBStoreArgs` instance\n+ * @param fields.dimensions Number of dimensions in your vector data type. default to 1536.\n+ * @returns A new instance of `MariaDBStore`.\n+ */\n+ static async initialize(\n+ embeddings: EmbeddingsInterface,\n+ config: MariaDBStoreArgs & { dimensions?: number }\n+ ): Promise {\n+ const { dimensions, ...rest } = config;\n+ const mariadbStore = new MariaDBStore(embeddings, rest);\n+ await mariadbStore.ensureTableInDatabase(dimensions);\n+ await mariadbStore.ensureCollectionTableInDatabase();\n+ await mariadbStore.loadCollectionId();\n+\n+ return mariadbStore;\n+ }\n+\n+ /**\n+ * Static method to create a new `MariaDBStore` instance from an\n+ * array of texts and their metadata. It converts the texts into\n+ * `Document` instances and adds them to the store.\n+ *\n+ * @param texts - Array of texts.\n+ * @param metadatas - Array of metadata objects or a single metadata object.\n+ * @param embeddings - Embeddings instance.\n+ * @param dbConfig - `MariaDBStoreArgs` instance.\n+ * @returns Promise that resolves with a new instance of `MariaDBStore`.\n+ */\n+ static async fromTexts(\n+ texts: string[],\n+ metadatas: object[] | object,\n+ embeddings: EmbeddingsInterface,\n+ dbConfig: MariaDBStoreArgs & { dimensions?: number }\n+ ): Promise {\n+ const docs = [];\n+ for (let i = 0; i < texts.length; i += 1) {\n+ const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;\n+ const newDoc = new Document({\n+ pageContent: texts[i],\n+ metadata,\n+ });\n+ docs.push(newDoc);\n+ }\n+\n+ return MariaDBStore.fromDocuments(docs, embeddings, dbConfig);\n+ }\n+\n+ /**\n+ * Static method to create a new `MariaDBStore` instance from an\n+ * array of `Document` instances. 
It adds the documents to the store.\n+ *\n+ * @param docs - Array of `Document` instances.\n+ * @param embeddings - Embeddings instance.\n+ * @param dbConfig - `MariaDBStoreArgs` instance.\n+ * @returns Promise that resolves with a new instance of `MariaDBStore`.\n+ */\n+ static async fromDocuments(\n+ docs: Document[],\n+ embeddings: EmbeddingsInterface,\n+ dbConfig: MariaDBStoreArgs & { dimensions?: number }\n+ ): Promise {\n+ const instance = await MariaDBStore.initialize(embeddings, dbConfig);\n+ await instance.addDocuments(docs, { ids: dbConfig.ids });\n+ return instance;\n+ }\n+\n+ _vectorstoreType(): string {\n+ return \"mariadb\";\n+ }\n+\n+ /**\n+ * Method to add documents to the vector store. It converts the documents into\n+ * vectors, and adds them to the store.\n+ *\n+ * @param documents - Array of `Document` instances.\n+ * @param options - Optional arguments for adding documents\n+ * @returns Promise that resolves when the documents have been added.\n+ */\n+ async addDocuments(\n+ documents: Document[],\n+ options?: { ids?: string[] }\n+ ): Promise {\n+ const texts = documents.map(({ pageContent }) => pageContent);\n+\n+ return this.addVectors(\n+ await this.embeddings.embedDocuments(texts),\n+ documents,\n+ options\n+ );\n+ }\n+\n+ /**\n+ * Inserts a row for the collectionName provided at initialization if it does not\n+ * exist and set the collectionId.\n+ */\n+ private async loadCollectionId(): Promise {\n+ if (this.collectionId) {\n+ return;\n+ }\n+\n+ if (this.collectionTableName) {\n+ const queryResult = await this.pool.query(\n+ {\n+ sql: `SELECT uuid from ${this.computedCollectionTableName} WHERE label = ?`,\n+ rowsAsArray: true,\n+ },\n+ [this.collectionName]\n+ );\n+ if (queryResult.length > 0) {\n+ this.collectionId = queryResult[0][0];\n+ } else {\n+ const insertString = `INSERT INTO ${this.computedCollectionTableName}(label, cmetadata) VALUES (?, ?) RETURNING uuid`;\n+ const insertResult = await this.pool.query(\n+ { sql: insertString, rowsAsArray: true },\n+ [this.collectionName, this.collectionMetadata]\n+ );\n+ this.collectionId = insertResult[0][0];\n+ }\n+ }\n+ }\n+\n+ /**\n+ * Method to add vectors to the vector store. 
It converts the vectors into\n+ * rows and inserts them into the database.\n+ *\n+ * @param vectors - Array of vectors.\n+ * @param documents - Array of `Document` instances.\n+ * @param options - Optional arguments for adding documents\n+ * @returns Promise that resolves when the vectors have been added.\n+ */\n+ async addVectors(\n+ vectors: number[][],\n+ documents: Document[],\n+ options?: { ids?: string[] }\n+ ): Promise {\n+ const ids = options?.ids;\n+\n+ // Either all documents have ids or none of them do to avoid confusion.\n+ if (ids !== undefined && ids.length !== vectors.length) {\n+ throw new Error(\n+ \"The number of ids must match the number of vectors provided.\"\n+ );\n+ }\n+ await this.loadCollectionId();\n+\n+ const insertQuery = `INSERT INTO ${this.computedTableName}(${", + "comment_created_at": "2025-01-02T21:19:03+00:00", + "comment_author": "jacoblee93", + "comment_body": "nit: can we escape column names too/disallow nonalphanumeric?", + "pr_file_module": null + }, + { + "comment_id": "1901592943", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 7360, + "pr_file": "libs/langchain-community/src/vectorstores/mariadb.ts", + "discussion_id": "1901267827", + "commented_code": "@@ -0,0 +1,760 @@\n+import mariadb, { type Pool, type PoolConfig } from \"mariadb\";\n+import { VectorStore } from \"@langchain/core/vectorstores\";\n+import type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { Document } from \"@langchain/core/documents\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+import {\n+ FilterExpressionConverter,\n+ StringBuilder,\n+ BaseFilterExpressionConverter,\n+ Expression,\n+ Value,\n+ Key,\n+ Group,\n+} from \"@langchain/core/filter\";\n+\n+type Metadata = Record;\n+\n+export type DistanceStrategy = \"COSINE\" | \"EUCLIDEAN\";\n+\n+/**\n+ * Converts Expression into JSON metadata filter expression format\n+ * for MariaDB\n+ */\n+class MariaDBFilterExpressionConverter\n+ extends BaseFilterExpressionConverter\n+ implements FilterExpressionConverter\n+{\n+ private metadataFieldName: string;\n+\n+ constructor(metadataFieldName: string) {\n+ super();\n+ this.metadataFieldName = metadataFieldName;\n+ }\n+\n+ convertExpressionToContext(\n+ expression: Expression,\n+ context: StringBuilder\n+ ): void {\n+ super.convertOperandToContext(expression.left, context);\n+ super.convertSymbolToContext(expression, context);\n+ if (expression.right) {\n+ super.convertOperandToContext(expression.right, context);\n+ }\n+ }\n+\n+ convertKeyToContext(key: Key, context: StringBuilder): void {\n+ context.append(`JSON_VALUE(${this.metadataFieldName}, '$.${key.key}')`);\n+ }\n+\n+ writeValueRangeStart(_listValue: Value, context: StringBuilder): void {\n+ context.append(\"(\");\n+ }\n+\n+ writeValueRangeEnd(_listValue: Value, context: StringBuilder): void {\n+ context.append(\")\");\n+ }\n+\n+ writeGroupStart(_group: Group, context: StringBuilder): void {\n+ context.append(\"(\");\n+ }\n+\n+ writeGroupEnd(_group: Group, context: StringBuilder): void {\n+ context.append(\")\");\n+ }\n+}\n+\n+/**\n+ * Interface that defines the arguments required to create a\n+ * `MariaDBStore` instance. 
It includes MariaDB connection options,\n+ * table name and verbosity level.\n+ */\n+export interface MariaDBStoreArgs {\n+ connectionOptions?: PoolConfig;\n+ pool?: Pool;\n+ tableName?: string;\n+ collectionTableName?: string;\n+ collectionName?: string;\n+ collectionMetadata?: Metadata | null;\n+ schemaName?: string | null;\n+ columns?: {\n+ idColumnName?: string;\n+ vectorColumnName?: string;\n+ contentColumnName?: string;\n+ metadataColumnName?: string;\n+ };\n+ verbose?: boolean;\n+ /**\n+ * The amount of documents to chunk by when\n+ * adding vectors.\n+ * @default 500\n+ */\n+ chunkSize?: number;\n+ ids?: string[];\n+ distanceStrategy?: DistanceStrategy;\n+}\n+\n+/**\n+ * MariaDB vector store integration.\n+ *\n+ * Setup:\n+ * Install `@langchain/community` and `mariadb`.\n+ *\n+ * If you wish to generate ids, you should also install the `uuid` package.\n+ *\n+ * ```bash\n+ * npm install @langchain/community mariadb uuid\n+ * ```\n+ *\n+ * ## [Constructor args](https://api.js.langchain.com/classes/_langchain_community.vectorstores_mariadb.MariaDB.html#constructor)\n+ *\n+ *
\n+ * Instantiate\n+ *\n+ * ```typescript\n+ * import {\n+ * MariaDBStore,\n+ * DistanceStrategy,\n+ * } from \"@langchain/community/vectorstores/mariadb\";\n+ *\n+ * // Or other embeddings\n+ * import { OpenAIEmbeddings } from \"@langchain/openai\";\n+ * import { PoolConfig } from \"mariadb\";\n+ *\n+ * const embeddings = new OpenAIEmbeddings({\n+ * model: \"text-embedding-3-small\",\n+ * });\n+ *\n+ * // Sample config\n+ * const config = {\n+ * connectionOptions: {\n+ * host: \"127.0.0.1\",\n+ * port: 3306,\n+ * user: \"myuser\",\n+ * password: \"ChangeMe\",\n+ * database: \"api\",\n+ * } as PoolConfig,\n+ * tableName: \"testlangchainjs\",\n+ * columns: {\n+ * idColumnName: \"id\",\n+ * vectorColumnName: \"vector\",\n+ * contentColumnName: \"content\",\n+ * metadataColumnName: \"metadata\",\n+ * },\n+ * // supported distance strategies: COSINE (default) or EUCLIDEAN\n+ * distanceStrategy: \"COSINE\" as DistanceStrategy,\n+ * };\n+ *\n+ * const vectorStore = await MariaDBStore.initialize(embeddings, config);\n+ * ```\n+ *
\n+ *\n+ *
\n+ *\n+ *
\n+ * Add documents\n+ *\n+ * ```typescript\n+ * import type { Document } from '@langchain/core/documents';\n+ *\n+ * const document1 = { pageContent: \"foo\", metadata: { baz: \"bar\" } };\n+ * const document2 = { pageContent: \"thud\", metadata: { bar: \"baz\" } };\n+ * const document3 = { pageContent: \"i will be deleted :(\", metadata: {} };\n+ *\n+ * const documents: Document[] = [document1, document2, document3];\n+ * const ids = [\"1\", \"2\", \"3\"];\n+ * await vectorStore.addDocuments(documents, { ids });\n+ * ```\n+ *
\n+ *\n+ *
\n+ *\n+ *
\n+ * Delete documents\n+ *\n+ * ```typescript\n+ * await vectorStore.delete({ ids: [\"3\"] });\n+ * ```\n+ *
\n+ *\n+ *
\n+ *\n+ *
\n+ * Similarity search\n+ *\n+ * ```typescript\n+ * const results = await vectorStore.similaritySearch(\"thud\", 1);\n+ * for (const doc of results) {\n+ * console.log(`* ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n+ * }\n+ * // Output: * thud [{\"baz\":\"bar\"}]\n+ * ```\n+ *
\n+ *\n+ *
\n+ *\n+ *\n+ *
\n+ * Similarity search with filter\n+ *\n+ * ```typescript\n+ * const resultsWithFilter = await vectorStore.similaritySearch(\"thud\", 1, new Expression( ExpressionType.EQ, new Key(\"country\"), new Value(\"BG\")));\n+ *\n+ * for (const doc of resultsWithFilter) {\n+ * console.log(`* ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n+ * }\n+ * // Output: * foo [{\"baz\":\"bar\"}]\n+ * ```\n+ *
\n+ *\n+ *
\n+ *\n+ *\n+ *
\n+ * Similarity search with score\n+ *\n+ * ```typescript\n+ * const resultsWithScore = await vectorStore.similaritySearchWithScore(\"qux\", 1);\n+ * for (const [doc, score] of resultsWithScore) {\n+ * console.log(`* [SIM=${score.toFixed(6)}] ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n+ * }\n+ * // Output: * [SIM=0.000000] qux [{\"bar\":\"baz\",\"baz\":\"bar\"}]\n+ * ```\n+ *
\n+ *\n+ *
\n+ *\n+ *
\n+ * As a retriever\n+ *\n+ * ```typescript\n+ * const retriever = vectorStore.asRetriever({\n+ * searchType: \"mmr\", // Leave blank for standard similarity search\n+ * k: 1,\n+ * });\n+ * const resultAsRetriever = await retriever.invoke(\"thud\");\n+ * console.log(resultAsRetriever);\n+ *\n+ * // Output: [Document({ metadata: { \"baz\":\"bar\" }, pageContent: \"thud\" })]\n+ * ```\n+ *
\n+ *\n+ *
\n+ */\n+export class MariaDBStore extends VectorStore {\n+ tableName: string;\n+\n+ collectionTableName?: string;\n+\n+ collectionName = \"langchain\";\n+\n+ collectionId?: string;\n+\n+ collectionMetadata: Metadata | null;\n+\n+ schemaName: string | null;\n+\n+ idColumnName: string;\n+\n+ vectorColumnName: string;\n+\n+ contentColumnName: string;\n+\n+ metadataColumnName: string;\n+\n+ _verbose?: boolean;\n+\n+ pool: Pool;\n+\n+ chunkSize = 500;\n+\n+ distanceStrategy: DistanceStrategy;\n+\n+ expressionConverter: MariaDBFilterExpressionConverter;\n+\n+ constructor(embeddings: EmbeddingsInterface, config: MariaDBStoreArgs) {\n+ super(embeddings, config);\n+ this.tableName = this.escapeId(config.tableName ?? \"langchain\", false);\n+ if (\n+ config.collectionName !== undefined &&\n+ config.collectionTableName === undefined\n+ ) {\n+ throw new Error(\n+ `If supplying a \"collectionName\", you must also supply a \"collectionTableName\".`\n+ );\n+ }\n+\n+ this.collectionTableName = config.collectionTableName\n+ ? this.escapeId(config.collectionTableName, false)\n+ : undefined;\n+\n+ this.collectionName = config.collectionName\n+ ? this.escapeId(config.collectionName, false)\n+ : \"langchaincol\";\n+\n+ this.collectionMetadata = config.collectionMetadata ?? null;\n+ this.schemaName = config.schemaName\n+ ? this.escapeId(config.schemaName, false)\n+ : null;\n+\n+ this.vectorColumnName = this.escapeId(\n+ config.columns?.vectorColumnName ?? \"embedding\",\n+ false\n+ );\n+ this.contentColumnName = this.escapeId(\n+ config.columns?.contentColumnName ?? \"text\",\n+ false\n+ );\n+ this.idColumnName = this.escapeId(\n+ config.columns?.idColumnName ?? \"id\",\n+ false\n+ );\n+ this.metadataColumnName = this.escapeId(\n+ config.columns?.metadataColumnName ?? \"metadata\",\n+ false\n+ );\n+ this.expressionConverter = new MariaDBFilterExpressionConverter(\n+ this.metadataColumnName\n+ );\n+\n+ if (!config.connectionOptions && !config.pool) {\n+ throw new Error(\n+ \"You must provide either a `connectionOptions` object or a `pool` instance.\"\n+ );\n+ }\n+\n+ const langchainVerbose = getEnvironmentVariable(\"LANGCHAIN_VERBOSE\");\n+\n+ if (langchainVerbose === \"true\") {\n+ this._verbose = true;\n+ } else if (langchainVerbose === \"false\") {\n+ this._verbose = false;\n+ } else {\n+ this._verbose = config.verbose;\n+ }\n+\n+ if (config.pool) {\n+ this.pool = config.pool;\n+ } else {\n+ const poolConf = { ...config.connectionOptions, rowsAsArray: true };\n+ // add query to log if verbose\n+ if (this._verbose) poolConf.logger = { query: console.log };\n+ this.pool = mariadb.createPool(poolConf);\n+ }\n+ this.chunkSize = config.chunkSize ?? 500;\n+\n+ this.distanceStrategy =\n+ config.distanceStrategy ?? (\"COSINE\" as DistanceStrategy);\n+ }\n+\n+ get computedTableName() {\n+ return this.schemaName == null\n+ ? this.tableName\n+ : `${this.schemaName}.${this.tableName}`;\n+ }\n+\n+ get computedCollectionTableName() {\n+ return this.schemaName == null\n+ ? 
`${this.collectionTableName}`\n+ : `\"${this.schemaName}\".\"${this.collectionTableName}\"`;\n+ }\n+\n+ /**\n+ * Escape identifier\n+ *\n+ * @param identifier identifier value\n+ * @param alwaysQuote must identifier be quoted if not required\n+ */\n+ private escapeId(identifier: string, alwaysQuote: boolean): string {\n+ if (!identifier || identifier === \"\")\n+ throw new Error(\"Identifier is required\");\n+\n+ const len = identifier.length;\n+ const simpleIdentifier = /^[0-9a-zA-Z$_]*$/;\n+ if (simpleIdentifier.test(identifier)) {\n+ if (len < 1 || len > 64) {\n+ throw new Error(\"Invalid identifier length\");\n+ }\n+ if (alwaysQuote) return `\\`${identifier}\\``;\n+\n+ // Identifier names may begin with a numeral, but can't only contain numerals unless quoted.\n+ if (/^\\d+$/.test(identifier)) {\n+ // identifier containing only numerals must be quoted\n+ return `\\`${identifier}\\``;\n+ }\n+ // identifier containing only numerals must be quoted\n+ return identifier;\n+ } else {\n+ if (identifier.includes(\"\\u0000\")) {\n+ throw new Error(\"Invalid name - containing u0000 character\");\n+ }\n+ let ident = identifier;\n+ if (/^`.+`$/.test(identifier)) {\n+ ident = identifier.substring(1, identifier.length - 1);\n+ }\n+ if (len < 1 || len > 64) {\n+ throw new Error(\"Invalid identifier length\");\n+ }\n+ return `\\`${ident.replace(/`/g, \"``\")}\\``;\n+ }\n+ }\n+\n+ private printable(definition: string): string {\n+ return definition.replaceAll(/[^0-9a-zA-Z_]/g, \"\");\n+ }\n+\n+ /**\n+ * Static method to create a new `MariaDBStore` instance from a\n+ * connection. It creates a table if one does not exist, and calls\n+ * `connect` to return a new instance of `MariaDBStore`.\n+ *\n+ * @param embeddings - Embeddings instance.\n+ * @param fields - `MariaDBStoreArgs` instance\n+ * @param fields.dimensions Number of dimensions in your vector data type. default to 1536.\n+ * @returns A new instance of `MariaDBStore`.\n+ */\n+ static async initialize(\n+ embeddings: EmbeddingsInterface,\n+ config: MariaDBStoreArgs & { dimensions?: number }\n+ ): Promise {\n+ const { dimensions, ...rest } = config;\n+ const mariadbStore = new MariaDBStore(embeddings, rest);\n+ await mariadbStore.ensureTableInDatabase(dimensions);\n+ await mariadbStore.ensureCollectionTableInDatabase();\n+ await mariadbStore.loadCollectionId();\n+\n+ return mariadbStore;\n+ }\n+\n+ /**\n+ * Static method to create a new `MariaDBStore` instance from an\n+ * array of texts and their metadata. It converts the texts into\n+ * `Document` instances and adds them to the store.\n+ *\n+ * @param texts - Array of texts.\n+ * @param metadatas - Array of metadata objects or a single metadata object.\n+ * @param embeddings - Embeddings instance.\n+ * @param dbConfig - `MariaDBStoreArgs` instance.\n+ * @returns Promise that resolves with a new instance of `MariaDBStore`.\n+ */\n+ static async fromTexts(\n+ texts: string[],\n+ metadatas: object[] | object,\n+ embeddings: EmbeddingsInterface,\n+ dbConfig: MariaDBStoreArgs & { dimensions?: number }\n+ ): Promise {\n+ const docs = [];\n+ for (let i = 0; i < texts.length; i += 1) {\n+ const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;\n+ const newDoc = new Document({\n+ pageContent: texts[i],\n+ metadata,\n+ });\n+ docs.push(newDoc);\n+ }\n+\n+ return MariaDBStore.fromDocuments(docs, embeddings, dbConfig);\n+ }\n+\n+ /**\n+ * Static method to create a new `MariaDBStore` instance from an\n+ * array of `Document` instances. 
It adds the documents to the store.\n+ *\n+ * @param docs - Array of `Document` instances.\n+ * @param embeddings - Embeddings instance.\n+ * @param dbConfig - `MariaDBStoreArgs` instance.\n+ * @returns Promise that resolves with a new instance of `MariaDBStore`.\n+ */\n+ static async fromDocuments(\n+ docs: Document[],\n+ embeddings: EmbeddingsInterface,\n+ dbConfig: MariaDBStoreArgs & { dimensions?: number }\n+ ): Promise {\n+ const instance = await MariaDBStore.initialize(embeddings, dbConfig);\n+ await instance.addDocuments(docs, { ids: dbConfig.ids });\n+ return instance;\n+ }\n+\n+ _vectorstoreType(): string {\n+ return \"mariadb\";\n+ }\n+\n+ /**\n+ * Method to add documents to the vector store. It converts the documents into\n+ * vectors, and adds them to the store.\n+ *\n+ * @param documents - Array of `Document` instances.\n+ * @param options - Optional arguments for adding documents\n+ * @returns Promise that resolves when the documents have been added.\n+ */\n+ async addDocuments(\n+ documents: Document[],\n+ options?: { ids?: string[] }\n+ ): Promise {\n+ const texts = documents.map(({ pageContent }) => pageContent);\n+\n+ return this.addVectors(\n+ await this.embeddings.embedDocuments(texts),\n+ documents,\n+ options\n+ );\n+ }\n+\n+ /**\n+ * Inserts a row for the collectionName provided at initialization if it does not\n+ * exist and set the collectionId.\n+ */\n+ private async loadCollectionId(): Promise {\n+ if (this.collectionId) {\n+ return;\n+ }\n+\n+ if (this.collectionTableName) {\n+ const queryResult = await this.pool.query(\n+ {\n+ sql: `SELECT uuid from ${this.computedCollectionTableName} WHERE label = ?`,\n+ rowsAsArray: true,\n+ },\n+ [this.collectionName]\n+ );\n+ if (queryResult.length > 0) {\n+ this.collectionId = queryResult[0][0];\n+ } else {\n+ const insertString = `INSERT INTO ${this.computedCollectionTableName}(label, cmetadata) VALUES (?, ?) RETURNING uuid`;\n+ const insertResult = await this.pool.query(\n+ { sql: insertString, rowsAsArray: true },\n+ [this.collectionName, this.collectionMetadata]\n+ );\n+ this.collectionId = insertResult[0][0];\n+ }\n+ }\n+ }\n+\n+ /**\n+ * Method to add vectors to the vector store. It converts the vectors into\n+ * rows and inserts them into the database.\n+ *\n+ * @param vectors - Array of vectors.\n+ * @param documents - Array of `Document` instances.\n+ * @param options - Optional arguments for adding documents\n+ * @returns Promise that resolves when the vectors have been added.\n+ */\n+ async addVectors(\n+ vectors: number[][],\n+ documents: Document[],\n+ options?: { ids?: string[] }\n+ ): Promise {\n+ const ids = options?.ids;\n+\n+ // Either all documents have ids or none of them do to avoid confusion.\n+ if (ids !== undefined && ids.length !== vectors.length) {\n+ throw new Error(\n+ \"The number of ids must match the number of vectors provided.\"\n+ );\n+ }\n+ await this.loadCollectionId();\n+\n+ const insertQuery = `INSERT INTO ${this.computedTableName}(${", + "comment_created_at": "2025-01-03T09:12:12+00:00", + "comment_author": "rusher", + "comment_body": "table name and columns names are all escaped to prevent any injection. 
see `escapeId` function", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1755519345", + "pr_number": 6736, + "pr_file": "langchain/src/storage/tests/file_system.test.ts", + "created_at": "2024-09-11T20:08:42+00:00", + "commented_code": ").toEqual([value1, value2]);\n await fs.promises.rm(secondaryRootPath, { recursive: true, force: true });\n });\n\n test(\"Should disallow attempts to traverse paths outside of a subfolder\", async () => {\n const encoder = new TextEncoder();\n const store = await LocalFileStore.fromPath(secondaryRootPath);\n const value1 = new Date().toISOString();\n await expect(\n store.mset([[\"../foo\", encoder.encode(value1)]])\n ).rejects.toThrowError();\n await expect(\n store.mset([[\"/foo\", encoder.encode(value1)]])\n ).rejects.toThrowError();\n await expect(store.mget([\"../foo\"])).rejects.toThrowError();", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1755519345", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6736, + "pr_file": "langchain/src/storage/tests/file_system.test.ts", + "discussion_id": "1755519345", + "commented_code": "@@ -88,4 +88,19 @@ describe(\"LocalFileStore\", () => {\n ).toEqual([value1, value2]);\n await fs.promises.rm(secondaryRootPath, { recursive: true, force: true });\n });\n+\n+ test(\"Should disallow attempts to traverse paths outside of a subfolder\", async () => {\n+ const encoder = new TextEncoder();\n+ const store = await LocalFileStore.fromPath(secondaryRootPath);\n+ const value1 = new Date().toISOString();\n+ await expect(\n+ store.mset([[\"../foo\", encoder.encode(value1)]])\n+ ).rejects.toThrowError();\n+ await expect(\n+ store.mset([[\"/foo\", encoder.encode(value1)]])\n+ ).rejects.toThrowError();\n+ await expect(store.mget([\"../foo\"])).rejects.toThrowError();", + "comment_created_at": "2024-09-11T20:08:42+00:00", + "comment_author": "eyurtsev", + "comment_body": "maybe add test for `\\` as well?", + "pr_file_module": null + }, + { + "comment_id": "1755531830", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 6736, + "pr_file": "langchain/src/storage/tests/file_system.test.ts", + "discussion_id": "1755519345", + "commented_code": "@@ -88,4 +88,19 @@ describe(\"LocalFileStore\", () => {\n ).toEqual([value1, value2]);\n await fs.promises.rm(secondaryRootPath, { recursive: true, force: true });\n });\n+\n+ test(\"Should disallow attempts to traverse paths outside of a subfolder\", async () => {\n+ const encoder = new TextEncoder();\n+ const store = await LocalFileStore.fromPath(secondaryRootPath);\n+ const value1 = new Date().toISOString();\n+ await expect(\n+ store.mset([[\"../foo\", encoder.encode(value1)]])\n+ ).rejects.toThrowError();\n+ await expect(\n+ store.mset([[\"/foo\", encoder.encode(value1)]])\n+ ).rejects.toThrowError();\n+ await expect(store.mget([\"../foo\"])).rejects.toThrowError();", + "comment_created_at": "2024-09-11T20:18:07+00:00", + "comment_author": "jacoblee93", + "comment_body": "Yeah good call", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1667178276", + "pr_number": 5835, + "pr_file": "libs/langchain-google-common/src/experimental/utils/media_core.ts", + "created_at": "2024-07-05T21:49:09+00:00", + "commented_code": "import { v4 as uuidv4 } from \"uuid\";\nimport { BaseStore } from \"@langchain/core/stores\";\nimport { Serializable } from \"@langchain/core/load/serializable\";\n\nexport interface MediaBlobParameters {\n data?: Blob;\n\n metadata?: Record;\n\n path?: string;\n}\n\n/**\n * 
Represents a chunk of data that can be identified by the path where the\n * data is (or will be) located, along with optional metadata about the data.\n */\nexport class MediaBlob\n extends Serializable // FIXME - I'm not sure this serializes or deserializes correctly\n implements MediaBlobParameters\n{\n lc_serializable = true;\n\n lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n\n data?: Blob;\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n metadata?: Record;\n\n path?: string;\n\n constructor(params?: MediaBlobParameters) {\n super(params);\n this.data = params?.data;\n this.metadata = params?.metadata;\n this.path = params?.path;\n }\n\n get size(): number {\n return this.data?.size ?? 0;\n }\n\n get dataType(): string {\n return this.data?.type ?? \"\";\n }\n\n get encoding(): string {\n const charsetEquals = this.dataType.indexOf(\"charset=\");\n return charsetEquals === -1\n ? \"utf-8\"\n : this.dataType.substring(charsetEquals + 8);\n }\n\n get mimetype(): string {\n const semicolon = this.dataType.indexOf(\";\");\n return semicolon === -1\n ? this.dataType\n : this.dataType.substring(0, semicolon);\n }\n\n async asString(): Promise {\n const data = this.data ?? new Blob([]);\n const dataBuffer = await data.arrayBuffer();\n const dataArray = new Uint8Array(dataBuffer);\n\n // Need to handle the array in smaller chunks to deal with stack size limits\n let ret = \"\";\n const chunkSize = 102400;\n for (let i = 0; i < dataArray.length; i += chunkSize) {\n const chunk = dataArray.subarray(i, i + chunkSize);\n ret += String.fromCharCode(...chunk);\n }\n\n return ret;\n }\n\n async asBase64(): Promise {\n return btoa(await this.asString());\n }\n\n async asDataUrl(): Promise {\n return `data:${this.mimetype};base64,${await this.asBase64()}`;\n }\n\n async asUri(): Promise {\n return this.path ?? (await this.asDataUrl());\n }\n\n async encode(): Promise<{ encoded: string; encoding: string }> {\n const dataUrl = await this.asDataUrl();\n const comma = dataUrl.indexOf(\",\");\n const encoded = dataUrl.substring(comma + 1);\n const encoding: string = dataUrl.indexOf(\"base64\") > -1 ? \"base64\" : \"8bit\";\n return {\n encoded,\n encoding,\n };\n }\n}\n\nexport type ActionIfInvalidAction =\n | \"ignore\"\n | \"prefixPath\"\n | \"prefixUuid\"\n | \"removePath\";\n\nexport interface BlobStoreStoreOptions {\n /**\n * If the path is missing or invalid in the blob, how should we create\n * a new path?\n * Subclasses may define their own methods, but the following are supported\n * by default:\n * - Undefined or an emtpy string: Reject the blob\n * - \"ignore\": Attempt to store it anyway (but this may fail)\n * - \"prefixPath\": Use the default prefix for the BlobStore and get the\n * unique portion from the URL. The original path is stored in the metadata\n * - \"prefixUuid\": Use the default prefix for the BlobStore and get the\n * unique portion from a generated UUID. 
The original path is stored\n * in the metadata\n */\n actionIfInvalid?: ActionIfInvalidAction;\n\n /**\n * The expected prefix for URIs that are stored.\n * This may be used to test if a MediaBlob is valid and used to create a new\n * path if \"prefixPath\" or \"prefixUuid\" is set for actionIfInvalid.\n */\n pathPrefix?: string;\n}\n\nexport type ActionIfBlobMissingAction = \"emptyBlob\";\n\nexport interface BlobStoreFetchOptions {\n /**\n * If the blob is not found when fetching, what should we do?\n * Subclasses may define their own methods, but the following are supported\n * by default:\n * - Undefined or an empty string: return undefined\n * - \"emptyBlob\": return a new MediaBlob that has the path set, but nothing else.\n */\n actionIfBlobMissing?: ActionIfBlobMissingAction;\n}\n\nexport interface BlobStoreOptions {\n defaultStoreOptions?: BlobStoreStoreOptions;\n\n defaultFetchOptions?: BlobStoreFetchOptions;\n}\n\n/**\n * A specialized Store that is designed to handle MediaBlobs and use the\n * key that is included in the blob to determine exactly how it is stored.\n *\n * The full details of a MediaBlob may be changed when it is stored.\n * For example, it may get additional or different Metadata. This should be\n * what is returned when the store() method is called.\n *\n * Although BlobStore extends BaseStore, not all of the methods from\n * BaseStore may be implemented (or even possible). Those that are not\n * implemented should be documented and throw an Error if called.\n */\nexport abstract class BlobStore extends BaseStore {\n lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n\n defaultStoreOptions: BlobStoreStoreOptions;\n\n defaultFetchOptions: BlobStoreFetchOptions;\n\n constructor(opts?: BlobStoreOptions) {\n super(opts);\n this.defaultStoreOptions = opts?.defaultStoreOptions ?? {};\n this.defaultFetchOptions = opts?.defaultFetchOptions ?? {};\n }\n\n protected async _realKey(key: string | MediaBlob): Promise {\n return typeof key === \"string\" ? key : await key.asUri();\n }\n\n /**\n * Is the path set in the MediaBlob supported by this BlobStore?\n * Subclasses must implement and evaluate `blob.path` to make this\n * determination.\n *\n * Although this is async, this is expected to be a relatively fast operation\n * (ie - you shouldn't make network calls).\n *\n * The default implementation assumes that undefined blob.paths are invalid\n * and then uses the replacePathPrefix (or an empty string) as an assumed path\n * to start with.\n *\n * @param blob The blob to test\n * @param opts Any options (if needed) that may be used to determine if it is valid\n * @return If the string represented by blob.path is supported.\n */\n protected _hasValidPath(\n blob: MediaBlob,\n opts?: BlobStoreStoreOptions\n ): Promise {\n const path = blob.path ?? \"\";\n const prefix = opts?.pathPrefix ?? 
\"\";\n const isPrefixed =\n typeof blob.path !== \"undefined\" && path.startsWith(prefix);\n return Promise.resolve(isPrefixed);\n }\n\n protected _blobPathSuffix(blob: MediaBlob): string {\n // Get the path currently set and make sure we treat it as a string\n const blobPath = `${blob.path}`;\n\n // Advance past the first set of /\n let pathStart = blobPath.indexOf(\"/\") + 1;\n while (blobPath.charAt(pathStart) === \"/\") {\n pathStart += 1;\n }\n\n // We will use the rest as the path for a replacement\n return blobPath.substring(pathStart);\n }\n\n protected async _newBlob(\n oldBlob: MediaBlob,\n newPath?: string\n ): Promise {\n const oldPath = oldBlob.path;\n const metadata = oldBlob?.metadata ?? {};\n metadata.langchainOldPath = oldPath;\n const newBlob = new MediaBlob({\n ...oldBlob,\n metadata,\n });\n if (newPath) {\n newBlob.path = newPath;\n } else if (newBlob.path) {\n delete newBlob.path;\n }\n return newBlob;\n }\n\n protected async _validBlobPrefixPath(\n blob: MediaBlob,\n opts?: BlobStoreStoreOptions\n ): Promise {\n const prefix = opts?.pathPrefix ?? \"\";\n const suffix = this._blobPathSuffix(blob);\n const newPath = `${prefix}${suffix}`;\n return this._newBlob(blob, newPath);\n }\n\n protected async _validBlobPrefixUuid(\n blob: MediaBlob,\n opts?: BlobStoreStoreOptions\n ): Promise {\n const prefix = opts?.pathPrefix ?? \"\";\n const suffix = uuidv4(); // TODO - option to specify version?\n const newPath = `${prefix}${suffix}`;\n return this._newBlob(blob, newPath);\n }\n\n protected async _validBlobRemovePath(\n blob: MediaBlob,\n _opts?: BlobStoreStoreOptions\n ): Promise {\n return this._newBlob(blob, undefined);\n }\n\n /**\n * Based on the blob and options, return a blob that has a valid path\n * that can be saved.\n * @param blob\n * @param opts\n */\n protected async _validStoreBlob(\n blob: MediaBlob,\n opts?: BlobStoreStoreOptions\n ): Promise {\n if (await this._hasValidPath(blob, opts)) {\n return blob;\n }\n switch (opts?.actionIfInvalid) {\n case \"ignore\":\n return blob;\n case \"prefixPath\":\n return this._validBlobPrefixPath(blob, opts);\n case \"prefixUuid\":\n return this._validBlobPrefixUuid(blob, opts);\n case \"removePath\":\n return this._validBlobRemovePath(blob, opts);\n default:\n return undefined;\n }\n }\n\n async store(\n blob: MediaBlob,\n opts: BlobStoreStoreOptions = {}\n ): Promise {\n const allOpts: BlobStoreStoreOptions = {\n ...this.defaultStoreOptions,\n ...opts,\n };\n const validBlob = await this._validStoreBlob(blob, allOpts);\n if (typeof validBlob !== \"undefined\") {\n const validKey = await validBlob.asUri();\n await this.mset([[validKey, validBlob]]);\n const savedKey = await validBlob.asUri();\n return await this.fetch(savedKey);\n }\n return undefined;\n }\n\n protected async _missingFetchBlobEmpty(\n path: string,\n _opts?: BlobStoreFetchOptions\n ): Promise {\n return new MediaBlob({ path });\n }\n\n protected async _missingFetchBlob(\n path: string,\n opts?: BlobStoreFetchOptions\n ): Promise {\n switch (opts?.actionIfBlobMissing) {\n case \"emptyBlob\":\n return this._missingFetchBlobEmpty(path, opts);\n default:\n return undefined;\n }\n }\n\n async fetch(\n key: string | MediaBlob,\n opts: BlobStoreFetchOptions = {}\n ): Promise {\n const allOpts: BlobStoreFetchOptions = {\n ...this.defaultFetchOptions,\n ...opts,\n };\n const realKey = await this._realKey(key);\n const ret = await this.mget([realKey]);\n return ret?.[0] ?? 
(await this._missingFetchBlob(realKey, allOpts));\n }\n}\n\nexport interface BackedBlobStoreOptions extends BlobStoreOptions {\n backingStore: BaseStore;\n}\n\nexport class BackedBlobStore extends BlobStore {\n backingStore: BaseStore;\n\n constructor(opts: BackedBlobStoreOptions) {\n super(opts);\n this.backingStore = opts.backingStore;\n }\n\n mdelete(keys: string[]): Promise {\n return this.backingStore.mdelete(keys);\n }\n\n mget(keys: string[]): Promise<(MediaBlob | undefined)[]> {\n return this.backingStore.mget(keys);\n }\n\n mset(keyValuePairs: [string, MediaBlob][]): Promise {\n return this.backingStore.mset(keyValuePairs);\n }\n\n yieldKeys(prefix: string | undefined): AsyncGenerator {\n return this.backingStore.yieldKeys(prefix);\n }\n}\n\nexport class SimpleWebBlobStore extends BlobStore {", + "repo_full_name": "langchain-ai/langchainjs", + "discussion_comments": [ + { + "comment_id": "1667178276", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5835, + "pr_file": "libs/langchain-google-common/src/experimental/utils/media_core.ts", + "discussion_id": "1667178276", + "commented_code": "@@ -0,0 +1,504 @@\n+import { v4 as uuidv4 } from \"uuid\";\n+import { BaseStore } from \"@langchain/core/stores\";\n+import { Serializable } from \"@langchain/core/load/serializable\";\n+\n+export interface MediaBlobParameters {\n+ data?: Blob;\n+\n+ metadata?: Record;\n+\n+ path?: string;\n+}\n+\n+/**\n+ * Represents a chunk of data that can be identified by the path where the\n+ * data is (or will be) located, along with optional metadata about the data.\n+ */\n+export class MediaBlob\n+ extends Serializable // FIXME - I'm not sure this serializes or deserializes correctly\n+ implements MediaBlobParameters\n+{\n+ lc_serializable = true;\n+\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n+\n+ data?: Blob;\n+\n+ // eslint-disable-next-line @typescript-eslint/no-explicit-any\n+ metadata?: Record;\n+\n+ path?: string;\n+\n+ constructor(params?: MediaBlobParameters) {\n+ super(params);\n+ this.data = params?.data;\n+ this.metadata = params?.metadata;\n+ this.path = params?.path;\n+ }\n+\n+ get size(): number {\n+ return this.data?.size ?? 0;\n+ }\n+\n+ get dataType(): string {\n+ return this.data?.type ?? \"\";\n+ }\n+\n+ get encoding(): string {\n+ const charsetEquals = this.dataType.indexOf(\"charset=\");\n+ return charsetEquals === -1\n+ ? \"utf-8\"\n+ : this.dataType.substring(charsetEquals + 8);\n+ }\n+\n+ get mimetype(): string {\n+ const semicolon = this.dataType.indexOf(\";\");\n+ return semicolon === -1\n+ ? this.dataType\n+ : this.dataType.substring(0, semicolon);\n+ }\n+\n+ async asString(): Promise {\n+ const data = this.data ?? new Blob([]);\n+ const dataBuffer = await data.arrayBuffer();\n+ const dataArray = new Uint8Array(dataBuffer);\n+\n+ // Need to handle the array in smaller chunks to deal with stack size limits\n+ let ret = \"\";\n+ const chunkSize = 102400;\n+ for (let i = 0; i < dataArray.length; i += chunkSize) {\n+ const chunk = dataArray.subarray(i, i + chunkSize);\n+ ret += String.fromCharCode(...chunk);\n+ }\n+\n+ return ret;\n+ }\n+\n+ async asBase64(): Promise {\n+ return btoa(await this.asString());\n+ }\n+\n+ async asDataUrl(): Promise {\n+ return `data:${this.mimetype};base64,${await this.asBase64()}`;\n+ }\n+\n+ async asUri(): Promise {\n+ return this.path ?? 
(await this.asDataUrl());\n+ }\n+\n+ async encode(): Promise<{ encoded: string; encoding: string }> {\n+ const dataUrl = await this.asDataUrl();\n+ const comma = dataUrl.indexOf(\",\");\n+ const encoded = dataUrl.substring(comma + 1);\n+ const encoding: string = dataUrl.indexOf(\"base64\") > -1 ? \"base64\" : \"8bit\";\n+ return {\n+ encoded,\n+ encoding,\n+ };\n+ }\n+}\n+\n+export type ActionIfInvalidAction =\n+ | \"ignore\"\n+ | \"prefixPath\"\n+ | \"prefixUuid\"\n+ | \"removePath\";\n+\n+export interface BlobStoreStoreOptions {\n+ /**\n+ * If the path is missing or invalid in the blob, how should we create\n+ * a new path?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an emtpy string: Reject the blob\n+ * - \"ignore\": Attempt to store it anyway (but this may fail)\n+ * - \"prefixPath\": Use the default prefix for the BlobStore and get the\n+ * unique portion from the URL. The original path is stored in the metadata\n+ * - \"prefixUuid\": Use the default prefix for the BlobStore and get the\n+ * unique portion from a generated UUID. The original path is stored\n+ * in the metadata\n+ */\n+ actionIfInvalid?: ActionIfInvalidAction;\n+\n+ /**\n+ * The expected prefix for URIs that are stored.\n+ * This may be used to test if a MediaBlob is valid and used to create a new\n+ * path if \"prefixPath\" or \"prefixUuid\" is set for actionIfInvalid.\n+ */\n+ pathPrefix?: string;\n+}\n+\n+export type ActionIfBlobMissingAction = \"emptyBlob\";\n+\n+export interface BlobStoreFetchOptions {\n+ /**\n+ * If the blob is not found when fetching, what should we do?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an empty string: return undefined\n+ * - \"emptyBlob\": return a new MediaBlob that has the path set, but nothing else.\n+ */\n+ actionIfBlobMissing?: ActionIfBlobMissingAction;\n+}\n+\n+export interface BlobStoreOptions {\n+ defaultStoreOptions?: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions?: BlobStoreFetchOptions;\n+}\n+\n+/**\n+ * A specialized Store that is designed to handle MediaBlobs and use the\n+ * key that is included in the blob to determine exactly how it is stored.\n+ *\n+ * The full details of a MediaBlob may be changed when it is stored.\n+ * For example, it may get additional or different Metadata. This should be\n+ * what is returned when the store() method is called.\n+ *\n+ * Although BlobStore extends BaseStore, not all of the methods from\n+ * BaseStore may be implemented (or even possible). Those that are not\n+ * implemented should be documented and throw an Error if called.\n+ */\n+export abstract class BlobStore extends BaseStore {\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n+\n+ defaultStoreOptions: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions: BlobStoreFetchOptions;\n+\n+ constructor(opts?: BlobStoreOptions) {\n+ super(opts);\n+ this.defaultStoreOptions = opts?.defaultStoreOptions ?? {};\n+ this.defaultFetchOptions = opts?.defaultFetchOptions ?? {};\n+ }\n+\n+ protected async _realKey(key: string | MediaBlob): Promise {\n+ return typeof key === \"string\" ? 
key : await key.asUri();\n+ }\n+\n+ /**\n+ * Is the path set in the MediaBlob supported by this BlobStore?\n+ * Subclasses must implement and evaluate `blob.path` to make this\n+ * determination.\n+ *\n+ * Although this is async, this is expected to be a relatively fast operation\n+ * (ie - you shouldn't make network calls).\n+ *\n+ * The default implementation assumes that undefined blob.paths are invalid\n+ * and then uses the replacePathPrefix (or an empty string) as an assumed path\n+ * to start with.\n+ *\n+ * @param blob The blob to test\n+ * @param opts Any options (if needed) that may be used to determine if it is valid\n+ * @return If the string represented by blob.path is supported.\n+ */\n+ protected _hasValidPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const path = blob.path ?? \"\";\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const isPrefixed =\n+ typeof blob.path !== \"undefined\" && path.startsWith(prefix);\n+ return Promise.resolve(isPrefixed);\n+ }\n+\n+ protected _blobPathSuffix(blob: MediaBlob): string {\n+ // Get the path currently set and make sure we treat it as a string\n+ const blobPath = `${blob.path}`;\n+\n+ // Advance past the first set of /\n+ let pathStart = blobPath.indexOf(\"/\") + 1;\n+ while (blobPath.charAt(pathStart) === \"/\") {\n+ pathStart += 1;\n+ }\n+\n+ // We will use the rest as the path for a replacement\n+ return blobPath.substring(pathStart);\n+ }\n+\n+ protected async _newBlob(\n+ oldBlob: MediaBlob,\n+ newPath?: string\n+ ): Promise {\n+ const oldPath = oldBlob.path;\n+ const metadata = oldBlob?.metadata ?? {};\n+ metadata.langchainOldPath = oldPath;\n+ const newBlob = new MediaBlob({\n+ ...oldBlob,\n+ metadata,\n+ });\n+ if (newPath) {\n+ newBlob.path = newPath;\n+ } else if (newBlob.path) {\n+ delete newBlob.path;\n+ }\n+ return newBlob;\n+ }\n+\n+ protected async _validBlobPrefixPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const suffix = this._blobPathSuffix(blob);\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobPrefixUuid(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? 
\"\";\n+ const suffix = uuidv4(); // TODO - option to specify version?\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobRemovePath(\n+ blob: MediaBlob,\n+ _opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ return this._newBlob(blob, undefined);\n+ }\n+\n+ /**\n+ * Based on the blob and options, return a blob that has a valid path\n+ * that can be saved.\n+ * @param blob\n+ * @param opts\n+ */\n+ protected async _validStoreBlob(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ if (await this._hasValidPath(blob, opts)) {\n+ return blob;\n+ }\n+ switch (opts?.actionIfInvalid) {\n+ case \"ignore\":\n+ return blob;\n+ case \"prefixPath\":\n+ return this._validBlobPrefixPath(blob, opts);\n+ case \"prefixUuid\":\n+ return this._validBlobPrefixUuid(blob, opts);\n+ case \"removePath\":\n+ return this._validBlobRemovePath(blob, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async store(\n+ blob: MediaBlob,\n+ opts: BlobStoreStoreOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreStoreOptions = {\n+ ...this.defaultStoreOptions,\n+ ...opts,\n+ };\n+ const validBlob = await this._validStoreBlob(blob, allOpts);\n+ if (typeof validBlob !== \"undefined\") {\n+ const validKey = await validBlob.asUri();\n+ await this.mset([[validKey, validBlob]]);\n+ const savedKey = await validBlob.asUri();\n+ return await this.fetch(savedKey);\n+ }\n+ return undefined;\n+ }\n+\n+ protected async _missingFetchBlobEmpty(\n+ path: string,\n+ _opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ return new MediaBlob({ path });\n+ }\n+\n+ protected async _missingFetchBlob(\n+ path: string,\n+ opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ switch (opts?.actionIfBlobMissing) {\n+ case \"emptyBlob\":\n+ return this._missingFetchBlobEmpty(path, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async fetch(\n+ key: string | MediaBlob,\n+ opts: BlobStoreFetchOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreFetchOptions = {\n+ ...this.defaultFetchOptions,\n+ ...opts,\n+ };\n+ const realKey = await this._realKey(key);\n+ const ret = await this.mget([realKey]);\n+ return ret?.[0] ?? 
(await this._missingFetchBlob(realKey, allOpts));\n+ }\n+}\n+\n+export interface BackedBlobStoreOptions extends BlobStoreOptions {\n+ backingStore: BaseStore;\n+}\n+\n+export class BackedBlobStore extends BlobStore {\n+ backingStore: BaseStore;\n+\n+ constructor(opts: BackedBlobStoreOptions) {\n+ super(opts);\n+ this.backingStore = opts.backingStore;\n+ }\n+\n+ mdelete(keys: string[]): Promise {\n+ return this.backingStore.mdelete(keys);\n+ }\n+\n+ mget(keys: string[]): Promise<(MediaBlob | undefined)[]> {\n+ return this.backingStore.mget(keys);\n+ }\n+\n+ mset(keyValuePairs: [string, MediaBlob][]): Promise {\n+ return this.backingStore.mset(keyValuePairs);\n+ }\n+\n+ yieldKeys(prefix: string | undefined): AsyncGenerator {\n+ return this.backingStore.yieldKeys(prefix);\n+ }\n+}\n+\n+export class SimpleWebBlobStore extends BlobStore {", + "comment_created_at": "2024-07-05T21:49:09+00:00", + "comment_author": "eyurtsev", + "comment_body": "If this code can be executed on a server, then it allows for an SSRF attack", + "pr_file_module": null + }, + { + "comment_id": "1669466959", + "repo_full_name": "langchain-ai/langchainjs", + "pr_number": 5835, + "pr_file": "libs/langchain-google-common/src/experimental/utils/media_core.ts", + "discussion_id": "1667178276", + "commented_code": "@@ -0,0 +1,504 @@\n+import { v4 as uuidv4 } from \"uuid\";\n+import { BaseStore } from \"@langchain/core/stores\";\n+import { Serializable } from \"@langchain/core/load/serializable\";\n+\n+export interface MediaBlobParameters {\n+ data?: Blob;\n+\n+ metadata?: Record;\n+\n+ path?: string;\n+}\n+\n+/**\n+ * Represents a chunk of data that can be identified by the path where the\n+ * data is (or will be) located, along with optional metadata about the data.\n+ */\n+export class MediaBlob\n+ extends Serializable // FIXME - I'm not sure this serializes or deserializes correctly\n+ implements MediaBlobParameters\n+{\n+ lc_serializable = true;\n+\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n+\n+ data?: Blob;\n+\n+ // eslint-disable-next-line @typescript-eslint/no-explicit-any\n+ metadata?: Record;\n+\n+ path?: string;\n+\n+ constructor(params?: MediaBlobParameters) {\n+ super(params);\n+ this.data = params?.data;\n+ this.metadata = params?.metadata;\n+ this.path = params?.path;\n+ }\n+\n+ get size(): number {\n+ return this.data?.size ?? 0;\n+ }\n+\n+ get dataType(): string {\n+ return this.data?.type ?? \"\";\n+ }\n+\n+ get encoding(): string {\n+ const charsetEquals = this.dataType.indexOf(\"charset=\");\n+ return charsetEquals === -1\n+ ? \"utf-8\"\n+ : this.dataType.substring(charsetEquals + 8);\n+ }\n+\n+ get mimetype(): string {\n+ const semicolon = this.dataType.indexOf(\";\");\n+ return semicolon === -1\n+ ? this.dataType\n+ : this.dataType.substring(0, semicolon);\n+ }\n+\n+ async asString(): Promise {\n+ const data = this.data ?? 
new Blob([]);\n+ const dataBuffer = await data.arrayBuffer();\n+ const dataArray = new Uint8Array(dataBuffer);\n+\n+ // Need to handle the array in smaller chunks to deal with stack size limits\n+ let ret = \"\";\n+ const chunkSize = 102400;\n+ for (let i = 0; i < dataArray.length; i += chunkSize) {\n+ const chunk = dataArray.subarray(i, i + chunkSize);\n+ ret += String.fromCharCode(...chunk);\n+ }\n+\n+ return ret;\n+ }\n+\n+ async asBase64(): Promise {\n+ return btoa(await this.asString());\n+ }\n+\n+ async asDataUrl(): Promise {\n+ return `data:${this.mimetype};base64,${await this.asBase64()}`;\n+ }\n+\n+ async asUri(): Promise {\n+ return this.path ?? (await this.asDataUrl());\n+ }\n+\n+ async encode(): Promise<{ encoded: string; encoding: string }> {\n+ const dataUrl = await this.asDataUrl();\n+ const comma = dataUrl.indexOf(\",\");\n+ const encoded = dataUrl.substring(comma + 1);\n+ const encoding: string = dataUrl.indexOf(\"base64\") > -1 ? \"base64\" : \"8bit\";\n+ return {\n+ encoded,\n+ encoding,\n+ };\n+ }\n+}\n+\n+export type ActionIfInvalidAction =\n+ | \"ignore\"\n+ | \"prefixPath\"\n+ | \"prefixUuid\"\n+ | \"removePath\";\n+\n+export interface BlobStoreStoreOptions {\n+ /**\n+ * If the path is missing or invalid in the blob, how should we create\n+ * a new path?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an emtpy string: Reject the blob\n+ * - \"ignore\": Attempt to store it anyway (but this may fail)\n+ * - \"prefixPath\": Use the default prefix for the BlobStore and get the\n+ * unique portion from the URL. The original path is stored in the metadata\n+ * - \"prefixUuid\": Use the default prefix for the BlobStore and get the\n+ * unique portion from a generated UUID. The original path is stored\n+ * in the metadata\n+ */\n+ actionIfInvalid?: ActionIfInvalidAction;\n+\n+ /**\n+ * The expected prefix for URIs that are stored.\n+ * This may be used to test if a MediaBlob is valid and used to create a new\n+ * path if \"prefixPath\" or \"prefixUuid\" is set for actionIfInvalid.\n+ */\n+ pathPrefix?: string;\n+}\n+\n+export type ActionIfBlobMissingAction = \"emptyBlob\";\n+\n+export interface BlobStoreFetchOptions {\n+ /**\n+ * If the blob is not found when fetching, what should we do?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an empty string: return undefined\n+ * - \"emptyBlob\": return a new MediaBlob that has the path set, but nothing else.\n+ */\n+ actionIfBlobMissing?: ActionIfBlobMissingAction;\n+}\n+\n+export interface BlobStoreOptions {\n+ defaultStoreOptions?: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions?: BlobStoreFetchOptions;\n+}\n+\n+/**\n+ * A specialized Store that is designed to handle MediaBlobs and use the\n+ * key that is included in the blob to determine exactly how it is stored.\n+ *\n+ * The full details of a MediaBlob may be changed when it is stored.\n+ * For example, it may get additional or different Metadata. This should be\n+ * what is returned when the store() method is called.\n+ *\n+ * Although BlobStore extends BaseStore, not all of the methods from\n+ * BaseStore may be implemented (or even possible). Those that are not\n+ * implemented should be documented and throw an Error if called.\n+ */\n+export abstract class BlobStore extends BaseStore {\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? 
And why?\n+\n+ defaultStoreOptions: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions: BlobStoreFetchOptions;\n+\n+ constructor(opts?: BlobStoreOptions) {\n+ super(opts);\n+ this.defaultStoreOptions = opts?.defaultStoreOptions ?? {};\n+ this.defaultFetchOptions = opts?.defaultFetchOptions ?? {};\n+ }\n+\n+ protected async _realKey(key: string | MediaBlob): Promise {\n+ return typeof key === \"string\" ? key : await key.asUri();\n+ }\n+\n+ /**\n+ * Is the path set in the MediaBlob supported by this BlobStore?\n+ * Subclasses must implement and evaluate `blob.path` to make this\n+ * determination.\n+ *\n+ * Although this is async, this is expected to be a relatively fast operation\n+ * (ie - you shouldn't make network calls).\n+ *\n+ * The default implementation assumes that undefined blob.paths are invalid\n+ * and then uses the replacePathPrefix (or an empty string) as an assumed path\n+ * to start with.\n+ *\n+ * @param blob The blob to test\n+ * @param opts Any options (if needed) that may be used to determine if it is valid\n+ * @return If the string represented by blob.path is supported.\n+ */\n+ protected _hasValidPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const path = blob.path ?? \"\";\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const isPrefixed =\n+ typeof blob.path !== \"undefined\" && path.startsWith(prefix);\n+ return Promise.resolve(isPrefixed);\n+ }\n+\n+ protected _blobPathSuffix(blob: MediaBlob): string {\n+ // Get the path currently set and make sure we treat it as a string\n+ const blobPath = `${blob.path}`;\n+\n+ // Advance past the first set of /\n+ let pathStart = blobPath.indexOf(\"/\") + 1;\n+ while (blobPath.charAt(pathStart) === \"/\") {\n+ pathStart += 1;\n+ }\n+\n+ // We will use the rest as the path for a replacement\n+ return blobPath.substring(pathStart);\n+ }\n+\n+ protected async _newBlob(\n+ oldBlob: MediaBlob,\n+ newPath?: string\n+ ): Promise {\n+ const oldPath = oldBlob.path;\n+ const metadata = oldBlob?.metadata ?? {};\n+ metadata.langchainOldPath = oldPath;\n+ const newBlob = new MediaBlob({\n+ ...oldBlob,\n+ metadata,\n+ });\n+ if (newPath) {\n+ newBlob.path = newPath;\n+ } else if (newBlob.path) {\n+ delete newBlob.path;\n+ }\n+ return newBlob;\n+ }\n+\n+ protected async _validBlobPrefixPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const suffix = this._blobPathSuffix(blob);\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobPrefixUuid(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? 
\"\";\n+ const suffix = uuidv4(); // TODO - option to specify version?\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobRemovePath(\n+ blob: MediaBlob,\n+ _opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ return this._newBlob(blob, undefined);\n+ }\n+\n+ /**\n+ * Based on the blob and options, return a blob that has a valid path\n+ * that can be saved.\n+ * @param blob\n+ * @param opts\n+ */\n+ protected async _validStoreBlob(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ if (await this._hasValidPath(blob, opts)) {\n+ return blob;\n+ }\n+ switch (opts?.actionIfInvalid) {\n+ case \"ignore\":\n+ return blob;\n+ case \"prefixPath\":\n+ return this._validBlobPrefixPath(blob, opts);\n+ case \"prefixUuid\":\n+ return this._validBlobPrefixUuid(blob, opts);\n+ case \"removePath\":\n+ return this._validBlobRemovePath(blob, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async store(\n+ blob: MediaBlob,\n+ opts: BlobStoreStoreOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreStoreOptions = {\n+ ...this.defaultStoreOptions,\n+ ...opts,\n+ };\n+ const validBlob = await this._validStoreBlob(blob, allOpts);\n+ if (typeof validBlob !== \"undefined\") {\n+ const validKey = await validBlob.asUri();\n+ await this.mset([[validKey, validBlob]]);\n+ const savedKey = await validBlob.asUri();\n+ return await this.fetch(savedKey);\n+ }\n+ return undefined;\n+ }\n+\n+ protected async _missingFetchBlobEmpty(\n+ path: string,\n+ _opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ return new MediaBlob({ path });\n+ }\n+\n+ protected async _missingFetchBlob(\n+ path: string,\n+ opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ switch (opts?.actionIfBlobMissing) {\n+ case \"emptyBlob\":\n+ return this._missingFetchBlobEmpty(path, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async fetch(\n+ key: string | MediaBlob,\n+ opts: BlobStoreFetchOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreFetchOptions = {\n+ ...this.defaultFetchOptions,\n+ ...opts,\n+ };\n+ const realKey = await this._realKey(key);\n+ const ret = await this.mget([realKey]);\n+ return ret?.[0] ?? (await this._missingFetchBlob(realKey, allOpts));\n+ }\n+}\n+\n+export interface BackedBlobStoreOptions extends BlobStoreOptions {\n+ backingStore: BaseStore;\n+}\n+\n+export class BackedBlobStore extends BlobStore {\n+ backingStore: BaseStore;\n+\n+ constructor(opts: BackedBlobStoreOptions) {\n+ super(opts);\n+ this.backingStore = opts.backingStore;\n+ }\n+\n+ mdelete(keys: string[]): Promise {\n+ return this.backingStore.mdelete(keys);\n+ }\n+\n+ mget(keys: string[]): Promise<(MediaBlob | undefined)[]> {\n+ return this.backingStore.mget(keys);\n+ }\n+\n+ mset(keyValuePairs: [string, MediaBlob][]): Promise {\n+ return this.backingStore.mset(keyValuePairs);\n+ }\n+\n+ yieldKeys(prefix: string | undefined): AsyncGenerator {\n+ return this.backingStore.yieldKeys(prefix);\n+ }\n+}\n+\n+export class SimpleWebBlobStore extends BlobStore {", + "comment_created_at": "2024-07-09T00:10:10+00:00", + "comment_author": "afirstenberg", + "comment_body": "A fair point.\r\n\r\nHowever, we have code that is already doing this in the Gemini implementations. (See https://github.com/langchain-ai/langchain-google/blob/7c678dcc59bb4e4d7a1efdbb9572d74b75e87665/libs/genai/langchain_google_genai/_image_utils.py#L137 for where it is done in Python, for example.)\r\n\r\nMaking this a separate class puts controls of the URL retrieval in the hands of the developer. 
the `SimpleWebBlobStore` is just what it says - simple. But it can serve as a base for developers to make subclasses (that do things such as authentication if necessary) that still work against a \"normal\" http/https URL.\r\n\r\nAnd we do need *something* that does the fetch for the `MediaManager` so that Gemini can work the same as OpenAI models (where you're expected to pass a public URL for images). Making it have the same signature as other `BlobStore`s makes sense for flexibility.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/langchainjs-validate-untrusted-input.md b/_reviewers/langchainjs-validate-untrusted-input.md index d1a1f63..0f48ace 100644 --- a/_reviewers/langchainjs-validate-untrusted-input.md +++ b/_reviewers/langchainjs-validate-untrusted-input.md @@ -52,189 +52,3 @@ if (!isValidUrl(userProvidedUrl)) { ``` When developing API endpoints or libraries, clearly document in JSDoc comments which parameters must come from trusted sources and cannot accept direct user input. - - -[ - { - "discussion_id": "2101515141", - "pr_number": 8007, - "pr_file": "libs/langchain-community/src/document_loaders/fs/oracle.ts", - "created_at": "2025-05-22T02:44:48+00:00", - "commented_code": "import { Document } from \"@langchain/core/documents\";\nimport fs from \"node:fs\";\nimport path from \"node:path\";\nimport oracledb from \"oracledb\";\nimport * as htmlparser2 from \"htmlparser2\";\nimport { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\n\nfunction* listDir(dir: string): Generator {\n const files = fs.readdirSync(dir, { withFileTypes: true });\n\n for (const file of files) {\n if (file.isDirectory()) {\n yield* listDir(path.join(dir, file.name));\n } else {\n yield path.join(dir, file.name);\n }\n }\n}\n\n/**\n * Load documents from a file, a directory, or a table\n * If the document isn't a plain text file such as a PDF,\n * a plain text version will be extracted using utl_to_text\n * @example\n * ```typescript\n * const loader = new OracleDocLoader(conn, params);\n * const docs = await loader.load();\n * ```\n */\nexport class OracleDocLoader extends BaseDocumentLoader {\n protected conn: oracledb.Connection;\n\n protected pref: Record;\n\n constructor(conn: oracledb.Connection, pref: Record) {\n super();\n this.conn = conn;\n this.pref = pref;\n }\n\n /**\n * A method that loads the text file or blob and returns a promise that\n * resolves to an array of `Document` instances. It reads the text from\n * the file or blob using the `readFile` function from the\n * `node:fs/promises` module or the `text()` method of the blob. It then\n * parses the text using the `parse()` method and creates a `Document`\n * instance for each parsed page. 
The metadata includes the source of the\n * text (file path or blob) and, if there are multiple pages, the line\n * number of each page.\n * @returns A promise that resolves to an array of `Document` instances.\n */\n async load() {\n const docs = [];\n\n if (\"file\" in this.pref) {\n // don't specify an encoding to use binary\n const data = fs.readFileSync(this.pref.file);\n\n const result = await this.conn.execute(\n (\n `select dbms_vector_chain.utl_to_text(:content, :pref) text, dbms_vector_chain.utl_to_text(:content, json('{\"plaintext\": \"false\"}')) metadata from dual`\n ),\n {\n content: { val: data, dir: oracledb.BIND_IN, type: oracledb.BLOB },\n pref: { val: this.pref, type: oracledb.DB_TYPE_JSON },\n },\n ({\n resultSet: true, // return a ResultSet (default is false)\n fetchInfo: {\n TEXT: { type: oracledb.STRING },\n METADATA: { type: oracledb.STRING },\n },\n })\n );\n\n const rs = result.resultSet;\n let row;\n if (rs != null) {\n while ((row = await rs.getRow())) {\n const [plain_text, metadata] = await this._extract(row);\n docs.push(\n new Document({\n pageContent: plain_text,\n metadata: >metadata,\n })\n );\n }\n await rs.close();\n }\n } else if (\"dir\" in this.pref) {\n for (const file of listDir(this.pref.dir)) {\n // don't specify an encoding to use binary\n const data = fs.readFileSync(file);\n\n const result = await this.conn.execute(\n (\n `select dbms_vector_chain.utl_to_text(:content, :pref) text, dbms_vector_chain.utl_to_text(:content, json('{\"plaintext\": \"false\"}')) metadata from dual`\n ),\n {\n content: { val: data, dir: oracledb.BIND_IN, type: oracledb.BLOB },\n pref: { val: this.pref, type: oracledb.DB_TYPE_JSON },\n },\n ({\n resultSet: true, // return a ResultSet (default is false)\n fetchInfo: {\n TEXT: { type: oracledb.STRING },\n METADATA: { type: oracledb.STRING },\n },\n })\n );\n\n const rs = result.resultSet;\n let row;\n if (rs != null) {\n while ((row = await rs.getRow())) {\n const [plain_text, metadata] = await this._extract(row);\n docs.push(\n new Document({\n pageContent: plain_text,\n metadata: >metadata,\n })\n );\n }\n await rs.close();\n }\n }\n } else if (\"tablename\" in this.pref) {\n if (!(\"owner\" in this.pref) || !(\"colname\" in this.pref)) {\n throw new Error(`Invalid preferences: missing owner or colname`);\n }\n // SQL doesn't accept backslash to escape a double quote (\\\"). 
If string contains \\\" change to \"\n if (\n this.pref.tablename.startsWith('\\\\\"') &&\n this.pref.tablename.endsWith('\\\\\"')\n ) {\n this.pref.tablename = this.pref.tablename.replaceAll(\"\\\\\", \"\");\n }\n const result = await this.conn.execute(\n (\n `select dbms_vector_chain.utl_to_text(t.${this.pref.colname}, :pref) text, dbms_vector_chain.utl_to_text(t.${this.pref.colname}, json('{\"plaintext\": \"false\"}')) metadata from ${this.pref.owner}.${this.pref.tablename} t`", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2101515141", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8007, - "pr_file": "libs/langchain-community/src/document_loaders/fs/oracle.ts", - "discussion_id": "2101515141", - "commented_code": "@@ -0,0 +1,197 @@\n+import { Document } from \"@langchain/core/documents\";\n+import fs from \"node:fs\";\n+import path from \"node:path\";\n+import oracledb from \"oracledb\";\n+import * as htmlparser2 from \"htmlparser2\";\n+import { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\n+\n+function* listDir(dir: string): Generator {\n+ const files = fs.readdirSync(dir, { withFileTypes: true });\n+\n+ for (const file of files) {\n+ if (file.isDirectory()) {\n+ yield* listDir(path.join(dir, file.name));\n+ } else {\n+ yield path.join(dir, file.name);\n+ }\n+ }\n+}\n+\n+/**\n+ * Load documents from a file, a directory, or a table\n+ * If the document isn't a plain text file such as a PDF,\n+ * a plain text version will be extracted using utl_to_text\n+ * @example\n+ * ```typescript\n+ * const loader = new OracleDocLoader(conn, params);\n+ * const docs = await loader.load();\n+ * ```\n+ */\n+export class OracleDocLoader extends BaseDocumentLoader {\n+ protected conn: oracledb.Connection;\n+\n+ protected pref: Record;\n+\n+ constructor(conn: oracledb.Connection, pref: Record) {\n+ super();\n+ this.conn = conn;\n+ this.pref = pref;\n+ }\n+\n+ /**\n+ * A method that loads the text file or blob and returns a promise that\n+ * resolves to an array of `Document` instances. It reads the text from\n+ * the file or blob using the `readFile` function from the\n+ * `node:fs/promises` module or the `text()` method of the blob. It then\n+ * parses the text using the `parse()` method and creates a `Document`\n+ * instance for each parsed page. 
The metadata includes the source of the\n+ * text (file path or blob) and, if there are multiple pages, the line\n+ * number of each page.\n+ * @returns A promise that resolves to an array of `Document` instances.\n+ */\n+ async load() {\n+ const docs = [];\n+\n+ if (\"file\" in this.pref) {\n+ // don't specify an encoding to use binary\n+ const data = fs.readFileSync(this.pref.file);\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(:content, :pref) text, dbms_vector_chain.utl_to_text(:content, json('{\"plaintext\": \"false\"}')) metadata from dual`\n+ ),\n+ {\n+ content: { val: data, dir: oracledb.BIND_IN, type: oracledb.BLOB },\n+ pref: { val: this.pref, type: oracledb.DB_TYPE_JSON },\n+ },\n+ ({\n+ resultSet: true, // return a ResultSet (default is false)\n+ fetchInfo: {\n+ TEXT: { type: oracledb.STRING },\n+ METADATA: { type: oracledb.STRING },\n+ },\n+ })\n+ );\n+\n+ const rs = result.resultSet;\n+ let row;\n+ if (rs != null) {\n+ while ((row = await rs.getRow())) {\n+ const [plain_text, metadata] = await this._extract(row);\n+ docs.push(\n+ new Document({\n+ pageContent: plain_text,\n+ metadata: >metadata,\n+ })\n+ );\n+ }\n+ await rs.close();\n+ }\n+ } else if (\"dir\" in this.pref) {\n+ for (const file of listDir(this.pref.dir)) {\n+ // don't specify an encoding to use binary\n+ const data = fs.readFileSync(file);\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(:content, :pref) text, dbms_vector_chain.utl_to_text(:content, json('{\"plaintext\": \"false\"}')) metadata from dual`\n+ ),\n+ {\n+ content: { val: data, dir: oracledb.BIND_IN, type: oracledb.BLOB },\n+ pref: { val: this.pref, type: oracledb.DB_TYPE_JSON },\n+ },\n+ ({\n+ resultSet: true, // return a ResultSet (default is false)\n+ fetchInfo: {\n+ TEXT: { type: oracledb.STRING },\n+ METADATA: { type: oracledb.STRING },\n+ },\n+ })\n+ );\n+\n+ const rs = result.resultSet;\n+ let row;\n+ if (rs != null) {\n+ while ((row = await rs.getRow())) {\n+ const [plain_text, metadata] = await this._extract(row);\n+ docs.push(\n+ new Document({\n+ pageContent: plain_text,\n+ metadata: >metadata,\n+ })\n+ );\n+ }\n+ await rs.close();\n+ }\n+ }\n+ } else if (\"tablename\" in this.pref) {\n+ if (!(\"owner\" in this.pref) || !(\"colname\" in this.pref)) {\n+ throw new Error(`Invalid preferences: missing owner or colname`);\n+ }\n+ // SQL doesn't accept backslash to escape a double quote (\\\"). 
If string contains \\\" change to \"\n+ if (\n+ this.pref.tablename.startsWith('\\\\\"') &&\n+ this.pref.tablename.endsWith('\\\\\"')\n+ ) {\n+ this.pref.tablename = this.pref.tablename.replaceAll(\"\\\\\", \"\");\n+ }\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(t.${this.pref.colname}, :pref) text, dbms_vector_chain.utl_to_text(t.${this.pref.colname}, json('{\"plaintext\": \"false\"}')) metadata from ${this.pref.owner}.${this.pref.tablename} t`", - "comment_created_at": "2025-05-22T02:44:48+00:00", - "comment_author": "cjbj", - "comment_body": "Have the substituted values been filtered to avoid any SQL Injection issues?", - "pr_file_module": null - }, - { - "comment_id": "2103199788", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8007, - "pr_file": "libs/langchain-community/src/document_loaders/fs/oracle.ts", - "discussion_id": "2101515141", - "commented_code": "@@ -0,0 +1,197 @@\n+import { Document } from \"@langchain/core/documents\";\n+import fs from \"node:fs\";\n+import path from \"node:path\";\n+import oracledb from \"oracledb\";\n+import * as htmlparser2 from \"htmlparser2\";\n+import { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\n+\n+function* listDir(dir: string): Generator {\n+ const files = fs.readdirSync(dir, { withFileTypes: true });\n+\n+ for (const file of files) {\n+ if (file.isDirectory()) {\n+ yield* listDir(path.join(dir, file.name));\n+ } else {\n+ yield path.join(dir, file.name);\n+ }\n+ }\n+}\n+\n+/**\n+ * Load documents from a file, a directory, or a table\n+ * If the document isn't a plain text file such as a PDF,\n+ * a plain text version will be extracted using utl_to_text\n+ * @example\n+ * ```typescript\n+ * const loader = new OracleDocLoader(conn, params);\n+ * const docs = await loader.load();\n+ * ```\n+ */\n+export class OracleDocLoader extends BaseDocumentLoader {\n+ protected conn: oracledb.Connection;\n+\n+ protected pref: Record;\n+\n+ constructor(conn: oracledb.Connection, pref: Record) {\n+ super();\n+ this.conn = conn;\n+ this.pref = pref;\n+ }\n+\n+ /**\n+ * A method that loads the text file or blob and returns a promise that\n+ * resolves to an array of `Document` instances. It reads the text from\n+ * the file or blob using the `readFile` function from the\n+ * `node:fs/promises` module or the `text()` method of the blob. It then\n+ * parses the text using the `parse()` method and creates a `Document`\n+ * instance for each parsed page. 
The metadata includes the source of the\n+ * text (file path or blob) and, if there are multiple pages, the line\n+ * number of each page.\n+ * @returns A promise that resolves to an array of `Document` instances.\n+ */\n+ async load() {\n+ const docs = [];\n+\n+ if (\"file\" in this.pref) {\n+ // don't specify an encoding to use binary\n+ const data = fs.readFileSync(this.pref.file);\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(:content, :pref) text, dbms_vector_chain.utl_to_text(:content, json('{\"plaintext\": \"false\"}')) metadata from dual`\n+ ),\n+ {\n+ content: { val: data, dir: oracledb.BIND_IN, type: oracledb.BLOB },\n+ pref: { val: this.pref, type: oracledb.DB_TYPE_JSON },\n+ },\n+ ({\n+ resultSet: true, // return a ResultSet (default is false)\n+ fetchInfo: {\n+ TEXT: { type: oracledb.STRING },\n+ METADATA: { type: oracledb.STRING },\n+ },\n+ })\n+ );\n+\n+ const rs = result.resultSet;\n+ let row;\n+ if (rs != null) {\n+ while ((row = await rs.getRow())) {\n+ const [plain_text, metadata] = await this._extract(row);\n+ docs.push(\n+ new Document({\n+ pageContent: plain_text,\n+ metadata: >metadata,\n+ })\n+ );\n+ }\n+ await rs.close();\n+ }\n+ } else if (\"dir\" in this.pref) {\n+ for (const file of listDir(this.pref.dir)) {\n+ // don't specify an encoding to use binary\n+ const data = fs.readFileSync(file);\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(:content, :pref) text, dbms_vector_chain.utl_to_text(:content, json('{\"plaintext\": \"false\"}')) metadata from dual`\n+ ),\n+ {\n+ content: { val: data, dir: oracledb.BIND_IN, type: oracledb.BLOB },\n+ pref: { val: this.pref, type: oracledb.DB_TYPE_JSON },\n+ },\n+ ({\n+ resultSet: true, // return a ResultSet (default is false)\n+ fetchInfo: {\n+ TEXT: { type: oracledb.STRING },\n+ METADATA: { type: oracledb.STRING },\n+ },\n+ })\n+ );\n+\n+ const rs = result.resultSet;\n+ let row;\n+ if (rs != null) {\n+ while ((row = await rs.getRow())) {\n+ const [plain_text, metadata] = await this._extract(row);\n+ docs.push(\n+ new Document({\n+ pageContent: plain_text,\n+ metadata: >metadata,\n+ })\n+ );\n+ }\n+ await rs.close();\n+ }\n+ }\n+ } else if (\"tablename\" in this.pref) {\n+ if (!(\"owner\" in this.pref) || !(\"colname\" in this.pref)) {\n+ throw new Error(`Invalid preferences: missing owner or colname`);\n+ }\n+ // SQL doesn't accept backslash to escape a double quote (\\\"). 
If string contains \\\" change to \"\n+ if (\n+ this.pref.tablename.startsWith('\\\\\"') &&\n+ this.pref.tablename.endsWith('\\\\\"')\n+ ) {\n+ this.pref.tablename = this.pref.tablename.replaceAll(\"\\\\\", \"\");\n+ }\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(t.${this.pref.colname}, :pref) text, dbms_vector_chain.utl_to_text(t.${this.pref.colname}, json('{\"plaintext\": \"false\"}')) metadata from ${this.pref.owner}.${this.pref.tablename} t`", - "comment_created_at": "2025-05-22T18:46:39+00:00", - "comment_author": "hackerdave", - "comment_body": "Have added a check", - "pr_file_module": null - }, - { - "comment_id": "2103462930", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8007, - "pr_file": "libs/langchain-community/src/document_loaders/fs/oracle.ts", - "discussion_id": "2101515141", - "commented_code": "@@ -0,0 +1,197 @@\n+import { Document } from \"@langchain/core/documents\";\n+import fs from \"node:fs\";\n+import path from \"node:path\";\n+import oracledb from \"oracledb\";\n+import * as htmlparser2 from \"htmlparser2\";\n+import { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\n+\n+function* listDir(dir: string): Generator {\n+ const files = fs.readdirSync(dir, { withFileTypes: true });\n+\n+ for (const file of files) {\n+ if (file.isDirectory()) {\n+ yield* listDir(path.join(dir, file.name));\n+ } else {\n+ yield path.join(dir, file.name);\n+ }\n+ }\n+}\n+\n+/**\n+ * Load documents from a file, a directory, or a table\n+ * If the document isn't a plain text file such as a PDF,\n+ * a plain text version will be extracted using utl_to_text\n+ * @example\n+ * ```typescript\n+ * const loader = new OracleDocLoader(conn, params);\n+ * const docs = await loader.load();\n+ * ```\n+ */\n+export class OracleDocLoader extends BaseDocumentLoader {\n+ protected conn: oracledb.Connection;\n+\n+ protected pref: Record;\n+\n+ constructor(conn: oracledb.Connection, pref: Record) {\n+ super();\n+ this.conn = conn;\n+ this.pref = pref;\n+ }\n+\n+ /**\n+ * A method that loads the text file or blob and returns a promise that\n+ * resolves to an array of `Document` instances. It reads the text from\n+ * the file or blob using the `readFile` function from the\n+ * `node:fs/promises` module or the `text()` method of the blob. It then\n+ * parses the text using the `parse()` method and creates a `Document`\n+ * instance for each parsed page. 
The metadata includes the source of the\n+ * text (file path or blob) and, if there are multiple pages, the line\n+ * number of each page.\n+ * @returns A promise that resolves to an array of `Document` instances.\n+ */\n+ async load() {\n+ const docs = [];\n+\n+ if (\"file\" in this.pref) {\n+ // don't specify an encoding to use binary\n+ const data = fs.readFileSync(this.pref.file);\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(:content, :pref) text, dbms_vector_chain.utl_to_text(:content, json('{\"plaintext\": \"false\"}')) metadata from dual`\n+ ),\n+ {\n+ content: { val: data, dir: oracledb.BIND_IN, type: oracledb.BLOB },\n+ pref: { val: this.pref, type: oracledb.DB_TYPE_JSON },\n+ },\n+ ({\n+ resultSet: true, // return a ResultSet (default is false)\n+ fetchInfo: {\n+ TEXT: { type: oracledb.STRING },\n+ METADATA: { type: oracledb.STRING },\n+ },\n+ })\n+ );\n+\n+ const rs = result.resultSet;\n+ let row;\n+ if (rs != null) {\n+ while ((row = await rs.getRow())) {\n+ const [plain_text, metadata] = await this._extract(row);\n+ docs.push(\n+ new Document({\n+ pageContent: plain_text,\n+ metadata: >metadata,\n+ })\n+ );\n+ }\n+ await rs.close();\n+ }\n+ } else if (\"dir\" in this.pref) {\n+ for (const file of listDir(this.pref.dir)) {\n+ // don't specify an encoding to use binary\n+ const data = fs.readFileSync(file);\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(:content, :pref) text, dbms_vector_chain.utl_to_text(:content, json('{\"plaintext\": \"false\"}')) metadata from dual`\n+ ),\n+ {\n+ content: { val: data, dir: oracledb.BIND_IN, type: oracledb.BLOB },\n+ pref: { val: this.pref, type: oracledb.DB_TYPE_JSON },\n+ },\n+ ({\n+ resultSet: true, // return a ResultSet (default is false)\n+ fetchInfo: {\n+ TEXT: { type: oracledb.STRING },\n+ METADATA: { type: oracledb.STRING },\n+ },\n+ })\n+ );\n+\n+ const rs = result.resultSet;\n+ let row;\n+ if (rs != null) {\n+ while ((row = await rs.getRow())) {\n+ const [plain_text, metadata] = await this._extract(row);\n+ docs.push(\n+ new Document({\n+ pageContent: plain_text,\n+ metadata: >metadata,\n+ })\n+ );\n+ }\n+ await rs.close();\n+ }\n+ }\n+ } else if (\"tablename\" in this.pref) {\n+ if (!(\"owner\" in this.pref) || !(\"colname\" in this.pref)) {\n+ throw new Error(`Invalid preferences: missing owner or colname`);\n+ }\n+ // SQL doesn't accept backslash to escape a double quote (\\\"). 
If string contains \\\" change to \"\n+ if (\n+ this.pref.tablename.startsWith('\\\\\"') &&\n+ this.pref.tablename.endsWith('\\\\\"')\n+ ) {\n+ this.pref.tablename = this.pref.tablename.replaceAll(\"\\\\\", \"\");\n+ }\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(t.${this.pref.colname}, :pref) text, dbms_vector_chain.utl_to_text(t.${this.pref.colname}, json('{\"plaintext\": \"false\"}')) metadata from ${this.pref.owner}.${this.pref.tablename} t`", - "comment_created_at": "2025-05-22T22:07:14+00:00", - "comment_author": "cjbj", - "comment_body": "- You need to bind those values !\r\n- Would it be nice to throw a custom error message if the check fails?\r\n\r\n```\r\n const cn = 'empno';\r\n const ow = 'scott';\r\n const tn = 'emp';\r\n const qn = `${ow}.${tn}`;\r\n const sql = `select sys.dbms_assert.simple_sql_name(:sn), sys.dbms_assert.qualified_sql_name(:qn) from dual`;\r\n const binds = [cn, qn];\r\n await connection.execute(sql, binds);\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2110532871", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 8007, - "pr_file": "libs/langchain-community/src/document_loaders/fs/oracle.ts", - "discussion_id": "2101515141", - "commented_code": "@@ -0,0 +1,197 @@\n+import { Document } from \"@langchain/core/documents\";\n+import fs from \"node:fs\";\n+import path from \"node:path\";\n+import oracledb from \"oracledb\";\n+import * as htmlparser2 from \"htmlparser2\";\n+import { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\n+\n+function* listDir(dir: string): Generator {\n+ const files = fs.readdirSync(dir, { withFileTypes: true });\n+\n+ for (const file of files) {\n+ if (file.isDirectory()) {\n+ yield* listDir(path.join(dir, file.name));\n+ } else {\n+ yield path.join(dir, file.name);\n+ }\n+ }\n+}\n+\n+/**\n+ * Load documents from a file, a directory, or a table\n+ * If the document isn't a plain text file such as a PDF,\n+ * a plain text version will be extracted using utl_to_text\n+ * @example\n+ * ```typescript\n+ * const loader = new OracleDocLoader(conn, params);\n+ * const docs = await loader.load();\n+ * ```\n+ */\n+export class OracleDocLoader extends BaseDocumentLoader {\n+ protected conn: oracledb.Connection;\n+\n+ protected pref: Record;\n+\n+ constructor(conn: oracledb.Connection, pref: Record) {\n+ super();\n+ this.conn = conn;\n+ this.pref = pref;\n+ }\n+\n+ /**\n+ * A method that loads the text file or blob and returns a promise that\n+ * resolves to an array of `Document` instances. It reads the text from\n+ * the file or blob using the `readFile` function from the\n+ * `node:fs/promises` module or the `text()` method of the blob. It then\n+ * parses the text using the `parse()` method and creates a `Document`\n+ * instance for each parsed page. 
The metadata includes the source of the\n+ * text (file path or blob) and, if there are multiple pages, the line\n+ * number of each page.\n+ * @returns A promise that resolves to an array of `Document` instances.\n+ */\n+ async load() {\n+ const docs = [];\n+\n+ if (\"file\" in this.pref) {\n+ // don't specify an encoding to use binary\n+ const data = fs.readFileSync(this.pref.file);\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(:content, :pref) text, dbms_vector_chain.utl_to_text(:content, json('{\"plaintext\": \"false\"}')) metadata from dual`\n+ ),\n+ {\n+ content: { val: data, dir: oracledb.BIND_IN, type: oracledb.BLOB },\n+ pref: { val: this.pref, type: oracledb.DB_TYPE_JSON },\n+ },\n+ ({\n+ resultSet: true, // return a ResultSet (default is false)\n+ fetchInfo: {\n+ TEXT: { type: oracledb.STRING },\n+ METADATA: { type: oracledb.STRING },\n+ },\n+ })\n+ );\n+\n+ const rs = result.resultSet;\n+ let row;\n+ if (rs != null) {\n+ while ((row = await rs.getRow())) {\n+ const [plain_text, metadata] = await this._extract(row);\n+ docs.push(\n+ new Document({\n+ pageContent: plain_text,\n+ metadata: >metadata,\n+ })\n+ );\n+ }\n+ await rs.close();\n+ }\n+ } else if (\"dir\" in this.pref) {\n+ for (const file of listDir(this.pref.dir)) {\n+ // don't specify an encoding to use binary\n+ const data = fs.readFileSync(file);\n+\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(:content, :pref) text, dbms_vector_chain.utl_to_text(:content, json('{\"plaintext\": \"false\"}')) metadata from dual`\n+ ),\n+ {\n+ content: { val: data, dir: oracledb.BIND_IN, type: oracledb.BLOB },\n+ pref: { val: this.pref, type: oracledb.DB_TYPE_JSON },\n+ },\n+ ({\n+ resultSet: true, // return a ResultSet (default is false)\n+ fetchInfo: {\n+ TEXT: { type: oracledb.STRING },\n+ METADATA: { type: oracledb.STRING },\n+ },\n+ })\n+ );\n+\n+ const rs = result.resultSet;\n+ let row;\n+ if (rs != null) {\n+ while ((row = await rs.getRow())) {\n+ const [plain_text, metadata] = await this._extract(row);\n+ docs.push(\n+ new Document({\n+ pageContent: plain_text,\n+ metadata: >metadata,\n+ })\n+ );\n+ }\n+ await rs.close();\n+ }\n+ }\n+ } else if (\"tablename\" in this.pref) {\n+ if (!(\"owner\" in this.pref) || !(\"colname\" in this.pref)) {\n+ throw new Error(`Invalid preferences: missing owner or colname`);\n+ }\n+ // SQL doesn't accept backslash to escape a double quote (\\\"). 
If string contains \\\" change to \"\n+ if (\n+ this.pref.tablename.startsWith('\\\\\"') &&\n+ this.pref.tablename.endsWith('\\\\\"')\n+ ) {\n+ this.pref.tablename = this.pref.tablename.replaceAll(\"\\\\\", \"\");\n+ }\n+ const result = await this.conn.execute(\n+ (\n+ `select dbms_vector_chain.utl_to_text(t.${this.pref.colname}, :pref) text, dbms_vector_chain.utl_to_text(t.${this.pref.colname}, json('{\"plaintext\": \"false\"}')) metadata from ${this.pref.owner}.${this.pref.tablename} t`", - "comment_created_at": "2025-05-27T23:37:33+00:00", - "comment_author": "hackerdave", - "comment_body": "Changed to bind values and added a custom error message as suggested", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2004662752", - "pr_number": 7852, - "pr_file": "libs/langchain-google-cloud-sql-pg/src/chatMessageHistory.ts", - "created_at": "2025-03-20T02:03:37+00:00", - "commented_code": "import { BaseChatMessageHistory } from \"@langchain/core/chat_history\";\nimport { AIMessage, BaseMessage, HumanMessage, StoredMessage, mapStoredMessagesToChatMessages } from \"@langchain/core/messages\";\nimport PostgresEngine from \"./engine.js\";\n\nexport interface PostgresChatMessageHistoryInput {\n engine: PostgresEngine;\n sessionId: string;\n tableName: string;\n schemaName: string;\n}\n\nexport class PostgresChatMessageHistory extends BaseChatMessageHistory {\n lc_namespace: string[] = [\"langchain\", \"stores\", \"message\", \"google-cloud-sql-pg\"];\n\n engine: PostgresEngine;\n\n sessionId: string;\n\n tableName: string;\n\n schemaName: string;\n\n constructor({ engine, sessionId, tableName, schemaName = \"public\" }: PostgresChatMessageHistoryInput) {\n super();\n this.engine = engine;\n this.sessionId = sessionId;\n this.tableName = tableName;\n this.schemaName = schemaName;\n }\n\n /**\n * Create a new PostgresChatMessageHistory instance.\n *\n * @param {PostgresEngine} engine Postgres engine instance to use.\n * @param {string} sessionId Retrieve the table content witht this session ID.\n * @param {string} tableName Table name that stores that chat message history.\n * @param {string} schemaName Schema name for the chat message history table. 
Default: \"public\".\n * @returns PostgresChatMessageHistory instance.\n */\n static async create(\n engine: PostgresEngine,\n sessionId: string,\n tableName: string,\n schemaName: string = \"public\"\n ) {\n const query = `SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '${tableName}' AND table_schema = '${schemaName}'`;", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "2004662752", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7852, - "pr_file": "libs/langchain-google-cloud-sql-pg/src/chatMessageHistory.ts", - "discussion_id": "2004662752", - "commented_code": "@@ -0,0 +1,143 @@\n+import { BaseChatMessageHistory } from \"@langchain/core/chat_history\";\n+import { AIMessage, BaseMessage, HumanMessage, StoredMessage, mapStoredMessagesToChatMessages } from \"@langchain/core/messages\";\n+import PostgresEngine from \"./engine.js\";\n+\n+export interface PostgresChatMessageHistoryInput {\n+ engine: PostgresEngine;\n+ sessionId: string;\n+ tableName: string;\n+ schemaName: string;\n+}\n+\n+export class PostgresChatMessageHistory extends BaseChatMessageHistory {\n+ lc_namespace: string[] = [\"langchain\", \"stores\", \"message\", \"google-cloud-sql-pg\"];\n+\n+ engine: PostgresEngine;\n+\n+ sessionId: string;\n+\n+ tableName: string;\n+\n+ schemaName: string;\n+\n+ constructor({ engine, sessionId, tableName, schemaName = \"public\" }: PostgresChatMessageHistoryInput) {\n+ super();\n+ this.engine = engine;\n+ this.sessionId = sessionId;\n+ this.tableName = tableName;\n+ this.schemaName = schemaName;\n+ }\n+\n+ /**\n+ * Create a new PostgresChatMessageHistory instance.\n+ *\n+ * @param {PostgresEngine} engine Postgres engine instance to use.\n+ * @param {string} sessionId Retrieve the table content witht this session ID.\n+ * @param {string} tableName Table name that stores that chat message history.\n+ * @param {string} schemaName Schema name for the chat message history table. 
Default: \"public\".\n+ * @returns PostgresChatMessageHistory instance.\n+ */\n+ static async create(\n+ engine: PostgresEngine,\n+ sessionId: string,\n+ tableName: string,\n+ schemaName: string = \"public\"\n+ ) {\n+ const query = `SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '${tableName}' AND table_schema = '${schemaName}'`;", - "comment_created_at": "2025-03-20T02:03:37+00:00", - "comment_author": "jacoblee93", - "comment_body": "May be worth a small note in docs emphasizing that `tableName` and `schemaName` in these files are not escaped and to not pass in end-user input here", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1901267827", - "pr_number": 7360, - "pr_file": "libs/langchain-community/src/vectorstores/mariadb.ts", - "created_at": "2025-01-02T21:19:03+00:00", - "commented_code": "import mariadb, { type Pool, type PoolConfig } from \"mariadb\";\nimport { VectorStore } from \"@langchain/core/vectorstores\";\nimport type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\nimport { Document } from \"@langchain/core/documents\";\nimport { getEnvironmentVariable } from \"@langchain/core/utils/env\";\nimport {\n FilterExpressionConverter,\n StringBuilder,\n BaseFilterExpressionConverter,\n Expression,\n Value,\n Key,\n Group,\n} from \"@langchain/core/filter\";\n\ntype Metadata = Record;\n\nexport type DistanceStrategy = \"COSINE\" | \"EUCLIDEAN\";\n\n/**\n * Converts Expression into JSON metadata filter expression format\n * for MariaDB\n */\nclass MariaDBFilterExpressionConverter\n extends BaseFilterExpressionConverter\n implements FilterExpressionConverter\n{\n private metadataFieldName: string;\n\n constructor(metadataFieldName: string) {\n super();\n this.metadataFieldName = metadataFieldName;\n }\n\n convertExpressionToContext(\n expression: Expression,\n context: StringBuilder\n ): void {\n super.convertOperandToContext(expression.left, context);\n super.convertSymbolToContext(expression, context);\n if (expression.right) {\n super.convertOperandToContext(expression.right, context);\n }\n }\n\n convertKeyToContext(key: Key, context: StringBuilder): void {\n context.append(`JSON_VALUE(${this.metadataFieldName}, '$.${key.key}')`);\n }\n\n writeValueRangeStart(_listValue: Value, context: StringBuilder): void {\n context.append(\"(\");\n }\n\n writeValueRangeEnd(_listValue: Value, context: StringBuilder): void {\n context.append(\")\");\n }\n\n writeGroupStart(_group: Group, context: StringBuilder): void {\n context.append(\"(\");\n }\n\n writeGroupEnd(_group: Group, context: StringBuilder): void {\n context.append(\")\");\n }\n}\n\n/**\n * Interface that defines the arguments required to create a\n * `MariaDBStore` instance. 
It includes MariaDB connection options,\n * table name and verbosity level.\n */\nexport interface MariaDBStoreArgs {\n connectionOptions?: PoolConfig;\n pool?: Pool;\n tableName?: string;\n collectionTableName?: string;\n collectionName?: string;\n collectionMetadata?: Metadata | null;\n schemaName?: string | null;\n columns?: {\n idColumnName?: string;\n vectorColumnName?: string;\n contentColumnName?: string;\n metadataColumnName?: string;\n };\n verbose?: boolean;\n /**\n * The amount of documents to chunk by when\n * adding vectors.\n * @default 500\n */\n chunkSize?: number;\n ids?: string[];\n distanceStrategy?: DistanceStrategy;\n}\n\n/**\n * MariaDB vector store integration.\n *\n * Setup:\n * Install `@langchain/community` and `mariadb`.\n *\n * If you wish to generate ids, you should also install the `uuid` package.\n *\n * ```bash\n * npm install @langchain/community mariadb uuid\n * ```\n *\n * ## [Constructor args](https://api.js.langchain.com/classes/_langchain_community.vectorstores_mariadb.MariaDB.html#constructor)\n *\n *
\n * Instantiate\n *\n * ```typescript\n * import {\n * MariaDBStore,\n * DistanceStrategy,\n * } from \"@langchain/community/vectorstores/mariadb\";\n *\n * // Or other embeddings\n * import { OpenAIEmbeddings } from \"@langchain/openai\";\n * import { PoolConfig } from \"mariadb\";\n *\n * const embeddings = new OpenAIEmbeddings({\n * model: \"text-embedding-3-small\",\n * });\n *\n * // Sample config\n * const config = {\n * connectionOptions: {\n * host: \"127.0.0.1\",\n * port: 3306,\n * user: \"myuser\",\n * password: \"ChangeMe\",\n * database: \"api\",\n * } as PoolConfig,\n * tableName: \"testlangchainjs\",\n * columns: {\n * idColumnName: \"id\",\n * vectorColumnName: \"vector\",\n * contentColumnName: \"content\",\n * metadataColumnName: \"metadata\",\n * },\n * // supported distance strategies: COSINE (default) or EUCLIDEAN\n * distanceStrategy: \"COSINE\" as DistanceStrategy,\n * };\n *\n * const vectorStore = await MariaDBStore.initialize(embeddings, config);\n * ```\n *
\n * Add documents\n *\n * ```typescript\n * import type { Document } from '@langchain/core/documents';\n *\n * const document1 = { pageContent: \"foo\", metadata: { baz: \"bar\" } };\n * const document2 = { pageContent: \"thud\", metadata: { bar: \"baz\" } };\n * const document3 = { pageContent: \"i will be deleted :(\", metadata: {} };\n *\n * const documents: Document[] = [document1, document2, document3];\n * const ids = [\"1\", \"2\", \"3\"];\n * await vectorStore.addDocuments(documents, { ids });\n * ```\n *
\n * Delete documents\n *\n * ```typescript\n * await vectorStore.delete({ ids: [\"3\"] });\n * ```\n *
\n * Similarity search\n *\n * ```typescript\n * const results = await vectorStore.similaritySearch(\"thud\", 1);\n * for (const doc of results) {\n * console.log(`* ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n * }\n * // Output: * thud [{\"baz\":\"bar\"}]\n * ```\n *
\n * Similarity search with filter\n *\n * ```typescript\n * const resultsWithFilter = await vectorStore.similaritySearch(\"thud\", 1, new Expression( ExpressionType.EQ, new Key(\"country\"), new Value(\"BG\")));\n *\n * for (const doc of resultsWithFilter) {\n * console.log(`* ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n * }\n * // Output: * foo [{\"baz\":\"bar\"}]\n * ```\n *
\n * Similarity search with score\n *\n * ```typescript\n * const resultsWithScore = await vectorStore.similaritySearchWithScore(\"qux\", 1);\n * for (const [doc, score] of resultsWithScore) {\n * console.log(`* [SIM=${score.toFixed(6)}] ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n * }\n * // Output: * [SIM=0.000000] qux [{\"bar\":\"baz\",\"baz\":\"bar\"}]\n * ```\n *
\n * As a retriever\n *\n * ```typescript\n * const retriever = vectorStore.asRetriever({\n * searchType: \"mmr\", // Leave blank for standard similarity search\n * k: 1,\n * });\n * const resultAsRetriever = await retriever.invoke(\"thud\");\n * console.log(resultAsRetriever);\n *\n * // Output: [Document({ metadata: { \"baz\":\"bar\" }, pageContent: \"thud\" })]\n * ```\n *
\n */\nexport class MariaDBStore extends VectorStore {\n tableName: string;\n\n collectionTableName?: string;\n\n collectionName = \"langchain\";\n\n collectionId?: string;\n\n collectionMetadata: Metadata | null;\n\n schemaName: string | null;\n\n idColumnName: string;\n\n vectorColumnName: string;\n\n contentColumnName: string;\n\n metadataColumnName: string;\n\n _verbose?: boolean;\n\n pool: Pool;\n\n chunkSize = 500;\n\n distanceStrategy: DistanceStrategy;\n\n expressionConverter: MariaDBFilterExpressionConverter;\n\n constructor(embeddings: EmbeddingsInterface, config: MariaDBStoreArgs) {\n super(embeddings, config);\n this.tableName = this.escapeId(config.tableName ?? \"langchain\", false);\n if (\n config.collectionName !== undefined &&\n config.collectionTableName === undefined\n ) {\n throw new Error(\n `If supplying a \"collectionName\", you must also supply a \"collectionTableName\".`\n );\n }\n\n this.collectionTableName = config.collectionTableName\n ? this.escapeId(config.collectionTableName, false)\n : undefined;\n\n this.collectionName = config.collectionName\n ? this.escapeId(config.collectionName, false)\n : \"langchaincol\";\n\n this.collectionMetadata = config.collectionMetadata ?? null;\n this.schemaName = config.schemaName\n ? this.escapeId(config.schemaName, false)\n : null;\n\n this.vectorColumnName = this.escapeId(\n config.columns?.vectorColumnName ?? \"embedding\",\n false\n );\n this.contentColumnName = this.escapeId(\n config.columns?.contentColumnName ?? \"text\",\n false\n );\n this.idColumnName = this.escapeId(\n config.columns?.idColumnName ?? \"id\",\n false\n );\n this.metadataColumnName = this.escapeId(\n config.columns?.metadataColumnName ?? \"metadata\",\n false\n );\n this.expressionConverter = new MariaDBFilterExpressionConverter(\n this.metadataColumnName\n );\n\n if (!config.connectionOptions && !config.pool) {\n throw new Error(\n \"You must provide either a `connectionOptions` object or a `pool` instance.\"\n );\n }\n\n const langchainVerbose = getEnvironmentVariable(\"LANGCHAIN_VERBOSE\");\n\n if (langchainVerbose === \"true\") {\n this._verbose = true;\n } else if (langchainVerbose === \"false\") {\n this._verbose = false;\n } else {\n this._verbose = config.verbose;\n }\n\n if (config.pool) {\n this.pool = config.pool;\n } else {\n const poolConf = { ...config.connectionOptions, rowsAsArray: true };\n // add query to log if verbose\n if (this._verbose) poolConf.logger = { query: console.log };\n this.pool = mariadb.createPool(poolConf);\n }\n this.chunkSize = config.chunkSize ?? 500;\n\n this.distanceStrategy =\n config.distanceStrategy ?? (\"COSINE\" as DistanceStrategy);\n }\n\n get computedTableName() {\n return this.schemaName == null\n ? this.tableName\n : `${this.schemaName}.${this.tableName}`;\n }\n\n get computedCollectionTableName() {\n return this.schemaName == null\n ? 
`${this.collectionTableName}`\n : `\"${this.schemaName}\".\"${this.collectionTableName}\"`;\n }\n\n /**\n * Escape identifier\n *\n * @param identifier identifier value\n * @param alwaysQuote must identifier be quoted if not required\n */\n private escapeId(identifier: string, alwaysQuote: boolean): string {\n if (!identifier || identifier === \"\")\n throw new Error(\"Identifier is required\");\n\n const len = identifier.length;\n const simpleIdentifier = /^[0-9a-zA-Z$_]*$/;\n if (simpleIdentifier.test(identifier)) {\n if (len < 1 || len > 64) {\n throw new Error(\"Invalid identifier length\");\n }\n if (alwaysQuote) return `\\`${identifier}\\``;\n\n // Identifier names may begin with a numeral, but can't only contain numerals unless quoted.\n if (/^\\d+$/.test(identifier)) {\n // identifier containing only numerals must be quoted\n return `\\`${identifier}\\``;\n }\n // identifier containing only numerals must be quoted\n return identifier;\n } else {\n if (identifier.includes(\"\\u0000\")) {\n throw new Error(\"Invalid name - containing u0000 character\");\n }\n let ident = identifier;\n if (/^`.+`$/.test(identifier)) {\n ident = identifier.substring(1, identifier.length - 1);\n }\n if (len < 1 || len > 64) {\n throw new Error(\"Invalid identifier length\");\n }\n return `\\`${ident.replace(/`/g, \"``\")}\\``;\n }\n }\n\n private printable(definition: string): string {\n return definition.replaceAll(/[^0-9a-zA-Z_]/g, \"\");\n }\n\n /**\n * Static method to create a new `MariaDBStore` instance from a\n * connection. It creates a table if one does not exist, and calls\n * `connect` to return a new instance of `MariaDBStore`.\n *\n * @param embeddings - Embeddings instance.\n * @param fields - `MariaDBStoreArgs` instance\n * @param fields.dimensions Number of dimensions in your vector data type. default to 1536.\n * @returns A new instance of `MariaDBStore`.\n */\n static async initialize(\n embeddings: EmbeddingsInterface,\n config: MariaDBStoreArgs & { dimensions?: number }\n ): Promise {\n const { dimensions, ...rest } = config;\n const mariadbStore = new MariaDBStore(embeddings, rest);\n await mariadbStore.ensureTableInDatabase(dimensions);\n await mariadbStore.ensureCollectionTableInDatabase();\n await mariadbStore.loadCollectionId();\n\n return mariadbStore;\n }\n\n /**\n * Static method to create a new `MariaDBStore` instance from an\n * array of texts and their metadata. It converts the texts into\n * `Document` instances and adds them to the store.\n *\n * @param texts - Array of texts.\n * @param metadatas - Array of metadata objects or a single metadata object.\n * @param embeddings - Embeddings instance.\n * @param dbConfig - `MariaDBStoreArgs` instance.\n * @returns Promise that resolves with a new instance of `MariaDBStore`.\n */\n static async fromTexts(\n texts: string[],\n metadatas: object[] | object,\n embeddings: EmbeddingsInterface,\n dbConfig: MariaDBStoreArgs & { dimensions?: number }\n ): Promise {\n const docs = [];\n for (let i = 0; i < texts.length; i += 1) {\n const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;\n const newDoc = new Document({\n pageContent: texts[i],\n metadata,\n });\n docs.push(newDoc);\n }\n\n return MariaDBStore.fromDocuments(docs, embeddings, dbConfig);\n }\n\n /**\n * Static method to create a new `MariaDBStore` instance from an\n * array of `Document` instances. 
It adds the documents to the store.\n *\n * @param docs - Array of `Document` instances.\n * @param embeddings - Embeddings instance.\n * @param dbConfig - `MariaDBStoreArgs` instance.\n * @returns Promise that resolves with a new instance of `MariaDBStore`.\n */\n static async fromDocuments(\n docs: Document[],\n embeddings: EmbeddingsInterface,\n dbConfig: MariaDBStoreArgs & { dimensions?: number }\n ): Promise {\n const instance = await MariaDBStore.initialize(embeddings, dbConfig);\n await instance.addDocuments(docs, { ids: dbConfig.ids });\n return instance;\n }\n\n _vectorstoreType(): string {\n return \"mariadb\";\n }\n\n /**\n * Method to add documents to the vector store. It converts the documents into\n * vectors, and adds them to the store.\n *\n * @param documents - Array of `Document` instances.\n * @param options - Optional arguments for adding documents\n * @returns Promise that resolves when the documents have been added.\n */\n async addDocuments(\n documents: Document[],\n options?: { ids?: string[] }\n ): Promise {\n const texts = documents.map(({ pageContent }) => pageContent);\n\n return this.addVectors(\n await this.embeddings.embedDocuments(texts),\n documents,\n options\n );\n }\n\n /**\n * Inserts a row for the collectionName provided at initialization if it does not\n * exist and set the collectionId.\n */\n private async loadCollectionId(): Promise {\n if (this.collectionId) {\n return;\n }\n\n if (this.collectionTableName) {\n const queryResult = await this.pool.query(\n {\n sql: `SELECT uuid from ${this.computedCollectionTableName} WHERE label = ?`,\n rowsAsArray: true,\n },\n [this.collectionName]\n );\n if (queryResult.length > 0) {\n this.collectionId = queryResult[0][0];\n } else {\n const insertString = `INSERT INTO ${this.computedCollectionTableName}(label, cmetadata) VALUES (?, ?) RETURNING uuid`;\n const insertResult = await this.pool.query(\n { sql: insertString, rowsAsArray: true },\n [this.collectionName, this.collectionMetadata]\n );\n this.collectionId = insertResult[0][0];\n }\n }\n }\n\n /**\n * Method to add vectors to the vector store. 
It converts the vectors into\n * rows and inserts them into the database.\n *\n * @param vectors - Array of vectors.\n * @param documents - Array of `Document` instances.\n * @param options - Optional arguments for adding documents\n * @returns Promise that resolves when the vectors have been added.\n */\n async addVectors(\n vectors: number[][],\n documents: Document[],\n options?: { ids?: string[] }\n ): Promise {\n const ids = options?.ids;\n\n // Either all documents have ids or none of them do to avoid confusion.\n if (ids !== undefined && ids.length !== vectors.length) {\n throw new Error(\n \"The number of ids must match the number of vectors provided.\"\n );\n }\n await this.loadCollectionId();\n\n const insertQuery = `INSERT INTO ${this.computedTableName}(${", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1901267827", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7360, - "pr_file": "libs/langchain-community/src/vectorstores/mariadb.ts", - "discussion_id": "1901267827", - "commented_code": "@@ -0,0 +1,760 @@\n+import mariadb, { type Pool, type PoolConfig } from \"mariadb\";\n+import { VectorStore } from \"@langchain/core/vectorstores\";\n+import type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { Document } from \"@langchain/core/documents\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+import {\n+ FilterExpressionConverter,\n+ StringBuilder,\n+ BaseFilterExpressionConverter,\n+ Expression,\n+ Value,\n+ Key,\n+ Group,\n+} from \"@langchain/core/filter\";\n+\n+type Metadata = Record;\n+\n+export type DistanceStrategy = \"COSINE\" | \"EUCLIDEAN\";\n+\n+/**\n+ * Converts Expression into JSON metadata filter expression format\n+ * for MariaDB\n+ */\n+class MariaDBFilterExpressionConverter\n+ extends BaseFilterExpressionConverter\n+ implements FilterExpressionConverter\n+{\n+ private metadataFieldName: string;\n+\n+ constructor(metadataFieldName: string) {\n+ super();\n+ this.metadataFieldName = metadataFieldName;\n+ }\n+\n+ convertExpressionToContext(\n+ expression: Expression,\n+ context: StringBuilder\n+ ): void {\n+ super.convertOperandToContext(expression.left, context);\n+ super.convertSymbolToContext(expression, context);\n+ if (expression.right) {\n+ super.convertOperandToContext(expression.right, context);\n+ }\n+ }\n+\n+ convertKeyToContext(key: Key, context: StringBuilder): void {\n+ context.append(`JSON_VALUE(${this.metadataFieldName}, '$.${key.key}')`);\n+ }\n+\n+ writeValueRangeStart(_listValue: Value, context: StringBuilder): void {\n+ context.append(\"(\");\n+ }\n+\n+ writeValueRangeEnd(_listValue: Value, context: StringBuilder): void {\n+ context.append(\")\");\n+ }\n+\n+ writeGroupStart(_group: Group, context: StringBuilder): void {\n+ context.append(\"(\");\n+ }\n+\n+ writeGroupEnd(_group: Group, context: StringBuilder): void {\n+ context.append(\")\");\n+ }\n+}\n+\n+/**\n+ * Interface that defines the arguments required to create a\n+ * `MariaDBStore` instance. 
It includes MariaDB connection options,\n+ * table name and verbosity level.\n+ */\n+export interface MariaDBStoreArgs {\n+ connectionOptions?: PoolConfig;\n+ pool?: Pool;\n+ tableName?: string;\n+ collectionTableName?: string;\n+ collectionName?: string;\n+ collectionMetadata?: Metadata | null;\n+ schemaName?: string | null;\n+ columns?: {\n+ idColumnName?: string;\n+ vectorColumnName?: string;\n+ contentColumnName?: string;\n+ metadataColumnName?: string;\n+ };\n+ verbose?: boolean;\n+ /**\n+ * The amount of documents to chunk by when\n+ * adding vectors.\n+ * @default 500\n+ */\n+ chunkSize?: number;\n+ ids?: string[];\n+ distanceStrategy?: DistanceStrategy;\n+}\n+\n+/**\n+ * MariaDB vector store integration.\n+ *\n+ * Setup:\n+ * Install `@langchain/community` and `mariadb`.\n+ *\n+ * If you wish to generate ids, you should also install the `uuid` package.\n+ *\n+ * ```bash\n+ * npm install @langchain/community mariadb uuid\n+ * ```\n+ *\n+ * ## [Constructor args](https://api.js.langchain.com/classes/_langchain_community.vectorstores_mariadb.MariaDB.html#constructor)\n+ *\n+ *
\n+ * Instantiate\n+ *\n+ * ```typescript\n+ * import {\n+ * MariaDBStore,\n+ * DistanceStrategy,\n+ * } from \"@langchain/community/vectorstores/mariadb\";\n+ *\n+ * // Or other embeddings\n+ * import { OpenAIEmbeddings } from \"@langchain/openai\";\n+ * import { PoolConfig } from \"mariadb\";\n+ *\n+ * const embeddings = new OpenAIEmbeddings({\n+ * model: \"text-embedding-3-small\",\n+ * });\n+ *\n+ * // Sample config\n+ * const config = {\n+ * connectionOptions: {\n+ * host: \"127.0.0.1\",\n+ * port: 3306,\n+ * user: \"myuser\",\n+ * password: \"ChangeMe\",\n+ * database: \"api\",\n+ * } as PoolConfig,\n+ * tableName: \"testlangchainjs\",\n+ * columns: {\n+ * idColumnName: \"id\",\n+ * vectorColumnName: \"vector\",\n+ * contentColumnName: \"content\",\n+ * metadataColumnName: \"metadata\",\n+ * },\n+ * // supported distance strategies: COSINE (default) or EUCLIDEAN\n+ * distanceStrategy: \"COSINE\" as DistanceStrategy,\n+ * };\n+ *\n+ * const vectorStore = await MariaDBStore.initialize(embeddings, config);\n+ * ```\n+ *
\n+ * Add documents\n+ *\n+ * ```typescript\n+ * import type { Document } from '@langchain/core/documents';\n+ *\n+ * const document1 = { pageContent: \"foo\", metadata: { baz: \"bar\" } };\n+ * const document2 = { pageContent: \"thud\", metadata: { bar: \"baz\" } };\n+ * const document3 = { pageContent: \"i will be deleted :(\", metadata: {} };\n+ *\n+ * const documents: Document[] = [document1, document2, document3];\n+ * const ids = [\"1\", \"2\", \"3\"];\n+ * await vectorStore.addDocuments(documents, { ids });\n+ * ```\n+ *
\n+ * Delete documents\n+ *\n+ * ```typescript\n+ * await vectorStore.delete({ ids: [\"3\"] });\n+ * ```\n+ *
\n+ * Similarity search\n+ *\n+ * ```typescript\n+ * const results = await vectorStore.similaritySearch(\"thud\", 1);\n+ * for (const doc of results) {\n+ * console.log(`* ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n+ * }\n+ * // Output: * thud [{\"baz\":\"bar\"}]\n+ * ```\n+ *
\n+ * Similarity search with filter\n+ *\n+ * ```typescript\n+ * const resultsWithFilter = await vectorStore.similaritySearch(\"thud\", 1, new Expression( ExpressionType.EQ, new Key(\"country\"), new Value(\"BG\")));\n+ *\n+ * for (const doc of resultsWithFilter) {\n+ * console.log(`* ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n+ * }\n+ * // Output: * foo [{\"baz\":\"bar\"}]\n+ * ```\n+ *
\n+ * Similarity search with score\n+ *\n+ * ```typescript\n+ * const resultsWithScore = await vectorStore.similaritySearchWithScore(\"qux\", 1);\n+ * for (const [doc, score] of resultsWithScore) {\n+ * console.log(`* [SIM=${score.toFixed(6)}] ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n+ * }\n+ * // Output: * [SIM=0.000000] qux [{\"bar\":\"baz\",\"baz\":\"bar\"}]\n+ * ```\n+ *
\n+ * As a retriever\n+ *\n+ * ```typescript\n+ * const retriever = vectorStore.asRetriever({\n+ * searchType: \"mmr\", // Leave blank for standard similarity search\n+ * k: 1,\n+ * });\n+ * const resultAsRetriever = await retriever.invoke(\"thud\");\n+ * console.log(resultAsRetriever);\n+ *\n+ * // Output: [Document({ metadata: { \"baz\":\"bar\" }, pageContent: \"thud\" })]\n+ * ```\n+ *
\n+ */\n+export class MariaDBStore extends VectorStore {\n+ tableName: string;\n+\n+ collectionTableName?: string;\n+\n+ collectionName = \"langchain\";\n+\n+ collectionId?: string;\n+\n+ collectionMetadata: Metadata | null;\n+\n+ schemaName: string | null;\n+\n+ idColumnName: string;\n+\n+ vectorColumnName: string;\n+\n+ contentColumnName: string;\n+\n+ metadataColumnName: string;\n+\n+ _verbose?: boolean;\n+\n+ pool: Pool;\n+\n+ chunkSize = 500;\n+\n+ distanceStrategy: DistanceStrategy;\n+\n+ expressionConverter: MariaDBFilterExpressionConverter;\n+\n+ constructor(embeddings: EmbeddingsInterface, config: MariaDBStoreArgs) {\n+ super(embeddings, config);\n+ this.tableName = this.escapeId(config.tableName ?? \"langchain\", false);\n+ if (\n+ config.collectionName !== undefined &&\n+ config.collectionTableName === undefined\n+ ) {\n+ throw new Error(\n+ `If supplying a \"collectionName\", you must also supply a \"collectionTableName\".`\n+ );\n+ }\n+\n+ this.collectionTableName = config.collectionTableName\n+ ? this.escapeId(config.collectionTableName, false)\n+ : undefined;\n+\n+ this.collectionName = config.collectionName\n+ ? this.escapeId(config.collectionName, false)\n+ : \"langchaincol\";\n+\n+ this.collectionMetadata = config.collectionMetadata ?? null;\n+ this.schemaName = config.schemaName\n+ ? this.escapeId(config.schemaName, false)\n+ : null;\n+\n+ this.vectorColumnName = this.escapeId(\n+ config.columns?.vectorColumnName ?? \"embedding\",\n+ false\n+ );\n+ this.contentColumnName = this.escapeId(\n+ config.columns?.contentColumnName ?? \"text\",\n+ false\n+ );\n+ this.idColumnName = this.escapeId(\n+ config.columns?.idColumnName ?? \"id\",\n+ false\n+ );\n+ this.metadataColumnName = this.escapeId(\n+ config.columns?.metadataColumnName ?? \"metadata\",\n+ false\n+ );\n+ this.expressionConverter = new MariaDBFilterExpressionConverter(\n+ this.metadataColumnName\n+ );\n+\n+ if (!config.connectionOptions && !config.pool) {\n+ throw new Error(\n+ \"You must provide either a `connectionOptions` object or a `pool` instance.\"\n+ );\n+ }\n+\n+ const langchainVerbose = getEnvironmentVariable(\"LANGCHAIN_VERBOSE\");\n+\n+ if (langchainVerbose === \"true\") {\n+ this._verbose = true;\n+ } else if (langchainVerbose === \"false\") {\n+ this._verbose = false;\n+ } else {\n+ this._verbose = config.verbose;\n+ }\n+\n+ if (config.pool) {\n+ this.pool = config.pool;\n+ } else {\n+ const poolConf = { ...config.connectionOptions, rowsAsArray: true };\n+ // add query to log if verbose\n+ if (this._verbose) poolConf.logger = { query: console.log };\n+ this.pool = mariadb.createPool(poolConf);\n+ }\n+ this.chunkSize = config.chunkSize ?? 500;\n+\n+ this.distanceStrategy =\n+ config.distanceStrategy ?? (\"COSINE\" as DistanceStrategy);\n+ }\n+\n+ get computedTableName() {\n+ return this.schemaName == null\n+ ? this.tableName\n+ : `${this.schemaName}.${this.tableName}`;\n+ }\n+\n+ get computedCollectionTableName() {\n+ return this.schemaName == null\n+ ? 
`${this.collectionTableName}`\n+ : `\"${this.schemaName}\".\"${this.collectionTableName}\"`;\n+ }\n+\n+ /**\n+ * Escape identifier\n+ *\n+ * @param identifier identifier value\n+ * @param alwaysQuote must identifier be quoted if not required\n+ */\n+ private escapeId(identifier: string, alwaysQuote: boolean): string {\n+ if (!identifier || identifier === \"\")\n+ throw new Error(\"Identifier is required\");\n+\n+ const len = identifier.length;\n+ const simpleIdentifier = /^[0-9a-zA-Z$_]*$/;\n+ if (simpleIdentifier.test(identifier)) {\n+ if (len < 1 || len > 64) {\n+ throw new Error(\"Invalid identifier length\");\n+ }\n+ if (alwaysQuote) return `\\`${identifier}\\``;\n+\n+ // Identifier names may begin with a numeral, but can't only contain numerals unless quoted.\n+ if (/^\\d+$/.test(identifier)) {\n+ // identifier containing only numerals must be quoted\n+ return `\\`${identifier}\\``;\n+ }\n+ // identifier containing only numerals must be quoted\n+ return identifier;\n+ } else {\n+ if (identifier.includes(\"\\u0000\")) {\n+ throw new Error(\"Invalid name - containing u0000 character\");\n+ }\n+ let ident = identifier;\n+ if (/^`.+`$/.test(identifier)) {\n+ ident = identifier.substring(1, identifier.length - 1);\n+ }\n+ if (len < 1 || len > 64) {\n+ throw new Error(\"Invalid identifier length\");\n+ }\n+ return `\\`${ident.replace(/`/g, \"``\")}\\``;\n+ }\n+ }\n+\n+ private printable(definition: string): string {\n+ return definition.replaceAll(/[^0-9a-zA-Z_]/g, \"\");\n+ }\n+\n+ /**\n+ * Static method to create a new `MariaDBStore` instance from a\n+ * connection. It creates a table if one does not exist, and calls\n+ * `connect` to return a new instance of `MariaDBStore`.\n+ *\n+ * @param embeddings - Embeddings instance.\n+ * @param fields - `MariaDBStoreArgs` instance\n+ * @param fields.dimensions Number of dimensions in your vector data type. default to 1536.\n+ * @returns A new instance of `MariaDBStore`.\n+ */\n+ static async initialize(\n+ embeddings: EmbeddingsInterface,\n+ config: MariaDBStoreArgs & { dimensions?: number }\n+ ): Promise {\n+ const { dimensions, ...rest } = config;\n+ const mariadbStore = new MariaDBStore(embeddings, rest);\n+ await mariadbStore.ensureTableInDatabase(dimensions);\n+ await mariadbStore.ensureCollectionTableInDatabase();\n+ await mariadbStore.loadCollectionId();\n+\n+ return mariadbStore;\n+ }\n+\n+ /**\n+ * Static method to create a new `MariaDBStore` instance from an\n+ * array of texts and their metadata. It converts the texts into\n+ * `Document` instances and adds them to the store.\n+ *\n+ * @param texts - Array of texts.\n+ * @param metadatas - Array of metadata objects or a single metadata object.\n+ * @param embeddings - Embeddings instance.\n+ * @param dbConfig - `MariaDBStoreArgs` instance.\n+ * @returns Promise that resolves with a new instance of `MariaDBStore`.\n+ */\n+ static async fromTexts(\n+ texts: string[],\n+ metadatas: object[] | object,\n+ embeddings: EmbeddingsInterface,\n+ dbConfig: MariaDBStoreArgs & { dimensions?: number }\n+ ): Promise {\n+ const docs = [];\n+ for (let i = 0; i < texts.length; i += 1) {\n+ const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;\n+ const newDoc = new Document({\n+ pageContent: texts[i],\n+ metadata,\n+ });\n+ docs.push(newDoc);\n+ }\n+\n+ return MariaDBStore.fromDocuments(docs, embeddings, dbConfig);\n+ }\n+\n+ /**\n+ * Static method to create a new `MariaDBStore` instance from an\n+ * array of `Document` instances. 
It adds the documents to the store.\n+ *\n+ * @param docs - Array of `Document` instances.\n+ * @param embeddings - Embeddings instance.\n+ * @param dbConfig - `MariaDBStoreArgs` instance.\n+ * @returns Promise that resolves with a new instance of `MariaDBStore`.\n+ */\n+ static async fromDocuments(\n+ docs: Document[],\n+ embeddings: EmbeddingsInterface,\n+ dbConfig: MariaDBStoreArgs & { dimensions?: number }\n+ ): Promise {\n+ const instance = await MariaDBStore.initialize(embeddings, dbConfig);\n+ await instance.addDocuments(docs, { ids: dbConfig.ids });\n+ return instance;\n+ }\n+\n+ _vectorstoreType(): string {\n+ return \"mariadb\";\n+ }\n+\n+ /**\n+ * Method to add documents to the vector store. It converts the documents into\n+ * vectors, and adds them to the store.\n+ *\n+ * @param documents - Array of `Document` instances.\n+ * @param options - Optional arguments for adding documents\n+ * @returns Promise that resolves when the documents have been added.\n+ */\n+ async addDocuments(\n+ documents: Document[],\n+ options?: { ids?: string[] }\n+ ): Promise {\n+ const texts = documents.map(({ pageContent }) => pageContent);\n+\n+ return this.addVectors(\n+ await this.embeddings.embedDocuments(texts),\n+ documents,\n+ options\n+ );\n+ }\n+\n+ /**\n+ * Inserts a row for the collectionName provided at initialization if it does not\n+ * exist and set the collectionId.\n+ */\n+ private async loadCollectionId(): Promise {\n+ if (this.collectionId) {\n+ return;\n+ }\n+\n+ if (this.collectionTableName) {\n+ const queryResult = await this.pool.query(\n+ {\n+ sql: `SELECT uuid from ${this.computedCollectionTableName} WHERE label = ?`,\n+ rowsAsArray: true,\n+ },\n+ [this.collectionName]\n+ );\n+ if (queryResult.length > 0) {\n+ this.collectionId = queryResult[0][0];\n+ } else {\n+ const insertString = `INSERT INTO ${this.computedCollectionTableName}(label, cmetadata) VALUES (?, ?) RETURNING uuid`;\n+ const insertResult = await this.pool.query(\n+ { sql: insertString, rowsAsArray: true },\n+ [this.collectionName, this.collectionMetadata]\n+ );\n+ this.collectionId = insertResult[0][0];\n+ }\n+ }\n+ }\n+\n+ /**\n+ * Method to add vectors to the vector store. 
It converts the vectors into\n+ * rows and inserts them into the database.\n+ *\n+ * @param vectors - Array of vectors.\n+ * @param documents - Array of `Document` instances.\n+ * @param options - Optional arguments for adding documents\n+ * @returns Promise that resolves when the vectors have been added.\n+ */\n+ async addVectors(\n+ vectors: number[][],\n+ documents: Document[],\n+ options?: { ids?: string[] }\n+ ): Promise {\n+ const ids = options?.ids;\n+\n+ // Either all documents have ids or none of them do to avoid confusion.\n+ if (ids !== undefined && ids.length !== vectors.length) {\n+ throw new Error(\n+ \"The number of ids must match the number of vectors provided.\"\n+ );\n+ }\n+ await this.loadCollectionId();\n+\n+ const insertQuery = `INSERT INTO ${this.computedTableName}(${", - "comment_created_at": "2025-01-02T21:19:03+00:00", - "comment_author": "jacoblee93", - "comment_body": "nit: can we escape column names too/disallow nonalphanumeric?", - "pr_file_module": null - }, - { - "comment_id": "1901592943", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 7360, - "pr_file": "libs/langchain-community/src/vectorstores/mariadb.ts", - "discussion_id": "1901267827", - "commented_code": "@@ -0,0 +1,760 @@\n+import mariadb, { type Pool, type PoolConfig } from \"mariadb\";\n+import { VectorStore } from \"@langchain/core/vectorstores\";\n+import type { EmbeddingsInterface } from \"@langchain/core/embeddings\";\n+import { Document } from \"@langchain/core/documents\";\n+import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n+import {\n+ FilterExpressionConverter,\n+ StringBuilder,\n+ BaseFilterExpressionConverter,\n+ Expression,\n+ Value,\n+ Key,\n+ Group,\n+} from \"@langchain/core/filter\";\n+\n+type Metadata = Record;\n+\n+export type DistanceStrategy = \"COSINE\" | \"EUCLIDEAN\";\n+\n+/**\n+ * Converts Expression into JSON metadata filter expression format\n+ * for MariaDB\n+ */\n+class MariaDBFilterExpressionConverter\n+ extends BaseFilterExpressionConverter\n+ implements FilterExpressionConverter\n+{\n+ private metadataFieldName: string;\n+\n+ constructor(metadataFieldName: string) {\n+ super();\n+ this.metadataFieldName = metadataFieldName;\n+ }\n+\n+ convertExpressionToContext(\n+ expression: Expression,\n+ context: StringBuilder\n+ ): void {\n+ super.convertOperandToContext(expression.left, context);\n+ super.convertSymbolToContext(expression, context);\n+ if (expression.right) {\n+ super.convertOperandToContext(expression.right, context);\n+ }\n+ }\n+\n+ convertKeyToContext(key: Key, context: StringBuilder): void {\n+ context.append(`JSON_VALUE(${this.metadataFieldName}, '$.${key.key}')`);\n+ }\n+\n+ writeValueRangeStart(_listValue: Value, context: StringBuilder): void {\n+ context.append(\"(\");\n+ }\n+\n+ writeValueRangeEnd(_listValue: Value, context: StringBuilder): void {\n+ context.append(\")\");\n+ }\n+\n+ writeGroupStart(_group: Group, context: StringBuilder): void {\n+ context.append(\"(\");\n+ }\n+\n+ writeGroupEnd(_group: Group, context: StringBuilder): void {\n+ context.append(\")\");\n+ }\n+}\n+\n+/**\n+ * Interface that defines the arguments required to create a\n+ * `MariaDBStore` instance. 
It includes MariaDB connection options,\n+ * table name and verbosity level.\n+ */\n+export interface MariaDBStoreArgs {\n+ connectionOptions?: PoolConfig;\n+ pool?: Pool;\n+ tableName?: string;\n+ collectionTableName?: string;\n+ collectionName?: string;\n+ collectionMetadata?: Metadata | null;\n+ schemaName?: string | null;\n+ columns?: {\n+ idColumnName?: string;\n+ vectorColumnName?: string;\n+ contentColumnName?: string;\n+ metadataColumnName?: string;\n+ };\n+ verbose?: boolean;\n+ /**\n+ * The amount of documents to chunk by when\n+ * adding vectors.\n+ * @default 500\n+ */\n+ chunkSize?: number;\n+ ids?: string[];\n+ distanceStrategy?: DistanceStrategy;\n+}\n+\n+/**\n+ * MariaDB vector store integration.\n+ *\n+ * Setup:\n+ * Install `@langchain/community` and `mariadb`.\n+ *\n+ * If you wish to generate ids, you should also install the `uuid` package.\n+ *\n+ * ```bash\n+ * npm install @langchain/community mariadb uuid\n+ * ```\n+ *\n+ * ## [Constructor args](https://api.js.langchain.com/classes/_langchain_community.vectorstores_mariadb.MariaDB.html#constructor)\n+ *\n+ *
\n+ * Instantiate\n+ *\n+ * ```typescript\n+ * import {\n+ * MariaDBStore,\n+ * DistanceStrategy,\n+ * } from \"@langchain/community/vectorstores/mariadb\";\n+ *\n+ * // Or other embeddings\n+ * import { OpenAIEmbeddings } from \"@langchain/openai\";\n+ * import { PoolConfig } from \"mariadb\";\n+ *\n+ * const embeddings = new OpenAIEmbeddings({\n+ * model: \"text-embedding-3-small\",\n+ * });\n+ *\n+ * // Sample config\n+ * const config = {\n+ * connectionOptions: {\n+ * host: \"127.0.0.1\",\n+ * port: 3306,\n+ * user: \"myuser\",\n+ * password: \"ChangeMe\",\n+ * database: \"api\",\n+ * } as PoolConfig,\n+ * tableName: \"testlangchainjs\",\n+ * columns: {\n+ * idColumnName: \"id\",\n+ * vectorColumnName: \"vector\",\n+ * contentColumnName: \"content\",\n+ * metadataColumnName: \"metadata\",\n+ * },\n+ * // supported distance strategies: COSINE (default) or EUCLIDEAN\n+ * distanceStrategy: \"COSINE\" as DistanceStrategy,\n+ * };\n+ *\n+ * const vectorStore = await MariaDBStore.initialize(embeddings, config);\n+ * ```\n+ *
\n+ * Add documents\n+ *\n+ * ```typescript\n+ * import type { Document } from '@langchain/core/documents';\n+ *\n+ * const document1 = { pageContent: \"foo\", metadata: { baz: \"bar\" } };\n+ * const document2 = { pageContent: \"thud\", metadata: { bar: \"baz\" } };\n+ * const document3 = { pageContent: \"i will be deleted :(\", metadata: {} };\n+ *\n+ * const documents: Document[] = [document1, document2, document3];\n+ * const ids = [\"1\", \"2\", \"3\"];\n+ * await vectorStore.addDocuments(documents, { ids });\n+ * ```\n+ *
\n+ * Delete documents\n+ *\n+ * ```typescript\n+ * await vectorStore.delete({ ids: [\"3\"] });\n+ * ```\n+ *
\n+ * Similarity search\n+ *\n+ * ```typescript\n+ * const results = await vectorStore.similaritySearch(\"thud\", 1);\n+ * for (const doc of results) {\n+ * console.log(`* ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n+ * }\n+ * // Output: * thud [{\"baz\":\"bar\"}]\n+ * ```\n+ *
\n+ * Similarity search with filter\n+ *\n+ * ```typescript\n+ * const resultsWithFilter = await vectorStore.similaritySearch(\"thud\", 1, new Expression( ExpressionType.EQ, new Key(\"country\"), new Value(\"BG\")));\n+ *\n+ * for (const doc of resultsWithFilter) {\n+ * console.log(`* ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n+ * }\n+ * // Output: * foo [{\"baz\":\"bar\"}]\n+ * ```\n+ *
\n+ * Similarity search with score\n+ *\n+ * ```typescript\n+ * const resultsWithScore = await vectorStore.similaritySearchWithScore(\"qux\", 1);\n+ * for (const [doc, score] of resultsWithScore) {\n+ * console.log(`* [SIM=${score.toFixed(6)}] ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n+ * }\n+ * // Output: * [SIM=0.000000] qux [{\"bar\":\"baz\",\"baz\":\"bar\"}]\n+ * ```\n+ *
\n+ * As a retriever\n+ *\n+ * ```typescript\n+ * const retriever = vectorStore.asRetriever({\n+ * searchType: \"mmr\", // Leave blank for standard similarity search\n+ * k: 1,\n+ * });\n+ * const resultAsRetriever = await retriever.invoke(\"thud\");\n+ * console.log(resultAsRetriever);\n+ *\n+ * // Output: [Document({ metadata: { \"baz\":\"bar\" }, pageContent: \"thud\" })]\n+ * ```\n+ *
\n+ */\n+export class MariaDBStore extends VectorStore {\n+ tableName: string;\n+\n+ collectionTableName?: string;\n+\n+ collectionName = \"langchain\";\n+\n+ collectionId?: string;\n+\n+ collectionMetadata: Metadata | null;\n+\n+ schemaName: string | null;\n+\n+ idColumnName: string;\n+\n+ vectorColumnName: string;\n+\n+ contentColumnName: string;\n+\n+ metadataColumnName: string;\n+\n+ _verbose?: boolean;\n+\n+ pool: Pool;\n+\n+ chunkSize = 500;\n+\n+ distanceStrategy: DistanceStrategy;\n+\n+ expressionConverter: MariaDBFilterExpressionConverter;\n+\n+ constructor(embeddings: EmbeddingsInterface, config: MariaDBStoreArgs) {\n+ super(embeddings, config);\n+ this.tableName = this.escapeId(config.tableName ?? \"langchain\", false);\n+ if (\n+ config.collectionName !== undefined &&\n+ config.collectionTableName === undefined\n+ ) {\n+ throw new Error(\n+ `If supplying a \"collectionName\", you must also supply a \"collectionTableName\".`\n+ );\n+ }\n+\n+ this.collectionTableName = config.collectionTableName\n+ ? this.escapeId(config.collectionTableName, false)\n+ : undefined;\n+\n+ this.collectionName = config.collectionName\n+ ? this.escapeId(config.collectionName, false)\n+ : \"langchaincol\";\n+\n+ this.collectionMetadata = config.collectionMetadata ?? null;\n+ this.schemaName = config.schemaName\n+ ? this.escapeId(config.schemaName, false)\n+ : null;\n+\n+ this.vectorColumnName = this.escapeId(\n+ config.columns?.vectorColumnName ?? \"embedding\",\n+ false\n+ );\n+ this.contentColumnName = this.escapeId(\n+ config.columns?.contentColumnName ?? \"text\",\n+ false\n+ );\n+ this.idColumnName = this.escapeId(\n+ config.columns?.idColumnName ?? \"id\",\n+ false\n+ );\n+ this.metadataColumnName = this.escapeId(\n+ config.columns?.metadataColumnName ?? \"metadata\",\n+ false\n+ );\n+ this.expressionConverter = new MariaDBFilterExpressionConverter(\n+ this.metadataColumnName\n+ );\n+\n+ if (!config.connectionOptions && !config.pool) {\n+ throw new Error(\n+ \"You must provide either a `connectionOptions` object or a `pool` instance.\"\n+ );\n+ }\n+\n+ const langchainVerbose = getEnvironmentVariable(\"LANGCHAIN_VERBOSE\");\n+\n+ if (langchainVerbose === \"true\") {\n+ this._verbose = true;\n+ } else if (langchainVerbose === \"false\") {\n+ this._verbose = false;\n+ } else {\n+ this._verbose = config.verbose;\n+ }\n+\n+ if (config.pool) {\n+ this.pool = config.pool;\n+ } else {\n+ const poolConf = { ...config.connectionOptions, rowsAsArray: true };\n+ // add query to log if verbose\n+ if (this._verbose) poolConf.logger = { query: console.log };\n+ this.pool = mariadb.createPool(poolConf);\n+ }\n+ this.chunkSize = config.chunkSize ?? 500;\n+\n+ this.distanceStrategy =\n+ config.distanceStrategy ?? (\"COSINE\" as DistanceStrategy);\n+ }\n+\n+ get computedTableName() {\n+ return this.schemaName == null\n+ ? this.tableName\n+ : `${this.schemaName}.${this.tableName}`;\n+ }\n+\n+ get computedCollectionTableName() {\n+ return this.schemaName == null\n+ ? 
`${this.collectionTableName}`\n+ : `\"${this.schemaName}\".\"${this.collectionTableName}\"`;\n+ }\n+\n+ /**\n+ * Escape identifier\n+ *\n+ * @param identifier identifier value\n+ * @param alwaysQuote must identifier be quoted if not required\n+ */\n+ private escapeId(identifier: string, alwaysQuote: boolean): string {\n+ if (!identifier || identifier === \"\")\n+ throw new Error(\"Identifier is required\");\n+\n+ const len = identifier.length;\n+ const simpleIdentifier = /^[0-9a-zA-Z$_]*$/;\n+ if (simpleIdentifier.test(identifier)) {\n+ if (len < 1 || len > 64) {\n+ throw new Error(\"Invalid identifier length\");\n+ }\n+ if (alwaysQuote) return `\\`${identifier}\\``;\n+\n+ // Identifier names may begin with a numeral, but can't only contain numerals unless quoted.\n+ if (/^\\d+$/.test(identifier)) {\n+ // identifier containing only numerals must be quoted\n+ return `\\`${identifier}\\``;\n+ }\n+ // identifier containing only numerals must be quoted\n+ return identifier;\n+ } else {\n+ if (identifier.includes(\"\\u0000\")) {\n+ throw new Error(\"Invalid name - containing u0000 character\");\n+ }\n+ let ident = identifier;\n+ if (/^`.+`$/.test(identifier)) {\n+ ident = identifier.substring(1, identifier.length - 1);\n+ }\n+ if (len < 1 || len > 64) {\n+ throw new Error(\"Invalid identifier length\");\n+ }\n+ return `\\`${ident.replace(/`/g, \"``\")}\\``;\n+ }\n+ }\n+\n+ private printable(definition: string): string {\n+ return definition.replaceAll(/[^0-9a-zA-Z_]/g, \"\");\n+ }\n+\n+ /**\n+ * Static method to create a new `MariaDBStore` instance from a\n+ * connection. It creates a table if one does not exist, and calls\n+ * `connect` to return a new instance of `MariaDBStore`.\n+ *\n+ * @param embeddings - Embeddings instance.\n+ * @param fields - `MariaDBStoreArgs` instance\n+ * @param fields.dimensions Number of dimensions in your vector data type. default to 1536.\n+ * @returns A new instance of `MariaDBStore`.\n+ */\n+ static async initialize(\n+ embeddings: EmbeddingsInterface,\n+ config: MariaDBStoreArgs & { dimensions?: number }\n+ ): Promise {\n+ const { dimensions, ...rest } = config;\n+ const mariadbStore = new MariaDBStore(embeddings, rest);\n+ await mariadbStore.ensureTableInDatabase(dimensions);\n+ await mariadbStore.ensureCollectionTableInDatabase();\n+ await mariadbStore.loadCollectionId();\n+\n+ return mariadbStore;\n+ }\n+\n+ /**\n+ * Static method to create a new `MariaDBStore` instance from an\n+ * array of texts and their metadata. It converts the texts into\n+ * `Document` instances and adds them to the store.\n+ *\n+ * @param texts - Array of texts.\n+ * @param metadatas - Array of metadata objects or a single metadata object.\n+ * @param embeddings - Embeddings instance.\n+ * @param dbConfig - `MariaDBStoreArgs` instance.\n+ * @returns Promise that resolves with a new instance of `MariaDBStore`.\n+ */\n+ static async fromTexts(\n+ texts: string[],\n+ metadatas: object[] | object,\n+ embeddings: EmbeddingsInterface,\n+ dbConfig: MariaDBStoreArgs & { dimensions?: number }\n+ ): Promise {\n+ const docs = [];\n+ for (let i = 0; i < texts.length; i += 1) {\n+ const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;\n+ const newDoc = new Document({\n+ pageContent: texts[i],\n+ metadata,\n+ });\n+ docs.push(newDoc);\n+ }\n+\n+ return MariaDBStore.fromDocuments(docs, embeddings, dbConfig);\n+ }\n+\n+ /**\n+ * Static method to create a new `MariaDBStore` instance from an\n+ * array of `Document` instances. 
It adds the documents to the store.\n+ *\n+ * @param docs - Array of `Document` instances.\n+ * @param embeddings - Embeddings instance.\n+ * @param dbConfig - `MariaDBStoreArgs` instance.\n+ * @returns Promise that resolves with a new instance of `MariaDBStore`.\n+ */\n+ static async fromDocuments(\n+ docs: Document[],\n+ embeddings: EmbeddingsInterface,\n+ dbConfig: MariaDBStoreArgs & { dimensions?: number }\n+ ): Promise {\n+ const instance = await MariaDBStore.initialize(embeddings, dbConfig);\n+ await instance.addDocuments(docs, { ids: dbConfig.ids });\n+ return instance;\n+ }\n+\n+ _vectorstoreType(): string {\n+ return \"mariadb\";\n+ }\n+\n+ /**\n+ * Method to add documents to the vector store. It converts the documents into\n+ * vectors, and adds them to the store.\n+ *\n+ * @param documents - Array of `Document` instances.\n+ * @param options - Optional arguments for adding documents\n+ * @returns Promise that resolves when the documents have been added.\n+ */\n+ async addDocuments(\n+ documents: Document[],\n+ options?: { ids?: string[] }\n+ ): Promise {\n+ const texts = documents.map(({ pageContent }) => pageContent);\n+\n+ return this.addVectors(\n+ await this.embeddings.embedDocuments(texts),\n+ documents,\n+ options\n+ );\n+ }\n+\n+ /**\n+ * Inserts a row for the collectionName provided at initialization if it does not\n+ * exist and set the collectionId.\n+ */\n+ private async loadCollectionId(): Promise {\n+ if (this.collectionId) {\n+ return;\n+ }\n+\n+ if (this.collectionTableName) {\n+ const queryResult = await this.pool.query(\n+ {\n+ sql: `SELECT uuid from ${this.computedCollectionTableName} WHERE label = ?`,\n+ rowsAsArray: true,\n+ },\n+ [this.collectionName]\n+ );\n+ if (queryResult.length > 0) {\n+ this.collectionId = queryResult[0][0];\n+ } else {\n+ const insertString = `INSERT INTO ${this.computedCollectionTableName}(label, cmetadata) VALUES (?, ?) RETURNING uuid`;\n+ const insertResult = await this.pool.query(\n+ { sql: insertString, rowsAsArray: true },\n+ [this.collectionName, this.collectionMetadata]\n+ );\n+ this.collectionId = insertResult[0][0];\n+ }\n+ }\n+ }\n+\n+ /**\n+ * Method to add vectors to the vector store. It converts the vectors into\n+ * rows and inserts them into the database.\n+ *\n+ * @param vectors - Array of vectors.\n+ * @param documents - Array of `Document` instances.\n+ * @param options - Optional arguments for adding documents\n+ * @returns Promise that resolves when the vectors have been added.\n+ */\n+ async addVectors(\n+ vectors: number[][],\n+ documents: Document[],\n+ options?: { ids?: string[] }\n+ ): Promise {\n+ const ids = options?.ids;\n+\n+ // Either all documents have ids or none of them do to avoid confusion.\n+ if (ids !== undefined && ids.length !== vectors.length) {\n+ throw new Error(\n+ \"The number of ids must match the number of vectors provided.\"\n+ );\n+ }\n+ await this.loadCollectionId();\n+\n+ const insertQuery = `INSERT INTO ${this.computedTableName}(${", - "comment_created_at": "2025-01-03T09:12:12+00:00", - "comment_author": "rusher", - "comment_body": "table name and columns names are all escaped to prevent any injection. 
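(Editorial aside, not part of the review thread: a minimal sketch of the kind of identifier escaping the reviewer is pointing at — backtick-quoting with embedded backticks doubled, mirroring the `escapeId` helper quoted above. The function name and the 64-character limit are taken from that quoted code; this is an illustration of the technique, not the package's actual export.)

```typescript
// Sketch of MariaDB/MySQL-style identifier escaping (mirrors the quoted escapeId logic).
function escapeIdentifier(identifier: string): string {
  if (!identifier) throw new Error("Identifier is required");
  if (identifier.length > 64) throw new Error("Invalid identifier length");
  if (identifier.includes("\u0000")) throw new Error("Invalid identifier");
  // Simple names can pass through unquoted; anything else is backtick-quoted,
  // with embedded backticks doubled so user input cannot break out of the name.
  if (/^[0-9a-zA-Z$_]+$/.test(identifier) && !/^\d+$/.test(identifier)) {
    return identifier;
  }
  return `\`${identifier.replace(/`/g, "``")}\``;
}

// e.g. escapeIdentifier("meta`data") === "`meta``data`"
```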
see `escapeId` function", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1755519345", - "pr_number": 6736, - "pr_file": "langchain/src/storage/tests/file_system.test.ts", - "created_at": "2024-09-11T20:08:42+00:00", - "commented_code": ").toEqual([value1, value2]);\n await fs.promises.rm(secondaryRootPath, { recursive: true, force: true });\n });\n\n test(\"Should disallow attempts to traverse paths outside of a subfolder\", async () => {\n const encoder = new TextEncoder();\n const store = await LocalFileStore.fromPath(secondaryRootPath);\n const value1 = new Date().toISOString();\n await expect(\n store.mset([[\"../foo\", encoder.encode(value1)]])\n ).rejects.toThrowError();\n await expect(\n store.mset([[\"/foo\", encoder.encode(value1)]])\n ).rejects.toThrowError();\n await expect(store.mget([\"../foo\"])).rejects.toThrowError();", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1755519345", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6736, - "pr_file": "langchain/src/storage/tests/file_system.test.ts", - "discussion_id": "1755519345", - "commented_code": "@@ -88,4 +88,19 @@ describe(\"LocalFileStore\", () => {\n ).toEqual([value1, value2]);\n await fs.promises.rm(secondaryRootPath, { recursive: true, force: true });\n });\n+\n+ test(\"Should disallow attempts to traverse paths outside of a subfolder\", async () => {\n+ const encoder = new TextEncoder();\n+ const store = await LocalFileStore.fromPath(secondaryRootPath);\n+ const value1 = new Date().toISOString();\n+ await expect(\n+ store.mset([[\"../foo\", encoder.encode(value1)]])\n+ ).rejects.toThrowError();\n+ await expect(\n+ store.mset([[\"/foo\", encoder.encode(value1)]])\n+ ).rejects.toThrowError();\n+ await expect(store.mget([\"../foo\"])).rejects.toThrowError();", - "comment_created_at": "2024-09-11T20:08:42+00:00", - "comment_author": "eyurtsev", - "comment_body": "maybe add test for `\\` as well?", - "pr_file_module": null - }, - { - "comment_id": "1755531830", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 6736, - "pr_file": "langchain/src/storage/tests/file_system.test.ts", - "discussion_id": "1755519345", - "commented_code": "@@ -88,4 +88,19 @@ describe(\"LocalFileStore\", () => {\n ).toEqual([value1, value2]);\n await fs.promises.rm(secondaryRootPath, { recursive: true, force: true });\n });\n+\n+ test(\"Should disallow attempts to traverse paths outside of a subfolder\", async () => {\n+ const encoder = new TextEncoder();\n+ const store = await LocalFileStore.fromPath(secondaryRootPath);\n+ const value1 = new Date().toISOString();\n+ await expect(\n+ store.mset([[\"../foo\", encoder.encode(value1)]])\n+ ).rejects.toThrowError();\n+ await expect(\n+ store.mset([[\"/foo\", encoder.encode(value1)]])\n+ ).rejects.toThrowError();\n+ await expect(store.mget([\"../foo\"])).rejects.toThrowError();", - "comment_created_at": "2024-09-11T20:18:07+00:00", - "comment_author": "jacoblee93", - "comment_body": "Yeah good call", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1667178276", - "pr_number": 5835, - "pr_file": "libs/langchain-google-common/src/experimental/utils/media_core.ts", - "created_at": "2024-07-05T21:49:09+00:00", - "commented_code": "import { v4 as uuidv4 } from \"uuid\";\nimport { BaseStore } from \"@langchain/core/stores\";\nimport { Serializable } from \"@langchain/core/load/serializable\";\n\nexport interface MediaBlobParameters {\n data?: Blob;\n\n metadata?: Record;\n\n path?: string;\n}\n\n/**\n * 
Represents a chunk of data that can be identified by the path where the\n * data is (or will be) located, along with optional metadata about the data.\n */\nexport class MediaBlob\n extends Serializable // FIXME - I'm not sure this serializes or deserializes correctly\n implements MediaBlobParameters\n{\n lc_serializable = true;\n\n lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n\n data?: Blob;\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n metadata?: Record;\n\n path?: string;\n\n constructor(params?: MediaBlobParameters) {\n super(params);\n this.data = params?.data;\n this.metadata = params?.metadata;\n this.path = params?.path;\n }\n\n get size(): number {\n return this.data?.size ?? 0;\n }\n\n get dataType(): string {\n return this.data?.type ?? \"\";\n }\n\n get encoding(): string {\n const charsetEquals = this.dataType.indexOf(\"charset=\");\n return charsetEquals === -1\n ? \"utf-8\"\n : this.dataType.substring(charsetEquals + 8);\n }\n\n get mimetype(): string {\n const semicolon = this.dataType.indexOf(\";\");\n return semicolon === -1\n ? this.dataType\n : this.dataType.substring(0, semicolon);\n }\n\n async asString(): Promise {\n const data = this.data ?? new Blob([]);\n const dataBuffer = await data.arrayBuffer();\n const dataArray = new Uint8Array(dataBuffer);\n\n // Need to handle the array in smaller chunks to deal with stack size limits\n let ret = \"\";\n const chunkSize = 102400;\n for (let i = 0; i < dataArray.length; i += chunkSize) {\n const chunk = dataArray.subarray(i, i + chunkSize);\n ret += String.fromCharCode(...chunk);\n }\n\n return ret;\n }\n\n async asBase64(): Promise {\n return btoa(await this.asString());\n }\n\n async asDataUrl(): Promise {\n return `data:${this.mimetype};base64,${await this.asBase64()}`;\n }\n\n async asUri(): Promise {\n return this.path ?? (await this.asDataUrl());\n }\n\n async encode(): Promise<{ encoded: string; encoding: string }> {\n const dataUrl = await this.asDataUrl();\n const comma = dataUrl.indexOf(\",\");\n const encoded = dataUrl.substring(comma + 1);\n const encoding: string = dataUrl.indexOf(\"base64\") > -1 ? \"base64\" : \"8bit\";\n return {\n encoded,\n encoding,\n };\n }\n}\n\nexport type ActionIfInvalidAction =\n | \"ignore\"\n | \"prefixPath\"\n | \"prefixUuid\"\n | \"removePath\";\n\nexport interface BlobStoreStoreOptions {\n /**\n * If the path is missing or invalid in the blob, how should we create\n * a new path?\n * Subclasses may define their own methods, but the following are supported\n * by default:\n * - Undefined or an emtpy string: Reject the blob\n * - \"ignore\": Attempt to store it anyway (but this may fail)\n * - \"prefixPath\": Use the default prefix for the BlobStore and get the\n * unique portion from the URL. The original path is stored in the metadata\n * - \"prefixUuid\": Use the default prefix for the BlobStore and get the\n * unique portion from a generated UUID. 
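(Illustrative aside, separate from the quoted source: a small sketch of how the `BlobStoreStoreOptions` documented here might be supplied. The option names come from the interface in the quoted code; the concrete `store`, `blob`, and prefix value are placeholders, not taken from the diff.)

```typescript
// Hypothetical configuration sketch using the option names documented above.
const storeOptions: BlobStoreStoreOptions = {
  actionIfInvalid: "prefixUuid",            // rewrite invalid paths under the store's prefix
  pathPrefix: "gs://example-bucket/media/", // placeholder prefix, not from the diff
};

// `store` is assumed to be some concrete BlobStore subclass and `blob` a MediaBlob.
const saved = await store.store(blob, storeOptions);
```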
The original path is stored\n * in the metadata\n */\n actionIfInvalid?: ActionIfInvalidAction;\n\n /**\n * The expected prefix for URIs that are stored.\n * This may be used to test if a MediaBlob is valid and used to create a new\n * path if \"prefixPath\" or \"prefixUuid\" is set for actionIfInvalid.\n */\n pathPrefix?: string;\n}\n\nexport type ActionIfBlobMissingAction = \"emptyBlob\";\n\nexport interface BlobStoreFetchOptions {\n /**\n * If the blob is not found when fetching, what should we do?\n * Subclasses may define their own methods, but the following are supported\n * by default:\n * - Undefined or an empty string: return undefined\n * - \"emptyBlob\": return a new MediaBlob that has the path set, but nothing else.\n */\n actionIfBlobMissing?: ActionIfBlobMissingAction;\n}\n\nexport interface BlobStoreOptions {\n defaultStoreOptions?: BlobStoreStoreOptions;\n\n defaultFetchOptions?: BlobStoreFetchOptions;\n}\n\n/**\n * A specialized Store that is designed to handle MediaBlobs and use the\n * key that is included in the blob to determine exactly how it is stored.\n *\n * The full details of a MediaBlob may be changed when it is stored.\n * For example, it may get additional or different Metadata. This should be\n * what is returned when the store() method is called.\n *\n * Although BlobStore extends BaseStore, not all of the methods from\n * BaseStore may be implemented (or even possible). Those that are not\n * implemented should be documented and throw an Error if called.\n */\nexport abstract class BlobStore extends BaseStore {\n lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n\n defaultStoreOptions: BlobStoreStoreOptions;\n\n defaultFetchOptions: BlobStoreFetchOptions;\n\n constructor(opts?: BlobStoreOptions) {\n super(opts);\n this.defaultStoreOptions = opts?.defaultStoreOptions ?? {};\n this.defaultFetchOptions = opts?.defaultFetchOptions ?? {};\n }\n\n protected async _realKey(key: string | MediaBlob): Promise {\n return typeof key === \"string\" ? key : await key.asUri();\n }\n\n /**\n * Is the path set in the MediaBlob supported by this BlobStore?\n * Subclasses must implement and evaluate `blob.path` to make this\n * determination.\n *\n * Although this is async, this is expected to be a relatively fast operation\n * (ie - you shouldn't make network calls).\n *\n * The default implementation assumes that undefined blob.paths are invalid\n * and then uses the replacePathPrefix (or an empty string) as an assumed path\n * to start with.\n *\n * @param blob The blob to test\n * @param opts Any options (if needed) that may be used to determine if it is valid\n * @return If the string represented by blob.path is supported.\n */\n protected _hasValidPath(\n blob: MediaBlob,\n opts?: BlobStoreStoreOptions\n ): Promise {\n const path = blob.path ?? \"\";\n const prefix = opts?.pathPrefix ?? 
\"\";\n const isPrefixed =\n typeof blob.path !== \"undefined\" && path.startsWith(prefix);\n return Promise.resolve(isPrefixed);\n }\n\n protected _blobPathSuffix(blob: MediaBlob): string {\n // Get the path currently set and make sure we treat it as a string\n const blobPath = `${blob.path}`;\n\n // Advance past the first set of /\n let pathStart = blobPath.indexOf(\"/\") + 1;\n while (blobPath.charAt(pathStart) === \"/\") {\n pathStart += 1;\n }\n\n // We will use the rest as the path for a replacement\n return blobPath.substring(pathStart);\n }\n\n protected async _newBlob(\n oldBlob: MediaBlob,\n newPath?: string\n ): Promise {\n const oldPath = oldBlob.path;\n const metadata = oldBlob?.metadata ?? {};\n metadata.langchainOldPath = oldPath;\n const newBlob = new MediaBlob({\n ...oldBlob,\n metadata,\n });\n if (newPath) {\n newBlob.path = newPath;\n } else if (newBlob.path) {\n delete newBlob.path;\n }\n return newBlob;\n }\n\n protected async _validBlobPrefixPath(\n blob: MediaBlob,\n opts?: BlobStoreStoreOptions\n ): Promise {\n const prefix = opts?.pathPrefix ?? \"\";\n const suffix = this._blobPathSuffix(blob);\n const newPath = `${prefix}${suffix}`;\n return this._newBlob(blob, newPath);\n }\n\n protected async _validBlobPrefixUuid(\n blob: MediaBlob,\n opts?: BlobStoreStoreOptions\n ): Promise {\n const prefix = opts?.pathPrefix ?? \"\";\n const suffix = uuidv4(); // TODO - option to specify version?\n const newPath = `${prefix}${suffix}`;\n return this._newBlob(blob, newPath);\n }\n\n protected async _validBlobRemovePath(\n blob: MediaBlob,\n _opts?: BlobStoreStoreOptions\n ): Promise {\n return this._newBlob(blob, undefined);\n }\n\n /**\n * Based on the blob and options, return a blob that has a valid path\n * that can be saved.\n * @param blob\n * @param opts\n */\n protected async _validStoreBlob(\n blob: MediaBlob,\n opts?: BlobStoreStoreOptions\n ): Promise {\n if (await this._hasValidPath(blob, opts)) {\n return blob;\n }\n switch (opts?.actionIfInvalid) {\n case \"ignore\":\n return blob;\n case \"prefixPath\":\n return this._validBlobPrefixPath(blob, opts);\n case \"prefixUuid\":\n return this._validBlobPrefixUuid(blob, opts);\n case \"removePath\":\n return this._validBlobRemovePath(blob, opts);\n default:\n return undefined;\n }\n }\n\n async store(\n blob: MediaBlob,\n opts: BlobStoreStoreOptions = {}\n ): Promise {\n const allOpts: BlobStoreStoreOptions = {\n ...this.defaultStoreOptions,\n ...opts,\n };\n const validBlob = await this._validStoreBlob(blob, allOpts);\n if (typeof validBlob !== \"undefined\") {\n const validKey = await validBlob.asUri();\n await this.mset([[validKey, validBlob]]);\n const savedKey = await validBlob.asUri();\n return await this.fetch(savedKey);\n }\n return undefined;\n }\n\n protected async _missingFetchBlobEmpty(\n path: string,\n _opts?: BlobStoreFetchOptions\n ): Promise {\n return new MediaBlob({ path });\n }\n\n protected async _missingFetchBlob(\n path: string,\n opts?: BlobStoreFetchOptions\n ): Promise {\n switch (opts?.actionIfBlobMissing) {\n case \"emptyBlob\":\n return this._missingFetchBlobEmpty(path, opts);\n default:\n return undefined;\n }\n }\n\n async fetch(\n key: string | MediaBlob,\n opts: BlobStoreFetchOptions = {}\n ): Promise {\n const allOpts: BlobStoreFetchOptions = {\n ...this.defaultFetchOptions,\n ...opts,\n };\n const realKey = await this._realKey(key);\n const ret = await this.mget([realKey]);\n return ret?.[0] ?? 
(await this._missingFetchBlob(realKey, allOpts));\n }\n}\n\nexport interface BackedBlobStoreOptions extends BlobStoreOptions {\n backingStore: BaseStore;\n}\n\nexport class BackedBlobStore extends BlobStore {\n backingStore: BaseStore;\n\n constructor(opts: BackedBlobStoreOptions) {\n super(opts);\n this.backingStore = opts.backingStore;\n }\n\n mdelete(keys: string[]): Promise {\n return this.backingStore.mdelete(keys);\n }\n\n mget(keys: string[]): Promise<(MediaBlob | undefined)[]> {\n return this.backingStore.mget(keys);\n }\n\n mset(keyValuePairs: [string, MediaBlob][]): Promise {\n return this.backingStore.mset(keyValuePairs);\n }\n\n yieldKeys(prefix: string | undefined): AsyncGenerator {\n return this.backingStore.yieldKeys(prefix);\n }\n}\n\nexport class SimpleWebBlobStore extends BlobStore {", - "repo_full_name": "langchain-ai/langchainjs", - "discussion_comments": [ - { - "comment_id": "1667178276", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5835, - "pr_file": "libs/langchain-google-common/src/experimental/utils/media_core.ts", - "discussion_id": "1667178276", - "commented_code": "@@ -0,0 +1,504 @@\n+import { v4 as uuidv4 } from \"uuid\";\n+import { BaseStore } from \"@langchain/core/stores\";\n+import { Serializable } from \"@langchain/core/load/serializable\";\n+\n+export interface MediaBlobParameters {\n+ data?: Blob;\n+\n+ metadata?: Record;\n+\n+ path?: string;\n+}\n+\n+/**\n+ * Represents a chunk of data that can be identified by the path where the\n+ * data is (or will be) located, along with optional metadata about the data.\n+ */\n+export class MediaBlob\n+ extends Serializable // FIXME - I'm not sure this serializes or deserializes correctly\n+ implements MediaBlobParameters\n+{\n+ lc_serializable = true;\n+\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n+\n+ data?: Blob;\n+\n+ // eslint-disable-next-line @typescript-eslint/no-explicit-any\n+ metadata?: Record;\n+\n+ path?: string;\n+\n+ constructor(params?: MediaBlobParameters) {\n+ super(params);\n+ this.data = params?.data;\n+ this.metadata = params?.metadata;\n+ this.path = params?.path;\n+ }\n+\n+ get size(): number {\n+ return this.data?.size ?? 0;\n+ }\n+\n+ get dataType(): string {\n+ return this.data?.type ?? \"\";\n+ }\n+\n+ get encoding(): string {\n+ const charsetEquals = this.dataType.indexOf(\"charset=\");\n+ return charsetEquals === -1\n+ ? \"utf-8\"\n+ : this.dataType.substring(charsetEquals + 8);\n+ }\n+\n+ get mimetype(): string {\n+ const semicolon = this.dataType.indexOf(\";\");\n+ return semicolon === -1\n+ ? this.dataType\n+ : this.dataType.substring(0, semicolon);\n+ }\n+\n+ async asString(): Promise {\n+ const data = this.data ?? new Blob([]);\n+ const dataBuffer = await data.arrayBuffer();\n+ const dataArray = new Uint8Array(dataBuffer);\n+\n+ // Need to handle the array in smaller chunks to deal with stack size limits\n+ let ret = \"\";\n+ const chunkSize = 102400;\n+ for (let i = 0; i < dataArray.length; i += chunkSize) {\n+ const chunk = dataArray.subarray(i, i + chunkSize);\n+ ret += String.fromCharCode(...chunk);\n+ }\n+\n+ return ret;\n+ }\n+\n+ async asBase64(): Promise {\n+ return btoa(await this.asString());\n+ }\n+\n+ async asDataUrl(): Promise {\n+ return `data:${this.mimetype};base64,${await this.asBase64()}`;\n+ }\n+\n+ async asUri(): Promise {\n+ return this.path ?? 
(await this.asDataUrl());\n+ }\n+\n+ async encode(): Promise<{ encoded: string; encoding: string }> {\n+ const dataUrl = await this.asDataUrl();\n+ const comma = dataUrl.indexOf(\",\");\n+ const encoded = dataUrl.substring(comma + 1);\n+ const encoding: string = dataUrl.indexOf(\"base64\") > -1 ? \"base64\" : \"8bit\";\n+ return {\n+ encoded,\n+ encoding,\n+ };\n+ }\n+}\n+\n+export type ActionIfInvalidAction =\n+ | \"ignore\"\n+ | \"prefixPath\"\n+ | \"prefixUuid\"\n+ | \"removePath\";\n+\n+export interface BlobStoreStoreOptions {\n+ /**\n+ * If the path is missing or invalid in the blob, how should we create\n+ * a new path?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an emtpy string: Reject the blob\n+ * - \"ignore\": Attempt to store it anyway (but this may fail)\n+ * - \"prefixPath\": Use the default prefix for the BlobStore and get the\n+ * unique portion from the URL. The original path is stored in the metadata\n+ * - \"prefixUuid\": Use the default prefix for the BlobStore and get the\n+ * unique portion from a generated UUID. The original path is stored\n+ * in the metadata\n+ */\n+ actionIfInvalid?: ActionIfInvalidAction;\n+\n+ /**\n+ * The expected prefix for URIs that are stored.\n+ * This may be used to test if a MediaBlob is valid and used to create a new\n+ * path if \"prefixPath\" or \"prefixUuid\" is set for actionIfInvalid.\n+ */\n+ pathPrefix?: string;\n+}\n+\n+export type ActionIfBlobMissingAction = \"emptyBlob\";\n+\n+export interface BlobStoreFetchOptions {\n+ /**\n+ * If the blob is not found when fetching, what should we do?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an empty string: return undefined\n+ * - \"emptyBlob\": return a new MediaBlob that has the path set, but nothing else.\n+ */\n+ actionIfBlobMissing?: ActionIfBlobMissingAction;\n+}\n+\n+export interface BlobStoreOptions {\n+ defaultStoreOptions?: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions?: BlobStoreFetchOptions;\n+}\n+\n+/**\n+ * A specialized Store that is designed to handle MediaBlobs and use the\n+ * key that is included in the blob to determine exactly how it is stored.\n+ *\n+ * The full details of a MediaBlob may be changed when it is stored.\n+ * For example, it may get additional or different Metadata. This should be\n+ * what is returned when the store() method is called.\n+ *\n+ * Although BlobStore extends BaseStore, not all of the methods from\n+ * BaseStore may be implemented (or even possible). Those that are not\n+ * implemented should be documented and throw an Error if called.\n+ */\n+export abstract class BlobStore extends BaseStore {\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n+\n+ defaultStoreOptions: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions: BlobStoreFetchOptions;\n+\n+ constructor(opts?: BlobStoreOptions) {\n+ super(opts);\n+ this.defaultStoreOptions = opts?.defaultStoreOptions ?? {};\n+ this.defaultFetchOptions = opts?.defaultFetchOptions ?? {};\n+ }\n+\n+ protected async _realKey(key: string | MediaBlob): Promise {\n+ return typeof key === \"string\" ? 
key : await key.asUri();\n+ }\n+\n+ /**\n+ * Is the path set in the MediaBlob supported by this BlobStore?\n+ * Subclasses must implement and evaluate `blob.path` to make this\n+ * determination.\n+ *\n+ * Although this is async, this is expected to be a relatively fast operation\n+ * (ie - you shouldn't make network calls).\n+ *\n+ * The default implementation assumes that undefined blob.paths are invalid\n+ * and then uses the replacePathPrefix (or an empty string) as an assumed path\n+ * to start with.\n+ *\n+ * @param blob The blob to test\n+ * @param opts Any options (if needed) that may be used to determine if it is valid\n+ * @return If the string represented by blob.path is supported.\n+ */\n+ protected _hasValidPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const path = blob.path ?? \"\";\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const isPrefixed =\n+ typeof blob.path !== \"undefined\" && path.startsWith(prefix);\n+ return Promise.resolve(isPrefixed);\n+ }\n+\n+ protected _blobPathSuffix(blob: MediaBlob): string {\n+ // Get the path currently set and make sure we treat it as a string\n+ const blobPath = `${blob.path}`;\n+\n+ // Advance past the first set of /\n+ let pathStart = blobPath.indexOf(\"/\") + 1;\n+ while (blobPath.charAt(pathStart) === \"/\") {\n+ pathStart += 1;\n+ }\n+\n+ // We will use the rest as the path for a replacement\n+ return blobPath.substring(pathStart);\n+ }\n+\n+ protected async _newBlob(\n+ oldBlob: MediaBlob,\n+ newPath?: string\n+ ): Promise {\n+ const oldPath = oldBlob.path;\n+ const metadata = oldBlob?.metadata ?? {};\n+ metadata.langchainOldPath = oldPath;\n+ const newBlob = new MediaBlob({\n+ ...oldBlob,\n+ metadata,\n+ });\n+ if (newPath) {\n+ newBlob.path = newPath;\n+ } else if (newBlob.path) {\n+ delete newBlob.path;\n+ }\n+ return newBlob;\n+ }\n+\n+ protected async _validBlobPrefixPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const suffix = this._blobPathSuffix(blob);\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobPrefixUuid(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? 
\"\";\n+ const suffix = uuidv4(); // TODO - option to specify version?\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobRemovePath(\n+ blob: MediaBlob,\n+ _opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ return this._newBlob(blob, undefined);\n+ }\n+\n+ /**\n+ * Based on the blob and options, return a blob that has a valid path\n+ * that can be saved.\n+ * @param blob\n+ * @param opts\n+ */\n+ protected async _validStoreBlob(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ if (await this._hasValidPath(blob, opts)) {\n+ return blob;\n+ }\n+ switch (opts?.actionIfInvalid) {\n+ case \"ignore\":\n+ return blob;\n+ case \"prefixPath\":\n+ return this._validBlobPrefixPath(blob, opts);\n+ case \"prefixUuid\":\n+ return this._validBlobPrefixUuid(blob, opts);\n+ case \"removePath\":\n+ return this._validBlobRemovePath(blob, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async store(\n+ blob: MediaBlob,\n+ opts: BlobStoreStoreOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreStoreOptions = {\n+ ...this.defaultStoreOptions,\n+ ...opts,\n+ };\n+ const validBlob = await this._validStoreBlob(blob, allOpts);\n+ if (typeof validBlob !== \"undefined\") {\n+ const validKey = await validBlob.asUri();\n+ await this.mset([[validKey, validBlob]]);\n+ const savedKey = await validBlob.asUri();\n+ return await this.fetch(savedKey);\n+ }\n+ return undefined;\n+ }\n+\n+ protected async _missingFetchBlobEmpty(\n+ path: string,\n+ _opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ return new MediaBlob({ path });\n+ }\n+\n+ protected async _missingFetchBlob(\n+ path: string,\n+ opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ switch (opts?.actionIfBlobMissing) {\n+ case \"emptyBlob\":\n+ return this._missingFetchBlobEmpty(path, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async fetch(\n+ key: string | MediaBlob,\n+ opts: BlobStoreFetchOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreFetchOptions = {\n+ ...this.defaultFetchOptions,\n+ ...opts,\n+ };\n+ const realKey = await this._realKey(key);\n+ const ret = await this.mget([realKey]);\n+ return ret?.[0] ?? 
(await this._missingFetchBlob(realKey, allOpts));\n+ }\n+}\n+\n+export interface BackedBlobStoreOptions extends BlobStoreOptions {\n+ backingStore: BaseStore;\n+}\n+\n+export class BackedBlobStore extends BlobStore {\n+ backingStore: BaseStore;\n+\n+ constructor(opts: BackedBlobStoreOptions) {\n+ super(opts);\n+ this.backingStore = opts.backingStore;\n+ }\n+\n+ mdelete(keys: string[]): Promise {\n+ return this.backingStore.mdelete(keys);\n+ }\n+\n+ mget(keys: string[]): Promise<(MediaBlob | undefined)[]> {\n+ return this.backingStore.mget(keys);\n+ }\n+\n+ mset(keyValuePairs: [string, MediaBlob][]): Promise {\n+ return this.backingStore.mset(keyValuePairs);\n+ }\n+\n+ yieldKeys(prefix: string | undefined): AsyncGenerator {\n+ return this.backingStore.yieldKeys(prefix);\n+ }\n+}\n+\n+export class SimpleWebBlobStore extends BlobStore {", - "comment_created_at": "2024-07-05T21:49:09+00:00", - "comment_author": "eyurtsev", - "comment_body": "If this code can be executed on a server, then it allows for an SSRF attack", - "pr_file_module": null - }, - { - "comment_id": "1669466959", - "repo_full_name": "langchain-ai/langchainjs", - "pr_number": 5835, - "pr_file": "libs/langchain-google-common/src/experimental/utils/media_core.ts", - "discussion_id": "1667178276", - "commented_code": "@@ -0,0 +1,504 @@\n+import { v4 as uuidv4 } from \"uuid\";\n+import { BaseStore } from \"@langchain/core/stores\";\n+import { Serializable } from \"@langchain/core/load/serializable\";\n+\n+export interface MediaBlobParameters {\n+ data?: Blob;\n+\n+ metadata?: Record;\n+\n+ path?: string;\n+}\n+\n+/**\n+ * Represents a chunk of data that can be identified by the path where the\n+ * data is (or will be) located, along with optional metadata about the data.\n+ */\n+export class MediaBlob\n+ extends Serializable // FIXME - I'm not sure this serializes or deserializes correctly\n+ implements MediaBlobParameters\n+{\n+ lc_serializable = true;\n+\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? And why?\n+\n+ data?: Blob;\n+\n+ // eslint-disable-next-line @typescript-eslint/no-explicit-any\n+ metadata?: Record;\n+\n+ path?: string;\n+\n+ constructor(params?: MediaBlobParameters) {\n+ super(params);\n+ this.data = params?.data;\n+ this.metadata = params?.metadata;\n+ this.path = params?.path;\n+ }\n+\n+ get size(): number {\n+ return this.data?.size ?? 0;\n+ }\n+\n+ get dataType(): string {\n+ return this.data?.type ?? \"\";\n+ }\n+\n+ get encoding(): string {\n+ const charsetEquals = this.dataType.indexOf(\"charset=\");\n+ return charsetEquals === -1\n+ ? \"utf-8\"\n+ : this.dataType.substring(charsetEquals + 8);\n+ }\n+\n+ get mimetype(): string {\n+ const semicolon = this.dataType.indexOf(\";\");\n+ return semicolon === -1\n+ ? this.dataType\n+ : this.dataType.substring(0, semicolon);\n+ }\n+\n+ async asString(): Promise {\n+ const data = this.data ?? 
new Blob([]);\n+ const dataBuffer = await data.arrayBuffer();\n+ const dataArray = new Uint8Array(dataBuffer);\n+\n+ // Need to handle the array in smaller chunks to deal with stack size limits\n+ let ret = \"\";\n+ const chunkSize = 102400;\n+ for (let i = 0; i < dataArray.length; i += chunkSize) {\n+ const chunk = dataArray.subarray(i, i + chunkSize);\n+ ret += String.fromCharCode(...chunk);\n+ }\n+\n+ return ret;\n+ }\n+\n+ async asBase64(): Promise {\n+ return btoa(await this.asString());\n+ }\n+\n+ async asDataUrl(): Promise {\n+ return `data:${this.mimetype};base64,${await this.asBase64()}`;\n+ }\n+\n+ async asUri(): Promise {\n+ return this.path ?? (await this.asDataUrl());\n+ }\n+\n+ async encode(): Promise<{ encoded: string; encoding: string }> {\n+ const dataUrl = await this.asDataUrl();\n+ const comma = dataUrl.indexOf(\",\");\n+ const encoded = dataUrl.substring(comma + 1);\n+ const encoding: string = dataUrl.indexOf(\"base64\") > -1 ? \"base64\" : \"8bit\";\n+ return {\n+ encoded,\n+ encoding,\n+ };\n+ }\n+}\n+\n+export type ActionIfInvalidAction =\n+ | \"ignore\"\n+ | \"prefixPath\"\n+ | \"prefixUuid\"\n+ | \"removePath\";\n+\n+export interface BlobStoreStoreOptions {\n+ /**\n+ * If the path is missing or invalid in the blob, how should we create\n+ * a new path?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an emtpy string: Reject the blob\n+ * - \"ignore\": Attempt to store it anyway (but this may fail)\n+ * - \"prefixPath\": Use the default prefix for the BlobStore and get the\n+ * unique portion from the URL. The original path is stored in the metadata\n+ * - \"prefixUuid\": Use the default prefix for the BlobStore and get the\n+ * unique portion from a generated UUID. The original path is stored\n+ * in the metadata\n+ */\n+ actionIfInvalid?: ActionIfInvalidAction;\n+\n+ /**\n+ * The expected prefix for URIs that are stored.\n+ * This may be used to test if a MediaBlob is valid and used to create a new\n+ * path if \"prefixPath\" or \"prefixUuid\" is set for actionIfInvalid.\n+ */\n+ pathPrefix?: string;\n+}\n+\n+export type ActionIfBlobMissingAction = \"emptyBlob\";\n+\n+export interface BlobStoreFetchOptions {\n+ /**\n+ * If the blob is not found when fetching, what should we do?\n+ * Subclasses may define their own methods, but the following are supported\n+ * by default:\n+ * - Undefined or an empty string: return undefined\n+ * - \"emptyBlob\": return a new MediaBlob that has the path set, but nothing else.\n+ */\n+ actionIfBlobMissing?: ActionIfBlobMissingAction;\n+}\n+\n+export interface BlobStoreOptions {\n+ defaultStoreOptions?: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions?: BlobStoreFetchOptions;\n+}\n+\n+/**\n+ * A specialized Store that is designed to handle MediaBlobs and use the\n+ * key that is included in the blob to determine exactly how it is stored.\n+ *\n+ * The full details of a MediaBlob may be changed when it is stored.\n+ * For example, it may get additional or different Metadata. This should be\n+ * what is returned when the store() method is called.\n+ *\n+ * Although BlobStore extends BaseStore, not all of the methods from\n+ * BaseStore may be implemented (or even possible). Those that are not\n+ * implemented should be documented and throw an Error if called.\n+ */\n+export abstract class BlobStore extends BaseStore {\n+ lc_namespace = [\"langchain\", \"google-common\"]; // FIXME - What should this be? 
And why?\n+\n+ defaultStoreOptions: BlobStoreStoreOptions;\n+\n+ defaultFetchOptions: BlobStoreFetchOptions;\n+\n+ constructor(opts?: BlobStoreOptions) {\n+ super(opts);\n+ this.defaultStoreOptions = opts?.defaultStoreOptions ?? {};\n+ this.defaultFetchOptions = opts?.defaultFetchOptions ?? {};\n+ }\n+\n+ protected async _realKey(key: string | MediaBlob): Promise {\n+ return typeof key === \"string\" ? key : await key.asUri();\n+ }\n+\n+ /**\n+ * Is the path set in the MediaBlob supported by this BlobStore?\n+ * Subclasses must implement and evaluate `blob.path` to make this\n+ * determination.\n+ *\n+ * Although this is async, this is expected to be a relatively fast operation\n+ * (ie - you shouldn't make network calls).\n+ *\n+ * The default implementation assumes that undefined blob.paths are invalid\n+ * and then uses the replacePathPrefix (or an empty string) as an assumed path\n+ * to start with.\n+ *\n+ * @param blob The blob to test\n+ * @param opts Any options (if needed) that may be used to determine if it is valid\n+ * @return If the string represented by blob.path is supported.\n+ */\n+ protected _hasValidPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const path = blob.path ?? \"\";\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const isPrefixed =\n+ typeof blob.path !== \"undefined\" && path.startsWith(prefix);\n+ return Promise.resolve(isPrefixed);\n+ }\n+\n+ protected _blobPathSuffix(blob: MediaBlob): string {\n+ // Get the path currently set and make sure we treat it as a string\n+ const blobPath = `${blob.path}`;\n+\n+ // Advance past the first set of /\n+ let pathStart = blobPath.indexOf(\"/\") + 1;\n+ while (blobPath.charAt(pathStart) === \"/\") {\n+ pathStart += 1;\n+ }\n+\n+ // We will use the rest as the path for a replacement\n+ return blobPath.substring(pathStart);\n+ }\n+\n+ protected async _newBlob(\n+ oldBlob: MediaBlob,\n+ newPath?: string\n+ ): Promise {\n+ const oldPath = oldBlob.path;\n+ const metadata = oldBlob?.metadata ?? {};\n+ metadata.langchainOldPath = oldPath;\n+ const newBlob = new MediaBlob({\n+ ...oldBlob,\n+ metadata,\n+ });\n+ if (newPath) {\n+ newBlob.path = newPath;\n+ } else if (newBlob.path) {\n+ delete newBlob.path;\n+ }\n+ return newBlob;\n+ }\n+\n+ protected async _validBlobPrefixPath(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? \"\";\n+ const suffix = this._blobPathSuffix(blob);\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobPrefixUuid(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ const prefix = opts?.pathPrefix ?? 
\"\";\n+ const suffix = uuidv4(); // TODO - option to specify version?\n+ const newPath = `${prefix}${suffix}`;\n+ return this._newBlob(blob, newPath);\n+ }\n+\n+ protected async _validBlobRemovePath(\n+ blob: MediaBlob,\n+ _opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ return this._newBlob(blob, undefined);\n+ }\n+\n+ /**\n+ * Based on the blob and options, return a blob that has a valid path\n+ * that can be saved.\n+ * @param blob\n+ * @param opts\n+ */\n+ protected async _validStoreBlob(\n+ blob: MediaBlob,\n+ opts?: BlobStoreStoreOptions\n+ ): Promise {\n+ if (await this._hasValidPath(blob, opts)) {\n+ return blob;\n+ }\n+ switch (opts?.actionIfInvalid) {\n+ case \"ignore\":\n+ return blob;\n+ case \"prefixPath\":\n+ return this._validBlobPrefixPath(blob, opts);\n+ case \"prefixUuid\":\n+ return this._validBlobPrefixUuid(blob, opts);\n+ case \"removePath\":\n+ return this._validBlobRemovePath(blob, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async store(\n+ blob: MediaBlob,\n+ opts: BlobStoreStoreOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreStoreOptions = {\n+ ...this.defaultStoreOptions,\n+ ...opts,\n+ };\n+ const validBlob = await this._validStoreBlob(blob, allOpts);\n+ if (typeof validBlob !== \"undefined\") {\n+ const validKey = await validBlob.asUri();\n+ await this.mset([[validKey, validBlob]]);\n+ const savedKey = await validBlob.asUri();\n+ return await this.fetch(savedKey);\n+ }\n+ return undefined;\n+ }\n+\n+ protected async _missingFetchBlobEmpty(\n+ path: string,\n+ _opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ return new MediaBlob({ path });\n+ }\n+\n+ protected async _missingFetchBlob(\n+ path: string,\n+ opts?: BlobStoreFetchOptions\n+ ): Promise {\n+ switch (opts?.actionIfBlobMissing) {\n+ case \"emptyBlob\":\n+ return this._missingFetchBlobEmpty(path, opts);\n+ default:\n+ return undefined;\n+ }\n+ }\n+\n+ async fetch(\n+ key: string | MediaBlob,\n+ opts: BlobStoreFetchOptions = {}\n+ ): Promise {\n+ const allOpts: BlobStoreFetchOptions = {\n+ ...this.defaultFetchOptions,\n+ ...opts,\n+ };\n+ const realKey = await this._realKey(key);\n+ const ret = await this.mget([realKey]);\n+ return ret?.[0] ?? (await this._missingFetchBlob(realKey, allOpts));\n+ }\n+}\n+\n+export interface BackedBlobStoreOptions extends BlobStoreOptions {\n+ backingStore: BaseStore;\n+}\n+\n+export class BackedBlobStore extends BlobStore {\n+ backingStore: BaseStore;\n+\n+ constructor(opts: BackedBlobStoreOptions) {\n+ super(opts);\n+ this.backingStore = opts.backingStore;\n+ }\n+\n+ mdelete(keys: string[]): Promise {\n+ return this.backingStore.mdelete(keys);\n+ }\n+\n+ mget(keys: string[]): Promise<(MediaBlob | undefined)[]> {\n+ return this.backingStore.mget(keys);\n+ }\n+\n+ mset(keyValuePairs: [string, MediaBlob][]): Promise {\n+ return this.backingStore.mset(keyValuePairs);\n+ }\n+\n+ yieldKeys(prefix: string | undefined): AsyncGenerator {\n+ return this.backingStore.yieldKeys(prefix);\n+ }\n+}\n+\n+export class SimpleWebBlobStore extends BlobStore {", - "comment_created_at": "2024-07-09T00:10:10+00:00", - "comment_author": "afirstenberg", - "comment_body": "A fair point.\r\n\r\nHowever, we have code that is already doing this in the Gemini implementations. (See https://github.com/langchain-ai/langchain-google/blob/7c678dcc59bb4e4d7a1efdbb9572d74b75e87665/libs/genai/langchain_google_genai/_image_utils.py#L137 for where it is done in Python, for example.)\r\n\r\nMaking this a separate class puts controls of the URL retrieval in the hands of the developer. 
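(Editorial aside, not part of the review thread: one concrete way for a developer to keep that control server-side is to validate every URL against an allowlist before fetching, which addresses the SSRF concern raised earlier. The sketch below is a generic pattern, not part of the package; the host names are placeholders, and the closing comment only suggests where such a check could be wired into a custom `BlobStore`.)

```typescript
// Hypothetical SSRF guard: validate a URL before any server-side fetch.
const ALLOWED_HOSTS = new Set(["images.example.com", "cdn.example.com"]); // placeholder hosts

function assertFetchAllowed(rawUrl: string): URL {
  const url = new URL(rawUrl); // throws on malformed input
  if (url.protocol !== "https:") {
    throw new Error(`Refusing non-HTTPS fetch: ${url.protocol}`);
  }
  if (!ALLOWED_HOSTS.has(url.hostname)) {
    throw new Error(`Refusing to fetch from untrusted host: ${url.hostname}`);
  }
  return url;
}

// A custom BlobStore subclass could call this with the resolved key
// (e.g. assertFetchAllowed(await this._realKey(key))) before performing the fetch.
```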
the `SimpleWebBlobStore` is just what it says - simple. But it can serve as a base for developers to make subclasses (that do things such as authentication if necessary) that still work against a \"normal\" http/https URL.\r\n\r\nAnd we do need *something* that does the fetch for the `MediaManager` so that Gemini can work the same as OpenAI models (where you're expected to pass a public URL for images). Making it have the same signature as other `BlobStore`s makes sense for flexibility.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-avoid-eval-function.json b/_reviewers/mxnet-avoid-eval-function.json new file mode 100644 index 0000000..c798cd5 --- /dev/null +++ b/_reviewers/mxnet-avoid-eval-function.json @@ -0,0 +1,36 @@ +[ + { + "discussion_id": "250018330", + "pr_number": 13735, + "pr_file": "example/gluon/wavenet/trainer.py", + "created_at": "2019-01-23T00:46:51+00:00", + "commented_code": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements. See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership. The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License. You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied. See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\"\"\"\nModule: WaveNet trainer modulep\n\"\"\"\nimport sys\nimport numpy as np\nimport mxnet as mx\nfrom mxnet import gluon, autograd, nd\nfrom tqdm import trange\n\nfrom models import WaveNet\nfrom utils import decode_mu_law\nfrom data_loader import load_wav, data_generation, data_generation_sample\n# pylint: disable=invalid-name, too-many-arguments, too-many-instance-attributes, no-member, no-self-use\n# set gpu count\ndef setting_ctx(use_gpu):\n \"\"\"\n Description : setting cpu/gpu\n \"\"\"\n if eval(use_gpu):", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "250018330", + "repo_full_name": "apache/mxnet", + "pr_number": 13735, + "pr_file": "example/gluon/wavenet/trainer.py", + "discussion_id": "250018330", + "commented_code": "@@ -0,0 +1,130 @@\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. 
See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+\"\"\"\n+Module: WaveNet trainer modulep\n+\"\"\"\n+import sys\n+import numpy as np\n+import mxnet as mx\n+from mxnet import gluon, autograd, nd\n+from tqdm import trange\n+\n+from models import WaveNet\n+from utils import decode_mu_law\n+from data_loader import load_wav, data_generation, data_generation_sample\n+# pylint: disable=invalid-name, too-many-arguments, too-many-instance-attributes, no-member, no-self-use\n+# set gpu count\n+def setting_ctx(use_gpu):\n+ \"\"\"\n+ Description : setting cpu/gpu\n+ \"\"\"\n+ if eval(use_gpu):", + "comment_created_at": "2019-01-23T00:46:51+00:00", + "comment_author": "ThomasDelteil", + "comment_body": "please do not use eval here", + "pr_file_module": null + }, + { + "comment_id": "252286024", + "repo_full_name": "apache/mxnet", + "pr_number": 13735, + "pr_file": "example/gluon/wavenet/trainer.py", + "discussion_id": "250018330", + "commented_code": "@@ -0,0 +1,130 @@\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+\"\"\"\n+Module: WaveNet trainer modulep\n+\"\"\"\n+import sys\n+import numpy as np\n+import mxnet as mx\n+from mxnet import gluon, autograd, nd\n+from tqdm import trange\n+\n+from models import WaveNet\n+from utils import decode_mu_law\n+from data_loader import load_wav, data_generation, data_generation_sample\n+# pylint: disable=invalid-name, too-many-arguments, too-many-instance-attributes, no-member, no-self-use\n+# set gpu count\n+def setting_ctx(use_gpu):\n+ \"\"\"\n+ Description : setting cpu/gpu\n+ \"\"\"\n+ if eval(use_gpu):", + "comment_created_at": "2019-01-30T14:46:27+00:00", + "comment_author": "seujung", + "comment_body": "delete eval and change the use_gpu mode", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-avoid-eval-function.md b/_reviewers/mxnet-avoid-eval-function.md index 674f627..a8abbb0 100644 --- a/_reviewers/mxnet-avoid-eval-function.md +++ b/_reviewers/mxnet-avoid-eval-function.md @@ -29,41 +29,3 @@ def setting_ctx(use_gpu): ``` For other type conversions, use appropriate functions like `int()`, `float()`, or `json.loads()` to safely parse data without executing code. - - -[ - { - "discussion_id": "250018330", - "pr_number": 13735, - "pr_file": "example/gluon/wavenet/trainer.py", - "created_at": "2019-01-23T00:46:51+00:00", - "commented_code": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements. See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership. 
The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License. You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied. See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\"\"\"\nModule: WaveNet trainer modulep\n\"\"\"\nimport sys\nimport numpy as np\nimport mxnet as mx\nfrom mxnet import gluon, autograd, nd\nfrom tqdm import trange\n\nfrom models import WaveNet\nfrom utils import decode_mu_law\nfrom data_loader import load_wav, data_generation, data_generation_sample\n# pylint: disable=invalid-name, too-many-arguments, too-many-instance-attributes, no-member, no-self-use\n# set gpu count\ndef setting_ctx(use_gpu):\n \"\"\"\n Description : setting cpu/gpu\n \"\"\"\n if eval(use_gpu):", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "250018330", - "repo_full_name": "apache/mxnet", - "pr_number": 13735, - "pr_file": "example/gluon/wavenet/trainer.py", - "discussion_id": "250018330", - "commented_code": "@@ -0,0 +1,130 @@\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+\"\"\"\n+Module: WaveNet trainer modulep\n+\"\"\"\n+import sys\n+import numpy as np\n+import mxnet as mx\n+from mxnet import gluon, autograd, nd\n+from tqdm import trange\n+\n+from models import WaveNet\n+from utils import decode_mu_law\n+from data_loader import load_wav, data_generation, data_generation_sample\n+# pylint: disable=invalid-name, too-many-arguments, too-many-instance-attributes, no-member, no-self-use\n+# set gpu count\n+def setting_ctx(use_gpu):\n+ \"\"\"\n+ Description : setting cpu/gpu\n+ \"\"\"\n+ if eval(use_gpu):", - "comment_created_at": "2019-01-23T00:46:51+00:00", - "comment_author": "ThomasDelteil", - "comment_body": "please do not use eval here", - "pr_file_module": null - }, - { - "comment_id": "252286024", - "repo_full_name": "apache/mxnet", - "pr_number": 13735, - "pr_file": "example/gluon/wavenet/trainer.py", - "discussion_id": "250018330", - "commented_code": "@@ -0,0 +1,130 @@\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. 
You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+\"\"\"\n+Module: WaveNet trainer modulep\n+\"\"\"\n+import sys\n+import numpy as np\n+import mxnet as mx\n+from mxnet import gluon, autograd, nd\n+from tqdm import trange\n+\n+from models import WaveNet\n+from utils import decode_mu_law\n+from data_loader import load_wav, data_generation, data_generation_sample\n+# pylint: disable=invalid-name, too-many-arguments, too-many-instance-attributes, no-member, no-self-use\n+# set gpu count\n+def setting_ctx(use_gpu):\n+ \"\"\"\n+ Description : setting cpu/gpu\n+ \"\"\"\n+ if eval(use_gpu):", - "comment_created_at": "2019-01-30T14:46:27+00:00", - "comment_author": "seujung", - "comment_body": "delete eval and change the use_gpu mode", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-avoid-redundant-calculations.json b/_reviewers/mxnet-avoid-redundant-calculations.json new file mode 100644 index 0000000..c0fb030 --- /dev/null +++ b/_reviewers/mxnet-avoid-redundant-calculations.json @@ -0,0 +1,102 @@ +[ + { + "discussion_id": "764007192", + "pr_number": 20757, + "pr_file": "src/operator/nn/dnnl/dnnl_split.cc", + "created_at": "2021-12-07T13:46:24+00:00", + "commented_code": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied. See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n/*!\n * \\file dnnl_split.cc\n */\n\n#if MXNET_USE_ONEDNN == 1\n\n#include \"../../tensor/matrix_op-inl.h\"\n#include \"./dnnl_split-inl.h\"\n\nnamespace mxnet {\nnamespace op {\n\nbool SupportDNNLSplit(const NDArray& input) {\n static const std::set supported_dtypes = {\n mshadow::kFloat32, mshadow::kBfloat16, mshadow::kInt32, mshadow::kInt8, mshadow::kUint8};\n return supported_dtypes.count(input.dtype());\n}\n\nvoid DNNLSplitForward(const nnvm::NodeAttrs& attrs,\n const OpContext& ctx,\n const std::vector& inputs,\n const std::vector& req,\n const std::vector& outputs) {\n const SplitParam& param = dmlc::get(attrs.parsed);\n const auto tensors = DNNLSplitFwd::Tensors(inputs[0], outputs);\n\n const auto& ishape = tensors.input.shape();\n const int split_axis = param.axis >= 0 ? param.axis : param.axis + ishape.ndim();\n const mxnet::TShape split_pts =\n (param.sections > 0) ? 
GetSplitIndices(tensors.input.shape(), split_axis, param.sections) :\n param.indices;\n\n const auto fwd = DNNLSplitFwd::GetCached(param, tensors, split_pts, split_axis);\n fwd.Execute(tensors, split_pts, split_axis, req);\n}\n\nDNNLSplitFwd::Tensors::Tensors(const NDArray& input, const std::vector& outputs)\n : input(input), outputs(outputs) {}\n\ntypedef ParamOpSign DNNLSplitSignature;\n\nDNNLSplitFwd DNNLSplitFwd::GetCached(const SplitParam& param,\n const Tensors& tensors,\n const TShape& split_pts,\n const int split_axis) {\n#if DMLC_CXX11_THREAD_LOCAL\n static thread_local std::unordered_map fwds;\n#else\n static MX_THREAD_LOCAL std::unordered_map fwds;\n#endif\n\n DNNLSplitSignature key(param);\n key.AddSign(tensors.input);\n key.AddSign(tensors.outputs);\n key.AddSign(split_pts);\n key.AddSign(split_axis);\n auto it = fwds.find(key);\n if (it == fwds.end()) {\n DNNLSplitFwd fwd(tensors, split_pts, split_axis);\n it = AddToCache(&fwds, key, fwd);\n }\n return it->second;\n}\n\nDNNLSplitFwd::DNNLSplitFwd(const Tensors& tensors, const TShape& split_pts, const int split_axis) {\n const auto cpu_engine = CpuEngine::Get()->get_engine();\n const auto input = tensors.input.Reorder2Default();\n const auto& ishape = input.shape();\n const auto& dtype = get_dnnl_type(input.dtype());\n const auto format_tag = static_cast(GetDefaultFormat(ishape.ndim()));\n\n dnnl::memory::dims strides(ishape.ndim(), 1);\n // last dim stride = 1, start loop from the penultimate\n for (int i = ishape.ndim() - 2; i >= 0; --i) {\n strides[i] = strides[i + 1] * ishape[i + 1];", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "764007192", + "repo_full_name": "apache/mxnet", + "pr_number": 20757, + "pr_file": "src/operator/nn/dnnl/dnnl_split.cc", + "discussion_id": "764007192", + "commented_code": "@@ -0,0 +1,155 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * \\file dnnl_split.cc\n+ */\n+\n+#if MXNET_USE_ONEDNN == 1\n+\n+#include \"../../tensor/matrix_op-inl.h\"\n+#include \"./dnnl_split-inl.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+bool SupportDNNLSplit(const NDArray& input) {\n+ static const std::set supported_dtypes = {\n+ mshadow::kFloat32, mshadow::kBfloat16, mshadow::kInt32, mshadow::kInt8, mshadow::kUint8};\n+ return supported_dtypes.count(input.dtype());\n+}\n+\n+void DNNLSplitForward(const nnvm::NodeAttrs& attrs,\n+ const OpContext& ctx,\n+ const std::vector& inputs,\n+ const std::vector& req,\n+ const std::vector& outputs) {\n+ const SplitParam& param = dmlc::get(attrs.parsed);\n+ const auto tensors = DNNLSplitFwd::Tensors(inputs[0], outputs);\n+\n+ const auto& ishape = tensors.input.shape();\n+ const int split_axis = param.axis >= 0 ? 
param.axis : param.axis + ishape.ndim();\n+ const mxnet::TShape split_pts =\n+ (param.sections > 0) ? GetSplitIndices(tensors.input.shape(), split_axis, param.sections) :\n+ param.indices;\n+\n+ const auto fwd = DNNLSplitFwd::GetCached(param, tensors, split_pts, split_axis);\n+ fwd.Execute(tensors, split_pts, split_axis, req);\n+}\n+\n+DNNLSplitFwd::Tensors::Tensors(const NDArray& input, const std::vector& outputs)\n+ : input(input), outputs(outputs) {}\n+\n+typedef ParamOpSign DNNLSplitSignature;\n+\n+DNNLSplitFwd DNNLSplitFwd::GetCached(const SplitParam& param,\n+ const Tensors& tensors,\n+ const TShape& split_pts,\n+ const int split_axis) {\n+#if DMLC_CXX11_THREAD_LOCAL\n+ static thread_local std::unordered_map fwds;\n+#else\n+ static MX_THREAD_LOCAL std::unordered_map fwds;\n+#endif\n+\n+ DNNLSplitSignature key(param);\n+ key.AddSign(tensors.input);\n+ key.AddSign(tensors.outputs);\n+ key.AddSign(split_pts);\n+ key.AddSign(split_axis);\n+ auto it = fwds.find(key);\n+ if (it == fwds.end()) {\n+ DNNLSplitFwd fwd(tensors, split_pts, split_axis);\n+ it = AddToCache(&fwds, key, fwd);\n+ }\n+ return it->second;\n+}\n+\n+DNNLSplitFwd::DNNLSplitFwd(const Tensors& tensors, const TShape& split_pts, const int split_axis) {\n+ const auto cpu_engine = CpuEngine::Get()->get_engine();\n+ const auto input = tensors.input.Reorder2Default();\n+ const auto& ishape = input.shape();\n+ const auto& dtype = get_dnnl_type(input.dtype());\n+ const auto format_tag = static_cast(GetDefaultFormat(ishape.ndim()));\n+\n+ dnnl::memory::dims strides(ishape.ndim(), 1);\n+ // last dim stride = 1, start loop from the penultimate\n+ for (int i = ishape.ndim() - 2; i >= 0; --i) {\n+ strides[i] = strides[i + 1] * ishape[i + 1];", + "comment_created_at": "2021-12-07T13:46:24+00:00", + "comment_author": "agrabows", + "comment_body": "This vector could be an attribute of DNNLSplitFwd class to avoid duplicate loop in Execute() function.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "746524936", + "pr_number": 20724, + "pr_file": "src/operator/subgraph/dnnl/dnnl_post_quantize_property.h", + "created_at": "2021-11-10T12:06:59+00:00", + "commented_code": "#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_\n#if MXNET_USE_ONEDNN == 1\n\n#include \n#include \n#include \n#include \n\n#include \"../../nn/dnnl/dnnl_convolution-inl.h\"\n#include \"../../nn/fully_connected-inl.h\"\n#include \"../../quantization/requantize-inl.h\"\n#include \"../../tensor/elemwise_binary_op-inl.h\"\n#include \"../common.h\"\n#include \"dnnl_conv-inl.h\"\n#include \"dnnl_subgraph_base-inl.h\"\n\nnamespace mxnet {\nnamespace op {\n\nclass SgDNNLPostQuantizeSelector : public SubgraphSelector {\nconst std::set support_req_fusion_op = {\"_contrib_quantized_elemwise_add\",\n \"_contrib_quantized_elemwise_mul\",\n \"_contrib_quantized_npi_add\",\n \"_sg_onednn_conv\",\n \"_sg_onednn_fully_connected\",\n \"_sg_onednn_selfatt_qk\",\n \"_sg_onednn_selfatt_valatt\",\n \"_sg_onednn_batch_dot\"};\n\nclass SgDNNLPostQuantizeSelector : public SubgraphSelectorV2 {\n public:\n /*! 
\\brief pattern match status */\n enum SelectStatus {\n kFail = 0,\n kStart,\n kRequantize,\n kSuccess,\n };\n\n private:\n bool disable_fuse_all;\n bool disable_float_output;\n SelectStatus status;\n std::vector matched_list;\n std::vector matched_list;\n std::set support_requantize_fusion_op_name;\n\n public:\n SgDNNLPostQuantizeSelector() {\n support_requantize_fusion_op_name.insert(\"_sg_onednn_conv\");\n support_requantize_fusion_op_name.insert(\"_contrib_quantized_elemwise_add\");\n support_requantize_fusion_op_name.insert(\"_contrib_quantized_npi_add\");\n explicit SgDNNLPostQuantizeSelector(const bool dis_fuse_all, const bool dis_float_output)\n : disable_fuse_all(dis_fuse_all), disable_float_output(dis_float_output) {\n support_requantize_fusion_op_name = support_req_fusion_op;\n }\n\n bool Select(const nnvm::Node& n) override {\n if (n.op() && support_requantize_fusion_op_name.count(n.op()->name)) {\n if (n.op() == Op::Get(\"_sg_onednn_conv\")) {\n auto const& param = nnvm::get(n.attrs.parsed);\n if (param.full_conv_param.dnnl_param.quantized) {\n status = kStart;\n matched_list.clear();\n matched_list.push_back(&n);\n return true;\n }\n } else if (n.op()->name == \"_contrib_quantized_elemwise_add\" ||\n n.op()->name == \"_contrib_quantized_npi_add\") {\n status = kStart;\n matched_list.clear();\n matched_list.push_back(&n);\n return true;\n }\n bool Select(const BiDirectedNode& n) override {\n const nnvm::Node* raw_node = n.node;\n if ((!disable_fuse_all) && raw_node->op() &&\n support_requantize_fusion_op_name.count(raw_node->op()->name)) {\n status = kStart;\n matched_list.clear();\n matched_list.push_back(&n);", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "746524936", + "repo_full_name": "apache/mxnet", + "pr_number": 20724, + "pr_file": "src/operator/subgraph/dnnl/dnnl_post_quantize_property.h", + "discussion_id": "746524936", + "commented_code": "@@ -20,146 +20,209 @@\n #define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_\n #if MXNET_USE_ONEDNN == 1\n \n+#include \n #include \n #include \n #include \n \n #include \"../../nn/dnnl/dnnl_convolution-inl.h\"\n+#include \"../../nn/fully_connected-inl.h\"\n #include \"../../quantization/requantize-inl.h\"\n+#include \"../../tensor/elemwise_binary_op-inl.h\"\n #include \"../common.h\"\n #include \"dnnl_conv-inl.h\"\n #include \"dnnl_subgraph_base-inl.h\"\n \n namespace mxnet {\n namespace op {\n \n-class SgDNNLPostQuantizeSelector : public SubgraphSelector {\n+const std::set support_req_fusion_op = {\"_contrib_quantized_elemwise_add\",\n+ \"_contrib_quantized_elemwise_mul\",\n+ \"_contrib_quantized_npi_add\",\n+ \"_sg_onednn_conv\",\n+ \"_sg_onednn_fully_connected\",\n+ \"_sg_onednn_selfatt_qk\",\n+ \"_sg_onednn_selfatt_valatt\",\n+ \"_sg_onednn_batch_dot\"};\n+\n+class SgDNNLPostQuantizeSelector : public SubgraphSelectorV2 {\n public:\n /*! 
\\brief pattern match status */\n enum SelectStatus {\n kFail = 0,\n kStart,\n+ kRequantize,\n kSuccess,\n };\n \n private:\n+ bool disable_fuse_all;\n+ bool disable_float_output;\n SelectStatus status;\n- std::vector matched_list;\n+ std::vector matched_list;\n std::set support_requantize_fusion_op_name;\n \n public:\n- SgDNNLPostQuantizeSelector() {\n- support_requantize_fusion_op_name.insert(\"_sg_onednn_conv\");\n- support_requantize_fusion_op_name.insert(\"_contrib_quantized_elemwise_add\");\n- support_requantize_fusion_op_name.insert(\"_contrib_quantized_npi_add\");\n+ explicit SgDNNLPostQuantizeSelector(const bool dis_fuse_all, const bool dis_float_output)\n+ : disable_fuse_all(dis_fuse_all), disable_float_output(dis_float_output) {\n+ support_requantize_fusion_op_name = support_req_fusion_op;\n }\n \n- bool Select(const nnvm::Node& n) override {\n- if (n.op() && support_requantize_fusion_op_name.count(n.op()->name)) {\n- if (n.op() == Op::Get(\"_sg_onednn_conv\")) {\n- auto const& param = nnvm::get(n.attrs.parsed);\n- if (param.full_conv_param.dnnl_param.quantized) {\n- status = kStart;\n- matched_list.clear();\n- matched_list.push_back(&n);\n- return true;\n- }\n- } else if (n.op()->name == \"_contrib_quantized_elemwise_add\" ||\n- n.op()->name == \"_contrib_quantized_npi_add\") {\n- status = kStart;\n- matched_list.clear();\n- matched_list.push_back(&n);\n- return true;\n- }\n+ bool Select(const BiDirectedNode& n) override {\n+ const nnvm::Node* raw_node = n.node;\n+ if ((!disable_fuse_all) && raw_node->op() &&\n+ support_requantize_fusion_op_name.count(raw_node->op()->name)) {\n+ status = kStart;\n+ matched_list.clear();\n+ matched_list.push_back(&n);", + "comment_created_at": "2021-11-10T12:06:59+00:00", + "comment_author": "mozga-intel", + "comment_body": "If we clear the vector here, the capacity is the same as before; Then it might be better to use `emplace_back(&n)` instead of `push_back(n)` ~ I hope to avoid creating temp object before.\r\nHow about using?\r\n```suggestion\r\n matched_list.emplace_back(&n);\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "744752804", + "pr_number": 20621, + "pr_file": "src/operator/nn/dnnl/dnnl_concat-inl.h", + "created_at": "2021-11-08T14:03:21+00:00", + "commented_code": "static DNNLConcatFwd& GetConcatForward(int concat_dim,\n const std::vector& in_data,\n const std::vector& data_md) {\n const std::vector& data_md,\n int cache_dim = -1) {\n#if DMLC_CXX11_THREAD_LOCAL\n static thread_local std::unordered_map fwds;\n#else\n static MX_THREAD_LOCAL std::unordered_map fwds;\n#endif\n if (cache_dim == -1) {\n cache_dim = concat_dim;", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "744752804", + "repo_full_name": "apache/mxnet", + "pr_number": 20621, + "pr_file": "src/operator/nn/dnnl/dnnl_concat-inl.h", + "discussion_id": "744752804", + "commented_code": "@@ -52,13 +52,18 @@ class DNNLConcatFwd {\n \n static DNNLConcatFwd& GetConcatForward(int concat_dim,\n const std::vector& in_data,\n- const std::vector& data_md) {\n+ const std::vector& data_md,\n+ int cache_dim = -1) {\n #if DMLC_CXX11_THREAD_LOCAL\n static thread_local std::unordered_map fwds;\n #else\n static MX_THREAD_LOCAL std::unordered_map fwds;\n #endif\n+ if (cache_dim == -1) {\n+ cache_dim = concat_dim;", + "comment_created_at": "2021-11-08T14:03:21+00:00", + "comment_author": "mozga-intel", + "comment_body": "What is the advantage of adding the same value here (twofold: cache_dim if cache_dim == 1)?", + "pr_file_module": 
null + }, + { + "comment_id": "745458419", + "repo_full_name": "apache/mxnet", + "pr_number": 20621, + "pr_file": "src/operator/nn/dnnl/dnnl_concat-inl.h", + "discussion_id": "744752804", + "commented_code": "@@ -52,13 +52,18 @@ class DNNLConcatFwd {\n \n static DNNLConcatFwd& GetConcatForward(int concat_dim,\n const std::vector& in_data,\n- const std::vector& data_md) {\n+ const std::vector& data_md,\n+ int cache_dim = -1) {\n #if DMLC_CXX11_THREAD_LOCAL\n static thread_local std::unordered_map fwds;\n #else\n static MX_THREAD_LOCAL std::unordered_map fwds;\n #endif\n+ if (cache_dim == -1) {\n+ cache_dim = concat_dim;", + "comment_created_at": "2021-11-09T09:59:18+00:00", + "comment_author": "bgawrych", + "comment_body": "Changed to adding -1 value to key. It's necessary for stack op as there is used trick creating always 3 dimensional memory descriptor [leading_dim, mid_dim, trailing_dim]. For the same input shapes stack op can be performed on different axis. Without adding new key returned primitive would be the same for both scenarios which is wrong.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "249252790", + "pr_number": 13811, + "pr_file": "src/operator/contrib/erfinv-inl.h", + "created_at": "2019-01-19T18:14:11+00:00", + "commented_code": "/*\n * Copyright (c) 2014 Indiana University\n * All rights reserved.\n * Written by Prof. Gary L. Pavlis, Dept. of Geol. Sci.,\n * Indiana University, Bloomington, IN\n * This software is licensed under the New BSD license:\n * Redistribution and use in source and binary forms,\n * with or without modification, are permitted provided\n * that the following conditions are met:\n * Redistributions of source code must retain the above\n * copyright notice, this list of conditions and the\n * following disclaimer.\n * Redistributions in binary form must reproduce the\n * above copyright notice, this list of conditions and\n * the following disclaimer in the documentation and/or\n * other materials provided with the distribution.\n * Neither the name of Indiana University nor\n * the names of its contributors may be used to endorse\n * or promote products derived from this software without\n * specific prior written permission.\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND\n * CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED\n * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A\n * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL\n * THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY\n * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\n * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF\n * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\n * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER\n * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE\n * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n/*\n * The next function is taken from\n * https://github.com/antelopeusersgroup/antelope_contrib/blob/master/lib/location/libgenloc/erfinv.c.\n * Output was modified to be inf or -inf when input is 1 or -1.\n */\n#ifndef MXNET_OPERATOR_CONTRIB_ERFINV_INL_H_\n#define MXNET_OPERATOR_CONTRIB_ERFINV_INL_H_\n\n#define _USE_MATH_DEFINES\n#define CENTRAL_RANGE 0.7\n\n#include \n#include \n#include \"math.h\"\n\nnamespace mxnet {\nnamespace op {\nnamespace mshadow_op {\n\n/*! 
\\brief inverse gauss error function */\nstruct erfinv : public mxnet_op::tunable {\n template\n MSHADOW_XINLINE static DType Map(DType v) {\n /* Function to calculate inverse error function. Rational approximation\n is used to generate an initial approximation, which is then improved to\n full accuracy by two steps of Newton's method. Code is a direct\n translation of the erfinv m file in matlab version 2.0.\n Author: Gary L. Pavlis, Indiana University\n Date: February 1996\n */\n double y = static_cast(v);\n /*working variables */\n double x = 0.0;\n double z, num, dem;\n /* coefficients in rational expansion */\n double a[4]={ 0.886226899, -1.645349621, 0.914624893, -0.140543331};\n double b[4]={-2.118377725, 1.442710462, -0.329097515, 0.012229801};\n double c[4]={-1.970840454, -1.624906493, 3.429567803, 1.641345311};\n double d[2]={ 3.543889200, 1.637067800};\n if (fabs(y) > 1.0) {", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "249252790", + "repo_full_name": "apache/mxnet", + "pr_number": 13811, + "pr_file": "src/operator/contrib/erfinv-inl.h", + "discussion_id": "249252790", + "commented_code": "@@ -0,0 +1,105 @@\n+/*\n+ * Copyright (c) 2014 Indiana University\n+ * All rights reserved.\n+ * Written by Prof. Gary L. Pavlis, Dept. of Geol. Sci.,\n+ * Indiana University, Bloomington, IN\n+ * This software is licensed under the New BSD license:\n+ * Redistribution and use in source and binary forms,\n+ * with or without modification, are permitted provided\n+ * that the following conditions are met:\n+ * Redistributions of source code must retain the above\n+ * copyright notice, this list of conditions and the\n+ * following disclaimer.\n+ * Redistributions in binary form must reproduce the\n+ * above copyright notice, this list of conditions and\n+ * the following disclaimer in the documentation and/or\n+ * other materials provided with the distribution.\n+ * Neither the name of Indiana University nor\n+ * the names of its contributors may be used to endorse\n+ * or promote products derived from this software without\n+ * specific prior written permission.\n+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND\n+ * CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED\n+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A\n+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL\n+ * THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY\n+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\n+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF\n+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\n+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER\n+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE\n+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n+ * POSSIBILITY OF SUCH DAMAGE.\n+ */\n+/*\n+ * The next function is taken from\n+ * https://github.com/antelopeusersgroup/antelope_contrib/blob/master/lib/location/libgenloc/erfinv.c.\n+ * Output was modified to be inf or -inf when input is 1 or -1.\n+ */\n+#ifndef MXNET_OPERATOR_CONTRIB_ERFINV_INL_H_\n+#define MXNET_OPERATOR_CONTRIB_ERFINV_INL_H_\n+\n+#define _USE_MATH_DEFINES\n+#define CENTRAL_RANGE 0.7\n+\n+#include \n+#include \n+#include \"math.h\"\n+\n+namespace mxnet {\n+namespace op {\n+namespace mshadow_op {\n+\n+/*! 
\\brief inverse gauss error function */\n+struct erfinv : public mxnet_op::tunable {\n+ template\n+ MSHADOW_XINLINE static DType Map(DType v) {\n+ /* Function to calculate inverse error function. Rational approximation\n+ is used to generate an initial approximation, which is then improved to\n+ full accuracy by two steps of Newton's method. Code is a direct\n+ translation of the erfinv m file in matlab version 2.0.\n+ Author: Gary L. Pavlis, Indiana University\n+ Date: February 1996\n+ */\n+ double y = static_cast(v);\n+ /*working variables */\n+ double x = 0.0;\n+ double z, num, dem;\n+ /* coefficients in rational expansion */\n+ double a[4]={ 0.886226899, -1.645349621, 0.914624893, -0.140543331};\n+ double b[4]={-2.118377725, 1.442710462, -0.329097515, 0.012229801};\n+ double c[4]={-1.970840454, -1.624906493, 3.429567803, 1.641345311};\n+ double d[2]={ 3.543889200, 1.637067800};\n+ if (fabs(y) > 1.0) {", + "comment_created_at": "2019-01-19T18:14:11+00:00", + "comment_author": "eric-haibin-lin", + "comment_body": "Can we cache the value of fabs(y) instead of calculating it multiple times? ", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-avoid-redundant-calculations.md b/_reviewers/mxnet-avoid-redundant-calculations.md index b0b089e..1e4887b 100644 --- a/_reviewers/mxnet-avoid-redundant-calculations.md +++ b/_reviewers/mxnet-avoid-redundant-calculations.md @@ -62,107 +62,3 @@ matched_list.emplace_back(&n); ``` These optimizations can significantly improve performance in computationally intensive applications by reducing unnecessary calculations, memory operations, and object constructions. - - -[ - { - "discussion_id": "764007192", - "pr_number": 20757, - "pr_file": "src/operator/nn/dnnl/dnnl_split.cc", - "created_at": "2021-12-07T13:46:24+00:00", - "commented_code": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied. See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n/*!\n * \\file dnnl_split.cc\n */\n\n#if MXNET_USE_ONEDNN == 1\n\n#include \"../../tensor/matrix_op-inl.h\"\n#include \"./dnnl_split-inl.h\"\n\nnamespace mxnet {\nnamespace op {\n\nbool SupportDNNLSplit(const NDArray& input) {\n static const std::set supported_dtypes = {\n mshadow::kFloat32, mshadow::kBfloat16, mshadow::kInt32, mshadow::kInt8, mshadow::kUint8};\n return supported_dtypes.count(input.dtype());\n}\n\nvoid DNNLSplitForward(const nnvm::NodeAttrs& attrs,\n const OpContext& ctx,\n const std::vector& inputs,\n const std::vector& req,\n const std::vector& outputs) {\n const SplitParam& param = dmlc::get(attrs.parsed);\n const auto tensors = DNNLSplitFwd::Tensors(inputs[0], outputs);\n\n const auto& ishape = tensors.input.shape();\n const int split_axis = param.axis >= 0 ? 
param.axis : param.axis + ishape.ndim();\n const mxnet::TShape split_pts =\n (param.sections > 0) ? GetSplitIndices(tensors.input.shape(), split_axis, param.sections) :\n param.indices;\n\n const auto fwd = DNNLSplitFwd::GetCached(param, tensors, split_pts, split_axis);\n fwd.Execute(tensors, split_pts, split_axis, req);\n}\n\nDNNLSplitFwd::Tensors::Tensors(const NDArray& input, const std::vector& outputs)\n : input(input), outputs(outputs) {}\n\ntypedef ParamOpSign DNNLSplitSignature;\n\nDNNLSplitFwd DNNLSplitFwd::GetCached(const SplitParam& param,\n const Tensors& tensors,\n const TShape& split_pts,\n const int split_axis) {\n#if DMLC_CXX11_THREAD_LOCAL\n static thread_local std::unordered_map fwds;\n#else\n static MX_THREAD_LOCAL std::unordered_map fwds;\n#endif\n\n DNNLSplitSignature key(param);\n key.AddSign(tensors.input);\n key.AddSign(tensors.outputs);\n key.AddSign(split_pts);\n key.AddSign(split_axis);\n auto it = fwds.find(key);\n if (it == fwds.end()) {\n DNNLSplitFwd fwd(tensors, split_pts, split_axis);\n it = AddToCache(&fwds, key, fwd);\n }\n return it->second;\n}\n\nDNNLSplitFwd::DNNLSplitFwd(const Tensors& tensors, const TShape& split_pts, const int split_axis) {\n const auto cpu_engine = CpuEngine::Get()->get_engine();\n const auto input = tensors.input.Reorder2Default();\n const auto& ishape = input.shape();\n const auto& dtype = get_dnnl_type(input.dtype());\n const auto format_tag = static_cast(GetDefaultFormat(ishape.ndim()));\n\n dnnl::memory::dims strides(ishape.ndim(), 1);\n // last dim stride = 1, start loop from the penultimate\n for (int i = ishape.ndim() - 2; i >= 0; --i) {\n strides[i] = strides[i + 1] * ishape[i + 1];", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "764007192", - "repo_full_name": "apache/mxnet", - "pr_number": 20757, - "pr_file": "src/operator/nn/dnnl/dnnl_split.cc", - "discussion_id": "764007192", - "commented_code": "@@ -0,0 +1,155 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. 
See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * \\file dnnl_split.cc\n+ */\n+\n+#if MXNET_USE_ONEDNN == 1\n+\n+#include \"../../tensor/matrix_op-inl.h\"\n+#include \"./dnnl_split-inl.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+bool SupportDNNLSplit(const NDArray& input) {\n+ static const std::set supported_dtypes = {\n+ mshadow::kFloat32, mshadow::kBfloat16, mshadow::kInt32, mshadow::kInt8, mshadow::kUint8};\n+ return supported_dtypes.count(input.dtype());\n+}\n+\n+void DNNLSplitForward(const nnvm::NodeAttrs& attrs,\n+ const OpContext& ctx,\n+ const std::vector& inputs,\n+ const std::vector& req,\n+ const std::vector& outputs) {\n+ const SplitParam& param = dmlc::get(attrs.parsed);\n+ const auto tensors = DNNLSplitFwd::Tensors(inputs[0], outputs);\n+\n+ const auto& ishape = tensors.input.shape();\n+ const int split_axis = param.axis >= 0 ? param.axis : param.axis + ishape.ndim();\n+ const mxnet::TShape split_pts =\n+ (param.sections > 0) ? GetSplitIndices(tensors.input.shape(), split_axis, param.sections) :\n+ param.indices;\n+\n+ const auto fwd = DNNLSplitFwd::GetCached(param, tensors, split_pts, split_axis);\n+ fwd.Execute(tensors, split_pts, split_axis, req);\n+}\n+\n+DNNLSplitFwd::Tensors::Tensors(const NDArray& input, const std::vector& outputs)\n+ : input(input), outputs(outputs) {}\n+\n+typedef ParamOpSign DNNLSplitSignature;\n+\n+DNNLSplitFwd DNNLSplitFwd::GetCached(const SplitParam& param,\n+ const Tensors& tensors,\n+ const TShape& split_pts,\n+ const int split_axis) {\n+#if DMLC_CXX11_THREAD_LOCAL\n+ static thread_local std::unordered_map fwds;\n+#else\n+ static MX_THREAD_LOCAL std::unordered_map fwds;\n+#endif\n+\n+ DNNLSplitSignature key(param);\n+ key.AddSign(tensors.input);\n+ key.AddSign(tensors.outputs);\n+ key.AddSign(split_pts);\n+ key.AddSign(split_axis);\n+ auto it = fwds.find(key);\n+ if (it == fwds.end()) {\n+ DNNLSplitFwd fwd(tensors, split_pts, split_axis);\n+ it = AddToCache(&fwds, key, fwd);\n+ }\n+ return it->second;\n+}\n+\n+DNNLSplitFwd::DNNLSplitFwd(const Tensors& tensors, const TShape& split_pts, const int split_axis) {\n+ const auto cpu_engine = CpuEngine::Get()->get_engine();\n+ const auto input = tensors.input.Reorder2Default();\n+ const auto& ishape = input.shape();\n+ const auto& dtype = get_dnnl_type(input.dtype());\n+ const auto format_tag = static_cast(GetDefaultFormat(ishape.ndim()));\n+\n+ dnnl::memory::dims strides(ishape.ndim(), 1);\n+ // last dim stride = 1, start loop from the penultimate\n+ for (int i = ishape.ndim() - 2; i >= 0; --i) {\n+ strides[i] = strides[i + 1] * ishape[i + 1];", - "comment_created_at": "2021-12-07T13:46:24+00:00", - "comment_author": "agrabows", - "comment_body": "This vector could be an attribute of DNNLSplitFwd class to avoid duplicate loop in Execute() function.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "746524936", - "pr_number": 20724, - "pr_file": "src/operator/subgraph/dnnl/dnnl_post_quantize_property.h", - "created_at": "2021-11-10T12:06:59+00:00", - "commented_code": "#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_\n#if MXNET_USE_ONEDNN == 1\n\n#include \n#include \n#include \n#include \n\n#include \"../../nn/dnnl/dnnl_convolution-inl.h\"\n#include \"../../nn/fully_connected-inl.h\"\n#include \"../../quantization/requantize-inl.h\"\n#include \"../../tensor/elemwise_binary_op-inl.h\"\n#include \"../common.h\"\n#include \"dnnl_conv-inl.h\"\n#include 
\"dnnl_subgraph_base-inl.h\"\n\nnamespace mxnet {\nnamespace op {\n\nclass SgDNNLPostQuantizeSelector : public SubgraphSelector {\nconst std::set support_req_fusion_op = {\"_contrib_quantized_elemwise_add\",\n \"_contrib_quantized_elemwise_mul\",\n \"_contrib_quantized_npi_add\",\n \"_sg_onednn_conv\",\n \"_sg_onednn_fully_connected\",\n \"_sg_onednn_selfatt_qk\",\n \"_sg_onednn_selfatt_valatt\",\n \"_sg_onednn_batch_dot\"};\n\nclass SgDNNLPostQuantizeSelector : public SubgraphSelectorV2 {\n public:\n /*! \\brief pattern match status */\n enum SelectStatus {\n kFail = 0,\n kStart,\n kRequantize,\n kSuccess,\n };\n\n private:\n bool disable_fuse_all;\n bool disable_float_output;\n SelectStatus status;\n std::vector matched_list;\n std::vector matched_list;\n std::set support_requantize_fusion_op_name;\n\n public:\n SgDNNLPostQuantizeSelector() {\n support_requantize_fusion_op_name.insert(\"_sg_onednn_conv\");\n support_requantize_fusion_op_name.insert(\"_contrib_quantized_elemwise_add\");\n support_requantize_fusion_op_name.insert(\"_contrib_quantized_npi_add\");\n explicit SgDNNLPostQuantizeSelector(const bool dis_fuse_all, const bool dis_float_output)\n : disable_fuse_all(dis_fuse_all), disable_float_output(dis_float_output) {\n support_requantize_fusion_op_name = support_req_fusion_op;\n }\n\n bool Select(const nnvm::Node& n) override {\n if (n.op() && support_requantize_fusion_op_name.count(n.op()->name)) {\n if (n.op() == Op::Get(\"_sg_onednn_conv\")) {\n auto const& param = nnvm::get(n.attrs.parsed);\n if (param.full_conv_param.dnnl_param.quantized) {\n status = kStart;\n matched_list.clear();\n matched_list.push_back(&n);\n return true;\n }\n } else if (n.op()->name == \"_contrib_quantized_elemwise_add\" ||\n n.op()->name == \"_contrib_quantized_npi_add\") {\n status = kStart;\n matched_list.clear();\n matched_list.push_back(&n);\n return true;\n }\n bool Select(const BiDirectedNode& n) override {\n const nnvm::Node* raw_node = n.node;\n if ((!disable_fuse_all) && raw_node->op() &&\n support_requantize_fusion_op_name.count(raw_node->op()->name)) {\n status = kStart;\n matched_list.clear();\n matched_list.push_back(&n);", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "746524936", - "repo_full_name": "apache/mxnet", - "pr_number": 20724, - "pr_file": "src/operator/subgraph/dnnl/dnnl_post_quantize_property.h", - "discussion_id": "746524936", - "commented_code": "@@ -20,146 +20,209 @@\n #define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_\n #if MXNET_USE_ONEDNN == 1\n \n+#include \n #include \n #include \n #include \n \n #include \"../../nn/dnnl/dnnl_convolution-inl.h\"\n+#include \"../../nn/fully_connected-inl.h\"\n #include \"../../quantization/requantize-inl.h\"\n+#include \"../../tensor/elemwise_binary_op-inl.h\"\n #include \"../common.h\"\n #include \"dnnl_conv-inl.h\"\n #include \"dnnl_subgraph_base-inl.h\"\n \n namespace mxnet {\n namespace op {\n \n-class SgDNNLPostQuantizeSelector : public SubgraphSelector {\n+const std::set support_req_fusion_op = {\"_contrib_quantized_elemwise_add\",\n+ \"_contrib_quantized_elemwise_mul\",\n+ \"_contrib_quantized_npi_add\",\n+ \"_sg_onednn_conv\",\n+ \"_sg_onednn_fully_connected\",\n+ \"_sg_onednn_selfatt_qk\",\n+ \"_sg_onednn_selfatt_valatt\",\n+ \"_sg_onednn_batch_dot\"};\n+\n+class SgDNNLPostQuantizeSelector : public SubgraphSelectorV2 {\n public:\n /*! 
\\brief pattern match status */\n enum SelectStatus {\n kFail = 0,\n kStart,\n+ kRequantize,\n kSuccess,\n };\n \n private:\n+ bool disable_fuse_all;\n+ bool disable_float_output;\n SelectStatus status;\n- std::vector matched_list;\n+ std::vector matched_list;\n std::set support_requantize_fusion_op_name;\n \n public:\n- SgDNNLPostQuantizeSelector() {\n- support_requantize_fusion_op_name.insert(\"_sg_onednn_conv\");\n- support_requantize_fusion_op_name.insert(\"_contrib_quantized_elemwise_add\");\n- support_requantize_fusion_op_name.insert(\"_contrib_quantized_npi_add\");\n+ explicit SgDNNLPostQuantizeSelector(const bool dis_fuse_all, const bool dis_float_output)\n+ : disable_fuse_all(dis_fuse_all), disable_float_output(dis_float_output) {\n+ support_requantize_fusion_op_name = support_req_fusion_op;\n }\n \n- bool Select(const nnvm::Node& n) override {\n- if (n.op() && support_requantize_fusion_op_name.count(n.op()->name)) {\n- if (n.op() == Op::Get(\"_sg_onednn_conv\")) {\n- auto const& param = nnvm::get(n.attrs.parsed);\n- if (param.full_conv_param.dnnl_param.quantized) {\n- status = kStart;\n- matched_list.clear();\n- matched_list.push_back(&n);\n- return true;\n- }\n- } else if (n.op()->name == \"_contrib_quantized_elemwise_add\" ||\n- n.op()->name == \"_contrib_quantized_npi_add\") {\n- status = kStart;\n- matched_list.clear();\n- matched_list.push_back(&n);\n- return true;\n- }\n+ bool Select(const BiDirectedNode& n) override {\n+ const nnvm::Node* raw_node = n.node;\n+ if ((!disable_fuse_all) && raw_node->op() &&\n+ support_requantize_fusion_op_name.count(raw_node->op()->name)) {\n+ status = kStart;\n+ matched_list.clear();\n+ matched_list.push_back(&n);", - "comment_created_at": "2021-11-10T12:06:59+00:00", - "comment_author": "mozga-intel", - "comment_body": "If we clear the vector here, the capacity is the same as before; Then it might be better to use `emplace_back(&n)` instead of `push_back(n)` ~ I hope to avoid creating temp object before.\r\nHow about using?\r\n```suggestion\r\n matched_list.emplace_back(&n);\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "744752804", - "pr_number": 20621, - "pr_file": "src/operator/nn/dnnl/dnnl_concat-inl.h", - "created_at": "2021-11-08T14:03:21+00:00", - "commented_code": "static DNNLConcatFwd& GetConcatForward(int concat_dim,\n const std::vector& in_data,\n const std::vector& data_md) {\n const std::vector& data_md,\n int cache_dim = -1) {\n#if DMLC_CXX11_THREAD_LOCAL\n static thread_local std::unordered_map fwds;\n#else\n static MX_THREAD_LOCAL std::unordered_map fwds;\n#endif\n if (cache_dim == -1) {\n cache_dim = concat_dim;", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "744752804", - "repo_full_name": "apache/mxnet", - "pr_number": 20621, - "pr_file": "src/operator/nn/dnnl/dnnl_concat-inl.h", - "discussion_id": "744752804", - "commented_code": "@@ -52,13 +52,18 @@ class DNNLConcatFwd {\n \n static DNNLConcatFwd& GetConcatForward(int concat_dim,\n const std::vector& in_data,\n- const std::vector& data_md) {\n+ const std::vector& data_md,\n+ int cache_dim = -1) {\n #if DMLC_CXX11_THREAD_LOCAL\n static thread_local std::unordered_map fwds;\n #else\n static MX_THREAD_LOCAL std::unordered_map fwds;\n #endif\n+ if (cache_dim == -1) {\n+ cache_dim = concat_dim;", - "comment_created_at": "2021-11-08T14:03:21+00:00", - "comment_author": "mozga-intel", - "comment_body": "What is the advantage of adding the same value here (twofold: cache_dim if cache_dim == 1)?", - "pr_file_module": 
null - }, - { - "comment_id": "745458419", - "repo_full_name": "apache/mxnet", - "pr_number": 20621, - "pr_file": "src/operator/nn/dnnl/dnnl_concat-inl.h", - "discussion_id": "744752804", - "commented_code": "@@ -52,13 +52,18 @@ class DNNLConcatFwd {\n \n static DNNLConcatFwd& GetConcatForward(int concat_dim,\n const std::vector& in_data,\n- const std::vector& data_md) {\n+ const std::vector& data_md,\n+ int cache_dim = -1) {\n #if DMLC_CXX11_THREAD_LOCAL\n static thread_local std::unordered_map fwds;\n #else\n static MX_THREAD_LOCAL std::unordered_map fwds;\n #endif\n+ if (cache_dim == -1) {\n+ cache_dim = concat_dim;", - "comment_created_at": "2021-11-09T09:59:18+00:00", - "comment_author": "bgawrych", - "comment_body": "Changed to adding -1 value to key. It's necessary for stack op as there is used trick creating always 3 dimensional memory descriptor [leading_dim, mid_dim, trailing_dim]. For the same input shapes stack op can be performed on different axis. Without adding new key returned primitive would be the same for both scenarios which is wrong.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "249252790", - "pr_number": 13811, - "pr_file": "src/operator/contrib/erfinv-inl.h", - "created_at": "2019-01-19T18:14:11+00:00", - "commented_code": "/*\n * Copyright (c) 2014 Indiana University\n * All rights reserved.\n * Written by Prof. Gary L. Pavlis, Dept. of Geol. Sci.,\n * Indiana University, Bloomington, IN\n * This software is licensed under the New BSD license:\n * Redistribution and use in source and binary forms,\n * with or without modification, are permitted provided\n * that the following conditions are met:\n * Redistributions of source code must retain the above\n * copyright notice, this list of conditions and the\n * following disclaimer.\n * Redistributions in binary form must reproduce the\n * above copyright notice, this list of conditions and\n * the following disclaimer in the documentation and/or\n * other materials provided with the distribution.\n * Neither the name of Indiana University nor\n * the names of its contributors may be used to endorse\n * or promote products derived from this software without\n * specific prior written permission.\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND\n * CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED\n * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A\n * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL\n * THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY\n * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\n * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF\n * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\n * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER\n * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE\n * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n/*\n * The next function is taken from\n * https://github.com/antelopeusersgroup/antelope_contrib/blob/master/lib/location/libgenloc/erfinv.c.\n * Output was modified to be inf or -inf when input is 1 or -1.\n */\n#ifndef MXNET_OPERATOR_CONTRIB_ERFINV_INL_H_\n#define MXNET_OPERATOR_CONTRIB_ERFINV_INL_H_\n\n#define _USE_MATH_DEFINES\n#define CENTRAL_RANGE 0.7\n\n#include \n#include \n#include \"math.h\"\n\nnamespace mxnet {\nnamespace op {\nnamespace mshadow_op {\n\n/*! 
\\brief inverse gauss error function */\nstruct erfinv : public mxnet_op::tunable {\n template\n MSHADOW_XINLINE static DType Map(DType v) {\n /* Function to calculate inverse error function. Rational approximation\n is used to generate an initial approximation, which is then improved to\n full accuracy by two steps of Newton's method. Code is a direct\n translation of the erfinv m file in matlab version 2.0.\n Author: Gary L. Pavlis, Indiana University\n Date: February 1996\n */\n double y = static_cast(v);\n /*working variables */\n double x = 0.0;\n double z, num, dem;\n /* coefficients in rational expansion */\n double a[4]={ 0.886226899, -1.645349621, 0.914624893, -0.140543331};\n double b[4]={-2.118377725, 1.442710462, -0.329097515, 0.012229801};\n double c[4]={-1.970840454, -1.624906493, 3.429567803, 1.641345311};\n double d[2]={ 3.543889200, 1.637067800};\n if (fabs(y) > 1.0) {", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "249252790", - "repo_full_name": "apache/mxnet", - "pr_number": 13811, - "pr_file": "src/operator/contrib/erfinv-inl.h", - "discussion_id": "249252790", - "commented_code": "@@ -0,0 +1,105 @@\n+/*\n+ * Copyright (c) 2014 Indiana University\n+ * All rights reserved.\n+ * Written by Prof. Gary L. Pavlis, Dept. of Geol. Sci.,\n+ * Indiana University, Bloomington, IN\n+ * This software is licensed under the New BSD license:\n+ * Redistribution and use in source and binary forms,\n+ * with or without modification, are permitted provided\n+ * that the following conditions are met:\n+ * Redistributions of source code must retain the above\n+ * copyright notice, this list of conditions and the\n+ * following disclaimer.\n+ * Redistributions in binary form must reproduce the\n+ * above copyright notice, this list of conditions and\n+ * the following disclaimer in the documentation and/or\n+ * other materials provided with the distribution.\n+ * Neither the name of Indiana University nor\n+ * the names of its contributors may be used to endorse\n+ * or promote products derived from this software without\n+ * specific prior written permission.\n+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND\n+ * CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED\n+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A\n+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL\n+ * THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY\n+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\n+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF\n+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\n+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER\n+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE\n+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n+ * POSSIBILITY OF SUCH DAMAGE.\n+ */\n+/*\n+ * The next function is taken from\n+ * https://github.com/antelopeusersgroup/antelope_contrib/blob/master/lib/location/libgenloc/erfinv.c.\n+ * Output was modified to be inf or -inf when input is 1 or -1.\n+ */\n+#ifndef MXNET_OPERATOR_CONTRIB_ERFINV_INL_H_\n+#define MXNET_OPERATOR_CONTRIB_ERFINV_INL_H_\n+\n+#define _USE_MATH_DEFINES\n+#define CENTRAL_RANGE 0.7\n+\n+#include \n+#include \n+#include \"math.h\"\n+\n+namespace mxnet {\n+namespace op {\n+namespace mshadow_op {\n+\n+/*! 
\\brief inverse gauss error function */\n+struct erfinv : public mxnet_op::tunable {\n+ template\n+ MSHADOW_XINLINE static DType Map(DType v) {\n+ /* Function to calculate inverse error function. Rational approximation\n+ is used to generate an initial approximation, which is then improved to\n+ full accuracy by two steps of Newton's method. Code is a direct\n+ translation of the erfinv m file in matlab version 2.0.\n+ Author: Gary L. Pavlis, Indiana University\n+ Date: February 1996\n+ */\n+ double y = static_cast(v);\n+ /*working variables */\n+ double x = 0.0;\n+ double z, num, dem;\n+ /* coefficients in rational expansion */\n+ double a[4]={ 0.886226899, -1.645349621, 0.914624893, -0.140543331};\n+ double b[4]={-2.118377725, 1.442710462, -0.329097515, 0.012229801};\n+ double c[4]={-1.970840454, -1.624906493, 3.429567803, 1.641345311};\n+ double d[2]={ 3.543889200, 1.637067800};\n+ if (fabs(y) > 1.0) {", - "comment_created_at": "2019-01-19T18:14:11+00:00", - "comment_author": "eric-haibin-lin", - "comment_body": "Can we cache the value of fabs(y) instead of calculating it multiple times? ", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-centralize-configuration-parameters.json b/_reviewers/mxnet-centralize-configuration-parameters.json new file mode 100644 index 0000000..e8f8079 --- /dev/null +++ b/_reviewers/mxnet-centralize-configuration-parameters.json @@ -0,0 +1,140 @@ +[ + { + "discussion_id": "725211061", + "pr_number": 20474, + "pr_file": "tools/staticbuild/build_lib.sh", + "created_at": "2021-10-08T18:09:55+00:00", + "commented_code": "-DCMAKE_OSX_DEPLOYMENT_TARGET=10.13 \\\n ..\nninja\nif [[ ! $PLATFORM == 'darwin' ]] && [[ $BLAS == 'mkl' ]]; then\n patchelf --set-rpath \"/opt/intel/oneapi/mkl/${INTEL_MKL}/lib/intel64/:\\$ORIGIN\" --force-rpath libmxnet.so", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "725211061", + "repo_full_name": "apache/mxnet", + "pr_number": 20474, + "pr_file": "tools/staticbuild/build_lib.sh", + "discussion_id": "725211061", + "commented_code": "@@ -36,13 +40,18 @@ cmake -GNinja -C $cmake_config \\\n -DCMAKE_OSX_DEPLOYMENT_TARGET=10.13 \\\n ..\n ninja\n+if [[ ! $PLATFORM == 'darwin' ]] && [[ $BLAS == 'mkl' ]]; then\n+ patchelf --set-rpath \"/opt/intel/oneapi/mkl/${INTEL_MKL}/lib/intel64/:\\$ORIGIN\" --force-rpath libmxnet.so", + "comment_created_at": "2021-10-08T18:09:55+00:00", + "comment_author": "leezu", + "comment_body": "Will oneapi/mkl always be in `/opt/intel`? Is there a reason for not asking users to fix their run-time search path environment variables instead (which ideally would automatically be set correctly upon installation of oneapi/mkl)?", + "pr_file_module": null + }, + { + "comment_id": "725240417", + "repo_full_name": "apache/mxnet", + "pr_number": 20474, + "pr_file": "tools/staticbuild/build_lib.sh", + "discussion_id": "725211061", + "commented_code": "@@ -36,13 +40,18 @@ cmake -GNinja -C $cmake_config \\\n -DCMAKE_OSX_DEPLOYMENT_TARGET=10.13 \\\n ..\n ninja\n+if [[ ! 
$PLATFORM == 'darwin' ]] && [[ $BLAS == 'mkl' ]]; then\n+ patchelf --set-rpath \"/opt/intel/oneapi/mkl/${INTEL_MKL}/lib/intel64/:\\$ORIGIN\" --force-rpath libmxnet.so", + "comment_created_at": "2021-10-08T19:02:59+00:00", + "comment_author": "akarbown", + "comment_body": "_> Will oneapi/mkl always be in /opt/intel?_ \r\nI think that if the user do not define explicitly other location, the default location for oneMKL supposed to be /opt/intel/oneapi/ at least the way it was installed in the way as it is in the mkl.sh file. \r\n_> Is there a reason for not asking users to fix their run-time search path environment variables instead (which ideally would automatically be set correctly upon installation of oneapi/mkl)?_\r\nI've added it for the sake of the tests so that while running them (in the runtime) libmxnet.dylib could see the MKL libraries. It's not the best solution. Now, I think that maybe it would be better to ``source /opt/intel/oneapi/setvars.sh`` script just before the test execution (in the *.yml file). What do you think? Or did you have something else in mind?", + "pr_file_module": null + }, + { + "comment_id": "725695236", + "repo_full_name": "apache/mxnet", + "pr_number": 20474, + "pr_file": "tools/staticbuild/build_lib.sh", + "discussion_id": "725211061", + "commented_code": "@@ -36,13 +40,18 @@ cmake -GNinja -C $cmake_config \\\n -DCMAKE_OSX_DEPLOYMENT_TARGET=10.13 \\\n ..\n ninja\n+if [[ ! $PLATFORM == 'darwin' ]] && [[ $BLAS == 'mkl' ]]; then\n+ patchelf --set-rpath \"/opt/intel/oneapi/mkl/${INTEL_MKL}/lib/intel64/:\\$ORIGIN\" --force-rpath libmxnet.so", + "comment_created_at": "2021-10-10T20:41:27+00:00", + "comment_author": "leezu", + "comment_body": "I assume `source /opt/intel/oneapi/setvars.sh` is also what users would be expected to do if they install MKL on Mac? If so, I think that'll be more robust than hardcoding the rpath ", + "pr_file_module": null + }, + { + "comment_id": "725708632", + "repo_full_name": "apache/mxnet", + "pr_number": 20474, + "pr_file": "tools/staticbuild/build_lib.sh", + "discussion_id": "725211061", + "commented_code": "@@ -36,13 +40,18 @@ cmake -GNinja -C $cmake_config \\\n -DCMAKE_OSX_DEPLOYMENT_TARGET=10.13 \\\n ..\n ninja\n+if [[ ! $PLATFORM == 'darwin' ]] && [[ $BLAS == 'mkl' ]]; then\n+ patchelf --set-rpath \"/opt/intel/oneapi/mkl/${INTEL_MKL}/lib/intel64/:\\$ORIGIN\" --force-rpath libmxnet.so", + "comment_created_at": "2021-10-10T22:29:39+00:00", + "comment_author": "akarbown", + "comment_body": "Yes, sure. However, I've just realized that when linking with MKL static libraries there is no need to ``source /opt/intel/oneapi/setvars.sh``. It's mandatory in case of linking with MKL dynamic libraries. 
Thanks for pointing that out!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "610025602", + "pr_number": 20138, + "pr_file": "ci/docker/runtime_functions.sh", + "created_at": "2021-04-08T19:21:08+00:00", + "commented_code": "export MXNET_SUBGRAPH_VERBOSE=0\n export MXNET_ENABLE_CYTHON=0\n export DMLC_LOG_STACK_TRACE_DEPTH=100\n OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'not test_operator' -n 4 --durations=50 --cov-report xml:tests_unittest.xml --verbose tests/python/unittest\n OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'not test_operator' -n 4 --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_unittest.xml --verbose tests/python/unittest\n MXNET_ENGINE_TYPE=NaiveEngine \\\n OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'test_operator' -n 4 --durations=50 --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n pytest -m 'serial' --durations=50 --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n pytest --durations=50 --cov-report xml:tests_mkl.xml --verbose tests/python/mkl\n OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'test_operator' -n 4 --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n pytest -m 'serial' --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n pytest --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_mkl.xml --verbose tests/python/mkl", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "610025602", + "repo_full_name": "apache/mxnet", + "pr_number": 20138, + "pr_file": "ci/docker/runtime_functions.sh", + "discussion_id": "610025602", + "commented_code": "@@ -783,11 +783,11 @@ unittest_ubuntu_python3_cpu_onednn() {\n export MXNET_SUBGRAPH_VERBOSE=0\n export MXNET_ENABLE_CYTHON=0\n export DMLC_LOG_STACK_TRACE_DEPTH=100\n- OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'not test_operator' -n 4 --durations=50 --cov-report xml:tests_unittest.xml --verbose tests/python/unittest\n+ OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'not test_operator' -n 4 --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_unittest.xml --verbose tests/python/unittest\n MXNET_ENGINE_TYPE=NaiveEngine \\\n- OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'test_operator' -n 4 --durations=50 --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n- pytest -m 'serial' --durations=50 --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n- pytest --durations=50 --cov-report xml:tests_mkl.xml --verbose tests/python/mkl\n+ OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'test_operator' -n 4 --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n+ pytest -m 'serial' --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n+ pytest --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_mkl.xml --verbose tests/python/mkl", + "comment_created_at": "2021-04-08T19:21:08+00:00", + "comment_author": "marcoabreu", + "comment_body": "Can't we have this in some configuration file instead of inlining it everywhere?", + 
"pr_file_module": null + }, + { + "comment_id": "610034846", + "repo_full_name": "apache/mxnet", + "pr_number": 20138, + "pr_file": "ci/docker/runtime_functions.sh", + "discussion_id": "610025602", + "commented_code": "@@ -783,11 +783,11 @@ unittest_ubuntu_python3_cpu_onednn() {\n export MXNET_SUBGRAPH_VERBOSE=0\n export MXNET_ENABLE_CYTHON=0\n export DMLC_LOG_STACK_TRACE_DEPTH=100\n- OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'not test_operator' -n 4 --durations=50 --cov-report xml:tests_unittest.xml --verbose tests/python/unittest\n+ OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'not test_operator' -n 4 --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_unittest.xml --verbose tests/python/unittest\n MXNET_ENGINE_TYPE=NaiveEngine \\\n- OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'test_operator' -n 4 --durations=50 --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n- pytest -m 'serial' --durations=50 --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n- pytest --durations=50 --cov-report xml:tests_mkl.xml --verbose tests/python/mkl\n+ OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'test_operator' -n 4 --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n+ pytest -m 'serial' --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n+ pytest --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_mkl.xml --verbose tests/python/mkl", + "comment_created_at": "2021-04-08T19:32:54+00:00", + "comment_author": "barry-jin", + "comment_body": "Thanks for the suggestion. I will update it. 
", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "651362257", + "pr_number": 20342, + "pr_file": "tools/staticbuild/build.sh", + "created_at": "2021-06-15T00:26:44+00:00", + "commented_code": "export PKG_CONFIG_PATH=$DEPS_PATH/lib/pkgconfig:$DEPS_PATH/lib64/pkgconfig:$DEPS_PATH/lib/x86_64-linux-gnu/pkgconfig:$PKG_CONFIG_PATH\nfi\nexport FC=\"gfortran\"\nexport CPATH=$DEPS_PATH/include:$CPATH\nif [[ $ARCH == 'aarch64' ]]; then\n export CPATH=/opt/arm/armpl_21.0_gcc-8.2/include_lp64_mp:$CPATH", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "651362257", + "repo_full_name": "apache/mxnet", + "pr_number": 20342, + "pr_file": "tools/staticbuild/build.sh", + "discussion_id": "651362257", + "commented_code": "@@ -59,7 +59,11 @@ else\n export PKG_CONFIG_PATH=$DEPS_PATH/lib/pkgconfig:$DEPS_PATH/lib64/pkgconfig:$DEPS_PATH/lib/x86_64-linux-gnu/pkgconfig:$PKG_CONFIG_PATH\n fi\n export FC=\"gfortran\"\n-export CPATH=$DEPS_PATH/include:$CPATH\n+if [[ $ARCH == 'aarch64' ]]; then\n+ export CPATH=/opt/arm/armpl_21.0_gcc-8.2/include_lp64_mp:$CPATH", + "comment_created_at": "2021-06-15T00:26:44+00:00", + "comment_author": "Zha0q1", + "comment_body": "If the user installed a different armpl package then this will not work, is there a way to automatically detect the armpl path?", + "pr_file_module": null + }, + { + "comment_id": "651363271", + "repo_full_name": "apache/mxnet", + "pr_number": 20342, + "pr_file": "tools/staticbuild/build.sh", + "discussion_id": "651362257", + "commented_code": "@@ -59,7 +59,11 @@ else\n export PKG_CONFIG_PATH=$DEPS_PATH/lib/pkgconfig:$DEPS_PATH/lib64/pkgconfig:$DEPS_PATH/lib/x86_64-linux-gnu/pkgconfig:$PKG_CONFIG_PATH\n fi\n export FC=\"gfortran\"\n-export CPATH=$DEPS_PATH/include:$CPATH\n+if [[ $ARCH == 'aarch64' ]]; then\n+ export CPATH=/opt/arm/armpl_21.0_gcc-8.2/include_lp64_mp:$CPATH", + "comment_created_at": "2021-06-15T00:29:53+00:00", + "comment_author": "mseth10", + "comment_body": "does it work with other armpl versions? didn't we see an error when we were using gcc-10 armpl?", + "pr_file_module": null + }, + { + "comment_id": "651370471", + "repo_full_name": "apache/mxnet", + "pr_number": 20342, + "pr_file": "tools/staticbuild/build.sh", + "discussion_id": "651362257", + "commented_code": "@@ -59,7 +59,11 @@ else\n export PKG_CONFIG_PATH=$DEPS_PATH/lib/pkgconfig:$DEPS_PATH/lib64/pkgconfig:$DEPS_PATH/lib/x86_64-linux-gnu/pkgconfig:$PKG_CONFIG_PATH\n fi\n export FC=\"gfortran\"\n-export CPATH=$DEPS_PATH/include:$CPATH\n+if [[ $ARCH == 'aarch64' ]]; then\n+ export CPATH=/opt/arm/armpl_21.0_gcc-8.2/include_lp64_mp:$CPATH", + "comment_created_at": "2021-06-15T00:53:03+00:00", + "comment_author": "mseth10", + "comment_body": "we can ask users to specifically install `armpl_21.0_gcc-8.2` before installing mxnet wheel", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-centralize-configuration-parameters.md b/_reviewers/mxnet-centralize-configuration-parameters.md index 9378d36..9490ea0 100644 --- a/_reviewers/mxnet-centralize-configuration-parameters.md +++ b/_reviewers/mxnet-centralize-configuration-parameters.md @@ -40,145 +40,3 @@ pytest ${TEST_COMMON_ARGS} ${UNITTEST_ARGS} tests/python/unittest ``` This approach improves portability across different environments and makes maintenance easier when configurations need to change. 
- - -[ - { - "discussion_id": "725211061", - "pr_number": 20474, - "pr_file": "tools/staticbuild/build_lib.sh", - "created_at": "2021-10-08T18:09:55+00:00", - "commented_code": "-DCMAKE_OSX_DEPLOYMENT_TARGET=10.13 \\\n ..\nninja\nif [[ ! $PLATFORM == 'darwin' ]] && [[ $BLAS == 'mkl' ]]; then\n patchelf --set-rpath \"/opt/intel/oneapi/mkl/${INTEL_MKL}/lib/intel64/:\\$ORIGIN\" --force-rpath libmxnet.so", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "725211061", - "repo_full_name": "apache/mxnet", - "pr_number": 20474, - "pr_file": "tools/staticbuild/build_lib.sh", - "discussion_id": "725211061", - "commented_code": "@@ -36,13 +40,18 @@ cmake -GNinja -C $cmake_config \\\n -DCMAKE_OSX_DEPLOYMENT_TARGET=10.13 \\\n ..\n ninja\n+if [[ ! $PLATFORM == 'darwin' ]] && [[ $BLAS == 'mkl' ]]; then\n+ patchelf --set-rpath \"/opt/intel/oneapi/mkl/${INTEL_MKL}/lib/intel64/:\\$ORIGIN\" --force-rpath libmxnet.so", - "comment_created_at": "2021-10-08T18:09:55+00:00", - "comment_author": "leezu", - "comment_body": "Will oneapi/mkl always be in `/opt/intel`? Is there a reason for not asking users to fix their run-time search path environment variables instead (which ideally would automatically be set correctly upon installation of oneapi/mkl)?", - "pr_file_module": null - }, - { - "comment_id": "725240417", - "repo_full_name": "apache/mxnet", - "pr_number": 20474, - "pr_file": "tools/staticbuild/build_lib.sh", - "discussion_id": "725211061", - "commented_code": "@@ -36,13 +40,18 @@ cmake -GNinja -C $cmake_config \\\n -DCMAKE_OSX_DEPLOYMENT_TARGET=10.13 \\\n ..\n ninja\n+if [[ ! $PLATFORM == 'darwin' ]] && [[ $BLAS == 'mkl' ]]; then\n+ patchelf --set-rpath \"/opt/intel/oneapi/mkl/${INTEL_MKL}/lib/intel64/:\\$ORIGIN\" --force-rpath libmxnet.so", - "comment_created_at": "2021-10-08T19:02:59+00:00", - "comment_author": "akarbown", - "comment_body": "_> Will oneapi/mkl always be in /opt/intel?_ \r\nI think that if the user do not define explicitly other location, the default location for oneMKL supposed to be /opt/intel/oneapi/ at least the way it was installed in the way as it is in the mkl.sh file. \r\n_> Is there a reason for not asking users to fix their run-time search path environment variables instead (which ideally would automatically be set correctly upon installation of oneapi/mkl)?_\r\nI've added it for the sake of the tests so that while running them (in the runtime) libmxnet.dylib could see the MKL libraries. It's not the best solution. Now, I think that maybe it would be better to ``source /opt/intel/oneapi/setvars.sh`` script just before the test execution (in the *.yml file). What do you think? Or did you have something else in mind?", - "pr_file_module": null - }, - { - "comment_id": "725695236", - "repo_full_name": "apache/mxnet", - "pr_number": 20474, - "pr_file": "tools/staticbuild/build_lib.sh", - "discussion_id": "725211061", - "commented_code": "@@ -36,13 +40,18 @@ cmake -GNinja -C $cmake_config \\\n -DCMAKE_OSX_DEPLOYMENT_TARGET=10.13 \\\n ..\n ninja\n+if [[ ! $PLATFORM == 'darwin' ]] && [[ $BLAS == 'mkl' ]]; then\n+ patchelf --set-rpath \"/opt/intel/oneapi/mkl/${INTEL_MKL}/lib/intel64/:\\$ORIGIN\" --force-rpath libmxnet.so", - "comment_created_at": "2021-10-10T20:41:27+00:00", - "comment_author": "leezu", - "comment_body": "I assume `source /opt/intel/oneapi/setvars.sh` is also what users would be expected to do if they install MKL on Mac? 
If so, I think that'll be more robust than hardcoding the rpath ", - "pr_file_module": null - }, - { - "comment_id": "725708632", - "repo_full_name": "apache/mxnet", - "pr_number": 20474, - "pr_file": "tools/staticbuild/build_lib.sh", - "discussion_id": "725211061", - "commented_code": "@@ -36,13 +40,18 @@ cmake -GNinja -C $cmake_config \\\n -DCMAKE_OSX_DEPLOYMENT_TARGET=10.13 \\\n ..\n ninja\n+if [[ ! $PLATFORM == 'darwin' ]] && [[ $BLAS == 'mkl' ]]; then\n+ patchelf --set-rpath \"/opt/intel/oneapi/mkl/${INTEL_MKL}/lib/intel64/:\\$ORIGIN\" --force-rpath libmxnet.so", - "comment_created_at": "2021-10-10T22:29:39+00:00", - "comment_author": "akarbown", - "comment_body": "Yes, sure. However, I've just realized that when linking with MKL static libraries there is no need to ``source /opt/intel/oneapi/setvars.sh``. It's mandatory in case of linking with MKL dynamic libraries. Thanks for pointing that out!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "610025602", - "pr_number": 20138, - "pr_file": "ci/docker/runtime_functions.sh", - "created_at": "2021-04-08T19:21:08+00:00", - "commented_code": "export MXNET_SUBGRAPH_VERBOSE=0\n export MXNET_ENABLE_CYTHON=0\n export DMLC_LOG_STACK_TRACE_DEPTH=100\n OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'not test_operator' -n 4 --durations=50 --cov-report xml:tests_unittest.xml --verbose tests/python/unittest\n OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'not test_operator' -n 4 --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_unittest.xml --verbose tests/python/unittest\n MXNET_ENGINE_TYPE=NaiveEngine \\\n OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'test_operator' -n 4 --durations=50 --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n pytest -m 'serial' --durations=50 --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n pytest --durations=50 --cov-report xml:tests_mkl.xml --verbose tests/python/mkl\n OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'test_operator' -n 4 --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n pytest -m 'serial' --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n pytest --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_mkl.xml --verbose tests/python/mkl", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "610025602", - "repo_full_name": "apache/mxnet", - "pr_number": 20138, - "pr_file": "ci/docker/runtime_functions.sh", - "discussion_id": "610025602", - "commented_code": "@@ -783,11 +783,11 @@ unittest_ubuntu_python3_cpu_onednn() {\n export MXNET_SUBGRAPH_VERBOSE=0\n export MXNET_ENABLE_CYTHON=0\n export DMLC_LOG_STACK_TRACE_DEPTH=100\n- OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'not test_operator' -n 4 --durations=50 --cov-report xml:tests_unittest.xml --verbose tests/python/unittest\n+ OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'not test_operator' -n 4 --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_unittest.xml --verbose tests/python/unittest\n MXNET_ENGINE_TYPE=NaiveEngine \\\n- OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'test_operator' -n 4 --durations=50 --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n- pytest 
-m 'serial' --durations=50 --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n- pytest --durations=50 --cov-report xml:tests_mkl.xml --verbose tests/python/mkl\n+ OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'test_operator' -n 4 --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n+ pytest -m 'serial' --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n+ pytest --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_mkl.xml --verbose tests/python/mkl", - "comment_created_at": "2021-04-08T19:21:08+00:00", - "comment_author": "marcoabreu", - "comment_body": "Can't we have this in some configuration file instead of inlining it everywhere?", - "pr_file_module": null - }, - { - "comment_id": "610034846", - "repo_full_name": "apache/mxnet", - "pr_number": 20138, - "pr_file": "ci/docker/runtime_functions.sh", - "discussion_id": "610025602", - "commented_code": "@@ -783,11 +783,11 @@ unittest_ubuntu_python3_cpu_onednn() {\n export MXNET_SUBGRAPH_VERBOSE=0\n export MXNET_ENABLE_CYTHON=0\n export DMLC_LOG_STACK_TRACE_DEPTH=100\n- OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'not test_operator' -n 4 --durations=50 --cov-report xml:tests_unittest.xml --verbose tests/python/unittest\n+ OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'not test_operator' -n 4 --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_unittest.xml --verbose tests/python/unittest\n MXNET_ENGINE_TYPE=NaiveEngine \\\n- OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'test_operator' -n 4 --durations=50 --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n- pytest -m 'serial' --durations=50 --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n- pytest --durations=50 --cov-report xml:tests_mkl.xml --verbose tests/python/mkl\n+ OMP_NUM_THREADS=$(expr $(nproc) / 4) pytest -m 'not serial' -k 'test_operator' -n 4 --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n+ pytest -m 'serial' --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest\n+ pytest --durations=50 --cov=./contrib --cov=./python/mxnet --cov-report xml:tests_mkl.xml --verbose tests/python/mkl", - "comment_created_at": "2021-04-08T19:32:54+00:00", - "comment_author": "barry-jin", - "comment_body": "Thanks for the suggestion. I will update it. 
", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "651362257", - "pr_number": 20342, - "pr_file": "tools/staticbuild/build.sh", - "created_at": "2021-06-15T00:26:44+00:00", - "commented_code": "export PKG_CONFIG_PATH=$DEPS_PATH/lib/pkgconfig:$DEPS_PATH/lib64/pkgconfig:$DEPS_PATH/lib/x86_64-linux-gnu/pkgconfig:$PKG_CONFIG_PATH\nfi\nexport FC=\"gfortran\"\nexport CPATH=$DEPS_PATH/include:$CPATH\nif [[ $ARCH == 'aarch64' ]]; then\n export CPATH=/opt/arm/armpl_21.0_gcc-8.2/include_lp64_mp:$CPATH", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "651362257", - "repo_full_name": "apache/mxnet", - "pr_number": 20342, - "pr_file": "tools/staticbuild/build.sh", - "discussion_id": "651362257", - "commented_code": "@@ -59,7 +59,11 @@ else\n export PKG_CONFIG_PATH=$DEPS_PATH/lib/pkgconfig:$DEPS_PATH/lib64/pkgconfig:$DEPS_PATH/lib/x86_64-linux-gnu/pkgconfig:$PKG_CONFIG_PATH\n fi\n export FC=\"gfortran\"\n-export CPATH=$DEPS_PATH/include:$CPATH\n+if [[ $ARCH == 'aarch64' ]]; then\n+ export CPATH=/opt/arm/armpl_21.0_gcc-8.2/include_lp64_mp:$CPATH", - "comment_created_at": "2021-06-15T00:26:44+00:00", - "comment_author": "Zha0q1", - "comment_body": "If the user installed a different armpl package then this will not work, is there a way to automatically detect the armpl path?", - "pr_file_module": null - }, - { - "comment_id": "651363271", - "repo_full_name": "apache/mxnet", - "pr_number": 20342, - "pr_file": "tools/staticbuild/build.sh", - "discussion_id": "651362257", - "commented_code": "@@ -59,7 +59,11 @@ else\n export PKG_CONFIG_PATH=$DEPS_PATH/lib/pkgconfig:$DEPS_PATH/lib64/pkgconfig:$DEPS_PATH/lib/x86_64-linux-gnu/pkgconfig:$PKG_CONFIG_PATH\n fi\n export FC=\"gfortran\"\n-export CPATH=$DEPS_PATH/include:$CPATH\n+if [[ $ARCH == 'aarch64' ]]; then\n+ export CPATH=/opt/arm/armpl_21.0_gcc-8.2/include_lp64_mp:$CPATH", - "comment_created_at": "2021-06-15T00:29:53+00:00", - "comment_author": "mseth10", - "comment_body": "does it work with other armpl versions? 
didn't we see an error when we were using gcc-10 armpl?", - "pr_file_module": null - }, - { - "comment_id": "651370471", - "repo_full_name": "apache/mxnet", - "pr_number": 20342, - "pr_file": "tools/staticbuild/build.sh", - "discussion_id": "651362257", - "commented_code": "@@ -59,7 +59,11 @@ else\n export PKG_CONFIG_PATH=$DEPS_PATH/lib/pkgconfig:$DEPS_PATH/lib64/pkgconfig:$DEPS_PATH/lib/x86_64-linux-gnu/pkgconfig:$PKG_CONFIG_PATH\n fi\n export FC=\"gfortran\"\n-export CPATH=$DEPS_PATH/include:$CPATH\n+if [[ $ARCH == 'aarch64' ]]; then\n+ export CPATH=/opt/arm/armpl_21.0_gcc-8.2/include_lp64_mp:$CPATH", - "comment_created_at": "2021-06-15T00:53:03+00:00", - "comment_author": "mseth10", - "comment_body": "we can ask users to specifically install `armpl_21.0_gcc-8.2` before installing mxnet wheel", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-centralize-synchronization-logic.json b/_reviewers/mxnet-centralize-synchronization-logic.json new file mode 100644 index 0000000..ea70949 --- /dev/null +++ b/_reviewers/mxnet-centralize-synchronization-logic.json @@ -0,0 +1,92 @@ +[ + { + "discussion_id": "658483058", + "pr_number": 20331, + "pr_file": "include/mxnet/base.h", + "created_at": "2021-06-25T05:24:40+00:00", + "commented_code": "* \\brief the auxiliary stream of the device, can be nullptr or Stream* in GPU mode\n */\n void *aux_stream;\n /*!\n * \\brief indicator of whether this execution is run in bulk mode\n /*!\n * \\brief pointer to the cuda event pool used by the dependecy engine\n */\n bool is_bulk;\n void *event_pool = nullptr;", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "658483058", + "repo_full_name": "apache/mxnet", + "pr_number": 20331, + "pr_file": "include/mxnet/base.h", + "discussion_id": "658483058", + "commented_code": "@@ -348,10 +348,10 @@ struct RunContext {\n * \\brief the auxiliary stream of the device, can be nullptr or Stream* in GPU mode\n */\n void *aux_stream;\n- /*!\n- * \\brief indicator of whether this execution is run in bulk mode\n+ /*!\n+ * \\brief pointer to the cuda event pool used by the dependecy engine\n */\n- bool is_bulk;\n+ void *event_pool = nullptr;", + "comment_created_at": "2021-06-25T05:24:40+00:00", + "comment_author": "ptrendx", + "comment_body": "Leaving this comment here just as a way to explain why the `is_bulk` is removed. `is_bulk` was a way to mark operations as not needing synchronization when being a part of a single engine bulk. 
This PR moves the handling of the synchronization to the engine itself, so this is no longer useful.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "701276286", + "pr_number": 20331, + "pr_file": "src/engine/threaded_engine.cc", + "created_at": "2021-09-02T17:12:47+00:00", + "commented_code": "OprBlock::Delete(opr_block);\n}\n\nvoid ThreadedEngine::OnStartStatic(Engine *engine, void *opr_block,\n const dmlc::Error* error) {\n // no-op\n}\n\n#if MXNET_USE_CUDA\nstatic inline void AddEventHelper(\n std::unordered_map* events_per_stream,\n const EventInfo& cuda_event) {\n auto event_stream = cuda_event.stream;\n if (events_per_stream->count(event_stream) > 0) {\n if ((*events_per_stream)[event_stream].pool_index < cuda_event.pool_index) {\n (*events_per_stream)[event_stream] = cuda_event;", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "701276286", + "repo_full_name": "apache/mxnet", + "pr_number": 20331, + "pr_file": "src/engine/threaded_engine.cc", + "discussion_id": "701276286", + "commented_code": "@@ -523,5 +543,206 @@ void ThreadedEngine::OnCompleteStatic(Engine *engine, void *opr_block_,\n OprBlock::Delete(opr_block);\n }\n \n+void ThreadedEngine::OnStartStatic(Engine *engine, void *opr_block,\n+ const dmlc::Error* error) {\n+ // no-op\n+}\n+\n+#if MXNET_USE_CUDA\n+static inline void AddEventHelper(\n+ std::unordered_map* events_per_stream,\n+ const EventInfo& cuda_event) {\n+ auto event_stream = cuda_event.stream;\n+ if (events_per_stream->count(event_stream) > 0) {\n+ if ((*events_per_stream)[event_stream].pool_index < cuda_event.pool_index) {\n+ (*events_per_stream)[event_stream] = cuda_event;", + "comment_created_at": "2021-09-02T17:12:47+00:00", + "comment_author": "barry-jin", + "comment_body": "Why should the event be replaced by others with larger pool_index? 
", + "pr_file_module": null + }, + { + "comment_id": "701325238", + "repo_full_name": "apache/mxnet", + "pr_number": 20331, + "pr_file": "src/engine/threaded_engine.cc", + "discussion_id": "701276286", + "commented_code": "@@ -523,5 +543,206 @@ void ThreadedEngine::OnCompleteStatic(Engine *engine, void *opr_block_,\n OprBlock::Delete(opr_block);\n }\n \n+void ThreadedEngine::OnStartStatic(Engine *engine, void *opr_block,\n+ const dmlc::Error* error) {\n+ // no-op\n+}\n+\n+#if MXNET_USE_CUDA\n+static inline void AddEventHelper(\n+ std::unordered_map* events_per_stream,\n+ const EventInfo& cuda_event) {\n+ auto event_stream = cuda_event.stream;\n+ if (events_per_stream->count(event_stream) > 0) {\n+ if ((*events_per_stream)[event_stream].pool_index < cuda_event.pool_index) {\n+ (*events_per_stream)[event_stream] = cuda_event;", + "comment_created_at": "2021-09-02T18:26:32+00:00", + "comment_author": "ptrendx", + "comment_body": "Because the pool is per stream so for 2 events from the same pool the one with the larger pool_index had to be recorded later (and synchronizing with that event will synchronize with both).", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "701304204", + "pr_number": 20331, + "pr_file": "src/engine/threaded_engine_perdevice.cc", + "created_at": "2021-09-02T17:54:13+00:00", + "commented_code": "MSHADOW_CATCH_ERROR(mshadow::SetDevice(ctx.dev_id));\n #endif\n }\n this->ExecuteOprBlock(RunContext{ctx, nullptr, nullptr, false}, opr_block);\n CallbackOnStart on_start = this->CreateOnStart(ThreadedEngine::OnStartStatic,\n opr_block);\n CallbackOnComplete callback = this->CreateCallback(ThreadedEngine::OnCompleteStatic,\n opr_block);", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "701304204", + "repo_full_name": "apache/mxnet", + "pr_number": 20331, + "pr_file": "src/engine/threaded_engine_perdevice.cc", + "discussion_id": "701304204", + "commented_code": "@@ -103,7 +118,11 @@ class ThreadedEnginePerDevice : public ThreadedEngine {\n MSHADOW_CATCH_ERROR(mshadow::SetDevice(ctx.dev_id));\n #endif\n }\n- this->ExecuteOprBlock(RunContext{ctx, nullptr, nullptr, false}, opr_block);\n+ CallbackOnStart on_start = this->CreateOnStart(ThreadedEngine::OnStartStatic,\n+ opr_block);\n+ CallbackOnComplete callback = this->CreateCallback(ThreadedEngine::OnCompleteStatic,\n+ opr_block);", + "comment_created_at": "2021-09-02T17:54:13+00:00", + "comment_author": "barry-jin", + "comment_body": "Why use OnStartStatic and OnCompleteStatic here instead of creating GPU onstart and oncomplete for GPU case, creating static-onstart and static-oncomplete for CPU case. 
", + "pr_file_module": null + }, + { + "comment_id": "701326230", + "repo_full_name": "apache/mxnet", + "pr_number": 20331, + "pr_file": "src/engine/threaded_engine_perdevice.cc", + "discussion_id": "701304204", + "commented_code": "@@ -103,7 +118,11 @@ class ThreadedEnginePerDevice : public ThreadedEngine {\n MSHADOW_CATCH_ERROR(mshadow::SetDevice(ctx.dev_id));\n #endif\n }\n- this->ExecuteOprBlock(RunContext{ctx, nullptr, nullptr, false}, opr_block);\n+ CallbackOnStart on_start = this->CreateOnStart(ThreadedEngine::OnStartStatic,\n+ opr_block);\n+ CallbackOnComplete callback = this->CreateCallback(ThreadedEngine::OnCompleteStatic,\n+ opr_block);", + "comment_created_at": "2021-09-02T18:28:07+00:00", + "comment_author": "ptrendx", + "comment_body": "Mechanism introduced in this PR is in addition to the dependency tracking that already existed, and the OnCompleteStatic contains dependency update code (so it has to be called from the GPU version as well).", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-centralize-synchronization-logic.md b/_reviewers/mxnet-centralize-synchronization-logic.md index d1ff887..4a97209 100644 --- a/_reviewers/mxnet-centralize-synchronization-logic.md +++ b/_reviewers/mxnet-centralize-synchronization-logic.md @@ -43,97 +43,3 @@ void SynchronizeOperations(RunContext* ctx) { ``` This pattern is particularly important for CUDA operations where proper event ordering and stream synchronization are critical for correctness. Using centralized event pools and dependency tracking helps ensure operations complete in the correct order without unnecessary synchronization points. - - -[ - { - "discussion_id": "658483058", - "pr_number": 20331, - "pr_file": "include/mxnet/base.h", - "created_at": "2021-06-25T05:24:40+00:00", - "commented_code": "* \\brief the auxiliary stream of the device, can be nullptr or Stream* in GPU mode\n */\n void *aux_stream;\n /*!\n * \\brief indicator of whether this execution is run in bulk mode\n /*!\n * \\brief pointer to the cuda event pool used by the dependecy engine\n */\n bool is_bulk;\n void *event_pool = nullptr;", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "658483058", - "repo_full_name": "apache/mxnet", - "pr_number": 20331, - "pr_file": "include/mxnet/base.h", - "discussion_id": "658483058", - "commented_code": "@@ -348,10 +348,10 @@ struct RunContext {\n * \\brief the auxiliary stream of the device, can be nullptr or Stream* in GPU mode\n */\n void *aux_stream;\n- /*!\n- * \\brief indicator of whether this execution is run in bulk mode\n+ /*!\n+ * \\brief pointer to the cuda event pool used by the dependecy engine\n */\n- bool is_bulk;\n+ void *event_pool = nullptr;", - "comment_created_at": "2021-06-25T05:24:40+00:00", - "comment_author": "ptrendx", - "comment_body": "Leaving this comment here just as a way to explain why the `is_bulk` is removed. `is_bulk` was a way to mark operations as not needing synchronization when being a part of a single engine bulk. 
This PR moves the handling of the synchronization to the engine itself, so this is no longer useful.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "701276286", - "pr_number": 20331, - "pr_file": "src/engine/threaded_engine.cc", - "created_at": "2021-09-02T17:12:47+00:00", - "commented_code": "OprBlock::Delete(opr_block);\n}\n\nvoid ThreadedEngine::OnStartStatic(Engine *engine, void *opr_block,\n const dmlc::Error* error) {\n // no-op\n}\n\n#if MXNET_USE_CUDA\nstatic inline void AddEventHelper(\n std::unordered_map* events_per_stream,\n const EventInfo& cuda_event) {\n auto event_stream = cuda_event.stream;\n if (events_per_stream->count(event_stream) > 0) {\n if ((*events_per_stream)[event_stream].pool_index < cuda_event.pool_index) {\n (*events_per_stream)[event_stream] = cuda_event;", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "701276286", - "repo_full_name": "apache/mxnet", - "pr_number": 20331, - "pr_file": "src/engine/threaded_engine.cc", - "discussion_id": "701276286", - "commented_code": "@@ -523,5 +543,206 @@ void ThreadedEngine::OnCompleteStatic(Engine *engine, void *opr_block_,\n OprBlock::Delete(opr_block);\n }\n \n+void ThreadedEngine::OnStartStatic(Engine *engine, void *opr_block,\n+ const dmlc::Error* error) {\n+ // no-op\n+}\n+\n+#if MXNET_USE_CUDA\n+static inline void AddEventHelper(\n+ std::unordered_map* events_per_stream,\n+ const EventInfo& cuda_event) {\n+ auto event_stream = cuda_event.stream;\n+ if (events_per_stream->count(event_stream) > 0) {\n+ if ((*events_per_stream)[event_stream].pool_index < cuda_event.pool_index) {\n+ (*events_per_stream)[event_stream] = cuda_event;", - "comment_created_at": "2021-09-02T17:12:47+00:00", - "comment_author": "barry-jin", - "comment_body": "Why should the event be replaced by others with larger pool_index? 
", - "pr_file_module": null - }, - { - "comment_id": "701325238", - "repo_full_name": "apache/mxnet", - "pr_number": 20331, - "pr_file": "src/engine/threaded_engine.cc", - "discussion_id": "701276286", - "commented_code": "@@ -523,5 +543,206 @@ void ThreadedEngine::OnCompleteStatic(Engine *engine, void *opr_block_,\n OprBlock::Delete(opr_block);\n }\n \n+void ThreadedEngine::OnStartStatic(Engine *engine, void *opr_block,\n+ const dmlc::Error* error) {\n+ // no-op\n+}\n+\n+#if MXNET_USE_CUDA\n+static inline void AddEventHelper(\n+ std::unordered_map* events_per_stream,\n+ const EventInfo& cuda_event) {\n+ auto event_stream = cuda_event.stream;\n+ if (events_per_stream->count(event_stream) > 0) {\n+ if ((*events_per_stream)[event_stream].pool_index < cuda_event.pool_index) {\n+ (*events_per_stream)[event_stream] = cuda_event;", - "comment_created_at": "2021-09-02T18:26:32+00:00", - "comment_author": "ptrendx", - "comment_body": "Because the pool is per stream so for 2 events from the same pool the one with the larger pool_index had to be recorded later (and synchronizing with that event will synchronize with both).", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "701304204", - "pr_number": 20331, - "pr_file": "src/engine/threaded_engine_perdevice.cc", - "created_at": "2021-09-02T17:54:13+00:00", - "commented_code": "MSHADOW_CATCH_ERROR(mshadow::SetDevice(ctx.dev_id));\n #endif\n }\n this->ExecuteOprBlock(RunContext{ctx, nullptr, nullptr, false}, opr_block);\n CallbackOnStart on_start = this->CreateOnStart(ThreadedEngine::OnStartStatic,\n opr_block);\n CallbackOnComplete callback = this->CreateCallback(ThreadedEngine::OnCompleteStatic,\n opr_block);", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "701304204", - "repo_full_name": "apache/mxnet", - "pr_number": 20331, - "pr_file": "src/engine/threaded_engine_perdevice.cc", - "discussion_id": "701304204", - "commented_code": "@@ -103,7 +118,11 @@ class ThreadedEnginePerDevice : public ThreadedEngine {\n MSHADOW_CATCH_ERROR(mshadow::SetDevice(ctx.dev_id));\n #endif\n }\n- this->ExecuteOprBlock(RunContext{ctx, nullptr, nullptr, false}, opr_block);\n+ CallbackOnStart on_start = this->CreateOnStart(ThreadedEngine::OnStartStatic,\n+ opr_block);\n+ CallbackOnComplete callback = this->CreateCallback(ThreadedEngine::OnCompleteStatic,\n+ opr_block);", - "comment_created_at": "2021-09-02T17:54:13+00:00", - "comment_author": "barry-jin", - "comment_body": "Why use OnStartStatic and OnCompleteStatic here instead of creating GPU onstart and oncomplete for GPU case, creating static-onstart and static-oncomplete for CPU case. 
", - "pr_file_module": null - }, - { - "comment_id": "701326230", - "repo_full_name": "apache/mxnet", - "pr_number": 20331, - "pr_file": "src/engine/threaded_engine_perdevice.cc", - "discussion_id": "701304204", - "commented_code": "@@ -103,7 +118,11 @@ class ThreadedEnginePerDevice : public ThreadedEngine {\n MSHADOW_CATCH_ERROR(mshadow::SetDevice(ctx.dev_id));\n #endif\n }\n- this->ExecuteOprBlock(RunContext{ctx, nullptr, nullptr, false}, opr_block);\n+ CallbackOnStart on_start = this->CreateOnStart(ThreadedEngine::OnStartStatic,\n+ opr_block);\n+ CallbackOnComplete callback = this->CreateCallback(ThreadedEngine::OnCompleteStatic,\n+ opr_block);", - "comment_created_at": "2021-09-02T18:28:07+00:00", - "comment_author": "ptrendx", - "comment_body": "Mechanism introduced in this PR is in addition to the dependency tracking that already existed, and the OnCompleteStatic contains dependency update code (so it has to be called from the GPU version as well).", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-comprehensive-api-documentation.json b/_reviewers/mxnet-comprehensive-api-documentation.json new file mode 100644 index 0000000..6dbc5d1 --- /dev/null +++ b/_reviewers/mxnet-comprehensive-api-documentation.json @@ -0,0 +1,114 @@ +[ + { + "discussion_id": "250017319", + "pr_number": 13735, + "pr_file": "example/gluon/wavenet/main.py", + "created_at": "2019-01-23T00:41:29+00:00", + "commented_code": "\"\"\"\nDescrition : main module to run code\n\"\"\"\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements. See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership. The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License. You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied. See the License for the\n# specific language governing permissions and limitations\n# under the License.\nimport argparse\nfrom trainer import Train\n\ndef main():\n \"\"\"\n Description : run code using argument info\n \"\"\"\n parser = argparse.ArgumentParser()\n parser = argparse.ArgumentParser()\n parser.add_argument('--batch_size', type=int, default=64)", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "250017319", + "repo_full_name": "apache/mxnet", + "pr_number": 13735, + "pr_file": "example/gluon/wavenet/main.py", + "discussion_id": "250017319", + "commented_code": "@@ -0,0 +1,48 @@\n+\"\"\"\n+Descrition : main module to run code\n+\"\"\"\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. 
You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+import argparse\n+from trainer import Train\n+\n+def main():\n+ \"\"\"\n+ Description : run code using argument info\n+ \"\"\"\n+ parser = argparse.ArgumentParser()\n+ parser = argparse.ArgumentParser()\n+ parser.add_argument('--batch_size', type=int, default=64)", + "comment_created_at": "2019-01-23T00:41:29+00:00", + "comment_author": "ThomasDelteil", + "comment_body": "Please add descriptions to each argument", + "pr_file_module": null + }, + { + "comment_id": "250473457", + "repo_full_name": "apache/mxnet", + "pr_number": 13735, + "pr_file": "example/gluon/wavenet/main.py", + "discussion_id": "250017319", + "commented_code": "@@ -0,0 +1,48 @@\n+\"\"\"\n+Descrition : main module to run code\n+\"\"\"\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. 
See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+import argparse\n+from trainer import Train\n+\n+def main():\n+ \"\"\"\n+ Description : run code using argument info\n+ \"\"\"\n+ parser = argparse.ArgumentParser()\n+ parser = argparse.ArgumentParser()\n+ parser.add_argument('--batch_size', type=int, default=64)", + "comment_created_at": "2019-01-24T05:51:19+00:00", + "comment_author": "seujung", + "comment_body": "Add description to each argument", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "698622760", + "pr_number": 20559, + "pr_file": "python/mxnet/gluon/block.py", + "created_at": "2021-08-30T16:16:26+00:00", + "commented_code": "for p in params.values():\n p.reset_ctx(ctx)\n\n def mark_vars(self, var_arrays):\n \"\"\"Mark the intermediate nodes\n\n Parameters\n ----------\n vars: NDArrays or List[NDArrays] with nonempty deferredcomputation_entry_", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "698622760", + "repo_full_name": "apache/mxnet", + "pr_number": 20559, + "pr_file": "python/mxnet/gluon/block.py", + "discussion_id": "698622760", + "commented_code": "@@ -1635,6 +1637,47 @@ def reset_ctx(self, ctx):\n for p in params.values():\n p.reset_ctx(ctx)\n \n+ def mark_vars(self, var_arrays):\n+ \"\"\"Mark the intermediate nodes\n+\n+ Parameters\n+ ----------\n+ vars: NDArrays or List[NDArrays] with nonempty deferredcomputation_entry_", + "comment_created_at": "2021-08-30T16:16:26+00:00", + "comment_author": "barry-jin", + "comment_body": "Could you follow the [example](https://github.com/apache/incubator-mxnet/blob/49c47380fac3c8c8e7ee8eb237000165e22828ef/python/mxnet/numpy/multiarray.py#L2621-L2645) to write the docstring for mark_vars? ", + "pr_file_module": null + }, + { + "comment_id": "698727447", + "repo_full_name": "apache/mxnet", + "pr_number": 20559, + "pr_file": "python/mxnet/gluon/block.py", + "discussion_id": "698622760", + "commented_code": "@@ -1635,6 +1637,47 @@ def reset_ctx(self, ctx):\n for p in params.values():\n p.reset_ctx(ctx)\n \n+ def mark_vars(self, var_arrays):\n+ \"\"\"Mark the intermediate nodes\n+\n+ Parameters\n+ ----------\n+ vars: NDArrays or List[NDArrays] with nonempty deferredcomputation_entry_", + "comment_created_at": "2021-08-30T18:54:06+00:00", + "comment_author": "KexinFeng", + "comment_body": "Done.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "720920133", + "pr_number": 20592, + "pr_file": "python/mxnet/numpy/multiarray.py", + "created_at": "2021-10-03T22:43:24+00:00", + "commented_code": "\"\"\"\n return _mx_nd_np.arctan(x, out=out, **kwargs)\n\natan = arctan\natan.__doc__ = \"\"\"\n Trigonometric inverse tangent, element-wise.\n The inverse of tan, so that if ``y = tan(x)`` then ``x = atan(y)``.\n\n Notes\n ---------\n `atan` is a alias for `arctan`. It is a standard API in\n https://data-apis.org/array-api/latest/API_specification/elementwise_functions.html#atan-x\n instead of an official NumPy operator.\n \n >>>np.atan is np.arctan\n True\n \n Parameters\n ----------\n x : ndarray or scalar\n Input values.\n out : ndarray or None, optional\n A location into which the result is stored. If provided, it must have\n a shape that the inputs broadcast to. If not provided or `None`,\n a freshly-allocated array is returned.\n\n Returns\n -------\n out : ndarray or scalar\n Out has the same shape as `x`. 
It lies is in\n ``[-pi/2, pi/2]`` (``atan(+/-inf)`` returns ``+/-pi/2``).\n This is a scalar if `x` is a scalar.\n\n Notes", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "720920133", + "repo_full_name": "apache/mxnet", + "pr_number": 20592, + "pr_file": "python/mxnet/numpy/multiarray.py", + "discussion_id": "720920133", + "commented_code": "@@ -4309,6 +4493,56 @@ def arctan(x, out=None, **kwargs):\n \"\"\"\n return _mx_nd_np.arctan(x, out=out, **kwargs)\n \n+atan = arctan\n+atan.__doc__ = \"\"\"\n+ Trigonometric inverse tangent, element-wise.\n+ The inverse of tan, so that if ``y = tan(x)`` then ``x = atan(y)``.\n+\n+ Notes\n+ ---------\n+ `atan` is a alias for `arctan`. It is a standard API in\n+ https://data-apis.org/array-api/latest/API_specification/elementwise_functions.html#atan-x\n+ instead of an official NumPy operator.\n+ \n+ >>>np.atan is np.arctan\n+ True\n+ \n+ Parameters\n+ ----------\n+ x : ndarray or scalar\n+ Input values.\n+ out : ndarray or None, optional\n+ A location into which the result is stored. If provided, it must have\n+ a shape that the inputs broadcast to. If not provided or `None`,\n+ a freshly-allocated array is returned.\n+\n+ Returns\n+ -------\n+ out : ndarray or scalar\n+ Out has the same shape as `x`. It lies is in\n+ ``[-pi/2, pi/2]`` (``atan(+/-inf)`` returns ``+/-pi/2``).\n+ This is a scalar if `x` is a scalar.\n+\n+ Notes", + "comment_created_at": "2021-10-03T22:43:24+00:00", + "comment_author": "barry-jin", + "comment_body": "Could you append a note here to elaborate that atan is a standardized API and is equivelant to arctan and also provide the reference. ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "720922965", + "pr_number": 20592, + "pr_file": "python/mxnet/numpy/multiarray.py", + "created_at": "2021-10-03T22:48:00+00:00", + "commented_code": "\"\"\"\n return _mx_nd_np.dsplit(ary, indices_or_sections)\n\n@set_module('mxnet.numpy')\ndef concat(seq, axis=0, out=None):\n \"\"\"Join a sequence of arrays along an existing axis.\n\n Parameters\n ----------\n a1, a2, ... : sequence of array_like\n The arrays must have the same shape, except in the dimension\n corresponding to `axis` (the first, by default).\n axis : int, optional\n The axis along which the arrays will be joined. If axis is None,\n arrays are flattened before use. Default is 0.\n out : ndarray, optional\n If provided, the destination to place the result. 
The shape must be\n correct, matching that of what concatenate would have returned if no\n out argument were specified.\n\n Returns\n -------\n res : ndarray\n The concatenated array.\n\n See Also\n --------\n split : Split array into a list of multiple sub-arrays of equal size.\n hsplit : Split array into multiple sub-arrays horizontally (column wise)\n vsplit : Split array into multiple sub-arrays vertically (row wise)\n dsplit : Split array into multiple sub-arrays along the 3rd axis (depth).\n stack : Stack a sequence of arrays along a new axis.\n hstack : Stack arrays in sequence horizontally (column wise)\n vstack : Stack arrays in sequence vertically (row wise)\n dstack : Stack arrays in sequence depth wise (along third dimension)\n\n Examples\n --------\n >>> a = np.array([[1, 2], [3, 4]])\n >>> b = np.array([[5, 6]])\n >>> np.concat((a, b), axis=0)\n array([[1., 2.],\n [3., 4.],\n [5., 6.]])\n\n >>> np.concat((a, b.T), axis=1)\n array([[1., 2., 5.],\n [3., 4., 6.]])", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "720922965", + "repo_full_name": "apache/mxnet", + "pr_number": 20592, + "pr_file": "python/mxnet/numpy/multiarray.py", + "discussion_id": "720922965", + "commented_code": "@@ -6533,6 +6972,56 @@ def dsplit(ary, indices_or_sections):\n \"\"\"\n return _mx_nd_np.dsplit(ary, indices_or_sections)\n \n+@set_module('mxnet.numpy')\n+def concat(seq, axis=0, out=None):\n+ \"\"\"Join a sequence of arrays along an existing axis.\n+\n+ Parameters\n+ ----------\n+ a1, a2, ... : sequence of array_like\n+ The arrays must have the same shape, except in the dimension\n+ corresponding to `axis` (the first, by default).\n+ axis : int, optional\n+ The axis along which the arrays will be joined. If axis is None,\n+ arrays are flattened before use. Default is 0.\n+ out : ndarray, optional\n+ If provided, the destination to place the result. The shape must be\n+ correct, matching that of what concatenate would have returned if no\n+ out argument were specified.\n+\n+ Returns\n+ -------\n+ res : ndarray\n+ The concatenated array.\n+\n+ See Also\n+ --------\n+ split : Split array into a list of multiple sub-arrays of equal size.\n+ hsplit : Split array into multiple sub-arrays horizontally (column wise)\n+ vsplit : Split array into multiple sub-arrays vertically (row wise)\n+ dsplit : Split array into multiple sub-arrays along the 3rd axis (depth).\n+ stack : Stack a sequence of arrays along a new axis.\n+ hstack : Stack arrays in sequence horizontally (column wise)\n+ vstack : Stack arrays in sequence vertically (row wise)\n+ dstack : Stack arrays in sequence depth wise (along third dimension)\n+\n+ Examples\n+ --------\n+ >>> a = np.array([[1, 2], [3, 4]])\n+ >>> b = np.array([[5, 6]])\n+ >>> np.concat((a, b), axis=0)\n+ array([[1., 2.],\n+ [3., 4.],\n+ [5., 6.]])\n+\n+ >>> np.concat((a, b.T), axis=1)\n+ array([[1., 2., 5.],\n+ [3., 4., 6.]])\n+", + "comment_created_at": "2021-10-03T22:48:00+00:00", + "comment_author": "barry-jin", + "comment_body": "Add note here to elaborate that concat is a standardized API and is equivelant to concatenate and also provide the reference. 
", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-comprehensive-api-documentation.md b/_reviewers/mxnet-comprehensive-api-documentation.md index a61e2d5..498cf4b 100644 --- a/_reviewers/mxnet-comprehensive-api-documentation.md +++ b/_reviewers/mxnet-comprehensive-api-documentation.md @@ -52,119 +52,3 @@ atan.__doc__ = """ # parameter documentation follows... """ ``` - - -[ - { - "discussion_id": "250017319", - "pr_number": 13735, - "pr_file": "example/gluon/wavenet/main.py", - "created_at": "2019-01-23T00:41:29+00:00", - "commented_code": "\"\"\"\nDescrition : main module to run code\n\"\"\"\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements. See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership. The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License. You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied. See the License for the\n# specific language governing permissions and limitations\n# under the License.\nimport argparse\nfrom trainer import Train\n\ndef main():\n \"\"\"\n Description : run code using argument info\n \"\"\"\n parser = argparse.ArgumentParser()\n parser = argparse.ArgumentParser()\n parser.add_argument('--batch_size', type=int, default=64)", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "250017319", - "repo_full_name": "apache/mxnet", - "pr_number": 13735, - "pr_file": "example/gluon/wavenet/main.py", - "discussion_id": "250017319", - "commented_code": "@@ -0,0 +1,48 @@\n+\"\"\"\n+Descrition : main module to run code\n+\"\"\"\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. 
See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+import argparse\n+from trainer import Train\n+\n+def main():\n+ \"\"\"\n+ Description : run code using argument info\n+ \"\"\"\n+ parser = argparse.ArgumentParser()\n+ parser = argparse.ArgumentParser()\n+ parser.add_argument('--batch_size', type=int, default=64)", - "comment_created_at": "2019-01-23T00:41:29+00:00", - "comment_author": "ThomasDelteil", - "comment_body": "Please add descriptions to each argument", - "pr_file_module": null - }, - { - "comment_id": "250473457", - "repo_full_name": "apache/mxnet", - "pr_number": 13735, - "pr_file": "example/gluon/wavenet/main.py", - "discussion_id": "250017319", - "commented_code": "@@ -0,0 +1,48 @@\n+\"\"\"\n+Descrition : main module to run code\n+\"\"\"\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+import argparse\n+from trainer import Train\n+\n+def main():\n+ \"\"\"\n+ Description : run code using argument info\n+ \"\"\"\n+ parser = argparse.ArgumentParser()\n+ parser = argparse.ArgumentParser()\n+ parser.add_argument('--batch_size', type=int, default=64)", - "comment_created_at": "2019-01-24T05:51:19+00:00", - "comment_author": "seujung", - "comment_body": "Add description to each argument", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "698622760", - "pr_number": 20559, - "pr_file": "python/mxnet/gluon/block.py", - "created_at": "2021-08-30T16:16:26+00:00", - "commented_code": "for p in params.values():\n p.reset_ctx(ctx)\n\n def mark_vars(self, var_arrays):\n \"\"\"Mark the intermediate nodes\n\n Parameters\n ----------\n vars: NDArrays or List[NDArrays] with nonempty deferredcomputation_entry_", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "698622760", - "repo_full_name": "apache/mxnet", - "pr_number": 20559, - "pr_file": "python/mxnet/gluon/block.py", - "discussion_id": "698622760", - "commented_code": "@@ -1635,6 +1637,47 @@ def reset_ctx(self, ctx):\n for p in params.values():\n p.reset_ctx(ctx)\n \n+ def mark_vars(self, var_arrays):\n+ \"\"\"Mark the intermediate nodes\n+\n+ Parameters\n+ ----------\n+ vars: NDArrays or List[NDArrays] with nonempty deferredcomputation_entry_", - "comment_created_at": "2021-08-30T16:16:26+00:00", - "comment_author": "barry-jin", - "comment_body": "Could you follow the [example](https://github.com/apache/incubator-mxnet/blob/49c47380fac3c8c8e7ee8eb237000165e22828ef/python/mxnet/numpy/multiarray.py#L2621-L2645) to write the docstring for mark_vars? 
", - "pr_file_module": null - }, - { - "comment_id": "698727447", - "repo_full_name": "apache/mxnet", - "pr_number": 20559, - "pr_file": "python/mxnet/gluon/block.py", - "discussion_id": "698622760", - "commented_code": "@@ -1635,6 +1637,47 @@ def reset_ctx(self, ctx):\n for p in params.values():\n p.reset_ctx(ctx)\n \n+ def mark_vars(self, var_arrays):\n+ \"\"\"Mark the intermediate nodes\n+\n+ Parameters\n+ ----------\n+ vars: NDArrays or List[NDArrays] with nonempty deferredcomputation_entry_", - "comment_created_at": "2021-08-30T18:54:06+00:00", - "comment_author": "KexinFeng", - "comment_body": "Done.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "720920133", - "pr_number": 20592, - "pr_file": "python/mxnet/numpy/multiarray.py", - "created_at": "2021-10-03T22:43:24+00:00", - "commented_code": "\"\"\"\n return _mx_nd_np.arctan(x, out=out, **kwargs)\n\natan = arctan\natan.__doc__ = \"\"\"\n Trigonometric inverse tangent, element-wise.\n The inverse of tan, so that if ``y = tan(x)`` then ``x = atan(y)``.\n\n Notes\n ---------\n `atan` is a alias for `arctan`. It is a standard API in\n https://data-apis.org/array-api/latest/API_specification/elementwise_functions.html#atan-x\n instead of an official NumPy operator.\n \n >>>np.atan is np.arctan\n True\n \n Parameters\n ----------\n x : ndarray or scalar\n Input values.\n out : ndarray or None, optional\n A location into which the result is stored. If provided, it must have\n a shape that the inputs broadcast to. If not provided or `None`,\n a freshly-allocated array is returned.\n\n Returns\n -------\n out : ndarray or scalar\n Out has the same shape as `x`. It lies is in\n ``[-pi/2, pi/2]`` (``atan(+/-inf)`` returns ``+/-pi/2``).\n This is a scalar if `x` is a scalar.\n\n Notes", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "720920133", - "repo_full_name": "apache/mxnet", - "pr_number": 20592, - "pr_file": "python/mxnet/numpy/multiarray.py", - "discussion_id": "720920133", - "commented_code": "@@ -4309,6 +4493,56 @@ def arctan(x, out=None, **kwargs):\n \"\"\"\n return _mx_nd_np.arctan(x, out=out, **kwargs)\n \n+atan = arctan\n+atan.__doc__ = \"\"\"\n+ Trigonometric inverse tangent, element-wise.\n+ The inverse of tan, so that if ``y = tan(x)`` then ``x = atan(y)``.\n+\n+ Notes\n+ ---------\n+ `atan` is a alias for `arctan`. It is a standard API in\n+ https://data-apis.org/array-api/latest/API_specification/elementwise_functions.html#atan-x\n+ instead of an official NumPy operator.\n+ \n+ >>>np.atan is np.arctan\n+ True\n+ \n+ Parameters\n+ ----------\n+ x : ndarray or scalar\n+ Input values.\n+ out : ndarray or None, optional\n+ A location into which the result is stored. If provided, it must have\n+ a shape that the inputs broadcast to. If not provided or `None`,\n+ a freshly-allocated array is returned.\n+\n+ Returns\n+ -------\n+ out : ndarray or scalar\n+ Out has the same shape as `x`. It lies is in\n+ ``[-pi/2, pi/2]`` (``atan(+/-inf)`` returns ``+/-pi/2``).\n+ This is a scalar if `x` is a scalar.\n+\n+ Notes", - "comment_created_at": "2021-10-03T22:43:24+00:00", - "comment_author": "barry-jin", - "comment_body": "Could you append a note here to elaborate that atan is a standardized API and is equivelant to arctan and also provide the reference. 
", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "720922965", - "pr_number": 20592, - "pr_file": "python/mxnet/numpy/multiarray.py", - "created_at": "2021-10-03T22:48:00+00:00", - "commented_code": "\"\"\"\n return _mx_nd_np.dsplit(ary, indices_or_sections)\n\n@set_module('mxnet.numpy')\ndef concat(seq, axis=0, out=None):\n \"\"\"Join a sequence of arrays along an existing axis.\n\n Parameters\n ----------\n a1, a2, ... : sequence of array_like\n The arrays must have the same shape, except in the dimension\n corresponding to `axis` (the first, by default).\n axis : int, optional\n The axis along which the arrays will be joined. If axis is None,\n arrays are flattened before use. Default is 0.\n out : ndarray, optional\n If provided, the destination to place the result. The shape must be\n correct, matching that of what concatenate would have returned if no\n out argument were specified.\n\n Returns\n -------\n res : ndarray\n The concatenated array.\n\n See Also\n --------\n split : Split array into a list of multiple sub-arrays of equal size.\n hsplit : Split array into multiple sub-arrays horizontally (column wise)\n vsplit : Split array into multiple sub-arrays vertically (row wise)\n dsplit : Split array into multiple sub-arrays along the 3rd axis (depth).\n stack : Stack a sequence of arrays along a new axis.\n hstack : Stack arrays in sequence horizontally (column wise)\n vstack : Stack arrays in sequence vertically (row wise)\n dstack : Stack arrays in sequence depth wise (along third dimension)\n\n Examples\n --------\n >>> a = np.array([[1, 2], [3, 4]])\n >>> b = np.array([[5, 6]])\n >>> np.concat((a, b), axis=0)\n array([[1., 2.],\n [3., 4.],\n [5., 6.]])\n\n >>> np.concat((a, b.T), axis=1)\n array([[1., 2., 5.],\n [3., 4., 6.]])", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "720922965", - "repo_full_name": "apache/mxnet", - "pr_number": 20592, - "pr_file": "python/mxnet/numpy/multiarray.py", - "discussion_id": "720922965", - "commented_code": "@@ -6533,6 +6972,56 @@ def dsplit(ary, indices_or_sections):\n \"\"\"\n return _mx_nd_np.dsplit(ary, indices_or_sections)\n \n+@set_module('mxnet.numpy')\n+def concat(seq, axis=0, out=None):\n+ \"\"\"Join a sequence of arrays along an existing axis.\n+\n+ Parameters\n+ ----------\n+ a1, a2, ... : sequence of array_like\n+ The arrays must have the same shape, except in the dimension\n+ corresponding to `axis` (the first, by default).\n+ axis : int, optional\n+ The axis along which the arrays will be joined. If axis is None,\n+ arrays are flattened before use. Default is 0.\n+ out : ndarray, optional\n+ If provided, the destination to place the result. 
The shape must be\n+ correct, matching that of what concatenate would have returned if no\n+ out argument were specified.\n+\n+ Returns\n+ -------\n+ res : ndarray\n+ The concatenated array.\n+\n+ See Also\n+ --------\n+ split : Split array into a list of multiple sub-arrays of equal size.\n+ hsplit : Split array into multiple sub-arrays horizontally (column wise)\n+ vsplit : Split array into multiple sub-arrays vertically (row wise)\n+ dsplit : Split array into multiple sub-arrays along the 3rd axis (depth).\n+ stack : Stack a sequence of arrays along a new axis.\n+ hstack : Stack arrays in sequence horizontally (column wise)\n+ vstack : Stack arrays in sequence vertically (row wise)\n+ dstack : Stack arrays in sequence depth wise (along third dimension)\n+\n+ Examples\n+ --------\n+ >>> a = np.array([[1, 2], [3, 4]])\n+ >>> b = np.array([[5, 6]])\n+ >>> np.concat((a, b), axis=0)\n+ array([[1., 2.],\n+ [3., 4.],\n+ [5., 6.]])\n+\n+ >>> np.concat((a, b.T), axis=1)\n+ array([[1., 2., 5.],\n+ [3., 4., 6.]])\n+", - "comment_created_at": "2021-10-03T22:48:00+00:00", - "comment_author": "barry-jin", - "comment_body": "Add note here to elaborate that concat is a standardized API and is equivelant to concatenate and also provide the reference. ", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-consistent-api-documentation.json b/_reviewers/mxnet-consistent-api-documentation.json new file mode 100644 index 0000000..4d894fa --- /dev/null +++ b/_reviewers/mxnet-consistent-api-documentation.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "694324561", + "pr_number": 20473, + "pr_file": "docs/python_docs/python/tutorials/getting-started/gluon_migration_guide.md", + "created_at": "2021-08-23T21:33:34+00:00", + "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n# Gluon2.0: Migration Guide\n\n## Overview\nSince the introduction of the Gluon API in MXNet 1.x, it has superceded commonly used symbolic, module and model APIs for model development. In fact, Gluon was the first in deep learning community to unify the flexibility of imperative programming with the performance benefits of symbolic programming, through just-in-time compilation. \n\nIn Gluon2.0, we extend the support to MXNet numpy and numpy extension with simplified interface and new functionalities: \n\n- **Simplified hybridization with deferred compute and tracing**: Deferred compute allows the imperative execution to be used for graph construction, which allows us to unify the historic divergence of NDArray and Symbol. Hybridization now works in a simplified hybrid forward interface. Users only need to specify the computation through imperative programming. Hybridization also works through tracing, i.e. tracing the data flow of the first input data to create graph.\n\n- **Data 2.0**: The new design for data loading in Gluon allows hybridizing and deploying data processing pipeline in the same way as model hybridization. 
The new C++ data loader improves data loading efficiency on CIFAR 10 by 50%.\n\n- **Distributed 2.0**: The new distributed-training design in Gluon 2.0 provides a unified distributed data parallel interface across native Parameter Server, BytePS, and Horovod, and is extensible for supporting custom distributed training libraries.\n\n- **Gluon Probability**: parameterizable probability distributions and sampling functions to facilitate more areas of research such as Baysian methods and AutoML.\n\n- **Gluon Metrics** and **Optimizers**: refactored with MXNet numpy interface and addressed legacy issues.\n\nAdopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience. \n\n## Data Pipeline\n**What's new**: In Gluon2.0, `MultithreadingDataLoader` is introduced to speed up the data loading pipeline. It will use the pure MXNet C++ implementation of dataloader, datasets and batchify functions. So, you can use either MXNet internal multithreading mode dataloader or python multiprocessing mode dataloader in Gluon2.0. \n\n**Migration Guide**: Users can continue with the traditional gluon.data.Dataloader and the C++ backend will be applied automatically. \n\n[Gluon2.0 dataloader](../../api/gluon/data/index.rst#mxnet.gluon.data.DataLoader) will provide a new parameter called `try_nopython`. This parameter takes default value of None; when set to `True` the dataloader will compile python dataloading pipeline into pure MXNet c++ implementation. The compilation is not guaranteed to support all use cases, but it will fallback to python in case of failure: \n\n- The dataset is not fully [supported by backend](../../api/gluon/data/index.rst#mxnet.gluon.data.Dataset)(e.g., there are custom python datasets).\n\n- Transform is not fully hybridizable. \n\n- Bachify is not fully [supported by backend](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/data/batchify.py). \n\n\nYou can refer to [Step5 in Crash Course](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/getting-started/crash-course/5-datasets.html#New-in-MXNet-2.0:-faster-C++-backend-dataloaders) for a detailed performance increase with C++ backend. \n## Modeling\nIn Gluon2.0, users will have a brand new modeling experience with NumPy-compatible APIs and deferred compute mechanism. \n\n- **NumPy-compatible programing experience**: users can build their models with MXNet implementation with NumPy array library, NumPy-compatible math operators and some neural network extension operators. \n\n- **Imperative-only coding experience**: with deferred compute and tracing being introduced, users only need to specify the computation through imperative coding but can still make hybridization work. Users will no longer need to interact with symbol APIs. \n\nTo help users migrate smoothly to use these simplified interface, we will provide the following guidance on how to replace legacy operators with NumPy-compatible operators, how to build models with `forward` instead of `hybrid_forward` and how to use `Parameter` class to register your parameters. \n\n\n### NumPy-compatible Programming Experience\n#### NumPy Arrays\nMXNet [NumPy ndarray(i.e. `mx.np.ndarray`)](../../api/np/arrays.ndarray.html) is a multidimensional container of items of the same type and size. Most of its properties and attributes are the same as legacy NDArrays(i.e. 
`mx.nd.ndarray`), so users can use NumPy array library just as they did with legacy NDArrays. But, there are still some changes and deprecations that needs attention, as mentioned below. \n**Migration Guide**: \n\n1. Currently, NumPy ndarray only supports `default` storage type, other storage types, like `row_sparse`, `csr` are not supported. Also, `tostype()` attribute is deprecated. \n\n2. Users can use `as_np_ndarray` attribute to switch from a legacy NDArray to NumPy ndarray just like this:\n ```{.python}\n import mxnet as mx\n nd_array = mx.ones((5,3))\n np_array = nd_array.as_np_ndarray()\n ```\n\n3. Compared with legacy NDArray, some attributes are deprecated in NumPy ndarray. Listed below are some of the deprecated APIs and their corresponding replacements in NumPy ndarray, others can be found in [**Appendix/NumPy Array Deprecated Attributes**](#NumPy-Array-Deprecated-Attributes).\n | Deprecated Attributes | NumPy ndarray Equivalent |\n | ----------------------------------------------------- | ------------------------------ |\n | `a.asscalar()` | `a.item()` |\n | `a.as_in_context()` | `a.as_in_ctx()` |\n | `a.context` | `a.ctx` |\n | `a.reshape_like(b)` | `a.reshape(b.shape)` |\n | `a.zeros_like(b)` | `mx.np.zeros_like(b)` |\n | `a.ones_like(b)` | `mx.np.ones_like(b)` |\n\n4. Compared with legacy NDArray, some attributes will have different behaviors and take different inputs. \n | Attribute | Legacy Inputs | NumPy Inputs |\n | ----------------------------- | ------------------------ | -------- |\n | `a.reshape(*args, **kwargs)` | **shape**: Some dimensions of the shape can take special values from the set {0, -1, -2, -3, -4}.
The significance of each is explained below:
``0`` copy this dimension from the input to the output shape.
``-1`` infers the dimension of the output shape by using the remainder of the input dimensions.
``-2`` copy all/remainder of the input dimensions to the output shape.
``-3`` use the product of two consecutive dimensions of the input shape as the output dimension.
``-4`` split one dimension of the input into two dimensions passed subsequent to -4 in shape (can contain -1).
**reverse**: If set to 1, then the special values are inferred from right to left | **shape**: shape parameter will be **positional argument** rather than key-word argument.
Some dimensions of the shape can take special values from the set {-1, -2, -3, -4, -5, -6}.
The significance of each is explained below:
``-1`` infers the dimension of the output shape by using the remainder of the input dimensions.
``-2`` copy this dimension from the input to the output shape.
``-3`` will skip current dimension if and only if the current dim size is one.
``-4`` copy all remain of the input dimensions to the output shape.
``-5`` use the product of two consecutive dimensions of the input shape as the output.
``-6`` split one dimension of the input into two dimensions passed subsequent to -6 in the new shape.
**reverse**: No **reverse** parameter for `np.reshape` but for `npx.reshape`.
**order**: Read the elements of `a` using this index order, and place the elements into the reshaped array using this index order. |\n\n\n#### NumPy and NumPy-extension Operators\nMost of the legacy NDArray operators(`mx.nd.op`) have the equivalent ones in np/npx namespace, users can just repalce them with `mx.np.op` or `mx.npx.op` to migrate. Some of the operators will have different inputs and behaviors as listed in the table below.", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "694324561", + "repo_full_name": "apache/mxnet", + "pr_number": 20473, + "pr_file": "docs/python_docs/python/tutorials/getting-started/gluon_migration_guide.md", + "discussion_id": "694324561", + "commented_code": "@@ -0,0 +1,453 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+# Gluon2.0: Migration Guide\n+\n+## Overview\n+Since the introduction of the Gluon API in MXNet 1.x, it has superceded commonly used symbolic, module and model APIs for model development. In fact, Gluon was the first in deep learning community to unify the flexibility of imperative programming with the performance benefits of symbolic programming, through just-in-time compilation. \n+\n+In Gluon2.0, we extend the support to MXNet numpy and numpy extension with simplified interface and new functionalities: \n+\n+- **Simplified hybridization with deferred compute and tracing**: Deferred compute allows the imperative execution to be used for graph construction, which allows us to unify the historic divergence of NDArray and Symbol. Hybridization now works in a simplified hybrid forward interface. Users only need to specify the computation through imperative programming. Hybridization also works through tracing, i.e. tracing the data flow of the first input data to create graph.\n+\n+- **Data 2.0**: The new design for data loading in Gluon allows hybridizing and deploying data processing pipeline in the same way as model hybridization. The new C++ data loader improves data loading efficiency on CIFAR 10 by 50%.\n+\n+- **Distributed 2.0**: The new distributed-training design in Gluon 2.0 provides a unified distributed data parallel interface across native Parameter Server, BytePS, and Horovod, and is extensible for supporting custom distributed training libraries.\n+\n+- **Gluon Probability**: parameterizable probability distributions and sampling functions to facilitate more areas of research such as Baysian methods and AutoML.\n+\n+- **Gluon Metrics** and **Optimizers**: refactored with MXNet numpy interface and addressed legacy issues.\n+\n+Adopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience. \n+\n+## Data Pipeline\n+**What's new**: In Gluon2.0, `MultithreadingDataLoader` is introduced to speed up the data loading pipeline. It will use the pure MXNet C++ implementation of dataloader, datasets and batchify functions. So, you can use either MXNet internal multithreading mode dataloader or python multiprocessing mode dataloader in Gluon2.0. \n+\n+**Migration Guide**: Users can continue with the traditional gluon.data.Dataloader and the C++ backend will be applied automatically. \n+\n+[Gluon2.0 dataloader](../../api/gluon/data/index.rst#mxnet.gluon.data.DataLoader) will provide a new parameter called `try_nopython`. 
This parameter takes default value of None; when set to `True` the dataloader will compile python dataloading pipeline into pure MXNet c++ implementation. The compilation is not guaranteed to support all use cases, but it will fallback to python in case of failure: \n+\n+- The dataset is not fully [supported by backend](../../api/gluon/data/index.rst#mxnet.gluon.data.Dataset)(e.g., there are custom python datasets).\n+\n+- Transform is not fully hybridizable. \n+\n+- Bachify is not fully [supported by backend](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/data/batchify.py). \n+\n+\n+You can refer to [Step5 in Crash Course](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/getting-started/crash-course/5-datasets.html#New-in-MXNet-2.0:-faster-C++-backend-dataloaders) for a detailed performance increase with C++ backend. \n+## Modeling\n+In Gluon2.0, users will have a brand new modeling experience with NumPy-compatible APIs and deferred compute mechanism. \n+\n+- **NumPy-compatible programing experience**: users can build their models with MXNet implementation with NumPy array library, NumPy-compatible math operators and some neural network extension operators. \n+\n+- **Imperative-only coding experience**: with deferred compute and tracing being introduced, users only need to specify the computation through imperative coding but can still make hybridization work. Users will no longer need to interact with symbol APIs. \n+\n+To help users migrate smoothly to use these simplified interface, we will provide the following guidance on how to replace legacy operators with NumPy-compatible operators, how to build models with `forward` instead of `hybrid_forward` and how to use `Parameter` class to register your parameters. \n+\n+\n+### NumPy-compatible Programming Experience\n+#### NumPy Arrays\n+MXNet [NumPy ndarray(i.e. `mx.np.ndarray`)](../../api/np/arrays.ndarray.html) is a multidimensional container of items of the same type and size. Most of its properties and attributes are the same as legacy NDArrays(i.e. `mx.nd.ndarray`), so users can use NumPy array library just as they did with legacy NDArrays. But, there are still some changes and deprecations that needs attention, as mentioned below. \n+**Migration Guide**: \n+\n+1. Currently, NumPy ndarray only supports `default` storage type, other storage types, like `row_sparse`, `csr` are not supported. Also, `tostype()` attribute is deprecated. \n+\n+2. Users can use `as_np_ndarray` attribute to switch from a legacy NDArray to NumPy ndarray just like this:\n+ ```{.python}\n+ import mxnet as mx\n+ nd_array = mx.ones((5,3))\n+ np_array = nd_array.as_np_ndarray()\n+ ```\n+\n+3. Compared with legacy NDArray, some attributes are deprecated in NumPy ndarray. Listed below are some of the deprecated APIs and their corresponding replacements in NumPy ndarray, others can be found in [**Appendix/NumPy Array Deprecated Attributes**](#NumPy-Array-Deprecated-Attributes).\n+ | Deprecated Attributes | NumPy ndarray Equivalent |\n+ | ----------------------------------------------------- | ------------------------------ |\n+ | `a.asscalar()` | `a.item()` |\n+ | `a.as_in_context()` | `a.as_in_ctx()` |\n+ | `a.context` | `a.ctx` |\n+ | `a.reshape_like(b)` | `a.reshape(b.shape)` |\n+ | `a.zeros_like(b)` | `mx.np.zeros_like(b)` |\n+ | `a.ones_like(b)` | `mx.np.ones_like(b)` |\n+\n+4. Compared with legacy NDArray, some attributes will have different behaviors and take different inputs. 
\n+ | Attribute | Legacy Inputs | NumPy Inputs |\n+ | ----------------------------- | ------------------------ | -------- |\n+ | `a.reshape(*args, **kwargs)` | **shape**: Some dimensions of the shape can take special values from the set {0, -1, -2, -3, -4}.
The significance of each is explained below:
``0`` copy this dimension from the input to the output shape.
``-1`` infers the dimension of the output shape by using the remainder of the input dimensions.
``-2`` copy all/remainder of the input dimensions to the output shape.
``-3`` use the product of two consecutive dimensions of the input shape as the output dimension.
``-4`` split one dimension of the input into two dimensions passed subsequent to -4 in shape (can contain -1).
**reverse**: If set to 1, then the special values are inferred from right to left | **shape**: shape parameter will be **positional argument** rather than key-word argument.
Some dimensions of the shape can take special values from the set {-1, -2, -3, -4, -5, -6}.
The significance of each is explained below:
``-1`` infers the dimension of the output shape by using the remainder of the input dimensions.
``-2`` copy this dimension from the input to the output shape.
``-3`` will skip current dimension if and only if the current dim size is one.
``-4`` copy all remain of the input dimensions to the output shape.
``-5`` use the product of two consecutive dimensions of the input shape as the output.
``-6`` split one dimension of the input into two dimensions passed subsequent to -6 in the new shape.
**reverse**: No **reverse** parameter for `np.reshape` but for `npx.reshape`.
**order**: Read the elements of `a` using this index order, and place the elements into the reshaped array using this index order. |\n+\n+\n+#### NumPy and NumPy-extension Operators\n+Most of the legacy NDArray operators(`mx.nd.op`) have the equivalent ones in np/npx namespace, users can just repalce them with `mx.np.op` or `mx.npx.op` to migrate. Some of the operators will have different inputs and behaviors as listed in the table below. ", + "comment_created_at": "2021-08-23T21:33:34+00:00", + "comment_author": "szha", + "comment_body": "```suggestion\r\nMost of the legacy NDArray operators(`mx.nd.op`) have the equivalent ones in np/npx namespace. Users can just replace them with `mx.np.op` or `mx.npx.op` to migrate. Some of the operators will have different inputs and behaviors as listed in the table below. \r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "694324761", + "pr_number": 20473, + "pr_file": "docs/python_docs/python/tutorials/getting-started/gluon_migration_guide.md", + "created_at": "2021-08-23T21:34:00+00:00", + "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n# Gluon2.0: Migration Guide\n\n## Overview\nSince the introduction of the Gluon API in MXNet 1.x, it has superceded commonly used symbolic, module and model APIs for model development. In fact, Gluon was the first in deep learning community to unify the flexibility of imperative programming with the performance benefits of symbolic programming, through just-in-time compilation. \n\nIn Gluon2.0, we extend the support to MXNet numpy and numpy extension with simplified interface and new functionalities: \n\n- **Simplified hybridization with deferred compute and tracing**: Deferred compute allows the imperative execution to be used for graph construction, which allows us to unify the historic divergence of NDArray and Symbol. Hybridization now works in a simplified hybrid forward interface. Users only need to specify the computation through imperative programming. Hybridization also works through tracing, i.e. tracing the data flow of the first input data to create graph.\n\n- **Data 2.0**: The new design for data loading in Gluon allows hybridizing and deploying data processing pipeline in the same way as model hybridization. The new C++ data loader improves data loading efficiency on CIFAR 10 by 50%.\n\n- **Distributed 2.0**: The new distributed-training design in Gluon 2.0 provides a unified distributed data parallel interface across native Parameter Server, BytePS, and Horovod, and is extensible for supporting custom distributed training libraries.\n\n- **Gluon Probability**: parameterizable probability distributions and sampling functions to facilitate more areas of research such as Baysian methods and AutoML.\n\n- **Gluon Metrics** and **Optimizers**: refactored with MXNet numpy interface and addressed legacy issues.\n\nAdopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience. \n\n## Data Pipeline\n**What's new**: In Gluon2.0, `MultithreadingDataLoader` is introduced to speed up the data loading pipeline. It will use the pure MXNet C++ implementation of dataloader, datasets and batchify functions. So, you can use either MXNet internal multithreading mode dataloader or python multiprocessing mode dataloader in Gluon2.0. 
\n\n**Migration Guide**: Users can continue with the traditional gluon.data.Dataloader and the C++ backend will be applied automatically. \n\n[Gluon2.0 dataloader](../../api/gluon/data/index.rst#mxnet.gluon.data.DataLoader) will provide a new parameter called `try_nopython`. This parameter takes default value of None; when set to `True` the dataloader will compile python dataloading pipeline into pure MXNet c++ implementation. The compilation is not guaranteed to support all use cases, but it will fallback to python in case of failure: \n\n- The dataset is not fully [supported by backend](../../api/gluon/data/index.rst#mxnet.gluon.data.Dataset)(e.g., there are custom python datasets).\n\n- Transform is not fully hybridizable. \n\n- Bachify is not fully [supported by backend](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/data/batchify.py). \n\n\nYou can refer to [Step5 in Crash Course](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/getting-started/crash-course/5-datasets.html#New-in-MXNet-2.0:-faster-C++-backend-dataloaders) for a detailed performance increase with C++ backend. \n## Modeling\nIn Gluon2.0, users will have a brand new modeling experience with NumPy-compatible APIs and deferred compute mechanism. \n\n- **NumPy-compatible programing experience**: users can build their models with MXNet implementation with NumPy array library, NumPy-compatible math operators and some neural network extension operators. \n\n- **Imperative-only coding experience**: with deferred compute and tracing being introduced, users only need to specify the computation through imperative coding but can still make hybridization work. Users will no longer need to interact with symbol APIs. \n\nTo help users migrate smoothly to use these simplified interface, we will provide the following guidance on how to replace legacy operators with NumPy-compatible operators, how to build models with `forward` instead of `hybrid_forward` and how to use `Parameter` class to register your parameters. \n\n\n### NumPy-compatible Programming Experience\n#### NumPy Arrays\nMXNet [NumPy ndarray(i.e. `mx.np.ndarray`)](../../api/np/arrays.ndarray.html) is a multidimensional container of items of the same type and size. Most of its properties and attributes are the same as legacy NDArrays(i.e. `mx.nd.ndarray`), so users can use NumPy array library just as they did with legacy NDArrays. But, there are still some changes and deprecations that needs attention, as mentioned below. \n**Migration Guide**: \n\n1. Currently, NumPy ndarray only supports `default` storage type, other storage types, like `row_sparse`, `csr` are not supported. Also, `tostype()` attribute is deprecated. \n\n2. Users can use `as_np_ndarray` attribute to switch from a legacy NDArray to NumPy ndarray just like this:\n ```{.python}\n import mxnet as mx\n nd_array = mx.ones((5,3))\n np_array = nd_array.as_np_ndarray()\n ```\n\n3. Compared with legacy NDArray, some attributes are deprecated in NumPy ndarray. 
Listed below are some of the deprecated APIs and their corresponding replacements in NumPy ndarray, others can be found in [**Appendix/NumPy Array Deprecated Attributes**](#NumPy-Array-Deprecated-Attributes).\n | Deprecated Attributes | NumPy ndarray Equivalent |\n | ----------------------------------------------------- | ------------------------------ |\n | `a.asscalar()` | `a.item()` |\n | `a.as_in_context()` | `a.as_in_ctx()` |\n | `a.context` | `a.ctx` |\n | `a.reshape_like(b)` | `a.reshape(b.shape)` |\n | `a.zeros_like(b)` | `mx.np.zeros_like(b)` |\n | `a.ones_like(b)` | `mx.np.ones_like(b)` |\n\n4. Compared with legacy NDArray, some attributes will have different behaviors and take different inputs. \n | Attribute | Legacy Inputs | NumPy Inputs |\n | ----------------------------- | ------------------------ | -------- |\n | `a.reshape(*args, **kwargs)` | **shape**: Some dimensions of the shape can take special values from the set {0, -1, -2, -3, -4}.
The significance of each is explained below:
``0`` copy this dimension from the input to the output shape.
``-1`` infers the dimension of the output shape by using the remainder of the input dimensions.
``-2`` copy all/remainder of the input dimensions to the output shape.
``-3`` use the product of two consecutive dimensions of the input shape as the output dimension.
``-4`` split one dimension of the input into two dimensions passed subsequent to -4 in shape (can contain -1).
**reverse**: If set to 1, then the special values are inferred from right to left | **shape**: shape parameter will be **positional argument** rather than key-word argument.
Some dimensions of the shape can take special values from the set {-1, -2, -3, -4, -5, -6}.
The significance of each is explained below:
``-1`` infers the dimension of the output shape by using the remainder of the input dimensions.
``-2`` copy this dimension from the input to the output shape.
``-3`` will skip current dimension if and only if the current dim size is one.
``-4`` copy all remain of the input dimensions to the output shape.
``-5`` use the product of two consecutive dimensions of the input shape as the output.
``-6`` split one dimension of the input into two dimensions passed subsequent to -6 in the new shape.
**reverse**: No **reverse** parameter for `np.reshape` but for `npx.reshape`.
**order**: Read the elements of `a` using this index order, and place the elements into the reshaped array using this index order. |\n\n\n#### NumPy and NumPy-extension Operators\nMost of the legacy NDArray operators(`mx.nd.op`) have the equivalent ones in np/npx namespace, users can just repalce them with `mx.np.op` or `mx.npx.op` to migrate. Some of the operators will have different inputs and behaviors as listed in the table below. \n**Migration Guide**:\n\n1. Operators migration with name/inputs changes\n | Legacy Operators | NumPy Operators Equivalent | Changes |\n | ----------------------------------------------------- | ------------------------------ | ------------------- |\n | `mx.nd.flatten(*args, **kwargs)` | `mx.npx.batch_flatten(*args, **kwargs)` | moved to `npx` namespace with new name `batch_flatten` |\n | `mx.nd.concat(a, b, c)` | `mx.np.concatenate([a, b, c])` | - moved to `np` namespace with new name `concatenate`.
- use list of ndarrays as input rather than positional ndarrays |\n | `mx.nd.stack(a, b, c)` | `mx.np.stack([a, b, c])` | - moved to `np` namespace.
- use list of ndarrays as input rather than positional ndarrays |\n | `mx.nd.SliceChannel(*args, **kwargs)` | `mx.npx.slice_channel(*args, **kwargs)` | - moved to `npx` namespace with new name `slice_channel`. |\n | `mx.nd.FullyConnected(*args, **kwargs)` | `mx.npx.fully_connected(*args, **kwargs)` | - moved to `npx` namespace with new name `fully_connected`. |\n | `mx.nd.Activation(*args, **kwargs)` | `mx.npx.activation(*args, **kwargs)` | - moved to `npx` namespace with new name `activation`. |\n | `mx.nd.Activation(*args, **kwargs)` | `mx.npx.activation(*args, **kwargs)` | - moved to `npx` namespace with new name `activation`. |\n | `mx.nd.elemwise_add(a, b)` | `a + b` | - Just use ndarray python operator. |\n | `mx.nd.elemwise_mul(a, b)` | `mx.np.multiply(a, b)` | - Use `multiply` operator in `np` namespace. |\n\n2. Operators migration with multiple steps: `mx.nd.mean` -> `mx.np.mean`:\n```{.python}\nimport mxnet as mx\n# Legacy: calculate mean value with reduction on axis 1\n# with `exclude` option on \nnd_mean = mx.nd.mean(data, axis=1, exclude=1)\n\n# Numpy: no exclude option to users, but user can perform steps as follow\naxes = list(range(data.ndim))\ndel axes[1]\nnp_mean = mx.np.mean(data, axis=axes)\n```\n\n3. Random Operators\n | Legacy Operators | NumPy Operators Equivalent | Changes |\n | ----------------------------------------------------- | ------------------------------ | ---------------------------- |\n | `mx.random.uniform(-1.0, 1.0, shape=(2, 3))`
`mx.nd.random.uniform(-1.0, 1.0, shape=(2, 3))` | `mx.np.random.uniform(-1.0, 1.0, size=(2, 3))` | For all the NumPy random operators, use **size** key word instead of **shape** |", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "694324761", + "repo_full_name": "apache/mxnet", + "pr_number": 20473, + "pr_file": "docs/python_docs/python/tutorials/getting-started/gluon_migration_guide.md", + "discussion_id": "694324761", + "commented_code": "@@ -0,0 +1,453 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+# Gluon2.0: Migration Guide\n+\n+## Overview\n+Since the introduction of the Gluon API in MXNet 1.x, it has superceded commonly used symbolic, module and model APIs for model development. In fact, Gluon was the first in deep learning community to unify the flexibility of imperative programming with the performance benefits of symbolic programming, through just-in-time compilation. \n+\n+In Gluon2.0, we extend the support to MXNet numpy and numpy extension with simplified interface and new functionalities: \n+\n+- **Simplified hybridization with deferred compute and tracing**: Deferred compute allows the imperative execution to be used for graph construction, which allows us to unify the historic divergence of NDArray and Symbol. Hybridization now works in a simplified hybrid forward interface. Users only need to specify the computation through imperative programming. Hybridization also works through tracing, i.e. tracing the data flow of the first input data to create graph.\n+\n+- **Data 2.0**: The new design for data loading in Gluon allows hybridizing and deploying data processing pipeline in the same way as model hybridization. The new C++ data loader improves data loading efficiency on CIFAR 10 by 50%.\n+\n+- **Distributed 2.0**: The new distributed-training design in Gluon 2.0 provides a unified distributed data parallel interface across native Parameter Server, BytePS, and Horovod, and is extensible for supporting custom distributed training libraries.\n+\n+- **Gluon Probability**: parameterizable probability distributions and sampling functions to facilitate more areas of research such as Baysian methods and AutoML.\n+\n+- **Gluon Metrics** and **Optimizers**: refactored with MXNet numpy interface and addressed legacy issues.\n+\n+Adopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience. \n+\n+## Data Pipeline\n+**What's new**: In Gluon2.0, `MultithreadingDataLoader` is introduced to speed up the data loading pipeline. It will use the pure MXNet C++ implementation of dataloader, datasets and batchify functions. So, you can use either MXNet internal multithreading mode dataloader or python multiprocessing mode dataloader in Gluon2.0. \n+\n+**Migration Guide**: Users can continue with the traditional gluon.data.Dataloader and the C++ backend will be applied automatically. \n+\n+[Gluon2.0 dataloader](../../api/gluon/data/index.rst#mxnet.gluon.data.DataLoader) will provide a new parameter called `try_nopython`. This parameter takes default value of None; when set to `True` the dataloader will compile python dataloading pipeline into pure MXNet c++ implementation. 
The compilation is not guaranteed to support all use cases, but it will fallback to python in case of failure: \n+\n+- The dataset is not fully [supported by backend](../../api/gluon/data/index.rst#mxnet.gluon.data.Dataset)(e.g., there are custom python datasets).\n+\n+- Transform is not fully hybridizable. \n+\n+- Bachify is not fully [supported by backend](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/data/batchify.py). \n+\n+\n+You can refer to [Step5 in Crash Course](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/getting-started/crash-course/5-datasets.html#New-in-MXNet-2.0:-faster-C++-backend-dataloaders) for a detailed performance increase with C++ backend. \n+## Modeling\n+In Gluon2.0, users will have a brand new modeling experience with NumPy-compatible APIs and deferred compute mechanism. \n+\n+- **NumPy-compatible programing experience**: users can build their models with MXNet implementation with NumPy array library, NumPy-compatible math operators and some neural network extension operators. \n+\n+- **Imperative-only coding experience**: with deferred compute and tracing being introduced, users only need to specify the computation through imperative coding but can still make hybridization work. Users will no longer need to interact with symbol APIs. \n+\n+To help users migrate smoothly to use these simplified interface, we will provide the following guidance on how to replace legacy operators with NumPy-compatible operators, how to build models with `forward` instead of `hybrid_forward` and how to use `Parameter` class to register your parameters. \n+\n+\n+### NumPy-compatible Programming Experience\n+#### NumPy Arrays\n+MXNet [NumPy ndarray(i.e. `mx.np.ndarray`)](../../api/np/arrays.ndarray.html) is a multidimensional container of items of the same type and size. Most of its properties and attributes are the same as legacy NDArrays(i.e. `mx.nd.ndarray`), so users can use NumPy array library just as they did with legacy NDArrays. But, there are still some changes and deprecations that needs attention, as mentioned below. \n+**Migration Guide**: \n+\n+1. Currently, NumPy ndarray only supports `default` storage type, other storage types, like `row_sparse`, `csr` are not supported. Also, `tostype()` attribute is deprecated. \n+\n+2. Users can use `as_np_ndarray` attribute to switch from a legacy NDArray to NumPy ndarray just like this:\n+ ```{.python}\n+ import mxnet as mx\n+ nd_array = mx.ones((5,3))\n+ np_array = nd_array.as_np_ndarray()\n+ ```\n+\n+3. Compared with legacy NDArray, some attributes are deprecated in NumPy ndarray. Listed below are some of the deprecated APIs and their corresponding replacements in NumPy ndarray, others can be found in [**Appendix/NumPy Array Deprecated Attributes**](#NumPy-Array-Deprecated-Attributes).\n+ | Deprecated Attributes | NumPy ndarray Equivalent |\n+ | ----------------------------------------------------- | ------------------------------ |\n+ | `a.asscalar()` | `a.item()` |\n+ | `a.as_in_context()` | `a.as_in_ctx()` |\n+ | `a.context` | `a.ctx` |\n+ | `a.reshape_like(b)` | `a.reshape(b.shape)` |\n+ | `a.zeros_like(b)` | `mx.np.zeros_like(b)` |\n+ | `a.ones_like(b)` | `mx.np.ones_like(b)` |\n+\n+4. Compared with legacy NDArray, some attributes will have different behaviors and take different inputs. 
\n+ | Attribute | Legacy Inputs | NumPy Inputs |\n+ | ----------------------------- | ------------------------ | -------- |\n+ | `a.reshape(*args, **kwargs)` | **shape**: Some dimensions of the shape can take special values from the set {0, -1, -2, -3, -4}.
The significance of each is explained below:
``0`` copy this dimension from the input to the output shape.
``-1`` infers the dimension of the output shape by using the remainder of the input dimensions.
``-2`` copy all/remainder of the input dimensions to the output shape.
``-3`` use the product of two consecutive dimensions of the input shape as the output dimension.
``-4`` split one dimension of the input into two dimensions passed subsequent to -4 in shape (can contain -1).
**reverse**: If set to 1, then the special values are inferred from right to left | **shape**: shape parameter will be **positional argument** rather than key-word argument.
Some dimensions of the shape can take special values from the set {-1, -2, -3, -4, -5, -6}.
The significance of each is explained below:
``-1`` infers the dimension of the output shape by using the remainder of the input dimensions.
``-2`` copy this dimension from the input to the output shape.
``-3`` will skip current dimension if and only if the current dim size is one.
``-4`` copy all remain of the input dimensions to the output shape.
``-5`` use the product of two consecutive dimensions of the input shape as the output.
``-6`` split one dimension of the input into two dimensions passed subsequent to -6 in the new shape.
**reverse**: No **reverse** parameter for `np.reshape` but for `npx.reshape`.
**order**: Read the elements of `a` using this index order, and place the elements into the reshaped array using this index order. |\n+\n+\n+#### NumPy and NumPy-extension Operators\n+Most of the legacy NDArray operators(`mx.nd.op`) have the equivalent ones in np/npx namespace, users can just repalce them with `mx.np.op` or `mx.npx.op` to migrate. Some of the operators will have different inputs and behaviors as listed in the table below. \n+**Migration Guide**:\n+\n+1. Operators migration with name/inputs changes\n+ | Legacy Operators | NumPy Operators Equivalent | Changes |\n+ | ----------------------------------------------------- | ------------------------------ | ------------------- |\n+ | `mx.nd.flatten(*args, **kwargs)` | `mx.npx.batch_flatten(*args, **kwargs)` | moved to `npx` namespace with new name `batch_flatten` |\n+ | `mx.nd.concat(a, b, c)` | `mx.np.concatenate([a, b, c])` | - moved to `np` namespace with new name `concatenate`.
- use list of ndarrays as input rather than positional ndarrays |\n+ | `mx.nd.stack(a, b, c)` | `mx.np.stack([a, b, c])` | - moved to `np` namespace.
- use list of ndarrays as input rather than positional ndarrays |\n+ | `mx.nd.SliceChannel(*args, **kwargs)` | `mx.npx.slice_channel(*args, **kwargs)` | - moved to `npx` namespace with new name `slice_channel`. |\n+ | `mx.nd.FullyConnected(*args, **kwargs)` | `mx.npx.fully_connected(*args, **kwargs)` | - moved to `npx` namespace with new name `fully_connected`. |\n+ | `mx.nd.Activation(*args, **kwargs)` | `mx.npx.activation(*args, **kwargs)` | - moved to `npx` namespace with new name `activation`. |\n+ | `mx.nd.Activation(*args, **kwargs)` | `mx.npx.activation(*args, **kwargs)` | - moved to `npx` namespace with new name `activation`. |\n+ | `mx.nd.elemwise_add(a, b)` | `a + b` | - Just use ndarray python operator. |\n+ | `mx.nd.elemwise_mul(a, b)` | `mx.np.multiply(a, b)` | - Use `multiply` operator in `np` namespace. |\n+\n+2. Operators migration with multiple steps: `mx.nd.mean` -> `mx.np.mean`:\n+```{.python}\n+import mxnet as mx\n+# Legacy: calculate mean value with reduction on axis 1\n+# with `exclude` option on \n+nd_mean = mx.nd.mean(data, axis=1, exclude=1)\n+\n+# Numpy: no exclude option to users, but user can perform steps as follow\n+axes = list(range(data.ndim))\n+del axes[1]\n+np_mean = mx.np.mean(data, axis=axes)\n+```\n+\n+3. Random Operators\n+ | Legacy Operators | NumPy Operators Equivalent | Changes |\n+ | ----------------------------------------------------- | ------------------------------ | ---------------------------- |\n+ | `mx.random.uniform(-1.0, 1.0, shape=(2, 3))`
`mx.nd.random.uniform(-1.0, 1.0, shape=(2, 3))` | `mx.np.random.uniform(-1.0, 1.0, size=(2, 3))` | For all the NumPy random operators, use **size** key word instead of **shape** |", + "comment_created_at": "2021-08-23T21:34:00+00:00", + "comment_author": "szha", + "comment_body": "```suggestion\r\n | `mx.random.uniform(-1.0, 1.0, shape=(2, 3))`
`mx.nd.random.uniform(-1.0, 1.0, shape=(2, 3))` | `mx.np.random.uniform(-1.0, 1.0, size=(2, 3))` | For all the NumPy random operators, use **size** keyword instead of **shape** |\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-consistent-api-documentation.md b/_reviewers/mxnet-consistent-api-documentation.md index b0a3358..dff0716 100644 --- a/_reviewers/mxnet-consistent-api-documentation.md +++ b/_reviewers/mxnet-consistent-api-documentation.md @@ -49,51 +49,3 @@ shape : tuple, optional ``` Consistent documentation reduces developer confusion and improves API adoption. It serves as the primary interface between your API and its users, making it as important as the implementation itself. - - -[ - { - "discussion_id": "694324561", - "pr_number": 20473, - "pr_file": "docs/python_docs/python/tutorials/getting-started/gluon_migration_guide.md", - "created_at": "2021-08-23T21:33:34+00:00", - "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n# Gluon2.0: Migration Guide\n\n## Overview\nSince the introduction of the Gluon API in MXNet 1.x, it has superceded commonly used symbolic, module and model APIs for model development. In fact, Gluon was the first in deep learning community to unify the flexibility of imperative programming with the performance benefits of symbolic programming, through just-in-time compilation. \n\nIn Gluon2.0, we extend the support to MXNet numpy and numpy extension with simplified interface and new functionalities: \n\n- **Simplified hybridization with deferred compute and tracing**: Deferred compute allows the imperative execution to be used for graph construction, which allows us to unify the historic divergence of NDArray and Symbol. Hybridization now works in a simplified hybrid forward interface. Users only need to specify the computation through imperative programming. Hybridization also works through tracing, i.e. tracing the data flow of the first input data to create graph.\n\n- **Data 2.0**: The new design for data loading in Gluon allows hybridizing and deploying data processing pipeline in the same way as model hybridization. The new C++ data loader improves data loading efficiency on CIFAR 10 by 50%.\n\n- **Distributed 2.0**: The new distributed-training design in Gluon 2.0 provides a unified distributed data parallel interface across native Parameter Server, BytePS, and Horovod, and is extensible for supporting custom distributed training libraries.\n\n- **Gluon Probability**: parameterizable probability distributions and sampling functions to facilitate more areas of research such as Baysian methods and AutoML.\n\n- **Gluon Metrics** and **Optimizers**: refactored with MXNet numpy interface and addressed legacy issues.\n\nAdopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience. \n\n## Data Pipeline\n**What's new**: In Gluon2.0, `MultithreadingDataLoader` is introduced to speed up the data loading pipeline. It will use the pure MXNet C++ implementation of dataloader, datasets and batchify functions. So, you can use either MXNet internal multithreading mode dataloader or python multiprocessing mode dataloader in Gluon2.0. \n\n**Migration Guide**: Users can continue with the traditional gluon.data.Dataloader and the C++ backend will be applied automatically. 
\n\n[Gluon2.0 dataloader](../../api/gluon/data/index.rst#mxnet.gluon.data.DataLoader) will provide a new parameter called `try_nopython`. This parameter takes default value of None; when set to `True` the dataloader will compile python dataloading pipeline into pure MXNet c++ implementation. The compilation is not guaranteed to support all use cases, but it will fallback to python in case of failure: \n\n- The dataset is not fully [supported by backend](../../api/gluon/data/index.rst#mxnet.gluon.data.Dataset)(e.g., there are custom python datasets).\n\n- Transform is not fully hybridizable. \n\n- Bachify is not fully [supported by backend](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/data/batchify.py). \n\n\nYou can refer to [Step5 in Crash Course](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/getting-started/crash-course/5-datasets.html#New-in-MXNet-2.0:-faster-C++-backend-dataloaders) for a detailed performance increase with C++ backend. \n## Modeling\nIn Gluon2.0, users will have a brand new modeling experience with NumPy-compatible APIs and deferred compute mechanism. \n\n- **NumPy-compatible programing experience**: users can build their models with MXNet implementation with NumPy array library, NumPy-compatible math operators and some neural network extension operators. \n\n- **Imperative-only coding experience**: with deferred compute and tracing being introduced, users only need to specify the computation through imperative coding but can still make hybridization work. Users will no longer need to interact with symbol APIs. \n\nTo help users migrate smoothly to use these simplified interface, we will provide the following guidance on how to replace legacy operators with NumPy-compatible operators, how to build models with `forward` instead of `hybrid_forward` and how to use `Parameter` class to register your parameters. \n\n\n### NumPy-compatible Programming Experience\n#### NumPy Arrays\nMXNet [NumPy ndarray(i.e. `mx.np.ndarray`)](../../api/np/arrays.ndarray.html) is a multidimensional container of items of the same type and size. Most of its properties and attributes are the same as legacy NDArrays(i.e. `mx.nd.ndarray`), so users can use NumPy array library just as they did with legacy NDArrays. But, there are still some changes and deprecations that needs attention, as mentioned below. \n**Migration Guide**: \n\n1. Currently, NumPy ndarray only supports `default` storage type, other storage types, like `row_sparse`, `csr` are not supported. Also, `tostype()` attribute is deprecated. \n\n2. Users can use `as_np_ndarray` attribute to switch from a legacy NDArray to NumPy ndarray just like this:\n ```{.python}\n import mxnet as mx\n nd_array = mx.ones((5,3))\n np_array = nd_array.as_np_ndarray()\n ```\n\n3. Compared with legacy NDArray, some attributes are deprecated in NumPy ndarray. Listed below are some of the deprecated APIs and their corresponding replacements in NumPy ndarray, others can be found in [**Appendix/NumPy Array Deprecated Attributes**](#NumPy-Array-Deprecated-Attributes).\n | Deprecated Attributes | NumPy ndarray Equivalent |\n | ----------------------------------------------------- | ------------------------------ |\n | `a.asscalar()` | `a.item()` |\n | `a.as_in_context()` | `a.as_in_ctx()` |\n | `a.context` | `a.ctx` |\n | `a.reshape_like(b)` | `a.reshape(b.shape)` |\n | `a.zeros_like(b)` | `mx.np.zeros_like(b)` |\n | `a.ones_like(b)` | `mx.np.ones_like(b)` |\n\n4. 
Compared with legacy NDArray, some attributes will have different behaviors and take different inputs. \n | Attribute | Legacy Inputs | NumPy Inputs |\n | ----------------------------- | ------------------------ | -------- |\n | `a.reshape(*args, **kwargs)` | **shape**: Some dimensions of the shape can take special values from the set {0, -1, -2, -3, -4}.
The significance of each is explained below:
``0`` copy this dimension from the input to the output shape.
``-1`` infers the dimension of the output shape by using the remainder of the input dimensions.
``-2`` copy all/remainder of the input dimensions to the output shape.
``-3`` use the product of two consecutive dimensions of the input shape as the output dimension.
``-4`` split one dimension of the input into two dimensions passed subsequent to -4 in shape (can contain -1).
**reverse**: If set to 1, then the special values are inferred from right to left | **shape**: shape parameter will be **positional argument** rather than key-word argument.
Some dimensions of the shape can take special values from the set {-1, -2, -3, -4, -5, -6}.
The significance of each is explained below:
``-1`` infers the dimension of the output shape by using the remainder of the input dimensions.
``-2`` copy this dimension from the input to the output shape.
``-3`` will skip current dimension if and only if the current dim size is one.
``-4`` copy all remain of the input dimensions to the output shape.
``-5`` use the product of two consecutive dimensions of the input shape as the output.
``-6`` split one dimension of the input into two dimensions passed subsequent to -6 in the new shape.
**reverse**: No **reverse** parameter for `np.reshape` but for `npx.reshape`.
**order**: Read the elements of `a` using this index order, and place the elements into the reshaped array using this index order. |\n\n\n#### NumPy and NumPy-extension Operators\nMost of the legacy NDArray operators(`mx.nd.op`) have the equivalent ones in np/npx namespace, users can just repalce them with `mx.np.op` or `mx.npx.op` to migrate. Some of the operators will have different inputs and behaviors as listed in the table below.", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "694324561", - "repo_full_name": "apache/mxnet", - "pr_number": 20473, - "pr_file": "docs/python_docs/python/tutorials/getting-started/gluon_migration_guide.md", - "discussion_id": "694324561", - "commented_code": "@@ -0,0 +1,453 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+# Gluon2.0: Migration Guide\n+\n+## Overview\n+Since the introduction of the Gluon API in MXNet 1.x, it has superceded commonly used symbolic, module and model APIs for model development. In fact, Gluon was the first in deep learning community to unify the flexibility of imperative programming with the performance benefits of symbolic programming, through just-in-time compilation. \n+\n+In Gluon2.0, we extend the support to MXNet numpy and numpy extension with simplified interface and new functionalities: \n+\n+- **Simplified hybridization with deferred compute and tracing**: Deferred compute allows the imperative execution to be used for graph construction, which allows us to unify the historic divergence of NDArray and Symbol. Hybridization now works in a simplified hybrid forward interface. Users only need to specify the computation through imperative programming. Hybridization also works through tracing, i.e. tracing the data flow of the first input data to create graph.\n+\n+- **Data 2.0**: The new design for data loading in Gluon allows hybridizing and deploying data processing pipeline in the same way as model hybridization. The new C++ data loader improves data loading efficiency on CIFAR 10 by 50%.\n+\n+- **Distributed 2.0**: The new distributed-training design in Gluon 2.0 provides a unified distributed data parallel interface across native Parameter Server, BytePS, and Horovod, and is extensible for supporting custom distributed training libraries.\n+\n+- **Gluon Probability**: parameterizable probability distributions and sampling functions to facilitate more areas of research such as Baysian methods and AutoML.\n+\n+- **Gluon Metrics** and **Optimizers**: refactored with MXNet numpy interface and addressed legacy issues.\n+\n+Adopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience. \n+\n+## Data Pipeline\n+**What's new**: In Gluon2.0, `MultithreadingDataLoader` is introduced to speed up the data loading pipeline. It will use the pure MXNet C++ implementation of dataloader, datasets and batchify functions. So, you can use either MXNet internal multithreading mode dataloader or python multiprocessing mode dataloader in Gluon2.0. \n+\n+**Migration Guide**: Users can continue with the traditional gluon.data.Dataloader and the C++ backend will be applied automatically. \n+\n+[Gluon2.0 dataloader](../../api/gluon/data/index.rst#mxnet.gluon.data.DataLoader) will provide a new parameter called `try_nopython`. 
This parameter takes default value of None; when set to `True` the dataloader will compile python dataloading pipeline into pure MXNet c++ implementation. The compilation is not guaranteed to support all use cases, but it will fallback to python in case of failure: \n+\n+- The dataset is not fully [supported by backend](../../api/gluon/data/index.rst#mxnet.gluon.data.Dataset)(e.g., there are custom python datasets).\n+\n+- Transform is not fully hybridizable. \n+\n+- Bachify is not fully [supported by backend](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/data/batchify.py). \n+\n+\n+You can refer to [Step5 in Crash Course](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/getting-started/crash-course/5-datasets.html#New-in-MXNet-2.0:-faster-C++-backend-dataloaders) for a detailed performance increase with C++ backend. \n+## Modeling\n+In Gluon2.0, users will have a brand new modeling experience with NumPy-compatible APIs and deferred compute mechanism. \n+\n+- **NumPy-compatible programing experience**: users can build their models with MXNet implementation with NumPy array library, NumPy-compatible math operators and some neural network extension operators. \n+\n+- **Imperative-only coding experience**: with deferred compute and tracing being introduced, users only need to specify the computation through imperative coding but can still make hybridization work. Users will no longer need to interact with symbol APIs. \n+\n+To help users migrate smoothly to use these simplified interface, we will provide the following guidance on how to replace legacy operators with NumPy-compatible operators, how to build models with `forward` instead of `hybrid_forward` and how to use `Parameter` class to register your parameters. \n+\n+\n+### NumPy-compatible Programming Experience\n+#### NumPy Arrays\n+MXNet [NumPy ndarray(i.e. `mx.np.ndarray`)](../../api/np/arrays.ndarray.html) is a multidimensional container of items of the same type and size. Most of its properties and attributes are the same as legacy NDArrays(i.e. `mx.nd.ndarray`), so users can use NumPy array library just as they did with legacy NDArrays. But, there are still some changes and deprecations that needs attention, as mentioned below. \n+**Migration Guide**: \n+\n+1. Currently, NumPy ndarray only supports `default` storage type, other storage types, like `row_sparse`, `csr` are not supported. Also, `tostype()` attribute is deprecated. \n+\n+2. Users can use `as_np_ndarray` attribute to switch from a legacy NDArray to NumPy ndarray just like this:\n+ ```{.python}\n+ import mxnet as mx\n+ nd_array = mx.ones((5,3))\n+ np_array = nd_array.as_np_ndarray()\n+ ```\n+\n+3. Compared with legacy NDArray, some attributes are deprecated in NumPy ndarray. Listed below are some of the deprecated APIs and their corresponding replacements in NumPy ndarray, others can be found in [**Appendix/NumPy Array Deprecated Attributes**](#NumPy-Array-Deprecated-Attributes).\n+ | Deprecated Attributes | NumPy ndarray Equivalent |\n+ | ----------------------------------------------------- | ------------------------------ |\n+ | `a.asscalar()` | `a.item()` |\n+ | `a.as_in_context()` | `a.as_in_ctx()` |\n+ | `a.context` | `a.ctx` |\n+ | `a.reshape_like(b)` | `a.reshape(b.shape)` |\n+ | `a.zeros_like(b)` | `mx.np.zeros_like(b)` |\n+ | `a.ones_like(b)` | `mx.np.ones_like(b)` |\n+\n+4. Compared with legacy NDArray, some attributes will have different behaviors and take different inputs. 
\n+ | Attribute | Legacy Inputs | NumPy Inputs |\n+ | ----------------------------- | ------------------------ | -------- |\n+ | `a.reshape(*args, **kwargs)` | **shape**: Some dimensions of the shape can take special values from the set {0, -1, -2, -3, -4}.
The significance of each is explained below:
``0`` copy this dimension from the input to the output shape.
``-1`` infers the dimension of the output shape by using the remainder of the input dimensions.
``-2`` copy all/remainder of the input dimensions to the output shape.
``-3`` use the product of two consecutive dimensions of the input shape as the output dimension.
``-4`` split one dimension of the input into two dimensions passed subsequent to -4 in shape (can contain -1).
**reverse**: If set to 1, then the special values are inferred from right to left | **shape**: shape parameter will be **positional argument** rather than key-word argument.
Some dimensions of the shape can take special values from the set {-1, -2, -3, -4, -5, -6}.
The significance of each is explained below:
``-1`` infers the dimension of the output shape by using the remainder of the input dimensions.
``-2`` copy this dimension from the input to the output shape.
``-3`` will skip current dimension if and only if the current dim size is one.
``-4`` copy all remain of the input dimensions to the output shape.
``-5`` use the product of two consecutive dimensions of the input shape as the output.
``-6`` split one dimension of the input into two dimensions passed subsequent to -6 in the new shape.
**reverse**: No **reverse** parameter for `np.reshape` but for `npx.reshape`.
**order**: Read the elements of `a` using this index order, and place the elements into the reshaped array using this index order. |\n+\n+\n+#### NumPy and NumPy-extension Operators\n+Most of the legacy NDArray operators(`mx.nd.op`) have the equivalent ones in np/npx namespace, users can just repalce them with `mx.np.op` or `mx.npx.op` to migrate. Some of the operators will have different inputs and behaviors as listed in the table below. ", - "comment_created_at": "2021-08-23T21:33:34+00:00", - "comment_author": "szha", - "comment_body": "```suggestion\r\nMost of the legacy NDArray operators(`mx.nd.op`) have the equivalent ones in np/npx namespace. Users can just replace them with `mx.np.op` or `mx.npx.op` to migrate. Some of the operators will have different inputs and behaviors as listed in the table below. \r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "694324761", - "pr_number": 20473, - "pr_file": "docs/python_docs/python/tutorials/getting-started/gluon_migration_guide.md", - "created_at": "2021-08-23T21:34:00+00:00", - "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n# Gluon2.0: Migration Guide\n\n## Overview\nSince the introduction of the Gluon API in MXNet 1.x, it has superceded commonly used symbolic, module and model APIs for model development. In fact, Gluon was the first in deep learning community to unify the flexibility of imperative programming with the performance benefits of symbolic programming, through just-in-time compilation. \n\nIn Gluon2.0, we extend the support to MXNet numpy and numpy extension with simplified interface and new functionalities: \n\n- **Simplified hybridization with deferred compute and tracing**: Deferred compute allows the imperative execution to be used for graph construction, which allows us to unify the historic divergence of NDArray and Symbol. Hybridization now works in a simplified hybrid forward interface. Users only need to specify the computation through imperative programming. Hybridization also works through tracing, i.e. tracing the data flow of the first input data to create graph.\n\n- **Data 2.0**: The new design for data loading in Gluon allows hybridizing and deploying data processing pipeline in the same way as model hybridization. The new C++ data loader improves data loading efficiency on CIFAR 10 by 50%.\n\n- **Distributed 2.0**: The new distributed-training design in Gluon 2.0 provides a unified distributed data parallel interface across native Parameter Server, BytePS, and Horovod, and is extensible for supporting custom distributed training libraries.\n\n- **Gluon Probability**: parameterizable probability distributions and sampling functions to facilitate more areas of research such as Baysian methods and AutoML.\n\n- **Gluon Metrics** and **Optimizers**: refactored with MXNet numpy interface and addressed legacy issues.\n\nAdopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience. \n\n## Data Pipeline\n**What's new**: In Gluon2.0, `MultithreadingDataLoader` is introduced to speed up the data loading pipeline. It will use the pure MXNet C++ implementation of dataloader, datasets and batchify functions. So, you can use either MXNet internal multithreading mode dataloader or python multiprocessing mode dataloader in Gluon2.0. 
\n\n**Migration Guide**: Users can continue with the traditional gluon.data.Dataloader and the C++ backend will be applied automatically. \n\n[Gluon2.0 dataloader](../../api/gluon/data/index.rst#mxnet.gluon.data.DataLoader) will provide a new parameter called `try_nopython`. This parameter takes default value of None; when set to `True` the dataloader will compile python dataloading pipeline into pure MXNet c++ implementation. The compilation is not guaranteed to support all use cases, but it will fallback to python in case of failure: \n\n- The dataset is not fully [supported by backend](../../api/gluon/data/index.rst#mxnet.gluon.data.Dataset)(e.g., there are custom python datasets).\n\n- Transform is not fully hybridizable. \n\n- Bachify is not fully [supported by backend](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/data/batchify.py). \n\n\nYou can refer to [Step5 in Crash Course](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/getting-started/crash-course/5-datasets.html#New-in-MXNet-2.0:-faster-C++-backend-dataloaders) for a detailed performance increase with C++ backend. \n## Modeling\nIn Gluon2.0, users will have a brand new modeling experience with NumPy-compatible APIs and deferred compute mechanism. \n\n- **NumPy-compatible programing experience**: users can build their models with MXNet implementation with NumPy array library, NumPy-compatible math operators and some neural network extension operators. \n\n- **Imperative-only coding experience**: with deferred compute and tracing being introduced, users only need to specify the computation through imperative coding but can still make hybridization work. Users will no longer need to interact with symbol APIs. \n\nTo help users migrate smoothly to use these simplified interface, we will provide the following guidance on how to replace legacy operators with NumPy-compatible operators, how to build models with `forward` instead of `hybrid_forward` and how to use `Parameter` class to register your parameters. \n\n\n### NumPy-compatible Programming Experience\n#### NumPy Arrays\nMXNet [NumPy ndarray(i.e. `mx.np.ndarray`)](../../api/np/arrays.ndarray.html) is a multidimensional container of items of the same type and size. Most of its properties and attributes are the same as legacy NDArrays(i.e. `mx.nd.ndarray`), so users can use NumPy array library just as they did with legacy NDArrays. But, there are still some changes and deprecations that needs attention, as mentioned below. \n**Migration Guide**: \n\n1. Currently, NumPy ndarray only supports `default` storage type, other storage types, like `row_sparse`, `csr` are not supported. Also, `tostype()` attribute is deprecated. \n\n2. Users can use `as_np_ndarray` attribute to switch from a legacy NDArray to NumPy ndarray just like this:\n ```{.python}\n import mxnet as mx\n nd_array = mx.ones((5,3))\n np_array = nd_array.as_np_ndarray()\n ```\n\n3. Compared with legacy NDArray, some attributes are deprecated in NumPy ndarray. 
Listed below are some of the deprecated APIs and their corresponding replacements in NumPy ndarray, others can be found in [**Appendix/NumPy Array Deprecated Attributes**](#NumPy-Array-Deprecated-Attributes).\n | Deprecated Attributes | NumPy ndarray Equivalent |\n | ----------------------------------------------------- | ------------------------------ |\n | `a.asscalar()` | `a.item()` |\n | `a.as_in_context()` | `a.as_in_ctx()` |\n | `a.context` | `a.ctx` |\n | `a.reshape_like(b)` | `a.reshape(b.shape)` |\n | `a.zeros_like(b)` | `mx.np.zeros_like(b)` |\n | `a.ones_like(b)` | `mx.np.ones_like(b)` |\n\n4. Compared with legacy NDArray, some attributes will have different behaviors and take different inputs. \n | Attribute | Legacy Inputs | NumPy Inputs |\n | ----------------------------- | ------------------------ | -------- |\n | `a.reshape(*args, **kwargs)` | **shape**: Some dimensions of the shape can take special values from the set {0, -1, -2, -3, -4}.
The significance of each is explained below:
``0`` copy this dimension from the input to the output shape.
``-1`` infers the dimension of the output shape by using the remainder of the input dimensions.
``-2`` copy all/remainder of the input dimensions to the output shape.
``-3`` use the product of two consecutive dimensions of the input shape as the output dimension.
``-4`` split one dimension of the input into two dimensions passed subsequent to -4 in shape (can contain -1).
**reverse**: If set to 1, then the special values are inferred from right to left | **shape**: shape parameter will be **positional argument** rather than key-word argument.
Some dimensions of the shape can take special values from the set {-1, -2, -3, -4, -5, -6}.
The significance of each is explained below:
``-1`` infers the dimension of the output shape by using the remainder of the input dimensions.
``-2`` copy this dimension from the input to the output shape.
``-3`` will skip current dimension if and only if the current dim size is one.
``-4`` copy all remain of the input dimensions to the output shape.
``-5`` use the product of two consecutive dimensions of the input shape as the output.
``-6`` split one dimension of the input into two dimensions passed subsequent to -6 in the new shape.
**reverse**: No **reverse** parameter for `np.reshape` but for `npx.reshape`.
**order**: Read the elements of `a` using this index order, and place the elements into the reshaped array using this index order. |\n\n\n#### NumPy and NumPy-extension Operators\nMost of the legacy NDArray operators(`mx.nd.op`) have the equivalent ones in np/npx namespace, users can just repalce them with `mx.np.op` or `mx.npx.op` to migrate. Some of the operators will have different inputs and behaviors as listed in the table below. \n**Migration Guide**:\n\n1. Operators migration with name/inputs changes\n | Legacy Operators | NumPy Operators Equivalent | Changes |\n | ----------------------------------------------------- | ------------------------------ | ------------------- |\n | `mx.nd.flatten(*args, **kwargs)` | `mx.npx.batch_flatten(*args, **kwargs)` | moved to `npx` namespace with new name `batch_flatten` |\n | `mx.nd.concat(a, b, c)` | `mx.np.concatenate([a, b, c])` | - moved to `np` namespace with new name `concatenate`.
- use list of ndarrays as input rather than positional ndarrays |\n | `mx.nd.stack(a, b, c)` | `mx.np.stack([a, b, c])` | - moved to `np` namespace.
- use list of ndarrays as input rather than positional ndarrays |\n | `mx.nd.SliceChannel(*args, **kwargs)` | `mx.npx.slice_channel(*args, **kwargs)` | - moved to `npx` namespace with new name `slice_channel`. |\n | `mx.nd.FullyConnected(*args, **kwargs)` | `mx.npx.fully_connected(*args, **kwargs)` | - moved to `npx` namespace with new name `fully_connected`. |\n | `mx.nd.Activation(*args, **kwargs)` | `mx.npx.activation(*args, **kwargs)` | - moved to `npx` namespace with new name `activation`. |\n | `mx.nd.Activation(*args, **kwargs)` | `mx.npx.activation(*args, **kwargs)` | - moved to `npx` namespace with new name `activation`. |\n | `mx.nd.elemwise_add(a, b)` | `a + b` | - Just use ndarray python operator. |\n | `mx.nd.elemwise_mul(a, b)` | `mx.np.multiply(a, b)` | - Use `multiply` operator in `np` namespace. |\n\n2. Operators migration with multiple steps: `mx.nd.mean` -> `mx.np.mean`:\n```{.python}\nimport mxnet as mx\n# Legacy: calculate mean value with reduction on axis 1\n# with `exclude` option on \nnd_mean = mx.nd.mean(data, axis=1, exclude=1)\n\n# Numpy: no exclude option to users, but user can perform steps as follow\naxes = list(range(data.ndim))\ndel axes[1]\nnp_mean = mx.np.mean(data, axis=axes)\n```\n\n3. Random Operators\n | Legacy Operators | NumPy Operators Equivalent | Changes |\n | ----------------------------------------------------- | ------------------------------ | ---------------------------- |\n | `mx.random.uniform(-1.0, 1.0, shape=(2, 3))`
`mx.nd.random.uniform(-1.0, 1.0, shape=(2, 3))` | `mx.np.random.uniform(-1.0, 1.0, size=(2, 3))` | For all the NumPy random operators, use **size** key word instead of **shape** |", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "694324761", - "repo_full_name": "apache/mxnet", - "pr_number": 20473, - "pr_file": "docs/python_docs/python/tutorials/getting-started/gluon_migration_guide.md", - "discussion_id": "694324761", - "commented_code": "@@ -0,0 +1,453 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+# Gluon2.0: Migration Guide\n+\n+## Overview\n+Since the introduction of the Gluon API in MXNet 1.x, it has superceded commonly used symbolic, module and model APIs for model development. In fact, Gluon was the first in deep learning community to unify the flexibility of imperative programming with the performance benefits of symbolic programming, through just-in-time compilation. \n+\n+In Gluon2.0, we extend the support to MXNet numpy and numpy extension with simplified interface and new functionalities: \n+\n+- **Simplified hybridization with deferred compute and tracing**: Deferred compute allows the imperative execution to be used for graph construction, which allows us to unify the historic divergence of NDArray and Symbol. Hybridization now works in a simplified hybrid forward interface. Users only need to specify the computation through imperative programming. Hybridization also works through tracing, i.e. tracing the data flow of the first input data to create graph.\n+\n+- **Data 2.0**: The new design for data loading in Gluon allows hybridizing and deploying data processing pipeline in the same way as model hybridization. The new C++ data loader improves data loading efficiency on CIFAR 10 by 50%.\n+\n+- **Distributed 2.0**: The new distributed-training design in Gluon 2.0 provides a unified distributed data parallel interface across native Parameter Server, BytePS, and Horovod, and is extensible for supporting custom distributed training libraries.\n+\n+- **Gluon Probability**: parameterizable probability distributions and sampling functions to facilitate more areas of research such as Baysian methods and AutoML.\n+\n+- **Gluon Metrics** and **Optimizers**: refactored with MXNet numpy interface and addressed legacy issues.\n+\n+Adopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience. \n+\n+## Data Pipeline\n+**What's new**: In Gluon2.0, `MultithreadingDataLoader` is introduced to speed up the data loading pipeline. It will use the pure MXNet C++ implementation of dataloader, datasets and batchify functions. So, you can use either MXNet internal multithreading mode dataloader or python multiprocessing mode dataloader in Gluon2.0. \n+\n+**Migration Guide**: Users can continue with the traditional gluon.data.Dataloader and the C++ backend will be applied automatically. \n+\n+[Gluon2.0 dataloader](../../api/gluon/data/index.rst#mxnet.gluon.data.DataLoader) will provide a new parameter called `try_nopython`. This parameter takes default value of None; when set to `True` the dataloader will compile python dataloading pipeline into pure MXNet c++ implementation. 
The compilation is not guaranteed to support all use cases, but it will fallback to python in case of failure: \n+\n+- The dataset is not fully [supported by backend](../../api/gluon/data/index.rst#mxnet.gluon.data.Dataset)(e.g., there are custom python datasets).\n+\n+- Transform is not fully hybridizable. \n+\n+- Bachify is not fully [supported by backend](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/data/batchify.py). \n+\n+\n+You can refer to [Step5 in Crash Course](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/getting-started/crash-course/5-datasets.html#New-in-MXNet-2.0:-faster-C++-backend-dataloaders) for a detailed performance increase with C++ backend. \n+## Modeling\n+In Gluon2.0, users will have a brand new modeling experience with NumPy-compatible APIs and deferred compute mechanism. \n+\n+- **NumPy-compatible programing experience**: users can build their models with MXNet implementation with NumPy array library, NumPy-compatible math operators and some neural network extension operators. \n+\n+- **Imperative-only coding experience**: with deferred compute and tracing being introduced, users only need to specify the computation through imperative coding but can still make hybridization work. Users will no longer need to interact with symbol APIs. \n+\n+To help users migrate smoothly to use these simplified interface, we will provide the following guidance on how to replace legacy operators with NumPy-compatible operators, how to build models with `forward` instead of `hybrid_forward` and how to use `Parameter` class to register your parameters. \n+\n+\n+### NumPy-compatible Programming Experience\n+#### NumPy Arrays\n+MXNet [NumPy ndarray(i.e. `mx.np.ndarray`)](../../api/np/arrays.ndarray.html) is a multidimensional container of items of the same type and size. Most of its properties and attributes are the same as legacy NDArrays(i.e. `mx.nd.ndarray`), so users can use NumPy array library just as they did with legacy NDArrays. But, there are still some changes and deprecations that needs attention, as mentioned below. \n+**Migration Guide**: \n+\n+1. Currently, NumPy ndarray only supports `default` storage type, other storage types, like `row_sparse`, `csr` are not supported. Also, `tostype()` attribute is deprecated. \n+\n+2. Users can use `as_np_ndarray` attribute to switch from a legacy NDArray to NumPy ndarray just like this:\n+ ```{.python}\n+ import mxnet as mx\n+ nd_array = mx.ones((5,3))\n+ np_array = nd_array.as_np_ndarray()\n+ ```\n+\n+3. Compared with legacy NDArray, some attributes are deprecated in NumPy ndarray. Listed below are some of the deprecated APIs and their corresponding replacements in NumPy ndarray, others can be found in [**Appendix/NumPy Array Deprecated Attributes**](#NumPy-Array-Deprecated-Attributes).\n+ | Deprecated Attributes | NumPy ndarray Equivalent |\n+ | ----------------------------------------------------- | ------------------------------ |\n+ | `a.asscalar()` | `a.item()` |\n+ | `a.as_in_context()` | `a.as_in_ctx()` |\n+ | `a.context` | `a.ctx` |\n+ | `a.reshape_like(b)` | `a.reshape(b.shape)` |\n+ | `a.zeros_like(b)` | `mx.np.zeros_like(b)` |\n+ | `a.ones_like(b)` | `mx.np.ones_like(b)` |\n+\n+4. Compared with legacy NDArray, some attributes will have different behaviors and take different inputs. 
\n+ | Attribute | Legacy Inputs | NumPy Inputs |\n+ | ----------------------------- | ------------------------ | -------- |\n+ | `a.reshape(*args, **kwargs)` | **shape**: Some dimensions of the shape can take special values from the set {0, -1, -2, -3, -4}.
The significance of each is explained below:
``0`` copy this dimension from the input to the output shape.
``-1`` infers the dimension of the output shape by using the remainder of the input dimensions.
``-2`` copy all/remainder of the input dimensions to the output shape.
``-3`` use the product of two consecutive dimensions of the input shape as the output dimension.
``-4`` split one dimension of the input into the two dimensions that follow -4 in the shape (one of them can be -1).
**reverse**: If set to 1, then the special values are inferred from right to left | **shape**: the shape parameter is a **positional argument** rather than a **keyword argument**.
Some dimensions of the shape can take special values from the set {-1, -2, -3, -4, -5, -6}.
The significance of each is explained below:
``-1`` infers the dimension of the output shape by using the remainder of the input dimensions.
``-2`` copy this dimension from the input to the output shape.
``-3`` will skip current dimension if and only if the current dim size is one.
``-4`` copy all remaining input dimensions to the output shape.
``-5`` use the product of two consecutive dimensions of the input shape as the output.
``-6`` split one dimension of the input into the two dimensions that follow -6 in the new shape.
**reverse**: `np.reshape` has no **reverse** parameter; `npx.reshape` does.
**order**: Read the elements of `a` using this index order, and place the elements into the reshaped array using this index order. |\n+\n+\n+#### NumPy and NumPy-extension Operators\n+Most of the legacy NDArray operators(`mx.nd.op`) have the equivalent ones in np/npx namespace, users can just repalce them with `mx.np.op` or `mx.npx.op` to migrate. Some of the operators will have different inputs and behaviors as listed in the table below. \n+**Migration Guide**:\n+\n+1. Operators migration with name/inputs changes\n+ | Legacy Operators | NumPy Operators Equivalent | Changes |\n+ | ----------------------------------------------------- | ------------------------------ | ------------------- |\n+ | `mx.nd.flatten(*args, **kwargs)` | `mx.npx.batch_flatten(*args, **kwargs)` | moved to `npx` namespace with new name `batch_flatten` |\n+ | `mx.nd.concat(a, b, c)` | `mx.np.concatenate([a, b, c])` | - moved to `np` namespace with new name `concatenate`.
- use list of ndarrays as input rather than positional ndarrays |\n+ | `mx.nd.stack(a, b, c)` | `mx.np.stack([a, b, c])` | - moved to `np` namespace.
- use list of ndarrays as input rather than positional ndarrays |\n+ | `mx.nd.SliceChannel(*args, **kwargs)` | `mx.npx.slice_channel(*args, **kwargs)` | - moved to `npx` namespace with new name `slice_channel`. |\n+ | `mx.nd.FullyConnected(*args, **kwargs)` | `mx.npx.fully_connected(*args, **kwargs)` | - moved to `npx` namespace with new name `fully_connected`. |\n+ | `mx.nd.Activation(*args, **kwargs)` | `mx.npx.activation(*args, **kwargs)` | - moved to `npx` namespace with new name `activation`. |\n+ | `mx.nd.Activation(*args, **kwargs)` | `mx.npx.activation(*args, **kwargs)` | - moved to `npx` namespace with new name `activation`. |\n+ | `mx.nd.elemwise_add(a, b)` | `a + b` | - Just use ndarray python operator. |\n+ | `mx.nd.elemwise_mul(a, b)` | `mx.np.multiply(a, b)` | - Use `multiply` operator in `np` namespace. |\n+\n+2. Operators migration with multiple steps: `mx.nd.mean` -> `mx.np.mean`:\n+```{.python}\n+import mxnet as mx\n+# Legacy: calculate mean value with reduction on axis 1\n+# with `exclude` option on \n+nd_mean = mx.nd.mean(data, axis=1, exclude=1)\n+\n+# Numpy: no exclude option to users, but user can perform steps as follow\n+axes = list(range(data.ndim))\n+del axes[1]\n+np_mean = mx.np.mean(data, axis=axes)\n+```\n+\n+3. Random Operators\n+ | Legacy Operators | NumPy Operators Equivalent | Changes |\n+ | ----------------------------------------------------- | ------------------------------ | ---------------------------- |\n+ | `mx.random.uniform(-1.0, 1.0, shape=(2, 3))`
`mx.nd.random.uniform(-1.0, 1.0, shape=(2, 3))` | `mx.np.random.uniform(-1.0, 1.0, size=(2, 3))` | For all the NumPy random operators, use **size** key word instead of **shape** |", - "comment_created_at": "2021-08-23T21:34:00+00:00", - "comment_author": "szha", - "comment_body": "```suggestion\r\n | `mx.random.uniform(-1.0, 1.0, shape=(2, 3))`
`mx.nd.random.uniform(-1.0, 1.0, shape=(2, 3))` | `mx.np.random.uniform(-1.0, 1.0, size=(2, 3))` | For all the NumPy random operators, use **size** keyword instead of **shape** |\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-consistent-logging-format.json b/_reviewers/mxnet-consistent-logging-format.json new file mode 100644 index 0000000..a8656fe --- /dev/null +++ b/_reviewers/mxnet-consistent-logging-format.json @@ -0,0 +1,104 @@ +[ + { + "discussion_id": "237215006", + "pr_number": 12893, + "pr_file": "benchmark/python/gluon/benchmark_gluon_rnn.py", + "created_at": "2018-11-28T18:51:38+00:00", + "commented_code": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements. See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership. The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License. You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied. See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport mxnet as mx\nimport mxnet.gluon as gluon\nimport time\nimport logging\nimport sys\nimport argparse\nfrom mxnet.gluon import nn, rnn\n\nparser = argparse.ArgumentParser(description='Gluon RNN Benchmarking.')\nparser.add_argument('--num-layer', type=int, default=1,\n help='The number of layers of the RNN model')\nparser.add_argument('--layout', type=str, default='TNC',\n help='The layout of the input shape, can be either TNC or NTC.')\nparser.add_argument('--specify-shape', type=str,\n help='Specify the input shape, format batchsize, time-step, embed-size, hidden-size.')\nparser.add_argument('--cell-type', type=str, default='lstm',\n help='RNN cell type, can be either lstm, gru or all to cover both.')\nparser.add_argument('--unfuse', action='store_true', default=False,\n help='Unfuse the RNN layer to stacked RNN cell instead.') \nparser.add_argument('--latency', action='store_true', default=False,\n help='Measursing the latency, batchsize will be set to 1.')\nparser.add_argument('--train', action='store_true', default=False,\n help='Run backward benchmark.')\nparser.add_argument('--dropout', type=float, default=0, \n help='float, default 0; Fraction of the input that gets dropped out during training time.')\nparser.add_argument('--gpu', action='store_true', default=False,\n help='bool, default Flase; set True to run benchmarks on GPU.')\nparser.add_argument('--no-hybridize', action='store_true', default=False,\n help='bool, default False; hybridize will introduce a better perf.')\nparser.add_argument('--bidirection', action='store_true', default=False,\n help='bool, default False; Whether to use bidirectional unroll.')\n\nopt = parser.parse_args()\nlogging.basicConfig(level=logging.INFO)\n\n#[bs, sequence length, embedding size, hidden size]\ninput_shape_list = [[64,15,500,500],\n [64,20,500,500],\n [64,25,500,500],\n [64,30,500,500],\n [64,35,500,500],\n [64,40,500,500],\n [64,45,500,500],\n [64,50,500,500],\n [16,25,512,512],\n [32,25,512,512],\n [64,25,512,512],\n [128,25,512,512],\n [16,25,1024,1024],\n [32,25,1024,1024],\n [64,25,1024,1024],\n 
[128,25,1024,1024],\n [16,25,2048,2048],\n [32,25,2048,2048],\n [64,25,2048,2048],\n [128,25,2048,2048],\n [16,25,4096,4096],\n [32,25,4096,4096],\n [64,25,4096,4096],\n [128,25,4096,4096]]\n\nrnncell_type = ['lstm', 'gru', 'all']\ninput_layout = ['TNC', 'NTC']\n\nif not opt.gpu:\n ctx = mx.cpu()\nelse:\n ctx = mx.gpu(0)\n\ndropout = opt.dropout\nbidirection = opt.bidirection\nunfuse = opt.unfuse\ncelltype = opt.cell_type\n\ndry_run = 20\nnum_iter = 100\n\ndef get_rnn_layer(input_shape, num_layer, cell_type, dropout=0, bidirection=False):\n hidden_size = input_shape[3]\n embedding_size = input_shape[2]\n if cell_type == 'lstm':\n rnn_layer = rnn.LSTM(hidden_size, num_layer, dropout=dropout,\n bidirectional=bidirection, input_size=embedding_size,\n prefix='_lstm_layer')\n elif cell_type == 'gru':\n rnn_layer = rnn.GRU(hidden_size, num_layer, dropout=dropout,\n bidirectional=bidirection, input_size=embedding_size,\n prefix='_gru_layer')\n return rnn_layer\n\n\ndef rnn_cell_score(input_shape, cell_type, ctx, num_layer, dropout=0, bidirection=False, layout='TNC', unfuse=False, hybridize=True, is_train=False):\n bs = input_shape[0]\n seq_len = input_shape[1]\n embedding_size = input_shape[2]\n hidden_size = input_shape[3]\n rnn_layer = get_rnn_layer(input_shape, num_layer, cell_type, dropout, bidirection)\n input_data = mx.sym.Variable('data')\n\n if unfuse:\n rnn_cell = rnn_layer._unfuse()\n if hybridize:\n rnn_cell.hybridize()\n out, _ = rnn_cell.unroll(length=seq_len, inputs = input_data, layout=layout, merge_outputs=True)\n else:\n if hybridize:\n rnn_layer.hybridize()\n out = rnn_layer(input_data)\n\n if is_train: \n #out = mx.sym.slice(out, begin=(0, None), end=(bs, None))\n hidden = mx.sym.Reshape(data = out, shape=(-1, hidden_size))\n pred = mx.sym.FullyConnected(data=hidden, num_hidden=embedding_size, name='pred')\n if layout == 'TNC':\n pred = mx.sym.Reshape(data=pred, shape=(seq_len, -1, embedding_size))\n elif layout == 'NTC':\n pred = mx.sym.Reshape(data=pred, shape=(-1, seq_len, embedding_size))\n softmax_output = mx.sym.SoftmaxOutput(data=pred, name='softmax')\n\n if layout == 'NTC':\n dshape = (bs, seq_len, embedding_size)\n elif layout == 'TNC':\n dshape = (seq_len, bs, embedding_size)\n \n if is_train:\n mod = mx.mod.Module(softmax_output, label_names=('softmax_label',), context=ctx)\n else:\n mod = mx.mod.Module(out, label_names=None, context=ctx)\n \n if is_train:\n if layout == 'TNC':\n mod.bind(for_training = True, data_shapes=[('data', dshape)],\n label_shapes=[('softmax_label', (seq_len, bs, embedding_size))])\n elif layout == 'NTC':\n mod.bind(for_training = True, data_shapes=[('data', dshape)],\n label_shapes=[('softmax_label', (bs, seq_len, embedding_size))])\n \n else:\n mod.bind(data_shapes=[('data', dshape)], label_shapes=None)\n\n batch = mx.io.DataBatch(data=[mx.random.uniform(shape=dshape)], label=[])\n mod.init_params(initializer=mx.init.Xavier(magnitude=2.))\n if is_train:\n mod.init_optimizer(optimizer='sgd')\n mod.forward(batch, is_train=True)\n if unfuse:\n for o in mod.get_outputs():\n o.wait_to_read()\n else:\n o = mod.get_outputs()[0]\n o.wait_to_read()\n mod.backward()\n mod.update()\n else:\n mod.forward(batch, is_train=False)\n if unfuse:\n for o in mod.get_outputs():\n o.wait_to_read()\n else:\n o = mod.get_outputs()[0]\n o.wait_to_read()\n\nif __name__ == '__main__':\n\n num_layer = opt.num_layer\n layout = opt.layout\n latency = opt.latency\n hybridize = not(opt.no_hybridize)\n \n if layout not in input_layout:\n logging.warning('Only TNC or NTC 
are supported!')\n sys.exit(0)\n\n if celltype not in rnncell_type:\n logging.warning('Only LSTM and GRU cell are supported!')\n sys.exit(0)\n \n if celltype == 'all':\n cell_lst = ['lstm', 'gru']\n else:\n cell_lst = [celltype]\n\n if opt.specify_shape != None:\n input_shape_list = [[int(x) for x in opt.specify_shape.split(',')]]\n\n for cell in cell_lst:\n if opt.train:\n logging.info('%s training benchmark.', cell)\n else:\n logging.info('%s inference benchmark.', cell)", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "237215006", + "repo_full_name": "apache/mxnet", + "pr_number": 12893, + "pr_file": "benchmark/python/gluon/benchmark_gluon_rnn.py", + "discussion_id": "237215006", + "commented_code": "@@ -0,0 +1,225 @@\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+\n+import mxnet as mx\n+import mxnet.gluon as gluon\n+import time\n+import logging\n+import sys\n+import argparse\n+from mxnet.gluon import nn, rnn\n+\n+parser = argparse.ArgumentParser(description='Gluon RNN Benchmarking.')\n+parser.add_argument('--num-layer', type=int, default=1,\n+ help='The number of layers of the RNN model')\n+parser.add_argument('--layout', type=str, default='TNC',\n+ help='The layout of the input shape, can be either TNC or NTC.')\n+parser.add_argument('--specify-shape', type=str,\n+ help='Specify the input shape, format batchsize, time-step, embed-size, hidden-size.')\n+parser.add_argument('--cell-type', type=str, default='lstm',\n+ help='RNN cell type, can be either lstm, gru or all to cover both.')\n+parser.add_argument('--unfuse', action='store_true', default=False,\n+ help='Unfuse the RNN layer to stacked RNN cell instead.') \n+parser.add_argument('--latency', action='store_true', default=False,\n+ help='Measursing the latency, batchsize will be set to 1.')\n+parser.add_argument('--train', action='store_true', default=False,\n+ help='Run backward benchmark.')\n+parser.add_argument('--dropout', type=float, default=0, \n+ help='float, default 0; Fraction of the input that gets dropped out during training time.')\n+parser.add_argument('--gpu', action='store_true', default=False,\n+ help='bool, default Flase; set True to run benchmarks on GPU.')\n+parser.add_argument('--no-hybridize', action='store_true', default=False,\n+ help='bool, default False; hybridize will introduce a better perf.')\n+parser.add_argument('--bidirection', action='store_true', default=False,\n+ help='bool, default False; Whether to use bidirectional unroll.')\n+\n+opt = parser.parse_args()\n+logging.basicConfig(level=logging.INFO)\n+\n+#[bs, sequence length, embedding size, hidden size]\n+input_shape_list = [[64,15,500,500],\n+ [64,20,500,500],\n+ [64,25,500,500],\n+ [64,30,500,500],\n+ [64,35,500,500],\n+ [64,40,500,500],\n+ 
[64,45,500,500],\n+ [64,50,500,500],\n+ [16,25,512,512],\n+ [32,25,512,512],\n+ [64,25,512,512],\n+ [128,25,512,512],\n+ [16,25,1024,1024],\n+ [32,25,1024,1024],\n+ [64,25,1024,1024],\n+ [128,25,1024,1024],\n+ [16,25,2048,2048],\n+ [32,25,2048,2048],\n+ [64,25,2048,2048],\n+ [128,25,2048,2048],\n+ [16,25,4096,4096],\n+ [32,25,4096,4096],\n+ [64,25,4096,4096],\n+ [128,25,4096,4096]]\n+\n+rnncell_type = ['lstm', 'gru', 'all']\n+input_layout = ['TNC', 'NTC']\n+\n+if not opt.gpu:\n+ ctx = mx.cpu()\n+else:\n+ ctx = mx.gpu(0)\n+\n+dropout = opt.dropout\n+bidirection = opt.bidirection\n+unfuse = opt.unfuse\n+celltype = opt.cell_type\n+\n+dry_run = 20\n+num_iter = 100\n+\n+def get_rnn_layer(input_shape, num_layer, cell_type, dropout=0, bidirection=False):\n+ hidden_size = input_shape[3]\n+ embedding_size = input_shape[2]\n+ if cell_type == 'lstm':\n+ rnn_layer = rnn.LSTM(hidden_size, num_layer, dropout=dropout,\n+ bidirectional=bidirection, input_size=embedding_size,\n+ prefix='_lstm_layer')\n+ elif cell_type == 'gru':\n+ rnn_layer = rnn.GRU(hidden_size, num_layer, dropout=dropout,\n+ bidirectional=bidirection, input_size=embedding_size,\n+ prefix='_gru_layer')\n+ return rnn_layer\n+\n+\n+def rnn_cell_score(input_shape, cell_type, ctx, num_layer, dropout=0, bidirection=False, layout='TNC', unfuse=False, hybridize=True, is_train=False):\n+ bs = input_shape[0]\n+ seq_len = input_shape[1]\n+ embedding_size = input_shape[2]\n+ hidden_size = input_shape[3]\n+ rnn_layer = get_rnn_layer(input_shape, num_layer, cell_type, dropout, bidirection)\n+ input_data = mx.sym.Variable('data')\n+\n+ if unfuse:\n+ rnn_cell = rnn_layer._unfuse()\n+ if hybridize:\n+ rnn_cell.hybridize()\n+ out, _ = rnn_cell.unroll(length=seq_len, inputs = input_data, layout=layout, merge_outputs=True)\n+ else:\n+ if hybridize:\n+ rnn_layer.hybridize()\n+ out = rnn_layer(input_data)\n+\n+ if is_train: \n+ #out = mx.sym.slice(out, begin=(0, None), end=(bs, None))\n+ hidden = mx.sym.Reshape(data = out, shape=(-1, hidden_size))\n+ pred = mx.sym.FullyConnected(data=hidden, num_hidden=embedding_size, name='pred')\n+ if layout == 'TNC':\n+ pred = mx.sym.Reshape(data=pred, shape=(seq_len, -1, embedding_size))\n+ elif layout == 'NTC':\n+ pred = mx.sym.Reshape(data=pred, shape=(-1, seq_len, embedding_size))\n+ softmax_output = mx.sym.SoftmaxOutput(data=pred, name='softmax')\n+\n+ if layout == 'NTC':\n+ dshape = (bs, seq_len, embedding_size)\n+ elif layout == 'TNC':\n+ dshape = (seq_len, bs, embedding_size)\n+ \n+ if is_train:\n+ mod = mx.mod.Module(softmax_output, label_names=('softmax_label',), context=ctx)\n+ else:\n+ mod = mx.mod.Module(out, label_names=None, context=ctx)\n+ \n+ if is_train:\n+ if layout == 'TNC':\n+ mod.bind(for_training = True, data_shapes=[('data', dshape)],\n+ label_shapes=[('softmax_label', (seq_len, bs, embedding_size))])\n+ elif layout == 'NTC':\n+ mod.bind(for_training = True, data_shapes=[('data', dshape)],\n+ label_shapes=[('softmax_label', (bs, seq_len, embedding_size))])\n+ \n+ else:\n+ mod.bind(data_shapes=[('data', dshape)], label_shapes=None)\n+\n+ batch = mx.io.DataBatch(data=[mx.random.uniform(shape=dshape)], label=[])\n+ mod.init_params(initializer=mx.init.Xavier(magnitude=2.))\n+ if is_train:\n+ mod.init_optimizer(optimizer='sgd')\n+ mod.forward(batch, is_train=True)\n+ if unfuse:\n+ for o in mod.get_outputs():\n+ o.wait_to_read()\n+ else:\n+ o = mod.get_outputs()[0]\n+ o.wait_to_read()\n+ mod.backward()\n+ mod.update()\n+ else:\n+ mod.forward(batch, is_train=False)\n+ if unfuse:\n+ for o in 
mod.get_outputs():\n+ o.wait_to_read()\n+ else:\n+ o = mod.get_outputs()[0]\n+ o.wait_to_read()\n+\n+if __name__ == '__main__':\n+\n+ num_layer = opt.num_layer\n+ layout = opt.layout\n+ latency = opt.latency\n+ hybridize = not(opt.no_hybridize)\n+ \n+ if layout not in input_layout:\n+ logging.warning('Only TNC or NTC are supported!')\n+ sys.exit(0)\n+\n+ if celltype not in rnncell_type:\n+ logging.warning('Only LSTM and GRU cell are supported!')\n+ sys.exit(0)\n+ \n+ if celltype == 'all':\n+ cell_lst = ['lstm', 'gru']\n+ else:\n+ cell_lst = [celltype]\n+\n+ if opt.specify_shape != None:\n+ input_shape_list = [[int(x) for x in opt.specify_shape.split(',')]]\n+\n+ for cell in cell_lst:\n+ if opt.train:\n+ logging.info('%s training benchmark.', cell)\n+ else:\n+ logging.info('%s inference benchmark.', cell)", + "comment_created_at": "2018-11-28T18:51:38+00:00", + "comment_author": "vandanavk", + "comment_body": "can this be changed to check for opt.train before the for loop?\r\nsomething like\r\n`print_string = 'training' if opt.train else 'inference'`\r\nand inside the for loop it would be `logging.info('%s %s benchmark.', cell, print_string)`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "968208176", + "pr_number": 21136, + "pr_file": "tests/nightly/model_backwards_compatibility_check/common.py", + "created_at": "2022-09-12T09:47:28+00:00", + "commented_code": "result = bucket.meta.client.list_objects(Bucket=bucket.name, Delimiter=backslash)\n folder_list = list()\n if 'CommonPrefixes' not in result:\n logging.error('No trained models found in S3 bucket : %s for this file. '\n 'Please train the models and run inference again' % bucket_name)\n raise Exception(\"No trained models found in S3 bucket : %s for this file. \"\n \"Please train the models and run inference again\" % bucket_name)\n logging.error('No trained models found in S3 bucket : {} for this file. '", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "968208176", + "repo_full_name": "apache/mxnet", + "pr_number": 21136, + "pr_file": "tests/nightly/model_backwards_compatibility_check/common.py", + "discussion_id": "968208176", + "commented_code": "@@ -105,10 +105,10 @@ def get_top_level_folders_in_bucket(s3client, bucket_name):\n result = bucket.meta.client.list_objects(Bucket=bucket.name, Delimiter=backslash)\n folder_list = list()\n if 'CommonPrefixes' not in result:\n- logging.error('No trained models found in S3 bucket : %s for this file. '\n- 'Please train the models and run inference again' % bucket_name)\n- raise Exception(\"No trained models found in S3 bucket : %s for this file. \"\n- \"Please train the models and run inference again\" % bucket_name)\n+ logging.error('No trained models found in S3 bucket : {} for this file. 
'", + "comment_created_at": "2022-09-12T09:47:28+00:00", + "comment_author": "anko-intel", + "comment_body": "What is the reason to have to different approaches than in line 110?\r\nhere {bucket_name} seems to be more readable", + "pr_file_module": null + }, + { + "comment_id": "968317987", + "repo_full_name": "apache/mxnet", + "pr_number": 21136, + "pr_file": "tests/nightly/model_backwards_compatibility_check/common.py", + "discussion_id": "968208176", + "commented_code": "@@ -105,10 +105,10 @@ def get_top_level_folders_in_bucket(s3client, bucket_name):\n result = bucket.meta.client.list_objects(Bucket=bucket.name, Delimiter=backslash)\n folder_list = list()\n if 'CommonPrefixes' not in result:\n- logging.error('No trained models found in S3 bucket : %s for this file. '\n- 'Please train the models and run inference again' % bucket_name)\n- raise Exception(\"No trained models found in S3 bucket : %s for this file. \"\n- \"Please train the models and run inference again\" % bucket_name)\n+ logging.error('No trained models found in S3 bucket : {} for this file. '", + "comment_created_at": "2022-09-12T11:52:22+00:00", + "comment_author": "hankaj", + "comment_body": "In CI I got the error that you should not use f-strings in messages of logging functions.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "823646148", + "pr_number": 20753, + "pr_file": "python/mxnet/amp/amp.py", + "created_at": "2022-03-10T12:01:50+00:00", + "commented_code": "from being casted to LP16 or FP32.\n data_names : list of strs, optional\n A list of strings that represent input data tensor names to the model\n cast_optional_params : bool, default False\n cast_params_offline : bool, default False\n Whether to cast the arg_params and aux_params that don't require to be in LP16\n because of a cast layer following it, but will reduce the computation and memory\n overhead of the model if casted.\n \"\"\"\n assert isinstance(sym, Symbol), \"First argument to convert_symbol should be Symbol\"\n import json\n\n assert target_dtype in ['float16', 'bfloat16'], \\\n \"Only target_dtype float16 and bfloat16 are supported currently\"\n assert isinstance(sym, Symbol), \"First argument to convert_symbol should be a Symbol\"\n assert target_dtype_ops is None or isinstance(target_dtype_ops, list), \\\n \"target_dtype_ops should be a list of strs\"\n assert fp32_ops is None or isinstance(fp32_ops, list), \\\n \"fp32_ops should be a list of strs\"\n assert conditional_fp32_ops is None or isinstance(conditional_fp32_ops, list), \\\n \"conditional_fp32_ops should be a list\"\n\n if target_dtype == 'bfloat16':\n target_dtype = bfloat16\n target_dtype = get_dtype_name(target_dtype)\n assert target_dtype in ['float16', *bfloat16.names], \\\n \"Only float16 and bfloat16 types are currently supported as target_dtype\"\n\n if target_dtype_ops is not None:\n assert isinstance(target_dtype_ops, list), \"target_dtype_ops should be a list of strs\"\n else:\n if target_dtype_ops is None:\n target_dtype_ops = list_lp16_ops(target_dtype)\n\n if fp32_ops is not None:\n assert isinstance(fp32_ops, list), \"fp32_ops should be a list of strs\"\n else:\n if fp32_ops is None:\n fp32_ops = list_fp32_ops(target_dtype)\n\n if conditional_fp32_ops is not None:\n assert isinstance(conditional_fp32_ops, list), \"conditional_fp32_ops should be a list\"\n else:\n # conditional ops\n if conditional_fp32_ops is None:\n conditional_fp32_ops = list_conditional_fp32_ops(target_dtype)\n cond_ops = {cond_op[0]: {} for cond_op in conditional_fp32_ops}\n for 
cond_op in conditional_fp32_ops:\n op_name, attr_name, attr_vals = cond_op\n assert isinstance(op_name, str) and isinstance(attr_name, str) and isinstance(attr_vals, list), \\\n \"conditional_fp32_ops should be a list of (str, str, list of str)\"\n cond_ops[op_name].setdefault(attr_name, []).extend(attr_vals)\n\n nodes_attr = sym.attr_dict()\n nodes_op = {n['name']: n['op'] for n in json.loads(sym.tojson())['nodes']}\n assert set(excluded_sym_names).issubset(set(nodes_op.keys())), \\", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "823646148", + "repo_full_name": "apache/mxnet", + "pr_number": 20753, + "pr_file": "python/mxnet/amp/amp.py", + "discussion_id": "823646148", + "commented_code": "@@ -459,73 +461,73 @@ def convert_symbol(sym, target_dtype=\"float16\", target_dtype_ops=None,\n from being casted to LP16 or FP32.\n data_names : list of strs, optional\n A list of strings that represent input data tensor names to the model\n- cast_optional_params : bool, default False\n+ cast_params_offline : bool, default False\n Whether to cast the arg_params and aux_params that don't require to be in LP16\n because of a cast layer following it, but will reduce the computation and memory\n overhead of the model if casted.\n \"\"\"\n- assert isinstance(sym, Symbol), \"First argument to convert_symbol should be Symbol\"\n+ import json\n \n- assert target_dtype in ['float16', 'bfloat16'], \\\n- \"Only target_dtype float16 and bfloat16 are supported currently\"\n+ assert isinstance(sym, Symbol), \"First argument to convert_symbol should be a Symbol\"\n+ assert target_dtype_ops is None or isinstance(target_dtype_ops, list), \\\n+ \"target_dtype_ops should be a list of strs\"\n+ assert fp32_ops is None or isinstance(fp32_ops, list), \\\n+ \"fp32_ops should be a list of strs\"\n+ assert conditional_fp32_ops is None or isinstance(conditional_fp32_ops, list), \\\n+ \"conditional_fp32_ops should be a list\"\n \n- if target_dtype == 'bfloat16':\n- target_dtype = bfloat16\n+ target_dtype = get_dtype_name(target_dtype)\n+ assert target_dtype in ['float16', *bfloat16.names], \\\n+ \"Only float16 and bfloat16 types are currently supported as target_dtype\"\n \n- if target_dtype_ops is not None:\n- assert isinstance(target_dtype_ops, list), \"target_dtype_ops should be a list of strs\"\n- else:\n+ if target_dtype_ops is None:\n target_dtype_ops = list_lp16_ops(target_dtype)\n-\n- if fp32_ops is not None:\n- assert isinstance(fp32_ops, list), \"fp32_ops should be a list of strs\"\n- else:\n+ if fp32_ops is None:\n fp32_ops = list_fp32_ops(target_dtype)\n \n- if conditional_fp32_ops is not None:\n- assert isinstance(conditional_fp32_ops, list), \"conditional_fp32_ops should be a list\"\n- else:\n+ # conditional ops\n+ if conditional_fp32_ops is None:\n conditional_fp32_ops = list_conditional_fp32_ops(target_dtype)\n+ cond_ops = {cond_op[0]: {} for cond_op in conditional_fp32_ops}\n+ for cond_op in conditional_fp32_ops:\n+ op_name, attr_name, attr_vals = cond_op\n+ assert isinstance(op_name, str) and isinstance(attr_name, str) and isinstance(attr_vals, list), \\\n+ \"conditional_fp32_ops should be a list of (str, str, list of str)\"\n+ cond_ops[op_name].setdefault(attr_name, []).extend(attr_vals)\n+\n+ nodes_attr = sym.attr_dict()\n+ nodes_op = {n['name']: n['op'] for n in json.loads(sym.tojson())['nodes']}\n+ assert set(excluded_sym_names).issubset(set(nodes_op.keys())), \\", + "comment_created_at": "2022-03-10T12:01:50+00:00", + "comment_author": "bgawrych", + "comment_body": 
"should it be assert? maybe warning is enough?", + "pr_file_module": null + }, + { + "comment_id": "824647707", + "repo_full_name": "apache/mxnet", + "pr_number": 20753, + "pr_file": "python/mxnet/amp/amp.py", + "discussion_id": "823646148", + "commented_code": "@@ -459,73 +461,73 @@ def convert_symbol(sym, target_dtype=\"float16\", target_dtype_ops=None,\n from being casted to LP16 or FP32.\n data_names : list of strs, optional\n A list of strings that represent input data tensor names to the model\n- cast_optional_params : bool, default False\n+ cast_params_offline : bool, default False\n Whether to cast the arg_params and aux_params that don't require to be in LP16\n because of a cast layer following it, but will reduce the computation and memory\n overhead of the model if casted.\n \"\"\"\n- assert isinstance(sym, Symbol), \"First argument to convert_symbol should be Symbol\"\n+ import json\n \n- assert target_dtype in ['float16', 'bfloat16'], \\\n- \"Only target_dtype float16 and bfloat16 are supported currently\"\n+ assert isinstance(sym, Symbol), \"First argument to convert_symbol should be a Symbol\"\n+ assert target_dtype_ops is None or isinstance(target_dtype_ops, list), \\\n+ \"target_dtype_ops should be a list of strs\"\n+ assert fp32_ops is None or isinstance(fp32_ops, list), \\\n+ \"fp32_ops should be a list of strs\"\n+ assert conditional_fp32_ops is None or isinstance(conditional_fp32_ops, list), \\\n+ \"conditional_fp32_ops should be a list\"\n \n- if target_dtype == 'bfloat16':\n- target_dtype = bfloat16\n+ target_dtype = get_dtype_name(target_dtype)\n+ assert target_dtype in ['float16', *bfloat16.names], \\\n+ \"Only float16 and bfloat16 types are currently supported as target_dtype\"\n \n- if target_dtype_ops is not None:\n- assert isinstance(target_dtype_ops, list), \"target_dtype_ops should be a list of strs\"\n- else:\n+ if target_dtype_ops is None:\n target_dtype_ops = list_lp16_ops(target_dtype)\n-\n- if fp32_ops is not None:\n- assert isinstance(fp32_ops, list), \"fp32_ops should be a list of strs\"\n- else:\n+ if fp32_ops is None:\n fp32_ops = list_fp32_ops(target_dtype)\n \n- if conditional_fp32_ops is not None:\n- assert isinstance(conditional_fp32_ops, list), \"conditional_fp32_ops should be a list\"\n- else:\n+ # conditional ops\n+ if conditional_fp32_ops is None:\n conditional_fp32_ops = list_conditional_fp32_ops(target_dtype)\n+ cond_ops = {cond_op[0]: {} for cond_op in conditional_fp32_ops}\n+ for cond_op in conditional_fp32_ops:\n+ op_name, attr_name, attr_vals = cond_op\n+ assert isinstance(op_name, str) and isinstance(attr_name, str) and isinstance(attr_vals, list), \\\n+ \"conditional_fp32_ops should be a list of (str, str, list of str)\"\n+ cond_ops[op_name].setdefault(attr_name, []).extend(attr_vals)\n+\n+ nodes_attr = sym.attr_dict()\n+ nodes_op = {n['name']: n['op'] for n in json.loads(sym.tojson())['nodes']}\n+ assert set(excluded_sym_names).issubset(set(nodes_op.keys())), \\", + "comment_created_at": "2022-03-11T11:55:22+00:00", + "comment_author": "PawelGlomski-Intel", + "comment_body": "I changed it to use `logging.warning(...)` since this is how logging is handled in this file. 
Do you think we should instead add a logger as an argument to the conversion functions?", + "pr_file_module": null + }, + { + "comment_id": "827244414", + "repo_full_name": "apache/mxnet", + "pr_number": 20753, + "pr_file": "python/mxnet/amp/amp.py", + "discussion_id": "823646148", + "commented_code": "@@ -459,73 +461,73 @@ def convert_symbol(sym, target_dtype=\"float16\", target_dtype_ops=None,\n from being casted to LP16 or FP32.\n data_names : list of strs, optional\n A list of strings that represent input data tensor names to the model\n- cast_optional_params : bool, default False\n+ cast_params_offline : bool, default False\n Whether to cast the arg_params and aux_params that don't require to be in LP16\n because of a cast layer following it, but will reduce the computation and memory\n overhead of the model if casted.\n \"\"\"\n- assert isinstance(sym, Symbol), \"First argument to convert_symbol should be Symbol\"\n+ import json\n \n- assert target_dtype in ['float16', 'bfloat16'], \\\n- \"Only target_dtype float16 and bfloat16 are supported currently\"\n+ assert isinstance(sym, Symbol), \"First argument to convert_symbol should be a Symbol\"\n+ assert target_dtype_ops is None or isinstance(target_dtype_ops, list), \\\n+ \"target_dtype_ops should be a list of strs\"\n+ assert fp32_ops is None or isinstance(fp32_ops, list), \\\n+ \"fp32_ops should be a list of strs\"\n+ assert conditional_fp32_ops is None or isinstance(conditional_fp32_ops, list), \\\n+ \"conditional_fp32_ops should be a list\"\n \n- if target_dtype == 'bfloat16':\n- target_dtype = bfloat16\n+ target_dtype = get_dtype_name(target_dtype)\n+ assert target_dtype in ['float16', *bfloat16.names], \\\n+ \"Only float16 and bfloat16 types are currently supported as target_dtype\"\n \n- if target_dtype_ops is not None:\n- assert isinstance(target_dtype_ops, list), \"target_dtype_ops should be a list of strs\"\n- else:\n+ if target_dtype_ops is None:\n target_dtype_ops = list_lp16_ops(target_dtype)\n-\n- if fp32_ops is not None:\n- assert isinstance(fp32_ops, list), \"fp32_ops should be a list of strs\"\n- else:\n+ if fp32_ops is None:\n fp32_ops = list_fp32_ops(target_dtype)\n \n- if conditional_fp32_ops is not None:\n- assert isinstance(conditional_fp32_ops, list), \"conditional_fp32_ops should be a list\"\n- else:\n+ # conditional ops\n+ if conditional_fp32_ops is None:\n conditional_fp32_ops = list_conditional_fp32_ops(target_dtype)\n+ cond_ops = {cond_op[0]: {} for cond_op in conditional_fp32_ops}\n+ for cond_op in conditional_fp32_ops:\n+ op_name, attr_name, attr_vals = cond_op\n+ assert isinstance(op_name, str) and isinstance(attr_name, str) and isinstance(attr_vals, list), \\\n+ \"conditional_fp32_ops should be a list of (str, str, list of str)\"\n+ cond_ops[op_name].setdefault(attr_name, []).extend(attr_vals)\n+\n+ nodes_attr = sym.attr_dict()\n+ nodes_op = {n['name']: n['op'] for n in json.loads(sym.tojson())['nodes']}\n+ assert set(excluded_sym_names).issubset(set(nodes_op.keys())), \\", + "comment_created_at": "2022-03-15T17:33:16+00:00", + "comment_author": "bgawrych", + "comment_body": "Other functions are just using logging module, so I don't think adding argument is necessary", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-consistent-logging-format.md b/_reviewers/mxnet-consistent-logging-format.md index 455c8fd..b7fd617 100644 --- a/_reviewers/mxnet-consistent-logging-format.md +++ b/_reviewers/mxnet-consistent-logging-format.md @@ -43,109 +43,3 @@ 
logging.info('%s %s benchmark.', cell, mode) - Use `logging.warning()` for potential issues that don't stop execution - Use assertions only for developer-facing invariants that should never be violated - Consider warnings instead of assertions in user-facing code - - -[ - { - "discussion_id": "237215006", - "pr_number": 12893, - "pr_file": "benchmark/python/gluon/benchmark_gluon_rnn.py", - "created_at": "2018-11-28T18:51:38+00:00", - "commented_code": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements. See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership. The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License. You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied. See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport mxnet as mx\nimport mxnet.gluon as gluon\nimport time\nimport logging\nimport sys\nimport argparse\nfrom mxnet.gluon import nn, rnn\n\nparser = argparse.ArgumentParser(description='Gluon RNN Benchmarking.')\nparser.add_argument('--num-layer', type=int, default=1,\n help='The number of layers of the RNN model')\nparser.add_argument('--layout', type=str, default='TNC',\n help='The layout of the input shape, can be either TNC or NTC.')\nparser.add_argument('--specify-shape', type=str,\n help='Specify the input shape, format batchsize, time-step, embed-size, hidden-size.')\nparser.add_argument('--cell-type', type=str, default='lstm',\n help='RNN cell type, can be either lstm, gru or all to cover both.')\nparser.add_argument('--unfuse', action='store_true', default=False,\n help='Unfuse the RNN layer to stacked RNN cell instead.') \nparser.add_argument('--latency', action='store_true', default=False,\n help='Measursing the latency, batchsize will be set to 1.')\nparser.add_argument('--train', action='store_true', default=False,\n help='Run backward benchmark.')\nparser.add_argument('--dropout', type=float, default=0, \n help='float, default 0; Fraction of the input that gets dropped out during training time.')\nparser.add_argument('--gpu', action='store_true', default=False,\n help='bool, default Flase; set True to run benchmarks on GPU.')\nparser.add_argument('--no-hybridize', action='store_true', default=False,\n help='bool, default False; hybridize will introduce a better perf.')\nparser.add_argument('--bidirection', action='store_true', default=False,\n help='bool, default False; Whether to use bidirectional unroll.')\n\nopt = parser.parse_args()\nlogging.basicConfig(level=logging.INFO)\n\n#[bs, sequence length, embedding size, hidden size]\ninput_shape_list = [[64,15,500,500],\n [64,20,500,500],\n [64,25,500,500],\n [64,30,500,500],\n [64,35,500,500],\n [64,40,500,500],\n [64,45,500,500],\n [64,50,500,500],\n [16,25,512,512],\n [32,25,512,512],\n [64,25,512,512],\n [128,25,512,512],\n [16,25,1024,1024],\n [32,25,1024,1024],\n [64,25,1024,1024],\n [128,25,1024,1024],\n [16,25,2048,2048],\n [32,25,2048,2048],\n [64,25,2048,2048],\n [128,25,2048,2048],\n [16,25,4096,4096],\n [32,25,4096,4096],\n [64,25,4096,4096],\n [128,25,4096,4096]]\n\nrnncell_type 
= ['lstm', 'gru', 'all']\ninput_layout = ['TNC', 'NTC']\n\nif not opt.gpu:\n ctx = mx.cpu()\nelse:\n ctx = mx.gpu(0)\n\ndropout = opt.dropout\nbidirection = opt.bidirection\nunfuse = opt.unfuse\ncelltype = opt.cell_type\n\ndry_run = 20\nnum_iter = 100\n\ndef get_rnn_layer(input_shape, num_layer, cell_type, dropout=0, bidirection=False):\n hidden_size = input_shape[3]\n embedding_size = input_shape[2]\n if cell_type == 'lstm':\n rnn_layer = rnn.LSTM(hidden_size, num_layer, dropout=dropout,\n bidirectional=bidirection, input_size=embedding_size,\n prefix='_lstm_layer')\n elif cell_type == 'gru':\n rnn_layer = rnn.GRU(hidden_size, num_layer, dropout=dropout,\n bidirectional=bidirection, input_size=embedding_size,\n prefix='_gru_layer')\n return rnn_layer\n\n\ndef rnn_cell_score(input_shape, cell_type, ctx, num_layer, dropout=0, bidirection=False, layout='TNC', unfuse=False, hybridize=True, is_train=False):\n bs = input_shape[0]\n seq_len = input_shape[1]\n embedding_size = input_shape[2]\n hidden_size = input_shape[3]\n rnn_layer = get_rnn_layer(input_shape, num_layer, cell_type, dropout, bidirection)\n input_data = mx.sym.Variable('data')\n\n if unfuse:\n rnn_cell = rnn_layer._unfuse()\n if hybridize:\n rnn_cell.hybridize()\n out, _ = rnn_cell.unroll(length=seq_len, inputs = input_data, layout=layout, merge_outputs=True)\n else:\n if hybridize:\n rnn_layer.hybridize()\n out = rnn_layer(input_data)\n\n if is_train: \n #out = mx.sym.slice(out, begin=(0, None), end=(bs, None))\n hidden = mx.sym.Reshape(data = out, shape=(-1, hidden_size))\n pred = mx.sym.FullyConnected(data=hidden, num_hidden=embedding_size, name='pred')\n if layout == 'TNC':\n pred = mx.sym.Reshape(data=pred, shape=(seq_len, -1, embedding_size))\n elif layout == 'NTC':\n pred = mx.sym.Reshape(data=pred, shape=(-1, seq_len, embedding_size))\n softmax_output = mx.sym.SoftmaxOutput(data=pred, name='softmax')\n\n if layout == 'NTC':\n dshape = (bs, seq_len, embedding_size)\n elif layout == 'TNC':\n dshape = (seq_len, bs, embedding_size)\n \n if is_train:\n mod = mx.mod.Module(softmax_output, label_names=('softmax_label',), context=ctx)\n else:\n mod = mx.mod.Module(out, label_names=None, context=ctx)\n \n if is_train:\n if layout == 'TNC':\n mod.bind(for_training = True, data_shapes=[('data', dshape)],\n label_shapes=[('softmax_label', (seq_len, bs, embedding_size))])\n elif layout == 'NTC':\n mod.bind(for_training = True, data_shapes=[('data', dshape)],\n label_shapes=[('softmax_label', (bs, seq_len, embedding_size))])\n \n else:\n mod.bind(data_shapes=[('data', dshape)], label_shapes=None)\n\n batch = mx.io.DataBatch(data=[mx.random.uniform(shape=dshape)], label=[])\n mod.init_params(initializer=mx.init.Xavier(magnitude=2.))\n if is_train:\n mod.init_optimizer(optimizer='sgd')\n mod.forward(batch, is_train=True)\n if unfuse:\n for o in mod.get_outputs():\n o.wait_to_read()\n else:\n o = mod.get_outputs()[0]\n o.wait_to_read()\n mod.backward()\n mod.update()\n else:\n mod.forward(batch, is_train=False)\n if unfuse:\n for o in mod.get_outputs():\n o.wait_to_read()\n else:\n o = mod.get_outputs()[0]\n o.wait_to_read()\n\nif __name__ == '__main__':\n\n num_layer = opt.num_layer\n layout = opt.layout\n latency = opt.latency\n hybridize = not(opt.no_hybridize)\n \n if layout not in input_layout:\n logging.warning('Only TNC or NTC are supported!')\n sys.exit(0)\n\n if celltype not in rnncell_type:\n logging.warning('Only LSTM and GRU cell are supported!')\n sys.exit(0)\n \n if celltype == 'all':\n cell_lst = ['lstm', 'gru']\n else:\n 
cell_lst = [celltype]\n\n if opt.specify_shape != None:\n input_shape_list = [[int(x) for x in opt.specify_shape.split(',')]]\n\n for cell in cell_lst:\n if opt.train:\n logging.info('%s training benchmark.', cell)\n else:\n logging.info('%s inference benchmark.', cell)", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "237215006", - "repo_full_name": "apache/mxnet", - "pr_number": 12893, - "pr_file": "benchmark/python/gluon/benchmark_gluon_rnn.py", - "discussion_id": "237215006", - "commented_code": "@@ -0,0 +1,225 @@\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+\n+import mxnet as mx\n+import mxnet.gluon as gluon\n+import time\n+import logging\n+import sys\n+import argparse\n+from mxnet.gluon import nn, rnn\n+\n+parser = argparse.ArgumentParser(description='Gluon RNN Benchmarking.')\n+parser.add_argument('--num-layer', type=int, default=1,\n+ help='The number of layers of the RNN model')\n+parser.add_argument('--layout', type=str, default='TNC',\n+ help='The layout of the input shape, can be either TNC or NTC.')\n+parser.add_argument('--specify-shape', type=str,\n+ help='Specify the input shape, format batchsize, time-step, embed-size, hidden-size.')\n+parser.add_argument('--cell-type', type=str, default='lstm',\n+ help='RNN cell type, can be either lstm, gru or all to cover both.')\n+parser.add_argument('--unfuse', action='store_true', default=False,\n+ help='Unfuse the RNN layer to stacked RNN cell instead.') \n+parser.add_argument('--latency', action='store_true', default=False,\n+ help='Measursing the latency, batchsize will be set to 1.')\n+parser.add_argument('--train', action='store_true', default=False,\n+ help='Run backward benchmark.')\n+parser.add_argument('--dropout', type=float, default=0, \n+ help='float, default 0; Fraction of the input that gets dropped out during training time.')\n+parser.add_argument('--gpu', action='store_true', default=False,\n+ help='bool, default Flase; set True to run benchmarks on GPU.')\n+parser.add_argument('--no-hybridize', action='store_true', default=False,\n+ help='bool, default False; hybridize will introduce a better perf.')\n+parser.add_argument('--bidirection', action='store_true', default=False,\n+ help='bool, default False; Whether to use bidirectional unroll.')\n+\n+opt = parser.parse_args()\n+logging.basicConfig(level=logging.INFO)\n+\n+#[bs, sequence length, embedding size, hidden size]\n+input_shape_list = [[64,15,500,500],\n+ [64,20,500,500],\n+ [64,25,500,500],\n+ [64,30,500,500],\n+ [64,35,500,500],\n+ [64,40,500,500],\n+ [64,45,500,500],\n+ [64,50,500,500],\n+ [16,25,512,512],\n+ [32,25,512,512],\n+ [64,25,512,512],\n+ [128,25,512,512],\n+ [16,25,1024,1024],\n+ [32,25,1024,1024],\n+ [64,25,1024,1024],\n+ [128,25,1024,1024],\n+ 
[16,25,2048,2048],\n+ [32,25,2048,2048],\n+ [64,25,2048,2048],\n+ [128,25,2048,2048],\n+ [16,25,4096,4096],\n+ [32,25,4096,4096],\n+ [64,25,4096,4096],\n+ [128,25,4096,4096]]\n+\n+rnncell_type = ['lstm', 'gru', 'all']\n+input_layout = ['TNC', 'NTC']\n+\n+if not opt.gpu:\n+ ctx = mx.cpu()\n+else:\n+ ctx = mx.gpu(0)\n+\n+dropout = opt.dropout\n+bidirection = opt.bidirection\n+unfuse = opt.unfuse\n+celltype = opt.cell_type\n+\n+dry_run = 20\n+num_iter = 100\n+\n+def get_rnn_layer(input_shape, num_layer, cell_type, dropout=0, bidirection=False):\n+ hidden_size = input_shape[3]\n+ embedding_size = input_shape[2]\n+ if cell_type == 'lstm':\n+ rnn_layer = rnn.LSTM(hidden_size, num_layer, dropout=dropout,\n+ bidirectional=bidirection, input_size=embedding_size,\n+ prefix='_lstm_layer')\n+ elif cell_type == 'gru':\n+ rnn_layer = rnn.GRU(hidden_size, num_layer, dropout=dropout,\n+ bidirectional=bidirection, input_size=embedding_size,\n+ prefix='_gru_layer')\n+ return rnn_layer\n+\n+\n+def rnn_cell_score(input_shape, cell_type, ctx, num_layer, dropout=0, bidirection=False, layout='TNC', unfuse=False, hybridize=True, is_train=False):\n+ bs = input_shape[0]\n+ seq_len = input_shape[1]\n+ embedding_size = input_shape[2]\n+ hidden_size = input_shape[3]\n+ rnn_layer = get_rnn_layer(input_shape, num_layer, cell_type, dropout, bidirection)\n+ input_data = mx.sym.Variable('data')\n+\n+ if unfuse:\n+ rnn_cell = rnn_layer._unfuse()\n+ if hybridize:\n+ rnn_cell.hybridize()\n+ out, _ = rnn_cell.unroll(length=seq_len, inputs = input_data, layout=layout, merge_outputs=True)\n+ else:\n+ if hybridize:\n+ rnn_layer.hybridize()\n+ out = rnn_layer(input_data)\n+\n+ if is_train: \n+ #out = mx.sym.slice(out, begin=(0, None), end=(bs, None))\n+ hidden = mx.sym.Reshape(data = out, shape=(-1, hidden_size))\n+ pred = mx.sym.FullyConnected(data=hidden, num_hidden=embedding_size, name='pred')\n+ if layout == 'TNC':\n+ pred = mx.sym.Reshape(data=pred, shape=(seq_len, -1, embedding_size))\n+ elif layout == 'NTC':\n+ pred = mx.sym.Reshape(data=pred, shape=(-1, seq_len, embedding_size))\n+ softmax_output = mx.sym.SoftmaxOutput(data=pred, name='softmax')\n+\n+ if layout == 'NTC':\n+ dshape = (bs, seq_len, embedding_size)\n+ elif layout == 'TNC':\n+ dshape = (seq_len, bs, embedding_size)\n+ \n+ if is_train:\n+ mod = mx.mod.Module(softmax_output, label_names=('softmax_label',), context=ctx)\n+ else:\n+ mod = mx.mod.Module(out, label_names=None, context=ctx)\n+ \n+ if is_train:\n+ if layout == 'TNC':\n+ mod.bind(for_training = True, data_shapes=[('data', dshape)],\n+ label_shapes=[('softmax_label', (seq_len, bs, embedding_size))])\n+ elif layout == 'NTC':\n+ mod.bind(for_training = True, data_shapes=[('data', dshape)],\n+ label_shapes=[('softmax_label', (bs, seq_len, embedding_size))])\n+ \n+ else:\n+ mod.bind(data_shapes=[('data', dshape)], label_shapes=None)\n+\n+ batch = mx.io.DataBatch(data=[mx.random.uniform(shape=dshape)], label=[])\n+ mod.init_params(initializer=mx.init.Xavier(magnitude=2.))\n+ if is_train:\n+ mod.init_optimizer(optimizer='sgd')\n+ mod.forward(batch, is_train=True)\n+ if unfuse:\n+ for o in mod.get_outputs():\n+ o.wait_to_read()\n+ else:\n+ o = mod.get_outputs()[0]\n+ o.wait_to_read()\n+ mod.backward()\n+ mod.update()\n+ else:\n+ mod.forward(batch, is_train=False)\n+ if unfuse:\n+ for o in mod.get_outputs():\n+ o.wait_to_read()\n+ else:\n+ o = mod.get_outputs()[0]\n+ o.wait_to_read()\n+\n+if __name__ == '__main__':\n+\n+ num_layer = opt.num_layer\n+ layout = opt.layout\n+ latency = opt.latency\n+ hybridize = 
not(opt.no_hybridize)\n+ \n+ if layout not in input_layout:\n+ logging.warning('Only TNC or NTC are supported!')\n+ sys.exit(0)\n+\n+ if celltype not in rnncell_type:\n+ logging.warning('Only LSTM and GRU cell are supported!')\n+ sys.exit(0)\n+ \n+ if celltype == 'all':\n+ cell_lst = ['lstm', 'gru']\n+ else:\n+ cell_lst = [celltype]\n+\n+ if opt.specify_shape != None:\n+ input_shape_list = [[int(x) for x in opt.specify_shape.split(',')]]\n+\n+ for cell in cell_lst:\n+ if opt.train:\n+ logging.info('%s training benchmark.', cell)\n+ else:\n+ logging.info('%s inference benchmark.', cell)", - "comment_created_at": "2018-11-28T18:51:38+00:00", - "comment_author": "vandanavk", - "comment_body": "can this be changed to check for opt.train before the for loop?\r\nsomething like\r\n`print_string = 'training' if opt.train else 'inference'`\r\nand inside the for loop it would be `logging.info('%s %s benchmark.', cell, print_string)`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "968208176", - "pr_number": 21136, - "pr_file": "tests/nightly/model_backwards_compatibility_check/common.py", - "created_at": "2022-09-12T09:47:28+00:00", - "commented_code": "result = bucket.meta.client.list_objects(Bucket=bucket.name, Delimiter=backslash)\n folder_list = list()\n if 'CommonPrefixes' not in result:\n logging.error('No trained models found in S3 bucket : %s for this file. '\n 'Please train the models and run inference again' % bucket_name)\n raise Exception(\"No trained models found in S3 bucket : %s for this file. \"\n \"Please train the models and run inference again\" % bucket_name)\n logging.error('No trained models found in S3 bucket : {} for this file. '", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "968208176", - "repo_full_name": "apache/mxnet", - "pr_number": 21136, - "pr_file": "tests/nightly/model_backwards_compatibility_check/common.py", - "discussion_id": "968208176", - "commented_code": "@@ -105,10 +105,10 @@ def get_top_level_folders_in_bucket(s3client, bucket_name):\n result = bucket.meta.client.list_objects(Bucket=bucket.name, Delimiter=backslash)\n folder_list = list()\n if 'CommonPrefixes' not in result:\n- logging.error('No trained models found in S3 bucket : %s for this file. '\n- 'Please train the models and run inference again' % bucket_name)\n- raise Exception(\"No trained models found in S3 bucket : %s for this file. \"\n- \"Please train the models and run inference again\" % bucket_name)\n+ logging.error('No trained models found in S3 bucket : {} for this file. '", - "comment_created_at": "2022-09-12T09:47:28+00:00", - "comment_author": "anko-intel", - "comment_body": "What is the reason to have to different approaches than in line 110?\r\nhere {bucket_name} seems to be more readable", - "pr_file_module": null - }, - { - "comment_id": "968317987", - "repo_full_name": "apache/mxnet", - "pr_number": 21136, - "pr_file": "tests/nightly/model_backwards_compatibility_check/common.py", - "discussion_id": "968208176", - "commented_code": "@@ -105,10 +105,10 @@ def get_top_level_folders_in_bucket(s3client, bucket_name):\n result = bucket.meta.client.list_objects(Bucket=bucket.name, Delimiter=backslash)\n folder_list = list()\n if 'CommonPrefixes' not in result:\n- logging.error('No trained models found in S3 bucket : %s for this file. '\n- 'Please train the models and run inference again' % bucket_name)\n- raise Exception(\"No trained models found in S3 bucket : %s for this file. 
\"\n- \"Please train the models and run inference again\" % bucket_name)\n+ logging.error('No trained models found in S3 bucket : {} for this file. '", - "comment_created_at": "2022-09-12T11:52:22+00:00", - "comment_author": "hankaj", - "comment_body": "In CI I got the error that you should not use f-strings in messages of logging functions.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "823646148", - "pr_number": 20753, - "pr_file": "python/mxnet/amp/amp.py", - "created_at": "2022-03-10T12:01:50+00:00", - "commented_code": "from being casted to LP16 or FP32.\n data_names : list of strs, optional\n A list of strings that represent input data tensor names to the model\n cast_optional_params : bool, default False\n cast_params_offline : bool, default False\n Whether to cast the arg_params and aux_params that don't require to be in LP16\n because of a cast layer following it, but will reduce the computation and memory\n overhead of the model if casted.\n \"\"\"\n assert isinstance(sym, Symbol), \"First argument to convert_symbol should be Symbol\"\n import json\n\n assert target_dtype in ['float16', 'bfloat16'], \\\n \"Only target_dtype float16 and bfloat16 are supported currently\"\n assert isinstance(sym, Symbol), \"First argument to convert_symbol should be a Symbol\"\n assert target_dtype_ops is None or isinstance(target_dtype_ops, list), \\\n \"target_dtype_ops should be a list of strs\"\n assert fp32_ops is None or isinstance(fp32_ops, list), \\\n \"fp32_ops should be a list of strs\"\n assert conditional_fp32_ops is None or isinstance(conditional_fp32_ops, list), \\\n \"conditional_fp32_ops should be a list\"\n\n if target_dtype == 'bfloat16':\n target_dtype = bfloat16\n target_dtype = get_dtype_name(target_dtype)\n assert target_dtype in ['float16', *bfloat16.names], \\\n \"Only float16 and bfloat16 types are currently supported as target_dtype\"\n\n if target_dtype_ops is not None:\n assert isinstance(target_dtype_ops, list), \"target_dtype_ops should be a list of strs\"\n else:\n if target_dtype_ops is None:\n target_dtype_ops = list_lp16_ops(target_dtype)\n\n if fp32_ops is not None:\n assert isinstance(fp32_ops, list), \"fp32_ops should be a list of strs\"\n else:\n if fp32_ops is None:\n fp32_ops = list_fp32_ops(target_dtype)\n\n if conditional_fp32_ops is not None:\n assert isinstance(conditional_fp32_ops, list), \"conditional_fp32_ops should be a list\"\n else:\n # conditional ops\n if conditional_fp32_ops is None:\n conditional_fp32_ops = list_conditional_fp32_ops(target_dtype)\n cond_ops = {cond_op[0]: {} for cond_op in conditional_fp32_ops}\n for cond_op in conditional_fp32_ops:\n op_name, attr_name, attr_vals = cond_op\n assert isinstance(op_name, str) and isinstance(attr_name, str) and isinstance(attr_vals, list), \\\n \"conditional_fp32_ops should be a list of (str, str, list of str)\"\n cond_ops[op_name].setdefault(attr_name, []).extend(attr_vals)\n\n nodes_attr = sym.attr_dict()\n nodes_op = {n['name']: n['op'] for n in json.loads(sym.tojson())['nodes']}\n assert set(excluded_sym_names).issubset(set(nodes_op.keys())), \\", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "823646148", - "repo_full_name": "apache/mxnet", - "pr_number": 20753, - "pr_file": "python/mxnet/amp/amp.py", - "discussion_id": "823646148", - "commented_code": "@@ -459,73 +461,73 @@ def convert_symbol(sym, target_dtype=\"float16\", target_dtype_ops=None,\n from being casted to LP16 or FP32.\n data_names : list of strs, optional\n A list of strings 
that represent input data tensor names to the model\n- cast_optional_params : bool, default False\n+ cast_params_offline : bool, default False\n Whether to cast the arg_params and aux_params that don't require to be in LP16\n because of a cast layer following it, but will reduce the computation and memory\n overhead of the model if casted.\n \"\"\"\n- assert isinstance(sym, Symbol), \"First argument to convert_symbol should be Symbol\"\n+ import json\n \n- assert target_dtype in ['float16', 'bfloat16'], \\\n- \"Only target_dtype float16 and bfloat16 are supported currently\"\n+ assert isinstance(sym, Symbol), \"First argument to convert_symbol should be a Symbol\"\n+ assert target_dtype_ops is None or isinstance(target_dtype_ops, list), \\\n+ \"target_dtype_ops should be a list of strs\"\n+ assert fp32_ops is None or isinstance(fp32_ops, list), \\\n+ \"fp32_ops should be a list of strs\"\n+ assert conditional_fp32_ops is None or isinstance(conditional_fp32_ops, list), \\\n+ \"conditional_fp32_ops should be a list\"\n \n- if target_dtype == 'bfloat16':\n- target_dtype = bfloat16\n+ target_dtype = get_dtype_name(target_dtype)\n+ assert target_dtype in ['float16', *bfloat16.names], \\\n+ \"Only float16 and bfloat16 types are currently supported as target_dtype\"\n \n- if target_dtype_ops is not None:\n- assert isinstance(target_dtype_ops, list), \"target_dtype_ops should be a list of strs\"\n- else:\n+ if target_dtype_ops is None:\n target_dtype_ops = list_lp16_ops(target_dtype)\n-\n- if fp32_ops is not None:\n- assert isinstance(fp32_ops, list), \"fp32_ops should be a list of strs\"\n- else:\n+ if fp32_ops is None:\n fp32_ops = list_fp32_ops(target_dtype)\n \n- if conditional_fp32_ops is not None:\n- assert isinstance(conditional_fp32_ops, list), \"conditional_fp32_ops should be a list\"\n- else:\n+ # conditional ops\n+ if conditional_fp32_ops is None:\n conditional_fp32_ops = list_conditional_fp32_ops(target_dtype)\n+ cond_ops = {cond_op[0]: {} for cond_op in conditional_fp32_ops}\n+ for cond_op in conditional_fp32_ops:\n+ op_name, attr_name, attr_vals = cond_op\n+ assert isinstance(op_name, str) and isinstance(attr_name, str) and isinstance(attr_vals, list), \\\n+ \"conditional_fp32_ops should be a list of (str, str, list of str)\"\n+ cond_ops[op_name].setdefault(attr_name, []).extend(attr_vals)\n+\n+ nodes_attr = sym.attr_dict()\n+ nodes_op = {n['name']: n['op'] for n in json.loads(sym.tojson())['nodes']}\n+ assert set(excluded_sym_names).issubset(set(nodes_op.keys())), \\", - "comment_created_at": "2022-03-10T12:01:50+00:00", - "comment_author": "bgawrych", - "comment_body": "should it be assert? 
maybe warning is enough?", - "pr_file_module": null - }, - { - "comment_id": "824647707", - "repo_full_name": "apache/mxnet", - "pr_number": 20753, - "pr_file": "python/mxnet/amp/amp.py", - "discussion_id": "823646148", - "commented_code": "@@ -459,73 +461,73 @@ def convert_symbol(sym, target_dtype=\"float16\", target_dtype_ops=None,\n from being casted to LP16 or FP32.\n data_names : list of strs, optional\n A list of strings that represent input data tensor names to the model\n- cast_optional_params : bool, default False\n+ cast_params_offline : bool, default False\n Whether to cast the arg_params and aux_params that don't require to be in LP16\n because of a cast layer following it, but will reduce the computation and memory\n overhead of the model if casted.\n \"\"\"\n- assert isinstance(sym, Symbol), \"First argument to convert_symbol should be Symbol\"\n+ import json\n \n- assert target_dtype in ['float16', 'bfloat16'], \\\n- \"Only target_dtype float16 and bfloat16 are supported currently\"\n+ assert isinstance(sym, Symbol), \"First argument to convert_symbol should be a Symbol\"\n+ assert target_dtype_ops is None or isinstance(target_dtype_ops, list), \\\n+ \"target_dtype_ops should be a list of strs\"\n+ assert fp32_ops is None or isinstance(fp32_ops, list), \\\n+ \"fp32_ops should be a list of strs\"\n+ assert conditional_fp32_ops is None or isinstance(conditional_fp32_ops, list), \\\n+ \"conditional_fp32_ops should be a list\"\n \n- if target_dtype == 'bfloat16':\n- target_dtype = bfloat16\n+ target_dtype = get_dtype_name(target_dtype)\n+ assert target_dtype in ['float16', *bfloat16.names], \\\n+ \"Only float16 and bfloat16 types are currently supported as target_dtype\"\n \n- if target_dtype_ops is not None:\n- assert isinstance(target_dtype_ops, list), \"target_dtype_ops should be a list of strs\"\n- else:\n+ if target_dtype_ops is None:\n target_dtype_ops = list_lp16_ops(target_dtype)\n-\n- if fp32_ops is not None:\n- assert isinstance(fp32_ops, list), \"fp32_ops should be a list of strs\"\n- else:\n+ if fp32_ops is None:\n fp32_ops = list_fp32_ops(target_dtype)\n \n- if conditional_fp32_ops is not None:\n- assert isinstance(conditional_fp32_ops, list), \"conditional_fp32_ops should be a list\"\n- else:\n+ # conditional ops\n+ if conditional_fp32_ops is None:\n conditional_fp32_ops = list_conditional_fp32_ops(target_dtype)\n+ cond_ops = {cond_op[0]: {} for cond_op in conditional_fp32_ops}\n+ for cond_op in conditional_fp32_ops:\n+ op_name, attr_name, attr_vals = cond_op\n+ assert isinstance(op_name, str) and isinstance(attr_name, str) and isinstance(attr_vals, list), \\\n+ \"conditional_fp32_ops should be a list of (str, str, list of str)\"\n+ cond_ops[op_name].setdefault(attr_name, []).extend(attr_vals)\n+\n+ nodes_attr = sym.attr_dict()\n+ nodes_op = {n['name']: n['op'] for n in json.loads(sym.tojson())['nodes']}\n+ assert set(excluded_sym_names).issubset(set(nodes_op.keys())), \\", - "comment_created_at": "2022-03-11T11:55:22+00:00", - "comment_author": "PawelGlomski-Intel", - "comment_body": "I changed it to use `logging.warning(...)` since this is how logging is handled in this file. 
Do you think we should instead add a logger as an argument to the conversion functions?", - "pr_file_module": null - }, - { - "comment_id": "827244414", - "repo_full_name": "apache/mxnet", - "pr_number": 20753, - "pr_file": "python/mxnet/amp/amp.py", - "discussion_id": "823646148", - "commented_code": "@@ -459,73 +461,73 @@ def convert_symbol(sym, target_dtype=\"float16\", target_dtype_ops=None,\n from being casted to LP16 or FP32.\n data_names : list of strs, optional\n A list of strings that represent input data tensor names to the model\n- cast_optional_params : bool, default False\n+ cast_params_offline : bool, default False\n Whether to cast the arg_params and aux_params that don't require to be in LP16\n because of a cast layer following it, but will reduce the computation and memory\n overhead of the model if casted.\n \"\"\"\n- assert isinstance(sym, Symbol), \"First argument to convert_symbol should be Symbol\"\n+ import json\n \n- assert target_dtype in ['float16', 'bfloat16'], \\\n- \"Only target_dtype float16 and bfloat16 are supported currently\"\n+ assert isinstance(sym, Symbol), \"First argument to convert_symbol should be a Symbol\"\n+ assert target_dtype_ops is None or isinstance(target_dtype_ops, list), \\\n+ \"target_dtype_ops should be a list of strs\"\n+ assert fp32_ops is None or isinstance(fp32_ops, list), \\\n+ \"fp32_ops should be a list of strs\"\n+ assert conditional_fp32_ops is None or isinstance(conditional_fp32_ops, list), \\\n+ \"conditional_fp32_ops should be a list\"\n \n- if target_dtype == 'bfloat16':\n- target_dtype = bfloat16\n+ target_dtype = get_dtype_name(target_dtype)\n+ assert target_dtype in ['float16', *bfloat16.names], \\\n+ \"Only float16 and bfloat16 types are currently supported as target_dtype\"\n \n- if target_dtype_ops is not None:\n- assert isinstance(target_dtype_ops, list), \"target_dtype_ops should be a list of strs\"\n- else:\n+ if target_dtype_ops is None:\n target_dtype_ops = list_lp16_ops(target_dtype)\n-\n- if fp32_ops is not None:\n- assert isinstance(fp32_ops, list), \"fp32_ops should be a list of strs\"\n- else:\n+ if fp32_ops is None:\n fp32_ops = list_fp32_ops(target_dtype)\n \n- if conditional_fp32_ops is not None:\n- assert isinstance(conditional_fp32_ops, list), \"conditional_fp32_ops should be a list\"\n- else:\n+ # conditional ops\n+ if conditional_fp32_ops is None:\n conditional_fp32_ops = list_conditional_fp32_ops(target_dtype)\n+ cond_ops = {cond_op[0]: {} for cond_op in conditional_fp32_ops}\n+ for cond_op in conditional_fp32_ops:\n+ op_name, attr_name, attr_vals = cond_op\n+ assert isinstance(op_name, str) and isinstance(attr_name, str) and isinstance(attr_vals, list), \\\n+ \"conditional_fp32_ops should be a list of (str, str, list of str)\"\n+ cond_ops[op_name].setdefault(attr_name, []).extend(attr_vals)\n+\n+ nodes_attr = sym.attr_dict()\n+ nodes_op = {n['name']: n['op'] for n in json.loads(sym.tojson())['nodes']}\n+ assert set(excluded_sym_names).issubset(set(nodes_op.keys())), \\", - "comment_created_at": "2022-03-15T17:33:16+00:00", - "comment_author": "bgawrych", - "comment_body": "Other functions are just using logging module, so I don't think adding argument is necessary", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-consistent-naming-patterns.json b/_reviewers/mxnet-consistent-naming-patterns.json new file mode 100644 index 0000000..dd0ffaf --- /dev/null +++ b/_reviewers/mxnet-consistent-naming-patterns.json @@ -0,0 +1,180 @@ +[ + { + "discussion_id": "722364711", + "pr_number": 
20633, + "pr_file": "python/mxnet/numpy/linalg.py", + "created_at": "2021-10-05T15:33:59+00:00", + "commented_code": "return _mx_nd_np.linalg.matrix_rank(M, tol, hermitian)\n\n\ndef vecdot(a: ndarray, b, ndarray, axis: Optional[int] =None) -> ndarray:", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "722364711", + "repo_full_name": "apache/mxnet", + "pr_number": 20633, + "pr_file": "python/mxnet/numpy/linalg.py", + "discussion_id": "722364711", + "commented_code": "@@ -67,6 +70,53 @@ def matrix_rank(M, tol=None, hermitian=False):\n return _mx_nd_np.linalg.matrix_rank(M, tol, hermitian)\n \n \n+def vecdot(a: ndarray, b, ndarray, axis: Optional[int] =None) -> ndarray:", + "comment_created_at": "2021-10-05T15:33:59+00:00", + "comment_author": "mozga-intel", + "comment_body": "```def vecdot(a: ndarray, b, ndarray, axis: Optional[int] =None) -> ndarray:```\r\nHow about adding the same function style in the entire file by giving the type-name explicitly?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "756375086", + "pr_number": 20633, + "pr_file": "python/mxnet/numpy/multiarray.py", + "created_at": "2021-11-24T19:36:54+00:00", + "commented_code": "\"\"\"\n from_dlpack = ndarray_from_dlpack(ndarray)\n return from_dlpack(x)\n\n\n@set_module('mxnet.numpy')\ndef matrix_transpose(x: ndarray, /) -> ndarray:\n \"\"\"\n Transposes a matrix (or a stack of matrices) x ..\n\n Parameters\n ----------\n x : ndarray\n input array having shape (..., M, N) and whose innermost two dimensions form MxN matrices\n\n Returns\n -------\n out : ndarray\n an array containing the transpose for each matrix and having shape (..., N, M) . The\n returned array must have the same data type as x .\n \"\"\"\n if x.ndim < 2:\n raise ValueError(\"x must be at least 2-dimensional for matrix_transpose\")\n return _mx_nd_np.swapaxes(x, -1, -2)\n\n\ndef astype(\n self: ndarray,", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "756375086", + "repo_full_name": "apache/mxnet", + "pr_number": 20633, + "pr_file": "python/mxnet/numpy/multiarray.py", + "discussion_id": "756375086", + "commented_code": "@@ -13363,3 +13965,96 @@ def from_dlpack(x):\n \"\"\"\n from_dlpack = ndarray_from_dlpack(ndarray)\n return from_dlpack(x)\n+\n+\n+@set_module('mxnet.numpy')\n+def matrix_transpose(x: ndarray, /) -> ndarray:\n+ \"\"\"\n+ Transposes a matrix (or a stack of matrices) x ..\n+\n+ Parameters\n+ ----------\n+ x : ndarray\n+ input array having shape (..., M, N) and whose innermost two dimensions form MxN matrices\n+\n+ Returns\n+ -------\n+ out : ndarray\n+ an array containing the transpose for each matrix and having shape (..., N, M) . The\n+ returned array must have the same data type as x .\n+ \"\"\"\n+ if x.ndim < 2:\n+ raise ValueError(\"x must be at least 2-dimensional for matrix_transpose\")\n+ return _mx_nd_np.swapaxes(x, -1, -2)\n+\n+\n+def astype(\n+ self: ndarray,", + "comment_created_at": "2021-11-24T19:36:54+00:00", + "comment_author": "barry-jin", + "comment_body": "Please use x or array as argument here instead of self. 
", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "797614635", + "pr_number": 20846, + "pr_file": "tests/python/dnnl/test_bf16_operator.py", + "created_at": "2022-02-02T13:38:27+00:00", + "commented_code": "etol: float\n The error rate threshold, allow a small amount of value not consistent between bf16 and fp32\n \"\"\"\n if not isinstance(data_shape, tuple):\n data_shape = tuple(data_shape)\n if isinstance(data_shape, tuple):\n data_shape = {\"data\": data_shape}\n data_range = (0.0, 10.0)\n data_list_fp32 = list()\n data_list_bf16 = list()\n for i in range(num_input_data):\n data_list_fp32.append(mx.nd.random.uniform(low=data_range[0], high=data_range[1], shape=data_shape))\n for i, obj in enumerate(data_shape):", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "797614635", + "repo_full_name": "apache/mxnet", + "pr_number": 20846, + "pr_file": "tests/python/dnnl/test_bf16_operator.py", + "discussion_id": "797614635", + "commented_code": "@@ -55,20 +55,22 @@ def check_operator_accuracy(sym_fp32, sym_bf16, data_shape, num_input_data=1, bf\n etol: float\n The error rate threshold, allow a small amount of value not consistent between bf16 and fp32\n \"\"\"\n- if not isinstance(data_shape, tuple):\n- data_shape = tuple(data_shape)\n+ if isinstance(data_shape, tuple):\n+ data_shape = {\"data\": data_shape}\n data_range = (0.0, 10.0)\n data_list_fp32 = list()\n data_list_bf16 = list()\n- for i in range(num_input_data):\n- data_list_fp32.append(mx.nd.random.uniform(low=data_range[0], high=data_range[1], shape=data_shape))\n+ for i, obj in enumerate(data_shape):", + "comment_created_at": "2022-02-02T13:38:27+00:00", + "comment_author": "bgawrych", + "comment_body": "replace obj with name/key or use items() to iterate over values in dict", + "pr_file_module": null + }, + { + "comment_id": "797733350", + "repo_full_name": "apache/mxnet", + "pr_number": 20846, + "pr_file": "tests/python/dnnl/test_bf16_operator.py", + "discussion_id": "797614635", + "commented_code": "@@ -55,20 +55,22 @@ def check_operator_accuracy(sym_fp32, sym_bf16, data_shape, num_input_data=1, bf\n etol: float\n The error rate threshold, allow a small amount of value not consistent between bf16 and fp32\n \"\"\"\n- if not isinstance(data_shape, tuple):\n- data_shape = tuple(data_shape)\n+ if isinstance(data_shape, tuple):\n+ data_shape = {\"data\": data_shape}\n data_range = (0.0, 10.0)\n data_list_fp32 = list()\n data_list_bf16 = list()\n- for i in range(num_input_data):\n- data_list_fp32.append(mx.nd.random.uniform(low=data_range[0], high=data_range[1], shape=data_shape))\n+ for i, obj in enumerate(data_shape):", + "comment_created_at": "2022-02-02T15:34:35+00:00", + "comment_author": "agrabows", + "comment_body": "done", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "646155037", + "pr_number": 20262, + "pr_file": "tests/python/unittest/test_gluon.py", + "created_at": "2021-06-06T16:21:48+00:00", + "commented_code": "from mxnet.util import is_np_array\nfrom mxnet.ndarray.ndarray import _STORAGE_TYPE_STR_TO_ID\nfrom mxnet.test_utils import use_np\nimport mxnet.numpy as _mx_np\nfrom common import assertRaises, assert_raises_cudnn_not_satisfied, \\\n xfail_when_nonstandard_decimal_separator, environment\nimport numpy as np\nimport numpy as _np", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "646155037", + "repo_full_name": "apache/mxnet", + "pr_number": 20262, + "pr_file": "tests/python/unittest/test_gluon.py", + "discussion_id": 
"646155037", + "commented_code": "@@ -27,10 +27,9 @@\n from mxnet.util import is_np_array\n from mxnet.ndarray.ndarray import _STORAGE_TYPE_STR_TO_ID\n from mxnet.test_utils import use_np\n-import mxnet.numpy as _mx_np\n from common import assertRaises, assert_raises_cudnn_not_satisfied, \\\n xfail_when_nonstandard_decimal_separator, environment\n-import numpy as np\n+import numpy as _np", + "comment_created_at": "2021-06-06T16:21:48+00:00", + "comment_author": "szha", + "comment_body": "use `onp` as the name to be consistent", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "646157100", + "pr_number": 20262, + "pr_file": "tests/python/unittest/test_loss.py", + "created_at": "2021-06-06T16:39:10+00:00", + "commented_code": "# under the License.\n\nimport mxnet as mx\nimport numpy as np\nimport numpy as _np", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "646157100", + "repo_full_name": "apache/mxnet", + "pr_number": 20262, + "pr_file": "tests/python/unittest/test_loss.py", + "discussion_id": "646157100", + "commented_code": "@@ -16,91 +16,98 @@\n # under the License.\n \n import mxnet as mx\n-import numpy as np\n+import numpy as _np", + "comment_created_at": "2021-06-06T16:39:10+00:00", + "comment_author": "szha", + "comment_body": "use `onp` as the name for consistency", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "654714998", + "pr_number": 20262, + "pr_file": "benchmark/python/control_flow/rnn.py", + "created_at": "2021-06-18T23:32:28+00:00", + "commented_code": "from time import time\n\nimport mxnet as mx\nimport numpy as np\nfrom mxnet import gluon\nimport numpy as onp", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "654714998", + "repo_full_name": "apache/mxnet", + "pr_number": 20262, + "pr_file": "benchmark/python/control_flow/rnn.py", + "discussion_id": "654714998", + "commented_code": "@@ -24,8 +24,8 @@\n from time import time\n \n import mxnet as mx\n-import numpy as np\n-from mxnet import gluon\n+import numpy as onp", + "comment_created_at": "2021-06-18T23:32:28+00:00", + "comment_author": "TristonC", + "comment_body": "Is the 'o' in onp as original as this is original numpy? It will be confusing as np being well known as numpy for short.", + "pr_file_module": null + }, + { + "comment_id": "654727506", + "repo_full_name": "apache/mxnet", + "pr_number": 20262, + "pr_file": "benchmark/python/control_flow/rnn.py", + "discussion_id": "654714998", + "commented_code": "@@ -24,8 +24,8 @@\n from time import time\n \n import mxnet as mx\n-import numpy as np\n-from mxnet import gluon\n+import numpy as onp", + "comment_created_at": "2021-06-19T01:03:34+00:00", + "comment_author": "barry-jin", + "comment_body": "Yes, 'o' in onp is 'official', which is used to distinguish between official numpy and MXNet numpy. Usually, user will do \r\n`from mxnet import np` and build their models with numpy operators from MXNet. This will provide numpy-compatible coding experience in MXNet for users. 
", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "638717908", + "pr_number": 20227, + "pr_file": "tests/python/quantization/test_quantization.py", + "created_at": "2021-05-25T11:55:26+00:00", + "commented_code": "data_high = 127.0\n\n # run fp32 bn\n data_sym = mx.sym.Variable(name='data', shape=data_shape, dtype='float32')\n bn_fp32 = mx.sym.BatchNorm(data=data_sym, name='bn', use_global_stats=True, fix_gamma=False)\n arg_shapes, out_shapes, aux_shapes = bn_fp32.infer_shape(data=data_shape)\n arg_names = bn_fp32.list_arguments()\n aux_names = bn_fp32.list_auxiliary_states()\n bn_fp32 = mx.gluon.nn.BatchNorm(use_global_stats=True, scale=True)\n data = mx.nd.random.uniform(low=data_low, high=data_high, shape=data_shape)\n bn_fp32.initialize()\n bn_fp32.hybridize()\n bn_fp32(data)\n fp32_params = bn_fp32.collect_params()\n \n data = mx.nd.random.uniform(low=data_low, high=data_high, shape=data_shape)\n gamma = mx.nd.random.uniform(low=data_low, high=data_high, shape=arg_shapes[1])\n beta = mx.nd.random.uniform(low=data_low, high=data_high, shape=arg_shapes[2])\n gamma = mx.nd.random.uniform(low=data_low, high=data_high, shape=fp32_params['gamma'].shape)\n beta = mx.nd.random.uniform(low=data_low, high=data_high, shape=fp32_params['beta'].shape)\n moving_mean, moving_var = get_mean_var(data)\n new_params = {\n 'gamma':gamma,\n 'beta':beta,\n 'running_mean': moving_mean,", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "638717908", + "repo_full_name": "apache/mxnet", + "pr_number": 20227, + "pr_file": "tests/python/quantization/test_quantization.py", + "discussion_id": "638717908", + "commented_code": "@@ -760,48 +864,38 @@ def check_quantized_bn(data_shape, qdtype):\n data_high = 127.0\n \n # run fp32 bn\n- data_sym = mx.sym.Variable(name='data', shape=data_shape, dtype='float32')\n- bn_fp32 = mx.sym.BatchNorm(data=data_sym, name='bn', use_global_stats=True, fix_gamma=False)\n- arg_shapes, out_shapes, aux_shapes = bn_fp32.infer_shape(data=data_shape)\n- arg_names = bn_fp32.list_arguments()\n- aux_names = bn_fp32.list_auxiliary_states()\n+ bn_fp32 = mx.gluon.nn.BatchNorm(use_global_stats=True, scale=True)\n+ data = mx.nd.random.uniform(low=data_low, high=data_high, shape=data_shape)\n+ bn_fp32.initialize()\n+ bn_fp32.hybridize()\n+ bn_fp32(data)\n+ fp32_params = bn_fp32.collect_params()\n+ \n data = mx.nd.random.uniform(low=data_low, high=data_high, shape=data_shape)\n- gamma = mx.nd.random.uniform(low=data_low, high=data_high, shape=arg_shapes[1])\n- beta = mx.nd.random.uniform(low=data_low, high=data_high, shape=arg_shapes[2])\n+ gamma = mx.nd.random.uniform(low=data_low, high=data_high, shape=fp32_params['gamma'].shape)\n+ beta = mx.nd.random.uniform(low=data_low, high=data_high, shape=fp32_params['beta'].shape)\n moving_mean, moving_var = get_mean_var(data)\n+ new_params = {\n+ 'gamma':gamma,\n+ 'beta':beta,\n+ 'running_mean': moving_mean,", + "comment_created_at": "2021-05-25T11:55:26+00:00", + "comment_author": "bartekkuncer", + "comment_body": "Maybe rename 'moving_*' vars to 'running_*' vars for consistency?", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-consistent-naming-patterns.md b/_reviewers/mxnet-consistent-naming-patterns.md index 9aff767..87d1af5 100644 --- a/_reviewers/mxnet-consistent-naming-patterns.md +++ b/_reviewers/mxnet-consistent-naming-patterns.md @@ -47,185 +47,3 @@ Maintain consistent naming conventions throughout the codebase to enhance readab 4. 
**Type annotations**: Maintain a consistent style for type annotations throughout the codebase, with explicit type names for parameters and return values. 5. **Related variables**: Use consistent prefixes or suffixes for related variables (e.g., 'running_mean' and 'running_var' instead of mixing 'moving_' and 'running_' prefixes). - - -[ - { - "discussion_id": "722364711", - "pr_number": 20633, - "pr_file": "python/mxnet/numpy/linalg.py", - "created_at": "2021-10-05T15:33:59+00:00", - "commented_code": "return _mx_nd_np.linalg.matrix_rank(M, tol, hermitian)\n\n\ndef vecdot(a: ndarray, b, ndarray, axis: Optional[int] =None) -> ndarray:", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "722364711", - "repo_full_name": "apache/mxnet", - "pr_number": 20633, - "pr_file": "python/mxnet/numpy/linalg.py", - "discussion_id": "722364711", - "commented_code": "@@ -67,6 +70,53 @@ def matrix_rank(M, tol=None, hermitian=False):\n return _mx_nd_np.linalg.matrix_rank(M, tol, hermitian)\n \n \n+def vecdot(a: ndarray, b, ndarray, axis: Optional[int] =None) -> ndarray:", - "comment_created_at": "2021-10-05T15:33:59+00:00", - "comment_author": "mozga-intel", - "comment_body": "```def vecdot(a: ndarray, b, ndarray, axis: Optional[int] =None) -> ndarray:```\r\nHow about adding the same function style in the entire file by giving the type-name explicitly?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "756375086", - "pr_number": 20633, - "pr_file": "python/mxnet/numpy/multiarray.py", - "created_at": "2021-11-24T19:36:54+00:00", - "commented_code": "\"\"\"\n from_dlpack = ndarray_from_dlpack(ndarray)\n return from_dlpack(x)\n\n\n@set_module('mxnet.numpy')\ndef matrix_transpose(x: ndarray, /) -> ndarray:\n \"\"\"\n Transposes a matrix (or a stack of matrices) x ..\n\n Parameters\n ----------\n x : ndarray\n input array having shape (..., M, N) and whose innermost two dimensions form MxN matrices\n\n Returns\n -------\n out : ndarray\n an array containing the transpose for each matrix and having shape (..., N, M) . The\n returned array must have the same data type as x .\n \"\"\"\n if x.ndim < 2:\n raise ValueError(\"x must be at least 2-dimensional for matrix_transpose\")\n return _mx_nd_np.swapaxes(x, -1, -2)\n\n\ndef astype(\n self: ndarray,", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "756375086", - "repo_full_name": "apache/mxnet", - "pr_number": 20633, - "pr_file": "python/mxnet/numpy/multiarray.py", - "discussion_id": "756375086", - "commented_code": "@@ -13363,3 +13965,96 @@ def from_dlpack(x):\n \"\"\"\n from_dlpack = ndarray_from_dlpack(ndarray)\n return from_dlpack(x)\n+\n+\n+@set_module('mxnet.numpy')\n+def matrix_transpose(x: ndarray, /) -> ndarray:\n+ \"\"\"\n+ Transposes a matrix (or a stack of matrices) x ..\n+\n+ Parameters\n+ ----------\n+ x : ndarray\n+ input array having shape (..., M, N) and whose innermost two dimensions form MxN matrices\n+\n+ Returns\n+ -------\n+ out : ndarray\n+ an array containing the transpose for each matrix and having shape (..., N, M) . The\n+ returned array must have the same data type as x .\n+ \"\"\"\n+ if x.ndim < 2:\n+ raise ValueError(\"x must be at least 2-dimensional for matrix_transpose\")\n+ return _mx_nd_np.swapaxes(x, -1, -2)\n+\n+\n+def astype(\n+ self: ndarray,", - "comment_created_at": "2021-11-24T19:36:54+00:00", - "comment_author": "barry-jin", - "comment_body": "Please use x or array as argument here instead of self. 
", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "797614635", - "pr_number": 20846, - "pr_file": "tests/python/dnnl/test_bf16_operator.py", - "created_at": "2022-02-02T13:38:27+00:00", - "commented_code": "etol: float\n The error rate threshold, allow a small amount of value not consistent between bf16 and fp32\n \"\"\"\n if not isinstance(data_shape, tuple):\n data_shape = tuple(data_shape)\n if isinstance(data_shape, tuple):\n data_shape = {\"data\": data_shape}\n data_range = (0.0, 10.0)\n data_list_fp32 = list()\n data_list_bf16 = list()\n for i in range(num_input_data):\n data_list_fp32.append(mx.nd.random.uniform(low=data_range[0], high=data_range[1], shape=data_shape))\n for i, obj in enumerate(data_shape):", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "797614635", - "repo_full_name": "apache/mxnet", - "pr_number": 20846, - "pr_file": "tests/python/dnnl/test_bf16_operator.py", - "discussion_id": "797614635", - "commented_code": "@@ -55,20 +55,22 @@ def check_operator_accuracy(sym_fp32, sym_bf16, data_shape, num_input_data=1, bf\n etol: float\n The error rate threshold, allow a small amount of value not consistent between bf16 and fp32\n \"\"\"\n- if not isinstance(data_shape, tuple):\n- data_shape = tuple(data_shape)\n+ if isinstance(data_shape, tuple):\n+ data_shape = {\"data\": data_shape}\n data_range = (0.0, 10.0)\n data_list_fp32 = list()\n data_list_bf16 = list()\n- for i in range(num_input_data):\n- data_list_fp32.append(mx.nd.random.uniform(low=data_range[0], high=data_range[1], shape=data_shape))\n+ for i, obj in enumerate(data_shape):", - "comment_created_at": "2022-02-02T13:38:27+00:00", - "comment_author": "bgawrych", - "comment_body": "replace obj with name/key or use items() to iterate over values in dict", - "pr_file_module": null - }, - { - "comment_id": "797733350", - "repo_full_name": "apache/mxnet", - "pr_number": 20846, - "pr_file": "tests/python/dnnl/test_bf16_operator.py", - "discussion_id": "797614635", - "commented_code": "@@ -55,20 +55,22 @@ def check_operator_accuracy(sym_fp32, sym_bf16, data_shape, num_input_data=1, bf\n etol: float\n The error rate threshold, allow a small amount of value not consistent between bf16 and fp32\n \"\"\"\n- if not isinstance(data_shape, tuple):\n- data_shape = tuple(data_shape)\n+ if isinstance(data_shape, tuple):\n+ data_shape = {\"data\": data_shape}\n data_range = (0.0, 10.0)\n data_list_fp32 = list()\n data_list_bf16 = list()\n- for i in range(num_input_data):\n- data_list_fp32.append(mx.nd.random.uniform(low=data_range[0], high=data_range[1], shape=data_shape))\n+ for i, obj in enumerate(data_shape):", - "comment_created_at": "2022-02-02T15:34:35+00:00", - "comment_author": "agrabows", - "comment_body": "done", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "646155037", - "pr_number": 20262, - "pr_file": "tests/python/unittest/test_gluon.py", - "created_at": "2021-06-06T16:21:48+00:00", - "commented_code": "from mxnet.util import is_np_array\nfrom mxnet.ndarray.ndarray import _STORAGE_TYPE_STR_TO_ID\nfrom mxnet.test_utils import use_np\nimport mxnet.numpy as _mx_np\nfrom common import assertRaises, assert_raises_cudnn_not_satisfied, \\\n xfail_when_nonstandard_decimal_separator, environment\nimport numpy as np\nimport numpy as _np", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "646155037", - "repo_full_name": "apache/mxnet", - "pr_number": 20262, - "pr_file": "tests/python/unittest/test_gluon.py", - "discussion_id": 
"646155037", - "commented_code": "@@ -27,10 +27,9 @@\n from mxnet.util import is_np_array\n from mxnet.ndarray.ndarray import _STORAGE_TYPE_STR_TO_ID\n from mxnet.test_utils import use_np\n-import mxnet.numpy as _mx_np\n from common import assertRaises, assert_raises_cudnn_not_satisfied, \\\n xfail_when_nonstandard_decimal_separator, environment\n-import numpy as np\n+import numpy as _np", - "comment_created_at": "2021-06-06T16:21:48+00:00", - "comment_author": "szha", - "comment_body": "use `onp` as the name to be consistent", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "646157100", - "pr_number": 20262, - "pr_file": "tests/python/unittest/test_loss.py", - "created_at": "2021-06-06T16:39:10+00:00", - "commented_code": "# under the License.\n\nimport mxnet as mx\nimport numpy as np\nimport numpy as _np", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "646157100", - "repo_full_name": "apache/mxnet", - "pr_number": 20262, - "pr_file": "tests/python/unittest/test_loss.py", - "discussion_id": "646157100", - "commented_code": "@@ -16,91 +16,98 @@\n # under the License.\n \n import mxnet as mx\n-import numpy as np\n+import numpy as _np", - "comment_created_at": "2021-06-06T16:39:10+00:00", - "comment_author": "szha", - "comment_body": "use `onp` as the name for consistency", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "654714998", - "pr_number": 20262, - "pr_file": "benchmark/python/control_flow/rnn.py", - "created_at": "2021-06-18T23:32:28+00:00", - "commented_code": "from time import time\n\nimport mxnet as mx\nimport numpy as np\nfrom mxnet import gluon\nimport numpy as onp", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "654714998", - "repo_full_name": "apache/mxnet", - "pr_number": 20262, - "pr_file": "benchmark/python/control_flow/rnn.py", - "discussion_id": "654714998", - "commented_code": "@@ -24,8 +24,8 @@\n from time import time\n \n import mxnet as mx\n-import numpy as np\n-from mxnet import gluon\n+import numpy as onp", - "comment_created_at": "2021-06-18T23:32:28+00:00", - "comment_author": "TristonC", - "comment_body": "Is the 'o' in onp as original as this is original numpy? It will be confusing as np being well known as numpy for short.", - "pr_file_module": null - }, - { - "comment_id": "654727506", - "repo_full_name": "apache/mxnet", - "pr_number": 20262, - "pr_file": "benchmark/python/control_flow/rnn.py", - "discussion_id": "654714998", - "commented_code": "@@ -24,8 +24,8 @@\n from time import time\n \n import mxnet as mx\n-import numpy as np\n-from mxnet import gluon\n+import numpy as onp", - "comment_created_at": "2021-06-19T01:03:34+00:00", - "comment_author": "barry-jin", - "comment_body": "Yes, 'o' in onp is 'official', which is used to distinguish between official numpy and MXNet numpy. Usually, user will do \r\n`from mxnet import np` and build their models with numpy operators from MXNet. This will provide numpy-compatible coding experience in MXNet for users. 
", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "638717908", - "pr_number": 20227, - "pr_file": "tests/python/quantization/test_quantization.py", - "created_at": "2021-05-25T11:55:26+00:00", - "commented_code": "data_high = 127.0\n\n # run fp32 bn\n data_sym = mx.sym.Variable(name='data', shape=data_shape, dtype='float32')\n bn_fp32 = mx.sym.BatchNorm(data=data_sym, name='bn', use_global_stats=True, fix_gamma=False)\n arg_shapes, out_shapes, aux_shapes = bn_fp32.infer_shape(data=data_shape)\n arg_names = bn_fp32.list_arguments()\n aux_names = bn_fp32.list_auxiliary_states()\n bn_fp32 = mx.gluon.nn.BatchNorm(use_global_stats=True, scale=True)\n data = mx.nd.random.uniform(low=data_low, high=data_high, shape=data_shape)\n bn_fp32.initialize()\n bn_fp32.hybridize()\n bn_fp32(data)\n fp32_params = bn_fp32.collect_params()\n \n data = mx.nd.random.uniform(low=data_low, high=data_high, shape=data_shape)\n gamma = mx.nd.random.uniform(low=data_low, high=data_high, shape=arg_shapes[1])\n beta = mx.nd.random.uniform(low=data_low, high=data_high, shape=arg_shapes[2])\n gamma = mx.nd.random.uniform(low=data_low, high=data_high, shape=fp32_params['gamma'].shape)\n beta = mx.nd.random.uniform(low=data_low, high=data_high, shape=fp32_params['beta'].shape)\n moving_mean, moving_var = get_mean_var(data)\n new_params = {\n 'gamma':gamma,\n 'beta':beta,\n 'running_mean': moving_mean,", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "638717908", - "repo_full_name": "apache/mxnet", - "pr_number": 20227, - "pr_file": "tests/python/quantization/test_quantization.py", - "discussion_id": "638717908", - "commented_code": "@@ -760,48 +864,38 @@ def check_quantized_bn(data_shape, qdtype):\n data_high = 127.0\n \n # run fp32 bn\n- data_sym = mx.sym.Variable(name='data', shape=data_shape, dtype='float32')\n- bn_fp32 = mx.sym.BatchNorm(data=data_sym, name='bn', use_global_stats=True, fix_gamma=False)\n- arg_shapes, out_shapes, aux_shapes = bn_fp32.infer_shape(data=data_shape)\n- arg_names = bn_fp32.list_arguments()\n- aux_names = bn_fp32.list_auxiliary_states()\n+ bn_fp32 = mx.gluon.nn.BatchNorm(use_global_stats=True, scale=True)\n+ data = mx.nd.random.uniform(low=data_low, high=data_high, shape=data_shape)\n+ bn_fp32.initialize()\n+ bn_fp32.hybridize()\n+ bn_fp32(data)\n+ fp32_params = bn_fp32.collect_params()\n+ \n data = mx.nd.random.uniform(low=data_low, high=data_high, shape=data_shape)\n- gamma = mx.nd.random.uniform(low=data_low, high=data_high, shape=arg_shapes[1])\n- beta = mx.nd.random.uniform(low=data_low, high=data_high, shape=arg_shapes[2])\n+ gamma = mx.nd.random.uniform(low=data_low, high=data_high, shape=fp32_params['gamma'].shape)\n+ beta = mx.nd.random.uniform(low=data_low, high=data_high, shape=fp32_params['beta'].shape)\n moving_mean, moving_var = get_mean_var(data)\n+ new_params = {\n+ 'gamma':gamma,\n+ 'beta':beta,\n+ 'running_mean': moving_mean,", - "comment_created_at": "2021-05-25T11:55:26+00:00", - "comment_author": "bartekkuncer", - "comment_body": "Maybe rename 'moving_*' vars to 'running_*' vars for consistency?", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-document-all-parameters.json b/_reviewers/mxnet-document-all-parameters.json new file mode 100644 index 0000000..1a6be94 --- /dev/null +++ b/_reviewers/mxnet-document-all-parameters.json @@ -0,0 +1,124 @@ +[ + { + "discussion_id": "845789888", + "pr_number": 20983, + "pr_file": "include/mxnet/c_api.h", + "created_at": "2022-04-08T06:52:59+00:00", + 
"commented_code": "* \\return 0 when success, -1 when failure happens\n */\nMXNET_DLL int MXAutogradIsTraining(bool* curr);\n/*!\n * \\brief set whether to disable AMP\n * \\param curr returns the current status", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "845789888", + "repo_full_name": "apache/mxnet", + "pr_number": 20983, + "pr_file": "include/mxnet/c_api.h", + "discussion_id": "845789888", + "commented_code": "@@ -1225,6 +1225,18 @@ MXNET_DLL int MXAutogradIsRecording(bool* curr);\n * \\return 0 when success, -1 when failure happens\n */\n MXNET_DLL int MXAutogradIsTraining(bool* curr);\n+/*!\n+ * \\brief set whether to disable AMP\n+ * \\param curr returns the current status", + "comment_created_at": "2022-04-08T06:52:59+00:00", + "comment_author": "bgawrych", + "comment_body": "```suggestion\r\n * \\param prev returns the current status\r\n```", + "pr_file_module": null + }, + { + "comment_id": "845790273", + "repo_full_name": "apache/mxnet", + "pr_number": 20983, + "pr_file": "include/mxnet/c_api.h", + "discussion_id": "845789888", + "commented_code": "@@ -1225,6 +1225,18 @@ MXNET_DLL int MXAutogradIsRecording(bool* curr);\n * \\return 0 when success, -1 when failure happens\n */\n MXNET_DLL int MXAutogradIsTraining(bool* curr);\n+/*!\n+ * \\brief set whether to disable AMP\n+ * \\param curr returns the current status", + "comment_created_at": "2022-04-08T06:53:36+00:00", + "comment_author": "bgawrych", + "comment_body": "lack of is_amp_disabled in doc", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "744059733", + "pr_number": 20635, + "pr_file": "include/mxnet/storage.h", + "created_at": "2021-11-06T02:51:51+00:00", + "commented_code": "* \\param ctx Context information about the device and ID.\n * \\return Handle struct.\n */\n Handle Alloc(size_t size, Context ctx) {\n Handle Alloc(size_t size, Context ctx, bool failsafe = false) {", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "744059733", + "repo_full_name": "apache/mxnet", + "pr_number": 20635, + "pr_file": "include/mxnet/storage.h", + "discussion_id": "744059733", + "commented_code": "@@ -71,18 +71,18 @@ class Storage {\n * \\param ctx Context information about the device and ID.\n * \\return Handle struct.\n */\n- Handle Alloc(size_t size, Context ctx) {\n+ Handle Alloc(size_t size, Context ctx, bool failsafe = false) {", + "comment_created_at": "2021-11-06T02:51:51+00:00", + "comment_author": "DickJC123", + "comment_body": "Where you've add the failsafe parameter (here and elsewhere), please add documentation. 
I suggest:\r\n\r\n \\* \\param failsafe Return a handle with a null dptr if out of memory, rather than exit.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "744071053", + "pr_number": 20635, + "pr_file": "src/storage/gpu_device_storage.h", + "created_at": "2021-11-06T03:52:45+00:00", + "commented_code": "* \\brief Allocation.\n * \\param handle Handle struct.\n */\n inline static void Alloc(Storage::Handle* handle);\n inline static void Alloc(Storage::Handle* handle, bool failsafe = false);", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "744071053", + "repo_full_name": "apache/mxnet", + "pr_number": 20635, + "pr_file": "src/storage/gpu_device_storage.h", + "discussion_id": "744071053", + "commented_code": "@@ -39,21 +39,28 @@ class GPUDeviceStorage {\n * \\brief Allocation.\n * \\param handle Handle struct.\n */\n- inline static void Alloc(Storage::Handle* handle);\n+ inline static void Alloc(Storage::Handle* handle, bool failsafe = false);", + "comment_created_at": "2021-11-06T03:52:45+00:00", + "comment_author": "DickJC123", + "comment_body": "Add doc of new param `failsafe` per earlier comment.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "744071104", + "pr_number": 20635, + "pr_file": "src/storage/pinned_memory_storage.h", + "created_at": "2021-11-06T03:53:29+00:00", + "commented_code": "* \\brief Allocation.\n * \\param handle Handle struct.\n */\n inline static void Alloc(Storage::Handle* handle);\n inline static void Alloc(Storage::Handle* handle, bool failsafe);", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "744071104", + "repo_full_name": "apache/mxnet", + "pr_number": 20635, + "pr_file": "src/storage/pinned_memory_storage.h", + "discussion_id": "744071104", + "commented_code": "@@ -36,7 +36,7 @@ class PinnedMemoryStorage {\n * \\brief Allocation.\n * \\param handle Handle struct.\n */\n- inline static void Alloc(Storage::Handle* handle);\n+ inline static void Alloc(Storage::Handle* handle, bool failsafe);", + "comment_created_at": "2021-11-06T03:53:29+00:00", + "comment_author": "DickJC123", + "comment_body": "Add doc of new param `failsafe` per earlier comment.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "744071143", + "pr_number": 20635, + "pr_file": "src/storage/storage_manager.h", + "created_at": "2021-11-06T03:54:29+00:00", + "commented_code": "* \\brief Allocation.\n * \\param handle Handle struct.\n */\n virtual void Alloc(Storage::Handle* handle) = 0;\n virtual void Alloc(Storage::Handle* handle, bool failsafe = false) = 0;", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "744071143", + "repo_full_name": "apache/mxnet", + "pr_number": 20635, + "pr_file": "src/storage/storage_manager.h", + "discussion_id": "744071143", + "commented_code": "@@ -40,7 +40,7 @@ class StorageManager {\n * \\brief Allocation.\n * \\param handle Handle struct.\n */\n- virtual void Alloc(Storage::Handle* handle) = 0;\n+ virtual void Alloc(Storage::Handle* handle, bool failsafe = false) = 0;", + "comment_created_at": "2021-11-06T03:54:29+00:00", + "comment_author": "DickJC123", + "comment_body": "Add doc of new param `failsafe` per earlier comment.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-document-all-parameters.md b/_reviewers/mxnet-document-all-parameters.md index 7a2d1fd..9bd91ef 100644 --- a/_reviewers/mxnet-document-all-parameters.md +++ 
b/_reviewers/mxnet-document-all-parameters.md @@ -29,129 +29,3 @@ virtual void Alloc(Storage::Handle* handle, bool failsafe = false); ``` This helps other developers understand how to use the function correctly and makes the codebase more maintainable. Incomplete parameter documentation can lead to misuse of functions and introduces bugs that are difficult to diagnose. - - -[ - { - "discussion_id": "845789888", - "pr_number": 20983, - "pr_file": "include/mxnet/c_api.h", - "created_at": "2022-04-08T06:52:59+00:00", - "commented_code": "* \\return 0 when success, -1 when failure happens\n */\nMXNET_DLL int MXAutogradIsTraining(bool* curr);\n/*!\n * \\brief set whether to disable AMP\n * \\param curr returns the current status", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "845789888", - "repo_full_name": "apache/mxnet", - "pr_number": 20983, - "pr_file": "include/mxnet/c_api.h", - "discussion_id": "845789888", - "commented_code": "@@ -1225,6 +1225,18 @@ MXNET_DLL int MXAutogradIsRecording(bool* curr);\n * \\return 0 when success, -1 when failure happens\n */\n MXNET_DLL int MXAutogradIsTraining(bool* curr);\n+/*!\n+ * \\brief set whether to disable AMP\n+ * \\param curr returns the current status", - "comment_created_at": "2022-04-08T06:52:59+00:00", - "comment_author": "bgawrych", - "comment_body": "```suggestion\r\n * \\param prev returns the current status\r\n```", - "pr_file_module": null - }, - { - "comment_id": "845790273", - "repo_full_name": "apache/mxnet", - "pr_number": 20983, - "pr_file": "include/mxnet/c_api.h", - "discussion_id": "845789888", - "commented_code": "@@ -1225,6 +1225,18 @@ MXNET_DLL int MXAutogradIsRecording(bool* curr);\n * \\return 0 when success, -1 when failure happens\n */\n MXNET_DLL int MXAutogradIsTraining(bool* curr);\n+/*!\n+ * \\brief set whether to disable AMP\n+ * \\param curr returns the current status", - "comment_created_at": "2022-04-08T06:53:36+00:00", - "comment_author": "bgawrych", - "comment_body": "lack of is_amp_disabled in doc", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "744059733", - "pr_number": 20635, - "pr_file": "include/mxnet/storage.h", - "created_at": "2021-11-06T02:51:51+00:00", - "commented_code": "* \\param ctx Context information about the device and ID.\n * \\return Handle struct.\n */\n Handle Alloc(size_t size, Context ctx) {\n Handle Alloc(size_t size, Context ctx, bool failsafe = false) {", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "744059733", - "repo_full_name": "apache/mxnet", - "pr_number": 20635, - "pr_file": "include/mxnet/storage.h", - "discussion_id": "744059733", - "commented_code": "@@ -71,18 +71,18 @@ class Storage {\n * \\param ctx Context information about the device and ID.\n * \\return Handle struct.\n */\n- Handle Alloc(size_t size, Context ctx) {\n+ Handle Alloc(size_t size, Context ctx, bool failsafe = false) {", - "comment_created_at": "2021-11-06T02:51:51+00:00", - "comment_author": "DickJC123", - "comment_body": "Where you've add the failsafe parameter (here and elsewhere), please add documentation. 
I suggest:\r\n\r\n \\* \\param failsafe Return a handle with a null dptr if out of memory, rather than exit.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "744071053", - "pr_number": 20635, - "pr_file": "src/storage/gpu_device_storage.h", - "created_at": "2021-11-06T03:52:45+00:00", - "commented_code": "* \\brief Allocation.\n * \\param handle Handle struct.\n */\n inline static void Alloc(Storage::Handle* handle);\n inline static void Alloc(Storage::Handle* handle, bool failsafe = false);", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "744071053", - "repo_full_name": "apache/mxnet", - "pr_number": 20635, - "pr_file": "src/storage/gpu_device_storage.h", - "discussion_id": "744071053", - "commented_code": "@@ -39,21 +39,28 @@ class GPUDeviceStorage {\n * \\brief Allocation.\n * \\param handle Handle struct.\n */\n- inline static void Alloc(Storage::Handle* handle);\n+ inline static void Alloc(Storage::Handle* handle, bool failsafe = false);", - "comment_created_at": "2021-11-06T03:52:45+00:00", - "comment_author": "DickJC123", - "comment_body": "Add doc of new param `failsafe` per earlier comment.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "744071104", - "pr_number": 20635, - "pr_file": "src/storage/pinned_memory_storage.h", - "created_at": "2021-11-06T03:53:29+00:00", - "commented_code": "* \\brief Allocation.\n * \\param handle Handle struct.\n */\n inline static void Alloc(Storage::Handle* handle);\n inline static void Alloc(Storage::Handle* handle, bool failsafe);", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "744071104", - "repo_full_name": "apache/mxnet", - "pr_number": 20635, - "pr_file": "src/storage/pinned_memory_storage.h", - "discussion_id": "744071104", - "commented_code": "@@ -36,7 +36,7 @@ class PinnedMemoryStorage {\n * \\brief Allocation.\n * \\param handle Handle struct.\n */\n- inline static void Alloc(Storage::Handle* handle);\n+ inline static void Alloc(Storage::Handle* handle, bool failsafe);", - "comment_created_at": "2021-11-06T03:53:29+00:00", - "comment_author": "DickJC123", - "comment_body": "Add doc of new param `failsafe` per earlier comment.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "744071143", - "pr_number": 20635, - "pr_file": "src/storage/storage_manager.h", - "created_at": "2021-11-06T03:54:29+00:00", - "commented_code": "* \\brief Allocation.\n * \\param handle Handle struct.\n */\n virtual void Alloc(Storage::Handle* handle) = 0;\n virtual void Alloc(Storage::Handle* handle, bool failsafe = false) = 0;", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "744071143", - "repo_full_name": "apache/mxnet", - "pr_number": 20635, - "pr_file": "src/storage/storage_manager.h", - "discussion_id": "744071143", - "commented_code": "@@ -40,7 +40,7 @@ class StorageManager {\n * \\brief Allocation.\n * \\param handle Handle struct.\n */\n- virtual void Alloc(Storage::Handle* handle) = 0;\n+ virtual void Alloc(Storage::Handle* handle, bool failsafe = false) = 0;", - "comment_created_at": "2021-11-06T03:54:29+00:00", - "comment_author": "DickJC123", - "comment_body": "Add doc of new param `failsafe` per earlier comment.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-document-api-completely.json b/_reviewers/mxnet-document-api-completely.json new file mode 100644 index 0000000..1b4c7a0 --- /dev/null +++ b/_reviewers/mxnet-document-api-completely.json @@ -0,0 +1,68 @@ +[ + { + 
"discussion_id": "360244161", + "pr_number": 17129, + "pr_file": "python/mxnet/gluon/contrib/estimator/estimator.py", + "created_at": "2019-12-20T06:39:31+00:00", + "commented_code": "Parameters\n ----------\n val_data : DataLoader\n val_data : iterable Object\n Validation data loader with data and labels.", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "360244161", + "repo_full_name": "apache/mxnet", + "pr_number": 17129, + "pr_file": "python/mxnet/gluon/contrib/estimator/estimator.py", + "discussion_id": "360244161", + "commented_code": "@@ -281,7 +281,7 @@ def evaluate(self,\n \n Parameters\n ----------\n- val_data : DataLoader\n+ val_data : iterable Object\n Validation data loader with data and labels.", + "comment_created_at": "2019-12-20T06:39:31+00:00", + "comment_author": "leezu", + "comment_body": "It's unclear that `data loader` refers to the `gluon.DataLoader`. It would be good to clarify that any iterable object yielding a sequence of batches is acceptable and that the `gluon.DataLoader` is one instance of such iterables.\r\nSame applies to the fit_batch below.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "711223058", + "pr_number": 20592, + "pr_file": "python/mxnet/numpy/multiarray.py", + "created_at": "2021-09-17T17:12:57+00:00", + "commented_code": "\"\"\"\n return _mx_nd_np.remainder(x1, x2, out=out)\n\n@set_module('mxnet.numpy')\n@wrap_np_binary_func\ndef pow(x1, x2):", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "711223058", + "repo_full_name": "apache/mxnet", + "pr_number": 20592, + "pr_file": "python/mxnet/numpy/multiarray.py", + "discussion_id": "711223058", + "commented_code": "@@ -3591,6 +3589,55 @@ def remainder(x1, x2, out=None, **kwargs):\n \"\"\"\n return _mx_nd_np.remainder(x1, x2, out=out)\n \n+@set_module('mxnet.numpy')\n+@wrap_np_binary_func\n+def pow(x1, x2):", + "comment_created_at": "2021-09-17T17:12:57+00:00", + "comment_author": "barry-jin", + "comment_body": "We can use assignment to make `pow` the alias of `power` and add notes to inform the user that `pow` is not a official NumPy operator but a standard API. \r\n\r\nExample:\r\n```\r\npow = power\r\npow.__doc__ = ```\r\n ...\r\n Notes\r\n -----\r\n `pow` is an alias for `power`. It is a standard API in https://data-apis.org/array-api/latest/API_specification/elementwise_functions.html#pow-x1-x2 instead of an official NumPy operator. \r\n\r\n >>> np.pow is np.power\r\n True\r\n ...\r\n```\r\n\r\nWe can apply this to other operators in this PR. What do you think. ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "720919840", + "pr_number": 20592, + "pr_file": "python/mxnet/numpy/multiarray.py", + "created_at": "2021-10-03T22:42:51+00:00", + "commented_code": "\"\"\"\n return _mx_nd_np.arccos(x, out=out, **kwargs)\n\nacos = arccos\nacos.__doc__ = \"\"\"\n Trigonometric inverse cosine, element-wise.\n The inverse of cos so that, if y = cos(x), then x = acos(y).\n \n Notes\n ----------\n `acos` is a alias for `arccos`. It is a standard API in\n https://data-apis.org/array-api/latest/API_specification/elementwise_functions.html#acos-x\n instead of an official NumPy operator.\n \n >>>np.acos is np.arccos\n True\n\n Parameters\n ----------\n x : ndarray\n x-coordinate on the unit circle. For real arguments, the domain is [-1, 1].\n out : ndarray, optional\n A location into which the result is stored. If provided, it must have a shape that\n the inputs broadcast to. 
If not provided or None, a freshly-allocated array is returned.\n A tuple (possible only as a keyword argument) must have length equal to the number of outputs.\n\n Returns\n ----------\n angle : ndarray\n The angle of the ray intersecting the unit circle at the given x-coordinate in radians [0, pi].\n This is a scalar if x is a scalar.\n\n Notes", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "720919840", + "repo_full_name": "apache/mxnet", + "pr_number": 20592, + "pr_file": "python/mxnet/numpy/multiarray.py", + "discussion_id": "720919840", + "commented_code": "@@ -4264,6 +4405,49 @@ def arccos(x, out=None, **kwargs):\n \"\"\"\n return _mx_nd_np.arccos(x, out=out, **kwargs)\n \n+acos = arccos\n+acos.__doc__ = \"\"\"\n+ Trigonometric inverse cosine, element-wise.\n+ The inverse of cos so that, if y = cos(x), then x = acos(y).\n+ \n+ Notes\n+ ----------\n+ `acos` is a alias for `arccos`. It is a standard API in\n+ https://data-apis.org/array-api/latest/API_specification/elementwise_functions.html#acos-x\n+ instead of an official NumPy operator.\n+ \n+ >>>np.acos is np.arccos\n+ True\n+\n+ Parameters\n+ ----------\n+ x : ndarray\n+ x-coordinate on the unit circle. For real arguments, the domain is [-1, 1].\n+ out : ndarray, optional\n+ A location into which the result is stored. If provided, it must have a shape that\n+ the inputs broadcast to. If not provided or None, a freshly-allocated array is returned.\n+ A tuple (possible only as a keyword argument) must have length equal to the number of outputs.\n+\n+ Returns\n+ ----------\n+ angle : ndarray\n+ The angle of the ray intersecting the unit circle at the given x-coordinate in radians [0, pi].\n+ This is a scalar if x is a scalar.\n+\n+ Notes", + "comment_created_at": "2021-10-03T22:42:51+00:00", + "comment_author": "barry-jin", + "comment_body": "Could you append a note here to elaborate that acos is a standardized API and is equivelant to arccos and also provide the reference. ", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-document-api-completely.md b/_reviewers/mxnet-document-api-completely.md index c86f1cb..01f1e2f 100644 --- a/_reviewers/mxnet-document-api-completely.md +++ b/_reviewers/mxnet-document-api-completely.md @@ -58,73 +58,3 @@ acos.__doc__ = """ True """ ``` - - -[ - { - "discussion_id": "360244161", - "pr_number": 17129, - "pr_file": "python/mxnet/gluon/contrib/estimator/estimator.py", - "created_at": "2019-12-20T06:39:31+00:00", - "commented_code": "Parameters\n ----------\n val_data : DataLoader\n val_data : iterable Object\n Validation data loader with data and labels.", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "360244161", - "repo_full_name": "apache/mxnet", - "pr_number": 17129, - "pr_file": "python/mxnet/gluon/contrib/estimator/estimator.py", - "discussion_id": "360244161", - "commented_code": "@@ -281,7 +281,7 @@ def evaluate(self,\n \n Parameters\n ----------\n- val_data : DataLoader\n+ val_data : iterable Object\n Validation data loader with data and labels.", - "comment_created_at": "2019-12-20T06:39:31+00:00", - "comment_author": "leezu", - "comment_body": "It's unclear that `data loader` refers to the `gluon.DataLoader`. 
It would be good to clarify that any iterable object yielding a sequence of batches is acceptable and that the `gluon.DataLoader` is one instance of such iterables.\r\nSame applies to the fit_batch below.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "711223058", - "pr_number": 20592, - "pr_file": "python/mxnet/numpy/multiarray.py", - "created_at": "2021-09-17T17:12:57+00:00", - "commented_code": "\"\"\"\n return _mx_nd_np.remainder(x1, x2, out=out)\n\n@set_module('mxnet.numpy')\n@wrap_np_binary_func\ndef pow(x1, x2):", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "711223058", - "repo_full_name": "apache/mxnet", - "pr_number": 20592, - "pr_file": "python/mxnet/numpy/multiarray.py", - "discussion_id": "711223058", - "commented_code": "@@ -3591,6 +3589,55 @@ def remainder(x1, x2, out=None, **kwargs):\n \"\"\"\n return _mx_nd_np.remainder(x1, x2, out=out)\n \n+@set_module('mxnet.numpy')\n+@wrap_np_binary_func\n+def pow(x1, x2):", - "comment_created_at": "2021-09-17T17:12:57+00:00", - "comment_author": "barry-jin", - "comment_body": "We can use assignment to make `pow` the alias of `power` and add notes to inform the user that `pow` is not a official NumPy operator but a standard API. \r\n\r\nExample:\r\n```\r\npow = power\r\npow.__doc__ = ```\r\n ...\r\n Notes\r\n -----\r\n `pow` is an alias for `power`. It is a standard API in https://data-apis.org/array-api/latest/API_specification/elementwise_functions.html#pow-x1-x2 instead of an official NumPy operator. \r\n\r\n >>> np.pow is np.power\r\n True\r\n ...\r\n```\r\n\r\nWe can apply this to other operators in this PR. What do you think. ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "720919840", - "pr_number": 20592, - "pr_file": "python/mxnet/numpy/multiarray.py", - "created_at": "2021-10-03T22:42:51+00:00", - "commented_code": "\"\"\"\n return _mx_nd_np.arccos(x, out=out, **kwargs)\n\nacos = arccos\nacos.__doc__ = \"\"\"\n Trigonometric inverse cosine, element-wise.\n The inverse of cos so that, if y = cos(x), then x = acos(y).\n \n Notes\n ----------\n `acos` is a alias for `arccos`. It is a standard API in\n https://data-apis.org/array-api/latest/API_specification/elementwise_functions.html#acos-x\n instead of an official NumPy operator.\n \n >>>np.acos is np.arccos\n True\n\n Parameters\n ----------\n x : ndarray\n x-coordinate on the unit circle. For real arguments, the domain is [-1, 1].\n out : ndarray, optional\n A location into which the result is stored. If provided, it must have a shape that\n the inputs broadcast to. If not provided or None, a freshly-allocated array is returned.\n A tuple (possible only as a keyword argument) must have length equal to the number of outputs.\n\n Returns\n ----------\n angle : ndarray\n The angle of the ray intersecting the unit circle at the given x-coordinate in radians [0, pi].\n This is a scalar if x is a scalar.\n\n Notes", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "720919840", - "repo_full_name": "apache/mxnet", - "pr_number": 20592, - "pr_file": "python/mxnet/numpy/multiarray.py", - "discussion_id": "720919840", - "commented_code": "@@ -4264,6 +4405,49 @@ def arccos(x, out=None, **kwargs):\n \"\"\"\n return _mx_nd_np.arccos(x, out=out, **kwargs)\n \n+acos = arccos\n+acos.__doc__ = \"\"\"\n+ Trigonometric inverse cosine, element-wise.\n+ The inverse of cos so that, if y = cos(x), then x = acos(y).\n+ \n+ Notes\n+ ----------\n+ `acos` is a alias for `arccos`. 
It is a standard API in\n+ https://data-apis.org/array-api/latest/API_specification/elementwise_functions.html#acos-x\n+ instead of an official NumPy operator.\n+ \n+ >>>np.acos is np.arccos\n+ True\n+\n+ Parameters\n+ ----------\n+ x : ndarray\n+ x-coordinate on the unit circle. For real arguments, the domain is [-1, 1].\n+ out : ndarray, optional\n+ A location into which the result is stored. If provided, it must have a shape that\n+ the inputs broadcast to. If not provided or None, a freshly-allocated array is returned.\n+ A tuple (possible only as a keyword argument) must have length equal to the number of outputs.\n+\n+ Returns\n+ ----------\n+ angle : ndarray\n+ The angle of the ray intersecting the unit circle at the given x-coordinate in radians [0, pi].\n+ This is a scalar if x is a scalar.\n+\n+ Notes", - "comment_created_at": "2021-10-03T22:42:51+00:00", - "comment_author": "barry-jin", - "comment_body": "Could you append a note here to elaborate that acos is a standardized API and is equivelant to arccos and also provide the reference. ", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-document-environment-variables.json b/_reviewers/mxnet-document-environment-variables.json new file mode 100644 index 0000000..6e2f177 --- /dev/null +++ b/_reviewers/mxnet-document-environment-variables.json @@ -0,0 +1,80 @@ +[ + { + "discussion_id": "744069231", + "pr_number": 20635, + "pr_file": "src/operator/cudnn_ops.cc", + "created_at": "2021-11-06T03:30:04+00:00", + "commented_code": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied. 
See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n/*!\n * Copyright (c) 2021 by Contributors\n * \\file cudnn_ops.cc\n * \\brief cuDNN v8 ops\n */\n\n#include \"cudnn_ops.h\"\n\n#include \n#if MXNET_USE_CUDNN == 1\n\n#include \n\n#include \n#include \n#include \n#include \n#include \n#include \n#include \n#include \n#include \n\nnamespace mxnet {\nnamespace op {\n\nusing cudnn_cxx::Descriptor;\nusing cudnn_cxx::GetAttr;\nusing cudnn_cxx::GetSomeAttrs;\nusing cudnn_cxx::IsCompatible;\nusing cudnn_cxx::MakeAvgSampler;\nusing cudnn_cxx::MakeFinalized;\nusing cudnn_cxx::PackedStrides;\nusing cudnn_cxx::PlanStr;\n\nnamespace cudnn {\n\ncudnnDataType_t CudnnType(mshadow::TypeFlag dtype) {\n static std::unordered_map type_map {\n {mshadow::kFloat32, CUDNN_DATA_FLOAT}, {mshadow::kFloat64, CUDNN_DATA_DOUBLE},\n {mshadow::kFloat16, CUDNN_DATA_HALF}, {mshadow::kUint8, CUDNN_DATA_UINT8},\n {mshadow::kInt8, CUDNN_DATA_INT8}, {mshadow::kInt32, CUDNN_DATA_INT32},\n#if CUDNN_VERSION >= 8100\n {mshadow::kInt64, CUDNN_DATA_INT64},\n#endif // CUDNN_VERSION >= 8100\n };\n auto it = type_map.find(dtype);\n CHECK(it != type_map.end()) << \"Unsupported type: \" << dtype;\n return it->second;\n}\n\nstd::vector LayoutInfo::Order() const {\n std::vector ret(n_space_dims + 2);\n std::iota(ret.begin(), ret.end(), 0);\n if (channel_last)\n std::rotate(ret.begin() + 1, ret.begin() + 2, ret.end());\n return ret;\n}\n\nsize_t LayoutInfo::ChannelIdx() const {\n return channel_last ? 1 + n_space_dims : 1;\n}\n\nstd::vector LayoutInfo::Strides(const std::vector& dims) const {\n return PackedStrides(Order(), dims);\n}\n\nLayoutInfo GetLayoutInfo(mshadow::LayoutFlag layout) {\n static std::unordered_map layout_map{\n {mshadow::kNCW, {1, false}},\n {mshadow::kNWC, {1, true}},\n {mshadow::kNCHW, {2, false}},\n {mshadow::kNHWC, {2, true}},\n {mshadow::kNCDHW, {3, false}},\n {mshadow::kNDHWC, {3, true}},\n };\n auto it = layout_map.find(layout);\n CHECK(it != layout_map.end()) << \"Unsupported layout: \" << layout;\n return it->second;\n}\n\nTShape ExpandChannelDims(mshadow::LayoutFlag layout, int c) {\n auto li = GetLayoutInfo(layout);\n std::vector dims(li.n_space_dims + 2, 1);\n dims[li.ChannelIdx()] = c;\n return TShape(dims.begin(), dims.end());\n}\n\nstd::vector ReverseOrder(const std::vector& o) {\n std::vector ret(o.size());\n for (size_t i = 0; i < ret.size(); ++i)\n ret[o[i]] = i;\n return ret;\n}\n\nstd::vector RequireNumerics() {\n std::vector ret;\n return ret;\n}\n\nstd::vector ExcludeNumerics() {\n std::vector ret;\n if (!dmlc::GetEnv(\"MXNET_CUDA_ALLOW_TENSOR_CORE\", true))\n ret.push_back(CUDNN_NUMERICAL_NOTE_TENSOR_CORE);\n if (!dmlc::GetEnv(\"MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION\", false))\n ret.push_back(CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS);\n if (!dmlc::GetEnv(\"MXNET_CUDNN_ALLOW_REDUCED_PRECISION_REDUCTION\", true))\n ret.push_back(CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION);\n if (!dmlc::GetEnv(\"MXNET_CUDNN_ALLOW_FFT\", true))\n ret.push_back(CUDNN_NUMERICAL_NOTE_FFT);\n if (dmlc::GetEnv(\"MXNET_ENFORCE_DETERMINISM\", false))\n ret.push_back(CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC);\n if (!dmlc::GetEnv(\"MXNET_CUDNN_ALLOW_WINOGRAD\", true))\n ret.push_back(CUDNN_NUMERICAL_NOTE_WINOGRAD);\n return ret;\n}\n\nDescriptor MakeTensorDesc(int64_t uid,\n cudnnDataType_t dtype,\n const std::vector& dims,\n const std::vector& strides,\n bool is_virtual) {\n int64_t alignment = 16; // TODO(vcherepanov): ?\n return 
MakeFinalized(CUDNN_BACKEND_TENSOR_DESCRIPTOR,\n CUDNN_ATTR_TENSOR_UNIQUE_ID,\n uid,\n CUDNN_ATTR_TENSOR_DATA_TYPE,\n dtype,\n CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT,\n alignment,\n CUDNN_ATTR_TENSOR_DIMENSIONS,\n dims,\n CUDNN_ATTR_TENSOR_STRIDES,\n strides,\n CUDNN_ATTR_TENSOR_IS_VIRTUAL,\n is_virtual);\n}\n\nDescriptor MakeTensorDesc(int64_t uid,\n const TBlob& blob,\n const LayoutInfo& li,\n bool expand_1d,\n bool is_virtual) {\n std::vector dims(blob.shape_.ndim());\n CHECK_EQ(dims.size(), li.n_space_dims + 2);\n auto rev_order = ReverseOrder(li.Order());\n for (size_t i = 0; i < dims.size(); ++i)\n dims[i] = blob.shape_[rev_order[i]];\n auto strides = li.Strides(dims);\n if (li.n_space_dims == 1 && expand_1d) {\n dims.insert(dims.begin() + 2, 1);\n std::vector order(dims.size());\n std::iota(order.begin(), order.end(), 0);\n if (li.channel_last)\n std::rotate(order.begin() + 1, order.begin() + 2, order.end());\n strides = PackedStrides(order, dims);\n }\n return MakeTensorDesc(\n uid, CudnnType(static_cast(blob.type_flag_)), dims, strides, is_virtual);\n}\n\nDescriptor MakeCTensorDescExpandDims(int64_t uid,\n const TBlob& b,\n const LayoutInfo& li,\n bool is_virtual) {\n std::vector dims(li.n_space_dims + 2, 1);\n dims[1] = b.shape_[0];\n auto dtype = CudnnType(static_cast(b.type_flag_));\n return MakeTensorDesc(uid, dtype, dims, li.Strides(dims), is_virtual);\n}\n\nDescriptor MakeConvDesc(const ConvParam& param, mshadow::TypeFlag dtype) {\n int64_t sdims = param.kernel.ndim();\n std::vector stride(param.stride.begin(), param.stride.end());\n std::vector dilate(param.dilate.begin(), param.dilate.end());\n std::vector pad(param.pad.begin(), param.pad.end());\n\n auto comp_type = CudnnType(dtype);\n if (comp_type == CUDNN_DATA_HALF)\n comp_type = CUDNN_DATA_FLOAT;\n\n if (sdims == 1) {\n // TODO(vcherepanov): remove this once cuDNN properly supports 1D convolutions.\n // For now, making spacial dims 2D: 1 x W.\n ++sdims;\n stride.insert(stride.begin(), 1);\n dilate.insert(dilate.begin(), 1);\n pad.insert(pad.begin(), 0);\n }\n return MakeFinalized(CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR,\n CUDNN_ATTR_CONVOLUTION_SPATIAL_DIMS,\n sdims,\n CUDNN_ATTR_CONVOLUTION_COMP_TYPE,\n comp_type,\n CUDNN_ATTR_CONVOLUTION_CONV_MODE,\n CUDNN_CROSS_CORRELATION,\n CUDNN_ATTR_CONVOLUTION_FILTER_STRIDES,\n stride,\n CUDNN_ATTR_CONVOLUTION_DILATIONS,\n dilate,\n CUDNN_ATTR_CONVOLUTION_PRE_PADDINGS,\n pad,\n CUDNN_ATTR_CONVOLUTION_POST_PADDINGS,\n pad);\n}\n\nDescriptor MakeConvFwdOp(const Descriptor& conv,\n const Descriptor& x,\n const Descriptor& w,\n const Descriptor& y,\n bool add_to) {\n auto ret = Make(CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR,\n CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_CONV_DESC,\n conv,\n CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_X,\n x,\n CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_W,\n w,\n CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_Y,\n y);\n if (GetAttr(x, CUDNN_ATTR_TENSOR_DATA_TYPE) == CUDNN_DATA_DOUBLE) {\n SetAttrs(ret,\n CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA,\n 1.0,\n CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA,\n add_to ? 1.0 : 0.0);\n } else {\n SetAttrs(ret,\n CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA,\n 1.0f,\n CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA,\n add_to ? 
1.0f : 0.0f);\n }\n CUDNN_CALL(cudnnBackendFinalize(ret.get()));\n return ret;\n}\n\nDescriptor MakeConvDgradOp(const Descriptor& conv,\n const Descriptor& w,\n const Descriptor& dy,\n const Descriptor& dx,\n bool add_to) {\n auto ret = Make(CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC,\n conv,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_W,\n w,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DY,\n dy,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DX,\n dx);\n if (GetAttr(w, CUDNN_ATTR_TENSOR_DATA_TYPE) == CUDNN_DATA_DOUBLE) {\n SetAttrs(ret,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA,\n 1.0,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA,\n add_to ? 1.0 : 0.0);\n } else {\n SetAttrs(ret,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA,\n 1.0f,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA,\n add_to ? 1.0f : 0.0f);\n }\n CUDNN_CALL(cudnnBackendFinalize(ret.get()));\n return ret;\n}\n\nDescriptor MakeConvWgradOp(const Descriptor& conv,\n const Descriptor& x,\n const Descriptor& dy,\n const Descriptor& dw,\n bool add_to) {\n auto ret = Make(CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_CONV_DESC,\n conv,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_X,\n x,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DY,\n dy,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DW,\n dw);\n if (GetAttr(x, CUDNN_ATTR_TENSOR_DATA_TYPE) == CUDNN_DATA_DOUBLE) {\n SetAttrs(ret,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA,\n 1.0,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA,\n add_to ? 1.0 : 0.0);\n } else {\n SetAttrs(ret,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA,\n 1.0f,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA,\n add_to ? 
1.0f : 0.0f);\n }\n CUDNN_CALL(cudnnBackendFinalize(ret.get()));\n return ret;\n}\n\nDescriptor MakeOpGraph(cudnnHandle_t handle, const std::vector& ops) {\n return MakeFinalized(CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR,\n CUDNN_ATTR_OPERATIONGRAPH_HANDLE,\n handle,\n CUDNN_ATTR_OPERATIONGRAPH_OPS,\n ops);\n}\n\nConvParam::ConvParam(const ConvolutionParam& p, bool add_to)\n : kernel(p.kernel),\n stride(p.stride),\n dilate(p.dilate),\n pad(p.pad),\n num_filter(p.num_filter),\n num_group(p.num_group),\n workspace(p.workspace),\n cudnn_tune(p.cudnn_tune),\n layout(p.layout),\n add_to(add_to) {}\n\nConvParam::ConvParam(const DeconvolutionParam& p, bool add_to)\n : kernel(p.kernel),\n stride(p.stride),\n dilate(p.dilate),\n pad(p.pad),\n num_filter(p.num_filter),\n num_group(p.num_group),\n workspace(p.workspace),\n cudnn_tune(p.cudnn_tune),\n layout(p.layout),\n add_to(add_to) {}\n\nvoid TuneWarnOnce() {\n thread_local bool done = false;\n if (!done) {\n LOG(INFO) << \"Auto-tuning cuDNN op, set MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable\";\n done = true;\n }\n}\n\nstd::vector MakeFallbackPlans(\n const std::vector& ixs,\n cudnnHandle_t handle,\n const Descriptor& op_graph,\n size_t workspace_limit,\n size_t* max_workspace,\n const std::unordered_set& excl_engines,\n const std::vector& req_numeric,\n const std::vector& excl_numeric\n#if CUDNN_VERSION >= 8200\n ,\n const std::vector& req_behavior,\n const std::vector& excl_behavior\n#endif // CUDNN_VERSION >= 8200\n) {\n std::vector plans;\n if (max_workspace)\n *max_workspace = 0;\n for (auto ix : ixs) {\n if (excl_engines.count(ix))\n continue;\n auto engine = Make(CUDNN_BACKEND_ENGINE_DESCRIPTOR,\n CUDNN_ATTR_ENGINE_OPERATION_GRAPH,\n op_graph,\n CUDNN_ATTR_ENGINE_GLOBAL_INDEX,\n ix);\n auto err = cudnnBackendFinalize(engine.get());\n if (err == CUDNN_STATUS_NOT_SUPPORTED || err == CUDNN_STATUS_ARCH_MISMATCH)\n continue;\n if (err != CUDNN_STATUS_SUCCESS) {\n LOG(WARNING) << \"Unexpected cuDNN status: \" << err << \": \" << cudnnGetErrorString(err);\n continue;\n }\n auto cfg =\n MakeFinalized(CUDNN_BACKEND_ENGINECFG_DESCRIPTOR, CUDNN_ATTR_ENGINECFG_ENGINE, engine);\n auto plan = Make(CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR,\n CUDNN_ATTR_EXECUTION_PLAN_HANDLE,\n handle,\n CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG,\n cfg);\n err = cudnnBackendFinalize(plan.get());\n if (err == CUDNN_STATUS_NOT_SUPPORTED || err == CUDNN_STATUS_ARCH_MISMATCH)\n continue;\n if (err != CUDNN_STATUS_SUCCESS) {\n LOG(WARNING) << \"Unexpected cuDNN status: \" << err << \": \" << cudnnGetErrorString(err);\n continue;\n }\n auto workspace = GetAttr(plan, CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE);\n if (workspace > workspace_limit)\n continue;\n auto numerical = GetSomeAttrs(\n CUDNN_NUMERICAL_NOTE_TYPE_COUNT, engine, CUDNN_ATTR_ENGINE_NUMERICAL_NOTE);\n if (!IsCompatible(numerical, req_numeric, excl_numeric))\n continue;\n#if CUDNN_VERSION >= 8200\n auto behavior = GetSomeAttrs(\n CUDNN_BEHAVIOR_NOTE_TYPE_COUNT, engine, CUDNN_ATTR_ENGINE_BEHAVIOR_NOTE);\n if (!IsCompatible(behavior, req_behavior, excl_behavior))\n continue;\n#endif // CUDNN_VERSION >= 8200\n plans.push_back(std::move(plan));\n if (max_workspace)\n *max_workspace = std::max(*max_workspace, static_cast(workspace));\n }\n return plans;\n}\n\ncudnnBackendHeurMode_t HeurMode() {\n#if CUDNN_VERSION >= 8100\n auto minor = cudnnGetVersion() / 100 % 10;\n int default_mode = minor < 2 ? 
CUDNN_HEUR_MODE_INSTANT : CUDNN_HEUR_MODE_B;\n#else\n int default_mode = CUDNN_HEUR_MODE_INSTANT;\n#endif // CUDNN_VERSION >= 8100\n return static_cast(dmlc::GetEnv(\"MXNET_CUDNN_HEUR_MODE\", default_mode));", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "744069231", + "repo_full_name": "apache/mxnet", + "pr_number": 20635, + "pr_file": "src/operator/cudnn_ops.cc", + "discussion_id": "744069231", + "commented_code": "@@ -0,0 +1,765 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * Copyright (c) 2021 by Contributors\n+ * \\file cudnn_ops.cc\n+ * \\brief cuDNN v8 ops\n+ */\n+\n+#include \"cudnn_ops.h\"\n+\n+#include \n+#if MXNET_USE_CUDNN == 1\n+\n+#include \n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+\n+namespace mxnet {\n+namespace op {\n+\n+using cudnn_cxx::Descriptor;\n+using cudnn_cxx::GetAttr;\n+using cudnn_cxx::GetSomeAttrs;\n+using cudnn_cxx::IsCompatible;\n+using cudnn_cxx::MakeAvgSampler;\n+using cudnn_cxx::MakeFinalized;\n+using cudnn_cxx::PackedStrides;\n+using cudnn_cxx::PlanStr;\n+\n+namespace cudnn {\n+\n+cudnnDataType_t CudnnType(mshadow::TypeFlag dtype) {\n+ static std::unordered_map type_map {\n+ {mshadow::kFloat32, CUDNN_DATA_FLOAT}, {mshadow::kFloat64, CUDNN_DATA_DOUBLE},\n+ {mshadow::kFloat16, CUDNN_DATA_HALF}, {mshadow::kUint8, CUDNN_DATA_UINT8},\n+ {mshadow::kInt8, CUDNN_DATA_INT8}, {mshadow::kInt32, CUDNN_DATA_INT32},\n+#if CUDNN_VERSION >= 8100\n+ {mshadow::kInt64, CUDNN_DATA_INT64},\n+#endif // CUDNN_VERSION >= 8100\n+ };\n+ auto it = type_map.find(dtype);\n+ CHECK(it != type_map.end()) << \"Unsupported type: \" << dtype;\n+ return it->second;\n+}\n+\n+std::vector LayoutInfo::Order() const {\n+ std::vector ret(n_space_dims + 2);\n+ std::iota(ret.begin(), ret.end(), 0);\n+ if (channel_last)\n+ std::rotate(ret.begin() + 1, ret.begin() + 2, ret.end());\n+ return ret;\n+}\n+\n+size_t LayoutInfo::ChannelIdx() const {\n+ return channel_last ? 
1 + n_space_dims : 1;\n+}\n+\n+std::vector LayoutInfo::Strides(const std::vector& dims) const {\n+ return PackedStrides(Order(), dims);\n+}\n+\n+LayoutInfo GetLayoutInfo(mshadow::LayoutFlag layout) {\n+ static std::unordered_map layout_map{\n+ {mshadow::kNCW, {1, false}},\n+ {mshadow::kNWC, {1, true}},\n+ {mshadow::kNCHW, {2, false}},\n+ {mshadow::kNHWC, {2, true}},\n+ {mshadow::kNCDHW, {3, false}},\n+ {mshadow::kNDHWC, {3, true}},\n+ };\n+ auto it = layout_map.find(layout);\n+ CHECK(it != layout_map.end()) << \"Unsupported layout: \" << layout;\n+ return it->second;\n+}\n+\n+TShape ExpandChannelDims(mshadow::LayoutFlag layout, int c) {\n+ auto li = GetLayoutInfo(layout);\n+ std::vector dims(li.n_space_dims + 2, 1);\n+ dims[li.ChannelIdx()] = c;\n+ return TShape(dims.begin(), dims.end());\n+}\n+\n+std::vector ReverseOrder(const std::vector& o) {\n+ std::vector ret(o.size());\n+ for (size_t i = 0; i < ret.size(); ++i)\n+ ret[o[i]] = i;\n+ return ret;\n+}\n+\n+std::vector RequireNumerics() {\n+ std::vector ret;\n+ return ret;\n+}\n+\n+std::vector ExcludeNumerics() {\n+ std::vector ret;\n+ if (!dmlc::GetEnv(\"MXNET_CUDA_ALLOW_TENSOR_CORE\", true))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_TENSOR_CORE);\n+ if (!dmlc::GetEnv(\"MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION\", false))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS);\n+ if (!dmlc::GetEnv(\"MXNET_CUDNN_ALLOW_REDUCED_PRECISION_REDUCTION\", true))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION);\n+ if (!dmlc::GetEnv(\"MXNET_CUDNN_ALLOW_FFT\", true))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_FFT);\n+ if (dmlc::GetEnv(\"MXNET_ENFORCE_DETERMINISM\", false))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC);\n+ if (!dmlc::GetEnv(\"MXNET_CUDNN_ALLOW_WINOGRAD\", true))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_WINOGRAD);\n+ return ret;\n+}\n+\n+Descriptor MakeTensorDesc(int64_t uid,\n+ cudnnDataType_t dtype,\n+ const std::vector& dims,\n+ const std::vector& strides,\n+ bool is_virtual) {\n+ int64_t alignment = 16; // TODO(vcherepanov): ?\n+ return MakeFinalized(CUDNN_BACKEND_TENSOR_DESCRIPTOR,\n+ CUDNN_ATTR_TENSOR_UNIQUE_ID,\n+ uid,\n+ CUDNN_ATTR_TENSOR_DATA_TYPE,\n+ dtype,\n+ CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT,\n+ alignment,\n+ CUDNN_ATTR_TENSOR_DIMENSIONS,\n+ dims,\n+ CUDNN_ATTR_TENSOR_STRIDES,\n+ strides,\n+ CUDNN_ATTR_TENSOR_IS_VIRTUAL,\n+ is_virtual);\n+}\n+\n+Descriptor MakeTensorDesc(int64_t uid,\n+ const TBlob& blob,\n+ const LayoutInfo& li,\n+ bool expand_1d,\n+ bool is_virtual) {\n+ std::vector dims(blob.shape_.ndim());\n+ CHECK_EQ(dims.size(), li.n_space_dims + 2);\n+ auto rev_order = ReverseOrder(li.Order());\n+ for (size_t i = 0; i < dims.size(); ++i)\n+ dims[i] = blob.shape_[rev_order[i]];\n+ auto strides = li.Strides(dims);\n+ if (li.n_space_dims == 1 && expand_1d) {\n+ dims.insert(dims.begin() + 2, 1);\n+ std::vector order(dims.size());\n+ std::iota(order.begin(), order.end(), 0);\n+ if (li.channel_last)\n+ std::rotate(order.begin() + 1, order.begin() + 2, order.end());\n+ strides = PackedStrides(order, dims);\n+ }\n+ return MakeTensorDesc(\n+ uid, CudnnType(static_cast(blob.type_flag_)), dims, strides, is_virtual);\n+}\n+\n+Descriptor MakeCTensorDescExpandDims(int64_t uid,\n+ const TBlob& b,\n+ const LayoutInfo& li,\n+ bool is_virtual) {\n+ std::vector dims(li.n_space_dims + 2, 1);\n+ dims[1] = b.shape_[0];\n+ auto dtype = CudnnType(static_cast(b.type_flag_));\n+ return MakeTensorDesc(uid, dtype, dims, li.Strides(dims), is_virtual);\n+}\n+\n+Descriptor MakeConvDesc(const ConvParam& param, 
mshadow::TypeFlag dtype) {\n+ int64_t sdims = param.kernel.ndim();\n+ std::vector stride(param.stride.begin(), param.stride.end());\n+ std::vector dilate(param.dilate.begin(), param.dilate.end());\n+ std::vector pad(param.pad.begin(), param.pad.end());\n+\n+ auto comp_type = CudnnType(dtype);\n+ if (comp_type == CUDNN_DATA_HALF)\n+ comp_type = CUDNN_DATA_FLOAT;\n+\n+ if (sdims == 1) {\n+ // TODO(vcherepanov): remove this once cuDNN properly supports 1D convolutions.\n+ // For now, making spacial dims 2D: 1 x W.\n+ ++sdims;\n+ stride.insert(stride.begin(), 1);\n+ dilate.insert(dilate.begin(), 1);\n+ pad.insert(pad.begin(), 0);\n+ }\n+ return MakeFinalized(CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR,\n+ CUDNN_ATTR_CONVOLUTION_SPATIAL_DIMS,\n+ sdims,\n+ CUDNN_ATTR_CONVOLUTION_COMP_TYPE,\n+ comp_type,\n+ CUDNN_ATTR_CONVOLUTION_CONV_MODE,\n+ CUDNN_CROSS_CORRELATION,\n+ CUDNN_ATTR_CONVOLUTION_FILTER_STRIDES,\n+ stride,\n+ CUDNN_ATTR_CONVOLUTION_DILATIONS,\n+ dilate,\n+ CUDNN_ATTR_CONVOLUTION_PRE_PADDINGS,\n+ pad,\n+ CUDNN_ATTR_CONVOLUTION_POST_PADDINGS,\n+ pad);\n+}\n+\n+Descriptor MakeConvFwdOp(const Descriptor& conv,\n+ const Descriptor& x,\n+ const Descriptor& w,\n+ const Descriptor& y,\n+ bool add_to) {\n+ auto ret = Make(CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_CONV_DESC,\n+ conv,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_X,\n+ x,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_W,\n+ w,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_Y,\n+ y);\n+ if (GetAttr(x, CUDNN_ATTR_TENSOR_DATA_TYPE) == CUDNN_DATA_DOUBLE) {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA,\n+ 1.0,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA,\n+ add_to ? 1.0 : 0.0);\n+ } else {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA,\n+ 1.0f,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA,\n+ add_to ? 1.0f : 0.0f);\n+ }\n+ CUDNN_CALL(cudnnBackendFinalize(ret.get()));\n+ return ret;\n+}\n+\n+Descriptor MakeConvDgradOp(const Descriptor& conv,\n+ const Descriptor& w,\n+ const Descriptor& dy,\n+ const Descriptor& dx,\n+ bool add_to) {\n+ auto ret = Make(CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC,\n+ conv,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_W,\n+ w,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DY,\n+ dy,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DX,\n+ dx);\n+ if (GetAttr(w, CUDNN_ATTR_TENSOR_DATA_TYPE) == CUDNN_DATA_DOUBLE) {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA,\n+ 1.0,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA,\n+ add_to ? 1.0 : 0.0);\n+ } else {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA,\n+ 1.0f,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA,\n+ add_to ? 
1.0f : 0.0f);\n+ }\n+ CUDNN_CALL(cudnnBackendFinalize(ret.get()));\n+ return ret;\n+}\n+\n+Descriptor MakeConvWgradOp(const Descriptor& conv,\n+ const Descriptor& x,\n+ const Descriptor& dy,\n+ const Descriptor& dw,\n+ bool add_to) {\n+ auto ret = Make(CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_CONV_DESC,\n+ conv,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_X,\n+ x,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DY,\n+ dy,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DW,\n+ dw);\n+ if (GetAttr(x, CUDNN_ATTR_TENSOR_DATA_TYPE) == CUDNN_DATA_DOUBLE) {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA,\n+ 1.0,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA,\n+ add_to ? 1.0 : 0.0);\n+ } else {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA,\n+ 1.0f,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA,\n+ add_to ? 1.0f : 0.0f);\n+ }\n+ CUDNN_CALL(cudnnBackendFinalize(ret.get()));\n+ return ret;\n+}\n+\n+Descriptor MakeOpGraph(cudnnHandle_t handle, const std::vector& ops) {\n+ return MakeFinalized(CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR,\n+ CUDNN_ATTR_OPERATIONGRAPH_HANDLE,\n+ handle,\n+ CUDNN_ATTR_OPERATIONGRAPH_OPS,\n+ ops);\n+}\n+\n+ConvParam::ConvParam(const ConvolutionParam& p, bool add_to)\n+ : kernel(p.kernel),\n+ stride(p.stride),\n+ dilate(p.dilate),\n+ pad(p.pad),\n+ num_filter(p.num_filter),\n+ num_group(p.num_group),\n+ workspace(p.workspace),\n+ cudnn_tune(p.cudnn_tune),\n+ layout(p.layout),\n+ add_to(add_to) {}\n+\n+ConvParam::ConvParam(const DeconvolutionParam& p, bool add_to)\n+ : kernel(p.kernel),\n+ stride(p.stride),\n+ dilate(p.dilate),\n+ pad(p.pad),\n+ num_filter(p.num_filter),\n+ num_group(p.num_group),\n+ workspace(p.workspace),\n+ cudnn_tune(p.cudnn_tune),\n+ layout(p.layout),\n+ add_to(add_to) {}\n+\n+void TuneWarnOnce() {\n+ thread_local bool done = false;\n+ if (!done) {\n+ LOG(INFO) << \"Auto-tuning cuDNN op, set MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable\";\n+ done = true;\n+ }\n+}\n+\n+std::vector MakeFallbackPlans(\n+ const std::vector& ixs,\n+ cudnnHandle_t handle,\n+ const Descriptor& op_graph,\n+ size_t workspace_limit,\n+ size_t* max_workspace,\n+ const std::unordered_set& excl_engines,\n+ const std::vector& req_numeric,\n+ const std::vector& excl_numeric\n+#if CUDNN_VERSION >= 8200\n+ ,\n+ const std::vector& req_behavior,\n+ const std::vector& excl_behavior\n+#endif // CUDNN_VERSION >= 8200\n+) {\n+ std::vector plans;\n+ if (max_workspace)\n+ *max_workspace = 0;\n+ for (auto ix : ixs) {\n+ if (excl_engines.count(ix))\n+ continue;\n+ auto engine = Make(CUDNN_BACKEND_ENGINE_DESCRIPTOR,\n+ CUDNN_ATTR_ENGINE_OPERATION_GRAPH,\n+ op_graph,\n+ CUDNN_ATTR_ENGINE_GLOBAL_INDEX,\n+ ix);\n+ auto err = cudnnBackendFinalize(engine.get());\n+ if (err == CUDNN_STATUS_NOT_SUPPORTED || err == CUDNN_STATUS_ARCH_MISMATCH)\n+ continue;\n+ if (err != CUDNN_STATUS_SUCCESS) {\n+ LOG(WARNING) << \"Unexpected cuDNN status: \" << err << \": \" << cudnnGetErrorString(err);\n+ continue;\n+ }\n+ auto cfg =\n+ MakeFinalized(CUDNN_BACKEND_ENGINECFG_DESCRIPTOR, CUDNN_ATTR_ENGINECFG_ENGINE, engine);\n+ auto plan = Make(CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR,\n+ CUDNN_ATTR_EXECUTION_PLAN_HANDLE,\n+ handle,\n+ CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG,\n+ cfg);\n+ err = cudnnBackendFinalize(plan.get());\n+ if (err == CUDNN_STATUS_NOT_SUPPORTED || err == CUDNN_STATUS_ARCH_MISMATCH)\n+ continue;\n+ if (err != CUDNN_STATUS_SUCCESS) {\n+ LOG(WARNING) << \"Unexpected cuDNN status: 
\" << err << \": \" << cudnnGetErrorString(err);\n+ continue;\n+ }\n+ auto workspace = GetAttr(plan, CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE);\n+ if (workspace > workspace_limit)\n+ continue;\n+ auto numerical = GetSomeAttrs(\n+ CUDNN_NUMERICAL_NOTE_TYPE_COUNT, engine, CUDNN_ATTR_ENGINE_NUMERICAL_NOTE);\n+ if (!IsCompatible(numerical, req_numeric, excl_numeric))\n+ continue;\n+#if CUDNN_VERSION >= 8200\n+ auto behavior = GetSomeAttrs(\n+ CUDNN_BEHAVIOR_NOTE_TYPE_COUNT, engine, CUDNN_ATTR_ENGINE_BEHAVIOR_NOTE);\n+ if (!IsCompatible(behavior, req_behavior, excl_behavior))\n+ continue;\n+#endif // CUDNN_VERSION >= 8200\n+ plans.push_back(std::move(plan));\n+ if (max_workspace)\n+ *max_workspace = std::max(*max_workspace, static_cast(workspace));\n+ }\n+ return plans;\n+}\n+\n+cudnnBackendHeurMode_t HeurMode() {\n+#if CUDNN_VERSION >= 8100\n+ auto minor = cudnnGetVersion() / 100 % 10;\n+ int default_mode = minor < 2 ? CUDNN_HEUR_MODE_INSTANT : CUDNN_HEUR_MODE_B;\n+#else\n+ int default_mode = CUDNN_HEUR_MODE_INSTANT;\n+#endif // CUDNN_VERSION >= 8100\n+ return static_cast(dmlc::GetEnv(\"MXNET_CUDNN_HEUR_MODE\", default_mode));", + "comment_created_at": "2021-11-06T03:30:04+00:00", + "comment_author": "DickJC123", + "comment_body": "Again, please add a description of MXNET_CUDNN_HEUR_MODE in the env_var.md file.\r\n\r\nI'm OK to leave it as an int tied to the cudnn.h definitions. But technically, that ties constants that users will put in training scripts to those definitions, which could change I suppose. An alternative would be for you to parse the env var as a string, e.g. if set via:\r\n```\r\nexport MXNET_CUDNN_HEUR_MODE=\"B\"\r\n```\r\n", + "pr_file_module": null + }, + { + "comment_id": "745831518", + "repo_full_name": "apache/mxnet", + "pr_number": 20635, + "pr_file": "src/operator/cudnn_ops.cc", + "discussion_id": "744069231", + "commented_code": "@@ -0,0 +1,765 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. 
See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * Copyright (c) 2021 by Contributors\n+ * \\file cudnn_ops.cc\n+ * \\brief cuDNN v8 ops\n+ */\n+\n+#include \"cudnn_ops.h\"\n+\n+#include \n+#if MXNET_USE_CUDNN == 1\n+\n+#include \n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+\n+namespace mxnet {\n+namespace op {\n+\n+using cudnn_cxx::Descriptor;\n+using cudnn_cxx::GetAttr;\n+using cudnn_cxx::GetSomeAttrs;\n+using cudnn_cxx::IsCompatible;\n+using cudnn_cxx::MakeAvgSampler;\n+using cudnn_cxx::MakeFinalized;\n+using cudnn_cxx::PackedStrides;\n+using cudnn_cxx::PlanStr;\n+\n+namespace cudnn {\n+\n+cudnnDataType_t CudnnType(mshadow::TypeFlag dtype) {\n+ static std::unordered_map type_map {\n+ {mshadow::kFloat32, CUDNN_DATA_FLOAT}, {mshadow::kFloat64, CUDNN_DATA_DOUBLE},\n+ {mshadow::kFloat16, CUDNN_DATA_HALF}, {mshadow::kUint8, CUDNN_DATA_UINT8},\n+ {mshadow::kInt8, CUDNN_DATA_INT8}, {mshadow::kInt32, CUDNN_DATA_INT32},\n+#if CUDNN_VERSION >= 8100\n+ {mshadow::kInt64, CUDNN_DATA_INT64},\n+#endif // CUDNN_VERSION >= 8100\n+ };\n+ auto it = type_map.find(dtype);\n+ CHECK(it != type_map.end()) << \"Unsupported type: \" << dtype;\n+ return it->second;\n+}\n+\n+std::vector LayoutInfo::Order() const {\n+ std::vector ret(n_space_dims + 2);\n+ std::iota(ret.begin(), ret.end(), 0);\n+ if (channel_last)\n+ std::rotate(ret.begin() + 1, ret.begin() + 2, ret.end());\n+ return ret;\n+}\n+\n+size_t LayoutInfo::ChannelIdx() const {\n+ return channel_last ? 1 + n_space_dims : 1;\n+}\n+\n+std::vector LayoutInfo::Strides(const std::vector& dims) const {\n+ return PackedStrides(Order(), dims);\n+}\n+\n+LayoutInfo GetLayoutInfo(mshadow::LayoutFlag layout) {\n+ static std::unordered_map layout_map{\n+ {mshadow::kNCW, {1, false}},\n+ {mshadow::kNWC, {1, true}},\n+ {mshadow::kNCHW, {2, false}},\n+ {mshadow::kNHWC, {2, true}},\n+ {mshadow::kNCDHW, {3, false}},\n+ {mshadow::kNDHWC, {3, true}},\n+ };\n+ auto it = layout_map.find(layout);\n+ CHECK(it != layout_map.end()) << \"Unsupported layout: \" << layout;\n+ return it->second;\n+}\n+\n+TShape ExpandChannelDims(mshadow::LayoutFlag layout, int c) {\n+ auto li = GetLayoutInfo(layout);\n+ std::vector dims(li.n_space_dims + 2, 1);\n+ dims[li.ChannelIdx()] = c;\n+ return TShape(dims.begin(), dims.end());\n+}\n+\n+std::vector ReverseOrder(const std::vector& o) {\n+ std::vector ret(o.size());\n+ for (size_t i = 0; i < ret.size(); ++i)\n+ ret[o[i]] = i;\n+ return ret;\n+}\n+\n+std::vector RequireNumerics() {\n+ std::vector ret;\n+ return ret;\n+}\n+\n+std::vector ExcludeNumerics() {\n+ std::vector ret;\n+ if (!dmlc::GetEnv(\"MXNET_CUDA_ALLOW_TENSOR_CORE\", true))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_TENSOR_CORE);\n+ if (!dmlc::GetEnv(\"MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION\", false))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS);\n+ if (!dmlc::GetEnv(\"MXNET_CUDNN_ALLOW_REDUCED_PRECISION_REDUCTION\", true))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION);\n+ if (!dmlc::GetEnv(\"MXNET_CUDNN_ALLOW_FFT\", true))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_FFT);\n+ if (dmlc::GetEnv(\"MXNET_ENFORCE_DETERMINISM\", false))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC);\n+ if (!dmlc::GetEnv(\"MXNET_CUDNN_ALLOW_WINOGRAD\", true))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_WINOGRAD);\n+ return ret;\n+}\n+\n+Descriptor MakeTensorDesc(int64_t uid,\n+ cudnnDataType_t dtype,\n+ const std::vector& dims,\n+ const 
std::vector& strides,\n+ bool is_virtual) {\n+ int64_t alignment = 16; // TODO(vcherepanov): ?\n+ return MakeFinalized(CUDNN_BACKEND_TENSOR_DESCRIPTOR,\n+ CUDNN_ATTR_TENSOR_UNIQUE_ID,\n+ uid,\n+ CUDNN_ATTR_TENSOR_DATA_TYPE,\n+ dtype,\n+ CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT,\n+ alignment,\n+ CUDNN_ATTR_TENSOR_DIMENSIONS,\n+ dims,\n+ CUDNN_ATTR_TENSOR_STRIDES,\n+ strides,\n+ CUDNN_ATTR_TENSOR_IS_VIRTUAL,\n+ is_virtual);\n+}\n+\n+Descriptor MakeTensorDesc(int64_t uid,\n+ const TBlob& blob,\n+ const LayoutInfo& li,\n+ bool expand_1d,\n+ bool is_virtual) {\n+ std::vector dims(blob.shape_.ndim());\n+ CHECK_EQ(dims.size(), li.n_space_dims + 2);\n+ auto rev_order = ReverseOrder(li.Order());\n+ for (size_t i = 0; i < dims.size(); ++i)\n+ dims[i] = blob.shape_[rev_order[i]];\n+ auto strides = li.Strides(dims);\n+ if (li.n_space_dims == 1 && expand_1d) {\n+ dims.insert(dims.begin() + 2, 1);\n+ std::vector order(dims.size());\n+ std::iota(order.begin(), order.end(), 0);\n+ if (li.channel_last)\n+ std::rotate(order.begin() + 1, order.begin() + 2, order.end());\n+ strides = PackedStrides(order, dims);\n+ }\n+ return MakeTensorDesc(\n+ uid, CudnnType(static_cast(blob.type_flag_)), dims, strides, is_virtual);\n+}\n+\n+Descriptor MakeCTensorDescExpandDims(int64_t uid,\n+ const TBlob& b,\n+ const LayoutInfo& li,\n+ bool is_virtual) {\n+ std::vector dims(li.n_space_dims + 2, 1);\n+ dims[1] = b.shape_[0];\n+ auto dtype = CudnnType(static_cast(b.type_flag_));\n+ return MakeTensorDesc(uid, dtype, dims, li.Strides(dims), is_virtual);\n+}\n+\n+Descriptor MakeConvDesc(const ConvParam& param, mshadow::TypeFlag dtype) {\n+ int64_t sdims = param.kernel.ndim();\n+ std::vector stride(param.stride.begin(), param.stride.end());\n+ std::vector dilate(param.dilate.begin(), param.dilate.end());\n+ std::vector pad(param.pad.begin(), param.pad.end());\n+\n+ auto comp_type = CudnnType(dtype);\n+ if (comp_type == CUDNN_DATA_HALF)\n+ comp_type = CUDNN_DATA_FLOAT;\n+\n+ if (sdims == 1) {\n+ // TODO(vcherepanov): remove this once cuDNN properly supports 1D convolutions.\n+ // For now, making spacial dims 2D: 1 x W.\n+ ++sdims;\n+ stride.insert(stride.begin(), 1);\n+ dilate.insert(dilate.begin(), 1);\n+ pad.insert(pad.begin(), 0);\n+ }\n+ return MakeFinalized(CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR,\n+ CUDNN_ATTR_CONVOLUTION_SPATIAL_DIMS,\n+ sdims,\n+ CUDNN_ATTR_CONVOLUTION_COMP_TYPE,\n+ comp_type,\n+ CUDNN_ATTR_CONVOLUTION_CONV_MODE,\n+ CUDNN_CROSS_CORRELATION,\n+ CUDNN_ATTR_CONVOLUTION_FILTER_STRIDES,\n+ stride,\n+ CUDNN_ATTR_CONVOLUTION_DILATIONS,\n+ dilate,\n+ CUDNN_ATTR_CONVOLUTION_PRE_PADDINGS,\n+ pad,\n+ CUDNN_ATTR_CONVOLUTION_POST_PADDINGS,\n+ pad);\n+}\n+\n+Descriptor MakeConvFwdOp(const Descriptor& conv,\n+ const Descriptor& x,\n+ const Descriptor& w,\n+ const Descriptor& y,\n+ bool add_to) {\n+ auto ret = Make(CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_CONV_DESC,\n+ conv,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_X,\n+ x,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_W,\n+ w,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_Y,\n+ y);\n+ if (GetAttr(x, CUDNN_ATTR_TENSOR_DATA_TYPE) == CUDNN_DATA_DOUBLE) {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA,\n+ 1.0,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA,\n+ add_to ? 1.0 : 0.0);\n+ } else {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA,\n+ 1.0f,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA,\n+ add_to ? 
1.0f : 0.0f);\n+ }\n+ CUDNN_CALL(cudnnBackendFinalize(ret.get()));\n+ return ret;\n+}\n+\n+Descriptor MakeConvDgradOp(const Descriptor& conv,\n+ const Descriptor& w,\n+ const Descriptor& dy,\n+ const Descriptor& dx,\n+ bool add_to) {\n+ auto ret = Make(CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC,\n+ conv,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_W,\n+ w,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DY,\n+ dy,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DX,\n+ dx);\n+ if (GetAttr(w, CUDNN_ATTR_TENSOR_DATA_TYPE) == CUDNN_DATA_DOUBLE) {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA,\n+ 1.0,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA,\n+ add_to ? 1.0 : 0.0);\n+ } else {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA,\n+ 1.0f,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA,\n+ add_to ? 1.0f : 0.0f);\n+ }\n+ CUDNN_CALL(cudnnBackendFinalize(ret.get()));\n+ return ret;\n+}\n+\n+Descriptor MakeConvWgradOp(const Descriptor& conv,\n+ const Descriptor& x,\n+ const Descriptor& dy,\n+ const Descriptor& dw,\n+ bool add_to) {\n+ auto ret = Make(CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_CONV_DESC,\n+ conv,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_X,\n+ x,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DY,\n+ dy,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DW,\n+ dw);\n+ if (GetAttr(x, CUDNN_ATTR_TENSOR_DATA_TYPE) == CUDNN_DATA_DOUBLE) {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA,\n+ 1.0,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA,\n+ add_to ? 1.0 : 0.0);\n+ } else {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA,\n+ 1.0f,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA,\n+ add_to ? 
1.0f : 0.0f);\n+ }\n+ CUDNN_CALL(cudnnBackendFinalize(ret.get()));\n+ return ret;\n+}\n+\n+Descriptor MakeOpGraph(cudnnHandle_t handle, const std::vector& ops) {\n+ return MakeFinalized(CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR,\n+ CUDNN_ATTR_OPERATIONGRAPH_HANDLE,\n+ handle,\n+ CUDNN_ATTR_OPERATIONGRAPH_OPS,\n+ ops);\n+}\n+\n+ConvParam::ConvParam(const ConvolutionParam& p, bool add_to)\n+ : kernel(p.kernel),\n+ stride(p.stride),\n+ dilate(p.dilate),\n+ pad(p.pad),\n+ num_filter(p.num_filter),\n+ num_group(p.num_group),\n+ workspace(p.workspace),\n+ cudnn_tune(p.cudnn_tune),\n+ layout(p.layout),\n+ add_to(add_to) {}\n+\n+ConvParam::ConvParam(const DeconvolutionParam& p, bool add_to)\n+ : kernel(p.kernel),\n+ stride(p.stride),\n+ dilate(p.dilate),\n+ pad(p.pad),\n+ num_filter(p.num_filter),\n+ num_group(p.num_group),\n+ workspace(p.workspace),\n+ cudnn_tune(p.cudnn_tune),\n+ layout(p.layout),\n+ add_to(add_to) {}\n+\n+void TuneWarnOnce() {\n+ thread_local bool done = false;\n+ if (!done) {\n+ LOG(INFO) << \"Auto-tuning cuDNN op, set MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable\";\n+ done = true;\n+ }\n+}\n+\n+std::vector MakeFallbackPlans(\n+ const std::vector& ixs,\n+ cudnnHandle_t handle,\n+ const Descriptor& op_graph,\n+ size_t workspace_limit,\n+ size_t* max_workspace,\n+ const std::unordered_set& excl_engines,\n+ const std::vector& req_numeric,\n+ const std::vector& excl_numeric\n+#if CUDNN_VERSION >= 8200\n+ ,\n+ const std::vector& req_behavior,\n+ const std::vector& excl_behavior\n+#endif // CUDNN_VERSION >= 8200\n+) {\n+ std::vector plans;\n+ if (max_workspace)\n+ *max_workspace = 0;\n+ for (auto ix : ixs) {\n+ if (excl_engines.count(ix))\n+ continue;\n+ auto engine = Make(CUDNN_BACKEND_ENGINE_DESCRIPTOR,\n+ CUDNN_ATTR_ENGINE_OPERATION_GRAPH,\n+ op_graph,\n+ CUDNN_ATTR_ENGINE_GLOBAL_INDEX,\n+ ix);\n+ auto err = cudnnBackendFinalize(engine.get());\n+ if (err == CUDNN_STATUS_NOT_SUPPORTED || err == CUDNN_STATUS_ARCH_MISMATCH)\n+ continue;\n+ if (err != CUDNN_STATUS_SUCCESS) {\n+ LOG(WARNING) << \"Unexpected cuDNN status: \" << err << \": \" << cudnnGetErrorString(err);\n+ continue;\n+ }\n+ auto cfg =\n+ MakeFinalized(CUDNN_BACKEND_ENGINECFG_DESCRIPTOR, CUDNN_ATTR_ENGINECFG_ENGINE, engine);\n+ auto plan = Make(CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR,\n+ CUDNN_ATTR_EXECUTION_PLAN_HANDLE,\n+ handle,\n+ CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG,\n+ cfg);\n+ err = cudnnBackendFinalize(plan.get());\n+ if (err == CUDNN_STATUS_NOT_SUPPORTED || err == CUDNN_STATUS_ARCH_MISMATCH)\n+ continue;\n+ if (err != CUDNN_STATUS_SUCCESS) {\n+ LOG(WARNING) << \"Unexpected cuDNN status: \" << err << \": \" << cudnnGetErrorString(err);\n+ continue;\n+ }\n+ auto workspace = GetAttr(plan, CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE);\n+ if (workspace > workspace_limit)\n+ continue;\n+ auto numerical = GetSomeAttrs(\n+ CUDNN_NUMERICAL_NOTE_TYPE_COUNT, engine, CUDNN_ATTR_ENGINE_NUMERICAL_NOTE);\n+ if (!IsCompatible(numerical, req_numeric, excl_numeric))\n+ continue;\n+#if CUDNN_VERSION >= 8200\n+ auto behavior = GetSomeAttrs(\n+ CUDNN_BEHAVIOR_NOTE_TYPE_COUNT, engine, CUDNN_ATTR_ENGINE_BEHAVIOR_NOTE);\n+ if (!IsCompatible(behavior, req_behavior, excl_behavior))\n+ continue;\n+#endif // CUDNN_VERSION >= 8200\n+ plans.push_back(std::move(plan));\n+ if (max_workspace)\n+ *max_workspace = std::max(*max_workspace, static_cast(workspace));\n+ }\n+ return plans;\n+}\n+\n+cudnnBackendHeurMode_t HeurMode() {\n+#if CUDNN_VERSION >= 8100\n+ auto minor = cudnnGetVersion() / 100 % 10;\n+ int default_mode = minor < 2 ? 
CUDNN_HEUR_MODE_INSTANT : CUDNN_HEUR_MODE_B;\n+#else\n+ int default_mode = CUDNN_HEUR_MODE_INSTANT;\n+#endif // CUDNN_VERSION >= 8100\n+ return static_cast(dmlc::GetEnv(\"MXNET_CUDNN_HEUR_MODE\", default_mode));", + "comment_created_at": "2021-11-09T17:09:16+00:00", + "comment_author": "DickJC123", + "comment_body": "The conclusion here is that it's simpler to define this as integers that align with the current definition. If future versions of cudnn*.h invalidate this, we can choose to insert a remapping function at that time if we feel it's important to maintain backward compatibility.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "664135001", + "pr_number": 20331, + "pr_file": "src/engine/threaded_engine.cc", + "created_at": "2021-07-05T21:43:39+00:00", + "commented_code": "OprBlock::Delete(opr_block);\n}\n\nvoid ThreadedEngine::OnStartStatic(Engine *engine, void *opr_block,\n const dmlc::Error* error) {\n // no-op\n}\n\n#if MXNET_USE_CUDA\nstatic inline void AddEventHelper(\n std::unordered_map* events_per_stream,\n const EventInfo& cuda_event) {\n auto event_stream = cuda_event.stream;\n if (events_per_stream->count(event_stream) > 0) {\n if ((*events_per_stream)[event_stream].pool_index < cuda_event.pool_index) {\n (*events_per_stream)[event_stream] = cuda_event;\n }\n } else {\n (*events_per_stream).emplace(event_stream, cuda_event);\n }\n}\n\nvoid ThreadedEngine::OnStartCPU(Engine *engine, void *opr_block,\n const dmlc::Error* error) {\n static bool use_new_dep_engine = dmlc::GetEnv(\"MXNET_ASYNC_GPU_ENGINE\", false);", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "664135001", + "repo_full_name": "apache/mxnet", + "pr_number": 20331, + "pr_file": "src/engine/threaded_engine.cc", + "discussion_id": "664135001", + "commented_code": "@@ -523,5 +543,206 @@ void ThreadedEngine::OnCompleteStatic(Engine *engine, void *opr_block_,\n OprBlock::Delete(opr_block);\n }\n \n+void ThreadedEngine::OnStartStatic(Engine *engine, void *opr_block,\n+ const dmlc::Error* error) {\n+ // no-op\n+}\n+\n+#if MXNET_USE_CUDA\n+static inline void AddEventHelper(\n+ std::unordered_map* events_per_stream,\n+ const EventInfo& cuda_event) {\n+ auto event_stream = cuda_event.stream;\n+ if (events_per_stream->count(event_stream) > 0) {\n+ if ((*events_per_stream)[event_stream].pool_index < cuda_event.pool_index) {\n+ (*events_per_stream)[event_stream] = cuda_event;\n+ }\n+ } else {\n+ (*events_per_stream).emplace(event_stream, cuda_event);\n+ }\n+}\n+\n+void ThreadedEngine::OnStartCPU(Engine *engine, void *opr_block,\n+ const dmlc::Error* error) {\n+ static bool use_new_dep_engine = dmlc::GetEnv(\"MXNET_ASYNC_GPU_ENGINE\", false);", + "comment_created_at": "2021-07-05T21:43:39+00:00", + "comment_author": "szha", + "comment_body": "We already have `MXNET_ENGINE_TYPE` which decides whether `NaiveEngine` (i.e. 
synchronous scheduler) is used, so unless there's a strong reason I'd prefer not to introduce new environment variables here.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "670376928", + "pr_number": 20450, + "pr_file": "src/operator/nn/mkldnn/mkldnn_base-inl.h", + "created_at": "2021-07-15T11:29:29+00:00", + "commented_code": "for (size_t i = 0; i < dims.size(); i++) dims[i] = arr.shape()[i];\n auto format = mkldnn::memory::format_tag::any;\n // for batch 256 alexnet benchmark test\n if (dims.size() == 2) {\n const bool brgemm_disabled = dmlc::GetEnv(\"MXNET_DISABLE_ONEDNN_BRGEMM_FC\", true);", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "670376928", + "repo_full_name": "apache/mxnet", + "pr_number": 20450, + "pr_file": "src/operator/nn/mkldnn/mkldnn_base-inl.h", + "discussion_id": "670376928", + "commented_code": "@@ -312,7 +312,8 @@ inline static mkldnn::memory::desc GetFCWeightDesc(const NDArray &arr, int dtype\n for (size_t i = 0; i < dims.size(); i++) dims[i] = arr.shape()[i];\n auto format = mkldnn::memory::format_tag::any;\n // for batch 256 alexnet benchmark test\n- if (dims.size() == 2) {\n+ const bool brgemm_disabled = dmlc::GetEnv(\"MXNET_DISABLE_ONEDNN_BRGEMM_FC\", true);", + "comment_created_at": "2021-07-15T11:29:29+00:00", + "comment_author": "anko-intel", + "comment_body": "It will be good to add description to docs/static_site/src/pages/api/faq/env_var.md\r\nAlso I am not sure if for 1.x branch the name have to include ONEDNN ?\r\nso maybe MXNET_MKLDNN_DISABLE_BRGEMM_FC", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-document-environment-variables.md b/_reviewers/mxnet-document-environment-variables.md index e364939..ec53fa7 100644 --- a/_reviewers/mxnet-document-environment-variables.md +++ b/_reviewers/mxnet-document-environment-variables.md @@ -34,85 +34,3 @@ When adding new configuration options: 2. Follow the subsystem naming prefix pattern (MXNET_[SUBSYSTEM]_*) 3. Document the variable in env_var.md with its default value and purpose 4. For enum-like options, document all valid values and their effects - - -[ - { - "discussion_id": "744069231", - "pr_number": 20635, - "pr_file": "src/operator/cudnn_ops.cc", - "created_at": "2021-11-06T03:30:04+00:00", - "commented_code": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied. 
See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n/*!\n * Copyright (c) 2021 by Contributors\n * \\file cudnn_ops.cc\n * \\brief cuDNN v8 ops\n */\n\n#include \"cudnn_ops.h\"\n\n#include \n#if MXNET_USE_CUDNN == 1\n\n#include \n\n#include \n#include \n#include \n#include \n#include \n#include \n#include \n#include \n#include \n\nnamespace mxnet {\nnamespace op {\n\nusing cudnn_cxx::Descriptor;\nusing cudnn_cxx::GetAttr;\nusing cudnn_cxx::GetSomeAttrs;\nusing cudnn_cxx::IsCompatible;\nusing cudnn_cxx::MakeAvgSampler;\nusing cudnn_cxx::MakeFinalized;\nusing cudnn_cxx::PackedStrides;\nusing cudnn_cxx::PlanStr;\n\nnamespace cudnn {\n\ncudnnDataType_t CudnnType(mshadow::TypeFlag dtype) {\n static std::unordered_map type_map {\n {mshadow::kFloat32, CUDNN_DATA_FLOAT}, {mshadow::kFloat64, CUDNN_DATA_DOUBLE},\n {mshadow::kFloat16, CUDNN_DATA_HALF}, {mshadow::kUint8, CUDNN_DATA_UINT8},\n {mshadow::kInt8, CUDNN_DATA_INT8}, {mshadow::kInt32, CUDNN_DATA_INT32},\n#if CUDNN_VERSION >= 8100\n {mshadow::kInt64, CUDNN_DATA_INT64},\n#endif // CUDNN_VERSION >= 8100\n };\n auto it = type_map.find(dtype);\n CHECK(it != type_map.end()) << \"Unsupported type: \" << dtype;\n return it->second;\n}\n\nstd::vector LayoutInfo::Order() const {\n std::vector ret(n_space_dims + 2);\n std::iota(ret.begin(), ret.end(), 0);\n if (channel_last)\n std::rotate(ret.begin() + 1, ret.begin() + 2, ret.end());\n return ret;\n}\n\nsize_t LayoutInfo::ChannelIdx() const {\n return channel_last ? 1 + n_space_dims : 1;\n}\n\nstd::vector LayoutInfo::Strides(const std::vector& dims) const {\n return PackedStrides(Order(), dims);\n}\n\nLayoutInfo GetLayoutInfo(mshadow::LayoutFlag layout) {\n static std::unordered_map layout_map{\n {mshadow::kNCW, {1, false}},\n {mshadow::kNWC, {1, true}},\n {mshadow::kNCHW, {2, false}},\n {mshadow::kNHWC, {2, true}},\n {mshadow::kNCDHW, {3, false}},\n {mshadow::kNDHWC, {3, true}},\n };\n auto it = layout_map.find(layout);\n CHECK(it != layout_map.end()) << \"Unsupported layout: \" << layout;\n return it->second;\n}\n\nTShape ExpandChannelDims(mshadow::LayoutFlag layout, int c) {\n auto li = GetLayoutInfo(layout);\n std::vector dims(li.n_space_dims + 2, 1);\n dims[li.ChannelIdx()] = c;\n return TShape(dims.begin(), dims.end());\n}\n\nstd::vector ReverseOrder(const std::vector& o) {\n std::vector ret(o.size());\n for (size_t i = 0; i < ret.size(); ++i)\n ret[o[i]] = i;\n return ret;\n}\n\nstd::vector RequireNumerics() {\n std::vector ret;\n return ret;\n}\n\nstd::vector ExcludeNumerics() {\n std::vector ret;\n if (!dmlc::GetEnv(\"MXNET_CUDA_ALLOW_TENSOR_CORE\", true))\n ret.push_back(CUDNN_NUMERICAL_NOTE_TENSOR_CORE);\n if (!dmlc::GetEnv(\"MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION\", false))\n ret.push_back(CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS);\n if (!dmlc::GetEnv(\"MXNET_CUDNN_ALLOW_REDUCED_PRECISION_REDUCTION\", true))\n ret.push_back(CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION);\n if (!dmlc::GetEnv(\"MXNET_CUDNN_ALLOW_FFT\", true))\n ret.push_back(CUDNN_NUMERICAL_NOTE_FFT);\n if (dmlc::GetEnv(\"MXNET_ENFORCE_DETERMINISM\", false))\n ret.push_back(CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC);\n if (!dmlc::GetEnv(\"MXNET_CUDNN_ALLOW_WINOGRAD\", true))\n ret.push_back(CUDNN_NUMERICAL_NOTE_WINOGRAD);\n return ret;\n}\n\nDescriptor MakeTensorDesc(int64_t uid,\n cudnnDataType_t dtype,\n const std::vector& dims,\n const std::vector& strides,\n bool is_virtual) {\n int64_t alignment = 16; // TODO(vcherepanov): ?\n return 
MakeFinalized(CUDNN_BACKEND_TENSOR_DESCRIPTOR,\n CUDNN_ATTR_TENSOR_UNIQUE_ID,\n uid,\n CUDNN_ATTR_TENSOR_DATA_TYPE,\n dtype,\n CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT,\n alignment,\n CUDNN_ATTR_TENSOR_DIMENSIONS,\n dims,\n CUDNN_ATTR_TENSOR_STRIDES,\n strides,\n CUDNN_ATTR_TENSOR_IS_VIRTUAL,\n is_virtual);\n}\n\nDescriptor MakeTensorDesc(int64_t uid,\n const TBlob& blob,\n const LayoutInfo& li,\n bool expand_1d,\n bool is_virtual) {\n std::vector dims(blob.shape_.ndim());\n CHECK_EQ(dims.size(), li.n_space_dims + 2);\n auto rev_order = ReverseOrder(li.Order());\n for (size_t i = 0; i < dims.size(); ++i)\n dims[i] = blob.shape_[rev_order[i]];\n auto strides = li.Strides(dims);\n if (li.n_space_dims == 1 && expand_1d) {\n dims.insert(dims.begin() + 2, 1);\n std::vector order(dims.size());\n std::iota(order.begin(), order.end(), 0);\n if (li.channel_last)\n std::rotate(order.begin() + 1, order.begin() + 2, order.end());\n strides = PackedStrides(order, dims);\n }\n return MakeTensorDesc(\n uid, CudnnType(static_cast(blob.type_flag_)), dims, strides, is_virtual);\n}\n\nDescriptor MakeCTensorDescExpandDims(int64_t uid,\n const TBlob& b,\n const LayoutInfo& li,\n bool is_virtual) {\n std::vector dims(li.n_space_dims + 2, 1);\n dims[1] = b.shape_[0];\n auto dtype = CudnnType(static_cast(b.type_flag_));\n return MakeTensorDesc(uid, dtype, dims, li.Strides(dims), is_virtual);\n}\n\nDescriptor MakeConvDesc(const ConvParam& param, mshadow::TypeFlag dtype) {\n int64_t sdims = param.kernel.ndim();\n std::vector stride(param.stride.begin(), param.stride.end());\n std::vector dilate(param.dilate.begin(), param.dilate.end());\n std::vector pad(param.pad.begin(), param.pad.end());\n\n auto comp_type = CudnnType(dtype);\n if (comp_type == CUDNN_DATA_HALF)\n comp_type = CUDNN_DATA_FLOAT;\n\n if (sdims == 1) {\n // TODO(vcherepanov): remove this once cuDNN properly supports 1D convolutions.\n // For now, making spacial dims 2D: 1 x W.\n ++sdims;\n stride.insert(stride.begin(), 1);\n dilate.insert(dilate.begin(), 1);\n pad.insert(pad.begin(), 0);\n }\n return MakeFinalized(CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR,\n CUDNN_ATTR_CONVOLUTION_SPATIAL_DIMS,\n sdims,\n CUDNN_ATTR_CONVOLUTION_COMP_TYPE,\n comp_type,\n CUDNN_ATTR_CONVOLUTION_CONV_MODE,\n CUDNN_CROSS_CORRELATION,\n CUDNN_ATTR_CONVOLUTION_FILTER_STRIDES,\n stride,\n CUDNN_ATTR_CONVOLUTION_DILATIONS,\n dilate,\n CUDNN_ATTR_CONVOLUTION_PRE_PADDINGS,\n pad,\n CUDNN_ATTR_CONVOLUTION_POST_PADDINGS,\n pad);\n}\n\nDescriptor MakeConvFwdOp(const Descriptor& conv,\n const Descriptor& x,\n const Descriptor& w,\n const Descriptor& y,\n bool add_to) {\n auto ret = Make(CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR,\n CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_CONV_DESC,\n conv,\n CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_X,\n x,\n CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_W,\n w,\n CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_Y,\n y);\n if (GetAttr(x, CUDNN_ATTR_TENSOR_DATA_TYPE) == CUDNN_DATA_DOUBLE) {\n SetAttrs(ret,\n CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA,\n 1.0,\n CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA,\n add_to ? 1.0 : 0.0);\n } else {\n SetAttrs(ret,\n CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA,\n 1.0f,\n CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA,\n add_to ? 
1.0f : 0.0f);\n }\n CUDNN_CALL(cudnnBackendFinalize(ret.get()));\n return ret;\n}\n\nDescriptor MakeConvDgradOp(const Descriptor& conv,\n const Descriptor& w,\n const Descriptor& dy,\n const Descriptor& dx,\n bool add_to) {\n auto ret = Make(CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC,\n conv,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_W,\n w,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DY,\n dy,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DX,\n dx);\n if (GetAttr(w, CUDNN_ATTR_TENSOR_DATA_TYPE) == CUDNN_DATA_DOUBLE) {\n SetAttrs(ret,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA,\n 1.0,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA,\n add_to ? 1.0 : 0.0);\n } else {\n SetAttrs(ret,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA,\n 1.0f,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA,\n add_to ? 1.0f : 0.0f);\n }\n CUDNN_CALL(cudnnBackendFinalize(ret.get()));\n return ret;\n}\n\nDescriptor MakeConvWgradOp(const Descriptor& conv,\n const Descriptor& x,\n const Descriptor& dy,\n const Descriptor& dw,\n bool add_to) {\n auto ret = Make(CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_CONV_DESC,\n conv,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_X,\n x,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DY,\n dy,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DW,\n dw);\n if (GetAttr(x, CUDNN_ATTR_TENSOR_DATA_TYPE) == CUDNN_DATA_DOUBLE) {\n SetAttrs(ret,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA,\n 1.0,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA,\n add_to ? 1.0 : 0.0);\n } else {\n SetAttrs(ret,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA,\n 1.0f,\n CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA,\n add_to ? 
1.0f : 0.0f);\n }\n CUDNN_CALL(cudnnBackendFinalize(ret.get()));\n return ret;\n}\n\nDescriptor MakeOpGraph(cudnnHandle_t handle, const std::vector& ops) {\n return MakeFinalized(CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR,\n CUDNN_ATTR_OPERATIONGRAPH_HANDLE,\n handle,\n CUDNN_ATTR_OPERATIONGRAPH_OPS,\n ops);\n}\n\nConvParam::ConvParam(const ConvolutionParam& p, bool add_to)\n : kernel(p.kernel),\n stride(p.stride),\n dilate(p.dilate),\n pad(p.pad),\n num_filter(p.num_filter),\n num_group(p.num_group),\n workspace(p.workspace),\n cudnn_tune(p.cudnn_tune),\n layout(p.layout),\n add_to(add_to) {}\n\nConvParam::ConvParam(const DeconvolutionParam& p, bool add_to)\n : kernel(p.kernel),\n stride(p.stride),\n dilate(p.dilate),\n pad(p.pad),\n num_filter(p.num_filter),\n num_group(p.num_group),\n workspace(p.workspace),\n cudnn_tune(p.cudnn_tune),\n layout(p.layout),\n add_to(add_to) {}\n\nvoid TuneWarnOnce() {\n thread_local bool done = false;\n if (!done) {\n LOG(INFO) << \"Auto-tuning cuDNN op, set MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable\";\n done = true;\n }\n}\n\nstd::vector MakeFallbackPlans(\n const std::vector& ixs,\n cudnnHandle_t handle,\n const Descriptor& op_graph,\n size_t workspace_limit,\n size_t* max_workspace,\n const std::unordered_set& excl_engines,\n const std::vector& req_numeric,\n const std::vector& excl_numeric\n#if CUDNN_VERSION >= 8200\n ,\n const std::vector& req_behavior,\n const std::vector& excl_behavior\n#endif // CUDNN_VERSION >= 8200\n) {\n std::vector plans;\n if (max_workspace)\n *max_workspace = 0;\n for (auto ix : ixs) {\n if (excl_engines.count(ix))\n continue;\n auto engine = Make(CUDNN_BACKEND_ENGINE_DESCRIPTOR,\n CUDNN_ATTR_ENGINE_OPERATION_GRAPH,\n op_graph,\n CUDNN_ATTR_ENGINE_GLOBAL_INDEX,\n ix);\n auto err = cudnnBackendFinalize(engine.get());\n if (err == CUDNN_STATUS_NOT_SUPPORTED || err == CUDNN_STATUS_ARCH_MISMATCH)\n continue;\n if (err != CUDNN_STATUS_SUCCESS) {\n LOG(WARNING) << \"Unexpected cuDNN status: \" << err << \": \" << cudnnGetErrorString(err);\n continue;\n }\n auto cfg =\n MakeFinalized(CUDNN_BACKEND_ENGINECFG_DESCRIPTOR, CUDNN_ATTR_ENGINECFG_ENGINE, engine);\n auto plan = Make(CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR,\n CUDNN_ATTR_EXECUTION_PLAN_HANDLE,\n handle,\n CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG,\n cfg);\n err = cudnnBackendFinalize(plan.get());\n if (err == CUDNN_STATUS_NOT_SUPPORTED || err == CUDNN_STATUS_ARCH_MISMATCH)\n continue;\n if (err != CUDNN_STATUS_SUCCESS) {\n LOG(WARNING) << \"Unexpected cuDNN status: \" << err << \": \" << cudnnGetErrorString(err);\n continue;\n }\n auto workspace = GetAttr(plan, CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE);\n if (workspace > workspace_limit)\n continue;\n auto numerical = GetSomeAttrs(\n CUDNN_NUMERICAL_NOTE_TYPE_COUNT, engine, CUDNN_ATTR_ENGINE_NUMERICAL_NOTE);\n if (!IsCompatible(numerical, req_numeric, excl_numeric))\n continue;\n#if CUDNN_VERSION >= 8200\n auto behavior = GetSomeAttrs(\n CUDNN_BEHAVIOR_NOTE_TYPE_COUNT, engine, CUDNN_ATTR_ENGINE_BEHAVIOR_NOTE);\n if (!IsCompatible(behavior, req_behavior, excl_behavior))\n continue;\n#endif // CUDNN_VERSION >= 8200\n plans.push_back(std::move(plan));\n if (max_workspace)\n *max_workspace = std::max(*max_workspace, static_cast(workspace));\n }\n return plans;\n}\n\ncudnnBackendHeurMode_t HeurMode() {\n#if CUDNN_VERSION >= 8100\n auto minor = cudnnGetVersion() / 100 % 10;\n int default_mode = minor < 2 ? 
CUDNN_HEUR_MODE_INSTANT : CUDNN_HEUR_MODE_B;\n#else\n int default_mode = CUDNN_HEUR_MODE_INSTANT;\n#endif // CUDNN_VERSION >= 8100\n return static_cast(dmlc::GetEnv(\"MXNET_CUDNN_HEUR_MODE\", default_mode));", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "744069231", - "repo_full_name": "apache/mxnet", - "pr_number": 20635, - "pr_file": "src/operator/cudnn_ops.cc", - "discussion_id": "744069231", - "commented_code": "@@ -0,0 +1,765 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * Copyright (c) 2021 by Contributors\n+ * \\file cudnn_ops.cc\n+ * \\brief cuDNN v8 ops\n+ */\n+\n+#include \"cudnn_ops.h\"\n+\n+#include \n+#if MXNET_USE_CUDNN == 1\n+\n+#include \n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+\n+namespace mxnet {\n+namespace op {\n+\n+using cudnn_cxx::Descriptor;\n+using cudnn_cxx::GetAttr;\n+using cudnn_cxx::GetSomeAttrs;\n+using cudnn_cxx::IsCompatible;\n+using cudnn_cxx::MakeAvgSampler;\n+using cudnn_cxx::MakeFinalized;\n+using cudnn_cxx::PackedStrides;\n+using cudnn_cxx::PlanStr;\n+\n+namespace cudnn {\n+\n+cudnnDataType_t CudnnType(mshadow::TypeFlag dtype) {\n+ static std::unordered_map type_map {\n+ {mshadow::kFloat32, CUDNN_DATA_FLOAT}, {mshadow::kFloat64, CUDNN_DATA_DOUBLE},\n+ {mshadow::kFloat16, CUDNN_DATA_HALF}, {mshadow::kUint8, CUDNN_DATA_UINT8},\n+ {mshadow::kInt8, CUDNN_DATA_INT8}, {mshadow::kInt32, CUDNN_DATA_INT32},\n+#if CUDNN_VERSION >= 8100\n+ {mshadow::kInt64, CUDNN_DATA_INT64},\n+#endif // CUDNN_VERSION >= 8100\n+ };\n+ auto it = type_map.find(dtype);\n+ CHECK(it != type_map.end()) << \"Unsupported type: \" << dtype;\n+ return it->second;\n+}\n+\n+std::vector LayoutInfo::Order() const {\n+ std::vector ret(n_space_dims + 2);\n+ std::iota(ret.begin(), ret.end(), 0);\n+ if (channel_last)\n+ std::rotate(ret.begin() + 1, ret.begin() + 2, ret.end());\n+ return ret;\n+}\n+\n+size_t LayoutInfo::ChannelIdx() const {\n+ return channel_last ? 
1 + n_space_dims : 1;\n+}\n+\n+std::vector LayoutInfo::Strides(const std::vector& dims) const {\n+ return PackedStrides(Order(), dims);\n+}\n+\n+LayoutInfo GetLayoutInfo(mshadow::LayoutFlag layout) {\n+ static std::unordered_map layout_map{\n+ {mshadow::kNCW, {1, false}},\n+ {mshadow::kNWC, {1, true}},\n+ {mshadow::kNCHW, {2, false}},\n+ {mshadow::kNHWC, {2, true}},\n+ {mshadow::kNCDHW, {3, false}},\n+ {mshadow::kNDHWC, {3, true}},\n+ };\n+ auto it = layout_map.find(layout);\n+ CHECK(it != layout_map.end()) << \"Unsupported layout: \" << layout;\n+ return it->second;\n+}\n+\n+TShape ExpandChannelDims(mshadow::LayoutFlag layout, int c) {\n+ auto li = GetLayoutInfo(layout);\n+ std::vector dims(li.n_space_dims + 2, 1);\n+ dims[li.ChannelIdx()] = c;\n+ return TShape(dims.begin(), dims.end());\n+}\n+\n+std::vector ReverseOrder(const std::vector& o) {\n+ std::vector ret(o.size());\n+ for (size_t i = 0; i < ret.size(); ++i)\n+ ret[o[i]] = i;\n+ return ret;\n+}\n+\n+std::vector RequireNumerics() {\n+ std::vector ret;\n+ return ret;\n+}\n+\n+std::vector ExcludeNumerics() {\n+ std::vector ret;\n+ if (!dmlc::GetEnv(\"MXNET_CUDA_ALLOW_TENSOR_CORE\", true))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_TENSOR_CORE);\n+ if (!dmlc::GetEnv(\"MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION\", false))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS);\n+ if (!dmlc::GetEnv(\"MXNET_CUDNN_ALLOW_REDUCED_PRECISION_REDUCTION\", true))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION);\n+ if (!dmlc::GetEnv(\"MXNET_CUDNN_ALLOW_FFT\", true))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_FFT);\n+ if (dmlc::GetEnv(\"MXNET_ENFORCE_DETERMINISM\", false))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC);\n+ if (!dmlc::GetEnv(\"MXNET_CUDNN_ALLOW_WINOGRAD\", true))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_WINOGRAD);\n+ return ret;\n+}\n+\n+Descriptor MakeTensorDesc(int64_t uid,\n+ cudnnDataType_t dtype,\n+ const std::vector& dims,\n+ const std::vector& strides,\n+ bool is_virtual) {\n+ int64_t alignment = 16; // TODO(vcherepanov): ?\n+ return MakeFinalized(CUDNN_BACKEND_TENSOR_DESCRIPTOR,\n+ CUDNN_ATTR_TENSOR_UNIQUE_ID,\n+ uid,\n+ CUDNN_ATTR_TENSOR_DATA_TYPE,\n+ dtype,\n+ CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT,\n+ alignment,\n+ CUDNN_ATTR_TENSOR_DIMENSIONS,\n+ dims,\n+ CUDNN_ATTR_TENSOR_STRIDES,\n+ strides,\n+ CUDNN_ATTR_TENSOR_IS_VIRTUAL,\n+ is_virtual);\n+}\n+\n+Descriptor MakeTensorDesc(int64_t uid,\n+ const TBlob& blob,\n+ const LayoutInfo& li,\n+ bool expand_1d,\n+ bool is_virtual) {\n+ std::vector dims(blob.shape_.ndim());\n+ CHECK_EQ(dims.size(), li.n_space_dims + 2);\n+ auto rev_order = ReverseOrder(li.Order());\n+ for (size_t i = 0; i < dims.size(); ++i)\n+ dims[i] = blob.shape_[rev_order[i]];\n+ auto strides = li.Strides(dims);\n+ if (li.n_space_dims == 1 && expand_1d) {\n+ dims.insert(dims.begin() + 2, 1);\n+ std::vector order(dims.size());\n+ std::iota(order.begin(), order.end(), 0);\n+ if (li.channel_last)\n+ std::rotate(order.begin() + 1, order.begin() + 2, order.end());\n+ strides = PackedStrides(order, dims);\n+ }\n+ return MakeTensorDesc(\n+ uid, CudnnType(static_cast(blob.type_flag_)), dims, strides, is_virtual);\n+}\n+\n+Descriptor MakeCTensorDescExpandDims(int64_t uid,\n+ const TBlob& b,\n+ const LayoutInfo& li,\n+ bool is_virtual) {\n+ std::vector dims(li.n_space_dims + 2, 1);\n+ dims[1] = b.shape_[0];\n+ auto dtype = CudnnType(static_cast(b.type_flag_));\n+ return MakeTensorDesc(uid, dtype, dims, li.Strides(dims), is_virtual);\n+}\n+\n+Descriptor MakeConvDesc(const ConvParam& param, 
mshadow::TypeFlag dtype) {\n+ int64_t sdims = param.kernel.ndim();\n+ std::vector stride(param.stride.begin(), param.stride.end());\n+ std::vector dilate(param.dilate.begin(), param.dilate.end());\n+ std::vector pad(param.pad.begin(), param.pad.end());\n+\n+ auto comp_type = CudnnType(dtype);\n+ if (comp_type == CUDNN_DATA_HALF)\n+ comp_type = CUDNN_DATA_FLOAT;\n+\n+ if (sdims == 1) {\n+ // TODO(vcherepanov): remove this once cuDNN properly supports 1D convolutions.\n+ // For now, making spacial dims 2D: 1 x W.\n+ ++sdims;\n+ stride.insert(stride.begin(), 1);\n+ dilate.insert(dilate.begin(), 1);\n+ pad.insert(pad.begin(), 0);\n+ }\n+ return MakeFinalized(CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR,\n+ CUDNN_ATTR_CONVOLUTION_SPATIAL_DIMS,\n+ sdims,\n+ CUDNN_ATTR_CONVOLUTION_COMP_TYPE,\n+ comp_type,\n+ CUDNN_ATTR_CONVOLUTION_CONV_MODE,\n+ CUDNN_CROSS_CORRELATION,\n+ CUDNN_ATTR_CONVOLUTION_FILTER_STRIDES,\n+ stride,\n+ CUDNN_ATTR_CONVOLUTION_DILATIONS,\n+ dilate,\n+ CUDNN_ATTR_CONVOLUTION_PRE_PADDINGS,\n+ pad,\n+ CUDNN_ATTR_CONVOLUTION_POST_PADDINGS,\n+ pad);\n+}\n+\n+Descriptor MakeConvFwdOp(const Descriptor& conv,\n+ const Descriptor& x,\n+ const Descriptor& w,\n+ const Descriptor& y,\n+ bool add_to) {\n+ auto ret = Make(CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_CONV_DESC,\n+ conv,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_X,\n+ x,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_W,\n+ w,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_Y,\n+ y);\n+ if (GetAttr(x, CUDNN_ATTR_TENSOR_DATA_TYPE) == CUDNN_DATA_DOUBLE) {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA,\n+ 1.0,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA,\n+ add_to ? 1.0 : 0.0);\n+ } else {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA,\n+ 1.0f,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA,\n+ add_to ? 1.0f : 0.0f);\n+ }\n+ CUDNN_CALL(cudnnBackendFinalize(ret.get()));\n+ return ret;\n+}\n+\n+Descriptor MakeConvDgradOp(const Descriptor& conv,\n+ const Descriptor& w,\n+ const Descriptor& dy,\n+ const Descriptor& dx,\n+ bool add_to) {\n+ auto ret = Make(CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC,\n+ conv,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_W,\n+ w,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DY,\n+ dy,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DX,\n+ dx);\n+ if (GetAttr(w, CUDNN_ATTR_TENSOR_DATA_TYPE) == CUDNN_DATA_DOUBLE) {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA,\n+ 1.0,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA,\n+ add_to ? 1.0 : 0.0);\n+ } else {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA,\n+ 1.0f,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA,\n+ add_to ? 
1.0f : 0.0f);\n+ }\n+ CUDNN_CALL(cudnnBackendFinalize(ret.get()));\n+ return ret;\n+}\n+\n+Descriptor MakeConvWgradOp(const Descriptor& conv,\n+ const Descriptor& x,\n+ const Descriptor& dy,\n+ const Descriptor& dw,\n+ bool add_to) {\n+ auto ret = Make(CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_CONV_DESC,\n+ conv,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_X,\n+ x,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DY,\n+ dy,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DW,\n+ dw);\n+ if (GetAttr(x, CUDNN_ATTR_TENSOR_DATA_TYPE) == CUDNN_DATA_DOUBLE) {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA,\n+ 1.0,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA,\n+ add_to ? 1.0 : 0.0);\n+ } else {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA,\n+ 1.0f,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA,\n+ add_to ? 1.0f : 0.0f);\n+ }\n+ CUDNN_CALL(cudnnBackendFinalize(ret.get()));\n+ return ret;\n+}\n+\n+Descriptor MakeOpGraph(cudnnHandle_t handle, const std::vector& ops) {\n+ return MakeFinalized(CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR,\n+ CUDNN_ATTR_OPERATIONGRAPH_HANDLE,\n+ handle,\n+ CUDNN_ATTR_OPERATIONGRAPH_OPS,\n+ ops);\n+}\n+\n+ConvParam::ConvParam(const ConvolutionParam& p, bool add_to)\n+ : kernel(p.kernel),\n+ stride(p.stride),\n+ dilate(p.dilate),\n+ pad(p.pad),\n+ num_filter(p.num_filter),\n+ num_group(p.num_group),\n+ workspace(p.workspace),\n+ cudnn_tune(p.cudnn_tune),\n+ layout(p.layout),\n+ add_to(add_to) {}\n+\n+ConvParam::ConvParam(const DeconvolutionParam& p, bool add_to)\n+ : kernel(p.kernel),\n+ stride(p.stride),\n+ dilate(p.dilate),\n+ pad(p.pad),\n+ num_filter(p.num_filter),\n+ num_group(p.num_group),\n+ workspace(p.workspace),\n+ cudnn_tune(p.cudnn_tune),\n+ layout(p.layout),\n+ add_to(add_to) {}\n+\n+void TuneWarnOnce() {\n+ thread_local bool done = false;\n+ if (!done) {\n+ LOG(INFO) << \"Auto-tuning cuDNN op, set MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable\";\n+ done = true;\n+ }\n+}\n+\n+std::vector MakeFallbackPlans(\n+ const std::vector& ixs,\n+ cudnnHandle_t handle,\n+ const Descriptor& op_graph,\n+ size_t workspace_limit,\n+ size_t* max_workspace,\n+ const std::unordered_set& excl_engines,\n+ const std::vector& req_numeric,\n+ const std::vector& excl_numeric\n+#if CUDNN_VERSION >= 8200\n+ ,\n+ const std::vector& req_behavior,\n+ const std::vector& excl_behavior\n+#endif // CUDNN_VERSION >= 8200\n+) {\n+ std::vector plans;\n+ if (max_workspace)\n+ *max_workspace = 0;\n+ for (auto ix : ixs) {\n+ if (excl_engines.count(ix))\n+ continue;\n+ auto engine = Make(CUDNN_BACKEND_ENGINE_DESCRIPTOR,\n+ CUDNN_ATTR_ENGINE_OPERATION_GRAPH,\n+ op_graph,\n+ CUDNN_ATTR_ENGINE_GLOBAL_INDEX,\n+ ix);\n+ auto err = cudnnBackendFinalize(engine.get());\n+ if (err == CUDNN_STATUS_NOT_SUPPORTED || err == CUDNN_STATUS_ARCH_MISMATCH)\n+ continue;\n+ if (err != CUDNN_STATUS_SUCCESS) {\n+ LOG(WARNING) << \"Unexpected cuDNN status: \" << err << \": \" << cudnnGetErrorString(err);\n+ continue;\n+ }\n+ auto cfg =\n+ MakeFinalized(CUDNN_BACKEND_ENGINECFG_DESCRIPTOR, CUDNN_ATTR_ENGINECFG_ENGINE, engine);\n+ auto plan = Make(CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR,\n+ CUDNN_ATTR_EXECUTION_PLAN_HANDLE,\n+ handle,\n+ CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG,\n+ cfg);\n+ err = cudnnBackendFinalize(plan.get());\n+ if (err == CUDNN_STATUS_NOT_SUPPORTED || err == CUDNN_STATUS_ARCH_MISMATCH)\n+ continue;\n+ if (err != CUDNN_STATUS_SUCCESS) {\n+ LOG(WARNING) << \"Unexpected cuDNN status: 
\" << err << \": \" << cudnnGetErrorString(err);\n+ continue;\n+ }\n+ auto workspace = GetAttr(plan, CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE);\n+ if (workspace > workspace_limit)\n+ continue;\n+ auto numerical = GetSomeAttrs(\n+ CUDNN_NUMERICAL_NOTE_TYPE_COUNT, engine, CUDNN_ATTR_ENGINE_NUMERICAL_NOTE);\n+ if (!IsCompatible(numerical, req_numeric, excl_numeric))\n+ continue;\n+#if CUDNN_VERSION >= 8200\n+ auto behavior = GetSomeAttrs(\n+ CUDNN_BEHAVIOR_NOTE_TYPE_COUNT, engine, CUDNN_ATTR_ENGINE_BEHAVIOR_NOTE);\n+ if (!IsCompatible(behavior, req_behavior, excl_behavior))\n+ continue;\n+#endif // CUDNN_VERSION >= 8200\n+ plans.push_back(std::move(plan));\n+ if (max_workspace)\n+ *max_workspace = std::max(*max_workspace, static_cast(workspace));\n+ }\n+ return plans;\n+}\n+\n+cudnnBackendHeurMode_t HeurMode() {\n+#if CUDNN_VERSION >= 8100\n+ auto minor = cudnnGetVersion() / 100 % 10;\n+ int default_mode = minor < 2 ? CUDNN_HEUR_MODE_INSTANT : CUDNN_HEUR_MODE_B;\n+#else\n+ int default_mode = CUDNN_HEUR_MODE_INSTANT;\n+#endif // CUDNN_VERSION >= 8100\n+ return static_cast(dmlc::GetEnv(\"MXNET_CUDNN_HEUR_MODE\", default_mode));", - "comment_created_at": "2021-11-06T03:30:04+00:00", - "comment_author": "DickJC123", - "comment_body": "Again, please add a description of MXNET_CUDNN_HEUR_MODE in the env_var.md file.\r\n\r\nI'm OK to leave it as an int tied to the cudnn.h definitions. But technically, that ties constants that users will put in training scripts to those definitions, which could change I suppose. An alternative would be for you to parse the env var as a string, e.g. if set via:\r\n```\r\nexport MXNET_CUDNN_HEUR_MODE=\"B\"\r\n```\r\n", - "pr_file_module": null - }, - { - "comment_id": "745831518", - "repo_full_name": "apache/mxnet", - "pr_number": 20635, - "pr_file": "src/operator/cudnn_ops.cc", - "discussion_id": "744069231", - "commented_code": "@@ -0,0 +1,765 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. 
See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * Copyright (c) 2021 by Contributors\n+ * \\file cudnn_ops.cc\n+ * \\brief cuDNN v8 ops\n+ */\n+\n+#include \"cudnn_ops.h\"\n+\n+#include \n+#if MXNET_USE_CUDNN == 1\n+\n+#include \n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+\n+namespace mxnet {\n+namespace op {\n+\n+using cudnn_cxx::Descriptor;\n+using cudnn_cxx::GetAttr;\n+using cudnn_cxx::GetSomeAttrs;\n+using cudnn_cxx::IsCompatible;\n+using cudnn_cxx::MakeAvgSampler;\n+using cudnn_cxx::MakeFinalized;\n+using cudnn_cxx::PackedStrides;\n+using cudnn_cxx::PlanStr;\n+\n+namespace cudnn {\n+\n+cudnnDataType_t CudnnType(mshadow::TypeFlag dtype) {\n+ static std::unordered_map type_map {\n+ {mshadow::kFloat32, CUDNN_DATA_FLOAT}, {mshadow::kFloat64, CUDNN_DATA_DOUBLE},\n+ {mshadow::kFloat16, CUDNN_DATA_HALF}, {mshadow::kUint8, CUDNN_DATA_UINT8},\n+ {mshadow::kInt8, CUDNN_DATA_INT8}, {mshadow::kInt32, CUDNN_DATA_INT32},\n+#if CUDNN_VERSION >= 8100\n+ {mshadow::kInt64, CUDNN_DATA_INT64},\n+#endif // CUDNN_VERSION >= 8100\n+ };\n+ auto it = type_map.find(dtype);\n+ CHECK(it != type_map.end()) << \"Unsupported type: \" << dtype;\n+ return it->second;\n+}\n+\n+std::vector LayoutInfo::Order() const {\n+ std::vector ret(n_space_dims + 2);\n+ std::iota(ret.begin(), ret.end(), 0);\n+ if (channel_last)\n+ std::rotate(ret.begin() + 1, ret.begin() + 2, ret.end());\n+ return ret;\n+}\n+\n+size_t LayoutInfo::ChannelIdx() const {\n+ return channel_last ? 1 + n_space_dims : 1;\n+}\n+\n+std::vector LayoutInfo::Strides(const std::vector& dims) const {\n+ return PackedStrides(Order(), dims);\n+}\n+\n+LayoutInfo GetLayoutInfo(mshadow::LayoutFlag layout) {\n+ static std::unordered_map layout_map{\n+ {mshadow::kNCW, {1, false}},\n+ {mshadow::kNWC, {1, true}},\n+ {mshadow::kNCHW, {2, false}},\n+ {mshadow::kNHWC, {2, true}},\n+ {mshadow::kNCDHW, {3, false}},\n+ {mshadow::kNDHWC, {3, true}},\n+ };\n+ auto it = layout_map.find(layout);\n+ CHECK(it != layout_map.end()) << \"Unsupported layout: \" << layout;\n+ return it->second;\n+}\n+\n+TShape ExpandChannelDims(mshadow::LayoutFlag layout, int c) {\n+ auto li = GetLayoutInfo(layout);\n+ std::vector dims(li.n_space_dims + 2, 1);\n+ dims[li.ChannelIdx()] = c;\n+ return TShape(dims.begin(), dims.end());\n+}\n+\n+std::vector ReverseOrder(const std::vector& o) {\n+ std::vector ret(o.size());\n+ for (size_t i = 0; i < ret.size(); ++i)\n+ ret[o[i]] = i;\n+ return ret;\n+}\n+\n+std::vector RequireNumerics() {\n+ std::vector ret;\n+ return ret;\n+}\n+\n+std::vector ExcludeNumerics() {\n+ std::vector ret;\n+ if (!dmlc::GetEnv(\"MXNET_CUDA_ALLOW_TENSOR_CORE\", true))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_TENSOR_CORE);\n+ if (!dmlc::GetEnv(\"MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION\", false))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS);\n+ if (!dmlc::GetEnv(\"MXNET_CUDNN_ALLOW_REDUCED_PRECISION_REDUCTION\", true))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION);\n+ if (!dmlc::GetEnv(\"MXNET_CUDNN_ALLOW_FFT\", true))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_FFT);\n+ if (dmlc::GetEnv(\"MXNET_ENFORCE_DETERMINISM\", false))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC);\n+ if (!dmlc::GetEnv(\"MXNET_CUDNN_ALLOW_WINOGRAD\", true))\n+ ret.push_back(CUDNN_NUMERICAL_NOTE_WINOGRAD);\n+ return ret;\n+}\n+\n+Descriptor MakeTensorDesc(int64_t uid,\n+ cudnnDataType_t dtype,\n+ const std::vector& dims,\n+ const 
std::vector& strides,\n+ bool is_virtual) {\n+ int64_t alignment = 16; // TODO(vcherepanov): ?\n+ return MakeFinalized(CUDNN_BACKEND_TENSOR_DESCRIPTOR,\n+ CUDNN_ATTR_TENSOR_UNIQUE_ID,\n+ uid,\n+ CUDNN_ATTR_TENSOR_DATA_TYPE,\n+ dtype,\n+ CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT,\n+ alignment,\n+ CUDNN_ATTR_TENSOR_DIMENSIONS,\n+ dims,\n+ CUDNN_ATTR_TENSOR_STRIDES,\n+ strides,\n+ CUDNN_ATTR_TENSOR_IS_VIRTUAL,\n+ is_virtual);\n+}\n+\n+Descriptor MakeTensorDesc(int64_t uid,\n+ const TBlob& blob,\n+ const LayoutInfo& li,\n+ bool expand_1d,\n+ bool is_virtual) {\n+ std::vector dims(blob.shape_.ndim());\n+ CHECK_EQ(dims.size(), li.n_space_dims + 2);\n+ auto rev_order = ReverseOrder(li.Order());\n+ for (size_t i = 0; i < dims.size(); ++i)\n+ dims[i] = blob.shape_[rev_order[i]];\n+ auto strides = li.Strides(dims);\n+ if (li.n_space_dims == 1 && expand_1d) {\n+ dims.insert(dims.begin() + 2, 1);\n+ std::vector order(dims.size());\n+ std::iota(order.begin(), order.end(), 0);\n+ if (li.channel_last)\n+ std::rotate(order.begin() + 1, order.begin() + 2, order.end());\n+ strides = PackedStrides(order, dims);\n+ }\n+ return MakeTensorDesc(\n+ uid, CudnnType(static_cast(blob.type_flag_)), dims, strides, is_virtual);\n+}\n+\n+Descriptor MakeCTensorDescExpandDims(int64_t uid,\n+ const TBlob& b,\n+ const LayoutInfo& li,\n+ bool is_virtual) {\n+ std::vector dims(li.n_space_dims + 2, 1);\n+ dims[1] = b.shape_[0];\n+ auto dtype = CudnnType(static_cast(b.type_flag_));\n+ return MakeTensorDesc(uid, dtype, dims, li.Strides(dims), is_virtual);\n+}\n+\n+Descriptor MakeConvDesc(const ConvParam& param, mshadow::TypeFlag dtype) {\n+ int64_t sdims = param.kernel.ndim();\n+ std::vector stride(param.stride.begin(), param.stride.end());\n+ std::vector dilate(param.dilate.begin(), param.dilate.end());\n+ std::vector pad(param.pad.begin(), param.pad.end());\n+\n+ auto comp_type = CudnnType(dtype);\n+ if (comp_type == CUDNN_DATA_HALF)\n+ comp_type = CUDNN_DATA_FLOAT;\n+\n+ if (sdims == 1) {\n+ // TODO(vcherepanov): remove this once cuDNN properly supports 1D convolutions.\n+ // For now, making spacial dims 2D: 1 x W.\n+ ++sdims;\n+ stride.insert(stride.begin(), 1);\n+ dilate.insert(dilate.begin(), 1);\n+ pad.insert(pad.begin(), 0);\n+ }\n+ return MakeFinalized(CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR,\n+ CUDNN_ATTR_CONVOLUTION_SPATIAL_DIMS,\n+ sdims,\n+ CUDNN_ATTR_CONVOLUTION_COMP_TYPE,\n+ comp_type,\n+ CUDNN_ATTR_CONVOLUTION_CONV_MODE,\n+ CUDNN_CROSS_CORRELATION,\n+ CUDNN_ATTR_CONVOLUTION_FILTER_STRIDES,\n+ stride,\n+ CUDNN_ATTR_CONVOLUTION_DILATIONS,\n+ dilate,\n+ CUDNN_ATTR_CONVOLUTION_PRE_PADDINGS,\n+ pad,\n+ CUDNN_ATTR_CONVOLUTION_POST_PADDINGS,\n+ pad);\n+}\n+\n+Descriptor MakeConvFwdOp(const Descriptor& conv,\n+ const Descriptor& x,\n+ const Descriptor& w,\n+ const Descriptor& y,\n+ bool add_to) {\n+ auto ret = Make(CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_CONV_DESC,\n+ conv,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_X,\n+ x,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_W,\n+ w,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_Y,\n+ y);\n+ if (GetAttr(x, CUDNN_ATTR_TENSOR_DATA_TYPE) == CUDNN_DATA_DOUBLE) {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA,\n+ 1.0,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA,\n+ add_to ? 1.0 : 0.0);\n+ } else {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA,\n+ 1.0f,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA,\n+ add_to ? 
1.0f : 0.0f);\n+ }\n+ CUDNN_CALL(cudnnBackendFinalize(ret.get()));\n+ return ret;\n+}\n+\n+Descriptor MakeConvDgradOp(const Descriptor& conv,\n+ const Descriptor& w,\n+ const Descriptor& dy,\n+ const Descriptor& dx,\n+ bool add_to) {\n+ auto ret = Make(CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC,\n+ conv,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_W,\n+ w,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DY,\n+ dy,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DX,\n+ dx);\n+ if (GetAttr(w, CUDNN_ATTR_TENSOR_DATA_TYPE) == CUDNN_DATA_DOUBLE) {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA,\n+ 1.0,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA,\n+ add_to ? 1.0 : 0.0);\n+ } else {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA,\n+ 1.0f,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA,\n+ add_to ? 1.0f : 0.0f);\n+ }\n+ CUDNN_CALL(cudnnBackendFinalize(ret.get()));\n+ return ret;\n+}\n+\n+Descriptor MakeConvWgradOp(const Descriptor& conv,\n+ const Descriptor& x,\n+ const Descriptor& dy,\n+ const Descriptor& dw,\n+ bool add_to) {\n+ auto ret = Make(CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_CONV_DESC,\n+ conv,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_X,\n+ x,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DY,\n+ dy,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DW,\n+ dw);\n+ if (GetAttr(x, CUDNN_ATTR_TENSOR_DATA_TYPE) == CUDNN_DATA_DOUBLE) {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA,\n+ 1.0,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA,\n+ add_to ? 1.0 : 0.0);\n+ } else {\n+ SetAttrs(ret,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA,\n+ 1.0f,\n+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA,\n+ add_to ? 
1.0f : 0.0f);\n+ }\n+ CUDNN_CALL(cudnnBackendFinalize(ret.get()));\n+ return ret;\n+}\n+\n+Descriptor MakeOpGraph(cudnnHandle_t handle, const std::vector& ops) {\n+ return MakeFinalized(CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR,\n+ CUDNN_ATTR_OPERATIONGRAPH_HANDLE,\n+ handle,\n+ CUDNN_ATTR_OPERATIONGRAPH_OPS,\n+ ops);\n+}\n+\n+ConvParam::ConvParam(const ConvolutionParam& p, bool add_to)\n+ : kernel(p.kernel),\n+ stride(p.stride),\n+ dilate(p.dilate),\n+ pad(p.pad),\n+ num_filter(p.num_filter),\n+ num_group(p.num_group),\n+ workspace(p.workspace),\n+ cudnn_tune(p.cudnn_tune),\n+ layout(p.layout),\n+ add_to(add_to) {}\n+\n+ConvParam::ConvParam(const DeconvolutionParam& p, bool add_to)\n+ : kernel(p.kernel),\n+ stride(p.stride),\n+ dilate(p.dilate),\n+ pad(p.pad),\n+ num_filter(p.num_filter),\n+ num_group(p.num_group),\n+ workspace(p.workspace),\n+ cudnn_tune(p.cudnn_tune),\n+ layout(p.layout),\n+ add_to(add_to) {}\n+\n+void TuneWarnOnce() {\n+ thread_local bool done = false;\n+ if (!done) {\n+ LOG(INFO) << \"Auto-tuning cuDNN op, set MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable\";\n+ done = true;\n+ }\n+}\n+\n+std::vector MakeFallbackPlans(\n+ const std::vector& ixs,\n+ cudnnHandle_t handle,\n+ const Descriptor& op_graph,\n+ size_t workspace_limit,\n+ size_t* max_workspace,\n+ const std::unordered_set& excl_engines,\n+ const std::vector& req_numeric,\n+ const std::vector& excl_numeric\n+#if CUDNN_VERSION >= 8200\n+ ,\n+ const std::vector& req_behavior,\n+ const std::vector& excl_behavior\n+#endif // CUDNN_VERSION >= 8200\n+) {\n+ std::vector plans;\n+ if (max_workspace)\n+ *max_workspace = 0;\n+ for (auto ix : ixs) {\n+ if (excl_engines.count(ix))\n+ continue;\n+ auto engine = Make(CUDNN_BACKEND_ENGINE_DESCRIPTOR,\n+ CUDNN_ATTR_ENGINE_OPERATION_GRAPH,\n+ op_graph,\n+ CUDNN_ATTR_ENGINE_GLOBAL_INDEX,\n+ ix);\n+ auto err = cudnnBackendFinalize(engine.get());\n+ if (err == CUDNN_STATUS_NOT_SUPPORTED || err == CUDNN_STATUS_ARCH_MISMATCH)\n+ continue;\n+ if (err != CUDNN_STATUS_SUCCESS) {\n+ LOG(WARNING) << \"Unexpected cuDNN status: \" << err << \": \" << cudnnGetErrorString(err);\n+ continue;\n+ }\n+ auto cfg =\n+ MakeFinalized(CUDNN_BACKEND_ENGINECFG_DESCRIPTOR, CUDNN_ATTR_ENGINECFG_ENGINE, engine);\n+ auto plan = Make(CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR,\n+ CUDNN_ATTR_EXECUTION_PLAN_HANDLE,\n+ handle,\n+ CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG,\n+ cfg);\n+ err = cudnnBackendFinalize(plan.get());\n+ if (err == CUDNN_STATUS_NOT_SUPPORTED || err == CUDNN_STATUS_ARCH_MISMATCH)\n+ continue;\n+ if (err != CUDNN_STATUS_SUCCESS) {\n+ LOG(WARNING) << \"Unexpected cuDNN status: \" << err << \": \" << cudnnGetErrorString(err);\n+ continue;\n+ }\n+ auto workspace = GetAttr(plan, CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE);\n+ if (workspace > workspace_limit)\n+ continue;\n+ auto numerical = GetSomeAttrs(\n+ CUDNN_NUMERICAL_NOTE_TYPE_COUNT, engine, CUDNN_ATTR_ENGINE_NUMERICAL_NOTE);\n+ if (!IsCompatible(numerical, req_numeric, excl_numeric))\n+ continue;\n+#if CUDNN_VERSION >= 8200\n+ auto behavior = GetSomeAttrs(\n+ CUDNN_BEHAVIOR_NOTE_TYPE_COUNT, engine, CUDNN_ATTR_ENGINE_BEHAVIOR_NOTE);\n+ if (!IsCompatible(behavior, req_behavior, excl_behavior))\n+ continue;\n+#endif // CUDNN_VERSION >= 8200\n+ plans.push_back(std::move(plan));\n+ if (max_workspace)\n+ *max_workspace = std::max(*max_workspace, static_cast(workspace));\n+ }\n+ return plans;\n+}\n+\n+cudnnBackendHeurMode_t HeurMode() {\n+#if CUDNN_VERSION >= 8100\n+ auto minor = cudnnGetVersion() / 100 % 10;\n+ int default_mode = minor < 2 ? 
CUDNN_HEUR_MODE_INSTANT : CUDNN_HEUR_MODE_B;\n+#else\n+ int default_mode = CUDNN_HEUR_MODE_INSTANT;\n+#endif // CUDNN_VERSION >= 8100\n+ return static_cast(dmlc::GetEnv(\"MXNET_CUDNN_HEUR_MODE\", default_mode));", - "comment_created_at": "2021-11-09T17:09:16+00:00", - "comment_author": "DickJC123", - "comment_body": "The conclusion here is that it's simpler to define this as integers that align with the current definition. If future versions of cudnn*.h invalidate this, we can choose to insert a remapping function at that time if we feel it's important to maintain backward compatibility.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "664135001", - "pr_number": 20331, - "pr_file": "src/engine/threaded_engine.cc", - "created_at": "2021-07-05T21:43:39+00:00", - "commented_code": "OprBlock::Delete(opr_block);\n}\n\nvoid ThreadedEngine::OnStartStatic(Engine *engine, void *opr_block,\n const dmlc::Error* error) {\n // no-op\n}\n\n#if MXNET_USE_CUDA\nstatic inline void AddEventHelper(\n std::unordered_map* events_per_stream,\n const EventInfo& cuda_event) {\n auto event_stream = cuda_event.stream;\n if (events_per_stream->count(event_stream) > 0) {\n if ((*events_per_stream)[event_stream].pool_index < cuda_event.pool_index) {\n (*events_per_stream)[event_stream] = cuda_event;\n }\n } else {\n (*events_per_stream).emplace(event_stream, cuda_event);\n }\n}\n\nvoid ThreadedEngine::OnStartCPU(Engine *engine, void *opr_block,\n const dmlc::Error* error) {\n static bool use_new_dep_engine = dmlc::GetEnv(\"MXNET_ASYNC_GPU_ENGINE\", false);", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "664135001", - "repo_full_name": "apache/mxnet", - "pr_number": 20331, - "pr_file": "src/engine/threaded_engine.cc", - "discussion_id": "664135001", - "commented_code": "@@ -523,5 +543,206 @@ void ThreadedEngine::OnCompleteStatic(Engine *engine, void *opr_block_,\n OprBlock::Delete(opr_block);\n }\n \n+void ThreadedEngine::OnStartStatic(Engine *engine, void *opr_block,\n+ const dmlc::Error* error) {\n+ // no-op\n+}\n+\n+#if MXNET_USE_CUDA\n+static inline void AddEventHelper(\n+ std::unordered_map* events_per_stream,\n+ const EventInfo& cuda_event) {\n+ auto event_stream = cuda_event.stream;\n+ if (events_per_stream->count(event_stream) > 0) {\n+ if ((*events_per_stream)[event_stream].pool_index < cuda_event.pool_index) {\n+ (*events_per_stream)[event_stream] = cuda_event;\n+ }\n+ } else {\n+ (*events_per_stream).emplace(event_stream, cuda_event);\n+ }\n+}\n+\n+void ThreadedEngine::OnStartCPU(Engine *engine, void *opr_block,\n+ const dmlc::Error* error) {\n+ static bool use_new_dep_engine = dmlc::GetEnv(\"MXNET_ASYNC_GPU_ENGINE\", false);", - "comment_created_at": "2021-07-05T21:43:39+00:00", - "comment_author": "szha", - "comment_body": "We already have `MXNET_ENGINE_TYPE` which decides whether `NaiveEngine` (i.e. 
synchronous scheduler) is used, so unless there's a strong reason I'd prefer not to introduce new environment variables here.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "670376928", - "pr_number": 20450, - "pr_file": "src/operator/nn/mkldnn/mkldnn_base-inl.h", - "created_at": "2021-07-15T11:29:29+00:00", - "commented_code": "for (size_t i = 0; i < dims.size(); i++) dims[i] = arr.shape()[i];\n auto format = mkldnn::memory::format_tag::any;\n // for batch 256 alexnet benchmark test\n if (dims.size() == 2) {\n const bool brgemm_disabled = dmlc::GetEnv(\"MXNET_DISABLE_ONEDNN_BRGEMM_FC\", true);", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "670376928", - "repo_full_name": "apache/mxnet", - "pr_number": 20450, - "pr_file": "src/operator/nn/mkldnn/mkldnn_base-inl.h", - "discussion_id": "670376928", - "commented_code": "@@ -312,7 +312,8 @@ inline static mkldnn::memory::desc GetFCWeightDesc(const NDArray &arr, int dtype\n for (size_t i = 0; i < dims.size(); i++) dims[i] = arr.shape()[i];\n auto format = mkldnn::memory::format_tag::any;\n // for batch 256 alexnet benchmark test\n- if (dims.size() == 2) {\n+ const bool brgemm_disabled = dmlc::GetEnv(\"MXNET_DISABLE_ONEDNN_BRGEMM_FC\", true);", - "comment_created_at": "2021-07-15T11:29:29+00:00", - "comment_author": "anko-intel", - "comment_body": "It will be good to add description to docs/static_site/src/pages/api/faq/env_var.md\r\nAlso I am not sure if for 1.x branch the name have to include ONEDNN ?\r\nso maybe MXNET_MKLDNN_DISABLE_BRGEMM_FC", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-documentation-clarity-and-formatting.json b/_reviewers/mxnet-documentation-clarity-and-formatting.json new file mode 100644 index 0000000..71302a8 --- /dev/null +++ b/_reviewers/mxnet-documentation-clarity-and-formatting.json @@ -0,0 +1,128 @@ +[ + { + "discussion_id": "245805403", + "pr_number": 13735, + "pr_file": "example/gluon/wavenet/README.md", + "created_at": "2019-01-07T21:34:08+00:00", + "commented_code": "# WaveNet with Gluon\n\nGluon implementation of [WaveNet: A Generative Model for Raw Audio](https://arxiv.org/abs/1609.03499)\n\n![net_structure1](assets/net_struc1.png)\n![net_structure2](assets/net_struc2.png)\n\n## Requirements\n- Python 3.6.1\n- Mxnet 1.2\n- tqdm\n- scipy.io\n\n\n## Usage\n\n- arguments\n - batch_size : Define batch size (defualt=64)\n - epoches : Define total epoches (default=1000)\n - mu : Define mu (default=128)", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "245805403", + "repo_full_name": "apache/mxnet", + "pr_number": 13735, + "pr_file": "example/gluon/wavenet/README.md", + "discussion_id": "245805403", + "commented_code": "@@ -0,0 +1,53 @@\n+# WaveNet with Gluon\n+\n+Gluon implementation of [WaveNet: A Generative Model for Raw Audio](https://arxiv.org/abs/1609.03499)\n+\n+![net_structure1](assets/net_struc1.png)\n+![net_structure2](assets/net_struc2.png)\n+\n+## Requirements\n+- Python 3.6.1\n+- Mxnet 1.2\n+- tqdm\n+- scipy.io\n+\n+\n+## Usage\n+\n+- arguments\n+ - batch_size : Define batch size (defualt=64)\n+ - epoches : Define total epoches (default=1000)\n+ - mu : Define mu (default=128)", + "comment_created_at": "2019-01-07T21:34:08+00:00", + "comment_author": "vishaalkapoor", + "comment_body": "It would be clearer to give some context on what mu is as one would have to guess it's in the paper, which doesn't refer to the mu-law. E.g. 
Define \\mu in \\mu-Law (see the paper or https://en.wikipedia.org/wiki/%CE%9C-law_algorithm).", + "pr_file_module": null + }, + { + "comment_id": "246599398", + "repo_full_name": "apache/mxnet", + "pr_number": 13735, + "pr_file": "example/gluon/wavenet/README.md", + "discussion_id": "245805403", + "commented_code": "@@ -0,0 +1,53 @@\n+# WaveNet with Gluon\n+\n+Gluon implementation of [WaveNet: A Generative Model for Raw Audio](https://arxiv.org/abs/1609.03499)\n+\n+![net_structure1](assets/net_struc1.png)\n+![net_structure2](assets/net_struc2.png)\n+\n+## Requirements\n+- Python 3.6.1\n+- Mxnet 1.2\n+- tqdm\n+- scipy.io\n+\n+\n+## Usage\n+\n+- arguments\n+ - batch_size : Define batch size (defualt=64)\n+ - epoches : Define total epoches (default=1000)\n+ - mu : Define mu (default=128)", + "comment_created_at": "2019-01-10T00:23:21+00:00", + "comment_author": "seujung", + "comment_body": "Corrected by applying the contents. And add the link for the mu-law algorithm.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "916649711", + "pr_number": 21090, + "pr_file": "docs/python_docs/python/tutorials/getting-started/gluon_migration_guide.md", + "created_at": "2022-07-08T09:40:47+00:00", + "commented_code": "- **Gluon Metrics** and **Optimizers**: refactored with MXNet numpy interface and addressed legacy issues.\n\nAdopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience. \nAdopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience.", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "916649711", + "repo_full_name": "apache/mxnet", + "pr_number": 21090, + "pr_file": "docs/python_docs/python/tutorials/getting-started/gluon_migration_guide.md", + "discussion_id": "916649711", + "commented_code": "@@ -33,41 +33,41 @@ In Gluon2.0, we extend the support to MXNet numpy and numpy extension with simpl\n \n - **Gluon Metrics** and **Optimizers**: refactored with MXNet numpy interface and addressed legacy issues.\n \n-Adopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience. \n+Adopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience.", + "comment_created_at": "2022-07-08T09:40:47+00:00", + "comment_author": "anko-intel", + "comment_body": "Please check generated documentation. Space or especially 2 spaces are sometimes meaningfull. 
Without it there will be no new line.", + "pr_file_module": null + }, + { + "comment_id": "916685610", + "repo_full_name": "apache/mxnet", + "pr_number": 21090, + "pr_file": "docs/python_docs/python/tutorials/getting-started/gluon_migration_guide.md", + "discussion_id": "916649711", + "commented_code": "@@ -33,41 +33,41 @@ In Gluon2.0, we extend the support to MXNet numpy and numpy extension with simpl\n \n - **Gluon Metrics** and **Optimizers**: refactored with MXNet numpy interface and addressed legacy issues.\n \n-Adopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience. \n+Adopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience.", + "comment_created_at": "2022-07-08T10:27:25+00:00", + "comment_author": "bartekkuncer", + "comment_body": "I checked it already. Here are the screenshots:\r\n![image](https://user-images.githubusercontent.com/59650839/177974295-ebe5b812-3815-4c6e-bf4c-c875ae5224f4.png)\r\n![image](https://user-images.githubusercontent.com/59650839/177974315-7d5e9da6-0b82-4e70-acd9-07e8679ef4c9.png)\r\n![image](https://user-images.githubusercontent.com/59650839/177974335-56a30cd7-ab35-4ba1-839b-719efa6b27e2.png)\r\n![image](https://user-images.githubusercontent.com/59650839/177974345-c174bdda-bc90-41a8-9e4d-b293e06b4ea3.png)\r\n![image](https://user-images.githubusercontent.com/59650839/177974369-4d397961-05ca-4f16-b1be-88cd9ab2c0a4.png)\r\n![image](https://user-images.githubusercontent.com/59650839/177974379-b42b4cb3-e31c-4103-afcd-5b1854b3387a.png)\r\n\r\nI thought that adding a new line in text is enough to have a new line present in generated document. Could you point me to the markdown language documentation regarding spaces creating/necessary to create new line? This seems quite bizarre to me and would like to understand the logic behind it. ", + "pr_file_module": null + }, + { + "comment_id": "916726087", + "repo_full_name": "apache/mxnet", + "pr_number": 21090, + "pr_file": "docs/python_docs/python/tutorials/getting-started/gluon_migration_guide.md", + "discussion_id": "916649711", + "commented_code": "@@ -33,41 +33,41 @@ In Gluon2.0, we extend the support to MXNet numpy and numpy extension with simpl\n \n - **Gluon Metrics** and **Optimizers**: refactored with MXNet numpy interface and addressed legacy issues.\n \n-Adopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience. \n+Adopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience.", + "comment_created_at": "2022-07-08T11:24:42+00:00", + "comment_author": "anko-intel", + "comment_body": "Thank you for the screenshot. I've just observed such behaviour, when fixing documentation. 
Now I find some tips about it in the documentation: https://www.markdownguide.org/basic-syntax#line-break-best-practices\r\nYou can see such behaviou if you generate this part documentation: https://github.com/apache/incubator-mxnet/pull/21060/files#diff-f16304c2f893c2e7d51d74dff3892e5e11b39b8043751ade78b0286bc49afbd2L225-L227\r\nfor the older version, where such double spaces where probably accidentally missing.", + "pr_file_module": null + }, + { + "comment_id": "916733346", + "repo_full_name": "apache/mxnet", + "pr_number": 21090, + "pr_file": "docs/python_docs/python/tutorials/getting-started/gluon_migration_guide.md", + "discussion_id": "916649711", + "commented_code": "@@ -33,41 +33,41 @@ In Gluon2.0, we extend the support to MXNet numpy and numpy extension with simpl\n \n - **Gluon Metrics** and **Optimizers**: refactored with MXNet numpy interface and addressed legacy issues.\n \n-Adopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience. \n+Adopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience.", + "comment_created_at": "2022-07-08T11:36:20+00:00", + "comment_author": "bartekkuncer", + "comment_body": "Thanks for the links!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "646146854", + "pr_number": 20262, + "pr_file": "docs/python_docs/python/tutorials/packages/gluon/blocks/custom-layer.md", + "created_at": "2021-06-06T15:15:02+00:00", + "commented_code": "[-0.05046433]\n [-1.2375476 ]\n [-0.15506986]]\n\n```\n\n\n## Parameters of a custom layer\n\nUsually, a layer has a set of associated parameters, sometimes also referred as weights. This is an internal state of a layer. Most often, these parameters are the ones, that we want to learn during backpropogation step, but sometimes these parameters might be just constants we want to use during forward pass.\n\nAll parameters of a block are stored and accessed via [ParameterDict](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/parameter.py#L508) class. This class helps with initialization, updating, saving and loading of the parameters. Each layer can have multiple set of parameters, and all of them can be stored in a single instance of the `ParameterDict` class. On a block level, the instance of the `ParameterDict` class is accessible via `self.params` field, and outside of a block one can access all parameters of the network via [collect_params()](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Block.collect_params) method called on a `container`. `ParameterDict` uses [Parameter](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Parameter) class to represent parameters inside of Apache MxNet neural network. If parameter doesn't exist, trying to get a parameter via `self.params` will create it automatically.\nAll parameters of a block are stored and accessed via [ParameterDict](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/parameter.py#L508) class. This class helps with initialization, updating, saving and loading of the parameters. 
Each layer can have multiple set of parameters, and all of them can be stored in a single instance of the `ParameterDict` class. On a block level, the instance of the `ParameterDict` class is accessible via `self.params` field, and outside of a block one can access all parameters of the network via [collect_params()](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Block.collect_params) method called on a `container`. `ParameterDict` uses [Parameter](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Parameter) class to represent parameters inside of Apache MxNet neural network.", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "646146854", + "repo_full_name": "apache/mxnet", + "pr_number": 20262, + "pr_file": "docs/python_docs/python/tutorials/packages/gluon/blocks/custom-layer.md", + "discussion_id": "646146854", + "commented_code": "@@ -131,50 +128,47 @@ Output:\n [-0.05046433]\n [-1.2375476 ]\n [-0.15506986]]\n-\n ```\n \n \n ## Parameters of a custom layer\n \n Usually, a layer has a set of associated parameters, sometimes also referred as weights. This is an internal state of a layer. Most often, these parameters are the ones, that we want to learn during backpropogation step, but sometimes these parameters might be just constants we want to use during forward pass.\n \n-All parameters of a block are stored and accessed via [ParameterDict](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/parameter.py#L508) class. This class helps with initialization, updating, saving and loading of the parameters. Each layer can have multiple set of parameters, and all of them can be stored in a single instance of the `ParameterDict` class. On a block level, the instance of the `ParameterDict` class is accessible via `self.params` field, and outside of a block one can access all parameters of the network via [collect_params()](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Block.collect_params) method called on a `container`. `ParameterDict` uses [Parameter](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Parameter) class to represent parameters inside of Apache MxNet neural network. If parameter doesn't exist, trying to get a parameter via `self.params` will create it automatically.\n+All parameters of a block are stored and accessed via [ParameterDict](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/parameter.py#L508) class. This class helps with initialization, updating, saving and loading of the parameters. Each layer can have multiple set of parameters, and all of them can be stored in a single instance of the `ParameterDict` class. On a block level, the instance of the `ParameterDict` class is accessible via `self.params` field, and outside of a block one can access all parameters of the network via [collect_params()](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Block.collect_params) method called on a `container`. `ParameterDict` uses [Parameter](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Parameter) class to represent parameters inside of Apache MxNet neural network.", + "comment_created_at": "2021-06-06T15:15:02+00:00", + "comment_author": "szha", + "comment_body": "We don't have a custom class for the dictionary of parameters. `collect_params()` now returns a regular dictionary. 
@leezu anything you want to highlight here?", + "pr_file_module": null + }, + { + "comment_id": "647241837", + "repo_full_name": "apache/mxnet", + "pr_number": 20262, + "pr_file": "docs/python_docs/python/tutorials/packages/gluon/blocks/custom-layer.md", + "discussion_id": "646146854", + "commented_code": "@@ -131,50 +128,47 @@ Output:\n [-0.05046433]\n [-1.2375476 ]\n [-0.15506986]]\n-\n ```\n \n \n ## Parameters of a custom layer\n \n Usually, a layer has a set of associated parameters, sometimes also referred as weights. This is an internal state of a layer. Most often, these parameters are the ones, that we want to learn during backpropogation step, but sometimes these parameters might be just constants we want to use during forward pass.\n \n-All parameters of a block are stored and accessed via [ParameterDict](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/parameter.py#L508) class. This class helps with initialization, updating, saving and loading of the parameters. Each layer can have multiple set of parameters, and all of them can be stored in a single instance of the `ParameterDict` class. On a block level, the instance of the `ParameterDict` class is accessible via `self.params` field, and outside of a block one can access all parameters of the network via [collect_params()](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Block.collect_params) method called on a `container`. `ParameterDict` uses [Parameter](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Parameter) class to represent parameters inside of Apache MxNet neural network. If parameter doesn't exist, trying to get a parameter via `self.params` will create it automatically.\n+All parameters of a block are stored and accessed via [ParameterDict](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/parameter.py#L508) class. This class helps with initialization, updating, saving and loading of the parameters. Each layer can have multiple set of parameters, and all of them can be stored in a single instance of the `ParameterDict` class. On a block level, the instance of the `ParameterDict` class is accessible via `self.params` field, and outside of a block one can access all parameters of the network via [collect_params()](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Block.collect_params) method called on a `container`. `ParameterDict` uses [Parameter](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Parameter) class to represent parameters inside of Apache MxNet neural network.", + "comment_created_at": "2021-06-08T08:43:58+00:00", + "comment_author": "leezu", + "comment_body": "Good catch. Let's remove the reference to ParameterDict and call it a `dict` of Parameters. It's also an option to use the https://docs.python.org/3/library/typing.html syntax to describe the type.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-documentation-clarity-and-formatting.md b/_reviewers/mxnet-documentation-clarity-and-formatting.md index b2c46e4..2344007 100644 --- a/_reviewers/mxnet-documentation-clarity-and-formatting.md +++ b/_reviewers/mxnet-documentation-clarity-and-formatting.md @@ -32,133 +32,3 @@ When writing documentation (README files, tutorials, API docs), ensure clarity a 4. **Keep technical references current** - When documenting code structures, ensure descriptions match the actual implementation. Update documentation when code interfaces change. 
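For example, a hypothetical README argument list that applies the practices above might look like the sketch below. The option names and defaults are illustrative only (loosely echoing the WaveNet README reviewed in the attached comments, not any real project), and the note about trailing spaces refers to standard Markdown line-break syntax:

```markdown
## Usage

- Arguments
  - `batch_size` : batch size used for training (default: 64)
  - `epochs` : total number of training epochs (default: 1000)
  - `mu` : the compression parameter µ of the µ-law companding transform;
    see the paper or https://en.wikipedia.org/wiki/%CE%9C-law_algorithm (default: 128)

First line of a hard-wrapped note.  <!-- two trailing spaces force a line break -->
Second line, rendered directly underneath.
```

A reader should be able to understand a specialized term such as `mu` from the argument description or its reference link alone, without having to guess its meaning from the paper, and deliberate formatting such as the two trailing spaces should survive edits because it changes how the page renders.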
Following these practices ensures documentation is both technically accurate and genuinely helpful to other developers. - - -[ - { - "discussion_id": "245805403", - "pr_number": 13735, - "pr_file": "example/gluon/wavenet/README.md", - "created_at": "2019-01-07T21:34:08+00:00", - "commented_code": "# WaveNet with Gluon\n\nGluon implementation of [WaveNet: A Generative Model for Raw Audio](https://arxiv.org/abs/1609.03499)\n\n![net_structure1](assets/net_struc1.png)\n![net_structure2](assets/net_struc2.png)\n\n## Requirements\n- Python 3.6.1\n- Mxnet 1.2\n- tqdm\n- scipy.io\n\n\n## Usage\n\n- arguments\n - batch_size : Define batch size (defualt=64)\n - epoches : Define total epoches (default=1000)\n - mu : Define mu (default=128)", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "245805403", - "repo_full_name": "apache/mxnet", - "pr_number": 13735, - "pr_file": "example/gluon/wavenet/README.md", - "discussion_id": "245805403", - "commented_code": "@@ -0,0 +1,53 @@\n+# WaveNet with Gluon\n+\n+Gluon implementation of [WaveNet: A Generative Model for Raw Audio](https://arxiv.org/abs/1609.03499)\n+\n+![net_structure1](assets/net_struc1.png)\n+![net_structure2](assets/net_struc2.png)\n+\n+## Requirements\n+- Python 3.6.1\n+- Mxnet 1.2\n+- tqdm\n+- scipy.io\n+\n+\n+## Usage\n+\n+- arguments\n+ - batch_size : Define batch size (defualt=64)\n+ - epoches : Define total epoches (default=1000)\n+ - mu : Define mu (default=128)", - "comment_created_at": "2019-01-07T21:34:08+00:00", - "comment_author": "vishaalkapoor", - "comment_body": "It would be clearer to give some context on what mu is as one would have to guess it's in the paper, which doesn't refer to the mu-law. E.g. Define \\mu in \\mu-Law (see the paper or https://en.wikipedia.org/wiki/%CE%9C-law_algorithm).", - "pr_file_module": null - }, - { - "comment_id": "246599398", - "repo_full_name": "apache/mxnet", - "pr_number": 13735, - "pr_file": "example/gluon/wavenet/README.md", - "discussion_id": "245805403", - "commented_code": "@@ -0,0 +1,53 @@\n+# WaveNet with Gluon\n+\n+Gluon implementation of [WaveNet: A Generative Model for Raw Audio](https://arxiv.org/abs/1609.03499)\n+\n+![net_structure1](assets/net_struc1.png)\n+![net_structure2](assets/net_struc2.png)\n+\n+## Requirements\n+- Python 3.6.1\n+- Mxnet 1.2\n+- tqdm\n+- scipy.io\n+\n+\n+## Usage\n+\n+- arguments\n+ - batch_size : Define batch size (defualt=64)\n+ - epoches : Define total epoches (default=1000)\n+ - mu : Define mu (default=128)", - "comment_created_at": "2019-01-10T00:23:21+00:00", - "comment_author": "seujung", - "comment_body": "Corrected by applying the contents. And add the link for the mu-law algorithm.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "916649711", - "pr_number": 21090, - "pr_file": "docs/python_docs/python/tutorials/getting-started/gluon_migration_guide.md", - "created_at": "2022-07-08T09:40:47+00:00", - "commented_code": "- **Gluon Metrics** and **Optimizers**: refactored with MXNet numpy interface and addressed legacy issues.\n\nAdopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience. \nAdopting these new functionalities may or may not require modifications on your models. 
But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience.", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "916649711", - "repo_full_name": "apache/mxnet", - "pr_number": 21090, - "pr_file": "docs/python_docs/python/tutorials/getting-started/gluon_migration_guide.md", - "discussion_id": "916649711", - "commented_code": "@@ -33,41 +33,41 @@ In Gluon2.0, we extend the support to MXNet numpy and numpy extension with simpl\n \n - **Gluon Metrics** and **Optimizers**: refactored with MXNet numpy interface and addressed legacy issues.\n \n-Adopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience. \n+Adopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience.", - "comment_created_at": "2022-07-08T09:40:47+00:00", - "comment_author": "anko-intel", - "comment_body": "Please check generated documentation. Space or especially 2 spaces are sometimes meaningfull. Without it there will be no new line.", - "pr_file_module": null - }, - { - "comment_id": "916685610", - "repo_full_name": "apache/mxnet", - "pr_number": 21090, - "pr_file": "docs/python_docs/python/tutorials/getting-started/gluon_migration_guide.md", - "discussion_id": "916649711", - "commented_code": "@@ -33,41 +33,41 @@ In Gluon2.0, we extend the support to MXNet numpy and numpy extension with simpl\n \n - **Gluon Metrics** and **Optimizers**: refactored with MXNet numpy interface and addressed legacy issues.\n \n-Adopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience. \n+Adopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience.", - "comment_created_at": "2022-07-08T10:27:25+00:00", - "comment_author": "bartekkuncer", - "comment_body": "I checked it already. Here are the screenshots:\r\n![image](https://user-images.githubusercontent.com/59650839/177974295-ebe5b812-3815-4c6e-bf4c-c875ae5224f4.png)\r\n![image](https://user-images.githubusercontent.com/59650839/177974315-7d5e9da6-0b82-4e70-acd9-07e8679ef4c9.png)\r\n![image](https://user-images.githubusercontent.com/59650839/177974335-56a30cd7-ab35-4ba1-839b-719efa6b27e2.png)\r\n![image](https://user-images.githubusercontent.com/59650839/177974345-c174bdda-bc90-41a8-9e4d-b293e06b4ea3.png)\r\n![image](https://user-images.githubusercontent.com/59650839/177974369-4d397961-05ca-4f16-b1be-88cd9ab2c0a4.png)\r\n![image](https://user-images.githubusercontent.com/59650839/177974379-b42b4cb3-e31c-4103-afcd-5b1854b3387a.png)\r\n\r\nI thought that adding a new line in text is enough to have a new line present in generated document. Could you point me to the markdown language documentation regarding spaces creating/necessary to create new line? 
This seems quite bizarre to me and would like to understand the logic behind it. ", - "pr_file_module": null - }, - { - "comment_id": "916726087", - "repo_full_name": "apache/mxnet", - "pr_number": 21090, - "pr_file": "docs/python_docs/python/tutorials/getting-started/gluon_migration_guide.md", - "discussion_id": "916649711", - "commented_code": "@@ -33,41 +33,41 @@ In Gluon2.0, we extend the support to MXNet numpy and numpy extension with simpl\n \n - **Gluon Metrics** and **Optimizers**: refactored with MXNet numpy interface and addressed legacy issues.\n \n-Adopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience. \n+Adopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience.", - "comment_created_at": "2022-07-08T11:24:42+00:00", - "comment_author": "anko-intel", - "comment_body": "Thank you for the screenshot. I've just observed such behaviour, when fixing documentation. Now I find some tips about it in the documentation: https://www.markdownguide.org/basic-syntax#line-break-best-practices\r\nYou can see such behaviou if you generate this part documentation: https://github.com/apache/incubator-mxnet/pull/21060/files#diff-f16304c2f893c2e7d51d74dff3892e5e11b39b8043751ade78b0286bc49afbd2L225-L227\r\nfor the older version, where such double spaces where probably accidentally missing.", - "pr_file_module": null - }, - { - "comment_id": "916733346", - "repo_full_name": "apache/mxnet", - "pr_number": 21090, - "pr_file": "docs/python_docs/python/tutorials/getting-started/gluon_migration_guide.md", - "discussion_id": "916649711", - "commented_code": "@@ -33,41 +33,41 @@ In Gluon2.0, we extend the support to MXNet numpy and numpy extension with simpl\n \n - **Gluon Metrics** and **Optimizers**: refactored with MXNet numpy interface and addressed legacy issues.\n \n-Adopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience. \n+Adopting these new functionalities may or may not require modifications on your models. But don't worry, this migration guide will go through a high-level mapping from old functionality to new APIs and make Gluon2.0 migration a hassle-free experience.", - "comment_created_at": "2022-07-08T11:36:20+00:00", - "comment_author": "bartekkuncer", - "comment_body": "Thanks for the links!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "646146854", - "pr_number": 20262, - "pr_file": "docs/python_docs/python/tutorials/packages/gluon/blocks/custom-layer.md", - "created_at": "2021-06-06T15:15:02+00:00", - "commented_code": "[-0.05046433]\n [-1.2375476 ]\n [-0.15506986]]\n\n```\n\n\n## Parameters of a custom layer\n\nUsually, a layer has a set of associated parameters, sometimes also referred as weights. This is an internal state of a layer. 
Most often, these parameters are the ones, that we want to learn during backpropogation step, but sometimes these parameters might be just constants we want to use during forward pass.\n\nAll parameters of a block are stored and accessed via [ParameterDict](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/parameter.py#L508) class. This class helps with initialization, updating, saving and loading of the parameters. Each layer can have multiple set of parameters, and all of them can be stored in a single instance of the `ParameterDict` class. On a block level, the instance of the `ParameterDict` class is accessible via `self.params` field, and outside of a block one can access all parameters of the network via [collect_params()](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Block.collect_params) method called on a `container`. `ParameterDict` uses [Parameter](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Parameter) class to represent parameters inside of Apache MxNet neural network. If parameter doesn't exist, trying to get a parameter via `self.params` will create it automatically.\nAll parameters of a block are stored and accessed via [ParameterDict](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/parameter.py#L508) class. This class helps with initialization, updating, saving and loading of the parameters. Each layer can have multiple set of parameters, and all of them can be stored in a single instance of the `ParameterDict` class. On a block level, the instance of the `ParameterDict` class is accessible via `self.params` field, and outside of a block one can access all parameters of the network via [collect_params()](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Block.collect_params) method called on a `container`. `ParameterDict` uses [Parameter](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Parameter) class to represent parameters inside of Apache MxNet neural network.", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "646146854", - "repo_full_name": "apache/mxnet", - "pr_number": 20262, - "pr_file": "docs/python_docs/python/tutorials/packages/gluon/blocks/custom-layer.md", - "discussion_id": "646146854", - "commented_code": "@@ -131,50 +128,47 @@ Output:\n [-0.05046433]\n [-1.2375476 ]\n [-0.15506986]]\n-\n ```\n \n \n ## Parameters of a custom layer\n \n Usually, a layer has a set of associated parameters, sometimes also referred as weights. This is an internal state of a layer. Most often, these parameters are the ones, that we want to learn during backpropogation step, but sometimes these parameters might be just constants we want to use during forward pass.\n \n-All parameters of a block are stored and accessed via [ParameterDict](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/parameter.py#L508) class. This class helps with initialization, updating, saving and loading of the parameters. Each layer can have multiple set of parameters, and all of them can be stored in a single instance of the `ParameterDict` class. On a block level, the instance of the `ParameterDict` class is accessible via `self.params` field, and outside of a block one can access all parameters of the network via [collect_params()](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Block.collect_params) method called on a `container`. 
`ParameterDict` uses [Parameter](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Parameter) class to represent parameters inside of Apache MxNet neural network. If parameter doesn't exist, trying to get a parameter via `self.params` will create it automatically.\n+All parameters of a block are stored and accessed via [ParameterDict](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/parameter.py#L508) class. This class helps with initialization, updating, saving and loading of the parameters. Each layer can have multiple set of parameters, and all of them can be stored in a single instance of the `ParameterDict` class. On a block level, the instance of the `ParameterDict` class is accessible via `self.params` field, and outside of a block one can access all parameters of the network via [collect_params()](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Block.collect_params) method called on a `container`. `ParameterDict` uses [Parameter](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Parameter) class to represent parameters inside of Apache MxNet neural network.", - "comment_created_at": "2021-06-06T15:15:02+00:00", - "comment_author": "szha", - "comment_body": "We don't have a custom class for the dictionary of parameters. `collect_params()` now returns a regular dictionary. @leezu anything you want to highlight here?", - "pr_file_module": null - }, - { - "comment_id": "647241837", - "repo_full_name": "apache/mxnet", - "pr_number": 20262, - "pr_file": "docs/python_docs/python/tutorials/packages/gluon/blocks/custom-layer.md", - "discussion_id": "646146854", - "commented_code": "@@ -131,50 +128,47 @@ Output:\n [-0.05046433]\n [-1.2375476 ]\n [-0.15506986]]\n-\n ```\n \n \n ## Parameters of a custom layer\n \n Usually, a layer has a set of associated parameters, sometimes also referred as weights. This is an internal state of a layer. Most often, these parameters are the ones, that we want to learn during backpropogation step, but sometimes these parameters might be just constants we want to use during forward pass.\n \n-All parameters of a block are stored and accessed via [ParameterDict](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/parameter.py#L508) class. This class helps with initialization, updating, saving and loading of the parameters. Each layer can have multiple set of parameters, and all of them can be stored in a single instance of the `ParameterDict` class. On a block level, the instance of the `ParameterDict` class is accessible via `self.params` field, and outside of a block one can access all parameters of the network via [collect_params()](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Block.collect_params) method called on a `container`. `ParameterDict` uses [Parameter](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Parameter) class to represent parameters inside of Apache MxNet neural network. If parameter doesn't exist, trying to get a parameter via `self.params` will create it automatically.\n+All parameters of a block are stored and accessed via [ParameterDict](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/parameter.py#L508) class. This class helps with initialization, updating, saving and loading of the parameters. Each layer can have multiple set of parameters, and all of them can be stored in a single instance of the `ParameterDict` class. 
On a block level, the instance of the `ParameterDict` class is accessible via `self.params` field, and outside of a block one can access all parameters of the network via [collect_params()](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Block.collect_params) method called on a `container`. `ParameterDict` uses [Parameter](https://mxnet.apache.org/api/python/gluon/gluon.html#mxnet.gluon.Parameter) class to represent parameters inside of Apache MxNet neural network.", - "comment_created_at": "2021-06-08T08:43:58+00:00", - "comment_author": "leezu", - "comment_body": "Good catch. Let's remove the reference to ParameterDict and call it a `dict` of Parameters. It's also an option to use the https://docs.python.org/3/library/typing.html syntax to describe the type.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-eliminate-redundant-constructs.json b/_reviewers/mxnet-eliminate-redundant-constructs.json new file mode 100644 index 0000000..97d298d --- /dev/null +++ b/_reviewers/mxnet-eliminate-redundant-constructs.json @@ -0,0 +1,290 @@ +[ + { + "discussion_id": "693931632", + "pr_number": 20505, + "pr_file": "python/mxnet/gluon/contrib/estimator/estimator.py", + "created_at": "2021-08-23T12:36:20+00:00", + "commented_code": "for handler in epoch_begin:\n handler.epoch_begin(estimator_ref)\n\n for i, batch in enumerate(train_data):\n for _, batch in enumerate(train_data):", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "693931632", + "repo_full_name": "apache/mxnet", + "pr_number": 20505, + "pr_file": "python/mxnet/gluon/contrib/estimator/estimator.py", + "discussion_id": "693931632", + "commented_code": "@@ -392,7 +392,7 @@ def fit(self, train_data,\n for handler in epoch_begin:\n handler.epoch_begin(estimator_ref)\n \n- for i, batch in enumerate(train_data):\n+ for _, batch in enumerate(train_data):", + "comment_created_at": "2021-08-23T12:36:20+00:00", + "comment_author": "bgawrych", + "comment_body": "in most of the cases enumerate makes no sense", + "pr_file_module": null + }, + { + "comment_id": "695844534", + "repo_full_name": "apache/mxnet", + "pr_number": 20505, + "pr_file": "python/mxnet/gluon/contrib/estimator/estimator.py", + "discussion_id": "693931632", + "commented_code": "@@ -392,7 +392,7 @@ def fit(self, train_data,\n for handler in epoch_begin:\n handler.epoch_begin(estimator_ref)\n \n- for i, batch in enumerate(train_data):\n+ for _, batch in enumerate(train_data):", + "comment_created_at": "2021-08-25T15:03:31+00:00", + "comment_author": "szha", + "comment_body": "```suggestion\r\n for batch in train_data:\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "695844760", + "pr_number": 20505, + "pr_file": "python/mxnet/operator.py", + "created_at": "2021-08-25T15:03:48+00:00", + "commented_code": "\"entries in returned aux stypes, \" \\\n \"got %d.\"%(len(tensors[4]), len(ret[4]))\n rstype = []\n for i, ret_list in enumerate(ret):\n for _, ret_list in enumerate(ret):", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "695844760", + "repo_full_name": "apache/mxnet", + "pr_number": 20505, + "pr_file": "python/mxnet/operator.py", + "discussion_id": "695844760", + "commented_code": "@@ -824,7 +824,7 @@ def infer_storage_type_backward_entry(num_tensor, tensor_stypes, tags, _):\n \"entries in returned aux stypes, \" \\\n \"got %d.\"%(len(tensors[4]), len(ret[4]))\n rstype = []\n- for i, ret_list in enumerate(ret):\n+ for _, ret_list in enumerate(ret):", + 
"comment_created_at": "2021-08-25T15:03:48+00:00", + "comment_author": "szha", + "comment_body": "```suggestion\r\n for ret_list in ret:\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "695846443", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_gluon_data.py", + "created_at": "2021-08-25T15:05:38+00:00", + "commented_code": "a = mx.gluon.data.SimpleDataset([i for i in range(length)])\n a_filtered = a.filter(lambda x: x % 10 == 0)\n assert(len(a_filtered) == 10)\n for idx, sample in enumerate(a_filtered):\n for _, sample in enumerate(a_filtered):", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "695846443", + "repo_full_name": "apache/mxnet", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_gluon_data.py", + "discussion_id": "695846443", + "commented_code": "@@ -382,25 +382,25 @@ def test_dataset_filter():\n a = mx.gluon.data.SimpleDataset([i for i in range(length)])\n a_filtered = a.filter(lambda x: x % 10 == 0)\n assert(len(a_filtered) == 10)\n- for idx, sample in enumerate(a_filtered):\n+ for _, sample in enumerate(a_filtered):", + "comment_created_at": "2021-08-25T15:05:38+00:00", + "comment_author": "szha", + "comment_body": "```suggestion\r\n for sample in a_filtered:\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "695846885", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_gluon_data.py", + "created_at": "2021-08-25T15:06:07+00:00", + "commented_code": "a = mx.gluon.data.SimpleDataset([i for i in range(length)])\n a_filtered = a.filter(lambda x: x % 10 == 0)\n assert(len(a_filtered) == 10)\n for idx, sample in enumerate(a_filtered):\n for _, sample in enumerate(a_filtered):\n assert sample % 10 == 0\n a_xform_filtered = a.transform(lambda x: x + 1).filter(lambda x: x % 10 == 0)\n assert(len(a_xform_filtered) == 10)\n # the filtered data is already transformed\n for idx, sample in enumerate(a_xform_filtered):\n for _, sample in enumerate(a_xform_filtered):\n assert sample % 10 == 0\n\ndef test_dataset_filter_handle():\n length = 100\n a = mx.gluon.data.SimpleDataset(np.arange(length))\n a_filtered = a.filter(lambda x: x % 10 == 0).__mx_handle__()\n assert(len(a_filtered) == 10)\n for idx, sample in enumerate(a_filtered):\n for _, sample in enumerate(a_filtered):", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "695846885", + "repo_full_name": "apache/mxnet", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_gluon_data.py", + "discussion_id": "695846885", + "commented_code": "@@ -382,25 +382,25 @@ def test_dataset_filter():\n a = mx.gluon.data.SimpleDataset([i for i in range(length)])\n a_filtered = a.filter(lambda x: x % 10 == 0)\n assert(len(a_filtered) == 10)\n- for idx, sample in enumerate(a_filtered):\n+ for _, sample in enumerate(a_filtered):\n assert sample % 10 == 0\n a_xform_filtered = a.transform(lambda x: x + 1).filter(lambda x: x % 10 == 0)\n assert(len(a_xform_filtered) == 10)\n # the filtered data is already transformed\n- for idx, sample in enumerate(a_xform_filtered):\n+ for _, sample in enumerate(a_xform_filtered):\n assert sample % 10 == 0\n \n def test_dataset_filter_handle():\n length = 100\n a = mx.gluon.data.SimpleDataset(np.arange(length))\n a_filtered = a.filter(lambda x: x % 10 == 0).__mx_handle__()\n assert(len(a_filtered) == 10)\n- for idx, sample in enumerate(a_filtered):\n+ for _, sample in enumerate(a_filtered):", + "comment_created_at": "2021-08-25T15:06:07+00:00", + 
"comment_author": "szha", + "comment_body": "```suggestion\r\n for sample in a_filtered:\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "695847740", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_gluon_data.py", + "created_at": "2021-08-25T15:07:01+00:00", + "commented_code": "assert len(a_take_10) == count\n expected_total = sum([i for i in range(count)])\n total = 0\n for idx, sample in enumerate(a_take_10):\n for _, sample in enumerate(a_take_10):", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "695847740", + "repo_full_name": "apache/mxnet", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_gluon_data.py", + "discussion_id": "695847740", + "commented_code": "@@ -451,7 +451,7 @@ def test_dataset_take():\n assert len(a_take_10) == count\n expected_total = sum([i for i in range(count)])\n total = 0\n- for idx, sample in enumerate(a_take_10):\n+ for _, sample in enumerate(a_take_10):", + "comment_created_at": "2021-08-25T15:07:01+00:00", + "comment_author": "szha", + "comment_body": "```suggestion\r\n for sample in a_take_10:\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "695848016", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_gluon_data.py", + "created_at": "2021-08-25T15:07:16+00:00", + "commented_code": "assert len(a_xform_take_10) == count\n expected_total = sum([i * 10 for i in range(count)])\n total = 0\n for idx, sample in enumerate(a_xform_take_10):\n for _, sample in enumerate(a_xform_take_10):", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "695848016", + "repo_full_name": "apache/mxnet", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_gluon_data.py", + "discussion_id": "695848016", + "commented_code": "@@ -460,7 +460,7 @@ def test_dataset_take():\n assert len(a_xform_take_10) == count\n expected_total = sum([i * 10 for i in range(count)])\n total = 0\n- for idx, sample in enumerate(a_xform_take_10):\n+ for _, sample in enumerate(a_xform_take_10):", + "comment_created_at": "2021-08-25T15:07:16+00:00", + "comment_author": "szha", + "comment_body": "```suggestion\r\n for sample in a_xform_take_10:\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "695848243", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_gluon_data.py", + "created_at": "2021-08-25T15:07:30+00:00", + "commented_code": "assert len(a_take_10) == count\n expected_total = sum([i for i in range(count)])\n total = 0\n for idx, sample in enumerate(a_take_10):\n for _, sample in enumerate(a_take_10):", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "695848243", + "repo_full_name": "apache/mxnet", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_gluon_data.py", + "discussion_id": "695848243", + "commented_code": "@@ -477,7 +477,7 @@ def test_dataset_take_handle():\n assert len(a_take_10) == count\n expected_total = sum([i for i in range(count)])\n total = 0\n- for idx, sample in enumerate(a_take_10):\n+ for _, sample in enumerate(a_take_10):", + "comment_created_at": "2021-08-25T15:07:30+00:00", + "comment_author": "szha", + "comment_body": "```suggestion\r\n for sample in a_take_10:\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "695848426", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_gluon_data.py", + "created_at": "2021-08-25T15:07:43+00:00", + "commented_code": "assert len(a_xform_take_10) == count\n 
expected_total = sum([i for i in range(count)])\n total = 0\n for idx, sample in enumerate(a_xform_take_10):\n for _, sample in enumerate(a_xform_take_10):", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "695848426", + "repo_full_name": "apache/mxnet", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_gluon_data.py", + "discussion_id": "695848426", + "commented_code": "@@ -486,7 +486,7 @@ def test_dataset_take_handle():\n assert len(a_xform_take_10) == count\n expected_total = sum([i for i in range(count)])\n total = 0\n- for idx, sample in enumerate(a_xform_take_10):\n+ for _, sample in enumerate(a_xform_take_10):", + "comment_created_at": "2021-08-25T15:07:43+00:00", + "comment_author": "szha", + "comment_body": "```suggestion\r\n for sample in a_xform_take_10:\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "695853434", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_numpy_interoperability.py", + "created_at": "2021-08-25T15:13:00+00:00", + "commented_code": "# assertRaises(np.AxisError, np.swapaxes, -5, 0)\n for i in range(-4, 4):\n for j in range(-4, 4):\n for k, src in enumerate((a, b)):\n for _, src in enumerate((a, b)):", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "695853434", + "repo_full_name": "apache/mxnet", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_numpy_interoperability.py", + "discussion_id": "695853434", + "commented_code": "@@ -421,7 +421,7 @@ def _add_workload_swapaxes():\n # assertRaises(np.AxisError, np.swapaxes, -5, 0)\n for i in range(-4, 4):\n for j in range(-4, 4):\n- for k, src in enumerate((a, b)):\n+ for _, src in enumerate((a, b)):", + "comment_created_at": "2021-08-25T15:13:00+00:00", + "comment_author": "szha", + "comment_body": "```suggestion\r\n for src in (a, b):\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "695858106", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_numpy_op.py", + "created_at": "2021-08-25T15:17:56+00:00", + "commented_code": "assert_almost_equal(out_mx.asnumpy(), expected_np, rtol=rtol, atol=atol)\n out_mx.backward()\n cur_grad = []\n for (iop, op) in enumerate(x):\n for (_, op) in enumerate(x):", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "695858106", + "repo_full_name": "apache/mxnet", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_numpy_op.py", + "discussion_id": "695858106", + "commented_code": "@@ -8410,10 +8410,10 @@ def dbg(name, data):\n assert_almost_equal(out_mx.asnumpy(), expected_np, rtol=rtol, atol=atol)\n out_mx.backward()\n cur_grad = []\n- for (iop, op) in enumerate(x):\n+ for (_, op) in enumerate(x):", + "comment_created_at": "2021-08-25T15:17:56+00:00", + "comment_author": "szha", + "comment_body": "```suggestion\r\n for op in x:\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "646148463", + "pr_number": 20262, + "pr_file": "python/mxnet/gluon/data/vision/transforms/__init__.py", + "created_at": "2021-06-06T15:28:34+00:00", + "commented_code": "super(Cast, self).__init__()\n self._dtype = dtype\n\n def hybrid_forward(self, F, *args):\n if is_np_array():\n F = F.npx\n return tuple([F.cast(x, self._dtype) for x in args])\n def forward(self, *args):\n return tuple([x.astype(self._dtype) for x in args])", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "646148463", + "repo_full_name": "apache/mxnet", + "pr_number": 20262, + 
"pr_file": "python/mxnet/gluon/data/vision/transforms/__init__.py", + "discussion_id": "646148463", + "commented_code": "@@ -129,10 +131,8 @@ def __init__(self, dtype='float32'):\n super(Cast, self).__init__()\n self._dtype = dtype\n \n- def hybrid_forward(self, F, *args):\n- if is_np_array():\n- F = F.npx\n- return tuple([F.cast(x, self._dtype) for x in args])\n+ def forward(self, *args):\n+ return tuple([x.astype(self._dtype) for x in args])", + "comment_created_at": "2021-06-06T15:28:34+00:00", + "comment_author": "szha", + "comment_body": "nit: the list doesn't seem necessary", + "pr_file_module": null + }, + { + "comment_id": "646148726", + "repo_full_name": "apache/mxnet", + "pr_number": 20262, + "pr_file": "python/mxnet/gluon/data/vision/transforms/__init__.py", + "discussion_id": "646148463", + "commented_code": "@@ -129,10 +131,8 @@ def __init__(self, dtype='float32'):\n super(Cast, self).__init__()\n self._dtype = dtype\n \n- def hybrid_forward(self, F, *args):\n- if is_np_array():\n- F = F.npx\n- return tuple([F.cast(x, self._dtype) for x in args])\n+ def forward(self, *args):\n+ return tuple([x.astype(self._dtype) for x in args])", + "comment_created_at": "2021-06-06T15:30:43+00:00", + "comment_author": "szha", + "comment_body": "```suggestion\r\n return tuple(x.astype(self._dtype) for x in args)\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "711215778", + "pr_number": 20592, + "pr_file": "python/mxnet/numpy/linalg.py", + "created_at": "2021-09-17T17:00:40+00:00", + "commented_code": ">>> a = np.array([[ 5.4119368 , 8.996273 , -5.086096 ],\n ... [ 0.8866155 , 1.7490431 , -4.6107802 ],\n ... [-0.08034172, 4.4172044 , 1.4528792 ]])\n >>> LA.eigvalsh(a, UPLO='L')\n >>> LA.eigvalsh(a, upper=False)\n array([-2.87381886, 5.10144682, 6.38623114]) # in ascending order\n \"\"\"\n if(upper==False):", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "711215778", + "repo_full_name": "apache/mxnet", + "pr_number": 20592, + "pr_file": "python/mxnet/numpy/linalg.py", + "discussion_id": "711215778", + "commented_code": "@@ -890,9 +891,13 @@ def eigvalsh(a, UPLO='L'):\n >>> a = np.array([[ 5.4119368 , 8.996273 , -5.086096 ],\n ... [ 0.8866155 , 1.7490431 , -4.6107802 ],\n ... [-0.08034172, 4.4172044 , 1.4528792 ]])\n- >>> LA.eigvalsh(a, UPLO='L')\n+ >>> LA.eigvalsh(a, upper=False)\n array([-2.87381886, 5.10144682, 6.38623114]) # in ascending order\n \"\"\"\n+ if(upper==False):", + "comment_created_at": "2021-09-17T17:00:40+00:00", + "comment_author": "barry-jin", + "comment_body": "Comparison to `False` in python should be not expression: `if(upper==False)` => `if not upper`", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-eliminate-redundant-constructs.md b/_reviewers/mxnet-eliminate-redundant-constructs.md index 34dc781..32768a4 100644 --- a/_reviewers/mxnet-eliminate-redundant-constructs.md +++ b/_reviewers/mxnet-eliminate-redundant-constructs.md @@ -46,295 +46,3 @@ if not upper: ``` These simplifications improve code readability and maintain a clean, Pythonic style that aligns with best practices. 
- - -[ - { - "discussion_id": "693931632", - "pr_number": 20505, - "pr_file": "python/mxnet/gluon/contrib/estimator/estimator.py", - "created_at": "2021-08-23T12:36:20+00:00", - "commented_code": "for handler in epoch_begin:\n handler.epoch_begin(estimator_ref)\n\n for i, batch in enumerate(train_data):\n for _, batch in enumerate(train_data):", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "693931632", - "repo_full_name": "apache/mxnet", - "pr_number": 20505, - "pr_file": "python/mxnet/gluon/contrib/estimator/estimator.py", - "discussion_id": "693931632", - "commented_code": "@@ -392,7 +392,7 @@ def fit(self, train_data,\n for handler in epoch_begin:\n handler.epoch_begin(estimator_ref)\n \n- for i, batch in enumerate(train_data):\n+ for _, batch in enumerate(train_data):", - "comment_created_at": "2021-08-23T12:36:20+00:00", - "comment_author": "bgawrych", - "comment_body": "in most of the cases enumerate makes no sense", - "pr_file_module": null - }, - { - "comment_id": "695844534", - "repo_full_name": "apache/mxnet", - "pr_number": 20505, - "pr_file": "python/mxnet/gluon/contrib/estimator/estimator.py", - "discussion_id": "693931632", - "commented_code": "@@ -392,7 +392,7 @@ def fit(self, train_data,\n for handler in epoch_begin:\n handler.epoch_begin(estimator_ref)\n \n- for i, batch in enumerate(train_data):\n+ for _, batch in enumerate(train_data):", - "comment_created_at": "2021-08-25T15:03:31+00:00", - "comment_author": "szha", - "comment_body": "```suggestion\r\n for batch in train_data:\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "695844760", - "pr_number": 20505, - "pr_file": "python/mxnet/operator.py", - "created_at": "2021-08-25T15:03:48+00:00", - "commented_code": "\"entries in returned aux stypes, \" \\\n \"got %d.\"%(len(tensors[4]), len(ret[4]))\n rstype = []\n for i, ret_list in enumerate(ret):\n for _, ret_list in enumerate(ret):", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "695844760", - "repo_full_name": "apache/mxnet", - "pr_number": 20505, - "pr_file": "python/mxnet/operator.py", - "discussion_id": "695844760", - "commented_code": "@@ -824,7 +824,7 @@ def infer_storage_type_backward_entry(num_tensor, tensor_stypes, tags, _):\n \"entries in returned aux stypes, \" \\\n \"got %d.\"%(len(tensors[4]), len(ret[4]))\n rstype = []\n- for i, ret_list in enumerate(ret):\n+ for _, ret_list in enumerate(ret):", - "comment_created_at": "2021-08-25T15:03:48+00:00", - "comment_author": "szha", - "comment_body": "```suggestion\r\n for ret_list in ret:\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "695846443", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_gluon_data.py", - "created_at": "2021-08-25T15:05:38+00:00", - "commented_code": "a = mx.gluon.data.SimpleDataset([i for i in range(length)])\n a_filtered = a.filter(lambda x: x % 10 == 0)\n assert(len(a_filtered) == 10)\n for idx, sample in enumerate(a_filtered):\n for _, sample in enumerate(a_filtered):", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "695846443", - "repo_full_name": "apache/mxnet", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_gluon_data.py", - "discussion_id": "695846443", - "commented_code": "@@ -382,25 +382,25 @@ def test_dataset_filter():\n a = mx.gluon.data.SimpleDataset([i for i in range(length)])\n a_filtered = a.filter(lambda x: x % 10 == 0)\n assert(len(a_filtered) == 10)\n- for idx, sample in 
enumerate(a_filtered):\n+ for _, sample in enumerate(a_filtered):", - "comment_created_at": "2021-08-25T15:05:38+00:00", - "comment_author": "szha", - "comment_body": "```suggestion\r\n for sample in a_filtered:\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "695846885", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_gluon_data.py", - "created_at": "2021-08-25T15:06:07+00:00", - "commented_code": "a = mx.gluon.data.SimpleDataset([i for i in range(length)])\n a_filtered = a.filter(lambda x: x % 10 == 0)\n assert(len(a_filtered) == 10)\n for idx, sample in enumerate(a_filtered):\n for _, sample in enumerate(a_filtered):\n assert sample % 10 == 0\n a_xform_filtered = a.transform(lambda x: x + 1).filter(lambda x: x % 10 == 0)\n assert(len(a_xform_filtered) == 10)\n # the filtered data is already transformed\n for idx, sample in enumerate(a_xform_filtered):\n for _, sample in enumerate(a_xform_filtered):\n assert sample % 10 == 0\n\ndef test_dataset_filter_handle():\n length = 100\n a = mx.gluon.data.SimpleDataset(np.arange(length))\n a_filtered = a.filter(lambda x: x % 10 == 0).__mx_handle__()\n assert(len(a_filtered) == 10)\n for idx, sample in enumerate(a_filtered):\n for _, sample in enumerate(a_filtered):", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "695846885", - "repo_full_name": "apache/mxnet", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_gluon_data.py", - "discussion_id": "695846885", - "commented_code": "@@ -382,25 +382,25 @@ def test_dataset_filter():\n a = mx.gluon.data.SimpleDataset([i for i in range(length)])\n a_filtered = a.filter(lambda x: x % 10 == 0)\n assert(len(a_filtered) == 10)\n- for idx, sample in enumerate(a_filtered):\n+ for _, sample in enumerate(a_filtered):\n assert sample % 10 == 0\n a_xform_filtered = a.transform(lambda x: x + 1).filter(lambda x: x % 10 == 0)\n assert(len(a_xform_filtered) == 10)\n # the filtered data is already transformed\n- for idx, sample in enumerate(a_xform_filtered):\n+ for _, sample in enumerate(a_xform_filtered):\n assert sample % 10 == 0\n \n def test_dataset_filter_handle():\n length = 100\n a = mx.gluon.data.SimpleDataset(np.arange(length))\n a_filtered = a.filter(lambda x: x % 10 == 0).__mx_handle__()\n assert(len(a_filtered) == 10)\n- for idx, sample in enumerate(a_filtered):\n+ for _, sample in enumerate(a_filtered):", - "comment_created_at": "2021-08-25T15:06:07+00:00", - "comment_author": "szha", - "comment_body": "```suggestion\r\n for sample in a_filtered:\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "695847740", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_gluon_data.py", - "created_at": "2021-08-25T15:07:01+00:00", - "commented_code": "assert len(a_take_10) == count\n expected_total = sum([i for i in range(count)])\n total = 0\n for idx, sample in enumerate(a_take_10):\n for _, sample in enumerate(a_take_10):", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "695847740", - "repo_full_name": "apache/mxnet", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_gluon_data.py", - "discussion_id": "695847740", - "commented_code": "@@ -451,7 +451,7 @@ def test_dataset_take():\n assert len(a_take_10) == count\n expected_total = sum([i for i in range(count)])\n total = 0\n- for idx, sample in enumerate(a_take_10):\n+ for _, sample in enumerate(a_take_10):", - "comment_created_at": "2021-08-25T15:07:01+00:00", - "comment_author": "szha", - 
"comment_body": "```suggestion\r\n for sample in a_take_10:\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "695848016", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_gluon_data.py", - "created_at": "2021-08-25T15:07:16+00:00", - "commented_code": "assert len(a_xform_take_10) == count\n expected_total = sum([i * 10 for i in range(count)])\n total = 0\n for idx, sample in enumerate(a_xform_take_10):\n for _, sample in enumerate(a_xform_take_10):", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "695848016", - "repo_full_name": "apache/mxnet", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_gluon_data.py", - "discussion_id": "695848016", - "commented_code": "@@ -460,7 +460,7 @@ def test_dataset_take():\n assert len(a_xform_take_10) == count\n expected_total = sum([i * 10 for i in range(count)])\n total = 0\n- for idx, sample in enumerate(a_xform_take_10):\n+ for _, sample in enumerate(a_xform_take_10):", - "comment_created_at": "2021-08-25T15:07:16+00:00", - "comment_author": "szha", - "comment_body": "```suggestion\r\n for sample in a_xform_take_10:\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "695848243", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_gluon_data.py", - "created_at": "2021-08-25T15:07:30+00:00", - "commented_code": "assert len(a_take_10) == count\n expected_total = sum([i for i in range(count)])\n total = 0\n for idx, sample in enumerate(a_take_10):\n for _, sample in enumerate(a_take_10):", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "695848243", - "repo_full_name": "apache/mxnet", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_gluon_data.py", - "discussion_id": "695848243", - "commented_code": "@@ -477,7 +477,7 @@ def test_dataset_take_handle():\n assert len(a_take_10) == count\n expected_total = sum([i for i in range(count)])\n total = 0\n- for idx, sample in enumerate(a_take_10):\n+ for _, sample in enumerate(a_take_10):", - "comment_created_at": "2021-08-25T15:07:30+00:00", - "comment_author": "szha", - "comment_body": "```suggestion\r\n for sample in a_take_10:\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "695848426", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_gluon_data.py", - "created_at": "2021-08-25T15:07:43+00:00", - "commented_code": "assert len(a_xform_take_10) == count\n expected_total = sum([i for i in range(count)])\n total = 0\n for idx, sample in enumerate(a_xform_take_10):\n for _, sample in enumerate(a_xform_take_10):", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "695848426", - "repo_full_name": "apache/mxnet", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_gluon_data.py", - "discussion_id": "695848426", - "commented_code": "@@ -486,7 +486,7 @@ def test_dataset_take_handle():\n assert len(a_xform_take_10) == count\n expected_total = sum([i for i in range(count)])\n total = 0\n- for idx, sample in enumerate(a_xform_take_10):\n+ for _, sample in enumerate(a_xform_take_10):", - "comment_created_at": "2021-08-25T15:07:43+00:00", - "comment_author": "szha", - "comment_body": "```suggestion\r\n for sample in a_xform_take_10:\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "695853434", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_numpy_interoperability.py", - "created_at": "2021-08-25T15:13:00+00:00", - "commented_code": "# 
assertRaises(np.AxisError, np.swapaxes, -5, 0)\n for i in range(-4, 4):\n for j in range(-4, 4):\n for k, src in enumerate((a, b)):\n for _, src in enumerate((a, b)):", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "695853434", - "repo_full_name": "apache/mxnet", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_numpy_interoperability.py", - "discussion_id": "695853434", - "commented_code": "@@ -421,7 +421,7 @@ def _add_workload_swapaxes():\n # assertRaises(np.AxisError, np.swapaxes, -5, 0)\n for i in range(-4, 4):\n for j in range(-4, 4):\n- for k, src in enumerate((a, b)):\n+ for _, src in enumerate((a, b)):", - "comment_created_at": "2021-08-25T15:13:00+00:00", - "comment_author": "szha", - "comment_body": "```suggestion\r\n for src in (a, b):\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "695858106", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_numpy_op.py", - "created_at": "2021-08-25T15:17:56+00:00", - "commented_code": "assert_almost_equal(out_mx.asnumpy(), expected_np, rtol=rtol, atol=atol)\n out_mx.backward()\n cur_grad = []\n for (iop, op) in enumerate(x):\n for (_, op) in enumerate(x):", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "695858106", - "repo_full_name": "apache/mxnet", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_numpy_op.py", - "discussion_id": "695858106", - "commented_code": "@@ -8410,10 +8410,10 @@ def dbg(name, data):\n assert_almost_equal(out_mx.asnumpy(), expected_np, rtol=rtol, atol=atol)\n out_mx.backward()\n cur_grad = []\n- for (iop, op) in enumerate(x):\n+ for (_, op) in enumerate(x):", - "comment_created_at": "2021-08-25T15:17:56+00:00", - "comment_author": "szha", - "comment_body": "```suggestion\r\n for op in x:\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "646148463", - "pr_number": 20262, - "pr_file": "python/mxnet/gluon/data/vision/transforms/__init__.py", - "created_at": "2021-06-06T15:28:34+00:00", - "commented_code": "super(Cast, self).__init__()\n self._dtype = dtype\n\n def hybrid_forward(self, F, *args):\n if is_np_array():\n F = F.npx\n return tuple([F.cast(x, self._dtype) for x in args])\n def forward(self, *args):\n return tuple([x.astype(self._dtype) for x in args])", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "646148463", - "repo_full_name": "apache/mxnet", - "pr_number": 20262, - "pr_file": "python/mxnet/gluon/data/vision/transforms/__init__.py", - "discussion_id": "646148463", - "commented_code": "@@ -129,10 +131,8 @@ def __init__(self, dtype='float32'):\n super(Cast, self).__init__()\n self._dtype = dtype\n \n- def hybrid_forward(self, F, *args):\n- if is_np_array():\n- F = F.npx\n- return tuple([F.cast(x, self._dtype) for x in args])\n+ def forward(self, *args):\n+ return tuple([x.astype(self._dtype) for x in args])", - "comment_created_at": "2021-06-06T15:28:34+00:00", - "comment_author": "szha", - "comment_body": "nit: the list doesn't seem necessary", - "pr_file_module": null - }, - { - "comment_id": "646148726", - "repo_full_name": "apache/mxnet", - "pr_number": 20262, - "pr_file": "python/mxnet/gluon/data/vision/transforms/__init__.py", - "discussion_id": "646148463", - "commented_code": "@@ -129,10 +131,8 @@ def __init__(self, dtype='float32'):\n super(Cast, self).__init__()\n self._dtype = dtype\n \n- def hybrid_forward(self, F, *args):\n- if is_np_array():\n- F = F.npx\n- return tuple([F.cast(x, self._dtype) for x in 
args])\n+ def forward(self, *args):\n+ return tuple([x.astype(self._dtype) for x in args])", - "comment_created_at": "2021-06-06T15:30:43+00:00", - "comment_author": "szha", - "comment_body": "```suggestion\r\n return tuple(x.astype(self._dtype) for x in args)\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "711215778", - "pr_number": 20592, - "pr_file": "python/mxnet/numpy/linalg.py", - "created_at": "2021-09-17T17:00:40+00:00", - "commented_code": ">>> a = np.array([[ 5.4119368 , 8.996273 , -5.086096 ],\n ... [ 0.8866155 , 1.7490431 , -4.6107802 ],\n ... [-0.08034172, 4.4172044 , 1.4528792 ]])\n >>> LA.eigvalsh(a, UPLO='L')\n >>> LA.eigvalsh(a, upper=False)\n array([-2.87381886, 5.10144682, 6.38623114]) # in ascending order\n \"\"\"\n if(upper==False):", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "711215778", - "repo_full_name": "apache/mxnet", - "pr_number": 20592, - "pr_file": "python/mxnet/numpy/linalg.py", - "discussion_id": "711215778", - "commented_code": "@@ -890,9 +891,13 @@ def eigvalsh(a, UPLO='L'):\n >>> a = np.array([[ 5.4119368 , 8.996273 , -5.086096 ],\n ... [ 0.8866155 , 1.7490431 , -4.6107802 ],\n ... [-0.08034172, 4.4172044 , 1.4528792 ]])\n- >>> LA.eigvalsh(a, UPLO='L')\n+ >>> LA.eigvalsh(a, upper=False)\n array([-2.87381886, 5.10144682, 6.38623114]) # in ascending order\n \"\"\"\n+ if(upper==False):", - "comment_created_at": "2021-09-17T17:00:40+00:00", - "comment_author": "barry-jin", - "comment_body": "Comparison to `False` in python should be not expression: `if(upper==False)` => `if not upper`", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-explain-optimization-mechanisms.json b/_reviewers/mxnet-explain-optimization-mechanisms.json new file mode 100644 index 0000000..ec6983a --- /dev/null +++ b/_reviewers/mxnet-explain-optimization-mechanisms.json @@ -0,0 +1,112 @@ +[ + { + "discussion_id": "954872677", + "pr_number": 21127, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization_inc.md", + "created_at": "2022-08-25T11:55:17+00:00", + "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n# Improving accuracy with Intel® Neural Compressor\n\nThe accuracy of a model can decrease as a result of quantization. When the accuracy drop is significant, we can try to manually find a better quantization configuration (exclude some layers, try different calibration methods, etc.), but for bigger models this might prove to be a difficult and time consuming task. [Intel® Neural Compressor](https://github.com/intel/neural-compressor) (INC) tries to automate this process using several tuning heuristics, which aim to find the quantization configuration that satisfies the specified accuracy requirement.\n\n**NOTE:**\n\nMost tuning strategies will try different configurations on an evaluation dataset in order to find out how each layer affects the accuracy of the model. This means that for larger models, it may take a long time to find a solution (as the tuning space is usually larger and the evaluation itself takes longer).\n\n## Installation and Prerequisites\n\n- Install MXNet with oneDNN enabled as described in the [Get started](https://mxnet.apache.org/versions/master/get_started?platform=linux&language=python&processor=cpu&environ=pip&). 
(Until the 2.0 release you can use the nightly build version: `pip install --pre mxnet -f https://dist.mxnet.io/python`)\n\n- Install Intel® Neural Compressor:\n\n Use one of the commands below to install INC (supported python versions are: 3.6, 3.7, 3.8, 3.9):\n\n ```bash\n # install stable version from pip\n pip install neural-compressor\n\n # install nightly version from pip\n pip install -i https://test.pypi.org/simple/ neural-compressor\n\n # install stable version from conda\n conda install neural-compressor -c conda-forge -c intel\n ```\n If you come into trouble with dependencies on `cv2` library you can run: `apt-get update && apt-get install -y python3-opencv`\n\n## Configuration file\n\nQuantization tuning process can be customized in the yaml configuration file. Below is a simple example:\n\n```yaml\n# cnn.yaml\n\nversion: 1.0\n\nmodel:\n name: cnn\n framework: mxnet\n\nquantization:\n calibration:\n sampling_size: 160 # number of samples for calibration\n\ntuning:\n strategy:\n name: basic\n accuracy_criterion:\n relative: 0.01\n exit_policy:\n timeout: 0\n random_seed: 9527\n```\n\nWe are using the `basic` strategy, but you could also try out different ones. [Here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md) you can find a list of strategies available in INC and details of how they work. You can also add your own strategy if the existing ones do not suit your needs.\n\nSince the value of `timeout` is 0, INC will run until it finds a configuration that satisfies the accuracy criterion and then exit. Depending on the strategy this may not be ideal, as sometimes it would be better to further explore the tuning space to find a superior configuration both in terms of accuracy and speed. To achieve this, we can set a specific `timeout` value, which will tell INC how long (in seconds) it should run.\n\nFor more information about the configuration file, see the [template](https://github.com/intel/neural-compressor/blob/master/neural_compressor/template/ptq.yaml) from the official INC repo. Keep in mind that only the `post training quantization` is currently supported for MXNet.\n\n## Model quantization and tuning\n\nIn general, Intel® Neural Compressor requires 4 elements in order to run: \n1. Config file - like the example above \n2. Model to be quantized \n3. Calibration dataloader \n4. Evaluation function - a function that takes a model as an argument and returns the accuracy it achieves on a certain evaluation dataset. \n\n### Quantizing ResNet\n\nThe [quantization](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/performance/backend/dnnl/dnnl_quantization.html#Quantization) sections described how to quantize ResNet using the native MXNet quantization. This example shows how we can achieve the similar results (with the auto-tuning) using INC.\n\n1. Get the model\n\n```python\nimport logging\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo import vision\n\nlogging.basicConfig()\nlogger = logging.getLogger('logger')\nlogger.setLevel(logging.INFO)\n\nbatch_shape = (1, 3, 224, 224)\nresnet18 = vision.resnet18_v1(pretrained=True)\n```\n\n2. 
Prepare the dataset:\n\n```python\nmx.test_utils.download('http://data.mxnet.io/data/val_256_q90.rec', 'data/val_256_q90.rec')\n\nbatch_size = 16\nmean_std = {'mean_r': 123.68, 'mean_g': 116.779, 'mean_b': 103.939,\n 'std_r': 58.393, 'std_g': 57.12, 'std_b': 57.375}\n\ndata = mx.io.ImageRecordIter(path_imgrec='data/val_256_q90.rec',\n batch_size=batch_size,\n data_shape=batch_shape[1:],\n rand_crop=False,\n rand_mirror=False,\n shuffle=False,\n **mean_std)\ndata.batch_size = batch_size\n```\n\n3. Prepare the evaluation function:\n\n```python\neval_samples = batch_size*10\n\ndef eval_func(model):\n data.reset()\n metric = mx.metric.Accuracy()\n for i, batch in enumerate(data):\n if i * batch_size >= eval_samples:\n break\n x = batch.data[0].as_in_context(mx.cpu())\n label = batch.label[0].as_in_context(mx.cpu())\n outputs = model.forward(x)\n metric.update(label, outputs)\n return metric.get()[1]\n```\n\n4. Run Intel® Neural Compressor:\n\n```python\nfrom neural_compressor.experimental import Quantization\nquantizer = Quantization(\"./cnn.yaml\")\nquantizer.model = resnet18\nquantizer.calib_dataloader = data\nquantizer.eval_func = eval_func\nqnet = quantizer.fit().model\n```\n\nSince this model already achieves good accuracy using native quantization (less than 1% accuracy drop), for the given configuration file, INC will end on the first configuration, quantizing all layers using `naive` calibration mode for each. To see the true potential of INC, we need a model which suffers from a larger accuracy drop after quantization.\n\n### Quantizing ResNet50v2\n\nThis example shows how to use INC to quantize ResNet50 v2. In this case, the native MXNet quantization introduce a huge accuracy drop (70% using `naive` calibration mode) and INC allows automatically find better solution.\n\nThis is the (TODO link to INC configuration file) for this example: \n```yaml\nversion: 1.0\n\nmodel:\n name: resnet50_v2\n framework: mxnet\n\nquantization:\n calibration:\n sampling_size: 192 # number of samples for calibration\n\ntuning:\n strategy:\n name: mse\n accuracy_criterion:\n relative: 0.015\n exit_policy:\n timeout: 0\n max_trials: 500\n random_seed: 9527\n```\n\nIt could be used with script below \n(TODO link to resnet_mse.py)\nto find operator which mostly influence accuracy drops and disable it from quantization. 
\nYou can find description of MSE strategy \n[here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md#user-content-mse).\n\n```python\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo.vision import resnet50_v2\nfrom mxnet.gluon.data.vision import transforms\nfrom mxnet.contrib.quantization import quantize_net\n\n# Preparing input data\nrgb_mean = (0.485, 0.456, 0.406)\nrgb_std = (0.229, 0.224, 0.225)\nbatch_size = 64\nnum_calib_batches = 9\n# set below proper path to ImageNet data set\ndataset = mx.gluon.data.vision.ImageRecordDataset('../imagenet/rec/val.rec')\n# Tuning in INC on whole data set takes too long time so we take only part of the whole data set\n# as representative part of it:\ndataset = dataset.take(num_calib_batches * batch_size)\ntransformer = transforms.Compose([transforms.Resize(256),\n transforms.CenterCrop(224),\n transforms.ToTensor(),\n transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n# Note: as input data are used many times during tuning it is better to prepared data earlier,\n# so lazy parameter for transform_first is set to False", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "954872677", + "repo_full_name": "apache/mxnet", + "pr_number": 21127, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization_inc.md", + "discussion_id": "954872677", + "commented_code": "@@ -0,0 +1,290 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+# Improving accuracy with Intel® Neural Compressor\n+\n+The accuracy of a model can decrease as a result of quantization. When the accuracy drop is significant, we can try to manually find a better quantization configuration (exclude some layers, try different calibration methods, etc.), but for bigger models this might prove to be a difficult and time consuming task. [Intel® Neural Compressor](https://github.com/intel/neural-compressor) (INC) tries to automate this process using several tuning heuristics, which aim to find the quantization configuration that satisfies the specified accuracy requirement.\n+\n+**NOTE:**\n+\n+Most tuning strategies will try different configurations on an evaluation dataset in order to find out how each layer affects the accuracy of the model. This means that for larger models, it may take a long time to find a solution (as the tuning space is usually larger and the evaluation itself takes longer).\n+\n+## Installation and Prerequisites\n+\n+- Install MXNet with oneDNN enabled as described in the [Get started](https://mxnet.apache.org/versions/master/get_started?platform=linux&language=python&processor=cpu&environ=pip&). (Until the 2.0 release you can use the nightly build version: `pip install --pre mxnet -f https://dist.mxnet.io/python`)\n+\n+- Install Intel® Neural Compressor:\n+\n+ Use one of the commands below to install INC (supported python versions are: 3.6, 3.7, 3.8, 3.9):\n+\n+ ```bash\n+ # install stable version from pip\n+ pip install neural-compressor\n+\n+ # install nightly version from pip\n+ pip install -i https://test.pypi.org/simple/ neural-compressor\n+\n+ # install stable version from conda\n+ conda install neural-compressor -c conda-forge -c intel\n+ ```\n+ If you come into trouble with dependencies on `cv2` library you can run: `apt-get update && apt-get install -y python3-opencv`\n+\n+## Configuration file\n+\n+Quantization tuning process can be customized in the yaml configuration file. 
Below is a simple example:\n+\n+```yaml\n+# cnn.yaml\n+\n+version: 1.0\n+\n+model:\n+ name: cnn\n+ framework: mxnet\n+\n+quantization:\n+ calibration:\n+ sampling_size: 160 # number of samples for calibration\n+\n+tuning:\n+ strategy:\n+ name: basic\n+ accuracy_criterion:\n+ relative: 0.01\n+ exit_policy:\n+ timeout: 0\n+ random_seed: 9527\n+```\n+\n+We are using the `basic` strategy, but you could also try out different ones. [Here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md) you can find a list of strategies available in INC and details of how they work. You can also add your own strategy if the existing ones do not suit your needs.\n+\n+Since the value of `timeout` is 0, INC will run until it finds a configuration that satisfies the accuracy criterion and then exit. Depending on the strategy this may not be ideal, as sometimes it would be better to further explore the tuning space to find a superior configuration both in terms of accuracy and speed. To achieve this, we can set a specific `timeout` value, which will tell INC how long (in seconds) it should run.\n+\n+For more information about the configuration file, see the [template](https://github.com/intel/neural-compressor/blob/master/neural_compressor/template/ptq.yaml) from the official INC repo. Keep in mind that only the `post training quantization` is currently supported for MXNet.\n+\n+## Model quantization and tuning\n+\n+In general, Intel® Neural Compressor requires 4 elements in order to run: \n+1. Config file - like the example above \n+2. Model to be quantized \n+3. Calibration dataloader \n+4. Evaluation function - a function that takes a model as an argument and returns the accuracy it achieves on a certain evaluation dataset. \n+\n+### Quantizing ResNet\n+\n+The [quantization](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/performance/backend/dnnl/dnnl_quantization.html#Quantization) sections described how to quantize ResNet using the native MXNet quantization. This example shows how we can achieve the similar results (with the auto-tuning) using INC.\n+\n+1. Get the model\n+\n+```python\n+import logging\n+import mxnet as mx\n+from mxnet.gluon.model_zoo import vision\n+\n+logging.basicConfig()\n+logger = logging.getLogger('logger')\n+logger.setLevel(logging.INFO)\n+\n+batch_shape = (1, 3, 224, 224)\n+resnet18 = vision.resnet18_v1(pretrained=True)\n+```\n+\n+2. Prepare the dataset:\n+\n+```python\n+mx.test_utils.download('http://data.mxnet.io/data/val_256_q90.rec', 'data/val_256_q90.rec')\n+\n+batch_size = 16\n+mean_std = {'mean_r': 123.68, 'mean_g': 116.779, 'mean_b': 103.939,\n+ 'std_r': 58.393, 'std_g': 57.12, 'std_b': 57.375}\n+\n+data = mx.io.ImageRecordIter(path_imgrec='data/val_256_q90.rec',\n+ batch_size=batch_size,\n+ data_shape=batch_shape[1:],\n+ rand_crop=False,\n+ rand_mirror=False,\n+ shuffle=False,\n+ **mean_std)\n+data.batch_size = batch_size\n+```\n+\n+3. Prepare the evaluation function:\n+\n+```python\n+eval_samples = batch_size*10\n+\n+def eval_func(model):\n+ data.reset()\n+ metric = mx.metric.Accuracy()\n+ for i, batch in enumerate(data):\n+ if i * batch_size >= eval_samples:\n+ break\n+ x = batch.data[0].as_in_context(mx.cpu())\n+ label = batch.label[0].as_in_context(mx.cpu())\n+ outputs = model.forward(x)\n+ metric.update(label, outputs)\n+ return metric.get()[1]\n+```\n+\n+4. 
Run Intel® Neural Compressor:\n+\n+```python\n+from neural_compressor.experimental import Quantization\n+quantizer = Quantization(\"./cnn.yaml\")\n+quantizer.model = resnet18\n+quantizer.calib_dataloader = data\n+quantizer.eval_func = eval_func\n+qnet = quantizer.fit().model\n+```\n+\n+Since this model already achieves good accuracy using native quantization (less than 1% accuracy drop), for the given configuration file, INC will end on the first configuration, quantizing all layers using `naive` calibration mode for each. To see the true potential of INC, we need a model which suffers from a larger accuracy drop after quantization.\n+\n+### Quantizing ResNet50v2\n+\n+This example shows how to use INC to quantize ResNet50 v2. In this case, the native MXNet quantization introduce a huge accuracy drop (70% using `naive` calibration mode) and INC allows automatically find better solution.\n+\n+This is the (TODO link to INC configuration file) for this example: \n+```yaml\n+version: 1.0\n+\n+model:\n+ name: resnet50_v2\n+ framework: mxnet\n+\n+quantization:\n+ calibration:\n+ sampling_size: 192 # number of samples for calibration\n+\n+tuning:\n+ strategy:\n+ name: mse\n+ accuracy_criterion:\n+ relative: 0.015\n+ exit_policy:\n+ timeout: 0\n+ max_trials: 500\n+ random_seed: 9527\n+```\n+\n+It could be used with script below \n+(TODO link to resnet_mse.py)\n+to find operator which mostly influence accuracy drops and disable it from quantization. \n+You can find description of MSE strategy \n+[here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md#user-content-mse).\n+\n+```python\n+import mxnet as mx\n+from mxnet.gluon.model_zoo.vision import resnet50_v2\n+from mxnet.gluon.data.vision import transforms\n+from mxnet.contrib.quantization import quantize_net\n+\n+# Preparing input data\n+rgb_mean = (0.485, 0.456, 0.406)\n+rgb_std = (0.229, 0.224, 0.225)\n+batch_size = 64\n+num_calib_batches = 9\n+# set below proper path to ImageNet data set\n+dataset = mx.gluon.data.vision.ImageRecordDataset('../imagenet/rec/val.rec')\n+# Tuning in INC on whole data set takes too long time so we take only part of the whole data set\n+# as representative part of it:\n+dataset = dataset.take(num_calib_batches * batch_size)\n+transformer = transforms.Compose([transforms.Resize(256),\n+ transforms.CenterCrop(224),\n+ transforms.ToTensor(),\n+ transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n+# Note: as input data are used many times during tuning it is better to prepared data earlier,\n+# so lazy parameter for transform_first is set to False", + "comment_created_at": "2022-08-25T11:55:17+00:00", + "comment_author": "bartekkuncer", + "comment_body": "```suggestion\r\n# Note: as input data is used many times during tuning, it is better to have it prepared earlier.\r\n# Therefore, lazy parameter for transform_first is set to False.\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "796512474", + "pr_number": 20856, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", + "created_at": "2022-02-01T11:42:30+00:00", + "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## Introduction\n\nAfter successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. 
In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n\nTwo main types of software optimizations can be characerized as:\n- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining sequence of operations which can be performed one after another immediately (example: ReLU activation)", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "796512474", + "repo_full_name": "apache/mxnet", + "pr_number": 20856, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", + "discussion_id": "796512474", + "commented_code": "@@ -0,0 +1,304 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+## Introduction\n+\n+After successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n+\n+Two main types of software optimizations can be characerized as:\n+- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining sequence of operations which can be performed one after another immediately (example: ReLU activation)", + "comment_created_at": "2022-02-01T11:42:30+00:00", + "comment_author": "bartekkuncer", + "comment_body": "```suggestion\r\n- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution)\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "796567112", + "pr_number": 20856, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", + "created_at": "2022-02-01T12:55:09+00:00", + "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## Introduction\n\nAfter successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n\nTwo main types of software optimizations can be characerized as:\n- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. 
chaining sequence of operations which can be performed one after another immediately (example: ReLU activation)\n- compute-bound optimizations - these optimizations are mainly done on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution - one of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization\n\nIn version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n\n## Operator Fusion\n\nModels are often represented as directed graph of operations (represented by nodes) and data flow (representad as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n\n\nThe simplest way to explain what fusion is and how it works is to present an example. On the image above is shown a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar block called residual blocks. Some possible fusion patterns are:\n\n- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n- Conv2D + Add => even simpler idea than the previous one - instead of overwriting output memory, results are added to the output memory. In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "796567112", + "repo_full_name": "apache/mxnet", + "pr_number": 20856, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", + "discussion_id": "796567112", + "commented_code": "@@ -0,0 +1,304 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+## Introduction\n+\n+After successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n+\n+Two main types of software optimizations can be characerized as:\n+- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. 
chaining sequence of operations which can be performed one after another immediately (example: ReLU activation)\n+- compute-bound optimizations - these optimizations are mainly done on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution - one of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization\n+\n+In version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n+\n+## Operator Fusion\n+\n+Models are often represented as directed graph of operations (represented by nodes) and data flow (representad as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n+![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n+\n+\n+The simplest way to explain what fusion is and how it works is to present an example. On the image above is shown a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar block called residual blocks. Some possible fusion patterns are:\n+\n+- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n+- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n+- Conv2D + Add => even simpler idea than the previous one - instead of overwriting output memory, results are added to the output memory. In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`", + "comment_created_at": "2022-02-01T12:55:09+00:00", + "comment_author": "bartekkuncer", + "comment_body": "```suggestion\r\n- Conv2D + Add => even simpler idea than the previous ones - instead of overwriting output memory, results are added to the output memory. In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`.\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "800439402", + "pr_number": 20856, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", + "created_at": "2022-02-07T09:02:47+00:00", + "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## Introduction\n\nAfter successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. 
In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n\nTwo main types of software optimizations can be characerized as:\n- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution)", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "800439402", + "repo_full_name": "apache/mxnet", + "pr_number": 20856, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", + "discussion_id": "800439402", + "commented_code": "@@ -0,0 +1,304 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+## Introduction\n+\n+After successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n+\n+Two main types of software optimizations can be characerized as:\n+- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution)", + "comment_created_at": "2022-02-07T09:02:47+00:00", + "comment_author": "bartekkuncer", + "comment_body": "```suggestion\r\n- memory-bound optimizations - main objective of these optimizations is to reduce the amount of memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution),\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "804494315", + "pr_number": 20856, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", + "created_at": "2022-02-11T09:47:22+00:00", + "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## Introduction\n\nAfter successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers simultaneously. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n\nTwo main types of software optimizations can be characerized as:\n- memory-bound optimizations - main objective of these optimizations is to reduce the amount of memory operations (reads and writes) - it is done by e.g. 
chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution),\n- compute-bound optimizations - these optimizations are mainly made on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution. One of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization.\n\nIn version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n\n## Operator Fusion\n\nModels are often represented as a directed graph of operations (represented by nodes) and data flow (represented as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n\n\nThe simplest way to explain what fusion is and how it works is to present an example. Image above depicts a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar blocks called residual blocks. Some possible fusion patterns are:\n\n- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n- Conv2D + Add => even simpler idea than the previous ones - instead of overwriting output memory, results are added to the output memory. In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`.", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "804494315", + "repo_full_name": "apache/mxnet", + "pr_number": 20856, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", + "discussion_id": "804494315", + "commented_code": "@@ -0,0 +1,304 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+## Introduction\n+\n+After successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers simultaneously. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n+\n+Two main types of software optimizations can be characerized as:\n+- memory-bound optimizations - main objective of these optimizations is to reduce the amount of memory operations (reads and writes) - it is done by e.g. 
chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution),\n+- compute-bound optimizations - these optimizations are mainly made on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution. One of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization.\n+\n+In version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n+\n+## Operator Fusion\n+\n+Models are often represented as a directed graph of operations (represented by nodes) and data flow (represented as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n+![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n+\n+\n+The simplest way to explain what fusion is and how it works is to present an example. Image above depicts a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar blocks called residual blocks. Some possible fusion patterns are:\n+\n+- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n+- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n+- Conv2D + Add => even simpler idea than the previous ones - instead of overwriting output memory, results are added to the output memory. In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`.", + "comment_created_at": "2022-02-11T09:47:22+00:00", + "comment_author": "bartekkuncer", + "comment_body": "```suggestion\r\n- Conv2D + Add => even simpler idea than the previous ones - instead of overwriting the output memory, results are added to it. In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`.\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-explain-optimization-mechanisms.md b/_reviewers/mxnet-explain-optimization-mechanisms.md index e8d8e45..d254455 100644 --- a/_reviewers/mxnet-explain-optimization-mechanisms.md +++ b/_reviewers/mxnet-explain-optimization-mechanisms.md @@ -31,117 +31,3 @@ out_mem += conv_result # Adds to existing data without separate read For data-intensive operations like quantization tuning, include preparation best practices: "As input data is used many times during tuning, it is better to have it prepared earlier." This reduces redundant processing and improves overall performance. 
When explaining optimization patterns, clearly describe the benefit: "chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one" provides specific insight into why the optimization helps. Precise explanations help developers implement optimizations correctly and make informed decisions about performance trade-offs. - - -[ - { - "discussion_id": "954872677", - "pr_number": 21127, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization_inc.md", - "created_at": "2022-08-25T11:55:17+00:00", - "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n# Improving accuracy with Intel\u00ae Neural Compressor\n\nThe accuracy of a model can decrease as a result of quantization. When the accuracy drop is significant, we can try to manually find a better quantization configuration (exclude some layers, try different calibration methods, etc.), but for bigger models this might prove to be a difficult and time consuming task. [Intel\u00ae Neural Compressor](https://github.com/intel/neural-compressor) (INC) tries to automate this process using several tuning heuristics, which aim to find the quantization configuration that satisfies the specified accuracy requirement.\n\n**NOTE:**\n\nMost tuning strategies will try different configurations on an evaluation dataset in order to find out how each layer affects the accuracy of the model. This means that for larger models, it may take a long time to find a solution (as the tuning space is usually larger and the evaluation itself takes longer).\n\n## Installation and Prerequisites\n\n- Install MXNet with oneDNN enabled as described in the [Get started](https://mxnet.apache.org/versions/master/get_started?platform=linux&language=python&processor=cpu&environ=pip&). (Until the 2.0 release you can use the nightly build version: `pip install --pre mxnet -f https://dist.mxnet.io/python`)\n\n- Install Intel\u00ae Neural Compressor:\n\n Use one of the commands below to install INC (supported python versions are: 3.6, 3.7, 3.8, 3.9):\n\n ```bash\n # install stable version from pip\n pip install neural-compressor\n\n # install nightly version from pip\n pip install -i https://test.pypi.org/simple/ neural-compressor\n\n # install stable version from conda\n conda install neural-compressor -c conda-forge -c intel\n ```\n If you come into trouble with dependencies on `cv2` library you can run: `apt-get update && apt-get install -y python3-opencv`\n\n## Configuration file\n\nQuantization tuning process can be customized in the yaml configuration file. Below is a simple example:\n\n```yaml\n# cnn.yaml\n\nversion: 1.0\n\nmodel:\n name: cnn\n framework: mxnet\n\nquantization:\n calibration:\n sampling_size: 160 # number of samples for calibration\n\ntuning:\n strategy:\n name: basic\n accuracy_criterion:\n relative: 0.01\n exit_policy:\n timeout: 0\n random_seed: 9527\n```\n\nWe are using the `basic` strategy, but you could also try out different ones. [Here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md) you can find a list of strategies available in INC and details of how they work. You can also add your own strategy if the existing ones do not suit your needs.\n\nSince the value of `timeout` is 0, INC will run until it finds a configuration that satisfies the accuracy criterion and then exit. 
Depending on the strategy this may not be ideal, as sometimes it would be better to further explore the tuning space to find a superior configuration both in terms of accuracy and speed. To achieve this, we can set a specific `timeout` value, which will tell INC how long (in seconds) it should run.\n\nFor more information about the configuration file, see the [template](https://github.com/intel/neural-compressor/blob/master/neural_compressor/template/ptq.yaml) from the official INC repo. Keep in mind that only the `post training quantization` is currently supported for MXNet.\n\n## Model quantization and tuning\n\nIn general, Intel\u00ae Neural Compressor requires 4 elements in order to run: \n1. Config file - like the example above \n2. Model to be quantized \n3. Calibration dataloader \n4. Evaluation function - a function that takes a model as an argument and returns the accuracy it achieves on a certain evaluation dataset. \n\n### Quantizing ResNet\n\nThe [quantization](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/performance/backend/dnnl/dnnl_quantization.html#Quantization) sections described how to quantize ResNet using the native MXNet quantization. This example shows how we can achieve the similar results (with the auto-tuning) using INC.\n\n1. Get the model\n\n```python\nimport logging\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo import vision\n\nlogging.basicConfig()\nlogger = logging.getLogger('logger')\nlogger.setLevel(logging.INFO)\n\nbatch_shape = (1, 3, 224, 224)\nresnet18 = vision.resnet18_v1(pretrained=True)\n```\n\n2. Prepare the dataset:\n\n```python\nmx.test_utils.download('http://data.mxnet.io/data/val_256_q90.rec', 'data/val_256_q90.rec')\n\nbatch_size = 16\nmean_std = {'mean_r': 123.68, 'mean_g': 116.779, 'mean_b': 103.939,\n 'std_r': 58.393, 'std_g': 57.12, 'std_b': 57.375}\n\ndata = mx.io.ImageRecordIter(path_imgrec='data/val_256_q90.rec',\n batch_size=batch_size,\n data_shape=batch_shape[1:],\n rand_crop=False,\n rand_mirror=False,\n shuffle=False,\n **mean_std)\ndata.batch_size = batch_size\n```\n\n3. Prepare the evaluation function:\n\n```python\neval_samples = batch_size*10\n\ndef eval_func(model):\n data.reset()\n metric = mx.metric.Accuracy()\n for i, batch in enumerate(data):\n if i * batch_size >= eval_samples:\n break\n x = batch.data[0].as_in_context(mx.cpu())\n label = batch.label[0].as_in_context(mx.cpu())\n outputs = model.forward(x)\n metric.update(label, outputs)\n return metric.get()[1]\n```\n\n4. Run Intel\u00ae Neural Compressor:\n\n```python\nfrom neural_compressor.experimental import Quantization\nquantizer = Quantization(\"./cnn.yaml\")\nquantizer.model = resnet18\nquantizer.calib_dataloader = data\nquantizer.eval_func = eval_func\nqnet = quantizer.fit().model\n```\n\nSince this model already achieves good accuracy using native quantization (less than 1% accuracy drop), for the given configuration file, INC will end on the first configuration, quantizing all layers using `naive` calibration mode for each. To see the true potential of INC, we need a model which suffers from a larger accuracy drop after quantization.\n\n### Quantizing ResNet50v2\n\nThis example shows how to use INC to quantize ResNet50 v2. 
In this case, the native MXNet quantization introduce a huge accuracy drop (70% using `naive` calibration mode) and INC allows automatically find better solution.\n\nThis is the (TODO link to INC configuration file) for this example: \n```yaml\nversion: 1.0\n\nmodel:\n name: resnet50_v2\n framework: mxnet\n\nquantization:\n calibration:\n sampling_size: 192 # number of samples for calibration\n\ntuning:\n strategy:\n name: mse\n accuracy_criterion:\n relative: 0.015\n exit_policy:\n timeout: 0\n max_trials: 500\n random_seed: 9527\n```\n\nIt could be used with script below \n(TODO link to resnet_mse.py)\nto find operator which mostly influence accuracy drops and disable it from quantization. \nYou can find description of MSE strategy \n[here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md#user-content-mse).\n\n```python\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo.vision import resnet50_v2\nfrom mxnet.gluon.data.vision import transforms\nfrom mxnet.contrib.quantization import quantize_net\n\n# Preparing input data\nrgb_mean = (0.485, 0.456, 0.406)\nrgb_std = (0.229, 0.224, 0.225)\nbatch_size = 64\nnum_calib_batches = 9\n# set below proper path to ImageNet data set\ndataset = mx.gluon.data.vision.ImageRecordDataset('../imagenet/rec/val.rec')\n# Tuning in INC on whole data set takes too long time so we take only part of the whole data set\n# as representative part of it:\ndataset = dataset.take(num_calib_batches * batch_size)\ntransformer = transforms.Compose([transforms.Resize(256),\n transforms.CenterCrop(224),\n transforms.ToTensor(),\n transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n# Note: as input data are used many times during tuning it is better to prepared data earlier,\n# so lazy parameter for transform_first is set to False", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "954872677", - "repo_full_name": "apache/mxnet", - "pr_number": 21127, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization_inc.md", - "discussion_id": "954872677", - "commented_code": "@@ -0,0 +1,290 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+# Improving accuracy with Intel\u00ae Neural Compressor\n+\n+The accuracy of a model can decrease as a result of quantization. When the accuracy drop is significant, we can try to manually find a better quantization configuration (exclude some layers, try different calibration methods, etc.), but for bigger models this might prove to be a difficult and time consuming task. [Intel\u00ae Neural Compressor](https://github.com/intel/neural-compressor) (INC) tries to automate this process using several tuning heuristics, which aim to find the quantization configuration that satisfies the specified accuracy requirement.\n+\n+**NOTE:**\n+\n+Most tuning strategies will try different configurations on an evaluation dataset in order to find out how each layer affects the accuracy of the model. This means that for larger models, it may take a long time to find a solution (as the tuning space is usually larger and the evaluation itself takes longer).\n+\n+## Installation and Prerequisites\n+\n+- Install MXNet with oneDNN enabled as described in the [Get started](https://mxnet.apache.org/versions/master/get_started?platform=linux&language=python&processor=cpu&environ=pip&). 
(Until the 2.0 release you can use the nightly build version: `pip install --pre mxnet -f https://dist.mxnet.io/python`)\n+\n+- Install Intel\u00ae Neural Compressor:\n+\n+ Use one of the commands below to install INC (supported python versions are: 3.6, 3.7, 3.8, 3.9):\n+\n+ ```bash\n+ # install stable version from pip\n+ pip install neural-compressor\n+\n+ # install nightly version from pip\n+ pip install -i https://test.pypi.org/simple/ neural-compressor\n+\n+ # install stable version from conda\n+ conda install neural-compressor -c conda-forge -c intel\n+ ```\n+ If you come into trouble with dependencies on `cv2` library you can run: `apt-get update && apt-get install -y python3-opencv`\n+\n+## Configuration file\n+\n+Quantization tuning process can be customized in the yaml configuration file. Below is a simple example:\n+\n+```yaml\n+# cnn.yaml\n+\n+version: 1.0\n+\n+model:\n+ name: cnn\n+ framework: mxnet\n+\n+quantization:\n+ calibration:\n+ sampling_size: 160 # number of samples for calibration\n+\n+tuning:\n+ strategy:\n+ name: basic\n+ accuracy_criterion:\n+ relative: 0.01\n+ exit_policy:\n+ timeout: 0\n+ random_seed: 9527\n+```\n+\n+We are using the `basic` strategy, but you could also try out different ones. [Here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md) you can find a list of strategies available in INC and details of how they work. You can also add your own strategy if the existing ones do not suit your needs.\n+\n+Since the value of `timeout` is 0, INC will run until it finds a configuration that satisfies the accuracy criterion and then exit. Depending on the strategy this may not be ideal, as sometimes it would be better to further explore the tuning space to find a superior configuration both in terms of accuracy and speed. To achieve this, we can set a specific `timeout` value, which will tell INC how long (in seconds) it should run.\n+\n+For more information about the configuration file, see the [template](https://github.com/intel/neural-compressor/blob/master/neural_compressor/template/ptq.yaml) from the official INC repo. Keep in mind that only the `post training quantization` is currently supported for MXNet.\n+\n+## Model quantization and tuning\n+\n+In general, Intel\u00ae Neural Compressor requires 4 elements in order to run: \n+1. Config file - like the example above \n+2. Model to be quantized \n+3. Calibration dataloader \n+4. Evaluation function - a function that takes a model as an argument and returns the accuracy it achieves on a certain evaluation dataset. \n+\n+### Quantizing ResNet\n+\n+The [quantization](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/performance/backend/dnnl/dnnl_quantization.html#Quantization) sections described how to quantize ResNet using the native MXNet quantization. This example shows how we can achieve the similar results (with the auto-tuning) using INC.\n+\n+1. Get the model\n+\n+```python\n+import logging\n+import mxnet as mx\n+from mxnet.gluon.model_zoo import vision\n+\n+logging.basicConfig()\n+logger = logging.getLogger('logger')\n+logger.setLevel(logging.INFO)\n+\n+batch_shape = (1, 3, 224, 224)\n+resnet18 = vision.resnet18_v1(pretrained=True)\n+```\n+\n+2. 
Prepare the dataset:\n+\n+```python\n+mx.test_utils.download('http://data.mxnet.io/data/val_256_q90.rec', 'data/val_256_q90.rec')\n+\n+batch_size = 16\n+mean_std = {'mean_r': 123.68, 'mean_g': 116.779, 'mean_b': 103.939,\n+ 'std_r': 58.393, 'std_g': 57.12, 'std_b': 57.375}\n+\n+data = mx.io.ImageRecordIter(path_imgrec='data/val_256_q90.rec',\n+ batch_size=batch_size,\n+ data_shape=batch_shape[1:],\n+ rand_crop=False,\n+ rand_mirror=False,\n+ shuffle=False,\n+ **mean_std)\n+data.batch_size = batch_size\n+```\n+\n+3. Prepare the evaluation function:\n+\n+```python\n+eval_samples = batch_size*10\n+\n+def eval_func(model):\n+ data.reset()\n+ metric = mx.metric.Accuracy()\n+ for i, batch in enumerate(data):\n+ if i * batch_size >= eval_samples:\n+ break\n+ x = batch.data[0].as_in_context(mx.cpu())\n+ label = batch.label[0].as_in_context(mx.cpu())\n+ outputs = model.forward(x)\n+ metric.update(label, outputs)\n+ return metric.get()[1]\n+```\n+\n+4. Run Intel\u00ae Neural Compressor:\n+\n+```python\n+from neural_compressor.experimental import Quantization\n+quantizer = Quantization(\"./cnn.yaml\")\n+quantizer.model = resnet18\n+quantizer.calib_dataloader = data\n+quantizer.eval_func = eval_func\n+qnet = quantizer.fit().model\n+```\n+\n+Since this model already achieves good accuracy using native quantization (less than 1% accuracy drop), for the given configuration file, INC will end on the first configuration, quantizing all layers using `naive` calibration mode for each. To see the true potential of INC, we need a model which suffers from a larger accuracy drop after quantization.\n+\n+### Quantizing ResNet50v2\n+\n+This example shows how to use INC to quantize ResNet50 v2. In this case, the native MXNet quantization introduce a huge accuracy drop (70% using `naive` calibration mode) and INC allows automatically find better solution.\n+\n+This is the (TODO link to INC configuration file) for this example: \n+```yaml\n+version: 1.0\n+\n+model:\n+ name: resnet50_v2\n+ framework: mxnet\n+\n+quantization:\n+ calibration:\n+ sampling_size: 192 # number of samples for calibration\n+\n+tuning:\n+ strategy:\n+ name: mse\n+ accuracy_criterion:\n+ relative: 0.015\n+ exit_policy:\n+ timeout: 0\n+ max_trials: 500\n+ random_seed: 9527\n+```\n+\n+It could be used with script below \n+(TODO link to resnet_mse.py)\n+to find operator which mostly influence accuracy drops and disable it from quantization. 
\n+You can find description of MSE strategy \n+[here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md#user-content-mse).\n+\n+```python\n+import mxnet as mx\n+from mxnet.gluon.model_zoo.vision import resnet50_v2\n+from mxnet.gluon.data.vision import transforms\n+from mxnet.contrib.quantization import quantize_net\n+\n+# Preparing input data\n+rgb_mean = (0.485, 0.456, 0.406)\n+rgb_std = (0.229, 0.224, 0.225)\n+batch_size = 64\n+num_calib_batches = 9\n+# set below proper path to ImageNet data set\n+dataset = mx.gluon.data.vision.ImageRecordDataset('../imagenet/rec/val.rec')\n+# Tuning in INC on whole data set takes too long time so we take only part of the whole data set\n+# as representative part of it:\n+dataset = dataset.take(num_calib_batches * batch_size)\n+transformer = transforms.Compose([transforms.Resize(256),\n+ transforms.CenterCrop(224),\n+ transforms.ToTensor(),\n+ transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n+# Note: as input data are used many times during tuning it is better to prepared data earlier,\n+# so lazy parameter for transform_first is set to False", - "comment_created_at": "2022-08-25T11:55:17+00:00", - "comment_author": "bartekkuncer", - "comment_body": "```suggestion\r\n# Note: as input data is used many times during tuning, it is better to have it prepared earlier.\r\n# Therefore, lazy parameter for transform_first is set to False.\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "796512474", - "pr_number": 20856, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", - "created_at": "2022-02-01T11:42:30+00:00", - "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## Introduction\n\nAfter successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n\nTwo main types of software optimizations can be characerized as:\n- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining sequence of operations which can be performed one after another immediately (example: ReLU activation)", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "796512474", - "repo_full_name": "apache/mxnet", - "pr_number": 20856, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", - "discussion_id": "796512474", - "commented_code": "@@ -0,0 +1,304 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+## Introduction\n+\n+After successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. 
In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n+\n+Two main types of software optimizations can be characerized as:\n+- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining sequence of operations which can be performed one after another immediately (example: ReLU activation)", - "comment_created_at": "2022-02-01T11:42:30+00:00", - "comment_author": "bartekkuncer", - "comment_body": "```suggestion\r\n- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution)\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "796567112", - "pr_number": 20856, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", - "created_at": "2022-02-01T12:55:09+00:00", - "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## Introduction\n\nAfter successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n\nTwo main types of software optimizations can be characerized as:\n- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining sequence of operations which can be performed one after another immediately (example: ReLU activation)\n- compute-bound optimizations - these optimizations are mainly done on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution - one of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization\n\nIn version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n\n## Operator Fusion\n\nModels are often represented as directed graph of operations (represented by nodes) and data flow (representad as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n\n\nThe simplest way to explain what fusion is and how it works is to present an example. On the image above is shown a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar block called residual blocks. Some possible fusion patterns are:\n\n- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. 
Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n- Conv2D + Add => even simpler idea than the previous one - instead of overwriting output memory, results are added to the output memory. In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "796567112", - "repo_full_name": "apache/mxnet", - "pr_number": 20856, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", - "discussion_id": "796567112", - "commented_code": "@@ -0,0 +1,304 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+## Introduction\n+\n+After successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n+\n+Two main types of software optimizations can be characerized as:\n+- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining sequence of operations which can be performed one after another immediately (example: ReLU activation)\n+- compute-bound optimizations - these optimizations are mainly done on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution - one of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization\n+\n+In version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n+\n+## Operator Fusion\n+\n+Models are often represented as directed graph of operations (represented by nodes) and data flow (representad as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n+![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n+\n+\n+The simplest way to explain what fusion is and how it works is to present an example. On the image above is shown a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar block called residual blocks. Some possible fusion patterns are:\n+\n+- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n+- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. 
FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n+- Conv2D + Add => even simpler idea than the previous one - instead of overwriting output memory, results are added to the output memory. In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`", - "comment_created_at": "2022-02-01T12:55:09+00:00", - "comment_author": "bartekkuncer", - "comment_body": "```suggestion\r\n- Conv2D + Add => even simpler idea than the previous ones - instead of overwriting output memory, results are added to the output memory. In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`.\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "800439402", - "pr_number": 20856, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", - "created_at": "2022-02-07T09:02:47+00:00", - "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## Introduction\n\nAfter successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n\nTwo main types of software optimizations can be characerized as:\n- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution)", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "800439402", - "repo_full_name": "apache/mxnet", - "pr_number": 20856, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", - "discussion_id": "800439402", - "commented_code": "@@ -0,0 +1,304 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+## Introduction\n+\n+After successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n+\n+Two main types of software optimizations can be characerized as:\n+- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. 
chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution)", - "comment_created_at": "2022-02-07T09:02:47+00:00", - "comment_author": "bartekkuncer", - "comment_body": "```suggestion\r\n- memory-bound optimizations - main objective of these optimizations is to reduce the amount of memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution),\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "804494315", - "pr_number": 20856, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", - "created_at": "2022-02-11T09:47:22+00:00", - "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## Introduction\n\nAfter successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers simultaneously. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n\nTwo main types of software optimizations can be characerized as:\n- memory-bound optimizations - main objective of these optimizations is to reduce the amount of memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution),\n- compute-bound optimizations - these optimizations are mainly made on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution. One of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization.\n\nIn version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n\n## Operator Fusion\n\nModels are often represented as a directed graph of operations (represented by nodes) and data flow (represented as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n\n\nThe simplest way to explain what fusion is and how it works is to present an example. Image above depicts a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar blocks called residual blocks. Some possible fusion patterns are:\n\n- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). 
It is very simple idea where before writing data to output, activation is performed on that data. Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n- Conv2D + Add => even simpler idea than the previous ones - instead of overwriting output memory, results are added to the output memory. In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`.", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "804494315", - "repo_full_name": "apache/mxnet", - "pr_number": 20856, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", - "discussion_id": "804494315", - "commented_code": "@@ -0,0 +1,304 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+## Introduction\n+\n+After successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers simultaneously. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n+\n+Two main types of software optimizations can be characerized as:\n+- memory-bound optimizations - main objective of these optimizations is to reduce the amount of memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution),\n+- compute-bound optimizations - these optimizations are mainly made on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution. One of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization.\n+\n+In version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n+\n+## Operator Fusion\n+\n+Models are often represented as a directed graph of operations (represented by nodes) and data flow (represented as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n+![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n+\n+\n+The simplest way to explain what fusion is and how it works is to present an example. Image above depicts a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar blocks called residual blocks. Some possible fusion patterns are:\n+\n+- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n+- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. 
Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n+- Conv2D + Add => even simpler idea than the previous ones - instead of overwriting output memory, results are added to the output memory. In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`.", - "comment_created_at": "2022-02-11T09:47:22+00:00", - "comment_author": "bartekkuncer", - "comment_body": "```suggestion\r\n- Conv2D + Add => even simpler idea than the previous ones - instead of overwriting the output memory, results are added to it. In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`.\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-explicit-null-checks.json b/_reviewers/mxnet-explicit-null-checks.json new file mode 100644 index 0000000..f689236 --- /dev/null +++ b/_reviewers/mxnet-explicit-null-checks.json @@ -0,0 +1,138 @@ +[ + { + "discussion_id": "308052505", + "pr_number": 15678, + "pr_file": "cpp-package/scripts/OpWrapperGenerator.py", + "created_at": "2019-07-29T04:33:37+00:00", + "commented_code": "'caffe-layer-parameter':'::caffe::LayerParameter',\\\n 'NDArray-or-Symbol[]':'const std::vector&',\\\n 'float':'mx_float',\\\n 'float or None':'mx_float',\\", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "308052505", + "repo_full_name": "apache/mxnet", + "pr_number": 15678, + "pr_file": "cpp-package/scripts/OpWrapperGenerator.py", + "discussion_id": "308052505", + "commented_code": "@@ -88,8 +88,10 @@ class Arg:\n 'caffe-layer-parameter':'::caffe::LayerParameter',\\\n 'NDArray-or-Symbol[]':'const std::vector&',\\\n 'float':'mx_float',\\\n+ 'float or None':'mx_float',\\", + "comment_created_at": "2019-07-29T04:33:37+00:00", + "comment_author": "ZhennanQin", + "comment_body": "Why not `'float or None':'dmlc::optional',\\`?", + "pr_file_module": null + }, + { + "comment_id": "308065098", + "repo_full_name": "apache/mxnet", + "pr_number": 15678, + "pr_file": "cpp-package/scripts/OpWrapperGenerator.py", + "discussion_id": "308052505", + "commented_code": "@@ -88,8 +88,10 @@ class Arg:\n 'caffe-layer-parameter':'::caffe::LayerParameter',\\\n 'NDArray-or-Symbol[]':'const std::vector&',\\\n 'float':'mx_float',\\\n+ 'float or None':'mx_float',\\", + "comment_created_at": "2019-07-29T05:58:45+00:00", + "comment_author": "wkcn", + "comment_body": "Thank you for pointing it! I will fix it.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "853066594", + "pr_number": 20983, + "pr_file": "python/mxnet/amp/amp.py", + "created_at": "2022-04-19T13:20:43+00:00", + "commented_code": "\"conditional_fp32_ops should be a list of (str, str, list of str)\"\n cond_ops[op_name].setdefault(attr_name, []).extend(attr_vals)\n\n nodes_attr = sym.attr_dict()\n nodes_attrs = sym.attr_dict()\n nodes_op = {n['name']: n['op'] for n in json.loads(sym.tojson())['nodes']}\n if not set(excluded_sym_names).issubset(set(nodes_op.keys())):\n logging.warning(\"excluded_sym_names are not present in the network. 
Missing layers: {}\".format(\n set(excluded_sym_names) - set(nodes_op.keys())))\n\n for node_name, node_op in nodes_op.items():\n if node_op not in cond_ops:\n continue\n node_attrs = nodes_attr[node_name]\n node_attrs = nodes_attrs[node_name]\n for attr_name, attr_vals in cond_ops[node_op].items():\n assert attr_name in node_attrs\n if node_attrs[attr_name] in attr_vals:\n excluded_sym_names += node_name\n excluded_sym_names.append(node_name)\n break\n excluded_sym_names = list(set(excluded_sym_names))\n\n excluded_sym_names = set(excluded_sym_names)\n for node in sym.get_internals():\n if node.name in excluded_sym_names:\n excluded_sym_names.remove(node.name)\n opt_constraints = node.attr('__opt_constraint__')\n opt_constraints = 0 if opt_constraints is None else opt_constraints", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "853066594", + "repo_full_name": "apache/mxnet", + "pr_number": 20983, + "pr_file": "python/mxnet/amp/amp.py", + "discussion_id": "853066594", + "commented_code": "@@ -497,22 +495,30 @@ def convert_symbol(sym, input_dtypes, param_dtypes, target_dtype=\"float16\", targ\n \"conditional_fp32_ops should be a list of (str, str, list of str)\"\n cond_ops[op_name].setdefault(attr_name, []).extend(attr_vals)\n \n- nodes_attr = sym.attr_dict()\n+ nodes_attrs = sym.attr_dict()\n nodes_op = {n['name']: n['op'] for n in json.loads(sym.tojson())['nodes']}\n- if not set(excluded_sym_names).issubset(set(nodes_op.keys())):\n- logging.warning(\"excluded_sym_names are not present in the network. Missing layers: {}\".format(\n- set(excluded_sym_names) - set(nodes_op.keys())))\n-\n for node_name, node_op in nodes_op.items():\n if node_op not in cond_ops:\n continue\n- node_attrs = nodes_attr[node_name]\n+ node_attrs = nodes_attrs[node_name]\n for attr_name, attr_vals in cond_ops[node_op].items():\n assert attr_name in node_attrs\n if node_attrs[attr_name] in attr_vals:\n- excluded_sym_names += node_name\n+ excluded_sym_names.append(node_name)\n break\n- excluded_sym_names = list(set(excluded_sym_names))\n+\n+ excluded_sym_names = set(excluded_sym_names)\n+ for node in sym.get_internals():\n+ if node.name in excluded_sym_names:\n+ excluded_sym_names.remove(node.name)\n+ opt_constraints = node.attr('__opt_constraint__')\n+ opt_constraints = 0 if opt_constraints is None else opt_constraints", + "comment_created_at": "2022-04-19T13:20:43+00:00", + "comment_author": "bgawrych", + "comment_body": "Does node.attr(...) returns always value in proper type? Is it possible it will return number as string in some case? If yes then |= operator may not work", + "pr_file_module": null + }, + { + "comment_id": "853259275", + "repo_full_name": "apache/mxnet", + "pr_number": 20983, + "pr_file": "python/mxnet/amp/amp.py", + "discussion_id": "853066594", + "commented_code": "@@ -497,22 +495,30 @@ def convert_symbol(sym, input_dtypes, param_dtypes, target_dtype=\"float16\", targ\n \"conditional_fp32_ops should be a list of (str, str, list of str)\"\n cond_ops[op_name].setdefault(attr_name, []).extend(attr_vals)\n \n- nodes_attr = sym.attr_dict()\n+ nodes_attrs = sym.attr_dict()\n nodes_op = {n['name']: n['op'] for n in json.loads(sym.tojson())['nodes']}\n- if not set(excluded_sym_names).issubset(set(nodes_op.keys())):\n- logging.warning(\"excluded_sym_names are not present in the network. 
Missing layers: {}\".format(\n- set(excluded_sym_names) - set(nodes_op.keys())))\n-\n for node_name, node_op in nodes_op.items():\n if node_op not in cond_ops:\n continue\n- node_attrs = nodes_attr[node_name]\n+ node_attrs = nodes_attrs[node_name]\n for attr_name, attr_vals in cond_ops[node_op].items():\n assert attr_name in node_attrs\n if node_attrs[attr_name] in attr_vals:\n- excluded_sym_names += node_name\n+ excluded_sym_names.append(node_name)\n break\n- excluded_sym_names = list(set(excluded_sym_names))\n+\n+ excluded_sym_names = set(excluded_sym_names)\n+ for node in sym.get_internals():\n+ if node.name in excluded_sym_names:\n+ excluded_sym_names.remove(node.name)\n+ opt_constraints = node.attr('__opt_constraint__')\n+ opt_constraints = 0 if opt_constraints is None else opt_constraints", + "comment_created_at": "2022-04-19T16:18:15+00:00", + "comment_author": "PawelGlomski-Intel", + "comment_body": "Good catch! I believe all attributes are strings actually. I added a test covering this.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "610245192", + "pr_number": 20105, + "pr_file": "python/mxnet/ndarray/numpy_extension/_op.py", + "created_at": "2021-04-09T00:53:38+00:00", + "commented_code": "out : NDArray or list of NDArrays\n The output of this function.\n \"\"\"\n return _api_internal.batch_norm(x, gamma, beta, running_mean, running_var, eps, momentum,\n fix_gamma, use_global_stats, output_mean_var, axis,\n cudnn_off, min_calib_range, max_calib_range)\n out = _api_internal.batch_norm(x, gamma, beta, running_mean, running_var, eps, momentum,\n fix_gamma, use_global_stats, output_mean_var, axis,\n cudnn_off, min_calib_range, max_calib_range)\n if isinstance(out, NDArrayBase):\n return out\n return list(out)", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "610245192", + "repo_full_name": "apache/mxnet", + "pr_number": 20105, + "pr_file": "python/mxnet/ndarray/numpy_extension/_op.py", + "discussion_id": "610245192", + "commented_code": "@@ -354,9 +355,12 @@ def batch_norm(x, gamma, beta, running_mean, running_var, eps=1e-3, momentum=0.9\n out : NDArray or list of NDArrays\n The output of this function.\n \"\"\"\n- return _api_internal.batch_norm(x, gamma, beta, running_mean, running_var, eps, momentum,\n- fix_gamma, use_global_stats, output_mean_var, axis,\n- cudnn_off, min_calib_range, max_calib_range)\n+ out = _api_internal.batch_norm(x, gamma, beta, running_mean, running_var, eps, momentum,\n+ fix_gamma, use_global_stats, output_mean_var, axis,\n+ cudnn_off, min_calib_range, max_calib_range)\n+ if isinstance(out, NDArrayBase):\n+ return out\n+ return list(out)", + "comment_created_at": "2021-04-09T00:53:38+00:00", + "comment_author": "szha", + "comment_body": "what does this branch handle?", + "pr_file_module": null + }, + { + "comment_id": "610245572", + "repo_full_name": "apache/mxnet", + "pr_number": 20105, + "pr_file": "python/mxnet/ndarray/numpy_extension/_op.py", + "discussion_id": "610245192", + "commented_code": "@@ -354,9 +355,12 @@ def batch_norm(x, gamma, beta, running_mean, running_var, eps=1e-3, momentum=0.9\n out : NDArray or list of NDArrays\n The output of this function.\n \"\"\"\n- return _api_internal.batch_norm(x, gamma, beta, running_mean, running_var, eps, momentum,\n- fix_gamma, use_global_stats, output_mean_var, axis,\n- cudnn_off, min_calib_range, max_calib_range)\n+ out = _api_internal.batch_norm(x, gamma, beta, running_mean, running_var, eps, momentum,\n+ fix_gamma, use_global_stats, output_mean_var, 
axis,\n+ cudnn_off, min_calib_range, max_calib_range)\n+ if isinstance(out, NDArrayBase):\n+ return out\n+ return list(out)", + "comment_created_at": "2021-04-09T00:54:22+00:00", + "comment_author": "szha", + "comment_body": "this seems to be a common pattern", + "pr_file_module": null + }, + { + "comment_id": "610274484", + "repo_full_name": "apache/mxnet", + "pr_number": 20105, + "pr_file": "python/mxnet/ndarray/numpy_extension/_op.py", + "discussion_id": "610245192", + "commented_code": "@@ -354,9 +355,12 @@ def batch_norm(x, gamma, beta, running_mean, running_var, eps=1e-3, momentum=0.9\n out : NDArray or list of NDArrays\n The output of this function.\n \"\"\"\n- return _api_internal.batch_norm(x, gamma, beta, running_mean, running_var, eps, momentum,\n- fix_gamma, use_global_stats, output_mean_var, axis,\n- cudnn_off, min_calib_range, max_calib_range)\n+ out = _api_internal.batch_norm(x, gamma, beta, running_mean, running_var, eps, momentum,\n+ fix_gamma, use_global_stats, output_mean_var, axis,\n+ cudnn_off, min_calib_range, max_calib_range)\n+ if isinstance(out, NDArrayBase):\n+ return out\n+ return list(out)", + "comment_created_at": "2021-04-09T01:50:34+00:00", + "comment_author": "barry-jin", + "comment_body": "Because the out can be NDArraBase type or ADT type. This branch will return out directly if its NDArray, otherwise convert ADT to python list and return list of NDArrays. ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "577027310", + "pr_number": 19685, + "pr_file": "python/mxnet/_ctypes/cached_op.py", + "created_at": "2021-02-16T18:01:48+00:00", + "commented_code": "\"\"\"ctypes implementation of imperative invoke wrapper\"\"\"\n # New FFI only supports numpy ndarray\n default_ctx = kwargs.pop('default_ctx', None)\n out = kwargs.pop('out', None)\n if kwargs:\n raise TypeError(\n \"CachedOp.__call__ got unexpected keyword argument(s): \" + \\\n ', '.join(kwargs.keys()))\n if self.is_np_sym:\n if len(args) == 1 and args[0] is None:\n args = []\n type_id = default_ctx.device_typeid if default_ctx else None\n device_id = default_ctx.device_id if default_ctx else None\n out_arg = out if out and not isinstance(out, NDArrayBase) else (out, )", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "577027310", + "repo_full_name": "apache/mxnet", + "pr_number": 19685, + "pr_file": "python/mxnet/_ctypes/cached_op.py", + "discussion_id": "577027310", + "commented_code": "@@ -74,27 +74,32 @@ def __call__(self, *args, **kwargs):\n \"\"\"ctypes implementation of imperative invoke wrapper\"\"\"\n # New FFI only supports numpy ndarray\n default_ctx = kwargs.pop('default_ctx', None)\n+ out = kwargs.pop('out', None)\n+ if kwargs:\n+ raise TypeError(\n+ \"CachedOp.__call__ got unexpected keyword argument(s): \" + \\\n+ ', '.join(kwargs.keys()))\n if self.is_np_sym:\n if len(args) == 1 and args[0] is None:\n args = []\n type_id = default_ctx.device_typeid if default_ctx else None\n device_id = default_ctx.device_id if default_ctx else None\n+ out_arg = out if out and not isinstance(out, NDArrayBase) else (out, )", + "comment_created_at": "2021-02-16T18:01:48+00:00", + "comment_author": "szha", + "comment_body": "if the intention is to check whether `out` is not None, it's probably better to write it that way. 
Otherwise, `if NDArray` will call `__bool__` of NDArray", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-explicit-null-checks.md b/_reviewers/mxnet-explicit-null-checks.md index 00b378d..80396df 100644 --- a/_reviewers/mxnet-explicit-null-checks.md +++ b/_reviewers/mxnet-explicit-null-checks.md @@ -43,143 +43,3 @@ if isinstance(out, NDArrayBase): return out return list(out) # Convert if needed ``` - - -[ - { - "discussion_id": "308052505", - "pr_number": 15678, - "pr_file": "cpp-package/scripts/OpWrapperGenerator.py", - "created_at": "2019-07-29T04:33:37+00:00", - "commented_code": "'caffe-layer-parameter':'::caffe::LayerParameter',\\\n 'NDArray-or-Symbol[]':'const std::vector&',\\\n 'float':'mx_float',\\\n 'float or None':'mx_float',\\", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "308052505", - "repo_full_name": "apache/mxnet", - "pr_number": 15678, - "pr_file": "cpp-package/scripts/OpWrapperGenerator.py", - "discussion_id": "308052505", - "commented_code": "@@ -88,8 +88,10 @@ class Arg:\n 'caffe-layer-parameter':'::caffe::LayerParameter',\\\n 'NDArray-or-Symbol[]':'const std::vector&',\\\n 'float':'mx_float',\\\n+ 'float or None':'mx_float',\\", - "comment_created_at": "2019-07-29T04:33:37+00:00", - "comment_author": "ZhennanQin", - "comment_body": "Why not `'float or None':'dmlc::optional',\\`?", - "pr_file_module": null - }, - { - "comment_id": "308065098", - "repo_full_name": "apache/mxnet", - "pr_number": 15678, - "pr_file": "cpp-package/scripts/OpWrapperGenerator.py", - "discussion_id": "308052505", - "commented_code": "@@ -88,8 +88,10 @@ class Arg:\n 'caffe-layer-parameter':'::caffe::LayerParameter',\\\n 'NDArray-or-Symbol[]':'const std::vector&',\\\n 'float':'mx_float',\\\n+ 'float or None':'mx_float',\\", - "comment_created_at": "2019-07-29T05:58:45+00:00", - "comment_author": "wkcn", - "comment_body": "Thank you for pointing it! I will fix it.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "853066594", - "pr_number": 20983, - "pr_file": "python/mxnet/amp/amp.py", - "created_at": "2022-04-19T13:20:43+00:00", - "commented_code": "\"conditional_fp32_ops should be a list of (str, str, list of str)\"\n cond_ops[op_name].setdefault(attr_name, []).extend(attr_vals)\n\n nodes_attr = sym.attr_dict()\n nodes_attrs = sym.attr_dict()\n nodes_op = {n['name']: n['op'] for n in json.loads(sym.tojson())['nodes']}\n if not set(excluded_sym_names).issubset(set(nodes_op.keys())):\n logging.warning(\"excluded_sym_names are not present in the network. 
Missing layers: {}\".format(\n set(excluded_sym_names) - set(nodes_op.keys())))\n\n for node_name, node_op in nodes_op.items():\n if node_op not in cond_ops:\n continue\n node_attrs = nodes_attr[node_name]\n node_attrs = nodes_attrs[node_name]\n for attr_name, attr_vals in cond_ops[node_op].items():\n assert attr_name in node_attrs\n if node_attrs[attr_name] in attr_vals:\n excluded_sym_names += node_name\n excluded_sym_names.append(node_name)\n break\n excluded_sym_names = list(set(excluded_sym_names))\n\n excluded_sym_names = set(excluded_sym_names)\n for node in sym.get_internals():\n if node.name in excluded_sym_names:\n excluded_sym_names.remove(node.name)\n opt_constraints = node.attr('__opt_constraint__')\n opt_constraints = 0 if opt_constraints is None else opt_constraints", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "853066594", - "repo_full_name": "apache/mxnet", - "pr_number": 20983, - "pr_file": "python/mxnet/amp/amp.py", - "discussion_id": "853066594", - "commented_code": "@@ -497,22 +495,30 @@ def convert_symbol(sym, input_dtypes, param_dtypes, target_dtype=\"float16\", targ\n \"conditional_fp32_ops should be a list of (str, str, list of str)\"\n cond_ops[op_name].setdefault(attr_name, []).extend(attr_vals)\n \n- nodes_attr = sym.attr_dict()\n+ nodes_attrs = sym.attr_dict()\n nodes_op = {n['name']: n['op'] for n in json.loads(sym.tojson())['nodes']}\n- if not set(excluded_sym_names).issubset(set(nodes_op.keys())):\n- logging.warning(\"excluded_sym_names are not present in the network. Missing layers: {}\".format(\n- set(excluded_sym_names) - set(nodes_op.keys())))\n-\n for node_name, node_op in nodes_op.items():\n if node_op not in cond_ops:\n continue\n- node_attrs = nodes_attr[node_name]\n+ node_attrs = nodes_attrs[node_name]\n for attr_name, attr_vals in cond_ops[node_op].items():\n assert attr_name in node_attrs\n if node_attrs[attr_name] in attr_vals:\n- excluded_sym_names += node_name\n+ excluded_sym_names.append(node_name)\n break\n- excluded_sym_names = list(set(excluded_sym_names))\n+\n+ excluded_sym_names = set(excluded_sym_names)\n+ for node in sym.get_internals():\n+ if node.name in excluded_sym_names:\n+ excluded_sym_names.remove(node.name)\n+ opt_constraints = node.attr('__opt_constraint__')\n+ opt_constraints = 0 if opt_constraints is None else opt_constraints", - "comment_created_at": "2022-04-19T13:20:43+00:00", - "comment_author": "bgawrych", - "comment_body": "Does node.attr(...) returns always value in proper type? Is it possible it will return number as string in some case? If yes then |= operator may not work", - "pr_file_module": null - }, - { - "comment_id": "853259275", - "repo_full_name": "apache/mxnet", - "pr_number": 20983, - "pr_file": "python/mxnet/amp/amp.py", - "discussion_id": "853066594", - "commented_code": "@@ -497,22 +495,30 @@ def convert_symbol(sym, input_dtypes, param_dtypes, target_dtype=\"float16\", targ\n \"conditional_fp32_ops should be a list of (str, str, list of str)\"\n cond_ops[op_name].setdefault(attr_name, []).extend(attr_vals)\n \n- nodes_attr = sym.attr_dict()\n+ nodes_attrs = sym.attr_dict()\n nodes_op = {n['name']: n['op'] for n in json.loads(sym.tojson())['nodes']}\n- if not set(excluded_sym_names).issubset(set(nodes_op.keys())):\n- logging.warning(\"excluded_sym_names are not present in the network. 
Missing layers: {}\".format(\n- set(excluded_sym_names) - set(nodes_op.keys())))\n-\n for node_name, node_op in nodes_op.items():\n if node_op not in cond_ops:\n continue\n- node_attrs = nodes_attr[node_name]\n+ node_attrs = nodes_attrs[node_name]\n for attr_name, attr_vals in cond_ops[node_op].items():\n assert attr_name in node_attrs\n if node_attrs[attr_name] in attr_vals:\n- excluded_sym_names += node_name\n+ excluded_sym_names.append(node_name)\n break\n- excluded_sym_names = list(set(excluded_sym_names))\n+\n+ excluded_sym_names = set(excluded_sym_names)\n+ for node in sym.get_internals():\n+ if node.name in excluded_sym_names:\n+ excluded_sym_names.remove(node.name)\n+ opt_constraints = node.attr('__opt_constraint__')\n+ opt_constraints = 0 if opt_constraints is None else opt_constraints", - "comment_created_at": "2022-04-19T16:18:15+00:00", - "comment_author": "PawelGlomski-Intel", - "comment_body": "Good catch! I believe all attributes are strings actually. I added a test covering this.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "610245192", - "pr_number": 20105, - "pr_file": "python/mxnet/ndarray/numpy_extension/_op.py", - "created_at": "2021-04-09T00:53:38+00:00", - "commented_code": "out : NDArray or list of NDArrays\n The output of this function.\n \"\"\"\n return _api_internal.batch_norm(x, gamma, beta, running_mean, running_var, eps, momentum,\n fix_gamma, use_global_stats, output_mean_var, axis,\n cudnn_off, min_calib_range, max_calib_range)\n out = _api_internal.batch_norm(x, gamma, beta, running_mean, running_var, eps, momentum,\n fix_gamma, use_global_stats, output_mean_var, axis,\n cudnn_off, min_calib_range, max_calib_range)\n if isinstance(out, NDArrayBase):\n return out\n return list(out)", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "610245192", - "repo_full_name": "apache/mxnet", - "pr_number": 20105, - "pr_file": "python/mxnet/ndarray/numpy_extension/_op.py", - "discussion_id": "610245192", - "commented_code": "@@ -354,9 +355,12 @@ def batch_norm(x, gamma, beta, running_mean, running_var, eps=1e-3, momentum=0.9\n out : NDArray or list of NDArrays\n The output of this function.\n \"\"\"\n- return _api_internal.batch_norm(x, gamma, beta, running_mean, running_var, eps, momentum,\n- fix_gamma, use_global_stats, output_mean_var, axis,\n- cudnn_off, min_calib_range, max_calib_range)\n+ out = _api_internal.batch_norm(x, gamma, beta, running_mean, running_var, eps, momentum,\n+ fix_gamma, use_global_stats, output_mean_var, axis,\n+ cudnn_off, min_calib_range, max_calib_range)\n+ if isinstance(out, NDArrayBase):\n+ return out\n+ return list(out)", - "comment_created_at": "2021-04-09T00:53:38+00:00", - "comment_author": "szha", - "comment_body": "what does this branch handle?", - "pr_file_module": null - }, - { - "comment_id": "610245572", - "repo_full_name": "apache/mxnet", - "pr_number": 20105, - "pr_file": "python/mxnet/ndarray/numpy_extension/_op.py", - "discussion_id": "610245192", - "commented_code": "@@ -354,9 +355,12 @@ def batch_norm(x, gamma, beta, running_mean, running_var, eps=1e-3, momentum=0.9\n out : NDArray or list of NDArrays\n The output of this function.\n \"\"\"\n- return _api_internal.batch_norm(x, gamma, beta, running_mean, running_var, eps, momentum,\n- fix_gamma, use_global_stats, output_mean_var, axis,\n- cudnn_off, min_calib_range, max_calib_range)\n+ out = _api_internal.batch_norm(x, gamma, beta, running_mean, running_var, eps, momentum,\n+ fix_gamma, use_global_stats, output_mean_var, 
axis,\n+ cudnn_off, min_calib_range, max_calib_range)\n+ if isinstance(out, NDArrayBase):\n+ return out\n+ return list(out)", - "comment_created_at": "2021-04-09T00:54:22+00:00", - "comment_author": "szha", - "comment_body": "this seems to be a common pattern", - "pr_file_module": null - }, - { - "comment_id": "610274484", - "repo_full_name": "apache/mxnet", - "pr_number": 20105, - "pr_file": "python/mxnet/ndarray/numpy_extension/_op.py", - "discussion_id": "610245192", - "commented_code": "@@ -354,9 +355,12 @@ def batch_norm(x, gamma, beta, running_mean, running_var, eps=1e-3, momentum=0.9\n out : NDArray or list of NDArrays\n The output of this function.\n \"\"\"\n- return _api_internal.batch_norm(x, gamma, beta, running_mean, running_var, eps, momentum,\n- fix_gamma, use_global_stats, output_mean_var, axis,\n- cudnn_off, min_calib_range, max_calib_range)\n+ out = _api_internal.batch_norm(x, gamma, beta, running_mean, running_var, eps, momentum,\n+ fix_gamma, use_global_stats, output_mean_var, axis,\n+ cudnn_off, min_calib_range, max_calib_range)\n+ if isinstance(out, NDArrayBase):\n+ return out\n+ return list(out)", - "comment_created_at": "2021-04-09T01:50:34+00:00", - "comment_author": "barry-jin", - "comment_body": "Because the out can be NDArraBase type or ADT type. This branch will return out directly if its NDArray, otherwise convert ADT to python list and return list of NDArrays. ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "577027310", - "pr_number": 19685, - "pr_file": "python/mxnet/_ctypes/cached_op.py", - "created_at": "2021-02-16T18:01:48+00:00", - "commented_code": "\"\"\"ctypes implementation of imperative invoke wrapper\"\"\"\n # New FFI only supports numpy ndarray\n default_ctx = kwargs.pop('default_ctx', None)\n out = kwargs.pop('out', None)\n if kwargs:\n raise TypeError(\n \"CachedOp.__call__ got unexpected keyword argument(s): \" + \\\n ', '.join(kwargs.keys()))\n if self.is_np_sym:\n if len(args) == 1 and args[0] is None:\n args = []\n type_id = default_ctx.device_typeid if default_ctx else None\n device_id = default_ctx.device_id if default_ctx else None\n out_arg = out if out and not isinstance(out, NDArrayBase) else (out, )", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "577027310", - "repo_full_name": "apache/mxnet", - "pr_number": 19685, - "pr_file": "python/mxnet/_ctypes/cached_op.py", - "discussion_id": "577027310", - "commented_code": "@@ -74,27 +74,32 @@ def __call__(self, *args, **kwargs):\n \"\"\"ctypes implementation of imperative invoke wrapper\"\"\"\n # New FFI only supports numpy ndarray\n default_ctx = kwargs.pop('default_ctx', None)\n+ out = kwargs.pop('out', None)\n+ if kwargs:\n+ raise TypeError(\n+ \"CachedOp.__call__ got unexpected keyword argument(s): \" + \\\n+ ', '.join(kwargs.keys()))\n if self.is_np_sym:\n if len(args) == 1 and args[0] is None:\n args = []\n type_id = default_ctx.device_typeid if default_ctx else None\n device_id = default_ctx.device_id if default_ctx else None\n+ out_arg = out if out and not isinstance(out, NDArrayBase) else (out, )", - "comment_created_at": "2021-02-16T18:01:48+00:00", - "comment_author": "szha", - "comment_body": "if the intention is to check whether `out` is not None, it's probably better to write it that way. 
Otherwise, `if NDArray` will call `__bool__` of NDArray", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-graph-traversal-optimization.json b/_reviewers/mxnet-graph-traversal-optimization.json new file mode 100644 index 0000000..55ddb17 --- /dev/null +++ b/_reviewers/mxnet-graph-traversal-optimization.json @@ -0,0 +1,150 @@ +[ + { + "discussion_id": "664297807", + "pr_number": 20400, + "pr_file": "src/operator/subgraph/mkldnn/mkldnn_fc_sum_fuse.h", + "created_at": "2021-07-06T07:19:38+00:00", + "commented_code": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied. See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n/*\n \\brief It fuses FC + SUM for floating point output in second post quantization pass\n*/\n\n#ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_SUM_FUSE_H_\n#define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_SUM_FUSE_H_\n#if MXNET_USE_MKLDNN == 1\n\n#include \n#include \n#include \n#include \n#include \n#include \"../common.h\"\n#include \"../../tensor/matrix_op-inl.h\"\n#include \"mkldnn_subgraph_base-inl.h\"\n#include \"mkldnn_fc-inl.h\"\n\nnamespace mxnet {\nnamespace op {\n\nclass SgMKLDNNFCSumFuseSelector : public SubgraphSelector {\n public:\n /*! 
\\brief pattern match status */\n enum SelectStatus {\n kFail = 0,\n kStart,\n kSuccess,\n };\n\n private:\n bool quantized_;\n SelectStatus status_;\n std::vector matched_list_;\n\n public:\n explicit SgMKLDNNFCSumFuseSelector(bool quantized) :\n quantized_(quantized) {}\n\n bool Select(const nnvm::Node &n, const std::shared_ptr& node_attr) override {\n if (n.op() == Op::Get(\"_sg_mkldnn_fully_connected\") && SupportMKLDNNAttr(node_attr)) {\n auto const &fc_param = nnvm::get(n.attrs.parsed);\n // TODO(anko) remove fc_param.mkldnn_param.quantized from if below\n // to fuse even for not quantized?\n if (fc_param.mkldnn_param.enable_float_output && fc_param.mkldnn_param.quantized) {\n status_ = kStart;\n matched_list_.clear();\n matched_list_.push_back(&n);\n return true;\n }\n }\n return false;\n }\n\n bool SelectInput(const nnvm::Node &n, const nnvm::Node &new_node) override {\n return false;\n }\n\n bool SelectOutput(const nnvm::Node &n, const nnvm::Node &new_node) override {\n if (status_ == kFail || status_ == kSuccess || new_node.is_variable())\n return false;\n // If n isn't the last matched node, then we encoutered a internal\n // branch, we should pop out the node behind n and stop fusion.\n if (matched_list_.back() != &n) {\n if (std::find(matched_list_.begin(), matched_list_.end(), &n) !=\n matched_list_.end()) {\n while (matched_list_.back() != &n) {\n matched_list_.pop_back();\n }\n }\n status_ = kSuccess;\n return false;\n }\n\n switch (status_) {\n case kStart:\n if (new_node.op()->name == \"elemwise_add\") {\n matched_list_.push_back(&new_node);\n status_ = kSuccess;\n return true;\n }\n default:\n status_ = kSuccess;\n return false;\n }\n }\n\n std::vector Filter(\n const std::vector &candidates) override {\n if (status_ == kFail) {\n return std::vector(0);\n } else {\n std::vector ret;\n for (auto i : matched_list_) {\n auto non_const_i = const_cast(i);\n if (std::find(candidates.begin(), candidates.end(), non_const_i) !=\n candidates.end()) {\n ret.push_back(non_const_i);\n }\n }\n return candidates;\n }\n }\n\n void Reset() override {\n CHECK_GE(matched_list_.size(), 1);\n auto new_selector = SgMKLDNNFCSumFuseSelector(quantized_);\n new_selector.Select(*matched_list_[0], nullptr);\n *this = new_selector;\n }\n};\n\nclass SgMKLDNNFCSumFuseProperty : public SubgraphProperty {\n public:\n SgMKLDNNFCSumFuseProperty() {}\n\n static SubgraphPropertyPtr Create() {\n static const std::string &name = \"MKLDNN FullyConnected post quantization second pass\";\n auto property = std::make_shared();\n property->SetAttr(\"property_name\", name);\n property->SetAttr(\"inference_only\", true);\n if (dmlc::GetEnv(\"MXNET_DISABLE_MKLDNN_FC_SUM\", 0)) {\n property->SetAttr(\"disable\", true);\n }\n return property;\n }\n\n nnvm::ObjectPtr CreateSubgraphNode(const nnvm::Symbol &sym,\n const int subgraph_id = 0) const override {\n nnvm::ObjectPtr fc_node = nullptr;\n nnvm::ObjectPtr ew_add_node = nullptr;\n\n DFSVisit(sym.outputs, [&](const nnvm::ObjectPtr &node) {\n if (node->is_variable()) return;\n auto &sub_name = node->op()->name;\n if (sub_name == \"_sg_mkldnn_fully_connected\") {\n fc_node = node;\n } else if (sub_name == \"elemwise_add\") {\n ew_add_node = node;\n }\n });\n\n CHECK_NOTNULL(fc_node);\n if (ew_add_node != nullptr) {\n CHECK_NOTNULL(fc_node->attrs.subgraphs[0]);\n auto fc_orginal = fc_node->attrs.subgraphs[0]->outputs[0].node;\n if (fc_orginal->op() == Op::Get(\"FullyConnected\")) {\n nnvm::Symbol new_sym;\n nnvm::NodeEntry &ew_input_with_fc = (ew_add_node->inputs[1].node 
== fc_node) ?\n ew_add_node->inputs[1] :\n ew_add_node->inputs[0];\n ew_input_with_fc.node = fc_orginal;\n new_sym.outputs.emplace_back(ew_add_node);\n fc_node->attrs.subgraphs.clear();\n fc_node->attrs.subgraphs.emplace_back(std::make_shared(new_sym));\n fc_node->attrs.dict[\"with_sum\"] = \"True\";\n fc_node->op()->attr_parser(&(fc_node->attrs));\n }\n }\n return fc_node;\n }\n\n SubgraphSelectorPtr CreateSubgraphSelector() const override {\n bool quantized = HasAttr(\"quantize\") ? GetAttr(\"quantize\") : false;\n auto selector =\n std::make_shared(quantized);\n return selector;\n }\n\n void ConnectSubgraphOutputs(\n const nnvm::ObjectPtr n,\n std::vector *output_entries) const override {\n // Connect all extern output entries to output[0]\n for (size_t i = 0; i < output_entries->size(); ++i) {\n auto entry_ptr = output_entries->at(i);\n *entry_ptr = nnvm::NodeEntry{n, entry_ptr->index, 0};\n }\n }\n\n void ConnectSubgraphInputs(\n const nnvm::ObjectPtr n, std::vector *input_entries,\n std::vector *orig_input_entries) const override {\n auto sym = n->attrs.subgraphs[0];\n auto const &fc_param = nnvm::get(n->attrs.parsed);\n std::unordered_set node_sets;\n DFSVisit(sym->outputs, [&](const nnvm::ObjectPtr &node) {", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "664297807", + "repo_full_name": "apache/mxnet", + "pr_number": 20400, + "pr_file": "src/operator/subgraph/mkldnn/mkldnn_fc_sum_fuse.h", + "discussion_id": "664297807", + "commented_code": "@@ -0,0 +1,240 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*\n+ \\brief It fuses FC + SUM for floating point output in second post quantization pass\n+*/\n+\n+#ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_SUM_FUSE_H_\n+#define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_SUM_FUSE_H_\n+#if MXNET_USE_MKLDNN == 1\n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \"../common.h\"\n+#include \"../../tensor/matrix_op-inl.h\"\n+#include \"mkldnn_subgraph_base-inl.h\"\n+#include \"mkldnn_fc-inl.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+class SgMKLDNNFCSumFuseSelector : public SubgraphSelector {\n+ public:\n+ /*! 
\\brief pattern match status */\n+ enum SelectStatus {\n+ kFail = 0,\n+ kStart,\n+ kSuccess,\n+ };\n+\n+ private:\n+ bool quantized_;\n+ SelectStatus status_;\n+ std::vector matched_list_;\n+\n+ public:\n+ explicit SgMKLDNNFCSumFuseSelector(bool quantized) :\n+ quantized_(quantized) {}\n+\n+ bool Select(const nnvm::Node &n, const std::shared_ptr& node_attr) override {\n+ if (n.op() == Op::Get(\"_sg_mkldnn_fully_connected\") && SupportMKLDNNAttr(node_attr)) {\n+ auto const &fc_param = nnvm::get(n.attrs.parsed);\n+ // TODO(anko) remove fc_param.mkldnn_param.quantized from if below\n+ // to fuse even for not quantized?\n+ if (fc_param.mkldnn_param.enable_float_output && fc_param.mkldnn_param.quantized) {\n+ status_ = kStart;\n+ matched_list_.clear();\n+ matched_list_.push_back(&n);\n+ return true;\n+ }\n+ }\n+ return false;\n+ }\n+\n+ bool SelectInput(const nnvm::Node &n, const nnvm::Node &new_node) override {\n+ return false;\n+ }\n+\n+ bool SelectOutput(const nnvm::Node &n, const nnvm::Node &new_node) override {\n+ if (status_ == kFail || status_ == kSuccess || new_node.is_variable())\n+ return false;\n+ // If n isn't the last matched node, then we encoutered a internal\n+ // branch, we should pop out the node behind n and stop fusion.\n+ if (matched_list_.back() != &n) {\n+ if (std::find(matched_list_.begin(), matched_list_.end(), &n) !=\n+ matched_list_.end()) {\n+ while (matched_list_.back() != &n) {\n+ matched_list_.pop_back();\n+ }\n+ }\n+ status_ = kSuccess;\n+ return false;\n+ }\n+\n+ switch (status_) {\n+ case kStart:\n+ if (new_node.op()->name == \"elemwise_add\") {\n+ matched_list_.push_back(&new_node);\n+ status_ = kSuccess;\n+ return true;\n+ }\n+ default:\n+ status_ = kSuccess;\n+ return false;\n+ }\n+ }\n+\n+ std::vector Filter(\n+ const std::vector &candidates) override {\n+ if (status_ == kFail) {\n+ return std::vector(0);\n+ } else {\n+ std::vector ret;\n+ for (auto i : matched_list_) {\n+ auto non_const_i = const_cast(i);\n+ if (std::find(candidates.begin(), candidates.end(), non_const_i) !=\n+ candidates.end()) {\n+ ret.push_back(non_const_i);\n+ }\n+ }\n+ return candidates;\n+ }\n+ }\n+\n+ void Reset() override {\n+ CHECK_GE(matched_list_.size(), 1);\n+ auto new_selector = SgMKLDNNFCSumFuseSelector(quantized_);\n+ new_selector.Select(*matched_list_[0], nullptr);\n+ *this = new_selector;\n+ }\n+};\n+\n+class SgMKLDNNFCSumFuseProperty : public SubgraphProperty {\n+ public:\n+ SgMKLDNNFCSumFuseProperty() {}\n+\n+ static SubgraphPropertyPtr Create() {\n+ static const std::string &name = \"MKLDNN FullyConnected post quantization second pass\";\n+ auto property = std::make_shared();\n+ property->SetAttr(\"property_name\", name);\n+ property->SetAttr(\"inference_only\", true);\n+ if (dmlc::GetEnv(\"MXNET_DISABLE_MKLDNN_FC_SUM\", 0)) {\n+ property->SetAttr(\"disable\", true);\n+ }\n+ return property;\n+ }\n+\n+ nnvm::ObjectPtr CreateSubgraphNode(const nnvm::Symbol &sym,\n+ const int subgraph_id = 0) const override {\n+ nnvm::ObjectPtr fc_node = nullptr;\n+ nnvm::ObjectPtr ew_add_node = nullptr;\n+\n+ DFSVisit(sym.outputs, [&](const nnvm::ObjectPtr &node) {\n+ if (node->is_variable()) return;\n+ auto &sub_name = node->op()->name;\n+ if (sub_name == \"_sg_mkldnn_fully_connected\") {\n+ fc_node = node;\n+ } else if (sub_name == \"elemwise_add\") {\n+ ew_add_node = node;\n+ }\n+ });\n+\n+ CHECK_NOTNULL(fc_node);\n+ if (ew_add_node != nullptr) {\n+ CHECK_NOTNULL(fc_node->attrs.subgraphs[0]);\n+ auto fc_orginal = fc_node->attrs.subgraphs[0]->outputs[0].node;\n+ if (fc_orginal->op() == 
Op::Get(\"FullyConnected\")) {\n+ nnvm::Symbol new_sym;\n+ nnvm::NodeEntry &ew_input_with_fc = (ew_add_node->inputs[1].node == fc_node) ?\n+ ew_add_node->inputs[1] :\n+ ew_add_node->inputs[0];\n+ ew_input_with_fc.node = fc_orginal;\n+ new_sym.outputs.emplace_back(ew_add_node);\n+ fc_node->attrs.subgraphs.clear();\n+ fc_node->attrs.subgraphs.emplace_back(std::make_shared(new_sym));\n+ fc_node->attrs.dict[\"with_sum\"] = \"True\";\n+ fc_node->op()->attr_parser(&(fc_node->attrs));\n+ }\n+ }\n+ return fc_node;\n+ }\n+\n+ SubgraphSelectorPtr CreateSubgraphSelector() const override {\n+ bool quantized = HasAttr(\"quantize\") ? GetAttr(\"quantize\") : false;\n+ auto selector =\n+ std::make_shared(quantized);\n+ return selector;\n+ }\n+\n+ void ConnectSubgraphOutputs(\n+ const nnvm::ObjectPtr n,\n+ std::vector *output_entries) const override {\n+ // Connect all extern output entries to output[0]\n+ for (size_t i = 0; i < output_entries->size(); ++i) {\n+ auto entry_ptr = output_entries->at(i);\n+ *entry_ptr = nnvm::NodeEntry{n, entry_ptr->index, 0};\n+ }\n+ }\n+\n+ void ConnectSubgraphInputs(\n+ const nnvm::ObjectPtr n, std::vector *input_entries,\n+ std::vector *orig_input_entries) const override {\n+ auto sym = n->attrs.subgraphs[0];\n+ auto const &fc_param = nnvm::get(n->attrs.parsed);\n+ std::unordered_set node_sets;\n+ DFSVisit(sym->outputs, [&](const nnvm::ObjectPtr &node) {", + "comment_created_at": "2021-07-06T07:19:38+00:00", + "comment_author": "mozga-intel", + "comment_body": "Could you please tell me whether there is possible to prevent calling DFS multiple time (for every pattern)?", + "pr_file_module": null + }, + { + "comment_id": "664386233", + "repo_full_name": "apache/mxnet", + "pr_number": 20400, + "pr_file": "src/operator/subgraph/mkldnn/mkldnn_fc_sum_fuse.h", + "discussion_id": "664297807", + "commented_code": "@@ -0,0 +1,240 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*\n+ \\brief It fuses FC + SUM for floating point output in second post quantization pass\n+*/\n+\n+#ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_SUM_FUSE_H_\n+#define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_SUM_FUSE_H_\n+#if MXNET_USE_MKLDNN == 1\n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \"../common.h\"\n+#include \"../../tensor/matrix_op-inl.h\"\n+#include \"mkldnn_subgraph_base-inl.h\"\n+#include \"mkldnn_fc-inl.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+class SgMKLDNNFCSumFuseSelector : public SubgraphSelector {\n+ public:\n+ /*! 
\\brief pattern match status */\n+ enum SelectStatus {\n+ kFail = 0,\n+ kStart,\n+ kSuccess,\n+ };\n+\n+ private:\n+ bool quantized_;\n+ SelectStatus status_;\n+ std::vector matched_list_;\n+\n+ public:\n+ explicit SgMKLDNNFCSumFuseSelector(bool quantized) :\n+ quantized_(quantized) {}\n+\n+ bool Select(const nnvm::Node &n, const std::shared_ptr& node_attr) override {\n+ if (n.op() == Op::Get(\"_sg_mkldnn_fully_connected\") && SupportMKLDNNAttr(node_attr)) {\n+ auto const &fc_param = nnvm::get(n.attrs.parsed);\n+ // TODO(anko) remove fc_param.mkldnn_param.quantized from if below\n+ // to fuse even for not quantized?\n+ if (fc_param.mkldnn_param.enable_float_output && fc_param.mkldnn_param.quantized) {\n+ status_ = kStart;\n+ matched_list_.clear();\n+ matched_list_.push_back(&n);\n+ return true;\n+ }\n+ }\n+ return false;\n+ }\n+\n+ bool SelectInput(const nnvm::Node &n, const nnvm::Node &new_node) override {\n+ return false;\n+ }\n+\n+ bool SelectOutput(const nnvm::Node &n, const nnvm::Node &new_node) override {\n+ if (status_ == kFail || status_ == kSuccess || new_node.is_variable())\n+ return false;\n+ // If n isn't the last matched node, then we encoutered a internal\n+ // branch, we should pop out the node behind n and stop fusion.\n+ if (matched_list_.back() != &n) {\n+ if (std::find(matched_list_.begin(), matched_list_.end(), &n) !=\n+ matched_list_.end()) {\n+ while (matched_list_.back() != &n) {\n+ matched_list_.pop_back();\n+ }\n+ }\n+ status_ = kSuccess;\n+ return false;\n+ }\n+\n+ switch (status_) {\n+ case kStart:\n+ if (new_node.op()->name == \"elemwise_add\") {\n+ matched_list_.push_back(&new_node);\n+ status_ = kSuccess;\n+ return true;\n+ }\n+ default:\n+ status_ = kSuccess;\n+ return false;\n+ }\n+ }\n+\n+ std::vector Filter(\n+ const std::vector &candidates) override {\n+ if (status_ == kFail) {\n+ return std::vector(0);\n+ } else {\n+ std::vector ret;\n+ for (auto i : matched_list_) {\n+ auto non_const_i = const_cast(i);\n+ if (std::find(candidates.begin(), candidates.end(), non_const_i) !=\n+ candidates.end()) {\n+ ret.push_back(non_const_i);\n+ }\n+ }\n+ return candidates;\n+ }\n+ }\n+\n+ void Reset() override {\n+ CHECK_GE(matched_list_.size(), 1);\n+ auto new_selector = SgMKLDNNFCSumFuseSelector(quantized_);\n+ new_selector.Select(*matched_list_[0], nullptr);\n+ *this = new_selector;\n+ }\n+};\n+\n+class SgMKLDNNFCSumFuseProperty : public SubgraphProperty {\n+ public:\n+ SgMKLDNNFCSumFuseProperty() {}\n+\n+ static SubgraphPropertyPtr Create() {\n+ static const std::string &name = \"MKLDNN FullyConnected post quantization second pass\";\n+ auto property = std::make_shared();\n+ property->SetAttr(\"property_name\", name);\n+ property->SetAttr(\"inference_only\", true);\n+ if (dmlc::GetEnv(\"MXNET_DISABLE_MKLDNN_FC_SUM\", 0)) {\n+ property->SetAttr(\"disable\", true);\n+ }\n+ return property;\n+ }\n+\n+ nnvm::ObjectPtr CreateSubgraphNode(const nnvm::Symbol &sym,\n+ const int subgraph_id = 0) const override {\n+ nnvm::ObjectPtr fc_node = nullptr;\n+ nnvm::ObjectPtr ew_add_node = nullptr;\n+\n+ DFSVisit(sym.outputs, [&](const nnvm::ObjectPtr &node) {\n+ if (node->is_variable()) return;\n+ auto &sub_name = node->op()->name;\n+ if (sub_name == \"_sg_mkldnn_fully_connected\") {\n+ fc_node = node;\n+ } else if (sub_name == \"elemwise_add\") {\n+ ew_add_node = node;\n+ }\n+ });\n+\n+ CHECK_NOTNULL(fc_node);\n+ if (ew_add_node != nullptr) {\n+ CHECK_NOTNULL(fc_node->attrs.subgraphs[0]);\n+ auto fc_orginal = fc_node->attrs.subgraphs[0]->outputs[0].node;\n+ if (fc_orginal->op() == 
Op::Get(\"FullyConnected\")) {\n+ nnvm::Symbol new_sym;\n+ nnvm::NodeEntry &ew_input_with_fc = (ew_add_node->inputs[1].node == fc_node) ?\n+ ew_add_node->inputs[1] :\n+ ew_add_node->inputs[0];\n+ ew_input_with_fc.node = fc_orginal;\n+ new_sym.outputs.emplace_back(ew_add_node);\n+ fc_node->attrs.subgraphs.clear();\n+ fc_node->attrs.subgraphs.emplace_back(std::make_shared(new_sym));\n+ fc_node->attrs.dict[\"with_sum\"] = \"True\";\n+ fc_node->op()->attr_parser(&(fc_node->attrs));\n+ }\n+ }\n+ return fc_node;\n+ }\n+\n+ SubgraphSelectorPtr CreateSubgraphSelector() const override {\n+ bool quantized = HasAttr(\"quantize\") ? GetAttr(\"quantize\") : false;\n+ auto selector =\n+ std::make_shared(quantized);\n+ return selector;\n+ }\n+\n+ void ConnectSubgraphOutputs(\n+ const nnvm::ObjectPtr n,\n+ std::vector *output_entries) const override {\n+ // Connect all extern output entries to output[0]\n+ for (size_t i = 0; i < output_entries->size(); ++i) {\n+ auto entry_ptr = output_entries->at(i);\n+ *entry_ptr = nnvm::NodeEntry{n, entry_ptr->index, 0};\n+ }\n+ }\n+\n+ void ConnectSubgraphInputs(\n+ const nnvm::ObjectPtr n, std::vector *input_entries,\n+ std::vector *orig_input_entries) const override {\n+ auto sym = n->attrs.subgraphs[0];\n+ auto const &fc_param = nnvm::get(n->attrs.parsed);\n+ std::unordered_set node_sets;\n+ DFSVisit(sym->outputs, [&](const nnvm::ObjectPtr &node) {", + "comment_created_at": "2021-07-06T09:20:20+00:00", + "comment_author": "anko-intel", + "comment_body": "I think so, but I think it is to big change in this moment.", + "pr_file_module": null + }, + { + "comment_id": "664396171", + "repo_full_name": "apache/mxnet", + "pr_number": 20400, + "pr_file": "src/operator/subgraph/mkldnn/mkldnn_fc_sum_fuse.h", + "discussion_id": "664297807", + "commented_code": "@@ -0,0 +1,240 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*\n+ \\brief It fuses FC + SUM for floating point output in second post quantization pass\n+*/\n+\n+#ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_SUM_FUSE_H_\n+#define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_SUM_FUSE_H_\n+#if MXNET_USE_MKLDNN == 1\n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \"../common.h\"\n+#include \"../../tensor/matrix_op-inl.h\"\n+#include \"mkldnn_subgraph_base-inl.h\"\n+#include \"mkldnn_fc-inl.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+class SgMKLDNNFCSumFuseSelector : public SubgraphSelector {\n+ public:\n+ /*! 
\\brief pattern match status */\n+ enum SelectStatus {\n+ kFail = 0,\n+ kStart,\n+ kSuccess,\n+ };\n+\n+ private:\n+ bool quantized_;\n+ SelectStatus status_;\n+ std::vector matched_list_;\n+\n+ public:\n+ explicit SgMKLDNNFCSumFuseSelector(bool quantized) :\n+ quantized_(quantized) {}\n+\n+ bool Select(const nnvm::Node &n, const std::shared_ptr& node_attr) override {\n+ if (n.op() == Op::Get(\"_sg_mkldnn_fully_connected\") && SupportMKLDNNAttr(node_attr)) {\n+ auto const &fc_param = nnvm::get(n.attrs.parsed);\n+ // TODO(anko) remove fc_param.mkldnn_param.quantized from if below\n+ // to fuse even for not quantized?\n+ if (fc_param.mkldnn_param.enable_float_output && fc_param.mkldnn_param.quantized) {\n+ status_ = kStart;\n+ matched_list_.clear();\n+ matched_list_.push_back(&n);\n+ return true;\n+ }\n+ }\n+ return false;\n+ }\n+\n+ bool SelectInput(const nnvm::Node &n, const nnvm::Node &new_node) override {\n+ return false;\n+ }\n+\n+ bool SelectOutput(const nnvm::Node &n, const nnvm::Node &new_node) override {\n+ if (status_ == kFail || status_ == kSuccess || new_node.is_variable())\n+ return false;\n+ // If n isn't the last matched node, then we encoutered a internal\n+ // branch, we should pop out the node behind n and stop fusion.\n+ if (matched_list_.back() != &n) {\n+ if (std::find(matched_list_.begin(), matched_list_.end(), &n) !=\n+ matched_list_.end()) {\n+ while (matched_list_.back() != &n) {\n+ matched_list_.pop_back();\n+ }\n+ }\n+ status_ = kSuccess;\n+ return false;\n+ }\n+\n+ switch (status_) {\n+ case kStart:\n+ if (new_node.op()->name == \"elemwise_add\") {\n+ matched_list_.push_back(&new_node);\n+ status_ = kSuccess;\n+ return true;\n+ }\n+ default:\n+ status_ = kSuccess;\n+ return false;\n+ }\n+ }\n+\n+ std::vector Filter(\n+ const std::vector &candidates) override {\n+ if (status_ == kFail) {\n+ return std::vector(0);\n+ } else {\n+ std::vector ret;\n+ for (auto i : matched_list_) {\n+ auto non_const_i = const_cast(i);\n+ if (std::find(candidates.begin(), candidates.end(), non_const_i) !=\n+ candidates.end()) {\n+ ret.push_back(non_const_i);\n+ }\n+ }\n+ return candidates;\n+ }\n+ }\n+\n+ void Reset() override {\n+ CHECK_GE(matched_list_.size(), 1);\n+ auto new_selector = SgMKLDNNFCSumFuseSelector(quantized_);\n+ new_selector.Select(*matched_list_[0], nullptr);\n+ *this = new_selector;\n+ }\n+};\n+\n+class SgMKLDNNFCSumFuseProperty : public SubgraphProperty {\n+ public:\n+ SgMKLDNNFCSumFuseProperty() {}\n+\n+ static SubgraphPropertyPtr Create() {\n+ static const std::string &name = \"MKLDNN FullyConnected post quantization second pass\";\n+ auto property = std::make_shared();\n+ property->SetAttr(\"property_name\", name);\n+ property->SetAttr(\"inference_only\", true);\n+ if (dmlc::GetEnv(\"MXNET_DISABLE_MKLDNN_FC_SUM\", 0)) {\n+ property->SetAttr(\"disable\", true);\n+ }\n+ return property;\n+ }\n+\n+ nnvm::ObjectPtr CreateSubgraphNode(const nnvm::Symbol &sym,\n+ const int subgraph_id = 0) const override {\n+ nnvm::ObjectPtr fc_node = nullptr;\n+ nnvm::ObjectPtr ew_add_node = nullptr;\n+\n+ DFSVisit(sym.outputs, [&](const nnvm::ObjectPtr &node) {\n+ if (node->is_variable()) return;\n+ auto &sub_name = node->op()->name;\n+ if (sub_name == \"_sg_mkldnn_fully_connected\") {\n+ fc_node = node;\n+ } else if (sub_name == \"elemwise_add\") {\n+ ew_add_node = node;\n+ }\n+ });\n+\n+ CHECK_NOTNULL(fc_node);\n+ if (ew_add_node != nullptr) {\n+ CHECK_NOTNULL(fc_node->attrs.subgraphs[0]);\n+ auto fc_orginal = fc_node->attrs.subgraphs[0]->outputs[0].node;\n+ if (fc_orginal->op() == 
Op::Get(\"FullyConnected\")) {\n+ nnvm::Symbol new_sym;\n+ nnvm::NodeEntry &ew_input_with_fc = (ew_add_node->inputs[1].node == fc_node) ?\n+ ew_add_node->inputs[1] :\n+ ew_add_node->inputs[0];\n+ ew_input_with_fc.node = fc_orginal;\n+ new_sym.outputs.emplace_back(ew_add_node);\n+ fc_node->attrs.subgraphs.clear();\n+ fc_node->attrs.subgraphs.emplace_back(std::make_shared(new_sym));\n+ fc_node->attrs.dict[\"with_sum\"] = \"True\";\n+ fc_node->op()->attr_parser(&(fc_node->attrs));\n+ }\n+ }\n+ return fc_node;\n+ }\n+\n+ SubgraphSelectorPtr CreateSubgraphSelector() const override {\n+ bool quantized = HasAttr(\"quantize\") ? GetAttr(\"quantize\") : false;\n+ auto selector =\n+ std::make_shared(quantized);\n+ return selector;\n+ }\n+\n+ void ConnectSubgraphOutputs(\n+ const nnvm::ObjectPtr n,\n+ std::vector *output_entries) const override {\n+ // Connect all extern output entries to output[0]\n+ for (size_t i = 0; i < output_entries->size(); ++i) {\n+ auto entry_ptr = output_entries->at(i);\n+ *entry_ptr = nnvm::NodeEntry{n, entry_ptr->index, 0};\n+ }\n+ }\n+\n+ void ConnectSubgraphInputs(\n+ const nnvm::ObjectPtr n, std::vector *input_entries,\n+ std::vector *orig_input_entries) const override {\n+ auto sym = n->attrs.subgraphs[0];\n+ auto const &fc_param = nnvm::get(n->attrs.parsed);\n+ std::unordered_set node_sets;\n+ DFSVisit(sym->outputs, [&](const nnvm::ObjectPtr &node) {", + "comment_created_at": "2021-07-06T09:33:23+00:00", + "comment_author": "sfraczek", + "comment_body": "@anko-intel Is this iterating through just subgraph fc->elementwise_add or entire graph?", + "pr_file_module": null + }, + { + "comment_id": "664618631", + "repo_full_name": "apache/mxnet", + "pr_number": 20400, + "pr_file": "src/operator/subgraph/mkldnn/mkldnn_fc_sum_fuse.h", + "discussion_id": "664297807", + "commented_code": "@@ -0,0 +1,240 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*\n+ \\brief It fuses FC + SUM for floating point output in second post quantization pass\n+*/\n+\n+#ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_SUM_FUSE_H_\n+#define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_SUM_FUSE_H_\n+#if MXNET_USE_MKLDNN == 1\n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \"../common.h\"\n+#include \"../../tensor/matrix_op-inl.h\"\n+#include \"mkldnn_subgraph_base-inl.h\"\n+#include \"mkldnn_fc-inl.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+class SgMKLDNNFCSumFuseSelector : public SubgraphSelector {\n+ public:\n+ /*! 
\\brief pattern match status */\n+ enum SelectStatus {\n+ kFail = 0,\n+ kStart,\n+ kSuccess,\n+ };\n+\n+ private:\n+ bool quantized_;\n+ SelectStatus status_;\n+ std::vector matched_list_;\n+\n+ public:\n+ explicit SgMKLDNNFCSumFuseSelector(bool quantized) :\n+ quantized_(quantized) {}\n+\n+ bool Select(const nnvm::Node &n, const std::shared_ptr& node_attr) override {\n+ if (n.op() == Op::Get(\"_sg_mkldnn_fully_connected\") && SupportMKLDNNAttr(node_attr)) {\n+ auto const &fc_param = nnvm::get(n.attrs.parsed);\n+ // TODO(anko) remove fc_param.mkldnn_param.quantized from if below\n+ // to fuse even for not quantized?\n+ if (fc_param.mkldnn_param.enable_float_output && fc_param.mkldnn_param.quantized) {\n+ status_ = kStart;\n+ matched_list_.clear();\n+ matched_list_.push_back(&n);\n+ return true;\n+ }\n+ }\n+ return false;\n+ }\n+\n+ bool SelectInput(const nnvm::Node &n, const nnvm::Node &new_node) override {\n+ return false;\n+ }\n+\n+ bool SelectOutput(const nnvm::Node &n, const nnvm::Node &new_node) override {\n+ if (status_ == kFail || status_ == kSuccess || new_node.is_variable())\n+ return false;\n+ // If n isn't the last matched node, then we encoutered a internal\n+ // branch, we should pop out the node behind n and stop fusion.\n+ if (matched_list_.back() != &n) {\n+ if (std::find(matched_list_.begin(), matched_list_.end(), &n) !=\n+ matched_list_.end()) {\n+ while (matched_list_.back() != &n) {\n+ matched_list_.pop_back();\n+ }\n+ }\n+ status_ = kSuccess;\n+ return false;\n+ }\n+\n+ switch (status_) {\n+ case kStart:\n+ if (new_node.op()->name == \"elemwise_add\") {\n+ matched_list_.push_back(&new_node);\n+ status_ = kSuccess;\n+ return true;\n+ }\n+ default:\n+ status_ = kSuccess;\n+ return false;\n+ }\n+ }\n+\n+ std::vector Filter(\n+ const std::vector &candidates) override {\n+ if (status_ == kFail) {\n+ return std::vector(0);\n+ } else {\n+ std::vector ret;\n+ for (auto i : matched_list_) {\n+ auto non_const_i = const_cast(i);\n+ if (std::find(candidates.begin(), candidates.end(), non_const_i) !=\n+ candidates.end()) {\n+ ret.push_back(non_const_i);\n+ }\n+ }\n+ return candidates;\n+ }\n+ }\n+\n+ void Reset() override {\n+ CHECK_GE(matched_list_.size(), 1);\n+ auto new_selector = SgMKLDNNFCSumFuseSelector(quantized_);\n+ new_selector.Select(*matched_list_[0], nullptr);\n+ *this = new_selector;\n+ }\n+};\n+\n+class SgMKLDNNFCSumFuseProperty : public SubgraphProperty {\n+ public:\n+ SgMKLDNNFCSumFuseProperty() {}\n+\n+ static SubgraphPropertyPtr Create() {\n+ static const std::string &name = \"MKLDNN FullyConnected post quantization second pass\";\n+ auto property = std::make_shared();\n+ property->SetAttr(\"property_name\", name);\n+ property->SetAttr(\"inference_only\", true);\n+ if (dmlc::GetEnv(\"MXNET_DISABLE_MKLDNN_FC_SUM\", 0)) {\n+ property->SetAttr(\"disable\", true);\n+ }\n+ return property;\n+ }\n+\n+ nnvm::ObjectPtr CreateSubgraphNode(const nnvm::Symbol &sym,\n+ const int subgraph_id = 0) const override {\n+ nnvm::ObjectPtr fc_node = nullptr;\n+ nnvm::ObjectPtr ew_add_node = nullptr;\n+\n+ DFSVisit(sym.outputs, [&](const nnvm::ObjectPtr &node) {\n+ if (node->is_variable()) return;\n+ auto &sub_name = node->op()->name;\n+ if (sub_name == \"_sg_mkldnn_fully_connected\") {\n+ fc_node = node;\n+ } else if (sub_name == \"elemwise_add\") {\n+ ew_add_node = node;\n+ }\n+ });\n+\n+ CHECK_NOTNULL(fc_node);\n+ if (ew_add_node != nullptr) {\n+ CHECK_NOTNULL(fc_node->attrs.subgraphs[0]);\n+ auto fc_orginal = fc_node->attrs.subgraphs[0]->outputs[0].node;\n+ if (fc_orginal->op() == 
Op::Get(\"FullyConnected\")) {\n+ nnvm::Symbol new_sym;\n+ nnvm::NodeEntry &ew_input_with_fc = (ew_add_node->inputs[1].node == fc_node) ?\n+ ew_add_node->inputs[1] :\n+ ew_add_node->inputs[0];\n+ ew_input_with_fc.node = fc_orginal;\n+ new_sym.outputs.emplace_back(ew_add_node);\n+ fc_node->attrs.subgraphs.clear();\n+ fc_node->attrs.subgraphs.emplace_back(std::make_shared(new_sym));\n+ fc_node->attrs.dict[\"with_sum\"] = \"True\";\n+ fc_node->op()->attr_parser(&(fc_node->attrs));\n+ }\n+ }\n+ return fc_node;\n+ }\n+\n+ SubgraphSelectorPtr CreateSubgraphSelector() const override {\n+ bool quantized = HasAttr(\"quantize\") ? GetAttr(\"quantize\") : false;\n+ auto selector =\n+ std::make_shared(quantized);\n+ return selector;\n+ }\n+\n+ void ConnectSubgraphOutputs(\n+ const nnvm::ObjectPtr n,\n+ std::vector *output_entries) const override {\n+ // Connect all extern output entries to output[0]\n+ for (size_t i = 0; i < output_entries->size(); ++i) {\n+ auto entry_ptr = output_entries->at(i);\n+ *entry_ptr = nnvm::NodeEntry{n, entry_ptr->index, 0};\n+ }\n+ }\n+\n+ void ConnectSubgraphInputs(\n+ const nnvm::ObjectPtr n, std::vector *input_entries,\n+ std::vector *orig_input_entries) const override {\n+ auto sym = n->attrs.subgraphs[0];\n+ auto const &fc_param = nnvm::get(n->attrs.parsed);\n+ std::unordered_set node_sets;\n+ DFSVisit(sym->outputs, [&](const nnvm::ObjectPtr &node) {", + "comment_created_at": "2021-07-06T14:40:31+00:00", + "comment_author": "mozga-intel", + "comment_body": "@anko-intel Thanks, How about talking it over? \r\n\r\n## Problem statement ##\r\nIf the structure of a graph is still the same all the time (apart of fusing). Then we can try to avoid running DFS algorithm multiple times. For brevity, those calls force us to pay a penalty: O(V + E). If we assume that `n` represents a total number of patterns, then the summary cost of that is equal n * O(V +E). Even more, for a few operators (i.e this one) DFS might be run twice or more, if any ~ then we got mn * O(V +E). \r\n\r\n## Solution - short description [Future plan] ##\r\nReduce overall time of using DFS to a minimum: nice to have just O(V +E), by labelling nodes and keeping traversal order. Pattern matching is applied after that ~ on a traversal order vector (maybe LCA knowledge is needed).", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "647723370", + "pr_number": 20262, + "pr_file": "src/imperative/imperative.cc", + "created_at": "2021-06-08T19:10:31+00:00", + "commented_code": "<< \"must have a deferred compute history associated with them.\";\n s.outputs.emplace_back(ndoutput->deferredcompute_entry_);\n }\n nnvm::DFSVisit(s.outputs, [&](const nnvm::ObjectPtr& n) {", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "647723370", + "repo_full_name": "apache/mxnet", + "pr_number": 20262, + "pr_file": "src/imperative/imperative.cc", + "discussion_id": "647723370", + "commented_code": "@@ -350,6 +350,15 @@ nnvm::Symbol Imperative::GetDeferredComputeSymbol(const std::vector &\n << \"must have a deferred compute history associated with them.\";\n s.outputs.emplace_back(ndoutput->deferredcompute_entry_);\n }\n+ nnvm::DFSVisit(s.outputs, [&](const nnvm::ObjectPtr& n) {", + "comment_created_at": "2021-06-08T19:10:31+00:00", + "comment_author": "leezu", + "comment_body": "I think this needs to run on a copy of the graph, because in general there can still be a call to `Compute` after `GetDeferredComputeSymbol`. 
", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "766527196", + "pr_number": 20767, + "pr_file": "src/operator/subgraph/dnnl/dnnl_identity_property.h", + "created_at": "2021-12-10T09:52:32+00:00", + "commented_code": "}\n\n bool SelectInput(const BiDirectedNode& n, const BiDirectedNode& input_node) override {\n if (input_node.node->is_variable()) {\n if (pattern_found || input_node.node->is_variable()) {", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "766527196", + "repo_full_name": "apache/mxnet", + "pr_number": 20767, + "pr_file": "src/operator/subgraph/dnnl/dnnl_identity_property.h", + "discussion_id": "766527196", + "commented_code": "@@ -65,10 +66,11 @@ class SgDNNLIdentitySelector : public SubgraphSelectorV2 {\n }\n \n bool SelectInput(const BiDirectedNode& n, const BiDirectedNode& input_node) override {\n- if (input_node.node->is_variable()) {\n+ if (pattern_found || input_node.node->is_variable()) {", + "comment_created_at": "2021-12-10T09:52:32+00:00", + "comment_author": "PawelGlomski-Intel", + "comment_body": "You could use `matched_list_` like you did in the `Filter` function.\r\n```suggestion\r\n if (matched_list_.size() == 2 || input_node.node->is_variable()) {\r\n```\r\nIf not, then I would also use `pattern_found` in the `Filter` function, to show, that these conditions are connected.\r\nhttps://github.com/apache/incubator-mxnet/blob/7e163157cf789d7ec5e7272532e04495e57bd0ba/src/operator/subgraph/dnnl/dnnl_identity_property.h#L85\r\nChange to :\r\n```\r\n if (pattern_found && candidates.size() == matched_list_.size()) { \r\n```", + "pr_file_module": null + }, + { + "comment_id": "770310841", + "repo_full_name": "apache/mxnet", + "pr_number": 20767, + "pr_file": "src/operator/subgraph/dnnl/dnnl_identity_property.h", + "discussion_id": "766527196", + "commented_code": "@@ -65,10 +66,11 @@ class SgDNNLIdentitySelector : public SubgraphSelectorV2 {\n }\n \n bool SelectInput(const BiDirectedNode& n, const BiDirectedNode& input_node) override {\n- if (input_node.node->is_variable()) {\n+ if (pattern_found || input_node.node->is_variable()) {", + "comment_created_at": "2021-12-16T08:19:20+00:00", + "comment_author": "bgawrych", + "comment_body": "Done", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "820816070", + "pr_number": 20753, + "pr_file": "src/nnvm/low_precision_pass.cc", + "created_at": "2022-03-07T15:26:58+00:00", + "commented_code": "#include \n#include \n#include \n#include \"../operator/operator_common.h\"\n\nnamespace mxnet {\nusing nnvm::Graph;\nusing nnvm::Node;\nusing nnvm::NodeEntry;\nusing nnvm::ObjectPtr;\nusing nnvm::Symbol;\n\n// create a node for operator : op_name with name : node_name\nstatic ObjectPtr CreateNode(std::string op_name, std::string node_name) {\n ObjectPtr node = Node::Create();\n node->attrs.name = node_name;\n if (op_name == \"nullptr\") {\n node->attrs.op = nullptr;\n // ugly workaround because VariableParam is not exposed\n node->attrs.parsed =\n nnvm::Symbol::CreateVariable(node->attrs.name).outputs[0].node->attrs.parsed;\n } else {\n node->attrs.op = Op::Get(op_name);\n }\n return node;\n}\n\nstatic ObjectPtr InsertNode(std::string op_name,\n std::string node_name,\n ObjectPtr current,\n NodeEntry previous) {\n ObjectPtr node = CreateNode(op_name, node_name);\n node->inputs.emplace_back(previous);\n if (current)\n current->inputs.emplace_back(NodeEntry{node, 0, 0});\n return node;\nbool is_cast_op(const nnvm::Op* const op) {\n return op && (op == Op::Get(\"amp_cast\") || 
op == Op::Get(\"Cast\"));\n}\n\n// get suffix for a node entry so that it can be used for amp_cast/amp_multicast node name\nstatic std::string GetSuffix(const nnvm::NodeEntry& node_entry,\n const std::unordered_map& mirror_map) {\n static const auto& flist_outputs = nnvm::Op::GetAttr(\"FListOutputNames\");\n std::string suffix = \"\";\n ObjectPtr mirror_node = mirror_map.at(node_entry.node.get());\n if (mirror_node->op() != nullptr) {\n auto list_output_names_func = flist_outputs.get(node_entry.node->op(), nullptr);\n if (list_output_names_func != nullptr) {\n std::vector names = list_output_names_func(node_entry.node->attrs);\n suffix = \"_\" + names[node_entry.index];\n } else {\n suffix = \"_\" + std::to_string(node_entry.index);\nclass MappedNodeEntry {\n public:\n MappedNodeEntry(NodeEntry node_entry, const int original_dtype)\n : entry(std::move(node_entry)), original_dtype(original_dtype) {\n dtype = original_dtype;\n }\n\n void convert(const int new_dtype) {\n CHECK_EQ(dtype, original_dtype); // dtype should be changed only once\n dtype = new_dtype;\n }\n\n const NodeEntry& as_original() {\n return as_type(original_dtype);\n }\n\n const NodeEntry& as_type(const int target_dtype) {\n if (dtype == target_dtype) {\n return entry;\n }\n NodeEntry& cast_entry = casts[target_dtype];\n if (cast_entry.node == nullptr) {\n cast_entry = cast(target_dtype);\n CHECK(cast_entry.node);\n }\n return cast_entry;\n }\n return suffix;\n}\n\n// add amp_cast node between curr_node and input\nstatic void AddCastNode(const nnvm::NodeEntry& e,\n const std::string& suffix,\n const nnvm::NodeEntry& input,\n const std::string dtype,\n nnvm::NodeEntryMap* mirror_entry_map,\n ObjectPtr curr_node) {\n ObjectPtr cast_node =\n InsertNode(\"amp_cast\", e.node->attrs.name + suffix + \"_amp_cast_\" + dtype, curr_node, input);\n cast_node->attrs.dict[\"dtype\"] = dtype;\n cast_node->op()->attr_parser(&(cast_node->attrs));\n (*mirror_entry_map)[e] = NodeEntry{std::move(cast_node), 0, e.version};\n return;\n}\n bool has_dtype_entry(const int target_dtype) const {\n return dtype == target_dtype || casts.count(target_dtype) > 0;\n }\n\n bool can_be_cast_offline_to(const int target_dtype) const {", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "820816070", + "repo_full_name": "apache/mxnet", + "pr_number": 20753, + "pr_file": "src/nnvm/low_precision_pass.cc", + "discussion_id": "820816070", + "commented_code": "@@ -29,374 +29,322 @@\n #include \n #include \n #include \n+#include \"../operator/operator_common.h\"\n \n namespace mxnet {\n using nnvm::Graph;\n using nnvm::Node;\n using nnvm::NodeEntry;\n using nnvm::ObjectPtr;\n-using nnvm::Symbol;\n-\n-// create a node for operator : op_name with name : node_name\n-static ObjectPtr CreateNode(std::string op_name, std::string node_name) {\n- ObjectPtr node = Node::Create();\n- node->attrs.name = node_name;\n- if (op_name == \"nullptr\") {\n- node->attrs.op = nullptr;\n- // ugly workaround because VariableParam is not exposed\n- node->attrs.parsed =\n- nnvm::Symbol::CreateVariable(node->attrs.name).outputs[0].node->attrs.parsed;\n- } else {\n- node->attrs.op = Op::Get(op_name);\n- }\n- return node;\n-}\n \n-static ObjectPtr InsertNode(std::string op_name,\n- std::string node_name,\n- ObjectPtr current,\n- NodeEntry previous) {\n- ObjectPtr node = CreateNode(op_name, node_name);\n- node->inputs.emplace_back(previous);\n- if (current)\n- current->inputs.emplace_back(NodeEntry{node, 0, 0});\n- return node;\n+bool is_cast_op(const nnvm::Op* 
const op) {\n+ return op && (op == Op::Get(\"amp_cast\") || op == Op::Get(\"Cast\"));\n }\n \n-// get suffix for a node entry so that it can be used for amp_cast/amp_multicast node name\n-static std::string GetSuffix(const nnvm::NodeEntry& node_entry,\n- const std::unordered_map& mirror_map) {\n- static const auto& flist_outputs = nnvm::Op::GetAttr(\"FListOutputNames\");\n- std::string suffix = \"\";\n- ObjectPtr mirror_node = mirror_map.at(node_entry.node.get());\n- if (mirror_node->op() != nullptr) {\n- auto list_output_names_func = flist_outputs.get(node_entry.node->op(), nullptr);\n- if (list_output_names_func != nullptr) {\n- std::vector names = list_output_names_func(node_entry.node->attrs);\n- suffix = \"_\" + names[node_entry.index];\n- } else {\n- suffix = \"_\" + std::to_string(node_entry.index);\n+class MappedNodeEntry {\n+ public:\n+ MappedNodeEntry(NodeEntry node_entry, const int original_dtype)\n+ : entry(std::move(node_entry)), original_dtype(original_dtype) {\n+ dtype = original_dtype;\n+ }\n+\n+ void convert(const int new_dtype) {\n+ CHECK_EQ(dtype, original_dtype); // dtype should be changed only once\n+ dtype = new_dtype;\n+ }\n+\n+ const NodeEntry& as_original() {\n+ return as_type(original_dtype);\n+ }\n+\n+ const NodeEntry& as_type(const int target_dtype) {\n+ if (dtype == target_dtype) {\n+ return entry;\n+ }\n+ NodeEntry& cast_entry = casts[target_dtype];\n+ if (cast_entry.node == nullptr) {\n+ cast_entry = cast(target_dtype);\n+ CHECK(cast_entry.node);\n }\n+ return cast_entry;\n }\n- return suffix;\n-}\n \n-// add amp_cast node between curr_node and input\n-static void AddCastNode(const nnvm::NodeEntry& e,\n- const std::string& suffix,\n- const nnvm::NodeEntry& input,\n- const std::string dtype,\n- nnvm::NodeEntryMap* mirror_entry_map,\n- ObjectPtr curr_node) {\n- ObjectPtr cast_node =\n- InsertNode(\"amp_cast\", e.node->attrs.name + suffix + \"_amp_cast_\" + dtype, curr_node, input);\n- cast_node->attrs.dict[\"dtype\"] = dtype;\n- cast_node->op()->attr_parser(&(cast_node->attrs));\n- (*mirror_entry_map)[e] = NodeEntry{std::move(cast_node), 0, e.version};\n- return;\n-}\n+ bool has_dtype_entry(const int target_dtype) const {\n+ return dtype == target_dtype || casts.count(target_dtype) > 0;\n+ }\n+\n+ bool can_be_cast_offline_to(const int target_dtype) const {", + "comment_created_at": "2022-03-07T15:26:58+00:00", + "comment_author": "bartekkuncer", + "comment_body": "Why amount of casts to a particular dtype determines whether casting to this dtype can be performed offline?", + "pr_file_module": null + }, + { + "comment_id": "820851332", + "repo_full_name": "apache/mxnet", + "pr_number": 20753, + "pr_file": "src/nnvm/low_precision_pass.cc", + "discussion_id": "820816070", + "commented_code": "@@ -29,374 +29,322 @@\n #include \n #include \n #include \n+#include \"../operator/operator_common.h\"\n \n namespace mxnet {\n using nnvm::Graph;\n using nnvm::Node;\n using nnvm::NodeEntry;\n using nnvm::ObjectPtr;\n-using nnvm::Symbol;\n-\n-// create a node for operator : op_name with name : node_name\n-static ObjectPtr CreateNode(std::string op_name, std::string node_name) {\n- ObjectPtr node = Node::Create();\n- node->attrs.name = node_name;\n- if (op_name == \"nullptr\") {\n- node->attrs.op = nullptr;\n- // ugly workaround because VariableParam is not exposed\n- node->attrs.parsed =\n- nnvm::Symbol::CreateVariable(node->attrs.name).outputs[0].node->attrs.parsed;\n- } else {\n- node->attrs.op = Op::Get(op_name);\n- }\n- return node;\n-}\n \n-static ObjectPtr 
InsertNode(std::string op_name,\n- std::string node_name,\n- ObjectPtr current,\n- NodeEntry previous) {\n- ObjectPtr node = CreateNode(op_name, node_name);\n- node->inputs.emplace_back(previous);\n- if (current)\n- current->inputs.emplace_back(NodeEntry{node, 0, 0});\n- return node;\n+bool is_cast_op(const nnvm::Op* const op) {\n+ return op && (op == Op::Get(\"amp_cast\") || op == Op::Get(\"Cast\"));\n }\n \n-// get suffix for a node entry so that it can be used for amp_cast/amp_multicast node name\n-static std::string GetSuffix(const nnvm::NodeEntry& node_entry,\n- const std::unordered_map& mirror_map) {\n- static const auto& flist_outputs = nnvm::Op::GetAttr(\"FListOutputNames\");\n- std::string suffix = \"\";\n- ObjectPtr mirror_node = mirror_map.at(node_entry.node.get());\n- if (mirror_node->op() != nullptr) {\n- auto list_output_names_func = flist_outputs.get(node_entry.node->op(), nullptr);\n- if (list_output_names_func != nullptr) {\n- std::vector names = list_output_names_func(node_entry.node->attrs);\n- suffix = \"_\" + names[node_entry.index];\n- } else {\n- suffix = \"_\" + std::to_string(node_entry.index);\n+class MappedNodeEntry {\n+ public:\n+ MappedNodeEntry(NodeEntry node_entry, const int original_dtype)\n+ : entry(std::move(node_entry)), original_dtype(original_dtype) {\n+ dtype = original_dtype;\n+ }\n+\n+ void convert(const int new_dtype) {\n+ CHECK_EQ(dtype, original_dtype); // dtype should be changed only once\n+ dtype = new_dtype;\n+ }\n+\n+ const NodeEntry& as_original() {\n+ return as_type(original_dtype);\n+ }\n+\n+ const NodeEntry& as_type(const int target_dtype) {\n+ if (dtype == target_dtype) {\n+ return entry;\n+ }\n+ NodeEntry& cast_entry = casts[target_dtype];\n+ if (cast_entry.node == nullptr) {\n+ cast_entry = cast(target_dtype);\n+ CHECK(cast_entry.node);\n }\n+ return cast_entry;\n }\n- return suffix;\n-}\n \n-// add amp_cast node between curr_node and input\n-static void AddCastNode(const nnvm::NodeEntry& e,\n- const std::string& suffix,\n- const nnvm::NodeEntry& input,\n- const std::string dtype,\n- nnvm::NodeEntryMap* mirror_entry_map,\n- ObjectPtr curr_node) {\n- ObjectPtr cast_node =\n- InsertNode(\"amp_cast\", e.node->attrs.name + suffix + \"_amp_cast_\" + dtype, curr_node, input);\n- cast_node->attrs.dict[\"dtype\"] = dtype;\n- cast_node->op()->attr_parser(&(cast_node->attrs));\n- (*mirror_entry_map)[e] = NodeEntry{std::move(cast_node), 0, e.version};\n- return;\n-}\n+ bool has_dtype_entry(const int target_dtype) const {\n+ return dtype == target_dtype || casts.count(target_dtype) > 0;\n+ }\n+\n+ bool can_be_cast_offline_to(const int target_dtype) const {", + "comment_created_at": "2022-03-07T16:01:08+00:00", + "comment_author": "PawelGlomski-Intel", + "comment_body": "`casts` is an unordered map, thus the count returns whether the cast to this dtype exists or not. A parameter should be cast offline only if there is a need for it - there is at least one operator using it through a cast.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-graph-traversal-optimization.md b/_reviewers/mxnet-graph-traversal-optimization.md index 6ba03db..1c68ce2 100644 --- a/_reviewers/mxnet-graph-traversal-optimization.md +++ b/_reviewers/mxnet-graph-traversal-optimization.md @@ -51,155 +51,3 @@ for (auto pattern : patterns) { ``` This approach significantly reduces computational complexity when multiple operations or pattern matching need to be performed on the same graph structure. 
- - -[ - { - "discussion_id": "664297807", - "pr_number": 20400, - "pr_file": "src/operator/subgraph/mkldnn/mkldnn_fc_sum_fuse.h", - "created_at": "2021-07-06T07:19:38+00:00", - "commented_code": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied. See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n/*\n \\brief It fuses FC + SUM for floating point output in second post quantization pass\n*/\n\n#ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_SUM_FUSE_H_\n#define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_SUM_FUSE_H_\n#if MXNET_USE_MKLDNN == 1\n\n#include \n#include \n#include \n#include \n#include \n#include \"../common.h\"\n#include \"../../tensor/matrix_op-inl.h\"\n#include \"mkldnn_subgraph_base-inl.h\"\n#include \"mkldnn_fc-inl.h\"\n\nnamespace mxnet {\nnamespace op {\n\nclass SgMKLDNNFCSumFuseSelector : public SubgraphSelector {\n public:\n /*! \\brief pattern match status */\n enum SelectStatus {\n kFail = 0,\n kStart,\n kSuccess,\n };\n\n private:\n bool quantized_;\n SelectStatus status_;\n std::vector matched_list_;\n\n public:\n explicit SgMKLDNNFCSumFuseSelector(bool quantized) :\n quantized_(quantized) {}\n\n bool Select(const nnvm::Node &n, const std::shared_ptr& node_attr) override {\n if (n.op() == Op::Get(\"_sg_mkldnn_fully_connected\") && SupportMKLDNNAttr(node_attr)) {\n auto const &fc_param = nnvm::get(n.attrs.parsed);\n // TODO(anko) remove fc_param.mkldnn_param.quantized from if below\n // to fuse even for not quantized?\n if (fc_param.mkldnn_param.enable_float_output && fc_param.mkldnn_param.quantized) {\n status_ = kStart;\n matched_list_.clear();\n matched_list_.push_back(&n);\n return true;\n }\n }\n return false;\n }\n\n bool SelectInput(const nnvm::Node &n, const nnvm::Node &new_node) override {\n return false;\n }\n\n bool SelectOutput(const nnvm::Node &n, const nnvm::Node &new_node) override {\n if (status_ == kFail || status_ == kSuccess || new_node.is_variable())\n return false;\n // If n isn't the last matched node, then we encoutered a internal\n // branch, we should pop out the node behind n and stop fusion.\n if (matched_list_.back() != &n) {\n if (std::find(matched_list_.begin(), matched_list_.end(), &n) !=\n matched_list_.end()) {\n while (matched_list_.back() != &n) {\n matched_list_.pop_back();\n }\n }\n status_ = kSuccess;\n return false;\n }\n\n switch (status_) {\n case kStart:\n if (new_node.op()->name == \"elemwise_add\") {\n matched_list_.push_back(&new_node);\n status_ = kSuccess;\n return true;\n }\n default:\n status_ = kSuccess;\n return false;\n }\n }\n\n std::vector Filter(\n const std::vector &candidates) override {\n if (status_ == kFail) {\n return std::vector(0);\n } else {\n std::vector ret;\n for (auto i : matched_list_) {\n auto non_const_i = const_cast(i);\n if (std::find(candidates.begin(), candidates.end(), non_const_i) 
!=\n candidates.end()) {\n ret.push_back(non_const_i);\n }\n }\n return candidates;\n }\n }\n\n void Reset() override {\n CHECK_GE(matched_list_.size(), 1);\n auto new_selector = SgMKLDNNFCSumFuseSelector(quantized_);\n new_selector.Select(*matched_list_[0], nullptr);\n *this = new_selector;\n }\n};\n\nclass SgMKLDNNFCSumFuseProperty : public SubgraphProperty {\n public:\n SgMKLDNNFCSumFuseProperty() {}\n\n static SubgraphPropertyPtr Create() {\n static const std::string &name = \"MKLDNN FullyConnected post quantization second pass\";\n auto property = std::make_shared();\n property->SetAttr(\"property_name\", name);\n property->SetAttr(\"inference_only\", true);\n if (dmlc::GetEnv(\"MXNET_DISABLE_MKLDNN_FC_SUM\", 0)) {\n property->SetAttr(\"disable\", true);\n }\n return property;\n }\n\n nnvm::ObjectPtr CreateSubgraphNode(const nnvm::Symbol &sym,\n const int subgraph_id = 0) const override {\n nnvm::ObjectPtr fc_node = nullptr;\n nnvm::ObjectPtr ew_add_node = nullptr;\n\n DFSVisit(sym.outputs, [&](const nnvm::ObjectPtr &node) {\n if (node->is_variable()) return;\n auto &sub_name = node->op()->name;\n if (sub_name == \"_sg_mkldnn_fully_connected\") {\n fc_node = node;\n } else if (sub_name == \"elemwise_add\") {\n ew_add_node = node;\n }\n });\n\n CHECK_NOTNULL(fc_node);\n if (ew_add_node != nullptr) {\n CHECK_NOTNULL(fc_node->attrs.subgraphs[0]);\n auto fc_orginal = fc_node->attrs.subgraphs[0]->outputs[0].node;\n if (fc_orginal->op() == Op::Get(\"FullyConnected\")) {\n nnvm::Symbol new_sym;\n nnvm::NodeEntry &ew_input_with_fc = (ew_add_node->inputs[1].node == fc_node) ?\n ew_add_node->inputs[1] :\n ew_add_node->inputs[0];\n ew_input_with_fc.node = fc_orginal;\n new_sym.outputs.emplace_back(ew_add_node);\n fc_node->attrs.subgraphs.clear();\n fc_node->attrs.subgraphs.emplace_back(std::make_shared(new_sym));\n fc_node->attrs.dict[\"with_sum\"] = \"True\";\n fc_node->op()->attr_parser(&(fc_node->attrs));\n }\n }\n return fc_node;\n }\n\n SubgraphSelectorPtr CreateSubgraphSelector() const override {\n bool quantized = HasAttr(\"quantize\") ? GetAttr(\"quantize\") : false;\n auto selector =\n std::make_shared(quantized);\n return selector;\n }\n\n void ConnectSubgraphOutputs(\n const nnvm::ObjectPtr n,\n std::vector *output_entries) const override {\n // Connect all extern output entries to output[0]\n for (size_t i = 0; i < output_entries->size(); ++i) {\n auto entry_ptr = output_entries->at(i);\n *entry_ptr = nnvm::NodeEntry{n, entry_ptr->index, 0};\n }\n }\n\n void ConnectSubgraphInputs(\n const nnvm::ObjectPtr n, std::vector *input_entries,\n std::vector *orig_input_entries) const override {\n auto sym = n->attrs.subgraphs[0];\n auto const &fc_param = nnvm::get(n->attrs.parsed);\n std::unordered_set node_sets;\n DFSVisit(sym->outputs, [&](const nnvm::ObjectPtr &node) {", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "664297807", - "repo_full_name": "apache/mxnet", - "pr_number": 20400, - "pr_file": "src/operator/subgraph/mkldnn/mkldnn_fc_sum_fuse.h", - "discussion_id": "664297807", - "commented_code": "@@ -0,0 +1,240 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. 
You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*\n+ \\brief It fuses FC + SUM for floating point output in second post quantization pass\n+*/\n+\n+#ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_SUM_FUSE_H_\n+#define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_SUM_FUSE_H_\n+#if MXNET_USE_MKLDNN == 1\n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \"../common.h\"\n+#include \"../../tensor/matrix_op-inl.h\"\n+#include \"mkldnn_subgraph_base-inl.h\"\n+#include \"mkldnn_fc-inl.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+class SgMKLDNNFCSumFuseSelector : public SubgraphSelector {\n+ public:\n+ /*! \\brief pattern match status */\n+ enum SelectStatus {\n+ kFail = 0,\n+ kStart,\n+ kSuccess,\n+ };\n+\n+ private:\n+ bool quantized_;\n+ SelectStatus status_;\n+ std::vector matched_list_;\n+\n+ public:\n+ explicit SgMKLDNNFCSumFuseSelector(bool quantized) :\n+ quantized_(quantized) {}\n+\n+ bool Select(const nnvm::Node &n, const std::shared_ptr& node_attr) override {\n+ if (n.op() == Op::Get(\"_sg_mkldnn_fully_connected\") && SupportMKLDNNAttr(node_attr)) {\n+ auto const &fc_param = nnvm::get(n.attrs.parsed);\n+ // TODO(anko) remove fc_param.mkldnn_param.quantized from if below\n+ // to fuse even for not quantized?\n+ if (fc_param.mkldnn_param.enable_float_output && fc_param.mkldnn_param.quantized) {\n+ status_ = kStart;\n+ matched_list_.clear();\n+ matched_list_.push_back(&n);\n+ return true;\n+ }\n+ }\n+ return false;\n+ }\n+\n+ bool SelectInput(const nnvm::Node &n, const nnvm::Node &new_node) override {\n+ return false;\n+ }\n+\n+ bool SelectOutput(const nnvm::Node &n, const nnvm::Node &new_node) override {\n+ if (status_ == kFail || status_ == kSuccess || new_node.is_variable())\n+ return false;\n+ // If n isn't the last matched node, then we encoutered a internal\n+ // branch, we should pop out the node behind n and stop fusion.\n+ if (matched_list_.back() != &n) {\n+ if (std::find(matched_list_.begin(), matched_list_.end(), &n) !=\n+ matched_list_.end()) {\n+ while (matched_list_.back() != &n) {\n+ matched_list_.pop_back();\n+ }\n+ }\n+ status_ = kSuccess;\n+ return false;\n+ }\n+\n+ switch (status_) {\n+ case kStart:\n+ if (new_node.op()->name == \"elemwise_add\") {\n+ matched_list_.push_back(&new_node);\n+ status_ = kSuccess;\n+ return true;\n+ }\n+ default:\n+ status_ = kSuccess;\n+ return false;\n+ }\n+ }\n+\n+ std::vector Filter(\n+ const std::vector &candidates) override {\n+ if (status_ == kFail) {\n+ return std::vector(0);\n+ } else {\n+ std::vector ret;\n+ for (auto i : matched_list_) {\n+ auto non_const_i = const_cast(i);\n+ if (std::find(candidates.begin(), candidates.end(), non_const_i) !=\n+ candidates.end()) {\n+ ret.push_back(non_const_i);\n+ }\n+ }\n+ return candidates;\n+ }\n+ }\n+\n+ void Reset() override {\n+ CHECK_GE(matched_list_.size(), 1);\n+ auto new_selector = SgMKLDNNFCSumFuseSelector(quantized_);\n+ new_selector.Select(*matched_list_[0], nullptr);\n+ *this = new_selector;\n+ }\n+};\n+\n+class SgMKLDNNFCSumFuseProperty : public SubgraphProperty {\n+ public:\n+ SgMKLDNNFCSumFuseProperty() {}\n+\n+ static SubgraphPropertyPtr Create() {\n+ static 
const std::string &name = \"MKLDNN FullyConnected post quantization second pass\";\n+ auto property = std::make_shared();\n+ property->SetAttr(\"property_name\", name);\n+ property->SetAttr(\"inference_only\", true);\n+ if (dmlc::GetEnv(\"MXNET_DISABLE_MKLDNN_FC_SUM\", 0)) {\n+ property->SetAttr(\"disable\", true);\n+ }\n+ return property;\n+ }\n+\n+ nnvm::ObjectPtr CreateSubgraphNode(const nnvm::Symbol &sym,\n+ const int subgraph_id = 0) const override {\n+ nnvm::ObjectPtr fc_node = nullptr;\n+ nnvm::ObjectPtr ew_add_node = nullptr;\n+\n+ DFSVisit(sym.outputs, [&](const nnvm::ObjectPtr &node) {\n+ if (node->is_variable()) return;\n+ auto &sub_name = node->op()->name;\n+ if (sub_name == \"_sg_mkldnn_fully_connected\") {\n+ fc_node = node;\n+ } else if (sub_name == \"elemwise_add\") {\n+ ew_add_node = node;\n+ }\n+ });\n+\n+ CHECK_NOTNULL(fc_node);\n+ if (ew_add_node != nullptr) {\n+ CHECK_NOTNULL(fc_node->attrs.subgraphs[0]);\n+ auto fc_orginal = fc_node->attrs.subgraphs[0]->outputs[0].node;\n+ if (fc_orginal->op() == Op::Get(\"FullyConnected\")) {\n+ nnvm::Symbol new_sym;\n+ nnvm::NodeEntry &ew_input_with_fc = (ew_add_node->inputs[1].node == fc_node) ?\n+ ew_add_node->inputs[1] :\n+ ew_add_node->inputs[0];\n+ ew_input_with_fc.node = fc_orginal;\n+ new_sym.outputs.emplace_back(ew_add_node);\n+ fc_node->attrs.subgraphs.clear();\n+ fc_node->attrs.subgraphs.emplace_back(std::make_shared(new_sym));\n+ fc_node->attrs.dict[\"with_sum\"] = \"True\";\n+ fc_node->op()->attr_parser(&(fc_node->attrs));\n+ }\n+ }\n+ return fc_node;\n+ }\n+\n+ SubgraphSelectorPtr CreateSubgraphSelector() const override {\n+ bool quantized = HasAttr(\"quantize\") ? GetAttr(\"quantize\") : false;\n+ auto selector =\n+ std::make_shared(quantized);\n+ return selector;\n+ }\n+\n+ void ConnectSubgraphOutputs(\n+ const nnvm::ObjectPtr n,\n+ std::vector *output_entries) const override {\n+ // Connect all extern output entries to output[0]\n+ for (size_t i = 0; i < output_entries->size(); ++i) {\n+ auto entry_ptr = output_entries->at(i);\n+ *entry_ptr = nnvm::NodeEntry{n, entry_ptr->index, 0};\n+ }\n+ }\n+\n+ void ConnectSubgraphInputs(\n+ const nnvm::ObjectPtr n, std::vector *input_entries,\n+ std::vector *orig_input_entries) const override {\n+ auto sym = n->attrs.subgraphs[0];\n+ auto const &fc_param = nnvm::get(n->attrs.parsed);\n+ std::unordered_set node_sets;\n+ DFSVisit(sym->outputs, [&](const nnvm::ObjectPtr &node) {", - "comment_created_at": "2021-07-06T07:19:38+00:00", - "comment_author": "mozga-intel", - "comment_body": "Could you please tell me whether there is possible to prevent calling DFS multiple time (for every pattern)?", - "pr_file_module": null - }, - { - "comment_id": "664386233", - "repo_full_name": "apache/mxnet", - "pr_number": 20400, - "pr_file": "src/operator/subgraph/mkldnn/mkldnn_fc_sum_fuse.h", - "discussion_id": "664297807", - "commented_code": "@@ -0,0 +1,240 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. 
You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*\n+ \\brief It fuses FC + SUM for floating point output in second post quantization pass\n+*/\n+\n+#ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_SUM_FUSE_H_\n+#define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_SUM_FUSE_H_\n+#if MXNET_USE_MKLDNN == 1\n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \"../common.h\"\n+#include \"../../tensor/matrix_op-inl.h\"\n+#include \"mkldnn_subgraph_base-inl.h\"\n+#include \"mkldnn_fc-inl.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+class SgMKLDNNFCSumFuseSelector : public SubgraphSelector {\n+ public:\n+ /*! \\brief pattern match status */\n+ enum SelectStatus {\n+ kFail = 0,\n+ kStart,\n+ kSuccess,\n+ };\n+\n+ private:\n+ bool quantized_;\n+ SelectStatus status_;\n+ std::vector matched_list_;\n+\n+ public:\n+ explicit SgMKLDNNFCSumFuseSelector(bool quantized) :\n+ quantized_(quantized) {}\n+\n+ bool Select(const nnvm::Node &n, const std::shared_ptr& node_attr) override {\n+ if (n.op() == Op::Get(\"_sg_mkldnn_fully_connected\") && SupportMKLDNNAttr(node_attr)) {\n+ auto const &fc_param = nnvm::get(n.attrs.parsed);\n+ // TODO(anko) remove fc_param.mkldnn_param.quantized from if below\n+ // to fuse even for not quantized?\n+ if (fc_param.mkldnn_param.enable_float_output && fc_param.mkldnn_param.quantized) {\n+ status_ = kStart;\n+ matched_list_.clear();\n+ matched_list_.push_back(&n);\n+ return true;\n+ }\n+ }\n+ return false;\n+ }\n+\n+ bool SelectInput(const nnvm::Node &n, const nnvm::Node &new_node) override {\n+ return false;\n+ }\n+\n+ bool SelectOutput(const nnvm::Node &n, const nnvm::Node &new_node) override {\n+ if (status_ == kFail || status_ == kSuccess || new_node.is_variable())\n+ return false;\n+ // If n isn't the last matched node, then we encoutered a internal\n+ // branch, we should pop out the node behind n and stop fusion.\n+ if (matched_list_.back() != &n) {\n+ if (std::find(matched_list_.begin(), matched_list_.end(), &n) !=\n+ matched_list_.end()) {\n+ while (matched_list_.back() != &n) {\n+ matched_list_.pop_back();\n+ }\n+ }\n+ status_ = kSuccess;\n+ return false;\n+ }\n+\n+ switch (status_) {\n+ case kStart:\n+ if (new_node.op()->name == \"elemwise_add\") {\n+ matched_list_.push_back(&new_node);\n+ status_ = kSuccess;\n+ return true;\n+ }\n+ default:\n+ status_ = kSuccess;\n+ return false;\n+ }\n+ }\n+\n+ std::vector Filter(\n+ const std::vector &candidates) override {\n+ if (status_ == kFail) {\n+ return std::vector(0);\n+ } else {\n+ std::vector ret;\n+ for (auto i : matched_list_) {\n+ auto non_const_i = const_cast(i);\n+ if (std::find(candidates.begin(), candidates.end(), non_const_i) !=\n+ candidates.end()) {\n+ ret.push_back(non_const_i);\n+ }\n+ }\n+ return candidates;\n+ }\n+ }\n+\n+ void Reset() override {\n+ CHECK_GE(matched_list_.size(), 1);\n+ auto new_selector = SgMKLDNNFCSumFuseSelector(quantized_);\n+ new_selector.Select(*matched_list_[0], nullptr);\n+ *this = new_selector;\n+ }\n+};\n+\n+class SgMKLDNNFCSumFuseProperty : public SubgraphProperty {\n+ public:\n+ SgMKLDNNFCSumFuseProperty() {}\n+\n+ static SubgraphPropertyPtr Create() {\n+ static 
const std::string &name = \"MKLDNN FullyConnected post quantization second pass\";\n+ auto property = std::make_shared();\n+ property->SetAttr(\"property_name\", name);\n+ property->SetAttr(\"inference_only\", true);\n+ if (dmlc::GetEnv(\"MXNET_DISABLE_MKLDNN_FC_SUM\", 0)) {\n+ property->SetAttr(\"disable\", true);\n+ }\n+ return property;\n+ }\n+\n+ nnvm::ObjectPtr CreateSubgraphNode(const nnvm::Symbol &sym,\n+ const int subgraph_id = 0) const override {\n+ nnvm::ObjectPtr fc_node = nullptr;\n+ nnvm::ObjectPtr ew_add_node = nullptr;\n+\n+ DFSVisit(sym.outputs, [&](const nnvm::ObjectPtr &node) {\n+ if (node->is_variable()) return;\n+ auto &sub_name = node->op()->name;\n+ if (sub_name == \"_sg_mkldnn_fully_connected\") {\n+ fc_node = node;\n+ } else if (sub_name == \"elemwise_add\") {\n+ ew_add_node = node;\n+ }\n+ });\n+\n+ CHECK_NOTNULL(fc_node);\n+ if (ew_add_node != nullptr) {\n+ CHECK_NOTNULL(fc_node->attrs.subgraphs[0]);\n+ auto fc_orginal = fc_node->attrs.subgraphs[0]->outputs[0].node;\n+ if (fc_orginal->op() == Op::Get(\"FullyConnected\")) {\n+ nnvm::Symbol new_sym;\n+ nnvm::NodeEntry &ew_input_with_fc = (ew_add_node->inputs[1].node == fc_node) ?\n+ ew_add_node->inputs[1] :\n+ ew_add_node->inputs[0];\n+ ew_input_with_fc.node = fc_orginal;\n+ new_sym.outputs.emplace_back(ew_add_node);\n+ fc_node->attrs.subgraphs.clear();\n+ fc_node->attrs.subgraphs.emplace_back(std::make_shared(new_sym));\n+ fc_node->attrs.dict[\"with_sum\"] = \"True\";\n+ fc_node->op()->attr_parser(&(fc_node->attrs));\n+ }\n+ }\n+ return fc_node;\n+ }\n+\n+ SubgraphSelectorPtr CreateSubgraphSelector() const override {\n+ bool quantized = HasAttr(\"quantize\") ? GetAttr(\"quantize\") : false;\n+ auto selector =\n+ std::make_shared(quantized);\n+ return selector;\n+ }\n+\n+ void ConnectSubgraphOutputs(\n+ const nnvm::ObjectPtr n,\n+ std::vector *output_entries) const override {\n+ // Connect all extern output entries to output[0]\n+ for (size_t i = 0; i < output_entries->size(); ++i) {\n+ auto entry_ptr = output_entries->at(i);\n+ *entry_ptr = nnvm::NodeEntry{n, entry_ptr->index, 0};\n+ }\n+ }\n+\n+ void ConnectSubgraphInputs(\n+ const nnvm::ObjectPtr n, std::vector *input_entries,\n+ std::vector *orig_input_entries) const override {\n+ auto sym = n->attrs.subgraphs[0];\n+ auto const &fc_param = nnvm::get(n->attrs.parsed);\n+ std::unordered_set node_sets;\n+ DFSVisit(sym->outputs, [&](const nnvm::ObjectPtr &node) {", - "comment_created_at": "2021-07-06T09:20:20+00:00", - "comment_author": "anko-intel", - "comment_body": "I think so, but I think it is to big change in this moment.", - "pr_file_module": null - }, - { - "comment_id": "664396171", - "repo_full_name": "apache/mxnet", - "pr_number": 20400, - "pr_file": "src/operator/subgraph/mkldnn/mkldnn_fc_sum_fuse.h", - "discussion_id": "664297807", - "commented_code": "@@ -0,0 +1,240 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. 
You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*\n+ \\brief It fuses FC + SUM for floating point output in second post quantization pass\n+*/\n+\n+#ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_SUM_FUSE_H_\n+#define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_SUM_FUSE_H_\n+#if MXNET_USE_MKLDNN == 1\n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \"../common.h\"\n+#include \"../../tensor/matrix_op-inl.h\"\n+#include \"mkldnn_subgraph_base-inl.h\"\n+#include \"mkldnn_fc-inl.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+class SgMKLDNNFCSumFuseSelector : public SubgraphSelector {\n+ public:\n+ /*! \\brief pattern match status */\n+ enum SelectStatus {\n+ kFail = 0,\n+ kStart,\n+ kSuccess,\n+ };\n+\n+ private:\n+ bool quantized_;\n+ SelectStatus status_;\n+ std::vector matched_list_;\n+\n+ public:\n+ explicit SgMKLDNNFCSumFuseSelector(bool quantized) :\n+ quantized_(quantized) {}\n+\n+ bool Select(const nnvm::Node &n, const std::shared_ptr& node_attr) override {\n+ if (n.op() == Op::Get(\"_sg_mkldnn_fully_connected\") && SupportMKLDNNAttr(node_attr)) {\n+ auto const &fc_param = nnvm::get(n.attrs.parsed);\n+ // TODO(anko) remove fc_param.mkldnn_param.quantized from if below\n+ // to fuse even for not quantized?\n+ if (fc_param.mkldnn_param.enable_float_output && fc_param.mkldnn_param.quantized) {\n+ status_ = kStart;\n+ matched_list_.clear();\n+ matched_list_.push_back(&n);\n+ return true;\n+ }\n+ }\n+ return false;\n+ }\n+\n+ bool SelectInput(const nnvm::Node &n, const nnvm::Node &new_node) override {\n+ return false;\n+ }\n+\n+ bool SelectOutput(const nnvm::Node &n, const nnvm::Node &new_node) override {\n+ if (status_ == kFail || status_ == kSuccess || new_node.is_variable())\n+ return false;\n+ // If n isn't the last matched node, then we encoutered a internal\n+ // branch, we should pop out the node behind n and stop fusion.\n+ if (matched_list_.back() != &n) {\n+ if (std::find(matched_list_.begin(), matched_list_.end(), &n) !=\n+ matched_list_.end()) {\n+ while (matched_list_.back() != &n) {\n+ matched_list_.pop_back();\n+ }\n+ }\n+ status_ = kSuccess;\n+ return false;\n+ }\n+\n+ switch (status_) {\n+ case kStart:\n+ if (new_node.op()->name == \"elemwise_add\") {\n+ matched_list_.push_back(&new_node);\n+ status_ = kSuccess;\n+ return true;\n+ }\n+ default:\n+ status_ = kSuccess;\n+ return false;\n+ }\n+ }\n+\n+ std::vector Filter(\n+ const std::vector &candidates) override {\n+ if (status_ == kFail) {\n+ return std::vector(0);\n+ } else {\n+ std::vector ret;\n+ for (auto i : matched_list_) {\n+ auto non_const_i = const_cast(i);\n+ if (std::find(candidates.begin(), candidates.end(), non_const_i) !=\n+ candidates.end()) {\n+ ret.push_back(non_const_i);\n+ }\n+ }\n+ return candidates;\n+ }\n+ }\n+\n+ void Reset() override {\n+ CHECK_GE(matched_list_.size(), 1);\n+ auto new_selector = SgMKLDNNFCSumFuseSelector(quantized_);\n+ new_selector.Select(*matched_list_[0], nullptr);\n+ *this = new_selector;\n+ }\n+};\n+\n+class SgMKLDNNFCSumFuseProperty : public SubgraphProperty {\n+ public:\n+ SgMKLDNNFCSumFuseProperty() {}\n+\n+ static SubgraphPropertyPtr Create() {\n+ static 
const std::string &name = \"MKLDNN FullyConnected post quantization second pass\";\n+ auto property = std::make_shared();\n+ property->SetAttr(\"property_name\", name);\n+ property->SetAttr(\"inference_only\", true);\n+ if (dmlc::GetEnv(\"MXNET_DISABLE_MKLDNN_FC_SUM\", 0)) {\n+ property->SetAttr(\"disable\", true);\n+ }\n+ return property;\n+ }\n+\n+ nnvm::ObjectPtr CreateSubgraphNode(const nnvm::Symbol &sym,\n+ const int subgraph_id = 0) const override {\n+ nnvm::ObjectPtr fc_node = nullptr;\n+ nnvm::ObjectPtr ew_add_node = nullptr;\n+\n+ DFSVisit(sym.outputs, [&](const nnvm::ObjectPtr &node) {\n+ if (node->is_variable()) return;\n+ auto &sub_name = node->op()->name;\n+ if (sub_name == \"_sg_mkldnn_fully_connected\") {\n+ fc_node = node;\n+ } else if (sub_name == \"elemwise_add\") {\n+ ew_add_node = node;\n+ }\n+ });\n+\n+ CHECK_NOTNULL(fc_node);\n+ if (ew_add_node != nullptr) {\n+ CHECK_NOTNULL(fc_node->attrs.subgraphs[0]);\n+ auto fc_orginal = fc_node->attrs.subgraphs[0]->outputs[0].node;\n+ if (fc_orginal->op() == Op::Get(\"FullyConnected\")) {\n+ nnvm::Symbol new_sym;\n+ nnvm::NodeEntry &ew_input_with_fc = (ew_add_node->inputs[1].node == fc_node) ?\n+ ew_add_node->inputs[1] :\n+ ew_add_node->inputs[0];\n+ ew_input_with_fc.node = fc_orginal;\n+ new_sym.outputs.emplace_back(ew_add_node);\n+ fc_node->attrs.subgraphs.clear();\n+ fc_node->attrs.subgraphs.emplace_back(std::make_shared(new_sym));\n+ fc_node->attrs.dict[\"with_sum\"] = \"True\";\n+ fc_node->op()->attr_parser(&(fc_node->attrs));\n+ }\n+ }\n+ return fc_node;\n+ }\n+\n+ SubgraphSelectorPtr CreateSubgraphSelector() const override {\n+ bool quantized = HasAttr(\"quantize\") ? GetAttr(\"quantize\") : false;\n+ auto selector =\n+ std::make_shared(quantized);\n+ return selector;\n+ }\n+\n+ void ConnectSubgraphOutputs(\n+ const nnvm::ObjectPtr n,\n+ std::vector *output_entries) const override {\n+ // Connect all extern output entries to output[0]\n+ for (size_t i = 0; i < output_entries->size(); ++i) {\n+ auto entry_ptr = output_entries->at(i);\n+ *entry_ptr = nnvm::NodeEntry{n, entry_ptr->index, 0};\n+ }\n+ }\n+\n+ void ConnectSubgraphInputs(\n+ const nnvm::ObjectPtr n, std::vector *input_entries,\n+ std::vector *orig_input_entries) const override {\n+ auto sym = n->attrs.subgraphs[0];\n+ auto const &fc_param = nnvm::get(n->attrs.parsed);\n+ std::unordered_set node_sets;\n+ DFSVisit(sym->outputs, [&](const nnvm::ObjectPtr &node) {", - "comment_created_at": "2021-07-06T09:33:23+00:00", - "comment_author": "sfraczek", - "comment_body": "@anko-intel Is this iterating through just subgraph fc->elementwise_add or entire graph?", - "pr_file_module": null - }, - { - "comment_id": "664618631", - "repo_full_name": "apache/mxnet", - "pr_number": 20400, - "pr_file": "src/operator/subgraph/mkldnn/mkldnn_fc_sum_fuse.h", - "discussion_id": "664297807", - "commented_code": "@@ -0,0 +1,240 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. 
You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*\n+ \\brief It fuses FC + SUM for floating point output in second post quantization pass\n+*/\n+\n+#ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_SUM_FUSE_H_\n+#define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_SUM_FUSE_H_\n+#if MXNET_USE_MKLDNN == 1\n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \"../common.h\"\n+#include \"../../tensor/matrix_op-inl.h\"\n+#include \"mkldnn_subgraph_base-inl.h\"\n+#include \"mkldnn_fc-inl.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+class SgMKLDNNFCSumFuseSelector : public SubgraphSelector {\n+ public:\n+ /*! \\brief pattern match status */\n+ enum SelectStatus {\n+ kFail = 0,\n+ kStart,\n+ kSuccess,\n+ };\n+\n+ private:\n+ bool quantized_;\n+ SelectStatus status_;\n+ std::vector matched_list_;\n+\n+ public:\n+ explicit SgMKLDNNFCSumFuseSelector(bool quantized) :\n+ quantized_(quantized) {}\n+\n+ bool Select(const nnvm::Node &n, const std::shared_ptr& node_attr) override {\n+ if (n.op() == Op::Get(\"_sg_mkldnn_fully_connected\") && SupportMKLDNNAttr(node_attr)) {\n+ auto const &fc_param = nnvm::get(n.attrs.parsed);\n+ // TODO(anko) remove fc_param.mkldnn_param.quantized from if below\n+ // to fuse even for not quantized?\n+ if (fc_param.mkldnn_param.enable_float_output && fc_param.mkldnn_param.quantized) {\n+ status_ = kStart;\n+ matched_list_.clear();\n+ matched_list_.push_back(&n);\n+ return true;\n+ }\n+ }\n+ return false;\n+ }\n+\n+ bool SelectInput(const nnvm::Node &n, const nnvm::Node &new_node) override {\n+ return false;\n+ }\n+\n+ bool SelectOutput(const nnvm::Node &n, const nnvm::Node &new_node) override {\n+ if (status_ == kFail || status_ == kSuccess || new_node.is_variable())\n+ return false;\n+ // If n isn't the last matched node, then we encoutered a internal\n+ // branch, we should pop out the node behind n and stop fusion.\n+ if (matched_list_.back() != &n) {\n+ if (std::find(matched_list_.begin(), matched_list_.end(), &n) !=\n+ matched_list_.end()) {\n+ while (matched_list_.back() != &n) {\n+ matched_list_.pop_back();\n+ }\n+ }\n+ status_ = kSuccess;\n+ return false;\n+ }\n+\n+ switch (status_) {\n+ case kStart:\n+ if (new_node.op()->name == \"elemwise_add\") {\n+ matched_list_.push_back(&new_node);\n+ status_ = kSuccess;\n+ return true;\n+ }\n+ default:\n+ status_ = kSuccess;\n+ return false;\n+ }\n+ }\n+\n+ std::vector Filter(\n+ const std::vector &candidates) override {\n+ if (status_ == kFail) {\n+ return std::vector(0);\n+ } else {\n+ std::vector ret;\n+ for (auto i : matched_list_) {\n+ auto non_const_i = const_cast(i);\n+ if (std::find(candidates.begin(), candidates.end(), non_const_i) !=\n+ candidates.end()) {\n+ ret.push_back(non_const_i);\n+ }\n+ }\n+ return candidates;\n+ }\n+ }\n+\n+ void Reset() override {\n+ CHECK_GE(matched_list_.size(), 1);\n+ auto new_selector = SgMKLDNNFCSumFuseSelector(quantized_);\n+ new_selector.Select(*matched_list_[0], nullptr);\n+ *this = new_selector;\n+ }\n+};\n+\n+class SgMKLDNNFCSumFuseProperty : public SubgraphProperty {\n+ public:\n+ SgMKLDNNFCSumFuseProperty() {}\n+\n+ static SubgraphPropertyPtr Create() {\n+ static 
const std::string &name = \"MKLDNN FullyConnected post quantization second pass\";\n+ auto property = std::make_shared();\n+ property->SetAttr(\"property_name\", name);\n+ property->SetAttr(\"inference_only\", true);\n+ if (dmlc::GetEnv(\"MXNET_DISABLE_MKLDNN_FC_SUM\", 0)) {\n+ property->SetAttr(\"disable\", true);\n+ }\n+ return property;\n+ }\n+\n+ nnvm::ObjectPtr CreateSubgraphNode(const nnvm::Symbol &sym,\n+ const int subgraph_id = 0) const override {\n+ nnvm::ObjectPtr fc_node = nullptr;\n+ nnvm::ObjectPtr ew_add_node = nullptr;\n+\n+ DFSVisit(sym.outputs, [&](const nnvm::ObjectPtr &node) {\n+ if (node->is_variable()) return;\n+ auto &sub_name = node->op()->name;\n+ if (sub_name == \"_sg_mkldnn_fully_connected\") {\n+ fc_node = node;\n+ } else if (sub_name == \"elemwise_add\") {\n+ ew_add_node = node;\n+ }\n+ });\n+\n+ CHECK_NOTNULL(fc_node);\n+ if (ew_add_node != nullptr) {\n+ CHECK_NOTNULL(fc_node->attrs.subgraphs[0]);\n+ auto fc_orginal = fc_node->attrs.subgraphs[0]->outputs[0].node;\n+ if (fc_orginal->op() == Op::Get(\"FullyConnected\")) {\n+ nnvm::Symbol new_sym;\n+ nnvm::NodeEntry &ew_input_with_fc = (ew_add_node->inputs[1].node == fc_node) ?\n+ ew_add_node->inputs[1] :\n+ ew_add_node->inputs[0];\n+ ew_input_with_fc.node = fc_orginal;\n+ new_sym.outputs.emplace_back(ew_add_node);\n+ fc_node->attrs.subgraphs.clear();\n+ fc_node->attrs.subgraphs.emplace_back(std::make_shared(new_sym));\n+ fc_node->attrs.dict[\"with_sum\"] = \"True\";\n+ fc_node->op()->attr_parser(&(fc_node->attrs));\n+ }\n+ }\n+ return fc_node;\n+ }\n+\n+ SubgraphSelectorPtr CreateSubgraphSelector() const override {\n+ bool quantized = HasAttr(\"quantize\") ? GetAttr(\"quantize\") : false;\n+ auto selector =\n+ std::make_shared(quantized);\n+ return selector;\n+ }\n+\n+ void ConnectSubgraphOutputs(\n+ const nnvm::ObjectPtr n,\n+ std::vector *output_entries) const override {\n+ // Connect all extern output entries to output[0]\n+ for (size_t i = 0; i < output_entries->size(); ++i) {\n+ auto entry_ptr = output_entries->at(i);\n+ *entry_ptr = nnvm::NodeEntry{n, entry_ptr->index, 0};\n+ }\n+ }\n+\n+ void ConnectSubgraphInputs(\n+ const nnvm::ObjectPtr n, std::vector *input_entries,\n+ std::vector *orig_input_entries) const override {\n+ auto sym = n->attrs.subgraphs[0];\n+ auto const &fc_param = nnvm::get(n->attrs.parsed);\n+ std::unordered_set node_sets;\n+ DFSVisit(sym->outputs, [&](const nnvm::ObjectPtr &node) {", - "comment_created_at": "2021-07-06T14:40:31+00:00", - "comment_author": "mozga-intel", - "comment_body": "@anko-intel Thanks, How about talking it over? \r\n\r\n## Problem statement ##\r\nIf the structure of a graph is still the same all the time (apart of fusing). Then we can try to avoid running DFS algorithm multiple times. For brevity, those calls force us to pay a penalty: O(V + E). If we assume that `n` represents a total number of patterns, then the summary cost of that is equal n * O(V +E). Even more, for a few operators (i.e this one) DFS might be run twice or more, if any ~ then we got mn * O(V +E). \r\n\r\n## Solution - short description [Future plan] ##\r\nReduce overall time of using DFS to a minimum: nice to have just O(V +E), by labelling nodes and keeping traversal order. 
Pattern matching is applied after that ~ on a traversal order vector (maybe LCA knowledge is needed).", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "647723370", - "pr_number": 20262, - "pr_file": "src/imperative/imperative.cc", - "created_at": "2021-06-08T19:10:31+00:00", - "commented_code": "<< \"must have a deferred compute history associated with them.\";\n s.outputs.emplace_back(ndoutput->deferredcompute_entry_);\n }\n nnvm::DFSVisit(s.outputs, [&](const nnvm::ObjectPtr& n) {", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "647723370", - "repo_full_name": "apache/mxnet", - "pr_number": 20262, - "pr_file": "src/imperative/imperative.cc", - "discussion_id": "647723370", - "commented_code": "@@ -350,6 +350,15 @@ nnvm::Symbol Imperative::GetDeferredComputeSymbol(const std::vector &\n << \"must have a deferred compute history associated with them.\";\n s.outputs.emplace_back(ndoutput->deferredcompute_entry_);\n }\n+ nnvm::DFSVisit(s.outputs, [&](const nnvm::ObjectPtr& n) {", - "comment_created_at": "2021-06-08T19:10:31+00:00", - "comment_author": "leezu", - "comment_body": "I think this needs to run on a copy of the graph, because in general there can still be a call to `Compute` after `GetDeferredComputeSymbol`. ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "766527196", - "pr_number": 20767, - "pr_file": "src/operator/subgraph/dnnl/dnnl_identity_property.h", - "created_at": "2021-12-10T09:52:32+00:00", - "commented_code": "}\n\n bool SelectInput(const BiDirectedNode& n, const BiDirectedNode& input_node) override {\n if (input_node.node->is_variable()) {\n if (pattern_found || input_node.node->is_variable()) {", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "766527196", - "repo_full_name": "apache/mxnet", - "pr_number": 20767, - "pr_file": "src/operator/subgraph/dnnl/dnnl_identity_property.h", - "discussion_id": "766527196", - "commented_code": "@@ -65,10 +66,11 @@ class SgDNNLIdentitySelector : public SubgraphSelectorV2 {\n }\n \n bool SelectInput(const BiDirectedNode& n, const BiDirectedNode& input_node) override {\n- if (input_node.node->is_variable()) {\n+ if (pattern_found || input_node.node->is_variable()) {", - "comment_created_at": "2021-12-10T09:52:32+00:00", - "comment_author": "PawelGlomski-Intel", - "comment_body": "You could use `matched_list_` like you did in the `Filter` function.\r\n```suggestion\r\n if (matched_list_.size() == 2 || input_node.node->is_variable()) {\r\n```\r\nIf not, then I would also use `pattern_found` in the `Filter` function, to show, that these conditions are connected.\r\nhttps://github.com/apache/incubator-mxnet/blob/7e163157cf789d7ec5e7272532e04495e57bd0ba/src/operator/subgraph/dnnl/dnnl_identity_property.h#L85\r\nChange to :\r\n```\r\n if (pattern_found && candidates.size() == matched_list_.size()) { \r\n```", - "pr_file_module": null - }, - { - "comment_id": "770310841", - "repo_full_name": "apache/mxnet", - "pr_number": 20767, - "pr_file": "src/operator/subgraph/dnnl/dnnl_identity_property.h", - "discussion_id": "766527196", - "commented_code": "@@ -65,10 +66,11 @@ class SgDNNLIdentitySelector : public SubgraphSelectorV2 {\n }\n \n bool SelectInput(const BiDirectedNode& n, const BiDirectedNode& input_node) override {\n- if (input_node.node->is_variable()) {\n+ if (pattern_found || input_node.node->is_variable()) {", - "comment_created_at": "2021-12-16T08:19:20+00:00", - "comment_author": "bgawrych", - "comment_body": "Done", - 
"pr_file_module": null - } - ] - }, - { - "discussion_id": "820816070", - "pr_number": 20753, - "pr_file": "src/nnvm/low_precision_pass.cc", - "created_at": "2022-03-07T15:26:58+00:00", - "commented_code": "#include \n#include \n#include \n#include \"../operator/operator_common.h\"\n\nnamespace mxnet {\nusing nnvm::Graph;\nusing nnvm::Node;\nusing nnvm::NodeEntry;\nusing nnvm::ObjectPtr;\nusing nnvm::Symbol;\n\n// create a node for operator : op_name with name : node_name\nstatic ObjectPtr CreateNode(std::string op_name, std::string node_name) {\n ObjectPtr node = Node::Create();\n node->attrs.name = node_name;\n if (op_name == \"nullptr\") {\n node->attrs.op = nullptr;\n // ugly workaround because VariableParam is not exposed\n node->attrs.parsed =\n nnvm::Symbol::CreateVariable(node->attrs.name).outputs[0].node->attrs.parsed;\n } else {\n node->attrs.op = Op::Get(op_name);\n }\n return node;\n}\n\nstatic ObjectPtr InsertNode(std::string op_name,\n std::string node_name,\n ObjectPtr current,\n NodeEntry previous) {\n ObjectPtr node = CreateNode(op_name, node_name);\n node->inputs.emplace_back(previous);\n if (current)\n current->inputs.emplace_back(NodeEntry{node, 0, 0});\n return node;\nbool is_cast_op(const nnvm::Op* const op) {\n return op && (op == Op::Get(\"amp_cast\") || op == Op::Get(\"Cast\"));\n}\n\n// get suffix for a node entry so that it can be used for amp_cast/amp_multicast node name\nstatic std::string GetSuffix(const nnvm::NodeEntry& node_entry,\n const std::unordered_map& mirror_map) {\n static const auto& flist_outputs = nnvm::Op::GetAttr(\"FListOutputNames\");\n std::string suffix = \"\";\n ObjectPtr mirror_node = mirror_map.at(node_entry.node.get());\n if (mirror_node->op() != nullptr) {\n auto list_output_names_func = flist_outputs.get(node_entry.node->op(), nullptr);\n if (list_output_names_func != nullptr) {\n std::vector names = list_output_names_func(node_entry.node->attrs);\n suffix = \"_\" + names[node_entry.index];\n } else {\n suffix = \"_\" + std::to_string(node_entry.index);\nclass MappedNodeEntry {\n public:\n MappedNodeEntry(NodeEntry node_entry, const int original_dtype)\n : entry(std::move(node_entry)), original_dtype(original_dtype) {\n dtype = original_dtype;\n }\n\n void convert(const int new_dtype) {\n CHECK_EQ(dtype, original_dtype); // dtype should be changed only once\n dtype = new_dtype;\n }\n\n const NodeEntry& as_original() {\n return as_type(original_dtype);\n }\n\n const NodeEntry& as_type(const int target_dtype) {\n if (dtype == target_dtype) {\n return entry;\n }\n NodeEntry& cast_entry = casts[target_dtype];\n if (cast_entry.node == nullptr) {\n cast_entry = cast(target_dtype);\n CHECK(cast_entry.node);\n }\n return cast_entry;\n }\n return suffix;\n}\n\n// add amp_cast node between curr_node and input\nstatic void AddCastNode(const nnvm::NodeEntry& e,\n const std::string& suffix,\n const nnvm::NodeEntry& input,\n const std::string dtype,\n nnvm::NodeEntryMap* mirror_entry_map,\n ObjectPtr curr_node) {\n ObjectPtr cast_node =\n InsertNode(\"amp_cast\", e.node->attrs.name + suffix + \"_amp_cast_\" + dtype, curr_node, input);\n cast_node->attrs.dict[\"dtype\"] = dtype;\n cast_node->op()->attr_parser(&(cast_node->attrs));\n (*mirror_entry_map)[e] = NodeEntry{std::move(cast_node), 0, e.version};\n return;\n}\n bool has_dtype_entry(const int target_dtype) const {\n return dtype == target_dtype || casts.count(target_dtype) > 0;\n }\n\n bool can_be_cast_offline_to(const int target_dtype) const {", - "repo_full_name": "apache/mxnet", - 
"discussion_comments": [ - { - "comment_id": "820816070", - "repo_full_name": "apache/mxnet", - "pr_number": 20753, - "pr_file": "src/nnvm/low_precision_pass.cc", - "discussion_id": "820816070", - "commented_code": "@@ -29,374 +29,322 @@\n #include \n #include \n #include \n+#include \"../operator/operator_common.h\"\n \n namespace mxnet {\n using nnvm::Graph;\n using nnvm::Node;\n using nnvm::NodeEntry;\n using nnvm::ObjectPtr;\n-using nnvm::Symbol;\n-\n-// create a node for operator : op_name with name : node_name\n-static ObjectPtr CreateNode(std::string op_name, std::string node_name) {\n- ObjectPtr node = Node::Create();\n- node->attrs.name = node_name;\n- if (op_name == \"nullptr\") {\n- node->attrs.op = nullptr;\n- // ugly workaround because VariableParam is not exposed\n- node->attrs.parsed =\n- nnvm::Symbol::CreateVariable(node->attrs.name).outputs[0].node->attrs.parsed;\n- } else {\n- node->attrs.op = Op::Get(op_name);\n- }\n- return node;\n-}\n \n-static ObjectPtr InsertNode(std::string op_name,\n- std::string node_name,\n- ObjectPtr current,\n- NodeEntry previous) {\n- ObjectPtr node = CreateNode(op_name, node_name);\n- node->inputs.emplace_back(previous);\n- if (current)\n- current->inputs.emplace_back(NodeEntry{node, 0, 0});\n- return node;\n+bool is_cast_op(const nnvm::Op* const op) {\n+ return op && (op == Op::Get(\"amp_cast\") || op == Op::Get(\"Cast\"));\n }\n \n-// get suffix for a node entry so that it can be used for amp_cast/amp_multicast node name\n-static std::string GetSuffix(const nnvm::NodeEntry& node_entry,\n- const std::unordered_map& mirror_map) {\n- static const auto& flist_outputs = nnvm::Op::GetAttr(\"FListOutputNames\");\n- std::string suffix = \"\";\n- ObjectPtr mirror_node = mirror_map.at(node_entry.node.get());\n- if (mirror_node->op() != nullptr) {\n- auto list_output_names_func = flist_outputs.get(node_entry.node->op(), nullptr);\n- if (list_output_names_func != nullptr) {\n- std::vector names = list_output_names_func(node_entry.node->attrs);\n- suffix = \"_\" + names[node_entry.index];\n- } else {\n- suffix = \"_\" + std::to_string(node_entry.index);\n+class MappedNodeEntry {\n+ public:\n+ MappedNodeEntry(NodeEntry node_entry, const int original_dtype)\n+ : entry(std::move(node_entry)), original_dtype(original_dtype) {\n+ dtype = original_dtype;\n+ }\n+\n+ void convert(const int new_dtype) {\n+ CHECK_EQ(dtype, original_dtype); // dtype should be changed only once\n+ dtype = new_dtype;\n+ }\n+\n+ const NodeEntry& as_original() {\n+ return as_type(original_dtype);\n+ }\n+\n+ const NodeEntry& as_type(const int target_dtype) {\n+ if (dtype == target_dtype) {\n+ return entry;\n+ }\n+ NodeEntry& cast_entry = casts[target_dtype];\n+ if (cast_entry.node == nullptr) {\n+ cast_entry = cast(target_dtype);\n+ CHECK(cast_entry.node);\n }\n+ return cast_entry;\n }\n- return suffix;\n-}\n \n-// add amp_cast node between curr_node and input\n-static void AddCastNode(const nnvm::NodeEntry& e,\n- const std::string& suffix,\n- const nnvm::NodeEntry& input,\n- const std::string dtype,\n- nnvm::NodeEntryMap* mirror_entry_map,\n- ObjectPtr curr_node) {\n- ObjectPtr cast_node =\n- InsertNode(\"amp_cast\", e.node->attrs.name + suffix + \"_amp_cast_\" + dtype, curr_node, input);\n- cast_node->attrs.dict[\"dtype\"] = dtype;\n- cast_node->op()->attr_parser(&(cast_node->attrs));\n- (*mirror_entry_map)[e] = NodeEntry{std::move(cast_node), 0, e.version};\n- return;\n-}\n+ bool has_dtype_entry(const int target_dtype) const {\n+ return dtype == target_dtype || 
casts.count(target_dtype) > 0;\n+ }\n+\n+ bool can_be_cast_offline_to(const int target_dtype) const {", - "comment_created_at": "2022-03-07T15:26:58+00:00", - "comment_author": "bartekkuncer", - "comment_body": "Why amount of casts to a particular dtype determines whether casting to this dtype can be performed offline?", - "pr_file_module": null - }, - { - "comment_id": "820851332", - "repo_full_name": "apache/mxnet", - "pr_number": 20753, - "pr_file": "src/nnvm/low_precision_pass.cc", - "discussion_id": "820816070", - "commented_code": "@@ -29,374 +29,322 @@\n #include \n #include \n #include \n+#include \"../operator/operator_common.h\"\n \n namespace mxnet {\n using nnvm::Graph;\n using nnvm::Node;\n using nnvm::NodeEntry;\n using nnvm::ObjectPtr;\n-using nnvm::Symbol;\n-\n-// create a node for operator : op_name with name : node_name\n-static ObjectPtr CreateNode(std::string op_name, std::string node_name) {\n- ObjectPtr node = Node::Create();\n- node->attrs.name = node_name;\n- if (op_name == \"nullptr\") {\n- node->attrs.op = nullptr;\n- // ugly workaround because VariableParam is not exposed\n- node->attrs.parsed =\n- nnvm::Symbol::CreateVariable(node->attrs.name).outputs[0].node->attrs.parsed;\n- } else {\n- node->attrs.op = Op::Get(op_name);\n- }\n- return node;\n-}\n \n-static ObjectPtr InsertNode(std::string op_name,\n- std::string node_name,\n- ObjectPtr current,\n- NodeEntry previous) {\n- ObjectPtr node = CreateNode(op_name, node_name);\n- node->inputs.emplace_back(previous);\n- if (current)\n- current->inputs.emplace_back(NodeEntry{node, 0, 0});\n- return node;\n+bool is_cast_op(const nnvm::Op* const op) {\n+ return op && (op == Op::Get(\"amp_cast\") || op == Op::Get(\"Cast\"));\n }\n \n-// get suffix for a node entry so that it can be used for amp_cast/amp_multicast node name\n-static std::string GetSuffix(const nnvm::NodeEntry& node_entry,\n- const std::unordered_map& mirror_map) {\n- static const auto& flist_outputs = nnvm::Op::GetAttr(\"FListOutputNames\");\n- std::string suffix = \"\";\n- ObjectPtr mirror_node = mirror_map.at(node_entry.node.get());\n- if (mirror_node->op() != nullptr) {\n- auto list_output_names_func = flist_outputs.get(node_entry.node->op(), nullptr);\n- if (list_output_names_func != nullptr) {\n- std::vector names = list_output_names_func(node_entry.node->attrs);\n- suffix = \"_\" + names[node_entry.index];\n- } else {\n- suffix = \"_\" + std::to_string(node_entry.index);\n+class MappedNodeEntry {\n+ public:\n+ MappedNodeEntry(NodeEntry node_entry, const int original_dtype)\n+ : entry(std::move(node_entry)), original_dtype(original_dtype) {\n+ dtype = original_dtype;\n+ }\n+\n+ void convert(const int new_dtype) {\n+ CHECK_EQ(dtype, original_dtype); // dtype should be changed only once\n+ dtype = new_dtype;\n+ }\n+\n+ const NodeEntry& as_original() {\n+ return as_type(original_dtype);\n+ }\n+\n+ const NodeEntry& as_type(const int target_dtype) {\n+ if (dtype == target_dtype) {\n+ return entry;\n+ }\n+ NodeEntry& cast_entry = casts[target_dtype];\n+ if (cast_entry.node == nullptr) {\n+ cast_entry = cast(target_dtype);\n+ CHECK(cast_entry.node);\n }\n+ return cast_entry;\n }\n- return suffix;\n-}\n \n-// add amp_cast node between curr_node and input\n-static void AddCastNode(const nnvm::NodeEntry& e,\n- const std::string& suffix,\n- const nnvm::NodeEntry& input,\n- const std::string dtype,\n- nnvm::NodeEntryMap* mirror_entry_map,\n- ObjectPtr curr_node) {\n- ObjectPtr cast_node =\n- InsertNode(\"amp_cast\", e.node->attrs.name + suffix + \"_amp_cast_\" + 
dtype, curr_node, input);\n- cast_node->attrs.dict[\"dtype\"] = dtype;\n- cast_node->op()->attr_parser(&(cast_node->attrs));\n- (*mirror_entry_map)[e] = NodeEntry{std::move(cast_node), 0, e.version};\n- return;\n-}\n+ bool has_dtype_entry(const int target_dtype) const {\n+ return dtype == target_dtype || casts.count(target_dtype) > 0;\n+ }\n+\n+ bool can_be_cast_offline_to(const int target_dtype) const {", - "comment_created_at": "2022-03-07T16:01:08+00:00", - "comment_author": "PawelGlomski-Intel", - "comment_body": "`casts` is an unordered map, thus the count returns whether the cast to this dtype exists or not. A parameter should be cast offline only if there is a need for it - there is at least one operator using it through a cast.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-hybridization-compatible-operations.json b/_reviewers/mxnet-hybridization-compatible-operations.json new file mode 100644 index 0000000..3b9d081 --- /dev/null +++ b/_reviewers/mxnet-hybridization-compatible-operations.json @@ -0,0 +1,220 @@ +[ + { + "discussion_id": "250017909", + "pr_number": 13735, + "pr_file": "example/gluon/wavenet/models.py", + "created_at": "2019-01-23T00:44:31+00:00", + "commented_code": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements. See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership. The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License. You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied. See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n\"\"\"\nModule: WaveNet network modulep\n\"\"\"\nfrom mxnet import nd\nfrom mxnet.gluon import nn\nimport mxnet.ndarray as F\n# pylint: disable=invalid-name, too-many-arguments, arguments-differ, attribute-defined-outside-init, too-many-instance-attributes, invalid-sequence-index, no-self-use\nclass One_Hot(nn.Block):\n \"\"\"\n Description : generate one hot result\n \"\"\"\n def __init__(self, depth):\n super(One_Hot, self).__init__()\n self.depth = depth\n\n def forward(self, X_in):\n with X_in.context:\n X_in = X_in\n self.ones = nd.one_hot(nd.arange(self.depth), self.depth)\n return self.ones[X_in, :]\n\n def __repr__(self):\n return self.__class__.__name__ + \"({})\".format(self.depth)\n\nclass WaveNet(nn.Block):", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "250017909", + "repo_full_name": "apache/mxnet", + "pr_number": 13735, + "pr_file": "example/gluon/wavenet/models.py", + "discussion_id": "250017909", + "commented_code": "@@ -0,0 +1,118 @@\n+\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. 
You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+\n+\"\"\"\n+Module: WaveNet network modulep\n+\"\"\"\n+from mxnet import nd\n+from mxnet.gluon import nn\n+import mxnet.ndarray as F\n+# pylint: disable=invalid-name, too-many-arguments, arguments-differ, attribute-defined-outside-init, too-many-instance-attributes, invalid-sequence-index, no-self-use\n+class One_Hot(nn.Block):\n+ \"\"\"\n+ Description : generate one hot result\n+ \"\"\"\n+ def __init__(self, depth):\n+ super(One_Hot, self).__init__()\n+ self.depth = depth\n+\n+ def forward(self, X_in):\n+ with X_in.context:\n+ X_in = X_in\n+ self.ones = nd.one_hot(nd.arange(self.depth), self.depth)\n+ return self.ones[X_in, :]\n+\n+ def __repr__(self):\n+ return self.__class__.__name__ + \"({})\".format(self.depth)\n+\n+class WaveNet(nn.Block):", + "comment_created_at": "2019-01-23T00:44:31+00:00", + "comment_author": "ThomasDelteil", + "comment_body": "Could we consider a hybridizable implementation?", + "pr_file_module": null + }, + { + "comment_id": "252269415", + "repo_full_name": "apache/mxnet", + "pr_number": 13735, + "pr_file": "example/gluon/wavenet/models.py", + "discussion_id": "250017909", + "commented_code": "@@ -0,0 +1,118 @@\n+\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. 
See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+\n+\"\"\"\n+Module: WaveNet network modulep\n+\"\"\"\n+from mxnet import nd\n+from mxnet.gluon import nn\n+import mxnet.ndarray as F\n+# pylint: disable=invalid-name, too-many-arguments, arguments-differ, attribute-defined-outside-init, too-many-instance-attributes, invalid-sequence-index, no-self-use\n+class One_Hot(nn.Block):\n+ \"\"\"\n+ Description : generate one hot result\n+ \"\"\"\n+ def __init__(self, depth):\n+ super(One_Hot, self).__init__()\n+ self.depth = depth\n+\n+ def forward(self, X_in):\n+ with X_in.context:\n+ X_in = X_in\n+ self.ones = nd.one_hot(nd.arange(self.depth), self.depth)\n+ return self.ones[X_in, :]\n+\n+ def __repr__(self):\n+ return self.__class__.__name__ + \"({})\".format(self.depth)\n+\n+class WaveNet(nn.Block):", + "comment_created_at": "2019-01-30T14:07:24+00:00", + "comment_author": "seujung", + "comment_body": "When I change Block to HybridBlock, some problem occurs in the following code.\r\n``\r\noutput = sum([s[:, :, -output.shape[2]:] for s in skip_connections])\r\n``\r\nCould you tell me how to fix the problem?", + "pr_file_module": null + }, + { + "comment_id": "252419590", + "repo_full_name": "apache/mxnet", + "pr_number": 13735, + "pr_file": "example/gluon/wavenet/models.py", + "discussion_id": "250017909", + "commented_code": "@@ -0,0 +1,118 @@\n+\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+\n+\"\"\"\n+Module: WaveNet network modulep\n+\"\"\"\n+from mxnet import nd\n+from mxnet.gluon import nn\n+import mxnet.ndarray as F\n+# pylint: disable=invalid-name, too-many-arguments, arguments-differ, attribute-defined-outside-init, too-many-instance-attributes, invalid-sequence-index, no-self-use\n+class One_Hot(nn.Block):\n+ \"\"\"\n+ Description : generate one hot result\n+ \"\"\"\n+ def __init__(self, depth):\n+ super(One_Hot, self).__init__()\n+ self.depth = depth\n+\n+ def forward(self, X_in):\n+ with X_in.context:\n+ X_in = X_in\n+ self.ones = nd.one_hot(nd.arange(self.depth), self.depth)\n+ return self.ones[X_in, :]\n+\n+ def __repr__(self):\n+ return self.__class__.__name__ + \"({})\".format(self.depth)\n+\n+class WaveNet(nn.Block):", + "comment_created_at": "2019-01-30T20:11:28+00:00", + "comment_author": "safrooze", + "comment_body": "You cannot use index operator with hybridization. You need to use F.slice(). Also shape is not available with hybridized networks. You have three options:\r\n1. use padding so that output is the same size as input\r\n2. calculate the amount to slice by doing the math (a function of kernel size and dilation)\r\n3. 
use `sym.infer_shape()` to infer shape of the output in hybridized mode.\r\n\r\nI recommend option 2.", + "pr_file_module": null + }, + { + "comment_id": "252942411", + "repo_full_name": "apache/mxnet", + "pr_number": 13735, + "pr_file": "example/gluon/wavenet/models.py", + "discussion_id": "250017909", + "commented_code": "@@ -0,0 +1,118 @@\n+\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+\n+\"\"\"\n+Module: WaveNet network modulep\n+\"\"\"\n+from mxnet import nd\n+from mxnet.gluon import nn\n+import mxnet.ndarray as F\n+# pylint: disable=invalid-name, too-many-arguments, arguments-differ, attribute-defined-outside-init, too-many-instance-attributes, invalid-sequence-index, no-self-use\n+class One_Hot(nn.Block):\n+ \"\"\"\n+ Description : generate one hot result\n+ \"\"\"\n+ def __init__(self, depth):\n+ super(One_Hot, self).__init__()\n+ self.depth = depth\n+\n+ def forward(self, X_in):\n+ with X_in.context:\n+ X_in = X_in\n+ self.ones = nd.one_hot(nd.arange(self.depth), self.depth)\n+ return self.ones[X_in, :]\n+\n+ def __repr__(self):\n+ return self.__class__.__name__ + \"({})\".format(self.depth)\n+\n+class WaveNet(nn.Block):", + "comment_created_at": "2019-02-01T06:13:20+00:00", + "comment_author": "seujung", + "comment_body": "I try to change code using HybridBlock. But, I need too much time to apply it. Is it essential to reflect this? If I have to do it, I will try to change this module.", + "pr_file_module": null + }, + { + "comment_id": "265847269", + "repo_full_name": "apache/mxnet", + "pr_number": 13735, + "pr_file": "example/gluon/wavenet/models.py", + "discussion_id": "250017909", + "commented_code": "@@ -0,0 +1,118 @@\n+\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. 
See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+\n+\"\"\"\n+Module: WaveNet network modulep\n+\"\"\"\n+from mxnet import nd\n+from mxnet.gluon import nn\n+import mxnet.ndarray as F\n+# pylint: disable=invalid-name, too-many-arguments, arguments-differ, attribute-defined-outside-init, too-many-instance-attributes, invalid-sequence-index, no-self-use\n+class One_Hot(nn.Block):\n+ \"\"\"\n+ Description : generate one hot result\n+ \"\"\"\n+ def __init__(self, depth):\n+ super(One_Hot, self).__init__()\n+ self.depth = depth\n+\n+ def forward(self, X_in):\n+ with X_in.context:\n+ X_in = X_in\n+ self.ones = nd.one_hot(nd.arange(self.depth), self.depth)\n+ return self.ones[X_in, :]\n+\n+ def __repr__(self):\n+ return self.__class__.__name__ + \"({})\".format(self.depth)\n+\n+class WaveNet(nn.Block):", + "comment_created_at": "2019-03-15T04:31:55+00:00", + "comment_author": "seujung", + "comment_body": "update hybrid module for wavenet model ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "697558544", + "pr_number": 20559, + "pr_file": "tests/python/unittest/test_autograd.py", + "created_at": "2021-08-27T16:08:00+00:00", + "commented_code": "dx.backward()\n assert abs(x.grad.asscalar() - 2.71828175) < 1e-7\n\ndef test_retain_grad_drop_grad():\n x = nd.array([1,2,3,4])\n x.attach_grad()\n y = nd.array([5,6,7,8])\n y.attach_grad()\n\n with mx.autograd.record():\n u = x * y\n z = u * x\n\n u.attach_grad()\n z.attach_grad()\n out_grad = nd.array([10, 10, 10, 10])\n z.backward(out_grad, retain_graph=True)\n \n assert (u.grad == out_grad * x).asnumpy().all()\n assert (z.grad == out_grad).asnumpy().all()\n assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n assert (y.grad == out_grad * x*x).asnumpy().all()\n\n u.drop_grad()\n z.drop_grad()\n y.drop_grad()\n out_grad = nd.array([0.1, 0.1, 0.1, 0.1])\n z.backward(out_grad)\n\n assert u.grad is None and z.grad is None and y.grad is None\n assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n\ndef test_retain_grad_drop_grad_gluon():\n class CompBlock(mx.gluon.HybridBlock):\n def __init__(self):\n super().__init__()\n self.marked_var = None\n def forward(self, a, b):\n out1 = a*b\n out2 = out1 * a\n self.marked_var = out1\n return out2\n x = mx.np.array([1,2,3,4])\n y = mx.np.array([5,6,7,8])\n x.attach_grad()\n y.attach_grad()\n block2 = CompBlock()\n block2.initialize()\n # block2.hybridize()\n with mx.autograd.record():\n z = block2(x, y)\n u = block2.marked_var\n u.attach_grad()\n z.attach_grad()\n z.backward(retain_graph=True)", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "697558544", + "repo_full_name": "apache/mxnet", + "pr_number": 20559, + "pr_file": "tests/python/unittest/test_autograd.py", + "discussion_id": "697558544", + "commented_code": "@@ -519,3 +519,68 @@ def test_gradient():\n dx.backward()\n assert abs(x.grad.asscalar() - 2.71828175) < 1e-7\n \n+def test_retain_grad_drop_grad():\n+ x = nd.array([1,2,3,4])\n+ x.attach_grad()\n+ y = nd.array([5,6,7,8])\n+ y.attach_grad()\n+\n+ with mx.autograd.record():\n+ u = x * y\n+ z = u * x\n+\n+ u.attach_grad()\n+ z.attach_grad()\n+ out_grad = nd.array([10, 10, 10, 10])\n+ z.backward(out_grad, retain_graph=True)\n+ \n+ assert (u.grad == out_grad * x).asnumpy().all()\n+ assert (z.grad == out_grad).asnumpy().all()\n+ assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n+ assert (y.grad == out_grad * x*x).asnumpy().all()\n+\n+ u.drop_grad()\n+ z.drop_grad()\n+ y.drop_grad()\n+ 
out_grad = nd.array([0.1, 0.1, 0.1, 0.1])\n+ z.backward(out_grad)\n+\n+ assert u.grad is None and z.grad is None and y.grad is None\n+ assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n+\n+def test_retain_grad_drop_grad_gluon():\n+ class CompBlock(mx.gluon.HybridBlock):\n+ def __init__(self):\n+ super().__init__()\n+ self.marked_var = None\n+ def forward(self, a, b):\n+ out1 = a*b\n+ out2 = out1 * a\n+ self.marked_var = out1\n+ return out2\n+ x = mx.np.array([1,2,3,4])\n+ y = mx.np.array([5,6,7,8])\n+ x.attach_grad()\n+ y.attach_grad()\n+ block2 = CompBlock()\n+ block2.initialize()\n+ # block2.hybridize()\n+ with mx.autograd.record():\n+ z = block2(x, y)\n+ u = block2.marked_var\n+ u.attach_grad()\n+ z.attach_grad()\n+ z.backward(retain_graph=True)", + "comment_created_at": "2021-08-27T16:08:00+00:00", + "comment_author": "leezu", + "comment_body": "Accessing the `marked_var` outside of the Block wouldn't work for hybridized Blocks. That's because the ndarray (`out1`) is just a temporary object used in the forward function for tracing the computation. Afterwards it's discarded and only the underlying symbolic graph is kept.\r\n\r\nThus I think you'll need to support something like `attach_grad` inside of the forward function. WDYT?\r\nFor that, I'd recommend you take a look at the CachedOp and how it handles gradients. You'll need a way to express that it should return a gradient buffer for the intermediate variable.\r\n\r\nAlso keep in mind that `autograd` API currently only targets the imperative use-case (without `hybridize`) and the deferred compute tracing used during hybridization are can't be enabled at the same time as `autograd` currently:\r\n\r\nhttps://github.com/apache/incubator-mxnet/blob/71526851d857fde97660aa79ccdb77f2b1cf963f/src/imperative/imperative.cc#L301-L305", + "pr_file_module": null + }, + { + "comment_id": "697632922", + "repo_full_name": "apache/mxnet", + "pr_number": 20559, + "pr_file": "tests/python/unittest/test_autograd.py", + "discussion_id": "697558544", + "commented_code": "@@ -519,3 +519,68 @@ def test_gradient():\n dx.backward()\n assert abs(x.grad.asscalar() - 2.71828175) < 1e-7\n \n+def test_retain_grad_drop_grad():\n+ x = nd.array([1,2,3,4])\n+ x.attach_grad()\n+ y = nd.array([5,6,7,8])\n+ y.attach_grad()\n+\n+ with mx.autograd.record():\n+ u = x * y\n+ z = u * x\n+\n+ u.attach_grad()\n+ z.attach_grad()\n+ out_grad = nd.array([10, 10, 10, 10])\n+ z.backward(out_grad, retain_graph=True)\n+ \n+ assert (u.grad == out_grad * x).asnumpy().all()\n+ assert (z.grad == out_grad).asnumpy().all()\n+ assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n+ assert (y.grad == out_grad * x*x).asnumpy().all()\n+\n+ u.drop_grad()\n+ z.drop_grad()\n+ y.drop_grad()\n+ out_grad = nd.array([0.1, 0.1, 0.1, 0.1])\n+ z.backward(out_grad)\n+\n+ assert u.grad is None and z.grad is None and y.grad is None\n+ assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n+\n+def test_retain_grad_drop_grad_gluon():\n+ class CompBlock(mx.gluon.HybridBlock):\n+ def __init__(self):\n+ super().__init__()\n+ self.marked_var = None\n+ def forward(self, a, b):\n+ out1 = a*b\n+ out2 = out1 * a\n+ self.marked_var = out1\n+ return out2\n+ x = mx.np.array([1,2,3,4])\n+ y = mx.np.array([5,6,7,8])\n+ x.attach_grad()\n+ y.attach_grad()\n+ block2 = CompBlock()\n+ block2.initialize()\n+ # block2.hybridize()\n+ with mx.autograd.record():\n+ z = block2(x, y)\n+ u = block2.marked_var\n+ u.attach_grad()\n+ z.attach_grad()\n+ z.backward(retain_graph=True)", + "comment_created_at": 
"2021-08-27T18:14:10+00:00", + "comment_author": "KexinFeng", + "comment_body": "Hi Leo,\r\nThanks for pointing out the relation between deferred compute and autograd API. I just committed my first implementation of `attach_grad` on hybridize mode. Indeed, the major implementation is in the forward pass, ie the invoke of `CachedOp`. \r\n\r\nThe main idea is to utilize the handle of deferred ndarray (`out1`) to mark the nonleaf node which are still deferredcompute node. And then, when invoking `CachedOp`, the autograd computation nodes are retained and linked to the ndarrays like 'out1'. Thus these ndarrays can further call `attach_grad` which will the the same as imperative mode.\r\n\r\nBut it is true that there is an incompatibility of hybridize computation with the imperative style of `attach_grad`. This incompatibility may lead to inconvenience when designing the python frontend use of this feature. This could be done next.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "701074106", + "pr_number": 20559, + "pr_file": "tests/python/unittest/test_autograd.py", + "created_at": "2021-09-02T13:18:39+00:00", + "commented_code": "dx.backward()\n assert abs(x.grad.asscalar() - 2.71828175) < 1e-7\n\ndef test_retain_grad_drop_grad():\n x = nd.array([1,2,3,4])\n x.attach_grad()\n y = nd.array([5,6,7,8])\n y.attach_grad()\n\n with mx.autograd.record():\n u = x * y\n z = u * x\n\n u.attach_grad()\n z.attach_grad()\n out_grad = nd.array([10, 10, 10, 10])\n z.backward(out_grad, retain_graph=True)\n\n assert (u.grad == out_grad * x).asnumpy().all()\n assert (z.grad == out_grad).asnumpy().all()\n assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n assert (y.grad == out_grad * x*x).asnumpy().all()\n\n u.drop_grad()\n z.drop_grad()\n y.drop_grad()\n out_grad = nd.array([0.1, 0.1, 0.1, 0.1])\n z.backward(out_grad)\n\n assert u.grad is None and z.grad is None and y.grad is None\n assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n\ndef test_retain_grad_drop_grad_gluon():\n class CompBlock(mx.gluon.HybridBlock):\n def __init__(self):\n super().__init__()\n\n def forward(self, a, b):\n out1 = a*b\n out2 = out1 * a\n self.mark_vars(out1)\n return out2\n\n x = mx.np.array([1,2,3,4])\n y = mx.np.array([5,6,7,8])\n x.attach_grad()\n y.attach_grad()\n block2 = CompBlock()\n block2.initialize()\n # block2.hybridize()", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "701074106", + "repo_full_name": "apache/mxnet", + "pr_number": 20559, + "pr_file": "tests/python/unittest/test_autograd.py", + "discussion_id": "701074106", + "commented_code": "@@ -519,3 +519,110 @@ def test_gradient():\n dx.backward()\n assert abs(x.grad.asscalar() - 2.71828175) < 1e-7\n \n+def test_retain_grad_drop_grad():\n+ x = nd.array([1,2,3,4])\n+ x.attach_grad()\n+ y = nd.array([5,6,7,8])\n+ y.attach_grad()\n+\n+ with mx.autograd.record():\n+ u = x * y\n+ z = u * x\n+\n+ u.attach_grad()\n+ z.attach_grad()\n+ out_grad = nd.array([10, 10, 10, 10])\n+ z.backward(out_grad, retain_graph=True)\n+\n+ assert (u.grad == out_grad * x).asnumpy().all()\n+ assert (z.grad == out_grad).asnumpy().all()\n+ assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n+ assert (y.grad == out_grad * x*x).asnumpy().all()\n+\n+ u.drop_grad()\n+ z.drop_grad()\n+ y.drop_grad()\n+ out_grad = nd.array([0.1, 0.1, 0.1, 0.1])\n+ z.backward(out_grad)\n+\n+ assert u.grad is None and z.grad is None and y.grad is None\n+ assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n+\n+def 
test_retain_grad_drop_grad_gluon():\n+ class CompBlock(mx.gluon.HybridBlock):\n+ def __init__(self):\n+ super().__init__()\n+\n+ def forward(self, a, b):\n+ out1 = a*b\n+ out2 = out1 * a\n+ self.mark_vars(out1)\n+ return out2\n+\n+ x = mx.np.array([1,2,3,4])\n+ y = mx.np.array([5,6,7,8])\n+ x.attach_grad()\n+ y.attach_grad()\n+ block2 = CompBlock()\n+ block2.initialize()\n+ # block2.hybridize()", + "comment_created_at": "2021-09-02T13:18:39+00:00", + "comment_author": "leezu", + "comment_body": "Should this test be run both with and without hybridize? You can parameterize the test function https://docs.pytest.org/en/6.2.x/parametrize.html", + "pr_file_module": null + }, + { + "comment_id": "701944170", + "repo_full_name": "apache/mxnet", + "pr_number": 20559, + "pr_file": "tests/python/unittest/test_autograd.py", + "discussion_id": "701074106", + "commented_code": "@@ -519,3 +519,110 @@ def test_gradient():\n dx.backward()\n assert abs(x.grad.asscalar() - 2.71828175) < 1e-7\n \n+def test_retain_grad_drop_grad():\n+ x = nd.array([1,2,3,4])\n+ x.attach_grad()\n+ y = nd.array([5,6,7,8])\n+ y.attach_grad()\n+\n+ with mx.autograd.record():\n+ u = x * y\n+ z = u * x\n+\n+ u.attach_grad()\n+ z.attach_grad()\n+ out_grad = nd.array([10, 10, 10, 10])\n+ z.backward(out_grad, retain_graph=True)\n+\n+ assert (u.grad == out_grad * x).asnumpy().all()\n+ assert (z.grad == out_grad).asnumpy().all()\n+ assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n+ assert (y.grad == out_grad * x*x).asnumpy().all()\n+\n+ u.drop_grad()\n+ z.drop_grad()\n+ y.drop_grad()\n+ out_grad = nd.array([0.1, 0.1, 0.1, 0.1])\n+ z.backward(out_grad)\n+\n+ assert u.grad is None and z.grad is None and y.grad is None\n+ assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n+\n+def test_retain_grad_drop_grad_gluon():\n+ class CompBlock(mx.gluon.HybridBlock):\n+ def __init__(self):\n+ super().__init__()\n+\n+ def forward(self, a, b):\n+ out1 = a*b\n+ out2 = out1 * a\n+ self.mark_vars(out1)\n+ return out2\n+\n+ x = mx.np.array([1,2,3,4])\n+ y = mx.np.array([5,6,7,8])\n+ x.attach_grad()\n+ y.attach_grad()\n+ block2 = CompBlock()\n+ block2.initialize()\n+ # block2.hybridize()", + "comment_created_at": "2021-09-03T14:40:15+00:00", + "comment_author": "KexinFeng", + "comment_body": "Done.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "325399680", + "pr_number": 15839, + "pr_file": "python/mxnet/gluon/block.py", + "created_at": "2019-09-17T21:45:03+00:00", + "commented_code": "def _call_cached_op(self, *args):\n if self._cached_op is None:\n self._build_cache(*args)\n assert self._cached_op, \"cached op is not None\"\n if self._callback:\n self._cached_op._register_op_hook(self._callback, self._monitor_all)\n if len(self._flags) >= 2 and self._flags[1]:\n warnings.warn(\"Callback is not supported when static_shape=True \"", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "325399680", + "repo_full_name": "apache/mxnet", + "pr_number": 15839, + "pr_file": "python/mxnet/gluon/block.py", + "discussion_id": "325399680", + "commented_code": "@@ -833,6 +848,12 @@ def _deferred_infer_shape(self, *args):\n def _call_cached_op(self, *args):\n if self._cached_op is None:\n self._build_cache(*args)\n+ assert self._cached_op, \"cached op is not None\"\n+ if self._callback:\n+ self._cached_op._register_op_hook(self._callback, self._monitor_all)\n+ if len(self._flags) >= 2 and self._flags[1]:\n+ warnings.warn(\"Callback is not supported when static_shape=True \"", + "comment_created_at": 
"2019-09-17T21:45:03+00:00", + "comment_author": "rahul003", + "comment_body": "Why's this? If it's not supported, do not register the callback?", + "pr_file_module": null + }, + { + "comment_id": "325399851", + "repo_full_name": "apache/mxnet", + "pr_number": 15839, + "pr_file": "python/mxnet/gluon/block.py", + "discussion_id": "325399680", + "commented_code": "@@ -833,6 +848,12 @@ def _deferred_infer_shape(self, *args):\n def _call_cached_op(self, *args):\n if self._cached_op is None:\n self._build_cache(*args)\n+ assert self._cached_op, \"cached op is not None\"\n+ if self._callback:\n+ self._cached_op._register_op_hook(self._callback, self._monitor_all)\n+ if len(self._flags) >= 2 and self._flags[1]:\n+ warnings.warn(\"Callback is not supported when static_shape=True \"", + "comment_created_at": "2019-09-17T21:45:33+00:00", + "comment_author": "rahul003", + "comment_body": "else say something like experimental for static_shape=True if you are not confident, but it might work", + "pr_file_module": null + }, + { + "comment_id": "325407901", + "repo_full_name": "apache/mxnet", + "pr_number": 15839, + "pr_file": "python/mxnet/gluon/block.py", + "discussion_id": "325399680", + "commented_code": "@@ -833,6 +848,12 @@ def _deferred_infer_shape(self, *args):\n def _call_cached_op(self, *args):\n if self._cached_op is None:\n self._build_cache(*args)\n+ assert self._cached_op, \"cached op is not None\"\n+ if self._callback:\n+ self._cached_op._register_op_hook(self._callback, self._monitor_all)\n+ if len(self._flags) >= 2 and self._flags[1]:\n+ warnings.warn(\"Callback is not supported when static_shape=True \"", + "comment_created_at": "2019-09-17T22:10:50+00:00", + "comment_author": "anirudh2290", + "comment_body": "Yes I will say experimental, good point thanks. The reason this is experimental is because for certain code path the cached op directly calls engine push instead of calling InvokeOp. adding the callbacks in such cases was much more difficult since we just had the opr and the engine vars not the ndarray references. Please see line: https://github.com/apache/incubator-mxnet/blob/master/src/imperative/cached_op.cc#L689\r\n\r\nFor this case, it is not supported.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "646152752", + "pr_number": 20262, + "pr_file": "python/mxnet/gluon/rnn/rnn_layer.py", + "created_at": "2021-06-06T16:02:42+00:00", + "commented_code": "else:\n return super(_RNNLayer, self).__call__(inputs, states, **kwargs)\n\n def hybrid_forward(self, F, inputs, states, sequence_length=None, **kwargs):\n if F is ndarray:\n batch_size = inputs.shape[self._layout.find('N')]\n def forward(self, inputs, states, sequence_length=None):\n batch_size = inputs.shape[self._layout.find('N')]\n\n if F is ndarray:\n for state, info in zip(states, self.state_info(batch_size)):\n if state.shape != info['shape']:\n raise ValueError(\n \"Invalid recurrent state shape. Expecting %s, got %s.\"%(\n str(info['shape']), str(state.shape)))\n out = self._forward_kernel(F, inputs, states, sequence_length, **kwargs)\n for state, info in zip(states, self.state_info(batch_size)):\n if state.shape != info['shape']:\n raise ValueError(\n \"Invalid recurrent state shape. 
Expecting %s, got %s.\"%(\n str(info['shape']), str(state.shape)))\n out = self._forward_kernel(inputs, states, sequence_length)\n\n # out is (output, state)\n return out[0] if self.skip_states else out\n\n def _forward_kernel(self, F, inputs, states, sequence_length, **kwargs):\n def infer_shape(self, inputs, *args):\n assert inputs.ndim == 3, \\\n \"Input data should be rank-3 tensor of dim [sequence length, batch size, input size]\"\n if not self._projection_size:\n step = self._hidden_size\n else:\n step = self._projection_size\n ni = inputs.shape[2]\n for i in range(self._num_layers):\n for j in ['l', 'r'][:self._dir]:\n name = '{}{}_i2h_weight'.format(j, i)\n getattr(self, name).shape = (self._gates*self._hidden_size, ni)\n ni = step * self._dir\n\n def _forward_kernel(self, inputs, states, sequence_length):\n \"\"\" forward using CUDNN or CPU kenrel\"\"\"\n swapaxes = F.np.swapaxes if is_np_array() else F.swapaxes\n ctx = inputs.ctx\n if self._layout == 'NTC':\n inputs = swapaxes(inputs, 0, 1)\n inputs = np.swapaxes(inputs, 0, 1)\n if self._projection_size is None:\n params = (kwargs['{}{}_{}_{}'.format(d, l, g, t)].reshape(-1)\n params = (getattr(self, '{}{}_{}_{}'.format(d, l, g, t)).data(ctx).reshape(-1)\n for t in ['weight', 'bias']\n for l in range(self._num_layers)\n for d in ['l', 'r'][:self._dir]\n for g in ['i2h', 'h2h'])\n else:\n params = (kwargs['{}{}_{}_{}'.format(d, l, g, t)].reshape(-1)\n params = (getattr(self, '{}{}_{}_{}'.format(d, l, g, t)).data(ctx).reshape(-1)\n for t in ['weight', 'bias']\n for l in range(self._num_layers)\n for d in ['l', 'r'][:self._dir]\n for g in ['i2h', 'h2h', 'h2r']\n if g != 'h2r' or t != 'bias')\n\n rnn_param_concat = F.np._internal.rnn_param_concat if is_np_array()\\\n else F._internal._rnn_param_concat\n params = rnn_param_concat(*params, dim=0)\n params = ndarray.np._internal.rnn_param_concat(*params, dim=0)", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "646152752", + "repo_full_name": "apache/mxnet", + "pr_number": 20262, + "pr_file": "python/mxnet/gluon/rnn/rnn_layer.py", + "discussion_id": "646152752", + "commented_code": "@@ -182,65 +180,79 @@ def __call__(self, inputs, states=None, sequence_length=None, **kwargs):\n else:\n return super(_RNNLayer, self).__call__(inputs, states, **kwargs)\n \n- def hybrid_forward(self, F, inputs, states, sequence_length=None, **kwargs):\n- if F is ndarray:\n- batch_size = inputs.shape[self._layout.find('N')]\n+ def forward(self, inputs, states, sequence_length=None):\n+ batch_size = inputs.shape[self._layout.find('N')]\n \n- if F is ndarray:\n- for state, info in zip(states, self.state_info(batch_size)):\n- if state.shape != info['shape']:\n- raise ValueError(\n- \"Invalid recurrent state shape. Expecting %s, got %s.\"%(\n- str(info['shape']), str(state.shape)))\n- out = self._forward_kernel(F, inputs, states, sequence_length, **kwargs)\n+ for state, info in zip(states, self.state_info(batch_size)):\n+ if state.shape != info['shape']:\n+ raise ValueError(\n+ \"Invalid recurrent state shape. 
Expecting %s, got %s.\"%(\n+ str(info['shape']), str(state.shape)))\n+ out = self._forward_kernel(inputs, states, sequence_length)\n \n # out is (output, state)\n return out[0] if self.skip_states else out\n \n- def _forward_kernel(self, F, inputs, states, sequence_length, **kwargs):\n+ def infer_shape(self, inputs, *args):\n+ assert inputs.ndim == 3, \\\n+ \"Input data should be rank-3 tensor of dim [sequence length, batch size, input size]\"\n+ if not self._projection_size:\n+ step = self._hidden_size\n+ else:\n+ step = self._projection_size\n+ ni = inputs.shape[2]\n+ for i in range(self._num_layers):\n+ for j in ['l', 'r'][:self._dir]:\n+ name = '{}{}_i2h_weight'.format(j, i)\n+ getattr(self, name).shape = (self._gates*self._hidden_size, ni)\n+ ni = step * self._dir\n+\n+ def _forward_kernel(self, inputs, states, sequence_length):\n \"\"\" forward using CUDNN or CPU kenrel\"\"\"\n- swapaxes = F.np.swapaxes if is_np_array() else F.swapaxes\n+ ctx = inputs.ctx\n if self._layout == 'NTC':\n- inputs = swapaxes(inputs, 0, 1)\n+ inputs = np.swapaxes(inputs, 0, 1)\n if self._projection_size is None:\n- params = (kwargs['{}{}_{}_{}'.format(d, l, g, t)].reshape(-1)\n+ params = (getattr(self, '{}{}_{}_{}'.format(d, l, g, t)).data(ctx).reshape(-1)\n for t in ['weight', 'bias']\n for l in range(self._num_layers)\n for d in ['l', 'r'][:self._dir]\n for g in ['i2h', 'h2h'])\n else:\n- params = (kwargs['{}{}_{}_{}'.format(d, l, g, t)].reshape(-1)\n+ params = (getattr(self, '{}{}_{}_{}'.format(d, l, g, t)).data(ctx).reshape(-1)\n for t in ['weight', 'bias']\n for l in range(self._num_layers)\n for d in ['l', 'r'][:self._dir]\n for g in ['i2h', 'h2h', 'h2r']\n if g != 'h2r' or t != 'bias')\n \n- rnn_param_concat = F.np._internal.rnn_param_concat if is_np_array()\\\n- else F._internal._rnn_param_concat\n- params = rnn_param_concat(*params, dim=0)\n+ params = ndarray.np._internal.rnn_param_concat(*params, dim=0)", + "comment_created_at": "2021-06-06T16:02:42+00:00", + "comment_author": "szha", + "comment_body": "I think we should get rid of `rnn_param_concat` by\r\n1. registering only a fused parameter for the RNN layer\r\n2. add a utility for converting the fused parameter to split parameters for consumption in RNN cells only as needed.", + "pr_file_module": null + }, + { + "comment_id": "646846433", + "repo_full_name": "apache/mxnet", + "pr_number": 20262, + "pr_file": "python/mxnet/gluon/rnn/rnn_layer.py", + "discussion_id": "646152752", + "commented_code": "@@ -182,65 +180,79 @@ def __call__(self, inputs, states=None, sequence_length=None, **kwargs):\n else:\n return super(_RNNLayer, self).__call__(inputs, states, **kwargs)\n \n- def hybrid_forward(self, F, inputs, states, sequence_length=None, **kwargs):\n- if F is ndarray:\n- batch_size = inputs.shape[self._layout.find('N')]\n+ def forward(self, inputs, states, sequence_length=None):\n+ batch_size = inputs.shape[self._layout.find('N')]\n \n- if F is ndarray:\n- for state, info in zip(states, self.state_info(batch_size)):\n- if state.shape != info['shape']:\n- raise ValueError(\n- \"Invalid recurrent state shape. Expecting %s, got %s.\"%(\n- str(info['shape']), str(state.shape)))\n- out = self._forward_kernel(F, inputs, states, sequence_length, **kwargs)\n+ for state, info in zip(states, self.state_info(batch_size)):\n+ if state.shape != info['shape']:\n+ raise ValueError(\n+ \"Invalid recurrent state shape. 
Expecting %s, got %s.\"%(\n+ str(info['shape']), str(state.shape)))\n+ out = self._forward_kernel(inputs, states, sequence_length)\n \n # out is (output, state)\n return out[0] if self.skip_states else out\n \n- def _forward_kernel(self, F, inputs, states, sequence_length, **kwargs):\n+ def infer_shape(self, inputs, *args):\n+ assert inputs.ndim == 3, \\\n+ \"Input data should be rank-3 tensor of dim [sequence length, batch size, input size]\"\n+ if not self._projection_size:\n+ step = self._hidden_size\n+ else:\n+ step = self._projection_size\n+ ni = inputs.shape[2]\n+ for i in range(self._num_layers):\n+ for j in ['l', 'r'][:self._dir]:\n+ name = '{}{}_i2h_weight'.format(j, i)\n+ getattr(self, name).shape = (self._gates*self._hidden_size, ni)\n+ ni = step * self._dir\n+\n+ def _forward_kernel(self, inputs, states, sequence_length):\n \"\"\" forward using CUDNN or CPU kenrel\"\"\"\n- swapaxes = F.np.swapaxes if is_np_array() else F.swapaxes\n+ ctx = inputs.ctx\n if self._layout == 'NTC':\n- inputs = swapaxes(inputs, 0, 1)\n+ inputs = np.swapaxes(inputs, 0, 1)\n if self._projection_size is None:\n- params = (kwargs['{}{}_{}_{}'.format(d, l, g, t)].reshape(-1)\n+ params = (getattr(self, '{}{}_{}_{}'.format(d, l, g, t)).data(ctx).reshape(-1)\n for t in ['weight', 'bias']\n for l in range(self._num_layers)\n for d in ['l', 'r'][:self._dir]\n for g in ['i2h', 'h2h'])\n else:\n- params = (kwargs['{}{}_{}_{}'.format(d, l, g, t)].reshape(-1)\n+ params = (getattr(self, '{}{}_{}_{}'.format(d, l, g, t)).data(ctx).reshape(-1)\n for t in ['weight', 'bias']\n for l in range(self._num_layers)\n for d in ['l', 'r'][:self._dir]\n for g in ['i2h', 'h2h', 'h2r']\n if g != 'h2r' or t != 'bias')\n \n- rnn_param_concat = F.np._internal.rnn_param_concat if is_np_array()\\\n- else F._internal._rnn_param_concat\n- params = rnn_param_concat(*params, dim=0)\n+ params = ndarray.np._internal.rnn_param_concat(*params, dim=0)", + "comment_created_at": "2021-06-07T18:34:34+00:00", + "comment_author": "barry-jin", + "comment_body": "I think another way is to use `np.concatenate` here. Because the only difference between rnn_param_cancat and np.concatenate is [InferShape](https://github.com/apache/incubator-mxnet/blob/a6fdc7ae11ab1590f2b2a9a47379e7ab41479c72/src/operator/nn/concat.cc#L436-L440). Currently in deferred compute mode, infer_shape method is defined on python side, so we can just use `np.concatenate` here. ", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-hybridization-compatible-operations.md b/_reviewers/mxnet-hybridization-compatible-operations.md index 7de3762..7d46290 100644 --- a/_reviewers/mxnet-hybridization-compatible-operations.md +++ b/_reviewers/mxnet-hybridization-compatible-operations.md @@ -39,225 +39,3 @@ def forward(self, x): ``` When implementing features like callbacks with hybridization, be aware of limitations with static shapes and mark experimental features appropriately in documentation. - - -[ - { - "discussion_id": "250017909", - "pr_number": 13735, - "pr_file": "example/gluon/wavenet/models.py", - "created_at": "2019-01-23T00:44:31+00:00", - "commented_code": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements. See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership. The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License. 
You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied. See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n\"\"\"\nModule: WaveNet network modulep\n\"\"\"\nfrom mxnet import nd\nfrom mxnet.gluon import nn\nimport mxnet.ndarray as F\n# pylint: disable=invalid-name, too-many-arguments, arguments-differ, attribute-defined-outside-init, too-many-instance-attributes, invalid-sequence-index, no-self-use\nclass One_Hot(nn.Block):\n \"\"\"\n Description : generate one hot result\n \"\"\"\n def __init__(self, depth):\n super(One_Hot, self).__init__()\n self.depth = depth\n\n def forward(self, X_in):\n with X_in.context:\n X_in = X_in\n self.ones = nd.one_hot(nd.arange(self.depth), self.depth)\n return self.ones[X_in, :]\n\n def __repr__(self):\n return self.__class__.__name__ + \"({})\".format(self.depth)\n\nclass WaveNet(nn.Block):", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "250017909", - "repo_full_name": "apache/mxnet", - "pr_number": 13735, - "pr_file": "example/gluon/wavenet/models.py", - "discussion_id": "250017909", - "commented_code": "@@ -0,0 +1,118 @@\n+\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+\n+\"\"\"\n+Module: WaveNet network modulep\n+\"\"\"\n+from mxnet import nd\n+from mxnet.gluon import nn\n+import mxnet.ndarray as F\n+# pylint: disable=invalid-name, too-many-arguments, arguments-differ, attribute-defined-outside-init, too-many-instance-attributes, invalid-sequence-index, no-self-use\n+class One_Hot(nn.Block):\n+ \"\"\"\n+ Description : generate one hot result\n+ \"\"\"\n+ def __init__(self, depth):\n+ super(One_Hot, self).__init__()\n+ self.depth = depth\n+\n+ def forward(self, X_in):\n+ with X_in.context:\n+ X_in = X_in\n+ self.ones = nd.one_hot(nd.arange(self.depth), self.depth)\n+ return self.ones[X_in, :]\n+\n+ def __repr__(self):\n+ return self.__class__.__name__ + \"({})\".format(self.depth)\n+\n+class WaveNet(nn.Block):", - "comment_created_at": "2019-01-23T00:44:31+00:00", - "comment_author": "ThomasDelteil", - "comment_body": "Could we consider a hybridizable implementation?", - "pr_file_module": null - }, - { - "comment_id": "252269415", - "repo_full_name": "apache/mxnet", - "pr_number": 13735, - "pr_file": "example/gluon/wavenet/models.py", - "discussion_id": "250017909", - "commented_code": "@@ -0,0 +1,118 @@\n+\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. 
See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+\n+\"\"\"\n+Module: WaveNet network modulep\n+\"\"\"\n+from mxnet import nd\n+from mxnet.gluon import nn\n+import mxnet.ndarray as F\n+# pylint: disable=invalid-name, too-many-arguments, arguments-differ, attribute-defined-outside-init, too-many-instance-attributes, invalid-sequence-index, no-self-use\n+class One_Hot(nn.Block):\n+ \"\"\"\n+ Description : generate one hot result\n+ \"\"\"\n+ def __init__(self, depth):\n+ super(One_Hot, self).__init__()\n+ self.depth = depth\n+\n+ def forward(self, X_in):\n+ with X_in.context:\n+ X_in = X_in\n+ self.ones = nd.one_hot(nd.arange(self.depth), self.depth)\n+ return self.ones[X_in, :]\n+\n+ def __repr__(self):\n+ return self.__class__.__name__ + \"({})\".format(self.depth)\n+\n+class WaveNet(nn.Block):", - "comment_created_at": "2019-01-30T14:07:24+00:00", - "comment_author": "seujung", - "comment_body": "When I change Block to HybridBlock, some problem occurs in the following code.\r\n``\r\noutput = sum([s[:, :, -output.shape[2]:] for s in skip_connections])\r\n``\r\nCould you tell me how to fix the problem?", - "pr_file_module": null - }, - { - "comment_id": "252419590", - "repo_full_name": "apache/mxnet", - "pr_number": 13735, - "pr_file": "example/gluon/wavenet/models.py", - "discussion_id": "250017909", - "commented_code": "@@ -0,0 +1,118 @@\n+\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. 
See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+\n+\"\"\"\n+Module: WaveNet network modulep\n+\"\"\"\n+from mxnet import nd\n+from mxnet.gluon import nn\n+import mxnet.ndarray as F\n+# pylint: disable=invalid-name, too-many-arguments, arguments-differ, attribute-defined-outside-init, too-many-instance-attributes, invalid-sequence-index, no-self-use\n+class One_Hot(nn.Block):\n+ \"\"\"\n+ Description : generate one hot result\n+ \"\"\"\n+ def __init__(self, depth):\n+ super(One_Hot, self).__init__()\n+ self.depth = depth\n+\n+ def forward(self, X_in):\n+ with X_in.context:\n+ X_in = X_in\n+ self.ones = nd.one_hot(nd.arange(self.depth), self.depth)\n+ return self.ones[X_in, :]\n+\n+ def __repr__(self):\n+ return self.__class__.__name__ + \"({})\".format(self.depth)\n+\n+class WaveNet(nn.Block):", - "comment_created_at": "2019-01-30T20:11:28+00:00", - "comment_author": "safrooze", - "comment_body": "You cannot use index operator with hybridization. You need to use F.slice(). Also shape is not available with hybridized networks. You have three options:\r\n1. use padding so that output is the same size as input\r\n2. calculate the amount to slice by doing the math (a function of kernel size and dilation)\r\n3. use `sym.infer_shape()` to infer shape of the output in hybridized mode.\r\n\r\nI recommend option 2.", - "pr_file_module": null - }, - { - "comment_id": "252942411", - "repo_full_name": "apache/mxnet", - "pr_number": 13735, - "pr_file": "example/gluon/wavenet/models.py", - "discussion_id": "250017909", - "commented_code": "@@ -0,0 +1,118 @@\n+\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+\n+\"\"\"\n+Module: WaveNet network modulep\n+\"\"\"\n+from mxnet import nd\n+from mxnet.gluon import nn\n+import mxnet.ndarray as F\n+# pylint: disable=invalid-name, too-many-arguments, arguments-differ, attribute-defined-outside-init, too-many-instance-attributes, invalid-sequence-index, no-self-use\n+class One_Hot(nn.Block):\n+ \"\"\"\n+ Description : generate one hot result\n+ \"\"\"\n+ def __init__(self, depth):\n+ super(One_Hot, self).__init__()\n+ self.depth = depth\n+\n+ def forward(self, X_in):\n+ with X_in.context:\n+ X_in = X_in\n+ self.ones = nd.one_hot(nd.arange(self.depth), self.depth)\n+ return self.ones[X_in, :]\n+\n+ def __repr__(self):\n+ return self.__class__.__name__ + \"({})\".format(self.depth)\n+\n+class WaveNet(nn.Block):", - "comment_created_at": "2019-02-01T06:13:20+00:00", - "comment_author": "seujung", - "comment_body": "I try to change code using HybridBlock. But, I need too much time to apply it. Is it essential to reflect this? 
If I have to do it, I will try to change this module.", - "pr_file_module": null - }, - { - "comment_id": "265847269", - "repo_full_name": "apache/mxnet", - "pr_number": 13735, - "pr_file": "example/gluon/wavenet/models.py", - "discussion_id": "250017909", - "commented_code": "@@ -0,0 +1,118 @@\n+\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+\n+\"\"\"\n+Module: WaveNet network modulep\n+\"\"\"\n+from mxnet import nd\n+from mxnet.gluon import nn\n+import mxnet.ndarray as F\n+# pylint: disable=invalid-name, too-many-arguments, arguments-differ, attribute-defined-outside-init, too-many-instance-attributes, invalid-sequence-index, no-self-use\n+class One_Hot(nn.Block):\n+ \"\"\"\n+ Description : generate one hot result\n+ \"\"\"\n+ def __init__(self, depth):\n+ super(One_Hot, self).__init__()\n+ self.depth = depth\n+\n+ def forward(self, X_in):\n+ with X_in.context:\n+ X_in = X_in\n+ self.ones = nd.one_hot(nd.arange(self.depth), self.depth)\n+ return self.ones[X_in, :]\n+\n+ def __repr__(self):\n+ return self.__class__.__name__ + \"({})\".format(self.depth)\n+\n+class WaveNet(nn.Block):", - "comment_created_at": "2019-03-15T04:31:55+00:00", - "comment_author": "seujung", - "comment_body": "update hybrid module for wavenet model ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "697558544", - "pr_number": 20559, - "pr_file": "tests/python/unittest/test_autograd.py", - "created_at": "2021-08-27T16:08:00+00:00", - "commented_code": "dx.backward()\n assert abs(x.grad.asscalar() - 2.71828175) < 1e-7\n\ndef test_retain_grad_drop_grad():\n x = nd.array([1,2,3,4])\n x.attach_grad()\n y = nd.array([5,6,7,8])\n y.attach_grad()\n\n with mx.autograd.record():\n u = x * y\n z = u * x\n\n u.attach_grad()\n z.attach_grad()\n out_grad = nd.array([10, 10, 10, 10])\n z.backward(out_grad, retain_graph=True)\n \n assert (u.grad == out_grad * x).asnumpy().all()\n assert (z.grad == out_grad).asnumpy().all()\n assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n assert (y.grad == out_grad * x*x).asnumpy().all()\n\n u.drop_grad()\n z.drop_grad()\n y.drop_grad()\n out_grad = nd.array([0.1, 0.1, 0.1, 0.1])\n z.backward(out_grad)\n\n assert u.grad is None and z.grad is None and y.grad is None\n assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n\ndef test_retain_grad_drop_grad_gluon():\n class CompBlock(mx.gluon.HybridBlock):\n def __init__(self):\n super().__init__()\n self.marked_var = None\n def forward(self, a, b):\n out1 = a*b\n out2 = out1 * a\n self.marked_var = out1\n return out2\n x = mx.np.array([1,2,3,4])\n y = mx.np.array([5,6,7,8])\n x.attach_grad()\n y.attach_grad()\n block2 = CompBlock()\n block2.initialize()\n # block2.hybridize()\n with mx.autograd.record():\n z = block2(x, y)\n u = block2.marked_var\n u.attach_grad()\n 
z.attach_grad()\n z.backward(retain_graph=True)", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "697558544", - "repo_full_name": "apache/mxnet", - "pr_number": 20559, - "pr_file": "tests/python/unittest/test_autograd.py", - "discussion_id": "697558544", - "commented_code": "@@ -519,3 +519,68 @@ def test_gradient():\n dx.backward()\n assert abs(x.grad.asscalar() - 2.71828175) < 1e-7\n \n+def test_retain_grad_drop_grad():\n+ x = nd.array([1,2,3,4])\n+ x.attach_grad()\n+ y = nd.array([5,6,7,8])\n+ y.attach_grad()\n+\n+ with mx.autograd.record():\n+ u = x * y\n+ z = u * x\n+\n+ u.attach_grad()\n+ z.attach_grad()\n+ out_grad = nd.array([10, 10, 10, 10])\n+ z.backward(out_grad, retain_graph=True)\n+ \n+ assert (u.grad == out_grad * x).asnumpy().all()\n+ assert (z.grad == out_grad).asnumpy().all()\n+ assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n+ assert (y.grad == out_grad * x*x).asnumpy().all()\n+\n+ u.drop_grad()\n+ z.drop_grad()\n+ y.drop_grad()\n+ out_grad = nd.array([0.1, 0.1, 0.1, 0.1])\n+ z.backward(out_grad)\n+\n+ assert u.grad is None and z.grad is None and y.grad is None\n+ assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n+\n+def test_retain_grad_drop_grad_gluon():\n+ class CompBlock(mx.gluon.HybridBlock):\n+ def __init__(self):\n+ super().__init__()\n+ self.marked_var = None\n+ def forward(self, a, b):\n+ out1 = a*b\n+ out2 = out1 * a\n+ self.marked_var = out1\n+ return out2\n+ x = mx.np.array([1,2,3,4])\n+ y = mx.np.array([5,6,7,8])\n+ x.attach_grad()\n+ y.attach_grad()\n+ block2 = CompBlock()\n+ block2.initialize()\n+ # block2.hybridize()\n+ with mx.autograd.record():\n+ z = block2(x, y)\n+ u = block2.marked_var\n+ u.attach_grad()\n+ z.attach_grad()\n+ z.backward(retain_graph=True)", - "comment_created_at": "2021-08-27T16:08:00+00:00", - "comment_author": "leezu", - "comment_body": "Accessing the `marked_var` outside of the Block wouldn't work for hybridized Blocks. That's because the ndarray (`out1`) is just a temporary object used in the forward function for tracing the computation. Afterwards it's discarded and only the underlying symbolic graph is kept.\r\n\r\nThus I think you'll need to support something like `attach_grad` inside of the forward function. WDYT?\r\nFor that, I'd recommend you take a look at the CachedOp and how it handles gradients. 
You'll need a way to express that it should return a gradient buffer for the intermediate variable.\r\n\r\nAlso keep in mind that `autograd` API currently only targets the imperative use-case (without `hybridize`) and the deferred compute tracing used during hybridization are can't be enabled at the same time as `autograd` currently:\r\n\r\nhttps://github.com/apache/incubator-mxnet/blob/71526851d857fde97660aa79ccdb77f2b1cf963f/src/imperative/imperative.cc#L301-L305", - "pr_file_module": null - }, - { - "comment_id": "697632922", - "repo_full_name": "apache/mxnet", - "pr_number": 20559, - "pr_file": "tests/python/unittest/test_autograd.py", - "discussion_id": "697558544", - "commented_code": "@@ -519,3 +519,68 @@ def test_gradient():\n dx.backward()\n assert abs(x.grad.asscalar() - 2.71828175) < 1e-7\n \n+def test_retain_grad_drop_grad():\n+ x = nd.array([1,2,3,4])\n+ x.attach_grad()\n+ y = nd.array([5,6,7,8])\n+ y.attach_grad()\n+\n+ with mx.autograd.record():\n+ u = x * y\n+ z = u * x\n+\n+ u.attach_grad()\n+ z.attach_grad()\n+ out_grad = nd.array([10, 10, 10, 10])\n+ z.backward(out_grad, retain_graph=True)\n+ \n+ assert (u.grad == out_grad * x).asnumpy().all()\n+ assert (z.grad == out_grad).asnumpy().all()\n+ assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n+ assert (y.grad == out_grad * x*x).asnumpy().all()\n+\n+ u.drop_grad()\n+ z.drop_grad()\n+ y.drop_grad()\n+ out_grad = nd.array([0.1, 0.1, 0.1, 0.1])\n+ z.backward(out_grad)\n+\n+ assert u.grad is None and z.grad is None and y.grad is None\n+ assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n+\n+def test_retain_grad_drop_grad_gluon():\n+ class CompBlock(mx.gluon.HybridBlock):\n+ def __init__(self):\n+ super().__init__()\n+ self.marked_var = None\n+ def forward(self, a, b):\n+ out1 = a*b\n+ out2 = out1 * a\n+ self.marked_var = out1\n+ return out2\n+ x = mx.np.array([1,2,3,4])\n+ y = mx.np.array([5,6,7,8])\n+ x.attach_grad()\n+ y.attach_grad()\n+ block2 = CompBlock()\n+ block2.initialize()\n+ # block2.hybridize()\n+ with mx.autograd.record():\n+ z = block2(x, y)\n+ u = block2.marked_var\n+ u.attach_grad()\n+ z.attach_grad()\n+ z.backward(retain_graph=True)", - "comment_created_at": "2021-08-27T18:14:10+00:00", - "comment_author": "KexinFeng", - "comment_body": "Hi Leo,\r\nThanks for pointing out the relation between deferred compute and autograd API. I just committed my first implementation of `attach_grad` on hybridize mode. Indeed, the major implementation is in the forward pass, ie the invoke of `CachedOp`. \r\n\r\nThe main idea is to utilize the handle of deferred ndarray (`out1`) to mark the nonleaf node which are still deferredcompute node. And then, when invoking `CachedOp`, the autograd computation nodes are retained and linked to the ndarrays like 'out1'. Thus these ndarrays can further call `attach_grad` which will the the same as imperative mode.\r\n\r\nBut it is true that there is an incompatibility of hybridize computation with the imperative style of `attach_grad`. This incompatibility may lead to inconvenience when designing the python frontend use of this feature. 
This could be done next.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "701074106", - "pr_number": 20559, - "pr_file": "tests/python/unittest/test_autograd.py", - "created_at": "2021-09-02T13:18:39+00:00", - "commented_code": "dx.backward()\n assert abs(x.grad.asscalar() - 2.71828175) < 1e-7\n\ndef test_retain_grad_drop_grad():\n x = nd.array([1,2,3,4])\n x.attach_grad()\n y = nd.array([5,6,7,8])\n y.attach_grad()\n\n with mx.autograd.record():\n u = x * y\n z = u * x\n\n u.attach_grad()\n z.attach_grad()\n out_grad = nd.array([10, 10, 10, 10])\n z.backward(out_grad, retain_graph=True)\n\n assert (u.grad == out_grad * x).asnumpy().all()\n assert (z.grad == out_grad).asnumpy().all()\n assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n assert (y.grad == out_grad * x*x).asnumpy().all()\n\n u.drop_grad()\n z.drop_grad()\n y.drop_grad()\n out_grad = nd.array([0.1, 0.1, 0.1, 0.1])\n z.backward(out_grad)\n\n assert u.grad is None and z.grad is None and y.grad is None\n assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n\ndef test_retain_grad_drop_grad_gluon():\n class CompBlock(mx.gluon.HybridBlock):\n def __init__(self):\n super().__init__()\n\n def forward(self, a, b):\n out1 = a*b\n out2 = out1 * a\n self.mark_vars(out1)\n return out2\n\n x = mx.np.array([1,2,3,4])\n y = mx.np.array([5,6,7,8])\n x.attach_grad()\n y.attach_grad()\n block2 = CompBlock()\n block2.initialize()\n # block2.hybridize()", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "701074106", - "repo_full_name": "apache/mxnet", - "pr_number": 20559, - "pr_file": "tests/python/unittest/test_autograd.py", - "discussion_id": "701074106", - "commented_code": "@@ -519,3 +519,110 @@ def test_gradient():\n dx.backward()\n assert abs(x.grad.asscalar() - 2.71828175) < 1e-7\n \n+def test_retain_grad_drop_grad():\n+ x = nd.array([1,2,3,4])\n+ x.attach_grad()\n+ y = nd.array([5,6,7,8])\n+ y.attach_grad()\n+\n+ with mx.autograd.record():\n+ u = x * y\n+ z = u * x\n+\n+ u.attach_grad()\n+ z.attach_grad()\n+ out_grad = nd.array([10, 10, 10, 10])\n+ z.backward(out_grad, retain_graph=True)\n+\n+ assert (u.grad == out_grad * x).asnumpy().all()\n+ assert (z.grad == out_grad).asnumpy().all()\n+ assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n+ assert (y.grad == out_grad * x*x).asnumpy().all()\n+\n+ u.drop_grad()\n+ z.drop_grad()\n+ y.drop_grad()\n+ out_grad = nd.array([0.1, 0.1, 0.1, 0.1])\n+ z.backward(out_grad)\n+\n+ assert u.grad is None and z.grad is None and y.grad is None\n+ assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n+\n+def test_retain_grad_drop_grad_gluon():\n+ class CompBlock(mx.gluon.HybridBlock):\n+ def __init__(self):\n+ super().__init__()\n+\n+ def forward(self, a, b):\n+ out1 = a*b\n+ out2 = out1 * a\n+ self.mark_vars(out1)\n+ return out2\n+\n+ x = mx.np.array([1,2,3,4])\n+ y = mx.np.array([5,6,7,8])\n+ x.attach_grad()\n+ y.attach_grad()\n+ block2 = CompBlock()\n+ block2.initialize()\n+ # block2.hybridize()", - "comment_created_at": "2021-09-02T13:18:39+00:00", - "comment_author": "leezu", - "comment_body": "Should this test be run both with and without hybridize? 
You can parameterize the test function https://docs.pytest.org/en/6.2.x/parametrize.html", - "pr_file_module": null - }, - { - "comment_id": "701944170", - "repo_full_name": "apache/mxnet", - "pr_number": 20559, - "pr_file": "tests/python/unittest/test_autograd.py", - "discussion_id": "701074106", - "commented_code": "@@ -519,3 +519,110 @@ def test_gradient():\n dx.backward()\n assert abs(x.grad.asscalar() - 2.71828175) < 1e-7\n \n+def test_retain_grad_drop_grad():\n+ x = nd.array([1,2,3,4])\n+ x.attach_grad()\n+ y = nd.array([5,6,7,8])\n+ y.attach_grad()\n+\n+ with mx.autograd.record():\n+ u = x * y\n+ z = u * x\n+\n+ u.attach_grad()\n+ z.attach_grad()\n+ out_grad = nd.array([10, 10, 10, 10])\n+ z.backward(out_grad, retain_graph=True)\n+\n+ assert (u.grad == out_grad * x).asnumpy().all()\n+ assert (z.grad == out_grad).asnumpy().all()\n+ assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n+ assert (y.grad == out_grad * x*x).asnumpy().all()\n+\n+ u.drop_grad()\n+ z.drop_grad()\n+ y.drop_grad()\n+ out_grad = nd.array([0.1, 0.1, 0.1, 0.1])\n+ z.backward(out_grad)\n+\n+ assert u.grad is None and z.grad is None and y.grad is None\n+ assert (x.grad == out_grad * 2 * x * y).asnumpy().all()\n+\n+def test_retain_grad_drop_grad_gluon():\n+ class CompBlock(mx.gluon.HybridBlock):\n+ def __init__(self):\n+ super().__init__()\n+\n+ def forward(self, a, b):\n+ out1 = a*b\n+ out2 = out1 * a\n+ self.mark_vars(out1)\n+ return out2\n+\n+ x = mx.np.array([1,2,3,4])\n+ y = mx.np.array([5,6,7,8])\n+ x.attach_grad()\n+ y.attach_grad()\n+ block2 = CompBlock()\n+ block2.initialize()\n+ # block2.hybridize()", - "comment_created_at": "2021-09-03T14:40:15+00:00", - "comment_author": "KexinFeng", - "comment_body": "Done.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "325399680", - "pr_number": 15839, - "pr_file": "python/mxnet/gluon/block.py", - "created_at": "2019-09-17T21:45:03+00:00", - "commented_code": "def _call_cached_op(self, *args):\n if self._cached_op is None:\n self._build_cache(*args)\n assert self._cached_op, \"cached op is not None\"\n if self._callback:\n self._cached_op._register_op_hook(self._callback, self._monitor_all)\n if len(self._flags) >= 2 and self._flags[1]:\n warnings.warn(\"Callback is not supported when static_shape=True \"", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "325399680", - "repo_full_name": "apache/mxnet", - "pr_number": 15839, - "pr_file": "python/mxnet/gluon/block.py", - "discussion_id": "325399680", - "commented_code": "@@ -833,6 +848,12 @@ def _deferred_infer_shape(self, *args):\n def _call_cached_op(self, *args):\n if self._cached_op is None:\n self._build_cache(*args)\n+ assert self._cached_op, \"cached op is not None\"\n+ if self._callback:\n+ self._cached_op._register_op_hook(self._callback, self._monitor_all)\n+ if len(self._flags) >= 2 and self._flags[1]:\n+ warnings.warn(\"Callback is not supported when static_shape=True \"", - "comment_created_at": "2019-09-17T21:45:03+00:00", - "comment_author": "rahul003", - "comment_body": "Why's this? 
If it's not supported, do not register the callback?", - "pr_file_module": null - }, - { - "comment_id": "325399851", - "repo_full_name": "apache/mxnet", - "pr_number": 15839, - "pr_file": "python/mxnet/gluon/block.py", - "discussion_id": "325399680", - "commented_code": "@@ -833,6 +848,12 @@ def _deferred_infer_shape(self, *args):\n def _call_cached_op(self, *args):\n if self._cached_op is None:\n self._build_cache(*args)\n+ assert self._cached_op, \"cached op is not None\"\n+ if self._callback:\n+ self._cached_op._register_op_hook(self._callback, self._monitor_all)\n+ if len(self._flags) >= 2 and self._flags[1]:\n+ warnings.warn(\"Callback is not supported when static_shape=True \"", - "comment_created_at": "2019-09-17T21:45:33+00:00", - "comment_author": "rahul003", - "comment_body": "else say something like experimental for static_shape=True if you are not confident, but it might work", - "pr_file_module": null - }, - { - "comment_id": "325407901", - "repo_full_name": "apache/mxnet", - "pr_number": 15839, - "pr_file": "python/mxnet/gluon/block.py", - "discussion_id": "325399680", - "commented_code": "@@ -833,6 +848,12 @@ def _deferred_infer_shape(self, *args):\n def _call_cached_op(self, *args):\n if self._cached_op is None:\n self._build_cache(*args)\n+ assert self._cached_op, \"cached op is not None\"\n+ if self._callback:\n+ self._cached_op._register_op_hook(self._callback, self._monitor_all)\n+ if len(self._flags) >= 2 and self._flags[1]:\n+ warnings.warn(\"Callback is not supported when static_shape=True \"", - "comment_created_at": "2019-09-17T22:10:50+00:00", - "comment_author": "anirudh2290", - "comment_body": "Yes I will say experimental, good point thanks. The reason this is experimental is because for certain code path the cached op directly calls engine push instead of calling InvokeOp. adding the callbacks in such cases was much more difficult since we just had the opr and the engine vars not the ndarray references. Please see line: https://github.com/apache/incubator-mxnet/blob/master/src/imperative/cached_op.cc#L689\r\n\r\nFor this case, it is not supported.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "646152752", - "pr_number": 20262, - "pr_file": "python/mxnet/gluon/rnn/rnn_layer.py", - "created_at": "2021-06-06T16:02:42+00:00", - "commented_code": "else:\n return super(_RNNLayer, self).__call__(inputs, states, **kwargs)\n\n def hybrid_forward(self, F, inputs, states, sequence_length=None, **kwargs):\n if F is ndarray:\n batch_size = inputs.shape[self._layout.find('N')]\n def forward(self, inputs, states, sequence_length=None):\n batch_size = inputs.shape[self._layout.find('N')]\n\n if F is ndarray:\n for state, info in zip(states, self.state_info(batch_size)):\n if state.shape != info['shape']:\n raise ValueError(\n \"Invalid recurrent state shape. Expecting %s, got %s.\"%(\n str(info['shape']), str(state.shape)))\n out = self._forward_kernel(F, inputs, states, sequence_length, **kwargs)\n for state, info in zip(states, self.state_info(batch_size)):\n if state.shape != info['shape']:\n raise ValueError(\n \"Invalid recurrent state shape. 
Expecting %s, got %s.\"%(\n str(info['shape']), str(state.shape)))\n out = self._forward_kernel(inputs, states, sequence_length)\n\n # out is (output, state)\n return out[0] if self.skip_states else out\n\n def _forward_kernel(self, F, inputs, states, sequence_length, **kwargs):\n def infer_shape(self, inputs, *args):\n assert inputs.ndim == 3, \\\n \"Input data should be rank-3 tensor of dim [sequence length, batch size, input size]\"\n if not self._projection_size:\n step = self._hidden_size\n else:\n step = self._projection_size\n ni = inputs.shape[2]\n for i in range(self._num_layers):\n for j in ['l', 'r'][:self._dir]:\n name = '{}{}_i2h_weight'.format(j, i)\n getattr(self, name).shape = (self._gates*self._hidden_size, ni)\n ni = step * self._dir\n\n def _forward_kernel(self, inputs, states, sequence_length):\n \"\"\" forward using CUDNN or CPU kenrel\"\"\"\n swapaxes = F.np.swapaxes if is_np_array() else F.swapaxes\n ctx = inputs.ctx\n if self._layout == 'NTC':\n inputs = swapaxes(inputs, 0, 1)\n inputs = np.swapaxes(inputs, 0, 1)\n if self._projection_size is None:\n params = (kwargs['{}{}_{}_{}'.format(d, l, g, t)].reshape(-1)\n params = (getattr(self, '{}{}_{}_{}'.format(d, l, g, t)).data(ctx).reshape(-1)\n for t in ['weight', 'bias']\n for l in range(self._num_layers)\n for d in ['l', 'r'][:self._dir]\n for g in ['i2h', 'h2h'])\n else:\n params = (kwargs['{}{}_{}_{}'.format(d, l, g, t)].reshape(-1)\n params = (getattr(self, '{}{}_{}_{}'.format(d, l, g, t)).data(ctx).reshape(-1)\n for t in ['weight', 'bias']\n for l in range(self._num_layers)\n for d in ['l', 'r'][:self._dir]\n for g in ['i2h', 'h2h', 'h2r']\n if g != 'h2r' or t != 'bias')\n\n rnn_param_concat = F.np._internal.rnn_param_concat if is_np_array()\\\n else F._internal._rnn_param_concat\n params = rnn_param_concat(*params, dim=0)\n params = ndarray.np._internal.rnn_param_concat(*params, dim=0)", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "646152752", - "repo_full_name": "apache/mxnet", - "pr_number": 20262, - "pr_file": "python/mxnet/gluon/rnn/rnn_layer.py", - "discussion_id": "646152752", - "commented_code": "@@ -182,65 +180,79 @@ def __call__(self, inputs, states=None, sequence_length=None, **kwargs):\n else:\n return super(_RNNLayer, self).__call__(inputs, states, **kwargs)\n \n- def hybrid_forward(self, F, inputs, states, sequence_length=None, **kwargs):\n- if F is ndarray:\n- batch_size = inputs.shape[self._layout.find('N')]\n+ def forward(self, inputs, states, sequence_length=None):\n+ batch_size = inputs.shape[self._layout.find('N')]\n \n- if F is ndarray:\n- for state, info in zip(states, self.state_info(batch_size)):\n- if state.shape != info['shape']:\n- raise ValueError(\n- \"Invalid recurrent state shape. Expecting %s, got %s.\"%(\n- str(info['shape']), str(state.shape)))\n- out = self._forward_kernel(F, inputs, states, sequence_length, **kwargs)\n+ for state, info in zip(states, self.state_info(batch_size)):\n+ if state.shape != info['shape']:\n+ raise ValueError(\n+ \"Invalid recurrent state shape. 
Expecting %s, got %s.\"%(\n+ str(info['shape']), str(state.shape)))\n+ out = self._forward_kernel(inputs, states, sequence_length)\n \n # out is (output, state)\n return out[0] if self.skip_states else out\n \n- def _forward_kernel(self, F, inputs, states, sequence_length, **kwargs):\n+ def infer_shape(self, inputs, *args):\n+ assert inputs.ndim == 3, \\\n+ \"Input data should be rank-3 tensor of dim [sequence length, batch size, input size]\"\n+ if not self._projection_size:\n+ step = self._hidden_size\n+ else:\n+ step = self._projection_size\n+ ni = inputs.shape[2]\n+ for i in range(self._num_layers):\n+ for j in ['l', 'r'][:self._dir]:\n+ name = '{}{}_i2h_weight'.format(j, i)\n+ getattr(self, name).shape = (self._gates*self._hidden_size, ni)\n+ ni = step * self._dir\n+\n+ def _forward_kernel(self, inputs, states, sequence_length):\n \"\"\" forward using CUDNN or CPU kenrel\"\"\"\n- swapaxes = F.np.swapaxes if is_np_array() else F.swapaxes\n+ ctx = inputs.ctx\n if self._layout == 'NTC':\n- inputs = swapaxes(inputs, 0, 1)\n+ inputs = np.swapaxes(inputs, 0, 1)\n if self._projection_size is None:\n- params = (kwargs['{}{}_{}_{}'.format(d, l, g, t)].reshape(-1)\n+ params = (getattr(self, '{}{}_{}_{}'.format(d, l, g, t)).data(ctx).reshape(-1)\n for t in ['weight', 'bias']\n for l in range(self._num_layers)\n for d in ['l', 'r'][:self._dir]\n for g in ['i2h', 'h2h'])\n else:\n- params = (kwargs['{}{}_{}_{}'.format(d, l, g, t)].reshape(-1)\n+ params = (getattr(self, '{}{}_{}_{}'.format(d, l, g, t)).data(ctx).reshape(-1)\n for t in ['weight', 'bias']\n for l in range(self._num_layers)\n for d in ['l', 'r'][:self._dir]\n for g in ['i2h', 'h2h', 'h2r']\n if g != 'h2r' or t != 'bias')\n \n- rnn_param_concat = F.np._internal.rnn_param_concat if is_np_array()\\\n- else F._internal._rnn_param_concat\n- params = rnn_param_concat(*params, dim=0)\n+ params = ndarray.np._internal.rnn_param_concat(*params, dim=0)", - "comment_created_at": "2021-06-06T16:02:42+00:00", - "comment_author": "szha", - "comment_body": "I think we should get rid of `rnn_param_concat` by\r\n1. registering only a fused parameter for the RNN layer\r\n2. add a utility for converting the fused parameter to split parameters for consumption in RNN cells only as needed.", - "pr_file_module": null - }, - { - "comment_id": "646846433", - "repo_full_name": "apache/mxnet", - "pr_number": 20262, - "pr_file": "python/mxnet/gluon/rnn/rnn_layer.py", - "discussion_id": "646152752", - "commented_code": "@@ -182,65 +180,79 @@ def __call__(self, inputs, states=None, sequence_length=None, **kwargs):\n else:\n return super(_RNNLayer, self).__call__(inputs, states, **kwargs)\n \n- def hybrid_forward(self, F, inputs, states, sequence_length=None, **kwargs):\n- if F is ndarray:\n- batch_size = inputs.shape[self._layout.find('N')]\n+ def forward(self, inputs, states, sequence_length=None):\n+ batch_size = inputs.shape[self._layout.find('N')]\n \n- if F is ndarray:\n- for state, info in zip(states, self.state_info(batch_size)):\n- if state.shape != info['shape']:\n- raise ValueError(\n- \"Invalid recurrent state shape. Expecting %s, got %s.\"%(\n- str(info['shape']), str(state.shape)))\n- out = self._forward_kernel(F, inputs, states, sequence_length, **kwargs)\n+ for state, info in zip(states, self.state_info(batch_size)):\n+ if state.shape != info['shape']:\n+ raise ValueError(\n+ \"Invalid recurrent state shape. 
Expecting %s, got %s.\"%(\n+ str(info['shape']), str(state.shape)))\n+ out = self._forward_kernel(inputs, states, sequence_length)\n \n # out is (output, state)\n return out[0] if self.skip_states else out\n \n- def _forward_kernel(self, F, inputs, states, sequence_length, **kwargs):\n+ def infer_shape(self, inputs, *args):\n+ assert inputs.ndim == 3, \\\n+ \"Input data should be rank-3 tensor of dim [sequence length, batch size, input size]\"\n+ if not self._projection_size:\n+ step = self._hidden_size\n+ else:\n+ step = self._projection_size\n+ ni = inputs.shape[2]\n+ for i in range(self._num_layers):\n+ for j in ['l', 'r'][:self._dir]:\n+ name = '{}{}_i2h_weight'.format(j, i)\n+ getattr(self, name).shape = (self._gates*self._hidden_size, ni)\n+ ni = step * self._dir\n+\n+ def _forward_kernel(self, inputs, states, sequence_length):\n \"\"\" forward using CUDNN or CPU kenrel\"\"\"\n- swapaxes = F.np.swapaxes if is_np_array() else F.swapaxes\n+ ctx = inputs.ctx\n if self._layout == 'NTC':\n- inputs = swapaxes(inputs, 0, 1)\n+ inputs = np.swapaxes(inputs, 0, 1)\n if self._projection_size is None:\n- params = (kwargs['{}{}_{}_{}'.format(d, l, g, t)].reshape(-1)\n+ params = (getattr(self, '{}{}_{}_{}'.format(d, l, g, t)).data(ctx).reshape(-1)\n for t in ['weight', 'bias']\n for l in range(self._num_layers)\n for d in ['l', 'r'][:self._dir]\n for g in ['i2h', 'h2h'])\n else:\n- params = (kwargs['{}{}_{}_{}'.format(d, l, g, t)].reshape(-1)\n+ params = (getattr(self, '{}{}_{}_{}'.format(d, l, g, t)).data(ctx).reshape(-1)\n for t in ['weight', 'bias']\n for l in range(self._num_layers)\n for d in ['l', 'r'][:self._dir]\n for g in ['i2h', 'h2h', 'h2r']\n if g != 'h2r' or t != 'bias')\n \n- rnn_param_concat = F.np._internal.rnn_param_concat if is_np_array()\\\n- else F._internal._rnn_param_concat\n- params = rnn_param_concat(*params, dim=0)\n+ params = ndarray.np._internal.rnn_param_concat(*params, dim=0)", - "comment_created_at": "2021-06-07T18:34:34+00:00", - "comment_author": "barry-jin", - "comment_body": "I think another way is to use `np.concatenate` here. Because the only difference between rnn_param_cancat and np.concatenate is [InferShape](https://github.com/apache/incubator-mxnet/blob/a6fdc7ae11ab1590f2b2a9a47379e7ab41479c72/src/operator/nn/concat.cc#L436-L440). Currently in deferred compute mode, infer_shape method is defined on python side, so we can just use `np.concatenate` here. ", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-optimize-iteration-patterns.json b/_reviewers/mxnet-optimize-iteration-patterns.json new file mode 100644 index 0000000..a4e16c4 --- /dev/null +++ b/_reviewers/mxnet-optimize-iteration-patterns.json @@ -0,0 +1,170 @@ +[ + { + "discussion_id": "360245337", + "pr_number": 17129, + "pr_file": "python/mxnet/gluon/contrib/estimator/estimator.py", + "created_at": "2019-12-20T06:45:46+00:00", + "commented_code": "batch_axis : int, default 0\n Batch axis to split the training data into devices.\n \"\"\"\n if not isinstance(train_data, DataLoader):\n raise ValueError(\"Estimator only support input as Gluon DataLoader. Alternatively, you \"\n \"can transform your DataIter or any NDArray into Gluon DataLoader. 
\"\n \"Refer to gluon.data.dataloader\")\n try:\n is_iterable = iter(train_data)", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "360245337", + "repo_full_name": "apache/mxnet", + "pr_number": 17129, + "pr_file": "python/mxnet/gluon/contrib/estimator/estimator.py", + "discussion_id": "360245337", + "commented_code": "@@ -358,10 +359,11 @@ def fit(self, train_data,\n batch_axis : int, default 0\n Batch axis to split the training data into devices.\n \"\"\"\n- if not isinstance(train_data, DataLoader):\n- raise ValueError(\"Estimator only support input as Gluon DataLoader. Alternatively, you \"\n- \"can transform your DataIter or any NDArray into Gluon DataLoader. \"\n- \"Refer to gluon.data.dataloader\")\n+ try:\n+ is_iterable = iter(train_data)", + "comment_created_at": "2019-12-20T06:45:46+00:00", + "comment_author": "leezu", + "comment_body": "Calling `iter(train_data)` can have side-effects, such as starting prefetching. See https://github.com/dmlc/gluon-nlp/blob/297e14732849f2b8025de6e2bf80ac05cab82b44/src/gluonnlp/data/stream.py#L341\r\n\r\nThus `iter` must only be invoked once. The enumerate call below invokes `iter` a second time implicitly. If you want to invoke `iter` manually, you need to use a while loop and `next` to iterate over the elements until `StopIteration` is raised.", + "pr_file_module": null + }, + { + "comment_id": "360251070", + "repo_full_name": "apache/mxnet", + "pr_number": 17129, + "pr_file": "python/mxnet/gluon/contrib/estimator/estimator.py", + "discussion_id": "360245337", + "commented_code": "@@ -358,10 +359,11 @@ def fit(self, train_data,\n batch_axis : int, default 0\n Batch axis to split the training data into devices.\n \"\"\"\n- if not isinstance(train_data, DataLoader):\n- raise ValueError(\"Estimator only support input as Gluon DataLoader. Alternatively, you \"\n- \"can transform your DataIter or any NDArray into Gluon DataLoader. \"\n- \"Refer to gluon.data.dataloader\")\n+ try:\n+ is_iterable = iter(train_data)", + "comment_created_at": "2019-12-20T07:14:42+00:00", + "comment_author": "liuzh47", + "comment_body": "what about using `hasattr(train_data, '__iter__')` to check if the `train_data` is iterable or not? Do we need to cover the case if `train_data` has implemented `__getitem__`?", + "pr_file_module": null + }, + { + "comment_id": "360254544", + "repo_full_name": "apache/mxnet", + "pr_number": 17129, + "pr_file": "python/mxnet/gluon/contrib/estimator/estimator.py", + "discussion_id": "360245337", + "commented_code": "@@ -358,10 +359,11 @@ def fit(self, train_data,\n batch_axis : int, default 0\n Batch axis to split the training data into devices.\n \"\"\"\n- if not isinstance(train_data, DataLoader):\n- raise ValueError(\"Estimator only support input as Gluon DataLoader. Alternatively, you \"\n- \"can transform your DataIter or any NDArray into Gluon DataLoader. \"\n- \"Refer to gluon.data.dataloader\")\n+ try:\n+ is_iterable = iter(train_data)", + "comment_created_at": "2019-12-20T07:30:58+00:00", + "comment_author": "leezu", + "comment_body": "You need to check both. Thus `hasattr(train_data, '__iter__') or hasattr(train_data, '__getitem__')` should be fine.\r\n\r\nSee \r\n\r\n> An object capable of returning its members one at a time. 
Examples of iterables include all sequence types (such as list, str, and tuple) and some non-sequence types like dict, file objects, and objects of any classes you define with an `__iter__()` method or with a `__getitem__()` method that implements Sequence semantics.\r\n\r\nhttps://docs.python.org/3/glossary.html", + "pr_file_module": null + }, + { + "comment_id": "360256284", + "repo_full_name": "apache/mxnet", + "pr_number": 17129, + "pr_file": "python/mxnet/gluon/contrib/estimator/estimator.py", + "discussion_id": "360245337", + "commented_code": "@@ -358,10 +359,11 @@ def fit(self, train_data,\n batch_axis : int, default 0\n Batch axis to split the training data into devices.\n \"\"\"\n- if not isinstance(train_data, DataLoader):\n- raise ValueError(\"Estimator only support input as Gluon DataLoader. Alternatively, you \"\n- \"can transform your DataIter or any NDArray into Gluon DataLoader. \"\n- \"Refer to gluon.data.dataloader\")\n+ try:\n+ is_iterable = iter(train_data)", + "comment_created_at": "2019-12-20T07:38:45+00:00", + "comment_author": "liuzh47", + "comment_body": "ok, it makes sense to me.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "695846029", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_gluon_data.py", + "created_at": "2021-08-25T15:05:11+00:00", + "commented_code": "def test_multi_worker_forked_data_loader():\n data = _Dummy(False)\n loader = DataLoader(data, batch_size=40, batchify_fn=_batchify, num_workers=2)\n for epoch in range(1):\n for i, data in enumerate(loader):\n for _ in range(1):\n for _ in enumerate(loader):", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "695846029", + "repo_full_name": "apache/mxnet", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_gluon_data.py", + "discussion_id": "695846029", + "commented_code": "@@ -325,14 +325,14 @@ def _batchify(data):\n def test_multi_worker_forked_data_loader():\n data = _Dummy(False)\n loader = DataLoader(data, batch_size=40, batchify_fn=_batchify, num_workers=2)\n- for epoch in range(1):\n- for i, data in enumerate(loader):\n+ for _ in range(1):\n+ for _ in enumerate(loader):", + "comment_created_at": "2021-08-25T15:05:11+00:00", + "comment_author": "szha", + "comment_body": "```suggestion\r\n for _ in loader:\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "695846617", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_gluon_data.py", + "created_at": "2021-08-25T15:05:50+00:00", + "commented_code": "a = mx.gluon.data.SimpleDataset([i for i in range(length)])\n a_filtered = a.filter(lambda x: x % 10 == 0)\n assert(len(a_filtered) == 10)\n for idx, sample in enumerate(a_filtered):\n for _, sample in enumerate(a_filtered):\n assert sample % 10 == 0\n a_xform_filtered = a.transform(lambda x: x + 1).filter(lambda x: x % 10 == 0)\n assert(len(a_xform_filtered) == 10)\n # the filtered data is already transformed\n for idx, sample in enumerate(a_xform_filtered):\n for _, sample in enumerate(a_xform_filtered):", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "695846617", + "repo_full_name": "apache/mxnet", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_gluon_data.py", + "discussion_id": "695846617", + "commented_code": "@@ -382,25 +382,25 @@ def test_dataset_filter():\n a = mx.gluon.data.SimpleDataset([i for i in range(length)])\n a_filtered = a.filter(lambda x: x % 10 == 0)\n assert(len(a_filtered) == 10)\n- for idx, sample in 
enumerate(a_filtered):\n+ for _, sample in enumerate(a_filtered):\n assert sample % 10 == 0\n a_xform_filtered = a.transform(lambda x: x + 1).filter(lambda x: x % 10 == 0)\n assert(len(a_xform_filtered) == 10)\n # the filtered data is already transformed\n- for idx, sample in enumerate(a_xform_filtered):\n+ for _, sample in enumerate(a_xform_filtered):", + "comment_created_at": "2021-08-25T15:05:50+00:00", + "comment_author": "szha", + "comment_body": "```suggestion\r\n for sample in a_xform_filtered:\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "695847352", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_gluon_data.py", + "created_at": "2021-08-25T15:06:36+00:00", + "commented_code": "assert len(shard_3) == 2\n total = 0\n for shard in [shard_0, shard_1, shard_2, shard_3]:\n for idx, sample in enumerate(shard):\n for _, sample in enumerate(shard):", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "695847352", + "repo_full_name": "apache/mxnet", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_gluon_data.py", + "discussion_id": "695847352", + "commented_code": "@@ -417,7 +417,7 @@ def test_dataset_shard():\n assert len(shard_3) == 2\n total = 0\n for shard in [shard_0, shard_1, shard_2, shard_3]:\n- for idx, sample in enumerate(shard):\n+ for _, sample in enumerate(shard):", + "comment_created_at": "2021-08-25T15:06:36+00:00", + "comment_author": "szha", + "comment_body": "```suggestion\r\n for sample in shard:\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "695847543", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_gluon_data.py", + "created_at": "2021-08-25T15:06:48+00:00", + "commented_code": "assert len(shard_3) == 2\n total = 0\n for shard in [shard_0, shard_1, shard_2, shard_3]:\n for idx, sample in enumerate(shard):\n for _, sample in enumerate(shard):", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "695847543", + "repo_full_name": "apache/mxnet", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_gluon_data.py", + "discussion_id": "695847543", + "commented_code": "@@ -435,7 +435,7 @@ def test_dataset_shard_handle():\n assert len(shard_3) == 2\n total = 0\n for shard in [shard_0, shard_1, shard_2, shard_3]:\n- for idx, sample in enumerate(shard):\n+ for _, sample in enumerate(shard):", + "comment_created_at": "2021-08-25T15:06:48+00:00", + "comment_author": "szha", + "comment_body": "```suggestion\r\n for sample in shard:\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "695855964", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_numpy_op.py", + "created_at": "2021-08-25T15:15:41+00:00", + "commented_code": "dtype=dtype))\n for optimize in [False, True]:\n x = []\n for (iop, op) in enumerate(operands):\n for (iop, _) in enumerate(operands):", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "695855964", + "repo_full_name": "apache/mxnet", + "pr_number": 20505, + "pr_file": "tests/python/unittest/test_numpy_op.py", + "discussion_id": "695855964", + "commented_code": "@@ -8396,7 +8396,7 @@ def dbg(name, data):\n dtype=dtype))\n for optimize in [False, True]:\n x = []\n- for (iop, op) in enumerate(operands):\n+ for (iop, _) in enumerate(operands):", + "comment_created_at": "2021-08-25T15:15:41+00:00", + "comment_author": "szha", + "comment_body": "```suggestion\r\n for iop in range(len(operands)):\r\n```", + "pr_file_module": null + } + ] + } 
+] \ No newline at end of file diff --git a/_reviewers/mxnet-optimize-iteration-patterns.md b/_reviewers/mxnet-optimize-iteration-patterns.md index 4c93b49..906d7b6 100644 --- a/_reviewers/mxnet-optimize-iteration-patterns.md +++ b/_reviewers/mxnet-optimize-iteration-patterns.md @@ -23,175 +23,3 @@ for item in collection: ``` This optimization removes unnecessary index variable allocation and makes the code more readable. The principle applies to any iterative algorithm - don't compute values you won't use. Similarly, when testing object capabilities (like whether an object is iterable), check for specific attributes (`hasattr(obj, '__iter__')`) rather than calling functions that might have side effects like prefetching data. - - -[ - { - "discussion_id": "360245337", - "pr_number": 17129, - "pr_file": "python/mxnet/gluon/contrib/estimator/estimator.py", - "created_at": "2019-12-20T06:45:46+00:00", - "commented_code": "batch_axis : int, default 0\n Batch axis to split the training data into devices.\n \"\"\"\n if not isinstance(train_data, DataLoader):\n raise ValueError(\"Estimator only support input as Gluon DataLoader. Alternatively, you \"\n \"can transform your DataIter or any NDArray into Gluon DataLoader. \"\n \"Refer to gluon.data.dataloader\")\n try:\n is_iterable = iter(train_data)", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "360245337", - "repo_full_name": "apache/mxnet", - "pr_number": 17129, - "pr_file": "python/mxnet/gluon/contrib/estimator/estimator.py", - "discussion_id": "360245337", - "commented_code": "@@ -358,10 +359,11 @@ def fit(self, train_data,\n batch_axis : int, default 0\n Batch axis to split the training data into devices.\n \"\"\"\n- if not isinstance(train_data, DataLoader):\n- raise ValueError(\"Estimator only support input as Gluon DataLoader. Alternatively, you \"\n- \"can transform your DataIter or any NDArray into Gluon DataLoader. \"\n- \"Refer to gluon.data.dataloader\")\n+ try:\n+ is_iterable = iter(train_data)", - "comment_created_at": "2019-12-20T06:45:46+00:00", - "comment_author": "leezu", - "comment_body": "Calling `iter(train_data)` can have side-effects, such as starting prefetching. See https://github.com/dmlc/gluon-nlp/blob/297e14732849f2b8025de6e2bf80ac05cab82b44/src/gluonnlp/data/stream.py#L341\r\n\r\nThus `iter` must only be invoked once. The enumerate call below invokes `iter` a second time implicitly. If you want to invoke `iter` manually, you need to use a while loop and `next` to iterate over the elements until `StopIteration` is raised.", - "pr_file_module": null - }, - { - "comment_id": "360251070", - "repo_full_name": "apache/mxnet", - "pr_number": 17129, - "pr_file": "python/mxnet/gluon/contrib/estimator/estimator.py", - "discussion_id": "360245337", - "commented_code": "@@ -358,10 +359,11 @@ def fit(self, train_data,\n batch_axis : int, default 0\n Batch axis to split the training data into devices.\n \"\"\"\n- if not isinstance(train_data, DataLoader):\n- raise ValueError(\"Estimator only support input as Gluon DataLoader. Alternatively, you \"\n- \"can transform your DataIter or any NDArray into Gluon DataLoader. \"\n- \"Refer to gluon.data.dataloader\")\n+ try:\n+ is_iterable = iter(train_data)", - "comment_created_at": "2019-12-20T07:14:42+00:00", - "comment_author": "liuzh47", - "comment_body": "what about using `hasattr(train_data, '__iter__')` to check if the `train_data` is iterable or not? 
Do we need to cover the case if `train_data` has implemented `__getitem__`?", - "pr_file_module": null - }, - { - "comment_id": "360254544", - "repo_full_name": "apache/mxnet", - "pr_number": 17129, - "pr_file": "python/mxnet/gluon/contrib/estimator/estimator.py", - "discussion_id": "360245337", - "commented_code": "@@ -358,10 +359,11 @@ def fit(self, train_data,\n batch_axis : int, default 0\n Batch axis to split the training data into devices.\n \"\"\"\n- if not isinstance(train_data, DataLoader):\n- raise ValueError(\"Estimator only support input as Gluon DataLoader. Alternatively, you \"\n- \"can transform your DataIter or any NDArray into Gluon DataLoader. \"\n- \"Refer to gluon.data.dataloader\")\n+ try:\n+ is_iterable = iter(train_data)", - "comment_created_at": "2019-12-20T07:30:58+00:00", - "comment_author": "leezu", - "comment_body": "You need to check both. Thus `hasattr(train_data, '__iter__') or hasattr(train_data, '__getitem__')` should be fine.\r\n\r\nSee \r\n\r\n> An object capable of returning its members one at a time. Examples of iterables include all sequence types (such as list, str, and tuple) and some non-sequence types like dict, file objects, and objects of any classes you define with an `__iter__()` method or with a `__getitem__()` method that implements Sequence semantics.\r\n\r\nhttps://docs.python.org/3/glossary.html", - "pr_file_module": null - }, - { - "comment_id": "360256284", - "repo_full_name": "apache/mxnet", - "pr_number": 17129, - "pr_file": "python/mxnet/gluon/contrib/estimator/estimator.py", - "discussion_id": "360245337", - "commented_code": "@@ -358,10 +359,11 @@ def fit(self, train_data,\n batch_axis : int, default 0\n Batch axis to split the training data into devices.\n \"\"\"\n- if not isinstance(train_data, DataLoader):\n- raise ValueError(\"Estimator only support input as Gluon DataLoader. Alternatively, you \"\n- \"can transform your DataIter or any NDArray into Gluon DataLoader. 
\"\n- \"Refer to gluon.data.dataloader\")\n+ try:\n+ is_iterable = iter(train_data)", - "comment_created_at": "2019-12-20T07:38:45+00:00", - "comment_author": "liuzh47", - "comment_body": "ok, it makes sense to me.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "695846029", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_gluon_data.py", - "created_at": "2021-08-25T15:05:11+00:00", - "commented_code": "def test_multi_worker_forked_data_loader():\n data = _Dummy(False)\n loader = DataLoader(data, batch_size=40, batchify_fn=_batchify, num_workers=2)\n for epoch in range(1):\n for i, data in enumerate(loader):\n for _ in range(1):\n for _ in enumerate(loader):", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "695846029", - "repo_full_name": "apache/mxnet", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_gluon_data.py", - "discussion_id": "695846029", - "commented_code": "@@ -325,14 +325,14 @@ def _batchify(data):\n def test_multi_worker_forked_data_loader():\n data = _Dummy(False)\n loader = DataLoader(data, batch_size=40, batchify_fn=_batchify, num_workers=2)\n- for epoch in range(1):\n- for i, data in enumerate(loader):\n+ for _ in range(1):\n+ for _ in enumerate(loader):", - "comment_created_at": "2021-08-25T15:05:11+00:00", - "comment_author": "szha", - "comment_body": "```suggestion\r\n for _ in loader:\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "695846617", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_gluon_data.py", - "created_at": "2021-08-25T15:05:50+00:00", - "commented_code": "a = mx.gluon.data.SimpleDataset([i for i in range(length)])\n a_filtered = a.filter(lambda x: x % 10 == 0)\n assert(len(a_filtered) == 10)\n for idx, sample in enumerate(a_filtered):\n for _, sample in enumerate(a_filtered):\n assert sample % 10 == 0\n a_xform_filtered = a.transform(lambda x: x + 1).filter(lambda x: x % 10 == 0)\n assert(len(a_xform_filtered) == 10)\n # the filtered data is already transformed\n for idx, sample in enumerate(a_xform_filtered):\n for _, sample in enumerate(a_xform_filtered):", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "695846617", - "repo_full_name": "apache/mxnet", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_gluon_data.py", - "discussion_id": "695846617", - "commented_code": "@@ -382,25 +382,25 @@ def test_dataset_filter():\n a = mx.gluon.data.SimpleDataset([i for i in range(length)])\n a_filtered = a.filter(lambda x: x % 10 == 0)\n assert(len(a_filtered) == 10)\n- for idx, sample in enumerate(a_filtered):\n+ for _, sample in enumerate(a_filtered):\n assert sample % 10 == 0\n a_xform_filtered = a.transform(lambda x: x + 1).filter(lambda x: x % 10 == 0)\n assert(len(a_xform_filtered) == 10)\n # the filtered data is already transformed\n- for idx, sample in enumerate(a_xform_filtered):\n+ for _, sample in enumerate(a_xform_filtered):", - "comment_created_at": "2021-08-25T15:05:50+00:00", - "comment_author": "szha", - "comment_body": "```suggestion\r\n for sample in a_xform_filtered:\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "695847352", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_gluon_data.py", - "created_at": "2021-08-25T15:06:36+00:00", - "commented_code": "assert len(shard_3) == 2\n total = 0\n for shard in [shard_0, shard_1, shard_2, shard_3]:\n for idx, sample in enumerate(shard):\n for _, sample in enumerate(shard):", - "repo_full_name": 
"apache/mxnet", - "discussion_comments": [ - { - "comment_id": "695847352", - "repo_full_name": "apache/mxnet", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_gluon_data.py", - "discussion_id": "695847352", - "commented_code": "@@ -417,7 +417,7 @@ def test_dataset_shard():\n assert len(shard_3) == 2\n total = 0\n for shard in [shard_0, shard_1, shard_2, shard_3]:\n- for idx, sample in enumerate(shard):\n+ for _, sample in enumerate(shard):", - "comment_created_at": "2021-08-25T15:06:36+00:00", - "comment_author": "szha", - "comment_body": "```suggestion\r\n for sample in shard:\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "695847543", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_gluon_data.py", - "created_at": "2021-08-25T15:06:48+00:00", - "commented_code": "assert len(shard_3) == 2\n total = 0\n for shard in [shard_0, shard_1, shard_2, shard_3]:\n for idx, sample in enumerate(shard):\n for _, sample in enumerate(shard):", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "695847543", - "repo_full_name": "apache/mxnet", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_gluon_data.py", - "discussion_id": "695847543", - "commented_code": "@@ -435,7 +435,7 @@ def test_dataset_shard_handle():\n assert len(shard_3) == 2\n total = 0\n for shard in [shard_0, shard_1, shard_2, shard_3]:\n- for idx, sample in enumerate(shard):\n+ for _, sample in enumerate(shard):", - "comment_created_at": "2021-08-25T15:06:48+00:00", - "comment_author": "szha", - "comment_body": "```suggestion\r\n for sample in shard:\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "695855964", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_numpy_op.py", - "created_at": "2021-08-25T15:15:41+00:00", - "commented_code": "dtype=dtype))\n for optimize in [False, True]:\n x = []\n for (iop, op) in enumerate(operands):\n for (iop, _) in enumerate(operands):", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "695855964", - "repo_full_name": "apache/mxnet", - "pr_number": 20505, - "pr_file": "tests/python/unittest/test_numpy_op.py", - "discussion_id": "695855964", - "commented_code": "@@ -8396,7 +8396,7 @@ def dbg(name, data):\n dtype=dtype))\n for optimize in [False, True]:\n x = []\n- for (iop, op) in enumerate(operands):\n+ for (iop, _) in enumerate(operands):", - "comment_created_at": "2021-08-25T15:15:41+00:00", - "comment_author": "szha", - "comment_body": "```suggestion\r\n for iop in range(len(operands)):\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-pre-compute-reused-data.json b/_reviewers/mxnet-pre-compute-reused-data.json new file mode 100644 index 0000000..d3f1999 --- /dev/null +++ b/_reviewers/mxnet-pre-compute-reused-data.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "954886448", + "pr_number": 21127, + "pr_file": "example/quantization_inc/resnet_measurment.py", + "created_at": "2022-08-25T12:10:41+00:00", + "commented_code": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements. See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership. The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License. 
You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied. See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport mxnet as mx\nfrom mxnet.gluon.data.vision import transforms\nimport time\nimport glob\n\n\ndef test_accuracy(net, data_loader, description):\n count = 0\n acc_top1 = mx.gluon.metric.Accuracy()\n acc_top5 = mx.gluon.metric.TopKAccuracy(5)\n start = time.time()\n for x, label in data_loader:\n output = net(x)\n acc_top1.update(label, output)\n acc_top5.update(label, output)\n count += 1\n time_spend = time.time() - start\n _, top1 = acc_top1.get()\n _, top5 = acc_top5.get()\n print('{:21} Top1 Accuracy: {:.4f} Top5 Accuracy: {:.4f} from {:4} batches in {:8.2f}s'\n .format(description, top1, top5, count, time_spend))\n\n# Preparing input data\nrgb_mean = (0.485, 0.456, 0.406)\nrgb_std = (0.229, 0.224, 0.225)\nbatch_size = 64\n\nstart = time.time()\n# Set below proper path to ImageNet data set\ndataset = mx.gluon.data.vision.ImageRecordDataset('../imagenet/rec/val.rec')\ntransformer = transforms.Compose([transforms.Resize(256),\n transforms.CenterCrop(224),\n transforms.ToTensor(),\n transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n# Note: As the input data are used many times it is better to prepared data once,\n# so lazy parameter for transform_first is set to False", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "954886448", + "repo_full_name": "apache/mxnet", + "pr_number": 21127, + "pr_file": "example/quantization_inc/resnet_measurment.py", + "discussion_id": "954886448", + "commented_code": "@@ -0,0 +1,68 @@\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. 
See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+\n+import mxnet as mx\n+from mxnet.gluon.data.vision import transforms\n+import time\n+import glob\n+\n+\n+def test_accuracy(net, data_loader, description):\n+ count = 0\n+ acc_top1 = mx.gluon.metric.Accuracy()\n+ acc_top5 = mx.gluon.metric.TopKAccuracy(5)\n+ start = time.time()\n+ for x, label in data_loader:\n+ output = net(x)\n+ acc_top1.update(label, output)\n+ acc_top5.update(label, output)\n+ count += 1\n+ time_spend = time.time() - start\n+ _, top1 = acc_top1.get()\n+ _, top5 = acc_top5.get()\n+ print('{:21} Top1 Accuracy: {:.4f} Top5 Accuracy: {:.4f} from {:4} batches in {:8.2f}s'\n+ .format(description, top1, top5, count, time_spend))\n+\n+# Preparing input data\n+rgb_mean = (0.485, 0.456, 0.406)\n+rgb_std = (0.229, 0.224, 0.225)\n+batch_size = 64\n+\n+start = time.time()\n+# Set below proper path to ImageNet data set\n+dataset = mx.gluon.data.vision.ImageRecordDataset('../imagenet/rec/val.rec')\n+transformer = transforms.Compose([transforms.Resize(256),\n+ transforms.CenterCrop(224),\n+ transforms.ToTensor(),\n+ transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n+# Note: As the input data are used many times it is better to prepared data once,\n+# so lazy parameter for transform_first is set to False", + "comment_created_at": "2022-08-25T12:10:41+00:00", + "comment_author": "bartekkuncer", + "comment_body": "```suggestion\r\n# Note: as the input data is used many times it is better to prepare it once.\r\n# Therefore, lazy parameter for transform_first is set to False.\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "954888778", + "pr_number": 21127, + "pr_file": "example/quantization_inc/resnet_mse.py", + "created_at": "2022-08-25T12:13:13+00:00", + "commented_code": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements. See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership. The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License. You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied. 
See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo.vision import resnet50_v2\nfrom mxnet.gluon.data.vision import transforms\nfrom mxnet.contrib.quantization import quantize_net\n\n# Preparing input data\nrgb_mean = (0.485, 0.456, 0.406)\nrgb_std = (0.229, 0.224, 0.225)\nbatch_size = 64\nnum_calib_batches = 9\n# Set below proper path to ImageNet data set\ndataset = mx.gluon.data.vision.ImageRecordDataset('../imagenet/rec/val.rec')\n# Tuning in INC on whole data set takes too long time so we take only part of the whole data set\n# as representative part of it:\ndataset = dataset.take(num_calib_batches * batch_size)\ntransformer = transforms.Compose([transforms.Resize(256),\n transforms.CenterCrop(224),\n transforms.ToTensor(),\n transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n# Note: as input data are used many times during tuning it is better to prepared data earlier,\n# so lazy parameter for transform_first is set to False", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "954888778", + "repo_full_name": "apache/mxnet", + "pr_number": 21127, + "pr_file": "example/quantization_inc/resnet_mse.py", + "discussion_id": "954888778", + "commented_code": "@@ -0,0 +1,65 @@\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. 
See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+\n+import mxnet as mx\n+from mxnet.gluon.model_zoo.vision import resnet50_v2\n+from mxnet.gluon.data.vision import transforms\n+from mxnet.contrib.quantization import quantize_net\n+\n+# Preparing input data\n+rgb_mean = (0.485, 0.456, 0.406)\n+rgb_std = (0.229, 0.224, 0.225)\n+batch_size = 64\n+num_calib_batches = 9\n+# Set below proper path to ImageNet data set\n+dataset = mx.gluon.data.vision.ImageRecordDataset('../imagenet/rec/val.rec')\n+# Tuning in INC on whole data set takes too long time so we take only part of the whole data set\n+# as representative part of it:\n+dataset = dataset.take(num_calib_batches * batch_size)\n+transformer = transforms.Compose([transforms.Resize(256),\n+ transforms.CenterCrop(224),\n+ transforms.ToTensor(),\n+ transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n+# Note: as input data are used many times during tuning it is better to prepared data earlier,\n+# so lazy parameter for transform_first is set to False", + "comment_created_at": "2022-08-25T12:13:13+00:00", + "comment_author": "bartekkuncer", + "comment_body": "```suggestion\r\n# Note: as input data is used many times during tuning it is better to have it prepared earlier.\r\n# Therefore, lazy parameter for transform_first is set to False.\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-pre-compute-reused-data.md b/_reviewers/mxnet-pre-compute-reused-data.md index 3813b85..3764de0 100644 --- a/_reviewers/mxnet-pre-compute-reused-data.md +++ b/_reviewers/mxnet-pre-compute-reused-data.md @@ -30,51 +30,3 @@ data_loader = DataLoader(dataset.transform_first(transformer, lazy=False)) ``` By preparing data once rather than on-demand, you reduce computational overhead and improve overall execution speed, especially in performance-critical sections of code. - - -[ - { - "discussion_id": "954886448", - "pr_number": 21127, - "pr_file": "example/quantization_inc/resnet_measurment.py", - "created_at": "2022-08-25T12:10:41+00:00", - "commented_code": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements. See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership. The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License. You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied. 
See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport mxnet as mx\nfrom mxnet.gluon.data.vision import transforms\nimport time\nimport glob\n\n\ndef test_accuracy(net, data_loader, description):\n count = 0\n acc_top1 = mx.gluon.metric.Accuracy()\n acc_top5 = mx.gluon.metric.TopKAccuracy(5)\n start = time.time()\n for x, label in data_loader:\n output = net(x)\n acc_top1.update(label, output)\n acc_top5.update(label, output)\n count += 1\n time_spend = time.time() - start\n _, top1 = acc_top1.get()\n _, top5 = acc_top5.get()\n print('{:21} Top1 Accuracy: {:.4f} Top5 Accuracy: {:.4f} from {:4} batches in {:8.2f}s'\n .format(description, top1, top5, count, time_spend))\n\n# Preparing input data\nrgb_mean = (0.485, 0.456, 0.406)\nrgb_std = (0.229, 0.224, 0.225)\nbatch_size = 64\n\nstart = time.time()\n# Set below proper path to ImageNet data set\ndataset = mx.gluon.data.vision.ImageRecordDataset('../imagenet/rec/val.rec')\ntransformer = transforms.Compose([transforms.Resize(256),\n transforms.CenterCrop(224),\n transforms.ToTensor(),\n transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n# Note: As the input data are used many times it is better to prepared data once,\n# so lazy parameter for transform_first is set to False", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "954886448", - "repo_full_name": "apache/mxnet", - "pr_number": 21127, - "pr_file": "example/quantization_inc/resnet_measurment.py", - "discussion_id": "954886448", - "commented_code": "@@ -0,0 +1,68 @@\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. 
See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+\n+import mxnet as mx\n+from mxnet.gluon.data.vision import transforms\n+import time\n+import glob\n+\n+\n+def test_accuracy(net, data_loader, description):\n+ count = 0\n+ acc_top1 = mx.gluon.metric.Accuracy()\n+ acc_top5 = mx.gluon.metric.TopKAccuracy(5)\n+ start = time.time()\n+ for x, label in data_loader:\n+ output = net(x)\n+ acc_top1.update(label, output)\n+ acc_top5.update(label, output)\n+ count += 1\n+ time_spend = time.time() - start\n+ _, top1 = acc_top1.get()\n+ _, top5 = acc_top5.get()\n+ print('{:21} Top1 Accuracy: {:.4f} Top5 Accuracy: {:.4f} from {:4} batches in {:8.2f}s'\n+ .format(description, top1, top5, count, time_spend))\n+\n+# Preparing input data\n+rgb_mean = (0.485, 0.456, 0.406)\n+rgb_std = (0.229, 0.224, 0.225)\n+batch_size = 64\n+\n+start = time.time()\n+# Set below proper path to ImageNet data set\n+dataset = mx.gluon.data.vision.ImageRecordDataset('../imagenet/rec/val.rec')\n+transformer = transforms.Compose([transforms.Resize(256),\n+ transforms.CenterCrop(224),\n+ transforms.ToTensor(),\n+ transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n+# Note: As the input data are used many times it is better to prepared data once,\n+# so lazy parameter for transform_first is set to False", - "comment_created_at": "2022-08-25T12:10:41+00:00", - "comment_author": "bartekkuncer", - "comment_body": "```suggestion\r\n# Note: as the input data is used many times it is better to prepare it once.\r\n# Therefore, lazy parameter for transform_first is set to False.\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "954888778", - "pr_number": 21127, - "pr_file": "example/quantization_inc/resnet_mse.py", - "created_at": "2022-08-25T12:13:13+00:00", - "commented_code": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements. See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership. The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License. You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied. 
See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo.vision import resnet50_v2\nfrom mxnet.gluon.data.vision import transforms\nfrom mxnet.contrib.quantization import quantize_net\n\n# Preparing input data\nrgb_mean = (0.485, 0.456, 0.406)\nrgb_std = (0.229, 0.224, 0.225)\nbatch_size = 64\nnum_calib_batches = 9\n# Set below proper path to ImageNet data set\ndataset = mx.gluon.data.vision.ImageRecordDataset('../imagenet/rec/val.rec')\n# Tuning in INC on whole data set takes too long time so we take only part of the whole data set\n# as representative part of it:\ndataset = dataset.take(num_calib_batches * batch_size)\ntransformer = transforms.Compose([transforms.Resize(256),\n transforms.CenterCrop(224),\n transforms.ToTensor(),\n transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n# Note: as input data are used many times during tuning it is better to prepared data earlier,\n# so lazy parameter for transform_first is set to False", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "954888778", - "repo_full_name": "apache/mxnet", - "pr_number": 21127, - "pr_file": "example/quantization_inc/resnet_mse.py", - "discussion_id": "954888778", - "commented_code": "@@ -0,0 +1,65 @@\n+# Licensed to the Apache Software Foundation (ASF) under one\n+# or more contributor license agreements. See the NOTICE file\n+# distributed with this work for additional information\n+# regarding copyright ownership. The ASF licenses this file\n+# to you under the Apache License, Version 2.0 (the\n+# \"License\"); you may not use this file except in compliance\n+# with the License. You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing,\n+# software distributed under the License is distributed on an\n+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+# KIND, either express or implied. 
See the License for the\n+# specific language governing permissions and limitations\n+# under the License.\n+\n+import mxnet as mx\n+from mxnet.gluon.model_zoo.vision import resnet50_v2\n+from mxnet.gluon.data.vision import transforms\n+from mxnet.contrib.quantization import quantize_net\n+\n+# Preparing input data\n+rgb_mean = (0.485, 0.456, 0.406)\n+rgb_std = (0.229, 0.224, 0.225)\n+batch_size = 64\n+num_calib_batches = 9\n+# Set below proper path to ImageNet data set\n+dataset = mx.gluon.data.vision.ImageRecordDataset('../imagenet/rec/val.rec')\n+# Tuning in INC on whole data set takes too long time so we take only part of the whole data set\n+# as representative part of it:\n+dataset = dataset.take(num_calib_batches * batch_size)\n+transformer = transforms.Compose([transforms.Resize(256),\n+ transforms.CenterCrop(224),\n+ transforms.ToTensor(),\n+ transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n+# Note: as input data are used many times during tuning it is better to prepared data earlier,\n+# so lazy parameter for transform_first is set to False", - "comment_created_at": "2022-08-25T12:13:13+00:00", - "comment_author": "bartekkuncer", - "comment_body": "```suggestion\r\n# Note: as input data is used many times during tuning it is better to have it prepared earlier.\r\n# Therefore, lazy parameter for transform_first is set to False.\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-simplify-for-readability.json b/_reviewers/mxnet-simplify-for-readability.json new file mode 100644 index 0000000..fcab6d9 --- /dev/null +++ b/_reviewers/mxnet-simplify-for-readability.json @@ -0,0 +1,252 @@ +[ + { + "discussion_id": "954763427", + "pr_number": 21132, + "pr_file": "src/operator/tensor/elemwise_binary_broadcast_op_basic.cc", + "created_at": "2022-08-25T09:54:33+00:00", + "commented_code": "const std::vector& inputs,\n const std::vector& req,\n const std::vector& outputs) {\n mxnet::TShape new_lshape, new_rshape, new_oshape;\n int ndim_diff = BinaryBroadcastShapeCompact(inputs[0].shape(),\n inputs[1].shape(),\n outputs[0].shape(),\n &new_lshape,\n &new_rshape,\n &new_oshape);\n std::vector new_inputs;\n std::vector new_outputs;\n if (ndim_diff) {\n new_inputs = {inputs[0].Reshape(new_lshape), inputs[1].Reshape(new_rshape)};\n new_outputs = {outputs[0].Reshape(new_oshape)};\n } else if (inputs[0].shape().Size() == 1 && inputs[1].shape().Size() == 1) {\n // BinaryBroadcastShapeCompact function doesn't reshape tensors of size (1,1,...,1)\n // into shape (1). 
It is mandatory for oneDNN primitive to have this reshape done.\n mxnet::TShape one_shape = mxnet::TShape(1, 1);\n new_inputs = {inputs[0].Reshape(one_shape), inputs[1].Reshape(one_shape)};\n new_outputs = {outputs[0].Reshape(one_shape)};\n // We can use more efficient sum kernel when there is no broadcast - when shapes are the same\n const bool same_shape = (inputs[0].shape() == inputs[1].shape());", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "954763427", + "repo_full_name": "apache/mxnet", + "pr_number": 21132, + "pr_file": "src/operator/tensor/elemwise_binary_broadcast_op_basic.cc", + "discussion_id": "954763427", + "commented_code": "@@ -38,31 +39,39 @@ void DNNLBinaryOpForward(const nnvm::NodeAttrs& attrs,\n const std::vector& inputs,\n const std::vector& req,\n const std::vector& outputs) {\n- mxnet::TShape new_lshape, new_rshape, new_oshape;\n- int ndim_diff = BinaryBroadcastShapeCompact(inputs[0].shape(),\n- inputs[1].shape(),\n- outputs[0].shape(),\n- &new_lshape,\n- &new_rshape,\n- &new_oshape);\n- std::vector new_inputs;\n- std::vector new_outputs;\n- if (ndim_diff) {\n- new_inputs = {inputs[0].Reshape(new_lshape), inputs[1].Reshape(new_rshape)};\n- new_outputs = {outputs[0].Reshape(new_oshape)};\n- } else if (inputs[0].shape().Size() == 1 && inputs[1].shape().Size() == 1) {\n- // BinaryBroadcastShapeCompact function doesn't reshape tensors of size (1,1,...,1)\n- // into shape (1). It is mandatory for oneDNN primitive to have this reshape done.\n- mxnet::TShape one_shape = mxnet::TShape(1, 1);\n- new_inputs = {inputs[0].Reshape(one_shape), inputs[1].Reshape(one_shape)};\n- new_outputs = {outputs[0].Reshape(one_shape)};\n+ // We can use more efficient sum kernel when there is no broadcast - when shapes are the same\n+ const bool same_shape = (inputs[0].shape() == inputs[1].shape());", + "comment_created_at": "2022-08-25T09:54:33+00:00", + "comment_author": "bartekkuncer", + "comment_body": "```suggestion\r\n const bool same_shape = inputs[0].shape() == inputs[1].shape();\r\n```", + "pr_file_module": null + }, + { + "comment_id": "954778291", + "repo_full_name": "apache/mxnet", + "pr_number": 21132, + "pr_file": "src/operator/tensor/elemwise_binary_broadcast_op_basic.cc", + "discussion_id": "954763427", + "commented_code": "@@ -38,31 +39,39 @@ void DNNLBinaryOpForward(const nnvm::NodeAttrs& attrs,\n const std::vector& inputs,\n const std::vector& req,\n const std::vector& outputs) {\n- mxnet::TShape new_lshape, new_rshape, new_oshape;\n- int ndim_diff = BinaryBroadcastShapeCompact(inputs[0].shape(),\n- inputs[1].shape(),\n- outputs[0].shape(),\n- &new_lshape,\n- &new_rshape,\n- &new_oshape);\n- std::vector new_inputs;\n- std::vector new_outputs;\n- if (ndim_diff) {\n- new_inputs = {inputs[0].Reshape(new_lshape), inputs[1].Reshape(new_rshape)};\n- new_outputs = {outputs[0].Reshape(new_oshape)};\n- } else if (inputs[0].shape().Size() == 1 && inputs[1].shape().Size() == 1) {\n- // BinaryBroadcastShapeCompact function doesn't reshape tensors of size (1,1,...,1)\n- // into shape (1). 
It is mandatory for oneDNN primitive to have this reshape done.\n- mxnet::TShape one_shape = mxnet::TShape(1, 1);\n- new_inputs = {inputs[0].Reshape(one_shape), inputs[1].Reshape(one_shape)};\n- new_outputs = {outputs[0].Reshape(one_shape)};\n+ // We can use more efficient sum kernel when there is no broadcast - when shapes are the same\n+ const bool same_shape = (inputs[0].shape() == inputs[1].shape());", + "comment_created_at": "2022-08-25T10:10:31+00:00", + "comment_author": "bartekkuncer", + "comment_body": "Maybe put these shapes in variables as they are referred to multiple times?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "941155133", + "pr_number": 21115, + "pr_file": "src/operator/subgraph/dnnl/dnnl_transformer_qk_common.h", + "created_at": "2022-08-09T10:10:26+00:00", + "commented_code": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied. See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n#ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_TRANSFORMER_QK_COMMON_H_\n#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_TRANSFORMER_QK_COMMON_H_\n\n#if MXNET_USE_ONEDNN == 1\n\n#include \n#include \n\n#include \"operator/contrib/transformer-inl.h\"\n#include \"operator/numpy/np_matrix_op-inl.h\"\n#include \"operator/tensor/matrix_op-inl.h\"\n#include \"operator/subgraph/common.h\"\n#include \"dnnl_common.h\"\n#include \"dnnl_subgraph_base-inl.h\"\n#include \"dnnl_transformer-inl.h\"\n\nnamespace mxnet {\nnamespace op {\nnamespace qk_common {\n\nenum SelectStatusTransformerQK {\n kFail = 0,\n kStart,\n kFirstSwapAx,\n kSecondSwapAx,\n kFirstReshape,\n kSecondReshape,\n kSuccess\n};\n\n// /*\n// kStart ---> kFirstSwapAx ---> kSecondSwapAx ---> kFirstReshape ---> kSecondReshape ---> kSuccess\n// OR\n// kStart ---> kFirstSwapAx ---> kSecondSwapAx ---> kFirstReshape ---> kSuccess\n// Each status except kStart is connected with kFail\n// */\n\nenum mode { include_split = 0, without_split };\n\ninline bool CheckSwapAxisConditionsQK(const BiDirectedNode& input_node) {\n if (input_node.outputs.size() != 1)\n return false;\n return CheckSwapAxisConditions(*input_node.node);\n}\n\ninline bool CheckReshapeConditionsQK(const BiDirectedNode& input_node, const index_t out_index) {\n if (input_node.outputs.size() != 1)\n return false;\n return CheckReshapeConditions(*input_node.node, out_index);\n}\n\ninline bool CheckSplitConditions(const std::vector& matched_list,\n const BiDirectedNode& node) {\n const SplitParam& param = dmlc::get(node.node->attrs.parsed);\n\n if (param.axis != -1 || param.sections != 3 || param.squeeze_axis)\n return false;\n\n const auto first_reshape = (*(matched_list.end() - 2))->node;\n const auto second_reshape = (*(matched_list.end() - 1))->node;\n if (first_reshape->op() != Op::Get(\"_npx_reshape\") ||\n second_reshape->op() != Op::Get(\"_npx_reshape\")) {\n 
return false;\n }\n // 3 sections - ensure that every output is used only once\n if (node.outputs.size() == 3 && node.outputs.count(first_reshape) &&\n node.outputs.count(second_reshape)) {\n return true;\n }\n\n return false;\n}\n\ninline bool Select(SelectStatusTransformerQK* status,", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "941155133", + "repo_full_name": "apache/mxnet", + "pr_number": 21115, + "pr_file": "src/operator/subgraph/dnnl/dnnl_transformer_qk_common.h", + "discussion_id": "941155133", + "commented_code": "@@ -0,0 +1,232 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+#ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_TRANSFORMER_QK_COMMON_H_\n+#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_TRANSFORMER_QK_COMMON_H_\n+\n+#if MXNET_USE_ONEDNN == 1\n+\n+#include \n+#include \n+\n+#include \"operator/contrib/transformer-inl.h\"\n+#include \"operator/numpy/np_matrix_op-inl.h\"\n+#include \"operator/tensor/matrix_op-inl.h\"\n+#include \"operator/subgraph/common.h\"\n+#include \"dnnl_common.h\"\n+#include \"dnnl_subgraph_base-inl.h\"\n+#include \"dnnl_transformer-inl.h\"\n+\n+namespace mxnet {\n+namespace op {\n+namespace qk_common {\n+\n+enum SelectStatusTransformerQK {\n+ kFail = 0,\n+ kStart,\n+ kFirstSwapAx,\n+ kSecondSwapAx,\n+ kFirstReshape,\n+ kSecondReshape,\n+ kSuccess\n+};\n+\n+// /*\n+// kStart ---> kFirstSwapAx ---> kSecondSwapAx ---> kFirstReshape ---> kSecondReshape ---> kSuccess\n+// OR\n+// kStart ---> kFirstSwapAx ---> kSecondSwapAx ---> kFirstReshape ---> kSuccess\n+// Each status except kStart is connected with kFail\n+// */\n+\n+enum mode { include_split = 0, without_split };\n+\n+inline bool CheckSwapAxisConditionsQK(const BiDirectedNode& input_node) {\n+ if (input_node.outputs.size() != 1)\n+ return false;\n+ return CheckSwapAxisConditions(*input_node.node);\n+}\n+\n+inline bool CheckReshapeConditionsQK(const BiDirectedNode& input_node, const index_t out_index) {\n+ if (input_node.outputs.size() != 1)\n+ return false;\n+ return CheckReshapeConditions(*input_node.node, out_index);\n+}\n+\n+inline bool CheckSplitConditions(const std::vector& matched_list,\n+ const BiDirectedNode& node) {\n+ const SplitParam& param = dmlc::get(node.node->attrs.parsed);\n+\n+ if (param.axis != -1 || param.sections != 3 || param.squeeze_axis)\n+ return false;\n+\n+ const auto first_reshape = (*(matched_list.end() - 2))->node;\n+ const auto second_reshape = (*(matched_list.end() - 1))->node;\n+ if (first_reshape->op() != Op::Get(\"_npx_reshape\") ||\n+ second_reshape->op() != Op::Get(\"_npx_reshape\")) {\n+ return false;\n+ }\n+ // 3 sections - ensure that every output is used only once\n+ if (node.outputs.size() == 3 && node.outputs.count(first_reshape) &&\n+ 
node.outputs.count(second_reshape)) {\n+ return true;\n+ }\n+\n+ return false;\n+}\n+\n+inline bool Select(SelectStatusTransformerQK* status,", + "comment_created_at": "2022-08-09T10:10:26+00:00", + "comment_author": "bartekkuncer", + "comment_body": "Why pointers instead of references?", + "pr_file_module": null + }, + { + "comment_id": "942626592", + "repo_full_name": "apache/mxnet", + "pr_number": 21115, + "pr_file": "src/operator/subgraph/dnnl/dnnl_transformer_qk_common.h", + "discussion_id": "941155133", + "commented_code": "@@ -0,0 +1,232 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+#ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_TRANSFORMER_QK_COMMON_H_\n+#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_TRANSFORMER_QK_COMMON_H_\n+\n+#if MXNET_USE_ONEDNN == 1\n+\n+#include \n+#include \n+\n+#include \"operator/contrib/transformer-inl.h\"\n+#include \"operator/numpy/np_matrix_op-inl.h\"\n+#include \"operator/tensor/matrix_op-inl.h\"\n+#include \"operator/subgraph/common.h\"\n+#include \"dnnl_common.h\"\n+#include \"dnnl_subgraph_base-inl.h\"\n+#include \"dnnl_transformer-inl.h\"\n+\n+namespace mxnet {\n+namespace op {\n+namespace qk_common {\n+\n+enum SelectStatusTransformerQK {\n+ kFail = 0,\n+ kStart,\n+ kFirstSwapAx,\n+ kSecondSwapAx,\n+ kFirstReshape,\n+ kSecondReshape,\n+ kSuccess\n+};\n+\n+// /*\n+// kStart ---> kFirstSwapAx ---> kSecondSwapAx ---> kFirstReshape ---> kSecondReshape ---> kSuccess\n+// OR\n+// kStart ---> kFirstSwapAx ---> kSecondSwapAx ---> kFirstReshape ---> kSuccess\n+// Each status except kStart is connected with kFail\n+// */\n+\n+enum mode { include_split = 0, without_split };\n+\n+inline bool CheckSwapAxisConditionsQK(const BiDirectedNode& input_node) {\n+ if (input_node.outputs.size() != 1)\n+ return false;\n+ return CheckSwapAxisConditions(*input_node.node);\n+}\n+\n+inline bool CheckReshapeConditionsQK(const BiDirectedNode& input_node, const index_t out_index) {\n+ if (input_node.outputs.size() != 1)\n+ return false;\n+ return CheckReshapeConditions(*input_node.node, out_index);\n+}\n+\n+inline bool CheckSplitConditions(const std::vector& matched_list,\n+ const BiDirectedNode& node) {\n+ const SplitParam& param = dmlc::get(node.node->attrs.parsed);\n+\n+ if (param.axis != -1 || param.sections != 3 || param.squeeze_axis)\n+ return false;\n+\n+ const auto first_reshape = (*(matched_list.end() - 2))->node;\n+ const auto second_reshape = (*(matched_list.end() - 1))->node;\n+ if (first_reshape->op() != Op::Get(\"_npx_reshape\") ||\n+ second_reshape->op() != Op::Get(\"_npx_reshape\")) {\n+ return false;\n+ }\n+ // 3 sections - ensure that every output is used only once\n+ if (node.outputs.size() == 3 && node.outputs.count(first_reshape) &&\n+ node.outputs.count(second_reshape)) {\n+ 
return true;\n+ }\n+\n+ return false;\n+}\n+\n+inline bool Select(SelectStatusTransformerQK* status,", + "comment_created_at": "2022-08-10T15:55:42+00:00", + "comment_author": "agrabows", + "comment_body": "using references causes error:\r\nsrc/operator/subgraph/dnnl/dnnl_transformer_qk_common.h:93: Is this a non-const reference? If so, make const or use a pointer: SelectStatusTransformerQK& status [runtime/references] [2]", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "904833817", + "pr_number": 21077, + "pr_file": "src/operator/subgraph/dnnl/dnnl_fc_sum_fuse_property.h", + "created_at": "2022-06-23T10:00:41+00:00", + "commented_code": "bool SelectOutput(const BiDirectedNode& cur_node, const BiDirectedNode& output_node) override {\n const auto cur_n = cur_node.node;\n const auto output_n = output_node.node;\n if (status_ == kFail || status_ == kSuccess || output_n->is_variable()) {\n if (status_ != kStart || output_n->is_variable()) {", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "904833817", + "repo_full_name": "apache/mxnet", + "pr_number": 21077, + "pr_file": "src/operator/subgraph/dnnl/dnnl_fc_sum_fuse_property.h", + "discussion_id": "904833817", + "commented_code": "@@ -95,42 +90,27 @@ class SgDNNLFCSumFuseSelector : public SubgraphSelectorV2 {\n bool SelectOutput(const BiDirectedNode& cur_node, const BiDirectedNode& output_node) override {\n const auto cur_n = cur_node.node;\n const auto output_n = output_node.node;\n- if (status_ == kFail || status_ == kSuccess || output_n->is_variable()) {\n+ if (status_ != kStart || output_n->is_variable()) {", + "comment_created_at": "2022-06-23T10:00:41+00:00", + "comment_author": "anko-intel", + "comment_body": "The same state machine process was done on other operators. Evee it is not fully utilized here yet, I think it is better to do it in the common way. (status_ == kFail || status_ == kSuccess ||)", + "pr_file_module": null + }, + { + "comment_id": "904894874", + "repo_full_name": "apache/mxnet", + "pr_number": 21077, + "pr_file": "src/operator/subgraph/dnnl/dnnl_fc_sum_fuse_property.h", + "discussion_id": "904833817", + "commented_code": "@@ -95,42 +90,27 @@ class SgDNNLFCSumFuseSelector : public SubgraphSelectorV2 {\n bool SelectOutput(const BiDirectedNode& cur_node, const BiDirectedNode& output_node) override {\n const auto cur_n = cur_node.node;\n const auto output_n = output_node.node;\n- if (status_ == kFail || status_ == kSuccess || output_n->is_variable()) {\n+ if (status_ != kStart || output_n->is_variable()) {", + "comment_created_at": "2022-06-23T11:13:20+00:00", + "comment_author": "bartekkuncer", + "comment_body": "In my opinion it is a basic change and there is no need to keep this condition any longer than it needs to be especially that now it is way easier to read. ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "906009360", + "pr_number": 20976, + "pr_file": "src/operator/subgraph/dnnl/dnnl_pow_mul_scalar.cc", + "created_at": "2022-06-24T12:20:50+00:00", + "commented_code": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. 
You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied. See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n/*!\n * \\file dnnl_pow_mul_scalar.cc\n * \\brief DNNL pow_mul_scalar operator based on subgraph\n */\n\n#if MXNET_USE_ONEDNN == 1\n\n#include \n#include \n#include \n\n#include \"operator/nn/dnnl/dnnl_base-inl.h\"\n#include \"operator/nn/dnnl/dnnl_pow_mul_scalar-inl.h\"\n#include \"operator/subgraph/common.h\"\n\nnamespace mxnet {\nnamespace op {\nbool DNNLPowMulScalarType(const nnvm::NodeAttrs& attrs,\n std::vector* in_attrs,\n std::vector* out_attrs) {\n CHECK_EQ(in_attrs->size(), 1U);\n CHECK_EQ(out_attrs->size(), 1U);\n const DNNLPowMulScalarParam& param = nnvm::get(attrs.parsed);\n bool scalar_is_int = param.exp_is_int && param.mul_is_int;\n if (common::is_int(in_attrs->at(0)) && !scalar_is_int) {\n TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kFloat64);\n } else if (in_attrs->at(0) == mshadow::kBool) {\n TYPE_ASSIGN_CHECK(*out_attrs, 0, scalar_is_int ? mshadow::kInt64 : mshadow::kFloat64);\n } else {\n TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));\n }\n return out_attrs->at(0) != -1;\n}\n\ninline static bool DNNLPowMulScalarStorageType(const nnvm::NodeAttrs& attrs,\n const int dev_mask,\n DispatchMode* dispatch_mode,\n std::vector* in_attrs,\n std::vector* out_attrs) {\n return DNNLStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs, out_attrs);\n}\n\ntemplate \nstatic void ComputeOP(const nnvm::NodeAttrs& attrs,\n const OpContext& ctx,\n mshadow::Stream* s,\n const TBlob& input,\n const TBlob& output,\n const double scalar) {\n using namespace mshadow;\n using namespace mshadow::expr;\n MSHADOW_TYPE_SWITCH(output.type_flag_, DType, {\n auto temp_req = input.dptr_ == output.dptr_ ? 
kWriteInplace : kWriteTo;\n TBlob temp_tblob = input;\n if (input.type_flag_ != output.type_flag_) {\n temp_tblob = TBlob(ctx.requested[0].get_space_typed(Shape1(output.Size()), s));\n CastCompute(attrs, ctx, {input}, {kWriteTo}, {temp_tblob});\n }\n MXNET_ASSIGN_REQ_SWITCH(temp_req, Req, {\n mxnet_op::Kernel, cpu>::Launch(\n s, input.Size(), output.dptr(), temp_tblob.dptr(), DType(scalar));\n });\n });\n}\n\nstatic void PowMulScalarCompute(const nnvm::NodeAttrs& attrs,\n const OpContext& ctx,\n const std::vector& inputs,\n const std::vector& req,\n const std::vector& outputs) {\n mshadow::Stream* s = ctx.get_stream();\n DCHECK_EQ(inputs.size(), 1);\n DCHECK_EQ(outputs.size(), 1);\n using namespace mshadow;\n using namespace mshadow::expr;\n const DNNLPowMulScalarParam& param = nnvm::get(attrs.parsed);\n // temp_mid_tblob is output of power operation and input of multiplication.\n // Its dtype depends on input dtype and scalar type.\n TBlob temp_mid_tblob =\n ((common::is_int(inputs[0].type_flag_) || inputs[0].type_flag_ == kBool) &&\n !param.exp_is_int) ?\n outputs[0] :\n inputs[0].type_flag_ == kBool ?\n TBlob(ctx.requested[0].get_space_typed(Shape1(inputs[0].Size()), s)) :\n inputs[0];", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "906009360", + "repo_full_name": "apache/mxnet", + "pr_number": 20976, + "pr_file": "src/operator/subgraph/dnnl/dnnl_pow_mul_scalar.cc", + "discussion_id": "906009360", + "commented_code": "@@ -0,0 +1,151 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * \\file dnnl_pow_mul_scalar.cc\n+ * \\brief DNNL pow_mul_scalar operator based on subgraph\n+ */\n+\n+#if MXNET_USE_ONEDNN == 1\n+\n+#include \n+#include \n+#include \n+\n+#include \"operator/nn/dnnl/dnnl_base-inl.h\"\n+#include \"operator/nn/dnnl/dnnl_pow_mul_scalar-inl.h\"\n+#include \"operator/subgraph/common.h\"\n+\n+namespace mxnet {\n+namespace op {\n+bool DNNLPowMulScalarType(const nnvm::NodeAttrs& attrs,\n+ std::vector* in_attrs,\n+ std::vector* out_attrs) {\n+ CHECK_EQ(in_attrs->size(), 1U);\n+ CHECK_EQ(out_attrs->size(), 1U);\n+ const DNNLPowMulScalarParam& param = nnvm::get(attrs.parsed);\n+ bool scalar_is_int = param.exp_is_int && param.mul_is_int;\n+ if (common::is_int(in_attrs->at(0)) && !scalar_is_int) {\n+ TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kFloat64);\n+ } else if (in_attrs->at(0) == mshadow::kBool) {\n+ TYPE_ASSIGN_CHECK(*out_attrs, 0, scalar_is_int ? 
mshadow::kInt64 : mshadow::kFloat64);\n+ } else {\n+ TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));\n+ }\n+ return out_attrs->at(0) != -1;\n+}\n+\n+inline static bool DNNLPowMulScalarStorageType(const nnvm::NodeAttrs& attrs,\n+ const int dev_mask,\n+ DispatchMode* dispatch_mode,\n+ std::vector* in_attrs,\n+ std::vector* out_attrs) {\n+ return DNNLStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs, out_attrs);\n+}\n+\n+template \n+static void ComputeOP(const nnvm::NodeAttrs& attrs,\n+ const OpContext& ctx,\n+ mshadow::Stream* s,\n+ const TBlob& input,\n+ const TBlob& output,\n+ const double scalar) {\n+ using namespace mshadow;\n+ using namespace mshadow::expr;\n+ MSHADOW_TYPE_SWITCH(output.type_flag_, DType, {\n+ auto temp_req = input.dptr_ == output.dptr_ ? kWriteInplace : kWriteTo;\n+ TBlob temp_tblob = input;\n+ if (input.type_flag_ != output.type_flag_) {\n+ temp_tblob = TBlob(ctx.requested[0].get_space_typed(Shape1(output.Size()), s));\n+ CastCompute(attrs, ctx, {input}, {kWriteTo}, {temp_tblob});\n+ }\n+ MXNET_ASSIGN_REQ_SWITCH(temp_req, Req, {\n+ mxnet_op::Kernel, cpu>::Launch(\n+ s, input.Size(), output.dptr(), temp_tblob.dptr(), DType(scalar));\n+ });\n+ });\n+}\n+\n+static void PowMulScalarCompute(const nnvm::NodeAttrs& attrs,\n+ const OpContext& ctx,\n+ const std::vector& inputs,\n+ const std::vector& req,\n+ const std::vector& outputs) {\n+ mshadow::Stream* s = ctx.get_stream();\n+ DCHECK_EQ(inputs.size(), 1);\n+ DCHECK_EQ(outputs.size(), 1);\n+ using namespace mshadow;\n+ using namespace mshadow::expr;\n+ const DNNLPowMulScalarParam& param = nnvm::get(attrs.parsed);\n+ // temp_mid_tblob is output of power operation and input of multiplication.\n+ // Its dtype depends on input dtype and scalar type.\n+ TBlob temp_mid_tblob =\n+ ((common::is_int(inputs[0].type_flag_) || inputs[0].type_flag_ == kBool) &&\n+ !param.exp_is_int) ?\n+ outputs[0] :\n+ inputs[0].type_flag_ == kBool ?\n+ TBlob(ctx.requested[0].get_space_typed(Shape1(inputs[0].Size()), s)) :\n+ inputs[0];", + "comment_created_at": "2022-06-24T12:20:50+00:00", + "comment_author": "bgawrych", + "comment_body": "please use regular if else statements, it is not readable at all", + "pr_file_module": null + }, + { + "comment_id": "906030146", + "repo_full_name": "apache/mxnet", + "pr_number": 20976, + "pr_file": "src/operator/subgraph/dnnl/dnnl_pow_mul_scalar.cc", + "discussion_id": "906009360", + "commented_code": "@@ -0,0 +1,151 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. 
See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * \\file dnnl_pow_mul_scalar.cc\n+ * \\brief DNNL pow_mul_scalar operator based on subgraph\n+ */\n+\n+#if MXNET_USE_ONEDNN == 1\n+\n+#include \n+#include \n+#include \n+\n+#include \"operator/nn/dnnl/dnnl_base-inl.h\"\n+#include \"operator/nn/dnnl/dnnl_pow_mul_scalar-inl.h\"\n+#include \"operator/subgraph/common.h\"\n+\n+namespace mxnet {\n+namespace op {\n+bool DNNLPowMulScalarType(const nnvm::NodeAttrs& attrs,\n+ std::vector* in_attrs,\n+ std::vector* out_attrs) {\n+ CHECK_EQ(in_attrs->size(), 1U);\n+ CHECK_EQ(out_attrs->size(), 1U);\n+ const DNNLPowMulScalarParam& param = nnvm::get(attrs.parsed);\n+ bool scalar_is_int = param.exp_is_int && param.mul_is_int;\n+ if (common::is_int(in_attrs->at(0)) && !scalar_is_int) {\n+ TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kFloat64);\n+ } else if (in_attrs->at(0) == mshadow::kBool) {\n+ TYPE_ASSIGN_CHECK(*out_attrs, 0, scalar_is_int ? mshadow::kInt64 : mshadow::kFloat64);\n+ } else {\n+ TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));\n+ }\n+ return out_attrs->at(0) != -1;\n+}\n+\n+inline static bool DNNLPowMulScalarStorageType(const nnvm::NodeAttrs& attrs,\n+ const int dev_mask,\n+ DispatchMode* dispatch_mode,\n+ std::vector* in_attrs,\n+ std::vector* out_attrs) {\n+ return DNNLStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs, out_attrs);\n+}\n+\n+template \n+static void ComputeOP(const nnvm::NodeAttrs& attrs,\n+ const OpContext& ctx,\n+ mshadow::Stream* s,\n+ const TBlob& input,\n+ const TBlob& output,\n+ const double scalar) {\n+ using namespace mshadow;\n+ using namespace mshadow::expr;\n+ MSHADOW_TYPE_SWITCH(output.type_flag_, DType, {\n+ auto temp_req = input.dptr_ == output.dptr_ ? 
kWriteInplace : kWriteTo;\n+ TBlob temp_tblob = input;\n+ if (input.type_flag_ != output.type_flag_) {\n+ temp_tblob = TBlob(ctx.requested[0].get_space_typed(Shape1(output.Size()), s));\n+ CastCompute(attrs, ctx, {input}, {kWriteTo}, {temp_tblob});\n+ }\n+ MXNET_ASSIGN_REQ_SWITCH(temp_req, Req, {\n+ mxnet_op::Kernel, cpu>::Launch(\n+ s, input.Size(), output.dptr(), temp_tblob.dptr(), DType(scalar));\n+ });\n+ });\n+}\n+\n+static void PowMulScalarCompute(const nnvm::NodeAttrs& attrs,\n+ const OpContext& ctx,\n+ const std::vector& inputs,\n+ const std::vector& req,\n+ const std::vector& outputs) {\n+ mshadow::Stream* s = ctx.get_stream();\n+ DCHECK_EQ(inputs.size(), 1);\n+ DCHECK_EQ(outputs.size(), 1);\n+ using namespace mshadow;\n+ using namespace mshadow::expr;\n+ const DNNLPowMulScalarParam& param = nnvm::get(attrs.parsed);\n+ // temp_mid_tblob is output of power operation and input of multiplication.\n+ // Its dtype depends on input dtype and scalar type.\n+ TBlob temp_mid_tblob =\n+ ((common::is_int(inputs[0].type_flag_) || inputs[0].type_flag_ == kBool) &&\n+ !param.exp_is_int) ?\n+ outputs[0] :\n+ inputs[0].type_flag_ == kBool ?\n+ TBlob(ctx.requested[0].get_space_typed(Shape1(inputs[0].Size()), s)) :\n+ inputs[0];", + "comment_created_at": "2022-06-24T12:49:21+00:00", + "comment_author": "bgawrych", + "comment_body": "also are you sure that inputs[0] should be here as temp_mid_tblob?", + "pr_file_module": null + }, + { + "comment_id": "906108836", + "repo_full_name": "apache/mxnet", + "pr_number": 20976, + "pr_file": "src/operator/subgraph/dnnl/dnnl_pow_mul_scalar.cc", + "discussion_id": "906009360", + "commented_code": "@@ -0,0 +1,151 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * \\file dnnl_pow_mul_scalar.cc\n+ * \\brief DNNL pow_mul_scalar operator based on subgraph\n+ */\n+\n+#if MXNET_USE_ONEDNN == 1\n+\n+#include \n+#include \n+#include \n+\n+#include \"operator/nn/dnnl/dnnl_base-inl.h\"\n+#include \"operator/nn/dnnl/dnnl_pow_mul_scalar-inl.h\"\n+#include \"operator/subgraph/common.h\"\n+\n+namespace mxnet {\n+namespace op {\n+bool DNNLPowMulScalarType(const nnvm::NodeAttrs& attrs,\n+ std::vector* in_attrs,\n+ std::vector* out_attrs) {\n+ CHECK_EQ(in_attrs->size(), 1U);\n+ CHECK_EQ(out_attrs->size(), 1U);\n+ const DNNLPowMulScalarParam& param = nnvm::get(attrs.parsed);\n+ bool scalar_is_int = param.exp_is_int && param.mul_is_int;\n+ if (common::is_int(in_attrs->at(0)) && !scalar_is_int) {\n+ TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kFloat64);\n+ } else if (in_attrs->at(0) == mshadow::kBool) {\n+ TYPE_ASSIGN_CHECK(*out_attrs, 0, scalar_is_int ? 
mshadow::kInt64 : mshadow::kFloat64);\n+ } else {\n+ TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));\n+ }\n+ return out_attrs->at(0) != -1;\n+}\n+\n+inline static bool DNNLPowMulScalarStorageType(const nnvm::NodeAttrs& attrs,\n+ const int dev_mask,\n+ DispatchMode* dispatch_mode,\n+ std::vector* in_attrs,\n+ std::vector* out_attrs) {\n+ return DNNLStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs, out_attrs);\n+}\n+\n+template \n+static void ComputeOP(const nnvm::NodeAttrs& attrs,\n+ const OpContext& ctx,\n+ mshadow::Stream* s,\n+ const TBlob& input,\n+ const TBlob& output,\n+ const double scalar) {\n+ using namespace mshadow;\n+ using namespace mshadow::expr;\n+ MSHADOW_TYPE_SWITCH(output.type_flag_, DType, {\n+ auto temp_req = input.dptr_ == output.dptr_ ? kWriteInplace : kWriteTo;\n+ TBlob temp_tblob = input;\n+ if (input.type_flag_ != output.type_flag_) {\n+ temp_tblob = TBlob(ctx.requested[0].get_space_typed(Shape1(output.Size()), s));\n+ CastCompute(attrs, ctx, {input}, {kWriteTo}, {temp_tblob});\n+ }\n+ MXNET_ASSIGN_REQ_SWITCH(temp_req, Req, {\n+ mxnet_op::Kernel, cpu>::Launch(\n+ s, input.Size(), output.dptr(), temp_tblob.dptr(), DType(scalar));\n+ });\n+ });\n+}\n+\n+static void PowMulScalarCompute(const nnvm::NodeAttrs& attrs,\n+ const OpContext& ctx,\n+ const std::vector& inputs,\n+ const std::vector& req,\n+ const std::vector& outputs) {\n+ mshadow::Stream* s = ctx.get_stream();\n+ DCHECK_EQ(inputs.size(), 1);\n+ DCHECK_EQ(outputs.size(), 1);\n+ using namespace mshadow;\n+ using namespace mshadow::expr;\n+ const DNNLPowMulScalarParam& param = nnvm::get(attrs.parsed);\n+ // temp_mid_tblob is output of power operation and input of multiplication.\n+ // Its dtype depends on input dtype and scalar type.\n+ TBlob temp_mid_tblob =\n+ ((common::is_int(inputs[0].type_flag_) || inputs[0].type_flag_ == kBool) &&\n+ !param.exp_is_int) ?\n+ outputs[0] :\n+ inputs[0].type_flag_ == kBool ?\n+ TBlob(ctx.requested[0].get_space_typed(Shape1(inputs[0].Size()), s)) :\n+ inputs[0];", + "comment_created_at": "2022-06-24T14:14:58+00:00", + "comment_author": "bartekkuncer", + "comment_body": "To me there is no \"should\". 
If the parameters of the operation are right (output of the power op is of the same dtype as the input) I believe it can be used as a temporary buffer.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "903418937", + "pr_number": 21034, + "pr_file": "src/operator/mshadow_op.h", + "created_at": "2022-06-22T08:02:08+00:00", + "commented_code": "(1.0f + math::erf(static_cast(a) / SQRT_2))));\n\nMXNET_BINARY_MATH_OP_NC(gelu_grad,\n DType(0.5f * (1.0f + math::erf(static_cast(a) / SQRT_2) +\n static_cast(a) *\n erf_grad::Map(static_cast(a) / SQRT_2) / SQRT_2)));\n DType(static_cast(b) / static_cast(a) +\n 0.5f * static_cast(a) *\n erf_grad::Map(static_cast(a) / SQRT_2) / SQRT_2));\n\nMXNET_UNARY_MATH_OP(gelu_tanh,\n DType(0.5f * static_cast(a) *\n (1.0f + math::tanh(math::sqrt(2.0f / PI) *\n (static_cast(a) +\n 0.044715 * math::pow(static_cast(a), 3))))));\n\nMXNET_BINARY_MATH_OP_NC(\n gelu_tanh_grad,\n DType(static_cast(b) *\n (1.0f / static_cast(a) +\n (1.0f -\n math::tanh(math::sqrt(2.0f / PI) *\n (static_cast(a) + 0.044715 * math::pow(static_cast(a), 3))) *\n (math::sqrt(2.0f / PI) *\n (1.0f + 0.134145 * math::pow(static_cast(a), 2)))))));", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "903418937", + "repo_full_name": "apache/mxnet", + "pr_number": 21034, + "pr_file": "src/operator/mshadow_op.h", + "discussion_id": "903418937", + "commented_code": "@@ -617,9 +617,25 @@ MXNET_UNARY_MATH_OP(gelu,\n (1.0f + math::erf(static_cast(a) / SQRT_2))));\n \n MXNET_BINARY_MATH_OP_NC(gelu_grad,\n- DType(0.5f * (1.0f + math::erf(static_cast(a) / SQRT_2) +\n- static_cast(a) *\n- erf_grad::Map(static_cast(a) / SQRT_2) / SQRT_2)));\n+ DType(static_cast(b) / static_cast(a) +\n+ 0.5f * static_cast(a) *\n+ erf_grad::Map(static_cast(a) / SQRT_2) / SQRT_2));\n+\n+MXNET_UNARY_MATH_OP(gelu_tanh,\n+ DType(0.5f * static_cast(a) *\n+ (1.0f + math::tanh(math::sqrt(2.0f / PI) *\n+ (static_cast(a) +\n+ 0.044715 * math::pow(static_cast(a), 3))))));\n+\n+MXNET_BINARY_MATH_OP_NC(\n+ gelu_tanh_grad,\n+ DType(static_cast(b) *\n+ (1.0f / static_cast(a) +\n+ (1.0f -\n+ math::tanh(math::sqrt(2.0f / PI) *\n+ (static_cast(a) + 0.044715 * math::pow(static_cast(a), 3))) *\n+ (math::sqrt(2.0f / PI) *\n+ (1.0f + 0.134145 * math::pow(static_cast(a), 2)))))));", + "comment_created_at": "2022-06-22T08:02:08+00:00", + "comment_author": "RafLit", + "comment_body": "It would be cleaner to define 0.044715 and 0.134145 as constants.", + "pr_file_module": null + }, + { + "comment_id": "904916624", + "repo_full_name": "apache/mxnet", + "pr_number": 21034, + "pr_file": "src/operator/mshadow_op.h", + "discussion_id": "903418937", + "commented_code": "@@ -617,9 +617,25 @@ MXNET_UNARY_MATH_OP(gelu,\n (1.0f + math::erf(static_cast(a) / SQRT_2))));\n \n MXNET_BINARY_MATH_OP_NC(gelu_grad,\n- DType(0.5f * (1.0f + math::erf(static_cast(a) / SQRT_2) +\n- static_cast(a) *\n- erf_grad::Map(static_cast(a) / SQRT_2) / SQRT_2)));\n+ DType(static_cast(b) / static_cast(a) +\n+ 0.5f * static_cast(a) *\n+ erf_grad::Map(static_cast(a) / SQRT_2) / SQRT_2));\n+\n+MXNET_UNARY_MATH_OP(gelu_tanh,\n+ DType(0.5f * static_cast(a) *\n+ (1.0f + math::tanh(math::sqrt(2.0f / PI) *\n+ (static_cast(a) +\n+ 0.044715 * math::pow(static_cast(a), 3))))));\n+\n+MXNET_BINARY_MATH_OP_NC(\n+ gelu_tanh_grad,\n+ DType(static_cast(b) *\n+ (1.0f / static_cast(a) +\n+ (1.0f -\n+ math::tanh(math::sqrt(2.0f / PI) *\n+ (static_cast(a) + 0.044715 * math::pow(static_cast(a), 3))) *\n+ (math::sqrt(2.0f / PI) *\n+ (1.0f + 0.134145 * 
math::pow(static_cast(a), 2)))))));", + "comment_created_at": "2022-06-23T11:40:54+00:00", + "comment_author": "bgawrych", + "comment_body": "done", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "746508919", + "pr_number": 20724, + "pr_file": "src/operator/subgraph/dnnl/dnnl_post_quantize_property.h", + "created_at": "2021-11-10T11:43:44+00:00", + "commented_code": "#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_\n#if MXNET_USE_ONEDNN == 1\n\n#include \n#include \n#include \n#include \n\n#include \"../../nn/dnnl/dnnl_convolution-inl.h\"\n#include \"../../nn/fully_connected-inl.h\"\n#include \"../../quantization/requantize-inl.h\"\n#include \"../../tensor/elemwise_binary_op-inl.h\"\n#include \"../common.h\"\n#include \"dnnl_conv-inl.h\"\n#include \"dnnl_subgraph_base-inl.h\"\n\nnamespace mxnet {\nnamespace op {\n\nclass SgDNNLPostQuantizeSelector : public SubgraphSelector {\nconst std::set support_req_fusion_op = {\"_contrib_quantized_elemwise_add\",\n \"_contrib_quantized_elemwise_mul\",\n \"_contrib_quantized_npi_add\",\n \"_sg_onednn_conv\",\n \"_sg_onednn_fully_connected\",\n \"_sg_onednn_selfatt_qk\",\n \"_sg_onednn_selfatt_valatt\",\n \"_sg_onednn_batch_dot\"};\n\nclass SgDNNLPostQuantizeSelector : public SubgraphSelectorV2 {\n public:\n /*! \\brief pattern match status */\n enum SelectStatus {\n kFail = 0,\n kStart,\n kRequantize,\n kSuccess,\n };\n\n private:\n bool disable_fuse_all;\n bool disable_float_output;\n SelectStatus status;\n std::vector matched_list;\n std::vector matched_list;\n std::set support_requantize_fusion_op_name;\n\n public:\n SgDNNLPostQuantizeSelector() {\n support_requantize_fusion_op_name.insert(\"_sg_onednn_conv\");\n support_requantize_fusion_op_name.insert(\"_contrib_quantized_elemwise_add\");\n support_requantize_fusion_op_name.insert(\"_contrib_quantized_npi_add\");\n explicit SgDNNLPostQuantizeSelector(const bool dis_fuse_all, const bool dis_float_output)\n : disable_fuse_all(dis_fuse_all), disable_float_output(dis_float_output) {\n support_requantize_fusion_op_name = support_req_fusion_op;\n }\n\n bool Select(const nnvm::Node& n) override {\n if (n.op() && support_requantize_fusion_op_name.count(n.op()->name)) {\n if (n.op() == Op::Get(\"_sg_onednn_conv\")) {\n auto const& param = nnvm::get(n.attrs.parsed);\n if (param.full_conv_param.dnnl_param.quantized) {\n status = kStart;\n matched_list.clear();\n matched_list.push_back(&n);\n return true;\n }\n } else if (n.op()->name == \"_contrib_quantized_elemwise_add\" ||\n n.op()->name == \"_contrib_quantized_npi_add\") {\n status = kStart;\n matched_list.clear();\n matched_list.push_back(&n);\n return true;\n }\n bool Select(const BiDirectedNode& n) override {\n const nnvm::Node* raw_node = n.node;\n if ((!disable_fuse_all) && raw_node->op() &&\n support_requantize_fusion_op_name.count(raw_node->op()->name)) {\n status = kStart;\n matched_list.clear();\n matched_list.push_back(&n);\n return true;\n }\n return false;\n }\n\n bool SelectInput(const nnvm::Node& n, const nnvm::Node& new_node) override {\n bool SelectInput(const BiDirectedNode& n, const BiDirectedNode& new_node) override {\n return false;\n }\n\n bool SelectOutput(const nnvm::Node& n, const nnvm::Node& new_node) override {\n if (status == kFail || status == kSuccess || new_node.is_variable())\n bool SelectOutput(const BiDirectedNode& n, const BiDirectedNode& new_node) override {\n const nnvm::Node* raw_node = n.node;\n const nnvm::Node* raw_new_node = new_node.node;\n if (status == kFail || status == kSuccess 
|| raw_new_node->is_variable())\n return false;\n // If n isn't the last matched node, then we encoutered a internal\n // branch, we should pop out the node behind n and stop fusion.\n if (matched_list.back() != &n) {\n status = kFail;\n if (std::find(matched_list.begin(), matched_list.end(), &n) != matched_list.end()) {\n while (matched_list.back() != &n) {\n matched_list.pop_back();\n }\n }\n status = kSuccess;\n return false;\n }\n if (new_node.op()->name == \"_contrib_requantize\") {\n auto const& param = nnvm::get(new_node.attrs.parsed);\n if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {\n matched_list.push_back(&new_node);\n\n switch (status) {\n case kStart:\n if (raw_new_node->op() == Op::Get(\"_contrib_requantize\")) {\n auto const& param = nnvm::get(raw_new_node->attrs.parsed);\n if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {\n matched_list.push_back(&new_node);\n status = kRequantize;\n if (raw_node->op() == Op::Get(\"_sg_onednn_conv\")) {\n status = kSuccess;\n }\n return true;\n }\n }\n case kRequantize:\n if (!disable_float_output && raw_new_node->op() == Op::Get(\"_contrib_dequantize\")) {\n CHECK(raw_node->op() == Op::Get(\"_contrib_requantize\"));\n if (n.outputs.size() > 1) {\n // check if requantize have other outputs than dequantize\n // if it has we can't fuse dequantize\n for (auto kv : n.outputs) {", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "746508919", + "repo_full_name": "apache/mxnet", + "pr_number": 20724, + "pr_file": "src/operator/subgraph/dnnl/dnnl_post_quantize_property.h", + "discussion_id": "746508919", + "commented_code": "@@ -20,146 +20,209 @@\n #define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_\n #if MXNET_USE_ONEDNN == 1\n \n+#include \n #include \n #include \n #include \n \n #include \"../../nn/dnnl/dnnl_convolution-inl.h\"\n+#include \"../../nn/fully_connected-inl.h\"\n #include \"../../quantization/requantize-inl.h\"\n+#include \"../../tensor/elemwise_binary_op-inl.h\"\n #include \"../common.h\"\n #include \"dnnl_conv-inl.h\"\n #include \"dnnl_subgraph_base-inl.h\"\n \n namespace mxnet {\n namespace op {\n \n-class SgDNNLPostQuantizeSelector : public SubgraphSelector {\n+const std::set support_req_fusion_op = {\"_contrib_quantized_elemwise_add\",\n+ \"_contrib_quantized_elemwise_mul\",\n+ \"_contrib_quantized_npi_add\",\n+ \"_sg_onednn_conv\",\n+ \"_sg_onednn_fully_connected\",\n+ \"_sg_onednn_selfatt_qk\",\n+ \"_sg_onednn_selfatt_valatt\",\n+ \"_sg_onednn_batch_dot\"};\n+\n+class SgDNNLPostQuantizeSelector : public SubgraphSelectorV2 {\n public:\n /*! 
\\brief pattern match status */\n enum SelectStatus {\n kFail = 0,\n kStart,\n+ kRequantize,\n kSuccess,\n };\n \n private:\n+ bool disable_fuse_all;\n+ bool disable_float_output;\n SelectStatus status;\n- std::vector matched_list;\n+ std::vector matched_list;\n std::set support_requantize_fusion_op_name;\n \n public:\n- SgDNNLPostQuantizeSelector() {\n- support_requantize_fusion_op_name.insert(\"_sg_onednn_conv\");\n- support_requantize_fusion_op_name.insert(\"_contrib_quantized_elemwise_add\");\n- support_requantize_fusion_op_name.insert(\"_contrib_quantized_npi_add\");\n+ explicit SgDNNLPostQuantizeSelector(const bool dis_fuse_all, const bool dis_float_output)\n+ : disable_fuse_all(dis_fuse_all), disable_float_output(dis_float_output) {\n+ support_requantize_fusion_op_name = support_req_fusion_op;\n }\n \n- bool Select(const nnvm::Node& n) override {\n- if (n.op() && support_requantize_fusion_op_name.count(n.op()->name)) {\n- if (n.op() == Op::Get(\"_sg_onednn_conv\")) {\n- auto const& param = nnvm::get(n.attrs.parsed);\n- if (param.full_conv_param.dnnl_param.quantized) {\n- status = kStart;\n- matched_list.clear();\n- matched_list.push_back(&n);\n- return true;\n- }\n- } else if (n.op()->name == \"_contrib_quantized_elemwise_add\" ||\n- n.op()->name == \"_contrib_quantized_npi_add\") {\n- status = kStart;\n- matched_list.clear();\n- matched_list.push_back(&n);\n- return true;\n- }\n+ bool Select(const BiDirectedNode& n) override {\n+ const nnvm::Node* raw_node = n.node;\n+ if ((!disable_fuse_all) && raw_node->op() &&\n+ support_requantize_fusion_op_name.count(raw_node->op()->name)) {\n+ status = kStart;\n+ matched_list.clear();\n+ matched_list.push_back(&n);\n+ return true;\n }\n return false;\n }\n \n- bool SelectInput(const nnvm::Node& n, const nnvm::Node& new_node) override {\n+ bool SelectInput(const BiDirectedNode& n, const BiDirectedNode& new_node) override {\n return false;\n }\n \n- bool SelectOutput(const nnvm::Node& n, const nnvm::Node& new_node) override {\n- if (status == kFail || status == kSuccess || new_node.is_variable())\n+ bool SelectOutput(const BiDirectedNode& n, const BiDirectedNode& new_node) override {\n+ const nnvm::Node* raw_node = n.node;\n+ const nnvm::Node* raw_new_node = new_node.node;\n+ if (status == kFail || status == kSuccess || raw_new_node->is_variable())\n return false;\n // If n isn't the last matched node, then we encoutered a internal\n // branch, we should pop out the node behind n and stop fusion.\n if (matched_list.back() != &n) {\n- status = kFail;\n+ if (std::find(matched_list.begin(), matched_list.end(), &n) != matched_list.end()) {\n+ while (matched_list.back() != &n) {\n+ matched_list.pop_back();\n+ }\n+ }\n+ status = kSuccess;\n return false;\n }\n- if (new_node.op()->name == \"_contrib_requantize\") {\n- auto const& param = nnvm::get(new_node.attrs.parsed);\n- if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {\n- matched_list.push_back(&new_node);\n+\n+ switch (status) {\n+ case kStart:\n+ if (raw_new_node->op() == Op::Get(\"_contrib_requantize\")) {\n+ auto const& param = nnvm::get(raw_new_node->attrs.parsed);\n+ if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {\n+ matched_list.push_back(&new_node);\n+ status = kRequantize;\n+ if (raw_node->op() == Op::Get(\"_sg_onednn_conv\")) {\n+ status = kSuccess;\n+ }\n+ return true;\n+ }\n+ }\n+ case kRequantize:\n+ if (!disable_float_output && raw_new_node->op() == Op::Get(\"_contrib_dequantize\")) {\n+ CHECK(raw_node->op() == 
Op::Get(\"_contrib_requantize\"));\n+ if (n.outputs.size() > 1) {\n+ // check if requantize have other outputs than dequantize\n+ // if it has we can't fuse dequantize\n+ for (auto kv : n.outputs) {", + "comment_created_at": "2021-11-10T11:43:44+00:00", + "comment_author": "mozga-intel", + "comment_body": "How about? If an object is const-> then `kv` is const, otherwise, the `kv` might be non-const.\r\n```suggestion\r\n for (const auto kv : n.outputs) {\r\n```", + "pr_file_module": null + }, + { + "comment_id": "750712116", + "repo_full_name": "apache/mxnet", + "pr_number": 20724, + "pr_file": "src/operator/subgraph/dnnl/dnnl_post_quantize_property.h", + "discussion_id": "746508919", + "commented_code": "@@ -20,146 +20,209 @@\n #define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_\n #if MXNET_USE_ONEDNN == 1\n \n+#include \n #include \n #include \n #include \n \n #include \"../../nn/dnnl/dnnl_convolution-inl.h\"\n+#include \"../../nn/fully_connected-inl.h\"\n #include \"../../quantization/requantize-inl.h\"\n+#include \"../../tensor/elemwise_binary_op-inl.h\"\n #include \"../common.h\"\n #include \"dnnl_conv-inl.h\"\n #include \"dnnl_subgraph_base-inl.h\"\n \n namespace mxnet {\n namespace op {\n \n-class SgDNNLPostQuantizeSelector : public SubgraphSelector {\n+const std::set support_req_fusion_op = {\"_contrib_quantized_elemwise_add\",\n+ \"_contrib_quantized_elemwise_mul\",\n+ \"_contrib_quantized_npi_add\",\n+ \"_sg_onednn_conv\",\n+ \"_sg_onednn_fully_connected\",\n+ \"_sg_onednn_selfatt_qk\",\n+ \"_sg_onednn_selfatt_valatt\",\n+ \"_sg_onednn_batch_dot\"};\n+\n+class SgDNNLPostQuantizeSelector : public SubgraphSelectorV2 {\n public:\n /*! \\brief pattern match status */\n enum SelectStatus {\n kFail = 0,\n kStart,\n+ kRequantize,\n kSuccess,\n };\n \n private:\n+ bool disable_fuse_all;\n+ bool disable_float_output;\n SelectStatus status;\n- std::vector matched_list;\n+ std::vector matched_list;\n std::set support_requantize_fusion_op_name;\n \n public:\n- SgDNNLPostQuantizeSelector() {\n- support_requantize_fusion_op_name.insert(\"_sg_onednn_conv\");\n- support_requantize_fusion_op_name.insert(\"_contrib_quantized_elemwise_add\");\n- support_requantize_fusion_op_name.insert(\"_contrib_quantized_npi_add\");\n+ explicit SgDNNLPostQuantizeSelector(const bool dis_fuse_all, const bool dis_float_output)\n+ : disable_fuse_all(dis_fuse_all), disable_float_output(dis_float_output) {\n+ support_requantize_fusion_op_name = support_req_fusion_op;\n }\n \n- bool Select(const nnvm::Node& n) override {\n- if (n.op() && support_requantize_fusion_op_name.count(n.op()->name)) {\n- if (n.op() == Op::Get(\"_sg_onednn_conv\")) {\n- auto const& param = nnvm::get(n.attrs.parsed);\n- if (param.full_conv_param.dnnl_param.quantized) {\n- status = kStart;\n- matched_list.clear();\n- matched_list.push_back(&n);\n- return true;\n- }\n- } else if (n.op()->name == \"_contrib_quantized_elemwise_add\" ||\n- n.op()->name == \"_contrib_quantized_npi_add\") {\n- status = kStart;\n- matched_list.clear();\n- matched_list.push_back(&n);\n- return true;\n- }\n+ bool Select(const BiDirectedNode& n) override {\n+ const nnvm::Node* raw_node = n.node;\n+ if ((!disable_fuse_all) && raw_node->op() &&\n+ support_requantize_fusion_op_name.count(raw_node->op()->name)) {\n+ status = kStart;\n+ matched_list.clear();\n+ matched_list.push_back(&n);\n+ return true;\n }\n return false;\n }\n \n- bool SelectInput(const nnvm::Node& n, const nnvm::Node& new_node) override {\n+ bool SelectInput(const BiDirectedNode& n, const 
BiDirectedNode& new_node) override {\n return false;\n }\n \n- bool SelectOutput(const nnvm::Node& n, const nnvm::Node& new_node) override {\n- if (status == kFail || status == kSuccess || new_node.is_variable())\n+ bool SelectOutput(const BiDirectedNode& n, const BiDirectedNode& new_node) override {\n+ const nnvm::Node* raw_node = n.node;\n+ const nnvm::Node* raw_new_node = new_node.node;\n+ if (status == kFail || status == kSuccess || raw_new_node->is_variable())\n return false;\n // If n isn't the last matched node, then we encoutered a internal\n // branch, we should pop out the node behind n and stop fusion.\n if (matched_list.back() != &n) {\n- status = kFail;\n+ if (std::find(matched_list.begin(), matched_list.end(), &n) != matched_list.end()) {\n+ while (matched_list.back() != &n) {\n+ matched_list.pop_back();\n+ }\n+ }\n+ status = kSuccess;\n return false;\n }\n- if (new_node.op()->name == \"_contrib_requantize\") {\n- auto const& param = nnvm::get(new_node.attrs.parsed);\n- if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {\n- matched_list.push_back(&new_node);\n+\n+ switch (status) {\n+ case kStart:\n+ if (raw_new_node->op() == Op::Get(\"_contrib_requantize\")) {\n+ auto const& param = nnvm::get(raw_new_node->attrs.parsed);\n+ if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {\n+ matched_list.push_back(&new_node);\n+ status = kRequantize;\n+ if (raw_node->op() == Op::Get(\"_sg_onednn_conv\")) {\n+ status = kSuccess;\n+ }\n+ return true;\n+ }\n+ }\n+ case kRequantize:\n+ if (!disable_float_output && raw_new_node->op() == Op::Get(\"_contrib_dequantize\")) {\n+ CHECK(raw_node->op() == Op::Get(\"_contrib_requantize\"));\n+ if (n.outputs.size() > 1) {\n+ // check if requantize have other outputs than dequantize\n+ // if it has we can't fuse dequantize\n+ for (auto kv : n.outputs) {", + "comment_created_at": "2021-11-16T22:27:00+00:00", + "comment_author": "DominikaJedynak", + "comment_body": "kv.first, which is the only place where kv is used, is marked const, but I can make kv const as well for clarity", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "670199194", + "pr_number": 20430, + "pr_file": "src/operator/quantization/quantize_graph_pass.h", + "created_at": "2021-07-15T07:18:28+00:00", + "commented_code": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied. 
See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n/*!\n * Copyright (c) 2021 by Contributors\n * \\file quantize_graph_pass.h\n * \\brief\n */\n#ifndef MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n#define MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n\n#include \n#include \n#include \n#include \n#include \n#include \n#include \n#include \n#include \"quantize_v2-inl.h\"\n#include \"../nn/mkldnn/mkldnn_fully_connected-inl.h\"\n#include \"../../common/utils.h\"\n\nnamespace mxnet {\nnamespace op {\n\nusing nnvm::Symbol;\nusing nnvm::Node;\nusing nnvm::ObjectPtr;\nusing nnvm::NodeEntry;\nusing nnvm::Graph;\n\ninline ObjectPtr CreateNode(std::string op_name, std::string node_name) {\n ObjectPtr node = Node::Create();\n node->attrs.name = node_name;\n if (op_name == \"nullptr\") {\n node->attrs.op = nullptr;\n // ugly workaround because VariableParam is not exposed\n node->attrs.parsed =\n nnvm::Symbol::CreateVariable(node->attrs.name).outputs[0].node->attrs.parsed;\n } else {\n node->attrs.op = Op::Get(op_name);\n }\n return node;\n}\n\ntemplate \nstatic inline bool IsOneDNNFullyConnected(const ObjectPtr& n) {\n#if MXNET_USE_MKLDNN == 1\n if (n->op() == Op::Get(\"_sg_mkldnn_fully_connected\")) {\n auto const& param = nnvm::get(n->attrs.parsed);\n if (!(param.mkldnn_param.channel_wise_quantize.has_value() &&\n param.mkldnn_param.channel_wise_quantize.value())) {\n return !require_bias || (param.default_param.no_bias == false &&\n n->inputs[2].node->is_variable());\n }\n }\n#endif\n return false;\n}\n\nstatic inline bool IsQuantize(const ObjectPtr& n) {\n if (n->op() == Op::Get(\"_contrib_quantize_v2\")) {\n auto const ¶m = nnvm::get(n->attrs.parsed);\n if (param.min_calib_range.has_value() &&\n param.min_calib_range.value() < 0.0f) {\n return true;\n }\n }\n return false;\n}\n\nstatic NDArray* FindInArgByName(const Graph &g, const std::string& name) {\n const std::vector& in_arg_names =\n g.GetAttr>(\"in_arg_names\");\n size_t i = std::distance(in_arg_names.begin(),\n std::find(in_arg_names.begin(), in_arg_names.end(), name));\n if (i == in_arg_names.size()) {\n LOG(FATAL) << name << \" not found in in_arg_names\";\n }\n return g.GetAttr(\"in_args\")[i];\n}\n\n// Rescales weights, min_weight and max_weight. Returns bias_int32_rescale.\nstatic inline float RescaleWeights(const Graph &g, const ObjectPtr &fc, NDArray* weight_tensor) {\n ObjectPtr &quantize = fc->inputs[0].node;\n auto min_data = std::stof(quantize->attrs.dict.at(\"min_calib_range\"));\n auto max_data = std::stof(quantize->attrs.dict.at(\"max_calib_range\"));\n\n FCInputIndex id(nnvm::get(fc->attrs.parsed));\n auto in = fc->inputs;\n\n float *min_weight = FindInArgByName(g, in[id.weight_min].node->attrs.name)->data().dptr();", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "670199194", + "repo_full_name": "apache/mxnet", + "pr_number": 20430, + "pr_file": "src/operator/quantization/quantize_graph_pass.h", + "discussion_id": "670199194", + "commented_code": "@@ -0,0 +1,155 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. 
You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * Copyright (c) 2021 by Contributors\n+ * \\file quantize_graph_pass.h\n+ * \\brief\n+ */\n+#ifndef MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n+#define MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \"quantize_v2-inl.h\"\n+#include \"../nn/mkldnn/mkldnn_fully_connected-inl.h\"\n+#include \"../../common/utils.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+using nnvm::Symbol;\n+using nnvm::Node;\n+using nnvm::ObjectPtr;\n+using nnvm::NodeEntry;\n+using nnvm::Graph;\n+\n+inline ObjectPtr CreateNode(std::string op_name, std::string node_name) {\n+ ObjectPtr node = Node::Create();\n+ node->attrs.name = node_name;\n+ if (op_name == \"nullptr\") {\n+ node->attrs.op = nullptr;\n+ // ugly workaround because VariableParam is not exposed\n+ node->attrs.parsed =\n+ nnvm::Symbol::CreateVariable(node->attrs.name).outputs[0].node->attrs.parsed;\n+ } else {\n+ node->attrs.op = Op::Get(op_name);\n+ }\n+ return node;\n+}\n+\n+template \n+static inline bool IsOneDNNFullyConnected(const ObjectPtr& n) {\n+#if MXNET_USE_MKLDNN == 1\n+ if (n->op() == Op::Get(\"_sg_mkldnn_fully_connected\")) {\n+ auto const& param = nnvm::get(n->attrs.parsed);\n+ if (!(param.mkldnn_param.channel_wise_quantize.has_value() &&\n+ param.mkldnn_param.channel_wise_quantize.value())) {\n+ return !require_bias || (param.default_param.no_bias == false &&\n+ n->inputs[2].node->is_variable());\n+ }\n+ }\n+#endif\n+ return false;\n+}\n+\n+static inline bool IsQuantize(const ObjectPtr& n) {\n+ if (n->op() == Op::Get(\"_contrib_quantize_v2\")) {\n+ auto const ¶m = nnvm::get(n->attrs.parsed);\n+ if (param.min_calib_range.has_value() &&\n+ param.min_calib_range.value() < 0.0f) {\n+ return true;\n+ }\n+ }\n+ return false;\n+}\n+\n+static NDArray* FindInArgByName(const Graph &g, const std::string& name) {\n+ const std::vector& in_arg_names =\n+ g.GetAttr>(\"in_arg_names\");\n+ size_t i = std::distance(in_arg_names.begin(),\n+ std::find(in_arg_names.begin(), in_arg_names.end(), name));\n+ if (i == in_arg_names.size()) {\n+ LOG(FATAL) << name << \" not found in in_arg_names\";\n+ }\n+ return g.GetAttr(\"in_args\")[i];\n+}\n+\n+// Rescales weights, min_weight and max_weight. 
Returns bias_int32_rescale.\n+static inline float RescaleWeights(const Graph &g, const ObjectPtr &fc, NDArray* weight_tensor) {\n+ ObjectPtr &quantize = fc->inputs[0].node;\n+ auto min_data = std::stof(quantize->attrs.dict.at(\"min_calib_range\"));\n+ auto max_data = std::stof(quantize->attrs.dict.at(\"max_calib_range\"));\n+\n+ FCInputIndex id(nnvm::get(fc->attrs.parsed));\n+ auto in = fc->inputs;\n+\n+ float *min_weight = FindInArgByName(g, in[id.weight_min].node->attrs.name)->data().dptr();", + "comment_created_at": "2021-07-15T07:18:28+00:00", + "comment_author": "anko-intel", + "comment_body": "One suggestion below:\r\n```suggestion\r\n float *min_weight = \r\n FindInArgByName(g, in[idx.weight_min].node->attrs.name)->data().dptr();\r\n```\r\nor maybe add another helper function which already return pointer to float to avoid \"->data().dptr()\" in the end of each line \r\n\r\n", + "pr_file_module": null + }, + { + "comment_id": "670413549", + "repo_full_name": "apache/mxnet", + "pr_number": 20430, + "pr_file": "src/operator/quantization/quantize_graph_pass.h", + "discussion_id": "670199194", + "commented_code": "@@ -0,0 +1,155 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. 
See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * Copyright (c) 2021 by Contributors\n+ * \\file quantize_graph_pass.h\n+ * \\brief\n+ */\n+#ifndef MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n+#define MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \"quantize_v2-inl.h\"\n+#include \"../nn/mkldnn/mkldnn_fully_connected-inl.h\"\n+#include \"../../common/utils.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+using nnvm::Symbol;\n+using nnvm::Node;\n+using nnvm::ObjectPtr;\n+using nnvm::NodeEntry;\n+using nnvm::Graph;\n+\n+inline ObjectPtr CreateNode(std::string op_name, std::string node_name) {\n+ ObjectPtr node = Node::Create();\n+ node->attrs.name = node_name;\n+ if (op_name == \"nullptr\") {\n+ node->attrs.op = nullptr;\n+ // ugly workaround because VariableParam is not exposed\n+ node->attrs.parsed =\n+ nnvm::Symbol::CreateVariable(node->attrs.name).outputs[0].node->attrs.parsed;\n+ } else {\n+ node->attrs.op = Op::Get(op_name);\n+ }\n+ return node;\n+}\n+\n+template \n+static inline bool IsOneDNNFullyConnected(const ObjectPtr& n) {\n+#if MXNET_USE_MKLDNN == 1\n+ if (n->op() == Op::Get(\"_sg_mkldnn_fully_connected\")) {\n+ auto const& param = nnvm::get(n->attrs.parsed);\n+ if (!(param.mkldnn_param.channel_wise_quantize.has_value() &&\n+ param.mkldnn_param.channel_wise_quantize.value())) {\n+ return !require_bias || (param.default_param.no_bias == false &&\n+ n->inputs[2].node->is_variable());\n+ }\n+ }\n+#endif\n+ return false;\n+}\n+\n+static inline bool IsQuantize(const ObjectPtr& n) {\n+ if (n->op() == Op::Get(\"_contrib_quantize_v2\")) {\n+ auto const ¶m = nnvm::get(n->attrs.parsed);\n+ if (param.min_calib_range.has_value() &&\n+ param.min_calib_range.value() < 0.0f) {\n+ return true;\n+ }\n+ }\n+ return false;\n+}\n+\n+static NDArray* FindInArgByName(const Graph &g, const std::string& name) {\n+ const std::vector& in_arg_names =\n+ g.GetAttr>(\"in_arg_names\");\n+ size_t i = std::distance(in_arg_names.begin(),\n+ std::find(in_arg_names.begin(), in_arg_names.end(), name));\n+ if (i == in_arg_names.size()) {\n+ LOG(FATAL) << name << \" not found in in_arg_names\";\n+ }\n+ return g.GetAttr(\"in_args\")[i];\n+}\n+\n+// Rescales weights, min_weight and max_weight. Returns bias_int32_rescale.\n+static inline float RescaleWeights(const Graph &g, const ObjectPtr &fc, NDArray* weight_tensor) {\n+ ObjectPtr &quantize = fc->inputs[0].node;\n+ auto min_data = std::stof(quantize->attrs.dict.at(\"min_calib_range\"));\n+ auto max_data = std::stof(quantize->attrs.dict.at(\"max_calib_range\"));\n+\n+ FCInputIndex id(nnvm::get(fc->attrs.parsed));\n+ auto in = fc->inputs;\n+\n+ float *min_weight = FindInArgByName(g, in[id.weight_min].node->attrs.name)->data().dptr();", + "comment_created_at": "2021-07-15T12:26:40+00:00", + "comment_author": "sfraczek", + "comment_body": "thank you", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-simplify-for-readability.md b/_reviewers/mxnet-simplify-for-readability.md index 5efceb6..a6fd2a5 100644 --- a/_reviewers/mxnet-simplify-for-readability.md +++ b/_reviewers/mxnet-simplify-for-readability.md @@ -67,257 +67,3 @@ for (const auto& kv : n.outputs) { ``` These practices improve code clarity, reduce cognitive load when reading the code, and make future maintenance easier. 
- - -[ - { - "discussion_id": "954763427", - "pr_number": 21132, - "pr_file": "src/operator/tensor/elemwise_binary_broadcast_op_basic.cc", - "created_at": "2022-08-25T09:54:33+00:00", - "commented_code": "const std::vector& inputs,\n const std::vector& req,\n const std::vector& outputs) {\n mxnet::TShape new_lshape, new_rshape, new_oshape;\n int ndim_diff = BinaryBroadcastShapeCompact(inputs[0].shape(),\n inputs[1].shape(),\n outputs[0].shape(),\n &new_lshape,\n &new_rshape,\n &new_oshape);\n std::vector new_inputs;\n std::vector new_outputs;\n if (ndim_diff) {\n new_inputs = {inputs[0].Reshape(new_lshape), inputs[1].Reshape(new_rshape)};\n new_outputs = {outputs[0].Reshape(new_oshape)};\n } else if (inputs[0].shape().Size() == 1 && inputs[1].shape().Size() == 1) {\n // BinaryBroadcastShapeCompact function doesn't reshape tensors of size (1,1,...,1)\n // into shape (1). It is mandatory for oneDNN primitive to have this reshape done.\n mxnet::TShape one_shape = mxnet::TShape(1, 1);\n new_inputs = {inputs[0].Reshape(one_shape), inputs[1].Reshape(one_shape)};\n new_outputs = {outputs[0].Reshape(one_shape)};\n // We can use more efficient sum kernel when there is no broadcast - when shapes are the same\n const bool same_shape = (inputs[0].shape() == inputs[1].shape());", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "954763427", - "repo_full_name": "apache/mxnet", - "pr_number": 21132, - "pr_file": "src/operator/tensor/elemwise_binary_broadcast_op_basic.cc", - "discussion_id": "954763427", - "commented_code": "@@ -38,31 +39,39 @@ void DNNLBinaryOpForward(const nnvm::NodeAttrs& attrs,\n const std::vector& inputs,\n const std::vector& req,\n const std::vector& outputs) {\n- mxnet::TShape new_lshape, new_rshape, new_oshape;\n- int ndim_diff = BinaryBroadcastShapeCompact(inputs[0].shape(),\n- inputs[1].shape(),\n- outputs[0].shape(),\n- &new_lshape,\n- &new_rshape,\n- &new_oshape);\n- std::vector new_inputs;\n- std::vector new_outputs;\n- if (ndim_diff) {\n- new_inputs = {inputs[0].Reshape(new_lshape), inputs[1].Reshape(new_rshape)};\n- new_outputs = {outputs[0].Reshape(new_oshape)};\n- } else if (inputs[0].shape().Size() == 1 && inputs[1].shape().Size() == 1) {\n- // BinaryBroadcastShapeCompact function doesn't reshape tensors of size (1,1,...,1)\n- // into shape (1). 
It is mandatory for oneDNN primitive to have this reshape done.\n- mxnet::TShape one_shape = mxnet::TShape(1, 1);\n- new_inputs = {inputs[0].Reshape(one_shape), inputs[1].Reshape(one_shape)};\n- new_outputs = {outputs[0].Reshape(one_shape)};\n+ // We can use more efficient sum kernel when there is no broadcast - when shapes are the same\n+ const bool same_shape = (inputs[0].shape() == inputs[1].shape());", - "comment_created_at": "2022-08-25T09:54:33+00:00", - "comment_author": "bartekkuncer", - "comment_body": "```suggestion\r\n const bool same_shape = inputs[0].shape() == inputs[1].shape();\r\n```", - "pr_file_module": null - }, - { - "comment_id": "954778291", - "repo_full_name": "apache/mxnet", - "pr_number": 21132, - "pr_file": "src/operator/tensor/elemwise_binary_broadcast_op_basic.cc", - "discussion_id": "954763427", - "commented_code": "@@ -38,31 +39,39 @@ void DNNLBinaryOpForward(const nnvm::NodeAttrs& attrs,\n const std::vector& inputs,\n const std::vector& req,\n const std::vector& outputs) {\n- mxnet::TShape new_lshape, new_rshape, new_oshape;\n- int ndim_diff = BinaryBroadcastShapeCompact(inputs[0].shape(),\n- inputs[1].shape(),\n- outputs[0].shape(),\n- &new_lshape,\n- &new_rshape,\n- &new_oshape);\n- std::vector new_inputs;\n- std::vector new_outputs;\n- if (ndim_diff) {\n- new_inputs = {inputs[0].Reshape(new_lshape), inputs[1].Reshape(new_rshape)};\n- new_outputs = {outputs[0].Reshape(new_oshape)};\n- } else if (inputs[0].shape().Size() == 1 && inputs[1].shape().Size() == 1) {\n- // BinaryBroadcastShapeCompact function doesn't reshape tensors of size (1,1,...,1)\n- // into shape (1). It is mandatory for oneDNN primitive to have this reshape done.\n- mxnet::TShape one_shape = mxnet::TShape(1, 1);\n- new_inputs = {inputs[0].Reshape(one_shape), inputs[1].Reshape(one_shape)};\n- new_outputs = {outputs[0].Reshape(one_shape)};\n+ // We can use more efficient sum kernel when there is no broadcast - when shapes are the same\n+ const bool same_shape = (inputs[0].shape() == inputs[1].shape());", - "comment_created_at": "2022-08-25T10:10:31+00:00", - "comment_author": "bartekkuncer", - "comment_body": "Maybe put these shapes in variables as they are referred to multiple times?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "941155133", - "pr_number": 21115, - "pr_file": "src/operator/subgraph/dnnl/dnnl_transformer_qk_common.h", - "created_at": "2022-08-09T10:10:26+00:00", - "commented_code": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied. 
See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n#ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_TRANSFORMER_QK_COMMON_H_\n#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_TRANSFORMER_QK_COMMON_H_\n\n#if MXNET_USE_ONEDNN == 1\n\n#include \n#include \n\n#include \"operator/contrib/transformer-inl.h\"\n#include \"operator/numpy/np_matrix_op-inl.h\"\n#include \"operator/tensor/matrix_op-inl.h\"\n#include \"operator/subgraph/common.h\"\n#include \"dnnl_common.h\"\n#include \"dnnl_subgraph_base-inl.h\"\n#include \"dnnl_transformer-inl.h\"\n\nnamespace mxnet {\nnamespace op {\nnamespace qk_common {\n\nenum SelectStatusTransformerQK {\n kFail = 0,\n kStart,\n kFirstSwapAx,\n kSecondSwapAx,\n kFirstReshape,\n kSecondReshape,\n kSuccess\n};\n\n// /*\n// kStart ---> kFirstSwapAx ---> kSecondSwapAx ---> kFirstReshape ---> kSecondReshape ---> kSuccess\n// OR\n// kStart ---> kFirstSwapAx ---> kSecondSwapAx ---> kFirstReshape ---> kSuccess\n// Each status except kStart is connected with kFail\n// */\n\nenum mode { include_split = 0, without_split };\n\ninline bool CheckSwapAxisConditionsQK(const BiDirectedNode& input_node) {\n if (input_node.outputs.size() != 1)\n return false;\n return CheckSwapAxisConditions(*input_node.node);\n}\n\ninline bool CheckReshapeConditionsQK(const BiDirectedNode& input_node, const index_t out_index) {\n if (input_node.outputs.size() != 1)\n return false;\n return CheckReshapeConditions(*input_node.node, out_index);\n}\n\ninline bool CheckSplitConditions(const std::vector& matched_list,\n const BiDirectedNode& node) {\n const SplitParam& param = dmlc::get(node.node->attrs.parsed);\n\n if (param.axis != -1 || param.sections != 3 || param.squeeze_axis)\n return false;\n\n const auto first_reshape = (*(matched_list.end() - 2))->node;\n const auto second_reshape = (*(matched_list.end() - 1))->node;\n if (first_reshape->op() != Op::Get(\"_npx_reshape\") ||\n second_reshape->op() != Op::Get(\"_npx_reshape\")) {\n return false;\n }\n // 3 sections - ensure that every output is used only once\n if (node.outputs.size() == 3 && node.outputs.count(first_reshape) &&\n node.outputs.count(second_reshape)) {\n return true;\n }\n\n return false;\n}\n\ninline bool Select(SelectStatusTransformerQK* status,", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "941155133", - "repo_full_name": "apache/mxnet", - "pr_number": 21115, - "pr_file": "src/operator/subgraph/dnnl/dnnl_transformer_qk_common.h", - "discussion_id": "941155133", - "commented_code": "@@ -0,0 +1,232 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. 
See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+#ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_TRANSFORMER_QK_COMMON_H_\n+#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_TRANSFORMER_QK_COMMON_H_\n+\n+#if MXNET_USE_ONEDNN == 1\n+\n+#include \n+#include \n+\n+#include \"operator/contrib/transformer-inl.h\"\n+#include \"operator/numpy/np_matrix_op-inl.h\"\n+#include \"operator/tensor/matrix_op-inl.h\"\n+#include \"operator/subgraph/common.h\"\n+#include \"dnnl_common.h\"\n+#include \"dnnl_subgraph_base-inl.h\"\n+#include \"dnnl_transformer-inl.h\"\n+\n+namespace mxnet {\n+namespace op {\n+namespace qk_common {\n+\n+enum SelectStatusTransformerQK {\n+ kFail = 0,\n+ kStart,\n+ kFirstSwapAx,\n+ kSecondSwapAx,\n+ kFirstReshape,\n+ kSecondReshape,\n+ kSuccess\n+};\n+\n+// /*\n+// kStart ---> kFirstSwapAx ---> kSecondSwapAx ---> kFirstReshape ---> kSecondReshape ---> kSuccess\n+// OR\n+// kStart ---> kFirstSwapAx ---> kSecondSwapAx ---> kFirstReshape ---> kSuccess\n+// Each status except kStart is connected with kFail\n+// */\n+\n+enum mode { include_split = 0, without_split };\n+\n+inline bool CheckSwapAxisConditionsQK(const BiDirectedNode& input_node) {\n+ if (input_node.outputs.size() != 1)\n+ return false;\n+ return CheckSwapAxisConditions(*input_node.node);\n+}\n+\n+inline bool CheckReshapeConditionsQK(const BiDirectedNode& input_node, const index_t out_index) {\n+ if (input_node.outputs.size() != 1)\n+ return false;\n+ return CheckReshapeConditions(*input_node.node, out_index);\n+}\n+\n+inline bool CheckSplitConditions(const std::vector& matched_list,\n+ const BiDirectedNode& node) {\n+ const SplitParam& param = dmlc::get(node.node->attrs.parsed);\n+\n+ if (param.axis != -1 || param.sections != 3 || param.squeeze_axis)\n+ return false;\n+\n+ const auto first_reshape = (*(matched_list.end() - 2))->node;\n+ const auto second_reshape = (*(matched_list.end() - 1))->node;\n+ if (first_reshape->op() != Op::Get(\"_npx_reshape\") ||\n+ second_reshape->op() != Op::Get(\"_npx_reshape\")) {\n+ return false;\n+ }\n+ // 3 sections - ensure that every output is used only once\n+ if (node.outputs.size() == 3 && node.outputs.count(first_reshape) &&\n+ node.outputs.count(second_reshape)) {\n+ return true;\n+ }\n+\n+ return false;\n+}\n+\n+inline bool Select(SelectStatusTransformerQK* status,", - "comment_created_at": "2022-08-09T10:10:26+00:00", - "comment_author": "bartekkuncer", - "comment_body": "Why pointers instead of references?", - "pr_file_module": null - }, - { - "comment_id": "942626592", - "repo_full_name": "apache/mxnet", - "pr_number": 21115, - "pr_file": "src/operator/subgraph/dnnl/dnnl_transformer_qk_common.h", - "discussion_id": "941155133", - "commented_code": "@@ -0,0 +1,232 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. 
See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+#ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_TRANSFORMER_QK_COMMON_H_\n+#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_TRANSFORMER_QK_COMMON_H_\n+\n+#if MXNET_USE_ONEDNN == 1\n+\n+#include \n+#include \n+\n+#include \"operator/contrib/transformer-inl.h\"\n+#include \"operator/numpy/np_matrix_op-inl.h\"\n+#include \"operator/tensor/matrix_op-inl.h\"\n+#include \"operator/subgraph/common.h\"\n+#include \"dnnl_common.h\"\n+#include \"dnnl_subgraph_base-inl.h\"\n+#include \"dnnl_transformer-inl.h\"\n+\n+namespace mxnet {\n+namespace op {\n+namespace qk_common {\n+\n+enum SelectStatusTransformerQK {\n+ kFail = 0,\n+ kStart,\n+ kFirstSwapAx,\n+ kSecondSwapAx,\n+ kFirstReshape,\n+ kSecondReshape,\n+ kSuccess\n+};\n+\n+// /*\n+// kStart ---> kFirstSwapAx ---> kSecondSwapAx ---> kFirstReshape ---> kSecondReshape ---> kSuccess\n+// OR\n+// kStart ---> kFirstSwapAx ---> kSecondSwapAx ---> kFirstReshape ---> kSuccess\n+// Each status except kStart is connected with kFail\n+// */\n+\n+enum mode { include_split = 0, without_split };\n+\n+inline bool CheckSwapAxisConditionsQK(const BiDirectedNode& input_node) {\n+ if (input_node.outputs.size() != 1)\n+ return false;\n+ return CheckSwapAxisConditions(*input_node.node);\n+}\n+\n+inline bool CheckReshapeConditionsQK(const BiDirectedNode& input_node, const index_t out_index) {\n+ if (input_node.outputs.size() != 1)\n+ return false;\n+ return CheckReshapeConditions(*input_node.node, out_index);\n+}\n+\n+inline bool CheckSplitConditions(const std::vector& matched_list,\n+ const BiDirectedNode& node) {\n+ const SplitParam& param = dmlc::get(node.node->attrs.parsed);\n+\n+ if (param.axis != -1 || param.sections != 3 || param.squeeze_axis)\n+ return false;\n+\n+ const auto first_reshape = (*(matched_list.end() - 2))->node;\n+ const auto second_reshape = (*(matched_list.end() - 1))->node;\n+ if (first_reshape->op() != Op::Get(\"_npx_reshape\") ||\n+ second_reshape->op() != Op::Get(\"_npx_reshape\")) {\n+ return false;\n+ }\n+ // 3 sections - ensure that every output is used only once\n+ if (node.outputs.size() == 3 && node.outputs.count(first_reshape) &&\n+ node.outputs.count(second_reshape)) {\n+ return true;\n+ }\n+\n+ return false;\n+}\n+\n+inline bool Select(SelectStatusTransformerQK* status,", - "comment_created_at": "2022-08-10T15:55:42+00:00", - "comment_author": "agrabows", - "comment_body": "using references causes error:\r\nsrc/operator/subgraph/dnnl/dnnl_transformer_qk_common.h:93: Is this a non-const reference? 
If so, make const or use a pointer: SelectStatusTransformerQK& status [runtime/references] [2]", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "904833817", - "pr_number": 21077, - "pr_file": "src/operator/subgraph/dnnl/dnnl_fc_sum_fuse_property.h", - "created_at": "2022-06-23T10:00:41+00:00", - "commented_code": "bool SelectOutput(const BiDirectedNode& cur_node, const BiDirectedNode& output_node) override {\n const auto cur_n = cur_node.node;\n const auto output_n = output_node.node;\n if (status_ == kFail || status_ == kSuccess || output_n->is_variable()) {\n if (status_ != kStart || output_n->is_variable()) {", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "904833817", - "repo_full_name": "apache/mxnet", - "pr_number": 21077, - "pr_file": "src/operator/subgraph/dnnl/dnnl_fc_sum_fuse_property.h", - "discussion_id": "904833817", - "commented_code": "@@ -95,42 +90,27 @@ class SgDNNLFCSumFuseSelector : public SubgraphSelectorV2 {\n bool SelectOutput(const BiDirectedNode& cur_node, const BiDirectedNode& output_node) override {\n const auto cur_n = cur_node.node;\n const auto output_n = output_node.node;\n- if (status_ == kFail || status_ == kSuccess || output_n->is_variable()) {\n+ if (status_ != kStart || output_n->is_variable()) {", - "comment_created_at": "2022-06-23T10:00:41+00:00", - "comment_author": "anko-intel", - "comment_body": "The same state machine process was done on other operators. Evee it is not fully utilized here yet, I think it is better to do it in the common way. (status_ == kFail || status_ == kSuccess ||)", - "pr_file_module": null - }, - { - "comment_id": "904894874", - "repo_full_name": "apache/mxnet", - "pr_number": 21077, - "pr_file": "src/operator/subgraph/dnnl/dnnl_fc_sum_fuse_property.h", - "discussion_id": "904833817", - "commented_code": "@@ -95,42 +90,27 @@ class SgDNNLFCSumFuseSelector : public SubgraphSelectorV2 {\n bool SelectOutput(const BiDirectedNode& cur_node, const BiDirectedNode& output_node) override {\n const auto cur_n = cur_node.node;\n const auto output_n = output_node.node;\n- if (status_ == kFail || status_ == kSuccess || output_n->is_variable()) {\n+ if (status_ != kStart || output_n->is_variable()) {", - "comment_created_at": "2022-06-23T11:13:20+00:00", - "comment_author": "bartekkuncer", - "comment_body": "In my opinion it is a basic change and there is no need to keep this condition any longer than it needs to be especially that now it is way easier to read. ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "906009360", - "pr_number": 20976, - "pr_file": "src/operator/subgraph/dnnl/dnnl_pow_mul_scalar.cc", - "created_at": "2022-06-24T12:20:50+00:00", - "commented_code": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied. 
See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n/*!\n * \\file dnnl_pow_mul_scalar.cc\n * \\brief DNNL pow_mul_scalar operator based on subgraph\n */\n\n#if MXNET_USE_ONEDNN == 1\n\n#include \n#include \n#include \n\n#include \"operator/nn/dnnl/dnnl_base-inl.h\"\n#include \"operator/nn/dnnl/dnnl_pow_mul_scalar-inl.h\"\n#include \"operator/subgraph/common.h\"\n\nnamespace mxnet {\nnamespace op {\nbool DNNLPowMulScalarType(const nnvm::NodeAttrs& attrs,\n std::vector* in_attrs,\n std::vector* out_attrs) {\n CHECK_EQ(in_attrs->size(), 1U);\n CHECK_EQ(out_attrs->size(), 1U);\n const DNNLPowMulScalarParam& param = nnvm::get(attrs.parsed);\n bool scalar_is_int = param.exp_is_int && param.mul_is_int;\n if (common::is_int(in_attrs->at(0)) && !scalar_is_int) {\n TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kFloat64);\n } else if (in_attrs->at(0) == mshadow::kBool) {\n TYPE_ASSIGN_CHECK(*out_attrs, 0, scalar_is_int ? mshadow::kInt64 : mshadow::kFloat64);\n } else {\n TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));\n }\n return out_attrs->at(0) != -1;\n}\n\ninline static bool DNNLPowMulScalarStorageType(const nnvm::NodeAttrs& attrs,\n const int dev_mask,\n DispatchMode* dispatch_mode,\n std::vector* in_attrs,\n std::vector* out_attrs) {\n return DNNLStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs, out_attrs);\n}\n\ntemplate \nstatic void ComputeOP(const nnvm::NodeAttrs& attrs,\n const OpContext& ctx,\n mshadow::Stream* s,\n const TBlob& input,\n const TBlob& output,\n const double scalar) {\n using namespace mshadow;\n using namespace mshadow::expr;\n MSHADOW_TYPE_SWITCH(output.type_flag_, DType, {\n auto temp_req = input.dptr_ == output.dptr_ ? kWriteInplace : kWriteTo;\n TBlob temp_tblob = input;\n if (input.type_flag_ != output.type_flag_) {\n temp_tblob = TBlob(ctx.requested[0].get_space_typed(Shape1(output.Size()), s));\n CastCompute(attrs, ctx, {input}, {kWriteTo}, {temp_tblob});\n }\n MXNET_ASSIGN_REQ_SWITCH(temp_req, Req, {\n mxnet_op::Kernel, cpu>::Launch(\n s, input.Size(), output.dptr(), temp_tblob.dptr(), DType(scalar));\n });\n });\n}\n\nstatic void PowMulScalarCompute(const nnvm::NodeAttrs& attrs,\n const OpContext& ctx,\n const std::vector& inputs,\n const std::vector& req,\n const std::vector& outputs) {\n mshadow::Stream* s = ctx.get_stream();\n DCHECK_EQ(inputs.size(), 1);\n DCHECK_EQ(outputs.size(), 1);\n using namespace mshadow;\n using namespace mshadow::expr;\n const DNNLPowMulScalarParam& param = nnvm::get(attrs.parsed);\n // temp_mid_tblob is output of power operation and input of multiplication.\n // Its dtype depends on input dtype and scalar type.\n TBlob temp_mid_tblob =\n ((common::is_int(inputs[0].type_flag_) || inputs[0].type_flag_ == kBool) &&\n !param.exp_is_int) ?\n outputs[0] :\n inputs[0].type_flag_ == kBool ?\n TBlob(ctx.requested[0].get_space_typed(Shape1(inputs[0].Size()), s)) :\n inputs[0];", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "906009360", - "repo_full_name": "apache/mxnet", - "pr_number": 20976, - "pr_file": "src/operator/subgraph/dnnl/dnnl_pow_mul_scalar.cc", - "discussion_id": "906009360", - "commented_code": "@@ -0,0 +1,151 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. 
The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * \\file dnnl_pow_mul_scalar.cc\n+ * \\brief DNNL pow_mul_scalar operator based on subgraph\n+ */\n+\n+#if MXNET_USE_ONEDNN == 1\n+\n+#include \n+#include \n+#include \n+\n+#include \"operator/nn/dnnl/dnnl_base-inl.h\"\n+#include \"operator/nn/dnnl/dnnl_pow_mul_scalar-inl.h\"\n+#include \"operator/subgraph/common.h\"\n+\n+namespace mxnet {\n+namespace op {\n+bool DNNLPowMulScalarType(const nnvm::NodeAttrs& attrs,\n+ std::vector* in_attrs,\n+ std::vector* out_attrs) {\n+ CHECK_EQ(in_attrs->size(), 1U);\n+ CHECK_EQ(out_attrs->size(), 1U);\n+ const DNNLPowMulScalarParam& param = nnvm::get(attrs.parsed);\n+ bool scalar_is_int = param.exp_is_int && param.mul_is_int;\n+ if (common::is_int(in_attrs->at(0)) && !scalar_is_int) {\n+ TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kFloat64);\n+ } else if (in_attrs->at(0) == mshadow::kBool) {\n+ TYPE_ASSIGN_CHECK(*out_attrs, 0, scalar_is_int ? mshadow::kInt64 : mshadow::kFloat64);\n+ } else {\n+ TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));\n+ }\n+ return out_attrs->at(0) != -1;\n+}\n+\n+inline static bool DNNLPowMulScalarStorageType(const nnvm::NodeAttrs& attrs,\n+ const int dev_mask,\n+ DispatchMode* dispatch_mode,\n+ std::vector* in_attrs,\n+ std::vector* out_attrs) {\n+ return DNNLStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs, out_attrs);\n+}\n+\n+template \n+static void ComputeOP(const nnvm::NodeAttrs& attrs,\n+ const OpContext& ctx,\n+ mshadow::Stream* s,\n+ const TBlob& input,\n+ const TBlob& output,\n+ const double scalar) {\n+ using namespace mshadow;\n+ using namespace mshadow::expr;\n+ MSHADOW_TYPE_SWITCH(output.type_flag_, DType, {\n+ auto temp_req = input.dptr_ == output.dptr_ ? 
kWriteInplace : kWriteTo;\n+ TBlob temp_tblob = input;\n+ if (input.type_flag_ != output.type_flag_) {\n+ temp_tblob = TBlob(ctx.requested[0].get_space_typed(Shape1(output.Size()), s));\n+ CastCompute(attrs, ctx, {input}, {kWriteTo}, {temp_tblob});\n+ }\n+ MXNET_ASSIGN_REQ_SWITCH(temp_req, Req, {\n+ mxnet_op::Kernel, cpu>::Launch(\n+ s, input.Size(), output.dptr(), temp_tblob.dptr(), DType(scalar));\n+ });\n+ });\n+}\n+\n+static void PowMulScalarCompute(const nnvm::NodeAttrs& attrs,\n+ const OpContext& ctx,\n+ const std::vector& inputs,\n+ const std::vector& req,\n+ const std::vector& outputs) {\n+ mshadow::Stream* s = ctx.get_stream();\n+ DCHECK_EQ(inputs.size(), 1);\n+ DCHECK_EQ(outputs.size(), 1);\n+ using namespace mshadow;\n+ using namespace mshadow::expr;\n+ const DNNLPowMulScalarParam& param = nnvm::get(attrs.parsed);\n+ // temp_mid_tblob is output of power operation and input of multiplication.\n+ // Its dtype depends on input dtype and scalar type.\n+ TBlob temp_mid_tblob =\n+ ((common::is_int(inputs[0].type_flag_) || inputs[0].type_flag_ == kBool) &&\n+ !param.exp_is_int) ?\n+ outputs[0] :\n+ inputs[0].type_flag_ == kBool ?\n+ TBlob(ctx.requested[0].get_space_typed(Shape1(inputs[0].Size()), s)) :\n+ inputs[0];", - "comment_created_at": "2022-06-24T12:20:50+00:00", - "comment_author": "bgawrych", - "comment_body": "please use regular if else statements, it is not readable at all", - "pr_file_module": null - }, - { - "comment_id": "906030146", - "repo_full_name": "apache/mxnet", - "pr_number": 20976, - "pr_file": "src/operator/subgraph/dnnl/dnnl_pow_mul_scalar.cc", - "discussion_id": "906009360", - "commented_code": "@@ -0,0 +1,151 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * \\file dnnl_pow_mul_scalar.cc\n+ * \\brief DNNL pow_mul_scalar operator based on subgraph\n+ */\n+\n+#if MXNET_USE_ONEDNN == 1\n+\n+#include \n+#include \n+#include \n+\n+#include \"operator/nn/dnnl/dnnl_base-inl.h\"\n+#include \"operator/nn/dnnl/dnnl_pow_mul_scalar-inl.h\"\n+#include \"operator/subgraph/common.h\"\n+\n+namespace mxnet {\n+namespace op {\n+bool DNNLPowMulScalarType(const nnvm::NodeAttrs& attrs,\n+ std::vector* in_attrs,\n+ std::vector* out_attrs) {\n+ CHECK_EQ(in_attrs->size(), 1U);\n+ CHECK_EQ(out_attrs->size(), 1U);\n+ const DNNLPowMulScalarParam& param = nnvm::get(attrs.parsed);\n+ bool scalar_is_int = param.exp_is_int && param.mul_is_int;\n+ if (common::is_int(in_attrs->at(0)) && !scalar_is_int) {\n+ TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kFloat64);\n+ } else if (in_attrs->at(0) == mshadow::kBool) {\n+ TYPE_ASSIGN_CHECK(*out_attrs, 0, scalar_is_int ? 
mshadow::kInt64 : mshadow::kFloat64);\n+ } else {\n+ TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));\n+ }\n+ return out_attrs->at(0) != -1;\n+}\n+\n+inline static bool DNNLPowMulScalarStorageType(const nnvm::NodeAttrs& attrs,\n+ const int dev_mask,\n+ DispatchMode* dispatch_mode,\n+ std::vector* in_attrs,\n+ std::vector* out_attrs) {\n+ return DNNLStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs, out_attrs);\n+}\n+\n+template \n+static void ComputeOP(const nnvm::NodeAttrs& attrs,\n+ const OpContext& ctx,\n+ mshadow::Stream* s,\n+ const TBlob& input,\n+ const TBlob& output,\n+ const double scalar) {\n+ using namespace mshadow;\n+ using namespace mshadow::expr;\n+ MSHADOW_TYPE_SWITCH(output.type_flag_, DType, {\n+ auto temp_req = input.dptr_ == output.dptr_ ? kWriteInplace : kWriteTo;\n+ TBlob temp_tblob = input;\n+ if (input.type_flag_ != output.type_flag_) {\n+ temp_tblob = TBlob(ctx.requested[0].get_space_typed(Shape1(output.Size()), s));\n+ CastCompute(attrs, ctx, {input}, {kWriteTo}, {temp_tblob});\n+ }\n+ MXNET_ASSIGN_REQ_SWITCH(temp_req, Req, {\n+ mxnet_op::Kernel, cpu>::Launch(\n+ s, input.Size(), output.dptr(), temp_tblob.dptr(), DType(scalar));\n+ });\n+ });\n+}\n+\n+static void PowMulScalarCompute(const nnvm::NodeAttrs& attrs,\n+ const OpContext& ctx,\n+ const std::vector& inputs,\n+ const std::vector& req,\n+ const std::vector& outputs) {\n+ mshadow::Stream* s = ctx.get_stream();\n+ DCHECK_EQ(inputs.size(), 1);\n+ DCHECK_EQ(outputs.size(), 1);\n+ using namespace mshadow;\n+ using namespace mshadow::expr;\n+ const DNNLPowMulScalarParam& param = nnvm::get(attrs.parsed);\n+ // temp_mid_tblob is output of power operation and input of multiplication.\n+ // Its dtype depends on input dtype and scalar type.\n+ TBlob temp_mid_tblob =\n+ ((common::is_int(inputs[0].type_flag_) || inputs[0].type_flag_ == kBool) &&\n+ !param.exp_is_int) ?\n+ outputs[0] :\n+ inputs[0].type_flag_ == kBool ?\n+ TBlob(ctx.requested[0].get_space_typed(Shape1(inputs[0].Size()), s)) :\n+ inputs[0];", - "comment_created_at": "2022-06-24T12:49:21+00:00", - "comment_author": "bgawrych", - "comment_body": "also are you sure that inputs[0] should be here as temp_mid_tblob?", - "pr_file_module": null - }, - { - "comment_id": "906108836", - "repo_full_name": "apache/mxnet", - "pr_number": 20976, - "pr_file": "src/operator/subgraph/dnnl/dnnl_pow_mul_scalar.cc", - "discussion_id": "906009360", - "commented_code": "@@ -0,0 +1,151 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. 
See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * \\file dnnl_pow_mul_scalar.cc\n+ * \\brief DNNL pow_mul_scalar operator based on subgraph\n+ */\n+\n+#if MXNET_USE_ONEDNN == 1\n+\n+#include \n+#include \n+#include \n+\n+#include \"operator/nn/dnnl/dnnl_base-inl.h\"\n+#include \"operator/nn/dnnl/dnnl_pow_mul_scalar-inl.h\"\n+#include \"operator/subgraph/common.h\"\n+\n+namespace mxnet {\n+namespace op {\n+bool DNNLPowMulScalarType(const nnvm::NodeAttrs& attrs,\n+ std::vector* in_attrs,\n+ std::vector* out_attrs) {\n+ CHECK_EQ(in_attrs->size(), 1U);\n+ CHECK_EQ(out_attrs->size(), 1U);\n+ const DNNLPowMulScalarParam& param = nnvm::get(attrs.parsed);\n+ bool scalar_is_int = param.exp_is_int && param.mul_is_int;\n+ if (common::is_int(in_attrs->at(0)) && !scalar_is_int) {\n+ TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kFloat64);\n+ } else if (in_attrs->at(0) == mshadow::kBool) {\n+ TYPE_ASSIGN_CHECK(*out_attrs, 0, scalar_is_int ? mshadow::kInt64 : mshadow::kFloat64);\n+ } else {\n+ TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));\n+ }\n+ return out_attrs->at(0) != -1;\n+}\n+\n+inline static bool DNNLPowMulScalarStorageType(const nnvm::NodeAttrs& attrs,\n+ const int dev_mask,\n+ DispatchMode* dispatch_mode,\n+ std::vector* in_attrs,\n+ std::vector* out_attrs) {\n+ return DNNLStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs, out_attrs);\n+}\n+\n+template \n+static void ComputeOP(const nnvm::NodeAttrs& attrs,\n+ const OpContext& ctx,\n+ mshadow::Stream* s,\n+ const TBlob& input,\n+ const TBlob& output,\n+ const double scalar) {\n+ using namespace mshadow;\n+ using namespace mshadow::expr;\n+ MSHADOW_TYPE_SWITCH(output.type_flag_, DType, {\n+ auto temp_req = input.dptr_ == output.dptr_ ? kWriteInplace : kWriteTo;\n+ TBlob temp_tblob = input;\n+ if (input.type_flag_ != output.type_flag_) {\n+ temp_tblob = TBlob(ctx.requested[0].get_space_typed(Shape1(output.Size()), s));\n+ CastCompute(attrs, ctx, {input}, {kWriteTo}, {temp_tblob});\n+ }\n+ MXNET_ASSIGN_REQ_SWITCH(temp_req, Req, {\n+ mxnet_op::Kernel, cpu>::Launch(\n+ s, input.Size(), output.dptr(), temp_tblob.dptr(), DType(scalar));\n+ });\n+ });\n+}\n+\n+static void PowMulScalarCompute(const nnvm::NodeAttrs& attrs,\n+ const OpContext& ctx,\n+ const std::vector& inputs,\n+ const std::vector& req,\n+ const std::vector& outputs) {\n+ mshadow::Stream* s = ctx.get_stream();\n+ DCHECK_EQ(inputs.size(), 1);\n+ DCHECK_EQ(outputs.size(), 1);\n+ using namespace mshadow;\n+ using namespace mshadow::expr;\n+ const DNNLPowMulScalarParam& param = nnvm::get(attrs.parsed);\n+ // temp_mid_tblob is output of power operation and input of multiplication.\n+ // Its dtype depends on input dtype and scalar type.\n+ TBlob temp_mid_tblob =\n+ ((common::is_int(inputs[0].type_flag_) || inputs[0].type_flag_ == kBool) &&\n+ !param.exp_is_int) ?\n+ outputs[0] :\n+ inputs[0].type_flag_ == kBool ?\n+ TBlob(ctx.requested[0].get_space_typed(Shape1(inputs[0].Size()), s)) :\n+ inputs[0];", - "comment_created_at": "2022-06-24T14:14:58+00:00", - "comment_author": "bartekkuncer", - "comment_body": "To me there is no \"should\". 
If the parameters of the operation are right (output of the power op is of the same dtype as the input) I believe it can be used as a temporary buffer.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "903418937", - "pr_number": 21034, - "pr_file": "src/operator/mshadow_op.h", - "created_at": "2022-06-22T08:02:08+00:00", - "commented_code": "(1.0f + math::erf(static_cast(a) / SQRT_2))));\n\nMXNET_BINARY_MATH_OP_NC(gelu_grad,\n DType(0.5f * (1.0f + math::erf(static_cast(a) / SQRT_2) +\n static_cast(a) *\n erf_grad::Map(static_cast(a) / SQRT_2) / SQRT_2)));\n DType(static_cast(b) / static_cast(a) +\n 0.5f * static_cast(a) *\n erf_grad::Map(static_cast(a) / SQRT_2) / SQRT_2));\n\nMXNET_UNARY_MATH_OP(gelu_tanh,\n DType(0.5f * static_cast(a) *\n (1.0f + math::tanh(math::sqrt(2.0f / PI) *\n (static_cast(a) +\n 0.044715 * math::pow(static_cast(a), 3))))));\n\nMXNET_BINARY_MATH_OP_NC(\n gelu_tanh_grad,\n DType(static_cast(b) *\n (1.0f / static_cast(a) +\n (1.0f -\n math::tanh(math::sqrt(2.0f / PI) *\n (static_cast(a) + 0.044715 * math::pow(static_cast(a), 3))) *\n (math::sqrt(2.0f / PI) *\n (1.0f + 0.134145 * math::pow(static_cast(a), 2)))))));", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "903418937", - "repo_full_name": "apache/mxnet", - "pr_number": 21034, - "pr_file": "src/operator/mshadow_op.h", - "discussion_id": "903418937", - "commented_code": "@@ -617,9 +617,25 @@ MXNET_UNARY_MATH_OP(gelu,\n (1.0f + math::erf(static_cast(a) / SQRT_2))));\n \n MXNET_BINARY_MATH_OP_NC(gelu_grad,\n- DType(0.5f * (1.0f + math::erf(static_cast(a) / SQRT_2) +\n- static_cast(a) *\n- erf_grad::Map(static_cast(a) / SQRT_2) / SQRT_2)));\n+ DType(static_cast(b) / static_cast(a) +\n+ 0.5f * static_cast(a) *\n+ erf_grad::Map(static_cast(a) / SQRT_2) / SQRT_2));\n+\n+MXNET_UNARY_MATH_OP(gelu_tanh,\n+ DType(0.5f * static_cast(a) *\n+ (1.0f + math::tanh(math::sqrt(2.0f / PI) *\n+ (static_cast(a) +\n+ 0.044715 * math::pow(static_cast(a), 3))))));\n+\n+MXNET_BINARY_MATH_OP_NC(\n+ gelu_tanh_grad,\n+ DType(static_cast(b) *\n+ (1.0f / static_cast(a) +\n+ (1.0f -\n+ math::tanh(math::sqrt(2.0f / PI) *\n+ (static_cast(a) + 0.044715 * math::pow(static_cast(a), 3))) *\n+ (math::sqrt(2.0f / PI) *\n+ (1.0f + 0.134145 * math::pow(static_cast(a), 2)))))));", - "comment_created_at": "2022-06-22T08:02:08+00:00", - "comment_author": "RafLit", - "comment_body": "It would be cleaner to define 0.044715 and 0.134145 as constants.", - "pr_file_module": null - }, - { - "comment_id": "904916624", - "repo_full_name": "apache/mxnet", - "pr_number": 21034, - "pr_file": "src/operator/mshadow_op.h", - "discussion_id": "903418937", - "commented_code": "@@ -617,9 +617,25 @@ MXNET_UNARY_MATH_OP(gelu,\n (1.0f + math::erf(static_cast(a) / SQRT_2))));\n \n MXNET_BINARY_MATH_OP_NC(gelu_grad,\n- DType(0.5f * (1.0f + math::erf(static_cast(a) / SQRT_2) +\n- static_cast(a) *\n- erf_grad::Map(static_cast(a) / SQRT_2) / SQRT_2)));\n+ DType(static_cast(b) / static_cast(a) +\n+ 0.5f * static_cast(a) *\n+ erf_grad::Map(static_cast(a) / SQRT_2) / SQRT_2));\n+\n+MXNET_UNARY_MATH_OP(gelu_tanh,\n+ DType(0.5f * static_cast(a) *\n+ (1.0f + math::tanh(math::sqrt(2.0f / PI) *\n+ (static_cast(a) +\n+ 0.044715 * math::pow(static_cast(a), 3))))));\n+\n+MXNET_BINARY_MATH_OP_NC(\n+ gelu_tanh_grad,\n+ DType(static_cast(b) *\n+ (1.0f / static_cast(a) +\n+ (1.0f -\n+ math::tanh(math::sqrt(2.0f / PI) *\n+ (static_cast(a) + 0.044715 * math::pow(static_cast(a), 3))) *\n+ (math::sqrt(2.0f / PI) *\n+ (1.0f + 0.134145 * 
math::pow(static_cast(a), 2)))))));", - "comment_created_at": "2022-06-23T11:40:54+00:00", - "comment_author": "bgawrych", - "comment_body": "done", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "746508919", - "pr_number": 20724, - "pr_file": "src/operator/subgraph/dnnl/dnnl_post_quantize_property.h", - "created_at": "2021-11-10T11:43:44+00:00", - "commented_code": "#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_\n#if MXNET_USE_ONEDNN == 1\n\n#include \n#include \n#include \n#include \n\n#include \"../../nn/dnnl/dnnl_convolution-inl.h\"\n#include \"../../nn/fully_connected-inl.h\"\n#include \"../../quantization/requantize-inl.h\"\n#include \"../../tensor/elemwise_binary_op-inl.h\"\n#include \"../common.h\"\n#include \"dnnl_conv-inl.h\"\n#include \"dnnl_subgraph_base-inl.h\"\n\nnamespace mxnet {\nnamespace op {\n\nclass SgDNNLPostQuantizeSelector : public SubgraphSelector {\nconst std::set support_req_fusion_op = {\"_contrib_quantized_elemwise_add\",\n \"_contrib_quantized_elemwise_mul\",\n \"_contrib_quantized_npi_add\",\n \"_sg_onednn_conv\",\n \"_sg_onednn_fully_connected\",\n \"_sg_onednn_selfatt_qk\",\n \"_sg_onednn_selfatt_valatt\",\n \"_sg_onednn_batch_dot\"};\n\nclass SgDNNLPostQuantizeSelector : public SubgraphSelectorV2 {\n public:\n /*! \\brief pattern match status */\n enum SelectStatus {\n kFail = 0,\n kStart,\n kRequantize,\n kSuccess,\n };\n\n private:\n bool disable_fuse_all;\n bool disable_float_output;\n SelectStatus status;\n std::vector matched_list;\n std::vector matched_list;\n std::set support_requantize_fusion_op_name;\n\n public:\n SgDNNLPostQuantizeSelector() {\n support_requantize_fusion_op_name.insert(\"_sg_onednn_conv\");\n support_requantize_fusion_op_name.insert(\"_contrib_quantized_elemwise_add\");\n support_requantize_fusion_op_name.insert(\"_contrib_quantized_npi_add\");\n explicit SgDNNLPostQuantizeSelector(const bool dis_fuse_all, const bool dis_float_output)\n : disable_fuse_all(dis_fuse_all), disable_float_output(dis_float_output) {\n support_requantize_fusion_op_name = support_req_fusion_op;\n }\n\n bool Select(const nnvm::Node& n) override {\n if (n.op() && support_requantize_fusion_op_name.count(n.op()->name)) {\n if (n.op() == Op::Get(\"_sg_onednn_conv\")) {\n auto const& param = nnvm::get(n.attrs.parsed);\n if (param.full_conv_param.dnnl_param.quantized) {\n status = kStart;\n matched_list.clear();\n matched_list.push_back(&n);\n return true;\n }\n } else if (n.op()->name == \"_contrib_quantized_elemwise_add\" ||\n n.op()->name == \"_contrib_quantized_npi_add\") {\n status = kStart;\n matched_list.clear();\n matched_list.push_back(&n);\n return true;\n }\n bool Select(const BiDirectedNode& n) override {\n const nnvm::Node* raw_node = n.node;\n if ((!disable_fuse_all) && raw_node->op() &&\n support_requantize_fusion_op_name.count(raw_node->op()->name)) {\n status = kStart;\n matched_list.clear();\n matched_list.push_back(&n);\n return true;\n }\n return false;\n }\n\n bool SelectInput(const nnvm::Node& n, const nnvm::Node& new_node) override {\n bool SelectInput(const BiDirectedNode& n, const BiDirectedNode& new_node) override {\n return false;\n }\n\n bool SelectOutput(const nnvm::Node& n, const nnvm::Node& new_node) override {\n if (status == kFail || status == kSuccess || new_node.is_variable())\n bool SelectOutput(const BiDirectedNode& n, const BiDirectedNode& new_node) override {\n const nnvm::Node* raw_node = n.node;\n const nnvm::Node* raw_new_node = new_node.node;\n if (status == kFail || status == kSuccess 
|| raw_new_node->is_variable())\n return false;\n // If n isn't the last matched node, then we encoutered a internal\n // branch, we should pop out the node behind n and stop fusion.\n if (matched_list.back() != &n) {\n status = kFail;\n if (std::find(matched_list.begin(), matched_list.end(), &n) != matched_list.end()) {\n while (matched_list.back() != &n) {\n matched_list.pop_back();\n }\n }\n status = kSuccess;\n return false;\n }\n if (new_node.op()->name == \"_contrib_requantize\") {\n auto const& param = nnvm::get(new_node.attrs.parsed);\n if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {\n matched_list.push_back(&new_node);\n\n switch (status) {\n case kStart:\n if (raw_new_node->op() == Op::Get(\"_contrib_requantize\")) {\n auto const& param = nnvm::get(raw_new_node->attrs.parsed);\n if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {\n matched_list.push_back(&new_node);\n status = kRequantize;\n if (raw_node->op() == Op::Get(\"_sg_onednn_conv\")) {\n status = kSuccess;\n }\n return true;\n }\n }\n case kRequantize:\n if (!disable_float_output && raw_new_node->op() == Op::Get(\"_contrib_dequantize\")) {\n CHECK(raw_node->op() == Op::Get(\"_contrib_requantize\"));\n if (n.outputs.size() > 1) {\n // check if requantize have other outputs than dequantize\n // if it has we can't fuse dequantize\n for (auto kv : n.outputs) {", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "746508919", - "repo_full_name": "apache/mxnet", - "pr_number": 20724, - "pr_file": "src/operator/subgraph/dnnl/dnnl_post_quantize_property.h", - "discussion_id": "746508919", - "commented_code": "@@ -20,146 +20,209 @@\n #define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_\n #if MXNET_USE_ONEDNN == 1\n \n+#include \n #include \n #include \n #include \n \n #include \"../../nn/dnnl/dnnl_convolution-inl.h\"\n+#include \"../../nn/fully_connected-inl.h\"\n #include \"../../quantization/requantize-inl.h\"\n+#include \"../../tensor/elemwise_binary_op-inl.h\"\n #include \"../common.h\"\n #include \"dnnl_conv-inl.h\"\n #include \"dnnl_subgraph_base-inl.h\"\n \n namespace mxnet {\n namespace op {\n \n-class SgDNNLPostQuantizeSelector : public SubgraphSelector {\n+const std::set support_req_fusion_op = {\"_contrib_quantized_elemwise_add\",\n+ \"_contrib_quantized_elemwise_mul\",\n+ \"_contrib_quantized_npi_add\",\n+ \"_sg_onednn_conv\",\n+ \"_sg_onednn_fully_connected\",\n+ \"_sg_onednn_selfatt_qk\",\n+ \"_sg_onednn_selfatt_valatt\",\n+ \"_sg_onednn_batch_dot\"};\n+\n+class SgDNNLPostQuantizeSelector : public SubgraphSelectorV2 {\n public:\n /*! 
\\brief pattern match status */\n enum SelectStatus {\n kFail = 0,\n kStart,\n+ kRequantize,\n kSuccess,\n };\n \n private:\n+ bool disable_fuse_all;\n+ bool disable_float_output;\n SelectStatus status;\n- std::vector matched_list;\n+ std::vector matched_list;\n std::set support_requantize_fusion_op_name;\n \n public:\n- SgDNNLPostQuantizeSelector() {\n- support_requantize_fusion_op_name.insert(\"_sg_onednn_conv\");\n- support_requantize_fusion_op_name.insert(\"_contrib_quantized_elemwise_add\");\n- support_requantize_fusion_op_name.insert(\"_contrib_quantized_npi_add\");\n+ explicit SgDNNLPostQuantizeSelector(const bool dis_fuse_all, const bool dis_float_output)\n+ : disable_fuse_all(dis_fuse_all), disable_float_output(dis_float_output) {\n+ support_requantize_fusion_op_name = support_req_fusion_op;\n }\n \n- bool Select(const nnvm::Node& n) override {\n- if (n.op() && support_requantize_fusion_op_name.count(n.op()->name)) {\n- if (n.op() == Op::Get(\"_sg_onednn_conv\")) {\n- auto const& param = nnvm::get(n.attrs.parsed);\n- if (param.full_conv_param.dnnl_param.quantized) {\n- status = kStart;\n- matched_list.clear();\n- matched_list.push_back(&n);\n- return true;\n- }\n- } else if (n.op()->name == \"_contrib_quantized_elemwise_add\" ||\n- n.op()->name == \"_contrib_quantized_npi_add\") {\n- status = kStart;\n- matched_list.clear();\n- matched_list.push_back(&n);\n- return true;\n- }\n+ bool Select(const BiDirectedNode& n) override {\n+ const nnvm::Node* raw_node = n.node;\n+ if ((!disable_fuse_all) && raw_node->op() &&\n+ support_requantize_fusion_op_name.count(raw_node->op()->name)) {\n+ status = kStart;\n+ matched_list.clear();\n+ matched_list.push_back(&n);\n+ return true;\n }\n return false;\n }\n \n- bool SelectInput(const nnvm::Node& n, const nnvm::Node& new_node) override {\n+ bool SelectInput(const BiDirectedNode& n, const BiDirectedNode& new_node) override {\n return false;\n }\n \n- bool SelectOutput(const nnvm::Node& n, const nnvm::Node& new_node) override {\n- if (status == kFail || status == kSuccess || new_node.is_variable())\n+ bool SelectOutput(const BiDirectedNode& n, const BiDirectedNode& new_node) override {\n+ const nnvm::Node* raw_node = n.node;\n+ const nnvm::Node* raw_new_node = new_node.node;\n+ if (status == kFail || status == kSuccess || raw_new_node->is_variable())\n return false;\n // If n isn't the last matched node, then we encoutered a internal\n // branch, we should pop out the node behind n and stop fusion.\n if (matched_list.back() != &n) {\n- status = kFail;\n+ if (std::find(matched_list.begin(), matched_list.end(), &n) != matched_list.end()) {\n+ while (matched_list.back() != &n) {\n+ matched_list.pop_back();\n+ }\n+ }\n+ status = kSuccess;\n return false;\n }\n- if (new_node.op()->name == \"_contrib_requantize\") {\n- auto const& param = nnvm::get(new_node.attrs.parsed);\n- if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {\n- matched_list.push_back(&new_node);\n+\n+ switch (status) {\n+ case kStart:\n+ if (raw_new_node->op() == Op::Get(\"_contrib_requantize\")) {\n+ auto const& param = nnvm::get(raw_new_node->attrs.parsed);\n+ if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {\n+ matched_list.push_back(&new_node);\n+ status = kRequantize;\n+ if (raw_node->op() == Op::Get(\"_sg_onednn_conv\")) {\n+ status = kSuccess;\n+ }\n+ return true;\n+ }\n+ }\n+ case kRequantize:\n+ if (!disable_float_output && raw_new_node->op() == Op::Get(\"_contrib_dequantize\")) {\n+ CHECK(raw_node->op() == 
Op::Get(\"_contrib_requantize\"));\n+ if (n.outputs.size() > 1) {\n+ // check if requantize have other outputs than dequantize\n+ // if it has we can't fuse dequantize\n+ for (auto kv : n.outputs) {", - "comment_created_at": "2021-11-10T11:43:44+00:00", - "comment_author": "mozga-intel", - "comment_body": "How about? If an object is const-> then `kv` is const, otherwise, the `kv` might be non-const.\r\n```suggestion\r\n for (const auto kv : n.outputs) {\r\n```", - "pr_file_module": null - }, - { - "comment_id": "750712116", - "repo_full_name": "apache/mxnet", - "pr_number": 20724, - "pr_file": "src/operator/subgraph/dnnl/dnnl_post_quantize_property.h", - "discussion_id": "746508919", - "commented_code": "@@ -20,146 +20,209 @@\n #define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_\n #if MXNET_USE_ONEDNN == 1\n \n+#include \n #include \n #include \n #include \n \n #include \"../../nn/dnnl/dnnl_convolution-inl.h\"\n+#include \"../../nn/fully_connected-inl.h\"\n #include \"../../quantization/requantize-inl.h\"\n+#include \"../../tensor/elemwise_binary_op-inl.h\"\n #include \"../common.h\"\n #include \"dnnl_conv-inl.h\"\n #include \"dnnl_subgraph_base-inl.h\"\n \n namespace mxnet {\n namespace op {\n \n-class SgDNNLPostQuantizeSelector : public SubgraphSelector {\n+const std::set support_req_fusion_op = {\"_contrib_quantized_elemwise_add\",\n+ \"_contrib_quantized_elemwise_mul\",\n+ \"_contrib_quantized_npi_add\",\n+ \"_sg_onednn_conv\",\n+ \"_sg_onednn_fully_connected\",\n+ \"_sg_onednn_selfatt_qk\",\n+ \"_sg_onednn_selfatt_valatt\",\n+ \"_sg_onednn_batch_dot\"};\n+\n+class SgDNNLPostQuantizeSelector : public SubgraphSelectorV2 {\n public:\n /*! \\brief pattern match status */\n enum SelectStatus {\n kFail = 0,\n kStart,\n+ kRequantize,\n kSuccess,\n };\n \n private:\n+ bool disable_fuse_all;\n+ bool disable_float_output;\n SelectStatus status;\n- std::vector matched_list;\n+ std::vector matched_list;\n std::set support_requantize_fusion_op_name;\n \n public:\n- SgDNNLPostQuantizeSelector() {\n- support_requantize_fusion_op_name.insert(\"_sg_onednn_conv\");\n- support_requantize_fusion_op_name.insert(\"_contrib_quantized_elemwise_add\");\n- support_requantize_fusion_op_name.insert(\"_contrib_quantized_npi_add\");\n+ explicit SgDNNLPostQuantizeSelector(const bool dis_fuse_all, const bool dis_float_output)\n+ : disable_fuse_all(dis_fuse_all), disable_float_output(dis_float_output) {\n+ support_requantize_fusion_op_name = support_req_fusion_op;\n }\n \n- bool Select(const nnvm::Node& n) override {\n- if (n.op() && support_requantize_fusion_op_name.count(n.op()->name)) {\n- if (n.op() == Op::Get(\"_sg_onednn_conv\")) {\n- auto const& param = nnvm::get(n.attrs.parsed);\n- if (param.full_conv_param.dnnl_param.quantized) {\n- status = kStart;\n- matched_list.clear();\n- matched_list.push_back(&n);\n- return true;\n- }\n- } else if (n.op()->name == \"_contrib_quantized_elemwise_add\" ||\n- n.op()->name == \"_contrib_quantized_npi_add\") {\n- status = kStart;\n- matched_list.clear();\n- matched_list.push_back(&n);\n- return true;\n- }\n+ bool Select(const BiDirectedNode& n) override {\n+ const nnvm::Node* raw_node = n.node;\n+ if ((!disable_fuse_all) && raw_node->op() &&\n+ support_requantize_fusion_op_name.count(raw_node->op()->name)) {\n+ status = kStart;\n+ matched_list.clear();\n+ matched_list.push_back(&n);\n+ return true;\n }\n return false;\n }\n \n- bool SelectInput(const nnvm::Node& n, const nnvm::Node& new_node) override {\n+ bool SelectInput(const BiDirectedNode& n, const 
BiDirectedNode& new_node) override {\n return false;\n }\n \n- bool SelectOutput(const nnvm::Node& n, const nnvm::Node& new_node) override {\n- if (status == kFail || status == kSuccess || new_node.is_variable())\n+ bool SelectOutput(const BiDirectedNode& n, const BiDirectedNode& new_node) override {\n+ const nnvm::Node* raw_node = n.node;\n+ const nnvm::Node* raw_new_node = new_node.node;\n+ if (status == kFail || status == kSuccess || raw_new_node->is_variable())\n return false;\n // If n isn't the last matched node, then we encoutered a internal\n // branch, we should pop out the node behind n and stop fusion.\n if (matched_list.back() != &n) {\n- status = kFail;\n+ if (std::find(matched_list.begin(), matched_list.end(), &n) != matched_list.end()) {\n+ while (matched_list.back() != &n) {\n+ matched_list.pop_back();\n+ }\n+ }\n+ status = kSuccess;\n return false;\n }\n- if (new_node.op()->name == \"_contrib_requantize\") {\n- auto const& param = nnvm::get(new_node.attrs.parsed);\n- if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {\n- matched_list.push_back(&new_node);\n+\n+ switch (status) {\n+ case kStart:\n+ if (raw_new_node->op() == Op::Get(\"_contrib_requantize\")) {\n+ auto const& param = nnvm::get(raw_new_node->attrs.parsed);\n+ if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {\n+ matched_list.push_back(&new_node);\n+ status = kRequantize;\n+ if (raw_node->op() == Op::Get(\"_sg_onednn_conv\")) {\n+ status = kSuccess;\n+ }\n+ return true;\n+ }\n+ }\n+ case kRequantize:\n+ if (!disable_float_output && raw_new_node->op() == Op::Get(\"_contrib_dequantize\")) {\n+ CHECK(raw_node->op() == Op::Get(\"_contrib_requantize\"));\n+ if (n.outputs.size() > 1) {\n+ // check if requantize have other outputs than dequantize\n+ // if it has we can't fuse dequantize\n+ for (auto kv : n.outputs) {", - "comment_created_at": "2021-11-16T22:27:00+00:00", - "comment_author": "DominikaJedynak", - "comment_body": "kv.first, which is the only place where kv is used, is marked const, but I can make kv const as well for clarity", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "670199194", - "pr_number": 20430, - "pr_file": "src/operator/quantization/quantize_graph_pass.h", - "created_at": "2021-07-15T07:18:28+00:00", - "commented_code": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied. 
See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n/*!\n * Copyright (c) 2021 by Contributors\n * \\file quantize_graph_pass.h\n * \\brief\n */\n#ifndef MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n#define MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n\n#include \n#include \n#include \n#include \n#include \n#include \n#include \n#include \n#include \"quantize_v2-inl.h\"\n#include \"../nn/mkldnn/mkldnn_fully_connected-inl.h\"\n#include \"../../common/utils.h\"\n\nnamespace mxnet {\nnamespace op {\n\nusing nnvm::Symbol;\nusing nnvm::Node;\nusing nnvm::ObjectPtr;\nusing nnvm::NodeEntry;\nusing nnvm::Graph;\n\ninline ObjectPtr CreateNode(std::string op_name, std::string node_name) {\n ObjectPtr node = Node::Create();\n node->attrs.name = node_name;\n if (op_name == \"nullptr\") {\n node->attrs.op = nullptr;\n // ugly workaround because VariableParam is not exposed\n node->attrs.parsed =\n nnvm::Symbol::CreateVariable(node->attrs.name).outputs[0].node->attrs.parsed;\n } else {\n node->attrs.op = Op::Get(op_name);\n }\n return node;\n}\n\ntemplate \nstatic inline bool IsOneDNNFullyConnected(const ObjectPtr& n) {\n#if MXNET_USE_MKLDNN == 1\n if (n->op() == Op::Get(\"_sg_mkldnn_fully_connected\")) {\n auto const& param = nnvm::get(n->attrs.parsed);\n if (!(param.mkldnn_param.channel_wise_quantize.has_value() &&\n param.mkldnn_param.channel_wise_quantize.value())) {\n return !require_bias || (param.default_param.no_bias == false &&\n n->inputs[2].node->is_variable());\n }\n }\n#endif\n return false;\n}\n\nstatic inline bool IsQuantize(const ObjectPtr& n) {\n if (n->op() == Op::Get(\"_contrib_quantize_v2\")) {\n auto const ¶m = nnvm::get(n->attrs.parsed);\n if (param.min_calib_range.has_value() &&\n param.min_calib_range.value() < 0.0f) {\n return true;\n }\n }\n return false;\n}\n\nstatic NDArray* FindInArgByName(const Graph &g, const std::string& name) {\n const std::vector& in_arg_names =\n g.GetAttr>(\"in_arg_names\");\n size_t i = std::distance(in_arg_names.begin(),\n std::find(in_arg_names.begin(), in_arg_names.end(), name));\n if (i == in_arg_names.size()) {\n LOG(FATAL) << name << \" not found in in_arg_names\";\n }\n return g.GetAttr(\"in_args\")[i];\n}\n\n// Rescales weights, min_weight and max_weight. Returns bias_int32_rescale.\nstatic inline float RescaleWeights(const Graph &g, const ObjectPtr &fc, NDArray* weight_tensor) {\n ObjectPtr &quantize = fc->inputs[0].node;\n auto min_data = std::stof(quantize->attrs.dict.at(\"min_calib_range\"));\n auto max_data = std::stof(quantize->attrs.dict.at(\"max_calib_range\"));\n\n FCInputIndex id(nnvm::get(fc->attrs.parsed));\n auto in = fc->inputs;\n\n float *min_weight = FindInArgByName(g, in[id.weight_min].node->attrs.name)->data().dptr();", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "670199194", - "repo_full_name": "apache/mxnet", - "pr_number": 20430, - "pr_file": "src/operator/quantization/quantize_graph_pass.h", - "discussion_id": "670199194", - "commented_code": "@@ -0,0 +1,155 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. 
You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * Copyright (c) 2021 by Contributors\n+ * \\file quantize_graph_pass.h\n+ * \\brief\n+ */\n+#ifndef MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n+#define MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \"quantize_v2-inl.h\"\n+#include \"../nn/mkldnn/mkldnn_fully_connected-inl.h\"\n+#include \"../../common/utils.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+using nnvm::Symbol;\n+using nnvm::Node;\n+using nnvm::ObjectPtr;\n+using nnvm::NodeEntry;\n+using nnvm::Graph;\n+\n+inline ObjectPtr CreateNode(std::string op_name, std::string node_name) {\n+ ObjectPtr node = Node::Create();\n+ node->attrs.name = node_name;\n+ if (op_name == \"nullptr\") {\n+ node->attrs.op = nullptr;\n+ // ugly workaround because VariableParam is not exposed\n+ node->attrs.parsed =\n+ nnvm::Symbol::CreateVariable(node->attrs.name).outputs[0].node->attrs.parsed;\n+ } else {\n+ node->attrs.op = Op::Get(op_name);\n+ }\n+ return node;\n+}\n+\n+template \n+static inline bool IsOneDNNFullyConnected(const ObjectPtr& n) {\n+#if MXNET_USE_MKLDNN == 1\n+ if (n->op() == Op::Get(\"_sg_mkldnn_fully_connected\")) {\n+ auto const& param = nnvm::get(n->attrs.parsed);\n+ if (!(param.mkldnn_param.channel_wise_quantize.has_value() &&\n+ param.mkldnn_param.channel_wise_quantize.value())) {\n+ return !require_bias || (param.default_param.no_bias == false &&\n+ n->inputs[2].node->is_variable());\n+ }\n+ }\n+#endif\n+ return false;\n+}\n+\n+static inline bool IsQuantize(const ObjectPtr& n) {\n+ if (n->op() == Op::Get(\"_contrib_quantize_v2\")) {\n+ auto const ¶m = nnvm::get(n->attrs.parsed);\n+ if (param.min_calib_range.has_value() &&\n+ param.min_calib_range.value() < 0.0f) {\n+ return true;\n+ }\n+ }\n+ return false;\n+}\n+\n+static NDArray* FindInArgByName(const Graph &g, const std::string& name) {\n+ const std::vector& in_arg_names =\n+ g.GetAttr>(\"in_arg_names\");\n+ size_t i = std::distance(in_arg_names.begin(),\n+ std::find(in_arg_names.begin(), in_arg_names.end(), name));\n+ if (i == in_arg_names.size()) {\n+ LOG(FATAL) << name << \" not found in in_arg_names\";\n+ }\n+ return g.GetAttr(\"in_args\")[i];\n+}\n+\n+// Rescales weights, min_weight and max_weight. 
Returns bias_int32_rescale.\n+static inline float RescaleWeights(const Graph &g, const ObjectPtr &fc, NDArray* weight_tensor) {\n+ ObjectPtr &quantize = fc->inputs[0].node;\n+ auto min_data = std::stof(quantize->attrs.dict.at(\"min_calib_range\"));\n+ auto max_data = std::stof(quantize->attrs.dict.at(\"max_calib_range\"));\n+\n+ FCInputIndex id(nnvm::get(fc->attrs.parsed));\n+ auto in = fc->inputs;\n+\n+ float *min_weight = FindInArgByName(g, in[id.weight_min].node->attrs.name)->data().dptr();", - "comment_created_at": "2021-07-15T07:18:28+00:00", - "comment_author": "anko-intel", - "comment_body": "One suggestion below:\r\n```suggestion\r\n float *min_weight = \r\n FindInArgByName(g, in[idx.weight_min].node->attrs.name)->data().dptr();\r\n```\r\nor maybe add another helper function which already return pointer to float to avoid \"->data().dptr()\" in the end of each line \r\n\r\n", - "pr_file_module": null - }, - { - "comment_id": "670413549", - "repo_full_name": "apache/mxnet", - "pr_number": 20430, - "pr_file": "src/operator/quantization/quantize_graph_pass.h", - "discussion_id": "670199194", - "commented_code": "@@ -0,0 +1,155 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. 
See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * Copyright (c) 2021 by Contributors\n+ * \\file quantize_graph_pass.h\n+ * \\brief\n+ */\n+#ifndef MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n+#define MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \"quantize_v2-inl.h\"\n+#include \"../nn/mkldnn/mkldnn_fully_connected-inl.h\"\n+#include \"../../common/utils.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+using nnvm::Symbol;\n+using nnvm::Node;\n+using nnvm::ObjectPtr;\n+using nnvm::NodeEntry;\n+using nnvm::Graph;\n+\n+inline ObjectPtr CreateNode(std::string op_name, std::string node_name) {\n+ ObjectPtr node = Node::Create();\n+ node->attrs.name = node_name;\n+ if (op_name == \"nullptr\") {\n+ node->attrs.op = nullptr;\n+ // ugly workaround because VariableParam is not exposed\n+ node->attrs.parsed =\n+ nnvm::Symbol::CreateVariable(node->attrs.name).outputs[0].node->attrs.parsed;\n+ } else {\n+ node->attrs.op = Op::Get(op_name);\n+ }\n+ return node;\n+}\n+\n+template \n+static inline bool IsOneDNNFullyConnected(const ObjectPtr& n) {\n+#if MXNET_USE_MKLDNN == 1\n+ if (n->op() == Op::Get(\"_sg_mkldnn_fully_connected\")) {\n+ auto const& param = nnvm::get(n->attrs.parsed);\n+ if (!(param.mkldnn_param.channel_wise_quantize.has_value() &&\n+ param.mkldnn_param.channel_wise_quantize.value())) {\n+ return !require_bias || (param.default_param.no_bias == false &&\n+ n->inputs[2].node->is_variable());\n+ }\n+ }\n+#endif\n+ return false;\n+}\n+\n+static inline bool IsQuantize(const ObjectPtr& n) {\n+ if (n->op() == Op::Get(\"_contrib_quantize_v2\")) {\n+ auto const ¶m = nnvm::get(n->attrs.parsed);\n+ if (param.min_calib_range.has_value() &&\n+ param.min_calib_range.value() < 0.0f) {\n+ return true;\n+ }\n+ }\n+ return false;\n+}\n+\n+static NDArray* FindInArgByName(const Graph &g, const std::string& name) {\n+ const std::vector& in_arg_names =\n+ g.GetAttr>(\"in_arg_names\");\n+ size_t i = std::distance(in_arg_names.begin(),\n+ std::find(in_arg_names.begin(), in_arg_names.end(), name));\n+ if (i == in_arg_names.size()) {\n+ LOG(FATAL) << name << \" not found in in_arg_names\";\n+ }\n+ return g.GetAttr(\"in_args\")[i];\n+}\n+\n+// Rescales weights, min_weight and max_weight. 
Returns bias_int32_rescale.\n+static inline float RescaleWeights(const Graph &g, const ObjectPtr &fc, NDArray* weight_tensor) {\n+ ObjectPtr &quantize = fc->inputs[0].node;\n+ auto min_data = std::stof(quantize->attrs.dict.at(\"min_calib_range\"));\n+ auto max_data = std::stof(quantize->attrs.dict.at(\"max_calib_range\"));\n+\n+ FCInputIndex id(nnvm::get(fc->attrs.parsed));\n+ auto in = fc->inputs;\n+\n+ float *min_weight = FindInArgByName(g, in[id.weight_min].node->attrs.name)->data().dptr();", - "comment_created_at": "2021-07-15T12:26:40+00:00", - "comment_author": "sfraczek", - "comment_body": "thank you", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-technical-precision-matters.json b/_reviewers/mxnet-technical-precision-matters.json new file mode 100644 index 0000000..3a7a7cd --- /dev/null +++ b/_reviewers/mxnet-technical-precision-matters.json @@ -0,0 +1,178 @@ +[ + { + "discussion_id": "954861509", + "pr_number": 21127, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization_inc.md", + "created_at": "2022-08-25T11:42:50+00:00", + "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n# Improving accuracy with Intel® Neural Compressor\n\nThe accuracy of a model can decrease as a result of quantization. When the accuracy drop is significant, we can try to manually find a better quantization configuration (exclude some layers, try different calibration methods, etc.), but for bigger models this might prove to be a difficult and time consuming task. [Intel® Neural Compressor](https://github.com/intel/neural-compressor) (INC) tries to automate this process using several tuning heuristics, which aim to find the quantization configuration that satisfies the specified accuracy requirement.\n\n**NOTE:**\n\nMost tuning strategies will try different configurations on an evaluation dataset in order to find out how each layer affects the accuracy of the model. This means that for larger models, it may take a long time to find a solution (as the tuning space is usually larger and the evaluation itself takes longer).\n\n## Installation and Prerequisites\n\n- Install MXNet with oneDNN enabled as described in the [Get started](https://mxnet.apache.org/versions/master/get_started?platform=linux&language=python&processor=cpu&environ=pip&). (Until the 2.0 release you can use the nightly build version: `pip install --pre mxnet -f https://dist.mxnet.io/python`)\n\n- Install Intel® Neural Compressor:\n\n Use one of the commands below to install INC (supported python versions are: 3.6, 3.7, 3.8, 3.9):\n\n ```bash\n # install stable version from pip\n pip install neural-compressor\n\n # install nightly version from pip\n pip install -i https://test.pypi.org/simple/ neural-compressor\n\n # install stable version from conda\n conda install neural-compressor -c conda-forge -c intel\n ```\n If you come into trouble with dependencies on `cv2` library you can run: `apt-get update && apt-get install -y python3-opencv`\n\n## Configuration file\n\nQuantization tuning process can be customized in the yaml configuration file. Below is a simple example:\n\n```yaml\n# cnn.yaml\n\nversion: 1.0\n\nmodel:\n name: cnn\n framework: mxnet\n\nquantization:\n calibration:\n sampling_size: 160 # number of samples for calibration\n\ntuning:\n strategy:\n name: basic\n accuracy_criterion:\n relative: 0.01\n exit_policy:\n timeout: 0\n random_seed: 9527\n```\n\nWe are using the `basic` strategy, but you could also try out different ones. 
[Here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md) you can find a list of strategies available in INC and details of how they work. You can also add your own strategy if the existing ones do not suit your needs.\n\nSince the value of `timeout` is 0, INC will run until it finds a configuration that satisfies the accuracy criterion and then exit. Depending on the strategy this may not be ideal, as sometimes it would be better to further explore the tuning space to find a superior configuration both in terms of accuracy and speed. To achieve this, we can set a specific `timeout` value, which will tell INC how long (in seconds) it should run.\n\nFor more information about the configuration file, see the [template](https://github.com/intel/neural-compressor/blob/master/neural_compressor/template/ptq.yaml) from the official INC repo. Keep in mind that only the `post training quantization` is currently supported for MXNet.\n\n## Model quantization and tuning\n\nIn general, Intel® Neural Compressor requires 4 elements in order to run: \n1. Config file - like the example above \n2. Model to be quantized \n3. Calibration dataloader \n4. Evaluation function - a function that takes a model as an argument and returns the accuracy it achieves on a certain evaluation dataset. \n\n### Quantizing ResNet\n\nThe [quantization](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/performance/backend/dnnl/dnnl_quantization.html#Quantization) sections described how to quantize ResNet using the native MXNet quantization. This example shows how we can achieve the similar results (with the auto-tuning) using INC.\n\n1. Get the model\n\n```python\nimport logging\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo import vision\n\nlogging.basicConfig()\nlogger = logging.getLogger('logger')\nlogger.setLevel(logging.INFO)\n\nbatch_shape = (1, 3, 224, 224)\nresnet18 = vision.resnet18_v1(pretrained=True)\n```\n\n2. Prepare the dataset:\n\n```python\nmx.test_utils.download('http://data.mxnet.io/data/val_256_q90.rec', 'data/val_256_q90.rec')\n\nbatch_size = 16\nmean_std = {'mean_r': 123.68, 'mean_g': 116.779, 'mean_b': 103.939,\n 'std_r': 58.393, 'std_g': 57.12, 'std_b': 57.375}\n\ndata = mx.io.ImageRecordIter(path_imgrec='data/val_256_q90.rec',\n batch_size=batch_size,\n data_shape=batch_shape[1:],\n rand_crop=False,\n rand_mirror=False,\n shuffle=False,\n **mean_std)\ndata.batch_size = batch_size\n```\n\n3. Prepare the evaluation function:\n\n```python\neval_samples = batch_size*10\n\ndef eval_func(model):\n data.reset()\n metric = mx.metric.Accuracy()\n for i, batch in enumerate(data):\n if i * batch_size >= eval_samples:\n break\n x = batch.data[0].as_in_context(mx.cpu())\n label = batch.label[0].as_in_context(mx.cpu())\n outputs = model.forward(x)\n metric.update(label, outputs)\n return metric.get()[1]\n```\n\n4. Run Intel® Neural Compressor:\n\n```python\nfrom neural_compressor.experimental import Quantization\nquantizer = Quantization(\"./cnn.yaml\")\nquantizer.model = resnet18\nquantizer.calib_dataloader = data\nquantizer.eval_func = eval_func\nqnet = quantizer.fit().model\n```\n\nSince this model already achieves good accuracy using native quantization (less than 1% accuracy drop), for the given configuration file, INC will end on the first configuration, quantizing all layers using `naive` calibration mode for each. 
To see the true potential of INC, we need a model which suffers from a larger accuracy drop after quantization.\n\n### Quantizing ResNet50v2\n\nThis example shows how to use INC to quantize ResNet50 v2. In this case, the native MXNet quantization introduce a huge accuracy drop (70% using `naive` calibration mode) and INC allows automatically find better solution.", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "954861509", + "repo_full_name": "apache/mxnet", + "pr_number": 21127, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization_inc.md", + "discussion_id": "954861509", + "commented_code": "@@ -0,0 +1,290 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+# Improving accuracy with Intel® Neural Compressor\n+\n+The accuracy of a model can decrease as a result of quantization. When the accuracy drop is significant, we can try to manually find a better quantization configuration (exclude some layers, try different calibration methods, etc.), but for bigger models this might prove to be a difficult and time consuming task. [Intel® Neural Compressor](https://github.com/intel/neural-compressor) (INC) tries to automate this process using several tuning heuristics, which aim to find the quantization configuration that satisfies the specified accuracy requirement.\n+\n+**NOTE:**\n+\n+Most tuning strategies will try different configurations on an evaluation dataset in order to find out how each layer affects the accuracy of the model. This means that for larger models, it may take a long time to find a solution (as the tuning space is usually larger and the evaluation itself takes longer).\n+\n+## Installation and Prerequisites\n+\n+- Install MXNet with oneDNN enabled as described in the [Get started](https://mxnet.apache.org/versions/master/get_started?platform=linux&language=python&processor=cpu&environ=pip&). (Until the 2.0 release you can use the nightly build version: `pip install --pre mxnet -f https://dist.mxnet.io/python`)\n+\n+- Install Intel® Neural Compressor:\n+\n+ Use one of the commands below to install INC (supported python versions are: 3.6, 3.7, 3.8, 3.9):\n+\n+ ```bash\n+ # install stable version from pip\n+ pip install neural-compressor\n+\n+ # install nightly version from pip\n+ pip install -i https://test.pypi.org/simple/ neural-compressor\n+\n+ # install stable version from conda\n+ conda install neural-compressor -c conda-forge -c intel\n+ ```\n+ If you come into trouble with dependencies on `cv2` library you can run: `apt-get update && apt-get install -y python3-opencv`\n+\n+## Configuration file\n+\n+Quantization tuning process can be customized in the yaml configuration file. Below is a simple example:\n+\n+```yaml\n+# cnn.yaml\n+\n+version: 1.0\n+\n+model:\n+ name: cnn\n+ framework: mxnet\n+\n+quantization:\n+ calibration:\n+ sampling_size: 160 # number of samples for calibration\n+\n+tuning:\n+ strategy:\n+ name: basic\n+ accuracy_criterion:\n+ relative: 0.01\n+ exit_policy:\n+ timeout: 0\n+ random_seed: 9527\n+```\n+\n+We are using the `basic` strategy, but you could also try out different ones. [Here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md) you can find a list of strategies available in INC and details of how they work. You can also add your own strategy if the existing ones do not suit your needs.\n+\n+Since the value of `timeout` is 0, INC will run until it finds a configuration that satisfies the accuracy criterion and then exit. 
Depending on the strategy this may not be ideal, as sometimes it would be better to further explore the tuning space to find a superior configuration both in terms of accuracy and speed. To achieve this, we can set a specific `timeout` value, which will tell INC how long (in seconds) it should run.\n+\n+For more information about the configuration file, see the [template](https://github.com/intel/neural-compressor/blob/master/neural_compressor/template/ptq.yaml) from the official INC repo. Keep in mind that only the `post training quantization` is currently supported for MXNet.\n+\n+## Model quantization and tuning\n+\n+In general, Intel® Neural Compressor requires 4 elements in order to run: \n+1. Config file - like the example above \n+2. Model to be quantized \n+3. Calibration dataloader \n+4. Evaluation function - a function that takes a model as an argument and returns the accuracy it achieves on a certain evaluation dataset. \n+\n+### Quantizing ResNet\n+\n+The [quantization](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/performance/backend/dnnl/dnnl_quantization.html#Quantization) sections described how to quantize ResNet using the native MXNet quantization. This example shows how we can achieve the similar results (with the auto-tuning) using INC.\n+\n+1. Get the model\n+\n+```python\n+import logging\n+import mxnet as mx\n+from mxnet.gluon.model_zoo import vision\n+\n+logging.basicConfig()\n+logger = logging.getLogger('logger')\n+logger.setLevel(logging.INFO)\n+\n+batch_shape = (1, 3, 224, 224)\n+resnet18 = vision.resnet18_v1(pretrained=True)\n+```\n+\n+2. Prepare the dataset:\n+\n+```python\n+mx.test_utils.download('http://data.mxnet.io/data/val_256_q90.rec', 'data/val_256_q90.rec')\n+\n+batch_size = 16\n+mean_std = {'mean_r': 123.68, 'mean_g': 116.779, 'mean_b': 103.939,\n+ 'std_r': 58.393, 'std_g': 57.12, 'std_b': 57.375}\n+\n+data = mx.io.ImageRecordIter(path_imgrec='data/val_256_q90.rec',\n+ batch_size=batch_size,\n+ data_shape=batch_shape[1:],\n+ rand_crop=False,\n+ rand_mirror=False,\n+ shuffle=False,\n+ **mean_std)\n+data.batch_size = batch_size\n+```\n+\n+3. Prepare the evaluation function:\n+\n+```python\n+eval_samples = batch_size*10\n+\n+def eval_func(model):\n+ data.reset()\n+ metric = mx.metric.Accuracy()\n+ for i, batch in enumerate(data):\n+ if i * batch_size >= eval_samples:\n+ break\n+ x = batch.data[0].as_in_context(mx.cpu())\n+ label = batch.label[0].as_in_context(mx.cpu())\n+ outputs = model.forward(x)\n+ metric.update(label, outputs)\n+ return metric.get()[1]\n+```\n+\n+4. Run Intel® Neural Compressor:\n+\n+```python\n+from neural_compressor.experimental import Quantization\n+quantizer = Quantization(\"./cnn.yaml\")\n+quantizer.model = resnet18\n+quantizer.calib_dataloader = data\n+quantizer.eval_func = eval_func\n+qnet = quantizer.fit().model\n+```\n+\n+Since this model already achieves good accuracy using native quantization (less than 1% accuracy drop), for the given configuration file, INC will end on the first configuration, quantizing all layers using `naive` calibration mode for each. To see the true potential of INC, we need a model which suffers from a larger accuracy drop after quantization.\n+\n+### Quantizing ResNet50v2\n+\n+This example shows how to use INC to quantize ResNet50 v2. 
In this case, the native MXNet quantization introduce a huge accuracy drop (70% using `naive` calibration mode) and INC allows automatically find better solution.", + "comment_created_at": "2022-08-25T11:42:50+00:00", + "comment_author": "bartekkuncer", + "comment_body": "```suggestion\r\nThis example shows how to use INC to quantize ResNet50 v2. In this case, the native MXNet quantization introduce a huge accuracy drop (70% using `naive` calibration mode) and INC allows to automatically find better solution.\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "954863542", + "pr_number": 21127, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization_inc.md", + "created_at": "2022-08-25T11:45:02+00:00", + "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n# Improving accuracy with Intel® Neural Compressor\n\nThe accuracy of a model can decrease as a result of quantization. When the accuracy drop is significant, we can try to manually find a better quantization configuration (exclude some layers, try different calibration methods, etc.), but for bigger models this might prove to be a difficult and time consuming task. [Intel® Neural Compressor](https://github.com/intel/neural-compressor) (INC) tries to automate this process using several tuning heuristics, which aim to find the quantization configuration that satisfies the specified accuracy requirement.\n\n**NOTE:**\n\nMost tuning strategies will try different configurations on an evaluation dataset in order to find out how each layer affects the accuracy of the model. This means that for larger models, it may take a long time to find a solution (as the tuning space is usually larger and the evaluation itself takes longer).\n\n## Installation and Prerequisites\n\n- Install MXNet with oneDNN enabled as described in the [Get started](https://mxnet.apache.org/versions/master/get_started?platform=linux&language=python&processor=cpu&environ=pip&). (Until the 2.0 release you can use the nightly build version: `pip install --pre mxnet -f https://dist.mxnet.io/python`)\n\n- Install Intel® Neural Compressor:\n\n Use one of the commands below to install INC (supported python versions are: 3.6, 3.7, 3.8, 3.9):\n\n ```bash\n # install stable version from pip\n pip install neural-compressor\n\n # install nightly version from pip\n pip install -i https://test.pypi.org/simple/ neural-compressor\n\n # install stable version from conda\n conda install neural-compressor -c conda-forge -c intel\n ```\n If you come into trouble with dependencies on `cv2` library you can run: `apt-get update && apt-get install -y python3-opencv`\n\n## Configuration file\n\nQuantization tuning process can be customized in the yaml configuration file. Below is a simple example:\n\n```yaml\n# cnn.yaml\n\nversion: 1.0\n\nmodel:\n name: cnn\n framework: mxnet\n\nquantization:\n calibration:\n sampling_size: 160 # number of samples for calibration\n\ntuning:\n strategy:\n name: basic\n accuracy_criterion:\n relative: 0.01\n exit_policy:\n timeout: 0\n random_seed: 9527\n```\n\nWe are using the `basic` strategy, but you could also try out different ones. [Here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md) you can find a list of strategies available in INC and details of how they work. 
You can also add your own strategy if the existing ones do not suit your needs.\n\nSince the value of `timeout` is 0, INC will run until it finds a configuration that satisfies the accuracy criterion and then exit. Depending on the strategy this may not be ideal, as sometimes it would be better to further explore the tuning space to find a superior configuration both in terms of accuracy and speed. To achieve this, we can set a specific `timeout` value, which will tell INC how long (in seconds) it should run.\n\nFor more information about the configuration file, see the [template](https://github.com/intel/neural-compressor/blob/master/neural_compressor/template/ptq.yaml) from the official INC repo. Keep in mind that only the `post training quantization` is currently supported for MXNet.\n\n## Model quantization and tuning\n\nIn general, Intel® Neural Compressor requires 4 elements in order to run: \n1. Config file - like the example above \n2. Model to be quantized \n3. Calibration dataloader \n4. Evaluation function - a function that takes a model as an argument and returns the accuracy it achieves on a certain evaluation dataset. \n\n### Quantizing ResNet\n\nThe [quantization](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/performance/backend/dnnl/dnnl_quantization.html#Quantization) sections described how to quantize ResNet using the native MXNet quantization. This example shows how we can achieve the similar results (with the auto-tuning) using INC.\n\n1. Get the model\n\n```python\nimport logging\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo import vision\n\nlogging.basicConfig()\nlogger = logging.getLogger('logger')\nlogger.setLevel(logging.INFO)\n\nbatch_shape = (1, 3, 224, 224)\nresnet18 = vision.resnet18_v1(pretrained=True)\n```\n\n2. Prepare the dataset:\n\n```python\nmx.test_utils.download('http://data.mxnet.io/data/val_256_q90.rec', 'data/val_256_q90.rec')\n\nbatch_size = 16\nmean_std = {'mean_r': 123.68, 'mean_g': 116.779, 'mean_b': 103.939,\n 'std_r': 58.393, 'std_g': 57.12, 'std_b': 57.375}\n\ndata = mx.io.ImageRecordIter(path_imgrec='data/val_256_q90.rec',\n batch_size=batch_size,\n data_shape=batch_shape[1:],\n rand_crop=False,\n rand_mirror=False,\n shuffle=False,\n **mean_std)\ndata.batch_size = batch_size\n```\n\n3. Prepare the evaluation function:\n\n```python\neval_samples = batch_size*10\n\ndef eval_func(model):\n data.reset()\n metric = mx.metric.Accuracy()\n for i, batch in enumerate(data):\n if i * batch_size >= eval_samples:\n break\n x = batch.data[0].as_in_context(mx.cpu())\n label = batch.label[0].as_in_context(mx.cpu())\n outputs = model.forward(x)\n metric.update(label, outputs)\n return metric.get()[1]\n```\n\n4. Run Intel® Neural Compressor:\n\n```python\nfrom neural_compressor.experimental import Quantization\nquantizer = Quantization(\"./cnn.yaml\")\nquantizer.model = resnet18\nquantizer.calib_dataloader = data\nquantizer.eval_func = eval_func\nqnet = quantizer.fit().model\n```\n\nSince this model already achieves good accuracy using native quantization (less than 1% accuracy drop), for the given configuration file, INC will end on the first configuration, quantizing all layers using `naive` calibration mode for each. To see the true potential of INC, we need a model which suffers from a larger accuracy drop after quantization.\n\n### Quantizing ResNet50v2\n\nThis example shows how to use INC to quantize ResNet50 v2. 
In this case, the native MXNet quantization introduce a huge accuracy drop (70% using `naive` calibration mode) and INC allows automatically find better solution.\n\nThis is the (TODO link to INC configuration file) for this example: \n```yaml\nversion: 1.0\n\nmodel:\n name: resnet50_v2\n framework: mxnet\n\nquantization:\n calibration:\n sampling_size: 192 # number of samples for calibration\n\ntuning:\n strategy:\n name: mse\n accuracy_criterion:\n relative: 0.015\n exit_policy:\n timeout: 0\n max_trials: 500\n random_seed: 9527\n```\n\nIt could be used with script below \n(TODO link to resnet_mse.py)\nto find operator which mostly influence accuracy drops and disable it from quantization.", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "954863542", + "repo_full_name": "apache/mxnet", + "pr_number": 21127, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization_inc.md", + "discussion_id": "954863542", + "commented_code": "@@ -0,0 +1,290 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+# Improving accuracy with Intel® Neural Compressor\n+\n+The accuracy of a model can decrease as a result of quantization. When the accuracy drop is significant, we can try to manually find a better quantization configuration (exclude some layers, try different calibration methods, etc.), but for bigger models this might prove to be a difficult and time consuming task. [Intel® Neural Compressor](https://github.com/intel/neural-compressor) (INC) tries to automate this process using several tuning heuristics, which aim to find the quantization configuration that satisfies the specified accuracy requirement.\n+\n+**NOTE:**\n+\n+Most tuning strategies will try different configurations on an evaluation dataset in order to find out how each layer affects the accuracy of the model. This means that for larger models, it may take a long time to find a solution (as the tuning space is usually larger and the evaluation itself takes longer).\n+\n+## Installation and Prerequisites\n+\n+- Install MXNet with oneDNN enabled as described in the [Get started](https://mxnet.apache.org/versions/master/get_started?platform=linux&language=python&processor=cpu&environ=pip&). (Until the 2.0 release you can use the nightly build version: `pip install --pre mxnet -f https://dist.mxnet.io/python`)\n+\n+- Install Intel® Neural Compressor:\n+\n+ Use one of the commands below to install INC (supported python versions are: 3.6, 3.7, 3.8, 3.9):\n+\n+ ```bash\n+ # install stable version from pip\n+ pip install neural-compressor\n+\n+ # install nightly version from pip\n+ pip install -i https://test.pypi.org/simple/ neural-compressor\n+\n+ # install stable version from conda\n+ conda install neural-compressor -c conda-forge -c intel\n+ ```\n+ If you come into trouble with dependencies on `cv2` library you can run: `apt-get update && apt-get install -y python3-opencv`\n+\n+## Configuration file\n+\n+Quantization tuning process can be customized in the yaml configuration file. Below is a simple example:\n+\n+```yaml\n+# cnn.yaml\n+\n+version: 1.0\n+\n+model:\n+ name: cnn\n+ framework: mxnet\n+\n+quantization:\n+ calibration:\n+ sampling_size: 160 # number of samples for calibration\n+\n+tuning:\n+ strategy:\n+ name: basic\n+ accuracy_criterion:\n+ relative: 0.01\n+ exit_policy:\n+ timeout: 0\n+ random_seed: 9527\n+```\n+\n+We are using the `basic` strategy, but you could also try out different ones. 
[Here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md) you can find a list of strategies available in INC and details of how they work. You can also add your own strategy if the existing ones do not suit your needs.\n+\n+Since the value of `timeout` is 0, INC will run until it finds a configuration that satisfies the accuracy criterion and then exit. Depending on the strategy this may not be ideal, as sometimes it would be better to further explore the tuning space to find a superior configuration both in terms of accuracy and speed. To achieve this, we can set a specific `timeout` value, which will tell INC how long (in seconds) it should run.\n+\n+For more information about the configuration file, see the [template](https://github.com/intel/neural-compressor/blob/master/neural_compressor/template/ptq.yaml) from the official INC repo. Keep in mind that only the `post training quantization` is currently supported for MXNet.\n+\n+## Model quantization and tuning\n+\n+In general, Intel® Neural Compressor requires 4 elements in order to run: \n+1. Config file - like the example above \n+2. Model to be quantized \n+3. Calibration dataloader \n+4. Evaluation function - a function that takes a model as an argument and returns the accuracy it achieves on a certain evaluation dataset. \n+\n+### Quantizing ResNet\n+\n+The [quantization](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/performance/backend/dnnl/dnnl_quantization.html#Quantization) sections described how to quantize ResNet using the native MXNet quantization. This example shows how we can achieve the similar results (with the auto-tuning) using INC.\n+\n+1. Get the model\n+\n+```python\n+import logging\n+import mxnet as mx\n+from mxnet.gluon.model_zoo import vision\n+\n+logging.basicConfig()\n+logger = logging.getLogger('logger')\n+logger.setLevel(logging.INFO)\n+\n+batch_shape = (1, 3, 224, 224)\n+resnet18 = vision.resnet18_v1(pretrained=True)\n+```\n+\n+2. Prepare the dataset:\n+\n+```python\n+mx.test_utils.download('http://data.mxnet.io/data/val_256_q90.rec', 'data/val_256_q90.rec')\n+\n+batch_size = 16\n+mean_std = {'mean_r': 123.68, 'mean_g': 116.779, 'mean_b': 103.939,\n+ 'std_r': 58.393, 'std_g': 57.12, 'std_b': 57.375}\n+\n+data = mx.io.ImageRecordIter(path_imgrec='data/val_256_q90.rec',\n+ batch_size=batch_size,\n+ data_shape=batch_shape[1:],\n+ rand_crop=False,\n+ rand_mirror=False,\n+ shuffle=False,\n+ **mean_std)\n+data.batch_size = batch_size\n+```\n+\n+3. Prepare the evaluation function:\n+\n+```python\n+eval_samples = batch_size*10\n+\n+def eval_func(model):\n+ data.reset()\n+ metric = mx.metric.Accuracy()\n+ for i, batch in enumerate(data):\n+ if i * batch_size >= eval_samples:\n+ break\n+ x = batch.data[0].as_in_context(mx.cpu())\n+ label = batch.label[0].as_in_context(mx.cpu())\n+ outputs = model.forward(x)\n+ metric.update(label, outputs)\n+ return metric.get()[1]\n+```\n+\n+4. Run Intel® Neural Compressor:\n+\n+```python\n+from neural_compressor.experimental import Quantization\n+quantizer = Quantization(\"./cnn.yaml\")\n+quantizer.model = resnet18\n+quantizer.calib_dataloader = data\n+quantizer.eval_func = eval_func\n+qnet = quantizer.fit().model\n+```\n+\n+Since this model already achieves good accuracy using native quantization (less than 1% accuracy drop), for the given configuration file, INC will end on the first configuration, quantizing all layers using `naive` calibration mode for each. 
To see the true potential of INC, we need a model which suffers from a larger accuracy drop after quantization.\n+\n+### Quantizing ResNet50v2\n+\n+This example shows how to use INC to quantize ResNet50 v2. In this case, the native MXNet quantization introduce a huge accuracy drop (70% using `naive` calibration mode) and INC allows automatically find better solution.\n+\n+This is the (TODO link to INC configuration file) for this example: \n+```yaml\n+version: 1.0\n+\n+model:\n+ name: resnet50_v2\n+ framework: mxnet\n+\n+quantization:\n+ calibration:\n+ sampling_size: 192 # number of samples for calibration\n+\n+tuning:\n+ strategy:\n+ name: mse\n+ accuracy_criterion:\n+ relative: 0.015\n+ exit_policy:\n+ timeout: 0\n+ max_trials: 500\n+ random_seed: 9527\n+```\n+\n+It could be used with script below \n+(TODO link to resnet_mse.py)\n+to find operator which mostly influence accuracy drops and disable it from quantization. ", + "comment_created_at": "2022-08-25T11:45:02+00:00", + "comment_author": "bartekkuncer", + "comment_body": "```suggestion\r\nto find operator, which caused the most significant accuracy drop and disable it from quantization. \r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "954874866", + "pr_number": 21127, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization_inc.md", + "created_at": "2022-08-25T11:57:43+00:00", + "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n# Improving accuracy with Intel® Neural Compressor\n\nThe accuracy of a model can decrease as a result of quantization. When the accuracy drop is significant, we can try to manually find a better quantization configuration (exclude some layers, try different calibration methods, etc.), but for bigger models this might prove to be a difficult and time consuming task. [Intel® Neural Compressor](https://github.com/intel/neural-compressor) (INC) tries to automate this process using several tuning heuristics, which aim to find the quantization configuration that satisfies the specified accuracy requirement.\n\n**NOTE:**\n\nMost tuning strategies will try different configurations on an evaluation dataset in order to find out how each layer affects the accuracy of the model. This means that for larger models, it may take a long time to find a solution (as the tuning space is usually larger and the evaluation itself takes longer).\n\n## Installation and Prerequisites\n\n- Install MXNet with oneDNN enabled as described in the [Get started](https://mxnet.apache.org/versions/master/get_started?platform=linux&language=python&processor=cpu&environ=pip&). (Until the 2.0 release you can use the nightly build version: `pip install --pre mxnet -f https://dist.mxnet.io/python`)\n\n- Install Intel® Neural Compressor:\n\n Use one of the commands below to install INC (supported python versions are: 3.6, 3.7, 3.8, 3.9):\n\n ```bash\n # install stable version from pip\n pip install neural-compressor\n\n # install nightly version from pip\n pip install -i https://test.pypi.org/simple/ neural-compressor\n\n # install stable version from conda\n conda install neural-compressor -c conda-forge -c intel\n ```\n If you come into trouble with dependencies on `cv2` library you can run: `apt-get update && apt-get install -y python3-opencv`\n\n## Configuration file\n\nQuantization tuning process can be customized in the yaml configuration file. 
Below is a simple example:\n\n```yaml\n# cnn.yaml\n\nversion: 1.0\n\nmodel:\n name: cnn\n framework: mxnet\n\nquantization:\n calibration:\n sampling_size: 160 # number of samples for calibration\n\ntuning:\n strategy:\n name: basic\n accuracy_criterion:\n relative: 0.01\n exit_policy:\n timeout: 0\n random_seed: 9527\n```\n\nWe are using the `basic` strategy, but you could also try out different ones. [Here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md) you can find a list of strategies available in INC and details of how they work. You can also add your own strategy if the existing ones do not suit your needs.\n\nSince the value of `timeout` is 0, INC will run until it finds a configuration that satisfies the accuracy criterion and then exit. Depending on the strategy this may not be ideal, as sometimes it would be better to further explore the tuning space to find a superior configuration both in terms of accuracy and speed. To achieve this, we can set a specific `timeout` value, which will tell INC how long (in seconds) it should run.\n\nFor more information about the configuration file, see the [template](https://github.com/intel/neural-compressor/blob/master/neural_compressor/template/ptq.yaml) from the official INC repo. Keep in mind that only the `post training quantization` is currently supported for MXNet.\n\n## Model quantization and tuning\n\nIn general, Intel® Neural Compressor requires 4 elements in order to run: \n1. Config file - like the example above \n2. Model to be quantized \n3. Calibration dataloader \n4. Evaluation function - a function that takes a model as an argument and returns the accuracy it achieves on a certain evaluation dataset. \n\n### Quantizing ResNet\n\nThe [quantization](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/performance/backend/dnnl/dnnl_quantization.html#Quantization) sections described how to quantize ResNet using the native MXNet quantization. This example shows how we can achieve the similar results (with the auto-tuning) using INC.\n\n1. Get the model\n\n```python\nimport logging\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo import vision\n\nlogging.basicConfig()\nlogger = logging.getLogger('logger')\nlogger.setLevel(logging.INFO)\n\nbatch_shape = (1, 3, 224, 224)\nresnet18 = vision.resnet18_v1(pretrained=True)\n```\n\n2. Prepare the dataset:\n\n```python\nmx.test_utils.download('http://data.mxnet.io/data/val_256_q90.rec', 'data/val_256_q90.rec')\n\nbatch_size = 16\nmean_std = {'mean_r': 123.68, 'mean_g': 116.779, 'mean_b': 103.939,\n 'std_r': 58.393, 'std_g': 57.12, 'std_b': 57.375}\n\ndata = mx.io.ImageRecordIter(path_imgrec='data/val_256_q90.rec',\n batch_size=batch_size,\n data_shape=batch_shape[1:],\n rand_crop=False,\n rand_mirror=False,\n shuffle=False,\n **mean_std)\ndata.batch_size = batch_size\n```\n\n3. Prepare the evaluation function:\n\n```python\neval_samples = batch_size*10\n\ndef eval_func(model):\n data.reset()\n metric = mx.metric.Accuracy()\n for i, batch in enumerate(data):\n if i * batch_size >= eval_samples:\n break\n x = batch.data[0].as_in_context(mx.cpu())\n label = batch.label[0].as_in_context(mx.cpu())\n outputs = model.forward(x)\n metric.update(label, outputs)\n return metric.get()[1]\n```\n\n4. 
Run Intel® Neural Compressor:\n\n```python\nfrom neural_compressor.experimental import Quantization\nquantizer = Quantization(\"./cnn.yaml\")\nquantizer.model = resnet18\nquantizer.calib_dataloader = data\nquantizer.eval_func = eval_func\nqnet = quantizer.fit().model\n```\n\nSince this model already achieves good accuracy using native quantization (less than 1% accuracy drop), for the given configuration file, INC will end on the first configuration, quantizing all layers using `naive` calibration mode for each. To see the true potential of INC, we need a model which suffers from a larger accuracy drop after quantization.\n\n### Quantizing ResNet50v2\n\nThis example shows how to use INC to quantize ResNet50 v2. In this case, the native MXNet quantization introduce a huge accuracy drop (70% using `naive` calibration mode) and INC allows automatically find better solution.\n\nThis is the (TODO link to INC configuration file) for this example: \n```yaml\nversion: 1.0\n\nmodel:\n name: resnet50_v2\n framework: mxnet\n\nquantization:\n calibration:\n sampling_size: 192 # number of samples for calibration\n\ntuning:\n strategy:\n name: mse\n accuracy_criterion:\n relative: 0.015\n exit_policy:\n timeout: 0\n max_trials: 500\n random_seed: 9527\n```\n\nIt could be used with script below \n(TODO link to resnet_mse.py)\nto find operator which mostly influence accuracy drops and disable it from quantization. \nYou can find description of MSE strategy \n[here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md#user-content-mse).\n\n```python\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo.vision import resnet50_v2\nfrom mxnet.gluon.data.vision import transforms\nfrom mxnet.contrib.quantization import quantize_net\n\n# Preparing input data\nrgb_mean = (0.485, 0.456, 0.406)\nrgb_std = (0.229, 0.224, 0.225)\nbatch_size = 64\nnum_calib_batches = 9\n# set below proper path to ImageNet data set\ndataset = mx.gluon.data.vision.ImageRecordDataset('../imagenet/rec/val.rec')\n# Tuning in INC on whole data set takes too long time so we take only part of the whole data set\n# as representative part of it:\ndataset = dataset.take(num_calib_batches * batch_size)\ntransformer = transforms.Compose([transforms.Resize(256),\n transforms.CenterCrop(224),\n transforms.ToTensor(),\n transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n# Note: as input data are used many times during tuning it is better to prepared data earlier,\n# so lazy parameter for transform_first is set to False\nval_data = mx.gluon.data.DataLoader(\n dataset.transform_first(transformer, lazy=False), batch_size, shuffle=False)\nval_data.batch_size = batch_size\n\nnet = resnet50_v2(pretrained=True)\n\ndef eval_func(model):\n metric = mx.gluon.metric.Accuracy()\n for x, label in val_data:\n output = model(x)\n metric.update(label, output)\n accuracy = metric.get()[1]\n return accuracy\n\n\nfrom neural_compressor.experimental import Quantization\nquantizer = Quantization(\"resnet50v2_mse.yaml\")\nquantizer.model = net\nquantizer.calib_dataloader = val_data\nquantizer.eval_func = eval_func\nqnet_inc = quantizer.fit().model\nprint(\"INC finished\")\n# You can save optimized model for the later use:\nqnet_inc.export(\"__quantized_with_inc\")\n# You can see what configurations was applied aby INC and which nodes was excluded from quantization\n# to achieve given accuracy lost against floating point calculation", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "954874866", + "repo_full_name": 
"apache/mxnet", + "pr_number": 21127, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization_inc.md", + "discussion_id": "954874866", + "commented_code": "@@ -0,0 +1,290 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+# Improving accuracy with Intel® Neural Compressor\n+\n+The accuracy of a model can decrease as a result of quantization. When the accuracy drop is significant, we can try to manually find a better quantization configuration (exclude some layers, try different calibration methods, etc.), but for bigger models this might prove to be a difficult and time consuming task. [Intel® Neural Compressor](https://github.com/intel/neural-compressor) (INC) tries to automate this process using several tuning heuristics, which aim to find the quantization configuration that satisfies the specified accuracy requirement.\n+\n+**NOTE:**\n+\n+Most tuning strategies will try different configurations on an evaluation dataset in order to find out how each layer affects the accuracy of the model. This means that for larger models, it may take a long time to find a solution (as the tuning space is usually larger and the evaluation itself takes longer).\n+\n+## Installation and Prerequisites\n+\n+- Install MXNet with oneDNN enabled as described in the [Get started](https://mxnet.apache.org/versions/master/get_started?platform=linux&language=python&processor=cpu&environ=pip&). (Until the 2.0 release you can use the nightly build version: `pip install --pre mxnet -f https://dist.mxnet.io/python`)\n+\n+- Install Intel® Neural Compressor:\n+\n+ Use one of the commands below to install INC (supported python versions are: 3.6, 3.7, 3.8, 3.9):\n+\n+ ```bash\n+ # install stable version from pip\n+ pip install neural-compressor\n+\n+ # install nightly version from pip\n+ pip install -i https://test.pypi.org/simple/ neural-compressor\n+\n+ # install stable version from conda\n+ conda install neural-compressor -c conda-forge -c intel\n+ ```\n+ If you come into trouble with dependencies on `cv2` library you can run: `apt-get update && apt-get install -y python3-opencv`\n+\n+## Configuration file\n+\n+Quantization tuning process can be customized in the yaml configuration file. Below is a simple example:\n+\n+```yaml\n+# cnn.yaml\n+\n+version: 1.0\n+\n+model:\n+ name: cnn\n+ framework: mxnet\n+\n+quantization:\n+ calibration:\n+ sampling_size: 160 # number of samples for calibration\n+\n+tuning:\n+ strategy:\n+ name: basic\n+ accuracy_criterion:\n+ relative: 0.01\n+ exit_policy:\n+ timeout: 0\n+ random_seed: 9527\n+```\n+\n+We are using the `basic` strategy, but you could also try out different ones. [Here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md) you can find a list of strategies available in INC and details of how they work. You can also add your own strategy if the existing ones do not suit your needs.\n+\n+Since the value of `timeout` is 0, INC will run until it finds a configuration that satisfies the accuracy criterion and then exit. Depending on the strategy this may not be ideal, as sometimes it would be better to further explore the tuning space to find a superior configuration both in terms of accuracy and speed. To achieve this, we can set a specific `timeout` value, which will tell INC how long (in seconds) it should run.\n+\n+For more information about the configuration file, see the [template](https://github.com/intel/neural-compressor/blob/master/neural_compressor/template/ptq.yaml) from the official INC repo. 
Keep in mind that only the `post training quantization` is currently supported for MXNet.\n+\n+## Model quantization and tuning\n+\n+In general, Intel® Neural Compressor requires 4 elements in order to run: \n+1. Config file - like the example above \n+2. Model to be quantized \n+3. Calibration dataloader \n+4. Evaluation function - a function that takes a model as an argument and returns the accuracy it achieves on a certain evaluation dataset. \n+\n+### Quantizing ResNet\n+\n+The [quantization](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/performance/backend/dnnl/dnnl_quantization.html#Quantization) sections described how to quantize ResNet using the native MXNet quantization. This example shows how we can achieve the similar results (with the auto-tuning) using INC.\n+\n+1. Get the model\n+\n+```python\n+import logging\n+import mxnet as mx\n+from mxnet.gluon.model_zoo import vision\n+\n+logging.basicConfig()\n+logger = logging.getLogger('logger')\n+logger.setLevel(logging.INFO)\n+\n+batch_shape = (1, 3, 224, 224)\n+resnet18 = vision.resnet18_v1(pretrained=True)\n+```\n+\n+2. Prepare the dataset:\n+\n+```python\n+mx.test_utils.download('http://data.mxnet.io/data/val_256_q90.rec', 'data/val_256_q90.rec')\n+\n+batch_size = 16\n+mean_std = {'mean_r': 123.68, 'mean_g': 116.779, 'mean_b': 103.939,\n+ 'std_r': 58.393, 'std_g': 57.12, 'std_b': 57.375}\n+\n+data = mx.io.ImageRecordIter(path_imgrec='data/val_256_q90.rec',\n+ batch_size=batch_size,\n+ data_shape=batch_shape[1:],\n+ rand_crop=False,\n+ rand_mirror=False,\n+ shuffle=False,\n+ **mean_std)\n+data.batch_size = batch_size\n+```\n+\n+3. Prepare the evaluation function:\n+\n+```python\n+eval_samples = batch_size*10\n+\n+def eval_func(model):\n+ data.reset()\n+ metric = mx.metric.Accuracy()\n+ for i, batch in enumerate(data):\n+ if i * batch_size >= eval_samples:\n+ break\n+ x = batch.data[0].as_in_context(mx.cpu())\n+ label = batch.label[0].as_in_context(mx.cpu())\n+ outputs = model.forward(x)\n+ metric.update(label, outputs)\n+ return metric.get()[1]\n+```\n+\n+4. Run Intel® Neural Compressor:\n+\n+```python\n+from neural_compressor.experimental import Quantization\n+quantizer = Quantization(\"./cnn.yaml\")\n+quantizer.model = resnet18\n+quantizer.calib_dataloader = data\n+quantizer.eval_func = eval_func\n+qnet = quantizer.fit().model\n+```\n+\n+Since this model already achieves good accuracy using native quantization (less than 1% accuracy drop), for the given configuration file, INC will end on the first configuration, quantizing all layers using `naive` calibration mode for each. To see the true potential of INC, we need a model which suffers from a larger accuracy drop after quantization.\n+\n+### Quantizing ResNet50v2\n+\n+This example shows how to use INC to quantize ResNet50 v2. In this case, the native MXNet quantization introduce a huge accuracy drop (70% using `naive` calibration mode) and INC allows automatically find better solution.\n+\n+This is the (TODO link to INC configuration file) for this example: \n+```yaml\n+version: 1.0\n+\n+model:\n+ name: resnet50_v2\n+ framework: mxnet\n+\n+quantization:\n+ calibration:\n+ sampling_size: 192 # number of samples for calibration\n+\n+tuning:\n+ strategy:\n+ name: mse\n+ accuracy_criterion:\n+ relative: 0.015\n+ exit_policy:\n+ timeout: 0\n+ max_trials: 500\n+ random_seed: 9527\n+```\n+\n+It could be used with script below \n+(TODO link to resnet_mse.py)\n+to find operator which mostly influence accuracy drops and disable it from quantization. 
\n+You can find description of MSE strategy \n+[here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md#user-content-mse).\n+\n+```python\n+import mxnet as mx\n+from mxnet.gluon.model_zoo.vision import resnet50_v2\n+from mxnet.gluon.data.vision import transforms\n+from mxnet.contrib.quantization import quantize_net\n+\n+# Preparing input data\n+rgb_mean = (0.485, 0.456, 0.406)\n+rgb_std = (0.229, 0.224, 0.225)\n+batch_size = 64\n+num_calib_batches = 9\n+# set below proper path to ImageNet data set\n+dataset = mx.gluon.data.vision.ImageRecordDataset('../imagenet/rec/val.rec')\n+# Tuning in INC on whole data set takes too long time so we take only part of the whole data set\n+# as representative part of it:\n+dataset = dataset.take(num_calib_batches * batch_size)\n+transformer = transforms.Compose([transforms.Resize(256),\n+ transforms.CenterCrop(224),\n+ transforms.ToTensor(),\n+ transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n+# Note: as input data are used many times during tuning it is better to prepared data earlier,\n+# so lazy parameter for transform_first is set to False\n+val_data = mx.gluon.data.DataLoader(\n+ dataset.transform_first(transformer, lazy=False), batch_size, shuffle=False)\n+val_data.batch_size = batch_size\n+\n+net = resnet50_v2(pretrained=True)\n+\n+def eval_func(model):\n+ metric = mx.gluon.metric.Accuracy()\n+ for x, label in val_data:\n+ output = model(x)\n+ metric.update(label, output)\n+ accuracy = metric.get()[1]\n+ return accuracy\n+\n+\n+from neural_compressor.experimental import Quantization\n+quantizer = Quantization(\"resnet50v2_mse.yaml\")\n+quantizer.model = net\n+quantizer.calib_dataloader = val_data\n+quantizer.eval_func = eval_func\n+qnet_inc = quantizer.fit().model\n+print(\"INC finished\")\n+# You can save optimized model for the later use:\n+qnet_inc.export(\"__quantized_with_inc\")\n+# You can see what configurations was applied aby INC and which nodes was excluded from quantization\n+# to achieve given accuracy lost against floating point calculation", + "comment_created_at": "2022-08-25T11:57:43+00:00", + "comment_author": "bartekkuncer", + "comment_body": "```suggestion\r\n# You can see which configuration was applied by INC and which nodes were excluded from quantization,\r\n# to achieve given accuracy loss against floating point calculation.\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "796581572", + "pr_number": 20856, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", + "created_at": "2022-02-01T13:12:13+00:00", + "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## Introduction\n\nAfter successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n\nTwo main types of software optimizations can be characerized as:\n- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. 
chaining a sequence of operations which can be performed one after another immediately (example: ReLU activation)\n- compute-bound optimizations - these optimizations are mainly made on operations which require a large number of CPU cycles to complete, like FullyConnected and Convolution. One of the methods to speed up compute-bound operations is to lower the computation precision - this type of optimization is called quantization\n\nIn version 2.0 of Apache MXNet (incubating), GluonAPI2.0 replaced the Symbolic API known from versions 1.x, thus there are some differences in the API used to perform graph fusion and quantization.\n\n## Operator Fusion\n\nModels are often represented as a directed graph of operations (represented by nodes) and data flow (represented as edges). This way of visualizing helps a lot when searching for common patterns in the whole model which can be optimized by fusion. Example:\n![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n\n\nThe simplest way to explain what fusion is and how it works is to present an example. The image above depicts a sequence of popular operations taken from the ResNet architecture. This type of architecture is built with many similar blocks called residual blocks. Some possible fusion patterns are:\n\n- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifying the weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm a zero-time operation. The only cost of fusing is the time needed to prepare the weights and bias in Convolution based on BatchNorm parameters.\n- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is a very simple idea where, before writing data to the output, activation is performed on that data. The main benefit of this fusion is that there is no need to read and write back data in another layer only to perform a simple activation function. \n- Conv2D + Add => an even simpler idea than the previous one - instead of overwriting output memory, results are added to the output memory. 
In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`\n\nAbove examples are presented as atomic ones, but often they can be combined together, thus two patterns can be fused in above example:\n- Conv2D + BatchNorm + ReLU\n- Conv2D + BatchNorm + Add + ReLU\n\nAfter fusing all patterns, computational graph will be changed to the following one:\n![fused_fp32_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/fused_f32.png?raw=true)\n\n\n\n### Operator fusion in MXNet\nSince the version 1.6 of MXNet built with oneDNN support, operator fusion had been enabled by default if executing model with Module API, however in version 2.0 it has been decided to remove setting this feature by environment flag and replace it by aware user API call.\n\nTo fuse model in MXNet 2.0 there are two requirements:\n- the model must be defined as a subclass of HybridBlock or Symbol,\n- the model must have specific operator patterns which can be fused.\n\nAs an example we define example network (sample block from ResNet architecture):\n\n```\nimport mxnet as mx\nfrom mxnet.gluon import nn\n\nclass SampleBlock(nn.HybridBlock):\n def __init__(self):\n super(SampleBlock, self).__init__()\n self.conv1 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n use_bias=False, in_channels=64)\n self.bn1 = nn.BatchNorm()\n self.conv2 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n use_bias=False, in_channels=64)\n self.bn2 = nn.BatchNorm()\n\n def forward(self, x):\n out = mx.npx.activation(self.bn1(self.conv1(x)), 'relu')\n out = self.bn2(self.conv2(out))\n out = mx.npx.activation(out + x, 'relu')\n return out\n \nnet = SampleBlock()\nnet.initialize()\n\ndata = mx.np.zeros(shape=(1,64,224,224))\n# run fusion\nnet.optimize_for(data, backend='ONEDNN')\n\n# We can check fusion by plotting current symbol of our optimized network\nsym, _ = net.export(None)\ngraph = mx.viz.plot_network(sym, save_format='jpg')\ngraph.view()\n```\nBoth HybridBlock and Symbol classes provide API to easily run fusion of operators. Single line of code is enabling fusion passes on model:\n```\nnet.optimize_for(data, backend='ONEDNN')\n```\n\n*optimize_for* function is available also as Symbol class method. Example call to this API is shown below. Notice that Symbol’s *optimize_for* method is not done in-place, so assigning it to a new variable is required:\n\n```\noptimized_symbol = sym.optimize_for(backend='ONEDNN')\n```\n\nFor the above model definition in a naive benchmark with artificial data, we can gain up to 1.75x speedup without any accuracy loss on our testing machine with Intel(R) Core(TM) i9-9940X.\n\n\n## Quantization\nAs mentioned in the introduction, precision reduction is another very popular method of improving performance of workloads and, what is important, in most cases is combined together with operator fusion which improves performance even more. In training precision reduction utilizes 16 bit data types like bfloat or float16, but for inference great results can be achieved using int8. \n\nModel quantization helps on both memory-bound and compute-bound operations. In quantized model IO operations are reduced as int8 data type is 4x smaller than float32, and also computational throughput is increased as more data can be SIMD'ed. On modern Intel architectures using int8 data type can bring even more speedup by utilizing special VNNI instruction set. 
\n\n![before_quant](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/before_quant.png?raw=true)\n\nFirstly, quantization performs operator fusion on the floating-point model as mentioned in the earlier paragraph. Next, all operators which support the int8 data type are marked as quantized and, if needed, additional operators are injected into the graph surrounding the quantizable operator - the goal of these additional operators is to quantize, dequantize or requantize data to keep the data types between operators compatible.\n \n![quant_not_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_not_calib.png?raw=true)\n\n\nAfter the injection step it is important to perform calibration of the model, however this step is optional. Quantizing without calibration is not recommended in terms of performance. It will result in calculating the data minimum and maximum in quantize and requantize nodes during each inference pass. Calibrating a model greatly improves performance as minimum and maximum values are collected offline and are saved inside the node - this way there is no need to search for these values during the inference pass. \n\n![quant_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib.png?raw=true)\n\n\nCurrently, there are three supported calibration methods:\n- naive — min/max values from the calibration run,\n- entropy — uses KL divergence to determine the best symmetrical quantization thresholds for a given histogram of values,\n- custom — uses user-defined CalibrationCollector to control the calibration process.\n\nLast stage of quantization flow is to perform additional operator fusion. Second fusion is about merging requantize and dequantize operators into the preceding node - oneDNN kernels can perform needed scaling before writing the result to output, which results in model execution speed-up. Notice that the last Convolution does not need minimum and maximum values as it is not requantizing int32 to int8, but dequantizing directly to float32, and its scale is calculated based on the minimum and maximum of the input and weights.", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "796581572", + "repo_full_name":
chaining sequence of operations which can be performed one after another immediately (example: ReLU activation)\n+- compute-bound optimizations - these optimizations are mainly done on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution - one of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization\n+\n+In version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n+\n+## Operator Fusion\n+\n+Models are often represented as directed graph of operations (represented by nodes) and data flow (representad as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n+![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n+\n+\n+The simplest way to explain what fusion is and how it works is to present an example. On the image above is shown a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar block called residual blocks. Some possible fusion patterns are:\n+\n+- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n+- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n+- Conv2D + Add => even simpler idea than the previous one - instead of overwriting output memory, results are added to the output memory. 
In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`\n+\n+Above examples are presented as atomic ones, but often they can be combined together, thus two patterns can be fused in above example:\n+- Conv2D + BatchNorm + ReLU\n+- Conv2D + BatchNorm + Add + ReLU\n+\n+After fusing all patterns, computational graph will be changed to the following one:\n+![fused_fp32_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/fused_f32.png?raw=true)\n+\n+\n+\n+### Operator fusion in MXNet\n+Since the version 1.6 of MXNet built with oneDNN support, operator fusion had been enabled by default if executing model with Module API, however in version 2.0 it has been decided to remove setting this feature by environment flag and replace it by aware user API call.\n+\n+To fuse model in MXNet 2.0 there are two requirements:\n+- the model must be defined as a subclass of HybridBlock or Symbol,\n+- the model must have specific operator patterns which can be fused.\n+\n+As an example we define example network (sample block from ResNet architecture):\n+\n+```\n+import mxnet as mx\n+from mxnet.gluon import nn\n+\n+class SampleBlock(nn.HybridBlock):\n+ def __init__(self):\n+ super(SampleBlock, self).__init__()\n+ self.conv1 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n+ use_bias=False, in_channels=64)\n+ self.bn1 = nn.BatchNorm()\n+ self.conv2 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n+ use_bias=False, in_channels=64)\n+ self.bn2 = nn.BatchNorm()\n+\n+ def forward(self, x):\n+ out = mx.npx.activation(self.bn1(self.conv1(x)), 'relu')\n+ out = self.bn2(self.conv2(out))\n+ out = mx.npx.activation(out + x, 'relu')\n+ return out\n+ \n+net = SampleBlock()\n+net.initialize()\n+\n+data = mx.np.zeros(shape=(1,64,224,224))\n+# run fusion\n+net.optimize_for(data, backend='ONEDNN')\n+\n+# We can check fusion by plotting current symbol of our optimized network\n+sym, _ = net.export(None)\n+graph = mx.viz.plot_network(sym, save_format='jpg')\n+graph.view()\n+```\n+Both HybridBlock and Symbol classes provide API to easily run fusion of operators. Single line of code is enabling fusion passes on model:\n+```\n+net.optimize_for(data, backend='ONEDNN')\n+```\n+\n+*optimize_for* function is available also as Symbol class method. Example call to this API is shown below. Notice that Symbol’s *optimize_for* method is not done in-place, so assigning it to a new variable is required:\n+\n+```\n+optimized_symbol = sym.optimize_for(backend='ONEDNN')\n+```\n+\n+For the above model definition in a naive benchmark with artificial data, we can gain up to 1.75x speedup without any accuracy loss on our testing machine with Intel(R) Core(TM) i9-9940X.\n+\n+\n+## Quantization\n+As mentioned in the introduction, precision reduction is another very popular method of improving performance of workloads and, what is important, in most cases is combined together with operator fusion which improves performance even more. In training precision reduction utilizes 16 bit data types like bfloat or float16, but for inference great results can be achieved using int8. \n+\n+Model quantization helps on both memory-bound and compute-bound operations. In quantized model IO operations are reduced as int8 data type is 4x smaller than float32, and also computational throughput is increased as more data can be SIMD'ed. On modern Intel architectures using int8 data type can bring even more speedup by utilizing special VNNI instruction set. 
\n+\n+![before_quant](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/before_quant.png?raw=true)\n+\n+Firstly quantization performs operator fusion on floating-point model as mentioned in paragraph earlier. Next, all operators which support int8 data type are marked as quantized and if needed additional operators are injected into graph surrounding quantizable operator - the goal of this additional operators is to quantize, dequantize or requantize data to keep data type between operators compatible.\n+ \n+![quant_not_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_not_calib.png?raw=true)\n+\n+\n+After injection step it is important to perform calibration of the model, however this step is optional. Quantizing without calibration is not recommended in terms of performance. It will result in calculating data minimum and maximum in quantize and requantize nodes during each inference pass. Calibrating a model greatly improves performance as minimum and maximum values are collected offline and are saved inside node - this way there is no need to search for these values during inference pass. \n+\n+![quant_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib.png?raw=true)\n+\n+\n+Currently, there are three supported calibration methods:\n+- naive — min/max values from the calibration run,\n+- entropy — uses KL divergence to determine the best symmetrical quantization thresholds for a given histogram of values.\n+- custom — uses user-defined CalibrationCollector to control the calibration process.\n+\n+Last operation in quantization flow is to perform additional operator fusion. Second fusion is about merging requantize and dequantize operators into preceding node - oneDNN kernels can perform needed scaling before writing result to output which results in model execution speed-up. Notice that last Convolution does not need minimum and maximum values as it is not requantizing int32 to int8, but dequantizing directly to float32 and scale is calculated basing on minimum and maximum of input and weights.", + "comment_created_at": "2022-02-01T13:12:13+00:00", + "comment_author": "bartekkuncer", + "comment_body": "```suggestion\r\nLast stage of quantization flow is to perform additional operator fusion. Second fusion is about merging requantize and dequantize operators into preceding node - oneDNN kernels can perform needed scaling before writing result to output which results in model execution speed-up. Notice that last Convolution does not need minimum and maximum values as it is not requantizing int32 to int8, but dequantizing directly to float32 and scale is calculated basing on minimum and maximum of input and weights.\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "800441603", + "pr_number": 20856, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", + "created_at": "2022-02-07T09:05:28+00:00", + "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## Introduction\n\nAfter successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. 
In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n\nTwo main types of software optimizations can be characerized as:\n- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution)\n- compute-bound optimizations - these optimizations are mainly done on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution - one of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "800441603", + "repo_full_name": "apache/mxnet", + "pr_number": 20856, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", + "discussion_id": "800441603", + "commented_code": "@@ -0,0 +1,304 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+## Introduction\n+\n+After successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n+\n+Two main types of software optimizations can be characerized as:\n+- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution)\n+- compute-bound optimizations - these optimizations are mainly done on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution - one of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization", + "comment_created_at": "2022-02-07T09:05:28+00:00", + "comment_author": "bartekkuncer", + "comment_body": "```suggestion\r\n- compute-bound optimizations - these optimizations are mainly made on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution. One of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization.\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "800492034", + "pr_number": 20856, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", + "created_at": "2022-02-07T10:03:03+00:00", + "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## Introduction\n\nAfter successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. 
In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n\nTwo main types of software optimizations can be characerized as:\n- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution)\n- compute-bound optimizations - these optimizations are mainly done on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution - one of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization\n\nIn version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n\n## Operator Fusion\n\nModels are often represented as a directed graph of operations (represented by nodes) and data flow (represented as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n\n\nThe simplest way to explain what fusion is and how it works is to present an example. Image above depicts a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar blocks called residual blocks. Some possible fusion patterns are:\n\n- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n- Conv2D + Add => even simpler idea than the previous ones - instead of overwriting output memory, results are added to the output memory. 
In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`.\n\nAbove examples are presented as atomic ones, but often they can be combined together, thus two patterns that can be fused in above example are:\n- Conv2D + BatchNorm + ReLU\n- Conv2D + BatchNorm + Add + ReLU\n\nAfter fusing all patterns, computational graph will be changed to the following one:\n![fused_fp32_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/fused_f32.png?raw=true)\n\n\n\n### Operator fusion in MXNet\nSince the version 1.6 of MXNet built with oneDNN support, operator fusion had been enabled by default for executing model with Module API, however in version 2.0 it has been decided to remove setting this feature by environment flag and replace it by aware user API call.\n\nTo fuse model in MXNet 2.0 there are two requirements:\n- the model must be defined as a subclass of HybridBlock or Symbol,\n- the model must have specific operator patterns which can be fused.\n\nAs an example we define example network (sample block from ResNet architecture):\n\n```\nimport mxnet as mx\nfrom mxnet.gluon import nn\n\nclass SampleBlock(nn.HybridBlock):\n def __init__(self):\n super(SampleBlock, self).__init__()\n self.conv1 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n use_bias=False, in_channels=64)\n self.bn1 = nn.BatchNorm()\n self.conv2 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n use_bias=False, in_channels=64)\n self.bn2 = nn.BatchNorm()\n\n def forward(self, x):\n out = mx.npx.activation(self.bn1(self.conv1(x)), 'relu')\n out = self.bn2(self.conv2(out))\n out = mx.npx.activation(out + x, 'relu')\n return out\n \nnet = SampleBlock()\nnet.initialize()\n\ndata = mx.np.zeros(shape=(1,64,224,224))\n# run fusion\nnet.optimize_for(data, backend='ONEDNN')\n\n# We can check fusion by plotting current symbol of our optimized network\nsym, _ = net.export(None)\ngraph = mx.viz.plot_network(sym, save_format='jpg')\ngraph.view()\n```\nBoth HybridBlock and Symbol classes provide API to easily run fusion of operators. Single line of code is enabling fusion passes on model:\n```\nnet.optimize_for(data, backend='ONEDNN')\n```\n\n*optimize_for* function is available also as Symbol class method. Example call to this API is shown below. Notice that Symbol’s *optimize_for* method is not done in-place, so assigning it to a new variable is required:\n\n```\noptimized_symbol = sym.optimize_for(backend='ONEDNN')\n```\n\nFor the above model definition in a naive benchmark with artificial data, we can gain up to 1.75x speedup without any accuracy loss on our testing machine with Intel(R) Core(TM) i9-9940X.\n\n\n## Quantization\nAs mentioned in the introduction, precision reduction is another very popular method of improving performance of workloads and, what is important, in most cases is combined together with operator fusion which improves performance even more. In training precision reduction utilizes 16 bit data types like bfloat or float16, but for inference great results can be achieved using int8. \n\nModel quantization helps on both memory-bound and compute-bound operations. In quantized model IO operations are reduced as int8 data type is 4x smaller than float32, and also computational throughput is increased as more data can be SIMD'ed. On modern Intel architectures using int8 data type can bring even more speedup by utilizing special VNNI instruction set. 
\n\n![before_quant](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/before_quant.png?raw=true)\n\nFirstly quantization performs operator fusion on floating-point model as mentioned in paragraph earlier. Next, all operators which support int8 data type are marked as quantized and if needed additional operators are injected into graph surrounding quantizable operator - the goal of this additional operators is to quantize, dequantize or requantize data to keep data type between operators compatible.\n \n![quant_not_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_not_calib.png?raw=true)\n\n\nAfter injection step it is important to perform calibration of the model, however this step is optional. Quantizing without calibration is not recommended in terms of performance. It will result in calculating data minimum and maximum in quantize and requantize nodes during each inference pass. Calibrating a model greatly improves performance as minimum and maximum values are collected offline and are saved inside node - this way there is no need to search for these values during inference pass. \n\n![quant_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib.png?raw=true)\n\n\nCurrently, there are three supported calibration methods:\n- naive — min/max values from the calibration run,\n- entropy — uses KL divergence to determine the best symmetrical quantization thresholds for a given histogram of values,\n- custom — uses user-defined CalibrationCollector to control the calibration process.\n\nLast stage of quantization flow is to perform additional operator fusion. Second fusion is about merging requantize and dequantize operators into preceding node - oneDNN kernels can perform needed scaling before writing result to output which results in model execution speed-up. Notice that last Convolution does not need minimum and maximum values as it is not requantizing int32 to int8, but dequantizing directly to float32 and scale is calculated basing on minimum and maximum of input and weights.\n\n![quant_calib_fused](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib_fused.png?raw=true)\n\nIn MXNet 2.0, the quantization procedure has been adjusted to work well with Gluon models since it is the main API now. The goal was to allow the user to quantize fp32 HybridBlock model in just a few lines of code.\n\n### Quantization in MXNet\n\nAs an example of a quantization procedure, pretrained *resnet50_v1* from *model_zoo.vision* package can be used. To get it simply run the following code:\n\n```\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo.vision import resnet50_v1\n\nnet = resnet50_v1(pretrained=True)\n```\n\nNow, to get a ready-to-deploy quantized model two steps are required:\n1. Prepare data loader with calibration data - this data will be used as input to the network. All necessary layers will be observed with layer collector to calculate minimum and maximum value of that layer. This flow is internal mechanism and all what user needs to do is to provide data loader.\n2. 
Call `quantize_net` function from `contrib.quantize` package - both operator fusion calls will be called inside this API.\n\n```\ncalib_data_loader = mx.gluon.data.DataLoader(dummy_data, batch_size=batch_size)\nqnet = quantize_net(net, calib_mode='naive', calib_data=calib_data_loader)\n```\n\nFollowing function, which calculates total inference time on the model with an artificial data, can be used to compare the performance:\n\n```\ndef benchmark_net(net, batch_size=32, batches=100, warmup_batches=5):\n import time\n data = mx.np.random.uniform(-1.0, 1.0, (batch_size, 3, 224, 224))\n \n for i in range(batches + warmup_batches):\n if i == warmup_batches:\n tic = time.time()\n out = net(data)\n out.wait_to_read()\n \n total_time = time.time() - tic\n return total_time\n```\n\n\nComparing fused float32 network to quantized network on CLX8280 shows 4.29x speedup - measurment was done on 28 cores and this machine utilizes VNNI instruction set.\n\n\nThe other aspect of lowering the precision of a model is a difference in its accuracy. We will check that on previously tested resnet50_v1 with ImageNet dataset. To run this example you will need ImageNet dataset prepared with this tutorial and stored in path_to_imagenet. Let’s compare top1 and top5 accuracy of standard fp32 model with quantized int8 model calibrated using naive and entropy calibration mode. We will use only 10 batches of the validation dataset to calibrate quantized model.\n\n```\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo.vision import resnet50_v1\nfrom mxnet.gluon.data.vision import transforms\nfrom mxnet.contrib.quantization import quantize_net\n\ndef test_accuracy(net, data_loader):\n acc_top1 = mx.gluon.metric.Accuracy()\n acc_top5 = mx.gluon.metric.TopKAccuracy(5)\n \n for x, label in data_loader:\n output = net(x)\n acc_top1.update(label, output)\n acc_top5.update(label, output)\n\n _, top1 = acc_top1.get()\n _, top5 = acc_top5.get()\n\n return top1, top5\n \nrgb_mean = (0.485, 0.456, 0.406)\nrgb_std = (0.229, 0.224, 0.225)\nbatch_size = 64\n \ndataset = mx.gluon.data.vision.ImageRecordDataset('path_to_imagenet/val.rec')\ntransformer = transforms.Compose([transforms.Resize(256),\n transforms.CenterCrop(224),\n transforms.ToTensor(),\n transforms.Normalize(mean=rgb_mean, std=rgb_std)])\nval_data = mx.gluon.data.DataLoader(dataset.transform_first(transformer), batch_size, shuffle=True)\n\nnet = resnet50_v1(pretrained=True)\nnet.hybridize(backend='ONEDNN', static_alloc=True, static_shape=True)\n\ntop1, top5 = test_accuracy(net, val_data)\nprint('FP32 Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n\nqnet = quantize_net(net, calib_mode='naive', calib_data=val_data, num_calib_batches=10)\nqnet.hybridize(static_alloc=True, static_shape=True)\ntop1, top5 = test_accuracy(qnet, val_data)\nprint('INT8Naive Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n\nqnet = quantize_net(net, calib_mode='entropy', calib_data=val_data, num_calib_batches=10)\nqnet.hybridize(static_alloc=True, static_shape=True)\ntop1, top5 = test_accuracy(qnet, val_data)\nprint('INT8Entropy Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n```\n\n#### Output:\n> FP32 Top1 Accuracy: 0.76364 Top5 Accuracy: 0.93094\n> INT8Naive Top1 Accuracy: 0.76028 Top5 Accuracy: 0.92796\n> INT8Entropy Top1 Accuracy: 0.76404 Top5 Accuracy: 0.93042\n\nWith quantized model there is tiny accuracy drop, however this is cost of great performance optimization and memory footprint. 
The difference between calibration method is dependent on the model itself, used activation layers and the size of calibration data.", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "800492034", + "repo_full_name": "apache/mxnet", + "pr_number": 20856, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", + "discussion_id": "800492034", + "commented_code": "@@ -0,0 +1,304 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+## Introduction\n+\n+After successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n+\n+Two main types of software optimizations can be characerized as:\n+- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution)\n+- compute-bound optimizations - these optimizations are mainly done on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution - one of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization\n+\n+In version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n+\n+## Operator Fusion\n+\n+Models are often represented as a directed graph of operations (represented by nodes) and data flow (represented as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n+![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n+\n+\n+The simplest way to explain what fusion is and how it works is to present an example. Image above depicts a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar blocks called residual blocks. Some possible fusion patterns are:\n+\n+- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n+- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n+- Conv2D + Add => even simpler idea than the previous ones - instead of overwriting output memory, results are added to the output memory. 
In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`.\n+\n+Above examples are presented as atomic ones, but often they can be combined together, thus two patterns that can be fused in above example are:\n+- Conv2D + BatchNorm + ReLU\n+- Conv2D + BatchNorm + Add + ReLU\n+\n+After fusing all patterns, computational graph will be changed to the following one:\n+![fused_fp32_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/fused_f32.png?raw=true)\n+\n+\n+\n+### Operator fusion in MXNet\n+Since the version 1.6 of MXNet built with oneDNN support, operator fusion had been enabled by default for executing model with Module API, however in version 2.0 it has been decided to remove setting this feature by environment flag and replace it by aware user API call.\n+\n+To fuse model in MXNet 2.0 there are two requirements:\n+- the model must be defined as a subclass of HybridBlock or Symbol,\n+- the model must have specific operator patterns which can be fused.\n+\n+As an example we define example network (sample block from ResNet architecture):\n+\n+```\n+import mxnet as mx\n+from mxnet.gluon import nn\n+\n+class SampleBlock(nn.HybridBlock):\n+ def __init__(self):\n+ super(SampleBlock, self).__init__()\n+ self.conv1 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n+ use_bias=False, in_channels=64)\n+ self.bn1 = nn.BatchNorm()\n+ self.conv2 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n+ use_bias=False, in_channels=64)\n+ self.bn2 = nn.BatchNorm()\n+\n+ def forward(self, x):\n+ out = mx.npx.activation(self.bn1(self.conv1(x)), 'relu')\n+ out = self.bn2(self.conv2(out))\n+ out = mx.npx.activation(out + x, 'relu')\n+ return out\n+ \n+net = SampleBlock()\n+net.initialize()\n+\n+data = mx.np.zeros(shape=(1,64,224,224))\n+# run fusion\n+net.optimize_for(data, backend='ONEDNN')\n+\n+# We can check fusion by plotting current symbol of our optimized network\n+sym, _ = net.export(None)\n+graph = mx.viz.plot_network(sym, save_format='jpg')\n+graph.view()\n+```\n+Both HybridBlock and Symbol classes provide API to easily run fusion of operators. Single line of code is enabling fusion passes on model:\n+```\n+net.optimize_for(data, backend='ONEDNN')\n+```\n+\n+*optimize_for* function is available also as Symbol class method. Example call to this API is shown below. Notice that Symbol’s *optimize_for* method is not done in-place, so assigning it to a new variable is required:\n+\n+```\n+optimized_symbol = sym.optimize_for(backend='ONEDNN')\n+```\n+\n+For the above model definition in a naive benchmark with artificial data, we can gain up to 1.75x speedup without any accuracy loss on our testing machine with Intel(R) Core(TM) i9-9940X.\n+\n+\n+## Quantization\n+As mentioned in the introduction, precision reduction is another very popular method of improving performance of workloads and, what is important, in most cases is combined together with operator fusion which improves performance even more. In training precision reduction utilizes 16 bit data types like bfloat or float16, but for inference great results can be achieved using int8. \n+\n+Model quantization helps on both memory-bound and compute-bound operations. In quantized model IO operations are reduced as int8 data type is 4x smaller than float32, and also computational throughput is increased as more data can be SIMD'ed. On modern Intel architectures using int8 data type can bring even more speedup by utilizing special VNNI instruction set. 
\n+\n+![before_quant](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/before_quant.png?raw=true)\n+\n+Firstly quantization performs operator fusion on floating-point model as mentioned in paragraph earlier. Next, all operators which support int8 data type are marked as quantized and if needed additional operators are injected into graph surrounding quantizable operator - the goal of this additional operators is to quantize, dequantize or requantize data to keep data type between operators compatible.\n+ \n+![quant_not_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_not_calib.png?raw=true)\n+\n+\n+After injection step it is important to perform calibration of the model, however this step is optional. Quantizing without calibration is not recommended in terms of performance. It will result in calculating data minimum and maximum in quantize and requantize nodes during each inference pass. Calibrating a model greatly improves performance as minimum and maximum values are collected offline and are saved inside node - this way there is no need to search for these values during inference pass. \n+\n+![quant_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib.png?raw=true)\n+\n+\n+Currently, there are three supported calibration methods:\n+- naive — min/max values from the calibration run,\n+- entropy — uses KL divergence to determine the best symmetrical quantization thresholds for a given histogram of values,\n+- custom — uses user-defined CalibrationCollector to control the calibration process.\n+\n+Last stage of quantization flow is to perform additional operator fusion. Second fusion is about merging requantize and dequantize operators into preceding node - oneDNN kernels can perform needed scaling before writing result to output which results in model execution speed-up. Notice that last Convolution does not need minimum and maximum values as it is not requantizing int32 to int8, but dequantizing directly to float32 and scale is calculated basing on minimum and maximum of input and weights.\n+\n+![quant_calib_fused](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib_fused.png?raw=true)\n+\n+In MXNet 2.0, the quantization procedure has been adjusted to work well with Gluon models since it is the main API now. The goal was to allow the user to quantize fp32 HybridBlock model in just a few lines of code.\n+\n+### Quantization in MXNet\n+\n+As an example of a quantization procedure, pretrained *resnet50_v1* from *model_zoo.vision* package can be used. To get it simply run the following code:\n+\n+```\n+import mxnet as mx\n+from mxnet.gluon.model_zoo.vision import resnet50_v1\n+\n+net = resnet50_v1(pretrained=True)\n+```\n+\n+Now, to get a ready-to-deploy quantized model two steps are required:\n+1. Prepare data loader with calibration data - this data will be used as input to the network. All necessary layers will be observed with layer collector to calculate minimum and maximum value of that layer. This flow is internal mechanism and all what user needs to do is to provide data loader.\n+2. 
Call `quantize_net` function from `contrib.quantize` package - both operator fusion calls will be called inside this API.\n+\n+```\n+calib_data_loader = mx.gluon.data.DataLoader(dummy_data, batch_size=batch_size)\n+qnet = quantize_net(net, calib_mode='naive', calib_data=calib_data_loader)\n+```\n+\n+Following function, which calculates total inference time on the model with an artificial data, can be used to compare the performance:\n+\n+```\n+def benchmark_net(net, batch_size=32, batches=100, warmup_batches=5):\n+ import time\n+ data = mx.np.random.uniform(-1.0, 1.0, (batch_size, 3, 224, 224))\n+ \n+ for i in range(batches + warmup_batches):\n+ if i == warmup_batches:\n+ tic = time.time()\n+ out = net(data)\n+ out.wait_to_read()\n+ \n+ total_time = time.time() - tic\n+ return total_time\n+```\n+\n+\n+Comparing fused float32 network to quantized network on CLX8280 shows 4.29x speedup - measurment was done on 28 cores and this machine utilizes VNNI instruction set.\n+\n+\n+The other aspect of lowering the precision of a model is a difference in its accuracy. We will check that on previously tested resnet50_v1 with ImageNet dataset. To run this example you will need ImageNet dataset prepared with this tutorial and stored in path_to_imagenet. Let’s compare top1 and top5 accuracy of standard fp32 model with quantized int8 model calibrated using naive and entropy calibration mode. We will use only 10 batches of the validation dataset to calibrate quantized model.\n+\n+```\n+import mxnet as mx\n+from mxnet.gluon.model_zoo.vision import resnet50_v1\n+from mxnet.gluon.data.vision import transforms\n+from mxnet.contrib.quantization import quantize_net\n+\n+def test_accuracy(net, data_loader):\n+ acc_top1 = mx.gluon.metric.Accuracy()\n+ acc_top5 = mx.gluon.metric.TopKAccuracy(5)\n+ \n+ for x, label in data_loader:\n+ output = net(x)\n+ acc_top1.update(label, output)\n+ acc_top5.update(label, output)\n+\n+ _, top1 = acc_top1.get()\n+ _, top5 = acc_top5.get()\n+\n+ return top1, top5\n+ \n+rgb_mean = (0.485, 0.456, 0.406)\n+rgb_std = (0.229, 0.224, 0.225)\n+batch_size = 64\n+ \n+dataset = mx.gluon.data.vision.ImageRecordDataset('path_to_imagenet/val.rec')\n+transformer = transforms.Compose([transforms.Resize(256),\n+ transforms.CenterCrop(224),\n+ transforms.ToTensor(),\n+ transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n+val_data = mx.gluon.data.DataLoader(dataset.transform_first(transformer), batch_size, shuffle=True)\n+\n+net = resnet50_v1(pretrained=True)\n+net.hybridize(backend='ONEDNN', static_alloc=True, static_shape=True)\n+\n+top1, top5 = test_accuracy(net, val_data)\n+print('FP32 Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n+\n+qnet = quantize_net(net, calib_mode='naive', calib_data=val_data, num_calib_batches=10)\n+qnet.hybridize(static_alloc=True, static_shape=True)\n+top1, top5 = test_accuracy(qnet, val_data)\n+print('INT8Naive Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n+\n+qnet = quantize_net(net, calib_mode='entropy', calib_data=val_data, num_calib_batches=10)\n+qnet.hybridize(static_alloc=True, static_shape=True)\n+top1, top5 = test_accuracy(qnet, val_data)\n+print('INT8Entropy Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n+```\n+\n+#### Output:\n+> FP32 Top1 Accuracy: 0.76364 Top5 Accuracy: 0.93094\n+> INT8Naive Top1 Accuracy: 0.76028 Top5 Accuracy: 0.92796\n+> INT8Entropy Top1 Accuracy: 0.76404 Top5 Accuracy: 0.93042\n+\n+With quantized model there is tiny accuracy drop, however this is cost of great performance optimization and memory 
footprint. The difference between calibration method is dependent on the model itself, used activation layers and the size of calibration data.", + "comment_created_at": "2022-02-07T10:03:03+00:00", + "comment_author": "bartekkuncer", + "comment_body": "```suggestion\r\nWith quantized model there is tiny accuracy drop, however this is the cost of great performance optimization and memory footprint reduction. The difference between calibration methods is dependent on the model itself, used activation layers and the size of calibration data.\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "800492895", + "pr_number": 20856, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", + "created_at": "2022-02-07T10:04:07+00:00", + "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## Introduction\n\nAfter successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n\nTwo main types of software optimizations can be characerized as:\n- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution)\n- compute-bound optimizations - these optimizations are mainly done on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution - one of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization\n\nIn version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n\n## Operator Fusion\n\nModels are often represented as a directed graph of operations (represented by nodes) and data flow (represented as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n\n\nThe simplest way to explain what fusion is and how it works is to present an example. Image above depicts a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar blocks called residual blocks. Some possible fusion patterns are:\n\n- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. 
Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n- Conv2D + Add => even simpler idea than the previous ones - instead of overwriting output memory, results are added to the output memory. In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`.\n\nAbove examples are presented as atomic ones, but often they can be combined together, thus two patterns that can be fused in above example are:\n- Conv2D + BatchNorm + ReLU\n- Conv2D + BatchNorm + Add + ReLU\n\nAfter fusing all patterns, computational graph will be changed to the following one:\n![fused_fp32_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/fused_f32.png?raw=true)\n\n\n\n### Operator fusion in MXNet\nSince the version 1.6 of MXNet built with oneDNN support, operator fusion had been enabled by default for executing model with Module API, however in version 2.0 it has been decided to remove setting this feature by environment flag and replace it by aware user API call.\n\nTo fuse model in MXNet 2.0 there are two requirements:\n- the model must be defined as a subclass of HybridBlock or Symbol,\n- the model must have specific operator patterns which can be fused.\n\nAs an example we define example network (sample block from ResNet architecture):\n\n```\nimport mxnet as mx\nfrom mxnet.gluon import nn\n\nclass SampleBlock(nn.HybridBlock):\n def __init__(self):\n super(SampleBlock, self).__init__()\n self.conv1 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n use_bias=False, in_channels=64)\n self.bn1 = nn.BatchNorm()\n self.conv2 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n use_bias=False, in_channels=64)\n self.bn2 = nn.BatchNorm()\n\n def forward(self, x):\n out = mx.npx.activation(self.bn1(self.conv1(x)), 'relu')\n out = self.bn2(self.conv2(out))\n out = mx.npx.activation(out + x, 'relu')\n return out\n \nnet = SampleBlock()\nnet.initialize()\n\ndata = mx.np.zeros(shape=(1,64,224,224))\n# run fusion\nnet.optimize_for(data, backend='ONEDNN')\n\n# We can check fusion by plotting current symbol of our optimized network\nsym, _ = net.export(None)\ngraph = mx.viz.plot_network(sym, save_format='jpg')\ngraph.view()\n```\nBoth HybridBlock and Symbol classes provide API to easily run fusion of operators. Single line of code is enabling fusion passes on model:\n```\nnet.optimize_for(data, backend='ONEDNN')\n```\n\n*optimize_for* function is available also as Symbol class method. Example call to this API is shown below. Notice that Symbol’s *optimize_for* method is not done in-place, so assigning it to a new variable is required:\n\n```\noptimized_symbol = sym.optimize_for(backend='ONEDNN')\n```\n\nFor the above model definition in a naive benchmark with artificial data, we can gain up to 1.75x speedup without any accuracy loss on our testing machine with Intel(R) Core(TM) i9-9940X.\n\n\n## Quantization\nAs mentioned in the introduction, precision reduction is another very popular method of improving performance of workloads and, what is important, in most cases is combined together with operator fusion which improves performance even more. In training precision reduction utilizes 16 bit data types like bfloat or float16, but for inference great results can be achieved using int8. \n\nModel quantization helps on both memory-bound and compute-bound operations. 
In quantized model IO operations are reduced as int8 data type is 4x smaller than float32, and also computational throughput is increased as more data can be SIMD'ed. On modern Intel architectures using int8 data type can bring even more speedup by utilizing special VNNI instruction set. \n\n![before_quant](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/before_quant.png?raw=true)\n\nFirstly quantization performs operator fusion on floating-point model as mentioned in paragraph earlier. Next, all operators which support int8 data type are marked as quantized and if needed additional operators are injected into graph surrounding quantizable operator - the goal of this additional operators is to quantize, dequantize or requantize data to keep data type between operators compatible.\n \n![quant_not_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_not_calib.png?raw=true)\n\n\nAfter injection step it is important to perform calibration of the model, however this step is optional. Quantizing without calibration is not recommended in terms of performance. It will result in calculating data minimum and maximum in quantize and requantize nodes during each inference pass. Calibrating a model greatly improves performance as minimum and maximum values are collected offline and are saved inside node - this way there is no need to search for these values during inference pass. \n\n![quant_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib.png?raw=true)\n\n\nCurrently, there are three supported calibration methods:\n- naive — min/max values from the calibration run,\n- entropy — uses KL divergence to determine the best symmetrical quantization thresholds for a given histogram of values,\n- custom — uses user-defined CalibrationCollector to control the calibration process.\n\nLast stage of quantization flow is to perform additional operator fusion. Second fusion is about merging requantize and dequantize operators into preceding node - oneDNN kernels can perform needed scaling before writing result to output which results in model execution speed-up. Notice that last Convolution does not need minimum and maximum values as it is not requantizing int32 to int8, but dequantizing directly to float32 and scale is calculated basing on minimum and maximum of input and weights.\n\n![quant_calib_fused](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib_fused.png?raw=true)\n\nIn MXNet 2.0, the quantization procedure has been adjusted to work well with Gluon models since it is the main API now. The goal was to allow the user to quantize fp32 HybridBlock model in just a few lines of code.\n\n### Quantization in MXNet\n\nAs an example of a quantization procedure, pretrained *resnet50_v1* from *model_zoo.vision* package can be used. To get it simply run the following code:\n\n```\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo.vision import resnet50_v1\n\nnet = resnet50_v1(pretrained=True)\n```\n\nNow, to get a ready-to-deploy quantized model two steps are required:\n1. Prepare data loader with calibration data - this data will be used as input to the network. All necessary layers will be observed with layer collector to calculate minimum and maximum value of that layer. This flow is internal mechanism and all what user needs to do is to provide data loader.\n2. 
Call `quantize_net` function from `contrib.quantize` package - both operator fusion calls will be called inside this API.\n\n```\ncalib_data_loader = mx.gluon.data.DataLoader(dummy_data, batch_size=batch_size)\nqnet = quantize_net(net, calib_mode='naive', calib_data=calib_data_loader)\n```\n\nFollowing function, which calculates total inference time on the model with an artificial data, can be used to compare the performance:\n\n```\ndef benchmark_net(net, batch_size=32, batches=100, warmup_batches=5):\n import time\n data = mx.np.random.uniform(-1.0, 1.0, (batch_size, 3, 224, 224))\n \n for i in range(batches + warmup_batches):\n if i == warmup_batches:\n tic = time.time()\n out = net(data)\n out.wait_to_read()\n \n total_time = time.time() - tic\n return total_time\n```\n\n\nComparing fused float32 network to quantized network on CLX8280 shows 4.29x speedup - measurment was done on 28 cores and this machine utilizes VNNI instruction set.\n\n\nThe other aspect of lowering the precision of a model is a difference in its accuracy. We will check that on previously tested resnet50_v1 with ImageNet dataset. To run this example you will need ImageNet dataset prepared with this tutorial and stored in path_to_imagenet. Let’s compare top1 and top5 accuracy of standard fp32 model with quantized int8 model calibrated using naive and entropy calibration mode. We will use only 10 batches of the validation dataset to calibrate quantized model.\n\n```\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo.vision import resnet50_v1\nfrom mxnet.gluon.data.vision import transforms\nfrom mxnet.contrib.quantization import quantize_net\n\ndef test_accuracy(net, data_loader):\n acc_top1 = mx.gluon.metric.Accuracy()\n acc_top5 = mx.gluon.metric.TopKAccuracy(5)\n \n for x, label in data_loader:\n output = net(x)\n acc_top1.update(label, output)\n acc_top5.update(label, output)\n\n _, top1 = acc_top1.get()\n _, top5 = acc_top5.get()\n\n return top1, top5\n \nrgb_mean = (0.485, 0.456, 0.406)\nrgb_std = (0.229, 0.224, 0.225)\nbatch_size = 64\n \ndataset = mx.gluon.data.vision.ImageRecordDataset('path_to_imagenet/val.rec')\ntransformer = transforms.Compose([transforms.Resize(256),\n transforms.CenterCrop(224),\n transforms.ToTensor(),\n transforms.Normalize(mean=rgb_mean, std=rgb_std)])\nval_data = mx.gluon.data.DataLoader(dataset.transform_first(transformer), batch_size, shuffle=True)\n\nnet = resnet50_v1(pretrained=True)\nnet.hybridize(backend='ONEDNN', static_alloc=True, static_shape=True)\n\ntop1, top5 = test_accuracy(net, val_data)\nprint('FP32 Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n\nqnet = quantize_net(net, calib_mode='naive', calib_data=val_data, num_calib_batches=10)\nqnet.hybridize(static_alloc=True, static_shape=True)\ntop1, top5 = test_accuracy(qnet, val_data)\nprint('INT8Naive Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n\nqnet = quantize_net(net, calib_mode='entropy', calib_data=val_data, num_calib_batches=10)\nqnet.hybridize(static_alloc=True, static_shape=True)\ntop1, top5 = test_accuracy(qnet, val_data)\nprint('INT8Entropy Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n```\n\n#### Output:\n> FP32 Top1 Accuracy: 0.76364 Top5 Accuracy: 0.93094\n> INT8Naive Top1 Accuracy: 0.76028 Top5 Accuracy: 0.92796\n> INT8Entropy Top1 Accuracy: 0.76404 Top5 Accuracy: 0.93042\n\nWith quantized model there is tiny accuracy drop, however this is cost of great performance optimization and memory footprint. 
The difference between calibration method is dependent on the model itself, used activation layers and the size of calibration data.\n\n### Custom layer collectors and calibrating the model\nIn MXNet 2.0 new interface for creating custom calibration collector has been added. Main goal of this interface is to give the user as much flexibility as possible in almost every step of quantization. Creating own layer collector is pretty easy, however computing effective min/max values can be not a trivial task. \n\nLayer collectors are responsible for collecting statistics of each node in the graph — it means that the input/output data of every operator executed can be observed. Collector utilize the register_op_hook method of HybridBlock class.", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "800492895", + "repo_full_name": "apache/mxnet", + "pr_number": 20856, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", + "discussion_id": "800492895", + "commented_code": "@@ -0,0 +1,304 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+## Introduction\n+\n+After successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n+\n+Two main types of software optimizations can be characerized as:\n+- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution)\n+- compute-bound optimizations - these optimizations are mainly done on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution - one of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization\n+\n+In version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n+\n+## Operator Fusion\n+\n+Models are often represented as a directed graph of operations (represented by nodes) and data flow (represented as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n+![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n+\n+\n+The simplest way to explain what fusion is and how it works is to present an example. Image above depicts a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar blocks called residual blocks. Some possible fusion patterns are:\n+\n+- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. 
Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n+- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n+- Conv2D + Add => even simpler idea than the previous ones - instead of overwriting output memory, results are added to the output memory. In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`.\n+\n+Above examples are presented as atomic ones, but often they can be combined together, thus two patterns that can be fused in above example are:\n+- Conv2D + BatchNorm + ReLU\n+- Conv2D + BatchNorm + Add + ReLU\n+\n+After fusing all patterns, computational graph will be changed to the following one:\n+![fused_fp32_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/fused_f32.png?raw=true)\n+\n+\n+\n+### Operator fusion in MXNet\n+Since the version 1.6 of MXNet built with oneDNN support, operator fusion had been enabled by default for executing model with Module API, however in version 2.0 it has been decided to remove setting this feature by environment flag and replace it by aware user API call.\n+\n+To fuse model in MXNet 2.0 there are two requirements:\n+- the model must be defined as a subclass of HybridBlock or Symbol,\n+- the model must have specific operator patterns which can be fused.\n+\n+As an example we define example network (sample block from ResNet architecture):\n+\n+```\n+import mxnet as mx\n+from mxnet.gluon import nn\n+\n+class SampleBlock(nn.HybridBlock):\n+ def __init__(self):\n+ super(SampleBlock, self).__init__()\n+ self.conv1 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n+ use_bias=False, in_channels=64)\n+ self.bn1 = nn.BatchNorm()\n+ self.conv2 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n+ use_bias=False, in_channels=64)\n+ self.bn2 = nn.BatchNorm()\n+\n+ def forward(self, x):\n+ out = mx.npx.activation(self.bn1(self.conv1(x)), 'relu')\n+ out = self.bn2(self.conv2(out))\n+ out = mx.npx.activation(out + x, 'relu')\n+ return out\n+ \n+net = SampleBlock()\n+net.initialize()\n+\n+data = mx.np.zeros(shape=(1,64,224,224))\n+# run fusion\n+net.optimize_for(data, backend='ONEDNN')\n+\n+# We can check fusion by plotting current symbol of our optimized network\n+sym, _ = net.export(None)\n+graph = mx.viz.plot_network(sym, save_format='jpg')\n+graph.view()\n+```\n+Both HybridBlock and Symbol classes provide API to easily run fusion of operators. Single line of code is enabling fusion passes on model:\n+```\n+net.optimize_for(data, backend='ONEDNN')\n+```\n+\n+*optimize_for* function is available also as Symbol class method. Example call to this API is shown below. 
Notice that Symbol’s *optimize_for* method is not done in-place, so assigning it to a new variable is required:\n+\n+```\n+optimized_symbol = sym.optimize_for(backend='ONEDNN')\n+```\n+\n+For the above model definition in a naive benchmark with artificial data, we can gain up to 1.75x speedup without any accuracy loss on our testing machine with Intel(R) Core(TM) i9-9940X.\n+\n+\n+## Quantization\n+As mentioned in the introduction, precision reduction is another very popular method of improving performance of workloads and, what is important, in most cases is combined together with operator fusion which improves performance even more. In training precision reduction utilizes 16 bit data types like bfloat or float16, but for inference great results can be achieved using int8. \n+\n+Model quantization helps on both memory-bound and compute-bound operations. In quantized model IO operations are reduced as int8 data type is 4x smaller than float32, and also computational throughput is increased as more data can be SIMD'ed. On modern Intel architectures using int8 data type can bring even more speedup by utilizing special VNNI instruction set. \n+\n+![before_quant](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/before_quant.png?raw=true)\n+\n+Firstly quantization performs operator fusion on floating-point model as mentioned in paragraph earlier. Next, all operators which support int8 data type are marked as quantized and if needed additional operators are injected into graph surrounding quantizable operator - the goal of this additional operators is to quantize, dequantize or requantize data to keep data type between operators compatible.\n+ \n+![quant_not_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_not_calib.png?raw=true)\n+\n+\n+After injection step it is important to perform calibration of the model, however this step is optional. Quantizing without calibration is not recommended in terms of performance. It will result in calculating data minimum and maximum in quantize and requantize nodes during each inference pass. Calibrating a model greatly improves performance as minimum and maximum values are collected offline and are saved inside node - this way there is no need to search for these values during inference pass. \n+\n+![quant_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib.png?raw=true)\n+\n+\n+Currently, there are three supported calibration methods:\n+- naive — min/max values from the calibration run,\n+- entropy — uses KL divergence to determine the best symmetrical quantization thresholds for a given histogram of values,\n+- custom — uses user-defined CalibrationCollector to control the calibration process.\n+\n+Last stage of quantization flow is to perform additional operator fusion. Second fusion is about merging requantize and dequantize operators into preceding node - oneDNN kernels can perform needed scaling before writing result to output which results in model execution speed-up. 
Notice that last Convolution does not need minimum and maximum values as it is not requantizing int32 to int8, but dequantizing directly to float32 and scale is calculated basing on minimum and maximum of input and weights.\n+\n+![quant_calib_fused](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib_fused.png?raw=true)\n+\n+In MXNet 2.0, the quantization procedure has been adjusted to work well with Gluon models since it is the main API now. The goal was to allow the user to quantize fp32 HybridBlock model in just a few lines of code.\n+\n+### Quantization in MXNet\n+\n+As an example of a quantization procedure, pretrained *resnet50_v1* from *model_zoo.vision* package can be used. To get it simply run the following code:\n+\n+```\n+import mxnet as mx\n+from mxnet.gluon.model_zoo.vision import resnet50_v1\n+\n+net = resnet50_v1(pretrained=True)\n+```\n+\n+Now, to get a ready-to-deploy quantized model two steps are required:\n+1. Prepare data loader with calibration data - this data will be used as input to the network. All necessary layers will be observed with layer collector to calculate minimum and maximum value of that layer. This flow is internal mechanism and all what user needs to do is to provide data loader.\n+2. Call `quantize_net` function from `contrib.quantize` package - both operator fusion calls will be called inside this API.\n+\n+```\n+calib_data_loader = mx.gluon.data.DataLoader(dummy_data, batch_size=batch_size)\n+qnet = quantize_net(net, calib_mode='naive', calib_data=calib_data_loader)\n+```\n+\n+Following function, which calculates total inference time on the model with an artificial data, can be used to compare the performance:\n+\n+```\n+def benchmark_net(net, batch_size=32, batches=100, warmup_batches=5):\n+ import time\n+ data = mx.np.random.uniform(-1.0, 1.0, (batch_size, 3, 224, 224))\n+ \n+ for i in range(batches + warmup_batches):\n+ if i == warmup_batches:\n+ tic = time.time()\n+ out = net(data)\n+ out.wait_to_read()\n+ \n+ total_time = time.time() - tic\n+ return total_time\n+```\n+\n+\n+Comparing fused float32 network to quantized network on CLX8280 shows 4.29x speedup - measurment was done on 28 cores and this machine utilizes VNNI instruction set.\n+\n+\n+The other aspect of lowering the precision of a model is a difference in its accuracy. We will check that on previously tested resnet50_v1 with ImageNet dataset. To run this example you will need ImageNet dataset prepared with this tutorial and stored in path_to_imagenet. Let’s compare top1 and top5 accuracy of standard fp32 model with quantized int8 model calibrated using naive and entropy calibration mode. 
We will use only 10 batches of the validation dataset to calibrate quantized model.\n+\n+```\n+import mxnet as mx\n+from mxnet.gluon.model_zoo.vision import resnet50_v1\n+from mxnet.gluon.data.vision import transforms\n+from mxnet.contrib.quantization import quantize_net\n+\n+def test_accuracy(net, data_loader):\n+ acc_top1 = mx.gluon.metric.Accuracy()\n+ acc_top5 = mx.gluon.metric.TopKAccuracy(5)\n+ \n+ for x, label in data_loader:\n+ output = net(x)\n+ acc_top1.update(label, output)\n+ acc_top5.update(label, output)\n+\n+ _, top1 = acc_top1.get()\n+ _, top5 = acc_top5.get()\n+\n+ return top1, top5\n+ \n+rgb_mean = (0.485, 0.456, 0.406)\n+rgb_std = (0.229, 0.224, 0.225)\n+batch_size = 64\n+ \n+dataset = mx.gluon.data.vision.ImageRecordDataset('path_to_imagenet/val.rec')\n+transformer = transforms.Compose([transforms.Resize(256),\n+ transforms.CenterCrop(224),\n+ transforms.ToTensor(),\n+ transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n+val_data = mx.gluon.data.DataLoader(dataset.transform_first(transformer), batch_size, shuffle=True)\n+\n+net = resnet50_v1(pretrained=True)\n+net.hybridize(backend='ONEDNN', static_alloc=True, static_shape=True)\n+\n+top1, top5 = test_accuracy(net, val_data)\n+print('FP32 Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n+\n+qnet = quantize_net(net, calib_mode='naive', calib_data=val_data, num_calib_batches=10)\n+qnet.hybridize(static_alloc=True, static_shape=True)\n+top1, top5 = test_accuracy(qnet, val_data)\n+print('INT8Naive Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n+\n+qnet = quantize_net(net, calib_mode='entropy', calib_data=val_data, num_calib_batches=10)\n+qnet.hybridize(static_alloc=True, static_shape=True)\n+top1, top5 = test_accuracy(qnet, val_data)\n+print('INT8Entropy Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n+```\n+\n+#### Output:\n+> FP32 Top1 Accuracy: 0.76364 Top5 Accuracy: 0.93094\n+> INT8Naive Top1 Accuracy: 0.76028 Top5 Accuracy: 0.92796\n+> INT8Entropy Top1 Accuracy: 0.76404 Top5 Accuracy: 0.93042\n+\n+With quantized model there is tiny accuracy drop, however this is cost of great performance optimization and memory footprint. The difference between calibration method is dependent on the model itself, used activation layers and the size of calibration data.\n+\n+### Custom layer collectors and calibrating the model\n+In MXNet 2.0 new interface for creating custom calibration collector has been added. Main goal of this interface is to give the user as much flexibility as possible in almost every step of quantization. Creating own layer collector is pretty easy, however computing effective min/max values can be not a trivial task. \n+\n+Layer collectors are responsible for collecting statistics of each node in the graph — it means that the input/output data of every operator executed can be observed. Collector utilize the register_op_hook method of HybridBlock class.", + "comment_created_at": "2022-02-07T10:04:07+00:00", + "comment_author": "bartekkuncer", + "comment_body": "```suggestion\r\nLayer collectors are responsible for collecting statistics of each node in the graph — it means that the input/output data of every operator executed can be observed. 
Collector utilizes the register_op_hook method of HybridBlock class.\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "804509742", + "pr_number": 20856, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", + "created_at": "2022-02-11T10:08:02+00:00", + "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## Introduction\n\nAfter successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers simultaneously. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n\nTwo main types of software optimizations can be characerized as:\n- memory-bound optimizations - main objective of these optimizations is to reduce the amount of memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution),\n- compute-bound optimizations - these optimizations are mainly made on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution. One of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization.\n\nIn version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n\n## Operator Fusion\n\nModels are often represented as a directed graph of operations (represented by nodes) and data flow (represented as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n\n\nThe simplest way to explain what fusion is and how it works is to present an example. Image above depicts a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar blocks called residual blocks. Some possible fusion patterns are:\n\n- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n- Conv2D + Add => even simpler idea than the previous ones - instead of overwriting output memory, results are added to the output memory. 
In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`.\n\nAbove examples are presented as atomic ones, but often they can be combined together, thus two patterns that can be fused in above example are:\n- Conv2D + BatchNorm + ReLU\n- Conv2D + BatchNorm + Add + ReLU\n\nAfter fusing all patterns, computational graph will be changed to the following one:\n![fused_fp32_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/fused_f32.png?raw=true)\n\n\n\n### Operator fusion in MXNet\nSince the version 1.6 of MXNet built with oneDNN support, operator fusion had been enabled by default for executing model with Module API, however in version 2.0 it has been decided to remove setting this feature by environment flag and replace it by aware user API call.\n\nTo fuse model in MXNet 2.0 there are two requirements:\n- the model must be defined as a subclass of HybridBlock or Symbol,\n- the model must have specific operator patterns which can be fused.\n\nAs an example we define example network (sample block from ResNet architecture):\n\n```\nimport mxnet as mx\nfrom mxnet.gluon import nn\n\nclass SampleBlock(nn.HybridBlock):\n def __init__(self):\n super(SampleBlock, self).__init__()\n self.conv1 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n use_bias=False, in_channels=64)\n self.bn1 = nn.BatchNorm()\n self.conv2 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n use_bias=False, in_channels=64)\n self.bn2 = nn.BatchNorm()\n\n def forward(self, x):\n out = mx.npx.activation(self.bn1(self.conv1(x)), 'relu')\n out = self.bn2(self.conv2(out))\n out = mx.npx.activation(out + x, 'relu')\n return out\n \nnet = SampleBlock()\nnet.initialize()\n\ndata = mx.np.zeros(shape=(1,64,224,224))\n# run fusion\nnet.optimize_for(data, backend='ONEDNN')\n\n# We can check fusion by plotting current symbol of our optimized network\nsym, _ = net.export(None)\ngraph = mx.viz.plot_network(sym, save_format='jpg')\ngraph.view()\n```\nBoth HybridBlock and Symbol classes provide API to easily run fusion of operators. Single line of code is enabling fusion passes on model:\n```\nnet.optimize_for(data, backend='ONEDNN')\n```\n\n*optimize_for* function is available also as Symbol class method. Example call to this API is shown below. Notice that Symbol’s *optimize_for* method is not done in-place, so assigning it to a new variable is required:\n\n```\noptimized_symbol = sym.optimize_for(backend='ONEDNN')\n```\n\nFor the above model definition in a naive benchmark with artificial data, we can gain up to 1.75x speedup without any accuracy loss on our testing machine with Intel(R) Core(TM) i9-9940X.\n\n\n## Quantization\nAs mentioned in the introduction, precision reduction is another very popular method of improving performance of workloads and, what is important, in most cases is combined together with operator fusion which improves performance even more. In training precision reduction utilizes 16 bit data types like bfloat or float16, but for inference great results can be achieved using int8. \n\nModel quantization helps on both memory-bound and compute-bound operations. In quantized model IO operations are reduced as int8 data type is 4x smaller than float32, and also computational throughput is increased as more data can be SIMD'ed. On modern Intel architectures using int8 data type can bring even more speedup by utilizing special VNNI instruction set. 
\n\n![before_quant](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/before_quant.png?raw=true)\n\nFirstly quantization performs operator fusion on floating-point model as mentioned in paragraph earlier. Next, all operators which support int8 data type are marked as quantized and if needed additional operators are injected into graph surrounding quantizable operator - the goal of this additional operators is to quantize, dequantize or requantize data to keep data type between operators compatible.\n \n![quant_not_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_not_calib.png?raw=true)\n\n\nAfter injection step it is important to perform calibration of the model, however this step is optional. Quantizing without calibration is not recommended in terms of performance. It will result in calculating data minimum and maximum in quantize and requantize nodes during each inference pass. Calibrating a model greatly improves performance as minimum and maximum values are collected offline and are saved inside node - this way there is no need to search for these values during inference pass. \n\n![quant_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib.png?raw=true)\n\n\nCurrently, there are three supported calibration methods:\n- naive — min/max values from the calibration run,\n- entropy — uses KL divergence to determine the best symmetrical quantization thresholds for a given histogram of values,\n- custom — uses user-defined CalibrationCollector to control the calibration process.\n\nLast stage of quantization flow is to perform additional operator fusion. Second fusion is about merging requantize and dequantize operators into preceding node - oneDNN kernels can perform needed scaling before writing result to output which results in model execution speed-up. Notice that last Convolution does not need minimum and maximum values as it is not requantizing int32 to int8, but dequantizing directly to float32 and scale is calculated basing on minimum and maximum of input and weights.\n\n![quant_calib_fused](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib_fused.png?raw=true)\n\nIn MXNet 2.0, the quantization procedure has been adjusted to work well with Gluon models since it is the main API now. The goal was to allow the user to quantize fp32 HybridBlock model in just a few lines of code.\n\n### Quantization in MXNet\n\nAs an example of a quantization procedure, pretrained *resnet50_v1* from *model_zoo.vision* package can be used. To get it simply run the following code:\n\n```\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo.vision import resnet50_v1\n\nnet = resnet50_v1(pretrained=True)\n```\n\nNow, to get a ready-to-deploy quantized model two steps are required:\n1. Prepare data loader with calibration data - this data will be used as input to the network. All necessary layers will be observed with layer collector to calculate minimum and maximum value of that layer. This flow is internal mechanism and all what user needs to do is to provide data loader.\n2. 
Call `quantize_net` function from `contrib.quantize` package - both operator fusion calls will be called inside this API.\n\n```\ncalib_data_loader = mx.gluon.data.DataLoader(dummy_data, batch_size=batch_size)\nqnet = quantize_net(net, calib_mode='naive', calib_data=calib_data_loader)\n```\n\nFollowing function, which calculates total inference time on the model with an artificial data, can be used to compare the performance:\n\n```\ndef benchmark_net(net, batch_size=32, batches=100, warmup_batches=5):\n import time\n data = mx.np.random.uniform(-1.0, 1.0, (batch_size, 3, 224, 224))\n \n for i in range(batches + warmup_batches):\n if i == warmup_batches:\n tic = time.time()\n out = net(data)\n out.wait_to_read()\n \n total_time = time.time() - tic\n return total_time\n```\n\n\nComparing fused float32 network to quantized network on CLX8280 shows 4.29x speedup - measurment was done on 28 cores and this machine utilizes VNNI instruction set.\n\n\nThe other aspect of lowering the precision of a model is a difference in its accuracy. We will check that on previously tested resnet50_v1 with ImageNet dataset. To run this example you will need ImageNet dataset prepared with this tutorial and stored in path_to_imagenet. Let’s compare top1 and top5 accuracy of standard fp32 model with quantized int8 model calibrated using naive and entropy calibration mode. We will use only 10 batches of the validation dataset to calibrate quantized model.\n\n```\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo.vision import resnet50_v1\nfrom mxnet.gluon.data.vision import transforms\nfrom mxnet.contrib.quantization import quantize_net\n\ndef test_accuracy(net, data_loader):\n acc_top1 = mx.gluon.metric.Accuracy()\n acc_top5 = mx.gluon.metric.TopKAccuracy(5)\n \n for x, label in data_loader:\n output = net(x)\n acc_top1.update(label, output)\n acc_top5.update(label, output)\n\n _, top1 = acc_top1.get()\n _, top5 = acc_top5.get()\n\n return top1, top5\n \nrgb_mean = (0.485, 0.456, 0.406)\nrgb_std = (0.229, 0.224, 0.225)\nbatch_size = 64\n \ndataset = mx.gluon.data.vision.ImageRecordDataset('path_to_imagenet/val.rec')\ntransformer = transforms.Compose([transforms.Resize(256),\n transforms.CenterCrop(224),\n transforms.ToTensor(),\n transforms.Normalize(mean=rgb_mean, std=rgb_std)])\nval_data = mx.gluon.data.DataLoader(dataset.transform_first(transformer), batch_size, shuffle=True)\n\nnet = resnet50_v1(pretrained=True)\nnet.hybridize(backend='ONEDNN', static_alloc=True, static_shape=True)\n\ntop1, top5 = test_accuracy(net, val_data)\nprint('FP32 Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n\nqnet = quantize_net(net, calib_mode='naive', calib_data=val_data, num_calib_batches=10)\nqnet.hybridize(static_alloc=True, static_shape=True)\ntop1, top5 = test_accuracy(qnet, val_data)\nprint('INT8Naive Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n\nqnet = quantize_net(net, calib_mode='entropy', calib_data=val_data, num_calib_batches=10)\nqnet.hybridize(static_alloc=True, static_shape=True)\ntop1, top5 = test_accuracy(qnet, val_data)\nprint('INT8Entropy Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n```\n\n#### Output:\n> FP32 Top1 Accuracy: 0.76364 Top5 Accuracy: 0.93094\n> INT8Naive Top1 Accuracy: 0.76028 Top5 Accuracy: 0.92796\n> INT8Entropy Top1 Accuracy: 0.76404 Top5 Accuracy: 0.93042\n\nWith quantized model there is tiny accuracy drop, however this is the cost of great performance optimization and memory footprint reduction. 
The difference between calibration methods is dependent on the model itself, used activation layers and the size of calibration data.", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "804509742", + "repo_full_name": "apache/mxnet", + "pr_number": 20856, + "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", + "discussion_id": "804509742", + "commented_code": "@@ -0,0 +1,304 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+## Introduction\n+\n+After successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers simultaneously. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n+\n+Two main types of software optimizations can be characerized as:\n+- memory-bound optimizations - main objective of these optimizations is to reduce the amount of memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution),\n+- compute-bound optimizations - these optimizations are mainly made on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution. One of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization.\n+\n+In version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n+\n+## Operator Fusion\n+\n+Models are often represented as a directed graph of operations (represented by nodes) and data flow (represented as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n+![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n+\n+\n+The simplest way to explain what fusion is and how it works is to present an example. Image above depicts a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar blocks called residual blocks. Some possible fusion patterns are:\n+\n+- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n+- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n+- Conv2D + Add => even simpler idea than the previous ones - instead of overwriting output memory, results are added to the output memory. 
In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`.\n+\n+Above examples are presented as atomic ones, but often they can be combined together, thus two patterns that can be fused in above example are:\n+- Conv2D + BatchNorm + ReLU\n+- Conv2D + BatchNorm + Add + ReLU\n+\n+After fusing all patterns, computational graph will be changed to the following one:\n+![fused_fp32_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/fused_f32.png?raw=true)\n+\n+\n+\n+### Operator fusion in MXNet\n+Since the version 1.6 of MXNet built with oneDNN support, operator fusion had been enabled by default for executing model with Module API, however in version 2.0 it has been decided to remove setting this feature by environment flag and replace it by aware user API call.\n+\n+To fuse model in MXNet 2.0 there are two requirements:\n+- the model must be defined as a subclass of HybridBlock or Symbol,\n+- the model must have specific operator patterns which can be fused.\n+\n+As an example we define example network (sample block from ResNet architecture):\n+\n+```\n+import mxnet as mx\n+from mxnet.gluon import nn\n+\n+class SampleBlock(nn.HybridBlock):\n+ def __init__(self):\n+ super(SampleBlock, self).__init__()\n+ self.conv1 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n+ use_bias=False, in_channels=64)\n+ self.bn1 = nn.BatchNorm()\n+ self.conv2 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n+ use_bias=False, in_channels=64)\n+ self.bn2 = nn.BatchNorm()\n+\n+ def forward(self, x):\n+ out = mx.npx.activation(self.bn1(self.conv1(x)), 'relu')\n+ out = self.bn2(self.conv2(out))\n+ out = mx.npx.activation(out + x, 'relu')\n+ return out\n+ \n+net = SampleBlock()\n+net.initialize()\n+\n+data = mx.np.zeros(shape=(1,64,224,224))\n+# run fusion\n+net.optimize_for(data, backend='ONEDNN')\n+\n+# We can check fusion by plotting current symbol of our optimized network\n+sym, _ = net.export(None)\n+graph = mx.viz.plot_network(sym, save_format='jpg')\n+graph.view()\n+```\n+Both HybridBlock and Symbol classes provide API to easily run fusion of operators. Single line of code is enabling fusion passes on model:\n+```\n+net.optimize_for(data, backend='ONEDNN')\n+```\n+\n+*optimize_for* function is available also as Symbol class method. Example call to this API is shown below. Notice that Symbol’s *optimize_for* method is not done in-place, so assigning it to a new variable is required:\n+\n+```\n+optimized_symbol = sym.optimize_for(backend='ONEDNN')\n+```\n+\n+For the above model definition in a naive benchmark with artificial data, we can gain up to 1.75x speedup without any accuracy loss on our testing machine with Intel(R) Core(TM) i9-9940X.\n+\n+\n+## Quantization\n+As mentioned in the introduction, precision reduction is another very popular method of improving performance of workloads and, what is important, in most cases is combined together with operator fusion which improves performance even more. In training precision reduction utilizes 16 bit data types like bfloat or float16, but for inference great results can be achieved using int8. \n+\n+Model quantization helps on both memory-bound and compute-bound operations. In quantized model IO operations are reduced as int8 data type is 4x smaller than float32, and also computational throughput is increased as more data can be SIMD'ed. On modern Intel architectures using int8 data type can bring even more speedup by utilizing special VNNI instruction set. 
\n+\n+![before_quant](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/before_quant.png?raw=true)\n+\n+Firstly quantization performs operator fusion on floating-point model as mentioned in paragraph earlier. Next, all operators which support int8 data type are marked as quantized and if needed additional operators are injected into graph surrounding quantizable operator - the goal of this additional operators is to quantize, dequantize or requantize data to keep data type between operators compatible.\n+ \n+![quant_not_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_not_calib.png?raw=true)\n+\n+\n+After injection step it is important to perform calibration of the model, however this step is optional. Quantizing without calibration is not recommended in terms of performance. It will result in calculating data minimum and maximum in quantize and requantize nodes during each inference pass. Calibrating a model greatly improves performance as minimum and maximum values are collected offline and are saved inside node - this way there is no need to search for these values during inference pass. \n+\n+![quant_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib.png?raw=true)\n+\n+\n+Currently, there are three supported calibration methods:\n+- naive — min/max values from the calibration run,\n+- entropy — uses KL divergence to determine the best symmetrical quantization thresholds for a given histogram of values,\n+- custom — uses user-defined CalibrationCollector to control the calibration process.\n+\n+Last stage of quantization flow is to perform additional operator fusion. Second fusion is about merging requantize and dequantize operators into preceding node - oneDNN kernels can perform needed scaling before writing result to output which results in model execution speed-up. Notice that last Convolution does not need minimum and maximum values as it is not requantizing int32 to int8, but dequantizing directly to float32 and scale is calculated basing on minimum and maximum of input and weights.\n+\n+![quant_calib_fused](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib_fused.png?raw=true)\n+\n+In MXNet 2.0, the quantization procedure has been adjusted to work well with Gluon models since it is the main API now. The goal was to allow the user to quantize fp32 HybridBlock model in just a few lines of code.\n+\n+### Quantization in MXNet\n+\n+As an example of a quantization procedure, pretrained *resnet50_v1* from *model_zoo.vision* package can be used. To get it simply run the following code:\n+\n+```\n+import mxnet as mx\n+from mxnet.gluon.model_zoo.vision import resnet50_v1\n+\n+net = resnet50_v1(pretrained=True)\n+```\n+\n+Now, to get a ready-to-deploy quantized model two steps are required:\n+1. Prepare data loader with calibration data - this data will be used as input to the network. All necessary layers will be observed with layer collector to calculate minimum and maximum value of that layer. This flow is internal mechanism and all what user needs to do is to provide data loader.\n+2. 
Call `quantize_net` function from `contrib.quantize` package - both operator fusion calls will be called inside this API.\n+\n+```\n+calib_data_loader = mx.gluon.data.DataLoader(dummy_data, batch_size=batch_size)\n+qnet = quantize_net(net, calib_mode='naive', calib_data=calib_data_loader)\n+```\n+\n+Following function, which calculates total inference time on the model with an artificial data, can be used to compare the performance:\n+\n+```\n+def benchmark_net(net, batch_size=32, batches=100, warmup_batches=5):\n+ import time\n+ data = mx.np.random.uniform(-1.0, 1.0, (batch_size, 3, 224, 224))\n+ \n+ for i in range(batches + warmup_batches):\n+ if i == warmup_batches:\n+ tic = time.time()\n+ out = net(data)\n+ out.wait_to_read()\n+ \n+ total_time = time.time() - tic\n+ return total_time\n+```\n+\n+\n+Comparing fused float32 network to quantized network on CLX8280 shows 4.29x speedup - measurment was done on 28 cores and this machine utilizes VNNI instruction set.\n+\n+\n+The other aspect of lowering the precision of a model is a difference in its accuracy. We will check that on previously tested resnet50_v1 with ImageNet dataset. To run this example you will need ImageNet dataset prepared with this tutorial and stored in path_to_imagenet. Let’s compare top1 and top5 accuracy of standard fp32 model with quantized int8 model calibrated using naive and entropy calibration mode. We will use only 10 batches of the validation dataset to calibrate quantized model.\n+\n+```\n+import mxnet as mx\n+from mxnet.gluon.model_zoo.vision import resnet50_v1\n+from mxnet.gluon.data.vision import transforms\n+from mxnet.contrib.quantization import quantize_net\n+\n+def test_accuracy(net, data_loader):\n+ acc_top1 = mx.gluon.metric.Accuracy()\n+ acc_top5 = mx.gluon.metric.TopKAccuracy(5)\n+ \n+ for x, label in data_loader:\n+ output = net(x)\n+ acc_top1.update(label, output)\n+ acc_top5.update(label, output)\n+\n+ _, top1 = acc_top1.get()\n+ _, top5 = acc_top5.get()\n+\n+ return top1, top5\n+ \n+rgb_mean = (0.485, 0.456, 0.406)\n+rgb_std = (0.229, 0.224, 0.225)\n+batch_size = 64\n+ \n+dataset = mx.gluon.data.vision.ImageRecordDataset('path_to_imagenet/val.rec')\n+transformer = transforms.Compose([transforms.Resize(256),\n+ transforms.CenterCrop(224),\n+ transforms.ToTensor(),\n+ transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n+val_data = mx.gluon.data.DataLoader(dataset.transform_first(transformer), batch_size, shuffle=True)\n+\n+net = resnet50_v1(pretrained=True)\n+net.hybridize(backend='ONEDNN', static_alloc=True, static_shape=True)\n+\n+top1, top5 = test_accuracy(net, val_data)\n+print('FP32 Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n+\n+qnet = quantize_net(net, calib_mode='naive', calib_data=val_data, num_calib_batches=10)\n+qnet.hybridize(static_alloc=True, static_shape=True)\n+top1, top5 = test_accuracy(qnet, val_data)\n+print('INT8Naive Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n+\n+qnet = quantize_net(net, calib_mode='entropy', calib_data=val_data, num_calib_batches=10)\n+qnet.hybridize(static_alloc=True, static_shape=True)\n+top1, top5 = test_accuracy(qnet, val_data)\n+print('INT8Entropy Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n+```\n+\n+#### Output:\n+> FP32 Top1 Accuracy: 0.76364 Top5 Accuracy: 0.93094\n+> INT8Naive Top1 Accuracy: 0.76028 Top5 Accuracy: 0.92796\n+> INT8Entropy Top1 Accuracy: 0.76404 Top5 Accuracy: 0.93042\n+\n+With quantized model there is tiny accuracy drop, however this is the cost of great performance optimization and memory 
footprint reduction. The difference between calibration methods is dependent on the model itself, used activation layers and the size of calibration data.", + "comment_created_at": "2022-02-11T10:08:02+00:00", + "comment_author": "bartekkuncer", + "comment_body": "```suggestion\r\nWith quantized model there is a tiny accuracy drop, however this is the cost of great performance optimization and memory footprint reduction. The difference between calibration methods is dependent on the model itself, used activation layers and the size of calibration data.\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-technical-precision-matters.md b/_reviewers/mxnet-technical-precision-matters.md index 0e35afc..fcdc720 100644 --- a/_reviewers/mxnet-technical-precision-matters.md +++ b/_reviewers/mxnet-technical-precision-matters.md @@ -31,183 +31,3 @@ Key practices to follow: - Correct: "INC allows to automatically find better solution" Clear documentation directly impacts how effectively developers can implement and optimize AI models, particularly for critical operations like quantization that balance accuracy and performance. - - -[ - { - "discussion_id": "954861509", - "pr_number": 21127, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization_inc.md", - "created_at": "2022-08-25T11:42:50+00:00", - "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n# Improving accuracy with Intel\u00ae Neural Compressor\n\nThe accuracy of a model can decrease as a result of quantization. When the accuracy drop is significant, we can try to manually find a better quantization configuration (exclude some layers, try different calibration methods, etc.), but for bigger models this might prove to be a difficult and time consuming task. [Intel\u00ae Neural Compressor](https://github.com/intel/neural-compressor) (INC) tries to automate this process using several tuning heuristics, which aim to find the quantization configuration that satisfies the specified accuracy requirement.\n\n**NOTE:**\n\nMost tuning strategies will try different configurations on an evaluation dataset in order to find out how each layer affects the accuracy of the model. This means that for larger models, it may take a long time to find a solution (as the tuning space is usually larger and the evaluation itself takes longer).\n\n## Installation and Prerequisites\n\n- Install MXNet with oneDNN enabled as described in the [Get started](https://mxnet.apache.org/versions/master/get_started?platform=linux&language=python&processor=cpu&environ=pip&). (Until the 2.0 release you can use the nightly build version: `pip install --pre mxnet -f https://dist.mxnet.io/python`)\n\n- Install Intel\u00ae Neural Compressor:\n\n Use one of the commands below to install INC (supported python versions are: 3.6, 3.7, 3.8, 3.9):\n\n ```bash\n # install stable version from pip\n pip install neural-compressor\n\n # install nightly version from pip\n pip install -i https://test.pypi.org/simple/ neural-compressor\n\n # install stable version from conda\n conda install neural-compressor -c conda-forge -c intel\n ```\n If you come into trouble with dependencies on `cv2` library you can run: `apt-get update && apt-get install -y python3-opencv`\n\n## Configuration file\n\nQuantization tuning process can be customized in the yaml configuration file. 
Below is a simple example:\n\n```yaml\n# cnn.yaml\n\nversion: 1.0\n\nmodel:\n name: cnn\n framework: mxnet\n\nquantization:\n calibration:\n sampling_size: 160 # number of samples for calibration\n\ntuning:\n strategy:\n name: basic\n accuracy_criterion:\n relative: 0.01\n exit_policy:\n timeout: 0\n random_seed: 9527\n```\n\nWe are using the `basic` strategy, but you could also try out different ones. [Here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md) you can find a list of strategies available in INC and details of how they work. You can also add your own strategy if the existing ones do not suit your needs.\n\nSince the value of `timeout` is 0, INC will run until it finds a configuration that satisfies the accuracy criterion and then exit. Depending on the strategy this may not be ideal, as sometimes it would be better to further explore the tuning space to find a superior configuration both in terms of accuracy and speed. To achieve this, we can set a specific `timeout` value, which will tell INC how long (in seconds) it should run.\n\nFor more information about the configuration file, see the [template](https://github.com/intel/neural-compressor/blob/master/neural_compressor/template/ptq.yaml) from the official INC repo. Keep in mind that only the `post training quantization` is currently supported for MXNet.\n\n## Model quantization and tuning\n\nIn general, Intel\u00ae Neural Compressor requires 4 elements in order to run: \n1. Config file - like the example above \n2. Model to be quantized \n3. Calibration dataloader \n4. Evaluation function - a function that takes a model as an argument and returns the accuracy it achieves on a certain evaluation dataset. \n\n### Quantizing ResNet\n\nThe [quantization](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/performance/backend/dnnl/dnnl_quantization.html#Quantization) sections described how to quantize ResNet using the native MXNet quantization. This example shows how we can achieve the similar results (with the auto-tuning) using INC.\n\n1. Get the model\n\n```python\nimport logging\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo import vision\n\nlogging.basicConfig()\nlogger = logging.getLogger('logger')\nlogger.setLevel(logging.INFO)\n\nbatch_shape = (1, 3, 224, 224)\nresnet18 = vision.resnet18_v1(pretrained=True)\n```\n\n2. Prepare the dataset:\n\n```python\nmx.test_utils.download('http://data.mxnet.io/data/val_256_q90.rec', 'data/val_256_q90.rec')\n\nbatch_size = 16\nmean_std = {'mean_r': 123.68, 'mean_g': 116.779, 'mean_b': 103.939,\n 'std_r': 58.393, 'std_g': 57.12, 'std_b': 57.375}\n\ndata = mx.io.ImageRecordIter(path_imgrec='data/val_256_q90.rec',\n batch_size=batch_size,\n data_shape=batch_shape[1:],\n rand_crop=False,\n rand_mirror=False,\n shuffle=False,\n **mean_std)\ndata.batch_size = batch_size\n```\n\n3. Prepare the evaluation function:\n\n```python\neval_samples = batch_size*10\n\ndef eval_func(model):\n data.reset()\n metric = mx.metric.Accuracy()\n for i, batch in enumerate(data):\n if i * batch_size >= eval_samples:\n break\n x = batch.data[0].as_in_context(mx.cpu())\n label = batch.label[0].as_in_context(mx.cpu())\n outputs = model.forward(x)\n metric.update(label, outputs)\n return metric.get()[1]\n```\n\n4. 
Run Intel\u00ae Neural Compressor:\n\n```python\nfrom neural_compressor.experimental import Quantization\nquantizer = Quantization(\"./cnn.yaml\")\nquantizer.model = resnet18\nquantizer.calib_dataloader = data\nquantizer.eval_func = eval_func\nqnet = quantizer.fit().model\n```\n\nSince this model already achieves good accuracy using native quantization (less than 1% accuracy drop), for the given configuration file, INC will end on the first configuration, quantizing all layers using `naive` calibration mode for each. To see the true potential of INC, we need a model which suffers from a larger accuracy drop after quantization.\n\n### Quantizing ResNet50v2\n\nThis example shows how to use INC to quantize ResNet50 v2. In this case, the native MXNet quantization introduce a huge accuracy drop (70% using `naive` calibration mode) and INC allows automatically find better solution.", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "954861509", - "repo_full_name": "apache/mxnet", - "pr_number": 21127, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization_inc.md", - "discussion_id": "954861509", - "commented_code": "@@ -0,0 +1,290 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+# Improving accuracy with Intel\u00ae Neural Compressor\n+\n+The accuracy of a model can decrease as a result of quantization. When the accuracy drop is significant, we can try to manually find a better quantization configuration (exclude some layers, try different calibration methods, etc.), but for bigger models this might prove to be a difficult and time consuming task. [Intel\u00ae Neural Compressor](https://github.com/intel/neural-compressor) (INC) tries to automate this process using several tuning heuristics, which aim to find the quantization configuration that satisfies the specified accuracy requirement.\n+\n+**NOTE:**\n+\n+Most tuning strategies will try different configurations on an evaluation dataset in order to find out how each layer affects the accuracy of the model. This means that for larger models, it may take a long time to find a solution (as the tuning space is usually larger and the evaluation itself takes longer).\n+\n+## Installation and Prerequisites\n+\n+- Install MXNet with oneDNN enabled as described in the [Get started](https://mxnet.apache.org/versions/master/get_started?platform=linux&language=python&processor=cpu&environ=pip&). (Until the 2.0 release you can use the nightly build version: `pip install --pre mxnet -f https://dist.mxnet.io/python`)\n+\n+- Install Intel\u00ae Neural Compressor:\n+\n+ Use one of the commands below to install INC (supported python versions are: 3.6, 3.7, 3.8, 3.9):\n+\n+ ```bash\n+ # install stable version from pip\n+ pip install neural-compressor\n+\n+ # install nightly version from pip\n+ pip install -i https://test.pypi.org/simple/ neural-compressor\n+\n+ # install stable version from conda\n+ conda install neural-compressor -c conda-forge -c intel\n+ ```\n+ If you come into trouble with dependencies on `cv2` library you can run: `apt-get update && apt-get install -y python3-opencv`\n+\n+## Configuration file\n+\n+Quantization tuning process can be customized in the yaml configuration file. 
Below is a simple example:\n+\n+```yaml\n+# cnn.yaml\n+\n+version: 1.0\n+\n+model:\n+ name: cnn\n+ framework: mxnet\n+\n+quantization:\n+ calibration:\n+ sampling_size: 160 # number of samples for calibration\n+\n+tuning:\n+ strategy:\n+ name: basic\n+ accuracy_criterion:\n+ relative: 0.01\n+ exit_policy:\n+ timeout: 0\n+ random_seed: 9527\n+```\n+\n+We are using the `basic` strategy, but you could also try out different ones. [Here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md) you can find a list of strategies available in INC and details of how they work. You can also add your own strategy if the existing ones do not suit your needs.\n+\n+Since the value of `timeout` is 0, INC will run until it finds a configuration that satisfies the accuracy criterion and then exit. Depending on the strategy this may not be ideal, as sometimes it would be better to further explore the tuning space to find a superior configuration both in terms of accuracy and speed. To achieve this, we can set a specific `timeout` value, which will tell INC how long (in seconds) it should run.\n+\n+For more information about the configuration file, see the [template](https://github.com/intel/neural-compressor/blob/master/neural_compressor/template/ptq.yaml) from the official INC repo. Keep in mind that only the `post training quantization` is currently supported for MXNet.\n+\n+## Model quantization and tuning\n+\n+In general, Intel\u00ae Neural Compressor requires 4 elements in order to run: \n+1. Config file - like the example above \n+2. Model to be quantized \n+3. Calibration dataloader \n+4. Evaluation function - a function that takes a model as an argument and returns the accuracy it achieves on a certain evaluation dataset. \n+\n+### Quantizing ResNet\n+\n+The [quantization](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/performance/backend/dnnl/dnnl_quantization.html#Quantization) sections described how to quantize ResNet using the native MXNet quantization. This example shows how we can achieve the similar results (with the auto-tuning) using INC.\n+\n+1. Get the model\n+\n+```python\n+import logging\n+import mxnet as mx\n+from mxnet.gluon.model_zoo import vision\n+\n+logging.basicConfig()\n+logger = logging.getLogger('logger')\n+logger.setLevel(logging.INFO)\n+\n+batch_shape = (1, 3, 224, 224)\n+resnet18 = vision.resnet18_v1(pretrained=True)\n+```\n+\n+2. Prepare the dataset:\n+\n+```python\n+mx.test_utils.download('http://data.mxnet.io/data/val_256_q90.rec', 'data/val_256_q90.rec')\n+\n+batch_size = 16\n+mean_std = {'mean_r': 123.68, 'mean_g': 116.779, 'mean_b': 103.939,\n+ 'std_r': 58.393, 'std_g': 57.12, 'std_b': 57.375}\n+\n+data = mx.io.ImageRecordIter(path_imgrec='data/val_256_q90.rec',\n+ batch_size=batch_size,\n+ data_shape=batch_shape[1:],\n+ rand_crop=False,\n+ rand_mirror=False,\n+ shuffle=False,\n+ **mean_std)\n+data.batch_size = batch_size\n+```\n+\n+3. Prepare the evaluation function:\n+\n+```python\n+eval_samples = batch_size*10\n+\n+def eval_func(model):\n+ data.reset()\n+ metric = mx.metric.Accuracy()\n+ for i, batch in enumerate(data):\n+ if i * batch_size >= eval_samples:\n+ break\n+ x = batch.data[0].as_in_context(mx.cpu())\n+ label = batch.label[0].as_in_context(mx.cpu())\n+ outputs = model.forward(x)\n+ metric.update(label, outputs)\n+ return metric.get()[1]\n+```\n+\n+4. 
Run Intel\u00ae Neural Compressor:\n+\n+```python\n+from neural_compressor.experimental import Quantization\n+quantizer = Quantization(\"./cnn.yaml\")\n+quantizer.model = resnet18\n+quantizer.calib_dataloader = data\n+quantizer.eval_func = eval_func\n+qnet = quantizer.fit().model\n+```\n+\n+Since this model already achieves good accuracy using native quantization (less than 1% accuracy drop), for the given configuration file, INC will end on the first configuration, quantizing all layers using `naive` calibration mode for each. To see the true potential of INC, we need a model which suffers from a larger accuracy drop after quantization.\n+\n+### Quantizing ResNet50v2\n+\n+This example shows how to use INC to quantize ResNet50 v2. In this case, the native MXNet quantization introduce a huge accuracy drop (70% using `naive` calibration mode) and INC allows automatically find better solution.", - "comment_created_at": "2022-08-25T11:42:50+00:00", - "comment_author": "bartekkuncer", - "comment_body": "```suggestion\r\nThis example shows how to use INC to quantize ResNet50 v2. In this case, the native MXNet quantization introduce a huge accuracy drop (70% using `naive` calibration mode) and INC allows to automatically find better solution.\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "954863542", - "pr_number": 21127, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization_inc.md", - "created_at": "2022-08-25T11:45:02+00:00", - "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n# Improving accuracy with Intel\u00ae Neural Compressor\n\nThe accuracy of a model can decrease as a result of quantization. When the accuracy drop is significant, we can try to manually find a better quantization configuration (exclude some layers, try different calibration methods, etc.), but for bigger models this might prove to be a difficult and time consuming task. [Intel\u00ae Neural Compressor](https://github.com/intel/neural-compressor) (INC) tries to automate this process using several tuning heuristics, which aim to find the quantization configuration that satisfies the specified accuracy requirement.\n\n**NOTE:**\n\nMost tuning strategies will try different configurations on an evaluation dataset in order to find out how each layer affects the accuracy of the model. This means that for larger models, it may take a long time to find a solution (as the tuning space is usually larger and the evaluation itself takes longer).\n\n## Installation and Prerequisites\n\n- Install MXNet with oneDNN enabled as described in the [Get started](https://mxnet.apache.org/versions/master/get_started?platform=linux&language=python&processor=cpu&environ=pip&). (Until the 2.0 release you can use the nightly build version: `pip install --pre mxnet -f https://dist.mxnet.io/python`)\n\n- Install Intel\u00ae Neural Compressor:\n\n Use one of the commands below to install INC (supported python versions are: 3.6, 3.7, 3.8, 3.9):\n\n ```bash\n # install stable version from pip\n pip install neural-compressor\n\n # install nightly version from pip\n pip install -i https://test.pypi.org/simple/ neural-compressor\n\n # install stable version from conda\n conda install neural-compressor -c conda-forge -c intel\n ```\n If you come into trouble with dependencies on `cv2` library you can run: `apt-get update && apt-get install -y python3-opencv`\n\n## Configuration file\n\nQuantization tuning process can be customized in the yaml configuration file. 
Below is a simple example:\n\n```yaml\n# cnn.yaml\n\nversion: 1.0\n\nmodel:\n name: cnn\n framework: mxnet\n\nquantization:\n calibration:\n sampling_size: 160 # number of samples for calibration\n\ntuning:\n strategy:\n name: basic\n accuracy_criterion:\n relative: 0.01\n exit_policy:\n timeout: 0\n random_seed: 9527\n```\n\nWe are using the `basic` strategy, but you could also try out different ones. [Here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md) you can find a list of strategies available in INC and details of how they work. You can also add your own strategy if the existing ones do not suit your needs.\n\nSince the value of `timeout` is 0, INC will run until it finds a configuration that satisfies the accuracy criterion and then exit. Depending on the strategy this may not be ideal, as sometimes it would be better to further explore the tuning space to find a superior configuration both in terms of accuracy and speed. To achieve this, we can set a specific `timeout` value, which will tell INC how long (in seconds) it should run.\n\nFor more information about the configuration file, see the [template](https://github.com/intel/neural-compressor/blob/master/neural_compressor/template/ptq.yaml) from the official INC repo. Keep in mind that only the `post training quantization` is currently supported for MXNet.\n\n## Model quantization and tuning\n\nIn general, Intel\u00ae Neural Compressor requires 4 elements in order to run: \n1. Config file - like the example above \n2. Model to be quantized \n3. Calibration dataloader \n4. Evaluation function - a function that takes a model as an argument and returns the accuracy it achieves on a certain evaluation dataset. \n\n### Quantizing ResNet\n\nThe [quantization](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/performance/backend/dnnl/dnnl_quantization.html#Quantization) sections described how to quantize ResNet using the native MXNet quantization. This example shows how we can achieve the similar results (with the auto-tuning) using INC.\n\n1. Get the model\n\n```python\nimport logging\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo import vision\n\nlogging.basicConfig()\nlogger = logging.getLogger('logger')\nlogger.setLevel(logging.INFO)\n\nbatch_shape = (1, 3, 224, 224)\nresnet18 = vision.resnet18_v1(pretrained=True)\n```\n\n2. Prepare the dataset:\n\n```python\nmx.test_utils.download('http://data.mxnet.io/data/val_256_q90.rec', 'data/val_256_q90.rec')\n\nbatch_size = 16\nmean_std = {'mean_r': 123.68, 'mean_g': 116.779, 'mean_b': 103.939,\n 'std_r': 58.393, 'std_g': 57.12, 'std_b': 57.375}\n\ndata = mx.io.ImageRecordIter(path_imgrec='data/val_256_q90.rec',\n batch_size=batch_size,\n data_shape=batch_shape[1:],\n rand_crop=False,\n rand_mirror=False,\n shuffle=False,\n **mean_std)\ndata.batch_size = batch_size\n```\n\n3. Prepare the evaluation function:\n\n```python\neval_samples = batch_size*10\n\ndef eval_func(model):\n data.reset()\n metric = mx.metric.Accuracy()\n for i, batch in enumerate(data):\n if i * batch_size >= eval_samples:\n break\n x = batch.data[0].as_in_context(mx.cpu())\n label = batch.label[0].as_in_context(mx.cpu())\n outputs = model.forward(x)\n metric.update(label, outputs)\n return metric.get()[1]\n```\n\n4. 
Run Intel\u00ae Neural Compressor:\n\n```python\nfrom neural_compressor.experimental import Quantization\nquantizer = Quantization(\"./cnn.yaml\")\nquantizer.model = resnet18\nquantizer.calib_dataloader = data\nquantizer.eval_func = eval_func\nqnet = quantizer.fit().model\n```\n\nSince this model already achieves good accuracy using native quantization (less than 1% accuracy drop), for the given configuration file, INC will end on the first configuration, quantizing all layers using `naive` calibration mode for each. To see the true potential of INC, we need a model which suffers from a larger accuracy drop after quantization.\n\n### Quantizing ResNet50v2\n\nThis example shows how to use INC to quantize ResNet50 v2. In this case, the native MXNet quantization introduce a huge accuracy drop (70% using `naive` calibration mode) and INC allows automatically find better solution.\n\nThis is the (TODO link to INC configuration file) for this example: \n```yaml\nversion: 1.0\n\nmodel:\n name: resnet50_v2\n framework: mxnet\n\nquantization:\n calibration:\n sampling_size: 192 # number of samples for calibration\n\ntuning:\n strategy:\n name: mse\n accuracy_criterion:\n relative: 0.015\n exit_policy:\n timeout: 0\n max_trials: 500\n random_seed: 9527\n```\n\nIt could be used with script below \n(TODO link to resnet_mse.py)\nto find operator which mostly influence accuracy drops and disable it from quantization.", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "954863542", - "repo_full_name": "apache/mxnet", - "pr_number": 21127, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization_inc.md", - "discussion_id": "954863542", - "commented_code": "@@ -0,0 +1,290 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+# Improving accuracy with Intel\u00ae Neural Compressor\n+\n+The accuracy of a model can decrease as a result of quantization. When the accuracy drop is significant, we can try to manually find a better quantization configuration (exclude some layers, try different calibration methods, etc.), but for bigger models this might prove to be a difficult and time consuming task. [Intel\u00ae Neural Compressor](https://github.com/intel/neural-compressor) (INC) tries to automate this process using several tuning heuristics, which aim to find the quantization configuration that satisfies the specified accuracy requirement.\n+\n+**NOTE:**\n+\n+Most tuning strategies will try different configurations on an evaluation dataset in order to find out how each layer affects the accuracy of the model. This means that for larger models, it may take a long time to find a solution (as the tuning space is usually larger and the evaluation itself takes longer).\n+\n+## Installation and Prerequisites\n+\n+- Install MXNet with oneDNN enabled as described in the [Get started](https://mxnet.apache.org/versions/master/get_started?platform=linux&language=python&processor=cpu&environ=pip&). 
(Until the 2.0 release you can use the nightly build version: `pip install --pre mxnet -f https://dist.mxnet.io/python`)\n+\n+- Install Intel\u00ae Neural Compressor:\n+\n+ Use one of the commands below to install INC (supported python versions are: 3.6, 3.7, 3.8, 3.9):\n+\n+ ```bash\n+ # install stable version from pip\n+ pip install neural-compressor\n+\n+ # install nightly version from pip\n+ pip install -i https://test.pypi.org/simple/ neural-compressor\n+\n+ # install stable version from conda\n+ conda install neural-compressor -c conda-forge -c intel\n+ ```\n+ If you come into trouble with dependencies on `cv2` library you can run: `apt-get update && apt-get install -y python3-opencv`\n+\n+## Configuration file\n+\n+Quantization tuning process can be customized in the yaml configuration file. Below is a simple example:\n+\n+```yaml\n+# cnn.yaml\n+\n+version: 1.0\n+\n+model:\n+ name: cnn\n+ framework: mxnet\n+\n+quantization:\n+ calibration:\n+ sampling_size: 160 # number of samples for calibration\n+\n+tuning:\n+ strategy:\n+ name: basic\n+ accuracy_criterion:\n+ relative: 0.01\n+ exit_policy:\n+ timeout: 0\n+ random_seed: 9527\n+```\n+\n+We are using the `basic` strategy, but you could also try out different ones. [Here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md) you can find a list of strategies available in INC and details of how they work. You can also add your own strategy if the existing ones do not suit your needs.\n+\n+Since the value of `timeout` is 0, INC will run until it finds a configuration that satisfies the accuracy criterion and then exit. Depending on the strategy this may not be ideal, as sometimes it would be better to further explore the tuning space to find a superior configuration both in terms of accuracy and speed. To achieve this, we can set a specific `timeout` value, which will tell INC how long (in seconds) it should run.\n+\n+For more information about the configuration file, see the [template](https://github.com/intel/neural-compressor/blob/master/neural_compressor/template/ptq.yaml) from the official INC repo. Keep in mind that only the `post training quantization` is currently supported for MXNet.\n+\n+## Model quantization and tuning\n+\n+In general, Intel\u00ae Neural Compressor requires 4 elements in order to run: \n+1. Config file - like the example above \n+2. Model to be quantized \n+3. Calibration dataloader \n+4. Evaluation function - a function that takes a model as an argument and returns the accuracy it achieves on a certain evaluation dataset. \n+\n+### Quantizing ResNet\n+\n+The [quantization](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/performance/backend/dnnl/dnnl_quantization.html#Quantization) sections described how to quantize ResNet using the native MXNet quantization. This example shows how we can achieve the similar results (with the auto-tuning) using INC.\n+\n+1. Get the model\n+\n+```python\n+import logging\n+import mxnet as mx\n+from mxnet.gluon.model_zoo import vision\n+\n+logging.basicConfig()\n+logger = logging.getLogger('logger')\n+logger.setLevel(logging.INFO)\n+\n+batch_shape = (1, 3, 224, 224)\n+resnet18 = vision.resnet18_v1(pretrained=True)\n+```\n+\n+2. 
Prepare the dataset:\n+\n+```python\n+mx.test_utils.download('http://data.mxnet.io/data/val_256_q90.rec', 'data/val_256_q90.rec')\n+\n+batch_size = 16\n+mean_std = {'mean_r': 123.68, 'mean_g': 116.779, 'mean_b': 103.939,\n+ 'std_r': 58.393, 'std_g': 57.12, 'std_b': 57.375}\n+\n+data = mx.io.ImageRecordIter(path_imgrec='data/val_256_q90.rec',\n+ batch_size=batch_size,\n+ data_shape=batch_shape[1:],\n+ rand_crop=False,\n+ rand_mirror=False,\n+ shuffle=False,\n+ **mean_std)\n+data.batch_size = batch_size\n+```\n+\n+3. Prepare the evaluation function:\n+\n+```python\n+eval_samples = batch_size*10\n+\n+def eval_func(model):\n+ data.reset()\n+ metric = mx.metric.Accuracy()\n+ for i, batch in enumerate(data):\n+ if i * batch_size >= eval_samples:\n+ break\n+ x = batch.data[0].as_in_context(mx.cpu())\n+ label = batch.label[0].as_in_context(mx.cpu())\n+ outputs = model.forward(x)\n+ metric.update(label, outputs)\n+ return metric.get()[1]\n+```\n+\n+4. Run Intel\u00ae Neural Compressor:\n+\n+```python\n+from neural_compressor.experimental import Quantization\n+quantizer = Quantization(\"./cnn.yaml\")\n+quantizer.model = resnet18\n+quantizer.calib_dataloader = data\n+quantizer.eval_func = eval_func\n+qnet = quantizer.fit().model\n+```\n+\n+Since this model already achieves good accuracy using native quantization (less than 1% accuracy drop), for the given configuration file, INC will end on the first configuration, quantizing all layers using `naive` calibration mode for each. To see the true potential of INC, we need a model which suffers from a larger accuracy drop after quantization.\n+\n+### Quantizing ResNet50v2\n+\n+This example shows how to use INC to quantize ResNet50 v2. In this case, the native MXNet quantization introduce a huge accuracy drop (70% using `naive` calibration mode) and INC allows automatically find better solution.\n+\n+This is the (TODO link to INC configuration file) for this example: \n+```yaml\n+version: 1.0\n+\n+model:\n+ name: resnet50_v2\n+ framework: mxnet\n+\n+quantization:\n+ calibration:\n+ sampling_size: 192 # number of samples for calibration\n+\n+tuning:\n+ strategy:\n+ name: mse\n+ accuracy_criterion:\n+ relative: 0.015\n+ exit_policy:\n+ timeout: 0\n+ max_trials: 500\n+ random_seed: 9527\n+```\n+\n+It could be used with script below \n+(TODO link to resnet_mse.py)\n+to find operator which mostly influence accuracy drops and disable it from quantization. ", - "comment_created_at": "2022-08-25T11:45:02+00:00", - "comment_author": "bartekkuncer", - "comment_body": "```suggestion\r\nto find operator, which caused the most significant accuracy drop and disable it from quantization. \r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "954874866", - "pr_number": 21127, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization_inc.md", - "created_at": "2022-08-25T11:57:43+00:00", - "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n# Improving accuracy with Intel\u00ae Neural Compressor\n\nThe accuracy of a model can decrease as a result of quantization. When the accuracy drop is significant, we can try to manually find a better quantization configuration (exclude some layers, try different calibration methods, etc.), but for bigger models this might prove to be a difficult and time consuming task. 
[Intel\u00ae Neural Compressor](https://github.com/intel/neural-compressor) (INC) tries to automate this process using several tuning heuristics, which aim to find the quantization configuration that satisfies the specified accuracy requirement.\n\n**NOTE:**\n\nMost tuning strategies will try different configurations on an evaluation dataset in order to find out how each layer affects the accuracy of the model. This means that for larger models, it may take a long time to find a solution (as the tuning space is usually larger and the evaluation itself takes longer).\n\n## Installation and Prerequisites\n\n- Install MXNet with oneDNN enabled as described in the [Get started](https://mxnet.apache.org/versions/master/get_started?platform=linux&language=python&processor=cpu&environ=pip&). (Until the 2.0 release you can use the nightly build version: `pip install --pre mxnet -f https://dist.mxnet.io/python`)\n\n- Install Intel\u00ae Neural Compressor:\n\n Use one of the commands below to install INC (supported python versions are: 3.6, 3.7, 3.8, 3.9):\n\n ```bash\n # install stable version from pip\n pip install neural-compressor\n\n # install nightly version from pip\n pip install -i https://test.pypi.org/simple/ neural-compressor\n\n # install stable version from conda\n conda install neural-compressor -c conda-forge -c intel\n ```\n If you come into trouble with dependencies on `cv2` library you can run: `apt-get update && apt-get install -y python3-opencv`\n\n## Configuration file\n\nQuantization tuning process can be customized in the yaml configuration file. Below is a simple example:\n\n```yaml\n# cnn.yaml\n\nversion: 1.0\n\nmodel:\n name: cnn\n framework: mxnet\n\nquantization:\n calibration:\n sampling_size: 160 # number of samples for calibration\n\ntuning:\n strategy:\n name: basic\n accuracy_criterion:\n relative: 0.01\n exit_policy:\n timeout: 0\n random_seed: 9527\n```\n\nWe are using the `basic` strategy, but you could also try out different ones. [Here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md) you can find a list of strategies available in INC and details of how they work. You can also add your own strategy if the existing ones do not suit your needs.\n\nSince the value of `timeout` is 0, INC will run until it finds a configuration that satisfies the accuracy criterion and then exit. Depending on the strategy this may not be ideal, as sometimes it would be better to further explore the tuning space to find a superior configuration both in terms of accuracy and speed. To achieve this, we can set a specific `timeout` value, which will tell INC how long (in seconds) it should run.\n\nFor more information about the configuration file, see the [template](https://github.com/intel/neural-compressor/blob/master/neural_compressor/template/ptq.yaml) from the official INC repo. Keep in mind that only the `post training quantization` is currently supported for MXNet.\n\n## Model quantization and tuning\n\nIn general, Intel\u00ae Neural Compressor requires 4 elements in order to run: \n1. Config file - like the example above \n2. Model to be quantized \n3. Calibration dataloader \n4. Evaluation function - a function that takes a model as an argument and returns the accuracy it achieves on a certain evaluation dataset. 
\n\n### Quantizing ResNet\n\nThe [quantization](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/performance/backend/dnnl/dnnl_quantization.html#Quantization) sections described how to quantize ResNet using the native MXNet quantization. This example shows how we can achieve the similar results (with the auto-tuning) using INC.\n\n1. Get the model\n\n```python\nimport logging\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo import vision\n\nlogging.basicConfig()\nlogger = logging.getLogger('logger')\nlogger.setLevel(logging.INFO)\n\nbatch_shape = (1, 3, 224, 224)\nresnet18 = vision.resnet18_v1(pretrained=True)\n```\n\n2. Prepare the dataset:\n\n```python\nmx.test_utils.download('http://data.mxnet.io/data/val_256_q90.rec', 'data/val_256_q90.rec')\n\nbatch_size = 16\nmean_std = {'mean_r': 123.68, 'mean_g': 116.779, 'mean_b': 103.939,\n 'std_r': 58.393, 'std_g': 57.12, 'std_b': 57.375}\n\ndata = mx.io.ImageRecordIter(path_imgrec='data/val_256_q90.rec',\n batch_size=batch_size,\n data_shape=batch_shape[1:],\n rand_crop=False,\n rand_mirror=False,\n shuffle=False,\n **mean_std)\ndata.batch_size = batch_size\n```\n\n3. Prepare the evaluation function:\n\n```python\neval_samples = batch_size*10\n\ndef eval_func(model):\n data.reset()\n metric = mx.metric.Accuracy()\n for i, batch in enumerate(data):\n if i * batch_size >= eval_samples:\n break\n x = batch.data[0].as_in_context(mx.cpu())\n label = batch.label[0].as_in_context(mx.cpu())\n outputs = model.forward(x)\n metric.update(label, outputs)\n return metric.get()[1]\n```\n\n4. Run Intel\u00ae Neural Compressor:\n\n```python\nfrom neural_compressor.experimental import Quantization\nquantizer = Quantization(\"./cnn.yaml\")\nquantizer.model = resnet18\nquantizer.calib_dataloader = data\nquantizer.eval_func = eval_func\nqnet = quantizer.fit().model\n```\n\nSince this model already achieves good accuracy using native quantization (less than 1% accuracy drop), for the given configuration file, INC will end on the first configuration, quantizing all layers using `naive` calibration mode for each. To see the true potential of INC, we need a model which suffers from a larger accuracy drop after quantization.\n\n### Quantizing ResNet50v2\n\nThis example shows how to use INC to quantize ResNet50 v2. In this case, the native MXNet quantization introduce a huge accuracy drop (70% using `naive` calibration mode) and INC allows automatically find better solution.\n\nThis is the (TODO link to INC configuration file) for this example: \n```yaml\nversion: 1.0\n\nmodel:\n name: resnet50_v2\n framework: mxnet\n\nquantization:\n calibration:\n sampling_size: 192 # number of samples for calibration\n\ntuning:\n strategy:\n name: mse\n accuracy_criterion:\n relative: 0.015\n exit_policy:\n timeout: 0\n max_trials: 500\n random_seed: 9527\n```\n\nIt could be used with script below \n(TODO link to resnet_mse.py)\nto find operator which mostly influence accuracy drops and disable it from quantization. 
\nYou can find description of MSE strategy \n[here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md#user-content-mse).\n\n```python\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo.vision import resnet50_v2\nfrom mxnet.gluon.data.vision import transforms\nfrom mxnet.contrib.quantization import quantize_net\n\n# Preparing input data\nrgb_mean = (0.485, 0.456, 0.406)\nrgb_std = (0.229, 0.224, 0.225)\nbatch_size = 64\nnum_calib_batches = 9\n# set below proper path to ImageNet data set\ndataset = mx.gluon.data.vision.ImageRecordDataset('../imagenet/rec/val.rec')\n# Tuning in INC on whole data set takes too long time so we take only part of the whole data set\n# as representative part of it:\ndataset = dataset.take(num_calib_batches * batch_size)\ntransformer = transforms.Compose([transforms.Resize(256),\n transforms.CenterCrop(224),\n transforms.ToTensor(),\n transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n# Note: as input data are used many times during tuning it is better to prepared data earlier,\n# so lazy parameter for transform_first is set to False\nval_data = mx.gluon.data.DataLoader(\n dataset.transform_first(transformer, lazy=False), batch_size, shuffle=False)\nval_data.batch_size = batch_size\n\nnet = resnet50_v2(pretrained=True)\n\ndef eval_func(model):\n metric = mx.gluon.metric.Accuracy()\n for x, label in val_data:\n output = model(x)\n metric.update(label, output)\n accuracy = metric.get()[1]\n return accuracy\n\n\nfrom neural_compressor.experimental import Quantization\nquantizer = Quantization(\"resnet50v2_mse.yaml\")\nquantizer.model = net\nquantizer.calib_dataloader = val_data\nquantizer.eval_func = eval_func\nqnet_inc = quantizer.fit().model\nprint(\"INC finished\")\n# You can save optimized model for the later use:\nqnet_inc.export(\"__quantized_with_inc\")\n# You can see what configurations was applied aby INC and which nodes was excluded from quantization\n# to achieve given accuracy lost against floating point calculation", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "954874866", - "repo_full_name": "apache/mxnet", - "pr_number": 21127, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization_inc.md", - "discussion_id": "954874866", - "commented_code": "@@ -0,0 +1,290 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+# Improving accuracy with Intel\u00ae Neural Compressor\n+\n+The accuracy of a model can decrease as a result of quantization. When the accuracy drop is significant, we can try to manually find a better quantization configuration (exclude some layers, try different calibration methods, etc.), but for bigger models this might prove to be a difficult and time consuming task. [Intel\u00ae Neural Compressor](https://github.com/intel/neural-compressor) (INC) tries to automate this process using several tuning heuristics, which aim to find the quantization configuration that satisfies the specified accuracy requirement.\n+\n+**NOTE:**\n+\n+Most tuning strategies will try different configurations on an evaluation dataset in order to find out how each layer affects the accuracy of the model. 
This means that for larger models, it may take a long time to find a solution (as the tuning space is usually larger and the evaluation itself takes longer).\n+\n+## Installation and Prerequisites\n+\n+- Install MXNet with oneDNN enabled as described in the [Get started](https://mxnet.apache.org/versions/master/get_started?platform=linux&language=python&processor=cpu&environ=pip&). (Until the 2.0 release you can use the nightly build version: `pip install --pre mxnet -f https://dist.mxnet.io/python`)\n+\n+- Install Intel\u00ae Neural Compressor:\n+\n+ Use one of the commands below to install INC (supported python versions are: 3.6, 3.7, 3.8, 3.9):\n+\n+ ```bash\n+ # install stable version from pip\n+ pip install neural-compressor\n+\n+ # install nightly version from pip\n+ pip install -i https://test.pypi.org/simple/ neural-compressor\n+\n+ # install stable version from conda\n+ conda install neural-compressor -c conda-forge -c intel\n+ ```\n+ If you come into trouble with dependencies on `cv2` library you can run: `apt-get update && apt-get install -y python3-opencv`\n+\n+## Configuration file\n+\n+Quantization tuning process can be customized in the yaml configuration file. Below is a simple example:\n+\n+```yaml\n+# cnn.yaml\n+\n+version: 1.0\n+\n+model:\n+ name: cnn\n+ framework: mxnet\n+\n+quantization:\n+ calibration:\n+ sampling_size: 160 # number of samples for calibration\n+\n+tuning:\n+ strategy:\n+ name: basic\n+ accuracy_criterion:\n+ relative: 0.01\n+ exit_policy:\n+ timeout: 0\n+ random_seed: 9527\n+```\n+\n+We are using the `basic` strategy, but you could also try out different ones. [Here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md) you can find a list of strategies available in INC and details of how they work. You can also add your own strategy if the existing ones do not suit your needs.\n+\n+Since the value of `timeout` is 0, INC will run until it finds a configuration that satisfies the accuracy criterion and then exit. Depending on the strategy this may not be ideal, as sometimes it would be better to further explore the tuning space to find a superior configuration both in terms of accuracy and speed. To achieve this, we can set a specific `timeout` value, which will tell INC how long (in seconds) it should run.\n+\n+For more information about the configuration file, see the [template](https://github.com/intel/neural-compressor/blob/master/neural_compressor/template/ptq.yaml) from the official INC repo. Keep in mind that only the `post training quantization` is currently supported for MXNet.\n+\n+## Model quantization and tuning\n+\n+In general, Intel\u00ae Neural Compressor requires 4 elements in order to run: \n+1. Config file - like the example above \n+2. Model to be quantized \n+3. Calibration dataloader \n+4. Evaluation function - a function that takes a model as an argument and returns the accuracy it achieves on a certain evaluation dataset. \n+\n+### Quantizing ResNet\n+\n+The [quantization](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/performance/backend/dnnl/dnnl_quantization.html#Quantization) sections described how to quantize ResNet using the native MXNet quantization. This example shows how we can achieve the similar results (with the auto-tuning) using INC.\n+\n+1. 
Get the model\n+\n+```python\n+import logging\n+import mxnet as mx\n+from mxnet.gluon.model_zoo import vision\n+\n+logging.basicConfig()\n+logger = logging.getLogger('logger')\n+logger.setLevel(logging.INFO)\n+\n+batch_shape = (1, 3, 224, 224)\n+resnet18 = vision.resnet18_v1(pretrained=True)\n+```\n+\n+2. Prepare the dataset:\n+\n+```python\n+mx.test_utils.download('http://data.mxnet.io/data/val_256_q90.rec', 'data/val_256_q90.rec')\n+\n+batch_size = 16\n+mean_std = {'mean_r': 123.68, 'mean_g': 116.779, 'mean_b': 103.939,\n+ 'std_r': 58.393, 'std_g': 57.12, 'std_b': 57.375}\n+\n+data = mx.io.ImageRecordIter(path_imgrec='data/val_256_q90.rec',\n+ batch_size=batch_size,\n+ data_shape=batch_shape[1:],\n+ rand_crop=False,\n+ rand_mirror=False,\n+ shuffle=False,\n+ **mean_std)\n+data.batch_size = batch_size\n+```\n+\n+3. Prepare the evaluation function:\n+\n+```python\n+eval_samples = batch_size*10\n+\n+def eval_func(model):\n+ data.reset()\n+ metric = mx.metric.Accuracy()\n+ for i, batch in enumerate(data):\n+ if i * batch_size >= eval_samples:\n+ break\n+ x = batch.data[0].as_in_context(mx.cpu())\n+ label = batch.label[0].as_in_context(mx.cpu())\n+ outputs = model.forward(x)\n+ metric.update(label, outputs)\n+ return metric.get()[1]\n+```\n+\n+4. Run Intel\u00ae Neural Compressor:\n+\n+```python\n+from neural_compressor.experimental import Quantization\n+quantizer = Quantization(\"./cnn.yaml\")\n+quantizer.model = resnet18\n+quantizer.calib_dataloader = data\n+quantizer.eval_func = eval_func\n+qnet = quantizer.fit().model\n+```\n+\n+Since this model already achieves good accuracy using native quantization (less than 1% accuracy drop), for the given configuration file, INC will end on the first configuration, quantizing all layers using `naive` calibration mode for each. To see the true potential of INC, we need a model which suffers from a larger accuracy drop after quantization.\n+\n+### Quantizing ResNet50v2\n+\n+This example shows how to use INC to quantize ResNet50 v2. In this case, the native MXNet quantization introduce a huge accuracy drop (70% using `naive` calibration mode) and INC allows automatically find better solution.\n+\n+This is the (TODO link to INC configuration file) for this example: \n+```yaml\n+version: 1.0\n+\n+model:\n+ name: resnet50_v2\n+ framework: mxnet\n+\n+quantization:\n+ calibration:\n+ sampling_size: 192 # number of samples for calibration\n+\n+tuning:\n+ strategy:\n+ name: mse\n+ accuracy_criterion:\n+ relative: 0.015\n+ exit_policy:\n+ timeout: 0\n+ max_trials: 500\n+ random_seed: 9527\n+```\n+\n+It could be used with script below \n+(TODO link to resnet_mse.py)\n+to find operator which mostly influence accuracy drops and disable it from quantization. 
\n+You can find description of MSE strategy \n+[here](https://github.com/intel/neural-compressor/blob/master/docs/tuning_strategies.md#user-content-mse).\n+\n+```python\n+import mxnet as mx\n+from mxnet.gluon.model_zoo.vision import resnet50_v2\n+from mxnet.gluon.data.vision import transforms\n+from mxnet.contrib.quantization import quantize_net\n+\n+# Preparing input data\n+rgb_mean = (0.485, 0.456, 0.406)\n+rgb_std = (0.229, 0.224, 0.225)\n+batch_size = 64\n+num_calib_batches = 9\n+# set below proper path to ImageNet data set\n+dataset = mx.gluon.data.vision.ImageRecordDataset('../imagenet/rec/val.rec')\n+# Tuning in INC on whole data set takes too long time so we take only part of the whole data set\n+# as representative part of it:\n+dataset = dataset.take(num_calib_batches * batch_size)\n+transformer = transforms.Compose([transforms.Resize(256),\n+ transforms.CenterCrop(224),\n+ transforms.ToTensor(),\n+ transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n+# Note: as input data are used many times during tuning it is better to prepared data earlier,\n+# so lazy parameter for transform_first is set to False\n+val_data = mx.gluon.data.DataLoader(\n+ dataset.transform_first(transformer, lazy=False), batch_size, shuffle=False)\n+val_data.batch_size = batch_size\n+\n+net = resnet50_v2(pretrained=True)\n+\n+def eval_func(model):\n+ metric = mx.gluon.metric.Accuracy()\n+ for x, label in val_data:\n+ output = model(x)\n+ metric.update(label, output)\n+ accuracy = metric.get()[1]\n+ return accuracy\n+\n+\n+from neural_compressor.experimental import Quantization\n+quantizer = Quantization(\"resnet50v2_mse.yaml\")\n+quantizer.model = net\n+quantizer.calib_dataloader = val_data\n+quantizer.eval_func = eval_func\n+qnet_inc = quantizer.fit().model\n+print(\"INC finished\")\n+# You can save optimized model for the later use:\n+qnet_inc.export(\"__quantized_with_inc\")\n+# You can see what configurations was applied aby INC and which nodes was excluded from quantization\n+# to achieve given accuracy lost against floating point calculation", - "comment_created_at": "2022-08-25T11:57:43+00:00", - "comment_author": "bartekkuncer", - "comment_body": "```suggestion\r\n# You can see which configuration was applied by INC and which nodes were excluded from quantization,\r\n# to achieve given accuracy loss against floating point calculation.\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "796581572", - "pr_number": 20856, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", - "created_at": "2022-02-01T13:12:13+00:00", - "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## Introduction\n\nAfter successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n\nTwo main types of software optimizations can be characerized as:\n- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. 
chaining sequence of operations which can be performed one after another immediately (example: ReLU activation)\n- compute-bound optimizations - these optimizations are mainly done on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution - one of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization\n\nIn version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n\n## Operator Fusion\n\nModels are often represented as directed graph of operations (represented by nodes) and data flow (representad as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n\n\nThe simplest way to explain what fusion is and how it works is to present an example. On the image above is shown a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar block called residual blocks. Some possible fusion patterns are:\n\n- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n- Conv2D + Add => even simpler idea than the previous one - instead of overwriting output memory, results are added to the output memory. 
In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`\n\nAbove examples are presented as atomic ones, but often they can be combined together, thus two patterns can be fused in above example:\n- Conv2D + BatchNorm + ReLU\n- Conv2D + BatchNorm + Add + ReLU\n\nAfter fusing all patterns, computational graph will be changed to the following one:\n![fused_fp32_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/fused_f32.png?raw=true)\n\n\n\n### Operator fusion in MXNet\nSince the version 1.6 of MXNet built with oneDNN support, operator fusion had been enabled by default if executing model with Module API, however in version 2.0 it has been decided to remove setting this feature by environment flag and replace it by aware user API call.\n\nTo fuse model in MXNet 2.0 there are two requirements:\n- the model must be defined as a subclass of HybridBlock or Symbol,\n- the model must have specific operator patterns which can be fused.\n\nAs an example we define example network (sample block from ResNet architecture):\n\n```\nimport mxnet as mx\nfrom mxnet.gluon import nn\n\nclass SampleBlock(nn.HybridBlock):\n def __init__(self):\n super(SampleBlock, self).__init__()\n self.conv1 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n use_bias=False, in_channels=64)\n self.bn1 = nn.BatchNorm()\n self.conv2 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n use_bias=False, in_channels=64)\n self.bn2 = nn.BatchNorm()\n\n def forward(self, x):\n out = mx.npx.activation(self.bn1(self.conv1(x)), 'relu')\n out = self.bn2(self.conv2(out))\n out = mx.npx.activation(out + x, 'relu')\n return out\n \nnet = SampleBlock()\nnet.initialize()\n\ndata = mx.np.zeros(shape=(1,64,224,224))\n# run fusion\nnet.optimize_for(data, backend='ONEDNN')\n\n# We can check fusion by plotting current symbol of our optimized network\nsym, _ = net.export(None)\ngraph = mx.viz.plot_network(sym, save_format='jpg')\ngraph.view()\n```\nBoth HybridBlock and Symbol classes provide API to easily run fusion of operators. Single line of code is enabling fusion passes on model:\n```\nnet.optimize_for(data, backend='ONEDNN')\n```\n\n*optimize_for* function is available also as Symbol class method. Example call to this API is shown below. Notice that Symbol\u2019s *optimize_for* method is not done in-place, so assigning it to a new variable is required:\n\n```\noptimized_symbol = sym.optimize_for(backend='ONEDNN')\n```\n\nFor the above model definition in a naive benchmark with artificial data, we can gain up to 1.75x speedup without any accuracy loss on our testing machine with Intel(R) Core(TM) i9-9940X.\n\n\n## Quantization\nAs mentioned in the introduction, precision reduction is another very popular method of improving performance of workloads and, what is important, in most cases is combined together with operator fusion which improves performance even more. In training precision reduction utilizes 16 bit data types like bfloat or float16, but for inference great results can be achieved using int8. \n\nModel quantization helps on both memory-bound and compute-bound operations. In quantized model IO operations are reduced as int8 data type is 4x smaller than float32, and also computational throughput is increased as more data can be SIMD'ed. On modern Intel architectures using int8 data type can bring even more speedup by utilizing special VNNI instruction set. 
\n\n![before_quant](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/before_quant.png?raw=true)\n\nFirstly quantization performs operator fusion on floating-point model as mentioned in paragraph earlier. Next, all operators which support int8 data type are marked as quantized and if needed additional operators are injected into graph surrounding quantizable operator - the goal of this additional operators is to quantize, dequantize or requantize data to keep data type between operators compatible.\n \n![quant_not_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_not_calib.png?raw=true)\n\n\nAfter injection step it is important to perform calibration of the model, however this step is optional. Quantizing without calibration is not recommended in terms of performance. It will result in calculating data minimum and maximum in quantize and requantize nodes during each inference pass. Calibrating a model greatly improves performance as minimum and maximum values are collected offline and are saved inside node - this way there is no need to search for these values during inference pass. \n\n![quant_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib.png?raw=true)\n\n\nCurrently, there are three supported calibration methods:\n- naive \u2014 min/max values from the calibration run,\n- entropy \u2014 uses KL divergence to determine the best symmetrical quantization thresholds for a given histogram of values.\n- custom \u2014 uses user-defined CalibrationCollector to control the calibration process.\n\nLast operation in quantization flow is to perform additional operator fusion. Second fusion is about merging requantize and dequantize operators into preceding node - oneDNN kernels can perform needed scaling before writing result to output which results in model execution speed-up. Notice that last Convolution does not need minimum and maximum values as it is not requantizing int32 to int8, but dequantizing directly to float32 and scale is calculated basing on minimum and maximum of input and weights.", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "796581572", - "repo_full_name": "apache/mxnet", - "pr_number": 20856, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", - "discussion_id": "796581572", - "commented_code": "@@ -0,0 +1,304 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+## Introduction\n+\n+After successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n+\n+Two main types of software optimizations can be characerized as:\n+- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. 
chaining sequence of operations which can be performed one after another immediately (example: ReLU activation)\n+- compute-bound optimizations - these optimizations are mainly done on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution - one of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization\n+\n+In version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n+\n+## Operator Fusion\n+\n+Models are often represented as directed graph of operations (represented by nodes) and data flow (representad as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n+![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n+\n+\n+The simplest way to explain what fusion is and how it works is to present an example. On the image above is shown a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar block called residual blocks. Some possible fusion patterns are:\n+\n+- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n+- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n+- Conv2D + Add => even simpler idea than the previous one - instead of overwriting output memory, results are added to the output memory. 
In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`\n+\n+Above examples are presented as atomic ones, but often they can be combined together, thus two patterns can be fused in above example:\n+- Conv2D + BatchNorm + ReLU\n+- Conv2D + BatchNorm + Add + ReLU\n+\n+After fusing all patterns, computational graph will be changed to the following one:\n+![fused_fp32_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/fused_f32.png?raw=true)\n+\n+\n+\n+### Operator fusion in MXNet\n+Since the version 1.6 of MXNet built with oneDNN support, operator fusion had been enabled by default if executing model with Module API, however in version 2.0 it has been decided to remove setting this feature by environment flag and replace it by aware user API call.\n+\n+To fuse model in MXNet 2.0 there are two requirements:\n+- the model must be defined as a subclass of HybridBlock or Symbol,\n+- the model must have specific operator patterns which can be fused.\n+\n+As an example we define example network (sample block from ResNet architecture):\n+\n+```\n+import mxnet as mx\n+from mxnet.gluon import nn\n+\n+class SampleBlock(nn.HybridBlock):\n+ def __init__(self):\n+ super(SampleBlock, self).__init__()\n+ self.conv1 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n+ use_bias=False, in_channels=64)\n+ self.bn1 = nn.BatchNorm()\n+ self.conv2 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n+ use_bias=False, in_channels=64)\n+ self.bn2 = nn.BatchNorm()\n+\n+ def forward(self, x):\n+ out = mx.npx.activation(self.bn1(self.conv1(x)), 'relu')\n+ out = self.bn2(self.conv2(out))\n+ out = mx.npx.activation(out + x, 'relu')\n+ return out\n+ \n+net = SampleBlock()\n+net.initialize()\n+\n+data = mx.np.zeros(shape=(1,64,224,224))\n+# run fusion\n+net.optimize_for(data, backend='ONEDNN')\n+\n+# We can check fusion by plotting current symbol of our optimized network\n+sym, _ = net.export(None)\n+graph = mx.viz.plot_network(sym, save_format='jpg')\n+graph.view()\n+```\n+Both HybridBlock and Symbol classes provide API to easily run fusion of operators. Single line of code is enabling fusion passes on model:\n+```\n+net.optimize_for(data, backend='ONEDNN')\n+```\n+\n+*optimize_for* function is available also as Symbol class method. Example call to this API is shown below. Notice that Symbol\u2019s *optimize_for* method is not done in-place, so assigning it to a new variable is required:\n+\n+```\n+optimized_symbol = sym.optimize_for(backend='ONEDNN')\n+```\n+\n+For the above model definition in a naive benchmark with artificial data, we can gain up to 1.75x speedup without any accuracy loss on our testing machine with Intel(R) Core(TM) i9-9940X.\n+\n+\n+## Quantization\n+As mentioned in the introduction, precision reduction is another very popular method of improving performance of workloads and, what is important, in most cases is combined together with operator fusion which improves performance even more. In training precision reduction utilizes 16 bit data types like bfloat or float16, but for inference great results can be achieved using int8. \n+\n+Model quantization helps on both memory-bound and compute-bound operations. In quantized model IO operations are reduced as int8 data type is 4x smaller than float32, and also computational throughput is increased as more data can be SIMD'ed. On modern Intel architectures using int8 data type can bring even more speedup by utilizing special VNNI instruction set. 
\n+\n+![before_quant](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/before_quant.png?raw=true)\n+\n+Firstly quantization performs operator fusion on floating-point model as mentioned in paragraph earlier. Next, all operators which support int8 data type are marked as quantized and if needed additional operators are injected into graph surrounding quantizable operator - the goal of this additional operators is to quantize, dequantize or requantize data to keep data type between operators compatible.\n+ \n+![quant_not_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_not_calib.png?raw=true)\n+\n+\n+After injection step it is important to perform calibration of the model, however this step is optional. Quantizing without calibration is not recommended in terms of performance. It will result in calculating data minimum and maximum in quantize and requantize nodes during each inference pass. Calibrating a model greatly improves performance as minimum and maximum values are collected offline and are saved inside node - this way there is no need to search for these values during inference pass. \n+\n+![quant_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib.png?raw=true)\n+\n+\n+Currently, there are three supported calibration methods:\n+- naive \u2014 min/max values from the calibration run,\n+- entropy \u2014 uses KL divergence to determine the best symmetrical quantization thresholds for a given histogram of values.\n+- custom \u2014 uses user-defined CalibrationCollector to control the calibration process.\n+\n+Last operation in quantization flow is to perform additional operator fusion. Second fusion is about merging requantize and dequantize operators into preceding node - oneDNN kernels can perform needed scaling before writing result to output which results in model execution speed-up. Notice that last Convolution does not need minimum and maximum values as it is not requantizing int32 to int8, but dequantizing directly to float32 and scale is calculated basing on minimum and maximum of input and weights.", - "comment_created_at": "2022-02-01T13:12:13+00:00", - "comment_author": "bartekkuncer", - "comment_body": "```suggestion\r\nLast stage of quantization flow is to perform additional operator fusion. Second fusion is about merging requantize and dequantize operators into preceding node - oneDNN kernels can perform needed scaling before writing result to output which results in model execution speed-up. Notice that last Convolution does not need minimum and maximum values as it is not requantizing int32 to int8, but dequantizing directly to float32 and scale is calculated basing on minimum and maximum of input and weights.\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "800441603", - "pr_number": 20856, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", - "created_at": "2022-02-07T09:05:28+00:00", - "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## Introduction\n\nAfter successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. 
In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n\nTwo main types of software optimizations can be characerized as:\n- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution)\n- compute-bound optimizations - these optimizations are mainly done on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution - one of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "800441603", - "repo_full_name": "apache/mxnet", - "pr_number": 20856, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", - "discussion_id": "800441603", - "commented_code": "@@ -0,0 +1,304 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+## Introduction\n+\n+After successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n+\n+Two main types of software optimizations can be characerized as:\n+- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution)\n+- compute-bound optimizations - these optimizations are mainly done on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution - one of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization", - "comment_created_at": "2022-02-07T09:05:28+00:00", - "comment_author": "bartekkuncer", - "comment_body": "```suggestion\r\n- compute-bound optimizations - these optimizations are mainly made on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution. One of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization.\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "800492034", - "pr_number": 20856, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", - "created_at": "2022-02-07T10:03:03+00:00", - "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## Introduction\n\nAfter successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. 
In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n\nTwo main types of software optimizations can be characerized as:\n- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution)\n- compute-bound optimizations - these optimizations are mainly done on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution - one of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization\n\nIn version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n\n## Operator Fusion\n\nModels are often represented as a directed graph of operations (represented by nodes) and data flow (represented as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n\n\nThe simplest way to explain what fusion is and how it works is to present an example. Image above depicts a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar blocks called residual blocks. Some possible fusion patterns are:\n\n- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n- Conv2D + Add => even simpler idea than the previous ones - instead of overwriting output memory, results are added to the output memory. 
In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`.\n\nAbove examples are presented as atomic ones, but often they can be combined together, thus two patterns that can be fused in above example are:\n- Conv2D + BatchNorm + ReLU\n- Conv2D + BatchNorm + Add + ReLU\n\nAfter fusing all patterns, computational graph will be changed to the following one:\n![fused_fp32_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/fused_f32.png?raw=true)\n\n\n\n### Operator fusion in MXNet\nSince the version 1.6 of MXNet built with oneDNN support, operator fusion had been enabled by default for executing model with Module API, however in version 2.0 it has been decided to remove setting this feature by environment flag and replace it by aware user API call.\n\nTo fuse model in MXNet 2.0 there are two requirements:\n- the model must be defined as a subclass of HybridBlock or Symbol,\n- the model must have specific operator patterns which can be fused.\n\nAs an example we define example network (sample block from ResNet architecture):\n\n```\nimport mxnet as mx\nfrom mxnet.gluon import nn\n\nclass SampleBlock(nn.HybridBlock):\n def __init__(self):\n super(SampleBlock, self).__init__()\n self.conv1 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n use_bias=False, in_channels=64)\n self.bn1 = nn.BatchNorm()\n self.conv2 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n use_bias=False, in_channels=64)\n self.bn2 = nn.BatchNorm()\n\n def forward(self, x):\n out = mx.npx.activation(self.bn1(self.conv1(x)), 'relu')\n out = self.bn2(self.conv2(out))\n out = mx.npx.activation(out + x, 'relu')\n return out\n \nnet = SampleBlock()\nnet.initialize()\n\ndata = mx.np.zeros(shape=(1,64,224,224))\n# run fusion\nnet.optimize_for(data, backend='ONEDNN')\n\n# We can check fusion by plotting current symbol of our optimized network\nsym, _ = net.export(None)\ngraph = mx.viz.plot_network(sym, save_format='jpg')\ngraph.view()\n```\nBoth HybridBlock and Symbol classes provide API to easily run fusion of operators. Single line of code is enabling fusion passes on model:\n```\nnet.optimize_for(data, backend='ONEDNN')\n```\n\n*optimize_for* function is available also as Symbol class method. Example call to this API is shown below. Notice that Symbol\u2019s *optimize_for* method is not done in-place, so assigning it to a new variable is required:\n\n```\noptimized_symbol = sym.optimize_for(backend='ONEDNN')\n```\n\nFor the above model definition in a naive benchmark with artificial data, we can gain up to 1.75x speedup without any accuracy loss on our testing machine with Intel(R) Core(TM) i9-9940X.\n\n\n## Quantization\nAs mentioned in the introduction, precision reduction is another very popular method of improving performance of workloads and, what is important, in most cases is combined together with operator fusion which improves performance even more. In training precision reduction utilizes 16 bit data types like bfloat or float16, but for inference great results can be achieved using int8. \n\nModel quantization helps on both memory-bound and compute-bound operations. In quantized model IO operations are reduced as int8 data type is 4x smaller than float32, and also computational throughput is increased as more data can be SIMD'ed. On modern Intel architectures using int8 data type can bring even more speedup by utilizing special VNNI instruction set. 
\n\n![before_quant](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/before_quant.png?raw=true)\n\nFirstly quantization performs operator fusion on floating-point model as mentioned in paragraph earlier. Next, all operators which support int8 data type are marked as quantized and if needed additional operators are injected into graph surrounding quantizable operator - the goal of this additional operators is to quantize, dequantize or requantize data to keep data type between operators compatible.\n \n![quant_not_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_not_calib.png?raw=true)\n\n\nAfter injection step it is important to perform calibration of the model, however this step is optional. Quantizing without calibration is not recommended in terms of performance. It will result in calculating data minimum and maximum in quantize and requantize nodes during each inference pass. Calibrating a model greatly improves performance as minimum and maximum values are collected offline and are saved inside node - this way there is no need to search for these values during inference pass. \n\n![quant_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib.png?raw=true)\n\n\nCurrently, there are three supported calibration methods:\n- naive \u2014 min/max values from the calibration run,\n- entropy \u2014 uses KL divergence to determine the best symmetrical quantization thresholds for a given histogram of values,\n- custom \u2014 uses user-defined CalibrationCollector to control the calibration process.\n\nLast stage of quantization flow is to perform additional operator fusion. Second fusion is about merging requantize and dequantize operators into preceding node - oneDNN kernels can perform needed scaling before writing result to output which results in model execution speed-up. Notice that last Convolution does not need minimum and maximum values as it is not requantizing int32 to int8, but dequantizing directly to float32 and scale is calculated basing on minimum and maximum of input and weights.\n\n![quant_calib_fused](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib_fused.png?raw=true)\n\nIn MXNet 2.0, the quantization procedure has been adjusted to work well with Gluon models since it is the main API now. The goal was to allow the user to quantize fp32 HybridBlock model in just a few lines of code.\n\n### Quantization in MXNet\n\nAs an example of a quantization procedure, pretrained *resnet50_v1* from *model_zoo.vision* package can be used. To get it simply run the following code:\n\n```\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo.vision import resnet50_v1\n\nnet = resnet50_v1(pretrained=True)\n```\n\nNow, to get a ready-to-deploy quantized model two steps are required:\n1. Prepare data loader with calibration data - this data will be used as input to the network. All necessary layers will be observed with layer collector to calculate minimum and maximum value of that layer. This flow is internal mechanism and all what user needs to do is to provide data loader.\n2. 
Call `quantize_net` function from `contrib.quantize` package - both operator fusion calls will be called inside this API.\n\n```\ncalib_data_loader = mx.gluon.data.DataLoader(dummy_data, batch_size=batch_size)\nqnet = quantize_net(net, calib_mode='naive', calib_data=calib_data_loader)\n```\n\nFollowing function, which calculates total inference time on the model with an artificial data, can be used to compare the performance:\n\n```\ndef benchmark_net(net, batch_size=32, batches=100, warmup_batches=5):\n import time\n data = mx.np.random.uniform(-1.0, 1.0, (batch_size, 3, 224, 224))\n \n for i in range(batches + warmup_batches):\n if i == warmup_batches:\n tic = time.time()\n out = net(data)\n out.wait_to_read()\n \n total_time = time.time()\u200a-\u200atic\n return total_time\n```\n\n\nComparing fused float32 network to quantized network on CLX8280 shows 4.29x speedup - measurment was done on 28 cores and this machine utilizes VNNI instruction set.\n\n\nThe other aspect of lowering the precision of a model is a difference in its accuracy. We will check that on previously tested resnet50_v1 with ImageNet dataset. To run this example you will need ImageNet dataset prepared with this tutorial and stored in path_to_imagenet. Let\u2019s compare top1 and top5 accuracy of standard fp32 model with quantized int8 model calibrated using naive and entropy calibration mode. We will use only 10 batches of the validation dataset to calibrate quantized model.\n\n```\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo.vision import resnet50_v1\nfrom mxnet.gluon.data.vision import transforms\nfrom mxnet.contrib.quantization import quantize_net\n\ndef test_accuracy(net, data_loader):\n acc_top1 = mx.gluon.metric.Accuracy()\n acc_top5 = mx.gluon.metric.TopKAccuracy(5)\n \n for x, label in data_loader:\n output = net(x)\n acc_top1.update(label, output)\n acc_top5.update(label, output)\n\n _, top1 = acc_top1.get()\n _, top5 = acc_top5.get()\n\n return top1, top5\n \nrgb_mean = (0.485, 0.456, 0.406)\nrgb_std = (0.229, 0.224, 0.225)\nbatch_size = 64\n \ndataset = mx.gluon.data.vision.ImageRecordDataset('path_to_imagenet/val.rec')\ntransformer = transforms.Compose([transforms.Resize(256),\n transforms.CenterCrop(224),\n transforms.ToTensor(),\n transforms.Normalize(mean=rgb_mean, std=rgb_std)])\nval_data = mx.gluon.data.DataLoader(dataset.transform_first(transformer), batch_size, shuffle=True)\n\nnet = resnet50_v1(pretrained=True)\nnet.hybridize(backend='ONEDNN', static_alloc=True, static_shape=True)\n\ntop1, top5 = test_accuracy(net, val_data)\nprint('FP32 Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n\nqnet = quantize_net(net, calib_mode='naive', calib_data=val_data, num_calib_batches=10)\nqnet.hybridize(static_alloc=True, static_shape=True)\ntop1, top5 = test_accuracy(qnet, val_data)\nprint('INT8Naive Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n\nqnet = quantize_net(net, calib_mode='entropy', calib_data=val_data, num_calib_batches=10)\nqnet.hybridize(static_alloc=True, static_shape=True)\ntop1, top5 = test_accuracy(qnet, val_data)\nprint('INT8Entropy Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n```\n\n#### Output:\n> FP32 Top1 Accuracy: 0.76364 Top5 Accuracy: 0.93094\n> INT8Naive Top1 Accuracy: 0.76028 Top5 Accuracy: 0.92796\n> INT8Entropy Top1 Accuracy: 0.76404 Top5 Accuracy: 0.93042\n\nWith quantized model there is tiny accuracy drop, however this is cost of great performance optimization and memory footprint. 
The difference between calibration method is dependent on the model itself, used activation layers and the size of calibration data.", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "800492034", - "repo_full_name": "apache/mxnet", - "pr_number": 20856, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", - "discussion_id": "800492034", - "commented_code": "@@ -0,0 +1,304 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+## Introduction\n+\n+After successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n+\n+Two main types of software optimizations can be characerized as:\n+- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution)\n+- compute-bound optimizations - these optimizations are mainly done on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution - one of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization\n+\n+In version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n+\n+## Operator Fusion\n+\n+Models are often represented as a directed graph of operations (represented by nodes) and data flow (represented as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n+![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n+\n+\n+The simplest way to explain what fusion is and how it works is to present an example. Image above depicts a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar blocks called residual blocks. Some possible fusion patterns are:\n+\n+- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n+- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n+- Conv2D + Add => even simpler idea than the previous ones - instead of overwriting output memory, results are added to the output memory. 
In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`.\n+\n+Above examples are presented as atomic ones, but often they can be combined together, thus two patterns that can be fused in above example are:\n+- Conv2D + BatchNorm + ReLU\n+- Conv2D + BatchNorm + Add + ReLU\n+\n+After fusing all patterns, computational graph will be changed to the following one:\n+![fused_fp32_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/fused_f32.png?raw=true)\n+\n+\n+\n+### Operator fusion in MXNet\n+Since the version 1.6 of MXNet built with oneDNN support, operator fusion had been enabled by default for executing model with Module API, however in version 2.0 it has been decided to remove setting this feature by environment flag and replace it by aware user API call.\n+\n+To fuse model in MXNet 2.0 there are two requirements:\n+- the model must be defined as a subclass of HybridBlock or Symbol,\n+- the model must have specific operator patterns which can be fused.\n+\n+As an example we define example network (sample block from ResNet architecture):\n+\n+```\n+import mxnet as mx\n+from mxnet.gluon import nn\n+\n+class SampleBlock(nn.HybridBlock):\n+ def __init__(self):\n+ super(SampleBlock, self).__init__()\n+ self.conv1 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n+ use_bias=False, in_channels=64)\n+ self.bn1 = nn.BatchNorm()\n+ self.conv2 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n+ use_bias=False, in_channels=64)\n+ self.bn2 = nn.BatchNorm()\n+\n+ def forward(self, x):\n+ out = mx.npx.activation(self.bn1(self.conv1(x)), 'relu')\n+ out = self.bn2(self.conv2(out))\n+ out = mx.npx.activation(out + x, 'relu')\n+ return out\n+ \n+net = SampleBlock()\n+net.initialize()\n+\n+data = mx.np.zeros(shape=(1,64,224,224))\n+# run fusion\n+net.optimize_for(data, backend='ONEDNN')\n+\n+# We can check fusion by plotting current symbol of our optimized network\n+sym, _ = net.export(None)\n+graph = mx.viz.plot_network(sym, save_format='jpg')\n+graph.view()\n+```\n+Both HybridBlock and Symbol classes provide API to easily run fusion of operators. Single line of code is enabling fusion passes on model:\n+```\n+net.optimize_for(data, backend='ONEDNN')\n+```\n+\n+*optimize_for* function is available also as Symbol class method. Example call to this API is shown below. Notice that Symbol\u2019s *optimize_for* method is not done in-place, so assigning it to a new variable is required:\n+\n+```\n+optimized_symbol = sym.optimize_for(backend='ONEDNN')\n+```\n+\n+For the above model definition in a naive benchmark with artificial data, we can gain up to 1.75x speedup without any accuracy loss on our testing machine with Intel(R) Core(TM) i9-9940X.\n+\n+\n+## Quantization\n+As mentioned in the introduction, precision reduction is another very popular method of improving performance of workloads and, what is important, in most cases is combined together with operator fusion which improves performance even more. In training precision reduction utilizes 16 bit data types like bfloat or float16, but for inference great results can be achieved using int8. \n+\n+Model quantization helps on both memory-bound and compute-bound operations. In quantized model IO operations are reduced as int8 data type is 4x smaller than float32, and also computational throughput is increased as more data can be SIMD'ed. On modern Intel architectures using int8 data type can bring even more speedup by utilizing special VNNI instruction set. 
\n+\n+![before_quant](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/before_quant.png?raw=true)\n+\n+Firstly quantization performs operator fusion on floating-point model as mentioned in paragraph earlier. Next, all operators which support int8 data type are marked as quantized and if needed additional operators are injected into graph surrounding quantizable operator - the goal of this additional operators is to quantize, dequantize or requantize data to keep data type between operators compatible.\n+ \n+![quant_not_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_not_calib.png?raw=true)\n+\n+\n+After injection step it is important to perform calibration of the model, however this step is optional. Quantizing without calibration is not recommended in terms of performance. It will result in calculating data minimum and maximum in quantize and requantize nodes during each inference pass. Calibrating a model greatly improves performance as minimum and maximum values are collected offline and are saved inside node - this way there is no need to search for these values during inference pass. \n+\n+![quant_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib.png?raw=true)\n+\n+\n+Currently, there are three supported calibration methods:\n+- naive \u2014 min/max values from the calibration run,\n+- entropy \u2014 uses KL divergence to determine the best symmetrical quantization thresholds for a given histogram of values,\n+- custom \u2014 uses user-defined CalibrationCollector to control the calibration process.\n+\n+Last stage of quantization flow is to perform additional operator fusion. Second fusion is about merging requantize and dequantize operators into preceding node - oneDNN kernels can perform needed scaling before writing result to output which results in model execution speed-up. Notice that last Convolution does not need minimum and maximum values as it is not requantizing int32 to int8, but dequantizing directly to float32 and scale is calculated basing on minimum and maximum of input and weights.\n+\n+![quant_calib_fused](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib_fused.png?raw=true)\n+\n+In MXNet 2.0, the quantization procedure has been adjusted to work well with Gluon models since it is the main API now. The goal was to allow the user to quantize fp32 HybridBlock model in just a few lines of code.\n+\n+### Quantization in MXNet\n+\n+As an example of a quantization procedure, pretrained *resnet50_v1* from *model_zoo.vision* package can be used. To get it simply run the following code:\n+\n+```\n+import mxnet as mx\n+from mxnet.gluon.model_zoo.vision import resnet50_v1\n+\n+net = resnet50_v1(pretrained=True)\n+```\n+\n+Now, to get a ready-to-deploy quantized model two steps are required:\n+1. Prepare data loader with calibration data - this data will be used as input to the network. All necessary layers will be observed with layer collector to calculate minimum and maximum value of that layer. This flow is internal mechanism and all what user needs to do is to provide data loader.\n+2. 
Call `quantize_net` function from `contrib.quantize` package - both operator fusion calls will be called inside this API.\n+\n+```\n+calib_data_loader = mx.gluon.data.DataLoader(dummy_data, batch_size=batch_size)\n+qnet = quantize_net(net, calib_mode='naive', calib_data=calib_data_loader)\n+```\n+\n+Following function, which calculates total inference time on the model with an artificial data, can be used to compare the performance:\n+\n+```\n+def benchmark_net(net, batch_size=32, batches=100, warmup_batches=5):\n+ import time\n+ data = mx.np.random.uniform(-1.0, 1.0, (batch_size, 3, 224, 224))\n+ \n+ for i in range(batches + warmup_batches):\n+ if i == warmup_batches:\n+ tic = time.time()\n+ out = net(data)\n+ out.wait_to_read()\n+ \n+ total_time = time.time()\u200a-\u200atic\n+ return total_time\n+```\n+\n+\n+Comparing fused float32 network to quantized network on CLX8280 shows 4.29x speedup - measurment was done on 28 cores and this machine utilizes VNNI instruction set.\n+\n+\n+The other aspect of lowering the precision of a model is a difference in its accuracy. We will check that on previously tested resnet50_v1 with ImageNet dataset. To run this example you will need ImageNet dataset prepared with this tutorial and stored in path_to_imagenet. Let\u2019s compare top1 and top5 accuracy of standard fp32 model with quantized int8 model calibrated using naive and entropy calibration mode. We will use only 10 batches of the validation dataset to calibrate quantized model.\n+\n+```\n+import mxnet as mx\n+from mxnet.gluon.model_zoo.vision import resnet50_v1\n+from mxnet.gluon.data.vision import transforms\n+from mxnet.contrib.quantization import quantize_net\n+\n+def test_accuracy(net, data_loader):\n+ acc_top1 = mx.gluon.metric.Accuracy()\n+ acc_top5 = mx.gluon.metric.TopKAccuracy(5)\n+ \n+ for x, label in data_loader:\n+ output = net(x)\n+ acc_top1.update(label, output)\n+ acc_top5.update(label, output)\n+\n+ _, top1 = acc_top1.get()\n+ _, top5 = acc_top5.get()\n+\n+ return top1, top5\n+ \n+rgb_mean = (0.485, 0.456, 0.406)\n+rgb_std = (0.229, 0.224, 0.225)\n+batch_size = 64\n+ \n+dataset = mx.gluon.data.vision.ImageRecordDataset('path_to_imagenet/val.rec')\n+transformer = transforms.Compose([transforms.Resize(256),\n+ transforms.CenterCrop(224),\n+ transforms.ToTensor(),\n+ transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n+val_data = mx.gluon.data.DataLoader(dataset.transform_first(transformer), batch_size, shuffle=True)\n+\n+net = resnet50_v1(pretrained=True)\n+net.hybridize(backend='ONEDNN', static_alloc=True, static_shape=True)\n+\n+top1, top5 = test_accuracy(net, val_data)\n+print('FP32 Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n+\n+qnet = quantize_net(net, calib_mode='naive', calib_data=val_data, num_calib_batches=10)\n+qnet.hybridize(static_alloc=True, static_shape=True)\n+top1, top5 = test_accuracy(qnet, val_data)\n+print('INT8Naive Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n+\n+qnet = quantize_net(net, calib_mode='entropy', calib_data=val_data, num_calib_batches=10)\n+qnet.hybridize(static_alloc=True, static_shape=True)\n+top1, top5 = test_accuracy(qnet, val_data)\n+print('INT8Entropy Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n+```\n+\n+#### Output:\n+> FP32 Top1 Accuracy: 0.76364 Top5 Accuracy: 0.93094\n+> INT8Naive Top1 Accuracy: 0.76028 Top5 Accuracy: 0.92796\n+> INT8Entropy Top1 Accuracy: 0.76404 Top5 Accuracy: 0.93042\n+\n+With quantized model there is tiny accuracy drop, however this is cost of great performance optimization 
and memory footprint. The difference between calibration method is dependent on the model itself, used activation layers and the size of calibration data.", - "comment_created_at": "2022-02-07T10:03:03+00:00", - "comment_author": "bartekkuncer", - "comment_body": "```suggestion\r\nWith quantized model there is tiny accuracy drop, however this is the cost of great performance optimization and memory footprint reduction. The difference between calibration methods is dependent on the model itself, used activation layers and the size of calibration data.\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "800492895", - "pr_number": 20856, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", - "created_at": "2022-02-07T10:04:07+00:00", - "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## Introduction\n\nAfter successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n\nTwo main types of software optimizations can be characerized as:\n- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution)\n- compute-bound optimizations - these optimizations are mainly done on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution - one of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization\n\nIn version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n\n## Operator Fusion\n\nModels are often represented as a directed graph of operations (represented by nodes) and data flow (represented as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n\n\nThe simplest way to explain what fusion is and how it works is to present an example. Image above depicts a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar blocks called residual blocks. Some possible fusion patterns are:\n\n- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. 
Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n- Conv2D + Add => even simpler idea than the previous ones - instead of overwriting output memory, results are added to the output memory. In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`.\n\nAbove examples are presented as atomic ones, but often they can be combined together, thus two patterns that can be fused in above example are:\n- Conv2D + BatchNorm + ReLU\n- Conv2D + BatchNorm + Add + ReLU\n\nAfter fusing all patterns, computational graph will be changed to the following one:\n![fused_fp32_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/fused_f32.png?raw=true)\n\n\n\n### Operator fusion in MXNet\nSince the version 1.6 of MXNet built with oneDNN support, operator fusion had been enabled by default for executing model with Module API, however in version 2.0 it has been decided to remove setting this feature by environment flag and replace it by aware user API call.\n\nTo fuse model in MXNet 2.0 there are two requirements:\n- the model must be defined as a subclass of HybridBlock or Symbol,\n- the model must have specific operator patterns which can be fused.\n\nAs an example we define example network (sample block from ResNet architecture):\n\n```\nimport mxnet as mx\nfrom mxnet.gluon import nn\n\nclass SampleBlock(nn.HybridBlock):\n def __init__(self):\n super(SampleBlock, self).__init__()\n self.conv1 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n use_bias=False, in_channels=64)\n self.bn1 = nn.BatchNorm()\n self.conv2 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n use_bias=False, in_channels=64)\n self.bn2 = nn.BatchNorm()\n\n def forward(self, x):\n out = mx.npx.activation(self.bn1(self.conv1(x)), 'relu')\n out = self.bn2(self.conv2(out))\n out = mx.npx.activation(out + x, 'relu')\n return out\n \nnet = SampleBlock()\nnet.initialize()\n\ndata = mx.np.zeros(shape=(1,64,224,224))\n# run fusion\nnet.optimize_for(data, backend='ONEDNN')\n\n# We can check fusion by plotting current symbol of our optimized network\nsym, _ = net.export(None)\ngraph = mx.viz.plot_network(sym, save_format='jpg')\ngraph.view()\n```\nBoth HybridBlock and Symbol classes provide API to easily run fusion of operators. Single line of code is enabling fusion passes on model:\n```\nnet.optimize_for(data, backend='ONEDNN')\n```\n\n*optimize_for* function is available also as Symbol class method. Example call to this API is shown below. Notice that Symbol\u2019s *optimize_for* method is not done in-place, so assigning it to a new variable is required:\n\n```\noptimized_symbol = sym.optimize_for(backend='ONEDNN')\n```\n\nFor the above model definition in a naive benchmark with artificial data, we can gain up to 1.75x speedup without any accuracy loss on our testing machine with Intel(R) Core(TM) i9-9940X.\n\n\n## Quantization\nAs mentioned in the introduction, precision reduction is another very popular method of improving performance of workloads and, what is important, in most cases is combined together with operator fusion which improves performance even more. In training precision reduction utilizes 16 bit data types like bfloat or float16, but for inference great results can be achieved using int8. \n\nModel quantization helps on both memory-bound and compute-bound operations. 
In quantized model IO operations are reduced as int8 data type is 4x smaller than float32, and also computational throughput is increased as more data can be SIMD'ed. On modern Intel architectures using int8 data type can bring even more speedup by utilizing special VNNI instruction set. \n\n![before_quant](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/before_quant.png?raw=true)\n\nFirstly quantization performs operator fusion on floating-point model as mentioned in paragraph earlier. Next, all operators which support int8 data type are marked as quantized and if needed additional operators are injected into graph surrounding quantizable operator - the goal of this additional operators is to quantize, dequantize or requantize data to keep data type between operators compatible.\n \n![quant_not_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_not_calib.png?raw=true)\n\n\nAfter injection step it is important to perform calibration of the model, however this step is optional. Quantizing without calibration is not recommended in terms of performance. It will result in calculating data minimum and maximum in quantize and requantize nodes during each inference pass. Calibrating a model greatly improves performance as minimum and maximum values are collected offline and are saved inside node - this way there is no need to search for these values during inference pass. \n\n![quant_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib.png?raw=true)\n\n\nCurrently, there are three supported calibration methods:\n- naive \u2014 min/max values from the calibration run,\n- entropy \u2014 uses KL divergence to determine the best symmetrical quantization thresholds for a given histogram of values,\n- custom \u2014 uses user-defined CalibrationCollector to control the calibration process.\n\nLast stage of quantization flow is to perform additional operator fusion. Second fusion is about merging requantize and dequantize operators into preceding node - oneDNN kernels can perform needed scaling before writing result to output which results in model execution speed-up. Notice that last Convolution does not need minimum and maximum values as it is not requantizing int32 to int8, but dequantizing directly to float32 and scale is calculated basing on minimum and maximum of input and weights.\n\n![quant_calib_fused](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib_fused.png?raw=true)\n\nIn MXNet 2.0, the quantization procedure has been adjusted to work well with Gluon models since it is the main API now. The goal was to allow the user to quantize fp32 HybridBlock model in just a few lines of code.\n\n### Quantization in MXNet\n\nAs an example of a quantization procedure, pretrained *resnet50_v1* from *model_zoo.vision* package can be used. To get it simply run the following code:\n\n```\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo.vision import resnet50_v1\n\nnet = resnet50_v1(pretrained=True)\n```\n\nNow, to get a ready-to-deploy quantized model two steps are required:\n1. Prepare data loader with calibration data - this data will be used as input to the network. All necessary layers will be observed with layer collector to calculate minimum and maximum value of that layer. This flow is internal mechanism and all what user needs to do is to provide data loader.\n2. 
Call `quantize_net` function from `contrib.quantize` package - both operator fusion calls will be called inside this API.\n\n```\ncalib_data_loader = mx.gluon.data.DataLoader(dummy_data, batch_size=batch_size)\nqnet = quantize_net(net, calib_mode='naive', calib_data=calib_data_loader)\n```\n\nFollowing function, which calculates total inference time on the model with an artificial data, can be used to compare the performance:\n\n```\ndef benchmark_net(net, batch_size=32, batches=100, warmup_batches=5):\n import time\n data = mx.np.random.uniform(-1.0, 1.0, (batch_size, 3, 224, 224))\n \n for i in range(batches + warmup_batches):\n if i == warmup_batches:\n tic = time.time()\n out = net(data)\n out.wait_to_read()\n \n total_time = time.time()\u200a-\u200atic\n return total_time\n```\n\n\nComparing fused float32 network to quantized network on CLX8280 shows 4.29x speedup - measurment was done on 28 cores and this machine utilizes VNNI instruction set.\n\n\nThe other aspect of lowering the precision of a model is a difference in its accuracy. We will check that on previously tested resnet50_v1 with ImageNet dataset. To run this example you will need ImageNet dataset prepared with this tutorial and stored in path_to_imagenet. Let\u2019s compare top1 and top5 accuracy of standard fp32 model with quantized int8 model calibrated using naive and entropy calibration mode. We will use only 10 batches of the validation dataset to calibrate quantized model.\n\n```\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo.vision import resnet50_v1\nfrom mxnet.gluon.data.vision import transforms\nfrom mxnet.contrib.quantization import quantize_net\n\ndef test_accuracy(net, data_loader):\n acc_top1 = mx.gluon.metric.Accuracy()\n acc_top5 = mx.gluon.metric.TopKAccuracy(5)\n \n for x, label in data_loader:\n output = net(x)\n acc_top1.update(label, output)\n acc_top5.update(label, output)\n\n _, top1 = acc_top1.get()\n _, top5 = acc_top5.get()\n\n return top1, top5\n \nrgb_mean = (0.485, 0.456, 0.406)\nrgb_std = (0.229, 0.224, 0.225)\nbatch_size = 64\n \ndataset = mx.gluon.data.vision.ImageRecordDataset('path_to_imagenet/val.rec')\ntransformer = transforms.Compose([transforms.Resize(256),\n transforms.CenterCrop(224),\n transforms.ToTensor(),\n transforms.Normalize(mean=rgb_mean, std=rgb_std)])\nval_data = mx.gluon.data.DataLoader(dataset.transform_first(transformer), batch_size, shuffle=True)\n\nnet = resnet50_v1(pretrained=True)\nnet.hybridize(backend='ONEDNN', static_alloc=True, static_shape=True)\n\ntop1, top5 = test_accuracy(net, val_data)\nprint('FP32 Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n\nqnet = quantize_net(net, calib_mode='naive', calib_data=val_data, num_calib_batches=10)\nqnet.hybridize(static_alloc=True, static_shape=True)\ntop1, top5 = test_accuracy(qnet, val_data)\nprint('INT8Naive Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n\nqnet = quantize_net(net, calib_mode='entropy', calib_data=val_data, num_calib_batches=10)\nqnet.hybridize(static_alloc=True, static_shape=True)\ntop1, top5 = test_accuracy(qnet, val_data)\nprint('INT8Entropy Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n```\n\n#### Output:\n> FP32 Top1 Accuracy: 0.76364 Top5 Accuracy: 0.93094\n> INT8Naive Top1 Accuracy: 0.76028 Top5 Accuracy: 0.92796\n> INT8Entropy Top1 Accuracy: 0.76404 Top5 Accuracy: 0.93042\n\nWith quantized model there is tiny accuracy drop, however this is cost of great performance optimization and memory footprint. 
The difference between calibration method is dependent on the model itself, used activation layers and the size of calibration data.\n\n### Custom layer collectors and calibrating the model\nIn MXNet 2.0 new interface for creating custom calibration collector has been added. Main goal of this interface is to give the user as much flexibility as possible in almost every step of quantization. Creating own layer collector is pretty easy, however computing effective min/max values can be not a trivial task. \n\nLayer collectors are responsible for collecting statistics of each node in the graph \u2014 it means that the input/output data of every operator executed can be observed. Collector utilize the register_op_hook method of HybridBlock class.", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "800492895", - "repo_full_name": "apache/mxnet", - "pr_number": 20856, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", - "discussion_id": "800492895", - "commented_code": "@@ -0,0 +1,304 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+## Introduction\n+\n+After successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers at the same time. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n+\n+Two main types of software optimizations can be characerized as:\n+- memory-bound optimizations - main objective of these optimizations is to reduce memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution)\n+- compute-bound optimizations - these optimizations are mainly done on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution - one of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization\n+\n+In version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n+\n+## Operator Fusion\n+\n+Models are often represented as a directed graph of operations (represented by nodes) and data flow (represented as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n+![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n+\n+\n+The simplest way to explain what fusion is and how it works is to present an example. Image above depicts a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar blocks called residual blocks. Some possible fusion patterns are:\n+\n+- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. 
Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n+- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n+- Conv2D + Add => even simpler idea than the previous ones - instead of overwriting output memory, results are added to the output memory. In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`.\n+\n+Above examples are presented as atomic ones, but often they can be combined together, thus two patterns that can be fused in above example are:\n+- Conv2D + BatchNorm + ReLU\n+- Conv2D + BatchNorm + Add + ReLU\n+\n+After fusing all patterns, computational graph will be changed to the following one:\n+![fused_fp32_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/fused_f32.png?raw=true)\n+\n+\n+\n+### Operator fusion in MXNet\n+Since the version 1.6 of MXNet built with oneDNN support, operator fusion had been enabled by default for executing model with Module API, however in version 2.0 it has been decided to remove setting this feature by environment flag and replace it by aware user API call.\n+\n+To fuse model in MXNet 2.0 there are two requirements:\n+- the model must be defined as a subclass of HybridBlock or Symbol,\n+- the model must have specific operator patterns which can be fused.\n+\n+As an example we define example network (sample block from ResNet architecture):\n+\n+```\n+import mxnet as mx\n+from mxnet.gluon import nn\n+\n+class SampleBlock(nn.HybridBlock):\n+ def __init__(self):\n+ super(SampleBlock, self).__init__()\n+ self.conv1 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n+ use_bias=False, in_channels=64)\n+ self.bn1 = nn.BatchNorm()\n+ self.conv2 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n+ use_bias=False, in_channels=64)\n+ self.bn2 = nn.BatchNorm()\n+\n+ def forward(self, x):\n+ out = mx.npx.activation(self.bn1(self.conv1(x)), 'relu')\n+ out = self.bn2(self.conv2(out))\n+ out = mx.npx.activation(out + x, 'relu')\n+ return out\n+ \n+net = SampleBlock()\n+net.initialize()\n+\n+data = mx.np.zeros(shape=(1,64,224,224))\n+# run fusion\n+net.optimize_for(data, backend='ONEDNN')\n+\n+# We can check fusion by plotting current symbol of our optimized network\n+sym, _ = net.export(None)\n+graph = mx.viz.plot_network(sym, save_format='jpg')\n+graph.view()\n+```\n+Both HybridBlock and Symbol classes provide API to easily run fusion of operators. Single line of code is enabling fusion passes on model:\n+```\n+net.optimize_for(data, backend='ONEDNN')\n+```\n+\n+*optimize_for* function is available also as Symbol class method. Example call to this API is shown below. 
Notice that Symbol\u2019s *optimize_for* method is not done in-place, so assigning it to a new variable is required:\n+\n+```\n+optimized_symbol = sym.optimize_for(backend='ONEDNN')\n+```\n+\n+For the above model definition in a naive benchmark with artificial data, we can gain up to 1.75x speedup without any accuracy loss on our testing machine with Intel(R) Core(TM) i9-9940X.\n+\n+\n+## Quantization\n+As mentioned in the introduction, precision reduction is another very popular method of improving performance of workloads and, what is important, in most cases is combined together with operator fusion which improves performance even more. In training precision reduction utilizes 16 bit data types like bfloat or float16, but for inference great results can be achieved using int8. \n+\n+Model quantization helps on both memory-bound and compute-bound operations. In quantized model IO operations are reduced as int8 data type is 4x smaller than float32, and also computational throughput is increased as more data can be SIMD'ed. On modern Intel architectures using int8 data type can bring even more speedup by utilizing special VNNI instruction set. \n+\n+![before_quant](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/before_quant.png?raw=true)\n+\n+Firstly quantization performs operator fusion on floating-point model as mentioned in paragraph earlier. Next, all operators which support int8 data type are marked as quantized and if needed additional operators are injected into graph surrounding quantizable operator - the goal of this additional operators is to quantize, dequantize or requantize data to keep data type between operators compatible.\n+ \n+![quant_not_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_not_calib.png?raw=true)\n+\n+\n+After injection step it is important to perform calibration of the model, however this step is optional. Quantizing without calibration is not recommended in terms of performance. It will result in calculating data minimum and maximum in quantize and requantize nodes during each inference pass. Calibrating a model greatly improves performance as minimum and maximum values are collected offline and are saved inside node - this way there is no need to search for these values during inference pass. \n+\n+![quant_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib.png?raw=true)\n+\n+\n+Currently, there are three supported calibration methods:\n+- naive \u2014 min/max values from the calibration run,\n+- entropy \u2014 uses KL divergence to determine the best symmetrical quantization thresholds for a given histogram of values,\n+- custom \u2014 uses user-defined CalibrationCollector to control the calibration process.\n+\n+Last stage of quantization flow is to perform additional operator fusion. Second fusion is about merging requantize and dequantize operators into preceding node - oneDNN kernels can perform needed scaling before writing result to output which results in model execution speed-up. 
Notice that last Convolution does not need minimum and maximum values as it is not requantizing int32 to int8, but dequantizing directly to float32 and scale is calculated basing on minimum and maximum of input and weights.\n+\n+![quant_calib_fused](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib_fused.png?raw=true)\n+\n+In MXNet 2.0, the quantization procedure has been adjusted to work well with Gluon models since it is the main API now. The goal was to allow the user to quantize fp32 HybridBlock model in just a few lines of code.\n+\n+### Quantization in MXNet\n+\n+As an example of a quantization procedure, pretrained *resnet50_v1* from *model_zoo.vision* package can be used. To get it simply run the following code:\n+\n+```\n+import mxnet as mx\n+from mxnet.gluon.model_zoo.vision import resnet50_v1\n+\n+net = resnet50_v1(pretrained=True)\n+```\n+\n+Now, to get a ready-to-deploy quantized model two steps are required:\n+1. Prepare data loader with calibration data - this data will be used as input to the network. All necessary layers will be observed with layer collector to calculate minimum and maximum value of that layer. This flow is internal mechanism and all what user needs to do is to provide data loader.\n+2. Call `quantize_net` function from `contrib.quantize` package - both operator fusion calls will be called inside this API.\n+\n+```\n+calib_data_loader = mx.gluon.data.DataLoader(dummy_data, batch_size=batch_size)\n+qnet = quantize_net(net, calib_mode='naive', calib_data=calib_data_loader)\n+```\n+\n+Following function, which calculates total inference time on the model with an artificial data, can be used to compare the performance:\n+\n+```\n+def benchmark_net(net, batch_size=32, batches=100, warmup_batches=5):\n+ import time\n+ data = mx.np.random.uniform(-1.0, 1.0, (batch_size, 3, 224, 224))\n+ \n+ for i in range(batches + warmup_batches):\n+ if i == warmup_batches:\n+ tic = time.time()\n+ out = net(data)\n+ out.wait_to_read()\n+ \n+ total_time = time.time()\u200a-\u200atic\n+ return total_time\n+```\n+\n+\n+Comparing fused float32 network to quantized network on CLX8280 shows 4.29x speedup - measurment was done on 28 cores and this machine utilizes VNNI instruction set.\n+\n+\n+The other aspect of lowering the precision of a model is a difference in its accuracy. We will check that on previously tested resnet50_v1 with ImageNet dataset. To run this example you will need ImageNet dataset prepared with this tutorial and stored in path_to_imagenet. Let\u2019s compare top1 and top5 accuracy of standard fp32 model with quantized int8 model calibrated using naive and entropy calibration mode. 
We will use only 10 batches of the validation dataset to calibrate quantized model.\n+\n+```\n+import mxnet as mx\n+from mxnet.gluon.model_zoo.vision import resnet50_v1\n+from mxnet.gluon.data.vision import transforms\n+from mxnet.contrib.quantization import quantize_net\n+\n+def test_accuracy(net, data_loader):\n+ acc_top1 = mx.gluon.metric.Accuracy()\n+ acc_top5 = mx.gluon.metric.TopKAccuracy(5)\n+ \n+ for x, label in data_loader:\n+ output = net(x)\n+ acc_top1.update(label, output)\n+ acc_top5.update(label, output)\n+\n+ _, top1 = acc_top1.get()\n+ _, top5 = acc_top5.get()\n+\n+ return top1, top5\n+ \n+rgb_mean = (0.485, 0.456, 0.406)\n+rgb_std = (0.229, 0.224, 0.225)\n+batch_size = 64\n+ \n+dataset = mx.gluon.data.vision.ImageRecordDataset('path_to_imagenet/val.rec')\n+transformer = transforms.Compose([transforms.Resize(256),\n+ transforms.CenterCrop(224),\n+ transforms.ToTensor(),\n+ transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n+val_data = mx.gluon.data.DataLoader(dataset.transform_first(transformer), batch_size, shuffle=True)\n+\n+net = resnet50_v1(pretrained=True)\n+net.hybridize(backend='ONEDNN', static_alloc=True, static_shape=True)\n+\n+top1, top5 = test_accuracy(net, val_data)\n+print('FP32 Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n+\n+qnet = quantize_net(net, calib_mode='naive', calib_data=val_data, num_calib_batches=10)\n+qnet.hybridize(static_alloc=True, static_shape=True)\n+top1, top5 = test_accuracy(qnet, val_data)\n+print('INT8Naive Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n+\n+qnet = quantize_net(net, calib_mode='entropy', calib_data=val_data, num_calib_batches=10)\n+qnet.hybridize(static_alloc=True, static_shape=True)\n+top1, top5 = test_accuracy(qnet, val_data)\n+print('INT8Entropy Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n+```\n+\n+#### Output:\n+> FP32 Top1 Accuracy: 0.76364 Top5 Accuracy: 0.93094\n+> INT8Naive Top1 Accuracy: 0.76028 Top5 Accuracy: 0.92796\n+> INT8Entropy Top1 Accuracy: 0.76404 Top5 Accuracy: 0.93042\n+\n+With quantized model there is tiny accuracy drop, however this is cost of great performance optimization and memory footprint. The difference between calibration method is dependent on the model itself, used activation layers and the size of calibration data.\n+\n+### Custom layer collectors and calibrating the model\n+In MXNet 2.0 new interface for creating custom calibration collector has been added. Main goal of this interface is to give the user as much flexibility as possible in almost every step of quantization. Creating own layer collector is pretty easy, however computing effective min/max values can be not a trivial task. \n+\n+Layer collectors are responsible for collecting statistics of each node in the graph \u2014 it means that the input/output data of every operator executed can be observed. Collector utilize the register_op_hook method of HybridBlock class.", - "comment_created_at": "2022-02-07T10:04:07+00:00", - "comment_author": "bartekkuncer", - "comment_body": "```suggestion\r\nLayer collectors are responsible for collecting statistics of each node in the graph \u2014 it means that the input/output data of every operator executed can be observed. 
Collector utilizes the register_op_hook method of HybridBlock class.\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "804509742", - "pr_number": 20856, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", - "created_at": "2022-02-11T10:08:02+00:00", - "commented_code": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## Introduction\n\nAfter successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers simultaneously. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n\nTwo main types of software optimizations can be characerized as:\n- memory-bound optimizations - main objective of these optimizations is to reduce the amount of memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution),\n- compute-bound optimizations - these optimizations are mainly made on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution. One of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization.\n\nIn version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n\n## Operator Fusion\n\nModels are often represented as a directed graph of operations (represented by nodes) and data flow (represented as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n\n\nThe simplest way to explain what fusion is and how it works is to present an example. Image above depicts a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar blocks called residual blocks. Some possible fusion patterns are:\n\n- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n- Conv2D + Add => even simpler idea than the previous ones - instead of overwriting output memory, results are added to the output memory. 
In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`.\n\nAbove examples are presented as atomic ones, but often they can be combined together, thus two patterns that can be fused in above example are:\n- Conv2D + BatchNorm + ReLU\n- Conv2D + BatchNorm + Add + ReLU\n\nAfter fusing all patterns, computational graph will be changed to the following one:\n![fused_fp32_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/fused_f32.png?raw=true)\n\n\n\n### Operator fusion in MXNet\nSince the version 1.6 of MXNet built with oneDNN support, operator fusion had been enabled by default for executing model with Module API, however in version 2.0 it has been decided to remove setting this feature by environment flag and replace it by aware user API call.\n\nTo fuse model in MXNet 2.0 there are two requirements:\n- the model must be defined as a subclass of HybridBlock or Symbol,\n- the model must have specific operator patterns which can be fused.\n\nAs an example we define example network (sample block from ResNet architecture):\n\n```\nimport mxnet as mx\nfrom mxnet.gluon import nn\n\nclass SampleBlock(nn.HybridBlock):\n def __init__(self):\n super(SampleBlock, self).__init__()\n self.conv1 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n use_bias=False, in_channels=64)\n self.bn1 = nn.BatchNorm()\n self.conv2 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n use_bias=False, in_channels=64)\n self.bn2 = nn.BatchNorm()\n\n def forward(self, x):\n out = mx.npx.activation(self.bn1(self.conv1(x)), 'relu')\n out = self.bn2(self.conv2(out))\n out = mx.npx.activation(out + x, 'relu')\n return out\n \nnet = SampleBlock()\nnet.initialize()\n\ndata = mx.np.zeros(shape=(1,64,224,224))\n# run fusion\nnet.optimize_for(data, backend='ONEDNN')\n\n# We can check fusion by plotting current symbol of our optimized network\nsym, _ = net.export(None)\ngraph = mx.viz.plot_network(sym, save_format='jpg')\ngraph.view()\n```\nBoth HybridBlock and Symbol classes provide API to easily run fusion of operators. Single line of code is enabling fusion passes on model:\n```\nnet.optimize_for(data, backend='ONEDNN')\n```\n\n*optimize_for* function is available also as Symbol class method. Example call to this API is shown below. Notice that Symbol\u2019s *optimize_for* method is not done in-place, so assigning it to a new variable is required:\n\n```\noptimized_symbol = sym.optimize_for(backend='ONEDNN')\n```\n\nFor the above model definition in a naive benchmark with artificial data, we can gain up to 1.75x speedup without any accuracy loss on our testing machine with Intel(R) Core(TM) i9-9940X.\n\n\n## Quantization\nAs mentioned in the introduction, precision reduction is another very popular method of improving performance of workloads and, what is important, in most cases is combined together with operator fusion which improves performance even more. In training precision reduction utilizes 16 bit data types like bfloat or float16, but for inference great results can be achieved using int8. \n\nModel quantization helps on both memory-bound and compute-bound operations. In quantized model IO operations are reduced as int8 data type is 4x smaller than float32, and also computational throughput is increased as more data can be SIMD'ed. On modern Intel architectures using int8 data type can bring even more speedup by utilizing special VNNI instruction set. 
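As a rough sanity check of the 4x storage claim above (this snippet is an illustration only and is not part of the quoted tutorial; plain NumPy and the tutorial's example input shape are assumed):

```python
import numpy as np

# Same activation shape as the fusion example's input, in both precisions.
x_fp32 = np.zeros((1, 64, 224, 224), dtype=np.float32)
x_int8 = np.zeros((1, 64, 224, 224), dtype=np.int8)

# int8 moves a quarter of the bytes per tensor, which is where the IO saving comes from.
print(x_fp32.nbytes // x_int8.nbytes)  # 4
```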
\n\n![before_quant](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/before_quant.png?raw=true)\n\nFirstly quantization performs operator fusion on floating-point model as mentioned in paragraph earlier. Next, all operators which support int8 data type are marked as quantized and if needed additional operators are injected into graph surrounding quantizable operator - the goal of this additional operators is to quantize, dequantize or requantize data to keep data type between operators compatible.\n \n![quant_not_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_not_calib.png?raw=true)\n\n\nAfter injection step it is important to perform calibration of the model, however this step is optional. Quantizing without calibration is not recommended in terms of performance. It will result in calculating data minimum and maximum in quantize and requantize nodes during each inference pass. Calibrating a model greatly improves performance as minimum and maximum values are collected offline and are saved inside node - this way there is no need to search for these values during inference pass. \n\n![quant_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib.png?raw=true)\n\n\nCurrently, there are three supported calibration methods:\n- naive \u2014 min/max values from the calibration run,\n- entropy \u2014 uses KL divergence to determine the best symmetrical quantization thresholds for a given histogram of values,\n- custom \u2014 uses user-defined CalibrationCollector to control the calibration process.\n\nLast stage of quantization flow is to perform additional operator fusion. Second fusion is about merging requantize and dequantize operators into preceding node - oneDNN kernels can perform needed scaling before writing result to output which results in model execution speed-up. Notice that last Convolution does not need minimum and maximum values as it is not requantizing int32 to int8, but dequantizing directly to float32 and scale is calculated basing on minimum and maximum of input and weights.\n\n![quant_calib_fused](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib_fused.png?raw=true)\n\nIn MXNet 2.0, the quantization procedure has been adjusted to work well with Gluon models since it is the main API now. The goal was to allow the user to quantize fp32 HybridBlock model in just a few lines of code.\n\n### Quantization in MXNet\n\nAs an example of a quantization procedure, pretrained *resnet50_v1* from *model_zoo.vision* package can be used. To get it simply run the following code:\n\n```\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo.vision import resnet50_v1\n\nnet = resnet50_v1(pretrained=True)\n```\n\nNow, to get a ready-to-deploy quantized model two steps are required:\n1. Prepare data loader with calibration data - this data will be used as input to the network. All necessary layers will be observed with layer collector to calculate minimum and maximum value of that layer. This flow is internal mechanism and all what user needs to do is to provide data loader.\n2. 
Call `quantize_net` function from `contrib.quantize` package - both operator fusion calls will be called inside this API.\n\n```\ncalib_data_loader = mx.gluon.data.DataLoader(dummy_data, batch_size=batch_size)\nqnet = quantize_net(net, calib_mode='naive', calib_data=calib_data_loader)\n```\n\nFollowing function, which calculates total inference time on the model with an artificial data, can be used to compare the performance:\n\n```\ndef benchmark_net(net, batch_size=32, batches=100, warmup_batches=5):\n import time\n data = mx.np.random.uniform(-1.0, 1.0, (batch_size, 3, 224, 224))\n \n for i in range(batches + warmup_batches):\n if i == warmup_batches:\n tic = time.time()\n out = net(data)\n out.wait_to_read()\n \n total_time = time.time()\u200a-\u200atic\n return total_time\n```\n\n\nComparing fused float32 network to quantized network on CLX8280 shows 4.29x speedup - measurment was done on 28 cores and this machine utilizes VNNI instruction set.\n\n\nThe other aspect of lowering the precision of a model is a difference in its accuracy. We will check that on previously tested resnet50_v1 with ImageNet dataset. To run this example you will need ImageNet dataset prepared with this tutorial and stored in path_to_imagenet. Let\u2019s compare top1 and top5 accuracy of standard fp32 model with quantized int8 model calibrated using naive and entropy calibration mode. We will use only 10 batches of the validation dataset to calibrate quantized model.\n\n```\nimport mxnet as mx\nfrom mxnet.gluon.model_zoo.vision import resnet50_v1\nfrom mxnet.gluon.data.vision import transforms\nfrom mxnet.contrib.quantization import quantize_net\n\ndef test_accuracy(net, data_loader):\n acc_top1 = mx.gluon.metric.Accuracy()\n acc_top5 = mx.gluon.metric.TopKAccuracy(5)\n \n for x, label in data_loader:\n output = net(x)\n acc_top1.update(label, output)\n acc_top5.update(label, output)\n\n _, top1 = acc_top1.get()\n _, top5 = acc_top5.get()\n\n return top1, top5\n \nrgb_mean = (0.485, 0.456, 0.406)\nrgb_std = (0.229, 0.224, 0.225)\nbatch_size = 64\n \ndataset = mx.gluon.data.vision.ImageRecordDataset('path_to_imagenet/val.rec')\ntransformer = transforms.Compose([transforms.Resize(256),\n transforms.CenterCrop(224),\n transforms.ToTensor(),\n transforms.Normalize(mean=rgb_mean, std=rgb_std)])\nval_data = mx.gluon.data.DataLoader(dataset.transform_first(transformer), batch_size, shuffle=True)\n\nnet = resnet50_v1(pretrained=True)\nnet.hybridize(backend='ONEDNN', static_alloc=True, static_shape=True)\n\ntop1, top5 = test_accuracy(net, val_data)\nprint('FP32 Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n\nqnet = quantize_net(net, calib_mode='naive', calib_data=val_data, num_calib_batches=10)\nqnet.hybridize(static_alloc=True, static_shape=True)\ntop1, top5 = test_accuracy(qnet, val_data)\nprint('INT8Naive Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n\nqnet = quantize_net(net, calib_mode='entropy', calib_data=val_data, num_calib_batches=10)\nqnet.hybridize(static_alloc=True, static_shape=True)\ntop1, top5 = test_accuracy(qnet, val_data)\nprint('INT8Entropy Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n```\n\n#### Output:\n> FP32 Top1 Accuracy: 0.76364 Top5 Accuracy: 0.93094\n> INT8Naive Top1 Accuracy: 0.76028 Top5 Accuracy: 0.92796\n> INT8Entropy Top1 Accuracy: 0.76404 Top5 Accuracy: 0.93042\n\nWith quantized model there is tiny accuracy drop, however this is the cost of great performance optimization and memory footprint reduction. 
The difference between calibration methods is dependent on the model itself, used activation layers and the size of calibration data.", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "804509742", - "repo_full_name": "apache/mxnet", - "pr_number": 20856, - "pr_file": "docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md", - "discussion_id": "804509742", - "commented_code": "@@ -0,0 +1,304 @@\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+## Introduction\n+\n+After successful model building and achieving desired accuracy on the test data, often the next step is to optimize inference to deploy the model to production. One of the key features of usable model is to have as small latency as possible to be able to provide services to large number of customers simultaneously. In addition to customer satisfaction, with well optimized model, hardware load is reduced which also reduces energy costs needed to perform inference.\n+\n+Two main types of software optimizations can be characerized as:\n+- memory-bound optimizations - main objective of these optimizations is to reduce the amount of memory operations (reads and writes) - it is done by e.g. chaining operations which can be performed one after another immediately, where input of every subsequent operation is the output of the previous one (example: ReLU activation after convolution),\n+- compute-bound optimizations - these optimizations are mainly made on operations which require large number of CPU cycles to complete, like FullyConnected and Convolution. One of the methods to speedup compute-bound operations is to lower computation precision - this type of optimization is called quantization.\n+\n+In version 2.0 of the Apache MXNet (incubating) GluonAPI2.0 replaced Symbolic API known from versions 1.x, thus there are some differences between API to perform graph fusion and quantization.\n+\n+## Operator Fusion\n+\n+Models are often represented as a directed graph of operations (represented by nodes) and data flow (represented as edges). This way of visualizing helps a lot when searching for common patterns in whole model which can be optimized by fusion. Example:\n+![base_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/sample_net.png?raw=true)\n+\n+\n+The simplest way to explain what fusion is and how it works is to present an example. Image above depicts a sequence of popular operations taken from ResNet architecture. This type of architecture is built with many similar blocks called residual blocks. Some possible fusion patterns are:\n+\n+- Conv2D + BatchNorm => Fusing BatchNorm with Convolution can be performed by modifing weights and bias of Convolution - this way BatchNorm is completely contained within Convolution which makes BatchNorm zero time operation. Only cost of fusing is time needed to prepare weights and bias in Convolution based on BatchNorm parameters.\n+- Conv2D + ReLU => this type of fusion is very popular also with other layers (e.g. FullyConnected + Activation). It is very simple idea where before writing data to output, activation is performed on that data. Main benefit of this fusion is that, there is no need to read and write back data in other layer only to perform simple activation function. \n+- Conv2D + Add => even simpler idea than the previous ones - instead of overwriting output memory, results are added to the output memory. 
In the simplest terms: `out_mem = conv_result` is replaced by `out_mem += conv_result`.\n+\n+Above examples are presented as atomic ones, but often they can be combined together, thus two patterns that can be fused in above example are:\n+- Conv2D + BatchNorm + ReLU\n+- Conv2D + BatchNorm + Add + ReLU\n+\n+After fusing all patterns, computational graph will be changed to the following one:\n+![fused_fp32_model](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/fused_f32.png?raw=true)\n+\n+\n+\n+### Operator fusion in MXNet\n+Since the version 1.6 of MXNet built with oneDNN support, operator fusion had been enabled by default for executing model with Module API, however in version 2.0 it has been decided to remove setting this feature by environment flag and replace it by aware user API call.\n+\n+To fuse model in MXNet 2.0 there are two requirements:\n+- the model must be defined as a subclass of HybridBlock or Symbol,\n+- the model must have specific operator patterns which can be fused.\n+\n+As an example we define example network (sample block from ResNet architecture):\n+\n+```\n+import mxnet as mx\n+from mxnet.gluon import nn\n+\n+class SampleBlock(nn.HybridBlock):\n+ def __init__(self):\n+ super(SampleBlock, self).__init__()\n+ self.conv1 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n+ use_bias=False, in_channels=64)\n+ self.bn1 = nn.BatchNorm()\n+ self.conv2 = nn.Conv2D(channels=64, kernel_size=3, strides=1, padding=1,\n+ use_bias=False, in_channels=64)\n+ self.bn2 = nn.BatchNorm()\n+\n+ def forward(self, x):\n+ out = mx.npx.activation(self.bn1(self.conv1(x)), 'relu')\n+ out = self.bn2(self.conv2(out))\n+ out = mx.npx.activation(out + x, 'relu')\n+ return out\n+ \n+net = SampleBlock()\n+net.initialize()\n+\n+data = mx.np.zeros(shape=(1,64,224,224))\n+# run fusion\n+net.optimize_for(data, backend='ONEDNN')\n+\n+# We can check fusion by plotting current symbol of our optimized network\n+sym, _ = net.export(None)\n+graph = mx.viz.plot_network(sym, save_format='jpg')\n+graph.view()\n+```\n+Both HybridBlock and Symbol classes provide API to easily run fusion of operators. Single line of code is enabling fusion passes on model:\n+```\n+net.optimize_for(data, backend='ONEDNN')\n+```\n+\n+*optimize_for* function is available also as Symbol class method. Example call to this API is shown below. Notice that Symbol\u2019s *optimize_for* method is not done in-place, so assigning it to a new variable is required:\n+\n+```\n+optimized_symbol = sym.optimize_for(backend='ONEDNN')\n+```\n+\n+For the above model definition in a naive benchmark with artificial data, we can gain up to 1.75x speedup without any accuracy loss on our testing machine with Intel(R) Core(TM) i9-9940X.\n+\n+\n+## Quantization\n+As mentioned in the introduction, precision reduction is another very popular method of improving performance of workloads and, what is important, in most cases is combined together with operator fusion which improves performance even more. In training precision reduction utilizes 16 bit data types like bfloat or float16, but for inference great results can be achieved using int8. \n+\n+Model quantization helps on both memory-bound and compute-bound operations. In quantized model IO operations are reduced as int8 data type is 4x smaller than float32, and also computational throughput is increased as more data can be SIMD'ed. On modern Intel architectures using int8 data type can bring even more speedup by utilizing special VNNI instruction set. 
\n+\n+![before_quant](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/before_quant.png?raw=true)\n+\n+Firstly quantization performs operator fusion on floating-point model as mentioned in paragraph earlier. Next, all operators which support int8 data type are marked as quantized and if needed additional operators are injected into graph surrounding quantizable operator - the goal of this additional operators is to quantize, dequantize or requantize data to keep data type between operators compatible.\n+ \n+![quant_not_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_not_calib.png?raw=true)\n+\n+\n+After injection step it is important to perform calibration of the model, however this step is optional. Quantizing without calibration is not recommended in terms of performance. It will result in calculating data minimum and maximum in quantize and requantize nodes during each inference pass. Calibrating a model greatly improves performance as minimum and maximum values are collected offline and are saved inside node - this way there is no need to search for these values during inference pass. \n+\n+![quant_calib](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib.png?raw=true)\n+\n+\n+Currently, there are three supported calibration methods:\n+- naive \u2014 min/max values from the calibration run,\n+- entropy \u2014 uses KL divergence to determine the best symmetrical quantization thresholds for a given histogram of values,\n+- custom \u2014 uses user-defined CalibrationCollector to control the calibration process.\n+\n+Last stage of quantization flow is to perform additional operator fusion. Second fusion is about merging requantize and dequantize operators into preceding node - oneDNN kernels can perform needed scaling before writing result to output which results in model execution speed-up. Notice that last Convolution does not need minimum and maximum values as it is not requantizing int32 to int8, but dequantizing directly to float32 and scale is calculated basing on minimum and maximum of input and weights.\n+\n+![quant_calib_fused](https://github.com/dmlc/web-data/blob/main/mxnet/tutorials/onednn/quantization_2_0/quant_calib_fused.png?raw=true)\n+\n+In MXNet 2.0, the quantization procedure has been adjusted to work well with Gluon models since it is the main API now. The goal was to allow the user to quantize fp32 HybridBlock model in just a few lines of code.\n+\n+### Quantization in MXNet\n+\n+As an example of a quantization procedure, pretrained *resnet50_v1* from *model_zoo.vision* package can be used. To get it simply run the following code:\n+\n+```\n+import mxnet as mx\n+from mxnet.gluon.model_zoo.vision import resnet50_v1\n+\n+net = resnet50_v1(pretrained=True)\n+```\n+\n+Now, to get a ready-to-deploy quantized model two steps are required:\n+1. Prepare data loader with calibration data - this data will be used as input to the network. All necessary layers will be observed with layer collector to calculate minimum and maximum value of that layer. This flow is internal mechanism and all what user needs to do is to provide data loader.\n+2. 
Call `quantize_net` function from `contrib.quantize` package - both operator fusion calls will be called inside this API.\n+\n+```\n+calib_data_loader = mx.gluon.data.DataLoader(dummy_data, batch_size=batch_size)\n+qnet = quantize_net(net, calib_mode='naive', calib_data=calib_data_loader)\n+```\n+\n+Following function, which calculates total inference time on the model with an artificial data, can be used to compare the performance:\n+\n+```\n+def benchmark_net(net, batch_size=32, batches=100, warmup_batches=5):\n+ import time\n+ data = mx.np.random.uniform(-1.0, 1.0, (batch_size, 3, 224, 224))\n+ \n+ for i in range(batches + warmup_batches):\n+ if i == warmup_batches:\n+ tic = time.time()\n+ out = net(data)\n+ out.wait_to_read()\n+ \n+ total_time = time.time()\u200a-\u200atic\n+ return total_time\n+```\n+\n+\n+Comparing fused float32 network to quantized network on CLX8280 shows 4.29x speedup - measurment was done on 28 cores and this machine utilizes VNNI instruction set.\n+\n+\n+The other aspect of lowering the precision of a model is a difference in its accuracy. We will check that on previously tested resnet50_v1 with ImageNet dataset. To run this example you will need ImageNet dataset prepared with this tutorial and stored in path_to_imagenet. Let\u2019s compare top1 and top5 accuracy of standard fp32 model with quantized int8 model calibrated using naive and entropy calibration mode. We will use only 10 batches of the validation dataset to calibrate quantized model.\n+\n+```\n+import mxnet as mx\n+from mxnet.gluon.model_zoo.vision import resnet50_v1\n+from mxnet.gluon.data.vision import transforms\n+from mxnet.contrib.quantization import quantize_net\n+\n+def test_accuracy(net, data_loader):\n+ acc_top1 = mx.gluon.metric.Accuracy()\n+ acc_top5 = mx.gluon.metric.TopKAccuracy(5)\n+ \n+ for x, label in data_loader:\n+ output = net(x)\n+ acc_top1.update(label, output)\n+ acc_top5.update(label, output)\n+\n+ _, top1 = acc_top1.get()\n+ _, top5 = acc_top5.get()\n+\n+ return top1, top5\n+ \n+rgb_mean = (0.485, 0.456, 0.406)\n+rgb_std = (0.229, 0.224, 0.225)\n+batch_size = 64\n+ \n+dataset = mx.gluon.data.vision.ImageRecordDataset('path_to_imagenet/val.rec')\n+transformer = transforms.Compose([transforms.Resize(256),\n+ transforms.CenterCrop(224),\n+ transforms.ToTensor(),\n+ transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n+val_data = mx.gluon.data.DataLoader(dataset.transform_first(transformer), batch_size, shuffle=True)\n+\n+net = resnet50_v1(pretrained=True)\n+net.hybridize(backend='ONEDNN', static_alloc=True, static_shape=True)\n+\n+top1, top5 = test_accuracy(net, val_data)\n+print('FP32 Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n+\n+qnet = quantize_net(net, calib_mode='naive', calib_data=val_data, num_calib_batches=10)\n+qnet.hybridize(static_alloc=True, static_shape=True)\n+top1, top5 = test_accuracy(qnet, val_data)\n+print('INT8Naive Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n+\n+qnet = quantize_net(net, calib_mode='entropy', calib_data=val_data, num_calib_batches=10)\n+qnet.hybridize(static_alloc=True, static_shape=True)\n+top1, top5 = test_accuracy(qnet, val_data)\n+print('INT8Entropy Top1 Accuracy: {} Top5 Accuracy: {}'.format(top1, top5))\n+```\n+\n+#### Output:\n+> FP32 Top1 Accuracy: 0.76364 Top5 Accuracy: 0.93094\n+> INT8Naive Top1 Accuracy: 0.76028 Top5 Accuracy: 0.92796\n+> INT8Entropy Top1 Accuracy: 0.76404 Top5 Accuracy: 0.93042\n+\n+With quantized model there is tiny accuracy drop, however this is the cost of great performance 
optimization and memory footprint reduction. The difference between calibration methods is dependent on the model itself, used activation layers and the size of calibration data.", - "comment_created_at": "2022-02-11T10:08:02+00:00", - "comment_author": "bartekkuncer", - "comment_body": "```suggestion\r\nWith quantized model there is a tiny accuracy drop, however this is the cost of great performance optimization and memory footprint reduction. The difference between calibration methods is dependent on the model itself, used activation layers and the size of calibration data.\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-use-explicit-optional-types.json b/_reviewers/mxnet-use-explicit-optional-types.json new file mode 100644 index 0000000..7a3c969 --- /dev/null +++ b/_reviewers/mxnet-use-explicit-optional-types.json @@ -0,0 +1,140 @@ +[ + { + "discussion_id": "793919904", + "pr_number": 20852, + "pr_file": "perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm", + "created_at": "2022-01-27T19:10:52+00:00", + "commented_code": "has 'name' => (is => 'rw', isa => 'Str');\nhas 'num' => (is => 'rw', isa => 'Int');\nhas 'num_inst' => (is => 'rw', isa => 'Maybe[Int|ArrayRef[Int]]');\nhas 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]]');\nhas 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]|PDL]');", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "793919904", + "repo_full_name": "apache/mxnet", + "pr_number": 20852, + "pr_file": "perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm", + "discussion_id": "793919904", + "commented_code": "@@ -79,7 +79,7 @@ use overload '\"\"' => sub {\n has 'name' => (is => 'rw', isa => 'Str');\n has 'num' => (is => 'rw', isa => 'Int');\n has 'num_inst' => (is => 'rw', isa => 'Maybe[Int|ArrayRef[Int]]');\n-has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]]');\n+has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]|PDL]');", + "comment_created_at": "2022-01-27T19:10:52+00:00", + "comment_author": "sergeykolychev", + "comment_body": "Why is this needed ? Is $pdl->sum (and other similar methods) produce pdl object now ? This change is a bit radical and will require additional tests to see of something that is not covered by the CI is not broken.", + "pr_file_module": null + }, + { + "comment_id": "793925590", + "repo_full_name": "apache/mxnet", + "pr_number": 20852, + "pr_file": "perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm", + "discussion_id": "793919904", + "commented_code": "@@ -79,7 +79,7 @@ use overload '\"\"' => sub {\n has 'name' => (is => 'rw', isa => 'Str');\n has 'num' => (is => 'rw', isa => 'Int');\n has 'num_inst' => (is => 'rw', isa => 'Maybe[Int|ArrayRef[Int]]');\n-has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]]');\n+has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]|PDL]');", + "comment_created_at": "2022-01-27T19:18:39+00:00", + "comment_author": "zmughal", + "comment_body": "Yes, that is correct, they return a PDL ndarray (of zero rank) now (since PDL v2.056). This is to make sure that types such as complex numbers and BAD values can be correctly returned. To get a single Perl scalar out of a PDL, you can do `$pdl->sclr`. 
Though for many situations, this should work like a Perl scalar due to operator overloading.", + "pr_file_module": null + }, + { + "comment_id": "793930850", + "repo_full_name": "apache/mxnet", + "pr_number": 20852, + "pr_file": "perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm", + "discussion_id": "793919904", + "commented_code": "@@ -79,7 +79,7 @@ use overload '\"\"' => sub {\n has 'name' => (is => 'rw', isa => 'Str');\n has 'num' => (is => 'rw', isa => 'Int');\n has 'num_inst' => (is => 'rw', isa => 'Maybe[Int|ArrayRef[Int]]');\n-has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]]');\n+has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]|PDL]');", + "comment_created_at": "2022-01-27T19:26:17+00:00", + "comment_author": "zmughal", + "comment_body": "What type of tests would you like to see?", + "pr_file_module": null + }, + { + "comment_id": "793936355", + "repo_full_name": "apache/mxnet", + "pr_number": 20852, + "pr_file": "perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm", + "discussion_id": "793919904", + "commented_code": "@@ -79,7 +79,7 @@ use overload '\"\"' => sub {\n has 'name' => (is => 'rw', isa => 'Str');\n has 'num' => (is => 'rw', isa => 'Int');\n has 'num_inst' => (is => 'rw', isa => 'Maybe[Int|ArrayRef[Int]]');\n-has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]]');\n+has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]|PDL]');", + "comment_created_at": "2022-01-27T19:33:51+00:00", + "comment_author": "sergeykolychev", + "comment_body": "May I ask you locally run scripts from AI-MXNet/examples/ and see if they run and produce sane output ?", + "pr_file_module": null + }, + { + "comment_id": "794252536", + "repo_full_name": "apache/mxnet", + "pr_number": 20852, + "pr_file": "perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm", + "discussion_id": "793919904", + "commented_code": "@@ -79,7 +79,7 @@ use overload '\"\"' => sub {\n has 'name' => (is => 'rw', isa => 'Str');\n has 'num' => (is => 'rw', isa => 'Int');\n has 'num_inst' => (is => 'rw', isa => 'Maybe[Int|ArrayRef[Int]]');\n-has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]]');\n+has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]|PDL]');", + "comment_created_at": "2022-01-28T07:15:57+00:00", + "comment_author": "zmughal", + "comment_body": "\r\n== examples/calculator.pl ==\r\n\r\n```\r\nfullyconnected4_bias -> [0.675975]\r\nfullyconnected4_weight -> \r\n[\r\n [ 1.00116 0.999985]\r\n]\r\n\r\nfullyconnected5_bias -> [4.85674e-05]\r\nfullyconnected5_weight -> \r\n[\r\n [0.000844207 0.000230625 0.508509]\r\n]\r\n\r\nfullyconnected6_bias -> [0.385759]\r\nfullyconnected6_weight -> \r\n[\r\n [ 1 -1]\r\n]\r\n\r\nfullyconnected7_bias -> [-2.62715e-06]\r\nfullyconnected7_weight -> \r\n[\r\n [-1.31198e-05 8.74287e-06 0.679938]\r\n]\r\n\r\n12345 + 54321 ≈ 66742.1796875\r\n188 - 88 ≈ 99.8591766357422\r\n250 * 2 ≈ 503.255798339844\r\n250 / 2 ≈ 124.998970031738\r\n```\r\n\r\n== examples/mnist.pl ==\r\n\r\n```\r\nEpoch[4] Batch [200]\tSpeed: 59678.68 samples/sec\tTrain-accuracy=0.974726\r\nEpoch[4] Batch [400]\tSpeed: 57716.06 samples/sec\tTrain-accuracy=0.973350\r\nEpoch[4] Train-accuracy=0.974233\r\nEpoch[4] Time cost=1.057\r\nEpoch[4] Validation-accuracy=0.967800\r\nEpoch[5] Batch [200]\tSpeed: 52405.81 samples/sec\tTrain-accuracy=0.977214\r\nEpoch[5] Batch [400]\tSpeed: 57804.15 samples/sec\tTrain-accuracy=0.976100\r\nEpoch[5] Train-accuracy=0.976950\r\nEpoch[5] Time cost=1.079\r\nEpoch[5] Validation-accuracy=0.966500\r\nEpoch[6] Batch [200]\tSpeed: 57043.78 
samples/sec\tTrain-accuracy=0.977015\r\nEpoch[6] Batch [400]\tSpeed: 57357.53 samples/sec\tTrain-accuracy=0.975800\r\nEpoch[6] Train-accuracy=0.976433\r\nEpoch[6] Time cost=1.052\r\nEpoch[6] Validation-accuracy=0.963000\r\nEpoch[7] Batch [200]\tSpeed: 45378.31 samples/sec\tTrain-accuracy=0.977711\r\nEpoch[7] Batch [400]\tSpeed: 55753.34 samples/sec\tTrain-accuracy=0.976350\r\nEpoch[7] Train-accuracy=0.977467\r\nEpoch[7] Time cost=1.144\r\nEpoch[7] Validation-accuracy=0.963700\r\nEpoch[8] Batch [200]\tSpeed: 54546.75 samples/sec\tTrain-accuracy=0.980448\r\nEpoch[8] Batch [400]\tSpeed: 55774.47 samples/sec\tTrain-accuracy=0.980000\r\nEpoch[8] Train-accuracy=0.980217\r\nEpoch[8] Time cost=1.090\r\nEpoch[8] Validation-accuracy=0.962900\r\nEpoch[9] Batch [200]\tSpeed: 48810.85 samples/sec\tTrain-accuracy=0.980050\r\nEpoch[9] Batch [400]\tSpeed: 54945.51 samples/sec\tTrain-accuracy=0.982550\r\nEpoch[9] Train-accuracy=0.981850\r\nEpoch[9] Time cost=1.115\r\nEpoch[9] Validation-accuracy=0.969500\r\n```\r\n\r\n== examples/sparse/matrix_factorization/train.pl ==\r\n\r\n```\r\nEpoch[2] Batch [60300]\tSpeed: 50291.13 samples/sec\tTrain-mse=0.706833\r\nEpoch[2] Batch [60400]\tSpeed: 54047.43 samples/sec\tTrain-mse=0.723078\r\nEpoch[2] Batch [60500]\tSpeed: 53646.70 samples/sec\tTrain-mse=0.741219\r\nEpoch[2] Batch [60600]\tSpeed: 52929.93 samples/sec\tTrain-mse=0.706861\r\nEpoch[2] Batch [60700]\tSpeed: 53583.14 samples/sec\tTrain-mse=0.734509\r\nEpoch[2] Batch [60800]\tSpeed: 50907.01 samples/sec\tTrain-mse=0.735880\r\nEpoch[2] Batch [60900]\tSpeed: 39899.38 samples/sec\tTrain-mse=0.758780\r\nEpoch[2] Batch [61000]\tSpeed: 48115.77 samples/sec\tTrain-mse=0.693772\r\nEpoch[2] Batch [61100]\tSpeed: 53833.52 samples/sec\tTrain-mse=0.714911\r\nEpoch[2] Batch [61200]\tSpeed: 53998.88 samples/sec\tTrain-mse=0.731971\r\nEpoch[2] Batch [61300]\tSpeed: 51153.75 samples/sec\tTrain-mse=0.732873\r\nEpoch[2] Batch [61400]\tSpeed: 51454.19 samples/sec\tTrain-mse=0.715944\r\nEpoch[2] Batch [61500]\tSpeed: 51353.83 samples/sec\tTrain-mse=0.715877\r\nEpoch[2] Batch [61600]\tSpeed: 53121.93 samples/sec\tTrain-mse=0.703896\r\nEpoch[2] Batch [61700]\tSpeed: 53345.31 samples/sec\tTrain-mse=0.700082\r\nEpoch[2] Batch [61800]\tSpeed: 48247.09 samples/sec\tTrain-mse=0.734545\r\nEpoch[2] Batch [61900]\tSpeed: 42918.29 samples/sec\tTrain-mse=0.714020\r\nEpoch[2] Batch [62000]\tSpeed: 49330.02 samples/sec\tTrain-mse=0.722819\r\nEpoch[2] Batch [62100]\tSpeed: 53999.32 samples/sec\tTrain-mse=0.736258\r\nEpoch[2] Batch [62200]\tSpeed: 51953.96 samples/sec\tTrain-mse=0.737786\r\nEpoch[2] Batch [62300]\tSpeed: 51416.39 samples/sec\tTrain-mse=0.698692\r\nEpoch[2] Batch [62400]\tSpeed: 53206.32 samples/sec\tTrain-mse=0.716579\r\nEpoch[2] Batch [62500]\tSpeed: 52938.28 samples/sec\tTrain-mse=0.733243\r\nPreparing data iterators for ./data/ml-10M100K/r1.train ... \r\nPreparing data iterators for ./data/ml-10M100K/r1.test ... 
\r\nTraining started ...\r\nepoch 0, eval MSE = 0.99958598613739 \r\nepoch 1, eval MSE = 1.03603255748749 \r\nepoch 2, eval MSE = 1.07017529010773 \r\nTraining completed.\r\n```\r\n\r\n", + "pr_file_module": null + }, + { + "comment_id": "794693489", + "repo_full_name": "apache/mxnet", + "pr_number": 20852, + "pr_file": "perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm", + "discussion_id": "793919904", + "commented_code": "@@ -79,7 +79,7 @@ use overload '\"\"' => sub {\n has 'name' => (is => 'rw', isa => 'Str');\n has 'num' => (is => 'rw', isa => 'Int');\n has 'num_inst' => (is => 'rw', isa => 'Maybe[Int|ArrayRef[Int]]');\n-has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]]');\n+has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]|PDL]');", + "comment_created_at": "2022-01-28T17:06:01+00:00", + "comment_author": "sergeykolychev", + "comment_body": "Looks good", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "688373879", + "pr_number": 20514, + "pr_file": "src/operator/quantization/quantize_v2-inl.h", + "created_at": "2021-08-13T09:23:55+00:00", + "commented_code": "int out_type;\n dmlc::optional min_calib_range;\n dmlc::optional max_calib_range;\n dmlc::optional shifted;\n dmlc::optional shifted_output;", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "688373879", + "repo_full_name": "apache/mxnet", + "pr_number": 20514, + "pr_file": "src/operator/quantization/quantize_v2-inl.h", + "discussion_id": "688373879", + "commented_code": "@@ -41,7 +41,7 @@ struct QuantizeV2Param : public dmlc::Parameter {\n int out_type;\n dmlc::optional min_calib_range;\n dmlc::optional max_calib_range;\n- dmlc::optional shifted;\n+ dmlc::optional shifted_output;", + "comment_created_at": "2021-08-13T09:23:55+00:00", + "comment_author": "mozga-intel", + "comment_body": "The dmlc::optional is found a use for representing a value that may or may not be present. The std::optional was introduced in C++17, so it means that we can try to replace this one by a new one. Unfortunately, there is a bunch of branches have not supported C++17 yet. An exceptation is the master branch that supports c++17, well, maybe is worth making an internal wrapper. On second thoughts, it could be looks like: [proposition]\r\n```cpp\r\n#define MXNET_HAS_OPTIONAL() 0\r\n#if __cplusplus >= 201703L\r\n# ifdef __has_include\r\n# if __has_include()\r\n# include \r\n# undef MXNET_HAS_OPTIONAL\r\n# define MXNET_HAS_OPTIONAL() 1\r\n# endif\r\n# endif\r\n#endif\r\n\r\n#if MXNET_HAS_OPTIONAL()\r\n#warning \"optional\"\r\n// Please use std::optional\r\n#else\r\n#warning \"no optional\"\r\n// Please use dmlc::optional\r\n#endif\r\n```\r\nWhat are the advantages of using it?", + "pr_file_module": null + }, + { + "comment_id": "688415411", + "repo_full_name": "apache/mxnet", + "pr_number": 20514, + "pr_file": "src/operator/quantization/quantize_v2-inl.h", + "discussion_id": "688373879", + "commented_code": "@@ -41,7 +41,7 @@ struct QuantizeV2Param : public dmlc::Parameter {\n int out_type;\n dmlc::optional min_calib_range;\n dmlc::optional max_calib_range;\n- dmlc::optional shifted;\n+ dmlc::optional shifted_output;", + "comment_created_at": "2021-08-13T10:35:38+00:00", + "comment_author": "sfraczek", + "comment_body": "I think v1.x is more strongly backward compatibility oriented. I think it would be better to keep such change only to master. dmlc::optional is used in a lot of places as well as other functions so it might be hard to justify rewriting parts of it. 
I don't know what are advantages of either implementations. \r\nAlso, in this PR I would prefer to keep consistency with what is already used in operators.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "763881978", + "pr_number": 20745, + "pr_file": "src/operator/tensor/indexing_op.cc", + "created_at": "2021-12-07T11:04:38+00:00", + "commented_code": "}\n};\n\ntemplate \nstruct TakeNonzeroAxisCPU {\n /*!\n * \\brief Map function for take operator\n * \\param i global thread id\n * \\param out_data ptr to output buffer\n * \\param in_data ptr to input buffer\n * \\param idx ptr to indices buffer\n * \\param outer_dim_stride stride of dimension before axis\n * \\param axis_dim_stride stride of axis dimension\n * \\param idx_size size of the indices tensor\n * \\param axis_dim dim size of the axis dimension\n * \\param axis axis id\n */\n template \n MSHADOW_XINLINE static void Map(index_t i,\n DType* out_data,\n const DType* in_data,\n const IType* indices,\n const index_t outer_dim_stride,\n const index_t axis_dim_stride,\n const int idx_size,\n const int axis_dim,\n const int axis) {\n for (index_t j = 0; j < static_cast(idx_size); ++j) {\n int index = indices[j];\n if (clip) {\n index = (index < 0) ? 0 : index;", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "763881978", + "repo_full_name": "apache/mxnet", + "pr_number": 20745, + "pr_file": "src/operator/tensor/indexing_op.cc", + "discussion_id": "763881978", + "commented_code": "@@ -60,6 +60,51 @@ struct TakeZeroAxisCPU {\n }\n };\n \n+template \n+struct TakeNonzeroAxisCPU {\n+ /*!\n+ * \\brief Map function for take operator\n+ * \\param i global thread id\n+ * \\param out_data ptr to output buffer\n+ * \\param in_data ptr to input buffer\n+ * \\param idx ptr to indices buffer\n+ * \\param outer_dim_stride stride of dimension before axis\n+ * \\param axis_dim_stride stride of axis dimension\n+ * \\param idx_size size of the indices tensor\n+ * \\param axis_dim dim size of the axis dimension\n+ * \\param axis axis id\n+ */\n+ template \n+ MSHADOW_XINLINE static void Map(index_t i,\n+ DType* out_data,\n+ const DType* in_data,\n+ const IType* indices,\n+ const index_t outer_dim_stride,\n+ const index_t axis_dim_stride,\n+ const int idx_size,\n+ const int axis_dim,\n+ const int axis) {\n+ for (index_t j = 0; j < static_cast(idx_size); ++j) {\n+ int index = indices[j];\n+ if (clip) {\n+ index = (index < 0) ? 0 : index;", + "comment_created_at": "2021-12-07T11:04:38+00:00", + "comment_author": "bartekkuncer", + "comment_body": "Maybe use max(0,index)?", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-use-explicit-optional-types.md b/_reviewers/mxnet-use-explicit-optional-types.md index cd03bfd..9402fa0 100644 --- a/_reviewers/mxnet-use-explicit-optional-types.md +++ b/_reviewers/mxnet-use-explicit-optional-types.md @@ -24,145 +24,3 @@ dmlc::optional shifted_output; // Clearly indicates this boolean might no ``` Be explicit about which values can be null and provide appropriate methods to safely access or transform these values. When working with optional types, ensure consistent handling across the codebase rather than using ad-hoc null checks. This pattern helps catch null-related errors at compile time rather than runtime. 
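For readers who work mainly in Python, roughly the same principle can be sketched with `typing.Optional`; this is only an illustrative analogue, not taken from the guideline or the discussions below, and the parameter name borrows `min_calib_range` from the C++ snippet above purely for flavor:

```python
from typing import Optional

def lower_bound(min_calib_range: Optional[float]) -> float:
    """The signature states that the value may be absent, instead of relying on an ad-hoc convention."""
    if min_calib_range is None:
        # Handle absence explicitly with a documented default.
        return 0.0
    return min_calib_range

print(lower_bound(None))   # 0.0
print(lower_bound(-2.5))   # -2.5
```

Under a static checker such as mypy, call sites that ignore the `None` case are reported before the code runs, which is the same benefit the guideline attributes to `dmlc::optional` and `std::optional`.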
- - -[ - { - "discussion_id": "793919904", - "pr_number": 20852, - "pr_file": "perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm", - "created_at": "2022-01-27T19:10:52+00:00", - "commented_code": "has 'name' => (is => 'rw', isa => 'Str');\nhas 'num' => (is => 'rw', isa => 'Int');\nhas 'num_inst' => (is => 'rw', isa => 'Maybe[Int|ArrayRef[Int]]');\nhas 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]]');\nhas 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]|PDL]');", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "793919904", - "repo_full_name": "apache/mxnet", - "pr_number": 20852, - "pr_file": "perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm", - "discussion_id": "793919904", - "commented_code": "@@ -79,7 +79,7 @@ use overload '\"\"' => sub {\n has 'name' => (is => 'rw', isa => 'Str');\n has 'num' => (is => 'rw', isa => 'Int');\n has 'num_inst' => (is => 'rw', isa => 'Maybe[Int|ArrayRef[Int]]');\n-has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]]');\n+has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]|PDL]');", - "comment_created_at": "2022-01-27T19:10:52+00:00", - "comment_author": "sergeykolychev", - "comment_body": "Why is this needed ? Is $pdl->sum (and other similar methods) produce pdl object now ? This change is a bit radical and will require additional tests to see of something that is not covered by the CI is not broken.", - "pr_file_module": null - }, - { - "comment_id": "793925590", - "repo_full_name": "apache/mxnet", - "pr_number": 20852, - "pr_file": "perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm", - "discussion_id": "793919904", - "commented_code": "@@ -79,7 +79,7 @@ use overload '\"\"' => sub {\n has 'name' => (is => 'rw', isa => 'Str');\n has 'num' => (is => 'rw', isa => 'Int');\n has 'num_inst' => (is => 'rw', isa => 'Maybe[Int|ArrayRef[Int]]');\n-has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]]');\n+has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]|PDL]');", - "comment_created_at": "2022-01-27T19:18:39+00:00", - "comment_author": "zmughal", - "comment_body": "Yes, that is correct, they return a PDL ndarray (of zero rank) now (since PDL v2.056). This is to make sure that types such as complex numbers and BAD values can be correctly returned. To get a single Perl scalar out of a PDL, you can do `$pdl->sclr`. 
Though for many situations, this should work like a Perl scalar due to operator overloading.", - "pr_file_module": null - }, - { - "comment_id": "793930850", - "repo_full_name": "apache/mxnet", - "pr_number": 20852, - "pr_file": "perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm", - "discussion_id": "793919904", - "commented_code": "@@ -79,7 +79,7 @@ use overload '\"\"' => sub {\n has 'name' => (is => 'rw', isa => 'Str');\n has 'num' => (is => 'rw', isa => 'Int');\n has 'num_inst' => (is => 'rw', isa => 'Maybe[Int|ArrayRef[Int]]');\n-has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]]');\n+has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]|PDL]');", - "comment_created_at": "2022-01-27T19:26:17+00:00", - "comment_author": "zmughal", - "comment_body": "What type of tests would you like to see?", - "pr_file_module": null - }, - { - "comment_id": "793936355", - "repo_full_name": "apache/mxnet", - "pr_number": 20852, - "pr_file": "perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm", - "discussion_id": "793919904", - "commented_code": "@@ -79,7 +79,7 @@ use overload '\"\"' => sub {\n has 'name' => (is => 'rw', isa => 'Str');\n has 'num' => (is => 'rw', isa => 'Int');\n has 'num_inst' => (is => 'rw', isa => 'Maybe[Int|ArrayRef[Int]]');\n-has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]]');\n+has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]|PDL]');", - "comment_created_at": "2022-01-27T19:33:51+00:00", - "comment_author": "sergeykolychev", - "comment_body": "May I ask you locally run scripts from AI-MXNet/examples/ and see if they run and produce sane output ?", - "pr_file_module": null - }, - { - "comment_id": "794252536", - "repo_full_name": "apache/mxnet", - "pr_number": 20852, - "pr_file": "perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm", - "discussion_id": "793919904", - "commented_code": "@@ -79,7 +79,7 @@ use overload '\"\"' => sub {\n has 'name' => (is => 'rw', isa => 'Str');\n has 'num' => (is => 'rw', isa => 'Int');\n has 'num_inst' => (is => 'rw', isa => 'Maybe[Int|ArrayRef[Int]]');\n-has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]]');\n+has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]|PDL]');", - "comment_created_at": "2022-01-28T07:15:57+00:00", - "comment_author": "zmughal", - "comment_body": "\r\n== examples/calculator.pl ==\r\n\r\n```\r\nfullyconnected4_bias -> [0.675975]\r\nfullyconnected4_weight -> \r\n[\r\n [ 1.00116 0.999985]\r\n]\r\n\r\nfullyconnected5_bias -> [4.85674e-05]\r\nfullyconnected5_weight -> \r\n[\r\n [0.000844207 0.000230625 0.508509]\r\n]\r\n\r\nfullyconnected6_bias -> [0.385759]\r\nfullyconnected6_weight -> \r\n[\r\n [ 1 -1]\r\n]\r\n\r\nfullyconnected7_bias -> [-2.62715e-06]\r\nfullyconnected7_weight -> \r\n[\r\n [-1.31198e-05 8.74287e-06 0.679938]\r\n]\r\n\r\n12345 + 54321 \u2248 66742.1796875\r\n188 - 88 \u2248 99.8591766357422\r\n250 * 2 \u2248 503.255798339844\r\n250 / 2 \u2248 124.998970031738\r\n```\r\n\r\n== examples/mnist.pl ==\r\n\r\n```\r\nEpoch[4] Batch [200]\tSpeed: 59678.68 samples/sec\tTrain-accuracy=0.974726\r\nEpoch[4] Batch [400]\tSpeed: 57716.06 samples/sec\tTrain-accuracy=0.973350\r\nEpoch[4] Train-accuracy=0.974233\r\nEpoch[4] Time cost=1.057\r\nEpoch[4] Validation-accuracy=0.967800\r\nEpoch[5] Batch [200]\tSpeed: 52405.81 samples/sec\tTrain-accuracy=0.977214\r\nEpoch[5] Batch [400]\tSpeed: 57804.15 samples/sec\tTrain-accuracy=0.976100\r\nEpoch[5] Train-accuracy=0.976950\r\nEpoch[5] Time cost=1.079\r\nEpoch[5] Validation-accuracy=0.966500\r\nEpoch[6] Batch 
[200]\tSpeed: 57043.78 samples/sec\tTrain-accuracy=0.977015\r\nEpoch[6] Batch [400]\tSpeed: 57357.53 samples/sec\tTrain-accuracy=0.975800\r\nEpoch[6] Train-accuracy=0.976433\r\nEpoch[6] Time cost=1.052\r\nEpoch[6] Validation-accuracy=0.963000\r\nEpoch[7] Batch [200]\tSpeed: 45378.31 samples/sec\tTrain-accuracy=0.977711\r\nEpoch[7] Batch [400]\tSpeed: 55753.34 samples/sec\tTrain-accuracy=0.976350\r\nEpoch[7] Train-accuracy=0.977467\r\nEpoch[7] Time cost=1.144\r\nEpoch[7] Validation-accuracy=0.963700\r\nEpoch[8] Batch [200]\tSpeed: 54546.75 samples/sec\tTrain-accuracy=0.980448\r\nEpoch[8] Batch [400]\tSpeed: 55774.47 samples/sec\tTrain-accuracy=0.980000\r\nEpoch[8] Train-accuracy=0.980217\r\nEpoch[8] Time cost=1.090\r\nEpoch[8] Validation-accuracy=0.962900\r\nEpoch[9] Batch [200]\tSpeed: 48810.85 samples/sec\tTrain-accuracy=0.980050\r\nEpoch[9] Batch [400]\tSpeed: 54945.51 samples/sec\tTrain-accuracy=0.982550\r\nEpoch[9] Train-accuracy=0.981850\r\nEpoch[9] Time cost=1.115\r\nEpoch[9] Validation-accuracy=0.969500\r\n```\r\n\r\n== examples/sparse/matrix_factorization/train.pl ==\r\n\r\n```\r\nEpoch[2] Batch [60300]\tSpeed: 50291.13 samples/sec\tTrain-mse=0.706833\r\nEpoch[2] Batch [60400]\tSpeed: 54047.43 samples/sec\tTrain-mse=0.723078\r\nEpoch[2] Batch [60500]\tSpeed: 53646.70 samples/sec\tTrain-mse=0.741219\r\nEpoch[2] Batch [60600]\tSpeed: 52929.93 samples/sec\tTrain-mse=0.706861\r\nEpoch[2] Batch [60700]\tSpeed: 53583.14 samples/sec\tTrain-mse=0.734509\r\nEpoch[2] Batch [60800]\tSpeed: 50907.01 samples/sec\tTrain-mse=0.735880\r\nEpoch[2] Batch [60900]\tSpeed: 39899.38 samples/sec\tTrain-mse=0.758780\r\nEpoch[2] Batch [61000]\tSpeed: 48115.77 samples/sec\tTrain-mse=0.693772\r\nEpoch[2] Batch [61100]\tSpeed: 53833.52 samples/sec\tTrain-mse=0.714911\r\nEpoch[2] Batch [61200]\tSpeed: 53998.88 samples/sec\tTrain-mse=0.731971\r\nEpoch[2] Batch [61300]\tSpeed: 51153.75 samples/sec\tTrain-mse=0.732873\r\nEpoch[2] Batch [61400]\tSpeed: 51454.19 samples/sec\tTrain-mse=0.715944\r\nEpoch[2] Batch [61500]\tSpeed: 51353.83 samples/sec\tTrain-mse=0.715877\r\nEpoch[2] Batch [61600]\tSpeed: 53121.93 samples/sec\tTrain-mse=0.703896\r\nEpoch[2] Batch [61700]\tSpeed: 53345.31 samples/sec\tTrain-mse=0.700082\r\nEpoch[2] Batch [61800]\tSpeed: 48247.09 samples/sec\tTrain-mse=0.734545\r\nEpoch[2] Batch [61900]\tSpeed: 42918.29 samples/sec\tTrain-mse=0.714020\r\nEpoch[2] Batch [62000]\tSpeed: 49330.02 samples/sec\tTrain-mse=0.722819\r\nEpoch[2] Batch [62100]\tSpeed: 53999.32 samples/sec\tTrain-mse=0.736258\r\nEpoch[2] Batch [62200]\tSpeed: 51953.96 samples/sec\tTrain-mse=0.737786\r\nEpoch[2] Batch [62300]\tSpeed: 51416.39 samples/sec\tTrain-mse=0.698692\r\nEpoch[2] Batch [62400]\tSpeed: 53206.32 samples/sec\tTrain-mse=0.716579\r\nEpoch[2] Batch [62500]\tSpeed: 52938.28 samples/sec\tTrain-mse=0.733243\r\nPreparing data iterators for ./data/ml-10M100K/r1.train ... \r\nPreparing data iterators for ./data/ml-10M100K/r1.test ... 
\r\nTraining started ...\r\nepoch 0, eval MSE = 0.99958598613739 \r\nepoch 1, eval MSE = 1.03603255748749 \r\nepoch 2, eval MSE = 1.07017529010773 \r\nTraining completed.\r\n```\r\n\r\n", - "pr_file_module": null - }, - { - "comment_id": "794693489", - "repo_full_name": "apache/mxnet", - "pr_number": 20852, - "pr_file": "perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm", - "discussion_id": "793919904", - "commented_code": "@@ -79,7 +79,7 @@ use overload '\"\"' => sub {\n has 'name' => (is => 'rw', isa => 'Str');\n has 'num' => (is => 'rw', isa => 'Int');\n has 'num_inst' => (is => 'rw', isa => 'Maybe[Int|ArrayRef[Int]]');\n-has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]]');\n+has 'sum_metric' => (is => 'rw', isa => 'Maybe[Num|ArrayRef[Num]|PDL]');", - "comment_created_at": "2022-01-28T17:06:01+00:00", - "comment_author": "sergeykolychev", - "comment_body": "Looks good", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "688373879", - "pr_number": 20514, - "pr_file": "src/operator/quantization/quantize_v2-inl.h", - "created_at": "2021-08-13T09:23:55+00:00", - "commented_code": "int out_type;\n dmlc::optional min_calib_range;\n dmlc::optional max_calib_range;\n dmlc::optional shifted;\n dmlc::optional shifted_output;", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "688373879", - "repo_full_name": "apache/mxnet", - "pr_number": 20514, - "pr_file": "src/operator/quantization/quantize_v2-inl.h", - "discussion_id": "688373879", - "commented_code": "@@ -41,7 +41,7 @@ struct QuantizeV2Param : public dmlc::Parameter {\n int out_type;\n dmlc::optional min_calib_range;\n dmlc::optional max_calib_range;\n- dmlc::optional shifted;\n+ dmlc::optional shifted_output;", - "comment_created_at": "2021-08-13T09:23:55+00:00", - "comment_author": "mozga-intel", - "comment_body": "The dmlc::optional is found a use for representing a value that may or may not be present. The std::optional was introduced in C++17, so it means that we can try to replace this one by a new one. Unfortunately, there is a bunch of branches have not supported C++17 yet. An exceptation is the master branch that supports c++17, well, maybe is worth making an internal wrapper. On second thoughts, it could be looks like: [proposition]\r\n```cpp\r\n#define MXNET_HAS_OPTIONAL() 0\r\n#if __cplusplus >= 201703L\r\n# ifdef __has_include\r\n# if __has_include()\r\n# include \r\n# undef MXNET_HAS_OPTIONAL\r\n# define MXNET_HAS_OPTIONAL() 1\r\n# endif\r\n# endif\r\n#endif\r\n\r\n#if MXNET_HAS_OPTIONAL()\r\n#warning \"optional\"\r\n// Please use std::optional\r\n#else\r\n#warning \"no optional\"\r\n// Please use dmlc::optional\r\n#endif\r\n```\r\nWhat are the advantages of using it?", - "pr_file_module": null - }, - { - "comment_id": "688415411", - "repo_full_name": "apache/mxnet", - "pr_number": 20514, - "pr_file": "src/operator/quantization/quantize_v2-inl.h", - "discussion_id": "688373879", - "commented_code": "@@ -41,7 +41,7 @@ struct QuantizeV2Param : public dmlc::Parameter {\n int out_type;\n dmlc::optional min_calib_range;\n dmlc::optional max_calib_range;\n- dmlc::optional shifted;\n+ dmlc::optional shifted_output;", - "comment_created_at": "2021-08-13T10:35:38+00:00", - "comment_author": "sfraczek", - "comment_body": "I think v1.x is more strongly backward compatibility oriented. I think it would be better to keep such change only to master. dmlc::optional is used in a lot of places as well as other functions so it might be hard to justify rewriting parts of it. 
I don't know what are advantages of either implementations. \r\nAlso, in this PR I would prefer to keep consistency with what is already used in operators.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "763881978", - "pr_number": 20745, - "pr_file": "src/operator/tensor/indexing_op.cc", - "created_at": "2021-12-07T11:04:38+00:00", - "commented_code": "}\n};\n\ntemplate \nstruct TakeNonzeroAxisCPU {\n /*!\n * \\brief Map function for take operator\n * \\param i global thread id\n * \\param out_data ptr to output buffer\n * \\param in_data ptr to input buffer\n * \\param idx ptr to indices buffer\n * \\param outer_dim_stride stride of dimension before axis\n * \\param axis_dim_stride stride of axis dimension\n * \\param idx_size size of the indices tensor\n * \\param axis_dim dim size of the axis dimension\n * \\param axis axis id\n */\n template \n MSHADOW_XINLINE static void Map(index_t i,\n DType* out_data,\n const DType* in_data,\n const IType* indices,\n const index_t outer_dim_stride,\n const index_t axis_dim_stride,\n const int idx_size,\n const int axis_dim,\n const int axis) {\n for (index_t j = 0; j < static_cast(idx_size); ++j) {\n int index = indices[j];\n if (clip) {\n index = (index < 0) ? 0 : index;", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "763881978", - "repo_full_name": "apache/mxnet", - "pr_number": 20745, - "pr_file": "src/operator/tensor/indexing_op.cc", - "discussion_id": "763881978", - "commented_code": "@@ -60,6 +60,51 @@ struct TakeZeroAxisCPU {\n }\n };\n \n+template \n+struct TakeNonzeroAxisCPU {\n+ /*!\n+ * \\brief Map function for take operator\n+ * \\param i global thread id\n+ * \\param out_data ptr to output buffer\n+ * \\param in_data ptr to input buffer\n+ * \\param idx ptr to indices buffer\n+ * \\param outer_dim_stride stride of dimension before axis\n+ * \\param axis_dim_stride stride of axis dimension\n+ * \\param idx_size size of the indices tensor\n+ * \\param axis_dim dim size of the axis dimension\n+ * \\param axis axis id\n+ */\n+ template \n+ MSHADOW_XINLINE static void Map(index_t i,\n+ DType* out_data,\n+ const DType* in_data,\n+ const IType* indices,\n+ const index_t outer_dim_stride,\n+ const index_t axis_dim_stride,\n+ const int idx_size,\n+ const int axis_dim,\n+ const int axis) {\n+ for (index_t j = 0; j < static_cast(idx_size); ++j) {\n+ int index = indices[j];\n+ if (clip) {\n+ index = (index < 0) ? 
0 : index;", - "comment_created_at": "2021-12-07T11:04:38+00:00", - "comment_author": "bartekkuncer", - "comment_body": "Maybe use max(0,index)?", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-use-intent-revealing-names.json b/_reviewers/mxnet-use-intent-revealing-names.json new file mode 100644 index 0000000..d90d7b2 --- /dev/null +++ b/_reviewers/mxnet-use-intent-revealing-names.json @@ -0,0 +1,160 @@ +[ + { + "discussion_id": "918911666", + "pr_number": 21002, + "pr_file": "src/common/utils.h", + "created_at": "2022-07-12T12:31:46+00:00", + "commented_code": "}\n\ninline bool is_float(const int dtype) {", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "918911666", + "repo_full_name": "apache/mxnet", + "pr_number": 21002, + "pr_file": "src/common/utils.h", + "discussion_id": "918911666", + "commented_code": "@@ -929,11 +929,8 @@ inline mxnet::TShape CanonicalizeAxes(const mxnet::TShape& src) {\n }\n \n inline bool is_float(const int dtype) {", + "comment_created_at": "2022-07-12T12:31:46+00:00", + "comment_author": "anko-intel", + "comment_body": "maybe we can change the name to is_floating to emphase that it is not an answer for particular type like float but common floating point group of data representation\r\n```suggestion\r\ninline bool is_floating(const int dtype) {\r\n```", + "pr_file_module": null + }, + { + "comment_id": "921936461", + "repo_full_name": "apache/mxnet", + "pr_number": 21002, + "pr_file": "src/common/utils.h", + "discussion_id": "918911666", + "commented_code": "@@ -929,11 +929,8 @@ inline mxnet::TShape CanonicalizeAxes(const mxnet::TShape& src) {\n }\n \n inline bool is_float(const int dtype) {", + "comment_created_at": "2022-07-15T08:25:13+00:00", + "comment_author": "bgawrych", + "comment_body": "can be done in other PR to keep this one simple to track relevant changes", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "751251873", + "pr_number": 20669, + "pr_file": "src/operator/nn/dnnl/dnnl_reduce-inl.h", + "created_at": "2021-11-17T13:42:39+00:00", + "commented_code": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied. 
See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n/*!\n * \\file dnnl_reduce-inl.h\n */\n\n#ifndef MXNET_OPERATOR_NN_DNNL_DNNL_REDUCE_INL_H_\n#define MXNET_OPERATOR_NN_DNNL_DNNL_REDUCE_INL_H_\n\n#if MXNET_USE_ONEDNN == 1\n#include \n\n#include \"./dnnl_base-inl.h\"\n#include \"./dnnl_ops-inl.h\"\n\nnamespace mxnet {\nnamespace op {\n\nusing reduce_fwd_t = dnnl::reduction;\nusing reduce_fwd_pd_t = dnnl::reduction::primitive_desc;\nstruct NumpyReduceAxesParam;\nstruct ReduceAxesParam;\nclass DNNLReduceFwd {\n public:\n struct Tensors {\n Tensors(const NDArray& data, const NDArray& out);\n\n const NDArray& data;\n const NDArray& out;\n };\n\n static DNNLReduceFwd GetCached(const NumpyReduceAxesParam& param,\n const Tensors& tensors,\n const bool is_train,\n const dnnl::algorithm reduction_alg);\n\n static reduce_fwd_pd_t GetReduceFwdPd(const dnnl::memory::desc& input_md,\n const dnnl::memory::desc& output_md,\n const dnnl::algorithm reduction_alg);\n\n DNNLReduceFwd(const NumpyReduceAxesParam& param,\n const Tensors& tensors,\n const bool is_train,\n const dnnl::algorithm reduction_alg);\n void Execute(const Tensors& tensors) const;\n\n private:\n std::shared_ptr reduce_pd;\n std::shared_ptr reduce_fwd;\n};\n\ntemplate \nNumpyReduceAxesParam ConvertParamsToNumpy(const T& original_param,", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "751251873", + "repo_full_name": "apache/mxnet", + "pr_number": 20669, + "pr_file": "src/operator/nn/dnnl/dnnl_reduce-inl.h", + "discussion_id": "751251873", + "commented_code": "@@ -0,0 +1,108 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. 
See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * \\file dnnl_reduce-inl.h\n+ */\n+\n+#ifndef MXNET_OPERATOR_NN_DNNL_DNNL_REDUCE_INL_H_\n+#define MXNET_OPERATOR_NN_DNNL_DNNL_REDUCE_INL_H_\n+\n+#if MXNET_USE_ONEDNN == 1\n+#include \n+\n+#include \"./dnnl_base-inl.h\"\n+#include \"./dnnl_ops-inl.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+using reduce_fwd_t = dnnl::reduction;\n+using reduce_fwd_pd_t = dnnl::reduction::primitive_desc;\n+struct NumpyReduceAxesParam;\n+struct ReduceAxesParam;\n+class DNNLReduceFwd {\n+ public:\n+ struct Tensors {\n+ Tensors(const NDArray& data, const NDArray& out);\n+\n+ const NDArray& data;\n+ const NDArray& out;\n+ };\n+\n+ static DNNLReduceFwd GetCached(const NumpyReduceAxesParam& param,\n+ const Tensors& tensors,\n+ const bool is_train,\n+ const dnnl::algorithm reduction_alg);\n+\n+ static reduce_fwd_pd_t GetReduceFwdPd(const dnnl::memory::desc& input_md,\n+ const dnnl::memory::desc& output_md,\n+ const dnnl::algorithm reduction_alg);\n+\n+ DNNLReduceFwd(const NumpyReduceAxesParam& param,\n+ const Tensors& tensors,\n+ const bool is_train,\n+ const dnnl::algorithm reduction_alg);\n+ void Execute(const Tensors& tensors) const;\n+\n+ private:\n+ std::shared_ptr reduce_pd;\n+ std::shared_ptr reduce_fwd;\n+};\n+\n+template \n+NumpyReduceAxesParam ConvertParamsToNumpy(const T& original_param,", + "comment_created_at": "2021-11-17T13:42:39+00:00", + "comment_author": "anko-intel", + "comment_body": "is it possible that different class has the same functionality? So maybe the function name should be different depending what is returned?", + "pr_file_module": null + }, + { + "comment_id": "752982322", + "repo_full_name": "apache/mxnet", + "pr_number": 20669, + "pr_file": "src/operator/nn/dnnl/dnnl_reduce-inl.h", + "discussion_id": "751251873", + "commented_code": "@@ -0,0 +1,108 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. 
See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * \\file dnnl_reduce-inl.h\n+ */\n+\n+#ifndef MXNET_OPERATOR_NN_DNNL_DNNL_REDUCE_INL_H_\n+#define MXNET_OPERATOR_NN_DNNL_DNNL_REDUCE_INL_H_\n+\n+#if MXNET_USE_ONEDNN == 1\n+#include \n+\n+#include \"./dnnl_base-inl.h\"\n+#include \"./dnnl_ops-inl.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+using reduce_fwd_t = dnnl::reduction;\n+using reduce_fwd_pd_t = dnnl::reduction::primitive_desc;\n+struct NumpyReduceAxesParam;\n+struct ReduceAxesParam;\n+class DNNLReduceFwd {\n+ public:\n+ struct Tensors {\n+ Tensors(const NDArray& data, const NDArray& out);\n+\n+ const NDArray& data;\n+ const NDArray& out;\n+ };\n+\n+ static DNNLReduceFwd GetCached(const NumpyReduceAxesParam& param,\n+ const Tensors& tensors,\n+ const bool is_train,\n+ const dnnl::algorithm reduction_alg);\n+\n+ static reduce_fwd_pd_t GetReduceFwdPd(const dnnl::memory::desc& input_md,\n+ const dnnl::memory::desc& output_md,\n+ const dnnl::algorithm reduction_alg);\n+\n+ DNNLReduceFwd(const NumpyReduceAxesParam& param,\n+ const Tensors& tensors,\n+ const bool is_train,\n+ const dnnl::algorithm reduction_alg);\n+ void Execute(const Tensors& tensors) const;\n+\n+ private:\n+ std::shared_ptr reduce_pd;\n+ std::shared_ptr reduce_fwd;\n+};\n+\n+template \n+NumpyReduceAxesParam ConvertParamsToNumpy(const T& original_param,", + "comment_created_at": "2021-11-19T08:57:10+00:00", + "comment_author": "bgawrych", + "comment_body": "I've changed name to ConvertReduceParamsToNumpy and also in other file ConvertTransposeParamsToNumpy", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "693907785", + "pr_number": 20474, + "pr_file": "src/initialize.cc", + "created_at": "2021-08-23T12:00:11+00:00", + "commented_code": "cpu_worker_nthreads_(dmlc::GetEnv(\"MXNET_CPU_WORKER_NTHREADS\", 1)),\n mp_cv_num_threads_(dmlc::GetEnv(\"MXNET_MP_OPENCV_NUM_THREADS\", 0)) {\n dmlc::InitLogging(\"mxnet\");\n\n#if !(defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__))", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "693907785", + "repo_full_name": "apache/mxnet", + "pr_number": 20474, + "pr_file": "src/initialize.cc", + "discussion_id": "693907785", + "commented_code": "@@ -93,6 +95,24 @@ LibraryInitializer::LibraryInitializer()\n cpu_worker_nthreads_(dmlc::GetEnv(\"MXNET_CPU_WORKER_NTHREADS\", 1)),\n mp_cv_num_threads_(dmlc::GetEnv(\"MXNET_MP_OPENCV_NUM_THREADS\", 0)) {\n dmlc::InitLogging(\"mxnet\");\n+\n+#if !(defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__))", + "comment_created_at": "2021-08-23T12:00:11+00:00", + "comment_author": "bgawrych", + "comment_body": "maybe it should be moved to a function like install_pthread_atfork_handlers - it probably will be more descriptive on what's happening here", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "643227554", + "pr_number": 19896, + "pr_file": "src/operator/contrib/adaptive_avg_pooling.cc", + "created_at": "2021-06-01T15:45:02+00:00", + "commented_code": "}\n}\n\n#if MXNET_USE_MKLDNN == 1\nbool SupportMKLDNNAveragePooling(const NDArray &in_data,\n const NDArray &out_data) {\n for (int64_t idx = 2; idx < in_data.shape().ndim(); ++idx) {\n const int s1 = in_data.shape()[idx];\n const int s2 = out_data.shape()[idx];\n if (s2 == 0) {\n return false;\n }\n if (s1 % s2 != 0) {\n return false;\n }\n }\n const int IH = in_data.shape()[2];\n const int IW = in_data.shape()[3];\n const int OH = 
out_data.shape()[2];\n const int OW = out_data.shape()[3];\n\n const int strides_1 = floor((IH << 1) / OH) - floor(IH / OH);\n const int strides_2 = floor((IW << 1) / OW) - floor(IW / OW);", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "643227554", + "repo_full_name": "apache/mxnet", + "pr_number": 19896, + "pr_file": "src/operator/contrib/adaptive_avg_pooling.cc", + "discussion_id": "643227554", + "commented_code": "@@ -197,6 +198,81 @@ num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())\n }\n }\n \n+#if MXNET_USE_MKLDNN == 1\n+bool SupportMKLDNNAveragePooling(const NDArray &in_data,\n+ const NDArray &out_data) {\n+ for (int64_t idx = 2; idx < in_data.shape().ndim(); ++idx) {\n+ const int s1 = in_data.shape()[idx];\n+ const int s2 = out_data.shape()[idx];\n+ if (s2 == 0) {\n+ return false;\n+ }\n+ if (s1 % s2 != 0) {\n+ return false;\n+ }\n+ }\n+ const int IH = in_data.shape()[2];\n+ const int IW = in_data.shape()[3];\n+ const int OH = out_data.shape()[2];\n+ const int OW = out_data.shape()[3];\n+\n+ const int strides_1 = floor((IH << 1) / OH) - floor(IH / OH);\n+ const int strides_2 = floor((IW << 1) / OW) - floor(IW / OW);", + "comment_created_at": "2021-06-01T15:45:02+00:00", + "comment_author": "akarbown", + "comment_body": "Maybe worth considering to rename 'strides_1, strides_2' to 'strides_H/strides_W'?", + "pr_file_module": null + }, + { + "comment_id": "650936307", + "repo_full_name": "apache/mxnet", + "pr_number": 19896, + "pr_file": "src/operator/contrib/adaptive_avg_pooling.cc", + "discussion_id": "643227554", + "commented_code": "@@ -197,6 +198,81 @@ num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())\n }\n }\n \n+#if MXNET_USE_MKLDNN == 1\n+bool SupportMKLDNNAveragePooling(const NDArray &in_data,\n+ const NDArray &out_data) {\n+ for (int64_t idx = 2; idx < in_data.shape().ndim(); ++idx) {\n+ const int s1 = in_data.shape()[idx];\n+ const int s2 = out_data.shape()[idx];\n+ if (s2 == 0) {\n+ return false;\n+ }\n+ if (s1 % s2 != 0) {\n+ return false;\n+ }\n+ }\n+ const int IH = in_data.shape()[2];\n+ const int IW = in_data.shape()[3];\n+ const int OH = out_data.shape()[2];\n+ const int OW = out_data.shape()[3];\n+\n+ const int strides_1 = floor((IH << 1) / OH) - floor(IH / OH);\n+ const int strides_2 = floor((IW << 1) / OW) - floor(IW / OW);", + "comment_created_at": "2021-06-14T13:15:33+00:00", + "comment_author": "mozga-intel", + "comment_body": "Done", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "663079191", + "pr_number": 20302, + "pr_file": "src/operator/quantization/quantization_utils.h", + "created_at": "2021-07-02T15:05:08+00:00", + "commented_code": "return broadcast::ReduceWorkspaceSize(s, *dst_shape, kWriteTo, *src_shape);\n}\n\nenum QuantizeOutType { kAuto = 0, kInt8, kUint8 };\nenum QuantizeOutType { qAuto = 0, qInt8, qUint8 };", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "663079191", + "repo_full_name": "apache/mxnet", + "pr_number": 20302, + "pr_file": "src/operator/quantization/quantization_utils.h", + "discussion_id": "663079191", + "commented_code": "@@ -187,22 +187,22 @@ inline size_t ConfigReduce(mshadow::Stream* s,\n return broadcast::ReduceWorkspaceSize(s, *dst_shape, kWriteTo, *src_shape);\n }\n \n-enum QuantizeOutType { kAuto = 0, kInt8, kUint8 };\n+enum QuantizeOutType { qAuto = 0, qInt8, qUint8 };", + "comment_created_at": "2021-07-02T15:05:08+00:00", + "comment_author": "szha", + "comment_body": "just curious about the naming, 
why the q prefix?", + "pr_file_module": null + }, + { + "comment_id": "663082456", + "repo_full_name": "apache/mxnet", + "pr_number": 20302, + "pr_file": "src/operator/quantization/quantization_utils.h", + "discussion_id": "663079191", + "commented_code": "@@ -187,22 +187,22 @@ inline size_t ConfigReduce(mshadow::Stream* s,\n return broadcast::ReduceWorkspaceSize(s, *dst_shape, kWriteTo, *src_shape);\n }\n \n-enum QuantizeOutType { kAuto = 0, kInt8, kUint8 };\n+enum QuantizeOutType { qAuto = 0, qInt8, qUint8 };", + "comment_created_at": "2021-07-02T15:09:57+00:00", + "comment_author": "sfraczek", + "comment_body": "Because there is already name mshadow::kInt8 and mshadow::kUint8 which I was intending to use but when I wrote just kUint8 without prefix `mshadow::` in front of it this enum got used instead and it was hard to detect error.Adding q makes a clear distinction at low cost. I think it's for the better since this enum is scarcely used anyway.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-use-intent-revealing-names.md b/_reviewers/mxnet-use-intent-revealing-names.md index 2cc1a32..efe6ff2 100644 --- a/_reviewers/mxnet-use-intent-revealing-names.md +++ b/_reviewers/mxnet-use-intent-revealing-names.md @@ -53,165 +53,3 @@ bool disable_fuse_dequantize = dmlc::GetEnv("MXNET_DISABLE_ONEDNN_FUSE_DEQUANTIZ ``` Extract meaningful operations into well-named functions instead of using inline code blocks. - - -[ - { - "discussion_id": "918911666", - "pr_number": 21002, - "pr_file": "src/common/utils.h", - "created_at": "2022-07-12T12:31:46+00:00", - "commented_code": "}\n\ninline bool is_float(const int dtype) {", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "918911666", - "repo_full_name": "apache/mxnet", - "pr_number": 21002, - "pr_file": "src/common/utils.h", - "discussion_id": "918911666", - "commented_code": "@@ -929,11 +929,8 @@ inline mxnet::TShape CanonicalizeAxes(const mxnet::TShape& src) {\n }\n \n inline bool is_float(const int dtype) {", - "comment_created_at": "2022-07-12T12:31:46+00:00", - "comment_author": "anko-intel", - "comment_body": "maybe we can change the name to is_floating to emphase that it is not an answer for particular type like float but common floating point group of data representation\r\n```suggestion\r\ninline bool is_floating(const int dtype) {\r\n```", - "pr_file_module": null - }, - { - "comment_id": "921936461", - "repo_full_name": "apache/mxnet", - "pr_number": 21002, - "pr_file": "src/common/utils.h", - "discussion_id": "918911666", - "commented_code": "@@ -929,11 +929,8 @@ inline mxnet::TShape CanonicalizeAxes(const mxnet::TShape& src) {\n }\n \n inline bool is_float(const int dtype) {", - "comment_created_at": "2022-07-15T08:25:13+00:00", - "comment_author": "bgawrych", - "comment_body": "can be done in other PR to keep this one simple to track relevant changes", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "751251873", - "pr_number": 20669, - "pr_file": "src/operator/nn/dnnl/dnnl_reduce-inl.h", - "created_at": "2021-11-17T13:42:39+00:00", - "commented_code": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. 
You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied. See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n/*!\n * \\file dnnl_reduce-inl.h\n */\n\n#ifndef MXNET_OPERATOR_NN_DNNL_DNNL_REDUCE_INL_H_\n#define MXNET_OPERATOR_NN_DNNL_DNNL_REDUCE_INL_H_\n\n#if MXNET_USE_ONEDNN == 1\n#include \n\n#include \"./dnnl_base-inl.h\"\n#include \"./dnnl_ops-inl.h\"\n\nnamespace mxnet {\nnamespace op {\n\nusing reduce_fwd_t = dnnl::reduction;\nusing reduce_fwd_pd_t = dnnl::reduction::primitive_desc;\nstruct NumpyReduceAxesParam;\nstruct ReduceAxesParam;\nclass DNNLReduceFwd {\n public:\n struct Tensors {\n Tensors(const NDArray& data, const NDArray& out);\n\n const NDArray& data;\n const NDArray& out;\n };\n\n static DNNLReduceFwd GetCached(const NumpyReduceAxesParam& param,\n const Tensors& tensors,\n const bool is_train,\n const dnnl::algorithm reduction_alg);\n\n static reduce_fwd_pd_t GetReduceFwdPd(const dnnl::memory::desc& input_md,\n const dnnl::memory::desc& output_md,\n const dnnl::algorithm reduction_alg);\n\n DNNLReduceFwd(const NumpyReduceAxesParam& param,\n const Tensors& tensors,\n const bool is_train,\n const dnnl::algorithm reduction_alg);\n void Execute(const Tensors& tensors) const;\n\n private:\n std::shared_ptr reduce_pd;\n std::shared_ptr reduce_fwd;\n};\n\ntemplate \nNumpyReduceAxesParam ConvertParamsToNumpy(const T& original_param,", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "751251873", - "repo_full_name": "apache/mxnet", - "pr_number": 20669, - "pr_file": "src/operator/nn/dnnl/dnnl_reduce-inl.h", - "discussion_id": "751251873", - "commented_code": "@@ -0,0 +1,108 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. 
See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * \\file dnnl_reduce-inl.h\n+ */\n+\n+#ifndef MXNET_OPERATOR_NN_DNNL_DNNL_REDUCE_INL_H_\n+#define MXNET_OPERATOR_NN_DNNL_DNNL_REDUCE_INL_H_\n+\n+#if MXNET_USE_ONEDNN == 1\n+#include \n+\n+#include \"./dnnl_base-inl.h\"\n+#include \"./dnnl_ops-inl.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+using reduce_fwd_t = dnnl::reduction;\n+using reduce_fwd_pd_t = dnnl::reduction::primitive_desc;\n+struct NumpyReduceAxesParam;\n+struct ReduceAxesParam;\n+class DNNLReduceFwd {\n+ public:\n+ struct Tensors {\n+ Tensors(const NDArray& data, const NDArray& out);\n+\n+ const NDArray& data;\n+ const NDArray& out;\n+ };\n+\n+ static DNNLReduceFwd GetCached(const NumpyReduceAxesParam& param,\n+ const Tensors& tensors,\n+ const bool is_train,\n+ const dnnl::algorithm reduction_alg);\n+\n+ static reduce_fwd_pd_t GetReduceFwdPd(const dnnl::memory::desc& input_md,\n+ const dnnl::memory::desc& output_md,\n+ const dnnl::algorithm reduction_alg);\n+\n+ DNNLReduceFwd(const NumpyReduceAxesParam& param,\n+ const Tensors& tensors,\n+ const bool is_train,\n+ const dnnl::algorithm reduction_alg);\n+ void Execute(const Tensors& tensors) const;\n+\n+ private:\n+ std::shared_ptr reduce_pd;\n+ std::shared_ptr reduce_fwd;\n+};\n+\n+template \n+NumpyReduceAxesParam ConvertParamsToNumpy(const T& original_param,", - "comment_created_at": "2021-11-17T13:42:39+00:00", - "comment_author": "anko-intel", - "comment_body": "is it possible that different class has the same functionality? So maybe the function name should be different depending what is returned?", - "pr_file_module": null - }, - { - "comment_id": "752982322", - "repo_full_name": "apache/mxnet", - "pr_number": 20669, - "pr_file": "src/operator/nn/dnnl/dnnl_reduce-inl.h", - "discussion_id": "751251873", - "commented_code": "@@ -0,0 +1,108 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. 
See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * \\file dnnl_reduce-inl.h\n+ */\n+\n+#ifndef MXNET_OPERATOR_NN_DNNL_DNNL_REDUCE_INL_H_\n+#define MXNET_OPERATOR_NN_DNNL_DNNL_REDUCE_INL_H_\n+\n+#if MXNET_USE_ONEDNN == 1\n+#include \n+\n+#include \"./dnnl_base-inl.h\"\n+#include \"./dnnl_ops-inl.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+using reduce_fwd_t = dnnl::reduction;\n+using reduce_fwd_pd_t = dnnl::reduction::primitive_desc;\n+struct NumpyReduceAxesParam;\n+struct ReduceAxesParam;\n+class DNNLReduceFwd {\n+ public:\n+ struct Tensors {\n+ Tensors(const NDArray& data, const NDArray& out);\n+\n+ const NDArray& data;\n+ const NDArray& out;\n+ };\n+\n+ static DNNLReduceFwd GetCached(const NumpyReduceAxesParam& param,\n+ const Tensors& tensors,\n+ const bool is_train,\n+ const dnnl::algorithm reduction_alg);\n+\n+ static reduce_fwd_pd_t GetReduceFwdPd(const dnnl::memory::desc& input_md,\n+ const dnnl::memory::desc& output_md,\n+ const dnnl::algorithm reduction_alg);\n+\n+ DNNLReduceFwd(const NumpyReduceAxesParam& param,\n+ const Tensors& tensors,\n+ const bool is_train,\n+ const dnnl::algorithm reduction_alg);\n+ void Execute(const Tensors& tensors) const;\n+\n+ private:\n+ std::shared_ptr reduce_pd;\n+ std::shared_ptr reduce_fwd;\n+};\n+\n+template \n+NumpyReduceAxesParam ConvertParamsToNumpy(const T& original_param,", - "comment_created_at": "2021-11-19T08:57:10+00:00", - "comment_author": "bgawrych", - "comment_body": "I've changed name to ConvertReduceParamsToNumpy and also in other file ConvertTransposeParamsToNumpy", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "693907785", - "pr_number": 20474, - "pr_file": "src/initialize.cc", - "created_at": "2021-08-23T12:00:11+00:00", - "commented_code": "cpu_worker_nthreads_(dmlc::GetEnv(\"MXNET_CPU_WORKER_NTHREADS\", 1)),\n mp_cv_num_threads_(dmlc::GetEnv(\"MXNET_MP_OPENCV_NUM_THREADS\", 0)) {\n dmlc::InitLogging(\"mxnet\");\n\n#if !(defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__))", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "693907785", - "repo_full_name": "apache/mxnet", - "pr_number": 20474, - "pr_file": "src/initialize.cc", - "discussion_id": "693907785", - "commented_code": "@@ -93,6 +95,24 @@ LibraryInitializer::LibraryInitializer()\n cpu_worker_nthreads_(dmlc::GetEnv(\"MXNET_CPU_WORKER_NTHREADS\", 1)),\n mp_cv_num_threads_(dmlc::GetEnv(\"MXNET_MP_OPENCV_NUM_THREADS\", 0)) {\n dmlc::InitLogging(\"mxnet\");\n+\n+#if !(defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__))", - "comment_created_at": "2021-08-23T12:00:11+00:00", - "comment_author": "bgawrych", - "comment_body": "maybe it should be moved to a function like install_pthread_atfork_handlers - it probably will be more descriptive on what's happening here", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "643227554", - "pr_number": 19896, - "pr_file": "src/operator/contrib/adaptive_avg_pooling.cc", - "created_at": "2021-06-01T15:45:02+00:00", - "commented_code": "}\n}\n\n#if MXNET_USE_MKLDNN == 1\nbool SupportMKLDNNAveragePooling(const NDArray &in_data,\n const NDArray &out_data) {\n for (int64_t idx = 2; idx < in_data.shape().ndim(); ++idx) {\n const int s1 = in_data.shape()[idx];\n const int s2 = out_data.shape()[idx];\n if (s2 == 0) {\n return false;\n }\n if (s1 % s2 != 0) {\n return false;\n }\n }\n const int IH = in_data.shape()[2];\n const int IW = in_data.shape()[3];\n const int OH = 
out_data.shape()[2];\n const int OW = out_data.shape()[3];\n\n const int strides_1 = floor((IH << 1) / OH) - floor(IH / OH);\n const int strides_2 = floor((IW << 1) / OW) - floor(IW / OW);", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "643227554", - "repo_full_name": "apache/mxnet", - "pr_number": 19896, - "pr_file": "src/operator/contrib/adaptive_avg_pooling.cc", - "discussion_id": "643227554", - "commented_code": "@@ -197,6 +198,81 @@ num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())\n }\n }\n \n+#if MXNET_USE_MKLDNN == 1\n+bool SupportMKLDNNAveragePooling(const NDArray &in_data,\n+ const NDArray &out_data) {\n+ for (int64_t idx = 2; idx < in_data.shape().ndim(); ++idx) {\n+ const int s1 = in_data.shape()[idx];\n+ const int s2 = out_data.shape()[idx];\n+ if (s2 == 0) {\n+ return false;\n+ }\n+ if (s1 % s2 != 0) {\n+ return false;\n+ }\n+ }\n+ const int IH = in_data.shape()[2];\n+ const int IW = in_data.shape()[3];\n+ const int OH = out_data.shape()[2];\n+ const int OW = out_data.shape()[3];\n+\n+ const int strides_1 = floor((IH << 1) / OH) - floor(IH / OH);\n+ const int strides_2 = floor((IW << 1) / OW) - floor(IW / OW);", - "comment_created_at": "2021-06-01T15:45:02+00:00", - "comment_author": "akarbown", - "comment_body": "Maybe worth considering to rename 'strides_1, strides_2' to 'strides_H/strides_W'?", - "pr_file_module": null - }, - { - "comment_id": "650936307", - "repo_full_name": "apache/mxnet", - "pr_number": 19896, - "pr_file": "src/operator/contrib/adaptive_avg_pooling.cc", - "discussion_id": "643227554", - "commented_code": "@@ -197,6 +198,81 @@ num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())\n }\n }\n \n+#if MXNET_USE_MKLDNN == 1\n+bool SupportMKLDNNAveragePooling(const NDArray &in_data,\n+ const NDArray &out_data) {\n+ for (int64_t idx = 2; idx < in_data.shape().ndim(); ++idx) {\n+ const int s1 = in_data.shape()[idx];\n+ const int s2 = out_data.shape()[idx];\n+ if (s2 == 0) {\n+ return false;\n+ }\n+ if (s1 % s2 != 0) {\n+ return false;\n+ }\n+ }\n+ const int IH = in_data.shape()[2];\n+ const int IW = in_data.shape()[3];\n+ const int OH = out_data.shape()[2];\n+ const int OW = out_data.shape()[3];\n+\n+ const int strides_1 = floor((IH << 1) / OH) - floor(IH / OH);\n+ const int strides_2 = floor((IW << 1) / OW) - floor(IW / OW);", - "comment_created_at": "2021-06-14T13:15:33+00:00", - "comment_author": "mozga-intel", - "comment_body": "Done", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "663079191", - "pr_number": 20302, - "pr_file": "src/operator/quantization/quantization_utils.h", - "created_at": "2021-07-02T15:05:08+00:00", - "commented_code": "return broadcast::ReduceWorkspaceSize(s, *dst_shape, kWriteTo, *src_shape);\n}\n\nenum QuantizeOutType { kAuto = 0, kInt8, kUint8 };\nenum QuantizeOutType { qAuto = 0, qInt8, qUint8 };", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "663079191", - "repo_full_name": "apache/mxnet", - "pr_number": 20302, - "pr_file": "src/operator/quantization/quantization_utils.h", - "discussion_id": "663079191", - "commented_code": "@@ -187,22 +187,22 @@ inline size_t ConfigReduce(mshadow::Stream* s,\n return broadcast::ReduceWorkspaceSize(s, *dst_shape, kWriteTo, *src_shape);\n }\n \n-enum QuantizeOutType { kAuto = 0, kInt8, kUint8 };\n+enum QuantizeOutType { qAuto = 0, qInt8, qUint8 };", - "comment_created_at": "2021-07-02T15:05:08+00:00", - "comment_author": "szha", - "comment_body": "just curious about the naming, 
why the q prefix?", - "pr_file_module": null - }, - { - "comment_id": "663082456", - "repo_full_name": "apache/mxnet", - "pr_number": 20302, - "pr_file": "src/operator/quantization/quantization_utils.h", - "discussion_id": "663079191", - "commented_code": "@@ -187,22 +187,22 @@ inline size_t ConfigReduce(mshadow::Stream* s,\n return broadcast::ReduceWorkspaceSize(s, *dst_shape, kWriteTo, *src_shape);\n }\n \n-enum QuantizeOutType { kAuto = 0, kInt8, kUint8 };\n+enum QuantizeOutType { qAuto = 0, qInt8, qUint8 };", - "comment_created_at": "2021-07-02T15:09:57+00:00", - "comment_author": "sfraczek", - "comment_body": "Because there is already name mshadow::kInt8 and mshadow::kUint8 which I was intending to use but when I wrote just kUint8 without prefix `mshadow::` in front of it this enum got used instead and it was hard to detect error.Adding q makes a clear distinction at low cost. I think it's for the better since this enum is scarcely used anyway.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-use-named-constants.json b/_reviewers/mxnet-use-named-constants.json new file mode 100644 index 0000000..1dde333 --- /dev/null +++ b/_reviewers/mxnet-use-named-constants.json @@ -0,0 +1,58 @@ +[ + { + "discussion_id": "823738415", + "pr_number": 20913, + "pr_file": "src/ndarray/ndarray.cc", + "created_at": "2022-03-10T13:51:24+00:00", + "commented_code": "dnnl::memory::dims dims;\n // These are shapes supprted by DNNL.\n if (shape.ndim() >= 1 && shape.ndim() <= 6) {\n if (shape.ndim() >= 1 && shape.ndim() <= 12) {", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "823738415", + "repo_full_name": "apache/mxnet", + "pr_number": 20913, + "pr_file": "src/ndarray/ndarray.cc", + "discussion_id": "823738415", + "commented_code": "@@ -605,36 +605,14 @@ void NDArray::Chunk::SetMKLMem(const mxnet::TShape& shape, int dtype) {\n \n dnnl::memory::dims dims;\n // These are shapes supprted by DNNL.\n- if (shape.ndim() >= 1 && shape.ndim() <= 6) {\n+ if (shape.ndim() >= 1 && shape.ndim() <= 12) {", + "comment_created_at": "2022-03-10T13:51:24+00:00", + "comment_author": "bgawrych", + "comment_body": "Can '12' be named by const variable? e.g. \"const int MAX_ONEDNN_DIMS\"", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "669908725", + "pr_number": 20430, + "pr_file": "src/operator/quantization/quantize_graph_pass.h", + "created_at": "2021-07-14T19:53:34+00:00", + "commented_code": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied. 
See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n/*!\n * Copyright (c) 2021 by Contributors\n * \\file quantize_graph_pass.h\n * \\brief\n */\n#ifndef MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n#define MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n\n#include \n#include \n#include \n#include \n#include \n#include \n#include \n#include \n#include \"quantize_v2-inl.h\"\n#include \"../nn/mkldnn/mkldnn_fully_connected-inl.h\"\n#include \"../../common/utils.h\"\n\nnamespace mxnet {\nnamespace op {\n\nusing nnvm::Symbol;\nusing nnvm::Node;\nusing nnvm::ObjectPtr;\nusing nnvm::NodeEntry;\nusing nnvm::Graph;\n\ninline ObjectPtr CreateNode(std::string op_name, std::string node_name) {\n ObjectPtr node = Node::Create();\n node->attrs.name = node_name;\n if (op_name == \"nullptr\") {\n node->attrs.op = nullptr;\n // ugly workaround because VariableParam is not exposed\n node->attrs.parsed =\n nnvm::Symbol::CreateVariable(node->attrs.name).outputs[0].node->attrs.parsed;\n } else {\n node->attrs.op = Op::Get(op_name);\n }\n return node;\n}\n\ntemplate \nstatic inline bool IsOneDNNFullyConnected(const ObjectPtr& n) {\n#if MXNET_USE_MKLDNN == 1\n if (n->op() == Op::Get(\"_sg_mkldnn_fully_connected\")) {\n auto const& param = nnvm::get(n->attrs.parsed);\n if (!(param.mkldnn_param.channel_wise_quantize.has_value() &&\n param.mkldnn_param.channel_wise_quantize.value())) {\n return !require_bias || (param.default_param.no_bias == false &&\n n->inputs[2].node->is_variable());", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "669908725", + "repo_full_name": "apache/mxnet", + "pr_number": 20430, + "pr_file": "src/operator/quantization/quantize_graph_pass.h", + "discussion_id": "669908725", + "commented_code": "@@ -0,0 +1,155 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. 
See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * Copyright (c) 2021 by Contributors\n+ * \\file quantize_graph_pass.h\n+ * \\brief\n+ */\n+#ifndef MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n+#define MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \"quantize_v2-inl.h\"\n+#include \"../nn/mkldnn/mkldnn_fully_connected-inl.h\"\n+#include \"../../common/utils.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+using nnvm::Symbol;\n+using nnvm::Node;\n+using nnvm::ObjectPtr;\n+using nnvm::NodeEntry;\n+using nnvm::Graph;\n+\n+inline ObjectPtr CreateNode(std::string op_name, std::string node_name) {\n+ ObjectPtr node = Node::Create();\n+ node->attrs.name = node_name;\n+ if (op_name == \"nullptr\") {\n+ node->attrs.op = nullptr;\n+ // ugly workaround because VariableParam is not exposed\n+ node->attrs.parsed =\n+ nnvm::Symbol::CreateVariable(node->attrs.name).outputs[0].node->attrs.parsed;\n+ } else {\n+ node->attrs.op = Op::Get(op_name);\n+ }\n+ return node;\n+}\n+\n+template \n+static inline bool IsOneDNNFullyConnected(const ObjectPtr& n) {\n+#if MXNET_USE_MKLDNN == 1\n+ if (n->op() == Op::Get(\"_sg_mkldnn_fully_connected\")) {\n+ auto const& param = nnvm::get(n->attrs.parsed);\n+ if (!(param.mkldnn_param.channel_wise_quantize.has_value() &&\n+ param.mkldnn_param.channel_wise_quantize.value())) {\n+ return !require_bias || (param.default_param.no_bias == false &&\n+ n->inputs[2].node->is_variable());", + "comment_created_at": "2021-07-14T19:53:34+00:00", + "comment_author": "anko-intel", + "comment_body": "instead of \"2\" it could be idx.bias", + "pr_file_module": null + }, + { + "comment_id": "670386574", + "repo_full_name": "apache/mxnet", + "pr_number": 20430, + "pr_file": "src/operator/quantization/quantize_graph_pass.h", + "discussion_id": "669908725", + "commented_code": "@@ -0,0 +1,155 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. 
See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * Copyright (c) 2021 by Contributors\n+ * \\file quantize_graph_pass.h\n+ * \\brief\n+ */\n+#ifndef MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n+#define MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \"quantize_v2-inl.h\"\n+#include \"../nn/mkldnn/mkldnn_fully_connected-inl.h\"\n+#include \"../../common/utils.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+using nnvm::Symbol;\n+using nnvm::Node;\n+using nnvm::ObjectPtr;\n+using nnvm::NodeEntry;\n+using nnvm::Graph;\n+\n+inline ObjectPtr CreateNode(std::string op_name, std::string node_name) {\n+ ObjectPtr node = Node::Create();\n+ node->attrs.name = node_name;\n+ if (op_name == \"nullptr\") {\n+ node->attrs.op = nullptr;\n+ // ugly workaround because VariableParam is not exposed\n+ node->attrs.parsed =\n+ nnvm::Symbol::CreateVariable(node->attrs.name).outputs[0].node->attrs.parsed;\n+ } else {\n+ node->attrs.op = Op::Get(op_name);\n+ }\n+ return node;\n+}\n+\n+template \n+static inline bool IsOneDNNFullyConnected(const ObjectPtr& n) {\n+#if MXNET_USE_MKLDNN == 1\n+ if (n->op() == Op::Get(\"_sg_mkldnn_fully_connected\")) {\n+ auto const& param = nnvm::get(n->attrs.parsed);\n+ if (!(param.mkldnn_param.channel_wise_quantize.has_value() &&\n+ param.mkldnn_param.channel_wise_quantize.value())) {\n+ return !require_bias || (param.default_param.no_bias == false &&\n+ n->inputs[2].node->is_variable());", + "comment_created_at": "2021-07-15T11:45:23+00:00", + "comment_author": "sfraczek", + "comment_body": "ok", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-use-named-constants.md b/_reviewers/mxnet-use-named-constants.md index e3af750..a65ede8 100644 --- a/_reviewers/mxnet-use-named-constants.md +++ b/_reviewers/mxnet-use-named-constants.md @@ -42,63 +42,3 @@ if (n->inputs[BIAS_INDEX].node->is_variable()) { ``` This practice makes code more maintainable as dimensions and tensor layouts evolve during AI model optimization efforts. - - -[ - { - "discussion_id": "823738415", - "pr_number": 20913, - "pr_file": "src/ndarray/ndarray.cc", - "created_at": "2022-03-10T13:51:24+00:00", - "commented_code": "dnnl::memory::dims dims;\n // These are shapes supprted by DNNL.\n if (shape.ndim() >= 1 && shape.ndim() <= 6) {\n if (shape.ndim() >= 1 && shape.ndim() <= 12) {", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "823738415", - "repo_full_name": "apache/mxnet", - "pr_number": 20913, - "pr_file": "src/ndarray/ndarray.cc", - "discussion_id": "823738415", - "commented_code": "@@ -605,36 +605,14 @@ void NDArray::Chunk::SetMKLMem(const mxnet::TShape& shape, int dtype) {\n \n dnnl::memory::dims dims;\n // These are shapes supprted by DNNL.\n- if (shape.ndim() >= 1 && shape.ndim() <= 6) {\n+ if (shape.ndim() >= 1 && shape.ndim() <= 12) {", - "comment_created_at": "2022-03-10T13:51:24+00:00", - "comment_author": "bgawrych", - "comment_body": "Can '12' be named by const variable? e.g. 
\"const int MAX_ONEDNN_DIMS\"", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "669908725", - "pr_number": 20430, - "pr_file": "src/operator/quantization/quantize_graph_pass.h", - "created_at": "2021-07-14T19:53:34+00:00", - "commented_code": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied. See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n/*!\n * Copyright (c) 2021 by Contributors\n * \\file quantize_graph_pass.h\n * \\brief\n */\n#ifndef MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n#define MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n\n#include \n#include \n#include \n#include \n#include \n#include \n#include \n#include \n#include \"quantize_v2-inl.h\"\n#include \"../nn/mkldnn/mkldnn_fully_connected-inl.h\"\n#include \"../../common/utils.h\"\n\nnamespace mxnet {\nnamespace op {\n\nusing nnvm::Symbol;\nusing nnvm::Node;\nusing nnvm::ObjectPtr;\nusing nnvm::NodeEntry;\nusing nnvm::Graph;\n\ninline ObjectPtr CreateNode(std::string op_name, std::string node_name) {\n ObjectPtr node = Node::Create();\n node->attrs.name = node_name;\n if (op_name == \"nullptr\") {\n node->attrs.op = nullptr;\n // ugly workaround because VariableParam is not exposed\n node->attrs.parsed =\n nnvm::Symbol::CreateVariable(node->attrs.name).outputs[0].node->attrs.parsed;\n } else {\n node->attrs.op = Op::Get(op_name);\n }\n return node;\n}\n\ntemplate \nstatic inline bool IsOneDNNFullyConnected(const ObjectPtr& n) {\n#if MXNET_USE_MKLDNN == 1\n if (n->op() == Op::Get(\"_sg_mkldnn_fully_connected\")) {\n auto const& param = nnvm::get(n->attrs.parsed);\n if (!(param.mkldnn_param.channel_wise_quantize.has_value() &&\n param.mkldnn_param.channel_wise_quantize.value())) {\n return !require_bias || (param.default_param.no_bias == false &&\n n->inputs[2].node->is_variable());", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "669908725", - "repo_full_name": "apache/mxnet", - "pr_number": 20430, - "pr_file": "src/operator/quantization/quantize_graph_pass.h", - "discussion_id": "669908725", - "commented_code": "@@ -0,0 +1,155 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. 
See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * Copyright (c) 2021 by Contributors\n+ * \\file quantize_graph_pass.h\n+ * \\brief\n+ */\n+#ifndef MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n+#define MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \"quantize_v2-inl.h\"\n+#include \"../nn/mkldnn/mkldnn_fully_connected-inl.h\"\n+#include \"../../common/utils.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+using nnvm::Symbol;\n+using nnvm::Node;\n+using nnvm::ObjectPtr;\n+using nnvm::NodeEntry;\n+using nnvm::Graph;\n+\n+inline ObjectPtr CreateNode(std::string op_name, std::string node_name) {\n+ ObjectPtr node = Node::Create();\n+ node->attrs.name = node_name;\n+ if (op_name == \"nullptr\") {\n+ node->attrs.op = nullptr;\n+ // ugly workaround because VariableParam is not exposed\n+ node->attrs.parsed =\n+ nnvm::Symbol::CreateVariable(node->attrs.name).outputs[0].node->attrs.parsed;\n+ } else {\n+ node->attrs.op = Op::Get(op_name);\n+ }\n+ return node;\n+}\n+\n+template \n+static inline bool IsOneDNNFullyConnected(const ObjectPtr& n) {\n+#if MXNET_USE_MKLDNN == 1\n+ if (n->op() == Op::Get(\"_sg_mkldnn_fully_connected\")) {\n+ auto const& param = nnvm::get(n->attrs.parsed);\n+ if (!(param.mkldnn_param.channel_wise_quantize.has_value() &&\n+ param.mkldnn_param.channel_wise_quantize.value())) {\n+ return !require_bias || (param.default_param.no_bias == false &&\n+ n->inputs[2].node->is_variable());", - "comment_created_at": "2021-07-14T19:53:34+00:00", - "comment_author": "anko-intel", - "comment_body": "instead of \"2\" it could be idx.bias", - "pr_file_module": null - }, - { - "comment_id": "670386574", - "repo_full_name": "apache/mxnet", - "pr_number": 20430, - "pr_file": "src/operator/quantization/quantize_graph_pass.h", - "discussion_id": "669908725", - "commented_code": "@@ -0,0 +1,155 @@\n+/*\n+ * Licensed to the Apache Software Foundation (ASF) under one\n+ * or more contributor license agreements. See the NOTICE file\n+ * distributed with this work for additional information\n+ * regarding copyright ownership. The ASF licenses this file\n+ * to you under the Apache License, Version 2.0 (the\n+ * \"License\"); you may not use this file except in compliance\n+ * with the License. You may obtain a copy of the License at\n+ *\n+ * http://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing,\n+ * software distributed under the License is distributed on an\n+ * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n+ * KIND, either express or implied. 
See the License for the\n+ * specific language governing permissions and limitations\n+ * under the License.\n+ */\n+\n+/*!\n+ * Copyright (c) 2021 by Contributors\n+ * \\file quantize_graph_pass.h\n+ * \\brief\n+ */\n+#ifndef MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n+#define MXNET_OPERATOR_QUANTIZATION_QUANTIZE_GRAPH_PASS_H_\n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \"quantize_v2-inl.h\"\n+#include \"../nn/mkldnn/mkldnn_fully_connected-inl.h\"\n+#include \"../../common/utils.h\"\n+\n+namespace mxnet {\n+namespace op {\n+\n+using nnvm::Symbol;\n+using nnvm::Node;\n+using nnvm::ObjectPtr;\n+using nnvm::NodeEntry;\n+using nnvm::Graph;\n+\n+inline ObjectPtr CreateNode(std::string op_name, std::string node_name) {\n+ ObjectPtr node = Node::Create();\n+ node->attrs.name = node_name;\n+ if (op_name == \"nullptr\") {\n+ node->attrs.op = nullptr;\n+ // ugly workaround because VariableParam is not exposed\n+ node->attrs.parsed =\n+ nnvm::Symbol::CreateVariable(node->attrs.name).outputs[0].node->attrs.parsed;\n+ } else {\n+ node->attrs.op = Op::Get(op_name);\n+ }\n+ return node;\n+}\n+\n+template \n+static inline bool IsOneDNNFullyConnected(const ObjectPtr& n) {\n+#if MXNET_USE_MKLDNN == 1\n+ if (n->op() == Op::Get(\"_sg_mkldnn_fully_connected\")) {\n+ auto const& param = nnvm::get(n->attrs.parsed);\n+ if (!(param.mkldnn_param.channel_wise_quantize.has_value() &&\n+ param.mkldnn_param.channel_wise_quantize.value())) {\n+ return !require_bias || (param.default_param.no_bias == false &&\n+ n->inputs[2].node->is_variable());", - "comment_created_at": "2021-07-15T11:45:23+00:00", - "comment_author": "sfraczek", - "comment_body": "ok", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/mxnet-use-pytest-parameterization.json b/_reviewers/mxnet-use-pytest-parameterization.json new file mode 100644 index 0000000..b8bdd95 --- /dev/null +++ b/_reviewers/mxnet-use-pytest-parameterization.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "739428844", + "pr_number": 20692, + "pr_file": "tests/python/unittest/test_numpy_op.py", + "created_at": "2021-10-29T17:48:25+00:00", + "commented_code": "@use_np\ndef test_np_argmin_argmax():\n workloads = [\n ((), 0, False),\n ((), -1, False),\n ((), 1, True),\n ((5, 3), None, False),\n ((5, 3), -1, False),\n ((5, 3), 1, False),\n ((5, 3), 3, True),\n ((5, 0, 3), 0, False),\n ((5, 0, 3), -1, False),\n ((5, 0, 3), None, True),\n ((5, 0, 3), 1, True),\n ((3, 5, 7), None, False),\n ((3, 5, 7), 0, False),\n ((3, 5, 7), 1, False),\n ((3, 5, 7), 2, False),\n ((3, 5, 7, 9, 11), -3, False),\n ]\n dtypes = ['float16', 'float32', 'float64', 'bool', 'int32']\n ops = ['argmin', 'argmax']\n\n@pytest.mark.parametrize('shape,axis,throw_exception', [\n ((), 0, False),\n ((), -1, False),\n ((), 1, True),\n ((5, 3), None, False),\n ((5, 3), -1, False),\n ((5, 3), 1, False),\n ((5, 3), 3, True),\n ((5, 0, 3), 0, False),\n ((5, 0, 3), -1, False),\n ((5, 0, 3), None, True),\n ((5, 0, 3), 1, True),\n ((3, 5, 7), None, False),\n ((3, 5, 7), 0, False),\n ((3, 5, 7), 1, False),\n ((3, 5, 7), 2, False),\n ((3, 5, 7, 9, 11), -3, False),\n])\n@pytest.mark.parametrize('dtype', ['float16', 'float32', 'float64', 'bool', 'int32'])\n@pytest.mark.parametrize('op_name', ['argmin', 'argmax'])\n@pytest.mark.parametrize('keepdims', [True, False])\ndef test_np_argmin_argmax(shape, axis, throw_exception, dtype, op_name, keepdims):\n class TestArgExtreme(HybridBlock):\n def __init__(self, op_name, axis=None):\n def 
__init__(self, op_name, axis=None, keepdims=False):\n super(TestArgExtreme, self).__init__()\n self._op_name = op_name\n self._axis = axis\n self.keepdims = keepdims\n\n def forward(self, x):\n return getattr(x, self._op_name)(self._axis)\n\n for op_name in ops:\n for shape, axis, throw_exception in workloads:\n for dtype in dtypes:\n a = np.random.uniform(low=0, high=100, size=shape).astype(dtype)\n if throw_exception:\n # Cannot use assert_exception because sometimes the main thread\n # proceeds to `assert False` before the exception is thrown\n # in the worker thread. Have to use mx.nd.waitall() here\n # to block the main thread.\n try:\n getattr(np, op_name)(a, axis)\n mx.nd.waitall()\n assert False\n except mx.MXNetError:\n pass\n else:\n mx_ret = getattr(np, op_name)(a, axis=axis)\n np_ret = getattr(onp, op_name)(a.asnumpy(), axis=axis)\n assert mx_ret.dtype == np_ret.dtype\n assert same(mx_ret.asnumpy(), np_ret)\n return getattr(x, self._op_name)(self._axis, keepdims=self.keepdims)\n\n a = np.random.uniform(low=0, high=100, size=shape).astype(dtype)\n if throw_exception:\n # Cannot use assert_exception because sometimes the main thread\n # proceeds to `assert False` before the exception is thrown\n # in the worker thread. Have to use mx.nd.waitall() here\n # to block the main thread.\n try:\n getattr(np, op_name)(a, axis)\n mx.nd.waitall()\n assert False\n except mx.MXNetError:\n pass", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "739428844", + "repo_full_name": "apache/mxnet", + "pr_number": 20692, + "pr_file": "tests/python/unittest/test_numpy_op.py", + "discussion_id": "739428844", + "commented_code": "@@ -4437,73 +4437,84 @@ def GetDimSize(shp, axis):\n \n \n @use_np\n-def test_np_argmin_argmax():\n- workloads = [\n- ((), 0, False),\n- ((), -1, False),\n- ((), 1, True),\n- ((5, 3), None, False),\n- ((5, 3), -1, False),\n- ((5, 3), 1, False),\n- ((5, 3), 3, True),\n- ((5, 0, 3), 0, False),\n- ((5, 0, 3), -1, False),\n- ((5, 0, 3), None, True),\n- ((5, 0, 3), 1, True),\n- ((3, 5, 7), None, False),\n- ((3, 5, 7), 0, False),\n- ((3, 5, 7), 1, False),\n- ((3, 5, 7), 2, False),\n- ((3, 5, 7, 9, 11), -3, False),\n- ]\n- dtypes = ['float16', 'float32', 'float64', 'bool', 'int32']\n- ops = ['argmin', 'argmax']\n-\n+@pytest.mark.parametrize('shape,axis,throw_exception', [\n+ ((), 0, False),\n+ ((), -1, False),\n+ ((), 1, True),\n+ ((5, 3), None, False),\n+ ((5, 3), -1, False),\n+ ((5, 3), 1, False),\n+ ((5, 3), 3, True),\n+ ((5, 0, 3), 0, False),\n+ ((5, 0, 3), -1, False),\n+ ((5, 0, 3), None, True),\n+ ((5, 0, 3), 1, True),\n+ ((3, 5, 7), None, False),\n+ ((3, 5, 7), 0, False),\n+ ((3, 5, 7), 1, False),\n+ ((3, 5, 7), 2, False),\n+ ((3, 5, 7, 9, 11), -3, False),\n+])\n+@pytest.mark.parametrize('dtype', ['float16', 'float32', 'float64', 'bool', 'int32'])\n+@pytest.mark.parametrize('op_name', ['argmin', 'argmax'])\n+@pytest.mark.parametrize('keepdims', [True, False])\n+def test_np_argmin_argmax(shape, axis, throw_exception, dtype, op_name, keepdims):\n class TestArgExtreme(HybridBlock):\n- def __init__(self, op_name, axis=None):\n+ def __init__(self, op_name, axis=None, keepdims=False):\n super(TestArgExtreme, self).__init__()\n self._op_name = op_name\n self._axis = axis\n+ self.keepdims = keepdims\n \n def forward(self, x):\n- return getattr(x, self._op_name)(self._axis)\n-\n- for op_name in ops:\n- for shape, axis, throw_exception in workloads:\n- for dtype in dtypes:\n- a = np.random.uniform(low=0, high=100, size=shape).astype(dtype)\n- if 
throw_exception:\n- # Cannot use assert_exception because sometimes the main thread\n- # proceeds to `assert False` before the exception is thrown\n- # in the worker thread. Have to use mx.nd.waitall() here\n- # to block the main thread.\n- try:\n- getattr(np, op_name)(a, axis)\n- mx.nd.waitall()\n- assert False\n- except mx.MXNetError:\n- pass\n- else:\n- mx_ret = getattr(np, op_name)(a, axis=axis)\n- np_ret = getattr(onp, op_name)(a.asnumpy(), axis=axis)\n- assert mx_ret.dtype == np_ret.dtype\n- assert same(mx_ret.asnumpy(), np_ret)\n+ return getattr(x, self._op_name)(self._axis, keepdims=self.keepdims)\n+\n+ a = np.random.uniform(low=0, high=100, size=shape).astype(dtype)\n+ if throw_exception:\n+ # Cannot use assert_exception because sometimes the main thread\n+ # proceeds to `assert False` before the exception is thrown\n+ # in the worker thread. Have to use mx.nd.waitall() here\n+ # to block the main thread.\n+ try:\n+ getattr(np, op_name)(a, axis)\n+ mx.nd.waitall()\n+ assert False\n+ except mx.MXNetError:\n+ pass", + "comment_created_at": "2021-10-29T17:48:25+00:00", + "comment_author": "szha", + "comment_body": "This can be simplified https://docs.pytest.org/en/6.2.x/assert.html#assertions-about-expected-exceptions", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "610247827", + "pr_number": 20105, + "pr_file": "tests/python/unittest/test_numpy_gluon.py", + "created_at": "2021-04-09T00:58:33+00:00", + "commented_code": "[64, 88, 65, 89, 66, 90, 67, 91],\n [68, 92, 69, 93, 70, 94, 71, 95]]]]]\n )\n\n@use_np\ndef test_embedding():\n def check_embedding():\n layer = gluon.nn.Embedding(10, 100)\n layer.initialize()\n x = mx.np.array([3,4,2,0,1])\n with mx.autograd.record():\n y = layer(x)\n y.backward()\n assert (layer.weight.grad().asnumpy()[:5] == 1).all()\n assert (layer.weight.grad().asnumpy()[5:] == 0).all()\n\n def check_embedding_large_input():\n embedding = mx.gluon.nn.Embedding(10, 1)\n embedding.initialize()\n embedding.hybridize()\n shape = (20481,)\n with mx.autograd.record():\n emb_in = embedding(mx.np.ones(shape))\n loss = emb_in.sum()\n loss.backward()\n assert embedding.weight.grad().sum().item() == 20481\n\n check_embedding()\n check_embedding_large_input()\n\n@use_np\ndef test_layernorm():\n layer = nn.LayerNorm(in_channels=10)\n check_layer_forward(layer, (2, 10, 10, 10))\n\ndef check_layer_forward(layer, dshape):", + "repo_full_name": "apache/mxnet", + "discussion_comments": [ + { + "comment_id": "610247827", + "repo_full_name": "apache/mxnet", + "pr_number": 20105, + "pr_file": "tests/python/unittest/test_numpy_gluon.py", + "discussion_id": "610247827", + "commented_code": "@@ -602,3 +602,57 @@ def test_pixelshuffle3d():\n [64, 88, 65, 89, 66, 90, 67, 91],\n [68, 92, 69, 93, 70, 94, 71, 95]]]]]\n )\n+\n+@use_np\n+def test_embedding():\n+ def check_embedding():\n+ layer = gluon.nn.Embedding(10, 100)\n+ layer.initialize()\n+ x = mx.np.array([3,4,2,0,1])\n+ with mx.autograd.record():\n+ y = layer(x)\n+ y.backward()\n+ assert (layer.weight.grad().asnumpy()[:5] == 1).all()\n+ assert (layer.weight.grad().asnumpy()[5:] == 0).all()\n+\n+ def check_embedding_large_input():\n+ embedding = mx.gluon.nn.Embedding(10, 1)\n+ embedding.initialize()\n+ embedding.hybridize()\n+ shape = (20481,)\n+ with mx.autograd.record():\n+ emb_in = embedding(mx.np.ones(shape))\n+ loss = emb_in.sum()\n+ loss.backward()\n+ assert embedding.weight.grad().sum().item() == 20481\n+\n+ check_embedding()\n+ check_embedding_large_input()\n+\n+@use_np\n+def test_layernorm():\n+ layer = 
nn.LayerNorm(in_channels=10)\n+ check_layer_forward(layer, (2, 10, 10, 10))\n+\n+def check_layer_forward(layer, dshape):", + "comment_created_at": "2021-04-09T00:58:33+00:00", + "comment_author": "szha", + "comment_body": "you can use `@pytest.mark.parametrize('arg1,arg2', [(arg1_val, arg2_val)])` to parameterize the test instead of having a wrapper test function.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/mxnet-use-pytest-parameterization.md b/_reviewers/mxnet-use-pytest-parameterization.md index fd99ae4..5e69dac 100644 --- a/_reviewers/mxnet-use-pytest-parameterization.md +++ b/_reviewers/mxnet-use-pytest-parameterization.md @@ -45,51 +45,3 @@ def test_operation(a, b, expected): ``` This approach makes tests more maintainable, documents test cases more clearly, and provides better reporting when tests fail by clearly identifying which specific parameter combination caused the failure. - - -[ - { - "discussion_id": "739428844", - "pr_number": 20692, - "pr_file": "tests/python/unittest/test_numpy_op.py", - "created_at": "2021-10-29T17:48:25+00:00", - "commented_code": "@use_np\ndef test_np_argmin_argmax():\n workloads = [\n ((), 0, False),\n ((), -1, False),\n ((), 1, True),\n ((5, 3), None, False),\n ((5, 3), -1, False),\n ((5, 3), 1, False),\n ((5, 3), 3, True),\n ((5, 0, 3), 0, False),\n ((5, 0, 3), -1, False),\n ((5, 0, 3), None, True),\n ((5, 0, 3), 1, True),\n ((3, 5, 7), None, False),\n ((3, 5, 7), 0, False),\n ((3, 5, 7), 1, False),\n ((3, 5, 7), 2, False),\n ((3, 5, 7, 9, 11), -3, False),\n ]\n dtypes = ['float16', 'float32', 'float64', 'bool', 'int32']\n ops = ['argmin', 'argmax']\n\n@pytest.mark.parametrize('shape,axis,throw_exception', [\n ((), 0, False),\n ((), -1, False),\n ((), 1, True),\n ((5, 3), None, False),\n ((5, 3), -1, False),\n ((5, 3), 1, False),\n ((5, 3), 3, True),\n ((5, 0, 3), 0, False),\n ((5, 0, 3), -1, False),\n ((5, 0, 3), None, True),\n ((5, 0, 3), 1, True),\n ((3, 5, 7), None, False),\n ((3, 5, 7), 0, False),\n ((3, 5, 7), 1, False),\n ((3, 5, 7), 2, False),\n ((3, 5, 7, 9, 11), -3, False),\n])\n@pytest.mark.parametrize('dtype', ['float16', 'float32', 'float64', 'bool', 'int32'])\n@pytest.mark.parametrize('op_name', ['argmin', 'argmax'])\n@pytest.mark.parametrize('keepdims', [True, False])\ndef test_np_argmin_argmax(shape, axis, throw_exception, dtype, op_name, keepdims):\n class TestArgExtreme(HybridBlock):\n def __init__(self, op_name, axis=None):\n def __init__(self, op_name, axis=None, keepdims=False):\n super(TestArgExtreme, self).__init__()\n self._op_name = op_name\n self._axis = axis\n self.keepdims = keepdims\n\n def forward(self, x):\n return getattr(x, self._op_name)(self._axis)\n\n for op_name in ops:\n for shape, axis, throw_exception in workloads:\n for dtype in dtypes:\n a = np.random.uniform(low=0, high=100, size=shape).astype(dtype)\n if throw_exception:\n # Cannot use assert_exception because sometimes the main thread\n # proceeds to `assert False` before the exception is thrown\n # in the worker thread. 
Have to use mx.nd.waitall() here\n # to block the main thread.\n try:\n getattr(np, op_name)(a, axis)\n mx.nd.waitall()\n assert False\n except mx.MXNetError:\n pass\n else:\n mx_ret = getattr(np, op_name)(a, axis=axis)\n np_ret = getattr(onp, op_name)(a.asnumpy(), axis=axis)\n assert mx_ret.dtype == np_ret.dtype\n assert same(mx_ret.asnumpy(), np_ret)\n return getattr(x, self._op_name)(self._axis, keepdims=self.keepdims)\n\n a = np.random.uniform(low=0, high=100, size=shape).astype(dtype)\n if throw_exception:\n # Cannot use assert_exception because sometimes the main thread\n # proceeds to `assert False` before the exception is thrown\n # in the worker thread. Have to use mx.nd.waitall() here\n # to block the main thread.\n try:\n getattr(np, op_name)(a, axis)\n mx.nd.waitall()\n assert False\n except mx.MXNetError:\n pass", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "739428844", - "repo_full_name": "apache/mxnet", - "pr_number": 20692, - "pr_file": "tests/python/unittest/test_numpy_op.py", - "discussion_id": "739428844", - "commented_code": "@@ -4437,73 +4437,84 @@ def GetDimSize(shp, axis):\n \n \n @use_np\n-def test_np_argmin_argmax():\n- workloads = [\n- ((), 0, False),\n- ((), -1, False),\n- ((), 1, True),\n- ((5, 3), None, False),\n- ((5, 3), -1, False),\n- ((5, 3), 1, False),\n- ((5, 3), 3, True),\n- ((5, 0, 3), 0, False),\n- ((5, 0, 3), -1, False),\n- ((5, 0, 3), None, True),\n- ((5, 0, 3), 1, True),\n- ((3, 5, 7), None, False),\n- ((3, 5, 7), 0, False),\n- ((3, 5, 7), 1, False),\n- ((3, 5, 7), 2, False),\n- ((3, 5, 7, 9, 11), -3, False),\n- ]\n- dtypes = ['float16', 'float32', 'float64', 'bool', 'int32']\n- ops = ['argmin', 'argmax']\n-\n+@pytest.mark.parametrize('shape,axis,throw_exception', [\n+ ((), 0, False),\n+ ((), -1, False),\n+ ((), 1, True),\n+ ((5, 3), None, False),\n+ ((5, 3), -1, False),\n+ ((5, 3), 1, False),\n+ ((5, 3), 3, True),\n+ ((5, 0, 3), 0, False),\n+ ((5, 0, 3), -1, False),\n+ ((5, 0, 3), None, True),\n+ ((5, 0, 3), 1, True),\n+ ((3, 5, 7), None, False),\n+ ((3, 5, 7), 0, False),\n+ ((3, 5, 7), 1, False),\n+ ((3, 5, 7), 2, False),\n+ ((3, 5, 7, 9, 11), -3, False),\n+])\n+@pytest.mark.parametrize('dtype', ['float16', 'float32', 'float64', 'bool', 'int32'])\n+@pytest.mark.parametrize('op_name', ['argmin', 'argmax'])\n+@pytest.mark.parametrize('keepdims', [True, False])\n+def test_np_argmin_argmax(shape, axis, throw_exception, dtype, op_name, keepdims):\n class TestArgExtreme(HybridBlock):\n- def __init__(self, op_name, axis=None):\n+ def __init__(self, op_name, axis=None, keepdims=False):\n super(TestArgExtreme, self).__init__()\n self._op_name = op_name\n self._axis = axis\n+ self.keepdims = keepdims\n \n def forward(self, x):\n- return getattr(x, self._op_name)(self._axis)\n-\n- for op_name in ops:\n- for shape, axis, throw_exception in workloads:\n- for dtype in dtypes:\n- a = np.random.uniform(low=0, high=100, size=shape).astype(dtype)\n- if throw_exception:\n- # Cannot use assert_exception because sometimes the main thread\n- # proceeds to `assert False` before the exception is thrown\n- # in the worker thread. 
Have to use mx.nd.waitall() here\n- # to block the main thread.\n- try:\n- getattr(np, op_name)(a, axis)\n- mx.nd.waitall()\n- assert False\n- except mx.MXNetError:\n- pass\n- else:\n- mx_ret = getattr(np, op_name)(a, axis=axis)\n- np_ret = getattr(onp, op_name)(a.asnumpy(), axis=axis)\n- assert mx_ret.dtype == np_ret.dtype\n- assert same(mx_ret.asnumpy(), np_ret)\n+ return getattr(x, self._op_name)(self._axis, keepdims=self.keepdims)\n+\n+ a = np.random.uniform(low=0, high=100, size=shape).astype(dtype)\n+ if throw_exception:\n+ # Cannot use assert_exception because sometimes the main thread\n+ # proceeds to `assert False` before the exception is thrown\n+ # in the worker thread. Have to use mx.nd.waitall() here\n+ # to block the main thread.\n+ try:\n+ getattr(np, op_name)(a, axis)\n+ mx.nd.waitall()\n+ assert False\n+ except mx.MXNetError:\n+ pass", - "comment_created_at": "2021-10-29T17:48:25+00:00", - "comment_author": "szha", - "comment_body": "This can be simplified https://docs.pytest.org/en/6.2.x/assert.html#assertions-about-expected-exceptions", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "610247827", - "pr_number": 20105, - "pr_file": "tests/python/unittest/test_numpy_gluon.py", - "created_at": "2021-04-09T00:58:33+00:00", - "commented_code": "[64, 88, 65, 89, 66, 90, 67, 91],\n [68, 92, 69, 93, 70, 94, 71, 95]]]]]\n )\n\n@use_np\ndef test_embedding():\n def check_embedding():\n layer = gluon.nn.Embedding(10, 100)\n layer.initialize()\n x = mx.np.array([3,4,2,0,1])\n with mx.autograd.record():\n y = layer(x)\n y.backward()\n assert (layer.weight.grad().asnumpy()[:5] == 1).all()\n assert (layer.weight.grad().asnumpy()[5:] == 0).all()\n\n def check_embedding_large_input():\n embedding = mx.gluon.nn.Embedding(10, 1)\n embedding.initialize()\n embedding.hybridize()\n shape = (20481,)\n with mx.autograd.record():\n emb_in = embedding(mx.np.ones(shape))\n loss = emb_in.sum()\n loss.backward()\n assert embedding.weight.grad().sum().item() == 20481\n\n check_embedding()\n check_embedding_large_input()\n\n@use_np\ndef test_layernorm():\n layer = nn.LayerNorm(in_channels=10)\n check_layer_forward(layer, (2, 10, 10, 10))\n\ndef check_layer_forward(layer, dshape):", - "repo_full_name": "apache/mxnet", - "discussion_comments": [ - { - "comment_id": "610247827", - "repo_full_name": "apache/mxnet", - "pr_number": 20105, - "pr_file": "tests/python/unittest/test_numpy_gluon.py", - "discussion_id": "610247827", - "commented_code": "@@ -602,3 +602,57 @@ def test_pixelshuffle3d():\n [64, 88, 65, 89, 66, 90, 67, 91],\n [68, 92, 69, 93, 70, 94, 71, 95]]]]]\n )\n+\n+@use_np\n+def test_embedding():\n+ def check_embedding():\n+ layer = gluon.nn.Embedding(10, 100)\n+ layer.initialize()\n+ x = mx.np.array([3,4,2,0,1])\n+ with mx.autograd.record():\n+ y = layer(x)\n+ y.backward()\n+ assert (layer.weight.grad().asnumpy()[:5] == 1).all()\n+ assert (layer.weight.grad().asnumpy()[5:] == 0).all()\n+\n+ def check_embedding_large_input():\n+ embedding = mx.gluon.nn.Embedding(10, 1)\n+ embedding.initialize()\n+ embedding.hybridize()\n+ shape = (20481,)\n+ with mx.autograd.record():\n+ emb_in = embedding(mx.np.ones(shape))\n+ loss = emb_in.sum()\n+ loss.backward()\n+ assert embedding.weight.grad().sum().item() == 20481\n+\n+ check_embedding()\n+ check_embedding_large_input()\n+\n+@use_np\n+def test_layernorm():\n+ layer = nn.LayerNorm(in_channels=10)\n+ check_layer_forward(layer, (2, 10, 10, 10))\n+\n+def check_layer_forward(layer, dshape):", - "comment_created_at": "2021-04-09T00:58:33+00:00", - 
"comment_author": "szha", - "comment_body": "you can use `@pytest.mark.parametrize('arg1,arg2', [(arg1_val, arg2_val)])` to parameterize the test instead of having a wrapper test function.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-check-nulls-directly.json b/_reviewers/opencv-check-nulls-directly.json new file mode 100644 index 0000000..488492a --- /dev/null +++ b/_reviewers/opencv-check-nulls-directly.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "1856452050", + "pr_number": 26499, + "pr_file": "modules/objdetect/src/qrcode_sr_scale/super_scale.hpp", + "created_at": "2024-11-25T11:32:54+00:00", + "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n//\n// Tencent is pleased to support the open source community by making WeChat QRCode available.\n// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.\n\n#ifndef __SCALE_SUPER_SCALE_HPP_\n#define __SCALE_SUPER_SCALE_HPP_\n\n#include \n#include \"opencv2/dnn.hpp\"\n#include \"opencv2/imgproc.hpp\"\nusing namespace std;\n\nnamespace cv {\nclass SuperScale {\npublic:\n SuperScale(){};\n ~SuperScale(){};\n int init(const std::string &config_path);\n std::vector getScaleList(const int width, const int height);\n Mat ProcessImageScale(const Mat &src, float scale, const bool &use_sr, int sr_max_size = 160);\n\nprivate:\n std::shared_ptr qbar_sr;\n bool net_loaded_ = false;", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1856452050", + "repo_full_name": "opencv/opencv", + "pr_number": 26499, + "pr_file": "modules/objdetect/src/qrcode_sr_scale/super_scale.hpp", + "discussion_id": "1856452050", + "commented_code": "@@ -0,0 +1,31 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+//\n+// Tencent is pleased to support the open source community by making WeChat QRCode available.\n+// Copyright (C) 2020 THL A29 Limited, a Tencent company. 
All rights reserved.\n+\n+#ifndef __SCALE_SUPER_SCALE_HPP_\n+#define __SCALE_SUPER_SCALE_HPP_\n+\n+#include \n+#include \"opencv2/dnn.hpp\"\n+#include \"opencv2/imgproc.hpp\"\n+using namespace std;\n+\n+namespace cv {\n+class SuperScale {\n+public:\n+ SuperScale(){};\n+ ~SuperScale(){};\n+ int init(const std::string &config_path);\n+ std::vector getScaleList(const int width, const int height);\n+ Mat ProcessImageScale(const Mat &src, float scale, const bool &use_sr, int sr_max_size = 160);\n+\n+private:\n+ std::shared_ptr qbar_sr;\n+ bool net_loaded_ = false;", + "comment_created_at": "2024-11-25T11:32:54+00:00", + "comment_author": "dkurt", + "comment_body": "Check `qbar_sr` is not empty instead of separate boolean flag", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1910095401", + "pr_number": 26660, + "pr_file": "modules/highgui/src/window_cocoa.mm", + "created_at": "2025-01-10T09:42:19+00:00", + "commented_code": "if( floor(NSAppKitVersionNumber) > NSAppKitVersionNumber10_5 )\n [application setActivationPolicy:NSApplicationActivationPolicyRegular];\n#endif\n //[application finishLaunching];\n //atexit(icvCocoaCleanup);\n\n setlocale(LC_NUMERIC,\"C\");\n\n return 0;\n}\n\nstatic CVWindow *cvGetWindow(const char *name) {\n //cout << \"cvGetWindow\" << endl;\n NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];\n NSString *cvname = [NSString stringWithFormat:@\"%s\", name];\n CVWindow* retval = (CVWindow*) [windows valueForKey:cvname] ;\n //cout << \"retain count: \" << [retval retainCount] << endl;\n //retval = [retval retain];\n //cout << \"retain count: \" << [retval retainCount] << endl;\n [localpool drain];\n //cout << \"retain count: \" << [retval retainCount] << endl;\n return retval;\n @autoreleasepool{\n NSString *cvname = [NSString stringWithFormat:@\"%s\", name];\n CVWindow* retval = (CVWindow*) [windows valueForKey:cvname];\n return retval;\n }\n}\n\nCV_IMPL int cvStartWindowThread()\n{\n //cout << \"cvStartWindowThread\" << endl;\n return 0;\n}\n\nCV_IMPL void cvDestroyWindow( const char* name)\n{\n\n NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];\n //cout << \"cvDestroyWindow\" << endl;\n CVWindow *window = cvGetWindow(name);\n if(window) {\n if ([window styleMask] & NSFullScreenWindowMask) {\n [window toggleFullScreen:nil];\n @autoreleasepool {\n CVWindow *window = cvGetWindow(name);\n if(window) {\n if ([window styleMask] & NSFullScreenWindowMask) {\n [window toggleFullScreen:nil];\n }\n [window close];\n [windows removeObjectForKey:[NSString stringWithFormat:@\"%s\", name]];\n }\n [window close];\n [windows removeObjectForKey:[NSString stringWithFormat:@\"%s\", name]];\n }\n [localpool drain];\n}\n\n\nCV_IMPL void cvDestroyAllWindows( void )\n{\n //cout << \"cvDestroyAllWindows\" << endl;\n NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];\n NSDictionary* list = [NSDictionary dictionaryWithDictionary:windows];\n for(NSString *key in list) {\n cvDestroyWindow([key cStringUsingEncoding:NSASCIIStringEncoding]);\n @autoreleasepool {\n NSDictionary* list = [NSDictionary dictionaryWithDictionary:windows];\n for(NSString *key in list) {\n cvDestroyWindow([key cStringUsingEncoding:NSASCIIStringEncoding]);\n }\n }\n [localpool drain];\n}\n\n\nCV_IMPL void cvShowImage( const char* name, const CvArr* arr)\n{\n //cout << \"cvShowImage\" << endl;\n NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];\n CVWindow *window = cvGetWindow(name);\n if(!window)\n {\n cvNamedWindow(name, CV_WINDOW_AUTOSIZE);\n window = 
cvGetWindow(name);\n }\n @autoreleasepool{\n CVWindow *window = cvGetWindow(name);\n if(!window)\n {\n cvNamedWindow(name, CV_WINDOW_AUTOSIZE);\n window = cvGetWindow(name);\n }\n\n if(window)\n {\n bool empty = [[window contentView] image] == nil;\n NSRect vrectOld = [[window contentView] frame];\n NSSize oldImageSize = [[[window contentView] image] size];\n [[window contentView] setImageData:(CvArr *)arr];\n if([window autosize] || [window firstContent] || empty)\n if(window)\n {\n NSSize imageSize = [[[window contentView] image] size];\n // Only adjust the image size if the new image is a different size from the previous\n if (oldImageSize.height != imageSize.height || oldImageSize.width != imageSize.width)\n bool empty = [[window contentView] image] == nil;\n NSRect vrectOld = [[window contentView] frame];\n NSSize oldImageSize = [[[window contentView] image] size];\n [[window contentView] setImageData:(CvArr *)arr];\n if([window autosize] || [window firstContent] || empty)\n {\n //Set new view size considering sliders (reserve height and min width)\n NSSize scaledImageSize = imageSize;\n if ([[window contentView] respondsToSelector:@selector(convertSizeFromBacking:)])\n NSSize imageSize = [[[window contentView] image] size];\n // Only adjust the image size if the new image is a different size from the previous\n if (oldImageSize.height != imageSize.height || oldImageSize.width != imageSize.width)\n {\n // Only resize for retina displays if the image is bigger than the screen\n NSSize screenSize = NSScreen.mainScreen.visibleFrame.size;\n CGFloat titleBarHeight = window.frame.size.height - [window contentRectForFrameRect:window.frame].size.height;\n screenSize.height -= titleBarHeight;\n if (imageSize.width > screenSize.width || imageSize.height > screenSize.height)\n //Set new view size considering sliders (reserve height and min width)\n NSSize scaledImageSize = imageSize;\n if ([[window contentView] respondsToSelector:@selector(convertSizeFromBacking:)])\n {\n CGFloat fx = screenSize.width/std::max(imageSize.width, (CGFloat)1.f);\n CGFloat fy = screenSize.height/std::max(imageSize.height, (CGFloat)1.f);\n CGFloat min_f = std::min(fx, fy);\n scaledImageSize = [[window contentView] convertSizeFromBacking:imageSize];\n scaledImageSize.width = std::min(scaledImageSize.width, min_f*imageSize.width);\n scaledImageSize.height = std::min(scaledImageSize.height, min_f*imageSize.height);\n // Only resize for retina displays if the image is bigger than the screen\n NSSize screenSize = NSScreen.mainScreen.visibleFrame.size;\n CGFloat titleBarHeight = window.frame.size.height - [window contentRectForFrameRect:window.frame].size.height;\n screenSize.height -= titleBarHeight;\n if (imageSize.width > screenSize.width || imageSize.height > screenSize.height)\n {\n CGFloat fx = screenSize.width/std::max(imageSize.width, (CGFloat)1.f);\n CGFloat fy = screenSize.height/std::max(imageSize.height, (CGFloat)1.f);\n CGFloat min_f = std::min(fx, fy);\n scaledImageSize = [[window contentView] convertSizeFromBacking:imageSize];\n scaledImageSize.width = std::min(scaledImageSize.width, min_f*imageSize.width);\n scaledImageSize.height = std::min(scaledImageSize.height, min_f*imageSize.height);\n }\n }\n }\n NSSize contentSize = vrectOld.size;\n contentSize.height = scaledImageSize.height + [window contentView].sliderHeight;\n contentSize.width = std::max(scaledImageSize.width, MIN_SLIDER_WIDTH);\n [window setContentSize:contentSize]; //adjust sliders to fit new window size\n if([window firstContent])\n {\n int x 
= [window x0];\n int y = [window y0];\n if(x >= 0 && y >= 0)\n NSSize contentSize = vrectOld.size;\n contentSize.height = scaledImageSize.height + [window contentView].sliderHeight;\n contentSize.width = std::max(scaledImageSize.width, MIN_SLIDER_WIDTH);\n [window setContentSize:contentSize]; //adjust sliders to fit new window size\n if([window firstContent])\n {\n y = [[window screen] visibleFrame].size.height - y;\n [window setFrameTopLeftPoint:NSMakePoint(x, y)];\n int x = [window x0];\n int y = [window y0];\n if(x >= 0 && y >= 0)\n {\n y = [[window screen] visibleFrame].size.height - y;\n [window setFrameTopLeftPoint:NSMakePoint(x, y)];\n }\n }\n }\n }\n [window setFirstContent:NO];\n }\n [window setFirstContent:NO];\n }\n [localpool drain];\n}\n\nCV_IMPL void cvResizeWindow( const char* name, int width, int height)\n{\n\n //cout << \"cvResizeWindow\" << endl;\n NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];\n CVWindow *window = cvGetWindow(name);\n if(window && ![window autosize]) {\n height += [window contentView].sliderHeight;\n NSSize size = { (CGFloat)width, (CGFloat)height };\n [window setContentSize:size];\n @autoreleasepool{\n CVWindow *window = cvGetWindow(name);\n if(window && ![window autosize]) {\n height += [window contentView].sliderHeight;\n NSSize size = { (CGFloat)width, (CGFloat)height };\n [window setContentSize:size];\n }\n }\n [localpool drain];\n}\n\nCV_IMPL void cvMoveWindow( const char* name, int x, int y)\n{\n CV_FUNCNAME(\"cvMoveWindow\");\n __BEGIN__;\n\n NSAutoreleasePool* localpool1 = [[NSAutoreleasePool alloc] init];\n CVWindow *window = nil;\n __BEGIN__;\n @autoreleasepool{\n CVWindow *window = nil;\n\n if(name == NULL)\n CV_ERROR( CV_StsNullPtr, \"NULL window name\" );\n //cout << \"cvMoveWindow\"<< endl;\n window = cvGetWindow(name);\n if(window) {\n if([window firstContent]) {\n [window setX0:x];\n [window setY0:y];\n }\n else {\n y = [[window screen] visibleFrame].size.height - y;\n [window setFrameTopLeftPoint:NSMakePoint(x, y)];\n if(name == NULL)\n CV_ERROR( CV_StsNullPtr, \"NULL window name\" );\n window = cvGetWindow(name);\n if(window) {\n if([window firstContent]) {\n [window setX0:x];\n [window setY0:y];\n }\n else {\n y = [[window screen] visibleFrame].size.height - y;\n [window setFrameTopLeftPoint:NSMakePoint(x, y)];\n }\n }\n }", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1910095401", + "repo_full_name": "opencv/opencv", + "pr_number": 26660, + "pr_file": "modules/highgui/src/window_cocoa.mm", + "discussion_id": "1910095401", + "commented_code": "@@ -161,162 +146,145 @@ CV_IMPL int cvInitSystem( int , char** )\n if( floor(NSAppKitVersionNumber) > NSAppKitVersionNumber10_5 )\n [application setActivationPolicy:NSApplicationActivationPolicyRegular];\n #endif\n- //[application finishLaunching];\n- //atexit(icvCocoaCleanup);\n-\n setlocale(LC_NUMERIC,\"C\");\n \n return 0;\n }\n \n static CVWindow *cvGetWindow(const char *name) {\n- //cout << \"cvGetWindow\" << endl;\n- NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];\n- NSString *cvname = [NSString stringWithFormat:@\"%s\", name];\n- CVWindow* retval = (CVWindow*) [windows valueForKey:cvname] ;\n- //cout << \"retain count: \" << [retval retainCount] << endl;\n- //retval = [retval retain];\n- //cout << \"retain count: \" << [retval retainCount] << endl;\n- [localpool drain];\n- //cout << \"retain count: \" << [retval retainCount] << endl;\n- return retval;\n+ @autoreleasepool{\n+ NSString *cvname = [NSString 
stringWithFormat:@\"%s\", name];\n+ CVWindow* retval = (CVWindow*) [windows valueForKey:cvname];\n+ return retval;\n+ }\n }\n \n CV_IMPL int cvStartWindowThread()\n {\n- //cout << \"cvStartWindowThread\" << endl;\n return 0;\n }\n \n CV_IMPL void cvDestroyWindow( const char* name)\n {\n-\n- NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];\n- //cout << \"cvDestroyWindow\" << endl;\n- CVWindow *window = cvGetWindow(name);\n- if(window) {\n- if ([window styleMask] & NSFullScreenWindowMask) {\n- [window toggleFullScreen:nil];\n+ @autoreleasepool {\n+ CVWindow *window = cvGetWindow(name);\n+ if(window) {\n+ if ([window styleMask] & NSFullScreenWindowMask) {\n+ [window toggleFullScreen:nil];\n+ }\n+ [window close];\n+ [windows removeObjectForKey:[NSString stringWithFormat:@\"%s\", name]];\n }\n- [window close];\n- [windows removeObjectForKey:[NSString stringWithFormat:@\"%s\", name]];\n }\n- [localpool drain];\n }\n \n \n CV_IMPL void cvDestroyAllWindows( void )\n {\n- //cout << \"cvDestroyAllWindows\" << endl;\n- NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];\n- NSDictionary* list = [NSDictionary dictionaryWithDictionary:windows];\n- for(NSString *key in list) {\n- cvDestroyWindow([key cStringUsingEncoding:NSASCIIStringEncoding]);\n+ @autoreleasepool {\n+ NSDictionary* list = [NSDictionary dictionaryWithDictionary:windows];\n+ for(NSString *key in list) {\n+ cvDestroyWindow([key cStringUsingEncoding:NSASCIIStringEncoding]);\n+ }\n }\n- [localpool drain];\n }\n \n \n CV_IMPL void cvShowImage( const char* name, const CvArr* arr)\n {\n- //cout << \"cvShowImage\" << endl;\n- NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];\n- CVWindow *window = cvGetWindow(name);\n- if(!window)\n- {\n- cvNamedWindow(name, CV_WINDOW_AUTOSIZE);\n- window = cvGetWindow(name);\n- }\n+ @autoreleasepool{\n+ CVWindow *window = cvGetWindow(name);\n+ if(!window)\n+ {\n+ cvNamedWindow(name, CV_WINDOW_AUTOSIZE);\n+ window = cvGetWindow(name);\n+ }\n \n- if(window)\n- {\n- bool empty = [[window contentView] image] == nil;\n- NSRect vrectOld = [[window contentView] frame];\n- NSSize oldImageSize = [[[window contentView] image] size];\n- [[window contentView] setImageData:(CvArr *)arr];\n- if([window autosize] || [window firstContent] || empty)\n+ if(window)\n {\n- NSSize imageSize = [[[window contentView] image] size];\n- // Only adjust the image size if the new image is a different size from the previous\n- if (oldImageSize.height != imageSize.height || oldImageSize.width != imageSize.width)\n+ bool empty = [[window contentView] image] == nil;\n+ NSRect vrectOld = [[window contentView] frame];\n+ NSSize oldImageSize = [[[window contentView] image] size];\n+ [[window contentView] setImageData:(CvArr *)arr];\n+ if([window autosize] || [window firstContent] || empty)\n {\n- //Set new view size considering sliders (reserve height and min width)\n- NSSize scaledImageSize = imageSize;\n- if ([[window contentView] respondsToSelector:@selector(convertSizeFromBacking:)])\n+ NSSize imageSize = [[[window contentView] image] size];\n+ // Only adjust the image size if the new image is a different size from the previous\n+ if (oldImageSize.height != imageSize.height || oldImageSize.width != imageSize.width)\n {\n- // Only resize for retina displays if the image is bigger than the screen\n- NSSize screenSize = NSScreen.mainScreen.visibleFrame.size;\n- CGFloat titleBarHeight = window.frame.size.height - [window contentRectForFrameRect:window.frame].size.height;\n- screenSize.height -= 
titleBarHeight;\n- if (imageSize.width > screenSize.width || imageSize.height > screenSize.height)\n+ //Set new view size considering sliders (reserve height and min width)\n+ NSSize scaledImageSize = imageSize;\n+ if ([[window contentView] respondsToSelector:@selector(convertSizeFromBacking:)])\n {\n- CGFloat fx = screenSize.width/std::max(imageSize.width, (CGFloat)1.f);\n- CGFloat fy = screenSize.height/std::max(imageSize.height, (CGFloat)1.f);\n- CGFloat min_f = std::min(fx, fy);\n- scaledImageSize = [[window contentView] convertSizeFromBacking:imageSize];\n- scaledImageSize.width = std::min(scaledImageSize.width, min_f*imageSize.width);\n- scaledImageSize.height = std::min(scaledImageSize.height, min_f*imageSize.height);\n+ // Only resize for retina displays if the image is bigger than the screen\n+ NSSize screenSize = NSScreen.mainScreen.visibleFrame.size;\n+ CGFloat titleBarHeight = window.frame.size.height - [window contentRectForFrameRect:window.frame].size.height;\n+ screenSize.height -= titleBarHeight;\n+ if (imageSize.width > screenSize.width || imageSize.height > screenSize.height)\n+ {\n+ CGFloat fx = screenSize.width/std::max(imageSize.width, (CGFloat)1.f);\n+ CGFloat fy = screenSize.height/std::max(imageSize.height, (CGFloat)1.f);\n+ CGFloat min_f = std::min(fx, fy);\n+ scaledImageSize = [[window contentView] convertSizeFromBacking:imageSize];\n+ scaledImageSize.width = std::min(scaledImageSize.width, min_f*imageSize.width);\n+ scaledImageSize.height = std::min(scaledImageSize.height, min_f*imageSize.height);\n+ }\n }\n- }\n- NSSize contentSize = vrectOld.size;\n- contentSize.height = scaledImageSize.height + [window contentView].sliderHeight;\n- contentSize.width = std::max(scaledImageSize.width, MIN_SLIDER_WIDTH);\n- [window setContentSize:contentSize]; //adjust sliders to fit new window size\n- if([window firstContent])\n- {\n- int x = [window x0];\n- int y = [window y0];\n- if(x >= 0 && y >= 0)\n+ NSSize contentSize = vrectOld.size;\n+ contentSize.height = scaledImageSize.height + [window contentView].sliderHeight;\n+ contentSize.width = std::max(scaledImageSize.width, MIN_SLIDER_WIDTH);\n+ [window setContentSize:contentSize]; //adjust sliders to fit new window size\n+ if([window firstContent])\n {\n- y = [[window screen] visibleFrame].size.height - y;\n- [window setFrameTopLeftPoint:NSMakePoint(x, y)];\n+ int x = [window x0];\n+ int y = [window y0];\n+ if(x >= 0 && y >= 0)\n+ {\n+ y = [[window screen] visibleFrame].size.height - y;\n+ [window setFrameTopLeftPoint:NSMakePoint(x, y)];\n+ }\n }\n }\n }\n+ [window setFirstContent:NO];\n }\n- [window setFirstContent:NO];\n }\n- [localpool drain];\n }\n \n CV_IMPL void cvResizeWindow( const char* name, int width, int height)\n {\n-\n- //cout << \"cvResizeWindow\" << endl;\n- NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];\n- CVWindow *window = cvGetWindow(name);\n- if(window && ![window autosize]) {\n- height += [window contentView].sliderHeight;\n- NSSize size = { (CGFloat)width, (CGFloat)height };\n- [window setContentSize:size];\n+ @autoreleasepool{\n+ CVWindow *window = cvGetWindow(name);\n+ if(window && ![window autosize]) {\n+ height += [window contentView].sliderHeight;\n+ NSSize size = { (CGFloat)width, (CGFloat)height };\n+ [window setContentSize:size];\n+ }\n }\n- [localpool drain];\n }\n \n CV_IMPL void cvMoveWindow( const char* name, int x, int y)\n {\n CV_FUNCNAME(\"cvMoveWindow\");\n- __BEGIN__;\n \n- NSAutoreleasePool* localpool1 = [[NSAutoreleasePool alloc] init];\n- CVWindow *window = nil;\n+ 
__BEGIN__;\n+ @autoreleasepool{\n+ CVWindow *window = nil;\n \n- if(name == NULL)\n- CV_ERROR( CV_StsNullPtr, \"NULL window name\" );\n- //cout << \"cvMoveWindow\"<< endl;\n- window = cvGetWindow(name);\n- if(window) {\n- if([window firstContent]) {\n- [window setX0:x];\n- [window setY0:y];\n- }\n- else {\n- y = [[window screen] visibleFrame].size.height - y;\n- [window setFrameTopLeftPoint:NSMakePoint(x, y)];\n+ if(name == NULL)\n+ CV_ERROR( CV_StsNullPtr, \"NULL window name\" );\n+ window = cvGetWindow(name);\n+ if(window) {\n+ if([window firstContent]) {\n+ [window setX0:x];\n+ [window setY0:y];\n+ }\n+ else {\n+ y = [[window screen] visibleFrame].size.height - y;\n+ [window setFrameTopLeftPoint:NSMakePoint(x, y)];\n+ }\n }\n }", + "comment_created_at": "2025-01-10T09:42:19+00:00", + "comment_author": "VadimLevin", + "comment_body": "\r\n```objc\r\nif (name == NULL) {\r\n CV_ERROR( CV_StsNullPtr, \"NULL window name\" );\r\n}\r\n@autoreleasepool {\r\n CVWindow *window = GetWindow(name);\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-check-nulls-directly.md b/_reviewers/opencv-check-nulls-directly.md index ec986ab..1bc4ac9 100644 --- a/_reviewers/opencv-check-nulls-directly.md +++ b/_reviewers/opencv-check-nulls-directly.md @@ -53,51 +53,3 @@ void processData(const char* name) { ``` This approach improves code readability, reduces state tracking errors, and prevents null dereference bugs by ensuring validation happens before any operations that assume non-null values. - - -[ - { - "discussion_id": "1856452050", - "pr_number": 26499, - "pr_file": "modules/objdetect/src/qrcode_sr_scale/super_scale.hpp", - "created_at": "2024-11-25T11:32:54+00:00", - "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n//\n// Tencent is pleased to support the open source community by making WeChat QRCode available.\n// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.\n\n#ifndef __SCALE_SUPER_SCALE_HPP_\n#define __SCALE_SUPER_SCALE_HPP_\n\n#include \n#include \"opencv2/dnn.hpp\"\n#include \"opencv2/imgproc.hpp\"\nusing namespace std;\n\nnamespace cv {\nclass SuperScale {\npublic:\n SuperScale(){};\n ~SuperScale(){};\n int init(const std::string &config_path);\n std::vector getScaleList(const int width, const int height);\n Mat ProcessImageScale(const Mat &src, float scale, const bool &use_sr, int sr_max_size = 160);\n\nprivate:\n std::shared_ptr qbar_sr;\n bool net_loaded_ = false;", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1856452050", - "repo_full_name": "opencv/opencv", - "pr_number": 26499, - "pr_file": "modules/objdetect/src/qrcode_sr_scale/super_scale.hpp", - "discussion_id": "1856452050", - "commented_code": "@@ -0,0 +1,31 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+//\n+// Tencent is pleased to support the open source community by making WeChat QRCode available.\n+// Copyright (C) 2020 THL A29 Limited, a Tencent company. 
All rights reserved.\n+\n+#ifndef __SCALE_SUPER_SCALE_HPP_\n+#define __SCALE_SUPER_SCALE_HPP_\n+\n+#include \n+#include \"opencv2/dnn.hpp\"\n+#include \"opencv2/imgproc.hpp\"\n+using namespace std;\n+\n+namespace cv {\n+class SuperScale {\n+public:\n+ SuperScale(){};\n+ ~SuperScale(){};\n+ int init(const std::string &config_path);\n+ std::vector getScaleList(const int width, const int height);\n+ Mat ProcessImageScale(const Mat &src, float scale, const bool &use_sr, int sr_max_size = 160);\n+\n+private:\n+ std::shared_ptr qbar_sr;\n+ bool net_loaded_ = false;", - "comment_created_at": "2024-11-25T11:32:54+00:00", - "comment_author": "dkurt", - "comment_body": "Check `qbar_sr` is not empty instead of separate boolean flag", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1910095401", - "pr_number": 26660, - "pr_file": "modules/highgui/src/window_cocoa.mm", - "created_at": "2025-01-10T09:42:19+00:00", - "commented_code": "if( floor(NSAppKitVersionNumber) > NSAppKitVersionNumber10_5 )\n [application setActivationPolicy:NSApplicationActivationPolicyRegular];\n#endif\n //[application finishLaunching];\n //atexit(icvCocoaCleanup);\n\n setlocale(LC_NUMERIC,\"C\");\n\n return 0;\n}\n\nstatic CVWindow *cvGetWindow(const char *name) {\n //cout << \"cvGetWindow\" << endl;\n NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];\n NSString *cvname = [NSString stringWithFormat:@\"%s\", name];\n CVWindow* retval = (CVWindow*) [windows valueForKey:cvname] ;\n //cout << \"retain count: \" << [retval retainCount] << endl;\n //retval = [retval retain];\n //cout << \"retain count: \" << [retval retainCount] << endl;\n [localpool drain];\n //cout << \"retain count: \" << [retval retainCount] << endl;\n return retval;\n @autoreleasepool{\n NSString *cvname = [NSString stringWithFormat:@\"%s\", name];\n CVWindow* retval = (CVWindow*) [windows valueForKey:cvname];\n return retval;\n }\n}\n\nCV_IMPL int cvStartWindowThread()\n{\n //cout << \"cvStartWindowThread\" << endl;\n return 0;\n}\n\nCV_IMPL void cvDestroyWindow( const char* name)\n{\n\n NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];\n //cout << \"cvDestroyWindow\" << endl;\n CVWindow *window = cvGetWindow(name);\n if(window) {\n if ([window styleMask] & NSFullScreenWindowMask) {\n [window toggleFullScreen:nil];\n @autoreleasepool {\n CVWindow *window = cvGetWindow(name);\n if(window) {\n if ([window styleMask] & NSFullScreenWindowMask) {\n [window toggleFullScreen:nil];\n }\n [window close];\n [windows removeObjectForKey:[NSString stringWithFormat:@\"%s\", name]];\n }\n [window close];\n [windows removeObjectForKey:[NSString stringWithFormat:@\"%s\", name]];\n }\n [localpool drain];\n}\n\n\nCV_IMPL void cvDestroyAllWindows( void )\n{\n //cout << \"cvDestroyAllWindows\" << endl;\n NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];\n NSDictionary* list = [NSDictionary dictionaryWithDictionary:windows];\n for(NSString *key in list) {\n cvDestroyWindow([key cStringUsingEncoding:NSASCIIStringEncoding]);\n @autoreleasepool {\n NSDictionary* list = [NSDictionary dictionaryWithDictionary:windows];\n for(NSString *key in list) {\n cvDestroyWindow([key cStringUsingEncoding:NSASCIIStringEncoding]);\n }\n }\n [localpool drain];\n}\n\n\nCV_IMPL void cvShowImage( const char* name, const CvArr* arr)\n{\n //cout << \"cvShowImage\" << endl;\n NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];\n CVWindow *window = cvGetWindow(name);\n if(!window)\n {\n cvNamedWindow(name, CV_WINDOW_AUTOSIZE);\n window = 
cvGetWindow(name);\n }\n @autoreleasepool{\n CVWindow *window = cvGetWindow(name);\n if(!window)\n {\n cvNamedWindow(name, CV_WINDOW_AUTOSIZE);\n window = cvGetWindow(name);\n }\n\n if(window)\n {\n bool empty = [[window contentView] image] == nil;\n NSRect vrectOld = [[window contentView] frame];\n NSSize oldImageSize = [[[window contentView] image] size];\n [[window contentView] setImageData:(CvArr *)arr];\n if([window autosize] || [window firstContent] || empty)\n if(window)\n {\n NSSize imageSize = [[[window contentView] image] size];\n // Only adjust the image size if the new image is a different size from the previous\n if (oldImageSize.height != imageSize.height || oldImageSize.width != imageSize.width)\n bool empty = [[window contentView] image] == nil;\n NSRect vrectOld = [[window contentView] frame];\n NSSize oldImageSize = [[[window contentView] image] size];\n [[window contentView] setImageData:(CvArr *)arr];\n if([window autosize] || [window firstContent] || empty)\n {\n //Set new view size considering sliders (reserve height and min width)\n NSSize scaledImageSize = imageSize;\n if ([[window contentView] respondsToSelector:@selector(convertSizeFromBacking:)])\n NSSize imageSize = [[[window contentView] image] size];\n // Only adjust the image size if the new image is a different size from the previous\n if (oldImageSize.height != imageSize.height || oldImageSize.width != imageSize.width)\n {\n // Only resize for retina displays if the image is bigger than the screen\n NSSize screenSize = NSScreen.mainScreen.visibleFrame.size;\n CGFloat titleBarHeight = window.frame.size.height - [window contentRectForFrameRect:window.frame].size.height;\n screenSize.height -= titleBarHeight;\n if (imageSize.width > screenSize.width || imageSize.height > screenSize.height)\n //Set new view size considering sliders (reserve height and min width)\n NSSize scaledImageSize = imageSize;\n if ([[window contentView] respondsToSelector:@selector(convertSizeFromBacking:)])\n {\n CGFloat fx = screenSize.width/std::max(imageSize.width, (CGFloat)1.f);\n CGFloat fy = screenSize.height/std::max(imageSize.height, (CGFloat)1.f);\n CGFloat min_f = std::min(fx, fy);\n scaledImageSize = [[window contentView] convertSizeFromBacking:imageSize];\n scaledImageSize.width = std::min(scaledImageSize.width, min_f*imageSize.width);\n scaledImageSize.height = std::min(scaledImageSize.height, min_f*imageSize.height);\n // Only resize for retina displays if the image is bigger than the screen\n NSSize screenSize = NSScreen.mainScreen.visibleFrame.size;\n CGFloat titleBarHeight = window.frame.size.height - [window contentRectForFrameRect:window.frame].size.height;\n screenSize.height -= titleBarHeight;\n if (imageSize.width > screenSize.width || imageSize.height > screenSize.height)\n {\n CGFloat fx = screenSize.width/std::max(imageSize.width, (CGFloat)1.f);\n CGFloat fy = screenSize.height/std::max(imageSize.height, (CGFloat)1.f);\n CGFloat min_f = std::min(fx, fy);\n scaledImageSize = [[window contentView] convertSizeFromBacking:imageSize];\n scaledImageSize.width = std::min(scaledImageSize.width, min_f*imageSize.width);\n scaledImageSize.height = std::min(scaledImageSize.height, min_f*imageSize.height);\n }\n }\n }\n NSSize contentSize = vrectOld.size;\n contentSize.height = scaledImageSize.height + [window contentView].sliderHeight;\n contentSize.width = std::max(scaledImageSize.width, MIN_SLIDER_WIDTH);\n [window setContentSize:contentSize]; //adjust sliders to fit new window size\n if([window firstContent])\n {\n int x 
= [window x0];\n int y = [window y0];\n if(x >= 0 && y >= 0)\n NSSize contentSize = vrectOld.size;\n contentSize.height = scaledImageSize.height + [window contentView].sliderHeight;\n contentSize.width = std::max(scaledImageSize.width, MIN_SLIDER_WIDTH);\n [window setContentSize:contentSize]; //adjust sliders to fit new window size\n if([window firstContent])\n {\n y = [[window screen] visibleFrame].size.height - y;\n [window setFrameTopLeftPoint:NSMakePoint(x, y)];\n int x = [window x0];\n int y = [window y0];\n if(x >= 0 && y >= 0)\n {\n y = [[window screen] visibleFrame].size.height - y;\n [window setFrameTopLeftPoint:NSMakePoint(x, y)];\n }\n }\n }\n }\n [window setFirstContent:NO];\n }\n [window setFirstContent:NO];\n }\n [localpool drain];\n}\n\nCV_IMPL void cvResizeWindow( const char* name, int width, int height)\n{\n\n //cout << \"cvResizeWindow\" << endl;\n NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];\n CVWindow *window = cvGetWindow(name);\n if(window && ![window autosize]) {\n height += [window contentView].sliderHeight;\n NSSize size = { (CGFloat)width, (CGFloat)height };\n [window setContentSize:size];\n @autoreleasepool{\n CVWindow *window = cvGetWindow(name);\n if(window && ![window autosize]) {\n height += [window contentView].sliderHeight;\n NSSize size = { (CGFloat)width, (CGFloat)height };\n [window setContentSize:size];\n }\n }\n [localpool drain];\n}\n\nCV_IMPL void cvMoveWindow( const char* name, int x, int y)\n{\n CV_FUNCNAME(\"cvMoveWindow\");\n __BEGIN__;\n\n NSAutoreleasePool* localpool1 = [[NSAutoreleasePool alloc] init];\n CVWindow *window = nil;\n __BEGIN__;\n @autoreleasepool{\n CVWindow *window = nil;\n\n if(name == NULL)\n CV_ERROR( CV_StsNullPtr, \"NULL window name\" );\n //cout << \"cvMoveWindow\"<< endl;\n window = cvGetWindow(name);\n if(window) {\n if([window firstContent]) {\n [window setX0:x];\n [window setY0:y];\n }\n else {\n y = [[window screen] visibleFrame].size.height - y;\n [window setFrameTopLeftPoint:NSMakePoint(x, y)];\n if(name == NULL)\n CV_ERROR( CV_StsNullPtr, \"NULL window name\" );\n window = cvGetWindow(name);\n if(window) {\n if([window firstContent]) {\n [window setX0:x];\n [window setY0:y];\n }\n else {\n y = [[window screen] visibleFrame].size.height - y;\n [window setFrameTopLeftPoint:NSMakePoint(x, y)];\n }\n }\n }", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1910095401", - "repo_full_name": "opencv/opencv", - "pr_number": 26660, - "pr_file": "modules/highgui/src/window_cocoa.mm", - "discussion_id": "1910095401", - "commented_code": "@@ -161,162 +146,145 @@ CV_IMPL int cvInitSystem( int , char** )\n if( floor(NSAppKitVersionNumber) > NSAppKitVersionNumber10_5 )\n [application setActivationPolicy:NSApplicationActivationPolicyRegular];\n #endif\n- //[application finishLaunching];\n- //atexit(icvCocoaCleanup);\n-\n setlocale(LC_NUMERIC,\"C\");\n \n return 0;\n }\n \n static CVWindow *cvGetWindow(const char *name) {\n- //cout << \"cvGetWindow\" << endl;\n- NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];\n- NSString *cvname = [NSString stringWithFormat:@\"%s\", name];\n- CVWindow* retval = (CVWindow*) [windows valueForKey:cvname] ;\n- //cout << \"retain count: \" << [retval retainCount] << endl;\n- //retval = [retval retain];\n- //cout << \"retain count: \" << [retval retainCount] << endl;\n- [localpool drain];\n- //cout << \"retain count: \" << [retval retainCount] << endl;\n- return retval;\n+ @autoreleasepool{\n+ NSString *cvname = [NSString 
stringWithFormat:@\"%s\", name];\n+ CVWindow* retval = (CVWindow*) [windows valueForKey:cvname];\n+ return retval;\n+ }\n }\n \n CV_IMPL int cvStartWindowThread()\n {\n- //cout << \"cvStartWindowThread\" << endl;\n return 0;\n }\n \n CV_IMPL void cvDestroyWindow( const char* name)\n {\n-\n- NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];\n- //cout << \"cvDestroyWindow\" << endl;\n- CVWindow *window = cvGetWindow(name);\n- if(window) {\n- if ([window styleMask] & NSFullScreenWindowMask) {\n- [window toggleFullScreen:nil];\n+ @autoreleasepool {\n+ CVWindow *window = cvGetWindow(name);\n+ if(window) {\n+ if ([window styleMask] & NSFullScreenWindowMask) {\n+ [window toggleFullScreen:nil];\n+ }\n+ [window close];\n+ [windows removeObjectForKey:[NSString stringWithFormat:@\"%s\", name]];\n }\n- [window close];\n- [windows removeObjectForKey:[NSString stringWithFormat:@\"%s\", name]];\n }\n- [localpool drain];\n }\n \n \n CV_IMPL void cvDestroyAllWindows( void )\n {\n- //cout << \"cvDestroyAllWindows\" << endl;\n- NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];\n- NSDictionary* list = [NSDictionary dictionaryWithDictionary:windows];\n- for(NSString *key in list) {\n- cvDestroyWindow([key cStringUsingEncoding:NSASCIIStringEncoding]);\n+ @autoreleasepool {\n+ NSDictionary* list = [NSDictionary dictionaryWithDictionary:windows];\n+ for(NSString *key in list) {\n+ cvDestroyWindow([key cStringUsingEncoding:NSASCIIStringEncoding]);\n+ }\n }\n- [localpool drain];\n }\n \n \n CV_IMPL void cvShowImage( const char* name, const CvArr* arr)\n {\n- //cout << \"cvShowImage\" << endl;\n- NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];\n- CVWindow *window = cvGetWindow(name);\n- if(!window)\n- {\n- cvNamedWindow(name, CV_WINDOW_AUTOSIZE);\n- window = cvGetWindow(name);\n- }\n+ @autoreleasepool{\n+ CVWindow *window = cvGetWindow(name);\n+ if(!window)\n+ {\n+ cvNamedWindow(name, CV_WINDOW_AUTOSIZE);\n+ window = cvGetWindow(name);\n+ }\n \n- if(window)\n- {\n- bool empty = [[window contentView] image] == nil;\n- NSRect vrectOld = [[window contentView] frame];\n- NSSize oldImageSize = [[[window contentView] image] size];\n- [[window contentView] setImageData:(CvArr *)arr];\n- if([window autosize] || [window firstContent] || empty)\n+ if(window)\n {\n- NSSize imageSize = [[[window contentView] image] size];\n- // Only adjust the image size if the new image is a different size from the previous\n- if (oldImageSize.height != imageSize.height || oldImageSize.width != imageSize.width)\n+ bool empty = [[window contentView] image] == nil;\n+ NSRect vrectOld = [[window contentView] frame];\n+ NSSize oldImageSize = [[[window contentView] image] size];\n+ [[window contentView] setImageData:(CvArr *)arr];\n+ if([window autosize] || [window firstContent] || empty)\n {\n- //Set new view size considering sliders (reserve height and min width)\n- NSSize scaledImageSize = imageSize;\n- if ([[window contentView] respondsToSelector:@selector(convertSizeFromBacking:)])\n+ NSSize imageSize = [[[window contentView] image] size];\n+ // Only adjust the image size if the new image is a different size from the previous\n+ if (oldImageSize.height != imageSize.height || oldImageSize.width != imageSize.width)\n {\n- // Only resize for retina displays if the image is bigger than the screen\n- NSSize screenSize = NSScreen.mainScreen.visibleFrame.size;\n- CGFloat titleBarHeight = window.frame.size.height - [window contentRectForFrameRect:window.frame].size.height;\n- screenSize.height -= 
titleBarHeight;\n- if (imageSize.width > screenSize.width || imageSize.height > screenSize.height)\n+ //Set new view size considering sliders (reserve height and min width)\n+ NSSize scaledImageSize = imageSize;\n+ if ([[window contentView] respondsToSelector:@selector(convertSizeFromBacking:)])\n {\n- CGFloat fx = screenSize.width/std::max(imageSize.width, (CGFloat)1.f);\n- CGFloat fy = screenSize.height/std::max(imageSize.height, (CGFloat)1.f);\n- CGFloat min_f = std::min(fx, fy);\n- scaledImageSize = [[window contentView] convertSizeFromBacking:imageSize];\n- scaledImageSize.width = std::min(scaledImageSize.width, min_f*imageSize.width);\n- scaledImageSize.height = std::min(scaledImageSize.height, min_f*imageSize.height);\n+ // Only resize for retina displays if the image is bigger than the screen\n+ NSSize screenSize = NSScreen.mainScreen.visibleFrame.size;\n+ CGFloat titleBarHeight = window.frame.size.height - [window contentRectForFrameRect:window.frame].size.height;\n+ screenSize.height -= titleBarHeight;\n+ if (imageSize.width > screenSize.width || imageSize.height > screenSize.height)\n+ {\n+ CGFloat fx = screenSize.width/std::max(imageSize.width, (CGFloat)1.f);\n+ CGFloat fy = screenSize.height/std::max(imageSize.height, (CGFloat)1.f);\n+ CGFloat min_f = std::min(fx, fy);\n+ scaledImageSize = [[window contentView] convertSizeFromBacking:imageSize];\n+ scaledImageSize.width = std::min(scaledImageSize.width, min_f*imageSize.width);\n+ scaledImageSize.height = std::min(scaledImageSize.height, min_f*imageSize.height);\n+ }\n }\n- }\n- NSSize contentSize = vrectOld.size;\n- contentSize.height = scaledImageSize.height + [window contentView].sliderHeight;\n- contentSize.width = std::max(scaledImageSize.width, MIN_SLIDER_WIDTH);\n- [window setContentSize:contentSize]; //adjust sliders to fit new window size\n- if([window firstContent])\n- {\n- int x = [window x0];\n- int y = [window y0];\n- if(x >= 0 && y >= 0)\n+ NSSize contentSize = vrectOld.size;\n+ contentSize.height = scaledImageSize.height + [window contentView].sliderHeight;\n+ contentSize.width = std::max(scaledImageSize.width, MIN_SLIDER_WIDTH);\n+ [window setContentSize:contentSize]; //adjust sliders to fit new window size\n+ if([window firstContent])\n {\n- y = [[window screen] visibleFrame].size.height - y;\n- [window setFrameTopLeftPoint:NSMakePoint(x, y)];\n+ int x = [window x0];\n+ int y = [window y0];\n+ if(x >= 0 && y >= 0)\n+ {\n+ y = [[window screen] visibleFrame].size.height - y;\n+ [window setFrameTopLeftPoint:NSMakePoint(x, y)];\n+ }\n }\n }\n }\n+ [window setFirstContent:NO];\n }\n- [window setFirstContent:NO];\n }\n- [localpool drain];\n }\n \n CV_IMPL void cvResizeWindow( const char* name, int width, int height)\n {\n-\n- //cout << \"cvResizeWindow\" << endl;\n- NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];\n- CVWindow *window = cvGetWindow(name);\n- if(window && ![window autosize]) {\n- height += [window contentView].sliderHeight;\n- NSSize size = { (CGFloat)width, (CGFloat)height };\n- [window setContentSize:size];\n+ @autoreleasepool{\n+ CVWindow *window = cvGetWindow(name);\n+ if(window && ![window autosize]) {\n+ height += [window contentView].sliderHeight;\n+ NSSize size = { (CGFloat)width, (CGFloat)height };\n+ [window setContentSize:size];\n+ }\n }\n- [localpool drain];\n }\n \n CV_IMPL void cvMoveWindow( const char* name, int x, int y)\n {\n CV_FUNCNAME(\"cvMoveWindow\");\n- __BEGIN__;\n \n- NSAutoreleasePool* localpool1 = [[NSAutoreleasePool alloc] init];\n- CVWindow *window = nil;\n+ 
__BEGIN__;\n+ @autoreleasepool{\n+ CVWindow *window = nil;\n \n- if(name == NULL)\n- CV_ERROR( CV_StsNullPtr, \"NULL window name\" );\n- //cout << \"cvMoveWindow\"<< endl;\n- window = cvGetWindow(name);\n- if(window) {\n- if([window firstContent]) {\n- [window setX0:x];\n- [window setY0:y];\n- }\n- else {\n- y = [[window screen] visibleFrame].size.height - y;\n- [window setFrameTopLeftPoint:NSMakePoint(x, y)];\n+ if(name == NULL)\n+ CV_ERROR( CV_StsNullPtr, \"NULL window name\" );\n+ window = cvGetWindow(name);\n+ if(window) {\n+ if([window firstContent]) {\n+ [window setX0:x];\n+ [window setY0:y];\n+ }\n+ else {\n+ y = [[window screen] visibleFrame].size.height - y;\n+ [window setFrameTopLeftPoint:NSMakePoint(x, y)];\n+ }\n }\n }", - "comment_created_at": "2025-01-10T09:42:19+00:00", - "comment_author": "VadimLevin", - "comment_body": "\r\n```objc\r\nif (name == NULL) {\r\n CV_ERROR( CV_StsNullPtr, \"NULL window name\" );\r\n}\r\n@autoreleasepool {\r\n CVWindow *window = GetWindow(name);\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-cleanup-before-errors.json b/_reviewers/opencv-cleanup-before-errors.json new file mode 100644 index 0000000..c42769d --- /dev/null +++ b/_reviewers/opencv-cleanup-before-errors.json @@ -0,0 +1,80 @@ +[ + { + "discussion_id": "1896466401", + "pr_number": 25584, + "pr_file": "modules/videoio/misc/python/pyopencv_videoio.hpp", + "created_at": "2024-12-24T06:56:18+00:00", + "commented_code": "return true;\n}\n\nclass IOBaseWrapper : public std::streambuf\n{\npublic:\n IOBaseWrapper(PyObject* _obj = nullptr) : obj(_obj)\n {\n if (obj)\n Py_INCREF(obj);\n }\n\n ~IOBaseWrapper()\n {\n if (obj)\n Py_DECREF(obj);\n }\n\n std::streamsize xsgetn(char* buf, std::streamsize n) override\n {\n PyObject* ioBase = reinterpret_cast(obj);\n\n PyGILState_STATE gstate;\n gstate = PyGILState_Ensure();\n\n PyObject* size = pyopencv_from(static_cast(n));\n\n PyObject* res = PyObject_CallMethodObjArgs(ioBase, PyString_FromString(\"read\"), size, NULL);\n char* src = PyBytes_AsString(res);\n size_t len = static_cast(PyBytes_Size(res));\n CV_CheckLE(len, static_cast(n), \"Stream chunk size should be less or equal than requested size\");\n std::memcpy(buf, src, len);\n Py_DECREF(res);\n Py_DECREF(size);\n\n PyGILState_Release(gstate);\n\n return len;\n }\n\n std::streampos seekoff(std::streamoff off, std::ios_base::seekdir way, std::ios_base::openmode = std::ios_base::in | std::ios_base::out) override\n {\n PyObject* ioBase = reinterpret_cast(obj);\n\n PyGILState_STATE gstate;\n gstate = PyGILState_Ensure();\n\n PyObject* size = pyopencv_from(static_cast(off));\n PyObject* whence = pyopencv_from(way == std::ios_base::beg ? SEEK_SET : (way == std::ios_base::end ? 
SEEK_END : SEEK_CUR));\n\n PyObject* res = PyObject_CallMethodObjArgs(ioBase, PyString_FromString(\"seek\"), size, whence, NULL);\n int pos = PyLong_AsLong(res);\n Py_DECREF(res);\n Py_DECREF(size);\n Py_DECREF(whence);\n\n PyGILState_Release(gstate);\n\n return pos;\n }\n\nprivate:\n PyObject* obj;\n};\n\ntemplate<>\nbool pyopencv_to(PyObject* obj, Ptr& p, const ArgInfo&)\n{\n if (!obj)\n return false;\n\n PyGILState_STATE gstate;\n gstate = PyGILState_Ensure();\n\n PyObject* ioModule = PyImport_ImportModule(\"io\");\n PyObject* type = PyObject_GetAttrString(ioModule, \"BufferedIOBase\");\n Py_DECREF(ioModule);\n if (!PyObject_IsInstance(obj, type))\n CV_Error(cv::Error::StsBadArg, \"Input stream should be derived from io.BufferedIOBase\");", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1896466401", + "repo_full_name": "opencv/opencv", + "pr_number": 25584, + "pr_file": "modules/videoio/misc/python/pyopencv_videoio.hpp", + "discussion_id": "1896466401", + "commented_code": "@@ -31,4 +31,87 @@ template<> bool pyopencv_to(PyObject* obj, cv::VideoCapture& stream, const ArgIn\n return true;\n }\n \n+class IOBaseWrapper : public std::streambuf\n+{\n+public:\n+ IOBaseWrapper(PyObject* _obj = nullptr) : obj(_obj)\n+ {\n+ if (obj)\n+ Py_INCREF(obj);\n+ }\n+\n+ ~IOBaseWrapper()\n+ {\n+ if (obj)\n+ Py_DECREF(obj);\n+ }\n+\n+ std::streamsize xsgetn(char* buf, std::streamsize n) override\n+ {\n+ PyObject* ioBase = reinterpret_cast(obj);\n+\n+ PyGILState_STATE gstate;\n+ gstate = PyGILState_Ensure();\n+\n+ PyObject* size = pyopencv_from(static_cast(n));\n+\n+ PyObject* res = PyObject_CallMethodObjArgs(ioBase, PyString_FromString(\"read\"), size, NULL);\n+ char* src = PyBytes_AsString(res);\n+ size_t len = static_cast(PyBytes_Size(res));\n+ CV_CheckLE(len, static_cast(n), \"Stream chunk size should be less or equal than requested size\");\n+ std::memcpy(buf, src, len);\n+ Py_DECREF(res);\n+ Py_DECREF(size);\n+\n+ PyGILState_Release(gstate);\n+\n+ return len;\n+ }\n+\n+ std::streampos seekoff(std::streamoff off, std::ios_base::seekdir way, std::ios_base::openmode = std::ios_base::in | std::ios_base::out) override\n+ {\n+ PyObject* ioBase = reinterpret_cast(obj);\n+\n+ PyGILState_STATE gstate;\n+ gstate = PyGILState_Ensure();\n+\n+ PyObject* size = pyopencv_from(static_cast(off));\n+ PyObject* whence = pyopencv_from(way == std::ios_base::beg ? SEEK_SET : (way == std::ios_base::end ? SEEK_END : SEEK_CUR));\n+\n+ PyObject* res = PyObject_CallMethodObjArgs(ioBase, PyString_FromString(\"seek\"), size, whence, NULL);\n+ int pos = PyLong_AsLong(res);\n+ Py_DECREF(res);\n+ Py_DECREF(size);\n+ Py_DECREF(whence);\n+\n+ PyGILState_Release(gstate);\n+\n+ return pos;\n+ }\n+\n+private:\n+ PyObject* obj;\n+};\n+\n+template<>\n+bool pyopencv_to(PyObject* obj, Ptr& p, const ArgInfo&)\n+{\n+ if (!obj)\n+ return false;\n+\n+ PyGILState_STATE gstate;\n+ gstate = PyGILState_Ensure();\n+\n+ PyObject* ioModule = PyImport_ImportModule(\"io\");\n+ PyObject* type = PyObject_GetAttrString(ioModule, \"BufferedIOBase\");\n+ Py_DECREF(ioModule);\n+ if (!PyObject_IsInstance(obj, type))\n+ CV_Error(cv::Error::StsBadArg, \"Input stream should be derived from io.BufferedIOBase\");", + "comment_created_at": "2024-12-24T06:56:18+00:00", + "comment_author": "opencv-alalek", + "comment_body": "`CV_Error` doesn't call cleanup (release GIL and release `type`).\r\n\r\n```\r\nif (...)\r\n has_error = true;\r\n\r\n... 
cleanup ...\r\n\r\nif (has_error)\r\n CV_Error(...);\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2010187533", + "pr_number": 27115, + "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/norm.hpp", + "created_at": "2025-03-24T13:30:33+00:00", + "commented_code": "sizeof(int), sizeof(float),\n sizeof(int64_t), 0,\n };\n assert(elem_size_tab[depth]);", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2010187533", + "repo_full_name": "opencv/opencv", + "pr_number": 27115, + "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/norm.hpp", + "discussion_id": "2010187533", + "commented_code": "@@ -1008,6 +1008,7 @@ inline int norm(const uchar* src, size_t src_step, const uchar* mask, size_t mas\n sizeof(int), sizeof(float),\n sizeof(int64_t), 0,\n };\n+ assert(elem_size_tab[depth]);", + "comment_created_at": "2025-03-24T13:30:33+00:00", + "comment_author": "asmorkalov", + "comment_body": "Looks like the assert will be disabled in regular release builds: https://en.cppreference.com/w/cpp/error/assert.\r\nWhy not just CV_Assert? It's defined in `opencv2/core/base.hpp`", + "pr_file_module": null + }, + { + "comment_id": "2010279840", + "repo_full_name": "opencv/opencv", + "pr_number": 27115, + "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/norm.hpp", + "discussion_id": "2010187533", + "commented_code": "@@ -1008,6 +1008,7 @@ inline int norm(const uchar* src, size_t src_step, const uchar* mask, size_t mas\n sizeof(int), sizeof(float),\n sizeof(int64_t), 0,\n };\n+ assert(elem_size_tab[depth]);", + "comment_created_at": "2025-03-24T14:19:36+00:00", + "comment_author": "fengyuentau", + "comment_body": "Ok", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1975939795", + "pr_number": 26958, + "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/pyramids.hpp", + "created_at": "2025-02-28T19:51:05+00:00", + "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n#ifndef OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED\n#define OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED\n\n#include \n\nnamespace cv { namespace cv_hal_rvv { namespace pyramids {\n\n#undef cv_hal_pyrdown\n#define cv_hal_pyrdown cv::cv_hal_rvv::pyramids::pyrDown\n#undef cv_hal_pyrup\n#define cv_hal_pyrup cv::cv_hal_rvv::pyramids::pyrUp\n\ntemplate struct rvv;\n\ntemplate<> struct rvv\n{\n static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n static inline vuint8m1_t vle_T(const uchar* a, size_t b) { return __riscv_vle8_v_u8m1(a, b); }\n static inline vint32m4_t vle_WT(const int* a, size_t b) { return __riscv_vle32_v_i32m4(a, b); }\n static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n static inline vuint8m1_t vlse_T(const uchar* a, ptrdiff_t b, size_t c) { return __riscv_vlse8_v_u8m1(a, b, c); }\n static inline vuint8m1_t vloxei_T(const uchar* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_u8m1(a, b, c); }\n static inline void vse_T(uchar* a, vuint8m1_t b, size_t c) { return __riscv_vse8(a, b, c); }\n static inline vint32m4_t vcvt_T_WT(vuint8m1_t a, size_t b) { return __riscv_vreinterpret_v_u32m4_i32m4(__riscv_vzext_vf4(a, b)); }\n static inline vuint8m1_t vcvt_WT_T(vint32m4_t a, int b, size_t c) { return __riscv_vncvt_x(__riscv_vncvt_x(__riscv_vreinterpret_v_i32m4_u32m4(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c)), c), c); }\n};\n\ntemplate<> struct rvv\n{\n static inline size_t 
vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n static inline vint16m2_t vle_T(const short* a, size_t b) { return __riscv_vle16_v_i16m2(a, b); }\n static inline vint32m4_t vle_WT(const int* a, size_t b) { return __riscv_vle32_v_i32m4(a, b); }\n static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n static inline vint16m2_t vlse_T(const short* a, ptrdiff_t b, size_t c) { return __riscv_vlse16_v_i16m2(a, b, c); }\n static inline vint16m2_t vloxei_T(const short* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_i16m2(a, b, c); }\n static inline void vse_T(short* a, vint16m2_t b, size_t c) { return __riscv_vse16(a, b, c); }\n static inline vint32m4_t vcvt_T_WT(vint16m2_t a, size_t b) { return __riscv_vsext_vf2(a, b); }\n static inline vint16m2_t vcvt_WT_T(vint32m4_t a, int b, size_t c) { return __riscv_vncvt_x(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c), c); }\n};\n\ntemplate<> struct rvv\n{\n static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n static inline vfloat32m4_t vle_T(const float* a, size_t b) { return __riscv_vle32_v_f32m4(a, b); }\n static inline vfloat32m4_t vle_WT(const float* a, size_t b) { return __riscv_vle32_v_f32m4(a, b); }\n static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n static inline vfloat32m4_t vlse_T(const float* a, ptrdiff_t b, size_t c) { return __riscv_vlse32_v_f32m4(a, b, c); }\n static inline vfloat32m4_t vloxei_T(const float* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_f32m4(a, b, c); }\n static inline void vse_T(float* a, vfloat32m4_t b, size_t c) { return __riscv_vse32(a, b, c); }\n};\n\ntemplate struct pyrDownVec0\n{\n void operator()(const T* src, WT* row, const uint* tabM, int start, int end)\n {\n int vl;\n switch (start)\n {\n case 1:\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 - 2, 2 * sizeof(T), vl), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 - 1, 2 * sizeof(T), vl), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2, 2 * sizeof(T), vl), vl);\n auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 + 1, 2 * sizeof(T), vl), vl);\n auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 + 2, 2 * sizeof(T), vl), vl);\n __riscv_vse32(row + x, __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n }\n break;\n case 2:\n for( int x = start / 2; x < end / 2; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 2 - x);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 2, 4 * sizeof(T), vl), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 2, 4 * sizeof(T), vl), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 2, 4 * sizeof(T), vl), vl);\n auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 2, 4 * sizeof(T), vl), vl);\n auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 2, 4 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 2, 2 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src 
+ (x * 2 - 1) * 2 + 1, 4 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 2 + 1, 4 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 2 + 1, 2 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n }\n break;\n case 3:\n for( int x = start / 3; x < end / 3; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 3 - x);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3, 6 * sizeof(T), vl), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3, 6 * sizeof(T), vl), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3, 6 * sizeof(T), vl), vl);\n auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3, 6 * sizeof(T), vl), vl);\n auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3, 6 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 3, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3 + 1, 6 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3 + 1, 6 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 3 + 1, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3 + 2, 6 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3 + 2, 6 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 3 + 2, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n }\n break;\n case 4:\n for( int x = start / 4; x < end / 4; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 4 - x);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4, 8 * sizeof(T), vl), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4, 8 * sizeof(T), vl), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4, 8 * sizeof(T), vl), vl);\n auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4, 8 * sizeof(T), vl), vl);\n auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4, 8 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 4, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n 
__riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 1, 8 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 1, 8 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 4 + 1, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 2, 8 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 2, 8 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 4 + 2, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 3, 8 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 3, 8 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 4 + 3, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n }\n break;\n default:\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_tabM = rvv::vle_M(tabM + x, vl);\n vec_tabM = __riscv_vmul(__riscv_vsub(vec_tabM, start * 2, vl), sizeof(T), vl);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n auto vec_src3 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n auto vec_src4 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n __riscv_vse32(row + x, __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n }\n }\n }\n};\ntemplate<> struct pyrDownVec0\n{\n void operator()(const float* src, float* row, const uint* tabM, int start, int end)\n {\n int vl;\n switch (start)\n {\n case 1:\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto 
vec_src0 = rvv::vlse_T(src + x * 2 - 2, 2 * sizeof(float), vl);\n auto vec_src1 = rvv::vlse_T(src + x * 2 - 1, 2 * sizeof(float), vl);\n auto vec_src2 = rvv::vlse_T(src + x * 2, 2 * sizeof(float), vl);\n auto vec_src3 = rvv::vlse_T(src + x * 2 + 1, 2 * sizeof(float), vl);\n auto vec_src4 = rvv::vlse_T(src + x * 2 + 2, 2 * sizeof(float), vl);\n __riscv_vse32(row + x, __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n }\n break;\n case 2:\n for( int x = start / 2; x < end / 2; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 2 - x);\n auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 2, 4 * sizeof(float), vl);\n auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 2, 4 * sizeof(float), vl);\n auto vec_src2 = rvv::vlse_T(src + (x * 2) * 2, 4 * sizeof(float), vl);\n auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 2, 4 * sizeof(float), vl);\n auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 2, 4 * sizeof(float), vl);\n __riscv_vsse32(row + x * 2, 2 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 2 + 1, 4 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 2 + 1, 4 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 2 + 1, 4 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 2 + 1, 4 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 2 + 1, 4 * sizeof(float), vl);\n __riscv_vsse32(row + x * 2 + 1, 2 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n }\n break;\n case 3:\n for( int x = start / 3; x < end / 3; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 3 - x);\n auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3, 6 * sizeof(float), vl);\n auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3, 6 * sizeof(float), vl);\n auto vec_src2 = rvv::vlse_T(src + (x * 2) * 3, 6 * sizeof(float), vl);\n auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3, 6 * sizeof(float), vl);\n auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3, 6 * sizeof(float), vl);\n __riscv_vsse32(row + x * 3, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3 + 1, 6 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3 + 1, 6 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 3 + 1, 6 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3 + 1, 6 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3 + 1, 6 * sizeof(float), vl);\n __riscv_vsse32(row + x * 3 + 1, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3 + 2, 6 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3 + 2, 6 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 3 + 2, 6 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3 + 2, 6 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3 + 2, 6 * sizeof(float), vl);\n __riscv_vsse32(row + x * 3 + 2, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, 
vl), vl), vl), vl);\n }\n break;\n case 4:\n for( int x = start / 4; x < end / 4; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 4 - x);\n auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4, 8 * sizeof(float), vl);\n auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4, 8 * sizeof(float), vl);\n auto vec_src2 = rvv::vlse_T(src + (x * 2) * 4, 8 * sizeof(float), vl);\n auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4, 8 * sizeof(float), vl);\n auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4, 8 * sizeof(float), vl);\n __riscv_vsse32(row + x * 4, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 1, 8 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 1, 8 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 1, 8 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 1, 8 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 1, 8 * sizeof(float), vl);\n __riscv_vsse32(row + x * 4 + 1, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 2, 8 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 2, 8 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 2, 8 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 2, 8 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 2, 8 * sizeof(float), vl);\n __riscv_vsse32(row + x * 4 + 2, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 3, 8 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 3, 8 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 3, 8 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 3, 8 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 3, 8 * sizeof(float), vl);\n __riscv_vsse32(row + x * 4 + 3, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n }\n break;\n default:\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_tabM = rvv::vle_M(tabM + x, vl);\n vec_tabM = __riscv_vmul(__riscv_vsub(vec_tabM, start * 2, vl), sizeof(float), vl);\n auto vec_src0 = rvv::vloxei_T(src, vec_tabM, vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n auto vec_src1 = rvv::vloxei_T(src, vec_tabM, vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n auto vec_src2 = rvv::vloxei_T(src, vec_tabM, vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n auto vec_src3 = rvv::vloxei_T(src, vec_tabM, vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n auto vec_src4 = rvv::vloxei_T(src, vec_tabM, vl);\n __riscv_vse32(row + x, __riscv_vfmadd(__riscv_vfadd(__riscv_vfadd(vec_src1, vec_src2, vl), vec_src3, vl), 4,\n __riscv_vfadd(__riscv_vfadd(vec_src0, vec_src4, vl), __riscv_vfadd(vec_src2, vec_src2, vl), vl), vl), vl);\n }\n }\n }\n};\n\ntemplate struct pyrDownVec1\n{\n void operator()(WT* row0, WT* row1, WT* row2, WT* row3, WT* row4, T* dst, int end)\n {\n int vl;\n for( int x = 0 ; x < end; x += 
vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n auto vec_src3 = rvv::vle_WT(row3 + x, vl);\n auto vec_src4 = rvv::vle_WT(row4 + x, vl);\n rvv::vse_T(dst + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), 8, vl), vl);\n }\n }\n};\ntemplate<> struct pyrDownVec1\n{\n void operator()(float* row0, float* row1, float* row2, float* row3, float* row4, float* dst, int end)\n {\n int vl;\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n auto vec_src3 = rvv::vle_WT(row3 + x, vl);\n auto vec_src4 = rvv::vle_WT(row4 + x, vl);\n rvv::vse_T(dst + x, __riscv_vfmul(__riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), 1.f / 256.f, vl), vl);\n }\n }\n};\n\ntemplate struct pyrUpVec0\n{\n void operator()(const T* src, WT* row, const uint* dtab, int start, int end)\n {\n int vl;\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vle_T(src + x - start, vl), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vle_T(src + x, vl), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vle_T(src + x + start, vl), vl);\n\n auto vec_dtab = rvv::vle_M(dtab + x, vl);\n vec_dtab = __riscv_vmul(vec_dtab, sizeof(WT), vl);\n __riscv_vsoxei32(row, vec_dtab, __riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), vl);\n __riscv_vsoxei32(row, __riscv_vadd(vec_dtab, start * sizeof(WT), vl), __riscv_vsll(__riscv_vadd(vec_src1, vec_src2, vl), 2, vl), vl);\n }\n }\n};\ntemplate<> struct pyrUpVec0\n{\n void operator()(const float* src, float* row, const uint* dtab, int start, int end)\n {\n int vl;\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_T(src + x - start, vl);\n auto vec_src1 = rvv::vle_T(src + x, vl);\n auto vec_src2 = rvv::vle_T(src + x + start, vl);\n\n auto vec_dtab = rvv::vle_M(dtab + x, vl);\n vec_dtab = __riscv_vmul(vec_dtab, sizeof(float), vl);\n __riscv_vsoxei32(row, vec_dtab, __riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), vl);\n __riscv_vsoxei32(row, __riscv_vadd(vec_dtab, start * sizeof(float), vl), __riscv_vfmul(__riscv_vfadd(vec_src1, vec_src2, vl), 4, vl), vl);\n }\n }\n};\n\ntemplate struct pyrUpVec1\n{\n void operator()(WT* row0, WT* row1, WT* row2, T* dst0, T* dst1, int end)\n {\n int vl;\n if (dst0 != dst1)\n {\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n rvv::vse_T(dst0 + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), 6, vl), vl);\n rvv::vse_T(dst1 + x, rvv::vcvt_WT_T(__riscv_vsll(__riscv_vadd(vec_src1, vec_src2, vl), 2, vl), 6, vl), vl);\n }\n }\n else\n {\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = 
rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n rvv::vse_T(dst0 + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), 6, vl), vl);\n }\n }\n }\n};\ntemplate<> struct pyrUpVec1\n{\n void operator()(float* row0, float* row1, float* row2, float* dst0, float* dst1, int end)\n {\n int vl;\n if (dst0 != dst1)\n {\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n rvv::vse_T(dst0 + x, __riscv_vfmul(__riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), 1.f / 64.f, vl), vl);\n rvv::vse_T(dst1 + x, __riscv_vfmul(__riscv_vfadd(vec_src1, vec_src2, vl), 1.f / 16.f, vl), vl);\n }\n }\n else\n {\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n rvv::vse_T(dst0 + x, __riscv_vfmul(__riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), 1.f / 64.f, vl), vl);\n }\n }\n }\n};\n\ntemplate\nstruct PyrDownInvoker : ParallelLoopBody\n{\n PyrDownInvoker(const uchar* _src_data, size_t _src_step, int _src_width, int _src_height, uchar* _dst_data, size_t _dst_step, int _dst_width, int _dst_height, int _cn, int _borderType, int* _tabR, int* _tabM, int* _tabL)\n {\n src_data = _src_data;\n src_step = _src_step;\n src_width = _src_width;\n src_height = _src_height;\n dst_data = _dst_data;\n dst_step = _dst_step;\n dst_width = _dst_width;\n dst_height = _dst_height;\n cn = _cn;\n borderType = _borderType;\n tabR = _tabR;\n tabM = _tabM;\n tabL = _tabL;\n }\n\n void operator()(const Range& range) const CV_OVERRIDE;\n\n const uchar* src_data;\n size_t src_step;\n int src_width;\n int src_height;\n uchar* dst_data;\n size_t dst_step;\n int dst_width;\n int dst_height;\n int cn;\n int borderType;\n int* tabR;\n int* tabM;\n int* tabL;\n};\n\n// the algorithm is copied from imgproc/src/pyramids.cpp,\n// in the function template void cv::pyrDown_\ntemplate\ninline int pyrDown(const uchar* src_data, size_t src_step, int src_width, int src_height, uchar* dst_data, size_t dst_step, int dst_width, int dst_height, int cn, int borderType)\n{\n const int PD_SZ = 5;\n\n std::vector _tabM(dst_width * cn), _tabL(cn * (PD_SZ + 2)), _tabR(cn * (PD_SZ + 2));\n int *tabM = _tabM.data(), *tabL = _tabL.data(), *tabR = _tabR.data();\n\n CV_Assert( src_width > 0 && src_height > 0 &&\n std::abs(dst_width*2 - src_width) <= 2 &&\n std::abs(dst_height*2 - src_height) <= 2 );\n int width0 = std::min((src_width-PD_SZ/2-1)/2 + 1, dst_width);\n\n for (int x = 0; x <= PD_SZ+1; x++)\n {\n int sx0 = borderInterpolate(x - PD_SZ/2, src_width, borderType)*cn;\n int sx1 = borderInterpolate(x + width0*2 - PD_SZ/2, src_width, borderType)*cn;\n for (int k = 0; k < cn; k++)\n {\n tabL[x*cn + k] = sx0 + k;\n tabR[x*cn + k] = sx1 + k;\n }\n }\n\n for (int x = 0; x < dst_width*cn; x++)\n tabM[x] = (x/cn)*2*cn + x % cn;\n\n cv::parallel_for_(Range(0,dst_height), PyrDownInvoker(src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, cn, borderType, tabR, tabM, tabL), cv::getNumThreads());\n return CV_HAL_ERROR_OK;\n}\n\ntemplate\nvoid PyrDownInvoker::operator()(const Range& range) const\n{\n const int PD_SZ = 5;\n\n int bufstep = (dst_width*cn + 15) & -16;\n 
std::vector _buf(bufstep*PD_SZ + 16);\n WT* buf = (WT*)(((size_t)_buf.data() + 15) & -16);\n WT* rows[PD_SZ];\n\n int sy0 = -PD_SZ/2, sy = range.start * 2 + sy0, width0 = std::min((src_width-PD_SZ/2-1)/2 + 1, dst_width);\n\n int _dst_width = dst_width * cn;\n width0 *= cn;\n\n for (int y = range.start; y < range.end; y++)\n {\n T* dst = reinterpret_cast(dst_data + dst_step * y);\n WT *row0, *row1, *row2, *row3, *row4;\n\n // fill the ring buffer (horizontal convolution and decimation)\n int sy_limit = y*2 + 2;\n for( ; sy <= sy_limit; sy++ )\n {\n WT* row = buf + ((sy - sy0) % PD_SZ)*bufstep;\n int _sy = borderInterpolate(sy, src_height, borderType);\n const T* src = reinterpret_cast(src_data + src_step * _sy);\n\n do {\n int x = 0;\n for( ; x < cn; x++ )\n {\n row[x] = src[tabL[x+cn*2]]*6 + (src[tabL[x+cn]] + src[tabL[x+cn*3]])*4 +\n src[tabL[x]] + src[tabL[x+cn*4]];\n }\n\n if( x == _dst_width )\n break;\n\n pyrDownVec0()(src, row, reinterpret_cast(tabM), cn, width0);\n x = width0;\n\n // tabR\n for (int x_ = 0; x < _dst_width; x++, x_++)\n {\n row[x] = src[tabR[x_+cn*2]]*6 + (src[tabR[x_+cn]] + src[tabR[x_+cn*3]])*4 +\n src[tabR[x_]] + src[tabR[x_+cn*4]];\n }\n } while (0);\n }\n\n // do vertical convolution and decimation and write the result to the destination image\n for (int k = 0; k < PD_SZ; k++)\n rows[k] = buf + ((y*2 - PD_SZ/2 + k - sy0) % PD_SZ)*bufstep;\n row0 = rows[0]; row1 = rows[1]; row2 = rows[2]; row3 = rows[3]; row4 = rows[4];\n\n pyrDownVec1()(row0, row1, row2, row3, row4, dst, _dst_width);\n }\n}\n\n// the algorithm is copied from imgproc/src/pyramids.cpp,\n// in the function template void cv::pyrUp_\ntemplate\ninline int pyrUp(const uchar* src_data, size_t src_step, int src_width, int src_height, uchar* dst_data, size_t dst_step, int dst_width, int dst_height, int cn)\n{\n const int PU_SZ = 3;\n\n int bufstep = ((dst_width+1)*cn + 15) & -16;\n std::vector _buf(bufstep*PU_SZ + 16);\n WT* buf = (WT*)(((size_t)_buf.data() + 15) & -16);\n std::vector _dtab(src_width*cn);\n int* dtab = _dtab.data();\n WT* rows[PU_SZ];\n\n CV_Assert( std::abs(dst_width - src_width*2) == dst_width % 2 &&", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1975939795", + "repo_full_name": "opencv/opencv", + "pr_number": 26958, + "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/pyramids.hpp", + "discussion_id": "1975939795", + "commented_code": "@@ -0,0 +1,681 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+#ifndef OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED\n+#define OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED\n+\n+#include \n+\n+namespace cv { namespace cv_hal_rvv { namespace pyramids {\n+\n+#undef cv_hal_pyrdown\n+#define cv_hal_pyrdown cv::cv_hal_rvv::pyramids::pyrDown\n+#undef cv_hal_pyrup\n+#define cv_hal_pyrup cv::cv_hal_rvv::pyramids::pyrUp\n+\n+template struct rvv;\n+\n+template<> struct rvv\n+{\n+ static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n+ static inline vuint8m1_t vle_T(const uchar* a, size_t b) { return __riscv_vle8_v_u8m1(a, b); }\n+ static inline vint32m4_t vle_WT(const int* a, size_t b) { return __riscv_vle32_v_i32m4(a, b); }\n+ static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n+ static inline vuint8m1_t vlse_T(const uchar* a, ptrdiff_t b, size_t c) { return __riscv_vlse8_v_u8m1(a, b, c); }\n+ static inline vuint8m1_t 
vloxei_T(const uchar* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_u8m1(a, b, c); }\n+ static inline void vse_T(uchar* a, vuint8m1_t b, size_t c) { return __riscv_vse8(a, b, c); }\n+ static inline vint32m4_t vcvt_T_WT(vuint8m1_t a, size_t b) { return __riscv_vreinterpret_v_u32m4_i32m4(__riscv_vzext_vf4(a, b)); }\n+ static inline vuint8m1_t vcvt_WT_T(vint32m4_t a, int b, size_t c) { return __riscv_vncvt_x(__riscv_vncvt_x(__riscv_vreinterpret_v_i32m4_u32m4(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c)), c), c); }\n+};\n+\n+template<> struct rvv\n+{\n+ static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n+ static inline vint16m2_t vle_T(const short* a, size_t b) { return __riscv_vle16_v_i16m2(a, b); }\n+ static inline vint32m4_t vle_WT(const int* a, size_t b) { return __riscv_vle32_v_i32m4(a, b); }\n+ static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n+ static inline vint16m2_t vlse_T(const short* a, ptrdiff_t b, size_t c) { return __riscv_vlse16_v_i16m2(a, b, c); }\n+ static inline vint16m2_t vloxei_T(const short* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_i16m2(a, b, c); }\n+ static inline void vse_T(short* a, vint16m2_t b, size_t c) { return __riscv_vse16(a, b, c); }\n+ static inline vint32m4_t vcvt_T_WT(vint16m2_t a, size_t b) { return __riscv_vsext_vf2(a, b); }\n+ static inline vint16m2_t vcvt_WT_T(vint32m4_t a, int b, size_t c) { return __riscv_vncvt_x(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c), c); }\n+};\n+\n+template<> struct rvv\n+{\n+ static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n+ static inline vfloat32m4_t vle_T(const float* a, size_t b) { return __riscv_vle32_v_f32m4(a, b); }\n+ static inline vfloat32m4_t vle_WT(const float* a, size_t b) { return __riscv_vle32_v_f32m4(a, b); }\n+ static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n+ static inline vfloat32m4_t vlse_T(const float* a, ptrdiff_t b, size_t c) { return __riscv_vlse32_v_f32m4(a, b, c); }\n+ static inline vfloat32m4_t vloxei_T(const float* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_f32m4(a, b, c); }\n+ static inline void vse_T(float* a, vfloat32m4_t b, size_t c) { return __riscv_vse32(a, b, c); }\n+};\n+\n+template struct pyrDownVec0\n+{\n+ void operator()(const T* src, WT* row, const uint* tabM, int start, int end)\n+ {\n+ int vl;\n+ switch (start)\n+ {\n+ case 1:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 - 2, 2 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 - 1, 2 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2, 2 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 + 1, 2 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 + 2, 2 * sizeof(T), vl), vl);\n+ __riscv_vse32(row + x, __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ case 2:\n+ for( int x = start / 2; x < end / 2; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 2 - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src2 = 
rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 2, 4 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 2, 2 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 2 + 1, 2 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ case 3:\n+ for( int x = start / 3; x < end / 3; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 3 - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3, 6 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 3, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 3 + 1, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 3 + 2, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ case 4:\n+ for( int x = 
start / 4; x < end / 4; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 4 - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4 + 1, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4 + 2, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4 + 3, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ default:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_tabM = rvv::vle_M(tabM + x, vl);\n+ vec_tabM = __riscv_vmul(__riscv_vsub(vec_tabM, start * 2, vl), sizeof(T), vl);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = 
__riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ __riscv_vse32(row + x, __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ }\n+ }\n+};\n+template<> struct pyrDownVec0\n+{\n+ void operator()(const float* src, float* row, const uint* tabM, int start, int end)\n+ {\n+ int vl;\n+ switch (start)\n+ {\n+ case 1:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vlse_T(src + x * 2 - 2, 2 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + x * 2 - 1, 2 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + x * 2, 2 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + x * 2 + 1, 2 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + x * 2 + 2, 2 * sizeof(float), vl);\n+ __riscv_vse32(row + x, __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ case 2:\n+ for( int x = start / 2; x < end / 2; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 2 - x);\n+ auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 2, 4 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 2, 4 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + (x * 2) * 2, 4 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 2, 4 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 2, 4 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 2, 2 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 2 + 1, 4 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 2 + 1, 2 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ case 3:\n+ for( int x = start / 3; x < end / 3; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 3 - x);\n+ auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3, 6 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3, 6 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + (x * 2) * 3, 6 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3, 6 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3, 6 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 3, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3 + 1, 6 * sizeof(float), vl);\n+ 
vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3 + 1, 6 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 3 + 1, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3 + 2, 6 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 3 + 2, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ case 4:\n+ for( int x = start / 4; x < end / 4; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 4 - x);\n+ auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4, 8 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4, 8 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + (x * 2) * 4, 8 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4, 8 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 1, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4 + 1, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 2, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4 + 2, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 3, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4 + 3, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ default:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_tabM = rvv::vle_M(tabM + x, vl);\n+ vec_tabM = __riscv_vmul(__riscv_vsub(vec_tabM, start * 2, vl), sizeof(float), vl);\n+ auto vec_src0 = rvv::vloxei_T(src, vec_tabM, vl);\n+ 
vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src1 = rvv::vloxei_T(src, vec_tabM, vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src2 = rvv::vloxei_T(src, vec_tabM, vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src3 = rvv::vloxei_T(src, vec_tabM, vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src4 = rvv::vloxei_T(src, vec_tabM, vl);\n+ __riscv_vse32(row + x, __riscv_vfmadd(__riscv_vfadd(__riscv_vfadd(vec_src1, vec_src2, vl), vec_src3, vl), 4,\n+ __riscv_vfadd(__riscv_vfadd(vec_src0, vec_src4, vl), __riscv_vfadd(vec_src2, vec_src2, vl), vl), vl), vl);\n+ }\n+ }\n+ }\n+};\n+\n+template struct pyrDownVec1\n+{\n+ void operator()(WT* row0, WT* row1, WT* row2, WT* row3, WT* row4, T* dst, int end)\n+ {\n+ int vl;\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ auto vec_src3 = rvv::vle_WT(row3 + x, vl);\n+ auto vec_src4 = rvv::vle_WT(row4 + x, vl);\n+ rvv::vse_T(dst + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), 8, vl), vl);\n+ }\n+ }\n+};\n+template<> struct pyrDownVec1\n+{\n+ void operator()(float* row0, float* row1, float* row2, float* row3, float* row4, float* dst, int end)\n+ {\n+ int vl;\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ auto vec_src3 = rvv::vle_WT(row3 + x, vl);\n+ auto vec_src4 = rvv::vle_WT(row4 + x, vl);\n+ rvv::vse_T(dst + x, __riscv_vfmul(__riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), 1.f / 256.f, vl), vl);\n+ }\n+ }\n+};\n+\n+template struct pyrUpVec0\n+{\n+ void operator()(const T* src, WT* row, const uint* dtab, int start, int end)\n+ {\n+ int vl;\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vle_T(src + x - start, vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vle_T(src + x, vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vle_T(src + x + start, vl), vl);\n+\n+ auto vec_dtab = rvv::vle_M(dtab + x, vl);\n+ vec_dtab = __riscv_vmul(vec_dtab, sizeof(WT), vl);\n+ __riscv_vsoxei32(row, vec_dtab, __riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), vl);\n+ __riscv_vsoxei32(row, __riscv_vadd(vec_dtab, start * sizeof(WT), vl), __riscv_vsll(__riscv_vadd(vec_src1, vec_src2, vl), 2, vl), vl);\n+ }\n+ }\n+};\n+template<> struct pyrUpVec0\n+{\n+ void operator()(const float* src, float* row, const uint* dtab, int start, int end)\n+ {\n+ int vl;\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_T(src + x - start, vl);\n+ auto vec_src1 = rvv::vle_T(src + x, vl);\n+ auto vec_src2 = rvv::vle_T(src + x + start, vl);\n+\n+ auto vec_dtab = rvv::vle_M(dtab + x, vl);\n+ vec_dtab = __riscv_vmul(vec_dtab, sizeof(float), vl);\n+ __riscv_vsoxei32(row, vec_dtab, __riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), vl);\n+ __riscv_vsoxei32(row, 
__riscv_vadd(vec_dtab, start * sizeof(float), vl), __riscv_vfmul(__riscv_vfadd(vec_src1, vec_src2, vl), 4, vl), vl);\n+ }\n+ }\n+};\n+\n+template struct pyrUpVec1\n+{\n+ void operator()(WT* row0, WT* row1, WT* row2, T* dst0, T* dst1, int end)\n+ {\n+ int vl;\n+ if (dst0 != dst1)\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), 6, vl), vl);\n+ rvv::vse_T(dst1 + x, rvv::vcvt_WT_T(__riscv_vsll(__riscv_vadd(vec_src1, vec_src2, vl), 2, vl), 6, vl), vl);\n+ }\n+ }\n+ else\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), 6, vl), vl);\n+ }\n+ }\n+ }\n+};\n+template<> struct pyrUpVec1\n+{\n+ void operator()(float* row0, float* row1, float* row2, float* dst0, float* dst1, int end)\n+ {\n+ int vl;\n+ if (dst0 != dst1)\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, __riscv_vfmul(__riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), 1.f / 64.f, vl), vl);\n+ rvv::vse_T(dst1 + x, __riscv_vfmul(__riscv_vfadd(vec_src1, vec_src2, vl), 1.f / 16.f, vl), vl);\n+ }\n+ }\n+ else\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, __riscv_vfmul(__riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), 1.f / 64.f, vl), vl);\n+ }\n+ }\n+ }\n+};\n+\n+template\n+struct PyrDownInvoker : ParallelLoopBody\n+{\n+ PyrDownInvoker(const uchar* _src_data, size_t _src_step, int _src_width, int _src_height, uchar* _dst_data, size_t _dst_step, int _dst_width, int _dst_height, int _cn, int _borderType, int* _tabR, int* _tabM, int* _tabL)\n+ {\n+ src_data = _src_data;\n+ src_step = _src_step;\n+ src_width = _src_width;\n+ src_height = _src_height;\n+ dst_data = _dst_data;\n+ dst_step = _dst_step;\n+ dst_width = _dst_width;\n+ dst_height = _dst_height;\n+ cn = _cn;\n+ borderType = _borderType;\n+ tabR = _tabR;\n+ tabM = _tabM;\n+ tabL = _tabL;\n+ }\n+\n+ void operator()(const Range& range) const CV_OVERRIDE;\n+\n+ const uchar* src_data;\n+ size_t src_step;\n+ int src_width;\n+ int src_height;\n+ uchar* dst_data;\n+ size_t dst_step;\n+ int dst_width;\n+ int dst_height;\n+ int cn;\n+ int borderType;\n+ int* tabR;\n+ int* tabM;\n+ int* tabL;\n+};\n+\n+// the algorithm is copied from imgproc/src/pyramids.cpp,\n+// in the function template void cv::pyrDown_\n+template\n+inline int pyrDown(const uchar* src_data, size_t src_step, int src_width, int src_height, uchar* dst_data, size_t dst_step, int dst_width, int dst_height, int cn, int borderType)\n+{\n+ const int PD_SZ = 5;\n+\n+ std::vector _tabM(dst_width * cn), _tabL(cn * (PD_SZ + 2)), 
_tabR(cn * (PD_SZ + 2));\n+ int *tabM = _tabM.data(), *tabL = _tabL.data(), *tabR = _tabR.data();\n+\n+ CV_Assert( src_width > 0 && src_height > 0 &&\n+ std::abs(dst_width*2 - src_width) <= 2 &&\n+ std::abs(dst_height*2 - src_height) <= 2 );\n+ int width0 = std::min((src_width-PD_SZ/2-1)/2 + 1, dst_width);\n+\n+ for (int x = 0; x <= PD_SZ+1; x++)\n+ {\n+ int sx0 = borderInterpolate(x - PD_SZ/2, src_width, borderType)*cn;\n+ int sx1 = borderInterpolate(x + width0*2 - PD_SZ/2, src_width, borderType)*cn;\n+ for (int k = 0; k < cn; k++)\n+ {\n+ tabL[x*cn + k] = sx0 + k;\n+ tabR[x*cn + k] = sx1 + k;\n+ }\n+ }\n+\n+ for (int x = 0; x < dst_width*cn; x++)\n+ tabM[x] = (x/cn)*2*cn + x % cn;\n+\n+ cv::parallel_for_(Range(0,dst_height), PyrDownInvoker(src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, cn, borderType, tabR, tabM, tabL), cv::getNumThreads());\n+ return CV_HAL_ERROR_OK;\n+}\n+\n+template\n+void PyrDownInvoker::operator()(const Range& range) const\n+{\n+ const int PD_SZ = 5;\n+\n+ int bufstep = (dst_width*cn + 15) & -16;\n+ std::vector _buf(bufstep*PD_SZ + 16);\n+ WT* buf = (WT*)(((size_t)_buf.data() + 15) & -16);\n+ WT* rows[PD_SZ];\n+\n+ int sy0 = -PD_SZ/2, sy = range.start * 2 + sy0, width0 = std::min((src_width-PD_SZ/2-1)/2 + 1, dst_width);\n+\n+ int _dst_width = dst_width * cn;\n+ width0 *= cn;\n+\n+ for (int y = range.start; y < range.end; y++)\n+ {\n+ T* dst = reinterpret_cast(dst_data + dst_step * y);\n+ WT *row0, *row1, *row2, *row3, *row4;\n+\n+ // fill the ring buffer (horizontal convolution and decimation)\n+ int sy_limit = y*2 + 2;\n+ for( ; sy <= sy_limit; sy++ )\n+ {\n+ WT* row = buf + ((sy - sy0) % PD_SZ)*bufstep;\n+ int _sy = borderInterpolate(sy, src_height, borderType);\n+ const T* src = reinterpret_cast(src_data + src_step * _sy);\n+\n+ do {\n+ int x = 0;\n+ for( ; x < cn; x++ )\n+ {\n+ row[x] = src[tabL[x+cn*2]]*6 + (src[tabL[x+cn]] + src[tabL[x+cn*3]])*4 +\n+ src[tabL[x]] + src[tabL[x+cn*4]];\n+ }\n+\n+ if( x == _dst_width )\n+ break;\n+\n+ pyrDownVec0()(src, row, reinterpret_cast(tabM), cn, width0);\n+ x = width0;\n+\n+ // tabR\n+ for (int x_ = 0; x < _dst_width; x++, x_++)\n+ {\n+ row[x] = src[tabR[x_+cn*2]]*6 + (src[tabR[x_+cn]] + src[tabR[x_+cn*3]])*4 +\n+ src[tabR[x_]] + src[tabR[x_+cn*4]];\n+ }\n+ } while (0);\n+ }\n+\n+ // do vertical convolution and decimation and write the result to the destination image\n+ for (int k = 0; k < PD_SZ; k++)\n+ rows[k] = buf + ((y*2 - PD_SZ/2 + k - sy0) % PD_SZ)*bufstep;\n+ row0 = rows[0]; row1 = rows[1]; row2 = rows[2]; row3 = rows[3]; row4 = rows[4];\n+\n+ pyrDownVec1()(row0, row1, row2, row3, row4, dst, _dst_width);\n+ }\n+}\n+\n+// the algorithm is copied from imgproc/src/pyramids.cpp,\n+// in the function template void cv::pyrUp_\n+template\n+inline int pyrUp(const uchar* src_data, size_t src_step, int src_width, int src_height, uchar* dst_data, size_t dst_step, int dst_width, int dst_height, int cn)\n+{\n+ const int PU_SZ = 3;\n+\n+ int bufstep = ((dst_width+1)*cn + 15) & -16;\n+ std::vector _buf(bufstep*PU_SZ + 16);\n+ WT* buf = (WT*)(((size_t)_buf.data() + 15) & -16);\n+ std::vector _dtab(src_width*cn);\n+ int* dtab = _dtab.data();\n+ WT* rows[PU_SZ];\n+\n+ CV_Assert( std::abs(dst_width - src_width*2) == dst_width % 2 &&", + "comment_created_at": "2025-02-28T19:51:05+00:00", + "comment_author": "mshabunin", + "comment_body": "Let's avoid `CV_Assert` too. Just return NOT_IMPLEMENTED in this case. 
Here and in other places.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-cleanup-before-errors.md b/_reviewers/opencv-cleanup-before-errors.md index fd3df35..8c1ea66 100644 --- a/_reviewers/opencv-cleanup-before-errors.md +++ b/_reviewers/opencv-cleanup-before-errors.md @@ -46,85 +46,3 @@ if (has_error) ``` This pattern applies to all error scenarios, whether using exceptions (like CV_Error), assertions (CV_Assert), or returning error codes. By ensuring resources are released before propagating errors, you create more robust code that can handle failure scenarios gracefully. - - -[ - { - "discussion_id": "1896466401", - "pr_number": 25584, - "pr_file": "modules/videoio/misc/python/pyopencv_videoio.hpp", - "created_at": "2024-12-24T06:56:18+00:00", - "commented_code": "return true;\n}\n\nclass IOBaseWrapper : public std::streambuf\n{\npublic:\n IOBaseWrapper(PyObject* _obj = nullptr) : obj(_obj)\n {\n if (obj)\n Py_INCREF(obj);\n }\n\n ~IOBaseWrapper()\n {\n if (obj)\n Py_DECREF(obj);\n }\n\n std::streamsize xsgetn(char* buf, std::streamsize n) override\n {\n PyObject* ioBase = reinterpret_cast(obj);\n\n PyGILState_STATE gstate;\n gstate = PyGILState_Ensure();\n\n PyObject* size = pyopencv_from(static_cast(n));\n\n PyObject* res = PyObject_CallMethodObjArgs(ioBase, PyString_FromString(\"read\"), size, NULL);\n char* src = PyBytes_AsString(res);\n size_t len = static_cast(PyBytes_Size(res));\n CV_CheckLE(len, static_cast(n), \"Stream chunk size should be less or equal than requested size\");\n std::memcpy(buf, src, len);\n Py_DECREF(res);\n Py_DECREF(size);\n\n PyGILState_Release(gstate);\n\n return len;\n }\n\n std::streampos seekoff(std::streamoff off, std::ios_base::seekdir way, std::ios_base::openmode = std::ios_base::in | std::ios_base::out) override\n {\n PyObject* ioBase = reinterpret_cast(obj);\n\n PyGILState_STATE gstate;\n gstate = PyGILState_Ensure();\n\n PyObject* size = pyopencv_from(static_cast(off));\n PyObject* whence = pyopencv_from(way == std::ios_base::beg ? SEEK_SET : (way == std::ios_base::end ? 
SEEK_END : SEEK_CUR));\n\n PyObject* res = PyObject_CallMethodObjArgs(ioBase, PyString_FromString(\"seek\"), size, whence, NULL);\n int pos = PyLong_AsLong(res);\n Py_DECREF(res);\n Py_DECREF(size);\n Py_DECREF(whence);\n\n PyGILState_Release(gstate);\n\n return pos;\n }\n\nprivate:\n PyObject* obj;\n};\n\ntemplate<>\nbool pyopencv_to(PyObject* obj, Ptr& p, const ArgInfo&)\n{\n if (!obj)\n return false;\n\n PyGILState_STATE gstate;\n gstate = PyGILState_Ensure();\n\n PyObject* ioModule = PyImport_ImportModule(\"io\");\n PyObject* type = PyObject_GetAttrString(ioModule, \"BufferedIOBase\");\n Py_DECREF(ioModule);\n if (!PyObject_IsInstance(obj, type))\n CV_Error(cv::Error::StsBadArg, \"Input stream should be derived from io.BufferedIOBase\");", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1896466401", - "repo_full_name": "opencv/opencv", - "pr_number": 25584, - "pr_file": "modules/videoio/misc/python/pyopencv_videoio.hpp", - "discussion_id": "1896466401", - "commented_code": "@@ -31,4 +31,87 @@ template<> bool pyopencv_to(PyObject* obj, cv::VideoCapture& stream, const ArgIn\n return true;\n }\n \n+class IOBaseWrapper : public std::streambuf\n+{\n+public:\n+ IOBaseWrapper(PyObject* _obj = nullptr) : obj(_obj)\n+ {\n+ if (obj)\n+ Py_INCREF(obj);\n+ }\n+\n+ ~IOBaseWrapper()\n+ {\n+ if (obj)\n+ Py_DECREF(obj);\n+ }\n+\n+ std::streamsize xsgetn(char* buf, std::streamsize n) override\n+ {\n+ PyObject* ioBase = reinterpret_cast(obj);\n+\n+ PyGILState_STATE gstate;\n+ gstate = PyGILState_Ensure();\n+\n+ PyObject* size = pyopencv_from(static_cast(n));\n+\n+ PyObject* res = PyObject_CallMethodObjArgs(ioBase, PyString_FromString(\"read\"), size, NULL);\n+ char* src = PyBytes_AsString(res);\n+ size_t len = static_cast(PyBytes_Size(res));\n+ CV_CheckLE(len, static_cast(n), \"Stream chunk size should be less or equal than requested size\");\n+ std::memcpy(buf, src, len);\n+ Py_DECREF(res);\n+ Py_DECREF(size);\n+\n+ PyGILState_Release(gstate);\n+\n+ return len;\n+ }\n+\n+ std::streampos seekoff(std::streamoff off, std::ios_base::seekdir way, std::ios_base::openmode = std::ios_base::in | std::ios_base::out) override\n+ {\n+ PyObject* ioBase = reinterpret_cast(obj);\n+\n+ PyGILState_STATE gstate;\n+ gstate = PyGILState_Ensure();\n+\n+ PyObject* size = pyopencv_from(static_cast(off));\n+ PyObject* whence = pyopencv_from(way == std::ios_base::beg ? SEEK_SET : (way == std::ios_base::end ? SEEK_END : SEEK_CUR));\n+\n+ PyObject* res = PyObject_CallMethodObjArgs(ioBase, PyString_FromString(\"seek\"), size, whence, NULL);\n+ int pos = PyLong_AsLong(res);\n+ Py_DECREF(res);\n+ Py_DECREF(size);\n+ Py_DECREF(whence);\n+\n+ PyGILState_Release(gstate);\n+\n+ return pos;\n+ }\n+\n+private:\n+ PyObject* obj;\n+};\n+\n+template<>\n+bool pyopencv_to(PyObject* obj, Ptr& p, const ArgInfo&)\n+{\n+ if (!obj)\n+ return false;\n+\n+ PyGILState_STATE gstate;\n+ gstate = PyGILState_Ensure();\n+\n+ PyObject* ioModule = PyImport_ImportModule(\"io\");\n+ PyObject* type = PyObject_GetAttrString(ioModule, \"BufferedIOBase\");\n+ Py_DECREF(ioModule);\n+ if (!PyObject_IsInstance(obj, type))\n+ CV_Error(cv::Error::StsBadArg, \"Input stream should be derived from io.BufferedIOBase\");", - "comment_created_at": "2024-12-24T06:56:18+00:00", - "comment_author": "opencv-alalek", - "comment_body": "`CV_Error` doesn't call cleanup (release GIL and release `type`).\r\n\r\n```\r\nif (...)\r\n has_error = true;\r\n\r\n... 
cleanup ...\r\n\r\nif (has_error)\r\n CV_Error(...);\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2010187533", - "pr_number": 27115, - "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/norm.hpp", - "created_at": "2025-03-24T13:30:33+00:00", - "commented_code": "sizeof(int), sizeof(float),\n sizeof(int64_t), 0,\n };\n assert(elem_size_tab[depth]);", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2010187533", - "repo_full_name": "opencv/opencv", - "pr_number": 27115, - "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/norm.hpp", - "discussion_id": "2010187533", - "commented_code": "@@ -1008,6 +1008,7 @@ inline int norm(const uchar* src, size_t src_step, const uchar* mask, size_t mas\n sizeof(int), sizeof(float),\n sizeof(int64_t), 0,\n };\n+ assert(elem_size_tab[depth]);", - "comment_created_at": "2025-03-24T13:30:33+00:00", - "comment_author": "asmorkalov", - "comment_body": "Looks like the assert will be disabled in regular release builds: https://en.cppreference.com/w/cpp/error/assert.\r\nWhy not just CV_Assert? It's defined in `opencv2/core/base.hpp`", - "pr_file_module": null - }, - { - "comment_id": "2010279840", - "repo_full_name": "opencv/opencv", - "pr_number": 27115, - "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/norm.hpp", - "discussion_id": "2010187533", - "commented_code": "@@ -1008,6 +1008,7 @@ inline int norm(const uchar* src, size_t src_step, const uchar* mask, size_t mas\n sizeof(int), sizeof(float),\n sizeof(int64_t), 0,\n };\n+ assert(elem_size_tab[depth]);", - "comment_created_at": "2025-03-24T14:19:36+00:00", - "comment_author": "fengyuentau", - "comment_body": "Ok", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1975939795", - "pr_number": 26958, - "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/pyramids.hpp", - "created_at": "2025-02-28T19:51:05+00:00", - "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n#ifndef OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED\n#define OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED\n\n#include \n\nnamespace cv { namespace cv_hal_rvv { namespace pyramids {\n\n#undef cv_hal_pyrdown\n#define cv_hal_pyrdown cv::cv_hal_rvv::pyramids::pyrDown\n#undef cv_hal_pyrup\n#define cv_hal_pyrup cv::cv_hal_rvv::pyramids::pyrUp\n\ntemplate struct rvv;\n\ntemplate<> struct rvv\n{\n static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n static inline vuint8m1_t vle_T(const uchar* a, size_t b) { return __riscv_vle8_v_u8m1(a, b); }\n static inline vint32m4_t vle_WT(const int* a, size_t b) { return __riscv_vle32_v_i32m4(a, b); }\n static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n static inline vuint8m1_t vlse_T(const uchar* a, ptrdiff_t b, size_t c) { return __riscv_vlse8_v_u8m1(a, b, c); }\n static inline vuint8m1_t vloxei_T(const uchar* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_u8m1(a, b, c); }\n static inline void vse_T(uchar* a, vuint8m1_t b, size_t c) { return __riscv_vse8(a, b, c); }\n static inline vint32m4_t vcvt_T_WT(vuint8m1_t a, size_t b) { return __riscv_vreinterpret_v_u32m4_i32m4(__riscv_vzext_vf4(a, b)); }\n static inline vuint8m1_t vcvt_WT_T(vint32m4_t a, int b, size_t c) { return __riscv_vncvt_x(__riscv_vncvt_x(__riscv_vreinterpret_v_i32m4_u32m4(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c)), c), c); }\n};\n\ntemplate<> struct rvv\n{\n static inline size_t 
vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n static inline vint16m2_t vle_T(const short* a, size_t b) { return __riscv_vle16_v_i16m2(a, b); }\n static inline vint32m4_t vle_WT(const int* a, size_t b) { return __riscv_vle32_v_i32m4(a, b); }\n static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n static inline vint16m2_t vlse_T(const short* a, ptrdiff_t b, size_t c) { return __riscv_vlse16_v_i16m2(a, b, c); }\n static inline vint16m2_t vloxei_T(const short* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_i16m2(a, b, c); }\n static inline void vse_T(short* a, vint16m2_t b, size_t c) { return __riscv_vse16(a, b, c); }\n static inline vint32m4_t vcvt_T_WT(vint16m2_t a, size_t b) { return __riscv_vsext_vf2(a, b); }\n static inline vint16m2_t vcvt_WT_T(vint32m4_t a, int b, size_t c) { return __riscv_vncvt_x(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c), c); }\n};\n\ntemplate<> struct rvv\n{\n static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n static inline vfloat32m4_t vle_T(const float* a, size_t b) { return __riscv_vle32_v_f32m4(a, b); }\n static inline vfloat32m4_t vle_WT(const float* a, size_t b) { return __riscv_vle32_v_f32m4(a, b); }\n static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n static inline vfloat32m4_t vlse_T(const float* a, ptrdiff_t b, size_t c) { return __riscv_vlse32_v_f32m4(a, b, c); }\n static inline vfloat32m4_t vloxei_T(const float* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_f32m4(a, b, c); }\n static inline void vse_T(float* a, vfloat32m4_t b, size_t c) { return __riscv_vse32(a, b, c); }\n};\n\ntemplate struct pyrDownVec0\n{\n void operator()(const T* src, WT* row, const uint* tabM, int start, int end)\n {\n int vl;\n switch (start)\n {\n case 1:\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 - 2, 2 * sizeof(T), vl), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 - 1, 2 * sizeof(T), vl), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2, 2 * sizeof(T), vl), vl);\n auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 + 1, 2 * sizeof(T), vl), vl);\n auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 + 2, 2 * sizeof(T), vl), vl);\n __riscv_vse32(row + x, __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n }\n break;\n case 2:\n for( int x = start / 2; x < end / 2; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 2 - x);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 2, 4 * sizeof(T), vl), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 2, 4 * sizeof(T), vl), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 2, 4 * sizeof(T), vl), vl);\n auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 2, 4 * sizeof(T), vl), vl);\n auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 2, 4 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 2, 2 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src 
+ (x * 2 - 1) * 2 + 1, 4 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 2 + 1, 4 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 2 + 1, 2 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n }\n break;\n case 3:\n for( int x = start / 3; x < end / 3; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 3 - x);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3, 6 * sizeof(T), vl), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3, 6 * sizeof(T), vl), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3, 6 * sizeof(T), vl), vl);\n auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3, 6 * sizeof(T), vl), vl);\n auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3, 6 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 3, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3 + 1, 6 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3 + 1, 6 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 3 + 1, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3 + 2, 6 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3 + 2, 6 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 3 + 2, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n }\n break;\n case 4:\n for( int x = start / 4; x < end / 4; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 4 - x);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4, 8 * sizeof(T), vl), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4, 8 * sizeof(T), vl), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4, 8 * sizeof(T), vl), vl);\n auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4, 8 * sizeof(T), vl), vl);\n auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4, 8 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 4, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n 
__riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 1, 8 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 1, 8 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 4 + 1, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 2, 8 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 2, 8 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 4 + 2, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 3, 8 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 3, 8 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 4 + 3, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n }\n break;\n default:\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_tabM = rvv::vle_M(tabM + x, vl);\n vec_tabM = __riscv_vmul(__riscv_vsub(vec_tabM, start * 2, vl), sizeof(T), vl);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n auto vec_src3 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n auto vec_src4 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n __riscv_vse32(row + x, __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n }\n }\n }\n};\ntemplate<> struct pyrDownVec0\n{\n void operator()(const float* src, float* row, const uint* tabM, int start, int end)\n {\n int vl;\n switch (start)\n {\n case 1:\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto 
vec_src0 = rvv::vlse_T(src + x * 2 - 2, 2 * sizeof(float), vl);\n auto vec_src1 = rvv::vlse_T(src + x * 2 - 1, 2 * sizeof(float), vl);\n auto vec_src2 = rvv::vlse_T(src + x * 2, 2 * sizeof(float), vl);\n auto vec_src3 = rvv::vlse_T(src + x * 2 + 1, 2 * sizeof(float), vl);\n auto vec_src4 = rvv::vlse_T(src + x * 2 + 2, 2 * sizeof(float), vl);\n __riscv_vse32(row + x, __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n }\n break;\n case 2:\n for( int x = start / 2; x < end / 2; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 2 - x);\n auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 2, 4 * sizeof(float), vl);\n auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 2, 4 * sizeof(float), vl);\n auto vec_src2 = rvv::vlse_T(src + (x * 2) * 2, 4 * sizeof(float), vl);\n auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 2, 4 * sizeof(float), vl);\n auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 2, 4 * sizeof(float), vl);\n __riscv_vsse32(row + x * 2, 2 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 2 + 1, 4 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 2 + 1, 4 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 2 + 1, 4 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 2 + 1, 4 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 2 + 1, 4 * sizeof(float), vl);\n __riscv_vsse32(row + x * 2 + 1, 2 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n }\n break;\n case 3:\n for( int x = start / 3; x < end / 3; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 3 - x);\n auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3, 6 * sizeof(float), vl);\n auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3, 6 * sizeof(float), vl);\n auto vec_src2 = rvv::vlse_T(src + (x * 2) * 3, 6 * sizeof(float), vl);\n auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3, 6 * sizeof(float), vl);\n auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3, 6 * sizeof(float), vl);\n __riscv_vsse32(row + x * 3, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3 + 1, 6 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3 + 1, 6 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 3 + 1, 6 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3 + 1, 6 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3 + 1, 6 * sizeof(float), vl);\n __riscv_vsse32(row + x * 3 + 1, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3 + 2, 6 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3 + 2, 6 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 3 + 2, 6 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3 + 2, 6 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3 + 2, 6 * sizeof(float), vl);\n __riscv_vsse32(row + x * 3 + 2, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, 
vl), vl), vl), vl);\n }\n break;\n case 4:\n for( int x = start / 4; x < end / 4; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 4 - x);\n auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4, 8 * sizeof(float), vl);\n auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4, 8 * sizeof(float), vl);\n auto vec_src2 = rvv::vlse_T(src + (x * 2) * 4, 8 * sizeof(float), vl);\n auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4, 8 * sizeof(float), vl);\n auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4, 8 * sizeof(float), vl);\n __riscv_vsse32(row + x * 4, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 1, 8 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 1, 8 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 1, 8 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 1, 8 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 1, 8 * sizeof(float), vl);\n __riscv_vsse32(row + x * 4 + 1, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 2, 8 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 2, 8 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 2, 8 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 2, 8 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 2, 8 * sizeof(float), vl);\n __riscv_vsse32(row + x * 4 + 2, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 3, 8 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 3, 8 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 3, 8 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 3, 8 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 3, 8 * sizeof(float), vl);\n __riscv_vsse32(row + x * 4 + 3, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n }\n break;\n default:\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_tabM = rvv::vle_M(tabM + x, vl);\n vec_tabM = __riscv_vmul(__riscv_vsub(vec_tabM, start * 2, vl), sizeof(float), vl);\n auto vec_src0 = rvv::vloxei_T(src, vec_tabM, vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n auto vec_src1 = rvv::vloxei_T(src, vec_tabM, vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n auto vec_src2 = rvv::vloxei_T(src, vec_tabM, vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n auto vec_src3 = rvv::vloxei_T(src, vec_tabM, vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n auto vec_src4 = rvv::vloxei_T(src, vec_tabM, vl);\n __riscv_vse32(row + x, __riscv_vfmadd(__riscv_vfadd(__riscv_vfadd(vec_src1, vec_src2, vl), vec_src3, vl), 4,\n __riscv_vfadd(__riscv_vfadd(vec_src0, vec_src4, vl), __riscv_vfadd(vec_src2, vec_src2, vl), vl), vl), vl);\n }\n }\n }\n};\n\ntemplate struct pyrDownVec1\n{\n void operator()(WT* row0, WT* row1, WT* row2, WT* row3, WT* row4, T* dst, int end)\n {\n int vl;\n for( int x = 0 ; x < end; x += 
vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n auto vec_src3 = rvv::vle_WT(row3 + x, vl);\n auto vec_src4 = rvv::vle_WT(row4 + x, vl);\n rvv::vse_T(dst + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), 8, vl), vl);\n }\n }\n};\ntemplate<> struct pyrDownVec1\n{\n void operator()(float* row0, float* row1, float* row2, float* row3, float* row4, float* dst, int end)\n {\n int vl;\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n auto vec_src3 = rvv::vle_WT(row3 + x, vl);\n auto vec_src4 = rvv::vle_WT(row4 + x, vl);\n rvv::vse_T(dst + x, __riscv_vfmul(__riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), 1.f / 256.f, vl), vl);\n }\n }\n};\n\ntemplate struct pyrUpVec0\n{\n void operator()(const T* src, WT* row, const uint* dtab, int start, int end)\n {\n int vl;\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vle_T(src + x - start, vl), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vle_T(src + x, vl), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vle_T(src + x + start, vl), vl);\n\n auto vec_dtab = rvv::vle_M(dtab + x, vl);\n vec_dtab = __riscv_vmul(vec_dtab, sizeof(WT), vl);\n __riscv_vsoxei32(row, vec_dtab, __riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), vl);\n __riscv_vsoxei32(row, __riscv_vadd(vec_dtab, start * sizeof(WT), vl), __riscv_vsll(__riscv_vadd(vec_src1, vec_src2, vl), 2, vl), vl);\n }\n }\n};\ntemplate<> struct pyrUpVec0\n{\n void operator()(const float* src, float* row, const uint* dtab, int start, int end)\n {\n int vl;\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_T(src + x - start, vl);\n auto vec_src1 = rvv::vle_T(src + x, vl);\n auto vec_src2 = rvv::vle_T(src + x + start, vl);\n\n auto vec_dtab = rvv::vle_M(dtab + x, vl);\n vec_dtab = __riscv_vmul(vec_dtab, sizeof(float), vl);\n __riscv_vsoxei32(row, vec_dtab, __riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), vl);\n __riscv_vsoxei32(row, __riscv_vadd(vec_dtab, start * sizeof(float), vl), __riscv_vfmul(__riscv_vfadd(vec_src1, vec_src2, vl), 4, vl), vl);\n }\n }\n};\n\ntemplate struct pyrUpVec1\n{\n void operator()(WT* row0, WT* row1, WT* row2, T* dst0, T* dst1, int end)\n {\n int vl;\n if (dst0 != dst1)\n {\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n rvv::vse_T(dst0 + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), 6, vl), vl);\n rvv::vse_T(dst1 + x, rvv::vcvt_WT_T(__riscv_vsll(__riscv_vadd(vec_src1, vec_src2, vl), 2, vl), 6, vl), vl);\n }\n }\n else\n {\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = 
rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n rvv::vse_T(dst0 + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), 6, vl), vl);\n }\n }\n }\n};\ntemplate<> struct pyrUpVec1\n{\n void operator()(float* row0, float* row1, float* row2, float* dst0, float* dst1, int end)\n {\n int vl;\n if (dst0 != dst1)\n {\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n rvv::vse_T(dst0 + x, __riscv_vfmul(__riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), 1.f / 64.f, vl), vl);\n rvv::vse_T(dst1 + x, __riscv_vfmul(__riscv_vfadd(vec_src1, vec_src2, vl), 1.f / 16.f, vl), vl);\n }\n }\n else\n {\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n rvv::vse_T(dst0 + x, __riscv_vfmul(__riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), 1.f / 64.f, vl), vl);\n }\n }\n }\n};\n\ntemplate\nstruct PyrDownInvoker : ParallelLoopBody\n{\n PyrDownInvoker(const uchar* _src_data, size_t _src_step, int _src_width, int _src_height, uchar* _dst_data, size_t _dst_step, int _dst_width, int _dst_height, int _cn, int _borderType, int* _tabR, int* _tabM, int* _tabL)\n {\n src_data = _src_data;\n src_step = _src_step;\n src_width = _src_width;\n src_height = _src_height;\n dst_data = _dst_data;\n dst_step = _dst_step;\n dst_width = _dst_width;\n dst_height = _dst_height;\n cn = _cn;\n borderType = _borderType;\n tabR = _tabR;\n tabM = _tabM;\n tabL = _tabL;\n }\n\n void operator()(const Range& range) const CV_OVERRIDE;\n\n const uchar* src_data;\n size_t src_step;\n int src_width;\n int src_height;\n uchar* dst_data;\n size_t dst_step;\n int dst_width;\n int dst_height;\n int cn;\n int borderType;\n int* tabR;\n int* tabM;\n int* tabL;\n};\n\n// the algorithm is copied from imgproc/src/pyramids.cpp,\n// in the function template void cv::pyrDown_\ntemplate\ninline int pyrDown(const uchar* src_data, size_t src_step, int src_width, int src_height, uchar* dst_data, size_t dst_step, int dst_width, int dst_height, int cn, int borderType)\n{\n const int PD_SZ = 5;\n\n std::vector _tabM(dst_width * cn), _tabL(cn * (PD_SZ + 2)), _tabR(cn * (PD_SZ + 2));\n int *tabM = _tabM.data(), *tabL = _tabL.data(), *tabR = _tabR.data();\n\n CV_Assert( src_width > 0 && src_height > 0 &&\n std::abs(dst_width*2 - src_width) <= 2 &&\n std::abs(dst_height*2 - src_height) <= 2 );\n int width0 = std::min((src_width-PD_SZ/2-1)/2 + 1, dst_width);\n\n for (int x = 0; x <= PD_SZ+1; x++)\n {\n int sx0 = borderInterpolate(x - PD_SZ/2, src_width, borderType)*cn;\n int sx1 = borderInterpolate(x + width0*2 - PD_SZ/2, src_width, borderType)*cn;\n for (int k = 0; k < cn; k++)\n {\n tabL[x*cn + k] = sx0 + k;\n tabR[x*cn + k] = sx1 + k;\n }\n }\n\n for (int x = 0; x < dst_width*cn; x++)\n tabM[x] = (x/cn)*2*cn + x % cn;\n\n cv::parallel_for_(Range(0,dst_height), PyrDownInvoker(src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, cn, borderType, tabR, tabM, tabL), cv::getNumThreads());\n return CV_HAL_ERROR_OK;\n}\n\ntemplate\nvoid PyrDownInvoker::operator()(const Range& range) const\n{\n const int PD_SZ = 5;\n\n int bufstep = (dst_width*cn + 15) & -16;\n 
std::vector _buf(bufstep*PD_SZ + 16);\n WT* buf = (WT*)(((size_t)_buf.data() + 15) & -16);\n WT* rows[PD_SZ];\n\n int sy0 = -PD_SZ/2, sy = range.start * 2 + sy0, width0 = std::min((src_width-PD_SZ/2-1)/2 + 1, dst_width);\n\n int _dst_width = dst_width * cn;\n width0 *= cn;\n\n for (int y = range.start; y < range.end; y++)\n {\n T* dst = reinterpret_cast(dst_data + dst_step * y);\n WT *row0, *row1, *row2, *row3, *row4;\n\n // fill the ring buffer (horizontal convolution and decimation)\n int sy_limit = y*2 + 2;\n for( ; sy <= sy_limit; sy++ )\n {\n WT* row = buf + ((sy - sy0) % PD_SZ)*bufstep;\n int _sy = borderInterpolate(sy, src_height, borderType);\n const T* src = reinterpret_cast(src_data + src_step * _sy);\n\n do {\n int x = 0;\n for( ; x < cn; x++ )\n {\n row[x] = src[tabL[x+cn*2]]*6 + (src[tabL[x+cn]] + src[tabL[x+cn*3]])*4 +\n src[tabL[x]] + src[tabL[x+cn*4]];\n }\n\n if( x == _dst_width )\n break;\n\n pyrDownVec0()(src, row, reinterpret_cast(tabM), cn, width0);\n x = width0;\n\n // tabR\n for (int x_ = 0; x < _dst_width; x++, x_++)\n {\n row[x] = src[tabR[x_+cn*2]]*6 + (src[tabR[x_+cn]] + src[tabR[x_+cn*3]])*4 +\n src[tabR[x_]] + src[tabR[x_+cn*4]];\n }\n } while (0);\n }\n\n // do vertical convolution and decimation and write the result to the destination image\n for (int k = 0; k < PD_SZ; k++)\n rows[k] = buf + ((y*2 - PD_SZ/2 + k - sy0) % PD_SZ)*bufstep;\n row0 = rows[0]; row1 = rows[1]; row2 = rows[2]; row3 = rows[3]; row4 = rows[4];\n\n pyrDownVec1()(row0, row1, row2, row3, row4, dst, _dst_width);\n }\n}\n\n// the algorithm is copied from imgproc/src/pyramids.cpp,\n// in the function template void cv::pyrUp_\ntemplate\ninline int pyrUp(const uchar* src_data, size_t src_step, int src_width, int src_height, uchar* dst_data, size_t dst_step, int dst_width, int dst_height, int cn)\n{\n const int PU_SZ = 3;\n\n int bufstep = ((dst_width+1)*cn + 15) & -16;\n std::vector _buf(bufstep*PU_SZ + 16);\n WT* buf = (WT*)(((size_t)_buf.data() + 15) & -16);\n std::vector _dtab(src_width*cn);\n int* dtab = _dtab.data();\n WT* rows[PU_SZ];\n\n CV_Assert( std::abs(dst_width - src_width*2) == dst_width % 2 &&", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1975939795", - "repo_full_name": "opencv/opencv", - "pr_number": 26958, - "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/pyramids.hpp", - "discussion_id": "1975939795", - "commented_code": "@@ -0,0 +1,681 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+#ifndef OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED\n+#define OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED\n+\n+#include \n+\n+namespace cv { namespace cv_hal_rvv { namespace pyramids {\n+\n+#undef cv_hal_pyrdown\n+#define cv_hal_pyrdown cv::cv_hal_rvv::pyramids::pyrDown\n+#undef cv_hal_pyrup\n+#define cv_hal_pyrup cv::cv_hal_rvv::pyramids::pyrUp\n+\n+template struct rvv;\n+\n+template<> struct rvv\n+{\n+ static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n+ static inline vuint8m1_t vle_T(const uchar* a, size_t b) { return __riscv_vle8_v_u8m1(a, b); }\n+ static inline vint32m4_t vle_WT(const int* a, size_t b) { return __riscv_vle32_v_i32m4(a, b); }\n+ static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n+ static inline vuint8m1_t vlse_T(const uchar* a, ptrdiff_t b, size_t c) { return __riscv_vlse8_v_u8m1(a, b, c); }\n+ static inline vuint8m1_t 
vloxei_T(const uchar* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_u8m1(a, b, c); }\n+ static inline void vse_T(uchar* a, vuint8m1_t b, size_t c) { return __riscv_vse8(a, b, c); }\n+ static inline vint32m4_t vcvt_T_WT(vuint8m1_t a, size_t b) { return __riscv_vreinterpret_v_u32m4_i32m4(__riscv_vzext_vf4(a, b)); }\n+ static inline vuint8m1_t vcvt_WT_T(vint32m4_t a, int b, size_t c) { return __riscv_vncvt_x(__riscv_vncvt_x(__riscv_vreinterpret_v_i32m4_u32m4(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c)), c), c); }\n+};\n+\n+template<> struct rvv\n+{\n+ static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n+ static inline vint16m2_t vle_T(const short* a, size_t b) { return __riscv_vle16_v_i16m2(a, b); }\n+ static inline vint32m4_t vle_WT(const int* a, size_t b) { return __riscv_vle32_v_i32m4(a, b); }\n+ static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n+ static inline vint16m2_t vlse_T(const short* a, ptrdiff_t b, size_t c) { return __riscv_vlse16_v_i16m2(a, b, c); }\n+ static inline vint16m2_t vloxei_T(const short* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_i16m2(a, b, c); }\n+ static inline void vse_T(short* a, vint16m2_t b, size_t c) { return __riscv_vse16(a, b, c); }\n+ static inline vint32m4_t vcvt_T_WT(vint16m2_t a, size_t b) { return __riscv_vsext_vf2(a, b); }\n+ static inline vint16m2_t vcvt_WT_T(vint32m4_t a, int b, size_t c) { return __riscv_vncvt_x(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c), c); }\n+};\n+\n+template<> struct rvv\n+{\n+ static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n+ static inline vfloat32m4_t vle_T(const float* a, size_t b) { return __riscv_vle32_v_f32m4(a, b); }\n+ static inline vfloat32m4_t vle_WT(const float* a, size_t b) { return __riscv_vle32_v_f32m4(a, b); }\n+ static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n+ static inline vfloat32m4_t vlse_T(const float* a, ptrdiff_t b, size_t c) { return __riscv_vlse32_v_f32m4(a, b, c); }\n+ static inline vfloat32m4_t vloxei_T(const float* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_f32m4(a, b, c); }\n+ static inline void vse_T(float* a, vfloat32m4_t b, size_t c) { return __riscv_vse32(a, b, c); }\n+};\n+\n+template struct pyrDownVec0\n+{\n+ void operator()(const T* src, WT* row, const uint* tabM, int start, int end)\n+ {\n+ int vl;\n+ switch (start)\n+ {\n+ case 1:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 - 2, 2 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 - 1, 2 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2, 2 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 + 1, 2 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 + 2, 2 * sizeof(T), vl), vl);\n+ __riscv_vse32(row + x, __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ case 2:\n+ for( int x = start / 2; x < end / 2; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 2 - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src2 = 
rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 2, 4 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 2, 2 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 2 + 1, 2 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ case 3:\n+ for( int x = start / 3; x < end / 3; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 3 - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3, 6 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 3, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 3 + 1, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 3 + 2, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ case 4:\n+ for( int x = 
start / 4; x < end / 4; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 4 - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4 + 1, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4 + 2, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4 + 3, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ default:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_tabM = rvv::vle_M(tabM + x, vl);\n+ vec_tabM = __riscv_vmul(__riscv_vsub(vec_tabM, start * 2, vl), sizeof(T), vl);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = 
__riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ __riscv_vse32(row + x, __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ }\n+ }\n+};\n+template<> struct pyrDownVec0\n+{\n+ void operator()(const float* src, float* row, const uint* tabM, int start, int end)\n+ {\n+ int vl;\n+ switch (start)\n+ {\n+ case 1:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vlse_T(src + x * 2 - 2, 2 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + x * 2 - 1, 2 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + x * 2, 2 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + x * 2 + 1, 2 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + x * 2 + 2, 2 * sizeof(float), vl);\n+ __riscv_vse32(row + x, __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ case 2:\n+ for( int x = start / 2; x < end / 2; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 2 - x);\n+ auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 2, 4 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 2, 4 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + (x * 2) * 2, 4 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 2, 4 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 2, 4 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 2, 2 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 2 + 1, 4 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 2 + 1, 2 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ case 3:\n+ for( int x = start / 3; x < end / 3; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 3 - x);\n+ auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3, 6 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3, 6 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + (x * 2) * 3, 6 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3, 6 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3, 6 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 3, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3 + 1, 6 * sizeof(float), vl);\n+ 
vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3 + 1, 6 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 3 + 1, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3 + 2, 6 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 3 + 2, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ case 4:\n+ for( int x = start / 4; x < end / 4; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 4 - x);\n+ auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4, 8 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4, 8 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + (x * 2) * 4, 8 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4, 8 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 1, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4 + 1, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 2, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4 + 2, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 3, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4 + 3, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ default:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_tabM = rvv::vle_M(tabM + x, vl);\n+ vec_tabM = __riscv_vmul(__riscv_vsub(vec_tabM, start * 2, vl), sizeof(float), vl);\n+ auto vec_src0 = rvv::vloxei_T(src, vec_tabM, vl);\n+ 
vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src1 = rvv::vloxei_T(src, vec_tabM, vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src2 = rvv::vloxei_T(src, vec_tabM, vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src3 = rvv::vloxei_T(src, vec_tabM, vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src4 = rvv::vloxei_T(src, vec_tabM, vl);\n+ __riscv_vse32(row + x, __riscv_vfmadd(__riscv_vfadd(__riscv_vfadd(vec_src1, vec_src2, vl), vec_src3, vl), 4,\n+ __riscv_vfadd(__riscv_vfadd(vec_src0, vec_src4, vl), __riscv_vfadd(vec_src2, vec_src2, vl), vl), vl), vl);\n+ }\n+ }\n+ }\n+};\n+\n+template struct pyrDownVec1\n+{\n+ void operator()(WT* row0, WT* row1, WT* row2, WT* row3, WT* row4, T* dst, int end)\n+ {\n+ int vl;\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ auto vec_src3 = rvv::vle_WT(row3 + x, vl);\n+ auto vec_src4 = rvv::vle_WT(row4 + x, vl);\n+ rvv::vse_T(dst + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), 8, vl), vl);\n+ }\n+ }\n+};\n+template<> struct pyrDownVec1\n+{\n+ void operator()(float* row0, float* row1, float* row2, float* row3, float* row4, float* dst, int end)\n+ {\n+ int vl;\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ auto vec_src3 = rvv::vle_WT(row3 + x, vl);\n+ auto vec_src4 = rvv::vle_WT(row4 + x, vl);\n+ rvv::vse_T(dst + x, __riscv_vfmul(__riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), 1.f / 256.f, vl), vl);\n+ }\n+ }\n+};\n+\n+template struct pyrUpVec0\n+{\n+ void operator()(const T* src, WT* row, const uint* dtab, int start, int end)\n+ {\n+ int vl;\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vle_T(src + x - start, vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vle_T(src + x, vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vle_T(src + x + start, vl), vl);\n+\n+ auto vec_dtab = rvv::vle_M(dtab + x, vl);\n+ vec_dtab = __riscv_vmul(vec_dtab, sizeof(WT), vl);\n+ __riscv_vsoxei32(row, vec_dtab, __riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), vl);\n+ __riscv_vsoxei32(row, __riscv_vadd(vec_dtab, start * sizeof(WT), vl), __riscv_vsll(__riscv_vadd(vec_src1, vec_src2, vl), 2, vl), vl);\n+ }\n+ }\n+};\n+template<> struct pyrUpVec0\n+{\n+ void operator()(const float* src, float* row, const uint* dtab, int start, int end)\n+ {\n+ int vl;\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_T(src + x - start, vl);\n+ auto vec_src1 = rvv::vle_T(src + x, vl);\n+ auto vec_src2 = rvv::vle_T(src + x + start, vl);\n+\n+ auto vec_dtab = rvv::vle_M(dtab + x, vl);\n+ vec_dtab = __riscv_vmul(vec_dtab, sizeof(float), vl);\n+ __riscv_vsoxei32(row, vec_dtab, __riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), vl);\n+ __riscv_vsoxei32(row, 
__riscv_vadd(vec_dtab, start * sizeof(float), vl), __riscv_vfmul(__riscv_vfadd(vec_src1, vec_src2, vl), 4, vl), vl);\n+ }\n+ }\n+};\n+\n+template struct pyrUpVec1\n+{\n+ void operator()(WT* row0, WT* row1, WT* row2, T* dst0, T* dst1, int end)\n+ {\n+ int vl;\n+ if (dst0 != dst1)\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), 6, vl), vl);\n+ rvv::vse_T(dst1 + x, rvv::vcvt_WT_T(__riscv_vsll(__riscv_vadd(vec_src1, vec_src2, vl), 2, vl), 6, vl), vl);\n+ }\n+ }\n+ else\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), 6, vl), vl);\n+ }\n+ }\n+ }\n+};\n+template<> struct pyrUpVec1\n+{\n+ void operator()(float* row0, float* row1, float* row2, float* dst0, float* dst1, int end)\n+ {\n+ int vl;\n+ if (dst0 != dst1)\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, __riscv_vfmul(__riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), 1.f / 64.f, vl), vl);\n+ rvv::vse_T(dst1 + x, __riscv_vfmul(__riscv_vfadd(vec_src1, vec_src2, vl), 1.f / 16.f, vl), vl);\n+ }\n+ }\n+ else\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, __riscv_vfmul(__riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), 1.f / 64.f, vl), vl);\n+ }\n+ }\n+ }\n+};\n+\n+template\n+struct PyrDownInvoker : ParallelLoopBody\n+{\n+ PyrDownInvoker(const uchar* _src_data, size_t _src_step, int _src_width, int _src_height, uchar* _dst_data, size_t _dst_step, int _dst_width, int _dst_height, int _cn, int _borderType, int* _tabR, int* _tabM, int* _tabL)\n+ {\n+ src_data = _src_data;\n+ src_step = _src_step;\n+ src_width = _src_width;\n+ src_height = _src_height;\n+ dst_data = _dst_data;\n+ dst_step = _dst_step;\n+ dst_width = _dst_width;\n+ dst_height = _dst_height;\n+ cn = _cn;\n+ borderType = _borderType;\n+ tabR = _tabR;\n+ tabM = _tabM;\n+ tabL = _tabL;\n+ }\n+\n+ void operator()(const Range& range) const CV_OVERRIDE;\n+\n+ const uchar* src_data;\n+ size_t src_step;\n+ int src_width;\n+ int src_height;\n+ uchar* dst_data;\n+ size_t dst_step;\n+ int dst_width;\n+ int dst_height;\n+ int cn;\n+ int borderType;\n+ int* tabR;\n+ int* tabM;\n+ int* tabL;\n+};\n+\n+// the algorithm is copied from imgproc/src/pyramids.cpp,\n+// in the function template void cv::pyrDown_\n+template\n+inline int pyrDown(const uchar* src_data, size_t src_step, int src_width, int src_height, uchar* dst_data, size_t dst_step, int dst_width, int dst_height, int cn, int borderType)\n+{\n+ const int PD_SZ = 5;\n+\n+ std::vector _tabM(dst_width * cn), _tabL(cn * (PD_SZ + 2)), 
_tabR(cn * (PD_SZ + 2));\n+ int *tabM = _tabM.data(), *tabL = _tabL.data(), *tabR = _tabR.data();\n+\n+ CV_Assert( src_width > 0 && src_height > 0 &&\n+ std::abs(dst_width*2 - src_width) <= 2 &&\n+ std::abs(dst_height*2 - src_height) <= 2 );\n+ int width0 = std::min((src_width-PD_SZ/2-1)/2 + 1, dst_width);\n+\n+ for (int x = 0; x <= PD_SZ+1; x++)\n+ {\n+ int sx0 = borderInterpolate(x - PD_SZ/2, src_width, borderType)*cn;\n+ int sx1 = borderInterpolate(x + width0*2 - PD_SZ/2, src_width, borderType)*cn;\n+ for (int k = 0; k < cn; k++)\n+ {\n+ tabL[x*cn + k] = sx0 + k;\n+ tabR[x*cn + k] = sx1 + k;\n+ }\n+ }\n+\n+ for (int x = 0; x < dst_width*cn; x++)\n+ tabM[x] = (x/cn)*2*cn + x % cn;\n+\n+ cv::parallel_for_(Range(0,dst_height), PyrDownInvoker(src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, cn, borderType, tabR, tabM, tabL), cv::getNumThreads());\n+ return CV_HAL_ERROR_OK;\n+}\n+\n+template\n+void PyrDownInvoker::operator()(const Range& range) const\n+{\n+ const int PD_SZ = 5;\n+\n+ int bufstep = (dst_width*cn + 15) & -16;\n+ std::vector _buf(bufstep*PD_SZ + 16);\n+ WT* buf = (WT*)(((size_t)_buf.data() + 15) & -16);\n+ WT* rows[PD_SZ];\n+\n+ int sy0 = -PD_SZ/2, sy = range.start * 2 + sy0, width0 = std::min((src_width-PD_SZ/2-1)/2 + 1, dst_width);\n+\n+ int _dst_width = dst_width * cn;\n+ width0 *= cn;\n+\n+ for (int y = range.start; y < range.end; y++)\n+ {\n+ T* dst = reinterpret_cast(dst_data + dst_step * y);\n+ WT *row0, *row1, *row2, *row3, *row4;\n+\n+ // fill the ring buffer (horizontal convolution and decimation)\n+ int sy_limit = y*2 + 2;\n+ for( ; sy <= sy_limit; sy++ )\n+ {\n+ WT* row = buf + ((sy - sy0) % PD_SZ)*bufstep;\n+ int _sy = borderInterpolate(sy, src_height, borderType);\n+ const T* src = reinterpret_cast(src_data + src_step * _sy);\n+\n+ do {\n+ int x = 0;\n+ for( ; x < cn; x++ )\n+ {\n+ row[x] = src[tabL[x+cn*2]]*6 + (src[tabL[x+cn]] + src[tabL[x+cn*3]])*4 +\n+ src[tabL[x]] + src[tabL[x+cn*4]];\n+ }\n+\n+ if( x == _dst_width )\n+ break;\n+\n+ pyrDownVec0()(src, row, reinterpret_cast(tabM), cn, width0);\n+ x = width0;\n+\n+ // tabR\n+ for (int x_ = 0; x < _dst_width; x++, x_++)\n+ {\n+ row[x] = src[tabR[x_+cn*2]]*6 + (src[tabR[x_+cn]] + src[tabR[x_+cn*3]])*4 +\n+ src[tabR[x_]] + src[tabR[x_+cn*4]];\n+ }\n+ } while (0);\n+ }\n+\n+ // do vertical convolution and decimation and write the result to the destination image\n+ for (int k = 0; k < PD_SZ; k++)\n+ rows[k] = buf + ((y*2 - PD_SZ/2 + k - sy0) % PD_SZ)*bufstep;\n+ row0 = rows[0]; row1 = rows[1]; row2 = rows[2]; row3 = rows[3]; row4 = rows[4];\n+\n+ pyrDownVec1()(row0, row1, row2, row3, row4, dst, _dst_width);\n+ }\n+}\n+\n+// the algorithm is copied from imgproc/src/pyramids.cpp,\n+// in the function template void cv::pyrUp_\n+template\n+inline int pyrUp(const uchar* src_data, size_t src_step, int src_width, int src_height, uchar* dst_data, size_t dst_step, int dst_width, int dst_height, int cn)\n+{\n+ const int PU_SZ = 3;\n+\n+ int bufstep = ((dst_width+1)*cn + 15) & -16;\n+ std::vector _buf(bufstep*PU_SZ + 16);\n+ WT* buf = (WT*)(((size_t)_buf.data() + 15) & -16);\n+ std::vector _dtab(src_width*cn);\n+ int* dtab = _dtab.data();\n+ WT* rows[PU_SZ];\n+\n+ CV_Assert( std::abs(dst_width - src_width*2) == dst_width % 2 &&", - "comment_created_at": "2025-02-28T19:51:05+00:00", - "comment_author": "mshabunin", - "comment_body": "Let's avoid `CV_Assert` too. Just return NOT_IMPLEMENTED in this case. 
Here and in other places.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-clear-api-contracts.json b/_reviewers/opencv-clear-api-contracts.json new file mode 100644 index 0000000..a42d2da --- /dev/null +++ b/_reviewers/opencv-clear-api-contracts.json @@ -0,0 +1,278 @@ +[ + { + "discussion_id": "2176710492", + "pr_number": 27499, + "pr_file": "modules/imgcodecs/src/loadsave.cpp", + "created_at": "2025-07-01T07:51:35+00:00", + "commented_code": "*\n*/\nstatic bool\nimread_( const String& filename, int flags, OutputArray mat )\nimread_( const String& filename, int flags, OutputArray mat,\n std::vector* metadata_types, OutputArrayOfArrays metadata)\n{\n /// Search for the relevant decoder to handle the imagery\n ImageDecoder decoder;\n\n if (metadata_types)\n metadata_types->clear();\n\n#ifdef HAVE_GDAL", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2176710492", + "repo_full_name": "opencv/opencv", + "pr_number": 27499, + "pr_file": "modules/imgcodecs/src/loadsave.cpp", + "discussion_id": "2176710492", + "commented_code": "@@ -419,11 +475,15 @@ static void ApplyExifOrientation(ExifEntry_t orientationTag, OutputArray img)\n *\n */\n static bool\n-imread_( const String& filename, int flags, OutputArray mat )\n+imread_( const String& filename, int flags, OutputArray mat,\n+ std::vector* metadata_types, OutputArrayOfArrays metadata)\n {\n /// Search for the relevant decoder to handle the imagery\n ImageDecoder decoder;\n \n+ if (metadata_types)\n+ metadata_types->clear();\n+\n #ifdef HAVE_GDAL", + "comment_created_at": "2025-07-01T07:51:35+00:00", + "comment_author": "asmorkalov", + "comment_body": "Need a warning, that GDAL does not support metadata for now.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2091429332", + "pr_number": 27051, + "pr_file": "samples/cpp/color_correction_model.cpp", + "created_at": "2025-05-15T15:14:56+00:00", + "commented_code": "//! [tutorial]\n#include \n#include \n#include \n#include \n#include \n#include \n#include \n#include \n#include \"../dnn/common.hpp\"\n\nusing namespace std;\nusing namespace cv;\nusing namespace cv::dnn;\nusing namespace cv::ccm;\nusing namespace mcc;\n\nconst string about =\n \"This sample detects Macbeth color checker using DNN or thresholding and applies color correction.\"\n \"To run default:\\n\"\n \"\\t ./example_cpp_color_correction_model --input=path/to/your/input/image --query=path/to/your/query/image\\n\"\n \"With DNN model:\\n\"\n \"\\t ./example_cpp_color_correction_model mcc --input=path/to/your/input/image --query=path/to/your/query/image\\n\\n\"\n \"Using pre-computed CCM:\\n\"\n \"\\t ./example_cpp_color_correction_model mcc --ccm_file=path/to/ccm_output.yaml --query=path/to/your/query/image\\n\\n\"\n \"Model path can also be specified using --model argument. And config path can be specified using --config. Download it using python download_models.py mcc from dnn samples directory\\n\\n\";\n\nconst string param_keys =\n \"{ help h | | Print help message. }\"\n \"{ @alias | | An alias name of model to extract preprocessing parameters from models.yml file. }\"\n \"{ zoo | ../dnn/models.yml | An optional path to file with preprocessing parameters }\"\n \"{ input i | mcc_ccm_test.jpg | Path to input image for computing CCM.}\"\n \"{ query q | baboon.jpg | Path to query image to apply color correction. If not provided, input image will be used. 
}\"\n \"{ type | 0 | chartType: 0-Standard, 1-DigitalSG, 2-Vinyl }\"\n \"{ num_charts | 1 | Maximum number of charts in the image }\"\n \"{ ccm_file | | Path to YAML file containing pre-computed CCM parameters}\";\n\nconst string backend_keys = format(\n \"{ backend | default | Choose one of computation backends: \"\n \"default: automatically (by default), \"\n \"openvino: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), \"\n \"opencv: OpenCV implementation, \"\n \"vkcom: VKCOM, \"\n \"cuda: CUDA, \"\n \"webnn: WebNN }\");\n\nconst string target_keys = format(\n \"{ target | cpu | Choose one of target computation devices: \"\n \"cpu: CPU target (by default), \"\n \"opencl: OpenCL, \"\n \"opencl_fp16: OpenCL fp16 (half-float precision), \"\n \"vpu: VPU, \"\n \"vulkan: Vulkan, \"\n \"cuda: CUDA, \"\n \"cuda_fp16: CUDA fp16 (half-float preprocess) }\");\n\nstring keys = param_keys + backend_keys + target_keys;\n\nstatic bool processFrame(const Mat& frame, Ptr detector, Mat& src, int nc){\n if (!detector->process(frame, nc))\n {\n return false;\n }\n vector> checkers = detector->getListColorChecker();\n src = checkers[0]->getChartsRGB(false);\n\n return true;\n}\n\nint main(int argc, char* argv[]) {\n CommandLineParser parser(argc, argv, keys);\n parser.about(about);\n\n if (parser.has(\"help\")) {\n cout << about << endl;\n parser.printMessage();\n return 0;\n }\n\n string modelName = parser.get(\"@alias\");\n string zooFile = parser.get(\"zoo\");\n const char* path = getenv(\"OPENCV_SAMPLES_DATA_PATH\");\n\n if ((path != NULL) || parser.has(\"@alias\")) {\n zooFile = findFile(zooFile);\n }\n else{\n cout<<\"[WARN] set the environment variables or pass the arguments --model, --config and models.yml file using --zoo for using dnn based detector. 
Continuing with default detector.\\n\\n\";\n }\n keys += genPreprocArguments(modelName, zooFile);\n parser = CommandLineParser(argc, argv, keys);\n\n int t = parser.get(\"type\");\n if (t < 0 || t > 2)\n {\n cout << \"Error: --type must be 0, 1 or 2\" << endl;\n parser.printMessage(); // prints full usage\n return -1;\n }\n\n ColorChart chartType = ColorChart(t);\n\n const string sha1 = parser.get(\"sha1\");\n const string modelPath = findModel(parser.get(\"model\"), sha1);\n const string config_sha1 = parser.get(\"config_sha1\");\n const string configPath = findModel(parser.get(\"config\"), config_sha1);\n const string backend = parser.get(\"backend\");\n const string target = parser.get(\"target\");\n\n int nc = parser.get(\"num_charts\");\n\n // Get input and target image paths\n const string inputFile = parser.get(\"input\");\n const string queryFile = parser.get(\"query\");\n const string ccmFile = parser.get(\"ccm_file\");\n\n if (!ccmFile.empty()) {\n // When ccm_file is provided, only query is required\n if (queryFile.empty()) {\n cout << \"Error: Query image path must be provided when using pre-computed CCM.\" << endl;\n parser.printMessage();\n return -1;\n }\n } else {\n // Original validation for when computing new CCM\n if (inputFile.empty()) {\n cout << \"Error: Input image path must be provided.\" << endl;\n parser.printMessage();\n return -1;\n }\n }\n\n ColorCorrectionModel model;\n Mat queryImage;\n\n if (!ccmFile.empty()) {\n // Load CCM from YAML file\n FileStorage fs(ccmFile, FileStorage::READ);\n if (!fs.isOpened()) {\n cout << \"Error: Unable to open CCM file: \" << ccmFile << endl;\n return -1;\n }\n model.read(fs[\"ColorCorrectionModel\"]);\n fs.release();\n cout << \"Loaded CCM from file: \" << ccmFile << endl;\n\n // Read query image when using pre-computed CCM\n queryImage = imread(findFile(queryFile));\n if (queryImage.empty()) {\n cout << \"Error: Unable to read query image.\" << endl;\n return -1;\n }\n } else {\n // Read input image for computing new CCM\n Mat originalImage = imread(findFile(inputFile));\n if (originalImage.empty()) {\n cout << \"Error: Unable to read input image.\" << endl;\n return -1;\n }\n\n // Process first image to compute CCM\n Mat image = originalImage.clone();\n Mat src;\n\n Ptr detector;\n if (!modelPath.empty() && !configPath.empty()) {\n Net net = readNetFromTensorflow(modelPath, configPath);\n net.setPreferableBackend(getBackendID(backend));\n net.setPreferableTarget(getTargetID(target));\n detector = CCheckerDetector::create(net);\n cout << \"Using DNN-based checker detector.\" << endl;\n } else {\n detector = CCheckerDetector::create();\n cout << \"Using thresholding-based checker detector.\" << endl;\n }\n detector->setColorChartType(chartType);\n\n if (!processFrame(image, detector, src, nc)) {\n cout << \"No chart detected in the input image!\" << endl;\n return -1;\n }\n // Convert to double and normalize\n src.convertTo(src, CV_64F, 1.0/255.0);\n\n // Color correction model\n model = ColorCorrectionModel(src, COLORCHECKER_MACBETH);\n model.setCcmType(CCM_LINEAR);\n model.setDistance(DISTANCE_CIE2000);\n model.setLinearization(LINEARIZATION_GAMMA);\n model.setLinearizationGamma(2.2);\n\n Mat ccm = model.compute();\n cout << \"Computed CCM Matrix:\\n\" << ccm << endl;\n cout << \"Loss: \" << model.getLoss() << endl;\n\n // Save model parameters to YAML file\n FileStorage fs(\"ccm_output.yaml\", FileStorage::WRITE);\n model.write(fs);\n fs.release();\n cout << \"Model parameters saved to ccm_output.yaml\" << endl;\n\n // Set 
query image for correction\n if (queryFile.empty()) {\n cout << \"[WARN] No query image provided, applying color correction on input image\" << endl;\n queryImage = originalImage.clone();\n } else {\n queryImage = imread(findFile(queryFile));\n if (queryImage.empty()) {\n cout << \"Error: Unable to read query image.\" << endl;\n return -1;\n }\n }\n }\n\n // Apply correction to query image\n Mat calibratedImage, normalizedImage;\n cvtColor(queryImage, normalizedImage, COLOR_BGR2RGB);\n normalizedImage.convertTo(normalizedImage, CV_64F, 1.0/255.0); // Convert to double and normalize\n model.correctImage(normalizedImage, calibratedImage);", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2091429332", + "repo_full_name": "opencv/opencv", + "pr_number": 27051, + "pr_file": "samples/cpp/color_correction_model.cpp", + "discussion_id": "2091429332", + "commented_code": "@@ -0,0 +1,232 @@\n+//! [tutorial]\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \"../dnn/common.hpp\"\n+\n+using namespace std;\n+using namespace cv;\n+using namespace cv::dnn;\n+using namespace cv::ccm;\n+using namespace mcc;\n+\n+const string about =\n+ \"This sample detects Macbeth color checker using DNN or thresholding and applies color correction.\"\n+ \"To run default:\\n\"\n+ \"\\t ./example_cpp_color_correction_model --input=path/to/your/input/image --query=path/to/your/query/image\\n\"\n+ \"With DNN model:\\n\"\n+ \"\\t ./example_cpp_color_correction_model mcc --input=path/to/your/input/image --query=path/to/your/query/image\\n\\n\"\n+ \"Using pre-computed CCM:\\n\"\n+ \"\\t ./example_cpp_color_correction_model mcc --ccm_file=path/to/ccm_output.yaml --query=path/to/your/query/image\\n\\n\"\n+ \"Model path can also be specified using --model argument. And config path can be specified using --config. Download it using python download_models.py mcc from dnn samples directory\\n\\n\";\n+\n+const string param_keys =\n+ \"{ help h | | Print help message. }\"\n+ \"{ @alias | | An alias name of model to extract preprocessing parameters from models.yml file. }\"\n+ \"{ zoo | ../dnn/models.yml | An optional path to file with preprocessing parameters }\"\n+ \"{ input i | mcc_ccm_test.jpg | Path to input image for computing CCM.}\"\n+ \"{ query q | baboon.jpg | Path to query image to apply color correction. If not provided, input image will be used. 
}\"\n+ \"{ type | 0 | chartType: 0-Standard, 1-DigitalSG, 2-Vinyl }\"\n+ \"{ num_charts | 1 | Maximum number of charts in the image }\"\n+ \"{ ccm_file | | Path to YAML file containing pre-computed CCM parameters}\";\n+\n+const string backend_keys = format(\n+ \"{ backend | default | Choose one of computation backends: \"\n+ \"default: automatically (by default), \"\n+ \"openvino: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), \"\n+ \"opencv: OpenCV implementation, \"\n+ \"vkcom: VKCOM, \"\n+ \"cuda: CUDA, \"\n+ \"webnn: WebNN }\");\n+\n+const string target_keys = format(\n+ \"{ target | cpu | Choose one of target computation devices: \"\n+ \"cpu: CPU target (by default), \"\n+ \"opencl: OpenCL, \"\n+ \"opencl_fp16: OpenCL fp16 (half-float precision), \"\n+ \"vpu: VPU, \"\n+ \"vulkan: Vulkan, \"\n+ \"cuda: CUDA, \"\n+ \"cuda_fp16: CUDA fp16 (half-float preprocess) }\");\n+\n+string keys = param_keys + backend_keys + target_keys;\n+\n+static bool processFrame(const Mat& frame, Ptr detector, Mat& src, int nc){\n+ if (!detector->process(frame, nc))\n+ {\n+ return false;\n+ }\n+ vector> checkers = detector->getListColorChecker();\n+ src = checkers[0]->getChartsRGB(false);\n+\n+ return true;\n+}\n+\n+int main(int argc, char* argv[]) {\n+ CommandLineParser parser(argc, argv, keys);\n+ parser.about(about);\n+\n+ if (parser.has(\"help\")) {\n+ cout << about << endl;\n+ parser.printMessage();\n+ return 0;\n+ }\n+\n+ string modelName = parser.get(\"@alias\");\n+ string zooFile = parser.get(\"zoo\");\n+ const char* path = getenv(\"OPENCV_SAMPLES_DATA_PATH\");\n+\n+ if ((path != NULL) || parser.has(\"@alias\")) {\n+ zooFile = findFile(zooFile);\n+ }\n+ else{\n+ cout<<\"[WARN] set the environment variables or pass the arguments --model, --config and models.yml file using --zoo for using dnn based detector. 
Continuing with default detector.\\n\\n\";\n+ }\n+ keys += genPreprocArguments(modelName, zooFile);\n+ parser = CommandLineParser(argc, argv, keys);\n+\n+ int t = parser.get(\"type\");\n+ if (t < 0 || t > 2)\n+ {\n+ cout << \"Error: --type must be 0, 1 or 2\" << endl;\n+ parser.printMessage(); // prints full usage\n+ return -1;\n+ }\n+\n+ ColorChart chartType = ColorChart(t);\n+\n+ const string sha1 = parser.get(\"sha1\");\n+ const string modelPath = findModel(parser.get(\"model\"), sha1);\n+ const string config_sha1 = parser.get(\"config_sha1\");\n+ const string configPath = findModel(parser.get(\"config\"), config_sha1);\n+ const string backend = parser.get(\"backend\");\n+ const string target = parser.get(\"target\");\n+\n+ int nc = parser.get(\"num_charts\");\n+\n+ // Get input and target image paths\n+ const string inputFile = parser.get(\"input\");\n+ const string queryFile = parser.get(\"query\");\n+ const string ccmFile = parser.get(\"ccm_file\");\n+\n+ if (!ccmFile.empty()) {\n+ // When ccm_file is provided, only query is required\n+ if (queryFile.empty()) {\n+ cout << \"Error: Query image path must be provided when using pre-computed CCM.\" << endl;\n+ parser.printMessage();\n+ return -1;\n+ }\n+ } else {\n+ // Original validation for when computing new CCM\n+ if (inputFile.empty()) {\n+ cout << \"Error: Input image path must be provided.\" << endl;\n+ parser.printMessage();\n+ return -1;\n+ }\n+ }\n+\n+ ColorCorrectionModel model;\n+ Mat queryImage;\n+\n+ if (!ccmFile.empty()) {\n+ // Load CCM from YAML file\n+ FileStorage fs(ccmFile, FileStorage::READ);\n+ if (!fs.isOpened()) {\n+ cout << \"Error: Unable to open CCM file: \" << ccmFile << endl;\n+ return -1;\n+ }\n+ model.read(fs[\"ColorCorrectionModel\"]);\n+ fs.release();\n+ cout << \"Loaded CCM from file: \" << ccmFile << endl;\n+\n+ // Read query image when using pre-computed CCM\n+ queryImage = imread(findFile(queryFile));\n+ if (queryImage.empty()) {\n+ cout << \"Error: Unable to read query image.\" << endl;\n+ return -1;\n+ }\n+ } else {\n+ // Read input image for computing new CCM\n+ Mat originalImage = imread(findFile(inputFile));\n+ if (originalImage.empty()) {\n+ cout << \"Error: Unable to read input image.\" << endl;\n+ return -1;\n+ }\n+\n+ // Process first image to compute CCM\n+ Mat image = originalImage.clone();\n+ Mat src;\n+\n+ Ptr detector;\n+ if (!modelPath.empty() && !configPath.empty()) {\n+ Net net = readNetFromTensorflow(modelPath, configPath);\n+ net.setPreferableBackend(getBackendID(backend));\n+ net.setPreferableTarget(getTargetID(target));\n+ detector = CCheckerDetector::create(net);\n+ cout << \"Using DNN-based checker detector.\" << endl;\n+ } else {\n+ detector = CCheckerDetector::create();\n+ cout << \"Using thresholding-based checker detector.\" << endl;\n+ }\n+ detector->setColorChartType(chartType);\n+\n+ if (!processFrame(image, detector, src, nc)) {\n+ cout << \"No chart detected in the input image!\" << endl;\n+ return -1;\n+ }\n+ // Convert to double and normalize\n+ src.convertTo(src, CV_64F, 1.0/255.0);\n+\n+ // Color correction model\n+ model = ColorCorrectionModel(src, COLORCHECKER_MACBETH);\n+ model.setCcmType(CCM_LINEAR);\n+ model.setDistance(DISTANCE_CIE2000);\n+ model.setLinearization(LINEARIZATION_GAMMA);\n+ model.setLinearizationGamma(2.2);\n+\n+ Mat ccm = model.compute();\n+ cout << \"Computed CCM Matrix:\\n\" << ccm << endl;\n+ cout << \"Loss: \" << model.getLoss() << endl;\n+\n+ // Save model parameters to YAML file\n+ FileStorage fs(\"ccm_output.yaml\", 
FileStorage::WRITE);\n+ model.write(fs);\n+ fs.release();\n+ cout << \"Model parameters saved to ccm_output.yaml\" << endl;\n+\n+ // Set query image for correction\n+ if (queryFile.empty()) {\n+ cout << \"[WARN] No query image provided, applying color correction on input image\" << endl;\n+ queryImage = originalImage.clone();\n+ } else {\n+ queryImage = imread(findFile(queryFile));\n+ if (queryImage.empty()) {\n+ cout << \"Error: Unable to read query image.\" << endl;\n+ return -1;\n+ }\n+ }\n+ }\n+\n+ // Apply correction to query image\n+ Mat calibratedImage, normalizedImage;\n+ cvtColor(queryImage, normalizedImage, COLOR_BGR2RGB);\n+ normalizedImage.convertTo(normalizedImage, CV_64F, 1.0/255.0); // Convert to double and normalize\n+ model.correctImage(normalizedImage, calibratedImage);", + "comment_created_at": "2025-05-15T15:14:56+00:00", + "comment_author": "asmorkalov", + "comment_body": "I propose to Move BRG/RGB logic into model with a flag (default to bgr). All OpenCV functions are designed for BGR. The flag allows to make optimizations without API change.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1022288868", + "pr_number": 22780, + "pr_file": "modules/core/src/va_intel.cpp", + "created_at": "2022-11-15T03:49:31+00:00", + "commented_code": "#else // !HAVE_VA\n\n# ifdef HAVE_VA_INTEL\n if (tryInterop)\n\n Context& va_ctx = OpenCLExecutionContext::getCurrent().getContext();\n VAAPIInterop* interop = va_ctx.getUserContext().get();\n if (tryInterop && interop == NULL)", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1022288868", + "repo_full_name": "opencv/opencv", + "pr_number": 22780, + "pr_file": "modules/core/src/va_intel.cpp", + "discussion_id": "1022288868", + "commented_code": "@@ -85,7 +85,10 @@ Context& initializeContextFromVA(VADisplay display, bool tryInterop)\n #else // !HAVE_VA\n \n # ifdef HAVE_VA_INTEL\n- if (tryInterop)\n+\n+ Context& va_ctx = OpenCLExecutionContext::getCurrent().getContext();\n+ VAAPIInterop* interop = va_ctx.getUserContext().get();\n+ if (tryInterop && interop == NULL)", + "comment_created_at": "2022-11-15T03:49:31+00:00", + "comment_author": "alalek", + "comment_body": "Function is called `initializeContextFromVA()`.\r\nBut implementation of this function is confusing (see below).\r\n\r\nExpected result is OpenCL context with specified `VADisplay display`.\r\nReturning result with **another** `display` is wrong.\r\nFallback paths (and `tryInterop` parameter) should not be there. Error should be raised instead (actually one fallback do that through `NO_VA_SUPPORT_ERROR`). `tryInterop` parameter should be dropped too.\r\n\r\nReference: `initializeContextFromD3D11Device()`\r\n\r\nRelates #5272 (and https://github.com/opencv/opencv/pull/5272#discussion_r38926396 is correct and we a have design problem here)", + "pr_file_module": null + }, + { + "comment_id": "1024888786", + "repo_full_name": "opencv/opencv", + "pr_number": 22780, + "pr_file": "modules/core/src/va_intel.cpp", + "discussion_id": "1022288868", + "commented_code": "@@ -85,7 +85,10 @@ Context& initializeContextFromVA(VADisplay display, bool tryInterop)\n #else // !HAVE_VA\n \n # ifdef HAVE_VA_INTEL\n- if (tryInterop)\n+\n+ Context& va_ctx = OpenCLExecutionContext::getCurrent().getContext();\n+ VAAPIInterop* interop = va_ctx.getUserContext().get();\n+ if (tryInterop && interop == NULL)", + "comment_created_at": "2022-11-17T08:32:54+00:00", + "comment_author": "kallaballa", + "comment_body": "I think I understand. 
I am on it", + "pr_file_module": null + }, + { + "comment_id": "1027130746", + "repo_full_name": "opencv/opencv", + "pr_number": 22780, + "pr_file": "modules/core/src/va_intel.cpp", + "discussion_id": "1022288868", + "commented_code": "@@ -85,7 +85,10 @@ Context& initializeContextFromVA(VADisplay display, bool tryInterop)\n #else // !HAVE_VA\n \n # ifdef HAVE_VA_INTEL\n- if (tryInterop)\n+\n+ Context& va_ctx = OpenCLExecutionContext::getCurrent().getContext();\n+ VAAPIInterop* interop = va_ctx.getUserContext().get();\n+ if (tryInterop && interop == NULL)", + "comment_created_at": "2022-11-19T19:12:26+00:00", + "comment_author": "kallaballa", + "comment_body": "But how to handle VA (non-Intel)? Is it correct to return a context without interop?", + "pr_file_module": null + }, + { + "comment_id": "1027131155", + "repo_full_name": "opencv/opencv", + "pr_number": 22780, + "pr_file": "modules/core/src/va_intel.cpp", + "discussion_id": "1022288868", + "commented_code": "@@ -85,7 +85,10 @@ Context& initializeContextFromVA(VADisplay display, bool tryInterop)\n #else // !HAVE_VA\n \n # ifdef HAVE_VA_INTEL\n- if (tryInterop)\n+\n+ Context& va_ctx = OpenCLExecutionContext::getCurrent().getContext();\n+ VAAPIInterop* interop = va_ctx.getUserContext().get();\n+ if (tryInterop && interop == NULL)", + "comment_created_at": "2022-11-19T19:16:53+00:00", + "comment_author": "kallaballa", + "comment_body": "Also. what happens if it is called multiple times and the context is already with VADisplay? I am asking specifically because ```initializeContextFromVA()``` is called from withing VideoWriter and VideoCapture.\r\n", + "pr_file_module": null + }, + { + "comment_id": "1027131595", + "repo_full_name": "opencv/opencv", + "pr_number": 22780, + "pr_file": "modules/core/src/va_intel.cpp", + "discussion_id": "1022288868", + "commented_code": "@@ -85,7 +85,10 @@ Context& initializeContextFromVA(VADisplay display, bool tryInterop)\n #else // !HAVE_VA\n \n # ifdef HAVE_VA_INTEL\n- if (tryInterop)\n+\n+ Context& va_ctx = OpenCLExecutionContext::getCurrent().getContext();\n+ VAAPIInterop* interop = va_ctx.getUserContext().get();\n+ if (tryInterop && interop == NULL)", + "comment_created_at": "2022-11-19T19:21:13+00:00", + "comment_author": "kallaballa", + "comment_body": "I myself have a proposal for what context handling should maybe look like based on my [4.x fork](https://github.com/kallaballa/opencv/tree/GCV): https://github.com/kallaballa/GCV/blob/main/src/video/video-demo.cpp\r\n\r\nIt basically creates a context for each interop-ability (OpenGL/VAAPI) copies them and when they are needed it binds them. 
So only one ```initializeContextFromVA()```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1889809784", + "pr_number": 25584, + "pr_file": "modules/videoio/src/backend_plugin.cpp", + "created_at": "2024-12-18T08:23:40+00:00", + "commented_code": "return Ptr();\n}\n\nPtr PluginBackend::createCapture(std::streambuf& source, const VideoCaptureParameters& params) const", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1889809784", + "repo_full_name": "opencv/opencv", + "pr_number": 25584, + "pr_file": "modules/videoio/src/backend_plugin.cpp", + "discussion_id": "1889809784", + "commented_code": "@@ -704,6 +731,25 @@ Ptr PluginBackend::createCapture(const std::string &filename, con\n return Ptr();\n }\n \n+Ptr PluginBackend::createCapture(std::streambuf& source, const VideoCaptureParameters& params) const", + "comment_created_at": "2024-12-18T08:23:40+00:00", + "comment_author": "opencv-alalek", + "comment_body": "No need to use `std::streambuf` anywhere in API.\r\nWe need to use internally just simplified class which provides seek/read **interface**.\r\n\r\nPublic API should have simple converter from this.", + "pr_file_module": null + }, + { + "comment_id": "1895328978", + "repo_full_name": "opencv/opencv", + "pr_number": 25584, + "pr_file": "modules/videoio/src/backend_plugin.cpp", + "discussion_id": "1889809784", + "commented_code": "@@ -704,6 +731,25 @@ Ptr PluginBackend::createCapture(const std::string &filename, con\n return Ptr();\n }\n \n+Ptr PluginBackend::createCapture(std::streambuf& source, const VideoCaptureParameters& params) const", + "comment_created_at": "2024-12-23T06:56:59+00:00", + "comment_author": "dkurt", + "comment_body": "Thanks for the review. Added a simplified interface, but it’s private for internal conversion.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1895741926", + "pr_number": 25584, + "pr_file": "modules/videoio/src/cap.cpp", + "created_at": "2024-12-23T13:13:30+00:00", + "commented_code": "return false;\n}\n\nbool VideoCapture::open(std::streambuf& stream, int apiPreference, const std::vector& params)\n{\n CV_INSTRUMENT_REGION();\n\n if (isOpened())\n {\n release();\n }\n\n const VideoCaptureParameters parameters(params);\n const std::vector backends = cv::videoio_registry::getAvailableBackends_CaptureByBuffer();\n for (size_t i = 0; i < backends.size(); i++)\n {\n const VideoBackendInfo& info = backends[i];\n if (apiPreference == CAP_ANY || apiPreference == info.id)\n {\n if (!info.backendFactory)\n {\n CV_LOG_DEBUG(NULL, \"VIDEOIO(\" << info.name << \"): factory is not available (plugins require filesystem support)\");\n continue;\n }\n CV_CAPTURE_LOG_DEBUG(NULL,\n cv::format(\"VIDEOIO(%s): trying capture buffer ...\",\n info.name));\n CV_Assert(!info.backendFactory.empty());\n const Ptr backend = info.backendFactory->getBackend();\n if (!backend.empty())\n {\n try\n {\n icap = backend->createCapture(StreambufReadStream::create(stream), parameters);", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1895741926", + "repo_full_name": "opencv/opencv", + "pr_number": 25584, + "pr_file": "modules/videoio/src/cap.cpp", + "discussion_id": "1895741926", + "commented_code": "@@ -228,6 +235,128 @@ bool VideoCapture::open(const String& filename, int apiPreference, const std::ve\n return false;\n }\n \n+bool VideoCapture::open(std::streambuf& stream, int apiPreference, const std::vector& params)\n+{\n+ CV_INSTRUMENT_REGION();\n+\n+ if (isOpened())\n+ {\n+ 
release();\n+ }\n+\n+ const VideoCaptureParameters parameters(params);\n+ const std::vector backends = cv::videoio_registry::getAvailableBackends_CaptureByBuffer();\n+ for (size_t i = 0; i < backends.size(); i++)\n+ {\n+ const VideoBackendInfo& info = backends[i];\n+ if (apiPreference == CAP_ANY || apiPreference == info.id)\n+ {\n+ if (!info.backendFactory)\n+ {\n+ CV_LOG_DEBUG(NULL, \"VIDEOIO(\" << info.name << \"): factory is not available (plugins require filesystem support)\");\n+ continue;\n+ }\n+ CV_CAPTURE_LOG_DEBUG(NULL,\n+ cv::format(\"VIDEOIO(%s): trying capture buffer ...\",\n+ info.name));\n+ CV_Assert(!info.backendFactory.empty());\n+ const Ptr backend = info.backendFactory->getBackend();\n+ if (!backend.empty())\n+ {\n+ try\n+ {\n+ icap = backend->createCapture(StreambufReadStream::create(stream), parameters);", + "comment_created_at": "2024-12-23T13:13:30+00:00", + "comment_author": "opencv-alalek", + "comment_body": "Who performs `seek(0, 0)` between trials?\r\n\r\nPerhaps we should not allow `CAP_ANY` in this API.", + "pr_file_module": null + }, + { + "comment_id": "1896163255", + "repo_full_name": "opencv/opencv", + "pr_number": 25584, + "pr_file": "modules/videoio/src/cap.cpp", + "discussion_id": "1895741926", + "commented_code": "@@ -228,6 +235,128 @@ bool VideoCapture::open(const String& filename, int apiPreference, const std::ve\n return false;\n }\n \n+bool VideoCapture::open(std::streambuf& stream, int apiPreference, const std::vector& params)\n+{\n+ CV_INSTRUMENT_REGION();\n+\n+ if (isOpened())\n+ {\n+ release();\n+ }\n+\n+ const VideoCaptureParameters parameters(params);\n+ const std::vector backends = cv::videoio_registry::getAvailableBackends_CaptureByBuffer();\n+ for (size_t i = 0; i < backends.size(); i++)\n+ {\n+ const VideoBackendInfo& info = backends[i];\n+ if (apiPreference == CAP_ANY || apiPreference == info.id)\n+ {\n+ if (!info.backendFactory)\n+ {\n+ CV_LOG_DEBUG(NULL, \"VIDEOIO(\" << info.name << \"): factory is not available (plugins require filesystem support)\");\n+ continue;\n+ }\n+ CV_CAPTURE_LOG_DEBUG(NULL,\n+ cv::format(\"VIDEOIO(%s): trying capture buffer ...\",\n+ info.name));\n+ CV_Assert(!info.backendFactory.empty());\n+ const Ptr backend = info.backendFactory->getBackend();\n+ if (!backend.empty())\n+ {\n+ try\n+ {\n+ icap = backend->createCapture(StreambufReadStream::create(stream), parameters);", + "comment_created_at": "2024-12-23T22:18:46+00:00", + "comment_author": "dkurt", + "comment_body": "Good point, there is no explicit seek. 
Added a check that `apiPref != CAP_ANY`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1856167263", + "pr_number": 26511, + "pr_file": "modules/photo/src/seamless_cloning.cpp", + "created_at": "2024-11-25T08:56:04+00:00", + "commented_code": "{\n CV_INSTRUMENT_REGION();\n CV_Assert(!_src.empty());\n CV_Assert(!_dst.empty());\n\n const Mat src = _src.getMat();\n const Mat dest = _dst.getMat();\n\n if (p == Point())\n {\n p.x = dest.size().width / 2;\n p.y = dest.size().height / 2;\n }", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1856167263", + "repo_full_name": "opencv/opencv", + "pr_number": 26511, + "pr_file": "modules/photo/src/seamless_cloning.cpp", + "discussion_id": "1856167263", + "commented_code": "@@ -68,17 +68,30 @@ void cv::seamlessClone(InputArray _src, InputArray _dst, InputArray _mask, Point\n {\n CV_INSTRUMENT_REGION();\n CV_Assert(!_src.empty());\n+ CV_Assert(!_dst.empty());\n \n const Mat src = _src.getMat();\n const Mat dest = _dst.getMat();\n+\n+ if (p == Point())\n+ {\n+ p.x = dest.size().width / 2;\n+ p.y = dest.size().height / 2;\n+ }", + "comment_created_at": "2024-11-25T08:56:04+00:00", + "comment_author": "asmorkalov", + "comment_body": "The `p` parameter does not have default value like `Point p = Point()`. It means that the point is user provided value. I do not think that we sound handle `Point()`, a.k.a. (0,0) as special case.", + "pr_file_module": null + }, + { + "comment_id": "1856216698", + "repo_full_name": "opencv/opencv", + "pr_number": 26511, + "pr_file": "modules/photo/src/seamless_cloning.cpp", + "discussion_id": "1856167263", + "commented_code": "@@ -68,17 +68,30 @@ void cv::seamlessClone(InputArray _src, InputArray _dst, InputArray _mask, Point\n {\n CV_INSTRUMENT_REGION();\n CV_Assert(!_src.empty());\n+ CV_Assert(!_dst.empty());\n \n const Mat src = _src.getMat();\n const Mat dest = _dst.getMat();\n+\n+ if (p == Point())\n+ {\n+ p.x = dest.size().width / 2;\n+ p.y = dest.size().height / 2;\n+ }", + "comment_created_at": "2024-11-25T09:25:31+00:00", + "comment_author": "sturkmen72", + "comment_body": "what about using Point(-1,-1) to centralize as special case ? \r\n", + "pr_file_module": null + }, + { + "comment_id": "1856333478", + "repo_full_name": "opencv/opencv", + "pr_number": 26511, + "pr_file": "modules/photo/src/seamless_cloning.cpp", + "discussion_id": "1856167263", + "commented_code": "@@ -68,17 +68,30 @@ void cv::seamlessClone(InputArray _src, InputArray _dst, InputArray _mask, Point\n {\n CV_INSTRUMENT_REGION();\n CV_Assert(!_src.empty());\n+ CV_Assert(!_dst.empty());\n \n const Mat src = _src.getMat();\n const Mat dest = _dst.getMat();\n+\n+ if (p == Point())\n+ {\n+ p.x = dest.size().width / 2;\n+ p.y = dest.size().height / 2;\n+ }", + "comment_created_at": "2024-11-25T10:30:20+00:00", + "comment_author": "asmorkalov", + "comment_body": "Why do we need special cases in code? 
User should provide location, shouldn't he/she?", + "pr_file_module": null + }, + { + "comment_id": "1856531315", + "repo_full_name": "opencv/opencv", + "pr_number": 26511, + "pr_file": "modules/photo/src/seamless_cloning.cpp", + "discussion_id": "1856167263", + "commented_code": "@@ -68,17 +68,30 @@ void cv::seamlessClone(InputArray _src, InputArray _dst, InputArray _mask, Point\n {\n CV_INSTRUMENT_REGION();\n CV_Assert(!_src.empty());\n+ CV_Assert(!_dst.empty());\n \n const Mat src = _src.getMat();\n const Mat dest = _dst.getMat();\n+\n+ if (p == Point())\n+ {\n+ p.x = dest.size().width / 2;\n+ p.y = dest.size().height / 2;\n+ }", + "comment_created_at": "2024-11-25T12:26:53+00:00", + "comment_author": "sturkmen72", + "comment_body": "\"In issue #15928, the user reports:\r\n\r\n>>I had:\r\n\r\n```\r\nsrc.shape = 500,500\r\nsrc_mask = 500,500\r\ndest.shape = 500,500\r\n\r\nh, w = src_mask.shape[:2]\r\n```\r\n>>At this point, I didn't see the need for the required argument `p`, as I expected the function to automatically match the mask. However, since `p` is a required argument, I provided `(w//2, h//2)` for `p` (the center of the mask). But instead of aligning the source image (`src`) into the destination (`dest`), the resulting cloning was shifted up to the center of the screen!\r\nIt seems like there may have been an issue with how the center point was calculated or used. The user expected the function to align the source (src) based on the mask (src_mask) automatically, but instead, the function seemed to shift the cloning to an unexpected location.\r\n\r\nPerhaps the logic for finding the center point could be improved or made more intuitive to help avoid confusion in such cases. A clearer method of determining the center, or perhaps some additional documentation on how to properly use the p argument, might help facilitate this process.\"", + "pr_file_module": null + }, + { + "comment_id": "1856553811", + "repo_full_name": "opencv/opencv", + "pr_number": 26511, + "pr_file": "modules/photo/src/seamless_cloning.cpp", + "discussion_id": "1856167263", + "commented_code": "@@ -68,17 +68,30 @@ void cv::seamlessClone(InputArray _src, InputArray _dst, InputArray _mask, Point\n {\n CV_INSTRUMENT_REGION();\n CV_Assert(!_src.empty());\n+ CV_Assert(!_dst.empty());\n \n const Mat src = _src.getMat();\n const Mat dest = _dst.getMat();\n+\n+ if (p == Point())\n+ {\n+ p.x = dest.size().width / 2;\n+ p.y = dest.size().height / 2;\n+ }", + "comment_created_at": "2024-11-25T12:43:22+00:00", + "comment_author": "sturkmen72", + "comment_body": "also in the issue the user commented \r\n>>I got the expected behaviour by using a rect of thickness 2\r\n\r\n```\r\nh, w = src_mask.shape[:2]\r\n# rect of thickness > 1\r\ncv2.rectangle(src_mask,(0,0), (w, h) ,255, 2)\r\n# the clone will now aligns from the centre\r\nout=cv2.seamlessClone(src, dest, src_mask,(w//2, h//2),flags= cv2.NORMAL_CLONE)\r\n```\r\na rect of 1 thickness did not work.\r\n\r\n", + "pr_file_module": null + }, + { + "comment_id": "1856554333", + "repo_full_name": "opencv/opencv", + "pr_number": 26511, + "pr_file": "modules/photo/src/seamless_cloning.cpp", + "discussion_id": "1856167263", + "commented_code": "@@ -68,17 +68,30 @@ void cv::seamlessClone(InputArray _src, InputArray _dst, InputArray _mask, Point\n {\n CV_INSTRUMENT_REGION();\n CV_Assert(!_src.empty());\n+ CV_Assert(!_dst.empty());\n \n const Mat src = _src.getMat();\n const Mat dest = _dst.getMat();\n+\n+ if (p == Point())\n+ {\n+ p.x = dest.size().width / 2;\n+ p.y = 
dest.size().height / 2;\n+ }", + "comment_created_at": "2024-11-25T12:43:44+00:00", + "comment_author": "sturkmen72", + "comment_body": "i will revert this idea if you want.", + "pr_file_module": null + }, + { + "comment_id": "1856639391", + "repo_full_name": "opencv/opencv", + "pr_number": 26511, + "pr_file": "modules/photo/src/seamless_cloning.cpp", + "discussion_id": "1856167263", + "commented_code": "@@ -68,17 +68,30 @@ void cv::seamlessClone(InputArray _src, InputArray _dst, InputArray _mask, Point\n {\n CV_INSTRUMENT_REGION();\n CV_Assert(!_src.empty());\n+ CV_Assert(!_dst.empty());\n \n const Mat src = _src.getMat();\n const Mat dest = _dst.getMat();\n+\n+ if (p == Point())\n+ {\n+ p.x = dest.size().width / 2;\n+ p.y = dest.size().height / 2;\n+ }", + "comment_created_at": "2024-11-25T13:42:54+00:00", + "comment_author": "sturkmen72", + "comment_body": "there is related issues on some repos like https://github.com/OpenTalker/SadTalker/issues/542\r\nlet me investigate if\r\n\r\n```\r\n {\r\n p.x = dest.size().width / 2;\r\n p.y = dest.size().height / 2;\r\n }\r\n```\r\nworks perfectly fine for different dimensions", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-clear-api-contracts.md b/_reviewers/opencv-clear-api-contracts.md index 91f5d78..674d20c 100644 --- a/_reviewers/opencv-clear-api-contracts.md +++ b/_reviewers/opencv-clear-api-contracts.md @@ -40,283 +40,3 @@ bool initializeContextFromVA(VADisplay display) { ``` Remember to document any special parameter handling. If your function treats certain values (like Point(0,0)) specially, make this explicit in both the function signature (with default parameters) and documentation. When extending existing APIs, maintain consistency with established patterns in the codebase. - - -[ - { - "discussion_id": "2176710492", - "pr_number": 27499, - "pr_file": "modules/imgcodecs/src/loadsave.cpp", - "created_at": "2025-07-01T07:51:35+00:00", - "commented_code": "*\n*/\nstatic bool\nimread_( const String& filename, int flags, OutputArray mat )\nimread_( const String& filename, int flags, OutputArray mat,\n std::vector* metadata_types, OutputArrayOfArrays metadata)\n{\n /// Search for the relevant decoder to handle the imagery\n ImageDecoder decoder;\n\n if (metadata_types)\n metadata_types->clear();\n\n#ifdef HAVE_GDAL", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2176710492", - "repo_full_name": "opencv/opencv", - "pr_number": 27499, - "pr_file": "modules/imgcodecs/src/loadsave.cpp", - "discussion_id": "2176710492", - "commented_code": "@@ -419,11 +475,15 @@ static void ApplyExifOrientation(ExifEntry_t orientationTag, OutputArray img)\n *\n */\n static bool\n-imread_( const String& filename, int flags, OutputArray mat )\n+imread_( const String& filename, int flags, OutputArray mat,\n+ std::vector* metadata_types, OutputArrayOfArrays metadata)\n {\n /// Search for the relevant decoder to handle the imagery\n ImageDecoder decoder;\n \n+ if (metadata_types)\n+ metadata_types->clear();\n+\n #ifdef HAVE_GDAL", - "comment_created_at": "2025-07-01T07:51:35+00:00", - "comment_author": "asmorkalov", - "comment_body": "Need a warning, that GDAL does not support metadata for now.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2091429332", - "pr_number": 27051, - "pr_file": "samples/cpp/color_correction_model.cpp", - "created_at": "2025-05-15T15:14:56+00:00", - "commented_code": "//! 
[tutorial]\n#include \n#include \n#include \n#include \n#include \n#include \n#include \n#include \n#include \"../dnn/common.hpp\"\n\nusing namespace std;\nusing namespace cv;\nusing namespace cv::dnn;\nusing namespace cv::ccm;\nusing namespace mcc;\n\nconst string about =\n \"This sample detects Macbeth color checker using DNN or thresholding and applies color correction.\"\n \"To run default:\\n\"\n \"\\t ./example_cpp_color_correction_model --input=path/to/your/input/image --query=path/to/your/query/image\\n\"\n \"With DNN model:\\n\"\n \"\\t ./example_cpp_color_correction_model mcc --input=path/to/your/input/image --query=path/to/your/query/image\\n\\n\"\n \"Using pre-computed CCM:\\n\"\n \"\\t ./example_cpp_color_correction_model mcc --ccm_file=path/to/ccm_output.yaml --query=path/to/your/query/image\\n\\n\"\n \"Model path can also be specified using --model argument. And config path can be specified using --config. Download it using python download_models.py mcc from dnn samples directory\\n\\n\";\n\nconst string param_keys =\n \"{ help h | | Print help message. }\"\n \"{ @alias | | An alias name of model to extract preprocessing parameters from models.yml file. }\"\n \"{ zoo | ../dnn/models.yml | An optional path to file with preprocessing parameters }\"\n \"{ input i | mcc_ccm_test.jpg | Path to input image for computing CCM.}\"\n \"{ query q | baboon.jpg | Path to query image to apply color correction. If not provided, input image will be used. }\"\n \"{ type | 0 | chartType: 0-Standard, 1-DigitalSG, 2-Vinyl }\"\n \"{ num_charts | 1 | Maximum number of charts in the image }\"\n \"{ ccm_file | | Path to YAML file containing pre-computed CCM parameters}\";\n\nconst string backend_keys = format(\n \"{ backend | default | Choose one of computation backends: \"\n \"default: automatically (by default), \"\n \"openvino: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), \"\n \"opencv: OpenCV implementation, \"\n \"vkcom: VKCOM, \"\n \"cuda: CUDA, \"\n \"webnn: WebNN }\");\n\nconst string target_keys = format(\n \"{ target | cpu | Choose one of target computation devices: \"\n \"cpu: CPU target (by default), \"\n \"opencl: OpenCL, \"\n \"opencl_fp16: OpenCL fp16 (half-float precision), \"\n \"vpu: VPU, \"\n \"vulkan: Vulkan, \"\n \"cuda: CUDA, \"\n \"cuda_fp16: CUDA fp16 (half-float preprocess) }\");\n\nstring keys = param_keys + backend_keys + target_keys;\n\nstatic bool processFrame(const Mat& frame, Ptr detector, Mat& src, int nc){\n if (!detector->process(frame, nc))\n {\n return false;\n }\n vector> checkers = detector->getListColorChecker();\n src = checkers[0]->getChartsRGB(false);\n\n return true;\n}\n\nint main(int argc, char* argv[]) {\n CommandLineParser parser(argc, argv, keys);\n parser.about(about);\n\n if (parser.has(\"help\")) {\n cout << about << endl;\n parser.printMessage();\n return 0;\n }\n\n string modelName = parser.get(\"@alias\");\n string zooFile = parser.get(\"zoo\");\n const char* path = getenv(\"OPENCV_SAMPLES_DATA_PATH\");\n\n if ((path != NULL) || parser.has(\"@alias\")) {\n zooFile = findFile(zooFile);\n }\n else{\n cout<<\"[WARN] set the environment variables or pass the arguments --model, --config and models.yml file using --zoo for using dnn based detector. 
Continuing with default detector.\\n\\n\";\n }\n keys += genPreprocArguments(modelName, zooFile);\n parser = CommandLineParser(argc, argv, keys);\n\n int t = parser.get(\"type\");\n if (t < 0 || t > 2)\n {\n cout << \"Error: --type must be 0, 1 or 2\" << endl;\n parser.printMessage(); // prints full usage\n return -1;\n }\n\n ColorChart chartType = ColorChart(t);\n\n const string sha1 = parser.get(\"sha1\");\n const string modelPath = findModel(parser.get(\"model\"), sha1);\n const string config_sha1 = parser.get(\"config_sha1\");\n const string configPath = findModel(parser.get(\"config\"), config_sha1);\n const string backend = parser.get(\"backend\");\n const string target = parser.get(\"target\");\n\n int nc = parser.get(\"num_charts\");\n\n // Get input and target image paths\n const string inputFile = parser.get(\"input\");\n const string queryFile = parser.get(\"query\");\n const string ccmFile = parser.get(\"ccm_file\");\n\n if (!ccmFile.empty()) {\n // When ccm_file is provided, only query is required\n if (queryFile.empty()) {\n cout << \"Error: Query image path must be provided when using pre-computed CCM.\" << endl;\n parser.printMessage();\n return -1;\n }\n } else {\n // Original validation for when computing new CCM\n if (inputFile.empty()) {\n cout << \"Error: Input image path must be provided.\" << endl;\n parser.printMessage();\n return -1;\n }\n }\n\n ColorCorrectionModel model;\n Mat queryImage;\n\n if (!ccmFile.empty()) {\n // Load CCM from YAML file\n FileStorage fs(ccmFile, FileStorage::READ);\n if (!fs.isOpened()) {\n cout << \"Error: Unable to open CCM file: \" << ccmFile << endl;\n return -1;\n }\n model.read(fs[\"ColorCorrectionModel\"]);\n fs.release();\n cout << \"Loaded CCM from file: \" << ccmFile << endl;\n\n // Read query image when using pre-computed CCM\n queryImage = imread(findFile(queryFile));\n if (queryImage.empty()) {\n cout << \"Error: Unable to read query image.\" << endl;\n return -1;\n }\n } else {\n // Read input image for computing new CCM\n Mat originalImage = imread(findFile(inputFile));\n if (originalImage.empty()) {\n cout << \"Error: Unable to read input image.\" << endl;\n return -1;\n }\n\n // Process first image to compute CCM\n Mat image = originalImage.clone();\n Mat src;\n\n Ptr detector;\n if (!modelPath.empty() && !configPath.empty()) {\n Net net = readNetFromTensorflow(modelPath, configPath);\n net.setPreferableBackend(getBackendID(backend));\n net.setPreferableTarget(getTargetID(target));\n detector = CCheckerDetector::create(net);\n cout << \"Using DNN-based checker detector.\" << endl;\n } else {\n detector = CCheckerDetector::create();\n cout << \"Using thresholding-based checker detector.\" << endl;\n }\n detector->setColorChartType(chartType);\n\n if (!processFrame(image, detector, src, nc)) {\n cout << \"No chart detected in the input image!\" << endl;\n return -1;\n }\n // Convert to double and normalize\n src.convertTo(src, CV_64F, 1.0/255.0);\n\n // Color correction model\n model = ColorCorrectionModel(src, COLORCHECKER_MACBETH);\n model.setCcmType(CCM_LINEAR);\n model.setDistance(DISTANCE_CIE2000);\n model.setLinearization(LINEARIZATION_GAMMA);\n model.setLinearizationGamma(2.2);\n\n Mat ccm = model.compute();\n cout << \"Computed CCM Matrix:\\n\" << ccm << endl;\n cout << \"Loss: \" << model.getLoss() << endl;\n\n // Save model parameters to YAML file\n FileStorage fs(\"ccm_output.yaml\", FileStorage::WRITE);\n model.write(fs);\n fs.release();\n cout << \"Model parameters saved to ccm_output.yaml\" << endl;\n\n // Set 
query image for correction\n if (queryFile.empty()) {\n cout << \"[WARN] No query image provided, applying color correction on input image\" << endl;\n queryImage = originalImage.clone();\n } else {\n queryImage = imread(findFile(queryFile));\n if (queryImage.empty()) {\n cout << \"Error: Unable to read query image.\" << endl;\n return -1;\n }\n }\n }\n\n // Apply correction to query image\n Mat calibratedImage, normalizedImage;\n cvtColor(queryImage, normalizedImage, COLOR_BGR2RGB);\n normalizedImage.convertTo(normalizedImage, CV_64F, 1.0/255.0); // Convert to double and normalize\n model.correctImage(normalizedImage, calibratedImage);", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2091429332", - "repo_full_name": "opencv/opencv", - "pr_number": 27051, - "pr_file": "samples/cpp/color_correction_model.cpp", - "discussion_id": "2091429332", - "commented_code": "@@ -0,0 +1,232 @@\n+//! [tutorial]\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \"../dnn/common.hpp\"\n+\n+using namespace std;\n+using namespace cv;\n+using namespace cv::dnn;\n+using namespace cv::ccm;\n+using namespace mcc;\n+\n+const string about =\n+ \"This sample detects Macbeth color checker using DNN or thresholding and applies color correction.\"\n+ \"To run default:\\n\"\n+ \"\\t ./example_cpp_color_correction_model --input=path/to/your/input/image --query=path/to/your/query/image\\n\"\n+ \"With DNN model:\\n\"\n+ \"\\t ./example_cpp_color_correction_model mcc --input=path/to/your/input/image --query=path/to/your/query/image\\n\\n\"\n+ \"Using pre-computed CCM:\\n\"\n+ \"\\t ./example_cpp_color_correction_model mcc --ccm_file=path/to/ccm_output.yaml --query=path/to/your/query/image\\n\\n\"\n+ \"Model path can also be specified using --model argument. And config path can be specified using --config. Download it using python download_models.py mcc from dnn samples directory\\n\\n\";\n+\n+const string param_keys =\n+ \"{ help h | | Print help message. }\"\n+ \"{ @alias | | An alias name of model to extract preprocessing parameters from models.yml file. }\"\n+ \"{ zoo | ../dnn/models.yml | An optional path to file with preprocessing parameters }\"\n+ \"{ input i | mcc_ccm_test.jpg | Path to input image for computing CCM.}\"\n+ \"{ query q | baboon.jpg | Path to query image to apply color correction. If not provided, input image will be used. 
}\"\n+ \"{ type | 0 | chartType: 0-Standard, 1-DigitalSG, 2-Vinyl }\"\n+ \"{ num_charts | 1 | Maximum number of charts in the image }\"\n+ \"{ ccm_file | | Path to YAML file containing pre-computed CCM parameters}\";\n+\n+const string backend_keys = format(\n+ \"{ backend | default | Choose one of computation backends: \"\n+ \"default: automatically (by default), \"\n+ \"openvino: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), \"\n+ \"opencv: OpenCV implementation, \"\n+ \"vkcom: VKCOM, \"\n+ \"cuda: CUDA, \"\n+ \"webnn: WebNN }\");\n+\n+const string target_keys = format(\n+ \"{ target | cpu | Choose one of target computation devices: \"\n+ \"cpu: CPU target (by default), \"\n+ \"opencl: OpenCL, \"\n+ \"opencl_fp16: OpenCL fp16 (half-float precision), \"\n+ \"vpu: VPU, \"\n+ \"vulkan: Vulkan, \"\n+ \"cuda: CUDA, \"\n+ \"cuda_fp16: CUDA fp16 (half-float preprocess) }\");\n+\n+string keys = param_keys + backend_keys + target_keys;\n+\n+static bool processFrame(const Mat& frame, Ptr detector, Mat& src, int nc){\n+ if (!detector->process(frame, nc))\n+ {\n+ return false;\n+ }\n+ vector> checkers = detector->getListColorChecker();\n+ src = checkers[0]->getChartsRGB(false);\n+\n+ return true;\n+}\n+\n+int main(int argc, char* argv[]) {\n+ CommandLineParser parser(argc, argv, keys);\n+ parser.about(about);\n+\n+ if (parser.has(\"help\")) {\n+ cout << about << endl;\n+ parser.printMessage();\n+ return 0;\n+ }\n+\n+ string modelName = parser.get(\"@alias\");\n+ string zooFile = parser.get(\"zoo\");\n+ const char* path = getenv(\"OPENCV_SAMPLES_DATA_PATH\");\n+\n+ if ((path != NULL) || parser.has(\"@alias\")) {\n+ zooFile = findFile(zooFile);\n+ }\n+ else{\n+ cout<<\"[WARN] set the environment variables or pass the arguments --model, --config and models.yml file using --zoo for using dnn based detector. 
Continuing with default detector.\\n\\n\";\n+ }\n+ keys += genPreprocArguments(modelName, zooFile);\n+ parser = CommandLineParser(argc, argv, keys);\n+\n+ int t = parser.get(\"type\");\n+ if (t < 0 || t > 2)\n+ {\n+ cout << \"Error: --type must be 0, 1 or 2\" << endl;\n+ parser.printMessage(); // prints full usage\n+ return -1;\n+ }\n+\n+ ColorChart chartType = ColorChart(t);\n+\n+ const string sha1 = parser.get(\"sha1\");\n+ const string modelPath = findModel(parser.get(\"model\"), sha1);\n+ const string config_sha1 = parser.get(\"config_sha1\");\n+ const string configPath = findModel(parser.get(\"config\"), config_sha1);\n+ const string backend = parser.get(\"backend\");\n+ const string target = parser.get(\"target\");\n+\n+ int nc = parser.get(\"num_charts\");\n+\n+ // Get input and target image paths\n+ const string inputFile = parser.get(\"input\");\n+ const string queryFile = parser.get(\"query\");\n+ const string ccmFile = parser.get(\"ccm_file\");\n+\n+ if (!ccmFile.empty()) {\n+ // When ccm_file is provided, only query is required\n+ if (queryFile.empty()) {\n+ cout << \"Error: Query image path must be provided when using pre-computed CCM.\" << endl;\n+ parser.printMessage();\n+ return -1;\n+ }\n+ } else {\n+ // Original validation for when computing new CCM\n+ if (inputFile.empty()) {\n+ cout << \"Error: Input image path must be provided.\" << endl;\n+ parser.printMessage();\n+ return -1;\n+ }\n+ }\n+\n+ ColorCorrectionModel model;\n+ Mat queryImage;\n+\n+ if (!ccmFile.empty()) {\n+ // Load CCM from YAML file\n+ FileStorage fs(ccmFile, FileStorage::READ);\n+ if (!fs.isOpened()) {\n+ cout << \"Error: Unable to open CCM file: \" << ccmFile << endl;\n+ return -1;\n+ }\n+ model.read(fs[\"ColorCorrectionModel\"]);\n+ fs.release();\n+ cout << \"Loaded CCM from file: \" << ccmFile << endl;\n+\n+ // Read query image when using pre-computed CCM\n+ queryImage = imread(findFile(queryFile));\n+ if (queryImage.empty()) {\n+ cout << \"Error: Unable to read query image.\" << endl;\n+ return -1;\n+ }\n+ } else {\n+ // Read input image for computing new CCM\n+ Mat originalImage = imread(findFile(inputFile));\n+ if (originalImage.empty()) {\n+ cout << \"Error: Unable to read input image.\" << endl;\n+ return -1;\n+ }\n+\n+ // Process first image to compute CCM\n+ Mat image = originalImage.clone();\n+ Mat src;\n+\n+ Ptr detector;\n+ if (!modelPath.empty() && !configPath.empty()) {\n+ Net net = readNetFromTensorflow(modelPath, configPath);\n+ net.setPreferableBackend(getBackendID(backend));\n+ net.setPreferableTarget(getTargetID(target));\n+ detector = CCheckerDetector::create(net);\n+ cout << \"Using DNN-based checker detector.\" << endl;\n+ } else {\n+ detector = CCheckerDetector::create();\n+ cout << \"Using thresholding-based checker detector.\" << endl;\n+ }\n+ detector->setColorChartType(chartType);\n+\n+ if (!processFrame(image, detector, src, nc)) {\n+ cout << \"No chart detected in the input image!\" << endl;\n+ return -1;\n+ }\n+ // Convert to double and normalize\n+ src.convertTo(src, CV_64F, 1.0/255.0);\n+\n+ // Color correction model\n+ model = ColorCorrectionModel(src, COLORCHECKER_MACBETH);\n+ model.setCcmType(CCM_LINEAR);\n+ model.setDistance(DISTANCE_CIE2000);\n+ model.setLinearization(LINEARIZATION_GAMMA);\n+ model.setLinearizationGamma(2.2);\n+\n+ Mat ccm = model.compute();\n+ cout << \"Computed CCM Matrix:\\n\" << ccm << endl;\n+ cout << \"Loss: \" << model.getLoss() << endl;\n+\n+ // Save model parameters to YAML file\n+ FileStorage fs(\"ccm_output.yaml\", 
FileStorage::WRITE);\n+ model.write(fs);\n+ fs.release();\n+ cout << \"Model parameters saved to ccm_output.yaml\" << endl;\n+\n+ // Set query image for correction\n+ if (queryFile.empty()) {\n+ cout << \"[WARN] No query image provided, applying color correction on input image\" << endl;\n+ queryImage = originalImage.clone();\n+ } else {\n+ queryImage = imread(findFile(queryFile));\n+ if (queryImage.empty()) {\n+ cout << \"Error: Unable to read query image.\" << endl;\n+ return -1;\n+ }\n+ }\n+ }\n+\n+ // Apply correction to query image\n+ Mat calibratedImage, normalizedImage;\n+ cvtColor(queryImage, normalizedImage, COLOR_BGR2RGB);\n+ normalizedImage.convertTo(normalizedImage, CV_64F, 1.0/255.0); // Convert to double and normalize\n+ model.correctImage(normalizedImage, calibratedImage);", - "comment_created_at": "2025-05-15T15:14:56+00:00", - "comment_author": "asmorkalov", - "comment_body": "I propose to Move BRG/RGB logic into model with a flag (default to bgr). All OpenCV functions are designed for BGR. The flag allows to make optimizations without API change.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1022288868", - "pr_number": 22780, - "pr_file": "modules/core/src/va_intel.cpp", - "created_at": "2022-11-15T03:49:31+00:00", - "commented_code": "#else // !HAVE_VA\n\n# ifdef HAVE_VA_INTEL\n if (tryInterop)\n\n Context& va_ctx = OpenCLExecutionContext::getCurrent().getContext();\n VAAPIInterop* interop = va_ctx.getUserContext().get();\n if (tryInterop && interop == NULL)", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1022288868", - "repo_full_name": "opencv/opencv", - "pr_number": 22780, - "pr_file": "modules/core/src/va_intel.cpp", - "discussion_id": "1022288868", - "commented_code": "@@ -85,7 +85,10 @@ Context& initializeContextFromVA(VADisplay display, bool tryInterop)\n #else // !HAVE_VA\n \n # ifdef HAVE_VA_INTEL\n- if (tryInterop)\n+\n+ Context& va_ctx = OpenCLExecutionContext::getCurrent().getContext();\n+ VAAPIInterop* interop = va_ctx.getUserContext().get();\n+ if (tryInterop && interop == NULL)", - "comment_created_at": "2022-11-15T03:49:31+00:00", - "comment_author": "alalek", - "comment_body": "Function is called `initializeContextFromVA()`.\r\nBut implementation of this function is confusing (see below).\r\n\r\nExpected result is OpenCL context with specified `VADisplay display`.\r\nReturning result with **another** `display` is wrong.\r\nFallback paths (and `tryInterop` parameter) should not be there. Error should be raised instead (actually one fallback do that through `NO_VA_SUPPORT_ERROR`). `tryInterop` parameter should be dropped too.\r\n\r\nReference: `initializeContextFromD3D11Device()`\r\n\r\nRelates #5272 (and https://github.com/opencv/opencv/pull/5272#discussion_r38926396 is correct and we a have design problem here)", - "pr_file_module": null - }, - { - "comment_id": "1024888786", - "repo_full_name": "opencv/opencv", - "pr_number": 22780, - "pr_file": "modules/core/src/va_intel.cpp", - "discussion_id": "1022288868", - "commented_code": "@@ -85,7 +85,10 @@ Context& initializeContextFromVA(VADisplay display, bool tryInterop)\n #else // !HAVE_VA\n \n # ifdef HAVE_VA_INTEL\n- if (tryInterop)\n+\n+ Context& va_ctx = OpenCLExecutionContext::getCurrent().getContext();\n+ VAAPIInterop* interop = va_ctx.getUserContext().get();\n+ if (tryInterop && interop == NULL)", - "comment_created_at": "2022-11-17T08:32:54+00:00", - "comment_author": "kallaballa", - "comment_body": "I think I understand. 
I am on it", - "pr_file_module": null - }, - { - "comment_id": "1027130746", - "repo_full_name": "opencv/opencv", - "pr_number": 22780, - "pr_file": "modules/core/src/va_intel.cpp", - "discussion_id": "1022288868", - "commented_code": "@@ -85,7 +85,10 @@ Context& initializeContextFromVA(VADisplay display, bool tryInterop)\n #else // !HAVE_VA\n \n # ifdef HAVE_VA_INTEL\n- if (tryInterop)\n+\n+ Context& va_ctx = OpenCLExecutionContext::getCurrent().getContext();\n+ VAAPIInterop* interop = va_ctx.getUserContext().get();\n+ if (tryInterop && interop == NULL)", - "comment_created_at": "2022-11-19T19:12:26+00:00", - "comment_author": "kallaballa", - "comment_body": "But how to handle VA (non-Intel)? Is it correct to return a context without interop?", - "pr_file_module": null - }, - { - "comment_id": "1027131155", - "repo_full_name": "opencv/opencv", - "pr_number": 22780, - "pr_file": "modules/core/src/va_intel.cpp", - "discussion_id": "1022288868", - "commented_code": "@@ -85,7 +85,10 @@ Context& initializeContextFromVA(VADisplay display, bool tryInterop)\n #else // !HAVE_VA\n \n # ifdef HAVE_VA_INTEL\n- if (tryInterop)\n+\n+ Context& va_ctx = OpenCLExecutionContext::getCurrent().getContext();\n+ VAAPIInterop* interop = va_ctx.getUserContext().get();\n+ if (tryInterop && interop == NULL)", - "comment_created_at": "2022-11-19T19:16:53+00:00", - "comment_author": "kallaballa", - "comment_body": "Also. what happens if it is called multiple times and the context is already with VADisplay? I am asking specifically because ```initializeContextFromVA()``` is called from withing VideoWriter and VideoCapture.\r\n", - "pr_file_module": null - }, - { - "comment_id": "1027131595", - "repo_full_name": "opencv/opencv", - "pr_number": 22780, - "pr_file": "modules/core/src/va_intel.cpp", - "discussion_id": "1022288868", - "commented_code": "@@ -85,7 +85,10 @@ Context& initializeContextFromVA(VADisplay display, bool tryInterop)\n #else // !HAVE_VA\n \n # ifdef HAVE_VA_INTEL\n- if (tryInterop)\n+\n+ Context& va_ctx = OpenCLExecutionContext::getCurrent().getContext();\n+ VAAPIInterop* interop = va_ctx.getUserContext().get();\n+ if (tryInterop && interop == NULL)", - "comment_created_at": "2022-11-19T19:21:13+00:00", - "comment_author": "kallaballa", - "comment_body": "I myself have a proposal for what context handling should maybe look like based on my [4.x fork](https://github.com/kallaballa/opencv/tree/GCV): https://github.com/kallaballa/GCV/blob/main/src/video/video-demo.cpp\r\n\r\nIt basically creates a context for each interop-ability (OpenGL/VAAPI) copies them and when they are needed it binds them. 
So only one ```initializeContextFromVA()```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1889809784", - "pr_number": 25584, - "pr_file": "modules/videoio/src/backend_plugin.cpp", - "created_at": "2024-12-18T08:23:40+00:00", - "commented_code": "return Ptr();\n}\n\nPtr PluginBackend::createCapture(std::streambuf& source, const VideoCaptureParameters& params) const", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1889809784", - "repo_full_name": "opencv/opencv", - "pr_number": 25584, - "pr_file": "modules/videoio/src/backend_plugin.cpp", - "discussion_id": "1889809784", - "commented_code": "@@ -704,6 +731,25 @@ Ptr PluginBackend::createCapture(const std::string &filename, con\n return Ptr();\n }\n \n+Ptr PluginBackend::createCapture(std::streambuf& source, const VideoCaptureParameters& params) const", - "comment_created_at": "2024-12-18T08:23:40+00:00", - "comment_author": "opencv-alalek", - "comment_body": "No need to use `std::streambuf` anywhere in API.\r\nWe need to use internally just simplified class which provides seek/read **interface**.\r\n\r\nPublic API should have simple converter from this.", - "pr_file_module": null - }, - { - "comment_id": "1895328978", - "repo_full_name": "opencv/opencv", - "pr_number": 25584, - "pr_file": "modules/videoio/src/backend_plugin.cpp", - "discussion_id": "1889809784", - "commented_code": "@@ -704,6 +731,25 @@ Ptr PluginBackend::createCapture(const std::string &filename, con\n return Ptr();\n }\n \n+Ptr PluginBackend::createCapture(std::streambuf& source, const VideoCaptureParameters& params) const", - "comment_created_at": "2024-12-23T06:56:59+00:00", - "comment_author": "dkurt", - "comment_body": "Thanks for the review. Added a simplified interface, but it\u2019s private for internal conversion.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1895741926", - "pr_number": 25584, - "pr_file": "modules/videoio/src/cap.cpp", - "created_at": "2024-12-23T13:13:30+00:00", - "commented_code": "return false;\n}\n\nbool VideoCapture::open(std::streambuf& stream, int apiPreference, const std::vector& params)\n{\n CV_INSTRUMENT_REGION();\n\n if (isOpened())\n {\n release();\n }\n\n const VideoCaptureParameters parameters(params);\n const std::vector backends = cv::videoio_registry::getAvailableBackends_CaptureByBuffer();\n for (size_t i = 0; i < backends.size(); i++)\n {\n const VideoBackendInfo& info = backends[i];\n if (apiPreference == CAP_ANY || apiPreference == info.id)\n {\n if (!info.backendFactory)\n {\n CV_LOG_DEBUG(NULL, \"VIDEOIO(\" << info.name << \"): factory is not available (plugins require filesystem support)\");\n continue;\n }\n CV_CAPTURE_LOG_DEBUG(NULL,\n cv::format(\"VIDEOIO(%s): trying capture buffer ...\",\n info.name));\n CV_Assert(!info.backendFactory.empty());\n const Ptr backend = info.backendFactory->getBackend();\n if (!backend.empty())\n {\n try\n {\n icap = backend->createCapture(StreambufReadStream::create(stream), parameters);", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1895741926", - "repo_full_name": "opencv/opencv", - "pr_number": 25584, - "pr_file": "modules/videoio/src/cap.cpp", - "discussion_id": "1895741926", - "commented_code": "@@ -228,6 +235,128 @@ bool VideoCapture::open(const String& filename, int apiPreference, const std::ve\n return false;\n }\n \n+bool VideoCapture::open(std::streambuf& stream, int apiPreference, const std::vector& params)\n+{\n+ CV_INSTRUMENT_REGION();\n+\n+ if (isOpened())\n+ {\n+ 
release();\n+ }\n+\n+ const VideoCaptureParameters parameters(params);\n+ const std::vector backends = cv::videoio_registry::getAvailableBackends_CaptureByBuffer();\n+ for (size_t i = 0; i < backends.size(); i++)\n+ {\n+ const VideoBackendInfo& info = backends[i];\n+ if (apiPreference == CAP_ANY || apiPreference == info.id)\n+ {\n+ if (!info.backendFactory)\n+ {\n+ CV_LOG_DEBUG(NULL, \"VIDEOIO(\" << info.name << \"): factory is not available (plugins require filesystem support)\");\n+ continue;\n+ }\n+ CV_CAPTURE_LOG_DEBUG(NULL,\n+ cv::format(\"VIDEOIO(%s): trying capture buffer ...\",\n+ info.name));\n+ CV_Assert(!info.backendFactory.empty());\n+ const Ptr backend = info.backendFactory->getBackend();\n+ if (!backend.empty())\n+ {\n+ try\n+ {\n+ icap = backend->createCapture(StreambufReadStream::create(stream), parameters);", - "comment_created_at": "2024-12-23T13:13:30+00:00", - "comment_author": "opencv-alalek", - "comment_body": "Who performs `seek(0, 0)` between trials?\r\n\r\nPerhaps we should not allow `CAP_ANY` in this API.", - "pr_file_module": null - }, - { - "comment_id": "1896163255", - "repo_full_name": "opencv/opencv", - "pr_number": 25584, - "pr_file": "modules/videoio/src/cap.cpp", - "discussion_id": "1895741926", - "commented_code": "@@ -228,6 +235,128 @@ bool VideoCapture::open(const String& filename, int apiPreference, const std::ve\n return false;\n }\n \n+bool VideoCapture::open(std::streambuf& stream, int apiPreference, const std::vector& params)\n+{\n+ CV_INSTRUMENT_REGION();\n+\n+ if (isOpened())\n+ {\n+ release();\n+ }\n+\n+ const VideoCaptureParameters parameters(params);\n+ const std::vector backends = cv::videoio_registry::getAvailableBackends_CaptureByBuffer();\n+ for (size_t i = 0; i < backends.size(); i++)\n+ {\n+ const VideoBackendInfo& info = backends[i];\n+ if (apiPreference == CAP_ANY || apiPreference == info.id)\n+ {\n+ if (!info.backendFactory)\n+ {\n+ CV_LOG_DEBUG(NULL, \"VIDEOIO(\" << info.name << \"): factory is not available (plugins require filesystem support)\");\n+ continue;\n+ }\n+ CV_CAPTURE_LOG_DEBUG(NULL,\n+ cv::format(\"VIDEOIO(%s): trying capture buffer ...\",\n+ info.name));\n+ CV_Assert(!info.backendFactory.empty());\n+ const Ptr backend = info.backendFactory->getBackend();\n+ if (!backend.empty())\n+ {\n+ try\n+ {\n+ icap = backend->createCapture(StreambufReadStream::create(stream), parameters);", - "comment_created_at": "2024-12-23T22:18:46+00:00", - "comment_author": "dkurt", - "comment_body": "Good point, there is no explicit seek. 
Added a check that `apiPref != CAP_ANY`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1856167263", - "pr_number": 26511, - "pr_file": "modules/photo/src/seamless_cloning.cpp", - "created_at": "2024-11-25T08:56:04+00:00", - "commented_code": "{\n CV_INSTRUMENT_REGION();\n CV_Assert(!_src.empty());\n CV_Assert(!_dst.empty());\n\n const Mat src = _src.getMat();\n const Mat dest = _dst.getMat();\n\n if (p == Point())\n {\n p.x = dest.size().width / 2;\n p.y = dest.size().height / 2;\n }", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1856167263", - "repo_full_name": "opencv/opencv", - "pr_number": 26511, - "pr_file": "modules/photo/src/seamless_cloning.cpp", - "discussion_id": "1856167263", - "commented_code": "@@ -68,17 +68,30 @@ void cv::seamlessClone(InputArray _src, InputArray _dst, InputArray _mask, Point\n {\n CV_INSTRUMENT_REGION();\n CV_Assert(!_src.empty());\n+ CV_Assert(!_dst.empty());\n \n const Mat src = _src.getMat();\n const Mat dest = _dst.getMat();\n+\n+ if (p == Point())\n+ {\n+ p.x = dest.size().width / 2;\n+ p.y = dest.size().height / 2;\n+ }", - "comment_created_at": "2024-11-25T08:56:04+00:00", - "comment_author": "asmorkalov", - "comment_body": "The `p` parameter does not have default value like `Point p = Point()`. It means that the point is user provided value. I do not think that we sound handle `Point()`, a.k.a. (0,0) as special case.", - "pr_file_module": null - }, - { - "comment_id": "1856216698", - "repo_full_name": "opencv/opencv", - "pr_number": 26511, - "pr_file": "modules/photo/src/seamless_cloning.cpp", - "discussion_id": "1856167263", - "commented_code": "@@ -68,17 +68,30 @@ void cv::seamlessClone(InputArray _src, InputArray _dst, InputArray _mask, Point\n {\n CV_INSTRUMENT_REGION();\n CV_Assert(!_src.empty());\n+ CV_Assert(!_dst.empty());\n \n const Mat src = _src.getMat();\n const Mat dest = _dst.getMat();\n+\n+ if (p == Point())\n+ {\n+ p.x = dest.size().width / 2;\n+ p.y = dest.size().height / 2;\n+ }", - "comment_created_at": "2024-11-25T09:25:31+00:00", - "comment_author": "sturkmen72", - "comment_body": "what about using Point(-1,-1) to centralize as special case ? \r\n", - "pr_file_module": null - }, - { - "comment_id": "1856333478", - "repo_full_name": "opencv/opencv", - "pr_number": 26511, - "pr_file": "modules/photo/src/seamless_cloning.cpp", - "discussion_id": "1856167263", - "commented_code": "@@ -68,17 +68,30 @@ void cv::seamlessClone(InputArray _src, InputArray _dst, InputArray _mask, Point\n {\n CV_INSTRUMENT_REGION();\n CV_Assert(!_src.empty());\n+ CV_Assert(!_dst.empty());\n \n const Mat src = _src.getMat();\n const Mat dest = _dst.getMat();\n+\n+ if (p == Point())\n+ {\n+ p.x = dest.size().width / 2;\n+ p.y = dest.size().height / 2;\n+ }", - "comment_created_at": "2024-11-25T10:30:20+00:00", - "comment_author": "asmorkalov", - "comment_body": "Why do we need special cases in code? 
User should provide location, shouldn't he/she?", - "pr_file_module": null - }, - { - "comment_id": "1856531315", - "repo_full_name": "opencv/opencv", - "pr_number": 26511, - "pr_file": "modules/photo/src/seamless_cloning.cpp", - "discussion_id": "1856167263", - "commented_code": "@@ -68,17 +68,30 @@ void cv::seamlessClone(InputArray _src, InputArray _dst, InputArray _mask, Point\n {\n CV_INSTRUMENT_REGION();\n CV_Assert(!_src.empty());\n+ CV_Assert(!_dst.empty());\n \n const Mat src = _src.getMat();\n const Mat dest = _dst.getMat();\n+\n+ if (p == Point())\n+ {\n+ p.x = dest.size().width / 2;\n+ p.y = dest.size().height / 2;\n+ }", - "comment_created_at": "2024-11-25T12:26:53+00:00", - "comment_author": "sturkmen72", - "comment_body": "\"In issue #15928, the user reports:\r\n\r\n>>I had:\r\n\r\n```\r\nsrc.shape = 500,500\r\nsrc_mask = 500,500\r\ndest.shape = 500,500\r\n\r\nh, w = src_mask.shape[:2]\r\n```\r\n>>At this point, I didn't see the need for the required argument `p`, as I expected the function to automatically match the mask. However, since `p` is a required argument, I provided `(w//2, h//2)` for `p` (the center of the mask). But instead of aligning the source image (`src`) into the destination (`dest`), the resulting cloning was shifted up to the center of the screen!\r\nIt seems like there may have been an issue with how the center point was calculated or used. The user expected the function to align the source (src) based on the mask (src_mask) automatically, but instead, the function seemed to shift the cloning to an unexpected location.\r\n\r\nPerhaps the logic for finding the center point could be improved or made more intuitive to help avoid confusion in such cases. A clearer method of determining the center, or perhaps some additional documentation on how to properly use the p argument, might help facilitate this process.\"", - "pr_file_module": null - }, - { - "comment_id": "1856553811", - "repo_full_name": "opencv/opencv", - "pr_number": 26511, - "pr_file": "modules/photo/src/seamless_cloning.cpp", - "discussion_id": "1856167263", - "commented_code": "@@ -68,17 +68,30 @@ void cv::seamlessClone(InputArray _src, InputArray _dst, InputArray _mask, Point\n {\n CV_INSTRUMENT_REGION();\n CV_Assert(!_src.empty());\n+ CV_Assert(!_dst.empty());\n \n const Mat src = _src.getMat();\n const Mat dest = _dst.getMat();\n+\n+ if (p == Point())\n+ {\n+ p.x = dest.size().width / 2;\n+ p.y = dest.size().height / 2;\n+ }", - "comment_created_at": "2024-11-25T12:43:22+00:00", - "comment_author": "sturkmen72", - "comment_body": "also in the issue the user commented \r\n>>I got the expected behaviour by using a rect of thickness 2\r\n\r\n```\r\nh, w = src_mask.shape[:2]\r\n# rect of thickness > 1\r\ncv2.rectangle(src_mask,(0,0), (w, h) ,255, 2)\r\n# the clone will now aligns from the centre\r\nout=cv2.seamlessClone(src, dest, src_mask,(w//2, h//2),flags= cv2.NORMAL_CLONE)\r\n```\r\na rect of 1 thickness did not work.\r\n\r\n", - "pr_file_module": null - }, - { - "comment_id": "1856554333", - "repo_full_name": "opencv/opencv", - "pr_number": 26511, - "pr_file": "modules/photo/src/seamless_cloning.cpp", - "discussion_id": "1856167263", - "commented_code": "@@ -68,17 +68,30 @@ void cv::seamlessClone(InputArray _src, InputArray _dst, InputArray _mask, Point\n {\n CV_INSTRUMENT_REGION();\n CV_Assert(!_src.empty());\n+ CV_Assert(!_dst.empty());\n \n const Mat src = _src.getMat();\n const Mat dest = _dst.getMat();\n+\n+ if (p == Point())\n+ {\n+ p.x = dest.size().width / 2;\n+ p.y = 
dest.size().height / 2;\n+ }", - "comment_created_at": "2024-11-25T12:43:44+00:00", - "comment_author": "sturkmen72", - "comment_body": "i will revert this idea if you want.", - "pr_file_module": null - }, - { - "comment_id": "1856639391", - "repo_full_name": "opencv/opencv", - "pr_number": 26511, - "pr_file": "modules/photo/src/seamless_cloning.cpp", - "discussion_id": "1856167263", - "commented_code": "@@ -68,17 +68,30 @@ void cv::seamlessClone(InputArray _src, InputArray _dst, InputArray _mask, Point\n {\n CV_INSTRUMENT_REGION();\n CV_Assert(!_src.empty());\n+ CV_Assert(!_dst.empty());\n \n const Mat src = _src.getMat();\n const Mat dest = _dst.getMat();\n+\n+ if (p == Point())\n+ {\n+ p.x = dest.size().width / 2;\n+ p.y = dest.size().height / 2;\n+ }", - "comment_created_at": "2024-11-25T13:42:54+00:00", - "comment_author": "sturkmen72", - "comment_body": "there is related issues on some repos like https://github.com/OpenTalker/SadTalker/issues/542\r\nlet me investigate if\r\n\r\n```\r\n {\r\n p.x = dest.size().width / 2;\r\n p.y = dest.size().height / 2;\r\n }\r\n```\r\nworks perfectly fine for different dimensions", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-code-for-readability.json b/_reviewers/opencv-code-for-readability.json new file mode 100644 index 0000000..f658d45 --- /dev/null +++ b/_reviewers/opencv-code-for-readability.json @@ -0,0 +1,360 @@ +[ + { + "discussion_id": "2172502541", + "pr_number": 27460, + "pr_file": "modules/gapi/src/backends/ov/govbackend.cpp", + "created_at": "2025-06-27T17:15:47+00:00", + "commented_code": "const std::vector &nodes)\n : m_g(g), m_gm(m_g) {\n\n if(cv::gapi::getCompileArg>(compileArgs).has_value()) {", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2172502541", + "repo_full_name": "opencv/opencv", + "pr_number": 27460, + "pr_file": "modules/gapi/src/backends/ov/govbackend.cpp", + "discussion_id": "2172502541", + "commented_code": "@@ -1542,6 +1542,10 @@ cv::gimpl::ov::GOVExecutable::GOVExecutable(const ade::Graph &g,\n const std::vector &nodes)\n : m_g(g), m_gm(m_g) {\n \n+ if(cv::gapi::getCompileArg>(compileArgs).has_value()) {", + "comment_created_at": "2025-06-27T17:15:47+00:00", + "comment_author": "AsyaPronina", + "comment_body": "Better to split this check for two lines: to retrieve compile arg and then check if it `has_value()`", + "pr_file_module": null + }, + { + "comment_id": "2172504387", + "repo_full_name": "opencv/opencv", + "pr_number": 27460, + "pr_file": "modules/gapi/src/backends/ov/govbackend.cpp", + "discussion_id": "2172502541", + "commented_code": "@@ -1542,6 +1542,10 @@ cv::gimpl::ov::GOVExecutable::GOVExecutable(const ade::Graph &g,\n const std::vector &nodes)\n : m_g(g), m_gm(m_g) {\n \n+ if(cv::gapi::getCompileArg>(compileArgs).has_value()) {", + "comment_created_at": "2025-06-27T17:16:22+00:00", + "comment_author": "AsyaPronina", + "comment_body": "or might be create a brief alias for `std::reference_wrapper` here", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1851047891", + "pr_number": 26366, + "pr_file": "modules/features2d/src/orb.cpp", + "created_at": "2024-11-20T22:12:40+00:00", + "commented_code": "bool useOCL = false;\n#endif\n\n Mat image = _image.getMat(), mask = _mask.getMat();\n Mat image = _image.getMat(), mask;\n if (!_mask.empty())\n {\n cv::threshold(_mask.getMat(), mask, 0, 255, cv::THRESH_BINARY);", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1851047891", + "repo_full_name": 
"opencv/opencv", + "pr_number": 26366, + "pr_file": "modules/features2d/src/orb.cpp", + "discussion_id": "1851047891", + "commented_code": "@@ -1036,7 +1036,11 @@ void ORB_Impl::detectAndCompute( InputArray _image, InputArray _mask,\n bool useOCL = false;\n #endif\n \n- Mat image = _image.getMat(), mask = _mask.getMat();\n+ Mat image = _image.getMat(), mask;\n+ if (!_mask.empty())\n+ {\n+ cv::threshold(_mask.getMat(), mask, 0, 255, cv::THRESH_BINARY);", + "comment_created_at": "2024-11-20T22:12:40+00:00", + "comment_author": "sturkmen72", + "comment_body": "threshold(_mask, mask, 0, 255, THRESH_BINARY);", + "pr_file_module": null + }, + { + "comment_id": "1851260237", + "repo_full_name": "opencv/opencv", + "pr_number": 26366, + "pr_file": "modules/features2d/src/orb.cpp", + "discussion_id": "1851047891", + "commented_code": "@@ -1036,7 +1036,11 @@ void ORB_Impl::detectAndCompute( InputArray _image, InputArray _mask,\n bool useOCL = false;\n #endif\n \n- Mat image = _image.getMat(), mask = _mask.getMat();\n+ Mat image = _image.getMat(), mask;\n+ if (!_mask.empty())\n+ {\n+ cv::threshold(_mask.getMat(), mask, 0, 255, cv::THRESH_BINARY);", + "comment_created_at": "2024-11-21T03:21:15+00:00", + "comment_author": "shyama7004", + "comment_body": "Your snippet does the same thing but in a concise way:), do you want me to implement this change?", + "pr_file_module": null + }, + { + "comment_id": "1851462580", + "repo_full_name": "opencv/opencv", + "pr_number": 26366, + "pr_file": "modules/features2d/src/orb.cpp", + "discussion_id": "1851047891", + "commented_code": "@@ -1036,7 +1036,11 @@ void ORB_Impl::detectAndCompute( InputArray _image, InputArray _mask,\n bool useOCL = false;\n #endif\n \n- Mat image = _image.getMat(), mask = _mask.getMat();\n+ Mat image = _image.getMat(), mask;\n+ if (!_mask.empty())\n+ {\n+ cv::threshold(_mask.getMat(), mask, 0, 255, cv::THRESH_BINARY);", + "comment_created_at": "2024-11-21T06:58:16+00:00", + "comment_author": "sturkmen72", + "comment_body": "yes exactly does the same thing.\r\nno need to `cv::` prefix and `.getMat()`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1450572150", + "pr_number": 24768, + "pr_file": "modules/dnn/src/layers/nary_eltwise_layers.cpp", + "created_at": "2024-01-12T15:02:26+00:00", + "commented_code": "return Ptr(new InfEngineNgraphNode(node));\n }\n#endif\n\n#ifdef HAVE_VULKAN\n virtual Ptr initVkCom(const std::vector > &inputs,\n std::vector > &outputs) CV_OVERRIDE\n {\n std::vector> inputWrappers;\n std::vector inputShapes;\n std::transform(inputs.begin(), inputs.end(), std::back_inserter(inputWrappers), [] (const Ptr& w) { return w.dynamicCast(); });\n for (const auto &ptr: inputWrappers) {\n CV_Assert(ptr);\n }\n std::transform(inputWrappers.begin(), inputWrappers.end(), std::back_inserter(inputShapes), [] (const Ptr& w) { return shape(*(w->getMat())); });\n auto outputWrapper = outputs[0].dynamicCast();\n CV_Assert(outputWrapper);\n auto outputShape = shape(*(outputWrapper->getMat()));\n std::vector vkBlobs; // TODO(vk) what\n\n // collect all input\n int ninputs = inputs.size();\n std::vector v_inp;\n std::transform(inputWrappers.begin(), inputWrappers.end(), std::back_inserter(v_inp), [] (const Ptr &w) { return (w->getMat())->template ptr(); });\n // const char** inp = v_inp.data();\n\n // collect ndims of all input\n std::vector v_inp_dims;\n std::transform(inputWrappers.begin(), inputWrappers.end(), std::back_inserter(v_inp_dims), [] (const Ptr &w) { return (w->getMat())->dims; });\n const int* inp_ndims = 
v_inp_dims.data();\n\n // collect shapes of all input\n std::vector v_inp_shape;\n std::transform(inputWrappers.begin(), inputWrappers.end(), std::back_inserter(v_inp_shape), [] (const Ptr &w) { return (w->getMat())->size.p; });\n const int** inp_shape = v_inp_shape.data();\n\n // collect steps of all input\n std::vector v_inp_step;\n std::transform(inputWrappers.begin(), inputWrappers.end(), std::back_inserter(v_inp_step), [] (const Ptr &w) { return (w->getMat())->step.p; });\n const size_t** inp_step = v_inp_step.data();\n\n // collect info of output (ndims, shape, step)\n // char* out = outputWrapper->getMat()->ptr();\n int out_ndims = outputWrapper->getMat()->dims;\n const int* out_shape = outputWrapper->getMat()->size.p;\n const size_t* out_step = outputWrapper->getMat()->step.p;\n\n // find max ndims for broadcasting\n int i, max_ndims = out_ndims > 2 ? out_ndims : 2;\n for(i = 0; i < ninputs; i++)\n max_ndims = max_ndims > inp_ndims[i] ? max_ndims : inp_ndims[i];\n\n // buf holds the following buffers for inputs & output:\n // * orig_shapes, shapes (result_shape), orig_steps, steps (result_step), (ninputs+1)*4 elements in total\n // * ptrs, (ninputs+1)*1 elements in total\n // * shape_buf & step_buf, (ninputs+1)*2*max_ndims elements in total\n // * all_ndims, (ninputs+1)*1 elements in total\n // * all_type_sizes, (ninputs+1)*1 elements in total\n AutoBuffer buf((ninputs + 1) * (2 * max_ndims + 7));\n\n int** orig_shapes = (int**)buf.data();\n int** shapes = orig_shapes + ninputs + 1;\n size_t** orig_steps = (size_t**)(shapes + ninputs + 1);\n size_t** steps = orig_steps + ninputs + 1;\n\n char** ptrs = (char**)(steps + ninputs + 1);\n\n size_t* step_buf = (size_t*)(ptrs + ninputs + 1);\n int* shape_buf = (int*)(step_buf + (ninputs + 1)*max_ndims);\n\n int* all_ndims = shape_buf + (ninputs + 1)*max_ndims;\n size_t* all_type_sizes = (size_t*)(all_ndims + ninputs + 1);\n\n for(i = 0; i <= ninputs; i++) {\n all_ndims[i] = i == 0 ? out_ndims : inp_ndims[i-1];\n all_type_sizes[i] = i == 0 ? outputWrapper->getMat()->elemSize() : inputWrappers[i-1]->getMat()->elemSize();\n orig_shapes[i] = (int*)(i == 0 ? out_shape : inp_shape ? inp_shape[i-1] : 0);\n orig_steps[i] = (size_t*)(i == 0 ? out_step : inp_step ? 
inp_step[i-1] : 0);\n shapes[i] = shape_buf + max_ndims*i;\n steps[i] = step_buf + max_ndims*i;\n }", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1450572150", + "repo_full_name": "opencv/opencv", + "pr_number": 24768, + "pr_file": "modules/dnn/src/layers/nary_eltwise_layers.cpp", + "discussion_id": "1450572150", + "commented_code": "@@ -928,6 +936,96 @@ class NaryEltwiseLayerImpl CV_FINAL : public NaryEltwiseLayer\n return Ptr(new InfEngineNgraphNode(node));\n }\n #endif\n+\n+#ifdef HAVE_VULKAN\n+ virtual Ptr initVkCom(const std::vector > &inputs,\n+ std::vector > &outputs) CV_OVERRIDE\n+ {\n+ std::vector> inputWrappers;\n+ std::vector inputShapes;\n+ std::transform(inputs.begin(), inputs.end(), std::back_inserter(inputWrappers), [] (const Ptr& w) { return w.dynamicCast(); });\n+ for (const auto &ptr: inputWrappers) {\n+ CV_Assert(ptr);\n+ }\n+ std::transform(inputWrappers.begin(), inputWrappers.end(), std::back_inserter(inputShapes), [] (const Ptr& w) { return shape(*(w->getMat())); });\n+ auto outputWrapper = outputs[0].dynamicCast();\n+ CV_Assert(outputWrapper);\n+ auto outputShape = shape(*(outputWrapper->getMat()));\n+ std::vector vkBlobs; // TODO(vk) what\n+\n+ // collect all input\n+ int ninputs = inputs.size();\n+ std::vector v_inp;\n+ std::transform(inputWrappers.begin(), inputWrappers.end(), std::back_inserter(v_inp), [] (const Ptr &w) { return (w->getMat())->template ptr(); });\n+ // const char** inp = v_inp.data();\n+\n+ // collect ndims of all input\n+ std::vector v_inp_dims;\n+ std::transform(inputWrappers.begin(), inputWrappers.end(), std::back_inserter(v_inp_dims), [] (const Ptr &w) { return (w->getMat())->dims; });\n+ const int* inp_ndims = v_inp_dims.data();\n+\n+ // collect shapes of all input\n+ std::vector v_inp_shape;\n+ std::transform(inputWrappers.begin(), inputWrappers.end(), std::back_inserter(v_inp_shape), [] (const Ptr &w) { return (w->getMat())->size.p; });\n+ const int** inp_shape = v_inp_shape.data();\n+\n+ // collect steps of all input\n+ std::vector v_inp_step;\n+ std::transform(inputWrappers.begin(), inputWrappers.end(), std::back_inserter(v_inp_step), [] (const Ptr &w) { return (w->getMat())->step.p; });\n+ const size_t** inp_step = v_inp_step.data();\n+\n+ // collect info of output (ndims, shape, step)\n+ // char* out = outputWrapper->getMat()->ptr();\n+ int out_ndims = outputWrapper->getMat()->dims;\n+ const int* out_shape = outputWrapper->getMat()->size.p;\n+ const size_t* out_step = outputWrapper->getMat()->step.p;\n+\n+ // find max ndims for broadcasting\n+ int i, max_ndims = out_ndims > 2 ? out_ndims : 2;\n+ for(i = 0; i < ninputs; i++)\n+ max_ndims = max_ndims > inp_ndims[i] ? 
max_ndims : inp_ndims[i];\n+\n+ // buf holds the following buffers for inputs & output:\n+ // * orig_shapes, shapes (result_shape), orig_steps, steps (result_step), (ninputs+1)*4 elements in total\n+ // * ptrs, (ninputs+1)*1 elements in total\n+ // * shape_buf & step_buf, (ninputs+1)*2*max_ndims elements in total\n+ // * all_ndims, (ninputs+1)*1 elements in total\n+ // * all_type_sizes, (ninputs+1)*1 elements in total\n+ AutoBuffer buf((ninputs + 1) * (2 * max_ndims + 7));\n+\n+ int** orig_shapes = (int**)buf.data();\n+ int** shapes = orig_shapes + ninputs + 1;\n+ size_t** orig_steps = (size_t**)(shapes + ninputs + 1);\n+ size_t** steps = orig_steps + ninputs + 1;\n+\n+ char** ptrs = (char**)(steps + ninputs + 1);\n+\n+ size_t* step_buf = (size_t*)(ptrs + ninputs + 1);\n+ int* shape_buf = (int*)(step_buf + (ninputs + 1)*max_ndims);\n+\n+ int* all_ndims = shape_buf + (ninputs + 1)*max_ndims;\n+ size_t* all_type_sizes = (size_t*)(all_ndims + ninputs + 1);\n+\n+ for(i = 0; i <= ninputs; i++) {\n+ all_ndims[i] = i == 0 ? out_ndims : inp_ndims[i-1];\n+ all_type_sizes[i] = i == 0 ? outputWrapper->getMat()->elemSize() : inputWrappers[i-1]->getMat()->elemSize();\n+ orig_shapes[i] = (int*)(i == 0 ? out_shape : inp_shape ? inp_shape[i-1] : 0);\n+ orig_steps[i] = (size_t*)(i == 0 ? out_step : inp_step ? inp_step[i-1] : 0);\n+ shapes[i] = shape_buf + max_ndims*i;\n+ steps[i] = step_buf + max_ndims*i;\n+ }", + "comment_created_at": "2024-01-12T15:02:26+00:00", + "comment_author": "fengyuentau", + "comment_body": "Since they share the same code with CPU implementation, could you extract and make it a function to call to reduce duplicate code?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1977140260", + "pr_number": 26934, + "pr_file": "modules/objdetect/src/aruco/aruco_detector.cpp", + "created_at": "2025-03-03T09:17:51+00:00", + "commented_code": "RefineParameters refineParams;\n ArucoDetectorImpl() {}\n\n ArucoDetectorImpl(const Dictionary &_dictionary, const DetectorParameters &_detectorParams,\n const RefineParameters& _refineParams): dictionary(_dictionary),\n detectorParams(_detectorParams), refineParams(_refineParams) {}\n ArucoDetectorImpl(const vector&_dictionaries, const DetectorParameters &_detectorParams,\n const RefineParameters& _refineParams): dictionaries(_dictionaries),\n detectorParams(_detectorParams), refineParams(_refineParams) {\n CV_Assert(!dictionaries.empty());\n }\n\n /*\n * @brief Detect markers either using multiple or just first dictionary\n */\n void detectMarkers(InputArray _image, OutputArrayOfArrays _corners, OutputArray _ids,\n OutputArrayOfArrays _rejectedImgPoints, OutputArray _dictIndices, DictionaryMode dictMode) {\n CV_Assert(!_image.empty());\n\n CV_Assert(detectorParams.markerBorderBits > 0);\n // check that the parameters are set correctly if Aruco3 is used\n CV_Assert(!(detectorParams.useAruco3Detection == true &&\n detectorParams.minSideLengthCanonicalImg == 0 &&\n detectorParams.minMarkerLengthRatioOriginalImg == 0.0));\n\n Mat grey;\n _convertToGrey(_image, grey);\n\n // Aruco3 functionality is the extension of Aruco.\n // The description can be found in:\n // [1] Speeded up detection of squared fiducial markers, 2018, FJ Romera-Ramirez et al.\n // if Aruco3 functionality if not wanted\n // change some parameters to be sure to turn it off\n if (!detectorParams.useAruco3Detection) {\n detectorParams.minMarkerLengthRatioOriginalImg = 0.0;\n detectorParams.minSideLengthCanonicalImg = 0;\n }\n else {\n // always turn on corner refinement in 
case of Aruco3, due to upsampling\n detectorParams.cornerRefinementMethod = (int)CORNER_REFINE_SUBPIX;\n // only CORNER_REFINE_SUBPIX implement correctly for useAruco3Detection\n // Todo: update other CORNER_REFINE methods\n }\n\n /// Step 0: equation (2) from paper [1]\n const float fxfy = (!detectorParams.useAruco3Detection ? 1.f : detectorParams.minSideLengthCanonicalImg /\n (detectorParams.minSideLengthCanonicalImg + std::max(grey.cols, grey.rows)*\n detectorParams.minMarkerLengthRatioOriginalImg));\n\n /// Step 1: create image pyramid. Section 3.4. in [1]\n vector grey_pyramid;\n int closest_pyr_image_idx = 0, num_levels = 0;\n //// Step 1.1: resize image with equation (1) from paper [1]\n if (detectorParams.useAruco3Detection) {\n const float scale_pyr = 2.f;\n const float img_area = static_cast(grey.rows*grey.cols);\n const float min_area_marker = static_cast(detectorParams.minSideLengthCanonicalImg*\n detectorParams.minSideLengthCanonicalImg);\n // find max level\n num_levels = static_cast(log2(img_area / min_area_marker)/scale_pyr);\n // the closest pyramid image to the downsampled segmentation image\n // will later be used as start index for corner upsampling\n const float scale_img_area = img_area * fxfy * fxfy;\n closest_pyr_image_idx = cvRound(log2(img_area / scale_img_area)/scale_pyr);\n }\n buildPyramid(grey, grey_pyramid, num_levels);\n\n // resize to segmentation image\n // in this reduces size the contours will be detected\n if (fxfy != 1.f)\n resize(grey, grey, Size(cvRound(fxfy * grey.cols), cvRound(fxfy * grey.rows)));\n\n /// STEP 2: Detect marker candidates\n vector > candidates;\n vector > contours;\n vector ids;\n\n /// STEP 2.a Detect marker candidates :: using AprilTag\n if(detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_APRILTAG){\n _apriltag(grey, detectorParams, candidates, contours);\n }\n /// STEP 2.b Detect marker candidates :: traditional way\n else {\n detectCandidates(grey, candidates, contours);\n }\n\n /// STEP 2.c FILTER OUT NEAR CANDIDATE PAIRS\n vector dictIndices;\n vector> rejectedImgPoints;\n if (DictionaryMode::Single == dictMode) {\n Dictionary& dictionary = dictionaries.at(0);\n auto selectedCandidates = filterTooCloseCandidates(candidates, contours, dictionary.markerSize);\n candidates.clear();\n contours.clear();\n\n /// STEP 2: Check candidate codification (identify markers)\n identifyCandidates(grey, grey_pyramid, selectedCandidates, candidates, contours,\n ids, dictionary, rejectedImgPoints);\n\n /// STEP 3: Corner refinement :: use corner subpix\n if (detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_SUBPIX) {\n CV_Assert(detectorParams.cornerRefinementWinSize > 0 && detectorParams.cornerRefinementMaxIterations > 0 &&\n detectorParams.cornerRefinementMinAccuracy > 0);\n // Do subpixel estimation. 
In Aruco3 start on the lowest pyramid level and upscale the corners\n parallel_for_(Range(0, (int)candidates.size()), [&](const Range& range) {\n const int begin = range.start;\n const int end = range.end;\n\n for (int i = begin; i < end; i++) {\n if (detectorParams.useAruco3Detection) {\n const float scale_init = (float) grey_pyramid[closest_pyr_image_idx].cols / grey.cols;\n findCornerInPyrImage(scale_init, closest_pyr_image_idx, grey_pyramid, Mat(candidates[i]), detectorParams);\n } else {\n int cornerRefinementWinSize = std::max(1, cvRound(detectorParams.relativeCornerRefinmentWinSize*\n getAverageModuleSize(candidates[i], dictionary.markerSize, detectorParams.markerBorderBits)));\n cornerRefinementWinSize = min(cornerRefinementWinSize, detectorParams.cornerRefinementWinSize);\n cornerSubPix(grey, Mat(candidates[i]), Size(cornerRefinementWinSize, cornerRefinementWinSize), Size(-1, -1),\n TermCriteria(TermCriteria::MAX_ITER | TermCriteria::EPS,\n detectorParams.cornerRefinementMaxIterations,\n detectorParams.cornerRefinementMinAccuracy));\n }\n }\n });\n }\n } else if (DictionaryMode::Multi == dictMode) {\n unordered_set uniqueMarkerSizes;\n for (const Dictionary& dictionary : dictionaries) {\n uniqueMarkerSizes.insert(dictionary.markerSize);\n }\n\n // create at max 4 marker candidate trees for each dictionary size\n vector> candidatesPerDictionarySize = {{}, {}, {}, {}};\n for (int markerSize : uniqueMarkerSizes) {\n // min marker size is 4, so subtract 4 to get index\n const auto dictionarySizeIndex = markerSize - 4;\n // copy candidates\n vector> candidatesCopy = candidates;\n vector > contoursCopy = contours;\n candidatesPerDictionarySize[dictionarySizeIndex] = filterTooCloseCandidates(candidatesCopy, contoursCopy, markerSize);\n }\n candidates.clear();\n contours.clear();\n\n /// STEP 2: Check candidate codification (identify markers)\n int dictIndex = 0;\n for (const Dictionary& currentDictionary : dictionaries) {\n const auto dictionarySizeIndex = currentDictionary.markerSize - 4;\n // temporary variable to store the current candidates\n vector> currentCandidates;\n identifyCandidates(grey, grey_pyramid, candidatesPerDictionarySize[dictionarySizeIndex], currentCandidates, contours,\n ids, currentDictionary, rejectedImgPoints);\n if (_dictIndices.needed()) {\n dictIndices.insert(dictIndices.end(), currentCandidates.size(), dictIndex);\n }\n\n /// STEP 3: Corner refinement :: use corner subpix\n if (detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_SUBPIX) {", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1977140260", + "repo_full_name": "opencv/opencv", + "pr_number": 26934, + "pr_file": "modules/objdetect/src/aruco/aruco_detector.cpp", + "discussion_id": "1977140260", + "commented_code": "@@ -651,9 +657,257 @@ struct ArucoDetector::ArucoDetectorImpl {\n RefineParameters refineParams;\n ArucoDetectorImpl() {}\n \n- ArucoDetectorImpl(const Dictionary &_dictionary, const DetectorParameters &_detectorParams,\n- const RefineParameters& _refineParams): dictionary(_dictionary),\n- detectorParams(_detectorParams), refineParams(_refineParams) {}\n+ ArucoDetectorImpl(const vector&_dictionaries, const DetectorParameters &_detectorParams,\n+ const RefineParameters& _refineParams): dictionaries(_dictionaries),\n+ detectorParams(_detectorParams), refineParams(_refineParams) {\n+ CV_Assert(!dictionaries.empty());\n+ }\n+\n+ /*\n+ * @brief Detect markers either using multiple or just first dictionary\n+ */\n+ void detectMarkers(InputArray _image, 
OutputArrayOfArrays _corners, OutputArray _ids,\n+ OutputArrayOfArrays _rejectedImgPoints, OutputArray _dictIndices, DictionaryMode dictMode) {\n+ CV_Assert(!_image.empty());\n+\n+ CV_Assert(detectorParams.markerBorderBits > 0);\n+ // check that the parameters are set correctly if Aruco3 is used\n+ CV_Assert(!(detectorParams.useAruco3Detection == true &&\n+ detectorParams.minSideLengthCanonicalImg == 0 &&\n+ detectorParams.minMarkerLengthRatioOriginalImg == 0.0));\n+\n+ Mat grey;\n+ _convertToGrey(_image, grey);\n+\n+ // Aruco3 functionality is the extension of Aruco.\n+ // The description can be found in:\n+ // [1] Speeded up detection of squared fiducial markers, 2018, FJ Romera-Ramirez et al.\n+ // if Aruco3 functionality if not wanted\n+ // change some parameters to be sure to turn it off\n+ if (!detectorParams.useAruco3Detection) {\n+ detectorParams.minMarkerLengthRatioOriginalImg = 0.0;\n+ detectorParams.minSideLengthCanonicalImg = 0;\n+ }\n+ else {\n+ // always turn on corner refinement in case of Aruco3, due to upsampling\n+ detectorParams.cornerRefinementMethod = (int)CORNER_REFINE_SUBPIX;\n+ // only CORNER_REFINE_SUBPIX implement correctly for useAruco3Detection\n+ // Todo: update other CORNER_REFINE methods\n+ }\n+\n+ /// Step 0: equation (2) from paper [1]\n+ const float fxfy = (!detectorParams.useAruco3Detection ? 1.f : detectorParams.minSideLengthCanonicalImg /\n+ (detectorParams.minSideLengthCanonicalImg + std::max(grey.cols, grey.rows)*\n+ detectorParams.minMarkerLengthRatioOriginalImg));\n+\n+ /// Step 1: create image pyramid. Section 3.4. in [1]\n+ vector grey_pyramid;\n+ int closest_pyr_image_idx = 0, num_levels = 0;\n+ //// Step 1.1: resize image with equation (1) from paper [1]\n+ if (detectorParams.useAruco3Detection) {\n+ const float scale_pyr = 2.f;\n+ const float img_area = static_cast(grey.rows*grey.cols);\n+ const float min_area_marker = static_cast(detectorParams.minSideLengthCanonicalImg*\n+ detectorParams.minSideLengthCanonicalImg);\n+ // find max level\n+ num_levels = static_cast(log2(img_area / min_area_marker)/scale_pyr);\n+ // the closest pyramid image to the downsampled segmentation image\n+ // will later be used as start index for corner upsampling\n+ const float scale_img_area = img_area * fxfy * fxfy;\n+ closest_pyr_image_idx = cvRound(log2(img_area / scale_img_area)/scale_pyr);\n+ }\n+ buildPyramid(grey, grey_pyramid, num_levels);\n+\n+ // resize to segmentation image\n+ // in this reduces size the contours will be detected\n+ if (fxfy != 1.f)\n+ resize(grey, grey, Size(cvRound(fxfy * grey.cols), cvRound(fxfy * grey.rows)));\n+\n+ /// STEP 2: Detect marker candidates\n+ vector > candidates;\n+ vector > contours;\n+ vector ids;\n+\n+ /// STEP 2.a Detect marker candidates :: using AprilTag\n+ if(detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_APRILTAG){\n+ _apriltag(grey, detectorParams, candidates, contours);\n+ }\n+ /// STEP 2.b Detect marker candidates :: traditional way\n+ else {\n+ detectCandidates(grey, candidates, contours);\n+ }\n+\n+ /// STEP 2.c FILTER OUT NEAR CANDIDATE PAIRS\n+ vector dictIndices;\n+ vector> rejectedImgPoints;\n+ if (DictionaryMode::Single == dictMode) {\n+ Dictionary& dictionary = dictionaries.at(0);\n+ auto selectedCandidates = filterTooCloseCandidates(candidates, contours, dictionary.markerSize);\n+ candidates.clear();\n+ contours.clear();\n+\n+ /// STEP 2: Check candidate codification (identify markers)\n+ identifyCandidates(grey, grey_pyramid, selectedCandidates, candidates, contours,\n+ ids, dictionary, 
rejectedImgPoints);\n+\n+ /// STEP 3: Corner refinement :: use corner subpix\n+ if (detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_SUBPIX) {\n+ CV_Assert(detectorParams.cornerRefinementWinSize > 0 && detectorParams.cornerRefinementMaxIterations > 0 &&\n+ detectorParams.cornerRefinementMinAccuracy > 0);\n+ // Do subpixel estimation. In Aruco3 start on the lowest pyramid level and upscale the corners\n+ parallel_for_(Range(0, (int)candidates.size()), [&](const Range& range) {\n+ const int begin = range.start;\n+ const int end = range.end;\n+\n+ for (int i = begin; i < end; i++) {\n+ if (detectorParams.useAruco3Detection) {\n+ const float scale_init = (float) grey_pyramid[closest_pyr_image_idx].cols / grey.cols;\n+ findCornerInPyrImage(scale_init, closest_pyr_image_idx, grey_pyramid, Mat(candidates[i]), detectorParams);\n+ } else {\n+ int cornerRefinementWinSize = std::max(1, cvRound(detectorParams.relativeCornerRefinmentWinSize*\n+ getAverageModuleSize(candidates[i], dictionary.markerSize, detectorParams.markerBorderBits)));\n+ cornerRefinementWinSize = min(cornerRefinementWinSize, detectorParams.cornerRefinementWinSize);\n+ cornerSubPix(grey, Mat(candidates[i]), Size(cornerRefinementWinSize, cornerRefinementWinSize), Size(-1, -1),\n+ TermCriteria(TermCriteria::MAX_ITER | TermCriteria::EPS,\n+ detectorParams.cornerRefinementMaxIterations,\n+ detectorParams.cornerRefinementMinAccuracy));\n+ }\n+ }\n+ });\n+ }\n+ } else if (DictionaryMode::Multi == dictMode) {\n+ unordered_set uniqueMarkerSizes;\n+ for (const Dictionary& dictionary : dictionaries) {\n+ uniqueMarkerSizes.insert(dictionary.markerSize);\n+ }\n+\n+ // create at max 4 marker candidate trees for each dictionary size\n+ vector> candidatesPerDictionarySize = {{}, {}, {}, {}};\n+ for (int markerSize : uniqueMarkerSizes) {\n+ // min marker size is 4, so subtract 4 to get index\n+ const auto dictionarySizeIndex = markerSize - 4;\n+ // copy candidates\n+ vector> candidatesCopy = candidates;\n+ vector > contoursCopy = contours;\n+ candidatesPerDictionarySize[dictionarySizeIndex] = filterTooCloseCandidates(candidatesCopy, contoursCopy, markerSize);\n+ }\n+ candidates.clear();\n+ contours.clear();\n+\n+ /// STEP 2: Check candidate codification (identify markers)\n+ int dictIndex = 0;\n+ for (const Dictionary& currentDictionary : dictionaries) {\n+ const auto dictionarySizeIndex = currentDictionary.markerSize - 4;\n+ // temporary variable to store the current candidates\n+ vector> currentCandidates;\n+ identifyCandidates(grey, grey_pyramid, candidatesPerDictionarySize[dictionarySizeIndex], currentCandidates, contours,\n+ ids, currentDictionary, rejectedImgPoints);\n+ if (_dictIndices.needed()) {\n+ dictIndices.insert(dictIndices.end(), currentCandidates.size(), dictIndex);\n+ }\n+\n+ /// STEP 3: Corner refinement :: use corner subpix\n+ if (detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_SUBPIX) {", + "comment_created_at": "2025-03-03T09:17:51+00:00", + "comment_author": "mshabunin", + "comment_body": "Is it possible to extract this large block to separate function to reduce code duplication?", + "pr_file_module": null + }, + { + "comment_id": "1979573764", + "repo_full_name": "opencv/opencv", + "pr_number": 26934, + "pr_file": "modules/objdetect/src/aruco/aruco_detector.cpp", + "discussion_id": "1977140260", + "commented_code": "@@ -651,9 +657,257 @@ struct ArucoDetector::ArucoDetectorImpl {\n RefineParameters refineParams;\n ArucoDetectorImpl() {}\n \n- ArucoDetectorImpl(const Dictionary &_dictionary, const 
DetectorParameters &_detectorParams,\n- const RefineParameters& _refineParams): dictionary(_dictionary),\n- detectorParams(_detectorParams), refineParams(_refineParams) {}\n+ ArucoDetectorImpl(const vector&_dictionaries, const DetectorParameters &_detectorParams,\n+ const RefineParameters& _refineParams): dictionaries(_dictionaries),\n+ detectorParams(_detectorParams), refineParams(_refineParams) {\n+ CV_Assert(!dictionaries.empty());\n+ }\n+\n+ /*\n+ * @brief Detect markers either using multiple or just first dictionary\n+ */\n+ void detectMarkers(InputArray _image, OutputArrayOfArrays _corners, OutputArray _ids,\n+ OutputArrayOfArrays _rejectedImgPoints, OutputArray _dictIndices, DictionaryMode dictMode) {\n+ CV_Assert(!_image.empty());\n+\n+ CV_Assert(detectorParams.markerBorderBits > 0);\n+ // check that the parameters are set correctly if Aruco3 is used\n+ CV_Assert(!(detectorParams.useAruco3Detection == true &&\n+ detectorParams.minSideLengthCanonicalImg == 0 &&\n+ detectorParams.minMarkerLengthRatioOriginalImg == 0.0));\n+\n+ Mat grey;\n+ _convertToGrey(_image, grey);\n+\n+ // Aruco3 functionality is the extension of Aruco.\n+ // The description can be found in:\n+ // [1] Speeded up detection of squared fiducial markers, 2018, FJ Romera-Ramirez et al.\n+ // if Aruco3 functionality if not wanted\n+ // change some parameters to be sure to turn it off\n+ if (!detectorParams.useAruco3Detection) {\n+ detectorParams.minMarkerLengthRatioOriginalImg = 0.0;\n+ detectorParams.minSideLengthCanonicalImg = 0;\n+ }\n+ else {\n+ // always turn on corner refinement in case of Aruco3, due to upsampling\n+ detectorParams.cornerRefinementMethod = (int)CORNER_REFINE_SUBPIX;\n+ // only CORNER_REFINE_SUBPIX implement correctly for useAruco3Detection\n+ // Todo: update other CORNER_REFINE methods\n+ }\n+\n+ /// Step 0: equation (2) from paper [1]\n+ const float fxfy = (!detectorParams.useAruco3Detection ? 1.f : detectorParams.minSideLengthCanonicalImg /\n+ (detectorParams.minSideLengthCanonicalImg + std::max(grey.cols, grey.rows)*\n+ detectorParams.minMarkerLengthRatioOriginalImg));\n+\n+ /// Step 1: create image pyramid. Section 3.4. 
in [1]\n+ vector grey_pyramid;\n+ int closest_pyr_image_idx = 0, num_levels = 0;\n+ //// Step 1.1: resize image with equation (1) from paper [1]\n+ if (detectorParams.useAruco3Detection) {\n+ const float scale_pyr = 2.f;\n+ const float img_area = static_cast(grey.rows*grey.cols);\n+ const float min_area_marker = static_cast(detectorParams.minSideLengthCanonicalImg*\n+ detectorParams.minSideLengthCanonicalImg);\n+ // find max level\n+ num_levels = static_cast(log2(img_area / min_area_marker)/scale_pyr);\n+ // the closest pyramid image to the downsampled segmentation image\n+ // will later be used as start index for corner upsampling\n+ const float scale_img_area = img_area * fxfy * fxfy;\n+ closest_pyr_image_idx = cvRound(log2(img_area / scale_img_area)/scale_pyr);\n+ }\n+ buildPyramid(grey, grey_pyramid, num_levels);\n+\n+ // resize to segmentation image\n+ // in this reduces size the contours will be detected\n+ if (fxfy != 1.f)\n+ resize(grey, grey, Size(cvRound(fxfy * grey.cols), cvRound(fxfy * grey.rows)));\n+\n+ /// STEP 2: Detect marker candidates\n+ vector > candidates;\n+ vector > contours;\n+ vector ids;\n+\n+ /// STEP 2.a Detect marker candidates :: using AprilTag\n+ if(detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_APRILTAG){\n+ _apriltag(grey, detectorParams, candidates, contours);\n+ }\n+ /// STEP 2.b Detect marker candidates :: traditional way\n+ else {\n+ detectCandidates(grey, candidates, contours);\n+ }\n+\n+ /// STEP 2.c FILTER OUT NEAR CANDIDATE PAIRS\n+ vector dictIndices;\n+ vector> rejectedImgPoints;\n+ if (DictionaryMode::Single == dictMode) {\n+ Dictionary& dictionary = dictionaries.at(0);\n+ auto selectedCandidates = filterTooCloseCandidates(candidates, contours, dictionary.markerSize);\n+ candidates.clear();\n+ contours.clear();\n+\n+ /// STEP 2: Check candidate codification (identify markers)\n+ identifyCandidates(grey, grey_pyramid, selectedCandidates, candidates, contours,\n+ ids, dictionary, rejectedImgPoints);\n+\n+ /// STEP 3: Corner refinement :: use corner subpix\n+ if (detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_SUBPIX) {\n+ CV_Assert(detectorParams.cornerRefinementWinSize > 0 && detectorParams.cornerRefinementMaxIterations > 0 &&\n+ detectorParams.cornerRefinementMinAccuracy > 0);\n+ // Do subpixel estimation. 
In Aruco3 start on the lowest pyramid level and upscale the corners\n+ parallel_for_(Range(0, (int)candidates.size()), [&](const Range& range) {\n+ const int begin = range.start;\n+ const int end = range.end;\n+\n+ for (int i = begin; i < end; i++) {\n+ if (detectorParams.useAruco3Detection) {\n+ const float scale_init = (float) grey_pyramid[closest_pyr_image_idx].cols / grey.cols;\n+ findCornerInPyrImage(scale_init, closest_pyr_image_idx, grey_pyramid, Mat(candidates[i]), detectorParams);\n+ } else {\n+ int cornerRefinementWinSize = std::max(1, cvRound(detectorParams.relativeCornerRefinmentWinSize*\n+ getAverageModuleSize(candidates[i], dictionary.markerSize, detectorParams.markerBorderBits)));\n+ cornerRefinementWinSize = min(cornerRefinementWinSize, detectorParams.cornerRefinementWinSize);\n+ cornerSubPix(grey, Mat(candidates[i]), Size(cornerRefinementWinSize, cornerRefinementWinSize), Size(-1, -1),\n+ TermCriteria(TermCriteria::MAX_ITER | TermCriteria::EPS,\n+ detectorParams.cornerRefinementMaxIterations,\n+ detectorParams.cornerRefinementMinAccuracy));\n+ }\n+ }\n+ });\n+ }\n+ } else if (DictionaryMode::Multi == dictMode) {\n+ unordered_set uniqueMarkerSizes;\n+ for (const Dictionary& dictionary : dictionaries) {\n+ uniqueMarkerSizes.insert(dictionary.markerSize);\n+ }\n+\n+ // create at max 4 marker candidate trees for each dictionary size\n+ vector> candidatesPerDictionarySize = {{}, {}, {}, {}};\n+ for (int markerSize : uniqueMarkerSizes) {\n+ // min marker size is 4, so subtract 4 to get index\n+ const auto dictionarySizeIndex = markerSize - 4;\n+ // copy candidates\n+ vector> candidatesCopy = candidates;\n+ vector > contoursCopy = contours;\n+ candidatesPerDictionarySize[dictionarySizeIndex] = filterTooCloseCandidates(candidatesCopy, contoursCopy, markerSize);\n+ }\n+ candidates.clear();\n+ contours.clear();\n+\n+ /// STEP 2: Check candidate codification (identify markers)\n+ int dictIndex = 0;\n+ for (const Dictionary& currentDictionary : dictionaries) {\n+ const auto dictionarySizeIndex = currentDictionary.markerSize - 4;\n+ // temporary variable to store the current candidates\n+ vector> currentCandidates;\n+ identifyCandidates(grey, grey_pyramid, candidatesPerDictionarySize[dictionarySizeIndex], currentCandidates, contours,\n+ ids, currentDictionary, rejectedImgPoints);\n+ if (_dictIndices.needed()) {\n+ dictIndices.insert(dictIndices.end(), currentCandidates.size(), dictIndex);\n+ }\n+\n+ /// STEP 3: Corner refinement :: use corner subpix\n+ if (detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_SUBPIX) {", + "comment_created_at": "2025-03-04T14:27:21+00:00", + "comment_author": "BenjaminKnecht", + "comment_body": "Done", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "723912815", + "pr_number": 20371, + "pr_file": "modules/core/src/opengl.cpp", + "created_at": "2021-10-07T07:22:56+00:00", + "commented_code": "#endif\n}\n\n\n////////////////////////////////////////////////////////////////////////\n// ogl::Attribute\n\ncv::ogl::Attribute::Attribute(Buffer buffer, size_t stride, size_t offset, int size, Type type, bool integer, bool normalized, unsigned int shader_loc)", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "723912815", + "repo_full_name": "opencv/opencv", + "pr_number": 20371, + "pr_file": "modules/core/src/opengl.cpp", + "discussion_id": "723912815", + "commented_code": "@@ -1405,6 +1412,574 @@ void cv::ogl::Arrays::bind() const\n #endif\n }\n 
\n+\n+////////////////////////////////////////////////////////////////////////\n+// ogl::Attribute\n+\n+cv::ogl::Attribute::Attribute(Buffer buffer, size_t stride, size_t offset, int size, Type type, bool integer, bool normalized, unsigned int shader_loc)", + "comment_created_at": "2021-10-07T07:22:56+00:00", + "comment_author": "JulieBar", + "comment_body": "there is no need for an explicit constructor, an initialization list can be used", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "724440487", + "pr_number": 20371, + "pr_file": "modules/core/src/opengl.cpp", + "created_at": "2021-10-07T18:42:17+00:00", + "commented_code": "#endif\n}\n\n\n////////////////////////////////////////////////////////////////////////\n// ogl::Attribute\n\ncv::ogl::Attribute::Attribute(Buffer buffer, size_t stride, size_t offset, int size, Type type, bool integer, bool normalized, unsigned int shader_loc)\n{\n buffer_ = buffer;\n stride_ = stride;\n offset_ = offset;\n size_ = size;\n type_ = type;\n integer_ = integer;\n normalized_ = normalized;\n shader_loc_ = shader_loc;\n}\n\n////////////////////////////////////////////////////////////////////////\n// ogl::VertexArray\n\n#ifndef HAVE_OPENGL\n\nclass cv::ogl::VertexArray::Impl\n{\n};\n\n#else\n\nclass cv::ogl::VertexArray::Impl\n{\npublic:\n static const Ptr& empty();\n\n Impl(GLuint vaId, bool autoRelease);\n Impl(std::initializer_list attributes, bool autoRelease);\n ~Impl();\n\n void bind();\n\n void setAutoRelease(bool flag) { autoRelease_ = flag; }\n\n GLuint vaId() const { return vaId_; }\n\nprivate:\n Impl();\n\n GLuint vaId_;\n bool autoRelease_;\n};\n\nconst Ptr& cv::ogl::VertexArray::Impl::empty()\n{\n static Ptr p(new Impl);\n return p;\n}\n\ncv::ogl::VertexArray::Impl::Impl() : vaId_(0), autoRelease_(false)\n{\n}\n\ncv::ogl::VertexArray::Impl::Impl(GLuint vaId, bool autoRelease) : vaId_(vaId), autoRelease_(autoRelease)\n{\n CV_Assert( gl::IsVertexArray(vaId) == gl::TRUE_ );\n}\n\ncv::ogl::VertexArray::Impl::Impl(std::initializer_list attributes, bool autoRelease) : vaId_(0), autoRelease_(autoRelease)\n{\n gl::GenVertexArrays(1, &vaId_);\n CV_CheckGlError();\n\n CV_Assert( vaId_ != 0 );\n gl::BindVertexArray(vaId_);\n CV_CheckGlError();\n\n for (auto& attribute : attributes)\n {\n attribute.buffer_.bind(Buffer::ARRAY_BUFFER);\n if (attribute.integer_)\n {\n gl::VertexAttribIPointer(\n attribute.shader_loc_,\n attribute.size_,\n attribute.type_,\n attribute.stride_,\n (const void*)attribute.offset_\n );\n CV_CheckGlError();\n }\n else\n {\n gl::VertexAttribPointer(\n attribute.shader_loc_,\n attribute.size_,\n attribute.type_,\n attribute.normalized_,\n attribute.stride_,\n (const void*)attribute.offset_\n );\n CV_CheckGlError();\n }\n\n gl::EnableVertexAttribArray(attribute.shader_loc_);\n CV_CheckGlError();\n }\n}\n\ncv::ogl::VertexArray::Impl::~Impl()\n{\n if (autoRelease_ && vaId_)\n gl::DeleteVertexArrays(1, &vaId_);\n}\n\nvoid cv::ogl::VertexArray::Impl::bind()\n{\n gl::BindVertexArray(vaId_);\n CV_CheckGlError();\n}\n\n#endif // HAVE_OPENGL\n\ncv::ogl::VertexArray::VertexArray()\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n impl_ = Impl::empty();\n#endif\n}\n\ncv::ogl::VertexArray::VertexArray(std::initializer_list attributes, bool autoRelease)\n{\n#ifndef HAVE_OPENGL\n CV_UNUSED(attributes);\n CV_UNUSED(autoRelease);\n throw_no_ogl();\n#else\n impl_.reset(new Impl(attributes, autoRelease));\n#endif\n}\n\nvoid cv::ogl::VertexArray::create(std::initializer_list attributes, bool autoRelease)\n{\n#ifndef HAVE_OPENGL\n 
CV_UNUSED(attributes);\n CV_UNUSED(autoRelease);\n throw_no_ogl();\n#else\n impl_.reset(new Impl(attributes, autoRelease));\n#endif\n}\n\nvoid cv::ogl::VertexArray::release()\n{\n#ifdef HAVE_OPENGL\n if (impl_)\n impl_->setAutoRelease(true);\n impl_ = Impl::empty();\n#endif\n}\n\nvoid cv::ogl::VertexArray::setAutoRelease(bool flag)\n{\n#ifndef HAVE_OPENGL\n CV_UNUSED(flag);\n throw_no_ogl();\n#else\n impl_->setAutoRelease(flag);\n#endif\n}\n\nvoid cv::ogl::VertexArray::bind() const\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n impl_->bind();\n#endif\n}\n\nvoid cv::ogl::VertexArray::unbind()\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n gl::BindVertexArray(0);\n#endif\n}\n\nunsigned int cv::ogl::VertexArray::vaId() const\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n return impl_->vaId();\n#endif\n}\n\n////////////////////////////////////////////////////////////////////////\n// ogl::Shader\n\n#ifndef HAVE_OPENGL\n\nclass cv::ogl::Shader::Impl\n{\n};\n\n#else\n\nclass cv::ogl::Shader::Impl\n{\npublic:\n static const Ptr& empty();\n\n Impl(GLuint shaderId, bool autoRelease);\n Impl(const char* src, GLenum type, bool autoRelease);\n ~Impl();\n\n void setAutoRelease(bool flag) { autoRelease_ = flag; }\n\n GLuint shaderId() const { return shaderId_; }\n\nprivate:\n Impl();\n\n GLuint shaderId_;\n bool autoRelease_;\n};\n\nconst Ptr& cv::ogl::Shader::Impl::empty()\n{\n static Ptr p(new Impl);\n return p;\n}\n\ncv::ogl::Shader::Impl::Impl() : shaderId_(0), autoRelease_(false)\n{\n}\n\ncv::ogl::Shader::Impl::Impl(GLuint shaderId, bool autoRelease) : shaderId_(shaderId), autoRelease_(autoRelease)\n{\n CV_Assert( gl::IsShader(shaderId) == gl::TRUE_ );\n}\n\ncv::ogl::Shader::Impl::Impl(const char* src, GLenum type, bool autoRelease) : shaderId_(0), autoRelease_(autoRelease)\n{\n shaderId_ = gl::CreateShader(type);\n CV_CheckGlError();\n\n CV_Assert( shaderId_ != 0 );\n\n GLint status;\n gl::ShaderSource(shaderId_, 1, &src, 0);\n gl::CompileShader(shaderId_);\n gl::GetShaderiv(shaderId_, gl::COMPILE_STATUS, &status);\n if (!status) {\n String info_log;\n gl::GetShaderiv(shaderId_, gl::INFO_LOG_LENGTH, &status);\n info_log.resize(status);\n gl::GetShaderInfoLog(shaderId_, GLsizei(info_log.size()) + 1, nullptr, &info_log[0]);\n CV_Error(Error::OpenGlApiCallError, info_log);\n }\n\n CV_CheckGlError();\n}\n\ncv::ogl::Shader::Impl::~Impl()\n{\n if (autoRelease_ && shaderId_)\n gl::DeleteShader(shaderId_);\n}\n\n#endif // HAVE_OPENGL\n\ncv::ogl::Shader::Shader() : type_(Type::VERTEX)\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n impl_ = Impl::empty();\n#endif\n}\n\ncv::ogl::Shader::Shader(const char* src, Type type, bool autoRelease) : type_(type)\n{\n#ifndef HAVE_OPENGL\n CV_UNUSED(src);\n CV_UNUSED(type);\n CV_UNUSED(autoRelease);\n throw_no_ogl();\n#else\n impl_.reset(new Impl(src, type, autoRelease));\n#endif\n}\n\nvoid cv::ogl::Shader::create(const char* src, Type type, bool autoRelease)\n{\n#ifndef HAVE_OPENGL\n CV_UNUSED(src);\n CV_UNUSED(type);\n CV_UNUSED(autoRelease);\n throw_no_ogl();\n#else\n impl_.reset(new Impl(src, type, autoRelease));\n type_ = type;\n#endif\n}\n\nvoid cv::ogl::Shader::release()\n{\n#ifdef HAVE_OPENGL\n if (impl_)\n impl_->setAutoRelease(true);\n impl_ = Impl::empty();\n#endif\n}\n\nvoid cv::ogl::Shader::setAutoRelease(bool flag)\n{\n#ifndef HAVE_OPENGL\n CV_UNUSED(flag);\n throw_no_ogl();\n#else\n impl_->setAutoRelease(flag);\n#endif\n}\n\nunsigned int cv::ogl::Shader::shaderId() const\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n return 
impl_->shaderId();\n#endif\n}\n\n////////////////////////////////////////////////////////////////////////\n// ogl::Program\n\n#ifndef HAVE_OPENGL\n\nclass cv::ogl::Program::Impl\n{\n};\n\n#else\n\nclass cv::ogl::Program::Impl\n{\npublic:\n static const Ptr& empty();\n\n Impl(GLuint programId, bool autoRelease);\n Impl(Shader vert, Shader frag, bool autoRelease);\n ~Impl();\n\n void bind();\n\n int getAttributeLocation(const char* name) const;\n int getUniformLocation(const char* name) const;\n\n void setAutoRelease(bool flag) { autoRelease_ = flag; }\n\n GLuint programId() const { return programId_; }\n\nprivate:\n Impl();\n\n Shader vert_;\n Shader frag_;\n GLuint programId_;\n bool autoRelease_;\n};\n\nconst Ptr& cv::ogl::Program::Impl::empty()\n{\n static Ptr p(new Impl);\n return p;\n}\n\ncv::ogl::Program::Impl::Impl() : programId_(0), autoRelease_(false)\n{\n}\n\ncv::ogl::Program::Impl::Impl(GLuint programId, bool autoRelease) : programId_(programId), autoRelease_(autoRelease)\n{\n CV_Assert( gl::IsProgram(programId) == gl::TRUE_ );\n}\n\ncv::ogl::Program::Impl::Impl(Shader vert, Shader frag, bool autoRelease) : programId_(0), autoRelease_(autoRelease)\n{\n programId_ = gl::CreateProgram();\n CV_CheckGlError();\n\n CV_Assert( programId_ != 0 );\n\n gl::AttachShader(programId_, vert.shaderId());\n CV_CheckGlError();\n\n gl::AttachShader(programId_, frag.shaderId());\n CV_CheckGlError();\n\n GLint status;\n gl::LinkProgram(programId_);\n gl::GetProgramiv(programId_, gl::LINK_STATUS, &status);\n if (!status) {\n String info_log;\n gl::GetProgramiv(programId_, gl::INFO_LOG_LENGTH, &status);\n info_log.resize(status);\n gl::GetProgramInfoLog(programId_, GLsizei(info_log.size()) + 1, nullptr, &info_log[0]);\n CV_Error(Error::OpenGlApiCallError, info_log);\n }\n \n CV_CheckGlError();\n\n vert_ = vert;\n frag_ = frag;\n}\n\ncv::ogl::Program::Impl::~Impl()\n{\n if (autoRelease_ && programId_)\n gl::DeleteProgram(programId_);\n}\n\nvoid cv::ogl::Program::Impl::bind()\n{\n gl::UseProgram(programId_);\n CV_CheckGlError();\n}\n\nint cv::ogl::Program::Impl::getAttributeLocation(const char* name) const\n{\n int location = gl::GetAttribLocation(programId_, name);\n if (location < 0)\n {\n CV_Error(Error::OpenGlApiCallError, \"Attribute not found\");\n }\n CV_CheckGlError();\n return location;\n}\n\nint cv::ogl::Program::Impl::getUniformLocation(const char* name) const\n{\n int location = gl::GetUniformLocation(programId_, name);\n if (location < 0)\n {\n CV_Error(Error::OpenGlApiCallError, \"Attribute not found\");\n }\n CV_CheckGlError();\n return location;\n}\n\n#endif // HAVE_OPENGL\n\ncv::ogl::Program::Program()\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n impl_ = Impl::empty();\n#endif\n}\n\ncv::ogl::Program::Program(Shader vert, Shader frag, bool autoRelease)\n{\n#ifndef HAVE_OPENGL\n CV_UNUSED(vert);\n CV_UNUSED(frag);\n CV_UNUSED(autoRelease);\n throw_no_ogl();\n#else\n impl_.reset(new Impl(vert, frag, autoRelease));\n#endif\n}\n\nvoid cv::ogl::Program::create(Shader vert, Shader frag, bool autoRelease)\n{\n#ifndef HAVE_OPENGL\n CV_UNUSED(vert);\n CV_UNUSED(frag);\n CV_UNUSED(autoRelease);\n throw_no_ogl();\n#else\n impl_.reset(new Impl(vert, frag, autoRelease));\n#endif\n}\n\nvoid cv::ogl::Program::release()\n{\n#ifdef HAVE_OPENGL\n if (impl_)\n impl_->setAutoRelease(true);\n impl_ = Impl::empty();\n#endif\n}\n\nvoid cv::ogl::Program::setAutoRelease(bool flag)\n{\n#ifndef HAVE_OPENGL\n CV_UNUSED(flag);\n throw_no_ogl();\n#else\n impl_->setAutoRelease(flag);\n#endif\n}\n\nvoid 
cv::ogl::Program::bind() const\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n impl_->bind();\n#endif\n}\n\nvoid cv::ogl::Program::unbind()\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n gl::UseProgram(0);\n#endif\n}\n\nint cv::ogl::Program::getAttributeLocation(const char* name) const\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n return impl_->getAttributeLocation(name);\n#endif\n}\n\nint cv::ogl::Program::getUniformLocation(const char* name) const\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n return impl_->getUniformLocation(name);\n#endif\n}\n\nvoid cv::ogl::Program::setUniformVec3(int loc, Vec3f vec)\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n gl::Uniform3f(loc, vec(0), vec(1), vec(2));\n#endif\n}\n\nvoid cv::ogl::Program::setUniformMat4x4(int loc, Matx44f mat)\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n gl::UniformMatrix4fv(loc, 1, true, mat.val);\n#endif\n}\n\nunsigned int cv::ogl::Program::programId() const\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n return impl_->programId();\n#endif\n}", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "724440487", + "repo_full_name": "opencv/opencv", + "pr_number": 20371, + "pr_file": "modules/core/src/opengl.cpp", + "discussion_id": "724440487", + "commented_code": "@@ -1405,6 +1412,574 @@ void cv::ogl::Arrays::bind() const\n #endif\n }\n \n+\n+////////////////////////////////////////////////////////////////////////\n+// ogl::Attribute\n+\n+cv::ogl::Attribute::Attribute(Buffer buffer, size_t stride, size_t offset, int size, Type type, bool integer, bool normalized, unsigned int shader_loc)\n+{\n+ buffer_ = buffer;\n+ stride_ = stride;\n+ offset_ = offset;\n+ size_ = size;\n+ type_ = type;\n+ integer_ = integer;\n+ normalized_ = normalized;\n+ shader_loc_ = shader_loc;\n+}\n+\n+////////////////////////////////////////////////////////////////////////\n+// ogl::VertexArray\n+\n+#ifndef HAVE_OPENGL\n+\n+class cv::ogl::VertexArray::Impl\n+{\n+};\n+\n+#else\n+\n+class cv::ogl::VertexArray::Impl\n+{\n+public:\n+ static const Ptr& empty();\n+\n+ Impl(GLuint vaId, bool autoRelease);\n+ Impl(std::initializer_list attributes, bool autoRelease);\n+ ~Impl();\n+\n+ void bind();\n+\n+ void setAutoRelease(bool flag) { autoRelease_ = flag; }\n+\n+ GLuint vaId() const { return vaId_; }\n+\n+private:\n+ Impl();\n+\n+ GLuint vaId_;\n+ bool autoRelease_;\n+};\n+\n+const Ptr& cv::ogl::VertexArray::Impl::empty()\n+{\n+ static Ptr p(new Impl);\n+ return p;\n+}\n+\n+cv::ogl::VertexArray::Impl::Impl() : vaId_(0), autoRelease_(false)\n+{\n+}\n+\n+cv::ogl::VertexArray::Impl::Impl(GLuint vaId, bool autoRelease) : vaId_(vaId), autoRelease_(autoRelease)\n+{\n+ CV_Assert( gl::IsVertexArray(vaId) == gl::TRUE_ );\n+}\n+\n+cv::ogl::VertexArray::Impl::Impl(std::initializer_list attributes, bool autoRelease) : vaId_(0), autoRelease_(autoRelease)\n+{\n+ gl::GenVertexArrays(1, &vaId_);\n+ CV_CheckGlError();\n+\n+ CV_Assert( vaId_ != 0 );\n+ gl::BindVertexArray(vaId_);\n+ CV_CheckGlError();\n+\n+ for (auto& attribute : attributes)\n+ {\n+ attribute.buffer_.bind(Buffer::ARRAY_BUFFER);\n+ if (attribute.integer_)\n+ {\n+ gl::VertexAttribIPointer(\n+ attribute.shader_loc_,\n+ attribute.size_,\n+ attribute.type_,\n+ attribute.stride_,\n+ (const void*)attribute.offset_\n+ );\n+ CV_CheckGlError();\n+ }\n+ else\n+ {\n+ gl::VertexAttribPointer(\n+ attribute.shader_loc_,\n+ attribute.size_,\n+ attribute.type_,\n+ attribute.normalized_,\n+ attribute.stride_,\n+ (const void*)attribute.offset_\n+ );\n+ CV_CheckGlError();\n+ 
}\n+\n+ gl::EnableVertexAttribArray(attribute.shader_loc_);\n+ CV_CheckGlError();\n+ }\n+}\n+\n+cv::ogl::VertexArray::Impl::~Impl()\n+{\n+ if (autoRelease_ && vaId_)\n+ gl::DeleteVertexArrays(1, &vaId_);\n+}\n+\n+void cv::ogl::VertexArray::Impl::bind()\n+{\n+ gl::BindVertexArray(vaId_);\n+ CV_CheckGlError();\n+}\n+\n+#endif // HAVE_OPENGL\n+\n+cv::ogl::VertexArray::VertexArray()\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+cv::ogl::VertexArray::VertexArray(std::initializer_list attributes, bool autoRelease)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(attributes);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(attributes, autoRelease));\n+#endif\n+}\n+\n+void cv::ogl::VertexArray::create(std::initializer_list attributes, bool autoRelease)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(attributes);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(attributes, autoRelease));\n+#endif\n+}\n+\n+void cv::ogl::VertexArray::release()\n+{\n+#ifdef HAVE_OPENGL\n+ if (impl_)\n+ impl_->setAutoRelease(true);\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+void cv::ogl::VertexArray::setAutoRelease(bool flag)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(flag);\n+ throw_no_ogl();\n+#else\n+ impl_->setAutoRelease(flag);\n+#endif\n+}\n+\n+void cv::ogl::VertexArray::bind() const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ impl_->bind();\n+#endif\n+}\n+\n+void cv::ogl::VertexArray::unbind()\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ gl::BindVertexArray(0);\n+#endif\n+}\n+\n+unsigned int cv::ogl::VertexArray::vaId() const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ return impl_->vaId();\n+#endif\n+}\n+\n+////////////////////////////////////////////////////////////////////////\n+// ogl::Shader\n+\n+#ifndef HAVE_OPENGL\n+\n+class cv::ogl::Shader::Impl\n+{\n+};\n+\n+#else\n+\n+class cv::ogl::Shader::Impl\n+{\n+public:\n+ static const Ptr& empty();\n+\n+ Impl(GLuint shaderId, bool autoRelease);\n+ Impl(const char* src, GLenum type, bool autoRelease);\n+ ~Impl();\n+\n+ void setAutoRelease(bool flag) { autoRelease_ = flag; }\n+\n+ GLuint shaderId() const { return shaderId_; }\n+\n+private:\n+ Impl();\n+\n+ GLuint shaderId_;\n+ bool autoRelease_;\n+};\n+\n+const Ptr& cv::ogl::Shader::Impl::empty()\n+{\n+ static Ptr p(new Impl);\n+ return p;\n+}\n+\n+cv::ogl::Shader::Impl::Impl() : shaderId_(0), autoRelease_(false)\n+{\n+}\n+\n+cv::ogl::Shader::Impl::Impl(GLuint shaderId, bool autoRelease) : shaderId_(shaderId), autoRelease_(autoRelease)\n+{\n+ CV_Assert( gl::IsShader(shaderId) == gl::TRUE_ );\n+}\n+\n+cv::ogl::Shader::Impl::Impl(const char* src, GLenum type, bool autoRelease) : shaderId_(0), autoRelease_(autoRelease)\n+{\n+ shaderId_ = gl::CreateShader(type);\n+ CV_CheckGlError();\n+\n+ CV_Assert( shaderId_ != 0 );\n+\n+ GLint status;\n+ gl::ShaderSource(shaderId_, 1, &src, 0);\n+ gl::CompileShader(shaderId_);\n+ gl::GetShaderiv(shaderId_, gl::COMPILE_STATUS, &status);\n+ if (!status) {\n+ String info_log;\n+ gl::GetShaderiv(shaderId_, gl::INFO_LOG_LENGTH, &status);\n+ info_log.resize(status);\n+ gl::GetShaderInfoLog(shaderId_, GLsizei(info_log.size()) + 1, nullptr, &info_log[0]);\n+ CV_Error(Error::OpenGlApiCallError, info_log);\n+ }\n+\n+ CV_CheckGlError();\n+}\n+\n+cv::ogl::Shader::Impl::~Impl()\n+{\n+ if (autoRelease_ && shaderId_)\n+ gl::DeleteShader(shaderId_);\n+}\n+\n+#endif // HAVE_OPENGL\n+\n+cv::ogl::Shader::Shader() : type_(Type::VERTEX)\n+{\n+#ifndef HAVE_OPENGL\n+ 
throw_no_ogl();\n+#else\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+cv::ogl::Shader::Shader(const char* src, Type type, bool autoRelease) : type_(type)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(src);\n+ CV_UNUSED(type);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(src, type, autoRelease));\n+#endif\n+}\n+\n+void cv::ogl::Shader::create(const char* src, Type type, bool autoRelease)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(src);\n+ CV_UNUSED(type);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(src, type, autoRelease));\n+ type_ = type;\n+#endif\n+}\n+\n+void cv::ogl::Shader::release()\n+{\n+#ifdef HAVE_OPENGL\n+ if (impl_)\n+ impl_->setAutoRelease(true);\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+void cv::ogl::Shader::setAutoRelease(bool flag)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(flag);\n+ throw_no_ogl();\n+#else\n+ impl_->setAutoRelease(flag);\n+#endif\n+}\n+\n+unsigned int cv::ogl::Shader::shaderId() const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ return impl_->shaderId();\n+#endif\n+}\n+\n+////////////////////////////////////////////////////////////////////////\n+// ogl::Program\n+\n+#ifndef HAVE_OPENGL\n+\n+class cv::ogl::Program::Impl\n+{\n+};\n+\n+#else\n+\n+class cv::ogl::Program::Impl\n+{\n+public:\n+ static const Ptr& empty();\n+\n+ Impl(GLuint programId, bool autoRelease);\n+ Impl(Shader vert, Shader frag, bool autoRelease);\n+ ~Impl();\n+\n+ void bind();\n+\n+ int getAttributeLocation(const char* name) const;\n+ int getUniformLocation(const char* name) const;\n+\n+ void setAutoRelease(bool flag) { autoRelease_ = flag; }\n+\n+ GLuint programId() const { return programId_; }\n+\n+private:\n+ Impl();\n+\n+ Shader vert_;\n+ Shader frag_;\n+ GLuint programId_;\n+ bool autoRelease_;\n+};\n+\n+const Ptr& cv::ogl::Program::Impl::empty()\n+{\n+ static Ptr p(new Impl);\n+ return p;\n+}\n+\n+cv::ogl::Program::Impl::Impl() : programId_(0), autoRelease_(false)\n+{\n+}\n+\n+cv::ogl::Program::Impl::Impl(GLuint programId, bool autoRelease) : programId_(programId), autoRelease_(autoRelease)\n+{\n+ CV_Assert( gl::IsProgram(programId) == gl::TRUE_ );\n+}\n+\n+cv::ogl::Program::Impl::Impl(Shader vert, Shader frag, bool autoRelease) : programId_(0), autoRelease_(autoRelease)\n+{\n+ programId_ = gl::CreateProgram();\n+ CV_CheckGlError();\n+\n+ CV_Assert( programId_ != 0 );\n+\n+ gl::AttachShader(programId_, vert.shaderId());\n+ CV_CheckGlError();\n+\n+ gl::AttachShader(programId_, frag.shaderId());\n+ CV_CheckGlError();\n+\n+ GLint status;\n+ gl::LinkProgram(programId_);\n+ gl::GetProgramiv(programId_, gl::LINK_STATUS, &status);\n+ if (!status) {\n+ String info_log;\n+ gl::GetProgramiv(programId_, gl::INFO_LOG_LENGTH, &status);\n+ info_log.resize(status);\n+ gl::GetProgramInfoLog(programId_, GLsizei(info_log.size()) + 1, nullptr, &info_log[0]);\n+ CV_Error(Error::OpenGlApiCallError, info_log);\n+ }\n+ \n+ CV_CheckGlError();\n+\n+ vert_ = vert;\n+ frag_ = frag;\n+}\n+\n+cv::ogl::Program::Impl::~Impl()\n+{\n+ if (autoRelease_ && programId_)\n+ gl::DeleteProgram(programId_);\n+}\n+\n+void cv::ogl::Program::Impl::bind()\n+{\n+ gl::UseProgram(programId_);\n+ CV_CheckGlError();\n+}\n+\n+int cv::ogl::Program::Impl::getAttributeLocation(const char* name) const\n+{\n+ int location = gl::GetAttribLocation(programId_, name);\n+ if (location < 0)\n+ {\n+ CV_Error(Error::OpenGlApiCallError, \"Attribute not found\");\n+ }\n+ CV_CheckGlError();\n+ return location;\n+}\n+\n+int cv::ogl::Program::Impl::getUniformLocation(const char* name) 
const\n+{\n+ int location = gl::GetUniformLocation(programId_, name);\n+ if (location < 0)\n+ {\n+ CV_Error(Error::OpenGlApiCallError, \"Attribute not found\");\n+ }\n+ CV_CheckGlError();\n+ return location;\n+}\n+\n+#endif // HAVE_OPENGL\n+\n+cv::ogl::Program::Program()\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+cv::ogl::Program::Program(Shader vert, Shader frag, bool autoRelease)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(vert);\n+ CV_UNUSED(frag);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(vert, frag, autoRelease));\n+#endif\n+}\n+\n+void cv::ogl::Program::create(Shader vert, Shader frag, bool autoRelease)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(vert);\n+ CV_UNUSED(frag);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(vert, frag, autoRelease));\n+#endif\n+}\n+\n+void cv::ogl::Program::release()\n+{\n+#ifdef HAVE_OPENGL\n+ if (impl_)\n+ impl_->setAutoRelease(true);\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+void cv::ogl::Program::setAutoRelease(bool flag)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(flag);\n+ throw_no_ogl();\n+#else\n+ impl_->setAutoRelease(flag);\n+#endif\n+}\n+\n+void cv::ogl::Program::bind() const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ impl_->bind();\n+#endif\n+}\n+\n+void cv::ogl::Program::unbind()\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ gl::UseProgram(0);\n+#endif\n+}\n+\n+int cv::ogl::Program::getAttributeLocation(const char* name) const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ return impl_->getAttributeLocation(name);\n+#endif\n+}\n+\n+int cv::ogl::Program::getUniformLocation(const char* name) const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ return impl_->getUniformLocation(name);\n+#endif\n+}\n+\n+void cv::ogl::Program::setUniformVec3(int loc, Vec3f vec)\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ gl::Uniform3f(loc, vec(0), vec(1), vec(2));\n+#endif\n+}\n+\n+void cv::ogl::Program::setUniformMat4x4(int loc, Matx44f mat)\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ gl::UniformMatrix4fv(loc, 1, true, mat.val);\n+#endif\n+}\n+\n+unsigned int cv::ogl::Program::programId() const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ return impl_->programId();\n+#endif\n+}\n+", + "comment_created_at": "2021-10-07T18:42:17+00:00", + "comment_author": "JulieBar", + "comment_body": "please reduce the amount of duplicated code if possible. you can implement methods: empty, create, release, setAutoRelease, bind, unbind, and then re-use it. 
", + "pr_file_module": null + }, + { + "comment_id": "729655562", + "repo_full_name": "opencv/opencv", + "pr_number": 20371, + "pr_file": "modules/core/src/opengl.cpp", + "discussion_id": "724440487", + "commented_code": "@@ -1405,6 +1412,574 @@ void cv::ogl::Arrays::bind() const\n #endif\n }\n \n+\n+////////////////////////////////////////////////////////////////////////\n+// ogl::Attribute\n+\n+cv::ogl::Attribute::Attribute(Buffer buffer, size_t stride, size_t offset, int size, Type type, bool integer, bool normalized, unsigned int shader_loc)\n+{\n+ buffer_ = buffer;\n+ stride_ = stride;\n+ offset_ = offset;\n+ size_ = size;\n+ type_ = type;\n+ integer_ = integer;\n+ normalized_ = normalized;\n+ shader_loc_ = shader_loc;\n+}\n+\n+////////////////////////////////////////////////////////////////////////\n+// ogl::VertexArray\n+\n+#ifndef HAVE_OPENGL\n+\n+class cv::ogl::VertexArray::Impl\n+{\n+};\n+\n+#else\n+\n+class cv::ogl::VertexArray::Impl\n+{\n+public:\n+ static const Ptr& empty();\n+\n+ Impl(GLuint vaId, bool autoRelease);\n+ Impl(std::initializer_list attributes, bool autoRelease);\n+ ~Impl();\n+\n+ void bind();\n+\n+ void setAutoRelease(bool flag) { autoRelease_ = flag; }\n+\n+ GLuint vaId() const { return vaId_; }\n+\n+private:\n+ Impl();\n+\n+ GLuint vaId_;\n+ bool autoRelease_;\n+};\n+\n+const Ptr& cv::ogl::VertexArray::Impl::empty()\n+{\n+ static Ptr p(new Impl);\n+ return p;\n+}\n+\n+cv::ogl::VertexArray::Impl::Impl() : vaId_(0), autoRelease_(false)\n+{\n+}\n+\n+cv::ogl::VertexArray::Impl::Impl(GLuint vaId, bool autoRelease) : vaId_(vaId), autoRelease_(autoRelease)\n+{\n+ CV_Assert( gl::IsVertexArray(vaId) == gl::TRUE_ );\n+}\n+\n+cv::ogl::VertexArray::Impl::Impl(std::initializer_list attributes, bool autoRelease) : vaId_(0), autoRelease_(autoRelease)\n+{\n+ gl::GenVertexArrays(1, &vaId_);\n+ CV_CheckGlError();\n+\n+ CV_Assert( vaId_ != 0 );\n+ gl::BindVertexArray(vaId_);\n+ CV_CheckGlError();\n+\n+ for (auto& attribute : attributes)\n+ {\n+ attribute.buffer_.bind(Buffer::ARRAY_BUFFER);\n+ if (attribute.integer_)\n+ {\n+ gl::VertexAttribIPointer(\n+ attribute.shader_loc_,\n+ attribute.size_,\n+ attribute.type_,\n+ attribute.stride_,\n+ (const void*)attribute.offset_\n+ );\n+ CV_CheckGlError();\n+ }\n+ else\n+ {\n+ gl::VertexAttribPointer(\n+ attribute.shader_loc_,\n+ attribute.size_,\n+ attribute.type_,\n+ attribute.normalized_,\n+ attribute.stride_,\n+ (const void*)attribute.offset_\n+ );\n+ CV_CheckGlError();\n+ }\n+\n+ gl::EnableVertexAttribArray(attribute.shader_loc_);\n+ CV_CheckGlError();\n+ }\n+}\n+\n+cv::ogl::VertexArray::Impl::~Impl()\n+{\n+ if (autoRelease_ && vaId_)\n+ gl::DeleteVertexArrays(1, &vaId_);\n+}\n+\n+void cv::ogl::VertexArray::Impl::bind()\n+{\n+ gl::BindVertexArray(vaId_);\n+ CV_CheckGlError();\n+}\n+\n+#endif // HAVE_OPENGL\n+\n+cv::ogl::VertexArray::VertexArray()\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+cv::ogl::VertexArray::VertexArray(std::initializer_list attributes, bool autoRelease)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(attributes);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(attributes, autoRelease));\n+#endif\n+}\n+\n+void cv::ogl::VertexArray::create(std::initializer_list attributes, bool autoRelease)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(attributes);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(attributes, autoRelease));\n+#endif\n+}\n+\n+void cv::ogl::VertexArray::release()\n+{\n+#ifdef HAVE_OPENGL\n+ if (impl_)\n+ 
impl_->setAutoRelease(true);\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+void cv::ogl::VertexArray::setAutoRelease(bool flag)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(flag);\n+ throw_no_ogl();\n+#else\n+ impl_->setAutoRelease(flag);\n+#endif\n+}\n+\n+void cv::ogl::VertexArray::bind() const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ impl_->bind();\n+#endif\n+}\n+\n+void cv::ogl::VertexArray::unbind()\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ gl::BindVertexArray(0);\n+#endif\n+}\n+\n+unsigned int cv::ogl::VertexArray::vaId() const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ return impl_->vaId();\n+#endif\n+}\n+\n+////////////////////////////////////////////////////////////////////////\n+// ogl::Shader\n+\n+#ifndef HAVE_OPENGL\n+\n+class cv::ogl::Shader::Impl\n+{\n+};\n+\n+#else\n+\n+class cv::ogl::Shader::Impl\n+{\n+public:\n+ static const Ptr& empty();\n+\n+ Impl(GLuint shaderId, bool autoRelease);\n+ Impl(const char* src, GLenum type, bool autoRelease);\n+ ~Impl();\n+\n+ void setAutoRelease(bool flag) { autoRelease_ = flag; }\n+\n+ GLuint shaderId() const { return shaderId_; }\n+\n+private:\n+ Impl();\n+\n+ GLuint shaderId_;\n+ bool autoRelease_;\n+};\n+\n+const Ptr& cv::ogl::Shader::Impl::empty()\n+{\n+ static Ptr p(new Impl);\n+ return p;\n+}\n+\n+cv::ogl::Shader::Impl::Impl() : shaderId_(0), autoRelease_(false)\n+{\n+}\n+\n+cv::ogl::Shader::Impl::Impl(GLuint shaderId, bool autoRelease) : shaderId_(shaderId), autoRelease_(autoRelease)\n+{\n+ CV_Assert( gl::IsShader(shaderId) == gl::TRUE_ );\n+}\n+\n+cv::ogl::Shader::Impl::Impl(const char* src, GLenum type, bool autoRelease) : shaderId_(0), autoRelease_(autoRelease)\n+{\n+ shaderId_ = gl::CreateShader(type);\n+ CV_CheckGlError();\n+\n+ CV_Assert( shaderId_ != 0 );\n+\n+ GLint status;\n+ gl::ShaderSource(shaderId_, 1, &src, 0);\n+ gl::CompileShader(shaderId_);\n+ gl::GetShaderiv(shaderId_, gl::COMPILE_STATUS, &status);\n+ if (!status) {\n+ String info_log;\n+ gl::GetShaderiv(shaderId_, gl::INFO_LOG_LENGTH, &status);\n+ info_log.resize(status);\n+ gl::GetShaderInfoLog(shaderId_, GLsizei(info_log.size()) + 1, nullptr, &info_log[0]);\n+ CV_Error(Error::OpenGlApiCallError, info_log);\n+ }\n+\n+ CV_CheckGlError();\n+}\n+\n+cv::ogl::Shader::Impl::~Impl()\n+{\n+ if (autoRelease_ && shaderId_)\n+ gl::DeleteShader(shaderId_);\n+}\n+\n+#endif // HAVE_OPENGL\n+\n+cv::ogl::Shader::Shader() : type_(Type::VERTEX)\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+cv::ogl::Shader::Shader(const char* src, Type type, bool autoRelease) : type_(type)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(src);\n+ CV_UNUSED(type);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(src, type, autoRelease));\n+#endif\n+}\n+\n+void cv::ogl::Shader::create(const char* src, Type type, bool autoRelease)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(src);\n+ CV_UNUSED(type);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(src, type, autoRelease));\n+ type_ = type;\n+#endif\n+}\n+\n+void cv::ogl::Shader::release()\n+{\n+#ifdef HAVE_OPENGL\n+ if (impl_)\n+ impl_->setAutoRelease(true);\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+void cv::ogl::Shader::setAutoRelease(bool flag)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(flag);\n+ throw_no_ogl();\n+#else\n+ impl_->setAutoRelease(flag);\n+#endif\n+}\n+\n+unsigned int cv::ogl::Shader::shaderId() const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ return 
impl_->shaderId();\n+#endif\n+}\n+\n+////////////////////////////////////////////////////////////////////////\n+// ogl::Program\n+\n+#ifndef HAVE_OPENGL\n+\n+class cv::ogl::Program::Impl\n+{\n+};\n+\n+#else\n+\n+class cv::ogl::Program::Impl\n+{\n+public:\n+ static const Ptr& empty();\n+\n+ Impl(GLuint programId, bool autoRelease);\n+ Impl(Shader vert, Shader frag, bool autoRelease);\n+ ~Impl();\n+\n+ void bind();\n+\n+ int getAttributeLocation(const char* name) const;\n+ int getUniformLocation(const char* name) const;\n+\n+ void setAutoRelease(bool flag) { autoRelease_ = flag; }\n+\n+ GLuint programId() const { return programId_; }\n+\n+private:\n+ Impl();\n+\n+ Shader vert_;\n+ Shader frag_;\n+ GLuint programId_;\n+ bool autoRelease_;\n+};\n+\n+const Ptr& cv::ogl::Program::Impl::empty()\n+{\n+ static Ptr p(new Impl);\n+ return p;\n+}\n+\n+cv::ogl::Program::Impl::Impl() : programId_(0), autoRelease_(false)\n+{\n+}\n+\n+cv::ogl::Program::Impl::Impl(GLuint programId, bool autoRelease) : programId_(programId), autoRelease_(autoRelease)\n+{\n+ CV_Assert( gl::IsProgram(programId) == gl::TRUE_ );\n+}\n+\n+cv::ogl::Program::Impl::Impl(Shader vert, Shader frag, bool autoRelease) : programId_(0), autoRelease_(autoRelease)\n+{\n+ programId_ = gl::CreateProgram();\n+ CV_CheckGlError();\n+\n+ CV_Assert( programId_ != 0 );\n+\n+ gl::AttachShader(programId_, vert.shaderId());\n+ CV_CheckGlError();\n+\n+ gl::AttachShader(programId_, frag.shaderId());\n+ CV_CheckGlError();\n+\n+ GLint status;\n+ gl::LinkProgram(programId_);\n+ gl::GetProgramiv(programId_, gl::LINK_STATUS, &status);\n+ if (!status) {\n+ String info_log;\n+ gl::GetProgramiv(programId_, gl::INFO_LOG_LENGTH, &status);\n+ info_log.resize(status);\n+ gl::GetProgramInfoLog(programId_, GLsizei(info_log.size()) + 1, nullptr, &info_log[0]);\n+ CV_Error(Error::OpenGlApiCallError, info_log);\n+ }\n+ \n+ CV_CheckGlError();\n+\n+ vert_ = vert;\n+ frag_ = frag;\n+}\n+\n+cv::ogl::Program::Impl::~Impl()\n+{\n+ if (autoRelease_ && programId_)\n+ gl::DeleteProgram(programId_);\n+}\n+\n+void cv::ogl::Program::Impl::bind()\n+{\n+ gl::UseProgram(programId_);\n+ CV_CheckGlError();\n+}\n+\n+int cv::ogl::Program::Impl::getAttributeLocation(const char* name) const\n+{\n+ int location = gl::GetAttribLocation(programId_, name);\n+ if (location < 0)\n+ {\n+ CV_Error(Error::OpenGlApiCallError, \"Attribute not found\");\n+ }\n+ CV_CheckGlError();\n+ return location;\n+}\n+\n+int cv::ogl::Program::Impl::getUniformLocation(const char* name) const\n+{\n+ int location = gl::GetUniformLocation(programId_, name);\n+ if (location < 0)\n+ {\n+ CV_Error(Error::OpenGlApiCallError, \"Attribute not found\");\n+ }\n+ CV_CheckGlError();\n+ return location;\n+}\n+\n+#endif // HAVE_OPENGL\n+\n+cv::ogl::Program::Program()\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+cv::ogl::Program::Program(Shader vert, Shader frag, bool autoRelease)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(vert);\n+ CV_UNUSED(frag);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(vert, frag, autoRelease));\n+#endif\n+}\n+\n+void cv::ogl::Program::create(Shader vert, Shader frag, bool autoRelease)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(vert);\n+ CV_UNUSED(frag);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(vert, frag, autoRelease));\n+#endif\n+}\n+\n+void cv::ogl::Program::release()\n+{\n+#ifdef HAVE_OPENGL\n+ if (impl_)\n+ impl_->setAutoRelease(true);\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+void 
cv::ogl::Program::setAutoRelease(bool flag)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(flag);\n+ throw_no_ogl();\n+#else\n+ impl_->setAutoRelease(flag);\n+#endif\n+}\n+\n+void cv::ogl::Program::bind() const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ impl_->bind();\n+#endif\n+}\n+\n+void cv::ogl::Program::unbind()\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ gl::UseProgram(0);\n+#endif\n+}\n+\n+int cv::ogl::Program::getAttributeLocation(const char* name) const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ return impl_->getAttributeLocation(name);\n+#endif\n+}\n+\n+int cv::ogl::Program::getUniformLocation(const char* name) const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ return impl_->getUniformLocation(name);\n+#endif\n+}\n+\n+void cv::ogl::Program::setUniformVec3(int loc, Vec3f vec)\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ gl::Uniform3f(loc, vec(0), vec(1), vec(2));\n+#endif\n+}\n+\n+void cv::ogl::Program::setUniformMat4x4(int loc, Matx44f mat)\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ gl::UniformMatrix4fv(loc, 1, true, mat.val);\n+#endif\n+}\n+\n+unsigned int cv::ogl::Program::programId() const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ return impl_->programId();\n+#endif\n+}\n+", + "comment_created_at": "2021-10-15T09:12:14+00:00", + "comment_author": "RiscadoA", + "comment_body": "Can you clarify what you mean by duplicated code and those methods? I'm not seeing how it relates to the code section you commented. If the duplicated code you're referring to is those #ifndef #else #endif macros, I don't think it is possible to remove them.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "724474609", + "pr_number": 20371, + "pr_file": "modules/highgui/src/viz3d/viz3d.cpp", + "created_at": "2021-10-07T19:34:06+00:00", + "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n\n#include \"../precomp.hpp\"\n#include \"viz3d_private.hpp\"\n#include \"opencv2/core/utils/logger.hpp\"\n#include \"opencv2/imgproc.hpp\"\n\nusing namespace cv;\nusing namespace cv::viz3d;\n\n#ifdef HAVE_OPENGL\n\nstatic void openGlDrawCallback(void* data)\n{\n Window* win = static_cast(data);\n if (win != nullptr)\n win->draw();\n}\n\nstatic void openGlFreeCallback(void* data)\n{\n Window* win = static_cast(data);\n if (win != nullptr)\n delete win;\n}\n\nstatic void mouseCallback(int event, int x, int y, int flags, void* data)\n{\n Window* win = static_cast(data);\n if (win != nullptr)\n win->onMouse(event, x, y, flags);\n}\n\nstatic Window* getWindow(const String& win_name)\n{\n namedWindow(win_name, WINDOW_OPENGL);\n const double useGl = getWindowProperty(win_name, WND_PROP_OPENGL);\n if (useGl <= 0)\n CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the window was created without an OpenGL context\");\n\n Window* win = static_cast(getOpenGlUserData(win_name));\n\n if (!win)\n {\n\t\tsetOpenGlContext(win_name);\n\t\twin = new Window(win_name);\n\t\tsetOpenGlDrawCallback(win_name, &openGlDrawCallback, win);\n setOpenGlFreeCallback(win_name, &openGlFreeCallback);\n setMouseCallback(win_name, &mouseCallback, win);\n }\n else\n {\n auto callback = getOpenGlDrawCallback(win_name);\n if (callback != nullptr && callback != openGlDrawCallback)\n CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the OpenGL callback is already 
being used on this window\");\n }\n\n return win;\n}\n\n#endif // HAVE_OPENGL\n\nvoid cv::viz3d::setPerspective(const String& win_name, float fov, float z_near, float z_far)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(fov);\n CV_UNUSED(z_near);\n CV_UNUSED(z_far);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->getView().setPerspective(fov, z_near, z_far);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setGridVisible(const String& win_name, bool visible)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(visible);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->setGridVisible(visible);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setSun(const String& win_name, const Vec3f& direction, const Vec3f& ambient, const Vec3f& diffuse)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(direction);\n CV_UNUSED(ambient);\n CV_UNUSED(diffuse);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->setSun(direction, ambient, diffuse);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setSky(const String& win_name, const Vec3f& color)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(color);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->setSky(color);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::showBox(const String& win_name, const String& obj_name, const Vec3f& size, const Vec3f& color, RenderMode mode)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(size);\n CV_UNUSED(color);\n CV_UNUSED(mode);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n if (mode == RENDER_WIREFRAME)\n {\n float points_data[] = {\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), -size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n +size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n 
+size(0), +size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n };\n\n const Mat points_mat = Mat(Size(6, 24), CV_32F, &points_data);", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "724474609", + "repo_full_name": "opencv/opencv", + "pr_number": 20371, + "pr_file": "modules/highgui/src/viz3d/viz3d.cpp", + "discussion_id": "724474609", + "commented_code": "@@ -0,0 +1,1671 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+\n+#include \"../precomp.hpp\"\n+#include \"viz3d_private.hpp\"\n+#include \"opencv2/core/utils/logger.hpp\"\n+#include \"opencv2/imgproc.hpp\"\n+\n+using namespace cv;\n+using namespace cv::viz3d;\n+\n+#ifdef HAVE_OPENGL\n+\n+static void openGlDrawCallback(void* data)\n+{\n+ Window* win = static_cast(data);\n+ if (win != nullptr)\n+ win->draw();\n+}\n+\n+static void openGlFreeCallback(void* data)\n+{\n+ Window* win = static_cast(data);\n+ if (win != nullptr)\n+ delete win;\n+}\n+\n+static void mouseCallback(int event, int x, int y, int flags, void* data)\n+{\n+ Window* win = static_cast(data);\n+ if (win != nullptr)\n+ win->onMouse(event, x, y, flags);\n+}\n+\n+static Window* getWindow(const String& win_name)\n+{\n+ namedWindow(win_name, WINDOW_OPENGL);\n+ const double useGl = getWindowProperty(win_name, WND_PROP_OPENGL);\n+ if (useGl <= 0)\n+ CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the window was created without an OpenGL context\");\n+\n+ Window* win = static_cast(getOpenGlUserData(win_name));\n+\n+ if (!win)\n+ {\n+\t\tsetOpenGlContext(win_name);\n+\t\twin = new Window(win_name);\n+\t\tsetOpenGlDrawCallback(win_name, &openGlDrawCallback, win);\n+ setOpenGlFreeCallback(win_name, &openGlFreeCallback);\n+ setMouseCallback(win_name, &mouseCallback, win);\n+ }\n+ else\n+ {\n+ auto callback = getOpenGlDrawCallback(win_name);\n+ if (callback != nullptr && callback != openGlDrawCallback)\n+ CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the OpenGL callback is already being used on this window\");\n+ }\n+\n+ return win;\n+}\n+\n+#endif // HAVE_OPENGL\n+\n+void cv::viz3d::setPerspective(const String& win_name, float fov, float z_near, float z_far)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(fov);\n+ CV_UNUSED(z_near);\n+ CV_UNUSED(z_far);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->getView().setPerspective(fov, z_near, z_far);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::setGridVisible(const String& win_name, bool visible)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(visible);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->setGridVisible(visible);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::setSun(const String& win_name, const Vec3f& direction, const Vec3f& ambient, const Vec3f& diffuse)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(direction);\n+ CV_UNUSED(ambient);\n+ CV_UNUSED(diffuse);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = 
getWindow(win_name);\n+ win->setSun(direction, ambient, diffuse);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::setSky(const String& win_name, const Vec3f& color)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(color);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->setSky(color);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::showBox(const String& win_name, const String& obj_name, const Vec3f& size, const Vec3f& color, RenderMode mode)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(size);\n+ CV_UNUSED(color);\n+ CV_UNUSED(mode);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ if (mode == RENDER_WIREFRAME)\n+ {\n+ float points_data[] = {\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ };\n+\n+ const Mat points_mat = Mat(Size(6, 24), CV_32F, &points_data);", + "comment_created_at": "2021-10-07T19:34:06+00:00", + "comment_author": "JulieBar", + "comment_body": "do not use magic numbers 6, 24; get it from the size of previous array", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "724475444", + "pr_number": 20371, + "pr_file": "modules/highgui/src/viz3d/viz3d.cpp", + "created_at": "2021-10-07T19:35:28+00:00", + "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n\n#include \"../precomp.hpp\"\n#include \"viz3d_private.hpp\"\n#include \"opencv2/core/utils/logger.hpp\"\n#include \"opencv2/imgproc.hpp\"\n\nusing namespace cv;\nusing namespace cv::viz3d;\n\n#ifdef HAVE_OPENGL\n\nstatic void openGlDrawCallback(void* data)\n{\n Window* win = static_cast(data);\n if (win != nullptr)\n win->draw();\n}\n\nstatic void openGlFreeCallback(void* data)\n{\n Window* win = static_cast(data);\n if (win 
!= nullptr)\n delete win;\n}\n\nstatic void mouseCallback(int event, int x, int y, int flags, void* data)\n{\n Window* win = static_cast(data);\n if (win != nullptr)\n win->onMouse(event, x, y, flags);\n}\n\nstatic Window* getWindow(const String& win_name)\n{\n namedWindow(win_name, WINDOW_OPENGL);\n const double useGl = getWindowProperty(win_name, WND_PROP_OPENGL);\n if (useGl <= 0)\n CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the window was created without an OpenGL context\");\n\n Window* win = static_cast(getOpenGlUserData(win_name));\n\n if (!win)\n {\n\t\tsetOpenGlContext(win_name);\n\t\twin = new Window(win_name);\n\t\tsetOpenGlDrawCallback(win_name, &openGlDrawCallback, win);\n setOpenGlFreeCallback(win_name, &openGlFreeCallback);\n setMouseCallback(win_name, &mouseCallback, win);\n }\n else\n {\n auto callback = getOpenGlDrawCallback(win_name);\n if (callback != nullptr && callback != openGlDrawCallback)\n CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the OpenGL callback is already being used on this window\");\n }\n\n return win;\n}\n\n#endif // HAVE_OPENGL\n\nvoid cv::viz3d::setPerspective(const String& win_name, float fov, float z_near, float z_far)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(fov);\n CV_UNUSED(z_near);\n CV_UNUSED(z_far);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->getView().setPerspective(fov, z_near, z_far);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setGridVisible(const String& win_name, bool visible)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(visible);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->setGridVisible(visible);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setSun(const String& win_name, const Vec3f& direction, const Vec3f& ambient, const Vec3f& diffuse)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(direction);\n CV_UNUSED(ambient);\n CV_UNUSED(diffuse);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->setSun(direction, ambient, diffuse);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setSky(const String& win_name, const Vec3f& color)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(color);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->setSky(color);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::showBox(const String& win_name, const String& obj_name, const Vec3f& size, const Vec3f& color, RenderMode mode)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(size);\n CV_UNUSED(color);\n CV_UNUSED(mode);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n if (mode == RENDER_WIREFRAME)\n {\n float points_data[] = {\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), 
color(2),\n +size(0), -size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n +size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n +size(0), +size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n };\n\n const Mat points_mat = Mat(Size(6, 24), CV_32F, &points_data);\n\n showLines(win_name, obj_name, points_mat);\n }\n else if (mode == RENDER_SIMPLE)\n {\n float verts_data[] = {\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n };\n\n static unsigned char indices_data[] = {\n 0, 1, 2,\n 2, 3, 0,\n 4, 5, 6,\n 6, 7, 4,\n 0, 1, 5,\n 5, 4, 0,\n 3, 2, 6,\n 6, 7, 3,\n 0, 3, 7,\n 7, 4, 0,\n 1, 2, 6,\n 6, 5, 1,\n };\n\n const Mat verts_mat = Mat(Size(6, 8), CV_32F, &verts_data);\n const Mat indices_mat = Mat(Size(3, 12), CV_8U, &indices_data);", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "724475444", + "repo_full_name": "opencv/opencv", + "pr_number": 20371, + "pr_file": "modules/highgui/src/viz3d/viz3d.cpp", + "discussion_id": "724475444", + "commented_code": "@@ -0,0 +1,1671 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+\n+#include \"../precomp.hpp\"\n+#include \"viz3d_private.hpp\"\n+#include \"opencv2/core/utils/logger.hpp\"\n+#include \"opencv2/imgproc.hpp\"\n+\n+using namespace cv;\n+using namespace cv::viz3d;\n+\n+#ifdef HAVE_OPENGL\n+\n+static void openGlDrawCallback(void* data)\n+{\n+ Window* win = static_cast(data);\n+ if (win != nullptr)\n+ win->draw();\n+}\n+\n+static void openGlFreeCallback(void* data)\n+{\n+ Window* win = static_cast(data);\n+ if (win != nullptr)\n+ delete win;\n+}\n+\n+static void mouseCallback(int event, int x, int y, int flags, void* data)\n+{\n+ Window* win = static_cast(data);\n+ if (win != nullptr)\n+ win->onMouse(event, x, y, flags);\n+}\n+\n+static Window* getWindow(const String& win_name)\n+{\n+ namedWindow(win_name, WINDOW_OPENGL);\n+ const double useGl = getWindowProperty(win_name, WND_PROP_OPENGL);\n+ if (useGl <= 0)\n+ 
CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the window was created without an OpenGL context\");\n+\n+ Window* win = static_cast(getOpenGlUserData(win_name));\n+\n+ if (!win)\n+ {\n+\t\tsetOpenGlContext(win_name);\n+\t\twin = new Window(win_name);\n+\t\tsetOpenGlDrawCallback(win_name, &openGlDrawCallback, win);\n+ setOpenGlFreeCallback(win_name, &openGlFreeCallback);\n+ setMouseCallback(win_name, &mouseCallback, win);\n+ }\n+ else\n+ {\n+ auto callback = getOpenGlDrawCallback(win_name);\n+ if (callback != nullptr && callback != openGlDrawCallback)\n+ CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the OpenGL callback is already being used on this window\");\n+ }\n+\n+ return win;\n+}\n+\n+#endif // HAVE_OPENGL\n+\n+void cv::viz3d::setPerspective(const String& win_name, float fov, float z_near, float z_far)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(fov);\n+ CV_UNUSED(z_near);\n+ CV_UNUSED(z_far);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->getView().setPerspective(fov, z_near, z_far);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::setGridVisible(const String& win_name, bool visible)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(visible);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->setGridVisible(visible);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::setSun(const String& win_name, const Vec3f& direction, const Vec3f& ambient, const Vec3f& diffuse)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(direction);\n+ CV_UNUSED(ambient);\n+ CV_UNUSED(diffuse);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->setSun(direction, ambient, diffuse);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::setSky(const String& win_name, const Vec3f& color)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(color);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->setSky(color);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::showBox(const String& win_name, const String& obj_name, const Vec3f& size, const Vec3f& color, RenderMode mode)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(size);\n+ CV_UNUSED(color);\n+ CV_UNUSED(mode);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ if (mode == RENDER_WIREFRAME)\n+ {\n+ float points_data[] = {\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), 
-size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ };\n+\n+ const Mat points_mat = Mat(Size(6, 24), CV_32F, &points_data);\n+\n+ showLines(win_name, obj_name, points_mat);\n+ }\n+ else if (mode == RENDER_SIMPLE)\n+ {\n+ float verts_data[] = {\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ };\n+\n+ static unsigned char indices_data[] = {\n+ 0, 1, 2,\n+ 2, 3, 0,\n+ 4, 5, 6,\n+ 6, 7, 4,\n+ 0, 1, 5,\n+ 5, 4, 0,\n+ 3, 2, 6,\n+ 6, 7, 3,\n+ 0, 3, 7,\n+ 7, 4, 0,\n+ 1, 2, 6,\n+ 6, 5, 1,\n+ };\n+\n+ const Mat verts_mat = Mat(Size(6, 8), CV_32F, &verts_data);\n+ const Mat indices_mat = Mat(Size(3, 12), CV_8U, &indices_data);", + "comment_created_at": "2021-10-07T19:35:28+00:00", + "comment_author": "JulieBar", + "comment_body": "no magic numbers; use std::array and get size", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "724533336", + "pr_number": 20371, + "pr_file": "modules/highgui/src/viz3d/viz3d.cpp", + "created_at": "2021-10-07T21:05:49+00:00", + "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n\n#include \"../precomp.hpp\"\n#include \"viz3d_private.hpp\"\n#include \"opencv2/core/utils/logger.hpp\"\n#include \"opencv2/imgproc.hpp\"\n\nusing namespace cv;\nusing namespace cv::viz3d;\n\n#ifdef HAVE_OPENGL\n\nstatic void openGlDrawCallback(void* data)\n{\n Window* win = static_cast(data);\n if (win != nullptr)\n win->draw();\n}\n\nstatic void openGlFreeCallback(void* data)\n{\n Window* win = static_cast(data);\n if (win != nullptr)\n delete win;\n}\n\nstatic void mouseCallback(int event, int x, int y, int flags, void* data)\n{\n Window* win = static_cast(data);\n if (win != nullptr)\n win->onMouse(event, x, y, flags);\n}\n\nstatic Window* getWindow(const String& win_name)\n{\n namedWindow(win_name, WINDOW_OPENGL);\n const double useGl = getWindowProperty(win_name, WND_PROP_OPENGL);\n if (useGl <= 0)\n CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the window was created without an OpenGL context\");\n\n Window* win = static_cast(getOpenGlUserData(win_name));\n\n if (!win)\n 
{\n\t\tsetOpenGlContext(win_name);\n\t\twin = new Window(win_name);\n\t\tsetOpenGlDrawCallback(win_name, &openGlDrawCallback, win);\n setOpenGlFreeCallback(win_name, &openGlFreeCallback);\n setMouseCallback(win_name, &mouseCallback, win);\n }\n else\n {\n auto callback = getOpenGlDrawCallback(win_name);\n if (callback != nullptr && callback != openGlDrawCallback)\n CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the OpenGL callback is already being used on this window\");\n }\n\n return win;\n}\n\n#endif // HAVE_OPENGL\n\nvoid cv::viz3d::setPerspective(const String& win_name, float fov, float z_near, float z_far)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(fov);\n CV_UNUSED(z_near);\n CV_UNUSED(z_far);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->getView().setPerspective(fov, z_near, z_far);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setGridVisible(const String& win_name, bool visible)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(visible);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->setGridVisible(visible);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setSun(const String& win_name, const Vec3f& direction, const Vec3f& ambient, const Vec3f& diffuse)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(direction);\n CV_UNUSED(ambient);\n CV_UNUSED(diffuse);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->setSun(direction, ambient, diffuse);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setSky(const String& win_name, const Vec3f& color)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(color);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->setSky(color);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::showBox(const String& win_name, const String& obj_name, const Vec3f& size, const Vec3f& color, RenderMode mode)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(size);\n CV_UNUSED(color);\n CV_UNUSED(mode);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n if (mode == RENDER_WIREFRAME)\n {\n float points_data[] = {\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), -size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n +size(0), -size(1), 
+size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n +size(0), +size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n };\n\n const Mat points_mat = Mat(Size(6, 24), CV_32F, &points_data);\n\n showLines(win_name, obj_name, points_mat);\n }\n else if (mode == RENDER_SIMPLE)\n {\n float verts_data[] = {\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n };\n\n static unsigned char indices_data[] = {\n 0, 1, 2,\n 2, 3, 0,\n 4, 5, 6,\n 6, 7, 4,\n 0, 1, 5,\n 5, 4, 0,\n 3, 2, 6,\n 6, 7, 3,\n 0, 3, 7,\n 7, 4, 0,\n 1, 2, 6,\n 6, 5, 1,\n };\n\n const Mat verts_mat = Mat(Size(6, 8), CV_32F, &verts_data);\n const Mat indices_mat = Mat(Size(3, 12), CV_8U, &indices_data);\n\n showMesh(win_name, obj_name, verts_mat, indices_mat);\n }\n else if (mode == RENDER_SHADING)\n {\n float verts_data[] = {\n -size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 0\n +size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 1\n +size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 2\n +size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 2\n -size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 3\n -size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 0\n\n -size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 4\n +size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 5\n +size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 6\n +size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 6\n -size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 7\n -size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 4\n\n\n -size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 0\n +size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 1\n +size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 5\n +size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 5\n -size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 4\n -size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 0\n\n -size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 3\n +size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 2\n +size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 6\n +size(0), 
+size(1), +size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 6\n -size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 7\n -size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 3\n\n\n -size(0), -size(1), -size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 0\n -size(0), +size(1), -size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 3\n -size(0), +size(1), +size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 7\n -size(0), +size(1), +size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 7\n -size(0), -size(1), +size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 4\n -size(0), -size(1), -size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 0\n\n +size(0), -size(1), -size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 1\n +size(0), +size(1), -size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 2\n +size(0), +size(1), +size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 6\n +size(0), +size(1), +size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 6\n +size(0), -size(1), +size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 5\n +size(0), -size(1), -size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 1\n };\n\n const Mat verts_mat = Mat(Size(9, 36), CV_32F, &verts_data);\n\n showMesh(win_name, obj_name, verts_mat);\n }\n#endif\n}\n\nvoid cv::viz3d::showPlane(const String& win_name, const String& obj_name, const Vec2f& size, const Vec3f& color, RenderMode mode)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(size);\n CV_UNUSED(color);\n CV_UNUSED(mode);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n if (mode == RENDER_WIREFRAME)\n {\n float points_data[] = {\n -size(0), 0.0f, -size(1), color(0), color(1), color(2),\n +size(0), 0.0f, -size(1), color(0), color(1), color(2),\n +size(0), 0.0f, -size(1), color(0), color(1), color(2),\n +size(0), 0.0f, +size(1), color(0), color(1), color(2),\n +size(0), 0.0f, +size(1), color(0), color(1), color(2),\n -size(0), 0.0f, +size(1), color(0), color(1), color(2),\n -size(0), 0.0f, +size(1), color(0), color(1), color(2),\n -size(0), 0.0f, -size(1), color(0), color(1), color(2),\n };\n\n const Mat points_mat = Mat(Size(6, 8), CV_32F, points_data);\n\n showLines(win_name, obj_name, points_mat);\n }\n else if (mode == RENDER_SIMPLE)\n {\n float verts_data[] = {\n -size(0), 0.0f, -size(1), color(0), color(1), color(2),\n +size(0), 0.0f, -size(1), color(0), color(1), color(2),\n +size(0), 0.0f, +size(1), color(0), color(1), color(2),\n -size(0), 0.0f, -size(1), color(0), color(1), color(2),\n -size(0), 0.0f, +size(1), color(0), color(1), color(2),\n +size(0), 0.0f, +size(1), color(0), color(1), color(2),\n };\n\n const Mat verts_mat = Mat(Size(6, 6), CV_32F, verts_data);\n\n showMesh(win_name, obj_name, verts_mat);\n }\n else if (mode == RENDER_SHADING)\n {\n float verts_data[] = {\n -size(0), 0.0f, -size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n +size(0), 0.0f, -size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n +size(0), 0.0f, +size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n -size(0), 0.0f, -size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n -size(0), 0.0f, +size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n +size(0), 0.0f, +size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n };\n\n const Mat verts_mat = Mat(Size(9, 6), CV_32F, 
verts_data);\n\n showMesh(win_name, obj_name, verts_mat);\n }\n#endif\n}\n\nvoid cv::viz3d::showSphere(const String& win_name, const String& obj_name, float radius, const Vec3f& color, RenderMode mode, int divs)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(radius);\n CV_UNUSED(color);\n CV_UNUSED(mode);\n CV_UNUSED(divs);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n CV_Assert(divs >= 1);\n\n if (mode == RENDER_WIREFRAME)\n {\n const float limit = 2.0f * 3.1415f;\n std::vector points_data;\n\n for (int e = 0; e < 3; ++e)\n {\n auto ex = Vec3f::zeros();\n auto ey = Vec3f::zeros();\n ex((e + 0) % 3) = radius;\n ey((e + 1) % 3) = radius;\n\n for (float t = 0.0f, n; t < limit;)\n {\n n = t + limit / (divs * 4);\n points_data.insert(points_data.end(), {\n ex(0) * cos(t) + ey(0) * sin(t), ex(1) * cos(t) + ey(1) * sin(t), ex(2) * cos(t) + ey(2) * sin(t), color(0), color(1), color(2),\n ex(0) * cos(n) + ey(0) * sin(n), ex(1) * cos(n) + ey(1) * sin(n), ex(2) * cos(n) + ey(2) * sin(n), color(0), color(1), color(2),\n });\n t = n;\n }\n }\n\n const Mat points_mat = Mat(Size(6, points_data.size() / 6), CV_32F, points_data.data());\n\n showLines(win_name, obj_name, points_mat);\n }\n else\n {\n std::vector verts_data;\n\n for (int s = -1; s <= 1; s += 2)\n for (int e = 0; e < 3; ++e)\n {\n auto ex = Vec3f::zeros();\n auto ey = Vec3f::zeros();\n auto pz = Vec3f::zeros();\n ex((e + 1) % 3) = 1.0f / divs;\n ey((e + 2) % 3) = 1.0f / divs;\n pz(e) = s;\n\n for (int x = -divs; x < divs; ++x)\n for (int y = -divs; y < divs; ++y)\n if (mode == RENDER_SIMPLE)\n verts_data.insert(verts_data.end(), {\n ex(0) * (x + 0) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2),\n ex(0) * (x + 1) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2),\n ex(0) * (x + 1) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2),\n ex(0) * (x + 1) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2),\n ex(0) * (x + 0) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2),\n ex(0) * (x + 0) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2),\n });\n else\n verts_data.insert(verts_data.end(), {\n ex(0) * (x + 0) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n ex(0) * (x + 1) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n ex(0) * (x + 1) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n ex(0) * (x + 1) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n ex(0) * (x + 0) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 1) + pz(1), ex(2) * 
(x + 0) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n ex(0) * (x + 0) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n });\n }\n\n if (mode == RENDER_SIMPLE)\n {\n for (int i = 0; i < verts_data.size() / 6; ++i)\n {\n float l = sqrtf(verts_data[6 * i + 0] * verts_data[6 * i + 0] + verts_data[6 * i + 1] * verts_data[6 * i + 1] + verts_data[6 * i + 2] * verts_data[6 * i + 2]);\n verts_data[6 * i + 0] *= radius / l;\n verts_data[6 * i + 1] *= radius / l;\n verts_data[6 * i + 2] *= radius / l;\n }\n\n const Mat verts_mat = Mat(Size(6, verts_data.size() / 6), CV_32F, verts_data.data());\n showMesh(win_name, obj_name, verts_mat);\n }\n else\n {\n for (int i = 0; i < verts_data.size() / 9; ++i)\n {\n float l = sqrtf(verts_data[9 * i + 0] * verts_data[9 * i + 0] + verts_data[9 * i + 1] * verts_data[9 * i + 1] + verts_data[9 * i + 2] * verts_data[9 * i + 2]);\n verts_data[9 * i + 6] = verts_data[9 * i + 0] / l;\n verts_data[9 * i + 7] = verts_data[9 * i + 1] / l;\n verts_data[9 * i + 8] = verts_data[9 * i + 2] / l;\n verts_data[9 * i + 0] *= radius / l;\n verts_data[9 * i + 1] *= radius / l;\n verts_data[9 * i + 2] *= radius / l;\n }\n\n const Mat verts_mat = Mat(Size(9, verts_data.size() / 9), CV_32F, verts_data.data());\n showMesh(win_name, obj_name, verts_mat);\n }\n }\n#endif\n}\n\nvoid cv::viz3d::showCameraTrajectory(\n const String& win_name, const String& obj_name, InputArray trajectory,\n float aspect, float scale, Vec3f frustum_color, Vec3f line_color)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(trajectory);\n CV_UNUSED(aspect);\n CV_UNUSED(scale);\n CV_UNUSED(frustum_color);\n CV_UNUSED(line_color);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n CV_Assert(trajectory.dims() == 2 && trajectory.cols() == 6 && trajectory.depth() == CV_32F);\n\n auto data = trajectory.getMat();\n std::vector points_data;\n\n // Add frustums\n for (int i = 0; i < data.rows; ++i)\n {\n Vec3f position = { data.at(i, 0), data.at(i, 1), data.at(i, 2) };\n Vec3f forward = normalize(Vec3f { data.at(i, 3), data.at(i, 4), data.at(i, 5) });\n Vec3f world_up = { 0.0f, 1.0f, 0.0f };\n Vec3f right = forward.cross(world_up);\n Vec3f up = forward.cross(right);\n\n Vec3f back_f[4] = {\n position + (-right * aspect - up) * scale,\n position + ( right * aspect - up) * scale,\n position + ( right * aspect + up) * scale,\n position + (-right * aspect + up) * scale,\n };\n\n Vec3f front_f[4] = {\n position + (-right * aspect - up) * scale * 1.5f + forward * scale * 2.0f,\n position + (right * aspect - up) * scale * 1.5f + forward * scale * 2.0f,\n position + (right * aspect + up) * scale * 1.5f + forward * scale * 2.0f,\n position + (-right * aspect + up) * scale * 1.5f + forward * scale * 2.0f,\n };\n\n points_data.insert(points_data.end(), {\n back_f[0](0), back_f[0](1), back_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n back_f[1](0), back_f[1](1), back_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n back_f[1](0), back_f[1](1), back_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n back_f[2](0), back_f[2](1), back_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n back_f[2](0), back_f[2](1), back_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n back_f[3](0), back_f[3](1), back_f[3](2), 
frustum_color(0), frustum_color(1), frustum_color(2),\n back_f[3](0), back_f[3](1), back_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n back_f[0](0), back_f[0](1), back_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n\n front_f[0](0), front_f[0](1), front_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[1](0), front_f[1](1), front_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[1](0), front_f[1](1), front_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[2](0), front_f[2](1), front_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[2](0), front_f[2](1), front_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[3](0), front_f[3](1), front_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[3](0), front_f[3](1), front_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[0](0), front_f[0](1), front_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n\n back_f[0](0), back_f[0](1), back_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[0](0), front_f[0](1), front_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n back_f[1](0), back_f[1](1), back_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[1](0), front_f[1](1), front_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n back_f[2](0), back_f[2](1), back_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[2](0), front_f[2](1), front_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n back_f[3](0), back_f[3](1), back_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[3](0), front_f[3](1), front_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n });\n\n }\n\n // Add trajectory line\n for (int i = 1; i < data.rows; ++i)\n {\n points_data.insert(points_data.end(), {\n data.at(i - 1, 0), data.at(i - 1, 1), data.at(i - 1, 2), line_color(0), line_color(1), line_color(2),\n data.at(i, 0), data.at(i, 1), data.at(i, 2), line_color(0), line_color(1), line_color(2),\n });\n }\n\n const Mat points_mat = Mat(Size(6, points_data.size() / 6), CV_32F, points_data.data());\n showLines(win_name, obj_name, points_mat);\n\n#endif\n}\n\nvoid cv::viz3d::showMesh(const String& win_name, const String& obj_name, InputArray verts, InputArray indices)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(verts);\n CV_UNUSED(indices);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n Object* obj = win->get(obj_name);\n if (obj)\n delete obj;\n setOpenGlContext(win_name);\n obj = new Mesh(verts, indices);\n win->set(obj_name, obj);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::showMesh(const String& win_name, const String& obj_name, InputArray verts)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(verts);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n Object* obj = win->get(obj_name);\n if (obj)\n delete obj;\n setOpenGlContext(win_name);\n obj = new Mesh(verts);\n win->set(obj_name, obj);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::showPoints(const String& win_name, const String& obj_name, InputArray points)\n{\n 
CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(points);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n Object* obj = win->get(obj_name);\n if (obj)\n delete obj;\n setOpenGlContext(win_name);\n obj = new PointCloud(points);\n win->set(obj_name, obj);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::showRGBD(const String& win_name, const String& obj_name, InputArray img, const Matx33f& intrinsics, float scale)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(img);\n CV_UNUSED(intrinsics);\n CV_UNUSED(scale);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n CV_Assert(img.dims() == 2 && img.channels() == 4 && img.type() == CV_32FC4);\n \n Mat mat = img.getMat();\n Mat points;\n\n // This section (RGBD to point cloud) should be changed to use the 3d module when\n // #20013 is merged.\n\n float fx = intrinsics(0, 0);\n float fy = intrinsics(1, 1);\n float cx = intrinsics(0, 2);\n float cy = intrinsics(1, 2);\n\n for (int u = 0; u < mat.cols; ++u)\n for (int v = 0; v < mat.rows; ++v)\n {\n Vec4f c = mat.at(v, u);\n float d = c(3) * 0.001f; // mm to m\n\n float x_over_z = (cx - (float)u) / fx;\n float y_over_z = (cy - (float)v) / fy;\n float z = d/* / sqrt(1.0f + x_over_z * x_over_z + y_over_z * y_over_z)*/;\n float x = x_over_z * z;\n float y = y_over_z * z;\n\n float point[] = {\n x * scale, y * scale, z * scale,\n c(0) / 255.0f, c(1) / 255.0f, c(2) / 255.0f,\n };\n points.push_back(Mat(1, 6, CV_32F, point));\n }\n\n showPoints(win_name, obj_name, points);\n#endif\n}\n\nvoid cv::viz3d::showLines(const String& win_name, const String& obj_name, InputArray points)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(points);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n Object* obj = win->get(obj_name);\n if (obj)\n delete obj;\n setOpenGlContext(win_name);\n obj = new Lines(points);\n win->set(obj_name, obj);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setObjectPosition(const String& win_name, const String& obj_name, const Vec3f& position)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(position);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n Object* obj = win->get(obj_name);\n if (!obj)\n CV_Error(cv::Error::StsObjectNotFound, \"Object not found\");\n obj->setPosition(position);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setObjectRotation(const String& win_name, const String& obj_name, const Vec3f& rotation)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(rotation);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n Object* obj = win->get(obj_name);\n if (!obj)\n CV_Error(cv::Error::StsObjectNotFound, \"Object not found\");\n obj->setRotation(rotation);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::destroyObject(const String& win_name, const String& obj_name)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n 
CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->set(obj_name, nullptr);\n updateWindow(win_name);\n#endif\n}\n\n#ifdef HAVE_OPENGL\n\ncv::viz3d::View::View()\n{\n this->origin = { 0.0f, 0.0f, 0.0f };\n this->distance = 10.0f;\n this->position = { 0.0f, 0.0f, this->distance };\n this->up = { 0.0f, 1.0f, 0.0f };\n\n this->aspect = 1.0f;\n this->setPerspective(1.3f, 0.1f, 2000.0f);\n this->lookAt(this->origin, { 0.0f, 1.0f, 0.0f });\n}\n\nvoid cv::viz3d::View::setAspect(float aspect)\n{\n if (this->aspect != aspect)\n {\n this->aspect = aspect;\n this->setPerspective(this->fov, this->z_near, this->z_far);\n } \n}\n\nvoid cv::viz3d::View::setPerspective(float fov, float z_near, float z_far)\n{\n this->fov = fov;\n this->z_near = z_near;\n this->z_far = z_far;\n\n float tan_half_fovy = ::tan(this->fov / 2.0f);\n this->proj = Matx44f::zeros();\n this->proj(0, 0) = 1.0f / (this->aspect * tan_half_fovy);\n this->proj(1, 1) = 1.0f / tan_half_fovy;\n this->proj(2, 2) = (this->z_far + this->z_near) / (this->z_far - this->z_near);\n this->proj(2, 3) = 1.0f;\n this->proj(3, 2) = -(2.0f * this->z_far * this->z_near) / (this->z_far - this->z_near);\n}\n\nvoid cv::viz3d::View::rotate(float dx, float dy)\n{\n this->position = normalize(this->position - this->origin);\n float theta = atan2(this->position(2), this->position(0));\n float phi = ::asin(this->position(1));\n theta -= dx * 0.05f;\n phi += dy * 0.05f;\n phi = max(-1.5f, min(1.5f, phi));\n\n this->position(0) = ::cos(theta) * ::cos(phi) * this->distance;\n this->position(1) = ::sin(phi) * this->distance;\n this->position(2) = ::sin(theta) * ::cos(phi) * this->distance;\n this->position += this->origin;\n\n this->lookAt(this->origin, this->up);\n}\n\nvoid cv::viz3d::View::move(float dx, float dy)\n{\n Vec3f forward = normalize(this->position - this->origin);\n Vec3f right = normalize(this->up.cross(forward));\n Vec3f up = right.cross(forward);\n Vec3f delta = normalize(right * dx - up * dy) * this->distance * 0.01f;\n\n this->origin += delta;\n this->position += delta;\n this->lookAt(this->origin, this->up);\n}\n\nvoid cv::viz3d::View::scaleDistance(float amount)\n{\n this->distance *= amount;\n this->distance = max(0.1f, this->distance);\n this->position = normalize(this->position - this->origin) * this->distance + this->origin;\n\n this->lookAt(this->origin, this->up);\n}\n\nvoid cv::viz3d::View::lookAt(const Vec3f& point, const Vec3f& up)\n{\n Vec3f f = normalize(point - this->position);\n Vec3f s = normalize(up.cross(f));\n Vec3f u = f.cross(s);\n\n this->view = Matx44f::zeros();\n this->view(0, 0) = s[0];\n this->view(1, 0) = s[1];\n this->view(2, 0) = s[2];\n this->view(0, 1) = u[0];\n this->view(1, 1) = u[1];\n this->view(2, 1) = u[2];\n this->view(0, 2) = f[0];\n this->view(1, 2) = f[1];\n this->view(2, 2) = f[2];\n this->view(3, 0) = -s.dot(this->position);\n this->view(3, 1) = -u.dot(this->position);\n this->view(3, 2) = -f.dot(this->position);\n this->view(3, 3) = 1.0f;\n}\n\ncv::viz3d::Window::Window(const String& name)\n{\n this->name = name;\n this->sun.direction = normalize(Vec3f(0.3f, 1.0f, 0.5f));\n this->sun.ambient = { 0.1f, 0.1f, 0.1f };\n this->sun.diffuse = { 1.0f, 1.0f, 1.0f };\n this->sky_color = { 0.0f, 0.0f, 0.0f };\n\n\tfloat points[] = {\n\t\t0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f,\n\t\t0.5f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f,\n\t\t0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f,\n\t\t0.0f, 0.5f, 0.0f, 0.0f, 1.0f, 0.0f,\n\t\t0.0f, 0.0f, 0.0f, 0.0f, 
0.0f, 1.0f,\n\t\t0.0f, 0.0f, 0.5f, 0.0f, 0.0f, 1.0f,\n\t};\n\n\tMat points_mat = Mat(Size(6, 6), CV_32F, points);\n\n\tthis->crosshair = new Lines(points_mat);\n\tthis->shaders[this->crosshair->getShaderName()] = this->crosshair->buildShader();\n\tthis->crosshair->setShader(this->shaders[this->crosshair->getShaderName()]);\n\n this->grid = nullptr;\n}\n\ncv::viz3d::Window::~Window()\n{\n\tdelete this->crosshair;\n if (this->grid != nullptr)\n delete this->grid;\n\n for (auto obj : this->objects)\n\t\tdelete obj.second;\n}\n\ncv::viz3d::Object* cv::viz3d::Window::get(const String& obj_name)\n{\n\tauto it = this->objects.find(obj_name);\n\tif (it == this->objects.end())\n\t\treturn nullptr;\n\treturn it->second;\n}\n\nvoid cv::viz3d::Window::set(const String& obj_name, Object* obj)\n{\n\tauto it = this->objects.find(obj_name);\n\tif (it != this->objects.end() && it->second != obj)\n\t{\n\t\tdelete it->second;\n\n\t\tif (obj == nullptr)\n\t\t\tthis->objects.erase(it);\n\t\telse\n\t\t\tit->second = obj;\n\t}\n\telse if (obj != nullptr)\n\t\tthis->objects[obj_name] = obj;\n\n if (obj != nullptr)\n {\n String name = obj->getShaderName();\n auto it = this->shaders.find(name);\n if (it == this->shaders.end())\n this->shaders[name] = obj->buildShader();\n obj->setShader(this->shaders[name]);\n }\n}\n\nvoid cv::viz3d::Window::setSun(const Vec3f& direction, const Vec3f& ambient, const Vec3f& diffuse)\n{\n this->sun.direction = normalize(direction);\n this->sun.ambient = ambient;\n this->sun.diffuse = diffuse;\n}\n\nvoid cv::viz3d::Window::setSky(const Vec3f& color)\n{\n this->sky_color = color;\n}\n\nstatic Mat getGridVertices(const View& view)\n{\n const Vec3f grid_color = { 0.5f, 0.5f, 0.5f };\n const Vec3f axis_color = { 0.8f, 0.8f, 0.8f };\n const Vec3f center = view.getOrigin();\n const Vec3f camera_dir = view.getOrigin() - view.getPosition();\n const float scale = 0.3f;\n\n float tick_step = 1.0f;\n while (view.getDistance() * scale / tick_step > 4.0f)\n tick_step *= 2.0f;\n while (view.getDistance() * scale / tick_step < 2.0f)\n tick_step *= 0.5f;\n\n Mat points;\n float face_sign[3];\n\n const Vec3f min_p = center - Vec3f(1.0f, 1.0f, 1.0f) * view.getDistance() * scale;\n const Vec3f max_p = center + Vec3f(1.0f, 1.0f, 1.0f) * view.getDistance() * scale;\n\n // For each axis add a grid\n for (int ai = 0; ai < 3; ++ai)\n {\n Vec3f az = { 0.0f, 0.0f, 0.0f };\n az((ai + 2) % 3) = 1.0f;\n\n // Check if face is positive or negative along the az axis\n face_sign[ai] = camera_dir.dot(az) > 0.0f ? 
1.0f : -1.0f;\n\n float x = (floor(min_p(ai) / tick_step) + 1.0f) * tick_step;\n for (; x < max_p(ai); x += tick_step)\n {\n Vec3f a = min_p;\n Vec3f b = max_p;\n a((ai + 0) % 3) = x;\n b((ai + 0) % 3) = x;\n if (face_sign[ai] > 0.0f)\n a((ai + 2) % 3) = b((ai + 2) % 3);\n else\n b((ai + 2) % 3) = a((ai + 2) % 3);\n\n float data[] = {\n a(0), a(1), a(2), grid_color(0), grid_color(1), grid_color(2),\n b(0), b(1), b(2), grid_color(0), grid_color(1), grid_color(2),\n };\n\n points.push_back(Mat(2, 6, CV_32F, data));\n }\n\n float y = (floor(min_p((ai + 1) % 3) / tick_step) + 1.0f) * tick_step;\n for (; y < max_p((ai + 1) % 3); y += tick_step)\n {\n Vec3f a = min_p;\n Vec3f b = max_p;\n a((ai + 1) % 3) = y;\n b((ai + 1) % 3) = y;\n if (face_sign[ai] > 0.0f)\n a((ai + 2) % 3) = b((ai + 2) % 3);\n else\n b((ai + 2) % 3) = a((ai + 2) % 3);\n\n float data[] = {\n a(0), a(1), a(2), grid_color(0), grid_color(1), grid_color(2),\n b(0), b(1), b(2), grid_color(0), grid_color(1), grid_color(2),\n };\n\n points.push_back(Mat(2, 6, CV_32F, data));\n }\n }\n\n // Draw Ox, Oy and Oz axes and ticks\n {\n Vec3f a = { face_sign[1] > 0.0f ? min_p(0) : max_p(0), face_sign[2] > 0.0f ? max_p(1) : min_p(1), face_sign[0] > 0.0f ? max_p(2) : min_p(2) };\n Vec3f b = { face_sign[1] > 0.0f ? min_p(0) : max_p(0), face_sign[2] > 0.0f ? max_p(1) : min_p(1), face_sign[0] > 0.0f ? min_p(2) : max_p(2) };\n Vec3f c = { face_sign[1] > 0.0f ? max_p(0) : min_p(0), face_sign[2] > 0.0f ? max_p(1) : min_p(1), face_sign[0] > 0.0f ? min_p(2) : max_p(2) };\n Vec3f d = { face_sign[1] > 0.0f ? max_p(0) : min_p(0), face_sign[2] > 0.0f ? min_p(1) : max_p(1), face_sign[0] > 0.0f ? min_p(2) : max_p(2) };\n\n float data[] = {\n a(0), a(1), a(2), 0.0f, 0.0f, 0.8f,\n b(0), b(1), b(2), 0.0f, 0.0f, 0.8f,\n b(0), b(1), b(2), 0.8f, 0.0f, 0.0f,\n c(0), c(1), c(2), 0.8f, 0.0f, 0.0f,\n c(0), c(1), c(2), 0.0f, 0.8f, 0.0f,\n d(0), d(1), d(2), 0.0f, 0.8f, 0.0f,\n };\n\n points.push_back(Mat(6, 6, CV_32F, data));\n\n float x = (floor(min_p(0) / tick_step) + 1.0f) * tick_step;\n for (; x < max_p(0); x += tick_step)\n {\n Vec3f a, b, c;\n a(0) = b(0) = x;\n a(1) = b(1) = face_sign[2] > 0.0f ? max_p(1) : min_p(1);\n a(2) = face_sign[0] > 0.0f ? min_p(2) : max_p(2);\n b(2) = a(2) - face_sign[0] * 0.03f * scale * view.getDistance();\n\n float line[] = {\n a(0), a(1), a(2), 0.8f, 0.0f, 0.0f,\n b(0), b(1), b(2), 0.8f, 0.0f, 0.0f,\n };\n\n points.push_back(Mat(2, 6, CV_32F, line));\n }\n\n float y = (floor(min_p(1) / tick_step) + 1.0f) * tick_step;\n for (; y < max_p(1); y += tick_step)\n {\n Vec3f a, b;\n a(0) = b(0) = face_sign[1] > 0.0f ? max_p(0) : min_p(0);\n a(1) = b(1) = y;\n a(2) = face_sign[0] > 0.0f ? min_p(2) : max_p(2);\n b(2) = a(2) - face_sign[0] * 0.03f * scale * view.getDistance();\n\n float line[] = {\n a(0), a(1), a(2), 0.0f, 0.8f, 0.0f,\n b(0), b(1), b(2), 0.0f, 0.8f, 0.0f,\n };\n\n points.push_back(Mat(2, 6, CV_32F, line));\n }\n\n float z = (floor(min_p(2) / tick_step) + 1.0f) * tick_step;\n for (; z < max_p(2); z += tick_step)\n {\n Vec3f a, b;\n a(0) = face_sign[1] > 0.0f ? min_p(0) : max_p(0);\n b(0) = a(0) - face_sign[1] * 0.03f * scale * view.getDistance();\n a(1) = b(1) = face_sign[2] > 0.0f ? 
max_p(1) : min_p(1);\n a(2) = b(2) = z;\n\n float line[] = {\n a(0), a(1), a(2), 0.0f, 0.0f, 0.8f,\n b(0), b(1), b(2), 0.0f, 0.0f, 0.8f,\n };\n\n points.push_back(Mat(2, 6, CV_32F, line));\n }\n }\n\n return points;\n}\n\nvoid cv::viz3d::Window::setGridVisible(bool visible)\n{\n if (visible)\n {\n this->grid = new Lines(Mat(4096, 6, CV_32F), 0);\n this->grid->setShader(this->shaders[this->grid->getShaderName()]);\n }\n else if (this->grid != nullptr)\n {\n delete this->grid;\n this->grid = nullptr;\n }\n}\n\nvoid cv::viz3d::Window::draw()\n{\n Rect rect = getWindowImageRect(this->name);\n float aspect = (float)rect.width / (float)rect.height;\n this->view.setAspect(aspect);\n\n ogl::enable(ogl::DEPTH_TEST);\n ogl::clearColor({ double(this->sky_color(0) * 255.0f), double(this->sky_color(1) * 255.0f), double(this->sky_color(2) * 255.0f), 255.0 });\n\n if (this->grid)\n {\n Mat labels;\n static_cast(this->grid)->update(getGridVertices(this->view));\n this->grid->draw(this->view, this->sun);\n }\n else\n {\n this->crosshair->setPosition(this->view.getOrigin());\n this->crosshair->draw(this->view, this->sun);\n }\n\n for (auto& obj : this->objects)\n obj.second->draw(this->view, this->sun);\n}\n\nvoid cv::viz3d::Window::onMouse(int event, int x, int y, int flags)\n{\n if (event == EVENT_LBUTTONDOWN || event == EVENT_RBUTTONDOWN)\n {\n this->l_mouse_x = x;\n this->l_mouse_y = y;\n }\n else if (event == EVENT_MOUSEMOVE && (flags & EVENT_FLAG_LBUTTON))\n {\n this->view.rotate(x - this->l_mouse_x, y - this->l_mouse_y);\n updateWindow(this->name);\n this->l_mouse_x = x;\n this->l_mouse_y = y;\n }\n else if (event == EVENT_MOUSEMOVE && (flags & EVENT_FLAG_RBUTTON))\n {\n this->view.move(x - this->l_mouse_x, y - this->l_mouse_y);\n updateWindow(this->name);\n this->l_mouse_x = x;\n this->l_mouse_y = y;\n }\n else if (event == EVENT_MOUSEWHEEL)\n {\n this->view.scaleDistance(min(1.2f, max(0.8f, 1.0f - getMouseWheelDelta(flags) / 12.0f)));\n updateWindow(this->name);\n }\n}\n\ncv::viz3d::Object::Object()\n{\n\tthis->position = { 0.0f, 0.0f, 0.0f };\n\tthis->rotation = { 0.0f, 0.0f, 0.0f };\n\tthis->model = Matx44f::eye();\n}\n\nvoid cv::viz3d::Object::setPosition(const Vec3f& position)\n{\n this->position = position;\n this->updateModel();\n}\n\nvoid cv::viz3d::Object::setRotation(const Vec3f& rotation)\n{\n this->rotation = rotation;\n this->updateModel();\n}\n\nvoid cv::viz3d::Object::updateModel()\n{\n // Calculate rotation matrices\n Matx44f rot_a = Matx44f::eye();\n rot_a(0, 0) = ::cos(this->rotation(0));\n rot_a(1, 0) = ::sin(this->rotation(0));\n rot_a(0, 1) = -::sin(this->rotation(0));\n rot_a(1, 1) = ::cos(this->rotation(0));\n\n Matx44f rot_b = Matx44f::eye();\n rot_b(1, 1) = ::cos(this->rotation(1));\n rot_b(2, 1) = ::sin(this->rotation(1));\n rot_b(1, 2) = -::sin(this->rotation(1));\n rot_b(2, 2) = ::cos(this->rotation(1));\n\n Matx44f rot_c = Matx44f::eye();\n rot_c(0, 0) = ::cos(this->rotation(2));\n rot_c(2, 0) = ::sin(this->rotation(2));\n rot_c(0, 2) = -::sin(this->rotation(2));\n rot_c(2, 2) = ::cos(this->rotation(2));\n\n // Calculate translation matrix\n Matx44f trans = Matx44f::eye();\n trans(3, 0) = this->position(0);\n trans(3, 1) = this->position(1);\n trans(3, 2) = this->position(2);\n\n // Multiply matrices\n this->model = rot_c * rot_b * rot_a * trans;\n}\n\ncv::viz3d::Mesh::Mesh(InputArray verts, InputArray indices)\n{\n // Check parameter validity\n CV_Assert(verts.channels() == 1 && verts.dims() == 2 && (verts.size().width == 3 || verts.size().width == 6 || 
verts.size().width == 9));\n CV_Assert(verts.depth() == CV_32F);\n CV_Assert(indices.channels() == 1 && indices.dims() == 2 && indices.size().width == 3);\n CV_Assert(indices.depth() == CV_8U || indices.depth() == CV_16U || indices.depth() == CV_32S);\n\n // Prepare buffers\n if (verts.kind() == _InputArray::OPENGL_BUFFER)\n this->verts = verts.getOGlBuffer();\n else\n this->verts.copyFrom(verts, ogl::Buffer::ARRAY_BUFFER);\n\n if (indices.kind() == _InputArray::OPENGL_BUFFER)\n this->indices = indices.getOGlBuffer();\n else\n this->indices.copyFrom(indices, ogl::Buffer::ELEMENT_ARRAY_BUFFER);\n\n switch (indices.depth())\n {\n case CV_8U:\n this->index_type = ogl::UNSIGNED_BYTE;\n break;\n case CV_16U:\n this->index_type = ogl::UNSIGNED_SHORT;\n break;\n case CV_32S:\n this->index_type = ogl::UNSIGNED_INT;\n break;\n }\n\n // Prepare vertex array\n if (verts.size().width == 3)\n {\n this->va.create({\n ogl::Attribute(", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "724533336", + "repo_full_name": "opencv/opencv", + "pr_number": 20371, + "pr_file": "modules/highgui/src/viz3d/viz3d.cpp", + "discussion_id": "724533336", + "commented_code": "@@ -0,0 +1,1671 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+\n+#include \"../precomp.hpp\"\n+#include \"viz3d_private.hpp\"\n+#include \"opencv2/core/utils/logger.hpp\"\n+#include \"opencv2/imgproc.hpp\"\n+\n+using namespace cv;\n+using namespace cv::viz3d;\n+\n+#ifdef HAVE_OPENGL\n+\n+static void openGlDrawCallback(void* data)\n+{\n+ Window* win = static_cast(data);\n+ if (win != nullptr)\n+ win->draw();\n+}\n+\n+static void openGlFreeCallback(void* data)\n+{\n+ Window* win = static_cast(data);\n+ if (win != nullptr)\n+ delete win;\n+}\n+\n+static void mouseCallback(int event, int x, int y, int flags, void* data)\n+{\n+ Window* win = static_cast(data);\n+ if (win != nullptr)\n+ win->onMouse(event, x, y, flags);\n+}\n+\n+static Window* getWindow(const String& win_name)\n+{\n+ namedWindow(win_name, WINDOW_OPENGL);\n+ const double useGl = getWindowProperty(win_name, WND_PROP_OPENGL);\n+ if (useGl <= 0)\n+ CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the window was created without an OpenGL context\");\n+\n+ Window* win = static_cast(getOpenGlUserData(win_name));\n+\n+ if (!win)\n+ {\n+\t\tsetOpenGlContext(win_name);\n+\t\twin = new Window(win_name);\n+\t\tsetOpenGlDrawCallback(win_name, &openGlDrawCallback, win);\n+ setOpenGlFreeCallback(win_name, &openGlFreeCallback);\n+ setMouseCallback(win_name, &mouseCallback, win);\n+ }\n+ else\n+ {\n+ auto callback = getOpenGlDrawCallback(win_name);\n+ if (callback != nullptr && callback != openGlDrawCallback)\n+ CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the OpenGL callback is already being used on this window\");\n+ }\n+\n+ return win;\n+}\n+\n+#endif // HAVE_OPENGL\n+\n+void cv::viz3d::setPerspective(const String& win_name, float fov, float z_near, float z_far)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(fov);\n+ CV_UNUSED(z_near);\n+ CV_UNUSED(z_far);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->getView().setPerspective(fov, z_near, z_far);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void 
cv::viz3d::setGridVisible(const String& win_name, bool visible)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(visible);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->setGridVisible(visible);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::setSun(const String& win_name, const Vec3f& direction, const Vec3f& ambient, const Vec3f& diffuse)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(direction);\n+ CV_UNUSED(ambient);\n+ CV_UNUSED(diffuse);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->setSun(direction, ambient, diffuse);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::setSky(const String& win_name, const Vec3f& color)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(color);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->setSky(color);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::showBox(const String& win_name, const String& obj_name, const Vec3f& size, const Vec3f& color, RenderMode mode)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(size);\n+ CV_UNUSED(color);\n+ CV_UNUSED(mode);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ if (mode == RENDER_WIREFRAME)\n+ {\n+ float points_data[] = {\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ };\n+\n+ const Mat points_mat = Mat(Size(6, 24), CV_32F, &points_data);\n+\n+ showLines(win_name, obj_name, points_mat);\n+ }\n+ else if (mode == RENDER_SIMPLE)\n+ {\n+ float verts_data[] = {\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), 
+size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ };\n+\n+ static unsigned char indices_data[] = {\n+ 0, 1, 2,\n+ 2, 3, 0,\n+ 4, 5, 6,\n+ 6, 7, 4,\n+ 0, 1, 5,\n+ 5, 4, 0,\n+ 3, 2, 6,\n+ 6, 7, 3,\n+ 0, 3, 7,\n+ 7, 4, 0,\n+ 1, 2, 6,\n+ 6, 5, 1,\n+ };\n+\n+ const Mat verts_mat = Mat(Size(6, 8), CV_32F, &verts_data);\n+ const Mat indices_mat = Mat(Size(3, 12), CV_8U, &indices_data);\n+\n+ showMesh(win_name, obj_name, verts_mat, indices_mat);\n+ }\n+ else if (mode == RENDER_SHADING)\n+ {\n+ float verts_data[] = {\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 0\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 1\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 2\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 2\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 3\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 0\n+\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 4\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 5\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 6\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 6\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 7\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 4\n+\n+\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 0\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 1\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 5\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 5\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 4\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 0\n+\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 3\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 2\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 6\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 6\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 7\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 3\n+\n+\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 0\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 3\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 7\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 7\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 4\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 0\n+\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 1\n+ 
+size(0), +size(1), -size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 2\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 6\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 6\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 5\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 1\n+ };\n+\n+ const Mat verts_mat = Mat(Size(9, 36), CV_32F, &verts_data);\n+\n+ showMesh(win_name, obj_name, verts_mat);\n+ }\n+#endif\n+}\n+\n+void cv::viz3d::showPlane(const String& win_name, const String& obj_name, const Vec2f& size, const Vec3f& color, RenderMode mode)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(size);\n+ CV_UNUSED(color);\n+ CV_UNUSED(mode);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ if (mode == RENDER_WIREFRAME)\n+ {\n+ float points_data[] = {\n+ -size(0), 0.0f, -size(1), color(0), color(1), color(2),\n+ +size(0), 0.0f, -size(1), color(0), color(1), color(2),\n+ +size(0), 0.0f, -size(1), color(0), color(1), color(2),\n+ +size(0), 0.0f, +size(1), color(0), color(1), color(2),\n+ +size(0), 0.0f, +size(1), color(0), color(1), color(2),\n+ -size(0), 0.0f, +size(1), color(0), color(1), color(2),\n+ -size(0), 0.0f, +size(1), color(0), color(1), color(2),\n+ -size(0), 0.0f, -size(1), color(0), color(1), color(2),\n+ };\n+\n+ const Mat points_mat = Mat(Size(6, 8), CV_32F, points_data);\n+\n+ showLines(win_name, obj_name, points_mat);\n+ }\n+ else if (mode == RENDER_SIMPLE)\n+ {\n+ float verts_data[] = {\n+ -size(0), 0.0f, -size(1), color(0), color(1), color(2),\n+ +size(0), 0.0f, -size(1), color(0), color(1), color(2),\n+ +size(0), 0.0f, +size(1), color(0), color(1), color(2),\n+ -size(0), 0.0f, -size(1), color(0), color(1), color(2),\n+ -size(0), 0.0f, +size(1), color(0), color(1), color(2),\n+ +size(0), 0.0f, +size(1), color(0), color(1), color(2),\n+ };\n+\n+ const Mat verts_mat = Mat(Size(6, 6), CV_32F, verts_data);\n+\n+ showMesh(win_name, obj_name, verts_mat);\n+ }\n+ else if (mode == RENDER_SHADING)\n+ {\n+ float verts_data[] = {\n+ -size(0), 0.0f, -size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n+ +size(0), 0.0f, -size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n+ +size(0), 0.0f, +size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n+ -size(0), 0.0f, -size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n+ -size(0), 0.0f, +size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n+ +size(0), 0.0f, +size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n+ };\n+\n+ const Mat verts_mat = Mat(Size(9, 6), CV_32F, verts_data);\n+\n+ showMesh(win_name, obj_name, verts_mat);\n+ }\n+#endif\n+}\n+\n+void cv::viz3d::showSphere(const String& win_name, const String& obj_name, float radius, const Vec3f& color, RenderMode mode, int divs)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(radius);\n+ CV_UNUSED(color);\n+ CV_UNUSED(mode);\n+ CV_UNUSED(divs);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ CV_Assert(divs >= 1);\n+\n+ if (mode == RENDER_WIREFRAME)\n+ {\n+ const float limit = 2.0f * 3.1415f;\n+ std::vector points_data;\n+\n+ for (int e = 0; e < 3; ++e)\n+ {\n+ auto ex = Vec3f::zeros();\n+ auto ey = Vec3f::zeros();\n+ ex((e + 0) % 3) = radius;\n+ ey((e + 1) % 3) = 
radius;\n+\n+ for (float t = 0.0f, n; t < limit;)\n+ {\n+ n = t + limit / (divs * 4);\n+ points_data.insert(points_data.end(), {\n+ ex(0) * cos(t) + ey(0) * sin(t), ex(1) * cos(t) + ey(1) * sin(t), ex(2) * cos(t) + ey(2) * sin(t), color(0), color(1), color(2),\n+ ex(0) * cos(n) + ey(0) * sin(n), ex(1) * cos(n) + ey(1) * sin(n), ex(2) * cos(n) + ey(2) * sin(n), color(0), color(1), color(2),\n+ });\n+ t = n;\n+ }\n+ }\n+\n+ const Mat points_mat = Mat(Size(6, points_data.size() / 6), CV_32F, points_data.data());\n+\n+ showLines(win_name, obj_name, points_mat);\n+ }\n+ else\n+ {\n+ std::vector verts_data;\n+\n+ for (int s = -1; s <= 1; s += 2)\n+ for (int e = 0; e < 3; ++e)\n+ {\n+ auto ex = Vec3f::zeros();\n+ auto ey = Vec3f::zeros();\n+ auto pz = Vec3f::zeros();\n+ ex((e + 1) % 3) = 1.0f / divs;\n+ ey((e + 2) % 3) = 1.0f / divs;\n+ pz(e) = s;\n+\n+ for (int x = -divs; x < divs; ++x)\n+ for (int y = -divs; y < divs; ++y)\n+ if (mode == RENDER_SIMPLE)\n+ verts_data.insert(verts_data.end(), {\n+ ex(0) * (x + 0) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2),\n+ ex(0) * (x + 1) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2),\n+ ex(0) * (x + 1) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2),\n+ ex(0) * (x + 1) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2),\n+ ex(0) * (x + 0) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2),\n+ ex(0) * (x + 0) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2),\n+ });\n+ else\n+ verts_data.insert(verts_data.end(), {\n+ ex(0) * (x + 0) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n+ ex(0) * (x + 1) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n+ ex(0) * (x + 1) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n+ ex(0) * (x + 1) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n+ ex(0) * (x + 0) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n+ ex(0) * (x + 0) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n+ });\n+ }\n+\n+ if (mode == RENDER_SIMPLE)\n+ {\n+ for (int i = 0; i < verts_data.size() / 6; ++i)\n+ {\n+ float l = sqrtf(verts_data[6 * i + 0] * verts_data[6 * i + 0] + verts_data[6 * i + 1] * verts_data[6 * i + 1] + verts_data[6 * i + 2] * verts_data[6 * i + 2]);\n+ verts_data[6 * i + 0] *= radius / l;\n+ verts_data[6 * i + 1] *= radius / l;\n+ verts_data[6 * i + 2] *= radius / l;\n+ }\n+\n+ const Mat verts_mat = Mat(Size(6, 
verts_data.size() / 6), CV_32F, verts_data.data());\n+ showMesh(win_name, obj_name, verts_mat);\n+ }\n+ else\n+ {\n+ for (int i = 0; i < verts_data.size() / 9; ++i)\n+ {\n+ float l = sqrtf(verts_data[9 * i + 0] * verts_data[9 * i + 0] + verts_data[9 * i + 1] * verts_data[9 * i + 1] + verts_data[9 * i + 2] * verts_data[9 * i + 2]);\n+ verts_data[9 * i + 6] = verts_data[9 * i + 0] / l;\n+ verts_data[9 * i + 7] = verts_data[9 * i + 1] / l;\n+ verts_data[9 * i + 8] = verts_data[9 * i + 2] / l;\n+ verts_data[9 * i + 0] *= radius / l;\n+ verts_data[9 * i + 1] *= radius / l;\n+ verts_data[9 * i + 2] *= radius / l;\n+ }\n+\n+ const Mat verts_mat = Mat(Size(9, verts_data.size() / 9), CV_32F, verts_data.data());\n+ showMesh(win_name, obj_name, verts_mat);\n+ }\n+ }\n+#endif\n+}\n+\n+void cv::viz3d::showCameraTrajectory(\n+ const String& win_name, const String& obj_name, InputArray trajectory,\n+ float aspect, float scale, Vec3f frustum_color, Vec3f line_color)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(trajectory);\n+ CV_UNUSED(aspect);\n+ CV_UNUSED(scale);\n+ CV_UNUSED(frustum_color);\n+ CV_UNUSED(line_color);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ CV_Assert(trajectory.dims() == 2 && trajectory.cols() == 6 && trajectory.depth() == CV_32F);\n+\n+ auto data = trajectory.getMat();\n+ std::vector points_data;\n+\n+ // Add frustums\n+ for (int i = 0; i < data.rows; ++i)\n+ {\n+ Vec3f position = { data.at(i, 0), data.at(i, 1), data.at(i, 2) };\n+ Vec3f forward = normalize(Vec3f { data.at(i, 3), data.at(i, 4), data.at(i, 5) });\n+ Vec3f world_up = { 0.0f, 1.0f, 0.0f };\n+ Vec3f right = forward.cross(world_up);\n+ Vec3f up = forward.cross(right);\n+\n+ Vec3f back_f[4] = {\n+ position + (-right * aspect - up) * scale,\n+ position + ( right * aspect - up) * scale,\n+ position + ( right * aspect + up) * scale,\n+ position + (-right * aspect + up) * scale,\n+ };\n+\n+ Vec3f front_f[4] = {\n+ position + (-right * aspect - up) * scale * 1.5f + forward * scale * 2.0f,\n+ position + (right * aspect - up) * scale * 1.5f + forward * scale * 2.0f,\n+ position + (right * aspect + up) * scale * 1.5f + forward * scale * 2.0f,\n+ position + (-right * aspect + up) * scale * 1.5f + forward * scale * 2.0f,\n+ };\n+\n+ points_data.insert(points_data.end(), {\n+ back_f[0](0), back_f[0](1), back_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ back_f[1](0), back_f[1](1), back_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ back_f[1](0), back_f[1](1), back_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ back_f[2](0), back_f[2](1), back_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ back_f[2](0), back_f[2](1), back_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ back_f[3](0), back_f[3](1), back_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ back_f[3](0), back_f[3](1), back_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ back_f[0](0), back_f[0](1), back_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+\n+ front_f[0](0), front_f[0](1), front_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[1](0), front_f[1](1), front_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[1](0), front_f[1](1), front_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[2](0), front_f[2](1), front_f[2](2), 
frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[2](0), front_f[2](1), front_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[3](0), front_f[3](1), front_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[3](0), front_f[3](1), front_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[0](0), front_f[0](1), front_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+\n+ back_f[0](0), back_f[0](1), back_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[0](0), front_f[0](1), front_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ back_f[1](0), back_f[1](1), back_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[1](0), front_f[1](1), front_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ back_f[2](0), back_f[2](1), back_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[2](0), front_f[2](1), front_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ back_f[3](0), back_f[3](1), back_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[3](0), front_f[3](1), front_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ });\n+\n+ }\n+\n+ // Add trajectory line\n+ for (int i = 1; i < data.rows; ++i)\n+ {\n+ points_data.insert(points_data.end(), {\n+ data.at(i - 1, 0), data.at(i - 1, 1), data.at(i - 1, 2), line_color(0), line_color(1), line_color(2),\n+ data.at(i, 0), data.at(i, 1), data.at(i, 2), line_color(0), line_color(1), line_color(2),\n+ });\n+ }\n+\n+ const Mat points_mat = Mat(Size(6, points_data.size() / 6), CV_32F, points_data.data());\n+ showLines(win_name, obj_name, points_mat);\n+\n+#endif\n+}\n+\n+void cv::viz3d::showMesh(const String& win_name, const String& obj_name, InputArray verts, InputArray indices)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(verts);\n+ CV_UNUSED(indices);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ Object* obj = win->get(obj_name);\n+ if (obj)\n+ delete obj;\n+ setOpenGlContext(win_name);\n+ obj = new Mesh(verts, indices);\n+ win->set(obj_name, obj);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::showMesh(const String& win_name, const String& obj_name, InputArray verts)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(verts);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ Object* obj = win->get(obj_name);\n+ if (obj)\n+ delete obj;\n+ setOpenGlContext(win_name);\n+ obj = new Mesh(verts);\n+ win->set(obj_name, obj);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::showPoints(const String& win_name, const String& obj_name, InputArray points)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(points);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ Object* obj = win->get(obj_name);\n+ if (obj)\n+ delete obj;\n+ setOpenGlContext(win_name);\n+ obj = new PointCloud(points);\n+ win->set(obj_name, obj);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::showRGBD(const String& win_name, const String& obj_name, 
InputArray img, const Matx33f& intrinsics, float scale)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(img);\n+ CV_UNUSED(intrinsics);\n+ CV_UNUSED(scale);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ CV_Assert(img.dims() == 2 && img.channels() == 4 && img.type() == CV_32FC4);\n+ \n+ Mat mat = img.getMat();\n+ Mat points;\n+\n+ // This section (RGBD to point cloud) should be changed to use the 3d module when\n+ // #20013 is merged.\n+\n+ float fx = intrinsics(0, 0);\n+ float fy = intrinsics(1, 1);\n+ float cx = intrinsics(0, 2);\n+ float cy = intrinsics(1, 2);\n+\n+ for (int u = 0; u < mat.cols; ++u)\n+ for (int v = 0; v < mat.rows; ++v)\n+ {\n+ Vec4f c = mat.at(v, u);\n+ float d = c(3) * 0.001f; // mm to m\n+\n+ float x_over_z = (cx - (float)u) / fx;\n+ float y_over_z = (cy - (float)v) / fy;\n+ float z = d/* / sqrt(1.0f + x_over_z * x_over_z + y_over_z * y_over_z)*/;\n+ float x = x_over_z * z;\n+ float y = y_over_z * z;\n+\n+ float point[] = {\n+ x * scale, y * scale, z * scale,\n+ c(0) / 255.0f, c(1) / 255.0f, c(2) / 255.0f,\n+ };\n+ points.push_back(Mat(1, 6, CV_32F, point));\n+ }\n+\n+ showPoints(win_name, obj_name, points);\n+#endif\n+}\n+\n+void cv::viz3d::showLines(const String& win_name, const String& obj_name, InputArray points)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(points);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ Object* obj = win->get(obj_name);\n+ if (obj)\n+ delete obj;\n+ setOpenGlContext(win_name);\n+ obj = new Lines(points);\n+ win->set(obj_name, obj);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::setObjectPosition(const String& win_name, const String& obj_name, const Vec3f& position)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(position);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ Object* obj = win->get(obj_name);\n+ if (!obj)\n+ CV_Error(cv::Error::StsObjectNotFound, \"Object not found\");\n+ obj->setPosition(position);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::setObjectRotation(const String& win_name, const String& obj_name, const Vec3f& rotation)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(rotation);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ Object* obj = win->get(obj_name);\n+ if (!obj)\n+ CV_Error(cv::Error::StsObjectNotFound, \"Object not found\");\n+ obj->setRotation(rotation);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::destroyObject(const String& win_name, const String& obj_name)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->set(obj_name, nullptr);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+#ifdef HAVE_OPENGL\n+\n+cv::viz3d::View::View()\n+{\n+ this->origin = { 0.0f, 0.0f, 0.0f };\n+ this->distance = 10.0f;\n+ this->position = { 0.0f, 0.0f, this->distance };\n+ this->up = { 0.0f, 
1.0f, 0.0f };\n+\n+ this->aspect = 1.0f;\n+ this->setPerspective(1.3f, 0.1f, 2000.0f);\n+ this->lookAt(this->origin, { 0.0f, 1.0f, 0.0f });\n+}\n+\n+void cv::viz3d::View::setAspect(float aspect)\n+{\n+ if (this->aspect != aspect)\n+ {\n+ this->aspect = aspect;\n+ this->setPerspective(this->fov, this->z_near, this->z_far);\n+ } \n+}\n+\n+void cv::viz3d::View::setPerspective(float fov, float z_near, float z_far)\n+{\n+ this->fov = fov;\n+ this->z_near = z_near;\n+ this->z_far = z_far;\n+\n+ float tan_half_fovy = ::tan(this->fov / 2.0f);\n+ this->proj = Matx44f::zeros();\n+ this->proj(0, 0) = 1.0f / (this->aspect * tan_half_fovy);\n+ this->proj(1, 1) = 1.0f / tan_half_fovy;\n+ this->proj(2, 2) = (this->z_far + this->z_near) / (this->z_far - this->z_near);\n+ this->proj(2, 3) = 1.0f;\n+ this->proj(3, 2) = -(2.0f * this->z_far * this->z_near) / (this->z_far - this->z_near);\n+}\n+\n+void cv::viz3d::View::rotate(float dx, float dy)\n+{\n+ this->position = normalize(this->position - this->origin);\n+ float theta = atan2(this->position(2), this->position(0));\n+ float phi = ::asin(this->position(1));\n+ theta -= dx * 0.05f;\n+ phi += dy * 0.05f;\n+ phi = max(-1.5f, min(1.5f, phi));\n+\n+ this->position(0) = ::cos(theta) * ::cos(phi) * this->distance;\n+ this->position(1) = ::sin(phi) * this->distance;\n+ this->position(2) = ::sin(theta) * ::cos(phi) * this->distance;\n+ this->position += this->origin;\n+\n+ this->lookAt(this->origin, this->up);\n+}\n+\n+void cv::viz3d::View::move(float dx, float dy)\n+{\n+ Vec3f forward = normalize(this->position - this->origin);\n+ Vec3f right = normalize(this->up.cross(forward));\n+ Vec3f up = right.cross(forward);\n+ Vec3f delta = normalize(right * dx - up * dy) * this->distance * 0.01f;\n+\n+ this->origin += delta;\n+ this->position += delta;\n+ this->lookAt(this->origin, this->up);\n+}\n+\n+void cv::viz3d::View::scaleDistance(float amount)\n+{\n+ this->distance *= amount;\n+ this->distance = max(0.1f, this->distance);\n+ this->position = normalize(this->position - this->origin) * this->distance + this->origin;\n+\n+ this->lookAt(this->origin, this->up);\n+}\n+\n+void cv::viz3d::View::lookAt(const Vec3f& point, const Vec3f& up)\n+{\n+ Vec3f f = normalize(point - this->position);\n+ Vec3f s = normalize(up.cross(f));\n+ Vec3f u = f.cross(s);\n+\n+ this->view = Matx44f::zeros();\n+ this->view(0, 0) = s[0];\n+ this->view(1, 0) = s[1];\n+ this->view(2, 0) = s[2];\n+ this->view(0, 1) = u[0];\n+ this->view(1, 1) = u[1];\n+ this->view(2, 1) = u[2];\n+ this->view(0, 2) = f[0];\n+ this->view(1, 2) = f[1];\n+ this->view(2, 2) = f[2];\n+ this->view(3, 0) = -s.dot(this->position);\n+ this->view(3, 1) = -u.dot(this->position);\n+ this->view(3, 2) = -f.dot(this->position);\n+ this->view(3, 3) = 1.0f;\n+}\n+\n+cv::viz3d::Window::Window(const String& name)\n+{\n+ this->name = name;\n+ this->sun.direction = normalize(Vec3f(0.3f, 1.0f, 0.5f));\n+ this->sun.ambient = { 0.1f, 0.1f, 0.1f };\n+ this->sun.diffuse = { 1.0f, 1.0f, 1.0f };\n+ this->sky_color = { 0.0f, 0.0f, 0.0f };\n+\n+\tfloat points[] = {\n+\t\t0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f,\n+\t\t0.5f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f,\n+\t\t0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f,\n+\t\t0.0f, 0.5f, 0.0f, 0.0f, 1.0f, 0.0f,\n+\t\t0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f,\n+\t\t0.0f, 0.0f, 0.5f, 0.0f, 0.0f, 1.0f,\n+\t};\n+\n+\tMat points_mat = Mat(Size(6, 6), CV_32F, points);\n+\n+\tthis->crosshair = new Lines(points_mat);\n+\tthis->shaders[this->crosshair->getShaderName()] = 
this->crosshair->buildShader();\n+\tthis->crosshair->setShader(this->shaders[this->crosshair->getShaderName()]);\n+\n+ this->grid = nullptr;\n+}\n+\n+cv::viz3d::Window::~Window()\n+{\n+\tdelete this->crosshair;\n+ if (this->grid != nullptr)\n+ delete this->grid;\n+\n+ for (auto obj : this->objects)\n+\t\tdelete obj.second;\n+}\n+\n+cv::viz3d::Object* cv::viz3d::Window::get(const String& obj_name)\n+{\n+\tauto it = this->objects.find(obj_name);\n+\tif (it == this->objects.end())\n+\t\treturn nullptr;\n+\treturn it->second;\n+}\n+\n+void cv::viz3d::Window::set(const String& obj_name, Object* obj)\n+{\n+\tauto it = this->objects.find(obj_name);\n+\tif (it != this->objects.end() && it->second != obj)\n+\t{\n+\t\tdelete it->second;\n+\n+\t\tif (obj == nullptr)\n+\t\t\tthis->objects.erase(it);\n+\t\telse\n+\t\t\tit->second = obj;\n+\t}\n+\telse if (obj != nullptr)\n+\t\tthis->objects[obj_name] = obj;\n+\n+ if (obj != nullptr)\n+ {\n+ String name = obj->getShaderName();\n+ auto it = this->shaders.find(name);\n+ if (it == this->shaders.end())\n+ this->shaders[name] = obj->buildShader();\n+ obj->setShader(this->shaders[name]);\n+ }\n+}\n+\n+void cv::viz3d::Window::setSun(const Vec3f& direction, const Vec3f& ambient, const Vec3f& diffuse)\n+{\n+ this->sun.direction = normalize(direction);\n+ this->sun.ambient = ambient;\n+ this->sun.diffuse = diffuse;\n+}\n+\n+void cv::viz3d::Window::setSky(const Vec3f& color)\n+{\n+ this->sky_color = color;\n+}\n+\n+static Mat getGridVertices(const View& view)\n+{\n+ const Vec3f grid_color = { 0.5f, 0.5f, 0.5f };\n+ const Vec3f axis_color = { 0.8f, 0.8f, 0.8f };\n+ const Vec3f center = view.getOrigin();\n+ const Vec3f camera_dir = view.getOrigin() - view.getPosition();\n+ const float scale = 0.3f;\n+\n+ float tick_step = 1.0f;\n+ while (view.getDistance() * scale / tick_step > 4.0f)\n+ tick_step *= 2.0f;\n+ while (view.getDistance() * scale / tick_step < 2.0f)\n+ tick_step *= 0.5f;\n+\n+ Mat points;\n+ float face_sign[3];\n+\n+ const Vec3f min_p = center - Vec3f(1.0f, 1.0f, 1.0f) * view.getDistance() * scale;\n+ const Vec3f max_p = center + Vec3f(1.0f, 1.0f, 1.0f) * view.getDistance() * scale;\n+\n+ // For each axis add a grid\n+ for (int ai = 0; ai < 3; ++ai)\n+ {\n+ Vec3f az = { 0.0f, 0.0f, 0.0f };\n+ az((ai + 2) % 3) = 1.0f;\n+\n+ // Check if face is positive or negative along the az axis\n+ face_sign[ai] = camera_dir.dot(az) > 0.0f ? 
1.0f : -1.0f;\n+\n+ float x = (floor(min_p(ai) / tick_step) + 1.0f) * tick_step;\n+ for (; x < max_p(ai); x += tick_step)\n+ {\n+ Vec3f a = min_p;\n+ Vec3f b = max_p;\n+ a((ai + 0) % 3) = x;\n+ b((ai + 0) % 3) = x;\n+ if (face_sign[ai] > 0.0f)\n+ a((ai + 2) % 3) = b((ai + 2) % 3);\n+ else\n+ b((ai + 2) % 3) = a((ai + 2) % 3);\n+\n+ float data[] = {\n+ a(0), a(1), a(2), grid_color(0), grid_color(1), grid_color(2),\n+ b(0), b(1), b(2), grid_color(0), grid_color(1), grid_color(2),\n+ };\n+\n+ points.push_back(Mat(2, 6, CV_32F, data));\n+ }\n+\n+ float y = (floor(min_p((ai + 1) % 3) / tick_step) + 1.0f) * tick_step;\n+ for (; y < max_p((ai + 1) % 3); y += tick_step)\n+ {\n+ Vec3f a = min_p;\n+ Vec3f b = max_p;\n+ a((ai + 1) % 3) = y;\n+ b((ai + 1) % 3) = y;\n+ if (face_sign[ai] > 0.0f)\n+ a((ai + 2) % 3) = b((ai + 2) % 3);\n+ else\n+ b((ai + 2) % 3) = a((ai + 2) % 3);\n+\n+ float data[] = {\n+ a(0), a(1), a(2), grid_color(0), grid_color(1), grid_color(2),\n+ b(0), b(1), b(2), grid_color(0), grid_color(1), grid_color(2),\n+ };\n+\n+ points.push_back(Mat(2, 6, CV_32F, data));\n+ }\n+ }\n+\n+ // Draw Ox, Oy and Oz axes and ticks\n+ {\n+ Vec3f a = { face_sign[1] > 0.0f ? min_p(0) : max_p(0), face_sign[2] > 0.0f ? max_p(1) : min_p(1), face_sign[0] > 0.0f ? max_p(2) : min_p(2) };\n+ Vec3f b = { face_sign[1] > 0.0f ? min_p(0) : max_p(0), face_sign[2] > 0.0f ? max_p(1) : min_p(1), face_sign[0] > 0.0f ? min_p(2) : max_p(2) };\n+ Vec3f c = { face_sign[1] > 0.0f ? max_p(0) : min_p(0), face_sign[2] > 0.0f ? max_p(1) : min_p(1), face_sign[0] > 0.0f ? min_p(2) : max_p(2) };\n+ Vec3f d = { face_sign[1] > 0.0f ? max_p(0) : min_p(0), face_sign[2] > 0.0f ? min_p(1) : max_p(1), face_sign[0] > 0.0f ? min_p(2) : max_p(2) };\n+\n+ float data[] = {\n+ a(0), a(1), a(2), 0.0f, 0.0f, 0.8f,\n+ b(0), b(1), b(2), 0.0f, 0.0f, 0.8f,\n+ b(0), b(1), b(2), 0.8f, 0.0f, 0.0f,\n+ c(0), c(1), c(2), 0.8f, 0.0f, 0.0f,\n+ c(0), c(1), c(2), 0.0f, 0.8f, 0.0f,\n+ d(0), d(1), d(2), 0.0f, 0.8f, 0.0f,\n+ };\n+\n+ points.push_back(Mat(6, 6, CV_32F, data));\n+\n+ float x = (floor(min_p(0) / tick_step) + 1.0f) * tick_step;\n+ for (; x < max_p(0); x += tick_step)\n+ {\n+ Vec3f a, b, c;\n+ a(0) = b(0) = x;\n+ a(1) = b(1) = face_sign[2] > 0.0f ? max_p(1) : min_p(1);\n+ a(2) = face_sign[0] > 0.0f ? min_p(2) : max_p(2);\n+ b(2) = a(2) - face_sign[0] * 0.03f * scale * view.getDistance();\n+\n+ float line[] = {\n+ a(0), a(1), a(2), 0.8f, 0.0f, 0.0f,\n+ b(0), b(1), b(2), 0.8f, 0.0f, 0.0f,\n+ };\n+\n+ points.push_back(Mat(2, 6, CV_32F, line));\n+ }\n+\n+ float y = (floor(min_p(1) / tick_step) + 1.0f) * tick_step;\n+ for (; y < max_p(1); y += tick_step)\n+ {\n+ Vec3f a, b;\n+ a(0) = b(0) = face_sign[1] > 0.0f ? max_p(0) : min_p(0);\n+ a(1) = b(1) = y;\n+ a(2) = face_sign[0] > 0.0f ? min_p(2) : max_p(2);\n+ b(2) = a(2) - face_sign[0] * 0.03f * scale * view.getDistance();\n+\n+ float line[] = {\n+ a(0), a(1), a(2), 0.0f, 0.8f, 0.0f,\n+ b(0), b(1), b(2), 0.0f, 0.8f, 0.0f,\n+ };\n+\n+ points.push_back(Mat(2, 6, CV_32F, line));\n+ }\n+\n+ float z = (floor(min_p(2) / tick_step) + 1.0f) * tick_step;\n+ for (; z < max_p(2); z += tick_step)\n+ {\n+ Vec3f a, b;\n+ a(0) = face_sign[1] > 0.0f ? min_p(0) : max_p(0);\n+ b(0) = a(0) - face_sign[1] * 0.03f * scale * view.getDistance();\n+ a(1) = b(1) = face_sign[2] > 0.0f ? 
max_p(1) : min_p(1);\n+ a(2) = b(2) = z;\n+\n+ float line[] = {\n+ a(0), a(1), a(2), 0.0f, 0.0f, 0.8f,\n+ b(0), b(1), b(2), 0.0f, 0.0f, 0.8f,\n+ };\n+\n+ points.push_back(Mat(2, 6, CV_32F, line));\n+ }\n+ }\n+\n+ return points;\n+}\n+\n+void cv::viz3d::Window::setGridVisible(bool visible)\n+{\n+ if (visible)\n+ {\n+ this->grid = new Lines(Mat(4096, 6, CV_32F), 0);\n+ this->grid->setShader(this->shaders[this->grid->getShaderName()]);\n+ }\n+ else if (this->grid != nullptr)\n+ {\n+ delete this->grid;\n+ this->grid = nullptr;\n+ }\n+}\n+\n+void cv::viz3d::Window::draw()\n+{\n+ Rect rect = getWindowImageRect(this->name);\n+ float aspect = (float)rect.width / (float)rect.height;\n+ this->view.setAspect(aspect);\n+\n+ ogl::enable(ogl::DEPTH_TEST);\n+ ogl::clearColor({ double(this->sky_color(0) * 255.0f), double(this->sky_color(1) * 255.0f), double(this->sky_color(2) * 255.0f), 255.0 });\n+\n+ if (this->grid)\n+ {\n+ Mat labels;\n+ static_cast(this->grid)->update(getGridVertices(this->view));\n+ this->grid->draw(this->view, this->sun);\n+ }\n+ else\n+ {\n+ this->crosshair->setPosition(this->view.getOrigin());\n+ this->crosshair->draw(this->view, this->sun);\n+ }\n+\n+ for (auto& obj : this->objects)\n+ obj.second->draw(this->view, this->sun);\n+}\n+\n+void cv::viz3d::Window::onMouse(int event, int x, int y, int flags)\n+{\n+ if (event == EVENT_LBUTTONDOWN || event == EVENT_RBUTTONDOWN)\n+ {\n+ this->l_mouse_x = x;\n+ this->l_mouse_y = y;\n+ }\n+ else if (event == EVENT_MOUSEMOVE && (flags & EVENT_FLAG_LBUTTON))\n+ {\n+ this->view.rotate(x - this->l_mouse_x, y - this->l_mouse_y);\n+ updateWindow(this->name);\n+ this->l_mouse_x = x;\n+ this->l_mouse_y = y;\n+ }\n+ else if (event == EVENT_MOUSEMOVE && (flags & EVENT_FLAG_RBUTTON))\n+ {\n+ this->view.move(x - this->l_mouse_x, y - this->l_mouse_y);\n+ updateWindow(this->name);\n+ this->l_mouse_x = x;\n+ this->l_mouse_y = y;\n+ }\n+ else if (event == EVENT_MOUSEWHEEL)\n+ {\n+ this->view.scaleDistance(min(1.2f, max(0.8f, 1.0f - getMouseWheelDelta(flags) / 12.0f)));\n+ updateWindow(this->name);\n+ }\n+}\n+\n+cv::viz3d::Object::Object()\n+{\n+\tthis->position = { 0.0f, 0.0f, 0.0f };\n+\tthis->rotation = { 0.0f, 0.0f, 0.0f };\n+\tthis->model = Matx44f::eye();\n+}\n+\n+void cv::viz3d::Object::setPosition(const Vec3f& position)\n+{\n+ this->position = position;\n+ this->updateModel();\n+}\n+\n+void cv::viz3d::Object::setRotation(const Vec3f& rotation)\n+{\n+ this->rotation = rotation;\n+ this->updateModel();\n+}\n+\n+void cv::viz3d::Object::updateModel()\n+{\n+ // Calculate rotation matrices\n+ Matx44f rot_a = Matx44f::eye();\n+ rot_a(0, 0) = ::cos(this->rotation(0));\n+ rot_a(1, 0) = ::sin(this->rotation(0));\n+ rot_a(0, 1) = -::sin(this->rotation(0));\n+ rot_a(1, 1) = ::cos(this->rotation(0));\n+\n+ Matx44f rot_b = Matx44f::eye();\n+ rot_b(1, 1) = ::cos(this->rotation(1));\n+ rot_b(2, 1) = ::sin(this->rotation(1));\n+ rot_b(1, 2) = -::sin(this->rotation(1));\n+ rot_b(2, 2) = ::cos(this->rotation(1));\n+\n+ Matx44f rot_c = Matx44f::eye();\n+ rot_c(0, 0) = ::cos(this->rotation(2));\n+ rot_c(2, 0) = ::sin(this->rotation(2));\n+ rot_c(0, 2) = -::sin(this->rotation(2));\n+ rot_c(2, 2) = ::cos(this->rotation(2));\n+\n+ // Calculate translation matrix\n+ Matx44f trans = Matx44f::eye();\n+ trans(3, 0) = this->position(0);\n+ trans(3, 1) = this->position(1);\n+ trans(3, 2) = this->position(2);\n+\n+ // Multiply matrices\n+ this->model = rot_c * rot_b * rot_a * trans;\n+}\n+\n+cv::viz3d::Mesh::Mesh(InputArray verts, InputArray indices)\n+{\n+ // Check parameter 
validity\n+ CV_Assert(verts.channels() == 1 && verts.dims() == 2 && (verts.size().width == 3 || verts.size().width == 6 || verts.size().width == 9));\n+ CV_Assert(verts.depth() == CV_32F);\n+ CV_Assert(indices.channels() == 1 && indices.dims() == 2 && indices.size().width == 3);\n+ CV_Assert(indices.depth() == CV_8U || indices.depth() == CV_16U || indices.depth() == CV_32S);\n+\n+ // Prepare buffers\n+ if (verts.kind() == _InputArray::OPENGL_BUFFER)\n+ this->verts = verts.getOGlBuffer();\n+ else\n+ this->verts.copyFrom(verts, ogl::Buffer::ARRAY_BUFFER);\n+\n+ if (indices.kind() == _InputArray::OPENGL_BUFFER)\n+ this->indices = indices.getOGlBuffer();\n+ else\n+ this->indices.copyFrom(indices, ogl::Buffer::ELEMENT_ARRAY_BUFFER);\n+\n+ switch (indices.depth())\n+ {\n+ case CV_8U:\n+ this->index_type = ogl::UNSIGNED_BYTE;\n+ break;\n+ case CV_16U:\n+ this->index_type = ogl::UNSIGNED_SHORT;\n+ break;\n+ case CV_32S:\n+ this->index_type = ogl::UNSIGNED_INT;\n+ break;\n+ }\n+\n+ // Prepare vertex array\n+ if (verts.size().width == 3)\n+ {\n+ this->va.create({\n+ ogl::Attribute(", + "comment_created_at": "2021-10-07T21:05:49+00:00", + "comment_author": "JulieBar", + "comment_body": "decrease duplication\r\n```\r\ngenerate_attr(shader_loc) {\r\nreturn ogl::Attribute(\r\n\t\t\t this->verts,\r\n\t\t\t verts.size().width * sizeof(float), 0,\r\n\t\t\t 3, ogl::Attribute::FLOAT,\r\n\t\t\t false, false,\r\n\t\t\t shader_loc\r\n\t\t );\r\n}\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1924918226", + "pr_number": 26772, + "pr_file": "modules/photo/src/denoising.cpp", + "created_at": "2025-01-22T08:41:08+00:00", + "commented_code": "int hn = (int)h.size();\n double granularity = (double)std::max(1., (double)dst.total()/(1 << 16));\n\n switch (srcImgs[0].type())\n int depth = CV_MAT_DEPTH(srcImgs[0].type());", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1924918226", + "repo_full_name": "opencv/opencv", + "pr_number": 26772, + "pr_file": "modules/photo/src/denoising.cpp", + "discussion_id": "1924918226", + "commented_code": "@@ -249,60 +249,129 @@ static void fastNlMeansDenoisingMulti_( const std::vector& srcImgs, Mat& ds\n int hn = (int)h.size();\n double granularity = (double)std::max(1., (double)dst.total()/(1 << 16));\n \n- switch (srcImgs[0].type())\n+ int depth = CV_MAT_DEPTH(srcImgs[0].type());", + "comment_created_at": "2025-01-22T08:41:08+00:00", + "comment_author": "sturkmen72", + "comment_body": "what about\r\n```\r\n int depth = srcImgs[0].depth();\r\n int channels = srcImgs[0].channels();\r\nfor better readibility\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1800592693", + "pr_number": 26281, + "pr_file": "modules/core/src/opengl.cpp", + "created_at": "2024-10-15T07:19:59+00:00", + "commented_code": "#elif !defined(HAVE_OPENCL_OPENGL_SHARING)\n NO_OPENCL_SHARING_ERROR;\n#else\n cl_uint numPlatforms;\n cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);\n cl_uint platformsCnt = 0;\n cl_uint devCnt = 0;\n cl_device_id* devices = nullptr;\n cl_uint devUsed = 0;\n cl_context context = nullptr;\n\n cl_int status = clGetPlatformIDs(0, NULL, &platformsCnt);\n if (status != CL_SUCCESS)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get number of platforms: %d\", status));\n if (numPlatforms == 0)\n if (platformsCnt == 0)\n CV_Error(cv::Error::OpenCLInitError, \"OpenCL: No available platforms\");\n\n std::vector platforms(numPlatforms);\n status = clGetPlatformIDs(numPlatforms, &platforms[0], 
NULL);\n std::vector platforms(platformsCnt);\n status = clGetPlatformIDs(platformsCnt, &platforms[0], NULL);\n if (status != CL_SUCCESS)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get number of platforms: %d\", status));\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get platforms: %d\", status));\n\n\n // TODO Filter platforms by name from OPENCV_OPENCL_DEVICE\n bool sharingSupported = false;\n\n int found = -1;\n cl_device_id device = NULL;\n cl_context context = NULL;\n for (unsigned int i = 0; (!sharingSupported && (i < platformsCnt)); ++i) {\n status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &devCnt);\n if (status != CL_SUCCESS)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: No devices available: %d\", status));\n\n for (int i = 0; i < (int)numPlatforms; i++)\n {\n // query platform extension: presence of \"cl_khr_gl_sharing\" extension is required\n {\n AutoBuffer extensionStr;\n try {\n devices = new cl_device_id[devCnt];\n\n size_t extensionSize;\n status = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 0, NULL, &extensionSize);\n if (status == CL_SUCCESS)\n {\n extensionStr.allocate(extensionSize+1);\n status = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, extensionSize, (char*)extensionStr.data(), NULL);\n }\n status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, devCnt, devices, NULL);\n if (status != CL_SUCCESS)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get platform extension string: %d\", status));\n\n if (!strstr((const char*)extensionStr.data(), \"cl_khr_gl_sharing\"))\n continue;\n }\n\n clGetGLContextInfoKHR_fn clGetGLContextInfoKHR = (clGetGLContextInfoKHR_fn)\n clGetExtensionFunctionAddressForPlatform(platforms[i], \"clGetGLContextInfoKHR\");\n if (!clGetGLContextInfoKHR)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get platform devices: %d\", status));\n\n for (unsigned int j = 0; (!sharingSupported && (j < devCnt)); ++j) {\n size_t extensionSize;\n status = clGetDeviceInfo(devices[j], CL_DEVICE_EXTENSIONS, 0, NULL, &extensionSize );\n if (status != CL_SUCCESS)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: No devices available: %d\", status));\n\n if(extensionSize > 0)\n {\n char* extensions = nullptr;\n\n try {\n extensions = new char[extensionSize];\n\n status = clGetDeviceInfo(devices[j], CL_DEVICE_EXTENSIONS, extensionSize, extensions, &extensionSize);\n if (status != CL_SUCCESS)\n continue;\n } catch(...) 
{\n CV_Error(cv::Error::OpenCLInitError, \"OpenCL: Exception thrown during device extensions gathering\");\n }\n\n std::string devString;\n\n if(extensions != nullptr) {\n devString = extensions;\n delete[] extensions;\n }\n else {\n CV_Error(cv::Error::OpenCLInitError, \"OpenCL: Unexpected error during device extensions gathering\");\n }\n\n size_t oldPos = 0;\n size_t spacePos = devString.find(' ', oldPos); // extensions string is space delimited\n while (spacePos != devString.npos) {\n if (strcmp(GL_SHARING_EXTENSION,\n devString.substr(oldPos, spacePos - oldPos).c_str())\n == 0) {\n // Device supports context sharing with OpenGL\n devUsed = i;\n sharingSupported = true;\n break;\n }\n do {\n oldPos = spacePos + 1;\n spacePos = devString.find(' ', oldPos);\n } while (spacePos == oldPos);\n }\n }\n }", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1800592693", + "repo_full_name": "opencv/opencv", + "pr_number": 26281, + "pr_file": "modules/core/src/opengl.cpp", + "discussion_id": "1800592693", + "commented_code": "@@ -1635,94 +1641,148 @@ Context& initializeContextFromGL()\n #elif !defined(HAVE_OPENCL_OPENGL_SHARING)\n NO_OPENCL_SHARING_ERROR;\n #else\n- cl_uint numPlatforms;\n- cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);\n+ cl_uint platformsCnt = 0;\n+ cl_uint devCnt = 0;\n+ cl_device_id* devices = nullptr;\n+ cl_uint devUsed = 0;\n+ cl_context context = nullptr;\n+\n+ cl_int status = clGetPlatformIDs(0, NULL, &platformsCnt);\n if (status != CL_SUCCESS)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get number of platforms: %d\", status));\n- if (numPlatforms == 0)\n+ if (platformsCnt == 0)\n CV_Error(cv::Error::OpenCLInitError, \"OpenCL: No available platforms\");\n \n- std::vector platforms(numPlatforms);\n- status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL);\n+ std::vector platforms(platformsCnt);\n+ status = clGetPlatformIDs(platformsCnt, &platforms[0], NULL);\n if (status != CL_SUCCESS)\n- CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get number of platforms: %d\", status));\n+ CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get platforms: %d\", status));\n+\n \n // TODO Filter platforms by name from OPENCV_OPENCL_DEVICE\n+ bool sharingSupported = false;\n \n- int found = -1;\n- cl_device_id device = NULL;\n- cl_context context = NULL;\n+ for (unsigned int i = 0; (!sharingSupported && (i < platformsCnt)); ++i) {\n+ status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &devCnt);\n+ if (status != CL_SUCCESS)\n+ CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: No devices available: %d\", status));\n \n- for (int i = 0; i < (int)numPlatforms; i++)\n- {\n- // query platform extension: presence of \"cl_khr_gl_sharing\" extension is required\n- {\n- AutoBuffer extensionStr;\n+ try {\n+ devices = new cl_device_id[devCnt];\n \n- size_t extensionSize;\n- status = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 0, NULL, &extensionSize);\n- if (status == CL_SUCCESS)\n- {\n- extensionStr.allocate(extensionSize+1);\n- status = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, extensionSize, (char*)extensionStr.data(), NULL);\n- }\n+ status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, devCnt, devices, NULL);\n if (status != CL_SUCCESS)\n- CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get platform extension string: %d\", status));\n-\n- if (!strstr((const char*)extensionStr.data(), \"cl_khr_gl_sharing\"))\n- continue;\n- }\n-\n- clGetGLContextInfoKHR_fn 
clGetGLContextInfoKHR = (clGetGLContextInfoKHR_fn)\n- clGetExtensionFunctionAddressForPlatform(platforms[i], \"clGetGLContextInfoKHR\");\n- if (!clGetGLContextInfoKHR)\n+ CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get platform devices: %d\", status));\n+\n+ for (unsigned int j = 0; (!sharingSupported && (j < devCnt)); ++j) {\n+ size_t extensionSize;\n+ status = clGetDeviceInfo(devices[j], CL_DEVICE_EXTENSIONS, 0, NULL, &extensionSize );\n+ if (status != CL_SUCCESS)\n+ CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: No devices available: %d\", status));\n+\n+ if(extensionSize > 0)\n+ {\n+ char* extensions = nullptr;\n+\n+ try {\n+ extensions = new char[extensionSize];\n+\n+ status = clGetDeviceInfo(devices[j], CL_DEVICE_EXTENSIONS, extensionSize, extensions, &extensionSize);\n+ if (status != CL_SUCCESS)\n+ continue;\n+ } catch(...) {\n+ CV_Error(cv::Error::OpenCLInitError, \"OpenCL: Exception thrown during device extensions gathering\");\n+ }\n+\n+ std::string devString;\n+\n+ if(extensions != nullptr) {\n+ devString = extensions;\n+ delete[] extensions;\n+ }\n+ else {\n+ CV_Error(cv::Error::OpenCLInitError, \"OpenCL: Unexpected error during device extensions gathering\");\n+ }\n+\n+ size_t oldPos = 0;\n+ size_t spacePos = devString.find(' ', oldPos); // extensions string is space delimited\n+ while (spacePos != devString.npos) {\n+ if (strcmp(GL_SHARING_EXTENSION,\n+ devString.substr(oldPos, spacePos - oldPos).c_str())\n+ == 0) {\n+ // Device supports context sharing with OpenGL\n+ devUsed = i;\n+ sharingSupported = true;\n+ break;\n+ }\n+ do {\n+ oldPos = spacePos + 1;\n+ spacePos = devString.find(' ', oldPos);\n+ } while (spacePos == oldPos);\n+ }\n+ }\n+ }", + "comment_created_at": "2024-10-15T07:19:59+00:00", + "comment_author": "asmorkalov", + "comment_body": "Let's extract extension search as dedicated static function. 
It should make the code more readable.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1677867568", + "pr_number": 25608, + "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", + "created_at": "2024-07-15T13:50:53+00:00", + "commented_code": "WebPBitstreamFeatures features;\n if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1677867568", + "repo_full_name": "opencv/opencv", + "pr_number": 25608, + "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", + "discussion_id": "1677867568", + "commented_code": "@@ -126,8 +84,39 @@ bool WebPDecoder::readHeader()\n WebPBitstreamFeatures features;\n if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))", + "comment_created_at": "2024-07-15T13:50:53+00:00", + "comment_author": "vrabaud", + "comment_body": "Just to reduce the imbricated `if` , write it as:\r\n```cpp\r\nif (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features)) return false;\r\n```\r\nAnd then you can remove the big `{}` below.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1732463520", + "pr_number": 25608, + "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", + "created_at": "2024-08-27T09:21:47+00:00", + "commented_code": "}\n\n WebPBitstreamFeatures features;\n if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))\n if (VP8_STATUS_OK < WebPGetFeatures(header, sizeof(header), &features)) return false;\n\n m_has_animation = features.has_animation == 1;\n\n if (m_has_animation)\n {\n CV_CheckEQ(features.has_animation, 0, \"WebP backend does not support animated webp images\");\n fs.seekg(0, std::ios::beg); CV_Assert(fs && \"File stream error\");\n data.create(1, validateToInt(fs_size), CV_8UC1);\n fs.read((char*)data.ptr(), fs_size);\n CV_Assert(fs && \"Can't read file data\");\n fs.close();\n\n m_width = features.width;\n m_height = features.height;\n CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);\n\n if (features.has_alpha)\n {\n m_type = CV_8UC4;\n channels = 4;\n }\n WebPData webp_data;\n webp_data.bytes = (const uint8_t*)data.ptr();\n webp_data.size = data.total();\n\n WebPAnimDecoderOptions dec_options;\n WebPAnimDecoderOptionsInit(&dec_options);\n\n if(m_use_rgb)", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1732463520", + "repo_full_name": "opencv/opencv", + "pr_number": 25608, + "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", + "discussion_id": "1732463520", + "commented_code": "@@ -124,28 +81,51 @@ bool WebPDecoder::readHeader()\n }\n \n WebPBitstreamFeatures features;\n- if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))\n+ if (VP8_STATUS_OK < WebPGetFeatures(header, sizeof(header), &features)) return false;\n+\n+ m_has_animation = features.has_animation == 1;\n+\n+ if (m_has_animation)\n {\n- CV_CheckEQ(features.has_animation, 0, \"WebP backend does not support animated webp images\");\n+ fs.seekg(0, std::ios::beg); CV_Assert(fs && \"File stream error\");\n+ data.create(1, validateToInt(fs_size), CV_8UC1);\n+ fs.read((char*)data.ptr(), fs_size);\n+ CV_Assert(fs && \"Can't read file data\");\n+ fs.close();\n \n- m_width = features.width;\n- m_height = features.height;\n+ CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);\n \n- if (features.has_alpha)\n- {\n- m_type = CV_8UC4;\n- channels = 4;\n- }\n+ WebPData webp_data;\n+ webp_data.bytes = (const uint8_t*)data.ptr();\n+ webp_data.size = 
data.total();\n+\n+ WebPAnimDecoderOptions dec_options;\n+ WebPAnimDecoderOptionsInit(&dec_options);\n+\n+ if(m_use_rgb)", + "comment_created_at": "2024-08-27T09:21:47+00:00", + "comment_author": "sturkmen72", + "comment_body": "which line below better for readability?\r\n\r\n```\r\nif(m_use_rgb)\r\n dec_options.color_mode = features.has_alpha ? MODE_RGBA : MODE_RGB;\r\nelse\r\n dec_options.color_mode = features.has_alpha ? MODE_BGRA : MODE_BGR;\r\n```\r\n```\r\ndec_options.color_mode = m_use_rgb\r\n ? (features.has_alpha ? MODE_RGBA : MODE_RGB)\r\n : (features.has_alpha ? MODE_BGRA : MODE_BGR);\r\n```\r\n@vrabaud , @asmorkalov ", + "pr_file_module": null + }, + { + "comment_id": "1732760920", + "repo_full_name": "opencv/opencv", + "pr_number": 25608, + "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", + "discussion_id": "1732463520", + "commented_code": "@@ -124,28 +81,51 @@ bool WebPDecoder::readHeader()\n }\n \n WebPBitstreamFeatures features;\n- if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))\n+ if (VP8_STATUS_OK < WebPGetFeatures(header, sizeof(header), &features)) return false;\n+\n+ m_has_animation = features.has_animation == 1;\n+\n+ if (m_has_animation)\n {\n- CV_CheckEQ(features.has_animation, 0, \"WebP backend does not support animated webp images\");\n+ fs.seekg(0, std::ios::beg); CV_Assert(fs && \"File stream error\");\n+ data.create(1, validateToInt(fs_size), CV_8UC1);\n+ fs.read((char*)data.ptr(), fs_size);\n+ CV_Assert(fs && \"Can't read file data\");\n+ fs.close();\n \n- m_width = features.width;\n- m_height = features.height;\n+ CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);\n \n- if (features.has_alpha)\n- {\n- m_type = CV_8UC4;\n- channels = 4;\n- }\n+ WebPData webp_data;\n+ webp_data.bytes = (const uint8_t*)data.ptr();\n+ webp_data.size = data.total();\n+\n+ WebPAnimDecoderOptions dec_options;\n+ WebPAnimDecoderOptionsInit(&dec_options);\n+\n+ if(m_use_rgb)", + "comment_created_at": "2024-08-27T12:31:30+00:00", + "comment_author": "vrabaud", + "comment_body": "I'd say the second one.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-code-for-readability.md b/_reviewers/opencv-code-for-readability.md index c4ee4a3..1351fb9 100644 --- a/_reviewers/opencv-code-for-readability.md +++ b/_reviewers/opencv-code-for-readability.md @@ -41,365 +41,3 @@ const size_t channels = 6; // 3 coordinates + 3 color values const size_t pointCount = sizeof(points_data)/(channels*sizeof(float)); const Mat points_mat = Mat(Size(channels, pointCount), CV_32F, &points_data); ``` - - -[ - { - "discussion_id": "2172502541", - "pr_number": 27460, - "pr_file": "modules/gapi/src/backends/ov/govbackend.cpp", - "created_at": "2025-06-27T17:15:47+00:00", - "commented_code": "const std::vector &nodes)\n : m_g(g), m_gm(m_g) {\n\n if(cv::gapi::getCompileArg>(compileArgs).has_value()) {", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2172502541", - "repo_full_name": "opencv/opencv", - "pr_number": 27460, - "pr_file": "modules/gapi/src/backends/ov/govbackend.cpp", - "discussion_id": "2172502541", - "commented_code": "@@ -1542,6 +1542,10 @@ cv::gimpl::ov::GOVExecutable::GOVExecutable(const ade::Graph &g,\n const std::vector &nodes)\n : m_g(g), m_gm(m_g) {\n \n+ if(cv::gapi::getCompileArg>(compileArgs).has_value()) {", - "comment_created_at": "2025-06-27T17:15:47+00:00", - "comment_author": "AsyaPronina", - "comment_body": "Better to split this check for two lines: to retrieve 
compile arg and then check if it `has_value()`", - "pr_file_module": null - }, - { - "comment_id": "2172504387", - "repo_full_name": "opencv/opencv", - "pr_number": 27460, - "pr_file": "modules/gapi/src/backends/ov/govbackend.cpp", - "discussion_id": "2172502541", - "commented_code": "@@ -1542,6 +1542,10 @@ cv::gimpl::ov::GOVExecutable::GOVExecutable(const ade::Graph &g,\n const std::vector &nodes)\n : m_g(g), m_gm(m_g) {\n \n+ if(cv::gapi::getCompileArg>(compileArgs).has_value()) {", - "comment_created_at": "2025-06-27T17:16:22+00:00", - "comment_author": "AsyaPronina", - "comment_body": "or might be create a brief alias for `std::reference_wrapper` here", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1851047891", - "pr_number": 26366, - "pr_file": "modules/features2d/src/orb.cpp", - "created_at": "2024-11-20T22:12:40+00:00", - "commented_code": "bool useOCL = false;\n#endif\n\n Mat image = _image.getMat(), mask = _mask.getMat();\n Mat image = _image.getMat(), mask;\n if (!_mask.empty())\n {\n cv::threshold(_mask.getMat(), mask, 0, 255, cv::THRESH_BINARY);", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1851047891", - "repo_full_name": "opencv/opencv", - "pr_number": 26366, - "pr_file": "modules/features2d/src/orb.cpp", - "discussion_id": "1851047891", - "commented_code": "@@ -1036,7 +1036,11 @@ void ORB_Impl::detectAndCompute( InputArray _image, InputArray _mask,\n bool useOCL = false;\n #endif\n \n- Mat image = _image.getMat(), mask = _mask.getMat();\n+ Mat image = _image.getMat(), mask;\n+ if (!_mask.empty())\n+ {\n+ cv::threshold(_mask.getMat(), mask, 0, 255, cv::THRESH_BINARY);", - "comment_created_at": "2024-11-20T22:12:40+00:00", - "comment_author": "sturkmen72", - "comment_body": "threshold(_mask, mask, 0, 255, THRESH_BINARY);", - "pr_file_module": null - }, - { - "comment_id": "1851260237", - "repo_full_name": "opencv/opencv", - "pr_number": 26366, - "pr_file": "modules/features2d/src/orb.cpp", - "discussion_id": "1851047891", - "commented_code": "@@ -1036,7 +1036,11 @@ void ORB_Impl::detectAndCompute( InputArray _image, InputArray _mask,\n bool useOCL = false;\n #endif\n \n- Mat image = _image.getMat(), mask = _mask.getMat();\n+ Mat image = _image.getMat(), mask;\n+ if (!_mask.empty())\n+ {\n+ cv::threshold(_mask.getMat(), mask, 0, 255, cv::THRESH_BINARY);", - "comment_created_at": "2024-11-21T03:21:15+00:00", - "comment_author": "shyama7004", - "comment_body": "Your snippet does the same thing but in a concise way:), do you want me to implement this change?", - "pr_file_module": null - }, - { - "comment_id": "1851462580", - "repo_full_name": "opencv/opencv", - "pr_number": 26366, - "pr_file": "modules/features2d/src/orb.cpp", - "discussion_id": "1851047891", - "commented_code": "@@ -1036,7 +1036,11 @@ void ORB_Impl::detectAndCompute( InputArray _image, InputArray _mask,\n bool useOCL = false;\n #endif\n \n- Mat image = _image.getMat(), mask = _mask.getMat();\n+ Mat image = _image.getMat(), mask;\n+ if (!_mask.empty())\n+ {\n+ cv::threshold(_mask.getMat(), mask, 0, 255, cv::THRESH_BINARY);", - "comment_created_at": "2024-11-21T06:58:16+00:00", - "comment_author": "sturkmen72", - "comment_body": "yes exactly does the same thing.\r\nno need to `cv::` prefix and `.getMat()`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1450572150", - "pr_number": 24768, - "pr_file": "modules/dnn/src/layers/nary_eltwise_layers.cpp", - "created_at": "2024-01-12T15:02:26+00:00", - "commented_code": "return Ptr(new 
InfEngineNgraphNode(node));\n }\n#endif\n\n#ifdef HAVE_VULKAN\n virtual Ptr initVkCom(const std::vector > &inputs,\n std::vector > &outputs) CV_OVERRIDE\n {\n std::vector> inputWrappers;\n std::vector inputShapes;\n std::transform(inputs.begin(), inputs.end(), std::back_inserter(inputWrappers), [] (const Ptr& w) { return w.dynamicCast(); });\n for (const auto &ptr: inputWrappers) {\n CV_Assert(ptr);\n }\n std::transform(inputWrappers.begin(), inputWrappers.end(), std::back_inserter(inputShapes), [] (const Ptr& w) { return shape(*(w->getMat())); });\n auto outputWrapper = outputs[0].dynamicCast();\n CV_Assert(outputWrapper);\n auto outputShape = shape(*(outputWrapper->getMat()));\n std::vector vkBlobs; // TODO(vk) what\n\n // collect all input\n int ninputs = inputs.size();\n std::vector v_inp;\n std::transform(inputWrappers.begin(), inputWrappers.end(), std::back_inserter(v_inp), [] (const Ptr &w) { return (w->getMat())->template ptr(); });\n // const char** inp = v_inp.data();\n\n // collect ndims of all input\n std::vector v_inp_dims;\n std::transform(inputWrappers.begin(), inputWrappers.end(), std::back_inserter(v_inp_dims), [] (const Ptr &w) { return (w->getMat())->dims; });\n const int* inp_ndims = v_inp_dims.data();\n\n // collect shapes of all input\n std::vector v_inp_shape;\n std::transform(inputWrappers.begin(), inputWrappers.end(), std::back_inserter(v_inp_shape), [] (const Ptr &w) { return (w->getMat())->size.p; });\n const int** inp_shape = v_inp_shape.data();\n\n // collect steps of all input\n std::vector v_inp_step;\n std::transform(inputWrappers.begin(), inputWrappers.end(), std::back_inserter(v_inp_step), [] (const Ptr &w) { return (w->getMat())->step.p; });\n const size_t** inp_step = v_inp_step.data();\n\n // collect info of output (ndims, shape, step)\n // char* out = outputWrapper->getMat()->ptr();\n int out_ndims = outputWrapper->getMat()->dims;\n const int* out_shape = outputWrapper->getMat()->size.p;\n const size_t* out_step = outputWrapper->getMat()->step.p;\n\n // find max ndims for broadcasting\n int i, max_ndims = out_ndims > 2 ? out_ndims : 2;\n for(i = 0; i < ninputs; i++)\n max_ndims = max_ndims > inp_ndims[i] ? max_ndims : inp_ndims[i];\n\n // buf holds the following buffers for inputs & output:\n // * orig_shapes, shapes (result_shape), orig_steps, steps (result_step), (ninputs+1)*4 elements in total\n // * ptrs, (ninputs+1)*1 elements in total\n // * shape_buf & step_buf, (ninputs+1)*2*max_ndims elements in total\n // * all_ndims, (ninputs+1)*1 elements in total\n // * all_type_sizes, (ninputs+1)*1 elements in total\n AutoBuffer buf((ninputs + 1) * (2 * max_ndims + 7));\n\n int** orig_shapes = (int**)buf.data();\n int** shapes = orig_shapes + ninputs + 1;\n size_t** orig_steps = (size_t**)(shapes + ninputs + 1);\n size_t** steps = orig_steps + ninputs + 1;\n\n char** ptrs = (char**)(steps + ninputs + 1);\n\n size_t* step_buf = (size_t*)(ptrs + ninputs + 1);\n int* shape_buf = (int*)(step_buf + (ninputs + 1)*max_ndims);\n\n int* all_ndims = shape_buf + (ninputs + 1)*max_ndims;\n size_t* all_type_sizes = (size_t*)(all_ndims + ninputs + 1);\n\n for(i = 0; i <= ninputs; i++) {\n all_ndims[i] = i == 0 ? out_ndims : inp_ndims[i-1];\n all_type_sizes[i] = i == 0 ? outputWrapper->getMat()->elemSize() : inputWrappers[i-1]->getMat()->elemSize();\n orig_shapes[i] = (int*)(i == 0 ? out_shape : inp_shape ? inp_shape[i-1] : 0);\n orig_steps[i] = (size_t*)(i == 0 ? out_step : inp_step ? 
inp_step[i-1] : 0);\n shapes[i] = shape_buf + max_ndims*i;\n steps[i] = step_buf + max_ndims*i;\n }", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1450572150", - "repo_full_name": "opencv/opencv", - "pr_number": 24768, - "pr_file": "modules/dnn/src/layers/nary_eltwise_layers.cpp", - "discussion_id": "1450572150", - "commented_code": "@@ -928,6 +936,96 @@ class NaryEltwiseLayerImpl CV_FINAL : public NaryEltwiseLayer\n return Ptr(new InfEngineNgraphNode(node));\n }\n #endif\n+\n+#ifdef HAVE_VULKAN\n+ virtual Ptr initVkCom(const std::vector > &inputs,\n+ std::vector > &outputs) CV_OVERRIDE\n+ {\n+ std::vector> inputWrappers;\n+ std::vector inputShapes;\n+ std::transform(inputs.begin(), inputs.end(), std::back_inserter(inputWrappers), [] (const Ptr& w) { return w.dynamicCast(); });\n+ for (const auto &ptr: inputWrappers) {\n+ CV_Assert(ptr);\n+ }\n+ std::transform(inputWrappers.begin(), inputWrappers.end(), std::back_inserter(inputShapes), [] (const Ptr& w) { return shape(*(w->getMat())); });\n+ auto outputWrapper = outputs[0].dynamicCast();\n+ CV_Assert(outputWrapper);\n+ auto outputShape = shape(*(outputWrapper->getMat()));\n+ std::vector vkBlobs; // TODO(vk) what\n+\n+ // collect all input\n+ int ninputs = inputs.size();\n+ std::vector v_inp;\n+ std::transform(inputWrappers.begin(), inputWrappers.end(), std::back_inserter(v_inp), [] (const Ptr &w) { return (w->getMat())->template ptr(); });\n+ // const char** inp = v_inp.data();\n+\n+ // collect ndims of all input\n+ std::vector v_inp_dims;\n+ std::transform(inputWrappers.begin(), inputWrappers.end(), std::back_inserter(v_inp_dims), [] (const Ptr &w) { return (w->getMat())->dims; });\n+ const int* inp_ndims = v_inp_dims.data();\n+\n+ // collect shapes of all input\n+ std::vector v_inp_shape;\n+ std::transform(inputWrappers.begin(), inputWrappers.end(), std::back_inserter(v_inp_shape), [] (const Ptr &w) { return (w->getMat())->size.p; });\n+ const int** inp_shape = v_inp_shape.data();\n+\n+ // collect steps of all input\n+ std::vector v_inp_step;\n+ std::transform(inputWrappers.begin(), inputWrappers.end(), std::back_inserter(v_inp_step), [] (const Ptr &w) { return (w->getMat())->step.p; });\n+ const size_t** inp_step = v_inp_step.data();\n+\n+ // collect info of output (ndims, shape, step)\n+ // char* out = outputWrapper->getMat()->ptr();\n+ int out_ndims = outputWrapper->getMat()->dims;\n+ const int* out_shape = outputWrapper->getMat()->size.p;\n+ const size_t* out_step = outputWrapper->getMat()->step.p;\n+\n+ // find max ndims for broadcasting\n+ int i, max_ndims = out_ndims > 2 ? out_ndims : 2;\n+ for(i = 0; i < ninputs; i++)\n+ max_ndims = max_ndims > inp_ndims[i] ? 
max_ndims : inp_ndims[i];\n+\n+ // buf holds the following buffers for inputs & output:\n+ // * orig_shapes, shapes (result_shape), orig_steps, steps (result_step), (ninputs+1)*4 elements in total\n+ // * ptrs, (ninputs+1)*1 elements in total\n+ // * shape_buf & step_buf, (ninputs+1)*2*max_ndims elements in total\n+ // * all_ndims, (ninputs+1)*1 elements in total\n+ // * all_type_sizes, (ninputs+1)*1 elements in total\n+ AutoBuffer buf((ninputs + 1) * (2 * max_ndims + 7));\n+\n+ int** orig_shapes = (int**)buf.data();\n+ int** shapes = orig_shapes + ninputs + 1;\n+ size_t** orig_steps = (size_t**)(shapes + ninputs + 1);\n+ size_t** steps = orig_steps + ninputs + 1;\n+\n+ char** ptrs = (char**)(steps + ninputs + 1);\n+\n+ size_t* step_buf = (size_t*)(ptrs + ninputs + 1);\n+ int* shape_buf = (int*)(step_buf + (ninputs + 1)*max_ndims);\n+\n+ int* all_ndims = shape_buf + (ninputs + 1)*max_ndims;\n+ size_t* all_type_sizes = (size_t*)(all_ndims + ninputs + 1);\n+\n+ for(i = 0; i <= ninputs; i++) {\n+ all_ndims[i] = i == 0 ? out_ndims : inp_ndims[i-1];\n+ all_type_sizes[i] = i == 0 ? outputWrapper->getMat()->elemSize() : inputWrappers[i-1]->getMat()->elemSize();\n+ orig_shapes[i] = (int*)(i == 0 ? out_shape : inp_shape ? inp_shape[i-1] : 0);\n+ orig_steps[i] = (size_t*)(i == 0 ? out_step : inp_step ? inp_step[i-1] : 0);\n+ shapes[i] = shape_buf + max_ndims*i;\n+ steps[i] = step_buf + max_ndims*i;\n+ }", - "comment_created_at": "2024-01-12T15:02:26+00:00", - "comment_author": "fengyuentau", - "comment_body": "Since they share the same code with CPU implementation, could you extract and make it a function to call to reduce duplicate code?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1977140260", - "pr_number": 26934, - "pr_file": "modules/objdetect/src/aruco/aruco_detector.cpp", - "created_at": "2025-03-03T09:17:51+00:00", - "commented_code": "RefineParameters refineParams;\n ArucoDetectorImpl() {}\n\n ArucoDetectorImpl(const Dictionary &_dictionary, const DetectorParameters &_detectorParams,\n const RefineParameters& _refineParams): dictionary(_dictionary),\n detectorParams(_detectorParams), refineParams(_refineParams) {}\n ArucoDetectorImpl(const vector&_dictionaries, const DetectorParameters &_detectorParams,\n const RefineParameters& _refineParams): dictionaries(_dictionaries),\n detectorParams(_detectorParams), refineParams(_refineParams) {\n CV_Assert(!dictionaries.empty());\n }\n\n /*\n * @brief Detect markers either using multiple or just first dictionary\n */\n void detectMarkers(InputArray _image, OutputArrayOfArrays _corners, OutputArray _ids,\n OutputArrayOfArrays _rejectedImgPoints, OutputArray _dictIndices, DictionaryMode dictMode) {\n CV_Assert(!_image.empty());\n\n CV_Assert(detectorParams.markerBorderBits > 0);\n // check that the parameters are set correctly if Aruco3 is used\n CV_Assert(!(detectorParams.useAruco3Detection == true &&\n detectorParams.minSideLengthCanonicalImg == 0 &&\n detectorParams.minMarkerLengthRatioOriginalImg == 0.0));\n\n Mat grey;\n _convertToGrey(_image, grey);\n\n // Aruco3 functionality is the extension of Aruco.\n // The description can be found in:\n // [1] Speeded up detection of squared fiducial markers, 2018, FJ Romera-Ramirez et al.\n // if Aruco3 functionality if not wanted\n // change some parameters to be sure to turn it off\n if (!detectorParams.useAruco3Detection) {\n detectorParams.minMarkerLengthRatioOriginalImg = 0.0;\n detectorParams.minSideLengthCanonicalImg = 0;\n }\n else {\n // always turn on corner refinement in 
case of Aruco3, due to upsampling\n detectorParams.cornerRefinementMethod = (int)CORNER_REFINE_SUBPIX;\n // only CORNER_REFINE_SUBPIX implement correctly for useAruco3Detection\n // Todo: update other CORNER_REFINE methods\n }\n\n /// Step 0: equation (2) from paper [1]\n const float fxfy = (!detectorParams.useAruco3Detection ? 1.f : detectorParams.minSideLengthCanonicalImg /\n (detectorParams.minSideLengthCanonicalImg + std::max(grey.cols, grey.rows)*\n detectorParams.minMarkerLengthRatioOriginalImg));\n\n /// Step 1: create image pyramid. Section 3.4. in [1]\n vector grey_pyramid;\n int closest_pyr_image_idx = 0, num_levels = 0;\n //// Step 1.1: resize image with equation (1) from paper [1]\n if (detectorParams.useAruco3Detection) {\n const float scale_pyr = 2.f;\n const float img_area = static_cast(grey.rows*grey.cols);\n const float min_area_marker = static_cast(detectorParams.minSideLengthCanonicalImg*\n detectorParams.minSideLengthCanonicalImg);\n // find max level\n num_levels = static_cast(log2(img_area / min_area_marker)/scale_pyr);\n // the closest pyramid image to the downsampled segmentation image\n // will later be used as start index for corner upsampling\n const float scale_img_area = img_area * fxfy * fxfy;\n closest_pyr_image_idx = cvRound(log2(img_area / scale_img_area)/scale_pyr);\n }\n buildPyramid(grey, grey_pyramid, num_levels);\n\n // resize to segmentation image\n // in this reduces size the contours will be detected\n if (fxfy != 1.f)\n resize(grey, grey, Size(cvRound(fxfy * grey.cols), cvRound(fxfy * grey.rows)));\n\n /// STEP 2: Detect marker candidates\n vector > candidates;\n vector > contours;\n vector ids;\n\n /// STEP 2.a Detect marker candidates :: using AprilTag\n if(detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_APRILTAG){\n _apriltag(grey, detectorParams, candidates, contours);\n }\n /// STEP 2.b Detect marker candidates :: traditional way\n else {\n detectCandidates(grey, candidates, contours);\n }\n\n /// STEP 2.c FILTER OUT NEAR CANDIDATE PAIRS\n vector dictIndices;\n vector> rejectedImgPoints;\n if (DictionaryMode::Single == dictMode) {\n Dictionary& dictionary = dictionaries.at(0);\n auto selectedCandidates = filterTooCloseCandidates(candidates, contours, dictionary.markerSize);\n candidates.clear();\n contours.clear();\n\n /// STEP 2: Check candidate codification (identify markers)\n identifyCandidates(grey, grey_pyramid, selectedCandidates, candidates, contours,\n ids, dictionary, rejectedImgPoints);\n\n /// STEP 3: Corner refinement :: use corner subpix\n if (detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_SUBPIX) {\n CV_Assert(detectorParams.cornerRefinementWinSize > 0 && detectorParams.cornerRefinementMaxIterations > 0 &&\n detectorParams.cornerRefinementMinAccuracy > 0);\n // Do subpixel estimation. 
In Aruco3 start on the lowest pyramid level and upscale the corners\n parallel_for_(Range(0, (int)candidates.size()), [&](const Range& range) {\n const int begin = range.start;\n const int end = range.end;\n\n for (int i = begin; i < end; i++) {\n if (detectorParams.useAruco3Detection) {\n const float scale_init = (float) grey_pyramid[closest_pyr_image_idx].cols / grey.cols;\n findCornerInPyrImage(scale_init, closest_pyr_image_idx, grey_pyramid, Mat(candidates[i]), detectorParams);\n } else {\n int cornerRefinementWinSize = std::max(1, cvRound(detectorParams.relativeCornerRefinmentWinSize*\n getAverageModuleSize(candidates[i], dictionary.markerSize, detectorParams.markerBorderBits)));\n cornerRefinementWinSize = min(cornerRefinementWinSize, detectorParams.cornerRefinementWinSize);\n cornerSubPix(grey, Mat(candidates[i]), Size(cornerRefinementWinSize, cornerRefinementWinSize), Size(-1, -1),\n TermCriteria(TermCriteria::MAX_ITER | TermCriteria::EPS,\n detectorParams.cornerRefinementMaxIterations,\n detectorParams.cornerRefinementMinAccuracy));\n }\n }\n });\n }\n } else if (DictionaryMode::Multi == dictMode) {\n unordered_set uniqueMarkerSizes;\n for (const Dictionary& dictionary : dictionaries) {\n uniqueMarkerSizes.insert(dictionary.markerSize);\n }\n\n // create at max 4 marker candidate trees for each dictionary size\n vector> candidatesPerDictionarySize = {{}, {}, {}, {}};\n for (int markerSize : uniqueMarkerSizes) {\n // min marker size is 4, so subtract 4 to get index\n const auto dictionarySizeIndex = markerSize - 4;\n // copy candidates\n vector> candidatesCopy = candidates;\n vector > contoursCopy = contours;\n candidatesPerDictionarySize[dictionarySizeIndex] = filterTooCloseCandidates(candidatesCopy, contoursCopy, markerSize);\n }\n candidates.clear();\n contours.clear();\n\n /// STEP 2: Check candidate codification (identify markers)\n int dictIndex = 0;\n for (const Dictionary& currentDictionary : dictionaries) {\n const auto dictionarySizeIndex = currentDictionary.markerSize - 4;\n // temporary variable to store the current candidates\n vector> currentCandidates;\n identifyCandidates(grey, grey_pyramid, candidatesPerDictionarySize[dictionarySizeIndex], currentCandidates, contours,\n ids, currentDictionary, rejectedImgPoints);\n if (_dictIndices.needed()) {\n dictIndices.insert(dictIndices.end(), currentCandidates.size(), dictIndex);\n }\n\n /// STEP 3: Corner refinement :: use corner subpix\n if (detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_SUBPIX) {", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1977140260", - "repo_full_name": "opencv/opencv", - "pr_number": 26934, - "pr_file": "modules/objdetect/src/aruco/aruco_detector.cpp", - "discussion_id": "1977140260", - "commented_code": "@@ -651,9 +657,257 @@ struct ArucoDetector::ArucoDetectorImpl {\n RefineParameters refineParams;\n ArucoDetectorImpl() {}\n \n- ArucoDetectorImpl(const Dictionary &_dictionary, const DetectorParameters &_detectorParams,\n- const RefineParameters& _refineParams): dictionary(_dictionary),\n- detectorParams(_detectorParams), refineParams(_refineParams) {}\n+ ArucoDetectorImpl(const vector&_dictionaries, const DetectorParameters &_detectorParams,\n+ const RefineParameters& _refineParams): dictionaries(_dictionaries),\n+ detectorParams(_detectorParams), refineParams(_refineParams) {\n+ CV_Assert(!dictionaries.empty());\n+ }\n+\n+ /*\n+ * @brief Detect markers either using multiple or just first dictionary\n+ */\n+ void detectMarkers(InputArray _image, 
OutputArrayOfArrays _corners, OutputArray _ids,\n+ OutputArrayOfArrays _rejectedImgPoints, OutputArray _dictIndices, DictionaryMode dictMode) {\n+ CV_Assert(!_image.empty());\n+\n+ CV_Assert(detectorParams.markerBorderBits > 0);\n+ // check that the parameters are set correctly if Aruco3 is used\n+ CV_Assert(!(detectorParams.useAruco3Detection == true &&\n+ detectorParams.minSideLengthCanonicalImg == 0 &&\n+ detectorParams.minMarkerLengthRatioOriginalImg == 0.0));\n+\n+ Mat grey;\n+ _convertToGrey(_image, grey);\n+\n+ // Aruco3 functionality is the extension of Aruco.\n+ // The description can be found in:\n+ // [1] Speeded up detection of squared fiducial markers, 2018, FJ Romera-Ramirez et al.\n+ // if Aruco3 functionality if not wanted\n+ // change some parameters to be sure to turn it off\n+ if (!detectorParams.useAruco3Detection) {\n+ detectorParams.minMarkerLengthRatioOriginalImg = 0.0;\n+ detectorParams.minSideLengthCanonicalImg = 0;\n+ }\n+ else {\n+ // always turn on corner refinement in case of Aruco3, due to upsampling\n+ detectorParams.cornerRefinementMethod = (int)CORNER_REFINE_SUBPIX;\n+ // only CORNER_REFINE_SUBPIX implement correctly for useAruco3Detection\n+ // Todo: update other CORNER_REFINE methods\n+ }\n+\n+ /// Step 0: equation (2) from paper [1]\n+ const float fxfy = (!detectorParams.useAruco3Detection ? 1.f : detectorParams.minSideLengthCanonicalImg /\n+ (detectorParams.minSideLengthCanonicalImg + std::max(grey.cols, grey.rows)*\n+ detectorParams.minMarkerLengthRatioOriginalImg));\n+\n+ /// Step 1: create image pyramid. Section 3.4. in [1]\n+ vector grey_pyramid;\n+ int closest_pyr_image_idx = 0, num_levels = 0;\n+ //// Step 1.1: resize image with equation (1) from paper [1]\n+ if (detectorParams.useAruco3Detection) {\n+ const float scale_pyr = 2.f;\n+ const float img_area = static_cast(grey.rows*grey.cols);\n+ const float min_area_marker = static_cast(detectorParams.minSideLengthCanonicalImg*\n+ detectorParams.minSideLengthCanonicalImg);\n+ // find max level\n+ num_levels = static_cast(log2(img_area / min_area_marker)/scale_pyr);\n+ // the closest pyramid image to the downsampled segmentation image\n+ // will later be used as start index for corner upsampling\n+ const float scale_img_area = img_area * fxfy * fxfy;\n+ closest_pyr_image_idx = cvRound(log2(img_area / scale_img_area)/scale_pyr);\n+ }\n+ buildPyramid(grey, grey_pyramid, num_levels);\n+\n+ // resize to segmentation image\n+ // in this reduces size the contours will be detected\n+ if (fxfy != 1.f)\n+ resize(grey, grey, Size(cvRound(fxfy * grey.cols), cvRound(fxfy * grey.rows)));\n+\n+ /// STEP 2: Detect marker candidates\n+ vector > candidates;\n+ vector > contours;\n+ vector ids;\n+\n+ /// STEP 2.a Detect marker candidates :: using AprilTag\n+ if(detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_APRILTAG){\n+ _apriltag(grey, detectorParams, candidates, contours);\n+ }\n+ /// STEP 2.b Detect marker candidates :: traditional way\n+ else {\n+ detectCandidates(grey, candidates, contours);\n+ }\n+\n+ /// STEP 2.c FILTER OUT NEAR CANDIDATE PAIRS\n+ vector dictIndices;\n+ vector> rejectedImgPoints;\n+ if (DictionaryMode::Single == dictMode) {\n+ Dictionary& dictionary = dictionaries.at(0);\n+ auto selectedCandidates = filterTooCloseCandidates(candidates, contours, dictionary.markerSize);\n+ candidates.clear();\n+ contours.clear();\n+\n+ /// STEP 2: Check candidate codification (identify markers)\n+ identifyCandidates(grey, grey_pyramid, selectedCandidates, candidates, contours,\n+ ids, dictionary, 
rejectedImgPoints);\n+\n+ /// STEP 3: Corner refinement :: use corner subpix\n+ if (detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_SUBPIX) {\n+ CV_Assert(detectorParams.cornerRefinementWinSize > 0 && detectorParams.cornerRefinementMaxIterations > 0 &&\n+ detectorParams.cornerRefinementMinAccuracy > 0);\n+ // Do subpixel estimation. In Aruco3 start on the lowest pyramid level and upscale the corners\n+ parallel_for_(Range(0, (int)candidates.size()), [&](const Range& range) {\n+ const int begin = range.start;\n+ const int end = range.end;\n+\n+ for (int i = begin; i < end; i++) {\n+ if (detectorParams.useAruco3Detection) {\n+ const float scale_init = (float) grey_pyramid[closest_pyr_image_idx].cols / grey.cols;\n+ findCornerInPyrImage(scale_init, closest_pyr_image_idx, grey_pyramid, Mat(candidates[i]), detectorParams);\n+ } else {\n+ int cornerRefinementWinSize = std::max(1, cvRound(detectorParams.relativeCornerRefinmentWinSize*\n+ getAverageModuleSize(candidates[i], dictionary.markerSize, detectorParams.markerBorderBits)));\n+ cornerRefinementWinSize = min(cornerRefinementWinSize, detectorParams.cornerRefinementWinSize);\n+ cornerSubPix(grey, Mat(candidates[i]), Size(cornerRefinementWinSize, cornerRefinementWinSize), Size(-1, -1),\n+ TermCriteria(TermCriteria::MAX_ITER | TermCriteria::EPS,\n+ detectorParams.cornerRefinementMaxIterations,\n+ detectorParams.cornerRefinementMinAccuracy));\n+ }\n+ }\n+ });\n+ }\n+ } else if (DictionaryMode::Multi == dictMode) {\n+ unordered_set uniqueMarkerSizes;\n+ for (const Dictionary& dictionary : dictionaries) {\n+ uniqueMarkerSizes.insert(dictionary.markerSize);\n+ }\n+\n+ // create at max 4 marker candidate trees for each dictionary size\n+ vector> candidatesPerDictionarySize = {{}, {}, {}, {}};\n+ for (int markerSize : uniqueMarkerSizes) {\n+ // min marker size is 4, so subtract 4 to get index\n+ const auto dictionarySizeIndex = markerSize - 4;\n+ // copy candidates\n+ vector> candidatesCopy = candidates;\n+ vector > contoursCopy = contours;\n+ candidatesPerDictionarySize[dictionarySizeIndex] = filterTooCloseCandidates(candidatesCopy, contoursCopy, markerSize);\n+ }\n+ candidates.clear();\n+ contours.clear();\n+\n+ /// STEP 2: Check candidate codification (identify markers)\n+ int dictIndex = 0;\n+ for (const Dictionary& currentDictionary : dictionaries) {\n+ const auto dictionarySizeIndex = currentDictionary.markerSize - 4;\n+ // temporary variable to store the current candidates\n+ vector> currentCandidates;\n+ identifyCandidates(grey, grey_pyramid, candidatesPerDictionarySize[dictionarySizeIndex], currentCandidates, contours,\n+ ids, currentDictionary, rejectedImgPoints);\n+ if (_dictIndices.needed()) {\n+ dictIndices.insert(dictIndices.end(), currentCandidates.size(), dictIndex);\n+ }\n+\n+ /// STEP 3: Corner refinement :: use corner subpix\n+ if (detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_SUBPIX) {", - "comment_created_at": "2025-03-03T09:17:51+00:00", - "comment_author": "mshabunin", - "comment_body": "Is it possible to extract this large block to separate function to reduce code duplication?", - "pr_file_module": null - }, - { - "comment_id": "1979573764", - "repo_full_name": "opencv/opencv", - "pr_number": 26934, - "pr_file": "modules/objdetect/src/aruco/aruco_detector.cpp", - "discussion_id": "1977140260", - "commented_code": "@@ -651,9 +657,257 @@ struct ArucoDetector::ArucoDetectorImpl {\n RefineParameters refineParams;\n ArucoDetectorImpl() {}\n \n- ArucoDetectorImpl(const Dictionary &_dictionary, const 
DetectorParameters &_detectorParams,\n- const RefineParameters& _refineParams): dictionary(_dictionary),\n- detectorParams(_detectorParams), refineParams(_refineParams) {}\n+ ArucoDetectorImpl(const vector&_dictionaries, const DetectorParameters &_detectorParams,\n+ const RefineParameters& _refineParams): dictionaries(_dictionaries),\n+ detectorParams(_detectorParams), refineParams(_refineParams) {\n+ CV_Assert(!dictionaries.empty());\n+ }\n+\n+ /*\n+ * @brief Detect markers either using multiple or just first dictionary\n+ */\n+ void detectMarkers(InputArray _image, OutputArrayOfArrays _corners, OutputArray _ids,\n+ OutputArrayOfArrays _rejectedImgPoints, OutputArray _dictIndices, DictionaryMode dictMode) {\n+ CV_Assert(!_image.empty());\n+\n+ CV_Assert(detectorParams.markerBorderBits > 0);\n+ // check that the parameters are set correctly if Aruco3 is used\n+ CV_Assert(!(detectorParams.useAruco3Detection == true &&\n+ detectorParams.minSideLengthCanonicalImg == 0 &&\n+ detectorParams.minMarkerLengthRatioOriginalImg == 0.0));\n+\n+ Mat grey;\n+ _convertToGrey(_image, grey);\n+\n+ // Aruco3 functionality is the extension of Aruco.\n+ // The description can be found in:\n+ // [1] Speeded up detection of squared fiducial markers, 2018, FJ Romera-Ramirez et al.\n+ // if Aruco3 functionality if not wanted\n+ // change some parameters to be sure to turn it off\n+ if (!detectorParams.useAruco3Detection) {\n+ detectorParams.minMarkerLengthRatioOriginalImg = 0.0;\n+ detectorParams.minSideLengthCanonicalImg = 0;\n+ }\n+ else {\n+ // always turn on corner refinement in case of Aruco3, due to upsampling\n+ detectorParams.cornerRefinementMethod = (int)CORNER_REFINE_SUBPIX;\n+ // only CORNER_REFINE_SUBPIX implement correctly for useAruco3Detection\n+ // Todo: update other CORNER_REFINE methods\n+ }\n+\n+ /// Step 0: equation (2) from paper [1]\n+ const float fxfy = (!detectorParams.useAruco3Detection ? 1.f : detectorParams.minSideLengthCanonicalImg /\n+ (detectorParams.minSideLengthCanonicalImg + std::max(grey.cols, grey.rows)*\n+ detectorParams.minMarkerLengthRatioOriginalImg));\n+\n+ /// Step 1: create image pyramid. Section 3.4. 
in [1]\n+ vector grey_pyramid;\n+ int closest_pyr_image_idx = 0, num_levels = 0;\n+ //// Step 1.1: resize image with equation (1) from paper [1]\n+ if (detectorParams.useAruco3Detection) {\n+ const float scale_pyr = 2.f;\n+ const float img_area = static_cast(grey.rows*grey.cols);\n+ const float min_area_marker = static_cast(detectorParams.minSideLengthCanonicalImg*\n+ detectorParams.minSideLengthCanonicalImg);\n+ // find max level\n+ num_levels = static_cast(log2(img_area / min_area_marker)/scale_pyr);\n+ // the closest pyramid image to the downsampled segmentation image\n+ // will later be used as start index for corner upsampling\n+ const float scale_img_area = img_area * fxfy * fxfy;\n+ closest_pyr_image_idx = cvRound(log2(img_area / scale_img_area)/scale_pyr);\n+ }\n+ buildPyramid(grey, grey_pyramid, num_levels);\n+\n+ // resize to segmentation image\n+ // in this reduces size the contours will be detected\n+ if (fxfy != 1.f)\n+ resize(grey, grey, Size(cvRound(fxfy * grey.cols), cvRound(fxfy * grey.rows)));\n+\n+ /// STEP 2: Detect marker candidates\n+ vector > candidates;\n+ vector > contours;\n+ vector ids;\n+\n+ /// STEP 2.a Detect marker candidates :: using AprilTag\n+ if(detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_APRILTAG){\n+ _apriltag(grey, detectorParams, candidates, contours);\n+ }\n+ /// STEP 2.b Detect marker candidates :: traditional way\n+ else {\n+ detectCandidates(grey, candidates, contours);\n+ }\n+\n+ /// STEP 2.c FILTER OUT NEAR CANDIDATE PAIRS\n+ vector dictIndices;\n+ vector> rejectedImgPoints;\n+ if (DictionaryMode::Single == dictMode) {\n+ Dictionary& dictionary = dictionaries.at(0);\n+ auto selectedCandidates = filterTooCloseCandidates(candidates, contours, dictionary.markerSize);\n+ candidates.clear();\n+ contours.clear();\n+\n+ /// STEP 2: Check candidate codification (identify markers)\n+ identifyCandidates(grey, grey_pyramid, selectedCandidates, candidates, contours,\n+ ids, dictionary, rejectedImgPoints);\n+\n+ /// STEP 3: Corner refinement :: use corner subpix\n+ if (detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_SUBPIX) {\n+ CV_Assert(detectorParams.cornerRefinementWinSize > 0 && detectorParams.cornerRefinementMaxIterations > 0 &&\n+ detectorParams.cornerRefinementMinAccuracy > 0);\n+ // Do subpixel estimation. 
In Aruco3 start on the lowest pyramid level and upscale the corners\n+ parallel_for_(Range(0, (int)candidates.size()), [&](const Range& range) {\n+ const int begin = range.start;\n+ const int end = range.end;\n+\n+ for (int i = begin; i < end; i++) {\n+ if (detectorParams.useAruco3Detection) {\n+ const float scale_init = (float) grey_pyramid[closest_pyr_image_idx].cols / grey.cols;\n+ findCornerInPyrImage(scale_init, closest_pyr_image_idx, grey_pyramid, Mat(candidates[i]), detectorParams);\n+ } else {\n+ int cornerRefinementWinSize = std::max(1, cvRound(detectorParams.relativeCornerRefinmentWinSize*\n+ getAverageModuleSize(candidates[i], dictionary.markerSize, detectorParams.markerBorderBits)));\n+ cornerRefinementWinSize = min(cornerRefinementWinSize, detectorParams.cornerRefinementWinSize);\n+ cornerSubPix(grey, Mat(candidates[i]), Size(cornerRefinementWinSize, cornerRefinementWinSize), Size(-1, -1),\n+ TermCriteria(TermCriteria::MAX_ITER | TermCriteria::EPS,\n+ detectorParams.cornerRefinementMaxIterations,\n+ detectorParams.cornerRefinementMinAccuracy));\n+ }\n+ }\n+ });\n+ }\n+ } else if (DictionaryMode::Multi == dictMode) {\n+ unordered_set uniqueMarkerSizes;\n+ for (const Dictionary& dictionary : dictionaries) {\n+ uniqueMarkerSizes.insert(dictionary.markerSize);\n+ }\n+\n+ // create at max 4 marker candidate trees for each dictionary size\n+ vector> candidatesPerDictionarySize = {{}, {}, {}, {}};\n+ for (int markerSize : uniqueMarkerSizes) {\n+ // min marker size is 4, so subtract 4 to get index\n+ const auto dictionarySizeIndex = markerSize - 4;\n+ // copy candidates\n+ vector> candidatesCopy = candidates;\n+ vector > contoursCopy = contours;\n+ candidatesPerDictionarySize[dictionarySizeIndex] = filterTooCloseCandidates(candidatesCopy, contoursCopy, markerSize);\n+ }\n+ candidates.clear();\n+ contours.clear();\n+\n+ /// STEP 2: Check candidate codification (identify markers)\n+ int dictIndex = 0;\n+ for (const Dictionary& currentDictionary : dictionaries) {\n+ const auto dictionarySizeIndex = currentDictionary.markerSize - 4;\n+ // temporary variable to store the current candidates\n+ vector> currentCandidates;\n+ identifyCandidates(grey, grey_pyramid, candidatesPerDictionarySize[dictionarySizeIndex], currentCandidates, contours,\n+ ids, currentDictionary, rejectedImgPoints);\n+ if (_dictIndices.needed()) {\n+ dictIndices.insert(dictIndices.end(), currentCandidates.size(), dictIndex);\n+ }\n+\n+ /// STEP 3: Corner refinement :: use corner subpix\n+ if (detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_SUBPIX) {", - "comment_created_at": "2025-03-04T14:27:21+00:00", - "comment_author": "BenjaminKnecht", - "comment_body": "Done", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "723912815", - "pr_number": 20371, - "pr_file": "modules/core/src/opengl.cpp", - "created_at": "2021-10-07T07:22:56+00:00", - "commented_code": "#endif\n}\n\n\n////////////////////////////////////////////////////////////////////////\n// ogl::Attribute\n\ncv::ogl::Attribute::Attribute(Buffer buffer, size_t stride, size_t offset, int size, Type type, bool integer, bool normalized, unsigned int shader_loc)", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "723912815", - "repo_full_name": "opencv/opencv", - "pr_number": 20371, - "pr_file": "modules/core/src/opengl.cpp", - "discussion_id": "723912815", - "commented_code": "@@ -1405,6 +1412,574 @@ void cv::ogl::Arrays::bind() const\n #endif\n }\n 
\n+\n+////////////////////////////////////////////////////////////////////////\n+// ogl::Attribute\n+\n+cv::ogl::Attribute::Attribute(Buffer buffer, size_t stride, size_t offset, int size, Type type, bool integer, bool normalized, unsigned int shader_loc)", - "comment_created_at": "2021-10-07T07:22:56+00:00", - "comment_author": "JulieBar", - "comment_body": "there is no need for an explicit constructor, an initialization list can be used", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "724440487", - "pr_number": 20371, - "pr_file": "modules/core/src/opengl.cpp", - "created_at": "2021-10-07T18:42:17+00:00", - "commented_code": "#endif\n}\n\n\n////////////////////////////////////////////////////////////////////////\n// ogl::Attribute\n\ncv::ogl::Attribute::Attribute(Buffer buffer, size_t stride, size_t offset, int size, Type type, bool integer, bool normalized, unsigned int shader_loc)\n{\n buffer_ = buffer;\n stride_ = stride;\n offset_ = offset;\n size_ = size;\n type_ = type;\n integer_ = integer;\n normalized_ = normalized;\n shader_loc_ = shader_loc;\n}\n\n////////////////////////////////////////////////////////////////////////\n// ogl::VertexArray\n\n#ifndef HAVE_OPENGL\n\nclass cv::ogl::VertexArray::Impl\n{\n};\n\n#else\n\nclass cv::ogl::VertexArray::Impl\n{\npublic:\n static const Ptr& empty();\n\n Impl(GLuint vaId, bool autoRelease);\n Impl(std::initializer_list attributes, bool autoRelease);\n ~Impl();\n\n void bind();\n\n void setAutoRelease(bool flag) { autoRelease_ = flag; }\n\n GLuint vaId() const { return vaId_; }\n\nprivate:\n Impl();\n\n GLuint vaId_;\n bool autoRelease_;\n};\n\nconst Ptr& cv::ogl::VertexArray::Impl::empty()\n{\n static Ptr p(new Impl);\n return p;\n}\n\ncv::ogl::VertexArray::Impl::Impl() : vaId_(0), autoRelease_(false)\n{\n}\n\ncv::ogl::VertexArray::Impl::Impl(GLuint vaId, bool autoRelease) : vaId_(vaId), autoRelease_(autoRelease)\n{\n CV_Assert( gl::IsVertexArray(vaId) == gl::TRUE_ );\n}\n\ncv::ogl::VertexArray::Impl::Impl(std::initializer_list attributes, bool autoRelease) : vaId_(0), autoRelease_(autoRelease)\n{\n gl::GenVertexArrays(1, &vaId_);\n CV_CheckGlError();\n\n CV_Assert( vaId_ != 0 );\n gl::BindVertexArray(vaId_);\n CV_CheckGlError();\n\n for (auto& attribute : attributes)\n {\n attribute.buffer_.bind(Buffer::ARRAY_BUFFER);\n if (attribute.integer_)\n {\n gl::VertexAttribIPointer(\n attribute.shader_loc_,\n attribute.size_,\n attribute.type_,\n attribute.stride_,\n (const void*)attribute.offset_\n );\n CV_CheckGlError();\n }\n else\n {\n gl::VertexAttribPointer(\n attribute.shader_loc_,\n attribute.size_,\n attribute.type_,\n attribute.normalized_,\n attribute.stride_,\n (const void*)attribute.offset_\n );\n CV_CheckGlError();\n }\n\n gl::EnableVertexAttribArray(attribute.shader_loc_);\n CV_CheckGlError();\n }\n}\n\ncv::ogl::VertexArray::Impl::~Impl()\n{\n if (autoRelease_ && vaId_)\n gl::DeleteVertexArrays(1, &vaId_);\n}\n\nvoid cv::ogl::VertexArray::Impl::bind()\n{\n gl::BindVertexArray(vaId_);\n CV_CheckGlError();\n}\n\n#endif // HAVE_OPENGL\n\ncv::ogl::VertexArray::VertexArray()\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n impl_ = Impl::empty();\n#endif\n}\n\ncv::ogl::VertexArray::VertexArray(std::initializer_list attributes, bool autoRelease)\n{\n#ifndef HAVE_OPENGL\n CV_UNUSED(attributes);\n CV_UNUSED(autoRelease);\n throw_no_ogl();\n#else\n impl_.reset(new Impl(attributes, autoRelease));\n#endif\n}\n\nvoid cv::ogl::VertexArray::create(std::initializer_list attributes, bool autoRelease)\n{\n#ifndef HAVE_OPENGL\n 
CV_UNUSED(attributes);\n CV_UNUSED(autoRelease);\n throw_no_ogl();\n#else\n impl_.reset(new Impl(attributes, autoRelease));\n#endif\n}\n\nvoid cv::ogl::VertexArray::release()\n{\n#ifdef HAVE_OPENGL\n if (impl_)\n impl_->setAutoRelease(true);\n impl_ = Impl::empty();\n#endif\n}\n\nvoid cv::ogl::VertexArray::setAutoRelease(bool flag)\n{\n#ifndef HAVE_OPENGL\n CV_UNUSED(flag);\n throw_no_ogl();\n#else\n impl_->setAutoRelease(flag);\n#endif\n}\n\nvoid cv::ogl::VertexArray::bind() const\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n impl_->bind();\n#endif\n}\n\nvoid cv::ogl::VertexArray::unbind()\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n gl::BindVertexArray(0);\n#endif\n}\n\nunsigned int cv::ogl::VertexArray::vaId() const\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n return impl_->vaId();\n#endif\n}\n\n////////////////////////////////////////////////////////////////////////\n// ogl::Shader\n\n#ifndef HAVE_OPENGL\n\nclass cv::ogl::Shader::Impl\n{\n};\n\n#else\n\nclass cv::ogl::Shader::Impl\n{\npublic:\n static const Ptr& empty();\n\n Impl(GLuint shaderId, bool autoRelease);\n Impl(const char* src, GLenum type, bool autoRelease);\n ~Impl();\n\n void setAutoRelease(bool flag) { autoRelease_ = flag; }\n\n GLuint shaderId() const { return shaderId_; }\n\nprivate:\n Impl();\n\n GLuint shaderId_;\n bool autoRelease_;\n};\n\nconst Ptr& cv::ogl::Shader::Impl::empty()\n{\n static Ptr p(new Impl);\n return p;\n}\n\ncv::ogl::Shader::Impl::Impl() : shaderId_(0), autoRelease_(false)\n{\n}\n\ncv::ogl::Shader::Impl::Impl(GLuint shaderId, bool autoRelease) : shaderId_(shaderId), autoRelease_(autoRelease)\n{\n CV_Assert( gl::IsShader(shaderId) == gl::TRUE_ );\n}\n\ncv::ogl::Shader::Impl::Impl(const char* src, GLenum type, bool autoRelease) : shaderId_(0), autoRelease_(autoRelease)\n{\n shaderId_ = gl::CreateShader(type);\n CV_CheckGlError();\n\n CV_Assert( shaderId_ != 0 );\n\n GLint status;\n gl::ShaderSource(shaderId_, 1, &src, 0);\n gl::CompileShader(shaderId_);\n gl::GetShaderiv(shaderId_, gl::COMPILE_STATUS, &status);\n if (!status) {\n String info_log;\n gl::GetShaderiv(shaderId_, gl::INFO_LOG_LENGTH, &status);\n info_log.resize(status);\n gl::GetShaderInfoLog(shaderId_, GLsizei(info_log.size()) + 1, nullptr, &info_log[0]);\n CV_Error(Error::OpenGlApiCallError, info_log);\n }\n\n CV_CheckGlError();\n}\n\ncv::ogl::Shader::Impl::~Impl()\n{\n if (autoRelease_ && shaderId_)\n gl::DeleteShader(shaderId_);\n}\n\n#endif // HAVE_OPENGL\n\ncv::ogl::Shader::Shader() : type_(Type::VERTEX)\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n impl_ = Impl::empty();\n#endif\n}\n\ncv::ogl::Shader::Shader(const char* src, Type type, bool autoRelease) : type_(type)\n{\n#ifndef HAVE_OPENGL\n CV_UNUSED(src);\n CV_UNUSED(type);\n CV_UNUSED(autoRelease);\n throw_no_ogl();\n#else\n impl_.reset(new Impl(src, type, autoRelease));\n#endif\n}\n\nvoid cv::ogl::Shader::create(const char* src, Type type, bool autoRelease)\n{\n#ifndef HAVE_OPENGL\n CV_UNUSED(src);\n CV_UNUSED(type);\n CV_UNUSED(autoRelease);\n throw_no_ogl();\n#else\n impl_.reset(new Impl(src, type, autoRelease));\n type_ = type;\n#endif\n}\n\nvoid cv::ogl::Shader::release()\n{\n#ifdef HAVE_OPENGL\n if (impl_)\n impl_->setAutoRelease(true);\n impl_ = Impl::empty();\n#endif\n}\n\nvoid cv::ogl::Shader::setAutoRelease(bool flag)\n{\n#ifndef HAVE_OPENGL\n CV_UNUSED(flag);\n throw_no_ogl();\n#else\n impl_->setAutoRelease(flag);\n#endif\n}\n\nunsigned int cv::ogl::Shader::shaderId() const\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n return 
impl_->shaderId();\n#endif\n}\n\n////////////////////////////////////////////////////////////////////////\n// ogl::Program\n\n#ifndef HAVE_OPENGL\n\nclass cv::ogl::Program::Impl\n{\n};\n\n#else\n\nclass cv::ogl::Program::Impl\n{\npublic:\n static const Ptr& empty();\n\n Impl(GLuint programId, bool autoRelease);\n Impl(Shader vert, Shader frag, bool autoRelease);\n ~Impl();\n\n void bind();\n\n int getAttributeLocation(const char* name) const;\n int getUniformLocation(const char* name) const;\n\n void setAutoRelease(bool flag) { autoRelease_ = flag; }\n\n GLuint programId() const { return programId_; }\n\nprivate:\n Impl();\n\n Shader vert_;\n Shader frag_;\n GLuint programId_;\n bool autoRelease_;\n};\n\nconst Ptr& cv::ogl::Program::Impl::empty()\n{\n static Ptr p(new Impl);\n return p;\n}\n\ncv::ogl::Program::Impl::Impl() : programId_(0), autoRelease_(false)\n{\n}\n\ncv::ogl::Program::Impl::Impl(GLuint programId, bool autoRelease) : programId_(programId), autoRelease_(autoRelease)\n{\n CV_Assert( gl::IsProgram(programId) == gl::TRUE_ );\n}\n\ncv::ogl::Program::Impl::Impl(Shader vert, Shader frag, bool autoRelease) : programId_(0), autoRelease_(autoRelease)\n{\n programId_ = gl::CreateProgram();\n CV_CheckGlError();\n\n CV_Assert( programId_ != 0 );\n\n gl::AttachShader(programId_, vert.shaderId());\n CV_CheckGlError();\n\n gl::AttachShader(programId_, frag.shaderId());\n CV_CheckGlError();\n\n GLint status;\n gl::LinkProgram(programId_);\n gl::GetProgramiv(programId_, gl::LINK_STATUS, &status);\n if (!status) {\n String info_log;\n gl::GetProgramiv(programId_, gl::INFO_LOG_LENGTH, &status);\n info_log.resize(status);\n gl::GetProgramInfoLog(programId_, GLsizei(info_log.size()) + 1, nullptr, &info_log[0]);\n CV_Error(Error::OpenGlApiCallError, info_log);\n }\n \n CV_CheckGlError();\n\n vert_ = vert;\n frag_ = frag;\n}\n\ncv::ogl::Program::Impl::~Impl()\n{\n if (autoRelease_ && programId_)\n gl::DeleteProgram(programId_);\n}\n\nvoid cv::ogl::Program::Impl::bind()\n{\n gl::UseProgram(programId_);\n CV_CheckGlError();\n}\n\nint cv::ogl::Program::Impl::getAttributeLocation(const char* name) const\n{\n int location = gl::GetAttribLocation(programId_, name);\n if (location < 0)\n {\n CV_Error(Error::OpenGlApiCallError, \"Attribute not found\");\n }\n CV_CheckGlError();\n return location;\n}\n\nint cv::ogl::Program::Impl::getUniformLocation(const char* name) const\n{\n int location = gl::GetUniformLocation(programId_, name);\n if (location < 0)\n {\n CV_Error(Error::OpenGlApiCallError, \"Attribute not found\");\n }\n CV_CheckGlError();\n return location;\n}\n\n#endif // HAVE_OPENGL\n\ncv::ogl::Program::Program()\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n impl_ = Impl::empty();\n#endif\n}\n\ncv::ogl::Program::Program(Shader vert, Shader frag, bool autoRelease)\n{\n#ifndef HAVE_OPENGL\n CV_UNUSED(vert);\n CV_UNUSED(frag);\n CV_UNUSED(autoRelease);\n throw_no_ogl();\n#else\n impl_.reset(new Impl(vert, frag, autoRelease));\n#endif\n}\n\nvoid cv::ogl::Program::create(Shader vert, Shader frag, bool autoRelease)\n{\n#ifndef HAVE_OPENGL\n CV_UNUSED(vert);\n CV_UNUSED(frag);\n CV_UNUSED(autoRelease);\n throw_no_ogl();\n#else\n impl_.reset(new Impl(vert, frag, autoRelease));\n#endif\n}\n\nvoid cv::ogl::Program::release()\n{\n#ifdef HAVE_OPENGL\n if (impl_)\n impl_->setAutoRelease(true);\n impl_ = Impl::empty();\n#endif\n}\n\nvoid cv::ogl::Program::setAutoRelease(bool flag)\n{\n#ifndef HAVE_OPENGL\n CV_UNUSED(flag);\n throw_no_ogl();\n#else\n impl_->setAutoRelease(flag);\n#endif\n}\n\nvoid 
cv::ogl::Program::bind() const\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n impl_->bind();\n#endif\n}\n\nvoid cv::ogl::Program::unbind()\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n gl::UseProgram(0);\n#endif\n}\n\nint cv::ogl::Program::getAttributeLocation(const char* name) const\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n return impl_->getAttributeLocation(name);\n#endif\n}\n\nint cv::ogl::Program::getUniformLocation(const char* name) const\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n return impl_->getUniformLocation(name);\n#endif\n}\n\nvoid cv::ogl::Program::setUniformVec3(int loc, Vec3f vec)\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n gl::Uniform3f(loc, vec(0), vec(1), vec(2));\n#endif\n}\n\nvoid cv::ogl::Program::setUniformMat4x4(int loc, Matx44f mat)\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n gl::UniformMatrix4fv(loc, 1, true, mat.val);\n#endif\n}\n\nunsigned int cv::ogl::Program::programId() const\n{\n#ifndef HAVE_OPENGL\n throw_no_ogl();\n#else\n return impl_->programId();\n#endif\n}", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "724440487", - "repo_full_name": "opencv/opencv", - "pr_number": 20371, - "pr_file": "modules/core/src/opengl.cpp", - "discussion_id": "724440487", - "commented_code": "@@ -1405,6 +1412,574 @@ void cv::ogl::Arrays::bind() const\n #endif\n }\n \n+\n+////////////////////////////////////////////////////////////////////////\n+// ogl::Attribute\n+\n+cv::ogl::Attribute::Attribute(Buffer buffer, size_t stride, size_t offset, int size, Type type, bool integer, bool normalized, unsigned int shader_loc)\n+{\n+ buffer_ = buffer;\n+ stride_ = stride;\n+ offset_ = offset;\n+ size_ = size;\n+ type_ = type;\n+ integer_ = integer;\n+ normalized_ = normalized;\n+ shader_loc_ = shader_loc;\n+}\n+\n+////////////////////////////////////////////////////////////////////////\n+// ogl::VertexArray\n+\n+#ifndef HAVE_OPENGL\n+\n+class cv::ogl::VertexArray::Impl\n+{\n+};\n+\n+#else\n+\n+class cv::ogl::VertexArray::Impl\n+{\n+public:\n+ static const Ptr& empty();\n+\n+ Impl(GLuint vaId, bool autoRelease);\n+ Impl(std::initializer_list attributes, bool autoRelease);\n+ ~Impl();\n+\n+ void bind();\n+\n+ void setAutoRelease(bool flag) { autoRelease_ = flag; }\n+\n+ GLuint vaId() const { return vaId_; }\n+\n+private:\n+ Impl();\n+\n+ GLuint vaId_;\n+ bool autoRelease_;\n+};\n+\n+const Ptr& cv::ogl::VertexArray::Impl::empty()\n+{\n+ static Ptr p(new Impl);\n+ return p;\n+}\n+\n+cv::ogl::VertexArray::Impl::Impl() : vaId_(0), autoRelease_(false)\n+{\n+}\n+\n+cv::ogl::VertexArray::Impl::Impl(GLuint vaId, bool autoRelease) : vaId_(vaId), autoRelease_(autoRelease)\n+{\n+ CV_Assert( gl::IsVertexArray(vaId) == gl::TRUE_ );\n+}\n+\n+cv::ogl::VertexArray::Impl::Impl(std::initializer_list attributes, bool autoRelease) : vaId_(0), autoRelease_(autoRelease)\n+{\n+ gl::GenVertexArrays(1, &vaId_);\n+ CV_CheckGlError();\n+\n+ CV_Assert( vaId_ != 0 );\n+ gl::BindVertexArray(vaId_);\n+ CV_CheckGlError();\n+\n+ for (auto& attribute : attributes)\n+ {\n+ attribute.buffer_.bind(Buffer::ARRAY_BUFFER);\n+ if (attribute.integer_)\n+ {\n+ gl::VertexAttribIPointer(\n+ attribute.shader_loc_,\n+ attribute.size_,\n+ attribute.type_,\n+ attribute.stride_,\n+ (const void*)attribute.offset_\n+ );\n+ CV_CheckGlError();\n+ }\n+ else\n+ {\n+ gl::VertexAttribPointer(\n+ attribute.shader_loc_,\n+ attribute.size_,\n+ attribute.type_,\n+ attribute.normalized_,\n+ attribute.stride_,\n+ (const void*)attribute.offset_\n+ );\n+ CV_CheckGlError();\n+ 
}\n+\n+ gl::EnableVertexAttribArray(attribute.shader_loc_);\n+ CV_CheckGlError();\n+ }\n+}\n+\n+cv::ogl::VertexArray::Impl::~Impl()\n+{\n+ if (autoRelease_ && vaId_)\n+ gl::DeleteVertexArrays(1, &vaId_);\n+}\n+\n+void cv::ogl::VertexArray::Impl::bind()\n+{\n+ gl::BindVertexArray(vaId_);\n+ CV_CheckGlError();\n+}\n+\n+#endif // HAVE_OPENGL\n+\n+cv::ogl::VertexArray::VertexArray()\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+cv::ogl::VertexArray::VertexArray(std::initializer_list attributes, bool autoRelease)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(attributes);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(attributes, autoRelease));\n+#endif\n+}\n+\n+void cv::ogl::VertexArray::create(std::initializer_list attributes, bool autoRelease)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(attributes);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(attributes, autoRelease));\n+#endif\n+}\n+\n+void cv::ogl::VertexArray::release()\n+{\n+#ifdef HAVE_OPENGL\n+ if (impl_)\n+ impl_->setAutoRelease(true);\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+void cv::ogl::VertexArray::setAutoRelease(bool flag)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(flag);\n+ throw_no_ogl();\n+#else\n+ impl_->setAutoRelease(flag);\n+#endif\n+}\n+\n+void cv::ogl::VertexArray::bind() const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ impl_->bind();\n+#endif\n+}\n+\n+void cv::ogl::VertexArray::unbind()\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ gl::BindVertexArray(0);\n+#endif\n+}\n+\n+unsigned int cv::ogl::VertexArray::vaId() const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ return impl_->vaId();\n+#endif\n+}\n+\n+////////////////////////////////////////////////////////////////////////\n+// ogl::Shader\n+\n+#ifndef HAVE_OPENGL\n+\n+class cv::ogl::Shader::Impl\n+{\n+};\n+\n+#else\n+\n+class cv::ogl::Shader::Impl\n+{\n+public:\n+ static const Ptr& empty();\n+\n+ Impl(GLuint shaderId, bool autoRelease);\n+ Impl(const char* src, GLenum type, bool autoRelease);\n+ ~Impl();\n+\n+ void setAutoRelease(bool flag) { autoRelease_ = flag; }\n+\n+ GLuint shaderId() const { return shaderId_; }\n+\n+private:\n+ Impl();\n+\n+ GLuint shaderId_;\n+ bool autoRelease_;\n+};\n+\n+const Ptr& cv::ogl::Shader::Impl::empty()\n+{\n+ static Ptr p(new Impl);\n+ return p;\n+}\n+\n+cv::ogl::Shader::Impl::Impl() : shaderId_(0), autoRelease_(false)\n+{\n+}\n+\n+cv::ogl::Shader::Impl::Impl(GLuint shaderId, bool autoRelease) : shaderId_(shaderId), autoRelease_(autoRelease)\n+{\n+ CV_Assert( gl::IsShader(shaderId) == gl::TRUE_ );\n+}\n+\n+cv::ogl::Shader::Impl::Impl(const char* src, GLenum type, bool autoRelease) : shaderId_(0), autoRelease_(autoRelease)\n+{\n+ shaderId_ = gl::CreateShader(type);\n+ CV_CheckGlError();\n+\n+ CV_Assert( shaderId_ != 0 );\n+\n+ GLint status;\n+ gl::ShaderSource(shaderId_, 1, &src, 0);\n+ gl::CompileShader(shaderId_);\n+ gl::GetShaderiv(shaderId_, gl::COMPILE_STATUS, &status);\n+ if (!status) {\n+ String info_log;\n+ gl::GetShaderiv(shaderId_, gl::INFO_LOG_LENGTH, &status);\n+ info_log.resize(status);\n+ gl::GetShaderInfoLog(shaderId_, GLsizei(info_log.size()) + 1, nullptr, &info_log[0]);\n+ CV_Error(Error::OpenGlApiCallError, info_log);\n+ }\n+\n+ CV_CheckGlError();\n+}\n+\n+cv::ogl::Shader::Impl::~Impl()\n+{\n+ if (autoRelease_ && shaderId_)\n+ gl::DeleteShader(shaderId_);\n+}\n+\n+#endif // HAVE_OPENGL\n+\n+cv::ogl::Shader::Shader() : type_(Type::VERTEX)\n+{\n+#ifndef HAVE_OPENGL\n+ 
throw_no_ogl();\n+#else\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+cv::ogl::Shader::Shader(const char* src, Type type, bool autoRelease) : type_(type)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(src);\n+ CV_UNUSED(type);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(src, type, autoRelease));\n+#endif\n+}\n+\n+void cv::ogl::Shader::create(const char* src, Type type, bool autoRelease)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(src);\n+ CV_UNUSED(type);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(src, type, autoRelease));\n+ type_ = type;\n+#endif\n+}\n+\n+void cv::ogl::Shader::release()\n+{\n+#ifdef HAVE_OPENGL\n+ if (impl_)\n+ impl_->setAutoRelease(true);\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+void cv::ogl::Shader::setAutoRelease(bool flag)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(flag);\n+ throw_no_ogl();\n+#else\n+ impl_->setAutoRelease(flag);\n+#endif\n+}\n+\n+unsigned int cv::ogl::Shader::shaderId() const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ return impl_->shaderId();\n+#endif\n+}\n+\n+////////////////////////////////////////////////////////////////////////\n+// ogl::Program\n+\n+#ifndef HAVE_OPENGL\n+\n+class cv::ogl::Program::Impl\n+{\n+};\n+\n+#else\n+\n+class cv::ogl::Program::Impl\n+{\n+public:\n+ static const Ptr& empty();\n+\n+ Impl(GLuint programId, bool autoRelease);\n+ Impl(Shader vert, Shader frag, bool autoRelease);\n+ ~Impl();\n+\n+ void bind();\n+\n+ int getAttributeLocation(const char* name) const;\n+ int getUniformLocation(const char* name) const;\n+\n+ void setAutoRelease(bool flag) { autoRelease_ = flag; }\n+\n+ GLuint programId() const { return programId_; }\n+\n+private:\n+ Impl();\n+\n+ Shader vert_;\n+ Shader frag_;\n+ GLuint programId_;\n+ bool autoRelease_;\n+};\n+\n+const Ptr& cv::ogl::Program::Impl::empty()\n+{\n+ static Ptr p(new Impl);\n+ return p;\n+}\n+\n+cv::ogl::Program::Impl::Impl() : programId_(0), autoRelease_(false)\n+{\n+}\n+\n+cv::ogl::Program::Impl::Impl(GLuint programId, bool autoRelease) : programId_(programId), autoRelease_(autoRelease)\n+{\n+ CV_Assert( gl::IsProgram(programId) == gl::TRUE_ );\n+}\n+\n+cv::ogl::Program::Impl::Impl(Shader vert, Shader frag, bool autoRelease) : programId_(0), autoRelease_(autoRelease)\n+{\n+ programId_ = gl::CreateProgram();\n+ CV_CheckGlError();\n+\n+ CV_Assert( programId_ != 0 );\n+\n+ gl::AttachShader(programId_, vert.shaderId());\n+ CV_CheckGlError();\n+\n+ gl::AttachShader(programId_, frag.shaderId());\n+ CV_CheckGlError();\n+\n+ GLint status;\n+ gl::LinkProgram(programId_);\n+ gl::GetProgramiv(programId_, gl::LINK_STATUS, &status);\n+ if (!status) {\n+ String info_log;\n+ gl::GetProgramiv(programId_, gl::INFO_LOG_LENGTH, &status);\n+ info_log.resize(status);\n+ gl::GetProgramInfoLog(programId_, GLsizei(info_log.size()) + 1, nullptr, &info_log[0]);\n+ CV_Error(Error::OpenGlApiCallError, info_log);\n+ }\n+ \n+ CV_CheckGlError();\n+\n+ vert_ = vert;\n+ frag_ = frag;\n+}\n+\n+cv::ogl::Program::Impl::~Impl()\n+{\n+ if (autoRelease_ && programId_)\n+ gl::DeleteProgram(programId_);\n+}\n+\n+void cv::ogl::Program::Impl::bind()\n+{\n+ gl::UseProgram(programId_);\n+ CV_CheckGlError();\n+}\n+\n+int cv::ogl::Program::Impl::getAttributeLocation(const char* name) const\n+{\n+ int location = gl::GetAttribLocation(programId_, name);\n+ if (location < 0)\n+ {\n+ CV_Error(Error::OpenGlApiCallError, \"Attribute not found\");\n+ }\n+ CV_CheckGlError();\n+ return location;\n+}\n+\n+int cv::ogl::Program::Impl::getUniformLocation(const char* name) 
const\n+{\n+ int location = gl::GetUniformLocation(programId_, name);\n+ if (location < 0)\n+ {\n+ CV_Error(Error::OpenGlApiCallError, \"Attribute not found\");\n+ }\n+ CV_CheckGlError();\n+ return location;\n+}\n+\n+#endif // HAVE_OPENGL\n+\n+cv::ogl::Program::Program()\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+cv::ogl::Program::Program(Shader vert, Shader frag, bool autoRelease)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(vert);\n+ CV_UNUSED(frag);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(vert, frag, autoRelease));\n+#endif\n+}\n+\n+void cv::ogl::Program::create(Shader vert, Shader frag, bool autoRelease)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(vert);\n+ CV_UNUSED(frag);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(vert, frag, autoRelease));\n+#endif\n+}\n+\n+void cv::ogl::Program::release()\n+{\n+#ifdef HAVE_OPENGL\n+ if (impl_)\n+ impl_->setAutoRelease(true);\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+void cv::ogl::Program::setAutoRelease(bool flag)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(flag);\n+ throw_no_ogl();\n+#else\n+ impl_->setAutoRelease(flag);\n+#endif\n+}\n+\n+void cv::ogl::Program::bind() const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ impl_->bind();\n+#endif\n+}\n+\n+void cv::ogl::Program::unbind()\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ gl::UseProgram(0);\n+#endif\n+}\n+\n+int cv::ogl::Program::getAttributeLocation(const char* name) const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ return impl_->getAttributeLocation(name);\n+#endif\n+}\n+\n+int cv::ogl::Program::getUniformLocation(const char* name) const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ return impl_->getUniformLocation(name);\n+#endif\n+}\n+\n+void cv::ogl::Program::setUniformVec3(int loc, Vec3f vec)\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ gl::Uniform3f(loc, vec(0), vec(1), vec(2));\n+#endif\n+}\n+\n+void cv::ogl::Program::setUniformMat4x4(int loc, Matx44f mat)\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ gl::UniformMatrix4fv(loc, 1, true, mat.val);\n+#endif\n+}\n+\n+unsigned int cv::ogl::Program::programId() const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ return impl_->programId();\n+#endif\n+}\n+", - "comment_created_at": "2021-10-07T18:42:17+00:00", - "comment_author": "JulieBar", - "comment_body": "please reduce the amount of duplicated code if possible. you can implement methods: empty, create, release, setAutoRelease, bind, unbind, and then re-use it. 
", - "pr_file_module": null - }, - { - "comment_id": "729655562", - "repo_full_name": "opencv/opencv", - "pr_number": 20371, - "pr_file": "modules/core/src/opengl.cpp", - "discussion_id": "724440487", - "commented_code": "@@ -1405,6 +1412,574 @@ void cv::ogl::Arrays::bind() const\n #endif\n }\n \n+\n+////////////////////////////////////////////////////////////////////////\n+// ogl::Attribute\n+\n+cv::ogl::Attribute::Attribute(Buffer buffer, size_t stride, size_t offset, int size, Type type, bool integer, bool normalized, unsigned int shader_loc)\n+{\n+ buffer_ = buffer;\n+ stride_ = stride;\n+ offset_ = offset;\n+ size_ = size;\n+ type_ = type;\n+ integer_ = integer;\n+ normalized_ = normalized;\n+ shader_loc_ = shader_loc;\n+}\n+\n+////////////////////////////////////////////////////////////////////////\n+// ogl::VertexArray\n+\n+#ifndef HAVE_OPENGL\n+\n+class cv::ogl::VertexArray::Impl\n+{\n+};\n+\n+#else\n+\n+class cv::ogl::VertexArray::Impl\n+{\n+public:\n+ static const Ptr& empty();\n+\n+ Impl(GLuint vaId, bool autoRelease);\n+ Impl(std::initializer_list attributes, bool autoRelease);\n+ ~Impl();\n+\n+ void bind();\n+\n+ void setAutoRelease(bool flag) { autoRelease_ = flag; }\n+\n+ GLuint vaId() const { return vaId_; }\n+\n+private:\n+ Impl();\n+\n+ GLuint vaId_;\n+ bool autoRelease_;\n+};\n+\n+const Ptr& cv::ogl::VertexArray::Impl::empty()\n+{\n+ static Ptr p(new Impl);\n+ return p;\n+}\n+\n+cv::ogl::VertexArray::Impl::Impl() : vaId_(0), autoRelease_(false)\n+{\n+}\n+\n+cv::ogl::VertexArray::Impl::Impl(GLuint vaId, bool autoRelease) : vaId_(vaId), autoRelease_(autoRelease)\n+{\n+ CV_Assert( gl::IsVertexArray(vaId) == gl::TRUE_ );\n+}\n+\n+cv::ogl::VertexArray::Impl::Impl(std::initializer_list attributes, bool autoRelease) : vaId_(0), autoRelease_(autoRelease)\n+{\n+ gl::GenVertexArrays(1, &vaId_);\n+ CV_CheckGlError();\n+\n+ CV_Assert( vaId_ != 0 );\n+ gl::BindVertexArray(vaId_);\n+ CV_CheckGlError();\n+\n+ for (auto& attribute : attributes)\n+ {\n+ attribute.buffer_.bind(Buffer::ARRAY_BUFFER);\n+ if (attribute.integer_)\n+ {\n+ gl::VertexAttribIPointer(\n+ attribute.shader_loc_,\n+ attribute.size_,\n+ attribute.type_,\n+ attribute.stride_,\n+ (const void*)attribute.offset_\n+ );\n+ CV_CheckGlError();\n+ }\n+ else\n+ {\n+ gl::VertexAttribPointer(\n+ attribute.shader_loc_,\n+ attribute.size_,\n+ attribute.type_,\n+ attribute.normalized_,\n+ attribute.stride_,\n+ (const void*)attribute.offset_\n+ );\n+ CV_CheckGlError();\n+ }\n+\n+ gl::EnableVertexAttribArray(attribute.shader_loc_);\n+ CV_CheckGlError();\n+ }\n+}\n+\n+cv::ogl::VertexArray::Impl::~Impl()\n+{\n+ if (autoRelease_ && vaId_)\n+ gl::DeleteVertexArrays(1, &vaId_);\n+}\n+\n+void cv::ogl::VertexArray::Impl::bind()\n+{\n+ gl::BindVertexArray(vaId_);\n+ CV_CheckGlError();\n+}\n+\n+#endif // HAVE_OPENGL\n+\n+cv::ogl::VertexArray::VertexArray()\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+cv::ogl::VertexArray::VertexArray(std::initializer_list attributes, bool autoRelease)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(attributes);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(attributes, autoRelease));\n+#endif\n+}\n+\n+void cv::ogl::VertexArray::create(std::initializer_list attributes, bool autoRelease)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(attributes);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(attributes, autoRelease));\n+#endif\n+}\n+\n+void cv::ogl::VertexArray::release()\n+{\n+#ifdef HAVE_OPENGL\n+ if (impl_)\n+ 
impl_->setAutoRelease(true);\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+void cv::ogl::VertexArray::setAutoRelease(bool flag)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(flag);\n+ throw_no_ogl();\n+#else\n+ impl_->setAutoRelease(flag);\n+#endif\n+}\n+\n+void cv::ogl::VertexArray::bind() const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ impl_->bind();\n+#endif\n+}\n+\n+void cv::ogl::VertexArray::unbind()\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ gl::BindVertexArray(0);\n+#endif\n+}\n+\n+unsigned int cv::ogl::VertexArray::vaId() const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ return impl_->vaId();\n+#endif\n+}\n+\n+////////////////////////////////////////////////////////////////////////\n+// ogl::Shader\n+\n+#ifndef HAVE_OPENGL\n+\n+class cv::ogl::Shader::Impl\n+{\n+};\n+\n+#else\n+\n+class cv::ogl::Shader::Impl\n+{\n+public:\n+ static const Ptr& empty();\n+\n+ Impl(GLuint shaderId, bool autoRelease);\n+ Impl(const char* src, GLenum type, bool autoRelease);\n+ ~Impl();\n+\n+ void setAutoRelease(bool flag) { autoRelease_ = flag; }\n+\n+ GLuint shaderId() const { return shaderId_; }\n+\n+private:\n+ Impl();\n+\n+ GLuint shaderId_;\n+ bool autoRelease_;\n+};\n+\n+const Ptr& cv::ogl::Shader::Impl::empty()\n+{\n+ static Ptr p(new Impl);\n+ return p;\n+}\n+\n+cv::ogl::Shader::Impl::Impl() : shaderId_(0), autoRelease_(false)\n+{\n+}\n+\n+cv::ogl::Shader::Impl::Impl(GLuint shaderId, bool autoRelease) : shaderId_(shaderId), autoRelease_(autoRelease)\n+{\n+ CV_Assert( gl::IsShader(shaderId) == gl::TRUE_ );\n+}\n+\n+cv::ogl::Shader::Impl::Impl(const char* src, GLenum type, bool autoRelease) : shaderId_(0), autoRelease_(autoRelease)\n+{\n+ shaderId_ = gl::CreateShader(type);\n+ CV_CheckGlError();\n+\n+ CV_Assert( shaderId_ != 0 );\n+\n+ GLint status;\n+ gl::ShaderSource(shaderId_, 1, &src, 0);\n+ gl::CompileShader(shaderId_);\n+ gl::GetShaderiv(shaderId_, gl::COMPILE_STATUS, &status);\n+ if (!status) {\n+ String info_log;\n+ gl::GetShaderiv(shaderId_, gl::INFO_LOG_LENGTH, &status);\n+ info_log.resize(status);\n+ gl::GetShaderInfoLog(shaderId_, GLsizei(info_log.size()) + 1, nullptr, &info_log[0]);\n+ CV_Error(Error::OpenGlApiCallError, info_log);\n+ }\n+\n+ CV_CheckGlError();\n+}\n+\n+cv::ogl::Shader::Impl::~Impl()\n+{\n+ if (autoRelease_ && shaderId_)\n+ gl::DeleteShader(shaderId_);\n+}\n+\n+#endif // HAVE_OPENGL\n+\n+cv::ogl::Shader::Shader() : type_(Type::VERTEX)\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+cv::ogl::Shader::Shader(const char* src, Type type, bool autoRelease) : type_(type)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(src);\n+ CV_UNUSED(type);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(src, type, autoRelease));\n+#endif\n+}\n+\n+void cv::ogl::Shader::create(const char* src, Type type, bool autoRelease)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(src);\n+ CV_UNUSED(type);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(src, type, autoRelease));\n+ type_ = type;\n+#endif\n+}\n+\n+void cv::ogl::Shader::release()\n+{\n+#ifdef HAVE_OPENGL\n+ if (impl_)\n+ impl_->setAutoRelease(true);\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+void cv::ogl::Shader::setAutoRelease(bool flag)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(flag);\n+ throw_no_ogl();\n+#else\n+ impl_->setAutoRelease(flag);\n+#endif\n+}\n+\n+unsigned int cv::ogl::Shader::shaderId() const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ return 
impl_->shaderId();\n+#endif\n+}\n+\n+////////////////////////////////////////////////////////////////////////\n+// ogl::Program\n+\n+#ifndef HAVE_OPENGL\n+\n+class cv::ogl::Program::Impl\n+{\n+};\n+\n+#else\n+\n+class cv::ogl::Program::Impl\n+{\n+public:\n+ static const Ptr& empty();\n+\n+ Impl(GLuint programId, bool autoRelease);\n+ Impl(Shader vert, Shader frag, bool autoRelease);\n+ ~Impl();\n+\n+ void bind();\n+\n+ int getAttributeLocation(const char* name) const;\n+ int getUniformLocation(const char* name) const;\n+\n+ void setAutoRelease(bool flag) { autoRelease_ = flag; }\n+\n+ GLuint programId() const { return programId_; }\n+\n+private:\n+ Impl();\n+\n+ Shader vert_;\n+ Shader frag_;\n+ GLuint programId_;\n+ bool autoRelease_;\n+};\n+\n+const Ptr& cv::ogl::Program::Impl::empty()\n+{\n+ static Ptr p(new Impl);\n+ return p;\n+}\n+\n+cv::ogl::Program::Impl::Impl() : programId_(0), autoRelease_(false)\n+{\n+}\n+\n+cv::ogl::Program::Impl::Impl(GLuint programId, bool autoRelease) : programId_(programId), autoRelease_(autoRelease)\n+{\n+ CV_Assert( gl::IsProgram(programId) == gl::TRUE_ );\n+}\n+\n+cv::ogl::Program::Impl::Impl(Shader vert, Shader frag, bool autoRelease) : programId_(0), autoRelease_(autoRelease)\n+{\n+ programId_ = gl::CreateProgram();\n+ CV_CheckGlError();\n+\n+ CV_Assert( programId_ != 0 );\n+\n+ gl::AttachShader(programId_, vert.shaderId());\n+ CV_CheckGlError();\n+\n+ gl::AttachShader(programId_, frag.shaderId());\n+ CV_CheckGlError();\n+\n+ GLint status;\n+ gl::LinkProgram(programId_);\n+ gl::GetProgramiv(programId_, gl::LINK_STATUS, &status);\n+ if (!status) {\n+ String info_log;\n+ gl::GetProgramiv(programId_, gl::INFO_LOG_LENGTH, &status);\n+ info_log.resize(status);\n+ gl::GetProgramInfoLog(programId_, GLsizei(info_log.size()) + 1, nullptr, &info_log[0]);\n+ CV_Error(Error::OpenGlApiCallError, info_log);\n+ }\n+ \n+ CV_CheckGlError();\n+\n+ vert_ = vert;\n+ frag_ = frag;\n+}\n+\n+cv::ogl::Program::Impl::~Impl()\n+{\n+ if (autoRelease_ && programId_)\n+ gl::DeleteProgram(programId_);\n+}\n+\n+void cv::ogl::Program::Impl::bind()\n+{\n+ gl::UseProgram(programId_);\n+ CV_CheckGlError();\n+}\n+\n+int cv::ogl::Program::Impl::getAttributeLocation(const char* name) const\n+{\n+ int location = gl::GetAttribLocation(programId_, name);\n+ if (location < 0)\n+ {\n+ CV_Error(Error::OpenGlApiCallError, \"Attribute not found\");\n+ }\n+ CV_CheckGlError();\n+ return location;\n+}\n+\n+int cv::ogl::Program::Impl::getUniformLocation(const char* name) const\n+{\n+ int location = gl::GetUniformLocation(programId_, name);\n+ if (location < 0)\n+ {\n+ CV_Error(Error::OpenGlApiCallError, \"Attribute not found\");\n+ }\n+ CV_CheckGlError();\n+ return location;\n+}\n+\n+#endif // HAVE_OPENGL\n+\n+cv::ogl::Program::Program()\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+cv::ogl::Program::Program(Shader vert, Shader frag, bool autoRelease)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(vert);\n+ CV_UNUSED(frag);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(vert, frag, autoRelease));\n+#endif\n+}\n+\n+void cv::ogl::Program::create(Shader vert, Shader frag, bool autoRelease)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(vert);\n+ CV_UNUSED(frag);\n+ CV_UNUSED(autoRelease);\n+ throw_no_ogl();\n+#else\n+ impl_.reset(new Impl(vert, frag, autoRelease));\n+#endif\n+}\n+\n+void cv::ogl::Program::release()\n+{\n+#ifdef HAVE_OPENGL\n+ if (impl_)\n+ impl_->setAutoRelease(true);\n+ impl_ = Impl::empty();\n+#endif\n+}\n+\n+void 
cv::ogl::Program::setAutoRelease(bool flag)\n+{\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(flag);\n+ throw_no_ogl();\n+#else\n+ impl_->setAutoRelease(flag);\n+#endif\n+}\n+\n+void cv::ogl::Program::bind() const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ impl_->bind();\n+#endif\n+}\n+\n+void cv::ogl::Program::unbind()\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ gl::UseProgram(0);\n+#endif\n+}\n+\n+int cv::ogl::Program::getAttributeLocation(const char* name) const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ return impl_->getAttributeLocation(name);\n+#endif\n+}\n+\n+int cv::ogl::Program::getUniformLocation(const char* name) const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ return impl_->getUniformLocation(name);\n+#endif\n+}\n+\n+void cv::ogl::Program::setUniformVec3(int loc, Vec3f vec)\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ gl::Uniform3f(loc, vec(0), vec(1), vec(2));\n+#endif\n+}\n+\n+void cv::ogl::Program::setUniformMat4x4(int loc, Matx44f mat)\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ gl::UniformMatrix4fv(loc, 1, true, mat.val);\n+#endif\n+}\n+\n+unsigned int cv::ogl::Program::programId() const\n+{\n+#ifndef HAVE_OPENGL\n+ throw_no_ogl();\n+#else\n+ return impl_->programId();\n+#endif\n+}\n+", - "comment_created_at": "2021-10-15T09:12:14+00:00", - "comment_author": "RiscadoA", - "comment_body": "Can you clarify what you mean by duplicated code and those methods? I'm not seeing how it relates to the code section you commented. If the duplicated code you're referring to is those #ifndef #else #endif macros, I don't think it is possible to remove them.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "724474609", - "pr_number": 20371, - "pr_file": "modules/highgui/src/viz3d/viz3d.cpp", - "created_at": "2021-10-07T19:34:06+00:00", - "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n\n#include \"../precomp.hpp\"\n#include \"viz3d_private.hpp\"\n#include \"opencv2/core/utils/logger.hpp\"\n#include \"opencv2/imgproc.hpp\"\n\nusing namespace cv;\nusing namespace cv::viz3d;\n\n#ifdef HAVE_OPENGL\n\nstatic void openGlDrawCallback(void* data)\n{\n Window* win = static_cast(data);\n if (win != nullptr)\n win->draw();\n}\n\nstatic void openGlFreeCallback(void* data)\n{\n Window* win = static_cast(data);\n if (win != nullptr)\n delete win;\n}\n\nstatic void mouseCallback(int event, int x, int y, int flags, void* data)\n{\n Window* win = static_cast(data);\n if (win != nullptr)\n win->onMouse(event, x, y, flags);\n}\n\nstatic Window* getWindow(const String& win_name)\n{\n namedWindow(win_name, WINDOW_OPENGL);\n const double useGl = getWindowProperty(win_name, WND_PROP_OPENGL);\n if (useGl <= 0)\n CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the window was created without an OpenGL context\");\n\n Window* win = static_cast(getOpenGlUserData(win_name));\n\n if (!win)\n {\n\t\tsetOpenGlContext(win_name);\n\t\twin = new Window(win_name);\n\t\tsetOpenGlDrawCallback(win_name, &openGlDrawCallback, win);\n setOpenGlFreeCallback(win_name, &openGlFreeCallback);\n setMouseCallback(win_name, &mouseCallback, win);\n }\n else\n {\n auto callback = getOpenGlDrawCallback(win_name);\n if (callback != nullptr && callback != openGlDrawCallback)\n CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the OpenGL callback is already 
being used on this window\");\n }\n\n return win;\n}\n\n#endif // HAVE_OPENGL\n\nvoid cv::viz3d::setPerspective(const String& win_name, float fov, float z_near, float z_far)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(fov);\n CV_UNUSED(z_near);\n CV_UNUSED(z_far);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->getView().setPerspective(fov, z_near, z_far);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setGridVisible(const String& win_name, bool visible)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(visible);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->setGridVisible(visible);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setSun(const String& win_name, const Vec3f& direction, const Vec3f& ambient, const Vec3f& diffuse)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(direction);\n CV_UNUSED(ambient);\n CV_UNUSED(diffuse);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->setSun(direction, ambient, diffuse);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setSky(const String& win_name, const Vec3f& color)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(color);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->setSky(color);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::showBox(const String& win_name, const String& obj_name, const Vec3f& size, const Vec3f& color, RenderMode mode)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(size);\n CV_UNUSED(color);\n CV_UNUSED(mode);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n if (mode == RENDER_WIREFRAME)\n {\n float points_data[] = {\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), -size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n +size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n 
+size(0), +size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n };\n\n const Mat points_mat = Mat(Size(6, 24), CV_32F, &points_data);", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "724474609", - "repo_full_name": "opencv/opencv", - "pr_number": 20371, - "pr_file": "modules/highgui/src/viz3d/viz3d.cpp", - "discussion_id": "724474609", - "commented_code": "@@ -0,0 +1,1671 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+\n+#include \"../precomp.hpp\"\n+#include \"viz3d_private.hpp\"\n+#include \"opencv2/core/utils/logger.hpp\"\n+#include \"opencv2/imgproc.hpp\"\n+\n+using namespace cv;\n+using namespace cv::viz3d;\n+\n+#ifdef HAVE_OPENGL\n+\n+static void openGlDrawCallback(void* data)\n+{\n+ Window* win = static_cast(data);\n+ if (win != nullptr)\n+ win->draw();\n+}\n+\n+static void openGlFreeCallback(void* data)\n+{\n+ Window* win = static_cast(data);\n+ if (win != nullptr)\n+ delete win;\n+}\n+\n+static void mouseCallback(int event, int x, int y, int flags, void* data)\n+{\n+ Window* win = static_cast(data);\n+ if (win != nullptr)\n+ win->onMouse(event, x, y, flags);\n+}\n+\n+static Window* getWindow(const String& win_name)\n+{\n+ namedWindow(win_name, WINDOW_OPENGL);\n+ const double useGl = getWindowProperty(win_name, WND_PROP_OPENGL);\n+ if (useGl <= 0)\n+ CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the window was created without an OpenGL context\");\n+\n+ Window* win = static_cast(getOpenGlUserData(win_name));\n+\n+ if (!win)\n+ {\n+\t\tsetOpenGlContext(win_name);\n+\t\twin = new Window(win_name);\n+\t\tsetOpenGlDrawCallback(win_name, &openGlDrawCallback, win);\n+ setOpenGlFreeCallback(win_name, &openGlFreeCallback);\n+ setMouseCallback(win_name, &mouseCallback, win);\n+ }\n+ else\n+ {\n+ auto callback = getOpenGlDrawCallback(win_name);\n+ if (callback != nullptr && callback != openGlDrawCallback)\n+ CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the OpenGL callback is already being used on this window\");\n+ }\n+\n+ return win;\n+}\n+\n+#endif // HAVE_OPENGL\n+\n+void cv::viz3d::setPerspective(const String& win_name, float fov, float z_near, float z_far)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(fov);\n+ CV_UNUSED(z_near);\n+ CV_UNUSED(z_far);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->getView().setPerspective(fov, z_near, z_far);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::setGridVisible(const String& win_name, bool visible)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(visible);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->setGridVisible(visible);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::setSun(const String& win_name, const Vec3f& direction, const Vec3f& ambient, const Vec3f& diffuse)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(direction);\n+ CV_UNUSED(ambient);\n+ CV_UNUSED(diffuse);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = 
getWindow(win_name);\n+ win->setSun(direction, ambient, diffuse);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::setSky(const String& win_name, const Vec3f& color)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(color);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->setSky(color);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::showBox(const String& win_name, const String& obj_name, const Vec3f& size, const Vec3f& color, RenderMode mode)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(size);\n+ CV_UNUSED(color);\n+ CV_UNUSED(mode);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ if (mode == RENDER_WIREFRAME)\n+ {\n+ float points_data[] = {\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ };\n+\n+ const Mat points_mat = Mat(Size(6, 24), CV_32F, &points_data);", - "comment_created_at": "2021-10-07T19:34:06+00:00", - "comment_author": "JulieBar", - "comment_body": "do not use magic numbers 6, 24; get it from the size of previous array", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "724475444", - "pr_number": 20371, - "pr_file": "modules/highgui/src/viz3d/viz3d.cpp", - "created_at": "2021-10-07T19:35:28+00:00", - "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n\n#include \"../precomp.hpp\"\n#include \"viz3d_private.hpp\"\n#include \"opencv2/core/utils/logger.hpp\"\n#include \"opencv2/imgproc.hpp\"\n\nusing namespace cv;\nusing namespace cv::viz3d;\n\n#ifdef HAVE_OPENGL\n\nstatic void openGlDrawCallback(void* data)\n{\n Window* win = static_cast(data);\n if (win != nullptr)\n win->draw();\n}\n\nstatic void openGlFreeCallback(void* data)\n{\n Window* win = static_cast(data);\n if (win 
!= nullptr)\n delete win;\n}\n\nstatic void mouseCallback(int event, int x, int y, int flags, void* data)\n{\n Window* win = static_cast(data);\n if (win != nullptr)\n win->onMouse(event, x, y, flags);\n}\n\nstatic Window* getWindow(const String& win_name)\n{\n namedWindow(win_name, WINDOW_OPENGL);\n const double useGl = getWindowProperty(win_name, WND_PROP_OPENGL);\n if (useGl <= 0)\n CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the window was created without an OpenGL context\");\n\n Window* win = static_cast(getOpenGlUserData(win_name));\n\n if (!win)\n {\n\t\tsetOpenGlContext(win_name);\n\t\twin = new Window(win_name);\n\t\tsetOpenGlDrawCallback(win_name, &openGlDrawCallback, win);\n setOpenGlFreeCallback(win_name, &openGlFreeCallback);\n setMouseCallback(win_name, &mouseCallback, win);\n }\n else\n {\n auto callback = getOpenGlDrawCallback(win_name);\n if (callback != nullptr && callback != openGlDrawCallback)\n CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the OpenGL callback is already being used on this window\");\n }\n\n return win;\n}\n\n#endif // HAVE_OPENGL\n\nvoid cv::viz3d::setPerspective(const String& win_name, float fov, float z_near, float z_far)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(fov);\n CV_UNUSED(z_near);\n CV_UNUSED(z_far);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->getView().setPerspective(fov, z_near, z_far);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setGridVisible(const String& win_name, bool visible)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(visible);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->setGridVisible(visible);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setSun(const String& win_name, const Vec3f& direction, const Vec3f& ambient, const Vec3f& diffuse)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(direction);\n CV_UNUSED(ambient);\n CV_UNUSED(diffuse);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->setSun(direction, ambient, diffuse);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setSky(const String& win_name, const Vec3f& color)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(color);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->setSky(color);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::showBox(const String& win_name, const String& obj_name, const Vec3f& size, const Vec3f& color, RenderMode mode)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(size);\n CV_UNUSED(color);\n CV_UNUSED(mode);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n if (mode == RENDER_WIREFRAME)\n {\n float points_data[] = {\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), 
color(2),\n +size(0), -size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n +size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n +size(0), +size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n };\n\n const Mat points_mat = Mat(Size(6, 24), CV_32F, &points_data);\n\n showLines(win_name, obj_name, points_mat);\n }\n else if (mode == RENDER_SIMPLE)\n {\n float verts_data[] = {\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n };\n\n static unsigned char indices_data[] = {\n 0, 1, 2,\n 2, 3, 0,\n 4, 5, 6,\n 6, 7, 4,\n 0, 1, 5,\n 5, 4, 0,\n 3, 2, 6,\n 6, 7, 3,\n 0, 3, 7,\n 7, 4, 0,\n 1, 2, 6,\n 6, 5, 1,\n };\n\n const Mat verts_mat = Mat(Size(6, 8), CV_32F, &verts_data);\n const Mat indices_mat = Mat(Size(3, 12), CV_8U, &indices_data);", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "724475444", - "repo_full_name": "opencv/opencv", - "pr_number": 20371, - "pr_file": "modules/highgui/src/viz3d/viz3d.cpp", - "discussion_id": "724475444", - "commented_code": "@@ -0,0 +1,1671 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+\n+#include \"../precomp.hpp\"\n+#include \"viz3d_private.hpp\"\n+#include \"opencv2/core/utils/logger.hpp\"\n+#include \"opencv2/imgproc.hpp\"\n+\n+using namespace cv;\n+using namespace cv::viz3d;\n+\n+#ifdef HAVE_OPENGL\n+\n+static void openGlDrawCallback(void* data)\n+{\n+ Window* win = static_cast(data);\n+ if (win != nullptr)\n+ win->draw();\n+}\n+\n+static void openGlFreeCallback(void* data)\n+{\n+ Window* win = static_cast(data);\n+ if (win != nullptr)\n+ delete win;\n+}\n+\n+static void mouseCallback(int event, int x, int y, int flags, void* data)\n+{\n+ Window* win = static_cast(data);\n+ if (win != nullptr)\n+ win->onMouse(event, x, y, flags);\n+}\n+\n+static Window* getWindow(const String& win_name)\n+{\n+ namedWindow(win_name, WINDOW_OPENGL);\n+ const double useGl = getWindowProperty(win_name, WND_PROP_OPENGL);\n+ if (useGl <= 0)\n+ 
CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the window was created without an OpenGL context\");\n+\n+ Window* win = static_cast(getOpenGlUserData(win_name));\n+\n+ if (!win)\n+ {\n+\t\tsetOpenGlContext(win_name);\n+\t\twin = new Window(win_name);\n+\t\tsetOpenGlDrawCallback(win_name, &openGlDrawCallback, win);\n+ setOpenGlFreeCallback(win_name, &openGlFreeCallback);\n+ setMouseCallback(win_name, &mouseCallback, win);\n+ }\n+ else\n+ {\n+ auto callback = getOpenGlDrawCallback(win_name);\n+ if (callback != nullptr && callback != openGlDrawCallback)\n+ CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the OpenGL callback is already being used on this window\");\n+ }\n+\n+ return win;\n+}\n+\n+#endif // HAVE_OPENGL\n+\n+void cv::viz3d::setPerspective(const String& win_name, float fov, float z_near, float z_far)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(fov);\n+ CV_UNUSED(z_near);\n+ CV_UNUSED(z_far);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->getView().setPerspective(fov, z_near, z_far);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::setGridVisible(const String& win_name, bool visible)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(visible);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->setGridVisible(visible);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::setSun(const String& win_name, const Vec3f& direction, const Vec3f& ambient, const Vec3f& diffuse)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(direction);\n+ CV_UNUSED(ambient);\n+ CV_UNUSED(diffuse);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->setSun(direction, ambient, diffuse);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::setSky(const String& win_name, const Vec3f& color)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(color);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->setSky(color);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::showBox(const String& win_name, const String& obj_name, const Vec3f& size, const Vec3f& color, RenderMode mode)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(size);\n+ CV_UNUSED(color);\n+ CV_UNUSED(mode);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ if (mode == RENDER_WIREFRAME)\n+ {\n+ float points_data[] = {\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), 
-size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ };\n+\n+ const Mat points_mat = Mat(Size(6, 24), CV_32F, &points_data);\n+\n+ showLines(win_name, obj_name, points_mat);\n+ }\n+ else if (mode == RENDER_SIMPLE)\n+ {\n+ float verts_data[] = {\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ };\n+\n+ static unsigned char indices_data[] = {\n+ 0, 1, 2,\n+ 2, 3, 0,\n+ 4, 5, 6,\n+ 6, 7, 4,\n+ 0, 1, 5,\n+ 5, 4, 0,\n+ 3, 2, 6,\n+ 6, 7, 3,\n+ 0, 3, 7,\n+ 7, 4, 0,\n+ 1, 2, 6,\n+ 6, 5, 1,\n+ };\n+\n+ const Mat verts_mat = Mat(Size(6, 8), CV_32F, &verts_data);\n+ const Mat indices_mat = Mat(Size(3, 12), CV_8U, &indices_data);", - "comment_created_at": "2021-10-07T19:35:28+00:00", - "comment_author": "JulieBar", - "comment_body": "no magic numbers; use std::array and get size", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "724533336", - "pr_number": 20371, - "pr_file": "modules/highgui/src/viz3d/viz3d.cpp", - "created_at": "2021-10-07T21:05:49+00:00", - "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n\n#include \"../precomp.hpp\"\n#include \"viz3d_private.hpp\"\n#include \"opencv2/core/utils/logger.hpp\"\n#include \"opencv2/imgproc.hpp\"\n\nusing namespace cv;\nusing namespace cv::viz3d;\n\n#ifdef HAVE_OPENGL\n\nstatic void openGlDrawCallback(void* data)\n{\n Window* win = static_cast(data);\n if (win != nullptr)\n win->draw();\n}\n\nstatic void openGlFreeCallback(void* data)\n{\n Window* win = static_cast(data);\n if (win != nullptr)\n delete win;\n}\n\nstatic void mouseCallback(int event, int x, int y, int flags, void* data)\n{\n Window* win = static_cast(data);\n if (win != nullptr)\n win->onMouse(event, x, y, flags);\n}\n\nstatic Window* getWindow(const String& win_name)\n{\n namedWindow(win_name, WINDOW_OPENGL);\n const double useGl = getWindowProperty(win_name, WND_PROP_OPENGL);\n if (useGl <= 0)\n CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the window was created without an OpenGL context\");\n\n Window* win = static_cast(getOpenGlUserData(win_name));\n\n if (!win)\n 
{\n\t\tsetOpenGlContext(win_name);\n\t\twin = new Window(win_name);\n\t\tsetOpenGlDrawCallback(win_name, &openGlDrawCallback, win);\n setOpenGlFreeCallback(win_name, &openGlFreeCallback);\n setMouseCallback(win_name, &mouseCallback, win);\n }\n else\n {\n auto callback = getOpenGlDrawCallback(win_name);\n if (callback != nullptr && callback != openGlDrawCallback)\n CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the OpenGL callback is already being used on this window\");\n }\n\n return win;\n}\n\n#endif // HAVE_OPENGL\n\nvoid cv::viz3d::setPerspective(const String& win_name, float fov, float z_near, float z_far)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(fov);\n CV_UNUSED(z_near);\n CV_UNUSED(z_far);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->getView().setPerspective(fov, z_near, z_far);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setGridVisible(const String& win_name, bool visible)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(visible);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->setGridVisible(visible);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setSun(const String& win_name, const Vec3f& direction, const Vec3f& ambient, const Vec3f& diffuse)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(direction);\n CV_UNUSED(ambient);\n CV_UNUSED(diffuse);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->setSun(direction, ambient, diffuse);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setSky(const String& win_name, const Vec3f& color)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(color);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->setSky(color);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::showBox(const String& win_name, const String& obj_name, const Vec3f& size, const Vec3f& color, RenderMode mode)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(size);\n CV_UNUSED(color);\n CV_UNUSED(mode);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n if (mode == RENDER_WIREFRAME)\n {\n float points_data[] = {\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), -size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n +size(0), -size(1), 
+size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n +size(0), +size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n };\n\n const Mat points_mat = Mat(Size(6, 24), CV_32F, &points_data);\n\n showLines(win_name, obj_name, points_mat);\n }\n else if (mode == RENDER_SIMPLE)\n {\n float verts_data[] = {\n -size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), -size(1), -size(2), color(0), color(1), color(2),\n +size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), +size(1), -size(2), color(0), color(1), color(2),\n -size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), -size(1), +size(2), color(0), color(1), color(2),\n +size(0), +size(1), +size(2), color(0), color(1), color(2),\n -size(0), +size(1), +size(2), color(0), color(1), color(2),\n };\n\n static unsigned char indices_data[] = {\n 0, 1, 2,\n 2, 3, 0,\n 4, 5, 6,\n 6, 7, 4,\n 0, 1, 5,\n 5, 4, 0,\n 3, 2, 6,\n 6, 7, 3,\n 0, 3, 7,\n 7, 4, 0,\n 1, 2, 6,\n 6, 5, 1,\n };\n\n const Mat verts_mat = Mat(Size(6, 8), CV_32F, &verts_data);\n const Mat indices_mat = Mat(Size(3, 12), CV_8U, &indices_data);\n\n showMesh(win_name, obj_name, verts_mat, indices_mat);\n }\n else if (mode == RENDER_SHADING)\n {\n float verts_data[] = {\n -size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 0\n +size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 1\n +size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 2\n +size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 2\n -size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 3\n -size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 0\n\n -size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 4\n +size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 5\n +size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 6\n +size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 6\n -size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 7\n -size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 4\n\n\n -size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 0\n +size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 1\n +size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 5\n +size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 5\n -size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 4\n -size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 0\n\n -size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 3\n +size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 2\n +size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 6\n +size(0), 
+size(1), +size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 6\n -size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 7\n -size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 3\n\n\n -size(0), -size(1), -size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 0\n -size(0), +size(1), -size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 3\n -size(0), +size(1), +size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 7\n -size(0), +size(1), +size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 7\n -size(0), -size(1), +size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 4\n -size(0), -size(1), -size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 0\n\n +size(0), -size(1), -size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 1\n +size(0), +size(1), -size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 2\n +size(0), +size(1), +size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 6\n +size(0), +size(1), +size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 6\n +size(0), -size(1), +size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 5\n +size(0), -size(1), -size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 1\n };\n\n const Mat verts_mat = Mat(Size(9, 36), CV_32F, &verts_data);\n\n showMesh(win_name, obj_name, verts_mat);\n }\n#endif\n}\n\nvoid cv::viz3d::showPlane(const String& win_name, const String& obj_name, const Vec2f& size, const Vec3f& color, RenderMode mode)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(size);\n CV_UNUSED(color);\n CV_UNUSED(mode);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n if (mode == RENDER_WIREFRAME)\n {\n float points_data[] = {\n -size(0), 0.0f, -size(1), color(0), color(1), color(2),\n +size(0), 0.0f, -size(1), color(0), color(1), color(2),\n +size(0), 0.0f, -size(1), color(0), color(1), color(2),\n +size(0), 0.0f, +size(1), color(0), color(1), color(2),\n +size(0), 0.0f, +size(1), color(0), color(1), color(2),\n -size(0), 0.0f, +size(1), color(0), color(1), color(2),\n -size(0), 0.0f, +size(1), color(0), color(1), color(2),\n -size(0), 0.0f, -size(1), color(0), color(1), color(2),\n };\n\n const Mat points_mat = Mat(Size(6, 8), CV_32F, points_data);\n\n showLines(win_name, obj_name, points_mat);\n }\n else if (mode == RENDER_SIMPLE)\n {\n float verts_data[] = {\n -size(0), 0.0f, -size(1), color(0), color(1), color(2),\n +size(0), 0.0f, -size(1), color(0), color(1), color(2),\n +size(0), 0.0f, +size(1), color(0), color(1), color(2),\n -size(0), 0.0f, -size(1), color(0), color(1), color(2),\n -size(0), 0.0f, +size(1), color(0), color(1), color(2),\n +size(0), 0.0f, +size(1), color(0), color(1), color(2),\n };\n\n const Mat verts_mat = Mat(Size(6, 6), CV_32F, verts_data);\n\n showMesh(win_name, obj_name, verts_mat);\n }\n else if (mode == RENDER_SHADING)\n {\n float verts_data[] = {\n -size(0), 0.0f, -size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n +size(0), 0.0f, -size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n +size(0), 0.0f, +size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n -size(0), 0.0f, -size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n -size(0), 0.0f, +size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n +size(0), 0.0f, +size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n };\n\n const Mat verts_mat = Mat(Size(9, 6), CV_32F, 
verts_data);\n\n showMesh(win_name, obj_name, verts_mat);\n }\n#endif\n}\n\nvoid cv::viz3d::showSphere(const String& win_name, const String& obj_name, float radius, const Vec3f& color, RenderMode mode, int divs)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(radius);\n CV_UNUSED(color);\n CV_UNUSED(mode);\n CV_UNUSED(divs);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n CV_Assert(divs >= 1);\n\n if (mode == RENDER_WIREFRAME)\n {\n const float limit = 2.0f * 3.1415f;\n std::vector points_data;\n\n for (int e = 0; e < 3; ++e)\n {\n auto ex = Vec3f::zeros();\n auto ey = Vec3f::zeros();\n ex((e + 0) % 3) = radius;\n ey((e + 1) % 3) = radius;\n\n for (float t = 0.0f, n; t < limit;)\n {\n n = t + limit / (divs * 4);\n points_data.insert(points_data.end(), {\n ex(0) * cos(t) + ey(0) * sin(t), ex(1) * cos(t) + ey(1) * sin(t), ex(2) * cos(t) + ey(2) * sin(t), color(0), color(1), color(2),\n ex(0) * cos(n) + ey(0) * sin(n), ex(1) * cos(n) + ey(1) * sin(n), ex(2) * cos(n) + ey(2) * sin(n), color(0), color(1), color(2),\n });\n t = n;\n }\n }\n\n const Mat points_mat = Mat(Size(6, points_data.size() / 6), CV_32F, points_data.data());\n\n showLines(win_name, obj_name, points_mat);\n }\n else\n {\n std::vector verts_data;\n\n for (int s = -1; s <= 1; s += 2)\n for (int e = 0; e < 3; ++e)\n {\n auto ex = Vec3f::zeros();\n auto ey = Vec3f::zeros();\n auto pz = Vec3f::zeros();\n ex((e + 1) % 3) = 1.0f / divs;\n ey((e + 2) % 3) = 1.0f / divs;\n pz(e) = s;\n\n for (int x = -divs; x < divs; ++x)\n for (int y = -divs; y < divs; ++y)\n if (mode == RENDER_SIMPLE)\n verts_data.insert(verts_data.end(), {\n ex(0) * (x + 0) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2),\n ex(0) * (x + 1) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2),\n ex(0) * (x + 1) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2),\n ex(0) * (x + 1) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2),\n ex(0) * (x + 0) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2),\n ex(0) * (x + 0) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2),\n });\n else\n verts_data.insert(verts_data.end(), {\n ex(0) * (x + 0) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n ex(0) * (x + 1) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n ex(0) * (x + 1) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n ex(0) * (x + 1) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n ex(0) * (x + 0) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 1) + pz(1), ex(2) * 
(x + 0) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n ex(0) * (x + 0) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n });\n }\n\n if (mode == RENDER_SIMPLE)\n {\n for (int i = 0; i < verts_data.size() / 6; ++i)\n {\n float l = sqrtf(verts_data[6 * i + 0] * verts_data[6 * i + 0] + verts_data[6 * i + 1] * verts_data[6 * i + 1] + verts_data[6 * i + 2] * verts_data[6 * i + 2]);\n verts_data[6 * i + 0] *= radius / l;\n verts_data[6 * i + 1] *= radius / l;\n verts_data[6 * i + 2] *= radius / l;\n }\n\n const Mat verts_mat = Mat(Size(6, verts_data.size() / 6), CV_32F, verts_data.data());\n showMesh(win_name, obj_name, verts_mat);\n }\n else\n {\n for (int i = 0; i < verts_data.size() / 9; ++i)\n {\n float l = sqrtf(verts_data[9 * i + 0] * verts_data[9 * i + 0] + verts_data[9 * i + 1] * verts_data[9 * i + 1] + verts_data[9 * i + 2] * verts_data[9 * i + 2]);\n verts_data[9 * i + 6] = verts_data[9 * i + 0] / l;\n verts_data[9 * i + 7] = verts_data[9 * i + 1] / l;\n verts_data[9 * i + 8] = verts_data[9 * i + 2] / l;\n verts_data[9 * i + 0] *= radius / l;\n verts_data[9 * i + 1] *= radius / l;\n verts_data[9 * i + 2] *= radius / l;\n }\n\n const Mat verts_mat = Mat(Size(9, verts_data.size() / 9), CV_32F, verts_data.data());\n showMesh(win_name, obj_name, verts_mat);\n }\n }\n#endif\n}\n\nvoid cv::viz3d::showCameraTrajectory(\n const String& win_name, const String& obj_name, InputArray trajectory,\n float aspect, float scale, Vec3f frustum_color, Vec3f line_color)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(trajectory);\n CV_UNUSED(aspect);\n CV_UNUSED(scale);\n CV_UNUSED(frustum_color);\n CV_UNUSED(line_color);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n CV_Assert(trajectory.dims() == 2 && trajectory.cols() == 6 && trajectory.depth() == CV_32F);\n\n auto data = trajectory.getMat();\n std::vector points_data;\n\n // Add frustums\n for (int i = 0; i < data.rows; ++i)\n {\n Vec3f position = { data.at(i, 0), data.at(i, 1), data.at(i, 2) };\n Vec3f forward = normalize(Vec3f { data.at(i, 3), data.at(i, 4), data.at(i, 5) });\n Vec3f world_up = { 0.0f, 1.0f, 0.0f };\n Vec3f right = forward.cross(world_up);\n Vec3f up = forward.cross(right);\n\n Vec3f back_f[4] = {\n position + (-right * aspect - up) * scale,\n position + ( right * aspect - up) * scale,\n position + ( right * aspect + up) * scale,\n position + (-right * aspect + up) * scale,\n };\n\n Vec3f front_f[4] = {\n position + (-right * aspect - up) * scale * 1.5f + forward * scale * 2.0f,\n position + (right * aspect - up) * scale * 1.5f + forward * scale * 2.0f,\n position + (right * aspect + up) * scale * 1.5f + forward * scale * 2.0f,\n position + (-right * aspect + up) * scale * 1.5f + forward * scale * 2.0f,\n };\n\n points_data.insert(points_data.end(), {\n back_f[0](0), back_f[0](1), back_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n back_f[1](0), back_f[1](1), back_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n back_f[1](0), back_f[1](1), back_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n back_f[2](0), back_f[2](1), back_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n back_f[2](0), back_f[2](1), back_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n back_f[3](0), back_f[3](1), back_f[3](2), 
frustum_color(0), frustum_color(1), frustum_color(2),\n back_f[3](0), back_f[3](1), back_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n back_f[0](0), back_f[0](1), back_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n\n front_f[0](0), front_f[0](1), front_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[1](0), front_f[1](1), front_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[1](0), front_f[1](1), front_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[2](0), front_f[2](1), front_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[2](0), front_f[2](1), front_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[3](0), front_f[3](1), front_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[3](0), front_f[3](1), front_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[0](0), front_f[0](1), front_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n\n back_f[0](0), back_f[0](1), back_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[0](0), front_f[0](1), front_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n back_f[1](0), back_f[1](1), back_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[1](0), front_f[1](1), front_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n back_f[2](0), back_f[2](1), back_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[2](0), front_f[2](1), front_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n back_f[3](0), back_f[3](1), back_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n front_f[3](0), front_f[3](1), front_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n });\n\n }\n\n // Add trajectory line\n for (int i = 1; i < data.rows; ++i)\n {\n points_data.insert(points_data.end(), {\n data.at(i - 1, 0), data.at(i - 1, 1), data.at(i - 1, 2), line_color(0), line_color(1), line_color(2),\n data.at(i, 0), data.at(i, 1), data.at(i, 2), line_color(0), line_color(1), line_color(2),\n });\n }\n\n const Mat points_mat = Mat(Size(6, points_data.size() / 6), CV_32F, points_data.data());\n showLines(win_name, obj_name, points_mat);\n\n#endif\n}\n\nvoid cv::viz3d::showMesh(const String& win_name, const String& obj_name, InputArray verts, InputArray indices)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(verts);\n CV_UNUSED(indices);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n Object* obj = win->get(obj_name);\n if (obj)\n delete obj;\n setOpenGlContext(win_name);\n obj = new Mesh(verts, indices);\n win->set(obj_name, obj);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::showMesh(const String& win_name, const String& obj_name, InputArray verts)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(verts);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n Object* obj = win->get(obj_name);\n if (obj)\n delete obj;\n setOpenGlContext(win_name);\n obj = new Mesh(verts);\n win->set(obj_name, obj);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::showPoints(const String& win_name, const String& obj_name, InputArray points)\n{\n 
CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(points);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n Object* obj = win->get(obj_name);\n if (obj)\n delete obj;\n setOpenGlContext(win_name);\n obj = new PointCloud(points);\n win->set(obj_name, obj);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::showRGBD(const String& win_name, const String& obj_name, InputArray img, const Matx33f& intrinsics, float scale)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(img);\n CV_UNUSED(intrinsics);\n CV_UNUSED(scale);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n CV_Assert(img.dims() == 2 && img.channels() == 4 && img.type() == CV_32FC4);\n \n Mat mat = img.getMat();\n Mat points;\n\n // This section (RGBD to point cloud) should be changed to use the 3d module when\n // #20013 is merged.\n\n float fx = intrinsics(0, 0);\n float fy = intrinsics(1, 1);\n float cx = intrinsics(0, 2);\n float cy = intrinsics(1, 2);\n\n for (int u = 0; u < mat.cols; ++u)\n for (int v = 0; v < mat.rows; ++v)\n {\n Vec4f c = mat.at(v, u);\n float d = c(3) * 0.001f; // mm to m\n\n float x_over_z = (cx - (float)u) / fx;\n float y_over_z = (cy - (float)v) / fy;\n float z = d/* / sqrt(1.0f + x_over_z * x_over_z + y_over_z * y_over_z)*/;\n float x = x_over_z * z;\n float y = y_over_z * z;\n\n float point[] = {\n x * scale, y * scale, z * scale,\n c(0) / 255.0f, c(1) / 255.0f, c(2) / 255.0f,\n };\n points.push_back(Mat(1, 6, CV_32F, point));\n }\n\n showPoints(win_name, obj_name, points);\n#endif\n}\n\nvoid cv::viz3d::showLines(const String& win_name, const String& obj_name, InputArray points)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(points);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n Object* obj = win->get(obj_name);\n if (obj)\n delete obj;\n setOpenGlContext(win_name);\n obj = new Lines(points);\n win->set(obj_name, obj);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setObjectPosition(const String& win_name, const String& obj_name, const Vec3f& position)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(position);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n Object* obj = win->get(obj_name);\n if (!obj)\n CV_Error(cv::Error::StsObjectNotFound, \"Object not found\");\n obj->setPosition(position);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::setObjectRotation(const String& win_name, const String& obj_name, const Vec3f& rotation)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n CV_UNUSED(rotation);\n CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n Object* obj = win->get(obj_name);\n if (!obj)\n CV_Error(cv::Error::StsObjectNotFound, \"Object not found\");\n obj->setRotation(rotation);\n updateWindow(win_name);\n#endif\n}\n\nvoid cv::viz3d::destroyObject(const String& win_name, const String& obj_name)\n{\n CV_TRACE_FUNCTION();\n#ifndef HAVE_OPENGL\n CV_UNUSED(win_name);\n CV_UNUSED(obj_name);\n 
CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n#else\n Window* win = getWindow(win_name);\n win->set(obj_name, nullptr);\n updateWindow(win_name);\n#endif\n}\n\n#ifdef HAVE_OPENGL\n\ncv::viz3d::View::View()\n{\n this->origin = { 0.0f, 0.0f, 0.0f };\n this->distance = 10.0f;\n this->position = { 0.0f, 0.0f, this->distance };\n this->up = { 0.0f, 1.0f, 0.0f };\n\n this->aspect = 1.0f;\n this->setPerspective(1.3f, 0.1f, 2000.0f);\n this->lookAt(this->origin, { 0.0f, 1.0f, 0.0f });\n}\n\nvoid cv::viz3d::View::setAspect(float aspect)\n{\n if (this->aspect != aspect)\n {\n this->aspect = aspect;\n this->setPerspective(this->fov, this->z_near, this->z_far);\n } \n}\n\nvoid cv::viz3d::View::setPerspective(float fov, float z_near, float z_far)\n{\n this->fov = fov;\n this->z_near = z_near;\n this->z_far = z_far;\n\n float tan_half_fovy = ::tan(this->fov / 2.0f);\n this->proj = Matx44f::zeros();\n this->proj(0, 0) = 1.0f / (this->aspect * tan_half_fovy);\n this->proj(1, 1) = 1.0f / tan_half_fovy;\n this->proj(2, 2) = (this->z_far + this->z_near) / (this->z_far - this->z_near);\n this->proj(2, 3) = 1.0f;\n this->proj(3, 2) = -(2.0f * this->z_far * this->z_near) / (this->z_far - this->z_near);\n}\n\nvoid cv::viz3d::View::rotate(float dx, float dy)\n{\n this->position = normalize(this->position - this->origin);\n float theta = atan2(this->position(2), this->position(0));\n float phi = ::asin(this->position(1));\n theta -= dx * 0.05f;\n phi += dy * 0.05f;\n phi = max(-1.5f, min(1.5f, phi));\n\n this->position(0) = ::cos(theta) * ::cos(phi) * this->distance;\n this->position(1) = ::sin(phi) * this->distance;\n this->position(2) = ::sin(theta) * ::cos(phi) * this->distance;\n this->position += this->origin;\n\n this->lookAt(this->origin, this->up);\n}\n\nvoid cv::viz3d::View::move(float dx, float dy)\n{\n Vec3f forward = normalize(this->position - this->origin);\n Vec3f right = normalize(this->up.cross(forward));\n Vec3f up = right.cross(forward);\n Vec3f delta = normalize(right * dx - up * dy) * this->distance * 0.01f;\n\n this->origin += delta;\n this->position += delta;\n this->lookAt(this->origin, this->up);\n}\n\nvoid cv::viz3d::View::scaleDistance(float amount)\n{\n this->distance *= amount;\n this->distance = max(0.1f, this->distance);\n this->position = normalize(this->position - this->origin) * this->distance + this->origin;\n\n this->lookAt(this->origin, this->up);\n}\n\nvoid cv::viz3d::View::lookAt(const Vec3f& point, const Vec3f& up)\n{\n Vec3f f = normalize(point - this->position);\n Vec3f s = normalize(up.cross(f));\n Vec3f u = f.cross(s);\n\n this->view = Matx44f::zeros();\n this->view(0, 0) = s[0];\n this->view(1, 0) = s[1];\n this->view(2, 0) = s[2];\n this->view(0, 1) = u[0];\n this->view(1, 1) = u[1];\n this->view(2, 1) = u[2];\n this->view(0, 2) = f[0];\n this->view(1, 2) = f[1];\n this->view(2, 2) = f[2];\n this->view(3, 0) = -s.dot(this->position);\n this->view(3, 1) = -u.dot(this->position);\n this->view(3, 2) = -f.dot(this->position);\n this->view(3, 3) = 1.0f;\n}\n\ncv::viz3d::Window::Window(const String& name)\n{\n this->name = name;\n this->sun.direction = normalize(Vec3f(0.3f, 1.0f, 0.5f));\n this->sun.ambient = { 0.1f, 0.1f, 0.1f };\n this->sun.diffuse = { 1.0f, 1.0f, 1.0f };\n this->sky_color = { 0.0f, 0.0f, 0.0f };\n\n\tfloat points[] = {\n\t\t0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f,\n\t\t0.5f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f,\n\t\t0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f,\n\t\t0.0f, 0.5f, 0.0f, 0.0f, 1.0f, 0.0f,\n\t\t0.0f, 0.0f, 0.0f, 0.0f, 
0.0f, 1.0f,\n\t\t0.0f, 0.0f, 0.5f, 0.0f, 0.0f, 1.0f,\n\t};\n\n\tMat points_mat = Mat(Size(6, 6), CV_32F, points);\n\n\tthis->crosshair = new Lines(points_mat);\n\tthis->shaders[this->crosshair->getShaderName()] = this->crosshair->buildShader();\n\tthis->crosshair->setShader(this->shaders[this->crosshair->getShaderName()]);\n\n this->grid = nullptr;\n}\n\ncv::viz3d::Window::~Window()\n{\n\tdelete this->crosshair;\n if (this->grid != nullptr)\n delete this->grid;\n\n for (auto obj : this->objects)\n\t\tdelete obj.second;\n}\n\ncv::viz3d::Object* cv::viz3d::Window::get(const String& obj_name)\n{\n\tauto it = this->objects.find(obj_name);\n\tif (it == this->objects.end())\n\t\treturn nullptr;\n\treturn it->second;\n}\n\nvoid cv::viz3d::Window::set(const String& obj_name, Object* obj)\n{\n\tauto it = this->objects.find(obj_name);\n\tif (it != this->objects.end() && it->second != obj)\n\t{\n\t\tdelete it->second;\n\n\t\tif (obj == nullptr)\n\t\t\tthis->objects.erase(it);\n\t\telse\n\t\t\tit->second = obj;\n\t}\n\telse if (obj != nullptr)\n\t\tthis->objects[obj_name] = obj;\n\n if (obj != nullptr)\n {\n String name = obj->getShaderName();\n auto it = this->shaders.find(name);\n if (it == this->shaders.end())\n this->shaders[name] = obj->buildShader();\n obj->setShader(this->shaders[name]);\n }\n}\n\nvoid cv::viz3d::Window::setSun(const Vec3f& direction, const Vec3f& ambient, const Vec3f& diffuse)\n{\n this->sun.direction = normalize(direction);\n this->sun.ambient = ambient;\n this->sun.diffuse = diffuse;\n}\n\nvoid cv::viz3d::Window::setSky(const Vec3f& color)\n{\n this->sky_color = color;\n}\n\nstatic Mat getGridVertices(const View& view)\n{\n const Vec3f grid_color = { 0.5f, 0.5f, 0.5f };\n const Vec3f axis_color = { 0.8f, 0.8f, 0.8f };\n const Vec3f center = view.getOrigin();\n const Vec3f camera_dir = view.getOrigin() - view.getPosition();\n const float scale = 0.3f;\n\n float tick_step = 1.0f;\n while (view.getDistance() * scale / tick_step > 4.0f)\n tick_step *= 2.0f;\n while (view.getDistance() * scale / tick_step < 2.0f)\n tick_step *= 0.5f;\n\n Mat points;\n float face_sign[3];\n\n const Vec3f min_p = center - Vec3f(1.0f, 1.0f, 1.0f) * view.getDistance() * scale;\n const Vec3f max_p = center + Vec3f(1.0f, 1.0f, 1.0f) * view.getDistance() * scale;\n\n // For each axis add a grid\n for (int ai = 0; ai < 3; ++ai)\n {\n Vec3f az = { 0.0f, 0.0f, 0.0f };\n az((ai + 2) % 3) = 1.0f;\n\n // Check if face is positive or negative along the az axis\n face_sign[ai] = camera_dir.dot(az) > 0.0f ? 
1.0f : -1.0f;\n\n float x = (floor(min_p(ai) / tick_step) + 1.0f) * tick_step;\n for (; x < max_p(ai); x += tick_step)\n {\n Vec3f a = min_p;\n Vec3f b = max_p;\n a((ai + 0) % 3) = x;\n b((ai + 0) % 3) = x;\n if (face_sign[ai] > 0.0f)\n a((ai + 2) % 3) = b((ai + 2) % 3);\n else\n b((ai + 2) % 3) = a((ai + 2) % 3);\n\n float data[] = {\n a(0), a(1), a(2), grid_color(0), grid_color(1), grid_color(2),\n b(0), b(1), b(2), grid_color(0), grid_color(1), grid_color(2),\n };\n\n points.push_back(Mat(2, 6, CV_32F, data));\n }\n\n float y = (floor(min_p((ai + 1) % 3) / tick_step) + 1.0f) * tick_step;\n for (; y < max_p((ai + 1) % 3); y += tick_step)\n {\n Vec3f a = min_p;\n Vec3f b = max_p;\n a((ai + 1) % 3) = y;\n b((ai + 1) % 3) = y;\n if (face_sign[ai] > 0.0f)\n a((ai + 2) % 3) = b((ai + 2) % 3);\n else\n b((ai + 2) % 3) = a((ai + 2) % 3);\n\n float data[] = {\n a(0), a(1), a(2), grid_color(0), grid_color(1), grid_color(2),\n b(0), b(1), b(2), grid_color(0), grid_color(1), grid_color(2),\n };\n\n points.push_back(Mat(2, 6, CV_32F, data));\n }\n }\n\n // Draw Ox, Oy and Oz axes and ticks\n {\n Vec3f a = { face_sign[1] > 0.0f ? min_p(0) : max_p(0), face_sign[2] > 0.0f ? max_p(1) : min_p(1), face_sign[0] > 0.0f ? max_p(2) : min_p(2) };\n Vec3f b = { face_sign[1] > 0.0f ? min_p(0) : max_p(0), face_sign[2] > 0.0f ? max_p(1) : min_p(1), face_sign[0] > 0.0f ? min_p(2) : max_p(2) };\n Vec3f c = { face_sign[1] > 0.0f ? max_p(0) : min_p(0), face_sign[2] > 0.0f ? max_p(1) : min_p(1), face_sign[0] > 0.0f ? min_p(2) : max_p(2) };\n Vec3f d = { face_sign[1] > 0.0f ? max_p(0) : min_p(0), face_sign[2] > 0.0f ? min_p(1) : max_p(1), face_sign[0] > 0.0f ? min_p(2) : max_p(2) };\n\n float data[] = {\n a(0), a(1), a(2), 0.0f, 0.0f, 0.8f,\n b(0), b(1), b(2), 0.0f, 0.0f, 0.8f,\n b(0), b(1), b(2), 0.8f, 0.0f, 0.0f,\n c(0), c(1), c(2), 0.8f, 0.0f, 0.0f,\n c(0), c(1), c(2), 0.0f, 0.8f, 0.0f,\n d(0), d(1), d(2), 0.0f, 0.8f, 0.0f,\n };\n\n points.push_back(Mat(6, 6, CV_32F, data));\n\n float x = (floor(min_p(0) / tick_step) + 1.0f) * tick_step;\n for (; x < max_p(0); x += tick_step)\n {\n Vec3f a, b, c;\n a(0) = b(0) = x;\n a(1) = b(1) = face_sign[2] > 0.0f ? max_p(1) : min_p(1);\n a(2) = face_sign[0] > 0.0f ? min_p(2) : max_p(2);\n b(2) = a(2) - face_sign[0] * 0.03f * scale * view.getDistance();\n\n float line[] = {\n a(0), a(1), a(2), 0.8f, 0.0f, 0.0f,\n b(0), b(1), b(2), 0.8f, 0.0f, 0.0f,\n };\n\n points.push_back(Mat(2, 6, CV_32F, line));\n }\n\n float y = (floor(min_p(1) / tick_step) + 1.0f) * tick_step;\n for (; y < max_p(1); y += tick_step)\n {\n Vec3f a, b;\n a(0) = b(0) = face_sign[1] > 0.0f ? max_p(0) : min_p(0);\n a(1) = b(1) = y;\n a(2) = face_sign[0] > 0.0f ? min_p(2) : max_p(2);\n b(2) = a(2) - face_sign[0] * 0.03f * scale * view.getDistance();\n\n float line[] = {\n a(0), a(1), a(2), 0.0f, 0.8f, 0.0f,\n b(0), b(1), b(2), 0.0f, 0.8f, 0.0f,\n };\n\n points.push_back(Mat(2, 6, CV_32F, line));\n }\n\n float z = (floor(min_p(2) / tick_step) + 1.0f) * tick_step;\n for (; z < max_p(2); z += tick_step)\n {\n Vec3f a, b;\n a(0) = face_sign[1] > 0.0f ? min_p(0) : max_p(0);\n b(0) = a(0) - face_sign[1] * 0.03f * scale * view.getDistance();\n a(1) = b(1) = face_sign[2] > 0.0f ? 
max_p(1) : min_p(1);\n a(2) = b(2) = z;\n\n float line[] = {\n a(0), a(1), a(2), 0.0f, 0.0f, 0.8f,\n b(0), b(1), b(2), 0.0f, 0.0f, 0.8f,\n };\n\n points.push_back(Mat(2, 6, CV_32F, line));\n }\n }\n\n return points;\n}\n\nvoid cv::viz3d::Window::setGridVisible(bool visible)\n{\n if (visible)\n {\n this->grid = new Lines(Mat(4096, 6, CV_32F), 0);\n this->grid->setShader(this->shaders[this->grid->getShaderName()]);\n }\n else if (this->grid != nullptr)\n {\n delete this->grid;\n this->grid = nullptr;\n }\n}\n\nvoid cv::viz3d::Window::draw()\n{\n Rect rect = getWindowImageRect(this->name);\n float aspect = (float)rect.width / (float)rect.height;\n this->view.setAspect(aspect);\n\n ogl::enable(ogl::DEPTH_TEST);\n ogl::clearColor({ double(this->sky_color(0) * 255.0f), double(this->sky_color(1) * 255.0f), double(this->sky_color(2) * 255.0f), 255.0 });\n\n if (this->grid)\n {\n Mat labels;\n static_cast(this->grid)->update(getGridVertices(this->view));\n this->grid->draw(this->view, this->sun);\n }\n else\n {\n this->crosshair->setPosition(this->view.getOrigin());\n this->crosshair->draw(this->view, this->sun);\n }\n\n for (auto& obj : this->objects)\n obj.second->draw(this->view, this->sun);\n}\n\nvoid cv::viz3d::Window::onMouse(int event, int x, int y, int flags)\n{\n if (event == EVENT_LBUTTONDOWN || event == EVENT_RBUTTONDOWN)\n {\n this->l_mouse_x = x;\n this->l_mouse_y = y;\n }\n else if (event == EVENT_MOUSEMOVE && (flags & EVENT_FLAG_LBUTTON))\n {\n this->view.rotate(x - this->l_mouse_x, y - this->l_mouse_y);\n updateWindow(this->name);\n this->l_mouse_x = x;\n this->l_mouse_y = y;\n }\n else if (event == EVENT_MOUSEMOVE && (flags & EVENT_FLAG_RBUTTON))\n {\n this->view.move(x - this->l_mouse_x, y - this->l_mouse_y);\n updateWindow(this->name);\n this->l_mouse_x = x;\n this->l_mouse_y = y;\n }\n else if (event == EVENT_MOUSEWHEEL)\n {\n this->view.scaleDistance(min(1.2f, max(0.8f, 1.0f - getMouseWheelDelta(flags) / 12.0f)));\n updateWindow(this->name);\n }\n}\n\ncv::viz3d::Object::Object()\n{\n\tthis->position = { 0.0f, 0.0f, 0.0f };\n\tthis->rotation = { 0.0f, 0.0f, 0.0f };\n\tthis->model = Matx44f::eye();\n}\n\nvoid cv::viz3d::Object::setPosition(const Vec3f& position)\n{\n this->position = position;\n this->updateModel();\n}\n\nvoid cv::viz3d::Object::setRotation(const Vec3f& rotation)\n{\n this->rotation = rotation;\n this->updateModel();\n}\n\nvoid cv::viz3d::Object::updateModel()\n{\n // Calculate rotation matrices\n Matx44f rot_a = Matx44f::eye();\n rot_a(0, 0) = ::cos(this->rotation(0));\n rot_a(1, 0) = ::sin(this->rotation(0));\n rot_a(0, 1) = -::sin(this->rotation(0));\n rot_a(1, 1) = ::cos(this->rotation(0));\n\n Matx44f rot_b = Matx44f::eye();\n rot_b(1, 1) = ::cos(this->rotation(1));\n rot_b(2, 1) = ::sin(this->rotation(1));\n rot_b(1, 2) = -::sin(this->rotation(1));\n rot_b(2, 2) = ::cos(this->rotation(1));\n\n Matx44f rot_c = Matx44f::eye();\n rot_c(0, 0) = ::cos(this->rotation(2));\n rot_c(2, 0) = ::sin(this->rotation(2));\n rot_c(0, 2) = -::sin(this->rotation(2));\n rot_c(2, 2) = ::cos(this->rotation(2));\n\n // Calculate translation matrix\n Matx44f trans = Matx44f::eye();\n trans(3, 0) = this->position(0);\n trans(3, 1) = this->position(1);\n trans(3, 2) = this->position(2);\n\n // Multiply matrices\n this->model = rot_c * rot_b * rot_a * trans;\n}\n\ncv::viz3d::Mesh::Mesh(InputArray verts, InputArray indices)\n{\n // Check parameter validity\n CV_Assert(verts.channels() == 1 && verts.dims() == 2 && (verts.size().width == 3 || verts.size().width == 6 || 
verts.size().width == 9));\n CV_Assert(verts.depth() == CV_32F);\n CV_Assert(indices.channels() == 1 && indices.dims() == 2 && indices.size().width == 3);\n CV_Assert(indices.depth() == CV_8U || indices.depth() == CV_16U || indices.depth() == CV_32S);\n\n // Prepare buffers\n if (verts.kind() == _InputArray::OPENGL_BUFFER)\n this->verts = verts.getOGlBuffer();\n else\n this->verts.copyFrom(verts, ogl::Buffer::ARRAY_BUFFER);\n\n if (indices.kind() == _InputArray::OPENGL_BUFFER)\n this->indices = indices.getOGlBuffer();\n else\n this->indices.copyFrom(indices, ogl::Buffer::ELEMENT_ARRAY_BUFFER);\n\n switch (indices.depth())\n {\n case CV_8U:\n this->index_type = ogl::UNSIGNED_BYTE;\n break;\n case CV_16U:\n this->index_type = ogl::UNSIGNED_SHORT;\n break;\n case CV_32S:\n this->index_type = ogl::UNSIGNED_INT;\n break;\n }\n\n // Prepare vertex array\n if (verts.size().width == 3)\n {\n this->va.create({\n ogl::Attribute(", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "724533336", - "repo_full_name": "opencv/opencv", - "pr_number": 20371, - "pr_file": "modules/highgui/src/viz3d/viz3d.cpp", - "discussion_id": "724533336", - "commented_code": "@@ -0,0 +1,1671 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+\n+#include \"../precomp.hpp\"\n+#include \"viz3d_private.hpp\"\n+#include \"opencv2/core/utils/logger.hpp\"\n+#include \"opencv2/imgproc.hpp\"\n+\n+using namespace cv;\n+using namespace cv::viz3d;\n+\n+#ifdef HAVE_OPENGL\n+\n+static void openGlDrawCallback(void* data)\n+{\n+ Window* win = static_cast(data);\n+ if (win != nullptr)\n+ win->draw();\n+}\n+\n+static void openGlFreeCallback(void* data)\n+{\n+ Window* win = static_cast(data);\n+ if (win != nullptr)\n+ delete win;\n+}\n+\n+static void mouseCallback(int event, int x, int y, int flags, void* data)\n+{\n+ Window* win = static_cast(data);\n+ if (win != nullptr)\n+ win->onMouse(event, x, y, flags);\n+}\n+\n+static Window* getWindow(const String& win_name)\n+{\n+ namedWindow(win_name, WINDOW_OPENGL);\n+ const double useGl = getWindowProperty(win_name, WND_PROP_OPENGL);\n+ if (useGl <= 0)\n+ CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the window was created without an OpenGL context\");\n+\n+ Window* win = static_cast(getOpenGlUserData(win_name));\n+\n+ if (!win)\n+ {\n+\t\tsetOpenGlContext(win_name);\n+\t\twin = new Window(win_name);\n+\t\tsetOpenGlDrawCallback(win_name, &openGlDrawCallback, win);\n+ setOpenGlFreeCallback(win_name, &openGlFreeCallback);\n+ setMouseCallback(win_name, &mouseCallback, win);\n+ }\n+ else\n+ {\n+ auto callback = getOpenGlDrawCallback(win_name);\n+ if (callback != nullptr && callback != openGlDrawCallback)\n+ CV_Error(cv::Error::StsBadArg, \"OpenCV/UI: viz3d can't be used because the OpenGL callback is already being used on this window\");\n+ }\n+\n+ return win;\n+}\n+\n+#endif // HAVE_OPENGL\n+\n+void cv::viz3d::setPerspective(const String& win_name, float fov, float z_near, float z_far)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(fov);\n+ CV_UNUSED(z_near);\n+ CV_UNUSED(z_far);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->getView().setPerspective(fov, z_near, z_far);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void 
cv::viz3d::setGridVisible(const String& win_name, bool visible)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(visible);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->setGridVisible(visible);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::setSun(const String& win_name, const Vec3f& direction, const Vec3f& ambient, const Vec3f& diffuse)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(direction);\n+ CV_UNUSED(ambient);\n+ CV_UNUSED(diffuse);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->setSun(direction, ambient, diffuse);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::setSky(const String& win_name, const Vec3f& color)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(color);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->setSky(color);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::showBox(const String& win_name, const String& obj_name, const Vec3f& size, const Vec3f& color, RenderMode mode)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(size);\n+ CV_UNUSED(color);\n+ CV_UNUSED(mode);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ if (mode == RENDER_WIREFRAME)\n+ {\n+ float points_data[] = {\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ };\n+\n+ const Mat points_mat = Mat(Size(6, 24), CV_32F, &points_data);\n+\n+ showLines(win_name, obj_name, points_mat);\n+ }\n+ else if (mode == RENDER_SIMPLE)\n+ {\n+ float verts_data[] = {\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2),\n+ +size(0), 
+size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2),\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2),\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2),\n+ };\n+\n+ static unsigned char indices_data[] = {\n+ 0, 1, 2,\n+ 2, 3, 0,\n+ 4, 5, 6,\n+ 6, 7, 4,\n+ 0, 1, 5,\n+ 5, 4, 0,\n+ 3, 2, 6,\n+ 6, 7, 3,\n+ 0, 3, 7,\n+ 7, 4, 0,\n+ 1, 2, 6,\n+ 6, 5, 1,\n+ };\n+\n+ const Mat verts_mat = Mat(Size(6, 8), CV_32F, &verts_data);\n+ const Mat indices_mat = Mat(Size(3, 12), CV_8U, &indices_data);\n+\n+ showMesh(win_name, obj_name, verts_mat, indices_mat);\n+ }\n+ else if (mode == RENDER_SHADING)\n+ {\n+ float verts_data[] = {\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 0\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 1\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 2\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 2\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 3\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, 0.0f, -1.0f, // 0\n+\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 4\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 5\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 6\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 6\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 7\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, 0.0f, +1.0f, // 4\n+\n+\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 0\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 1\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 5\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 5\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 4\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2), 0.0f, -1.0f, 0.0f, // 0\n+\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 3\n+ +size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 2\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 6\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 6\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 7\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2), 0.0f, +1.0f, 0.0f, // 3\n+\n+\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 0\n+ -size(0), +size(1), -size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 3\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 7\n+ -size(0), +size(1), +size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 7\n+ -size(0), -size(1), +size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 4\n+ -size(0), -size(1), -size(2), color(0), color(1), color(2), -1.0f, 0.0f, 0.0f, // 0\n+\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 1\n+ 
+size(0), +size(1), -size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 2\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 6\n+ +size(0), +size(1), +size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 6\n+ +size(0), -size(1), +size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 5\n+ +size(0), -size(1), -size(2), color(0), color(1), color(2), +1.0f, 0.0f, 0.0f, // 1\n+ };\n+\n+ const Mat verts_mat = Mat(Size(9, 36), CV_32F, &verts_data);\n+\n+ showMesh(win_name, obj_name, verts_mat);\n+ }\n+#endif\n+}\n+\n+void cv::viz3d::showPlane(const String& win_name, const String& obj_name, const Vec2f& size, const Vec3f& color, RenderMode mode)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(size);\n+ CV_UNUSED(color);\n+ CV_UNUSED(mode);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ if (mode == RENDER_WIREFRAME)\n+ {\n+ float points_data[] = {\n+ -size(0), 0.0f, -size(1), color(0), color(1), color(2),\n+ +size(0), 0.0f, -size(1), color(0), color(1), color(2),\n+ +size(0), 0.0f, -size(1), color(0), color(1), color(2),\n+ +size(0), 0.0f, +size(1), color(0), color(1), color(2),\n+ +size(0), 0.0f, +size(1), color(0), color(1), color(2),\n+ -size(0), 0.0f, +size(1), color(0), color(1), color(2),\n+ -size(0), 0.0f, +size(1), color(0), color(1), color(2),\n+ -size(0), 0.0f, -size(1), color(0), color(1), color(2),\n+ };\n+\n+ const Mat points_mat = Mat(Size(6, 8), CV_32F, points_data);\n+\n+ showLines(win_name, obj_name, points_mat);\n+ }\n+ else if (mode == RENDER_SIMPLE)\n+ {\n+ float verts_data[] = {\n+ -size(0), 0.0f, -size(1), color(0), color(1), color(2),\n+ +size(0), 0.0f, -size(1), color(0), color(1), color(2),\n+ +size(0), 0.0f, +size(1), color(0), color(1), color(2),\n+ -size(0), 0.0f, -size(1), color(0), color(1), color(2),\n+ -size(0), 0.0f, +size(1), color(0), color(1), color(2),\n+ +size(0), 0.0f, +size(1), color(0), color(1), color(2),\n+ };\n+\n+ const Mat verts_mat = Mat(Size(6, 6), CV_32F, verts_data);\n+\n+ showMesh(win_name, obj_name, verts_mat);\n+ }\n+ else if (mode == RENDER_SHADING)\n+ {\n+ float verts_data[] = {\n+ -size(0), 0.0f, -size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n+ +size(0), 0.0f, -size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n+ +size(0), 0.0f, +size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n+ -size(0), 0.0f, -size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n+ -size(0), 0.0f, +size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n+ +size(0), 0.0f, +size(1), color(0), color(1), color(2), 0.0f, 1.0f, 0.0f,\n+ };\n+\n+ const Mat verts_mat = Mat(Size(9, 6), CV_32F, verts_data);\n+\n+ showMesh(win_name, obj_name, verts_mat);\n+ }\n+#endif\n+}\n+\n+void cv::viz3d::showSphere(const String& win_name, const String& obj_name, float radius, const Vec3f& color, RenderMode mode, int divs)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(radius);\n+ CV_UNUSED(color);\n+ CV_UNUSED(mode);\n+ CV_UNUSED(divs);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ CV_Assert(divs >= 1);\n+\n+ if (mode == RENDER_WIREFRAME)\n+ {\n+ const float limit = 2.0f * 3.1415f;\n+ std::vector points_data;\n+\n+ for (int e = 0; e < 3; ++e)\n+ {\n+ auto ex = Vec3f::zeros();\n+ auto ey = Vec3f::zeros();\n+ ex((e + 0) % 3) = radius;\n+ ey((e + 1) % 3) = 
radius;\n+\n+ for (float t = 0.0f, n; t < limit;)\n+ {\n+ n = t + limit / (divs * 4);\n+ points_data.insert(points_data.end(), {\n+ ex(0) * cos(t) + ey(0) * sin(t), ex(1) * cos(t) + ey(1) * sin(t), ex(2) * cos(t) + ey(2) * sin(t), color(0), color(1), color(2),\n+ ex(0) * cos(n) + ey(0) * sin(n), ex(1) * cos(n) + ey(1) * sin(n), ex(2) * cos(n) + ey(2) * sin(n), color(0), color(1), color(2),\n+ });\n+ t = n;\n+ }\n+ }\n+\n+ const Mat points_mat = Mat(Size(6, points_data.size() / 6), CV_32F, points_data.data());\n+\n+ showLines(win_name, obj_name, points_mat);\n+ }\n+ else\n+ {\n+ std::vector verts_data;\n+\n+ for (int s = -1; s <= 1; s += 2)\n+ for (int e = 0; e < 3; ++e)\n+ {\n+ auto ex = Vec3f::zeros();\n+ auto ey = Vec3f::zeros();\n+ auto pz = Vec3f::zeros();\n+ ex((e + 1) % 3) = 1.0f / divs;\n+ ey((e + 2) % 3) = 1.0f / divs;\n+ pz(e) = s;\n+\n+ for (int x = -divs; x < divs; ++x)\n+ for (int y = -divs; y < divs; ++y)\n+ if (mode == RENDER_SIMPLE)\n+ verts_data.insert(verts_data.end(), {\n+ ex(0) * (x + 0) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2),\n+ ex(0) * (x + 1) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2),\n+ ex(0) * (x + 1) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2),\n+ ex(0) * (x + 1) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2),\n+ ex(0) * (x + 0) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2),\n+ ex(0) * (x + 0) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2),\n+ });\n+ else\n+ verts_data.insert(verts_data.end(), {\n+ ex(0) * (x + 0) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n+ ex(0) * (x + 1) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n+ ex(0) * (x + 1) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n+ ex(0) * (x + 1) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 1) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 1) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n+ ex(0) * (x + 0) + ey(0) * (y + 1) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 1) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 1) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n+ ex(0) * (x + 0) + ey(0) * (y + 0) + pz(0), ex(1) * (x + 0) + ey(1) * (y + 0) + pz(1), ex(2) * (x + 0) + ey(2) * (y + 0) + pz(2), color(0), color(1), color(2), 0.0f, 0.0f, 0.0f,\n+ });\n+ }\n+\n+ if (mode == RENDER_SIMPLE)\n+ {\n+ for (int i = 0; i < verts_data.size() / 6; ++i)\n+ {\n+ float l = sqrtf(verts_data[6 * i + 0] * verts_data[6 * i + 0] + verts_data[6 * i + 1] * verts_data[6 * i + 1] + verts_data[6 * i + 2] * verts_data[6 * i + 2]);\n+ verts_data[6 * i + 0] *= radius / l;\n+ verts_data[6 * i + 1] *= radius / l;\n+ verts_data[6 * i + 2] *= radius / l;\n+ }\n+\n+ const Mat verts_mat = Mat(Size(6, 
verts_data.size() / 6), CV_32F, verts_data.data());\n+ showMesh(win_name, obj_name, verts_mat);\n+ }\n+ else\n+ {\n+ for (int i = 0; i < verts_data.size() / 9; ++i)\n+ {\n+ float l = sqrtf(verts_data[9 * i + 0] * verts_data[9 * i + 0] + verts_data[9 * i + 1] * verts_data[9 * i + 1] + verts_data[9 * i + 2] * verts_data[9 * i + 2]);\n+ verts_data[9 * i + 6] = verts_data[9 * i + 0] / l;\n+ verts_data[9 * i + 7] = verts_data[9 * i + 1] / l;\n+ verts_data[9 * i + 8] = verts_data[9 * i + 2] / l;\n+ verts_data[9 * i + 0] *= radius / l;\n+ verts_data[9 * i + 1] *= radius / l;\n+ verts_data[9 * i + 2] *= radius / l;\n+ }\n+\n+ const Mat verts_mat = Mat(Size(9, verts_data.size() / 9), CV_32F, verts_data.data());\n+ showMesh(win_name, obj_name, verts_mat);\n+ }\n+ }\n+#endif\n+}\n+\n+void cv::viz3d::showCameraTrajectory(\n+ const String& win_name, const String& obj_name, InputArray trajectory,\n+ float aspect, float scale, Vec3f frustum_color, Vec3f line_color)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(trajectory);\n+ CV_UNUSED(aspect);\n+ CV_UNUSED(scale);\n+ CV_UNUSED(frustum_color);\n+ CV_UNUSED(line_color);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ CV_Assert(trajectory.dims() == 2 && trajectory.cols() == 6 && trajectory.depth() == CV_32F);\n+\n+ auto data = trajectory.getMat();\n+ std::vector points_data;\n+\n+ // Add frustums\n+ for (int i = 0; i < data.rows; ++i)\n+ {\n+ Vec3f position = { data.at(i, 0), data.at(i, 1), data.at(i, 2) };\n+ Vec3f forward = normalize(Vec3f { data.at(i, 3), data.at(i, 4), data.at(i, 5) });\n+ Vec3f world_up = { 0.0f, 1.0f, 0.0f };\n+ Vec3f right = forward.cross(world_up);\n+ Vec3f up = forward.cross(right);\n+\n+ Vec3f back_f[4] = {\n+ position + (-right * aspect - up) * scale,\n+ position + ( right * aspect - up) * scale,\n+ position + ( right * aspect + up) * scale,\n+ position + (-right * aspect + up) * scale,\n+ };\n+\n+ Vec3f front_f[4] = {\n+ position + (-right * aspect - up) * scale * 1.5f + forward * scale * 2.0f,\n+ position + (right * aspect - up) * scale * 1.5f + forward * scale * 2.0f,\n+ position + (right * aspect + up) * scale * 1.5f + forward * scale * 2.0f,\n+ position + (-right * aspect + up) * scale * 1.5f + forward * scale * 2.0f,\n+ };\n+\n+ points_data.insert(points_data.end(), {\n+ back_f[0](0), back_f[0](1), back_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ back_f[1](0), back_f[1](1), back_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ back_f[1](0), back_f[1](1), back_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ back_f[2](0), back_f[2](1), back_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ back_f[2](0), back_f[2](1), back_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ back_f[3](0), back_f[3](1), back_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ back_f[3](0), back_f[3](1), back_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ back_f[0](0), back_f[0](1), back_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+\n+ front_f[0](0), front_f[0](1), front_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[1](0), front_f[1](1), front_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[1](0), front_f[1](1), front_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[2](0), front_f[2](1), front_f[2](2), 
frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[2](0), front_f[2](1), front_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[3](0), front_f[3](1), front_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[3](0), front_f[3](1), front_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[0](0), front_f[0](1), front_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+\n+ back_f[0](0), back_f[0](1), back_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[0](0), front_f[0](1), front_f[0](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ back_f[1](0), back_f[1](1), back_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[1](0), front_f[1](1), front_f[1](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ back_f[2](0), back_f[2](1), back_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[2](0), front_f[2](1), front_f[2](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ back_f[3](0), back_f[3](1), back_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ front_f[3](0), front_f[3](1), front_f[3](2), frustum_color(0), frustum_color(1), frustum_color(2),\n+ });\n+\n+ }\n+\n+ // Add trajectory line\n+ for (int i = 1; i < data.rows; ++i)\n+ {\n+ points_data.insert(points_data.end(), {\n+ data.at(i - 1, 0), data.at(i - 1, 1), data.at(i - 1, 2), line_color(0), line_color(1), line_color(2),\n+ data.at(i, 0), data.at(i, 1), data.at(i, 2), line_color(0), line_color(1), line_color(2),\n+ });\n+ }\n+\n+ const Mat points_mat = Mat(Size(6, points_data.size() / 6), CV_32F, points_data.data());\n+ showLines(win_name, obj_name, points_mat);\n+\n+#endif\n+}\n+\n+void cv::viz3d::showMesh(const String& win_name, const String& obj_name, InputArray verts, InputArray indices)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(verts);\n+ CV_UNUSED(indices);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ Object* obj = win->get(obj_name);\n+ if (obj)\n+ delete obj;\n+ setOpenGlContext(win_name);\n+ obj = new Mesh(verts, indices);\n+ win->set(obj_name, obj);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::showMesh(const String& win_name, const String& obj_name, InputArray verts)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(verts);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ Object* obj = win->get(obj_name);\n+ if (obj)\n+ delete obj;\n+ setOpenGlContext(win_name);\n+ obj = new Mesh(verts);\n+ win->set(obj_name, obj);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::showPoints(const String& win_name, const String& obj_name, InputArray points)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(points);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ Object* obj = win->get(obj_name);\n+ if (obj)\n+ delete obj;\n+ setOpenGlContext(win_name);\n+ obj = new PointCloud(points);\n+ win->set(obj_name, obj);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::showRGBD(const String& win_name, const String& obj_name, 
InputArray img, const Matx33f& intrinsics, float scale)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(img);\n+ CV_UNUSED(intrinsics);\n+ CV_UNUSED(scale);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ CV_Assert(img.dims() == 2 && img.channels() == 4 && img.type() == CV_32FC4);\n+ \n+ Mat mat = img.getMat();\n+ Mat points;\n+\n+ // This section (RGBD to point cloud) should be changed to use the 3d module when\n+ // #20013 is merged.\n+\n+ float fx = intrinsics(0, 0);\n+ float fy = intrinsics(1, 1);\n+ float cx = intrinsics(0, 2);\n+ float cy = intrinsics(1, 2);\n+\n+ for (int u = 0; u < mat.cols; ++u)\n+ for (int v = 0; v < mat.rows; ++v)\n+ {\n+ Vec4f c = mat.at(v, u);\n+ float d = c(3) * 0.001f; // mm to m\n+\n+ float x_over_z = (cx - (float)u) / fx;\n+ float y_over_z = (cy - (float)v) / fy;\n+ float z = d/* / sqrt(1.0f + x_over_z * x_over_z + y_over_z * y_over_z)*/;\n+ float x = x_over_z * z;\n+ float y = y_over_z * z;\n+\n+ float point[] = {\n+ x * scale, y * scale, z * scale,\n+ c(0) / 255.0f, c(1) / 255.0f, c(2) / 255.0f,\n+ };\n+ points.push_back(Mat(1, 6, CV_32F, point));\n+ }\n+\n+ showPoints(win_name, obj_name, points);\n+#endif\n+}\n+\n+void cv::viz3d::showLines(const String& win_name, const String& obj_name, InputArray points)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(points);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ Object* obj = win->get(obj_name);\n+ if (obj)\n+ delete obj;\n+ setOpenGlContext(win_name);\n+ obj = new Lines(points);\n+ win->set(obj_name, obj);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::setObjectPosition(const String& win_name, const String& obj_name, const Vec3f& position)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(position);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ Object* obj = win->get(obj_name);\n+ if (!obj)\n+ CV_Error(cv::Error::StsObjectNotFound, \"Object not found\");\n+ obj->setPosition(position);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::setObjectRotation(const String& win_name, const String& obj_name, const Vec3f& rotation)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_UNUSED(rotation);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ Object* obj = win->get(obj_name);\n+ if (!obj)\n+ CV_Error(cv::Error::StsObjectNotFound, \"Object not found\");\n+ obj->setRotation(rotation);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+void cv::viz3d::destroyObject(const String& win_name, const String& obj_name)\n+{\n+ CV_TRACE_FUNCTION();\n+#ifndef HAVE_OPENGL\n+ CV_UNUSED(win_name);\n+ CV_UNUSED(obj_name);\n+ CV_Error(cv::Error::OpenGlNotSupported, \"The library is compiled without OpenGL support\");\n+#else\n+ Window* win = getWindow(win_name);\n+ win->set(obj_name, nullptr);\n+ updateWindow(win_name);\n+#endif\n+}\n+\n+#ifdef HAVE_OPENGL\n+\n+cv::viz3d::View::View()\n+{\n+ this->origin = { 0.0f, 0.0f, 0.0f };\n+ this->distance = 10.0f;\n+ this->position = { 0.0f, 0.0f, this->distance };\n+ this->up = { 0.0f, 
1.0f, 0.0f };\n+\n+ this->aspect = 1.0f;\n+ this->setPerspective(1.3f, 0.1f, 2000.0f);\n+ this->lookAt(this->origin, { 0.0f, 1.0f, 0.0f });\n+}\n+\n+void cv::viz3d::View::setAspect(float aspect)\n+{\n+ if (this->aspect != aspect)\n+ {\n+ this->aspect = aspect;\n+ this->setPerspective(this->fov, this->z_near, this->z_far);\n+ } \n+}\n+\n+void cv::viz3d::View::setPerspective(float fov, float z_near, float z_far)\n+{\n+ this->fov = fov;\n+ this->z_near = z_near;\n+ this->z_far = z_far;\n+\n+ float tan_half_fovy = ::tan(this->fov / 2.0f);\n+ this->proj = Matx44f::zeros();\n+ this->proj(0, 0) = 1.0f / (this->aspect * tan_half_fovy);\n+ this->proj(1, 1) = 1.0f / tan_half_fovy;\n+ this->proj(2, 2) = (this->z_far + this->z_near) / (this->z_far - this->z_near);\n+ this->proj(2, 3) = 1.0f;\n+ this->proj(3, 2) = -(2.0f * this->z_far * this->z_near) / (this->z_far - this->z_near);\n+}\n+\n+void cv::viz3d::View::rotate(float dx, float dy)\n+{\n+ this->position = normalize(this->position - this->origin);\n+ float theta = atan2(this->position(2), this->position(0));\n+ float phi = ::asin(this->position(1));\n+ theta -= dx * 0.05f;\n+ phi += dy * 0.05f;\n+ phi = max(-1.5f, min(1.5f, phi));\n+\n+ this->position(0) = ::cos(theta) * ::cos(phi) * this->distance;\n+ this->position(1) = ::sin(phi) * this->distance;\n+ this->position(2) = ::sin(theta) * ::cos(phi) * this->distance;\n+ this->position += this->origin;\n+\n+ this->lookAt(this->origin, this->up);\n+}\n+\n+void cv::viz3d::View::move(float dx, float dy)\n+{\n+ Vec3f forward = normalize(this->position - this->origin);\n+ Vec3f right = normalize(this->up.cross(forward));\n+ Vec3f up = right.cross(forward);\n+ Vec3f delta = normalize(right * dx - up * dy) * this->distance * 0.01f;\n+\n+ this->origin += delta;\n+ this->position += delta;\n+ this->lookAt(this->origin, this->up);\n+}\n+\n+void cv::viz3d::View::scaleDistance(float amount)\n+{\n+ this->distance *= amount;\n+ this->distance = max(0.1f, this->distance);\n+ this->position = normalize(this->position - this->origin) * this->distance + this->origin;\n+\n+ this->lookAt(this->origin, this->up);\n+}\n+\n+void cv::viz3d::View::lookAt(const Vec3f& point, const Vec3f& up)\n+{\n+ Vec3f f = normalize(point - this->position);\n+ Vec3f s = normalize(up.cross(f));\n+ Vec3f u = f.cross(s);\n+\n+ this->view = Matx44f::zeros();\n+ this->view(0, 0) = s[0];\n+ this->view(1, 0) = s[1];\n+ this->view(2, 0) = s[2];\n+ this->view(0, 1) = u[0];\n+ this->view(1, 1) = u[1];\n+ this->view(2, 1) = u[2];\n+ this->view(0, 2) = f[0];\n+ this->view(1, 2) = f[1];\n+ this->view(2, 2) = f[2];\n+ this->view(3, 0) = -s.dot(this->position);\n+ this->view(3, 1) = -u.dot(this->position);\n+ this->view(3, 2) = -f.dot(this->position);\n+ this->view(3, 3) = 1.0f;\n+}\n+\n+cv::viz3d::Window::Window(const String& name)\n+{\n+ this->name = name;\n+ this->sun.direction = normalize(Vec3f(0.3f, 1.0f, 0.5f));\n+ this->sun.ambient = { 0.1f, 0.1f, 0.1f };\n+ this->sun.diffuse = { 1.0f, 1.0f, 1.0f };\n+ this->sky_color = { 0.0f, 0.0f, 0.0f };\n+\n+\tfloat points[] = {\n+\t\t0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f,\n+\t\t0.5f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f,\n+\t\t0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f,\n+\t\t0.0f, 0.5f, 0.0f, 0.0f, 1.0f, 0.0f,\n+\t\t0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f,\n+\t\t0.0f, 0.0f, 0.5f, 0.0f, 0.0f, 1.0f,\n+\t};\n+\n+\tMat points_mat = Mat(Size(6, 6), CV_32F, points);\n+\n+\tthis->crosshair = new Lines(points_mat);\n+\tthis->shaders[this->crosshair->getShaderName()] = 
this->crosshair->buildShader();\n+\tthis->crosshair->setShader(this->shaders[this->crosshair->getShaderName()]);\n+\n+ this->grid = nullptr;\n+}\n+\n+cv::viz3d::Window::~Window()\n+{\n+\tdelete this->crosshair;\n+ if (this->grid != nullptr)\n+ delete this->grid;\n+\n+ for (auto obj : this->objects)\n+\t\tdelete obj.second;\n+}\n+\n+cv::viz3d::Object* cv::viz3d::Window::get(const String& obj_name)\n+{\n+\tauto it = this->objects.find(obj_name);\n+\tif (it == this->objects.end())\n+\t\treturn nullptr;\n+\treturn it->second;\n+}\n+\n+void cv::viz3d::Window::set(const String& obj_name, Object* obj)\n+{\n+\tauto it = this->objects.find(obj_name);\n+\tif (it != this->objects.end() && it->second != obj)\n+\t{\n+\t\tdelete it->second;\n+\n+\t\tif (obj == nullptr)\n+\t\t\tthis->objects.erase(it);\n+\t\telse\n+\t\t\tit->second = obj;\n+\t}\n+\telse if (obj != nullptr)\n+\t\tthis->objects[obj_name] = obj;\n+\n+ if (obj != nullptr)\n+ {\n+ String name = obj->getShaderName();\n+ auto it = this->shaders.find(name);\n+ if (it == this->shaders.end())\n+ this->shaders[name] = obj->buildShader();\n+ obj->setShader(this->shaders[name]);\n+ }\n+}\n+\n+void cv::viz3d::Window::setSun(const Vec3f& direction, const Vec3f& ambient, const Vec3f& diffuse)\n+{\n+ this->sun.direction = normalize(direction);\n+ this->sun.ambient = ambient;\n+ this->sun.diffuse = diffuse;\n+}\n+\n+void cv::viz3d::Window::setSky(const Vec3f& color)\n+{\n+ this->sky_color = color;\n+}\n+\n+static Mat getGridVertices(const View& view)\n+{\n+ const Vec3f grid_color = { 0.5f, 0.5f, 0.5f };\n+ const Vec3f axis_color = { 0.8f, 0.8f, 0.8f };\n+ const Vec3f center = view.getOrigin();\n+ const Vec3f camera_dir = view.getOrigin() - view.getPosition();\n+ const float scale = 0.3f;\n+\n+ float tick_step = 1.0f;\n+ while (view.getDistance() * scale / tick_step > 4.0f)\n+ tick_step *= 2.0f;\n+ while (view.getDistance() * scale / tick_step < 2.0f)\n+ tick_step *= 0.5f;\n+\n+ Mat points;\n+ float face_sign[3];\n+\n+ const Vec3f min_p = center - Vec3f(1.0f, 1.0f, 1.0f) * view.getDistance() * scale;\n+ const Vec3f max_p = center + Vec3f(1.0f, 1.0f, 1.0f) * view.getDistance() * scale;\n+\n+ // For each axis add a grid\n+ for (int ai = 0; ai < 3; ++ai)\n+ {\n+ Vec3f az = { 0.0f, 0.0f, 0.0f };\n+ az((ai + 2) % 3) = 1.0f;\n+\n+ // Check if face is positive or negative along the az axis\n+ face_sign[ai] = camera_dir.dot(az) > 0.0f ? 
1.0f : -1.0f;\n+\n+ float x = (floor(min_p(ai) / tick_step) + 1.0f) * tick_step;\n+ for (; x < max_p(ai); x += tick_step)\n+ {\n+ Vec3f a = min_p;\n+ Vec3f b = max_p;\n+ a((ai + 0) % 3) = x;\n+ b((ai + 0) % 3) = x;\n+ if (face_sign[ai] > 0.0f)\n+ a((ai + 2) % 3) = b((ai + 2) % 3);\n+ else\n+ b((ai + 2) % 3) = a((ai + 2) % 3);\n+\n+ float data[] = {\n+ a(0), a(1), a(2), grid_color(0), grid_color(1), grid_color(2),\n+ b(0), b(1), b(2), grid_color(0), grid_color(1), grid_color(2),\n+ };\n+\n+ points.push_back(Mat(2, 6, CV_32F, data));\n+ }\n+\n+ float y = (floor(min_p((ai + 1) % 3) / tick_step) + 1.0f) * tick_step;\n+ for (; y < max_p((ai + 1) % 3); y += tick_step)\n+ {\n+ Vec3f a = min_p;\n+ Vec3f b = max_p;\n+ a((ai + 1) % 3) = y;\n+ b((ai + 1) % 3) = y;\n+ if (face_sign[ai] > 0.0f)\n+ a((ai + 2) % 3) = b((ai + 2) % 3);\n+ else\n+ b((ai + 2) % 3) = a((ai + 2) % 3);\n+\n+ float data[] = {\n+ a(0), a(1), a(2), grid_color(0), grid_color(1), grid_color(2),\n+ b(0), b(1), b(2), grid_color(0), grid_color(1), grid_color(2),\n+ };\n+\n+ points.push_back(Mat(2, 6, CV_32F, data));\n+ }\n+ }\n+\n+ // Draw Ox, Oy and Oz axes and ticks\n+ {\n+ Vec3f a = { face_sign[1] > 0.0f ? min_p(0) : max_p(0), face_sign[2] > 0.0f ? max_p(1) : min_p(1), face_sign[0] > 0.0f ? max_p(2) : min_p(2) };\n+ Vec3f b = { face_sign[1] > 0.0f ? min_p(0) : max_p(0), face_sign[2] > 0.0f ? max_p(1) : min_p(1), face_sign[0] > 0.0f ? min_p(2) : max_p(2) };\n+ Vec3f c = { face_sign[1] > 0.0f ? max_p(0) : min_p(0), face_sign[2] > 0.0f ? max_p(1) : min_p(1), face_sign[0] > 0.0f ? min_p(2) : max_p(2) };\n+ Vec3f d = { face_sign[1] > 0.0f ? max_p(0) : min_p(0), face_sign[2] > 0.0f ? min_p(1) : max_p(1), face_sign[0] > 0.0f ? min_p(2) : max_p(2) };\n+\n+ float data[] = {\n+ a(0), a(1), a(2), 0.0f, 0.0f, 0.8f,\n+ b(0), b(1), b(2), 0.0f, 0.0f, 0.8f,\n+ b(0), b(1), b(2), 0.8f, 0.0f, 0.0f,\n+ c(0), c(1), c(2), 0.8f, 0.0f, 0.0f,\n+ c(0), c(1), c(2), 0.0f, 0.8f, 0.0f,\n+ d(0), d(1), d(2), 0.0f, 0.8f, 0.0f,\n+ };\n+\n+ points.push_back(Mat(6, 6, CV_32F, data));\n+\n+ float x = (floor(min_p(0) / tick_step) + 1.0f) * tick_step;\n+ for (; x < max_p(0); x += tick_step)\n+ {\n+ Vec3f a, b, c;\n+ a(0) = b(0) = x;\n+ a(1) = b(1) = face_sign[2] > 0.0f ? max_p(1) : min_p(1);\n+ a(2) = face_sign[0] > 0.0f ? min_p(2) : max_p(2);\n+ b(2) = a(2) - face_sign[0] * 0.03f * scale * view.getDistance();\n+\n+ float line[] = {\n+ a(0), a(1), a(2), 0.8f, 0.0f, 0.0f,\n+ b(0), b(1), b(2), 0.8f, 0.0f, 0.0f,\n+ };\n+\n+ points.push_back(Mat(2, 6, CV_32F, line));\n+ }\n+\n+ float y = (floor(min_p(1) / tick_step) + 1.0f) * tick_step;\n+ for (; y < max_p(1); y += tick_step)\n+ {\n+ Vec3f a, b;\n+ a(0) = b(0) = face_sign[1] > 0.0f ? max_p(0) : min_p(0);\n+ a(1) = b(1) = y;\n+ a(2) = face_sign[0] > 0.0f ? min_p(2) : max_p(2);\n+ b(2) = a(2) - face_sign[0] * 0.03f * scale * view.getDistance();\n+\n+ float line[] = {\n+ a(0), a(1), a(2), 0.0f, 0.8f, 0.0f,\n+ b(0), b(1), b(2), 0.0f, 0.8f, 0.0f,\n+ };\n+\n+ points.push_back(Mat(2, 6, CV_32F, line));\n+ }\n+\n+ float z = (floor(min_p(2) / tick_step) + 1.0f) * tick_step;\n+ for (; z < max_p(2); z += tick_step)\n+ {\n+ Vec3f a, b;\n+ a(0) = face_sign[1] > 0.0f ? min_p(0) : max_p(0);\n+ b(0) = a(0) - face_sign[1] * 0.03f * scale * view.getDistance();\n+ a(1) = b(1) = face_sign[2] > 0.0f ? 
max_p(1) : min_p(1);\n+ a(2) = b(2) = z;\n+\n+ float line[] = {\n+ a(0), a(1), a(2), 0.0f, 0.0f, 0.8f,\n+ b(0), b(1), b(2), 0.0f, 0.0f, 0.8f,\n+ };\n+\n+ points.push_back(Mat(2, 6, CV_32F, line));\n+ }\n+ }\n+\n+ return points;\n+}\n+\n+void cv::viz3d::Window::setGridVisible(bool visible)\n+{\n+ if (visible)\n+ {\n+ this->grid = new Lines(Mat(4096, 6, CV_32F), 0);\n+ this->grid->setShader(this->shaders[this->grid->getShaderName()]);\n+ }\n+ else if (this->grid != nullptr)\n+ {\n+ delete this->grid;\n+ this->grid = nullptr;\n+ }\n+}\n+\n+void cv::viz3d::Window::draw()\n+{\n+ Rect rect = getWindowImageRect(this->name);\n+ float aspect = (float)rect.width / (float)rect.height;\n+ this->view.setAspect(aspect);\n+\n+ ogl::enable(ogl::DEPTH_TEST);\n+ ogl::clearColor({ double(this->sky_color(0) * 255.0f), double(this->sky_color(1) * 255.0f), double(this->sky_color(2) * 255.0f), 255.0 });\n+\n+ if (this->grid)\n+ {\n+ Mat labels;\n+ static_cast(this->grid)->update(getGridVertices(this->view));\n+ this->grid->draw(this->view, this->sun);\n+ }\n+ else\n+ {\n+ this->crosshair->setPosition(this->view.getOrigin());\n+ this->crosshair->draw(this->view, this->sun);\n+ }\n+\n+ for (auto& obj : this->objects)\n+ obj.second->draw(this->view, this->sun);\n+}\n+\n+void cv::viz3d::Window::onMouse(int event, int x, int y, int flags)\n+{\n+ if (event == EVENT_LBUTTONDOWN || event == EVENT_RBUTTONDOWN)\n+ {\n+ this->l_mouse_x = x;\n+ this->l_mouse_y = y;\n+ }\n+ else if (event == EVENT_MOUSEMOVE && (flags & EVENT_FLAG_LBUTTON))\n+ {\n+ this->view.rotate(x - this->l_mouse_x, y - this->l_mouse_y);\n+ updateWindow(this->name);\n+ this->l_mouse_x = x;\n+ this->l_mouse_y = y;\n+ }\n+ else if (event == EVENT_MOUSEMOVE && (flags & EVENT_FLAG_RBUTTON))\n+ {\n+ this->view.move(x - this->l_mouse_x, y - this->l_mouse_y);\n+ updateWindow(this->name);\n+ this->l_mouse_x = x;\n+ this->l_mouse_y = y;\n+ }\n+ else if (event == EVENT_MOUSEWHEEL)\n+ {\n+ this->view.scaleDistance(min(1.2f, max(0.8f, 1.0f - getMouseWheelDelta(flags) / 12.0f)));\n+ updateWindow(this->name);\n+ }\n+}\n+\n+cv::viz3d::Object::Object()\n+{\n+\tthis->position = { 0.0f, 0.0f, 0.0f };\n+\tthis->rotation = { 0.0f, 0.0f, 0.0f };\n+\tthis->model = Matx44f::eye();\n+}\n+\n+void cv::viz3d::Object::setPosition(const Vec3f& position)\n+{\n+ this->position = position;\n+ this->updateModel();\n+}\n+\n+void cv::viz3d::Object::setRotation(const Vec3f& rotation)\n+{\n+ this->rotation = rotation;\n+ this->updateModel();\n+}\n+\n+void cv::viz3d::Object::updateModel()\n+{\n+ // Calculate rotation matrices\n+ Matx44f rot_a = Matx44f::eye();\n+ rot_a(0, 0) = ::cos(this->rotation(0));\n+ rot_a(1, 0) = ::sin(this->rotation(0));\n+ rot_a(0, 1) = -::sin(this->rotation(0));\n+ rot_a(1, 1) = ::cos(this->rotation(0));\n+\n+ Matx44f rot_b = Matx44f::eye();\n+ rot_b(1, 1) = ::cos(this->rotation(1));\n+ rot_b(2, 1) = ::sin(this->rotation(1));\n+ rot_b(1, 2) = -::sin(this->rotation(1));\n+ rot_b(2, 2) = ::cos(this->rotation(1));\n+\n+ Matx44f rot_c = Matx44f::eye();\n+ rot_c(0, 0) = ::cos(this->rotation(2));\n+ rot_c(2, 0) = ::sin(this->rotation(2));\n+ rot_c(0, 2) = -::sin(this->rotation(2));\n+ rot_c(2, 2) = ::cos(this->rotation(2));\n+\n+ // Calculate translation matrix\n+ Matx44f trans = Matx44f::eye();\n+ trans(3, 0) = this->position(0);\n+ trans(3, 1) = this->position(1);\n+ trans(3, 2) = this->position(2);\n+\n+ // Multiply matrices\n+ this->model = rot_c * rot_b * rot_a * trans;\n+}\n+\n+cv::viz3d::Mesh::Mesh(InputArray verts, InputArray indices)\n+{\n+ // Check parameter 
validity\n+ CV_Assert(verts.channels() == 1 && verts.dims() == 2 && (verts.size().width == 3 || verts.size().width == 6 || verts.size().width == 9));\n+ CV_Assert(verts.depth() == CV_32F);\n+ CV_Assert(indices.channels() == 1 && indices.dims() == 2 && indices.size().width == 3);\n+ CV_Assert(indices.depth() == CV_8U || indices.depth() == CV_16U || indices.depth() == CV_32S);\n+\n+ // Prepare buffers\n+ if (verts.kind() == _InputArray::OPENGL_BUFFER)\n+ this->verts = verts.getOGlBuffer();\n+ else\n+ this->verts.copyFrom(verts, ogl::Buffer::ARRAY_BUFFER);\n+\n+ if (indices.kind() == _InputArray::OPENGL_BUFFER)\n+ this->indices = indices.getOGlBuffer();\n+ else\n+ this->indices.copyFrom(indices, ogl::Buffer::ELEMENT_ARRAY_BUFFER);\n+\n+ switch (indices.depth())\n+ {\n+ case CV_8U:\n+ this->index_type = ogl::UNSIGNED_BYTE;\n+ break;\n+ case CV_16U:\n+ this->index_type = ogl::UNSIGNED_SHORT;\n+ break;\n+ case CV_32S:\n+ this->index_type = ogl::UNSIGNED_INT;\n+ break;\n+ }\n+\n+ // Prepare vertex array\n+ if (verts.size().width == 3)\n+ {\n+ this->va.create({\n+ ogl::Attribute(", - "comment_created_at": "2021-10-07T21:05:49+00:00", - "comment_author": "JulieBar", - "comment_body": "decrease duplication\r\n```\r\ngenerate_attr(shader_loc) {\r\nreturn ogl::Attribute(\r\n\t\t\t this->verts,\r\n\t\t\t verts.size().width * sizeof(float), 0,\r\n\t\t\t 3, ogl::Attribute::FLOAT,\r\n\t\t\t false, false,\r\n\t\t\t shader_loc\r\n\t\t );\r\n}\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1924918226", - "pr_number": 26772, - "pr_file": "modules/photo/src/denoising.cpp", - "created_at": "2025-01-22T08:41:08+00:00", - "commented_code": "int hn = (int)h.size();\n double granularity = (double)std::max(1., (double)dst.total()/(1 << 16));\n\n switch (srcImgs[0].type())\n int depth = CV_MAT_DEPTH(srcImgs[0].type());", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1924918226", - "repo_full_name": "opencv/opencv", - "pr_number": 26772, - "pr_file": "modules/photo/src/denoising.cpp", - "discussion_id": "1924918226", - "commented_code": "@@ -249,60 +249,129 @@ static void fastNlMeansDenoisingMulti_( const std::vector& srcImgs, Mat& ds\n int hn = (int)h.size();\n double granularity = (double)std::max(1., (double)dst.total()/(1 << 16));\n \n- switch (srcImgs[0].type())\n+ int depth = CV_MAT_DEPTH(srcImgs[0].type());", - "comment_created_at": "2025-01-22T08:41:08+00:00", - "comment_author": "sturkmen72", - "comment_body": "what about\r\n```\r\n int depth = srcImgs[0].depth();\r\n int channels = srcImgs[0].channels();\r\nfor better readibility\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1800592693", - "pr_number": 26281, - "pr_file": "modules/core/src/opengl.cpp", - "created_at": "2024-10-15T07:19:59+00:00", - "commented_code": "#elif !defined(HAVE_OPENCL_OPENGL_SHARING)\n NO_OPENCL_SHARING_ERROR;\n#else\n cl_uint numPlatforms;\n cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);\n cl_uint platformsCnt = 0;\n cl_uint devCnt = 0;\n cl_device_id* devices = nullptr;\n cl_uint devUsed = 0;\n cl_context context = nullptr;\n\n cl_int status = clGetPlatformIDs(0, NULL, &platformsCnt);\n if (status != CL_SUCCESS)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get number of platforms: %d\", status));\n if (numPlatforms == 0)\n if (platformsCnt == 0)\n CV_Error(cv::Error::OpenCLInitError, \"OpenCL: No available platforms\");\n\n std::vector platforms(numPlatforms);\n status = clGetPlatformIDs(numPlatforms, &platforms[0], 
NULL);\n std::vector platforms(platformsCnt);\n status = clGetPlatformIDs(platformsCnt, &platforms[0], NULL);\n if (status != CL_SUCCESS)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get number of platforms: %d\", status));\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get platforms: %d\", status));\n\n\n // TODO Filter platforms by name from OPENCV_OPENCL_DEVICE\n bool sharingSupported = false;\n\n int found = -1;\n cl_device_id device = NULL;\n cl_context context = NULL;\n for (unsigned int i = 0; (!sharingSupported && (i < platformsCnt)); ++i) {\n status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &devCnt);\n if (status != CL_SUCCESS)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: No devices available: %d\", status));\n\n for (int i = 0; i < (int)numPlatforms; i++)\n {\n // query platform extension: presence of \"cl_khr_gl_sharing\" extension is required\n {\n AutoBuffer extensionStr;\n try {\n devices = new cl_device_id[devCnt];\n\n size_t extensionSize;\n status = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 0, NULL, &extensionSize);\n if (status == CL_SUCCESS)\n {\n extensionStr.allocate(extensionSize+1);\n status = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, extensionSize, (char*)extensionStr.data(), NULL);\n }\n status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, devCnt, devices, NULL);\n if (status != CL_SUCCESS)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get platform extension string: %d\", status));\n\n if (!strstr((const char*)extensionStr.data(), \"cl_khr_gl_sharing\"))\n continue;\n }\n\n clGetGLContextInfoKHR_fn clGetGLContextInfoKHR = (clGetGLContextInfoKHR_fn)\n clGetExtensionFunctionAddressForPlatform(platforms[i], \"clGetGLContextInfoKHR\");\n if (!clGetGLContextInfoKHR)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get platform devices: %d\", status));\n\n for (unsigned int j = 0; (!sharingSupported && (j < devCnt)); ++j) {\n size_t extensionSize;\n status = clGetDeviceInfo(devices[j], CL_DEVICE_EXTENSIONS, 0, NULL, &extensionSize );\n if (status != CL_SUCCESS)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: No devices available: %d\", status));\n\n if(extensionSize > 0)\n {\n char* extensions = nullptr;\n\n try {\n extensions = new char[extensionSize];\n\n status = clGetDeviceInfo(devices[j], CL_DEVICE_EXTENSIONS, extensionSize, extensions, &extensionSize);\n if (status != CL_SUCCESS)\n continue;\n } catch(...) 
{\n CV_Error(cv::Error::OpenCLInitError, \"OpenCL: Exception thrown during device extensions gathering\");\n }\n\n std::string devString;\n\n if(extensions != nullptr) {\n devString = extensions;\n delete[] extensions;\n }\n else {\n CV_Error(cv::Error::OpenCLInitError, \"OpenCL: Unexpected error during device extensions gathering\");\n }\n\n size_t oldPos = 0;\n size_t spacePos = devString.find(' ', oldPos); // extensions string is space delimited\n while (spacePos != devString.npos) {\n if (strcmp(GL_SHARING_EXTENSION,\n devString.substr(oldPos, spacePos - oldPos).c_str())\n == 0) {\n // Device supports context sharing with OpenGL\n devUsed = i;\n sharingSupported = true;\n break;\n }\n do {\n oldPos = spacePos + 1;\n spacePos = devString.find(' ', oldPos);\n } while (spacePos == oldPos);\n }\n }\n }", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1800592693", - "repo_full_name": "opencv/opencv", - "pr_number": 26281, - "pr_file": "modules/core/src/opengl.cpp", - "discussion_id": "1800592693", - "commented_code": "@@ -1635,94 +1641,148 @@ Context& initializeContextFromGL()\n #elif !defined(HAVE_OPENCL_OPENGL_SHARING)\n NO_OPENCL_SHARING_ERROR;\n #else\n- cl_uint numPlatforms;\n- cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);\n+ cl_uint platformsCnt = 0;\n+ cl_uint devCnt = 0;\n+ cl_device_id* devices = nullptr;\n+ cl_uint devUsed = 0;\n+ cl_context context = nullptr;\n+\n+ cl_int status = clGetPlatformIDs(0, NULL, &platformsCnt);\n if (status != CL_SUCCESS)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get number of platforms: %d\", status));\n- if (numPlatforms == 0)\n+ if (platformsCnt == 0)\n CV_Error(cv::Error::OpenCLInitError, \"OpenCL: No available platforms\");\n \n- std::vector platforms(numPlatforms);\n- status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL);\n+ std::vector platforms(platformsCnt);\n+ status = clGetPlatformIDs(platformsCnt, &platforms[0], NULL);\n if (status != CL_SUCCESS)\n- CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get number of platforms: %d\", status));\n+ CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get platforms: %d\", status));\n+\n \n // TODO Filter platforms by name from OPENCV_OPENCL_DEVICE\n+ bool sharingSupported = false;\n \n- int found = -1;\n- cl_device_id device = NULL;\n- cl_context context = NULL;\n+ for (unsigned int i = 0; (!sharingSupported && (i < platformsCnt)); ++i) {\n+ status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &devCnt);\n+ if (status != CL_SUCCESS)\n+ CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: No devices available: %d\", status));\n \n- for (int i = 0; i < (int)numPlatforms; i++)\n- {\n- // query platform extension: presence of \"cl_khr_gl_sharing\" extension is required\n- {\n- AutoBuffer extensionStr;\n+ try {\n+ devices = new cl_device_id[devCnt];\n \n- size_t extensionSize;\n- status = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 0, NULL, &extensionSize);\n- if (status == CL_SUCCESS)\n- {\n- extensionStr.allocate(extensionSize+1);\n- status = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, extensionSize, (char*)extensionStr.data(), NULL);\n- }\n+ status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, devCnt, devices, NULL);\n if (status != CL_SUCCESS)\n- CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get platform extension string: %d\", status));\n-\n- if (!strstr((const char*)extensionStr.data(), \"cl_khr_gl_sharing\"))\n- continue;\n- }\n-\n- clGetGLContextInfoKHR_fn 
clGetGLContextInfoKHR = (clGetGLContextInfoKHR_fn)\n- clGetExtensionFunctionAddressForPlatform(platforms[i], \"clGetGLContextInfoKHR\");\n- if (!clGetGLContextInfoKHR)\n+ CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get platform devices: %d\", status));\n+\n+ for (unsigned int j = 0; (!sharingSupported && (j < devCnt)); ++j) {\n+ size_t extensionSize;\n+ status = clGetDeviceInfo(devices[j], CL_DEVICE_EXTENSIONS, 0, NULL, &extensionSize );\n+ if (status != CL_SUCCESS)\n+ CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: No devices available: %d\", status));\n+\n+ if(extensionSize > 0)\n+ {\n+ char* extensions = nullptr;\n+\n+ try {\n+ extensions = new char[extensionSize];\n+\n+ status = clGetDeviceInfo(devices[j], CL_DEVICE_EXTENSIONS, extensionSize, extensions, &extensionSize);\n+ if (status != CL_SUCCESS)\n+ continue;\n+ } catch(...) {\n+ CV_Error(cv::Error::OpenCLInitError, \"OpenCL: Exception thrown during device extensions gathering\");\n+ }\n+\n+ std::string devString;\n+\n+ if(extensions != nullptr) {\n+ devString = extensions;\n+ delete[] extensions;\n+ }\n+ else {\n+ CV_Error(cv::Error::OpenCLInitError, \"OpenCL: Unexpected error during device extensions gathering\");\n+ }\n+\n+ size_t oldPos = 0;\n+ size_t spacePos = devString.find(' ', oldPos); // extensions string is space delimited\n+ while (spacePos != devString.npos) {\n+ if (strcmp(GL_SHARING_EXTENSION,\n+ devString.substr(oldPos, spacePos - oldPos).c_str())\n+ == 0) {\n+ // Device supports context sharing with OpenGL\n+ devUsed = i;\n+ sharingSupported = true;\n+ break;\n+ }\n+ do {\n+ oldPos = spacePos + 1;\n+ spacePos = devString.find(' ', oldPos);\n+ } while (spacePos == oldPos);\n+ }\n+ }\n+ }", - "comment_created_at": "2024-10-15T07:19:59+00:00", - "comment_author": "asmorkalov", - "comment_body": "Let's extract extension search as dedicated static function. 
It should make the code more readable.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1677867568", - "pr_number": 25608, - "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", - "created_at": "2024-07-15T13:50:53+00:00", - "commented_code": "WebPBitstreamFeatures features;\n if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1677867568", - "repo_full_name": "opencv/opencv", - "pr_number": 25608, - "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", - "discussion_id": "1677867568", - "commented_code": "@@ -126,8 +84,39 @@ bool WebPDecoder::readHeader()\n WebPBitstreamFeatures features;\n if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))", - "comment_created_at": "2024-07-15T13:50:53+00:00", - "comment_author": "vrabaud", - "comment_body": "Just to reduce the imbricated `if` , write it as:\r\n```cpp\r\nif (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features)) return false;\r\n```\r\nAnd then you can remove the big `{}` below.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1732463520", - "pr_number": 25608, - "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", - "created_at": "2024-08-27T09:21:47+00:00", - "commented_code": "}\n\n WebPBitstreamFeatures features;\n if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))\n if (VP8_STATUS_OK < WebPGetFeatures(header, sizeof(header), &features)) return false;\n\n m_has_animation = features.has_animation == 1;\n\n if (m_has_animation)\n {\n CV_CheckEQ(features.has_animation, 0, \"WebP backend does not support animated webp images\");\n fs.seekg(0, std::ios::beg); CV_Assert(fs && \"File stream error\");\n data.create(1, validateToInt(fs_size), CV_8UC1);\n fs.read((char*)data.ptr(), fs_size);\n CV_Assert(fs && \"Can't read file data\");\n fs.close();\n\n m_width = features.width;\n m_height = features.height;\n CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);\n\n if (features.has_alpha)\n {\n m_type = CV_8UC4;\n channels = 4;\n }\n WebPData webp_data;\n webp_data.bytes = (const uint8_t*)data.ptr();\n webp_data.size = data.total();\n\n WebPAnimDecoderOptions dec_options;\n WebPAnimDecoderOptionsInit(&dec_options);\n\n if(m_use_rgb)", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1732463520", - "repo_full_name": "opencv/opencv", - "pr_number": 25608, - "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", - "discussion_id": "1732463520", - "commented_code": "@@ -124,28 +81,51 @@ bool WebPDecoder::readHeader()\n }\n \n WebPBitstreamFeatures features;\n- if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))\n+ if (VP8_STATUS_OK < WebPGetFeatures(header, sizeof(header), &features)) return false;\n+\n+ m_has_animation = features.has_animation == 1;\n+\n+ if (m_has_animation)\n {\n- CV_CheckEQ(features.has_animation, 0, \"WebP backend does not support animated webp images\");\n+ fs.seekg(0, std::ios::beg); CV_Assert(fs && \"File stream error\");\n+ data.create(1, validateToInt(fs_size), CV_8UC1);\n+ fs.read((char*)data.ptr(), fs_size);\n+ CV_Assert(fs && \"Can't read file data\");\n+ fs.close();\n \n- m_width = features.width;\n- m_height = features.height;\n+ CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);\n \n- if (features.has_alpha)\n- {\n- m_type = CV_8UC4;\n- channels = 4;\n- }\n+ WebPData webp_data;\n+ webp_data.bytes = (const uint8_t*)data.ptr();\n+ webp_data.size = 
data.total();\n+\n+ WebPAnimDecoderOptions dec_options;\n+ WebPAnimDecoderOptionsInit(&dec_options);\n+\n+ if(m_use_rgb)", - "comment_created_at": "2024-08-27T09:21:47+00:00", - "comment_author": "sturkmen72", - "comment_body": "which line below better for readability?\r\n\r\n```\r\nif(m_use_rgb)\r\n dec_options.color_mode = features.has_alpha ? MODE_RGBA : MODE_RGB;\r\nelse\r\n dec_options.color_mode = features.has_alpha ? MODE_BGRA : MODE_BGR;\r\n```\r\n```\r\ndec_options.color_mode = m_use_rgb\r\n ? (features.has_alpha ? MODE_RGBA : MODE_RGB)\r\n : (features.has_alpha ? MODE_BGRA : MODE_BGR);\r\n```\r\n@vrabaud , @asmorkalov ", - "pr_file_module": null - }, - { - "comment_id": "1732760920", - "repo_full_name": "opencv/opencv", - "pr_number": 25608, - "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", - "discussion_id": "1732463520", - "commented_code": "@@ -124,28 +81,51 @@ bool WebPDecoder::readHeader()\n }\n \n WebPBitstreamFeatures features;\n- if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))\n+ if (VP8_STATUS_OK < WebPGetFeatures(header, sizeof(header), &features)) return false;\n+\n+ m_has_animation = features.has_animation == 1;\n+\n+ if (m_has_animation)\n {\n- CV_CheckEQ(features.has_animation, 0, \"WebP backend does not support animated webp images\");\n+ fs.seekg(0, std::ios::beg); CV_Assert(fs && \"File stream error\");\n+ data.create(1, validateToInt(fs_size), CV_8UC1);\n+ fs.read((char*)data.ptr(), fs_size);\n+ CV_Assert(fs && \"Can't read file data\");\n+ fs.close();\n \n- m_width = features.width;\n- m_height = features.height;\n+ CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);\n \n- if (features.has_alpha)\n- {\n- m_type = CV_8UC4;\n- channels = 4;\n- }\n+ WebPData webp_data;\n+ webp_data.bytes = (const uint8_t*)data.ptr();\n+ webp_data.size = data.total();\n+\n+ WebPAnimDecoderOptions dec_options;\n+ WebPAnimDecoderOptionsInit(&dec_options);\n+\n+ if(m_use_rgb)", - "comment_created_at": "2024-08-27T12:31:30+00:00", - "comment_author": "vrabaud", - "comment_body": "I'd say the second one.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-consistent-descriptive-naming.json b/_reviewers/opencv-consistent-descriptive-naming.json new file mode 100644 index 0000000..35375a1 --- /dev/null +++ b/_reviewers/opencv-consistent-descriptive-naming.json @@ -0,0 +1,270 @@ +[ + { + "discussion_id": "2092504225", + "pr_number": 27318, + "pr_file": "modules/core/include/opencv2/core/mat.hpp", + "created_at": "2025-05-16T07:35:23+00:00", + "commented_code": "*/\n void copyTo( OutputArray m, InputArray mask ) const;\n\n /** @brief Overwrites the existing matrix\n\n This method overwrites existing matrix data, just like copyTo().\n But if it does not have a proper size or type before the operation, an exception is thrown.\n This function is helpful to update ROI in an existing matrix.\n\n If (re)allocation of destination memory is necessary, use copyTo() .\n\n @param m Destination matrix.\n If it does not have a proper size or type before the operation, an exception is thrown.\n\n @sa copyTo\n\n */\n void overwriteTo( OutputArray m ) const;", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2092504225", + "repo_full_name": "opencv/opencv", + "pr_number": 27318, + "pr_file": "modules/core/include/opencv2/core/mat.hpp", + "discussion_id": "2092504225", + "commented_code": "@@ -1250,6 +1255,30 @@ class CV_EXPORTS Mat\n */\n void copyTo( OutputArray m, InputArray mask ) const;\n \n+ /** @brief 
Overwrites the existing matrix\n+\n+ This method overwrites existing matrix data, just like copyTo().\n+ But if it does not have a proper size or type before the operation, an exception is thrown.\n+ This function is helpful to update ROI in an existing matrix.\n+\n+ If (re)allocation of destination memory is necessary, use copyTo() .\n+\n+ @param m Destination matrix.\n+ If it does not have a proper size or type before the operation, an exception is thrown.\n+\n+ @sa copyTo\n+\n+ */\n+ void overwriteTo( OutputArray m ) const;", + "comment_created_at": "2025-05-16T07:35:23+00:00", + "comment_author": "opencv-alalek", + "comment_body": "Just `writeTo()` ?", + "pr_file_module": null + }, + { + "comment_id": "2092623855", + "repo_full_name": "opencv/opencv", + "pr_number": 27318, + "pr_file": "modules/core/include/opencv2/core/mat.hpp", + "discussion_id": "2092504225", + "commented_code": "@@ -1250,6 +1255,30 @@ class CV_EXPORTS Mat\n */\n void copyTo( OutputArray m, InputArray mask ) const;\n \n+ /** @brief Overwrites the existing matrix\n+\n+ This method overwrites existing matrix data, just like copyTo().\n+ But if it does not have a proper size or type before the operation, an exception is thrown.\n+ This function is helpful to update ROI in an existing matrix.\n+\n+ If (re)allocation of destination memory is necessary, use copyTo() .\n+\n+ @param m Destination matrix.\n+ If it does not have a proper size or type before the operation, an exception is thrown.\n+\n+ @sa copyTo\n+\n+ */\n+ void overwriteTo( OutputArray m ) const;", + "comment_created_at": "2025-05-16T08:49:47+00:00", + "comment_author": "Kumataro", + "comment_body": "Thank you for your comment ! Naming functions is very difficult problem.\r\n\r\nIt is required to express the condition that src and dst must be of the same type and size.\r\nFor some verbs(`writeTo`, `updateTo`), I feel that the difference with copyTo() is hard to explain.\r\n\r\nBelow are some possible function names that I was considering:\r\n```c++\r\n(0) src.copyTo(roi); // currently\r\n(1) src.updateTo(roi);.\r\n(2) src.writeTo(roi);\r\n(3) src.overwriteTo(roi);\r\n(4) src.rewriteTo(roi);\r\n(5) src.recopyTo(roi);\r\n(6) src.copyToWithoutReallocation(roi); // too long.\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2094111550", + "repo_full_name": "opencv/opencv", + "pr_number": 27318, + "pr_file": "modules/core/include/opencv2/core/mat.hpp", + "discussion_id": "2092504225", + "commented_code": "@@ -1250,6 +1255,30 @@ class CV_EXPORTS Mat\n */\n void copyTo( OutputArray m, InputArray mask ) const;\n \n+ /** @brief Overwrites the existing matrix\n+\n+ This method overwrites existing matrix data, just like copyTo().\n+ But if it does not have a proper size or type before the operation, an exception is thrown.\n+ This function is helpful to update ROI in an existing matrix.\n+\n+ If (re)allocation of destination memory is necessary, use copyTo() .\n+\n+ @param m Destination matrix.\n+ If it does not have a proper size or type before the operation, an exception is thrown.\n+\n+ @sa copyTo\n+\n+ */\n+ void overwriteTo( OutputArray m ) const;", + "comment_created_at": "2025-05-17T12:29:56+00:00", + "comment_author": "Kumataro", + "comment_body": "I reconsidered it again., `writeTo()` naming is suitable for this purpose, thank you very much ! 
I fixed it.\r\n\r\n- `src.writeTo(emptyMat)` makes an exception -> natural.\r\n- `src.writeTo(differentTypeMat)` makes an exception -> natural\r\n- `src.writeTo(differentSizeMat)` makes an exception -> natural.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2018137510", + "pr_number": 27051, + "pr_file": "modules/photo/include/opencv2/photo/ccm.hpp", + "created_at": "2025-03-28T08:29:09+00:00", + "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n//\n// Author: Longbu Wang \n// Jinheng Zhang \n// Chenqi Shan \n\n#ifndef OPENCV_PHOTO_CCM_HPP\n#define OPENCV_PHOTO_CCM_HPP\n\n#include \n#include \n\nnamespace cv\n{\nnamespace ccm\n{\n\n/** @defgroup ccm Color Correction module\n@{\n*/\n\n/** @brief Enum of the possible types of ccm.\n*/\nenum CCMType\n{\n CCM_LINEAR, ///< Uses a \\f$3\\times3\\f$ matrix to linearly transform RGB values without offsets.\n CCM_AFFINE, ///< Uses a \\f$4\\times3\\f$ matrix to affine transform RGB values with both scaling and offset terms.\n};\n\n/** @brief Enum of the possible types of initial method.\n*/\nenum InitialMethodType\n{\n INITIAL_METHOD_WHITE_BALANCE, ///< The white balance method. The initial value is:\\n\n /// \\f$\n /// M_{CCM}=\n /// \\begin{bmatrix}\n /// k_R & 0 & 0\\\\\n /// 0 & k_G & 0\\\\\n /// 0 & 0 & k_B\\\\\n /// \\end{bmatrix}\n /// \\f$\\n\n /// where\\n\n /// \\f$\n /// k_R=mean(R_{li}')/mean(R_{li})\\\\\n /// k_R=mean(G_{li}')/mean(G_{li})\\\\\n /// k_R=mean(B_{li}')/mean(B_{li})\n /// \\f$\n INITIAL_METHOD_LEAST_SQUARE, ///\n+// Jinheng Zhang \n+// Chenqi Shan \n+\n+#ifndef OPENCV_PHOTO_CCM_HPP\n+#define OPENCV_PHOTO_CCM_HPP\n+\n+#include \n+#include \n+\n+namespace cv\n+{\n+namespace ccm\n+{\n+\n+/** @defgroup ccm Color Correction module\n+@{\n+*/\n+\n+/** @brief Enum of the possible types of ccm.\n+*/\n+enum CCMType\n+{\n+ CCM_LINEAR, ///< Uses a \\f$3\\times3\\f$ matrix to linearly transform RGB values without offsets.\n+ CCM_AFFINE, ///< Uses a \\f$4\\times3\\f$ matrix to affine transform RGB values with both scaling and offset terms.\n+};\n+\n+/** @brief Enum of the possible types of initial method.\n+*/\n+enum InitialMethodType\n+{\n+ INITIAL_METHOD_WHITE_BALANCE, ///< The white balance method. The initial value is:\\n\n+ /// \\f$\n+ /// M_{CCM}=\n+ /// \\begin{bmatrix}\n+ /// k_R & 0 & 0\\\\\n+ /// 0 & k_G & 0\\\\\n+ /// 0 & 0 & k_B\\\\\n+ /// \\end{bmatrix}\n+ /// \\f$\\n\n+ /// where\\n\n+ /// \\f$\n+ /// k_R=mean(R_{li}')/mean(R_{li})\\\\\n+ /// k_R=mean(G_{li}')/mean(G_{li})\\\\\n+ /// k_R=mean(B_{li}')/mean(B_{li})\n+ /// \\f$\n+ INITIAL_METHOD_LEAST_SQUARE, ///\n// Jinheng Zhang \n// Chenqi Shan \n\n#ifndef OPENCV_PHOTO_CCM_HPP\n#define OPENCV_PHOTO_CCM_HPP\n\n#include \n#include \n\nnamespace cv\n{\nnamespace ccm\n{\n\n/** @defgroup ccm Color Correction module\n@{\n*/\n\n/** @brief Enum of the possible types of ccm.\n*/\nenum CCMType\n{\n CCM_LINEAR, ///< Uses a \\f$3\\times3\\f$ matrix to linearly transform RGB values without offsets.\n CCM_AFFINE, ///< Uses a \\f$4\\times3\\f$ matrix to affine transform RGB values with both scaling and offset terms.\n};\n\n/** @brief Enum of the possible types of initial method.\n*/\nenum InitialMethodType\n{\n INITIAL_METHOD_WHITE_BALANCE, ///< The white balance method. 
The initial value is:\\n\n /// \\f$\n /// M_{CCM}=\n /// \\begin{bmatrix}\n /// k_R & 0 & 0\\\\\n /// 0 & k_G & 0\\\\\n /// 0 & 0 & k_B\\\\\n /// \\end{bmatrix}\n /// \\f$\\n\n /// where\\n\n /// \\f$\n /// k_R=mean(R_{li}')/mean(R_{li})\\\\\n /// k_R=mean(G_{li}')/mean(G_{li})\\\\\n /// k_R=mean(B_{li}')/mean(B_{li})\n /// \\f$\n INITIAL_METHOD_LEAST_SQUARE, ///\n+// Jinheng Zhang \n+// Chenqi Shan \n+\n+#ifndef OPENCV_PHOTO_CCM_HPP\n+#define OPENCV_PHOTO_CCM_HPP\n+\n+#include \n+#include \n+\n+namespace cv\n+{\n+namespace ccm\n+{\n+\n+/** @defgroup ccm Color Correction module\n+@{\n+*/\n+\n+/** @brief Enum of the possible types of ccm.\n+*/\n+enum CCMType\n+{\n+ CCM_LINEAR, ///< Uses a \\f$3\\times3\\f$ matrix to linearly transform RGB values without offsets.\n+ CCM_AFFINE, ///< Uses a \\f$4\\times3\\f$ matrix to affine transform RGB values with both scaling and offset terms.\n+};\n+\n+/** @brief Enum of the possible types of initial method.\n+*/\n+enum InitialMethodType\n+{\n+ INITIAL_METHOD_WHITE_BALANCE, ///< The white balance method. The initial value is:\\n\n+ /// \\f$\n+ /// M_{CCM}=\n+ /// \\begin{bmatrix}\n+ /// k_R & 0 & 0\\\\\n+ /// 0 & k_G & 0\\\\\n+ /// 0 & 0 & k_B\\\\\n+ /// \\end{bmatrix}\n+ /// \\f$\\n\n+ /// where\\n\n+ /// \\f$\n+ /// k_R=mean(R_{li}')/mean(R_{li})\\\\\n+ /// k_R=mean(G_{li}')/mean(G_{li})\\\\\n+ /// k_R=mean(B_{li}')/mean(B_{li})\n+ /// \\f$\n+ INITIAL_METHOD_LEAST_SQUARE, ///\n// Jinheng Zhang \n// Chenqi Shan \n\n#ifndef OPENCV_PHOTO_CCM_HPP\n#define OPENCV_PHOTO_CCM_HPP\n\n#include \n#include \n\nnamespace cv\n{\nnamespace ccm\n{\n\n/** @defgroup ccm Color Correction module\n@{\n*/\n\n/** @brief Enum of the possible types of ccm.\n*/\nenum CCMType\n{\n CCM_LINEAR, ///< Uses a \\f$3\\times3\\f$ matrix to linearly transform RGB values without offsets.\n CCM_AFFINE, ///< Uses a \\f$4\\times3\\f$ matrix to affine transform RGB values with both scaling and offset terms.\n};\n\n/** @brief Enum of the possible types of initial method.\n*/\nenum InitialMethodType\n{\n INITIAL_METHOD_WHITE_BALANCE, ///< The white balance method. The initial value is:\\n\n /// \\f$\n /// M_{CCM}=\n /// \\begin{bmatrix}\n /// k_R & 0 & 0\\\\\n /// 0 & k_G & 0\\\\\n /// 0 & 0 & k_B\\\\\n /// \\end{bmatrix}\n /// \\f$\\n\n /// where\\n\n /// \\f$\n /// k_R=mean(R_{li}')/mean(R_{li})\\\\\n /// k_R=mean(G_{li}')/mean(G_{li})\\\\\n /// k_R=mean(B_{li}')/mean(B_{li})\n /// \\f$\n INITIAL_METHOD_LEAST_SQUARE, ///\n+// Jinheng Zhang \n+// Chenqi Shan \n+\n+#ifndef OPENCV_PHOTO_CCM_HPP\n+#define OPENCV_PHOTO_CCM_HPP\n+\n+#include \n+#include \n+\n+namespace cv\n+{\n+namespace ccm\n+{\n+\n+/** @defgroup ccm Color Correction module\n+@{\n+*/\n+\n+/** @brief Enum of the possible types of ccm.\n+*/\n+enum CCMType\n+{\n+ CCM_LINEAR, ///< Uses a \\f$3\\times3\\f$ matrix to linearly transform RGB values without offsets.\n+ CCM_AFFINE, ///< Uses a \\f$4\\times3\\f$ matrix to affine transform RGB values with both scaling and offset terms.\n+};\n+\n+/** @brief Enum of the possible types of initial method.\n+*/\n+enum InitialMethodType\n+{\n+ INITIAL_METHOD_WHITE_BALANCE, ///< The white balance method. 
The initial value is:\\n\n+ /// \\f$\n+ /// M_{CCM}=\n+ /// \\begin{bmatrix}\n+ /// k_R & 0 & 0\\\\\n+ /// 0 & k_G & 0\\\\\n+ /// 0 & 0 & k_B\\\\\n+ /// \\end{bmatrix}\n+ /// \\f$\\n\n+ /// where\\n\n+ /// \\f$\n+ /// k_R=mean(R_{li}')/mean(R_{li})\\\\\n+ /// k_R=mean(G_{li}')/mean(G_{li})\\\\\n+ /// k_R=mean(B_{li}')/mean(B_{li})\n+ /// \\f$\n+ INITIAL_METHOD_LEAST_SQUARE, /// `COLOR_SPACE_APPLE_RGB`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1689877393", + "pr_number": 25394, + "pr_file": "modules/core/include/opencv2/core/mat.hpp", + "created_at": "2024-07-24T14:01:30+00:00", + "commented_code": "void release() const;\n void clear() const;\n void setTo(const _InputArray& value, const _InputArray & mask = _InputArray()) const;\n Mat reinterpretType( int type ) const;", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1689877393", + "repo_full_name": "opencv/opencv", + "pr_number": 25394, + "pr_file": "modules/core/include/opencv2/core/mat.hpp", + "discussion_id": "1689877393", + "commented_code": "@@ -371,6 +371,7 @@ class CV_EXPORTS _OutputArray : public _InputArray\n void release() const;\n void clear() const;\n void setTo(const _InputArray& value, const _InputArray & mask = _InputArray()) const;\n+ Mat reinterpretType( int type ) const;", + "comment_created_at": "2024-07-24T14:01:30+00:00", + "comment_author": "vpisarev", + "comment_body": "can we skip `Type` suffix in the name? (`Mat reinterpret(int type) const;`?)", + "pr_file_module": null + }, + { + "comment_id": "1695039127", + "repo_full_name": "opencv/opencv", + "pr_number": 25394, + "pr_file": "modules/core/include/opencv2/core/mat.hpp", + "discussion_id": "1689877393", + "commented_code": "@@ -371,6 +371,7 @@ class CV_EXPORTS _OutputArray : public _InputArray\n void release() const;\n void clear() const;\n void setTo(const _InputArray& value, const _InputArray & mask = _InputArray()) const;\n+ Mat reinterpretType( int type ) const;", + "comment_created_at": "2024-07-29T11:16:36+00:00", + "comment_author": "Gao-HaoYuan", + "comment_body": "@vpisarev I had done", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1958390966", + "pr_number": 26930, + "pr_file": "modules/imgcodecs/src/grfmt_gif.hpp", + "created_at": "2025-02-17T15:07:03+00:00", + "commented_code": "namespace cv\n{\n\nenum GifOpMode\n{\n GRFMT_GIF_Nothing = 0,\n GRFMT_GIF_PreviousImage = 1,\n GRFMT_GIF_Background = 2,\n GRFMT_GIF_Cover = 3\n// See https://www.w3.org/Graphics/GIF/spec-gif89a.txt\nenum DisposalMethod {", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1958390966", + "repo_full_name": "opencv/opencv", + "pr_number": 26930, + "pr_file": "modules/imgcodecs/src/grfmt_gif.hpp", + "discussion_id": "1958390966", + "commented_code": "@@ -11,12 +11,12 @@\n namespace cv\n {\n \n-enum GifOpMode\n-{\n- GRFMT_GIF_Nothing = 0,\n- GRFMT_GIF_PreviousImage = 1,\n- GRFMT_GIF_Background = 2,\n- GRFMT_GIF_Cover = 3\n+// See https://www.w3.org/Graphics/GIF/spec-gif89a.txt\n+enum DisposalMethod {", + "comment_created_at": "2025-02-17T15:07:03+00:00", + "comment_author": "sturkmen72", + "comment_body": "consider using \r\nenum GifDisposeMethod {\r\n GIF_DISPOSE_NA = 0,\r\n GIF_DISPOSE_NONE = 1,\r\n GIF_DISPOSE_RESTORE_BACKGROUND = 2,\r\n GIF_DISPOSE_RESTORE_PREVIOUS = 3,\r\n};", + "pr_file_module": null + }, + { + "comment_id": "1961335639", + "repo_full_name": "opencv/opencv", + "pr_number": 26930, + "pr_file": "modules/imgcodecs/src/grfmt_gif.hpp", + "discussion_id": 
"1958390966", + "commented_code": "@@ -11,12 +11,12 @@\n namespace cv\n {\n \n-enum GifOpMode\n-{\n- GRFMT_GIF_Nothing = 0,\n- GRFMT_GIF_PreviousImage = 1,\n- GRFMT_GIF_Background = 2,\n- GRFMT_GIF_Cover = 3\n+// See https://www.w3.org/Graphics/GIF/spec-gif89a.txt\n+enum DisposalMethod {", + "comment_created_at": "2025-02-19T09:52:48+00:00", + "comment_author": "Kumataro", + "comment_body": "Thank you for your comment, I agree and I replaced this enum with your suggestion !", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1959436677", + "pr_number": 26925, + "pr_file": "modules/core/include/opencv2/core.hpp", + "created_at": "2025-02-18T10:08:24+00:00", + "commented_code": "void writeFormat(FileStorage& fs) const;\n};\n\nclass CV_EXPORTS_W_SIMPLE DeepNeuralNet", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1959436677", + "repo_full_name": "opencv/opencv", + "pr_number": 26925, + "pr_file": "modules/core/include/opencv2/core.hpp", + "discussion_id": "1959436677", + "commented_code": "@@ -3294,6 +3294,12 @@ class CV_EXPORTS_W Algorithm\n void writeFormat(FileStorage& fs) const;\n };\n \n+class CV_EXPORTS_W_SIMPLE DeepNeuralNet", + "comment_created_at": "2025-02-18T10:08:24+00:00", + "comment_author": "vpisarev", + "comment_body": "the name is quite confusing. `cv::dnn::Net` vs `cv::DeepNeuralNet`. We can still have it in the core, but use more consistent name, e.g. `cv::dnn::BaseNet`:\r\n\r\n```\r\nnamespace dnn {\r\nclass CV_EXPORTS_W_SIMPLE NetBase\r\n{\r\npublic:\r\n virtual ~NetBase();\r\n};\r\n}\r\n```\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1928336874", + "pr_number": 26836, + "pr_file": "modules/imgproc/include/opencv2/imgproc.hpp", + "created_at": "2025-01-24T08:47:19+00:00", + "commented_code": "THRESH_TOZERO_INV = 4, //!< \\f[\\texttt{dst} (x,y) = \\fork{0}{if \\(\\texttt{src}(x,y) > \\texttt{thresh}\\)}{\\texttt{src}(x,y)}{otherwise}\\f]\n THRESH_MASK = 7,\n THRESH_OTSU = 8, //!< flag, use Otsu algorithm to choose the optimal threshold value\n THRESH_TRIANGLE = 16 //!< flag, use Triangle algorithm to choose the optimal threshold value\n THRESH_TRIANGLE = 16, //!< flag, use Triangle algorithm to choose the optimal threshold value\n THRESH_DISABLE = 128 //!< flag, compute threshold only (useful for OTSU/TRIANGLE) but does not actually run thresholding", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1928336874", + "repo_full_name": "opencv/opencv", + "pr_number": 26836, + "pr_file": "modules/imgproc/include/opencv2/imgproc.hpp", + "discussion_id": "1928336874", + "commented_code": "@@ -329,7 +329,8 @@ enum ThresholdTypes {\n THRESH_TOZERO_INV = 4, //!< \\f[\\texttt{dst} (x,y) = \\fork{0}{if \\(\\texttt{src}(x,y) > \\texttt{thresh}\\)}{\\texttt{src}(x,y)}{otherwise}\\f]\n THRESH_MASK = 7,\n THRESH_OTSU = 8, //!< flag, use Otsu algorithm to choose the optimal threshold value\n- THRESH_TRIANGLE = 16 //!< flag, use Triangle algorithm to choose the optimal threshold value\n+ THRESH_TRIANGLE = 16, //!< flag, use Triangle algorithm to choose the optimal threshold value\n+ THRESH_DISABLE = 128 //!< flag, compute threshold only (useful for OTSU/TRIANGLE) but does not actually run thresholding", + "comment_created_at": "2025-01-24T08:47:19+00:00", + "comment_author": "asmorkalov", + "comment_body": "The name is very controversial. 
I propose `THRESH_DRYRUN` or something similar.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1653009475", + "pr_number": 25715, + "pr_file": "modules/imgcodecs/src/grfmt_apng.hpp", + "created_at": "2024-06-25T15:06:40+00:00", + "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level\n// directory of this distribution and at http://opencv.org/license.html\n\n/****************************************************************************\\\n *\n * this file includes some modified part of apngasm and APNG Optimizer 1.4\n * both have zlib license.\n *\n ****************************************************************************/\n\n\n /* apngasm\n *\n * The next generation of apngasm, the APNG Assembler.\n * The apngasm CLI tool and library can assemble and disassemble APNG image files.\n *\n * https://github.com/apngasm/apngasm\n\n\n /* APNG Optimizer 1.4\n *\n * Makes APNG files smaller.\n *\n * http://sourceforge.net/projects/apng/files\n *\n * Copyright (c) 2011-2015 Max Stepin\n * maxst at users.sourceforge.net\n *\n * zlib license\n * ------------\n *\n * This software is provided 'as-is', without any express or implied\n * warranty. In no event will the authors be held liable for any damages\n * arising from the use of this software.\n *\n * Permission is granted to anyone to use this software for any purpose,\n * including commercial applications, and to alter it and redistribute it\n * freely, subject to the following restrictions:\n *\n * 1. The origin of this software must not be misrepresented; you must not\n * claim that you wrote the original software. If you use this software\n * in a product, an acknowledgment in the product documentation would be\n * appreciated but is not required.\n * 2. Altered source versions must be plainly marked as such, and must not be\n * misrepresented as being the original software.\n * 3. 
This notice may not be removed or altered from any source distribution.\n *\n */\n\n#ifndef _GRFMT_APNG_H_\n#define _GRFMT_APNG_H_\n\n#ifdef HAVE_PNG\n\n#include \"grfmt_base.hpp\"\n#include \"bitstrm.hpp\"\n#include \n#include \n\nnamespace cv\n{\n\nconst uint DEFAULT_FRAME_NUMERATOR = 100; //!< @brief The default numerator for the frame delay fraction.\nconst uint DEFAULT_FRAME_DENOMINATOR = 1000; //!< @brief The default denominator for the frame delay fraction.\n\ntypedef struct {\n uchar r, g, b;\n} rgb;\n\ntypedef struct {\n uchar r, g, b, a;\n} rgba;\n\n\nclass APNGFrame\n{\npublic:\n APNGFrame();\n virtual ~APNGFrame() {}\n\n /**\n * @brief Creates an APNGFrame from a bitmapped array of RBGA pixel data.\n * @param pixels The RGBA pixel data.\n * @param width The width of the pixel data.\n * @param height The height of the pixel data.\n * @param delayNum The delay numerator for this frame (defaults to DEFAULT_FRAME_NUMERATOR).\n * @param delayDen The delay denominator for this frame (defaults to DEFAULT_FRAME_DENMINATOR).\n */\n APNGFrame(rgba* pixels, int width, int height,\n int delayNum = DEFAULT_FRAME_NUMERATOR,\n int delayDen = DEFAULT_FRAME_DENOMINATOR);\n\n /**\n * @brief Creates an APNGFrame from a PNG file.\n * @param filePath The relative or absolute path to an image file.\n * @param delayNum The delay numerator for this frame (defaults to DEFAULT_FRAME_NUMERATOR).\n * @param delayDen The delay denominator for this frame (defaults to DEFAULT_FRAME_DENMINATOR).\n */\n APNGFrame(const std::string& filePath,\n int delayNum = DEFAULT_FRAME_NUMERATOR,\n int delayDen = DEFAULT_FRAME_DENOMINATOR);\n\n /**\n * @brief Creates an APNGFrame from a bitmapped array of RBG pixel data.\n * @param pixels The RGB pixel data.\n * @param width The width of the pixel data.\n * @param height The height of the pixel data.\n * @param delayNum The delay numerator for this frame (defaults to DEFAULT_FRAME_NUMERATOR).\n * @param delayDen The delay denominator for this frame (defaults to DEFAULT_FRAME_DENMINATOR).\n */\n APNGFrame(rgb* pixels, int width, int height,\n int delayNum = DEFAULT_FRAME_NUMERATOR,\n int delayDen = DEFAULT_FRAME_DENOMINATOR);\n\n /**\n * @brief Creates an APNGFrame from a bitmapped array of RBG pixel data.\n * @param pixels The RGB pixel data.\n * @param width The width of the pixel data.\n * @param height The height of the pixel data.\n * @param trns_color An array of transparency data.\n * @param delayNum The delay numerator for this frame (defaults to DEFAULT_FRAME_NUMERATOR).\n * @param delayDen The delay denominator for this frame (defaults to DEFAULT_FRAME_DENMINATOR).\n */\n APNGFrame(rgb* pixels, int width, int height,\n rgb* trns_color = NULL, int delayNum = DEFAULT_FRAME_NUMERATOR,\n int delayDen = DEFAULT_FRAME_DENOMINATOR);\n\n /**\n * @brief Saves this frame as a single PNG file.\n * @param outPath The relative or absolute path to save the image file to.\n * @return Returns true if save was successful.\n */\n bool save(const std::string& outPath) const;\n\n int width() const { return m_width; }\n int height() const { return m_height; }\n virtual int type() const { return m_type; }\n\n // Raw pixel data\n uchar* pixels(uchar* setPixels = NULL);\n\n // Palette into\n rgb* palette(rgb* setPalette = NULL);\n rgb _palette[256];\n\n // Transparency info\n uchar* transparency(uchar* setTransparency = NULL);\n\n // Sizes for palette and transparency records\n int paletteSize(int setPaletteSize = 0);", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + 
"comment_id": "1653009475", + "repo_full_name": "opencv/opencv", + "pr_number": 25715, + "pr_file": "modules/imgcodecs/src/grfmt_apng.hpp", + "discussion_id": "1653009475", + "commented_code": "@@ -0,0 +1,178 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level\n+// directory of this distribution and at http://opencv.org/license.html\n+\n+/****************************************************************************\\\n+ *\n+ * this file includes some modified part of apngasm and APNG Optimizer 1.4\n+ * both have zlib license.\n+ *\n+ ****************************************************************************/\n+\n+\n+ /* apngasm\n+ *\n+ * The next generation of apngasm, the APNG Assembler.\n+ * The apngasm CLI tool and library can assemble and disassemble APNG image files.\n+ *\n+ * https://github.com/apngasm/apngasm\n+\n+\n+ /* APNG Optimizer 1.4\n+ *\n+ * Makes APNG files smaller.\n+ *\n+ * http://sourceforge.net/projects/apng/files\n+ *\n+ * Copyright (c) 2011-2015 Max Stepin\n+ * maxst at users.sourceforge.net\n+ *\n+ * zlib license\n+ * ------------\n+ *\n+ * This software is provided 'as-is', without any express or implied\n+ * warranty. In no event will the authors be held liable for any damages\n+ * arising from the use of this software.\n+ *\n+ * Permission is granted to anyone to use this software for any purpose,\n+ * including commercial applications, and to alter it and redistribute it\n+ * freely, subject to the following restrictions:\n+ *\n+ * 1. The origin of this software must not be misrepresented; you must not\n+ * claim that you wrote the original software. If you use this software\n+ * in a product, an acknowledgment in the product documentation would be\n+ * appreciated but is not required.\n+ * 2. Altered source versions must be plainly marked as such, and must not be\n+ * misrepresented as being the original software.\n+ * 3. 
This notice may not be removed or altered from any source distribution.\n+ *\n+ */\n+\n+#ifndef _GRFMT_APNG_H_\n+#define _GRFMT_APNG_H_\n+\n+#ifdef HAVE_PNG\n+\n+#include \"grfmt_base.hpp\"\n+#include \"bitstrm.hpp\"\n+#include \n+#include \n+\n+namespace cv\n+{\n+\n+const uint DEFAULT_FRAME_NUMERATOR = 100; //!< @brief The default numerator for the frame delay fraction.\n+const uint DEFAULT_FRAME_DENOMINATOR = 1000; //!< @brief The default denominator for the frame delay fraction.\n+\n+typedef struct {\n+ uchar r, g, b;\n+} rgb;\n+\n+typedef struct {\n+ uchar r, g, b, a;\n+} rgba;\n+\n+\n+class APNGFrame\n+{\n+public:\n+ APNGFrame();\n+ virtual ~APNGFrame() {}\n+\n+ /**\n+ * @brief Creates an APNGFrame from a bitmapped array of RBGA pixel data.\n+ * @param pixels The RGBA pixel data.\n+ * @param width The width of the pixel data.\n+ * @param height The height of the pixel data.\n+ * @param delayNum The delay numerator for this frame (defaults to DEFAULT_FRAME_NUMERATOR).\n+ * @param delayDen The delay denominator for this frame (defaults to DEFAULT_FRAME_DENMINATOR).\n+ */\n+ APNGFrame(rgba* pixels, int width, int height,\n+ int delayNum = DEFAULT_FRAME_NUMERATOR,\n+ int delayDen = DEFAULT_FRAME_DENOMINATOR);\n+\n+ /**\n+ * @brief Creates an APNGFrame from a PNG file.\n+ * @param filePath The relative or absolute path to an image file.\n+ * @param delayNum The delay numerator for this frame (defaults to DEFAULT_FRAME_NUMERATOR).\n+ * @param delayDen The delay denominator for this frame (defaults to DEFAULT_FRAME_DENMINATOR).\n+ */\n+ APNGFrame(const std::string& filePath,\n+ int delayNum = DEFAULT_FRAME_NUMERATOR,\n+ int delayDen = DEFAULT_FRAME_DENOMINATOR);\n+\n+ /**\n+ * @brief Creates an APNGFrame from a bitmapped array of RBG pixel data.\n+ * @param pixels The RGB pixel data.\n+ * @param width The width of the pixel data.\n+ * @param height The height of the pixel data.\n+ * @param delayNum The delay numerator for this frame (defaults to DEFAULT_FRAME_NUMERATOR).\n+ * @param delayDen The delay denominator for this frame (defaults to DEFAULT_FRAME_DENMINATOR).\n+ */\n+ APNGFrame(rgb* pixels, int width, int height,\n+ int delayNum = DEFAULT_FRAME_NUMERATOR,\n+ int delayDen = DEFAULT_FRAME_DENOMINATOR);\n+\n+ /**\n+ * @brief Creates an APNGFrame from a bitmapped array of RBG pixel data.\n+ * @param pixels The RGB pixel data.\n+ * @param width The width of the pixel data.\n+ * @param height The height of the pixel data.\n+ * @param trns_color An array of transparency data.\n+ * @param delayNum The delay numerator for this frame (defaults to DEFAULT_FRAME_NUMERATOR).\n+ * @param delayDen The delay denominator for this frame (defaults to DEFAULT_FRAME_DENMINATOR).\n+ */\n+ APNGFrame(rgb* pixels, int width, int height,\n+ rgb* trns_color = NULL, int delayNum = DEFAULT_FRAME_NUMERATOR,\n+ int delayDen = DEFAULT_FRAME_DENOMINATOR);\n+\n+ /**\n+ * @brief Saves this frame as a single PNG file.\n+ * @param outPath The relative or absolute path to save the image file to.\n+ * @return Returns true if save was successful.\n+ */\n+ bool save(const std::string& outPath) const;\n+\n+ int width() const { return m_width; }\n+ int height() const { return m_height; }\n+ virtual int type() const { return m_type; }\n+\n+ // Raw pixel data\n+ uchar* pixels(uchar* setPixels = NULL);\n+\n+ // Palette into\n+ rgb* palette(rgb* setPalette = NULL);\n+ rgb _palette[256];\n+\n+ // Transparency info\n+ uchar* transparency(uchar* setTransparency = NULL);\n+\n+ // Sizes for palette and transparency records\n+ int 
paletteSize(int setPaletteSize = 0);", + "comment_created_at": "2024-06-25T15:06:40+00:00", + "comment_author": "vrabaud", + "comment_body": "maybe call the setters: `setPaletteSize`. I like the Goole convention for naming variables: https://google.github.io/styleguide/cppguide.html#Function_Names\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1658707587", + "pr_number": 25715, + "pr_file": "modules/imgcodecs/src/apngframe.hpp", + "created_at": "2024-06-28T13:00:34+00:00", + "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level\n// directory of this distribution and at http://opencv.org/license.html\n\n/****************************************************************************\\\n *\n * this file includes some modified part of apngasm\n *\n ****************************************************************************/\n\n\n /* apngasm\n *\n * The next generation of apngasm, the APNG Assembler.\n * The apngasm CLI tool and library can assemble and disassemble APNG image files.\n *\n * https://github.com/apngasm/apngasm\n *\n * zlib license\n * ------------\n *\n * This software is provided 'as-is', without any express or implied\n * warranty. In no event will the authors be held liable for any damages\n * arising from the use of this software.\n *\n * Permission is granted to anyone to use this software for any purpose,\n * including commercial applications, and to alter it and redistribute it\n * freely, subject to the following restrictions:\n *\n * 1. The origin of this software must not be misrepresented; you must not\n * claim that you wrote the original software. If you use this software\n * in a product, an acknowledgment in the product documentation would be\n * appreciated but is not required.\n * 2. Altered source versions must be plainly marked as such, and must not be\n * misrepresented as being the original software.\n * 3. 
This notice may not be removed or altered from any source distribution.\n *\n */\n\n#ifndef _APNGFRAME_H_\n#define _APNGFRAME_H_\n\n#include \n\nnamespace cv {\n\nconst unsigned DEFAULT_FRAME_NUMERATOR =\n 100; //!< @brief The default numerator for the frame delay fraction.\nconst unsigned DEFAULT_FRAME_DENOMINATOR =\n 1000; //!< @brief The default denominator for the frame delay fraction.\n\ntypedef struct {\n unsigned char r, g, b;\n} rgb;\ntypedef struct {\n unsigned char r, g, b, a;\n} rgba;\n\n// Individual APNG frame\nclass APNGFrame {\npublic:\n // Raw pixel data\n unsigned char *pixels(unsigned char *setPixels = NULL);\n unsigned char *_pixels;\n\n // Width and Height\n unsigned int width(unsigned int setWidth = 0);\n unsigned int height(unsigned int setHeight = 0);\n unsigned int _width;\n unsigned int _height;\n\n // PNG color type\n unsigned char colorType(unsigned char setColorType = 255);\n unsigned char _colorType;\n\n // Palette into\n rgb *palette(rgb *setPalette = NULL);\n rgb _palette[256];\n\n // Transparency info\n unsigned char *transparency(unsigned char *setTransparency = NULL);\n unsigned char _transparency[256];\n\n // Sizes for palette and transparency records\n int paletteSize(int setPaletteSize = 0);", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1658707587", + "repo_full_name": "opencv/opencv", + "pr_number": 25715, + "pr_file": "modules/imgcodecs/src/apngframe.hpp", + "discussion_id": "1658707587", + "commented_code": "@@ -0,0 +1,173 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level\n+// directory of this distribution and at http://opencv.org/license.html\n+\n+/****************************************************************************\\\n+ *\n+ * this file includes some modified part of apngasm\n+ *\n+ ****************************************************************************/\n+\n+\n+ /* apngasm\n+ *\n+ * The next generation of apngasm, the APNG Assembler.\n+ * The apngasm CLI tool and library can assemble and disassemble APNG image files.\n+ *\n+ * https://github.com/apngasm/apngasm\n+ *\n+ * zlib license\n+ * ------------\n+ *\n+ * This software is provided 'as-is', without any express or implied\n+ * warranty. In no event will the authors be held liable for any damages\n+ * arising from the use of this software.\n+ *\n+ * Permission is granted to anyone to use this software for any purpose,\n+ * including commercial applications, and to alter it and redistribute it\n+ * freely, subject to the following restrictions:\n+ *\n+ * 1. The origin of this software must not be misrepresented; you must not\n+ * claim that you wrote the original software. If you use this software\n+ * in a product, an acknowledgment in the product documentation would be\n+ * appreciated but is not required.\n+ * 2. Altered source versions must be plainly marked as such, and must not be\n+ * misrepresented as being the original software.\n+ * 3. 
This notice may not be removed or altered from any source distribution.\n+ *\n+ */\n+\n+#ifndef _APNGFRAME_H_\n+#define _APNGFRAME_H_\n+\n+#include \n+\n+namespace cv {\n+\n+const unsigned DEFAULT_FRAME_NUMERATOR =\n+ 100; //!< @brief The default numerator for the frame delay fraction.\n+const unsigned DEFAULT_FRAME_DENOMINATOR =\n+ 1000; //!< @brief The default denominator for the frame delay fraction.\n+\n+typedef struct {\n+ unsigned char r, g, b;\n+} rgb;\n+typedef struct {\n+ unsigned char r, g, b, a;\n+} rgba;\n+\n+// Individual APNG frame\n+class APNGFrame {\n+public:\n+ // Raw pixel data\n+ unsigned char *pixels(unsigned char *setPixels = NULL);\n+ unsigned char *_pixels;\n+\n+ // Width and Height\n+ unsigned int width(unsigned int setWidth = 0);\n+ unsigned int height(unsigned int setHeight = 0);\n+ unsigned int _width;\n+ unsigned int _height;\n+\n+ // PNG color type\n+ unsigned char colorType(unsigned char setColorType = 255);\n+ unsigned char _colorType;\n+\n+ // Palette into\n+ rgb *palette(rgb *setPalette = NULL);\n+ rgb _palette[256];\n+\n+ // Transparency info\n+ unsigned char *transparency(unsigned char *setTransparency = NULL);\n+ unsigned char _transparency[256];\n+\n+ // Sizes for palette and transparency records\n+ int paletteSize(int setPaletteSize = 0);", + "comment_created_at": "2024-06-28T13:00:34+00:00", + "comment_author": "vrabaud", + "comment_body": "it's hard to get this is a setter. Call it `setPaletteSize`.\r\nBut if _palette_size is public, what's the point of having a setter? Just make the member public no?", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-consistent-descriptive-naming.md b/_reviewers/opencv-consistent-descriptive-naming.md index e80a0a0..7cdbbe1 100644 --- a/_reviewers/opencv-consistent-descriptive-naming.md +++ b/_reviewers/opencv-consistent-descriptive-naming.md @@ -72,275 +72,3 @@ int setPaletteSize(int size); ``` This approach leads to more maintainable, readable, and self-documenting code while preserving consistency across the codebase. 
- - -[ - { - "discussion_id": "2092504225", - "pr_number": 27318, - "pr_file": "modules/core/include/opencv2/core/mat.hpp", - "created_at": "2025-05-16T07:35:23+00:00", - "commented_code": "*/\n void copyTo( OutputArray m, InputArray mask ) const;\n\n /** @brief Overwrites the existing matrix\n\n This method overwrites existing matrix data, just like copyTo().\n But if it does not have a proper size or type before the operation, an exception is thrown.\n This function is helpful to update ROI in an existing matrix.\n\n If (re)allocation of destination memory is necessary, use copyTo() .\n\n @param m Destination matrix.\n If it does not have a proper size or type before the operation, an exception is thrown.\n\n @sa copyTo\n\n */\n void overwriteTo( OutputArray m ) const;", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2092504225", - "repo_full_name": "opencv/opencv", - "pr_number": 27318, - "pr_file": "modules/core/include/opencv2/core/mat.hpp", - "discussion_id": "2092504225", - "commented_code": "@@ -1250,6 +1255,30 @@ class CV_EXPORTS Mat\n */\n void copyTo( OutputArray m, InputArray mask ) const;\n \n+ /** @brief Overwrites the existing matrix\n+\n+ This method overwrites existing matrix data, just like copyTo().\n+ But if it does not have a proper size or type before the operation, an exception is thrown.\n+ This function is helpful to update ROI in an existing matrix.\n+\n+ If (re)allocation of destination memory is necessary, use copyTo() .\n+\n+ @param m Destination matrix.\n+ If it does not have a proper size or type before the operation, an exception is thrown.\n+\n+ @sa copyTo\n+\n+ */\n+ void overwriteTo( OutputArray m ) const;", - "comment_created_at": "2025-05-16T07:35:23+00:00", - "comment_author": "opencv-alalek", - "comment_body": "Just `writeTo()` ?", - "pr_file_module": null - }, - { - "comment_id": "2092623855", - "repo_full_name": "opencv/opencv", - "pr_number": 27318, - "pr_file": "modules/core/include/opencv2/core/mat.hpp", - "discussion_id": "2092504225", - "commented_code": "@@ -1250,6 +1255,30 @@ class CV_EXPORTS Mat\n */\n void copyTo( OutputArray m, InputArray mask ) const;\n \n+ /** @brief Overwrites the existing matrix\n+\n+ This method overwrites existing matrix data, just like copyTo().\n+ But if it does not have a proper size or type before the operation, an exception is thrown.\n+ This function is helpful to update ROI in an existing matrix.\n+\n+ If (re)allocation of destination memory is necessary, use copyTo() .\n+\n+ @param m Destination matrix.\n+ If it does not have a proper size or type before the operation, an exception is thrown.\n+\n+ @sa copyTo\n+\n+ */\n+ void overwriteTo( OutputArray m ) const;", - "comment_created_at": "2025-05-16T08:49:47+00:00", - "comment_author": "Kumataro", - "comment_body": "Thank you for your comment ! 
Naming functions is very difficult problem.\r\n\r\nIt is required to express the condition that src and dst must be of the same type and size.\r\nFor some verbs(`writeTo`, `updateTo`), I feel that the difference with copyTo() is hard to explain.\r\n\r\nBelow are some possible function names that I was considering:\r\n```c++\r\n(0) src.copyTo(roi); // currently\r\n(1) src.updateTo(roi);.\r\n(2) src.writeTo(roi);\r\n(3) src.overwriteTo(roi);\r\n(4) src.rewriteTo(roi);\r\n(5) src.recopyTo(roi);\r\n(6) src.copyToWithoutReallocation(roi); // too long.\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2094111550", - "repo_full_name": "opencv/opencv", - "pr_number": 27318, - "pr_file": "modules/core/include/opencv2/core/mat.hpp", - "discussion_id": "2092504225", - "commented_code": "@@ -1250,6 +1255,30 @@ class CV_EXPORTS Mat\n */\n void copyTo( OutputArray m, InputArray mask ) const;\n \n+ /** @brief Overwrites the existing matrix\n+\n+ This method overwrites existing matrix data, just like copyTo().\n+ But if it does not have a proper size or type before the operation, an exception is thrown.\n+ This function is helpful to update ROI in an existing matrix.\n+\n+ If (re)allocation of destination memory is necessary, use copyTo() .\n+\n+ @param m Destination matrix.\n+ If it does not have a proper size or type before the operation, an exception is thrown.\n+\n+ @sa copyTo\n+\n+ */\n+ void overwriteTo( OutputArray m ) const;", - "comment_created_at": "2025-05-17T12:29:56+00:00", - "comment_author": "Kumataro", - "comment_body": "I reconsidered it again., `writeTo()` naming is suitable for this purpose, thank you very much ! I fixed it.\r\n\r\n- `src.writeTo(emptyMat)` makes an exception -> natural.\r\n- `src.writeTo(differentTypeMat)` makes an exception -> natural\r\n- `src.writeTo(differentSizeMat)` makes an exception -> natural.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2018137510", - "pr_number": 27051, - "pr_file": "modules/photo/include/opencv2/photo/ccm.hpp", - "created_at": "2025-03-28T08:29:09+00:00", - "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n//\n// Author: Longbu Wang \n// Jinheng Zhang \n// Chenqi Shan \n\n#ifndef OPENCV_PHOTO_CCM_HPP\n#define OPENCV_PHOTO_CCM_HPP\n\n#include \n#include \n\nnamespace cv\n{\nnamespace ccm\n{\n\n/** @defgroup ccm Color Correction module\n@{\n*/\n\n/** @brief Enum of the possible types of ccm.\n*/\nenum CCMType\n{\n CCM_LINEAR, ///< Uses a \\f$3\\times3\\f$ matrix to linearly transform RGB values without offsets.\n CCM_AFFINE, ///< Uses a \\f$4\\times3\\f$ matrix to affine transform RGB values with both scaling and offset terms.\n};\n\n/** @brief Enum of the possible types of initial method.\n*/\nenum InitialMethodType\n{\n INITIAL_METHOD_WHITE_BALANCE, ///< The white balance method. 
The initial value is:\\n\n /// \\f$\n /// M_{CCM}=\n /// \\begin{bmatrix}\n /// k_R & 0 & 0\\\\\n /// 0 & k_G & 0\\\\\n /// 0 & 0 & k_B\\\\\n /// \\end{bmatrix}\n /// \\f$\\n\n /// where\\n\n /// \\f$\n /// k_R=mean(R_{li}')/mean(R_{li})\\\\\n /// k_R=mean(G_{li}')/mean(G_{li})\\\\\n /// k_R=mean(B_{li}')/mean(B_{li})\n /// \\f$\n INITIAL_METHOD_LEAST_SQUARE, ///\n+// Jinheng Zhang \n+// Chenqi Shan \n+\n+#ifndef OPENCV_PHOTO_CCM_HPP\n+#define OPENCV_PHOTO_CCM_HPP\n+\n+#include \n+#include \n+\n+namespace cv\n+{\n+namespace ccm\n+{\n+\n+/** @defgroup ccm Color Correction module\n+@{\n+*/\n+\n+/** @brief Enum of the possible types of ccm.\n+*/\n+enum CCMType\n+{\n+ CCM_LINEAR, ///< Uses a \\f$3\\times3\\f$ matrix to linearly transform RGB values without offsets.\n+ CCM_AFFINE, ///< Uses a \\f$4\\times3\\f$ matrix to affine transform RGB values with both scaling and offset terms.\n+};\n+\n+/** @brief Enum of the possible types of initial method.\n+*/\n+enum InitialMethodType\n+{\n+ INITIAL_METHOD_WHITE_BALANCE, ///< The white balance method. The initial value is:\\n\n+ /// \\f$\n+ /// M_{CCM}=\n+ /// \\begin{bmatrix}\n+ /// k_R & 0 & 0\\\\\n+ /// 0 & k_G & 0\\\\\n+ /// 0 & 0 & k_B\\\\\n+ /// \\end{bmatrix}\n+ /// \\f$\\n\n+ /// where\\n\n+ /// \\f$\n+ /// k_R=mean(R_{li}')/mean(R_{li})\\\\\n+ /// k_R=mean(G_{li}')/mean(G_{li})\\\\\n+ /// k_R=mean(B_{li}')/mean(B_{li})\n+ /// \\f$\n+ INITIAL_METHOD_LEAST_SQUARE, ///\n// Jinheng Zhang \n// Chenqi Shan \n\n#ifndef OPENCV_PHOTO_CCM_HPP\n#define OPENCV_PHOTO_CCM_HPP\n\n#include \n#include \n\nnamespace cv\n{\nnamespace ccm\n{\n\n/** @defgroup ccm Color Correction module\n@{\n*/\n\n/** @brief Enum of the possible types of ccm.\n*/\nenum CCMType\n{\n CCM_LINEAR, ///< Uses a \\f$3\\times3\\f$ matrix to linearly transform RGB values without offsets.\n CCM_AFFINE, ///< Uses a \\f$4\\times3\\f$ matrix to affine transform RGB values with both scaling and offset terms.\n};\n\n/** @brief Enum of the possible types of initial method.\n*/\nenum InitialMethodType\n{\n INITIAL_METHOD_WHITE_BALANCE, ///< The white balance method. The initial value is:\\n\n /// \\f$\n /// M_{CCM}=\n /// \\begin{bmatrix}\n /// k_R & 0 & 0\\\\\n /// 0 & k_G & 0\\\\\n /// 0 & 0 & k_B\\\\\n /// \\end{bmatrix}\n /// \\f$\\n\n /// where\\n\n /// \\f$\n /// k_R=mean(R_{li}')/mean(R_{li})\\\\\n /// k_R=mean(G_{li}')/mean(G_{li})\\\\\n /// k_R=mean(B_{li}')/mean(B_{li})\n /// \\f$\n INITIAL_METHOD_LEAST_SQUARE, ///\n+// Jinheng Zhang \n+// Chenqi Shan \n+\n+#ifndef OPENCV_PHOTO_CCM_HPP\n+#define OPENCV_PHOTO_CCM_HPP\n+\n+#include \n+#include \n+\n+namespace cv\n+{\n+namespace ccm\n+{\n+\n+/** @defgroup ccm Color Correction module\n+@{\n+*/\n+\n+/** @brief Enum of the possible types of ccm.\n+*/\n+enum CCMType\n+{\n+ CCM_LINEAR, ///< Uses a \\f$3\\times3\\f$ matrix to linearly transform RGB values without offsets.\n+ CCM_AFFINE, ///< Uses a \\f$4\\times3\\f$ matrix to affine transform RGB values with both scaling and offset terms.\n+};\n+\n+/** @brief Enum of the possible types of initial method.\n+*/\n+enum InitialMethodType\n+{\n+ INITIAL_METHOD_WHITE_BALANCE, ///< The white balance method. 
The initial value is:\\n\n+ /// \\f$\n+ /// M_{CCM}=\n+ /// \\begin{bmatrix}\n+ /// k_R & 0 & 0\\\\\n+ /// 0 & k_G & 0\\\\\n+ /// 0 & 0 & k_B\\\\\n+ /// \\end{bmatrix}\n+ /// \\f$\\n\n+ /// where\\n\n+ /// \\f$\n+ /// k_R=mean(R_{li}')/mean(R_{li})\\\\\n+ /// k_R=mean(G_{li}')/mean(G_{li})\\\\\n+ /// k_R=mean(B_{li}')/mean(B_{li})\n+ /// \\f$\n+ INITIAL_METHOD_LEAST_SQUARE, ///\n// Jinheng Zhang \n// Chenqi Shan \n\n#ifndef OPENCV_PHOTO_CCM_HPP\n#define OPENCV_PHOTO_CCM_HPP\n\n#include \n#include \n\nnamespace cv\n{\nnamespace ccm\n{\n\n/** @defgroup ccm Color Correction module\n@{\n*/\n\n/** @brief Enum of the possible types of ccm.\n*/\nenum CCMType\n{\n CCM_LINEAR, ///< Uses a \\f$3\\times3\\f$ matrix to linearly transform RGB values without offsets.\n CCM_AFFINE, ///< Uses a \\f$4\\times3\\f$ matrix to affine transform RGB values with both scaling and offset terms.\n};\n\n/** @brief Enum of the possible types of initial method.\n*/\nenum InitialMethodType\n{\n INITIAL_METHOD_WHITE_BALANCE, ///< The white balance method. The initial value is:\\n\n /// \\f$\n /// M_{CCM}=\n /// \\begin{bmatrix}\n /// k_R & 0 & 0\\\\\n /// 0 & k_G & 0\\\\\n /// 0 & 0 & k_B\\\\\n /// \\end{bmatrix}\n /// \\f$\\n\n /// where\\n\n /// \\f$\n /// k_R=mean(R_{li}')/mean(R_{li})\\\\\n /// k_R=mean(G_{li}')/mean(G_{li})\\\\\n /// k_R=mean(B_{li}')/mean(B_{li})\n /// \\f$\n INITIAL_METHOD_LEAST_SQUARE, ///\n+// Jinheng Zhang \n+// Chenqi Shan \n+\n+#ifndef OPENCV_PHOTO_CCM_HPP\n+#define OPENCV_PHOTO_CCM_HPP\n+\n+#include \n+#include \n+\n+namespace cv\n+{\n+namespace ccm\n+{\n+\n+/** @defgroup ccm Color Correction module\n+@{\n+*/\n+\n+/** @brief Enum of the possible types of ccm.\n+*/\n+enum CCMType\n+{\n+ CCM_LINEAR, ///< Uses a \\f$3\\times3\\f$ matrix to linearly transform RGB values without offsets.\n+ CCM_AFFINE, ///< Uses a \\f$4\\times3\\f$ matrix to affine transform RGB values with both scaling and offset terms.\n+};\n+\n+/** @brief Enum of the possible types of initial method.\n+*/\n+enum InitialMethodType\n+{\n+ INITIAL_METHOD_WHITE_BALANCE, ///< The white balance method. 
The initial value is:\\n\n+ /// \\f$\n+ /// M_{CCM}=\n+ /// \\begin{bmatrix}\n+ /// k_R & 0 & 0\\\\\n+ /// 0 & k_G & 0\\\\\n+ /// 0 & 0 & k_B\\\\\n+ /// \\end{bmatrix}\n+ /// \\f$\\n\n+ /// where\\n\n+ /// \\f$\n+ /// k_R=mean(R_{li}')/mean(R_{li})\\\\\n+ /// k_R=mean(G_{li}')/mean(G_{li})\\\\\n+ /// k_R=mean(B_{li}')/mean(B_{li})\n+ /// \\f$\n+ INITIAL_METHOD_LEAST_SQUARE, /// `COLOR_SPACE_APPLE_RGB`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1689877393", - "pr_number": 25394, - "pr_file": "modules/core/include/opencv2/core/mat.hpp", - "created_at": "2024-07-24T14:01:30+00:00", - "commented_code": "void release() const;\n void clear() const;\n void setTo(const _InputArray& value, const _InputArray & mask = _InputArray()) const;\n Mat reinterpretType( int type ) const;", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1689877393", - "repo_full_name": "opencv/opencv", - "pr_number": 25394, - "pr_file": "modules/core/include/opencv2/core/mat.hpp", - "discussion_id": "1689877393", - "commented_code": "@@ -371,6 +371,7 @@ class CV_EXPORTS _OutputArray : public _InputArray\n void release() const;\n void clear() const;\n void setTo(const _InputArray& value, const _InputArray & mask = _InputArray()) const;\n+ Mat reinterpretType( int type ) const;", - "comment_created_at": "2024-07-24T14:01:30+00:00", - "comment_author": "vpisarev", - "comment_body": "can we skip `Type` suffix in the name? (`Mat reinterpret(int type) const;`?)", - "pr_file_module": null - }, - { - "comment_id": "1695039127", - "repo_full_name": "opencv/opencv", - "pr_number": 25394, - "pr_file": "modules/core/include/opencv2/core/mat.hpp", - "discussion_id": "1689877393", - "commented_code": "@@ -371,6 +371,7 @@ class CV_EXPORTS _OutputArray : public _InputArray\n void release() const;\n void clear() const;\n void setTo(const _InputArray& value, const _InputArray & mask = _InputArray()) const;\n+ Mat reinterpretType( int type ) const;", - "comment_created_at": "2024-07-29T11:16:36+00:00", - "comment_author": "Gao-HaoYuan", - "comment_body": "@vpisarev I had done", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1958390966", - "pr_number": 26930, - "pr_file": "modules/imgcodecs/src/grfmt_gif.hpp", - "created_at": "2025-02-17T15:07:03+00:00", - "commented_code": "namespace cv\n{\n\nenum GifOpMode\n{\n GRFMT_GIF_Nothing = 0,\n GRFMT_GIF_PreviousImage = 1,\n GRFMT_GIF_Background = 2,\n GRFMT_GIF_Cover = 3\n// See https://www.w3.org/Graphics/GIF/spec-gif89a.txt\nenum DisposalMethod {", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1958390966", - "repo_full_name": "opencv/opencv", - "pr_number": 26930, - "pr_file": "modules/imgcodecs/src/grfmt_gif.hpp", - "discussion_id": "1958390966", - "commented_code": "@@ -11,12 +11,12 @@\n namespace cv\n {\n \n-enum GifOpMode\n-{\n- GRFMT_GIF_Nothing = 0,\n- GRFMT_GIF_PreviousImage = 1,\n- GRFMT_GIF_Background = 2,\n- GRFMT_GIF_Cover = 3\n+// See https://www.w3.org/Graphics/GIF/spec-gif89a.txt\n+enum DisposalMethod {", - "comment_created_at": "2025-02-17T15:07:03+00:00", - "comment_author": "sturkmen72", - "comment_body": "consider using \r\nenum GifDisposeMethod {\r\n GIF_DISPOSE_NA = 0,\r\n GIF_DISPOSE_NONE = 1,\r\n GIF_DISPOSE_RESTORE_BACKGROUND = 2,\r\n GIF_DISPOSE_RESTORE_PREVIOUS = 3,\r\n};", - "pr_file_module": null - }, - { - "comment_id": "1961335639", - "repo_full_name": "opencv/opencv", - "pr_number": 26930, - "pr_file": "modules/imgcodecs/src/grfmt_gif.hpp", - "discussion_id": 
"1958390966", - "commented_code": "@@ -11,12 +11,12 @@\n namespace cv\n {\n \n-enum GifOpMode\n-{\n- GRFMT_GIF_Nothing = 0,\n- GRFMT_GIF_PreviousImage = 1,\n- GRFMT_GIF_Background = 2,\n- GRFMT_GIF_Cover = 3\n+// See https://www.w3.org/Graphics/GIF/spec-gif89a.txt\n+enum DisposalMethod {", - "comment_created_at": "2025-02-19T09:52:48+00:00", - "comment_author": "Kumataro", - "comment_body": "Thank you for your comment, I agree and I replaced this enum with your suggestion !", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1959436677", - "pr_number": 26925, - "pr_file": "modules/core/include/opencv2/core.hpp", - "created_at": "2025-02-18T10:08:24+00:00", - "commented_code": "void writeFormat(FileStorage& fs) const;\n};\n\nclass CV_EXPORTS_W_SIMPLE DeepNeuralNet", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1959436677", - "repo_full_name": "opencv/opencv", - "pr_number": 26925, - "pr_file": "modules/core/include/opencv2/core.hpp", - "discussion_id": "1959436677", - "commented_code": "@@ -3294,6 +3294,12 @@ class CV_EXPORTS_W Algorithm\n void writeFormat(FileStorage& fs) const;\n };\n \n+class CV_EXPORTS_W_SIMPLE DeepNeuralNet", - "comment_created_at": "2025-02-18T10:08:24+00:00", - "comment_author": "vpisarev", - "comment_body": "the name is quite confusing. `cv::dnn::Net` vs `cv::DeepNeuralNet`. We can still have it in the core, but use more consistent name, e.g. `cv::dnn::BaseNet`:\r\n\r\n```\r\nnamespace dnn {\r\nclass CV_EXPORTS_W_SIMPLE NetBase\r\n{\r\npublic:\r\n virtual ~NetBase();\r\n};\r\n}\r\n```\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1928336874", - "pr_number": 26836, - "pr_file": "modules/imgproc/include/opencv2/imgproc.hpp", - "created_at": "2025-01-24T08:47:19+00:00", - "commented_code": "THRESH_TOZERO_INV = 4, //!< \\f[\\texttt{dst} (x,y) = \\fork{0}{if \\(\\texttt{src}(x,y) > \\texttt{thresh}\\)}{\\texttt{src}(x,y)}{otherwise}\\f]\n THRESH_MASK = 7,\n THRESH_OTSU = 8, //!< flag, use Otsu algorithm to choose the optimal threshold value\n THRESH_TRIANGLE = 16 //!< flag, use Triangle algorithm to choose the optimal threshold value\n THRESH_TRIANGLE = 16, //!< flag, use Triangle algorithm to choose the optimal threshold value\n THRESH_DISABLE = 128 //!< flag, compute threshold only (useful for OTSU/TRIANGLE) but does not actually run thresholding", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1928336874", - "repo_full_name": "opencv/opencv", - "pr_number": 26836, - "pr_file": "modules/imgproc/include/opencv2/imgproc.hpp", - "discussion_id": "1928336874", - "commented_code": "@@ -329,7 +329,8 @@ enum ThresholdTypes {\n THRESH_TOZERO_INV = 4, //!< \\f[\\texttt{dst} (x,y) = \\fork{0}{if \\(\\texttt{src}(x,y) > \\texttt{thresh}\\)}{\\texttt{src}(x,y)}{otherwise}\\f]\n THRESH_MASK = 7,\n THRESH_OTSU = 8, //!< flag, use Otsu algorithm to choose the optimal threshold value\n- THRESH_TRIANGLE = 16 //!< flag, use Triangle algorithm to choose the optimal threshold value\n+ THRESH_TRIANGLE = 16, //!< flag, use Triangle algorithm to choose the optimal threshold value\n+ THRESH_DISABLE = 128 //!< flag, compute threshold only (useful for OTSU/TRIANGLE) but does not actually run thresholding", - "comment_created_at": "2025-01-24T08:47:19+00:00", - "comment_author": "asmorkalov", - "comment_body": "The name is very controversial. 
I propose `THRESH_DRYRUN` or something similar.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1653009475", - "pr_number": 25715, - "pr_file": "modules/imgcodecs/src/grfmt_apng.hpp", - "created_at": "2024-06-25T15:06:40+00:00", - "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level\n// directory of this distribution and at http://opencv.org/license.html\n\n/****************************************************************************\\\n *\n * this file includes some modified part of apngasm and APNG Optimizer 1.4\n * both have zlib license.\n *\n ****************************************************************************/\n\n\n /* apngasm\n *\n * The next generation of apngasm, the APNG Assembler.\n * The apngasm CLI tool and library can assemble and disassemble APNG image files.\n *\n * https://github.com/apngasm/apngasm\n\n\n /* APNG Optimizer 1.4\n *\n * Makes APNG files smaller.\n *\n * http://sourceforge.net/projects/apng/files\n *\n * Copyright (c) 2011-2015 Max Stepin\n * maxst at users.sourceforge.net\n *\n * zlib license\n * ------------\n *\n * This software is provided 'as-is', without any express or implied\n * warranty. In no event will the authors be held liable for any damages\n * arising from the use of this software.\n *\n * Permission is granted to anyone to use this software for any purpose,\n * including commercial applications, and to alter it and redistribute it\n * freely, subject to the following restrictions:\n *\n * 1. The origin of this software must not be misrepresented; you must not\n * claim that you wrote the original software. If you use this software\n * in a product, an acknowledgment in the product documentation would be\n * appreciated but is not required.\n * 2. Altered source versions must be plainly marked as such, and must not be\n * misrepresented as being the original software.\n * 3. 
This notice may not be removed or altered from any source distribution.\n *\n */\n\n#ifndef _GRFMT_APNG_H_\n#define _GRFMT_APNG_H_\n\n#ifdef HAVE_PNG\n\n#include \"grfmt_base.hpp\"\n#include \"bitstrm.hpp\"\n#include \n#include \n\nnamespace cv\n{\n\nconst uint DEFAULT_FRAME_NUMERATOR = 100; //!< @brief The default numerator for the frame delay fraction.\nconst uint DEFAULT_FRAME_DENOMINATOR = 1000; //!< @brief The default denominator for the frame delay fraction.\n\ntypedef struct {\n uchar r, g, b;\n} rgb;\n\ntypedef struct {\n uchar r, g, b, a;\n} rgba;\n\n\nclass APNGFrame\n{\npublic:\n APNGFrame();\n virtual ~APNGFrame() {}\n\n /**\n * @brief Creates an APNGFrame from a bitmapped array of RBGA pixel data.\n * @param pixels The RGBA pixel data.\n * @param width The width of the pixel data.\n * @param height The height of the pixel data.\n * @param delayNum The delay numerator for this frame (defaults to DEFAULT_FRAME_NUMERATOR).\n * @param delayDen The delay denominator for this frame (defaults to DEFAULT_FRAME_DENMINATOR).\n */\n APNGFrame(rgba* pixels, int width, int height,\n int delayNum = DEFAULT_FRAME_NUMERATOR,\n int delayDen = DEFAULT_FRAME_DENOMINATOR);\n\n /**\n * @brief Creates an APNGFrame from a PNG file.\n * @param filePath The relative or absolute path to an image file.\n * @param delayNum The delay numerator for this frame (defaults to DEFAULT_FRAME_NUMERATOR).\n * @param delayDen The delay denominator for this frame (defaults to DEFAULT_FRAME_DENMINATOR).\n */\n APNGFrame(const std::string& filePath,\n int delayNum = DEFAULT_FRAME_NUMERATOR,\n int delayDen = DEFAULT_FRAME_DENOMINATOR);\n\n /**\n * @brief Creates an APNGFrame from a bitmapped array of RBG pixel data.\n * @param pixels The RGB pixel data.\n * @param width The width of the pixel data.\n * @param height The height of the pixel data.\n * @param delayNum The delay numerator for this frame (defaults to DEFAULT_FRAME_NUMERATOR).\n * @param delayDen The delay denominator for this frame (defaults to DEFAULT_FRAME_DENMINATOR).\n */\n APNGFrame(rgb* pixels, int width, int height,\n int delayNum = DEFAULT_FRAME_NUMERATOR,\n int delayDen = DEFAULT_FRAME_DENOMINATOR);\n\n /**\n * @brief Creates an APNGFrame from a bitmapped array of RBG pixel data.\n * @param pixels The RGB pixel data.\n * @param width The width of the pixel data.\n * @param height The height of the pixel data.\n * @param trns_color An array of transparency data.\n * @param delayNum The delay numerator for this frame (defaults to DEFAULT_FRAME_NUMERATOR).\n * @param delayDen The delay denominator for this frame (defaults to DEFAULT_FRAME_DENMINATOR).\n */\n APNGFrame(rgb* pixels, int width, int height,\n rgb* trns_color = NULL, int delayNum = DEFAULT_FRAME_NUMERATOR,\n int delayDen = DEFAULT_FRAME_DENOMINATOR);\n\n /**\n * @brief Saves this frame as a single PNG file.\n * @param outPath The relative or absolute path to save the image file to.\n * @return Returns true if save was successful.\n */\n bool save(const std::string& outPath) const;\n\n int width() const { return m_width; }\n int height() const { return m_height; }\n virtual int type() const { return m_type; }\n\n // Raw pixel data\n uchar* pixels(uchar* setPixels = NULL);\n\n // Palette into\n rgb* palette(rgb* setPalette = NULL);\n rgb _palette[256];\n\n // Transparency info\n uchar* transparency(uchar* setTransparency = NULL);\n\n // Sizes for palette and transparency records\n int paletteSize(int setPaletteSize = 0);", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - 
"comment_id": "1653009475", - "repo_full_name": "opencv/opencv", - "pr_number": 25715, - "pr_file": "modules/imgcodecs/src/grfmt_apng.hpp", - "discussion_id": "1653009475", - "commented_code": "@@ -0,0 +1,178 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level\n+// directory of this distribution and at http://opencv.org/license.html\n+\n+/****************************************************************************\\\n+ *\n+ * this file includes some modified part of apngasm and APNG Optimizer 1.4\n+ * both have zlib license.\n+ *\n+ ****************************************************************************/\n+\n+\n+ /* apngasm\n+ *\n+ * The next generation of apngasm, the APNG Assembler.\n+ * The apngasm CLI tool and library can assemble and disassemble APNG image files.\n+ *\n+ * https://github.com/apngasm/apngasm\n+\n+\n+ /* APNG Optimizer 1.4\n+ *\n+ * Makes APNG files smaller.\n+ *\n+ * http://sourceforge.net/projects/apng/files\n+ *\n+ * Copyright (c) 2011-2015 Max Stepin\n+ * maxst at users.sourceforge.net\n+ *\n+ * zlib license\n+ * ------------\n+ *\n+ * This software is provided 'as-is', without any express or implied\n+ * warranty. In no event will the authors be held liable for any damages\n+ * arising from the use of this software.\n+ *\n+ * Permission is granted to anyone to use this software for any purpose,\n+ * including commercial applications, and to alter it and redistribute it\n+ * freely, subject to the following restrictions:\n+ *\n+ * 1. The origin of this software must not be misrepresented; you must not\n+ * claim that you wrote the original software. If you use this software\n+ * in a product, an acknowledgment in the product documentation would be\n+ * appreciated but is not required.\n+ * 2. Altered source versions must be plainly marked as such, and must not be\n+ * misrepresented as being the original software.\n+ * 3. 
This notice may not be removed or altered from any source distribution.\n+ *\n+ */\n+\n+#ifndef _GRFMT_APNG_H_\n+#define _GRFMT_APNG_H_\n+\n+#ifdef HAVE_PNG\n+\n+#include \"grfmt_base.hpp\"\n+#include \"bitstrm.hpp\"\n+#include \n+#include \n+\n+namespace cv\n+{\n+\n+const uint DEFAULT_FRAME_NUMERATOR = 100; //!< @brief The default numerator for the frame delay fraction.\n+const uint DEFAULT_FRAME_DENOMINATOR = 1000; //!< @brief The default denominator for the frame delay fraction.\n+\n+typedef struct {\n+ uchar r, g, b;\n+} rgb;\n+\n+typedef struct {\n+ uchar r, g, b, a;\n+} rgba;\n+\n+\n+class APNGFrame\n+{\n+public:\n+ APNGFrame();\n+ virtual ~APNGFrame() {}\n+\n+ /**\n+ * @brief Creates an APNGFrame from a bitmapped array of RBGA pixel data.\n+ * @param pixels The RGBA pixel data.\n+ * @param width The width of the pixel data.\n+ * @param height The height of the pixel data.\n+ * @param delayNum The delay numerator for this frame (defaults to DEFAULT_FRAME_NUMERATOR).\n+ * @param delayDen The delay denominator for this frame (defaults to DEFAULT_FRAME_DENMINATOR).\n+ */\n+ APNGFrame(rgba* pixels, int width, int height,\n+ int delayNum = DEFAULT_FRAME_NUMERATOR,\n+ int delayDen = DEFAULT_FRAME_DENOMINATOR);\n+\n+ /**\n+ * @brief Creates an APNGFrame from a PNG file.\n+ * @param filePath The relative or absolute path to an image file.\n+ * @param delayNum The delay numerator for this frame (defaults to DEFAULT_FRAME_NUMERATOR).\n+ * @param delayDen The delay denominator for this frame (defaults to DEFAULT_FRAME_DENMINATOR).\n+ */\n+ APNGFrame(const std::string& filePath,\n+ int delayNum = DEFAULT_FRAME_NUMERATOR,\n+ int delayDen = DEFAULT_FRAME_DENOMINATOR);\n+\n+ /**\n+ * @brief Creates an APNGFrame from a bitmapped array of RBG pixel data.\n+ * @param pixels The RGB pixel data.\n+ * @param width The width of the pixel data.\n+ * @param height The height of the pixel data.\n+ * @param delayNum The delay numerator for this frame (defaults to DEFAULT_FRAME_NUMERATOR).\n+ * @param delayDen The delay denominator for this frame (defaults to DEFAULT_FRAME_DENMINATOR).\n+ */\n+ APNGFrame(rgb* pixels, int width, int height,\n+ int delayNum = DEFAULT_FRAME_NUMERATOR,\n+ int delayDen = DEFAULT_FRAME_DENOMINATOR);\n+\n+ /**\n+ * @brief Creates an APNGFrame from a bitmapped array of RBG pixel data.\n+ * @param pixels The RGB pixel data.\n+ * @param width The width of the pixel data.\n+ * @param height The height of the pixel data.\n+ * @param trns_color An array of transparency data.\n+ * @param delayNum The delay numerator for this frame (defaults to DEFAULT_FRAME_NUMERATOR).\n+ * @param delayDen The delay denominator for this frame (defaults to DEFAULT_FRAME_DENMINATOR).\n+ */\n+ APNGFrame(rgb* pixels, int width, int height,\n+ rgb* trns_color = NULL, int delayNum = DEFAULT_FRAME_NUMERATOR,\n+ int delayDen = DEFAULT_FRAME_DENOMINATOR);\n+\n+ /**\n+ * @brief Saves this frame as a single PNG file.\n+ * @param outPath The relative or absolute path to save the image file to.\n+ * @return Returns true if save was successful.\n+ */\n+ bool save(const std::string& outPath) const;\n+\n+ int width() const { return m_width; }\n+ int height() const { return m_height; }\n+ virtual int type() const { return m_type; }\n+\n+ // Raw pixel data\n+ uchar* pixels(uchar* setPixels = NULL);\n+\n+ // Palette into\n+ rgb* palette(rgb* setPalette = NULL);\n+ rgb _palette[256];\n+\n+ // Transparency info\n+ uchar* transparency(uchar* setTransparency = NULL);\n+\n+ // Sizes for palette and transparency records\n+ int 
paletteSize(int setPaletteSize = 0);", - "comment_created_at": "2024-06-25T15:06:40+00:00", - "comment_author": "vrabaud", - "comment_body": "maybe call the setters: `setPaletteSize`. I like the Goole convention for naming variables: https://google.github.io/styleguide/cppguide.html#Function_Names\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1658707587", - "pr_number": 25715, - "pr_file": "modules/imgcodecs/src/apngframe.hpp", - "created_at": "2024-06-28T13:00:34+00:00", - "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level\n// directory of this distribution and at http://opencv.org/license.html\n\n/****************************************************************************\\\n *\n * this file includes some modified part of apngasm\n *\n ****************************************************************************/\n\n\n /* apngasm\n *\n * The next generation of apngasm, the APNG Assembler.\n * The apngasm CLI tool and library can assemble and disassemble APNG image files.\n *\n * https://github.com/apngasm/apngasm\n *\n * zlib license\n * ------------\n *\n * This software is provided 'as-is', without any express or implied\n * warranty. In no event will the authors be held liable for any damages\n * arising from the use of this software.\n *\n * Permission is granted to anyone to use this software for any purpose,\n * including commercial applications, and to alter it and redistribute it\n * freely, subject to the following restrictions:\n *\n * 1. The origin of this software must not be misrepresented; you must not\n * claim that you wrote the original software. If you use this software\n * in a product, an acknowledgment in the product documentation would be\n * appreciated but is not required.\n * 2. Altered source versions must be plainly marked as such, and must not be\n * misrepresented as being the original software.\n * 3. 
This notice may not be removed or altered from any source distribution.\n *\n */\n\n#ifndef _APNGFRAME_H_\n#define _APNGFRAME_H_\n\n#include \n\nnamespace cv {\n\nconst unsigned DEFAULT_FRAME_NUMERATOR =\n 100; //!< @brief The default numerator for the frame delay fraction.\nconst unsigned DEFAULT_FRAME_DENOMINATOR =\n 1000; //!< @brief The default denominator for the frame delay fraction.\n\ntypedef struct {\n unsigned char r, g, b;\n} rgb;\ntypedef struct {\n unsigned char r, g, b, a;\n} rgba;\n\n// Individual APNG frame\nclass APNGFrame {\npublic:\n // Raw pixel data\n unsigned char *pixels(unsigned char *setPixels = NULL);\n unsigned char *_pixels;\n\n // Width and Height\n unsigned int width(unsigned int setWidth = 0);\n unsigned int height(unsigned int setHeight = 0);\n unsigned int _width;\n unsigned int _height;\n\n // PNG color type\n unsigned char colorType(unsigned char setColorType = 255);\n unsigned char _colorType;\n\n // Palette into\n rgb *palette(rgb *setPalette = NULL);\n rgb _palette[256];\n\n // Transparency info\n unsigned char *transparency(unsigned char *setTransparency = NULL);\n unsigned char _transparency[256];\n\n // Sizes for palette and transparency records\n int paletteSize(int setPaletteSize = 0);", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1658707587", - "repo_full_name": "opencv/opencv", - "pr_number": 25715, - "pr_file": "modules/imgcodecs/src/apngframe.hpp", - "discussion_id": "1658707587", - "commented_code": "@@ -0,0 +1,173 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level\n+// directory of this distribution and at http://opencv.org/license.html\n+\n+/****************************************************************************\\\n+ *\n+ * this file includes some modified part of apngasm\n+ *\n+ ****************************************************************************/\n+\n+\n+ /* apngasm\n+ *\n+ * The next generation of apngasm, the APNG Assembler.\n+ * The apngasm CLI tool and library can assemble and disassemble APNG image files.\n+ *\n+ * https://github.com/apngasm/apngasm\n+ *\n+ * zlib license\n+ * ------------\n+ *\n+ * This software is provided 'as-is', without any express or implied\n+ * warranty. In no event will the authors be held liable for any damages\n+ * arising from the use of this software.\n+ *\n+ * Permission is granted to anyone to use this software for any purpose,\n+ * including commercial applications, and to alter it and redistribute it\n+ * freely, subject to the following restrictions:\n+ *\n+ * 1. The origin of this software must not be misrepresented; you must not\n+ * claim that you wrote the original software. If you use this software\n+ * in a product, an acknowledgment in the product documentation would be\n+ * appreciated but is not required.\n+ * 2. Altered source versions must be plainly marked as such, and must not be\n+ * misrepresented as being the original software.\n+ * 3. 
This notice may not be removed or altered from any source distribution.\n+ *\n+ */\n+\n+#ifndef _APNGFRAME_H_\n+#define _APNGFRAME_H_\n+\n+#include \n+\n+namespace cv {\n+\n+const unsigned DEFAULT_FRAME_NUMERATOR =\n+ 100; //!< @brief The default numerator for the frame delay fraction.\n+const unsigned DEFAULT_FRAME_DENOMINATOR =\n+ 1000; //!< @brief The default denominator for the frame delay fraction.\n+\n+typedef struct {\n+ unsigned char r, g, b;\n+} rgb;\n+typedef struct {\n+ unsigned char r, g, b, a;\n+} rgba;\n+\n+// Individual APNG frame\n+class APNGFrame {\n+public:\n+ // Raw pixel data\n+ unsigned char *pixels(unsigned char *setPixels = NULL);\n+ unsigned char *_pixels;\n+\n+ // Width and Height\n+ unsigned int width(unsigned int setWidth = 0);\n+ unsigned int height(unsigned int setHeight = 0);\n+ unsigned int _width;\n+ unsigned int _height;\n+\n+ // PNG color type\n+ unsigned char colorType(unsigned char setColorType = 255);\n+ unsigned char _colorType;\n+\n+ // Palette into\n+ rgb *palette(rgb *setPalette = NULL);\n+ rgb _palette[256];\n+\n+ // Transparency info\n+ unsigned char *transparency(unsigned char *setTransparency = NULL);\n+ unsigned char _transparency[256];\n+\n+ // Sizes for palette and transparency records\n+ int paletteSize(int setPaletteSize = 0);", - "comment_created_at": "2024-06-28T13:00:34+00:00", - "comment_author": "vrabaud", - "comment_body": "it's hard to get this is a setter. Call it `setPaletteSize`.\r\nBut if _palette_size is public, what's the point of having a setter? Just make the member public no?", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-cross-platform-api-design-rules.json b/_reviewers/opencv-cross-platform-api-design-rules.json new file mode 100644 index 0000000..112e502 --- /dev/null +++ b/_reviewers/opencv-cross-platform-api-design-rules.json @@ -0,0 +1,158 @@ +[ + { + "discussion_id": "2179518705", + "pr_number": 27496, + "pr_file": "modules/imgcodecs/include/opencv2/imgcodecs.hpp", + "created_at": "2025-07-02T09:00:11+00:00", + "commented_code": "int m_curr;\n };\n\n ImageCollection();\n ImageCollection(const String& filename, int flags);\n void init(const String& img, int flags);\n size_t size() const;\n const Mat& at(int index);\n CV_WRAP ImageCollection();\n CV_WRAP ImageCollection(const String& filename, int flags = IMREAD_UNCHANGED);\n CV_WRAP void init(const String& img, int flags);\n CV_WRAP size_t size() const;", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2179518705", + "repo_full_name": "opencv/opencv", + "pr_number": 27496, + "pr_file": "modules/imgcodecs/include/opencv2/imgcodecs.hpp", + "discussion_id": "2179518705", + "commented_code": "@@ -711,13 +711,15 @@ class CV_EXPORTS ImageCollection {\n int m_curr;\n };\n \n- ImageCollection();\n- ImageCollection(const String& filename, int flags);\n- void init(const String& img, int flags);\n- size_t size() const;\n- const Mat& at(int index);\n+ CV_WRAP ImageCollection();\n+ CV_WRAP ImageCollection(const String& filename, int flags = IMREAD_UNCHANGED);\n+ CV_WRAP void init(const String& img, int flags);\n+ CV_WRAP size_t size() const;", + "comment_created_at": "2025-07-02T09:00:11+00:00", + "comment_author": "asmorkalov", + "comment_body": "size_t is not wrapped to Python and Java correctly. Java even does not support unsigned types. 
Please use int instead.", + "pr_file_module": null + }, + { + "comment_id": "2179548324", + "repo_full_name": "opencv/opencv", + "pr_number": 27496, + "pr_file": "modules/imgcodecs/include/opencv2/imgcodecs.hpp", + "discussion_id": "2179518705", + "commented_code": "@@ -711,13 +711,15 @@ class CV_EXPORTS ImageCollection {\n int m_curr;\n };\n \n- ImageCollection();\n- ImageCollection(const String& filename, int flags);\n- void init(const String& img, int flags);\n- size_t size() const;\n- const Mat& at(int index);\n+ CV_WRAP ImageCollection();\n+ CV_WRAP ImageCollection(const String& filename, int flags = IMREAD_UNCHANGED);\n+ CV_WRAP void init(const String& img, int flags);\n+ CV_WRAP size_t size() const;", + "comment_created_at": "2025-07-02T09:14:01+00:00", + "comment_author": "sturkmen72", + "comment_body": "> size_t is not wrapped to Python and Java correctly. Java even does not support unsigned types. Please use int instead.\r\n\r\nplease check the last commit. i created size32() for wrapping", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2172533185", + "pr_number": 27460, + "pr_file": "modules/gapi/include/opencv2/gapi/infer/ov.hpp", + "created_at": "2025-06-27T17:34:53+00:00", + "commented_code": "*/\nstruct benchmark_mode { };\n\nstruct workload_type {\n using callback = std::function;\n using listener = std::pair;\n std::shared_ptr addListener(callback cb){\n int id = nextId++;\n listeners.emplace_back(id, std::move(cb));\n\n auto remover = [this, id](void*){ removeListener(id);};\n\n return std::shared_ptr(nullptr, remover);", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2172533185", + "repo_full_name": "opencv/opencv", + "pr_number": 27460, + "pr_file": "modules/gapi/include/opencv2/gapi/infer/ov.hpp", + "discussion_id": "2172533185", + "commented_code": "@@ -691,6 +691,29 @@ namespace wip { namespace ov {\n */\n struct benchmark_mode { };\n \n+struct workload_type {\n+ using callback = std::function;\n+ using listener = std::pair;\n+ std::shared_ptr addListener(callback cb){\n+ int id = nextId++;\n+ listeners.emplace_back(id, std::move(cb));\n+\n+ auto remover = [this, id](void*){ removeListener(id);};\n+\n+ return std::shared_ptr(nullptr, remover);", + "comment_created_at": "2025-06-27T17:34:53+00:00", + "comment_author": "AsyaPronina", + "comment_body": "I don't think it is a good idea to remove listeners that way. It seems a bit tricky. I understand that this variable may outlive G-API OV backend. But, might be, we can clean this variable's listeners in the destructor of G-API OV backend itself. Please note that we are the \"users\" of given API to add listeners and we 100% know when we add a listener and when we can remove it, so there is no need for some RAII idiom for remove. 
If this is allowable, we can preserve these compile args somewhere in backend to access variable in the destructor.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1327872439", + "pr_number": 24151, + "pr_file": "modules/imgproc/include/opencv2/imgproc/hal/hal.hpp", + "created_at": "2023-09-16T00:07:56+00:00", + "commented_code": "CV_EXPORTS void resize(int src_type,\n const uchar * src_data, size_t src_step, int src_width, int src_height,\n uchar * dst_data, size_t dst_step, int dst_width, int dst_height,\n double inv_scale_x, double inv_scale_y, int interpolation);\n double inv_scale_x, double inv_scale_y, int interpolation, int coordinate);", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1327872439", + "repo_full_name": "opencv/opencv", + "pr_number": 24151, + "pr_file": "modules/imgproc/include/opencv2/imgproc/hal/hal.hpp", + "discussion_id": "1327872439", + "commented_code": "@@ -101,7 +101,7 @@ CV_EXPORTS void morph(int op, int src_type, int dst_type,\n CV_EXPORTS void resize(int src_type,\n const uchar * src_data, size_t src_step, int src_width, int src_height,\n uchar * dst_data, size_t dst_step, int dst_width, int dst_height,\n- double inv_scale_x, double inv_scale_y, int interpolation);\n+ double inv_scale_x, double inv_scale_y, int interpolation, int coordinate);", + "comment_created_at": "2023-09-16T00:07:56+00:00", + "comment_author": "opencv-alalek", + "comment_body": "We should not break HAL API/ABI.\r\nDon't modify exited function, create a new function instead.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1959486582", + "pr_number": 26934, + "pr_file": "modules/objdetect/include/opencv2/objdetect/aruco_detector.hpp", + "created_at": "2025-02-18T10:39:42+00:00", + "commented_code": "* If camera parameters and distortion coefficients are provided, missing markers are reprojected\n * using projectPoint function. If not, missing marker projections are interpolated using global\n * homography, and all the marker corners in the board must have the same Z coordinate.\n * @note This function assumes that the board only contains markers from one dictionary, so only the\n * first configured dictionary is used.\n */\n CV_WRAP void refineDetectedMarkers(InputArray image, const Board &board,\n InputOutputArrayOfArrays detectedCorners,\n InputOutputArray detectedIds, InputOutputArrayOfArrays rejectedCorners,\n InputArray cameraMatrix = noArray(), InputArray distCoeffs = noArray(),\n OutputArray recoveredIdxs = noArray()) const;\n\n CV_WRAP const Dictionary& getDictionary() const;\n CV_WRAP void setDictionary(const Dictionary& dictionary);\n CV_WRAP const Dictionary& getDictionary(size_t index = 0) const;\n CV_WRAP void setDictionary(const Dictionary& dictionary, size_t index = 0);\n CV_WRAP const std::vector& getDictionaries() const;\n CV_WRAP void setDictionaries(const std::vector& dictionaries);\n CV_WRAP void addDictionary(const Dictionary& dictionary);\n CV_WRAP void removeDictionary(size_t index);", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1959486582", + "repo_full_name": "opencv/opencv", + "pr_number": 26934, + "pr_file": "modules/objdetect/include/opencv2/objdetect/aruco_detector.hpp", + "discussion_id": "1959486582", + "commented_code": "@@ -329,15 +341,21 @@ class CV_EXPORTS_W ArucoDetector : public Algorithm\n * If camera parameters and distortion coefficients are provided, missing markers are reprojected\n * using projectPoint function. 
If not, missing marker projections are interpolated using global\n * homography, and all the marker corners in the board must have the same Z coordinate.\n+ * @note This function assumes that the board only contains markers from one dictionary, so only the\n+ * first configured dictionary is used.\n */\n CV_WRAP void refineDetectedMarkers(InputArray image, const Board &board,\n InputOutputArrayOfArrays detectedCorners,\n InputOutputArray detectedIds, InputOutputArrayOfArrays rejectedCorners,\n InputArray cameraMatrix = noArray(), InputArray distCoeffs = noArray(),\n OutputArray recoveredIdxs = noArray()) const;\n \n- CV_WRAP const Dictionary& getDictionary() const;\n- CV_WRAP void setDictionary(const Dictionary& dictionary);\n+ CV_WRAP const Dictionary& getDictionary(size_t index = 0) const;\n+ CV_WRAP void setDictionary(const Dictionary& dictionary, size_t index = 0);\n+ CV_WRAP const std::vector& getDictionaries() const;\n+ CV_WRAP void setDictionaries(const std::vector& dictionaries);\n+ CV_WRAP void addDictionary(const Dictionary& dictionary);\n+ CV_WRAP void removeDictionary(size_t index);", + "comment_created_at": "2025-02-18T10:39:42+00:00", + "comment_author": "asmorkalov", + "comment_body": "size_t does not work well with Java and Python bindings. let's use just int.\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1865541230", + "pr_number": 25584, + "pr_file": "modules/videoio/src/plugin_capture_api.hpp", + "created_at": "2024-12-02T09:47:31+00:00", + "commented_code": "CV_OUT CvPluginCapture* handle);\n}; // OpenCV_VideoIO_Capture_Plugin_API_v1_1_api_entries\n\nstruct OpenCV_VideoIO_Capture_Plugin_API_v1_2_api_entries\n{\n /** @brief Open video capture from buffer with parameters\n\n @param buffer Memory buffer\n @param params pointer on 2*n_params array of 'key,value' pairs\n @param n_params number of passed parameters\n @param[out] handle pointer on Capture handle\n\n @note API-CALL 9, API-Version == 2\n */\n CvResult (CV_API_CALL *Capture_open_buffer)(\n std::streambuf& buffer,", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1865541230", + "repo_full_name": "opencv/opencv", + "pr_number": 25584, + "pr_file": "modules/videoio/src/plugin_capture_api.hpp", + "discussion_id": "1865541230", + "commented_code": "@@ -121,6 +121,23 @@ struct OpenCV_VideoIO_Capture_Plugin_API_v1_1_api_entries\n CV_OUT CvPluginCapture* handle);\n }; // OpenCV_VideoIO_Capture_Plugin_API_v1_1_api_entries\n \n+struct OpenCV_VideoIO_Capture_Plugin_API_v1_2_api_entries\n+{\n+ /** @brief Open video capture from buffer with parameters\n+\n+ @param buffer Memory buffer\n+ @param params pointer on 2*n_params array of 'key,value' pairs\n+ @param n_params number of passed parameters\n+ @param[out] handle pointer on Capture handle\n+\n+ @note API-CALL 9, API-Version == 2\n+ */\n+ CvResult (CV_API_CALL *Capture_open_buffer)(\n+ std::streambuf& buffer,", + "comment_created_at": "2024-12-02T09:47:31+00:00", + "comment_author": "opencv-alalek", + "comment_body": "We should not pass C++ objects through low level plugin API. 
They are not usable in general.\r\n\r\nDifferent C++ ABIs are possible for OpenCV and plugin parts:\r\n- MSVS vs MinGW\r\n- GCC/Clang with different C++ standards\r\n- libc++ (GCC) / libstdc++ (Clang)", + "pr_file_module": null + }, + { + "comment_id": "1865665628", + "repo_full_name": "opencv/opencv", + "pr_number": 25584, + "pr_file": "modules/videoio/src/plugin_capture_api.hpp", + "discussion_id": "1865541230", + "commented_code": "@@ -121,6 +121,23 @@ struct OpenCV_VideoIO_Capture_Plugin_API_v1_1_api_entries\n CV_OUT CvPluginCapture* handle);\n }; // OpenCV_VideoIO_Capture_Plugin_API_v1_1_api_entries\n \n+struct OpenCV_VideoIO_Capture_Plugin_API_v1_2_api_entries\n+{\n+ /** @brief Open video capture from buffer with parameters\n+\n+ @param buffer Memory buffer\n+ @param params pointer on 2*n_params array of 'key,value' pairs\n+ @param n_params number of passed parameters\n+ @param[out] handle pointer on Capture handle\n+\n+ @note API-CALL 9, API-Version == 2\n+ */\n+ CvResult (CV_API_CALL *Capture_open_buffer)(\n+ std::streambuf& buffer,", + "comment_created_at": "2024-12-02T11:11:38+00:00", + "comment_author": "dkurt", + "comment_body": "Thanks! That was my concern. Is that a good idea to replace with functions pointers for required methods like `seek` and `read`?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1259277978", + "pr_number": 23960, + "pr_file": "modules/video/include/opencv2/video/tracking.hpp", + "created_at": "2023-07-11T07:02:13+00:00", + "commented_code": "//bool update(InputArray image, CV_OUT Rect& boundingBox) CV_OVERRIDE;\n};\n\nenum TrackState { New = 0, Tracked, Lost};\n\nclass CV_EXPORTS Detection\n{\npublic:\n int classId;\n float confidence;\n cv::Rect box;\n};\n\nclass CV_EXPORTS Strack {", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1259277978", + "repo_full_name": "opencv/opencv", + "pr_number": 23960, + "pr_file": "modules/video/include/opencv2/video/tracking.hpp", + "discussion_id": "1259277978", + "commented_code": "@@ -887,6 +888,89 @@ class CV_EXPORTS_W TrackerNano : public Tracker\n //bool update(InputArray image, CV_OUT Rect& boundingBox) CV_OVERRIDE;\n };\n \n+enum TrackState { New = 0, Tracked, Lost};\n+\n+class CV_EXPORTS Detection\n+{\n+public:\n+ int classId;\n+ float confidence;\n+ cv::Rect box;\n+};\n+\n+class CV_EXPORTS Strack {", + "comment_created_at": "2023-07-11T07:02:13+00:00", + "comment_author": "asmorkalov", + "comment_body": "Please use `CV_EXPORTS_W` and `CV_WRAP` to make it available from Python, Java and other binded languages.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-cross-platform-api-design-rules.md b/_reviewers/opencv-cross-platform-api-design-rules.md index f7d8e85..ad17e9a 100644 --- a/_reviewers/opencv-cross-platform-api-design-rules.md +++ b/_reviewers/opencv-cross-platform-api-design-rules.md @@ -39,163 +39,3 @@ public: - Document API version changes clearly Following these guidelines ensures your API works reliably across Python, Java, and C++ while maintaining long-term stability. 
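
The threads collected above keep returning to the same binding rules: expose `int` rather than `size_t`, mark wrapped classes and methods with `CV_EXPORTS_W`/`CV_WRAP`, and keep C++ objects out of plugin-facing interfaces. As a minimal illustrative sketch only — `FrameList`, `size32()` and `frames_` are names invented here, not identifiers from any of the quoted PRs — a binding-friendly declaration could look like this:

```cpp
// Hypothetical sketch (not part of the patch): a container exposed to the
// Python/Java wrapper generators while keeping size_t on the C++ side.
#include <opencv2/core.hpp>
#include <vector>

namespace cv {

class CV_EXPORTS_W FrameList   // CV_EXPORTS_W: mark the class for binding generation
{
public:
    CV_WRAP FrameList() {}

    // Wrapped accessor returns int, since size_t maps poorly to Java/Python.
    CV_WRAP int size32() const { return static_cast<int>(frames_.size()); }

    // C++-only accessor keeps the natural type; it is simply not CV_WRAP-ed.
    size_t size() const { return frames_.size(); }

private:
    std::vector<Mat> frames_;
};

} // namespace cv
```

This mirrors, roughly, the split adopted in the `ImageCollection` thread above: an `int`-returning `size32()` added for the wrappers, while the existing `size_t size()` stays available to C++ callers.
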
- - -[ - { - "discussion_id": "2179518705", - "pr_number": 27496, - "pr_file": "modules/imgcodecs/include/opencv2/imgcodecs.hpp", - "created_at": "2025-07-02T09:00:11+00:00", - "commented_code": "int m_curr;\n };\n\n ImageCollection();\n ImageCollection(const String& filename, int flags);\n void init(const String& img, int flags);\n size_t size() const;\n const Mat& at(int index);\n CV_WRAP ImageCollection();\n CV_WRAP ImageCollection(const String& filename, int flags = IMREAD_UNCHANGED);\n CV_WRAP void init(const String& img, int flags);\n CV_WRAP size_t size() const;", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2179518705", - "repo_full_name": "opencv/opencv", - "pr_number": 27496, - "pr_file": "modules/imgcodecs/include/opencv2/imgcodecs.hpp", - "discussion_id": "2179518705", - "commented_code": "@@ -711,13 +711,15 @@ class CV_EXPORTS ImageCollection {\n int m_curr;\n };\n \n- ImageCollection();\n- ImageCollection(const String& filename, int flags);\n- void init(const String& img, int flags);\n- size_t size() const;\n- const Mat& at(int index);\n+ CV_WRAP ImageCollection();\n+ CV_WRAP ImageCollection(const String& filename, int flags = IMREAD_UNCHANGED);\n+ CV_WRAP void init(const String& img, int flags);\n+ CV_WRAP size_t size() const;", - "comment_created_at": "2025-07-02T09:00:11+00:00", - "comment_author": "asmorkalov", - "comment_body": "size_t is not wrapped to Python and Java correctly. Java even does not support unsigned types. Please use int instead.", - "pr_file_module": null - }, - { - "comment_id": "2179548324", - "repo_full_name": "opencv/opencv", - "pr_number": 27496, - "pr_file": "modules/imgcodecs/include/opencv2/imgcodecs.hpp", - "discussion_id": "2179518705", - "commented_code": "@@ -711,13 +711,15 @@ class CV_EXPORTS ImageCollection {\n int m_curr;\n };\n \n- ImageCollection();\n- ImageCollection(const String& filename, int flags);\n- void init(const String& img, int flags);\n- size_t size() const;\n- const Mat& at(int index);\n+ CV_WRAP ImageCollection();\n+ CV_WRAP ImageCollection(const String& filename, int flags = IMREAD_UNCHANGED);\n+ CV_WRAP void init(const String& img, int flags);\n+ CV_WRAP size_t size() const;", - "comment_created_at": "2025-07-02T09:14:01+00:00", - "comment_author": "sturkmen72", - "comment_body": "> size_t is not wrapped to Python and Java correctly. Java even does not support unsigned types. Please use int instead.\r\n\r\nplease check the last commit. 
i created size32() for wrapping", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2172533185", - "pr_number": 27460, - "pr_file": "modules/gapi/include/opencv2/gapi/infer/ov.hpp", - "created_at": "2025-06-27T17:34:53+00:00", - "commented_code": "*/\nstruct benchmark_mode { };\n\nstruct workload_type {\n using callback = std::function;\n using listener = std::pair;\n std::shared_ptr addListener(callback cb){\n int id = nextId++;\n listeners.emplace_back(id, std::move(cb));\n\n auto remover = [this, id](void*){ removeListener(id);};\n\n return std::shared_ptr(nullptr, remover);", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2172533185", - "repo_full_name": "opencv/opencv", - "pr_number": 27460, - "pr_file": "modules/gapi/include/opencv2/gapi/infer/ov.hpp", - "discussion_id": "2172533185", - "commented_code": "@@ -691,6 +691,29 @@ namespace wip { namespace ov {\n */\n struct benchmark_mode { };\n \n+struct workload_type {\n+ using callback = std::function;\n+ using listener = std::pair;\n+ std::shared_ptr addListener(callback cb){\n+ int id = nextId++;\n+ listeners.emplace_back(id, std::move(cb));\n+\n+ auto remover = [this, id](void*){ removeListener(id);};\n+\n+ return std::shared_ptr(nullptr, remover);", - "comment_created_at": "2025-06-27T17:34:53+00:00", - "comment_author": "AsyaPronina", - "comment_body": "I don't think it is a good idea to remove listeners that way. It seems a bit tricky. I understand that this variable may outlive G-API OV backend. But, might be, we can clean this variable's listeners in the destructor of G-API OV backend itself. Please note that we are the \"users\" of given API to add listeners and we 100% know when we add a listener and when we can remove it, so there is no need for some RAII idiom for remove. 
If this is allowable, we can preserve these compile args somewhere in backend to access variable in the destructor.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1327872439", - "pr_number": 24151, - "pr_file": "modules/imgproc/include/opencv2/imgproc/hal/hal.hpp", - "created_at": "2023-09-16T00:07:56+00:00", - "commented_code": "CV_EXPORTS void resize(int src_type,\n const uchar * src_data, size_t src_step, int src_width, int src_height,\n uchar * dst_data, size_t dst_step, int dst_width, int dst_height,\n double inv_scale_x, double inv_scale_y, int interpolation);\n double inv_scale_x, double inv_scale_y, int interpolation, int coordinate);", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1327872439", - "repo_full_name": "opencv/opencv", - "pr_number": 24151, - "pr_file": "modules/imgproc/include/opencv2/imgproc/hal/hal.hpp", - "discussion_id": "1327872439", - "commented_code": "@@ -101,7 +101,7 @@ CV_EXPORTS void morph(int op, int src_type, int dst_type,\n CV_EXPORTS void resize(int src_type,\n const uchar * src_data, size_t src_step, int src_width, int src_height,\n uchar * dst_data, size_t dst_step, int dst_width, int dst_height,\n- double inv_scale_x, double inv_scale_y, int interpolation);\n+ double inv_scale_x, double inv_scale_y, int interpolation, int coordinate);", - "comment_created_at": "2023-09-16T00:07:56+00:00", - "comment_author": "opencv-alalek", - "comment_body": "We should not break HAL API/ABI.\r\nDon't modify exited function, create a new function instead.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1959486582", - "pr_number": 26934, - "pr_file": "modules/objdetect/include/opencv2/objdetect/aruco_detector.hpp", - "created_at": "2025-02-18T10:39:42+00:00", - "commented_code": "* If camera parameters and distortion coefficients are provided, missing markers are reprojected\n * using projectPoint function. If not, missing marker projections are interpolated using global\n * homography, and all the marker corners in the board must have the same Z coordinate.\n * @note This function assumes that the board only contains markers from one dictionary, so only the\n * first configured dictionary is used.\n */\n CV_WRAP void refineDetectedMarkers(InputArray image, const Board &board,\n InputOutputArrayOfArrays detectedCorners,\n InputOutputArray detectedIds, InputOutputArrayOfArrays rejectedCorners,\n InputArray cameraMatrix = noArray(), InputArray distCoeffs = noArray(),\n OutputArray recoveredIdxs = noArray()) const;\n\n CV_WRAP const Dictionary& getDictionary() const;\n CV_WRAP void setDictionary(const Dictionary& dictionary);\n CV_WRAP const Dictionary& getDictionary(size_t index = 0) const;\n CV_WRAP void setDictionary(const Dictionary& dictionary, size_t index = 0);\n CV_WRAP const std::vector& getDictionaries() const;\n CV_WRAP void setDictionaries(const std::vector& dictionaries);\n CV_WRAP void addDictionary(const Dictionary& dictionary);\n CV_WRAP void removeDictionary(size_t index);", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1959486582", - "repo_full_name": "opencv/opencv", - "pr_number": 26934, - "pr_file": "modules/objdetect/include/opencv2/objdetect/aruco_detector.hpp", - "discussion_id": "1959486582", - "commented_code": "@@ -329,15 +341,21 @@ class CV_EXPORTS_W ArucoDetector : public Algorithm\n * If camera parameters and distortion coefficients are provided, missing markers are reprojected\n * using projectPoint function. 
If not, missing marker projections are interpolated using global\n * homography, and all the marker corners in the board must have the same Z coordinate.\n+ * @note This function assumes that the board only contains markers from one dictionary, so only the\n+ * first configured dictionary is used.\n */\n CV_WRAP void refineDetectedMarkers(InputArray image, const Board &board,\n InputOutputArrayOfArrays detectedCorners,\n InputOutputArray detectedIds, InputOutputArrayOfArrays rejectedCorners,\n InputArray cameraMatrix = noArray(), InputArray distCoeffs = noArray(),\n OutputArray recoveredIdxs = noArray()) const;\n \n- CV_WRAP const Dictionary& getDictionary() const;\n- CV_WRAP void setDictionary(const Dictionary& dictionary);\n+ CV_WRAP const Dictionary& getDictionary(size_t index = 0) const;\n+ CV_WRAP void setDictionary(const Dictionary& dictionary, size_t index = 0);\n+ CV_WRAP const std::vector& getDictionaries() const;\n+ CV_WRAP void setDictionaries(const std::vector& dictionaries);\n+ CV_WRAP void addDictionary(const Dictionary& dictionary);\n+ CV_WRAP void removeDictionary(size_t index);", - "comment_created_at": "2025-02-18T10:39:42+00:00", - "comment_author": "asmorkalov", - "comment_body": "size_t does not work well with Java and Python bindings. let's use just int.\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1865541230", - "pr_number": 25584, - "pr_file": "modules/videoio/src/plugin_capture_api.hpp", - "created_at": "2024-12-02T09:47:31+00:00", - "commented_code": "CV_OUT CvPluginCapture* handle);\n}; // OpenCV_VideoIO_Capture_Plugin_API_v1_1_api_entries\n\nstruct OpenCV_VideoIO_Capture_Plugin_API_v1_2_api_entries\n{\n /** @brief Open video capture from buffer with parameters\n\n @param buffer Memory buffer\n @param params pointer on 2*n_params array of 'key,value' pairs\n @param n_params number of passed parameters\n @param[out] handle pointer on Capture handle\n\n @note API-CALL 9, API-Version == 2\n */\n CvResult (CV_API_CALL *Capture_open_buffer)(\n std::streambuf& buffer,", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1865541230", - "repo_full_name": "opencv/opencv", - "pr_number": 25584, - "pr_file": "modules/videoio/src/plugin_capture_api.hpp", - "discussion_id": "1865541230", - "commented_code": "@@ -121,6 +121,23 @@ struct OpenCV_VideoIO_Capture_Plugin_API_v1_1_api_entries\n CV_OUT CvPluginCapture* handle);\n }; // OpenCV_VideoIO_Capture_Plugin_API_v1_1_api_entries\n \n+struct OpenCV_VideoIO_Capture_Plugin_API_v1_2_api_entries\n+{\n+ /** @brief Open video capture from buffer with parameters\n+\n+ @param buffer Memory buffer\n+ @param params pointer on 2*n_params array of 'key,value' pairs\n+ @param n_params number of passed parameters\n+ @param[out] handle pointer on Capture handle\n+\n+ @note API-CALL 9, API-Version == 2\n+ */\n+ CvResult (CV_API_CALL *Capture_open_buffer)(\n+ std::streambuf& buffer,", - "comment_created_at": "2024-12-02T09:47:31+00:00", - "comment_author": "opencv-alalek", - "comment_body": "We should not pass C++ objects through low level plugin API. 
They are not usable in general.\r\n\r\nDifferent C++ ABIs are possible for OpenCV and plugin parts:\r\n- MSVS vs MinGW\r\n- GCC/Clang with different C++ standards\r\n- libc++ (GCC) / libstdc++ (Clang)", - "pr_file_module": null - }, - { - "comment_id": "1865665628", - "repo_full_name": "opencv/opencv", - "pr_number": 25584, - "pr_file": "modules/videoio/src/plugin_capture_api.hpp", - "discussion_id": "1865541230", - "commented_code": "@@ -121,6 +121,23 @@ struct OpenCV_VideoIO_Capture_Plugin_API_v1_1_api_entries\n CV_OUT CvPluginCapture* handle);\n }; // OpenCV_VideoIO_Capture_Plugin_API_v1_1_api_entries\n \n+struct OpenCV_VideoIO_Capture_Plugin_API_v1_2_api_entries\n+{\n+ /** @brief Open video capture from buffer with parameters\n+\n+ @param buffer Memory buffer\n+ @param params pointer on 2*n_params array of 'key,value' pairs\n+ @param n_params number of passed parameters\n+ @param[out] handle pointer on Capture handle\n+\n+ @note API-CALL 9, API-Version == 2\n+ */\n+ CvResult (CV_API_CALL *Capture_open_buffer)(\n+ std::streambuf& buffer,", - "comment_created_at": "2024-12-02T11:11:38+00:00", - "comment_author": "dkurt", - "comment_body": "Thanks! That was my concern. Is that a good idea to replace with functions pointers for required methods like `seek` and `read`?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1259277978", - "pr_number": 23960, - "pr_file": "modules/video/include/opencv2/video/tracking.hpp", - "created_at": "2023-07-11T07:02:13+00:00", - "commented_code": "//bool update(InputArray image, CV_OUT Rect& boundingBox) CV_OVERRIDE;\n};\n\nenum TrackState { New = 0, Tracked, Lost};\n\nclass CV_EXPORTS Detection\n{\npublic:\n int classId;\n float confidence;\n cv::Rect box;\n};\n\nclass CV_EXPORTS Strack {", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1259277978", - "repo_full_name": "opencv/opencv", - "pr_number": 23960, - "pr_file": "modules/video/include/opencv2/video/tracking.hpp", - "discussion_id": "1259277978", - "commented_code": "@@ -887,6 +888,89 @@ class CV_EXPORTS_W TrackerNano : public Tracker\n //bool update(InputArray image, CV_OUT Rect& boundingBox) CV_OVERRIDE;\n };\n \n+enum TrackState { New = 0, Tracked, Lost};\n+\n+class CV_EXPORTS Detection\n+{\n+public:\n+ int classId;\n+ float confidence;\n+ cv::Rect box;\n+};\n+\n+class CV_EXPORTS Strack {", - "comment_created_at": "2023-07-11T07:02:13+00:00", - "comment_author": "asmorkalov", - "comment_body": "Please use `CV_EXPORTS_W` and `CV_WRAP` to make it available from Python, Java and other binded languages.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-document-configuration-version-requirements.json b/_reviewers/opencv-document-configuration-version-requirements.json new file mode 100644 index 0000000..e12a8ff --- /dev/null +++ b/_reviewers/opencv-document-configuration-version-requirements.json @@ -0,0 +1,70 @@ +[ + { + "discussion_id": "2050238177", + "pr_number": 27239, + "pr_file": "platforms/android/build_sdk.py", + "created_at": "2025-04-18T07:24:16+00:00", + "commented_code": "builder.build_library(abi, do_install, args.no_media_ndk)\n\n builder.gather_results()\n\n \n check_have_ipp_flag(os.path.join(builder.libdest, \"CMakeVars.txt\"))", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2050238177", + "repo_full_name": "opencv/opencv", + "pr_number": 27239, + "pr_file": "platforms/android/build_sdk.py", + "discussion_id": "2050238177", + "commented_code": "@@ -487,7 +505,9 @@ def 
get_ndk_dir():\n builder.build_library(abi, do_install, args.no_media_ndk)\n \n builder.gather_results()\n-\n+ \n+ check_have_ipp_flag(os.path.join(builder.libdest, \"CMakeVars.txt\"))", + "comment_created_at": "2025-04-18T07:24:16+00:00", + "comment_author": "asmorkalov", + "comment_body": "I propose to add command line option, e.g. \"--strict-dependencies\" to make the check optional.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1726812141", + "pr_number": 26057, + "pr_file": "platforms/android/default.config.py", + "created_at": "2024-08-22T10:39:53+00:00", + "commented_code": "ABIs = [\n ABI(\"2\", \"armeabi-v7a\", None, 21, cmake_vars=dict(ANDROID_ABI='armeabi-v7a with NEON')),\n ABI(\"3\", \"arm64-v8a\", None, 21, cmake_vars=dict('ANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES=ON')),", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1726812141", + "repo_full_name": "opencv/opencv", + "pr_number": 26057, + "pr_file": "platforms/android/default.config.py", + "discussion_id": "1726812141", + "commented_code": "@@ -0,0 +1,6 @@\n+ABIs = [\n+ ABI(\"2\", \"armeabi-v7a\", None, 21, cmake_vars=dict(ANDROID_ABI='armeabi-v7a with NEON')),\n+ ABI(\"3\", \"arm64-v8a\", None, 21, cmake_vars=dict('ANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES=ON')),", + "comment_created_at": "2024-08-22T10:39:53+00:00", + "comment_author": "opencv-alalek", + "comment_body": "Makes sense for \"Android NDK r27 and higher\" according to documentation.\r\n\r\nWhy do not enable that by default in the main build_sdk.py script?", + "pr_file_module": null + }, + { + "comment_id": "1727047145", + "repo_full_name": "opencv/opencv", + "pr_number": 26057, + "pr_file": "platforms/android/default.config.py", + "discussion_id": "1726812141", + "commented_code": "@@ -0,0 +1,6 @@\n+ABIs = [\n+ ABI(\"2\", \"armeabi-v7a\", None, 21, cmake_vars=dict(ANDROID_ABI='armeabi-v7a with NEON')),\n+ ABI(\"3\", \"arm64-v8a\", None, 21, cmake_vars=dict('ANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES=ON')),", + "comment_created_at": "2024-08-22T13:20:09+00:00", + "comment_author": "asmorkalov", + "comment_body": "I want to switch CI to the default configuration and always build SDK and AAR packages with 16kb pages support. The packages will work with both old and new memory configuration. It just tunes ELF sections alignment a bit. 
The option will be ignored with NDK before 27.", + "pr_file_module": null + }, + { + "comment_id": "1728529743", + "repo_full_name": "opencv/opencv", + "pr_number": 26057, + "pr_file": "platforms/android/default.config.py", + "discussion_id": "1726812141", + "commented_code": "@@ -0,0 +1,6 @@\n+ABIs = [\n+ ABI(\"2\", \"armeabi-v7a\", None, 21, cmake_vars=dict(ANDROID_ABI='armeabi-v7a with NEON')),\n+ ABI(\"3\", \"arm64-v8a\", None, 21, cmake_vars=dict('ANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES=ON')),", + "comment_created_at": "2024-08-23T07:45:36+00:00", + "comment_author": "mshabunin", + "comment_body": "```suggestion\r\n ABI(\"3\", \"arm64-v8a\", None, 21, cmake_vars=dict(ANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES='ON')),\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-document-configuration-version-requirements.md b/_reviewers/opencv-document-configuration-version-requirements.md index fb1c5c2..aa12314 100644 --- a/_reviewers/opencv-document-configuration-version-requirements.md +++ b/_reviewers/opencv-document-configuration-version-requirements.md @@ -32,75 +32,3 @@ ABI("3", "arm64-v8a", None, 21, cmake_vars=dict( ``` This approach ensures that configuration options remain flexible while clearly communicating version requirements to developers, preventing unexpected behavior across different environments. - - -[ - { - "discussion_id": "2050238177", - "pr_number": 27239, - "pr_file": "platforms/android/build_sdk.py", - "created_at": "2025-04-18T07:24:16+00:00", - "commented_code": "builder.build_library(abi, do_install, args.no_media_ndk)\n\n builder.gather_results()\n\n \n check_have_ipp_flag(os.path.join(builder.libdest, \"CMakeVars.txt\"))", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2050238177", - "repo_full_name": "opencv/opencv", - "pr_number": 27239, - "pr_file": "platforms/android/build_sdk.py", - "discussion_id": "2050238177", - "commented_code": "@@ -487,7 +505,9 @@ def get_ndk_dir():\n builder.build_library(abi, do_install, args.no_media_ndk)\n \n builder.gather_results()\n-\n+ \n+ check_have_ipp_flag(os.path.join(builder.libdest, \"CMakeVars.txt\"))", - "comment_created_at": "2025-04-18T07:24:16+00:00", - "comment_author": "asmorkalov", - "comment_body": "I propose to add command line option, e.g. 
\"--strict-dependencies\" to make the check optional.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1726812141", - "pr_number": 26057, - "pr_file": "platforms/android/default.config.py", - "created_at": "2024-08-22T10:39:53+00:00", - "commented_code": "ABIs = [\n ABI(\"2\", \"armeabi-v7a\", None, 21, cmake_vars=dict(ANDROID_ABI='armeabi-v7a with NEON')),\n ABI(\"3\", \"arm64-v8a\", None, 21, cmake_vars=dict('ANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES=ON')),", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1726812141", - "repo_full_name": "opencv/opencv", - "pr_number": 26057, - "pr_file": "platforms/android/default.config.py", - "discussion_id": "1726812141", - "commented_code": "@@ -0,0 +1,6 @@\n+ABIs = [\n+ ABI(\"2\", \"armeabi-v7a\", None, 21, cmake_vars=dict(ANDROID_ABI='armeabi-v7a with NEON')),\n+ ABI(\"3\", \"arm64-v8a\", None, 21, cmake_vars=dict('ANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES=ON')),", - "comment_created_at": "2024-08-22T10:39:53+00:00", - "comment_author": "opencv-alalek", - "comment_body": "Makes sense for \"Android NDK r27 and higher\" according to documentation.\r\n\r\nWhy do not enable that by default in the main build_sdk.py script?", - "pr_file_module": null - }, - { - "comment_id": "1727047145", - "repo_full_name": "opencv/opencv", - "pr_number": 26057, - "pr_file": "platforms/android/default.config.py", - "discussion_id": "1726812141", - "commented_code": "@@ -0,0 +1,6 @@\n+ABIs = [\n+ ABI(\"2\", \"armeabi-v7a\", None, 21, cmake_vars=dict(ANDROID_ABI='armeabi-v7a with NEON')),\n+ ABI(\"3\", \"arm64-v8a\", None, 21, cmake_vars=dict('ANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES=ON')),", - "comment_created_at": "2024-08-22T13:20:09+00:00", - "comment_author": "asmorkalov", - "comment_body": "I want to switch CI to the default configuration and always build SDK and AAR packages with 16kb pages support. The packages will work with both old and new memory configuration. It just tunes ELF sections alignment a bit. The option will be ignored with NDK before 27.", - "pr_file_module": null - }, - { - "comment_id": "1728529743", - "repo_full_name": "opencv/opencv", - "pr_number": 26057, - "pr_file": "platforms/android/default.config.py", - "discussion_id": "1726812141", - "commented_code": "@@ -0,0 +1,6 @@\n+ABIs = [\n+ ABI(\"2\", \"armeabi-v7a\", None, 21, cmake_vars=dict(ANDROID_ABI='armeabi-v7a with NEON')),\n+ ABI(\"3\", \"arm64-v8a\", None, 21, cmake_vars=dict('ANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES=ON')),", - "comment_created_at": "2024-08-23T07:45:36+00:00", - "comment_author": "mshabunin", - "comment_body": "```suggestion\r\n ABI(\"3\", \"arm64-v8a\", None, 21, cmake_vars=dict(ANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES='ON')),\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-document-properly-with-references.json b/_reviewers/opencv-document-properly-with-references.json new file mode 100644 index 0000000..4c7c0f7 --- /dev/null +++ b/_reviewers/opencv-document-properly-with-references.json @@ -0,0 +1,114 @@ +[ + { + "discussion_id": "1995027406", + "pr_number": 27051, + "pr_file": "modules/photo/include/opencv2/ccm.hpp", + "created_at": "2025-03-14T07:43:22+00:00", + "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n//\n//\n// License Agreement\n// For Open Source Computer Vision Library\n//\n// Copyright(C) 2020, Huawei Technologies Co.,Ltd. 
All rights reserved.\n// Third party copyrights are property of their respective owners.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n// http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n//\n// Author: Longbu Wang \n// Jinheng Zhang \n// Chenqi Shan \n\n#ifndef OPENCV_PHOTO_CCM_HPP\n#define OPENCV_PHOTO_CCM_HPP\n\n#include \n#include \n\nnamespace cv\n{\nnamespace ccm\n{\n\n/** @defgroup ccm Color Correction module\n@{\n\nIntroduction\n------------\n\nThe purpose of color correction is to adjust the color response of input\nand output devices to a known state. The device being calibrated is sometimes\ncalled the calibration source; the color space used as the standard is sometimes\ncalled the calibration target. Color calibration has been used in many industries,\nsuch as television production, games, photography, engineering, chemistry,\nmedicine, etc. Due to the manufacturing process of the input and output equipment,\nthe channel response has nonlinear distortion. In order to correct the picture output\nof the equipment, it is nessary to calibrate the captured color and the actual color.\n\n*/\n\n\n\n/** @brief Enum of the possible types of ccm.\n*/\nenum CCM_TYPE\n{\n CCM_3x3, ///< The CCM with the shape \\f$3\\times3\\f$ performs linear transformation on color values.\n CCM_4x3, ///< The CCM with the shape \\f$4\\times3\\f$ performs affine transformation.\n};\n\n/** @brief Enum of the possible types of initial method.\n*/\nenum INITIAL_METHOD_TYPE\n{\n INITIAL_METHOD_WHITE_BALANCE, ///< The white balance method. 
The initial value is:\\n\n /// \\f$\n /// M_{CCM}=\n /// \\begin{bmatrix}\n /// k_R & 0 & 0\\\\\n /// 0 & k_G & 0\\\\\n /// 0 & 0 & k_B\\\\\n /// \\end{bmatrix}\n /// \\f$\\n\n /// where\\n\n /// \\f$\n /// k_R=mean(R_{li}')/mean(R_{li})\\\\\n /// k_R=mean(G_{li}')/mean(G_{li})\\\\\n /// k_R=mean(B_{li}')/mean(B_{li})\n /// \\f$\n INITIAL_METHOD_LEAST_SQUARE, ///0\\\\\nC_{sl}=0, \\qquad C_s=0\n\\f]\n\nBecause \\f$exp(ln(0))\\to\\infty \\f$, the channel whose component is 0 is directly mapped to 0 in the formula above.\n\nFor fitting channels respectively, we have:\n\\f[\nr=polyfit(ln(R_s),ln(R_{dl}))\\\\\ng=polyfit(ln(G_s),ln(G_{dl}))\\\\\nb=polyfit(ln(B_s),ln(B_{dl}))\\\\\n\\f]\nNote that the parameter of \\f$ln(*) \\f$ cannot be 0.\nTherefore, we need to delete the channels whose values are 0 from \\f$R_s \\f$ and \\f$R_{dl} \\f$, \\f$G_s\\f$ and \\f$G_{dl}\\f$, \\f$B_s\\f$ and \\f$B_{dl}\\f$.\n\nTherefore:\n\n\\f[\nln(R_{sl})=r(ln(R_s)), \\qquad R_s>0\\\\\nR_{sl}=0, \\qquad R_s=0\\\\\nln(G_{sl})=g(ln(G_s)),\\qquad G_s>0\\\\\nG_{sl}=0, \\qquad G_s=0\\\\\nln(B_{sl})=b(ln(B_s)),\\qquad B_s>0\\\\\nB_{sl}=0, \\qquad B_s=0\\\\\n\\f]", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1995027406", + "repo_full_name": "opencv/opencv", + "pr_number": 27051, + "pr_file": "modules/photo/include/opencv2/ccm.hpp", + "discussion_id": "1995027406", + "commented_code": "@@ -0,0 +1,520 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+//\n+//\n+// License Agreement\n+// For Open Source Computer Vision Library\n+//\n+// Copyright(C) 2020, Huawei Technologies Co.,Ltd. All rights reserved.\n+// Third party copyrights are property of their respective owners.\n+//\n+// Licensed under the Apache License, Version 2.0 (the \"License\");\n+// you may not use this file except in compliance with the License.\n+// You may obtain a copy of the License at\n+//\n+// http://www.apache.org/licenses/LICENSE-2.0\n+//\n+// Unless required by applicable law or agreed to in writing, software\n+// distributed under the License is distributed on an \"AS IS\" BASIS,\n+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+// See the License for the specific language governing permissions and\n+// limitations under the License.\n+//\n+// Author: Longbu Wang \n+// Jinheng Zhang \n+// Chenqi Shan \n+\n+#ifndef OPENCV_PHOTO_CCM_HPP\n+#define OPENCV_PHOTO_CCM_HPP\n+\n+#include \n+#include \n+\n+namespace cv\n+{\n+namespace ccm\n+{\n+\n+/** @defgroup ccm Color Correction module\n+@{\n+\n+Introduction\n+------------\n+\n+The purpose of color correction is to adjust the color response of input\n+and output devices to a known state. The device being calibrated is sometimes\n+called the calibration source; the color space used as the standard is sometimes\n+called the calibration target. Color calibration has been used in many industries,\n+such as television production, games, photography, engineering, chemistry,\n+medicine, etc. Due to the manufacturing process of the input and output equipment,\n+the channel response has nonlinear distortion. 
In order to correct the picture output\n+of the equipment, it is nessary to calibrate the captured color and the actual color.\n+\n+*/\n+\n+\n+\n+/** @brief Enum of the possible types of ccm.\n+*/\n+enum CCM_TYPE\n+{\n+ CCM_3x3, ///< The CCM with the shape \\f$3\\times3\\f$ performs linear transformation on color values.\n+ CCM_4x3, ///< The CCM with the shape \\f$4\\times3\\f$ performs affine transformation.\n+};\n+\n+/** @brief Enum of the possible types of initial method.\n+*/\n+enum INITIAL_METHOD_TYPE\n+{\n+ INITIAL_METHOD_WHITE_BALANCE, ///< The white balance method. The initial value is:\\n\n+ /// \\f$\n+ /// M_{CCM}=\n+ /// \\begin{bmatrix}\n+ /// k_R & 0 & 0\\\\\n+ /// 0 & k_G & 0\\\\\n+ /// 0 & 0 & k_B\\\\\n+ /// \\end{bmatrix}\n+ /// \\f$\\n\n+ /// where\\n\n+ /// \\f$\n+ /// k_R=mean(R_{li}')/mean(R_{li})\\\\\n+ /// k_R=mean(G_{li}')/mean(G_{li})\\\\\n+ /// k_R=mean(B_{li}')/mean(B_{li})\n+ /// \\f$\n+ INITIAL_METHOD_LEAST_SQUARE, ///0\\\\\n+C_{sl}=0, \\qquad C_s=0\n+\\f]\n+\n+Because \\f$exp(ln(0))\\to\\infty \\f$, the channel whose component is 0 is directly mapped to 0 in the formula above.\n+\n+For fitting channels respectively, we have:\n+\\f[\n+r=polyfit(ln(R_s),ln(R_{dl}))\\\\\n+g=polyfit(ln(G_s),ln(G_{dl}))\\\\\n+b=polyfit(ln(B_s),ln(B_{dl}))\\\\\n+\\f]\n+Note that the parameter of \\f$ln(*) \\f$ cannot be 0.\n+Therefore, we need to delete the channels whose values are 0 from \\f$R_s \\f$ and \\f$R_{dl} \\f$, \\f$G_s\\f$ and \\f$G_{dl}\\f$, \\f$B_s\\f$ and \\f$B_{dl}\\f$.\n+\n+Therefore:\n+\n+\\f[\n+ln(R_{sl})=r(ln(R_s)), \\qquad R_s>0\\\\\n+R_{sl}=0, \\qquad R_s=0\\\\\n+ln(G_{sl})=g(ln(G_s)),\\qquad G_s>0\\\\\n+G_{sl}=0, \\qquad G_s=0\\\\\n+ln(B_{sl})=b(ln(B_s)),\\qquad B_s>0\\\\\n+B_{sl}=0, \\qquad B_s=0\\\\\n+\\f]", + "comment_created_at": "2025-03-14T07:43:22+00:00", + "comment_author": "asmorkalov", + "comment_body": "Please move the text to tutorial (markdown) and add references to the header file.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2073350574", + "pr_number": 27051, + "pr_file": "modules/photo/src/ccm/utils.hpp", + "created_at": "2025-05-05T12:29:45+00:00", + "commented_code": "@param gamma a constant for gamma correction.\n @param dst the output array, type of Mat.\n */\nMat gammaCorrection(const Mat& src, double gamma, Mat dst=Mat());\n CV_EXPORTS void gammaCorrection(InputArray src, OutputArray dst, double gamma);", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2073350574", + "repo_full_name": "opencv/opencv", + "pr_number": 27051, + "pr_file": "modules/photo/src/ccm/utils.hpp", + "discussion_id": "2073350574", + "commented_code": "@@ -25,7 +25,7 @@ double gammaCorrection_(double element, double gamma);\n @param gamma a constant for gamma correction.\n @param dst the output array, type of Mat.\n */\n-Mat gammaCorrection(const Mat& src, double gamma, Mat dst=Mat());\n+ CV_EXPORTS void gammaCorrection(InputArray src, OutputArray dst, double gamma);", + "comment_created_at": "2025-05-05T12:29:45+00:00", + "comment_author": "asmorkalov", + "comment_body": "It should go to make ccm header to be included into documentation and bindings. 
Please CV_EXPORTS_W to generate Java and Python bindings for it too.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "562257420", + "pr_number": 19305, + "pr_file": "doc/tutorials/introduction/test_writing/accuracy_test_writing.markdown", + "created_at": "2021-01-21T23:09:29+00:00", + "commented_code": "# Writing Accuracy Tests\n\n# {#tutorial_accuracy_test_writing}\n\n`This testing framework is based on Google Tests`\n\nThere are two major types of C++ tests: *accuracy/regression* tests and *performance* tests. Each module can have two test binaries: `opencv_test_` and `opencv_perf_`, and two tests folders: `/test>` and `/perf>`. These applications can be built for every supported platform (Win/Lin/Mac/Android).\n\n## ACCURACY TESTS\n\n### Work directory\n\nAll modules have their own dir for accuracy tests: `opencv_contrib/modules/(moduleName)/test/...`\n\n### Dir Structure\n\n- `test_precomp.hpp` - file for includes\n- `test_main.cpp` - main file of test sample\n- `test_smth1.cpp` - files for tests\n- `test_smth2.cpp`\n- `...`\n\n### Test structure\n\n```c++\n// name of this case is \"name1.name2\"\nTEST(name1, name2)\n{\n ASSERT_....;\n}\n```\n\n### Tests sample example\n\n**Files:**\n\n- test_precomp.hpp\n- test_main.cpp\n- test_sum.cpp\n- test_sub.cpp\n\n\n\n```c++\n// test_precomp.hpp\n\n#ifndef __OPENCV_TEST_PRECOMP_HPP__\n#define __OPENCV_TEST_PRECOMP_HPP__", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "562257420", + "repo_full_name": "opencv/opencv", + "pr_number": 19305, + "pr_file": "doc/tutorials/introduction/test_writing/accuracy_test_writing.markdown", + "discussion_id": "562257420", + "commented_code": "@@ -0,0 +1,269 @@\n+# Writing Accuracy Tests\n+\n+# {#tutorial_accuracy_test_writing}\n+\n+`This testing framework is based on Google Tests`\n+\n+There are two major types of C++ tests: *accuracy/regression* tests and *performance* tests. Each module can have two test binaries: `opencv_test_` and `opencv_perf_`, and two tests folders: `/test>` and `/perf>`. 
These applications can be built for every supported platform (Win/Lin/Mac/Android).\n+\n+## ACCURACY TESTS\n+\n+### Work directory\n+\n+All modules have their own dir for accuracy tests: `opencv_contrib/modules/(moduleName)/test/...`\n+\n+### Dir Structure\n+\n+- `test_precomp.hpp` - file for includes\n+- `test_main.cpp` - main file of test sample\n+- `test_smth1.cpp` - files for tests\n+- `test_smth2.cpp`\n+- `...`\n+\n+### Test structure\n+\n+```c++\n+// name of this case is \"name1.name2\"\n+TEST(name1, name2)\n+{\n+ ASSERT_....;\n+}\n+```\n+\n+### Tests sample example\n+\n+**Files:**\n+\n+- test_precomp.hpp\n+- test_main.cpp\n+- test_sum.cpp\n+- test_sub.cpp\n+\n+\n+\n+```c++\n+// test_precomp.hpp\n+\n+#ifndef __OPENCV_TEST_PRECOMP_HPP__\n+#define __OPENCV_TEST_PRECOMP_HPP__", + "comment_created_at": "2021-01-21T23:09:29+00:00", + "comment_author": "alalek", + "comment_body": "There is general important rule: avoid code in documentation files/blobs/etc.\r\n\r\nCode must go into files which are regularly checked for compilation.\r\nDocumentation embeds code through `@snippet` and/or `@include`", + "pr_file_module": null + }, + { + "comment_id": "575126009", + "repo_full_name": "opencv/opencv", + "pr_number": 19305, + "pr_file": "doc/tutorials/introduction/test_writing/accuracy_test_writing.markdown", + "discussion_id": "562257420", + "commented_code": "@@ -0,0 +1,269 @@\n+# Writing Accuracy Tests\n+\n+# {#tutorial_accuracy_test_writing}\n+\n+`This testing framework is based on Google Tests`\n+\n+There are two major types of C++ tests: *accuracy/regression* tests and *performance* tests. Each module can have two test binaries: `opencv_test_` and `opencv_perf_`, and two tests folders: `/test>` and `/perf>`. These applications can be built for every supported platform (Win/Lin/Mac/Android).\n+\n+## ACCURACY TESTS\n+\n+### Work directory\n+\n+All modules have their own dir for accuracy tests: `opencv_contrib/modules/(moduleName)/test/...`\n+\n+### Dir Structure\n+\n+- `test_precomp.hpp` - file for includes\n+- `test_main.cpp` - main file of test sample\n+- `test_smth1.cpp` - files for tests\n+- `test_smth2.cpp`\n+- `...`\n+\n+### Test structure\n+\n+```c++\n+// name of this case is \"name1.name2\"\n+TEST(name1, name2)\n+{\n+ ASSERT_....;\n+}\n+```\n+\n+### Tests sample example\n+\n+**Files:**\n+\n+- test_precomp.hpp\n+- test_main.cpp\n+- test_sum.cpp\n+- test_sub.cpp\n+\n+\n+\n+```c++\n+// test_precomp.hpp\n+\n+#ifndef __OPENCV_TEST_PRECOMP_HPP__\n+#define __OPENCV_TEST_PRECOMP_HPP__", + "comment_created_at": "2021-02-12T10:30:05+00:00", + "comment_author": "DumDereDum", + "comment_body": "code is deleted", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1926898553", + "pr_number": 26669, + "pr_file": "modules/calib3d/include/opencv2/calib3d.hpp", + "created_at": "2025-01-23T12:27:20+00:00", + "commented_code": "TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 10, 1e-8)\n );\n\n /**\n @brief Finds an object pose from 3D-2D point correspondences using the RANSAC scheme for fisheye camera moodel.\n\n @param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or\n 1xN/Nx1 3-channel, where N is the number of points. vector\\ can be also passed here.\n @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,\n where N is the number of points. 
vector\\ can be also passed here.\n @param cameraMatrix Input camera intrinsic matrix \\f$\\cameramatrix{A}\\f$ .\n @param distCoeffs Input vector of distortion coefficients (4x1/1x4).\n @param rvec Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from\n the model coordinate system to the camera coordinate system.\n @param tvec Output translation vector.\n @param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses\n the provided rvec and tvec values as initial approximations of the rotation and translation\n vectors, respectively, and further optimizes them.\n @param iterationsCount Number of iterations.\n @param reprojectionError Inlier threshold value used by the RANSAC procedure. The parameter value\n is the maximum allowed distance between the observed and computed point projections to consider it\n an inlier.\n @param confidence The probability that the algorithm produces a useful result.\n @param inliers Output vector that contains indices of inliers in objectPoints and imagePoints .\n @param flags Method for solving a PnP problem: see @ref calib3d_solvePnP_flags\n This function returns the rotation and the translation vectors that transform a 3D point expressed in the object\n coordinate frame to the camera coordinate frame, using different methods:\n - P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): need 4 input points to return a unique solution.\n - @ref SOLVEPNP_IPPE Input points must be >= 4 and object points must be coplanar.\n - @ref SOLVEPNP_IPPE_SQUARE Special case suitable for marker pose estimation.\n Number of input points must be 4. Object points must be defined in the following order:\n - point 0: [-squareLength / 2, squareLength / 2, 0]\n - point 1: [ squareLength / 2, squareLength / 2, 0]\n - point 2: [ squareLength / 2, -squareLength / 2, 0]\n - point 3: [-squareLength / 2, -squareLength / 2, 0]\n - for all the other flags, number of input points must be >= 4 and object points can be in any configuration.", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1926898553", + "repo_full_name": "opencv/opencv", + "pr_number": 26669, + "pr_file": "modules/calib3d/include/opencv2/calib3d.hpp", + "discussion_id": "1926898553", + "commented_code": "@@ -4098,6 +4098,53 @@ optimization. It is the \\f$max(width,height)/\\pi\\f$ or the provided \\f$f_x\\f$, \\\n TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 10, 1e-8)\n );\n \n+ /**\n+ @brief Finds an object pose from 3D-2D point correspondences using the RANSAC scheme for fisheye camera moodel.\n+\n+ @param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or\n+ 1xN/Nx1 3-channel, where N is the number of points. vector\\ can be also passed here.\n+ @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,\n+ where N is the number of points. vector\\ can be also passed here.\n+ @param cameraMatrix Input camera intrinsic matrix \\f$\\cameramatrix{A}\\f$ .\n+ @param distCoeffs Input vector of distortion coefficients (4x1/1x4).\n+ @param rvec Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from\n+ the model coordinate system to the camera coordinate system.\n+ @param tvec Output translation vector.\n+ @param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. 
If true (1), the function uses\n+ the provided rvec and tvec values as initial approximations of the rotation and translation\n+ vectors, respectively, and further optimizes them.\n+ @param iterationsCount Number of iterations.\n+ @param reprojectionError Inlier threshold value used by the RANSAC procedure. The parameter value\n+ is the maximum allowed distance between the observed and computed point projections to consider it\n+ an inlier.\n+ @param confidence The probability that the algorithm produces a useful result.\n+ @param inliers Output vector that contains indices of inliers in objectPoints and imagePoints .\n+ @param flags Method for solving a PnP problem: see @ref calib3d_solvePnP_flags\n+ This function returns the rotation and the translation vectors that transform a 3D point expressed in the object\n+ coordinate frame to the camera coordinate frame, using different methods:\n+ - P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): need 4 input points to return a unique solution.\n+ - @ref SOLVEPNP_IPPE Input points must be >= 4 and object points must be coplanar.\n+ - @ref SOLVEPNP_IPPE_SQUARE Special case suitable for marker pose estimation.\n+ Number of input points must be 4. Object points must be defined in the following order:\n+ - point 0: [-squareLength / 2, squareLength / 2, 0]\n+ - point 1: [ squareLength / 2, squareLength / 2, 0]\n+ - point 2: [ squareLength / 2, -squareLength / 2, 0]\n+ - point 3: [-squareLength / 2, -squareLength / 2, 0]\n+ - for all the other flags, number of input points must be >= 4 and object points can be in any configuration.", + "comment_created_at": "2025-01-23T12:27:20+00:00", + "comment_author": "asmorkalov", + "comment_body": "I propose to use the same description as for cv::solvePnpRansac with fisheye model reference and add reference to SOLVEPNP_XXX constants instead of copy.", + "pr_file_module": null + }, + { + "comment_id": "1927319841", + "repo_full_name": "opencv/opencv", + "pr_number": 26669, + "pr_file": "modules/calib3d/include/opencv2/calib3d.hpp", + "discussion_id": "1926898553", + "commented_code": "@@ -4098,6 +4098,53 @@ optimization. It is the \\f$max(width,height)/\\pi\\f$ or the provided \\f$f_x\\f$, \\\n TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 10, 1e-8)\n );\n \n+ /**\n+ @brief Finds an object pose from 3D-2D point correspondences using the RANSAC scheme for fisheye camera moodel.\n+\n+ @param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or\n+ 1xN/Nx1 3-channel, where N is the number of points. vector\\ can be also passed here.\n+ @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,\n+ where N is the number of points. vector\\ can be also passed here.\n+ @param cameraMatrix Input camera intrinsic matrix \\f$\\cameramatrix{A}\\f$ .\n+ @param distCoeffs Input vector of distortion coefficients (4x1/1x4).\n+ @param rvec Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from\n+ the model coordinate system to the camera coordinate system.\n+ @param tvec Output translation vector.\n+ @param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses\n+ the provided rvec and tvec values as initial approximations of the rotation and translation\n+ vectors, respectively, and further optimizes them.\n+ @param iterationsCount Number of iterations.\n+ @param reprojectionError Inlier threshold value used by the RANSAC procedure. 
The parameter value\n+ is the maximum allowed distance between the observed and computed point projections to consider it\n+ an inlier.\n+ @param confidence The probability that the algorithm produces a useful result.\n+ @param inliers Output vector that contains indices of inliers in objectPoints and imagePoints .\n+ @param flags Method for solving a PnP problem: see @ref calib3d_solvePnP_flags\n+ This function returns the rotation and the translation vectors that transform a 3D point expressed in the object\n+ coordinate frame to the camera coordinate frame, using different methods:\n+ - P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): need 4 input points to return a unique solution.\n+ - @ref SOLVEPNP_IPPE Input points must be >= 4 and object points must be coplanar.\n+ - @ref SOLVEPNP_IPPE_SQUARE Special case suitable for marker pose estimation.\n+ Number of input points must be 4. Object points must be defined in the following order:\n+ - point 0: [-squareLength / 2, squareLength / 2, 0]\n+ - point 1: [ squareLength / 2, squareLength / 2, 0]\n+ - point 2: [ squareLength / 2, -squareLength / 2, 0]\n+ - point 3: [-squareLength / 2, -squareLength / 2, 0]\n+ - for all the other flags, number of input points must be >= 4 and object points can be in any configuration.", + "comment_created_at": "2025-01-23T16:45:10+00:00", + "comment_author": "GouMinghao", + "comment_body": "fixed", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-document-properly-with-references.md b/_reviewers/opencv-document-properly-with-references.md index b777e32..df67ac8 100644 --- a/_reviewers/opencv-document-properly-with-references.md +++ b/_reviewers/opencv-document-properly-with-references.md @@ -43,119 +43,3 @@ enum CCM_TYPE ``` With detailed documentation in a markdown file referenced by `@ref ccm_documentation` and any code examples included via `@snippet` or `@include` directives. - - -[ - { - "discussion_id": "1995027406", - "pr_number": 27051, - "pr_file": "modules/photo/include/opencv2/ccm.hpp", - "created_at": "2025-03-14T07:43:22+00:00", - "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n//\n//\n// License Agreement\n// For Open Source Computer Vision Library\n//\n// Copyright(C) 2020, Huawei Technologies Co.,Ltd. All rights reserved.\n// Third party copyrights are property of their respective owners.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n// http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n//\n// Author: Longbu Wang \n// Jinheng Zhang \n// Chenqi Shan \n\n#ifndef OPENCV_PHOTO_CCM_HPP\n#define OPENCV_PHOTO_CCM_HPP\n\n#include \n#include \n\nnamespace cv\n{\nnamespace ccm\n{\n\n/** @defgroup ccm Color Correction module\n@{\n\nIntroduction\n------------\n\nThe purpose of color correction is to adjust the color response of input\nand output devices to a known state. 
The device being calibrated is sometimes\ncalled the calibration source; the color space used as the standard is sometimes\ncalled the calibration target. Color calibration has been used in many industries,\nsuch as television production, games, photography, engineering, chemistry,\nmedicine, etc. Due to the manufacturing process of the input and output equipment,\nthe channel response has nonlinear distortion. In order to correct the picture output\nof the equipment, it is nessary to calibrate the captured color and the actual color.\n\n*/\n\n\n\n/** @brief Enum of the possible types of ccm.\n*/\nenum CCM_TYPE\n{\n CCM_3x3, ///< The CCM with the shape \\f$3\\times3\\f$ performs linear transformation on color values.\n CCM_4x3, ///< The CCM with the shape \\f$4\\times3\\f$ performs affine transformation.\n};\n\n/** @brief Enum of the possible types of initial method.\n*/\nenum INITIAL_METHOD_TYPE\n{\n INITIAL_METHOD_WHITE_BALANCE, ///< The white balance method. The initial value is:\\n\n /// \\f$\n /// M_{CCM}=\n /// \\begin{bmatrix}\n /// k_R & 0 & 0\\\\\n /// 0 & k_G & 0\\\\\n /// 0 & 0 & k_B\\\\\n /// \\end{bmatrix}\n /// \\f$\\n\n /// where\\n\n /// \\f$\n /// k_R=mean(R_{li}')/mean(R_{li})\\\\\n /// k_R=mean(G_{li}')/mean(G_{li})\\\\\n /// k_R=mean(B_{li}')/mean(B_{li})\n /// \\f$\n INITIAL_METHOD_LEAST_SQUARE, ///0\\\\\nC_{sl}=0, \\qquad C_s=0\n\\f]\n\nBecause \\f$exp(ln(0))\\to\\infty \\f$, the channel whose component is 0 is directly mapped to 0 in the formula above.\n\nFor fitting channels respectively, we have:\n\\f[\nr=polyfit(ln(R_s),ln(R_{dl}))\\\\\ng=polyfit(ln(G_s),ln(G_{dl}))\\\\\nb=polyfit(ln(B_s),ln(B_{dl}))\\\\\n\\f]\nNote that the parameter of \\f$ln(*) \\f$ cannot be 0.\nTherefore, we need to delete the channels whose values are 0 from \\f$R_s \\f$ and \\f$R_{dl} \\f$, \\f$G_s\\f$ and \\f$G_{dl}\\f$, \\f$B_s\\f$ and \\f$B_{dl}\\f$.\n\nTherefore:\n\n\\f[\nln(R_{sl})=r(ln(R_s)), \\qquad R_s>0\\\\\nR_{sl}=0, \\qquad R_s=0\\\\\nln(G_{sl})=g(ln(G_s)),\\qquad G_s>0\\\\\nG_{sl}=0, \\qquad G_s=0\\\\\nln(B_{sl})=b(ln(B_s)),\\qquad B_s>0\\\\\nB_{sl}=0, \\qquad B_s=0\\\\\n\\f]", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1995027406", - "repo_full_name": "opencv/opencv", - "pr_number": 27051, - "pr_file": "modules/photo/include/opencv2/ccm.hpp", - "discussion_id": "1995027406", - "commented_code": "@@ -0,0 +1,520 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+//\n+//\n+// License Agreement\n+// For Open Source Computer Vision Library\n+//\n+// Copyright(C) 2020, Huawei Technologies Co.,Ltd. 
All rights reserved.\n+// Third party copyrights are property of their respective owners.\n+//\n+// Licensed under the Apache License, Version 2.0 (the \"License\");\n+// you may not use this file except in compliance with the License.\n+// You may obtain a copy of the License at\n+//\n+// http://www.apache.org/licenses/LICENSE-2.0\n+//\n+// Unless required by applicable law or agreed to in writing, software\n+// distributed under the License is distributed on an \"AS IS\" BASIS,\n+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+// See the License for the specific language governing permissions and\n+// limitations under the License.\n+//\n+// Author: Longbu Wang \n+// Jinheng Zhang \n+// Chenqi Shan \n+\n+#ifndef OPENCV_PHOTO_CCM_HPP\n+#define OPENCV_PHOTO_CCM_HPP\n+\n+#include \n+#include \n+\n+namespace cv\n+{\n+namespace ccm\n+{\n+\n+/** @defgroup ccm Color Correction module\n+@{\n+\n+Introduction\n+------------\n+\n+The purpose of color correction is to adjust the color response of input\n+and output devices to a known state. The device being calibrated is sometimes\n+called the calibration source; the color space used as the standard is sometimes\n+called the calibration target. Color calibration has been used in many industries,\n+such as television production, games, photography, engineering, chemistry,\n+medicine, etc. Due to the manufacturing process of the input and output equipment,\n+the channel response has nonlinear distortion. In order to correct the picture output\n+of the equipment, it is nessary to calibrate the captured color and the actual color.\n+\n+*/\n+\n+\n+\n+/** @brief Enum of the possible types of ccm.\n+*/\n+enum CCM_TYPE\n+{\n+ CCM_3x3, ///< The CCM with the shape \\f$3\\times3\\f$ performs linear transformation on color values.\n+ CCM_4x3, ///< The CCM with the shape \\f$4\\times3\\f$ performs affine transformation.\n+};\n+\n+/** @brief Enum of the possible types of initial method.\n+*/\n+enum INITIAL_METHOD_TYPE\n+{\n+ INITIAL_METHOD_WHITE_BALANCE, ///< The white balance method. 
The initial value is:\\n\n+ /// \\f$\n+ /// M_{CCM}=\n+ /// \\begin{bmatrix}\n+ /// k_R & 0 & 0\\\\\n+ /// 0 & k_G & 0\\\\\n+ /// 0 & 0 & k_B\\\\\n+ /// \\end{bmatrix}\n+ /// \\f$\\n\n+ /// where\\n\n+ /// \\f$\n+ /// k_R=mean(R_{li}')/mean(R_{li})\\\\\n+ /// k_R=mean(G_{li}')/mean(G_{li})\\\\\n+ /// k_R=mean(B_{li}')/mean(B_{li})\n+ /// \\f$\n+ INITIAL_METHOD_LEAST_SQUARE, ///0\\\\\n+C_{sl}=0, \\qquad C_s=0\n+\\f]\n+\n+Because \\f$exp(ln(0))\\to\\infty \\f$, the channel whose component is 0 is directly mapped to 0 in the formula above.\n+\n+For fitting channels respectively, we have:\n+\\f[\n+r=polyfit(ln(R_s),ln(R_{dl}))\\\\\n+g=polyfit(ln(G_s),ln(G_{dl}))\\\\\n+b=polyfit(ln(B_s),ln(B_{dl}))\\\\\n+\\f]\n+Note that the parameter of \\f$ln(*) \\f$ cannot be 0.\n+Therefore, we need to delete the channels whose values are 0 from \\f$R_s \\f$ and \\f$R_{dl} \\f$, \\f$G_s\\f$ and \\f$G_{dl}\\f$, \\f$B_s\\f$ and \\f$B_{dl}\\f$.\n+\n+Therefore:\n+\n+\\f[\n+ln(R_{sl})=r(ln(R_s)), \\qquad R_s>0\\\\\n+R_{sl}=0, \\qquad R_s=0\\\\\n+ln(G_{sl})=g(ln(G_s)),\\qquad G_s>0\\\\\n+G_{sl}=0, \\qquad G_s=0\\\\\n+ln(B_{sl})=b(ln(B_s)),\\qquad B_s>0\\\\\n+B_{sl}=0, \\qquad B_s=0\\\\\n+\\f]", - "comment_created_at": "2025-03-14T07:43:22+00:00", - "comment_author": "asmorkalov", - "comment_body": "Please move the text to tutorial (markdown) and add references to the header file.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2073350574", - "pr_number": 27051, - "pr_file": "modules/photo/src/ccm/utils.hpp", - "created_at": "2025-05-05T12:29:45+00:00", - "commented_code": "@param gamma a constant for gamma correction.\n @param dst the output array, type of Mat.\n */\nMat gammaCorrection(const Mat& src, double gamma, Mat dst=Mat());\n CV_EXPORTS void gammaCorrection(InputArray src, OutputArray dst, double gamma);", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2073350574", - "repo_full_name": "opencv/opencv", - "pr_number": 27051, - "pr_file": "modules/photo/src/ccm/utils.hpp", - "discussion_id": "2073350574", - "commented_code": "@@ -25,7 +25,7 @@ double gammaCorrection_(double element, double gamma);\n @param gamma a constant for gamma correction.\n @param dst the output array, type of Mat.\n */\n-Mat gammaCorrection(const Mat& src, double gamma, Mat dst=Mat());\n+ CV_EXPORTS void gammaCorrection(InputArray src, OutputArray dst, double gamma);", - "comment_created_at": "2025-05-05T12:29:45+00:00", - "comment_author": "asmorkalov", - "comment_body": "It should go to make ccm header to be included into documentation and bindings. Please CV_EXPORTS_W to generate Java and Python bindings for it too.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "562257420", - "pr_number": 19305, - "pr_file": "doc/tutorials/introduction/test_writing/accuracy_test_writing.markdown", - "created_at": "2021-01-21T23:09:29+00:00", - "commented_code": "# Writing Accuracy Tests\n\n# {#tutorial_accuracy_test_writing}\n\n`This testing framework is based on Google Tests`\n\nThere are two major types of C++ tests: *accuracy/regression* tests and *performance* tests. Each module can have two test binaries: `opencv_test_` and `opencv_perf_`, and two tests folders: `/test>` and `/perf>`. 
These applications can be built for every supported platform (Win/Lin/Mac/Android).\n\n## ACCURACY TESTS\n\n### Work directory\n\nAll modules have their own dir for accuracy tests: `opencv_contrib/modules/(moduleName)/test/...`\n\n### Dir Structure\n\n- `test_precomp.hpp` - file for includes\n- `test_main.cpp` - main file of test sample\n- `test_smth1.cpp` - files for tests\n- `test_smth2.cpp`\n- `...`\n\n### Test structure\n\n```c++\n// name of this case is \"name1.name2\"\nTEST(name1, name2)\n{\n ASSERT_....;\n}\n```\n\n### Tests sample example\n\n**Files:**\n\n- test_precomp.hpp\n- test_main.cpp\n- test_sum.cpp\n- test_sub.cpp\n\n\n\n```c++\n// test_precomp.hpp\n\n#ifndef __OPENCV_TEST_PRECOMP_HPP__\n#define __OPENCV_TEST_PRECOMP_HPP__", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "562257420", - "repo_full_name": "opencv/opencv", - "pr_number": 19305, - "pr_file": "doc/tutorials/introduction/test_writing/accuracy_test_writing.markdown", - "discussion_id": "562257420", - "commented_code": "@@ -0,0 +1,269 @@\n+# Writing Accuracy Tests\n+\n+# {#tutorial_accuracy_test_writing}\n+\n+`This testing framework is based on Google Tests`\n+\n+There are two major types of C++ tests: *accuracy/regression* tests and *performance* tests. Each module can have two test binaries: `opencv_test_` and `opencv_perf_`, and two tests folders: `/test>` and `/perf>`. These applications can be built for every supported platform (Win/Lin/Mac/Android).\n+\n+## ACCURACY TESTS\n+\n+### Work directory\n+\n+All modules have their own dir for accuracy tests: `opencv_contrib/modules/(moduleName)/test/...`\n+\n+### Dir Structure\n+\n+- `test_precomp.hpp` - file for includes\n+- `test_main.cpp` - main file of test sample\n+- `test_smth1.cpp` - files for tests\n+- `test_smth2.cpp`\n+- `...`\n+\n+### Test structure\n+\n+```c++\n+// name of this case is \"name1.name2\"\n+TEST(name1, name2)\n+{\n+ ASSERT_....;\n+}\n+```\n+\n+### Tests sample example\n+\n+**Files:**\n+\n+- test_precomp.hpp\n+- test_main.cpp\n+- test_sum.cpp\n+- test_sub.cpp\n+\n+\n+\n+```c++\n+// test_precomp.hpp\n+\n+#ifndef __OPENCV_TEST_PRECOMP_HPP__\n+#define __OPENCV_TEST_PRECOMP_HPP__", - "comment_created_at": "2021-01-21T23:09:29+00:00", - "comment_author": "alalek", - "comment_body": "There is general important rule: avoid code in documentation files/blobs/etc.\r\n\r\nCode must go into files which are regularly checked for compilation.\r\nDocumentation embeds code through `@snippet` and/or `@include`", - "pr_file_module": null - }, - { - "comment_id": "575126009", - "repo_full_name": "opencv/opencv", - "pr_number": 19305, - "pr_file": "doc/tutorials/introduction/test_writing/accuracy_test_writing.markdown", - "discussion_id": "562257420", - "commented_code": "@@ -0,0 +1,269 @@\n+# Writing Accuracy Tests\n+\n+# {#tutorial_accuracy_test_writing}\n+\n+`This testing framework is based on Google Tests`\n+\n+There are two major types of C++ tests: *accuracy/regression* tests and *performance* tests. Each module can have two test binaries: `opencv_test_` and `opencv_perf_`, and two tests folders: `/test>` and `/perf>`. 
These applications can be built for every supported platform (Win/Lin/Mac/Android).\n+\n+## ACCURACY TESTS\n+\n+### Work directory\n+\n+All modules have their own dir for accuracy tests: `opencv_contrib/modules/(moduleName)/test/...`\n+\n+### Dir Structure\n+\n+- `test_precomp.hpp` - file for includes\n+- `test_main.cpp` - main file of test sample\n+- `test_smth1.cpp` - files for tests\n+- `test_smth2.cpp`\n+- `...`\n+\n+### Test structure\n+\n+```c++\n+// name of this case is \"name1.name2\"\n+TEST(name1, name2)\n+{\n+ ASSERT_....;\n+}\n+```\n+\n+### Tests sample example\n+\n+**Files:**\n+\n+- test_precomp.hpp\n+- test_main.cpp\n+- test_sum.cpp\n+- test_sub.cpp\n+\n+\n+\n+```c++\n+// test_precomp.hpp\n+\n+#ifndef __OPENCV_TEST_PRECOMP_HPP__\n+#define __OPENCV_TEST_PRECOMP_HPP__", - "comment_created_at": "2021-02-12T10:30:05+00:00", - "comment_author": "DumDereDum", - "comment_body": "code is deleted", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1926898553", - "pr_number": 26669, - "pr_file": "modules/calib3d/include/opencv2/calib3d.hpp", - "created_at": "2025-01-23T12:27:20+00:00", - "commented_code": "TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 10, 1e-8)\n );\n\n /**\n @brief Finds an object pose from 3D-2D point correspondences using the RANSAC scheme for fisheye camera moodel.\n\n @param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or\n 1xN/Nx1 3-channel, where N is the number of points. vector\\ can be also passed here.\n @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,\n where N is the number of points. vector\\ can be also passed here.\n @param cameraMatrix Input camera intrinsic matrix \\f$\\cameramatrix{A}\\f$ .\n @param distCoeffs Input vector of distortion coefficients (4x1/1x4).\n @param rvec Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from\n the model coordinate system to the camera coordinate system.\n @param tvec Output translation vector.\n @param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses\n the provided rvec and tvec values as initial approximations of the rotation and translation\n vectors, respectively, and further optimizes them.\n @param iterationsCount Number of iterations.\n @param reprojectionError Inlier threshold value used by the RANSAC procedure. The parameter value\n is the maximum allowed distance between the observed and computed point projections to consider it\n an inlier.\n @param confidence The probability that the algorithm produces a useful result.\n @param inliers Output vector that contains indices of inliers in objectPoints and imagePoints .\n @param flags Method for solving a PnP problem: see @ref calib3d_solvePnP_flags\n This function returns the rotation and the translation vectors that transform a 3D point expressed in the object\n coordinate frame to the camera coordinate frame, using different methods:\n - P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): need 4 input points to return a unique solution.\n - @ref SOLVEPNP_IPPE Input points must be >= 4 and object points must be coplanar.\n - @ref SOLVEPNP_IPPE_SQUARE Special case suitable for marker pose estimation.\n Number of input points must be 4. 
Object points must be defined in the following order:\n - point 0: [-squareLength / 2, squareLength / 2, 0]\n - point 1: [ squareLength / 2, squareLength / 2, 0]\n - point 2: [ squareLength / 2, -squareLength / 2, 0]\n - point 3: [-squareLength / 2, -squareLength / 2, 0]\n - for all the other flags, number of input points must be >= 4 and object points can be in any configuration.", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1926898553", - "repo_full_name": "opencv/opencv", - "pr_number": 26669, - "pr_file": "modules/calib3d/include/opencv2/calib3d.hpp", - "discussion_id": "1926898553", - "commented_code": "@@ -4098,6 +4098,53 @@ optimization. It is the \\f$max(width,height)/\\pi\\f$ or the provided \\f$f_x\\f$, \\\n TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 10, 1e-8)\n );\n \n+ /**\n+ @brief Finds an object pose from 3D-2D point correspondences using the RANSAC scheme for fisheye camera moodel.\n+\n+ @param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or\n+ 1xN/Nx1 3-channel, where N is the number of points. vector\\ can be also passed here.\n+ @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,\n+ where N is the number of points. vector\\ can be also passed here.\n+ @param cameraMatrix Input camera intrinsic matrix \\f$\\cameramatrix{A}\\f$ .\n+ @param distCoeffs Input vector of distortion coefficients (4x1/1x4).\n+ @param rvec Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from\n+ the model coordinate system to the camera coordinate system.\n+ @param tvec Output translation vector.\n+ @param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses\n+ the provided rvec and tvec values as initial approximations of the rotation and translation\n+ vectors, respectively, and further optimizes them.\n+ @param iterationsCount Number of iterations.\n+ @param reprojectionError Inlier threshold value used by the RANSAC procedure. The parameter value\n+ is the maximum allowed distance between the observed and computed point projections to consider it\n+ an inlier.\n+ @param confidence The probability that the algorithm produces a useful result.\n+ @param inliers Output vector that contains indices of inliers in objectPoints and imagePoints .\n+ @param flags Method for solving a PnP problem: see @ref calib3d_solvePnP_flags\n+ This function returns the rotation and the translation vectors that transform a 3D point expressed in the object\n+ coordinate frame to the camera coordinate frame, using different methods:\n+ - P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): need 4 input points to return a unique solution.\n+ - @ref SOLVEPNP_IPPE Input points must be >= 4 and object points must be coplanar.\n+ - @ref SOLVEPNP_IPPE_SQUARE Special case suitable for marker pose estimation.\n+ Number of input points must be 4. 
Object points must be defined in the following order:\n+ - point 0: [-squareLength / 2, squareLength / 2, 0]\n+ - point 1: [ squareLength / 2, squareLength / 2, 0]\n+ - point 2: [ squareLength / 2, -squareLength / 2, 0]\n+ - point 3: [-squareLength / 2, -squareLength / 2, 0]\n+ - for all the other flags, number of input points must be >= 4 and object points can be in any configuration.", - "comment_created_at": "2025-01-23T12:27:20+00:00", - "comment_author": "asmorkalov", - "comment_body": "I propose to use the same description as for cv::solvePnpRansac with fisheye model reference and add reference to SOLVEPNP_XXX constants instead of copy.", - "pr_file_module": null - }, - { - "comment_id": "1927319841", - "repo_full_name": "opencv/opencv", - "pr_number": 26669, - "pr_file": "modules/calib3d/include/opencv2/calib3d.hpp", - "discussion_id": "1926898553", - "commented_code": "@@ -4098,6 +4098,53 @@ optimization. It is the \\f$max(width,height)/\\pi\\f$ or the provided \\f$f_x\\f$, \\\n TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 10, 1e-8)\n );\n \n+ /**\n+ @brief Finds an object pose from 3D-2D point correspondences using the RANSAC scheme for fisheye camera moodel.\n+\n+ @param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or\n+ 1xN/Nx1 3-channel, where N is the number of points. vector\\ can be also passed here.\n+ @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,\n+ where N is the number of points. vector\\ can be also passed here.\n+ @param cameraMatrix Input camera intrinsic matrix \\f$\\cameramatrix{A}\\f$ .\n+ @param distCoeffs Input vector of distortion coefficients (4x1/1x4).\n+ @param rvec Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from\n+ the model coordinate system to the camera coordinate system.\n+ @param tvec Output translation vector.\n+ @param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses\n+ the provided rvec and tvec values as initial approximations of the rotation and translation\n+ vectors, respectively, and further optimizes them.\n+ @param iterationsCount Number of iterations.\n+ @param reprojectionError Inlier threshold value used by the RANSAC procedure. The parameter value\n+ is the maximum allowed distance between the observed and computed point projections to consider it\n+ an inlier.\n+ @param confidence The probability that the algorithm produces a useful result.\n+ @param inliers Output vector that contains indices of inliers in objectPoints and imagePoints .\n+ @param flags Method for solving a PnP problem: see @ref calib3d_solvePnP_flags\n+ This function returns the rotation and the translation vectors that transform a 3D point expressed in the object\n+ coordinate frame to the camera coordinate frame, using different methods:\n+ - P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): need 4 input points to return a unique solution.\n+ - @ref SOLVEPNP_IPPE Input points must be >= 4 and object points must be coplanar.\n+ - @ref SOLVEPNP_IPPE_SQUARE Special case suitable for marker pose estimation.\n+ Number of input points must be 4. 
Object points must be defined in the following order:\n+ - point 0: [-squareLength / 2, squareLength / 2, 0]\n+ - point 1: [ squareLength / 2, squareLength / 2, 0]\n+ - point 2: [ squareLength / 2, -squareLength / 2, 0]\n+ - point 3: [-squareLength / 2, -squareLength / 2, 0]\n+ - for all the other flags, number of input points must be >= 4 and object points can be in any configuration.", - "comment_created_at": "2025-01-23T16:45:10+00:00", - "comment_author": "GouMinghao", - "comment_body": "fixed", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-feature-flag-convention.json b/_reviewers/opencv-feature-flag-convention.json new file mode 100644 index 0000000..7a2f858 --- /dev/null +++ b/_reviewers/opencv-feature-flag-convention.json @@ -0,0 +1,128 @@ +[ + { + "discussion_id": "1335266321", + "pr_number": 22704, + "pr_file": "modules/core/CMakeLists.txt", + "created_at": "2023-09-24T23:57:22+00:00", + "commented_code": "if(OPENCV_LIBVA_LINK)\n ocv_append_source_file_compile_definitions(\"${CMAKE_CURRENT_LIST_DIR}/src/va_intel.cpp\" \"OPENCV_LIBVA_LINK=1\")\nendif()\nif(OPENCV_ENABLE_EGL_INTEROP)\n ocv_append_source_file_compile_definitions(\"${CMAKE_CURRENT_LIST_DIR}/src/opengl.cpp\" \"HAVE_EGL_INTEROP\")", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1335266321", + "repo_full_name": "opencv/opencv", + "pr_number": 22704, + "pr_file": "modules/core/CMakeLists.txt", + "discussion_id": "1335266321", + "commented_code": "@@ -107,6 +107,9 @@ endif()\n if(OPENCV_LIBVA_LINK)\n ocv_append_source_file_compile_definitions(\"${CMAKE_CURRENT_LIST_DIR}/src/va_intel.cpp\" \"OPENCV_LIBVA_LINK=1\")\n endif()\n+if(OPENCV_ENABLE_EGL_INTEROP)\n+ ocv_append_source_file_compile_definitions(\"${CMAKE_CURRENT_LIST_DIR}/src/opengl.cpp\" \"HAVE_EGL_INTEROP\")", + "comment_created_at": "2023-09-24T23:57:22+00:00", + "comment_author": "opencv-alalek", + "comment_body": "Perhaps `HAVE_EGL` and `HAVE_OPENGL` should be checked too.", + "pr_file_module": null + }, + { + "comment_id": "1335292018", + "repo_full_name": "opencv/opencv", + "pr_number": 22704, + "pr_file": "modules/core/CMakeLists.txt", + "discussion_id": "1335266321", + "commented_code": "@@ -107,6 +107,9 @@ endif()\n if(OPENCV_LIBVA_LINK)\n ocv_append_source_file_compile_definitions(\"${CMAKE_CURRENT_LIST_DIR}/src/va_intel.cpp\" \"OPENCV_LIBVA_LINK=1\")\n endif()\n+if(OPENCV_ENABLE_EGL_INTEROP)\n+ ocv_append_source_file_compile_definitions(\"${CMAKE_CURRENT_LIST_DIR}/src/opengl.cpp\" \"HAVE_EGL_INTEROP\")", + "comment_created_at": "2023-09-25T01:07:42+00:00", + "comment_author": "kallaballa", + "comment_body": "It does in the main [CMakeLists.txt](https://github.com/opencv/opencv/pull/22704/files/14d9a17c1a5a09efad8bea2c8cb4c8b55552afe1#diff-1e7de1ae2d059d21e1dd75d5812d5a34b0222cef273b7c3a2af62eb747f9d20aR326)", + "pr_file_module": null + }, + { + "comment_id": "1338040779", + "repo_full_name": "opencv/opencv", + "pr_number": 22704, + "pr_file": "modules/core/CMakeLists.txt", + "discussion_id": "1335266321", + "commented_code": "@@ -107,6 +107,9 @@ endif()\n if(OPENCV_LIBVA_LINK)\n ocv_append_source_file_compile_definitions(\"${CMAKE_CURRENT_LIST_DIR}/src/va_intel.cpp\" \"OPENCV_LIBVA_LINK=1\")\n endif()\n+if(OPENCV_ENABLE_EGL_INTEROP)\n+ ocv_append_source_file_compile_definitions(\"${CMAKE_CURRENT_LIST_DIR}/src/opengl.cpp\" \"HAVE_EGL_INTEROP\")", + "comment_created_at": "2023-09-27T04:55:53+00:00", + "comment_author": "opencv-alalek", + "comment_body": "Not really.\r\n\r\nUser could specify 
`WITH_OPENGL=ON` but there could be no OpenCL dev packages on build system (or for build target platform in case of cross-compiling).\r\n\r\n`WITH_` / `OPENCV_ENABLE_` is a user intention (lets enable this if available).\r\nNext, `find_package()` should be executed.\r\nPreferable `try_compile()` should be run to validate dependency configuration.\r\nAfter checks above `HAVE_` variable is set to `ON` / `1`.\r\nFeature usage should be guarded by `HAVE_` checks.\r\n\r\nAll dependencies should be properly detected on configuration (cmake) stage.\r\nBuild stage (make) should not fail due to dependency misusing.", + "pr_file_module": null + }, + { + "comment_id": "1338119972", + "repo_full_name": "opencv/opencv", + "pr_number": 22704, + "pr_file": "modules/core/CMakeLists.txt", + "discussion_id": "1335266321", + "commented_code": "@@ -107,6 +107,9 @@ endif()\n if(OPENCV_LIBVA_LINK)\n ocv_append_source_file_compile_definitions(\"${CMAKE_CURRENT_LIST_DIR}/src/va_intel.cpp\" \"OPENCV_LIBVA_LINK=1\")\n endif()\n+if(OPENCV_ENABLE_EGL_INTEROP)\n+ ocv_append_source_file_compile_definitions(\"${CMAKE_CURRENT_LIST_DIR}/src/opengl.cpp\" \"HAVE_EGL_INTEROP\")", + "comment_created_at": "2023-09-27T06:47:21+00:00", + "comment_author": "kallaballa", + "comment_body": "I see now. Will probably create a PR today.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2107101601", + "pr_number": 27355, + "pr_file": "modules/gapi/CMakeLists.txt", + "created_at": "2025-05-26T11:12:51+00:00", + "commented_code": "ocv_target_link_libraries(${the_module} PRIVATE wsock32 ws2_32)\nendif()\n\nocv_option(OPENCV_GAPU_MSMF \"Build G-API with MS Media Foundation support\" HAVE_MSMF)\nif(HAVE_MSMF AND OPENCV_GAPI_MSMF)", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2107101601", + "repo_full_name": "opencv/opencv", + "pr_number": 27355, + "pr_file": "modules/gapi/CMakeLists.txt", + "discussion_id": "2107101601", + "commented_code": "@@ -380,6 +380,15 @@ if(WIN32)\n ocv_target_link_libraries(${the_module} PRIVATE wsock32 ws2_32)\n endif()\n \n+ocv_option(OPENCV_GAPU_MSMF \"Build G-API with MS Media Foundation support\" HAVE_MSMF)\n+if(HAVE_MSMF AND OPENCV_GAPI_MSMF)", + "comment_created_at": "2025-05-26T11:12:51+00:00", + "comment_author": "opencv-alalek", + "comment_body": "`TARGET ocv.3rdparty.msmf` should be used instead of `HAVE_MSMF`.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2033872925", + "pr_number": 27194, + "pr_file": "3rdparty/ipphal/CMakeLists.txt", + "created_at": "2025-04-08T19:10:53+00:00", + "commented_code": "\"${CMAKE_CURRENT_SOURCE_DIR}/src/mean_ipp.cpp\"\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/minmax_ipp.cpp\"\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/norm_ipp.cpp\"\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/transforms_ipp.cpp\"\n)\n\nif(HAVE_IPP_ICV)\n target_compile_definitions(ipphal PRIVATE HAVE_IPP_ICV)\nendif()\n\nif(HAVE_IPP_IW)\n target_compile_definitions(ipphal PRIVATE HAVE_IPP_IW)", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2033872925", + "repo_full_name": "opencv/opencv", + "pr_number": 27194, + "pr_file": "3rdparty/ipphal/CMakeLists.txt", + "discussion_id": "2033872925", + "commented_code": "@@ -11,13 +11,19 @@ add_library(ipphal STATIC\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/mean_ipp.cpp\"\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/minmax_ipp.cpp\"\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/norm_ipp.cpp\"\n+ \"${CMAKE_CURRENT_SOURCE_DIR}/src/transforms_ipp.cpp\"\n )\n \n if(HAVE_IPP_ICV)\n 
target_compile_definitions(ipphal PRIVATE HAVE_IPP_ICV)\n endif()\n \n+if(HAVE_IPP_IW)\n+ target_compile_definitions(ipphal PRIVATE HAVE_IPP_IW)", + "comment_created_at": "2025-04-08T19:10:53+00:00", + "comment_author": "opencv-alalek", + "comment_body": "PRIVATE?\r\n\r\nThis macro is used in \"interface\" headers.", + "pr_file_module": null + }, + { + "comment_id": "2034495599", + "repo_full_name": "opencv/opencv", + "pr_number": 27194, + "pr_file": "3rdparty/ipphal/CMakeLists.txt", + "discussion_id": "2033872925", + "commented_code": "@@ -11,13 +11,19 @@ add_library(ipphal STATIC\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/mean_ipp.cpp\"\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/minmax_ipp.cpp\"\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/norm_ipp.cpp\"\n+ \"${CMAKE_CURRENT_SOURCE_DIR}/src/transforms_ipp.cpp\"\n )\n \n if(HAVE_IPP_ICV)\n target_compile_definitions(ipphal PRIVATE HAVE_IPP_ICV)\n endif()\n \n+if(HAVE_IPP_IW)\n+ target_compile_definitions(ipphal PRIVATE HAVE_IPP_IW)", + "comment_created_at": "2025-04-09T05:51:48+00:00", + "comment_author": "asmorkalov", + "comment_body": "Good point. Fixed.", + "pr_file_module": null + }, + { + "comment_id": "2036555700", + "repo_full_name": "opencv/opencv", + "pr_number": 27194, + "pr_file": "3rdparty/ipphal/CMakeLists.txt", + "discussion_id": "2033872925", + "commented_code": "@@ -11,13 +11,19 @@ add_library(ipphal STATIC\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/mean_ipp.cpp\"\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/minmax_ipp.cpp\"\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/norm_ipp.cpp\"\n+ \"${CMAKE_CURRENT_SOURCE_DIR}/src/transforms_ipp.cpp\"\n )\n \n if(HAVE_IPP_ICV)\n target_compile_definitions(ipphal PRIVATE HAVE_IPP_ICV)\n endif()\n \n+if(HAVE_IPP_IW)\n+ target_compile_definitions(ipphal PRIVATE HAVE_IPP_IW)", + "comment_created_at": "2025-04-10T05:52:28+00:00", + "comment_author": "asmorkalov", + "comment_body": "Looks like I cannot make it public for now. We get redefinition issue. The macros are defined by both HAL and IPP core. I made it private for now to exclude the redefinition issue and added note to CMake.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-feature-flag-convention.md b/_reviewers/opencv-feature-flag-convention.md index af5ade2..9535654 100644 --- a/_reviewers/opencv-feature-flag-convention.md +++ b/_reviewers/opencv-feature-flag-convention.md @@ -45,133 +45,3 @@ endif() ``` This convention ensures all dependencies are properly detected during configuration stage, not build stage, preventing build failures due to missing dependencies. It clearly separates user intent from actual availability and promotes modern CMake practices. 
- - -[ - { - "discussion_id": "1335266321", - "pr_number": 22704, - "pr_file": "modules/core/CMakeLists.txt", - "created_at": "2023-09-24T23:57:22+00:00", - "commented_code": "if(OPENCV_LIBVA_LINK)\n ocv_append_source_file_compile_definitions(\"${CMAKE_CURRENT_LIST_DIR}/src/va_intel.cpp\" \"OPENCV_LIBVA_LINK=1\")\nendif()\nif(OPENCV_ENABLE_EGL_INTEROP)\n ocv_append_source_file_compile_definitions(\"${CMAKE_CURRENT_LIST_DIR}/src/opengl.cpp\" \"HAVE_EGL_INTEROP\")", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1335266321", - "repo_full_name": "opencv/opencv", - "pr_number": 22704, - "pr_file": "modules/core/CMakeLists.txt", - "discussion_id": "1335266321", - "commented_code": "@@ -107,6 +107,9 @@ endif()\n if(OPENCV_LIBVA_LINK)\n ocv_append_source_file_compile_definitions(\"${CMAKE_CURRENT_LIST_DIR}/src/va_intel.cpp\" \"OPENCV_LIBVA_LINK=1\")\n endif()\n+if(OPENCV_ENABLE_EGL_INTEROP)\n+ ocv_append_source_file_compile_definitions(\"${CMAKE_CURRENT_LIST_DIR}/src/opengl.cpp\" \"HAVE_EGL_INTEROP\")", - "comment_created_at": "2023-09-24T23:57:22+00:00", - "comment_author": "opencv-alalek", - "comment_body": "Perhaps `HAVE_EGL` and `HAVE_OPENGL` should be checked too.", - "pr_file_module": null - }, - { - "comment_id": "1335292018", - "repo_full_name": "opencv/opencv", - "pr_number": 22704, - "pr_file": "modules/core/CMakeLists.txt", - "discussion_id": "1335266321", - "commented_code": "@@ -107,6 +107,9 @@ endif()\n if(OPENCV_LIBVA_LINK)\n ocv_append_source_file_compile_definitions(\"${CMAKE_CURRENT_LIST_DIR}/src/va_intel.cpp\" \"OPENCV_LIBVA_LINK=1\")\n endif()\n+if(OPENCV_ENABLE_EGL_INTEROP)\n+ ocv_append_source_file_compile_definitions(\"${CMAKE_CURRENT_LIST_DIR}/src/opengl.cpp\" \"HAVE_EGL_INTEROP\")", - "comment_created_at": "2023-09-25T01:07:42+00:00", - "comment_author": "kallaballa", - "comment_body": "It does in the main [CMakeLists.txt](https://github.com/opencv/opencv/pull/22704/files/14d9a17c1a5a09efad8bea2c8cb4c8b55552afe1#diff-1e7de1ae2d059d21e1dd75d5812d5a34b0222cef273b7c3a2af62eb747f9d20aR326)", - "pr_file_module": null - }, - { - "comment_id": "1338040779", - "repo_full_name": "opencv/opencv", - "pr_number": 22704, - "pr_file": "modules/core/CMakeLists.txt", - "discussion_id": "1335266321", - "commented_code": "@@ -107,6 +107,9 @@ endif()\n if(OPENCV_LIBVA_LINK)\n ocv_append_source_file_compile_definitions(\"${CMAKE_CURRENT_LIST_DIR}/src/va_intel.cpp\" \"OPENCV_LIBVA_LINK=1\")\n endif()\n+if(OPENCV_ENABLE_EGL_INTEROP)\n+ ocv_append_source_file_compile_definitions(\"${CMAKE_CURRENT_LIST_DIR}/src/opengl.cpp\" \"HAVE_EGL_INTEROP\")", - "comment_created_at": "2023-09-27T04:55:53+00:00", - "comment_author": "opencv-alalek", - "comment_body": "Not really.\r\n\r\nUser could specify `WITH_OPENGL=ON` but there could be no OpenCL dev packages on build system (or for build target platform in case of cross-compiling).\r\n\r\n`WITH_` / `OPENCV_ENABLE_` is a user intention (lets enable this if available).\r\nNext, `find_package()` should be executed.\r\nPreferable `try_compile()` should be run to validate dependency configuration.\r\nAfter checks above `HAVE_` variable is set to `ON` / `1`.\r\nFeature usage should be guarded by `HAVE_` checks.\r\n\r\nAll dependencies should be properly detected on configuration (cmake) stage.\r\nBuild stage (make) should not fail due to dependency misusing.", - "pr_file_module": null - }, - { - "comment_id": "1338119972", - "repo_full_name": "opencv/opencv", - "pr_number": 22704, - "pr_file": 
"modules/core/CMakeLists.txt", - "discussion_id": "1335266321", - "commented_code": "@@ -107,6 +107,9 @@ endif()\n if(OPENCV_LIBVA_LINK)\n ocv_append_source_file_compile_definitions(\"${CMAKE_CURRENT_LIST_DIR}/src/va_intel.cpp\" \"OPENCV_LIBVA_LINK=1\")\n endif()\n+if(OPENCV_ENABLE_EGL_INTEROP)\n+ ocv_append_source_file_compile_definitions(\"${CMAKE_CURRENT_LIST_DIR}/src/opengl.cpp\" \"HAVE_EGL_INTEROP\")", - "comment_created_at": "2023-09-27T06:47:21+00:00", - "comment_author": "kallaballa", - "comment_body": "I see now. Will probably create a PR today.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2107101601", - "pr_number": 27355, - "pr_file": "modules/gapi/CMakeLists.txt", - "created_at": "2025-05-26T11:12:51+00:00", - "commented_code": "ocv_target_link_libraries(${the_module} PRIVATE wsock32 ws2_32)\nendif()\n\nocv_option(OPENCV_GAPU_MSMF \"Build G-API with MS Media Foundation support\" HAVE_MSMF)\nif(HAVE_MSMF AND OPENCV_GAPI_MSMF)", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2107101601", - "repo_full_name": "opencv/opencv", - "pr_number": 27355, - "pr_file": "modules/gapi/CMakeLists.txt", - "discussion_id": "2107101601", - "commented_code": "@@ -380,6 +380,15 @@ if(WIN32)\n ocv_target_link_libraries(${the_module} PRIVATE wsock32 ws2_32)\n endif()\n \n+ocv_option(OPENCV_GAPU_MSMF \"Build G-API with MS Media Foundation support\" HAVE_MSMF)\n+if(HAVE_MSMF AND OPENCV_GAPI_MSMF)", - "comment_created_at": "2025-05-26T11:12:51+00:00", - "comment_author": "opencv-alalek", - "comment_body": "`TARGET ocv.3rdparty.msmf` should be used instead of `HAVE_MSMF`.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2033872925", - "pr_number": 27194, - "pr_file": "3rdparty/ipphal/CMakeLists.txt", - "created_at": "2025-04-08T19:10:53+00:00", - "commented_code": "\"${CMAKE_CURRENT_SOURCE_DIR}/src/mean_ipp.cpp\"\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/minmax_ipp.cpp\"\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/norm_ipp.cpp\"\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/transforms_ipp.cpp\"\n)\n\nif(HAVE_IPP_ICV)\n target_compile_definitions(ipphal PRIVATE HAVE_IPP_ICV)\nendif()\n\nif(HAVE_IPP_IW)\n target_compile_definitions(ipphal PRIVATE HAVE_IPP_IW)", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2033872925", - "repo_full_name": "opencv/opencv", - "pr_number": 27194, - "pr_file": "3rdparty/ipphal/CMakeLists.txt", - "discussion_id": "2033872925", - "commented_code": "@@ -11,13 +11,19 @@ add_library(ipphal STATIC\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/mean_ipp.cpp\"\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/minmax_ipp.cpp\"\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/norm_ipp.cpp\"\n+ \"${CMAKE_CURRENT_SOURCE_DIR}/src/transforms_ipp.cpp\"\n )\n \n if(HAVE_IPP_ICV)\n target_compile_definitions(ipphal PRIVATE HAVE_IPP_ICV)\n endif()\n \n+if(HAVE_IPP_IW)\n+ target_compile_definitions(ipphal PRIVATE HAVE_IPP_IW)", - "comment_created_at": "2025-04-08T19:10:53+00:00", - "comment_author": "opencv-alalek", - "comment_body": "PRIVATE?\r\n\r\nThis macro is used in \"interface\" headers.", - "pr_file_module": null - }, - { - "comment_id": "2034495599", - "repo_full_name": "opencv/opencv", - "pr_number": 27194, - "pr_file": "3rdparty/ipphal/CMakeLists.txt", - "discussion_id": "2033872925", - "commented_code": "@@ -11,13 +11,19 @@ add_library(ipphal STATIC\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/mean_ipp.cpp\"\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/minmax_ipp.cpp\"\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/norm_ipp.cpp\"\n+ 
\"${CMAKE_CURRENT_SOURCE_DIR}/src/transforms_ipp.cpp\"\n )\n \n if(HAVE_IPP_ICV)\n target_compile_definitions(ipphal PRIVATE HAVE_IPP_ICV)\n endif()\n \n+if(HAVE_IPP_IW)\n+ target_compile_definitions(ipphal PRIVATE HAVE_IPP_IW)", - "comment_created_at": "2025-04-09T05:51:48+00:00", - "comment_author": "asmorkalov", - "comment_body": "Good point. Fixed.", - "pr_file_module": null - }, - { - "comment_id": "2036555700", - "repo_full_name": "opencv/opencv", - "pr_number": 27194, - "pr_file": "3rdparty/ipphal/CMakeLists.txt", - "discussion_id": "2033872925", - "commented_code": "@@ -11,13 +11,19 @@ add_library(ipphal STATIC\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/mean_ipp.cpp\"\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/minmax_ipp.cpp\"\n \"${CMAKE_CURRENT_SOURCE_DIR}/src/norm_ipp.cpp\"\n+ \"${CMAKE_CURRENT_SOURCE_DIR}/src/transforms_ipp.cpp\"\n )\n \n if(HAVE_IPP_ICV)\n target_compile_definitions(ipphal PRIVATE HAVE_IPP_ICV)\n endif()\n \n+if(HAVE_IPP_IW)\n+ target_compile_definitions(ipphal PRIVATE HAVE_IPP_IW)", - "comment_created_at": "2025-04-10T05:52:28+00:00", - "comment_author": "asmorkalov", - "comment_body": "Looks like I cannot make it public for now. We get redefinition issue. The macros are defined by both HAL and IPP core. I made it private for now to exclude the redefinition issue and added note to CMake.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-framework-synchronization-practices.json b/_reviewers/opencv-framework-synchronization-practices.json new file mode 100644 index 0000000..db71bd2 --- /dev/null +++ b/_reviewers/opencv-framework-synchronization-practices.json @@ -0,0 +1,366 @@ +[ + { + "discussion_id": "1370067753", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "created_at": "2023-10-24T12:17:05+00:00", + "commented_code": "if(_src.depth() == CV_8U)\n {\n static UMat sdiv_data;\n static UMat hdiv_data180;\n static UMat hdiv_data256;\n static int sdiv_table[256];\n static int hdiv_table180[256];\n static int hdiv_table256[256];\n static volatile bool initialized180 = false, initialized256 = false;\n static thread_local UMat sdiv_data;\n static thread_local UMat hdiv_data180;\n static thread_local UMat hdiv_data256;\n static thread_local int sdiv_table[256];", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1370067753", + "repo_full_name": "opencv/opencv", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "discussion_id": "1370067753", + "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", + "comment_created_at": "2023-10-24T12:17:05+00:00", + "comment_author": "opencv-alalek", + "comment_body": "Lets try to apply `thread_local` for `UMat` variables only. 
No need to touch other tables (there is no problem with them).\r\n\r\nUse dedicated `initializedUMat` for that.", + "pr_file_module": null + }, + { + "comment_id": "1386035956", + "repo_full_name": "opencv/opencv", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "discussion_id": "1370067753", + "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", + "comment_created_at": "2023-11-08T06:03:58+00:00", + "comment_author": "kallaballa", + "comment_body": "> initializedUMat\r\n\r\nI am not sure what you mean by the last part", + "pr_file_module": null + }, + { + "comment_id": "1387011521", + "repo_full_name": "opencv/opencv", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "discussion_id": "1370067753", + "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", + "comment_created_at": "2023-11-08T18:00:22+00:00", + "comment_author": "opencv-alalek", + "comment_body": "Split initialization on 2 parts:\r\n- regular CPU buffers (like `int sdiv_table[256];`) - they don't need `thread-local` at all.\r\n- OpenCL related stuff which causes problems and needs thread-local variables (also put it under condition that this data is really needed - CPU-only processing doesn't need that at all).", + "pr_file_module": null + }, + { + "comment_id": "1387203457", + "repo_full_name": "opencv/opencv", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "discussion_id": "1370067753", + "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", + "comment_created_at": "2023-11-08T21:10:40+00:00", + "comment_author": "vpisarev", + "comment_body": "probably, even better solution would be to merge all 3 tables together (of course, 3 pointers can be initialized to point to different parts of the joint table) and then we can convert it to UMat on-fly:\r\n\r\n```\r\nint rgb2hsv_tab[256*3];\r\nvolatile bool rgb2hsv_initialized = false;\r\nif (!rgb2hsv_initialized) { ... 
}\r\nif (use_opencl) {\r\nUMat rgb2hsv_utab = Mat(1, 256*3, CV_32S, rgb2hsv_tab).getUMat();\r\n... // run opencl kernel with rgb2hsv_utab as one of the parameters\r\n}\r\n```\r\n\r\nyes, it means extra copy, but compared to the processed data it should be a tiny overhead, and much easier to handle than static thread-local UMat's.", + "pr_file_module": null + }, + { + "comment_id": "1389250064", + "repo_full_name": "opencv/opencv", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "discussion_id": "1370067753", + "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", + "comment_created_at": "2023-11-10T10:57:32+00:00", + "comment_author": "kallaballa", + "comment_body": "I'm not experienced enough to say the roundtrip-lateny will be more than we'd like, but it think so. But I like the safety and simplicity of your approach, so I guess I'll profile it.", + "pr_file_module": null + }, + { + "comment_id": "1389251556", + "repo_full_name": "opencv/opencv", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "discussion_id": "1370067753", + "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", + "comment_created_at": "2023-11-10T10:58:59+00:00", + "comment_author": "kallaballa", + "comment_body": "@opencv-alalek @vpisarev ...how to know who of you to follow in which case? 
^^", + "pr_file_module": null + }, + { + "comment_id": "1389257797", + "repo_full_name": "opencv/opencv", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "discussion_id": "1370067753", + "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", + "comment_created_at": "2023-11-10T11:05:25+00:00", + "comment_author": "kallaballa", + "comment_body": "> probably, even better solution would be to merge all 3 tables together (of course, 3 pointers can be initialized to point to different parts of the joint table) and then we can convert it to UMat on-fly:\r\n> \r\n> ```\r\n> int rgb2hsv_tab[256*3];\r\n> volatile bool rgb2hsv_initialized = false;\r\n> if (!rgb2hsv_initialized) { ... }\r\n> if (use_opencl) {\r\n> UMat rgb2hsv_utab = Mat(1, 256*3, CV_32S, rgb2hsv_tab).getUMat();\r\n> ... // run opencl kernel with rgb2hsv_utab as one of the parameters\r\n> }\r\n> ```\r\n> \r\n> yes, it means extra copy, but compared to the processed data it should be a tiny overhead, and much easier to handle than static thread-local UMat's.\r\n\r\nalso... we are in oclCvtColorBGR2HSV which is \"guarded\" by CV_OCL_RUN. so i guess i can drop ```if (use_opencl)```", + "pr_file_module": null + }, + { + "comment_id": "1389291318", + "repo_full_name": "opencv/opencv", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "discussion_id": "1370067753", + "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", + "comment_created_at": "2023-11-10T11:41:32+00:00", + "comment_author": "kallaballa", + "comment_body": "I neither like unecessary round-trips nor messing with the memory model, so i came up with this:\r\n\r\n\r\n```c++\r\n if(_src.depth() == CV_8U)\r\n {\r\n\t\tstatic std::mutex mtx;\r\n\t\t{\r\n\t\t\tstd::unique_lock lock(mtx);\r\n\t\t\tstatic UMat sdiv_data;\r\n\t\t\tstatic UMat hdiv_data180;\r\n\t\t\tstatic UMat hdiv_data256;\r\n\t\t\tstatic int combined_table[256];\r\n\t\t\tstatic volatile bool initialized180 = false, initialized256 = false;\r\n\t\t\tstatic volatile bool initialized = hrange == 180 ? initialized180 : initialized256;\r\n\r\n\t\t\tif (!initialized)\r\n\t\t\t{\r\n\t\t\t\tint * const hdiv_table = hrange == 180 ? combined_table : &combined_table[128], hsv_shift = 12;\r\n\t\t\t\tUMat & hdiv_data = hrange == 180 ? 
hdiv_data180 : hdiv_data256;\r\n\r\n\t\t\t\tcombined_table[0] = 0;\r\n\r\n\t\t\t\tint v = 255 << hsv_shift;\r\n\t\t\t\tif (!initialized180 && !initialized256)\r\n\t\t\t\t{\r\n\t\t\t\t\tfor(int i = 1; i < 256; i++ )\r\n\t\t\t\t\t\tcombined_table[i] = saturate_cast(v/(1.*i));\r\n\t\t\t\t\tMat(1, 256, CV_32SC1, combined_table).copyTo(sdiv_data);\r\n\t\t\t\t}\r\n\r\n\t\t\t\tv = hrange << hsv_shift;\r\n\t\t\t\tfor (int i = 1; i < 256; i++ )\r\n\t\t\t\t\thdiv_table[i] = saturate_cast(v/(6.*i));\r\n\r\n\t\t\t\tMat(1, 256, CV_32SC1, hdiv_table).copyTo(hdiv_data);\r\n\t\t\t\tinitialized = true;\r\n\t\t\t}\r\n\r\n\t\t\th.setArg(ocl::KernelArg::PtrReadOnly(sdiv_data));\r\n\t\t\th.setArg(hrange == 256 ? ocl::KernelArg::PtrReadOnly(hdiv_data256) :\r\n\t\t\t\t\t\t\t ocl::KernelArg::PtrReadOnly(hdiv_data180));\r\n\t\t}\r\n\t}\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1389502048", + "repo_full_name": "opencv/opencv", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "discussion_id": "1370067753", + "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", + "comment_created_at": "2023-11-10T14:53:26+00:00", + "comment_author": "opencv-alalek", + "comment_body": "Proposed code locks mutex on each call. That should be properly avoided.", + "pr_file_module": null + }, + { + "comment_id": "1389863226", + "repo_full_name": "opencv/opencv", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "discussion_id": "1370067753", + "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", + "comment_created_at": "2023-11-10T20:04:29+00:00", + "comment_author": "vpisarev", + "comment_body": "@kallaballa, let's try the proposed by me approach without use of static UMat's at all.", + "pr_file_module": null + }, + { + "comment_id": "1390087775", + "repo_full_name": "opencv/opencv", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "discussion_id": "1370067753", + "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat 
hdiv_data256;\n+ static thread_local int sdiv_table[256];", + "comment_created_at": "2023-11-11T02:06:59+00:00", + "comment_author": "kallaballa", + "comment_body": "@opencv-alalek double checked pattern and lock_guard 0f290bc\r\n@vpisarev I'll give it a try and provide numbers", + "pr_file_module": null + }, + { + "comment_id": "1390631190", + "repo_full_name": "opencv/opencv", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "discussion_id": "1370067753", + "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", + "comment_created_at": "2023-11-13T05:12:33+00:00", + "comment_author": "kallaballa", + "comment_body": "Test failed e.g. on MacOS: OCL_TEST_P(CvtColor8u32f, RGB2HSV_FULL) { performTest(3, 3, CVTCODE(RGB2HSV_FULL), IPP_EPS); }\r\n\r\nI don't have a MacOS setup, but it stands to reason that it is a concurrency issue. So i checked on the memory model guarantees of ```volatile``` and there are none. So I guessed that is the problem. Let's see the test.\r\n\r\n@vpisarev implementation without static still pending. want both version working.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1400815714", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "created_at": "2023-11-21T15:55:50+00:00", + "commented_code": "if(_src.depth() == CV_8U)\n {\n static UMat sdiv_data;\n static UMat hdiv_data180;\n static UMat hdiv_data256;\n static int sdiv_table[256];\n static int hdiv_table180[256];\n static int hdiv_table256[256];\n static volatile bool initialized180 = false, initialized256 = false;\n volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;\n static std::mutex mtx;", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1400815714", + "repo_full_name": "opencv/opencv", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "discussion_id": "1400815714", + "commented_code": "@@ -274,38 +275,45 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n- volatile bool & initialized = hrange == 180 ? 
initialized180 : initialized256;\n+ static std::mutex mtx;", + "comment_created_at": "2023-11-21T15:55:50+00:00", + "comment_author": "opencv-alalek", + "comment_body": "This breaks bare metal build configurations without any threading support (see `OPENCV_DISABLE_THREAD_SUPPORT`)\r\n\r\nUse OpenCV wrappers instead.\r\n\r\nReuse `cv::getInitializationMutex()` instead of creation of new one.", + "pr_file_module": null + }, + { + "comment_id": "1402470564", + "repo_full_name": "opencv/opencv", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "discussion_id": "1400815714", + "commented_code": "@@ -274,38 +275,45 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n- volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;\n+ static std::mutex mtx;", + "comment_created_at": "2023-11-22T17:44:11+00:00", + "comment_author": "kallaballa", + "comment_body": "Oh i understand. thx for the info. is there a list of features i should usually test for? i started to test for WITH_OPENCL=OFF and i will keep in mind that there are targets without thread support... are there more configurations i should keep in mind?", + "pr_file_module": null + }, + { + "comment_id": "1404710045", + "repo_full_name": "opencv/opencv", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "discussion_id": "1400815714", + "commented_code": "@@ -274,38 +275,45 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n- volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;\n+ static std::mutex mtx;", + "comment_created_at": "2023-11-25T03:20:45+00:00", + "comment_author": "kallaballa", + "comment_body": "Done 031abea. Tested locally by running opencv_test_improc with following build configuration. Btw. 
i found ```cv::utils::lock_guard``` but it doesn't seem necessary since there is C++11 support.\r\n\r\n```shell\r\ncmake -DINSTALL_BIN_EXAMPLES=OFF -DBUILD_SHARED_LIBS=ON -DWITH_OPENCL=ON -DBUILD_opencv_java=OFF -DBUILD_opencv_js=OFF -DBUILD_opencv_python2=OFF -DBUILD_opencv_python3=OFF -DBUILD_EXAMPLES=OFF -DBUILD_PACKAGE=OFF -DBUILD_TESTS=ON -DBUILD_PERF_TESTS=ON -DBUILD_DOCS=OFF -DBUILD_opencv_videoio=ON -DBUILD_opencv_highgui=ON -DBUILD_opencv_ts=ON -DBUILD_opencv_imgcodecs=ON -DBUILD_opencv_plot=OFF -DBUILD_opencv_tracking=OFF -DBUILD_opencv_video=OFF -DBUILD_opencv_world=ON -DCMAKE_C_FLAGS=-DOPENCV_DISABLE_THREAD_SUPPORT -DCMAKE_CXX_FLAGS=-DOPENCV_DISABLE_THREAD_SUPPORT ../opencv\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1411079146", + "repo_full_name": "opencv/opencv", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "discussion_id": "1400815714", + "commented_code": "@@ -274,38 +275,45 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n- volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;\n+ static std::mutex mtx;", + "comment_created_at": "2023-11-30T18:01:26+00:00", + "comment_author": "opencv-alalek", + "comment_body": "`std::lock_guard` => `cv::AutoLock`\r\n\r\nDid you measure performance changes due to added of `.copyTo()` calls?", + "pr_file_module": null + }, + { + "comment_id": "1413029712", + "repo_full_name": "opencv/opencv", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "discussion_id": "1400815714", + "commented_code": "@@ -274,38 +275,45 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n- volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;\n+ static std::mutex mtx;", + "comment_created_at": "2023-12-03T08:58:44+00:00", + "comment_author": "kallaballa", + "comment_body": "done: [8cbf9c7](https://github.com/opencv/opencv/pull/24444/commits/8cbf9c70e398cdf8357564ef0e5ab8df7f5d0eef)", + "pr_file_module": null + }, + { + "comment_id": "1413031013", + "repo_full_name": "opencv/opencv", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "discussion_id": "1400815714", + "commented_code": "@@ -274,38 +275,45 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n- volatile bool & initialized = hrange == 180 ? 
initialized180 : initialized256;\n+ static std::mutex mtx;", + "comment_created_at": "2023-12-03T09:07:06+00:00", + "comment_author": "kallaballa", + "comment_body": "> `std::lock_guard` => `cv::AutoLock`\r\n> \r\n> Did you measure performance changes due to added of `.copyTo()` calls?\r\n\r\nI compared the current HEAD of 4.x with the branch of the PR (which i updated to the least 4.x HEAD) .\r\nI used follwing build configuration for both branches:\r\n```bash\r\ncmake -DINSTALL_BIN_EXAMPLES=OFF -DBUILD_SHARED_LIBS=ON -DWITH_OPENCL=ON -DBUILD_opencv_java=OFF -DBUILD_opencv_js=OFF -DBUILD_opencv_python2=OFF -DBUILD_opencv_python3=OFF -DBUILD_EXAMPLES=OFF -DBUILD_PACKAGE=OFF -DBUILD_TESTS=ON -DBUILD_PERF_TESTS=ON -DBUILD_DOCS=OFF -DBUILD_opencv_videoio=ON -DBUILD_opencv_highgui=ON -DBUILD_opencv_ts=ON -DBUILD_opencv_imgcodecs=ON -DBUILD_opencv_plot=OFF -DBUILD_opencv_tracking=OFF -DBUILD_opencv_video=OFF -DBUILD_opencv_world=ON -DENABLE_PROFILING=ON ../opencv\r\n```\r\n\r\nAnd the following test program: https://github.com/kallaballa/OpenCV-Issues/blob/main/src/oclCvtColorBGR2HSV-race/perf-test.cpp\r\n\r\nWithout patch it takes: ~14.8s\r\nWith patch it takes: ~15.2s\r\n\r\nSo there is a slow-down. I'll optimize a bit and measure again.", + "pr_file_module": null + }, + { + "comment_id": "1413036376", + "repo_full_name": "opencv/opencv", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "discussion_id": "1400815714", + "commented_code": "@@ -274,38 +275,45 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n- volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;\n+ static std::mutex mtx;", + "comment_created_at": "2023-12-03T09:45:08+00:00", + "comment_author": "kallaballa", + "comment_body": "When profiling i had suspicious results and so i increased the iterations by 10x.\r\nThis gave me \r\n* 154.483s for patched without copy.\r\n* 156.108s for patch with copy.\r\n* 153.803s without patch.\r\n\r\nI think the differences are still quite influenced by variance. anyway it is not much. should i continue?", + "pr_file_module": null + }, + { + "comment_id": "1413038481", + "repo_full_name": "opencv/opencv", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "discussion_id": "1400815714", + "commented_code": "@@ -274,38 +275,45 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n- volatile bool & initialized = hrange == 180 ? 
initialized180 : initialized256;\n+ static std::mutex mtx;", + "comment_created_at": "2023-12-03T09:58:45+00:00", + "comment_author": "kallaballa", + "comment_body": "I meassured also the variant no-static/no-thread_local/no-locking: 155.512s", + "pr_file_module": null + }, + { + "comment_id": "1426663572", + "repo_full_name": "opencv/opencv", + "pr_number": 24444, + "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", + "discussion_id": "1400815714", + "commented_code": "@@ -274,38 +275,45 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n- volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;\n+ static std::mutex mtx;", + "comment_created_at": "2023-12-14T12:35:23+00:00", + "comment_author": "opencv-alalek", + "comment_body": "Please always reuse OpenCV performance tests or add them if something is missing.\r\n\r\nAvoid using of hand-written tests cases\r\n\r\n> suspicious results\r\n\r\nProvided measurement code has bug due to async execution of `cvtColor(UMat)` (code should schedule OpenCL kernels, but do not wait the final result).\r\n\r\nP.S. Mat to UMat coping is synchronous call in current API - forces wait of OpenCL execution queue.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2024125007", + "pr_number": 27182, + "pr_file": "3rdparty/fastcv/src/fastcv_hal_imgproc.cpp", + "created_at": "2025-04-02T06:03:40+00:00", + "commented_code": "CV_HAL_RETURN_NOT_IMPLEMENTED(\"ROI not supported\");\n }\n\n if(src_depth == CV_32F && normalize != 1)\n CV_HAL_RETURN_NOT_IMPLEMENTED(\"normalized kernel supported for float types\");\n\n if(src_depth == CV_8U && (ksize_width != 3 && ksize_width != 5))\n CV_HAL_RETURN_NOT_IMPLEMENTED(\"kernel size not supported\");\n\n INITIALIZATION_CHECK;\n\n fcvBorderType bdr;\n uint8_t bdrVal = 0;\n switch(border_type)\n {\n case cv::BORDER_REPLICATE:\n bdr = FASTCV_BORDER_REPLICATE;\n break;\n case cv::BORDER_REFLECT:\n bdr = FASTCV_BORDER_REFLECT;\n break;\n case cv::BORDER_REFLECT101: // cv::BORDER_REFLECT_101, BORDER_DEFAULT\n bdr = FASTCV_BORDER_REFLECT_V2;\n break;\n default:\n CV_HAL_RETURN_NOT_IMPLEMENTED(\"border type not supported\");\n }\n cv::Mat dst_temp;\n bool inPlace = src_data == dst_data ? 
true : false ;\n\n fcvStatus status = FASTCV_SUCCESS;\n if(ksize_width == 3)\n int nThreads = cv::getNumThreads();\n\n cv::Mat src = cv::Mat(height, width, src_depth, (void*)src_data, src_step);\n\n if(inPlace)\n dst_temp = cv::Mat(height, width, src_depth);\n else\n dst_temp = cv::Mat(height, width, src_depth, (void*)dst_data, dst_step);\n\n int nStripes, stripeHeight = nThreads * 10;", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2024125007", + "repo_full_name": "opencv/opencv", + "pr_number": 27182, + "pr_file": "3rdparty/fastcv/src/fastcv_hal_imgproc.cpp", + "discussion_id": "2024125007", + "commented_code": "@@ -363,37 +417,46 @@ int fastcv_hal_boxFilter(\n CV_HAL_RETURN_NOT_IMPLEMENTED(\"ROI not supported\");\n }\n \n+ if(src_depth == CV_32F && normalize != 1)\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"normalized kernel supported for float types\");\n+\n+ if(src_depth == CV_8U && (ksize_width != 3 && ksize_width != 5))\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"kernel size not supported\");\n+\n INITIALIZATION_CHECK;\n \n- fcvBorderType bdr;\n- uint8_t bdrVal = 0;\n- switch(border_type)\n- {\n- case cv::BORDER_REPLICATE:\n- bdr = FASTCV_BORDER_REPLICATE;\n- break;\n- case cv::BORDER_REFLECT:\n- bdr = FASTCV_BORDER_REFLECT;\n- break;\n- case cv::BORDER_REFLECT101: // cv::BORDER_REFLECT_101, BORDER_DEFAULT\n- bdr = FASTCV_BORDER_REFLECT_V2;\n- break;\n- default:\n- CV_HAL_RETURN_NOT_IMPLEMENTED(\"border type not supported\");\n- }\n+ cv::Mat dst_temp;\n+ bool inPlace = src_data == dst_data ? true : false ;\n \n- fcvStatus status = FASTCV_SUCCESS;\n- if(ksize_width == 3)\n+ int nThreads = cv::getNumThreads();\n+\n+ cv::Mat src = cv::Mat(height, width, src_depth, (void*)src_data, src_step);\n+\n+ if(inPlace)\n+ dst_temp = cv::Mat(height, width, src_depth);\n+ else\n+ dst_temp = cv::Mat(height, width, src_depth, (void*)dst_data, dst_step);\n+\n+ int nStripes, stripeHeight = nThreads * 10;", + "comment_created_at": "2025-04-02T06:03:40+00:00", + "comment_author": "asmorkalov", + "comment_body": "`stripeHeight = nThreads * 10;` sounds strange. More threads -> larger piece for each thread.", + "pr_file_module": null + }, + { + "comment_id": "2024155437", + "repo_full_name": "opencv/opencv", + "pr_number": 27182, + "pr_file": "3rdparty/fastcv/src/fastcv_hal_imgproc.cpp", + "discussion_id": "2024125007", + "commented_code": "@@ -363,37 +417,46 @@ int fastcv_hal_boxFilter(\n CV_HAL_RETURN_NOT_IMPLEMENTED(\"ROI not supported\");\n }\n \n+ if(src_depth == CV_32F && normalize != 1)\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"normalized kernel supported for float types\");\n+\n+ if(src_depth == CV_8U && (ksize_width != 3 && ksize_width != 5))\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"kernel size not supported\");\n+\n INITIALIZATION_CHECK;\n \n- fcvBorderType bdr;\n- uint8_t bdrVal = 0;\n- switch(border_type)\n- {\n- case cv::BORDER_REPLICATE:\n- bdr = FASTCV_BORDER_REPLICATE;\n- break;\n- case cv::BORDER_REFLECT:\n- bdr = FASTCV_BORDER_REFLECT;\n- break;\n- case cv::BORDER_REFLECT101: // cv::BORDER_REFLECT_101, BORDER_DEFAULT\n- bdr = FASTCV_BORDER_REFLECT_V2;\n- break;\n- default:\n- CV_HAL_RETURN_NOT_IMPLEMENTED(\"border type not supported\");\n- }\n+ cv::Mat dst_temp;\n+ bool inPlace = src_data == dst_data ? 
true : false ;\n \n- fcvStatus status = FASTCV_SUCCESS;\n- if(ksize_width == 3)\n+ int nThreads = cv::getNumThreads();\n+\n+ cv::Mat src = cv::Mat(height, width, src_depth, (void*)src_data, src_step);\n+\n+ if(inPlace)\n+ dst_temp = cv::Mat(height, width, src_depth);\n+ else\n+ dst_temp = cv::Mat(height, width, src_depth, (void*)dst_data, dst_step);\n+\n+ int nStripes, stripeHeight = nThreads * 10;", + "comment_created_at": "2025-04-02T06:33:02+00:00", + "comment_author": "adsha-quic", + "comment_body": "Hi Alex\r\n\r\nWe tested this and found it having good speed, since usually threads are 8 as per our experiments. Please guide what optimizations we can do here.", + "pr_file_module": null + }, + { + "comment_id": "2024172942", + "repo_full_name": "opencv/opencv", + "pr_number": 27182, + "pr_file": "3rdparty/fastcv/src/fastcv_hal_imgproc.cpp", + "discussion_id": "2024125007", + "commented_code": "@@ -363,37 +417,46 @@ int fastcv_hal_boxFilter(\n CV_HAL_RETURN_NOT_IMPLEMENTED(\"ROI not supported\");\n }\n \n+ if(src_depth == CV_32F && normalize != 1)\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"normalized kernel supported for float types\");\n+\n+ if(src_depth == CV_8U && (ksize_width != 3 && ksize_width != 5))\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"kernel size not supported\");\n+\n INITIALIZATION_CHECK;\n \n- fcvBorderType bdr;\n- uint8_t bdrVal = 0;\n- switch(border_type)\n- {\n- case cv::BORDER_REPLICATE:\n- bdr = FASTCV_BORDER_REPLICATE;\n- break;\n- case cv::BORDER_REFLECT:\n- bdr = FASTCV_BORDER_REFLECT;\n- break;\n- case cv::BORDER_REFLECT101: // cv::BORDER_REFLECT_101, BORDER_DEFAULT\n- bdr = FASTCV_BORDER_REFLECT_V2;\n- break;\n- default:\n- CV_HAL_RETURN_NOT_IMPLEMENTED(\"border type not supported\");\n- }\n+ cv::Mat dst_temp;\n+ bool inPlace = src_data == dst_data ? true : false ;\n \n- fcvStatus status = FASTCV_SUCCESS;\n- if(ksize_width == 3)\n+ int nThreads = cv::getNumThreads();\n+\n+ cv::Mat src = cv::Mat(height, width, src_depth, (void*)src_data, src_step);\n+\n+ if(inPlace)\n+ dst_temp = cv::Mat(height, width, src_depth);\n+ else\n+ dst_temp = cv::Mat(height, width, src_depth, (void*)dst_data, dst_step);\n+\n+ int nStripes, stripeHeight = nThreads * 10;", + "comment_created_at": "2025-04-02T06:48:10+00:00", + "comment_author": "asmorkalov", + "comment_body": "threads are 8. It's not true:\r\n- Android build uses 2 threads by default. It's done to prevent overheating, but may be changed in future.\r\n- Linux builds use all available cores.\r\n- parallel_for_ serializes nested parallel_for_ calls. 
So you can easily get 1 here.", + "pr_file_module": null + }, + { + "comment_id": "2024174425", + "repo_full_name": "opencv/opencv", + "pr_number": 27182, + "pr_file": "3rdparty/fastcv/src/fastcv_hal_imgproc.cpp", + "discussion_id": "2024125007", + "commented_code": "@@ -363,37 +417,46 @@ int fastcv_hal_boxFilter(\n CV_HAL_RETURN_NOT_IMPLEMENTED(\"ROI not supported\");\n }\n \n+ if(src_depth == CV_32F && normalize != 1)\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"normalized kernel supported for float types\");\n+\n+ if(src_depth == CV_8U && (ksize_width != 3 && ksize_width != 5))\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"kernel size not supported\");\n+\n INITIALIZATION_CHECK;\n \n- fcvBorderType bdr;\n- uint8_t bdrVal = 0;\n- switch(border_type)\n- {\n- case cv::BORDER_REPLICATE:\n- bdr = FASTCV_BORDER_REPLICATE;\n- break;\n- case cv::BORDER_REFLECT:\n- bdr = FASTCV_BORDER_REFLECT;\n- break;\n- case cv::BORDER_REFLECT101: // cv::BORDER_REFLECT_101, BORDER_DEFAULT\n- bdr = FASTCV_BORDER_REFLECT_V2;\n- break;\n- default:\n- CV_HAL_RETURN_NOT_IMPLEMENTED(\"border type not supported\");\n- }\n+ cv::Mat dst_temp;\n+ bool inPlace = src_data == dst_data ? true : false ;\n \n- fcvStatus status = FASTCV_SUCCESS;\n- if(ksize_width == 3)\n+ int nThreads = cv::getNumThreads();\n+\n+ cv::Mat src = cv::Mat(height, width, src_depth, (void*)src_data, src_step);\n+\n+ if(inPlace)\n+ dst_temp = cv::Mat(height, width, src_depth);\n+ else\n+ dst_temp = cv::Mat(height, width, src_depth, (void*)dst_data, dst_step);\n+\n+ int nStripes, stripeHeight = nThreads * 10;", + "comment_created_at": "2025-04-02T06:49:11+00:00", + "comment_author": "asmorkalov", + "comment_body": "We usually set granularity to some reasonable size for single thread. OpenCV uses dynamic scheduling, so all other steps are done automatically.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1820133472", + "pr_number": 26379, + "pr_file": "modules/imgcodecs/src/grfmt_jpegxl.cpp", + "created_at": "2024-10-29T05:56:38+00:00", + "commented_code": "/*M///////////////////////////////////////////////////////////////////////////////////////\n//\n// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\n//\n// By downloading, copying, installing or using the software you agree to this license.\n// If you do not agree to this license, do not download, install,\n// copy or use the software.\n//\n//\n// Intel License Agreement\n// For Open Source Computer Vision Library\n//\n// Copyright (C) 2000, Intel Corporation, all rights reserved.\n// Third party copyrights are property of their respective owners.\n//\n// Redistribution and use in source and binary forms, with or without modification,\n// are permitted provided that the following conditions are met:\n//\n// * Redistribution's of source code must retain the above copyright notice,\n// this list of conditions and the following disclaimer.\n//\n// * Redistribution's in binary form must reproduce the above copyright notice,\n// this list of conditions and the following disclaimer in the documentation\n// and/or other materials provided with the distribution.\n//\n// * The name of Intel Corporation may not be used to endorse or promote products\n// derived from this software without specific prior written permission.\n//\n// This software is provided by the copyright holders and contributors \"as is\" and\n// any express or implied warranties, including, but not limited to, the implied\n// warranties of merchantability and fitness for a particular purpose are disclaimed.\n// In no event 
shall the Intel Corporation or contributors be liable for any direct,\n// indirect, incidental, special, exemplary, or consequential damages\n// (including, but not limited to, procurement of substitute goods or services;\n// loss of use, data, or profits; or business interruption) however caused\n// and on any theory of liability, whether in contract, strict liability,\n// or tort (including negligence or otherwise) arising in any way out of\n// the use of this software, even if advised of the possibility of such damage.\n//\n//M*/\n\n#include \"precomp.hpp\"\n#include \"grfmt_jpegxl.hpp\"\n\n#ifdef HAVE_JPEGXL\n\n#include \n\nnamespace cv\n{\n\n/////////////////////// JpegXLDecoder ///////////////////\n\nJpegXLDecoder::JpegXLDecoder() : m_f(nullptr, fclose)\n{\n m_signature = \"\\xFF\\x0A\";\n m_decoder = nullptr;\n m_buf_supported = true;\n m_type = -1;\n}\n\nJpegXLDecoder::~JpegXLDecoder()\n{\n close();\n}\n\nvoid JpegXLDecoder::close()\n{\n if (m_decoder)\n m_decoder.release();\n if (m_f)\n m_f.release();\n m_read_buffer = {};\n m_width = m_height = 0;\n m_type = -1;\n}\n\nImageDecoder JpegXLDecoder::newDecoder() const\n{\n return makePtr();\n}\n\nbool JpegXLDecoder::read(Mat* pimg)\n{\n // Open file\n if (!m_f) {\n m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n if (!m_f)\n return false;\n }\n\n // Initialize decoder\n if (!m_decoder) {\n m_decoder = JxlDecoderMake(nullptr);\n if (!m_decoder)\n return false;\n // Subscribe to the basic info event\n JxlDecoderStatus status = JxlDecoderSubscribeEvents(m_decoder.get(), JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE);\n if (status != JXL_DEC_SUCCESS)\n return false;\n }\n\n // Set up parallel m_parallel_runner\n if (!m_parallel_runner) {\n m_parallel_runner = JxlThreadParallelRunnerMake(nullptr, JxlThreadParallelRunnerDefaultNumWorkerThreads());", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1820133472", + "repo_full_name": "opencv/opencv", + "pr_number": 26379, + "pr_file": "modules/imgcodecs/src/grfmt_jpegxl.cpp", + "discussion_id": "1820133472", + "commented_code": "@@ -0,0 +1,376 @@\n+/*M///////////////////////////////////////////////////////////////////////////////////////\n+//\n+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\n+//\n+// By downloading, copying, installing or using the software you agree to this license.\n+// If you do not agree to this license, do not download, install,\n+// copy or use the software.\n+//\n+//\n+// Intel License Agreement\n+// For Open Source Computer Vision Library\n+//\n+// Copyright (C) 2000, Intel Corporation, all rights reserved.\n+// Third party copyrights are property of their respective owners.\n+//\n+// Redistribution and use in source and binary forms, with or without modification,\n+// are permitted provided that the following conditions are met:\n+//\n+// * Redistribution's of source code must retain the above copyright notice,\n+// this list of conditions and the following disclaimer.\n+//\n+// * Redistribution's in binary form must reproduce the above copyright notice,\n+// this list of conditions and the following disclaimer in the documentation\n+// and/or other materials provided with the distribution.\n+//\n+// * The name of Intel Corporation may not be used to endorse or promote products\n+// derived from this software without specific prior written permission.\n+//\n+// This software is provided by the copyright holders and contributors \"as is\" and\n+// any express or implied warranties, including, but not limited to, the 
implied\n+// warranties of merchantability and fitness for a particular purpose are disclaimed.\n+// In no event shall the Intel Corporation or contributors be liable for any direct,\n+// indirect, incidental, special, exemplary, or consequential damages\n+// (including, but not limited to, procurement of substitute goods or services;\n+// loss of use, data, or profits; or business interruption) however caused\n+// and on any theory of liability, whether in contract, strict liability,\n+// or tort (including negligence or otherwise) arising in any way out of\n+// the use of this software, even if advised of the possibility of such damage.\n+//\n+//M*/\n+\n+#include \"precomp.hpp\"\n+#include \"grfmt_jpegxl.hpp\"\n+\n+#ifdef HAVE_JPEGXL\n+\n+#include \n+\n+namespace cv\n+{\n+\n+/////////////////////// JpegXLDecoder ///////////////////\n+\n+JpegXLDecoder::JpegXLDecoder() : m_f(nullptr, fclose)\n+{\n+ m_signature = \"\\xFF\\x0A\";\n+ m_decoder = nullptr;\n+ m_buf_supported = true;\n+ m_type = -1;\n+}\n+\n+JpegXLDecoder::~JpegXLDecoder()\n+{\n+ close();\n+}\n+\n+void JpegXLDecoder::close()\n+{\n+ if (m_decoder)\n+ m_decoder.release();\n+ if (m_f)\n+ m_f.release();\n+ m_read_buffer = {};\n+ m_width = m_height = 0;\n+ m_type = -1;\n+}\n+\n+ImageDecoder JpegXLDecoder::newDecoder() const\n+{\n+ return makePtr();\n+}\n+\n+bool JpegXLDecoder::read(Mat* pimg)\n+{\n+ // Open file\n+ if (!m_f) {\n+ m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n+ if (!m_f)\n+ return false;\n+ }\n+\n+ // Initialize decoder\n+ if (!m_decoder) {\n+ m_decoder = JxlDecoderMake(nullptr);\n+ if (!m_decoder)\n+ return false;\n+ // Subscribe to the basic info event\n+ JxlDecoderStatus status = JxlDecoderSubscribeEvents(m_decoder.get(), JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE);\n+ if (status != JXL_DEC_SUCCESS)\n+ return false;\n+ }\n+\n+ // Set up parallel m_parallel_runner\n+ if (!m_parallel_runner) {\n+ m_parallel_runner = JxlThreadParallelRunnerMake(nullptr, JxlThreadParallelRunnerDefaultNumWorkerThreads());", + "comment_created_at": "2024-10-29T05:56:38+00:00", + "comment_author": "asmorkalov", + "comment_body": "I propose to use `num_worker_threads = cv::getNumThreads()` to make it manageable outside. 
See https://docs.opencv.org/4.x/db/de0/group__core__utils.html#ga2db334ec41d98da3129ef4a2342fc4d4 ", + "pr_file_module": null + }, + { + "comment_id": "1820588823", + "repo_full_name": "opencv/opencv", + "pr_number": 26379, + "pr_file": "modules/imgcodecs/src/grfmt_jpegxl.cpp", + "discussion_id": "1820133472", + "commented_code": "@@ -0,0 +1,376 @@\n+/*M///////////////////////////////////////////////////////////////////////////////////////\n+//\n+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\n+//\n+// By downloading, copying, installing or using the software you agree to this license.\n+// If you do not agree to this license, do not download, install,\n+// copy or use the software.\n+//\n+//\n+// Intel License Agreement\n+// For Open Source Computer Vision Library\n+//\n+// Copyright (C) 2000, Intel Corporation, all rights reserved.\n+// Third party copyrights are property of their respective owners.\n+//\n+// Redistribution and use in source and binary forms, with or without modification,\n+// are permitted provided that the following conditions are met:\n+//\n+// * Redistribution's of source code must retain the above copyright notice,\n+// this list of conditions and the following disclaimer.\n+//\n+// * Redistribution's in binary form must reproduce the above copyright notice,\n+// this list of conditions and the following disclaimer in the documentation\n+// and/or other materials provided with the distribution.\n+//\n+// * The name of Intel Corporation may not be used to endorse or promote products\n+// derived from this software without specific prior written permission.\n+//\n+// This software is provided by the copyright holders and contributors \"as is\" and\n+// any express or implied warranties, including, but not limited to, the implied\n+// warranties of merchantability and fitness for a particular purpose are disclaimed.\n+// In no event shall the Intel Corporation or contributors be liable for any direct,\n+// indirect, incidental, special, exemplary, or consequential damages\n+// (including, but not limited to, procurement of substitute goods or services;\n+// loss of use, data, or profits; or business interruption) however caused\n+// and on any theory of liability, whether in contract, strict liability,\n+// or tort (including negligence or otherwise) arising in any way out of\n+// the use of this software, even if advised of the possibility of such damage.\n+//\n+//M*/\n+\n+#include \"precomp.hpp\"\n+#include \"grfmt_jpegxl.hpp\"\n+\n+#ifdef HAVE_JPEGXL\n+\n+#include \n+\n+namespace cv\n+{\n+\n+/////////////////////// JpegXLDecoder ///////////////////\n+\n+JpegXLDecoder::JpegXLDecoder() : m_f(nullptr, fclose)\n+{\n+ m_signature = \"\\xFF\\x0A\";\n+ m_decoder = nullptr;\n+ m_buf_supported = true;\n+ m_type = -1;\n+}\n+\n+JpegXLDecoder::~JpegXLDecoder()\n+{\n+ close();\n+}\n+\n+void JpegXLDecoder::close()\n+{\n+ if (m_decoder)\n+ m_decoder.release();\n+ if (m_f)\n+ m_f.release();\n+ m_read_buffer = {};\n+ m_width = m_height = 0;\n+ m_type = -1;\n+}\n+\n+ImageDecoder JpegXLDecoder::newDecoder() const\n+{\n+ return makePtr();\n+}\n+\n+bool JpegXLDecoder::read(Mat* pimg)\n+{\n+ // Open file\n+ if (!m_f) {\n+ m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n+ if (!m_f)\n+ return false;\n+ }\n+\n+ // Initialize decoder\n+ if (!m_decoder) {\n+ m_decoder = JxlDecoderMake(nullptr);\n+ if (!m_decoder)\n+ return false;\n+ // Subscribe to the basic info event\n+ JxlDecoderStatus status = JxlDecoderSubscribeEvents(m_decoder.get(), JXL_DEC_BASIC_INFO | 
JXL_DEC_FULL_IMAGE);\n+ if (status != JXL_DEC_SUCCESS)\n+ return false;\n+ }\n+\n+ // Set up parallel m_parallel_runner\n+ if (!m_parallel_runner) {\n+ m_parallel_runner = JxlThreadParallelRunnerMake(nullptr, JxlThreadParallelRunnerDefaultNumWorkerThreads());", + "comment_created_at": "2024-10-29T11:05:49+00:00", + "comment_author": "cdcseacave", + "comment_body": "done", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-framework-synchronization-practices.md b/_reviewers/opencv-framework-synchronization-practices.md index 36f7873..682484b 100644 --- a/_reviewers/opencv-framework-synchronization-practices.md +++ b/_reviewers/opencv-framework-synchronization-practices.md @@ -38,371 +38,3 @@ m_parallel_runner = JxlThreadParallelRunnerMake(nullptr, cv::getNumThreads()); 4. When using static resources in multithreaded contexts, ensure proper synchronization or consider alternatives like on-demand initialization that avoid static state altogether. These practices ensure your code remains compatible with special configurations like `OPENCV_DISABLE_THREAD_SUPPORT` while maintaining optimal performance across various platforms. - - -[ - { - "discussion_id": "1370067753", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "created_at": "2023-10-24T12:17:05+00:00", - "commented_code": "if(_src.depth() == CV_8U)\n {\n static UMat sdiv_data;\n static UMat hdiv_data180;\n static UMat hdiv_data256;\n static int sdiv_table[256];\n static int hdiv_table180[256];\n static int hdiv_table256[256];\n static volatile bool initialized180 = false, initialized256 = false;\n static thread_local UMat sdiv_data;\n static thread_local UMat hdiv_data180;\n static thread_local UMat hdiv_data256;\n static thread_local int sdiv_table[256];", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1370067753", - "repo_full_name": "opencv/opencv", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "discussion_id": "1370067753", - "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", - "comment_created_at": "2023-10-24T12:17:05+00:00", - "comment_author": "opencv-alalek", - "comment_body": "Lets try to apply `thread_local` for `UMat` variables only. 
No need to touch other tables (there is no problem with them).\r\n\r\nUse dedicated `initializedUMat` for that.", - "pr_file_module": null - }, - { - "comment_id": "1386035956", - "repo_full_name": "opencv/opencv", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "discussion_id": "1370067753", - "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", - "comment_created_at": "2023-11-08T06:03:58+00:00", - "comment_author": "kallaballa", - "comment_body": "> initializedUMat\r\n\r\nI am not sure what you mean by the last part", - "pr_file_module": null - }, - { - "comment_id": "1387011521", - "repo_full_name": "opencv/opencv", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "discussion_id": "1370067753", - "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", - "comment_created_at": "2023-11-08T18:00:22+00:00", - "comment_author": "opencv-alalek", - "comment_body": "Split initialization on 2 parts:\r\n- regular CPU buffers (like `int sdiv_table[256];`) - they don't need `thread-local` at all.\r\n- OpenCL related stuff which causes problems and needs thread-local variables (also put it under condition that this data is really needed - CPU-only processing doesn't need that at all).", - "pr_file_module": null - }, - { - "comment_id": "1387203457", - "repo_full_name": "opencv/opencv", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "discussion_id": "1370067753", - "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", - "comment_created_at": "2023-11-08T21:10:40+00:00", - "comment_author": "vpisarev", - "comment_body": "probably, even better solution would be to merge all 3 tables together (of course, 3 pointers can be initialized to point to different parts of the joint table) and then we can convert it to UMat on-fly:\r\n\r\n```\r\nint rgb2hsv_tab[256*3];\r\nvolatile bool rgb2hsv_initialized = false;\r\nif (!rgb2hsv_initialized) { ... 
}\r\nif (use_opencl) {\r\nUMat rgb2hsv_utab = Mat(1, 256*3, CV_32S, rgb2hsv_tab).getUMat();\r\n... // run opencl kernel with rgb2hsv_utab as one of the parameters\r\n}\r\n```\r\n\r\nyes, it means extra copy, but compared to the processed data it should be a tiny overhead, and much easier to handle than static thread-local UMat's.", - "pr_file_module": null - }, - { - "comment_id": "1389250064", - "repo_full_name": "opencv/opencv", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "discussion_id": "1370067753", - "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", - "comment_created_at": "2023-11-10T10:57:32+00:00", - "comment_author": "kallaballa", - "comment_body": "I'm not experienced enough to say the roundtrip-lateny will be more than we'd like, but it think so. But I like the safety and simplicity of your approach, so I guess I'll profile it.", - "pr_file_module": null - }, - { - "comment_id": "1389251556", - "repo_full_name": "opencv/opencv", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "discussion_id": "1370067753", - "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", - "comment_created_at": "2023-11-10T10:58:59+00:00", - "comment_author": "kallaballa", - "comment_body": "@opencv-alalek @vpisarev ...how to know who of you to follow in which case? 
^^", - "pr_file_module": null - }, - { - "comment_id": "1389257797", - "repo_full_name": "opencv/opencv", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "discussion_id": "1370067753", - "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", - "comment_created_at": "2023-11-10T11:05:25+00:00", - "comment_author": "kallaballa", - "comment_body": "> probably, even better solution would be to merge all 3 tables together (of course, 3 pointers can be initialized to point to different parts of the joint table) and then we can convert it to UMat on-fly:\r\n> \r\n> ```\r\n> int rgb2hsv_tab[256*3];\r\n> volatile bool rgb2hsv_initialized = false;\r\n> if (!rgb2hsv_initialized) { ... }\r\n> if (use_opencl) {\r\n> UMat rgb2hsv_utab = Mat(1, 256*3, CV_32S, rgb2hsv_tab).getUMat();\r\n> ... // run opencl kernel with rgb2hsv_utab as one of the parameters\r\n> }\r\n> ```\r\n> \r\n> yes, it means extra copy, but compared to the processed data it should be a tiny overhead, and much easier to handle than static thread-local UMat's.\r\n\r\nalso... we are in oclCvtColorBGR2HSV which is \"guarded\" by CV_OCL_RUN. so i guess i can drop ```if (use_opencl)```", - "pr_file_module": null - }, - { - "comment_id": "1389291318", - "repo_full_name": "opencv/opencv", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "discussion_id": "1370067753", - "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", - "comment_created_at": "2023-11-10T11:41:32+00:00", - "comment_author": "kallaballa", - "comment_body": "I neither like unecessary round-trips nor messing with the memory model, so i came up with this:\r\n\r\n\r\n```c++\r\n if(_src.depth() == CV_8U)\r\n {\r\n\t\tstatic std::mutex mtx;\r\n\t\t{\r\n\t\t\tstd::unique_lock lock(mtx);\r\n\t\t\tstatic UMat sdiv_data;\r\n\t\t\tstatic UMat hdiv_data180;\r\n\t\t\tstatic UMat hdiv_data256;\r\n\t\t\tstatic int combined_table[256];\r\n\t\t\tstatic volatile bool initialized180 = false, initialized256 = false;\r\n\t\t\tstatic volatile bool initialized = hrange == 180 ? initialized180 : initialized256;\r\n\r\n\t\t\tif (!initialized)\r\n\t\t\t{\r\n\t\t\t\tint * const hdiv_table = hrange == 180 ? combined_table : &combined_table[128], hsv_shift = 12;\r\n\t\t\t\tUMat & hdiv_data = hrange == 180 ? 
hdiv_data180 : hdiv_data256;\r\n\r\n\t\t\t\tcombined_table[0] = 0;\r\n\r\n\t\t\t\tint v = 255 << hsv_shift;\r\n\t\t\t\tif (!initialized180 && !initialized256)\r\n\t\t\t\t{\r\n\t\t\t\t\tfor(int i = 1; i < 256; i++ )\r\n\t\t\t\t\t\tcombined_table[i] = saturate_cast(v/(1.*i));\r\n\t\t\t\t\tMat(1, 256, CV_32SC1, combined_table).copyTo(sdiv_data);\r\n\t\t\t\t}\r\n\r\n\t\t\t\tv = hrange << hsv_shift;\r\n\t\t\t\tfor (int i = 1; i < 256; i++ )\r\n\t\t\t\t\thdiv_table[i] = saturate_cast(v/(6.*i));\r\n\r\n\t\t\t\tMat(1, 256, CV_32SC1, hdiv_table).copyTo(hdiv_data);\r\n\t\t\t\tinitialized = true;\r\n\t\t\t}\r\n\r\n\t\t\th.setArg(ocl::KernelArg::PtrReadOnly(sdiv_data));\r\n\t\t\th.setArg(hrange == 256 ? ocl::KernelArg::PtrReadOnly(hdiv_data256) :\r\n\t\t\t\t\t\t\t ocl::KernelArg::PtrReadOnly(hdiv_data180));\r\n\t\t}\r\n\t}\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1389502048", - "repo_full_name": "opencv/opencv", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "discussion_id": "1370067753", - "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", - "comment_created_at": "2023-11-10T14:53:26+00:00", - "comment_author": "opencv-alalek", - "comment_body": "Proposed code locks mutex on each call. That should be properly avoided.", - "pr_file_module": null - }, - { - "comment_id": "1389863226", - "repo_full_name": "opencv/opencv", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "discussion_id": "1370067753", - "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", - "comment_created_at": "2023-11-10T20:04:29+00:00", - "comment_author": "vpisarev", - "comment_body": "@kallaballa, let's try the proposed by me approach without use of static UMat's at all.", - "pr_file_module": null - }, - { - "comment_id": "1390087775", - "repo_full_name": "opencv/opencv", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "discussion_id": "1370067753", - "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat 
hdiv_data256;\n+ static thread_local int sdiv_table[256];", - "comment_created_at": "2023-11-11T02:06:59+00:00", - "comment_author": "kallaballa", - "comment_body": "@opencv-alalek double checked pattern and lock_guard 0f290bc\r\n@vpisarev I'll give it a try and provide numbers", - "pr_file_module": null - }, - { - "comment_id": "1390631190", - "repo_full_name": "opencv/opencv", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "discussion_id": "1370067753", - "commented_code": "@@ -274,13 +274,13 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n+ static thread_local UMat sdiv_data;\n+ static thread_local UMat hdiv_data180;\n+ static thread_local UMat hdiv_data256;\n+ static thread_local int sdiv_table[256];", - "comment_created_at": "2023-11-13T05:12:33+00:00", - "comment_author": "kallaballa", - "comment_body": "Test failed e.g. on MacOS: OCL_TEST_P(CvtColor8u32f, RGB2HSV_FULL) { performTest(3, 3, CVTCODE(RGB2HSV_FULL), IPP_EPS); }\r\n\r\nI don't have a MacOS setup, but it stands to reason that it is a concurrency issue. So i checked on the memory model guarantees of ```volatile``` and there are none. So I guessed that is the problem. Let's see the test.\r\n\r\n@vpisarev implementation without static still pending. want both version working.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1400815714", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "created_at": "2023-11-21T15:55:50+00:00", - "commented_code": "if(_src.depth() == CV_8U)\n {\n static UMat sdiv_data;\n static UMat hdiv_data180;\n static UMat hdiv_data256;\n static int sdiv_table[256];\n static int hdiv_table180[256];\n static int hdiv_table256[256];\n static volatile bool initialized180 = false, initialized256 = false;\n volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;\n static std::mutex mtx;", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1400815714", - "repo_full_name": "opencv/opencv", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "discussion_id": "1400815714", - "commented_code": "@@ -274,38 +275,45 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n- volatile bool & initialized = hrange == 180 ? 
initialized180 : initialized256;\n+ static std::mutex mtx;", - "comment_created_at": "2023-11-21T15:55:50+00:00", - "comment_author": "opencv-alalek", - "comment_body": "This breaks bare metal build configurations without any threading support (see `OPENCV_DISABLE_THREAD_SUPPORT`)\r\n\r\nUse OpenCV wrappers instead.\r\n\r\nReuse `cv::getInitializationMutex()` instead of creation of new one.", - "pr_file_module": null - }, - { - "comment_id": "1402470564", - "repo_full_name": "opencv/opencv", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "discussion_id": "1400815714", - "commented_code": "@@ -274,38 +275,45 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n- volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;\n+ static std::mutex mtx;", - "comment_created_at": "2023-11-22T17:44:11+00:00", - "comment_author": "kallaballa", - "comment_body": "Oh i understand. thx for the info. is there a list of features i should usually test for? i started to test for WITH_OPENCL=OFF and i will keep in mind that there are targets without thread support... are there more configurations i should keep in mind?", - "pr_file_module": null - }, - { - "comment_id": "1404710045", - "repo_full_name": "opencv/opencv", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "discussion_id": "1400815714", - "commented_code": "@@ -274,38 +275,45 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n- volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;\n+ static std::mutex mtx;", - "comment_created_at": "2023-11-25T03:20:45+00:00", - "comment_author": "kallaballa", - "comment_body": "Done 031abea. Tested locally by running opencv_test_improc with following build configuration. Btw. 
i found ```cv::utils::lock_guard``` but it doesn't seem necessary since there is C++11 support.\r\n\r\n```shell\r\ncmake -DINSTALL_BIN_EXAMPLES=OFF -DBUILD_SHARED_LIBS=ON -DWITH_OPENCL=ON -DBUILD_opencv_java=OFF -DBUILD_opencv_js=OFF -DBUILD_opencv_python2=OFF -DBUILD_opencv_python3=OFF -DBUILD_EXAMPLES=OFF -DBUILD_PACKAGE=OFF -DBUILD_TESTS=ON -DBUILD_PERF_TESTS=ON -DBUILD_DOCS=OFF -DBUILD_opencv_videoio=ON -DBUILD_opencv_highgui=ON -DBUILD_opencv_ts=ON -DBUILD_opencv_imgcodecs=ON -DBUILD_opencv_plot=OFF -DBUILD_opencv_tracking=OFF -DBUILD_opencv_video=OFF -DBUILD_opencv_world=ON -DCMAKE_C_FLAGS=-DOPENCV_DISABLE_THREAD_SUPPORT -DCMAKE_CXX_FLAGS=-DOPENCV_DISABLE_THREAD_SUPPORT ../opencv\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1411079146", - "repo_full_name": "opencv/opencv", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "discussion_id": "1400815714", - "commented_code": "@@ -274,38 +275,45 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n- volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;\n+ static std::mutex mtx;", - "comment_created_at": "2023-11-30T18:01:26+00:00", - "comment_author": "opencv-alalek", - "comment_body": "`std::lock_guard` => `cv::AutoLock`\r\n\r\nDid you measure performance changes due to added of `.copyTo()` calls?", - "pr_file_module": null - }, - { - "comment_id": "1413029712", - "repo_full_name": "opencv/opencv", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "discussion_id": "1400815714", - "commented_code": "@@ -274,38 +275,45 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n- volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;\n+ static std::mutex mtx;", - "comment_created_at": "2023-12-03T08:58:44+00:00", - "comment_author": "kallaballa", - "comment_body": "done: [8cbf9c7](https://github.com/opencv/opencv/pull/24444/commits/8cbf9c70e398cdf8357564ef0e5ab8df7f5d0eef)", - "pr_file_module": null - }, - { - "comment_id": "1413031013", - "repo_full_name": "opencv/opencv", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "discussion_id": "1400815714", - "commented_code": "@@ -274,38 +275,45 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n- volatile bool & initialized = hrange == 180 ? 
initialized180 : initialized256;\n+ static std::mutex mtx;", - "comment_created_at": "2023-12-03T09:07:06+00:00", - "comment_author": "kallaballa", - "comment_body": "> `std::lock_guard` => `cv::AutoLock`\r\n> \r\n> Did you measure performance changes due to added of `.copyTo()` calls?\r\n\r\nI compared the current HEAD of 4.x with the branch of the PR (which i updated to the least 4.x HEAD) .\r\nI used follwing build configuration for both branches:\r\n```bash\r\ncmake -DINSTALL_BIN_EXAMPLES=OFF -DBUILD_SHARED_LIBS=ON -DWITH_OPENCL=ON -DBUILD_opencv_java=OFF -DBUILD_opencv_js=OFF -DBUILD_opencv_python2=OFF -DBUILD_opencv_python3=OFF -DBUILD_EXAMPLES=OFF -DBUILD_PACKAGE=OFF -DBUILD_TESTS=ON -DBUILD_PERF_TESTS=ON -DBUILD_DOCS=OFF -DBUILD_opencv_videoio=ON -DBUILD_opencv_highgui=ON -DBUILD_opencv_ts=ON -DBUILD_opencv_imgcodecs=ON -DBUILD_opencv_plot=OFF -DBUILD_opencv_tracking=OFF -DBUILD_opencv_video=OFF -DBUILD_opencv_world=ON -DENABLE_PROFILING=ON ../opencv\r\n```\r\n\r\nAnd the following test program: https://github.com/kallaballa/OpenCV-Issues/blob/main/src/oclCvtColorBGR2HSV-race/perf-test.cpp\r\n\r\nWithout patch it takes: ~14.8s\r\nWith patch it takes: ~15.2s\r\n\r\nSo there is a slow-down. I'll optimize a bit and measure again.", - "pr_file_module": null - }, - { - "comment_id": "1413036376", - "repo_full_name": "opencv/opencv", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "discussion_id": "1400815714", - "commented_code": "@@ -274,38 +275,45 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n- volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;\n+ static std::mutex mtx;", - "comment_created_at": "2023-12-03T09:45:08+00:00", - "comment_author": "kallaballa", - "comment_body": "When profiling i had suspicious results and so i increased the iterations by 10x.\r\nThis gave me \r\n* 154.483s for patched without copy.\r\n* 156.108s for patch with copy.\r\n* 153.803s without patch.\r\n\r\nI think the differences are still quite influenced by variance. anyway it is not much. should i continue?", - "pr_file_module": null - }, - { - "comment_id": "1413038481", - "repo_full_name": "opencv/opencv", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "discussion_id": "1400815714", - "commented_code": "@@ -274,38 +275,45 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n- volatile bool & initialized = hrange == 180 ? 
initialized180 : initialized256;\n+ static std::mutex mtx;", - "comment_created_at": "2023-12-03T09:58:45+00:00", - "comment_author": "kallaballa", - "comment_body": "I meassured also the variant no-static/no-thread_local/no-locking: 155.512s", - "pr_file_module": null - }, - { - "comment_id": "1426663572", - "repo_full_name": "opencv/opencv", - "pr_number": 24444, - "pr_file": "modules/imgproc/src/color_hsv.dispatch.cpp", - "discussion_id": "1400815714", - "commented_code": "@@ -274,38 +275,45 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full\n \n if(_src.depth() == CV_8U)\n {\n- static UMat sdiv_data;\n- static UMat hdiv_data180;\n- static UMat hdiv_data256;\n- static int sdiv_table[256];\n- static int hdiv_table180[256];\n- static int hdiv_table256[256];\n- static volatile bool initialized180 = false, initialized256 = false;\n- volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;\n+ static std::mutex mtx;", - "comment_created_at": "2023-12-14T12:35:23+00:00", - "comment_author": "opencv-alalek", - "comment_body": "Please always reuse OpenCV performance tests or add them if something is missing.\r\n\r\nAvoid using of hand-written tests cases\r\n\r\n> suspicious results\r\n\r\nProvided measurement code has bug due to async execution of `cvtColor(UMat)` (code should schedule OpenCL kernels, but do not wait the final result).\r\n\r\nP.S. Mat to UMat coping is synchronous call in current API - forces wait of OpenCL execution queue.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2024125007", - "pr_number": 27182, - "pr_file": "3rdparty/fastcv/src/fastcv_hal_imgproc.cpp", - "created_at": "2025-04-02T06:03:40+00:00", - "commented_code": "CV_HAL_RETURN_NOT_IMPLEMENTED(\"ROI not supported\");\n }\n\n if(src_depth == CV_32F && normalize != 1)\n CV_HAL_RETURN_NOT_IMPLEMENTED(\"normalized kernel supported for float types\");\n\n if(src_depth == CV_8U && (ksize_width != 3 && ksize_width != 5))\n CV_HAL_RETURN_NOT_IMPLEMENTED(\"kernel size not supported\");\n\n INITIALIZATION_CHECK;\n\n fcvBorderType bdr;\n uint8_t bdrVal = 0;\n switch(border_type)\n {\n case cv::BORDER_REPLICATE:\n bdr = FASTCV_BORDER_REPLICATE;\n break;\n case cv::BORDER_REFLECT:\n bdr = FASTCV_BORDER_REFLECT;\n break;\n case cv::BORDER_REFLECT101: // cv::BORDER_REFLECT_101, BORDER_DEFAULT\n bdr = FASTCV_BORDER_REFLECT_V2;\n break;\n default:\n CV_HAL_RETURN_NOT_IMPLEMENTED(\"border type not supported\");\n }\n cv::Mat dst_temp;\n bool inPlace = src_data == dst_data ? 
true : false ;\n\n fcvStatus status = FASTCV_SUCCESS;\n if(ksize_width == 3)\n int nThreads = cv::getNumThreads();\n\n cv::Mat src = cv::Mat(height, width, src_depth, (void*)src_data, src_step);\n\n if(inPlace)\n dst_temp = cv::Mat(height, width, src_depth);\n else\n dst_temp = cv::Mat(height, width, src_depth, (void*)dst_data, dst_step);\n\n int nStripes, stripeHeight = nThreads * 10;", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2024125007", - "repo_full_name": "opencv/opencv", - "pr_number": 27182, - "pr_file": "3rdparty/fastcv/src/fastcv_hal_imgproc.cpp", - "discussion_id": "2024125007", - "commented_code": "@@ -363,37 +417,46 @@ int fastcv_hal_boxFilter(\n CV_HAL_RETURN_NOT_IMPLEMENTED(\"ROI not supported\");\n }\n \n+ if(src_depth == CV_32F && normalize != 1)\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"normalized kernel supported for float types\");\n+\n+ if(src_depth == CV_8U && (ksize_width != 3 && ksize_width != 5))\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"kernel size not supported\");\n+\n INITIALIZATION_CHECK;\n \n- fcvBorderType bdr;\n- uint8_t bdrVal = 0;\n- switch(border_type)\n- {\n- case cv::BORDER_REPLICATE:\n- bdr = FASTCV_BORDER_REPLICATE;\n- break;\n- case cv::BORDER_REFLECT:\n- bdr = FASTCV_BORDER_REFLECT;\n- break;\n- case cv::BORDER_REFLECT101: // cv::BORDER_REFLECT_101, BORDER_DEFAULT\n- bdr = FASTCV_BORDER_REFLECT_V2;\n- break;\n- default:\n- CV_HAL_RETURN_NOT_IMPLEMENTED(\"border type not supported\");\n- }\n+ cv::Mat dst_temp;\n+ bool inPlace = src_data == dst_data ? true : false ;\n \n- fcvStatus status = FASTCV_SUCCESS;\n- if(ksize_width == 3)\n+ int nThreads = cv::getNumThreads();\n+\n+ cv::Mat src = cv::Mat(height, width, src_depth, (void*)src_data, src_step);\n+\n+ if(inPlace)\n+ dst_temp = cv::Mat(height, width, src_depth);\n+ else\n+ dst_temp = cv::Mat(height, width, src_depth, (void*)dst_data, dst_step);\n+\n+ int nStripes, stripeHeight = nThreads * 10;", - "comment_created_at": "2025-04-02T06:03:40+00:00", - "comment_author": "asmorkalov", - "comment_body": "`stripeHeight = nThreads * 10;` sounds strange. More threads -> larger piece for each thread.", - "pr_file_module": null - }, - { - "comment_id": "2024155437", - "repo_full_name": "opencv/opencv", - "pr_number": 27182, - "pr_file": "3rdparty/fastcv/src/fastcv_hal_imgproc.cpp", - "discussion_id": "2024125007", - "commented_code": "@@ -363,37 +417,46 @@ int fastcv_hal_boxFilter(\n CV_HAL_RETURN_NOT_IMPLEMENTED(\"ROI not supported\");\n }\n \n+ if(src_depth == CV_32F && normalize != 1)\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"normalized kernel supported for float types\");\n+\n+ if(src_depth == CV_8U && (ksize_width != 3 && ksize_width != 5))\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"kernel size not supported\");\n+\n INITIALIZATION_CHECK;\n \n- fcvBorderType bdr;\n- uint8_t bdrVal = 0;\n- switch(border_type)\n- {\n- case cv::BORDER_REPLICATE:\n- bdr = FASTCV_BORDER_REPLICATE;\n- break;\n- case cv::BORDER_REFLECT:\n- bdr = FASTCV_BORDER_REFLECT;\n- break;\n- case cv::BORDER_REFLECT101: // cv::BORDER_REFLECT_101, BORDER_DEFAULT\n- bdr = FASTCV_BORDER_REFLECT_V2;\n- break;\n- default:\n- CV_HAL_RETURN_NOT_IMPLEMENTED(\"border type not supported\");\n- }\n+ cv::Mat dst_temp;\n+ bool inPlace = src_data == dst_data ? 
true : false ;\n \n- fcvStatus status = FASTCV_SUCCESS;\n- if(ksize_width == 3)\n+ int nThreads = cv::getNumThreads();\n+\n+ cv::Mat src = cv::Mat(height, width, src_depth, (void*)src_data, src_step);\n+\n+ if(inPlace)\n+ dst_temp = cv::Mat(height, width, src_depth);\n+ else\n+ dst_temp = cv::Mat(height, width, src_depth, (void*)dst_data, dst_step);\n+\n+ int nStripes, stripeHeight = nThreads * 10;", - "comment_created_at": "2025-04-02T06:33:02+00:00", - "comment_author": "adsha-quic", - "comment_body": "Hi Alex\r\n\r\nWe tested this and found it having good speed, since usually threads are 8 as per our experiments. Please guide what optimizations we can do here.", - "pr_file_module": null - }, - { - "comment_id": "2024172942", - "repo_full_name": "opencv/opencv", - "pr_number": 27182, - "pr_file": "3rdparty/fastcv/src/fastcv_hal_imgproc.cpp", - "discussion_id": "2024125007", - "commented_code": "@@ -363,37 +417,46 @@ int fastcv_hal_boxFilter(\n CV_HAL_RETURN_NOT_IMPLEMENTED(\"ROI not supported\");\n }\n \n+ if(src_depth == CV_32F && normalize != 1)\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"normalized kernel supported for float types\");\n+\n+ if(src_depth == CV_8U && (ksize_width != 3 && ksize_width != 5))\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"kernel size not supported\");\n+\n INITIALIZATION_CHECK;\n \n- fcvBorderType bdr;\n- uint8_t bdrVal = 0;\n- switch(border_type)\n- {\n- case cv::BORDER_REPLICATE:\n- bdr = FASTCV_BORDER_REPLICATE;\n- break;\n- case cv::BORDER_REFLECT:\n- bdr = FASTCV_BORDER_REFLECT;\n- break;\n- case cv::BORDER_REFLECT101: // cv::BORDER_REFLECT_101, BORDER_DEFAULT\n- bdr = FASTCV_BORDER_REFLECT_V2;\n- break;\n- default:\n- CV_HAL_RETURN_NOT_IMPLEMENTED(\"border type not supported\");\n- }\n+ cv::Mat dst_temp;\n+ bool inPlace = src_data == dst_data ? true : false ;\n \n- fcvStatus status = FASTCV_SUCCESS;\n- if(ksize_width == 3)\n+ int nThreads = cv::getNumThreads();\n+\n+ cv::Mat src = cv::Mat(height, width, src_depth, (void*)src_data, src_step);\n+\n+ if(inPlace)\n+ dst_temp = cv::Mat(height, width, src_depth);\n+ else\n+ dst_temp = cv::Mat(height, width, src_depth, (void*)dst_data, dst_step);\n+\n+ int nStripes, stripeHeight = nThreads * 10;", - "comment_created_at": "2025-04-02T06:48:10+00:00", - "comment_author": "asmorkalov", - "comment_body": "threads are 8. It's not true:\r\n- Android build uses 2 threads by default. It's done to prevent overheating, but may be changed in future.\r\n- Linux builds use all available cores.\r\n- parallel_for_ serializes nested parallel_for_ calls. 
So you can easily get 1 here.", - "pr_file_module": null - }, - { - "comment_id": "2024174425", - "repo_full_name": "opencv/opencv", - "pr_number": 27182, - "pr_file": "3rdparty/fastcv/src/fastcv_hal_imgproc.cpp", - "discussion_id": "2024125007", - "commented_code": "@@ -363,37 +417,46 @@ int fastcv_hal_boxFilter(\n CV_HAL_RETURN_NOT_IMPLEMENTED(\"ROI not supported\");\n }\n \n+ if(src_depth == CV_32F && normalize != 1)\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"normalized kernel supported for float types\");\n+\n+ if(src_depth == CV_8U && (ksize_width != 3 && ksize_width != 5))\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"kernel size not supported\");\n+\n INITIALIZATION_CHECK;\n \n- fcvBorderType bdr;\n- uint8_t bdrVal = 0;\n- switch(border_type)\n- {\n- case cv::BORDER_REPLICATE:\n- bdr = FASTCV_BORDER_REPLICATE;\n- break;\n- case cv::BORDER_REFLECT:\n- bdr = FASTCV_BORDER_REFLECT;\n- break;\n- case cv::BORDER_REFLECT101: // cv::BORDER_REFLECT_101, BORDER_DEFAULT\n- bdr = FASTCV_BORDER_REFLECT_V2;\n- break;\n- default:\n- CV_HAL_RETURN_NOT_IMPLEMENTED(\"border type not supported\");\n- }\n+ cv::Mat dst_temp;\n+ bool inPlace = src_data == dst_data ? true : false ;\n \n- fcvStatus status = FASTCV_SUCCESS;\n- if(ksize_width == 3)\n+ int nThreads = cv::getNumThreads();\n+\n+ cv::Mat src = cv::Mat(height, width, src_depth, (void*)src_data, src_step);\n+\n+ if(inPlace)\n+ dst_temp = cv::Mat(height, width, src_depth);\n+ else\n+ dst_temp = cv::Mat(height, width, src_depth, (void*)dst_data, dst_step);\n+\n+ int nStripes, stripeHeight = nThreads * 10;", - "comment_created_at": "2025-04-02T06:49:11+00:00", - "comment_author": "asmorkalov", - "comment_body": "We usually set granularity to some reasonable size for single thread. OpenCV uses dynamic scheduling, so all other steps are done automatically.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1820133472", - "pr_number": 26379, - "pr_file": "modules/imgcodecs/src/grfmt_jpegxl.cpp", - "created_at": "2024-10-29T05:56:38+00:00", - "commented_code": "/*M///////////////////////////////////////////////////////////////////////////////////////\n//\n// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\n//\n// By downloading, copying, installing or using the software you agree to this license.\n// If you do not agree to this license, do not download, install,\n// copy or use the software.\n//\n//\n// Intel License Agreement\n// For Open Source Computer Vision Library\n//\n// Copyright (C) 2000, Intel Corporation, all rights reserved.\n// Third party copyrights are property of their respective owners.\n//\n// Redistribution and use in source and binary forms, with or without modification,\n// are permitted provided that the following conditions are met:\n//\n// * Redistribution's of source code must retain the above copyright notice,\n// this list of conditions and the following disclaimer.\n//\n// * Redistribution's in binary form must reproduce the above copyright notice,\n// this list of conditions and the following disclaimer in the documentation\n// and/or other materials provided with the distribution.\n//\n// * The name of Intel Corporation may not be used to endorse or promote products\n// derived from this software without specific prior written permission.\n//\n// This software is provided by the copyright holders and contributors \"as is\" and\n// any express or implied warranties, including, but not limited to, the implied\n// warranties of merchantability and fitness for a particular purpose are disclaimed.\n// In no event 
shall the Intel Corporation or contributors be liable for any direct,\n// indirect, incidental, special, exemplary, or consequential damages\n// (including, but not limited to, procurement of substitute goods or services;\n// loss of use, data, or profits; or business interruption) however caused\n// and on any theory of liability, whether in contract, strict liability,\n// or tort (including negligence or otherwise) arising in any way out of\n// the use of this software, even if advised of the possibility of such damage.\n//\n//M*/\n\n#include \"precomp.hpp\"\n#include \"grfmt_jpegxl.hpp\"\n\n#ifdef HAVE_JPEGXL\n\n#include \n\nnamespace cv\n{\n\n/////////////////////// JpegXLDecoder ///////////////////\n\nJpegXLDecoder::JpegXLDecoder() : m_f(nullptr, fclose)\n{\n m_signature = \"\\xFF\\x0A\";\n m_decoder = nullptr;\n m_buf_supported = true;\n m_type = -1;\n}\n\nJpegXLDecoder::~JpegXLDecoder()\n{\n close();\n}\n\nvoid JpegXLDecoder::close()\n{\n if (m_decoder)\n m_decoder.release();\n if (m_f)\n m_f.release();\n m_read_buffer = {};\n m_width = m_height = 0;\n m_type = -1;\n}\n\nImageDecoder JpegXLDecoder::newDecoder() const\n{\n return makePtr();\n}\n\nbool JpegXLDecoder::read(Mat* pimg)\n{\n // Open file\n if (!m_f) {\n m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n if (!m_f)\n return false;\n }\n\n // Initialize decoder\n if (!m_decoder) {\n m_decoder = JxlDecoderMake(nullptr);\n if (!m_decoder)\n return false;\n // Subscribe to the basic info event\n JxlDecoderStatus status = JxlDecoderSubscribeEvents(m_decoder.get(), JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE);\n if (status != JXL_DEC_SUCCESS)\n return false;\n }\n\n // Set up parallel m_parallel_runner\n if (!m_parallel_runner) {\n m_parallel_runner = JxlThreadParallelRunnerMake(nullptr, JxlThreadParallelRunnerDefaultNumWorkerThreads());", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1820133472", - "repo_full_name": "opencv/opencv", - "pr_number": 26379, - "pr_file": "modules/imgcodecs/src/grfmt_jpegxl.cpp", - "discussion_id": "1820133472", - "commented_code": "@@ -0,0 +1,376 @@\n+/*M///////////////////////////////////////////////////////////////////////////////////////\n+//\n+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\n+//\n+// By downloading, copying, installing or using the software you agree to this license.\n+// If you do not agree to this license, do not download, install,\n+// copy or use the software.\n+//\n+//\n+// Intel License Agreement\n+// For Open Source Computer Vision Library\n+//\n+// Copyright (C) 2000, Intel Corporation, all rights reserved.\n+// Third party copyrights are property of their respective owners.\n+//\n+// Redistribution and use in source and binary forms, with or without modification,\n+// are permitted provided that the following conditions are met:\n+//\n+// * Redistribution's of source code must retain the above copyright notice,\n+// this list of conditions and the following disclaimer.\n+//\n+// * Redistribution's in binary form must reproduce the above copyright notice,\n+// this list of conditions and the following disclaimer in the documentation\n+// and/or other materials provided with the distribution.\n+//\n+// * The name of Intel Corporation may not be used to endorse or promote products\n+// derived from this software without specific prior written permission.\n+//\n+// This software is provided by the copyright holders and contributors \"as is\" and\n+// any express or implied warranties, including, but not limited to, the 
implied\n+// warranties of merchantability and fitness for a particular purpose are disclaimed.\n+// In no event shall the Intel Corporation or contributors be liable for any direct,\n+// indirect, incidental, special, exemplary, or consequential damages\n+// (including, but not limited to, procurement of substitute goods or services;\n+// loss of use, data, or profits; or business interruption) however caused\n+// and on any theory of liability, whether in contract, strict liability,\n+// or tort (including negligence or otherwise) arising in any way out of\n+// the use of this software, even if advised of the possibility of such damage.\n+//\n+//M*/\n+\n+#include \"precomp.hpp\"\n+#include \"grfmt_jpegxl.hpp\"\n+\n+#ifdef HAVE_JPEGXL\n+\n+#include \n+\n+namespace cv\n+{\n+\n+/////////////////////// JpegXLDecoder ///////////////////\n+\n+JpegXLDecoder::JpegXLDecoder() : m_f(nullptr, fclose)\n+{\n+ m_signature = \"\\xFF\\x0A\";\n+ m_decoder = nullptr;\n+ m_buf_supported = true;\n+ m_type = -1;\n+}\n+\n+JpegXLDecoder::~JpegXLDecoder()\n+{\n+ close();\n+}\n+\n+void JpegXLDecoder::close()\n+{\n+ if (m_decoder)\n+ m_decoder.release();\n+ if (m_f)\n+ m_f.release();\n+ m_read_buffer = {};\n+ m_width = m_height = 0;\n+ m_type = -1;\n+}\n+\n+ImageDecoder JpegXLDecoder::newDecoder() const\n+{\n+ return makePtr();\n+}\n+\n+bool JpegXLDecoder::read(Mat* pimg)\n+{\n+ // Open file\n+ if (!m_f) {\n+ m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n+ if (!m_f)\n+ return false;\n+ }\n+\n+ // Initialize decoder\n+ if (!m_decoder) {\n+ m_decoder = JxlDecoderMake(nullptr);\n+ if (!m_decoder)\n+ return false;\n+ // Subscribe to the basic info event\n+ JxlDecoderStatus status = JxlDecoderSubscribeEvents(m_decoder.get(), JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE);\n+ if (status != JXL_DEC_SUCCESS)\n+ return false;\n+ }\n+\n+ // Set up parallel m_parallel_runner\n+ if (!m_parallel_runner) {\n+ m_parallel_runner = JxlThreadParallelRunnerMake(nullptr, JxlThreadParallelRunnerDefaultNumWorkerThreads());", - "comment_created_at": "2024-10-29T05:56:38+00:00", - "comment_author": "asmorkalov", - "comment_body": "I propose to use `num_worker_threads = cv::getNumThreads()` to make it manageable outside. 
See https://docs.opencv.org/4.x/db/de0/group__core__utils.html#ga2db334ec41d98da3129ef4a2342fc4d4 ", - "pr_file_module": null - }, - { - "comment_id": "1820588823", - "repo_full_name": "opencv/opencv", - "pr_number": 26379, - "pr_file": "modules/imgcodecs/src/grfmt_jpegxl.cpp", - "discussion_id": "1820133472", - "commented_code": "@@ -0,0 +1,376 @@\n+/*M///////////////////////////////////////////////////////////////////////////////////////\n+//\n+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\n+//\n+// By downloading, copying, installing or using the software you agree to this license.\n+// If you do not agree to this license, do not download, install,\n+// copy or use the software.\n+//\n+//\n+// Intel License Agreement\n+// For Open Source Computer Vision Library\n+//\n+// Copyright (C) 2000, Intel Corporation, all rights reserved.\n+// Third party copyrights are property of their respective owners.\n+//\n+// Redistribution and use in source and binary forms, with or without modification,\n+// are permitted provided that the following conditions are met:\n+//\n+// * Redistribution's of source code must retain the above copyright notice,\n+// this list of conditions and the following disclaimer.\n+//\n+// * Redistribution's in binary form must reproduce the above copyright notice,\n+// this list of conditions and the following disclaimer in the documentation\n+// and/or other materials provided with the distribution.\n+//\n+// * The name of Intel Corporation may not be used to endorse or promote products\n+// derived from this software without specific prior written permission.\n+//\n+// This software is provided by the copyright holders and contributors \"as is\" and\n+// any express or implied warranties, including, but not limited to, the implied\n+// warranties of merchantability and fitness for a particular purpose are disclaimed.\n+// In no event shall the Intel Corporation or contributors be liable for any direct,\n+// indirect, incidental, special, exemplary, or consequential damages\n+// (including, but not limited to, procurement of substitute goods or services;\n+// loss of use, data, or profits; or business interruption) however caused\n+// and on any theory of liability, whether in contract, strict liability,\n+// or tort (including negligence or otherwise) arising in any way out of\n+// the use of this software, even if advised of the possibility of such damage.\n+//\n+//M*/\n+\n+#include \"precomp.hpp\"\n+#include \"grfmt_jpegxl.hpp\"\n+\n+#ifdef HAVE_JPEGXL\n+\n+#include \n+\n+namespace cv\n+{\n+\n+/////////////////////// JpegXLDecoder ///////////////////\n+\n+JpegXLDecoder::JpegXLDecoder() : m_f(nullptr, fclose)\n+{\n+ m_signature = \"\\xFF\\x0A\";\n+ m_decoder = nullptr;\n+ m_buf_supported = true;\n+ m_type = -1;\n+}\n+\n+JpegXLDecoder::~JpegXLDecoder()\n+{\n+ close();\n+}\n+\n+void JpegXLDecoder::close()\n+{\n+ if (m_decoder)\n+ m_decoder.release();\n+ if (m_f)\n+ m_f.release();\n+ m_read_buffer = {};\n+ m_width = m_height = 0;\n+ m_type = -1;\n+}\n+\n+ImageDecoder JpegXLDecoder::newDecoder() const\n+{\n+ return makePtr();\n+}\n+\n+bool JpegXLDecoder::read(Mat* pimg)\n+{\n+ // Open file\n+ if (!m_f) {\n+ m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n+ if (!m_f)\n+ return false;\n+ }\n+\n+ // Initialize decoder\n+ if (!m_decoder) {\n+ m_decoder = JxlDecoderMake(nullptr);\n+ if (!m_decoder)\n+ return false;\n+ // Subscribe to the basic info event\n+ JxlDecoderStatus status = JxlDecoderSubscribeEvents(m_decoder.get(), JXL_DEC_BASIC_INFO | 
JXL_DEC_FULL_IMAGE);\n+ if (status != JXL_DEC_SUCCESS)\n+ return false;\n+ }\n+\n+ // Set up parallel m_parallel_runner\n+ if (!m_parallel_runner) {\n+ m_parallel_runner = JxlThreadParallelRunnerMake(nullptr, JxlThreadParallelRunnerDefaultNumWorkerThreads());", - "comment_created_at": "2024-10-29T11:05:49+00:00", - "comment_author": "cdcseacave", - "comment_body": "done", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-guard-optional-dependencies.json b/_reviewers/opencv-guard-optional-dependencies.json new file mode 100644 index 0000000..8c31c36 --- /dev/null +++ b/_reviewers/opencv-guard-optional-dependencies.json @@ -0,0 +1,124 @@ +[ + { + "discussion_id": "2097147988", + "pr_number": 25569, + "pr_file": "cmake/OpenCVGenConfig.cmake", + "created_at": "2025-05-20T07:06:39+00:00", + "commented_code": "set(OpenCV_USE_MANGLED_PATHS_CONFIGCMAKE FALSE)\nendif()\n\nif(ZLIB_FOUND)", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2097147988", + "repo_full_name": "opencv/opencv", + "pr_number": 25569, + "pr_file": "cmake/OpenCVGenConfig.cmake", + "discussion_id": "2097147988", + "commented_code": "@@ -11,6 +11,10 @@ else()\n set(OpenCV_USE_MANGLED_PATHS_CONFIGCMAKE FALSE)\n endif()\n \n+if(ZLIB_FOUND)", + "comment_created_at": "2025-05-20T07:06:39+00:00", + "comment_author": "opencv-alalek", + "comment_body": "> ZLIB_FOUND\r\n\r\nWrong.\r\n\r\nWe should not mess with CMakes `find_package(Zlib)` from anywhere (e.g. libiff may find own ZLIB).\r\n`HAVE_ZLIB` must be used for OpenCV usage checks.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2051463839", + "pr_number": 26906, + "pr_file": "modules/objdetect/src/mcc/checker_detector.hpp", + "created_at": "2025-04-19T12:09:47+00:00", + "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n\n/*\n * MIT License\n *\n * Copyright (c) 2018 Pedro Diamel Marrero Fernández\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#ifndef _MCC_CHECKER_DETECTOR_HPP\n#define _MCC_CHECKER_DETECTOR_HPP\n\n#include \"opencv2/objdetect.hpp\"\n#include \"charts.hpp\"\n\nnamespace cv\n{\nnamespace mcc\n{\n\nclass CCheckerDetectorImpl : public CCheckerDetector\n{\n\n typedef std::vector PointsVector;\n typedef std::vector ContoursVector;\n\npublic:\n CCheckerDetectorImpl();\n CCheckerDetectorImpl(const dnn::Net& _net){", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2051463839", + "repo_full_name": "opencv/opencv", + "pr_number": 26906, + "pr_file": "modules/objdetect/src/mcc/checker_detector.hpp", + "discussion_id": "2051463839", + "commented_code": "@@ -0,0 +1,207 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+\n+/*\n+ * MIT License\n+ *\n+ * Copyright (c) 2018 Pedro Diamel Marrero Fernández\n+ *\n+ * Permission is hereby granted, free of charge, to any person obtaining a copy\n+ * of this software and associated documentation files (the \"Software\"), to deal\n+ * in the Software without restriction, including without limitation the rights\n+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+ * copies of the Software, and to permit persons to whom the Software is\n+ * furnished to do so, subject to the following conditions:\n+ *\n+ * The above copyright notice and this permission notice shall be included in all\n+ * copies or substantial portions of the Software.\n+ *\n+ * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\n+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n+ * SOFTWARE.\n+ */\n+\n+#ifndef _MCC_CHECKER_DETECTOR_HPP\n+#define _MCC_CHECKER_DETECTOR_HPP\n+\n+#include \"opencv2/objdetect.hpp\"\n+#include \"charts.hpp\"\n+\n+namespace cv\n+{\n+namespace mcc\n+{\n+\n+class CCheckerDetectorImpl : public CCheckerDetector\n+{\n+\n+ typedef std::vector PointsVector;\n+ typedef std::vector ContoursVector;\n+\n+public:\n+ CCheckerDetectorImpl();\n+ CCheckerDetectorImpl(const dnn::Net& _net){", + "comment_created_at": "2025-04-19T12:09:47+00:00", + "comment_author": "mshabunin", + "comment_body": "This PR breaks build without _opencv_dnn_ - all related code should be guarded by `#ifdef HAVE_OPENCV_DNN` or _objdetect_->_dnn_ dependency should be required.\r\n\r\n**cc** @asmorkalov ", + "pr_file_module": null + }, + { + "comment_id": "2051823993", + "repo_full_name": "opencv/opencv", + "pr_number": 26906, + "pr_file": "modules/objdetect/src/mcc/checker_detector.hpp", + "discussion_id": "2051463839", + "commented_code": "@@ -0,0 +1,207 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+\n+/*\n+ * MIT License\n+ *\n+ * Copyright (c) 2018 Pedro Diamel Marrero Fernández\n+ *\n+ * Permission is hereby granted, free of charge, to any person obtaining a copy\n+ * of this software and associated documentation files (the \"Software\"), to deal\n+ * in the Software without restriction, including without limitation the rights\n+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+ * copies of the Software, and to permit persons to whom the Software is\n+ * furnished to do so, subject to the following conditions:\n+ *\n+ * The above copyright notice and this permission notice shall be included in all\n+ * copies or substantial portions of the Software.\n+ *\n+ * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\n+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n+ * SOFTWARE.\n+ */\n+\n+#ifndef _MCC_CHECKER_DETECTOR_HPP\n+#define _MCC_CHECKER_DETECTOR_HPP\n+\n+#include \"opencv2/objdetect.hpp\"\n+#include \"charts.hpp\"\n+\n+namespace cv\n+{\n+namespace mcc\n+{\n+\n+class CCheckerDetectorImpl : public CCheckerDetector\n+{\n+\n+ typedef std::vector PointsVector;\n+ typedef std::vector ContoursVector;\n+\n+public:\n+ CCheckerDetectorImpl();\n+ CCheckerDetectorImpl(const dnn::Net& _net){", + "comment_created_at": "2025-04-20T22:32:21+00:00", + "comment_author": "gursimarsingh", + "comment_body": "Adding a fix in this PR https://github.com/opencv/opencv/pull/27246 ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1899933291", + "pr_number": 26057, + "pr_file": "cmake/OpenCVCompilerOptions.cmake", + "created_at": "2024-12-31T06:06:22+00:00", + "commented_code": "endif()\nendif()\n\n# For 16k pages support with NDK prior 27\n# Details: https://developer.android.com/guide/practices/page-sizes?hl=en\nif(ANDROID AND ANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES AND (ANDROID_ABI STREQUAL arm64-v8a OR ANDROID_ABI STREQUAL x86_64))", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1899933291", + "repo_full_name": "opencv/opencv", + "pr_number": 26057, + "pr_file": "cmake/OpenCVCompilerOptions.cmake", + "discussion_id": "1899933291", + "commented_code": "@@ -404,6 +404,14 @@ if(NOT OPENCV_SKIP_LINK_NO_UNDEFINED)\n endif()\n endif()\n \n+# For 16k pages support with NDK prior 27\n+# Details: https://developer.android.com/guide/practices/page-sizes?hl=en\n+if(ANDROID AND ANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES AND (ANDROID_ABI STREQUAL arm64-v8a OR ANDROID_ABI STREQUAL x86_64))", + "comment_created_at": "2024-12-31T06:06:22+00:00", + "comment_author": "opencv-alalek", + "comment_body": "> ANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES\r\n\r\n`OPENCV_ANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES` to avoid possible conflicts", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1627890810", + "pr_number": 25608, + "pr_file": "cmake/OpenCVFindWebP.cmake", + "created_at": "2024-06-05T14:23:41+00:00", + "commented_code": "# Look for the library.\n FIND_LIBRARY(WEBP_LIBRARY NAMES webp)\n MARK_AS_ADVANCED(WEBP_LIBRARY)\n FIND_LIBRARY(WEBP_MUX_LIBRARY NAMES webpmux)\n FIND_LIBRARY(WEBP_DEMUX_LIBRARY NAMES webpdemux)\n\n # handle the QUIETLY and REQUIRED arguments and set WEBP_FOUND to TRUE if\n # all listed variables are TRUE\n INCLUDE(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake)\n FIND_PACKAGE_HANDLE_STANDARD_ARGS(WebP DEFAULT_MSG WEBP_LIBRARY WEBP_INCLUDE_DIR)\n\n SET(WEBP_LIBRARIES ${WEBP_LIBRARY})\n \n SET(WEBP_LIBRARIES ${WEBP_LIBRARY} webpmux webpdemux)", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1627890810", + "repo_full_name": "opencv/opencv", + "pr_number": 25608, + "pr_file": "cmake/OpenCVFindWebP.cmake", + "discussion_id": "1627890810", + "commented_code": "@@ -21,13 +21,15 @@ else()\n \n # Look for the library.\n FIND_LIBRARY(WEBP_LIBRARY NAMES webp)\n- MARK_AS_ADVANCED(WEBP_LIBRARY)\n+ FIND_LIBRARY(WEBP_MUX_LIBRARY NAMES webpmux)\n+ FIND_LIBRARY(WEBP_DEMUX_LIBRARY NAMES webpdemux)\n \n # handle the QUIETLY and REQUIRED arguments and set WEBP_FOUND to TRUE if\n # all listed variables are TRUE\n 
INCLUDE(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake)\n FIND_PACKAGE_HANDLE_STANDARD_ARGS(WebP DEFAULT_MSG WEBP_LIBRARY WEBP_INCLUDE_DIR)\n-\n- SET(WEBP_LIBRARIES ${WEBP_LIBRARY})\n+ \n+ SET(WEBP_LIBRARIES ${WEBP_LIBRARY} webpmux webpdemux)", + "comment_created_at": "2024-06-05T14:23:41+00:00", + "comment_author": "vrabaud", + "comment_body": "You should only add webpmux and webpdemux if they are found. When building WebP from third_party, it is all bundled in one library.\r\nSo:\r\n```cmake\r\nif(WEBP_MUX_LIBRARY)\r\n SET(WEBP_LIBRARIES ${WEBP_LIBRARY} ${WEBP_MUX_LIBRARY})\r\nendif()\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1629475029", + "pr_number": 25608, + "pr_file": "cmake/OpenCVFindWebP.cmake", + "created_at": "2024-06-06T12:46:37+00:00", + "commented_code": "# Look for the library.\n FIND_LIBRARY(WEBP_LIBRARY NAMES webp)\n MARK_AS_ADVANCED(WEBP_LIBRARY)\n FIND_LIBRARY(WEBP_MUX_LIBRARY NAMES webpmux)\n FIND_LIBRARY(WEBP_DEMUX_LIBRARY NAMES webpdemux)\n\n # handle the QUIETLY and REQUIRED arguments and set WEBP_FOUND to TRUE if\n # all listed variables are TRUE\n INCLUDE(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake)\n FIND_PACKAGE_HANDLE_STANDARD_ARGS(WebP DEFAULT_MSG WEBP_LIBRARY WEBP_INCLUDE_DIR)\n\n SET(WEBP_LIBRARIES ${WEBP_LIBRARY})\n if(WEBP_MUX_LIBRARY)", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1629475029", + "repo_full_name": "opencv/opencv", + "pr_number": 25608, + "pr_file": "cmake/OpenCVFindWebP.cmake", + "discussion_id": "1629475029", + "commented_code": "@@ -21,13 +21,17 @@ else()\n \n # Look for the library.\n FIND_LIBRARY(WEBP_LIBRARY NAMES webp)\n- MARK_AS_ADVANCED(WEBP_LIBRARY)\n+ FIND_LIBRARY(WEBP_MUX_LIBRARY NAMES webpmux)\n+ FIND_LIBRARY(WEBP_DEMUX_LIBRARY NAMES webpdemux)\n \n # handle the QUIETLY and REQUIRED arguments and set WEBP_FOUND to TRUE if\n # all listed variables are TRUE\n INCLUDE(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake)\n FIND_PACKAGE_HANDLE_STANDARD_ARGS(WebP DEFAULT_MSG WEBP_LIBRARY WEBP_INCLUDE_DIR)\n \n- SET(WEBP_LIBRARIES ${WEBP_LIBRARY})\n+ if(WEBP_MUX_LIBRARY)", + "comment_created_at": "2024-06-06T12:46:37+00:00", + "comment_author": "vrabaud", + "comment_body": "SET(WEBP_LIBRARIES ${WEBP_LIBRARY})\r\n if(WEBP_MUX_LIBRARY)\r\n SET(WEBP_LIBRARIES ${WEBP_LIBRARIES} ${WEBP_MUX_LIBRARY} ${WEBP_DEMUX_LIBRARY})\r\n endif()", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-guard-optional-dependencies.md b/_reviewers/opencv-guard-optional-dependencies.md index 723aff1..29749ed 100644 --- a/_reviewers/opencv-guard-optional-dependencies.md +++ b/_reviewers/opencv-guard-optional-dependencies.md @@ -41,129 +41,3 @@ if(HAVE_CXX17 AND OPENEXR_VERSION VERSION_LESS "2.3.0") # Provide fallback or clear error endif() ``` - - -[ - { - "discussion_id": "2097147988", - "pr_number": 25569, - "pr_file": "cmake/OpenCVGenConfig.cmake", - "created_at": "2025-05-20T07:06:39+00:00", - "commented_code": "set(OpenCV_USE_MANGLED_PATHS_CONFIGCMAKE FALSE)\nendif()\n\nif(ZLIB_FOUND)", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2097147988", - "repo_full_name": "opencv/opencv", - "pr_number": 25569, - "pr_file": "cmake/OpenCVGenConfig.cmake", - "discussion_id": "2097147988", - "commented_code": "@@ -11,6 +11,10 @@ else()\n set(OpenCV_USE_MANGLED_PATHS_CONFIGCMAKE FALSE)\n endif()\n \n+if(ZLIB_FOUND)", - "comment_created_at": "2025-05-20T07:06:39+00:00", - 
"comment_author": "opencv-alalek", - "comment_body": "> ZLIB_FOUND\r\n\r\nWrong.\r\n\r\nWe should not mess with CMakes `find_package(Zlib)` from anywhere (e.g. libiff may find own ZLIB).\r\n`HAVE_ZLIB` must be used for OpenCV usage checks.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2051463839", - "pr_number": 26906, - "pr_file": "modules/objdetect/src/mcc/checker_detector.hpp", - "created_at": "2025-04-19T12:09:47+00:00", - "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n\n/*\n * MIT License\n *\n * Copyright (c) 2018 Pedro Diamel Marrero Fern\u00e1ndez\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#ifndef _MCC_CHECKER_DETECTOR_HPP\n#define _MCC_CHECKER_DETECTOR_HPP\n\n#include \"opencv2/objdetect.hpp\"\n#include \"charts.hpp\"\n\nnamespace cv\n{\nnamespace mcc\n{\n\nclass CCheckerDetectorImpl : public CCheckerDetector\n{\n\n typedef std::vector PointsVector;\n typedef std::vector ContoursVector;\n\npublic:\n CCheckerDetectorImpl();\n CCheckerDetectorImpl(const dnn::Net& _net){", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2051463839", - "repo_full_name": "opencv/opencv", - "pr_number": 26906, - "pr_file": "modules/objdetect/src/mcc/checker_detector.hpp", - "discussion_id": "2051463839", - "commented_code": "@@ -0,0 +1,207 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+\n+/*\n+ * MIT License\n+ *\n+ * Copyright (c) 2018 Pedro Diamel Marrero Fern\u00e1ndez\n+ *\n+ * Permission is hereby granted, free of charge, to any person obtaining a copy\n+ * of this software and associated documentation files (the \"Software\"), to deal\n+ * in the Software without restriction, including without limitation the rights\n+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+ * copies of the Software, and to permit persons to whom the Software is\n+ * furnished to do so, subject to the following conditions:\n+ *\n+ * The above copyright notice and this permission notice shall be included in all\n+ * copies or substantial portions of the Software.\n+ *\n+ * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+ * IMPLIED, INCLUDING BUT 
NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n+ * SOFTWARE.\n+ */\n+\n+#ifndef _MCC_CHECKER_DETECTOR_HPP\n+#define _MCC_CHECKER_DETECTOR_HPP\n+\n+#include \"opencv2/objdetect.hpp\"\n+#include \"charts.hpp\"\n+\n+namespace cv\n+{\n+namespace mcc\n+{\n+\n+class CCheckerDetectorImpl : public CCheckerDetector\n+{\n+\n+ typedef std::vector PointsVector;\n+ typedef std::vector ContoursVector;\n+\n+public:\n+ CCheckerDetectorImpl();\n+ CCheckerDetectorImpl(const dnn::Net& _net){", - "comment_created_at": "2025-04-19T12:09:47+00:00", - "comment_author": "mshabunin", - "comment_body": "This PR breaks build without _opencv_dnn_ - all related code should be guarded by `#ifdef HAVE_OPENCV_DNN` or _objdetect_->_dnn_ dependency should be required.\r\n\r\n**cc** @asmorkalov ", - "pr_file_module": null - }, - { - "comment_id": "2051823993", - "repo_full_name": "opencv/opencv", - "pr_number": 26906, - "pr_file": "modules/objdetect/src/mcc/checker_detector.hpp", - "discussion_id": "2051463839", - "commented_code": "@@ -0,0 +1,207 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+\n+/*\n+ * MIT License\n+ *\n+ * Copyright (c) 2018 Pedro Diamel Marrero Fern\u00e1ndez\n+ *\n+ * Permission is hereby granted, free of charge, to any person obtaining a copy\n+ * of this software and associated documentation files (the \"Software\"), to deal\n+ * in the Software without restriction, including without limitation the rights\n+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+ * copies of the Software, and to permit persons to whom the Software is\n+ * furnished to do so, subject to the following conditions:\n+ *\n+ * The above copyright notice and this permission notice shall be included in all\n+ * copies or substantial portions of the Software.\n+ *\n+ * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\n+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n+ * SOFTWARE.\n+ */\n+\n+#ifndef _MCC_CHECKER_DETECTOR_HPP\n+#define _MCC_CHECKER_DETECTOR_HPP\n+\n+#include \"opencv2/objdetect.hpp\"\n+#include \"charts.hpp\"\n+\n+namespace cv\n+{\n+namespace mcc\n+{\n+\n+class CCheckerDetectorImpl : public CCheckerDetector\n+{\n+\n+ typedef std::vector PointsVector;\n+ typedef std::vector ContoursVector;\n+\n+public:\n+ CCheckerDetectorImpl();\n+ CCheckerDetectorImpl(const dnn::Net& _net){", - "comment_created_at": "2025-04-20T22:32:21+00:00", - "comment_author": "gursimarsingh", - "comment_body": "Adding a fix in this PR https://github.com/opencv/opencv/pull/27246 ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1899933291", - "pr_number": 26057, - "pr_file": "cmake/OpenCVCompilerOptions.cmake", - "created_at": "2024-12-31T06:06:22+00:00", - "commented_code": "endif()\nendif()\n\n# For 16k pages support with NDK prior 27\n# Details: https://developer.android.com/guide/practices/page-sizes?hl=en\nif(ANDROID AND ANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES AND (ANDROID_ABI STREQUAL arm64-v8a OR ANDROID_ABI STREQUAL x86_64))", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1899933291", - "repo_full_name": "opencv/opencv", - "pr_number": 26057, - "pr_file": "cmake/OpenCVCompilerOptions.cmake", - "discussion_id": "1899933291", - "commented_code": "@@ -404,6 +404,14 @@ if(NOT OPENCV_SKIP_LINK_NO_UNDEFINED)\n endif()\n endif()\n \n+# For 16k pages support with NDK prior 27\n+# Details: https://developer.android.com/guide/practices/page-sizes?hl=en\n+if(ANDROID AND ANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES AND (ANDROID_ABI STREQUAL arm64-v8a OR ANDROID_ABI STREQUAL x86_64))", - "comment_created_at": "2024-12-31T06:06:22+00:00", - "comment_author": "opencv-alalek", - "comment_body": "> ANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES\r\n\r\n`OPENCV_ANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES` to avoid possible conflicts", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1627890810", - "pr_number": 25608, - "pr_file": "cmake/OpenCVFindWebP.cmake", - "created_at": "2024-06-05T14:23:41+00:00", - "commented_code": "# Look for the library.\n FIND_LIBRARY(WEBP_LIBRARY NAMES webp)\n MARK_AS_ADVANCED(WEBP_LIBRARY)\n FIND_LIBRARY(WEBP_MUX_LIBRARY NAMES webpmux)\n FIND_LIBRARY(WEBP_DEMUX_LIBRARY NAMES webpdemux)\n\n # handle the QUIETLY and REQUIRED arguments and set WEBP_FOUND to TRUE if\n # all listed variables are TRUE\n INCLUDE(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake)\n FIND_PACKAGE_HANDLE_STANDARD_ARGS(WebP DEFAULT_MSG WEBP_LIBRARY WEBP_INCLUDE_DIR)\n\n SET(WEBP_LIBRARIES ${WEBP_LIBRARY})\n \n SET(WEBP_LIBRARIES ${WEBP_LIBRARY} webpmux webpdemux)", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1627890810", - "repo_full_name": "opencv/opencv", - "pr_number": 25608, - "pr_file": "cmake/OpenCVFindWebP.cmake", - "discussion_id": "1627890810", - "commented_code": "@@ -21,13 +21,15 @@ else()\n \n # Look for the library.\n FIND_LIBRARY(WEBP_LIBRARY NAMES webp)\n- MARK_AS_ADVANCED(WEBP_LIBRARY)\n+ FIND_LIBRARY(WEBP_MUX_LIBRARY NAMES webpmux)\n+ FIND_LIBRARY(WEBP_DEMUX_LIBRARY NAMES webpdemux)\n \n # handle the QUIETLY and REQUIRED arguments and set WEBP_FOUND to TRUE if\n # all listed variables are TRUE\n 
INCLUDE(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake)\n FIND_PACKAGE_HANDLE_STANDARD_ARGS(WebP DEFAULT_MSG WEBP_LIBRARY WEBP_INCLUDE_DIR)\n-\n- SET(WEBP_LIBRARIES ${WEBP_LIBRARY})\n+ \n+ SET(WEBP_LIBRARIES ${WEBP_LIBRARY} webpmux webpdemux)", - "comment_created_at": "2024-06-05T14:23:41+00:00", - "comment_author": "vrabaud", - "comment_body": "You should only add webpmux and webpdemux if they are found. When building WebP from third_party, it is all bundled in one library.\r\nSo:\r\n```cmake\r\nif(WEBP_MUX_LIBRARY)\r\n SET(WEBP_LIBRARIES ${WEBP_LIBRARY} ${WEBP_MUX_LIBRARY})\r\nendif()\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1629475029", - "pr_number": 25608, - "pr_file": "cmake/OpenCVFindWebP.cmake", - "created_at": "2024-06-06T12:46:37+00:00", - "commented_code": "# Look for the library.\n FIND_LIBRARY(WEBP_LIBRARY NAMES webp)\n MARK_AS_ADVANCED(WEBP_LIBRARY)\n FIND_LIBRARY(WEBP_MUX_LIBRARY NAMES webpmux)\n FIND_LIBRARY(WEBP_DEMUX_LIBRARY NAMES webpdemux)\n\n # handle the QUIETLY and REQUIRED arguments and set WEBP_FOUND to TRUE if\n # all listed variables are TRUE\n INCLUDE(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake)\n FIND_PACKAGE_HANDLE_STANDARD_ARGS(WebP DEFAULT_MSG WEBP_LIBRARY WEBP_INCLUDE_DIR)\n\n SET(WEBP_LIBRARIES ${WEBP_LIBRARY})\n if(WEBP_MUX_LIBRARY)", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1629475029", - "repo_full_name": "opencv/opencv", - "pr_number": 25608, - "pr_file": "cmake/OpenCVFindWebP.cmake", - "discussion_id": "1629475029", - "commented_code": "@@ -21,13 +21,17 @@ else()\n \n # Look for the library.\n FIND_LIBRARY(WEBP_LIBRARY NAMES webp)\n- MARK_AS_ADVANCED(WEBP_LIBRARY)\n+ FIND_LIBRARY(WEBP_MUX_LIBRARY NAMES webpmux)\n+ FIND_LIBRARY(WEBP_DEMUX_LIBRARY NAMES webpdemux)\n \n # handle the QUIETLY and REQUIRED arguments and set WEBP_FOUND to TRUE if\n # all listed variables are TRUE\n INCLUDE(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake)\n FIND_PACKAGE_HANDLE_STANDARD_ARGS(WebP DEFAULT_MSG WEBP_LIBRARY WEBP_INCLUDE_DIR)\n \n- SET(WEBP_LIBRARIES ${WEBP_LIBRARY})\n+ if(WEBP_MUX_LIBRARY)", - "comment_created_at": "2024-06-06T12:46:37+00:00", - "comment_author": "vrabaud", - "comment_body": "SET(WEBP_LIBRARIES ${WEBP_LIBRARY})\r\n if(WEBP_MUX_LIBRARY)\r\n SET(WEBP_LIBRARIES ${WEBP_LIBRARIES} ${WEBP_MUX_LIBRARY} ${WEBP_DEMUX_LIBRARY})\r\n endif()", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-maintain-build-compatibility.json b/_reviewers/opencv-maintain-build-compatibility.json new file mode 100644 index 0000000..b618f71 --- /dev/null +++ b/_reviewers/opencv-maintain-build-compatibility.json @@ -0,0 +1,82 @@ +[ + { + "discussion_id": "1644650533", + "pr_number": 25569, + "pr_file": "3rdparty/zlib-ng/CMakeLists.txt", + "created_at": "2024-06-18T15:21:31+00:00", + "commented_code": "set_target_properties(${ZLIB_LIBRARY} PROPERTIES FOLDER \"3rdparty\")\nendif()\n\nif(NOT BUILD_SHARED_LIBS)\n ocv_install_target(${ZLIB_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev)\nendif()", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1644650533", + "repo_full_name": "opencv/opencv", + "pr_number": 25569, + "pr_file": "3rdparty/zlib-ng/CMakeLists.txt", + "discussion_id": "1644650533", + "commented_code": "@@ -789,8 +789,4 @@ if(ENABLE_SOLUTION_FOLDERS)\n set_target_properties(${ZLIB_LIBRARY} PROPERTIES FOLDER \"3rdparty\")\n endif()\n \n-if(NOT 
BUILD_SHARED_LIBS)\n- ocv_install_target(${ZLIB_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev)\n-endif()", + "comment_created_at": "2024-06-18T15:21:31+00:00", + "comment_author": "asmorkalov", + "comment_body": "It should break static linkage, if OpenCV is build against own zlib-ng, but not system-wide.", + "pr_file_module": null + }, + { + "comment_id": "1652983240", + "repo_full_name": "opencv/opencv", + "pr_number": 25569, + "pr_file": "3rdparty/zlib-ng/CMakeLists.txt", + "discussion_id": "1644650533", + "commented_code": "@@ -789,8 +789,4 @@ if(ENABLE_SOLUTION_FOLDERS)\n set_target_properties(${ZLIB_LIBRARY} PROPERTIES FOLDER \"3rdparty\")\n endif()\n \n-if(NOT BUILD_SHARED_LIBS)\n- ocv_install_target(${ZLIB_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev)\n-endif()", + "comment_created_at": "2024-06-25T14:50:47+00:00", + "comment_author": "FantasqueX", + "comment_body": "Sorry for late reply. This `ocv_install_target` statement isn't removed. It is moved to outer space because both static linkage and dynamic linkage need this statement in Android build. So, this will only affect dynamic linkage.", + "pr_file_module": null + }, + { + "comment_id": "1661358623", + "repo_full_name": "opencv/opencv", + "pr_number": 25569, + "pr_file": "3rdparty/zlib-ng/CMakeLists.txt", + "discussion_id": "1644650533", + "commented_code": "@@ -789,8 +789,4 @@ if(ENABLE_SOLUTION_FOLDERS)\n set_target_properties(${ZLIB_LIBRARY} PROPERTIES FOLDER \"3rdparty\")\n endif()\n \n-if(NOT BUILD_SHARED_LIBS)\n- ocv_install_target(${ZLIB_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev)\n-endif()", + "comment_created_at": "2024-07-01T17:38:37+00:00", + "comment_author": "opencv-alalek", + "comment_body": "We should not redistribute system or sysroot libraries even for static OpenCV builds. 
Only own 3rdparty built binaries are redistributed.\r\n\r\nWhich builder configuration does check of your modification?", + "pr_file_module": null + }, + { + "comment_id": "1661770972", + "repo_full_name": "opencv/opencv", + "pr_number": 25569, + "pr_file": "3rdparty/zlib-ng/CMakeLists.txt", + "discussion_id": "1644650533", + "commented_code": "@@ -789,8 +789,4 @@ if(ENABLE_SOLUTION_FOLDERS)\n set_target_properties(${ZLIB_LIBRARY} PROPERTIES FOLDER \"3rdparty\")\n endif()\n \n-if(NOT BUILD_SHARED_LIBS)\n- ocv_install_target(${ZLIB_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev)\n-endif()", + "comment_created_at": "2024-07-02T03:29:13+00:00", + "comment_author": "FantasqueX", + "comment_body": "https://github.com/opencv/ci-gha-workflow/blob/main/.github/workflows/OCV-PR-4.x-Android-Test.yaml\r\nHowever, I cannot find previous build failure log.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2137746431", + "pr_number": 27428, + "pr_file": "modules/dnn/CMakeLists.txt", + "created_at": "2025-06-10T12:19:34+00:00", + "commented_code": "if(PROTOBUF_UPDATE_FILES)\n file(GLOB proto_files \"${CMAKE_CURRENT_LIST_DIR}/src/tensorflow/*.proto\" \"${CMAKE_CURRENT_LIST_DIR}/src/caffe/opencv-caffe.proto\" \"${CMAKE_CURRENT_LIST_DIR}/src/onnx/opencv-onnx.proto\")\n set(PROTOBUF_GENERATE_CPP_APPEND_PATH ON) # required for tensorflow\n protobuf_generate_cpp(fw_srcs fw_hdrs ${proto_files})\n protobuf_generate(\n APPEND_PATH # required for tensorflow\n LANGUAGE cpp\n IMPORT_DIRS ${Protobuf_IMPORT_DIRS}\n OUT_VAR fw_srcs\n PROTOC_EXE ${Protobuf_PROTOC_EXECUTABLE}\n PROTOS ${proto_files})\n set(fw_hdrs \"${fw_srcs}\")\n # separate the header files and source files\n list(FILTER fw_srcs EXCLUDE REGEX \".+\\.h$\")\n list(FILTER fw_hdrs INCLUDE REGEX \".+\\.h$\")", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2137746431", + "repo_full_name": "opencv/opencv", + "pr_number": 27428, + "pr_file": "modules/dnn/CMakeLists.txt", + "discussion_id": "2137746431", + "commented_code": "@@ -115,8 +115,17 @@ if(HAVE_PROTOBUF)\n \n if(PROTOBUF_UPDATE_FILES)\n file(GLOB proto_files \"${CMAKE_CURRENT_LIST_DIR}/src/tensorflow/*.proto\" \"${CMAKE_CURRENT_LIST_DIR}/src/caffe/opencv-caffe.proto\" \"${CMAKE_CURRENT_LIST_DIR}/src/onnx/opencv-onnx.proto\")\n- set(PROTOBUF_GENERATE_CPP_APPEND_PATH ON) # required for tensorflow\n- protobuf_generate_cpp(fw_srcs fw_hdrs ${proto_files})\n+ protobuf_generate(\n+ APPEND_PATH # required for tensorflow\n+ LANGUAGE cpp\n+ IMPORT_DIRS ${Protobuf_IMPORT_DIRS}\n+ OUT_VAR fw_srcs\n+ PROTOC_EXE ${Protobuf_PROTOC_EXECUTABLE}\n+ PROTOS ${proto_files})\n+ set(fw_hdrs \"${fw_srcs}\")\n+ # separate the header files and source files\n+ list(FILTER fw_srcs EXCLUDE REGEX \".+\\.h$\")\n+ list(FILTER fw_hdrs INCLUDE REGEX \".+\\.h$\")", + "comment_created_at": "2025-06-10T12:19:34+00:00", + "comment_author": "asmorkalov", + "comment_body": "> protobuf_generate Added in version 3.13.\r\n\r\nIt breaks build with older CMake. 
I propose to add CMake version check and presume the old branch for old CMake.\r\n", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-maintain-build-compatibility.md b/_reviewers/opencv-maintain-build-compatibility.md index b96fdbc..ee349eb 100644 --- a/_reviewers/opencv-maintain-build-compatibility.md +++ b/_reviewers/opencv-maintain-build-compatibility.md @@ -36,87 +36,3 @@ endif() ``` Always reference the specific CI workflow that validates your build changes (e.g., "This change is tested by the workflow at .github/workflows/OCV-PR-4.x-Android-Test.yaml") to demonstrate the build continues to work across all supported platforms and configurations. - - -[ - { - "discussion_id": "1644650533", - "pr_number": 25569, - "pr_file": "3rdparty/zlib-ng/CMakeLists.txt", - "created_at": "2024-06-18T15:21:31+00:00", - "commented_code": "set_target_properties(${ZLIB_LIBRARY} PROPERTIES FOLDER \"3rdparty\")\nendif()\n\nif(NOT BUILD_SHARED_LIBS)\n ocv_install_target(${ZLIB_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev)\nendif()", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1644650533", - "repo_full_name": "opencv/opencv", - "pr_number": 25569, - "pr_file": "3rdparty/zlib-ng/CMakeLists.txt", - "discussion_id": "1644650533", - "commented_code": "@@ -789,8 +789,4 @@ if(ENABLE_SOLUTION_FOLDERS)\n set_target_properties(${ZLIB_LIBRARY} PROPERTIES FOLDER \"3rdparty\")\n endif()\n \n-if(NOT BUILD_SHARED_LIBS)\n- ocv_install_target(${ZLIB_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev)\n-endif()", - "comment_created_at": "2024-06-18T15:21:31+00:00", - "comment_author": "asmorkalov", - "comment_body": "It should break static linkage, if OpenCV is build against own zlib-ng, but not system-wide.", - "pr_file_module": null - }, - { - "comment_id": "1652983240", - "repo_full_name": "opencv/opencv", - "pr_number": 25569, - "pr_file": "3rdparty/zlib-ng/CMakeLists.txt", - "discussion_id": "1644650533", - "commented_code": "@@ -789,8 +789,4 @@ if(ENABLE_SOLUTION_FOLDERS)\n set_target_properties(${ZLIB_LIBRARY} PROPERTIES FOLDER \"3rdparty\")\n endif()\n \n-if(NOT BUILD_SHARED_LIBS)\n- ocv_install_target(${ZLIB_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev)\n-endif()", - "comment_created_at": "2024-06-25T14:50:47+00:00", - "comment_author": "FantasqueX", - "comment_body": "Sorry for late reply. This `ocv_install_target` statement isn't removed. It is moved to outer space because both static linkage and dynamic linkage need this statement in Android build. So, this will only affect dynamic linkage.", - "pr_file_module": null - }, - { - "comment_id": "1661358623", - "repo_full_name": "opencv/opencv", - "pr_number": 25569, - "pr_file": "3rdparty/zlib-ng/CMakeLists.txt", - "discussion_id": "1644650533", - "commented_code": "@@ -789,8 +789,4 @@ if(ENABLE_SOLUTION_FOLDERS)\n set_target_properties(${ZLIB_LIBRARY} PROPERTIES FOLDER \"3rdparty\")\n endif()\n \n-if(NOT BUILD_SHARED_LIBS)\n- ocv_install_target(${ZLIB_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev)\n-endif()", - "comment_created_at": "2024-07-01T17:38:37+00:00", - "comment_author": "opencv-alalek", - "comment_body": "We should not redistribute system or sysroot libraries even for static OpenCV builds. 
Only own 3rdparty built binaries are redistributed.\r\n\r\nWhich builder configuration does check of your modification?", - "pr_file_module": null - }, - { - "comment_id": "1661770972", - "repo_full_name": "opencv/opencv", - "pr_number": 25569, - "pr_file": "3rdparty/zlib-ng/CMakeLists.txt", - "discussion_id": "1644650533", - "commented_code": "@@ -789,8 +789,4 @@ if(ENABLE_SOLUTION_FOLDERS)\n set_target_properties(${ZLIB_LIBRARY} PROPERTIES FOLDER \"3rdparty\")\n endif()\n \n-if(NOT BUILD_SHARED_LIBS)\n- ocv_install_target(${ZLIB_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev)\n-endif()", - "comment_created_at": "2024-07-02T03:29:13+00:00", - "comment_author": "FantasqueX", - "comment_body": "https://github.com/opencv/ci-gha-workflow/blob/main/.github/workflows/OCV-PR-4.x-Android-Test.yaml\r\nHowever, I cannot find previous build failure log.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2137746431", - "pr_number": 27428, - "pr_file": "modules/dnn/CMakeLists.txt", - "created_at": "2025-06-10T12:19:34+00:00", - "commented_code": "if(PROTOBUF_UPDATE_FILES)\n file(GLOB proto_files \"${CMAKE_CURRENT_LIST_DIR}/src/tensorflow/*.proto\" \"${CMAKE_CURRENT_LIST_DIR}/src/caffe/opencv-caffe.proto\" \"${CMAKE_CURRENT_LIST_DIR}/src/onnx/opencv-onnx.proto\")\n set(PROTOBUF_GENERATE_CPP_APPEND_PATH ON) # required for tensorflow\n protobuf_generate_cpp(fw_srcs fw_hdrs ${proto_files})\n protobuf_generate(\n APPEND_PATH # required for tensorflow\n LANGUAGE cpp\n IMPORT_DIRS ${Protobuf_IMPORT_DIRS}\n OUT_VAR fw_srcs\n PROTOC_EXE ${Protobuf_PROTOC_EXECUTABLE}\n PROTOS ${proto_files})\n set(fw_hdrs \"${fw_srcs}\")\n # separate the header files and source files\n list(FILTER fw_srcs EXCLUDE REGEX \".+\\.h$\")\n list(FILTER fw_hdrs INCLUDE REGEX \".+\\.h$\")", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2137746431", - "repo_full_name": "opencv/opencv", - "pr_number": 27428, - "pr_file": "modules/dnn/CMakeLists.txt", - "discussion_id": "2137746431", - "commented_code": "@@ -115,8 +115,17 @@ if(HAVE_PROTOBUF)\n \n if(PROTOBUF_UPDATE_FILES)\n file(GLOB proto_files \"${CMAKE_CURRENT_LIST_DIR}/src/tensorflow/*.proto\" \"${CMAKE_CURRENT_LIST_DIR}/src/caffe/opencv-caffe.proto\" \"${CMAKE_CURRENT_LIST_DIR}/src/onnx/opencv-onnx.proto\")\n- set(PROTOBUF_GENERATE_CPP_APPEND_PATH ON) # required for tensorflow\n- protobuf_generate_cpp(fw_srcs fw_hdrs ${proto_files})\n+ protobuf_generate(\n+ APPEND_PATH # required for tensorflow\n+ LANGUAGE cpp\n+ IMPORT_DIRS ${Protobuf_IMPORT_DIRS}\n+ OUT_VAR fw_srcs\n+ PROTOC_EXE ${Protobuf_PROTOC_EXECUTABLE}\n+ PROTOS ${proto_files})\n+ set(fw_hdrs \"${fw_srcs}\")\n+ # separate the header files and source files\n+ list(FILTER fw_srcs EXCLUDE REGEX \".+\\.h$\")\n+ list(FILTER fw_hdrs INCLUDE REGEX \".+\\.h$\")", - "comment_created_at": "2025-06-10T12:19:34+00:00", - "comment_author": "asmorkalov", - "comment_body": "> protobuf_generate Added in version 3.13.\r\n\r\nIt breaks build with older CMake. 
I propose to add CMake version check and presume the old branch for old CMake.\r\n", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-maintain-code-consistency.json b/_reviewers/opencv-maintain-code-consistency.json new file mode 100644 index 0000000..49979b3 --- /dev/null +++ b/_reviewers/opencv-maintain-code-consistency.json @@ -0,0 +1,112 @@ +[ + { + "discussion_id": "2024754221", + "pr_number": 27051, + "pr_file": "modules/photo/test/test_precomp.hpp", + "created_at": "2025-04-02T12:46:24+00:00", + "commented_code": "#include \"opencv2/ts/ocl_test.hpp\"\n#include \"opencv2/photo.hpp\"\n\nnamespace opencv_test\n{\nusing namespace cv::ccm;", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2024754221", + "repo_full_name": "opencv/opencv", + "pr_number": 27051, + "pr_file": "modules/photo/test/test_precomp.hpp", + "discussion_id": "2024754221", + "commented_code": "@@ -8,4 +8,9 @@\n #include \"opencv2/ts/ocl_test.hpp\"\n #include \"opencv2/photo.hpp\"\n \n+namespace opencv_test\n+{\n+using namespace cv::ccm;", + "comment_created_at": "2025-04-02T12:46:24+00:00", + "comment_author": "asmorkalov", + "comment_body": "using namespace is very bad practice. It affects everything, even, if the header is not included directly.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1450246565", + "pr_number": 24768, + "pr_file": "modules/dnn/src/vkcom/shader/nary_eltwise_binary_forward.comp", + "created_at": "2024-01-12T11:01:09+00:00", + "commented_code": "#version 450\n// #extension GL_EXT_debug_printf : enable\n#define ALL_THREAD 1024\n// #define ALL_THREAD 128 // Experimental batched operation\n#define STEP_SIZE 65536\n\nlayout(binding = 0) readonly buffer Input1{\n float matA[];\n};\n\nlayout(binding = 1) readonly buffer Input2{\n float matB[];\n};\n\nlayout(binding = 2) writeonly buffer Output{\n float matOut[];\n};\n\nlayout(binding = 3) uniform Params {\n int opType;\n int ndims;\n} params;\n\nlayout(binding = 4) readonly buffer Shape {\n int shape[];\n};\n\nlayout(binding = 5) readonly buffer Step {\n int matStep[];\n};\n\n/* local_size_x, local_size_y, local_size_z there defines the number of invocations\n of this compute shader in the current work group. 
*/\n// TODO: Check if this makes any sense\n// TODO: Check if it is required to fetch PhysicalDeviceLimit from Context\n// TODO: here we shall assume that maxGroupInvocation is 1024.\nlayout(local_size_x = ALL_THREAD, local_size_y = 1, local_size_z = 1) in; // TODO: Check if this makes any sense\n\nconst int AND = 0;\nconst int EQUAL = 1;\nconst int GREATER = 2;\nconst int GREATER_EQUAL = 3;\nconst int LESS = 4;\nconst int LESS_EQUAL = 5;\nconst int OR = 6;\nconst int POW = 7;\nconst int XOR = 8;\nconst int BITSHIFT = 9;\nconst int MAX = 10;\nconst int MEAN = 11;\nconst int MIN = 12;\nconst int MOD = 13;\nconst int PROD = 14;\nconst int SUB = 15;\nconst int SUM = 16;\nconst int ADD = 17;\nconst int DIV = 18;\nconst int WHERE = 19;\n\nvoid binary_forward()\n{\n int ndims = params.ndims;\n int dp1 = matStep[2 * ndims - 1];\n int dp2 = matStep[3 * ndims - 1];\n int dp = matStep[ndims - 1];\n int n1 = shape[ndims - 1], n2 = shape[ndims - 2];\n\n int plane_idx = int(gl_WorkGroupID.x);\n\n int ptr1 = 0;\n int ptr2 = 0;\n int ptr = 0;\n int idx = plane_idx;\n\n for (int k = ndims - 3; k >= 0; --k) {\n int next_idx = idx / shape[k];\n int i_k = idx - next_idx * shape[k]; // i_k = idx % shape[k]\n ptr1 += i_k * matStep[ndims + k];\n ptr2 += i_k * matStep[2 * ndims + k];\n ptr += i_k * matStep[k];\n idx = next_idx;\n }\n\n int i2_offset = int(gl_WorkGroupID.y);\n int i1_offset = int(gl_LocalInvocationID.x);\n\n ptr1 += i2_offset * matStep[2 * ndims - 2];\n ptr2 += i2_offset * matStep[3 * ndims - 2];\n ptr += i2_offset * matStep[ndims - 2];\n\n/*\n if (dp1 == 1 && dp2 == 1 && dp == 1) {\n for (int i1 = i1_offset; i1 < n1; i1 += ALL_THREAD) {\n perform_op(&matA + ptr1 + i1, &matB + ptr2 + i1, &matOut + ptr + i1);\n }\n } else if (dp1 == 1 && dp2 == 0 && dp == 1) {\n float k = matB[ptr2];\n for (int i1 = i1_offset; i1 < n1; i1 += ALL_THREAD) {\n perform_op(&matA + ptr1 + i1, &matB + ptr2, &matOut + ptr + i1);\n }\n } else if (dp1 == 0 && dp2 == 1 && dp == 1) {\n float k = matA[ptr1];\n for (int i1 = i1_offset; i1 < n1; i1 += ALL_THREAD) {\n perform_op(&matA + ptr1, &matB + ptr2 + i1, &matOut + ptr + i1);\n }\n } else {\n for (int i1 = i1_offset; i1 < n1; i1 += ALL_THREAD) {\n perform_op(&matA + ptr1 + i1 * dp1, &matB + ptr2 + i1 * dp2, &matOut + ptr + i1 * dp);\n }\n }\n*/\n for (int i1 = i1_offset; i1 < n1; i1 += ALL_THREAD) {\n switch (params.opType) {\n case int(ADD):\n matOut[ptr + i1 * dp] = matA[ptr1 + i1 * dp1] + matB[ptr2 + i1 * dp2];\n break;\n case int(SUB):\n matOut[ptr + i1 * dp] = matA[ptr1 + i1 * dp1] - matB[ptr2 + i1 * dp2];\n break;\n case int(PROD):\n matOut[ptr + i1 * dp] = matA[ptr1 + i1 * dp1] * matB[ptr2 + i1 * dp2];\n break;\n case int(DIV):\n matOut[ptr + i1 * dp] = matA[ptr1 + i1 * dp1] / matB[ptr2 + i1 * dp2];\n break;\n }\n }\n\n // Experimental Batched Operation\n\n // int threadID = int(gl_LocalInvocationID.x);\n // int i2_offset = (int(gl_WorkGroupID.y)) * STEP_SIZE + threadID;\n // int i1_offset = (int(gl_WorkGroupID.z)) * STEP_SIZE;\n\n // ptr1 += i2_offset * matStep[2 * ndims - 2] + i1_offset * dp1;\n // ptr2 += i2_offset * matStep[3 * ndims - 2] + i1_offset * dp2;\n // ptr += i2_offset * matStep[ndims - 2] + i1_offset * dp;\n\n\n // int i2_loop_count = STEP_SIZE / ALL_THREAD;\n // for (int i2_c = 0; i2_offset < n2 && i2_c < i2_loop_count; i2_offset += ALL_THREAD, ++i2_c)\n // {\n // if (dp1 == 1 && dp2 == 1 && dp == 1) {\n // for (int i1 = 0; i1 < STEP_SIZE; ++i1) {\n // if (ptr + i1 < n1) {\n // matOut[ptr + i1] = matA[ptr1 + i1] + matB[ptr2 + i1];\n // }\n // }\n // } 
else if (dp1 == 1 && dp2 == 0 && dp == 1) {\n // float k = matB[ptr2];\n // for (int i1 = 0; i1 < STEP_SIZE; ++i1) {\n // if (ptr + i1 < n1) {\n // matOut[ptr + i1] = matA[ptr1 + i1] + k;\n // }\n // }\n // } else if (dp1 == 0 && dp2 == 1 && dp == 1) {\n // float k = matA[ptr1];\n // for (int i1 = 0; i1 < STEP_SIZE; ++i1) {\n // if (ptr + i1 < n1) {\n // matOut[ptr + i1] = k + matB[ptr2 + i1];\n // }\n // }\n // } else {\n // for (int i1 = 0; i1 < STEP_SIZE; ++i1) {\n // if (ptr + i1 < n1) {\n // matOut[ptr + i1 * dp] = matA[ptr1 + i1 * dp1] + matB[ptr2 + i1 * dp2];\n // }\n // }\n // }\n // ptr1 += ALL_THREAD * matStep[2 * ndims - 2];\n // ptr2 += ALL_THREAD * matStep[3 * ndims - 2];\n // ptr += ALL_THREAD * matStep[ndims - 2];\n // }", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1450246565", + "repo_full_name": "opencv/opencv", + "pr_number": 24768, + "pr_file": "modules/dnn/src/vkcom/shader/nary_eltwise_binary_forward.comp", + "discussion_id": "1450246565", + "commented_code": "@@ -0,0 +1,182 @@\n+#version 450\n+// #extension GL_EXT_debug_printf : enable\n+#define ALL_THREAD 1024\n+// #define ALL_THREAD 128 // Experimental batched operation\n+#define STEP_SIZE 65536\n+\n+layout(binding = 0) readonly buffer Input1{\n+ float matA[];\n+};\n+\n+layout(binding = 1) readonly buffer Input2{\n+ float matB[];\n+};\n+\n+layout(binding = 2) writeonly buffer Output{\n+ float matOut[];\n+};\n+\n+layout(binding = 3) uniform Params {\n+ int opType;\n+ int ndims;\n+} params;\n+\n+layout(binding = 4) readonly buffer Shape {\n+ int shape[];\n+};\n+\n+layout(binding = 5) readonly buffer Step {\n+ int matStep[];\n+};\n+\n+/* local_size_x, local_size_y, local_size_z there defines the number of invocations\n+ of this compute shader in the current work group. 
*/\n+// TODO: Check if this makes any sense\n+// TODO: Check if it is required to fetch PhysicalDeviceLimit from Context\n+// TODO: here we shall assume that maxGroupInvocation is 1024.\n+layout(local_size_x = ALL_THREAD, local_size_y = 1, local_size_z = 1) in; // TODO: Check if this makes any sense\n+\n+const int AND = 0;\n+const int EQUAL = 1;\n+const int GREATER = 2;\n+const int GREATER_EQUAL = 3;\n+const int LESS = 4;\n+const int LESS_EQUAL = 5;\n+const int OR = 6;\n+const int POW = 7;\n+const int XOR = 8;\n+const int BITSHIFT = 9;\n+const int MAX = 10;\n+const int MEAN = 11;\n+const int MIN = 12;\n+const int MOD = 13;\n+const int PROD = 14;\n+const int SUB = 15;\n+const int SUM = 16;\n+const int ADD = 17;\n+const int DIV = 18;\n+const int WHERE = 19;\n+\n+void binary_forward()\n+{\n+ int ndims = params.ndims;\n+ int dp1 = matStep[2 * ndims - 1];\n+ int dp2 = matStep[3 * ndims - 1];\n+ int dp = matStep[ndims - 1];\n+ int n1 = shape[ndims - 1], n2 = shape[ndims - 2];\n+\n+ int plane_idx = int(gl_WorkGroupID.x);\n+\n+ int ptr1 = 0;\n+ int ptr2 = 0;\n+ int ptr = 0;\n+ int idx = plane_idx;\n+\n+ for (int k = ndims - 3; k >= 0; --k) {\n+ int next_idx = idx / shape[k];\n+ int i_k = idx - next_idx * shape[k]; // i_k = idx % shape[k]\n+ ptr1 += i_k * matStep[ndims + k];\n+ ptr2 += i_k * matStep[2 * ndims + k];\n+ ptr += i_k * matStep[k];\n+ idx = next_idx;\n+ }\n+\n+ int i2_offset = int(gl_WorkGroupID.y);\n+ int i1_offset = int(gl_LocalInvocationID.x);\n+\n+ ptr1 += i2_offset * matStep[2 * ndims - 2];\n+ ptr2 += i2_offset * matStep[3 * ndims - 2];\n+ ptr += i2_offset * matStep[ndims - 2];\n+\n+/*\n+ if (dp1 == 1 && dp2 == 1 && dp == 1) {\n+ for (int i1 = i1_offset; i1 < n1; i1 += ALL_THREAD) {\n+ perform_op(&matA + ptr1 + i1, &matB + ptr2 + i1, &matOut + ptr + i1);\n+ }\n+ } else if (dp1 == 1 && dp2 == 0 && dp == 1) {\n+ float k = matB[ptr2];\n+ for (int i1 = i1_offset; i1 < n1; i1 += ALL_THREAD) {\n+ perform_op(&matA + ptr1 + i1, &matB + ptr2, &matOut + ptr + i1);\n+ }\n+ } else if (dp1 == 0 && dp2 == 1 && dp == 1) {\n+ float k = matA[ptr1];\n+ for (int i1 = i1_offset; i1 < n1; i1 += ALL_THREAD) {\n+ perform_op(&matA + ptr1, &matB + ptr2 + i1, &matOut + ptr + i1);\n+ }\n+ } else {\n+ for (int i1 = i1_offset; i1 < n1; i1 += ALL_THREAD) {\n+ perform_op(&matA + ptr1 + i1 * dp1, &matB + ptr2 + i1 * dp2, &matOut + ptr + i1 * dp);\n+ }\n+ }\n+*/\n+ for (int i1 = i1_offset; i1 < n1; i1 += ALL_THREAD) {\n+ switch (params.opType) {\n+ case int(ADD):\n+ matOut[ptr + i1 * dp] = matA[ptr1 + i1 * dp1] + matB[ptr2 + i1 * dp2];\n+ break;\n+ case int(SUB):\n+ matOut[ptr + i1 * dp] = matA[ptr1 + i1 * dp1] - matB[ptr2 + i1 * dp2];\n+ break;\n+ case int(PROD):\n+ matOut[ptr + i1 * dp] = matA[ptr1 + i1 * dp1] * matB[ptr2 + i1 * dp2];\n+ break;\n+ case int(DIV):\n+ matOut[ptr + i1 * dp] = matA[ptr1 + i1 * dp1] / matB[ptr2 + i1 * dp2];\n+ break;\n+ }\n+ }\n+\n+ // Experimental Batched Operation\n+\n+ // int threadID = int(gl_LocalInvocationID.x);\n+ // int i2_offset = (int(gl_WorkGroupID.y)) * STEP_SIZE + threadID;\n+ // int i1_offset = (int(gl_WorkGroupID.z)) * STEP_SIZE;\n+\n+ // ptr1 += i2_offset * matStep[2 * ndims - 2] + i1_offset * dp1;\n+ // ptr2 += i2_offset * matStep[3 * ndims - 2] + i1_offset * dp2;\n+ // ptr += i2_offset * matStep[ndims - 2] + i1_offset * dp;\n+\n+\n+ // int i2_loop_count = STEP_SIZE / ALL_THREAD;\n+ // for (int i2_c = 0; i2_offset < n2 && i2_c < i2_loop_count; i2_offset += ALL_THREAD, ++i2_c)\n+ // {\n+ // if (dp1 == 1 && dp2 == 1 && dp == 1) {\n+ // for (int i1 = 0; i1 < STEP_SIZE; 
++i1) {\n+ // if (ptr + i1 < n1) {\n+ // matOut[ptr + i1] = matA[ptr1 + i1] + matB[ptr2 + i1];\n+ // }\n+ // }\n+ // } else if (dp1 == 1 && dp2 == 0 && dp == 1) {\n+ // float k = matB[ptr2];\n+ // for (int i1 = 0; i1 < STEP_SIZE; ++i1) {\n+ // if (ptr + i1 < n1) {\n+ // matOut[ptr + i1] = matA[ptr1 + i1] + k;\n+ // }\n+ // }\n+ // } else if (dp1 == 0 && dp2 == 1 && dp == 1) {\n+ // float k = matA[ptr1];\n+ // for (int i1 = 0; i1 < STEP_SIZE; ++i1) {\n+ // if (ptr + i1 < n1) {\n+ // matOut[ptr + i1] = k + matB[ptr2 + i1];\n+ // }\n+ // }\n+ // } else {\n+ // for (int i1 = 0; i1 < STEP_SIZE; ++i1) {\n+ // if (ptr + i1 < n1) {\n+ // matOut[ptr + i1 * dp] = matA[ptr1 + i1 * dp1] + matB[ptr2 + i1 * dp2];\n+ // }\n+ // }\n+ // }\n+ // ptr1 += ALL_THREAD * matStep[2 * ndims - 2];\n+ // ptr2 += ALL_THREAD * matStep[3 * ndims - 2];\n+ // ptr += ALL_THREAD * matStep[ndims - 2];\n+ // }", + "comment_created_at": "2024-01-12T11:01:09+00:00", + "comment_author": "asmorkalov", + "comment_body": "Please remove dead code, or guard it with macro, if you want to use it later.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1989625280", + "pr_number": 26834, + "pr_file": "modules/imgproc/src/contours_common.hpp", + "created_at": "2025-03-11T15:56:04+00:00", + "commented_code": "}\n\nprivate:\n std::stack levels;\n Tree& tree;\n std::stack levels;\n};\n\n//==============================================================================\n\ntemplate \nclass ContourDataStorage\n{\npublic:\n typedef T data_storage_t;\n typedef BlockStorage storage_t;\npublic:\n ContourDataStorage(void) = delete;\n ContourDataStorage(storage_t* _storage):storage(_storage) {}\n ContourDataStorage(const ContourDataStorage&) = delete;\n ContourDataStorage(ContourDataStorage&&) noexcept = default;\n ~ContourDataStorage() = default;\n ContourDataStorage& operator=(const ContourDataStorage&) = delete;\n ContourDataStorage& operator=(ContourDataStorage&&) noexcept = default;\npublic:\n typename storage_t::RangeIterator getRangeIterator(void) const {return storage->getRangeIterator(first, last);}\npublic:\n bool empty(void) const {return first == last;}\n size_t size(void) const {return last - first;}\npublic:\n void clear(void) {first = last;}\n bool resize(size_t newSize) {\n bool ok = (newSize <= size());\n if (ok)\n last = first+newSize;\n return ok;\n }\n void push_back(const data_storage_t& value) {", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1989625280", + "repo_full_name": "opencv/opencv", + "pr_number": 26834, + "pr_file": "modules/imgproc/src/contours_common.hpp", + "discussion_id": "1989625280", + "commented_code": "@@ -159,23 +171,78 @@ class TreeIterator\n }\n \n private:\n- std::stack levels;\n Tree& tree;\n+ std::stack levels;\n };\n \n //==============================================================================\n \n+template \n+class ContourDataStorage\n+{\n+public:\n+ typedef T data_storage_t;\n+ typedef BlockStorage storage_t;\n+public:\n+ ContourDataStorage(void) = delete;\n+ ContourDataStorage(storage_t* _storage):storage(_storage) {}\n+ ContourDataStorage(const ContourDataStorage&) = delete;\n+ ContourDataStorage(ContourDataStorage&&) noexcept = default;\n+ ~ContourDataStorage() = default;\n+ ContourDataStorage& operator=(const ContourDataStorage&) = delete;\n+ ContourDataStorage& operator=(ContourDataStorage&&) noexcept = default;\n+public:\n+ typename storage_t::RangeIterator getRangeIterator(void) const {return storage->getRangeIterator(first, 
last);}\n+public:\n+ bool empty(void) const {return first == last;}\n+ size_t size(void) const {return last - first;}\n+public:\n+ void clear(void) {first = last;}\n+ bool resize(size_t newSize) {\n+ bool ok = (newSize <= size());\n+ if (ok)\n+ last = first+newSize;\n+ return ok;\n+ }\n+ void push_back(const data_storage_t& value) {", + "comment_created_at": "2025-03-11T15:56:04+00:00", + "comment_author": "mshabunin", + "comment_body": "It would be better to keep formatting consistent in existing files. This file uses Allman brace/indent style:\r\n```\r\nvoid push_back(...)\r\n{\r\n if (...)\r\n {\r\n ...\r\n }\r\n ...\r\n}\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1928218708", + "pr_number": 26789, + "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/minmax.hpp", + "created_at": "2025-01-24T07:27:05+00:00", + "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n#ifndef OPENCV_HAL_RVV_MINMAXIDX_HPP_INCLUDED\n#define OPENCV_HAL_RVV_MINMAXIDX_HPP_INCLUDED\n\n#include \n\nnamespace cv { namespace cv_hal_rvv {\n\n#undef cv_hal_minMaxIdx\n#define cv_hal_minMaxIdx cv::cv_hal_rvv::minMaxIdx\n#undef cv_hal_minMaxIdxMaskStep\n#define cv_hal_minMaxIdxMaskStep cv::cv_hal_rvv::minMaxIdx\n\n#define HAL_RVV_MINMAXIDX_READTWICE_GENERATOR(D_TYPE, V_TYPE, EEW, EMUL, IS_U, IS_F, F_OR_X, F_OR_S, M_EMUL) \\\ninline int minMaxIdx_##V_TYPE(const uchar* src_data, size_t src_step, int width, int height, double* minVal, double* maxVal, \\\n int* minIdx, int* maxIdx, uchar* mask, size_t mask_step) \\\n{ \\\n int vlmax = __riscv_vsetvlmax_##EEW##EMUL(); \\\n auto vec_min = __riscv_v##IS_F##mv_v_##F_OR_X##_##V_TYPE##EMUL(std::numeric_limits::max(), vlmax); \\", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1928218708", + "repo_full_name": "opencv/opencv", + "pr_number": 26789, + "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/minmax.hpp", + "discussion_id": "1928218708", + "commented_code": "@@ -0,0 +1,294 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+#ifndef OPENCV_HAL_RVV_MINMAXIDX_HPP_INCLUDED\n+#define OPENCV_HAL_RVV_MINMAXIDX_HPP_INCLUDED\n+\n+#include \n+\n+namespace cv { namespace cv_hal_rvv {\n+\n+#undef cv_hal_minMaxIdx\n+#define cv_hal_minMaxIdx cv::cv_hal_rvv::minMaxIdx\n+#undef cv_hal_minMaxIdxMaskStep\n+#define cv_hal_minMaxIdxMaskStep cv::cv_hal_rvv::minMaxIdx\n+\n+#define HAL_RVV_MINMAXIDX_READTWICE_GENERATOR(D_TYPE, V_TYPE, EEW, EMUL, IS_U, IS_F, F_OR_X, F_OR_S, M_EMUL) \\\n+inline int minMaxIdx_##V_TYPE(const uchar* src_data, size_t src_step, int width, int height, double* minVal, double* maxVal, \\\n+ int* minIdx, int* maxIdx, uchar* mask, size_t mask_step) \\\n+{ \\\n+ int vlmax = __riscv_vsetvlmax_##EEW##EMUL(); \\\n+ auto vec_min = __riscv_v##IS_F##mv_v_##F_OR_X##_##V_TYPE##EMUL(std::numeric_limits::max(), vlmax); \\", + "comment_created_at": "2025-01-24T07:27:05+00:00", + "comment_author": "opencv-alalek", + "comment_body": "In general it is hard to debug multi-line macros.\r\nC++ template-based approach is preferable.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1732755224", + "pr_number": 25715, + "pr_file": "modules/imgcodecs/src/grfmt_png.hpp", + "created_at": "2024-08-27T12:28:11+00:00", + 
"commented_code": "bool isFormatSupported( int depth ) const CV_OVERRIDE;\n bool write( const Mat& img, const std::vector& params ) CV_OVERRIDE;\n bool writemulti(const std::vector& img_vec, const std::vector& params) CV_OVERRIDE;\n bool writeanimation(const Animation& animinfo, const std::vector& params) CV_OVERRIDE;\n size_t save_apng(std::string inputFileName, std::vector& frames, uint first, uint loops, uint coltype, int deflate_method, int iter);\n void optim_dirty(std::vector& frames);\n void optim_duplicates(std::vector& frames, uint first);\n\n ImageEncoder newEncoder() const CV_OVERRIDE;\n\nprotected:\n static void writeDataToBuf(void* png_ptr, uchar* src, size_t size);\n static void flushBuf(void* png_ptr);\n\nprivate:\n void write_chunk(FILE* f, const char* name, uchar* data, uint length);\n void write_IDATs(FILE* f, int frame, uchar* data, uint length, uint idat_size);\n void process_rect(uchar* row, int rowbytes, int bpp, int stride, int h, uchar* rows);\n void deflate_rect_fin(int deflate_method, int iter, uchar* zbuf, uint* zsize, int bpp, int stride, uchar* rows, int zbuf_size, int n);\n void deflate_rect_op(uchar* pdata, int x, int y, int w, int h, int bpp, int stride, int zbuf_size, int n);\n void get_rect(uint w, uint h, uchar* pimage1, uchar* pimage2, uchar* ptemp, uint bpp, uint stride, int zbuf_size, uint has_tcolor, uint tcolor, int n);\n\n uchar* op_zbuf1;", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1732755224", + "repo_full_name": "opencv/opencv", + "pr_number": 25715, + "pr_file": "modules/imgcodecs/src/grfmt_png.hpp", + "discussion_id": "1732755224", + "commented_code": "@@ -86,12 +77,40 @@ class PngEncoder CV_FINAL : public BaseImageEncoder\n \n bool isFormatSupported( int depth ) const CV_OVERRIDE;\n bool write( const Mat& img, const std::vector& params ) CV_OVERRIDE;\n+ bool writemulti(const std::vector& img_vec, const std::vector& params) CV_OVERRIDE;\n+ bool writeanimation(const Animation& animinfo, const std::vector& params) CV_OVERRIDE;\n+ size_t save_apng(std::string inputFileName, std::vector& frames, uint first, uint loops, uint coltype, int deflate_method, int iter);\n+ void optim_dirty(std::vector& frames);\n+ void optim_duplicates(std::vector& frames, uint first);\n \n ImageEncoder newEncoder() const CV_OVERRIDE;\n \n protected:\n static void writeDataToBuf(void* png_ptr, uchar* src, size_t size);\n static void flushBuf(void* png_ptr);\n+\n+private:\n+ void write_chunk(FILE* f, const char* name, uchar* data, uint length);\n+ void write_IDATs(FILE* f, int frame, uchar* data, uint length, uint idat_size);\n+ void process_rect(uchar* row, int rowbytes, int bpp, int stride, int h, uchar* rows);\n+ void deflate_rect_fin(int deflate_method, int iter, uchar* zbuf, uint* zsize, int bpp, int stride, uchar* rows, int zbuf_size, int n);\n+ void deflate_rect_op(uchar* pdata, int x, int y, int w, int h, int bpp, int stride, int zbuf_size, int n);\n+ void get_rect(uint w, uint h, uchar* pimage1, uchar* pimage2, uchar* ptemp, uint bpp, uint stride, int zbuf_size, uint has_tcolor, uint tcolor, int n);\n+\n+ uchar* op_zbuf1;", + "comment_created_at": "2024-08-27T12:28:11+00:00", + "comment_author": "vrabaud", + "comment_body": "please use `unsigned char`", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-maintain-code-consistency.md b/_reviewers/opencv-maintain-code-consistency.md index 703d500..1f077ca 100644 --- a/_reviewers/opencv-maintain-code-consistency.md +++ 
b/_reviewers/opencv-maintain-code-consistency.md @@ -56,117 +56,3 @@ for (int i1 = 0; i1 < STEP_SIZE; ++i1) { 5. Use standard type names rather than shortcuts (e.g., `unsigned char` instead of `uchar`). Consistency in code style significantly improves readability and maintainability while reducing review friction. - - -[ - { - "discussion_id": "2024754221", - "pr_number": 27051, - "pr_file": "modules/photo/test/test_precomp.hpp", - "created_at": "2025-04-02T12:46:24+00:00", - "commented_code": "#include \"opencv2/ts/ocl_test.hpp\"\n#include \"opencv2/photo.hpp\"\n\nnamespace opencv_test\n{\nusing namespace cv::ccm;", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2024754221", - "repo_full_name": "opencv/opencv", - "pr_number": 27051, - "pr_file": "modules/photo/test/test_precomp.hpp", - "discussion_id": "2024754221", - "commented_code": "@@ -8,4 +8,9 @@\n #include \"opencv2/ts/ocl_test.hpp\"\n #include \"opencv2/photo.hpp\"\n \n+namespace opencv_test\n+{\n+using namespace cv::ccm;", - "comment_created_at": "2025-04-02T12:46:24+00:00", - "comment_author": "asmorkalov", - "comment_body": "using namespace is very bad practice. It affects everything, even, if the header is not included directly.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1450246565", - "pr_number": 24768, - "pr_file": "modules/dnn/src/vkcom/shader/nary_eltwise_binary_forward.comp", - "created_at": "2024-01-12T11:01:09+00:00", - "commented_code": "#version 450\n// #extension GL_EXT_debug_printf : enable\n#define ALL_THREAD 1024\n// #define ALL_THREAD 128 // Experimental batched operation\n#define STEP_SIZE 65536\n\nlayout(binding = 0) readonly buffer Input1{\n float matA[];\n};\n\nlayout(binding = 1) readonly buffer Input2{\n float matB[];\n};\n\nlayout(binding = 2) writeonly buffer Output{\n float matOut[];\n};\n\nlayout(binding = 3) uniform Params {\n int opType;\n int ndims;\n} params;\n\nlayout(binding = 4) readonly buffer Shape {\n int shape[];\n};\n\nlayout(binding = 5) readonly buffer Step {\n int matStep[];\n};\n\n/* local_size_x, local_size_y, local_size_z there defines the number of invocations\n of this compute shader in the current work group. 
*/\n// TODO: Check if this makes any sense\n// TODO: Check if it is required to fetch PhysicalDeviceLimit from Context\n// TODO: here we shall assume that maxGroupInvocation is 1024.\nlayout(local_size_x = ALL_THREAD, local_size_y = 1, local_size_z = 1) in; // TODO: Check if this makes any sense\n\nconst int AND = 0;\nconst int EQUAL = 1;\nconst int GREATER = 2;\nconst int GREATER_EQUAL = 3;\nconst int LESS = 4;\nconst int LESS_EQUAL = 5;\nconst int OR = 6;\nconst int POW = 7;\nconst int XOR = 8;\nconst int BITSHIFT = 9;\nconst int MAX = 10;\nconst int MEAN = 11;\nconst int MIN = 12;\nconst int MOD = 13;\nconst int PROD = 14;\nconst int SUB = 15;\nconst int SUM = 16;\nconst int ADD = 17;\nconst int DIV = 18;\nconst int WHERE = 19;\n\nvoid binary_forward()\n{\n int ndims = params.ndims;\n int dp1 = matStep[2 * ndims - 1];\n int dp2 = matStep[3 * ndims - 1];\n int dp = matStep[ndims - 1];\n int n1 = shape[ndims - 1], n2 = shape[ndims - 2];\n\n int plane_idx = int(gl_WorkGroupID.x);\n\n int ptr1 = 0;\n int ptr2 = 0;\n int ptr = 0;\n int idx = plane_idx;\n\n for (int k = ndims - 3; k >= 0; --k) {\n int next_idx = idx / shape[k];\n int i_k = idx - next_idx * shape[k]; // i_k = idx % shape[k]\n ptr1 += i_k * matStep[ndims + k];\n ptr2 += i_k * matStep[2 * ndims + k];\n ptr += i_k * matStep[k];\n idx = next_idx;\n }\n\n int i2_offset = int(gl_WorkGroupID.y);\n int i1_offset = int(gl_LocalInvocationID.x);\n\n ptr1 += i2_offset * matStep[2 * ndims - 2];\n ptr2 += i2_offset * matStep[3 * ndims - 2];\n ptr += i2_offset * matStep[ndims - 2];\n\n/*\n if (dp1 == 1 && dp2 == 1 && dp == 1) {\n for (int i1 = i1_offset; i1 < n1; i1 += ALL_THREAD) {\n perform_op(&matA + ptr1 + i1, &matB + ptr2 + i1, &matOut + ptr + i1);\n }\n } else if (dp1 == 1 && dp2 == 0 && dp == 1) {\n float k = matB[ptr2];\n for (int i1 = i1_offset; i1 < n1; i1 += ALL_THREAD) {\n perform_op(&matA + ptr1 + i1, &matB + ptr2, &matOut + ptr + i1);\n }\n } else if (dp1 == 0 && dp2 == 1 && dp == 1) {\n float k = matA[ptr1];\n for (int i1 = i1_offset; i1 < n1; i1 += ALL_THREAD) {\n perform_op(&matA + ptr1, &matB + ptr2 + i1, &matOut + ptr + i1);\n }\n } else {\n for (int i1 = i1_offset; i1 < n1; i1 += ALL_THREAD) {\n perform_op(&matA + ptr1 + i1 * dp1, &matB + ptr2 + i1 * dp2, &matOut + ptr + i1 * dp);\n }\n }\n*/\n for (int i1 = i1_offset; i1 < n1; i1 += ALL_THREAD) {\n switch (params.opType) {\n case int(ADD):\n matOut[ptr + i1 * dp] = matA[ptr1 + i1 * dp1] + matB[ptr2 + i1 * dp2];\n break;\n case int(SUB):\n matOut[ptr + i1 * dp] = matA[ptr1 + i1 * dp1] - matB[ptr2 + i1 * dp2];\n break;\n case int(PROD):\n matOut[ptr + i1 * dp] = matA[ptr1 + i1 * dp1] * matB[ptr2 + i1 * dp2];\n break;\n case int(DIV):\n matOut[ptr + i1 * dp] = matA[ptr1 + i1 * dp1] / matB[ptr2 + i1 * dp2];\n break;\n }\n }\n\n // Experimental Batched Operation\n\n // int threadID = int(gl_LocalInvocationID.x);\n // int i2_offset = (int(gl_WorkGroupID.y)) * STEP_SIZE + threadID;\n // int i1_offset = (int(gl_WorkGroupID.z)) * STEP_SIZE;\n\n // ptr1 += i2_offset * matStep[2 * ndims - 2] + i1_offset * dp1;\n // ptr2 += i2_offset * matStep[3 * ndims - 2] + i1_offset * dp2;\n // ptr += i2_offset * matStep[ndims - 2] + i1_offset * dp;\n\n\n // int i2_loop_count = STEP_SIZE / ALL_THREAD;\n // for (int i2_c = 0; i2_offset < n2 && i2_c < i2_loop_count; i2_offset += ALL_THREAD, ++i2_c)\n // {\n // if (dp1 == 1 && dp2 == 1 && dp == 1) {\n // for (int i1 = 0; i1 < STEP_SIZE; ++i1) {\n // if (ptr + i1 < n1) {\n // matOut[ptr + i1] = matA[ptr1 + i1] + matB[ptr2 + i1];\n // }\n // }\n // } 
else if (dp1 == 1 && dp2 == 0 && dp == 1) {\n // float k = matB[ptr2];\n // for (int i1 = 0; i1 < STEP_SIZE; ++i1) {\n // if (ptr + i1 < n1) {\n // matOut[ptr + i1] = matA[ptr1 + i1] + k;\n // }\n // }\n // } else if (dp1 == 0 && dp2 == 1 && dp == 1) {\n // float k = matA[ptr1];\n // for (int i1 = 0; i1 < STEP_SIZE; ++i1) {\n // if (ptr + i1 < n1) {\n // matOut[ptr + i1] = k + matB[ptr2 + i1];\n // }\n // }\n // } else {\n // for (int i1 = 0; i1 < STEP_SIZE; ++i1) {\n // if (ptr + i1 < n1) {\n // matOut[ptr + i1 * dp] = matA[ptr1 + i1 * dp1] + matB[ptr2 + i1 * dp2];\n // }\n // }\n // }\n // ptr1 += ALL_THREAD * matStep[2 * ndims - 2];\n // ptr2 += ALL_THREAD * matStep[3 * ndims - 2];\n // ptr += ALL_THREAD * matStep[ndims - 2];\n // }", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1450246565", - "repo_full_name": "opencv/opencv", - "pr_number": 24768, - "pr_file": "modules/dnn/src/vkcom/shader/nary_eltwise_binary_forward.comp", - "discussion_id": "1450246565", - "commented_code": "@@ -0,0 +1,182 @@\n+#version 450\n+// #extension GL_EXT_debug_printf : enable\n+#define ALL_THREAD 1024\n+// #define ALL_THREAD 128 // Experimental batched operation\n+#define STEP_SIZE 65536\n+\n+layout(binding = 0) readonly buffer Input1{\n+ float matA[];\n+};\n+\n+layout(binding = 1) readonly buffer Input2{\n+ float matB[];\n+};\n+\n+layout(binding = 2) writeonly buffer Output{\n+ float matOut[];\n+};\n+\n+layout(binding = 3) uniform Params {\n+ int opType;\n+ int ndims;\n+} params;\n+\n+layout(binding = 4) readonly buffer Shape {\n+ int shape[];\n+};\n+\n+layout(binding = 5) readonly buffer Step {\n+ int matStep[];\n+};\n+\n+/* local_size_x, local_size_y, local_size_z there defines the number of invocations\n+ of this compute shader in the current work group. 
*/\n+// TODO: Check if this makes any sense\n+// TODO: Check if it is required to fetch PhysicalDeviceLimit from Context\n+// TODO: here we shall assume that maxGroupInvocation is 1024.\n+layout(local_size_x = ALL_THREAD, local_size_y = 1, local_size_z = 1) in; // TODO: Check if this makes any sense\n+\n+const int AND = 0;\n+const int EQUAL = 1;\n+const int GREATER = 2;\n+const int GREATER_EQUAL = 3;\n+const int LESS = 4;\n+const int LESS_EQUAL = 5;\n+const int OR = 6;\n+const int POW = 7;\n+const int XOR = 8;\n+const int BITSHIFT = 9;\n+const int MAX = 10;\n+const int MEAN = 11;\n+const int MIN = 12;\n+const int MOD = 13;\n+const int PROD = 14;\n+const int SUB = 15;\n+const int SUM = 16;\n+const int ADD = 17;\n+const int DIV = 18;\n+const int WHERE = 19;\n+\n+void binary_forward()\n+{\n+ int ndims = params.ndims;\n+ int dp1 = matStep[2 * ndims - 1];\n+ int dp2 = matStep[3 * ndims - 1];\n+ int dp = matStep[ndims - 1];\n+ int n1 = shape[ndims - 1], n2 = shape[ndims - 2];\n+\n+ int plane_idx = int(gl_WorkGroupID.x);\n+\n+ int ptr1 = 0;\n+ int ptr2 = 0;\n+ int ptr = 0;\n+ int idx = plane_idx;\n+\n+ for (int k = ndims - 3; k >= 0; --k) {\n+ int next_idx = idx / shape[k];\n+ int i_k = idx - next_idx * shape[k]; // i_k = idx % shape[k]\n+ ptr1 += i_k * matStep[ndims + k];\n+ ptr2 += i_k * matStep[2 * ndims + k];\n+ ptr += i_k * matStep[k];\n+ idx = next_idx;\n+ }\n+\n+ int i2_offset = int(gl_WorkGroupID.y);\n+ int i1_offset = int(gl_LocalInvocationID.x);\n+\n+ ptr1 += i2_offset * matStep[2 * ndims - 2];\n+ ptr2 += i2_offset * matStep[3 * ndims - 2];\n+ ptr += i2_offset * matStep[ndims - 2];\n+\n+/*\n+ if (dp1 == 1 && dp2 == 1 && dp == 1) {\n+ for (int i1 = i1_offset; i1 < n1; i1 += ALL_THREAD) {\n+ perform_op(&matA + ptr1 + i1, &matB + ptr2 + i1, &matOut + ptr + i1);\n+ }\n+ } else if (dp1 == 1 && dp2 == 0 && dp == 1) {\n+ float k = matB[ptr2];\n+ for (int i1 = i1_offset; i1 < n1; i1 += ALL_THREAD) {\n+ perform_op(&matA + ptr1 + i1, &matB + ptr2, &matOut + ptr + i1);\n+ }\n+ } else if (dp1 == 0 && dp2 == 1 && dp == 1) {\n+ float k = matA[ptr1];\n+ for (int i1 = i1_offset; i1 < n1; i1 += ALL_THREAD) {\n+ perform_op(&matA + ptr1, &matB + ptr2 + i1, &matOut + ptr + i1);\n+ }\n+ } else {\n+ for (int i1 = i1_offset; i1 < n1; i1 += ALL_THREAD) {\n+ perform_op(&matA + ptr1 + i1 * dp1, &matB + ptr2 + i1 * dp2, &matOut + ptr + i1 * dp);\n+ }\n+ }\n+*/\n+ for (int i1 = i1_offset; i1 < n1; i1 += ALL_THREAD) {\n+ switch (params.opType) {\n+ case int(ADD):\n+ matOut[ptr + i1 * dp] = matA[ptr1 + i1 * dp1] + matB[ptr2 + i1 * dp2];\n+ break;\n+ case int(SUB):\n+ matOut[ptr + i1 * dp] = matA[ptr1 + i1 * dp1] - matB[ptr2 + i1 * dp2];\n+ break;\n+ case int(PROD):\n+ matOut[ptr + i1 * dp] = matA[ptr1 + i1 * dp1] * matB[ptr2 + i1 * dp2];\n+ break;\n+ case int(DIV):\n+ matOut[ptr + i1 * dp] = matA[ptr1 + i1 * dp1] / matB[ptr2 + i1 * dp2];\n+ break;\n+ }\n+ }\n+\n+ // Experimental Batched Operation\n+\n+ // int threadID = int(gl_LocalInvocationID.x);\n+ // int i2_offset = (int(gl_WorkGroupID.y)) * STEP_SIZE + threadID;\n+ // int i1_offset = (int(gl_WorkGroupID.z)) * STEP_SIZE;\n+\n+ // ptr1 += i2_offset * matStep[2 * ndims - 2] + i1_offset * dp1;\n+ // ptr2 += i2_offset * matStep[3 * ndims - 2] + i1_offset * dp2;\n+ // ptr += i2_offset * matStep[ndims - 2] + i1_offset * dp;\n+\n+\n+ // int i2_loop_count = STEP_SIZE / ALL_THREAD;\n+ // for (int i2_c = 0; i2_offset < n2 && i2_c < i2_loop_count; i2_offset += ALL_THREAD, ++i2_c)\n+ // {\n+ // if (dp1 == 1 && dp2 == 1 && dp == 1) {\n+ // for (int i1 = 0; i1 < STEP_SIZE; 
++i1) {\n+ // if (ptr + i1 < n1) {\n+ // matOut[ptr + i1] = matA[ptr1 + i1] + matB[ptr2 + i1];\n+ // }\n+ // }\n+ // } else if (dp1 == 1 && dp2 == 0 && dp == 1) {\n+ // float k = matB[ptr2];\n+ // for (int i1 = 0; i1 < STEP_SIZE; ++i1) {\n+ // if (ptr + i1 < n1) {\n+ // matOut[ptr + i1] = matA[ptr1 + i1] + k;\n+ // }\n+ // }\n+ // } else if (dp1 == 0 && dp2 == 1 && dp == 1) {\n+ // float k = matA[ptr1];\n+ // for (int i1 = 0; i1 < STEP_SIZE; ++i1) {\n+ // if (ptr + i1 < n1) {\n+ // matOut[ptr + i1] = k + matB[ptr2 + i1];\n+ // }\n+ // }\n+ // } else {\n+ // for (int i1 = 0; i1 < STEP_SIZE; ++i1) {\n+ // if (ptr + i1 < n1) {\n+ // matOut[ptr + i1 * dp] = matA[ptr1 + i1 * dp1] + matB[ptr2 + i1 * dp2];\n+ // }\n+ // }\n+ // }\n+ // ptr1 += ALL_THREAD * matStep[2 * ndims - 2];\n+ // ptr2 += ALL_THREAD * matStep[3 * ndims - 2];\n+ // ptr += ALL_THREAD * matStep[ndims - 2];\n+ // }", - "comment_created_at": "2024-01-12T11:01:09+00:00", - "comment_author": "asmorkalov", - "comment_body": "Please remove dead code, or guard it with macro, if you want to use it later.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1989625280", - "pr_number": 26834, - "pr_file": "modules/imgproc/src/contours_common.hpp", - "created_at": "2025-03-11T15:56:04+00:00", - "commented_code": "}\n\nprivate:\n std::stack levels;\n Tree& tree;\n std::stack levels;\n};\n\n//==============================================================================\n\ntemplate \nclass ContourDataStorage\n{\npublic:\n typedef T data_storage_t;\n typedef BlockStorage storage_t;\npublic:\n ContourDataStorage(void) = delete;\n ContourDataStorage(storage_t* _storage):storage(_storage) {}\n ContourDataStorage(const ContourDataStorage&) = delete;\n ContourDataStorage(ContourDataStorage&&) noexcept = default;\n ~ContourDataStorage() = default;\n ContourDataStorage& operator=(const ContourDataStorage&) = delete;\n ContourDataStorage& operator=(ContourDataStorage&&) noexcept = default;\npublic:\n typename storage_t::RangeIterator getRangeIterator(void) const {return storage->getRangeIterator(first, last);}\npublic:\n bool empty(void) const {return first == last;}\n size_t size(void) const {return last - first;}\npublic:\n void clear(void) {first = last;}\n bool resize(size_t newSize) {\n bool ok = (newSize <= size());\n if (ok)\n last = first+newSize;\n return ok;\n }\n void push_back(const data_storage_t& value) {", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1989625280", - "repo_full_name": "opencv/opencv", - "pr_number": 26834, - "pr_file": "modules/imgproc/src/contours_common.hpp", - "discussion_id": "1989625280", - "commented_code": "@@ -159,23 +171,78 @@ class TreeIterator\n }\n \n private:\n- std::stack levels;\n Tree& tree;\n+ std::stack levels;\n };\n \n //==============================================================================\n \n+template \n+class ContourDataStorage\n+{\n+public:\n+ typedef T data_storage_t;\n+ typedef BlockStorage storage_t;\n+public:\n+ ContourDataStorage(void) = delete;\n+ ContourDataStorage(storage_t* _storage):storage(_storage) {}\n+ ContourDataStorage(const ContourDataStorage&) = delete;\n+ ContourDataStorage(ContourDataStorage&&) noexcept = default;\n+ ~ContourDataStorage() = default;\n+ ContourDataStorage& operator=(const ContourDataStorage&) = delete;\n+ ContourDataStorage& operator=(ContourDataStorage&&) noexcept = default;\n+public:\n+ typename storage_t::RangeIterator getRangeIterator(void) const {return storage->getRangeIterator(first, 
last);}\n+public:\n+ bool empty(void) const {return first == last;}\n+ size_t size(void) const {return last - first;}\n+public:\n+ void clear(void) {first = last;}\n+ bool resize(size_t newSize) {\n+ bool ok = (newSize <= size());\n+ if (ok)\n+ last = first+newSize;\n+ return ok;\n+ }\n+ void push_back(const data_storage_t& value) {", - "comment_created_at": "2025-03-11T15:56:04+00:00", - "comment_author": "mshabunin", - "comment_body": "It would be better to keep formatting consistent in existing files. This file uses Allman brace/indent style:\r\n```\r\nvoid push_back(...)\r\n{\r\n if (...)\r\n {\r\n ...\r\n }\r\n ...\r\n}\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1928218708", - "pr_number": 26789, - "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/minmax.hpp", - "created_at": "2025-01-24T07:27:05+00:00", - "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n#ifndef OPENCV_HAL_RVV_MINMAXIDX_HPP_INCLUDED\n#define OPENCV_HAL_RVV_MINMAXIDX_HPP_INCLUDED\n\n#include \n\nnamespace cv { namespace cv_hal_rvv {\n\n#undef cv_hal_minMaxIdx\n#define cv_hal_minMaxIdx cv::cv_hal_rvv::minMaxIdx\n#undef cv_hal_minMaxIdxMaskStep\n#define cv_hal_minMaxIdxMaskStep cv::cv_hal_rvv::minMaxIdx\n\n#define HAL_RVV_MINMAXIDX_READTWICE_GENERATOR(D_TYPE, V_TYPE, EEW, EMUL, IS_U, IS_F, F_OR_X, F_OR_S, M_EMUL) \\\ninline int minMaxIdx_##V_TYPE(const uchar* src_data, size_t src_step, int width, int height, double* minVal, double* maxVal, \\\n int* minIdx, int* maxIdx, uchar* mask, size_t mask_step) \\\n{ \\\n int vlmax = __riscv_vsetvlmax_##EEW##EMUL(); \\\n auto vec_min = __riscv_v##IS_F##mv_v_##F_OR_X##_##V_TYPE##EMUL(std::numeric_limits::max(), vlmax); \\", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1928218708", - "repo_full_name": "opencv/opencv", - "pr_number": 26789, - "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/minmax.hpp", - "discussion_id": "1928218708", - "commented_code": "@@ -0,0 +1,294 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+#ifndef OPENCV_HAL_RVV_MINMAXIDX_HPP_INCLUDED\n+#define OPENCV_HAL_RVV_MINMAXIDX_HPP_INCLUDED\n+\n+#include \n+\n+namespace cv { namespace cv_hal_rvv {\n+\n+#undef cv_hal_minMaxIdx\n+#define cv_hal_minMaxIdx cv::cv_hal_rvv::minMaxIdx\n+#undef cv_hal_minMaxIdxMaskStep\n+#define cv_hal_minMaxIdxMaskStep cv::cv_hal_rvv::minMaxIdx\n+\n+#define HAL_RVV_MINMAXIDX_READTWICE_GENERATOR(D_TYPE, V_TYPE, EEW, EMUL, IS_U, IS_F, F_OR_X, F_OR_S, M_EMUL) \\\n+inline int minMaxIdx_##V_TYPE(const uchar* src_data, size_t src_step, int width, int height, double* minVal, double* maxVal, \\\n+ int* minIdx, int* maxIdx, uchar* mask, size_t mask_step) \\\n+{ \\\n+ int vlmax = __riscv_vsetvlmax_##EEW##EMUL(); \\\n+ auto vec_min = __riscv_v##IS_F##mv_v_##F_OR_X##_##V_TYPE##EMUL(std::numeric_limits::max(), vlmax); \\", - "comment_created_at": "2025-01-24T07:27:05+00:00", - "comment_author": "opencv-alalek", - "comment_body": "In general it is hard to debug multi-line macros.\r\nC++ template-based approach is preferable.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1732755224", - "pr_number": 25715, - "pr_file": "modules/imgcodecs/src/grfmt_png.hpp", - "created_at": "2024-08-27T12:28:11+00:00", - 
"commented_code": "bool isFormatSupported( int depth ) const CV_OVERRIDE;\n bool write( const Mat& img, const std::vector& params ) CV_OVERRIDE;\n bool writemulti(const std::vector& img_vec, const std::vector& params) CV_OVERRIDE;\n bool writeanimation(const Animation& animinfo, const std::vector& params) CV_OVERRIDE;\n size_t save_apng(std::string inputFileName, std::vector& frames, uint first, uint loops, uint coltype, int deflate_method, int iter);\n void optim_dirty(std::vector& frames);\n void optim_duplicates(std::vector& frames, uint first);\n\n ImageEncoder newEncoder() const CV_OVERRIDE;\n\nprotected:\n static void writeDataToBuf(void* png_ptr, uchar* src, size_t size);\n static void flushBuf(void* png_ptr);\n\nprivate:\n void write_chunk(FILE* f, const char* name, uchar* data, uint length);\n void write_IDATs(FILE* f, int frame, uchar* data, uint length, uint idat_size);\n void process_rect(uchar* row, int rowbytes, int bpp, int stride, int h, uchar* rows);\n void deflate_rect_fin(int deflate_method, int iter, uchar* zbuf, uint* zsize, int bpp, int stride, uchar* rows, int zbuf_size, int n);\n void deflate_rect_op(uchar* pdata, int x, int y, int w, int h, int bpp, int stride, int zbuf_size, int n);\n void get_rect(uint w, uint h, uchar* pimage1, uchar* pimage2, uchar* ptemp, uint bpp, uint stride, int zbuf_size, uint has_tcolor, uint tcolor, int n);\n\n uchar* op_zbuf1;", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1732755224", - "repo_full_name": "opencv/opencv", - "pr_number": 25715, - "pr_file": "modules/imgcodecs/src/grfmt_png.hpp", - "discussion_id": "1732755224", - "commented_code": "@@ -86,12 +77,40 @@ class PngEncoder CV_FINAL : public BaseImageEncoder\n \n bool isFormatSupported( int depth ) const CV_OVERRIDE;\n bool write( const Mat& img, const std::vector& params ) CV_OVERRIDE;\n+ bool writemulti(const std::vector& img_vec, const std::vector& params) CV_OVERRIDE;\n+ bool writeanimation(const Animation& animinfo, const std::vector& params) CV_OVERRIDE;\n+ size_t save_apng(std::string inputFileName, std::vector& frames, uint first, uint loops, uint coltype, int deflate_method, int iter);\n+ void optim_dirty(std::vector& frames);\n+ void optim_duplicates(std::vector& frames, uint first);\n \n ImageEncoder newEncoder() const CV_OVERRIDE;\n \n protected:\n static void writeDataToBuf(void* png_ptr, uchar* src, size_t size);\n static void flushBuf(void* png_ptr);\n+\n+private:\n+ void write_chunk(FILE* f, const char* name, uchar* data, uint length);\n+ void write_IDATs(FILE* f, int frame, uchar* data, uint length, uint idat_size);\n+ void process_rect(uchar* row, int rowbytes, int bpp, int stride, int h, uchar* rows);\n+ void deflate_rect_fin(int deflate_method, int iter, uchar* zbuf, uint* zsize, int bpp, int stride, uchar* rows, int zbuf_size, int n);\n+ void deflate_rect_op(uchar* pdata, int x, int y, int w, int h, int bpp, int stride, int zbuf_size, int n);\n+ void get_rect(uint w, uint h, uchar* pimage1, uchar* pimage2, uchar* ptemp, uint bpp, uint stride, int zbuf_size, uint has_tcolor, uint tcolor, int n);\n+\n+ uchar* op_zbuf1;", - "comment_created_at": "2024-08-27T12:28:11+00:00", - "comment_author": "vrabaud", - "comment_body": "please use `unsigned char`", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-meaningful-semantic-naming.json b/_reviewers/opencv-meaningful-semantic-naming.json new file mode 100644 index 0000000..8e04faa --- /dev/null +++ b/_reviewers/opencv-meaningful-semantic-naming.json @@ 
-0,0 +1,92 @@ +[ + { + "discussion_id": "2050236607", + "pr_number": 27239, + "pr_file": "platforms/android/build_sdk.py", + "created_at": "2025-04-18T07:23:29+00:00", + "commented_code": "return android_sdk_ndk_bundle\n return None\n\n\ndef check_have_ipp_flag(cmake_file):\n print(\"Checking build flag in:\", cmake_file)\n if not os.path.isfile(cmake_file):\n print(f\" ERROR: File {cmake_file} does not exist.\")\n sys.exit(1)\n \n with open(cmake_file, 'r') as file:\n for line in file:\n if line.strip().startswith(\"HAVE_IPP=\"):", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2050236607", + "repo_full_name": "opencv/opencv", + "pr_number": 27239, + "pr_file": "platforms/android/build_sdk.py", + "discussion_id": "2050236607", + "commented_code": "@@ -380,7 +380,25 @@ def get_ndk_dir():\n return android_sdk_ndk_bundle\n return None\n \n-\n+def check_have_ipp_flag(cmake_file):\n+ print(\"Checking build flag in:\", cmake_file)\n+ if not os.path.isfile(cmake_file):\n+ print(f\" ERROR: File {cmake_file} does not exist.\")\n+ sys.exit(1)\n+ \n+ with open(cmake_file, 'r') as file:\n+ for line in file:\n+ if line.strip().startswith(\"HAVE_IPP=\"):", + "comment_created_at": "2025-04-18T07:23:29+00:00", + "comment_author": "asmorkalov", + "comment_body": "It makes sense to move key name to the function parameters. E.g.\r\ncheck_cmake_flag_enabled(cmake_file, \"HAVE_IPP\")\r\n\r\nThe same function may be used for KleidiCV and other dependencies.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1960066488", + "pr_number": 26934, + "pr_file": "modules/objdetect/misc/python/test/test_objdetect_aruco.py", + "created_at": "2025-02-18T16:09:34+00:00", + "commented_code": "gold_corners = np.array([[offset, offset],[marker_size+offset-1.0,offset],\n [marker_size+offset-1.0,marker_size+offset-1.0],\n [offset, marker_size+offset-1.0]], dtype=np.float32)\n corners, ids, rejected = aruco_detector.detectMarkers(img_marker)\n corners, ids, rejected, _ = aruco_detector.detectMarkers(img_marker)", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1960066488", + "repo_full_name": "opencv/opencv", + "pr_number": 26934, + "pr_file": "modules/objdetect/misc/python/test/test_objdetect_aruco.py", + "discussion_id": "1960066488", + "commented_code": "@@ -156,7 +156,7 @@ def test_aruco_detector(self):\n gold_corners = np.array([[offset, offset],[marker_size+offset-1.0,offset],\n [marker_size+offset-1.0,marker_size+offset-1.0],\n [offset, marker_size+offset-1.0]], dtype=np.float32)\n- corners, ids, rejected = aruco_detector.detectMarkers(img_marker)\n+ corners, ids, rejected, _ = aruco_detector.detectMarkers(img_marker)", + "comment_created_at": "2025-02-18T16:09:34+00:00", + "comment_author": "asmorkalov", + "comment_body": "It breaks compatibility. M.b. 
rename the method to detectMarkersMultiDict?", + "pr_file_module": null + }, + { + "comment_id": "1962131512", + "repo_full_name": "opencv/opencv", + "pr_number": 26934, + "pr_file": "modules/objdetect/misc/python/test/test_objdetect_aruco.py", + "discussion_id": "1960066488", + "commented_code": "@@ -156,7 +156,7 @@ def test_aruco_detector(self):\n gold_corners = np.array([[offset, offset],[marker_size+offset-1.0,offset],\n [marker_size+offset-1.0,marker_size+offset-1.0],\n [offset, marker_size+offset-1.0]], dtype=np.float32)\n- corners, ids, rejected = aruco_detector.detectMarkers(img_marker)\n+ corners, ids, rejected, _ = aruco_detector.detectMarkers(img_marker)", + "comment_created_at": "2025-02-19T17:53:15+00:00", + "comment_author": "BenjaminKnecht", + "comment_body": "Unfortunate, but I managed to do create a detectMarkersMultiDict function with relative little redundant code.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1760580610", + "pr_number": 26147, + "pr_file": "modules/js/generator/embindgen.py", + "created_at": "2024-09-16T06:12:24+00:00", + "commented_code": "for ns_name, ns in sorted(self.namespaces.items()):\n if ns_name.split('.')[0] != 'cv':\n continue\n # TODO CALIB_FIX_FOCAL_LENGTH is defined both in cv:: and cv::fisheye\n prefix = 'FISHEYE_' if 'fisheye' in ns_name else ''\n for name, const in sorted(ns.consts.items()):\n name = prefix + name", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1760580610", + "repo_full_name": "opencv/opencv", + "pr_number": 26147, + "pr_file": "modules/js/generator/embindgen.py", + "discussion_id": "1760580610", + "commented_code": "@@ -936,7 +937,10 @@ def gen(self, dst_file, src_files, core_bindings):\n for ns_name, ns in sorted(self.namespaces.items()):\n if ns_name.split('.')[0] != 'cv':\n continue\n+ # TODO CALIB_FIX_FOCAL_LENGTH is defined both in cv:: and cv::fisheye\n+ prefix = 'FISHEYE_' if 'fisheye' in ns_name else ''\n for name, const in sorted(ns.consts.items()):\n+ name = prefix + name", + "comment_created_at": "2024-09-16T06:12:24+00:00", + "comment_author": "asmorkalov", + "comment_body": "What if use `namespace_prefix_override` solution like for the functions? Namespace is already appended to the function names, if it's not overridden by config. It's less hacky and more obvious.", + "pr_file_module": null + }, + { + "comment_id": "1782566322", + "repo_full_name": "opencv/opencv", + "pr_number": 26147, + "pr_file": "modules/js/generator/embindgen.py", + "discussion_id": "1760580610", + "commented_code": "@@ -936,7 +937,10 @@ def gen(self, dst_file, src_files, core_bindings):\n for ns_name, ns in sorted(self.namespaces.items()):\n if ns_name.split('.')[0] != 'cv':\n continue\n+ # TODO CALIB_FIX_FOCAL_LENGTH is defined both in cv:: and cv::fisheye\n+ prefix = 'FISHEYE_' if 'fisheye' in ns_name else ''\n for name, const in sorted(ns.consts.items()):\n+ name = prefix + name", + "comment_created_at": "2024-10-01T11:01:39+00:00", + "comment_author": "vrabaud", + "comment_body": "I guess that would work but I don't know how to use it. Should all the enums be added to https://github.com/opencv/opencv/blob/450e741f8d53ff12b4e194c7762adaefb952555a/modules/js/generator/embindgen.py#L989 ? 
", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-meaningful-semantic-naming.md b/_reviewers/opencv-meaningful-semantic-naming.md index 0a44c84..764ff78 100644 --- a/_reviewers/opencv-meaningful-semantic-naming.md +++ b/_reviewers/opencv-meaningful-semantic-naming.md @@ -38,97 +38,3 @@ Choose names that clearly communicate purpose and follow consistent patterns acr ``` 3. Apply naming conventions consistently using established patterns in the codebase rather than creating special cases. If a mechanism exists for handling naming scenarios (like namespace prefixing for functions), use it for similar cases too. - - -[ - { - "discussion_id": "2050236607", - "pr_number": 27239, - "pr_file": "platforms/android/build_sdk.py", - "created_at": "2025-04-18T07:23:29+00:00", - "commented_code": "return android_sdk_ndk_bundle\n return None\n\n\ndef check_have_ipp_flag(cmake_file):\n print(\"Checking build flag in:\", cmake_file)\n if not os.path.isfile(cmake_file):\n print(f\" ERROR: File {cmake_file} does not exist.\")\n sys.exit(1)\n \n with open(cmake_file, 'r') as file:\n for line in file:\n if line.strip().startswith(\"HAVE_IPP=\"):", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2050236607", - "repo_full_name": "opencv/opencv", - "pr_number": 27239, - "pr_file": "platforms/android/build_sdk.py", - "discussion_id": "2050236607", - "commented_code": "@@ -380,7 +380,25 @@ def get_ndk_dir():\n return android_sdk_ndk_bundle\n return None\n \n-\n+def check_have_ipp_flag(cmake_file):\n+ print(\"Checking build flag in:\", cmake_file)\n+ if not os.path.isfile(cmake_file):\n+ print(f\" ERROR: File {cmake_file} does not exist.\")\n+ sys.exit(1)\n+ \n+ with open(cmake_file, 'r') as file:\n+ for line in file:\n+ if line.strip().startswith(\"HAVE_IPP=\"):", - "comment_created_at": "2025-04-18T07:23:29+00:00", - "comment_author": "asmorkalov", - "comment_body": "It makes sense to move key name to the function parameters. E.g.\r\ncheck_cmake_flag_enabled(cmake_file, \"HAVE_IPP\")\r\n\r\nThe same function may be used for KleidiCV and other dependencies.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1960066488", - "pr_number": 26934, - "pr_file": "modules/objdetect/misc/python/test/test_objdetect_aruco.py", - "created_at": "2025-02-18T16:09:34+00:00", - "commented_code": "gold_corners = np.array([[offset, offset],[marker_size+offset-1.0,offset],\n [marker_size+offset-1.0,marker_size+offset-1.0],\n [offset, marker_size+offset-1.0]], dtype=np.float32)\n corners, ids, rejected = aruco_detector.detectMarkers(img_marker)\n corners, ids, rejected, _ = aruco_detector.detectMarkers(img_marker)", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1960066488", - "repo_full_name": "opencv/opencv", - "pr_number": 26934, - "pr_file": "modules/objdetect/misc/python/test/test_objdetect_aruco.py", - "discussion_id": "1960066488", - "commented_code": "@@ -156,7 +156,7 @@ def test_aruco_detector(self):\n gold_corners = np.array([[offset, offset],[marker_size+offset-1.0,offset],\n [marker_size+offset-1.0,marker_size+offset-1.0],\n [offset, marker_size+offset-1.0]], dtype=np.float32)\n- corners, ids, rejected = aruco_detector.detectMarkers(img_marker)\n+ corners, ids, rejected, _ = aruco_detector.detectMarkers(img_marker)", - "comment_created_at": "2025-02-18T16:09:34+00:00", - "comment_author": "asmorkalov", - "comment_body": "It breaks compatibility. M.b. 
rename the method to detectMarkersMultiDict?", - "pr_file_module": null - }, - { - "comment_id": "1962131512", - "repo_full_name": "opencv/opencv", - "pr_number": 26934, - "pr_file": "modules/objdetect/misc/python/test/test_objdetect_aruco.py", - "discussion_id": "1960066488", - "commented_code": "@@ -156,7 +156,7 @@ def test_aruco_detector(self):\n gold_corners = np.array([[offset, offset],[marker_size+offset-1.0,offset],\n [marker_size+offset-1.0,marker_size+offset-1.0],\n [offset, marker_size+offset-1.0]], dtype=np.float32)\n- corners, ids, rejected = aruco_detector.detectMarkers(img_marker)\n+ corners, ids, rejected, _ = aruco_detector.detectMarkers(img_marker)", - "comment_created_at": "2025-02-19T17:53:15+00:00", - "comment_author": "BenjaminKnecht", - "comment_body": "Unfortunate, but I managed to do create a detectMarkersMultiDict function with relative little redundant code.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1760580610", - "pr_number": 26147, - "pr_file": "modules/js/generator/embindgen.py", - "created_at": "2024-09-16T06:12:24+00:00", - "commented_code": "for ns_name, ns in sorted(self.namespaces.items()):\n if ns_name.split('.')[0] != 'cv':\n continue\n # TODO CALIB_FIX_FOCAL_LENGTH is defined both in cv:: and cv::fisheye\n prefix = 'FISHEYE_' if 'fisheye' in ns_name else ''\n for name, const in sorted(ns.consts.items()):\n name = prefix + name", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1760580610", - "repo_full_name": "opencv/opencv", - "pr_number": 26147, - "pr_file": "modules/js/generator/embindgen.py", - "discussion_id": "1760580610", - "commented_code": "@@ -936,7 +937,10 @@ def gen(self, dst_file, src_files, core_bindings):\n for ns_name, ns in sorted(self.namespaces.items()):\n if ns_name.split('.')[0] != 'cv':\n continue\n+ # TODO CALIB_FIX_FOCAL_LENGTH is defined both in cv:: and cv::fisheye\n+ prefix = 'FISHEYE_' if 'fisheye' in ns_name else ''\n for name, const in sorted(ns.consts.items()):\n+ name = prefix + name", - "comment_created_at": "2024-09-16T06:12:24+00:00", - "comment_author": "asmorkalov", - "comment_body": "What if use `namespace_prefix_override` solution like for the functions? Namespace is already appended to the function names, if it's not overridden by config. It's less hacky and more obvious.", - "pr_file_module": null - }, - { - "comment_id": "1782566322", - "repo_full_name": "opencv/opencv", - "pr_number": 26147, - "pr_file": "modules/js/generator/embindgen.py", - "discussion_id": "1760580610", - "commented_code": "@@ -936,7 +937,10 @@ def gen(self, dst_file, src_files, core_bindings):\n for ns_name, ns in sorted(self.namespaces.items()):\n if ns_name.split('.')[0] != 'cv':\n continue\n+ # TODO CALIB_FIX_FOCAL_LENGTH is defined both in cv:: and cv::fisheye\n+ prefix = 'FISHEYE_' if 'fisheye' in ns_name else ''\n for name, const in sorted(ns.consts.items()):\n+ name = prefix + name", - "comment_created_at": "2024-10-01T11:01:39+00:00", - "comment_author": "vrabaud", - "comment_body": "I guess that would work but I don't know how to use it. Should all the enums be added to https://github.com/opencv/opencv/blob/450e741f8d53ff12b4e194c7762adaefb952555a/modules/js/generator/embindgen.py#L989 ? 
", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-optimize-container-access.json b/_reviewers/opencv-optimize-container-access.json new file mode 100644 index 0000000..ad61842 --- /dev/null +++ b/_reviewers/opencv-optimize-container-access.json @@ -0,0 +1,124 @@ +[ + { + "discussion_id": "2083770400", + "pr_number": 27153, + "pr_file": "modules/videoio/src/cap_ffmpeg_impl.hpp", + "created_at": "2025-05-12T03:45:24+00:00", + "commented_code": "}\n\n/// write a frame with FFMPEG\nbool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int width, int height, int cn, int origin )\nbool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int width, int height, int type, int origin )\n{\n if (!encode_video) {\n CV_Assert(cn == 1 && ((width > 0 && height == 1) || (width == 1 && height > 0 && step == 1)));\n CV_Assert((type == CV_16UC1 || type == CV_8UC1) && ((width > 0 && height == 1) || (width == 1 && height > 0 && step == 1)));\n const bool set_key_frame = key_frame ? key_frame : idr_period ? frame_idx % idr_period == 0 : 1;\n bool ret = icv_av_encapsulate_video_FFMPEG(oc, video_st, context, (uint8_t*)data, width, frame_idx, pts_index, b_frame_dts_delay, set_key_frame);\n frame_idx++;\n return ret;\n }\n\n // check parameters\n if (input_pix_fmt == AV_PIX_FMT_BGR24) {\n if (cn != 3) {\n return false;\n }\n }\n else if (input_pix_fmt == AV_PIX_FMT_GRAY8 || input_pix_fmt == AV_PIX_FMT_GRAY16LE) {\n if (cn != 1) {\n return false;\n }\n }\n else {\n CV_LOG_WARNING(NULL, \"Input data does not match selected pixel format: \"\n << av_get_pix_fmt_name(input_pix_fmt)\n << \", number of channels: \" << cn);\n CV_Assert(false);\n // check parameters and do conversions if needed\n cv::Mat inputMat(height, width, type, (void*)data, step);\n cv::Mat convertedMat;\n switch (input_pix_fmt) {\n case AV_PIX_FMT_BGR24: // expected CV_8UC3\n if (type == CV_8UC3)\n convertedMat = inputMat;\n else {\n if (type == CV_16UC1) // CV_16UC1 -> CV_8UC1\n inputMat.convertTo(inputMat, CV_8UC1, 1.0 / 256);", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2083770400", + "repo_full_name": "opencv/opencv", + "pr_number": 27153, + "pr_file": "modules/videoio/src/cap_ffmpeg_impl.hpp", + "discussion_id": "2083770400", + "commented_code": "@@ -2512,34 +2512,57 @@ static int icv_av_write_frame_FFMPEG( AVFormatContext * oc, AVStream * video_st,\n }\n \n /// write a frame with FFMPEG\n-bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int width, int height, int cn, int origin )\n+bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int width, int height, int type, int origin )\n {\n if (!encode_video) {\n- CV_Assert(cn == 1 && ((width > 0 && height == 1) || (width == 1 && height > 0 && step == 1)));\n+ CV_Assert((type == CV_16UC1 || type == CV_8UC1) && ((width > 0 && height == 1) || (width == 1 && height > 0 && step == 1)));\n const bool set_key_frame = key_frame ? key_frame : idr_period ? 
frame_idx % idr_period == 0 : 1;\n bool ret = icv_av_encapsulate_video_FFMPEG(oc, video_st, context, (uint8_t*)data, width, frame_idx, pts_index, b_frame_dts_delay, set_key_frame);\n frame_idx++;\n return ret;\n }\n \n- // check parameters\n- if (input_pix_fmt == AV_PIX_FMT_BGR24) {\n- if (cn != 3) {\n- return false;\n- }\n- }\n- else if (input_pix_fmt == AV_PIX_FMT_GRAY8 || input_pix_fmt == AV_PIX_FMT_GRAY16LE) {\n- if (cn != 1) {\n- return false;\n- }\n- }\n- else {\n- CV_LOG_WARNING(NULL, \"Input data does not match selected pixel format: \"\n- << av_get_pix_fmt_name(input_pix_fmt)\n- << \", number of channels: \" << cn);\n- CV_Assert(false);\n+ // check parameters and do conversions if needed\n+ cv::Mat inputMat(height, width, type, (void*)data, step);\n+ cv::Mat convertedMat;\n+ switch (input_pix_fmt) {\n+ case AV_PIX_FMT_BGR24: // expected CV_8UC3\n+ if (type == CV_8UC3)\n+ convertedMat = inputMat;\n+ else {\n+ if (type == CV_16UC1) // CV_16UC1 -> CV_8UC1\n+ inputMat.convertTo(inputMat, CV_8UC1, 1.0 / 256);", + "comment_created_at": "2025-05-12T03:45:24+00:00", + "comment_author": "opencv-alalek", + "comment_body": "> inputMat ... inputMat\r\n\r\n\"In-place\" ops should not be used in general (causes extra `.clone()` call).", + "pr_file_module": null + }, + { + "comment_id": "2094242958", + "repo_full_name": "opencv/opencv", + "pr_number": 27153, + "pr_file": "modules/videoio/src/cap_ffmpeg_impl.hpp", + "discussion_id": "2083770400", + "commented_code": "@@ -2512,34 +2512,57 @@ static int icv_av_write_frame_FFMPEG( AVFormatContext * oc, AVStream * video_st,\n }\n \n /// write a frame with FFMPEG\n-bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int width, int height, int cn, int origin )\n+bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int width, int height, int type, int origin )\n {\n if (!encode_video) {\n- CV_Assert(cn == 1 && ((width > 0 && height == 1) || (width == 1 && height > 0 && step == 1)));\n+ CV_Assert((type == CV_16UC1 || type == CV_8UC1) && ((width > 0 && height == 1) || (width == 1 && height > 0 && step == 1)));\n const bool set_key_frame = key_frame ? key_frame : idr_period ? 
frame_idx % idr_period == 0 : 1;\n bool ret = icv_av_encapsulate_video_FFMPEG(oc, video_st, context, (uint8_t*)data, width, frame_idx, pts_index, b_frame_dts_delay, set_key_frame);\n frame_idx++;\n return ret;\n }\n \n- // check parameters\n- if (input_pix_fmt == AV_PIX_FMT_BGR24) {\n- if (cn != 3) {\n- return false;\n- }\n- }\n- else if (input_pix_fmt == AV_PIX_FMT_GRAY8 || input_pix_fmt == AV_PIX_FMT_GRAY16LE) {\n- if (cn != 1) {\n- return false;\n- }\n- }\n- else {\n- CV_LOG_WARNING(NULL, \"Input data does not match selected pixel format: \"\n- << av_get_pix_fmt_name(input_pix_fmt)\n- << \", number of channels: \" << cn);\n- CV_Assert(false);\n+ // check parameters and do conversions if needed\n+ cv::Mat inputMat(height, width, type, (void*)data, step);\n+ cv::Mat convertedMat;\n+ switch (input_pix_fmt) {\n+ case AV_PIX_FMT_BGR24: // expected CV_8UC3\n+ if (type == CV_8UC3)\n+ convertedMat = inputMat;\n+ else {\n+ if (type == CV_16UC1) // CV_16UC1 -> CV_8UC1\n+ inputMat.convertTo(inputMat, CV_8UC1, 1.0 / 256);", + "comment_created_at": "2025-05-17T23:43:37+00:00", + "comment_author": "03kiko", + "comment_body": "Thanks for the heads-up, I’ll avoid in-place operations moving forward.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2003012896", + "pr_number": 27097, + "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/filter.hpp", + "created_at": "2025-03-19T10:44:22+00:00", + "commented_code": "{\n sepFilter2D* data = reinterpret_cast(context);\n\n switch (data->kernelx_length)\n uchar* _dst_data = dst_data;\n size_t _dst_step = dst_step;\n size_t size = sizeof(char);\n switch (data->dst_type)\n {\n case 3:\n return filter::invoke(height, {sepFilter<3>}, data, src_data, src_step, dst_data, dst_step, width, height, full_width, full_height, offset_x, offset_y);\n case 5:\n return filter::invoke(height, {sepFilter<5>}, data, src_data, src_step, dst_data, dst_step, width, height, full_width, full_height, offset_x, offset_y);\n case CV_16SC1:\n size = sizeof(short);\n break;\n case CV_32FC1:\n size = sizeof(float);\n break;\n }\n std::vector *dst = nullptr;\n if (src_data == _dst_data)\n {\n dst = new std::vector(width * height * size);", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2003012896", + "repo_full_name": "opencv/opencv", + "pr_number": 27097, + "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/filter.hpp", + "discussion_id": "2003012896", + "commented_code": "@@ -492,15 +517,61 @@ inline int sepFilter(cvhalFilter2D *context, uchar *src_data, size_t src_step, u\n {\n sepFilter2D* data = reinterpret_cast(context);\n \n- switch (data->kernelx_length)\n+ uchar* _dst_data = dst_data;\n+ size_t _dst_step = dst_step;\n+ size_t size = sizeof(char);\n+ switch (data->dst_type)\n {\n- case 3:\n- return filter::invoke(height, {sepFilter<3>}, data, src_data, src_step, dst_data, dst_step, width, height, full_width, full_height, offset_x, offset_y);\n- case 5:\n- return filter::invoke(height, {sepFilter<5>}, data, src_data, src_step, dst_data, dst_step, width, height, full_width, full_height, offset_x, offset_y);\n+ case CV_16SC1:\n+ size = sizeof(short);\n+ break;\n+ case CV_32FC1:\n+ size = sizeof(float);\n+ break;\n+ }\n+ std::vector *dst = nullptr;\n+ if (src_data == _dst_data)\n+ {\n+ dst = new std::vector(width * height * size);", + "comment_created_at": "2025-03-19T10:44:22+00:00", + "comment_author": "asmorkalov", + "comment_body": "just local `std::vector<>` outside of `if` should be enough here. 
It's empty by default also you save one new/delete pair if condition is true.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1962204688", + "pr_number": 26834, + "pr_file": "modules/imgproc/src/contours_common.hpp", + "created_at": "2025-02-19T18:48:05+00:00", + "commented_code": "class TreeIterator\n{\npublic:\n TreeIterator(Tree& tree_) : tree(tree_)\n TreeIterator(Tree& tree_) : tree(tree_)//,levels(&tree._treeIteratorArena)\n {\n CV_Assert(!tree.isEmpty());\n levels.push(0);\n levels.push_back(0);\n }\n bool isDone() const\n {\n return levels.empty();\n }\n const TreeNode& getNext_s()\n {\n int idx = levels.top();\n levels.pop();\n int idx = levels.back();\n levels.pop_back();\n const TreeNode& res = tree.elem(idx);\n int cur = tree.lastSibling(res.first_child);\n while (cur != -1)\n {\n levels.push(cur);\n levels.push_back(cur);\n cur = tree.elem(cur).prev;\n }\n return res;\n }\n\nprivate:\n std::stack levels;\n Tree& tree;\n vectorOfRanges levels;", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1962204688", + "repo_full_name": "opencv/opencv", + "pr_number": 26834, + "pr_file": "modules/imgproc/src/contours_common.hpp", + "discussion_id": "1962204688", + "commented_code": "@@ -135,47 +297,140 @@ template \n class TreeIterator\n {\n public:\n- TreeIterator(Tree& tree_) : tree(tree_)\n+ TreeIterator(Tree& tree_) : tree(tree_)//,levels(&tree._treeIteratorArena)\n {\n CV_Assert(!tree.isEmpty());\n- levels.push(0);\n+ levels.push_back(0);\n }\n bool isDone() const\n {\n return levels.empty();\n }\n const TreeNode& getNext_s()\n {\n- int idx = levels.top();\n- levels.pop();\n+ int idx = levels.back();\n+ levels.pop_back();\n const TreeNode& res = tree.elem(idx);\n int cur = tree.lastSibling(res.first_child);\n while (cur != -1)\n {\n- levels.push(cur);\n+ levels.push_back(cur);\n cur = tree.elem(cur).prev;\n }\n return res;\n }\n \n private:\n- std::stack levels;\n Tree& tree;\n+ vectorOfRanges levels;", + "comment_created_at": "2025-02-19T18:48:05+00:00", + "comment_author": "mshabunin", + "comment_body": "I'm observing better performance with `std::stack` (though less stable):\r\n\r\nOn x86_64 (Core i5-11600, Linux):\r\n- stack: `samples=100 mean=44.01 median=42.88 min=40.59 stddev=2.44 (5.6%)`\r\n- vectorOfRanges: `samples=100 mean=46.97 median=46.97 min=46.65 stddev=0.16 (0.3%)`\r\n\r\nOn AArch64 (Rockchip RK3588):\r\n- stack: `samples=100 mean=79.23 median=79.02 min=78.35 stddev=1.27 (1.6%)`\r\n- vectorOfRanges: `samples=100 mean=89.51 median=89.52 min=88.90 stddev=0.21 (0.2%)`\r\n\r\nI propose to leave the stack version. 
Also it is better for simplicity.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1830966908", + "pr_number": 26422, + "pr_file": "modules/imgproc/src/median_blur.simd.hpp", + "created_at": "2024-11-06T12:49:50+00:00", + "commented_code": "#undef N\n#undef UPDATE_ACC\n}\n#endif\n\n// Binary search to find the median offset in the histogram\nstatic inline void\nget_median_ofs256(int &ofs, const uint16_t *hist, const int halfsum) {\n int s = 0, step = 128;\n uint16_t ds, m;\n\n#if CV_SIMD || CV_SIMD_SCALABLE\n const int lanes = VTraits::vlanes();\n#endif\n\n ofs = 0;\n while (step) {\n#if CV_SIMD || CV_SIMD_SCALABLE\n // Use SIMD instructions when the step is larger than or equal to the lane size\n if (step >= lanes) {\n v_uint16 v_ds = vx_load(hist + ofs);\n for (int i = lanes; i <= step - lanes; i += lanes)\n v_ds = v_add(v_ds, vx_load(hist + ofs + i));\n ds = v_reduce_sum(v_ds);\n }\n // For smaller steps, use scalar accumulation\n else\n#endif\n {\n ds = hist[ofs];\n for (int i = 1; i < step; i++)\n ds += hist[ofs + i];\n }\n\n // Determine if the cumulative sum has reached or surpassed half the total sum\n m = (s + ds) <= halfsum;\n ofs += m * step;\n s += ds & -m;\n step = step >> 1;\n }\n}\n\nstatic void\nmedianBlur_8u(const Mat &_src, Mat &_dst, int ksize) {\n CV_INSTRUMENT_REGION();\n\n const int channels = _src.channels();\n int radius = ksize / 2;\n CV_Assert(ksize % 2 == 1);\n\n const int rows = _src.rows;\n const int cols = _src.cols;\n const int win_half_size = ksize * ksize / 2;\n\n const uint8_t *src_data = _src.ptr();\n size_t src_step = _src.step;\n uint8_t *dst_data = _dst.ptr();\n size_t dst_step = _dst.step;\n\n double nstripes = (double) (rows * cols) / 1024.0;\n parallel_for_(Range(0, rows), [&](const Range &r) {\n int y_start = r.start;\n int y_end = r.end;\n\n std::vector> hist256(channels, std::vector(256, 0));", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1830966908", + "repo_full_name": "opencv/opencv", + "pr_number": 26422, + "pr_file": "modules/imgproc/src/median_blur.simd.hpp", + "discussion_id": "1830966908", + "commented_code": "@@ -489,7 +490,149 @@ medianBlur_8u_Om( const Mat& _src, Mat& _dst, int m )\n #undef N\n #undef UPDATE_ACC\n }\n+#endif\n+\n+// Binary search to find the median offset in the histogram\n+static inline void\n+get_median_ofs256(int &ofs, const uint16_t *hist, const int halfsum) {\n+ int s = 0, step = 128;\n+ uint16_t ds, m;\n \n+#if CV_SIMD || CV_SIMD_SCALABLE\n+ const int lanes = VTraits::vlanes();\n+#endif\n+\n+ ofs = 0;\n+ while (step) {\n+#if CV_SIMD || CV_SIMD_SCALABLE\n+ // Use SIMD instructions when the step is larger than or equal to the lane size\n+ if (step >= lanes) {\n+ v_uint16 v_ds = vx_load(hist + ofs);\n+ for (int i = lanes; i <= step - lanes; i += lanes)\n+ v_ds = v_add(v_ds, vx_load(hist + ofs + i));\n+ ds = v_reduce_sum(v_ds);\n+ }\n+ // For smaller steps, use scalar accumulation\n+ else\n+#endif\n+ {\n+ ds = hist[ofs];\n+ for (int i = 1; i < step; i++)\n+ ds += hist[ofs + i];\n+ }\n+\n+ // Determine if the cumulative sum has reached or surpassed half the total sum\n+ m = (s + ds) <= halfsum;\n+ ofs += m * step;\n+ s += ds & -m;\n+ step = step >> 1;\n+ }\n+}\n+\n+static void\n+medianBlur_8u(const Mat &_src, Mat &_dst, int ksize) {\n+ CV_INSTRUMENT_REGION();\n+\n+ const int channels = _src.channels();\n+ int radius = ksize / 2;\n+ CV_Assert(ksize % 2 == 1);\n+\n+ const int rows = _src.rows;\n+ const int cols = _src.cols;\n+ const int win_half_size = ksize * ksize 
/ 2;\n+\n+ const uint8_t *src_data = _src.ptr();\n+ size_t src_step = _src.step;\n+ uint8_t *dst_data = _dst.ptr();\n+ size_t dst_step = _dst.step;\n+\n+ double nstripes = (double) (rows * cols) / 1024.0;\n+ parallel_for_(Range(0, rows), [&](const Range &r) {\n+ int y_start = r.start;\n+ int y_end = r.end;\n+\n+ std::vector> hist256(channels, std::vector(256, 0));", + "comment_created_at": "2024-11-06T12:49:50+00:00", + "comment_author": "vpisarev", + "comment_body": "don't use vector of vectors. It's very slow. Use plain 1D vector of 256*channels elements", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1830969632", + "pr_number": 26422, + "pr_file": "modules/imgproc/src/median_blur.simd.hpp", + "created_at": "2024-11-06T12:51:59+00:00", + "commented_code": "#undef N\n#undef UPDATE_ACC\n}\n#endif\n\n// Binary search to find the median offset in the histogram\nstatic inline void\nget_median_ofs256(int &ofs, const uint16_t *hist, const int halfsum) {\n int s = 0, step = 128;\n uint16_t ds, m;\n\n#if CV_SIMD || CV_SIMD_SCALABLE\n const int lanes = VTraits::vlanes();\n#endif\n\n ofs = 0;\n while (step) {\n#if CV_SIMD || CV_SIMD_SCALABLE\n // Use SIMD instructions when the step is larger than or equal to the lane size\n if (step >= lanes) {\n v_uint16 v_ds = vx_load(hist + ofs);\n for (int i = lanes; i <= step - lanes; i += lanes)\n v_ds = v_add(v_ds, vx_load(hist + ofs + i));\n ds = v_reduce_sum(v_ds);\n }\n // For smaller steps, use scalar accumulation\n else\n#endif\n {\n ds = hist[ofs];\n for (int i = 1; i < step; i++)\n ds += hist[ofs + i];\n }\n\n // Determine if the cumulative sum has reached or surpassed half the total sum\n m = (s + ds) <= halfsum;\n ofs += m * step;\n s += ds & -m;\n step = step >> 1;\n }\n}\n\nstatic void\nmedianBlur_8u(const Mat &_src, Mat &_dst, int ksize) {\n CV_INSTRUMENT_REGION();\n\n const int channels = _src.channels();\n int radius = ksize / 2;\n CV_Assert(ksize % 2 == 1);\n\n const int rows = _src.rows;\n const int cols = _src.cols;\n const int win_half_size = ksize * ksize / 2;\n\n const uint8_t *src_data = _src.ptr();\n size_t src_step = _src.step;\n uint8_t *dst_data = _dst.ptr();\n size_t dst_step = _dst.step;\n\n double nstripes = (double) (rows * cols) / 1024.0;\n parallel_for_(Range(0, rows), [&](const Range &r) {\n int y_start = r.start;\n int y_end = r.end;\n\n std::vector> hist256(channels, std::vector(256, 0));\n std::vector win_row_ptrs(ksize);", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1830969632", + "repo_full_name": "opencv/opencv", + "pr_number": 26422, + "pr_file": "modules/imgproc/src/median_blur.simd.hpp", + "discussion_id": "1830969632", + "commented_code": "@@ -489,7 +490,149 @@ medianBlur_8u_Om( const Mat& _src, Mat& _dst, int m )\n #undef N\n #undef UPDATE_ACC\n }\n+#endif\n+\n+// Binary search to find the median offset in the histogram\n+static inline void\n+get_median_ofs256(int &ofs, const uint16_t *hist, const int halfsum) {\n+ int s = 0, step = 128;\n+ uint16_t ds, m;\n \n+#if CV_SIMD || CV_SIMD_SCALABLE\n+ const int lanes = VTraits::vlanes();\n+#endif\n+\n+ ofs = 0;\n+ while (step) {\n+#if CV_SIMD || CV_SIMD_SCALABLE\n+ // Use SIMD instructions when the step is larger than or equal to the lane size\n+ if (step >= lanes) {\n+ v_uint16 v_ds = vx_load(hist + ofs);\n+ for (int i = lanes; i <= step - lanes; i += lanes)\n+ v_ds = v_add(v_ds, vx_load(hist + ofs + i));\n+ ds = v_reduce_sum(v_ds);\n+ }\n+ // For smaller steps, use scalar accumulation\n+ else\n+#endif\n+ {\n+ ds = 
hist[ofs];\n+ for (int i = 1; i < step; i++)\n+ ds += hist[ofs + i];\n+ }\n+\n+ // Determine if the cumulative sum has reached or surpassed half the total sum\n+ m = (s + ds) <= halfsum;\n+ ofs += m * step;\n+ s += ds & -m;\n+ step = step >> 1;\n+ }\n+}\n+\n+static void\n+medianBlur_8u(const Mat &_src, Mat &_dst, int ksize) {\n+ CV_INSTRUMENT_REGION();\n+\n+ const int channels = _src.channels();\n+ int radius = ksize / 2;\n+ CV_Assert(ksize % 2 == 1);\n+\n+ const int rows = _src.rows;\n+ const int cols = _src.cols;\n+ const int win_half_size = ksize * ksize / 2;\n+\n+ const uint8_t *src_data = _src.ptr();\n+ size_t src_step = _src.step;\n+ uint8_t *dst_data = _dst.ptr();\n+ size_t dst_step = _dst.step;\n+\n+ double nstripes = (double) (rows * cols) / 1024.0;\n+ parallel_for_(Range(0, rows), [&](const Range &r) {\n+ int y_start = r.start;\n+ int y_end = r.end;\n+\n+ std::vector> hist256(channels, std::vector(256, 0));\n+ std::vector win_row_ptrs(ksize);", + "comment_created_at": "2024-11-06T12:51:59+00:00", + "comment_author": "vpisarev", + "comment_body": "extract pointer from hist256 and win_row_ptrs. Accessing `std::vector` may be significantly slower than accessing plain arrays.\r\n\r\n```\r\nstd::vector<...> hist256vec(...);\r\nuint16_t* hist256 = hist256vec.data();\r\nstd::vector<...> win_row_ptrs_vec(...);\r\nconst uint8_t* win_row_ptrs = win_row_ptrs_vec.data();\r\n```\r\n", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-optimize-container-access.md b/_reviewers/opencv-optimize-container-access.md index 730ff1a..57111f4 100644 --- a/_reviewers/opencv-optimize-container-access.md +++ b/_reviewers/opencv-optimize-container-access.md @@ -65,129 +65,3 @@ Choose efficient container types and optimize access patterns in performance-cri ``` Choose container types based on actual performance measurements rather than assumptions, as the best data structure can vary by platform and usage pattern. - - -[ - { - "discussion_id": "2083770400", - "pr_number": 27153, - "pr_file": "modules/videoio/src/cap_ffmpeg_impl.hpp", - "created_at": "2025-05-12T03:45:24+00:00", - "commented_code": "}\n\n/// write a frame with FFMPEG\nbool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int width, int height, int cn, int origin )\nbool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int width, int height, int type, int origin )\n{\n if (!encode_video) {\n CV_Assert(cn == 1 && ((width > 0 && height == 1) || (width == 1 && height > 0 && step == 1)));\n CV_Assert((type == CV_16UC1 || type == CV_8UC1) && ((width > 0 && height == 1) || (width == 1 && height > 0 && step == 1)));\n const bool set_key_frame = key_frame ? key_frame : idr_period ? 
frame_idx % idr_period == 0 : 1;\n bool ret = icv_av_encapsulate_video_FFMPEG(oc, video_st, context, (uint8_t*)data, width, frame_idx, pts_index, b_frame_dts_delay, set_key_frame);\n frame_idx++;\n return ret;\n }\n\n // check parameters\n if (input_pix_fmt == AV_PIX_FMT_BGR24) {\n if (cn != 3) {\n return false;\n }\n }\n else if (input_pix_fmt == AV_PIX_FMT_GRAY8 || input_pix_fmt == AV_PIX_FMT_GRAY16LE) {\n if (cn != 1) {\n return false;\n }\n }\n else {\n CV_LOG_WARNING(NULL, \"Input data does not match selected pixel format: \"\n << av_get_pix_fmt_name(input_pix_fmt)\n << \", number of channels: \" << cn);\n CV_Assert(false);\n // check parameters and do conversions if needed\n cv::Mat inputMat(height, width, type, (void*)data, step);\n cv::Mat convertedMat;\n switch (input_pix_fmt) {\n case AV_PIX_FMT_BGR24: // expected CV_8UC3\n if (type == CV_8UC3)\n convertedMat = inputMat;\n else {\n if (type == CV_16UC1) // CV_16UC1 -> CV_8UC1\n inputMat.convertTo(inputMat, CV_8UC1, 1.0 / 256);", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2083770400", - "repo_full_name": "opencv/opencv", - "pr_number": 27153, - "pr_file": "modules/videoio/src/cap_ffmpeg_impl.hpp", - "discussion_id": "2083770400", - "commented_code": "@@ -2512,34 +2512,57 @@ static int icv_av_write_frame_FFMPEG( AVFormatContext * oc, AVStream * video_st,\n }\n \n /// write a frame with FFMPEG\n-bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int width, int height, int cn, int origin )\n+bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int width, int height, int type, int origin )\n {\n if (!encode_video) {\n- CV_Assert(cn == 1 && ((width > 0 && height == 1) || (width == 1 && height > 0 && step == 1)));\n+ CV_Assert((type == CV_16UC1 || type == CV_8UC1) && ((width > 0 && height == 1) || (width == 1 && height > 0 && step == 1)));\n const bool set_key_frame = key_frame ? key_frame : idr_period ? frame_idx % idr_period == 0 : 1;\n bool ret = icv_av_encapsulate_video_FFMPEG(oc, video_st, context, (uint8_t*)data, width, frame_idx, pts_index, b_frame_dts_delay, set_key_frame);\n frame_idx++;\n return ret;\n }\n \n- // check parameters\n- if (input_pix_fmt == AV_PIX_FMT_BGR24) {\n- if (cn != 3) {\n- return false;\n- }\n- }\n- else if (input_pix_fmt == AV_PIX_FMT_GRAY8 || input_pix_fmt == AV_PIX_FMT_GRAY16LE) {\n- if (cn != 1) {\n- return false;\n- }\n- }\n- else {\n- CV_LOG_WARNING(NULL, \"Input data does not match selected pixel format: \"\n- << av_get_pix_fmt_name(input_pix_fmt)\n- << \", number of channels: \" << cn);\n- CV_Assert(false);\n+ // check parameters and do conversions if needed\n+ cv::Mat inputMat(height, width, type, (void*)data, step);\n+ cv::Mat convertedMat;\n+ switch (input_pix_fmt) {\n+ case AV_PIX_FMT_BGR24: // expected CV_8UC3\n+ if (type == CV_8UC3)\n+ convertedMat = inputMat;\n+ else {\n+ if (type == CV_16UC1) // CV_16UC1 -> CV_8UC1\n+ inputMat.convertTo(inputMat, CV_8UC1, 1.0 / 256);", - "comment_created_at": "2025-05-12T03:45:24+00:00", - "comment_author": "opencv-alalek", - "comment_body": "> inputMat ... 
inputMat\r\n\r\n\"In-place\" ops should not be used in general (causes extra `.clone()` call).", - "pr_file_module": null - }, - { - "comment_id": "2094242958", - "repo_full_name": "opencv/opencv", - "pr_number": 27153, - "pr_file": "modules/videoio/src/cap_ffmpeg_impl.hpp", - "discussion_id": "2083770400", - "commented_code": "@@ -2512,34 +2512,57 @@ static int icv_av_write_frame_FFMPEG( AVFormatContext * oc, AVStream * video_st,\n }\n \n /// write a frame with FFMPEG\n-bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int width, int height, int cn, int origin )\n+bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int width, int height, int type, int origin )\n {\n if (!encode_video) {\n- CV_Assert(cn == 1 && ((width > 0 && height == 1) || (width == 1 && height > 0 && step == 1)));\n+ CV_Assert((type == CV_16UC1 || type == CV_8UC1) && ((width > 0 && height == 1) || (width == 1 && height > 0 && step == 1)));\n const bool set_key_frame = key_frame ? key_frame : idr_period ? frame_idx % idr_period == 0 : 1;\n bool ret = icv_av_encapsulate_video_FFMPEG(oc, video_st, context, (uint8_t*)data, width, frame_idx, pts_index, b_frame_dts_delay, set_key_frame);\n frame_idx++;\n return ret;\n }\n \n- // check parameters\n- if (input_pix_fmt == AV_PIX_FMT_BGR24) {\n- if (cn != 3) {\n- return false;\n- }\n- }\n- else if (input_pix_fmt == AV_PIX_FMT_GRAY8 || input_pix_fmt == AV_PIX_FMT_GRAY16LE) {\n- if (cn != 1) {\n- return false;\n- }\n- }\n- else {\n- CV_LOG_WARNING(NULL, \"Input data does not match selected pixel format: \"\n- << av_get_pix_fmt_name(input_pix_fmt)\n- << \", number of channels: \" << cn);\n- CV_Assert(false);\n+ // check parameters and do conversions if needed\n+ cv::Mat inputMat(height, width, type, (void*)data, step);\n+ cv::Mat convertedMat;\n+ switch (input_pix_fmt) {\n+ case AV_PIX_FMT_BGR24: // expected CV_8UC3\n+ if (type == CV_8UC3)\n+ convertedMat = inputMat;\n+ else {\n+ if (type == CV_16UC1) // CV_16UC1 -> CV_8UC1\n+ inputMat.convertTo(inputMat, CV_8UC1, 1.0 / 256);", - "comment_created_at": "2025-05-17T23:43:37+00:00", - "comment_author": "03kiko", - "comment_body": "Thanks for the heads-up, I\u2019ll avoid in-place operations moving forward.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2003012896", - "pr_number": 27097, - "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/filter.hpp", - "created_at": "2025-03-19T10:44:22+00:00", - "commented_code": "{\n sepFilter2D* data = reinterpret_cast(context);\n\n switch (data->kernelx_length)\n uchar* _dst_data = dst_data;\n size_t _dst_step = dst_step;\n size_t size = sizeof(char);\n switch (data->dst_type)\n {\n case 3:\n return filter::invoke(height, {sepFilter<3>}, data, src_data, src_step, dst_data, dst_step, width, height, full_width, full_height, offset_x, offset_y);\n case 5:\n return filter::invoke(height, {sepFilter<5>}, data, src_data, src_step, dst_data, dst_step, width, height, full_width, full_height, offset_x, offset_y);\n case CV_16SC1:\n size = sizeof(short);\n break;\n case CV_32FC1:\n size = sizeof(float);\n break;\n }\n std::vector *dst = nullptr;\n if (src_data == _dst_data)\n {\n dst = new std::vector(width * height * size);", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2003012896", - "repo_full_name": "opencv/opencv", - "pr_number": 27097, - "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/filter.hpp", - "discussion_id": "2003012896", - "commented_code": "@@ -492,15 +517,61 @@ inline int sepFilter(cvhalFilter2D 
*context, uchar *src_data, size_t src_step, u\n {\n sepFilter2D* data = reinterpret_cast(context);\n \n- switch (data->kernelx_length)\n+ uchar* _dst_data = dst_data;\n+ size_t _dst_step = dst_step;\n+ size_t size = sizeof(char);\n+ switch (data->dst_type)\n {\n- case 3:\n- return filter::invoke(height, {sepFilter<3>}, data, src_data, src_step, dst_data, dst_step, width, height, full_width, full_height, offset_x, offset_y);\n- case 5:\n- return filter::invoke(height, {sepFilter<5>}, data, src_data, src_step, dst_data, dst_step, width, height, full_width, full_height, offset_x, offset_y);\n+ case CV_16SC1:\n+ size = sizeof(short);\n+ break;\n+ case CV_32FC1:\n+ size = sizeof(float);\n+ break;\n+ }\n+ std::vector *dst = nullptr;\n+ if (src_data == _dst_data)\n+ {\n+ dst = new std::vector(width * height * size);", - "comment_created_at": "2025-03-19T10:44:22+00:00", - "comment_author": "asmorkalov", - "comment_body": "just local `std::vector<>` outside of `if` should be enough here. It's empty by default also you save one new/delete pair if condition is true.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1962204688", - "pr_number": 26834, - "pr_file": "modules/imgproc/src/contours_common.hpp", - "created_at": "2025-02-19T18:48:05+00:00", - "commented_code": "class TreeIterator\n{\npublic:\n TreeIterator(Tree& tree_) : tree(tree_)\n TreeIterator(Tree& tree_) : tree(tree_)//,levels(&tree._treeIteratorArena)\n {\n CV_Assert(!tree.isEmpty());\n levels.push(0);\n levels.push_back(0);\n }\n bool isDone() const\n {\n return levels.empty();\n }\n const TreeNode& getNext_s()\n {\n int idx = levels.top();\n levels.pop();\n int idx = levels.back();\n levels.pop_back();\n const TreeNode& res = tree.elem(idx);\n int cur = tree.lastSibling(res.first_child);\n while (cur != -1)\n {\n levels.push(cur);\n levels.push_back(cur);\n cur = tree.elem(cur).prev;\n }\n return res;\n }\n\nprivate:\n std::stack levels;\n Tree& tree;\n vectorOfRanges levels;", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1962204688", - "repo_full_name": "opencv/opencv", - "pr_number": 26834, - "pr_file": "modules/imgproc/src/contours_common.hpp", - "discussion_id": "1962204688", - "commented_code": "@@ -135,47 +297,140 @@ template \n class TreeIterator\n {\n public:\n- TreeIterator(Tree& tree_) : tree(tree_)\n+ TreeIterator(Tree& tree_) : tree(tree_)//,levels(&tree._treeIteratorArena)\n {\n CV_Assert(!tree.isEmpty());\n- levels.push(0);\n+ levels.push_back(0);\n }\n bool isDone() const\n {\n return levels.empty();\n }\n const TreeNode& getNext_s()\n {\n- int idx = levels.top();\n- levels.pop();\n+ int idx = levels.back();\n+ levels.pop_back();\n const TreeNode& res = tree.elem(idx);\n int cur = tree.lastSibling(res.first_child);\n while (cur != -1)\n {\n- levels.push(cur);\n+ levels.push_back(cur);\n cur = tree.elem(cur).prev;\n }\n return res;\n }\n \n private:\n- std::stack levels;\n Tree& tree;\n+ vectorOfRanges levels;", - "comment_created_at": "2025-02-19T18:48:05+00:00", - "comment_author": "mshabunin", - "comment_body": "I'm observing better performance with `std::stack` (though less stable):\r\n\r\nOn x86_64 (Core i5-11600, Linux):\r\n- stack: `samples=100 mean=44.01 median=42.88 min=40.59 stddev=2.44 (5.6%)`\r\n- vectorOfRanges: `samples=100 mean=46.97 median=46.97 min=46.65 stddev=0.16 (0.3%)`\r\n\r\nOn AArch64 (Rockchip RK3588):\r\n- stack: `samples=100 mean=79.23 median=79.02 min=78.35 stddev=1.27 (1.6%)`\r\n- vectorOfRanges: `samples=100 mean=89.51 median=89.52 
min=88.90 stddev=0.21 (0.2%)`\r\n\r\nI propose to leave the stack version. Also it is better for simplicity.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1830966908", - "pr_number": 26422, - "pr_file": "modules/imgproc/src/median_blur.simd.hpp", - "created_at": "2024-11-06T12:49:50+00:00", - "commented_code": "#undef N\n#undef UPDATE_ACC\n}\n#endif\n\n// Binary search to find the median offset in the histogram\nstatic inline void\nget_median_ofs256(int &ofs, const uint16_t *hist, const int halfsum) {\n int s = 0, step = 128;\n uint16_t ds, m;\n\n#if CV_SIMD || CV_SIMD_SCALABLE\n const int lanes = VTraits::vlanes();\n#endif\n\n ofs = 0;\n while (step) {\n#if CV_SIMD || CV_SIMD_SCALABLE\n // Use SIMD instructions when the step is larger than or equal to the lane size\n if (step >= lanes) {\n v_uint16 v_ds = vx_load(hist + ofs);\n for (int i = lanes; i <= step - lanes; i += lanes)\n v_ds = v_add(v_ds, vx_load(hist + ofs + i));\n ds = v_reduce_sum(v_ds);\n }\n // For smaller steps, use scalar accumulation\n else\n#endif\n {\n ds = hist[ofs];\n for (int i = 1; i < step; i++)\n ds += hist[ofs + i];\n }\n\n // Determine if the cumulative sum has reached or surpassed half the total sum\n m = (s + ds) <= halfsum;\n ofs += m * step;\n s += ds & -m;\n step = step >> 1;\n }\n}\n\nstatic void\nmedianBlur_8u(const Mat &_src, Mat &_dst, int ksize) {\n CV_INSTRUMENT_REGION();\n\n const int channels = _src.channels();\n int radius = ksize / 2;\n CV_Assert(ksize % 2 == 1);\n\n const int rows = _src.rows;\n const int cols = _src.cols;\n const int win_half_size = ksize * ksize / 2;\n\n const uint8_t *src_data = _src.ptr();\n size_t src_step = _src.step;\n uint8_t *dst_data = _dst.ptr();\n size_t dst_step = _dst.step;\n\n double nstripes = (double) (rows * cols) / 1024.0;\n parallel_for_(Range(0, rows), [&](const Range &r) {\n int y_start = r.start;\n int y_end = r.end;\n\n std::vector> hist256(channels, std::vector(256, 0));", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1830966908", - "repo_full_name": "opencv/opencv", - "pr_number": 26422, - "pr_file": "modules/imgproc/src/median_blur.simd.hpp", - "discussion_id": "1830966908", - "commented_code": "@@ -489,7 +490,149 @@ medianBlur_8u_Om( const Mat& _src, Mat& _dst, int m )\n #undef N\n #undef UPDATE_ACC\n }\n+#endif\n+\n+// Binary search to find the median offset in the histogram\n+static inline void\n+get_median_ofs256(int &ofs, const uint16_t *hist, const int halfsum) {\n+ int s = 0, step = 128;\n+ uint16_t ds, m;\n \n+#if CV_SIMD || CV_SIMD_SCALABLE\n+ const int lanes = VTraits::vlanes();\n+#endif\n+\n+ ofs = 0;\n+ while (step) {\n+#if CV_SIMD || CV_SIMD_SCALABLE\n+ // Use SIMD instructions when the step is larger than or equal to the lane size\n+ if (step >= lanes) {\n+ v_uint16 v_ds = vx_load(hist + ofs);\n+ for (int i = lanes; i <= step - lanes; i += lanes)\n+ v_ds = v_add(v_ds, vx_load(hist + ofs + i));\n+ ds = v_reduce_sum(v_ds);\n+ }\n+ // For smaller steps, use scalar accumulation\n+ else\n+#endif\n+ {\n+ ds = hist[ofs];\n+ for (int i = 1; i < step; i++)\n+ ds += hist[ofs + i];\n+ }\n+\n+ // Determine if the cumulative sum has reached or surpassed half the total sum\n+ m = (s + ds) <= halfsum;\n+ ofs += m * step;\n+ s += ds & -m;\n+ step = step >> 1;\n+ }\n+}\n+\n+static void\n+medianBlur_8u(const Mat &_src, Mat &_dst, int ksize) {\n+ CV_INSTRUMENT_REGION();\n+\n+ const int channels = _src.channels();\n+ int radius = ksize / 2;\n+ CV_Assert(ksize % 2 == 1);\n+\n+ const int rows = 
_src.rows;\n+ const int cols = _src.cols;\n+ const int win_half_size = ksize * ksize / 2;\n+\n+ const uint8_t *src_data = _src.ptr();\n+ size_t src_step = _src.step;\n+ uint8_t *dst_data = _dst.ptr();\n+ size_t dst_step = _dst.step;\n+\n+ double nstripes = (double) (rows * cols) / 1024.0;\n+ parallel_for_(Range(0, rows), [&](const Range &r) {\n+ int y_start = r.start;\n+ int y_end = r.end;\n+\n+ std::vector> hist256(channels, std::vector(256, 0));", - "comment_created_at": "2024-11-06T12:49:50+00:00", - "comment_author": "vpisarev", - "comment_body": "don't use vector of vectors. It's very slow. Use plain 1D vector of 256*channels elements", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1830969632", - "pr_number": 26422, - "pr_file": "modules/imgproc/src/median_blur.simd.hpp", - "created_at": "2024-11-06T12:51:59+00:00", - "commented_code": "#undef N\n#undef UPDATE_ACC\n}\n#endif\n\n// Binary search to find the median offset in the histogram\nstatic inline void\nget_median_ofs256(int &ofs, const uint16_t *hist, const int halfsum) {\n int s = 0, step = 128;\n uint16_t ds, m;\n\n#if CV_SIMD || CV_SIMD_SCALABLE\n const int lanes = VTraits::vlanes();\n#endif\n\n ofs = 0;\n while (step) {\n#if CV_SIMD || CV_SIMD_SCALABLE\n // Use SIMD instructions when the step is larger than or equal to the lane size\n if (step >= lanes) {\n v_uint16 v_ds = vx_load(hist + ofs);\n for (int i = lanes; i <= step - lanes; i += lanes)\n v_ds = v_add(v_ds, vx_load(hist + ofs + i));\n ds = v_reduce_sum(v_ds);\n }\n // For smaller steps, use scalar accumulation\n else\n#endif\n {\n ds = hist[ofs];\n for (int i = 1; i < step; i++)\n ds += hist[ofs + i];\n }\n\n // Determine if the cumulative sum has reached or surpassed half the total sum\n m = (s + ds) <= halfsum;\n ofs += m * step;\n s += ds & -m;\n step = step >> 1;\n }\n}\n\nstatic void\nmedianBlur_8u(const Mat &_src, Mat &_dst, int ksize) {\n CV_INSTRUMENT_REGION();\n\n const int channels = _src.channels();\n int radius = ksize / 2;\n CV_Assert(ksize % 2 == 1);\n\n const int rows = _src.rows;\n const int cols = _src.cols;\n const int win_half_size = ksize * ksize / 2;\n\n const uint8_t *src_data = _src.ptr();\n size_t src_step = _src.step;\n uint8_t *dst_data = _dst.ptr();\n size_t dst_step = _dst.step;\n\n double nstripes = (double) (rows * cols) / 1024.0;\n parallel_for_(Range(0, rows), [&](const Range &r) {\n int y_start = r.start;\n int y_end = r.end;\n\n std::vector> hist256(channels, std::vector(256, 0));\n std::vector win_row_ptrs(ksize);", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1830969632", - "repo_full_name": "opencv/opencv", - "pr_number": 26422, - "pr_file": "modules/imgproc/src/median_blur.simd.hpp", - "discussion_id": "1830969632", - "commented_code": "@@ -489,7 +490,149 @@ medianBlur_8u_Om( const Mat& _src, Mat& _dst, int m )\n #undef N\n #undef UPDATE_ACC\n }\n+#endif\n+\n+// Binary search to find the median offset in the histogram\n+static inline void\n+get_median_ofs256(int &ofs, const uint16_t *hist, const int halfsum) {\n+ int s = 0, step = 128;\n+ uint16_t ds, m;\n \n+#if CV_SIMD || CV_SIMD_SCALABLE\n+ const int lanes = VTraits::vlanes();\n+#endif\n+\n+ ofs = 0;\n+ while (step) {\n+#if CV_SIMD || CV_SIMD_SCALABLE\n+ // Use SIMD instructions when the step is larger than or equal to the lane size\n+ if (step >= lanes) {\n+ v_uint16 v_ds = vx_load(hist + ofs);\n+ for (int i = lanes; i <= step - lanes; i += lanes)\n+ v_ds = v_add(v_ds, vx_load(hist + ofs + i));\n+ ds = 
v_reduce_sum(v_ds);\n+ }\n+ // For smaller steps, use scalar accumulation\n+ else\n+#endif\n+ {\n+ ds = hist[ofs];\n+ for (int i = 1; i < step; i++)\n+ ds += hist[ofs + i];\n+ }\n+\n+ // Determine if the cumulative sum has reached or surpassed half the total sum\n+ m = (s + ds) <= halfsum;\n+ ofs += m * step;\n+ s += ds & -m;\n+ step = step >> 1;\n+ }\n+}\n+\n+static void\n+medianBlur_8u(const Mat &_src, Mat &_dst, int ksize) {\n+ CV_INSTRUMENT_REGION();\n+\n+ const int channels = _src.channels();\n+ int radius = ksize / 2;\n+ CV_Assert(ksize % 2 == 1);\n+\n+ const int rows = _src.rows;\n+ const int cols = _src.cols;\n+ const int win_half_size = ksize * ksize / 2;\n+\n+ const uint8_t *src_data = _src.ptr();\n+ size_t src_step = _src.step;\n+ uint8_t *dst_data = _dst.ptr();\n+ size_t dst_step = _dst.step;\n+\n+ double nstripes = (double) (rows * cols) / 1024.0;\n+ parallel_for_(Range(0, rows), [&](const Range &r) {\n+ int y_start = r.start;\n+ int y_end = r.end;\n+\n+ std::vector> hist256(channels, std::vector(256, 0));\n+ std::vector win_row_ptrs(ksize);", - "comment_created_at": "2024-11-06T12:51:59+00:00", - "comment_author": "vpisarev", - "comment_body": "extract pointer from hist256 and win_row_ptrs. Accessing `std::vector` may be significantly slower than accessing plain arrays.\r\n\r\n```\r\nstd::vector<...> hist256vec(...);\r\nuint16_t* hist256 = hist256vec.data();\r\nstd::vector<...> win_row_ptrs_vec(...);\r\nconst uint8_t* win_row_ptrs = win_row_ptrs_vec.data();\r\n```\r\n", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-optimize-for-common-cases.json b/_reviewers/opencv-optimize-for-common-cases.json new file mode 100644 index 0000000..9437e3d --- /dev/null +++ b/_reviewers/opencv-optimize-for-common-cases.json @@ -0,0 +1,114 @@ +[ + { + "discussion_id": "2086365469", + "pr_number": 27299, + "pr_file": "modules/imgproc/src/median_blur.simd.hpp", + "created_at": "2025-05-13T09:32:49+00:00", + "commented_code": "#else\n int nlanes = 1;\n#endif\n for( ; j <= size.width - nlanes - cn; j += nlanes )\n if(_dst.cols > 2*nlanes) //minimum size to safely allow extra vectorized store into next row.", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2086365469", + "repo_full_name": "opencv/opencv", + "pr_number": 27299, + "pr_file": "modules/imgproc/src/median_blur.simd.hpp", + "discussion_id": "2086365469", + "commented_code": "@@ -693,18 +694,38 @@ medianBlur_SortNet( const Mat& _src, Mat& _dst, int m )\n #else\n int nlanes = 1;\n #endif\n- for( ; j <= size.width - nlanes - cn; j += nlanes )\n+ if(_dst.cols > 2*nlanes) //minimum size to safely allow extra vectorized store into next row.", + "comment_created_at": "2025-05-13T09:32:49+00:00", + "comment_author": "fengyuentau", + "comment_body": "What if _dst.cols <= 2*nlanes? Just skipped?", + "pr_file_module": null + }, + { + "comment_id": "2086435491", + "repo_full_name": "opencv/opencv", + "pr_number": 27299, + "pr_file": "modules/imgproc/src/median_blur.simd.hpp", + "discussion_id": "2086365469", + "commented_code": "@@ -693,18 +694,38 @@ medianBlur_SortNet( const Mat& _src, Mat& _dst, int m )\n #else\n int nlanes = 1;\n #endif\n- for( ; j <= size.width - nlanes - cn; j += nlanes )\n+ if(_dst.cols > 2*nlanes) //minimum size to safely allow extra vectorized store into next row.", + "comment_created_at": "2025-05-13T10:06:19+00:00", + "comment_author": "madanm3", + "comment_body": "this will not be skipped. when cols <= 2* lanes,\r\n- limit gets increased after this loop. 
\"limit = size.width\"\r\n- then it gets done in \"for( ; j < limit; j++ )\" loop above.\r\n\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1946963394", + "pr_number": 26887, + "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/inv_sqrt_f.hpp", + "created_at": "2025-02-07T18:07:36+00:00", + "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n#ifndef OPENCV_HAL_RVV_INV_SQRTF32_HPP_INCLUDED\n#define OPENCV_HAL_RVV_INV_SQRTF32_HPP_INCLUDED\n\n#include \n\nnamespace cv { namespace cv_hal_rvv {\n\n#undef cv_hal_invSqrt32f\n#define cv_hal_invSqrt32f cv::cv_hal_rvv::invSqrt32f\n\ninline int invSqrt32f (const float *src, float *dst, const int len) {\n const size_t vl = __riscv_vsetvl_e32m8(len);\n const size_t remainings = len % vl;\n auto calc_fun = [&](const size_t i, const size_t vl) {\n vfloat32m8_t vsrc = __riscv_vle32_v_f32m8(&src[i], vl), \n vres;\n vres = __riscv_vfsqrt_v_f32m8(vsrc, vl);\n vres = __riscv_vfrdiv_vf_f32m8(vres, 1., vl);\n __riscv_vse32_v_f32m8(&dst[i], vres, vl);\n };\n\n size_t i = 0;\n for (; i < len - remainings; i += vl)", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1946963394", + "repo_full_name": "opencv/opencv", + "pr_number": 26887, + "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/inv_sqrt_f.hpp", + "discussion_id": "1946963394", + "commented_code": "@@ -0,0 +1,36 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+#ifndef OPENCV_HAL_RVV_INV_SQRTF32_HPP_INCLUDED\n+#define OPENCV_HAL_RVV_INV_SQRTF32_HPP_INCLUDED\n+\n+#include \n+\n+namespace cv { namespace cv_hal_rvv {\n+\n+#undef cv_hal_invSqrt32f\n+#define cv_hal_invSqrt32f cv::cv_hal_rvv::invSqrt32f\n+\n+inline int invSqrt32f (const float *src, float *dst, const int len) {\n+ const size_t vl = __riscv_vsetvl_e32m8(len);\n+ const size_t remainings = len % vl;\n+ auto calc_fun = [&](const size_t i, const size_t vl) {\n+ vfloat32m8_t vsrc = __riscv_vle32_v_f32m8(&src[i], vl), \n+ vres;\n+ vres = __riscv_vfsqrt_v_f32m8(vsrc, vl);\n+ vres = __riscv_vfrdiv_vf_f32m8(vres, 1., vl);\n+ __riscv_vse32_v_f32m8(&dst[i], vres, vl);\n+ };\n+\n+ size_t i = 0;\n+ for (; i < len - remainings; i += vl)", + "comment_created_at": "2025-02-07T18:07:36+00:00", + "comment_author": "dkurt", + "comment_body": "We can avoid `len % vl` and `remainings ` variable by using the following loop:\r\n```cpp\r\nfor (; i <= len - vl; i += vl)\r\n{\r\n calc_fun(i, vl);\r\n}\r\nif (i < len)\r\n{\r\n size_t tail_len = __riscv_vsetvl_e32m8(len - i);\r\n calc_fun(i, tail_len);\r\n}\r\n```\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1830973143", + "pr_number": 26422, + "pr_file": "modules/imgproc/src/median_blur.simd.hpp", + "created_at": "2024-11-06T12:54:38+00:00", + "commented_code": "#undef N\n#undef UPDATE_ACC\n}\n#endif\n\n// Binary search to find the median offset in the histogram\nstatic inline void\nget_median_ofs256(int &ofs, const uint16_t *hist, const int halfsum) {\n int s = 0, step = 128;\n uint16_t ds, m;\n\n#if CV_SIMD || CV_SIMD_SCALABLE\n const int lanes = VTraits::vlanes();\n#endif\n\n ofs = 0;\n while (step) {\n#if CV_SIMD || CV_SIMD_SCALABLE\n // Use SIMD instructions when the step is larger than or equal to the lane size\n if (step >= lanes) {\n v_uint16 
v_ds = vx_load(hist + ofs);\n for (int i = lanes; i <= step - lanes; i += lanes)\n v_ds = v_add(v_ds, vx_load(hist + ofs + i));\n ds = v_reduce_sum(v_ds);\n }\n // For smaller steps, use scalar accumulation\n else\n#endif\n {\n ds = hist[ofs];\n for (int i = 1; i < step; i++)\n ds += hist[ofs + i];\n }\n\n // Determine if the cumulative sum has reached or surpassed half the total sum\n m = (s + ds) <= halfsum;\n ofs += m * step;\n s += ds & -m;\n step = step >> 1;\n }\n}\n\nstatic void\nmedianBlur_8u(const Mat &_src, Mat &_dst, int ksize) {\n CV_INSTRUMENT_REGION();\n\n const int channels = _src.channels();\n int radius = ksize / 2;\n CV_Assert(ksize % 2 == 1);\n\n const int rows = _src.rows;\n const int cols = _src.cols;\n const int win_half_size = ksize * ksize / 2;\n\n const uint8_t *src_data = _src.ptr();\n size_t src_step = _src.step;\n uint8_t *dst_data = _dst.ptr();\n size_t dst_step = _dst.step;\n\n double nstripes = (double) (rows * cols) / 1024.0;\n parallel_for_(Range(0, rows), [&](const Range &r) {\n int y_start = r.start;\n int y_end = r.end;\n\n std::vector> hist256(channels, std::vector(256, 0));\n std::vector win_row_ptrs(ksize);\n\n // initial hist for the first pixel\n for (int dy = -radius; dy <= radius; dy++) {\n int y = std::clamp(y_start + dy, 0, rows - 1);\n const uint8_t *row_ptr = src_data + y * src_step;\n win_row_ptrs[dy + radius] = row_ptr;\n\n // always start from col 0, so need to add radius to first pixel\n for (int c = 0; c < channels; ++c)\n hist256[c][row_ptr[c]] += radius + 1;\n // add the rest of the row\n for (int dx = 1; dx <= radius; dx++) {\n int x = std::min(dx, cols - 1);\n const uint8_t *pixel_ptr = row_ptr + x * channels;\n for (int c = 0; c < channels; ++c) {\n hist256[c][pixel_ptr[c]]++;\n }\n }\n }\n\n int x_start = 0, x_end = cols, x_step = 1;\n // slide the window across the row\n for (int y = y_start; y < y_end; y++) {\n uint8_t *dst_pixel_ptr = dst_data + y * dst_step + x_start * channels;\n for (int x = x_start; x != x_end; x += x_step, dst_pixel_ptr += x_step * channels) {\n if (x != x_start) {\n // update hist by removing and adding pixel values as the window moves\n int x_remove = std::clamp(x - (radius + 1) * x_step, 0, cols - 1) * channels;\n int x_add = std::clamp(x + radius * x_step, 0, cols - 1) * channels;\n for (int dy = 0; dy < ksize; dy++) {", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1830973143", + "repo_full_name": "opencv/opencv", + "pr_number": 26422, + "pr_file": "modules/imgproc/src/median_blur.simd.hpp", + "discussion_id": "1830973143", + "commented_code": "@@ -489,7 +490,149 @@ medianBlur_8u_Om( const Mat& _src, Mat& _dst, int m )\n #undef N\n #undef UPDATE_ACC\n }\n+#endif\n+\n+// Binary search to find the median offset in the histogram\n+static inline void\n+get_median_ofs256(int &ofs, const uint16_t *hist, const int halfsum) {\n+ int s = 0, step = 128;\n+ uint16_t ds, m;\n \n+#if CV_SIMD || CV_SIMD_SCALABLE\n+ const int lanes = VTraits::vlanes();\n+#endif\n+\n+ ofs = 0;\n+ while (step) {\n+#if CV_SIMD || CV_SIMD_SCALABLE\n+ // Use SIMD instructions when the step is larger than or equal to the lane size\n+ if (step >= lanes) {\n+ v_uint16 v_ds = vx_load(hist + ofs);\n+ for (int i = lanes; i <= step - lanes; i += lanes)\n+ v_ds = v_add(v_ds, vx_load(hist + ofs + i));\n+ ds = v_reduce_sum(v_ds);\n+ }\n+ // For smaller steps, use scalar accumulation\n+ else\n+#endif\n+ {\n+ ds = hist[ofs];\n+ for (int i = 1; i < step; i++)\n+ ds += hist[ofs + i];\n+ }\n+\n+ // Determine if the 
cumulative sum has reached or surpassed half the total sum\n+ m = (s + ds) <= halfsum;\n+ ofs += m * step;\n+ s += ds & -m;\n+ step = step >> 1;\n+ }\n+}\n+\n+static void\n+medianBlur_8u(const Mat &_src, Mat &_dst, int ksize) {\n+ CV_INSTRUMENT_REGION();\n+\n+ const int channels = _src.channels();\n+ int radius = ksize / 2;\n+ CV_Assert(ksize % 2 == 1);\n+\n+ const int rows = _src.rows;\n+ const int cols = _src.cols;\n+ const int win_half_size = ksize * ksize / 2;\n+\n+ const uint8_t *src_data = _src.ptr();\n+ size_t src_step = _src.step;\n+ uint8_t *dst_data = _dst.ptr();\n+ size_t dst_step = _dst.step;\n+\n+ double nstripes = (double) (rows * cols) / 1024.0;\n+ parallel_for_(Range(0, rows), [&](const Range &r) {\n+ int y_start = r.start;\n+ int y_end = r.end;\n+\n+ std::vector> hist256(channels, std::vector(256, 0));\n+ std::vector win_row_ptrs(ksize);\n+\n+ // initial hist for the first pixel\n+ for (int dy = -radius; dy <= radius; dy++) {\n+ int y = std::clamp(y_start + dy, 0, rows - 1);\n+ const uint8_t *row_ptr = src_data + y * src_step;\n+ win_row_ptrs[dy + radius] = row_ptr;\n+\n+ // always start from col 0, so need to add radius to first pixel\n+ for (int c = 0; c < channels; ++c)\n+ hist256[c][row_ptr[c]] += radius + 1;\n+ // add the rest of the row\n+ for (int dx = 1; dx <= radius; dx++) {\n+ int x = std::min(dx, cols - 1);\n+ const uint8_t *pixel_ptr = row_ptr + x * channels;\n+ for (int c = 0; c < channels; ++c) {\n+ hist256[c][pixel_ptr[c]]++;\n+ }\n+ }\n+ }\n+\n+ int x_start = 0, x_end = cols, x_step = 1;\n+ // slide the window across the row\n+ for (int y = y_start; y < y_end; y++) {\n+ uint8_t *dst_pixel_ptr = dst_data + y * dst_step + x_start * channels;\n+ for (int x = x_start; x != x_end; x += x_step, dst_pixel_ptr += x_step * channels) {\n+ if (x != x_start) {\n+ // update hist by removing and adding pixel values as the window moves\n+ int x_remove = std::clamp(x - (radius + 1) * x_step, 0, cols - 1) * channels;\n+ int x_add = std::clamp(x + radius * x_step, 0, cols - 1) * channels;\n+ for (int dy = 0; dy < ksize; dy++) {", + "comment_created_at": "2024-11-06T12:54:38+00:00", + "comment_author": "vpisarev", + "comment_body": "please, create separate versions of histogram update loop for `channels == 1`, `channels == 3` and the generic case.\r\nstore separate pointers for 1-channel hist256 (hist0=hist256[0]) and 3-channel hist256 (`hist0 = hist256[0], hist1=hist256[1] and hist2=hist256[2]`)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1975941881", + "pr_number": 26958, + "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/pyramids.hpp", + "created_at": "2025-02-28T19:53:20+00:00", + "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n#ifndef OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED\n#define OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED\n\n#include \n\nnamespace cv { namespace cv_hal_rvv { namespace pyramids {\n\n#undef cv_hal_pyrdown\n#define cv_hal_pyrdown cv::cv_hal_rvv::pyramids::pyrDown\n#undef cv_hal_pyrup\n#define cv_hal_pyrup cv::cv_hal_rvv::pyramids::pyrUp\n\ntemplate struct rvv;\n\ntemplate<> struct rvv\n{\n static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n static inline vuint8m1_t vle_T(const uchar* a, size_t b) { return __riscv_vle8_v_u8m1(a, b); }\n static inline vint32m4_t vle_WT(const int* a, size_t b) { return __riscv_vle32_v_i32m4(a, b); }\n static inline vuint32m4_t 
vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n static inline vuint8m1_t vlse_T(const uchar* a, ptrdiff_t b, size_t c) { return __riscv_vlse8_v_u8m1(a, b, c); }\n static inline vuint8m1_t vloxei_T(const uchar* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_u8m1(a, b, c); }\n static inline void vse_T(uchar* a, vuint8m1_t b, size_t c) { return __riscv_vse8(a, b, c); }\n static inline vint32m4_t vcvt_T_WT(vuint8m1_t a, size_t b) { return __riscv_vreinterpret_v_u32m4_i32m4(__riscv_vzext_vf4(a, b)); }\n static inline vuint8m1_t vcvt_WT_T(vint32m4_t a, int b, size_t c) { return __riscv_vncvt_x(__riscv_vncvt_x(__riscv_vreinterpret_v_i32m4_u32m4(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c)), c), c); }\n};\n\ntemplate<> struct rvv\n{\n static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n static inline vint16m2_t vle_T(const short* a, size_t b) { return __riscv_vle16_v_i16m2(a, b); }\n static inline vint32m4_t vle_WT(const int* a, size_t b) { return __riscv_vle32_v_i32m4(a, b); }\n static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n static inline vint16m2_t vlse_T(const short* a, ptrdiff_t b, size_t c) { return __riscv_vlse16_v_i16m2(a, b, c); }\n static inline vint16m2_t vloxei_T(const short* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_i16m2(a, b, c); }\n static inline void vse_T(short* a, vint16m2_t b, size_t c) { return __riscv_vse16(a, b, c); }\n static inline vint32m4_t vcvt_T_WT(vint16m2_t a, size_t b) { return __riscv_vsext_vf2(a, b); }\n static inline vint16m2_t vcvt_WT_T(vint32m4_t a, int b, size_t c) { return __riscv_vncvt_x(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c), c); }\n};\n\ntemplate<> struct rvv\n{\n static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n static inline vfloat32m4_t vle_T(const float* a, size_t b) { return __riscv_vle32_v_f32m4(a, b); }\n static inline vfloat32m4_t vle_WT(const float* a, size_t b) { return __riscv_vle32_v_f32m4(a, b); }\n static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n static inline vfloat32m4_t vlse_T(const float* a, ptrdiff_t b, size_t c) { return __riscv_vlse32_v_f32m4(a, b, c); }\n static inline vfloat32m4_t vloxei_T(const float* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_f32m4(a, b, c); }\n static inline void vse_T(float* a, vfloat32m4_t b, size_t c) { return __riscv_vse32(a, b, c); }\n};\n\ntemplate struct pyrDownVec0\n{\n void operator()(const T* src, WT* row, const uint* tabM, int start, int end)\n {\n int vl;\n switch (start)\n {\n case 1:\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 - 2, 2 * sizeof(T), vl), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 - 1, 2 * sizeof(T), vl), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2, 2 * sizeof(T), vl), vl);\n auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 + 1, 2 * sizeof(T), vl), vl);\n auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 + 2, 2 * sizeof(T), vl), vl);\n __riscv_vse32(row + x, __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n }\n break;\n case 2:\n for( int x = start / 2; x < end / 2; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 2 - x);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src 
+ (x * 2 - 2) * 2, 4 * sizeof(T), vl), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 2, 4 * sizeof(T), vl), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 2, 4 * sizeof(T), vl), vl);\n auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 2, 4 * sizeof(T), vl), vl);\n auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 2, 4 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 2, 2 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 2 + 1, 4 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 2 + 1, 4 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 2 + 1, 2 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n }\n break;\n case 3:\n for( int x = start / 3; x < end / 3; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 3 - x);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3, 6 * sizeof(T), vl), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3, 6 * sizeof(T), vl), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3, 6 * sizeof(T), vl), vl);\n auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3, 6 * sizeof(T), vl), vl);\n auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3, 6 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 3, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3 + 1, 6 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3 + 1, 6 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 3 + 1, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3 + 2, 6 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3 + 2, 6 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 3 + 2, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n 
__riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n }\n break;\n case 4:\n for( int x = start / 4; x < end / 4; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 4 - x);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4, 8 * sizeof(T), vl), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4, 8 * sizeof(T), vl), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4, 8 * sizeof(T), vl), vl);\n auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4, 8 * sizeof(T), vl), vl);\n auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4, 8 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 4, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 1, 8 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 1, 8 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 4 + 1, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 2, 8 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 2, 8 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 4 + 2, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 3, 8 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 3, 8 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 4 + 3, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n }\n break;\n default:\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_tabM = rvv::vle_M(tabM + x, vl);\n vec_tabM = __riscv_vmul(__riscv_vsub(vec_tabM, start * 2, vl), sizeof(T), vl);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n auto 
vec_src2 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n auto vec_src3 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n auto vec_src4 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n __riscv_vse32(row + x, __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n }\n }\n }\n};\ntemplate<> struct pyrDownVec0\n{\n void operator()(const float* src, float* row, const uint* tabM, int start, int end)\n {\n int vl;\n switch (start)\n {\n case 1:\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vlse_T(src + x * 2 - 2, 2 * sizeof(float), vl);\n auto vec_src1 = rvv::vlse_T(src + x * 2 - 1, 2 * sizeof(float), vl);\n auto vec_src2 = rvv::vlse_T(src + x * 2, 2 * sizeof(float), vl);\n auto vec_src3 = rvv::vlse_T(src + x * 2 + 1, 2 * sizeof(float), vl);\n auto vec_src4 = rvv::vlse_T(src + x * 2 + 2, 2 * sizeof(float), vl);\n __riscv_vse32(row + x, __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n }\n break;\n case 2:\n for( int x = start / 2; x < end / 2; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 2 - x);\n auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 2, 4 * sizeof(float), vl);\n auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 2, 4 * sizeof(float), vl);\n auto vec_src2 = rvv::vlse_T(src + (x * 2) * 2, 4 * sizeof(float), vl);\n auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 2, 4 * sizeof(float), vl);\n auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 2, 4 * sizeof(float), vl);\n __riscv_vsse32(row + x * 2, 2 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 2 + 1, 4 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 2 + 1, 4 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 2 + 1, 4 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 2 + 1, 4 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 2 + 1, 4 * sizeof(float), vl);\n __riscv_vsse32(row + x * 2 + 1, 2 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n }\n break;\n case 3:\n for( int x = start / 3; x < end / 3; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 3 - x);\n auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3, 6 * sizeof(float), vl);\n auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3, 6 * sizeof(float), vl);\n auto vec_src2 = rvv::vlse_T(src + (x * 2) * 3, 6 * sizeof(float), vl);\n auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3, 6 * sizeof(float), vl);\n auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3, 6 * sizeof(float), vl);\n __riscv_vsse32(row + x * 3, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3 + 1, 6 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3 + 1, 6 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 3 + 1, 6 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3 + 1, 6 * 
sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3 + 1, 6 * sizeof(float), vl);\n __riscv_vsse32(row + x * 3 + 1, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3 + 2, 6 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3 + 2, 6 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 3 + 2, 6 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3 + 2, 6 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3 + 2, 6 * sizeof(float), vl);\n __riscv_vsse32(row + x * 3 + 2, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n }\n break;\n case 4:\n for( int x = start / 4; x < end / 4; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 4 - x);\n auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4, 8 * sizeof(float), vl);\n auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4, 8 * sizeof(float), vl);\n auto vec_src2 = rvv::vlse_T(src + (x * 2) * 4, 8 * sizeof(float), vl);\n auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4, 8 * sizeof(float), vl);\n auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4, 8 * sizeof(float), vl);\n __riscv_vsse32(row + x * 4, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 1, 8 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 1, 8 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 1, 8 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 1, 8 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 1, 8 * sizeof(float), vl);\n __riscv_vsse32(row + x * 4 + 1, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 2, 8 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 2, 8 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 2, 8 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 2, 8 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 2, 8 * sizeof(float), vl);\n __riscv_vsse32(row + x * 4 + 2, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 3, 8 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 3, 8 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 3, 8 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 3, 8 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 3, 8 * sizeof(float), vl);\n __riscv_vsse32(row + x * 4 + 3, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n }\n break;\n default:\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_tabM = rvv::vle_M(tabM + x, vl);\n vec_tabM = __riscv_vmul(__riscv_vsub(vec_tabM, start * 2, vl), sizeof(float), vl);\n auto vec_src0 = rvv::vloxei_T(src, vec_tabM, vl);\n vec_tabM = 
__riscv_vadd(vec_tabM, start * sizeof(float), vl);\n auto vec_src1 = rvv::vloxei_T(src, vec_tabM, vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n auto vec_src2 = rvv::vloxei_T(src, vec_tabM, vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n auto vec_src3 = rvv::vloxei_T(src, vec_tabM, vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n auto vec_src4 = rvv::vloxei_T(src, vec_tabM, vl);\n __riscv_vse32(row + x, __riscv_vfmadd(__riscv_vfadd(__riscv_vfadd(vec_src1, vec_src2, vl), vec_src3, vl), 4,\n __riscv_vfadd(__riscv_vfadd(vec_src0, vec_src4, vl), __riscv_vfadd(vec_src2, vec_src2, vl), vl), vl), vl);\n }\n }\n }\n};\n\ntemplate struct pyrDownVec1\n{\n void operator()(WT* row0, WT* row1, WT* row2, WT* row3, WT* row4, T* dst, int end)\n {\n int vl;\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n auto vec_src3 = rvv::vle_WT(row3 + x, vl);\n auto vec_src4 = rvv::vle_WT(row4 + x, vl);\n rvv::vse_T(dst + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), 8, vl), vl);\n }\n }\n};\ntemplate<> struct pyrDownVec1\n{\n void operator()(float* row0, float* row1, float* row2, float* row3, float* row4, float* dst, int end)\n {\n int vl;\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n auto vec_src3 = rvv::vle_WT(row3 + x, vl);\n auto vec_src4 = rvv::vle_WT(row4 + x, vl);\n rvv::vse_T(dst + x, __riscv_vfmul(__riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), 1.f / 256.f, vl), vl);\n }\n }\n};\n\ntemplate struct pyrUpVec0\n{\n void operator()(const T* src, WT* row, const uint* dtab, int start, int end)\n {\n int vl;\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vle_T(src + x - start, vl), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vle_T(src + x, vl), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vle_T(src + x + start, vl), vl);\n\n auto vec_dtab = rvv::vle_M(dtab + x, vl);\n vec_dtab = __riscv_vmul(vec_dtab, sizeof(WT), vl);\n __riscv_vsoxei32(row, vec_dtab, __riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), vl);\n __riscv_vsoxei32(row, __riscv_vadd(vec_dtab, start * sizeof(WT), vl), __riscv_vsll(__riscv_vadd(vec_src1, vec_src2, vl), 2, vl), vl);\n }\n }\n};\ntemplate<> struct pyrUpVec0\n{\n void operator()(const float* src, float* row, const uint* dtab, int start, int end)\n {\n int vl;\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_T(src + x - start, vl);\n auto vec_src1 = rvv::vle_T(src + x, vl);\n auto vec_src2 = rvv::vle_T(src + x + start, vl);\n\n auto vec_dtab = rvv::vle_M(dtab + x, vl);\n vec_dtab = __riscv_vmul(vec_dtab, sizeof(float), vl);\n __riscv_vsoxei32(row, vec_dtab, __riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), vl);\n __riscv_vsoxei32(row, __riscv_vadd(vec_dtab, start * sizeof(float), vl), __riscv_vfmul(__riscv_vfadd(vec_src1, 
vec_src2, vl), 4, vl), vl);\n }\n }\n};\n\ntemplate struct pyrUpVec1\n{\n void operator()(WT* row0, WT* row1, WT* row2, T* dst0, T* dst1, int end)\n {\n int vl;\n if (dst0 != dst1)\n {\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n rvv::vse_T(dst0 + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), 6, vl), vl);\n rvv::vse_T(dst1 + x, rvv::vcvt_WT_T(__riscv_vsll(__riscv_vadd(vec_src1, vec_src2, vl), 2, vl), 6, vl), vl);\n }\n }\n else\n {\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n rvv::vse_T(dst0 + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), 6, vl), vl);\n }\n }\n }\n};\ntemplate<> struct pyrUpVec1\n{\n void operator()(float* row0, float* row1, float* row2, float* dst0, float* dst1, int end)\n {\n int vl;\n if (dst0 != dst1)\n {\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n rvv::vse_T(dst0 + x, __riscv_vfmul(__riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), 1.f / 64.f, vl), vl);\n rvv::vse_T(dst1 + x, __riscv_vfmul(__riscv_vfadd(vec_src1, vec_src2, vl), 1.f / 16.f, vl), vl);\n }\n }\n else\n {\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n rvv::vse_T(dst0 + x, __riscv_vfmul(__riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), 1.f / 64.f, vl), vl);\n }\n }\n }\n};\n\ntemplate\nstruct PyrDownInvoker : ParallelLoopBody\n{\n PyrDownInvoker(const uchar* _src_data, size_t _src_step, int _src_width, int _src_height, uchar* _dst_data, size_t _dst_step, int _dst_width, int _dst_height, int _cn, int _borderType, int* _tabR, int* _tabM, int* _tabL)\n {\n src_data = _src_data;\n src_step = _src_step;\n src_width = _src_width;\n src_height = _src_height;\n dst_data = _dst_data;\n dst_step = _dst_step;\n dst_width = _dst_width;\n dst_height = _dst_height;\n cn = _cn;\n borderType = _borderType;\n tabR = _tabR;\n tabM = _tabM;\n tabL = _tabL;\n }\n\n void operator()(const Range& range) const CV_OVERRIDE;\n\n const uchar* src_data;\n size_t src_step;\n int src_width;\n int src_height;\n uchar* dst_data;\n size_t dst_step;\n int dst_width;\n int dst_height;\n int cn;\n int borderType;\n int* tabR;\n int* tabM;\n int* tabL;\n};\n\n// the algorithm is copied from imgproc/src/pyramids.cpp,\n// in the function template void cv::pyrDown_\ntemplate\ninline int pyrDown(const uchar* src_data, size_t src_step, int src_width, int src_height, uchar* dst_data, size_t dst_step, int dst_width, int dst_height, int cn, int borderType)\n{\n const int PD_SZ = 5;\n\n std::vector _tabM(dst_width * cn), _tabL(cn * (PD_SZ + 2)), _tabR(cn * (PD_SZ + 2));\n int *tabM = _tabM.data(), *tabL = _tabL.data(), *tabR = _tabR.data();\n\n CV_Assert( src_width > 0 && src_height > 0 &&\n std::abs(dst_width*2 - src_width) <= 2 &&\n 
std::abs(dst_height*2 - src_height) <= 2 );\n int width0 = std::min((src_width-PD_SZ/2-1)/2 + 1, dst_width);\n\n for (int x = 0; x <= PD_SZ+1; x++)\n {\n int sx0 = borderInterpolate(x - PD_SZ/2, src_width, borderType)*cn;", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1975941881", + "repo_full_name": "opencv/opencv", + "pr_number": 26958, + "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/pyramids.hpp", + "discussion_id": "1975941881", + "commented_code": "@@ -0,0 +1,681 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+#ifndef OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED\n+#define OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED\n+\n+#include \n+\n+namespace cv { namespace cv_hal_rvv { namespace pyramids {\n+\n+#undef cv_hal_pyrdown\n+#define cv_hal_pyrdown cv::cv_hal_rvv::pyramids::pyrDown\n+#undef cv_hal_pyrup\n+#define cv_hal_pyrup cv::cv_hal_rvv::pyramids::pyrUp\n+\n+template struct rvv;\n+\n+template<> struct rvv\n+{\n+ static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n+ static inline vuint8m1_t vle_T(const uchar* a, size_t b) { return __riscv_vle8_v_u8m1(a, b); }\n+ static inline vint32m4_t vle_WT(const int* a, size_t b) { return __riscv_vle32_v_i32m4(a, b); }\n+ static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n+ static inline vuint8m1_t vlse_T(const uchar* a, ptrdiff_t b, size_t c) { return __riscv_vlse8_v_u8m1(a, b, c); }\n+ static inline vuint8m1_t vloxei_T(const uchar* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_u8m1(a, b, c); }\n+ static inline void vse_T(uchar* a, vuint8m1_t b, size_t c) { return __riscv_vse8(a, b, c); }\n+ static inline vint32m4_t vcvt_T_WT(vuint8m1_t a, size_t b) { return __riscv_vreinterpret_v_u32m4_i32m4(__riscv_vzext_vf4(a, b)); }\n+ static inline vuint8m1_t vcvt_WT_T(vint32m4_t a, int b, size_t c) { return __riscv_vncvt_x(__riscv_vncvt_x(__riscv_vreinterpret_v_i32m4_u32m4(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c)), c), c); }\n+};\n+\n+template<> struct rvv\n+{\n+ static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n+ static inline vint16m2_t vle_T(const short* a, size_t b) { return __riscv_vle16_v_i16m2(a, b); }\n+ static inline vint32m4_t vle_WT(const int* a, size_t b) { return __riscv_vle32_v_i32m4(a, b); }\n+ static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n+ static inline vint16m2_t vlse_T(const short* a, ptrdiff_t b, size_t c) { return __riscv_vlse16_v_i16m2(a, b, c); }\n+ static inline vint16m2_t vloxei_T(const short* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_i16m2(a, b, c); }\n+ static inline void vse_T(short* a, vint16m2_t b, size_t c) { return __riscv_vse16(a, b, c); }\n+ static inline vint32m4_t vcvt_T_WT(vint16m2_t a, size_t b) { return __riscv_vsext_vf2(a, b); }\n+ static inline vint16m2_t vcvt_WT_T(vint32m4_t a, int b, size_t c) { return __riscv_vncvt_x(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c), c); }\n+};\n+\n+template<> struct rvv\n+{\n+ static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n+ static inline vfloat32m4_t vle_T(const float* a, size_t b) { return __riscv_vle32_v_f32m4(a, b); }\n+ static inline vfloat32m4_t vle_WT(const float* a, size_t b) { return __riscv_vle32_v_f32m4(a, b); }\n+ static inline vuint32m4_t vle_M(const uint* a, size_t b) { 
return __riscv_vle32_v_u32m4(a, b); }\n+ static inline vfloat32m4_t vlse_T(const float* a, ptrdiff_t b, size_t c) { return __riscv_vlse32_v_f32m4(a, b, c); }\n+ static inline vfloat32m4_t vloxei_T(const float* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_f32m4(a, b, c); }\n+ static inline void vse_T(float* a, vfloat32m4_t b, size_t c) { return __riscv_vse32(a, b, c); }\n+};\n+\n+template struct pyrDownVec0\n+{\n+ void operator()(const T* src, WT* row, const uint* tabM, int start, int end)\n+ {\n+ int vl;\n+ switch (start)\n+ {\n+ case 1:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 - 2, 2 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 - 1, 2 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2, 2 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 + 1, 2 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 + 2, 2 * sizeof(T), vl), vl);\n+ __riscv_vse32(row + x, __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ case 2:\n+ for( int x = start / 2; x < end / 2; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 2 - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 2, 4 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 2, 2 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 2 + 1, 2 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ case 3:\n+ for( int x = start / 3; x < end / 3; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 3 - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3, 6 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 3, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, 
vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 3 + 1, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 3 + 2, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ case 4:\n+ for( int x = start / 4; x < end / 4; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 4 - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4 + 1, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src4 = 
rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4 + 2, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4 + 3, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ default:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_tabM = rvv::vle_M(tabM + x, vl);\n+ vec_tabM = __riscv_vmul(__riscv_vsub(vec_tabM, start * 2, vl), sizeof(T), vl);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ __riscv_vse32(row + x, __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ }\n+ }\n+};\n+template<> struct pyrDownVec0\n+{\n+ void operator()(const float* src, float* row, const uint* tabM, int start, int end)\n+ {\n+ int vl;\n+ switch (start)\n+ {\n+ case 1:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vlse_T(src + x * 2 - 2, 2 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + x * 2 - 1, 2 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + x * 2, 2 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + x * 2 + 1, 2 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + x * 2 + 2, 2 * sizeof(float), vl);\n+ __riscv_vse32(row + x, __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ case 2:\n+ for( int x = start / 2; x < end / 2; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 2 - x);\n+ auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 2, 4 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 2, 4 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + (x * 2) * 2, 4 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 2, 4 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 2, 4 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 2, 2 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, 
vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 2 + 1, 4 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 2 + 1, 2 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ case 3:\n+ for( int x = start / 3; x < end / 3; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 3 - x);\n+ auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3, 6 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3, 6 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + (x * 2) * 3, 6 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3, 6 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3, 6 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 3, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3 + 1, 6 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 3 + 1, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3 + 2, 6 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 3 + 2, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ case 4:\n+ for( int x = start / 4; x < end / 4; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 4 - x);\n+ auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4, 8 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4, 8 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + (x * 2) * 4, 8 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4, 8 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 1, 8 * 
sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4 + 1, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 2, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4 + 2, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 3, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4 + 3, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ default:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_tabM = rvv::vle_M(tabM + x, vl);\n+ vec_tabM = __riscv_vmul(__riscv_vsub(vec_tabM, start * 2, vl), sizeof(float), vl);\n+ auto vec_src0 = rvv::vloxei_T(src, vec_tabM, vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src1 = rvv::vloxei_T(src, vec_tabM, vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src2 = rvv::vloxei_T(src, vec_tabM, vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src3 = rvv::vloxei_T(src, vec_tabM, vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src4 = rvv::vloxei_T(src, vec_tabM, vl);\n+ __riscv_vse32(row + x, __riscv_vfmadd(__riscv_vfadd(__riscv_vfadd(vec_src1, vec_src2, vl), vec_src3, vl), 4,\n+ __riscv_vfadd(__riscv_vfadd(vec_src0, vec_src4, vl), __riscv_vfadd(vec_src2, vec_src2, vl), vl), vl), vl);\n+ }\n+ }\n+ }\n+};\n+\n+template struct pyrDownVec1\n+{\n+ void operator()(WT* row0, WT* row1, WT* row2, WT* row3, WT* row4, T* dst, int end)\n+ {\n+ int vl;\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ auto vec_src3 = rvv::vle_WT(row3 + x, vl);\n+ auto vec_src4 = rvv::vle_WT(row4 + x, vl);\n+ rvv::vse_T(dst + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), 8, vl), vl);\n+ }\n+ }\n+};\n+template<> struct pyrDownVec1\n+{\n+ void operator()(float* row0, float* row1, float* row2, float* row3, float* row4, float* dst, int end)\n+ {\n+ int vl;\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ auto vec_src3 = rvv::vle_WT(row3 + x, vl);\n+ auto vec_src4 = rvv::vle_WT(row4 + x, vl);\n+ 
rvv::vse_T(dst + x, __riscv_vfmul(__riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), 1.f / 256.f, vl), vl);\n+ }\n+ }\n+};\n+\n+template struct pyrUpVec0\n+{\n+ void operator()(const T* src, WT* row, const uint* dtab, int start, int end)\n+ {\n+ int vl;\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vle_T(src + x - start, vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vle_T(src + x, vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vle_T(src + x + start, vl), vl);\n+\n+ auto vec_dtab = rvv::vle_M(dtab + x, vl);\n+ vec_dtab = __riscv_vmul(vec_dtab, sizeof(WT), vl);\n+ __riscv_vsoxei32(row, vec_dtab, __riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), vl);\n+ __riscv_vsoxei32(row, __riscv_vadd(vec_dtab, start * sizeof(WT), vl), __riscv_vsll(__riscv_vadd(vec_src1, vec_src2, vl), 2, vl), vl);\n+ }\n+ }\n+};\n+template<> struct pyrUpVec0\n+{\n+ void operator()(const float* src, float* row, const uint* dtab, int start, int end)\n+ {\n+ int vl;\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_T(src + x - start, vl);\n+ auto vec_src1 = rvv::vle_T(src + x, vl);\n+ auto vec_src2 = rvv::vle_T(src + x + start, vl);\n+\n+ auto vec_dtab = rvv::vle_M(dtab + x, vl);\n+ vec_dtab = __riscv_vmul(vec_dtab, sizeof(float), vl);\n+ __riscv_vsoxei32(row, vec_dtab, __riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), vl);\n+ __riscv_vsoxei32(row, __riscv_vadd(vec_dtab, start * sizeof(float), vl), __riscv_vfmul(__riscv_vfadd(vec_src1, vec_src2, vl), 4, vl), vl);\n+ }\n+ }\n+};\n+\n+template struct pyrUpVec1\n+{\n+ void operator()(WT* row0, WT* row1, WT* row2, T* dst0, T* dst1, int end)\n+ {\n+ int vl;\n+ if (dst0 != dst1)\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), 6, vl), vl);\n+ rvv::vse_T(dst1 + x, rvv::vcvt_WT_T(__riscv_vsll(__riscv_vadd(vec_src1, vec_src2, vl), 2, vl), 6, vl), vl);\n+ }\n+ }\n+ else\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), 6, vl), vl);\n+ }\n+ }\n+ }\n+};\n+template<> struct pyrUpVec1\n+{\n+ void operator()(float* row0, float* row1, float* row2, float* dst0, float* dst1, int end)\n+ {\n+ int vl;\n+ if (dst0 != dst1)\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, __riscv_vfmul(__riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), 1.f / 64.f, vl), vl);\n+ rvv::vse_T(dst1 + x, __riscv_vfmul(__riscv_vfadd(vec_src1, vec_src2, vl), 1.f / 16.f, vl), vl);\n+ }\n+ }\n+ else\n+ {\n+ for( 
int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, __riscv_vfmul(__riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), 1.f / 64.f, vl), vl);\n+ }\n+ }\n+ }\n+};\n+\n+template\n+struct PyrDownInvoker : ParallelLoopBody\n+{\n+ PyrDownInvoker(const uchar* _src_data, size_t _src_step, int _src_width, int _src_height, uchar* _dst_data, size_t _dst_step, int _dst_width, int _dst_height, int _cn, int _borderType, int* _tabR, int* _tabM, int* _tabL)\n+ {\n+ src_data = _src_data;\n+ src_step = _src_step;\n+ src_width = _src_width;\n+ src_height = _src_height;\n+ dst_data = _dst_data;\n+ dst_step = _dst_step;\n+ dst_width = _dst_width;\n+ dst_height = _dst_height;\n+ cn = _cn;\n+ borderType = _borderType;\n+ tabR = _tabR;\n+ tabM = _tabM;\n+ tabL = _tabL;\n+ }\n+\n+ void operator()(const Range& range) const CV_OVERRIDE;\n+\n+ const uchar* src_data;\n+ size_t src_step;\n+ int src_width;\n+ int src_height;\n+ uchar* dst_data;\n+ size_t dst_step;\n+ int dst_width;\n+ int dst_height;\n+ int cn;\n+ int borderType;\n+ int* tabR;\n+ int* tabM;\n+ int* tabL;\n+};\n+\n+// the algorithm is copied from imgproc/src/pyramids.cpp,\n+// in the function template void cv::pyrDown_\n+template\n+inline int pyrDown(const uchar* src_data, size_t src_step, int src_width, int src_height, uchar* dst_data, size_t dst_step, int dst_width, int dst_height, int cn, int borderType)\n+{\n+ const int PD_SZ = 5;\n+\n+ std::vector _tabM(dst_width * cn), _tabL(cn * (PD_SZ + 2)), _tabR(cn * (PD_SZ + 2));\n+ int *tabM = _tabM.data(), *tabL = _tabL.data(), *tabR = _tabR.data();\n+\n+ CV_Assert( src_width > 0 && src_height > 0 &&\n+ std::abs(dst_width*2 - src_width) <= 2 &&\n+ std::abs(dst_height*2 - src_height) <= 2 );\n+ int width0 = std::min((src_width-PD_SZ/2-1)/2 + 1, dst_width);\n+\n+ for (int x = 0; x <= PD_SZ+1; x++)\n+ {\n+ int sx0 = borderInterpolate(x - PD_SZ/2, src_width, borderType)*cn;", + "comment_created_at": "2025-02-28T19:53:20+00:00", + "comment_author": "mshabunin", + "comment_body": "Not sure what to do with `borderInterpolate`. 
Perhaps it should be implemented here as well (maybe in limited version only for acceptable border modes).", + "pr_file_module": null + }, + { + "comment_id": "1976248637", + "repo_full_name": "opencv/opencv", + "pr_number": 26958, + "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/pyramids.hpp", + "discussion_id": "1975941881", + "commented_code": "@@ -0,0 +1,681 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+#ifndef OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED\n+#define OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED\n+\n+#include \n+\n+namespace cv { namespace cv_hal_rvv { namespace pyramids {\n+\n+#undef cv_hal_pyrdown\n+#define cv_hal_pyrdown cv::cv_hal_rvv::pyramids::pyrDown\n+#undef cv_hal_pyrup\n+#define cv_hal_pyrup cv::cv_hal_rvv::pyramids::pyrUp\n+\n+template struct rvv;\n+\n+template<> struct rvv\n+{\n+ static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n+ static inline vuint8m1_t vle_T(const uchar* a, size_t b) { return __riscv_vle8_v_u8m1(a, b); }\n+ static inline vint32m4_t vle_WT(const int* a, size_t b) { return __riscv_vle32_v_i32m4(a, b); }\n+ static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n+ static inline vuint8m1_t vlse_T(const uchar* a, ptrdiff_t b, size_t c) { return __riscv_vlse8_v_u8m1(a, b, c); }\n+ static inline vuint8m1_t vloxei_T(const uchar* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_u8m1(a, b, c); }\n+ static inline void vse_T(uchar* a, vuint8m1_t b, size_t c) { return __riscv_vse8(a, b, c); }\n+ static inline vint32m4_t vcvt_T_WT(vuint8m1_t a, size_t b) { return __riscv_vreinterpret_v_u32m4_i32m4(__riscv_vzext_vf4(a, b)); }\n+ static inline vuint8m1_t vcvt_WT_T(vint32m4_t a, int b, size_t c) { return __riscv_vncvt_x(__riscv_vncvt_x(__riscv_vreinterpret_v_i32m4_u32m4(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c)), c), c); }\n+};\n+\n+template<> struct rvv\n+{\n+ static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n+ static inline vint16m2_t vle_T(const short* a, size_t b) { return __riscv_vle16_v_i16m2(a, b); }\n+ static inline vint32m4_t vle_WT(const int* a, size_t b) { return __riscv_vle32_v_i32m4(a, b); }\n+ static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n+ static inline vint16m2_t vlse_T(const short* a, ptrdiff_t b, size_t c) { return __riscv_vlse16_v_i16m2(a, b, c); }\n+ static inline vint16m2_t vloxei_T(const short* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_i16m2(a, b, c); }\n+ static inline void vse_T(short* a, vint16m2_t b, size_t c) { return __riscv_vse16(a, b, c); }\n+ static inline vint32m4_t vcvt_T_WT(vint16m2_t a, size_t b) { return __riscv_vsext_vf2(a, b); }\n+ static inline vint16m2_t vcvt_WT_T(vint32m4_t a, int b, size_t c) { return __riscv_vncvt_x(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c), c); }\n+};\n+\n+template<> struct rvv\n+{\n+ static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n+ static inline vfloat32m4_t vle_T(const float* a, size_t b) { return __riscv_vle32_v_f32m4(a, b); }\n+ static inline vfloat32m4_t vle_WT(const float* a, size_t b) { return __riscv_vle32_v_f32m4(a, b); }\n+ static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n+ static inline vfloat32m4_t vlse_T(const float* a, ptrdiff_t b, size_t c) { return __riscv_vlse32_v_f32m4(a, b, 
c); }\n+ static inline vfloat32m4_t vloxei_T(const float* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_f32m4(a, b, c); }\n+ static inline void vse_T(float* a, vfloat32m4_t b, size_t c) { return __riscv_vse32(a, b, c); }\n+};\n+\n+template struct pyrDownVec0\n+{\n+ void operator()(const T* src, WT* row, const uint* tabM, int start, int end)\n+ {\n+ int vl;\n+ switch (start)\n+ {\n+ case 1:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 - 2, 2 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 - 1, 2 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2, 2 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 + 1, 2 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 + 2, 2 * sizeof(T), vl), vl);\n+ __riscv_vse32(row + x, __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ case 2:\n+ for( int x = start / 2; x < end / 2; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 2 - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 2, 4 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 2, 2 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 2 + 1, 2 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ case 3:\n+ for( int x = start / 3; x < end / 3; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 3 - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3, 6 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 3, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ 
vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 3 + 1, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 3 + 2, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ case 4:\n+ for( int x = start / 4; x < end / 4; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 4 - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4 + 1, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4 + 2, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, 
vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4 + 3, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ default:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_tabM = rvv::vle_M(tabM + x, vl);\n+ vec_tabM = __riscv_vmul(__riscv_vsub(vec_tabM, start * 2, vl), sizeof(T), vl);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ __riscv_vse32(row + x, __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ }\n+ }\n+};\n+template<> struct pyrDownVec0\n+{\n+ void operator()(const float* src, float* row, const uint* tabM, int start, int end)\n+ {\n+ int vl;\n+ switch (start)\n+ {\n+ case 1:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vlse_T(src + x * 2 - 2, 2 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + x * 2 - 1, 2 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + x * 2, 2 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + x * 2 + 1, 2 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + x * 2 + 2, 2 * sizeof(float), vl);\n+ __riscv_vse32(row + x, __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ case 2:\n+ for( int x = start / 2; x < end / 2; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 2 - x);\n+ auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 2, 4 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 2, 4 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + (x * 2) * 2, 4 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 2, 4 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 2, 4 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 2, 2 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 
2 - 1) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 2 + 1, 4 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 2 + 1, 2 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ case 3:\n+ for( int x = start / 3; x < end / 3; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 3 - x);\n+ auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3, 6 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3, 6 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + (x * 2) * 3, 6 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3, 6 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3, 6 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 3, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3 + 1, 6 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 3 + 1, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3 + 2, 6 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 3 + 2, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ case 4:\n+ for( int x = start / 4; x < end / 4; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 4 - x);\n+ auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4, 8 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4, 8 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + (x * 2) * 4, 8 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4, 8 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 1, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4 + 1, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), 
vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 2, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4 + 2, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 3, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4 + 3, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ default:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_tabM = rvv::vle_M(tabM + x, vl);\n+ vec_tabM = __riscv_vmul(__riscv_vsub(vec_tabM, start * 2, vl), sizeof(float), vl);\n+ auto vec_src0 = rvv::vloxei_T(src, vec_tabM, vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src1 = rvv::vloxei_T(src, vec_tabM, vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src2 = rvv::vloxei_T(src, vec_tabM, vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src3 = rvv::vloxei_T(src, vec_tabM, vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src4 = rvv::vloxei_T(src, vec_tabM, vl);\n+ __riscv_vse32(row + x, __riscv_vfmadd(__riscv_vfadd(__riscv_vfadd(vec_src1, vec_src2, vl), vec_src3, vl), 4,\n+ __riscv_vfadd(__riscv_vfadd(vec_src0, vec_src4, vl), __riscv_vfadd(vec_src2, vec_src2, vl), vl), vl), vl);\n+ }\n+ }\n+ }\n+};\n+\n+template struct pyrDownVec1\n+{\n+ void operator()(WT* row0, WT* row1, WT* row2, WT* row3, WT* row4, T* dst, int end)\n+ {\n+ int vl;\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ auto vec_src3 = rvv::vle_WT(row3 + x, vl);\n+ auto vec_src4 = rvv::vle_WT(row4 + x, vl);\n+ rvv::vse_T(dst + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), 8, vl), vl);\n+ }\n+ }\n+};\n+template<> struct pyrDownVec1\n+{\n+ void operator()(float* row0, float* row1, float* row2, float* row3, float* row4, float* dst, int end)\n+ {\n+ int vl;\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ auto vec_src3 = rvv::vle_WT(row3 + x, vl);\n+ auto vec_src4 = rvv::vle_WT(row4 + x, vl);\n+ rvv::vse_T(dst + x, __riscv_vfmul(__riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), 1.f / 256.f, vl), vl);\n+ }\n+ 
}\n+};\n+\n+template struct pyrUpVec0\n+{\n+ void operator()(const T* src, WT* row, const uint* dtab, int start, int end)\n+ {\n+ int vl;\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vle_T(src + x - start, vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vle_T(src + x, vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vle_T(src + x + start, vl), vl);\n+\n+ auto vec_dtab = rvv::vle_M(dtab + x, vl);\n+ vec_dtab = __riscv_vmul(vec_dtab, sizeof(WT), vl);\n+ __riscv_vsoxei32(row, vec_dtab, __riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), vl);\n+ __riscv_vsoxei32(row, __riscv_vadd(vec_dtab, start * sizeof(WT), vl), __riscv_vsll(__riscv_vadd(vec_src1, vec_src2, vl), 2, vl), vl);\n+ }\n+ }\n+};\n+template<> struct pyrUpVec0\n+{\n+ void operator()(const float* src, float* row, const uint* dtab, int start, int end)\n+ {\n+ int vl;\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_T(src + x - start, vl);\n+ auto vec_src1 = rvv::vle_T(src + x, vl);\n+ auto vec_src2 = rvv::vle_T(src + x + start, vl);\n+\n+ auto vec_dtab = rvv::vle_M(dtab + x, vl);\n+ vec_dtab = __riscv_vmul(vec_dtab, sizeof(float), vl);\n+ __riscv_vsoxei32(row, vec_dtab, __riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), vl);\n+ __riscv_vsoxei32(row, __riscv_vadd(vec_dtab, start * sizeof(float), vl), __riscv_vfmul(__riscv_vfadd(vec_src1, vec_src2, vl), 4, vl), vl);\n+ }\n+ }\n+};\n+\n+template struct pyrUpVec1\n+{\n+ void operator()(WT* row0, WT* row1, WT* row2, T* dst0, T* dst1, int end)\n+ {\n+ int vl;\n+ if (dst0 != dst1)\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), 6, vl), vl);\n+ rvv::vse_T(dst1 + x, rvv::vcvt_WT_T(__riscv_vsll(__riscv_vadd(vec_src1, vec_src2, vl), 2, vl), 6, vl), vl);\n+ }\n+ }\n+ else\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), 6, vl), vl);\n+ }\n+ }\n+ }\n+};\n+template<> struct pyrUpVec1\n+{\n+ void operator()(float* row0, float* row1, float* row2, float* dst0, float* dst1, int end)\n+ {\n+ int vl;\n+ if (dst0 != dst1)\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, __riscv_vfmul(__riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), 1.f / 64.f, vl), vl);\n+ rvv::vse_T(dst1 + x, __riscv_vfmul(__riscv_vfadd(vec_src1, vec_src2, vl), 1.f / 16.f, vl), vl);\n+ }\n+ }\n+ else\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + 
x, vl);\n+ rvv::vse_T(dst0 + x, __riscv_vfmul(__riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), 1.f / 64.f, vl), vl);\n+ }\n+ }\n+ }\n+};\n+\n+template\n+struct PyrDownInvoker : ParallelLoopBody\n+{\n+ PyrDownInvoker(const uchar* _src_data, size_t _src_step, int _src_width, int _src_height, uchar* _dst_data, size_t _dst_step, int _dst_width, int _dst_height, int _cn, int _borderType, int* _tabR, int* _tabM, int* _tabL)\n+ {\n+ src_data = _src_data;\n+ src_step = _src_step;\n+ src_width = _src_width;\n+ src_height = _src_height;\n+ dst_data = _dst_data;\n+ dst_step = _dst_step;\n+ dst_width = _dst_width;\n+ dst_height = _dst_height;\n+ cn = _cn;\n+ borderType = _borderType;\n+ tabR = _tabR;\n+ tabM = _tabM;\n+ tabL = _tabL;\n+ }\n+\n+ void operator()(const Range& range) const CV_OVERRIDE;\n+\n+ const uchar* src_data;\n+ size_t src_step;\n+ int src_width;\n+ int src_height;\n+ uchar* dst_data;\n+ size_t dst_step;\n+ int dst_width;\n+ int dst_height;\n+ int cn;\n+ int borderType;\n+ int* tabR;\n+ int* tabM;\n+ int* tabL;\n+};\n+\n+// the algorithm is copied from imgproc/src/pyramids.cpp,\n+// in the function template void cv::pyrDown_\n+template\n+inline int pyrDown(const uchar* src_data, size_t src_step, int src_width, int src_height, uchar* dst_data, size_t dst_step, int dst_width, int dst_height, int cn, int borderType)\n+{\n+ const int PD_SZ = 5;\n+\n+ std::vector _tabM(dst_width * cn), _tabL(cn * (PD_SZ + 2)), _tabR(cn * (PD_SZ + 2));\n+ int *tabM = _tabM.data(), *tabL = _tabL.data(), *tabR = _tabR.data();\n+\n+ CV_Assert( src_width > 0 && src_height > 0 &&\n+ std::abs(dst_width*2 - src_width) <= 2 &&\n+ std::abs(dst_height*2 - src_height) <= 2 );\n+ int width0 = std::min((src_width-PD_SZ/2-1)/2 + 1, dst_width);\n+\n+ for (int x = 0; x <= PD_SZ+1; x++)\n+ {\n+ int sx0 = borderInterpolate(x - PD_SZ/2, src_width, borderType)*cn;", + "comment_created_at": "2025-03-01T03:11:06+00:00", + "comment_author": "amane-ame", + "comment_body": "Carotene(ARM HAL) copied this function from ::cv into its self namespace, I will follow this.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-optimize-for-common-cases.md b/_reviewers/opencv-optimize-for-common-cases.md index 197a7f5..fa41e04 100644 --- a/_reviewers/opencv-optimize-for-common-cases.md +++ b/_reviewers/opencv-optimize-for-common-cases.md @@ -55,119 +55,3 @@ if (i < len) { ``` This pattern avoids unnecessary modulo operations and extra variables for tracking remainders. When implementing algorithms with specialized branches, ensure that all cases are handled correctly, including those that don't meet optimization criteria. 
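A minimal, self-contained sketch of the specialized-path pattern described above (the `scaleRow` helper and its per-channel branches are illustrative only, not the actual OpenCV kernels):

```cpp
// Dedicated branches for the common channel counts (1, 3, 4) plus a
// generic fallback that stays correct for any other channel count.
static void scaleRow(const float* src, float* dst, int width, int cn, float k)
{
    switch (cn)
    {
    case 1: // single channel: unit-stride loop, the easiest case to vectorize
        for (int x = 0; x < width; x++)
            dst[x] = src[x] * k;
        break;
    case 3: // interleaved RGB: fixed offsets per element
        for (int x = 0; x < width; x++)
        {
            dst[x * 3 + 0] = src[x * 3 + 0] * k;
            dst[x * 3 + 1] = src[x * 3 + 1] * k;
            dst[x * 3 + 2] = src[x * 3 + 2] * k;
        }
        break;
    case 4: // interleaved RGBA
        for (int x = 0; x < width; x++)
        {
            dst[x * 4 + 0] = src[x * 4 + 0] * k;
            dst[x * 4 + 1] = src[x * 4 + 1] * k;
            dst[x * 4 + 2] = src[x * 4 + 2] * k;
            dst[x * 4 + 3] = src[x * 4 + 3] * k;
        }
        break;
    default: // generic path: slower, but correct for every channel count
        for (int x = 0; x < width * cn; x++)
            dst[x] = src[x] * k;
        break;
    }
}
```

The `default` branch is the point of the rule above: the common cases may be hand-tuned, but the function must still produce correct results for inputs that fall outside the optimized paths.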
- - -[ - { - "discussion_id": "2086365469", - "pr_number": 27299, - "pr_file": "modules/imgproc/src/median_blur.simd.hpp", - "created_at": "2025-05-13T09:32:49+00:00", - "commented_code": "#else\n int nlanes = 1;\n#endif\n for( ; j <= size.width - nlanes - cn; j += nlanes )\n if(_dst.cols > 2*nlanes) //minimum size to safely allow extra vectorized store into next row.", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2086365469", - "repo_full_name": "opencv/opencv", - "pr_number": 27299, - "pr_file": "modules/imgproc/src/median_blur.simd.hpp", - "discussion_id": "2086365469", - "commented_code": "@@ -693,18 +694,38 @@ medianBlur_SortNet( const Mat& _src, Mat& _dst, int m )\n #else\n int nlanes = 1;\n #endif\n- for( ; j <= size.width - nlanes - cn; j += nlanes )\n+ if(_dst.cols > 2*nlanes) //minimum size to safely allow extra vectorized store into next row.", - "comment_created_at": "2025-05-13T09:32:49+00:00", - "comment_author": "fengyuentau", - "comment_body": "What if _dst.cols <= 2*nlanes? Just skipped?", - "pr_file_module": null - }, - { - "comment_id": "2086435491", - "repo_full_name": "opencv/opencv", - "pr_number": 27299, - "pr_file": "modules/imgproc/src/median_blur.simd.hpp", - "discussion_id": "2086365469", - "commented_code": "@@ -693,18 +694,38 @@ medianBlur_SortNet( const Mat& _src, Mat& _dst, int m )\n #else\n int nlanes = 1;\n #endif\n- for( ; j <= size.width - nlanes - cn; j += nlanes )\n+ if(_dst.cols > 2*nlanes) //minimum size to safely allow extra vectorized store into next row.", - "comment_created_at": "2025-05-13T10:06:19+00:00", - "comment_author": "madanm3", - "comment_body": "this will not be skipped. when cols <= 2* lanes,\r\n- limit gets increased after this loop. \"limit = size.width\"\r\n- then it gets done in \"for( ; j < limit; j++ )\" loop above.\r\n\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1946963394", - "pr_number": 26887, - "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/inv_sqrt_f.hpp", - "created_at": "2025-02-07T18:07:36+00:00", - "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n#ifndef OPENCV_HAL_RVV_INV_SQRTF32_HPP_INCLUDED\n#define OPENCV_HAL_RVV_INV_SQRTF32_HPP_INCLUDED\n\n#include \n\nnamespace cv { namespace cv_hal_rvv {\n\n#undef cv_hal_invSqrt32f\n#define cv_hal_invSqrt32f cv::cv_hal_rvv::invSqrt32f\n\ninline int invSqrt32f (const float *src, float *dst, const int len) {\n const size_t vl = __riscv_vsetvl_e32m8(len);\n const size_t remainings = len % vl;\n auto calc_fun = [&](const size_t i, const size_t vl) {\n vfloat32m8_t vsrc = __riscv_vle32_v_f32m8(&src[i], vl), \n vres;\n vres = __riscv_vfsqrt_v_f32m8(vsrc, vl);\n vres = __riscv_vfrdiv_vf_f32m8(vres, 1., vl);\n __riscv_vse32_v_f32m8(&dst[i], vres, vl);\n };\n\n size_t i = 0;\n for (; i < len - remainings; i += vl)", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1946963394", - "repo_full_name": "opencv/opencv", - "pr_number": 26887, - "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/inv_sqrt_f.hpp", - "discussion_id": "1946963394", - "commented_code": "@@ -0,0 +1,36 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+#ifndef OPENCV_HAL_RVV_INV_SQRTF32_HPP_INCLUDED\n+#define 
OPENCV_HAL_RVV_INV_SQRTF32_HPP_INCLUDED\n+\n+#include \n+\n+namespace cv { namespace cv_hal_rvv {\n+\n+#undef cv_hal_invSqrt32f\n+#define cv_hal_invSqrt32f cv::cv_hal_rvv::invSqrt32f\n+\n+inline int invSqrt32f (const float *src, float *dst, const int len) {\n+ const size_t vl = __riscv_vsetvl_e32m8(len);\n+ const size_t remainings = len % vl;\n+ auto calc_fun = [&](const size_t i, const size_t vl) {\n+ vfloat32m8_t vsrc = __riscv_vle32_v_f32m8(&src[i], vl), \n+ vres;\n+ vres = __riscv_vfsqrt_v_f32m8(vsrc, vl);\n+ vres = __riscv_vfrdiv_vf_f32m8(vres, 1., vl);\n+ __riscv_vse32_v_f32m8(&dst[i], vres, vl);\n+ };\n+\n+ size_t i = 0;\n+ for (; i < len - remainings; i += vl)", - "comment_created_at": "2025-02-07T18:07:36+00:00", - "comment_author": "dkurt", - "comment_body": "We can avoid `len % vl` and `remainings ` variable by using the following loop:\r\n```cpp\r\nfor (; i <= len - vl; i += vl)\r\n{\r\n calc_fun(i, vl);\r\n}\r\nif (i < len)\r\n{\r\n size_t tail_len = __riscv_vsetvl_e32m8(len - i);\r\n calc_fun(i, tail_len);\r\n}\r\n```\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1830973143", - "pr_number": 26422, - "pr_file": "modules/imgproc/src/median_blur.simd.hpp", - "created_at": "2024-11-06T12:54:38+00:00", - "commented_code": "#undef N\n#undef UPDATE_ACC\n}\n#endif\n\n// Binary search to find the median offset in the histogram\nstatic inline void\nget_median_ofs256(int &ofs, const uint16_t *hist, const int halfsum) {\n int s = 0, step = 128;\n uint16_t ds, m;\n\n#if CV_SIMD || CV_SIMD_SCALABLE\n const int lanes = VTraits::vlanes();\n#endif\n\n ofs = 0;\n while (step) {\n#if CV_SIMD || CV_SIMD_SCALABLE\n // Use SIMD instructions when the step is larger than or equal to the lane size\n if (step >= lanes) {\n v_uint16 v_ds = vx_load(hist + ofs);\n for (int i = lanes; i <= step - lanes; i += lanes)\n v_ds = v_add(v_ds, vx_load(hist + ofs + i));\n ds = v_reduce_sum(v_ds);\n }\n // For smaller steps, use scalar accumulation\n else\n#endif\n {\n ds = hist[ofs];\n for (int i = 1; i < step; i++)\n ds += hist[ofs + i];\n }\n\n // Determine if the cumulative sum has reached or surpassed half the total sum\n m = (s + ds) <= halfsum;\n ofs += m * step;\n s += ds & -m;\n step = step >> 1;\n }\n}\n\nstatic void\nmedianBlur_8u(const Mat &_src, Mat &_dst, int ksize) {\n CV_INSTRUMENT_REGION();\n\n const int channels = _src.channels();\n int radius = ksize / 2;\n CV_Assert(ksize % 2 == 1);\n\n const int rows = _src.rows;\n const int cols = _src.cols;\n const int win_half_size = ksize * ksize / 2;\n\n const uint8_t *src_data = _src.ptr();\n size_t src_step = _src.step;\n uint8_t *dst_data = _dst.ptr();\n size_t dst_step = _dst.step;\n\n double nstripes = (double) (rows * cols) / 1024.0;\n parallel_for_(Range(0, rows), [&](const Range &r) {\n int y_start = r.start;\n int y_end = r.end;\n\n std::vector> hist256(channels, std::vector(256, 0));\n std::vector win_row_ptrs(ksize);\n\n // initial hist for the first pixel\n for (int dy = -radius; dy <= radius; dy++) {\n int y = std::clamp(y_start + dy, 0, rows - 1);\n const uint8_t *row_ptr = src_data + y * src_step;\n win_row_ptrs[dy + radius] = row_ptr;\n\n // always start from col 0, so need to add radius to first pixel\n for (int c = 0; c < channels; ++c)\n hist256[c][row_ptr[c]] += radius + 1;\n // add the rest of the row\n for (int dx = 1; dx <= radius; dx++) {\n int x = std::min(dx, cols - 1);\n const uint8_t *pixel_ptr = row_ptr + x * channels;\n for (int c = 0; c < channels; ++c) {\n hist256[c][pixel_ptr[c]]++;\n }\n }\n }\n\n int 
x_start = 0, x_end = cols, x_step = 1;\n // slide the window across the row\n for (int y = y_start; y < y_end; y++) {\n uint8_t *dst_pixel_ptr = dst_data + y * dst_step + x_start * channels;\n for (int x = x_start; x != x_end; x += x_step, dst_pixel_ptr += x_step * channels) {\n if (x != x_start) {\n // update hist by removing and adding pixel values as the window moves\n int x_remove = std::clamp(x - (radius + 1) * x_step, 0, cols - 1) * channels;\n int x_add = std::clamp(x + radius * x_step, 0, cols - 1) * channels;\n for (int dy = 0; dy < ksize; dy++) {", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1830973143", - "repo_full_name": "opencv/opencv", - "pr_number": 26422, - "pr_file": "modules/imgproc/src/median_blur.simd.hpp", - "discussion_id": "1830973143", - "commented_code": "@@ -489,7 +490,149 @@ medianBlur_8u_Om( const Mat& _src, Mat& _dst, int m )\n #undef N\n #undef UPDATE_ACC\n }\n+#endif\n+\n+// Binary search to find the median offset in the histogram\n+static inline void\n+get_median_ofs256(int &ofs, const uint16_t *hist, const int halfsum) {\n+ int s = 0, step = 128;\n+ uint16_t ds, m;\n \n+#if CV_SIMD || CV_SIMD_SCALABLE\n+ const int lanes = VTraits::vlanes();\n+#endif\n+\n+ ofs = 0;\n+ while (step) {\n+#if CV_SIMD || CV_SIMD_SCALABLE\n+ // Use SIMD instructions when the step is larger than or equal to the lane size\n+ if (step >= lanes) {\n+ v_uint16 v_ds = vx_load(hist + ofs);\n+ for (int i = lanes; i <= step - lanes; i += lanes)\n+ v_ds = v_add(v_ds, vx_load(hist + ofs + i));\n+ ds = v_reduce_sum(v_ds);\n+ }\n+ // For smaller steps, use scalar accumulation\n+ else\n+#endif\n+ {\n+ ds = hist[ofs];\n+ for (int i = 1; i < step; i++)\n+ ds += hist[ofs + i];\n+ }\n+\n+ // Determine if the cumulative sum has reached or surpassed half the total sum\n+ m = (s + ds) <= halfsum;\n+ ofs += m * step;\n+ s += ds & -m;\n+ step = step >> 1;\n+ }\n+}\n+\n+static void\n+medianBlur_8u(const Mat &_src, Mat &_dst, int ksize) {\n+ CV_INSTRUMENT_REGION();\n+\n+ const int channels = _src.channels();\n+ int radius = ksize / 2;\n+ CV_Assert(ksize % 2 == 1);\n+\n+ const int rows = _src.rows;\n+ const int cols = _src.cols;\n+ const int win_half_size = ksize * ksize / 2;\n+\n+ const uint8_t *src_data = _src.ptr();\n+ size_t src_step = _src.step;\n+ uint8_t *dst_data = _dst.ptr();\n+ size_t dst_step = _dst.step;\n+\n+ double nstripes = (double) (rows * cols) / 1024.0;\n+ parallel_for_(Range(0, rows), [&](const Range &r) {\n+ int y_start = r.start;\n+ int y_end = r.end;\n+\n+ std::vector> hist256(channels, std::vector(256, 0));\n+ std::vector win_row_ptrs(ksize);\n+\n+ // initial hist for the first pixel\n+ for (int dy = -radius; dy <= radius; dy++) {\n+ int y = std::clamp(y_start + dy, 0, rows - 1);\n+ const uint8_t *row_ptr = src_data + y * src_step;\n+ win_row_ptrs[dy + radius] = row_ptr;\n+\n+ // always start from col 0, so need to add radius to first pixel\n+ for (int c = 0; c < channels; ++c)\n+ hist256[c][row_ptr[c]] += radius + 1;\n+ // add the rest of the row\n+ for (int dx = 1; dx <= radius; dx++) {\n+ int x = std::min(dx, cols - 1);\n+ const uint8_t *pixel_ptr = row_ptr + x * channels;\n+ for (int c = 0; c < channels; ++c) {\n+ hist256[c][pixel_ptr[c]]++;\n+ }\n+ }\n+ }\n+\n+ int x_start = 0, x_end = cols, x_step = 1;\n+ // slide the window across the row\n+ for (int y = y_start; y < y_end; y++) {\n+ uint8_t *dst_pixel_ptr = dst_data + y * dst_step + x_start * channels;\n+ for (int x = x_start; x != x_end; x += x_step, dst_pixel_ptr += x_step * 
channels) {\n+ if (x != x_start) {\n+ // update hist by removing and adding pixel values as the window moves\n+ int x_remove = std::clamp(x - (radius + 1) * x_step, 0, cols - 1) * channels;\n+ int x_add = std::clamp(x + radius * x_step, 0, cols - 1) * channels;\n+ for (int dy = 0; dy < ksize; dy++) {", - "comment_created_at": "2024-11-06T12:54:38+00:00", - "comment_author": "vpisarev", - "comment_body": "please, create separate versions of histogram update loop for `channels == 1`, `channels == 3` and the generic case.\r\nstore separate pointers for 1-channel hist256 (hist0=hist256[0]) and 3-channel hist256 (`hist0 = hist256[0], hist1=hist256[1] and hist2=hist256[2]`)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1975941881", - "pr_number": 26958, - "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/pyramids.hpp", - "created_at": "2025-02-28T19:53:20+00:00", - "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n#ifndef OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED\n#define OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED\n\n#include \n\nnamespace cv { namespace cv_hal_rvv { namespace pyramids {\n\n#undef cv_hal_pyrdown\n#define cv_hal_pyrdown cv::cv_hal_rvv::pyramids::pyrDown\n#undef cv_hal_pyrup\n#define cv_hal_pyrup cv::cv_hal_rvv::pyramids::pyrUp\n\ntemplate struct rvv;\n\ntemplate<> struct rvv\n{\n static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n static inline vuint8m1_t vle_T(const uchar* a, size_t b) { return __riscv_vle8_v_u8m1(a, b); }\n static inline vint32m4_t vle_WT(const int* a, size_t b) { return __riscv_vle32_v_i32m4(a, b); }\n static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n static inline vuint8m1_t vlse_T(const uchar* a, ptrdiff_t b, size_t c) { return __riscv_vlse8_v_u8m1(a, b, c); }\n static inline vuint8m1_t vloxei_T(const uchar* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_u8m1(a, b, c); }\n static inline void vse_T(uchar* a, vuint8m1_t b, size_t c) { return __riscv_vse8(a, b, c); }\n static inline vint32m4_t vcvt_T_WT(vuint8m1_t a, size_t b) { return __riscv_vreinterpret_v_u32m4_i32m4(__riscv_vzext_vf4(a, b)); }\n static inline vuint8m1_t vcvt_WT_T(vint32m4_t a, int b, size_t c) { return __riscv_vncvt_x(__riscv_vncvt_x(__riscv_vreinterpret_v_i32m4_u32m4(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c)), c), c); }\n};\n\ntemplate<> struct rvv\n{\n static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n static inline vint16m2_t vle_T(const short* a, size_t b) { return __riscv_vle16_v_i16m2(a, b); }\n static inline vint32m4_t vle_WT(const int* a, size_t b) { return __riscv_vle32_v_i32m4(a, b); }\n static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n static inline vint16m2_t vlse_T(const short* a, ptrdiff_t b, size_t c) { return __riscv_vlse16_v_i16m2(a, b, c); }\n static inline vint16m2_t vloxei_T(const short* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_i16m2(a, b, c); }\n static inline void vse_T(short* a, vint16m2_t b, size_t c) { return __riscv_vse16(a, b, c); }\n static inline vint32m4_t vcvt_T_WT(vint16m2_t a, size_t b) { return __riscv_vsext_vf2(a, b); }\n static inline vint16m2_t vcvt_WT_T(vint32m4_t a, int b, size_t c) { return __riscv_vncvt_x(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c), c); }\n};\n\ntemplate<> struct rvv\n{\n static 
inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n static inline vfloat32m4_t vle_T(const float* a, size_t b) { return __riscv_vle32_v_f32m4(a, b); }\n static inline vfloat32m4_t vle_WT(const float* a, size_t b) { return __riscv_vle32_v_f32m4(a, b); }\n static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n static inline vfloat32m4_t vlse_T(const float* a, ptrdiff_t b, size_t c) { return __riscv_vlse32_v_f32m4(a, b, c); }\n static inline vfloat32m4_t vloxei_T(const float* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_f32m4(a, b, c); }\n static inline void vse_T(float* a, vfloat32m4_t b, size_t c) { return __riscv_vse32(a, b, c); }\n};\n\ntemplate struct pyrDownVec0\n{\n void operator()(const T* src, WT* row, const uint* tabM, int start, int end)\n {\n int vl;\n switch (start)\n {\n case 1:\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 - 2, 2 * sizeof(T), vl), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 - 1, 2 * sizeof(T), vl), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2, 2 * sizeof(T), vl), vl);\n auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 + 1, 2 * sizeof(T), vl), vl);\n auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 + 2, 2 * sizeof(T), vl), vl);\n __riscv_vse32(row + x, __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n }\n break;\n case 2:\n for( int x = start / 2; x < end / 2; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 2 - x);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 2, 4 * sizeof(T), vl), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 2, 4 * sizeof(T), vl), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 2, 4 * sizeof(T), vl), vl);\n auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 2, 4 * sizeof(T), vl), vl);\n auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 2, 4 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 2, 2 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 2 + 1, 4 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 2 + 1, 4 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 2 + 1, 2 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n }\n break;\n case 3:\n for( int x = start / 3; x < end / 3; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 3 - x);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3, 6 * sizeof(T), vl), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3, 6 * sizeof(T), vl), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3, 6 * sizeof(T), vl), vl);\n auto vec_src3 
= rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3, 6 * sizeof(T), vl), vl);\n auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3, 6 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 3, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3 + 1, 6 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3 + 1, 6 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 3 + 1, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3 + 2, 6 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3 + 2, 6 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 3 + 2, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n }\n break;\n case 4:\n for( int x = start / 4; x < end / 4; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 4 - x);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4, 8 * sizeof(T), vl), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4, 8 * sizeof(T), vl), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4, 8 * sizeof(T), vl), vl);\n auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4, 8 * sizeof(T), vl), vl);\n auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4, 8 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 4, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 1, 8 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 1, 8 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 4 + 1, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 2, 8 * 
sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 2, 8 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 4 + 2, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 3, 8 * sizeof(T), vl), vl);\n vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 3, 8 * sizeof(T), vl), vl);\n vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n __riscv_vsse32(row + x * 4 + 3, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n }\n break;\n default:\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_tabM = rvv::vle_M(tabM + x, vl);\n vec_tabM = __riscv_vmul(__riscv_vsub(vec_tabM, start * 2, vl), sizeof(T), vl);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n auto vec_src3 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n auto vec_src4 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n __riscv_vse32(row + x, __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n }\n }\n }\n};\ntemplate<> struct pyrDownVec0\n{\n void operator()(const float* src, float* row, const uint* tabM, int start, int end)\n {\n int vl;\n switch (start)\n {\n case 1:\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vlse_T(src + x * 2 - 2, 2 * sizeof(float), vl);\n auto vec_src1 = rvv::vlse_T(src + x * 2 - 1, 2 * sizeof(float), vl);\n auto vec_src2 = rvv::vlse_T(src + x * 2, 2 * sizeof(float), vl);\n auto vec_src3 = rvv::vlse_T(src + x * 2 + 1, 2 * sizeof(float), vl);\n auto vec_src4 = rvv::vlse_T(src + x * 2 + 2, 2 * sizeof(float), vl);\n __riscv_vse32(row + x, __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n }\n break;\n case 2:\n for( int x = start / 2; x < end / 2; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 2 - x);\n auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 2, 4 * sizeof(float), vl);\n auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 2, 4 * sizeof(float), vl);\n auto vec_src2 = rvv::vlse_T(src + (x * 2) * 2, 4 * sizeof(float), vl);\n auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 2, 4 * sizeof(float), vl);\n auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) 
* 2, 4 * sizeof(float), vl);\n __riscv_vsse32(row + x * 2, 2 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 2 + 1, 4 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 2 + 1, 4 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 2 + 1, 4 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 2 + 1, 4 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 2 + 1, 4 * sizeof(float), vl);\n __riscv_vsse32(row + x * 2 + 1, 2 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n }\n break;\n case 3:\n for( int x = start / 3; x < end / 3; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 3 - x);\n auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3, 6 * sizeof(float), vl);\n auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3, 6 * sizeof(float), vl);\n auto vec_src2 = rvv::vlse_T(src + (x * 2) * 3, 6 * sizeof(float), vl);\n auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3, 6 * sizeof(float), vl);\n auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3, 6 * sizeof(float), vl);\n __riscv_vsse32(row + x * 3, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3 + 1, 6 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3 + 1, 6 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 3 + 1, 6 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3 + 1, 6 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3 + 1, 6 * sizeof(float), vl);\n __riscv_vsse32(row + x * 3 + 1, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3 + 2, 6 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3 + 2, 6 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 3 + 2, 6 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3 + 2, 6 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3 + 2, 6 * sizeof(float), vl);\n __riscv_vsse32(row + x * 3 + 2, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n }\n break;\n case 4:\n for( int x = start / 4; x < end / 4; x += vl )\n {\n vl = rvv::vsetvl_WT(end / 4 - x);\n auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4, 8 * sizeof(float), vl);\n auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4, 8 * sizeof(float), vl);\n auto vec_src2 = rvv::vlse_T(src + (x * 2) * 4, 8 * sizeof(float), vl);\n auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4, 8 * sizeof(float), vl);\n auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4, 8 * sizeof(float), vl);\n __riscv_vsse32(row + x * 4, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 1, 8 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 1, 8 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 1, 8 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 
+ 1, 8 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 1, 8 * sizeof(float), vl);\n __riscv_vsse32(row + x * 4 + 1, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 2, 8 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 2, 8 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 2, 8 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 2, 8 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 2, 8 * sizeof(float), vl);\n __riscv_vsse32(row + x * 4 + 2, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 3, 8 * sizeof(float), vl);\n vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 3, 8 * sizeof(float), vl);\n vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 3, 8 * sizeof(float), vl);\n vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 3, 8 * sizeof(float), vl);\n vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 3, 8 * sizeof(float), vl);\n __riscv_vsse32(row + x * 4 + 3, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n }\n break;\n default:\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_tabM = rvv::vle_M(tabM + x, vl);\n vec_tabM = __riscv_vmul(__riscv_vsub(vec_tabM, start * 2, vl), sizeof(float), vl);\n auto vec_src0 = rvv::vloxei_T(src, vec_tabM, vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n auto vec_src1 = rvv::vloxei_T(src, vec_tabM, vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n auto vec_src2 = rvv::vloxei_T(src, vec_tabM, vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n auto vec_src3 = rvv::vloxei_T(src, vec_tabM, vl);\n vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n auto vec_src4 = rvv::vloxei_T(src, vec_tabM, vl);\n __riscv_vse32(row + x, __riscv_vfmadd(__riscv_vfadd(__riscv_vfadd(vec_src1, vec_src2, vl), vec_src3, vl), 4,\n __riscv_vfadd(__riscv_vfadd(vec_src0, vec_src4, vl), __riscv_vfadd(vec_src2, vec_src2, vl), vl), vl), vl);\n }\n }\n }\n};\n\ntemplate struct pyrDownVec1\n{\n void operator()(WT* row0, WT* row1, WT* row2, WT* row3, WT* row4, T* dst, int end)\n {\n int vl;\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n auto vec_src3 = rvv::vle_WT(row3 + x, vl);\n auto vec_src4 = rvv::vle_WT(row4 + x, vl);\n rvv::vse_T(dst + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), 8, vl), vl);\n }\n }\n};\ntemplate<> struct pyrDownVec1\n{\n void operator()(float* row0, float* row1, float* row2, float* row3, float* row4, float* dst, int end)\n {\n int vl;\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n auto vec_src3 = rvv::vle_WT(row3 + x, vl);\n auto vec_src4 = rvv::vle_WT(row4 + 
x, vl);\n rvv::vse_T(dst + x, __riscv_vfmul(__riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), 1.f / 256.f, vl), vl);\n }\n }\n};\n\ntemplate struct pyrUpVec0\n{\n void operator()(const T* src, WT* row, const uint* dtab, int start, int end)\n {\n int vl;\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vcvt_T_WT(rvv::vle_T(src + x - start, vl), vl);\n auto vec_src1 = rvv::vcvt_T_WT(rvv::vle_T(src + x, vl), vl);\n auto vec_src2 = rvv::vcvt_T_WT(rvv::vle_T(src + x + start, vl), vl);\n\n auto vec_dtab = rvv::vle_M(dtab + x, vl);\n vec_dtab = __riscv_vmul(vec_dtab, sizeof(WT), vl);\n __riscv_vsoxei32(row, vec_dtab, __riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), vl);\n __riscv_vsoxei32(row, __riscv_vadd(vec_dtab, start * sizeof(WT), vl), __riscv_vsll(__riscv_vadd(vec_src1, vec_src2, vl), 2, vl), vl);\n }\n }\n};\ntemplate<> struct pyrUpVec0\n{\n void operator()(const float* src, float* row, const uint* dtab, int start, int end)\n {\n int vl;\n for( int x = start; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_T(src + x - start, vl);\n auto vec_src1 = rvv::vle_T(src + x, vl);\n auto vec_src2 = rvv::vle_T(src + x + start, vl);\n\n auto vec_dtab = rvv::vle_M(dtab + x, vl);\n vec_dtab = __riscv_vmul(vec_dtab, sizeof(float), vl);\n __riscv_vsoxei32(row, vec_dtab, __riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), vl);\n __riscv_vsoxei32(row, __riscv_vadd(vec_dtab, start * sizeof(float), vl), __riscv_vfmul(__riscv_vfadd(vec_src1, vec_src2, vl), 4, vl), vl);\n }\n }\n};\n\ntemplate struct pyrUpVec1\n{\n void operator()(WT* row0, WT* row1, WT* row2, T* dst0, T* dst1, int end)\n {\n int vl;\n if (dst0 != dst1)\n {\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n rvv::vse_T(dst0 + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), 6, vl), vl);\n rvv::vse_T(dst1 + x, rvv::vcvt_WT_T(__riscv_vsll(__riscv_vadd(vec_src1, vec_src2, vl), 2, vl), 6, vl), vl);\n }\n }\n else\n {\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n rvv::vse_T(dst0 + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), 6, vl), vl);\n }\n }\n }\n};\ntemplate<> struct pyrUpVec1\n{\n void operator()(float* row0, float* row1, float* row2, float* dst0, float* dst1, int end)\n {\n int vl;\n if (dst0 != dst1)\n {\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n rvv::vse_T(dst0 + x, __riscv_vfmul(__riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), 1.f / 64.f, vl), vl);\n rvv::vse_T(dst1 + x, __riscv_vfmul(__riscv_vfadd(vec_src1, vec_src2, vl), 1.f / 16.f, vl), vl);\n }\n }\n else\n {\n for( int x = 0 ; x < end; x += vl )\n {\n vl = rvv::vsetvl_WT(end - x);\n auto vec_src0 = 
rvv::vle_WT(row0 + x, vl);\n auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n rvv::vse_T(dst0 + x, __riscv_vfmul(__riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), 1.f / 64.f, vl), vl);\n }\n }\n }\n};\n\ntemplate\nstruct PyrDownInvoker : ParallelLoopBody\n{\n PyrDownInvoker(const uchar* _src_data, size_t _src_step, int _src_width, int _src_height, uchar* _dst_data, size_t _dst_step, int _dst_width, int _dst_height, int _cn, int _borderType, int* _tabR, int* _tabM, int* _tabL)\n {\n src_data = _src_data;\n src_step = _src_step;\n src_width = _src_width;\n src_height = _src_height;\n dst_data = _dst_data;\n dst_step = _dst_step;\n dst_width = _dst_width;\n dst_height = _dst_height;\n cn = _cn;\n borderType = _borderType;\n tabR = _tabR;\n tabM = _tabM;\n tabL = _tabL;\n }\n\n void operator()(const Range& range) const CV_OVERRIDE;\n\n const uchar* src_data;\n size_t src_step;\n int src_width;\n int src_height;\n uchar* dst_data;\n size_t dst_step;\n int dst_width;\n int dst_height;\n int cn;\n int borderType;\n int* tabR;\n int* tabM;\n int* tabL;\n};\n\n// the algorithm is copied from imgproc/src/pyramids.cpp,\n// in the function template void cv::pyrDown_\ntemplate\ninline int pyrDown(const uchar* src_data, size_t src_step, int src_width, int src_height, uchar* dst_data, size_t dst_step, int dst_width, int dst_height, int cn, int borderType)\n{\n const int PD_SZ = 5;\n\n std::vector _tabM(dst_width * cn), _tabL(cn * (PD_SZ + 2)), _tabR(cn * (PD_SZ + 2));\n int *tabM = _tabM.data(), *tabL = _tabL.data(), *tabR = _tabR.data();\n\n CV_Assert( src_width > 0 && src_height > 0 &&\n std::abs(dst_width*2 - src_width) <= 2 &&\n std::abs(dst_height*2 - src_height) <= 2 );\n int width0 = std::min((src_width-PD_SZ/2-1)/2 + 1, dst_width);\n\n for (int x = 0; x <= PD_SZ+1; x++)\n {\n int sx0 = borderInterpolate(x - PD_SZ/2, src_width, borderType)*cn;", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1975941881", - "repo_full_name": "opencv/opencv", - "pr_number": 26958, - "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/pyramids.hpp", - "discussion_id": "1975941881", - "commented_code": "@@ -0,0 +1,681 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+#ifndef OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED\n+#define OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED\n+\n+#include \n+\n+namespace cv { namespace cv_hal_rvv { namespace pyramids {\n+\n+#undef cv_hal_pyrdown\n+#define cv_hal_pyrdown cv::cv_hal_rvv::pyramids::pyrDown\n+#undef cv_hal_pyrup\n+#define cv_hal_pyrup cv::cv_hal_rvv::pyramids::pyrUp\n+\n+template struct rvv;\n+\n+template<> struct rvv\n+{\n+ static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n+ static inline vuint8m1_t vle_T(const uchar* a, size_t b) { return __riscv_vle8_v_u8m1(a, b); }\n+ static inline vint32m4_t vle_WT(const int* a, size_t b) { return __riscv_vle32_v_i32m4(a, b); }\n+ static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n+ static inline vuint8m1_t vlse_T(const uchar* a, ptrdiff_t b, size_t c) { return __riscv_vlse8_v_u8m1(a, b, c); }\n+ static inline vuint8m1_t vloxei_T(const uchar* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_u8m1(a, b, c); }\n+ static inline void vse_T(uchar* a, vuint8m1_t b, size_t c) { return __riscv_vse8(a, b, c); }\n+ static inline vint32m4_t 
vcvt_T_WT(vuint8m1_t a, size_t b) { return __riscv_vreinterpret_v_u32m4_i32m4(__riscv_vzext_vf4(a, b)); }\n+ static inline vuint8m1_t vcvt_WT_T(vint32m4_t a, int b, size_t c) { return __riscv_vncvt_x(__riscv_vncvt_x(__riscv_vreinterpret_v_i32m4_u32m4(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c)), c), c); }\n+};\n+\n+template<> struct rvv\n+{\n+ static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n+ static inline vint16m2_t vle_T(const short* a, size_t b) { return __riscv_vle16_v_i16m2(a, b); }\n+ static inline vint32m4_t vle_WT(const int* a, size_t b) { return __riscv_vle32_v_i32m4(a, b); }\n+ static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n+ static inline vint16m2_t vlse_T(const short* a, ptrdiff_t b, size_t c) { return __riscv_vlse16_v_i16m2(a, b, c); }\n+ static inline vint16m2_t vloxei_T(const short* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_i16m2(a, b, c); }\n+ static inline void vse_T(short* a, vint16m2_t b, size_t c) { return __riscv_vse16(a, b, c); }\n+ static inline vint32m4_t vcvt_T_WT(vint16m2_t a, size_t b) { return __riscv_vsext_vf2(a, b); }\n+ static inline vint16m2_t vcvt_WT_T(vint32m4_t a, int b, size_t c) { return __riscv_vncvt_x(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c), c); }\n+};\n+\n+template<> struct rvv\n+{\n+ static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n+ static inline vfloat32m4_t vle_T(const float* a, size_t b) { return __riscv_vle32_v_f32m4(a, b); }\n+ static inline vfloat32m4_t vle_WT(const float* a, size_t b) { return __riscv_vle32_v_f32m4(a, b); }\n+ static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n+ static inline vfloat32m4_t vlse_T(const float* a, ptrdiff_t b, size_t c) { return __riscv_vlse32_v_f32m4(a, b, c); }\n+ static inline vfloat32m4_t vloxei_T(const float* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_f32m4(a, b, c); }\n+ static inline void vse_T(float* a, vfloat32m4_t b, size_t c) { return __riscv_vse32(a, b, c); }\n+};\n+\n+template struct pyrDownVec0\n+{\n+ void operator()(const T* src, WT* row, const uint* tabM, int start, int end)\n+ {\n+ int vl;\n+ switch (start)\n+ {\n+ case 1:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 - 2, 2 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 - 1, 2 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2, 2 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 + 1, 2 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 + 2, 2 * sizeof(T), vl), vl);\n+ __riscv_vse32(row + x, __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ case 2:\n+ for( int x = start / 2; x < end / 2; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 2 - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x 
* 2 + 2) * 2, 4 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 2, 2 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 2 + 1, 2 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ case 3:\n+ for( int x = start / 3; x < end / 3; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 3 - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3, 6 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 3, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 3 + 1, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 3 + 2, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ case 4:\n+ for( int x = start / 4; x < end / 4; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 4 - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x 
* 2 - 1) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4 + 1, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4 + 2, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4 + 3, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ default:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_tabM = rvv::vle_M(tabM + x, vl);\n+ vec_tabM = __riscv_vmul(__riscv_vsub(vec_tabM, start * 2, vl), sizeof(T), vl);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vloxei_T(src, 
vec_tabM, vl), vl);\n+ __riscv_vse32(row + x, __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ }\n+ }\n+};\n+template<> struct pyrDownVec0\n+{\n+ void operator()(const float* src, float* row, const uint* tabM, int start, int end)\n+ {\n+ int vl;\n+ switch (start)\n+ {\n+ case 1:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vlse_T(src + x * 2 - 2, 2 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + x * 2 - 1, 2 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + x * 2, 2 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + x * 2 + 1, 2 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + x * 2 + 2, 2 * sizeof(float), vl);\n+ __riscv_vse32(row + x, __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ case 2:\n+ for( int x = start / 2; x < end / 2; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 2 - x);\n+ auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 2, 4 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 2, 4 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + (x * 2) * 2, 4 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 2, 4 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 2, 4 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 2, 2 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 2 + 1, 4 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 2 + 1, 2 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ case 3:\n+ for( int x = start / 3; x < end / 3; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 3 - x);\n+ auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3, 6 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3, 6 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + (x * 2) * 3, 6 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3, 6 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3, 6 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 3, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3 + 1, 6 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 3 + 1, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, 
vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3 + 2, 6 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 3 + 2, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ case 4:\n+ for( int x = start / 4; x < end / 4; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 4 - x);\n+ auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4, 8 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4, 8 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + (x * 2) * 4, 8 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4, 8 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 1, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4 + 1, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 2, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4 + 2, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 3, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4 + 3, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ default:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_tabM = rvv::vle_M(tabM + x, vl);\n+ vec_tabM = __riscv_vmul(__riscv_vsub(vec_tabM, start * 2, vl), sizeof(float), vl);\n+ auto vec_src0 = rvv::vloxei_T(src, vec_tabM, vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src1 = rvv::vloxei_T(src, vec_tabM, vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src2 = rvv::vloxei_T(src, vec_tabM, vl);\n+ 
vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src3 = rvv::vloxei_T(src, vec_tabM, vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src4 = rvv::vloxei_T(src, vec_tabM, vl);\n+ __riscv_vse32(row + x, __riscv_vfmadd(__riscv_vfadd(__riscv_vfadd(vec_src1, vec_src2, vl), vec_src3, vl), 4,\n+ __riscv_vfadd(__riscv_vfadd(vec_src0, vec_src4, vl), __riscv_vfadd(vec_src2, vec_src2, vl), vl), vl), vl);\n+ }\n+ }\n+ }\n+};\n+\n+template struct pyrDownVec1\n+{\n+ void operator()(WT* row0, WT* row1, WT* row2, WT* row3, WT* row4, T* dst, int end)\n+ {\n+ int vl;\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ auto vec_src3 = rvv::vle_WT(row3 + x, vl);\n+ auto vec_src4 = rvv::vle_WT(row4 + x, vl);\n+ rvv::vse_T(dst + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), 8, vl), vl);\n+ }\n+ }\n+};\n+template<> struct pyrDownVec1\n+{\n+ void operator()(float* row0, float* row1, float* row2, float* row3, float* row4, float* dst, int end)\n+ {\n+ int vl;\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ auto vec_src3 = rvv::vle_WT(row3 + x, vl);\n+ auto vec_src4 = rvv::vle_WT(row4 + x, vl);\n+ rvv::vse_T(dst + x, __riscv_vfmul(__riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), 1.f / 256.f, vl), vl);\n+ }\n+ }\n+};\n+\n+template struct pyrUpVec0\n+{\n+ void operator()(const T* src, WT* row, const uint* dtab, int start, int end)\n+ {\n+ int vl;\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vle_T(src + x - start, vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vle_T(src + x, vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vle_T(src + x + start, vl), vl);\n+\n+ auto vec_dtab = rvv::vle_M(dtab + x, vl);\n+ vec_dtab = __riscv_vmul(vec_dtab, sizeof(WT), vl);\n+ __riscv_vsoxei32(row, vec_dtab, __riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), vl);\n+ __riscv_vsoxei32(row, __riscv_vadd(vec_dtab, start * sizeof(WT), vl), __riscv_vsll(__riscv_vadd(vec_src1, vec_src2, vl), 2, vl), vl);\n+ }\n+ }\n+};\n+template<> struct pyrUpVec0\n+{\n+ void operator()(const float* src, float* row, const uint* dtab, int start, int end)\n+ {\n+ int vl;\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_T(src + x - start, vl);\n+ auto vec_src1 = rvv::vle_T(src + x, vl);\n+ auto vec_src2 = rvv::vle_T(src + x + start, vl);\n+\n+ auto vec_dtab = rvv::vle_M(dtab + x, vl);\n+ vec_dtab = __riscv_vmul(vec_dtab, sizeof(float), vl);\n+ __riscv_vsoxei32(row, vec_dtab, __riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), vl);\n+ __riscv_vsoxei32(row, __riscv_vadd(vec_dtab, start * sizeof(float), vl), __riscv_vfmul(__riscv_vfadd(vec_src1, vec_src2, vl), 4, vl), vl);\n+ }\n+ }\n+};\n+\n+template struct pyrUpVec1\n+{\n+ void operator()(WT* row0, WT* row1, WT* row2, T* dst0, T* dst1, 
int end)\n+ {\n+ int vl;\n+ if (dst0 != dst1)\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), 6, vl), vl);\n+ rvv::vse_T(dst1 + x, rvv::vcvt_WT_T(__riscv_vsll(__riscv_vadd(vec_src1, vec_src2, vl), 2, vl), 6, vl), vl);\n+ }\n+ }\n+ else\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), 6, vl), vl);\n+ }\n+ }\n+ }\n+};\n+template<> struct pyrUpVec1\n+{\n+ void operator()(float* row0, float* row1, float* row2, float* dst0, float* dst1, int end)\n+ {\n+ int vl;\n+ if (dst0 != dst1)\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, __riscv_vfmul(__riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), 1.f / 64.f, vl), vl);\n+ rvv::vse_T(dst1 + x, __riscv_vfmul(__riscv_vfadd(vec_src1, vec_src2, vl), 1.f / 16.f, vl), vl);\n+ }\n+ }\n+ else\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, __riscv_vfmul(__riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), 1.f / 64.f, vl), vl);\n+ }\n+ }\n+ }\n+};\n+\n+template\n+struct PyrDownInvoker : ParallelLoopBody\n+{\n+ PyrDownInvoker(const uchar* _src_data, size_t _src_step, int _src_width, int _src_height, uchar* _dst_data, size_t _dst_step, int _dst_width, int _dst_height, int _cn, int _borderType, int* _tabR, int* _tabM, int* _tabL)\n+ {\n+ src_data = _src_data;\n+ src_step = _src_step;\n+ src_width = _src_width;\n+ src_height = _src_height;\n+ dst_data = _dst_data;\n+ dst_step = _dst_step;\n+ dst_width = _dst_width;\n+ dst_height = _dst_height;\n+ cn = _cn;\n+ borderType = _borderType;\n+ tabR = _tabR;\n+ tabM = _tabM;\n+ tabL = _tabL;\n+ }\n+\n+ void operator()(const Range& range) const CV_OVERRIDE;\n+\n+ const uchar* src_data;\n+ size_t src_step;\n+ int src_width;\n+ int src_height;\n+ uchar* dst_data;\n+ size_t dst_step;\n+ int dst_width;\n+ int dst_height;\n+ int cn;\n+ int borderType;\n+ int* tabR;\n+ int* tabM;\n+ int* tabL;\n+};\n+\n+// the algorithm is copied from imgproc/src/pyramids.cpp,\n+// in the function template void cv::pyrDown_\n+template\n+inline int pyrDown(const uchar* src_data, size_t src_step, int src_width, int src_height, uchar* dst_data, size_t dst_step, int dst_width, int dst_height, int cn, int borderType)\n+{\n+ const int PD_SZ = 5;\n+\n+ std::vector _tabM(dst_width * cn), _tabL(cn * (PD_SZ + 2)), _tabR(cn * (PD_SZ + 2));\n+ int *tabM = _tabM.data(), *tabL = _tabL.data(), *tabR = _tabR.data();\n+\n+ CV_Assert( src_width > 0 && src_height > 0 &&\n+ std::abs(dst_width*2 - src_width) <= 2 &&\n+ std::abs(dst_height*2 - src_height) <= 2 
);\n+ int width0 = std::min((src_width-PD_SZ/2-1)/2 + 1, dst_width);\n+\n+ for (int x = 0; x <= PD_SZ+1; x++)\n+ {\n+ int sx0 = borderInterpolate(x - PD_SZ/2, src_width, borderType)*cn;", - "comment_created_at": "2025-02-28T19:53:20+00:00", - "comment_author": "mshabunin", - "comment_body": "Not sure what to do with `borderInterpolate`. Perhaps it should be implemented here as well (maybe in limited version only for acceptable border modes).", - "pr_file_module": null - }, - { - "comment_id": "1976248637", - "repo_full_name": "opencv/opencv", - "pr_number": 26958, - "pr_file": "3rdparty/hal_rvv/hal_rvv_1p0/pyramids.hpp", - "discussion_id": "1975941881", - "commented_code": "@@ -0,0 +1,681 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+#ifndef OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED\n+#define OPENCV_HAL_RVV_PYRAMIDS_HPP_INCLUDED\n+\n+#include \n+\n+namespace cv { namespace cv_hal_rvv { namespace pyramids {\n+\n+#undef cv_hal_pyrdown\n+#define cv_hal_pyrdown cv::cv_hal_rvv::pyramids::pyrDown\n+#undef cv_hal_pyrup\n+#define cv_hal_pyrup cv::cv_hal_rvv::pyramids::pyrUp\n+\n+template struct rvv;\n+\n+template<> struct rvv\n+{\n+ static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n+ static inline vuint8m1_t vle_T(const uchar* a, size_t b) { return __riscv_vle8_v_u8m1(a, b); }\n+ static inline vint32m4_t vle_WT(const int* a, size_t b) { return __riscv_vle32_v_i32m4(a, b); }\n+ static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n+ static inline vuint8m1_t vlse_T(const uchar* a, ptrdiff_t b, size_t c) { return __riscv_vlse8_v_u8m1(a, b, c); }\n+ static inline vuint8m1_t vloxei_T(const uchar* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_u8m1(a, b, c); }\n+ static inline void vse_T(uchar* a, vuint8m1_t b, size_t c) { return __riscv_vse8(a, b, c); }\n+ static inline vint32m4_t vcvt_T_WT(vuint8m1_t a, size_t b) { return __riscv_vreinterpret_v_u32m4_i32m4(__riscv_vzext_vf4(a, b)); }\n+ static inline vuint8m1_t vcvt_WT_T(vint32m4_t a, int b, size_t c) { return __riscv_vncvt_x(__riscv_vncvt_x(__riscv_vreinterpret_v_i32m4_u32m4(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c)), c), c); }\n+};\n+\n+template<> struct rvv\n+{\n+ static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n+ static inline vint16m2_t vle_T(const short* a, size_t b) { return __riscv_vle16_v_i16m2(a, b); }\n+ static inline vint32m4_t vle_WT(const int* a, size_t b) { return __riscv_vle32_v_i32m4(a, b); }\n+ static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n+ static inline vint16m2_t vlse_T(const short* a, ptrdiff_t b, size_t c) { return __riscv_vlse16_v_i16m2(a, b, c); }\n+ static inline vint16m2_t vloxei_T(const short* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_i16m2(a, b, c); }\n+ static inline void vse_T(short* a, vint16m2_t b, size_t c) { return __riscv_vse16(a, b, c); }\n+ static inline vint32m4_t vcvt_T_WT(vint16m2_t a, size_t b) { return __riscv_vsext_vf2(a, b); }\n+ static inline vint16m2_t vcvt_WT_T(vint32m4_t a, int b, size_t c) { return __riscv_vncvt_x(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c), c); }\n+};\n+\n+template<> struct rvv\n+{\n+ static inline size_t vsetvl_WT(size_t a) { return __riscv_vsetvl_e32m4(a); }\n+ static inline vfloat32m4_t vle_T(const float* a, size_t b) { return 
__riscv_vle32_v_f32m4(a, b); }\n+ static inline vfloat32m4_t vle_WT(const float* a, size_t b) { return __riscv_vle32_v_f32m4(a, b); }\n+ static inline vuint32m4_t vle_M(const uint* a, size_t b) { return __riscv_vle32_v_u32m4(a, b); }\n+ static inline vfloat32m4_t vlse_T(const float* a, ptrdiff_t b, size_t c) { return __riscv_vlse32_v_f32m4(a, b, c); }\n+ static inline vfloat32m4_t vloxei_T(const float* a, vuint32m4_t b, size_t c) { return __riscv_vloxei32_v_f32m4(a, b, c); }\n+ static inline void vse_T(float* a, vfloat32m4_t b, size_t c) { return __riscv_vse32(a, b, c); }\n+};\n+\n+template struct pyrDownVec0\n+{\n+ void operator()(const T* src, WT* row, const uint* tabM, int start, int end)\n+ {\n+ int vl;\n+ switch (start)\n+ {\n+ case 1:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 - 2, 2 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 - 1, 2 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2, 2 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 + 1, 2 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + x * 2 + 2, 2 * sizeof(T), vl), vl);\n+ __riscv_vse32(row + x, __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ case 2:\n+ for( int x = start / 2; x < end / 2; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 2 - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 2, 4 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 2, 4 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 2, 2 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 2 + 1, 4 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 2 + 1, 2 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ case 3:\n+ for( int x = start / 3; x < end / 3; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 3 - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3, 6 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3, 6 * sizeof(T), vl), vl);\n+ auto 
vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3, 6 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 3, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3 + 1, 6 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 3 + 1, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 3 + 2, 6 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 3 + 2, 3 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ case 4:\n+ for( int x = start / 4; x < end / 4; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 4 - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4, 8 * sizeof(T), vl), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 1, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4 + 1, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src2 = 
rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 2, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4 + 2, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ vec_src0 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src1 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 - 1) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src2 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src3 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 1) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ vec_src4 = rvv::vcvt_T_WT(rvv::vlse_T(src + (x * 2 + 2) * 4 + 3, 8 * sizeof(T), vl), vl);\n+ __riscv_vsse32(row + x * 4 + 3, 4 * sizeof(WT), __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ break;\n+ default:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_tabM = rvv::vle_M(tabM + x, vl);\n+ vec_tabM = __riscv_vmul(__riscv_vsub(vec_tabM, start * 2, vl), sizeof(T), vl);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src3 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(T), vl);\n+ auto vec_src4 = rvv::vcvt_T_WT(rvv::vloxei_T(src, vec_tabM, vl), vl);\n+ __riscv_vse32(row + x, __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), vl);\n+ }\n+ }\n+ }\n+};\n+template<> struct pyrDownVec0\n+{\n+ void operator()(const float* src, float* row, const uint* tabM, int start, int end)\n+ {\n+ int vl;\n+ switch (start)\n+ {\n+ case 1:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vlse_T(src + x * 2 - 2, 2 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + x * 2 - 1, 2 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + x * 2, 2 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + x * 2 + 1, 2 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + x * 2 + 2, 2 * sizeof(float), vl);\n+ __riscv_vse32(row + x, __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ case 2:\n+ for( int x = start / 2; x < end / 2; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 2 - x);\n+ auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 2, 4 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 2, 4 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + (x * 2) * 2, 4 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 2, 4 * sizeof(float), vl);\n+ auto vec_src4 = 
rvv::vlse_T(src + (x * 2 + 2) * 2, 4 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 2, 2 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 2 + 1, 4 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 2 + 1, 4 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 2 + 1, 2 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ case 3:\n+ for( int x = start / 3; x < end / 3; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 3 - x);\n+ auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3, 6 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3, 6 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + (x * 2) * 3, 6 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3, 6 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3, 6 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 3, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3 + 1, 6 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3 + 1, 6 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 3 + 1, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 3 + 2, 6 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 3 + 2, 6 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 3 + 2, 3 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ case 4:\n+ for( int x = start / 4; x < end / 4; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end / 4 - x);\n+ auto vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4, 8 * sizeof(float), vl);\n+ auto vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4, 8 * sizeof(float), vl);\n+ auto vec_src2 = rvv::vlse_T(src + (x * 2) * 4, 8 * sizeof(float), vl);\n+ auto vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4, 8 * sizeof(float), vl);\n+ auto vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 4 
+ 1, 8 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 1, 8 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 1, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4 + 1, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 2, 8 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 2, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4 + 2, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ vec_src0 = rvv::vlse_T(src + (x * 2 - 2) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src1 = rvv::vlse_T(src + (x * 2 - 1) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src2 = rvv::vlse_T(src + (x * 2) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src3 = rvv::vlse_T(src + (x * 2 + 1) * 4 + 3, 8 * sizeof(float), vl);\n+ vec_src4 = rvv::vlse_T(src + (x * 2 + 2) * 4 + 3, 8 * sizeof(float), vl);\n+ __riscv_vsse32(row + x * 4 + 3, 4 * sizeof(float), __riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), vl);\n+ }\n+ break;\n+ default:\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_tabM = rvv::vle_M(tabM + x, vl);\n+ vec_tabM = __riscv_vmul(__riscv_vsub(vec_tabM, start * 2, vl), sizeof(float), vl);\n+ auto vec_src0 = rvv::vloxei_T(src, vec_tabM, vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src1 = rvv::vloxei_T(src, vec_tabM, vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src2 = rvv::vloxei_T(src, vec_tabM, vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src3 = rvv::vloxei_T(src, vec_tabM, vl);\n+ vec_tabM = __riscv_vadd(vec_tabM, start * sizeof(float), vl);\n+ auto vec_src4 = rvv::vloxei_T(src, vec_tabM, vl);\n+ __riscv_vse32(row + x, __riscv_vfmadd(__riscv_vfadd(__riscv_vfadd(vec_src1, vec_src2, vl), vec_src3, vl), 4,\n+ __riscv_vfadd(__riscv_vfadd(vec_src0, vec_src4, vl), __riscv_vfadd(vec_src2, vec_src2, vl), vl), vl), vl);\n+ }\n+ }\n+ }\n+};\n+\n+template struct pyrDownVec1\n+{\n+ void operator()(WT* row0, WT* row1, WT* row2, WT* row3, WT* row4, T* dst, int end)\n+ {\n+ int vl;\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ auto vec_src3 = rvv::vle_WT(row3 + x, vl);\n+ auto vec_src4 = rvv::vle_WT(row4 + x, vl);\n+ rvv::vse_T(dst + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl),\n+ __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl), 8, vl), vl);\n+ }\n+ }\n+};\n+template<> struct pyrDownVec1\n+{\n+ void operator()(float* row0, float* row1, float* row2, float* row3, float* row4, float* dst, int end)\n+ {\n+ int vl;\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = 
rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ auto vec_src3 = rvv::vle_WT(row3 + x, vl);\n+ auto vec_src4 = rvv::vle_WT(row4 + x, vl);\n+ rvv::vse_T(dst + x, __riscv_vfmul(__riscv_vfmadd(vec_src2, 6, __riscv_vfmadd(__riscv_vfadd(vec_src1, vec_src3, vl), 4, __riscv_vfadd(vec_src0, vec_src4, vl), vl), vl), 1.f / 256.f, vl), vl);\n+ }\n+ }\n+};\n+\n+template struct pyrUpVec0\n+{\n+ void operator()(const T* src, WT* row, const uint* dtab, int start, int end)\n+ {\n+ int vl;\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vcvt_T_WT(rvv::vle_T(src + x - start, vl), vl);\n+ auto vec_src1 = rvv::vcvt_T_WT(rvv::vle_T(src + x, vl), vl);\n+ auto vec_src2 = rvv::vcvt_T_WT(rvv::vle_T(src + x + start, vl), vl);\n+\n+ auto vec_dtab = rvv::vle_M(dtab + x, vl);\n+ vec_dtab = __riscv_vmul(vec_dtab, sizeof(WT), vl);\n+ __riscv_vsoxei32(row, vec_dtab, __riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), vl);\n+ __riscv_vsoxei32(row, __riscv_vadd(vec_dtab, start * sizeof(WT), vl), __riscv_vsll(__riscv_vadd(vec_src1, vec_src2, vl), 2, vl), vl);\n+ }\n+ }\n+};\n+template<> struct pyrUpVec0\n+{\n+ void operator()(const float* src, float* row, const uint* dtab, int start, int end)\n+ {\n+ int vl;\n+ for( int x = start; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_T(src + x - start, vl);\n+ auto vec_src1 = rvv::vle_T(src + x, vl);\n+ auto vec_src2 = rvv::vle_T(src + x + start, vl);\n+\n+ auto vec_dtab = rvv::vle_M(dtab + x, vl);\n+ vec_dtab = __riscv_vmul(vec_dtab, sizeof(float), vl);\n+ __riscv_vsoxei32(row, vec_dtab, __riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), vl);\n+ __riscv_vsoxei32(row, __riscv_vadd(vec_dtab, start * sizeof(float), vl), __riscv_vfmul(__riscv_vfadd(vec_src1, vec_src2, vl), 4, vl), vl);\n+ }\n+ }\n+};\n+\n+template struct pyrUpVec1\n+{\n+ void operator()(WT* row0, WT* row1, WT* row2, T* dst0, T* dst1, int end)\n+ {\n+ int vl;\n+ if (dst0 != dst1)\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), 6, vl), vl);\n+ rvv::vse_T(dst1 + x, rvv::vcvt_WT_T(__riscv_vsll(__riscv_vadd(vec_src1, vec_src2, vl), 2, vl), 6, vl), vl);\n+ }\n+ }\n+ else\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, rvv::vcvt_WT_T(__riscv_vadd(__riscv_vadd(vec_src0, vec_src2, vl), __riscv_vadd(__riscv_vsll(vec_src1, 2, vl), __riscv_vsll(vec_src1, 1, vl), vl), vl), 6, vl), vl);\n+ }\n+ }\n+ }\n+};\n+template<> struct pyrUpVec1\n+{\n+ void operator()(float* row0, float* row1, float* row2, float* dst0, float* dst1, int end)\n+ {\n+ int vl;\n+ if (dst0 != dst1)\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, __riscv_vfmul(__riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), 
vec_src2, vl), 1.f / 64.f, vl), vl);\n+ rvv::vse_T(dst1 + x, __riscv_vfmul(__riscv_vfadd(vec_src1, vec_src2, vl), 1.f / 16.f, vl), vl);\n+ }\n+ }\n+ else\n+ {\n+ for( int x = 0 ; x < end; x += vl )\n+ {\n+ vl = rvv::vsetvl_WT(end - x);\n+ auto vec_src0 = rvv::vle_WT(row0 + x, vl);\n+ auto vec_src1 = rvv::vle_WT(row1 + x, vl);\n+ auto vec_src2 = rvv::vle_WT(row2 + x, vl);\n+ rvv::vse_T(dst0 + x, __riscv_vfmul(__riscv_vfadd(__riscv_vfmadd(vec_src1, 6, vec_src0, vl), vec_src2, vl), 1.f / 64.f, vl), vl);\n+ }\n+ }\n+ }\n+};\n+\n+template\n+struct PyrDownInvoker : ParallelLoopBody\n+{\n+ PyrDownInvoker(const uchar* _src_data, size_t _src_step, int _src_width, int _src_height, uchar* _dst_data, size_t _dst_step, int _dst_width, int _dst_height, int _cn, int _borderType, int* _tabR, int* _tabM, int* _tabL)\n+ {\n+ src_data = _src_data;\n+ src_step = _src_step;\n+ src_width = _src_width;\n+ src_height = _src_height;\n+ dst_data = _dst_data;\n+ dst_step = _dst_step;\n+ dst_width = _dst_width;\n+ dst_height = _dst_height;\n+ cn = _cn;\n+ borderType = _borderType;\n+ tabR = _tabR;\n+ tabM = _tabM;\n+ tabL = _tabL;\n+ }\n+\n+ void operator()(const Range& range) const CV_OVERRIDE;\n+\n+ const uchar* src_data;\n+ size_t src_step;\n+ int src_width;\n+ int src_height;\n+ uchar* dst_data;\n+ size_t dst_step;\n+ int dst_width;\n+ int dst_height;\n+ int cn;\n+ int borderType;\n+ int* tabR;\n+ int* tabM;\n+ int* tabL;\n+};\n+\n+// the algorithm is copied from imgproc/src/pyramids.cpp,\n+// in the function template void cv::pyrDown_\n+template\n+inline int pyrDown(const uchar* src_data, size_t src_step, int src_width, int src_height, uchar* dst_data, size_t dst_step, int dst_width, int dst_height, int cn, int borderType)\n+{\n+ const int PD_SZ = 5;\n+\n+ std::vector _tabM(dst_width * cn), _tabL(cn * (PD_SZ + 2)), _tabR(cn * (PD_SZ + 2));\n+ int *tabM = _tabM.data(), *tabL = _tabL.data(), *tabR = _tabR.data();\n+\n+ CV_Assert( src_width > 0 && src_height > 0 &&\n+ std::abs(dst_width*2 - src_width) <= 2 &&\n+ std::abs(dst_height*2 - src_height) <= 2 );\n+ int width0 = std::min((src_width-PD_SZ/2-1)/2 + 1, dst_width);\n+\n+ for (int x = 0; x <= PD_SZ+1; x++)\n+ {\n+ int sx0 = borderInterpolate(x - PD_SZ/2, src_width, borderType)*cn;", - "comment_created_at": "2025-03-01T03:11:06+00:00", - "comment_author": "amane-ame", - "comment_body": "Carotene(ARM HAL) copied this function from ::cv into its self namespace, I will follow this.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-optimize-memory-allocation-patterns.json b/_reviewers/opencv-optimize-memory-allocation-patterns.json new file mode 100644 index 0000000..e1f9bee --- /dev/null +++ b/_reviewers/opencv-optimize-memory-allocation-patterns.json @@ -0,0 +1,124 @@ +[ + { + "discussion_id": "2151339001", + "pr_number": 27449, + "pr_file": "modules/dnn/src/onnx/onnx_graph_simplifier.cpp", + "created_at": "2025-06-17T05:16:02+00:00", + "commented_code": "simplifySubgraphs(Ptr(new ONNXGraphWrapper(net)), subgraphs);\n}\n\nMat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto, bool uint8ToInt8)\n\n\nchar* get_raw_data(const opencv_onnx::TensorProto& tensor_proto, const std::string base_path = \"\"){", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2151339001", + "repo_full_name": "opencv/opencv", + "pr_number": 27449, + "pr_file": "modules/dnn/src/onnx/onnx_graph_simplifier.cpp", + "discussion_id": "2151339001", + "commented_code": "@@ -1704,29 +1704,63 @@ void 
simplifySubgraphs(opencv_onnx::GraphProto& net)\n simplifySubgraphs(Ptr(new ONNXGraphWrapper(net)), subgraphs);\n }\n \n-Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto, bool uint8ToInt8)\n+\n+\n+char* get_raw_data(const opencv_onnx::TensorProto& tensor_proto, const std::string base_path = \"\"){", + "comment_created_at": "2025-06-17T05:16:02+00:00", + "comment_author": "vpisarev", + "comment_body": "I'd suggest to modify this function (and probably rename it) to return `Mat` instead of plain pointer. This way we can track memory allocations properly. Currently, I suspect, there are memory leaks.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2053540731", + "pr_number": 27177, + "pr_file": "modules/dnn/src/llm/gguf_parser.cpp", + "created_at": "2025-04-22T07:49:08+00:00", + "commented_code": "#include \"../precomp.hpp\"\n#include \"gguf_parser.hpp\"\n//#include \"gguf_buffer.hpp\"\n#include \n#include \"opencv2/core.hpp\"\n\nnamespace cv { namespace dnn {\nCV__DNN_INLINE_NS_BEGIN\nusing namespace dnn;\n\ntensor_role get_tensor_role(std::string name) {\n if (name == \"attn_norm\") return tensor_role::attn_norm;\n if (name == \"attn_norm_2\") return tensor_role::attn_norm_2;\n if (name == \"attn_qkv\") return tensor_role::attn_qkv;\n if (name == \"attn_q\") return tensor_role::attn_q;\n if (name == \"attn_k\") return tensor_role::attn_k;\n if (name == \"attn_v\") return tensor_role::attn_v;\n if (name == \"attn_output\") return tensor_role::attn_output;\n if (name == \"ffn_norm\") return tensor_role::ffn_norm;\n if (name == \"ffn_up\") return tensor_role::ffn_up;\n if (name == \"ffn_gate\") return tensor_role::ffn_gate;\n if (name == \"ffn_down\") return tensor_role::ffn_down;\n if (name == \"ffn_gate_inp\") return tensor_role::ffn_gate_inp;\n if (name == \"ffn_gate_exp\") return tensor_role::ffn_gate_exp;\n if (name == \"ffn_down_exp\") return tensor_role::ffn_down_exp;\n if (name == \"ffn_up_exp\") return tensor_role::ffn_up_exp;\n throw std::runtime_error(\"Unknown tensor role: \" + name);\n}\n\nMat TensorMetadata::getMat(Ptr tensor_reader) {\n if (type != GGML_TYPE_F32) {\n throw std::runtime_error(\"Unsupported tensor type: \" + std::to_string(type));\n }\n if (dims.size() == 2) \n return tensor_reader->read2DMat(\n GGML_TYPE_F32, dims[0],dims[1], data_offset\n );\n if (dims.size() == 1) \n return tensor_reader->read1DMat(\n GGML_TYPE_F32, dims[0], data_offset\n );\n\n throw std::runtime_error(\n \"Unsupported tensor dimension: \" + std::to_string(dims.size()));\n};\n\nstd::string GGUFParser::get_architecture(){\n return getStringMetadata(\"architecture\");\n};\n\nMatShape BlockMetadata::getInputShape() \n{\n std::vector t = getTensorMetadata( tensor_role::attn_qkv, false, false);\n if (t.size() == 0) {\n throw std::runtime_error(\"No input tensors found\");\n }\n\n MatShape inputShape(3);\n inputShape[0] = 1; inputShape[1] = -1;\n inputShape[2] = t[0].dims[1];\n return inputShape;\n}\n\nint BlockMetadata::getDhidden() \n{\n std::vector t = getTensorMetadata( tensor_role::attn_qkv, false, false);\n if (t.size() == 0) {\n throw std::runtime_error(\"No input tensors found\");\n }\n return t[0].dims[0] / 3;\n}\n\nMatShape BlockMetadata::getOutputShape() \n{\n std::vector t = getTensorMetadata( tensor_role::attn_qkv, false, false);\n if (t.size() == 0) {\n throw std::runtime_error(\"No input tensors found\");\n }\n\n MatShape outputShape(3);\n outputShape[0] = 1; outputShape[1] = -1;\n outputShape[2] = t[0].dims[1];\n return outputShape;\n}\n\nvoid 
BlockMetadata::getAttentionLayerParams(Ptr tensor_reader, LayerParams& layerParams) \n{\n layerParams.type = \"Attention\";\n layerParams.name = \"attention_block\";\n layerParams.set(\"num_heads\", n_heads);\n layerParams.set(\"blockn\", blockn);\n int d_hidden = getDhidden();\n std::vector qkv_hidden_sizes = {d_hidden, d_hidden, d_hidden};\n layerParams.set(\"qkv_hidden_sizes\", DictValue::arrayInt(&qkv_hidden_sizes[0], 3));\n Mat qkv_weight = getTensorMetadata(tensor_role::attn_qkv, false, false)[0].getMat(tensor_reader);\n Mat qkv_bias = getTensorMetadata(tensor_role::attn_qkv, true, false)[0].getMat(tensor_reader);\n layerParams.blobs.push_back(qkv_weight.t());\n layerParams.blobs.push_back(qkv_bias);\n}\n\nstd::vector BlockMetadata::getTensorMetadata(tensor_role role, bool is_bias, bool allow_multiple) {\n std::vector result;\n for (const auto & tensor : tensors) {\n if (tensor.role == role && tensor.is_bias == is_bias) {\n result.push_back(tensor);\n }\n }\n return result;\n if (!allow_multiple && result.size() > 1) {\n throw std::runtime_error(\"Multiple tensors found for role: \" + std::to_string(role));\n }\n return result;\n}\n\nvoid GGUFParser::addTensorMetadata() {\n auto tensor = TensorMetadata();\n\n std::string tensor_name = reader.readString();\n std::regex layerRegex(R\"(^blk\\.(\\d+)\\.([a-zA-Z0-9_]+)\\.(weight|bias)$)\");\n std::smatch match;\n std::string layerName, paramType;\n\n int blockn;\n\n if (std::regex_match(tensor_name, match, layerRegex)) {\n blockn = std::stoi(match[1].str());\n layerName = match[2].str();\n paramType = match[3].str();\n if (paramType == \"weight\") {\n tensor.is_bias = false;\n } else if (paramType == \"bias\") {\n tensor.is_bias = true;\n } else {\n throw std::runtime_error(\"Unknown parameter type: \" + paramType);\n }\n tensor.role = get_tensor_role(layerName);\n tensor.n_block = blockn;\n } else {\n throw std::runtime_error(\"Invalid tensor name format: \" + tensor_name);\n }\n\n int dim_count = reader.readSingleValueInt();\n tensor.dims = MatShape(dim_count);\n std::vector dims;\n for (uint32_t i = 0; i < dim_count; ++i) {\n dims.push_back(reader.readSingleValueInt()); \n }", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2053540731", + "repo_full_name": "opencv/opencv", + "pr_number": 27177, + "pr_file": "modules/dnn/src/llm/gguf_parser.cpp", + "discussion_id": "2053540731", + "commented_code": "@@ -0,0 +1,235 @@\n+#include \"../precomp.hpp\"\n+#include \"gguf_parser.hpp\"\n+//#include \"gguf_buffer.hpp\"\n+#include \n+#include \"opencv2/core.hpp\"\n+\n+namespace cv { namespace dnn {\n+CV__DNN_INLINE_NS_BEGIN\n+using namespace dnn;\n+\n+tensor_role get_tensor_role(std::string name) {\n+ if (name == \"attn_norm\") return tensor_role::attn_norm;\n+ if (name == \"attn_norm_2\") return tensor_role::attn_norm_2;\n+ if (name == \"attn_qkv\") return tensor_role::attn_qkv;\n+ if (name == \"attn_q\") return tensor_role::attn_q;\n+ if (name == \"attn_k\") return tensor_role::attn_k;\n+ if (name == \"attn_v\") return tensor_role::attn_v;\n+ if (name == \"attn_output\") return tensor_role::attn_output;\n+ if (name == \"ffn_norm\") return tensor_role::ffn_norm;\n+ if (name == \"ffn_up\") return tensor_role::ffn_up;\n+ if (name == \"ffn_gate\") return tensor_role::ffn_gate;\n+ if (name == \"ffn_down\") return tensor_role::ffn_down;\n+ if (name == \"ffn_gate_inp\") return tensor_role::ffn_gate_inp;\n+ if (name == \"ffn_gate_exp\") return tensor_role::ffn_gate_exp;\n+ if (name == \"ffn_down_exp\") return 
tensor_role::ffn_down_exp;\n+ if (name == \"ffn_up_exp\") return tensor_role::ffn_up_exp;\n+ throw std::runtime_error(\"Unknown tensor role: \" + name);\n+}\n+\n+Mat TensorMetadata::getMat(Ptr tensor_reader) {\n+ if (type != GGML_TYPE_F32) {\n+ throw std::runtime_error(\"Unsupported tensor type: \" + std::to_string(type));\n+ }\n+ if (dims.size() == 2) \n+ return tensor_reader->read2DMat(\n+ GGML_TYPE_F32, dims[0],dims[1], data_offset\n+ );\n+ if (dims.size() == 1) \n+ return tensor_reader->read1DMat(\n+ GGML_TYPE_F32, dims[0], data_offset\n+ );\n+\n+ throw std::runtime_error(\n+ \"Unsupported tensor dimension: \" + std::to_string(dims.size()));\n+};\n+\n+std::string GGUFParser::get_architecture(){\n+ return getStringMetadata(\"architecture\");\n+};\n+\n+MatShape BlockMetadata::getInputShape() \n+{\n+ std::vector t = getTensorMetadata( tensor_role::attn_qkv, false, false);\n+ if (t.size() == 0) {\n+ throw std::runtime_error(\"No input tensors found\");\n+ }\n+\n+ MatShape inputShape(3);\n+ inputShape[0] = 1; inputShape[1] = -1;\n+ inputShape[2] = t[0].dims[1];\n+ return inputShape;\n+}\n+\n+int BlockMetadata::getDhidden() \n+{\n+ std::vector t = getTensorMetadata( tensor_role::attn_qkv, false, false);\n+ if (t.size() == 0) {\n+ throw std::runtime_error(\"No input tensors found\");\n+ }\n+ return t[0].dims[0] / 3;\n+}\n+\n+MatShape BlockMetadata::getOutputShape() \n+{\n+ std::vector t = getTensorMetadata( tensor_role::attn_qkv, false, false);\n+ if (t.size() == 0) {\n+ throw std::runtime_error(\"No input tensors found\");\n+ }\n+\n+ MatShape outputShape(3);\n+ outputShape[0] = 1; outputShape[1] = -1;\n+ outputShape[2] = t[0].dims[1];\n+ return outputShape;\n+}\n+\n+void BlockMetadata::getAttentionLayerParams(Ptr tensor_reader, LayerParams& layerParams) \n+{\n+ layerParams.type = \"Attention\";\n+ layerParams.name = \"attention_block\";\n+ layerParams.set(\"num_heads\", n_heads);\n+ layerParams.set(\"blockn\", blockn);\n+ int d_hidden = getDhidden();\n+ std::vector qkv_hidden_sizes = {d_hidden, d_hidden, d_hidden};\n+ layerParams.set(\"qkv_hidden_sizes\", DictValue::arrayInt(&qkv_hidden_sizes[0], 3));\n+ Mat qkv_weight = getTensorMetadata(tensor_role::attn_qkv, false, false)[0].getMat(tensor_reader);\n+ Mat qkv_bias = getTensorMetadata(tensor_role::attn_qkv, true, false)[0].getMat(tensor_reader);\n+ layerParams.blobs.push_back(qkv_weight.t());\n+ layerParams.blobs.push_back(qkv_bias);\n+}\n+\n+std::vector BlockMetadata::getTensorMetadata(tensor_role role, bool is_bias, bool allow_multiple) {\n+ std::vector result;\n+ for (const auto & tensor : tensors) {\n+ if (tensor.role == role && tensor.is_bias == is_bias) {\n+ result.push_back(tensor);\n+ }\n+ }\n+ return result;\n+ if (!allow_multiple && result.size() > 1) {\n+ throw std::runtime_error(\"Multiple tensors found for role: \" + std::to_string(role));\n+ }\n+ return result;\n+}\n+\n+void GGUFParser::addTensorMetadata() {\n+ auto tensor = TensorMetadata();\n+\n+ std::string tensor_name = reader.readString();\n+ std::regex layerRegex(R\"(^blk\\.(\\d+)\\.([a-zA-Z0-9_]+)\\.(weight|bias)$)\");\n+ std::smatch match;\n+ std::string layerName, paramType;\n+\n+ int blockn;\n+\n+ if (std::regex_match(tensor_name, match, layerRegex)) {\n+ blockn = std::stoi(match[1].str());\n+ layerName = match[2].str();\n+ paramType = match[3].str();\n+ if (paramType == \"weight\") {\n+ tensor.is_bias = false;\n+ } else if (paramType == \"bias\") {\n+ tensor.is_bias = true;\n+ } else {\n+ throw std::runtime_error(\"Unknown parameter type: \" + paramType);\n+ }\n+ 
tensor.role = get_tensor_role(layerName);\n+ tensor.n_block = blockn;\n+ } else {\n+ throw std::runtime_error(\"Invalid tensor name format: \" + tensor_name);\n+ }\n+\n+ int dim_count = reader.readSingleValueInt();\n+ tensor.dims = MatShape(dim_count);\n+ std::vector dims;\n+ for (uint32_t i = 0; i < dim_count; ++i) {\n+ dims.push_back(reader.readSingleValueInt()); \n+ }", + "comment_created_at": "2025-04-22T07:49:08+00:00", + "comment_author": "asmorkalov", + "comment_body": "It's more efficient to use std::vector dims(dim_count); and assign values, rather than call push_back and trigger reallocations.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1930098040", + "pr_number": 26844, + "pr_file": "modules/imgcodecs/src/grfmt_jpegxl.cpp", + "created_at": "2025-01-27T07:51:19+00:00", + "commented_code": "bool JpegXLDecoder::readHeader()\n{\n // Open file\n if (!m_f) {\n m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n if (!m_f)\n return false;\n if (m_buf.empty()) {\n // Open file\n if (!m_f) {\n m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n if (!m_f) {\n return false;\n }\n }\n // Create buffer for reading file\n constexpr size_t read_buffer_size = 16384; // 16KB chunks\n if (m_read_buffer.capacity() < read_buffer_size) {\n m_read_buffer.resize(read_buffer_size);\n }", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1930098040", + "repo_full_name": "opencv/opencv", + "pr_number": 26844, + "pr_file": "modules/imgcodecs/src/grfmt_jpegxl.cpp", + "discussion_id": "1930098040", + "commented_code": "@@ -91,11 +93,19 @@ ImageDecoder JpegXLDecoder::newDecoder() const\n \n bool JpegXLDecoder::readHeader()\n {\n- // Open file\n- if (!m_f) {\n- m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n- if (!m_f)\n- return false;\n+ if (m_buf.empty()) {\n+ // Open file\n+ if (!m_f) {\n+ m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n+ if (!m_f) {\n+ return false;\n+ }\n+ }\n+ // Create buffer for reading file\n+ constexpr size_t read_buffer_size = 16384; // 16KB chunks\n+ if (m_read_buffer.capacity() < read_buffer_size) {\n+ m_read_buffer.resize(read_buffer_size);\n+ }", + "comment_created_at": "2025-01-27T07:51:19+00:00", + "comment_author": "asmorkalov", + "comment_body": "If I understood correctly, The m_read_buffer is resized only once here ant it's size is always 16k. The `m_read_buffer` is used in ::read() only. M.b. it's better to make it local variable there. 
If you need the buffer shared between readHeader and readData then it makes sense to initialize the buffer in decoder constructor.", + "pr_file_module": null + }, + { + "comment_id": "1930290856", + "repo_full_name": "opencv/opencv", + "pr_number": 26844, + "pr_file": "modules/imgcodecs/src/grfmt_jpegxl.cpp", + "discussion_id": "1930098040", + "commented_code": "@@ -91,11 +93,19 @@ ImageDecoder JpegXLDecoder::newDecoder() const\n \n bool JpegXLDecoder::readHeader()\n {\n- // Open file\n- if (!m_f) {\n- m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n- if (!m_f)\n- return false;\n+ if (m_buf.empty()) {\n+ // Open file\n+ if (!m_f) {\n+ m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n+ if (!m_f) {\n+ return false;\n+ }\n+ }\n+ // Create buffer for reading file\n+ constexpr size_t read_buffer_size = 16384; // 16KB chunks\n+ if (m_read_buffer.capacity() < read_buffer_size) {\n+ m_read_buffer.resize(read_buffer_size);\n+ }", + "comment_created_at": "2025-01-27T10:22:45+00:00", + "comment_author": "Kumataro", + "comment_body": "Thank you for your comment, `m_read_buffer` should be kept accrossing 'readHeader()' and 'readData()'. So I will move it to constructor.\r\n\r\n```\r\nJXLDecoder::JxlDecoder()\r\n m_decoder = nullptr;\r\n\r\nJXLDecoder::readHeader()\r\n m_decoder = JxlDecoderMake(nullptr);\r\n\r\n read()\r\n JxlDecoderReleaseInput() // no effects because no input buffer is set\r\n JxlDecoderSetInput() ---- Input Buffer is locked.\r\n |\r\nJXLDecoder::readData() |\r\n read() |\r\n JxlDecoderReleaseInput()- Input Buffer is unlocked.\r\n JxlDecoderSetInput() ---- Input Buffer is locked.\r\n |\r\nJXLDecoder::~JxlDecoder() |\r\n close() |\r\n m_decoder.reset() ------- Input buffer is unlocked\r\n destory m_decoder\r\n```\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1890435483", + "pr_number": 26619, + "pr_file": "3rdparty/fastcv/src/fastcv_hal_imgproc.cpp", + "created_at": "2024-12-18T15:31:56+00:00", + "commented_code": "CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format(\"Src type:%s is not supported\", cv::typeToString(src_type).c_str()));\n\n CV_HAL_RETURN(status, hal_warpPerspective);\n}\n\nclass FcvPyrLoop_Invoker : public cv::ParallelLoopBody\n{\npublic:\n\n FcvPyrLoop_Invoker(cv::Mat src_, int width_, int height_, cv::Mat dst_, int bdr_, int knl_, int stripeHeight_, int nStripes_) :\n cv::ParallelLoopBody(), src(src_), width(width_), height(height_), dst(dst_), bdr(bdr_), knl(knl_), stripeHeight(stripeHeight_), nStripes(nStripes_)\n {\n }\n\n virtual void operator()(const cv::Range& range) const CV_OVERRIDE\n {\n int height_ = stripeHeight * (range.end - range.start);\n int width_ = width;\n cv::Mat src_;\n int n = knl/2;\n\n if(range.end == nStripes)\n height_ += (height - range.end * stripeHeight);\n\n src_ = cv::Mat(height_ + 2*n, width_ + 2*n, CV_8U);\n\n if(range.start == 0 && range.end == nStripes)\n cv::copyMakeBorder(src(cv::Rect(0, 0, width, height)), src_, n, n, n, n, bdr);\n else if(range.start == 0)\n cv::copyMakeBorder(src(cv::Rect(0, 0, width_, height_ + n)), src_, n, 0, n, n, bdr);\n else if(range.end == nStripes)\n cv::copyMakeBorder(src(cv::Rect(0, range.start * stripeHeight - n, width_, height_ + n)), src_, 0, n, n, n, bdr);\n else\n cv::copyMakeBorder(src(cv::Rect(0, range.start * stripeHeight - n, width_, height_ + 2*n)), src_, 0, 0, n, n, bdr);\n\n int dstHeight_, dstWidth_, origDstHeight_, origDstWidth_;\n dstHeight_ = (height_ + 2*n + 1)/2;\n dstWidth_ = (width_ + 2*n + 1)/2;\n origDstHeight_ = (height_ + 1)/2;\n origDstWidth_ = (width_ + 1)/2;\n\n 
cv::Mat dst_padded = cv::Mat(dstHeight_, dstWidth_, CV_8U);\n\n fcvPyramidLevel_v2 *framePyr = new fcvPyramidLevel_v2[2];", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1890435483", + "repo_full_name": "opencv/opencv", + "pr_number": 26619, + "pr_file": "3rdparty/fastcv/src/fastcv_hal_imgproc.cpp", + "discussion_id": "1890435483", + "commented_code": "@@ -743,4 +743,253 @@ int fastcv_hal_warpPerspective(\n CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format(\"Src type:%s is not supported\", cv::typeToString(src_type).c_str()));\n \n CV_HAL_RETURN(status, hal_warpPerspective);\n+}\n+\n+class FcvPyrLoop_Invoker : public cv::ParallelLoopBody\n+{\n+public:\n+\n+ FcvPyrLoop_Invoker(cv::Mat src_, int width_, int height_, cv::Mat dst_, int bdr_, int knl_, int stripeHeight_, int nStripes_) :\n+ cv::ParallelLoopBody(), src(src_), width(width_), height(height_), dst(dst_), bdr(bdr_), knl(knl_), stripeHeight(stripeHeight_), nStripes(nStripes_)\n+ {\n+ }\n+\n+ virtual void operator()(const cv::Range& range) const CV_OVERRIDE\n+ {\n+ int height_ = stripeHeight * (range.end - range.start);\n+ int width_ = width;\n+ cv::Mat src_;\n+ int n = knl/2;\n+\n+ if(range.end == nStripes)\n+ height_ += (height - range.end * stripeHeight);\n+\n+ src_ = cv::Mat(height_ + 2*n, width_ + 2*n, CV_8U);\n+\n+ if(range.start == 0 && range.end == nStripes)\n+ cv::copyMakeBorder(src(cv::Rect(0, 0, width, height)), src_, n, n, n, n, bdr);\n+ else if(range.start == 0)\n+ cv::copyMakeBorder(src(cv::Rect(0, 0, width_, height_ + n)), src_, n, 0, n, n, bdr);\n+ else if(range.end == nStripes)\n+ cv::copyMakeBorder(src(cv::Rect(0, range.start * stripeHeight - n, width_, height_ + n)), src_, 0, n, n, n, bdr);\n+ else\n+ cv::copyMakeBorder(src(cv::Rect(0, range.start * stripeHeight - n, width_, height_ + 2*n)), src_, 0, 0, n, n, bdr);\n+\n+ int dstHeight_, dstWidth_, origDstHeight_, origDstWidth_;\n+ dstHeight_ = (height_ + 2*n + 1)/2;\n+ dstWidth_ = (width_ + 2*n + 1)/2;\n+ origDstHeight_ = (height_ + 1)/2;\n+ origDstWidth_ = (width_ + 1)/2;\n+\n+ cv::Mat dst_padded = cv::Mat(dstHeight_, dstWidth_, CV_8U);\n+\n+ fcvPyramidLevel_v2 *framePyr = new fcvPyramidLevel_v2[2];", + "comment_created_at": "2024-12-18T15:31:56+00:00", + "comment_author": "asmorkalov", + "comment_body": "I propose to use local variable on stack. new is redundant here. 
Also it's not deleted afterwards.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1890437308", + "pr_number": 26619, + "pr_file": "3rdparty/fastcv/src/fastcv_hal_imgproc.cpp", + "created_at": "2024-12-18T15:33:11+00:00", + "commented_code": "CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format(\"Src type:%s is not supported\", cv::typeToString(src_type).c_str()));\n\n CV_HAL_RETURN(status, hal_warpPerspective);\n}\n\nclass FcvPyrLoop_Invoker : public cv::ParallelLoopBody\n{\npublic:\n\n FcvPyrLoop_Invoker(cv::Mat src_, int width_, int height_, cv::Mat dst_, int bdr_, int knl_, int stripeHeight_, int nStripes_) :\n cv::ParallelLoopBody(), src(src_), width(width_), height(height_), dst(dst_), bdr(bdr_), knl(knl_), stripeHeight(stripeHeight_), nStripes(nStripes_)\n {\n }\n\n virtual void operator()(const cv::Range& range) const CV_OVERRIDE\n {\n int height_ = stripeHeight * (range.end - range.start);\n int width_ = width;\n cv::Mat src_;\n int n = knl/2;\n\n if(range.end == nStripes)\n height_ += (height - range.end * stripeHeight);\n\n src_ = cv::Mat(height_ + 2*n, width_ + 2*n, CV_8U);\n\n if(range.start == 0 && range.end == nStripes)\n cv::copyMakeBorder(src(cv::Rect(0, 0, width, height)), src_, n, n, n, n, bdr);\n else if(range.start == 0)\n cv::copyMakeBorder(src(cv::Rect(0, 0, width_, height_ + n)), src_, n, 0, n, n, bdr);\n else if(range.end == nStripes)\n cv::copyMakeBorder(src(cv::Rect(0, range.start * stripeHeight - n, width_, height_ + n)), src_, 0, n, n, n, bdr);\n else\n cv::copyMakeBorder(src(cv::Rect(0, range.start * stripeHeight - n, width_, height_ + 2*n)), src_, 0, 0, n, n, bdr);\n\n int dstHeight_, dstWidth_, origDstHeight_, origDstWidth_;\n dstHeight_ = (height_ + 2*n + 1)/2;\n dstWidth_ = (width_ + 2*n + 1)/2;\n origDstHeight_ = (height_ + 1)/2;\n origDstWidth_ = (width_ + 1)/2;\n\n cv::Mat dst_padded = cv::Mat(dstHeight_, dstWidth_, CV_8U);\n\n fcvPyramidLevel_v2 *framePyr = new fcvPyramidLevel_v2[2];\n framePyr[0].ptr = NULL;\n framePyr[1].ptr = dst_padded.data;\n framePyr[1].stride = dstWidth_;\n\n fcvPyramidCreateu8_v4(src_.data, width_ + 2*n, height_ + 2*n,\n width_ + 2*n, 2, FASTCV_PYRAMID_SCALE_HALF,\n framePyr, FASTCV_BORDER_UNDEFINED, 0);\n\n int start_val = stripeHeight * range.start;\n cv::Mat dst_temp1 = dst_padded(cv::Rect(n/2, n/2, origDstWidth_, origDstHeight_));\n cv::Mat dst_temp2 = dst(cv::Rect(0, start_val/2, origDstWidth_, origDstHeight_));\n dst_temp1.copyTo(dst_temp2);\n }\n\nprivate:\n cv::Mat src;\n const int width;\n const int height;\n cv::Mat dst;\n const int bdr;\n const int knl;\n const int stripeHeight;\n const int nStripes;\n\n FcvPyrLoop_Invoker(const FcvPyrLoop_Invoker &); // = delete;\n const FcvPyrLoop_Invoker& operator= (const FcvPyrLoop_Invoker &); // = delete;\n};\n\nint fastcv_hal_pyrdown(\n const uchar* src_data,\n size_t src_step,\n int src_width,\n int src_height,\n uchar* dst_data,\n size_t dst_step,\n int dst_width,\n int dst_height,\n int depth,\n int cn,\n int border_type)\n{\n if(depth != CV_8U || cn!= 1)\n {\n CV_HAL_RETURN_NOT_IMPLEMENTED(\"src type not supported\");\n }\n\n int dstW = (src_width & 1) == 1 ? ((src_width + 1) >> 1) : ((src_width) >> 1);\n int dstH = (src_height & 1) == 1 ? 
((src_height + 1) >> 1) : ((src_height) >> 1);\n\n if((dstW > dst_width) || (dstH > dst_height))\n {\n CV_HAL_RETURN_NOT_IMPLEMENTED(\"dst size needs to be atleast half of the src size\");\n }\n\n INITIALIZATION_CHECK;\n\n fcvBorderType bdr;\n uint8_t bVal = 0;\n int nThreads = cv::getNumThreads();\n if(nThreads <= 1)\n {\n switch(border_type)\n {\n case cv::BORDER_REPLICATE:\n bdr = FASTCV_BORDER_REPLICATE;\n break;\n case cv::BORDER_REFLECT:\n bdr = FASTCV_BORDER_REFLECT;\n break;\n case cv::BORDER_REFLECT101: // cv::BORDER_REFLECT_101, BORDER_DEFAULT\n bdr = FASTCV_BORDER_REFLECT_V2;\n break;\n default:\n CV_HAL_RETURN_NOT_IMPLEMENTED(\"border type not supported\");\n }\n\n fcvPyramidLevel_v2 *frame1Pyr = new fcvPyramidLevel_v2[2];\n frame1Pyr[0].ptr = NULL;\n frame1Pyr[1].ptr = dst_data;\n frame1Pyr[1].stride = dst_step;", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1890437308", + "repo_full_name": "opencv/opencv", + "pr_number": 26619, + "pr_file": "3rdparty/fastcv/src/fastcv_hal_imgproc.cpp", + "discussion_id": "1890437308", + "commented_code": "@@ -743,4 +743,253 @@ int fastcv_hal_warpPerspective(\n CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format(\"Src type:%s is not supported\", cv::typeToString(src_type).c_str()));\n \n CV_HAL_RETURN(status, hal_warpPerspective);\n+}\n+\n+class FcvPyrLoop_Invoker : public cv::ParallelLoopBody\n+{\n+public:\n+\n+ FcvPyrLoop_Invoker(cv::Mat src_, int width_, int height_, cv::Mat dst_, int bdr_, int knl_, int stripeHeight_, int nStripes_) :\n+ cv::ParallelLoopBody(), src(src_), width(width_), height(height_), dst(dst_), bdr(bdr_), knl(knl_), stripeHeight(stripeHeight_), nStripes(nStripes_)\n+ {\n+ }\n+\n+ virtual void operator()(const cv::Range& range) const CV_OVERRIDE\n+ {\n+ int height_ = stripeHeight * (range.end - range.start);\n+ int width_ = width;\n+ cv::Mat src_;\n+ int n = knl/2;\n+\n+ if(range.end == nStripes)\n+ height_ += (height - range.end * stripeHeight);\n+\n+ src_ = cv::Mat(height_ + 2*n, width_ + 2*n, CV_8U);\n+\n+ if(range.start == 0 && range.end == nStripes)\n+ cv::copyMakeBorder(src(cv::Rect(0, 0, width, height)), src_, n, n, n, n, bdr);\n+ else if(range.start == 0)\n+ cv::copyMakeBorder(src(cv::Rect(0, 0, width_, height_ + n)), src_, n, 0, n, n, bdr);\n+ else if(range.end == nStripes)\n+ cv::copyMakeBorder(src(cv::Rect(0, range.start * stripeHeight - n, width_, height_ + n)), src_, 0, n, n, n, bdr);\n+ else\n+ cv::copyMakeBorder(src(cv::Rect(0, range.start * stripeHeight - n, width_, height_ + 2*n)), src_, 0, 0, n, n, bdr);\n+\n+ int dstHeight_, dstWidth_, origDstHeight_, origDstWidth_;\n+ dstHeight_ = (height_ + 2*n + 1)/2;\n+ dstWidth_ = (width_ + 2*n + 1)/2;\n+ origDstHeight_ = (height_ + 1)/2;\n+ origDstWidth_ = (width_ + 1)/2;\n+\n+ cv::Mat dst_padded = cv::Mat(dstHeight_, dstWidth_, CV_8U);\n+\n+ fcvPyramidLevel_v2 *framePyr = new fcvPyramidLevel_v2[2];\n+ framePyr[0].ptr = NULL;\n+ framePyr[1].ptr = dst_padded.data;\n+ framePyr[1].stride = dstWidth_;\n+\n+ fcvPyramidCreateu8_v4(src_.data, width_ + 2*n, height_ + 2*n,\n+ width_ + 2*n, 2, FASTCV_PYRAMID_SCALE_HALF,\n+ framePyr, FASTCV_BORDER_UNDEFINED, 0);\n+\n+ int start_val = stripeHeight * range.start;\n+ cv::Mat dst_temp1 = dst_padded(cv::Rect(n/2, n/2, origDstWidth_, origDstHeight_));\n+ cv::Mat dst_temp2 = dst(cv::Rect(0, start_val/2, origDstWidth_, origDstHeight_));\n+ dst_temp1.copyTo(dst_temp2);\n+ }\n+\n+private:\n+ cv::Mat src;\n+ const int width;\n+ const int height;\n+ cv::Mat dst;\n+ const int bdr;\n+ const int knl;\n+ const 
int stripeHeight;\n+ const int nStripes;\n+\n+ FcvPyrLoop_Invoker(const FcvPyrLoop_Invoker &); // = delete;\n+ const FcvPyrLoop_Invoker& operator= (const FcvPyrLoop_Invoker &); // = delete;\n+};\n+\n+int fastcv_hal_pyrdown(\n+ const uchar* src_data,\n+ size_t src_step,\n+ int src_width,\n+ int src_height,\n+ uchar* dst_data,\n+ size_t dst_step,\n+ int dst_width,\n+ int dst_height,\n+ int depth,\n+ int cn,\n+ int border_type)\n+{\n+ if(depth != CV_8U || cn!= 1)\n+ {\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"src type not supported\");\n+ }\n+\n+ int dstW = (src_width & 1) == 1 ? ((src_width + 1) >> 1) : ((src_width) >> 1);\n+ int dstH = (src_height & 1) == 1 ? ((src_height + 1) >> 1) : ((src_height) >> 1);\n+\n+ if((dstW > dst_width) || (dstH > dst_height))\n+ {\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"dst size needs to be atleast half of the src size\");\n+ }\n+\n+ INITIALIZATION_CHECK;\n+\n+ fcvBorderType bdr;\n+ uint8_t bVal = 0;\n+ int nThreads = cv::getNumThreads();\n+ if(nThreads <= 1)\n+ {\n+ switch(border_type)\n+ {\n+ case cv::BORDER_REPLICATE:\n+ bdr = FASTCV_BORDER_REPLICATE;\n+ break;\n+ case cv::BORDER_REFLECT:\n+ bdr = FASTCV_BORDER_REFLECT;\n+ break;\n+ case cv::BORDER_REFLECT101: // cv::BORDER_REFLECT_101, BORDER_DEFAULT\n+ bdr = FASTCV_BORDER_REFLECT_V2;\n+ break;\n+ default:\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"border type not supported\");\n+ }\n+\n+ fcvPyramidLevel_v2 *frame1Pyr = new fcvPyramidLevel_v2[2];\n+ frame1Pyr[0].ptr = NULL;\n+ frame1Pyr[1].ptr = dst_data;\n+ frame1Pyr[1].stride = dst_step;", + "comment_created_at": "2024-12-18T15:33:11+00:00", + "comment_author": "asmorkalov", + "comment_body": "I propose to use local variable on stack. new is redundant here. Also it's not deleted afterwards.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-optimize-memory-allocation-patterns.md b/_reviewers/opencv-optimize-memory-allocation-patterns.md index f6913d0..2970845 100644 --- a/_reviewers/opencv-optimize-memory-allocation-patterns.md +++ b/_reviewers/opencv-optimize-memory-allocation-patterns.md @@ -49,129 +49,3 @@ Benefits: - Reduces allocation overhead - Improves cache utilization - More predictable performance - - -[ - { - "discussion_id": "2151339001", - "pr_number": 27449, - "pr_file": "modules/dnn/src/onnx/onnx_graph_simplifier.cpp", - "created_at": "2025-06-17T05:16:02+00:00", - "commented_code": "simplifySubgraphs(Ptr(new ONNXGraphWrapper(net)), subgraphs);\n}\n\nMat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto, bool uint8ToInt8)\n\n\nchar* get_raw_data(const opencv_onnx::TensorProto& tensor_proto, const std::string base_path = \"\"){", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2151339001", - "repo_full_name": "opencv/opencv", - "pr_number": 27449, - "pr_file": "modules/dnn/src/onnx/onnx_graph_simplifier.cpp", - "discussion_id": "2151339001", - "commented_code": "@@ -1704,29 +1704,63 @@ void simplifySubgraphs(opencv_onnx::GraphProto& net)\n simplifySubgraphs(Ptr(new ONNXGraphWrapper(net)), subgraphs);\n }\n \n-Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto, bool uint8ToInt8)\n+\n+\n+char* get_raw_data(const opencv_onnx::TensorProto& tensor_proto, const std::string base_path = \"\"){", - "comment_created_at": "2025-06-17T05:16:02+00:00", - "comment_author": "vpisarev", - "comment_body": "I'd suggest to modify this function (and probably rename it) to return `Mat` instead of plain pointer. This way we can track memory allocations properly. 
Currently, I suspect, there are memory leaks.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2053540731", - "pr_number": 27177, - "pr_file": "modules/dnn/src/llm/gguf_parser.cpp", - "created_at": "2025-04-22T07:49:08+00:00", - "commented_code": "#include \"../precomp.hpp\"\n#include \"gguf_parser.hpp\"\n//#include \"gguf_buffer.hpp\"\n#include \n#include \"opencv2/core.hpp\"\n\nnamespace cv { namespace dnn {\nCV__DNN_INLINE_NS_BEGIN\nusing namespace dnn;\n\ntensor_role get_tensor_role(std::string name) {\n if (name == \"attn_norm\") return tensor_role::attn_norm;\n if (name == \"attn_norm_2\") return tensor_role::attn_norm_2;\n if (name == \"attn_qkv\") return tensor_role::attn_qkv;\n if (name == \"attn_q\") return tensor_role::attn_q;\n if (name == \"attn_k\") return tensor_role::attn_k;\n if (name == \"attn_v\") return tensor_role::attn_v;\n if (name == \"attn_output\") return tensor_role::attn_output;\n if (name == \"ffn_norm\") return tensor_role::ffn_norm;\n if (name == \"ffn_up\") return tensor_role::ffn_up;\n if (name == \"ffn_gate\") return tensor_role::ffn_gate;\n if (name == \"ffn_down\") return tensor_role::ffn_down;\n if (name == \"ffn_gate_inp\") return tensor_role::ffn_gate_inp;\n if (name == \"ffn_gate_exp\") return tensor_role::ffn_gate_exp;\n if (name == \"ffn_down_exp\") return tensor_role::ffn_down_exp;\n if (name == \"ffn_up_exp\") return tensor_role::ffn_up_exp;\n throw std::runtime_error(\"Unknown tensor role: \" + name);\n}\n\nMat TensorMetadata::getMat(Ptr tensor_reader) {\n if (type != GGML_TYPE_F32) {\n throw std::runtime_error(\"Unsupported tensor type: \" + std::to_string(type));\n }\n if (dims.size() == 2) \n return tensor_reader->read2DMat(\n GGML_TYPE_F32, dims[0],dims[1], data_offset\n );\n if (dims.size() == 1) \n return tensor_reader->read1DMat(\n GGML_TYPE_F32, dims[0], data_offset\n );\n\n throw std::runtime_error(\n \"Unsupported tensor dimension: \" + std::to_string(dims.size()));\n};\n\nstd::string GGUFParser::get_architecture(){\n return getStringMetadata(\"architecture\");\n};\n\nMatShape BlockMetadata::getInputShape() \n{\n std::vector t = getTensorMetadata( tensor_role::attn_qkv, false, false);\n if (t.size() == 0) {\n throw std::runtime_error(\"No input tensors found\");\n }\n\n MatShape inputShape(3);\n inputShape[0] = 1; inputShape[1] = -1;\n inputShape[2] = t[0].dims[1];\n return inputShape;\n}\n\nint BlockMetadata::getDhidden() \n{\n std::vector t = getTensorMetadata( tensor_role::attn_qkv, false, false);\n if (t.size() == 0) {\n throw std::runtime_error(\"No input tensors found\");\n }\n return t[0].dims[0] / 3;\n}\n\nMatShape BlockMetadata::getOutputShape() \n{\n std::vector t = getTensorMetadata( tensor_role::attn_qkv, false, false);\n if (t.size() == 0) {\n throw std::runtime_error(\"No input tensors found\");\n }\n\n MatShape outputShape(3);\n outputShape[0] = 1; outputShape[1] = -1;\n outputShape[2] = t[0].dims[1];\n return outputShape;\n}\n\nvoid BlockMetadata::getAttentionLayerParams(Ptr tensor_reader, LayerParams& layerParams) \n{\n layerParams.type = \"Attention\";\n layerParams.name = \"attention_block\";\n layerParams.set(\"num_heads\", n_heads);\n layerParams.set(\"blockn\", blockn);\n int d_hidden = getDhidden();\n std::vector qkv_hidden_sizes = {d_hidden, d_hidden, d_hidden};\n layerParams.set(\"qkv_hidden_sizes\", DictValue::arrayInt(&qkv_hidden_sizes[0], 3));\n Mat qkv_weight = getTensorMetadata(tensor_role::attn_qkv, false, false)[0].getMat(tensor_reader);\n Mat qkv_bias = 
getTensorMetadata(tensor_role::attn_qkv, true, false)[0].getMat(tensor_reader);\n layerParams.blobs.push_back(qkv_weight.t());\n layerParams.blobs.push_back(qkv_bias);\n}\n\nstd::vector BlockMetadata::getTensorMetadata(tensor_role role, bool is_bias, bool allow_multiple) {\n std::vector result;\n for (const auto & tensor : tensors) {\n if (tensor.role == role && tensor.is_bias == is_bias) {\n result.push_back(tensor);\n }\n }\n return result;\n if (!allow_multiple && result.size() > 1) {\n throw std::runtime_error(\"Multiple tensors found for role: \" + std::to_string(role));\n }\n return result;\n}\n\nvoid GGUFParser::addTensorMetadata() {\n auto tensor = TensorMetadata();\n\n std::string tensor_name = reader.readString();\n std::regex layerRegex(R\"(^blk\\.(\\d+)\\.([a-zA-Z0-9_]+)\\.(weight|bias)$)\");\n std::smatch match;\n std::string layerName, paramType;\n\n int blockn;\n\n if (std::regex_match(tensor_name, match, layerRegex)) {\n blockn = std::stoi(match[1].str());\n layerName = match[2].str();\n paramType = match[3].str();\n if (paramType == \"weight\") {\n tensor.is_bias = false;\n } else if (paramType == \"bias\") {\n tensor.is_bias = true;\n } else {\n throw std::runtime_error(\"Unknown parameter type: \" + paramType);\n }\n tensor.role = get_tensor_role(layerName);\n tensor.n_block = blockn;\n } else {\n throw std::runtime_error(\"Invalid tensor name format: \" + tensor_name);\n }\n\n int dim_count = reader.readSingleValueInt();\n tensor.dims = MatShape(dim_count);\n std::vector dims;\n for (uint32_t i = 0; i < dim_count; ++i) {\n dims.push_back(reader.readSingleValueInt()); \n }", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2053540731", - "repo_full_name": "opencv/opencv", - "pr_number": 27177, - "pr_file": "modules/dnn/src/llm/gguf_parser.cpp", - "discussion_id": "2053540731", - "commented_code": "@@ -0,0 +1,235 @@\n+#include \"../precomp.hpp\"\n+#include \"gguf_parser.hpp\"\n+//#include \"gguf_buffer.hpp\"\n+#include \n+#include \"opencv2/core.hpp\"\n+\n+namespace cv { namespace dnn {\n+CV__DNN_INLINE_NS_BEGIN\n+using namespace dnn;\n+\n+tensor_role get_tensor_role(std::string name) {\n+ if (name == \"attn_norm\") return tensor_role::attn_norm;\n+ if (name == \"attn_norm_2\") return tensor_role::attn_norm_2;\n+ if (name == \"attn_qkv\") return tensor_role::attn_qkv;\n+ if (name == \"attn_q\") return tensor_role::attn_q;\n+ if (name == \"attn_k\") return tensor_role::attn_k;\n+ if (name == \"attn_v\") return tensor_role::attn_v;\n+ if (name == \"attn_output\") return tensor_role::attn_output;\n+ if (name == \"ffn_norm\") return tensor_role::ffn_norm;\n+ if (name == \"ffn_up\") return tensor_role::ffn_up;\n+ if (name == \"ffn_gate\") return tensor_role::ffn_gate;\n+ if (name == \"ffn_down\") return tensor_role::ffn_down;\n+ if (name == \"ffn_gate_inp\") return tensor_role::ffn_gate_inp;\n+ if (name == \"ffn_gate_exp\") return tensor_role::ffn_gate_exp;\n+ if (name == \"ffn_down_exp\") return tensor_role::ffn_down_exp;\n+ if (name == \"ffn_up_exp\") return tensor_role::ffn_up_exp;\n+ throw std::runtime_error(\"Unknown tensor role: \" + name);\n+}\n+\n+Mat TensorMetadata::getMat(Ptr tensor_reader) {\n+ if (type != GGML_TYPE_F32) {\n+ throw std::runtime_error(\"Unsupported tensor type: \" + std::to_string(type));\n+ }\n+ if (dims.size() == 2) \n+ return tensor_reader->read2DMat(\n+ GGML_TYPE_F32, dims[0],dims[1], data_offset\n+ );\n+ if (dims.size() == 1) \n+ return tensor_reader->read1DMat(\n+ GGML_TYPE_F32, dims[0], data_offset\n+ 
);\n+\n+ throw std::runtime_error(\n+ \"Unsupported tensor dimension: \" + std::to_string(dims.size()));\n+};\n+\n+std::string GGUFParser::get_architecture(){\n+ return getStringMetadata(\"architecture\");\n+};\n+\n+MatShape BlockMetadata::getInputShape() \n+{\n+ std::vector t = getTensorMetadata( tensor_role::attn_qkv, false, false);\n+ if (t.size() == 0) {\n+ throw std::runtime_error(\"No input tensors found\");\n+ }\n+\n+ MatShape inputShape(3);\n+ inputShape[0] = 1; inputShape[1] = -1;\n+ inputShape[2] = t[0].dims[1];\n+ return inputShape;\n+}\n+\n+int BlockMetadata::getDhidden() \n+{\n+ std::vector t = getTensorMetadata( tensor_role::attn_qkv, false, false);\n+ if (t.size() == 0) {\n+ throw std::runtime_error(\"No input tensors found\");\n+ }\n+ return t[0].dims[0] / 3;\n+}\n+\n+MatShape BlockMetadata::getOutputShape() \n+{\n+ std::vector t = getTensorMetadata( tensor_role::attn_qkv, false, false);\n+ if (t.size() == 0) {\n+ throw std::runtime_error(\"No input tensors found\");\n+ }\n+\n+ MatShape outputShape(3);\n+ outputShape[0] = 1; outputShape[1] = -1;\n+ outputShape[2] = t[0].dims[1];\n+ return outputShape;\n+}\n+\n+void BlockMetadata::getAttentionLayerParams(Ptr tensor_reader, LayerParams& layerParams) \n+{\n+ layerParams.type = \"Attention\";\n+ layerParams.name = \"attention_block\";\n+ layerParams.set(\"num_heads\", n_heads);\n+ layerParams.set(\"blockn\", blockn);\n+ int d_hidden = getDhidden();\n+ std::vector qkv_hidden_sizes = {d_hidden, d_hidden, d_hidden};\n+ layerParams.set(\"qkv_hidden_sizes\", DictValue::arrayInt(&qkv_hidden_sizes[0], 3));\n+ Mat qkv_weight = getTensorMetadata(tensor_role::attn_qkv, false, false)[0].getMat(tensor_reader);\n+ Mat qkv_bias = getTensorMetadata(tensor_role::attn_qkv, true, false)[0].getMat(tensor_reader);\n+ layerParams.blobs.push_back(qkv_weight.t());\n+ layerParams.blobs.push_back(qkv_bias);\n+}\n+\n+std::vector BlockMetadata::getTensorMetadata(tensor_role role, bool is_bias, bool allow_multiple) {\n+ std::vector result;\n+ for (const auto & tensor : tensors) {\n+ if (tensor.role == role && tensor.is_bias == is_bias) {\n+ result.push_back(tensor);\n+ }\n+ }\n+ return result;\n+ if (!allow_multiple && result.size() > 1) {\n+ throw std::runtime_error(\"Multiple tensors found for role: \" + std::to_string(role));\n+ }\n+ return result;\n+}\n+\n+void GGUFParser::addTensorMetadata() {\n+ auto tensor = TensorMetadata();\n+\n+ std::string tensor_name = reader.readString();\n+ std::regex layerRegex(R\"(^blk\\.(\\d+)\\.([a-zA-Z0-9_]+)\\.(weight|bias)$)\");\n+ std::smatch match;\n+ std::string layerName, paramType;\n+\n+ int blockn;\n+\n+ if (std::regex_match(tensor_name, match, layerRegex)) {\n+ blockn = std::stoi(match[1].str());\n+ layerName = match[2].str();\n+ paramType = match[3].str();\n+ if (paramType == \"weight\") {\n+ tensor.is_bias = false;\n+ } else if (paramType == \"bias\") {\n+ tensor.is_bias = true;\n+ } else {\n+ throw std::runtime_error(\"Unknown parameter type: \" + paramType);\n+ }\n+ tensor.role = get_tensor_role(layerName);\n+ tensor.n_block = blockn;\n+ } else {\n+ throw std::runtime_error(\"Invalid tensor name format: \" + tensor_name);\n+ }\n+\n+ int dim_count = reader.readSingleValueInt();\n+ tensor.dims = MatShape(dim_count);\n+ std::vector dims;\n+ for (uint32_t i = 0; i < dim_count; ++i) {\n+ dims.push_back(reader.readSingleValueInt()); \n+ }", - "comment_created_at": "2025-04-22T07:49:08+00:00", - "comment_author": "asmorkalov", - "comment_body": "It's more efficient to use std::vector dims(dim_count); and assign 
values, rather than call push_back and trigger reallocations.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1930098040", - "pr_number": 26844, - "pr_file": "modules/imgcodecs/src/grfmt_jpegxl.cpp", - "created_at": "2025-01-27T07:51:19+00:00", - "commented_code": "bool JpegXLDecoder::readHeader()\n{\n // Open file\n if (!m_f) {\n m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n if (!m_f)\n return false;\n if (m_buf.empty()) {\n // Open file\n if (!m_f) {\n m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n if (!m_f) {\n return false;\n }\n }\n // Create buffer for reading file\n constexpr size_t read_buffer_size = 16384; // 16KB chunks\n if (m_read_buffer.capacity() < read_buffer_size) {\n m_read_buffer.resize(read_buffer_size);\n }", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1930098040", - "repo_full_name": "opencv/opencv", - "pr_number": 26844, - "pr_file": "modules/imgcodecs/src/grfmt_jpegxl.cpp", - "discussion_id": "1930098040", - "commented_code": "@@ -91,11 +93,19 @@ ImageDecoder JpegXLDecoder::newDecoder() const\n \n bool JpegXLDecoder::readHeader()\n {\n- // Open file\n- if (!m_f) {\n- m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n- if (!m_f)\n- return false;\n+ if (m_buf.empty()) {\n+ // Open file\n+ if (!m_f) {\n+ m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n+ if (!m_f) {\n+ return false;\n+ }\n+ }\n+ // Create buffer for reading file\n+ constexpr size_t read_buffer_size = 16384; // 16KB chunks\n+ if (m_read_buffer.capacity() < read_buffer_size) {\n+ m_read_buffer.resize(read_buffer_size);\n+ }", - "comment_created_at": "2025-01-27T07:51:19+00:00", - "comment_author": "asmorkalov", - "comment_body": "If I understood correctly, The m_read_buffer is resized only once here ant it's size is always 16k. The `m_read_buffer` is used in ::read() only. M.b. it's better to make it local variable there. If you need the buffer shared between readHeader and readData then it makes sense to initialize the buffer in decoder constructor.", - "pr_file_module": null - }, - { - "comment_id": "1930290856", - "repo_full_name": "opencv/opencv", - "pr_number": 26844, - "pr_file": "modules/imgcodecs/src/grfmt_jpegxl.cpp", - "discussion_id": "1930098040", - "commented_code": "@@ -91,11 +93,19 @@ ImageDecoder JpegXLDecoder::newDecoder() const\n \n bool JpegXLDecoder::readHeader()\n {\n- // Open file\n- if (!m_f) {\n- m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n- if (!m_f)\n- return false;\n+ if (m_buf.empty()) {\n+ // Open file\n+ if (!m_f) {\n+ m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n+ if (!m_f) {\n+ return false;\n+ }\n+ }\n+ // Create buffer for reading file\n+ constexpr size_t read_buffer_size = 16384; // 16KB chunks\n+ if (m_read_buffer.capacity() < read_buffer_size) {\n+ m_read_buffer.resize(read_buffer_size);\n+ }", - "comment_created_at": "2025-01-27T10:22:45+00:00", - "comment_author": "Kumataro", - "comment_body": "Thank you for your comment, `m_read_buffer` should be kept accrossing 'readHeader()' and 'readData()'. 
So I will move it to constructor.\r\n\r\n```\r\nJXLDecoder::JxlDecoder()\r\n m_decoder = nullptr;\r\n\r\nJXLDecoder::readHeader()\r\n m_decoder = JxlDecoderMake(nullptr);\r\n\r\n read()\r\n JxlDecoderReleaseInput() // no effects because no input buffer is set\r\n JxlDecoderSetInput() ---- Input Buffer is locked.\r\n |\r\nJXLDecoder::readData() |\r\n read() |\r\n JxlDecoderReleaseInput()- Input Buffer is unlocked.\r\n JxlDecoderSetInput() ---- Input Buffer is locked.\r\n |\r\nJXLDecoder::~JxlDecoder() |\r\n close() |\r\n m_decoder.reset() ------- Input buffer is unlocked\r\n destory m_decoder\r\n```\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1890435483", - "pr_number": 26619, - "pr_file": "3rdparty/fastcv/src/fastcv_hal_imgproc.cpp", - "created_at": "2024-12-18T15:31:56+00:00", - "commented_code": "CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format(\"Src type:%s is not supported\", cv::typeToString(src_type).c_str()));\n\n CV_HAL_RETURN(status, hal_warpPerspective);\n}\n\nclass FcvPyrLoop_Invoker : public cv::ParallelLoopBody\n{\npublic:\n\n FcvPyrLoop_Invoker(cv::Mat src_, int width_, int height_, cv::Mat dst_, int bdr_, int knl_, int stripeHeight_, int nStripes_) :\n cv::ParallelLoopBody(), src(src_), width(width_), height(height_), dst(dst_), bdr(bdr_), knl(knl_), stripeHeight(stripeHeight_), nStripes(nStripes_)\n {\n }\n\n virtual void operator()(const cv::Range& range) const CV_OVERRIDE\n {\n int height_ = stripeHeight * (range.end - range.start);\n int width_ = width;\n cv::Mat src_;\n int n = knl/2;\n\n if(range.end == nStripes)\n height_ += (height - range.end * stripeHeight);\n\n src_ = cv::Mat(height_ + 2*n, width_ + 2*n, CV_8U);\n\n if(range.start == 0 && range.end == nStripes)\n cv::copyMakeBorder(src(cv::Rect(0, 0, width, height)), src_, n, n, n, n, bdr);\n else if(range.start == 0)\n cv::copyMakeBorder(src(cv::Rect(0, 0, width_, height_ + n)), src_, n, 0, n, n, bdr);\n else if(range.end == nStripes)\n cv::copyMakeBorder(src(cv::Rect(0, range.start * stripeHeight - n, width_, height_ + n)), src_, 0, n, n, n, bdr);\n else\n cv::copyMakeBorder(src(cv::Rect(0, range.start * stripeHeight - n, width_, height_ + 2*n)), src_, 0, 0, n, n, bdr);\n\n int dstHeight_, dstWidth_, origDstHeight_, origDstWidth_;\n dstHeight_ = (height_ + 2*n + 1)/2;\n dstWidth_ = (width_ + 2*n + 1)/2;\n origDstHeight_ = (height_ + 1)/2;\n origDstWidth_ = (width_ + 1)/2;\n\n cv::Mat dst_padded = cv::Mat(dstHeight_, dstWidth_, CV_8U);\n\n fcvPyramidLevel_v2 *framePyr = new fcvPyramidLevel_v2[2];", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1890435483", - "repo_full_name": "opencv/opencv", - "pr_number": 26619, - "pr_file": "3rdparty/fastcv/src/fastcv_hal_imgproc.cpp", - "discussion_id": "1890435483", - "commented_code": "@@ -743,4 +743,253 @@ int fastcv_hal_warpPerspective(\n CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format(\"Src type:%s is not supported\", cv::typeToString(src_type).c_str()));\n \n CV_HAL_RETURN(status, hal_warpPerspective);\n+}\n+\n+class FcvPyrLoop_Invoker : public cv::ParallelLoopBody\n+{\n+public:\n+\n+ FcvPyrLoop_Invoker(cv::Mat src_, int width_, int height_, cv::Mat dst_, int bdr_, int knl_, int stripeHeight_, int nStripes_) :\n+ cv::ParallelLoopBody(), src(src_), width(width_), height(height_), dst(dst_), bdr(bdr_), knl(knl_), stripeHeight(stripeHeight_), nStripes(nStripes_)\n+ {\n+ }\n+\n+ virtual void operator()(const cv::Range& range) const CV_OVERRIDE\n+ {\n+ int height_ = stripeHeight * (range.end - range.start);\n+ int width_ = 
width;\n+ cv::Mat src_;\n+ int n = knl/2;\n+\n+ if(range.end == nStripes)\n+ height_ += (height - range.end * stripeHeight);\n+\n+ src_ = cv::Mat(height_ + 2*n, width_ + 2*n, CV_8U);\n+\n+ if(range.start == 0 && range.end == nStripes)\n+ cv::copyMakeBorder(src(cv::Rect(0, 0, width, height)), src_, n, n, n, n, bdr);\n+ else if(range.start == 0)\n+ cv::copyMakeBorder(src(cv::Rect(0, 0, width_, height_ + n)), src_, n, 0, n, n, bdr);\n+ else if(range.end == nStripes)\n+ cv::copyMakeBorder(src(cv::Rect(0, range.start * stripeHeight - n, width_, height_ + n)), src_, 0, n, n, n, bdr);\n+ else\n+ cv::copyMakeBorder(src(cv::Rect(0, range.start * stripeHeight - n, width_, height_ + 2*n)), src_, 0, 0, n, n, bdr);\n+\n+ int dstHeight_, dstWidth_, origDstHeight_, origDstWidth_;\n+ dstHeight_ = (height_ + 2*n + 1)/2;\n+ dstWidth_ = (width_ + 2*n + 1)/2;\n+ origDstHeight_ = (height_ + 1)/2;\n+ origDstWidth_ = (width_ + 1)/2;\n+\n+ cv::Mat dst_padded = cv::Mat(dstHeight_, dstWidth_, CV_8U);\n+\n+ fcvPyramidLevel_v2 *framePyr = new fcvPyramidLevel_v2[2];", - "comment_created_at": "2024-12-18T15:31:56+00:00", - "comment_author": "asmorkalov", - "comment_body": "I propose to use local variable on stack. new is redundant here. Also it's not deleted afterwards.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1890437308", - "pr_number": 26619, - "pr_file": "3rdparty/fastcv/src/fastcv_hal_imgproc.cpp", - "created_at": "2024-12-18T15:33:11+00:00", - "commented_code": "CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format(\"Src type:%s is not supported\", cv::typeToString(src_type).c_str()));\n\n CV_HAL_RETURN(status, hal_warpPerspective);\n}\n\nclass FcvPyrLoop_Invoker : public cv::ParallelLoopBody\n{\npublic:\n\n FcvPyrLoop_Invoker(cv::Mat src_, int width_, int height_, cv::Mat dst_, int bdr_, int knl_, int stripeHeight_, int nStripes_) :\n cv::ParallelLoopBody(), src(src_), width(width_), height(height_), dst(dst_), bdr(bdr_), knl(knl_), stripeHeight(stripeHeight_), nStripes(nStripes_)\n {\n }\n\n virtual void operator()(const cv::Range& range) const CV_OVERRIDE\n {\n int height_ = stripeHeight * (range.end - range.start);\n int width_ = width;\n cv::Mat src_;\n int n = knl/2;\n\n if(range.end == nStripes)\n height_ += (height - range.end * stripeHeight);\n\n src_ = cv::Mat(height_ + 2*n, width_ + 2*n, CV_8U);\n\n if(range.start == 0 && range.end == nStripes)\n cv::copyMakeBorder(src(cv::Rect(0, 0, width, height)), src_, n, n, n, n, bdr);\n else if(range.start == 0)\n cv::copyMakeBorder(src(cv::Rect(0, 0, width_, height_ + n)), src_, n, 0, n, n, bdr);\n else if(range.end == nStripes)\n cv::copyMakeBorder(src(cv::Rect(0, range.start * stripeHeight - n, width_, height_ + n)), src_, 0, n, n, n, bdr);\n else\n cv::copyMakeBorder(src(cv::Rect(0, range.start * stripeHeight - n, width_, height_ + 2*n)), src_, 0, 0, n, n, bdr);\n\n int dstHeight_, dstWidth_, origDstHeight_, origDstWidth_;\n dstHeight_ = (height_ + 2*n + 1)/2;\n dstWidth_ = (width_ + 2*n + 1)/2;\n origDstHeight_ = (height_ + 1)/2;\n origDstWidth_ = (width_ + 1)/2;\n\n cv::Mat dst_padded = cv::Mat(dstHeight_, dstWidth_, CV_8U);\n\n fcvPyramidLevel_v2 *framePyr = new fcvPyramidLevel_v2[2];\n framePyr[0].ptr = NULL;\n framePyr[1].ptr = dst_padded.data;\n framePyr[1].stride = dstWidth_;\n\n fcvPyramidCreateu8_v4(src_.data, width_ + 2*n, height_ + 2*n,\n width_ + 2*n, 2, FASTCV_PYRAMID_SCALE_HALF,\n framePyr, FASTCV_BORDER_UNDEFINED, 0);\n\n int start_val = stripeHeight * range.start;\n cv::Mat dst_temp1 = dst_padded(cv::Rect(n/2, n/2, origDstWidth_, 
origDstHeight_));\n cv::Mat dst_temp2 = dst(cv::Rect(0, start_val/2, origDstWidth_, origDstHeight_));\n dst_temp1.copyTo(dst_temp2);\n }\n\nprivate:\n cv::Mat src;\n const int width;\n const int height;\n cv::Mat dst;\n const int bdr;\n const int knl;\n const int stripeHeight;\n const int nStripes;\n\n FcvPyrLoop_Invoker(const FcvPyrLoop_Invoker &); // = delete;\n const FcvPyrLoop_Invoker& operator= (const FcvPyrLoop_Invoker &); // = delete;\n};\n\nint fastcv_hal_pyrdown(\n const uchar* src_data,\n size_t src_step,\n int src_width,\n int src_height,\n uchar* dst_data,\n size_t dst_step,\n int dst_width,\n int dst_height,\n int depth,\n int cn,\n int border_type)\n{\n if(depth != CV_8U || cn!= 1)\n {\n CV_HAL_RETURN_NOT_IMPLEMENTED(\"src type not supported\");\n }\n\n int dstW = (src_width & 1) == 1 ? ((src_width + 1) >> 1) : ((src_width) >> 1);\n int dstH = (src_height & 1) == 1 ? ((src_height + 1) >> 1) : ((src_height) >> 1);\n\n if((dstW > dst_width) || (dstH > dst_height))\n {\n CV_HAL_RETURN_NOT_IMPLEMENTED(\"dst size needs to be atleast half of the src size\");\n }\n\n INITIALIZATION_CHECK;\n\n fcvBorderType bdr;\n uint8_t bVal = 0;\n int nThreads = cv::getNumThreads();\n if(nThreads <= 1)\n {\n switch(border_type)\n {\n case cv::BORDER_REPLICATE:\n bdr = FASTCV_BORDER_REPLICATE;\n break;\n case cv::BORDER_REFLECT:\n bdr = FASTCV_BORDER_REFLECT;\n break;\n case cv::BORDER_REFLECT101: // cv::BORDER_REFLECT_101, BORDER_DEFAULT\n bdr = FASTCV_BORDER_REFLECT_V2;\n break;\n default:\n CV_HAL_RETURN_NOT_IMPLEMENTED(\"border type not supported\");\n }\n\n fcvPyramidLevel_v2 *frame1Pyr = new fcvPyramidLevel_v2[2];\n frame1Pyr[0].ptr = NULL;\n frame1Pyr[1].ptr = dst_data;\n frame1Pyr[1].stride = dst_step;", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1890437308", - "repo_full_name": "opencv/opencv", - "pr_number": 26619, - "pr_file": "3rdparty/fastcv/src/fastcv_hal_imgproc.cpp", - "discussion_id": "1890437308", - "commented_code": "@@ -743,4 +743,253 @@ int fastcv_hal_warpPerspective(\n CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format(\"Src type:%s is not supported\", cv::typeToString(src_type).c_str()));\n \n CV_HAL_RETURN(status, hal_warpPerspective);\n+}\n+\n+class FcvPyrLoop_Invoker : public cv::ParallelLoopBody\n+{\n+public:\n+\n+ FcvPyrLoop_Invoker(cv::Mat src_, int width_, int height_, cv::Mat dst_, int bdr_, int knl_, int stripeHeight_, int nStripes_) :\n+ cv::ParallelLoopBody(), src(src_), width(width_), height(height_), dst(dst_), bdr(bdr_), knl(knl_), stripeHeight(stripeHeight_), nStripes(nStripes_)\n+ {\n+ }\n+\n+ virtual void operator()(const cv::Range& range) const CV_OVERRIDE\n+ {\n+ int height_ = stripeHeight * (range.end - range.start);\n+ int width_ = width;\n+ cv::Mat src_;\n+ int n = knl/2;\n+\n+ if(range.end == nStripes)\n+ height_ += (height - range.end * stripeHeight);\n+\n+ src_ = cv::Mat(height_ + 2*n, width_ + 2*n, CV_8U);\n+\n+ if(range.start == 0 && range.end == nStripes)\n+ cv::copyMakeBorder(src(cv::Rect(0, 0, width, height)), src_, n, n, n, n, bdr);\n+ else if(range.start == 0)\n+ cv::copyMakeBorder(src(cv::Rect(0, 0, width_, height_ + n)), src_, n, 0, n, n, bdr);\n+ else if(range.end == nStripes)\n+ cv::copyMakeBorder(src(cv::Rect(0, range.start * stripeHeight - n, width_, height_ + n)), src_, 0, n, n, n, bdr);\n+ else\n+ cv::copyMakeBorder(src(cv::Rect(0, range.start * stripeHeight - n, width_, height_ + 2*n)), src_, 0, 0, n, n, bdr);\n+\n+ int dstHeight_, dstWidth_, origDstHeight_, origDstWidth_;\n+ dstHeight_ = (height_ + 2*n 
+ 1)/2;\n+ dstWidth_ = (width_ + 2*n + 1)/2;\n+ origDstHeight_ = (height_ + 1)/2;\n+ origDstWidth_ = (width_ + 1)/2;\n+\n+ cv::Mat dst_padded = cv::Mat(dstHeight_, dstWidth_, CV_8U);\n+\n+ fcvPyramidLevel_v2 *framePyr = new fcvPyramidLevel_v2[2];\n+ framePyr[0].ptr = NULL;\n+ framePyr[1].ptr = dst_padded.data;\n+ framePyr[1].stride = dstWidth_;\n+\n+ fcvPyramidCreateu8_v4(src_.data, width_ + 2*n, height_ + 2*n,\n+ width_ + 2*n, 2, FASTCV_PYRAMID_SCALE_HALF,\n+ framePyr, FASTCV_BORDER_UNDEFINED, 0);\n+\n+ int start_val = stripeHeight * range.start;\n+ cv::Mat dst_temp1 = dst_padded(cv::Rect(n/2, n/2, origDstWidth_, origDstHeight_));\n+ cv::Mat dst_temp2 = dst(cv::Rect(0, start_val/2, origDstWidth_, origDstHeight_));\n+ dst_temp1.copyTo(dst_temp2);\n+ }\n+\n+private:\n+ cv::Mat src;\n+ const int width;\n+ const int height;\n+ cv::Mat dst;\n+ const int bdr;\n+ const int knl;\n+ const int stripeHeight;\n+ const int nStripes;\n+\n+ FcvPyrLoop_Invoker(const FcvPyrLoop_Invoker &); // = delete;\n+ const FcvPyrLoop_Invoker& operator= (const FcvPyrLoop_Invoker &); // = delete;\n+};\n+\n+int fastcv_hal_pyrdown(\n+ const uchar* src_data,\n+ size_t src_step,\n+ int src_width,\n+ int src_height,\n+ uchar* dst_data,\n+ size_t dst_step,\n+ int dst_width,\n+ int dst_height,\n+ int depth,\n+ int cn,\n+ int border_type)\n+{\n+ if(depth != CV_8U || cn!= 1)\n+ {\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"src type not supported\");\n+ }\n+\n+ int dstW = (src_width & 1) == 1 ? ((src_width + 1) >> 1) : ((src_width) >> 1);\n+ int dstH = (src_height & 1) == 1 ? ((src_height + 1) >> 1) : ((src_height) >> 1);\n+\n+ if((dstW > dst_width) || (dstH > dst_height))\n+ {\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"dst size needs to be atleast half of the src size\");\n+ }\n+\n+ INITIALIZATION_CHECK;\n+\n+ fcvBorderType bdr;\n+ uint8_t bVal = 0;\n+ int nThreads = cv::getNumThreads();\n+ if(nThreads <= 1)\n+ {\n+ switch(border_type)\n+ {\n+ case cv::BORDER_REPLICATE:\n+ bdr = FASTCV_BORDER_REPLICATE;\n+ break;\n+ case cv::BORDER_REFLECT:\n+ bdr = FASTCV_BORDER_REFLECT;\n+ break;\n+ case cv::BORDER_REFLECT101: // cv::BORDER_REFLECT_101, BORDER_DEFAULT\n+ bdr = FASTCV_BORDER_REFLECT_V2;\n+ break;\n+ default:\n+ CV_HAL_RETURN_NOT_IMPLEMENTED(\"border type not supported\");\n+ }\n+\n+ fcvPyramidLevel_v2 *frame1Pyr = new fcvPyramidLevel_v2[2];\n+ frame1Pyr[0].ptr = NULL;\n+ frame1Pyr[1].ptr = dst_data;\n+ frame1Pyr[1].stride = dst_step;", - "comment_created_at": "2024-12-18T15:33:11+00:00", - "comment_author": "asmorkalov", - "comment_body": "I propose to use local variable on stack. new is redundant here. 
Also it's not deleted afterwards.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-prevent-null-vulnerabilities.json b/_reviewers/opencv-prevent-null-vulnerabilities.json new file mode 100644 index 0000000..be9f7a1 --- /dev/null +++ b/_reviewers/opencv-prevent-null-vulnerabilities.json @@ -0,0 +1,206 @@ +[ + { + "discussion_id": "1632891724", + "pr_number": 25691, + "pr_file": "modules/imgcodecs/src/grfmt_gif.cpp", + "created_at": "2024-06-10T09:10:33+00:00", + "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level\n// directory of this distribution and at http://opencv.org/license.html\n\n#include \"precomp.hpp\"\n#include \"grfmt_gif.hpp\"\n\nnamespace cv\n{\n//////////////////////////////////////////////////////////////////////\n//// GIF Decoder ////\n//////////////////////////////////////////////////////////////////////\nGifDecoder::GifDecoder() {\n m_signature = R\"(GIF)\";\n m_type = CV_8UC3;\n bgColor = -1;\n m_buf_supported = true;\n globalColorTableSize = 0;\n localColorTableSize = 0;\n lzwMinCodeSize = 0;\n hasRead = false;\n hasTransparentColor = false;\n transparentColor = 0;\n opMode = GRFMT_GIF_Nothing;\n top = 0, left = 0, width = 0, height = 0;\n depth = 8;\n}\n\nGifDecoder::~GifDecoder() {\n close();\n}\n\nbool GifDecoder::readHeader() {\n if (!m_buf.empty()) {\n if (!m_strm.open(m_buf)) {\n return false;\n }\n } else if (!m_strm.open(m_filename)) {\n return false;\n }\n\n try {\n m_strm.skip(6);// Skip the signature: GIF87a or GIF89a\n\n // #1: read logical screen descriptor\n m_width = m_strm.getWord();\n m_height = m_strm.getWord();\n CV_Assert(m_width > 0 && m_height > 0);\n\n char flags = (char)m_strm.getByte();\n\n // the background color -> index in the global color table, valid only if the global color table is present\n bgColor = m_strm.getByte();\n m_strm.skip(1); // Skip the aspect ratio\n\n // #2: read global color table\n depth = ((flags & 0x70) >> 4) + 1;\n if (flags & 0x80) {\n globalColorTableSize = 1 << ((flags & 0x07) + 1);\n globalColorTable.allocate(3 * globalColorTableSize);\n for (int i = 0; i < 3 * globalColorTableSize; i++) {\n globalColorTable[i] = (uchar)m_strm.getByte();\n }\n }\n } catch (...) 
{\n throw;\n }\n\n hasRead = false;\n return true;\n}\n\nbool GifDecoder::readData(Mat &img) {\n if (hasRead) {\n lastImage.copyTo(img);\n return true;\n }\n\n readExtensions();\n // Image separator\n CV_Assert(!(m_strm.getByte()^0x2C));\n left = m_strm.getWord();\n top = m_strm.getWord();\n width = m_strm.getWord();\n height = m_strm.getWord();\n CV_Assert(width > 0 && height > 0 && left + width <= m_width && top + height <= m_height);\n\n currentImageCodeStream.allocate(width * height);\n Mat img_;\n\n switch (opMode) {\n case GifOpMode::GRFMT_GIF_PreviousImage:\n if (lastImage.empty()){\n img_ = Mat(m_height, m_width, CV_8UC3, Scalar(0, 0, 0));\n } else {\n img_ = lastImage;\n }\n break;\n case GifOpMode::GRFMT_GIF_Background:\n // background color is valid iff global color table exists\n CV_Assert(globalColorTableSize > 0);\n img_ = Mat(m_height, m_width, CV_8UC3,\n Scalar(globalColorTable[bgColor * 3 + 2],\n globalColorTable[bgColor * 3 + 1],\n globalColorTable[bgColor * 3]));\n break;\n case GifOpMode::GRFMT_GIF_Nothing:\n case GifOpMode::GRFMT_GIF_Cover:\n // default value\n img_ = Mat(m_height, m_width, CV_8UC3, Scalar(0, 0, 0));\n break;\n default:\n CV_Assert(false);\n }\n lastImage.release();\n\n auto flags = (uchar)m_strm.getByte();\n localColorTableSize = 0;\n if (flags & 0x80) {\n // local color table\n localColorTableSize = 1 << ((flags & 0x07) + 1);\n localColorTable.allocate(3 * localColorTableSize);\n for (int i = 0; i < 3 * localColorTableSize; i++) {\n localColorTable[i] = (uchar)m_strm.getByte();\n }\n }\n\n // the case that neither global nor local color table exists is not defined in the GIF standard (but allowed)\n if (!(globalColorTableSize || localColorTableSize)) {\n // go through the length of unused data.\n m_strm.skip(1);\n int len = m_strm.getByte();\n while (len) {\n m_strm.skip(len);\n len = m_strm.getByte();\n }\n\n lastImage = img_;\n if (!img.empty())\n img_.copyTo(img);\n\n // release the memory\n img_.release();\n return true;\n }\n\n // lzw decompression to get the code stream\n hasRead = lzwDecode();\n\n // convert code stream into pixels on the image\n if (hasRead) {\n if (!(flags & 0x40)) {\n // no interlace, simply convert the code stream into pixels from top to down\n code2pixel(img_, 0, 1);\n } else {\n // consider the interlace mode, the image will be rendered in four separate passes\n code2pixel(img_, 0, 8);\n code2pixel(img_, 4, 8);\n code2pixel(img_, 2, 4);\n code2pixel(img_, 1, 2);\n }\n }\n\n lastImage = img_;\n if (!img.empty())\n img_.copyTo(img);\n\n // release the memory\n img_.release();\n\n return hasRead;\n}\n\nbool GifDecoder::nextPage() {\n if (hasRead) {\n hasRead = false;\n // end of a gif file\n if(!(m_strm.getByte() ^ 0x3B)) return false;\n m_strm.setPos(m_strm.getPos() - 1);\n return true;\n } else {\n bool success;\n try {\n Mat emptyImg;\n success = readData(emptyImg);\n emptyImg.release();\n } catch(...) 
{\n return false;\n }\n return success;\n }\n}\n\nvoid GifDecoder::readExtensions() {\n uchar len;\n while (!(m_strm.getByte() ^ 0x21)) {\n uchar extensionType = (uchar)m_strm.getByte();\n\n // read graphic control extension\n // the scope of this extension is the next image or plain text extension\n hasTransparentColor = false;\n opMode = GifOpMode::GRFMT_GIF_Nothing;// default value\n if (!(extensionType ^ 0xF9)) {\n len = (uchar)m_strm.getByte();\n CV_Assert(len == 4);\n uchar flags = (uchar)m_strm.getByte();\n m_strm.getWord(); // delay time, not used\n opMode = (GifOpMode)((flags & 0x1C) >> 2);\n hasTransparentColor = flags & 0x01;\n transparentColor = (uchar)m_strm.getByte();\n }\n\n // skip other kinds of extensions\n len = (uchar)m_strm.getByte();\n while (len) {\n m_strm.skip(len);\n len = (uchar)m_strm.getByte();\n }\n }\n // roll back to the block identifier\n m_strm.setPos(m_strm.getPos() - 1);\n}\n\nvoid GifDecoder::code2pixel(Mat& img, int start, int k){\n for (int i = start; i < height; i+=k) {\n for (int j = 0; j < width; j++) {\n int idx = i * width + j;\n int colorIdx = currentImageCodeStream[idx];\n if (hasTransparentColor && colorIdx == transparentColor) {\n continue;\n }\n if (colorIdx < localColorTableSize) {\n img.at(top + i, left + j)[0] = localColorTable[colorIdx * 3 + 2]; //B\n img.at(top + i, left + j)[1] = localColorTable[colorIdx * 3 + 1]; //G\n img.at(top + i, left + j)[2] = localColorTable[colorIdx * 3]; //R\n } else if (colorIdx < globalColorTableSize) {\n img.at(top + i, left + j)[0] = globalColorTable[colorIdx * 3 + 2]; //B\n img.at(top + i, left + j)[1] = globalColorTable[colorIdx * 3 + 1]; //G\n img.at(top + i, left + j)[2] = globalColorTable[colorIdx * 3]; //R\n } else {\n CV_Assert(false);\n }\n }\n }\n}\n\nvoid GifDecoder::deleteLzwExtraTablePrefix(lzwNodeD* lzwExtraTable, int lzwTableSize) const{\n for (int i = (1 << lzwMinCodeSize) + 2; i <= lzwTableSize; i++) {\n if (lzwExtraTable[i].prefix) {\n delete[] lzwExtraTable[i].prefix;\n lzwExtraTable[i].prefix = nullptr;\n }\n }\n}\n\nbool GifDecoder::lzwDecode() {\n // initialization\n lzwMinCodeSize = m_strm.getByte();\n int lzwCodeSize = lzwMinCodeSize + 1;\n int clearCode = 1 << lzwMinCodeSize;\n int exitCode = clearCode + 1;\n CV_Assert(lzwCodeSize > 2 && lzwCodeSize <= 12);\n auto* lzwExtraTable = new lzwNodeD[(1 << 12) + 1];", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1632891724", + "repo_full_name": "opencv/opencv", + "pr_number": 25691, + "pr_file": "modules/imgcodecs/src/grfmt_gif.cpp", + "discussion_id": "1632891724", + "commented_code": "@@ -0,0 +1,372 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level\n+// directory of this distribution and at http://opencv.org/license.html\n+\n+#include \"precomp.hpp\"\n+#include \"grfmt_gif.hpp\"\n+\n+namespace cv\n+{\n+//////////////////////////////////////////////////////////////////////\n+//// GIF Decoder ////\n+//////////////////////////////////////////////////////////////////////\n+GifDecoder::GifDecoder() {\n+ m_signature = R\"(GIF)\";\n+ m_type = CV_8UC3;\n+ bgColor = -1;\n+ m_buf_supported = true;\n+ globalColorTableSize = 0;\n+ localColorTableSize = 0;\n+ lzwMinCodeSize = 0;\n+ hasRead = false;\n+ hasTransparentColor = false;\n+ transparentColor = 0;\n+ opMode = GRFMT_GIF_Nothing;\n+ top = 0, left = 0, width = 0, height = 0;\n+ depth = 8;\n+}\n+\n+GifDecoder::~GifDecoder() {\n+ close();\n+}\n+\n+bool GifDecoder::readHeader() {\n+ 
if (!m_buf.empty()) {\n+ if (!m_strm.open(m_buf)) {\n+ return false;\n+ }\n+ } else if (!m_strm.open(m_filename)) {\n+ return false;\n+ }\n+\n+ try {\n+ m_strm.skip(6);// Skip the signature: GIF87a or GIF89a\n+\n+ // #1: read logical screen descriptor\n+ m_width = m_strm.getWord();\n+ m_height = m_strm.getWord();\n+ CV_Assert(m_width > 0 && m_height > 0);\n+\n+ char flags = (char)m_strm.getByte();\n+\n+ // the background color -> index in the global color table, valid only if the global color table is present\n+ bgColor = m_strm.getByte();\n+ m_strm.skip(1); // Skip the aspect ratio\n+\n+ // #2: read global color table\n+ depth = ((flags & 0x70) >> 4) + 1;\n+ if (flags & 0x80) {\n+ globalColorTableSize = 1 << ((flags & 0x07) + 1);\n+ globalColorTable.allocate(3 * globalColorTableSize);\n+ for (int i = 0; i < 3 * globalColorTableSize; i++) {\n+ globalColorTable[i] = (uchar)m_strm.getByte();\n+ }\n+ }\n+ } catch (...) {\n+ throw;\n+ }\n+\n+ hasRead = false;\n+ return true;\n+}\n+\n+bool GifDecoder::readData(Mat &img) {\n+ if (hasRead) {\n+ lastImage.copyTo(img);\n+ return true;\n+ }\n+\n+ readExtensions();\n+ // Image separator\n+ CV_Assert(!(m_strm.getByte()^0x2C));\n+ left = m_strm.getWord();\n+ top = m_strm.getWord();\n+ width = m_strm.getWord();\n+ height = m_strm.getWord();\n+ CV_Assert(width > 0 && height > 0 && left + width <= m_width && top + height <= m_height);\n+\n+ currentImageCodeStream.allocate(width * height);\n+ Mat img_;\n+\n+ switch (opMode) {\n+ case GifOpMode::GRFMT_GIF_PreviousImage:\n+ if (lastImage.empty()){\n+ img_ = Mat(m_height, m_width, CV_8UC3, Scalar(0, 0, 0));\n+ } else {\n+ img_ = lastImage;\n+ }\n+ break;\n+ case GifOpMode::GRFMT_GIF_Background:\n+ // background color is valid iff global color table exists\n+ CV_Assert(globalColorTableSize > 0);\n+ img_ = Mat(m_height, m_width, CV_8UC3,\n+ Scalar(globalColorTable[bgColor * 3 + 2],\n+ globalColorTable[bgColor * 3 + 1],\n+ globalColorTable[bgColor * 3]));\n+ break;\n+ case GifOpMode::GRFMT_GIF_Nothing:\n+ case GifOpMode::GRFMT_GIF_Cover:\n+ // default value\n+ img_ = Mat(m_height, m_width, CV_8UC3, Scalar(0, 0, 0));\n+ break;\n+ default:\n+ CV_Assert(false);\n+ }\n+ lastImage.release();\n+\n+ auto flags = (uchar)m_strm.getByte();\n+ localColorTableSize = 0;\n+ if (flags & 0x80) {\n+ // local color table\n+ localColorTableSize = 1 << ((flags & 0x07) + 1);\n+ localColorTable.allocate(3 * localColorTableSize);\n+ for (int i = 0; i < 3 * localColorTableSize; i++) {\n+ localColorTable[i] = (uchar)m_strm.getByte();\n+ }\n+ }\n+\n+ // the case that neither global nor local color table exists is not defined in the GIF standard (but allowed)\n+ if (!(globalColorTableSize || localColorTableSize)) {\n+ // go through the length of unused data.\n+ m_strm.skip(1);\n+ int len = m_strm.getByte();\n+ while (len) {\n+ m_strm.skip(len);\n+ len = m_strm.getByte();\n+ }\n+\n+ lastImage = img_;\n+ if (!img.empty())\n+ img_.copyTo(img);\n+\n+ // release the memory\n+ img_.release();\n+ return true;\n+ }\n+\n+ // lzw decompression to get the code stream\n+ hasRead = lzwDecode();\n+\n+ // convert code stream into pixels on the image\n+ if (hasRead) {\n+ if (!(flags & 0x40)) {\n+ // no interlace, simply convert the code stream into pixels from top to down\n+ code2pixel(img_, 0, 1);\n+ } else {\n+ // consider the interlace mode, the image will be rendered in four separate passes\n+ code2pixel(img_, 0, 8);\n+ code2pixel(img_, 4, 8);\n+ code2pixel(img_, 2, 4);\n+ code2pixel(img_, 1, 2);\n+ }\n+ }\n+\n+ lastImage = img_;\n+ if 
(!img.empty())\n+ img_.copyTo(img);\n+\n+ // release the memory\n+ img_.release();\n+\n+ return hasRead;\n+}\n+\n+bool GifDecoder::nextPage() {\n+ if (hasRead) {\n+ hasRead = false;\n+ // end of a gif file\n+ if(!(m_strm.getByte() ^ 0x3B)) return false;\n+ m_strm.setPos(m_strm.getPos() - 1);\n+ return true;\n+ } else {\n+ bool success;\n+ try {\n+ Mat emptyImg;\n+ success = readData(emptyImg);\n+ emptyImg.release();\n+ } catch(...) {\n+ return false;\n+ }\n+ return success;\n+ }\n+}\n+\n+void GifDecoder::readExtensions() {\n+ uchar len;\n+ while (!(m_strm.getByte() ^ 0x21)) {\n+ uchar extensionType = (uchar)m_strm.getByte();\n+\n+ // read graphic control extension\n+ // the scope of this extension is the next image or plain text extension\n+ hasTransparentColor = false;\n+ opMode = GifOpMode::GRFMT_GIF_Nothing;// default value\n+ if (!(extensionType ^ 0xF9)) {\n+ len = (uchar)m_strm.getByte();\n+ CV_Assert(len == 4);\n+ uchar flags = (uchar)m_strm.getByte();\n+ m_strm.getWord(); // delay time, not used\n+ opMode = (GifOpMode)((flags & 0x1C) >> 2);\n+ hasTransparentColor = flags & 0x01;\n+ transparentColor = (uchar)m_strm.getByte();\n+ }\n+\n+ // skip other kinds of extensions\n+ len = (uchar)m_strm.getByte();\n+ while (len) {\n+ m_strm.skip(len);\n+ len = (uchar)m_strm.getByte();\n+ }\n+ }\n+ // roll back to the block identifier\n+ m_strm.setPos(m_strm.getPos() - 1);\n+}\n+\n+void GifDecoder::code2pixel(Mat& img, int start, int k){\n+ for (int i = start; i < height; i+=k) {\n+ for (int j = 0; j < width; j++) {\n+ int idx = i * width + j;\n+ int colorIdx = currentImageCodeStream[idx];\n+ if (hasTransparentColor && colorIdx == transparentColor) {\n+ continue;\n+ }\n+ if (colorIdx < localColorTableSize) {\n+ img.at(top + i, left + j)[0] = localColorTable[colorIdx * 3 + 2]; //B\n+ img.at(top + i, left + j)[1] = localColorTable[colorIdx * 3 + 1]; //G\n+ img.at(top + i, left + j)[2] = localColorTable[colorIdx * 3]; //R\n+ } else if (colorIdx < globalColorTableSize) {\n+ img.at(top + i, left + j)[0] = globalColorTable[colorIdx * 3 + 2]; //B\n+ img.at(top + i, left + j)[1] = globalColorTable[colorIdx * 3 + 1]; //G\n+ img.at(top + i, left + j)[2] = globalColorTable[colorIdx * 3]; //R\n+ } else {\n+ CV_Assert(false);\n+ }\n+ }\n+ }\n+}\n+\n+void GifDecoder::deleteLzwExtraTablePrefix(lzwNodeD* lzwExtraTable, int lzwTableSize) const{\n+ for (int i = (1 << lzwMinCodeSize) + 2; i <= lzwTableSize; i++) {\n+ if (lzwExtraTable[i].prefix) {\n+ delete[] lzwExtraTable[i].prefix;\n+ lzwExtraTable[i].prefix = nullptr;\n+ }\n+ }\n+}\n+\n+bool GifDecoder::lzwDecode() {\n+ // initialization\n+ lzwMinCodeSize = m_strm.getByte();\n+ int lzwCodeSize = lzwMinCodeSize + 1;\n+ int clearCode = 1 << lzwMinCodeSize;\n+ int exitCode = clearCode + 1;\n+ CV_Assert(lzwCodeSize > 2 && lzwCodeSize <= 12);\n+ auto* lzwExtraTable = new lzwNodeD[(1 << 12) + 1];", + "comment_created_at": "2024-06-10T09:10:33+00:00", + "comment_author": "asmorkalov", + "comment_body": "please use std::vector, std::array or cv::AutoBuffer to prevent memory leaks in cases of parser failrue.", + "pr_file_module": null + }, + { + "comment_id": "1655702685", + "repo_full_name": "opencv/opencv", + "pr_number": 25691, + "pr_file": "modules/imgcodecs/src/grfmt_gif.cpp", + "discussion_id": "1632891724", + "commented_code": "@@ -0,0 +1,372 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level\n+// directory of this distribution and at http://opencv.org/license.html\n+\n+#include 
\"precomp.hpp\"\n+#include \"grfmt_gif.hpp\"\n+\n+namespace cv\n+{\n+//////////////////////////////////////////////////////////////////////\n+//// GIF Decoder ////\n+//////////////////////////////////////////////////////////////////////\n+GifDecoder::GifDecoder() {\n+ m_signature = R\"(GIF)\";\n+ m_type = CV_8UC3;\n+ bgColor = -1;\n+ m_buf_supported = true;\n+ globalColorTableSize = 0;\n+ localColorTableSize = 0;\n+ lzwMinCodeSize = 0;\n+ hasRead = false;\n+ hasTransparentColor = false;\n+ transparentColor = 0;\n+ opMode = GRFMT_GIF_Nothing;\n+ top = 0, left = 0, width = 0, height = 0;\n+ depth = 8;\n+}\n+\n+GifDecoder::~GifDecoder() {\n+ close();\n+}\n+\n+bool GifDecoder::readHeader() {\n+ if (!m_buf.empty()) {\n+ if (!m_strm.open(m_buf)) {\n+ return false;\n+ }\n+ } else if (!m_strm.open(m_filename)) {\n+ return false;\n+ }\n+\n+ try {\n+ m_strm.skip(6);// Skip the signature: GIF87a or GIF89a\n+\n+ // #1: read logical screen descriptor\n+ m_width = m_strm.getWord();\n+ m_height = m_strm.getWord();\n+ CV_Assert(m_width > 0 && m_height > 0);\n+\n+ char flags = (char)m_strm.getByte();\n+\n+ // the background color -> index in the global color table, valid only if the global color table is present\n+ bgColor = m_strm.getByte();\n+ m_strm.skip(1); // Skip the aspect ratio\n+\n+ // #2: read global color table\n+ depth = ((flags & 0x70) >> 4) + 1;\n+ if (flags & 0x80) {\n+ globalColorTableSize = 1 << ((flags & 0x07) + 1);\n+ globalColorTable.allocate(3 * globalColorTableSize);\n+ for (int i = 0; i < 3 * globalColorTableSize; i++) {\n+ globalColorTable[i] = (uchar)m_strm.getByte();\n+ }\n+ }\n+ } catch (...) {\n+ throw;\n+ }\n+\n+ hasRead = false;\n+ return true;\n+}\n+\n+bool GifDecoder::readData(Mat &img) {\n+ if (hasRead) {\n+ lastImage.copyTo(img);\n+ return true;\n+ }\n+\n+ readExtensions();\n+ // Image separator\n+ CV_Assert(!(m_strm.getByte()^0x2C));\n+ left = m_strm.getWord();\n+ top = m_strm.getWord();\n+ width = m_strm.getWord();\n+ height = m_strm.getWord();\n+ CV_Assert(width > 0 && height > 0 && left + width <= m_width && top + height <= m_height);\n+\n+ currentImageCodeStream.allocate(width * height);\n+ Mat img_;\n+\n+ switch (opMode) {\n+ case GifOpMode::GRFMT_GIF_PreviousImage:\n+ if (lastImage.empty()){\n+ img_ = Mat(m_height, m_width, CV_8UC3, Scalar(0, 0, 0));\n+ } else {\n+ img_ = lastImage;\n+ }\n+ break;\n+ case GifOpMode::GRFMT_GIF_Background:\n+ // background color is valid iff global color table exists\n+ CV_Assert(globalColorTableSize > 0);\n+ img_ = Mat(m_height, m_width, CV_8UC3,\n+ Scalar(globalColorTable[bgColor * 3 + 2],\n+ globalColorTable[bgColor * 3 + 1],\n+ globalColorTable[bgColor * 3]));\n+ break;\n+ case GifOpMode::GRFMT_GIF_Nothing:\n+ case GifOpMode::GRFMT_GIF_Cover:\n+ // default value\n+ img_ = Mat(m_height, m_width, CV_8UC3, Scalar(0, 0, 0));\n+ break;\n+ default:\n+ CV_Assert(false);\n+ }\n+ lastImage.release();\n+\n+ auto flags = (uchar)m_strm.getByte();\n+ localColorTableSize = 0;\n+ if (flags & 0x80) {\n+ // local color table\n+ localColorTableSize = 1 << ((flags & 0x07) + 1);\n+ localColorTable.allocate(3 * localColorTableSize);\n+ for (int i = 0; i < 3 * localColorTableSize; i++) {\n+ localColorTable[i] = (uchar)m_strm.getByte();\n+ }\n+ }\n+\n+ // the case that neither global nor local color table exists is not defined in the GIF standard (but allowed)\n+ if (!(globalColorTableSize || localColorTableSize)) {\n+ // go through the length of unused data.\n+ m_strm.skip(1);\n+ int len = m_strm.getByte();\n+ while (len) {\n+ m_strm.skip(len);\n+ len 
= m_strm.getByte();\n+ }\n+\n+ lastImage = img_;\n+ if (!img.empty())\n+ img_.copyTo(img);\n+\n+ // release the memory\n+ img_.release();\n+ return true;\n+ }\n+\n+ // lzw decompression to get the code stream\n+ hasRead = lzwDecode();\n+\n+ // convert code stream into pixels on the image\n+ if (hasRead) {\n+ if (!(flags & 0x40)) {\n+ // no interlace, simply convert the code stream into pixels from top to down\n+ code2pixel(img_, 0, 1);\n+ } else {\n+ // consider the interlace mode, the image will be rendered in four separate passes\n+ code2pixel(img_, 0, 8);\n+ code2pixel(img_, 4, 8);\n+ code2pixel(img_, 2, 4);\n+ code2pixel(img_, 1, 2);\n+ }\n+ }\n+\n+ lastImage = img_;\n+ if (!img.empty())\n+ img_.copyTo(img);\n+\n+ // release the memory\n+ img_.release();\n+\n+ return hasRead;\n+}\n+\n+bool GifDecoder::nextPage() {\n+ if (hasRead) {\n+ hasRead = false;\n+ // end of a gif file\n+ if(!(m_strm.getByte() ^ 0x3B)) return false;\n+ m_strm.setPos(m_strm.getPos() - 1);\n+ return true;\n+ } else {\n+ bool success;\n+ try {\n+ Mat emptyImg;\n+ success = readData(emptyImg);\n+ emptyImg.release();\n+ } catch(...) {\n+ return false;\n+ }\n+ return success;\n+ }\n+}\n+\n+void GifDecoder::readExtensions() {\n+ uchar len;\n+ while (!(m_strm.getByte() ^ 0x21)) {\n+ uchar extensionType = (uchar)m_strm.getByte();\n+\n+ // read graphic control extension\n+ // the scope of this extension is the next image or plain text extension\n+ hasTransparentColor = false;\n+ opMode = GifOpMode::GRFMT_GIF_Nothing;// default value\n+ if (!(extensionType ^ 0xF9)) {\n+ len = (uchar)m_strm.getByte();\n+ CV_Assert(len == 4);\n+ uchar flags = (uchar)m_strm.getByte();\n+ m_strm.getWord(); // delay time, not used\n+ opMode = (GifOpMode)((flags & 0x1C) >> 2);\n+ hasTransparentColor = flags & 0x01;\n+ transparentColor = (uchar)m_strm.getByte();\n+ }\n+\n+ // skip other kinds of extensions\n+ len = (uchar)m_strm.getByte();\n+ while (len) {\n+ m_strm.skip(len);\n+ len = (uchar)m_strm.getByte();\n+ }\n+ }\n+ // roll back to the block identifier\n+ m_strm.setPos(m_strm.getPos() - 1);\n+}\n+\n+void GifDecoder::code2pixel(Mat& img, int start, int k){\n+ for (int i = start; i < height; i+=k) {\n+ for (int j = 0; j < width; j++) {\n+ int idx = i * width + j;\n+ int colorIdx = currentImageCodeStream[idx];\n+ if (hasTransparentColor && colorIdx == transparentColor) {\n+ continue;\n+ }\n+ if (colorIdx < localColorTableSize) {\n+ img.at(top + i, left + j)[0] = localColorTable[colorIdx * 3 + 2]; //B\n+ img.at(top + i, left + j)[1] = localColorTable[colorIdx * 3 + 1]; //G\n+ img.at(top + i, left + j)[2] = localColorTable[colorIdx * 3]; //R\n+ } else if (colorIdx < globalColorTableSize) {\n+ img.at(top + i, left + j)[0] = globalColorTable[colorIdx * 3 + 2]; //B\n+ img.at(top + i, left + j)[1] = globalColorTable[colorIdx * 3 + 1]; //G\n+ img.at(top + i, left + j)[2] = globalColorTable[colorIdx * 3]; //R\n+ } else {\n+ CV_Assert(false);\n+ }\n+ }\n+ }\n+}\n+\n+void GifDecoder::deleteLzwExtraTablePrefix(lzwNodeD* lzwExtraTable, int lzwTableSize) const{\n+ for (int i = (1 << lzwMinCodeSize) + 2; i <= lzwTableSize; i++) {\n+ if (lzwExtraTable[i].prefix) {\n+ delete[] lzwExtraTable[i].prefix;\n+ lzwExtraTable[i].prefix = nullptr;\n+ }\n+ }\n+}\n+\n+bool GifDecoder::lzwDecode() {\n+ // initialization\n+ lzwMinCodeSize = m_strm.getByte();\n+ int lzwCodeSize = lzwMinCodeSize + 1;\n+ int clearCode = 1 << lzwMinCodeSize;\n+ int exitCode = clearCode + 1;\n+ CV_Assert(lzwCodeSize > 2 && lzwCodeSize <= 12);\n+ auto* lzwExtraTable = new lzwNodeD[(1 << 12) + 
1];", + "comment_created_at": "2024-06-27T01:26:45+00:00", + "comment_author": "redhecker", + "comment_body": "done", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1798939112", + "pr_number": 26281, + "pr_file": "modules/core/src/opengl.cpp", + "created_at": "2024-10-14T07:57:17+00:00", + "commented_code": "#elif !defined(HAVE_OPENCL_OPENGL_SHARING)\n NO_OPENCL_SHARING_ERROR;\n#else\n cl_uint numPlatforms;\n cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);\n cl_uint platformsCnt = 0;\n cl_uint devCnt = 0;\n cl_device_id* devices = nullptr;\n cl_uint devUsed = 0;\n cl_context context = nullptr;\n\n cl_int status = clGetPlatformIDs(0, NULL, &platformsCnt);\n if (status != CL_SUCCESS)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get number of platforms: %d\", status));\n if (numPlatforms == 0)\n if (platformsCnt == 0)\n CV_Error(cv::Error::OpenCLInitError, \"OpenCL: No available platforms\");\n\n std::vector platforms(numPlatforms);\n status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL);\n std::vector platforms(platformsCnt);\n status = clGetPlatformIDs(platformsCnt, &platforms[0], NULL);\n if (status != CL_SUCCESS)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get number of platforms: %d\", status));\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get platforms: %d\", status));\n\n\n // TODO Filter platforms by name from OPENCV_OPENCL_DEVICE\n bool sharingSupported = false;\n\n int found = -1;\n cl_device_id device = NULL;\n cl_context context = NULL;\n for (unsigned int i = 0; (!sharingSupported && (i < platformsCnt)); ++i) {\n status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &devCnt);\n if (status != CL_SUCCESS)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: No devices available: %d\", status));\n\n for (int i = 0; i < (int)numPlatforms; i++)\n {\n // query platform extension: presence of \"cl_khr_gl_sharing\" extension is required\n {\n AutoBuffer extensionStr;\n try {\n devices = new cl_device_id[devCnt];", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1798939112", + "repo_full_name": "opencv/opencv", + "pr_number": 26281, + "pr_file": "modules/core/src/opengl.cpp", + "discussion_id": "1798939112", + "commented_code": "@@ -1635,94 +1641,148 @@ Context& initializeContextFromGL()\n #elif !defined(HAVE_OPENCL_OPENGL_SHARING)\n NO_OPENCL_SHARING_ERROR;\n #else\n- cl_uint numPlatforms;\n- cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);\n+ cl_uint platformsCnt = 0;\n+ cl_uint devCnt = 0;\n+ cl_device_id* devices = nullptr;\n+ cl_uint devUsed = 0;\n+ cl_context context = nullptr;\n+\n+ cl_int status = clGetPlatformIDs(0, NULL, &platformsCnt);\n if (status != CL_SUCCESS)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get number of platforms: %d\", status));\n- if (numPlatforms == 0)\n+ if (platformsCnt == 0)\n CV_Error(cv::Error::OpenCLInitError, \"OpenCL: No available platforms\");\n \n- std::vector platforms(numPlatforms);\n- status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL);\n+ std::vector platforms(platformsCnt);\n+ status = clGetPlatformIDs(platformsCnt, &platforms[0], NULL);\n if (status != CL_SUCCESS)\n- CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get number of platforms: %d\", status));\n+ CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get platforms: %d\", status));\n+\n \n // TODO Filter platforms by name from OPENCV_OPENCL_DEVICE\n+ bool sharingSupported = false;\n \n- int found = -1;\n- cl_device_id device = 
NULL;\n- cl_context context = NULL;\n+ for (unsigned int i = 0; (!sharingSupported && (i < platformsCnt)); ++i) {\n+ status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &devCnt);\n+ if (status != CL_SUCCESS)\n+ CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: No devices available: %d\", status));\n \n- for (int i = 0; i < (int)numPlatforms; i++)\n- {\n- // query platform extension: presence of \"cl_khr_gl_sharing\" extension is required\n- {\n- AutoBuffer extensionStr;\n+ try {\n+ devices = new cl_device_id[devCnt];", + "comment_created_at": "2024-10-14T07:57:17+00:00", + "comment_author": "asmorkalov", + "comment_body": "Let's use `std::vector<>` or `cv::AutoBuffer` for locals to prevent leaks.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1658732501", + "pr_number": 25715, + "pr_file": "modules/imgcodecs/src/grfmt_png.cpp", + "created_at": "2024-06-28T13:18:04+00:00", + "commented_code": "return result;\n}\n\nbool PngDecoder::nextPage() {\n if (m_f)\n {\n uint id;\n CHUNK chunkfcTL;\n\n id = read_chunk(m_f, &chunkfcTL);\n if (id == id_fcTL && chunkfcTL.size == 38)\n {\n // At this point the old frame is done. Let's start a new one.\n uint w0 = png_get_uint_32(chunkfcTL.p + 12);\n uint h0 = png_get_uint_32(chunkfcTL.p + 16);\n uint x0 = png_get_uint_32(chunkfcTL.p + 20);\n uint y0 = png_get_uint_32(chunkfcTL.p + 24);\n int delay_num = png_get_uint_16(chunkfcTL.p + 28);\n int delay_den = png_get_uint_16(chunkfcTL.p + 30);\n char dop = chunkfcTL.p[32];\n char bop = chunkfcTL.p[33];\n printf(\"**frame props\\n-------------------\\n\");\n printf(\"w0 : %d\\n\", w0);\n printf(\"h0 : %d\\n\", h0);\n printf(\"x0 : %d\\n\", x0);\n printf(\"y0 : %d\\n\", y0);\n printf(\"delay_num : %d\\n\", delay_num);\n printf(\"delay_den : %d\\n\", delay_den);\n printf(\"dop : %d\\n\", dop);\n printf(\"bop : %d\\n-------------------\\n\", bop);\n\n uchar sig[8];\n if (fread(sig, 1, 8, m_f))\n return true;\n }\n }\n return false;\n}\n\nvoid PngDecoder::compose_frame(uchar** rows_dst, uchar** rows_src, uchar bop, uint x, uint y, uint w, uint h)\n{\n uint i, j;\n int u, v, al;\n\n for (j = 0; j < h; j++)\n {\n uchar* sp = rows_src[j];\n uchar* dp = rows_dst[j + y] + x * 4;\n\n if (bop == 0)\n memcpy(dp, sp, w * 4);\n else\n for (i = 0; i < w; i++, sp += 4, dp += 4)\n {\n if (sp[3] == 255)\n memcpy(dp, sp, 4);\n else\n if (sp[3] != 0)\n {\n if (dp[3] != 0)\n {\n u = sp[3] * 255;\n v = (255 - sp[3]) * dp[3];\n al = u + v;\n dp[0] = (sp[0] * u + dp[0] * v) / al;\n dp[1] = (sp[1] * u + dp[1] * v) / al;\n dp[2] = (sp[2] * u + dp[2] * v) / al;\n dp[3] = al / 255;\n }\n else\n memcpy(dp, sp, 4);\n }\n }\n }\n}\n\nuint PngDecoder::read_chunk(FILE* f, CHUNK* pChunk)\n{\n uchar len[4];\n pChunk->size = 0;\n pChunk->p = 0;\n if (fread(&len, 4, 1, f) == 1)\n {\n pChunk->size = png_get_uint_32(len);\n if (pChunk->size > PNG_USER_CHUNK_MALLOC_MAX)\n return 0;\n pChunk->size += 12;\n pChunk->p = new uchar[pChunk->size];", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1658732501", + "repo_full_name": "opencv/opencv", + "pr_number": 25715, + "pr_file": "modules/imgcodecs/src/grfmt_png.cpp", + "discussion_id": "1658732501", + "commented_code": "@@ -300,6 +403,379 @@ bool PngDecoder::readData( Mat& img )\n return result;\n }\n \n+bool PngDecoder::nextPage() {\n+ if (m_f)\n+ {\n+ uint id;\n+ CHUNK chunkfcTL;\n+\n+ id = read_chunk(m_f, &chunkfcTL);\n+ if (id == id_fcTL && chunkfcTL.size == 38)\n+ {\n+ // At this point the old frame is done. 
Let's start a new one.\n+ uint w0 = png_get_uint_32(chunkfcTL.p + 12);\n+ uint h0 = png_get_uint_32(chunkfcTL.p + 16);\n+ uint x0 = png_get_uint_32(chunkfcTL.p + 20);\n+ uint y0 = png_get_uint_32(chunkfcTL.p + 24);\n+ int delay_num = png_get_uint_16(chunkfcTL.p + 28);\n+ int delay_den = png_get_uint_16(chunkfcTL.p + 30);\n+ char dop = chunkfcTL.p[32];\n+ char bop = chunkfcTL.p[33];\n+ printf(\"**frame props\\n-------------------\\n\");\n+ printf(\"w0 : %d\\n\", w0);\n+ printf(\"h0 : %d\\n\", h0);\n+ printf(\"x0 : %d\\n\", x0);\n+ printf(\"y0 : %d\\n\", y0);\n+ printf(\"delay_num : %d\\n\", delay_num);\n+ printf(\"delay_den : %d\\n\", delay_den);\n+ printf(\"dop : %d\\n\", dop);\n+ printf(\"bop : %d\\n-------------------\\n\", bop);\n+\n+ uchar sig[8];\n+ if (fread(sig, 1, 8, m_f))\n+ return true;\n+ }\n+ }\n+ return false;\n+}\n+\n+void PngDecoder::compose_frame(uchar** rows_dst, uchar** rows_src, uchar bop, uint x, uint y, uint w, uint h)\n+{\n+ uint i, j;\n+ int u, v, al;\n+\n+ for (j = 0; j < h; j++)\n+ {\n+ uchar* sp = rows_src[j];\n+ uchar* dp = rows_dst[j + y] + x * 4;\n+\n+ if (bop == 0)\n+ memcpy(dp, sp, w * 4);\n+ else\n+ for (i = 0; i < w; i++, sp += 4, dp += 4)\n+ {\n+ if (sp[3] == 255)\n+ memcpy(dp, sp, 4);\n+ else\n+ if (sp[3] != 0)\n+ {\n+ if (dp[3] != 0)\n+ {\n+ u = sp[3] * 255;\n+ v = (255 - sp[3]) * dp[3];\n+ al = u + v;\n+ dp[0] = (sp[0] * u + dp[0] * v) / al;\n+ dp[1] = (sp[1] * u + dp[1] * v) / al;\n+ dp[2] = (sp[2] * u + dp[2] * v) / al;\n+ dp[3] = al / 255;\n+ }\n+ else\n+ memcpy(dp, sp, 4);\n+ }\n+ }\n+ }\n+}\n+\n+uint PngDecoder::read_chunk(FILE* f, CHUNK* pChunk)\n+{\n+ uchar len[4];\n+ pChunk->size = 0;\n+ pChunk->p = 0;\n+ if (fread(&len, 4, 1, f) == 1)\n+ {\n+ pChunk->size = png_get_uint_32(len);\n+ if (pChunk->size > PNG_USER_CHUNK_MALLOC_MAX)\n+ return 0;\n+ pChunk->size += 12;\n+ pChunk->p = new uchar[pChunk->size];", + "comment_created_at": "2024-06-28T13:18:04+00:00", + "comment_author": "vrabaud", + "comment_body": "Please do not use new as it makes its hard to track memory. 
Use std::vector if you can.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1894579735", + "pr_number": 25715, + "pr_file": "modules/imgcodecs/src/grfmt_png.cpp", + "created_at": "2024-12-21T07:44:06+00:00", + "commented_code": "return result;\n}\n\nvoid PngEncoder::optim_dirty(std::vector& frames)\n{\n uint32_t i, j;\n unsigned char* sp;\n uint32_t size = frames[0].getWidth() * frames[0].getHeight();\n\n for (i = 0; i < frames.size(); i++)\n {\n sp = frames[i].getPixels();\n for (j = 0; j < size; j++, sp += 4)\n if (sp[3] == 0)\n sp[0] = sp[1] = sp[2] = 0;\n }\n}\n\nvoid PngEncoder::optim_duplicates(std::vector& frames, uint32_t first)\n{\n uint32_t imagesize = frames[0].getWidth() * frames[0].getHeight() * 4;\n uint32_t i = first;\n\n while (++i < frames.size())\n {\n if (memcmp(frames[i - 1].getPixels(), frames[i].getPixels(), imagesize) != 0)\n continue;\n\n i--;\n delete[] frames[i].getPixels();\n delete[] frames[i].getRows();\n uint32_t num = frames[i].getDelayNum();\n uint32_t den = frames[i].getDelayDen();\n frames.erase(frames.begin() + i);\n\n if (frames[i].getDelayDen() == den)\n frames[i].setDelayNum(frames[i].getDelayNum()+num);\n else\n {\n frames[i].setDelayNum(num * frames[i].getDelayDen() + den * frames[i].getDelayNum());\n frames[i].setDelayDen(den * frames[i].getDelayDen());\n while (num && den)\n {\n if (num > den)\n num = num % den;\n else\n den = den % num;\n }\n num += den;\n frames[i].setDelayNum(frames[i].getDelayNum() / num);\n frames[i].setDelayDen(frames[i].getDelayDen() / num);\n }\n }\n}\n\nsize_t PngEncoder::write_to_io(void const* _Buffer, size_t _ElementSize, size_t _ElementCount, FILE * _Stream)\n{\n if (_Stream)\n return fwrite(_Buffer, _ElementSize, _ElementCount, _Stream);\n\n size_t cursz = m_buf->size();\n m_buf->resize(cursz + _ElementCount);\n memcpy( &(*m_buf)[cursz], _Buffer, _ElementCount );\n return _ElementCount;\n}\n\nvoid PngEncoder::write_chunk(FILE* f, const char* name, unsigned char* data, uint32_t length)\n{\n unsigned char buf[4];\n uint32_t crc = crc32(0, Z_NULL, 0);\n\n png_save_uint_32(buf, length);\n write_to_io(buf, 1, 4, f);\n write_to_io(name, 1, 4, f);\n crc = crc32(crc, (const Bytef*)name, 4);\n\n if (memcmp(name, \"fdAT\", 4) == 0)\n {\n png_save_uint_32(buf, next_seq_num++);\n write_to_io(buf, 1, 4, f);\n crc = crc32(crc, buf, 4);\n length -= 4;\n }\n\n if (data != NULL && length > 0)\n {\n write_to_io(data, 1, length, f);\n crc = crc32(crc, data, length);\n }\n\n png_save_uint_32(buf, crc);\n write_to_io(buf, 1, 4, f);\n}\n\nvoid PngEncoder::write_IDATs(FILE* f, int frame, unsigned char* data, uint32_t length, uint32_t idat_size)\n{\n uint32_t z_cmf = data[0];\n if ((z_cmf & 0x0f) == 8 && (z_cmf & 0xf0) <= 0x70)\n {\n if (length >= 2)\n {\n uint32_t z_cinfo = z_cmf >> 4;\n uint32_t half_z_window_size = 1 << (z_cinfo + 7);\n while (idat_size <= half_z_window_size && half_z_window_size >= 256)\n {\n z_cinfo--;\n half_z_window_size >>= 1;\n }\n z_cmf = (z_cmf & 0x0f) | (z_cinfo << 4);\n if (data[0] != (unsigned char)z_cmf)\n {\n data[0] = (unsigned char)z_cmf;\n data[1] &= 0xe0;\n data[1] += (unsigned char)(0x1f - ((z_cmf << 8) + data[1]) % 0x1f);\n }\n }\n }\n\n while (length > 0)\n {\n uint32_t ds = length;\n if (ds > 32768)\n ds = 32768;\n\n if (frame == 0)\n write_chunk(f, \"IDAT\", data, ds);\n else\n write_chunk(f, \"fdAT\", data, ds + 4);\n\n data += ds;\n length -= ds;\n }\n}\n\nvoid PngEncoder::process_rect(unsigned char* row, int rowbytes, int bpp, int stride, int h, unsigned char* rows)\n{\n int i, j, 
v;\n int a, b, c, pa, pb, pc, p;\n unsigned char* prev = NULL;\n unsigned char* dp = rows;\n unsigned char* out;\n\n for (j = 0; j < h; j++)\n {\n uint32_t sum = 0;\n unsigned char* best_row = row_buf;\n uint32_t mins = ((uint32_t)(-1)) >> 1;\n\n out = row_buf + 1;\n for (i = 0; i < rowbytes; i++)\n {\n v = out[i] = row[i];\n sum += (v < 128) ? v : 256 - v;\n }\n mins = sum;\n\n sum = 0;\n out = sub_row + 1;\n for (i = 0; i < bpp; i++)\n {\n v = out[i] = row[i];\n sum += (v < 128) ? v : 256 - v;\n }\n for (i = bpp; i < rowbytes; i++)\n {\n v = out[i] = row[i] - row[i - bpp];\n sum += (v < 128) ? v : 256 - v;\n if (sum > mins)\n break;\n }\n if (sum < mins)\n {\n mins = sum;\n best_row = sub_row;\n }\n\n if (prev)\n {\n sum = 0;\n out = up_row + 1;\n for (i = 0; i < rowbytes; i++)\n {\n v = out[i] = row[i] - prev[i];\n sum += (v < 128) ? v : 256 - v;\n if (sum > mins)\n break;\n }\n if (sum < mins)\n {\n mins = sum;\n best_row = up_row;\n }\n\n sum = 0;\n out = avg_row + 1;\n for (i = 0; i < bpp; i++)\n {\n v = out[i] = row[i] - prev[i] / 2;\n sum += (v < 128) ? v : 256 - v;\n }\n for (i = bpp; i < rowbytes; i++)\n {\n v = out[i] = row[i] - (prev[i] + row[i - bpp]) / 2;\n sum += (v < 128) ? v : 256 - v;\n if (sum > mins)\n break;\n }\n if (sum < mins)\n {\n mins = sum;\n best_row = avg_row;\n }\n\n sum = 0;\n out = paeth_row + 1;\n for (i = 0; i < bpp; i++)\n {\n v = out[i] = row[i] - prev[i];\n sum += (v < 128) ? v : 256 - v;\n }\n for (i = bpp; i < rowbytes; i++)\n {\n a = row[i - bpp];\n b = prev[i];\n c = prev[i - bpp];\n p = b - c;\n pc = a - c;\n pa = abs(p);\n pb = abs(pc);\n pc = abs(p + pc);\n p = (pa <= pb && pa <= pc) ? a : (pb <= pc) ? b\n : c;\n v = out[i] = row[i] - p;\n sum += (v < 128) ? v : 256 - v;\n if (sum > mins)\n break;\n }\n if (sum < mins)\n {\n best_row = paeth_row;\n }\n }\n\n if (rows == NULL)\n {\n // deflate_rect_op()\n op_zstream1.next_in = row_buf;\n op_zstream1.avail_in = rowbytes + 1;\n deflate(&op_zstream1, Z_NO_FLUSH);\n\n op_zstream2.next_in = best_row;\n op_zstream2.avail_in = rowbytes + 1;\n deflate(&op_zstream2, Z_NO_FLUSH);\n }\n else\n {\n // deflate_rect_fin()\n memcpy(dp, best_row, rowbytes + 1);\n dp += rowbytes + 1;\n }\n\n prev = row;\n row += stride;\n }\n}\n\nvoid PngEncoder::deflate_rect_op(unsigned char* pdata, int x, int y, int w, int h, int bpp, int stride, int zbuf_size, int n)\n{\n unsigned char* row = pdata + y * stride + x * bpp;\n int rowbytes = w * bpp;\n\n op_zstream1.data_type = Z_BINARY;\n op_zstream1.next_out = op_zbuf1;\n op_zstream1.avail_out = zbuf_size;\n\n op_zstream2.data_type = Z_BINARY;\n op_zstream2.next_out = op_zbuf2;\n op_zstream2.avail_out = zbuf_size;\n\n process_rect(row, rowbytes, bpp, stride, h, NULL);\n\n deflate(&op_zstream1, Z_FINISH);\n deflate(&op_zstream2, Z_FINISH);\n op[n].p = pdata;\n\n if (op_zstream1.total_out < op_zstream2.total_out)\n {\n op[n].size = op_zstream1.total_out;\n op[n].filters = 0;\n }\n else\n {\n op[n].size = op_zstream2.total_out;\n op[n].filters = 1;\n }\n op[n].x = x;\n op[n].y = y;\n op[n].w = w;\n op[n].h = h;\n op[n].valid = 1;\n deflateReset(&op_zstream1);\n deflateReset(&op_zstream2);\n}\n\nvoid PngEncoder::get_rect(uint32_t w, uint32_t h, unsigned char* pimage1, unsigned char* pimage2, unsigned char* ptemp, uint32_t bpp, uint32_t stride, int zbuf_size, uint32_t has_tcolor, uint32_t tcolor, int n)\n{\n uint32_t i, j, x0, y0, w0, h0;\n uint32_t x_min = w - 1;\n uint32_t y_min = h - 1;\n uint32_t x_max = 0;\n uint32_t y_max = 0;\n uint32_t diffnum = 0;\n uint32_t 
over_is_possible = 1;\n\n if (!has_tcolor)\n over_is_possible = 0;\n\n if (bpp == 1)\n {\n unsigned char* pa = pimage1;\n unsigned char* pb = pimage2;\n unsigned char* pc = ptemp;\n\n for (j = 0; j < h; j++)\n for (i = 0; i < w; i++)\n {\n unsigned char c = *pb++;\n if (*pa++ != c)\n {\n diffnum++;\n if (has_tcolor && c == tcolor)\n over_is_possible = 0;\n if (i < x_min)\n x_min = i;\n if (i > x_max)\n x_max = i;\n if (j < y_min)\n y_min = j;\n if (j > y_max)\n y_max = j;\n }\n else\n c = tcolor;\n\n *pc++ = c;\n }\n }\n else if (bpp == 2)\n {\n unsigned short* pa = (unsigned short*)pimage1;\n unsigned short* pb = (unsigned short*)pimage2;\n unsigned short* pc = (unsigned short*)ptemp;\n\n for (j = 0; j < h; j++)\n for (i = 0; i < w; i++)\n {\n uint32_t c1 = *pa++;\n uint32_t c2 = *pb++;\n if ((c1 != c2) && ((c1 >> 8) || (c2 >> 8)))\n {\n diffnum++;\n if ((c2 >> 8) != 0xFF)\n over_is_possible = 0;\n if (i < x_min)\n x_min = i;\n if (i > x_max)\n x_max = i;\n if (j < y_min)\n y_min = j;\n if (j > y_max)\n y_max = j;\n }\n else\n c2 = 0;\n\n *pc++ = c2;\n }\n }\n else if (bpp == 3)\n {\n unsigned char* pa = pimage1;\n unsigned char* pb = pimage2;\n unsigned char* pc = ptemp;\n\n for (j = 0; j < h; j++)\n for (i = 0; i < w; i++)\n {\n uint32_t c1 = (pa[2] << 16) + (pa[1] << 8) + pa[0];\n uint32_t c2 = (pb[2] << 16) + (pb[1] << 8) + pb[0];\n if (c1 != c2)\n {\n diffnum++;\n if (has_tcolor && c2 == tcolor)\n over_is_possible = 0;\n if (i < x_min)\n x_min = i;\n if (i > x_max)\n x_max = i;\n if (j < y_min)\n y_min = j;\n if (j > y_max)\n y_max = j;\n }\n else\n c2 = tcolor;\n\n memcpy(pc, &c2, 3);\n pa += 3;\n pb += 3;\n pc += 3;\n }\n }\n else if (bpp == 4)\n {\n uint32_t* pa = (uint32_t*)pimage1;\n uint32_t* pb = (uint32_t*)pimage2;\n uint32_t* pc = (uint32_t*)ptemp;\n\n for (j = 0; j < h; j++)\n for (i = 0; i < w; i++)\n {\n uint32_t c1 = *pa++;\n uint32_t c2 = *pb++;\n if ((c1 != c2) && ((c1 >> 24) || (c2 >> 24)))\n {\n diffnum++;\n if ((c2 >> 24) != 0xFF)\n over_is_possible = 0;\n if (i < x_min)\n x_min = i;\n if (i > x_max)\n x_max = i;\n if (j < y_min)\n y_min = j;\n if (j > y_max)\n y_max = j;\n }\n else\n c2 = 0;\n\n *pc++ = c2;\n }\n }\n\n if (diffnum == 0)\n {\n x0 = y0 = 0;\n w0 = h0 = 1;\n }\n else\n {\n x0 = x_min;\n y0 = y_min;\n w0 = x_max - x_min + 1;\n h0 = y_max - y_min + 1;\n }\n\n deflate_rect_op(pimage2, x0, y0, w0, h0, bpp, stride, zbuf_size, n * 2);\n\n if (over_is_possible)\n deflate_rect_op(ptemp, x0, y0, w0, h0, bpp, stride, zbuf_size, n * 2 + 1);\n}\n\nvoid PngEncoder::deflate_rect_fin(int deflate_method, int iter, unsigned char* zbuf, uint32_t* zsize, int bpp, int stride, unsigned char* rows, int zbuf_size, int n)\n{\n unsigned char* row = op[n].p + op[n].y * stride + op[n].x * bpp;\n int rowbytes = op[n].w * bpp;\n\n if (op[n].filters == 0)\n {\n unsigned char* dp = rows;\n for (int j = 0; j < op[n].h; j++)\n {\n *dp++ = 0;\n memcpy(dp, row, rowbytes);\n dp += rowbytes;\n row += stride;\n }\n }\n else\n process_rect(row, rowbytes, bpp, stride, op[n].h, rows);\n\n if (deflate_method == 2)\n {\n CV_UNUSED(iter);\n#if 0 // needs include \"zopfli.h\"\n ZopfliOptions opt_zopfli;\n unsigned char* data = 0;\n size_t size = 0;\n ZopfliInitOptions(&opt_zopfli);\n opt_zopfli.numiterations = iter;\n ZopfliCompress(&opt_zopfli, ZOPFLI_FORMAT_ZLIB, rows, op[n].h * (rowbytes + 1), &data, &size);\n if (size < (size_t)zbuf_size)\n {\n memcpy(zbuf, data, size);\n *zsize = size;\n }\n free(data);\n#endif\n }\n else if (deflate_method == 1)\n {\n#if 0 // needs include \"7z.h\"\n 
unsigned size = zbuf_size;\n compress_rfc1950_7z(rows, op[n].h * (rowbytes + 1), zbuf, size, iter < 100 ? iter : 100, 255);\n *zsize = size;\n#endif\n }\n else\n {\n z_stream fin_zstream;\n\n fin_zstream.data_type = Z_BINARY;\n fin_zstream.zalloc = Z_NULL;\n fin_zstream.zfree = Z_NULL;\n fin_zstream.opaque = Z_NULL;\n deflateInit2(&fin_zstream, Z_BEST_COMPRESSION, 8, 15, 8, op[n].filters ? Z_FILTERED : Z_DEFAULT_STRATEGY);\n\n fin_zstream.next_out = zbuf;\n fin_zstream.avail_out = zbuf_size;\n fin_zstream.next_in = rows;\n fin_zstream.avail_in = op[n].h * (rowbytes + 1);\n deflate(&fin_zstream, Z_FINISH);\n *zsize = fin_zstream.total_out;\n deflateEnd(&fin_zstream);\n }\n}\n\nbool PngEncoder::writemulti(const std::vector& img_vec, const std::vector& params)\n{\n CV_Assert(img_vec[0].depth() == CV_8U);\n Animation animation;\n animation.frames = img_vec;\n\n for (size_t i = 0; i < animation.frames.size(); i++)\n {\n animation.durations.push_back(1000);\n }\n return writeanimation(animation, params);\n}\n\nbool PngEncoder::writeanimation(const Animation& animation, const std::vector& params)\n{\n int compression_level = 6;\n int compression_strategy = IMWRITE_PNG_STRATEGY_RLE; // Default strategy\n bool isBilevel = false;\n\n for (size_t i = 0; i < params.size(); i += 2)\n {\n if (params[i] == IMWRITE_PNG_COMPRESSION)\n {\n compression_strategy = IMWRITE_PNG_STRATEGY_DEFAULT; // Default strategy\n compression_level = params[i + 1];\n compression_level = MIN(MAX(compression_level, 0), Z_BEST_COMPRESSION);\n }\n if (params[i] == IMWRITE_PNG_STRATEGY)\n {\n compression_strategy = params[i + 1];\n compression_strategy = MIN(MAX(compression_strategy, 0), Z_FIXED);\n }\n if (params[i] == IMWRITE_PNG_BILEVEL)\n {\n isBilevel = params[i + 1] != 0;\n }\n }\n\n std::vector frames;\n std::vector tmpframes;\n\n for (size_t i = 0; i < animation.frames.size(); i++)\n {\n APNGFrame apngFrame;\n tmpframes.push_back(animation.frames[i].clone());\n if (animation.frames[i].channels() == 4)\n cvtColor(animation.frames[i], tmpframes[i], COLOR_BGRA2RGBA);\n if (animation.frames[i].channels() == 3)\n cvtColor(animation.frames[i], tmpframes[i], COLOR_BGR2RGB);\n apngFrame.setMat(tmpframes[i]);\n apngFrame.setDelayDen(animation.durations[i]);\n frames.push_back(apngFrame);\n }\n\n CV_UNUSED(isBilevel);\n uint32_t first =0;\n uint32_t loops= animation.loop_count;\n uint32_t coltype= animation.frames[0].channels() == 1 ? PNG_COLOR_TYPE_GRAY : animation.frames[0].channels() == 3 ? PNG_COLOR_TYPE_RGB : PNG_COLOR_TYPE_RGB_ALPHA;\n int deflate_method=0;\n int iter=0;\n\n FILE* m_f = NULL;\n uint32_t i, j, k;\n uint32_t x0, y0, w0, h0, dop, bop;\n uint32_t idat_size, zbuf_size, zsize;\n unsigned char* zbuf;\n unsigned char header[8] = { 137, 80, 78, 71, 13, 10, 26, 10 };\n uint32_t num_frames = (int)frames.size();\n uint32_t width = frames[0].getWidth();\n uint32_t height = frames[0].getHeight();\n uint32_t bpp = (coltype == 6) ? 4 : (coltype == 2) ? 3\n : (coltype == 4) ? 2\n : 1;\n uint32_t has_tcolor = (coltype >= 4 || (coltype <= 2 && trnssize)) ? 
1 : 0;\n uint32_t tcolor = 0;\n uint32_t rowbytes = width * bpp;\n uint32_t imagesize = rowbytes * height;\n\n unsigned char* temp = new unsigned char[imagesize];\n unsigned char* over1 = new unsigned char[imagesize];\n unsigned char* over2 = new unsigned char[imagesize];\n unsigned char* over3 = new unsigned char[imagesize];\n unsigned char* rest = new unsigned char[imagesize];\n unsigned char* rows = new unsigned char[(rowbytes + 1) * height];", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1894579735", + "repo_full_name": "opencv/opencv", + "pr_number": 25715, + "pr_file": "modules/imgcodecs/src/grfmt_png.cpp", + "discussion_id": "1894579735", + "commented_code": "@@ -449,6 +852,843 @@ bool PngEncoder::write( const Mat& img, const std::vector& params )\n return result;\n }\n \n+void PngEncoder::optim_dirty(std::vector& frames)\n+{\n+ uint32_t i, j;\n+ unsigned char* sp;\n+ uint32_t size = frames[0].getWidth() * frames[0].getHeight();\n+\n+ for (i = 0; i < frames.size(); i++)\n+ {\n+ sp = frames[i].getPixels();\n+ for (j = 0; j < size; j++, sp += 4)\n+ if (sp[3] == 0)\n+ sp[0] = sp[1] = sp[2] = 0;\n+ }\n+}\n+\n+void PngEncoder::optim_duplicates(std::vector& frames, uint32_t first)\n+{\n+ uint32_t imagesize = frames[0].getWidth() * frames[0].getHeight() * 4;\n+ uint32_t i = first;\n+\n+ while (++i < frames.size())\n+ {\n+ if (memcmp(frames[i - 1].getPixels(), frames[i].getPixels(), imagesize) != 0)\n+ continue;\n+\n+ i--;\n+ delete[] frames[i].getPixels();\n+ delete[] frames[i].getRows();\n+ uint32_t num = frames[i].getDelayNum();\n+ uint32_t den = frames[i].getDelayDen();\n+ frames.erase(frames.begin() + i);\n+\n+ if (frames[i].getDelayDen() == den)\n+ frames[i].setDelayNum(frames[i].getDelayNum()+num);\n+ else\n+ {\n+ frames[i].setDelayNum(num * frames[i].getDelayDen() + den * frames[i].getDelayNum());\n+ frames[i].setDelayDen(den * frames[i].getDelayDen());\n+ while (num && den)\n+ {\n+ if (num > den)\n+ num = num % den;\n+ else\n+ den = den % num;\n+ }\n+ num += den;\n+ frames[i].setDelayNum(frames[i].getDelayNum() / num);\n+ frames[i].setDelayDen(frames[i].getDelayDen() / num);\n+ }\n+ }\n+}\n+\n+size_t PngEncoder::write_to_io(void const* _Buffer, size_t _ElementSize, size_t _ElementCount, FILE * _Stream)\n+{\n+ if (_Stream)\n+ return fwrite(_Buffer, _ElementSize, _ElementCount, _Stream);\n+\n+ size_t cursz = m_buf->size();\n+ m_buf->resize(cursz + _ElementCount);\n+ memcpy( &(*m_buf)[cursz], _Buffer, _ElementCount );\n+ return _ElementCount;\n+}\n+\n+void PngEncoder::write_chunk(FILE* f, const char* name, unsigned char* data, uint32_t length)\n+{\n+ unsigned char buf[4];\n+ uint32_t crc = crc32(0, Z_NULL, 0);\n+\n+ png_save_uint_32(buf, length);\n+ write_to_io(buf, 1, 4, f);\n+ write_to_io(name, 1, 4, f);\n+ crc = crc32(crc, (const Bytef*)name, 4);\n+\n+ if (memcmp(name, \"fdAT\", 4) == 0)\n+ {\n+ png_save_uint_32(buf, next_seq_num++);\n+ write_to_io(buf, 1, 4, f);\n+ crc = crc32(crc, buf, 4);\n+ length -= 4;\n+ }\n+\n+ if (data != NULL && length > 0)\n+ {\n+ write_to_io(data, 1, length, f);\n+ crc = crc32(crc, data, length);\n+ }\n+\n+ png_save_uint_32(buf, crc);\n+ write_to_io(buf, 1, 4, f);\n+}\n+\n+void PngEncoder::write_IDATs(FILE* f, int frame, unsigned char* data, uint32_t length, uint32_t idat_size)\n+{\n+ uint32_t z_cmf = data[0];\n+ if ((z_cmf & 0x0f) == 8 && (z_cmf & 0xf0) <= 0x70)\n+ {\n+ if (length >= 2)\n+ {\n+ uint32_t z_cinfo = z_cmf >> 4;\n+ uint32_t half_z_window_size = 1 << (z_cinfo + 7);\n+ while (idat_size <= 
half_z_window_size && half_z_window_size >= 256)\n+ {\n+ z_cinfo--;\n+ half_z_window_size >>= 1;\n+ }\n+ z_cmf = (z_cmf & 0x0f) | (z_cinfo << 4);\n+ if (data[0] != (unsigned char)z_cmf)\n+ {\n+ data[0] = (unsigned char)z_cmf;\n+ data[1] &= 0xe0;\n+ data[1] += (unsigned char)(0x1f - ((z_cmf << 8) + data[1]) % 0x1f);\n+ }\n+ }\n+ }\n+\n+ while (length > 0)\n+ {\n+ uint32_t ds = length;\n+ if (ds > 32768)\n+ ds = 32768;\n+\n+ if (frame == 0)\n+ write_chunk(f, \"IDAT\", data, ds);\n+ else\n+ write_chunk(f, \"fdAT\", data, ds + 4);\n+\n+ data += ds;\n+ length -= ds;\n+ }\n+}\n+\n+void PngEncoder::process_rect(unsigned char* row, int rowbytes, int bpp, int stride, int h, unsigned char* rows)\n+{\n+ int i, j, v;\n+ int a, b, c, pa, pb, pc, p;\n+ unsigned char* prev = NULL;\n+ unsigned char* dp = rows;\n+ unsigned char* out;\n+\n+ for (j = 0; j < h; j++)\n+ {\n+ uint32_t sum = 0;\n+ unsigned char* best_row = row_buf;\n+ uint32_t mins = ((uint32_t)(-1)) >> 1;\n+\n+ out = row_buf + 1;\n+ for (i = 0; i < rowbytes; i++)\n+ {\n+ v = out[i] = row[i];\n+ sum += (v < 128) ? v : 256 - v;\n+ }\n+ mins = sum;\n+\n+ sum = 0;\n+ out = sub_row + 1;\n+ for (i = 0; i < bpp; i++)\n+ {\n+ v = out[i] = row[i];\n+ sum += (v < 128) ? v : 256 - v;\n+ }\n+ for (i = bpp; i < rowbytes; i++)\n+ {\n+ v = out[i] = row[i] - row[i - bpp];\n+ sum += (v < 128) ? v : 256 - v;\n+ if (sum > mins)\n+ break;\n+ }\n+ if (sum < mins)\n+ {\n+ mins = sum;\n+ best_row = sub_row;\n+ }\n+\n+ if (prev)\n+ {\n+ sum = 0;\n+ out = up_row + 1;\n+ for (i = 0; i < rowbytes; i++)\n+ {\n+ v = out[i] = row[i] - prev[i];\n+ sum += (v < 128) ? v : 256 - v;\n+ if (sum > mins)\n+ break;\n+ }\n+ if (sum < mins)\n+ {\n+ mins = sum;\n+ best_row = up_row;\n+ }\n+\n+ sum = 0;\n+ out = avg_row + 1;\n+ for (i = 0; i < bpp; i++)\n+ {\n+ v = out[i] = row[i] - prev[i] / 2;\n+ sum += (v < 128) ? v : 256 - v;\n+ }\n+ for (i = bpp; i < rowbytes; i++)\n+ {\n+ v = out[i] = row[i] - (prev[i] + row[i - bpp]) / 2;\n+ sum += (v < 128) ? v : 256 - v;\n+ if (sum > mins)\n+ break;\n+ }\n+ if (sum < mins)\n+ {\n+ mins = sum;\n+ best_row = avg_row;\n+ }\n+\n+ sum = 0;\n+ out = paeth_row + 1;\n+ for (i = 0; i < bpp; i++)\n+ {\n+ v = out[i] = row[i] - prev[i];\n+ sum += (v < 128) ? v : 256 - v;\n+ }\n+ for (i = bpp; i < rowbytes; i++)\n+ {\n+ a = row[i - bpp];\n+ b = prev[i];\n+ c = prev[i - bpp];\n+ p = b - c;\n+ pc = a - c;\n+ pa = abs(p);\n+ pb = abs(pc);\n+ pc = abs(p + pc);\n+ p = (pa <= pb && pa <= pc) ? a : (pb <= pc) ? b\n+ : c;\n+ v = out[i] = row[i] - p;\n+ sum += (v < 128) ? 
v : 256 - v;\n+ if (sum > mins)\n+ break;\n+ }\n+ if (sum < mins)\n+ {\n+ best_row = paeth_row;\n+ }\n+ }\n+\n+ if (rows == NULL)\n+ {\n+ // deflate_rect_op()\n+ op_zstream1.next_in = row_buf;\n+ op_zstream1.avail_in = rowbytes + 1;\n+ deflate(&op_zstream1, Z_NO_FLUSH);\n+\n+ op_zstream2.next_in = best_row;\n+ op_zstream2.avail_in = rowbytes + 1;\n+ deflate(&op_zstream2, Z_NO_FLUSH);\n+ }\n+ else\n+ {\n+ // deflate_rect_fin()\n+ memcpy(dp, best_row, rowbytes + 1);\n+ dp += rowbytes + 1;\n+ }\n+\n+ prev = row;\n+ row += stride;\n+ }\n+}\n+\n+void PngEncoder::deflate_rect_op(unsigned char* pdata, int x, int y, int w, int h, int bpp, int stride, int zbuf_size, int n)\n+{\n+ unsigned char* row = pdata + y * stride + x * bpp;\n+ int rowbytes = w * bpp;\n+\n+ op_zstream1.data_type = Z_BINARY;\n+ op_zstream1.next_out = op_zbuf1;\n+ op_zstream1.avail_out = zbuf_size;\n+\n+ op_zstream2.data_type = Z_BINARY;\n+ op_zstream2.next_out = op_zbuf2;\n+ op_zstream2.avail_out = zbuf_size;\n+\n+ process_rect(row, rowbytes, bpp, stride, h, NULL);\n+\n+ deflate(&op_zstream1, Z_FINISH);\n+ deflate(&op_zstream2, Z_FINISH);\n+ op[n].p = pdata;\n+\n+ if (op_zstream1.total_out < op_zstream2.total_out)\n+ {\n+ op[n].size = op_zstream1.total_out;\n+ op[n].filters = 0;\n+ }\n+ else\n+ {\n+ op[n].size = op_zstream2.total_out;\n+ op[n].filters = 1;\n+ }\n+ op[n].x = x;\n+ op[n].y = y;\n+ op[n].w = w;\n+ op[n].h = h;\n+ op[n].valid = 1;\n+ deflateReset(&op_zstream1);\n+ deflateReset(&op_zstream2);\n+}\n+\n+void PngEncoder::get_rect(uint32_t w, uint32_t h, unsigned char* pimage1, unsigned char* pimage2, unsigned char* ptemp, uint32_t bpp, uint32_t stride, int zbuf_size, uint32_t has_tcolor, uint32_t tcolor, int n)\n+{\n+ uint32_t i, j, x0, y0, w0, h0;\n+ uint32_t x_min = w - 1;\n+ uint32_t y_min = h - 1;\n+ uint32_t x_max = 0;\n+ uint32_t y_max = 0;\n+ uint32_t diffnum = 0;\n+ uint32_t over_is_possible = 1;\n+\n+ if (!has_tcolor)\n+ over_is_possible = 0;\n+\n+ if (bpp == 1)\n+ {\n+ unsigned char* pa = pimage1;\n+ unsigned char* pb = pimage2;\n+ unsigned char* pc = ptemp;\n+\n+ for (j = 0; j < h; j++)\n+ for (i = 0; i < w; i++)\n+ {\n+ unsigned char c = *pb++;\n+ if (*pa++ != c)\n+ {\n+ diffnum++;\n+ if (has_tcolor && c == tcolor)\n+ over_is_possible = 0;\n+ if (i < x_min)\n+ x_min = i;\n+ if (i > x_max)\n+ x_max = i;\n+ if (j < y_min)\n+ y_min = j;\n+ if (j > y_max)\n+ y_max = j;\n+ }\n+ else\n+ c = tcolor;\n+\n+ *pc++ = c;\n+ }\n+ }\n+ else if (bpp == 2)\n+ {\n+ unsigned short* pa = (unsigned short*)pimage1;\n+ unsigned short* pb = (unsigned short*)pimage2;\n+ unsigned short* pc = (unsigned short*)ptemp;\n+\n+ for (j = 0; j < h; j++)\n+ for (i = 0; i < w; i++)\n+ {\n+ uint32_t c1 = *pa++;\n+ uint32_t c2 = *pb++;\n+ if ((c1 != c2) && ((c1 >> 8) || (c2 >> 8)))\n+ {\n+ diffnum++;\n+ if ((c2 >> 8) != 0xFF)\n+ over_is_possible = 0;\n+ if (i < x_min)\n+ x_min = i;\n+ if (i > x_max)\n+ x_max = i;\n+ if (j < y_min)\n+ y_min = j;\n+ if (j > y_max)\n+ y_max = j;\n+ }\n+ else\n+ c2 = 0;\n+\n+ *pc++ = c2;\n+ }\n+ }\n+ else if (bpp == 3)\n+ {\n+ unsigned char* pa = pimage1;\n+ unsigned char* pb = pimage2;\n+ unsigned char* pc = ptemp;\n+\n+ for (j = 0; j < h; j++)\n+ for (i = 0; i < w; i++)\n+ {\n+ uint32_t c1 = (pa[2] << 16) + (pa[1] << 8) + pa[0];\n+ uint32_t c2 = (pb[2] << 16) + (pb[1] << 8) + pb[0];\n+ if (c1 != c2)\n+ {\n+ diffnum++;\n+ if (has_tcolor && c2 == tcolor)\n+ over_is_possible = 0;\n+ if (i < x_min)\n+ x_min = i;\n+ if (i > x_max)\n+ x_max = i;\n+ if (j < y_min)\n+ y_min = j;\n+ if (j > y_max)\n+ y_max = j;\n+ }\n+ 
else\n+ c2 = tcolor;\n+\n+ memcpy(pc, &c2, 3);\n+ pa += 3;\n+ pb += 3;\n+ pc += 3;\n+ }\n+ }\n+ else if (bpp == 4)\n+ {\n+ uint32_t* pa = (uint32_t*)pimage1;\n+ uint32_t* pb = (uint32_t*)pimage2;\n+ uint32_t* pc = (uint32_t*)ptemp;\n+\n+ for (j = 0; j < h; j++)\n+ for (i = 0; i < w; i++)\n+ {\n+ uint32_t c1 = *pa++;\n+ uint32_t c2 = *pb++;\n+ if ((c1 != c2) && ((c1 >> 24) || (c2 >> 24)))\n+ {\n+ diffnum++;\n+ if ((c2 >> 24) != 0xFF)\n+ over_is_possible = 0;\n+ if (i < x_min)\n+ x_min = i;\n+ if (i > x_max)\n+ x_max = i;\n+ if (j < y_min)\n+ y_min = j;\n+ if (j > y_max)\n+ y_max = j;\n+ }\n+ else\n+ c2 = 0;\n+\n+ *pc++ = c2;\n+ }\n+ }\n+\n+ if (diffnum == 0)\n+ {\n+ x0 = y0 = 0;\n+ w0 = h0 = 1;\n+ }\n+ else\n+ {\n+ x0 = x_min;\n+ y0 = y_min;\n+ w0 = x_max - x_min + 1;\n+ h0 = y_max - y_min + 1;\n+ }\n+\n+ deflate_rect_op(pimage2, x0, y0, w0, h0, bpp, stride, zbuf_size, n * 2);\n+\n+ if (over_is_possible)\n+ deflate_rect_op(ptemp, x0, y0, w0, h0, bpp, stride, zbuf_size, n * 2 + 1);\n+}\n+\n+void PngEncoder::deflate_rect_fin(int deflate_method, int iter, unsigned char* zbuf, uint32_t* zsize, int bpp, int stride, unsigned char* rows, int zbuf_size, int n)\n+{\n+ unsigned char* row = op[n].p + op[n].y * stride + op[n].x * bpp;\n+ int rowbytes = op[n].w * bpp;\n+\n+ if (op[n].filters == 0)\n+ {\n+ unsigned char* dp = rows;\n+ for (int j = 0; j < op[n].h; j++)\n+ {\n+ *dp++ = 0;\n+ memcpy(dp, row, rowbytes);\n+ dp += rowbytes;\n+ row += stride;\n+ }\n+ }\n+ else\n+ process_rect(row, rowbytes, bpp, stride, op[n].h, rows);\n+\n+ if (deflate_method == 2)\n+ {\n+ CV_UNUSED(iter);\n+#if 0 // needs include \"zopfli.h\"\n+ ZopfliOptions opt_zopfli;\n+ unsigned char* data = 0;\n+ size_t size = 0;\n+ ZopfliInitOptions(&opt_zopfli);\n+ opt_zopfli.numiterations = iter;\n+ ZopfliCompress(&opt_zopfli, ZOPFLI_FORMAT_ZLIB, rows, op[n].h * (rowbytes + 1), &data, &size);\n+ if (size < (size_t)zbuf_size)\n+ {\n+ memcpy(zbuf, data, size);\n+ *zsize = size;\n+ }\n+ free(data);\n+#endif\n+ }\n+ else if (deflate_method == 1)\n+ {\n+#if 0 // needs include \"7z.h\"\n+ unsigned size = zbuf_size;\n+ compress_rfc1950_7z(rows, op[n].h * (rowbytes + 1), zbuf, size, iter < 100 ? iter : 100, 255);\n+ *zsize = size;\n+#endif\n+ }\n+ else\n+ {\n+ z_stream fin_zstream;\n+\n+ fin_zstream.data_type = Z_BINARY;\n+ fin_zstream.zalloc = Z_NULL;\n+ fin_zstream.zfree = Z_NULL;\n+ fin_zstream.opaque = Z_NULL;\n+ deflateInit2(&fin_zstream, Z_BEST_COMPRESSION, 8, 15, 8, op[n].filters ? 
Z_FILTERED : Z_DEFAULT_STRATEGY);\n+\n+ fin_zstream.next_out = zbuf;\n+ fin_zstream.avail_out = zbuf_size;\n+ fin_zstream.next_in = rows;\n+ fin_zstream.avail_in = op[n].h * (rowbytes + 1);\n+ deflate(&fin_zstream, Z_FINISH);\n+ *zsize = fin_zstream.total_out;\n+ deflateEnd(&fin_zstream);\n+ }\n+}\n+\n+bool PngEncoder::writemulti(const std::vector& img_vec, const std::vector& params)\n+{\n+ CV_Assert(img_vec[0].depth() == CV_8U);\n+ Animation animation;\n+ animation.frames = img_vec;\n+\n+ for (size_t i = 0; i < animation.frames.size(); i++)\n+ {\n+ animation.durations.push_back(1000);\n+ }\n+ return writeanimation(animation, params);\n+}\n+\n+bool PngEncoder::writeanimation(const Animation& animation, const std::vector& params)\n+{\n+ int compression_level = 6;\n+ int compression_strategy = IMWRITE_PNG_STRATEGY_RLE; // Default strategy\n+ bool isBilevel = false;\n+\n+ for (size_t i = 0; i < params.size(); i += 2)\n+ {\n+ if (params[i] == IMWRITE_PNG_COMPRESSION)\n+ {\n+ compression_strategy = IMWRITE_PNG_STRATEGY_DEFAULT; // Default strategy\n+ compression_level = params[i + 1];\n+ compression_level = MIN(MAX(compression_level, 0), Z_BEST_COMPRESSION);\n+ }\n+ if (params[i] == IMWRITE_PNG_STRATEGY)\n+ {\n+ compression_strategy = params[i + 1];\n+ compression_strategy = MIN(MAX(compression_strategy, 0), Z_FIXED);\n+ }\n+ if (params[i] == IMWRITE_PNG_BILEVEL)\n+ {\n+ isBilevel = params[i + 1] != 0;\n+ }\n+ }\n+\n+ std::vector frames;\n+ std::vector tmpframes;\n+\n+ for (size_t i = 0; i < animation.frames.size(); i++)\n+ {\n+ APNGFrame apngFrame;\n+ tmpframes.push_back(animation.frames[i].clone());\n+ if (animation.frames[i].channels() == 4)\n+ cvtColor(animation.frames[i], tmpframes[i], COLOR_BGRA2RGBA);\n+ if (animation.frames[i].channels() == 3)\n+ cvtColor(animation.frames[i], tmpframes[i], COLOR_BGR2RGB);\n+ apngFrame.setMat(tmpframes[i]);\n+ apngFrame.setDelayDen(animation.durations[i]);\n+ frames.push_back(apngFrame);\n+ }\n+\n+ CV_UNUSED(isBilevel);\n+ uint32_t first =0;\n+ uint32_t loops= animation.loop_count;\n+ uint32_t coltype= animation.frames[0].channels() == 1 ? PNG_COLOR_TYPE_GRAY : animation.frames[0].channels() == 3 ? PNG_COLOR_TYPE_RGB : PNG_COLOR_TYPE_RGB_ALPHA;\n+ int deflate_method=0;\n+ int iter=0;\n+\n+ FILE* m_f = NULL;\n+ uint32_t i, j, k;\n+ uint32_t x0, y0, w0, h0, dop, bop;\n+ uint32_t idat_size, zbuf_size, zsize;\n+ unsigned char* zbuf;\n+ unsigned char header[8] = { 137, 80, 78, 71, 13, 10, 26, 10 };\n+ uint32_t num_frames = (int)frames.size();\n+ uint32_t width = frames[0].getWidth();\n+ uint32_t height = frames[0].getHeight();\n+ uint32_t bpp = (coltype == 6) ? 4 : (coltype == 2) ? 3\n+ : (coltype == 4) ? 2\n+ : 1;\n+ uint32_t has_tcolor = (coltype >= 4 || (coltype <= 2 && trnssize)) ? 1 : 0;\n+ uint32_t tcolor = 0;\n+ uint32_t rowbytes = width * bpp;\n+ uint32_t imagesize = rowbytes * height;\n+\n+ unsigned char* temp = new unsigned char[imagesize];\n+ unsigned char* over1 = new unsigned char[imagesize];\n+ unsigned char* over2 = new unsigned char[imagesize];\n+ unsigned char* over3 = new unsigned char[imagesize];\n+ unsigned char* rest = new unsigned char[imagesize];\n+ unsigned char* rows = new unsigned char[(rowbytes + 1) * height];", + "comment_created_at": "2024-12-21T07:44:06+00:00", + "comment_author": "asmorkalov", + "comment_body": "I propose to use `std::vector` or `cv::AutoBuffer` to prevent memory leaks. 
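The APNG encoder discussion quoted above ends with the reviewer proposing `std::vector` or `cv::AutoBuffer` in place of the six raw `new unsigned char[...]` buffers. The following is only a minimal sketch of that proposal, not the code that was eventually merged: the buffer names are copied from the quoted snippet, the helper name is hypothetical, and the surrounding encoder logic is omitted.

```cpp
// Sketch of the reviewer's proposal; names mirror the quoted snippet,
// the helper itself is hypothetical and the rest of the encoder is omitted.
#include <opencv2/core/utility.hpp>  // cv::AutoBuffer (data() assumes OpenCV 4.x)
#include <cstdint>
#include <vector>

static void allocate_frame_buffers(uint32_t rowbytes, uint32_t height)
{
    const size_t imagesize = static_cast<size_t>(rowbytes) * height;

    // std::vector releases its storage automatically, even if an exception
    // is thrown later while encoding.
    std::vector<unsigned char> temp(imagesize), over1(imagesize), over2(imagesize),
                               over3(imagesize), rest(imagesize);

    // cv::AutoBuffer behaves the same way and may use stack storage for small
    // sizes; data() exposes the raw pointer where a C-style API needs one.
    cv::AutoBuffer<unsigned char> rows((static_cast<size_t>(rowbytes) + 1) * height);
    unsigned char* rows_ptr = rows.data();

    (void)temp; (void)over1; (void)over2; (void)over3; (void)rest; (void)rows_ptr;
}
```

Either container removes the need for matching `delete[]` calls on every exit path; which of the two fits better depends on the typical buffer size, a trade-off the later lapjv discussion repeats.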
", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1668761773", + "pr_number": 25608, + "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", + "created_at": "2024-07-08T14:33:40+00:00", + "commented_code": "WebPBitstreamFeatures features;\n if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))\n {\n CV_CheckEQ(features.has_animation, 0, \"WebP backend does not support animated webp images\");\n m_has_animation = features.has_animation > 0;\n if (m_has_animation)\n {\n fs.seekg(0, std::ios::beg); CV_Assert(fs && \"File stream error\");\n data.create(1, validateToInt(fs_size), CV_8UC1);\n fs.read((char*)data.ptr(), fs_size);\n CV_Assert(fs && \"Can't read file data\");\n fs.close();\n\n CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);\n\n WebPData webp_data;\n webp_data.bytes = (const uint8_t*)data.ptr();\n webp_data.size = data.total();\n\n WebPAnimDecoderOptions dec_options;\n WebPAnimDecoderOptionsInit(&dec_options);\n dec_options.color_mode = MODE_BGRA;\n anim_decoder = WebPAnimDecoderNew(&webp_data, &dec_options);", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1668761773", + "repo_full_name": "opencv/opencv", + "pr_number": 25608, + "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", + "discussion_id": "1668761773", + "commented_code": "@@ -126,8 +86,38 @@ bool WebPDecoder::readHeader()\n WebPBitstreamFeatures features;\n if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))\n {\n- CV_CheckEQ(features.has_animation, 0, \"WebP backend does not support animated webp images\");\n+ m_has_animation = features.has_animation > 0;\n+ if (m_has_animation)\n+ {\n+ fs.seekg(0, std::ios::beg); CV_Assert(fs && \"File stream error\");\n+ data.create(1, validateToInt(fs_size), CV_8UC1);\n+ fs.read((char*)data.ptr(), fs_size);\n+ CV_Assert(fs && \"Can't read file data\");\n+ fs.close();\n+\n+ CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);\n+\n+ WebPData webp_data;\n+ webp_data.bytes = (const uint8_t*)data.ptr();\n+ webp_data.size = data.total();\n \n+ WebPAnimDecoderOptions dec_options;\n+ WebPAnimDecoderOptionsInit(&dec_options);\n+ dec_options.color_mode = MODE_BGRA;\n+ anim_decoder = WebPAnimDecoderNew(&webp_data, &dec_options);", + "comment_created_at": "2024-07-08T14:33:40+00:00", + "comment_author": "vrabaud", + "comment_body": "You are missing a WebPAnimDecoderDelete. Either you create a destructor for WebPDecoder, either you wrap the pointer in a unique_ptr. 
E.g., create in the header, a private struct:\r\n```cpp\r\nstruct UniquePtrDeleter\r\n{\r\n void operator()(WebPAnimDecoder * decoder) const { WebPAnimDecoderDelete(decoder); }\r\n void operator()(WebpData * dara) const { WebPDataClear(decoder); }\r\n};\r\n\r\n// Use these unique_ptr to ensure the structs are automatically destroyed.\r\nstd::unique_ptr anim_decoder;\r\n// or create a type:\r\nusing DataPtr = std::unique_ptr;\r\n```\r\nI'd go with this, so that you do not have to remember to call a destructor.", + "pr_file_module": null + }, + { + "comment_id": "1673724498", + "repo_full_name": "opencv/opencv", + "pr_number": 25608, + "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", + "discussion_id": "1668761773", + "commented_code": "@@ -126,8 +86,38 @@ bool WebPDecoder::readHeader()\n WebPBitstreamFeatures features;\n if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))\n {\n- CV_CheckEQ(features.has_animation, 0, \"WebP backend does not support animated webp images\");\n+ m_has_animation = features.has_animation > 0;\n+ if (m_has_animation)\n+ {\n+ fs.seekg(0, std::ios::beg); CV_Assert(fs && \"File stream error\");\n+ data.create(1, validateToInt(fs_size), CV_8UC1);\n+ fs.read((char*)data.ptr(), fs_size);\n+ CV_Assert(fs && \"Can't read file data\");\n+ fs.close();\n+\n+ CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);\n+\n+ WebPData webp_data;\n+ webp_data.bytes = (const uint8_t*)data.ptr();\n+ webp_data.size = data.total();\n \n+ WebPAnimDecoderOptions dec_options;\n+ WebPAnimDecoderOptionsInit(&dec_options);\n+ dec_options.color_mode = MODE_BGRA;\n+ anim_decoder = WebPAnimDecoderNew(&webp_data, &dec_options);", + "comment_created_at": "2024-07-11T09:38:39+00:00", + "comment_author": "sturkmen72", + "comment_body": "@vrabaud could you check the last commit. frankly i tried your suggestion using std::unique_ptr but no success.. 
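The deleter sketch quoted above appears to have lost its `unique_ptr` template arguments when the comment was rendered, and `WebpData * dara` / `WebPDataClear(decoder)` read like slips of the keyboard. The version below is offered only as an interpretation of the reviewer's intent, not as the code that landed; the `AnimDecoderPtr` alias name is introduced here for illustration.

```cpp
// Interpretation of the reviewer's sketch; not the merged implementation.
#include <memory>
#include <webp/demux.h>      // WebPAnimDecoder, WebPAnimDecoderDelete
#include <webp/mux_types.h>  // WebPData, WebPDataClear

struct UniquePtrDeleter
{
    void operator()(WebPAnimDecoder* decoder) const { WebPAnimDecoderDelete(decoder); }
    void operator()(WebPData* data) const { WebPDataClear(data); }
};

// As a class member this releases the decoder automatically in the owning
// destructor, so there is no cleanup call to forget.
using AnimDecoderPtr = std::unique_ptr<WebPAnimDecoder, UniquePtrDeleter>;
```

As the follow-up comment in this thread points out, the raw pointer remains available to the C API through `.get()`, for example `WebPAnimDecoderGetNext(anim_decoder.get(), &buf, &timestamp)`.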
can last commit resolve this conversation.", + "pr_file_module": null + }, + { + "comment_id": "1676777267", + "repo_full_name": "opencv/opencv", + "pr_number": 25608, + "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", + "discussion_id": "1668761773", + "commented_code": "@@ -126,8 +86,38 @@ bool WebPDecoder::readHeader()\n WebPBitstreamFeatures features;\n if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))\n {\n- CV_CheckEQ(features.has_animation, 0, \"WebP backend does not support animated webp images\");\n+ m_has_animation = features.has_animation > 0;\n+ if (m_has_animation)\n+ {\n+ fs.seekg(0, std::ios::beg); CV_Assert(fs && \"File stream error\");\n+ data.create(1, validateToInt(fs_size), CV_8UC1);\n+ fs.read((char*)data.ptr(), fs_size);\n+ CV_Assert(fs && \"Can't read file data\");\n+ fs.close();\n+\n+ CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);\n+\n+ WebPData webp_data;\n+ webp_data.bytes = (const uint8_t*)data.ptr();\n+ webp_data.size = data.total();\n \n+ WebPAnimDecoderOptions dec_options;\n+ WebPAnimDecoderOptionsInit(&dec_options);\n+ dec_options.color_mode = MODE_BGRA;\n+ anim_decoder = WebPAnimDecoderNew(&webp_data, &dec_options);", + "comment_created_at": "2024-07-13T07:24:12+00:00", + "comment_author": "vrabaud", + "comment_body": "When calling the WebP API you need to get a pointer so if you have a `std::unique_ptr anim_decoder;`, just call `anim_decoder.get()`", + "pr_file_module": null + }, + { + "comment_id": "1676807079", + "repo_full_name": "opencv/opencv", + "pr_number": 25608, + "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", + "discussion_id": "1668761773", + "commented_code": "@@ -126,8 +86,38 @@ bool WebPDecoder::readHeader()\n WebPBitstreamFeatures features;\n if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))\n {\n- CV_CheckEQ(features.has_animation, 0, \"WebP backend does not support animated webp images\");\n+ m_has_animation = features.has_animation > 0;\n+ if (m_has_animation)\n+ {\n+ fs.seekg(0, std::ios::beg); CV_Assert(fs && \"File stream error\");\n+ data.create(1, validateToInt(fs_size), CV_8UC1);\n+ fs.read((char*)data.ptr(), fs_size);\n+ CV_Assert(fs && \"Can't read file data\");\n+ fs.close();\n+\n+ CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);\n+\n+ WebPData webp_data;\n+ webp_data.bytes = (const uint8_t*)data.ptr();\n+ webp_data.size = data.total();\n \n+ WebPAnimDecoderOptions dec_options;\n+ WebPAnimDecoderOptionsInit(&dec_options);\n+ dec_options.color_mode = MODE_BGRA;\n+ anim_decoder = WebPAnimDecoderNew(&webp_data, &dec_options);", + "comment_created_at": "2024-07-13T11:02:33+00:00", + "comment_author": "sturkmen72", + "comment_body": "@vrabaud could you check https://github.com/opencv/opencv/pull/25608/commits/a105d94aff541d2b4593150dc834bab279ad6cd6", + "pr_file_module": null + }, + { + "comment_id": "1677917053", + "repo_full_name": "opencv/opencv", + "pr_number": 25608, + "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", + "discussion_id": "1668761773", + "commented_code": "@@ -126,8 +86,38 @@ bool WebPDecoder::readHeader()\n WebPBitstreamFeatures features;\n if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))\n {\n- CV_CheckEQ(features.has_animation, 0, \"WebP backend does not support animated webp images\");\n+ m_has_animation = features.has_animation > 0;\n+ if (m_has_animation)\n+ {\n+ fs.seekg(0, std::ios::beg); CV_Assert(fs && \"File stream error\");\n+ data.create(1, validateToInt(fs_size), CV_8UC1);\n+ 
fs.read((char*)data.ptr(), fs_size);\n+ CV_Assert(fs && \"Can't read file data\");\n+ fs.close();\n+\n+ CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);\n+\n+ WebPData webp_data;\n+ webp_data.bytes = (const uint8_t*)data.ptr();\n+ webp_data.size = data.total();\n \n+ WebPAnimDecoderOptions dec_options;\n+ WebPAnimDecoderOptionsInit(&dec_options);\n+ dec_options.color_mode = MODE_BGRA;\n+ anim_decoder = WebPAnimDecoderNew(&webp_data, &dec_options);", + "comment_created_at": "2024-07-15T14:23:58+00:00", + "comment_author": "sturkmen72", + "comment_body": "should i mark this as resolved?", + "pr_file_module": null + }, + { + "comment_id": "1681234937", + "repo_full_name": "opencv/opencv", + "pr_number": 25608, + "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", + "discussion_id": "1668761773", + "commented_code": "@@ -126,8 +86,38 @@ bool WebPDecoder::readHeader()\n WebPBitstreamFeatures features;\n if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))\n {\n- CV_CheckEQ(features.has_animation, 0, \"WebP backend does not support animated webp images\");\n+ m_has_animation = features.has_animation > 0;\n+ if (m_has_animation)\n+ {\n+ fs.seekg(0, std::ios::beg); CV_Assert(fs && \"File stream error\");\n+ data.create(1, validateToInt(fs_size), CV_8UC1);\n+ fs.read((char*)data.ptr(), fs_size);\n+ CV_Assert(fs && \"Can't read file data\");\n+ fs.close();\n+\n+ CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);\n+\n+ WebPData webp_data;\n+ webp_data.bytes = (const uint8_t*)data.ptr();\n+ webp_data.size = data.total();\n \n+ WebPAnimDecoderOptions dec_options;\n+ WebPAnimDecoderOptionsInit(&dec_options);\n+ dec_options.color_mode = MODE_BGRA;\n+ anim_decoder = WebPAnimDecoderNew(&webp_data, &dec_options);", + "comment_created_at": "2024-07-17T15:11:36+00:00", + "comment_author": "vrabaud", + "comment_body": "Just do the same thing for data and encoder (or maybe simply do what you did for the encoder, by using a full unique_ptr definition if you only use those once in the code.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1259283684", + "pr_number": 23960, + "pr_file": "modules/video/src/tracking/bytetracker.cpp", + "created_at": "2023-07-11T07:07:13+00:00", + "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n\n\n\n#include \"../precomp.hpp\"\n\n//#include \"opencv2/video/detail/bytetracker.hpp\"\n// /#include \"opencv2/video/detail/bytetracker_strack.hpp\"\n#include \"opencv2/video/detail/lapjv.hpp\"\n#include \"opencv2/video/detail/tracking.detail.hpp\"\n#include \n\n// #include \"detail/bytetracker.hpp\"\n// #include \"detail/bytetracker_strack.hpp\"\n// #include \"detail/lapjv.hpp\"\n\n#ifdef HAVE_OPENCV_DNN\n#include \"opencv2/dnn.hpp\"\n#endif\n\nusing namespace std;\nusing namespace cv;\n\nnamespace cv {\n\n\n//using cv::detail::tracking::Strack;\n//using cv::detail::tracking::Detection;\n//using cv::detail::tracking::TrackState;\n\n\n\nByteTracker::ByteTracker()\n{\n //nothing\n}\n\nByteTracker::~ByteTracker()\n{\n //nothing\n}\n\nByteTracker::Params::Params()\n{\n frameRate = 30;\n frameBuffer = 30;\n}\n \n \n\n\n\nclass ByteTrackerImpl : public ByteTracker\n{ \npublic:\n //ByteTracker(int, int);\n ByteTrackerImpl(const ByteTracker::Params& parameters) : params_(parameters)\n {\n trackThreshold_ = 0.5f;\n matchThreshold_ = 0.7f;\n lastId_ = 0;\n frame_ = 0;\n 
maxTimeLost_ = static_cast(params_.frameRate / 30.0f * params_.frameBuffer);\n }\n\n void init(InputArray image, const Rect& boundingBox);\n //std::vector> update(std::vector>)\n vector update(vector& objects);\n //Scalar get_color(int idx);\n int getFrame();\n map lapjv(vector> &cost);\n\nprotected:\n ByteTracker::Params params_;\n float trackThreshold_;\n float matchThreshold_;\n unordered_map trackedStracks_;\n unordered_map lostStracks_;\n int lastId_;\n int frame_;\n int maxTimeLost_;\n KalmanFilter kalmanFilter_;\n\n void getDetections(vector& Objects, vector& detections, \n vector& detectionsLow);\n\n\n void addNewDetectedTracks(unordered_map trackedMap,\n vector& inactiveStracks, vector& trackedStracks);\n\n\n Mat getCostMatrix(vector& tracks, vector& btracks);\n Mat getCostMatrix(unordered_map& atracks, vector &btracks);\n\n \n Mat calculateIous(vector& atlwhs, vector &btlwhs);\n\n\n unordered_map joinStracks(const vector& trackA, vector& trackB);\n unordered_map joinStracks(const vector& trackVector,\n unordered_map& trackMap, bool inplace);\n\n};\n\n\nvector ByteTrackerImpl::update(vector &objects)\n{\n\n // Detetions, Dk = Detections(fk)\n vector detections; // consider changing to cv::Mat_\n vector detectionsLow;\n vector remainDets;\n vector activatedStracks;\n vector reRemainTracks;\n\n getDetections(objects, detections, detectionsLow); // objects -> D and Dlow\n\n vector inactiveStracks;\n vector trackedStracks;\n\n addNewDetectedTracks(trackedStracks_, inactiveStracks, trackedStracks); // trackedStracks_ -> inactive and active\n\n unordered_map strackPool;\n strackPool = joinStracks(trackedStracks, lostStracks_, false);\n // remember that in the first association we consider the lost tracks too\n // we need to predict the tracks to do association\n // it updates strackPool with prediction, maybe pass strackPool by reference\n for (auto &track : strackPool)\n {\n cv::Mat prediction = track.second.predict(); // cx cy w h\n prediction.at(0, 0) -= prediction.at(2, 0);\n prediction.at(1, 0) -= prediction.at(3, 0);\n track.second.setTlwh(prediction);\n }\n\n // getting map keys from the indexes\n unordered_map indexToKey;\n int index = 0;\n for (const auto &pair : strackPool)\n {\n int key = pair.first;\n indexToKey[index] = key;\n ++index;\n }\n\n // First association with IoU\n vector> dists; // IoU distances, maybe change it to mat type?\n dists = getCostMatrix(strackPool, detections);\n\n vector remainTracks;\n vector strackIndex;\n vector detectionsIndex;\n map matches;\n\n matches = lapjv(dists); // returns a map (track_i,matched_det_index)\n // cout << \"\\n Num of matches: \" << matches.size();\n\n // Find unmatched track indexes\n for (int trackIndex = 0; trackIndex < static_cast(strackPool.size()); ++trackIndex)\n {\n if (matches.find(trackIndex) == matches.end())\n {\n strackIndex.push_back(trackIndex);\n }\n }\n\n // Find unmatched detection indexes\n for (int detectionIndex = 0; detectionIndex < static_cast(detections.size());\n ++detectionIndex)\n {\n bool matched = false;\n for (const auto &match : matches)\n {\n int matchedDetectionIndex = match.second;\n if (detectionIndex == matchedDetectionIndex)\n {\n matched = true;\n break;\n }\n }\n if (!matched)\n {\n detectionsIndex.push_back(detectionIndex);\n }\n }\n\n // remain tracks and dets\n for (int i = 0; i < static_cast(strackIndex.size()); i++)\n {\n int key = indexToKey[strackIndex[i]];\n Strack track = strackPool[key];\n remainTracks.push_back(track);\n }\n for (int j = 0; j < 
static_cast(detectionsIndex.size()); j++)\n {\n remainDets.push_back(detections[detectionsIndex[j]]);\n }\n\n for (auto &pair : matches) // row\n {\n int key = indexToKey[pair.first];\n Strack &track = strackPool[key];\n Strack &detection = detections[pair.second];\n\n // if it's tracked, update it, else reactivate it\n if (track.getState() == TrackState::Tracked)\n {\n track.update(detection);\n activatedStracks.push_back(track);\n }\n else\n {\n track.reactivate(detection, frame_);\n activatedStracks.push_back(track);\n lostStracks_.erase(track.getId());\n }\n }\n\n dists = getCostMatrix(remainTracks, detectionsLow);\n strackIndex.clear();\n detectionsIndex.clear();\n matches = lapjv(dists);\n\n // cout << \"\\n Num of low matches: \" << matches.size();\n\n // Find unmatched track indexes\n for (int trackIndex = 0; trackIndex < static_cast(remainTracks.size()); ++trackIndex)\n {\n if (matches.find(trackIndex) == matches.end())\n {\n strackIndex.push_back(trackIndex);\n }\n }\n\n // Find unmatched detection indexes\n for (int detectionIndex = 0; detectionIndex < static_cast(detectionsLow.size());\n ++detectionIndex)\n {\n bool matched = false;\n for (const auto &match : matches)\n {\n int matchedDetectionIndex = match.second;\n if (detectionIndex == matchedDetectionIndex)\n {\n matched = true;\n break;\n }\n }\n if (!matched)\n {\n detectionsIndex.push_back(detectionIndex);\n }\n }\n\n for (int i = 0; i < static_cast(strackIndex.size()); i++)\n {\n reRemainTracks.push_back(remainTracks[strackIndex[i]]);\n }\n\n for (auto pair : matches) // row\n {\n Strack &track = remainTracks[pair.first];\n Strack &detection = detectionsLow[pair.second];\n\n // if it's tracked, update it, else re_activate it\n if (track.getState() == TrackState::Tracked)\n {\n track.update(detection);\n activatedStracks.push_back(track);\n }\n else\n {\n track.reactivate(detection, frame_);\n activatedStracks.push_back(track);\n lostStracks_.erase(track.getId());\n }\n }\n\n // initialize new tracks\n for (int i = 0; i < static_cast(remainDets.size()); i++)\n {\n Strack newTrack = remainDets[i];\n newTrack.activate(getFrame(), lastId_++);\n activatedStracks.push_back(newTrack);\n }\n joinStracks(activatedStracks, trackedStracks_, true); //\"true\" is replacing in place\n joinStracks(reRemainTracks, lostStracks_, true);\n\n // deal with lost tracks and save them in an attribute\n vector keysToRemove;\n for (auto &track : lostStracks_)\n {\n track.second.incrementTrackletLen();\n if ((track.second.getTrackletLen()) >maxTimeLost_)\n keysToRemove.push_back(track.first);\n else\n track.second.setState(TrackState::Lost);\n }\n\n for (int key : keysToRemove)\n {\n lostStracks_.erase(key);\n }\n\n vector ret;\n for (const auto &pair : trackedStracks_)\n {\n const Strack &strack = pair.second;\n ret.push_back(strack);\n }\n\n return ret;\n}\n\nvoid ByteTrackerImpl::getDetections(vector &objects, vector &detections, \n vector &detectionsLow)\n{\n frame_++; // update frame\n for (const auto &obj : objects)\n {\n Strack strack(obj.box, obj.confidence);\n if (obj.confidence >= trackThreshold_) // Dhigh or Dlow\n {\n detections.push_back(strack);\n }\n else\n {\n detectionsLow.push_back(strack);\n }\n }\n}\n\nvoid ByteTrackerImpl::addNewDetectedTracks(unordered_map trackedMap, \n vector &inactiveStracks, vector &trackedStracks)\n{\n // checks if the trackedStracks are activated to keep them in the vector(same name)\n for (auto pair : trackedMap)\n {\n Strack track = pair.second;\n if (track.getState() == TrackState::Tracked)\n 
trackedStracks.push_back(track);\n else\n inactiveStracks.push_back(track);\n }\n}\n\nMat ByteTrackerImpl::getCostMatrix(vector &atracks, vector &btracks)\n{\n Mat costMatrix;\n if (atracks.size() == 0 || btracks.size() == 0)\n {\n return costMatrix; // returns empty matrix\n }\n\n vector atlwhs, btlwhs;\n for (auto& track : atracks)\n {\n atlwhs.push_back(track.getTlwh());\n }\n for (auto& track : btracks)\n {\n btlwhs.push_back(track.getTlwh());\n }\n\n costMatrix = calculateIous(atlwhs, btlwhs);\n subtract(1, costMatrix, costMatrix); //costMatrix = 1 - costMatrix\n\n return costMatrix;\n}\n\nMat ByteTrackerImpl::getCostMatrix(unordered_map &atracks, vector &btracks)\n{\n Mat costMatrix;\n if (atracks.size() == 0 && btracks.size() == 0)\n {\n return costMatrix; // returns empty matrix\n }\n\n vector atlwhs, btlwhs;\n for (auto &pair : atracks)\n {\n Rect tlwh = pair.second.getTlwh();\n atlwhs.push_back(tlwh);\n }\n\n for (auto &track : btracks)\n {\n Rect tlwh = track.getTlwh();\n btlwhs.push_back(tlwh);\n }\n\n costMatrix = calculateIous(atlwhs, btlwhs);\n subtract(1, costMatrix, costMatrix); //costMatrix = 1 - costMatrix\n\n return costMatrix;\n}\n\n\nMat ByteTrackerImpl::calculateIous(vector &atlwhs, vector &btlwhs)\n{\n Mat calculateIous;\n if (atlwhs.empty() || btlwhs.empty())\n {\n return calculateIous;\n }\n\n calculateIous.create(static_cast(atlwhs.size()), static_cast(btlwhs.size()), CV_32F);\n \n // bbox_ious\n for (int i = 0; i < static_cast(atlwhs.size()); ++i)\n {\n for (int j = 0; j < static_cast(btlwhs.size()); ++j)\n {\n cv::Rect intersection = atlwhs[i] & btlwhs[j];\n cv::Rect unionRect = atlwhs[i] | btlwhs[j];\n float intersectionArea = intersection.area();\n float unionArea = unionRect.area();\n calculateIous.at(i, j) = intersectionArea / unionArea;\n }\n }\n\n return calculateIous;\n}\n\nunordered_map ByteTrackerImpl::joinStracks(\n const vector& trackA, vector& trackB)\n{\n unordered_map joinedTracks;\n\n for (const auto &track : trackA)\n {\n joinedTracks.emplace(track.getId(), track);\n }\n\n for (const auto &track : trackB)\n {\n joinedTracks.emplace(track.getId(), track);\n }\n\n return joinedTracks;\n}\n\n// overload to receive a hashmap\nunordered_map ByteTrackerImpl::joinStracks(const vector& trackVector,\n unordered_map& trackMap, bool inplace)\n{\n if (inplace)\n {\n for (const auto& track : trackVector)\n {\n trackMap.emplace(track.getId(), track);\n }\n return trackMap;\n }\n\n unordered_map joinedTracks = trackMap;\n for (const auto &track : trackVector)\n {\n joinedTracks.emplace(track.getId(), track);\n }\n\n return joinedTracks;\n\n}\n\n\n\nmap ByteTrackerImpl::lapjv(vector> &cost)\n{\n map ret;\n if (cost.size() == 0 || cost[0].size() == 0)\n return ret;\n int maxI = cost.size();\n int maxJ = cost[0].size();\n int n = max(maxJ, maxI);\n double **cost_ptr;\n double *u = new double[sizeof(double) * n];\n double *v = new double[sizeof(double) * n];\n int *x_c = new int[n];\n int *y_c = new int[n];\n cost_ptr = new double *[sizeof(double *) * n];", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1259283684", + "repo_full_name": "opencv/opencv", + "pr_number": 23960, + "pr_file": "modules/video/src/tracking/bytetracker.cpp", + "discussion_id": "1259283684", + "commented_code": "@@ -0,0 +1,533 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+\n+\n+\n+#include 
\"../precomp.hpp\"\n+\n+//#include \"opencv2/video/detail/bytetracker.hpp\"\n+// /#include \"opencv2/video/detail/bytetracker_strack.hpp\"\n+#include \"opencv2/video/detail/lapjv.hpp\"\n+#include \"opencv2/video/detail/tracking.detail.hpp\"\n+#include \n+\n+// #include \"detail/bytetracker.hpp\"\n+// #include \"detail/bytetracker_strack.hpp\"\n+// #include \"detail/lapjv.hpp\"\n+\n+#ifdef HAVE_OPENCV_DNN\n+#include \"opencv2/dnn.hpp\"\n+#endif\n+\n+using namespace std;\n+using namespace cv;\n+\n+namespace cv {\n+\n+\n+//using cv::detail::tracking::Strack;\n+//using cv::detail::tracking::Detection;\n+//using cv::detail::tracking::TrackState;\n+\n+\n+\n+ByteTracker::ByteTracker()\n+{\n+ //nothing\n+}\n+\n+ByteTracker::~ByteTracker()\n+{\n+ //nothing\n+}\n+\n+ByteTracker::Params::Params()\n+{\n+ frameRate = 30;\n+ frameBuffer = 30;\n+}\n+ \n+ \n+\n+\n+\n+class ByteTrackerImpl : public ByteTracker\n+{ \n+public:\n+ //ByteTracker(int, int);\n+ ByteTrackerImpl(const ByteTracker::Params& parameters) : params_(parameters)\n+ {\n+ trackThreshold_ = 0.5f;\n+ matchThreshold_ = 0.7f;\n+ lastId_ = 0;\n+ frame_ = 0;\n+ maxTimeLost_ = static_cast(params_.frameRate / 30.0f * params_.frameBuffer);\n+ }\n+\n+ void init(InputArray image, const Rect& boundingBox);\n+ //std::vector> update(std::vector>)\n+ vector update(vector& objects);\n+ //Scalar get_color(int idx);\n+ int getFrame();\n+ map lapjv(vector> &cost);\n+\n+protected:\n+ ByteTracker::Params params_;\n+ float trackThreshold_;\n+ float matchThreshold_;\n+ unordered_map trackedStracks_;\n+ unordered_map lostStracks_;\n+ int lastId_;\n+ int frame_;\n+ int maxTimeLost_;\n+ KalmanFilter kalmanFilter_;\n+\n+ void getDetections(vector& Objects, vector& detections, \n+ vector& detectionsLow);\n+\n+\n+ void addNewDetectedTracks(unordered_map trackedMap,\n+ vector& inactiveStracks, vector& trackedStracks);\n+\n+\n+ Mat getCostMatrix(vector& tracks, vector& btracks);\n+ Mat getCostMatrix(unordered_map& atracks, vector &btracks);\n+\n+ \n+ Mat calculateIous(vector& atlwhs, vector &btlwhs);\n+\n+\n+ unordered_map joinStracks(const vector& trackA, vector& trackB);\n+ unordered_map joinStracks(const vector& trackVector,\n+ unordered_map& trackMap, bool inplace);\n+\n+};\n+\n+\n+vector ByteTrackerImpl::update(vector &objects)\n+{\n+\n+ // Detetions, Dk = Detections(fk)\n+ vector detections; // consider changing to cv::Mat_\n+ vector detectionsLow;\n+ vector remainDets;\n+ vector activatedStracks;\n+ vector reRemainTracks;\n+\n+ getDetections(objects, detections, detectionsLow); // objects -> D and Dlow\n+\n+ vector inactiveStracks;\n+ vector trackedStracks;\n+\n+ addNewDetectedTracks(trackedStracks_, inactiveStracks, trackedStracks); // trackedStracks_ -> inactive and active\n+\n+ unordered_map strackPool;\n+ strackPool = joinStracks(trackedStracks, lostStracks_, false);\n+ // remember that in the first association we consider the lost tracks too\n+ // we need to predict the tracks to do association\n+ // it updates strackPool with prediction, maybe pass strackPool by reference\n+ for (auto &track : strackPool)\n+ {\n+ cv::Mat prediction = track.second.predict(); // cx cy w h\n+ prediction.at(0, 0) -= prediction.at(2, 0);\n+ prediction.at(1, 0) -= prediction.at(3, 0);\n+ track.second.setTlwh(prediction);\n+ }\n+\n+ // getting map keys from the indexes\n+ unordered_map indexToKey;\n+ int index = 0;\n+ for (const auto &pair : strackPool)\n+ {\n+ int key = pair.first;\n+ indexToKey[index] = key;\n+ ++index;\n+ }\n+\n+ // First association with IoU\n+ vector> dists; 
// IoU distances, maybe change it to mat type?\n+ dists = getCostMatrix(strackPool, detections);\n+\n+ vector remainTracks;\n+ vector strackIndex;\n+ vector detectionsIndex;\n+ map matches;\n+\n+ matches = lapjv(dists); // returns a map (track_i,matched_det_index)\n+ // cout << \"\\n Num of matches: \" << matches.size();\n+\n+ // Find unmatched track indexes\n+ for (int trackIndex = 0; trackIndex < static_cast(strackPool.size()); ++trackIndex)\n+ {\n+ if (matches.find(trackIndex) == matches.end())\n+ {\n+ strackIndex.push_back(trackIndex);\n+ }\n+ }\n+\n+ // Find unmatched detection indexes\n+ for (int detectionIndex = 0; detectionIndex < static_cast(detections.size());\n+ ++detectionIndex)\n+ {\n+ bool matched = false;\n+ for (const auto &match : matches)\n+ {\n+ int matchedDetectionIndex = match.second;\n+ if (detectionIndex == matchedDetectionIndex)\n+ {\n+ matched = true;\n+ break;\n+ }\n+ }\n+ if (!matched)\n+ {\n+ detectionsIndex.push_back(detectionIndex);\n+ }\n+ }\n+\n+ // remain tracks and dets\n+ for (int i = 0; i < static_cast(strackIndex.size()); i++)\n+ {\n+ int key = indexToKey[strackIndex[i]];\n+ Strack track = strackPool[key];\n+ remainTracks.push_back(track);\n+ }\n+ for (int j = 0; j < static_cast(detectionsIndex.size()); j++)\n+ {\n+ remainDets.push_back(detections[detectionsIndex[j]]);\n+ }\n+\n+ for (auto &pair : matches) // row\n+ {\n+ int key = indexToKey[pair.first];\n+ Strack &track = strackPool[key];\n+ Strack &detection = detections[pair.second];\n+\n+ // if it's tracked, update it, else reactivate it\n+ if (track.getState() == TrackState::Tracked)\n+ {\n+ track.update(detection);\n+ activatedStracks.push_back(track);\n+ }\n+ else\n+ {\n+ track.reactivate(detection, frame_);\n+ activatedStracks.push_back(track);\n+ lostStracks_.erase(track.getId());\n+ }\n+ }\n+\n+ dists = getCostMatrix(remainTracks, detectionsLow);\n+ strackIndex.clear();\n+ detectionsIndex.clear();\n+ matches = lapjv(dists);\n+\n+ // cout << \"\\n Num of low matches: \" << matches.size();\n+\n+ // Find unmatched track indexes\n+ for (int trackIndex = 0; trackIndex < static_cast(remainTracks.size()); ++trackIndex)\n+ {\n+ if (matches.find(trackIndex) == matches.end())\n+ {\n+ strackIndex.push_back(trackIndex);\n+ }\n+ }\n+\n+ // Find unmatched detection indexes\n+ for (int detectionIndex = 0; detectionIndex < static_cast(detectionsLow.size());\n+ ++detectionIndex)\n+ {\n+ bool matched = false;\n+ for (const auto &match : matches)\n+ {\n+ int matchedDetectionIndex = match.second;\n+ if (detectionIndex == matchedDetectionIndex)\n+ {\n+ matched = true;\n+ break;\n+ }\n+ }\n+ if (!matched)\n+ {\n+ detectionsIndex.push_back(detectionIndex);\n+ }\n+ }\n+\n+ for (int i = 0; i < static_cast(strackIndex.size()); i++)\n+ {\n+ reRemainTracks.push_back(remainTracks[strackIndex[i]]);\n+ }\n+\n+ for (auto pair : matches) // row\n+ {\n+ Strack &track = remainTracks[pair.first];\n+ Strack &detection = detectionsLow[pair.second];\n+\n+ // if it's tracked, update it, else re_activate it\n+ if (track.getState() == TrackState::Tracked)\n+ {\n+ track.update(detection);\n+ activatedStracks.push_back(track);\n+ }\n+ else\n+ {\n+ track.reactivate(detection, frame_);\n+ activatedStracks.push_back(track);\n+ lostStracks_.erase(track.getId());\n+ }\n+ }\n+\n+ // initialize new tracks\n+ for (int i = 0; i < static_cast(remainDets.size()); i++)\n+ {\n+ Strack newTrack = remainDets[i];\n+ newTrack.activate(getFrame(), lastId_++);\n+ activatedStracks.push_back(newTrack);\n+ }\n+ joinStracks(activatedStracks, trackedStracks_, 
true); //\"true\" is replacing in place\n+ joinStracks(reRemainTracks, lostStracks_, true);\n+\n+ // deal with lost tracks and save them in an attribute\n+ vector keysToRemove;\n+ for (auto &track : lostStracks_)\n+ {\n+ track.second.incrementTrackletLen();\n+ if ((track.second.getTrackletLen()) >maxTimeLost_)\n+ keysToRemove.push_back(track.first);\n+ else\n+ track.second.setState(TrackState::Lost);\n+ }\n+\n+ for (int key : keysToRemove)\n+ {\n+ lostStracks_.erase(key);\n+ }\n+\n+ vector ret;\n+ for (const auto &pair : trackedStracks_)\n+ {\n+ const Strack &strack = pair.second;\n+ ret.push_back(strack);\n+ }\n+\n+ return ret;\n+}\n+\n+void ByteTrackerImpl::getDetections(vector &objects, vector &detections, \n+ vector &detectionsLow)\n+{\n+ frame_++; // update frame\n+ for (const auto &obj : objects)\n+ {\n+ Strack strack(obj.box, obj.confidence);\n+ if (obj.confidence >= trackThreshold_) // Dhigh or Dlow\n+ {\n+ detections.push_back(strack);\n+ }\n+ else\n+ {\n+ detectionsLow.push_back(strack);\n+ }\n+ }\n+}\n+\n+void ByteTrackerImpl::addNewDetectedTracks(unordered_map trackedMap, \n+ vector &inactiveStracks, vector &trackedStracks)\n+{\n+ // checks if the trackedStracks are activated to keep them in the vector(same name)\n+ for (auto pair : trackedMap)\n+ {\n+ Strack track = pair.second;\n+ if (track.getState() == TrackState::Tracked)\n+ trackedStracks.push_back(track);\n+ else\n+ inactiveStracks.push_back(track);\n+ }\n+}\n+\n+Mat ByteTrackerImpl::getCostMatrix(vector &atracks, vector &btracks)\n+{\n+ Mat costMatrix;\n+ if (atracks.size() == 0 || btracks.size() == 0)\n+ {\n+ return costMatrix; // returns empty matrix\n+ }\n+\n+ vector atlwhs, btlwhs;\n+ for (auto& track : atracks)\n+ {\n+ atlwhs.push_back(track.getTlwh());\n+ }\n+ for (auto& track : btracks)\n+ {\n+ btlwhs.push_back(track.getTlwh());\n+ }\n+\n+ costMatrix = calculateIous(atlwhs, btlwhs);\n+ subtract(1, costMatrix, costMatrix); //costMatrix = 1 - costMatrix\n+\n+ return costMatrix;\n+}\n+\n+Mat ByteTrackerImpl::getCostMatrix(unordered_map &atracks, vector &btracks)\n+{\n+ Mat costMatrix;\n+ if (atracks.size() == 0 && btracks.size() == 0)\n+ {\n+ return costMatrix; // returns empty matrix\n+ }\n+\n+ vector atlwhs, btlwhs;\n+ for (auto &pair : atracks)\n+ {\n+ Rect tlwh = pair.second.getTlwh();\n+ atlwhs.push_back(tlwh);\n+ }\n+\n+ for (auto &track : btracks)\n+ {\n+ Rect tlwh = track.getTlwh();\n+ btlwhs.push_back(tlwh);\n+ }\n+\n+ costMatrix = calculateIous(atlwhs, btlwhs);\n+ subtract(1, costMatrix, costMatrix); //costMatrix = 1 - costMatrix\n+\n+ return costMatrix;\n+}\n+\n+\n+Mat ByteTrackerImpl::calculateIous(vector &atlwhs, vector &btlwhs)\n+{\n+ Mat calculateIous;\n+ if (atlwhs.empty() || btlwhs.empty())\n+ {\n+ return calculateIous;\n+ }\n+\n+ calculateIous.create(static_cast(atlwhs.size()), static_cast(btlwhs.size()), CV_32F);\n+ \n+ // bbox_ious\n+ for (int i = 0; i < static_cast(atlwhs.size()); ++i)\n+ {\n+ for (int j = 0; j < static_cast(btlwhs.size()); ++j)\n+ {\n+ cv::Rect intersection = atlwhs[i] & btlwhs[j];\n+ cv::Rect unionRect = atlwhs[i] | btlwhs[j];\n+ float intersectionArea = intersection.area();\n+ float unionArea = unionRect.area();\n+ calculateIous.at(i, j) = intersectionArea / unionArea;\n+ }\n+ }\n+\n+ return calculateIous;\n+}\n+\n+unordered_map ByteTrackerImpl::joinStracks(\n+ const vector& trackA, vector& trackB)\n+{\n+ unordered_map joinedTracks;\n+\n+ for (const auto &track : trackA)\n+ {\n+ joinedTracks.emplace(track.getId(), track);\n+ }\n+\n+ for (const auto &track : trackB)\n+ {\n+ 
joinedTracks.emplace(track.getId(), track);\n+ }\n+\n+ return joinedTracks;\n+}\n+\n+// overload to receive a hashmap\n+unordered_map ByteTrackerImpl::joinStracks(const vector& trackVector,\n+ unordered_map& trackMap, bool inplace)\n+{\n+ if (inplace)\n+ {\n+ for (const auto& track : trackVector)\n+ {\n+ trackMap.emplace(track.getId(), track);\n+ }\n+ return trackMap;\n+ }\n+\n+ unordered_map joinedTracks = trackMap;\n+ for (const auto &track : trackVector)\n+ {\n+ joinedTracks.emplace(track.getId(), track);\n+ }\n+\n+ return joinedTracks;\n+\n+}\n+\n+\n+\n+map ByteTrackerImpl::lapjv(vector> &cost)\n+{\n+ map ret;\n+ if (cost.size() == 0 || cost[0].size() == 0)\n+ return ret;\n+ int maxI = cost.size();\n+ int maxJ = cost[0].size();\n+ int n = max(maxJ, maxI);\n+ double **cost_ptr;\n+ double *u = new double[sizeof(double) * n];\n+ double *v = new double[sizeof(double) * n];\n+ int *x_c = new int[n];\n+ int *y_c = new int[n];\n+ cost_ptr = new double *[sizeof(double *) * n];", + "comment_created_at": "2023-07-11T07:07:13+00:00", + "comment_author": "asmorkalov", + "comment_body": "Pleas use `cv::AutoBuffer` or `std::vector` for temporary buffers instead new/delete to prevent memory leaks. `cv::AutoBuffer` is preferable, it may use stack space, if buffer is small enough.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-prevent-null-vulnerabilities.md b/_reviewers/opencv-prevent-null-vulnerabilities.md index 3623dd8..617624b 100644 --- a/_reviewers/opencv-prevent-null-vulnerabilities.md +++ b/_reviewers/opencv-prevent-null-vulnerabilities.md @@ -54,211 +54,3 @@ uint32_t read_chunk(CHUNK& pChunk); ``` Proper memory management not only prevents memory leaks but also eliminates many null pointer scenarios, making your code more robust and easier to maintain. 
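Picking up the `lapjv()` allocations quoted in the ByteTracker discussion above, where the reviewer again recommends `cv::AutoBuffer` or `std::vector` and prefers `AutoBuffer` because small buffers can live on the stack, a rough sketch of the suggested shape follows. It is illustrative only: the helper name is hypothetical, the assignment-solver body is not reproduced, and the buffers are sized by element count (the quoted code multiplied by `sizeof(double)`, which over-allocates).

```cpp
// Illustrative sketch of the suggested buffer handling for lapjv();
// the solver logic itself is omitted and the helper name is hypothetical.
#include <opencv2/core/utility.hpp>  // cv::AutoBuffer
#include <vector>

static void lapjv_buffers(int n)
{
    // Released automatically on every exit path, including exceptions.
    cv::AutoBuffer<double> u(n), v(n);
    cv::AutoBuffer<int> x_c(n), y_c(n);

    // One contiguous n*n block plus a row-pointer table stands in for the
    // manually assembled double** cost matrix.
    std::vector<double> cost_storage(static_cast<size_t>(n) * n);
    std::vector<double*> cost_ptr(n);
    for (int i = 0; i < n; ++i)
        cost_ptr[i] = cost_storage.data() + static_cast<size_t>(i) * n;

    (void)u; (void)v; (void)x_c; (void)y_c; (void)cost_ptr;
}
```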
- - -[ - { - "discussion_id": "1632891724", - "pr_number": 25691, - "pr_file": "modules/imgcodecs/src/grfmt_gif.cpp", - "created_at": "2024-06-10T09:10:33+00:00", - "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level\n// directory of this distribution and at http://opencv.org/license.html\n\n#include \"precomp.hpp\"\n#include \"grfmt_gif.hpp\"\n\nnamespace cv\n{\n//////////////////////////////////////////////////////////////////////\n//// GIF Decoder ////\n//////////////////////////////////////////////////////////////////////\nGifDecoder::GifDecoder() {\n m_signature = R\"(GIF)\";\n m_type = CV_8UC3;\n bgColor = -1;\n m_buf_supported = true;\n globalColorTableSize = 0;\n localColorTableSize = 0;\n lzwMinCodeSize = 0;\n hasRead = false;\n hasTransparentColor = false;\n transparentColor = 0;\n opMode = GRFMT_GIF_Nothing;\n top = 0, left = 0, width = 0, height = 0;\n depth = 8;\n}\n\nGifDecoder::~GifDecoder() {\n close();\n}\n\nbool GifDecoder::readHeader() {\n if (!m_buf.empty()) {\n if (!m_strm.open(m_buf)) {\n return false;\n }\n } else if (!m_strm.open(m_filename)) {\n return false;\n }\n\n try {\n m_strm.skip(6);// Skip the signature: GIF87a or GIF89a\n\n // #1: read logical screen descriptor\n m_width = m_strm.getWord();\n m_height = m_strm.getWord();\n CV_Assert(m_width > 0 && m_height > 0);\n\n char flags = (char)m_strm.getByte();\n\n // the background color -> index in the global color table, valid only if the global color table is present\n bgColor = m_strm.getByte();\n m_strm.skip(1); // Skip the aspect ratio\n\n // #2: read global color table\n depth = ((flags & 0x70) >> 4) + 1;\n if (flags & 0x80) {\n globalColorTableSize = 1 << ((flags & 0x07) + 1);\n globalColorTable.allocate(3 * globalColorTableSize);\n for (int i = 0; i < 3 * globalColorTableSize; i++) {\n globalColorTable[i] = (uchar)m_strm.getByte();\n }\n }\n } catch (...) 
{\n throw;\n }\n\n hasRead = false;\n return true;\n}\n\nbool GifDecoder::readData(Mat &img) {\n if (hasRead) {\n lastImage.copyTo(img);\n return true;\n }\n\n readExtensions();\n // Image separator\n CV_Assert(!(m_strm.getByte()^0x2C));\n left = m_strm.getWord();\n top = m_strm.getWord();\n width = m_strm.getWord();\n height = m_strm.getWord();\n CV_Assert(width > 0 && height > 0 && left + width <= m_width && top + height <= m_height);\n\n currentImageCodeStream.allocate(width * height);\n Mat img_;\n\n switch (opMode) {\n case GifOpMode::GRFMT_GIF_PreviousImage:\n if (lastImage.empty()){\n img_ = Mat(m_height, m_width, CV_8UC3, Scalar(0, 0, 0));\n } else {\n img_ = lastImage;\n }\n break;\n case GifOpMode::GRFMT_GIF_Background:\n // background color is valid iff global color table exists\n CV_Assert(globalColorTableSize > 0);\n img_ = Mat(m_height, m_width, CV_8UC3,\n Scalar(globalColorTable[bgColor * 3 + 2],\n globalColorTable[bgColor * 3 + 1],\n globalColorTable[bgColor * 3]));\n break;\n case GifOpMode::GRFMT_GIF_Nothing:\n case GifOpMode::GRFMT_GIF_Cover:\n // default value\n img_ = Mat(m_height, m_width, CV_8UC3, Scalar(0, 0, 0));\n break;\n default:\n CV_Assert(false);\n }\n lastImage.release();\n\n auto flags = (uchar)m_strm.getByte();\n localColorTableSize = 0;\n if (flags & 0x80) {\n // local color table\n localColorTableSize = 1 << ((flags & 0x07) + 1);\n localColorTable.allocate(3 * localColorTableSize);\n for (int i = 0; i < 3 * localColorTableSize; i++) {\n localColorTable[i] = (uchar)m_strm.getByte();\n }\n }\n\n // the case that neither global nor local color table exists is not defined in the GIF standard (but allowed)\n if (!(globalColorTableSize || localColorTableSize)) {\n // go through the length of unused data.\n m_strm.skip(1);\n int len = m_strm.getByte();\n while (len) {\n m_strm.skip(len);\n len = m_strm.getByte();\n }\n\n lastImage = img_;\n if (!img.empty())\n img_.copyTo(img);\n\n // release the memory\n img_.release();\n return true;\n }\n\n // lzw decompression to get the code stream\n hasRead = lzwDecode();\n\n // convert code stream into pixels on the image\n if (hasRead) {\n if (!(flags & 0x40)) {\n // no interlace, simply convert the code stream into pixels from top to down\n code2pixel(img_, 0, 1);\n } else {\n // consider the interlace mode, the image will be rendered in four separate passes\n code2pixel(img_, 0, 8);\n code2pixel(img_, 4, 8);\n code2pixel(img_, 2, 4);\n code2pixel(img_, 1, 2);\n }\n }\n\n lastImage = img_;\n if (!img.empty())\n img_.copyTo(img);\n\n // release the memory\n img_.release();\n\n return hasRead;\n}\n\nbool GifDecoder::nextPage() {\n if (hasRead) {\n hasRead = false;\n // end of a gif file\n if(!(m_strm.getByte() ^ 0x3B)) return false;\n m_strm.setPos(m_strm.getPos() - 1);\n return true;\n } else {\n bool success;\n try {\n Mat emptyImg;\n success = readData(emptyImg);\n emptyImg.release();\n } catch(...) 
{\n return false;\n }\n return success;\n }\n}\n\nvoid GifDecoder::readExtensions() {\n uchar len;\n while (!(m_strm.getByte() ^ 0x21)) {\n uchar extensionType = (uchar)m_strm.getByte();\n\n // read graphic control extension\n // the scope of this extension is the next image or plain text extension\n hasTransparentColor = false;\n opMode = GifOpMode::GRFMT_GIF_Nothing;// default value\n if (!(extensionType ^ 0xF9)) {\n len = (uchar)m_strm.getByte();\n CV_Assert(len == 4);\n uchar flags = (uchar)m_strm.getByte();\n m_strm.getWord(); // delay time, not used\n opMode = (GifOpMode)((flags & 0x1C) >> 2);\n hasTransparentColor = flags & 0x01;\n transparentColor = (uchar)m_strm.getByte();\n }\n\n // skip other kinds of extensions\n len = (uchar)m_strm.getByte();\n while (len) {\n m_strm.skip(len);\n len = (uchar)m_strm.getByte();\n }\n }\n // roll back to the block identifier\n m_strm.setPos(m_strm.getPos() - 1);\n}\n\nvoid GifDecoder::code2pixel(Mat& img, int start, int k){\n for (int i = start; i < height; i+=k) {\n for (int j = 0; j < width; j++) {\n int idx = i * width + j;\n int colorIdx = currentImageCodeStream[idx];\n if (hasTransparentColor && colorIdx == transparentColor) {\n continue;\n }\n if (colorIdx < localColorTableSize) {\n img.at(top + i, left + j)[0] = localColorTable[colorIdx * 3 + 2]; //B\n img.at(top + i, left + j)[1] = localColorTable[colorIdx * 3 + 1]; //G\n img.at(top + i, left + j)[2] = localColorTable[colorIdx * 3]; //R\n } else if (colorIdx < globalColorTableSize) {\n img.at(top + i, left + j)[0] = globalColorTable[colorIdx * 3 + 2]; //B\n img.at(top + i, left + j)[1] = globalColorTable[colorIdx * 3 + 1]; //G\n img.at(top + i, left + j)[2] = globalColorTable[colorIdx * 3]; //R\n } else {\n CV_Assert(false);\n }\n }\n }\n}\n\nvoid GifDecoder::deleteLzwExtraTablePrefix(lzwNodeD* lzwExtraTable, int lzwTableSize) const{\n for (int i = (1 << lzwMinCodeSize) + 2; i <= lzwTableSize; i++) {\n if (lzwExtraTable[i].prefix) {\n delete[] lzwExtraTable[i].prefix;\n lzwExtraTable[i].prefix = nullptr;\n }\n }\n}\n\nbool GifDecoder::lzwDecode() {\n // initialization\n lzwMinCodeSize = m_strm.getByte();\n int lzwCodeSize = lzwMinCodeSize + 1;\n int clearCode = 1 << lzwMinCodeSize;\n int exitCode = clearCode + 1;\n CV_Assert(lzwCodeSize > 2 && lzwCodeSize <= 12);\n auto* lzwExtraTable = new lzwNodeD[(1 << 12) + 1];", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1632891724", - "repo_full_name": "opencv/opencv", - "pr_number": 25691, - "pr_file": "modules/imgcodecs/src/grfmt_gif.cpp", - "discussion_id": "1632891724", - "commented_code": "@@ -0,0 +1,372 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level\n+// directory of this distribution and at http://opencv.org/license.html\n+\n+#include \"precomp.hpp\"\n+#include \"grfmt_gif.hpp\"\n+\n+namespace cv\n+{\n+//////////////////////////////////////////////////////////////////////\n+//// GIF Decoder ////\n+//////////////////////////////////////////////////////////////////////\n+GifDecoder::GifDecoder() {\n+ m_signature = R\"(GIF)\";\n+ m_type = CV_8UC3;\n+ bgColor = -1;\n+ m_buf_supported = true;\n+ globalColorTableSize = 0;\n+ localColorTableSize = 0;\n+ lzwMinCodeSize = 0;\n+ hasRead = false;\n+ hasTransparentColor = false;\n+ transparentColor = 0;\n+ opMode = GRFMT_GIF_Nothing;\n+ top = 0, left = 0, width = 0, height = 0;\n+ depth = 8;\n+}\n+\n+GifDecoder::~GifDecoder() {\n+ close();\n+}\n+\n+bool GifDecoder::readHeader() {\n+ 
if (!m_buf.empty()) {\n+ if (!m_strm.open(m_buf)) {\n+ return false;\n+ }\n+ } else if (!m_strm.open(m_filename)) {\n+ return false;\n+ }\n+\n+ try {\n+ m_strm.skip(6);// Skip the signature: GIF87a or GIF89a\n+\n+ // #1: read logical screen descriptor\n+ m_width = m_strm.getWord();\n+ m_height = m_strm.getWord();\n+ CV_Assert(m_width > 0 && m_height > 0);\n+\n+ char flags = (char)m_strm.getByte();\n+\n+ // the background color -> index in the global color table, valid only if the global color table is present\n+ bgColor = m_strm.getByte();\n+ m_strm.skip(1); // Skip the aspect ratio\n+\n+ // #2: read global color table\n+ depth = ((flags & 0x70) >> 4) + 1;\n+ if (flags & 0x80) {\n+ globalColorTableSize = 1 << ((flags & 0x07) + 1);\n+ globalColorTable.allocate(3 * globalColorTableSize);\n+ for (int i = 0; i < 3 * globalColorTableSize; i++) {\n+ globalColorTable[i] = (uchar)m_strm.getByte();\n+ }\n+ }\n+ } catch (...) {\n+ throw;\n+ }\n+\n+ hasRead = false;\n+ return true;\n+}\n+\n+bool GifDecoder::readData(Mat &img) {\n+ if (hasRead) {\n+ lastImage.copyTo(img);\n+ return true;\n+ }\n+\n+ readExtensions();\n+ // Image separator\n+ CV_Assert(!(m_strm.getByte()^0x2C));\n+ left = m_strm.getWord();\n+ top = m_strm.getWord();\n+ width = m_strm.getWord();\n+ height = m_strm.getWord();\n+ CV_Assert(width > 0 && height > 0 && left + width <= m_width && top + height <= m_height);\n+\n+ currentImageCodeStream.allocate(width * height);\n+ Mat img_;\n+\n+ switch (opMode) {\n+ case GifOpMode::GRFMT_GIF_PreviousImage:\n+ if (lastImage.empty()){\n+ img_ = Mat(m_height, m_width, CV_8UC3, Scalar(0, 0, 0));\n+ } else {\n+ img_ = lastImage;\n+ }\n+ break;\n+ case GifOpMode::GRFMT_GIF_Background:\n+ // background color is valid iff global color table exists\n+ CV_Assert(globalColorTableSize > 0);\n+ img_ = Mat(m_height, m_width, CV_8UC3,\n+ Scalar(globalColorTable[bgColor * 3 + 2],\n+ globalColorTable[bgColor * 3 + 1],\n+ globalColorTable[bgColor * 3]));\n+ break;\n+ case GifOpMode::GRFMT_GIF_Nothing:\n+ case GifOpMode::GRFMT_GIF_Cover:\n+ // default value\n+ img_ = Mat(m_height, m_width, CV_8UC3, Scalar(0, 0, 0));\n+ break;\n+ default:\n+ CV_Assert(false);\n+ }\n+ lastImage.release();\n+\n+ auto flags = (uchar)m_strm.getByte();\n+ localColorTableSize = 0;\n+ if (flags & 0x80) {\n+ // local color table\n+ localColorTableSize = 1 << ((flags & 0x07) + 1);\n+ localColorTable.allocate(3 * localColorTableSize);\n+ for (int i = 0; i < 3 * localColorTableSize; i++) {\n+ localColorTable[i] = (uchar)m_strm.getByte();\n+ }\n+ }\n+\n+ // the case that neither global nor local color table exists is not defined in the GIF standard (but allowed)\n+ if (!(globalColorTableSize || localColorTableSize)) {\n+ // go through the length of unused data.\n+ m_strm.skip(1);\n+ int len = m_strm.getByte();\n+ while (len) {\n+ m_strm.skip(len);\n+ len = m_strm.getByte();\n+ }\n+\n+ lastImage = img_;\n+ if (!img.empty())\n+ img_.copyTo(img);\n+\n+ // release the memory\n+ img_.release();\n+ return true;\n+ }\n+\n+ // lzw decompression to get the code stream\n+ hasRead = lzwDecode();\n+\n+ // convert code stream into pixels on the image\n+ if (hasRead) {\n+ if (!(flags & 0x40)) {\n+ // no interlace, simply convert the code stream into pixels from top to down\n+ code2pixel(img_, 0, 1);\n+ } else {\n+ // consider the interlace mode, the image will be rendered in four separate passes\n+ code2pixel(img_, 0, 8);\n+ code2pixel(img_, 4, 8);\n+ code2pixel(img_, 2, 4);\n+ code2pixel(img_, 1, 2);\n+ }\n+ }\n+\n+ lastImage = img_;\n+ if 
(!img.empty())\n+ img_.copyTo(img);\n+\n+ // release the memory\n+ img_.release();\n+\n+ return hasRead;\n+}\n+\n+bool GifDecoder::nextPage() {\n+ if (hasRead) {\n+ hasRead = false;\n+ // end of a gif file\n+ if(!(m_strm.getByte() ^ 0x3B)) return false;\n+ m_strm.setPos(m_strm.getPos() - 1);\n+ return true;\n+ } else {\n+ bool success;\n+ try {\n+ Mat emptyImg;\n+ success = readData(emptyImg);\n+ emptyImg.release();\n+ } catch(...) {\n+ return false;\n+ }\n+ return success;\n+ }\n+}\n+\n+void GifDecoder::readExtensions() {\n+ uchar len;\n+ while (!(m_strm.getByte() ^ 0x21)) {\n+ uchar extensionType = (uchar)m_strm.getByte();\n+\n+ // read graphic control extension\n+ // the scope of this extension is the next image or plain text extension\n+ hasTransparentColor = false;\n+ opMode = GifOpMode::GRFMT_GIF_Nothing;// default value\n+ if (!(extensionType ^ 0xF9)) {\n+ len = (uchar)m_strm.getByte();\n+ CV_Assert(len == 4);\n+ uchar flags = (uchar)m_strm.getByte();\n+ m_strm.getWord(); // delay time, not used\n+ opMode = (GifOpMode)((flags & 0x1C) >> 2);\n+ hasTransparentColor = flags & 0x01;\n+ transparentColor = (uchar)m_strm.getByte();\n+ }\n+\n+ // skip other kinds of extensions\n+ len = (uchar)m_strm.getByte();\n+ while (len) {\n+ m_strm.skip(len);\n+ len = (uchar)m_strm.getByte();\n+ }\n+ }\n+ // roll back to the block identifier\n+ m_strm.setPos(m_strm.getPos() - 1);\n+}\n+\n+void GifDecoder::code2pixel(Mat& img, int start, int k){\n+ for (int i = start; i < height; i+=k) {\n+ for (int j = 0; j < width; j++) {\n+ int idx = i * width + j;\n+ int colorIdx = currentImageCodeStream[idx];\n+ if (hasTransparentColor && colorIdx == transparentColor) {\n+ continue;\n+ }\n+ if (colorIdx < localColorTableSize) {\n+ img.at(top + i, left + j)[0] = localColorTable[colorIdx * 3 + 2]; //B\n+ img.at(top + i, left + j)[1] = localColorTable[colorIdx * 3 + 1]; //G\n+ img.at(top + i, left + j)[2] = localColorTable[colorIdx * 3]; //R\n+ } else if (colorIdx < globalColorTableSize) {\n+ img.at(top + i, left + j)[0] = globalColorTable[colorIdx * 3 + 2]; //B\n+ img.at(top + i, left + j)[1] = globalColorTable[colorIdx * 3 + 1]; //G\n+ img.at(top + i, left + j)[2] = globalColorTable[colorIdx * 3]; //R\n+ } else {\n+ CV_Assert(false);\n+ }\n+ }\n+ }\n+}\n+\n+void GifDecoder::deleteLzwExtraTablePrefix(lzwNodeD* lzwExtraTable, int lzwTableSize) const{\n+ for (int i = (1 << lzwMinCodeSize) + 2; i <= lzwTableSize; i++) {\n+ if (lzwExtraTable[i].prefix) {\n+ delete[] lzwExtraTable[i].prefix;\n+ lzwExtraTable[i].prefix = nullptr;\n+ }\n+ }\n+}\n+\n+bool GifDecoder::lzwDecode() {\n+ // initialization\n+ lzwMinCodeSize = m_strm.getByte();\n+ int lzwCodeSize = lzwMinCodeSize + 1;\n+ int clearCode = 1 << lzwMinCodeSize;\n+ int exitCode = clearCode + 1;\n+ CV_Assert(lzwCodeSize > 2 && lzwCodeSize <= 12);\n+ auto* lzwExtraTable = new lzwNodeD[(1 << 12) + 1];", - "comment_created_at": "2024-06-10T09:10:33+00:00", - "comment_author": "asmorkalov", - "comment_body": "please use std::vector, std::array or cv::AutoBuffer to prevent memory leaks in cases of parser failrue.", - "pr_file_module": null - }, - { - "comment_id": "1655702685", - "repo_full_name": "opencv/opencv", - "pr_number": 25691, - "pr_file": "modules/imgcodecs/src/grfmt_gif.cpp", - "discussion_id": "1632891724", - "commented_code": "@@ -0,0 +1,372 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level\n+// directory of this distribution and at http://opencv.org/license.html\n+\n+#include 
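The GIF decoder discussion quoted above centres on `auto* lzwExtraTable = new lzwNodeD[(1 << 12) + 1]` plus a helper that deletes each node's `prefix` by hand, and the reviewer asks for `std::vector`, `std::array` or `cv::AutoBuffer` so that a parser failure (for instance a throwing `CV_Assert`) cannot leak. Below is a simplified sketch of that idea; `LzwNode` and the function name are stand-ins, since the real `lzwNodeD` layout is not reproduced here.

```cpp
// Simplified sketch only: LzwNode is a stand-in for the PR's lzwNodeD.
#include <cstdint>
#include <vector>

struct LzwNode
{
    std::vector<uint8_t> prefix;  // owns its bytes, so no manual delete[] pass
    uint8_t suffix = 0;
    int length = 0;
};

static void lzw_decode_sketch()
{
    // Table covering the 12-bit LZW code space. If a CV_Assert or any other
    // exception fires while parsing, the vector and every nested prefix are
    // destroyed automatically, so a malformed GIF cannot leak memory.
    std::vector<LzwNode> lzwExtraTable((1 << 12) + 1);

    // ... LZW decoding would populate lzwExtraTable here ...
}
```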
\"precomp.hpp\"\n+#include \"grfmt_gif.hpp\"\n+\n+namespace cv\n+{\n+//////////////////////////////////////////////////////////////////////\n+//// GIF Decoder ////\n+//////////////////////////////////////////////////////////////////////\n+GifDecoder::GifDecoder() {\n+ m_signature = R\"(GIF)\";\n+ m_type = CV_8UC3;\n+ bgColor = -1;\n+ m_buf_supported = true;\n+ globalColorTableSize = 0;\n+ localColorTableSize = 0;\n+ lzwMinCodeSize = 0;\n+ hasRead = false;\n+ hasTransparentColor = false;\n+ transparentColor = 0;\n+ opMode = GRFMT_GIF_Nothing;\n+ top = 0, left = 0, width = 0, height = 0;\n+ depth = 8;\n+}\n+\n+GifDecoder::~GifDecoder() {\n+ close();\n+}\n+\n+bool GifDecoder::readHeader() {\n+ if (!m_buf.empty()) {\n+ if (!m_strm.open(m_buf)) {\n+ return false;\n+ }\n+ } else if (!m_strm.open(m_filename)) {\n+ return false;\n+ }\n+\n+ try {\n+ m_strm.skip(6);// Skip the signature: GIF87a or GIF89a\n+\n+ // #1: read logical screen descriptor\n+ m_width = m_strm.getWord();\n+ m_height = m_strm.getWord();\n+ CV_Assert(m_width > 0 && m_height > 0);\n+\n+ char flags = (char)m_strm.getByte();\n+\n+ // the background color -> index in the global color table, valid only if the global color table is present\n+ bgColor = m_strm.getByte();\n+ m_strm.skip(1); // Skip the aspect ratio\n+\n+ // #2: read global color table\n+ depth = ((flags & 0x70) >> 4) + 1;\n+ if (flags & 0x80) {\n+ globalColorTableSize = 1 << ((flags & 0x07) + 1);\n+ globalColorTable.allocate(3 * globalColorTableSize);\n+ for (int i = 0; i < 3 * globalColorTableSize; i++) {\n+ globalColorTable[i] = (uchar)m_strm.getByte();\n+ }\n+ }\n+ } catch (...) {\n+ throw;\n+ }\n+\n+ hasRead = false;\n+ return true;\n+}\n+\n+bool GifDecoder::readData(Mat &img) {\n+ if (hasRead) {\n+ lastImage.copyTo(img);\n+ return true;\n+ }\n+\n+ readExtensions();\n+ // Image separator\n+ CV_Assert(!(m_strm.getByte()^0x2C));\n+ left = m_strm.getWord();\n+ top = m_strm.getWord();\n+ width = m_strm.getWord();\n+ height = m_strm.getWord();\n+ CV_Assert(width > 0 && height > 0 && left + width <= m_width && top + height <= m_height);\n+\n+ currentImageCodeStream.allocate(width * height);\n+ Mat img_;\n+\n+ switch (opMode) {\n+ case GifOpMode::GRFMT_GIF_PreviousImage:\n+ if (lastImage.empty()){\n+ img_ = Mat(m_height, m_width, CV_8UC3, Scalar(0, 0, 0));\n+ } else {\n+ img_ = lastImage;\n+ }\n+ break;\n+ case GifOpMode::GRFMT_GIF_Background:\n+ // background color is valid iff global color table exists\n+ CV_Assert(globalColorTableSize > 0);\n+ img_ = Mat(m_height, m_width, CV_8UC3,\n+ Scalar(globalColorTable[bgColor * 3 + 2],\n+ globalColorTable[bgColor * 3 + 1],\n+ globalColorTable[bgColor * 3]));\n+ break;\n+ case GifOpMode::GRFMT_GIF_Nothing:\n+ case GifOpMode::GRFMT_GIF_Cover:\n+ // default value\n+ img_ = Mat(m_height, m_width, CV_8UC3, Scalar(0, 0, 0));\n+ break;\n+ default:\n+ CV_Assert(false);\n+ }\n+ lastImage.release();\n+\n+ auto flags = (uchar)m_strm.getByte();\n+ localColorTableSize = 0;\n+ if (flags & 0x80) {\n+ // local color table\n+ localColorTableSize = 1 << ((flags & 0x07) + 1);\n+ localColorTable.allocate(3 * localColorTableSize);\n+ for (int i = 0; i < 3 * localColorTableSize; i++) {\n+ localColorTable[i] = (uchar)m_strm.getByte();\n+ }\n+ }\n+\n+ // the case that neither global nor local color table exists is not defined in the GIF standard (but allowed)\n+ if (!(globalColorTableSize || localColorTableSize)) {\n+ // go through the length of unused data.\n+ m_strm.skip(1);\n+ int len = m_strm.getByte();\n+ while (len) {\n+ m_strm.skip(len);\n+ len 
= m_strm.getByte();\n+ }\n+\n+ lastImage = img_;\n+ if (!img.empty())\n+ img_.copyTo(img);\n+\n+ // release the memory\n+ img_.release();\n+ return true;\n+ }\n+\n+ // lzw decompression to get the code stream\n+ hasRead = lzwDecode();\n+\n+ // convert code stream into pixels on the image\n+ if (hasRead) {\n+ if (!(flags & 0x40)) {\n+ // no interlace, simply convert the code stream into pixels from top to down\n+ code2pixel(img_, 0, 1);\n+ } else {\n+ // consider the interlace mode, the image will be rendered in four separate passes\n+ code2pixel(img_, 0, 8);\n+ code2pixel(img_, 4, 8);\n+ code2pixel(img_, 2, 4);\n+ code2pixel(img_, 1, 2);\n+ }\n+ }\n+\n+ lastImage = img_;\n+ if (!img.empty())\n+ img_.copyTo(img);\n+\n+ // release the memory\n+ img_.release();\n+\n+ return hasRead;\n+}\n+\n+bool GifDecoder::nextPage() {\n+ if (hasRead) {\n+ hasRead = false;\n+ // end of a gif file\n+ if(!(m_strm.getByte() ^ 0x3B)) return false;\n+ m_strm.setPos(m_strm.getPos() - 1);\n+ return true;\n+ } else {\n+ bool success;\n+ try {\n+ Mat emptyImg;\n+ success = readData(emptyImg);\n+ emptyImg.release();\n+ } catch(...) {\n+ return false;\n+ }\n+ return success;\n+ }\n+}\n+\n+void GifDecoder::readExtensions() {\n+ uchar len;\n+ while (!(m_strm.getByte() ^ 0x21)) {\n+ uchar extensionType = (uchar)m_strm.getByte();\n+\n+ // read graphic control extension\n+ // the scope of this extension is the next image or plain text extension\n+ hasTransparentColor = false;\n+ opMode = GifOpMode::GRFMT_GIF_Nothing;// default value\n+ if (!(extensionType ^ 0xF9)) {\n+ len = (uchar)m_strm.getByte();\n+ CV_Assert(len == 4);\n+ uchar flags = (uchar)m_strm.getByte();\n+ m_strm.getWord(); // delay time, not used\n+ opMode = (GifOpMode)((flags & 0x1C) >> 2);\n+ hasTransparentColor = flags & 0x01;\n+ transparentColor = (uchar)m_strm.getByte();\n+ }\n+\n+ // skip other kinds of extensions\n+ len = (uchar)m_strm.getByte();\n+ while (len) {\n+ m_strm.skip(len);\n+ len = (uchar)m_strm.getByte();\n+ }\n+ }\n+ // roll back to the block identifier\n+ m_strm.setPos(m_strm.getPos() - 1);\n+}\n+\n+void GifDecoder::code2pixel(Mat& img, int start, int k){\n+ for (int i = start; i < height; i+=k) {\n+ for (int j = 0; j < width; j++) {\n+ int idx = i * width + j;\n+ int colorIdx = currentImageCodeStream[idx];\n+ if (hasTransparentColor && colorIdx == transparentColor) {\n+ continue;\n+ }\n+ if (colorIdx < localColorTableSize) {\n+ img.at(top + i, left + j)[0] = localColorTable[colorIdx * 3 + 2]; //B\n+ img.at(top + i, left + j)[1] = localColorTable[colorIdx * 3 + 1]; //G\n+ img.at(top + i, left + j)[2] = localColorTable[colorIdx * 3]; //R\n+ } else if (colorIdx < globalColorTableSize) {\n+ img.at(top + i, left + j)[0] = globalColorTable[colorIdx * 3 + 2]; //B\n+ img.at(top + i, left + j)[1] = globalColorTable[colorIdx * 3 + 1]; //G\n+ img.at(top + i, left + j)[2] = globalColorTable[colorIdx * 3]; //R\n+ } else {\n+ CV_Assert(false);\n+ }\n+ }\n+ }\n+}\n+\n+void GifDecoder::deleteLzwExtraTablePrefix(lzwNodeD* lzwExtraTable, int lzwTableSize) const{\n+ for (int i = (1 << lzwMinCodeSize) + 2; i <= lzwTableSize; i++) {\n+ if (lzwExtraTable[i].prefix) {\n+ delete[] lzwExtraTable[i].prefix;\n+ lzwExtraTable[i].prefix = nullptr;\n+ }\n+ }\n+}\n+\n+bool GifDecoder::lzwDecode() {\n+ // initialization\n+ lzwMinCodeSize = m_strm.getByte();\n+ int lzwCodeSize = lzwMinCodeSize + 1;\n+ int clearCode = 1 << lzwMinCodeSize;\n+ int exitCode = clearCode + 1;\n+ CV_Assert(lzwCodeSize > 2 && lzwCodeSize <= 12);\n+ auto* lzwExtraTable = new lzwNodeD[(1 << 12) + 
1];", - "comment_created_at": "2024-06-27T01:26:45+00:00", - "comment_author": "redhecker", - "comment_body": "done", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1798939112", - "pr_number": 26281, - "pr_file": "modules/core/src/opengl.cpp", - "created_at": "2024-10-14T07:57:17+00:00", - "commented_code": "#elif !defined(HAVE_OPENCL_OPENGL_SHARING)\n NO_OPENCL_SHARING_ERROR;\n#else\n cl_uint numPlatforms;\n cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);\n cl_uint platformsCnt = 0;\n cl_uint devCnt = 0;\n cl_device_id* devices = nullptr;\n cl_uint devUsed = 0;\n cl_context context = nullptr;\n\n cl_int status = clGetPlatformIDs(0, NULL, &platformsCnt);\n if (status != CL_SUCCESS)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get number of platforms: %d\", status));\n if (numPlatforms == 0)\n if (platformsCnt == 0)\n CV_Error(cv::Error::OpenCLInitError, \"OpenCL: No available platforms\");\n\n std::vector platforms(numPlatforms);\n status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL);\n std::vector platforms(platformsCnt);\n status = clGetPlatformIDs(platformsCnt, &platforms[0], NULL);\n if (status != CL_SUCCESS)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get number of platforms: %d\", status));\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get platforms: %d\", status));\n\n\n // TODO Filter platforms by name from OPENCV_OPENCL_DEVICE\n bool sharingSupported = false;\n\n int found = -1;\n cl_device_id device = NULL;\n cl_context context = NULL;\n for (unsigned int i = 0; (!sharingSupported && (i < platformsCnt)); ++i) {\n status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &devCnt);\n if (status != CL_SUCCESS)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: No devices available: %d\", status));\n\n for (int i = 0; i < (int)numPlatforms; i++)\n {\n // query platform extension: presence of \"cl_khr_gl_sharing\" extension is required\n {\n AutoBuffer extensionStr;\n try {\n devices = new cl_device_id[devCnt];", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1798939112", - "repo_full_name": "opencv/opencv", - "pr_number": 26281, - "pr_file": "modules/core/src/opengl.cpp", - "discussion_id": "1798939112", - "commented_code": "@@ -1635,94 +1641,148 @@ Context& initializeContextFromGL()\n #elif !defined(HAVE_OPENCL_OPENGL_SHARING)\n NO_OPENCL_SHARING_ERROR;\n #else\n- cl_uint numPlatforms;\n- cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);\n+ cl_uint platformsCnt = 0;\n+ cl_uint devCnt = 0;\n+ cl_device_id* devices = nullptr;\n+ cl_uint devUsed = 0;\n+ cl_context context = nullptr;\n+\n+ cl_int status = clGetPlatformIDs(0, NULL, &platformsCnt);\n if (status != CL_SUCCESS)\n CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get number of platforms: %d\", status));\n- if (numPlatforms == 0)\n+ if (platformsCnt == 0)\n CV_Error(cv::Error::OpenCLInitError, \"OpenCL: No available platforms\");\n \n- std::vector platforms(numPlatforms);\n- status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL);\n+ std::vector platforms(platformsCnt);\n+ status = clGetPlatformIDs(platformsCnt, &platforms[0], NULL);\n if (status != CL_SUCCESS)\n- CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get number of platforms: %d\", status));\n+ CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: Can't get platforms: %d\", status));\n+\n \n // TODO Filter platforms by name from OPENCV_OPENCL_DEVICE\n+ bool sharingSupported = false;\n \n- int found = -1;\n- cl_device_id device = 
NULL;\n- cl_context context = NULL;\n+ for (unsigned int i = 0; (!sharingSupported && (i < platformsCnt)); ++i) {\n+ status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &devCnt);\n+ if (status != CL_SUCCESS)\n+ CV_Error_(cv::Error::OpenCLInitError, (\"OpenCL: No devices available: %d\", status));\n \n- for (int i = 0; i < (int)numPlatforms; i++)\n- {\n- // query platform extension: presence of \"cl_khr_gl_sharing\" extension is required\n- {\n- AutoBuffer extensionStr;\n+ try {\n+ devices = new cl_device_id[devCnt];", - "comment_created_at": "2024-10-14T07:57:17+00:00", - "comment_author": "asmorkalov", - "comment_body": "Let's use `std::vector<>` or `cv::AutoBuffer` for locals to prevent leaks.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1658732501", - "pr_number": 25715, - "pr_file": "modules/imgcodecs/src/grfmt_png.cpp", - "created_at": "2024-06-28T13:18:04+00:00", - "commented_code": "return result;\n}\n\nbool PngDecoder::nextPage() {\n if (m_f)\n {\n uint id;\n CHUNK chunkfcTL;\n\n id = read_chunk(m_f, &chunkfcTL);\n if (id == id_fcTL && chunkfcTL.size == 38)\n {\n // At this point the old frame is done. Let's start a new one.\n uint w0 = png_get_uint_32(chunkfcTL.p + 12);\n uint h0 = png_get_uint_32(chunkfcTL.p + 16);\n uint x0 = png_get_uint_32(chunkfcTL.p + 20);\n uint y0 = png_get_uint_32(chunkfcTL.p + 24);\n int delay_num = png_get_uint_16(chunkfcTL.p + 28);\n int delay_den = png_get_uint_16(chunkfcTL.p + 30);\n char dop = chunkfcTL.p[32];\n char bop = chunkfcTL.p[33];\n printf(\"**frame props\\n-------------------\\n\");\n printf(\"w0 : %d\\n\", w0);\n printf(\"h0 : %d\\n\", h0);\n printf(\"x0 : %d\\n\", x0);\n printf(\"y0 : %d\\n\", y0);\n printf(\"delay_num : %d\\n\", delay_num);\n printf(\"delay_den : %d\\n\", delay_den);\n printf(\"dop : %d\\n\", dop);\n printf(\"bop : %d\\n-------------------\\n\", bop);\n\n uchar sig[8];\n if (fread(sig, 1, 8, m_f))\n return true;\n }\n }\n return false;\n}\n\nvoid PngDecoder::compose_frame(uchar** rows_dst, uchar** rows_src, uchar bop, uint x, uint y, uint w, uint h)\n{\n uint i, j;\n int u, v, al;\n\n for (j = 0; j < h; j++)\n {\n uchar* sp = rows_src[j];\n uchar* dp = rows_dst[j + y] + x * 4;\n\n if (bop == 0)\n memcpy(dp, sp, w * 4);\n else\n for (i = 0; i < w; i++, sp += 4, dp += 4)\n {\n if (sp[3] == 255)\n memcpy(dp, sp, 4);\n else\n if (sp[3] != 0)\n {\n if (dp[3] != 0)\n {\n u = sp[3] * 255;\n v = (255 - sp[3]) * dp[3];\n al = u + v;\n dp[0] = (sp[0] * u + dp[0] * v) / al;\n dp[1] = (sp[1] * u + dp[1] * v) / al;\n dp[2] = (sp[2] * u + dp[2] * v) / al;\n dp[3] = al / 255;\n }\n else\n memcpy(dp, sp, 4);\n }\n }\n }\n}\n\nuint PngDecoder::read_chunk(FILE* f, CHUNK* pChunk)\n{\n uchar len[4];\n pChunk->size = 0;\n pChunk->p = 0;\n if (fread(&len, 4, 1, f) == 1)\n {\n pChunk->size = png_get_uint_32(len);\n if (pChunk->size > PNG_USER_CHUNK_MALLOC_MAX)\n return 0;\n pChunk->size += 12;\n pChunk->p = new uchar[pChunk->size];", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1658732501", - "repo_full_name": "opencv/opencv", - "pr_number": 25715, - "pr_file": "modules/imgcodecs/src/grfmt_png.cpp", - "discussion_id": "1658732501", - "commented_code": "@@ -300,6 +403,379 @@ bool PngDecoder::readData( Mat& img )\n return result;\n }\n \n+bool PngDecoder::nextPage() {\n+ if (m_f)\n+ {\n+ uint id;\n+ CHUNK chunkfcTL;\n+\n+ id = read_chunk(m_f, &chunkfcTL);\n+ if (id == id_fcTL && chunkfcTL.size == 38)\n+ {\n+ // At this point the old frame is done. 
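The OpenCL review above makes the same point for a plain local: `devices = new cl_device_id[devCnt]` inside a `try` block leaks if a later call fails before the matching `delete[]`. A sketch of the vector-based alternative, assuming the standard OpenCL headers are available and with OpenCV's error handling omitted:

```cpp
#include <vector>
#include <CL/cl.h>  // assumed available; provides cl_device_id and clGetDeviceIDs

static std::vector<cl_device_id> queryGpuDevicesSketch(cl_platform_id platform, cl_uint devCnt)
{
    // Local RAII storage: released automatically on every exit path, including exceptions
    // thrown by surrounding error macros. CL error checking is omitted in this sketch.
    std::vector<cl_device_id> devices(devCnt);
    clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, devCnt, devices.data(), nullptr);
    return devices;
}
```

`cv::AutoBuffer<cl_device_id>` would serve equally well when the device count is usually small.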
Let's start a new one.\n+ uint w0 = png_get_uint_32(chunkfcTL.p + 12);\n+ uint h0 = png_get_uint_32(chunkfcTL.p + 16);\n+ uint x0 = png_get_uint_32(chunkfcTL.p + 20);\n+ uint y0 = png_get_uint_32(chunkfcTL.p + 24);\n+ int delay_num = png_get_uint_16(chunkfcTL.p + 28);\n+ int delay_den = png_get_uint_16(chunkfcTL.p + 30);\n+ char dop = chunkfcTL.p[32];\n+ char bop = chunkfcTL.p[33];\n+ printf(\"**frame props\\n-------------------\\n\");\n+ printf(\"w0 : %d\\n\", w0);\n+ printf(\"h0 : %d\\n\", h0);\n+ printf(\"x0 : %d\\n\", x0);\n+ printf(\"y0 : %d\\n\", y0);\n+ printf(\"delay_num : %d\\n\", delay_num);\n+ printf(\"delay_den : %d\\n\", delay_den);\n+ printf(\"dop : %d\\n\", dop);\n+ printf(\"bop : %d\\n-------------------\\n\", bop);\n+\n+ uchar sig[8];\n+ if (fread(sig, 1, 8, m_f))\n+ return true;\n+ }\n+ }\n+ return false;\n+}\n+\n+void PngDecoder::compose_frame(uchar** rows_dst, uchar** rows_src, uchar bop, uint x, uint y, uint w, uint h)\n+{\n+ uint i, j;\n+ int u, v, al;\n+\n+ for (j = 0; j < h; j++)\n+ {\n+ uchar* sp = rows_src[j];\n+ uchar* dp = rows_dst[j + y] + x * 4;\n+\n+ if (bop == 0)\n+ memcpy(dp, sp, w * 4);\n+ else\n+ for (i = 0; i < w; i++, sp += 4, dp += 4)\n+ {\n+ if (sp[3] == 255)\n+ memcpy(dp, sp, 4);\n+ else\n+ if (sp[3] != 0)\n+ {\n+ if (dp[3] != 0)\n+ {\n+ u = sp[3] * 255;\n+ v = (255 - sp[3]) * dp[3];\n+ al = u + v;\n+ dp[0] = (sp[0] * u + dp[0] * v) / al;\n+ dp[1] = (sp[1] * u + dp[1] * v) / al;\n+ dp[2] = (sp[2] * u + dp[2] * v) / al;\n+ dp[3] = al / 255;\n+ }\n+ else\n+ memcpy(dp, sp, 4);\n+ }\n+ }\n+ }\n+}\n+\n+uint PngDecoder::read_chunk(FILE* f, CHUNK* pChunk)\n+{\n+ uchar len[4];\n+ pChunk->size = 0;\n+ pChunk->p = 0;\n+ if (fread(&len, 4, 1, f) == 1)\n+ {\n+ pChunk->size = png_get_uint_32(len);\n+ if (pChunk->size > PNG_USER_CHUNK_MALLOC_MAX)\n+ return 0;\n+ pChunk->size += 12;\n+ pChunk->p = new uchar[pChunk->size];", - "comment_created_at": "2024-06-28T13:18:04+00:00", - "comment_author": "vrabaud", - "comment_body": "Please do not use new as it makes its hard to track memory. 
Use std::vector if you can.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1894579735", - "pr_number": 25715, - "pr_file": "modules/imgcodecs/src/grfmt_png.cpp", - "created_at": "2024-12-21T07:44:06+00:00", - "commented_code": "return result;\n}\n\nvoid PngEncoder::optim_dirty(std::vector& frames)\n{\n uint32_t i, j;\n unsigned char* sp;\n uint32_t size = frames[0].getWidth() * frames[0].getHeight();\n\n for (i = 0; i < frames.size(); i++)\n {\n sp = frames[i].getPixels();\n for (j = 0; j < size; j++, sp += 4)\n if (sp[3] == 0)\n sp[0] = sp[1] = sp[2] = 0;\n }\n}\n\nvoid PngEncoder::optim_duplicates(std::vector& frames, uint32_t first)\n{\n uint32_t imagesize = frames[0].getWidth() * frames[0].getHeight() * 4;\n uint32_t i = first;\n\n while (++i < frames.size())\n {\n if (memcmp(frames[i - 1].getPixels(), frames[i].getPixels(), imagesize) != 0)\n continue;\n\n i--;\n delete[] frames[i].getPixels();\n delete[] frames[i].getRows();\n uint32_t num = frames[i].getDelayNum();\n uint32_t den = frames[i].getDelayDen();\n frames.erase(frames.begin() + i);\n\n if (frames[i].getDelayDen() == den)\n frames[i].setDelayNum(frames[i].getDelayNum()+num);\n else\n {\n frames[i].setDelayNum(num * frames[i].getDelayDen() + den * frames[i].getDelayNum());\n frames[i].setDelayDen(den * frames[i].getDelayDen());\n while (num && den)\n {\n if (num > den)\n num = num % den;\n else\n den = den % num;\n }\n num += den;\n frames[i].setDelayNum(frames[i].getDelayNum() / num);\n frames[i].setDelayDen(frames[i].getDelayDen() / num);\n }\n }\n}\n\nsize_t PngEncoder::write_to_io(void const* _Buffer, size_t _ElementSize, size_t _ElementCount, FILE * _Stream)\n{\n if (_Stream)\n return fwrite(_Buffer, _ElementSize, _ElementCount, _Stream);\n\n size_t cursz = m_buf->size();\n m_buf->resize(cursz + _ElementCount);\n memcpy( &(*m_buf)[cursz], _Buffer, _ElementCount );\n return _ElementCount;\n}\n\nvoid PngEncoder::write_chunk(FILE* f, const char* name, unsigned char* data, uint32_t length)\n{\n unsigned char buf[4];\n uint32_t crc = crc32(0, Z_NULL, 0);\n\n png_save_uint_32(buf, length);\n write_to_io(buf, 1, 4, f);\n write_to_io(name, 1, 4, f);\n crc = crc32(crc, (const Bytef*)name, 4);\n\n if (memcmp(name, \"fdAT\", 4) == 0)\n {\n png_save_uint_32(buf, next_seq_num++);\n write_to_io(buf, 1, 4, f);\n crc = crc32(crc, buf, 4);\n length -= 4;\n }\n\n if (data != NULL && length > 0)\n {\n write_to_io(data, 1, length, f);\n crc = crc32(crc, data, length);\n }\n\n png_save_uint_32(buf, crc);\n write_to_io(buf, 1, 4, f);\n}\n\nvoid PngEncoder::write_IDATs(FILE* f, int frame, unsigned char* data, uint32_t length, uint32_t idat_size)\n{\n uint32_t z_cmf = data[0];\n if ((z_cmf & 0x0f) == 8 && (z_cmf & 0xf0) <= 0x70)\n {\n if (length >= 2)\n {\n uint32_t z_cinfo = z_cmf >> 4;\n uint32_t half_z_window_size = 1 << (z_cinfo + 7);\n while (idat_size <= half_z_window_size && half_z_window_size >= 256)\n {\n z_cinfo--;\n half_z_window_size >>= 1;\n }\n z_cmf = (z_cmf & 0x0f) | (z_cinfo << 4);\n if (data[0] != (unsigned char)z_cmf)\n {\n data[0] = (unsigned char)z_cmf;\n data[1] &= 0xe0;\n data[1] += (unsigned char)(0x1f - ((z_cmf << 8) + data[1]) % 0x1f);\n }\n }\n }\n\n while (length > 0)\n {\n uint32_t ds = length;\n if (ds > 32768)\n ds = 32768;\n\n if (frame == 0)\n write_chunk(f, \"IDAT\", data, ds);\n else\n write_chunk(f, \"fdAT\", data, ds + 4);\n\n data += ds;\n length -= ds;\n }\n}\n\nvoid PngEncoder::process_rect(unsigned char* row, int rowbytes, int bpp, int stride, int h, unsigned char* rows)\n{\n int i, j, 
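For `read_chunk`, the advice just above ("do not use new ... use std::vector") can be read as making the chunk own its bytes. One possible reshaping, shown with a hypothetical `ChunkSketch` struct rather than the decoder's real `CHUNK`:

```cpp
#include <cstdint>
#include <vector>

// Hypothetical counterpart of the decoder's CHUNK: the buffer manages itself.
struct ChunkSketch
{
    std::vector<unsigned char> p;  // chunk payload plus length/type/CRC framing
    uint32_t size = 0;
};

static void allocateChunkSketch(ChunkSketch& chunk, uint32_t payloadSize)
{
    chunk.size = payloadSize + 12;  // 4-byte length + 4-byte type + 4-byte CRC, as in the decoder
    chunk.p.resize(chunk.size);     // freed automatically, even if reading the chunk later fails
}
```

Callers that previously passed `pChunk->p` would now pass `chunk.p.data()`.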
v;\n int a, b, c, pa, pb, pc, p;\n unsigned char* prev = NULL;\n unsigned char* dp = rows;\n unsigned char* out;\n\n for (j = 0; j < h; j++)\n {\n uint32_t sum = 0;\n unsigned char* best_row = row_buf;\n uint32_t mins = ((uint32_t)(-1)) >> 1;\n\n out = row_buf + 1;\n for (i = 0; i < rowbytes; i++)\n {\n v = out[i] = row[i];\n sum += (v < 128) ? v : 256 - v;\n }\n mins = sum;\n\n sum = 0;\n out = sub_row + 1;\n for (i = 0; i < bpp; i++)\n {\n v = out[i] = row[i];\n sum += (v < 128) ? v : 256 - v;\n }\n for (i = bpp; i < rowbytes; i++)\n {\n v = out[i] = row[i] - row[i - bpp];\n sum += (v < 128) ? v : 256 - v;\n if (sum > mins)\n break;\n }\n if (sum < mins)\n {\n mins = sum;\n best_row = sub_row;\n }\n\n if (prev)\n {\n sum = 0;\n out = up_row + 1;\n for (i = 0; i < rowbytes; i++)\n {\n v = out[i] = row[i] - prev[i];\n sum += (v < 128) ? v : 256 - v;\n if (sum > mins)\n break;\n }\n if (sum < mins)\n {\n mins = sum;\n best_row = up_row;\n }\n\n sum = 0;\n out = avg_row + 1;\n for (i = 0; i < bpp; i++)\n {\n v = out[i] = row[i] - prev[i] / 2;\n sum += (v < 128) ? v : 256 - v;\n }\n for (i = bpp; i < rowbytes; i++)\n {\n v = out[i] = row[i] - (prev[i] + row[i - bpp]) / 2;\n sum += (v < 128) ? v : 256 - v;\n if (sum > mins)\n break;\n }\n if (sum < mins)\n {\n mins = sum;\n best_row = avg_row;\n }\n\n sum = 0;\n out = paeth_row + 1;\n for (i = 0; i < bpp; i++)\n {\n v = out[i] = row[i] - prev[i];\n sum += (v < 128) ? v : 256 - v;\n }\n for (i = bpp; i < rowbytes; i++)\n {\n a = row[i - bpp];\n b = prev[i];\n c = prev[i - bpp];\n p = b - c;\n pc = a - c;\n pa = abs(p);\n pb = abs(pc);\n pc = abs(p + pc);\n p = (pa <= pb && pa <= pc) ? a : (pb <= pc) ? b\n : c;\n v = out[i] = row[i] - p;\n sum += (v < 128) ? v : 256 - v;\n if (sum > mins)\n break;\n }\n if (sum < mins)\n {\n best_row = paeth_row;\n }\n }\n\n if (rows == NULL)\n {\n // deflate_rect_op()\n op_zstream1.next_in = row_buf;\n op_zstream1.avail_in = rowbytes + 1;\n deflate(&op_zstream1, Z_NO_FLUSH);\n\n op_zstream2.next_in = best_row;\n op_zstream2.avail_in = rowbytes + 1;\n deflate(&op_zstream2, Z_NO_FLUSH);\n }\n else\n {\n // deflate_rect_fin()\n memcpy(dp, best_row, rowbytes + 1);\n dp += rowbytes + 1;\n }\n\n prev = row;\n row += stride;\n }\n}\n\nvoid PngEncoder::deflate_rect_op(unsigned char* pdata, int x, int y, int w, int h, int bpp, int stride, int zbuf_size, int n)\n{\n unsigned char* row = pdata + y * stride + x * bpp;\n int rowbytes = w * bpp;\n\n op_zstream1.data_type = Z_BINARY;\n op_zstream1.next_out = op_zbuf1;\n op_zstream1.avail_out = zbuf_size;\n\n op_zstream2.data_type = Z_BINARY;\n op_zstream2.next_out = op_zbuf2;\n op_zstream2.avail_out = zbuf_size;\n\n process_rect(row, rowbytes, bpp, stride, h, NULL);\n\n deflate(&op_zstream1, Z_FINISH);\n deflate(&op_zstream2, Z_FINISH);\n op[n].p = pdata;\n\n if (op_zstream1.total_out < op_zstream2.total_out)\n {\n op[n].size = op_zstream1.total_out;\n op[n].filters = 0;\n }\n else\n {\n op[n].size = op_zstream2.total_out;\n op[n].filters = 1;\n }\n op[n].x = x;\n op[n].y = y;\n op[n].w = w;\n op[n].h = h;\n op[n].valid = 1;\n deflateReset(&op_zstream1);\n deflateReset(&op_zstream2);\n}\n\nvoid PngEncoder::get_rect(uint32_t w, uint32_t h, unsigned char* pimage1, unsigned char* pimage2, unsigned char* ptemp, uint32_t bpp, uint32_t stride, int zbuf_size, uint32_t has_tcolor, uint32_t tcolor, int n)\n{\n uint32_t i, j, x0, y0, w0, h0;\n uint32_t x_min = w - 1;\n uint32_t y_min = h - 1;\n uint32_t x_max = 0;\n uint32_t y_max = 0;\n uint32_t diffnum = 0;\n uint32_t 
over_is_possible = 1;\n\n if (!has_tcolor)\n over_is_possible = 0;\n\n if (bpp == 1)\n {\n unsigned char* pa = pimage1;\n unsigned char* pb = pimage2;\n unsigned char* pc = ptemp;\n\n for (j = 0; j < h; j++)\n for (i = 0; i < w; i++)\n {\n unsigned char c = *pb++;\n if (*pa++ != c)\n {\n diffnum++;\n if (has_tcolor && c == tcolor)\n over_is_possible = 0;\n if (i < x_min)\n x_min = i;\n if (i > x_max)\n x_max = i;\n if (j < y_min)\n y_min = j;\n if (j > y_max)\n y_max = j;\n }\n else\n c = tcolor;\n\n *pc++ = c;\n }\n }\n else if (bpp == 2)\n {\n unsigned short* pa = (unsigned short*)pimage1;\n unsigned short* pb = (unsigned short*)pimage2;\n unsigned short* pc = (unsigned short*)ptemp;\n\n for (j = 0; j < h; j++)\n for (i = 0; i < w; i++)\n {\n uint32_t c1 = *pa++;\n uint32_t c2 = *pb++;\n if ((c1 != c2) && ((c1 >> 8) || (c2 >> 8)))\n {\n diffnum++;\n if ((c2 >> 8) != 0xFF)\n over_is_possible = 0;\n if (i < x_min)\n x_min = i;\n if (i > x_max)\n x_max = i;\n if (j < y_min)\n y_min = j;\n if (j > y_max)\n y_max = j;\n }\n else\n c2 = 0;\n\n *pc++ = c2;\n }\n }\n else if (bpp == 3)\n {\n unsigned char* pa = pimage1;\n unsigned char* pb = pimage2;\n unsigned char* pc = ptemp;\n\n for (j = 0; j < h; j++)\n for (i = 0; i < w; i++)\n {\n uint32_t c1 = (pa[2] << 16) + (pa[1] << 8) + pa[0];\n uint32_t c2 = (pb[2] << 16) + (pb[1] << 8) + pb[0];\n if (c1 != c2)\n {\n diffnum++;\n if (has_tcolor && c2 == tcolor)\n over_is_possible = 0;\n if (i < x_min)\n x_min = i;\n if (i > x_max)\n x_max = i;\n if (j < y_min)\n y_min = j;\n if (j > y_max)\n y_max = j;\n }\n else\n c2 = tcolor;\n\n memcpy(pc, &c2, 3);\n pa += 3;\n pb += 3;\n pc += 3;\n }\n }\n else if (bpp == 4)\n {\n uint32_t* pa = (uint32_t*)pimage1;\n uint32_t* pb = (uint32_t*)pimage2;\n uint32_t* pc = (uint32_t*)ptemp;\n\n for (j = 0; j < h; j++)\n for (i = 0; i < w; i++)\n {\n uint32_t c1 = *pa++;\n uint32_t c2 = *pb++;\n if ((c1 != c2) && ((c1 >> 24) || (c2 >> 24)))\n {\n diffnum++;\n if ((c2 >> 24) != 0xFF)\n over_is_possible = 0;\n if (i < x_min)\n x_min = i;\n if (i > x_max)\n x_max = i;\n if (j < y_min)\n y_min = j;\n if (j > y_max)\n y_max = j;\n }\n else\n c2 = 0;\n\n *pc++ = c2;\n }\n }\n\n if (diffnum == 0)\n {\n x0 = y0 = 0;\n w0 = h0 = 1;\n }\n else\n {\n x0 = x_min;\n y0 = y_min;\n w0 = x_max - x_min + 1;\n h0 = y_max - y_min + 1;\n }\n\n deflate_rect_op(pimage2, x0, y0, w0, h0, bpp, stride, zbuf_size, n * 2);\n\n if (over_is_possible)\n deflate_rect_op(ptemp, x0, y0, w0, h0, bpp, stride, zbuf_size, n * 2 + 1);\n}\n\nvoid PngEncoder::deflate_rect_fin(int deflate_method, int iter, unsigned char* zbuf, uint32_t* zsize, int bpp, int stride, unsigned char* rows, int zbuf_size, int n)\n{\n unsigned char* row = op[n].p + op[n].y * stride + op[n].x * bpp;\n int rowbytes = op[n].w * bpp;\n\n if (op[n].filters == 0)\n {\n unsigned char* dp = rows;\n for (int j = 0; j < op[n].h; j++)\n {\n *dp++ = 0;\n memcpy(dp, row, rowbytes);\n dp += rowbytes;\n row += stride;\n }\n }\n else\n process_rect(row, rowbytes, bpp, stride, op[n].h, rows);\n\n if (deflate_method == 2)\n {\n CV_UNUSED(iter);\n#if 0 // needs include \"zopfli.h\"\n ZopfliOptions opt_zopfli;\n unsigned char* data = 0;\n size_t size = 0;\n ZopfliInitOptions(&opt_zopfli);\n opt_zopfli.numiterations = iter;\n ZopfliCompress(&opt_zopfli, ZOPFLI_FORMAT_ZLIB, rows, op[n].h * (rowbytes + 1), &data, &size);\n if (size < (size_t)zbuf_size)\n {\n memcpy(zbuf, data, size);\n *zsize = size;\n }\n free(data);\n#endif\n }\n else if (deflate_method == 1)\n {\n#if 0 // needs include \"7z.h\"\n 
unsigned size = zbuf_size;\n compress_rfc1950_7z(rows, op[n].h * (rowbytes + 1), zbuf, size, iter < 100 ? iter : 100, 255);\n *zsize = size;\n#endif\n }\n else\n {\n z_stream fin_zstream;\n\n fin_zstream.data_type = Z_BINARY;\n fin_zstream.zalloc = Z_NULL;\n fin_zstream.zfree = Z_NULL;\n fin_zstream.opaque = Z_NULL;\n deflateInit2(&fin_zstream, Z_BEST_COMPRESSION, 8, 15, 8, op[n].filters ? Z_FILTERED : Z_DEFAULT_STRATEGY);\n\n fin_zstream.next_out = zbuf;\n fin_zstream.avail_out = zbuf_size;\n fin_zstream.next_in = rows;\n fin_zstream.avail_in = op[n].h * (rowbytes + 1);\n deflate(&fin_zstream, Z_FINISH);\n *zsize = fin_zstream.total_out;\n deflateEnd(&fin_zstream);\n }\n}\n\nbool PngEncoder::writemulti(const std::vector& img_vec, const std::vector& params)\n{\n CV_Assert(img_vec[0].depth() == CV_8U);\n Animation animation;\n animation.frames = img_vec;\n\n for (size_t i = 0; i < animation.frames.size(); i++)\n {\n animation.durations.push_back(1000);\n }\n return writeanimation(animation, params);\n}\n\nbool PngEncoder::writeanimation(const Animation& animation, const std::vector& params)\n{\n int compression_level = 6;\n int compression_strategy = IMWRITE_PNG_STRATEGY_RLE; // Default strategy\n bool isBilevel = false;\n\n for (size_t i = 0; i < params.size(); i += 2)\n {\n if (params[i] == IMWRITE_PNG_COMPRESSION)\n {\n compression_strategy = IMWRITE_PNG_STRATEGY_DEFAULT; // Default strategy\n compression_level = params[i + 1];\n compression_level = MIN(MAX(compression_level, 0), Z_BEST_COMPRESSION);\n }\n if (params[i] == IMWRITE_PNG_STRATEGY)\n {\n compression_strategy = params[i + 1];\n compression_strategy = MIN(MAX(compression_strategy, 0), Z_FIXED);\n }\n if (params[i] == IMWRITE_PNG_BILEVEL)\n {\n isBilevel = params[i + 1] != 0;\n }\n }\n\n std::vector frames;\n std::vector tmpframes;\n\n for (size_t i = 0; i < animation.frames.size(); i++)\n {\n APNGFrame apngFrame;\n tmpframes.push_back(animation.frames[i].clone());\n if (animation.frames[i].channels() == 4)\n cvtColor(animation.frames[i], tmpframes[i], COLOR_BGRA2RGBA);\n if (animation.frames[i].channels() == 3)\n cvtColor(animation.frames[i], tmpframes[i], COLOR_BGR2RGB);\n apngFrame.setMat(tmpframes[i]);\n apngFrame.setDelayDen(animation.durations[i]);\n frames.push_back(apngFrame);\n }\n\n CV_UNUSED(isBilevel);\n uint32_t first =0;\n uint32_t loops= animation.loop_count;\n uint32_t coltype= animation.frames[0].channels() == 1 ? PNG_COLOR_TYPE_GRAY : animation.frames[0].channels() == 3 ? PNG_COLOR_TYPE_RGB : PNG_COLOR_TYPE_RGB_ALPHA;\n int deflate_method=0;\n int iter=0;\n\n FILE* m_f = NULL;\n uint32_t i, j, k;\n uint32_t x0, y0, w0, h0, dop, bop;\n uint32_t idat_size, zbuf_size, zsize;\n unsigned char* zbuf;\n unsigned char header[8] = { 137, 80, 78, 71, 13, 10, 26, 10 };\n uint32_t num_frames = (int)frames.size();\n uint32_t width = frames[0].getWidth();\n uint32_t height = frames[0].getHeight();\n uint32_t bpp = (coltype == 6) ? 4 : (coltype == 2) ? 3\n : (coltype == 4) ? 2\n : 1;\n uint32_t has_tcolor = (coltype >= 4 || (coltype <= 2 && trnssize)) ? 
1 : 0;\n uint32_t tcolor = 0;\n uint32_t rowbytes = width * bpp;\n uint32_t imagesize = rowbytes * height;\n\n unsigned char* temp = new unsigned char[imagesize];\n unsigned char* over1 = new unsigned char[imagesize];\n unsigned char* over2 = new unsigned char[imagesize];\n unsigned char* over3 = new unsigned char[imagesize];\n unsigned char* rest = new unsigned char[imagesize];\n unsigned char* rows = new unsigned char[(rowbytes + 1) * height];", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1894579735", - "repo_full_name": "opencv/opencv", - "pr_number": 25715, - "pr_file": "modules/imgcodecs/src/grfmt_png.cpp", - "discussion_id": "1894579735", - "commented_code": "@@ -449,6 +852,843 @@ bool PngEncoder::write( const Mat& img, const std::vector& params )\n return result;\n }\n \n+void PngEncoder::optim_dirty(std::vector& frames)\n+{\n+ uint32_t i, j;\n+ unsigned char* sp;\n+ uint32_t size = frames[0].getWidth() * frames[0].getHeight();\n+\n+ for (i = 0; i < frames.size(); i++)\n+ {\n+ sp = frames[i].getPixels();\n+ for (j = 0; j < size; j++, sp += 4)\n+ if (sp[3] == 0)\n+ sp[0] = sp[1] = sp[2] = 0;\n+ }\n+}\n+\n+void PngEncoder::optim_duplicates(std::vector& frames, uint32_t first)\n+{\n+ uint32_t imagesize = frames[0].getWidth() * frames[0].getHeight() * 4;\n+ uint32_t i = first;\n+\n+ while (++i < frames.size())\n+ {\n+ if (memcmp(frames[i - 1].getPixels(), frames[i].getPixels(), imagesize) != 0)\n+ continue;\n+\n+ i--;\n+ delete[] frames[i].getPixels();\n+ delete[] frames[i].getRows();\n+ uint32_t num = frames[i].getDelayNum();\n+ uint32_t den = frames[i].getDelayDen();\n+ frames.erase(frames.begin() + i);\n+\n+ if (frames[i].getDelayDen() == den)\n+ frames[i].setDelayNum(frames[i].getDelayNum()+num);\n+ else\n+ {\n+ frames[i].setDelayNum(num * frames[i].getDelayDen() + den * frames[i].getDelayNum());\n+ frames[i].setDelayDen(den * frames[i].getDelayDen());\n+ while (num && den)\n+ {\n+ if (num > den)\n+ num = num % den;\n+ else\n+ den = den % num;\n+ }\n+ num += den;\n+ frames[i].setDelayNum(frames[i].getDelayNum() / num);\n+ frames[i].setDelayDen(frames[i].getDelayDen() / num);\n+ }\n+ }\n+}\n+\n+size_t PngEncoder::write_to_io(void const* _Buffer, size_t _ElementSize, size_t _ElementCount, FILE * _Stream)\n+{\n+ if (_Stream)\n+ return fwrite(_Buffer, _ElementSize, _ElementCount, _Stream);\n+\n+ size_t cursz = m_buf->size();\n+ m_buf->resize(cursz + _ElementCount);\n+ memcpy( &(*m_buf)[cursz], _Buffer, _ElementCount );\n+ return _ElementCount;\n+}\n+\n+void PngEncoder::write_chunk(FILE* f, const char* name, unsigned char* data, uint32_t length)\n+{\n+ unsigned char buf[4];\n+ uint32_t crc = crc32(0, Z_NULL, 0);\n+\n+ png_save_uint_32(buf, length);\n+ write_to_io(buf, 1, 4, f);\n+ write_to_io(name, 1, 4, f);\n+ crc = crc32(crc, (const Bytef*)name, 4);\n+\n+ if (memcmp(name, \"fdAT\", 4) == 0)\n+ {\n+ png_save_uint_32(buf, next_seq_num++);\n+ write_to_io(buf, 1, 4, f);\n+ crc = crc32(crc, buf, 4);\n+ length -= 4;\n+ }\n+\n+ if (data != NULL && length > 0)\n+ {\n+ write_to_io(data, 1, length, f);\n+ crc = crc32(crc, data, length);\n+ }\n+\n+ png_save_uint_32(buf, crc);\n+ write_to_io(buf, 1, 4, f);\n+}\n+\n+void PngEncoder::write_IDATs(FILE* f, int frame, unsigned char* data, uint32_t length, uint32_t idat_size)\n+{\n+ uint32_t z_cmf = data[0];\n+ if ((z_cmf & 0x0f) == 8 && (z_cmf & 0xf0) <= 0x70)\n+ {\n+ if (length >= 2)\n+ {\n+ uint32_t z_cinfo = z_cmf >> 4;\n+ uint32_t half_z_window_size = 1 << (z_cinfo + 7);\n+ while (idat_size <= 
half_z_window_size && half_z_window_size >= 256)\n+ {\n+ z_cinfo--;\n+ half_z_window_size >>= 1;\n+ }\n+ z_cmf = (z_cmf & 0x0f) | (z_cinfo << 4);\n+ if (data[0] != (unsigned char)z_cmf)\n+ {\n+ data[0] = (unsigned char)z_cmf;\n+ data[1] &= 0xe0;\n+ data[1] += (unsigned char)(0x1f - ((z_cmf << 8) + data[1]) % 0x1f);\n+ }\n+ }\n+ }\n+\n+ while (length > 0)\n+ {\n+ uint32_t ds = length;\n+ if (ds > 32768)\n+ ds = 32768;\n+\n+ if (frame == 0)\n+ write_chunk(f, \"IDAT\", data, ds);\n+ else\n+ write_chunk(f, \"fdAT\", data, ds + 4);\n+\n+ data += ds;\n+ length -= ds;\n+ }\n+}\n+\n+void PngEncoder::process_rect(unsigned char* row, int rowbytes, int bpp, int stride, int h, unsigned char* rows)\n+{\n+ int i, j, v;\n+ int a, b, c, pa, pb, pc, p;\n+ unsigned char* prev = NULL;\n+ unsigned char* dp = rows;\n+ unsigned char* out;\n+\n+ for (j = 0; j < h; j++)\n+ {\n+ uint32_t sum = 0;\n+ unsigned char* best_row = row_buf;\n+ uint32_t mins = ((uint32_t)(-1)) >> 1;\n+\n+ out = row_buf + 1;\n+ for (i = 0; i < rowbytes; i++)\n+ {\n+ v = out[i] = row[i];\n+ sum += (v < 128) ? v : 256 - v;\n+ }\n+ mins = sum;\n+\n+ sum = 0;\n+ out = sub_row + 1;\n+ for (i = 0; i < bpp; i++)\n+ {\n+ v = out[i] = row[i];\n+ sum += (v < 128) ? v : 256 - v;\n+ }\n+ for (i = bpp; i < rowbytes; i++)\n+ {\n+ v = out[i] = row[i] - row[i - bpp];\n+ sum += (v < 128) ? v : 256 - v;\n+ if (sum > mins)\n+ break;\n+ }\n+ if (sum < mins)\n+ {\n+ mins = sum;\n+ best_row = sub_row;\n+ }\n+\n+ if (prev)\n+ {\n+ sum = 0;\n+ out = up_row + 1;\n+ for (i = 0; i < rowbytes; i++)\n+ {\n+ v = out[i] = row[i] - prev[i];\n+ sum += (v < 128) ? v : 256 - v;\n+ if (sum > mins)\n+ break;\n+ }\n+ if (sum < mins)\n+ {\n+ mins = sum;\n+ best_row = up_row;\n+ }\n+\n+ sum = 0;\n+ out = avg_row + 1;\n+ for (i = 0; i < bpp; i++)\n+ {\n+ v = out[i] = row[i] - prev[i] / 2;\n+ sum += (v < 128) ? v : 256 - v;\n+ }\n+ for (i = bpp; i < rowbytes; i++)\n+ {\n+ v = out[i] = row[i] - (prev[i] + row[i - bpp]) / 2;\n+ sum += (v < 128) ? v : 256 - v;\n+ if (sum > mins)\n+ break;\n+ }\n+ if (sum < mins)\n+ {\n+ mins = sum;\n+ best_row = avg_row;\n+ }\n+\n+ sum = 0;\n+ out = paeth_row + 1;\n+ for (i = 0; i < bpp; i++)\n+ {\n+ v = out[i] = row[i] - prev[i];\n+ sum += (v < 128) ? v : 256 - v;\n+ }\n+ for (i = bpp; i < rowbytes; i++)\n+ {\n+ a = row[i - bpp];\n+ b = prev[i];\n+ c = prev[i - bpp];\n+ p = b - c;\n+ pc = a - c;\n+ pa = abs(p);\n+ pb = abs(pc);\n+ pc = abs(p + pc);\n+ p = (pa <= pb && pa <= pc) ? a : (pb <= pc) ? b\n+ : c;\n+ v = out[i] = row[i] - p;\n+ sum += (v < 128) ? 
v : 256 - v;\n+ if (sum > mins)\n+ break;\n+ }\n+ if (sum < mins)\n+ {\n+ best_row = paeth_row;\n+ }\n+ }\n+\n+ if (rows == NULL)\n+ {\n+ // deflate_rect_op()\n+ op_zstream1.next_in = row_buf;\n+ op_zstream1.avail_in = rowbytes + 1;\n+ deflate(&op_zstream1, Z_NO_FLUSH);\n+\n+ op_zstream2.next_in = best_row;\n+ op_zstream2.avail_in = rowbytes + 1;\n+ deflate(&op_zstream2, Z_NO_FLUSH);\n+ }\n+ else\n+ {\n+ // deflate_rect_fin()\n+ memcpy(dp, best_row, rowbytes + 1);\n+ dp += rowbytes + 1;\n+ }\n+\n+ prev = row;\n+ row += stride;\n+ }\n+}\n+\n+void PngEncoder::deflate_rect_op(unsigned char* pdata, int x, int y, int w, int h, int bpp, int stride, int zbuf_size, int n)\n+{\n+ unsigned char* row = pdata + y * stride + x * bpp;\n+ int rowbytes = w * bpp;\n+\n+ op_zstream1.data_type = Z_BINARY;\n+ op_zstream1.next_out = op_zbuf1;\n+ op_zstream1.avail_out = zbuf_size;\n+\n+ op_zstream2.data_type = Z_BINARY;\n+ op_zstream2.next_out = op_zbuf2;\n+ op_zstream2.avail_out = zbuf_size;\n+\n+ process_rect(row, rowbytes, bpp, stride, h, NULL);\n+\n+ deflate(&op_zstream1, Z_FINISH);\n+ deflate(&op_zstream2, Z_FINISH);\n+ op[n].p = pdata;\n+\n+ if (op_zstream1.total_out < op_zstream2.total_out)\n+ {\n+ op[n].size = op_zstream1.total_out;\n+ op[n].filters = 0;\n+ }\n+ else\n+ {\n+ op[n].size = op_zstream2.total_out;\n+ op[n].filters = 1;\n+ }\n+ op[n].x = x;\n+ op[n].y = y;\n+ op[n].w = w;\n+ op[n].h = h;\n+ op[n].valid = 1;\n+ deflateReset(&op_zstream1);\n+ deflateReset(&op_zstream2);\n+}\n+\n+void PngEncoder::get_rect(uint32_t w, uint32_t h, unsigned char* pimage1, unsigned char* pimage2, unsigned char* ptemp, uint32_t bpp, uint32_t stride, int zbuf_size, uint32_t has_tcolor, uint32_t tcolor, int n)\n+{\n+ uint32_t i, j, x0, y0, w0, h0;\n+ uint32_t x_min = w - 1;\n+ uint32_t y_min = h - 1;\n+ uint32_t x_max = 0;\n+ uint32_t y_max = 0;\n+ uint32_t diffnum = 0;\n+ uint32_t over_is_possible = 1;\n+\n+ if (!has_tcolor)\n+ over_is_possible = 0;\n+\n+ if (bpp == 1)\n+ {\n+ unsigned char* pa = pimage1;\n+ unsigned char* pb = pimage2;\n+ unsigned char* pc = ptemp;\n+\n+ for (j = 0; j < h; j++)\n+ for (i = 0; i < w; i++)\n+ {\n+ unsigned char c = *pb++;\n+ if (*pa++ != c)\n+ {\n+ diffnum++;\n+ if (has_tcolor && c == tcolor)\n+ over_is_possible = 0;\n+ if (i < x_min)\n+ x_min = i;\n+ if (i > x_max)\n+ x_max = i;\n+ if (j < y_min)\n+ y_min = j;\n+ if (j > y_max)\n+ y_max = j;\n+ }\n+ else\n+ c = tcolor;\n+\n+ *pc++ = c;\n+ }\n+ }\n+ else if (bpp == 2)\n+ {\n+ unsigned short* pa = (unsigned short*)pimage1;\n+ unsigned short* pb = (unsigned short*)pimage2;\n+ unsigned short* pc = (unsigned short*)ptemp;\n+\n+ for (j = 0; j < h; j++)\n+ for (i = 0; i < w; i++)\n+ {\n+ uint32_t c1 = *pa++;\n+ uint32_t c2 = *pb++;\n+ if ((c1 != c2) && ((c1 >> 8) || (c2 >> 8)))\n+ {\n+ diffnum++;\n+ if ((c2 >> 8) != 0xFF)\n+ over_is_possible = 0;\n+ if (i < x_min)\n+ x_min = i;\n+ if (i > x_max)\n+ x_max = i;\n+ if (j < y_min)\n+ y_min = j;\n+ if (j > y_max)\n+ y_max = j;\n+ }\n+ else\n+ c2 = 0;\n+\n+ *pc++ = c2;\n+ }\n+ }\n+ else if (bpp == 3)\n+ {\n+ unsigned char* pa = pimage1;\n+ unsigned char* pb = pimage2;\n+ unsigned char* pc = ptemp;\n+\n+ for (j = 0; j < h; j++)\n+ for (i = 0; i < w; i++)\n+ {\n+ uint32_t c1 = (pa[2] << 16) + (pa[1] << 8) + pa[0];\n+ uint32_t c2 = (pb[2] << 16) + (pb[1] << 8) + pb[0];\n+ if (c1 != c2)\n+ {\n+ diffnum++;\n+ if (has_tcolor && c2 == tcolor)\n+ over_is_possible = 0;\n+ if (i < x_min)\n+ x_min = i;\n+ if (i > x_max)\n+ x_max = i;\n+ if (j < y_min)\n+ y_min = j;\n+ if (j > y_max)\n+ y_max = j;\n+ }\n+ 
else\n+ c2 = tcolor;\n+\n+ memcpy(pc, &c2, 3);\n+ pa += 3;\n+ pb += 3;\n+ pc += 3;\n+ }\n+ }\n+ else if (bpp == 4)\n+ {\n+ uint32_t* pa = (uint32_t*)pimage1;\n+ uint32_t* pb = (uint32_t*)pimage2;\n+ uint32_t* pc = (uint32_t*)ptemp;\n+\n+ for (j = 0; j < h; j++)\n+ for (i = 0; i < w; i++)\n+ {\n+ uint32_t c1 = *pa++;\n+ uint32_t c2 = *pb++;\n+ if ((c1 != c2) && ((c1 >> 24) || (c2 >> 24)))\n+ {\n+ diffnum++;\n+ if ((c2 >> 24) != 0xFF)\n+ over_is_possible = 0;\n+ if (i < x_min)\n+ x_min = i;\n+ if (i > x_max)\n+ x_max = i;\n+ if (j < y_min)\n+ y_min = j;\n+ if (j > y_max)\n+ y_max = j;\n+ }\n+ else\n+ c2 = 0;\n+\n+ *pc++ = c2;\n+ }\n+ }\n+\n+ if (diffnum == 0)\n+ {\n+ x0 = y0 = 0;\n+ w0 = h0 = 1;\n+ }\n+ else\n+ {\n+ x0 = x_min;\n+ y0 = y_min;\n+ w0 = x_max - x_min + 1;\n+ h0 = y_max - y_min + 1;\n+ }\n+\n+ deflate_rect_op(pimage2, x0, y0, w0, h0, bpp, stride, zbuf_size, n * 2);\n+\n+ if (over_is_possible)\n+ deflate_rect_op(ptemp, x0, y0, w0, h0, bpp, stride, zbuf_size, n * 2 + 1);\n+}\n+\n+void PngEncoder::deflate_rect_fin(int deflate_method, int iter, unsigned char* zbuf, uint32_t* zsize, int bpp, int stride, unsigned char* rows, int zbuf_size, int n)\n+{\n+ unsigned char* row = op[n].p + op[n].y * stride + op[n].x * bpp;\n+ int rowbytes = op[n].w * bpp;\n+\n+ if (op[n].filters == 0)\n+ {\n+ unsigned char* dp = rows;\n+ for (int j = 0; j < op[n].h; j++)\n+ {\n+ *dp++ = 0;\n+ memcpy(dp, row, rowbytes);\n+ dp += rowbytes;\n+ row += stride;\n+ }\n+ }\n+ else\n+ process_rect(row, rowbytes, bpp, stride, op[n].h, rows);\n+\n+ if (deflate_method == 2)\n+ {\n+ CV_UNUSED(iter);\n+#if 0 // needs include \"zopfli.h\"\n+ ZopfliOptions opt_zopfli;\n+ unsigned char* data = 0;\n+ size_t size = 0;\n+ ZopfliInitOptions(&opt_zopfli);\n+ opt_zopfli.numiterations = iter;\n+ ZopfliCompress(&opt_zopfli, ZOPFLI_FORMAT_ZLIB, rows, op[n].h * (rowbytes + 1), &data, &size);\n+ if (size < (size_t)zbuf_size)\n+ {\n+ memcpy(zbuf, data, size);\n+ *zsize = size;\n+ }\n+ free(data);\n+#endif\n+ }\n+ else if (deflate_method == 1)\n+ {\n+#if 0 // needs include \"7z.h\"\n+ unsigned size = zbuf_size;\n+ compress_rfc1950_7z(rows, op[n].h * (rowbytes + 1), zbuf, size, iter < 100 ? iter : 100, 255);\n+ *zsize = size;\n+#endif\n+ }\n+ else\n+ {\n+ z_stream fin_zstream;\n+\n+ fin_zstream.data_type = Z_BINARY;\n+ fin_zstream.zalloc = Z_NULL;\n+ fin_zstream.zfree = Z_NULL;\n+ fin_zstream.opaque = Z_NULL;\n+ deflateInit2(&fin_zstream, Z_BEST_COMPRESSION, 8, 15, 8, op[n].filters ? 
Z_FILTERED : Z_DEFAULT_STRATEGY);\n+\n+ fin_zstream.next_out = zbuf;\n+ fin_zstream.avail_out = zbuf_size;\n+ fin_zstream.next_in = rows;\n+ fin_zstream.avail_in = op[n].h * (rowbytes + 1);\n+ deflate(&fin_zstream, Z_FINISH);\n+ *zsize = fin_zstream.total_out;\n+ deflateEnd(&fin_zstream);\n+ }\n+}\n+\n+bool PngEncoder::writemulti(const std::vector& img_vec, const std::vector& params)\n+{\n+ CV_Assert(img_vec[0].depth() == CV_8U);\n+ Animation animation;\n+ animation.frames = img_vec;\n+\n+ for (size_t i = 0; i < animation.frames.size(); i++)\n+ {\n+ animation.durations.push_back(1000);\n+ }\n+ return writeanimation(animation, params);\n+}\n+\n+bool PngEncoder::writeanimation(const Animation& animation, const std::vector& params)\n+{\n+ int compression_level = 6;\n+ int compression_strategy = IMWRITE_PNG_STRATEGY_RLE; // Default strategy\n+ bool isBilevel = false;\n+\n+ for (size_t i = 0; i < params.size(); i += 2)\n+ {\n+ if (params[i] == IMWRITE_PNG_COMPRESSION)\n+ {\n+ compression_strategy = IMWRITE_PNG_STRATEGY_DEFAULT; // Default strategy\n+ compression_level = params[i + 1];\n+ compression_level = MIN(MAX(compression_level, 0), Z_BEST_COMPRESSION);\n+ }\n+ if (params[i] == IMWRITE_PNG_STRATEGY)\n+ {\n+ compression_strategy = params[i + 1];\n+ compression_strategy = MIN(MAX(compression_strategy, 0), Z_FIXED);\n+ }\n+ if (params[i] == IMWRITE_PNG_BILEVEL)\n+ {\n+ isBilevel = params[i + 1] != 0;\n+ }\n+ }\n+\n+ std::vector frames;\n+ std::vector tmpframes;\n+\n+ for (size_t i = 0; i < animation.frames.size(); i++)\n+ {\n+ APNGFrame apngFrame;\n+ tmpframes.push_back(animation.frames[i].clone());\n+ if (animation.frames[i].channels() == 4)\n+ cvtColor(animation.frames[i], tmpframes[i], COLOR_BGRA2RGBA);\n+ if (animation.frames[i].channels() == 3)\n+ cvtColor(animation.frames[i], tmpframes[i], COLOR_BGR2RGB);\n+ apngFrame.setMat(tmpframes[i]);\n+ apngFrame.setDelayDen(animation.durations[i]);\n+ frames.push_back(apngFrame);\n+ }\n+\n+ CV_UNUSED(isBilevel);\n+ uint32_t first =0;\n+ uint32_t loops= animation.loop_count;\n+ uint32_t coltype= animation.frames[0].channels() == 1 ? PNG_COLOR_TYPE_GRAY : animation.frames[0].channels() == 3 ? PNG_COLOR_TYPE_RGB : PNG_COLOR_TYPE_RGB_ALPHA;\n+ int deflate_method=0;\n+ int iter=0;\n+\n+ FILE* m_f = NULL;\n+ uint32_t i, j, k;\n+ uint32_t x0, y0, w0, h0, dop, bop;\n+ uint32_t idat_size, zbuf_size, zsize;\n+ unsigned char* zbuf;\n+ unsigned char header[8] = { 137, 80, 78, 71, 13, 10, 26, 10 };\n+ uint32_t num_frames = (int)frames.size();\n+ uint32_t width = frames[0].getWidth();\n+ uint32_t height = frames[0].getHeight();\n+ uint32_t bpp = (coltype == 6) ? 4 : (coltype == 2) ? 3\n+ : (coltype == 4) ? 2\n+ : 1;\n+ uint32_t has_tcolor = (coltype >= 4 || (coltype <= 2 && trnssize)) ? 1 : 0;\n+ uint32_t tcolor = 0;\n+ uint32_t rowbytes = width * bpp;\n+ uint32_t imagesize = rowbytes * height;\n+\n+ unsigned char* temp = new unsigned char[imagesize];\n+ unsigned char* over1 = new unsigned char[imagesize];\n+ unsigned char* over2 = new unsigned char[imagesize];\n+ unsigned char* over3 = new unsigned char[imagesize];\n+ unsigned char* rest = new unsigned char[imagesize];\n+ unsigned char* rows = new unsigned char[(rowbytes + 1) * height];", - "comment_created_at": "2024-12-21T07:44:06+00:00", - "comment_author": "asmorkalov", - "comment_body": "I propose to use `std::vector` or `cv::AutoBuffer` to prevent memory leaks. 
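The proposal just above targets the block of `new unsigned char[imagesize]` scratch buffers in `writeanimation`. A compact sketch of the vector version — the names mirror the diff, while the sizes are taken as parameters here:

```cpp
#include <vector>

static void allocateScratchBuffersSketch(size_t imagesize, size_t rowbytes, size_t height)
{
    // One vector per scratch buffer: every early return in the encoder releases them all.
    std::vector<unsigned char> temp(imagesize), over1(imagesize), over2(imagesize),
                               over3(imagesize), rest(imagesize);
    std::vector<unsigned char> rows((rowbytes + 1) * height);

    // The encoder would pass temp.data(), rows.data(), ... where it previously passed raw pointers.
    (void)temp; (void)over1; (void)over2; (void)over3; (void)rest; (void)rows;
}
```

`cv::AutoBuffer<unsigned char>` is the drop-in alternative when staying within OpenCV's own utilities.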
", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1668761773", - "pr_number": 25608, - "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", - "created_at": "2024-07-08T14:33:40+00:00", - "commented_code": "WebPBitstreamFeatures features;\n if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))\n {\n CV_CheckEQ(features.has_animation, 0, \"WebP backend does not support animated webp images\");\n m_has_animation = features.has_animation > 0;\n if (m_has_animation)\n {\n fs.seekg(0, std::ios::beg); CV_Assert(fs && \"File stream error\");\n data.create(1, validateToInt(fs_size), CV_8UC1);\n fs.read((char*)data.ptr(), fs_size);\n CV_Assert(fs && \"Can't read file data\");\n fs.close();\n\n CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);\n\n WebPData webp_data;\n webp_data.bytes = (const uint8_t*)data.ptr();\n webp_data.size = data.total();\n\n WebPAnimDecoderOptions dec_options;\n WebPAnimDecoderOptionsInit(&dec_options);\n dec_options.color_mode = MODE_BGRA;\n anim_decoder = WebPAnimDecoderNew(&webp_data, &dec_options);", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1668761773", - "repo_full_name": "opencv/opencv", - "pr_number": 25608, - "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", - "discussion_id": "1668761773", - "commented_code": "@@ -126,8 +86,38 @@ bool WebPDecoder::readHeader()\n WebPBitstreamFeatures features;\n if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))\n {\n- CV_CheckEQ(features.has_animation, 0, \"WebP backend does not support animated webp images\");\n+ m_has_animation = features.has_animation > 0;\n+ if (m_has_animation)\n+ {\n+ fs.seekg(0, std::ios::beg); CV_Assert(fs && \"File stream error\");\n+ data.create(1, validateToInt(fs_size), CV_8UC1);\n+ fs.read((char*)data.ptr(), fs_size);\n+ CV_Assert(fs && \"Can't read file data\");\n+ fs.close();\n+\n+ CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);\n+\n+ WebPData webp_data;\n+ webp_data.bytes = (const uint8_t*)data.ptr();\n+ webp_data.size = data.total();\n \n+ WebPAnimDecoderOptions dec_options;\n+ WebPAnimDecoderOptionsInit(&dec_options);\n+ dec_options.color_mode = MODE_BGRA;\n+ anim_decoder = WebPAnimDecoderNew(&webp_data, &dec_options);", - "comment_created_at": "2024-07-08T14:33:40+00:00", - "comment_author": "vrabaud", - "comment_body": "You are missing a WebPAnimDecoderDelete. Either you create a destructor for WebPDecoder, either you wrap the pointer in a unique_ptr. 
E.g., create in the header, a private struct:\r\n```cpp\r\nstruct UniquePtrDeleter\r\n{\r\n void operator()(WebPAnimDecoder * decoder) const { WebPAnimDecoderDelete(decoder); }\r\n void operator()(WebpData * dara) const { WebPDataClear(decoder); }\r\n};\r\n\r\n// Use these unique_ptr to ensure the structs are automatically destroyed.\r\nstd::unique_ptr anim_decoder;\r\n// or create a type:\r\nusing DataPtr = std::unique_ptr;\r\n```\r\nI'd go with this, so that you do not have to remember to call a destructor.", - "pr_file_module": null - }, - { - "comment_id": "1673724498", - "repo_full_name": "opencv/opencv", - "pr_number": 25608, - "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", - "discussion_id": "1668761773", - "commented_code": "@@ -126,8 +86,38 @@ bool WebPDecoder::readHeader()\n WebPBitstreamFeatures features;\n if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))\n {\n- CV_CheckEQ(features.has_animation, 0, \"WebP backend does not support animated webp images\");\n+ m_has_animation = features.has_animation > 0;\n+ if (m_has_animation)\n+ {\n+ fs.seekg(0, std::ios::beg); CV_Assert(fs && \"File stream error\");\n+ data.create(1, validateToInt(fs_size), CV_8UC1);\n+ fs.read((char*)data.ptr(), fs_size);\n+ CV_Assert(fs && \"Can't read file data\");\n+ fs.close();\n+\n+ CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);\n+\n+ WebPData webp_data;\n+ webp_data.bytes = (const uint8_t*)data.ptr();\n+ webp_data.size = data.total();\n \n+ WebPAnimDecoderOptions dec_options;\n+ WebPAnimDecoderOptionsInit(&dec_options);\n+ dec_options.color_mode = MODE_BGRA;\n+ anim_decoder = WebPAnimDecoderNew(&webp_data, &dec_options);", - "comment_created_at": "2024-07-11T09:38:39+00:00", - "comment_author": "sturkmen72", - "comment_body": "@vrabaud could you check the last commit. frankly i tried your suggestion using std::unique_ptr but no success.. 
can last commit resolve this conversation.", - "pr_file_module": null - }, - { - "comment_id": "1676777267", - "repo_full_name": "opencv/opencv", - "pr_number": 25608, - "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", - "discussion_id": "1668761773", - "commented_code": "@@ -126,8 +86,38 @@ bool WebPDecoder::readHeader()\n WebPBitstreamFeatures features;\n if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))\n {\n- CV_CheckEQ(features.has_animation, 0, \"WebP backend does not support animated webp images\");\n+ m_has_animation = features.has_animation > 0;\n+ if (m_has_animation)\n+ {\n+ fs.seekg(0, std::ios::beg); CV_Assert(fs && \"File stream error\");\n+ data.create(1, validateToInt(fs_size), CV_8UC1);\n+ fs.read((char*)data.ptr(), fs_size);\n+ CV_Assert(fs && \"Can't read file data\");\n+ fs.close();\n+\n+ CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);\n+\n+ WebPData webp_data;\n+ webp_data.bytes = (const uint8_t*)data.ptr();\n+ webp_data.size = data.total();\n \n+ WebPAnimDecoderOptions dec_options;\n+ WebPAnimDecoderOptionsInit(&dec_options);\n+ dec_options.color_mode = MODE_BGRA;\n+ anim_decoder = WebPAnimDecoderNew(&webp_data, &dec_options);", - "comment_created_at": "2024-07-13T07:24:12+00:00", - "comment_author": "vrabaud", - "comment_body": "When calling the WebP API you need to get a pointer so if you have a `std::unique_ptr anim_decoder;`, just call `anim_decoder.get()`", - "pr_file_module": null - }, - { - "comment_id": "1676807079", - "repo_full_name": "opencv/opencv", - "pr_number": 25608, - "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", - "discussion_id": "1668761773", - "commented_code": "@@ -126,8 +86,38 @@ bool WebPDecoder::readHeader()\n WebPBitstreamFeatures features;\n if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))\n {\n- CV_CheckEQ(features.has_animation, 0, \"WebP backend does not support animated webp images\");\n+ m_has_animation = features.has_animation > 0;\n+ if (m_has_animation)\n+ {\n+ fs.seekg(0, std::ios::beg); CV_Assert(fs && \"File stream error\");\n+ data.create(1, validateToInt(fs_size), CV_8UC1);\n+ fs.read((char*)data.ptr(), fs_size);\n+ CV_Assert(fs && \"Can't read file data\");\n+ fs.close();\n+\n+ CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);\n+\n+ WebPData webp_data;\n+ webp_data.bytes = (const uint8_t*)data.ptr();\n+ webp_data.size = data.total();\n \n+ WebPAnimDecoderOptions dec_options;\n+ WebPAnimDecoderOptionsInit(&dec_options);\n+ dec_options.color_mode = MODE_BGRA;\n+ anim_decoder = WebPAnimDecoderNew(&webp_data, &dec_options);", - "comment_created_at": "2024-07-13T11:02:33+00:00", - "comment_author": "sturkmen72", - "comment_body": "@vrabaud could you check https://github.com/opencv/opencv/pull/25608/commits/a105d94aff541d2b4593150dc834bab279ad6cd6", - "pr_file_module": null - }, - { - "comment_id": "1677917053", - "repo_full_name": "opencv/opencv", - "pr_number": 25608, - "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", - "discussion_id": "1668761773", - "commented_code": "@@ -126,8 +86,38 @@ bool WebPDecoder::readHeader()\n WebPBitstreamFeatures features;\n if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))\n {\n- CV_CheckEQ(features.has_animation, 0, \"WebP backend does not support animated webp images\");\n+ m_has_animation = features.has_animation > 0;\n+ if (m_has_animation)\n+ {\n+ fs.seekg(0, std::ios::beg); CV_Assert(fs && \"File stream error\");\n+ data.create(1, validateToInt(fs_size), CV_8UC1);\n+ 
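The WebP thread above converges on wrapping the C handles in `std::unique_ptr` with custom deleters; the quoted snippet lost its template arguments to formatting, so the fully spelled-out form is reconstructed here as a sketch (libwebp's demux API is assumed to be available):

```cpp
#include <memory>
#include <webp/demux.h>  // WebPAnimDecoder / WebPData APIs from libwebp

// Deleter that forwards to the C API, so the decoder handle cleans itself up.
// An analogous deleter calling WebPDataClear(WebPData*) could own the input data as well.
struct AnimDecoderDeleter
{
    void operator()(WebPAnimDecoder* decoder) const { WebPAnimDecoderDelete(decoder); }
};

using AnimDecoderPtr = std::unique_ptr<WebPAnimDecoder, AnimDecoderDeleter>;

static AnimDecoderPtr makeAnimDecoderSketch(const WebPData& webp_data)
{
    WebPAnimDecoderOptions dec_options;
    WebPAnimDecoderOptionsInit(&dec_options);
    dec_options.color_mode = MODE_BGRA;
    return AnimDecoderPtr(WebPAnimDecoderNew(&webp_data, &dec_options));
}
```

As the reviewer notes, call sites then pass `anim_decoder.get()` wherever the C API expects a raw `WebPAnimDecoder*`.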
fs.read((char*)data.ptr(), fs_size);\n+ CV_Assert(fs && \"Can't read file data\");\n+ fs.close();\n+\n+ CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);\n+\n+ WebPData webp_data;\n+ webp_data.bytes = (const uint8_t*)data.ptr();\n+ webp_data.size = data.total();\n \n+ WebPAnimDecoderOptions dec_options;\n+ WebPAnimDecoderOptionsInit(&dec_options);\n+ dec_options.color_mode = MODE_BGRA;\n+ anim_decoder = WebPAnimDecoderNew(&webp_data, &dec_options);", - "comment_created_at": "2024-07-15T14:23:58+00:00", - "comment_author": "sturkmen72", - "comment_body": "should i mark this as resolved?", - "pr_file_module": null - }, - { - "comment_id": "1681234937", - "repo_full_name": "opencv/opencv", - "pr_number": 25608, - "pr_file": "modules/imgcodecs/src/grfmt_webp.cpp", - "discussion_id": "1668761773", - "commented_code": "@@ -126,8 +86,38 @@ bool WebPDecoder::readHeader()\n WebPBitstreamFeatures features;\n if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))\n {\n- CV_CheckEQ(features.has_animation, 0, \"WebP backend does not support animated webp images\");\n+ m_has_animation = features.has_animation > 0;\n+ if (m_has_animation)\n+ {\n+ fs.seekg(0, std::ios::beg); CV_Assert(fs && \"File stream error\");\n+ data.create(1, validateToInt(fs_size), CV_8UC1);\n+ fs.read((char*)data.ptr(), fs_size);\n+ CV_Assert(fs && \"Can't read file data\");\n+ fs.close();\n+\n+ CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);\n+\n+ WebPData webp_data;\n+ webp_data.bytes = (const uint8_t*)data.ptr();\n+ webp_data.size = data.total();\n \n+ WebPAnimDecoderOptions dec_options;\n+ WebPAnimDecoderOptionsInit(&dec_options);\n+ dec_options.color_mode = MODE_BGRA;\n+ anim_decoder = WebPAnimDecoderNew(&webp_data, &dec_options);", - "comment_created_at": "2024-07-17T15:11:36+00:00", - "comment_author": "vrabaud", - "comment_body": "Just do the same thing for data and encoder (or maybe simply do what you did for the encoder, by using a full unique_ptr definition if you only use those once in the code.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1259283684", - "pr_number": 23960, - "pr_file": "modules/video/src/tracking/bytetracker.cpp", - "created_at": "2023-07-11T07:07:13+00:00", - "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n\n\n\n#include \"../precomp.hpp\"\n\n//#include \"opencv2/video/detail/bytetracker.hpp\"\n// /#include \"opencv2/video/detail/bytetracker_strack.hpp\"\n#include \"opencv2/video/detail/lapjv.hpp\"\n#include \"opencv2/video/detail/tracking.detail.hpp\"\n#include \n\n// #include \"detail/bytetracker.hpp\"\n// #include \"detail/bytetracker_strack.hpp\"\n// #include \"detail/lapjv.hpp\"\n\n#ifdef HAVE_OPENCV_DNN\n#include \"opencv2/dnn.hpp\"\n#endif\n\nusing namespace std;\nusing namespace cv;\n\nnamespace cv {\n\n\n//using cv::detail::tracking::Strack;\n//using cv::detail::tracking::Detection;\n//using cv::detail::tracking::TrackState;\n\n\n\nByteTracker::ByteTracker()\n{\n //nothing\n}\n\nByteTracker::~ByteTracker()\n{\n //nothing\n}\n\nByteTracker::Params::Params()\n{\n frameRate = 30;\n frameBuffer = 30;\n}\n \n \n\n\n\nclass ByteTrackerImpl : public ByteTracker\n{ \npublic:\n //ByteTracker(int, int);\n ByteTrackerImpl(const ByteTracker::Params& parameters) : params_(parameters)\n {\n trackThreshold_ = 0.5f;\n matchThreshold_ = 0.7f;\n lastId_ = 0;\n frame_ = 0;\n 
maxTimeLost_ = static_cast(params_.frameRate / 30.0f * params_.frameBuffer);\n }\n\n void init(InputArray image, const Rect& boundingBox);\n //std::vector> update(std::vector>)\n vector update(vector& objects);\n //Scalar get_color(int idx);\n int getFrame();\n map lapjv(vector> &cost);\n\nprotected:\n ByteTracker::Params params_;\n float trackThreshold_;\n float matchThreshold_;\n unordered_map trackedStracks_;\n unordered_map lostStracks_;\n int lastId_;\n int frame_;\n int maxTimeLost_;\n KalmanFilter kalmanFilter_;\n\n void getDetections(vector& Objects, vector& detections, \n vector& detectionsLow);\n\n\n void addNewDetectedTracks(unordered_map trackedMap,\n vector& inactiveStracks, vector& trackedStracks);\n\n\n Mat getCostMatrix(vector& tracks, vector& btracks);\n Mat getCostMatrix(unordered_map& atracks, vector &btracks);\n\n \n Mat calculateIous(vector& atlwhs, vector &btlwhs);\n\n\n unordered_map joinStracks(const vector& trackA, vector& trackB);\n unordered_map joinStracks(const vector& trackVector,\n unordered_map& trackMap, bool inplace);\n\n};\n\n\nvector ByteTrackerImpl::update(vector &objects)\n{\n\n // Detetions, Dk = Detections(fk)\n vector detections; // consider changing to cv::Mat_\n vector detectionsLow;\n vector remainDets;\n vector activatedStracks;\n vector reRemainTracks;\n\n getDetections(objects, detections, detectionsLow); // objects -> D and Dlow\n\n vector inactiveStracks;\n vector trackedStracks;\n\n addNewDetectedTracks(trackedStracks_, inactiveStracks, trackedStracks); // trackedStracks_ -> inactive and active\n\n unordered_map strackPool;\n strackPool = joinStracks(trackedStracks, lostStracks_, false);\n // remember that in the first association we consider the lost tracks too\n // we need to predict the tracks to do association\n // it updates strackPool with prediction, maybe pass strackPool by reference\n for (auto &track : strackPool)\n {\n cv::Mat prediction = track.second.predict(); // cx cy w h\n prediction.at(0, 0) -= prediction.at(2, 0);\n prediction.at(1, 0) -= prediction.at(3, 0);\n track.second.setTlwh(prediction);\n }\n\n // getting map keys from the indexes\n unordered_map indexToKey;\n int index = 0;\n for (const auto &pair : strackPool)\n {\n int key = pair.first;\n indexToKey[index] = key;\n ++index;\n }\n\n // First association with IoU\n vector> dists; // IoU distances, maybe change it to mat type?\n dists = getCostMatrix(strackPool, detections);\n\n vector remainTracks;\n vector strackIndex;\n vector detectionsIndex;\n map matches;\n\n matches = lapjv(dists); // returns a map (track_i,matched_det_index)\n // cout << \"\\n Num of matches: \" << matches.size();\n\n // Find unmatched track indexes\n for (int trackIndex = 0; trackIndex < static_cast(strackPool.size()); ++trackIndex)\n {\n if (matches.find(trackIndex) == matches.end())\n {\n strackIndex.push_back(trackIndex);\n }\n }\n\n // Find unmatched detection indexes\n for (int detectionIndex = 0; detectionIndex < static_cast(detections.size());\n ++detectionIndex)\n {\n bool matched = false;\n for (const auto &match : matches)\n {\n int matchedDetectionIndex = match.second;\n if (detectionIndex == matchedDetectionIndex)\n {\n matched = true;\n break;\n }\n }\n if (!matched)\n {\n detectionsIndex.push_back(detectionIndex);\n }\n }\n\n // remain tracks and dets\n for (int i = 0; i < static_cast(strackIndex.size()); i++)\n {\n int key = indexToKey[strackIndex[i]];\n Strack track = strackPool[key];\n remainTracks.push_back(track);\n }\n for (int j = 0; j < 
static_cast(detectionsIndex.size()); j++)\n {\n remainDets.push_back(detections[detectionsIndex[j]]);\n }\n\n for (auto &pair : matches) // row\n {\n int key = indexToKey[pair.first];\n Strack &track = strackPool[key];\n Strack &detection = detections[pair.second];\n\n // if it's tracked, update it, else reactivate it\n if (track.getState() == TrackState::Tracked)\n {\n track.update(detection);\n activatedStracks.push_back(track);\n }\n else\n {\n track.reactivate(detection, frame_);\n activatedStracks.push_back(track);\n lostStracks_.erase(track.getId());\n }\n }\n\n dists = getCostMatrix(remainTracks, detectionsLow);\n strackIndex.clear();\n detectionsIndex.clear();\n matches = lapjv(dists);\n\n // cout << \"\\n Num of low matches: \" << matches.size();\n\n // Find unmatched track indexes\n for (int trackIndex = 0; trackIndex < static_cast(remainTracks.size()); ++trackIndex)\n {\n if (matches.find(trackIndex) == matches.end())\n {\n strackIndex.push_back(trackIndex);\n }\n }\n\n // Find unmatched detection indexes\n for (int detectionIndex = 0; detectionIndex < static_cast(detectionsLow.size());\n ++detectionIndex)\n {\n bool matched = false;\n for (const auto &match : matches)\n {\n int matchedDetectionIndex = match.second;\n if (detectionIndex == matchedDetectionIndex)\n {\n matched = true;\n break;\n }\n }\n if (!matched)\n {\n detectionsIndex.push_back(detectionIndex);\n }\n }\n\n for (int i = 0; i < static_cast(strackIndex.size()); i++)\n {\n reRemainTracks.push_back(remainTracks[strackIndex[i]]);\n }\n\n for (auto pair : matches) // row\n {\n Strack &track = remainTracks[pair.first];\n Strack &detection = detectionsLow[pair.second];\n\n // if it's tracked, update it, else re_activate it\n if (track.getState() == TrackState::Tracked)\n {\n track.update(detection);\n activatedStracks.push_back(track);\n }\n else\n {\n track.reactivate(detection, frame_);\n activatedStracks.push_back(track);\n lostStracks_.erase(track.getId());\n }\n }\n\n // initialize new tracks\n for (int i = 0; i < static_cast(remainDets.size()); i++)\n {\n Strack newTrack = remainDets[i];\n newTrack.activate(getFrame(), lastId_++);\n activatedStracks.push_back(newTrack);\n }\n joinStracks(activatedStracks, trackedStracks_, true); //\"true\" is replacing in place\n joinStracks(reRemainTracks, lostStracks_, true);\n\n // deal with lost tracks and save them in an attribute\n vector keysToRemove;\n for (auto &track : lostStracks_)\n {\n track.second.incrementTrackletLen();\n if ((track.second.getTrackletLen()) >maxTimeLost_)\n keysToRemove.push_back(track.first);\n else\n track.second.setState(TrackState::Lost);\n }\n\n for (int key : keysToRemove)\n {\n lostStracks_.erase(key);\n }\n\n vector ret;\n for (const auto &pair : trackedStracks_)\n {\n const Strack &strack = pair.second;\n ret.push_back(strack);\n }\n\n return ret;\n}\n\nvoid ByteTrackerImpl::getDetections(vector &objects, vector &detections, \n vector &detectionsLow)\n{\n frame_++; // update frame\n for (const auto &obj : objects)\n {\n Strack strack(obj.box, obj.confidence);\n if (obj.confidence >= trackThreshold_) // Dhigh or Dlow\n {\n detections.push_back(strack);\n }\n else\n {\n detectionsLow.push_back(strack);\n }\n }\n}\n\nvoid ByteTrackerImpl::addNewDetectedTracks(unordered_map trackedMap, \n vector &inactiveStracks, vector &trackedStracks)\n{\n // checks if the trackedStracks are activated to keep them in the vector(same name)\n for (auto pair : trackedMap)\n {\n Strack track = pair.second;\n if (track.getState() == TrackState::Tracked)\n 
trackedStracks.push_back(track);\n else\n inactiveStracks.push_back(track);\n }\n}\n\nMat ByteTrackerImpl::getCostMatrix(vector &atracks, vector &btracks)\n{\n Mat costMatrix;\n if (atracks.size() == 0 || btracks.size() == 0)\n {\n return costMatrix; // returns empty matrix\n }\n\n vector atlwhs, btlwhs;\n for (auto& track : atracks)\n {\n atlwhs.push_back(track.getTlwh());\n }\n for (auto& track : btracks)\n {\n btlwhs.push_back(track.getTlwh());\n }\n\n costMatrix = calculateIous(atlwhs, btlwhs);\n subtract(1, costMatrix, costMatrix); //costMatrix = 1 - costMatrix\n\n return costMatrix;\n}\n\nMat ByteTrackerImpl::getCostMatrix(unordered_map &atracks, vector &btracks)\n{\n Mat costMatrix;\n if (atracks.size() == 0 && btracks.size() == 0)\n {\n return costMatrix; // returns empty matrix\n }\n\n vector atlwhs, btlwhs;\n for (auto &pair : atracks)\n {\n Rect tlwh = pair.second.getTlwh();\n atlwhs.push_back(tlwh);\n }\n\n for (auto &track : btracks)\n {\n Rect tlwh = track.getTlwh();\n btlwhs.push_back(tlwh);\n }\n\n costMatrix = calculateIous(atlwhs, btlwhs);\n subtract(1, costMatrix, costMatrix); //costMatrix = 1 - costMatrix\n\n return costMatrix;\n}\n\n\nMat ByteTrackerImpl::calculateIous(vector &atlwhs, vector &btlwhs)\n{\n Mat calculateIous;\n if (atlwhs.empty() || btlwhs.empty())\n {\n return calculateIous;\n }\n\n calculateIous.create(static_cast(atlwhs.size()), static_cast(btlwhs.size()), CV_32F);\n \n // bbox_ious\n for (int i = 0; i < static_cast(atlwhs.size()); ++i)\n {\n for (int j = 0; j < static_cast(btlwhs.size()); ++j)\n {\n cv::Rect intersection = atlwhs[i] & btlwhs[j];\n cv::Rect unionRect = atlwhs[i] | btlwhs[j];\n float intersectionArea = intersection.area();\n float unionArea = unionRect.area();\n calculateIous.at(i, j) = intersectionArea / unionArea;\n }\n }\n\n return calculateIous;\n}\n\nunordered_map ByteTrackerImpl::joinStracks(\n const vector& trackA, vector& trackB)\n{\n unordered_map joinedTracks;\n\n for (const auto &track : trackA)\n {\n joinedTracks.emplace(track.getId(), track);\n }\n\n for (const auto &track : trackB)\n {\n joinedTracks.emplace(track.getId(), track);\n }\n\n return joinedTracks;\n}\n\n// overload to receive a hashmap\nunordered_map ByteTrackerImpl::joinStracks(const vector& trackVector,\n unordered_map& trackMap, bool inplace)\n{\n if (inplace)\n {\n for (const auto& track : trackVector)\n {\n trackMap.emplace(track.getId(), track);\n }\n return trackMap;\n }\n\n unordered_map joinedTracks = trackMap;\n for (const auto &track : trackVector)\n {\n joinedTracks.emplace(track.getId(), track);\n }\n\n return joinedTracks;\n\n}\n\n\n\nmap ByteTrackerImpl::lapjv(vector> &cost)\n{\n map ret;\n if (cost.size() == 0 || cost[0].size() == 0)\n return ret;\n int maxI = cost.size();\n int maxJ = cost[0].size();\n int n = max(maxJ, maxI);\n double **cost_ptr;\n double *u = new double[sizeof(double) * n];\n double *v = new double[sizeof(double) * n];\n int *x_c = new int[n];\n int *y_c = new int[n];\n cost_ptr = new double *[sizeof(double *) * n];", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1259283684", - "repo_full_name": "opencv/opencv", - "pr_number": 23960, - "pr_file": "modules/video/src/tracking/bytetracker.cpp", - "discussion_id": "1259283684", - "commented_code": "@@ -0,0 +1,533 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+\n+\n+\n+#include 
\"../precomp.hpp\"\n+\n+//#include \"opencv2/video/detail/bytetracker.hpp\"\n+// /#include \"opencv2/video/detail/bytetracker_strack.hpp\"\n+#include \"opencv2/video/detail/lapjv.hpp\"\n+#include \"opencv2/video/detail/tracking.detail.hpp\"\n+#include \n+\n+// #include \"detail/bytetracker.hpp\"\n+// #include \"detail/bytetracker_strack.hpp\"\n+// #include \"detail/lapjv.hpp\"\n+\n+#ifdef HAVE_OPENCV_DNN\n+#include \"opencv2/dnn.hpp\"\n+#endif\n+\n+using namespace std;\n+using namespace cv;\n+\n+namespace cv {\n+\n+\n+//using cv::detail::tracking::Strack;\n+//using cv::detail::tracking::Detection;\n+//using cv::detail::tracking::TrackState;\n+\n+\n+\n+ByteTracker::ByteTracker()\n+{\n+ //nothing\n+}\n+\n+ByteTracker::~ByteTracker()\n+{\n+ //nothing\n+}\n+\n+ByteTracker::Params::Params()\n+{\n+ frameRate = 30;\n+ frameBuffer = 30;\n+}\n+ \n+ \n+\n+\n+\n+class ByteTrackerImpl : public ByteTracker\n+{ \n+public:\n+ //ByteTracker(int, int);\n+ ByteTrackerImpl(const ByteTracker::Params& parameters) : params_(parameters)\n+ {\n+ trackThreshold_ = 0.5f;\n+ matchThreshold_ = 0.7f;\n+ lastId_ = 0;\n+ frame_ = 0;\n+ maxTimeLost_ = static_cast(params_.frameRate / 30.0f * params_.frameBuffer);\n+ }\n+\n+ void init(InputArray image, const Rect& boundingBox);\n+ //std::vector> update(std::vector>)\n+ vector update(vector& objects);\n+ //Scalar get_color(int idx);\n+ int getFrame();\n+ map lapjv(vector> &cost);\n+\n+protected:\n+ ByteTracker::Params params_;\n+ float trackThreshold_;\n+ float matchThreshold_;\n+ unordered_map trackedStracks_;\n+ unordered_map lostStracks_;\n+ int lastId_;\n+ int frame_;\n+ int maxTimeLost_;\n+ KalmanFilter kalmanFilter_;\n+\n+ void getDetections(vector& Objects, vector& detections, \n+ vector& detectionsLow);\n+\n+\n+ void addNewDetectedTracks(unordered_map trackedMap,\n+ vector& inactiveStracks, vector& trackedStracks);\n+\n+\n+ Mat getCostMatrix(vector& tracks, vector& btracks);\n+ Mat getCostMatrix(unordered_map& atracks, vector &btracks);\n+\n+ \n+ Mat calculateIous(vector& atlwhs, vector &btlwhs);\n+\n+\n+ unordered_map joinStracks(const vector& trackA, vector& trackB);\n+ unordered_map joinStracks(const vector& trackVector,\n+ unordered_map& trackMap, bool inplace);\n+\n+};\n+\n+\n+vector ByteTrackerImpl::update(vector &objects)\n+{\n+\n+ // Detetions, Dk = Detections(fk)\n+ vector detections; // consider changing to cv::Mat_\n+ vector detectionsLow;\n+ vector remainDets;\n+ vector activatedStracks;\n+ vector reRemainTracks;\n+\n+ getDetections(objects, detections, detectionsLow); // objects -> D and Dlow\n+\n+ vector inactiveStracks;\n+ vector trackedStracks;\n+\n+ addNewDetectedTracks(trackedStracks_, inactiveStracks, trackedStracks); // trackedStracks_ -> inactive and active\n+\n+ unordered_map strackPool;\n+ strackPool = joinStracks(trackedStracks, lostStracks_, false);\n+ // remember that in the first association we consider the lost tracks too\n+ // we need to predict the tracks to do association\n+ // it updates strackPool with prediction, maybe pass strackPool by reference\n+ for (auto &track : strackPool)\n+ {\n+ cv::Mat prediction = track.second.predict(); // cx cy w h\n+ prediction.at(0, 0) -= prediction.at(2, 0);\n+ prediction.at(1, 0) -= prediction.at(3, 0);\n+ track.second.setTlwh(prediction);\n+ }\n+\n+ // getting map keys from the indexes\n+ unordered_map indexToKey;\n+ int index = 0;\n+ for (const auto &pair : strackPool)\n+ {\n+ int key = pair.first;\n+ indexToKey[index] = key;\n+ ++index;\n+ }\n+\n+ // First association with IoU\n+ vector> dists; 
// IoU distances, maybe change it to mat type?\n+ dists = getCostMatrix(strackPool, detections);\n+\n+ vector remainTracks;\n+ vector strackIndex;\n+ vector detectionsIndex;\n+ map matches;\n+\n+ matches = lapjv(dists); // returns a map (track_i,matched_det_index)\n+ // cout << \"\\n Num of matches: \" << matches.size();\n+\n+ // Find unmatched track indexes\n+ for (int trackIndex = 0; trackIndex < static_cast(strackPool.size()); ++trackIndex)\n+ {\n+ if (matches.find(trackIndex) == matches.end())\n+ {\n+ strackIndex.push_back(trackIndex);\n+ }\n+ }\n+\n+ // Find unmatched detection indexes\n+ for (int detectionIndex = 0; detectionIndex < static_cast(detections.size());\n+ ++detectionIndex)\n+ {\n+ bool matched = false;\n+ for (const auto &match : matches)\n+ {\n+ int matchedDetectionIndex = match.second;\n+ if (detectionIndex == matchedDetectionIndex)\n+ {\n+ matched = true;\n+ break;\n+ }\n+ }\n+ if (!matched)\n+ {\n+ detectionsIndex.push_back(detectionIndex);\n+ }\n+ }\n+\n+ // remain tracks and dets\n+ for (int i = 0; i < static_cast(strackIndex.size()); i++)\n+ {\n+ int key = indexToKey[strackIndex[i]];\n+ Strack track = strackPool[key];\n+ remainTracks.push_back(track);\n+ }\n+ for (int j = 0; j < static_cast(detectionsIndex.size()); j++)\n+ {\n+ remainDets.push_back(detections[detectionsIndex[j]]);\n+ }\n+\n+ for (auto &pair : matches) // row\n+ {\n+ int key = indexToKey[pair.first];\n+ Strack &track = strackPool[key];\n+ Strack &detection = detections[pair.second];\n+\n+ // if it's tracked, update it, else reactivate it\n+ if (track.getState() == TrackState::Tracked)\n+ {\n+ track.update(detection);\n+ activatedStracks.push_back(track);\n+ }\n+ else\n+ {\n+ track.reactivate(detection, frame_);\n+ activatedStracks.push_back(track);\n+ lostStracks_.erase(track.getId());\n+ }\n+ }\n+\n+ dists = getCostMatrix(remainTracks, detectionsLow);\n+ strackIndex.clear();\n+ detectionsIndex.clear();\n+ matches = lapjv(dists);\n+\n+ // cout << \"\\n Num of low matches: \" << matches.size();\n+\n+ // Find unmatched track indexes\n+ for (int trackIndex = 0; trackIndex < static_cast(remainTracks.size()); ++trackIndex)\n+ {\n+ if (matches.find(trackIndex) == matches.end())\n+ {\n+ strackIndex.push_back(trackIndex);\n+ }\n+ }\n+\n+ // Find unmatched detection indexes\n+ for (int detectionIndex = 0; detectionIndex < static_cast(detectionsLow.size());\n+ ++detectionIndex)\n+ {\n+ bool matched = false;\n+ for (const auto &match : matches)\n+ {\n+ int matchedDetectionIndex = match.second;\n+ if (detectionIndex == matchedDetectionIndex)\n+ {\n+ matched = true;\n+ break;\n+ }\n+ }\n+ if (!matched)\n+ {\n+ detectionsIndex.push_back(detectionIndex);\n+ }\n+ }\n+\n+ for (int i = 0; i < static_cast(strackIndex.size()); i++)\n+ {\n+ reRemainTracks.push_back(remainTracks[strackIndex[i]]);\n+ }\n+\n+ for (auto pair : matches) // row\n+ {\n+ Strack &track = remainTracks[pair.first];\n+ Strack &detection = detectionsLow[pair.second];\n+\n+ // if it's tracked, update it, else re_activate it\n+ if (track.getState() == TrackState::Tracked)\n+ {\n+ track.update(detection);\n+ activatedStracks.push_back(track);\n+ }\n+ else\n+ {\n+ track.reactivate(detection, frame_);\n+ activatedStracks.push_back(track);\n+ lostStracks_.erase(track.getId());\n+ }\n+ }\n+\n+ // initialize new tracks\n+ for (int i = 0; i < static_cast(remainDets.size()); i++)\n+ {\n+ Strack newTrack = remainDets[i];\n+ newTrack.activate(getFrame(), lastId_++);\n+ activatedStracks.push_back(newTrack);\n+ }\n+ joinStracks(activatedStracks, trackedStracks_, 
true); //\"true\" is replacing in place\n+ joinStracks(reRemainTracks, lostStracks_, true);\n+\n+ // deal with lost tracks and save them in an attribute\n+ vector keysToRemove;\n+ for (auto &track : lostStracks_)\n+ {\n+ track.second.incrementTrackletLen();\n+ if ((track.second.getTrackletLen()) >maxTimeLost_)\n+ keysToRemove.push_back(track.first);\n+ else\n+ track.second.setState(TrackState::Lost);\n+ }\n+\n+ for (int key : keysToRemove)\n+ {\n+ lostStracks_.erase(key);\n+ }\n+\n+ vector ret;\n+ for (const auto &pair : trackedStracks_)\n+ {\n+ const Strack &strack = pair.second;\n+ ret.push_back(strack);\n+ }\n+\n+ return ret;\n+}\n+\n+void ByteTrackerImpl::getDetections(vector &objects, vector &detections, \n+ vector &detectionsLow)\n+{\n+ frame_++; // update frame\n+ for (const auto &obj : objects)\n+ {\n+ Strack strack(obj.box, obj.confidence);\n+ if (obj.confidence >= trackThreshold_) // Dhigh or Dlow\n+ {\n+ detections.push_back(strack);\n+ }\n+ else\n+ {\n+ detectionsLow.push_back(strack);\n+ }\n+ }\n+}\n+\n+void ByteTrackerImpl::addNewDetectedTracks(unordered_map trackedMap, \n+ vector &inactiveStracks, vector &trackedStracks)\n+{\n+ // checks if the trackedStracks are activated to keep them in the vector(same name)\n+ for (auto pair : trackedMap)\n+ {\n+ Strack track = pair.second;\n+ if (track.getState() == TrackState::Tracked)\n+ trackedStracks.push_back(track);\n+ else\n+ inactiveStracks.push_back(track);\n+ }\n+}\n+\n+Mat ByteTrackerImpl::getCostMatrix(vector &atracks, vector &btracks)\n+{\n+ Mat costMatrix;\n+ if (atracks.size() == 0 || btracks.size() == 0)\n+ {\n+ return costMatrix; // returns empty matrix\n+ }\n+\n+ vector atlwhs, btlwhs;\n+ for (auto& track : atracks)\n+ {\n+ atlwhs.push_back(track.getTlwh());\n+ }\n+ for (auto& track : btracks)\n+ {\n+ btlwhs.push_back(track.getTlwh());\n+ }\n+\n+ costMatrix = calculateIous(atlwhs, btlwhs);\n+ subtract(1, costMatrix, costMatrix); //costMatrix = 1 - costMatrix\n+\n+ return costMatrix;\n+}\n+\n+Mat ByteTrackerImpl::getCostMatrix(unordered_map &atracks, vector &btracks)\n+{\n+ Mat costMatrix;\n+ if (atracks.size() == 0 && btracks.size() == 0)\n+ {\n+ return costMatrix; // returns empty matrix\n+ }\n+\n+ vector atlwhs, btlwhs;\n+ for (auto &pair : atracks)\n+ {\n+ Rect tlwh = pair.second.getTlwh();\n+ atlwhs.push_back(tlwh);\n+ }\n+\n+ for (auto &track : btracks)\n+ {\n+ Rect tlwh = track.getTlwh();\n+ btlwhs.push_back(tlwh);\n+ }\n+\n+ costMatrix = calculateIous(atlwhs, btlwhs);\n+ subtract(1, costMatrix, costMatrix); //costMatrix = 1 - costMatrix\n+\n+ return costMatrix;\n+}\n+\n+\n+Mat ByteTrackerImpl::calculateIous(vector &atlwhs, vector &btlwhs)\n+{\n+ Mat calculateIous;\n+ if (atlwhs.empty() || btlwhs.empty())\n+ {\n+ return calculateIous;\n+ }\n+\n+ calculateIous.create(static_cast(atlwhs.size()), static_cast(btlwhs.size()), CV_32F);\n+ \n+ // bbox_ious\n+ for (int i = 0; i < static_cast(atlwhs.size()); ++i)\n+ {\n+ for (int j = 0; j < static_cast(btlwhs.size()); ++j)\n+ {\n+ cv::Rect intersection = atlwhs[i] & btlwhs[j];\n+ cv::Rect unionRect = atlwhs[i] | btlwhs[j];\n+ float intersectionArea = intersection.area();\n+ float unionArea = unionRect.area();\n+ calculateIous.at(i, j) = intersectionArea / unionArea;\n+ }\n+ }\n+\n+ return calculateIous;\n+}\n+\n+unordered_map ByteTrackerImpl::joinStracks(\n+ const vector& trackA, vector& trackB)\n+{\n+ unordered_map joinedTracks;\n+\n+ for (const auto &track : trackA)\n+ {\n+ joinedTracks.emplace(track.getId(), track);\n+ }\n+\n+ for (const auto &track : trackB)\n+ {\n+ 
joinedTracks.emplace(track.getId(), track);\n+ }\n+\n+ return joinedTracks;\n+}\n+\n+// overload to receive a hashmap\n+unordered_map ByteTrackerImpl::joinStracks(const vector& trackVector,\n+ unordered_map& trackMap, bool inplace)\n+{\n+ if (inplace)\n+ {\n+ for (const auto& track : trackVector)\n+ {\n+ trackMap.emplace(track.getId(), track);\n+ }\n+ return trackMap;\n+ }\n+\n+ unordered_map joinedTracks = trackMap;\n+ for (const auto &track : trackVector)\n+ {\n+ joinedTracks.emplace(track.getId(), track);\n+ }\n+\n+ return joinedTracks;\n+\n+}\n+\n+\n+\n+map ByteTrackerImpl::lapjv(vector> &cost)\n+{\n+ map ret;\n+ if (cost.size() == 0 || cost[0].size() == 0)\n+ return ret;\n+ int maxI = cost.size();\n+ int maxJ = cost[0].size();\n+ int n = max(maxJ, maxI);\n+ double **cost_ptr;\n+ double *u = new double[sizeof(double) * n];\n+ double *v = new double[sizeof(double) * n];\n+ int *x_c = new int[n];\n+ int *y_c = new int[n];\n+ cost_ptr = new double *[sizeof(double *) * n];", - "comment_created_at": "2023-07-11T07:07:13+00:00", - "comment_author": "asmorkalov", - "comment_body": "Pleas use `cv::AutoBuffer` or `std::vector` for temporary buffers instead new/delete to prevent memory leaks. `cv::AutoBuffer` is preferable, it may use stack space, if buffer is small enough.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-thread-safe-resource-cleanup.json b/_reviewers/opencv-thread-safe-resource-cleanup.json new file mode 100644 index 0000000..64ddc61 --- /dev/null +++ b/_reviewers/opencv-thread-safe-resource-cleanup.json @@ -0,0 +1,70 @@ +[ + { + "discussion_id": "1942571100", + "pr_number": 23467, + "pr_file": "modules/core/misc/java/src/java/CustomCleaner.java", + "created_at": "2025-02-05T09:59:55+00:00", + "commented_code": "package org.opencv.core;\n\nimport java.lang.ref.PhantomReference;\nimport java.lang.ref.ReferenceQueue;\nimport java.util.Objects;\nimport java.util.concurrent.atomic.AtomicInteger;\n\n/**\n * This class is a Java 8+ implementation Cleaner similar to java.lang.ref.Cleaner available\n * from Java 9.\n *
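The `cv::AutoBuffer` suggestion made above for the ByteTracker `lapjv()` temporaries can be sketched as follows. The function name `lapjvScratch` is illustrative and the actual assignment solver call is omitted; the point is only that the scratch buffers free themselves (no `delete[]`) and may live on the stack when `n` is small.

```cpp
// Sketch: the raw new[]/delete[] scratch arrays replaced by cv::AutoBuffer,
// which releases its memory automatically and avoids leaks on early returns.
#include <opencv2/core.hpp>

#include <algorithm>
#include <vector>

static void lapjvScratch(const std::vector<std::vector<double>>& cost)
{
    if (cost.empty() || cost[0].empty())
        return;

    const int n = std::max(static_cast<int>(cost.size()),
                           static_cast<int>(cost[0].size()));

    cv::AutoBuffer<double> u(n), v(n);      // dual variables
    cv::AutoBuffer<int> x_c(n), y_c(n);     // row and column assignments
    cv::AutoBuffer<double*> cost_ptr(n);    // per-row pointers for the solver

    // Hand u.data(), v.data(), x_c.data(), y_c.data(), cost_ptr.data() to the
    // assignment solver exactly like the raw pointers before; nothing to free.
    (void)u; (void)v; (void)x_c; (void)y_c; (void)cost_ptr;
}
```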

\n * This implementation replaces the finalize() method that is deprecated since Java 9 and marked for removal\n * since Java 18\n *

\n * When OpenCV has Java 8 as its minimum version, this class can be removed and replaced by java.lang.ref.Cleaner.\n * In Mat, public static final Cleaner cleaner = Cleaner.create();\n */\npublic final class CustomCleaner {\n\n final PhantomCleanable phantomCleanableList;\n\n // The ReferenceQueue of pending cleaning actions\n final ReferenceQueue queue;\n\n private CustomCleaner() {\n queue = new ReferenceQueue<>();\n phantomCleanableList = new PhantomCleanable();\n }\n\n public static CustomCleaner create() {\n CustomCleaner customCleaner = new CustomCleaner();\n customCleaner.start();\n return customCleaner;\n }\n\n public Cleanable register(Object obj, Runnable action) {\n return new PhantomCleanable(Objects.requireNonNull(obj), Objects.requireNonNull(action));\n }\n\n private void start() {\n new PhantomCleanable(this, null);\n Thread thread = new CleanerThread(() -> {\n while (!phantomCleanableList.isListEmpty()) {\n try {\n Cleanable ref = (Cleanable) queue.remove(60 * 1000L);", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1942571100", + "repo_full_name": "opencv/opencv", + "pr_number": 23467, + "pr_file": "modules/core/misc/java/src/java/CustomCleaner.java", + "discussion_id": "1942571100", + "commented_code": "@@ -0,0 +1,149 @@\n+package org.opencv.core;\n+\n+import java.lang.ref.PhantomReference;\n+import java.lang.ref.ReferenceQueue;\n+import java.util.Objects;\n+import java.util.concurrent.atomic.AtomicInteger;\n+\n+/**\n+ * This class is a Java 8+ implementation Cleaner similar to java.lang.ref.Cleaner available\n+ * from Java 9.\n+ *

\n+ * This implementation replaces the finalize() method that is deprecated since Java 9 and marked for removal\n+ * since Java 18\n+ *

\n+ * When OpenCV has Java 8 as its minimum version, this class can be removed and replaced by java.lang.ref.Cleaner.\n+ * In Mat, public static final Cleaner cleaner = Cleaner.create();\n+ */\n+public final class CustomCleaner {\n+\n+ final PhantomCleanable phantomCleanableList;\n+\n+ // The ReferenceQueue of pending cleaning actions\n+ final ReferenceQueue queue;\n+\n+ private CustomCleaner() {\n+ queue = new ReferenceQueue<>();\n+ phantomCleanableList = new PhantomCleanable();\n+ }\n+\n+ public static CustomCleaner create() {\n+ CustomCleaner customCleaner = new CustomCleaner();\n+ customCleaner.start();\n+ return customCleaner;\n+ }\n+\n+ public Cleanable register(Object obj, Runnable action) {\n+ return new PhantomCleanable(Objects.requireNonNull(obj), Objects.requireNonNull(action));\n+ }\n+\n+ private void start() {\n+ new PhantomCleanable(this, null);\n+ Thread thread = new CleanerThread(() -> {\n+ while (!phantomCleanableList.isListEmpty()) {\n+ try {\n+ Cleanable ref = (Cleanable) queue.remove(60 * 1000L);", + "comment_created_at": "2025-02-05T09:59:55+00:00", + "comment_author": "opencv-alalek", + "comment_body": "Why do we need timeout?", + "pr_file_module": null + }, + { + "comment_id": "1998331173", + "repo_full_name": "opencv/opencv", + "pr_number": 23467, + "pr_file": "modules/core/misc/java/src/java/CustomCleaner.java", + "discussion_id": "1942571100", + "commented_code": "@@ -0,0 +1,149 @@\n+package org.opencv.core;\n+\n+import java.lang.ref.PhantomReference;\n+import java.lang.ref.ReferenceQueue;\n+import java.util.Objects;\n+import java.util.concurrent.atomic.AtomicInteger;\n+\n+/**\n+ * This class is a Java 8+ implementation Cleaner similar to java.lang.ref.Cleaner available\n+ * from Java 9.\n+ *

\n+ * This implementation replaces the finalize() method that is deprecated since Java 9 and marked for removal\n+ * since Java 18\n+ *

\n+ * When OpenCV has Java 8 as its minimum version, this class can be removed and replaced by java.lang.ref.Cleaner.\n+ * In Mat, public static final Cleaner cleaner = Cleaner.create();\n+ */\n+public final class CustomCleaner {\n+\n+ final PhantomCleanable phantomCleanableList;\n+\n+ // The ReferenceQueue of pending cleaning actions\n+ final ReferenceQueue queue;\n+\n+ private CustomCleaner() {\n+ queue = new ReferenceQueue<>();\n+ phantomCleanableList = new PhantomCleanable();\n+ }\n+\n+ public static CustomCleaner create() {\n+ CustomCleaner customCleaner = new CustomCleaner();\n+ customCleaner.start();\n+ return customCleaner;\n+ }\n+\n+ public Cleanable register(Object obj, Runnable action) {\n+ return new PhantomCleanable(Objects.requireNonNull(obj), Objects.requireNonNull(action));\n+ }\n+\n+ private void start() {\n+ new PhantomCleanable(this, null);\n+ Thread thread = new CleanerThread(() -> {\n+ while (!phantomCleanableList.isListEmpty()) {\n+ try {\n+ Cleanable ref = (Cleanable) queue.remove(60 * 1000L);", + "comment_created_at": "2025-03-17T09:39:27+00:00", + "comment_author": "AlexanderSchuetz97", + "comment_body": "Nroduit uses a long polling pattern here. Basically as explained above phantomCleanableList.isListEmpty() is only true when no reference to CustomCleaner exists. And if that is the case then the thread should stop because without a reference to the CustomerCleaner object enquening further cleaners is impossible.\r\n\r\nMy thoughts on this implementation:\r\nIts not how I would have implemented this, but its a valid way of doing it, possibly even the better way.\r\nNroduit has implemented this entire thing in a way that multiple cleaners and therefore multiple cleaner threads can be created. Openjdk/Oracle? recommends (somewhere in the jdk 9 'Cleaner' javadoc, which is what we are trying to re-create here) to only use exactly 1 cleaner per library. I would have not bothered with this complexity of implementing \"stopping\" of the cleaner and made it never stop once started. Then you wouldnt need a timeout. The only scenario I can think of where stopping would theoretically be required is if you wanted to support class unloading of opencv. I highly doubt that this would be a good idea as I dont think you guys properly implemented JNI_OnUnload on the native side to not leak memory. I think 95% of people dont use class unloading in their applications anyways, so its not really a problem.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1943671042", + "pr_number": 23467, + "pr_file": "modules/core/misc/java/src/java/CustomCleaner.java", + "created_at": "2025-02-05T21:14:46+00:00", + "commented_code": "package org.opencv.core;\n\nimport java.lang.ref.PhantomReference;\nimport java.lang.ref.ReferenceQueue;\nimport java.util.Objects;\nimport java.util.concurrent.atomic.AtomicInteger;\n\n/**\n * This class is a Java 8+ implementation Cleaner similar to java.lang.ref.Cleaner available\n * from Java 9.\n *

\n * This implementation replaces the finalize() method that is deprecated since Java 9 and marked for removal\n * since Java 18\n *

\n * When OpenCV has Java 8 as its minimum version, this class can be removed and replaced by java.lang.ref.Cleaner.\n * In Mat, public static final Cleaner cleaner = Cleaner.create();\n */\npublic final class CustomCleaner {\n\n final PhantomCleanable phantomCleanableList;\n\n // The ReferenceQueue of pending cleaning actions\n final ReferenceQueue queue;\n\n private CustomCleaner() {\n queue = new ReferenceQueue<>();\n phantomCleanableList = new PhantomCleanable();\n }\n\n public static CustomCleaner create() {\n CustomCleaner customCleaner = new CustomCleaner();\n customCleaner.start();\n return customCleaner;\n }\n\n public Cleanable register(Object obj, Runnable action) {\n return new PhantomCleanable(Objects.requireNonNull(obj), Objects.requireNonNull(action));\n }\n\n private void start() {\n new PhantomCleanable(this, null);\n Thread thread = new CleanerThread(() -> {\n while (!phantomCleanableList.isListEmpty()) {\n try {\n Cleanable ref = (Cleanable) queue.remove(60 * 1000L);\n if (ref != null) {\n ref.clean();\n }\n } catch (Throwable e) {\n // ignore exceptions\n }\n }\n } );\n thread.setDaemon(true);\n thread.start();\n }\n\n static final class CleanerThread extends Thread {\n\n private static final AtomicInteger threadNumber = new AtomicInteger(1);\n\n // ensure run method is run only once\n private volatile boolean hasRun;\n\n public CleanerThread(Runnable runnable) {\n super(runnable, \"CustomCleaner-\" + threadNumber.getAndIncrement());\n }\n\n @Override\n public void run() {\n if (Thread.currentThread() == this && !hasRun) {\n hasRun = true;\n super.run();\n }\n }\n }\n\n\n public interface Cleanable {\n void clean();\n }\n\n class PhantomCleanable extends PhantomReference implements Cleanable {\n\n private final Runnable action;\n PhantomCleanable prev = this;\n PhantomCleanable next = this;\n\n private final PhantomCleanable list;\n\n public PhantomCleanable(Object referent, Runnable action) {\n super(Objects.requireNonNull(referent), queue);\n this.list = phantomCleanableList;", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1943671042", + "repo_full_name": "opencv/opencv", + "pr_number": 23467, + "pr_file": "modules/core/misc/java/src/java/CustomCleaner.java", + "discussion_id": "1943671042", + "commented_code": "@@ -0,0 +1,149 @@\n+package org.opencv.core;\n+\n+import java.lang.ref.PhantomReference;\n+import java.lang.ref.ReferenceQueue;\n+import java.util.Objects;\n+import java.util.concurrent.atomic.AtomicInteger;\n+\n+/**\n+ * This class is a Java 8+ implementation Cleaner similar to java.lang.ref.Cleaner available\n+ * from Java 9.\n+ *

\n+ * This implementation replaces the finalize() method that is deprecated since Java 9 and marked for removal\n+ * since Java 18\n+ *

\n+ * When OpenCV has Java 8 as its minimum version, this class can be removed and replaced by java.lang.ref.Cleaner.\n+ * In Mat, public static final Cleaner cleaner = Cleaner.create();\n+ */\n+public final class CustomCleaner {\n+\n+ final PhantomCleanable phantomCleanableList;\n+\n+ // The ReferenceQueue of pending cleaning actions\n+ final ReferenceQueue queue;\n+\n+ private CustomCleaner() {\n+ queue = new ReferenceQueue<>();\n+ phantomCleanableList = new PhantomCleanable();\n+ }\n+\n+ public static CustomCleaner create() {\n+ CustomCleaner customCleaner = new CustomCleaner();\n+ customCleaner.start();\n+ return customCleaner;\n+ }\n+\n+ public Cleanable register(Object obj, Runnable action) {\n+ return new PhantomCleanable(Objects.requireNonNull(obj), Objects.requireNonNull(action));\n+ }\n+\n+ private void start() {\n+ new PhantomCleanable(this, null);\n+ Thread thread = new CleanerThread(() -> {\n+ while (!phantomCleanableList.isListEmpty()) {\n+ try {\n+ Cleanable ref = (Cleanable) queue.remove(60 * 1000L);\n+ if (ref != null) {\n+ ref.clean();\n+ }\n+ } catch (Throwable e) {\n+ // ignore exceptions\n+ }\n+ }\n+ } );\n+ thread.setDaemon(true);\n+ thread.start();\n+ }\n+\n+ static final class CleanerThread extends Thread {\n+\n+ private static final AtomicInteger threadNumber = new AtomicInteger(1);\n+\n+ // ensure run method is run only once\n+ private volatile boolean hasRun;\n+\n+ public CleanerThread(Runnable runnable) {\n+ super(runnable, \"CustomCleaner-\" + threadNumber.getAndIncrement());\n+ }\n+\n+ @Override\n+ public void run() {\n+ if (Thread.currentThread() == this && !hasRun) {\n+ hasRun = true;\n+ super.run();\n+ }\n+ }\n+ }\n+\n+\n+ public interface Cleanable {\n+ void clean();\n+ }\n+\n+ class PhantomCleanable extends PhantomReference implements Cleanable {\n+\n+ private final Runnable action;\n+ PhantomCleanable prev = this;\n+ PhantomCleanable next = this;\n+\n+ private final PhantomCleanable list;\n+\n+ public PhantomCleanable(Object referent, Runnable action) {\n+ super(Objects.requireNonNull(referent), queue);\n+ this.list = phantomCleanableList;", + "comment_created_at": "2025-02-05T21:14:46+00:00", + "comment_author": "opencv-alalek", + "comment_body": "Why do we need additional \"list\" if there is already \"queue\"?\r\n\r\nAny link on \"reference implementation code\"?\r\n\r\n**UPD**: E.g, https://stackoverflow.com/a/14450693 - there are no any extra lists.", + "pr_file_module": null + }, + { + "comment_id": "1998301635", + "repo_full_name": "opencv/opencv", + "pr_number": 23467, + "pr_file": "modules/core/misc/java/src/java/CustomCleaner.java", + "discussion_id": "1943671042", + "commented_code": "@@ -0,0 +1,149 @@\n+package org.opencv.core;\n+\n+import java.lang.ref.PhantomReference;\n+import java.lang.ref.ReferenceQueue;\n+import java.util.Objects;\n+import java.util.concurrent.atomic.AtomicInteger;\n+\n+/**\n+ * This class is a Java 8+ implementation Cleaner similar to java.lang.ref.Cleaner available\n+ * from Java 9.\n+ *

\n+ * This implementation replaces the finalize() method that is deprecated since Java 9 and marked for removal\n+ * since Java 18\n+ *

\n+ * When OpenCV has Java 8 as its minimum version, this class can be removed and replaced by java.lang.ref.Cleaner.\n+ * In Mat, public static final Cleaner cleaner = Cleaner.create();\n+ */\n+public final class CustomCleaner {\n+\n+ final PhantomCleanable phantomCleanableList;\n+\n+ // The ReferenceQueue of pending cleaning actions\n+ final ReferenceQueue queue;\n+\n+ private CustomCleaner() {\n+ queue = new ReferenceQueue<>();\n+ phantomCleanableList = new PhantomCleanable();\n+ }\n+\n+ public static CustomCleaner create() {\n+ CustomCleaner customCleaner = new CustomCleaner();\n+ customCleaner.start();\n+ return customCleaner;\n+ }\n+\n+ public Cleanable register(Object obj, Runnable action) {\n+ return new PhantomCleanable(Objects.requireNonNull(obj), Objects.requireNonNull(action));\n+ }\n+\n+ private void start() {\n+ new PhantomCleanable(this, null);\n+ Thread thread = new CleanerThread(() -> {\n+ while (!phantomCleanableList.isListEmpty()) {\n+ try {\n+ Cleanable ref = (Cleanable) queue.remove(60 * 1000L);\n+ if (ref != null) {\n+ ref.clean();\n+ }\n+ } catch (Throwable e) {\n+ // ignore exceptions\n+ }\n+ }\n+ } );\n+ thread.setDaemon(true);\n+ thread.start();\n+ }\n+\n+ static final class CleanerThread extends Thread {\n+\n+ private static final AtomicInteger threadNumber = new AtomicInteger(1);\n+\n+ // ensure run method is run only once\n+ private volatile boolean hasRun;\n+\n+ public CleanerThread(Runnable runnable) {\n+ super(runnable, \"CustomCleaner-\" + threadNumber.getAndIncrement());\n+ }\n+\n+ @Override\n+ public void run() {\n+ if (Thread.currentThread() == this && !hasRun) {\n+ hasRun = true;\n+ super.run();\n+ }\n+ }\n+ }\n+\n+\n+ public interface Cleanable {\n+ void clean();\n+ }\n+\n+ class PhantomCleanable extends PhantomReference implements Cleanable {\n+\n+ private final Runnable action;\n+ PhantomCleanable prev = this;\n+ PhantomCleanable next = this;\n+\n+ private final PhantomCleanable list;\n+\n+ public PhantomCleanable(Object referent, Runnable action) {\n+ super(Objects.requireNonNull(referent), queue);\n+ this.list = phantomCleanableList;", + "comment_created_at": "2025-03-17T09:23:00+00:00", + "comment_author": "AlexanderSchuetz97", + "comment_body": "I am not the Author of this code, but I feel qualified to comment anyways.\r\n\r\nThis is the correct way to implement this.\r\n\r\nYou need to ensure that the cleaner itself outlives the resource you want to clean.\r\nThe \"ReferenceQueue\" itself does not do that. Therefore you need a different datastructure like a list or hashmap.\r\nIt needs to be a double linked list or hashmap because you have no idea in what order the elements are to be removed. So you need to support fast removal of elements in the center. Making the cleaners themselves (they know when to be removed) the node in a list as done here is the canonical way to do it.\r\n\r\nIn the link you posted, which is a short example, the caller ensures that the cleaner reference remains on the stack and thereforce not garbage collected. This approch is fine if you know how many elements you need to clean (in the case of the example that number is 1.) 
However this is not possible for opencv's case you have no knowledge on how many mat's are create and you also do not want to force ever user of opencv to change their code to also keep the cleaner alive for longer than the mat.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-thread-safe-resource-cleanup.md b/_reviewers/opencv-thread-safe-resource-cleanup.md index ea75d1d..7f8d7f2 100644 --- a/_reviewers/opencv-thread-safe-resource-cleanup.md +++ b/_reviewers/opencv-thread-safe-resource-cleanup.md @@ -59,75 +59,3 @@ public final class SafeResourceCleaner { ``` This approach is particularly important for concurrent applications where thread safety must be maintained during resource cleanup, and it provides a more reliable alternative to the deprecated finalize() method. - - -[ - { - "discussion_id": "1942571100", - "pr_number": 23467, - "pr_file": "modules/core/misc/java/src/java/CustomCleaner.java", - "created_at": "2025-02-05T09:59:55+00:00", - "commented_code": "package org.opencv.core;\n\nimport java.lang.ref.PhantomReference;\nimport java.lang.ref.ReferenceQueue;\nimport java.util.Objects;\nimport java.util.concurrent.atomic.AtomicInteger;\n\n/**\n * This class is a Java 8+ implementation Cleaner similar to java.lang.ref.Cleaner available\n * from Java 9.\n *
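The data-structure argument in the reply above (the reference queue alone does not keep pending cleanup records reachable, and records must be removable in arbitrary order) can be illustrated independently of the Java Cleaner API. The C++ sketch below only shows that argument, using an intrusive doubly linked list with O(1) unlink; none of the names come from the reviewed CustomCleaner.java.

```cpp
// Sketch: a registry that owns its cleanup records as nodes of an intrusive
// doubly linked list. The registry keeps every pending record alive, and any
// record can be unlinked in O(1) once its action has run, in any order.
#include <functional>
#include <utility>

class CleanupRegistry
{
public:
    struct Node
    {
        std::function<void()> action;
        Node* prev = nullptr;
        Node* next = nullptr;
    };

    CleanupRegistry() { head_.prev = head_.next = &head_; }  // empty circular list

    ~CleanupRegistry()
    {
        while (!empty())
            runAndRemove(head_.next);  // run anything still pending
    }

    // Register an action; the returned node stays reachable through the list.
    Node* add(std::function<void()> action)
    {
        Node* node = new Node();
        node->action = std::move(action);
        node->prev = &head_;
        node->next = head_.next;
        head_.next->prev = node;
        head_.next = node;
        return node;
    }

    // Run and discard one record, wherever it sits in the list.
    void runAndRemove(Node* node)
    {
        node->prev->next = node->next;
        node->next->prev = node->prev;
        node->action();
        delete node;
    }

    bool empty() const { return head_.next == &head_; }

private:
    Node head_;  // sentinel node; not a real record
};
```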

\n * This implementation replace finalize() method that is deprecated since Java 9 and for removal\n * since Java 18\n *

\n * When OpenCV has Java 8 as its minimum version, this class can be removed and replaced by java.lang.ref.Cleaner.\n * In Mat, public static final Cleaner cleaner = Cleaner.create();\n */\npublic final class CustomCleaner {\n\n final PhantomCleanable phantomCleanableList;\n\n // The ReferenceQueue of pending cleaning actions\n final ReferenceQueue queue;\n\n private CustomCleaner() {\n queue = new ReferenceQueue<>();\n phantomCleanableList = new PhantomCleanable();\n }\n\n public static CustomCleaner create() {\n CustomCleaner customCleaner = new CustomCleaner();\n customCleaner.start();\n return customCleaner;\n }\n\n public Cleanable register(Object obj, Runnable action) {\n return new PhantomCleanable(Objects.requireNonNull(obj), Objects.requireNonNull(action));\n }\n\n private void start() {\n new PhantomCleanable(this, null);\n Thread thread = new CleanerThread(() -> {\n while (!phantomCleanableList.isListEmpty()) {\n try {\n Cleanable ref = (Cleanable) queue.remove(60 * 1000L);", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1942571100", - "repo_full_name": "opencv/opencv", - "pr_number": 23467, - "pr_file": "modules/core/misc/java/src/java/CustomCleaner.java", - "discussion_id": "1942571100", - "commented_code": "@@ -0,0 +1,149 @@\n+package org.opencv.core;\n+\n+import java.lang.ref.PhantomReference;\n+import java.lang.ref.ReferenceQueue;\n+import java.util.Objects;\n+import java.util.concurrent.atomic.AtomicInteger;\n+\n+/**\n+ * This class is a Java 8+ implementation Cleaner similar to java.lang.ref.Cleaner available\n+ * from Java 9.\n+ *

\n+ * This implementation replace finalize() method that is deprecated since Java 9 and for removal\n+ * since Java 18\n+ *

\n+ * When OpenCV has Java 8 as its minimum version, this class can be removed and replaced by java.lang.ref.Cleaner.\n+ * In Mat, public static final Cleaner cleaner = Cleaner.create();\n+ */\n+public final class CustomCleaner {\n+\n+ final PhantomCleanable phantomCleanableList;\n+\n+ // The ReferenceQueue of pending cleaning actions\n+ final ReferenceQueue queue;\n+\n+ private CustomCleaner() {\n+ queue = new ReferenceQueue<>();\n+ phantomCleanableList = new PhantomCleanable();\n+ }\n+\n+ public static CustomCleaner create() {\n+ CustomCleaner customCleaner = new CustomCleaner();\n+ customCleaner.start();\n+ return customCleaner;\n+ }\n+\n+ public Cleanable register(Object obj, Runnable action) {\n+ return new PhantomCleanable(Objects.requireNonNull(obj), Objects.requireNonNull(action));\n+ }\n+\n+ private void start() {\n+ new PhantomCleanable(this, null);\n+ Thread thread = new CleanerThread(() -> {\n+ while (!phantomCleanableList.isListEmpty()) {\n+ try {\n+ Cleanable ref = (Cleanable) queue.remove(60 * 1000L);", - "comment_created_at": "2025-02-05T09:59:55+00:00", - "comment_author": "opencv-alalek", - "comment_body": "Why do we need timeout?", - "pr_file_module": null - }, - { - "comment_id": "1998331173", - "repo_full_name": "opencv/opencv", - "pr_number": 23467, - "pr_file": "modules/core/misc/java/src/java/CustomCleaner.java", - "discussion_id": "1942571100", - "commented_code": "@@ -0,0 +1,149 @@\n+package org.opencv.core;\n+\n+import java.lang.ref.PhantomReference;\n+import java.lang.ref.ReferenceQueue;\n+import java.util.Objects;\n+import java.util.concurrent.atomic.AtomicInteger;\n+\n+/**\n+ * This class is a Java 8+ implementation Cleaner similar to java.lang.ref.Cleaner available\n+ * from Java 9.\n+ *

\n+ * This implementation replace finalize() method that is deprecated since Java 9 and for removal\n+ * since Java 18\n+ *

\n+ * When OpenCV has Java 8 as its minimum version, this class can be removed and replaced by java.lang.ref.Cleaner.\n+ * In Mat, public static final Cleaner cleaner = Cleaner.create();\n+ */\n+public final class CustomCleaner {\n+\n+ final PhantomCleanable phantomCleanableList;\n+\n+ // The ReferenceQueue of pending cleaning actions\n+ final ReferenceQueue queue;\n+\n+ private CustomCleaner() {\n+ queue = new ReferenceQueue<>();\n+ phantomCleanableList = new PhantomCleanable();\n+ }\n+\n+ public static CustomCleaner create() {\n+ CustomCleaner customCleaner = new CustomCleaner();\n+ customCleaner.start();\n+ return customCleaner;\n+ }\n+\n+ public Cleanable register(Object obj, Runnable action) {\n+ return new PhantomCleanable(Objects.requireNonNull(obj), Objects.requireNonNull(action));\n+ }\n+\n+ private void start() {\n+ new PhantomCleanable(this, null);\n+ Thread thread = new CleanerThread(() -> {\n+ while (!phantomCleanableList.isListEmpty()) {\n+ try {\n+ Cleanable ref = (Cleanable) queue.remove(60 * 1000L);", - "comment_created_at": "2025-03-17T09:39:27+00:00", - "comment_author": "AlexanderSchuetz97", - "comment_body": "Nroduit uses a long polling pattern here. Basically as explained above phantomCleanableList.isListEmpty() is only true when no reference to CustomCleaner exists. And if that is the case then the thread should stop because without a reference to the CustomerCleaner object enquening further cleaners is impossible.\r\n\r\nMy thoughts on this implementation:\r\nIts not how I would have implemented this, but its a valid way of doing it, possibly even the better way.\r\nNroduit has implemented this entire thing in a way that multiple cleaners and therefore multiple cleaner threads can be created. Openjdk/Oracle? recommends (somewhere in the jdk 9 'Cleaner' javadoc, which is what we are trying to re-create here) to only use exactly 1 cleaner per library. I would have not bothered with this complexity of implementing \"stopping\" of the cleaner and made it never stop once started. Then you wouldnt need a timeout. The only scenario I can think of where stopping would theoretically be required is if you wanted to support class unloading of opencv. I highly doubt that this would be a good idea as I dont think you guys properly implemented JNI_OnUnload on the native side to not leak memory. I think 95% of people dont use class unloading in their applications anyways, so its not really a problem.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1943671042", - "pr_number": 23467, - "pr_file": "modules/core/misc/java/src/java/CustomCleaner.java", - "created_at": "2025-02-05T21:14:46+00:00", - "commented_code": "package org.opencv.core;\n\nimport java.lang.ref.PhantomReference;\nimport java.lang.ref.ReferenceQueue;\nimport java.util.Objects;\nimport java.util.concurrent.atomic.AtomicInteger;\n\n/**\n * This class is a Java 8+ implementation Cleaner similar to java.lang.ref.Cleaner available\n * from Java 9.\n *

\n * This implementation replace finalize() method that is deprecated since Java 9 and for removal\n * since Java 18\n *

\n * When OpenCV has Java 8 as its minimum version, this class can be removed and replaced by java.lang.ref.Cleaner.\n * In Mat, public static final Cleaner cleaner = Cleaner.create();\n */\npublic final class CustomCleaner {\n\n final PhantomCleanable phantomCleanableList;\n\n // The ReferenceQueue of pending cleaning actions\n final ReferenceQueue queue;\n\n private CustomCleaner() {\n queue = new ReferenceQueue<>();\n phantomCleanableList = new PhantomCleanable();\n }\n\n public static CustomCleaner create() {\n CustomCleaner customCleaner = new CustomCleaner();\n customCleaner.start();\n return customCleaner;\n }\n\n public Cleanable register(Object obj, Runnable action) {\n return new PhantomCleanable(Objects.requireNonNull(obj), Objects.requireNonNull(action));\n }\n\n private void start() {\n new PhantomCleanable(this, null);\n Thread thread = new CleanerThread(() -> {\n while (!phantomCleanableList.isListEmpty()) {\n try {\n Cleanable ref = (Cleanable) queue.remove(60 * 1000L);\n if (ref != null) {\n ref.clean();\n }\n } catch (Throwable e) {\n // ignore exceptions\n }\n }\n } );\n thread.setDaemon(true);\n thread.start();\n }\n\n static final class CleanerThread extends Thread {\n\n private static final AtomicInteger threadNumber = new AtomicInteger(1);\n\n // ensure run method is run only once\n private volatile boolean hasRun;\n\n public CleanerThread(Runnable runnable) {\n super(runnable, \"CustomCleaner-\" + threadNumber.getAndIncrement());\n }\n\n @Override\n public void run() {\n if (Thread.currentThread() == this && !hasRun) {\n hasRun = true;\n super.run();\n }\n }\n }\n\n\n public interface Cleanable {\n void clean();\n }\n\n class PhantomCleanable extends PhantomReference implements Cleanable {\n\n private final Runnable action;\n PhantomCleanable prev = this;\n PhantomCleanable next = this;\n\n private final PhantomCleanable list;\n\n public PhantomCleanable(Object referent, Runnable action) {\n super(Objects.requireNonNull(referent), queue);\n this.list = phantomCleanableList;", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1943671042", - "repo_full_name": "opencv/opencv", - "pr_number": 23467, - "pr_file": "modules/core/misc/java/src/java/CustomCleaner.java", - "discussion_id": "1943671042", - "commented_code": "@@ -0,0 +1,149 @@\n+package org.opencv.core;\n+\n+import java.lang.ref.PhantomReference;\n+import java.lang.ref.ReferenceQueue;\n+import java.util.Objects;\n+import java.util.concurrent.atomic.AtomicInteger;\n+\n+/**\n+ * This class is a Java 8+ implementation Cleaner similar to java.lang.ref.Cleaner available\n+ * from Java 9.\n+ *

\n+ * This implementation replace finalize() method that is deprecated since Java 9 and for removal\n+ * since Java 18\n+ *

\n+ * When OpenCV has Java 8 as its minimum version, this class can be removed and replaced by java.lang.ref.Cleaner.\n+ * In Mat, public static final Cleaner cleaner = Cleaner.create();\n+ */\n+public final class CustomCleaner {\n+\n+ final PhantomCleanable phantomCleanableList;\n+\n+ // The ReferenceQueue of pending cleaning actions\n+ final ReferenceQueue queue;\n+\n+ private CustomCleaner() {\n+ queue = new ReferenceQueue<>();\n+ phantomCleanableList = new PhantomCleanable();\n+ }\n+\n+ public static CustomCleaner create() {\n+ CustomCleaner customCleaner = new CustomCleaner();\n+ customCleaner.start();\n+ return customCleaner;\n+ }\n+\n+ public Cleanable register(Object obj, Runnable action) {\n+ return new PhantomCleanable(Objects.requireNonNull(obj), Objects.requireNonNull(action));\n+ }\n+\n+ private void start() {\n+ new PhantomCleanable(this, null);\n+ Thread thread = new CleanerThread(() -> {\n+ while (!phantomCleanableList.isListEmpty()) {\n+ try {\n+ Cleanable ref = (Cleanable) queue.remove(60 * 1000L);\n+ if (ref != null) {\n+ ref.clean();\n+ }\n+ } catch (Throwable e) {\n+ // ignore exceptions\n+ }\n+ }\n+ } );\n+ thread.setDaemon(true);\n+ thread.start();\n+ }\n+\n+ static final class CleanerThread extends Thread {\n+\n+ private static final AtomicInteger threadNumber = new AtomicInteger(1);\n+\n+ // ensure run method is run only once\n+ private volatile boolean hasRun;\n+\n+ public CleanerThread(Runnable runnable) {\n+ super(runnable, \"CustomCleaner-\" + threadNumber.getAndIncrement());\n+ }\n+\n+ @Override\n+ public void run() {\n+ if (Thread.currentThread() == this && !hasRun) {\n+ hasRun = true;\n+ super.run();\n+ }\n+ }\n+ }\n+\n+\n+ public interface Cleanable {\n+ void clean();\n+ }\n+\n+ class PhantomCleanable extends PhantomReference implements Cleanable {\n+\n+ private final Runnable action;\n+ PhantomCleanable prev = this;\n+ PhantomCleanable next = this;\n+\n+ private final PhantomCleanable list;\n+\n+ public PhantomCleanable(Object referent, Runnable action) {\n+ super(Objects.requireNonNull(referent), queue);\n+ this.list = phantomCleanableList;", - "comment_created_at": "2025-02-05T21:14:46+00:00", - "comment_author": "opencv-alalek", - "comment_body": "Why do we need additional \"list\" if there is already \"queue\"?\r\n\r\nAny link on \"reference implementation code\"?\r\n\r\n**UPD**: E.g, https://stackoverflow.com/a/14450693 - there are no any extra lists.", - "pr_file_module": null - }, - { - "comment_id": "1998301635", - "repo_full_name": "opencv/opencv", - "pr_number": 23467, - "pr_file": "modules/core/misc/java/src/java/CustomCleaner.java", - "discussion_id": "1943671042", - "commented_code": "@@ -0,0 +1,149 @@\n+package org.opencv.core;\n+\n+import java.lang.ref.PhantomReference;\n+import java.lang.ref.ReferenceQueue;\n+import java.util.Objects;\n+import java.util.concurrent.atomic.AtomicInteger;\n+\n+/**\n+ * This class is a Java 8+ implementation Cleaner similar to java.lang.ref.Cleaner available\n+ * from Java 9.\n+ *

\n+ * This implementation replace finalize() method that is deprecated since Java 9 and for removal\n+ * since Java 18\n+ *

\n+ * When OpenCV has Java 8 as its minimum version, this class can be removed and replaced by java.lang.ref.Cleaner.\n+ * In Mat, public static final Cleaner cleaner = Cleaner.create();\n+ */\n+public final class CustomCleaner {\n+\n+ final PhantomCleanable phantomCleanableList;\n+\n+ // The ReferenceQueue of pending cleaning actions\n+ final ReferenceQueue queue;\n+\n+ private CustomCleaner() {\n+ queue = new ReferenceQueue<>();\n+ phantomCleanableList = new PhantomCleanable();\n+ }\n+\n+ public static CustomCleaner create() {\n+ CustomCleaner customCleaner = new CustomCleaner();\n+ customCleaner.start();\n+ return customCleaner;\n+ }\n+\n+ public Cleanable register(Object obj, Runnable action) {\n+ return new PhantomCleanable(Objects.requireNonNull(obj), Objects.requireNonNull(action));\n+ }\n+\n+ private void start() {\n+ new PhantomCleanable(this, null);\n+ Thread thread = new CleanerThread(() -> {\n+ while (!phantomCleanableList.isListEmpty()) {\n+ try {\n+ Cleanable ref = (Cleanable) queue.remove(60 * 1000L);\n+ if (ref != null) {\n+ ref.clean();\n+ }\n+ } catch (Throwable e) {\n+ // ignore exceptions\n+ }\n+ }\n+ } );\n+ thread.setDaemon(true);\n+ thread.start();\n+ }\n+\n+ static final class CleanerThread extends Thread {\n+\n+ private static final AtomicInteger threadNumber = new AtomicInteger(1);\n+\n+ // ensure run method is run only once\n+ private volatile boolean hasRun;\n+\n+ public CleanerThread(Runnable runnable) {\n+ super(runnable, \"CustomCleaner-\" + threadNumber.getAndIncrement());\n+ }\n+\n+ @Override\n+ public void run() {\n+ if (Thread.currentThread() == this && !hasRun) {\n+ hasRun = true;\n+ super.run();\n+ }\n+ }\n+ }\n+\n+\n+ public interface Cleanable {\n+ void clean();\n+ }\n+\n+ class PhantomCleanable extends PhantomReference implements Cleanable {\n+\n+ private final Runnable action;\n+ PhantomCleanable prev = this;\n+ PhantomCleanable next = this;\n+\n+ private final PhantomCleanable list;\n+\n+ public PhantomCleanable(Object referent, Runnable action) {\n+ super(Objects.requireNonNull(referent), queue);\n+ this.list = phantomCleanableList;", - "comment_created_at": "2025-03-17T09:23:00+00:00", - "comment_author": "AlexanderSchuetz97", - "comment_body": "I am not the Author of this code, but I feel qualified to comment anyways.\r\n\r\nThis is the correct way to implement this.\r\n\r\nYou need to ensure that the cleaner itself outlives the resource you want to clean.\r\nThe \"ReferenceQueue\" itself does not do that. Therefore you need a different datastructure like a list or hashmap.\r\nIt needs to be a double linked list or hashmap because you have no idea in what order the elements are to be removed. So you need to support fast removal of elements in the center. Making the cleaners themselves (they know when to be removed) the node in a list as done here is the canonical way to do it.\r\n\r\nIn the link you posted, which is a short example, the caller ensures that the cleaner reference remains on the stack and thereforce not garbage collected. This approch is fine if you know how many elements you need to clean (in the case of the example that number is 1.) 
However this is not possible for opencv's case you have no knowledge on how many mat's are create and you also do not want to force ever user of opencv to change their code to also keep the cleaner alive for longer than the mat.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-use-opencv-error-mechanisms.json b/_reviewers/opencv-use-opencv-error-mechanisms.json new file mode 100644 index 0000000..7735568 --- /dev/null +++ b/_reviewers/opencv-use-opencv-error-mechanisms.json @@ -0,0 +1,182 @@ +[ + { + "discussion_id": "2153806951", + "pr_number": 27369, + "pr_file": "modules/imgproc/src/min_enclosing_convex_polygon.cpp", + "created_at": "2025-06-18T07:03:02+00:00", + "commented_code": "/*M///////////////////////////////////////////////////////////////////////////////////////\n //\n // This file is part of OpenCV project.\n // It is subject to the license terms in the LICENSE file found in the top-level directory of this\n // distribution and at http://opencv.org/license.html.\n //\n //\n // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\n //\n // By downloading, copying, installing or using the software you agree to this license.\n // If you do not agree to this license, do not download, install,\n // copy or use the software.\n //\n // INFORMATION REGARDING THE CONTRIBUTION:\n //\n // Author: Sara Kuhnert\n // Created: 20.05.2025\n // E-mail: \n //\n // THE IMPLEMENTATION OF THE MODULES IS BASED ON THE FOLLOWING PAPER:\n //\n // [1] A. Aggarwal, J. S. Chang, Chee K. Yap: \"Minimum area circumscribing Polygons\", The Visual\n // Computer, 1:112-117, 1985\n //\n // The overall complexity of the algorithm is theta(n^2log(n)log(k)) where \"n\" represents the number\n // of vertices in the input convex polygon and \"k\" the number of vertices in the output polygon.\n //\n //\n //\n // License Agreement\n // For Open Source Computer Vision Library\n //\n // Copyright (C) 2000, Intel Corporation, all rights reserved.\n // Copyright (C) 2013, OpenCV Foundation, all rights reserved.\n // Copyright (C) 2020, Sara Kuhnert, all rights reserved.\n // Third party copyrights are property of their respective owners.\n //\n // Redistribution and use in source and binary forms, with or without modification,\n // are permitted provided that the following conditions are met:\n //\n // * Redistribution's of source code must retain the above copyright notice,\n // this list of conditions and the following disclaimer.\n //\n // * Redistribution's in binary form must reproduce the above copyright notice,\n // this list of conditions and the following disclaimer in the documentation\n // and/or other materials provided with the distribution.\n //\n // * The name of the copyright holders may not be used to endorse or promote products\n // derived from this software without specific prior written permission.\n //\n // This software is provided by the copyright holders and contributors \"as is\" and\n // any express or implied warranties, including, but not limited to, the implied\n // warranties of merchantability and fitness for a particular purpose are disclaimed.\n // In no event shall the Intel Corporation or contributors be liable for any direct,\n // indirect, incidental, special, exemplary, or consequential damages\n // (including, but not limited to, procurement of substitute goods or services;\n // loss of use, data, or profits; or business interruption) however caused\n // and on any theory of liability, whether in contract, strict liability,\n // or tort 
(including negligence or otherwise) arising in any way out of\n // the use of this software, even if advised of the possibility of such damage.\n //\n //M*/\n\n#include \"precomp.hpp\"\n#include \n\n#include \n#include \n#include \n#include \n#include \n#include \n#include \n\n\n\n//////////////////////////// Constants definitions ////////////////////////////\n\n#define EPSILON 1e-6\n\n\n///////////////////////// Helper function declarations /////////////////////////\n\nnamespace minEnclosingConvexPolygon {\n\nstatic void findMinEnclosingPolygon(cv::InputArray points,\n const int &k,\n CV_OUT cv::OutputArray &kgon,\n CV_OUT double &area);\n\nstatic void findMinEnclosingPolygon(const std::vector &ngon,\n const int &k,\n cv::OutputArray &kgon,\n double &area);\n\nstatic void findMinAreaPolygon(const std::vector &ngon,\n std::vector &minPolygon,\n double &area,\n int k);\n\n} //namespace\n\n\n//////////////////////// Class and struct declarations ////////////////////////\n\nnamespace minEnclosingConvexPolygon {\n\n//! Intersection point of two sides with its position relative to the polygon\n/*!\n * @param point Intersection point of the two sides\n * @param position Is the intersection point a valid solution?\n */\nstruct IntersectionPoint\n{\n cv::Point2f point = {-1.0, -1.0};\n bool position = false;\n\n IntersectionPoint() = default;\n IntersectionPoint(const cv::Point2f& a, const cv::Point2f& b,\n const cv::Point2f& c, const cv::Point2f& d);\n IntersectionPoint(int i, int j, const std::vector& ngon);\n};\n\n//! Container for the information about the intersection point of two flush sides\n/*!\n * @param intersection Instance of IntersectionPoint\n * @param extra_area The area that would be added to the kgon compared to the ngon\n * @param done Set to true once calculated to avoid redundant calculations\n */\nstruct FlushIntersect\n{\n IntersectionPoint intersection = {};\n double extra_area = std::numeric_limits::max();\n bool done = false;\n};\n\n//! Container for the information about a balanced side between two points\n/*!\n * @param pi First intersection point\n * @param pj Second intersection point\n * @param extra_area The area that would be added to the kgon compared to the ngon\n * @param flush Flush is a special case of balanced. If the balanced side is also flush, this indicates which of the sides of the ngon it is flush with. The default for not flush sides is -1.\n * @param position Is the balanced side a valid solution?\n * @param done Set to true once calculated to avoid redundant calculations\n */\nstruct BalancedIntersect\n{\n cv::Point2f pi = {-1, -1};\n cv::Point2f pj = {-1, -1};\n double extra_area = std::numeric_limits::max();\n int flush = -1;\n bool position = false;\n bool done = false;\n};\n\n//! Container for the best segment between two points\n/*!\n * @param extra_area The area that would be added to the kgon compared to the ngon\n * @param side Side of the ngon to which it is flush (if flush is true) or edge of the ngon which it is touching (if flush is false)\n * @param flush True if it is flush. If false, it is balanced but not flush\n * @param exists Does a valid solution exist?\n * @param done Set to true once calculated to avoid redundant calculations\n */\nstruct Segment\n{\n double extra_area = std::numeric_limits::max();\n int side = -1;\n bool flush = false;\n bool exists = false;\n bool done = false;\n};\n\n//! 
Combination of selected sides and their corresponding chains, which generates the kgon with the minimum area\n/*!\n * @param area Extra area that the minimal kgon is bigger than the input ngon\n * @param i First side\n * @param j Second side\n */\nstruct Minimum\n{\n double area = std::numeric_limits::max();\n int i = -1;\n int j = -1;\n};\n\n//! Container for a side of the minimal kgon\n/*!\n * @param side Index of the side of the ngon which it is flush with or the vertex which it is touching\n * @param flush Is this side flush?\n */\nstruct Side\n{\n int side = -1;\n bool flush = false;\n\n Side() = default;\n Side(int i, bool b);\n};\n\n//! Container for the minimal kgon\n/*!\n * @param sides Indices of the corresponding sindes of the ngon\n * @param vertices Vertices of the kgon\n * @param extra_area Extra area that the minimal kgon is bigger than the input ngon\n * @param i, @param j The chains which build the minimal kgon are formed between those two sides\n */\nstruct Kgon\n{\n std::vector sides;\n std::vector vertices;\n double extra_area = std::numeric_limits::max();\n int i = -1;\n int j = -1;\n};\n\n//! Class for all the possible combinations of flush intersections of two sides of the input polygon\n/*!\n * @param ngon Input polygon\n * @param intersections Matrix of all possible flush intersections of two sides of the ngon\n * @param area_edges Collection of the points that define the extra area of the kgon\n */\nclass FlushIntersections\n{\nprivate:\n const std::vector& ngon;\n std::vector> intersections;\n std::vector area_edges;\n\npublic:\n FlushIntersections(const std::vector& _ngon);\n const FlushIntersect& lineIntersect(int i, int j);\n};\n\n//! Class for all the possible combinations of balanced intersections with two sides of the input polygon\n/*!\n * @param ngon Input polygon\n * @param balanced_intersections atrix of all possible balanced intersections of two sides of the ngon\n * @param area_edges Collection of the points that define the extra area of the kgon\n */\nclass BalancedIntersections\n{\nprivate:\n const std::vector& ngon;\n std::vector> balanced_intersections;\n std::vector area_edges;\n\n double extraArea ( int first, int last, const cv::Point2f& extra1, const cv::Point2f& extra2 );\n\n BalancedIntersect flush(int i, int j, int e);\n\npublic:\n BalancedIntersections(const std::vector& _ngon);\n BalancedIntersect balancedIntersect(int i, int j, int e);\n void markAsDone(int i, int j);\n const std::vector& operator[](size_t i) const;\n};\n\n//! 
Class for building the one-sided and h-sided chains and calculation the minimum enclosing polygon based on those chains\n/*!\n * @param ngon Input polygon\n * @param single_sides Matrix of the best one-sided chain for each combination of two sides of the ngon\n * @param middle_sides Matrix of the middle side of each h-sided chain for all relevant h and each combination of two sides of the ngon\n * @param intersections An instance of FlushIntersections to store already calculated flush intersections\n * @param balanced_inters An instance of BalancedIntersections to store already calculated balanced intersections\n */\nclass Chains\n{\nprivate:\n const std::vector& ngon;\n std::vector> single_sides;\n std::vector>> middle_sides;\n FlushIntersections intersections;\n BalancedIntersections balanced_inters;\n\n void findSingleE(int i, int j, int l, int r);\n void singleSideImpl(int i, int j1, int j2);\n void findMiddleE1(int i, int j, int l, int r);\n void middleSideImpl1(int i, int j1, int j2);\n void findMiddleE(int h, int i, int j, int l, int r);\n void middleSideImpl(int h, int i, int j1, int j2);\n\npublic:\n Chains(const std::vector& _ngon, int k);\n\n std::set relevantChainLengths(int h);\n void calcOneSidedChains();\n void calcMiddleChains(int h);\n\n Minimum minimumArea(int n, int k);\n std::vector reconstructHSidedChain(int h, int i, int j);\n std::vector findKSides(int k, int i, int j);\n std::vector findKVertices(std::vector& sides);\n\n};\n\n} //namespace\n\n\n//////////////////////// Class and struct constructors ////////////////////////\n\nnamespace minEnclosingConvexPolygon {\n\n//! Constructor for IntersectionPoint. Find the intersection point of two lines given by four points and decide whether it is on the correct side of the polygon\n/*!\n * @param a First point of the first line\n * @param b Second point of the first line\n * @param c First point of the second line\n * @param d Second point of the second line\n */\nIntersectionPoint::IntersectionPoint(const cv::Point2f& a, const cv::Point2f& b,\n const cv::Point2f& c, const cv::Point2f& d)\n{\n const cv::Point2f ab = b - a, cd = d - c, ac = c - a;\n const double det = ab.cross(-cd);\n if(std::abs (det) < EPSILON )\n return;\n\n const double loc = ac.cross(-cd) / det;\n if(loc <= 0)\n return;\n\n point = a + loc * ab;\n position = true;\n}\n\n//! Constructor for IntersectionPoint. Find the intersection point of two sides of the polygon based on the given side indices.\n/*!\n * @param i Index of the first side\n * @param j Index of the second side\n * @param ngon Input polygon with n sides\n */\nIntersectionPoint::IntersectionPoint(int i, int j,\n const std::vector& ngon) :\n IntersectionPoint(ngon[i],\n ngon[(i + 1) % ngon.size()],\n ngon[j],\n ngon[(j + 1) % ngon.size()])\n{}\n\n//! Constructor for FlushIntersections\n/*!\n * @param _ngon Input polygon with n sides\n */\nFlushIntersections::FlushIntersections(const std::vector& _ngon) :\n ngon(_ngon),\n intersections(ngon.size(),\n std::vector(ngon.size()))\n{\n area_edges.reserve(ngon.size());\n}\n\n//! Constructor for BalancedIntersections\n/*!\n * @param _ngon Input polygon with n sides\n */\nBalancedIntersections::BalancedIntersections(const std::vector& _ngon) :\nngon(_ngon),\nbalanced_intersections(ngon.size(),\n std::vector(ngon.size()))\n{\n area_edges.reserve(ngon.size());\n}\n\n//! Constructor for Side. 
Assign a side index and weather it is flush or not.\n/*!\n * @param i Side index\n * @param b Is side i flush?\n */\nSide::Side(int i, bool b)\n{\n side = i;\n flush = b;\n}\n\n//! Constructor for Chains\n/*!\n * @param\n */\nChains::Chains(const std::vector& _ngon, int k) :\n ngon(_ngon),\n single_sides(std::vector>(ngon.size(),\n std::vector(ngon.size()))),\n middle_sides(std::vector>>(\n k, std::vector>(ngon.size(),\n std::vector(ngon.size())))),\n intersections(ngon),\n balanced_inters(ngon)\n{}\n\n} //namespace\n\n\n////////////////////////// Class and struct functions //////////////////////////\n\nnamespace minEnclosingConvexPolygon {\n\n//! Find the intersection point of two sides, decide weather it is a valid point and if so calculate the extra area caused by that intersection.\n/*!\n * @param i Index of the first side\n * @param j Index of the second side\n */\nconst FlushIntersect& FlushIntersections::lineIntersect(int i, int j)\n{\n FlushIntersect& itr = intersections[i][j];\n if(itr.done)\n return itr;\n\n const size_t n = ngon.size();\n if((i + 1) % n == j)\n {\n itr.intersection.point = ngon[j];\n itr.intersection.position = true;\n itr.extra_area = 0.0;\n itr.done = true;\n return itr;\n }\n itr.intersection = IntersectionPoint(i, j, ngon);\n if(itr.intersection.position)\n {\n area_edges.resize(0);\n for(int t = (i + 1) % n; t != (j + 1) % n; t = (t + 1) % n)\n {\n area_edges.push_back(ngon[t]);\n }\n area_edges.push_back(itr.intersection.point);\n itr.extra_area = cv::contourArea(area_edges);\n itr.done = true;\n }\n else\n {\n itr.extra_area = std::numeric_limits::max();\n itr.intersection.position = false;\n itr.done = true;\n }\n return itr;\n}\n\n//! Calculate the added area that is enclosed by a sequence of consecutive vertices of the polygon and the intersection point of two sides.\n/*!\n * @param first Index of the first point of the sequence\n * @param last Index of the last point of the sequence\n * @param extra1 Last point of the sequence\n * @param extra2 Intersection point\n */\ndouble BalancedIntersections::extraArea(int first, int last,\n const cv::Point2f& extra1,\n const cv::Point2f& extra2)\n{\n const size_t n = ngon.size();\n area_edges.resize(0);\n for(int t = first; t != last; t = (t + 1) % n)\n area_edges.push_back(ngon[t]);\n\n area_edges.push_back(extra1);\n area_edges.push_back(extra2);\n\n return cv::contourArea(area_edges);\n}\n\n//! Determine the intersection points of a flush side e that lies between sides i and j with these two sides. Calculate the extra area and the position of the intersection points relative to the polygon. 
Update balanced_intersections if the new area is smaller than extraArea.\n/*!\n * @param i Index of first side\n * @param j Index of second side\n * @param e Index of a side between i and j\n */\nBalancedIntersect BalancedIntersections::flush(int i, int j, int e)\n{\n if(i == e)\n std::logic_error(\"\");\n if(j == e)\n std::logic_error(\"\");\n\n const size_t n = ngon.size();\n const int before = (e - 1 + n) % n;\n BalancedIntersect bi = balanced_intersections[i][j];\n\n const IntersectionPoint left_e(i, e, ngon);\n const IntersectionPoint right_e(e, j, ngon);\n\n if(left_e.position == true && right_e.position == true)\n {\n double extra_area = extraArea((i + 1) % n, e, ngon[e], left_e.point);\n if(extra_area < bi.extra_area)\n {\n extra_area += extraArea((e + 1) % n, j, ngon[j], right_e.point);\n if(extra_area < bi.extra_area)\n {\n bi.extra_area = extra_area;\n bi.pi = left_e.point;\n bi.pj = right_e.point;\n bi.position = true;\n bi.flush = e;\n }\n }\n }\n\n if(before != i)\n {\n const IntersectionPoint left_before(i, before, ngon);\n const IntersectionPoint right_before(before, j, ngon);\n if(left_before.position == true && right_before.position == true)\n {\n double extra_area =\n extraArea((i + 1) % n, before, ngon[before], left_before.point);\n\n if(extra_area < bi.extra_area)\n {\n extra_area += extraArea(e, j, ngon[j], right_before.point);\n if(extra_area < bi.extra_area)\n {\n bi.extra_area = extra_area;\n bi.pi = left_before.point;\n bi.pj = right_before.point;\n bi.position = true;\n bi.flush = before;\n }\n }\n }\n }\n return bi;\n}\n\n//! Determine the intersection points of a balanced side e that lies between sides i and j with these two sides. If no valid balanced edge is found, ccheck for flush sides. Calculate the extra area and the position of the intersection points relative to the polygon. 
Update balanced_intersections if the new area is smaller than extraArea.\n/*!\n * @param i Index of first side\n * @param j Index of second side\n * @param e Index of a side between i and j\n */\nBalancedIntersect BalancedIntersections::balancedIntersect(int i, int j, int e)\n{\n if(balanced_intersections[i][j].done)\n return balanced_intersections[i][j];\n\n const size_t n = ngon.size();\n if((i + 2) % n == j)\n {\n BalancedIntersect& bi = balanced_intersections[i][j];\n bi.pi = ngon[(i + 1) % n];\n bi.pj = ngon[j];\n bi.flush = (i + 1) % n;\n bi.position = true;\n bi.extra_area = 0.0;\n bi.done = true;\n return bi;\n }\n\n const cv::Point2f p1 = ngon[i], p2 = ngon[(i + 1) % n], p3 = ngon[e],\n p4 = ngon[j], p5 = ngon[(j + 1) % n];\n const cv::Point2f dir12 = p2 - p1, dir45 = p5 - p4;\n const double det = dir12.cross(dir45);\n if(std::abs (det) < EPSILON )\n {\n flush(i, j, e);\n return balanced_intersections[i][j];\n }\n\n BalancedIntersect bi;\n cv::Point2f temp = 2 * p3 - p2 - p4;\n const double s = temp.cross(dir45) / det;\n const double t = temp.cross(dir12) / det;\n if(s >= 0 && t >= 0)\n {\n bi.pi = p2 + dir12 * s;\n bi.pj = p4 - dir45 * t;\n bi.position = true;\n\n const cv::Point2f dir_balanced = bi.pj - bi.pi,\n dir_left = p3 - ngon[(e - 1 + n) % n],\n dir_right = ngon[(e + 1) % n] - p3;\n\n const double cross_left = dir_balanced.cross(dir_left),\n cross_right = dir_balanced.cross(dir_right);\n if((cross_left < 0 && cross_right < 0) || (cross_left > 0 && cross_right > 0))\n {\n BalancedIntersect reset;\n bi = reset;\n bi = flush(i, j, e);\n }\n else if(std::abs (cross_left) < EPSILON )\n {\n bi.flush = (e - 1 + n) % n;\n bi.extra_area =\n extraArea((i + 1) % n, (e - 1 + n) % n, ngon[(e - 1 + n) % n], bi.pi)\n + extraArea(e, j, ngon[j], bi.pj);\n }\n else if(std::abs (cross_right) < EPSILON )\n {\n bi.flush = e;\n bi.extra_area = extraArea((i + 1) % n, e, ngon[e], bi.pi)\n + extraArea((e + 1) % n, j, ngon[j], bi.pj);\n }\n else\n {\n bi.extra_area = extraArea((i + 1) % n, e, ngon[e], bi.pi)\n + extraArea(e, j, ngon[j], bi.pj);\n }\n }\n else\n {\n flush(i, j, e);\n }\n\n if(bi.extra_area < balanced_intersections[i][j].extra_area)\n {\n balanced_intersections[i][j] = bi;\n }\n return bi;\n}\n\n//! Set function for the done attribute of BalancedIntersections\n/*!\n * @param i Index of first side\n * @param j Index of second side\n */\nvoid BalancedIntersections::markAsDone(int i, int j)\n{\n balanced_intersections[i][j].done = true;\n}\n\n//! Operator to get a vector of elements from BalancedIntersections\n/*!\n * @param i index of a side\n */\nconst std::vector& BalancedIntersections::operator[](\n size_t i) const\n{\n return balanced_intersections[i];\n}\n\n//! 
For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the one sided chain with minimal extra area\n/*!\n * @param i Index of first fixed side\n * @param j Index of second fixed side\n * @param l Index of left boundary\n * @param r Index of right boundary\n */\nvoid Chains::findSingleE(int i, int j, int l, int r)\n{\n const size_t n = ngon.size();\n Segment& one = single_sides[i][j];\n if (one.done)\n return;\n\n double min_area = std::numeric_limits::max();\n for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n {\n BalancedIntersect candidate = balanced_inters.balancedIntersect(i, j, e);\n if(candidate.extra_area < min_area)\n {\n min_area = candidate.extra_area;\n one.side = e;\n one.extra_area = candidate.extra_area;\n one.flush = false;\n one.exists = true;\n }\n }\n one.done = true;\n balanced_inters.markAsDone(i, j);\n}\n\n//! Recursively repeat the search for the one sided chain with minimal extra area to shrink the boundaries of bigger distances\n/*!\n * @param i Fixed side of the ngon\n * @param j1 Lower boundary of search intervall\n * @param j2 Upper boundary of search intervall\n */\nvoid Chains::singleSideImpl(int i, int j1, int j2)\n{\n const size_t n = ngon.size();\n if((j1 + 1) %n == j2)\n {\n return;\n }\n\n int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n int l = single_sides[i][j1].side < 0 ? (j1 + 1) %n : single_sides[i][j1].side;\n int r = single_sides[i][j2].side < 0 ? (j2 - 1 + n) %n : single_sides[i][j2].side;\n\n findSingleE(i, mid, l, r);\n singleSideImpl(i, j1, mid);\n singleSideImpl(i, mid, j2);\n\n return;\n}\n\n//! For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the middle side of the h-sided chain with minimal extra area. This is the version for h = 1.\n/*!\n * @param i Index of first fixed side\n * @param j Index of second fixed side\n * @param l Index of left boundary\n * @param r Index of right boundary\n */\nvoid Chains::findMiddleE1(int i, int j, int l, int r)\n{\n const size_t n = ngon.size();\n Segment& one = middle_sides[1][i][j];\n if (one.done)\n return;\n\n double min_area = std::numeric_limits::max();\n for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n {\n const FlushIntersect& before = intersections.lineIntersect(i, e);\n if(!before.intersection.position)\n continue;\n const FlushIntersect& after = intersections.lineIntersect(e, j);\n if(!after.intersection.position)\n continue;\n\n double tmp_area = before.extra_area + after.extra_area;\n if(tmp_area < min_area)\n {\n min_area = tmp_area;\n one.side = e;\n one.extra_area = tmp_area;\n one.exists = true;\n one.flush = true;\n }\n }\n one.done = true;\n}\n\n//! Recursively repeat the search for the middle side of the h-sided chain with minimal extra area to shrink the boundaries of bigger distances. This is the version for h = 1.\n/*!\n * @param i Fixed side of the ngon\n * @param j1 Lower boundary of search intervall\n * @param j2 Upper boundary of search intervall\n */\nvoid Chains::middleSideImpl1(int i, int j1, int j2)\n{\n const size_t n = ngon.size();\n if((j1 + 1) %n == j2)\n {\n return;\n }\n\n int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n int l = middle_sides[1][i][j1].side < 0 ? (j1 + 1) %n : middle_sides[1][i][j1].side;\n int r = middle_sides[1][i][j2].side < 0 ? (j2 - 1 + n) %n : middle_sides[1][i][j2].side;\n\n findMiddleE1(i, mid, l, r);\n middleSideImpl1(i, j1, mid);\n middleSideImpl1(i, mid, j2);\n\n return;\n}\n\n//! 
For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the middle side of the h-sided chain with minimal extra area. This is the version for h > 1.\n/*!\n * @param h Length of the h-sided chain\n * @param i Index of first fixed side\n * @param j Index of second fixed side\n * @param l Index of left boundary\n * @param r Index of right boundary\n */\nvoid Chains::findMiddleE(int h, int i, int j, int l, int r)\n{\n const size_t n = ngon.size();\n Segment& one = middle_sides[h][i][j];\n if (one.done)\n return;\n\n const int dist = (i <= j ? (j - i) : (j + n - i));\n const int h_floor = (h - 1) / 2;\n const int h_ceil = h - 1 - h_floor;\n\n if(dist == 0)\n throw std::logic_error(\"\");\n if(dist - 1 < h)\n {\n one.done = true;\n return;\n }\n if(dist - 1 == h)\n {\n one.side = (i + h_floor + 1) % n;\n one.extra_area = 0.0;\n one.exists = true;\n one.flush = true;\n one.done = true;\n return;\n }\n\n double min_area = std::numeric_limits::max();\n for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n {\n const Segment& before = middle_sides[h_floor][i][e];\n if (before.extra_area == std::numeric_limits::max())\n continue;\n const Segment& after = middle_sides[h_ceil][e][j];\n if(after.extra_area == std::numeric_limits::max())\n continue;\n\n double tmp_area = before.extra_area + after.extra_area;\n if(tmp_area < min_area)\n {\n min_area = tmp_area;\n one.side = e;\n one.extra_area = tmp_area;\n one.exists = true;\n one.flush = true;\n }\n }\n one.done = true;\n}\n\n//! Recursively repeat the search for the middle side of the h-sided chain with minimal extra area to shrink the boundaries of bigger distances. This is the version for h > 1.\n/*!\n * @param h Length of the h-sided chain\n * @param i Fixed side of the ngon\n * @param j1 Lower boundary of search intervall\n * @param j2 Upper boundary of search intervall\n */\nvoid Chains::middleSideImpl(int h, int i, int j1, int j2)\n{\n const size_t n = ngon.size();\n if((j1 + 1) %n == j2)\n {\n return;\n }\n\n int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n int l = middle_sides[h][i][j1].side < 0 ? (j1 + 1) %n : middle_sides[h][i][j1].side;\n int r = middle_sides[h][i][j2].side < 0 ? (j2 - 1 + n) %n : middle_sides[h][i][j2].side;\n\n findMiddleE(h, i, mid, l, r);\n middleSideImpl(h, i, j1, mid);\n middleSideImpl(h, i, mid, j2);\n\n return;\n}\n\n//! Calculate the relevant chain lengths. Starting with a maximum chain length of h, down to a chain length of 1, only chains with half the length of the chain before are needed.\n/*!\n * @param h Length of the longest chain (h = k - 3)\n */\nstd::set Chains::relevantChainLengths(int h)\n{\n if(h <= 1)\n return {h};\n\n std::set hs = {h};\n const int h_floor = (h - 1) / 2;\n const int h_ceil = h - 1 - h_floor;\n std::set h1 = relevantChainLengths(h_floor);\n for(const int& hNew : h1)\n hs.insert(hNew);\n if (h_ceil != h_floor)\n {\n std::set h2 = relevantChainLengths(h_ceil);\n for(const int& hNew : h2)\n hs.insert(hNew);\n }\n return hs;\n}\n\n//! Recursively calculate all the onesided chains for each combination of polygon sides.\n/*!\n */\nvoid Chains::calcOneSidedChains()\n{\n const size_t n = ngon.size();\n for(size_t i = 0; i < n; i++)\n {\n int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n\n findSingleE(i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n findSingleE(i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n singleSideImpl(i, j1, j2);\n }\n}\n\n//! 
Recursively calculate the middle sides of the h-sided chains for all combinations of polygon sides.\n/*!\n * @param h Length of the chains\n */\nvoid Chains::calcMiddleChains(int h)\n{\n const size_t n = ngon.size();\n if (h == 0)\n {\n for (size_t i = 0; i < n; i++)\n {\n for (size_t j = 0; j < n; j++)\n {\n Segment& one = middle_sides[h][i][j];\n const FlushIntersect itrs = intersections.lineIntersect(i, j);\n\n one.side = -1;\n one.extra_area = itrs.extra_area;\n one.exists = false;\n one.flush = false;\n one.done = true;\n }\n }\n return;\n }\n if (h == 1)\n {\n for (size_t i = 0; i < n; i++)\n {\n int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n\n findMiddleE1(i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n findMiddleE1(i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n middleSideImpl1(i, j1, j2);\n }\n return;\n }\n\n for (size_t i = 0; i < n; i++)\n {\n int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n\n findMiddleE(h, i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n findMiddleE(h, i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n middleSideImpl(h, i, j1, j2);\n }\n}\n\n//! Find the i and j with the smallest extra area.\n/*!\n * @param n Number of sides of the input polygon\n * @param k Number of sides of the output polygon\n */\nMinimum Chains::minimumArea(int n, int k)\n{\n Minimum min{};\n for(int i = 0; i < n; i++)\n {\n for(int j = 0; j < n; j++)\n {\n if(!single_sides[i][j].exists || !middle_sides[k - 3][j][i].exists)\n continue;\n\n double tmp_area =\n single_sides[i][j].extra_area + middle_sides[k - 3][j][i].extra_area;\n if(tmp_area < min.area)\n {\n min.area = tmp_area;\n min.i = i;\n min.j = j;\n }\n }\n }\n return min;\n}\n\n//! Reconstruct the h-sided chain based on the two sides that give the smalles extra area.\n/*!\n * @param h Length of the k - 3 sided chain.\n * @param i First index from minimumArea\n * @param j Second index from minimumArea\n */\nstd::vector Chains::reconstructHSidedChain(int h, int i, int j)\n{\n if(h == 0)\n {\n throw std::logic_error(\"\");\n }\n if(h == 1)\n {\n return std::vector{{middle_sides[h][i][j].side, true}};\n }\n\n std::vector before, after;\n const int h_floor = (h - 1) / 2;\n const int h_ceil = h - 1 - h_floor;\n if(h_floor > 0)\n before = reconstructHSidedChain(h_floor, i, middle_sides[h][i][j].side);\n if(h_ceil > 0)\n after = reconstructHSidedChain(h_ceil, middle_sides[h][i][j].side, j);\n\n std::vector sides{{middle_sides[h][i][j].side, true}};\n sides.insert(sides.end(), before.begin(), before.end());\n sides.insert(sides.end(), after.begin(), after.end());\n return sides;\n}\n\n//! Get the k sides that build the kgon based on the two sides that give the smalles extra area.\n/*!\n * @param k Number of sides of the output polygon\n * @param i First index from minimumArea\n * @param j Second index from minimumArea\n */\nstd::vector Chains::findKSides(int k, int i, int j)\n{\n std::vector sides;\n sides.push_back({i, true});\n sides.push_back({j, true});\n\n if(single_sides[i][j].flush)\n sides.push_back({single_sides[i][j].side, true});\n else\n sides.push_back({single_sides[i][j].side, false});\n\n std::vector flush_chain = reconstructHSidedChain(k - 3, j, i);\n sides.insert(sides.end(), flush_chain.begin(), flush_chain.end());\n std::sort(sides.begin(), sides.end(),\n [](const Side& lhs, const Side& rhs) { return lhs.side < rhs.side; });\n\n return sides;\n}\n\n//! 
Calculate the k vertices of the kgon from its k sides.\n/*!\n * @param sides Sides of the output polygon\n */\nstd::vector Chains::findKVertices(std::vector& sides)\n{\n const int k = sides.size();\n std::vector vertices(k);\n\n for(int u = 0; u < k; u++)\n {\n const int next = (u + 1) % k;\n if(sides[u].flush && sides[next].flush)\n {\n vertices[u] = intersections.lineIntersect(sides[u].side,\n sides[next].side).intersection.point;\n }\n else if(sides[u].flush && !sides[next].flush)\n {\n vertices[u] = balanced_inters[sides[u].side][sides[(u + 2) % k].side].pi;\n }\n else if(!sides[u].flush && sides[next].flush)\n {\n vertices[u] = balanced_inters[sides[(u - 1 + k) %k].side][sides[next].side].pj;\n }\n else\n {\n throw std::logic_error(\"\");\n }\n }\n return vertices;\n}\n\n} //namespace\n\n\n///////////////////////// Helper function definitions /////////////////////////\n\nnamespace minEnclosingConvexPolygon {\n\n//! Find the minimum enclosing convex kgon and its area. Converting inputs to used vector shape and data format.\n/*!\n * @param points Set of points\n * @param k Number of vertices of the output polygon\n * @param minPolygon Minimum area convex k-gon enclosing the given set of points\n * @param area Area of minPolygon\n */\nstatic void findMinEnclosingPolygon(cv::InputArray points,\n const int &k,\n CV_OUT cv::OutputArray &minPolygon,\n CV_OUT double &area) {\n CV_Assert(!points.empty());\n std::vector ngon, kgon;\n cv::convexHull(points, ngon, true);\n findMinEnclosingPolygon(ngon, k, kgon, area);\n cv::Mat(kgon).copyTo(minPolygon);\n}\n\n//! Find the minimum enclosing convex polygon and its area for a given set of points. Handling of input errors.\n/*!\n * @param ngon The polygon representing the convex hull of the points\n * @param k Number of vertices of the output polygon\n * @param minPolygon Minimum area convex k-gon enclosing the given polygon\n * @param area Area of minPolygon\n */\nstatic void findMinEnclosingPolygon(const std::vector &ngon,\n const int &k,\n cv::OutputArray &minPolygon,\n double &area) {\n try\n {\n if (ngon.size() < 3)\n {\n throw std::invalid_argument(\n \"ngon must have 3 or more different points enclosing an area\" );\n }\n if (cv::contourArea(ngon) < EPSILON )\n {\n throw std::invalid_argument( \"invalid ngon: all points on line\" );\n }\n if (k <= 3)\n {\n throw std::invalid_argument( \"k must be 3 or higher\" );\n }\n const size_t n = ngon.size();\n if ((const int)n == k)\n {\n throw std::runtime_error (\"(n = k)\");\n }\n if ((const int)n < k)\n {\n throw std::runtime_error (\"(n < k)\");\n }\n }\n catch (std::invalid_argument &message)\n {\n std::cout << \"invalid argument: \" << message.what() << std::endl;\n return;\n }\n catch (std::runtime_error &message)\n {\n std::cout << \"Warning: no minimum area polygon calculated \" << message.what() << std::endl;\n cv::Mat(ngon).copyTo(minPolygon);\n area = cv::contourArea(minPolygon);\n return;\n }\n catch (...)\n {\n std::cout << \"input error\" << std::endl;\n return;\n }\n\n try\n {\n std::vector kgon;\n findMinAreaPolygon(ngon, kgon, area, k);\n cv::Mat(kgon).copyTo(minPolygon);\n }\n catch (...)\n {\n std::cout << \"correct input but execution failed\" << std::endl;\n return;\n }", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2153806951", + "repo_full_name": "opencv/opencv", + "pr_number": 27369, + "pr_file": "modules/imgproc/src/min_enclosing_convex_polygon.cpp", + "discussion_id": "2153806951", + "commented_code": "@@ -0,0 +1,1185 
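The validation helper quoted just above reports bad input by throwing std::invalid_argument / std::runtime_error, catching the exceptions immediately, and printing to std::cout. Inside OpenCV, such conditions are normally surfaced through the library's own error mechanisms (CV_Assert and CV_Error), which raise cv::Exception with file/line context and let the caller decide how to react. The fragment below is only a hedged sketch of that guard logic, not the patch author's implementation; it reuses the names ngon, k, minPolygon, area and the 1e-6 tolerance from the quoted file, and the function name findMinEnclosingPolygonChecked is illustrative.

#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <vector>

// Sketch: the same input checks as the quoted helper, expressed with
// OpenCV error handling instead of std exceptions plus std::cout.
static void findMinEnclosingPolygonChecked(const std::vector<cv::Point2f>& ngon,
                                           int k,
                                           cv::OutputArray minPolygon,
                                           double& area)
{
    // Hard input errors: CV_Assert / CV_Error throw cv::Exception.
    CV_Assert(ngon.size() >= 3);
    CV_Assert(k > 3);  // mirrors the quoted check, which rejects k <= 3
    if (cv::contourArea(ngon) < 1e-6)
        CV_Error(cv::Error::StsBadArg, "invalid ngon: all points lie on a line");

    // Degenerate but recoverable cases (n <= k): fall back to the hull itself,
    // as the quoted helper does, but without printing to std::cout.
    if (static_cast<int>(ngon.size()) <= k)
    {
        cv::Mat(ngon).copyTo(minPolygon);
        area = cv::contourArea(minPolygon);
        return;
    }

    // ... the actual computation (findMinAreaPolygon in the quoted file) would
    // run here; a thrown cv::Exception propagates to the caller instead of
    // being swallowed by catch(...).
}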
@@\n+/*M///////////////////////////////////////////////////////////////////////////////////////\n+ //\n+ // This file is part of OpenCV project.\n+ // It is subject to the license terms in the LICENSE file found in the top-level directory of this\n+ // distribution and at http://opencv.org/license.html.\n+ //\n+ //\n+ // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\n+ //\n+ // By downloading, copying, installing or using the software you agree to this license.\n+ // If you do not agree to this license, do not download, install,\n+ // copy or use the software.\n+ //\n+ // INFORMATION REGARDING THE CONTRIBUTION:\n+ //\n+ // Author: Sara Kuhnert\n+ // Created: 20.05.2025\n+ // E-mail: \n+ //\n+ // THE IMPLEMENTATION OF THE MODULES IS BASED ON THE FOLLOWING PAPER:\n+ //\n+ // [1] A. Aggarwal, J. S. Chang, Chee K. Yap: \"Minimum area circumscribing Polygons\", The Visual\n+ // Computer, 1:112-117, 1985\n+ //\n+ // The overall complexity of the algorithm is theta(n^2log(n)log(k)) where \"n\" represents the number\n+ // of vertices in the input convex polygon and \"k\" the number of vertices in the output polygon.\n+ //\n+ //\n+ //\n+ // License Agreement\n+ // For Open Source Computer Vision Library\n+ //\n+ // Copyright (C) 2000, Intel Corporation, all rights reserved.\n+ // Copyright (C) 2013, OpenCV Foundation, all rights reserved.\n+ // Copyright (C) 2020, Sara Kuhnert, all rights reserved.\n+ // Third party copyrights are property of their respective owners.\n+ //\n+ // Redistribution and use in source and binary forms, with or without modification,\n+ // are permitted provided that the following conditions are met:\n+ //\n+ // * Redistribution's of source code must retain the above copyright notice,\n+ // this list of conditions and the following disclaimer.\n+ //\n+ // * Redistribution's in binary form must reproduce the above copyright notice,\n+ // this list of conditions and the following disclaimer in the documentation\n+ // and/or other materials provided with the distribution.\n+ //\n+ // * The name of the copyright holders may not be used to endorse or promote products\n+ // derived from this software without specific prior written permission.\n+ //\n+ // This software is provided by the copyright holders and contributors \"as is\" and\n+ // any express or implied warranties, including, but not limited to, the implied\n+ // warranties of merchantability and fitness for a particular purpose are disclaimed.\n+ // In no event shall the Intel Corporation or contributors be liable for any direct,\n+ // indirect, incidental, special, exemplary, or consequential damages\n+ // (including, but not limited to, procurement of substitute goods or services;\n+ // loss of use, data, or profits; or business interruption) however caused\n+ // and on any theory of liability, whether in contract, strict liability,\n+ // or tort (including negligence or otherwise) arising in any way out of\n+ // the use of this software, even if advised of the possibility of such damage.\n+ //\n+ //M*/\n+\n+#include \"precomp.hpp\"\n+#include \n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+\n+\n+\n+//////////////////////////// Constants definitions ////////////////////////////\n+\n+#define EPSILON 1e-6\n+\n+\n+///////////////////////// Helper function declarations /////////////////////////\n+\n+namespace minEnclosingConvexPolygon {\n+\n+static void findMinEnclosingPolygon(cv::InputArray points,\n+ const int &k,\n+ CV_OUT cv::OutputArray &kgon,\n+ CV_OUT 
double &area);\n+\n+static void findMinEnclosingPolygon(const std::vector &ngon,\n+ const int &k,\n+ cv::OutputArray &kgon,\n+ double &area);\n+\n+static void findMinAreaPolygon(const std::vector &ngon,\n+ std::vector &minPolygon,\n+ double &area,\n+ int k);\n+\n+} //namespace\n+\n+\n+//////////////////////// Class and struct declarations ////////////////////////\n+\n+namespace minEnclosingConvexPolygon {\n+\n+//! Intersection point of two sides with its position relative to the polygon\n+/*!\n+ * @param point Intersection point of the two sides\n+ * @param position Is the intersection point a valid solution?\n+ */\n+struct IntersectionPoint\n+{\n+ cv::Point2f point = {-1.0, -1.0};\n+ bool position = false;\n+\n+ IntersectionPoint() = default;\n+ IntersectionPoint(const cv::Point2f& a, const cv::Point2f& b,\n+ const cv::Point2f& c, const cv::Point2f& d);\n+ IntersectionPoint(int i, int j, const std::vector& ngon);\n+};\n+\n+//! Container for the information about the intersection point of two flush sides\n+/*!\n+ * @param intersection Instance of IntersectionPoint\n+ * @param extra_area The area that would be added to the kgon compared to the ngon\n+ * @param done Set to true once calculated to avoid redundant calculations\n+ */\n+struct FlushIntersect\n+{\n+ IntersectionPoint intersection = {};\n+ double extra_area = std::numeric_limits::max();\n+ bool done = false;\n+};\n+\n+//! Container for the information about a balanced side between two points\n+/*!\n+ * @param pi First intersection point\n+ * @param pj Second intersection point\n+ * @param extra_area The area that would be added to the kgon compared to the ngon\n+ * @param flush Flush is a special case of balanced. If the balanced side is also flush, this indicates which of the sides of the ngon it is flush with. The default for not flush sides is -1.\n+ * @param position Is the balanced side a valid solution?\n+ * @param done Set to true once calculated to avoid redundant calculations\n+ */\n+struct BalancedIntersect\n+{\n+ cv::Point2f pi = {-1, -1};\n+ cv::Point2f pj = {-1, -1};\n+ double extra_area = std::numeric_limits::max();\n+ int flush = -1;\n+ bool position = false;\n+ bool done = false;\n+};\n+\n+//! Container for the best segment between two points\n+/*!\n+ * @param extra_area The area that would be added to the kgon compared to the ngon\n+ * @param side Side of the ngon to which it is flush (if flush is true) or edge of the ngon which it is touching (if flush is false)\n+ * @param flush True if it is flush. If false, it is balanced but not flush\n+ * @param exists Does a valid solution exist?\n+ * @param done Set to true once calculated to avoid redundant calculations\n+ */\n+struct Segment\n+{\n+ double extra_area = std::numeric_limits::max();\n+ int side = -1;\n+ bool flush = false;\n+ bool exists = false;\n+ bool done = false;\n+};\n+\n+//! Combination of selected sides and their corresponding chains, which generates the kgon with the minimum area\n+/*!\n+ * @param area Extra area that the minimal kgon is bigger than the input ngon\n+ * @param i First side\n+ * @param j Second side\n+ */\n+struct Minimum\n+{\n+ double area = std::numeric_limits::max();\n+ int i = -1;\n+ int j = -1;\n+};\n+\n+//! Container for a side of the minimal kgon\n+/*!\n+ * @param side Index of the side of the ngon which it is flush with or the vertex which it is touching\n+ * @param flush Is this side flush?\n+ */\n+struct Side\n+{\n+ int side = -1;\n+ bool flush = false;\n+\n+ Side() = default;\n+ Side(int i, bool b);\n+};\n+\n+//! 
Container for the minimal kgon\n+/*!\n+ * @param sides Indices of the corresponding sindes of the ngon\n+ * @param vertices Vertices of the kgon\n+ * @param extra_area Extra area that the minimal kgon is bigger than the input ngon\n+ * @param i, @param j The chains which build the minimal kgon are formed between those two sides\n+ */\n+struct Kgon\n+{\n+ std::vector sides;\n+ std::vector vertices;\n+ double extra_area = std::numeric_limits::max();\n+ int i = -1;\n+ int j = -1;\n+};\n+\n+//! Class for all the possible combinations of flush intersections of two sides of the input polygon\n+/*!\n+ * @param ngon Input polygon\n+ * @param intersections Matrix of all possible flush intersections of two sides of the ngon\n+ * @param area_edges Collection of the points that define the extra area of the kgon\n+ */\n+class FlushIntersections\n+{\n+private:\n+ const std::vector& ngon;\n+ std::vector> intersections;\n+ std::vector area_edges;\n+\n+public:\n+ FlushIntersections(const std::vector& _ngon);\n+ const FlushIntersect& lineIntersect(int i, int j);\n+};\n+\n+//! Class for all the possible combinations of balanced intersections with two sides of the input polygon\n+/*!\n+ * @param ngon Input polygon\n+ * @param balanced_intersections atrix of all possible balanced intersections of two sides of the ngon\n+ * @param area_edges Collection of the points that define the extra area of the kgon\n+ */\n+class BalancedIntersections\n+{\n+private:\n+ const std::vector& ngon;\n+ std::vector> balanced_intersections;\n+ std::vector area_edges;\n+\n+ double extraArea ( int first, int last, const cv::Point2f& extra1, const cv::Point2f& extra2 );\n+\n+ BalancedIntersect flush(int i, int j, int e);\n+\n+public:\n+ BalancedIntersections(const std::vector& _ngon);\n+ BalancedIntersect balancedIntersect(int i, int j, int e);\n+ void markAsDone(int i, int j);\n+ const std::vector& operator[](size_t i) const;\n+};\n+\n+//! 
Class for building the one-sided and h-sided chains and calculation the minimum enclosing polygon based on those chains\n+/*!\n+ * @param ngon Input polygon\n+ * @param single_sides Matrix of the best one-sided chain for each combination of two sides of the ngon\n+ * @param middle_sides Matrix of the middle side of each h-sided chain for all relevant h and each combination of two sides of the ngon\n+ * @param intersections An instance of FlushIntersections to store already calculated flush intersections\n+ * @param balanced_inters An instance of BalancedIntersections to store already calculated balanced intersections\n+ */\n+class Chains\n+{\n+private:\n+ const std::vector& ngon;\n+ std::vector> single_sides;\n+ std::vector>> middle_sides;\n+ FlushIntersections intersections;\n+ BalancedIntersections balanced_inters;\n+\n+ void findSingleE(int i, int j, int l, int r);\n+ void singleSideImpl(int i, int j1, int j2);\n+ void findMiddleE1(int i, int j, int l, int r);\n+ void middleSideImpl1(int i, int j1, int j2);\n+ void findMiddleE(int h, int i, int j, int l, int r);\n+ void middleSideImpl(int h, int i, int j1, int j2);\n+\n+public:\n+ Chains(const std::vector& _ngon, int k);\n+\n+ std::set relevantChainLengths(int h);\n+ void calcOneSidedChains();\n+ void calcMiddleChains(int h);\n+\n+ Minimum minimumArea(int n, int k);\n+ std::vector reconstructHSidedChain(int h, int i, int j);\n+ std::vector findKSides(int k, int i, int j);\n+ std::vector findKVertices(std::vector& sides);\n+\n+};\n+\n+} //namespace\n+\n+\n+//////////////////////// Class and struct constructors ////////////////////////\n+\n+namespace minEnclosingConvexPolygon {\n+\n+//! Constructor for IntersectionPoint. Find the intersection point of two lines given by four points and decide whether it is on the correct side of the polygon\n+/*!\n+ * @param a First point of the first line\n+ * @param b Second point of the first line\n+ * @param c First point of the second line\n+ * @param d Second point of the second line\n+ */\n+IntersectionPoint::IntersectionPoint(const cv::Point2f& a, const cv::Point2f& b,\n+ const cv::Point2f& c, const cv::Point2f& d)\n+{\n+ const cv::Point2f ab = b - a, cd = d - c, ac = c - a;\n+ const double det = ab.cross(-cd);\n+ if(std::abs (det) < EPSILON )\n+ return;\n+\n+ const double loc = ac.cross(-cd) / det;\n+ if(loc <= 0)\n+ return;\n+\n+ point = a + loc * ab;\n+ position = true;\n+}\n+\n+//! Constructor for IntersectionPoint. Find the intersection point of two sides of the polygon based on the given side indices.\n+/*!\n+ * @param i Index of the first side\n+ * @param j Index of the second side\n+ * @param ngon Input polygon with n sides\n+ */\n+IntersectionPoint::IntersectionPoint(int i, int j,\n+ const std::vector& ngon) :\n+ IntersectionPoint(ngon[i],\n+ ngon[(i + 1) % ngon.size()],\n+ ngon[j],\n+ ngon[(j + 1) % ngon.size()])\n+{}\n+\n+//! Constructor for FlushIntersections\n+/*!\n+ * @param _ngon Input polygon with n sides\n+ */\n+FlushIntersections::FlushIntersections(const std::vector& _ngon) :\n+ ngon(_ngon),\n+ intersections(ngon.size(),\n+ std::vector(ngon.size()))\n+{\n+ area_edges.reserve(ngon.size());\n+}\n+\n+//! Constructor for BalancedIntersections\n+/*!\n+ * @param _ngon Input polygon with n sides\n+ */\n+BalancedIntersections::BalancedIntersections(const std::vector& _ngon) :\n+ngon(_ngon),\n+balanced_intersections(ngon.size(),\n+ std::vector(ngon.size()))\n+{\n+ area_edges.reserve(ngon.size());\n+}\n+\n+//! Constructor for Side. 
Assign a side index and weather it is flush or not.\n+/*!\n+ * @param i Side index\n+ * @param b Is side i flush?\n+ */\n+Side::Side(int i, bool b)\n+{\n+ side = i;\n+ flush = b;\n+}\n+\n+//! Constructor for Chains\n+/*!\n+ * @param\n+ */\n+Chains::Chains(const std::vector& _ngon, int k) :\n+ ngon(_ngon),\n+ single_sides(std::vector>(ngon.size(),\n+ std::vector(ngon.size()))),\n+ middle_sides(std::vector>>(\n+ k, std::vector>(ngon.size(),\n+ std::vector(ngon.size())))),\n+ intersections(ngon),\n+ balanced_inters(ngon)\n+{}\n+\n+} //namespace\n+\n+\n+////////////////////////// Class and struct functions //////////////////////////\n+\n+namespace minEnclosingConvexPolygon {\n+\n+//! Find the intersection point of two sides, decide weather it is a valid point and if so calculate the extra area caused by that intersection.\n+/*!\n+ * @param i Index of the first side\n+ * @param j Index of the second side\n+ */\n+const FlushIntersect& FlushIntersections::lineIntersect(int i, int j)\n+{\n+ FlushIntersect& itr = intersections[i][j];\n+ if(itr.done)\n+ return itr;\n+\n+ const size_t n = ngon.size();\n+ if((i + 1) % n == j)\n+ {\n+ itr.intersection.point = ngon[j];\n+ itr.intersection.position = true;\n+ itr.extra_area = 0.0;\n+ itr.done = true;\n+ return itr;\n+ }\n+ itr.intersection = IntersectionPoint(i, j, ngon);\n+ if(itr.intersection.position)\n+ {\n+ area_edges.resize(0);\n+ for(int t = (i + 1) % n; t != (j + 1) % n; t = (t + 1) % n)\n+ {\n+ area_edges.push_back(ngon[t]);\n+ }\n+ area_edges.push_back(itr.intersection.point);\n+ itr.extra_area = cv::contourArea(area_edges);\n+ itr.done = true;\n+ }\n+ else\n+ {\n+ itr.extra_area = std::numeric_limits::max();\n+ itr.intersection.position = false;\n+ itr.done = true;\n+ }\n+ return itr;\n+}\n+\n+//! Calculate the added area that is enclosed by a sequence of consecutive vertices of the polygon and the intersection point of two sides.\n+/*!\n+ * @param first Index of the first point of the sequence\n+ * @param last Index of the last point of the sequence\n+ * @param extra1 Last point of the sequence\n+ * @param extra2 Intersection point\n+ */\n+double BalancedIntersections::extraArea(int first, int last,\n+ const cv::Point2f& extra1,\n+ const cv::Point2f& extra2)\n+{\n+ const size_t n = ngon.size();\n+ area_edges.resize(0);\n+ for(int t = first; t != last; t = (t + 1) % n)\n+ area_edges.push_back(ngon[t]);\n+\n+ area_edges.push_back(extra1);\n+ area_edges.push_back(extra2);\n+\n+ return cv::contourArea(area_edges);\n+}\n+\n+//! Determine the intersection points of a flush side e that lies between sides i and j with these two sides. Calculate the extra area and the position of the intersection points relative to the polygon. 
Update balanced_intersections if the new area is smaller than extraArea.\n+/*!\n+ * @param i Index of first side\n+ * @param j Index of second side\n+ * @param e Index of a side between i and j\n+ */\n+BalancedIntersect BalancedIntersections::flush(int i, int j, int e)\n+{\n+ if(i == e)\n+ std::logic_error(\"\");\n+ if(j == e)\n+ std::logic_error(\"\");\n+\n+ const size_t n = ngon.size();\n+ const int before = (e - 1 + n) % n;\n+ BalancedIntersect bi = balanced_intersections[i][j];\n+\n+ const IntersectionPoint left_e(i, e, ngon);\n+ const IntersectionPoint right_e(e, j, ngon);\n+\n+ if(left_e.position == true && right_e.position == true)\n+ {\n+ double extra_area = extraArea((i + 1) % n, e, ngon[e], left_e.point);\n+ if(extra_area < bi.extra_area)\n+ {\n+ extra_area += extraArea((e + 1) % n, j, ngon[j], right_e.point);\n+ if(extra_area < bi.extra_area)\n+ {\n+ bi.extra_area = extra_area;\n+ bi.pi = left_e.point;\n+ bi.pj = right_e.point;\n+ bi.position = true;\n+ bi.flush = e;\n+ }\n+ }\n+ }\n+\n+ if(before != i)\n+ {\n+ const IntersectionPoint left_before(i, before, ngon);\n+ const IntersectionPoint right_before(before, j, ngon);\n+ if(left_before.position == true && right_before.position == true)\n+ {\n+ double extra_area =\n+ extraArea((i + 1) % n, before, ngon[before], left_before.point);\n+\n+ if(extra_area < bi.extra_area)\n+ {\n+ extra_area += extraArea(e, j, ngon[j], right_before.point);\n+ if(extra_area < bi.extra_area)\n+ {\n+ bi.extra_area = extra_area;\n+ bi.pi = left_before.point;\n+ bi.pj = right_before.point;\n+ bi.position = true;\n+ bi.flush = before;\n+ }\n+ }\n+ }\n+ }\n+ return bi;\n+}\n+\n+//! Determine the intersection points of a balanced side e that lies between sides i and j with these two sides. If no valid balanced edge is found, ccheck for flush sides. Calculate the extra area and the position of the intersection points relative to the polygon. 
Update balanced_intersections if the new area is smaller than extraArea.\n+/*!\n+ * @param i Index of first side\n+ * @param j Index of second side\n+ * @param e Index of a side between i and j\n+ */\n+BalancedIntersect BalancedIntersections::balancedIntersect(int i, int j, int e)\n+{\n+ if(balanced_intersections[i][j].done)\n+ return balanced_intersections[i][j];\n+\n+ const size_t n = ngon.size();\n+ if((i + 2) % n == j)\n+ {\n+ BalancedIntersect& bi = balanced_intersections[i][j];\n+ bi.pi = ngon[(i + 1) % n];\n+ bi.pj = ngon[j];\n+ bi.flush = (i + 1) % n;\n+ bi.position = true;\n+ bi.extra_area = 0.0;\n+ bi.done = true;\n+ return bi;\n+ }\n+\n+ const cv::Point2f p1 = ngon[i], p2 = ngon[(i + 1) % n], p3 = ngon[e],\n+ p4 = ngon[j], p5 = ngon[(j + 1) % n];\n+ const cv::Point2f dir12 = p2 - p1, dir45 = p5 - p4;\n+ const double det = dir12.cross(dir45);\n+ if(std::abs (det) < EPSILON )\n+ {\n+ flush(i, j, e);\n+ return balanced_intersections[i][j];\n+ }\n+\n+ BalancedIntersect bi;\n+ cv::Point2f temp = 2 * p3 - p2 - p4;\n+ const double s = temp.cross(dir45) / det;\n+ const double t = temp.cross(dir12) / det;\n+ if(s >= 0 && t >= 0)\n+ {\n+ bi.pi = p2 + dir12 * s;\n+ bi.pj = p4 - dir45 * t;\n+ bi.position = true;\n+\n+ const cv::Point2f dir_balanced = bi.pj - bi.pi,\n+ dir_left = p3 - ngon[(e - 1 + n) % n],\n+ dir_right = ngon[(e + 1) % n] - p3;\n+\n+ const double cross_left = dir_balanced.cross(dir_left),\n+ cross_right = dir_balanced.cross(dir_right);\n+ if((cross_left < 0 && cross_right < 0) || (cross_left > 0 && cross_right > 0))\n+ {\n+ BalancedIntersect reset;\n+ bi = reset;\n+ bi = flush(i, j, e);\n+ }\n+ else if(std::abs (cross_left) < EPSILON )\n+ {\n+ bi.flush = (e - 1 + n) % n;\n+ bi.extra_area =\n+ extraArea((i + 1) % n, (e - 1 + n) % n, ngon[(e - 1 + n) % n], bi.pi)\n+ + extraArea(e, j, ngon[j], bi.pj);\n+ }\n+ else if(std::abs (cross_right) < EPSILON )\n+ {\n+ bi.flush = e;\n+ bi.extra_area = extraArea((i + 1) % n, e, ngon[e], bi.pi)\n+ + extraArea((e + 1) % n, j, ngon[j], bi.pj);\n+ }\n+ else\n+ {\n+ bi.extra_area = extraArea((i + 1) % n, e, ngon[e], bi.pi)\n+ + extraArea(e, j, ngon[j], bi.pj);\n+ }\n+ }\n+ else\n+ {\n+ flush(i, j, e);\n+ }\n+\n+ if(bi.extra_area < balanced_intersections[i][j].extra_area)\n+ {\n+ balanced_intersections[i][j] = bi;\n+ }\n+ return bi;\n+}\n+\n+//! Set function for the done attribute of BalancedIntersections\n+/*!\n+ * @param i Index of first side\n+ * @param j Index of second side\n+ */\n+void BalancedIntersections::markAsDone(int i, int j)\n+{\n+ balanced_intersections[i][j].done = true;\n+}\n+\n+//! Operator to get a vector of elements from BalancedIntersections\n+/*!\n+ * @param i index of a side\n+ */\n+const std::vector& BalancedIntersections::operator[](\n+ size_t i) const\n+{\n+ return balanced_intersections[i];\n+}\n+\n+//! 
For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the one sided chain with minimal extra area\n+/*!\n+ * @param i Index of first fixed side\n+ * @param j Index of second fixed side\n+ * @param l Index of left boundary\n+ * @param r Index of right boundary\n+ */\n+void Chains::findSingleE(int i, int j, int l, int r)\n+{\n+ const size_t n = ngon.size();\n+ Segment& one = single_sides[i][j];\n+ if (one.done)\n+ return;\n+\n+ double min_area = std::numeric_limits::max();\n+ for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n+ {\n+ BalancedIntersect candidate = balanced_inters.balancedIntersect(i, j, e);\n+ if(candidate.extra_area < min_area)\n+ {\n+ min_area = candidate.extra_area;\n+ one.side = e;\n+ one.extra_area = candidate.extra_area;\n+ one.flush = false;\n+ one.exists = true;\n+ }\n+ }\n+ one.done = true;\n+ balanced_inters.markAsDone(i, j);\n+}\n+\n+//! Recursively repeat the search for the one sided chain with minimal extra area to shrink the boundaries of bigger distances\n+/*!\n+ * @param i Fixed side of the ngon\n+ * @param j1 Lower boundary of search intervall\n+ * @param j2 Upper boundary of search intervall\n+ */\n+void Chains::singleSideImpl(int i, int j1, int j2)\n+{\n+ const size_t n = ngon.size();\n+ if((j1 + 1) %n == j2)\n+ {\n+ return;\n+ }\n+\n+ int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n+ int l = single_sides[i][j1].side < 0 ? (j1 + 1) %n : single_sides[i][j1].side;\n+ int r = single_sides[i][j2].side < 0 ? (j2 - 1 + n) %n : single_sides[i][j2].side;\n+\n+ findSingleE(i, mid, l, r);\n+ singleSideImpl(i, j1, mid);\n+ singleSideImpl(i, mid, j2);\n+\n+ return;\n+}\n+\n+//! For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the middle side of the h-sided chain with minimal extra area. This is the version for h = 1.\n+/*!\n+ * @param i Index of first fixed side\n+ * @param j Index of second fixed side\n+ * @param l Index of left boundary\n+ * @param r Index of right boundary\n+ */\n+void Chains::findMiddleE1(int i, int j, int l, int r)\n+{\n+ const size_t n = ngon.size();\n+ Segment& one = middle_sides[1][i][j];\n+ if (one.done)\n+ return;\n+\n+ double min_area = std::numeric_limits::max();\n+ for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n+ {\n+ const FlushIntersect& before = intersections.lineIntersect(i, e);\n+ if(!before.intersection.position)\n+ continue;\n+ const FlushIntersect& after = intersections.lineIntersect(e, j);\n+ if(!after.intersection.position)\n+ continue;\n+\n+ double tmp_area = before.extra_area + after.extra_area;\n+ if(tmp_area < min_area)\n+ {\n+ min_area = tmp_area;\n+ one.side = e;\n+ one.extra_area = tmp_area;\n+ one.exists = true;\n+ one.flush = true;\n+ }\n+ }\n+ one.done = true;\n+}\n+\n+//! Recursively repeat the search for the middle side of the h-sided chain with minimal extra area to shrink the boundaries of bigger distances. This is the version for h = 1.\n+/*!\n+ * @param i Fixed side of the ngon\n+ * @param j1 Lower boundary of search intervall\n+ * @param j2 Upper boundary of search intervall\n+ */\n+void Chains::middleSideImpl1(int i, int j1, int j2)\n+{\n+ const size_t n = ngon.size();\n+ if((j1 + 1) %n == j2)\n+ {\n+ return;\n+ }\n+\n+ int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n+ int l = middle_sides[1][i][j1].side < 0 ? (j1 + 1) %n : middle_sides[1][i][j1].side;\n+ int r = middle_sides[1][i][j2].side < 0 ? 
(j2 - 1 + n) %n : middle_sides[1][i][j2].side;\n+\n+ findMiddleE1(i, mid, l, r);\n+ middleSideImpl1(i, j1, mid);\n+ middleSideImpl1(i, mid, j2);\n+\n+ return;\n+}\n+\n+//! For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the middle side of the h-sided chain with minimal extra area. This is the version for h > 1.\n+/*!\n+ * @param h Length of the h-sided chain\n+ * @param i Index of first fixed side\n+ * @param j Index of second fixed side\n+ * @param l Index of left boundary\n+ * @param r Index of right boundary\n+ */\n+void Chains::findMiddleE(int h, int i, int j, int l, int r)\n+{\n+ const size_t n = ngon.size();\n+ Segment& one = middle_sides[h][i][j];\n+ if (one.done)\n+ return;\n+\n+ const int dist = (i <= j ? (j - i) : (j + n - i));\n+ const int h_floor = (h - 1) / 2;\n+ const int h_ceil = h - 1 - h_floor;\n+\n+ if(dist == 0)\n+ throw std::logic_error(\"\");\n+ if(dist - 1 < h)\n+ {\n+ one.done = true;\n+ return;\n+ }\n+ if(dist - 1 == h)\n+ {\n+ one.side = (i + h_floor + 1) % n;\n+ one.extra_area = 0.0;\n+ one.exists = true;\n+ one.flush = true;\n+ one.done = true;\n+ return;\n+ }\n+\n+ double min_area = std::numeric_limits::max();\n+ for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n+ {\n+ const Segment& before = middle_sides[h_floor][i][e];\n+ if (before.extra_area == std::numeric_limits::max())\n+ continue;\n+ const Segment& after = middle_sides[h_ceil][e][j];\n+ if(after.extra_area == std::numeric_limits::max())\n+ continue;\n+\n+ double tmp_area = before.extra_area + after.extra_area;\n+ if(tmp_area < min_area)\n+ {\n+ min_area = tmp_area;\n+ one.side = e;\n+ one.extra_area = tmp_area;\n+ one.exists = true;\n+ one.flush = true;\n+ }\n+ }\n+ one.done = true;\n+}\n+\n+//! Recursively repeat the search for the middle side of the h-sided chain with minimal extra area to shrink the boundaries of bigger distances. This is the version for h > 1.\n+/*!\n+ * @param h Length of the h-sided chain\n+ * @param i Fixed side of the ngon\n+ * @param j1 Lower boundary of search intervall\n+ * @param j2 Upper boundary of search intervall\n+ */\n+void Chains::middleSideImpl(int h, int i, int j1, int j2)\n+{\n+ const size_t n = ngon.size();\n+ if((j1 + 1) %n == j2)\n+ {\n+ return;\n+ }\n+\n+ int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n+ int l = middle_sides[h][i][j1].side < 0 ? (j1 + 1) %n : middle_sides[h][i][j1].side;\n+ int r = middle_sides[h][i][j2].side < 0 ? (j2 - 1 + n) %n : middle_sides[h][i][j2].side;\n+\n+ findMiddleE(h, i, mid, l, r);\n+ middleSideImpl(h, i, j1, mid);\n+ middleSideImpl(h, i, mid, j2);\n+\n+ return;\n+}\n+\n+//! Calculate the relevant chain lengths. Starting with a maximum chain length of h, down to a chain length of 1, only chains with half the length of the chain before are needed.\n+/*!\n+ * @param h Length of the longest chain (h = k - 3)\n+ */\n+std::set Chains::relevantChainLengths(int h)\n+{\n+ if(h <= 1)\n+ return {h};\n+\n+ std::set hs = {h};\n+ const int h_floor = (h - 1) / 2;\n+ const int h_ceil = h - 1 - h_floor;\n+ std::set h1 = relevantChainLengths(h_floor);\n+ for(const int& hNew : h1)\n+ hs.insert(hNew);\n+ if (h_ceil != h_floor)\n+ {\n+ std::set h2 = relevantChainLengths(h_ceil);\n+ for(const int& hNew : h2)\n+ hs.insert(hNew);\n+ }\n+ return hs;\n+}\n+\n+//! 
Recursively calculate all the onesided chains for each combination of polygon sides.\n+/*!\n+ */\n+void Chains::calcOneSidedChains()\n+{\n+ const size_t n = ngon.size();\n+ for(size_t i = 0; i < n; i++)\n+ {\n+ int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n+\n+ findSingleE(i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n+ findSingleE(i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n+ singleSideImpl(i, j1, j2);\n+ }\n+}\n+\n+//! Recursively calculate the middle sides of the h-sided chains for all combinations of polygon sides.\n+/*!\n+ * @param h Length of the chains\n+ */\n+void Chains::calcMiddleChains(int h)\n+{\n+ const size_t n = ngon.size();\n+ if (h == 0)\n+ {\n+ for (size_t i = 0; i < n; i++)\n+ {\n+ for (size_t j = 0; j < n; j++)\n+ {\n+ Segment& one = middle_sides[h][i][j];\n+ const FlushIntersect itrs = intersections.lineIntersect(i, j);\n+\n+ one.side = -1;\n+ one.extra_area = itrs.extra_area;\n+ one.exists = false;\n+ one.flush = false;\n+ one.done = true;\n+ }\n+ }\n+ return;\n+ }\n+ if (h == 1)\n+ {\n+ for (size_t i = 0; i < n; i++)\n+ {\n+ int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n+\n+ findMiddleE1(i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n+ findMiddleE1(i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n+ middleSideImpl1(i, j1, j2);\n+ }\n+ return;\n+ }\n+\n+ for (size_t i = 0; i < n; i++)\n+ {\n+ int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n+\n+ findMiddleE(h, i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n+ findMiddleE(h, i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n+ middleSideImpl(h, i, j1, j2);\n+ }\n+}\n+\n+//! Find the i and j with the smallest extra area.\n+/*!\n+ * @param n Number of sides of the input polygon\n+ * @param k Number of sides of the output polygon\n+ */\n+Minimum Chains::minimumArea(int n, int k)\n+{\n+ Minimum min{};\n+ for(int i = 0; i < n; i++)\n+ {\n+ for(int j = 0; j < n; j++)\n+ {\n+ if(!single_sides[i][j].exists || !middle_sides[k - 3][j][i].exists)\n+ continue;\n+\n+ double tmp_area =\n+ single_sides[i][j].extra_area + middle_sides[k - 3][j][i].extra_area;\n+ if(tmp_area < min.area)\n+ {\n+ min.area = tmp_area;\n+ min.i = i;\n+ min.j = j;\n+ }\n+ }\n+ }\n+ return min;\n+}\n+\n+//! Reconstruct the h-sided chain based on the two sides that give the smalles extra area.\n+/*!\n+ * @param h Length of the k - 3 sided chain.\n+ * @param i First index from minimumArea\n+ * @param j Second index from minimumArea\n+ */\n+std::vector Chains::reconstructHSidedChain(int h, int i, int j)\n+{\n+ if(h == 0)\n+ {\n+ throw std::logic_error(\"\");\n+ }\n+ if(h == 1)\n+ {\n+ return std::vector{{middle_sides[h][i][j].side, true}};\n+ }\n+\n+ std::vector before, after;\n+ const int h_floor = (h - 1) / 2;\n+ const int h_ceil = h - 1 - h_floor;\n+ if(h_floor > 0)\n+ before = reconstructHSidedChain(h_floor, i, middle_sides[h][i][j].side);\n+ if(h_ceil > 0)\n+ after = reconstructHSidedChain(h_ceil, middle_sides[h][i][j].side, j);\n+\n+ std::vector sides{{middle_sides[h][i][j].side, true}};\n+ sides.insert(sides.end(), before.begin(), before.end());\n+ sides.insert(sides.end(), after.begin(), after.end());\n+ return sides;\n+}\n+\n+//! 
Get the k sides that build the kgon based on the two sides that give the smalles extra area.\n+/*!\n+ * @param k Number of sides of the output polygon\n+ * @param i First index from minimumArea\n+ * @param j Second index from minimumArea\n+ */\n+std::vector Chains::findKSides(int k, int i, int j)\n+{\n+ std::vector sides;\n+ sides.push_back({i, true});\n+ sides.push_back({j, true});\n+\n+ if(single_sides[i][j].flush)\n+ sides.push_back({single_sides[i][j].side, true});\n+ else\n+ sides.push_back({single_sides[i][j].side, false});\n+\n+ std::vector flush_chain = reconstructHSidedChain(k - 3, j, i);\n+ sides.insert(sides.end(), flush_chain.begin(), flush_chain.end());\n+ std::sort(sides.begin(), sides.end(),\n+ [](const Side& lhs, const Side& rhs) { return lhs.side < rhs.side; });\n+\n+ return sides;\n+}\n+\n+//! Calculate the k vertices of the kgon from its k sides.\n+/*!\n+ * @param sides Sides of the output polygon\n+ */\n+std::vector Chains::findKVertices(std::vector& sides)\n+{\n+ const int k = sides.size();\n+ std::vector vertices(k);\n+\n+ for(int u = 0; u < k; u++)\n+ {\n+ const int next = (u + 1) % k;\n+ if(sides[u].flush && sides[next].flush)\n+ {\n+ vertices[u] = intersections.lineIntersect(sides[u].side,\n+ sides[next].side).intersection.point;\n+ }\n+ else if(sides[u].flush && !sides[next].flush)\n+ {\n+ vertices[u] = balanced_inters[sides[u].side][sides[(u + 2) % k].side].pi;\n+ }\n+ else if(!sides[u].flush && sides[next].flush)\n+ {\n+ vertices[u] = balanced_inters[sides[(u - 1 + k) %k].side][sides[next].side].pj;\n+ }\n+ else\n+ {\n+ throw std::logic_error(\"\");\n+ }\n+ }\n+ return vertices;\n+}\n+\n+} //namespace\n+\n+\n+///////////////////////// Helper function definitions /////////////////////////\n+\n+namespace minEnclosingConvexPolygon {\n+\n+//! Find the minimum enclosing convex kgon and its area. Converting inputs to used vector shape and data format.\n+/*!\n+ * @param points Set of points\n+ * @param k Number of vertices of the output polygon\n+ * @param minPolygon Minimum area convex k-gon enclosing the given set of points\n+ * @param area Area of minPolygon\n+ */\n+static void findMinEnclosingPolygon(cv::InputArray points,\n+ const int &k,\n+ CV_OUT cv::OutputArray &minPolygon,\n+ CV_OUT double &area) {\n+ CV_Assert(!points.empty());\n+ std::vector ngon, kgon;\n+ cv::convexHull(points, ngon, true);\n+ findMinEnclosingPolygon(ngon, k, kgon, area);\n+ cv::Mat(kgon).copyTo(minPolygon);\n+}\n+\n+//! Find the minimum enclosing convex polygon and its area for a given set of points. 
Handling of input errors.\n+/*!\n+ * @param ngon The polygon representing the convex hull of the points\n+ * @param k Number of vertices of the output polygon\n+ * @param minPolygon Minimum area convex k-gon enclosing the given polygon\n+ * @param area Area of minPolygon\n+ */\n+static void findMinEnclosingPolygon(const std::vector &ngon,\n+ const int &k,\n+ cv::OutputArray &minPolygon,\n+ double &area) {\n+ try\n+ {\n+ if (ngon.size() < 3)\n+ {\n+ throw std::invalid_argument(\n+ \"ngon must have 3 or more different points enclosing an area\" );\n+ }\n+ if (cv::contourArea(ngon) < EPSILON )\n+ {\n+ throw std::invalid_argument( \"invalid ngon: all points on line\" );\n+ }\n+ if (k <= 3)\n+ {\n+ throw std::invalid_argument( \"k must be 3 or higher\" );\n+ }\n+ const size_t n = ngon.size();\n+ if ((const int)n == k)\n+ {\n+ throw std::runtime_error (\"(n = k)\");\n+ }\n+ if ((const int)n < k)\n+ {\n+ throw std::runtime_error (\"(n < k)\");\n+ }\n+ }\n+ catch (std::invalid_argument &message)\n+ {\n+ std::cout << \"invalid argument: \" << message.what() << std::endl;\n+ return;\n+ }\n+ catch (std::runtime_error &message)\n+ {\n+ std::cout << \"Warning: no minimum area polygon calculated \" << message.what() << std::endl;\n+ cv::Mat(ngon).copyTo(minPolygon);\n+ area = cv::contourArea(minPolygon);\n+ return;\n+ }\n+ catch (...)\n+ {\n+ std::cout << \"input error\" << std::endl;\n+ return;\n+ }\n+\n+ try\n+ {\n+ std::vector kgon;\n+ findMinAreaPolygon(ngon, kgon, area, k);\n+ cv::Mat(kgon).copyTo(minPolygon);\n+ }\n+ catch (...)\n+ {\n+ std::cout << \"correct input but execution failed\" << std::endl;\n+ return;\n+ }", + "comment_created_at": "2025-06-18T07:03:02+00:00", + "comment_author": "asmorkalov", + "comment_body": "Please do not use try-catch. OpenCV uses CV_Assert, CV_Check for the function input validation and and CV_LOG_DEBUG/CV_LOG_INFO for notifications. Function should not silently ignore obvious invalid inputs like not enough points or wrong data types and ranges. ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2153834792", + "pr_number": 27369, + "pr_file": "modules/imgproc/src/min_enclosing_convex_polygon.cpp", + "created_at": "2025-06-18T07:17:46+00:00", + "commented_code": "/*M///////////////////////////////////////////////////////////////////////////////////////\n //\n // This file is part of OpenCV project.\n // It is subject to the license terms in the LICENSE file found in the top-level directory of this\n // distribution and at http://opencv.org/license.html.\n //\n //\n // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\n //\n // By downloading, copying, installing or using the software you agree to this license.\n // If you do not agree to this license, do not download, install,\n // copy or use the software.\n //\n // INFORMATION REGARDING THE CONTRIBUTION:\n //\n // Author: Sara Kuhnert\n // Created: 20.05.2025\n // E-mail: \n //\n // THE IMPLEMENTATION OF THE MODULES IS BASED ON THE FOLLOWING PAPER:\n //\n // [1] A. Aggarwal, J. S. Chang, Chee K. 
Yap: \"Minimum area circumscribing Polygons\", The Visual\n // Computer, 1:112-117, 1985\n //\n // The overall complexity of the algorithm is theta(n^2log(n)log(k)) where \"n\" represents the number\n // of vertices in the input convex polygon and \"k\" the number of vertices in the output polygon.\n //\n //\n //\n // License Agreement\n // For Open Source Computer Vision Library\n //\n // Copyright (C) 2000, Intel Corporation, all rights reserved.\n // Copyright (C) 2013, OpenCV Foundation, all rights reserved.\n // Copyright (C) 2020, Sara Kuhnert, all rights reserved.\n // Third party copyrights are property of their respective owners.\n //\n // Redistribution and use in source and binary forms, with or without modification,\n // are permitted provided that the following conditions are met:\n //\n // * Redistribution's of source code must retain the above copyright notice,\n // this list of conditions and the following disclaimer.\n //\n // * Redistribution's in binary form must reproduce the above copyright notice,\n // this list of conditions and the following disclaimer in the documentation\n // and/or other materials provided with the distribution.\n //\n // * The name of the copyright holders may not be used to endorse or promote products\n // derived from this software without specific prior written permission.\n //\n // This software is provided by the copyright holders and contributors \"as is\" and\n // any express or implied warranties, including, but not limited to, the implied\n // warranties of merchantability and fitness for a particular purpose are disclaimed.\n // In no event shall the Intel Corporation or contributors be liable for any direct,\n // indirect, incidental, special, exemplary, or consequential damages\n // (including, but not limited to, procurement of substitute goods or services;\n // loss of use, data, or profits; or business interruption) however caused\n // and on any theory of liability, whether in contract, strict liability,\n // or tort (including negligence or otherwise) arising in any way out of\n // the use of this software, even if advised of the possibility of such damage.\n //\n //M*/\n\n#include \"precomp.hpp\"\n#include \n\n#include \n#include \n#include \n#include \n#include \n#include \n#include \n\n\n\n//////////////////////////// Constants definitions ////////////////////////////\n\n#define EPSILON 1e-6\n\n\n///////////////////////// Helper function declarations /////////////////////////\n\nnamespace minEnclosingConvexPolygon {\n\nstatic void findMinEnclosingPolygon(cv::InputArray points,\n const int &k,\n CV_OUT cv::OutputArray &kgon,\n CV_OUT double &area);\n\nstatic void findMinEnclosingPolygon(const std::vector &ngon,\n const int &k,\n cv::OutputArray &kgon,\n double &area);\n\nstatic void findMinAreaPolygon(const std::vector &ngon,\n std::vector &minPolygon,\n double &area,\n int k);\n\n} //namespace\n\n\n//////////////////////// Class and struct declarations ////////////////////////\n\nnamespace minEnclosingConvexPolygon {\n\n//! Intersection point of two sides with its position relative to the polygon\n/*!\n * @param point Intersection point of the two sides\n * @param position Is the intersection point a valid solution?\n */\nstruct IntersectionPoint\n{\n cv::Point2f point = {-1.0, -1.0};\n bool position = false;\n\n IntersectionPoint() = default;\n IntersectionPoint(const cv::Point2f& a, const cv::Point2f& b,\n const cv::Point2f& c, const cv::Point2f& d);\n IntersectionPoint(int i, int j, const std::vector& ngon);\n};\n\n//! 
Container for the information about the intersection point of two flush sides\n/*!\n * @param intersection Instance of IntersectionPoint\n * @param extra_area The area that would be added to the kgon compared to the ngon\n * @param done Set to true once calculated to avoid redundant calculations\n */\nstruct FlushIntersect\n{\n IntersectionPoint intersection = {};\n double extra_area = std::numeric_limits::max();\n bool done = false;\n};\n\n//! Container for the information about a balanced side between two points\n/*!\n * @param pi First intersection point\n * @param pj Second intersection point\n * @param extra_area The area that would be added to the kgon compared to the ngon\n * @param flush Flush is a special case of balanced. If the balanced side is also flush, this indicates which of the sides of the ngon it is flush with. The default for not flush sides is -1.\n * @param position Is the balanced side a valid solution?\n * @param done Set to true once calculated to avoid redundant calculations\n */\nstruct BalancedIntersect\n{\n cv::Point2f pi = {-1, -1};\n cv::Point2f pj = {-1, -1};\n double extra_area = std::numeric_limits::max();\n int flush = -1;\n bool position = false;\n bool done = false;\n};\n\n//! Container for the best segment between two points\n/*!\n * @param extra_area The area that would be added to the kgon compared to the ngon\n * @param side Side of the ngon to which it is flush (if flush is true) or edge of the ngon which it is touching (if flush is false)\n * @param flush True if it is flush. If false, it is balanced but not flush\n * @param exists Does a valid solution exist?\n * @param done Set to true once calculated to avoid redundant calculations\n */\nstruct Segment\n{\n double extra_area = std::numeric_limits::max();\n int side = -1;\n bool flush = false;\n bool exists = false;\n bool done = false;\n};\n\n//! Combination of selected sides and their corresponding chains, which generates the kgon with the minimum area\n/*!\n * @param area Extra area that the minimal kgon is bigger than the input ngon\n * @param i First side\n * @param j Second side\n */\nstruct Minimum\n{\n double area = std::numeric_limits::max();\n int i = -1;\n int j = -1;\n};\n\n//! Container for a side of the minimal kgon\n/*!\n * @param side Index of the side of the ngon which it is flush with or the vertex which it is touching\n * @param flush Is this side flush?\n */\nstruct Side\n{\n int side = -1;\n bool flush = false;\n\n Side() = default;\n Side(int i, bool b);\n};\n\n//! Container for the minimal kgon\n/*!\n * @param sides Indices of the corresponding sindes of the ngon\n * @param vertices Vertices of the kgon\n * @param extra_area Extra area that the minimal kgon is bigger than the input ngon\n * @param i, @param j The chains which build the minimal kgon are formed between those two sides\n */\nstruct Kgon\n{\n std::vector sides;\n std::vector vertices;\n double extra_area = std::numeric_limits::max();\n int i = -1;\n int j = -1;\n};\n\n//! 
Class for all the possible combinations of flush intersections of two sides of the input polygon\n/*!\n * @param ngon Input polygon\n * @param intersections Matrix of all possible flush intersections of two sides of the ngon\n * @param area_edges Collection of the points that define the extra area of the kgon\n */\nclass FlushIntersections\n{\nprivate:\n const std::vector& ngon;\n std::vector> intersections;\n std::vector area_edges;\n\npublic:\n FlushIntersections(const std::vector& _ngon);\n const FlushIntersect& lineIntersect(int i, int j);\n};\n\n//! Class for all the possible combinations of balanced intersections with two sides of the input polygon\n/*!\n * @param ngon Input polygon\n * @param balanced_intersections atrix of all possible balanced intersections of two sides of the ngon\n * @param area_edges Collection of the points that define the extra area of the kgon\n */\nclass BalancedIntersections\n{\nprivate:\n const std::vector& ngon;\n std::vector> balanced_intersections;\n std::vector area_edges;\n\n double extraArea ( int first, int last, const cv::Point2f& extra1, const cv::Point2f& extra2 );\n\n BalancedIntersect flush(int i, int j, int e);\n\npublic:\n BalancedIntersections(const std::vector& _ngon);\n BalancedIntersect balancedIntersect(int i, int j, int e);\n void markAsDone(int i, int j);\n const std::vector& operator[](size_t i) const;\n};\n\n//! Class for building the one-sided and h-sided chains and calculation the minimum enclosing polygon based on those chains\n/*!\n * @param ngon Input polygon\n * @param single_sides Matrix of the best one-sided chain for each combination of two sides of the ngon\n * @param middle_sides Matrix of the middle side of each h-sided chain for all relevant h and each combination of two sides of the ngon\n * @param intersections An instance of FlushIntersections to store already calculated flush intersections\n * @param balanced_inters An instance of BalancedIntersections to store already calculated balanced intersections\n */\nclass Chains\n{\nprivate:\n const std::vector& ngon;\n std::vector> single_sides;\n std::vector>> middle_sides;\n FlushIntersections intersections;\n BalancedIntersections balanced_inters;\n\n void findSingleE(int i, int j, int l, int r);\n void singleSideImpl(int i, int j1, int j2);\n void findMiddleE1(int i, int j, int l, int r);\n void middleSideImpl1(int i, int j1, int j2);\n void findMiddleE(int h, int i, int j, int l, int r);\n void middleSideImpl(int h, int i, int j1, int j2);\n\npublic:\n Chains(const std::vector& _ngon, int k);\n\n std::set relevantChainLengths(int h);\n void calcOneSidedChains();\n void calcMiddleChains(int h);\n\n Minimum minimumArea(int n, int k);\n std::vector reconstructHSidedChain(int h, int i, int j);\n std::vector findKSides(int k, int i, int j);\n std::vector findKVertices(std::vector& sides);\n\n};\n\n} //namespace\n\n\n//////////////////////// Class and struct constructors ////////////////////////\n\nnamespace minEnclosingConvexPolygon {\n\n//! Constructor for IntersectionPoint. 
Find the intersection point of two lines given by four points and decide whether it is on the correct side of the polygon\n/*!\n * @param a First point of the first line\n * @param b Second point of the first line\n * @param c First point of the second line\n * @param d Second point of the second line\n */\nIntersectionPoint::IntersectionPoint(const cv::Point2f& a, const cv::Point2f& b,\n const cv::Point2f& c, const cv::Point2f& d)\n{\n const cv::Point2f ab = b - a, cd = d - c, ac = c - a;\n const double det = ab.cross(-cd);\n if(std::abs (det) < EPSILON )\n return;\n\n const double loc = ac.cross(-cd) / det;\n if(loc <= 0)\n return;\n\n point = a + loc * ab;\n position = true;\n}\n\n//! Constructor for IntersectionPoint. Find the intersection point of two sides of the polygon based on the given side indices.\n/*!\n * @param i Index of the first side\n * @param j Index of the second side\n * @param ngon Input polygon with n sides\n */\nIntersectionPoint::IntersectionPoint(int i, int j,\n const std::vector& ngon) :\n IntersectionPoint(ngon[i],\n ngon[(i + 1) % ngon.size()],\n ngon[j],\n ngon[(j + 1) % ngon.size()])\n{}\n\n//! Constructor for FlushIntersections\n/*!\n * @param _ngon Input polygon with n sides\n */\nFlushIntersections::FlushIntersections(const std::vector& _ngon) :\n ngon(_ngon),\n intersections(ngon.size(),\n std::vector(ngon.size()))\n{\n area_edges.reserve(ngon.size());\n}\n\n//! Constructor for BalancedIntersections\n/*!\n * @param _ngon Input polygon with n sides\n */\nBalancedIntersections::BalancedIntersections(const std::vector& _ngon) :\nngon(_ngon),\nbalanced_intersections(ngon.size(),\n std::vector(ngon.size()))\n{\n area_edges.reserve(ngon.size());\n}\n\n//! Constructor for Side. Assign a side index and weather it is flush or not.\n/*!\n * @param i Side index\n * @param b Is side i flush?\n */\nSide::Side(int i, bool b)\n{\n side = i;\n flush = b;\n}\n\n//! Constructor for Chains\n/*!\n * @param\n */\nChains::Chains(const std::vector& _ngon, int k) :\n ngon(_ngon),\n single_sides(std::vector>(ngon.size(),\n std::vector(ngon.size()))),\n middle_sides(std::vector>>(\n k, std::vector>(ngon.size(),\n std::vector(ngon.size())))),\n intersections(ngon),\n balanced_inters(ngon)\n{}\n\n} //namespace\n\n\n////////////////////////// Class and struct functions //////////////////////////\n\nnamespace minEnclosingConvexPolygon {\n\n//! Find the intersection point of two sides, decide weather it is a valid point and if so calculate the extra area caused by that intersection.\n/*!\n * @param i Index of the first side\n * @param j Index of the second side\n */\nconst FlushIntersect& FlushIntersections::lineIntersect(int i, int j)\n{\n FlushIntersect& itr = intersections[i][j];\n if(itr.done)\n return itr;\n\n const size_t n = ngon.size();\n if((i + 1) % n == j)\n {\n itr.intersection.point = ngon[j];\n itr.intersection.position = true;\n itr.extra_area = 0.0;\n itr.done = true;\n return itr;\n }\n itr.intersection = IntersectionPoint(i, j, ngon);\n if(itr.intersection.position)\n {\n area_edges.resize(0);\n for(int t = (i + 1) % n; t != (j + 1) % n; t = (t + 1) % n)\n {\n area_edges.push_back(ngon[t]);\n }\n area_edges.push_back(itr.intersection.point);\n itr.extra_area = cv::contourArea(area_edges);\n itr.done = true;\n }\n else\n {\n itr.extra_area = std::numeric_limits::max();\n itr.intersection.position = false;\n itr.done = true;\n }\n return itr;\n}\n\n//! 
Calculate the added area that is enclosed by a sequence of consecutive vertices of the polygon and the intersection point of two sides.\n/*!\n * @param first Index of the first point of the sequence\n * @param last Index of the last point of the sequence\n * @param extra1 Last point of the sequence\n * @param extra2 Intersection point\n */\ndouble BalancedIntersections::extraArea(int first, int last,\n const cv::Point2f& extra1,\n const cv::Point2f& extra2)\n{\n const size_t n = ngon.size();\n area_edges.resize(0);\n for(int t = first; t != last; t = (t + 1) % n)\n area_edges.push_back(ngon[t]);\n\n area_edges.push_back(extra1);\n area_edges.push_back(extra2);\n\n return cv::contourArea(area_edges);\n}\n\n//! Determine the intersection points of a flush side e that lies between sides i and j with these two sides. Calculate the extra area and the position of the intersection points relative to the polygon. Update balanced_intersections if the new area is smaller than extraArea.\n/*!\n * @param i Index of first side\n * @param j Index of second side\n * @param e Index of a side between i and j\n */\nBalancedIntersect BalancedIntersections::flush(int i, int j, int e)\n{\n if(i == e)\n std::logic_error(\"\");\n if(j == e)\n std::logic_error(\"\");\n\n const size_t n = ngon.size();\n const int before = (e - 1 + n) % n;\n BalancedIntersect bi = balanced_intersections[i][j];\n\n const IntersectionPoint left_e(i, e, ngon);\n const IntersectionPoint right_e(e, j, ngon);\n\n if(left_e.position == true && right_e.position == true)\n {\n double extra_area = extraArea((i + 1) % n, e, ngon[e], left_e.point);\n if(extra_area < bi.extra_area)\n {\n extra_area += extraArea((e + 1) % n, j, ngon[j], right_e.point);\n if(extra_area < bi.extra_area)\n {\n bi.extra_area = extra_area;\n bi.pi = left_e.point;\n bi.pj = right_e.point;\n bi.position = true;\n bi.flush = e;\n }\n }\n }\n\n if(before != i)\n {\n const IntersectionPoint left_before(i, before, ngon);\n const IntersectionPoint right_before(before, j, ngon);\n if(left_before.position == true && right_before.position == true)\n {\n double extra_area =\n extraArea((i + 1) % n, before, ngon[before], left_before.point);\n\n if(extra_area < bi.extra_area)\n {\n extra_area += extraArea(e, j, ngon[j], right_before.point);\n if(extra_area < bi.extra_area)\n {\n bi.extra_area = extra_area;\n bi.pi = left_before.point;\n bi.pj = right_before.point;\n bi.position = true;\n bi.flush = before;\n }\n }\n }\n }\n return bi;\n}\n\n//! Determine the intersection points of a balanced side e that lies between sides i and j with these two sides. If no valid balanced edge is found, ccheck for flush sides. Calculate the extra area and the position of the intersection points relative to the polygon. 
Update balanced_intersections if the new area is smaller than extraArea.\n/*!\n * @param i Index of first side\n * @param j Index of second side\n * @param e Index of a side between i and j\n */\nBalancedIntersect BalancedIntersections::balancedIntersect(int i, int j, int e)\n{\n if(balanced_intersections[i][j].done)\n return balanced_intersections[i][j];\n\n const size_t n = ngon.size();\n if((i + 2) % n == j)\n {\n BalancedIntersect& bi = balanced_intersections[i][j];\n bi.pi = ngon[(i + 1) % n];\n bi.pj = ngon[j];\n bi.flush = (i + 1) % n;\n bi.position = true;\n bi.extra_area = 0.0;\n bi.done = true;\n return bi;\n }\n\n const cv::Point2f p1 = ngon[i], p2 = ngon[(i + 1) % n], p3 = ngon[e],\n p4 = ngon[j], p5 = ngon[(j + 1) % n];\n const cv::Point2f dir12 = p2 - p1, dir45 = p5 - p4;\n const double det = dir12.cross(dir45);\n if(std::abs (det) < EPSILON )\n {\n flush(i, j, e);\n return balanced_intersections[i][j];\n }\n\n BalancedIntersect bi;\n cv::Point2f temp = 2 * p3 - p2 - p4;\n const double s = temp.cross(dir45) / det;\n const double t = temp.cross(dir12) / det;\n if(s >= 0 && t >= 0)\n {\n bi.pi = p2 + dir12 * s;\n bi.pj = p4 - dir45 * t;\n bi.position = true;\n\n const cv::Point2f dir_balanced = bi.pj - bi.pi,\n dir_left = p3 - ngon[(e - 1 + n) % n],\n dir_right = ngon[(e + 1) % n] - p3;\n\n const double cross_left = dir_balanced.cross(dir_left),\n cross_right = dir_balanced.cross(dir_right);\n if((cross_left < 0 && cross_right < 0) || (cross_left > 0 && cross_right > 0))\n {\n BalancedIntersect reset;\n bi = reset;\n bi = flush(i, j, e);\n }\n else if(std::abs (cross_left) < EPSILON )\n {\n bi.flush = (e - 1 + n) % n;\n bi.extra_area =\n extraArea((i + 1) % n, (e - 1 + n) % n, ngon[(e - 1 + n) % n], bi.pi)\n + extraArea(e, j, ngon[j], bi.pj);\n }\n else if(std::abs (cross_right) < EPSILON )\n {\n bi.flush = e;\n bi.extra_area = extraArea((i + 1) % n, e, ngon[e], bi.pi)\n + extraArea((e + 1) % n, j, ngon[j], bi.pj);\n }\n else\n {\n bi.extra_area = extraArea((i + 1) % n, e, ngon[e], bi.pi)\n + extraArea(e, j, ngon[j], bi.pj);\n }\n }\n else\n {\n flush(i, j, e);\n }\n\n if(bi.extra_area < balanced_intersections[i][j].extra_area)\n {\n balanced_intersections[i][j] = bi;\n }\n return bi;\n}\n\n//! Set function for the done attribute of BalancedIntersections\n/*!\n * @param i Index of first side\n * @param j Index of second side\n */\nvoid BalancedIntersections::markAsDone(int i, int j)\n{\n balanced_intersections[i][j].done = true;\n}\n\n//! Operator to get a vector of elements from BalancedIntersections\n/*!\n * @param i index of a side\n */\nconst std::vector& BalancedIntersections::operator[](\n size_t i) const\n{\n return balanced_intersections[i];\n}\n\n//! 
For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the one sided chain with minimal extra area\n/*!\n * @param i Index of first fixed side\n * @param j Index of second fixed side\n * @param l Index of left boundary\n * @param r Index of right boundary\n */\nvoid Chains::findSingleE(int i, int j, int l, int r)\n{\n const size_t n = ngon.size();\n Segment& one = single_sides[i][j];\n if (one.done)\n return;\n\n double min_area = std::numeric_limits::max();\n for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n {\n BalancedIntersect candidate = balanced_inters.balancedIntersect(i, j, e);\n if(candidate.extra_area < min_area)\n {\n min_area = candidate.extra_area;\n one.side = e;\n one.extra_area = candidate.extra_area;\n one.flush = false;\n one.exists = true;\n }\n }\n one.done = true;\n balanced_inters.markAsDone(i, j);\n}\n\n//! Recursively repeat the search for the one sided chain with minimal extra area to shrink the boundaries of bigger distances\n/*!\n * @param i Fixed side of the ngon\n * @param j1 Lower boundary of search intervall\n * @param j2 Upper boundary of search intervall\n */\nvoid Chains::singleSideImpl(int i, int j1, int j2)\n{\n const size_t n = ngon.size();\n if((j1 + 1) %n == j2)\n {\n return;\n }\n\n int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n int l = single_sides[i][j1].side < 0 ? (j1 + 1) %n : single_sides[i][j1].side;\n int r = single_sides[i][j2].side < 0 ? (j2 - 1 + n) %n : single_sides[i][j2].side;\n\n findSingleE(i, mid, l, r);\n singleSideImpl(i, j1, mid);\n singleSideImpl(i, mid, j2);\n\n return;\n}\n\n//! For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the middle side of the h-sided chain with minimal extra area. This is the version for h = 1.\n/*!\n * @param i Index of first fixed side\n * @param j Index of second fixed side\n * @param l Index of left boundary\n * @param r Index of right boundary\n */\nvoid Chains::findMiddleE1(int i, int j, int l, int r)\n{\n const size_t n = ngon.size();\n Segment& one = middle_sides[1][i][j];\n if (one.done)\n return;\n\n double min_area = std::numeric_limits::max();\n for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n {\n const FlushIntersect& before = intersections.lineIntersect(i, e);\n if(!before.intersection.position)\n continue;\n const FlushIntersect& after = intersections.lineIntersect(e, j);\n if(!after.intersection.position)\n continue;\n\n double tmp_area = before.extra_area + after.extra_area;\n if(tmp_area < min_area)\n {\n min_area = tmp_area;\n one.side = e;\n one.extra_area = tmp_area;\n one.exists = true;\n one.flush = true;\n }\n }\n one.done = true;\n}\n\n//! Recursively repeat the search for the middle side of the h-sided chain with minimal extra area to shrink the boundaries of bigger distances. This is the version for h = 1.\n/*!\n * @param i Fixed side of the ngon\n * @param j1 Lower boundary of search intervall\n * @param j2 Upper boundary of search intervall\n */\nvoid Chains::middleSideImpl1(int i, int j1, int j2)\n{\n const size_t n = ngon.size();\n if((j1 + 1) %n == j2)\n {\n return;\n }\n\n int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n int l = middle_sides[1][i][j1].side < 0 ? (j1 + 1) %n : middle_sides[1][i][j1].side;\n int r = middle_sides[1][i][j2].side < 0 ? (j2 - 1 + n) %n : middle_sides[1][i][j2].side;\n\n findMiddleE1(i, mid, l, r);\n middleSideImpl1(i, j1, mid);\n middleSideImpl1(i, mid, j2);\n\n return;\n}\n\n//! 
For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the middle side of the h-sided chain with minimal extra area. This is the version for h > 1.\n/*!\n * @param h Length of the h-sided chain\n * @param i Index of first fixed side\n * @param j Index of second fixed side\n * @param l Index of left boundary\n * @param r Index of right boundary\n */\nvoid Chains::findMiddleE(int h, int i, int j, int l, int r)\n{\n const size_t n = ngon.size();\n Segment& one = middle_sides[h][i][j];\n if (one.done)\n return;\n\n const int dist = (i <= j ? (j - i) : (j + n - i));\n const int h_floor = (h - 1) / 2;\n const int h_ceil = h - 1 - h_floor;\n\n if(dist == 0)\n throw std::logic_error(\"\");\n if(dist - 1 < h)\n {\n one.done = true;\n return;\n }\n if(dist - 1 == h)\n {\n one.side = (i + h_floor + 1) % n;\n one.extra_area = 0.0;\n one.exists = true;\n one.flush = true;\n one.done = true;\n return;\n }\n\n double min_area = std::numeric_limits::max();\n for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n {\n const Segment& before = middle_sides[h_floor][i][e];\n if (before.extra_area == std::numeric_limits::max())\n continue;\n const Segment& after = middle_sides[h_ceil][e][j];\n if(after.extra_area == std::numeric_limits::max())\n continue;\n\n double tmp_area = before.extra_area + after.extra_area;\n if(tmp_area < min_area)\n {\n min_area = tmp_area;\n one.side = e;\n one.extra_area = tmp_area;\n one.exists = true;\n one.flush = true;\n }\n }\n one.done = true;\n}\n\n//! Recursively repeat the search for the middle side of the h-sided chain with minimal extra area to shrink the boundaries of bigger distances. This is the version for h > 1.\n/*!\n * @param h Length of the h-sided chain\n * @param i Fixed side of the ngon\n * @param j1 Lower boundary of search intervall\n * @param j2 Upper boundary of search intervall\n */\nvoid Chains::middleSideImpl(int h, int i, int j1, int j2)\n{\n const size_t n = ngon.size();\n if((j1 + 1) %n == j2)\n {\n return;\n }\n\n int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n int l = middle_sides[h][i][j1].side < 0 ? (j1 + 1) %n : middle_sides[h][i][j1].side;\n int r = middle_sides[h][i][j2].side < 0 ? (j2 - 1 + n) %n : middle_sides[h][i][j2].side;\n\n findMiddleE(h, i, mid, l, r);\n middleSideImpl(h, i, j1, mid);\n middleSideImpl(h, i, mid, j2);\n\n return;\n}\n\n//! Calculate the relevant chain lengths. Starting with a maximum chain length of h, down to a chain length of 1, only chains with half the length of the chain before are needed.\n/*!\n * @param h Length of the longest chain (h = k - 3)\n */\nstd::set Chains::relevantChainLengths(int h)\n{\n if(h <= 1)\n return {h};\n\n std::set hs = {h};\n const int h_floor = (h - 1) / 2;\n const int h_ceil = h - 1 - h_floor;\n std::set h1 = relevantChainLengths(h_floor);\n for(const int& hNew : h1)\n hs.insert(hNew);\n if (h_ceil != h_floor)\n {\n std::set h2 = relevantChainLengths(h_ceil);\n for(const int& hNew : h2)\n hs.insert(hNew);\n }\n return hs;\n}\n\n//! Recursively calculate all the onesided chains for each combination of polygon sides.\n/*!\n */\nvoid Chains::calcOneSidedChains()\n{\n const size_t n = ngon.size();\n for(size_t i = 0; i < n; i++)\n {\n int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n\n findSingleE(i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n findSingleE(i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n singleSideImpl(i, j1, j2);\n }\n}\n\n//! 
Recursively calculate the middle sides of the h-sided chains for all combinations of polygon sides.\n/*!\n * @param h Length of the chains\n */\nvoid Chains::calcMiddleChains(int h)\n{\n const size_t n = ngon.size();\n if (h == 0)\n {\n for (size_t i = 0; i < n; i++)\n {\n for (size_t j = 0; j < n; j++)\n {\n Segment& one = middle_sides[h][i][j];\n const FlushIntersect itrs = intersections.lineIntersect(i, j);\n\n one.side = -1;\n one.extra_area = itrs.extra_area;\n one.exists = false;\n one.flush = false;\n one.done = true;\n }\n }\n return;\n }\n if (h == 1)\n {\n for (size_t i = 0; i < n; i++)\n {\n int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n\n findMiddleE1(i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n findMiddleE1(i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n middleSideImpl1(i, j1, j2);\n }\n return;\n }\n\n for (size_t i = 0; i < n; i++)\n {\n int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n\n findMiddleE(h, i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n findMiddleE(h, i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n middleSideImpl(h, i, j1, j2);\n }\n}\n\n//! Find the i and j with the smallest extra area.\n/*!\n * @param n Number of sides of the input polygon\n * @param k Number of sides of the output polygon\n */\nMinimum Chains::minimumArea(int n, int k)\n{\n Minimum min{};\n for(int i = 0; i < n; i++)\n {\n for(int j = 0; j < n; j++)\n {\n if(!single_sides[i][j].exists || !middle_sides[k - 3][j][i].exists)\n continue;\n\n double tmp_area =\n single_sides[i][j].extra_area + middle_sides[k - 3][j][i].extra_area;\n if(tmp_area < min.area)\n {\n min.area = tmp_area;\n min.i = i;\n min.j = j;\n }\n }\n }\n return min;\n}\n\n//! Reconstruct the h-sided chain based on the two sides that give the smalles extra area.\n/*!\n * @param h Length of the k - 3 sided chain.\n * @param i First index from minimumArea\n * @param j Second index from minimumArea\n */\nstd::vector Chains::reconstructHSidedChain(int h, int i, int j)\n{\n if(h == 0)\n {\n throw std::logic_error(\"\");\n }\n if(h == 1)\n {\n return std::vector{{middle_sides[h][i][j].side, true}};\n }\n\n std::vector before, after;\n const int h_floor = (h - 1) / 2;\n const int h_ceil = h - 1 - h_floor;\n if(h_floor > 0)\n before = reconstructHSidedChain(h_floor, i, middle_sides[h][i][j].side);\n if(h_ceil > 0)\n after = reconstructHSidedChain(h_ceil, middle_sides[h][i][j].side, j);\n\n std::vector sides{{middle_sides[h][i][j].side, true}};\n sides.insert(sides.end(), before.begin(), before.end());\n sides.insert(sides.end(), after.begin(), after.end());\n return sides;\n}\n\n//! Get the k sides that build the kgon based on the two sides that give the smalles extra area.\n/*!\n * @param k Number of sides of the output polygon\n * @param i First index from minimumArea\n * @param j Second index from minimumArea\n */\nstd::vector Chains::findKSides(int k, int i, int j)\n{\n std::vector sides;\n sides.push_back({i, true});\n sides.push_back({j, true});\n\n if(single_sides[i][j].flush)\n sides.push_back({single_sides[i][j].side, true});\n else\n sides.push_back({single_sides[i][j].side, false});\n\n std::vector flush_chain = reconstructHSidedChain(k - 3, j, i);\n sides.insert(sides.end(), flush_chain.begin(), flush_chain.end());\n std::sort(sides.begin(), sides.end(),\n [](const Side& lhs, const Side& rhs) { return lhs.side < rhs.side; });\n\n return sides;\n}\n\n//! 
Calculate the k vertices of the kgon from its k sides.\n/*!\n * @param sides Sides of the output polygon\n */\nstd::vector Chains::findKVertices(std::vector& sides)\n{\n const int k = sides.size();\n std::vector vertices(k);\n\n for(int u = 0; u < k; u++)\n {\n const int next = (u + 1) % k;\n if(sides[u].flush && sides[next].flush)\n {\n vertices[u] = intersections.lineIntersect(sides[u].side,\n sides[next].side).intersection.point;\n }\n else if(sides[u].flush && !sides[next].flush)\n {\n vertices[u] = balanced_inters[sides[u].side][sides[(u + 2) % k].side].pi;\n }\n else if(!sides[u].flush && sides[next].flush)\n {\n vertices[u] = balanced_inters[sides[(u - 1 + k) %k].side][sides[next].side].pj;\n }\n else\n {\n throw std::logic_error(\"\");", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2153834792", + "repo_full_name": "opencv/opencv", + "pr_number": 27369, + "pr_file": "modules/imgproc/src/min_enclosing_convex_polygon.cpp", + "discussion_id": "2153834792", + "commented_code": "@@ -0,0 +1,1185 @@\n+/*M///////////////////////////////////////////////////////////////////////////////////////\n+ //\n+ // This file is part of OpenCV project.\n+ // It is subject to the license terms in the LICENSE file found in the top-level directory of this\n+ // distribution and at http://opencv.org/license.html.\n+ //\n+ //\n+ // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\n+ //\n+ // By downloading, copying, installing or using the software you agree to this license.\n+ // If you do not agree to this license, do not download, install,\n+ // copy or use the software.\n+ //\n+ // INFORMATION REGARDING THE CONTRIBUTION:\n+ //\n+ // Author: Sara Kuhnert\n+ // Created: 20.05.2025\n+ // E-mail: \n+ //\n+ // THE IMPLEMENTATION OF THE MODULES IS BASED ON THE FOLLOWING PAPER:\n+ //\n+ // [1] A. Aggarwal, J. S. Chang, Chee K. 
Yap: \"Minimum area circumscribing Polygons\", The Visual\n+ // Computer, 1:112-117, 1985\n+ //\n+ // The overall complexity of the algorithm is theta(n^2log(n)log(k)) where \"n\" represents the number\n+ // of vertices in the input convex polygon and \"k\" the number of vertices in the output polygon.\n+ //\n+ //\n+ //\n+ // License Agreement\n+ // For Open Source Computer Vision Library\n+ //\n+ // Copyright (C) 2000, Intel Corporation, all rights reserved.\n+ // Copyright (C) 2013, OpenCV Foundation, all rights reserved.\n+ // Copyright (C) 2020, Sara Kuhnert, all rights reserved.\n+ // Third party copyrights are property of their respective owners.\n+ //\n+ // Redistribution and use in source and binary forms, with or without modification,\n+ // are permitted provided that the following conditions are met:\n+ //\n+ // * Redistribution's of source code must retain the above copyright notice,\n+ // this list of conditions and the following disclaimer.\n+ //\n+ // * Redistribution's in binary form must reproduce the above copyright notice,\n+ // this list of conditions and the following disclaimer in the documentation\n+ // and/or other materials provided with the distribution.\n+ //\n+ // * The name of the copyright holders may not be used to endorse or promote products\n+ // derived from this software without specific prior written permission.\n+ //\n+ // This software is provided by the copyright holders and contributors \"as is\" and\n+ // any express or implied warranties, including, but not limited to, the implied\n+ // warranties of merchantability and fitness for a particular purpose are disclaimed.\n+ // In no event shall the Intel Corporation or contributors be liable for any direct,\n+ // indirect, incidental, special, exemplary, or consequential damages\n+ // (including, but not limited to, procurement of substitute goods or services;\n+ // loss of use, data, or profits; or business interruption) however caused\n+ // and on any theory of liability, whether in contract, strict liability,\n+ // or tort (including negligence or otherwise) arising in any way out of\n+ // the use of this software, even if advised of the possibility of such damage.\n+ //\n+ //M*/\n+\n+#include \"precomp.hpp\"\n+#include \n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+\n+\n+\n+//////////////////////////// Constants definitions ////////////////////////////\n+\n+#define EPSILON 1e-6\n+\n+\n+///////////////////////// Helper function declarations /////////////////////////\n+\n+namespace minEnclosingConvexPolygon {\n+\n+static void findMinEnclosingPolygon(cv::InputArray points,\n+ const int &k,\n+ CV_OUT cv::OutputArray &kgon,\n+ CV_OUT double &area);\n+\n+static void findMinEnclosingPolygon(const std::vector &ngon,\n+ const int &k,\n+ cv::OutputArray &kgon,\n+ double &area);\n+\n+static void findMinAreaPolygon(const std::vector &ngon,\n+ std::vector &minPolygon,\n+ double &area,\n+ int k);\n+\n+} //namespace\n+\n+\n+//////////////////////// Class and struct declarations ////////////////////////\n+\n+namespace minEnclosingConvexPolygon {\n+\n+//! 
Intersection point of two sides with its position relative to the polygon\n+/*!\n+ * @param point Intersection point of the two sides\n+ * @param position Is the intersection point a valid solution?\n+ */\n+struct IntersectionPoint\n+{\n+ cv::Point2f point = {-1.0, -1.0};\n+ bool position = false;\n+\n+ IntersectionPoint() = default;\n+ IntersectionPoint(const cv::Point2f& a, const cv::Point2f& b,\n+ const cv::Point2f& c, const cv::Point2f& d);\n+ IntersectionPoint(int i, int j, const std::vector& ngon);\n+};\n+\n+//! Container for the information about the intersection point of two flush sides\n+/*!\n+ * @param intersection Instance of IntersectionPoint\n+ * @param extra_area The area that would be added to the kgon compared to the ngon\n+ * @param done Set to true once calculated to avoid redundant calculations\n+ */\n+struct FlushIntersect\n+{\n+ IntersectionPoint intersection = {};\n+ double extra_area = std::numeric_limits::max();\n+ bool done = false;\n+};\n+\n+//! Container for the information about a balanced side between two points\n+/*!\n+ * @param pi First intersection point\n+ * @param pj Second intersection point\n+ * @param extra_area The area that would be added to the kgon compared to the ngon\n+ * @param flush Flush is a special case of balanced. If the balanced side is also flush, this indicates which of the sides of the ngon it is flush with. The default for not flush sides is -1.\n+ * @param position Is the balanced side a valid solution?\n+ * @param done Set to true once calculated to avoid redundant calculations\n+ */\n+struct BalancedIntersect\n+{\n+ cv::Point2f pi = {-1, -1};\n+ cv::Point2f pj = {-1, -1};\n+ double extra_area = std::numeric_limits::max();\n+ int flush = -1;\n+ bool position = false;\n+ bool done = false;\n+};\n+\n+//! Container for the best segment between two points\n+/*!\n+ * @param extra_area The area that would be added to the kgon compared to the ngon\n+ * @param side Side of the ngon to which it is flush (if flush is true) or edge of the ngon which it is touching (if flush is false)\n+ * @param flush True if it is flush. If false, it is balanced but not flush\n+ * @param exists Does a valid solution exist?\n+ * @param done Set to true once calculated to avoid redundant calculations\n+ */\n+struct Segment\n+{\n+ double extra_area = std::numeric_limits::max();\n+ int side = -1;\n+ bool flush = false;\n+ bool exists = false;\n+ bool done = false;\n+};\n+\n+//! Combination of selected sides and their corresponding chains, which generates the kgon with the minimum area\n+/*!\n+ * @param area Extra area that the minimal kgon is bigger than the input ngon\n+ * @param i First side\n+ * @param j Second side\n+ */\n+struct Minimum\n+{\n+ double area = std::numeric_limits::max();\n+ int i = -1;\n+ int j = -1;\n+};\n+\n+//! Container for a side of the minimal kgon\n+/*!\n+ * @param side Index of the side of the ngon which it is flush with or the vertex which it is touching\n+ * @param flush Is this side flush?\n+ */\n+struct Side\n+{\n+ int side = -1;\n+ bool flush = false;\n+\n+ Side() = default;\n+ Side(int i, bool b);\n+};\n+\n+//! 
Container for the minimal kgon\n+/*!\n+ * @param sides Indices of the corresponding sindes of the ngon\n+ * @param vertices Vertices of the kgon\n+ * @param extra_area Extra area that the minimal kgon is bigger than the input ngon\n+ * @param i, @param j The chains which build the minimal kgon are formed between those two sides\n+ */\n+struct Kgon\n+{\n+ std::vector sides;\n+ std::vector vertices;\n+ double extra_area = std::numeric_limits::max();\n+ int i = -1;\n+ int j = -1;\n+};\n+\n+//! Class for all the possible combinations of flush intersections of two sides of the input polygon\n+/*!\n+ * @param ngon Input polygon\n+ * @param intersections Matrix of all possible flush intersections of two sides of the ngon\n+ * @param area_edges Collection of the points that define the extra area of the kgon\n+ */\n+class FlushIntersections\n+{\n+private:\n+ const std::vector& ngon;\n+ std::vector> intersections;\n+ std::vector area_edges;\n+\n+public:\n+ FlushIntersections(const std::vector& _ngon);\n+ const FlushIntersect& lineIntersect(int i, int j);\n+};\n+\n+//! Class for all the possible combinations of balanced intersections with two sides of the input polygon\n+/*!\n+ * @param ngon Input polygon\n+ * @param balanced_intersections atrix of all possible balanced intersections of two sides of the ngon\n+ * @param area_edges Collection of the points that define the extra area of the kgon\n+ */\n+class BalancedIntersections\n+{\n+private:\n+ const std::vector& ngon;\n+ std::vector> balanced_intersections;\n+ std::vector area_edges;\n+\n+ double extraArea ( int first, int last, const cv::Point2f& extra1, const cv::Point2f& extra2 );\n+\n+ BalancedIntersect flush(int i, int j, int e);\n+\n+public:\n+ BalancedIntersections(const std::vector& _ngon);\n+ BalancedIntersect balancedIntersect(int i, int j, int e);\n+ void markAsDone(int i, int j);\n+ const std::vector& operator[](size_t i) const;\n+};\n+\n+//! 
Class for building the one-sided and h-sided chains and calculation the minimum enclosing polygon based on those chains\n+/*!\n+ * @param ngon Input polygon\n+ * @param single_sides Matrix of the best one-sided chain for each combination of two sides of the ngon\n+ * @param middle_sides Matrix of the middle side of each h-sided chain for all relevant h and each combination of two sides of the ngon\n+ * @param intersections An instance of FlushIntersections to store already calculated flush intersections\n+ * @param balanced_inters An instance of BalancedIntersections to store already calculated balanced intersections\n+ */\n+class Chains\n+{\n+private:\n+ const std::vector& ngon;\n+ std::vector> single_sides;\n+ std::vector>> middle_sides;\n+ FlushIntersections intersections;\n+ BalancedIntersections balanced_inters;\n+\n+ void findSingleE(int i, int j, int l, int r);\n+ void singleSideImpl(int i, int j1, int j2);\n+ void findMiddleE1(int i, int j, int l, int r);\n+ void middleSideImpl1(int i, int j1, int j2);\n+ void findMiddleE(int h, int i, int j, int l, int r);\n+ void middleSideImpl(int h, int i, int j1, int j2);\n+\n+public:\n+ Chains(const std::vector& _ngon, int k);\n+\n+ std::set relevantChainLengths(int h);\n+ void calcOneSidedChains();\n+ void calcMiddleChains(int h);\n+\n+ Minimum minimumArea(int n, int k);\n+ std::vector reconstructHSidedChain(int h, int i, int j);\n+ std::vector findKSides(int k, int i, int j);\n+ std::vector findKVertices(std::vector& sides);\n+\n+};\n+\n+} //namespace\n+\n+\n+//////////////////////// Class and struct constructors ////////////////////////\n+\n+namespace minEnclosingConvexPolygon {\n+\n+//! Constructor for IntersectionPoint. Find the intersection point of two lines given by four points and decide whether it is on the correct side of the polygon\n+/*!\n+ * @param a First point of the first line\n+ * @param b Second point of the first line\n+ * @param c First point of the second line\n+ * @param d Second point of the second line\n+ */\n+IntersectionPoint::IntersectionPoint(const cv::Point2f& a, const cv::Point2f& b,\n+ const cv::Point2f& c, const cv::Point2f& d)\n+{\n+ const cv::Point2f ab = b - a, cd = d - c, ac = c - a;\n+ const double det = ab.cross(-cd);\n+ if(std::abs (det) < EPSILON )\n+ return;\n+\n+ const double loc = ac.cross(-cd) / det;\n+ if(loc <= 0)\n+ return;\n+\n+ point = a + loc * ab;\n+ position = true;\n+}\n+\n+//! Constructor for IntersectionPoint. Find the intersection point of two sides of the polygon based on the given side indices.\n+/*!\n+ * @param i Index of the first side\n+ * @param j Index of the second side\n+ * @param ngon Input polygon with n sides\n+ */\n+IntersectionPoint::IntersectionPoint(int i, int j,\n+ const std::vector& ngon) :\n+ IntersectionPoint(ngon[i],\n+ ngon[(i + 1) % ngon.size()],\n+ ngon[j],\n+ ngon[(j + 1) % ngon.size()])\n+{}\n+\n+//! Constructor for FlushIntersections\n+/*!\n+ * @param _ngon Input polygon with n sides\n+ */\n+FlushIntersections::FlushIntersections(const std::vector& _ngon) :\n+ ngon(_ngon),\n+ intersections(ngon.size(),\n+ std::vector(ngon.size()))\n+{\n+ area_edges.reserve(ngon.size());\n+}\n+\n+//! Constructor for BalancedIntersections\n+/*!\n+ * @param _ngon Input polygon with n sides\n+ */\n+BalancedIntersections::BalancedIntersections(const std::vector& _ngon) :\n+ngon(_ngon),\n+balanced_intersections(ngon.size(),\n+ std::vector(ngon.size()))\n+{\n+ area_edges.reserve(ngon.size());\n+}\n+\n+//! Constructor for Side. 
Assign a side index and weather it is flush or not.\n+/*!\n+ * @param i Side index\n+ * @param b Is side i flush?\n+ */\n+Side::Side(int i, bool b)\n+{\n+ side = i;\n+ flush = b;\n+}\n+\n+//! Constructor for Chains\n+/*!\n+ * @param\n+ */\n+Chains::Chains(const std::vector& _ngon, int k) :\n+ ngon(_ngon),\n+ single_sides(std::vector>(ngon.size(),\n+ std::vector(ngon.size()))),\n+ middle_sides(std::vector>>(\n+ k, std::vector>(ngon.size(),\n+ std::vector(ngon.size())))),\n+ intersections(ngon),\n+ balanced_inters(ngon)\n+{}\n+\n+} //namespace\n+\n+\n+////////////////////////// Class and struct functions //////////////////////////\n+\n+namespace minEnclosingConvexPolygon {\n+\n+//! Find the intersection point of two sides, decide weather it is a valid point and if so calculate the extra area caused by that intersection.\n+/*!\n+ * @param i Index of the first side\n+ * @param j Index of the second side\n+ */\n+const FlushIntersect& FlushIntersections::lineIntersect(int i, int j)\n+{\n+ FlushIntersect& itr = intersections[i][j];\n+ if(itr.done)\n+ return itr;\n+\n+ const size_t n = ngon.size();\n+ if((i + 1) % n == j)\n+ {\n+ itr.intersection.point = ngon[j];\n+ itr.intersection.position = true;\n+ itr.extra_area = 0.0;\n+ itr.done = true;\n+ return itr;\n+ }\n+ itr.intersection = IntersectionPoint(i, j, ngon);\n+ if(itr.intersection.position)\n+ {\n+ area_edges.resize(0);\n+ for(int t = (i + 1) % n; t != (j + 1) % n; t = (t + 1) % n)\n+ {\n+ area_edges.push_back(ngon[t]);\n+ }\n+ area_edges.push_back(itr.intersection.point);\n+ itr.extra_area = cv::contourArea(area_edges);\n+ itr.done = true;\n+ }\n+ else\n+ {\n+ itr.extra_area = std::numeric_limits::max();\n+ itr.intersection.position = false;\n+ itr.done = true;\n+ }\n+ return itr;\n+}\n+\n+//! Calculate the added area that is enclosed by a sequence of consecutive vertices of the polygon and the intersection point of two sides.\n+/*!\n+ * @param first Index of the first point of the sequence\n+ * @param last Index of the last point of the sequence\n+ * @param extra1 Last point of the sequence\n+ * @param extra2 Intersection point\n+ */\n+double BalancedIntersections::extraArea(int first, int last,\n+ const cv::Point2f& extra1,\n+ const cv::Point2f& extra2)\n+{\n+ const size_t n = ngon.size();\n+ area_edges.resize(0);\n+ for(int t = first; t != last; t = (t + 1) % n)\n+ area_edges.push_back(ngon[t]);\n+\n+ area_edges.push_back(extra1);\n+ area_edges.push_back(extra2);\n+\n+ return cv::contourArea(area_edges);\n+}\n+\n+//! Determine the intersection points of a flush side e that lies between sides i and j with these two sides. Calculate the extra area and the position of the intersection points relative to the polygon. 
Update balanced_intersections if the new area is smaller than extraArea.\n+/*!\n+ * @param i Index of first side\n+ * @param j Index of second side\n+ * @param e Index of a side between i and j\n+ */\n+BalancedIntersect BalancedIntersections::flush(int i, int j, int e)\n+{\n+ if(i == e)\n+ std::logic_error(\"\");\n+ if(j == e)\n+ std::logic_error(\"\");\n+\n+ const size_t n = ngon.size();\n+ const int before = (e - 1 + n) % n;\n+ BalancedIntersect bi = balanced_intersections[i][j];\n+\n+ const IntersectionPoint left_e(i, e, ngon);\n+ const IntersectionPoint right_e(e, j, ngon);\n+\n+ if(left_e.position == true && right_e.position == true)\n+ {\n+ double extra_area = extraArea((i + 1) % n, e, ngon[e], left_e.point);\n+ if(extra_area < bi.extra_area)\n+ {\n+ extra_area += extraArea((e + 1) % n, j, ngon[j], right_e.point);\n+ if(extra_area < bi.extra_area)\n+ {\n+ bi.extra_area = extra_area;\n+ bi.pi = left_e.point;\n+ bi.pj = right_e.point;\n+ bi.position = true;\n+ bi.flush = e;\n+ }\n+ }\n+ }\n+\n+ if(before != i)\n+ {\n+ const IntersectionPoint left_before(i, before, ngon);\n+ const IntersectionPoint right_before(before, j, ngon);\n+ if(left_before.position == true && right_before.position == true)\n+ {\n+ double extra_area =\n+ extraArea((i + 1) % n, before, ngon[before], left_before.point);\n+\n+ if(extra_area < bi.extra_area)\n+ {\n+ extra_area += extraArea(e, j, ngon[j], right_before.point);\n+ if(extra_area < bi.extra_area)\n+ {\n+ bi.extra_area = extra_area;\n+ bi.pi = left_before.point;\n+ bi.pj = right_before.point;\n+ bi.position = true;\n+ bi.flush = before;\n+ }\n+ }\n+ }\n+ }\n+ return bi;\n+}\n+\n+//! Determine the intersection points of a balanced side e that lies between sides i and j with these two sides. If no valid balanced edge is found, ccheck for flush sides. Calculate the extra area and the position of the intersection points relative to the polygon. 
Update balanced_intersections if the new area is smaller than extraArea.\n+/*!\n+ * @param i Index of first side\n+ * @param j Index of second side\n+ * @param e Index of a side between i and j\n+ */\n+BalancedIntersect BalancedIntersections::balancedIntersect(int i, int j, int e)\n+{\n+ if(balanced_intersections[i][j].done)\n+ return balanced_intersections[i][j];\n+\n+ const size_t n = ngon.size();\n+ if((i + 2) % n == j)\n+ {\n+ BalancedIntersect& bi = balanced_intersections[i][j];\n+ bi.pi = ngon[(i + 1) % n];\n+ bi.pj = ngon[j];\n+ bi.flush = (i + 1) % n;\n+ bi.position = true;\n+ bi.extra_area = 0.0;\n+ bi.done = true;\n+ return bi;\n+ }\n+\n+ const cv::Point2f p1 = ngon[i], p2 = ngon[(i + 1) % n], p3 = ngon[e],\n+ p4 = ngon[j], p5 = ngon[(j + 1) % n];\n+ const cv::Point2f dir12 = p2 - p1, dir45 = p5 - p4;\n+ const double det = dir12.cross(dir45);\n+ if(std::abs (det) < EPSILON )\n+ {\n+ flush(i, j, e);\n+ return balanced_intersections[i][j];\n+ }\n+\n+ BalancedIntersect bi;\n+ cv::Point2f temp = 2 * p3 - p2 - p4;\n+ const double s = temp.cross(dir45) / det;\n+ const double t = temp.cross(dir12) / det;\n+ if(s >= 0 && t >= 0)\n+ {\n+ bi.pi = p2 + dir12 * s;\n+ bi.pj = p4 - dir45 * t;\n+ bi.position = true;\n+\n+ const cv::Point2f dir_balanced = bi.pj - bi.pi,\n+ dir_left = p3 - ngon[(e - 1 + n) % n],\n+ dir_right = ngon[(e + 1) % n] - p3;\n+\n+ const double cross_left = dir_balanced.cross(dir_left),\n+ cross_right = dir_balanced.cross(dir_right);\n+ if((cross_left < 0 && cross_right < 0) || (cross_left > 0 && cross_right > 0))\n+ {\n+ BalancedIntersect reset;\n+ bi = reset;\n+ bi = flush(i, j, e);\n+ }\n+ else if(std::abs (cross_left) < EPSILON )\n+ {\n+ bi.flush = (e - 1 + n) % n;\n+ bi.extra_area =\n+ extraArea((i + 1) % n, (e - 1 + n) % n, ngon[(e - 1 + n) % n], bi.pi)\n+ + extraArea(e, j, ngon[j], bi.pj);\n+ }\n+ else if(std::abs (cross_right) < EPSILON )\n+ {\n+ bi.flush = e;\n+ bi.extra_area = extraArea((i + 1) % n, e, ngon[e], bi.pi)\n+ + extraArea((e + 1) % n, j, ngon[j], bi.pj);\n+ }\n+ else\n+ {\n+ bi.extra_area = extraArea((i + 1) % n, e, ngon[e], bi.pi)\n+ + extraArea(e, j, ngon[j], bi.pj);\n+ }\n+ }\n+ else\n+ {\n+ flush(i, j, e);\n+ }\n+\n+ if(bi.extra_area < balanced_intersections[i][j].extra_area)\n+ {\n+ balanced_intersections[i][j] = bi;\n+ }\n+ return bi;\n+}\n+\n+//! Set function for the done attribute of BalancedIntersections\n+/*!\n+ * @param i Index of first side\n+ * @param j Index of second side\n+ */\n+void BalancedIntersections::markAsDone(int i, int j)\n+{\n+ balanced_intersections[i][j].done = true;\n+}\n+\n+//! Operator to get a vector of elements from BalancedIntersections\n+/*!\n+ * @param i index of a side\n+ */\n+const std::vector& BalancedIntersections::operator[](\n+ size_t i) const\n+{\n+ return balanced_intersections[i];\n+}\n+\n+//! 
For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the one sided chain with minimal extra area\n+/*!\n+ * @param i Index of first fixed side\n+ * @param j Index of second fixed side\n+ * @param l Index of left boundary\n+ * @param r Index of right boundary\n+ */\n+void Chains::findSingleE(int i, int j, int l, int r)\n+{\n+ const size_t n = ngon.size();\n+ Segment& one = single_sides[i][j];\n+ if (one.done)\n+ return;\n+\n+ double min_area = std::numeric_limits::max();\n+ for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n+ {\n+ BalancedIntersect candidate = balanced_inters.balancedIntersect(i, j, e);\n+ if(candidate.extra_area < min_area)\n+ {\n+ min_area = candidate.extra_area;\n+ one.side = e;\n+ one.extra_area = candidate.extra_area;\n+ one.flush = false;\n+ one.exists = true;\n+ }\n+ }\n+ one.done = true;\n+ balanced_inters.markAsDone(i, j);\n+}\n+\n+//! Recursively repeat the search for the one sided chain with minimal extra area to shrink the boundaries of bigger distances\n+/*!\n+ * @param i Fixed side of the ngon\n+ * @param j1 Lower boundary of search intervall\n+ * @param j2 Upper boundary of search intervall\n+ */\n+void Chains::singleSideImpl(int i, int j1, int j2)\n+{\n+ const size_t n = ngon.size();\n+ if((j1 + 1) %n == j2)\n+ {\n+ return;\n+ }\n+\n+ int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n+ int l = single_sides[i][j1].side < 0 ? (j1 + 1) %n : single_sides[i][j1].side;\n+ int r = single_sides[i][j2].side < 0 ? (j2 - 1 + n) %n : single_sides[i][j2].side;\n+\n+ findSingleE(i, mid, l, r);\n+ singleSideImpl(i, j1, mid);\n+ singleSideImpl(i, mid, j2);\n+\n+ return;\n+}\n+\n+//! For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the middle side of the h-sided chain with minimal extra area. This is the version for h = 1.\n+/*!\n+ * @param i Index of first fixed side\n+ * @param j Index of second fixed side\n+ * @param l Index of left boundary\n+ * @param r Index of right boundary\n+ */\n+void Chains::findMiddleE1(int i, int j, int l, int r)\n+{\n+ const size_t n = ngon.size();\n+ Segment& one = middle_sides[1][i][j];\n+ if (one.done)\n+ return;\n+\n+ double min_area = std::numeric_limits::max();\n+ for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n+ {\n+ const FlushIntersect& before = intersections.lineIntersect(i, e);\n+ if(!before.intersection.position)\n+ continue;\n+ const FlushIntersect& after = intersections.lineIntersect(e, j);\n+ if(!after.intersection.position)\n+ continue;\n+\n+ double tmp_area = before.extra_area + after.extra_area;\n+ if(tmp_area < min_area)\n+ {\n+ min_area = tmp_area;\n+ one.side = e;\n+ one.extra_area = tmp_area;\n+ one.exists = true;\n+ one.flush = true;\n+ }\n+ }\n+ one.done = true;\n+}\n+\n+//! Recursively repeat the search for the middle side of the h-sided chain with minimal extra area to shrink the boundaries of bigger distances. This is the version for h = 1.\n+/*!\n+ * @param i Fixed side of the ngon\n+ * @param j1 Lower boundary of search intervall\n+ * @param j2 Upper boundary of search intervall\n+ */\n+void Chains::middleSideImpl1(int i, int j1, int j2)\n+{\n+ const size_t n = ngon.size();\n+ if((j1 + 1) %n == j2)\n+ {\n+ return;\n+ }\n+\n+ int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n+ int l = middle_sides[1][i][j1].side < 0 ? (j1 + 1) %n : middle_sides[1][i][j1].side;\n+ int r = middle_sides[1][i][j2].side < 0 ? 
(j2 - 1 + n) %n : middle_sides[1][i][j2].side;\n+\n+ findMiddleE1(i, mid, l, r);\n+ middleSideImpl1(i, j1, mid);\n+ middleSideImpl1(i, mid, j2);\n+\n+ return;\n+}\n+\n+//! For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the middle side of the h-sided chain with minimal extra area. This is the version for h > 1.\n+/*!\n+ * @param h Length of the h-sided chain\n+ * @param i Index of first fixed side\n+ * @param j Index of second fixed side\n+ * @param l Index of left boundary\n+ * @param r Index of right boundary\n+ */\n+void Chains::findMiddleE(int h, int i, int j, int l, int r)\n+{\n+ const size_t n = ngon.size();\n+ Segment& one = middle_sides[h][i][j];\n+ if (one.done)\n+ return;\n+\n+ const int dist = (i <= j ? (j - i) : (j + n - i));\n+ const int h_floor = (h - 1) / 2;\n+ const int h_ceil = h - 1 - h_floor;\n+\n+ if(dist == 0)\n+ throw std::logic_error(\"\");\n+ if(dist - 1 < h)\n+ {\n+ one.done = true;\n+ return;\n+ }\n+ if(dist - 1 == h)\n+ {\n+ one.side = (i + h_floor + 1) % n;\n+ one.extra_area = 0.0;\n+ one.exists = true;\n+ one.flush = true;\n+ one.done = true;\n+ return;\n+ }\n+\n+ double min_area = std::numeric_limits::max();\n+ for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n+ {\n+ const Segment& before = middle_sides[h_floor][i][e];\n+ if (before.extra_area == std::numeric_limits::max())\n+ continue;\n+ const Segment& after = middle_sides[h_ceil][e][j];\n+ if(after.extra_area == std::numeric_limits::max())\n+ continue;\n+\n+ double tmp_area = before.extra_area + after.extra_area;\n+ if(tmp_area < min_area)\n+ {\n+ min_area = tmp_area;\n+ one.side = e;\n+ one.extra_area = tmp_area;\n+ one.exists = true;\n+ one.flush = true;\n+ }\n+ }\n+ one.done = true;\n+}\n+\n+//! Recursively repeat the search for the middle side of the h-sided chain with minimal extra area to shrink the boundaries of bigger distances. This is the version for h > 1.\n+/*!\n+ * @param h Length of the h-sided chain\n+ * @param i Fixed side of the ngon\n+ * @param j1 Lower boundary of search intervall\n+ * @param j2 Upper boundary of search intervall\n+ */\n+void Chains::middleSideImpl(int h, int i, int j1, int j2)\n+{\n+ const size_t n = ngon.size();\n+ if((j1 + 1) %n == j2)\n+ {\n+ return;\n+ }\n+\n+ int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n+ int l = middle_sides[h][i][j1].side < 0 ? (j1 + 1) %n : middle_sides[h][i][j1].side;\n+ int r = middle_sides[h][i][j2].side < 0 ? (j2 - 1 + n) %n : middle_sides[h][i][j2].side;\n+\n+ findMiddleE(h, i, mid, l, r);\n+ middleSideImpl(h, i, j1, mid);\n+ middleSideImpl(h, i, mid, j2);\n+\n+ return;\n+}\n+\n+//! Calculate the relevant chain lengths. Starting with a maximum chain length of h, down to a chain length of 1, only chains with half the length of the chain before are needed.\n+/*!\n+ * @param h Length of the longest chain (h = k - 3)\n+ */\n+std::set Chains::relevantChainLengths(int h)\n+{\n+ if(h <= 1)\n+ return {h};\n+\n+ std::set hs = {h};\n+ const int h_floor = (h - 1) / 2;\n+ const int h_ceil = h - 1 - h_floor;\n+ std::set h1 = relevantChainLengths(h_floor);\n+ for(const int& hNew : h1)\n+ hs.insert(hNew);\n+ if (h_ceil != h_floor)\n+ {\n+ std::set h2 = relevantChainLengths(h_ceil);\n+ for(const int& hNew : h2)\n+ hs.insert(hNew);\n+ }\n+ return hs;\n+}\n+\n+//! 
Recursively calculate all the onesided chains for each combination of polygon sides.\n+/*!\n+ */\n+void Chains::calcOneSidedChains()\n+{\n+ const size_t n = ngon.size();\n+ for(size_t i = 0; i < n; i++)\n+ {\n+ int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n+\n+ findSingleE(i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n+ findSingleE(i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n+ singleSideImpl(i, j1, j2);\n+ }\n+}\n+\n+//! Recursively calculate the middle sides of the h-sided chains for all combinations of polygon sides.\n+/*!\n+ * @param h Length of the chains\n+ */\n+void Chains::calcMiddleChains(int h)\n+{\n+ const size_t n = ngon.size();\n+ if (h == 0)\n+ {\n+ for (size_t i = 0; i < n; i++)\n+ {\n+ for (size_t j = 0; j < n; j++)\n+ {\n+ Segment& one = middle_sides[h][i][j];\n+ const FlushIntersect itrs = intersections.lineIntersect(i, j);\n+\n+ one.side = -1;\n+ one.extra_area = itrs.extra_area;\n+ one.exists = false;\n+ one.flush = false;\n+ one.done = true;\n+ }\n+ }\n+ return;\n+ }\n+ if (h == 1)\n+ {\n+ for (size_t i = 0; i < n; i++)\n+ {\n+ int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n+\n+ findMiddleE1(i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n+ findMiddleE1(i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n+ middleSideImpl1(i, j1, j2);\n+ }\n+ return;\n+ }\n+\n+ for (size_t i = 0; i < n; i++)\n+ {\n+ int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n+\n+ findMiddleE(h, i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n+ findMiddleE(h, i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n+ middleSideImpl(h, i, j1, j2);\n+ }\n+}\n+\n+//! Find the i and j with the smallest extra area.\n+/*!\n+ * @param n Number of sides of the input polygon\n+ * @param k Number of sides of the output polygon\n+ */\n+Minimum Chains::minimumArea(int n, int k)\n+{\n+ Minimum min{};\n+ for(int i = 0; i < n; i++)\n+ {\n+ for(int j = 0; j < n; j++)\n+ {\n+ if(!single_sides[i][j].exists || !middle_sides[k - 3][j][i].exists)\n+ continue;\n+\n+ double tmp_area =\n+ single_sides[i][j].extra_area + middle_sides[k - 3][j][i].extra_area;\n+ if(tmp_area < min.area)\n+ {\n+ min.area = tmp_area;\n+ min.i = i;\n+ min.j = j;\n+ }\n+ }\n+ }\n+ return min;\n+}\n+\n+//! Reconstruct the h-sided chain based on the two sides that give the smalles extra area.\n+/*!\n+ * @param h Length of the k - 3 sided chain.\n+ * @param i First index from minimumArea\n+ * @param j Second index from minimumArea\n+ */\n+std::vector Chains::reconstructHSidedChain(int h, int i, int j)\n+{\n+ if(h == 0)\n+ {\n+ throw std::logic_error(\"\");\n+ }\n+ if(h == 1)\n+ {\n+ return std::vector{{middle_sides[h][i][j].side, true}};\n+ }\n+\n+ std::vector before, after;\n+ const int h_floor = (h - 1) / 2;\n+ const int h_ceil = h - 1 - h_floor;\n+ if(h_floor > 0)\n+ before = reconstructHSidedChain(h_floor, i, middle_sides[h][i][j].side);\n+ if(h_ceil > 0)\n+ after = reconstructHSidedChain(h_ceil, middle_sides[h][i][j].side, j);\n+\n+ std::vector sides{{middle_sides[h][i][j].side, true}};\n+ sides.insert(sides.end(), before.begin(), before.end());\n+ sides.insert(sides.end(), after.begin(), after.end());\n+ return sides;\n+}\n+\n+//! 
Get the k sides that build the kgon based on the two sides that give the smalles extra area.\n+/*!\n+ * @param k Number of sides of the output polygon\n+ * @param i First index from minimumArea\n+ * @param j Second index from minimumArea\n+ */\n+std::vector Chains::findKSides(int k, int i, int j)\n+{\n+ std::vector sides;\n+ sides.push_back({i, true});\n+ sides.push_back({j, true});\n+\n+ if(single_sides[i][j].flush)\n+ sides.push_back({single_sides[i][j].side, true});\n+ else\n+ sides.push_back({single_sides[i][j].side, false});\n+\n+ std::vector flush_chain = reconstructHSidedChain(k - 3, j, i);\n+ sides.insert(sides.end(), flush_chain.begin(), flush_chain.end());\n+ std::sort(sides.begin(), sides.end(),\n+ [](const Side& lhs, const Side& rhs) { return lhs.side < rhs.side; });\n+\n+ return sides;\n+}\n+\n+//! Calculate the k vertices of the kgon from its k sides.\n+/*!\n+ * @param sides Sides of the output polygon\n+ */\n+std::vector Chains::findKVertices(std::vector& sides)\n+{\n+ const int k = sides.size();\n+ std::vector vertices(k);\n+\n+ for(int u = 0; u < k; u++)\n+ {\n+ const int next = (u + 1) % k;\n+ if(sides[u].flush && sides[next].flush)\n+ {\n+ vertices[u] = intersections.lineIntersect(sides[u].side,\n+ sides[next].side).intersection.point;\n+ }\n+ else if(sides[u].flush && !sides[next].flush)\n+ {\n+ vertices[u] = balanced_inters[sides[u].side][sides[(u + 2) % k].side].pi;\n+ }\n+ else if(!sides[u].flush && sides[next].flush)\n+ {\n+ vertices[u] = balanced_inters[sides[(u - 1 + k) %k].side][sides[next].side].pj;\n+ }\n+ else\n+ {\n+ throw std::logic_error(\"\");", + "comment_created_at": "2025-06-18T07:17:46+00:00", + "comment_author": "asmorkalov", + "comment_body": "OpenCV uses CV_Assert for such purposes.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2073337340", + "pr_number": 27051, + "pr_file": "modules/photo/src/ccm/utils.cpp", + "created_at": "2025-05-05T12:20:27+00:00", + "commented_code": "namespace cv {\nnamespace ccm {\n\ninline double gammaCorrection_(double element, double gamma)\ninline double gammaOp(double element, double gamma)\n{\n return (element >= 0 ? pow(element, gamma) : -pow((-element), gamma));\n return (element >= 0.0) ? pow(element, gamma) : -pow(-element, gamma);\n}\n\nMat gammaCorrection(const Mat& src, double gamma, Mat dst)\nvoid gammaCorrection(InputArray _src, OutputArray _dst, double gamma)\n{\n return elementWise(src, [gamma](double element) -> double { return gammaCorrection_(element, gamma); }, dst);\n Mat src = _src.getMat();", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2073337340", + "repo_full_name": "opencv/opencv", + "pr_number": 27051, + "pr_file": "modules/photo/src/ccm/utils.cpp", + "discussion_id": "2073337340", + "commented_code": "@@ -11,14 +11,23 @@\n namespace cv {\n namespace ccm {\n \n-inline double gammaCorrection_(double element, double gamma)\n+inline double gammaOp(double element, double gamma)\n {\n- return (element >= 0 ? pow(element, gamma) : -pow((-element), gamma));\n+ return (element >= 0.0) ? 
pow(element, gamma) : -pow(-element, gamma);\n }\n \n-Mat gammaCorrection(const Mat& src, double gamma, Mat dst)\n+void gammaCorrection(InputArray _src, OutputArray _dst, double gamma)\n {\n- return elementWise(src, [gamma](double element) -> double { return gammaCorrection_(element, gamma); }, dst);\n+ Mat src = _src.getMat();", + "comment_created_at": "2025-05-05T12:20:27+00:00", + "comment_author": "asmorkalov", + "comment_body": "please add CV_Assert with input type checks at least. The function is public API now and should handle invalid input correctly.", + "pr_file_module": null + }, + { + "comment_id": "2139828248", + "repo_full_name": "opencv/opencv", + "pr_number": 27051, + "pr_file": "modules/photo/src/ccm/utils.cpp", + "discussion_id": "2073337340", + "commented_code": "@@ -11,14 +11,23 @@\n namespace cv {\n namespace ccm {\n \n-inline double gammaCorrection_(double element, double gamma)\n+inline double gammaOp(double element, double gamma)\n {\n- return (element >= 0 ? pow(element, gamma) : -pow((-element), gamma));\n+ return (element >= 0.0) ? pow(element, gamma) : -pow(-element, gamma);\n }\n \n-Mat gammaCorrection(const Mat& src, double gamma, Mat dst)\n+void gammaCorrection(InputArray _src, OutputArray _dst, double gamma)\n {\n- return elementWise(src, [gamma](double element) -> double { return gammaCorrection_(element, gamma); }, dst);\n+ Mat src = _src.getMat();", + "comment_created_at": "2025-06-11T10:54:35+00:00", + "comment_author": "asmorkalov", + "comment_body": "It's still relevant.", + "pr_file_module": null + }, + { + "comment_id": "2140012048", + "repo_full_name": "opencv/opencv", + "pr_number": 27051, + "pr_file": "modules/photo/src/ccm/utils.cpp", + "discussion_id": "2073337340", + "commented_code": "@@ -11,14 +11,23 @@\n namespace cv {\n namespace ccm {\n \n-inline double gammaCorrection_(double element, double gamma)\n+inline double gammaOp(double element, double gamma)\n {\n- return (element >= 0 ? pow(element, gamma) : -pow((-element), gamma));\n+ return (element >= 0.0) ? pow(element, gamma) : -pow(-element, gamma);\n }\n \n-Mat gammaCorrection(const Mat& src, double gamma, Mat dst)\n+void gammaCorrection(InputArray _src, OutputArray _dst, double gamma)\n {\n- return elementWise(src, [gamma](double element) -> double { return gammaCorrection_(element, gamma); }, dst);\n+ Mat src = _src.getMat();", + "comment_created_at": "2025-06-11T12:31:48+00:00", + "comment_author": "abhishek-gola", + "comment_body": "@asmorkalov CV_ASSERT for gamma and type checks using switch have already been added. 
Please check the updated file.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2053519772", + "pr_number": 27177, + "pr_file": "modules/dnn/src/llm/gguf_buffer.cpp", + "created_at": "2025-04-22T07:37:55+00:00", + "commented_code": "#include \"../precomp.hpp\"\n#include \"gguf_buffer.hpp\"\n\n\nnamespace cv { namespace dnn {\nCV__DNN_INLINE_NS_BEGIN\n\nGGUFBuffer::GGUFBuffer(const std::string & fileName){\n std::ifstream file(fileName, std::ios::binary | std::ios::ate);\n if (!file.is_open()) {\n throw std::runtime_error(\"Could not open file: \");", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2053519772", + "repo_full_name": "opencv/opencv", + "pr_number": 27177, + "pr_file": "modules/dnn/src/llm/gguf_buffer.cpp", + "discussion_id": "2053519772", + "commented_code": "@@ -0,0 +1,161 @@\n+#include \"../precomp.hpp\"\n+#include \"gguf_buffer.hpp\"\n+\n+\n+namespace cv { namespace dnn {\n+CV__DNN_INLINE_NS_BEGIN\n+\n+GGUFBuffer::GGUFBuffer(const std::string & fileName){\n+ std::ifstream file(fileName, std::ios::binary | std::ios::ate);\n+ if (!file.is_open()) {\n+ throw std::runtime_error(\"Could not open file: \");", + "comment_created_at": "2025-04-22T07:37:55+00:00", + "comment_author": "asmorkalov", + "comment_body": "Please use CV_Error, CV_Assert, etc. See https://docs.opencv.org/5.x/db/de0/group__core__utils.html#ga5b48c333c777666e076bd7052799f891", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1936883220", + "pr_number": 26859, + "pr_file": "modules/imgcodecs/src/grfmt_gif.cpp", + "created_at": "2025-01-31T08:49:23+00:00", + "commented_code": "} else {\n cvtColor(img_, img, COLOR_BGRA2BGR);\n }\n } else {\n } else if (img.channels() == 4){\n if (m_use_rgb) {\n cvtColor(img_, img, COLOR_BGRA2RGBA);\n } else {\n img_.copyTo(img);\n }\n } else if (img.channels() == 1){\n cvtColor(img_, img, COLOR_BGRA2GRAY);\n } else {\n CV_LOG_WARNING(NULL, \"Unsupported channels\");", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1936883220", + "repo_full_name": "opencv/opencv", + "pr_number": 26859, + "pr_file": "modules/imgcodecs/src/grfmt_gif.cpp", + "discussion_id": "1936883220", + "commented_code": "@@ -172,12 +172,17 @@ bool GifDecoder::readData(Mat &img) {\n } else {\n cvtColor(img_, img, COLOR_BGRA2BGR);\n }\n- } else {\n+ } else if (img.channels() == 4){\n if (m_use_rgb) {\n cvtColor(img_, img, COLOR_BGRA2RGBA);\n } else {\n img_.copyTo(img);\n }\n+ } else if (img.channels() == 1){\n+ cvtColor(img_, img, COLOR_BGRA2GRAY);\n+ } else {\n+ CV_LOG_WARNING(NULL, \"Unsupported channels\");", + "comment_created_at": "2025-01-31T08:49:23+00:00", + "comment_author": "asmorkalov", + "comment_body": "It's error. 
I propose to include amount of channels to error message in printf style like https://github.com/opencv/opencv/blob/c21d0ad9d08d364542bb4a6eb971ee3051ccba63/modules/imgcodecs/src/grfmt_jpeg.cpp#L771", + "pr_file_module": null + }, + { + "comment_id": "1937124526", + "repo_full_name": "opencv/opencv", + "pr_number": 26859, + "pr_file": "modules/imgcodecs/src/grfmt_gif.cpp", + "discussion_id": "1936883220", + "commented_code": "@@ -172,12 +172,17 @@ bool GifDecoder::readData(Mat &img) {\n } else {\n cvtColor(img_, img, COLOR_BGRA2BGR);\n }\n- } else {\n+ } else if (img.channels() == 4){\n if (m_use_rgb) {\n cvtColor(img_, img, COLOR_BGRA2RGBA);\n } else {\n img_.copyTo(img);\n }\n+ } else if (img.channels() == 1){\n+ cvtColor(img_, img, COLOR_BGRA2GRAY);\n+ } else {\n+ CV_LOG_WARNING(NULL, \"Unsupported channels\");", + "comment_created_at": "2025-01-31T12:03:48+00:00", + "comment_author": "Kumataro", + "comment_body": "Thank you for your comment, I fixed it to show `img.channels()`.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1820140600", + "pr_number": 26379, + "pr_file": "modules/imgcodecs/src/grfmt_jpegxl.cpp", + "created_at": "2024-10-29T06:05:36+00:00", + "commented_code": "/*M///////////////////////////////////////////////////////////////////////////////////////\n//\n// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\n//\n// By downloading, copying, installing or using the software you agree to this license.\n// If you do not agree to this license, do not download, install,\n// copy or use the software.\n//\n//\n// Intel License Agreement\n// For Open Source Computer Vision Library\n//\n// Copyright (C) 2000, Intel Corporation, all rights reserved.\n// Third party copyrights are property of their respective owners.\n//\n// Redistribution and use in source and binary forms, with or without modification,\n// are permitted provided that the following conditions are met:\n//\n// * Redistribution's of source code must retain the above copyright notice,\n// this list of conditions and the following disclaimer.\n//\n// * Redistribution's in binary form must reproduce the above copyright notice,\n// this list of conditions and the following disclaimer in the documentation\n// and/or other materials provided with the distribution.\n//\n// * The name of Intel Corporation may not be used to endorse or promote products\n// derived from this software without specific prior written permission.\n//\n// This software is provided by the copyright holders and contributors \"as is\" and\n// any express or implied warranties, including, but not limited to, the implied\n// warranties of merchantability and fitness for a particular purpose are disclaimed.\n// In no event shall the Intel Corporation or contributors be liable for any direct,\n// indirect, incidental, special, exemplary, or consequential damages\n// (including, but not limited to, procurement of substitute goods or services;\n// loss of use, data, or profits; or business interruption) however caused\n// and on any theory of liability, whether in contract, strict liability,\n// or tort (including negligence or otherwise) arising in any way out of\n// the use of this software, even if advised of the possibility of such damage.\n//\n//M*/\n\n#include \"precomp.hpp\"\n#include \"grfmt_jpegxl.hpp\"\n\n#ifdef HAVE_JPEGXL\n\n#include \n\nnamespace cv\n{\n\n/////////////////////// JpegXLDecoder ///////////////////\n\nJpegXLDecoder::JpegXLDecoder() : m_f(nullptr, fclose)\n{\n m_signature = \"\\xFF\\x0A\";\n 
m_decoder = nullptr;\n m_buf_supported = true;\n m_type = -1;\n}\n\nJpegXLDecoder::~JpegXLDecoder()\n{\n close();\n}\n\nvoid JpegXLDecoder::close()\n{\n if (m_decoder)\n m_decoder.release();\n if (m_f)\n m_f.release();\n m_read_buffer = {};\n m_width = m_height = 0;\n m_type = -1;\n}\n\nImageDecoder JpegXLDecoder::newDecoder() const\n{\n return makePtr();\n}\n\nbool JpegXLDecoder::read(Mat* pimg)\n{\n // Open file\n if (!m_f) {\n m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n if (!m_f)\n return false;\n }\n\n // Initialize decoder\n if (!m_decoder) {\n m_decoder = JxlDecoderMake(nullptr);\n if (!m_decoder)\n return false;\n // Subscribe to the basic info event\n JxlDecoderStatus status = JxlDecoderSubscribeEvents(m_decoder.get(), JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE);\n if (status != JXL_DEC_SUCCESS)\n return false;\n }\n\n // Set up parallel m_parallel_runner\n if (!m_parallel_runner) {\n m_parallel_runner = JxlThreadParallelRunnerMake(nullptr, JxlThreadParallelRunnerDefaultNumWorkerThreads());\n if (JXL_DEC_SUCCESS != JxlDecoderSetParallelRunner(m_decoder.get(),\n JxlThreadParallelRunner,\n m_parallel_runner.get())) {\n return false;\n }\n }\n\n // Create buffer for reading\n const size_t read_buffer_size = 16384; // 16KB chunks\n if (m_read_buffer.capacity() < read_buffer_size)\n m_read_buffer.resize(read_buffer_size);\n\n // Create image if needed\n if (m_type != -1 && pimg) {\n pimg->create(m_height, m_width, m_type);\n if (!pimg->isContinuous())\n return false;\n if (JXL_DEC_SUCCESS != JxlDecoderSetImageOutBuffer(m_decoder.get(), \n &m_format,\n pimg->ptr(),\n pimg->total() * pimg->elemSize())) {\n return false;\n }\n }\n\n // Start decoding loop\n JxlDecoderStatus status = JXL_DEC_NEED_MORE_INPUT;\n do {\n // Check if we need more input\n if (status == JXL_DEC_NEED_MORE_INPUT) {\n size_t remaining = JxlDecoderReleaseInput(m_decoder.get());\n // Move any remaining bytes to the beginning\n if (remaining > 0) {\n memmove(m_read_buffer.data(), m_read_buffer.data() + m_read_buffer.size() - remaining, remaining);\n }\n \n // Read more data from file\n size_t bytes_read = fread(m_read_buffer.data() + remaining, \n 1, m_read_buffer.size() - remaining, m_f.get());\n if (bytes_read == 0) {\n if (ferror(m_f.get())) {\n throw std::runtime_error(\"Error reading input file\");\n }\n // If we reached EOF but decoder needs more input, file is truncated\n if (status == JXL_DEC_NEED_MORE_INPUT) {\n throw std::runtime_error(\"Truncated JXL file\");\n }", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1820140600", + "repo_full_name": "opencv/opencv", + "pr_number": 26379, + "pr_file": "modules/imgcodecs/src/grfmt_jpegxl.cpp", + "discussion_id": "1820140600", + "commented_code": "@@ -0,0 +1,376 @@\n+/*M///////////////////////////////////////////////////////////////////////////////////////\n+//\n+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\n+//\n+// By downloading, copying, installing or using the software you agree to this license.\n+// If you do not agree to this license, do not download, install,\n+// copy or use the software.\n+//\n+//\n+// Intel License Agreement\n+// For Open Source Computer Vision Library\n+//\n+// Copyright (C) 2000, Intel Corporation, all rights reserved.\n+// Third party copyrights are property of their respective owners.\n+//\n+// Redistribution and use in source and binary forms, with or without modification,\n+// are permitted provided that the following conditions are met:\n+//\n+// * Redistribution's of 
source code must retain the above copyright notice,\n+// this list of conditions and the following disclaimer.\n+//\n+// * Redistribution's in binary form must reproduce the above copyright notice,\n+// this list of conditions and the following disclaimer in the documentation\n+// and/or other materials provided with the distribution.\n+//\n+// * The name of Intel Corporation may not be used to endorse or promote products\n+// derived from this software without specific prior written permission.\n+//\n+// This software is provided by the copyright holders and contributors \"as is\" and\n+// any express or implied warranties, including, but not limited to, the implied\n+// warranties of merchantability and fitness for a particular purpose are disclaimed.\n+// In no event shall the Intel Corporation or contributors be liable for any direct,\n+// indirect, incidental, special, exemplary, or consequential damages\n+// (including, but not limited to, procurement of substitute goods or services;\n+// loss of use, data, or profits; or business interruption) however caused\n+// and on any theory of liability, whether in contract, strict liability,\n+// or tort (including negligence or otherwise) arising in any way out of\n+// the use of this software, even if advised of the possibility of such damage.\n+//\n+//M*/\n+\n+#include \"precomp.hpp\"\n+#include \"grfmt_jpegxl.hpp\"\n+\n+#ifdef HAVE_JPEGXL\n+\n+#include \n+\n+namespace cv\n+{\n+\n+/////////////////////// JpegXLDecoder ///////////////////\n+\n+JpegXLDecoder::JpegXLDecoder() : m_f(nullptr, fclose)\n+{\n+ m_signature = \"\\xFF\\x0A\";\n+ m_decoder = nullptr;\n+ m_buf_supported = true;\n+ m_type = -1;\n+}\n+\n+JpegXLDecoder::~JpegXLDecoder()\n+{\n+ close();\n+}\n+\n+void JpegXLDecoder::close()\n+{\n+ if (m_decoder)\n+ m_decoder.release();\n+ if (m_f)\n+ m_f.release();\n+ m_read_buffer = {};\n+ m_width = m_height = 0;\n+ m_type = -1;\n+}\n+\n+ImageDecoder JpegXLDecoder::newDecoder() const\n+{\n+ return makePtr();\n+}\n+\n+bool JpegXLDecoder::read(Mat* pimg)\n+{\n+ // Open file\n+ if (!m_f) {\n+ m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n+ if (!m_f)\n+ return false;\n+ }\n+\n+ // Initialize decoder\n+ if (!m_decoder) {\n+ m_decoder = JxlDecoderMake(nullptr);\n+ if (!m_decoder)\n+ return false;\n+ // Subscribe to the basic info event\n+ JxlDecoderStatus status = JxlDecoderSubscribeEvents(m_decoder.get(), JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE);\n+ if (status != JXL_DEC_SUCCESS)\n+ return false;\n+ }\n+\n+ // Set up parallel m_parallel_runner\n+ if (!m_parallel_runner) {\n+ m_parallel_runner = JxlThreadParallelRunnerMake(nullptr, JxlThreadParallelRunnerDefaultNumWorkerThreads());\n+ if (JXL_DEC_SUCCESS != JxlDecoderSetParallelRunner(m_decoder.get(),\n+ JxlThreadParallelRunner,\n+ m_parallel_runner.get())) {\n+ return false;\n+ }\n+ }\n+\n+ // Create buffer for reading\n+ const size_t read_buffer_size = 16384; // 16KB chunks\n+ if (m_read_buffer.capacity() < read_buffer_size)\n+ m_read_buffer.resize(read_buffer_size);\n+\n+ // Create image if needed\n+ if (m_type != -1 && pimg) {\n+ pimg->create(m_height, m_width, m_type);\n+ if (!pimg->isContinuous())\n+ return false;\n+ if (JXL_DEC_SUCCESS != JxlDecoderSetImageOutBuffer(m_decoder.get(), \n+ &m_format,\n+ pimg->ptr(),\n+ pimg->total() * pimg->elemSize())) {\n+ return false;\n+ }\n+ }\n+\n+ // Start decoding loop\n+ JxlDecoderStatus status = JXL_DEC_NEED_MORE_INPUT;\n+ do {\n+ // Check if we need more input\n+ if (status == JXL_DEC_NEED_MORE_INPUT) {\n+ size_t remaining = 
JxlDecoderReleaseInput(m_decoder.get());\n+ // Move any remaining bytes to the beginning\n+ if (remaining > 0) {\n+ memmove(m_read_buffer.data(), m_read_buffer.data() + m_read_buffer.size() - remaining, remaining);\n+ }\n+ \n+ // Read more data from file\n+ size_t bytes_read = fread(m_read_buffer.data() + remaining, \n+ 1, m_read_buffer.size() - remaining, m_f.get());\n+ if (bytes_read == 0) {\n+ if (ferror(m_f.get())) {\n+ throw std::runtime_error(\"Error reading input file\");\n+ }\n+ // If we reached EOF but decoder needs more input, file is truncated\n+ if (status == JXL_DEC_NEED_MORE_INPUT) {\n+ throw std::runtime_error(\"Truncated JXL file\");\n+ }", + "comment_created_at": "2024-10-29T06:05:36+00:00", + "comment_author": "asmorkalov", + "comment_body": "OpenCV's `imread` is exception safe. Please use CV_LOG_WARNING and analogs and return satus instead of exception.", + "pr_file_module": null + }, + { + "comment_id": "1820592738", + "repo_full_name": "opencv/opencv", + "pr_number": 26379, + "pr_file": "modules/imgcodecs/src/grfmt_jpegxl.cpp", + "discussion_id": "1820140600", + "commented_code": "@@ -0,0 +1,376 @@\n+/*M///////////////////////////////////////////////////////////////////////////////////////\n+//\n+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\n+//\n+// By downloading, copying, installing or using the software you agree to this license.\n+// If you do not agree to this license, do not download, install,\n+// copy or use the software.\n+//\n+//\n+// Intel License Agreement\n+// For Open Source Computer Vision Library\n+//\n+// Copyright (C) 2000, Intel Corporation, all rights reserved.\n+// Third party copyrights are property of their respective owners.\n+//\n+// Redistribution and use in source and binary forms, with or without modification,\n+// are permitted provided that the following conditions are met:\n+//\n+// * Redistribution's of source code must retain the above copyright notice,\n+// this list of conditions and the following disclaimer.\n+//\n+// * Redistribution's in binary form must reproduce the above copyright notice,\n+// this list of conditions and the following disclaimer in the documentation\n+// and/or other materials provided with the distribution.\n+//\n+// * The name of Intel Corporation may not be used to endorse or promote products\n+// derived from this software without specific prior written permission.\n+//\n+// This software is provided by the copyright holders and contributors \"as is\" and\n+// any express or implied warranties, including, but not limited to, the implied\n+// warranties of merchantability and fitness for a particular purpose are disclaimed.\n+// In no event shall the Intel Corporation or contributors be liable for any direct,\n+// indirect, incidental, special, exemplary, or consequential damages\n+// (including, but not limited to, procurement of substitute goods or services;\n+// loss of use, data, or profits; or business interruption) however caused\n+// and on any theory of liability, whether in contract, strict liability,\n+// or tort (including negligence or otherwise) arising in any way out of\n+// the use of this software, even if advised of the possibility of such damage.\n+//\n+//M*/\n+\n+#include \"precomp.hpp\"\n+#include \"grfmt_jpegxl.hpp\"\n+\n+#ifdef HAVE_JPEGXL\n+\n+#include \n+\n+namespace cv\n+{\n+\n+/////////////////////// JpegXLDecoder ///////////////////\n+\n+JpegXLDecoder::JpegXLDecoder() : m_f(nullptr, fclose)\n+{\n+ m_signature = \"\\xFF\\x0A\";\n+ m_decoder = nullptr;\n+ 
m_buf_supported = true;\n+ m_type = -1;\n+}\n+\n+JpegXLDecoder::~JpegXLDecoder()\n+{\n+ close();\n+}\n+\n+void JpegXLDecoder::close()\n+{\n+ if (m_decoder)\n+ m_decoder.release();\n+ if (m_f)\n+ m_f.release();\n+ m_read_buffer = {};\n+ m_width = m_height = 0;\n+ m_type = -1;\n+}\n+\n+ImageDecoder JpegXLDecoder::newDecoder() const\n+{\n+ return makePtr();\n+}\n+\n+bool JpegXLDecoder::read(Mat* pimg)\n+{\n+ // Open file\n+ if (!m_f) {\n+ m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n+ if (!m_f)\n+ return false;\n+ }\n+\n+ // Initialize decoder\n+ if (!m_decoder) {\n+ m_decoder = JxlDecoderMake(nullptr);\n+ if (!m_decoder)\n+ return false;\n+ // Subscribe to the basic info event\n+ JxlDecoderStatus status = JxlDecoderSubscribeEvents(m_decoder.get(), JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE);\n+ if (status != JXL_DEC_SUCCESS)\n+ return false;\n+ }\n+\n+ // Set up parallel m_parallel_runner\n+ if (!m_parallel_runner) {\n+ m_parallel_runner = JxlThreadParallelRunnerMake(nullptr, JxlThreadParallelRunnerDefaultNumWorkerThreads());\n+ if (JXL_DEC_SUCCESS != JxlDecoderSetParallelRunner(m_decoder.get(),\n+ JxlThreadParallelRunner,\n+ m_parallel_runner.get())) {\n+ return false;\n+ }\n+ }\n+\n+ // Create buffer for reading\n+ const size_t read_buffer_size = 16384; // 16KB chunks\n+ if (m_read_buffer.capacity() < read_buffer_size)\n+ m_read_buffer.resize(read_buffer_size);\n+\n+ // Create image if needed\n+ if (m_type != -1 && pimg) {\n+ pimg->create(m_height, m_width, m_type);\n+ if (!pimg->isContinuous())\n+ return false;\n+ if (JXL_DEC_SUCCESS != JxlDecoderSetImageOutBuffer(m_decoder.get(), \n+ &m_format,\n+ pimg->ptr(),\n+ pimg->total() * pimg->elemSize())) {\n+ return false;\n+ }\n+ }\n+\n+ // Start decoding loop\n+ JxlDecoderStatus status = JXL_DEC_NEED_MORE_INPUT;\n+ do {\n+ // Check if we need more input\n+ if (status == JXL_DEC_NEED_MORE_INPUT) {\n+ size_t remaining = JxlDecoderReleaseInput(m_decoder.get());\n+ // Move any remaining bytes to the beginning\n+ if (remaining > 0) {\n+ memmove(m_read_buffer.data(), m_read_buffer.data() + m_read_buffer.size() - remaining, remaining);\n+ }\n+ \n+ // Read more data from file\n+ size_t bytes_read = fread(m_read_buffer.data() + remaining, \n+ 1, m_read_buffer.size() - remaining, m_f.get());\n+ if (bytes_read == 0) {\n+ if (ferror(m_f.get())) {\n+ throw std::runtime_error(\"Error reading input file\");\n+ }\n+ // If we reached EOF but decoder needs more input, file is truncated\n+ if (status == JXL_DEC_NEED_MORE_INPUT) {\n+ throw std::runtime_error(\"Truncated JXL file\");\n+ }", + "comment_created_at": "2024-10-29T11:08:45+00:00", + "comment_author": "cdcseacave", + "comment_body": "done", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-use-opencv-error-mechanisms.md b/_reviewers/opencv-use-opencv-error-mechanisms.md index 4d58915..1d04265 100644 --- a/_reviewers/opencv-use-opencv-error-mechanisms.md +++ b/_reviewers/opencv-use-opencv-error-mechanisms.md @@ -38,187 +38,3 @@ void processImage(InputArray src, OutputArray dst, int param) { return false; } ``` - - -[ - { - "discussion_id": "2153806951", - "pr_number": 27369, - "pr_file": "modules/imgproc/src/min_enclosing_convex_polygon.cpp", - "created_at": "2025-06-18T07:03:02+00:00", - "commented_code": "/*M///////////////////////////////////////////////////////////////////////////////////////\n //\n // This file is part of OpenCV project.\n // It is subject to the license terms in the LICENSE file found in the top-level directory of this\n // 
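Taken together, the reviewer comments quoted above converge on one pattern for public OpenCV code paths: validate preconditions with CV_Assert, reject unsupported arguments with CV_Error and a printf-style message that includes the offending value, and handle recoverable data problems with CV_LOG_WARNING plus a status return rather than a C++ exception, since callers such as imread() are expected to stay exception safe. The sketch below is illustrative only; the helper name decodeToMat and its signature are hypothetical and not part of any of the PRs above.

#include <opencv2/core.hpp>
#include <opencv2/core/utils/logger.hpp>

// Hypothetical helper showing the error-handling pattern requested in the
// reviews above (assumption: not taken from the quoted PRs).
static bool decodeToMat(cv::InputArray _src, cv::OutputArray _dst)
{
    cv::Mat src = _src.getMat();

    // Recoverable, data-dependent problem: warn and report failure through
    // the return value instead of throwing.
    if (src.empty())
    {
        CV_LOG_WARNING(NULL, "Empty input, nothing to decode");
        return false;
    }

    // Hard precondition on a public API: fail loudly via CV_Assert.
    CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);

    // Unsupported layout: raise a descriptive error, printf style, so the
    // message carries the actual channel count.
    const int cn = src.channels();
    if (cn != 1 && cn != 3 && cn != 4)
        CV_Error(cv::Error::StsBadArg,
                 cv::format("Unsupported number of channels: %d", cn));

    src.copyTo(_dst);
    return true;
}

A caller can then branch on the boolean result for truncated or empty inputs, while genuinely invalid arguments still surface as cv::Exception via CV_Assert/CV_Error.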
distribution and at http://opencv.org/license.html.\n //\n //\n // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\n //\n // By downloading, copying, installing or using the software you agree to this license.\n // If you do not agree to this license, do not download, install,\n // copy or use the software.\n //\n // INFORMATION REGARDING THE CONTRIBUTION:\n //\n // Author: Sara Kuhnert\n // Created: 20.05.2025\n // E-mail: \n //\n // THE IMPLEMENTATION OF THE MODULES IS BASED ON THE FOLLOWING PAPER:\n //\n // [1] A. Aggarwal, J. S. Chang, Chee K. Yap: \"Minimum area circumscribing Polygons\", The Visual\n // Computer, 1:112-117, 1985\n //\n // The overall complexity of the algorithm is theta(n^2log(n)log(k)) where \"n\" represents the number\n // of vertices in the input convex polygon and \"k\" the number of vertices in the output polygon.\n //\n //\n //\n // License Agreement\n // For Open Source Computer Vision Library\n //\n // Copyright (C) 2000, Intel Corporation, all rights reserved.\n // Copyright (C) 2013, OpenCV Foundation, all rights reserved.\n // Copyright (C) 2020, Sara Kuhnert, all rights reserved.\n // Third party copyrights are property of their respective owners.\n //\n // Redistribution and use in source and binary forms, with or without modification,\n // are permitted provided that the following conditions are met:\n //\n // * Redistribution's of source code must retain the above copyright notice,\n // this list of conditions and the following disclaimer.\n //\n // * Redistribution's in binary form must reproduce the above copyright notice,\n // this list of conditions and the following disclaimer in the documentation\n // and/or other materials provided with the distribution.\n //\n // * The name of the copyright holders may not be used to endorse or promote products\n // derived from this software without specific prior written permission.\n //\n // This software is provided by the copyright holders and contributors \"as is\" and\n // any express or implied warranties, including, but not limited to, the implied\n // warranties of merchantability and fitness for a particular purpose are disclaimed.\n // In no event shall the Intel Corporation or contributors be liable for any direct,\n // indirect, incidental, special, exemplary, or consequential damages\n // (including, but not limited to, procurement of substitute goods or services;\n // loss of use, data, or profits; or business interruption) however caused\n // and on any theory of liability, whether in contract, strict liability,\n // or tort (including negligence or otherwise) arising in any way out of\n // the use of this software, even if advised of the possibility of such damage.\n //\n //M*/\n\n#include \"precomp.hpp\"\n#include \n\n#include \n#include \n#include \n#include \n#include \n#include \n#include \n\n\n\n//////////////////////////// Constants definitions ////////////////////////////\n\n#define EPSILON 1e-6\n\n\n///////////////////////// Helper function declarations /////////////////////////\n\nnamespace minEnclosingConvexPolygon {\n\nstatic void findMinEnclosingPolygon(cv::InputArray points,\n const int &k,\n CV_OUT cv::OutputArray &kgon,\n CV_OUT double &area);\n\nstatic void findMinEnclosingPolygon(const std::vector &ngon,\n const int &k,\n cv::OutputArray &kgon,\n double &area);\n\nstatic void findMinAreaPolygon(const std::vector &ngon,\n std::vector &minPolygon,\n double &area,\n int k);\n\n} //namespace\n\n\n//////////////////////// Class and struct declarations 
////////////////////////\n\nnamespace minEnclosingConvexPolygon {\n\n//! Intersection point of two sides with its position relative to the polygon\n/*!\n * @param point Intersection point of the two sides\n * @param position Is the intersection point a valid solution?\n */\nstruct IntersectionPoint\n{\n cv::Point2f point = {-1.0, -1.0};\n bool position = false;\n\n IntersectionPoint() = default;\n IntersectionPoint(const cv::Point2f& a, const cv::Point2f& b,\n const cv::Point2f& c, const cv::Point2f& d);\n IntersectionPoint(int i, int j, const std::vector& ngon);\n};\n\n//! Container for the information about the intersection point of two flush sides\n/*!\n * @param intersection Instance of IntersectionPoint\n * @param extra_area The area that would be added to the kgon compared to the ngon\n * @param done Set to true once calculated to avoid redundant calculations\n */\nstruct FlushIntersect\n{\n IntersectionPoint intersection = {};\n double extra_area = std::numeric_limits::max();\n bool done = false;\n};\n\n//! Container for the information about a balanced side between two points\n/*!\n * @param pi First intersection point\n * @param pj Second intersection point\n * @param extra_area The area that would be added to the kgon compared to the ngon\n * @param flush Flush is a special case of balanced. If the balanced side is also flush, this indicates which of the sides of the ngon it is flush with. The default for not flush sides is -1.\n * @param position Is the balanced side a valid solution?\n * @param done Set to true once calculated to avoid redundant calculations\n */\nstruct BalancedIntersect\n{\n cv::Point2f pi = {-1, -1};\n cv::Point2f pj = {-1, -1};\n double extra_area = std::numeric_limits::max();\n int flush = -1;\n bool position = false;\n bool done = false;\n};\n\n//! Container for the best segment between two points\n/*!\n * @param extra_area The area that would be added to the kgon compared to the ngon\n * @param side Side of the ngon to which it is flush (if flush is true) or edge of the ngon which it is touching (if flush is false)\n * @param flush True if it is flush. If false, it is balanced but not flush\n * @param exists Does a valid solution exist?\n * @param done Set to true once calculated to avoid redundant calculations\n */\nstruct Segment\n{\n double extra_area = std::numeric_limits::max();\n int side = -1;\n bool flush = false;\n bool exists = false;\n bool done = false;\n};\n\n//! Combination of selected sides and their corresponding chains, which generates the kgon with the minimum area\n/*!\n * @param area Extra area that the minimal kgon is bigger than the input ngon\n * @param i First side\n * @param j Second side\n */\nstruct Minimum\n{\n double area = std::numeric_limits::max();\n int i = -1;\n int j = -1;\n};\n\n//! Container for a side of the minimal kgon\n/*!\n * @param side Index of the side of the ngon which it is flush with or the vertex which it is touching\n * @param flush Is this side flush?\n */\nstruct Side\n{\n int side = -1;\n bool flush = false;\n\n Side() = default;\n Side(int i, bool b);\n};\n\n//! 
Container for the minimal kgon\n/*!\n * @param sides Indices of the corresponding sindes of the ngon\n * @param vertices Vertices of the kgon\n * @param extra_area Extra area that the minimal kgon is bigger than the input ngon\n * @param i, @param j The chains which build the minimal kgon are formed between those two sides\n */\nstruct Kgon\n{\n std::vector sides;\n std::vector vertices;\n double extra_area = std::numeric_limits::max();\n int i = -1;\n int j = -1;\n};\n\n//! Class for all the possible combinations of flush intersections of two sides of the input polygon\n/*!\n * @param ngon Input polygon\n * @param intersections Matrix of all possible flush intersections of two sides of the ngon\n * @param area_edges Collection of the points that define the extra area of the kgon\n */\nclass FlushIntersections\n{\nprivate:\n const std::vector& ngon;\n std::vector> intersections;\n std::vector area_edges;\n\npublic:\n FlushIntersections(const std::vector& _ngon);\n const FlushIntersect& lineIntersect(int i, int j);\n};\n\n//! Class for all the possible combinations of balanced intersections with two sides of the input polygon\n/*!\n * @param ngon Input polygon\n * @param balanced_intersections atrix of all possible balanced intersections of two sides of the ngon\n * @param area_edges Collection of the points that define the extra area of the kgon\n */\nclass BalancedIntersections\n{\nprivate:\n const std::vector& ngon;\n std::vector> balanced_intersections;\n std::vector area_edges;\n\n double extraArea ( int first, int last, const cv::Point2f& extra1, const cv::Point2f& extra2 );\n\n BalancedIntersect flush(int i, int j, int e);\n\npublic:\n BalancedIntersections(const std::vector& _ngon);\n BalancedIntersect balancedIntersect(int i, int j, int e);\n void markAsDone(int i, int j);\n const std::vector& operator[](size_t i) const;\n};\n\n//! Class for building the one-sided and h-sided chains and calculation the minimum enclosing polygon based on those chains\n/*!\n * @param ngon Input polygon\n * @param single_sides Matrix of the best one-sided chain for each combination of two sides of the ngon\n * @param middle_sides Matrix of the middle side of each h-sided chain for all relevant h and each combination of two sides of the ngon\n * @param intersections An instance of FlushIntersections to store already calculated flush intersections\n * @param balanced_inters An instance of BalancedIntersections to store already calculated balanced intersections\n */\nclass Chains\n{\nprivate:\n const std::vector& ngon;\n std::vector> single_sides;\n std::vector>> middle_sides;\n FlushIntersections intersections;\n BalancedIntersections balanced_inters;\n\n void findSingleE(int i, int j, int l, int r);\n void singleSideImpl(int i, int j1, int j2);\n void findMiddleE1(int i, int j, int l, int r);\n void middleSideImpl1(int i, int j1, int j2);\n void findMiddleE(int h, int i, int j, int l, int r);\n void middleSideImpl(int h, int i, int j1, int j2);\n\npublic:\n Chains(const std::vector& _ngon, int k);\n\n std::set relevantChainLengths(int h);\n void calcOneSidedChains();\n void calcMiddleChains(int h);\n\n Minimum minimumArea(int n, int k);\n std::vector reconstructHSidedChain(int h, int i, int j);\n std::vector findKSides(int k, int i, int j);\n std::vector findKVertices(std::vector& sides);\n\n};\n\n} //namespace\n\n\n//////////////////////// Class and struct constructors ////////////////////////\n\nnamespace minEnclosingConvexPolygon {\n\n//! Constructor for IntersectionPoint. 
Find the intersection point of two lines given by four points and decide whether it is on the correct side of the polygon\n/*!\n * @param a First point of the first line\n * @param b Second point of the first line\n * @param c First point of the second line\n * @param d Second point of the second line\n */\nIntersectionPoint::IntersectionPoint(const cv::Point2f& a, const cv::Point2f& b,\n const cv::Point2f& c, const cv::Point2f& d)\n{\n const cv::Point2f ab = b - a, cd = d - c, ac = c - a;\n const double det = ab.cross(-cd);\n if(std::abs (det) < EPSILON )\n return;\n\n const double loc = ac.cross(-cd) / det;\n if(loc <= 0)\n return;\n\n point = a + loc * ab;\n position = true;\n}\n\n//! Constructor for IntersectionPoint. Find the intersection point of two sides of the polygon based on the given side indices.\n/*!\n * @param i Index of the first side\n * @param j Index of the second side\n * @param ngon Input polygon with n sides\n */\nIntersectionPoint::IntersectionPoint(int i, int j,\n const std::vector& ngon) :\n IntersectionPoint(ngon[i],\n ngon[(i + 1) % ngon.size()],\n ngon[j],\n ngon[(j + 1) % ngon.size()])\n{}\n\n//! Constructor for FlushIntersections\n/*!\n * @param _ngon Input polygon with n sides\n */\nFlushIntersections::FlushIntersections(const std::vector& _ngon) :\n ngon(_ngon),\n intersections(ngon.size(),\n std::vector(ngon.size()))\n{\n area_edges.reserve(ngon.size());\n}\n\n//! Constructor for BalancedIntersections\n/*!\n * @param _ngon Input polygon with n sides\n */\nBalancedIntersections::BalancedIntersections(const std::vector& _ngon) :\nngon(_ngon),\nbalanced_intersections(ngon.size(),\n std::vector(ngon.size()))\n{\n area_edges.reserve(ngon.size());\n}\n\n//! Constructor for Side. Assign a side index and weather it is flush or not.\n/*!\n * @param i Side index\n * @param b Is side i flush?\n */\nSide::Side(int i, bool b)\n{\n side = i;\n flush = b;\n}\n\n//! Constructor for Chains\n/*!\n * @param\n */\nChains::Chains(const std::vector& _ngon, int k) :\n ngon(_ngon),\n single_sides(std::vector>(ngon.size(),\n std::vector(ngon.size()))),\n middle_sides(std::vector>>(\n k, std::vector>(ngon.size(),\n std::vector(ngon.size())))),\n intersections(ngon),\n balanced_inters(ngon)\n{}\n\n} //namespace\n\n\n////////////////////////// Class and struct functions //////////////////////////\n\nnamespace minEnclosingConvexPolygon {\n\n//! Find the intersection point of two sides, decide weather it is a valid point and if so calculate the extra area caused by that intersection.\n/*!\n * @param i Index of the first side\n * @param j Index of the second side\n */\nconst FlushIntersect& FlushIntersections::lineIntersect(int i, int j)\n{\n FlushIntersect& itr = intersections[i][j];\n if(itr.done)\n return itr;\n\n const size_t n = ngon.size();\n if((i + 1) % n == j)\n {\n itr.intersection.point = ngon[j];\n itr.intersection.position = true;\n itr.extra_area = 0.0;\n itr.done = true;\n return itr;\n }\n itr.intersection = IntersectionPoint(i, j, ngon);\n if(itr.intersection.position)\n {\n area_edges.resize(0);\n for(int t = (i + 1) % n; t != (j + 1) % n; t = (t + 1) % n)\n {\n area_edges.push_back(ngon[t]);\n }\n area_edges.push_back(itr.intersection.point);\n itr.extra_area = cv::contourArea(area_edges);\n itr.done = true;\n }\n else\n {\n itr.extra_area = std::numeric_limits::max();\n itr.intersection.position = false;\n itr.done = true;\n }\n return itr;\n}\n\n//! 
Calculate the added area that is enclosed by a sequence of consecutive vertices of the polygon and the intersection point of two sides.\n/*!\n * @param first Index of the first point of the sequence\n * @param last Index of the last point of the sequence\n * @param extra1 Last point of the sequence\n * @param extra2 Intersection point\n */\ndouble BalancedIntersections::extraArea(int first, int last,\n const cv::Point2f& extra1,\n const cv::Point2f& extra2)\n{\n const size_t n = ngon.size();\n area_edges.resize(0);\n for(int t = first; t != last; t = (t + 1) % n)\n area_edges.push_back(ngon[t]);\n\n area_edges.push_back(extra1);\n area_edges.push_back(extra2);\n\n return cv::contourArea(area_edges);\n}\n\n//! Determine the intersection points of a flush side e that lies between sides i and j with these two sides. Calculate the extra area and the position of the intersection points relative to the polygon. Update balanced_intersections if the new area is smaller than extraArea.\n/*!\n * @param i Index of first side\n * @param j Index of second side\n * @param e Index of a side between i and j\n */\nBalancedIntersect BalancedIntersections::flush(int i, int j, int e)\n{\n if(i == e)\n std::logic_error(\"\");\n if(j == e)\n std::logic_error(\"\");\n\n const size_t n = ngon.size();\n const int before = (e - 1 + n) % n;\n BalancedIntersect bi = balanced_intersections[i][j];\n\n const IntersectionPoint left_e(i, e, ngon);\n const IntersectionPoint right_e(e, j, ngon);\n\n if(left_e.position == true && right_e.position == true)\n {\n double extra_area = extraArea((i + 1) % n, e, ngon[e], left_e.point);\n if(extra_area < bi.extra_area)\n {\n extra_area += extraArea((e + 1) % n, j, ngon[j], right_e.point);\n if(extra_area < bi.extra_area)\n {\n bi.extra_area = extra_area;\n bi.pi = left_e.point;\n bi.pj = right_e.point;\n bi.position = true;\n bi.flush = e;\n }\n }\n }\n\n if(before != i)\n {\n const IntersectionPoint left_before(i, before, ngon);\n const IntersectionPoint right_before(before, j, ngon);\n if(left_before.position == true && right_before.position == true)\n {\n double extra_area =\n extraArea((i + 1) % n, before, ngon[before], left_before.point);\n\n if(extra_area < bi.extra_area)\n {\n extra_area += extraArea(e, j, ngon[j], right_before.point);\n if(extra_area < bi.extra_area)\n {\n bi.extra_area = extra_area;\n bi.pi = left_before.point;\n bi.pj = right_before.point;\n bi.position = true;\n bi.flush = before;\n }\n }\n }\n }\n return bi;\n}\n\n//! Determine the intersection points of a balanced side e that lies between sides i and j with these two sides. If no valid balanced edge is found, ccheck for flush sides. Calculate the extra area and the position of the intersection points relative to the polygon. 
Update balanced_intersections if the new area is smaller than extraArea.\n/*!\n * @param i Index of first side\n * @param j Index of second side\n * @param e Index of a side between i and j\n */\nBalancedIntersect BalancedIntersections::balancedIntersect(int i, int j, int e)\n{\n if(balanced_intersections[i][j].done)\n return balanced_intersections[i][j];\n\n const size_t n = ngon.size();\n if((i + 2) % n == j)\n {\n BalancedIntersect& bi = balanced_intersections[i][j];\n bi.pi = ngon[(i + 1) % n];\n bi.pj = ngon[j];\n bi.flush = (i + 1) % n;\n bi.position = true;\n bi.extra_area = 0.0;\n bi.done = true;\n return bi;\n }\n\n const cv::Point2f p1 = ngon[i], p2 = ngon[(i + 1) % n], p3 = ngon[e],\n p4 = ngon[j], p5 = ngon[(j + 1) % n];\n const cv::Point2f dir12 = p2 - p1, dir45 = p5 - p4;\n const double det = dir12.cross(dir45);\n if(std::abs (det) < EPSILON )\n {\n flush(i, j, e);\n return balanced_intersections[i][j];\n }\n\n BalancedIntersect bi;\n cv::Point2f temp = 2 * p3 - p2 - p4;\n const double s = temp.cross(dir45) / det;\n const double t = temp.cross(dir12) / det;\n if(s >= 0 && t >= 0)\n {\n bi.pi = p2 + dir12 * s;\n bi.pj = p4 - dir45 * t;\n bi.position = true;\n\n const cv::Point2f dir_balanced = bi.pj - bi.pi,\n dir_left = p3 - ngon[(e - 1 + n) % n],\n dir_right = ngon[(e + 1) % n] - p3;\n\n const double cross_left = dir_balanced.cross(dir_left),\n cross_right = dir_balanced.cross(dir_right);\n if((cross_left < 0 && cross_right < 0) || (cross_left > 0 && cross_right > 0))\n {\n BalancedIntersect reset;\n bi = reset;\n bi = flush(i, j, e);\n }\n else if(std::abs (cross_left) < EPSILON )\n {\n bi.flush = (e - 1 + n) % n;\n bi.extra_area =\n extraArea((i + 1) % n, (e - 1 + n) % n, ngon[(e - 1 + n) % n], bi.pi)\n + extraArea(e, j, ngon[j], bi.pj);\n }\n else if(std::abs (cross_right) < EPSILON )\n {\n bi.flush = e;\n bi.extra_area = extraArea((i + 1) % n, e, ngon[e], bi.pi)\n + extraArea((e + 1) % n, j, ngon[j], bi.pj);\n }\n else\n {\n bi.extra_area = extraArea((i + 1) % n, e, ngon[e], bi.pi)\n + extraArea(e, j, ngon[j], bi.pj);\n }\n }\n else\n {\n flush(i, j, e);\n }\n\n if(bi.extra_area < balanced_intersections[i][j].extra_area)\n {\n balanced_intersections[i][j] = bi;\n }\n return bi;\n}\n\n//! Set function for the done attribute of BalancedIntersections\n/*!\n * @param i Index of first side\n * @param j Index of second side\n */\nvoid BalancedIntersections::markAsDone(int i, int j)\n{\n balanced_intersections[i][j].done = true;\n}\n\n//! Operator to get a vector of elements from BalancedIntersections\n/*!\n * @param i index of a side\n */\nconst std::vector& BalancedIntersections::operator[](\n size_t i) const\n{\n return balanced_intersections[i];\n}\n\n//! 
For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the one sided chain with minimal extra area\n/*!\n * @param i Index of first fixed side\n * @param j Index of second fixed side\n * @param l Index of left boundary\n * @param r Index of right boundary\n */\nvoid Chains::findSingleE(int i, int j, int l, int r)\n{\n const size_t n = ngon.size();\n Segment& one = single_sides[i][j];\n if (one.done)\n return;\n\n double min_area = std::numeric_limits::max();\n for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n {\n BalancedIntersect candidate = balanced_inters.balancedIntersect(i, j, e);\n if(candidate.extra_area < min_area)\n {\n min_area = candidate.extra_area;\n one.side = e;\n one.extra_area = candidate.extra_area;\n one.flush = false;\n one.exists = true;\n }\n }\n one.done = true;\n balanced_inters.markAsDone(i, j);\n}\n\n//! Recursively repeat the search for the one sided chain with minimal extra area to shrink the boundaries of bigger distances\n/*!\n * @param i Fixed side of the ngon\n * @param j1 Lower boundary of search intervall\n * @param j2 Upper boundary of search intervall\n */\nvoid Chains::singleSideImpl(int i, int j1, int j2)\n{\n const size_t n = ngon.size();\n if((j1 + 1) %n == j2)\n {\n return;\n }\n\n int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n int l = single_sides[i][j1].side < 0 ? (j1 + 1) %n : single_sides[i][j1].side;\n int r = single_sides[i][j2].side < 0 ? (j2 - 1 + n) %n : single_sides[i][j2].side;\n\n findSingleE(i, mid, l, r);\n singleSideImpl(i, j1, mid);\n singleSideImpl(i, mid, j2);\n\n return;\n}\n\n//! For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the middle side of the h-sided chain with minimal extra area. This is the version for h = 1.\n/*!\n * @param i Index of first fixed side\n * @param j Index of second fixed side\n * @param l Index of left boundary\n * @param r Index of right boundary\n */\nvoid Chains::findMiddleE1(int i, int j, int l, int r)\n{\n const size_t n = ngon.size();\n Segment& one = middle_sides[1][i][j];\n if (one.done)\n return;\n\n double min_area = std::numeric_limits::max();\n for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n {\n const FlushIntersect& before = intersections.lineIntersect(i, e);\n if(!before.intersection.position)\n continue;\n const FlushIntersect& after = intersections.lineIntersect(e, j);\n if(!after.intersection.position)\n continue;\n\n double tmp_area = before.extra_area + after.extra_area;\n if(tmp_area < min_area)\n {\n min_area = tmp_area;\n one.side = e;\n one.extra_area = tmp_area;\n one.exists = true;\n one.flush = true;\n }\n }\n one.done = true;\n}\n\n//! Recursively repeat the search for the middle side of the h-sided chain with minimal extra area to shrink the boundaries of bigger distances. This is the version for h = 1.\n/*!\n * @param i Fixed side of the ngon\n * @param j1 Lower boundary of search intervall\n * @param j2 Upper boundary of search intervall\n */\nvoid Chains::middleSideImpl1(int i, int j1, int j2)\n{\n const size_t n = ngon.size();\n if((j1 + 1) %n == j2)\n {\n return;\n }\n\n int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n int l = middle_sides[1][i][j1].side < 0 ? (j1 + 1) %n : middle_sides[1][i][j1].side;\n int r = middle_sides[1][i][j2].side < 0 ? (j2 - 1 + n) %n : middle_sides[1][i][j2].side;\n\n findMiddleE1(i, mid, l, r);\n middleSideImpl1(i, j1, mid);\n middleSideImpl1(i, mid, j2);\n\n return;\n}\n\n//! 
For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the middle side of the h-sided chain with minimal extra area. This is the version for h > 1.\n/*!\n * @param h Length of the h-sided chain\n * @param i Index of first fixed side\n * @param j Index of second fixed side\n * @param l Index of left boundary\n * @param r Index of right boundary\n */\nvoid Chains::findMiddleE(int h, int i, int j, int l, int r)\n{\n const size_t n = ngon.size();\n Segment& one = middle_sides[h][i][j];\n if (one.done)\n return;\n\n const int dist = (i <= j ? (j - i) : (j + n - i));\n const int h_floor = (h - 1) / 2;\n const int h_ceil = h - 1 - h_floor;\n\n if(dist == 0)\n throw std::logic_error(\"\");\n if(dist - 1 < h)\n {\n one.done = true;\n return;\n }\n if(dist - 1 == h)\n {\n one.side = (i + h_floor + 1) % n;\n one.extra_area = 0.0;\n one.exists = true;\n one.flush = true;\n one.done = true;\n return;\n }\n\n double min_area = std::numeric_limits::max();\n for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n {\n const Segment& before = middle_sides[h_floor][i][e];\n if (before.extra_area == std::numeric_limits::max())\n continue;\n const Segment& after = middle_sides[h_ceil][e][j];\n if(after.extra_area == std::numeric_limits::max())\n continue;\n\n double tmp_area = before.extra_area + after.extra_area;\n if(tmp_area < min_area)\n {\n min_area = tmp_area;\n one.side = e;\n one.extra_area = tmp_area;\n one.exists = true;\n one.flush = true;\n }\n }\n one.done = true;\n}\n\n//! Recursively repeat the search for the middle side of the h-sided chain with minimal extra area to shrink the boundaries of bigger distances. This is the version for h > 1.\n/*!\n * @param h Length of the h-sided chain\n * @param i Fixed side of the ngon\n * @param j1 Lower boundary of search intervall\n * @param j2 Upper boundary of search intervall\n */\nvoid Chains::middleSideImpl(int h, int i, int j1, int j2)\n{\n const size_t n = ngon.size();\n if((j1 + 1) %n == j2)\n {\n return;\n }\n\n int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n int l = middle_sides[h][i][j1].side < 0 ? (j1 + 1) %n : middle_sides[h][i][j1].side;\n int r = middle_sides[h][i][j2].side < 0 ? (j2 - 1 + n) %n : middle_sides[h][i][j2].side;\n\n findMiddleE(h, i, mid, l, r);\n middleSideImpl(h, i, j1, mid);\n middleSideImpl(h, i, mid, j2);\n\n return;\n}\n\n//! Calculate the relevant chain lengths. Starting with a maximum chain length of h, down to a chain length of 1, only chains with half the length of the chain before are needed.\n/*!\n * @param h Length of the longest chain (h = k - 3)\n */\nstd::set Chains::relevantChainLengths(int h)\n{\n if(h <= 1)\n return {h};\n\n std::set hs = {h};\n const int h_floor = (h - 1) / 2;\n const int h_ceil = h - 1 - h_floor;\n std::set h1 = relevantChainLengths(h_floor);\n for(const int& hNew : h1)\n hs.insert(hNew);\n if (h_ceil != h_floor)\n {\n std::set h2 = relevantChainLengths(h_ceil);\n for(const int& hNew : h2)\n hs.insert(hNew);\n }\n return hs;\n}\n\n//! Recursively calculate all the onesided chains for each combination of polygon sides.\n/*!\n */\nvoid Chains::calcOneSidedChains()\n{\n const size_t n = ngon.size();\n for(size_t i = 0; i < n; i++)\n {\n int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n\n findSingleE(i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n findSingleE(i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n singleSideImpl(i, j1, j2);\n }\n}\n\n//! 
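A worked example of relevantChainLengths may help here (the template arguments are stripped in this extract, so std::set<int> is assumed, as is a Chains instance named chains). For k = 8 the longest chain has h = k - 3 = 5 sides; splitting 5 at its middle side leaves two sub-chains of length (5 - 1) / 2 = 2, and 2 in turn splits into lengths 0 and 1, so only four of the middle_sides tables are ever read.

// Illustration only: for k = 8, relevantChainLengths(5) yields {0, 1, 2, 5};
// chain lengths 3 and 4 are never needed, so calcMiddleChains can skip them.
// Iterating the std::set in ascending order fills the short chains first,
// which the longer chains then read.
std::set<int> needed = chains.relevantChainLengths(k - 3);
for (int h : needed)
    chains.calcMiddleChains(h);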
Recursively calculate the middle sides of the h-sided chains for all combinations of polygon sides.\n/*!\n * @param h Length of the chains\n */\nvoid Chains::calcMiddleChains(int h)\n{\n const size_t n = ngon.size();\n if (h == 0)\n {\n for (size_t i = 0; i < n; i++)\n {\n for (size_t j = 0; j < n; j++)\n {\n Segment& one = middle_sides[h][i][j];\n const FlushIntersect itrs = intersections.lineIntersect(i, j);\n\n one.side = -1;\n one.extra_area = itrs.extra_area;\n one.exists = false;\n one.flush = false;\n one.done = true;\n }\n }\n return;\n }\n if (h == 1)\n {\n for (size_t i = 0; i < n; i++)\n {\n int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n\n findMiddleE1(i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n findMiddleE1(i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n middleSideImpl1(i, j1, j2);\n }\n return;\n }\n\n for (size_t i = 0; i < n; i++)\n {\n int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n\n findMiddleE(h, i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n findMiddleE(h, i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n middleSideImpl(h, i, j1, j2);\n }\n}\n\n//! Find the i and j with the smallest extra area.\n/*!\n * @param n Number of sides of the input polygon\n * @param k Number of sides of the output polygon\n */\nMinimum Chains::minimumArea(int n, int k)\n{\n Minimum min{};\n for(int i = 0; i < n; i++)\n {\n for(int j = 0; j < n; j++)\n {\n if(!single_sides[i][j].exists || !middle_sides[k - 3][j][i].exists)\n continue;\n\n double tmp_area =\n single_sides[i][j].extra_area + middle_sides[k - 3][j][i].extra_area;\n if(tmp_area < min.area)\n {\n min.area = tmp_area;\n min.i = i;\n min.j = j;\n }\n }\n }\n return min;\n}\n\n//! Reconstruct the h-sided chain based on the two sides that give the smalles extra area.\n/*!\n * @param h Length of the k - 3 sided chain.\n * @param i First index from minimumArea\n * @param j Second index from minimumArea\n */\nstd::vector Chains::reconstructHSidedChain(int h, int i, int j)\n{\n if(h == 0)\n {\n throw std::logic_error(\"\");\n }\n if(h == 1)\n {\n return std::vector{{middle_sides[h][i][j].side, true}};\n }\n\n std::vector before, after;\n const int h_floor = (h - 1) / 2;\n const int h_ceil = h - 1 - h_floor;\n if(h_floor > 0)\n before = reconstructHSidedChain(h_floor, i, middle_sides[h][i][j].side);\n if(h_ceil > 0)\n after = reconstructHSidedChain(h_ceil, middle_sides[h][i][j].side, j);\n\n std::vector sides{{middle_sides[h][i][j].side, true}};\n sides.insert(sides.end(), before.begin(), before.end());\n sides.insert(sides.end(), after.begin(), after.end());\n return sides;\n}\n\n//! Get the k sides that build the kgon based on the two sides that give the smalles extra area.\n/*!\n * @param k Number of sides of the output polygon\n * @param i First index from minimumArea\n * @param j Second index from minimumArea\n */\nstd::vector Chains::findKSides(int k, int i, int j)\n{\n std::vector sides;\n sides.push_back({i, true});\n sides.push_back({j, true});\n\n if(single_sides[i][j].flush)\n sides.push_back({single_sides[i][j].side, true});\n else\n sides.push_back({single_sides[i][j].side, false});\n\n std::vector flush_chain = reconstructHSidedChain(k - 3, j, i);\n sides.insert(sides.end(), flush_chain.begin(), flush_chain.end());\n std::sort(sides.begin(), sides.end(),\n [](const Side& lhs, const Side& rhs) { return lhs.side < rhs.side; });\n\n return sides;\n}\n\n//! 
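Taken together, the declarations above suggest how findMinAreaPolygon could drive the Chains class; its definition is not part of this excerpt, so the following is only an assumed orchestration sketch (std::vector<cv::Point2f> is assumed for the stripped template parameters, and the sketch is assumed to live in the same namespace as the classes).

// Assumed driver sketch, not the PR's actual findMinAreaPolygon.
static void findMinAreaPolygonSketch(const std::vector<cv::Point2f>& ngon,
                                     std::vector<cv::Point2f>& kgon,
                                     double& area, int k)
{
    const int n = (int)ngon.size();
    Chains chains(ngon, k);

    // 1. Middle sides of the h-sided chains, shortest lengths first, because
    //    findMiddleE reads the tables for h_floor and h_ceil.
    for (int h : chains.relevantChainLengths(k - 3))
        chains.calcMiddleChains(h);

    // 2. Best one-sided (balanced) chain for every pair of flush sides.
    chains.calcOneSidedChains();

    // 3. Pick the pair (i, j) whose chains add the least extra area, then
    //    rebuild the k sides and intersect them into the k output vertices.
    const Minimum best = chains.minimumArea(n, k);
    std::vector<Side> sides = chains.findKSides(k, best.i, best.j);
    kgon = chains.findKVertices(sides);
    area = cv::contourArea(kgon);
}

A production version would also have to handle the case where minimumArea finds no valid pair, i.e. best.i stays at its default of -1.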
Calculate the k vertices of the kgon from its k sides.\n/*!\n * @param sides Sides of the output polygon\n */\nstd::vector Chains::findKVertices(std::vector& sides)\n{\n const int k = sides.size();\n std::vector vertices(k);\n\n for(int u = 0; u < k; u++)\n {\n const int next = (u + 1) % k;\n if(sides[u].flush && sides[next].flush)\n {\n vertices[u] = intersections.lineIntersect(sides[u].side,\n sides[next].side).intersection.point;\n }\n else if(sides[u].flush && !sides[next].flush)\n {\n vertices[u] = balanced_inters[sides[u].side][sides[(u + 2) % k].side].pi;\n }\n else if(!sides[u].flush && sides[next].flush)\n {\n vertices[u] = balanced_inters[sides[(u - 1 + k) %k].side][sides[next].side].pj;\n }\n else\n {\n throw std::logic_error(\"\");\n }\n }\n return vertices;\n}\n\n} //namespace\n\n\n///////////////////////// Helper function definitions /////////////////////////\n\nnamespace minEnclosingConvexPolygon {\n\n//! Find the minimum enclosing convex kgon and its area. Converting inputs to used vector shape and data format.\n/*!\n * @param points Set of points\n * @param k Number of vertices of the output polygon\n * @param minPolygon Minimum area convex k-gon enclosing the given set of points\n * @param area Area of minPolygon\n */\nstatic void findMinEnclosingPolygon(cv::InputArray points,\n const int &k,\n CV_OUT cv::OutputArray &minPolygon,\n CV_OUT double &area) {\n CV_Assert(!points.empty());\n std::vector ngon, kgon;\n cv::convexHull(points, ngon, true);\n findMinEnclosingPolygon(ngon, k, kgon, area);\n cv::Mat(kgon).copyTo(minPolygon);\n}\n\n//! Find the minimum enclosing convex polygon and its area for a given set of points. Handling of input errors.\n/*!\n * @param ngon The polygon representing the convex hull of the points\n * @param k Number of vertices of the output polygon\n * @param minPolygon Minimum area convex k-gon enclosing the given polygon\n * @param area Area of minPolygon\n */\nstatic void findMinEnclosingPolygon(const std::vector &ngon,\n const int &k,\n cv::OutputArray &minPolygon,\n double &area) {\n try\n {\n if (ngon.size() < 3)\n {\n throw std::invalid_argument(\n \"ngon must have 3 or more different points enclosing an area\" );\n }\n if (cv::contourArea(ngon) < EPSILON )\n {\n throw std::invalid_argument( \"invalid ngon: all points on line\" );\n }\n if (k <= 3)\n {\n throw std::invalid_argument( \"k must be 3 or higher\" );\n }\n const size_t n = ngon.size();\n if ((const int)n == k)\n {\n throw std::runtime_error (\"(n = k)\");\n }\n if ((const int)n < k)\n {\n throw std::runtime_error (\"(n < k)\");\n }\n }\n catch (std::invalid_argument &message)\n {\n std::cout << \"invalid argument: \" << message.what() << std::endl;\n return;\n }\n catch (std::runtime_error &message)\n {\n std::cout << \"Warning: no minimum area polygon calculated \" << message.what() << std::endl;\n cv::Mat(ngon).copyTo(minPolygon);\n area = cv::contourArea(minPolygon);\n return;\n }\n catch (...)\n {\n std::cout << \"input error\" << std::endl;\n return;\n }\n\n try\n {\n std::vector kgon;\n findMinAreaPolygon(ngon, kgon, area, k);\n cv::Mat(kgon).copyTo(minPolygon);\n }\n catch (...)\n {\n std::cout << \"correct input but execution failed\" << std::endl;\n return;\n }", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2153806951", - "repo_full_name": "opencv/opencv", - "pr_number": 27369, - "pr_file": "modules/imgproc/src/min_enclosing_convex_polygon.cpp", - "discussion_id": "2153806951", - "commented_code": "@@ -0,0 +1,1185 
@@\n+/*M///////////////////////////////////////////////////////////////////////////////////////\n+ //\n+ // This file is part of OpenCV project.\n+ // It is subject to the license terms in the LICENSE file found in the top-level directory of this\n+ // distribution and at http://opencv.org/license.html.\n+ //\n+ //\n+ // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\n+ //\n+ // By downloading, copying, installing or using the software you agree to this license.\n+ // If you do not agree to this license, do not download, install,\n+ // copy or use the software.\n+ //\n+ // INFORMATION REGARDING THE CONTRIBUTION:\n+ //\n+ // Author: Sara Kuhnert\n+ // Created: 20.05.2025\n+ // E-mail: \n+ //\n+ // THE IMPLEMENTATION OF THE MODULES IS BASED ON THE FOLLOWING PAPER:\n+ //\n+ // [1] A. Aggarwal, J. S. Chang, Chee K. Yap: \"Minimum area circumscribing Polygons\", The Visual\n+ // Computer, 1:112-117, 1985\n+ //\n+ // The overall complexity of the algorithm is theta(n^2log(n)log(k)) where \"n\" represents the number\n+ // of vertices in the input convex polygon and \"k\" the number of vertices in the output polygon.\n+ //\n+ //\n+ //\n+ // License Agreement\n+ // For Open Source Computer Vision Library\n+ //\n+ // Copyright (C) 2000, Intel Corporation, all rights reserved.\n+ // Copyright (C) 2013, OpenCV Foundation, all rights reserved.\n+ // Copyright (C) 2020, Sara Kuhnert, all rights reserved.\n+ // Third party copyrights are property of their respective owners.\n+ //\n+ // Redistribution and use in source and binary forms, with or without modification,\n+ // are permitted provided that the following conditions are met:\n+ //\n+ // * Redistribution's of source code must retain the above copyright notice,\n+ // this list of conditions and the following disclaimer.\n+ //\n+ // * Redistribution's in binary form must reproduce the above copyright notice,\n+ // this list of conditions and the following disclaimer in the documentation\n+ // and/or other materials provided with the distribution.\n+ //\n+ // * The name of the copyright holders may not be used to endorse or promote products\n+ // derived from this software without specific prior written permission.\n+ //\n+ // This software is provided by the copyright holders and contributors \"as is\" and\n+ // any express or implied warranties, including, but not limited to, the implied\n+ // warranties of merchantability and fitness for a particular purpose are disclaimed.\n+ // In no event shall the Intel Corporation or contributors be liable for any direct,\n+ // indirect, incidental, special, exemplary, or consequential damages\n+ // (including, but not limited to, procurement of substitute goods or services;\n+ // loss of use, data, or profits; or business interruption) however caused\n+ // and on any theory of liability, whether in contract, strict liability,\n+ // or tort (including negligence or otherwise) arising in any way out of\n+ // the use of this software, even if advised of the possibility of such damage.\n+ //\n+ //M*/\n+\n+#include \"precomp.hpp\"\n+#include \n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+\n+\n+\n+//////////////////////////// Constants definitions ////////////////////////////\n+\n+#define EPSILON 1e-6\n+\n+\n+///////////////////////// Helper function declarations /////////////////////////\n+\n+namespace minEnclosingConvexPolygon {\n+\n+static void findMinEnclosingPolygon(cv::InputArray points,\n+ const int &k,\n+ CV_OUT cv::OutputArray &kgon,\n+ CV_OUT 
double &area);\n+\n+static void findMinEnclosingPolygon(const std::vector &ngon,\n+ const int &k,\n+ cv::OutputArray &kgon,\n+ double &area);\n+\n+static void findMinAreaPolygon(const std::vector &ngon,\n+ std::vector &minPolygon,\n+ double &area,\n+ int k);\n+\n+} //namespace\n+\n+\n+//////////////////////// Class and struct declarations ////////////////////////\n+\n+namespace minEnclosingConvexPolygon {\n+\n+//! Intersection point of two sides with its position relative to the polygon\n+/*!\n+ * @param point Intersection point of the two sides\n+ * @param position Is the intersection point a valid solution?\n+ */\n+struct IntersectionPoint\n+{\n+ cv::Point2f point = {-1.0, -1.0};\n+ bool position = false;\n+\n+ IntersectionPoint() = default;\n+ IntersectionPoint(const cv::Point2f& a, const cv::Point2f& b,\n+ const cv::Point2f& c, const cv::Point2f& d);\n+ IntersectionPoint(int i, int j, const std::vector& ngon);\n+};\n+\n+//! Container for the information about the intersection point of two flush sides\n+/*!\n+ * @param intersection Instance of IntersectionPoint\n+ * @param extra_area The area that would be added to the kgon compared to the ngon\n+ * @param done Set to true once calculated to avoid redundant calculations\n+ */\n+struct FlushIntersect\n+{\n+ IntersectionPoint intersection = {};\n+ double extra_area = std::numeric_limits::max();\n+ bool done = false;\n+};\n+\n+//! Container for the information about a balanced side between two points\n+/*!\n+ * @param pi First intersection point\n+ * @param pj Second intersection point\n+ * @param extra_area The area that would be added to the kgon compared to the ngon\n+ * @param flush Flush is a special case of balanced. If the balanced side is also flush, this indicates which of the sides of the ngon it is flush with. The default for not flush sides is -1.\n+ * @param position Is the balanced side a valid solution?\n+ * @param done Set to true once calculated to avoid redundant calculations\n+ */\n+struct BalancedIntersect\n+{\n+ cv::Point2f pi = {-1, -1};\n+ cv::Point2f pj = {-1, -1};\n+ double extra_area = std::numeric_limits::max();\n+ int flush = -1;\n+ bool position = false;\n+ bool done = false;\n+};\n+\n+//! Container for the best segment between two points\n+/*!\n+ * @param extra_area The area that would be added to the kgon compared to the ngon\n+ * @param side Side of the ngon to which it is flush (if flush is true) or edge of the ngon which it is touching (if flush is false)\n+ * @param flush True if it is flush. If false, it is balanced but not flush\n+ * @param exists Does a valid solution exist?\n+ * @param done Set to true once calculated to avoid redundant calculations\n+ */\n+struct Segment\n+{\n+ double extra_area = std::numeric_limits::max();\n+ int side = -1;\n+ bool flush = false;\n+ bool exists = false;\n+ bool done = false;\n+};\n+\n+//! Combination of selected sides and their corresponding chains, which generates the kgon with the minimum area\n+/*!\n+ * @param area Extra area that the minimal kgon is bigger than the input ngon\n+ * @param i First side\n+ * @param j Second side\n+ */\n+struct Minimum\n+{\n+ double area = std::numeric_limits::max();\n+ int i = -1;\n+ int j = -1;\n+};\n+\n+//! Container for a side of the minimal kgon\n+/*!\n+ * @param side Index of the side of the ngon which it is flush with or the vertex which it is touching\n+ * @param flush Is this side flush?\n+ */\n+struct Side\n+{\n+ int side = -1;\n+ bool flush = false;\n+\n+ Side() = default;\n+ Side(int i, bool b);\n+};\n+\n+//! 
Container for the minimal kgon\n+/*!\n+ * @param sides Indices of the corresponding sindes of the ngon\n+ * @param vertices Vertices of the kgon\n+ * @param extra_area Extra area that the minimal kgon is bigger than the input ngon\n+ * @param i, @param j The chains which build the minimal kgon are formed between those two sides\n+ */\n+struct Kgon\n+{\n+ std::vector sides;\n+ std::vector vertices;\n+ double extra_area = std::numeric_limits::max();\n+ int i = -1;\n+ int j = -1;\n+};\n+\n+//! Class for all the possible combinations of flush intersections of two sides of the input polygon\n+/*!\n+ * @param ngon Input polygon\n+ * @param intersections Matrix of all possible flush intersections of two sides of the ngon\n+ * @param area_edges Collection of the points that define the extra area of the kgon\n+ */\n+class FlushIntersections\n+{\n+private:\n+ const std::vector& ngon;\n+ std::vector> intersections;\n+ std::vector area_edges;\n+\n+public:\n+ FlushIntersections(const std::vector& _ngon);\n+ const FlushIntersect& lineIntersect(int i, int j);\n+};\n+\n+//! Class for all the possible combinations of balanced intersections with two sides of the input polygon\n+/*!\n+ * @param ngon Input polygon\n+ * @param balanced_intersections atrix of all possible balanced intersections of two sides of the ngon\n+ * @param area_edges Collection of the points that define the extra area of the kgon\n+ */\n+class BalancedIntersections\n+{\n+private:\n+ const std::vector& ngon;\n+ std::vector> balanced_intersections;\n+ std::vector area_edges;\n+\n+ double extraArea ( int first, int last, const cv::Point2f& extra1, const cv::Point2f& extra2 );\n+\n+ BalancedIntersect flush(int i, int j, int e);\n+\n+public:\n+ BalancedIntersections(const std::vector& _ngon);\n+ BalancedIntersect balancedIntersect(int i, int j, int e);\n+ void markAsDone(int i, int j);\n+ const std::vector& operator[](size_t i) const;\n+};\n+\n+//! 
Class for building the one-sided and h-sided chains and calculation the minimum enclosing polygon based on those chains\n+/*!\n+ * @param ngon Input polygon\n+ * @param single_sides Matrix of the best one-sided chain for each combination of two sides of the ngon\n+ * @param middle_sides Matrix of the middle side of each h-sided chain for all relevant h and each combination of two sides of the ngon\n+ * @param intersections An instance of FlushIntersections to store already calculated flush intersections\n+ * @param balanced_inters An instance of BalancedIntersections to store already calculated balanced intersections\n+ */\n+class Chains\n+{\n+private:\n+ const std::vector& ngon;\n+ std::vector> single_sides;\n+ std::vector>> middle_sides;\n+ FlushIntersections intersections;\n+ BalancedIntersections balanced_inters;\n+\n+ void findSingleE(int i, int j, int l, int r);\n+ void singleSideImpl(int i, int j1, int j2);\n+ void findMiddleE1(int i, int j, int l, int r);\n+ void middleSideImpl1(int i, int j1, int j2);\n+ void findMiddleE(int h, int i, int j, int l, int r);\n+ void middleSideImpl(int h, int i, int j1, int j2);\n+\n+public:\n+ Chains(const std::vector& _ngon, int k);\n+\n+ std::set relevantChainLengths(int h);\n+ void calcOneSidedChains();\n+ void calcMiddleChains(int h);\n+\n+ Minimum minimumArea(int n, int k);\n+ std::vector reconstructHSidedChain(int h, int i, int j);\n+ std::vector findKSides(int k, int i, int j);\n+ std::vector findKVertices(std::vector& sides);\n+\n+};\n+\n+} //namespace\n+\n+\n+//////////////////////// Class and struct constructors ////////////////////////\n+\n+namespace minEnclosingConvexPolygon {\n+\n+//! Constructor for IntersectionPoint. Find the intersection point of two lines given by four points and decide whether it is on the correct side of the polygon\n+/*!\n+ * @param a First point of the first line\n+ * @param b Second point of the first line\n+ * @param c First point of the second line\n+ * @param d Second point of the second line\n+ */\n+IntersectionPoint::IntersectionPoint(const cv::Point2f& a, const cv::Point2f& b,\n+ const cv::Point2f& c, const cv::Point2f& d)\n+{\n+ const cv::Point2f ab = b - a, cd = d - c, ac = c - a;\n+ const double det = ab.cross(-cd);\n+ if(std::abs (det) < EPSILON )\n+ return;\n+\n+ const double loc = ac.cross(-cd) / det;\n+ if(loc <= 0)\n+ return;\n+\n+ point = a + loc * ab;\n+ position = true;\n+}\n+\n+//! Constructor for IntersectionPoint. Find the intersection point of two sides of the polygon based on the given side indices.\n+/*!\n+ * @param i Index of the first side\n+ * @param j Index of the second side\n+ * @param ngon Input polygon with n sides\n+ */\n+IntersectionPoint::IntersectionPoint(int i, int j,\n+ const std::vector& ngon) :\n+ IntersectionPoint(ngon[i],\n+ ngon[(i + 1) % ngon.size()],\n+ ngon[j],\n+ ngon[(j + 1) % ngon.size()])\n+{}\n+\n+//! Constructor for FlushIntersections\n+/*!\n+ * @param _ngon Input polygon with n sides\n+ */\n+FlushIntersections::FlushIntersections(const std::vector& _ngon) :\n+ ngon(_ngon),\n+ intersections(ngon.size(),\n+ std::vector(ngon.size()))\n+{\n+ area_edges.reserve(ngon.size());\n+}\n+\n+//! Constructor for BalancedIntersections\n+/*!\n+ * @param _ngon Input polygon with n sides\n+ */\n+BalancedIntersections::BalancedIntersections(const std::vector& _ngon) :\n+ngon(_ngon),\n+balanced_intersections(ngon.size(),\n+ std::vector(ngon.size()))\n+{\n+ area_edges.reserve(ngon.size());\n+}\n+\n+//! Constructor for Side. 
Assign a side index and weather it is flush or not.\n+/*!\n+ * @param i Side index\n+ * @param b Is side i flush?\n+ */\n+Side::Side(int i, bool b)\n+{\n+ side = i;\n+ flush = b;\n+}\n+\n+//! Constructor for Chains\n+/*!\n+ * @param\n+ */\n+Chains::Chains(const std::vector& _ngon, int k) :\n+ ngon(_ngon),\n+ single_sides(std::vector>(ngon.size(),\n+ std::vector(ngon.size()))),\n+ middle_sides(std::vector>>(\n+ k, std::vector>(ngon.size(),\n+ std::vector(ngon.size())))),\n+ intersections(ngon),\n+ balanced_inters(ngon)\n+{}\n+\n+} //namespace\n+\n+\n+////////////////////////// Class and struct functions //////////////////////////\n+\n+namespace minEnclosingConvexPolygon {\n+\n+//! Find the intersection point of two sides, decide weather it is a valid point and if so calculate the extra area caused by that intersection.\n+/*!\n+ * @param i Index of the first side\n+ * @param j Index of the second side\n+ */\n+const FlushIntersect& FlushIntersections::lineIntersect(int i, int j)\n+{\n+ FlushIntersect& itr = intersections[i][j];\n+ if(itr.done)\n+ return itr;\n+\n+ const size_t n = ngon.size();\n+ if((i + 1) % n == j)\n+ {\n+ itr.intersection.point = ngon[j];\n+ itr.intersection.position = true;\n+ itr.extra_area = 0.0;\n+ itr.done = true;\n+ return itr;\n+ }\n+ itr.intersection = IntersectionPoint(i, j, ngon);\n+ if(itr.intersection.position)\n+ {\n+ area_edges.resize(0);\n+ for(int t = (i + 1) % n; t != (j + 1) % n; t = (t + 1) % n)\n+ {\n+ area_edges.push_back(ngon[t]);\n+ }\n+ area_edges.push_back(itr.intersection.point);\n+ itr.extra_area = cv::contourArea(area_edges);\n+ itr.done = true;\n+ }\n+ else\n+ {\n+ itr.extra_area = std::numeric_limits::max();\n+ itr.intersection.position = false;\n+ itr.done = true;\n+ }\n+ return itr;\n+}\n+\n+//! Calculate the added area that is enclosed by a sequence of consecutive vertices of the polygon and the intersection point of two sides.\n+/*!\n+ * @param first Index of the first point of the sequence\n+ * @param last Index of the last point of the sequence\n+ * @param extra1 Last point of the sequence\n+ * @param extra2 Intersection point\n+ */\n+double BalancedIntersections::extraArea(int first, int last,\n+ const cv::Point2f& extra1,\n+ const cv::Point2f& extra2)\n+{\n+ const size_t n = ngon.size();\n+ area_edges.resize(0);\n+ for(int t = first; t != last; t = (t + 1) % n)\n+ area_edges.push_back(ngon[t]);\n+\n+ area_edges.push_back(extra1);\n+ area_edges.push_back(extra2);\n+\n+ return cv::contourArea(area_edges);\n+}\n+\n+//! Determine the intersection points of a flush side e that lies between sides i and j with these two sides. Calculate the extra area and the position of the intersection points relative to the polygon. 
Update balanced_intersections if the new area is smaller than extraArea.\n+/*!\n+ * @param i Index of first side\n+ * @param j Index of second side\n+ * @param e Index of a side between i and j\n+ */\n+BalancedIntersect BalancedIntersections::flush(int i, int j, int e)\n+{\n+ if(i == e)\n+ std::logic_error(\"\");\n+ if(j == e)\n+ std::logic_error(\"\");\n+\n+ const size_t n = ngon.size();\n+ const int before = (e - 1 + n) % n;\n+ BalancedIntersect bi = balanced_intersections[i][j];\n+\n+ const IntersectionPoint left_e(i, e, ngon);\n+ const IntersectionPoint right_e(e, j, ngon);\n+\n+ if(left_e.position == true && right_e.position == true)\n+ {\n+ double extra_area = extraArea((i + 1) % n, e, ngon[e], left_e.point);\n+ if(extra_area < bi.extra_area)\n+ {\n+ extra_area += extraArea((e + 1) % n, j, ngon[j], right_e.point);\n+ if(extra_area < bi.extra_area)\n+ {\n+ bi.extra_area = extra_area;\n+ bi.pi = left_e.point;\n+ bi.pj = right_e.point;\n+ bi.position = true;\n+ bi.flush = e;\n+ }\n+ }\n+ }\n+\n+ if(before != i)\n+ {\n+ const IntersectionPoint left_before(i, before, ngon);\n+ const IntersectionPoint right_before(before, j, ngon);\n+ if(left_before.position == true && right_before.position == true)\n+ {\n+ double extra_area =\n+ extraArea((i + 1) % n, before, ngon[before], left_before.point);\n+\n+ if(extra_area < bi.extra_area)\n+ {\n+ extra_area += extraArea(e, j, ngon[j], right_before.point);\n+ if(extra_area < bi.extra_area)\n+ {\n+ bi.extra_area = extra_area;\n+ bi.pi = left_before.point;\n+ bi.pj = right_before.point;\n+ bi.position = true;\n+ bi.flush = before;\n+ }\n+ }\n+ }\n+ }\n+ return bi;\n+}\n+\n+//! Determine the intersection points of a balanced side e that lies between sides i and j with these two sides. If no valid balanced edge is found, ccheck for flush sides. Calculate the extra area and the position of the intersection points relative to the polygon. 
Update balanced_intersections if the new area is smaller than extraArea.\n+/*!\n+ * @param i Index of first side\n+ * @param j Index of second side\n+ * @param e Index of a side between i and j\n+ */\n+BalancedIntersect BalancedIntersections::balancedIntersect(int i, int j, int e)\n+{\n+ if(balanced_intersections[i][j].done)\n+ return balanced_intersections[i][j];\n+\n+ const size_t n = ngon.size();\n+ if((i + 2) % n == j)\n+ {\n+ BalancedIntersect& bi = balanced_intersections[i][j];\n+ bi.pi = ngon[(i + 1) % n];\n+ bi.pj = ngon[j];\n+ bi.flush = (i + 1) % n;\n+ bi.position = true;\n+ bi.extra_area = 0.0;\n+ bi.done = true;\n+ return bi;\n+ }\n+\n+ const cv::Point2f p1 = ngon[i], p2 = ngon[(i + 1) % n], p3 = ngon[e],\n+ p4 = ngon[j], p5 = ngon[(j + 1) % n];\n+ const cv::Point2f dir12 = p2 - p1, dir45 = p5 - p4;\n+ const double det = dir12.cross(dir45);\n+ if(std::abs (det) < EPSILON )\n+ {\n+ flush(i, j, e);\n+ return balanced_intersections[i][j];\n+ }\n+\n+ BalancedIntersect bi;\n+ cv::Point2f temp = 2 * p3 - p2 - p4;\n+ const double s = temp.cross(dir45) / det;\n+ const double t = temp.cross(dir12) / det;\n+ if(s >= 0 && t >= 0)\n+ {\n+ bi.pi = p2 + dir12 * s;\n+ bi.pj = p4 - dir45 * t;\n+ bi.position = true;\n+\n+ const cv::Point2f dir_balanced = bi.pj - bi.pi,\n+ dir_left = p3 - ngon[(e - 1 + n) % n],\n+ dir_right = ngon[(e + 1) % n] - p3;\n+\n+ const double cross_left = dir_balanced.cross(dir_left),\n+ cross_right = dir_balanced.cross(dir_right);\n+ if((cross_left < 0 && cross_right < 0) || (cross_left > 0 && cross_right > 0))\n+ {\n+ BalancedIntersect reset;\n+ bi = reset;\n+ bi = flush(i, j, e);\n+ }\n+ else if(std::abs (cross_left) < EPSILON )\n+ {\n+ bi.flush = (e - 1 + n) % n;\n+ bi.extra_area =\n+ extraArea((i + 1) % n, (e - 1 + n) % n, ngon[(e - 1 + n) % n], bi.pi)\n+ + extraArea(e, j, ngon[j], bi.pj);\n+ }\n+ else if(std::abs (cross_right) < EPSILON )\n+ {\n+ bi.flush = e;\n+ bi.extra_area = extraArea((i + 1) % n, e, ngon[e], bi.pi)\n+ + extraArea((e + 1) % n, j, ngon[j], bi.pj);\n+ }\n+ else\n+ {\n+ bi.extra_area = extraArea((i + 1) % n, e, ngon[e], bi.pi)\n+ + extraArea(e, j, ngon[j], bi.pj);\n+ }\n+ }\n+ else\n+ {\n+ flush(i, j, e);\n+ }\n+\n+ if(bi.extra_area < balanced_intersections[i][j].extra_area)\n+ {\n+ balanced_intersections[i][j] = bi;\n+ }\n+ return bi;\n+}\n+\n+//! Set function for the done attribute of BalancedIntersections\n+/*!\n+ * @param i Index of first side\n+ * @param j Index of second side\n+ */\n+void BalancedIntersections::markAsDone(int i, int j)\n+{\n+ balanced_intersections[i][j].done = true;\n+}\n+\n+//! Operator to get a vector of elements from BalancedIntersections\n+/*!\n+ * @param i index of a side\n+ */\n+const std::vector& BalancedIntersections::operator[](\n+ size_t i) const\n+{\n+ return balanced_intersections[i];\n+}\n+\n+//! 
For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the one sided chain with minimal extra area\n+/*!\n+ * @param i Index of first fixed side\n+ * @param j Index of second fixed side\n+ * @param l Index of left boundary\n+ * @param r Index of right boundary\n+ */\n+void Chains::findSingleE(int i, int j, int l, int r)\n+{\n+ const size_t n = ngon.size();\n+ Segment& one = single_sides[i][j];\n+ if (one.done)\n+ return;\n+\n+ double min_area = std::numeric_limits::max();\n+ for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n+ {\n+ BalancedIntersect candidate = balanced_inters.balancedIntersect(i, j, e);\n+ if(candidate.extra_area < min_area)\n+ {\n+ min_area = candidate.extra_area;\n+ one.side = e;\n+ one.extra_area = candidate.extra_area;\n+ one.flush = false;\n+ one.exists = true;\n+ }\n+ }\n+ one.done = true;\n+ balanced_inters.markAsDone(i, j);\n+}\n+\n+//! Recursively repeat the search for the one sided chain with minimal extra area to shrink the boundaries of bigger distances\n+/*!\n+ * @param i Fixed side of the ngon\n+ * @param j1 Lower boundary of search intervall\n+ * @param j2 Upper boundary of search intervall\n+ */\n+void Chains::singleSideImpl(int i, int j1, int j2)\n+{\n+ const size_t n = ngon.size();\n+ if((j1 + 1) %n == j2)\n+ {\n+ return;\n+ }\n+\n+ int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n+ int l = single_sides[i][j1].side < 0 ? (j1 + 1) %n : single_sides[i][j1].side;\n+ int r = single_sides[i][j2].side < 0 ? (j2 - 1 + n) %n : single_sides[i][j2].side;\n+\n+ findSingleE(i, mid, l, r);\n+ singleSideImpl(i, j1, mid);\n+ singleSideImpl(i, mid, j2);\n+\n+ return;\n+}\n+\n+//! For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the middle side of the h-sided chain with minimal extra area. This is the version for h = 1.\n+/*!\n+ * @param i Index of first fixed side\n+ * @param j Index of second fixed side\n+ * @param l Index of left boundary\n+ * @param r Index of right boundary\n+ */\n+void Chains::findMiddleE1(int i, int j, int l, int r)\n+{\n+ const size_t n = ngon.size();\n+ Segment& one = middle_sides[1][i][j];\n+ if (one.done)\n+ return;\n+\n+ double min_area = std::numeric_limits::max();\n+ for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n+ {\n+ const FlushIntersect& before = intersections.lineIntersect(i, e);\n+ if(!before.intersection.position)\n+ continue;\n+ const FlushIntersect& after = intersections.lineIntersect(e, j);\n+ if(!after.intersection.position)\n+ continue;\n+\n+ double tmp_area = before.extra_area + after.extra_area;\n+ if(tmp_area < min_area)\n+ {\n+ min_area = tmp_area;\n+ one.side = e;\n+ one.extra_area = tmp_area;\n+ one.exists = true;\n+ one.flush = true;\n+ }\n+ }\n+ one.done = true;\n+}\n+\n+//! Recursively repeat the search for the middle side of the h-sided chain with minimal extra area to shrink the boundaries of bigger distances. This is the version for h = 1.\n+/*!\n+ * @param i Fixed side of the ngon\n+ * @param j1 Lower boundary of search intervall\n+ * @param j2 Upper boundary of search intervall\n+ */\n+void Chains::middleSideImpl1(int i, int j1, int j2)\n+{\n+ const size_t n = ngon.size();\n+ if((j1 + 1) %n == j2)\n+ {\n+ return;\n+ }\n+\n+ int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n+ int l = middle_sides[1][i][j1].side < 0 ? (j1 + 1) %n : middle_sides[1][i][j1].side;\n+ int r = middle_sides[1][i][j2].side < 0 ? 
(j2 - 1 + n) %n : middle_sides[1][i][j2].side;\n+\n+ findMiddleE1(i, mid, l, r);\n+ middleSideImpl1(i, j1, mid);\n+ middleSideImpl1(i, mid, j2);\n+\n+ return;\n+}\n+\n+//! For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the middle side of the h-sided chain with minimal extra area. This is the version for h > 1.\n+/*!\n+ * @param h Length of the h-sided chain\n+ * @param i Index of first fixed side\n+ * @param j Index of second fixed side\n+ * @param l Index of left boundary\n+ * @param r Index of right boundary\n+ */\n+void Chains::findMiddleE(int h, int i, int j, int l, int r)\n+{\n+ const size_t n = ngon.size();\n+ Segment& one = middle_sides[h][i][j];\n+ if (one.done)\n+ return;\n+\n+ const int dist = (i <= j ? (j - i) : (j + n - i));\n+ const int h_floor = (h - 1) / 2;\n+ const int h_ceil = h - 1 - h_floor;\n+\n+ if(dist == 0)\n+ throw std::logic_error(\"\");\n+ if(dist - 1 < h)\n+ {\n+ one.done = true;\n+ return;\n+ }\n+ if(dist - 1 == h)\n+ {\n+ one.side = (i + h_floor + 1) % n;\n+ one.extra_area = 0.0;\n+ one.exists = true;\n+ one.flush = true;\n+ one.done = true;\n+ return;\n+ }\n+\n+ double min_area = std::numeric_limits::max();\n+ for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n+ {\n+ const Segment& before = middle_sides[h_floor][i][e];\n+ if (before.extra_area == std::numeric_limits::max())\n+ continue;\n+ const Segment& after = middle_sides[h_ceil][e][j];\n+ if(after.extra_area == std::numeric_limits::max())\n+ continue;\n+\n+ double tmp_area = before.extra_area + after.extra_area;\n+ if(tmp_area < min_area)\n+ {\n+ min_area = tmp_area;\n+ one.side = e;\n+ one.extra_area = tmp_area;\n+ one.exists = true;\n+ one.flush = true;\n+ }\n+ }\n+ one.done = true;\n+}\n+\n+//! Recursively repeat the search for the middle side of the h-sided chain with minimal extra area to shrink the boundaries of bigger distances. This is the version for h > 1.\n+/*!\n+ * @param h Length of the h-sided chain\n+ * @param i Fixed side of the ngon\n+ * @param j1 Lower boundary of search intervall\n+ * @param j2 Upper boundary of search intervall\n+ */\n+void Chains::middleSideImpl(int h, int i, int j1, int j2)\n+{\n+ const size_t n = ngon.size();\n+ if((j1 + 1) %n == j2)\n+ {\n+ return;\n+ }\n+\n+ int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n+ int l = middle_sides[h][i][j1].side < 0 ? (j1 + 1) %n : middle_sides[h][i][j1].side;\n+ int r = middle_sides[h][i][j2].side < 0 ? (j2 - 1 + n) %n : middle_sides[h][i][j2].side;\n+\n+ findMiddleE(h, i, mid, l, r);\n+ middleSideImpl(h, i, j1, mid);\n+ middleSideImpl(h, i, mid, j2);\n+\n+ return;\n+}\n+\n+//! Calculate the relevant chain lengths. Starting with a maximum chain length of h, down to a chain length of 1, only chains with half the length of the chain before are needed.\n+/*!\n+ * @param h Length of the longest chain (h = k - 3)\n+ */\n+std::set Chains::relevantChainLengths(int h)\n+{\n+ if(h <= 1)\n+ return {h};\n+\n+ std::set hs = {h};\n+ const int h_floor = (h - 1) / 2;\n+ const int h_ceil = h - 1 - h_floor;\n+ std::set h1 = relevantChainLengths(h_floor);\n+ for(const int& hNew : h1)\n+ hs.insert(hNew);\n+ if (h_ceil != h_floor)\n+ {\n+ std::set h2 = relevantChainLengths(h_ceil);\n+ for(const int& hNew : h2)\n+ hs.insert(hNew);\n+ }\n+ return hs;\n+}\n+\n+//! 
Recursively calculate all the onesided chains for each combination of polygon sides.\n+/*!\n+ */\n+void Chains::calcOneSidedChains()\n+{\n+ const size_t n = ngon.size();\n+ for(size_t i = 0; i < n; i++)\n+ {\n+ int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n+\n+ findSingleE(i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n+ findSingleE(i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n+ singleSideImpl(i, j1, j2);\n+ }\n+}\n+\n+//! Recursively calculate the middle sides of the h-sided chains for all combinations of polygon sides.\n+/*!\n+ * @param h Length of the chains\n+ */\n+void Chains::calcMiddleChains(int h)\n+{\n+ const size_t n = ngon.size();\n+ if (h == 0)\n+ {\n+ for (size_t i = 0; i < n; i++)\n+ {\n+ for (size_t j = 0; j < n; j++)\n+ {\n+ Segment& one = middle_sides[h][i][j];\n+ const FlushIntersect itrs = intersections.lineIntersect(i, j);\n+\n+ one.side = -1;\n+ one.extra_area = itrs.extra_area;\n+ one.exists = false;\n+ one.flush = false;\n+ one.done = true;\n+ }\n+ }\n+ return;\n+ }\n+ if (h == 1)\n+ {\n+ for (size_t i = 0; i < n; i++)\n+ {\n+ int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n+\n+ findMiddleE1(i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n+ findMiddleE1(i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n+ middleSideImpl1(i, j1, j2);\n+ }\n+ return;\n+ }\n+\n+ for (size_t i = 0; i < n; i++)\n+ {\n+ int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n+\n+ findMiddleE(h, i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n+ findMiddleE(h, i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n+ middleSideImpl(h, i, j1, j2);\n+ }\n+}\n+\n+//! Find the i and j with the smallest extra area.\n+/*!\n+ * @param n Number of sides of the input polygon\n+ * @param k Number of sides of the output polygon\n+ */\n+Minimum Chains::minimumArea(int n, int k)\n+{\n+ Minimum min{};\n+ for(int i = 0; i < n; i++)\n+ {\n+ for(int j = 0; j < n; j++)\n+ {\n+ if(!single_sides[i][j].exists || !middle_sides[k - 3][j][i].exists)\n+ continue;\n+\n+ double tmp_area =\n+ single_sides[i][j].extra_area + middle_sides[k - 3][j][i].extra_area;\n+ if(tmp_area < min.area)\n+ {\n+ min.area = tmp_area;\n+ min.i = i;\n+ min.j = j;\n+ }\n+ }\n+ }\n+ return min;\n+}\n+\n+//! Reconstruct the h-sided chain based on the two sides that give the smalles extra area.\n+/*!\n+ * @param h Length of the k - 3 sided chain.\n+ * @param i First index from minimumArea\n+ * @param j Second index from minimumArea\n+ */\n+std::vector Chains::reconstructHSidedChain(int h, int i, int j)\n+{\n+ if(h == 0)\n+ {\n+ throw std::logic_error(\"\");\n+ }\n+ if(h == 1)\n+ {\n+ return std::vector{{middle_sides[h][i][j].side, true}};\n+ }\n+\n+ std::vector before, after;\n+ const int h_floor = (h - 1) / 2;\n+ const int h_ceil = h - 1 - h_floor;\n+ if(h_floor > 0)\n+ before = reconstructHSidedChain(h_floor, i, middle_sides[h][i][j].side);\n+ if(h_ceil > 0)\n+ after = reconstructHSidedChain(h_ceil, middle_sides[h][i][j].side, j);\n+\n+ std::vector sides{{middle_sides[h][i][j].side, true}};\n+ sides.insert(sides.end(), before.begin(), before.end());\n+ sides.insert(sides.end(), after.begin(), after.end());\n+ return sides;\n+}\n+\n+//! 
Get the k sides that build the kgon based on the two sides that give the smalles extra area.\n+/*!\n+ * @param k Number of sides of the output polygon\n+ * @param i First index from minimumArea\n+ * @param j Second index from minimumArea\n+ */\n+std::vector Chains::findKSides(int k, int i, int j)\n+{\n+ std::vector sides;\n+ sides.push_back({i, true});\n+ sides.push_back({j, true});\n+\n+ if(single_sides[i][j].flush)\n+ sides.push_back({single_sides[i][j].side, true});\n+ else\n+ sides.push_back({single_sides[i][j].side, false});\n+\n+ std::vector flush_chain = reconstructHSidedChain(k - 3, j, i);\n+ sides.insert(sides.end(), flush_chain.begin(), flush_chain.end());\n+ std::sort(sides.begin(), sides.end(),\n+ [](const Side& lhs, const Side& rhs) { return lhs.side < rhs.side; });\n+\n+ return sides;\n+}\n+\n+//! Calculate the k vertices of the kgon from its k sides.\n+/*!\n+ * @param sides Sides of the output polygon\n+ */\n+std::vector Chains::findKVertices(std::vector& sides)\n+{\n+ const int k = sides.size();\n+ std::vector vertices(k);\n+\n+ for(int u = 0; u < k; u++)\n+ {\n+ const int next = (u + 1) % k;\n+ if(sides[u].flush && sides[next].flush)\n+ {\n+ vertices[u] = intersections.lineIntersect(sides[u].side,\n+ sides[next].side).intersection.point;\n+ }\n+ else if(sides[u].flush && !sides[next].flush)\n+ {\n+ vertices[u] = balanced_inters[sides[u].side][sides[(u + 2) % k].side].pi;\n+ }\n+ else if(!sides[u].flush && sides[next].flush)\n+ {\n+ vertices[u] = balanced_inters[sides[(u - 1 + k) %k].side][sides[next].side].pj;\n+ }\n+ else\n+ {\n+ throw std::logic_error(\"\");\n+ }\n+ }\n+ return vertices;\n+}\n+\n+} //namespace\n+\n+\n+///////////////////////// Helper function definitions /////////////////////////\n+\n+namespace minEnclosingConvexPolygon {\n+\n+//! Find the minimum enclosing convex kgon and its area. Converting inputs to used vector shape and data format.\n+/*!\n+ * @param points Set of points\n+ * @param k Number of vertices of the output polygon\n+ * @param minPolygon Minimum area convex k-gon enclosing the given set of points\n+ * @param area Area of minPolygon\n+ */\n+static void findMinEnclosingPolygon(cv::InputArray points,\n+ const int &k,\n+ CV_OUT cv::OutputArray &minPolygon,\n+ CV_OUT double &area) {\n+ CV_Assert(!points.empty());\n+ std::vector ngon, kgon;\n+ cv::convexHull(points, ngon, true);\n+ findMinEnclosingPolygon(ngon, k, kgon, area);\n+ cv::Mat(kgon).copyTo(minPolygon);\n+}\n+\n+//! Find the minimum enclosing convex polygon and its area for a given set of points. 
Handling of input errors.\n+/*!\n+ * @param ngon The polygon representing the convex hull of the points\n+ * @param k Number of vertices of the output polygon\n+ * @param minPolygon Minimum area convex k-gon enclosing the given polygon\n+ * @param area Area of minPolygon\n+ */\n+static void findMinEnclosingPolygon(const std::vector &ngon,\n+ const int &k,\n+ cv::OutputArray &minPolygon,\n+ double &area) {\n+ try\n+ {\n+ if (ngon.size() < 3)\n+ {\n+ throw std::invalid_argument(\n+ \"ngon must have 3 or more different points enclosing an area\" );\n+ }\n+ if (cv::contourArea(ngon) < EPSILON )\n+ {\n+ throw std::invalid_argument( \"invalid ngon: all points on line\" );\n+ }\n+ if (k <= 3)\n+ {\n+ throw std::invalid_argument( \"k must be 3 or higher\" );\n+ }\n+ const size_t n = ngon.size();\n+ if ((const int)n == k)\n+ {\n+ throw std::runtime_error (\"(n = k)\");\n+ }\n+ if ((const int)n < k)\n+ {\n+ throw std::runtime_error (\"(n < k)\");\n+ }\n+ }\n+ catch (std::invalid_argument &message)\n+ {\n+ std::cout << \"invalid argument: \" << message.what() << std::endl;\n+ return;\n+ }\n+ catch (std::runtime_error &message)\n+ {\n+ std::cout << \"Warning: no minimum area polygon calculated \" << message.what() << std::endl;\n+ cv::Mat(ngon).copyTo(minPolygon);\n+ area = cv::contourArea(minPolygon);\n+ return;\n+ }\n+ catch (...)\n+ {\n+ std::cout << \"input error\" << std::endl;\n+ return;\n+ }\n+\n+ try\n+ {\n+ std::vector kgon;\n+ findMinAreaPolygon(ngon, kgon, area, k);\n+ cv::Mat(kgon).copyTo(minPolygon);\n+ }\n+ catch (...)\n+ {\n+ std::cout << \"correct input but execution failed\" << std::endl;\n+ return;\n+ }", - "comment_created_at": "2025-06-18T07:03:02+00:00", - "comment_author": "asmorkalov", - "comment_body": "Please do not use try-catch. OpenCV uses CV_Assert, CV_Check for the function input validation and and CV_LOG_DEBUG/CV_LOG_INFO for notifications. Function should not silently ignore obvious invalid inputs like not enough points or wrong data types and ranges. ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2153834792", - "pr_number": 27369, - "pr_file": "modules/imgproc/src/min_enclosing_convex_polygon.cpp", - "created_at": "2025-06-18T07:17:46+00:00", - "commented_code": "/*M///////////////////////////////////////////////////////////////////////////////////////\n //\n // This file is part of OpenCV project.\n // It is subject to the license terms in the LICENSE file found in the top-level directory of this\n // distribution and at http://opencv.org/license.html.\n //\n //\n // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\n //\n // By downloading, copying, installing or using the software you agree to this license.\n // If you do not agree to this license, do not download, install,\n // copy or use the software.\n //\n // INFORMATION REGARDING THE CONTRIBUTION:\n //\n // Author: Sara Kuhnert\n // Created: 20.05.2025\n // E-mail: \n //\n // THE IMPLEMENTATION OF THE MODULES IS BASED ON THE FOLLOWING PAPER:\n //\n // [1] A. Aggarwal, J. S. Chang, Chee K. 
Yap: \"Minimum area circumscribing Polygons\", The Visual\n // Computer, 1:112-117, 1985\n //\n // The overall complexity of the algorithm is theta(n^2log(n)log(k)) where \"n\" represents the number\n // of vertices in the input convex polygon and \"k\" the number of vertices in the output polygon.\n //\n //\n //\n // License Agreement\n // For Open Source Computer Vision Library\n //\n // Copyright (C) 2000, Intel Corporation, all rights reserved.\n // Copyright (C) 2013, OpenCV Foundation, all rights reserved.\n // Copyright (C) 2020, Sara Kuhnert, all rights reserved.\n // Third party copyrights are property of their respective owners.\n //\n // Redistribution and use in source and binary forms, with or without modification,\n // are permitted provided that the following conditions are met:\n //\n // * Redistribution's of source code must retain the above copyright notice,\n // this list of conditions and the following disclaimer.\n //\n // * Redistribution's in binary form must reproduce the above copyright notice,\n // this list of conditions and the following disclaimer in the documentation\n // and/or other materials provided with the distribution.\n //\n // * The name of the copyright holders may not be used to endorse or promote products\n // derived from this software without specific prior written permission.\n //\n // This software is provided by the copyright holders and contributors \"as is\" and\n // any express or implied warranties, including, but not limited to, the implied\n // warranties of merchantability and fitness for a particular purpose are disclaimed.\n // In no event shall the Intel Corporation or contributors be liable for any direct,\n // indirect, incidental, special, exemplary, or consequential damages\n // (including, but not limited to, procurement of substitute goods or services;\n // loss of use, data, or profits; or business interruption) however caused\n // and on any theory of liability, whether in contract, strict liability,\n // or tort (including negligence or otherwise) arising in any way out of\n // the use of this software, even if advised of the possibility of such damage.\n //\n //M*/\n\n#include \"precomp.hpp\"\n#include \n\n#include \n#include \n#include \n#include \n#include \n#include \n#include \n\n\n\n//////////////////////////// Constants definitions ////////////////////////////\n\n#define EPSILON 1e-6\n\n\n///////////////////////// Helper function declarations /////////////////////////\n\nnamespace minEnclosingConvexPolygon {\n\nstatic void findMinEnclosingPolygon(cv::InputArray points,\n const int &k,\n CV_OUT cv::OutputArray &kgon,\n CV_OUT double &area);\n\nstatic void findMinEnclosingPolygon(const std::vector &ngon,\n const int &k,\n cv::OutputArray &kgon,\n double &area);\n\nstatic void findMinAreaPolygon(const std::vector &ngon,\n std::vector &minPolygon,\n double &area,\n int k);\n\n} //namespace\n\n\n//////////////////////// Class and struct declarations ////////////////////////\n\nnamespace minEnclosingConvexPolygon {\n\n//! Intersection point of two sides with its position relative to the polygon\n/*!\n * @param point Intersection point of the two sides\n * @param position Is the intersection point a valid solution?\n */\nstruct IntersectionPoint\n{\n cv::Point2f point = {-1.0, -1.0};\n bool position = false;\n\n IntersectionPoint() = default;\n IntersectionPoint(const cv::Point2f& a, const cv::Point2f& b,\n const cv::Point2f& c, const cv::Point2f& d);\n IntersectionPoint(int i, int j, const std::vector& ngon);\n};\n\n//! 
Container for the information about the intersection point of two flush sides\n/*!\n * @param intersection Instance of IntersectionPoint\n * @param extra_area The area that would be added to the kgon compared to the ngon\n * @param done Set to true once calculated to avoid redundant calculations\n */\nstruct FlushIntersect\n{\n IntersectionPoint intersection = {};\n double extra_area = std::numeric_limits::max();\n bool done = false;\n};\n\n//! Container for the information about a balanced side between two points\n/*!\n * @param pi First intersection point\n * @param pj Second intersection point\n * @param extra_area The area that would be added to the kgon compared to the ngon\n * @param flush Flush is a special case of balanced. If the balanced side is also flush, this indicates which of the sides of the ngon it is flush with. The default for not flush sides is -1.\n * @param position Is the balanced side a valid solution?\n * @param done Set to true once calculated to avoid redundant calculations\n */\nstruct BalancedIntersect\n{\n cv::Point2f pi = {-1, -1};\n cv::Point2f pj = {-1, -1};\n double extra_area = std::numeric_limits::max();\n int flush = -1;\n bool position = false;\n bool done = false;\n};\n\n//! Container for the best segment between two points\n/*!\n * @param extra_area The area that would be added to the kgon compared to the ngon\n * @param side Side of the ngon to which it is flush (if flush is true) or edge of the ngon which it is touching (if flush is false)\n * @param flush True if it is flush. If false, it is balanced but not flush\n * @param exists Does a valid solution exist?\n * @param done Set to true once calculated to avoid redundant calculations\n */\nstruct Segment\n{\n double extra_area = std::numeric_limits::max();\n int side = -1;\n bool flush = false;\n bool exists = false;\n bool done = false;\n};\n\n//! Combination of selected sides and their corresponding chains, which generates the kgon with the minimum area\n/*!\n * @param area Extra area that the minimal kgon is bigger than the input ngon\n * @param i First side\n * @param j Second side\n */\nstruct Minimum\n{\n double area = std::numeric_limits::max();\n int i = -1;\n int j = -1;\n};\n\n//! Container for a side of the minimal kgon\n/*!\n * @param side Index of the side of the ngon which it is flush with or the vertex which it is touching\n * @param flush Is this side flush?\n */\nstruct Side\n{\n int side = -1;\n bool flush = false;\n\n Side() = default;\n Side(int i, bool b);\n};\n\n//! Container for the minimal kgon\n/*!\n * @param sides Indices of the corresponding sindes of the ngon\n * @param vertices Vertices of the kgon\n * @param extra_area Extra area that the minimal kgon is bigger than the input ngon\n * @param i, @param j The chains which build the minimal kgon are formed between those two sides\n */\nstruct Kgon\n{\n std::vector sides;\n std::vector vertices;\n double extra_area = std::numeric_limits::max();\n int i = -1;\n int j = -1;\n};\n\n//! 
Class for all the possible combinations of flush intersections of two sides of the input polygon\n/*!\n * @param ngon Input polygon\n * @param intersections Matrix of all possible flush intersections of two sides of the ngon\n * @param area_edges Collection of the points that define the extra area of the kgon\n */\nclass FlushIntersections\n{\nprivate:\n const std::vector& ngon;\n std::vector> intersections;\n std::vector area_edges;\n\npublic:\n FlushIntersections(const std::vector& _ngon);\n const FlushIntersect& lineIntersect(int i, int j);\n};\n\n//! Class for all the possible combinations of balanced intersections with two sides of the input polygon\n/*!\n * @param ngon Input polygon\n * @param balanced_intersections atrix of all possible balanced intersections of two sides of the ngon\n * @param area_edges Collection of the points that define the extra area of the kgon\n */\nclass BalancedIntersections\n{\nprivate:\n const std::vector& ngon;\n std::vector> balanced_intersections;\n std::vector area_edges;\n\n double extraArea ( int first, int last, const cv::Point2f& extra1, const cv::Point2f& extra2 );\n\n BalancedIntersect flush(int i, int j, int e);\n\npublic:\n BalancedIntersections(const std::vector& _ngon);\n BalancedIntersect balancedIntersect(int i, int j, int e);\n void markAsDone(int i, int j);\n const std::vector& operator[](size_t i) const;\n};\n\n//! Class for building the one-sided and h-sided chains and calculation the minimum enclosing polygon based on those chains\n/*!\n * @param ngon Input polygon\n * @param single_sides Matrix of the best one-sided chain for each combination of two sides of the ngon\n * @param middle_sides Matrix of the middle side of each h-sided chain for all relevant h and each combination of two sides of the ngon\n * @param intersections An instance of FlushIntersections to store already calculated flush intersections\n * @param balanced_inters An instance of BalancedIntersections to store already calculated balanced intersections\n */\nclass Chains\n{\nprivate:\n const std::vector& ngon;\n std::vector> single_sides;\n std::vector>> middle_sides;\n FlushIntersections intersections;\n BalancedIntersections balanced_inters;\n\n void findSingleE(int i, int j, int l, int r);\n void singleSideImpl(int i, int j1, int j2);\n void findMiddleE1(int i, int j, int l, int r);\n void middleSideImpl1(int i, int j1, int j2);\n void findMiddleE(int h, int i, int j, int l, int r);\n void middleSideImpl(int h, int i, int j1, int j2);\n\npublic:\n Chains(const std::vector& _ngon, int k);\n\n std::set relevantChainLengths(int h);\n void calcOneSidedChains();\n void calcMiddleChains(int h);\n\n Minimum minimumArea(int n, int k);\n std::vector reconstructHSidedChain(int h, int i, int j);\n std::vector findKSides(int k, int i, int j);\n std::vector findKVertices(std::vector& sides);\n\n};\n\n} //namespace\n\n\n//////////////////////// Class and struct constructors ////////////////////////\n\nnamespace minEnclosingConvexPolygon {\n\n//! Constructor for IntersectionPoint. 
Find the intersection point of two lines given by four points and decide whether it is on the correct side of the polygon\n/*!\n * @param a First point of the first line\n * @param b Second point of the first line\n * @param c First point of the second line\n * @param d Second point of the second line\n */\nIntersectionPoint::IntersectionPoint(const cv::Point2f& a, const cv::Point2f& b,\n const cv::Point2f& c, const cv::Point2f& d)\n{\n const cv::Point2f ab = b - a, cd = d - c, ac = c - a;\n const double det = ab.cross(-cd);\n if(std::abs (det) < EPSILON )\n return;\n\n const double loc = ac.cross(-cd) / det;\n if(loc <= 0)\n return;\n\n point = a + loc * ab;\n position = true;\n}\n\n//! Constructor for IntersectionPoint. Find the intersection point of two sides of the polygon based on the given side indices.\n/*!\n * @param i Index of the first side\n * @param j Index of the second side\n * @param ngon Input polygon with n sides\n */\nIntersectionPoint::IntersectionPoint(int i, int j,\n const std::vector& ngon) :\n IntersectionPoint(ngon[i],\n ngon[(i + 1) % ngon.size()],\n ngon[j],\n ngon[(j + 1) % ngon.size()])\n{}\n\n//! Constructor for FlushIntersections\n/*!\n * @param _ngon Input polygon with n sides\n */\nFlushIntersections::FlushIntersections(const std::vector& _ngon) :\n ngon(_ngon),\n intersections(ngon.size(),\n std::vector(ngon.size()))\n{\n area_edges.reserve(ngon.size());\n}\n\n//! Constructor for BalancedIntersections\n/*!\n * @param _ngon Input polygon with n sides\n */\nBalancedIntersections::BalancedIntersections(const std::vector& _ngon) :\nngon(_ngon),\nbalanced_intersections(ngon.size(),\n std::vector(ngon.size()))\n{\n area_edges.reserve(ngon.size());\n}\n\n//! Constructor for Side. Assign a side index and weather it is flush or not.\n/*!\n * @param i Side index\n * @param b Is side i flush?\n */\nSide::Side(int i, bool b)\n{\n side = i;\n flush = b;\n}\n\n//! Constructor for Chains\n/*!\n * @param\n */\nChains::Chains(const std::vector& _ngon, int k) :\n ngon(_ngon),\n single_sides(std::vector>(ngon.size(),\n std::vector(ngon.size()))),\n middle_sides(std::vector>>(\n k, std::vector>(ngon.size(),\n std::vector(ngon.size())))),\n intersections(ngon),\n balanced_inters(ngon)\n{}\n\n} //namespace\n\n\n////////////////////////// Class and struct functions //////////////////////////\n\nnamespace minEnclosingConvexPolygon {\n\n//! Find the intersection point of two sides, decide weather it is a valid point and if so calculate the extra area caused by that intersection.\n/*!\n * @param i Index of the first side\n * @param j Index of the second side\n */\nconst FlushIntersect& FlushIntersections::lineIntersect(int i, int j)\n{\n FlushIntersect& itr = intersections[i][j];\n if(itr.done)\n return itr;\n\n const size_t n = ngon.size();\n if((i + 1) % n == j)\n {\n itr.intersection.point = ngon[j];\n itr.intersection.position = true;\n itr.extra_area = 0.0;\n itr.done = true;\n return itr;\n }\n itr.intersection = IntersectionPoint(i, j, ngon);\n if(itr.intersection.position)\n {\n area_edges.resize(0);\n for(int t = (i + 1) % n; t != (j + 1) % n; t = (t + 1) % n)\n {\n area_edges.push_back(ngon[t]);\n }\n area_edges.push_back(itr.intersection.point);\n itr.extra_area = cv::contourArea(area_edges);\n itr.done = true;\n }\n else\n {\n itr.extra_area = std::numeric_limits::max();\n itr.intersection.position = false;\n itr.done = true;\n }\n return itr;\n}\n\n//! 
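As a usage note for the cache above (illustrative only; the fragment is assumed to run inside a function with ngon in scope, where ngon is the clockwise hull produced by cv::convexHull as elsewhere in the file):

#include <iostream>

// Illustration only: querying the flush-intersection cache.
// Adjacent sides (j == i + 1) return their shared vertex with zero extra
// area; otherwise the extension point is computed once, the cut-off region
// is measured with cv::contourArea, and the result is memoised in
// intersections[i][j], so repeated queries are free.
FlushIntersections cache(ngon);
const FlushIntersect& adjacent = cache.lineIntersect(0, 1);   // extra_area == 0
const FlushIntersect& general  = cache.lineIntersect(0, 2);
if (general.intersection.position)                            // valid extension point?
    std::cout << "extra area from flushing sides 0 and 2: "
              << general.extra_area << std::endl;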
Calculate the added area that is enclosed by a sequence of consecutive vertices of the polygon and the intersection point of two sides.\n/*!\n * @param first Index of the first point of the sequence\n * @param last Index of the last point of the sequence\n * @param extra1 Last point of the sequence\n * @param extra2 Intersection point\n */\ndouble BalancedIntersections::extraArea(int first, int last,\n const cv::Point2f& extra1,\n const cv::Point2f& extra2)\n{\n const size_t n = ngon.size();\n area_edges.resize(0);\n for(int t = first; t != last; t = (t + 1) % n)\n area_edges.push_back(ngon[t]);\n\n area_edges.push_back(extra1);\n area_edges.push_back(extra2);\n\n return cv::contourArea(area_edges);\n}\n\n//! Determine the intersection points of a flush side e that lies between sides i and j with these two sides. Calculate the extra area and the position of the intersection points relative to the polygon. Update balanced_intersections if the new area is smaller than extraArea.\n/*!\n * @param i Index of first side\n * @param j Index of second side\n * @param e Index of a side between i and j\n */\nBalancedIntersect BalancedIntersections::flush(int i, int j, int e)\n{\n if(i == e)\n std::logic_error(\"\");\n if(j == e)\n std::logic_error(\"\");\n\n const size_t n = ngon.size();\n const int before = (e - 1 + n) % n;\n BalancedIntersect bi = balanced_intersections[i][j];\n\n const IntersectionPoint left_e(i, e, ngon);\n const IntersectionPoint right_e(e, j, ngon);\n\n if(left_e.position == true && right_e.position == true)\n {\n double extra_area = extraArea((i + 1) % n, e, ngon[e], left_e.point);\n if(extra_area < bi.extra_area)\n {\n extra_area += extraArea((e + 1) % n, j, ngon[j], right_e.point);\n if(extra_area < bi.extra_area)\n {\n bi.extra_area = extra_area;\n bi.pi = left_e.point;\n bi.pj = right_e.point;\n bi.position = true;\n bi.flush = e;\n }\n }\n }\n\n if(before != i)\n {\n const IntersectionPoint left_before(i, before, ngon);\n const IntersectionPoint right_before(before, j, ngon);\n if(left_before.position == true && right_before.position == true)\n {\n double extra_area =\n extraArea((i + 1) % n, before, ngon[before], left_before.point);\n\n if(extra_area < bi.extra_area)\n {\n extra_area += extraArea(e, j, ngon[j], right_before.point);\n if(extra_area < bi.extra_area)\n {\n bi.extra_area = extra_area;\n bi.pi = left_before.point;\n bi.pj = right_before.point;\n bi.position = true;\n bi.flush = before;\n }\n }\n }\n }\n return bi;\n}\n\n//! Determine the intersection points of a balanced side e that lies between sides i and j with these two sides. If no valid balanced edge is found, ccheck for flush sides. Calculate the extra area and the position of the intersection points relative to the polygon. 
Update balanced_intersections if the new area is smaller than extraArea.\n/*!\n * @param i Index of first side\n * @param j Index of second side\n * @param e Index of a side between i and j\n */\nBalancedIntersect BalancedIntersections::balancedIntersect(int i, int j, int e)\n{\n if(balanced_intersections[i][j].done)\n return balanced_intersections[i][j];\n\n const size_t n = ngon.size();\n if((i + 2) % n == j)\n {\n BalancedIntersect& bi = balanced_intersections[i][j];\n bi.pi = ngon[(i + 1) % n];\n bi.pj = ngon[j];\n bi.flush = (i + 1) % n;\n bi.position = true;\n bi.extra_area = 0.0;\n bi.done = true;\n return bi;\n }\n\n const cv::Point2f p1 = ngon[i], p2 = ngon[(i + 1) % n], p3 = ngon[e],\n p4 = ngon[j], p5 = ngon[(j + 1) % n];\n const cv::Point2f dir12 = p2 - p1, dir45 = p5 - p4;\n const double det = dir12.cross(dir45);\n if(std::abs (det) < EPSILON )\n {\n flush(i, j, e);\n return balanced_intersections[i][j];\n }\n\n BalancedIntersect bi;\n cv::Point2f temp = 2 * p3 - p2 - p4;\n const double s = temp.cross(dir45) / det;\n const double t = temp.cross(dir12) / det;\n if(s >= 0 && t >= 0)\n {\n bi.pi = p2 + dir12 * s;\n bi.pj = p4 - dir45 * t;\n bi.position = true;\n\n const cv::Point2f dir_balanced = bi.pj - bi.pi,\n dir_left = p3 - ngon[(e - 1 + n) % n],\n dir_right = ngon[(e + 1) % n] - p3;\n\n const double cross_left = dir_balanced.cross(dir_left),\n cross_right = dir_balanced.cross(dir_right);\n if((cross_left < 0 && cross_right < 0) || (cross_left > 0 && cross_right > 0))\n {\n BalancedIntersect reset;\n bi = reset;\n bi = flush(i, j, e);\n }\n else if(std::abs (cross_left) < EPSILON )\n {\n bi.flush = (e - 1 + n) % n;\n bi.extra_area =\n extraArea((i + 1) % n, (e - 1 + n) % n, ngon[(e - 1 + n) % n], bi.pi)\n + extraArea(e, j, ngon[j], bi.pj);\n }\n else if(std::abs (cross_right) < EPSILON )\n {\n bi.flush = e;\n bi.extra_area = extraArea((i + 1) % n, e, ngon[e], bi.pi)\n + extraArea((e + 1) % n, j, ngon[j], bi.pj);\n }\n else\n {\n bi.extra_area = extraArea((i + 1) % n, e, ngon[e], bi.pi)\n + extraArea(e, j, ngon[j], bi.pj);\n }\n }\n else\n {\n flush(i, j, e);\n }\n\n if(bi.extra_area < balanced_intersections[i][j].extra_area)\n {\n balanced_intersections[i][j] = bi;\n }\n return bi;\n}\n\n//! Set function for the done attribute of BalancedIntersections\n/*!\n * @param i Index of first side\n * @param j Index of second side\n */\nvoid BalancedIntersections::markAsDone(int i, int j)\n{\n balanced_intersections[i][j].done = true;\n}\n\n//! Operator to get a vector of elements from BalancedIntersections\n/*!\n * @param i index of a side\n */\nconst std::vector& BalancedIntersections::operator[](\n size_t i) const\n{\n return balanced_intersections[i];\n}\n\n//! 
For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the one sided chain with minimal extra area\n/*!\n * @param i Index of first fixed side\n * @param j Index of second fixed side\n * @param l Index of left boundary\n * @param r Index of right boundary\n */\nvoid Chains::findSingleE(int i, int j, int l, int r)\n{\n const size_t n = ngon.size();\n Segment& one = single_sides[i][j];\n if (one.done)\n return;\n\n double min_area = std::numeric_limits::max();\n for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n {\n BalancedIntersect candidate = balanced_inters.balancedIntersect(i, j, e);\n if(candidate.extra_area < min_area)\n {\n min_area = candidate.extra_area;\n one.side = e;\n one.extra_area = candidate.extra_area;\n one.flush = false;\n one.exists = true;\n }\n }\n one.done = true;\n balanced_inters.markAsDone(i, j);\n}\n\n//! Recursively repeat the search for the one sided chain with minimal extra area to shrink the boundaries of bigger distances\n/*!\n * @param i Fixed side of the ngon\n * @param j1 Lower boundary of search intervall\n * @param j2 Upper boundary of search intervall\n */\nvoid Chains::singleSideImpl(int i, int j1, int j2)\n{\n const size_t n = ngon.size();\n if((j1 + 1) %n == j2)\n {\n return;\n }\n\n int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n int l = single_sides[i][j1].side < 0 ? (j1 + 1) %n : single_sides[i][j1].side;\n int r = single_sides[i][j2].side < 0 ? (j2 - 1 + n) %n : single_sides[i][j2].side;\n\n findSingleE(i, mid, l, r);\n singleSideImpl(i, j1, mid);\n singleSideImpl(i, mid, j2);\n\n return;\n}\n\n//! For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the middle side of the h-sided chain with minimal extra area. This is the version for h = 1.\n/*!\n * @param i Index of first fixed side\n * @param j Index of second fixed side\n * @param l Index of left boundary\n * @param r Index of right boundary\n */\nvoid Chains::findMiddleE1(int i, int j, int l, int r)\n{\n const size_t n = ngon.size();\n Segment& one = middle_sides[1][i][j];\n if (one.done)\n return;\n\n double min_area = std::numeric_limits::max();\n for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n {\n const FlushIntersect& before = intersections.lineIntersect(i, e);\n if(!before.intersection.position)\n continue;\n const FlushIntersect& after = intersections.lineIntersect(e, j);\n if(!after.intersection.position)\n continue;\n\n double tmp_area = before.extra_area + after.extra_area;\n if(tmp_area < min_area)\n {\n min_area = tmp_area;\n one.side = e;\n one.extra_area = tmp_area;\n one.exists = true;\n one.flush = true;\n }\n }\n one.done = true;\n}\n\n//! Recursively repeat the search for the middle side of the h-sided chain with minimal extra area to shrink the boundaries of bigger distances. This is the version for h = 1.\n/*!\n * @param i Fixed side of the ngon\n * @param j1 Lower boundary of search intervall\n * @param j2 Upper boundary of search intervall\n */\nvoid Chains::middleSideImpl1(int i, int j1, int j2)\n{\n const size_t n = ngon.size();\n if((j1 + 1) %n == j2)\n {\n return;\n }\n\n int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n int l = middle_sides[1][i][j1].side < 0 ? (j1 + 1) %n : middle_sides[1][i][j1].side;\n int r = middle_sides[1][i][j2].side < 0 ? (j2 - 1 + n) %n : middle_sides[1][i][j2].side;\n\n findMiddleE1(i, mid, l, r);\n middleSideImpl1(i, j1, mid);\n middleSideImpl1(i, mid, j2);\n\n return;\n}\n\n//! 
For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the middle side of the h-sided chain with minimal extra area. This is the version for h > 1.\n/*!\n * @param h Length of the h-sided chain\n * @param i Index of first fixed side\n * @param j Index of second fixed side\n * @param l Index of left boundary\n * @param r Index of right boundary\n */\nvoid Chains::findMiddleE(int h, int i, int j, int l, int r)\n{\n const size_t n = ngon.size();\n Segment& one = middle_sides[h][i][j];\n if (one.done)\n return;\n\n const int dist = (i <= j ? (j - i) : (j + n - i));\n const int h_floor = (h - 1) / 2;\n const int h_ceil = h - 1 - h_floor;\n\n if(dist == 0)\n throw std::logic_error(\"\");\n if(dist - 1 < h)\n {\n one.done = true;\n return;\n }\n if(dist - 1 == h)\n {\n one.side = (i + h_floor + 1) % n;\n one.extra_area = 0.0;\n one.exists = true;\n one.flush = true;\n one.done = true;\n return;\n }\n\n double min_area = std::numeric_limits::max();\n for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n {\n const Segment& before = middle_sides[h_floor][i][e];\n if (before.extra_area == std::numeric_limits::max())\n continue;\n const Segment& after = middle_sides[h_ceil][e][j];\n if(after.extra_area == std::numeric_limits::max())\n continue;\n\n double tmp_area = before.extra_area + after.extra_area;\n if(tmp_area < min_area)\n {\n min_area = tmp_area;\n one.side = e;\n one.extra_area = tmp_area;\n one.exists = true;\n one.flush = true;\n }\n }\n one.done = true;\n}\n\n//! Recursively repeat the search for the middle side of the h-sided chain with minimal extra area to shrink the boundaries of bigger distances. This is the version for h > 1.\n/*!\n * @param h Length of the h-sided chain\n * @param i Fixed side of the ngon\n * @param j1 Lower boundary of search intervall\n * @param j2 Upper boundary of search intervall\n */\nvoid Chains::middleSideImpl(int h, int i, int j1, int j2)\n{\n const size_t n = ngon.size();\n if((j1 + 1) %n == j2)\n {\n return;\n }\n\n int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n int l = middle_sides[h][i][j1].side < 0 ? (j1 + 1) %n : middle_sides[h][i][j1].side;\n int r = middle_sides[h][i][j2].side < 0 ? (j2 - 1 + n) %n : middle_sides[h][i][j2].side;\n\n findMiddleE(h, i, mid, l, r);\n middleSideImpl(h, i, j1, mid);\n middleSideImpl(h, i, mid, j2);\n\n return;\n}\n\n//! Calculate the relevant chain lengths. Starting with a maximum chain length of h, down to a chain length of 1, only chains with half the length of the chain before are needed.\n/*!\n * @param h Length of the longest chain (h = k - 3)\n */\nstd::set Chains::relevantChainLengths(int h)\n{\n if(h <= 1)\n return {h};\n\n std::set hs = {h};\n const int h_floor = (h - 1) / 2;\n const int h_ceil = h - 1 - h_floor;\n std::set h1 = relevantChainLengths(h_floor);\n for(const int& hNew : h1)\n hs.insert(hNew);\n if (h_ceil != h_floor)\n {\n std::set h2 = relevantChainLengths(h_ceil);\n for(const int& hNew : h2)\n hs.insert(hNew);\n }\n return hs;\n}\n\n//! Recursively calculate all the onesided chains for each combination of polygon sides.\n/*!\n */\nvoid Chains::calcOneSidedChains()\n{\n const size_t n = ngon.size();\n for(size_t i = 0; i < n; i++)\n {\n int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n\n findSingleE(i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n findSingleE(i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n singleSideImpl(i, j1, j2);\n }\n}\n\n//! 
Recursively calculate the middle sides of the h-sided chains for all combinations of polygon sides.\n/*!\n * @param h Length of the chains\n */\nvoid Chains::calcMiddleChains(int h)\n{\n const size_t n = ngon.size();\n if (h == 0)\n {\n for (size_t i = 0; i < n; i++)\n {\n for (size_t j = 0; j < n; j++)\n {\n Segment& one = middle_sides[h][i][j];\n const FlushIntersect itrs = intersections.lineIntersect(i, j);\n\n one.side = -1;\n one.extra_area = itrs.extra_area;\n one.exists = false;\n one.flush = false;\n one.done = true;\n }\n }\n return;\n }\n if (h == 1)\n {\n for (size_t i = 0; i < n; i++)\n {\n int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n\n findMiddleE1(i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n findMiddleE1(i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n middleSideImpl1(i, j1, j2);\n }\n return;\n }\n\n for (size_t i = 0; i < n; i++)\n {\n int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n\n findMiddleE(h, i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n findMiddleE(h, i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n middleSideImpl(h, i, j1, j2);\n }\n}\n\n//! Find the i and j with the smallest extra area.\n/*!\n * @param n Number of sides of the input polygon\n * @param k Number of sides of the output polygon\n */\nMinimum Chains::minimumArea(int n, int k)\n{\n Minimum min{};\n for(int i = 0; i < n; i++)\n {\n for(int j = 0; j < n; j++)\n {\n if(!single_sides[i][j].exists || !middle_sides[k - 3][j][i].exists)\n continue;\n\n double tmp_area =\n single_sides[i][j].extra_area + middle_sides[k - 3][j][i].extra_area;\n if(tmp_area < min.area)\n {\n min.area = tmp_area;\n min.i = i;\n min.j = j;\n }\n }\n }\n return min;\n}\n\n//! Reconstruct the h-sided chain based on the two sides that give the smalles extra area.\n/*!\n * @param h Length of the k - 3 sided chain.\n * @param i First index from minimumArea\n * @param j Second index from minimumArea\n */\nstd::vector Chains::reconstructHSidedChain(int h, int i, int j)\n{\n if(h == 0)\n {\n throw std::logic_error(\"\");\n }\n if(h == 1)\n {\n return std::vector{{middle_sides[h][i][j].side, true}};\n }\n\n std::vector before, after;\n const int h_floor = (h - 1) / 2;\n const int h_ceil = h - 1 - h_floor;\n if(h_floor > 0)\n before = reconstructHSidedChain(h_floor, i, middle_sides[h][i][j].side);\n if(h_ceil > 0)\n after = reconstructHSidedChain(h_ceil, middle_sides[h][i][j].side, j);\n\n std::vector sides{{middle_sides[h][i][j].side, true}};\n sides.insert(sides.end(), before.begin(), before.end());\n sides.insert(sides.end(), after.begin(), after.end());\n return sides;\n}\n\n//! Get the k sides that build the kgon based on the two sides that give the smalles extra area.\n/*!\n * @param k Number of sides of the output polygon\n * @param i First index from minimumArea\n * @param j Second index from minimumArea\n */\nstd::vector Chains::findKSides(int k, int i, int j)\n{\n std::vector sides;\n sides.push_back({i, true});\n sides.push_back({j, true});\n\n if(single_sides[i][j].flush)\n sides.push_back({single_sides[i][j].side, true});\n else\n sides.push_back({single_sides[i][j].side, false});\n\n std::vector flush_chain = reconstructHSidedChain(k - 3, j, i);\n sides.insert(sides.end(), flush_chain.begin(), flush_chain.end());\n std::sort(sides.begin(), sides.end(),\n [](const Side& lhs, const Side& rhs) { return lhs.side < rhs.side; });\n\n return sides;\n}\n\n//! 
Calculate the k vertices of the kgon from its k sides.\n/*!\n * @param sides Sides of the output polygon\n */\nstd::vector Chains::findKVertices(std::vector& sides)\n{\n const int k = sides.size();\n std::vector vertices(k);\n\n for(int u = 0; u < k; u++)\n {\n const int next = (u + 1) % k;\n if(sides[u].flush && sides[next].flush)\n {\n vertices[u] = intersections.lineIntersect(sides[u].side,\n sides[next].side).intersection.point;\n }\n else if(sides[u].flush && !sides[next].flush)\n {\n vertices[u] = balanced_inters[sides[u].side][sides[(u + 2) % k].side].pi;\n }\n else if(!sides[u].flush && sides[next].flush)\n {\n vertices[u] = balanced_inters[sides[(u - 1 + k) %k].side][sides[next].side].pj;\n }\n else\n {\n throw std::logic_error(\"\");", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2153834792", - "repo_full_name": "opencv/opencv", - "pr_number": 27369, - "pr_file": "modules/imgproc/src/min_enclosing_convex_polygon.cpp", - "discussion_id": "2153834792", - "commented_code": "@@ -0,0 +1,1185 @@\n+/*M///////////////////////////////////////////////////////////////////////////////////////\n+ //\n+ // This file is part of OpenCV project.\n+ // It is subject to the license terms in the LICENSE file found in the top-level directory of this\n+ // distribution and at http://opencv.org/license.html.\n+ //\n+ //\n+ // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\n+ //\n+ // By downloading, copying, installing or using the software you agree to this license.\n+ // If you do not agree to this license, do not download, install,\n+ // copy or use the software.\n+ //\n+ // INFORMATION REGARDING THE CONTRIBUTION:\n+ //\n+ // Author: Sara Kuhnert\n+ // Created: 20.05.2025\n+ // E-mail: \n+ //\n+ // THE IMPLEMENTATION OF THE MODULES IS BASED ON THE FOLLOWING PAPER:\n+ //\n+ // [1] A. Aggarwal, J. S. Chang, Chee K. 
Yap: \"Minimum area circumscribing Polygons\", The Visual\n+ // Computer, 1:112-117, 1985\n+ //\n+ // The overall complexity of the algorithm is theta(n^2log(n)log(k)) where \"n\" represents the number\n+ // of vertices in the input convex polygon and \"k\" the number of vertices in the output polygon.\n+ //\n+ //\n+ //\n+ // License Agreement\n+ // For Open Source Computer Vision Library\n+ //\n+ // Copyright (C) 2000, Intel Corporation, all rights reserved.\n+ // Copyright (C) 2013, OpenCV Foundation, all rights reserved.\n+ // Copyright (C) 2020, Sara Kuhnert, all rights reserved.\n+ // Third party copyrights are property of their respective owners.\n+ //\n+ // Redistribution and use in source and binary forms, with or without modification,\n+ // are permitted provided that the following conditions are met:\n+ //\n+ // * Redistribution's of source code must retain the above copyright notice,\n+ // this list of conditions and the following disclaimer.\n+ //\n+ // * Redistribution's in binary form must reproduce the above copyright notice,\n+ // this list of conditions and the following disclaimer in the documentation\n+ // and/or other materials provided with the distribution.\n+ //\n+ // * The name of the copyright holders may not be used to endorse or promote products\n+ // derived from this software without specific prior written permission.\n+ //\n+ // This software is provided by the copyright holders and contributors \"as is\" and\n+ // any express or implied warranties, including, but not limited to, the implied\n+ // warranties of merchantability and fitness for a particular purpose are disclaimed.\n+ // In no event shall the Intel Corporation or contributors be liable for any direct,\n+ // indirect, incidental, special, exemplary, or consequential damages\n+ // (including, but not limited to, procurement of substitute goods or services;\n+ // loss of use, data, or profits; or business interruption) however caused\n+ // and on any theory of liability, whether in contract, strict liability,\n+ // or tort (including negligence or otherwise) arising in any way out of\n+ // the use of this software, even if advised of the possibility of such damage.\n+ //\n+ //M*/\n+\n+#include \"precomp.hpp\"\n+#include \n+\n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+#include \n+\n+\n+\n+//////////////////////////// Constants definitions ////////////////////////////\n+\n+#define EPSILON 1e-6\n+\n+\n+///////////////////////// Helper function declarations /////////////////////////\n+\n+namespace minEnclosingConvexPolygon {\n+\n+static void findMinEnclosingPolygon(cv::InputArray points,\n+ const int &k,\n+ CV_OUT cv::OutputArray &kgon,\n+ CV_OUT double &area);\n+\n+static void findMinEnclosingPolygon(const std::vector &ngon,\n+ const int &k,\n+ cv::OutputArray &kgon,\n+ double &area);\n+\n+static void findMinAreaPolygon(const std::vector &ngon,\n+ std::vector &minPolygon,\n+ double &area,\n+ int k);\n+\n+} //namespace\n+\n+\n+//////////////////////// Class and struct declarations ////////////////////////\n+\n+namespace minEnclosingConvexPolygon {\n+\n+//! 
Intersection point of two sides with its position relative to the polygon\n+/*!\n+ * @param point Intersection point of the two sides\n+ * @param position Is the intersection point a valid solution?\n+ */\n+struct IntersectionPoint\n+{\n+ cv::Point2f point = {-1.0, -1.0};\n+ bool position = false;\n+\n+ IntersectionPoint() = default;\n+ IntersectionPoint(const cv::Point2f& a, const cv::Point2f& b,\n+ const cv::Point2f& c, const cv::Point2f& d);\n+ IntersectionPoint(int i, int j, const std::vector& ngon);\n+};\n+\n+//! Container for the information about the intersection point of two flush sides\n+/*!\n+ * @param intersection Instance of IntersectionPoint\n+ * @param extra_area The area that would be added to the kgon compared to the ngon\n+ * @param done Set to true once calculated to avoid redundant calculations\n+ */\n+struct FlushIntersect\n+{\n+ IntersectionPoint intersection = {};\n+ double extra_area = std::numeric_limits::max();\n+ bool done = false;\n+};\n+\n+//! Container for the information about a balanced side between two points\n+/*!\n+ * @param pi First intersection point\n+ * @param pj Second intersection point\n+ * @param extra_area The area that would be added to the kgon compared to the ngon\n+ * @param flush Flush is a special case of balanced. If the balanced side is also flush, this indicates which of the sides of the ngon it is flush with. The default for not flush sides is -1.\n+ * @param position Is the balanced side a valid solution?\n+ * @param done Set to true once calculated to avoid redundant calculations\n+ */\n+struct BalancedIntersect\n+{\n+ cv::Point2f pi = {-1, -1};\n+ cv::Point2f pj = {-1, -1};\n+ double extra_area = std::numeric_limits::max();\n+ int flush = -1;\n+ bool position = false;\n+ bool done = false;\n+};\n+\n+//! Container for the best segment between two points\n+/*!\n+ * @param extra_area The area that would be added to the kgon compared to the ngon\n+ * @param side Side of the ngon to which it is flush (if flush is true) or edge of the ngon which it is touching (if flush is false)\n+ * @param flush True if it is flush. If false, it is balanced but not flush\n+ * @param exists Does a valid solution exist?\n+ * @param done Set to true once calculated to avoid redundant calculations\n+ */\n+struct Segment\n+{\n+ double extra_area = std::numeric_limits::max();\n+ int side = -1;\n+ bool flush = false;\n+ bool exists = false;\n+ bool done = false;\n+};\n+\n+//! Combination of selected sides and their corresponding chains, which generates the kgon with the minimum area\n+/*!\n+ * @param area Extra area that the minimal kgon is bigger than the input ngon\n+ * @param i First side\n+ * @param j Second side\n+ */\n+struct Minimum\n+{\n+ double area = std::numeric_limits::max();\n+ int i = -1;\n+ int j = -1;\n+};\n+\n+//! Container for a side of the minimal kgon\n+/*!\n+ * @param side Index of the side of the ngon which it is flush with or the vertex which it is touching\n+ * @param flush Is this side flush?\n+ */\n+struct Side\n+{\n+ int side = -1;\n+ bool flush = false;\n+\n+ Side() = default;\n+ Side(int i, bool b);\n+};\n+\n+//! 
Container for the minimal kgon\n+/*!\n+ * @param sides Indices of the corresponding sindes of the ngon\n+ * @param vertices Vertices of the kgon\n+ * @param extra_area Extra area that the minimal kgon is bigger than the input ngon\n+ * @param i, @param j The chains which build the minimal kgon are formed between those two sides\n+ */\n+struct Kgon\n+{\n+ std::vector sides;\n+ std::vector vertices;\n+ double extra_area = std::numeric_limits::max();\n+ int i = -1;\n+ int j = -1;\n+};\n+\n+//! Class for all the possible combinations of flush intersections of two sides of the input polygon\n+/*!\n+ * @param ngon Input polygon\n+ * @param intersections Matrix of all possible flush intersections of two sides of the ngon\n+ * @param area_edges Collection of the points that define the extra area of the kgon\n+ */\n+class FlushIntersections\n+{\n+private:\n+ const std::vector& ngon;\n+ std::vector> intersections;\n+ std::vector area_edges;\n+\n+public:\n+ FlushIntersections(const std::vector& _ngon);\n+ const FlushIntersect& lineIntersect(int i, int j);\n+};\n+\n+//! Class for all the possible combinations of balanced intersections with two sides of the input polygon\n+/*!\n+ * @param ngon Input polygon\n+ * @param balanced_intersections atrix of all possible balanced intersections of two sides of the ngon\n+ * @param area_edges Collection of the points that define the extra area of the kgon\n+ */\n+class BalancedIntersections\n+{\n+private:\n+ const std::vector& ngon;\n+ std::vector> balanced_intersections;\n+ std::vector area_edges;\n+\n+ double extraArea ( int first, int last, const cv::Point2f& extra1, const cv::Point2f& extra2 );\n+\n+ BalancedIntersect flush(int i, int j, int e);\n+\n+public:\n+ BalancedIntersections(const std::vector& _ngon);\n+ BalancedIntersect balancedIntersect(int i, int j, int e);\n+ void markAsDone(int i, int j);\n+ const std::vector& operator[](size_t i) const;\n+};\n+\n+//! 
Class for building the one-sided and h-sided chains and calculation the minimum enclosing polygon based on those chains\n+/*!\n+ * @param ngon Input polygon\n+ * @param single_sides Matrix of the best one-sided chain for each combination of two sides of the ngon\n+ * @param middle_sides Matrix of the middle side of each h-sided chain for all relevant h and each combination of two sides of the ngon\n+ * @param intersections An instance of FlushIntersections to store already calculated flush intersections\n+ * @param balanced_inters An instance of BalancedIntersections to store already calculated balanced intersections\n+ */\n+class Chains\n+{\n+private:\n+ const std::vector& ngon;\n+ std::vector> single_sides;\n+ std::vector>> middle_sides;\n+ FlushIntersections intersections;\n+ BalancedIntersections balanced_inters;\n+\n+ void findSingleE(int i, int j, int l, int r);\n+ void singleSideImpl(int i, int j1, int j2);\n+ void findMiddleE1(int i, int j, int l, int r);\n+ void middleSideImpl1(int i, int j1, int j2);\n+ void findMiddleE(int h, int i, int j, int l, int r);\n+ void middleSideImpl(int h, int i, int j1, int j2);\n+\n+public:\n+ Chains(const std::vector& _ngon, int k);\n+\n+ std::set relevantChainLengths(int h);\n+ void calcOneSidedChains();\n+ void calcMiddleChains(int h);\n+\n+ Minimum minimumArea(int n, int k);\n+ std::vector reconstructHSidedChain(int h, int i, int j);\n+ std::vector findKSides(int k, int i, int j);\n+ std::vector findKVertices(std::vector& sides);\n+\n+};\n+\n+} //namespace\n+\n+\n+//////////////////////// Class and struct constructors ////////////////////////\n+\n+namespace minEnclosingConvexPolygon {\n+\n+//! Constructor for IntersectionPoint. Find the intersection point of two lines given by four points and decide whether it is on the correct side of the polygon\n+/*!\n+ * @param a First point of the first line\n+ * @param b Second point of the first line\n+ * @param c First point of the second line\n+ * @param d Second point of the second line\n+ */\n+IntersectionPoint::IntersectionPoint(const cv::Point2f& a, const cv::Point2f& b,\n+ const cv::Point2f& c, const cv::Point2f& d)\n+{\n+ const cv::Point2f ab = b - a, cd = d - c, ac = c - a;\n+ const double det = ab.cross(-cd);\n+ if(std::abs (det) < EPSILON )\n+ return;\n+\n+ const double loc = ac.cross(-cd) / det;\n+ if(loc <= 0)\n+ return;\n+\n+ point = a + loc * ab;\n+ position = true;\n+}\n+\n+//! Constructor for IntersectionPoint. Find the intersection point of two sides of the polygon based on the given side indices.\n+/*!\n+ * @param i Index of the first side\n+ * @param j Index of the second side\n+ * @param ngon Input polygon with n sides\n+ */\n+IntersectionPoint::IntersectionPoint(int i, int j,\n+ const std::vector& ngon) :\n+ IntersectionPoint(ngon[i],\n+ ngon[(i + 1) % ngon.size()],\n+ ngon[j],\n+ ngon[(j + 1) % ngon.size()])\n+{}\n+\n+//! Constructor for FlushIntersections\n+/*!\n+ * @param _ngon Input polygon with n sides\n+ */\n+FlushIntersections::FlushIntersections(const std::vector& _ngon) :\n+ ngon(_ngon),\n+ intersections(ngon.size(),\n+ std::vector(ngon.size()))\n+{\n+ area_edges.reserve(ngon.size());\n+}\n+\n+//! Constructor for BalancedIntersections\n+/*!\n+ * @param _ngon Input polygon with n sides\n+ */\n+BalancedIntersections::BalancedIntersections(const std::vector& _ngon) :\n+ngon(_ngon),\n+balanced_intersections(ngon.size(),\n+ std::vector(ngon.size()))\n+{\n+ area_edges.reserve(ngon.size());\n+}\n+\n+//! Constructor for Side. 
Assign a side index and weather it is flush or not.\n+/*!\n+ * @param i Side index\n+ * @param b Is side i flush?\n+ */\n+Side::Side(int i, bool b)\n+{\n+ side = i;\n+ flush = b;\n+}\n+\n+//! Constructor for Chains\n+/*!\n+ * @param\n+ */\n+Chains::Chains(const std::vector& _ngon, int k) :\n+ ngon(_ngon),\n+ single_sides(std::vector>(ngon.size(),\n+ std::vector(ngon.size()))),\n+ middle_sides(std::vector>>(\n+ k, std::vector>(ngon.size(),\n+ std::vector(ngon.size())))),\n+ intersections(ngon),\n+ balanced_inters(ngon)\n+{}\n+\n+} //namespace\n+\n+\n+////////////////////////// Class and struct functions //////////////////////////\n+\n+namespace minEnclosingConvexPolygon {\n+\n+//! Find the intersection point of two sides, decide weather it is a valid point and if so calculate the extra area caused by that intersection.\n+/*!\n+ * @param i Index of the first side\n+ * @param j Index of the second side\n+ */\n+const FlushIntersect& FlushIntersections::lineIntersect(int i, int j)\n+{\n+ FlushIntersect& itr = intersections[i][j];\n+ if(itr.done)\n+ return itr;\n+\n+ const size_t n = ngon.size();\n+ if((i + 1) % n == j)\n+ {\n+ itr.intersection.point = ngon[j];\n+ itr.intersection.position = true;\n+ itr.extra_area = 0.0;\n+ itr.done = true;\n+ return itr;\n+ }\n+ itr.intersection = IntersectionPoint(i, j, ngon);\n+ if(itr.intersection.position)\n+ {\n+ area_edges.resize(0);\n+ for(int t = (i + 1) % n; t != (j + 1) % n; t = (t + 1) % n)\n+ {\n+ area_edges.push_back(ngon[t]);\n+ }\n+ area_edges.push_back(itr.intersection.point);\n+ itr.extra_area = cv::contourArea(area_edges);\n+ itr.done = true;\n+ }\n+ else\n+ {\n+ itr.extra_area = std::numeric_limits::max();\n+ itr.intersection.position = false;\n+ itr.done = true;\n+ }\n+ return itr;\n+}\n+\n+//! Calculate the added area that is enclosed by a sequence of consecutive vertices of the polygon and the intersection point of two sides.\n+/*!\n+ * @param first Index of the first point of the sequence\n+ * @param last Index of the last point of the sequence\n+ * @param extra1 Last point of the sequence\n+ * @param extra2 Intersection point\n+ */\n+double BalancedIntersections::extraArea(int first, int last,\n+ const cv::Point2f& extra1,\n+ const cv::Point2f& extra2)\n+{\n+ const size_t n = ngon.size();\n+ area_edges.resize(0);\n+ for(int t = first; t != last; t = (t + 1) % n)\n+ area_edges.push_back(ngon[t]);\n+\n+ area_edges.push_back(extra1);\n+ area_edges.push_back(extra2);\n+\n+ return cv::contourArea(area_edges);\n+}\n+\n+//! Determine the intersection points of a flush side e that lies between sides i and j with these two sides. Calculate the extra area and the position of the intersection points relative to the polygon. 
Update balanced_intersections if the new area is smaller than extraArea.\n+/*!\n+ * @param i Index of first side\n+ * @param j Index of second side\n+ * @param e Index of a side between i and j\n+ */\n+BalancedIntersect BalancedIntersections::flush(int i, int j, int e)\n+{\n+ if(i == e)\n+ std::logic_error(\"\");\n+ if(j == e)\n+ std::logic_error(\"\");\n+\n+ const size_t n = ngon.size();\n+ const int before = (e - 1 + n) % n;\n+ BalancedIntersect bi = balanced_intersections[i][j];\n+\n+ const IntersectionPoint left_e(i, e, ngon);\n+ const IntersectionPoint right_e(e, j, ngon);\n+\n+ if(left_e.position == true && right_e.position == true)\n+ {\n+ double extra_area = extraArea((i + 1) % n, e, ngon[e], left_e.point);\n+ if(extra_area < bi.extra_area)\n+ {\n+ extra_area += extraArea((e + 1) % n, j, ngon[j], right_e.point);\n+ if(extra_area < bi.extra_area)\n+ {\n+ bi.extra_area = extra_area;\n+ bi.pi = left_e.point;\n+ bi.pj = right_e.point;\n+ bi.position = true;\n+ bi.flush = e;\n+ }\n+ }\n+ }\n+\n+ if(before != i)\n+ {\n+ const IntersectionPoint left_before(i, before, ngon);\n+ const IntersectionPoint right_before(before, j, ngon);\n+ if(left_before.position == true && right_before.position == true)\n+ {\n+ double extra_area =\n+ extraArea((i + 1) % n, before, ngon[before], left_before.point);\n+\n+ if(extra_area < bi.extra_area)\n+ {\n+ extra_area += extraArea(e, j, ngon[j], right_before.point);\n+ if(extra_area < bi.extra_area)\n+ {\n+ bi.extra_area = extra_area;\n+ bi.pi = left_before.point;\n+ bi.pj = right_before.point;\n+ bi.position = true;\n+ bi.flush = before;\n+ }\n+ }\n+ }\n+ }\n+ return bi;\n+}\n+\n+//! Determine the intersection points of a balanced side e that lies between sides i and j with these two sides. If no valid balanced edge is found, ccheck for flush sides. Calculate the extra area and the position of the intersection points relative to the polygon. 
Update balanced_intersections if the new area is smaller than extraArea.\n+/*!\n+ * @param i Index of first side\n+ * @param j Index of second side\n+ * @param e Index of a side between i and j\n+ */\n+BalancedIntersect BalancedIntersections::balancedIntersect(int i, int j, int e)\n+{\n+ if(balanced_intersections[i][j].done)\n+ return balanced_intersections[i][j];\n+\n+ const size_t n = ngon.size();\n+ if((i + 2) % n == j)\n+ {\n+ BalancedIntersect& bi = balanced_intersections[i][j];\n+ bi.pi = ngon[(i + 1) % n];\n+ bi.pj = ngon[j];\n+ bi.flush = (i + 1) % n;\n+ bi.position = true;\n+ bi.extra_area = 0.0;\n+ bi.done = true;\n+ return bi;\n+ }\n+\n+ const cv::Point2f p1 = ngon[i], p2 = ngon[(i + 1) % n], p3 = ngon[e],\n+ p4 = ngon[j], p5 = ngon[(j + 1) % n];\n+ const cv::Point2f dir12 = p2 - p1, dir45 = p5 - p4;\n+ const double det = dir12.cross(dir45);\n+ if(std::abs (det) < EPSILON )\n+ {\n+ flush(i, j, e);\n+ return balanced_intersections[i][j];\n+ }\n+\n+ BalancedIntersect bi;\n+ cv::Point2f temp = 2 * p3 - p2 - p4;\n+ const double s = temp.cross(dir45) / det;\n+ const double t = temp.cross(dir12) / det;\n+ if(s >= 0 && t >= 0)\n+ {\n+ bi.pi = p2 + dir12 * s;\n+ bi.pj = p4 - dir45 * t;\n+ bi.position = true;\n+\n+ const cv::Point2f dir_balanced = bi.pj - bi.pi,\n+ dir_left = p3 - ngon[(e - 1 + n) % n],\n+ dir_right = ngon[(e + 1) % n] - p3;\n+\n+ const double cross_left = dir_balanced.cross(dir_left),\n+ cross_right = dir_balanced.cross(dir_right);\n+ if((cross_left < 0 && cross_right < 0) || (cross_left > 0 && cross_right > 0))\n+ {\n+ BalancedIntersect reset;\n+ bi = reset;\n+ bi = flush(i, j, e);\n+ }\n+ else if(std::abs (cross_left) < EPSILON )\n+ {\n+ bi.flush = (e - 1 + n) % n;\n+ bi.extra_area =\n+ extraArea((i + 1) % n, (e - 1 + n) % n, ngon[(e - 1 + n) % n], bi.pi)\n+ + extraArea(e, j, ngon[j], bi.pj);\n+ }\n+ else if(std::abs (cross_right) < EPSILON )\n+ {\n+ bi.flush = e;\n+ bi.extra_area = extraArea((i + 1) % n, e, ngon[e], bi.pi)\n+ + extraArea((e + 1) % n, j, ngon[j], bi.pj);\n+ }\n+ else\n+ {\n+ bi.extra_area = extraArea((i + 1) % n, e, ngon[e], bi.pi)\n+ + extraArea(e, j, ngon[j], bi.pj);\n+ }\n+ }\n+ else\n+ {\n+ flush(i, j, e);\n+ }\n+\n+ if(bi.extra_area < balanced_intersections[i][j].extra_area)\n+ {\n+ balanced_intersections[i][j] = bi;\n+ }\n+ return bi;\n+}\n+\n+//! Set function for the done attribute of BalancedIntersections\n+/*!\n+ * @param i Index of first side\n+ * @param j Index of second side\n+ */\n+void BalancedIntersections::markAsDone(int i, int j)\n+{\n+ balanced_intersections[i][j].done = true;\n+}\n+\n+//! Operator to get a vector of elements from BalancedIntersections\n+/*!\n+ * @param i index of a side\n+ */\n+const std::vector& BalancedIntersections::operator[](\n+ size_t i) const\n+{\n+ return balanced_intersections[i];\n+}\n+\n+//! 
For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the one sided chain with minimal extra area\n+/*!\n+ * @param i Index of first fixed side\n+ * @param j Index of second fixed side\n+ * @param l Index of left boundary\n+ * @param r Index of right boundary\n+ */\n+void Chains::findSingleE(int i, int j, int l, int r)\n+{\n+ const size_t n = ngon.size();\n+ Segment& one = single_sides[i][j];\n+ if (one.done)\n+ return;\n+\n+ double min_area = std::numeric_limits::max();\n+ for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n+ {\n+ BalancedIntersect candidate = balanced_inters.balancedIntersect(i, j, e);\n+ if(candidate.extra_area < min_area)\n+ {\n+ min_area = candidate.extra_area;\n+ one.side = e;\n+ one.extra_area = candidate.extra_area;\n+ one.flush = false;\n+ one.exists = true;\n+ }\n+ }\n+ one.done = true;\n+ balanced_inters.markAsDone(i, j);\n+}\n+\n+//! Recursively repeat the search for the one sided chain with minimal extra area to shrink the boundaries of bigger distances\n+/*!\n+ * @param i Fixed side of the ngon\n+ * @param j1 Lower boundary of search intervall\n+ * @param j2 Upper boundary of search intervall\n+ */\n+void Chains::singleSideImpl(int i, int j1, int j2)\n+{\n+ const size_t n = ngon.size();\n+ if((j1 + 1) %n == j2)\n+ {\n+ return;\n+ }\n+\n+ int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n+ int l = single_sides[i][j1].side < 0 ? (j1 + 1) %n : single_sides[i][j1].side;\n+ int r = single_sides[i][j2].side < 0 ? (j2 - 1 + n) %n : single_sides[i][j2].side;\n+\n+ findSingleE(i, mid, l, r);\n+ singleSideImpl(i, j1, mid);\n+ singleSideImpl(i, mid, j2);\n+\n+ return;\n+}\n+\n+//! For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the middle side of the h-sided chain with minimal extra area. This is the version for h = 1.\n+/*!\n+ * @param i Index of first fixed side\n+ * @param j Index of second fixed side\n+ * @param l Index of left boundary\n+ * @param r Index of right boundary\n+ */\n+void Chains::findMiddleE1(int i, int j, int l, int r)\n+{\n+ const size_t n = ngon.size();\n+ Segment& one = middle_sides[1][i][j];\n+ if (one.done)\n+ return;\n+\n+ double min_area = std::numeric_limits::max();\n+ for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n+ {\n+ const FlushIntersect& before = intersections.lineIntersect(i, e);\n+ if(!before.intersection.position)\n+ continue;\n+ const FlushIntersect& after = intersections.lineIntersect(e, j);\n+ if(!after.intersection.position)\n+ continue;\n+\n+ double tmp_area = before.extra_area + after.extra_area;\n+ if(tmp_area < min_area)\n+ {\n+ min_area = tmp_area;\n+ one.side = e;\n+ one.extra_area = tmp_area;\n+ one.exists = true;\n+ one.flush = true;\n+ }\n+ }\n+ one.done = true;\n+}\n+\n+//! Recursively repeat the search for the middle side of the h-sided chain with minimal extra area to shrink the boundaries of bigger distances. This is the version for h = 1.\n+/*!\n+ * @param i Fixed side of the ngon\n+ * @param j1 Lower boundary of search intervall\n+ * @param j2 Upper boundary of search intervall\n+ */\n+void Chains::middleSideImpl1(int i, int j1, int j2)\n+{\n+ const size_t n = ngon.size();\n+ if((j1 + 1) %n == j2)\n+ {\n+ return;\n+ }\n+\n+ int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n+ int l = middle_sides[1][i][j1].side < 0 ? (j1 + 1) %n : middle_sides[1][i][j1].side;\n+ int r = middle_sides[1][i][j2].side < 0 ? 
(j2 - 1 + n) %n : middle_sides[1][i][j2].side;\n+\n+ findMiddleE1(i, mid, l, r);\n+ middleSideImpl1(i, j1, mid);\n+ middleSideImpl1(i, mid, j2);\n+\n+ return;\n+}\n+\n+//! For a combination of two fixed sides of the ngon test all sides between left and right boundary to find the middle side of the h-sided chain with minimal extra area. This is the version for h > 1.\n+/*!\n+ * @param h Length of the h-sided chain\n+ * @param i Index of first fixed side\n+ * @param j Index of second fixed side\n+ * @param l Index of left boundary\n+ * @param r Index of right boundary\n+ */\n+void Chains::findMiddleE(int h, int i, int j, int l, int r)\n+{\n+ const size_t n = ngon.size();\n+ Segment& one = middle_sides[h][i][j];\n+ if (one.done)\n+ return;\n+\n+ const int dist = (i <= j ? (j - i) : (j + n - i));\n+ const int h_floor = (h - 1) / 2;\n+ const int h_ceil = h - 1 - h_floor;\n+\n+ if(dist == 0)\n+ throw std::logic_error(\"\");\n+ if(dist - 1 < h)\n+ {\n+ one.done = true;\n+ return;\n+ }\n+ if(dist - 1 == h)\n+ {\n+ one.side = (i + h_floor + 1) % n;\n+ one.extra_area = 0.0;\n+ one.exists = true;\n+ one.flush = true;\n+ one.done = true;\n+ return;\n+ }\n+\n+ double min_area = std::numeric_limits::max();\n+ for (int e = l; e != r + 1 && e != j; e = (e + 1) %n)\n+ {\n+ const Segment& before = middle_sides[h_floor][i][e];\n+ if (before.extra_area == std::numeric_limits::max())\n+ continue;\n+ const Segment& after = middle_sides[h_ceil][e][j];\n+ if(after.extra_area == std::numeric_limits::max())\n+ continue;\n+\n+ double tmp_area = before.extra_area + after.extra_area;\n+ if(tmp_area < min_area)\n+ {\n+ min_area = tmp_area;\n+ one.side = e;\n+ one.extra_area = tmp_area;\n+ one.exists = true;\n+ one.flush = true;\n+ }\n+ }\n+ one.done = true;\n+}\n+\n+//! Recursively repeat the search for the middle side of the h-sided chain with minimal extra area to shrink the boundaries of bigger distances. This is the version for h > 1.\n+/*!\n+ * @param h Length of the h-sided chain\n+ * @param i Fixed side of the ngon\n+ * @param j1 Lower boundary of search intervall\n+ * @param j2 Upper boundary of search intervall\n+ */\n+void Chains::middleSideImpl(int h, int i, int j1, int j2)\n+{\n+ const size_t n = ngon.size();\n+ if((j1 + 1) %n == j2)\n+ {\n+ return;\n+ }\n+\n+ int mid = (j1 < j2 ? ((j1 + j2) / 2) : ((j1 + n + j2) / 2)) % n;\n+ int l = middle_sides[h][i][j1].side < 0 ? (j1 + 1) %n : middle_sides[h][i][j1].side;\n+ int r = middle_sides[h][i][j2].side < 0 ? (j2 - 1 + n) %n : middle_sides[h][i][j2].side;\n+\n+ findMiddleE(h, i, mid, l, r);\n+ middleSideImpl(h, i, j1, mid);\n+ middleSideImpl(h, i, mid, j2);\n+\n+ return;\n+}\n+\n+//! Calculate the relevant chain lengths. Starting with a maximum chain length of h, down to a chain length of 1, only chains with half the length of the chain before are needed.\n+/*!\n+ * @param h Length of the longest chain (h = k - 3)\n+ */\n+std::set Chains::relevantChainLengths(int h)\n+{\n+ if(h <= 1)\n+ return {h};\n+\n+ std::set hs = {h};\n+ const int h_floor = (h - 1) / 2;\n+ const int h_ceil = h - 1 - h_floor;\n+ std::set h1 = relevantChainLengths(h_floor);\n+ for(const int& hNew : h1)\n+ hs.insert(hNew);\n+ if (h_ceil != h_floor)\n+ {\n+ std::set h2 = relevantChainLengths(h_ceil);\n+ for(const int& hNew : h2)\n+ hs.insert(hNew);\n+ }\n+ return hs;\n+}\n+\n+//! 
Recursively calculate all the onesided chains for each combination of polygon sides.\n+/*!\n+ */\n+void Chains::calcOneSidedChains()\n+{\n+ const size_t n = ngon.size();\n+ for(size_t i = 0; i < n; i++)\n+ {\n+ int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n+\n+ findSingleE(i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n+ findSingleE(i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n+ singleSideImpl(i, j1, j2);\n+ }\n+}\n+\n+//! Recursively calculate the middle sides of the h-sided chains for all combinations of polygon sides.\n+/*!\n+ * @param h Length of the chains\n+ */\n+void Chains::calcMiddleChains(int h)\n+{\n+ const size_t n = ngon.size();\n+ if (h == 0)\n+ {\n+ for (size_t i = 0; i < n; i++)\n+ {\n+ for (size_t j = 0; j < n; j++)\n+ {\n+ Segment& one = middle_sides[h][i][j];\n+ const FlushIntersect itrs = intersections.lineIntersect(i, j);\n+\n+ one.side = -1;\n+ one.extra_area = itrs.extra_area;\n+ one.exists = false;\n+ one.flush = false;\n+ one.done = true;\n+ }\n+ }\n+ return;\n+ }\n+ if (h == 1)\n+ {\n+ for (size_t i = 0; i < n; i++)\n+ {\n+ int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n+\n+ findMiddleE1(i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n+ findMiddleE1(i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n+ middleSideImpl1(i, j1, j2);\n+ }\n+ return;\n+ }\n+\n+ for (size_t i = 0; i < n; i++)\n+ {\n+ int j1 = (i + 2) %n, j2 = (i - 2 + n) %n;\n+\n+ findMiddleE(h, i, j1, (i + 1) %n, (j1 - 1 + n) %n);\n+ findMiddleE(h, i, j2, (i + 1) %n, (j2 - 1 + n) %n);\n+ middleSideImpl(h, i, j1, j2);\n+ }\n+}\n+\n+//! Find the i and j with the smallest extra area.\n+/*!\n+ * @param n Number of sides of the input polygon\n+ * @param k Number of sides of the output polygon\n+ */\n+Minimum Chains::minimumArea(int n, int k)\n+{\n+ Minimum min{};\n+ for(int i = 0; i < n; i++)\n+ {\n+ for(int j = 0; j < n; j++)\n+ {\n+ if(!single_sides[i][j].exists || !middle_sides[k - 3][j][i].exists)\n+ continue;\n+\n+ double tmp_area =\n+ single_sides[i][j].extra_area + middle_sides[k - 3][j][i].extra_area;\n+ if(tmp_area < min.area)\n+ {\n+ min.area = tmp_area;\n+ min.i = i;\n+ min.j = j;\n+ }\n+ }\n+ }\n+ return min;\n+}\n+\n+//! Reconstruct the h-sided chain based on the two sides that give the smalles extra area.\n+/*!\n+ * @param h Length of the k - 3 sided chain.\n+ * @param i First index from minimumArea\n+ * @param j Second index from minimumArea\n+ */\n+std::vector Chains::reconstructHSidedChain(int h, int i, int j)\n+{\n+ if(h == 0)\n+ {\n+ throw std::logic_error(\"\");\n+ }\n+ if(h == 1)\n+ {\n+ return std::vector{{middle_sides[h][i][j].side, true}};\n+ }\n+\n+ std::vector before, after;\n+ const int h_floor = (h - 1) / 2;\n+ const int h_ceil = h - 1 - h_floor;\n+ if(h_floor > 0)\n+ before = reconstructHSidedChain(h_floor, i, middle_sides[h][i][j].side);\n+ if(h_ceil > 0)\n+ after = reconstructHSidedChain(h_ceil, middle_sides[h][i][j].side, j);\n+\n+ std::vector sides{{middle_sides[h][i][j].side, true}};\n+ sides.insert(sides.end(), before.begin(), before.end());\n+ sides.insert(sides.end(), after.begin(), after.end());\n+ return sides;\n+}\n+\n+//! 
Get the k sides that build the kgon based on the two sides that give the smalles extra area.\n+/*!\n+ * @param k Number of sides of the output polygon\n+ * @param i First index from minimumArea\n+ * @param j Second index from minimumArea\n+ */\n+std::vector Chains::findKSides(int k, int i, int j)\n+{\n+ std::vector sides;\n+ sides.push_back({i, true});\n+ sides.push_back({j, true});\n+\n+ if(single_sides[i][j].flush)\n+ sides.push_back({single_sides[i][j].side, true});\n+ else\n+ sides.push_back({single_sides[i][j].side, false});\n+\n+ std::vector flush_chain = reconstructHSidedChain(k - 3, j, i);\n+ sides.insert(sides.end(), flush_chain.begin(), flush_chain.end());\n+ std::sort(sides.begin(), sides.end(),\n+ [](const Side& lhs, const Side& rhs) { return lhs.side < rhs.side; });\n+\n+ return sides;\n+}\n+\n+//! Calculate the k vertices of the kgon from its k sides.\n+/*!\n+ * @param sides Sides of the output polygon\n+ */\n+std::vector Chains::findKVertices(std::vector& sides)\n+{\n+ const int k = sides.size();\n+ std::vector vertices(k);\n+\n+ for(int u = 0; u < k; u++)\n+ {\n+ const int next = (u + 1) % k;\n+ if(sides[u].flush && sides[next].flush)\n+ {\n+ vertices[u] = intersections.lineIntersect(sides[u].side,\n+ sides[next].side).intersection.point;\n+ }\n+ else if(sides[u].flush && !sides[next].flush)\n+ {\n+ vertices[u] = balanced_inters[sides[u].side][sides[(u + 2) % k].side].pi;\n+ }\n+ else if(!sides[u].flush && sides[next].flush)\n+ {\n+ vertices[u] = balanced_inters[sides[(u - 1 + k) %k].side][sides[next].side].pj;\n+ }\n+ else\n+ {\n+ throw std::logic_error(\"\");", - "comment_created_at": "2025-06-18T07:17:46+00:00", - "comment_author": "asmorkalov", - "comment_body": "OpenCV uses CV_Assert for such purposes.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2073337340", - "pr_number": 27051, - "pr_file": "modules/photo/src/ccm/utils.cpp", - "created_at": "2025-05-05T12:20:27+00:00", - "commented_code": "namespace cv {\nnamespace ccm {\n\ninline double gammaCorrection_(double element, double gamma)\ninline double gammaOp(double element, double gamma)\n{\n return (element >= 0 ? pow(element, gamma) : -pow((-element), gamma));\n return (element >= 0.0) ? pow(element, gamma) : -pow(-element, gamma);\n}\n\nMat gammaCorrection(const Mat& src, double gamma, Mat dst)\nvoid gammaCorrection(InputArray _src, OutputArray _dst, double gamma)\n{\n return elementWise(src, [gamma](double element) -> double { return gammaCorrection_(element, gamma); }, dst);\n Mat src = _src.getMat();", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2073337340", - "repo_full_name": "opencv/opencv", - "pr_number": 27051, - "pr_file": "modules/photo/src/ccm/utils.cpp", - "discussion_id": "2073337340", - "commented_code": "@@ -11,14 +11,23 @@\n namespace cv {\n namespace ccm {\n \n-inline double gammaCorrection_(double element, double gamma)\n+inline double gammaOp(double element, double gamma)\n {\n- return (element >= 0 ? pow(element, gamma) : -pow((-element), gamma));\n+ return (element >= 0.0) ? 
pow(element, gamma) : -pow(-element, gamma);\n }\n \n-Mat gammaCorrection(const Mat& src, double gamma, Mat dst)\n+void gammaCorrection(InputArray _src, OutputArray _dst, double gamma)\n {\n- return elementWise(src, [gamma](double element) -> double { return gammaCorrection_(element, gamma); }, dst);\n+ Mat src = _src.getMat();", - "comment_created_at": "2025-05-05T12:20:27+00:00", - "comment_author": "asmorkalov", - "comment_body": "please add CV_Assert with input type checks at least. The function is public API now and should handle invalid input correctly.", - "pr_file_module": null - }, - { - "comment_id": "2139828248", - "repo_full_name": "opencv/opencv", - "pr_number": 27051, - "pr_file": "modules/photo/src/ccm/utils.cpp", - "discussion_id": "2073337340", - "commented_code": "@@ -11,14 +11,23 @@\n namespace cv {\n namespace ccm {\n \n-inline double gammaCorrection_(double element, double gamma)\n+inline double gammaOp(double element, double gamma)\n {\n- return (element >= 0 ? pow(element, gamma) : -pow((-element), gamma));\n+ return (element >= 0.0) ? pow(element, gamma) : -pow(-element, gamma);\n }\n \n-Mat gammaCorrection(const Mat& src, double gamma, Mat dst)\n+void gammaCorrection(InputArray _src, OutputArray _dst, double gamma)\n {\n- return elementWise(src, [gamma](double element) -> double { return gammaCorrection_(element, gamma); }, dst);\n+ Mat src = _src.getMat();", - "comment_created_at": "2025-06-11T10:54:35+00:00", - "comment_author": "asmorkalov", - "comment_body": "It's still relevant.", - "pr_file_module": null - }, - { - "comment_id": "2140012048", - "repo_full_name": "opencv/opencv", - "pr_number": 27051, - "pr_file": "modules/photo/src/ccm/utils.cpp", - "discussion_id": "2073337340", - "commented_code": "@@ -11,14 +11,23 @@\n namespace cv {\n namespace ccm {\n \n-inline double gammaCorrection_(double element, double gamma)\n+inline double gammaOp(double element, double gamma)\n {\n- return (element >= 0 ? pow(element, gamma) : -pow((-element), gamma));\n+ return (element >= 0.0) ? pow(element, gamma) : -pow(-element, gamma);\n }\n \n-Mat gammaCorrection(const Mat& src, double gamma, Mat dst)\n+void gammaCorrection(InputArray _src, OutputArray _dst, double gamma)\n {\n- return elementWise(src, [gamma](double element) -> double { return gammaCorrection_(element, gamma); }, dst);\n+ Mat src = _src.getMat();", - "comment_created_at": "2025-06-11T12:31:48+00:00", - "comment_author": "abhishek-gola", - "comment_body": "@asmorkalov CV_ASSERT for gamma and type checks using switch have already been added. 
Please check the updated file.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2053519772", - "pr_number": 27177, - "pr_file": "modules/dnn/src/llm/gguf_buffer.cpp", - "created_at": "2025-04-22T07:37:55+00:00", - "commented_code": "#include \"../precomp.hpp\"\n#include \"gguf_buffer.hpp\"\n\n\nnamespace cv { namespace dnn {\nCV__DNN_INLINE_NS_BEGIN\n\nGGUFBuffer::GGUFBuffer(const std::string & fileName){\n std::ifstream file(fileName, std::ios::binary | std::ios::ate);\n if (!file.is_open()) {\n throw std::runtime_error(\"Could not open file: \");", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2053519772", - "repo_full_name": "opencv/opencv", - "pr_number": 27177, - "pr_file": "modules/dnn/src/llm/gguf_buffer.cpp", - "discussion_id": "2053519772", - "commented_code": "@@ -0,0 +1,161 @@\n+#include \"../precomp.hpp\"\n+#include \"gguf_buffer.hpp\"\n+\n+\n+namespace cv { namespace dnn {\n+CV__DNN_INLINE_NS_BEGIN\n+\n+GGUFBuffer::GGUFBuffer(const std::string & fileName){\n+ std::ifstream file(fileName, std::ios::binary | std::ios::ate);\n+ if (!file.is_open()) {\n+ throw std::runtime_error(\"Could not open file: \");", - "comment_created_at": "2025-04-22T07:37:55+00:00", - "comment_author": "asmorkalov", - "comment_body": "Please use CV_Error, CV_Assert, etc. See https://docs.opencv.org/5.x/db/de0/group__core__utils.html#ga5b48c333c777666e076bd7052799f891", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1936883220", - "pr_number": 26859, - "pr_file": "modules/imgcodecs/src/grfmt_gif.cpp", - "created_at": "2025-01-31T08:49:23+00:00", - "commented_code": "} else {\n cvtColor(img_, img, COLOR_BGRA2BGR);\n }\n } else {\n } else if (img.channels() == 4){\n if (m_use_rgb) {\n cvtColor(img_, img, COLOR_BGRA2RGBA);\n } else {\n img_.copyTo(img);\n }\n } else if (img.channels() == 1){\n cvtColor(img_, img, COLOR_BGRA2GRAY);\n } else {\n CV_LOG_WARNING(NULL, \"Unsupported channels\");", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1936883220", - "repo_full_name": "opencv/opencv", - "pr_number": 26859, - "pr_file": "modules/imgcodecs/src/grfmt_gif.cpp", - "discussion_id": "1936883220", - "commented_code": "@@ -172,12 +172,17 @@ bool GifDecoder::readData(Mat &img) {\n } else {\n cvtColor(img_, img, COLOR_BGRA2BGR);\n }\n- } else {\n+ } else if (img.channels() == 4){\n if (m_use_rgb) {\n cvtColor(img_, img, COLOR_BGRA2RGBA);\n } else {\n img_.copyTo(img);\n }\n+ } else if (img.channels() == 1){\n+ cvtColor(img_, img, COLOR_BGRA2GRAY);\n+ } else {\n+ CV_LOG_WARNING(NULL, \"Unsupported channels\");", - "comment_created_at": "2025-01-31T08:49:23+00:00", - "comment_author": "asmorkalov", - "comment_body": "It's error. 
I propose to include amount of channels to error message in printf style like https://github.com/opencv/opencv/blob/c21d0ad9d08d364542bb4a6eb971ee3051ccba63/modules/imgcodecs/src/grfmt_jpeg.cpp#L771", - "pr_file_module": null - }, - { - "comment_id": "1937124526", - "repo_full_name": "opencv/opencv", - "pr_number": 26859, - "pr_file": "modules/imgcodecs/src/grfmt_gif.cpp", - "discussion_id": "1936883220", - "commented_code": "@@ -172,12 +172,17 @@ bool GifDecoder::readData(Mat &img) {\n } else {\n cvtColor(img_, img, COLOR_BGRA2BGR);\n }\n- } else {\n+ } else if (img.channels() == 4){\n if (m_use_rgb) {\n cvtColor(img_, img, COLOR_BGRA2RGBA);\n } else {\n img_.copyTo(img);\n }\n+ } else if (img.channels() == 1){\n+ cvtColor(img_, img, COLOR_BGRA2GRAY);\n+ } else {\n+ CV_LOG_WARNING(NULL, \"Unsupported channels\");", - "comment_created_at": "2025-01-31T12:03:48+00:00", - "comment_author": "Kumataro", - "comment_body": "Thank you for your comment, I fixed it to show `img.channels()`.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1820140600", - "pr_number": 26379, - "pr_file": "modules/imgcodecs/src/grfmt_jpegxl.cpp", - "created_at": "2024-10-29T06:05:36+00:00", - "commented_code": "/*M///////////////////////////////////////////////////////////////////////////////////////\n//\n// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\n//\n// By downloading, copying, installing or using the software you agree to this license.\n// If you do not agree to this license, do not download, install,\n// copy or use the software.\n//\n//\n// Intel License Agreement\n// For Open Source Computer Vision Library\n//\n// Copyright (C) 2000, Intel Corporation, all rights reserved.\n// Third party copyrights are property of their respective owners.\n//\n// Redistribution and use in source and binary forms, with or without modification,\n// are permitted provided that the following conditions are met:\n//\n// * Redistribution's of source code must retain the above copyright notice,\n// this list of conditions and the following disclaimer.\n//\n// * Redistribution's in binary form must reproduce the above copyright notice,\n// this list of conditions and the following disclaimer in the documentation\n// and/or other materials provided with the distribution.\n//\n// * The name of Intel Corporation may not be used to endorse or promote products\n// derived from this software without specific prior written permission.\n//\n// This software is provided by the copyright holders and contributors \"as is\" and\n// any express or implied warranties, including, but not limited to, the implied\n// warranties of merchantability and fitness for a particular purpose are disclaimed.\n// In no event shall the Intel Corporation or contributors be liable for any direct,\n// indirect, incidental, special, exemplary, or consequential damages\n// (including, but not limited to, procurement of substitute goods or services;\n// loss of use, data, or profits; or business interruption) however caused\n// and on any theory of liability, whether in contract, strict liability,\n// or tort (including negligence or otherwise) arising in any way out of\n// the use of this software, even if advised of the possibility of such damage.\n//\n//M*/\n\n#include \"precomp.hpp\"\n#include \"grfmt_jpegxl.hpp\"\n\n#ifdef HAVE_JPEGXL\n\n#include \n\nnamespace cv\n{\n\n/////////////////////// JpegXLDecoder ///////////////////\n\nJpegXLDecoder::JpegXLDecoder() : m_f(nullptr, fclose)\n{\n m_signature = \"\\xFF\\x0A\";\n 
m_decoder = nullptr;\n m_buf_supported = true;\n m_type = -1;\n}\n\nJpegXLDecoder::~JpegXLDecoder()\n{\n close();\n}\n\nvoid JpegXLDecoder::close()\n{\n if (m_decoder)\n m_decoder.release();\n if (m_f)\n m_f.release();\n m_read_buffer = {};\n m_width = m_height = 0;\n m_type = -1;\n}\n\nImageDecoder JpegXLDecoder::newDecoder() const\n{\n return makePtr();\n}\n\nbool JpegXLDecoder::read(Mat* pimg)\n{\n // Open file\n if (!m_f) {\n m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n if (!m_f)\n return false;\n }\n\n // Initialize decoder\n if (!m_decoder) {\n m_decoder = JxlDecoderMake(nullptr);\n if (!m_decoder)\n return false;\n // Subscribe to the basic info event\n JxlDecoderStatus status = JxlDecoderSubscribeEvents(m_decoder.get(), JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE);\n if (status != JXL_DEC_SUCCESS)\n return false;\n }\n\n // Set up parallel m_parallel_runner\n if (!m_parallel_runner) {\n m_parallel_runner = JxlThreadParallelRunnerMake(nullptr, JxlThreadParallelRunnerDefaultNumWorkerThreads());\n if (JXL_DEC_SUCCESS != JxlDecoderSetParallelRunner(m_decoder.get(),\n JxlThreadParallelRunner,\n m_parallel_runner.get())) {\n return false;\n }\n }\n\n // Create buffer for reading\n const size_t read_buffer_size = 16384; // 16KB chunks\n if (m_read_buffer.capacity() < read_buffer_size)\n m_read_buffer.resize(read_buffer_size);\n\n // Create image if needed\n if (m_type != -1 && pimg) {\n pimg->create(m_height, m_width, m_type);\n if (!pimg->isContinuous())\n return false;\n if (JXL_DEC_SUCCESS != JxlDecoderSetImageOutBuffer(m_decoder.get(), \n &m_format,\n pimg->ptr(),\n pimg->total() * pimg->elemSize())) {\n return false;\n }\n }\n\n // Start decoding loop\n JxlDecoderStatus status = JXL_DEC_NEED_MORE_INPUT;\n do {\n // Check if we need more input\n if (status == JXL_DEC_NEED_MORE_INPUT) {\n size_t remaining = JxlDecoderReleaseInput(m_decoder.get());\n // Move any remaining bytes to the beginning\n if (remaining > 0) {\n memmove(m_read_buffer.data(), m_read_buffer.data() + m_read_buffer.size() - remaining, remaining);\n }\n \n // Read more data from file\n size_t bytes_read = fread(m_read_buffer.data() + remaining, \n 1, m_read_buffer.size() - remaining, m_f.get());\n if (bytes_read == 0) {\n if (ferror(m_f.get())) {\n throw std::runtime_error(\"Error reading input file\");\n }\n // If we reached EOF but decoder needs more input, file is truncated\n if (status == JXL_DEC_NEED_MORE_INPUT) {\n throw std::runtime_error(\"Truncated JXL file\");\n }", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1820140600", - "repo_full_name": "opencv/opencv", - "pr_number": 26379, - "pr_file": "modules/imgcodecs/src/grfmt_jpegxl.cpp", - "discussion_id": "1820140600", - "commented_code": "@@ -0,0 +1,376 @@\n+/*M///////////////////////////////////////////////////////////////////////////////////////\n+//\n+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\n+//\n+// By downloading, copying, installing or using the software you agree to this license.\n+// If you do not agree to this license, do not download, install,\n+// copy or use the software.\n+//\n+//\n+// Intel License Agreement\n+// For Open Source Computer Vision Library\n+//\n+// Copyright (C) 2000, Intel Corporation, all rights reserved.\n+// Third party copyrights are property of their respective owners.\n+//\n+// Redistribution and use in source and binary forms, with or without modification,\n+// are permitted provided that the following conditions are met:\n+//\n+// * Redistribution's of 
source code must retain the above copyright notice,\n+// this list of conditions and the following disclaimer.\n+//\n+// * Redistribution's in binary form must reproduce the above copyright notice,\n+// this list of conditions and the following disclaimer in the documentation\n+// and/or other materials provided with the distribution.\n+//\n+// * The name of Intel Corporation may not be used to endorse or promote products\n+// derived from this software without specific prior written permission.\n+//\n+// This software is provided by the copyright holders and contributors \"as is\" and\n+// any express or implied warranties, including, but not limited to, the implied\n+// warranties of merchantability and fitness for a particular purpose are disclaimed.\n+// In no event shall the Intel Corporation or contributors be liable for any direct,\n+// indirect, incidental, special, exemplary, or consequential damages\n+// (including, but not limited to, procurement of substitute goods or services;\n+// loss of use, data, or profits; or business interruption) however caused\n+// and on any theory of liability, whether in contract, strict liability,\n+// or tort (including negligence or otherwise) arising in any way out of\n+// the use of this software, even if advised of the possibility of such damage.\n+//\n+//M*/\n+\n+#include \"precomp.hpp\"\n+#include \"grfmt_jpegxl.hpp\"\n+\n+#ifdef HAVE_JPEGXL\n+\n+#include \n+\n+namespace cv\n+{\n+\n+/////////////////////// JpegXLDecoder ///////////////////\n+\n+JpegXLDecoder::JpegXLDecoder() : m_f(nullptr, fclose)\n+{\n+ m_signature = \"\\xFF\\x0A\";\n+ m_decoder = nullptr;\n+ m_buf_supported = true;\n+ m_type = -1;\n+}\n+\n+JpegXLDecoder::~JpegXLDecoder()\n+{\n+ close();\n+}\n+\n+void JpegXLDecoder::close()\n+{\n+ if (m_decoder)\n+ m_decoder.release();\n+ if (m_f)\n+ m_f.release();\n+ m_read_buffer = {};\n+ m_width = m_height = 0;\n+ m_type = -1;\n+}\n+\n+ImageDecoder JpegXLDecoder::newDecoder() const\n+{\n+ return makePtr();\n+}\n+\n+bool JpegXLDecoder::read(Mat* pimg)\n+{\n+ // Open file\n+ if (!m_f) {\n+ m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n+ if (!m_f)\n+ return false;\n+ }\n+\n+ // Initialize decoder\n+ if (!m_decoder) {\n+ m_decoder = JxlDecoderMake(nullptr);\n+ if (!m_decoder)\n+ return false;\n+ // Subscribe to the basic info event\n+ JxlDecoderStatus status = JxlDecoderSubscribeEvents(m_decoder.get(), JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE);\n+ if (status != JXL_DEC_SUCCESS)\n+ return false;\n+ }\n+\n+ // Set up parallel m_parallel_runner\n+ if (!m_parallel_runner) {\n+ m_parallel_runner = JxlThreadParallelRunnerMake(nullptr, JxlThreadParallelRunnerDefaultNumWorkerThreads());\n+ if (JXL_DEC_SUCCESS != JxlDecoderSetParallelRunner(m_decoder.get(),\n+ JxlThreadParallelRunner,\n+ m_parallel_runner.get())) {\n+ return false;\n+ }\n+ }\n+\n+ // Create buffer for reading\n+ const size_t read_buffer_size = 16384; // 16KB chunks\n+ if (m_read_buffer.capacity() < read_buffer_size)\n+ m_read_buffer.resize(read_buffer_size);\n+\n+ // Create image if needed\n+ if (m_type != -1 && pimg) {\n+ pimg->create(m_height, m_width, m_type);\n+ if (!pimg->isContinuous())\n+ return false;\n+ if (JXL_DEC_SUCCESS != JxlDecoderSetImageOutBuffer(m_decoder.get(), \n+ &m_format,\n+ pimg->ptr(),\n+ pimg->total() * pimg->elemSize())) {\n+ return false;\n+ }\n+ }\n+\n+ // Start decoding loop\n+ JxlDecoderStatus status = JXL_DEC_NEED_MORE_INPUT;\n+ do {\n+ // Check if we need more input\n+ if (status == JXL_DEC_NEED_MORE_INPUT) {\n+ size_t remaining = 
JxlDecoderReleaseInput(m_decoder.get());\n+ // Move any remaining bytes to the beginning\n+ if (remaining > 0) {\n+ memmove(m_read_buffer.data(), m_read_buffer.data() + m_read_buffer.size() - remaining, remaining);\n+ }\n+ \n+ // Read more data from file\n+ size_t bytes_read = fread(m_read_buffer.data() + remaining, \n+ 1, m_read_buffer.size() - remaining, m_f.get());\n+ if (bytes_read == 0) {\n+ if (ferror(m_f.get())) {\n+ throw std::runtime_error(\"Error reading input file\");\n+ }\n+ // If we reached EOF but decoder needs more input, file is truncated\n+ if (status == JXL_DEC_NEED_MORE_INPUT) {\n+ throw std::runtime_error(\"Truncated JXL file\");\n+ }", - "comment_created_at": "2024-10-29T06:05:36+00:00", - "comment_author": "asmorkalov", - "comment_body": "OpenCV's `imread` is exception safe. Please use CV_LOG_WARNING and analogs and return satus instead of exception.", - "pr_file_module": null - }, - { - "comment_id": "1820592738", - "repo_full_name": "opencv/opencv", - "pr_number": 26379, - "pr_file": "modules/imgcodecs/src/grfmt_jpegxl.cpp", - "discussion_id": "1820140600", - "commented_code": "@@ -0,0 +1,376 @@\n+/*M///////////////////////////////////////////////////////////////////////////////////////\n+//\n+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\n+//\n+// By downloading, copying, installing or using the software you agree to this license.\n+// If you do not agree to this license, do not download, install,\n+// copy or use the software.\n+//\n+//\n+// Intel License Agreement\n+// For Open Source Computer Vision Library\n+//\n+// Copyright (C) 2000, Intel Corporation, all rights reserved.\n+// Third party copyrights are property of their respective owners.\n+//\n+// Redistribution and use in source and binary forms, with or without modification,\n+// are permitted provided that the following conditions are met:\n+//\n+// * Redistribution's of source code must retain the above copyright notice,\n+// this list of conditions and the following disclaimer.\n+//\n+// * Redistribution's in binary form must reproduce the above copyright notice,\n+// this list of conditions and the following disclaimer in the documentation\n+// and/or other materials provided with the distribution.\n+//\n+// * The name of Intel Corporation may not be used to endorse or promote products\n+// derived from this software without specific prior written permission.\n+//\n+// This software is provided by the copyright holders and contributors \"as is\" and\n+// any express or implied warranties, including, but not limited to, the implied\n+// warranties of merchantability and fitness for a particular purpose are disclaimed.\n+// In no event shall the Intel Corporation or contributors be liable for any direct,\n+// indirect, incidental, special, exemplary, or consequential damages\n+// (including, but not limited to, procurement of substitute goods or services;\n+// loss of use, data, or profits; or business interruption) however caused\n+// and on any theory of liability, whether in contract, strict liability,\n+// or tort (including negligence or otherwise) arising in any way out of\n+// the use of this software, even if advised of the possibility of such damage.\n+//\n+//M*/\n+\n+#include \"precomp.hpp\"\n+#include \"grfmt_jpegxl.hpp\"\n+\n+#ifdef HAVE_JPEGXL\n+\n+#include \n+\n+namespace cv\n+{\n+\n+/////////////////////// JpegXLDecoder ///////////////////\n+\n+JpegXLDecoder::JpegXLDecoder() : m_f(nullptr, fclose)\n+{\n+ m_signature = \"\\xFF\\x0A\";\n+ m_decoder = nullptr;\n+ 
m_buf_supported = true;\n+ m_type = -1;\n+}\n+\n+JpegXLDecoder::~JpegXLDecoder()\n+{\n+ close();\n+}\n+\n+void JpegXLDecoder::close()\n+{\n+ if (m_decoder)\n+ m_decoder.release();\n+ if (m_f)\n+ m_f.release();\n+ m_read_buffer = {};\n+ m_width = m_height = 0;\n+ m_type = -1;\n+}\n+\n+ImageDecoder JpegXLDecoder::newDecoder() const\n+{\n+ return makePtr();\n+}\n+\n+bool JpegXLDecoder::read(Mat* pimg)\n+{\n+ // Open file\n+ if (!m_f) {\n+ m_f.reset(fopen(m_filename.c_str(), \"rb\"));\n+ if (!m_f)\n+ return false;\n+ }\n+\n+ // Initialize decoder\n+ if (!m_decoder) {\n+ m_decoder = JxlDecoderMake(nullptr);\n+ if (!m_decoder)\n+ return false;\n+ // Subscribe to the basic info event\n+ JxlDecoderStatus status = JxlDecoderSubscribeEvents(m_decoder.get(), JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE);\n+ if (status != JXL_DEC_SUCCESS)\n+ return false;\n+ }\n+\n+ // Set up parallel m_parallel_runner\n+ if (!m_parallel_runner) {\n+ m_parallel_runner = JxlThreadParallelRunnerMake(nullptr, JxlThreadParallelRunnerDefaultNumWorkerThreads());\n+ if (JXL_DEC_SUCCESS != JxlDecoderSetParallelRunner(m_decoder.get(),\n+ JxlThreadParallelRunner,\n+ m_parallel_runner.get())) {\n+ return false;\n+ }\n+ }\n+\n+ // Create buffer for reading\n+ const size_t read_buffer_size = 16384; // 16KB chunks\n+ if (m_read_buffer.capacity() < read_buffer_size)\n+ m_read_buffer.resize(read_buffer_size);\n+\n+ // Create image if needed\n+ if (m_type != -1 && pimg) {\n+ pimg->create(m_height, m_width, m_type);\n+ if (!pimg->isContinuous())\n+ return false;\n+ if (JXL_DEC_SUCCESS != JxlDecoderSetImageOutBuffer(m_decoder.get(), \n+ &m_format,\n+ pimg->ptr(),\n+ pimg->total() * pimg->elemSize())) {\n+ return false;\n+ }\n+ }\n+\n+ // Start decoding loop\n+ JxlDecoderStatus status = JXL_DEC_NEED_MORE_INPUT;\n+ do {\n+ // Check if we need more input\n+ if (status == JXL_DEC_NEED_MORE_INPUT) {\n+ size_t remaining = JxlDecoderReleaseInput(m_decoder.get());\n+ // Move any remaining bytes to the beginning\n+ if (remaining > 0) {\n+ memmove(m_read_buffer.data(), m_read_buffer.data() + m_read_buffer.size() - remaining, remaining);\n+ }\n+ \n+ // Read more data from file\n+ size_t bytes_read = fread(m_read_buffer.data() + remaining, \n+ 1, m_read_buffer.size() - remaining, m_f.get());\n+ if (bytes_read == 0) {\n+ if (ferror(m_f.get())) {\n+ throw std::runtime_error(\"Error reading input file\");\n+ }\n+ // If we reached EOF but decoder needs more input, file is truncated\n+ if (status == JXL_DEC_NEED_MORE_INPUT) {\n+ throw std::runtime_error(\"Truncated JXL file\");\n+ }", - "comment_created_at": "2024-10-29T11:08:45+00:00", - "comment_author": "cdcseacave", - "comment_body": "done", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-use-optimized-functions.json b/_reviewers/opencv-use-optimized-functions.json new file mode 100644 index 0000000..7a899bb --- /dev/null +++ b/_reviewers/opencv-use-optimized-functions.json @@ -0,0 +1,276 @@ +[ + { + "discussion_id": "2024769056", + "pr_number": 27051, + "pr_file": "modules/photo/src/ccm/utils.cpp", + "created_at": "2025-04-02T12:53:13+00:00", + "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n//\n// Author: Longbu Wang \n// Jinheng Zhang \n// Chenqi Shan \n\n#include \"utils.hpp\"\n\nnamespace cv {\nnamespace ccm {\n\ninline double gammaCorrection_(const double& element, const double& gamma)\n{\n return (element >= 0 
? pow(element, gamma) : -pow((-element), gamma));\n}\n\nMat gammaCorrection(const Mat& src, const double& gamma, Mat dst)\n{\n return elementWise(src, [gamma](double element) -> double { return gammaCorrection_(element, gamma); }, dst);\n}\n\nMat maskCopyTo(const Mat& src, const Mat& mask)\n{\n Mat dst(countNonZero(mask), 1, src.type());\n const int channel = src.channels();\n auto it_mask = mask.begin();\n switch (channel)\n {\n case 1:\n {\n auto it_src = src.begin(), end_src = src.end();\n auto it_dst = dst.begin();\n for (; it_src != end_src; ++it_src, ++it_mask)\n {\n if (*it_mask)\n {\n (*it_dst) = (*it_src);\n ++it_dst;\n }\n }\n break;\n }\n case 3:\n {\n auto it_src = src.begin(), end_src = src.end();\n auto it_dst = dst.begin();\n for (; it_src != end_src; ++it_src, ++it_mask)\n {\n if (*it_mask)\n {\n (*it_dst) = (*it_src);\n ++it_dst;\n }\n }\n break;\n }\n default:\n CV_Error(Error::StsBadArg, \"Wrong channel!\" );\n break;\n }\n return dst;\n}\n\nMat multiple(const Mat& xyz, const Mat& ccm)\n{\n Mat tmp = xyz.reshape(1, xyz.rows * xyz.cols);\n Mat res = tmp * ccm;\n res = res.reshape(res.cols, xyz.rows);\n return res;\n}\n\nMat saturate(Mat& src, const double& low, const double& up)\n{\n Mat dst = Mat::ones(src.size(), CV_8UC1);\n MatIterator_ it_src = src.begin(), end_src = src.end();\n MatIterator_ it_dst = dst.begin();\n for (; it_src != end_src; ++it_src, ++it_dst)\n {\n for (int i = 0; i < 3; ++i)\n {\n if ((*it_src)[i] > up || (*it_src)[i] < low)\n {\n *it_dst = 0;\n break;\n }\n }\n }\n return dst;\n}", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2024769056", + "repo_full_name": "opencv/opencv", + "pr_number": 27051, + "pr_file": "modules/photo/src/ccm/utils.cpp", + "discussion_id": "2024769056", + "commented_code": "@@ -0,0 +1,100 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+//\n+// Author: Longbu Wang \n+// Jinheng Zhang \n+// Chenqi Shan \n+\n+#include \"utils.hpp\"\n+\n+namespace cv {\n+namespace ccm {\n+\n+inline double gammaCorrection_(const double& element, const double& gamma)\n+{\n+ return (element >= 0 ? 
pow(element, gamma) : -pow((-element), gamma));\n+}\n+\n+Mat gammaCorrection(const Mat& src, const double& gamma, Mat dst)\n+{\n+ return elementWise(src, [gamma](double element) -> double { return gammaCorrection_(element, gamma); }, dst);\n+}\n+\n+Mat maskCopyTo(const Mat& src, const Mat& mask)\n+{\n+ Mat dst(countNonZero(mask), 1, src.type());\n+ const int channel = src.channels();\n+ auto it_mask = mask.begin();\n+ switch (channel)\n+ {\n+ case 1:\n+ {\n+ auto it_src = src.begin(), end_src = src.end();\n+ auto it_dst = dst.begin();\n+ for (; it_src != end_src; ++it_src, ++it_mask)\n+ {\n+ if (*it_mask)\n+ {\n+ (*it_dst) = (*it_src);\n+ ++it_dst;\n+ }\n+ }\n+ break;\n+ }\n+ case 3:\n+ {\n+ auto it_src = src.begin(), end_src = src.end();\n+ auto it_dst = dst.begin();\n+ for (; it_src != end_src; ++it_src, ++it_mask)\n+ {\n+ if (*it_mask)\n+ {\n+ (*it_dst) = (*it_src);\n+ ++it_dst;\n+ }\n+ }\n+ break;\n+ }\n+ default:\n+ CV_Error(Error::StsBadArg, \"Wrong channel!\" );\n+ break;\n+ }\n+ return dst;\n+}\n+\n+Mat multiple(const Mat& xyz, const Mat& ccm)\n+{\n+ Mat tmp = xyz.reshape(1, xyz.rows * xyz.cols);\n+ Mat res = tmp * ccm;\n+ res = res.reshape(res.cols, xyz.rows);\n+ return res;\n+}\n+\n+Mat saturate(Mat& src, const double& low, const double& up)\n+{\n+ Mat dst = Mat::ones(src.size(), CV_8UC1);\n+ MatIterator_ it_src = src.begin(), end_src = src.end();\n+ MatIterator_ it_dst = dst.begin();\n+ for (; it_src != end_src; ++it_src, ++it_dst)\n+ {\n+ for (int i = 0; i < 3; ++i)\n+ {\n+ if ((*it_src)[i] > up || (*it_src)[i] < low)\n+ {\n+ *it_dst = 0;\n+ break;\n+ }\n+ }\n+ }\n+ return dst;\n+}", + "comment_created_at": "2025-04-02T12:53:13+00:00", + "comment_author": "asmorkalov", + "comment_body": "inRange should be enough instead of it: https://docs.opencv.org/4.x/d2/de8/group__core__array.html#ga48af0ab51e36436c5d04340e036ce981", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2064230851", + "pr_number": 27051, + "pr_file": "modules/photo/src/ccm/ccm.cpp", + "created_at": "2025-04-28T18:10:17+00:00", + "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n//\n// Author: Longbu Wang \n// Jinheng Zhang \n// Chenqi Shan \n\n#include \"opencv2/photo.hpp\"\n#include \"linearize.hpp\"\nnamespace cv {\nnamespace ccm {\nclass ColorCorrectionModel::Impl\n{\npublic:\n Mat src;\n\n std::shared_ptr dst = std::make_shared();\n Mat dist;\n RGBBase_& cs;\n // Track initialization parameters for serialization\n ColorSpace csEnum;\n Mat mask;\n\n // RGBl of detected data and the reference\n Mat srcRgbl;\n Mat dstRgbl;\n\n // ccm type and shape\n CcmType ccmType;\n int shape;\n\n // linear method and distance\n std::shared_ptr linear = std::make_shared();\n DistanceType distance;\n LinearizationType linearizationType;\n\n Mat weights;\n Mat weightsList;\n Mat ccm;\n Mat ccm0;\n double gamma;\n int deg;\n std::vector saturatedThreshold;\n InitialMethodType initialMethodType;\n double weightsCoeff;\n int maskedLen;\n double loss;\n int maxCount;\n double epsilon;\n Impl();\n\n /** @brief Make no change for CCM_LINEAR.\n convert cv::Mat A to [A, 1] in CCM_AFFINE.\n @param inp the input array, type of cv::Mat.\n @return the output array, type of cv::Mat\n */\n Mat prepare(const Mat& inp);\n\n /** @brief Calculate weights and mask.\n @param weightsList the input array, type of cv::Mat.\n @param weightsCoeff type of double.\n @param 
saturateMask the input array, type of cv::Mat.\n */\n void calWeightsMasks(const Mat& weightsList, double weightsCoeff, Mat saturateMask);\n\n /** @brief Fitting nonlinear - optimization initial value by white balance.\n @return the output array, type of Mat\n */\n void initialWhiteBalance(void);\n\n /** @brief Fitting nonlinear-optimization initial value by least square.\n @param fit if fit is True, return optimalization for rgbl distance function.\n */\n void initialLeastSquare(bool fit = false);\n\n double calcLoss_(Color color);\n double calcLoss(const Mat ccm_);\n\n /** @brief Fitting ccm if distance function is associated with CIE Lab color space.\n see details in https://github.com/opencv/opencv/blob/master/modules/core/include/opencv2/core/optim.hpp\n Set terminal criteria for solver is possible.\n */\n void fitting(void);\n\n void getColor(Mat& img_, bool islinear = false);\n void getColor(ColorCheckerType constColor);\n void getColor(Mat colors_, ColorSpace cs_, Mat colored_);\n void getColor(Mat colors_, ColorSpace refColorSpace_);\n\n /** @brief Loss function base on cv::MinProblemSolver::Function.\n see details in https://github.com/opencv/opencv/blob/master/modules/core/include/opencv2/core/optim.hpp\n */\n class LossFunction : public MinProblemSolver::Function\n {\n public:\n ColorCorrectionModel::Impl* ccmLoss;\n LossFunction(ColorCorrectionModel::Impl* ccm)\n : ccmLoss(ccm) {};\n\n /** @brief Reset dims to ccm->shape.\n */\n int getDims() const CV_OVERRIDE\n {\n return ccmLoss->shape;\n }\n\n /** @brief Reset calculation.\n */\n double calc(const double* x) const CV_OVERRIDE\n {\n Mat ccm_(ccmLoss->shape, 1, CV_64F);\n for (int i = 0; i < ccmLoss->shape; i++)\n {\n ccm_.at(i, 0) = x[i];\n }\n ccm_ = ccm_.reshape(0, ccmLoss->shape / 3);\n return ccmLoss->calcLoss(ccm_);\n }\n };\n};\n\nColorCorrectionModel::Impl::Impl()\n : cs(*GetCS::getInstance().getRgb(COLOR_SPACE_SRGB))\n , csEnum(COLOR_SPACE_SRGB)\n , ccmType(CCM_LINEAR)\n , distance(DISTANCE_CIE2000)\n , linearizationType(LINEARIZATION_GAMMA)\n , weights(Mat())\n , gamma(2.2)\n , deg(3)\n , saturatedThreshold({ 0, 0.98 })\n , initialMethodType(INITIAL_METHOD_LEAST_SQUARE)\n , weightsCoeff(0)\n , maxCount(5000)\n , epsilon(1.e-4)\n{}\n\nMat ColorCorrectionModel::Impl::prepare(const Mat& inp)\n{\n switch (ccmType)\n {\n case cv::ccm::CCM_LINEAR:\n shape = 9;\n return inp;\n case cv::ccm::CCM_AFFINE:\n {\n shape = 12;\n Mat arr1 = Mat::ones(inp.size(), CV_64F);\n Mat arr_out(inp.size(), CV_64FC4);\n Mat arr_channels[3];\n split(inp, arr_channels);\n merge(std::vector { arr_channels[0], arr_channels[1], arr_channels[2], arr1 }, arr_out);\n return arr_out;\n }\n default:\n CV_Error(Error::StsBadArg, \"Wrong ccmType!\");\n break;\n }\n}\n\nvoid ColorCorrectionModel::Impl::calWeightsMasks(const Mat& weightsList_, double weightsCoeff_, Mat saturateMask)\n{\n // weights\n if (!weightsList_.empty())\n {\n weights = weightsList_;\n }\n else if (weightsCoeff_ != 0)\n {\n pow(dst->toLuminant(cs.io), weightsCoeff_, weights);\n }\n\n // masks\n Mat weight_mask = Mat::ones(src.rows, 1, CV_8U);\n if (!weights.empty())\n {\n weight_mask = weights > 0;\n }\n this->mask = (weight_mask) & (saturateMask);\n\n // weights' mask\n if (!weights.empty())\n {\n Mat weights_masked = maskCopyTo(this->weights, this->mask);\n weights = weights_masked / mean(weights_masked)[0];\n }\n maskedLen = (int)sum(mask)[0];\n}\n\nvoid ColorCorrectionModel::Impl::initialWhiteBalance(void)\n{\n Mat schannels[4];\n split(srcRgbl, schannels);\n Mat dchannels[4];\n 
split(dstRgbl, dchannels);\n std::vector initialVec = { sum(dchannels[0])[0] / sum(schannels[0])[0], 0, 0, 0,\n sum(dchannels[1])[0] / sum(schannels[1])[0], 0, 0, 0,\n sum(dchannels[2])[0] / sum(schannels[2])[0], 0, 0, 0 };", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2064230851", + "repo_full_name": "opencv/opencv", + "pr_number": 27051, + "pr_file": "modules/photo/src/ccm/ccm.cpp", + "discussion_id": "2064230851", + "commented_code": "@@ -0,0 +1,504 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+//\n+// Author: Longbu Wang \n+// Jinheng Zhang \n+// Chenqi Shan \n+\n+#include \"opencv2/photo.hpp\"\n+#include \"linearize.hpp\"\n+namespace cv {\n+namespace ccm {\n+class ColorCorrectionModel::Impl\n+{\n+public:\n+ Mat src;\n+\n+ std::shared_ptr dst = std::make_shared();\n+ Mat dist;\n+ RGBBase_& cs;\n+ // Track initialization parameters for serialization\n+ ColorSpace csEnum;\n+ Mat mask;\n+\n+ // RGBl of detected data and the reference\n+ Mat srcRgbl;\n+ Mat dstRgbl;\n+\n+ // ccm type and shape\n+ CcmType ccmType;\n+ int shape;\n+\n+ // linear method and distance\n+ std::shared_ptr linear = std::make_shared();\n+ DistanceType distance;\n+ LinearizationType linearizationType;\n+\n+ Mat weights;\n+ Mat weightsList;\n+ Mat ccm;\n+ Mat ccm0;\n+ double gamma;\n+ int deg;\n+ std::vector saturatedThreshold;\n+ InitialMethodType initialMethodType;\n+ double weightsCoeff;\n+ int maskedLen;\n+ double loss;\n+ int maxCount;\n+ double epsilon;\n+ Impl();\n+\n+ /** @brief Make no change for CCM_LINEAR.\n+ convert cv::Mat A to [A, 1] in CCM_AFFINE.\n+ @param inp the input array, type of cv::Mat.\n+ @return the output array, type of cv::Mat\n+ */\n+ Mat prepare(const Mat& inp);\n+\n+ /** @brief Calculate weights and mask.\n+ @param weightsList the input array, type of cv::Mat.\n+ @param weightsCoeff type of double.\n+ @param saturateMask the input array, type of cv::Mat.\n+ */\n+ void calWeightsMasks(const Mat& weightsList, double weightsCoeff, Mat saturateMask);\n+\n+ /** @brief Fitting nonlinear - optimization initial value by white balance.\n+ @return the output array, type of Mat\n+ */\n+ void initialWhiteBalance(void);\n+\n+ /** @brief Fitting nonlinear-optimization initial value by least square.\n+ @param fit if fit is True, return optimalization for rgbl distance function.\n+ */\n+ void initialLeastSquare(bool fit = false);\n+\n+ double calcLoss_(Color color);\n+ double calcLoss(const Mat ccm_);\n+\n+ /** @brief Fitting ccm if distance function is associated with CIE Lab color space.\n+ see details in https://github.com/opencv/opencv/blob/master/modules/core/include/opencv2/core/optim.hpp\n+ Set terminal criteria for solver is possible.\n+ */\n+ void fitting(void);\n+\n+ void getColor(Mat& img_, bool islinear = false);\n+ void getColor(ColorCheckerType constColor);\n+ void getColor(Mat colors_, ColorSpace cs_, Mat colored_);\n+ void getColor(Mat colors_, ColorSpace refColorSpace_);\n+\n+ /** @brief Loss function base on cv::MinProblemSolver::Function.\n+ see details in https://github.com/opencv/opencv/blob/master/modules/core/include/opencv2/core/optim.hpp\n+ */\n+ class LossFunction : public MinProblemSolver::Function\n+ {\n+ public:\n+ ColorCorrectionModel::Impl* ccmLoss;\n+ LossFunction(ColorCorrectionModel::Impl* ccm)\n+ : ccmLoss(ccm) {};\n+\n+ /** @brief Reset dims to ccm->shape.\n+ */\n+ int 
getDims() const CV_OVERRIDE\n+ {\n+ return ccmLoss->shape;\n+ }\n+\n+ /** @brief Reset calculation.\n+ */\n+ double calc(const double* x) const CV_OVERRIDE\n+ {\n+ Mat ccm_(ccmLoss->shape, 1, CV_64F);\n+ for (int i = 0; i < ccmLoss->shape; i++)\n+ {\n+ ccm_.at(i, 0) = x[i];\n+ }\n+ ccm_ = ccm_.reshape(0, ccmLoss->shape / 3);\n+ return ccmLoss->calcLoss(ccm_);\n+ }\n+ };\n+};\n+\n+ColorCorrectionModel::Impl::Impl()\n+ : cs(*GetCS::getInstance().getRgb(COLOR_SPACE_SRGB))\n+ , csEnum(COLOR_SPACE_SRGB)\n+ , ccmType(CCM_LINEAR)\n+ , distance(DISTANCE_CIE2000)\n+ , linearizationType(LINEARIZATION_GAMMA)\n+ , weights(Mat())\n+ , gamma(2.2)\n+ , deg(3)\n+ , saturatedThreshold({ 0, 0.98 })\n+ , initialMethodType(INITIAL_METHOD_LEAST_SQUARE)\n+ , weightsCoeff(0)\n+ , maxCount(5000)\n+ , epsilon(1.e-4)\n+{}\n+\n+Mat ColorCorrectionModel::Impl::prepare(const Mat& inp)\n+{\n+ switch (ccmType)\n+ {\n+ case cv::ccm::CCM_LINEAR:\n+ shape = 9;\n+ return inp;\n+ case cv::ccm::CCM_AFFINE:\n+ {\n+ shape = 12;\n+ Mat arr1 = Mat::ones(inp.size(), CV_64F);\n+ Mat arr_out(inp.size(), CV_64FC4);\n+ Mat arr_channels[3];\n+ split(inp, arr_channels);\n+ merge(std::vector { arr_channels[0], arr_channels[1], arr_channels[2], arr1 }, arr_out);\n+ return arr_out;\n+ }\n+ default:\n+ CV_Error(Error::StsBadArg, \"Wrong ccmType!\");\n+ break;\n+ }\n+}\n+\n+void ColorCorrectionModel::Impl::calWeightsMasks(const Mat& weightsList_, double weightsCoeff_, Mat saturateMask)\n+{\n+ // weights\n+ if (!weightsList_.empty())\n+ {\n+ weights = weightsList_;\n+ }\n+ else if (weightsCoeff_ != 0)\n+ {\n+ pow(dst->toLuminant(cs.io), weightsCoeff_, weights);\n+ }\n+\n+ // masks\n+ Mat weight_mask = Mat::ones(src.rows, 1, CV_8U);\n+ if (!weights.empty())\n+ {\n+ weight_mask = weights > 0;\n+ }\n+ this->mask = (weight_mask) & (saturateMask);\n+\n+ // weights' mask\n+ if (!weights.empty())\n+ {\n+ Mat weights_masked = maskCopyTo(this->weights, this->mask);\n+ weights = weights_masked / mean(weights_masked)[0];\n+ }\n+ maskedLen = (int)sum(mask)[0];\n+}\n+\n+void ColorCorrectionModel::Impl::initialWhiteBalance(void)\n+{\n+ Mat schannels[4];\n+ split(srcRgbl, schannels);\n+ Mat dchannels[4];\n+ split(dstRgbl, dchannels);\n+ std::vector initialVec = { sum(dchannels[0])[0] / sum(schannels[0])[0], 0, 0, 0,\n+ sum(dchannels[1])[0] / sum(schannels[1])[0], 0, 0, 0,\n+ sum(dchannels[2])[0] / sum(schannels[2])[0], 0, 0, 0 };", + "comment_created_at": "2025-04-28T18:10:17+00:00", + "comment_author": "asmorkalov", + "comment_body": "No need to spit cv::Mat by channels for it. cv::sum supports channels: https://docs.opencv.org/5.x/d2/de8/group__core__array.html#ga716e10a2dd9e228e4d3c95818f106722", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2064246723", + "pr_number": 27051, + "pr_file": "modules/photo/src/ccm/utils.cpp", + "created_at": "2025-04-28T18:19:59+00:00", + "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n//\n// Author: Longbu Wang \n// Jinheng Zhang \n// Chenqi Shan \n\n#include \"utils.hpp\"\n\nnamespace cv {\nnamespace ccm {\n\ninline double gammaCorrection_(double element, double gamma)\n{\n return (element >= 0 ? 
pow(element, gamma) : -pow((-element), gamma));\n}\n\nMat gammaCorrection(const Mat& src, double gamma, Mat dst)\n{\n return elementWise(src, [gamma](double element) -> double { return gammaCorrection_(element, gamma); }, dst);\n}", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2064246723", + "repo_full_name": "opencv/opencv", + "pr_number": 27051, + "pr_file": "modules/photo/src/ccm/utils.cpp", + "discussion_id": "2064246723", + "commented_code": "@@ -0,0 +1,86 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+//\n+// Author: Longbu Wang \n+// Jinheng Zhang \n+// Chenqi Shan \n+\n+#include \"utils.hpp\"\n+\n+namespace cv {\n+namespace ccm {\n+\n+inline double gammaCorrection_(double element, double gamma)\n+{\n+ return (element >= 0 ? pow(element, gamma) : -pow((-element), gamma));\n+}\n+\n+Mat gammaCorrection(const Mat& src, double gamma, Mat dst)\n+{\n+ return elementWise(src, [gamma](double element) -> double { return gammaCorrection_(element, gamma); }, dst);\n+}", + "comment_created_at": "2025-04-28T18:19:59+00:00", + "comment_author": "asmorkalov", + "comment_body": "I propose to convert Gamma correction to public function with InputArray and OutputArray. It's useful independently from the pipeline and also may be significantly optimized with universal intrinsics.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2055506089", + "pr_number": 27238, + "pr_file": "modules/dnn/src/layers/attention_layer.cpp", + "created_at": "2025-04-23T08:16:24+00:00", + "commented_code": "}\n}\n\n\nstatic void rotationKernel(\n float* data, const float* rotation_table,\n size_t seq_len, size_t d\n)\n{\n CV_Assert(d % 2 == 0);\n const size_t d_half = d / 2;\n\n double nstripes = double(seq_len) * d_half * (1.0/1024.0);\n\n auto fn = [&](const cv::Range& range)\n {\n for (int t = range.start; t < range.end; ++t)\n {\n float* out_ptr = data + size_t(t) * d;\n const float* table_ptr = rotation_table + size_t(t) * d;\n size_t i = 0;\n\n#if (CV_SIMD || CV_SIMD_SCALABLE)\n const size_t w = VTraits::vlanes();\n for (; i + w <= d_half; i += w)\n {\n v_float32 sin_v, cos_v, x_even, x_odd;\n v_load_deinterleave(table_ptr + 2*i, sin_v, cos_v);\n v_load_deinterleave(out_ptr + 2*i, x_even, x_odd);\n\n v_float32 out_even = v_sub(v_mul(cos_v, x_even), v_mul(sin_v, x_odd));\n v_float32 out_odd = v_add(v_mul(sin_v, x_even), v_mul(cos_v, x_odd));\n\n v_store_interleave(out_ptr + 2*i, out_even, out_odd);\n }\n#endif\n // scalar tail\n for (; i < d_half; ++i)\n {\n float s = table_ptr[2*i ];\n float c = table_ptr[2*i+1];\n float xe = out_ptr[2*i];\n float xo = out_ptr[2*i+1];\n out_ptr[2*i] = xe * c - xo * s;\n out_ptr[2*i+1] = xo * c + xe * s;\n }\n }\n };\n\n // This will spin up threads and run fn over [0, seq_len)\n parallel_for_(cv::Range(0, int(seq_len)), fn, nstripes);\n}\n\nstatic void precompRotationTable(float *data,\n size_t seq_len,\n size_t d) {\n // RoPE precomputation\n // RoPE is a positional encoding method used in transformer models.\n // It uses sine and cosine functions to encode the position of tokens in a sequence\n // initially introduced for NLP in https://arxiv.org/pdf/2104.09864\n\n // assume data is of shape [seq_ken,d]\n const float logBase = std::log(10000.0f);\n const float inv_d = 1.0f / float(d);\n const size_t d_half = d / 2;\n for (size_t pos = 0; pos < seq_len; ++pos) {\n\n size_t i 
= 0;\n float* data_ptr = data + pos * d;\n\n#if (CV_SIMD || CV_SIMD_SCALABLE)\n const size_t w = VTraits::vlanes();\n const v_float32 v_logBase = v_setall_f32(logBase);\n const v_float32 v_inv_d = v_setall_f32(inv_d);\n const v_float32 v_neg2 = v_setall_f32(-2.0f);\n\n for (; i + w <= d_half; i+=w) {\n int idx_buf[CV_SIMD_WIDTH];", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2055506089", + "repo_full_name": "opencv/opencv", + "pr_number": 27238, + "pr_file": "modules/dnn/src/layers/attention_layer.cpp", + "discussion_id": "2055506089", + "commented_code": "@@ -24,6 +24,105 @@ static void packWeight(size_t num_heads, size_t head_size, size_t input_hidden_s\n }\n }\n \n+\n+static void rotationKernel(\n+ float* data, const float* rotation_table,\n+ size_t seq_len, size_t d\n+)\n+{\n+ CV_Assert(d % 2 == 0);\n+ const size_t d_half = d / 2;\n+\n+ double nstripes = double(seq_len) * d_half * (1.0/1024.0);\n+\n+ auto fn = [&](const cv::Range& range)\n+ {\n+ for (int t = range.start; t < range.end; ++t)\n+ {\n+ float* out_ptr = data + size_t(t) * d;\n+ const float* table_ptr = rotation_table + size_t(t) * d;\n+ size_t i = 0;\n+\n+#if (CV_SIMD || CV_SIMD_SCALABLE)\n+ const size_t w = VTraits::vlanes();\n+ for (; i + w <= d_half; i += w)\n+ {\n+ v_float32 sin_v, cos_v, x_even, x_odd;\n+ v_load_deinterleave(table_ptr + 2*i, sin_v, cos_v);\n+ v_load_deinterleave(out_ptr + 2*i, x_even, x_odd);\n+\n+ v_float32 out_even = v_sub(v_mul(cos_v, x_even), v_mul(sin_v, x_odd));\n+ v_float32 out_odd = v_add(v_mul(sin_v, x_even), v_mul(cos_v, x_odd));\n+\n+ v_store_interleave(out_ptr + 2*i, out_even, out_odd);\n+ }\n+#endif\n+ // scalar tail\n+ for (; i < d_half; ++i)\n+ {\n+ float s = table_ptr[2*i ];\n+ float c = table_ptr[2*i+1];\n+ float xe = out_ptr[2*i];\n+ float xo = out_ptr[2*i+1];\n+ out_ptr[2*i] = xe * c - xo * s;\n+ out_ptr[2*i+1] = xo * c + xe * s;\n+ }\n+ }\n+ };\n+\n+ // This will spin up threads and run fn over [0, seq_len)\n+ parallel_for_(cv::Range(0, int(seq_len)), fn, nstripes);\n+}\n+\n+static void precompRotationTable(float *data,\n+ size_t seq_len,\n+ size_t d) {\n+ // RoPE precomputation\n+ // RoPE is a positional encoding method used in transformer models.\n+ // It uses sine and cosine functions to encode the position of tokens in a sequence\n+ // initially introduced for NLP in https://arxiv.org/pdf/2104.09864\n+\n+ // assume data is of shape [seq_ken,d]\n+ const float logBase = std::log(10000.0f);\n+ const float inv_d = 1.0f / float(d);\n+ const size_t d_half = d / 2;\n+ for (size_t pos = 0; pos < seq_len; ++pos) {\n+\n+ size_t i = 0;\n+ float* data_ptr = data + pos * d;\n+\n+#if (CV_SIMD || CV_SIMD_SCALABLE)\n+ const size_t w = VTraits::vlanes();\n+ const v_float32 v_logBase = v_setall_f32(logBase);\n+ const v_float32 v_inv_d = v_setall_f32(inv_d);\n+ const v_float32 v_neg2 = v_setall_f32(-2.0f);\n+\n+ for (; i + w <= d_half; i+=w) {\n+ int idx_buf[CV_SIMD_WIDTH];", + "comment_created_at": "2025-04-23T08:16:24+00:00", + "comment_author": "asmorkalov", + "comment_body": "CV_SIMD_WIDTH is compile time constant. It may not work correctly with _SCALABLE branch. please use `VTraits::max_nlanes` instead. 
For fixed-size SIMD it works in the same way.", + "pr_file_module": null + }, + { + "comment_id": "2055511356", + "repo_full_name": "opencv/opencv", + "pr_number": 27238, + "pr_file": "modules/dnn/src/layers/attention_layer.cpp", + "discussion_id": "2055506089", + "commented_code": "@@ -24,6 +24,105 @@ static void packWeight(size_t num_heads, size_t head_size, size_t input_hidden_s\n }\n }\n \n+\n+static void rotationKernel(\n+ float* data, const float* rotation_table,\n+ size_t seq_len, size_t d\n+)\n+{\n+ CV_Assert(d % 2 == 0);\n+ const size_t d_half = d / 2;\n+\n+ double nstripes = double(seq_len) * d_half * (1.0/1024.0);\n+\n+ auto fn = [&](const cv::Range& range)\n+ {\n+ for (int t = range.start; t < range.end; ++t)\n+ {\n+ float* out_ptr = data + size_t(t) * d;\n+ const float* table_ptr = rotation_table + size_t(t) * d;\n+ size_t i = 0;\n+\n+#if (CV_SIMD || CV_SIMD_SCALABLE)\n+ const size_t w = VTraits::vlanes();\n+ for (; i + w <= d_half; i += w)\n+ {\n+ v_float32 sin_v, cos_v, x_even, x_odd;\n+ v_load_deinterleave(table_ptr + 2*i, sin_v, cos_v);\n+ v_load_deinterleave(out_ptr + 2*i, x_even, x_odd);\n+\n+ v_float32 out_even = v_sub(v_mul(cos_v, x_even), v_mul(sin_v, x_odd));\n+ v_float32 out_odd = v_add(v_mul(sin_v, x_even), v_mul(cos_v, x_odd));\n+\n+ v_store_interleave(out_ptr + 2*i, out_even, out_odd);\n+ }\n+#endif\n+ // scalar tail\n+ for (; i < d_half; ++i)\n+ {\n+ float s = table_ptr[2*i ];\n+ float c = table_ptr[2*i+1];\n+ float xe = out_ptr[2*i];\n+ float xo = out_ptr[2*i+1];\n+ out_ptr[2*i] = xe * c - xo * s;\n+ out_ptr[2*i+1] = xo * c + xe * s;\n+ }\n+ }\n+ };\n+\n+ // This will spin up threads and run fn over [0, seq_len)\n+ parallel_for_(cv::Range(0, int(seq_len)), fn, nstripes);\n+}\n+\n+static void precompRotationTable(float *data,\n+ size_t seq_len,\n+ size_t d) {\n+ // RoPE precomputation\n+ // RoPE is a positional encoding method used in transformer models.\n+ // It uses sine and cosine functions to encode the position of tokens in a sequence\n+ // initially introduced for NLP in https://arxiv.org/pdf/2104.09864\n+\n+ // assume data is of shape [seq_ken,d]\n+ const float logBase = std::log(10000.0f);\n+ const float inv_d = 1.0f / float(d);\n+ const size_t d_half = d / 2;\n+ for (size_t pos = 0; pos < seq_len; ++pos) {\n+\n+ size_t i = 0;\n+ float* data_ptr = data + pos * d;\n+\n+#if (CV_SIMD || CV_SIMD_SCALABLE)\n+ const size_t w = VTraits::vlanes();\n+ const v_float32 v_logBase = v_setall_f32(logBase);\n+ const v_float32 v_inv_d = v_setall_f32(inv_d);\n+ const v_float32 v_neg2 = v_setall_f32(-2.0f);\n+\n+ for (; i + w <= d_half; i+=w) {\n+ int idx_buf[CV_SIMD_WIDTH];", + "comment_created_at": "2025-04-23T08:19:39+00:00", + "comment_author": "asmorkalov", + "comment_body": "@fengyuentau please correct me, if I'm wrong.", + "pr_file_module": null + }, + { + "comment_id": "2055637649", + "repo_full_name": "opencv/opencv", + "pr_number": 27238, + "pr_file": "modules/dnn/src/layers/attention_layer.cpp", + "discussion_id": "2055506089", + "commented_code": "@@ -24,6 +24,105 @@ static void packWeight(size_t num_heads, size_t head_size, size_t input_hidden_s\n }\n }\n \n+\n+static void rotationKernel(\n+ float* data, const float* rotation_table,\n+ size_t seq_len, size_t d\n+)\n+{\n+ CV_Assert(d % 2 == 0);\n+ const size_t d_half = d / 2;\n+\n+ double nstripes = double(seq_len) * d_half * (1.0/1024.0);\n+\n+ auto fn = [&](const cv::Range& range)\n+ {\n+ for (int t = range.start; t < range.end; ++t)\n+ {\n+ float* out_ptr = data + size_t(t) * d;\n+ const float* table_ptr = 
rotation_table + size_t(t) * d;\n+ size_t i = 0;\n+\n+#if (CV_SIMD || CV_SIMD_SCALABLE)\n+ const size_t w = VTraits::vlanes();\n+ for (; i + w <= d_half; i += w)\n+ {\n+ v_float32 sin_v, cos_v, x_even, x_odd;\n+ v_load_deinterleave(table_ptr + 2*i, sin_v, cos_v);\n+ v_load_deinterleave(out_ptr + 2*i, x_even, x_odd);\n+\n+ v_float32 out_even = v_sub(v_mul(cos_v, x_even), v_mul(sin_v, x_odd));\n+ v_float32 out_odd = v_add(v_mul(sin_v, x_even), v_mul(cos_v, x_odd));\n+\n+ v_store_interleave(out_ptr + 2*i, out_even, out_odd);\n+ }\n+#endif\n+ // scalar tail\n+ for (; i < d_half; ++i)\n+ {\n+ float s = table_ptr[2*i ];\n+ float c = table_ptr[2*i+1];\n+ float xe = out_ptr[2*i];\n+ float xo = out_ptr[2*i+1];\n+ out_ptr[2*i] = xe * c - xo * s;\n+ out_ptr[2*i+1] = xo * c + xe * s;\n+ }\n+ }\n+ };\n+\n+ // This will spin up threads and run fn over [0, seq_len)\n+ parallel_for_(cv::Range(0, int(seq_len)), fn, nstripes);\n+}\n+\n+static void precompRotationTable(float *data,\n+ size_t seq_len,\n+ size_t d) {\n+ // RoPE precomputation\n+ // RoPE is a positional encoding method used in transformer models.\n+ // It uses sine and cosine functions to encode the position of tokens in a sequence\n+ // initially introduced for NLP in https://arxiv.org/pdf/2104.09864\n+\n+ // assume data is of shape [seq_ken,d]\n+ const float logBase = std::log(10000.0f);\n+ const float inv_d = 1.0f / float(d);\n+ const size_t d_half = d / 2;\n+ for (size_t pos = 0; pos < seq_len; ++pos) {\n+\n+ size_t i = 0;\n+ float* data_ptr = data + pos * d;\n+\n+#if (CV_SIMD || CV_SIMD_SCALABLE)\n+ const size_t w = VTraits::vlanes();\n+ const v_float32 v_logBase = v_setall_f32(logBase);\n+ const v_float32 v_inv_d = v_setall_f32(inv_d);\n+ const v_float32 v_neg2 = v_setall_f32(-2.0f);\n+\n+ for (; i + w <= d_half; i+=w) {\n+ int idx_buf[CV_SIMD_WIDTH];", + "comment_created_at": "2025-04-23T09:25:04+00:00", + "comment_author": "nklskyoy", + "comment_body": "@asmorkalov,\r\n\r\n```\r\nconst size_t w = VTraits::vlanes();\r\n...\r\nint idx_buf[w];\r\n```\r\n\r\ncaused **[-Wvla-cxx-extension]** in GCC and the build for Win64 failed. 
\r\nI wasn't sure what's the proper way to deal with this..", + "pr_file_module": null + }, + { + "comment_id": "2059815455", + "repo_full_name": "opencv/opencv", + "pr_number": 27238, + "pr_file": "modules/dnn/src/layers/attention_layer.cpp", + "discussion_id": "2055506089", + "commented_code": "@@ -24,6 +24,105 @@ static void packWeight(size_t num_heads, size_t head_size, size_t input_hidden_s\n }\n }\n \n+\n+static void rotationKernel(\n+ float* data, const float* rotation_table,\n+ size_t seq_len, size_t d\n+)\n+{\n+ CV_Assert(d % 2 == 0);\n+ const size_t d_half = d / 2;\n+\n+ double nstripes = double(seq_len) * d_half * (1.0/1024.0);\n+\n+ auto fn = [&](const cv::Range& range)\n+ {\n+ for (int t = range.start; t < range.end; ++t)\n+ {\n+ float* out_ptr = data + size_t(t) * d;\n+ const float* table_ptr = rotation_table + size_t(t) * d;\n+ size_t i = 0;\n+\n+#if (CV_SIMD || CV_SIMD_SCALABLE)\n+ const size_t w = VTraits::vlanes();\n+ for (; i + w <= d_half; i += w)\n+ {\n+ v_float32 sin_v, cos_v, x_even, x_odd;\n+ v_load_deinterleave(table_ptr + 2*i, sin_v, cos_v);\n+ v_load_deinterleave(out_ptr + 2*i, x_even, x_odd);\n+\n+ v_float32 out_even = v_sub(v_mul(cos_v, x_even), v_mul(sin_v, x_odd));\n+ v_float32 out_odd = v_add(v_mul(sin_v, x_even), v_mul(cos_v, x_odd));\n+\n+ v_store_interleave(out_ptr + 2*i, out_even, out_odd);\n+ }\n+#endif\n+ // scalar tail\n+ for (; i < d_half; ++i)\n+ {\n+ float s = table_ptr[2*i ];\n+ float c = table_ptr[2*i+1];\n+ float xe = out_ptr[2*i];\n+ float xo = out_ptr[2*i+1];\n+ out_ptr[2*i] = xe * c - xo * s;\n+ out_ptr[2*i+1] = xo * c + xe * s;\n+ }\n+ }\n+ };\n+\n+ // This will spin up threads and run fn over [0, seq_len)\n+ parallel_for_(cv::Range(0, int(seq_len)), fn, nstripes);\n+}\n+\n+static void precompRotationTable(float *data,\n+ size_t seq_len,\n+ size_t d) {\n+ // RoPE precomputation\n+ // RoPE is a positional encoding method used in transformer models.\n+ // It uses sine and cosine functions to encode the position of tokens in a sequence\n+ // initially introduced for NLP in https://arxiv.org/pdf/2104.09864\n+\n+ // assume data is of shape [seq_ken,d]\n+ const float logBase = std::log(10000.0f);\n+ const float inv_d = 1.0f / float(d);\n+ const size_t d_half = d / 2;\n+ for (size_t pos = 0; pos < seq_len; ++pos) {\n+\n+ size_t i = 0;\n+ float* data_ptr = data + pos * d;\n+\n+#if (CV_SIMD || CV_SIMD_SCALABLE)\n+ const size_t w = VTraits::vlanes();\n+ const v_float32 v_logBase = v_setall_f32(logBase);\n+ const v_float32 v_inv_d = v_setall_f32(inv_d);\n+ const v_float32 v_neg2 = v_setall_f32(-2.0f);\n+\n+ for (; i + w <= d_half; i+=w) {\n+ int idx_buf[CV_SIMD_WIDTH];", + "comment_created_at": "2025-04-25T08:38:09+00:00", + "comment_author": "asmorkalov", + "comment_body": "Yes, it does not work, because `w` value is not known in compile time. `VTraits::max_nlanes` is compile time constant. It's equal to `CV_SIMD_WIDTH` for fixed SIMD size architectures (x86). 
RISC-V RVV vector size is not known in compile time, but we know maximum vector length and use it for intermediate buffers to fit any feasible vector size.", + "pr_file_module": null + }, + { + "comment_id": "2062730807", + "repo_full_name": "opencv/opencv", + "pr_number": 27238, + "pr_file": "modules/dnn/src/layers/attention_layer.cpp", + "discussion_id": "2055506089", + "commented_code": "@@ -24,6 +24,105 @@ static void packWeight(size_t num_heads, size_t head_size, size_t input_hidden_s\n }\n }\n \n+\n+static void rotationKernel(\n+ float* data, const float* rotation_table,\n+ size_t seq_len, size_t d\n+)\n+{\n+ CV_Assert(d % 2 == 0);\n+ const size_t d_half = d / 2;\n+\n+ double nstripes = double(seq_len) * d_half * (1.0/1024.0);\n+\n+ auto fn = [&](const cv::Range& range)\n+ {\n+ for (int t = range.start; t < range.end; ++t)\n+ {\n+ float* out_ptr = data + size_t(t) * d;\n+ const float* table_ptr = rotation_table + size_t(t) * d;\n+ size_t i = 0;\n+\n+#if (CV_SIMD || CV_SIMD_SCALABLE)\n+ const size_t w = VTraits::vlanes();\n+ for (; i + w <= d_half; i += w)\n+ {\n+ v_float32 sin_v, cos_v, x_even, x_odd;\n+ v_load_deinterleave(table_ptr + 2*i, sin_v, cos_v);\n+ v_load_deinterleave(out_ptr + 2*i, x_even, x_odd);\n+\n+ v_float32 out_even = v_sub(v_mul(cos_v, x_even), v_mul(sin_v, x_odd));\n+ v_float32 out_odd = v_add(v_mul(sin_v, x_even), v_mul(cos_v, x_odd));\n+\n+ v_store_interleave(out_ptr + 2*i, out_even, out_odd);\n+ }\n+#endif\n+ // scalar tail\n+ for (; i < d_half; ++i)\n+ {\n+ float s = table_ptr[2*i ];\n+ float c = table_ptr[2*i+1];\n+ float xe = out_ptr[2*i];\n+ float xo = out_ptr[2*i+1];\n+ out_ptr[2*i] = xe * c - xo * s;\n+ out_ptr[2*i+1] = xo * c + xe * s;\n+ }\n+ }\n+ };\n+\n+ // This will spin up threads and run fn over [0, seq_len)\n+ parallel_for_(cv::Range(0, int(seq_len)), fn, nstripes);\n+}\n+\n+static void precompRotationTable(float *data,\n+ size_t seq_len,\n+ size_t d) {\n+ // RoPE precomputation\n+ // RoPE is a positional encoding method used in transformer models.\n+ // It uses sine and cosine functions to encode the position of tokens in a sequence\n+ // initially introduced for NLP in https://arxiv.org/pdf/2104.09864\n+\n+ // assume data is of shape [seq_ken,d]\n+ const float logBase = std::log(10000.0f);\n+ const float inv_d = 1.0f / float(d);\n+ const size_t d_half = d / 2;\n+ for (size_t pos = 0; pos < seq_len; ++pos) {\n+\n+ size_t i = 0;\n+ float* data_ptr = data + pos * d;\n+\n+#if (CV_SIMD || CV_SIMD_SCALABLE)\n+ const size_t w = VTraits::vlanes();\n+ const v_float32 v_logBase = v_setall_f32(logBase);\n+ const v_float32 v_inv_d = v_setall_f32(inv_d);\n+ const v_float32 v_neg2 = v_setall_f32(-2.0f);\n+\n+ for (; i + w <= d_half; i+=w) {\n+ int idx_buf[CV_SIMD_WIDTH];", + "comment_created_at": "2025-04-27T21:55:13+00:00", + "comment_author": "nklskyoy", + "comment_body": "I have replaced CV_SIMD_WIDTH with VTraits::max_nlanes for both CV_SIMD and CV_SIMD_SCALABLE branch", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1562079542", + "pr_number": 25394, + "pr_file": "modules/core/src/convert.dispatch.cpp", + "created_at": "2024-04-12T06:31:13+00:00", + "commented_code": "int dtype = CV_MAKETYPE(ddepth, cn);\n\n Mat src = *this;\n dst.create(dims, size, dtype);\n Mat dstMat = dst.getMat();\n Mat dstMat;\n if (this->data == dst.getMat().data && this->elemSize() == CV_ELEM_SIZE(dtype)) {", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1562079542", + "repo_full_name": "opencv/opencv", + "pr_number": 25394, + 
"pr_file": "modules/core/src/convert.dispatch.cpp", + "discussion_id": "1562079542", + "commented_code": "@@ -278,8 +278,16 @@ void Mat::convertTo(OutputArray dst, int type_, double alpha, double beta) const\n int dtype = CV_MAKETYPE(ddepth, cn);\n \n Mat src = *this;\n- dst.create(dims, size, dtype);\n- Mat dstMat = dst.getMat();\n+ Mat dstMat;\n+ if (this->data == dst.getMat().data && this->elemSize() == CV_ELEM_SIZE(dtype)) {", + "comment_created_at": "2024-04-12T06:31:13+00:00", + "comment_author": "dkurt", + "comment_body": "There is no check for size/dims match. For example, ROI will have the same data ptr:\r\n```cpp\r\nMat m (10, 10, CV_8U);\r\nMat roi = m.rowRange(0, 2);\r\nm.convertTo(roi, CV_8S);\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1563996085", + "repo_full_name": "opencv/opencv", + "pr_number": 25394, + "pr_file": "modules/core/src/convert.dispatch.cpp", + "discussion_id": "1562079542", + "commented_code": "@@ -278,8 +278,16 @@ void Mat::convertTo(OutputArray dst, int type_, double alpha, double beta) const\n int dtype = CV_MAKETYPE(ddepth, cn);\n \n Mat src = *this;\n- dst.create(dims, size, dtype);\n- Mat dstMat = dst.getMat();\n+ Mat dstMat;\n+ if (this->data == dst.getMat().data && this->elemSize() == CV_ELEM_SIZE(dtype)) {", + "comment_created_at": "2024-04-13T12:33:15+00:00", + "comment_author": "Gao-HaoYuan", + "comment_body": "I had through **!dst.getMat().isSubmatrix()** to determine this case.", + "pr_file_module": null + }, + { + "comment_id": "1564539155", + "repo_full_name": "opencv/opencv", + "pr_number": 25394, + "pr_file": "modules/core/src/convert.dispatch.cpp", + "discussion_id": "1562079542", + "commented_code": "@@ -278,8 +278,16 @@ void Mat::convertTo(OutputArray dst, int type_, double alpha, double beta) const\n int dtype = CV_MAKETYPE(ddepth, cn);\n \n Mat src = *this;\n- dst.create(dims, size, dtype);\n- Mat dstMat = dst.getMat();\n+ Mat dstMat;\n+ if (this->data == dst.getMat().data && this->elemSize() == CV_ELEM_SIZE(dtype)) {", + "comment_created_at": "2024-04-14T07:46:22+00:00", + "comment_author": "dkurt", + "comment_body": "What about the following case? `.data` matched but not the shapes.\r\n```cpp\r\nMat m (10, 10, CV_8U);\r\nMat m2(5, 10, CV_8U, m.ptr());\r\nm.convertTo(m2, CV_8S);\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1564719925", + "repo_full_name": "opencv/opencv", + "pr_number": 25394, + "pr_file": "modules/core/src/convert.dispatch.cpp", + "discussion_id": "1562079542", + "commented_code": "@@ -278,8 +278,16 @@ void Mat::convertTo(OutputArray dst, int type_, double alpha, double beta) const\n int dtype = CV_MAKETYPE(ddepth, cn);\n \n Mat src = *this;\n- dst.create(dims, size, dtype);\n- Mat dstMat = dst.getMat();\n+ Mat dstMat;\n+ if (this->data == dst.getMat().data && this->elemSize() == CV_ELEM_SIZE(dtype)) {", + "comment_created_at": "2024-04-14T13:15:04+00:00", + "comment_author": "Gao-HaoYuan", + "comment_body": "Firstly, if **roi** is a submatrix, the refcount will add 1. \r\n`if (this->data == dst.getMat().data && this->elemSize() == CV_ELEM_SIZE(dtype) && refcount == 2) `\r\n This condition can recognize the first case.\r\n`Mat m (10, 10, CV_8U);\r\nMat roi = m.rowRange(0, 2);\r\nm.convertTo(roi, CV_8S);`\r\n\r\nBUt, I have come up with a new question. Like, \r\n\r\n`Mat m (10, 10, CV_8U);\r\nMat m2(10, 10, CV_8U, m.ptr());\r\nm.convertTo(m2, CV_8S);`\r\n\r\nIn this case, the member 'u' is NULL. 
By checking whether 'u' is null, we can solve this problem.\r\n", + "pr_file_module": null + }, + { + "comment_id": "1564721975", + "repo_full_name": "opencv/opencv", + "pr_number": 25394, + "pr_file": "modules/core/src/convert.dispatch.cpp", + "discussion_id": "1562079542", + "commented_code": "@@ -278,8 +278,16 @@ void Mat::convertTo(OutputArray dst, int type_, double alpha, double beta) const\n int dtype = CV_MAKETYPE(ddepth, cn);\n \n Mat src = *this;\n- dst.create(dims, size, dtype);\n- Mat dstMat = dst.getMat();\n+ Mat dstMat;\n+ if (this->data == dst.getMat().data && this->elemSize() == CV_ELEM_SIZE(dtype)) {", + "comment_created_at": "2024-04-14T13:19:50+00:00", + "comment_author": "Gao-HaoYuan", + "comment_body": "`Mat m (10, 10, CV_8U); Mat m2(10, 10, CV_8U, m.ptr()); m.convertTo(m, CV_8S);`\r\n\r\nAnd, in this case, the original data of 'm' will be released, and 'm2' may encounter unpredictable errors. I believe such risky behavior should be noted by OpenCV users.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1930610880", + "pr_number": 26831, + "pr_file": "modules/photo/test/test_denoising.cpp", + "created_at": "2025-01-27T14:22:49+00:00", + "commented_code": "t = (double)getTickCount() - t;\n printf(\"execution time: %gms\\n\", t*1000./getTickFrequency());\n}\n// Related issue :\n// - https://github.com/opencv/opencv/issues/26582\nclass Photo_DenoisingGrayscaleMulti16Bit : public ::testing::Test {\nprotected:\n struct TestConfig {\n int width = 127;\n int height = 129;\n int imgs_count = 3;\n float h = 15.0f;\n int templateWindowSize = 7;\n int searchWindowSize = 21;\n };\n\n static double computePSNR(const cv::Mat& I1, const cv::Mat& I2) {\n CV_Assert(I1.type() == I2.type() && I1.size() == I2.size());\n cv::Mat s1;\n cv::absdiff(I1, I2, s1);\n s1.convertTo(s1, CV_32F);\n s1 = s1.mul(s1);\n cv::Scalar s = cv::sum(s1);\n double mse = s[0] / static_cast(I1.total());\n\n if (mse == 0) return INFINITY;\n\n double max_pixel = 65535.0;\n return 10.0 * log10((max_pixel * max_pixel) / mse);\n }", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1930610880", + "repo_full_name": "opencv/opencv", + "pr_number": 26831, + "pr_file": "modules/photo/test/test_denoising.cpp", + "discussion_id": "1930610880", + "commented_code": "@@ -164,5 +164,80 @@ TEST(Photo_Denoising, speed)\n t = (double)getTickCount() - t;\n printf(\"execution time: %gms\\n\", t*1000./getTickFrequency());\n }\n+// Related issue :\n+// - https://github.com/opencv/opencv/issues/26582\n+class Photo_DenoisingGrayscaleMulti16Bit : public ::testing::Test {\n+protected:\n+ struct TestConfig {\n+ int width = 127;\n+ int height = 129;\n+ int imgs_count = 3;\n+ float h = 15.0f;\n+ int templateWindowSize = 7;\n+ int searchWindowSize = 21;\n+ };\n+\n+ static double computePSNR(const cv::Mat& I1, const cv::Mat& I2) {\n+ CV_Assert(I1.type() == I2.type() && I1.size() == I2.size());\n+ cv::Mat s1;\n+ cv::absdiff(I1, I2, s1);\n+ s1.convertTo(s1, CV_32F);\n+ s1 = s1.mul(s1);\n+ cv::Scalar s = cv::sum(s1);\n+ double mse = s[0] / static_cast(I1.total());\n+\n+ if (mse == 0) return INFINITY;\n+\n+ double max_pixel = 65535.0;\n+ return 10.0 * log10((max_pixel * max_pixel) / mse);\n+ }", + "comment_created_at": "2025-01-27T14:22:49+00:00", + "comment_author": "asmorkalov", + "comment_body": "Please use cv::PSNR instead: https://docs.opencv.org/4.x/d2/de8/group__core__array.html#ga3119e3ea73010a6f810bb05aa36ac8d6", + "pr_file_module": null + }, + { + "comment_id": "1930621103", + 
"repo_full_name": "opencv/opencv", + "pr_number": 26831, + "pr_file": "modules/photo/test/test_denoising.cpp", + "discussion_id": "1930610880", + "commented_code": "@@ -164,5 +164,80 @@ TEST(Photo_Denoising, speed)\n t = (double)getTickCount() - t;\n printf(\"execution time: %gms\\n\", t*1000./getTickFrequency());\n }\n+// Related issue :\n+// - https://github.com/opencv/opencv/issues/26582\n+class Photo_DenoisingGrayscaleMulti16Bit : public ::testing::Test {\n+protected:\n+ struct TestConfig {\n+ int width = 127;\n+ int height = 129;\n+ int imgs_count = 3;\n+ float h = 15.0f;\n+ int templateWindowSize = 7;\n+ int searchWindowSize = 21;\n+ };\n+\n+ static double computePSNR(const cv::Mat& I1, const cv::Mat& I2) {\n+ CV_Assert(I1.type() == I2.type() && I1.size() == I2.size());\n+ cv::Mat s1;\n+ cv::absdiff(I1, I2, s1);\n+ s1.convertTo(s1, CV_32F);\n+ s1 = s1.mul(s1);\n+ cv::Scalar s = cv::sum(s1);\n+ double mse = s[0] / static_cast(I1.total());\n+\n+ if (mse == 0) return INFINITY;\n+\n+ double max_pixel = 65535.0;\n+ return 10.0 * log10((max_pixel * max_pixel) / mse);\n+ }", + "comment_created_at": "2025-01-27T14:29:00+00:00", + "comment_author": "shyama7004", + "comment_body": "Ok, I will .", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1915996982", + "pr_number": 26773, + "pr_file": "modules/imgproc/src/shapedescr.cpp", + "created_at": "2025-01-15T06:18:42+00:00", + "commented_code": "static inline Point2f getOfs(int i, float eps)\n{\n return Point2f(((i & 1)*2 - 1)*eps, ((i & 2) - 1)*eps);\n std::mt19937 gen(i);\n std::uniform_real_distribution dis(-eps, eps);\n return Point2f(dis(gen), dis(gen));", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1915996982", + "repo_full_name": "opencv/opencv", + "pr_number": 26773, + "pr_file": "modules/imgproc/src/shapedescr.cpp", + "discussion_id": "1915996982", + "commented_code": "@@ -342,7 +342,9 @@ namespace cv\n \n static inline Point2f getOfs(int i, float eps)\n {\n- return Point2f(((i & 1)*2 - 1)*eps, ((i & 2) - 1)*eps);\n+ std::mt19937 gen(i);\n+ std::uniform_real_distribution dis(-eps, eps);\n+ return Point2f(dis(gen), dis(gen));", + "comment_created_at": "2025-01-15T06:18:42+00:00", + "comment_author": "asmorkalov", + "comment_body": "I propose to use cv::RNG for it to make it manageable outside:\r\n- User may set seed to get deterministic behaviour\r\n- OpenCV test system fixes cv::RNG seed for each test independently.", + "pr_file_module": null + }, + { + "comment_id": "1917342515", + "repo_full_name": "opencv/opencv", + "pr_number": 26773, + "pr_file": "modules/imgproc/src/shapedescr.cpp", + "discussion_id": "1915996982", + "commented_code": "@@ -342,7 +342,9 @@ namespace cv\n \n static inline Point2f getOfs(int i, float eps)\n {\n- return Point2f(((i & 1)*2 - 1)*eps, ((i & 2) - 1)*eps);\n+ std::mt19937 gen(i);\n+ std::uniform_real_distribution dis(-eps, eps);\n+ return Point2f(dis(gen), dis(gen));", + "comment_created_at": "2025-01-15T21:14:07+00:00", + "comment_author": "MaximSmolskiy", + "comment_body": "Used `cv::RNG`", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-use-optimized-functions.md b/_reviewers/opencv-use-optimized-functions.md index 02d8a0c..ce6ee57 100644 --- a/_reviewers/opencv-use-optimized-functions.md +++ b/_reviewers/opencv-use-optimized-functions.md @@ -52,281 +52,3 @@ Mat saturate(Mat& src, const double& low, const double& up) return dst; } ``` - - -[ - { - "discussion_id": "2024769056", - "pr_number": 27051, 
- "pr_file": "modules/photo/src/ccm/utils.cpp", - "created_at": "2025-04-02T12:53:13+00:00", - "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n//\n// Author: Longbu Wang \n// Jinheng Zhang \n// Chenqi Shan \n\n#include \"utils.hpp\"\n\nnamespace cv {\nnamespace ccm {\n\ninline double gammaCorrection_(const double& element, const double& gamma)\n{\n return (element >= 0 ? pow(element, gamma) : -pow((-element), gamma));\n}\n\nMat gammaCorrection(const Mat& src, const double& gamma, Mat dst)\n{\n return elementWise(src, [gamma](double element) -> double { return gammaCorrection_(element, gamma); }, dst);\n}\n\nMat maskCopyTo(const Mat& src, const Mat& mask)\n{\n Mat dst(countNonZero(mask), 1, src.type());\n const int channel = src.channels();\n auto it_mask = mask.begin();\n switch (channel)\n {\n case 1:\n {\n auto it_src = src.begin(), end_src = src.end();\n auto it_dst = dst.begin();\n for (; it_src != end_src; ++it_src, ++it_mask)\n {\n if (*it_mask)\n {\n (*it_dst) = (*it_src);\n ++it_dst;\n }\n }\n break;\n }\n case 3:\n {\n auto it_src = src.begin(), end_src = src.end();\n auto it_dst = dst.begin();\n for (; it_src != end_src; ++it_src, ++it_mask)\n {\n if (*it_mask)\n {\n (*it_dst) = (*it_src);\n ++it_dst;\n }\n }\n break;\n }\n default:\n CV_Error(Error::StsBadArg, \"Wrong channel!\" );\n break;\n }\n return dst;\n}\n\nMat multiple(const Mat& xyz, const Mat& ccm)\n{\n Mat tmp = xyz.reshape(1, xyz.rows * xyz.cols);\n Mat res = tmp * ccm;\n res = res.reshape(res.cols, xyz.rows);\n return res;\n}\n\nMat saturate(Mat& src, const double& low, const double& up)\n{\n Mat dst = Mat::ones(src.size(), CV_8UC1);\n MatIterator_ it_src = src.begin(), end_src = src.end();\n MatIterator_ it_dst = dst.begin();\n for (; it_src != end_src; ++it_src, ++it_dst)\n {\n for (int i = 0; i < 3; ++i)\n {\n if ((*it_src)[i] > up || (*it_src)[i] < low)\n {\n *it_dst = 0;\n break;\n }\n }\n }\n return dst;\n}", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2024769056", - "repo_full_name": "opencv/opencv", - "pr_number": 27051, - "pr_file": "modules/photo/src/ccm/utils.cpp", - "discussion_id": "2024769056", - "commented_code": "@@ -0,0 +1,100 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+//\n+// Author: Longbu Wang \n+// Jinheng Zhang \n+// Chenqi Shan \n+\n+#include \"utils.hpp\"\n+\n+namespace cv {\n+namespace ccm {\n+\n+inline double gammaCorrection_(const double& element, const double& gamma)\n+{\n+ return (element >= 0 ? 
pow(element, gamma) : -pow((-element), gamma));\n+}\n+\n+Mat gammaCorrection(const Mat& src, const double& gamma, Mat dst)\n+{\n+ return elementWise(src, [gamma](double element) -> double { return gammaCorrection_(element, gamma); }, dst);\n+}\n+\n+Mat maskCopyTo(const Mat& src, const Mat& mask)\n+{\n+ Mat dst(countNonZero(mask), 1, src.type());\n+ const int channel = src.channels();\n+ auto it_mask = mask.begin();\n+ switch (channel)\n+ {\n+ case 1:\n+ {\n+ auto it_src = src.begin(), end_src = src.end();\n+ auto it_dst = dst.begin();\n+ for (; it_src != end_src; ++it_src, ++it_mask)\n+ {\n+ if (*it_mask)\n+ {\n+ (*it_dst) = (*it_src);\n+ ++it_dst;\n+ }\n+ }\n+ break;\n+ }\n+ case 3:\n+ {\n+ auto it_src = src.begin(), end_src = src.end();\n+ auto it_dst = dst.begin();\n+ for (; it_src != end_src; ++it_src, ++it_mask)\n+ {\n+ if (*it_mask)\n+ {\n+ (*it_dst) = (*it_src);\n+ ++it_dst;\n+ }\n+ }\n+ break;\n+ }\n+ default:\n+ CV_Error(Error::StsBadArg, \"Wrong channel!\" );\n+ break;\n+ }\n+ return dst;\n+}\n+\n+Mat multiple(const Mat& xyz, const Mat& ccm)\n+{\n+ Mat tmp = xyz.reshape(1, xyz.rows * xyz.cols);\n+ Mat res = tmp * ccm;\n+ res = res.reshape(res.cols, xyz.rows);\n+ return res;\n+}\n+\n+Mat saturate(Mat& src, const double& low, const double& up)\n+{\n+ Mat dst = Mat::ones(src.size(), CV_8UC1);\n+ MatIterator_ it_src = src.begin(), end_src = src.end();\n+ MatIterator_ it_dst = dst.begin();\n+ for (; it_src != end_src; ++it_src, ++it_dst)\n+ {\n+ for (int i = 0; i < 3; ++i)\n+ {\n+ if ((*it_src)[i] > up || (*it_src)[i] < low)\n+ {\n+ *it_dst = 0;\n+ break;\n+ }\n+ }\n+ }\n+ return dst;\n+}", - "comment_created_at": "2025-04-02T12:53:13+00:00", - "comment_author": "asmorkalov", - "comment_body": "inRange should be enough instead of it: https://docs.opencv.org/4.x/d2/de8/group__core__array.html#ga48af0ab51e36436c5d04340e036ce981", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2064230851", - "pr_number": 27051, - "pr_file": "modules/photo/src/ccm/ccm.cpp", - "created_at": "2025-04-28T18:10:17+00:00", - "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n//\n// Author: Longbu Wang \n// Jinheng Zhang \n// Chenqi Shan \n\n#include \"opencv2/photo.hpp\"\n#include \"linearize.hpp\"\nnamespace cv {\nnamespace ccm {\nclass ColorCorrectionModel::Impl\n{\npublic:\n Mat src;\n\n std::shared_ptr dst = std::make_shared();\n Mat dist;\n RGBBase_& cs;\n // Track initialization parameters for serialization\n ColorSpace csEnum;\n Mat mask;\n\n // RGBl of detected data and the reference\n Mat srcRgbl;\n Mat dstRgbl;\n\n // ccm type and shape\n CcmType ccmType;\n int shape;\n\n // linear method and distance\n std::shared_ptr linear = std::make_shared();\n DistanceType distance;\n LinearizationType linearizationType;\n\n Mat weights;\n Mat weightsList;\n Mat ccm;\n Mat ccm0;\n double gamma;\n int deg;\n std::vector saturatedThreshold;\n InitialMethodType initialMethodType;\n double weightsCoeff;\n int maskedLen;\n double loss;\n int maxCount;\n double epsilon;\n Impl();\n\n /** @brief Make no change for CCM_LINEAR.\n convert cv::Mat A to [A, 1] in CCM_AFFINE.\n @param inp the input array, type of cv::Mat.\n @return the output array, type of cv::Mat\n */\n Mat prepare(const Mat& inp);\n\n /** @brief Calculate weights and mask.\n @param weightsList the input array, type of cv::Mat.\n @param weightsCoeff type of double.\n @param 
saturateMask the input array, type of cv::Mat.\n */\n void calWeightsMasks(const Mat& weightsList, double weightsCoeff, Mat saturateMask);\n\n /** @brief Fitting nonlinear - optimization initial value by white balance.\n @return the output array, type of Mat\n */\n void initialWhiteBalance(void);\n\n /** @brief Fitting nonlinear-optimization initial value by least square.\n @param fit if fit is True, return optimalization for rgbl distance function.\n */\n void initialLeastSquare(bool fit = false);\n\n double calcLoss_(Color color);\n double calcLoss(const Mat ccm_);\n\n /** @brief Fitting ccm if distance function is associated with CIE Lab color space.\n see details in https://github.com/opencv/opencv/blob/master/modules/core/include/opencv2/core/optim.hpp\n Set terminal criteria for solver is possible.\n */\n void fitting(void);\n\n void getColor(Mat& img_, bool islinear = false);\n void getColor(ColorCheckerType constColor);\n void getColor(Mat colors_, ColorSpace cs_, Mat colored_);\n void getColor(Mat colors_, ColorSpace refColorSpace_);\n\n /** @brief Loss function base on cv::MinProblemSolver::Function.\n see details in https://github.com/opencv/opencv/blob/master/modules/core/include/opencv2/core/optim.hpp\n */\n class LossFunction : public MinProblemSolver::Function\n {\n public:\n ColorCorrectionModel::Impl* ccmLoss;\n LossFunction(ColorCorrectionModel::Impl* ccm)\n : ccmLoss(ccm) {};\n\n /** @brief Reset dims to ccm->shape.\n */\n int getDims() const CV_OVERRIDE\n {\n return ccmLoss->shape;\n }\n\n /** @brief Reset calculation.\n */\n double calc(const double* x) const CV_OVERRIDE\n {\n Mat ccm_(ccmLoss->shape, 1, CV_64F);\n for (int i = 0; i < ccmLoss->shape; i++)\n {\n ccm_.at(i, 0) = x[i];\n }\n ccm_ = ccm_.reshape(0, ccmLoss->shape / 3);\n return ccmLoss->calcLoss(ccm_);\n }\n };\n};\n\nColorCorrectionModel::Impl::Impl()\n : cs(*GetCS::getInstance().getRgb(COLOR_SPACE_SRGB))\n , csEnum(COLOR_SPACE_SRGB)\n , ccmType(CCM_LINEAR)\n , distance(DISTANCE_CIE2000)\n , linearizationType(LINEARIZATION_GAMMA)\n , weights(Mat())\n , gamma(2.2)\n , deg(3)\n , saturatedThreshold({ 0, 0.98 })\n , initialMethodType(INITIAL_METHOD_LEAST_SQUARE)\n , weightsCoeff(0)\n , maxCount(5000)\n , epsilon(1.e-4)\n{}\n\nMat ColorCorrectionModel::Impl::prepare(const Mat& inp)\n{\n switch (ccmType)\n {\n case cv::ccm::CCM_LINEAR:\n shape = 9;\n return inp;\n case cv::ccm::CCM_AFFINE:\n {\n shape = 12;\n Mat arr1 = Mat::ones(inp.size(), CV_64F);\n Mat arr_out(inp.size(), CV_64FC4);\n Mat arr_channels[3];\n split(inp, arr_channels);\n merge(std::vector { arr_channels[0], arr_channels[1], arr_channels[2], arr1 }, arr_out);\n return arr_out;\n }\n default:\n CV_Error(Error::StsBadArg, \"Wrong ccmType!\");\n break;\n }\n}\n\nvoid ColorCorrectionModel::Impl::calWeightsMasks(const Mat& weightsList_, double weightsCoeff_, Mat saturateMask)\n{\n // weights\n if (!weightsList_.empty())\n {\n weights = weightsList_;\n }\n else if (weightsCoeff_ != 0)\n {\n pow(dst->toLuminant(cs.io), weightsCoeff_, weights);\n }\n\n // masks\n Mat weight_mask = Mat::ones(src.rows, 1, CV_8U);\n if (!weights.empty())\n {\n weight_mask = weights > 0;\n }\n this->mask = (weight_mask) & (saturateMask);\n\n // weights' mask\n if (!weights.empty())\n {\n Mat weights_masked = maskCopyTo(this->weights, this->mask);\n weights = weights_masked / mean(weights_masked)[0];\n }\n maskedLen = (int)sum(mask)[0];\n}\n\nvoid ColorCorrectionModel::Impl::initialWhiteBalance(void)\n{\n Mat schannels[4];\n split(srcRgbl, schannels);\n Mat dchannels[4];\n 
split(dstRgbl, dchannels);\n std::vector initialVec = { sum(dchannels[0])[0] / sum(schannels[0])[0], 0, 0, 0,\n sum(dchannels[1])[0] / sum(schannels[1])[0], 0, 0, 0,\n sum(dchannels[2])[0] / sum(schannels[2])[0], 0, 0, 0 };", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2064230851", - "repo_full_name": "opencv/opencv", - "pr_number": 27051, - "pr_file": "modules/photo/src/ccm/ccm.cpp", - "discussion_id": "2064230851", - "commented_code": "@@ -0,0 +1,504 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+//\n+// Author: Longbu Wang \n+// Jinheng Zhang \n+// Chenqi Shan \n+\n+#include \"opencv2/photo.hpp\"\n+#include \"linearize.hpp\"\n+namespace cv {\n+namespace ccm {\n+class ColorCorrectionModel::Impl\n+{\n+public:\n+ Mat src;\n+\n+ std::shared_ptr dst = std::make_shared();\n+ Mat dist;\n+ RGBBase_& cs;\n+ // Track initialization parameters for serialization\n+ ColorSpace csEnum;\n+ Mat mask;\n+\n+ // RGBl of detected data and the reference\n+ Mat srcRgbl;\n+ Mat dstRgbl;\n+\n+ // ccm type and shape\n+ CcmType ccmType;\n+ int shape;\n+\n+ // linear method and distance\n+ std::shared_ptr linear = std::make_shared();\n+ DistanceType distance;\n+ LinearizationType linearizationType;\n+\n+ Mat weights;\n+ Mat weightsList;\n+ Mat ccm;\n+ Mat ccm0;\n+ double gamma;\n+ int deg;\n+ std::vector saturatedThreshold;\n+ InitialMethodType initialMethodType;\n+ double weightsCoeff;\n+ int maskedLen;\n+ double loss;\n+ int maxCount;\n+ double epsilon;\n+ Impl();\n+\n+ /** @brief Make no change for CCM_LINEAR.\n+ convert cv::Mat A to [A, 1] in CCM_AFFINE.\n+ @param inp the input array, type of cv::Mat.\n+ @return the output array, type of cv::Mat\n+ */\n+ Mat prepare(const Mat& inp);\n+\n+ /** @brief Calculate weights and mask.\n+ @param weightsList the input array, type of cv::Mat.\n+ @param weightsCoeff type of double.\n+ @param saturateMask the input array, type of cv::Mat.\n+ */\n+ void calWeightsMasks(const Mat& weightsList, double weightsCoeff, Mat saturateMask);\n+\n+ /** @brief Fitting nonlinear - optimization initial value by white balance.\n+ @return the output array, type of Mat\n+ */\n+ void initialWhiteBalance(void);\n+\n+ /** @brief Fitting nonlinear-optimization initial value by least square.\n+ @param fit if fit is True, return optimalization for rgbl distance function.\n+ */\n+ void initialLeastSquare(bool fit = false);\n+\n+ double calcLoss_(Color color);\n+ double calcLoss(const Mat ccm_);\n+\n+ /** @brief Fitting ccm if distance function is associated with CIE Lab color space.\n+ see details in https://github.com/opencv/opencv/blob/master/modules/core/include/opencv2/core/optim.hpp\n+ Set terminal criteria for solver is possible.\n+ */\n+ void fitting(void);\n+\n+ void getColor(Mat& img_, bool islinear = false);\n+ void getColor(ColorCheckerType constColor);\n+ void getColor(Mat colors_, ColorSpace cs_, Mat colored_);\n+ void getColor(Mat colors_, ColorSpace refColorSpace_);\n+\n+ /** @brief Loss function base on cv::MinProblemSolver::Function.\n+ see details in https://github.com/opencv/opencv/blob/master/modules/core/include/opencv2/core/optim.hpp\n+ */\n+ class LossFunction : public MinProblemSolver::Function\n+ {\n+ public:\n+ ColorCorrectionModel::Impl* ccmLoss;\n+ LossFunction(ColorCorrectionModel::Impl* ccm)\n+ : ccmLoss(ccm) {};\n+\n+ /** @brief Reset dims to ccm->shape.\n+ */\n+ int 
getDims() const CV_OVERRIDE\n+ {\n+ return ccmLoss->shape;\n+ }\n+\n+ /** @brief Reset calculation.\n+ */\n+ double calc(const double* x) const CV_OVERRIDE\n+ {\n+ Mat ccm_(ccmLoss->shape, 1, CV_64F);\n+ for (int i = 0; i < ccmLoss->shape; i++)\n+ {\n+ ccm_.at(i, 0) = x[i];\n+ }\n+ ccm_ = ccm_.reshape(0, ccmLoss->shape / 3);\n+ return ccmLoss->calcLoss(ccm_);\n+ }\n+ };\n+};\n+\n+ColorCorrectionModel::Impl::Impl()\n+ : cs(*GetCS::getInstance().getRgb(COLOR_SPACE_SRGB))\n+ , csEnum(COLOR_SPACE_SRGB)\n+ , ccmType(CCM_LINEAR)\n+ , distance(DISTANCE_CIE2000)\n+ , linearizationType(LINEARIZATION_GAMMA)\n+ , weights(Mat())\n+ , gamma(2.2)\n+ , deg(3)\n+ , saturatedThreshold({ 0, 0.98 })\n+ , initialMethodType(INITIAL_METHOD_LEAST_SQUARE)\n+ , weightsCoeff(0)\n+ , maxCount(5000)\n+ , epsilon(1.e-4)\n+{}\n+\n+Mat ColorCorrectionModel::Impl::prepare(const Mat& inp)\n+{\n+ switch (ccmType)\n+ {\n+ case cv::ccm::CCM_LINEAR:\n+ shape = 9;\n+ return inp;\n+ case cv::ccm::CCM_AFFINE:\n+ {\n+ shape = 12;\n+ Mat arr1 = Mat::ones(inp.size(), CV_64F);\n+ Mat arr_out(inp.size(), CV_64FC4);\n+ Mat arr_channels[3];\n+ split(inp, arr_channels);\n+ merge(std::vector { arr_channels[0], arr_channels[1], arr_channels[2], arr1 }, arr_out);\n+ return arr_out;\n+ }\n+ default:\n+ CV_Error(Error::StsBadArg, \"Wrong ccmType!\");\n+ break;\n+ }\n+}\n+\n+void ColorCorrectionModel::Impl::calWeightsMasks(const Mat& weightsList_, double weightsCoeff_, Mat saturateMask)\n+{\n+ // weights\n+ if (!weightsList_.empty())\n+ {\n+ weights = weightsList_;\n+ }\n+ else if (weightsCoeff_ != 0)\n+ {\n+ pow(dst->toLuminant(cs.io), weightsCoeff_, weights);\n+ }\n+\n+ // masks\n+ Mat weight_mask = Mat::ones(src.rows, 1, CV_8U);\n+ if (!weights.empty())\n+ {\n+ weight_mask = weights > 0;\n+ }\n+ this->mask = (weight_mask) & (saturateMask);\n+\n+ // weights' mask\n+ if (!weights.empty())\n+ {\n+ Mat weights_masked = maskCopyTo(this->weights, this->mask);\n+ weights = weights_masked / mean(weights_masked)[0];\n+ }\n+ maskedLen = (int)sum(mask)[0];\n+}\n+\n+void ColorCorrectionModel::Impl::initialWhiteBalance(void)\n+{\n+ Mat schannels[4];\n+ split(srcRgbl, schannels);\n+ Mat dchannels[4];\n+ split(dstRgbl, dchannels);\n+ std::vector initialVec = { sum(dchannels[0])[0] / sum(schannels[0])[0], 0, 0, 0,\n+ sum(dchannels[1])[0] / sum(schannels[1])[0], 0, 0, 0,\n+ sum(dchannels[2])[0] / sum(schannels[2])[0], 0, 0, 0 };", - "comment_created_at": "2025-04-28T18:10:17+00:00", - "comment_author": "asmorkalov", - "comment_body": "No need to spit cv::Mat by channels for it. cv::sum supports channels: https://docs.opencv.org/5.x/d2/de8/group__core__array.html#ga716e10a2dd9e228e4d3c95818f106722", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2064246723", - "pr_number": 27051, - "pr_file": "modules/photo/src/ccm/utils.cpp", - "created_at": "2025-04-28T18:19:59+00:00", - "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n//\n// Author: Longbu Wang \n// Jinheng Zhang \n// Chenqi Shan \n\n#include \"utils.hpp\"\n\nnamespace cv {\nnamespace ccm {\n\ninline double gammaCorrection_(double element, double gamma)\n{\n return (element >= 0 ? 
pow(element, gamma) : -pow((-element), gamma));\n}\n\nMat gammaCorrection(const Mat& src, double gamma, Mat dst)\n{\n return elementWise(src, [gamma](double element) -> double { return gammaCorrection_(element, gamma); }, dst);\n}", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2064246723", - "repo_full_name": "opencv/opencv", - "pr_number": 27051, - "pr_file": "modules/photo/src/ccm/utils.cpp", - "discussion_id": "2064246723", - "commented_code": "@@ -0,0 +1,86 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+//\n+// Author: Longbu Wang \n+// Jinheng Zhang \n+// Chenqi Shan \n+\n+#include \"utils.hpp\"\n+\n+namespace cv {\n+namespace ccm {\n+\n+inline double gammaCorrection_(double element, double gamma)\n+{\n+ return (element >= 0 ? pow(element, gamma) : -pow((-element), gamma));\n+}\n+\n+Mat gammaCorrection(const Mat& src, double gamma, Mat dst)\n+{\n+ return elementWise(src, [gamma](double element) -> double { return gammaCorrection_(element, gamma); }, dst);\n+}", - "comment_created_at": "2025-04-28T18:19:59+00:00", - "comment_author": "asmorkalov", - "comment_body": "I propose to convert Gamma correction to public function with InputArray and OutputArray. It's useful independently from the pipeline and also may be significantly optimized with universal intrinsics.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2055506089", - "pr_number": 27238, - "pr_file": "modules/dnn/src/layers/attention_layer.cpp", - "created_at": "2025-04-23T08:16:24+00:00", - "commented_code": "}\n}\n\n\nstatic void rotationKernel(\n float* data, const float* rotation_table,\n size_t seq_len, size_t d\n)\n{\n CV_Assert(d % 2 == 0);\n const size_t d_half = d / 2;\n\n double nstripes = double(seq_len) * d_half * (1.0/1024.0);\n\n auto fn = [&](const cv::Range& range)\n {\n for (int t = range.start; t < range.end; ++t)\n {\n float* out_ptr = data + size_t(t) * d;\n const float* table_ptr = rotation_table + size_t(t) * d;\n size_t i = 0;\n\n#if (CV_SIMD || CV_SIMD_SCALABLE)\n const size_t w = VTraits::vlanes();\n for (; i + w <= d_half; i += w)\n {\n v_float32 sin_v, cos_v, x_even, x_odd;\n v_load_deinterleave(table_ptr + 2*i, sin_v, cos_v);\n v_load_deinterleave(out_ptr + 2*i, x_even, x_odd);\n\n v_float32 out_even = v_sub(v_mul(cos_v, x_even), v_mul(sin_v, x_odd));\n v_float32 out_odd = v_add(v_mul(sin_v, x_even), v_mul(cos_v, x_odd));\n\n v_store_interleave(out_ptr + 2*i, out_even, out_odd);\n }\n#endif\n // scalar tail\n for (; i < d_half; ++i)\n {\n float s = table_ptr[2*i ];\n float c = table_ptr[2*i+1];\n float xe = out_ptr[2*i];\n float xo = out_ptr[2*i+1];\n out_ptr[2*i] = xe * c - xo * s;\n out_ptr[2*i+1] = xo * c + xe * s;\n }\n }\n };\n\n // This will spin up threads and run fn over [0, seq_len)\n parallel_for_(cv::Range(0, int(seq_len)), fn, nstripes);\n}\n\nstatic void precompRotationTable(float *data,\n size_t seq_len,\n size_t d) {\n // RoPE precomputation\n // RoPE is a positional encoding method used in transformer models.\n // It uses sine and cosine functions to encode the position of tokens in a sequence\n // initially introduced for NLP in https://arxiv.org/pdf/2104.09864\n\n // assume data is of shape [seq_ken,d]\n const float logBase = std::log(10000.0f);\n const float inv_d = 1.0f / float(d);\n const size_t d_half = d / 2;\n for (size_t pos = 0; pos < seq_len; ++pos) {\n\n size_t i 
= 0;\n float* data_ptr = data + pos * d;\n\n#if (CV_SIMD || CV_SIMD_SCALABLE)\n const size_t w = VTraits::vlanes();\n const v_float32 v_logBase = v_setall_f32(logBase);\n const v_float32 v_inv_d = v_setall_f32(inv_d);\n const v_float32 v_neg2 = v_setall_f32(-2.0f);\n\n for (; i + w <= d_half; i+=w) {\n int idx_buf[CV_SIMD_WIDTH];", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2055506089", - "repo_full_name": "opencv/opencv", - "pr_number": 27238, - "pr_file": "modules/dnn/src/layers/attention_layer.cpp", - "discussion_id": "2055506089", - "commented_code": "@@ -24,6 +24,105 @@ static void packWeight(size_t num_heads, size_t head_size, size_t input_hidden_s\n }\n }\n \n+\n+static void rotationKernel(\n+ float* data, const float* rotation_table,\n+ size_t seq_len, size_t d\n+)\n+{\n+ CV_Assert(d % 2 == 0);\n+ const size_t d_half = d / 2;\n+\n+ double nstripes = double(seq_len) * d_half * (1.0/1024.0);\n+\n+ auto fn = [&](const cv::Range& range)\n+ {\n+ for (int t = range.start; t < range.end; ++t)\n+ {\n+ float* out_ptr = data + size_t(t) * d;\n+ const float* table_ptr = rotation_table + size_t(t) * d;\n+ size_t i = 0;\n+\n+#if (CV_SIMD || CV_SIMD_SCALABLE)\n+ const size_t w = VTraits::vlanes();\n+ for (; i + w <= d_half; i += w)\n+ {\n+ v_float32 sin_v, cos_v, x_even, x_odd;\n+ v_load_deinterleave(table_ptr + 2*i, sin_v, cos_v);\n+ v_load_deinterleave(out_ptr + 2*i, x_even, x_odd);\n+\n+ v_float32 out_even = v_sub(v_mul(cos_v, x_even), v_mul(sin_v, x_odd));\n+ v_float32 out_odd = v_add(v_mul(sin_v, x_even), v_mul(cos_v, x_odd));\n+\n+ v_store_interleave(out_ptr + 2*i, out_even, out_odd);\n+ }\n+#endif\n+ // scalar tail\n+ for (; i < d_half; ++i)\n+ {\n+ float s = table_ptr[2*i ];\n+ float c = table_ptr[2*i+1];\n+ float xe = out_ptr[2*i];\n+ float xo = out_ptr[2*i+1];\n+ out_ptr[2*i] = xe * c - xo * s;\n+ out_ptr[2*i+1] = xo * c + xe * s;\n+ }\n+ }\n+ };\n+\n+ // This will spin up threads and run fn over [0, seq_len)\n+ parallel_for_(cv::Range(0, int(seq_len)), fn, nstripes);\n+}\n+\n+static void precompRotationTable(float *data,\n+ size_t seq_len,\n+ size_t d) {\n+ // RoPE precomputation\n+ // RoPE is a positional encoding method used in transformer models.\n+ // It uses sine and cosine functions to encode the position of tokens in a sequence\n+ // initially introduced for NLP in https://arxiv.org/pdf/2104.09864\n+\n+ // assume data is of shape [seq_ken,d]\n+ const float logBase = std::log(10000.0f);\n+ const float inv_d = 1.0f / float(d);\n+ const size_t d_half = d / 2;\n+ for (size_t pos = 0; pos < seq_len; ++pos) {\n+\n+ size_t i = 0;\n+ float* data_ptr = data + pos * d;\n+\n+#if (CV_SIMD || CV_SIMD_SCALABLE)\n+ const size_t w = VTraits::vlanes();\n+ const v_float32 v_logBase = v_setall_f32(logBase);\n+ const v_float32 v_inv_d = v_setall_f32(inv_d);\n+ const v_float32 v_neg2 = v_setall_f32(-2.0f);\n+\n+ for (; i + w <= d_half; i+=w) {\n+ int idx_buf[CV_SIMD_WIDTH];", - "comment_created_at": "2025-04-23T08:16:24+00:00", - "comment_author": "asmorkalov", - "comment_body": "CV_SIMD_WIDTH is compile time constant. It may not work correctly with _SCALABLE branch. please use `VTraits::max_nlanes` instead. 
For fixed-size SIMD it works in the same way.", - "pr_file_module": null - }, - { - "comment_id": "2055511356", - "repo_full_name": "opencv/opencv", - "pr_number": 27238, - "pr_file": "modules/dnn/src/layers/attention_layer.cpp", - "discussion_id": "2055506089", - "commented_code": "@@ -24,6 +24,105 @@ static void packWeight(size_t num_heads, size_t head_size, size_t input_hidden_s\n }\n }\n \n+\n+static void rotationKernel(\n+ float* data, const float* rotation_table,\n+ size_t seq_len, size_t d\n+)\n+{\n+ CV_Assert(d % 2 == 0);\n+ const size_t d_half = d / 2;\n+\n+ double nstripes = double(seq_len) * d_half * (1.0/1024.0);\n+\n+ auto fn = [&](const cv::Range& range)\n+ {\n+ for (int t = range.start; t < range.end; ++t)\n+ {\n+ float* out_ptr = data + size_t(t) * d;\n+ const float* table_ptr = rotation_table + size_t(t) * d;\n+ size_t i = 0;\n+\n+#if (CV_SIMD || CV_SIMD_SCALABLE)\n+ const size_t w = VTraits::vlanes();\n+ for (; i + w <= d_half; i += w)\n+ {\n+ v_float32 sin_v, cos_v, x_even, x_odd;\n+ v_load_deinterleave(table_ptr + 2*i, sin_v, cos_v);\n+ v_load_deinterleave(out_ptr + 2*i, x_even, x_odd);\n+\n+ v_float32 out_even = v_sub(v_mul(cos_v, x_even), v_mul(sin_v, x_odd));\n+ v_float32 out_odd = v_add(v_mul(sin_v, x_even), v_mul(cos_v, x_odd));\n+\n+ v_store_interleave(out_ptr + 2*i, out_even, out_odd);\n+ }\n+#endif\n+ // scalar tail\n+ for (; i < d_half; ++i)\n+ {\n+ float s = table_ptr[2*i ];\n+ float c = table_ptr[2*i+1];\n+ float xe = out_ptr[2*i];\n+ float xo = out_ptr[2*i+1];\n+ out_ptr[2*i] = xe * c - xo * s;\n+ out_ptr[2*i+1] = xo * c + xe * s;\n+ }\n+ }\n+ };\n+\n+ // This will spin up threads and run fn over [0, seq_len)\n+ parallel_for_(cv::Range(0, int(seq_len)), fn, nstripes);\n+}\n+\n+static void precompRotationTable(float *data,\n+ size_t seq_len,\n+ size_t d) {\n+ // RoPE precomputation\n+ // RoPE is a positional encoding method used in transformer models.\n+ // It uses sine and cosine functions to encode the position of tokens in a sequence\n+ // initially introduced for NLP in https://arxiv.org/pdf/2104.09864\n+\n+ // assume data is of shape [seq_ken,d]\n+ const float logBase = std::log(10000.0f);\n+ const float inv_d = 1.0f / float(d);\n+ const size_t d_half = d / 2;\n+ for (size_t pos = 0; pos < seq_len; ++pos) {\n+\n+ size_t i = 0;\n+ float* data_ptr = data + pos * d;\n+\n+#if (CV_SIMD || CV_SIMD_SCALABLE)\n+ const size_t w = VTraits::vlanes();\n+ const v_float32 v_logBase = v_setall_f32(logBase);\n+ const v_float32 v_inv_d = v_setall_f32(inv_d);\n+ const v_float32 v_neg2 = v_setall_f32(-2.0f);\n+\n+ for (; i + w <= d_half; i+=w) {\n+ int idx_buf[CV_SIMD_WIDTH];", - "comment_created_at": "2025-04-23T08:19:39+00:00", - "comment_author": "asmorkalov", - "comment_body": "@fengyuentau please correct me, if I'm wrong.", - "pr_file_module": null - }, - { - "comment_id": "2055637649", - "repo_full_name": "opencv/opencv", - "pr_number": 27238, - "pr_file": "modules/dnn/src/layers/attention_layer.cpp", - "discussion_id": "2055506089", - "commented_code": "@@ -24,6 +24,105 @@ static void packWeight(size_t num_heads, size_t head_size, size_t input_hidden_s\n }\n }\n \n+\n+static void rotationKernel(\n+ float* data, const float* rotation_table,\n+ size_t seq_len, size_t d\n+)\n+{\n+ CV_Assert(d % 2 == 0);\n+ const size_t d_half = d / 2;\n+\n+ double nstripes = double(seq_len) * d_half * (1.0/1024.0);\n+\n+ auto fn = [&](const cv::Range& range)\n+ {\n+ for (int t = range.start; t < range.end; ++t)\n+ {\n+ float* out_ptr = data + size_t(t) * d;\n+ const float* table_ptr = 
rotation_table + size_t(t) * d;\n+ size_t i = 0;\n+\n+#if (CV_SIMD || CV_SIMD_SCALABLE)\n+ const size_t w = VTraits::vlanes();\n+ for (; i + w <= d_half; i += w)\n+ {\n+ v_float32 sin_v, cos_v, x_even, x_odd;\n+ v_load_deinterleave(table_ptr + 2*i, sin_v, cos_v);\n+ v_load_deinterleave(out_ptr + 2*i, x_even, x_odd);\n+\n+ v_float32 out_even = v_sub(v_mul(cos_v, x_even), v_mul(sin_v, x_odd));\n+ v_float32 out_odd = v_add(v_mul(sin_v, x_even), v_mul(cos_v, x_odd));\n+\n+ v_store_interleave(out_ptr + 2*i, out_even, out_odd);\n+ }\n+#endif\n+ // scalar tail\n+ for (; i < d_half; ++i)\n+ {\n+ float s = table_ptr[2*i ];\n+ float c = table_ptr[2*i+1];\n+ float xe = out_ptr[2*i];\n+ float xo = out_ptr[2*i+1];\n+ out_ptr[2*i] = xe * c - xo * s;\n+ out_ptr[2*i+1] = xo * c + xe * s;\n+ }\n+ }\n+ };\n+\n+ // This will spin up threads and run fn over [0, seq_len)\n+ parallel_for_(cv::Range(0, int(seq_len)), fn, nstripes);\n+}\n+\n+static void precompRotationTable(float *data,\n+ size_t seq_len,\n+ size_t d) {\n+ // RoPE precomputation\n+ // RoPE is a positional encoding method used in transformer models.\n+ // It uses sine and cosine functions to encode the position of tokens in a sequence\n+ // initially introduced for NLP in https://arxiv.org/pdf/2104.09864\n+\n+ // assume data is of shape [seq_ken,d]\n+ const float logBase = std::log(10000.0f);\n+ const float inv_d = 1.0f / float(d);\n+ const size_t d_half = d / 2;\n+ for (size_t pos = 0; pos < seq_len; ++pos) {\n+\n+ size_t i = 0;\n+ float* data_ptr = data + pos * d;\n+\n+#if (CV_SIMD || CV_SIMD_SCALABLE)\n+ const size_t w = VTraits::vlanes();\n+ const v_float32 v_logBase = v_setall_f32(logBase);\n+ const v_float32 v_inv_d = v_setall_f32(inv_d);\n+ const v_float32 v_neg2 = v_setall_f32(-2.0f);\n+\n+ for (; i + w <= d_half; i+=w) {\n+ int idx_buf[CV_SIMD_WIDTH];", - "comment_created_at": "2025-04-23T09:25:04+00:00", - "comment_author": "nklskyoy", - "comment_body": "@asmorkalov,\r\n\r\n```\r\nconst size_t w = VTraits::vlanes();\r\n...\r\nint idx_buf[w];\r\n```\r\n\r\ncaused **[-Wvla-cxx-extension]** in GCC and the build for Win64 failed. 
\r\nI wasn't sure what's the proper way to deal with this..", - "pr_file_module": null - }, - { - "comment_id": "2059815455", - "repo_full_name": "opencv/opencv", - "pr_number": 27238, - "pr_file": "modules/dnn/src/layers/attention_layer.cpp", - "discussion_id": "2055506089", - "commented_code": "@@ -24,6 +24,105 @@ static void packWeight(size_t num_heads, size_t head_size, size_t input_hidden_s\n }\n }\n \n+\n+static void rotationKernel(\n+ float* data, const float* rotation_table,\n+ size_t seq_len, size_t d\n+)\n+{\n+ CV_Assert(d % 2 == 0);\n+ const size_t d_half = d / 2;\n+\n+ double nstripes = double(seq_len) * d_half * (1.0/1024.0);\n+\n+ auto fn = [&](const cv::Range& range)\n+ {\n+ for (int t = range.start; t < range.end; ++t)\n+ {\n+ float* out_ptr = data + size_t(t) * d;\n+ const float* table_ptr = rotation_table + size_t(t) * d;\n+ size_t i = 0;\n+\n+#if (CV_SIMD || CV_SIMD_SCALABLE)\n+ const size_t w = VTraits::vlanes();\n+ for (; i + w <= d_half; i += w)\n+ {\n+ v_float32 sin_v, cos_v, x_even, x_odd;\n+ v_load_deinterleave(table_ptr + 2*i, sin_v, cos_v);\n+ v_load_deinterleave(out_ptr + 2*i, x_even, x_odd);\n+\n+ v_float32 out_even = v_sub(v_mul(cos_v, x_even), v_mul(sin_v, x_odd));\n+ v_float32 out_odd = v_add(v_mul(sin_v, x_even), v_mul(cos_v, x_odd));\n+\n+ v_store_interleave(out_ptr + 2*i, out_even, out_odd);\n+ }\n+#endif\n+ // scalar tail\n+ for (; i < d_half; ++i)\n+ {\n+ float s = table_ptr[2*i ];\n+ float c = table_ptr[2*i+1];\n+ float xe = out_ptr[2*i];\n+ float xo = out_ptr[2*i+1];\n+ out_ptr[2*i] = xe * c - xo * s;\n+ out_ptr[2*i+1] = xo * c + xe * s;\n+ }\n+ }\n+ };\n+\n+ // This will spin up threads and run fn over [0, seq_len)\n+ parallel_for_(cv::Range(0, int(seq_len)), fn, nstripes);\n+}\n+\n+static void precompRotationTable(float *data,\n+ size_t seq_len,\n+ size_t d) {\n+ // RoPE precomputation\n+ // RoPE is a positional encoding method used in transformer models.\n+ // It uses sine and cosine functions to encode the position of tokens in a sequence\n+ // initially introduced for NLP in https://arxiv.org/pdf/2104.09864\n+\n+ // assume data is of shape [seq_ken,d]\n+ const float logBase = std::log(10000.0f);\n+ const float inv_d = 1.0f / float(d);\n+ const size_t d_half = d / 2;\n+ for (size_t pos = 0; pos < seq_len; ++pos) {\n+\n+ size_t i = 0;\n+ float* data_ptr = data + pos * d;\n+\n+#if (CV_SIMD || CV_SIMD_SCALABLE)\n+ const size_t w = VTraits::vlanes();\n+ const v_float32 v_logBase = v_setall_f32(logBase);\n+ const v_float32 v_inv_d = v_setall_f32(inv_d);\n+ const v_float32 v_neg2 = v_setall_f32(-2.0f);\n+\n+ for (; i + w <= d_half; i+=w) {\n+ int idx_buf[CV_SIMD_WIDTH];", - "comment_created_at": "2025-04-25T08:38:09+00:00", - "comment_author": "asmorkalov", - "comment_body": "Yes, it does not work, because `w` value is not known in compile time. `VTraits::max_nlanes` is compile time constant. It's equal to `CV_SIMD_WIDTH` for fixed SIMD size architectures (x86). 
RISC-V RVV vector size is not known in compile time, but we know maximum vector length and use it for intermediate buffers to fit any feasible vector size.", - "pr_file_module": null - }, - { - "comment_id": "2062730807", - "repo_full_name": "opencv/opencv", - "pr_number": 27238, - "pr_file": "modules/dnn/src/layers/attention_layer.cpp", - "discussion_id": "2055506089", - "commented_code": "@@ -24,6 +24,105 @@ static void packWeight(size_t num_heads, size_t head_size, size_t input_hidden_s\n }\n }\n \n+\n+static void rotationKernel(\n+ float* data, const float* rotation_table,\n+ size_t seq_len, size_t d\n+)\n+{\n+ CV_Assert(d % 2 == 0);\n+ const size_t d_half = d / 2;\n+\n+ double nstripes = double(seq_len) * d_half * (1.0/1024.0);\n+\n+ auto fn = [&](const cv::Range& range)\n+ {\n+ for (int t = range.start; t < range.end; ++t)\n+ {\n+ float* out_ptr = data + size_t(t) * d;\n+ const float* table_ptr = rotation_table + size_t(t) * d;\n+ size_t i = 0;\n+\n+#if (CV_SIMD || CV_SIMD_SCALABLE)\n+ const size_t w = VTraits::vlanes();\n+ for (; i + w <= d_half; i += w)\n+ {\n+ v_float32 sin_v, cos_v, x_even, x_odd;\n+ v_load_deinterleave(table_ptr + 2*i, sin_v, cos_v);\n+ v_load_deinterleave(out_ptr + 2*i, x_even, x_odd);\n+\n+ v_float32 out_even = v_sub(v_mul(cos_v, x_even), v_mul(sin_v, x_odd));\n+ v_float32 out_odd = v_add(v_mul(sin_v, x_even), v_mul(cos_v, x_odd));\n+\n+ v_store_interleave(out_ptr + 2*i, out_even, out_odd);\n+ }\n+#endif\n+ // scalar tail\n+ for (; i < d_half; ++i)\n+ {\n+ float s = table_ptr[2*i ];\n+ float c = table_ptr[2*i+1];\n+ float xe = out_ptr[2*i];\n+ float xo = out_ptr[2*i+1];\n+ out_ptr[2*i] = xe * c - xo * s;\n+ out_ptr[2*i+1] = xo * c + xe * s;\n+ }\n+ }\n+ };\n+\n+ // This will spin up threads and run fn over [0, seq_len)\n+ parallel_for_(cv::Range(0, int(seq_len)), fn, nstripes);\n+}\n+\n+static void precompRotationTable(float *data,\n+ size_t seq_len,\n+ size_t d) {\n+ // RoPE precomputation\n+ // RoPE is a positional encoding method used in transformer models.\n+ // It uses sine and cosine functions to encode the position of tokens in a sequence\n+ // initially introduced for NLP in https://arxiv.org/pdf/2104.09864\n+\n+ // assume data is of shape [seq_ken,d]\n+ const float logBase = std::log(10000.0f);\n+ const float inv_d = 1.0f / float(d);\n+ const size_t d_half = d / 2;\n+ for (size_t pos = 0; pos < seq_len; ++pos) {\n+\n+ size_t i = 0;\n+ float* data_ptr = data + pos * d;\n+\n+#if (CV_SIMD || CV_SIMD_SCALABLE)\n+ const size_t w = VTraits::vlanes();\n+ const v_float32 v_logBase = v_setall_f32(logBase);\n+ const v_float32 v_inv_d = v_setall_f32(inv_d);\n+ const v_float32 v_neg2 = v_setall_f32(-2.0f);\n+\n+ for (; i + w <= d_half; i+=w) {\n+ int idx_buf[CV_SIMD_WIDTH];", - "comment_created_at": "2025-04-27T21:55:13+00:00", - "comment_author": "nklskyoy", - "comment_body": "I have replaced CV_SIMD_WIDTH with VTraits::max_nlanes for both CV_SIMD and CV_SIMD_SCALABLE branch", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1562079542", - "pr_number": 25394, - "pr_file": "modules/core/src/convert.dispatch.cpp", - "created_at": "2024-04-12T06:31:13+00:00", - "commented_code": "int dtype = CV_MAKETYPE(ddepth, cn);\n\n Mat src = *this;\n dst.create(dims, size, dtype);\n Mat dstMat = dst.getMat();\n Mat dstMat;\n if (this->data == dst.getMat().data && this->elemSize() == CV_ELEM_SIZE(dtype)) {", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1562079542", - "repo_full_name": "opencv/opencv", - "pr_number": 25394, - 
"pr_file": "modules/core/src/convert.dispatch.cpp", - "discussion_id": "1562079542", - "commented_code": "@@ -278,8 +278,16 @@ void Mat::convertTo(OutputArray dst, int type_, double alpha, double beta) const\n int dtype = CV_MAKETYPE(ddepth, cn);\n \n Mat src = *this;\n- dst.create(dims, size, dtype);\n- Mat dstMat = dst.getMat();\n+ Mat dstMat;\n+ if (this->data == dst.getMat().data && this->elemSize() == CV_ELEM_SIZE(dtype)) {", - "comment_created_at": "2024-04-12T06:31:13+00:00", - "comment_author": "dkurt", - "comment_body": "There is no check for size/dims match. For example, ROI will have the same data ptr:\r\n```cpp\r\nMat m (10, 10, CV_8U);\r\nMat roi = m.rowRange(0, 2);\r\nm.convertTo(roi, CV_8S);\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1563996085", - "repo_full_name": "opencv/opencv", - "pr_number": 25394, - "pr_file": "modules/core/src/convert.dispatch.cpp", - "discussion_id": "1562079542", - "commented_code": "@@ -278,8 +278,16 @@ void Mat::convertTo(OutputArray dst, int type_, double alpha, double beta) const\n int dtype = CV_MAKETYPE(ddepth, cn);\n \n Mat src = *this;\n- dst.create(dims, size, dtype);\n- Mat dstMat = dst.getMat();\n+ Mat dstMat;\n+ if (this->data == dst.getMat().data && this->elemSize() == CV_ELEM_SIZE(dtype)) {", - "comment_created_at": "2024-04-13T12:33:15+00:00", - "comment_author": "Gao-HaoYuan", - "comment_body": "I had through **!dst.getMat().isSubmatrix()** to determine this case.", - "pr_file_module": null - }, - { - "comment_id": "1564539155", - "repo_full_name": "opencv/opencv", - "pr_number": 25394, - "pr_file": "modules/core/src/convert.dispatch.cpp", - "discussion_id": "1562079542", - "commented_code": "@@ -278,8 +278,16 @@ void Mat::convertTo(OutputArray dst, int type_, double alpha, double beta) const\n int dtype = CV_MAKETYPE(ddepth, cn);\n \n Mat src = *this;\n- dst.create(dims, size, dtype);\n- Mat dstMat = dst.getMat();\n+ Mat dstMat;\n+ if (this->data == dst.getMat().data && this->elemSize() == CV_ELEM_SIZE(dtype)) {", - "comment_created_at": "2024-04-14T07:46:22+00:00", - "comment_author": "dkurt", - "comment_body": "What about the following case? `.data` matched but not the shapes.\r\n```cpp\r\nMat m (10, 10, CV_8U);\r\nMat m2(5, 10, CV_8U, m.ptr());\r\nm.convertTo(m2, CV_8S);\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1564719925", - "repo_full_name": "opencv/opencv", - "pr_number": 25394, - "pr_file": "modules/core/src/convert.dispatch.cpp", - "discussion_id": "1562079542", - "commented_code": "@@ -278,8 +278,16 @@ void Mat::convertTo(OutputArray dst, int type_, double alpha, double beta) const\n int dtype = CV_MAKETYPE(ddepth, cn);\n \n Mat src = *this;\n- dst.create(dims, size, dtype);\n- Mat dstMat = dst.getMat();\n+ Mat dstMat;\n+ if (this->data == dst.getMat().data && this->elemSize() == CV_ELEM_SIZE(dtype)) {", - "comment_created_at": "2024-04-14T13:15:04+00:00", - "comment_author": "Gao-HaoYuan", - "comment_body": "Firstly, if **roi** is a submatrix, the refcount will add 1. \r\n`if (this->data == dst.getMat().data && this->elemSize() == CV_ELEM_SIZE(dtype) && refcount == 2) `\r\n This condition can recognize the first case.\r\n`Mat m (10, 10, CV_8U);\r\nMat roi = m.rowRange(0, 2);\r\nm.convertTo(roi, CV_8S);`\r\n\r\nBUt, I have come up with a new question. Like, \r\n\r\n`Mat m (10, 10, CV_8U);\r\nMat m2(10, 10, CV_8U, m.ptr());\r\nm.convertTo(m2, CV_8S);`\r\n\r\nIn this case, the member 'u' is NULL. 
By checking whether 'u' is null, we can solve this problem.\r\n", - "pr_file_module": null - }, - { - "comment_id": "1564721975", - "repo_full_name": "opencv/opencv", - "pr_number": 25394, - "pr_file": "modules/core/src/convert.dispatch.cpp", - "discussion_id": "1562079542", - "commented_code": "@@ -278,8 +278,16 @@ void Mat::convertTo(OutputArray dst, int type_, double alpha, double beta) const\n int dtype = CV_MAKETYPE(ddepth, cn);\n \n Mat src = *this;\n- dst.create(dims, size, dtype);\n- Mat dstMat = dst.getMat();\n+ Mat dstMat;\n+ if (this->data == dst.getMat().data && this->elemSize() == CV_ELEM_SIZE(dtype)) {", - "comment_created_at": "2024-04-14T13:19:50+00:00", - "comment_author": "Gao-HaoYuan", - "comment_body": "`Mat m (10, 10, CV_8U); Mat m2(10, 10, CV_8U, m.ptr()); m.convertTo(m, CV_8S);`\r\n\r\nAnd, in this case, the original data of 'm' will be released, and 'm2' may encounter unpredictable errors. I believe such risky behavior should be noted by OpenCV users.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1930610880", - "pr_number": 26831, - "pr_file": "modules/photo/test/test_denoising.cpp", - "created_at": "2025-01-27T14:22:49+00:00", - "commented_code": "t = (double)getTickCount() - t;\n printf(\"execution time: %gms\\n\", t*1000./getTickFrequency());\n}\n// Related issue :\n// - https://github.com/opencv/opencv/issues/26582\nclass Photo_DenoisingGrayscaleMulti16Bit : public ::testing::Test {\nprotected:\n struct TestConfig {\n int width = 127;\n int height = 129;\n int imgs_count = 3;\n float h = 15.0f;\n int templateWindowSize = 7;\n int searchWindowSize = 21;\n };\n\n static double computePSNR(const cv::Mat& I1, const cv::Mat& I2) {\n CV_Assert(I1.type() == I2.type() && I1.size() == I2.size());\n cv::Mat s1;\n cv::absdiff(I1, I2, s1);\n s1.convertTo(s1, CV_32F);\n s1 = s1.mul(s1);\n cv::Scalar s = cv::sum(s1);\n double mse = s[0] / static_cast(I1.total());\n\n if (mse == 0) return INFINITY;\n\n double max_pixel = 65535.0;\n return 10.0 * log10((max_pixel * max_pixel) / mse);\n }", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1930610880", - "repo_full_name": "opencv/opencv", - "pr_number": 26831, - "pr_file": "modules/photo/test/test_denoising.cpp", - "discussion_id": "1930610880", - "commented_code": "@@ -164,5 +164,80 @@ TEST(Photo_Denoising, speed)\n t = (double)getTickCount() - t;\n printf(\"execution time: %gms\\n\", t*1000./getTickFrequency());\n }\n+// Related issue :\n+// - https://github.com/opencv/opencv/issues/26582\n+class Photo_DenoisingGrayscaleMulti16Bit : public ::testing::Test {\n+protected:\n+ struct TestConfig {\n+ int width = 127;\n+ int height = 129;\n+ int imgs_count = 3;\n+ float h = 15.0f;\n+ int templateWindowSize = 7;\n+ int searchWindowSize = 21;\n+ };\n+\n+ static double computePSNR(const cv::Mat& I1, const cv::Mat& I2) {\n+ CV_Assert(I1.type() == I2.type() && I1.size() == I2.size());\n+ cv::Mat s1;\n+ cv::absdiff(I1, I2, s1);\n+ s1.convertTo(s1, CV_32F);\n+ s1 = s1.mul(s1);\n+ cv::Scalar s = cv::sum(s1);\n+ double mse = s[0] / static_cast(I1.total());\n+\n+ if (mse == 0) return INFINITY;\n+\n+ double max_pixel = 65535.0;\n+ return 10.0 * log10((max_pixel * max_pixel) / mse);\n+ }", - "comment_created_at": "2025-01-27T14:22:49+00:00", - "comment_author": "asmorkalov", - "comment_body": "Please use cv::PSNR instead: https://docs.opencv.org/4.x/d2/de8/group__core__array.html#ga3119e3ea73010a6f810bb05aa36ac8d6", - "pr_file_module": null - }, - { - "comment_id": "1930621103", - 
"repo_full_name": "opencv/opencv", - "pr_number": 26831, - "pr_file": "modules/photo/test/test_denoising.cpp", - "discussion_id": "1930610880", - "commented_code": "@@ -164,5 +164,80 @@ TEST(Photo_Denoising, speed)\n t = (double)getTickCount() - t;\n printf(\"execution time: %gms\\n\", t*1000./getTickFrequency());\n }\n+// Related issue :\n+// - https://github.com/opencv/opencv/issues/26582\n+class Photo_DenoisingGrayscaleMulti16Bit : public ::testing::Test {\n+protected:\n+ struct TestConfig {\n+ int width = 127;\n+ int height = 129;\n+ int imgs_count = 3;\n+ float h = 15.0f;\n+ int templateWindowSize = 7;\n+ int searchWindowSize = 21;\n+ };\n+\n+ static double computePSNR(const cv::Mat& I1, const cv::Mat& I2) {\n+ CV_Assert(I1.type() == I2.type() && I1.size() == I2.size());\n+ cv::Mat s1;\n+ cv::absdiff(I1, I2, s1);\n+ s1.convertTo(s1, CV_32F);\n+ s1 = s1.mul(s1);\n+ cv::Scalar s = cv::sum(s1);\n+ double mse = s[0] / static_cast(I1.total());\n+\n+ if (mse == 0) return INFINITY;\n+\n+ double max_pixel = 65535.0;\n+ return 10.0 * log10((max_pixel * max_pixel) / mse);\n+ }", - "comment_created_at": "2025-01-27T14:29:00+00:00", - "comment_author": "shyama7004", - "comment_body": "Ok, I will .", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1915996982", - "pr_number": 26773, - "pr_file": "modules/imgproc/src/shapedescr.cpp", - "created_at": "2025-01-15T06:18:42+00:00", - "commented_code": "static inline Point2f getOfs(int i, float eps)\n{\n return Point2f(((i & 1)*2 - 1)*eps, ((i & 2) - 1)*eps);\n std::mt19937 gen(i);\n std::uniform_real_distribution dis(-eps, eps);\n return Point2f(dis(gen), dis(gen));", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1915996982", - "repo_full_name": "opencv/opencv", - "pr_number": 26773, - "pr_file": "modules/imgproc/src/shapedescr.cpp", - "discussion_id": "1915996982", - "commented_code": "@@ -342,7 +342,9 @@ namespace cv\n \n static inline Point2f getOfs(int i, float eps)\n {\n- return Point2f(((i & 1)*2 - 1)*eps, ((i & 2) - 1)*eps);\n+ std::mt19937 gen(i);\n+ std::uniform_real_distribution dis(-eps, eps);\n+ return Point2f(dis(gen), dis(gen));", - "comment_created_at": "2025-01-15T06:18:42+00:00", - "comment_author": "asmorkalov", - "comment_body": "I propose to use cv::RNG for it to make it manageable outside:\r\n- User may set seed to get deterministic behaviour\r\n- OpenCV test system fixes cv::RNG seed for each test independently.", - "pr_file_module": null - }, - { - "comment_id": "1917342515", - "repo_full_name": "opencv/opencv", - "pr_number": 26773, - "pr_file": "modules/imgproc/src/shapedescr.cpp", - "discussion_id": "1915996982", - "commented_code": "@@ -342,7 +342,9 @@ namespace cv\n \n static inline Point2f getOfs(int i, float eps)\n {\n- return Point2f(((i & 1)*2 - 1)*eps, ((i & 2) - 1)*eps);\n+ std::mt19937 gen(i);\n+ std::uniform_real_distribution dis(-eps, eps);\n+ return Point2f(dis(gen), dis(gen));", - "comment_created_at": "2025-01-15T21:14:07+00:00", - "comment_author": "MaximSmolskiy", - "comment_body": "Used `cv::RNG`", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-use-proper-assertions.json b/_reviewers/opencv-use-proper-assertions.json new file mode 100644 index 0000000..7a5ab30 --- /dev/null +++ b/_reviewers/opencv-use-proper-assertions.json @@ -0,0 +1,192 @@ +[ + { + "discussion_id": "2024750669", + "pr_number": 27051, + "pr_file": "modules/photo/perf/perf_ccm.cpp", + "created_at": "2025-04-02T12:44:24+00:00", + "commented_code": "// This file is part 
of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n\n#include \"perf_precomp.hpp\"\n#include \"opencv2/photo.hpp\"\n\nnamespace opencv_test\n{\nnamespace\n{\n\nusing namespace std;\nusing namespace cv::ccm;\n\nPERF_TEST(CV_mcc_perf, infer) {\n // read gold chartsRGB\n string path = cvtest::findDataFile(\"cv/mcc/mcc_ccm_test.yml\");\n FileStorage fs(path, FileStorage::READ);\n Mat chartsRGB;\n FileNode node = fs[\"chartsRGB\"];\n node >> chartsRGB;\n fs.release();", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2024750669", + "repo_full_name": "opencv/opencv", + "pr_number": 27051, + "pr_file": "modules/photo/perf/perf_ccm.cpp", + "discussion_id": "2024750669", + "commented_code": "@@ -0,0 +1,41 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+\n+#include \"perf_precomp.hpp\"\n+#include \"opencv2/photo.hpp\"\n+\n+namespace opencv_test\n+{\n+namespace\n+{\n+\n+using namespace std;\n+using namespace cv::ccm;\n+\n+PERF_TEST(CV_mcc_perf, infer) {\n+ // read gold chartsRGB\n+ string path = cvtest::findDataFile(\"cv/mcc/mcc_ccm_test.yml\");\n+ FileStorage fs(path, FileStorage::READ);\n+ Mat chartsRGB;\n+ FileNode node = fs[\"chartsRGB\"];\n+ node >> chartsRGB;\n+ fs.release();", + "comment_created_at": "2025-04-02T12:44:24+00:00", + "comment_author": "asmorkalov", + "comment_body": "`ASSERT_FALSE(chartsRGB.empty());`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2091413835", + "pr_number": 27051, + "pr_file": "modules/photo/perf/perf_ccm.cpp", + "created_at": "2025-05-15T15:07:08+00:00", + "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n\n#include \"perf_precomp.hpp\"\n#include \"opencv2/photo.hpp\"\n\nnamespace opencv_test {\nnamespace {\n\nusing namespace cv;\nusing namespace std;\n\nPERF_TEST(CV_ccm_perf_480_640, correctImage)\n{\n string path = cvtest::findDataFile(\"cv/mcc/mcc_ccm_test.yml\");\n FileStorage fs(path, FileStorage::READ);\n Mat chartsRGB;\n fs[\"chartsRGB\"] >> chartsRGB;\n fs.release();\n\n cv::ccm::ColorCorrectionModel model(\n chartsRGB.col(1).clone().reshape(3, chartsRGB.rows/3) / 255.0,\n cv::ccm::COLORCHECKER_MACBETH\n );\n model.compute();\n Mat img(480, 640, CV_8UC3);\n randu(img, 0, 255);\n img.convertTo(img, CV_64F, 1.0/255.0);\n\n Mat correctedImage;\n TEST_CYCLE() { model.correctImage(img, correctedImage); }\n SANITY_CHECK_NOTHING();\n}\n\nPERF_TEST(CV_ccm_perf_720_1280, correctImage)\n{\n string path = cvtest::findDataFile(\"cv/mcc/mcc_ccm_test.yml\");\n FileStorage fs(path, FileStorage::READ);\n Mat chartsRGB;\n fs[\"chartsRGB\"] >> chartsRGB;", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2091413835", + "repo_full_name": "opencv/opencv", + "pr_number": 27051, + "pr_file": "modules/photo/perf/perf_ccm.cpp", + "discussion_id": "2091413835", + "commented_code": "@@ -0,0 +1,103 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+\n+#include \"perf_precomp.hpp\"\n+#include 
\"opencv2/photo.hpp\"\n+\n+namespace opencv_test {\n+namespace {\n+\n+using namespace cv;\n+using namespace std;\n+\n+PERF_TEST(CV_ccm_perf_480_640, correctImage)\n+{\n+ string path = cvtest::findDataFile(\"cv/mcc/mcc_ccm_test.yml\");\n+ FileStorage fs(path, FileStorage::READ);\n+ Mat chartsRGB;\n+ fs[\"chartsRGB\"] >> chartsRGB;\n+ fs.release();\n+\n+ cv::ccm::ColorCorrectionModel model(\n+ chartsRGB.col(1).clone().reshape(3, chartsRGB.rows/3) / 255.0,\n+ cv::ccm::COLORCHECKER_MACBETH\n+ );\n+ model.compute();\n+ Mat img(480, 640, CV_8UC3);\n+ randu(img, 0, 255);\n+ img.convertTo(img, CV_64F, 1.0/255.0);\n+\n+ Mat correctedImage;\n+ TEST_CYCLE() { model.correctImage(img, correctedImage); }\n+ SANITY_CHECK_NOTHING();\n+}\n+\n+PERF_TEST(CV_ccm_perf_720_1280, correctImage)\n+{\n+ string path = cvtest::findDataFile(\"cv/mcc/mcc_ccm_test.yml\");\n+ FileStorage fs(path, FileStorage::READ);\n+ Mat chartsRGB;\n+ fs[\"chartsRGB\"] >> chartsRGB;", + "comment_created_at": "2025-05-15T15:07:08+00:00", + "comment_author": "asmorkalov", + "comment_body": "`ASSERT_FALSE(chartsRGB.empty());` here and bellow.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2110097906", + "pr_number": 27362, + "pr_file": "modules/core/test/test_mat.cpp", + "created_at": "2025-05-27T20:07:08+00:00", + "commented_code": "EXPECT_EQ(25u, m2.total());\n}\n\nTEST(Core_Mat, empty)\n{\n // Should not crash.\n uint8_t data[2] = {0, 1};\n cv::Mat mat_nd(/*ndims=*/0, /*sizes=*/nullptr, CV_8UC1, /*data=*/data);\n cv::Mat1b mat(0, 0, /*data=*/data, /*steps=*/1);\n}", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2110097906", + "repo_full_name": "opencv/opencv", + "pr_number": 27362, + "pr_file": "modules/core/test/test_mat.cpp", + "discussion_id": "2110097906", + "commented_code": "@@ -1446,6 +1446,14 @@ TEST(Core_Mat, regression_9507)\n EXPECT_EQ(25u, m2.total());\n }\n \n+TEST(Core_Mat, empty)\n+{\n+ // Should not crash.\n+ uint8_t data[2] = {0, 1};\n+ cv::Mat mat_nd(/*ndims=*/0, /*sizes=*/nullptr, CV_8UC1, /*data=*/data);\n+ cv::Mat1b mat(0, 0, /*data=*/data, /*steps=*/1);\n+}", + "comment_created_at": "2025-05-27T20:07:08+00:00", + "comment_author": "vpisarev", + "comment_body": "suggest to add some checks to the test:\r\n\r\n```\r\nTEST(Core_Mat, empty)\r\n{\r\n // Should not crash.\r\n uint8_t data[2] = {0, 1};\r\n cv::Mat mat_nd(/*ndims=*/0, /*sizes=*/nullptr, CV_8UC1, /*data=*/data);\r\n cv::Mat1b mat(0, 0, /*data=*/data, /*steps=*/1);\r\n EXPECT_EQ(mat_nd.dims, 0);\r\n EXPECT_EQ(mat.dims, 2);\r\n EXPECT_LE(mat_nd.total(), 1u); // 0 for 4.x, 1 for 5.x\r\n EXPECT_EQ(mat.total(), 0u);\r\n EXPECT_TRUE(mat.empty());\r\n // mat_nd.empty() should return true for 4.x and false for 5.x\r\n}\r\n```\r\n", + "pr_file_module": null + }, + { + "comment_id": "2115278244", + "repo_full_name": "opencv/opencv", + "pr_number": 27362, + "pr_file": "modules/core/test/test_mat.cpp", + "discussion_id": "2110097906", + "commented_code": "@@ -1446,6 +1446,14 @@ TEST(Core_Mat, regression_9507)\n EXPECT_EQ(25u, m2.total());\n }\n \n+TEST(Core_Mat, empty)\n+{\n+ // Should not crash.\n+ uint8_t data[2] = {0, 1};\n+ cv::Mat mat_nd(/*ndims=*/0, /*sizes=*/nullptr, CV_8UC1, /*data=*/data);\n+ cv::Mat1b mat(0, 0, /*data=*/data, /*steps=*/1);\n+}", + "comment_created_at": "2025-05-30T07:09:08+00:00", + "comment_author": "opencv-alalek", + "comment_body": "> // 0 for 4.x, 1 for 5.x\r\n\r\nDo we really to introduce support in OpenCV 4.x for functionality which has different behavior in OpenCV 5.x.", + 
"pr_file_module": null + }, + { + "comment_id": "2120384779", + "repo_full_name": "opencv/opencv", + "pr_number": 27362, + "pr_file": "modules/core/test/test_mat.cpp", + "discussion_id": "2110097906", + "commented_code": "@@ -1446,6 +1446,14 @@ TEST(Core_Mat, regression_9507)\n EXPECT_EQ(25u, m2.total());\n }\n \n+TEST(Core_Mat, empty)\n+{\n+ // Should not crash.\n+ uint8_t data[2] = {0, 1};\n+ cv::Mat mat_nd(/*ndims=*/0, /*sizes=*/nullptr, CV_8UC1, /*data=*/data);\n+ cv::Mat1b mat(0, 0, /*data=*/data, /*steps=*/1);\n+}", + "comment_created_at": "2025-06-02T08:14:36+00:00", + "comment_author": "vrabaud", + "comment_body": "I fixed the test. @vpisarev , please decide with @opencv-alalek if it worth adding the test on total and empty.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1956243656", + "pr_number": 26907, + "pr_file": "modules/features2d/test/test_fast.cpp", + "created_at": "2025-02-14T14:32:43+00:00", + "commented_code": "TEST(Features2d_FAST, regression) { CV_FastTest test; test.safe_run(); }\n\n// #define DUMP_TEST_DATA\n\nTEST(Features2d_FAST, noNMS)\n{\n Mat img = imread(string(cvtest::TS::ptr()->get_data_path()) + \"inpaint/orig.png\", cv::IMREAD_GRAYSCALE);\n string xml = string(cvtest::TS::ptr()->get_data_path()) + \"fast/result_no_nonmax.xml\";\n\n vector keypoints;\n FAST(img, keypoints, 100, false, FastFeatureDetector::DetectorType::TYPE_9_16);\n Mat kps(1, (int)(keypoints.size() * sizeof(KeyPoint)), CV_8U, &keypoints[0]);\n\n Mat gt_kps;\n FileStorage fs(xml, FileStorage::READ);\n#ifdef DUMP_TEST_DATA\n if (!fs.isOpened())\n {\n fs.open(xml, FileStorage::WRITE);\n fs << \"exp_kps\" << kps;\n fs.release();\n fs.open(xml, FileStorage::READ);\n }\n#endif\n EXPECT_TRUE(fs.isOpened());", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1956243656", + "repo_full_name": "opencv/opencv", + "pr_number": 26907, + "pr_file": "modules/features2d/test/test_fast.cpp", + "discussion_id": "1956243656", + "commented_code": "@@ -135,4 +135,34 @@ void CV_FastTest::run( int )\n \n TEST(Features2d_FAST, regression) { CV_FastTest test; test.safe_run(); }\n \n+// #define DUMP_TEST_DATA\n+\n+TEST(Features2d_FAST, noNMS)\n+{\n+ Mat img = imread(string(cvtest::TS::ptr()->get_data_path()) + \"inpaint/orig.png\", cv::IMREAD_GRAYSCALE);\n+ string xml = string(cvtest::TS::ptr()->get_data_path()) + \"fast/result_no_nonmax.xml\";\n+\n+ vector keypoints;\n+ FAST(img, keypoints, 100, false, FastFeatureDetector::DetectorType::TYPE_9_16);\n+ Mat kps(1, (int)(keypoints.size() * sizeof(KeyPoint)), CV_8U, &keypoints[0]);\n+\n+ Mat gt_kps;\n+ FileStorage fs(xml, FileStorage::READ);\n+#ifdef DUMP_TEST_DATA\n+ if (!fs.isOpened())\n+ {\n+ fs.open(xml, FileStorage::WRITE);\n+ fs << \"exp_kps\" << kps;\n+ fs.release();\n+ fs.open(xml, FileStorage::READ);\n+ }\n+#endif\n+ EXPECT_TRUE(fs.isOpened());", + "comment_created_at": "2025-02-14T14:32:43+00:00", + "comment_author": "mshabunin", + "comment_body": "Maybe `ASSERT_TRUE` would be better in this case? 
Here and on line 163 (`ASSERT_GT`).", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1940521696", + "pr_number": 26872, + "pr_file": "modules/imgcodecs/perf/perf_decode_encode.cpp", + "created_at": "2025-02-04T05:20:31+00:00", + "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html\n\n#include \"perf_precomp.hpp\"\n\nnamespace opencv_test\n{\n\n#ifdef HAVE_PNG\n\nusing namespace perf;\n\ntypedef perf::TestBaseWithParam Decode;\ntypedef perf::TestBaseWithParam Encode;\n\nconst string exts[] = {\n#ifdef HAVE_AVIF\n \".avif\",\n#endif\n \".bmp\",\n#ifdef HAVE_IMGCODEC_GIF\n \".gif\",\n#endif\n#if (defined(HAVE_JASPER) && defined(OPENCV_IMGCODECS_ENABLE_JASPER_TESTS)) \\\n || defined(HAVE_OPENJPEG)\n \".jp2\",\n#endif\n#ifdef HAVE_JPEG\n \".jpg\",\n#endif\n#ifdef HAVE_JPEGXL\n \".jxl\",\n#endif\n \".png\",\n#ifdef HAVE_IMGCODEC_PXM\n \".ppm\",\n#endif\n#ifdef HAVE_IMGCODEC_SUNRASTER\n \".ras\",\n#endif\n#ifdef HAVE_TIFF\n \".tiff\",\n#endif\n#ifdef HAVE_WEBP\n \".webp\",\n#endif\n};\n\nconst string exts_multi[] = {\n#ifdef HAVE_AVIF\n \".avif\",\n#endif\n#ifdef HAVE_IMGCODEC_GIF\n \".gif\",\n#endif\n \".png\",\n#ifdef HAVE_TIFF\n \".tiff\",\n#endif\n#ifdef HAVE_WEBP\n \".webp\",\n#endif\n};\n\nPERF_TEST_P(Decode, bgr, testing::ValuesIn(exts))\n{\n String filename = getDataPath(\"perf/1920x1080.png\");\n\n Mat src = imread(filename);\n vector buf;\n EXPECT_TRUE(imencode(GetParam(), src, buf));\n\n TEST_CYCLE() imdecode(buf, IMREAD_UNCHANGED);\n\n SANITY_CHECK_NOTHING();\n}\n\nPERF_TEST_P(Decode, rgb, testing::ValuesIn(exts))\n{\n String filename = getDataPath(\"perf/1920x1080.png\");\n\n Mat src = imread(filename);", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1940521696", + "repo_full_name": "opencv/opencv", + "pr_number": 26872, + "pr_file": "modules/imgcodecs/perf/perf_decode_encode.cpp", + "discussion_id": "1940521696", + "commented_code": "@@ -0,0 +1,127 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html\n+\n+#include \"perf_precomp.hpp\"\n+\n+namespace opencv_test\n+{\n+\n+#ifdef HAVE_PNG\n+\n+using namespace perf;\n+\n+typedef perf::TestBaseWithParam Decode;\n+typedef perf::TestBaseWithParam Encode;\n+\n+const string exts[] = {\n+#ifdef HAVE_AVIF\n+ \".avif\",\n+#endif\n+ \".bmp\",\n+#ifdef HAVE_IMGCODEC_GIF\n+ \".gif\",\n+#endif\n+#if (defined(HAVE_JASPER) && defined(OPENCV_IMGCODECS_ENABLE_JASPER_TESTS)) \\\n+ || defined(HAVE_OPENJPEG)\n+ \".jp2\",\n+#endif\n+#ifdef HAVE_JPEG\n+ \".jpg\",\n+#endif\n+#ifdef HAVE_JPEGXL\n+ \".jxl\",\n+#endif\n+ \".png\",\n+#ifdef HAVE_IMGCODEC_PXM\n+ \".ppm\",\n+#endif\n+#ifdef HAVE_IMGCODEC_SUNRASTER\n+ \".ras\",\n+#endif\n+#ifdef HAVE_TIFF\n+ \".tiff\",\n+#endif\n+#ifdef HAVE_WEBP\n+ \".webp\",\n+#endif\n+};\n+\n+const string exts_multi[] = {\n+#ifdef HAVE_AVIF\n+ \".avif\",\n+#endif\n+#ifdef HAVE_IMGCODEC_GIF\n+ \".gif\",\n+#endif\n+ \".png\",\n+#ifdef HAVE_TIFF\n+ \".tiff\",\n+#endif\n+#ifdef HAVE_WEBP\n+ \".webp\",\n+#endif\n+};\n+\n+PERF_TEST_P(Decode, bgr, testing::ValuesIn(exts))\n+{\n+ String filename = getDataPath(\"perf/1920x1080.png\");\n+\n+ Mat src = imread(filename);\n+ vector buf;\n+ EXPECT_TRUE(imencode(GetParam(), src, buf));\n+\n+ TEST_CYCLE() imdecode(buf, 
IMREAD_UNCHANGED);\n+\n+ SANITY_CHECK_NOTHING();\n+}\n+\n+PERF_TEST_P(Decode, rgb, testing::ValuesIn(exts))\n+{\n+ String filename = getDataPath(\"perf/1920x1080.png\");\n+\n+ Mat src = imread(filename);", + "comment_created_at": "2025-02-04T05:20:31+00:00", + "comment_author": "asmorkalov", + "comment_body": "`EXPECT_FALSE(src.empty()) << Cannot open test image perf/1920x1080.png;`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1436144317", + "pr_number": 21407, + "pr_file": "modules/imgproc/test/test_houghlines.cpp", + "created_at": "2023-12-25T19:32:31+00:00", + "commented_code": "EXPECT_NEAR(lines[0][1], 1.57179642, 1e-4);\n}\n\nTEST(WeightedHoughLines, horizontal)\n{\n Mat img(25, 25, CV_8UC1, Scalar(0));\n // draw lines. from top to bottom, stronger to weaker.\n line(img, Point(0, 6), Point(25, 6), Scalar(255));\n line(img, Point(0, 12), Point(25, 12), Scalar(254));\n line(img, Point(0, 18), Point(25, 18), Scalar(253));\n \n // detect lines\n std::vector lines;\n int threshold{253*25-1};\n bool use_edgeval{true};\n HoughLines(img, lines, 1, CV_PI/180, threshold, 0, 0, 0.0, CV_PI, use_edgeval);\n \n // check results\n ASSERT_EQ(lines.size(), 3U);\n // detected lines is assumed sorted from stronger to weaker. \n EXPECT_EQ(lines[0][0], 6);\n EXPECT_EQ(lines[1][0], 12);\n EXPECT_EQ(lines[2][0], 18);", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1436144317", + "repo_full_name": "opencv/opencv", + "pr_number": 21407, + "pr_file": "modules/imgproc/test/test_houghlines.cpp", + "discussion_id": "1436144317", + "commented_code": "@@ -340,6 +340,53 @@ TEST(HoughLines, regression_21983)\n EXPECT_NEAR(lines[0][1], 1.57179642, 1e-4);\n }\n \n+TEST(WeightedHoughLines, horizontal)\n+{\n+ Mat img(25, 25, CV_8UC1, Scalar(0));\n+ // draw lines. from top to bottom, stronger to weaker.\n+ line(img, Point(0, 6), Point(25, 6), Scalar(255));\n+ line(img, Point(0, 12), Point(25, 12), Scalar(254));\n+ line(img, Point(0, 18), Point(25, 18), Scalar(253));\n+ \n+ // detect lines\n+ std::vector lines;\n+ int threshold{253*25-1};\n+ bool use_edgeval{true};\n+ HoughLines(img, lines, 1, CV_PI/180, threshold, 0, 0, 0.0, CV_PI, use_edgeval);\n+ \n+ // check results\n+ ASSERT_EQ(lines.size(), 3U);\n+ // detected lines is assumed sorted from stronger to weaker. \n+ EXPECT_EQ(lines[0][0], 6);\n+ EXPECT_EQ(lines[1][0], 12);\n+ EXPECT_EQ(lines[2][0], 18);", + "comment_created_at": "2023-12-25T19:32:31+00:00", + "comment_author": "opencv-alalek", + "comment_body": "Signature for `_EQ` is `ASSERT_EQ(expected_reference, actual_result);`.\r\nCorrect order of parameters are required to emit valid error messages.\r\n\r\nReference: https://github.com/opencv/opencv/blob/4.0.0/modules/ts/include/opencv2/ts/ts_gtest.h#L8196-L8200\r\n\r\n```\r\nGTEST_API_ AssertionResult EqFailure(const char* expected_expression,\r\n const char* actual_expression,\r\n const std::string& expected_value,\r\n const std::string& actual_value,\r\n bool ignoring_case);\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1881420274", + "repo_full_name": "opencv/opencv", + "pr_number": 21407, + "pr_file": "modules/imgproc/test/test_houghlines.cpp", + "discussion_id": "1436144317", + "commented_code": "@@ -340,6 +340,53 @@ TEST(HoughLines, regression_21983)\n EXPECT_NEAR(lines[0][1], 1.57179642, 1e-4);\n }\n \n+TEST(WeightedHoughLines, horizontal)\n+{\n+ Mat img(25, 25, CV_8UC1, Scalar(0));\n+ // draw lines. 
from top to bottom, stronger to weaker.\n+ line(img, Point(0, 6), Point(25, 6), Scalar(255));\n+ line(img, Point(0, 12), Point(25, 12), Scalar(254));\n+ line(img, Point(0, 18), Point(25, 18), Scalar(253));\n+ \n+ // detect lines\n+ std::vector lines;\n+ int threshold{253*25-1};\n+ bool use_edgeval{true};\n+ HoughLines(img, lines, 1, CV_PI/180, threshold, 0, 0, 0.0, CV_PI, use_edgeval);\n+ \n+ // check results\n+ ASSERT_EQ(lines.size(), 3U);\n+ // detected lines is assumed sorted from stronger to weaker. \n+ EXPECT_EQ(lines[0][0], 6);\n+ EXPECT_EQ(lines[1][0], 12);\n+ EXPECT_EQ(lines[2][0], 18);", + "comment_created_at": "2024-12-12T05:50:54+00:00", + "comment_author": "MasahiroOgawa", + "comment_body": "I'm sorry. I fixed it.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1789212179", + "pr_number": 26254, + "pr_file": "modules/core/test/test_mat.cpp", + "created_at": "2024-10-06T19:08:34+00:00", + "commented_code": "EXPECT_EQ(s, \"InputArray: empty()=true kind=0x00010000 flags=0x01010000 total(-1)=0 dims(-1)=0 size(-1)=0x0 type(-1)=CV_8UC1\");\n}\n\nTEST(Mat, reshape_1d)\n{\n std::vector v = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\n Mat m = Mat(v).reshape(0, 2);\n\n int newrows = 2;\n EXPECT_EQ(m.dims, newrows);\n EXPECT_EQ(m.type(), CV_32S);\n EXPECT_EQ(m.ptr(), &v[0]);\n EXPECT_EQ(m.rows, newrows);\n EXPECT_EQ(m.total(), v.size());\n\n int sz[] = {(int)v.size()};\n Mat m1 = m.reshape(0, 1, sz);\n EXPECT_EQ(m1.dims, 1);\n EXPECT_EQ(m1.type(), CV_32S);", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1789212179", + "repo_full_name": "opencv/opencv", + "pr_number": 26254, + "pr_file": "modules/core/test/test_mat.cpp", + "discussion_id": "1789212179", + "commented_code": "@@ -2647,4 +2647,77 @@ TEST(InputArray, dumpEmpty)\n EXPECT_EQ(s, \"InputArray: empty()=true kind=0x00010000 flags=0x01010000 total(-1)=0 dims(-1)=0 size(-1)=0x0 type(-1)=CV_8UC1\");\n }\n \n+TEST(Mat, reshape_1d)\n+{\n+ std::vector v = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\n+ Mat m = Mat(v).reshape(0, 2);\n+\n+ int newrows = 2;\n+ EXPECT_EQ(m.dims, newrows);\n+ EXPECT_EQ(m.type(), CV_32S);\n+ EXPECT_EQ(m.ptr(), &v[0]);\n+ EXPECT_EQ(m.rows, newrows);\n+ EXPECT_EQ(m.total(), v.size());\n+\n+ int sz[] = {(int)v.size()};\n+ Mat m1 = m.reshape(0, 1, sz);\n+ EXPECT_EQ(m1.dims, 1);\n+ EXPECT_EQ(m1.type(), CV_32S);", + "comment_created_at": "2024-10-06T19:08:34+00:00", + "comment_author": "opencv-alalek", + "comment_body": "Signature for `_EQ` is `ASSERT_EQ(expected_reference, actual_result);`.\r\nCorrect order of parameters are required to emit valid error messages.\r\n\r\nReference: https://github.com/opencv/opencv/blob/4.0.0/modules/ts/include/opencv2/ts/ts_gtest.h#L8196-L8200\r\n\r\n```\r\nGTEST_API_ AssertionResult EqFailure(const char* expected_expression,\r\n const char* actual_expression,\r\n const std::string& expected_value,\r\n const std::string& actual_value,\r\n bool ignoring_case);\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-use-proper-assertions.md b/_reviewers/opencv-use-proper-assertions.md index e15e284..16fd0fa 100644 --- a/_reviewers/opencv-use-proper-assertions.md +++ b/_reviewers/opencv-use-proper-assertions.md @@ -47,197 +47,3 @@ EXPECT_PRED_FORMAT2(cvtest::MatComparator(0, 0), src_mat, dst_mat); ``` Well-written assertions make tests more reliable, easier to debug, and help identify the exact cause of failures. 
- - -[ - { - "discussion_id": "2024750669", - "pr_number": 27051, - "pr_file": "modules/photo/perf/perf_ccm.cpp", - "created_at": "2025-04-02T12:44:24+00:00", - "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n\n#include \"perf_precomp.hpp\"\n#include \"opencv2/photo.hpp\"\n\nnamespace opencv_test\n{\nnamespace\n{\n\nusing namespace std;\nusing namespace cv::ccm;\n\nPERF_TEST(CV_mcc_perf, infer) {\n // read gold chartsRGB\n string path = cvtest::findDataFile(\"cv/mcc/mcc_ccm_test.yml\");\n FileStorage fs(path, FileStorage::READ);\n Mat chartsRGB;\n FileNode node = fs[\"chartsRGB\"];\n node >> chartsRGB;\n fs.release();", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2024750669", - "repo_full_name": "opencv/opencv", - "pr_number": 27051, - "pr_file": "modules/photo/perf/perf_ccm.cpp", - "discussion_id": "2024750669", - "commented_code": "@@ -0,0 +1,41 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+\n+#include \"perf_precomp.hpp\"\n+#include \"opencv2/photo.hpp\"\n+\n+namespace opencv_test\n+{\n+namespace\n+{\n+\n+using namespace std;\n+using namespace cv::ccm;\n+\n+PERF_TEST(CV_mcc_perf, infer) {\n+ // read gold chartsRGB\n+ string path = cvtest::findDataFile(\"cv/mcc/mcc_ccm_test.yml\");\n+ FileStorage fs(path, FileStorage::READ);\n+ Mat chartsRGB;\n+ FileNode node = fs[\"chartsRGB\"];\n+ node >> chartsRGB;\n+ fs.release();", - "comment_created_at": "2025-04-02T12:44:24+00:00", - "comment_author": "asmorkalov", - "comment_body": "`ASSERT_FALSE(chartsRGB.empty());`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2091413835", - "pr_number": 27051, - "pr_file": "modules/photo/perf/perf_ccm.cpp", - "created_at": "2025-05-15T15:07:08+00:00", - "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html.\n\n#include \"perf_precomp.hpp\"\n#include \"opencv2/photo.hpp\"\n\nnamespace opencv_test {\nnamespace {\n\nusing namespace cv;\nusing namespace std;\n\nPERF_TEST(CV_ccm_perf_480_640, correctImage)\n{\n string path = cvtest::findDataFile(\"cv/mcc/mcc_ccm_test.yml\");\n FileStorage fs(path, FileStorage::READ);\n Mat chartsRGB;\n fs[\"chartsRGB\"] >> chartsRGB;\n fs.release();\n\n cv::ccm::ColorCorrectionModel model(\n chartsRGB.col(1).clone().reshape(3, chartsRGB.rows/3) / 255.0,\n cv::ccm::COLORCHECKER_MACBETH\n );\n model.compute();\n Mat img(480, 640, CV_8UC3);\n randu(img, 0, 255);\n img.convertTo(img, CV_64F, 1.0/255.0);\n\n Mat correctedImage;\n TEST_CYCLE() { model.correctImage(img, correctedImage); }\n SANITY_CHECK_NOTHING();\n}\n\nPERF_TEST(CV_ccm_perf_720_1280, correctImage)\n{\n string path = cvtest::findDataFile(\"cv/mcc/mcc_ccm_test.yml\");\n FileStorage fs(path, FileStorage::READ);\n Mat chartsRGB;\n fs[\"chartsRGB\"] >> chartsRGB;", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2091413835", - "repo_full_name": "opencv/opencv", - "pr_number": 27051, - "pr_file": "modules/photo/perf/perf_ccm.cpp", - "discussion_id": "2091413835", - "commented_code": "@@ -0,0 +1,103 @@\n+// This file is part of OpenCV project.\n+// It is 
subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html.\n+\n+#include \"perf_precomp.hpp\"\n+#include \"opencv2/photo.hpp\"\n+\n+namespace opencv_test {\n+namespace {\n+\n+using namespace cv;\n+using namespace std;\n+\n+PERF_TEST(CV_ccm_perf_480_640, correctImage)\n+{\n+ string path = cvtest::findDataFile(\"cv/mcc/mcc_ccm_test.yml\");\n+ FileStorage fs(path, FileStorage::READ);\n+ Mat chartsRGB;\n+ fs[\"chartsRGB\"] >> chartsRGB;\n+ fs.release();\n+\n+ cv::ccm::ColorCorrectionModel model(\n+ chartsRGB.col(1).clone().reshape(3, chartsRGB.rows/3) / 255.0,\n+ cv::ccm::COLORCHECKER_MACBETH\n+ );\n+ model.compute();\n+ Mat img(480, 640, CV_8UC3);\n+ randu(img, 0, 255);\n+ img.convertTo(img, CV_64F, 1.0/255.0);\n+\n+ Mat correctedImage;\n+ TEST_CYCLE() { model.correctImage(img, correctedImage); }\n+ SANITY_CHECK_NOTHING();\n+}\n+\n+PERF_TEST(CV_ccm_perf_720_1280, correctImage)\n+{\n+ string path = cvtest::findDataFile(\"cv/mcc/mcc_ccm_test.yml\");\n+ FileStorage fs(path, FileStorage::READ);\n+ Mat chartsRGB;\n+ fs[\"chartsRGB\"] >> chartsRGB;", - "comment_created_at": "2025-05-15T15:07:08+00:00", - "comment_author": "asmorkalov", - "comment_body": "`ASSERT_FALSE(chartsRGB.empty());` here and bellow.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2110097906", - "pr_number": 27362, - "pr_file": "modules/core/test/test_mat.cpp", - "created_at": "2025-05-27T20:07:08+00:00", - "commented_code": "EXPECT_EQ(25u, m2.total());\n}\n\nTEST(Core_Mat, empty)\n{\n // Should not crash.\n uint8_t data[2] = {0, 1};\n cv::Mat mat_nd(/*ndims=*/0, /*sizes=*/nullptr, CV_8UC1, /*data=*/data);\n cv::Mat1b mat(0, 0, /*data=*/data, /*steps=*/1);\n}", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2110097906", - "repo_full_name": "opencv/opencv", - "pr_number": 27362, - "pr_file": "modules/core/test/test_mat.cpp", - "discussion_id": "2110097906", - "commented_code": "@@ -1446,6 +1446,14 @@ TEST(Core_Mat, regression_9507)\n EXPECT_EQ(25u, m2.total());\n }\n \n+TEST(Core_Mat, empty)\n+{\n+ // Should not crash.\n+ uint8_t data[2] = {0, 1};\n+ cv::Mat mat_nd(/*ndims=*/0, /*sizes=*/nullptr, CV_8UC1, /*data=*/data);\n+ cv::Mat1b mat(0, 0, /*data=*/data, /*steps=*/1);\n+}", - "comment_created_at": "2025-05-27T20:07:08+00:00", - "comment_author": "vpisarev", - "comment_body": "suggest to add some checks to the test:\r\n\r\n```\r\nTEST(Core_Mat, empty)\r\n{\r\n // Should not crash.\r\n uint8_t data[2] = {0, 1};\r\n cv::Mat mat_nd(/*ndims=*/0, /*sizes=*/nullptr, CV_8UC1, /*data=*/data);\r\n cv::Mat1b mat(0, 0, /*data=*/data, /*steps=*/1);\r\n EXPECT_EQ(mat_nd.dims, 0);\r\n EXPECT_EQ(mat.dims, 2);\r\n EXPECT_LE(mat_nd.total(), 1u); // 0 for 4.x, 1 for 5.x\r\n EXPECT_EQ(mat.total(), 0u);\r\n EXPECT_TRUE(mat.empty());\r\n // mat_nd.empty() should return true for 4.x and false for 5.x\r\n}\r\n```\r\n", - "pr_file_module": null - }, - { - "comment_id": "2115278244", - "repo_full_name": "opencv/opencv", - "pr_number": 27362, - "pr_file": "modules/core/test/test_mat.cpp", - "discussion_id": "2110097906", - "commented_code": "@@ -1446,6 +1446,14 @@ TEST(Core_Mat, regression_9507)\n EXPECT_EQ(25u, m2.total());\n }\n \n+TEST(Core_Mat, empty)\n+{\n+ // Should not crash.\n+ uint8_t data[2] = {0, 1};\n+ cv::Mat mat_nd(/*ndims=*/0, /*sizes=*/nullptr, CV_8UC1, /*data=*/data);\n+ cv::Mat1b mat(0, 0, /*data=*/data, /*steps=*/1);\n+}", - "comment_created_at": "2025-05-30T07:09:08+00:00", - 
"comment_author": "opencv-alalek", - "comment_body": "> // 0 for 4.x, 1 for 5.x\r\n\r\nDo we really to introduce support in OpenCV 4.x for functionality which has different behavior in OpenCV 5.x.", - "pr_file_module": null - }, - { - "comment_id": "2120384779", - "repo_full_name": "opencv/opencv", - "pr_number": 27362, - "pr_file": "modules/core/test/test_mat.cpp", - "discussion_id": "2110097906", - "commented_code": "@@ -1446,6 +1446,14 @@ TEST(Core_Mat, regression_9507)\n EXPECT_EQ(25u, m2.total());\n }\n \n+TEST(Core_Mat, empty)\n+{\n+ // Should not crash.\n+ uint8_t data[2] = {0, 1};\n+ cv::Mat mat_nd(/*ndims=*/0, /*sizes=*/nullptr, CV_8UC1, /*data=*/data);\n+ cv::Mat1b mat(0, 0, /*data=*/data, /*steps=*/1);\n+}", - "comment_created_at": "2025-06-02T08:14:36+00:00", - "comment_author": "vrabaud", - "comment_body": "I fixed the test. @vpisarev , please decide with @opencv-alalek if it worth adding the test on total and empty.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1956243656", - "pr_number": 26907, - "pr_file": "modules/features2d/test/test_fast.cpp", - "created_at": "2025-02-14T14:32:43+00:00", - "commented_code": "TEST(Features2d_FAST, regression) { CV_FastTest test; test.safe_run(); }\n\n// #define DUMP_TEST_DATA\n\nTEST(Features2d_FAST, noNMS)\n{\n Mat img = imread(string(cvtest::TS::ptr()->get_data_path()) + \"inpaint/orig.png\", cv::IMREAD_GRAYSCALE);\n string xml = string(cvtest::TS::ptr()->get_data_path()) + \"fast/result_no_nonmax.xml\";\n\n vector keypoints;\n FAST(img, keypoints, 100, false, FastFeatureDetector::DetectorType::TYPE_9_16);\n Mat kps(1, (int)(keypoints.size() * sizeof(KeyPoint)), CV_8U, &keypoints[0]);\n\n Mat gt_kps;\n FileStorage fs(xml, FileStorage::READ);\n#ifdef DUMP_TEST_DATA\n if (!fs.isOpened())\n {\n fs.open(xml, FileStorage::WRITE);\n fs << \"exp_kps\" << kps;\n fs.release();\n fs.open(xml, FileStorage::READ);\n }\n#endif\n EXPECT_TRUE(fs.isOpened());", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1956243656", - "repo_full_name": "opencv/opencv", - "pr_number": 26907, - "pr_file": "modules/features2d/test/test_fast.cpp", - "discussion_id": "1956243656", - "commented_code": "@@ -135,4 +135,34 @@ void CV_FastTest::run( int )\n \n TEST(Features2d_FAST, regression) { CV_FastTest test; test.safe_run(); }\n \n+// #define DUMP_TEST_DATA\n+\n+TEST(Features2d_FAST, noNMS)\n+{\n+ Mat img = imread(string(cvtest::TS::ptr()->get_data_path()) + \"inpaint/orig.png\", cv::IMREAD_GRAYSCALE);\n+ string xml = string(cvtest::TS::ptr()->get_data_path()) + \"fast/result_no_nonmax.xml\";\n+\n+ vector keypoints;\n+ FAST(img, keypoints, 100, false, FastFeatureDetector::DetectorType::TYPE_9_16);\n+ Mat kps(1, (int)(keypoints.size() * sizeof(KeyPoint)), CV_8U, &keypoints[0]);\n+\n+ Mat gt_kps;\n+ FileStorage fs(xml, FileStorage::READ);\n+#ifdef DUMP_TEST_DATA\n+ if (!fs.isOpened())\n+ {\n+ fs.open(xml, FileStorage::WRITE);\n+ fs << \"exp_kps\" << kps;\n+ fs.release();\n+ fs.open(xml, FileStorage::READ);\n+ }\n+#endif\n+ EXPECT_TRUE(fs.isOpened());", - "comment_created_at": "2025-02-14T14:32:43+00:00", - "comment_author": "mshabunin", - "comment_body": "Maybe `ASSERT_TRUE` would be better in this case? 
Here and on line 163 (`ASSERT_GT`).", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1940521696", - "pr_number": 26872, - "pr_file": "modules/imgcodecs/perf/perf_decode_encode.cpp", - "created_at": "2025-02-04T05:20:31+00:00", - "commented_code": "// This file is part of OpenCV project.\n// It is subject to the license terms in the LICENSE file found in the top-level directory\n// of this distribution and at http://opencv.org/license.html\n\n#include \"perf_precomp.hpp\"\n\nnamespace opencv_test\n{\n\n#ifdef HAVE_PNG\n\nusing namespace perf;\n\ntypedef perf::TestBaseWithParam Decode;\ntypedef perf::TestBaseWithParam Encode;\n\nconst string exts[] = {\n#ifdef HAVE_AVIF\n \".avif\",\n#endif\n \".bmp\",\n#ifdef HAVE_IMGCODEC_GIF\n \".gif\",\n#endif\n#if (defined(HAVE_JASPER) && defined(OPENCV_IMGCODECS_ENABLE_JASPER_TESTS)) \\\n || defined(HAVE_OPENJPEG)\n \".jp2\",\n#endif\n#ifdef HAVE_JPEG\n \".jpg\",\n#endif\n#ifdef HAVE_JPEGXL\n \".jxl\",\n#endif\n \".png\",\n#ifdef HAVE_IMGCODEC_PXM\n \".ppm\",\n#endif\n#ifdef HAVE_IMGCODEC_SUNRASTER\n \".ras\",\n#endif\n#ifdef HAVE_TIFF\n \".tiff\",\n#endif\n#ifdef HAVE_WEBP\n \".webp\",\n#endif\n};\n\nconst string exts_multi[] = {\n#ifdef HAVE_AVIF\n \".avif\",\n#endif\n#ifdef HAVE_IMGCODEC_GIF\n \".gif\",\n#endif\n \".png\",\n#ifdef HAVE_TIFF\n \".tiff\",\n#endif\n#ifdef HAVE_WEBP\n \".webp\",\n#endif\n};\n\nPERF_TEST_P(Decode, bgr, testing::ValuesIn(exts))\n{\n String filename = getDataPath(\"perf/1920x1080.png\");\n\n Mat src = imread(filename);\n vector buf;\n EXPECT_TRUE(imencode(GetParam(), src, buf));\n\n TEST_CYCLE() imdecode(buf, IMREAD_UNCHANGED);\n\n SANITY_CHECK_NOTHING();\n}\n\nPERF_TEST_P(Decode, rgb, testing::ValuesIn(exts))\n{\n String filename = getDataPath(\"perf/1920x1080.png\");\n\n Mat src = imread(filename);", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1940521696", - "repo_full_name": "opencv/opencv", - "pr_number": 26872, - "pr_file": "modules/imgcodecs/perf/perf_decode_encode.cpp", - "discussion_id": "1940521696", - "commented_code": "@@ -0,0 +1,127 @@\n+// This file is part of OpenCV project.\n+// It is subject to the license terms in the LICENSE file found in the top-level directory\n+// of this distribution and at http://opencv.org/license.html\n+\n+#include \"perf_precomp.hpp\"\n+\n+namespace opencv_test\n+{\n+\n+#ifdef HAVE_PNG\n+\n+using namespace perf;\n+\n+typedef perf::TestBaseWithParam Decode;\n+typedef perf::TestBaseWithParam Encode;\n+\n+const string exts[] = {\n+#ifdef HAVE_AVIF\n+ \".avif\",\n+#endif\n+ \".bmp\",\n+#ifdef HAVE_IMGCODEC_GIF\n+ \".gif\",\n+#endif\n+#if (defined(HAVE_JASPER) && defined(OPENCV_IMGCODECS_ENABLE_JASPER_TESTS)) \\\n+ || defined(HAVE_OPENJPEG)\n+ \".jp2\",\n+#endif\n+#ifdef HAVE_JPEG\n+ \".jpg\",\n+#endif\n+#ifdef HAVE_JPEGXL\n+ \".jxl\",\n+#endif\n+ \".png\",\n+#ifdef HAVE_IMGCODEC_PXM\n+ \".ppm\",\n+#endif\n+#ifdef HAVE_IMGCODEC_SUNRASTER\n+ \".ras\",\n+#endif\n+#ifdef HAVE_TIFF\n+ \".tiff\",\n+#endif\n+#ifdef HAVE_WEBP\n+ \".webp\",\n+#endif\n+};\n+\n+const string exts_multi[] = {\n+#ifdef HAVE_AVIF\n+ \".avif\",\n+#endif\n+#ifdef HAVE_IMGCODEC_GIF\n+ \".gif\",\n+#endif\n+ \".png\",\n+#ifdef HAVE_TIFF\n+ \".tiff\",\n+#endif\n+#ifdef HAVE_WEBP\n+ \".webp\",\n+#endif\n+};\n+\n+PERF_TEST_P(Decode, bgr, testing::ValuesIn(exts))\n+{\n+ String filename = getDataPath(\"perf/1920x1080.png\");\n+\n+ Mat src = imread(filename);\n+ vector buf;\n+ EXPECT_TRUE(imencode(GetParam(), src, buf));\n+\n+ TEST_CYCLE() imdecode(buf, 
IMREAD_UNCHANGED);\n+\n+ SANITY_CHECK_NOTHING();\n+}\n+\n+PERF_TEST_P(Decode, rgb, testing::ValuesIn(exts))\n+{\n+ String filename = getDataPath(\"perf/1920x1080.png\");\n+\n+ Mat src = imread(filename);", - "comment_created_at": "2025-02-04T05:20:31+00:00", - "comment_author": "asmorkalov", - "comment_body": "`EXPECT_FALSE(src.empty()) << Cannot open test image perf/1920x1080.png;`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1436144317", - "pr_number": 21407, - "pr_file": "modules/imgproc/test/test_houghlines.cpp", - "created_at": "2023-12-25T19:32:31+00:00", - "commented_code": "EXPECT_NEAR(lines[0][1], 1.57179642, 1e-4);\n}\n\nTEST(WeightedHoughLines, horizontal)\n{\n Mat img(25, 25, CV_8UC1, Scalar(0));\n // draw lines. from top to bottom, stronger to weaker.\n line(img, Point(0, 6), Point(25, 6), Scalar(255));\n line(img, Point(0, 12), Point(25, 12), Scalar(254));\n line(img, Point(0, 18), Point(25, 18), Scalar(253));\n \n // detect lines\n std::vector lines;\n int threshold{253*25-1};\n bool use_edgeval{true};\n HoughLines(img, lines, 1, CV_PI/180, threshold, 0, 0, 0.0, CV_PI, use_edgeval);\n \n // check results\n ASSERT_EQ(lines.size(), 3U);\n // detected lines is assumed sorted from stronger to weaker. \n EXPECT_EQ(lines[0][0], 6);\n EXPECT_EQ(lines[1][0], 12);\n EXPECT_EQ(lines[2][0], 18);", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1436144317", - "repo_full_name": "opencv/opencv", - "pr_number": 21407, - "pr_file": "modules/imgproc/test/test_houghlines.cpp", - "discussion_id": "1436144317", - "commented_code": "@@ -340,6 +340,53 @@ TEST(HoughLines, regression_21983)\n EXPECT_NEAR(lines[0][1], 1.57179642, 1e-4);\n }\n \n+TEST(WeightedHoughLines, horizontal)\n+{\n+ Mat img(25, 25, CV_8UC1, Scalar(0));\n+ // draw lines. from top to bottom, stronger to weaker.\n+ line(img, Point(0, 6), Point(25, 6), Scalar(255));\n+ line(img, Point(0, 12), Point(25, 12), Scalar(254));\n+ line(img, Point(0, 18), Point(25, 18), Scalar(253));\n+ \n+ // detect lines\n+ std::vector lines;\n+ int threshold{253*25-1};\n+ bool use_edgeval{true};\n+ HoughLines(img, lines, 1, CV_PI/180, threshold, 0, 0, 0.0, CV_PI, use_edgeval);\n+ \n+ // check results\n+ ASSERT_EQ(lines.size(), 3U);\n+ // detected lines is assumed sorted from stronger to weaker. \n+ EXPECT_EQ(lines[0][0], 6);\n+ EXPECT_EQ(lines[1][0], 12);\n+ EXPECT_EQ(lines[2][0], 18);", - "comment_created_at": "2023-12-25T19:32:31+00:00", - "comment_author": "opencv-alalek", - "comment_body": "Signature for `_EQ` is `ASSERT_EQ(expected_reference, actual_result);`.\r\nCorrect order of parameters are required to emit valid error messages.\r\n\r\nReference: https://github.com/opencv/opencv/blob/4.0.0/modules/ts/include/opencv2/ts/ts_gtest.h#L8196-L8200\r\n\r\n```\r\nGTEST_API_ AssertionResult EqFailure(const char* expected_expression,\r\n const char* actual_expression,\r\n const std::string& expected_value,\r\n const std::string& actual_value,\r\n bool ignoring_case);\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1881420274", - "repo_full_name": "opencv/opencv", - "pr_number": 21407, - "pr_file": "modules/imgproc/test/test_houghlines.cpp", - "discussion_id": "1436144317", - "commented_code": "@@ -340,6 +340,53 @@ TEST(HoughLines, regression_21983)\n EXPECT_NEAR(lines[0][1], 1.57179642, 1e-4);\n }\n \n+TEST(WeightedHoughLines, horizontal)\n+{\n+ Mat img(25, 25, CV_8UC1, Scalar(0));\n+ // draw lines. 
from top to bottom, stronger to weaker.\n+ line(img, Point(0, 6), Point(25, 6), Scalar(255));\n+ line(img, Point(0, 12), Point(25, 12), Scalar(254));\n+ line(img, Point(0, 18), Point(25, 18), Scalar(253));\n+ \n+ // detect lines\n+ std::vector lines;\n+ int threshold{253*25-1};\n+ bool use_edgeval{true};\n+ HoughLines(img, lines, 1, CV_PI/180, threshold, 0, 0, 0.0, CV_PI, use_edgeval);\n+ \n+ // check results\n+ ASSERT_EQ(lines.size(), 3U);\n+ // detected lines is assumed sorted from stronger to weaker. \n+ EXPECT_EQ(lines[0][0], 6);\n+ EXPECT_EQ(lines[1][0], 12);\n+ EXPECT_EQ(lines[2][0], 18);", - "comment_created_at": "2024-12-12T05:50:54+00:00", - "comment_author": "MasahiroOgawa", - "comment_body": "I'm sorry. I fixed it.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1789212179", - "pr_number": 26254, - "pr_file": "modules/core/test/test_mat.cpp", - "created_at": "2024-10-06T19:08:34+00:00", - "commented_code": "EXPECT_EQ(s, \"InputArray: empty()=true kind=0x00010000 flags=0x01010000 total(-1)=0 dims(-1)=0 size(-1)=0x0 type(-1)=CV_8UC1\");\n}\n\nTEST(Mat, reshape_1d)\n{\n std::vector v = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\n Mat m = Mat(v).reshape(0, 2);\n\n int newrows = 2;\n EXPECT_EQ(m.dims, newrows);\n EXPECT_EQ(m.type(), CV_32S);\n EXPECT_EQ(m.ptr(), &v[0]);\n EXPECT_EQ(m.rows, newrows);\n EXPECT_EQ(m.total(), v.size());\n\n int sz[] = {(int)v.size()};\n Mat m1 = m.reshape(0, 1, sz);\n EXPECT_EQ(m1.dims, 1);\n EXPECT_EQ(m1.type(), CV_32S);", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1789212179", - "repo_full_name": "opencv/opencv", - "pr_number": 26254, - "pr_file": "modules/core/test/test_mat.cpp", - "discussion_id": "1789212179", - "commented_code": "@@ -2647,4 +2647,77 @@ TEST(InputArray, dumpEmpty)\n EXPECT_EQ(s, \"InputArray: empty()=true kind=0x00010000 flags=0x01010000 total(-1)=0 dims(-1)=0 size(-1)=0x0 type(-1)=CV_8UC1\");\n }\n \n+TEST(Mat, reshape_1d)\n+{\n+ std::vector v = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\n+ Mat m = Mat(v).reshape(0, 2);\n+\n+ int newrows = 2;\n+ EXPECT_EQ(m.dims, newrows);\n+ EXPECT_EQ(m.type(), CV_32S);\n+ EXPECT_EQ(m.ptr(), &v[0]);\n+ EXPECT_EQ(m.rows, newrows);\n+ EXPECT_EQ(m.total(), v.size());\n+\n+ int sz[] = {(int)v.size()};\n+ Mat m1 = m.reshape(0, 1, sz);\n+ EXPECT_EQ(m1.dims, 1);\n+ EXPECT_EQ(m1.type(), CV_32S);", - "comment_created_at": "2024-10-06T19:08:34+00:00", - "comment_author": "opencv-alalek", - "comment_body": "Signature for `_EQ` is `ASSERT_EQ(expected_reference, actual_result);`.\r\nCorrect order of parameters are required to emit valid error messages.\r\n\r\nReference: https://github.com/opencv/opencv/blob/4.0.0/modules/ts/include/opencv2/ts/ts_gtest.h#L8196-L8200\r\n\r\n```\r\nGTEST_API_ AssertionResult EqFailure(const char* expected_expression,\r\n const char* actual_expression,\r\n const std::string& expected_value,\r\n const std::string& actual_value,\r\n bool ignoring_case);\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/opencv-validate-tensor-dimensions.json b/_reviewers/opencv-validate-tensor-dimensions.json new file mode 100644 index 0000000..ae4261d --- /dev/null +++ b/_reviewers/opencv-validate-tensor-dimensions.json @@ -0,0 +1,140 @@ +[ + { + "discussion_id": "2137760658", + "pr_number": 27349, + "pr_file": "samples/dnn/deblurring.cpp", + "created_at": "2025-06-10T12:25:20+00:00", + "commented_code": "/*\nThis file is part of OpenCV project.\nIt is subject to the license terms in the LICENSE file found in the top-level directory\nof this 
distribution and at http://opencv.org/license.html.\n\nThis sample deblurs the given blurry image.\n\nCopyright (C) 2025, Bigvision LLC.\n\nHow to use:\n Sample command to run:\n `./example_dnn_deblurring`\n\n You can download NAFNet deblurring model using\n `python download_models.py NAFNet`\n\n References:\n Github: https://github.com/megvii-research/NAFNet\n PyTorch model: https://drive.google.com/file/d/14D4V4raNYIOhETfcuuLI3bGLB-OYIv6X/view\n\n PyTorch model was converted to ONNX and then ONNX model was further quantized using block quantization from [opencv_zoo](https://github.com/opencv/opencv_zoo/blob/main/tools/quantize/block_quantize.py)\n\n Set environment variable OPENCV_DOWNLOAD_CACHE_DIR to point to the directory where models are downloaded. Also, point OPENCV_SAMPLES_DATA_PATH to opencv/samples/data.\n*/\n\n#include \n#include \n\n#include \n#include \n#include \n\n#include \"common.hpp\"\n\nusing namespace cv;\nusing namespace dnn;\nusing namespace std;\n\nconst string about = \"Use this script for image deblurring using OpenCV. \\n\\n\"\n \"Firstly, download required models i.e. NAFNet using `download_models.py` (if not already done). Set environment variable OPENCV_DOWNLOAD_CACHE_DIR to point to the directory where models are downloaded. Also, point OPENCV_SAMPLES_DATA_PATH to opencv/samples/data.\\n\"\n \"To run:\\n\"\n \"\\t Example: ./example_dnn_deblurring [--input=] \\n\\n\"\n \"Deblurring model path can also be specified using --model argument.\\n\\n\";\n\nconst string param_keys =\n \"{ help h | | show help message}\"\n \"{ @alias | NAFNet | An alias name of model to extract preprocessing parameters from models.yml file. }\"\n \"{ zoo | ../dnn/models.yml | An optional path to file with preprocessing parameters }\"\n \"{ input i | licenseplate_motion.jpg | image file path}\";\n\nconst string backend_keys = format(\n \"{ backend | default | Choose one of computation backends: \"\n \"default: automatically (by default), \"\n \"openvino: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), \"\n \"opencv: OpenCV implementation, \"\n \"vkcom: VKCOM, \"\n \"cuda: CUDA, \"\n \"webnn: WebNN }\");\n\nconst string target_keys = format(\n \"{ target | cpu | Choose one of target computation devices: \"\n \"cpu: CPU target (by default), \"\n \"opencl: OpenCL, \"\n \"opencl_fp16: OpenCL fp16 (half-float precision), \"\n \"vpu: VPU, \"\n \"vulkan: Vulkan, \"\n \"cuda: CUDA, \"\n \"cuda_fp16: CUDA fp16 (half-float preprocess) }\");\n\nstring keys = param_keys + backend_keys + target_keys;\n\n\nint main(int argc, char **argv)\n{\n CommandLineParser parser(argc, argv, keys);\n\n if (!parser.has(\"@alias\") || parser.has(\"help\"))\n {\n cout<(\"@alias\");\n string zooFile = findFile(parser.get(\"zoo\"));\n keys += genPreprocArguments(modelName, zooFile);\n parser = CommandLineParser(argc, argv, keys);\n parser.about(\"Use this script to run image deblurring using OpenCV.\");\n\n const string sha1 = parser.get(\"sha1\");\n const string modelPath = findModel(parser.get(\"model\"), sha1);\n string imgPath = parser.get(\"input\");\n const string backend = parser.get(\"backend\");\n const string target = parser.get(\"target\");\n float scale = parser.get(\"scale\");\n bool swapRB = parser.get(\"rgb\");\n Scalar mean_v = parser.get(\"mean\");\n\n EngineType engine = ENGINE_AUTO;\n if (backend != \"default\" || target != \"cpu\"){\n engine = ENGINE_CLASSIC;\n }\n\n Net net = readNetFromONNX(modelPath, engine);\n 
net.setPreferableBackend(getBackendID(backend));\n net.setPreferableTarget(getTargetID(target));\n\n Mat inputImage = imread(findFile(imgPath));\n if (inputImage.empty()) {\n cerr << \"Error: Input image could not be loaded.\" << endl;\n return -1;\n }\n Mat image = inputImage.clone();\n\n Mat image_blob = blobFromImage(image, scale, Size(image.cols, image.rows), mean_v, swapRB, false);\n\n net.setInput(image_blob);\n Mat output = net.forward();\n\n // Post Processing\n Mat output_transposed(3, &output.size[1], CV_32F, output.ptr());\n\n vector channels;\n for (int i = 0; i < 3; ++i) {\n channels.push_back(Mat(output_transposed.size[1], output_transposed.size[2], CV_32F,\n output_transposed.ptr(i)));\n }", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "2137760658", + "repo_full_name": "opencv/opencv", + "pr_number": 27349, + "pr_file": "samples/dnn/deblurring.cpp", + "discussion_id": "2137760658", + "commented_code": "@@ -0,0 +1,136 @@\n+/*\n+This file is part of OpenCV project.\n+It is subject to the license terms in the LICENSE file found in the top-level directory\n+of this distribution and at http://opencv.org/license.html.\n+\n+This sample deblurs the given blurry image.\n+\n+Copyright (C) 2025, Bigvision LLC.\n+\n+How to use:\n+ Sample command to run:\n+ `./example_dnn_deblurring`\n+\n+ You can download NAFNet deblurring model using\n+ `python download_models.py NAFNet`\n+\n+ References:\n+ Github: https://github.com/megvii-research/NAFNet\n+ PyTorch model: https://drive.google.com/file/d/14D4V4raNYIOhETfcuuLI3bGLB-OYIv6X/view\n+\n+ PyTorch model was converted to ONNX and then ONNX model was further quantized using block quantization from [opencv_zoo](https://github.com/opencv/opencv_zoo/blob/main/tools/quantize/block_quantize.py)\n+\n+ Set environment variable OPENCV_DOWNLOAD_CACHE_DIR to point to the directory where models are downloaded. Also, point OPENCV_SAMPLES_DATA_PATH to opencv/samples/data.\n+*/\n+\n+#include \n+#include \n+\n+#include \n+#include \n+#include \n+\n+#include \"common.hpp\"\n+\n+using namespace cv;\n+using namespace dnn;\n+using namespace std;\n+\n+const string about = \"Use this script for image deblurring using OpenCV. \\n\\n\"\n+ \"Firstly, download required models i.e. NAFNet using `download_models.py` (if not already done). Set environment variable OPENCV_DOWNLOAD_CACHE_DIR to point to the directory where models are downloaded. Also, point OPENCV_SAMPLES_DATA_PATH to opencv/samples/data.\\n\"\n+ \"To run:\\n\"\n+ \"\\t Example: ./example_dnn_deblurring [--input=] \\n\\n\"\n+ \"Deblurring model path can also be specified using --model argument.\\n\\n\";\n+\n+const string param_keys =\n+ \"{ help h | | show help message}\"\n+ \"{ @alias | NAFNet | An alias name of model to extract preprocessing parameters from models.yml file. 
}\"\n+ \"{ zoo | ../dnn/models.yml | An optional path to file with preprocessing parameters }\"\n+ \"{ input i | licenseplate_motion.jpg | image file path}\";\n+\n+const string backend_keys = format(\n+ \"{ backend | default | Choose one of computation backends: \"\n+ \"default: automatically (by default), \"\n+ \"openvino: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), \"\n+ \"opencv: OpenCV implementation, \"\n+ \"vkcom: VKCOM, \"\n+ \"cuda: CUDA, \"\n+ \"webnn: WebNN }\");\n+\n+const string target_keys = format(\n+ \"{ target | cpu | Choose one of target computation devices: \"\n+ \"cpu: CPU target (by default), \"\n+ \"opencl: OpenCL, \"\n+ \"opencl_fp16: OpenCL fp16 (half-float precision), \"\n+ \"vpu: VPU, \"\n+ \"vulkan: Vulkan, \"\n+ \"cuda: CUDA, \"\n+ \"cuda_fp16: CUDA fp16 (half-float preprocess) }\");\n+\n+string keys = param_keys + backend_keys + target_keys;\n+\n+\n+int main(int argc, char **argv)\n+{\n+ CommandLineParser parser(argc, argv, keys);\n+\n+ if (!parser.has(\"@alias\") || parser.has(\"help\"))\n+ {\n+ cout<(\"@alias\");\n+ string zooFile = findFile(parser.get(\"zoo\"));\n+ keys += genPreprocArguments(modelName, zooFile);\n+ parser = CommandLineParser(argc, argv, keys);\n+ parser.about(\"Use this script to run image deblurring using OpenCV.\");\n+\n+ const string sha1 = parser.get(\"sha1\");\n+ const string modelPath = findModel(parser.get(\"model\"), sha1);\n+ string imgPath = parser.get(\"input\");\n+ const string backend = parser.get(\"backend\");\n+ const string target = parser.get(\"target\");\n+ float scale = parser.get(\"scale\");\n+ bool swapRB = parser.get(\"rgb\");\n+ Scalar mean_v = parser.get(\"mean\");\n+\n+ EngineType engine = ENGINE_AUTO;\n+ if (backend != \"default\" || target != \"cpu\"){\n+ engine = ENGINE_CLASSIC;\n+ }\n+\n+ Net net = readNetFromONNX(modelPath, engine);\n+ net.setPreferableBackend(getBackendID(backend));\n+ net.setPreferableTarget(getTargetID(target));\n+\n+ Mat inputImage = imread(findFile(imgPath));\n+ if (inputImage.empty()) {\n+ cerr << \"Error: Input image could not be loaded.\" << endl;\n+ return -1;\n+ }\n+ Mat image = inputImage.clone();\n+\n+ Mat image_blob = blobFromImage(image, scale, Size(image.cols, image.rows), mean_v, swapRB, false);\n+\n+ net.setInput(image_blob);\n+ Mat output = net.forward();\n+\n+ // Post Processing\n+ Mat output_transposed(3, &output.size[1], CV_32F, output.ptr());\n+\n+ vector channels;\n+ for (int i = 0; i < 3; ++i) {\n+ channels.push_back(Mat(output_transposed.size[1], output_transposed.size[2], CV_32F,\n+ output_transposed.ptr(i)));\n+ }", + "comment_created_at": "2025-06-10T12:25:20+00:00", + "comment_author": "asmorkalov", + "comment_body": "```\r\nvector channels = {\r\n Mat(output_transposed.size[1], output_transposed.size[2], CV_32F, output_transposed.ptr(0)),\r\n Mat(output_transposed.size[1], output_transposed.size[2], CV_32F, output_transposed.ptr(1)),\r\n Mat(output_transposed.size[1], output_transposed.size[2], CV_32F, output_transposed.ptr(2))\r\n};\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1648004283", + "pr_number": 25794, + "pr_file": "samples/dnn/yolo_detector.cpp", + "created_at": "2024-06-20T18:51:56+00:00", + "commented_code": "// remove the second element\n outs.pop_back();\n // unsqueeze the first dimension\n outs[0] = outs[0].reshape(0, std::vector{1, 8400, 84});\n outs[0] = outs[0].reshape(0, std::vector{1, 8400, nc + 4});\n }\n\n // assert if last dim is 85 or 84\n CV_CheckEQ((outs[0].size[2] == nc + 
5 || outs[0].size[2] == 80 + 4), true, \"Invalid output shape: \");", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1648004283", + "repo_full_name": "opencv/opencv", + "pr_number": 25794, + "pr_file": "samples/dnn/yolo_detector.cpp", + "discussion_id": "1648004283", + "commented_code": "@@ -131,25 +133,28 @@ void yoloPostProcessing(\n // remove the second element\n outs.pop_back();\n // unsqueeze the first dimension\n- outs[0] = outs[0].reshape(0, std::vector{1, 8400, 84});\n+ outs[0] = outs[0].reshape(0, std::vector{1, 8400, nc + 4});\n }\n \n+ // assert if last dim is 85 or 84\n+ CV_CheckEQ((outs[0].size[2] == nc + 5 || outs[0].size[2] == 80 + 4), true, \"Invalid output shape: \");", + "comment_created_at": "2024-06-20T18:51:56+00:00", + "comment_author": "dkurt", + "comment_body": "Check also that `outs[0].dims == 2` before.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1648007571", + "pr_number": 25794, + "pr_file": "samples/dnn/yolo_detector.cpp", + "created_at": "2024-06-20T18:55:17+00:00", + "commented_code": "// remove the second element\n outs.pop_back();\n // unsqueeze the first dimension\n outs[0] = outs[0].reshape(0, std::vector{1, 8400, 84});\n outs[0] = outs[0].reshape(0, std::vector{1, 8400, nc + 4});", + "repo_full_name": "opencv/opencv", + "discussion_comments": [ + { + "comment_id": "1648007571", + "repo_full_name": "opencv/opencv", + "pr_number": 25794, + "pr_file": "samples/dnn/yolo_detector.cpp", + "discussion_id": "1648007571", + "commented_code": "@@ -131,25 +133,28 @@ void yoloPostProcessing(\n // remove the second element\n outs.pop_back();\n // unsqueeze the first dimension\n- outs[0] = outs[0].reshape(0, std::vector{1, 8400, 84});\n+ outs[0] = outs[0].reshape(0, std::vector{1, 8400, nc + 4});", + "comment_created_at": "2024-06-20T18:55:17+00:00", + "comment_author": "dkurt", + "comment_body": "8400 is also depends on number of classes? So should it be `(nc + 4) * 100` or not?", + "pr_file_module": null + }, + { + "comment_id": "1648011078", + "repo_full_name": "opencv/opencv", + "pr_number": 25794, + "pr_file": "samples/dnn/yolo_detector.cpp", + "discussion_id": "1648007571", + "commented_code": "@@ -131,25 +133,28 @@ void yoloPostProcessing(\n // remove the second element\n outs.pop_back();\n // unsqueeze the first dimension\n- outs[0] = outs[0].reshape(0, std::vector{1, 8400, 84});\n+ outs[0] = outs[0].reshape(0, std::vector{1, 8400, nc + 4});", + "comment_created_at": "2024-06-20T18:58:57+00:00", + "comment_author": "dkurt", + "comment_body": "`nc` seems redundant as it can be computed from original shape:\r\n```cpp\r\nint nc = outs[0].total() / 8400 - 4;\r\n```\r\nor, if 8400 is also depends on `nc`:\r\n```cpp\r\nint nc = sqrt(outs[0].total() / 100) - 4;\r\n```", + "pr_file_module": null + }, + { + "comment_id": "1648490438", + "repo_full_name": "opencv/opencv", + "pr_number": 25794, + "pr_file": "samples/dnn/yolo_detector.cpp", + "discussion_id": "1648007571", + "commented_code": "@@ -131,25 +133,28 @@ void yoloPostProcessing(\n // remove the second element\n outs.pop_back();\n // unsqueeze the first dimension\n- outs[0] = outs[0].reshape(0, std::vector{1, 8400, 84});\n+ outs[0] = outs[0].reshape(0, std::vector{1, 8400, nc + 4});", + "comment_created_at": "2024-06-21T06:45:46+00:00", + "comment_author": "Abdurrahheem", + "comment_body": "> 8400 is also depends on number of classes? So should it be `(nc + 4) * 100` or not?\r\n\r\n8400 depends on number of anchors only and has nothing to do with `nc`. 
\r\n`nc` depends on which dataset detector what trained. On `COCO` dataset `nc = 80`. For other datasets it might be different. ", + "pr_file_module": null + }, + { + "comment_id": "1648498046", + "repo_full_name": "opencv/opencv", + "pr_number": 25794, + "pr_file": "samples/dnn/yolo_detector.cpp", + "discussion_id": "1648007571", + "commented_code": "@@ -131,25 +133,28 @@ void yoloPostProcessing(\n // remove the second element\n outs.pop_back();\n // unsqueeze the first dimension\n- outs[0] = outs[0].reshape(0, std::vector{1, 8400, 84});\n+ outs[0] = outs[0].reshape(0, std::vector{1, 8400, nc + 4});", + "comment_created_at": "2024-06-21T06:54:18+00:00", + "comment_author": "dkurt", + "comment_body": "Got it, thanks! So do you mind to compute `nc` from total number of `outs[0]` elements?", + "pr_file_module": null + }, + { + "comment_id": "1650834099", + "repo_full_name": "opencv/opencv", + "pr_number": 25794, + "pr_file": "samples/dnn/yolo_detector.cpp", + "discussion_id": "1648007571", + "commented_code": "@@ -131,25 +133,28 @@ void yoloPostProcessing(\n // remove the second element\n outs.pop_back();\n // unsqueeze the first dimension\n- outs[0] = outs[0].reshape(0, std::vector{1, 8400, 84});\n+ outs[0] = outs[0].reshape(0, std::vector{1, 8400, nc + 4});", + "comment_created_at": "2024-06-24T11:06:30+00:00", + "comment_author": "Abdurrahheem", + "comment_body": "I strongly believe that it should stay as a papermeter. As this is a parameter during traning. Computing it via `outs[0]` is kind of hack. ", + "pr_file_module": null + }, + { + "comment_id": "1654471328", + "repo_full_name": "opencv/opencv", + "pr_number": 25794, + "pr_file": "samples/dnn/yolo_detector.cpp", + "discussion_id": "1648007571", + "commented_code": "@@ -131,25 +133,28 @@ void yoloPostProcessing(\n // remove the second element\n outs.pop_back();\n // unsqueeze the first dimension\n- outs[0] = outs[0].reshape(0, std::vector{1, 8400, 84});\n+ outs[0] = outs[0].reshape(0, std::vector{1, 8400, nc + 4});", + "comment_created_at": "2024-06-26T09:36:20+00:00", + "comment_author": "dkurt", + "comment_body": "The corner case is a background class. Some training utils may include this as a separate class - some not. So even if you trained model for 80 real classes, there is a variation nc=80 nc=81 and it may confuse even more.", + "pr_file_module": null + }, + { + "comment_id": "1654779677", + "repo_full_name": "opencv/opencv", + "pr_number": 25794, + "pr_file": "samples/dnn/yolo_detector.cpp", + "discussion_id": "1648007571", + "commented_code": "@@ -131,25 +133,28 @@ void yoloPostProcessing(\n // remove the second element\n outs.pop_back();\n // unsqueeze the first dimension\n- outs[0] = outs[0].reshape(0, std::vector{1, 8400, 84});\n+ outs[0] = outs[0].reshape(0, std::vector{1, 8400, nc + 4});", + "comment_created_at": "2024-06-26T12:59:39+00:00", + "comment_author": "Abdurrahheem", + "comment_body": "Computing `nc` using number of anchor points is not good since number of anchors depens on the image size. Inference can happen on different image resolutons, depending on use case. 
", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/opencv-validate-tensor-dimensions.md b/_reviewers/opencv-validate-tensor-dimensions.md index fa9d6f1..583a693 100644 --- a/_reviewers/opencv-validate-tensor-dimensions.md +++ b/_reviewers/opencv-validate-tensor-dimensions.md @@ -33,145 +33,3 @@ for (int i = 0; i < 3; ++i) { ``` For parameters derived from tensor shapes, consider whether they should be computed or passed as parameters based on the model's architecture. Always document your assumptions about tensor shapes, especially when working with different AI model formats or versions. - - -[ - { - "discussion_id": "2137760658", - "pr_number": 27349, - "pr_file": "samples/dnn/deblurring.cpp", - "created_at": "2025-06-10T12:25:20+00:00", - "commented_code": "/*\nThis file is part of OpenCV project.\nIt is subject to the license terms in the LICENSE file found in the top-level directory\nof this distribution and at http://opencv.org/license.html.\n\nThis sample deblurs the given blurry image.\n\nCopyright (C) 2025, Bigvision LLC.\n\nHow to use:\n Sample command to run:\n `./example_dnn_deblurring`\n\n You can download NAFNet deblurring model using\n `python download_models.py NAFNet`\n\n References:\n Github: https://github.com/megvii-research/NAFNet\n PyTorch model: https://drive.google.com/file/d/14D4V4raNYIOhETfcuuLI3bGLB-OYIv6X/view\n\n PyTorch model was converted to ONNX and then ONNX model was further quantized using block quantization from [opencv_zoo](https://github.com/opencv/opencv_zoo/blob/main/tools/quantize/block_quantize.py)\n\n Set environment variable OPENCV_DOWNLOAD_CACHE_DIR to point to the directory where models are downloaded. Also, point OPENCV_SAMPLES_DATA_PATH to opencv/samples/data.\n*/\n\n#include \n#include \n\n#include \n#include \n#include \n\n#include \"common.hpp\"\n\nusing namespace cv;\nusing namespace dnn;\nusing namespace std;\n\nconst string about = \"Use this script for image deblurring using OpenCV. \\n\\n\"\n \"Firstly, download required models i.e. NAFNet using `download_models.py` (if not already done). Set environment variable OPENCV_DOWNLOAD_CACHE_DIR to point to the directory where models are downloaded. Also, point OPENCV_SAMPLES_DATA_PATH to opencv/samples/data.\\n\"\n \"To run:\\n\"\n \"\\t Example: ./example_dnn_deblurring [--input=] \\n\\n\"\n \"Deblurring model path can also be specified using --model argument.\\n\\n\";\n\nconst string param_keys =\n \"{ help h | | show help message}\"\n \"{ @alias | NAFNet | An alias name of model to extract preprocessing parameters from models.yml file. 
}\"\n \"{ zoo | ../dnn/models.yml | An optional path to file with preprocessing parameters }\"\n \"{ input i | licenseplate_motion.jpg | image file path}\";\n\nconst string backend_keys = format(\n \"{ backend | default | Choose one of computation backends: \"\n \"default: automatically (by default), \"\n \"openvino: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), \"\n \"opencv: OpenCV implementation, \"\n \"vkcom: VKCOM, \"\n \"cuda: CUDA, \"\n \"webnn: WebNN }\");\n\nconst string target_keys = format(\n \"{ target | cpu | Choose one of target computation devices: \"\n \"cpu: CPU target (by default), \"\n \"opencl: OpenCL, \"\n \"opencl_fp16: OpenCL fp16 (half-float precision), \"\n \"vpu: VPU, \"\n \"vulkan: Vulkan, \"\n \"cuda: CUDA, \"\n \"cuda_fp16: CUDA fp16 (half-float preprocess) }\");\n\nstring keys = param_keys + backend_keys + target_keys;\n\n\nint main(int argc, char **argv)\n{\n CommandLineParser parser(argc, argv, keys);\n\n if (!parser.has(\"@alias\") || parser.has(\"help\"))\n {\n cout<(\"@alias\");\n string zooFile = findFile(parser.get(\"zoo\"));\n keys += genPreprocArguments(modelName, zooFile);\n parser = CommandLineParser(argc, argv, keys);\n parser.about(\"Use this script to run image deblurring using OpenCV.\");\n\n const string sha1 = parser.get(\"sha1\");\n const string modelPath = findModel(parser.get(\"model\"), sha1);\n string imgPath = parser.get(\"input\");\n const string backend = parser.get(\"backend\");\n const string target = parser.get(\"target\");\n float scale = parser.get(\"scale\");\n bool swapRB = parser.get(\"rgb\");\n Scalar mean_v = parser.get(\"mean\");\n\n EngineType engine = ENGINE_AUTO;\n if (backend != \"default\" || target != \"cpu\"){\n engine = ENGINE_CLASSIC;\n }\n\n Net net = readNetFromONNX(modelPath, engine);\n net.setPreferableBackend(getBackendID(backend));\n net.setPreferableTarget(getTargetID(target));\n\n Mat inputImage = imread(findFile(imgPath));\n if (inputImage.empty()) {\n cerr << \"Error: Input image could not be loaded.\" << endl;\n return -1;\n }\n Mat image = inputImage.clone();\n\n Mat image_blob = blobFromImage(image, scale, Size(image.cols, image.rows), mean_v, swapRB, false);\n\n net.setInput(image_blob);\n Mat output = net.forward();\n\n // Post Processing\n Mat output_transposed(3, &output.size[1], CV_32F, output.ptr());\n\n vector channels;\n for (int i = 0; i < 3; ++i) {\n channels.push_back(Mat(output_transposed.size[1], output_transposed.size[2], CV_32F,\n output_transposed.ptr(i)));\n }", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "2137760658", - "repo_full_name": "opencv/opencv", - "pr_number": 27349, - "pr_file": "samples/dnn/deblurring.cpp", - "discussion_id": "2137760658", - "commented_code": "@@ -0,0 +1,136 @@\n+/*\n+This file is part of OpenCV project.\n+It is subject to the license terms in the LICENSE file found in the top-level directory\n+of this distribution and at http://opencv.org/license.html.\n+\n+This sample deblurs the given blurry image.\n+\n+Copyright (C) 2025, Bigvision LLC.\n+\n+How to use:\n+ Sample command to run:\n+ `./example_dnn_deblurring`\n+\n+ You can download NAFNet deblurring model using\n+ `python download_models.py NAFNet`\n+\n+ References:\n+ Github: https://github.com/megvii-research/NAFNet\n+ PyTorch model: https://drive.google.com/file/d/14D4V4raNYIOhETfcuuLI3bGLB-OYIv6X/view\n+\n+ PyTorch model was converted to ONNX and then ONNX model was further quantized using block quantization from 
[opencv_zoo](https://github.com/opencv/opencv_zoo/blob/main/tools/quantize/block_quantize.py)\n+\n+ Set environment variable OPENCV_DOWNLOAD_CACHE_DIR to point to the directory where models are downloaded. Also, point OPENCV_SAMPLES_DATA_PATH to opencv/samples/data.\n+*/\n+\n+#include \n+#include \n+\n+#include \n+#include \n+#include \n+\n+#include \"common.hpp\"\n+\n+using namespace cv;\n+using namespace dnn;\n+using namespace std;\n+\n+const string about = \"Use this script for image deblurring using OpenCV. \\n\\n\"\n+ \"Firstly, download required models i.e. NAFNet using `download_models.py` (if not already done). Set environment variable OPENCV_DOWNLOAD_CACHE_DIR to point to the directory where models are downloaded. Also, point OPENCV_SAMPLES_DATA_PATH to opencv/samples/data.\\n\"\n+ \"To run:\\n\"\n+ \"\\t Example: ./example_dnn_deblurring [--input=] \\n\\n\"\n+ \"Deblurring model path can also be specified using --model argument.\\n\\n\";\n+\n+const string param_keys =\n+ \"{ help h | | show help message}\"\n+ \"{ @alias | NAFNet | An alias name of model to extract preprocessing parameters from models.yml file. }\"\n+ \"{ zoo | ../dnn/models.yml | An optional path to file with preprocessing parameters }\"\n+ \"{ input i | licenseplate_motion.jpg | image file path}\";\n+\n+const string backend_keys = format(\n+ \"{ backend | default | Choose one of computation backends: \"\n+ \"default: automatically (by default), \"\n+ \"openvino: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), \"\n+ \"opencv: OpenCV implementation, \"\n+ \"vkcom: VKCOM, \"\n+ \"cuda: CUDA, \"\n+ \"webnn: WebNN }\");\n+\n+const string target_keys = format(\n+ \"{ target | cpu | Choose one of target computation devices: \"\n+ \"cpu: CPU target (by default), \"\n+ \"opencl: OpenCL, \"\n+ \"opencl_fp16: OpenCL fp16 (half-float precision), \"\n+ \"vpu: VPU, \"\n+ \"vulkan: Vulkan, \"\n+ \"cuda: CUDA, \"\n+ \"cuda_fp16: CUDA fp16 (half-float preprocess) }\");\n+\n+string keys = param_keys + backend_keys + target_keys;\n+\n+\n+int main(int argc, char **argv)\n+{\n+ CommandLineParser parser(argc, argv, keys);\n+\n+ if (!parser.has(\"@alias\") || parser.has(\"help\"))\n+ {\n+ cout<(\"@alias\");\n+ string zooFile = findFile(parser.get(\"zoo\"));\n+ keys += genPreprocArguments(modelName, zooFile);\n+ parser = CommandLineParser(argc, argv, keys);\n+ parser.about(\"Use this script to run image deblurring using OpenCV.\");\n+\n+ const string sha1 = parser.get(\"sha1\");\n+ const string modelPath = findModel(parser.get(\"model\"), sha1);\n+ string imgPath = parser.get(\"input\");\n+ const string backend = parser.get(\"backend\");\n+ const string target = parser.get(\"target\");\n+ float scale = parser.get(\"scale\");\n+ bool swapRB = parser.get(\"rgb\");\n+ Scalar mean_v = parser.get(\"mean\");\n+\n+ EngineType engine = ENGINE_AUTO;\n+ if (backend != \"default\" || target != \"cpu\"){\n+ engine = ENGINE_CLASSIC;\n+ }\n+\n+ Net net = readNetFromONNX(modelPath, engine);\n+ net.setPreferableBackend(getBackendID(backend));\n+ net.setPreferableTarget(getTargetID(target));\n+\n+ Mat inputImage = imread(findFile(imgPath));\n+ if (inputImage.empty()) {\n+ cerr << \"Error: Input image could not be loaded.\" << endl;\n+ return -1;\n+ }\n+ Mat image = inputImage.clone();\n+\n+ Mat image_blob = blobFromImage(image, scale, Size(image.cols, image.rows), mean_v, swapRB, false);\n+\n+ net.setInput(image_blob);\n+ Mat output = net.forward();\n+\n+ // Post Processing\n+ Mat output_transposed(3, 
&output.size[1], CV_32F, output.ptr());\n+\n+ vector channels;\n+ for (int i = 0; i < 3; ++i) {\n+ channels.push_back(Mat(output_transposed.size[1], output_transposed.size[2], CV_32F,\n+ output_transposed.ptr(i)));\n+ }", - "comment_created_at": "2025-06-10T12:25:20+00:00", - "comment_author": "asmorkalov", - "comment_body": "```\r\nvector channels = {\r\n Mat(output_transposed.size[1], output_transposed.size[2], CV_32F, output_transposed.ptr(0)),\r\n Mat(output_transposed.size[1], output_transposed.size[2], CV_32F, output_transposed.ptr(1)),\r\n Mat(output_transposed.size[1], output_transposed.size[2], CV_32F, output_transposed.ptr(2))\r\n};\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1648004283", - "pr_number": 25794, - "pr_file": "samples/dnn/yolo_detector.cpp", - "created_at": "2024-06-20T18:51:56+00:00", - "commented_code": "// remove the second element\n outs.pop_back();\n // unsqueeze the first dimension\n outs[0] = outs[0].reshape(0, std::vector{1, 8400, 84});\n outs[0] = outs[0].reshape(0, std::vector{1, 8400, nc + 4});\n }\n\n // assert if last dim is 85 or 84\n CV_CheckEQ((outs[0].size[2] == nc + 5 || outs[0].size[2] == 80 + 4), true, \"Invalid output shape: \");", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1648004283", - "repo_full_name": "opencv/opencv", - "pr_number": 25794, - "pr_file": "samples/dnn/yolo_detector.cpp", - "discussion_id": "1648004283", - "commented_code": "@@ -131,25 +133,28 @@ void yoloPostProcessing(\n // remove the second element\n outs.pop_back();\n // unsqueeze the first dimension\n- outs[0] = outs[0].reshape(0, std::vector{1, 8400, 84});\n+ outs[0] = outs[0].reshape(0, std::vector{1, 8400, nc + 4});\n }\n \n+ // assert if last dim is 85 or 84\n+ CV_CheckEQ((outs[0].size[2] == nc + 5 || outs[0].size[2] == 80 + 4), true, \"Invalid output shape: \");", - "comment_created_at": "2024-06-20T18:51:56+00:00", - "comment_author": "dkurt", - "comment_body": "Check also that `outs[0].dims == 2` before.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1648007571", - "pr_number": 25794, - "pr_file": "samples/dnn/yolo_detector.cpp", - "created_at": "2024-06-20T18:55:17+00:00", - "commented_code": "// remove the second element\n outs.pop_back();\n // unsqueeze the first dimension\n outs[0] = outs[0].reshape(0, std::vector{1, 8400, 84});\n outs[0] = outs[0].reshape(0, std::vector{1, 8400, nc + 4});", - "repo_full_name": "opencv/opencv", - "discussion_comments": [ - { - "comment_id": "1648007571", - "repo_full_name": "opencv/opencv", - "pr_number": 25794, - "pr_file": "samples/dnn/yolo_detector.cpp", - "discussion_id": "1648007571", - "commented_code": "@@ -131,25 +133,28 @@ void yoloPostProcessing(\n // remove the second element\n outs.pop_back();\n // unsqueeze the first dimension\n- outs[0] = outs[0].reshape(0, std::vector{1, 8400, 84});\n+ outs[0] = outs[0].reshape(0, std::vector{1, 8400, nc + 4});", - "comment_created_at": "2024-06-20T18:55:17+00:00", - "comment_author": "dkurt", - "comment_body": "8400 is also depends on number of classes? 
So should it be `(nc + 4) * 100` or not?", - "pr_file_module": null - }, - { - "comment_id": "1648011078", - "repo_full_name": "opencv/opencv", - "pr_number": 25794, - "pr_file": "samples/dnn/yolo_detector.cpp", - "discussion_id": "1648007571", - "commented_code": "@@ -131,25 +133,28 @@ void yoloPostProcessing(\n // remove the second element\n outs.pop_back();\n // unsqueeze the first dimension\n- outs[0] = outs[0].reshape(0, std::vector{1, 8400, 84});\n+ outs[0] = outs[0].reshape(0, std::vector{1, 8400, nc + 4});", - "comment_created_at": "2024-06-20T18:58:57+00:00", - "comment_author": "dkurt", - "comment_body": "`nc` seems redundant as it can be computed from original shape:\r\n```cpp\r\nint nc = outs[0].total() / 8400 - 4;\r\n```\r\nor, if 8400 is also depends on `nc`:\r\n```cpp\r\nint nc = sqrt(outs[0].total() / 100) - 4;\r\n```", - "pr_file_module": null - }, - { - "comment_id": "1648490438", - "repo_full_name": "opencv/opencv", - "pr_number": 25794, - "pr_file": "samples/dnn/yolo_detector.cpp", - "discussion_id": "1648007571", - "commented_code": "@@ -131,25 +133,28 @@ void yoloPostProcessing(\n // remove the second element\n outs.pop_back();\n // unsqueeze the first dimension\n- outs[0] = outs[0].reshape(0, std::vector{1, 8400, 84});\n+ outs[0] = outs[0].reshape(0, std::vector{1, 8400, nc + 4});", - "comment_created_at": "2024-06-21T06:45:46+00:00", - "comment_author": "Abdurrahheem", - "comment_body": "> 8400 is also depends on number of classes? So should it be `(nc + 4) * 100` or not?\r\n\r\n8400 depends on number of anchors only and has nothing to do with `nc`. \r\n`nc` depends on which dataset detector what trained. On `COCO` dataset `nc = 80`. For other datasets it might be different. ", - "pr_file_module": null - }, - { - "comment_id": "1648498046", - "repo_full_name": "opencv/opencv", - "pr_number": 25794, - "pr_file": "samples/dnn/yolo_detector.cpp", - "discussion_id": "1648007571", - "commented_code": "@@ -131,25 +133,28 @@ void yoloPostProcessing(\n // remove the second element\n outs.pop_back();\n // unsqueeze the first dimension\n- outs[0] = outs[0].reshape(0, std::vector{1, 8400, 84});\n+ outs[0] = outs[0].reshape(0, std::vector{1, 8400, nc + 4});", - "comment_created_at": "2024-06-21T06:54:18+00:00", - "comment_author": "dkurt", - "comment_body": "Got it, thanks! So do you mind to compute `nc` from total number of `outs[0]` elements?", - "pr_file_module": null - }, - { - "comment_id": "1650834099", - "repo_full_name": "opencv/opencv", - "pr_number": 25794, - "pr_file": "samples/dnn/yolo_detector.cpp", - "discussion_id": "1648007571", - "commented_code": "@@ -131,25 +133,28 @@ void yoloPostProcessing(\n // remove the second element\n outs.pop_back();\n // unsqueeze the first dimension\n- outs[0] = outs[0].reshape(0, std::vector{1, 8400, 84});\n+ outs[0] = outs[0].reshape(0, std::vector{1, 8400, nc + 4});", - "comment_created_at": "2024-06-24T11:06:30+00:00", - "comment_author": "Abdurrahheem", - "comment_body": "I strongly believe that it should stay as a papermeter. As this is a parameter during traning. Computing it via `outs[0]` is kind of hack. 
", - "pr_file_module": null - }, - { - "comment_id": "1654471328", - "repo_full_name": "opencv/opencv", - "pr_number": 25794, - "pr_file": "samples/dnn/yolo_detector.cpp", - "discussion_id": "1648007571", - "commented_code": "@@ -131,25 +133,28 @@ void yoloPostProcessing(\n // remove the second element\n outs.pop_back();\n // unsqueeze the first dimension\n- outs[0] = outs[0].reshape(0, std::vector{1, 8400, 84});\n+ outs[0] = outs[0].reshape(0, std::vector{1, 8400, nc + 4});", - "comment_created_at": "2024-06-26T09:36:20+00:00", - "comment_author": "dkurt", - "comment_body": "The corner case is a background class. Some training utils may include this as a separate class - some not. So even if you trained model for 80 real classes, there is a variation nc=80 nc=81 and it may confuse even more.", - "pr_file_module": null - }, - { - "comment_id": "1654779677", - "repo_full_name": "opencv/opencv", - "pr_number": 25794, - "pr_file": "samples/dnn/yolo_detector.cpp", - "discussion_id": "1648007571", - "commented_code": "@@ -131,25 +133,28 @@ void yoloPostProcessing(\n // remove the second element\n outs.pop_back();\n // unsqueeze the first dimension\n- outs[0] = outs[0].reshape(0, std::vector{1, 8400, 84});\n+ outs[0] = outs[0].reshape(0, std::vector{1, 8400, nc + 4});", - "comment_created_at": "2024-06-26T12:59:39+00:00", - "comment_author": "Abdurrahheem", - "comment_body": "Computing `nc` using number of anchor points is not good since number of anchors depens on the image size. Inference can happen on different image resolutons, depending on use case. ", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-avoid-logic-duplication.json b/_reviewers/pytorch-avoid-logic-duplication.json new file mode 100644 index 0000000..c457397 --- /dev/null +++ b/_reviewers/pytorch-avoid-logic-duplication.json @@ -0,0 +1,104 @@ +[ + { + "discussion_id": "2119470927", + "pr_number": 154198, + "pr_file": ".ci/docker/common/install_conda.sh", + "created_at": "2025-06-01T19:15:56+00:00", + "commented_code": "# Install some other packages, including those needed for Python test reporting\n pip_install -r /opt/conda/requirements-ci.txt\n\n # Install PyTorch mkl deps, as per https://github.com/pytorch/pytorch README\n if [[ $(uname -m) != \"aarch64\" ]]; then\n pip_install mkl-static==2024.2.0 mkl-include==2024.2.0", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2119470927", + "repo_full_name": "pytorch/pytorch", + "pr_number": 154198, + "pr_file": ".ci/docker/common/install_conda.sh", + "discussion_id": "2119470927", + "commented_code": "@@ -85,6 +83,11 @@ if [ -n \"$ANACONDA_PYTHON_VERSION\" ]; then\n # Install some other packages, including those needed for Python test reporting\n pip_install -r /opt/conda/requirements-ci.txt\n \n+ # Install PyTorch mkl deps, as per https://github.com/pytorch/pytorch README\n+ if [[ $(uname -m) != \"aarch64\" ]]; then\n+ pip_install mkl-static==2024.2.0 mkl-include==2024.2.0", + "comment_created_at": "2025-06-01T19:15:56+00:00", + "comment_author": "Skylion007", + "comment_body": "This logic is duplicated here right?: https://github.com/pytorch/pytorch/blob/c2e91157575a6064e19f2701e85af4035d33a23a/.ci/docker/common/install_mkl.sh#L5\r\n\r\nAlso, does mkl-static reallly distribute static libs? 
Won't that cause toolchain issues, especially if we want to enable Link Time Optimization on PyTorch at some point?", + "pr_file_module": null + }, + { + "comment_id": "2120298162", + "repo_full_name": "pytorch/pytorch", + "pr_number": 154198, + "pr_file": ".ci/docker/common/install_conda.sh", + "discussion_id": "2119470927", + "commented_code": "@@ -85,6 +83,11 @@ if [ -n \"$ANACONDA_PYTHON_VERSION\" ]; then\n # Install some other packages, including those needed for Python test reporting\n pip_install -r /opt/conda/requirements-ci.txt\n \n+ # Install PyTorch mkl deps, as per https://github.com/pytorch/pytorch README\n+ if [[ $(uname -m) != \"aarch64\" ]]; then\n+ pip_install mkl-static==2024.2.0 mkl-include==2024.2.0", + "comment_created_at": "2025-06-02T07:30:48+00:00", + "comment_author": "CaoE", + "comment_body": "Thanks for your comments. I don't know the relationship between `pytorch/.ci/docker/common/install_mkl.sh` and the MKL version of the CI environment. Does `pytorch/.ci/docker/common/install_mkl.sh` selects the MKL version for the pytorch releases ? I'm not sure whether such changes will cause the issues you mentioned. I tried to update MKL according to https://github.com/pytorch/pytorch README. Could you please give me some advice to upgrade MKL in CI? Thanks.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2176199626", + "pr_number": 157298, + "pr_file": ".ci/docker/common/install_executorch.sh", + "created_at": "2025-07-01T00:56:46+00:00", + "commented_code": "pushd executorch\n\n export PYTHON_EXECUTABLE=python\n export CMAKE_ARGS=\"-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON\"\n export CMAKE_ARGS=\"-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON -DEXECUTORCH_BUILD_TESTS=ON\"", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2176199626", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157298, + "pr_file": ".ci/docker/common/install_executorch.sh", + "discussion_id": "2176199626", + "commented_code": "@@ -50,7 +50,7 @@ setup_executorch() {\n pushd executorch\n \n export PYTHON_EXECUTABLE=python\n- export CMAKE_ARGS=\"-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON\"\n+ export CMAKE_ARGS=\"-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON -DEXECUTORCH_BUILD_TESTS=ON\"", + "comment_created_at": "2025-07-01T00:56:46+00:00", + "comment_author": "huydhn", + "comment_body": "A follow-up AI here I think is to create a short script on ExecuTorch CI to do this step https://github.com/pytorch/executorch/blob/main/.ci/scripts/unittest-linux.sh#L20-L26 so that we could just call it here instead of copying the logic over, which could get out of sync when ET is updated", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2109566628", + "pr_number": 148419, + "pr_file": ".ci/docker/common/install_cache.sh", + "created_at": "2025-05-27T15:49:12+00:00", + "commented_code": "set -ex\n\ninstall_ubuntu() {\n echo \"Preparing to build sccache from source\"\nSCCACHE_VERSION=\"0.9.1\"\n\nCARGO_FLAGS=\"\"\n\ninstall_prereqs_ubuntu() {\n apt-get update\n # libssl-dev will not work as it is upgraded to libssl3 in Ubuntu-22.04.\n # Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh``\n apt-get install -y cargo\n echo \"Checking out sccache repo\"\n git clone https://github.com/mozilla/sccache -b 
v0.9.1\n cd sccache\n echo \"Building sccache\"\n cargo build --release\n cp target/release/sccache /opt/cache/bin\n echo \"Cleaning up\"\n cd ..\n rm -rf sccache\n apt-get remove -y cargo rustc\n apt-get autoclean && apt-get clean\n\n # cleanup after ourselves\n trap 'cleanup_ubuntu' EXIT", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2109566628", + "repo_full_name": "pytorch/pytorch", + "pr_number": 148419, + "pr_file": ".ci/docker/common/install_cache.sh", + "discussion_id": "2109566628", + "commented_code": "@@ -2,29 +2,45 @@\n \n set -ex\n \n-install_ubuntu() {\n- echo \"Preparing to build sccache from source\"\n+SCCACHE_VERSION=\"0.9.1\"\n+\n+CARGO_FLAGS=\"\"\n+\n+install_prereqs_ubuntu() {\n apt-get update\n # libssl-dev will not work as it is upgraded to libssl3 in Ubuntu-22.04.\n # Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh``\n apt-get install -y cargo\n- echo \"Checking out sccache repo\"\n- git clone https://github.com/mozilla/sccache -b v0.9.1\n- cd sccache\n- echo \"Building sccache\"\n- cargo build --release\n- cp target/release/sccache /opt/cache/bin\n- echo \"Cleaning up\"\n- cd ..\n- rm -rf sccache\n- apt-get remove -y cargo rustc\n- apt-get autoclean && apt-get clean\n+\n+ # cleanup after ourselves\n+ trap 'cleanup_ubuntu' EXIT", + "comment_created_at": "2025-05-27T15:49:12+00:00", + "comment_author": "ZainRizvi", + "comment_body": "Don't you want to do a similar cleanup for almalinux?", + "pr_file_module": null + }, + { + "comment_id": "2110614106", + "repo_full_name": "pytorch/pytorch", + "pr_number": 148419, + "pr_file": ".ci/docker/common/install_cache.sh", + "discussion_id": "2109566628", + "commented_code": "@@ -2,29 +2,45 @@\n \n set -ex\n \n-install_ubuntu() {\n- echo \"Preparing to build sccache from source\"\n+SCCACHE_VERSION=\"0.9.1\"\n+\n+CARGO_FLAGS=\"\"\n+\n+install_prereqs_ubuntu() {\n apt-get update\n # libssl-dev will not work as it is upgraded to libssl3 in Ubuntu-22.04.\n # Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh``\n apt-get install -y cargo\n- echo \"Checking out sccache repo\"\n- git clone https://github.com/mozilla/sccache -b v0.9.1\n- cd sccache\n- echo \"Building sccache\"\n- cargo build --release\n- cp target/release/sccache /opt/cache/bin\n- echo \"Cleaning up\"\n- cd ..\n- rm -rf sccache\n- apt-get remove -y cargo rustc\n- apt-get autoclean && apt-get clean\n+\n+ # cleanup after ourselves\n+ trap 'cleanup_ubuntu' EXIT", + "comment_created_at": "2025-05-28T00:43:52+00:00", + "comment_author": "seemethere", + "comment_body": "No because we do the build in a multi-stage build meaning it'll automatically get cleaned up!", + "pr_file_module": null + }, + { + "comment_id": "2116890998", + "repo_full_name": "pytorch/pytorch", + "pr_number": 148419, + "pr_file": ".ci/docker/common/install_cache.sh", + "discussion_id": "2109566628", + "commented_code": "@@ -2,29 +2,45 @@\n \n set -ex\n \n-install_ubuntu() {\n- echo \"Preparing to build sccache from source\"\n+SCCACHE_VERSION=\"0.9.1\"\n+\n+CARGO_FLAGS=\"\"\n+\n+install_prereqs_ubuntu() {\n apt-get update\n # libssl-dev will not work as it is upgraded to libssl3 in Ubuntu-22.04.\n # Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh``\n apt-get install -y cargo\n- echo \"Checking out sccache repo\"\n- git clone https://github.com/mozilla/sccache -b v0.9.1\n- cd sccache\n- echo \"Building sccache\"\n- cargo build --release\n- cp target/release/sccache /opt/cache/bin\n- 
echo \"Cleaning up\"\n- cd ..\n- rm -rf sccache\n- apt-get remove -y cargo rustc\n- apt-get autoclean && apt-get clean\n+\n+ # cleanup after ourselves\n+ trap 'cleanup_ubuntu' EXIT", + "comment_created_at": "2025-05-31T00:16:04+00:00", + "comment_author": "ZainRizvi", + "comment_body": "Why does one use a multi-stage build but the other doesn't? Is that tech debt or does it need to be done that way?", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-avoid-logic-duplication.md b/_reviewers/pytorch-avoid-logic-duplication.md index 30f5615..1236b7b 100644 --- a/_reviewers/pytorch-avoid-logic-duplication.md +++ b/_reviewers/pytorch-avoid-logic-duplication.md @@ -25,109 +25,3 @@ setup_executorch_build() { ``` This approach is particularly important for installation routines, environment setup, and build configurations across Docker containers. When logic changes are needed, you only need to update in one place. Additionally, clearly document any intentional deviations or environment-specific adjustments to help reviewers understand why certain scripts may differ from the standard pattern. - - -[ - { - "discussion_id": "2119470927", - "pr_number": 154198, - "pr_file": ".ci/docker/common/install_conda.sh", - "created_at": "2025-06-01T19:15:56+00:00", - "commented_code": "# Install some other packages, including those needed for Python test reporting\n pip_install -r /opt/conda/requirements-ci.txt\n\n # Install PyTorch mkl deps, as per https://github.com/pytorch/pytorch README\n if [[ $(uname -m) != \"aarch64\" ]]; then\n pip_install mkl-static==2024.2.0 mkl-include==2024.2.0", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2119470927", - "repo_full_name": "pytorch/pytorch", - "pr_number": 154198, - "pr_file": ".ci/docker/common/install_conda.sh", - "discussion_id": "2119470927", - "commented_code": "@@ -85,6 +83,11 @@ if [ -n \"$ANACONDA_PYTHON_VERSION\" ]; then\n # Install some other packages, including those needed for Python test reporting\n pip_install -r /opt/conda/requirements-ci.txt\n \n+ # Install PyTorch mkl deps, as per https://github.com/pytorch/pytorch README\n+ if [[ $(uname -m) != \"aarch64\" ]]; then\n+ pip_install mkl-static==2024.2.0 mkl-include==2024.2.0", - "comment_created_at": "2025-06-01T19:15:56+00:00", - "comment_author": "Skylion007", - "comment_body": "This logic is duplicated here right?: https://github.com/pytorch/pytorch/blob/c2e91157575a6064e19f2701e85af4035d33a23a/.ci/docker/common/install_mkl.sh#L5\r\n\r\nAlso, does mkl-static reallly distribute static libs? Won't that cause toolchain issues, especially if we want to enable Link Time Optimization on PyTorch at some point?", - "pr_file_module": null - }, - { - "comment_id": "2120298162", - "repo_full_name": "pytorch/pytorch", - "pr_number": 154198, - "pr_file": ".ci/docker/common/install_conda.sh", - "discussion_id": "2119470927", - "commented_code": "@@ -85,6 +83,11 @@ if [ -n \"$ANACONDA_PYTHON_VERSION\" ]; then\n # Install some other packages, including those needed for Python test reporting\n pip_install -r /opt/conda/requirements-ci.txt\n \n+ # Install PyTorch mkl deps, as per https://github.com/pytorch/pytorch README\n+ if [[ $(uname -m) != \"aarch64\" ]]; then\n+ pip_install mkl-static==2024.2.0 mkl-include==2024.2.0", - "comment_created_at": "2025-06-02T07:30:48+00:00", - "comment_author": "CaoE", - "comment_body": "Thanks for your comments. 
I don't know the relationship between `pytorch/.ci/docker/common/install_mkl.sh` and the MKL version of the CI environment. Does `pytorch/.ci/docker/common/install_mkl.sh` selects the MKL version for the pytorch releases ? I'm not sure whether such changes will cause the issues you mentioned. I tried to update MKL according to https://github.com/pytorch/pytorch README. Could you please give me some advice to upgrade MKL in CI? Thanks.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2176199626", - "pr_number": 157298, - "pr_file": ".ci/docker/common/install_executorch.sh", - "created_at": "2025-07-01T00:56:46+00:00", - "commented_code": "pushd executorch\n\n export PYTHON_EXECUTABLE=python\n export CMAKE_ARGS=\"-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON\"\n export CMAKE_ARGS=\"-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON -DEXECUTORCH_BUILD_TESTS=ON\"", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2176199626", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157298, - "pr_file": ".ci/docker/common/install_executorch.sh", - "discussion_id": "2176199626", - "commented_code": "@@ -50,7 +50,7 @@ setup_executorch() {\n pushd executorch\n \n export PYTHON_EXECUTABLE=python\n- export CMAKE_ARGS=\"-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON\"\n+ export CMAKE_ARGS=\"-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON -DEXECUTORCH_BUILD_TESTS=ON\"", - "comment_created_at": "2025-07-01T00:56:46+00:00", - "comment_author": "huydhn", - "comment_body": "A follow-up AI here I think is to create a short script on ExecuTorch CI to do this step https://github.com/pytorch/executorch/blob/main/.ci/scripts/unittest-linux.sh#L20-L26 so that we could just call it here instead of copying the logic over, which could get out of sync when ET is updated", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2109566628", - "pr_number": 148419, - "pr_file": ".ci/docker/common/install_cache.sh", - "created_at": "2025-05-27T15:49:12+00:00", - "commented_code": "set -ex\n\ninstall_ubuntu() {\n echo \"Preparing to build sccache from source\"\nSCCACHE_VERSION=\"0.9.1\"\n\nCARGO_FLAGS=\"\"\n\ninstall_prereqs_ubuntu() {\n apt-get update\n # libssl-dev will not work as it is upgraded to libssl3 in Ubuntu-22.04.\n # Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh``\n apt-get install -y cargo\n echo \"Checking out sccache repo\"\n git clone https://github.com/mozilla/sccache -b v0.9.1\n cd sccache\n echo \"Building sccache\"\n cargo build --release\n cp target/release/sccache /opt/cache/bin\n echo \"Cleaning up\"\n cd ..\n rm -rf sccache\n apt-get remove -y cargo rustc\n apt-get autoclean && apt-get clean\n\n # cleanup after ourselves\n trap 'cleanup_ubuntu' EXIT", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2109566628", - "repo_full_name": "pytorch/pytorch", - "pr_number": 148419, - "pr_file": ".ci/docker/common/install_cache.sh", - "discussion_id": "2109566628", - "commented_code": "@@ -2,29 +2,45 @@\n \n set -ex\n \n-install_ubuntu() {\n- echo \"Preparing to build sccache from source\"\n+SCCACHE_VERSION=\"0.9.1\"\n+\n+CARGO_FLAGS=\"\"\n+\n+install_prereqs_ubuntu() {\n apt-get update\n # libssl-dev will not work as it is upgraded to libssl3 in Ubuntu-22.04.\n # Instead use lib 
and headers from OpenSSL1.1 installed in `install_openssl.sh``\n apt-get install -y cargo\n- echo \"Checking out sccache repo\"\n- git clone https://github.com/mozilla/sccache -b v0.9.1\n- cd sccache\n- echo \"Building sccache\"\n- cargo build --release\n- cp target/release/sccache /opt/cache/bin\n- echo \"Cleaning up\"\n- cd ..\n- rm -rf sccache\n- apt-get remove -y cargo rustc\n- apt-get autoclean && apt-get clean\n+\n+ # cleanup after ourselves\n+ trap 'cleanup_ubuntu' EXIT", - "comment_created_at": "2025-05-27T15:49:12+00:00", - "comment_author": "ZainRizvi", - "comment_body": "Don't you want to do a similar cleanup for almalinux?", - "pr_file_module": null - }, - { - "comment_id": "2110614106", - "repo_full_name": "pytorch/pytorch", - "pr_number": 148419, - "pr_file": ".ci/docker/common/install_cache.sh", - "discussion_id": "2109566628", - "commented_code": "@@ -2,29 +2,45 @@\n \n set -ex\n \n-install_ubuntu() {\n- echo \"Preparing to build sccache from source\"\n+SCCACHE_VERSION=\"0.9.1\"\n+\n+CARGO_FLAGS=\"\"\n+\n+install_prereqs_ubuntu() {\n apt-get update\n # libssl-dev will not work as it is upgraded to libssl3 in Ubuntu-22.04.\n # Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh``\n apt-get install -y cargo\n- echo \"Checking out sccache repo\"\n- git clone https://github.com/mozilla/sccache -b v0.9.1\n- cd sccache\n- echo \"Building sccache\"\n- cargo build --release\n- cp target/release/sccache /opt/cache/bin\n- echo \"Cleaning up\"\n- cd ..\n- rm -rf sccache\n- apt-get remove -y cargo rustc\n- apt-get autoclean && apt-get clean\n+\n+ # cleanup after ourselves\n+ trap 'cleanup_ubuntu' EXIT", - "comment_created_at": "2025-05-28T00:43:52+00:00", - "comment_author": "seemethere", - "comment_body": "No because we do the build in a multi-stage build meaning it'll automatically get cleaned up!", - "pr_file_module": null - }, - { - "comment_id": "2116890998", - "repo_full_name": "pytorch/pytorch", - "pr_number": 148419, - "pr_file": ".ci/docker/common/install_cache.sh", - "discussion_id": "2109566628", - "commented_code": "@@ -2,29 +2,45 @@\n \n set -ex\n \n-install_ubuntu() {\n- echo \"Preparing to build sccache from source\"\n+SCCACHE_VERSION=\"0.9.1\"\n+\n+CARGO_FLAGS=\"\"\n+\n+install_prereqs_ubuntu() {\n apt-get update\n # libssl-dev will not work as it is upgraded to libssl3 in Ubuntu-22.04.\n # Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh``\n apt-get install -y cargo\n- echo \"Checking out sccache repo\"\n- git clone https://github.com/mozilla/sccache -b v0.9.1\n- cd sccache\n- echo \"Building sccache\"\n- cargo build --release\n- cp target/release/sccache /opt/cache/bin\n- echo \"Cleaning up\"\n- cd ..\n- rm -rf sccache\n- apt-get remove -y cargo rustc\n- apt-get autoclean && apt-get clean\n+\n+ # cleanup after ourselves\n+ trap 'cleanup_ubuntu' EXIT", - "comment_created_at": "2025-05-31T00:16:04+00:00", - "comment_author": "ZainRizvi", - "comment_body": "Why does one use a multi-stage build but the other doesn't? 
Is that tech debt or does it need to be done that way?", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-check-before-access.json b/_reviewers/pytorch-check-before-access.json new file mode 100644 index 0000000..d0afffd --- /dev/null +++ b/_reviewers/pytorch-check-before-access.json @@ -0,0 +1,288 @@ +[ + { + "discussion_id": "2166162800", + "pr_number": 156796, + "pr_file": "test/distributed/checkpoint/test_state_dict.py", + "created_at": "2025-06-25T08:43:36+00:00", + "commented_code": "from torch.utils._pytree import tree_all, tree_all_only\n\n\ndevice_type = torch.accelerator.current_accelerator().type", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2166162800", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156796, + "pr_file": "test/distributed/checkpoint/test_state_dict.py", + "discussion_id": "2166162800", + "commented_code": "@@ -62,6 +62,9 @@\n from torch.utils._pytree import tree_all, tree_all_only\n \n \n+device_type = torch.accelerator.current_accelerator().type", + "comment_created_at": "2025-06-25T08:43:36+00:00", + "comment_author": "zeshengzong", + "comment_body": "Hi, `torch.accelerator.current_accelerator()` will return `None` when accelerator not detected, is there a check before running these tests? Thanks!", + "pr_file_module": null + }, + { + "comment_id": "2167354355", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156796, + "pr_file": "test/distributed/checkpoint/test_state_dict.py", + "discussion_id": "2166162800", + "commented_code": "@@ -62,6 +62,9 @@\n from torch.utils._pytree import tree_all, tree_all_only\n \n \n+device_type = torch.accelerator.current_accelerator().type", + "comment_created_at": "2025-06-25T18:31:47+00:00", + "comment_author": "harikodali", + "comment_body": "thanks for spotting, it's an oversight, changed it to use get_devtype from common_fsdp instead", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2170357623", + "pr_number": 156689, + "pr_file": "torch/_subclasses/fake_tensor.py", + "created_at": "2025-06-27T00:34:32+00:00", + "commented_code": "nonlocal flat_arg_fake_tensors\n if not self.is_our_fake(x):\n if torch.Tag.inplace_view in func.tags:\n if hasattr(func, \"tags\") and torch.Tag.inplace_view in func.tags:", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2170357623", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156689, + "pr_file": "torch/_subclasses/fake_tensor.py", + "discussion_id": "2170357623", + "commented_code": "@@ -2774,7 +2774,7 @@ def validate(x: T) -> Union[T, FakeTensor]:\n \n nonlocal flat_arg_fake_tensors\n if not self.is_our_fake(x):\n- if torch.Tag.inplace_view in func.tags:\n+ if hasattr(func, \"tags\") and torch.Tag.inplace_view in func.tags:", + "comment_created_at": "2025-06-27T00:34:32+00:00", + "comment_author": "leslie-fang-intel", + "comment_body": "The changes in this PR looks good to me. But\r\nwhen auditing the code, I noticed many instances where `func.tags` is used without checking whether `func` actually has a `tags` attribute. Do we need to review other parts of the code for similar issues or should we add the `tags` attribute to `HigherOrderOperator`? 
also cc @bdhirsh ", + "pr_file_module": null + }, + { + "comment_id": "2178804285", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156689, + "pr_file": "torch/_subclasses/fake_tensor.py", + "discussion_id": "2170357623", + "commented_code": "@@ -2774,7 +2774,7 @@ def validate(x: T) -> Union[T, FakeTensor]:\n \n nonlocal flat_arg_fake_tensors\n if not self.is_our_fake(x):\n- if torch.Tag.inplace_view in func.tags:\n+ if hasattr(func, \"tags\") and torch.Tag.inplace_view in func.tags:", + "comment_created_at": "2025-07-02T01:27:13+00:00", + "comment_author": "Valentine233", + "comment_body": "Thanks, good point! We can discuss more in this thread, and do the follow-up in the future.\r\nBut I suppose we can firstly land this PR, as urgently required in v2.8 release. ", + "pr_file_module": null + }, + { + "comment_id": "2179394831", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156689, + "pr_file": "torch/_subclasses/fake_tensor.py", + "discussion_id": "2170357623", + "commented_code": "@@ -2774,7 +2774,7 @@ def validate(x: T) -> Union[T, FakeTensor]:\n \n nonlocal flat_arg_fake_tensors\n if not self.is_our_fake(x):\n- if torch.Tag.inplace_view in func.tags:\n+ if hasattr(func, \"tags\") and torch.Tag.inplace_view in func.tags:", + "comment_created_at": "2025-07-02T08:01:11+00:00", + "comment_author": "leslie-fang-intel", + "comment_body": "> Thanks, good point! We can discuss more in this thread, and do the follow-up in the future. But I suppose we can firstly land this PR, as urgently required in v2.8 release.\r\n\r\nLooks good to me given the changes in this PR should be no side effect. Will you further work on for a formal solution?", + "pr_file_module": null + }, + { + "comment_id": "2179405142", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156689, + "pr_file": "torch/_subclasses/fake_tensor.py", + "discussion_id": "2170357623", + "commented_code": "@@ -2774,7 +2774,7 @@ def validate(x: T) -> Union[T, FakeTensor]:\n \n nonlocal flat_arg_fake_tensors\n if not self.is_our_fake(x):\n- if torch.Tag.inplace_view in func.tags:\n+ if hasattr(func, \"tags\") and torch.Tag.inplace_view in func.tags:", + "comment_created_at": "2025-07-02T08:06:24+00:00", + "comment_author": "Valentine233", + "comment_body": "Maybe needs some inputs from @bdhirsh about why `HigherOrderOperator` does not have `tags` attribute. Is this part of design or should we add `tags` for `HigherOrderOperator`?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2176304626", + "pr_number": 157317, + "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", + "created_at": "2025-07-01T02:44:09+00:00", + "commented_code": null, + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2176304626", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157317, + "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", + "discussion_id": "2176304626", + "commented_code": null, + "comment_created_at": "2025-07-01T02:44:09+00:00", + "comment_author": "githubsgi", + "comment_body": "This would fail as follows. 
\r\n\r\n```\r\n Error (MYPY) [union-attr]\r\n Item \"None\" of \"device | None\" has no attribute \"type\"\r\n\r\n 28 |\r\n 29 |def get_device_type() -> str:\r\n 30 | return (\r\n >>> 31 | torch.accelerator.current_accelerator().type\r\n 32 | if torch.accelerator.device_count() >= 4\r\n 33 | else \"cpu\"\r\n 34 | )\r\n\r\n+ echo ''\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2176308770", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157317, + "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", + "discussion_id": "2176304626", + "commented_code": null, + "comment_created_at": "2025-07-01T02:50:01+00:00", + "comment_author": "EikanWang", + "comment_body": "`current_accelerator` could return `None`.", + "pr_file_module": null + }, + { + "comment_id": "2176308958", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157317, + "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", + "discussion_id": "2176304626", + "commented_code": null, + "comment_created_at": "2025-07-01T02:50:21+00:00", + "comment_author": "EikanWang", + "comment_body": "The code needs to be polished a little bit.", + "pr_file_module": null + }, + { + "comment_id": "2176368205", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157317, + "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", + "discussion_id": "2176304626", + "commented_code": null, + "comment_created_at": "2025-07-01T04:13:22+00:00", + "comment_author": "guangyey", + "comment_body": "If `torch.accelerator.device_count() >= 4` is `True`, `torch.accelerator.current_accelerator()` must not return `None`\r\nI think it is OK to refine it or ignore the linter `# type: ignore[union-attr]`", + "pr_file_module": null + }, + { + "comment_id": "2176547066", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157317, + "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", + "discussion_id": "2176304626", + "commented_code": null, + "comment_created_at": "2025-07-01T06:35:54+00:00", + "comment_author": "githubsgi", + "comment_body": "@guangyey , I think it is better not to change the original logic. 
", + "pr_file_module": null + }, + { + "comment_id": "2176553104", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157317, + "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", + "discussion_id": "2176304626", + "commented_code": null, + "comment_created_at": "2025-07-01T06:39:11+00:00", + "comment_author": "guangyey", + "comment_body": "Sounds good.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2083884131", + "pr_number": 153213, + "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", + "created_at": "2025-05-12T06:08:05+00:00", + "commented_code": "def __init__(self, world_size: int, rank: int) -> None:\n self.world_size = world_size\n self.rank = rank\n self.device_type = get_device_type()\n self.device_type = torch.accelerator.current_accelerator().type", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2083884131", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153213, + "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", + "discussion_id": "2083884131", + "commented_code": "@@ -49,7 +41,7 @@ class CommDebugModeExample:\n def __init__(self, world_size: int, rank: int) -> None:\n self.world_size = world_size\n self.rank = rank\n- self.device_type = get_device_type()\n+ self.device_type = torch.accelerator.current_accelerator().type", + "comment_created_at": "2025-05-12T06:08:05+00:00", + "comment_author": "EikanWang", + "comment_body": "`torch.accelerator.current_accelerator()` may return `None`. Pls. handle the `None` case.", + "pr_file_module": null + }, + { + "comment_id": "2083885848", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153213, + "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", + "discussion_id": "2083884131", + "commented_code": "@@ -49,7 +41,7 @@ class CommDebugModeExample:\n def __init__(self, world_size: int, rank: int) -> None:\n self.world_size = world_size\n self.rank = rank\n- self.device_type = get_device_type()\n+ self.device_type = torch.accelerator.current_accelerator().type", + "comment_created_at": "2025-05-12T06:09:57+00:00", + "comment_author": "EikanWang", + "comment_body": "`get_device_type` requires the device count to be `>=4`. The current logic missed the check.", + "pr_file_module": null + }, + { + "comment_id": "2083888453", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153213, + "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", + "discussion_id": "2083884131", + "commented_code": "@@ -49,7 +41,7 @@ class CommDebugModeExample:\n def __init__(self, world_size: int, rank: int) -> None:\n self.world_size = world_size\n self.rank = rank\n- self.device_type = get_device_type()\n+ self.device_type = torch.accelerator.current_accelerator().type", + "comment_created_at": "2025-05-12T06:10:42+00:00", + "comment_author": "EikanWang", + "comment_body": "`torch.accelerator.current_accelerator()` does not contain `cpu` devices. 
Pls add the `cpu` support.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2023983308", + "pr_number": 137846, + "pr_file": "torch/utils/collect_env.py", + "created_at": "2025-04-02T02:46:51+00:00", + "commented_code": "return smi\n\n\ndef get_intel_gpu_onboard(run_lambda):\n lst = []\n platform = get_platform()\n if platform == \"linux\":\n txt = run_and_read_all(run_lambda, \"xpu-smi discovery -j\")\n if txt:\n try:\n obj = json.loads(txt)\n if type(obj[\"device_list\"]) is list:\n for o in obj[\"device_list\"]:\n lst.append(f'* {o[\"device_name\"]}')", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2023983308", + "repo_full_name": "pytorch/pytorch", + "pr_number": 137846, + "pr_file": "torch/utils/collect_env.py", + "discussion_id": "2023983308", + "commented_code": "@@ -243,6 +247,56 @@ def get_nvidia_smi():\n return smi\n \n \n+def get_intel_gpu_onboard(run_lambda):\n+ lst = []\n+ platform = get_platform()\n+ if platform == \"linux\":\n+ txt = run_and_read_all(run_lambda, \"xpu-smi discovery -j\")\n+ if txt:\n+ try:\n+ obj = json.loads(txt)\n+ if type(obj[\"device_list\"]) is list:\n+ for o in obj[\"device_list\"]:\n+ lst.append(f'* {o[\"device_name\"]}')", + "comment_created_at": "2025-04-02T02:46:51+00:00", + "comment_author": "guangyey", + "comment_body": "```suggestion\r\n device_list = obj.get(\"device_list\", [])\r\n if isinstance(device_list, list) and device_list:\r\n lst.extend(f'* {o[\"device_name\"]}' for device in device_list)\r\n else:\r\n lst.append(\"N/A\")\r\n```\r\nAvoid obj[\"device_list\"] is empty and check it before iteration.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2023996133", + "pr_number": 137846, + "pr_file": "torch/utils/collect_env.py", + "created_at": "2025-04-02T02:59:16+00:00", + "commented_code": "return smi\n\n\ndef get_intel_gpu_onboard(run_lambda):\n lst = []\n platform = get_platform()\n if platform == \"linux\":\n txt = run_and_read_all(run_lambda, \"xpu-smi discovery -j\")\n if txt:\n try:\n obj = json.loads(txt)\n if type(obj[\"device_list\"]) is list:\n for o in obj[\"device_list\"]:\n lst.append(f'* {o[\"device_name\"]}')\n else:\n lst.append(\"N/A\")\n except (ValueError, TypeError) as e:\n lst.append(str(e))\n else:\n lst.append(\"N/A\")\n if platform == \"win32\" or platform == \"cygwin\":\n txt = run_and_read_all(\n run_lambda,\n 'powershell.exe \"gwmi -Class Win32_PnpSignedDriver | where{$_.DeviceClass -eq \\\\\"DISPLAY\\\\\"\\\n -and $_.Manufacturer -match \\\\\"Intel\\\\\"} | Select-Object -Property DeviceName | ConvertTo-Json\"',\n )\n try:\n obj = json.loads(txt)\n if type(obj) is list:\n for o in obj:\n lst.append(f'* {o[\"DeviceName\"]}')\n else:\n lst.append(f'* {obj[\"DeviceName\"]}')", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2023996133", + "repo_full_name": "pytorch/pytorch", + "pr_number": 137846, + "pr_file": "torch/utils/collect_env.py", + "discussion_id": "2023996133", + "commented_code": "@@ -243,6 +247,56 @@ def get_nvidia_smi():\n return smi\n \n \n+def get_intel_gpu_onboard(run_lambda):\n+ lst = []\n+ platform = get_platform()\n+ if platform == \"linux\":\n+ txt = run_and_read_all(run_lambda, \"xpu-smi discovery -j\")\n+ if txt:\n+ try:\n+ obj = json.loads(txt)\n+ if type(obj[\"device_list\"]) is list:\n+ for o in obj[\"device_list\"]:\n+ lst.append(f'* {o[\"device_name\"]}')\n+ else:\n+ lst.append(\"N/A\")\n+ except (ValueError, TypeError) as e:\n+ lst.append(str(e))\n+ else:\n+ 
lst.append(\"N/A\")\n+ if platform == \"win32\" or platform == \"cygwin\":\n+ txt = run_and_read_all(\n+ run_lambda,\n+ 'powershell.exe \"gwmi -Class Win32_PnpSignedDriver | where{$_.DeviceClass -eq \\\\\"DISPLAY\\\\\"\\\n+ -and $_.Manufacturer -match \\\\\"Intel\\\\\"} | Select-Object -Property DeviceName | ConvertTo-Json\"',\n+ )\n+ try:\n+ obj = json.loads(txt)\n+ if type(obj) is list:\n+ for o in obj:\n+ lst.append(f'* {o[\"DeviceName\"]}')\n+ else:\n+ lst.append(f'* {obj[\"DeviceName\"]}')", + "comment_created_at": "2025-04-02T02:59:16+00:00", + "comment_author": "guangyey", + "comment_body": "```suggestion\r\n lst.append(f'* {obj.get(\"DeviceName\", \"N/A\"}')\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2159408147", + "pr_number": 156207, + "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", + "created_at": "2025-06-20T17:16:56+00:00", + "commented_code": "\"A CPU backend must be enabled for async save; try initializing process group with 'cpu:gloo,cuda:nccl'\"\n )\n\n use_default_staging = False\n if storage_writer is None:\n use_default_staging = True\n\n storage_writer = cast(\n StorageWriter, _storage_setup(storage_writer, checkpoint_id, reader=False)\n )\n\n state_dict = _stateful_to_state_dict(state_dict)\n\n @_dcp_method_logger(log_exceptions=True)\n def stage_state_dict():\n if isinstance(storage_writer, AsyncStager):\n staged_state_dict = storage_writer.stage(state_dict)\n else: # provides bwc for storage_writers not implementing AsyncStager\n staged_state_dict = _create_cpu_state_dict(state_dict)\n _copy_state_dict(state_dict, staged_state_dict, type_check=False)\n\n return staged_state_dict\n\n staged_state_dict = stage_state_dict()\n\n executor: _AsyncCheckpointExecutor = (\n def stage_state_dict() -> Future[STATE_DICT_TYPE]:\n staging_executor = ThreadPoolExecutor(max_workers=1)\n if isinstance(storage_writer, AsyncStager) and not use_default_staging:", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2159408147", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156207, + "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", + "discussion_id": "2159408147", + "commented_code": "@@ -249,49 +305,70 @@ def async_save(\n \"A CPU backend must be enabled for async save; try initializing process group with 'cpu:gloo,cuda:nccl'\"\n )\n \n+ use_default_staging = False\n+ if storage_writer is None:\n+ use_default_staging = True\n+\n storage_writer = cast(\n StorageWriter, _storage_setup(storage_writer, checkpoint_id, reader=False)\n )\n \n state_dict = _stateful_to_state_dict(state_dict)\n \n @_dcp_method_logger(log_exceptions=True)\n- def stage_state_dict():\n- if isinstance(storage_writer, AsyncStager):\n- staged_state_dict = storage_writer.stage(state_dict)\n- else: # provides bwc for storage_writers not implementing AsyncStager\n- staged_state_dict = _create_cpu_state_dict(state_dict)\n- _copy_state_dict(state_dict, staged_state_dict, type_check=False)\n-\n- return staged_state_dict\n-\n- staged_state_dict = stage_state_dict()\n-\n- executor: _AsyncCheckpointExecutor = (\n+ def stage_state_dict() -> Future[STATE_DICT_TYPE]:\n+ staging_executor = ThreadPoolExecutor(max_workers=1)\n+ if isinstance(storage_writer, AsyncStager) and not use_default_staging:", + "comment_created_at": "2025-06-20T17:16:56+00:00", + "comment_author": "teja-rao", + "comment_body": "```suggestion\r\n if storage_writer is not None and isinstance(storage_writer, AsyncStager):\r\n```", + "pr_file_module": null + } + ] 
+ } +] \ No newline at end of file diff --git a/_reviewers/pytorch-check-before-access.md b/_reviewers/pytorch-check-before-access.md index 63aacfe..7520896 100644 --- a/_reviewers/pytorch-check-before-access.md +++ b/_reviewers/pytorch-check-before-access.md @@ -63,293 +63,3 @@ if storage_writer is not None and isinstance(storage_writer, AsyncStager): ``` This defensive programming approach reduces unexpected crashes and improves code robustness. - - -[ - { - "discussion_id": "2166162800", - "pr_number": 156796, - "pr_file": "test/distributed/checkpoint/test_state_dict.py", - "created_at": "2025-06-25T08:43:36+00:00", - "commented_code": "from torch.utils._pytree import tree_all, tree_all_only\n\n\ndevice_type = torch.accelerator.current_accelerator().type", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2166162800", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156796, - "pr_file": "test/distributed/checkpoint/test_state_dict.py", - "discussion_id": "2166162800", - "commented_code": "@@ -62,6 +62,9 @@\n from torch.utils._pytree import tree_all, tree_all_only\n \n \n+device_type = torch.accelerator.current_accelerator().type", - "comment_created_at": "2025-06-25T08:43:36+00:00", - "comment_author": "zeshengzong", - "comment_body": "Hi, `torch.accelerator.current_accelerator()` will return `None` when accelerator not detected, is there a check before running these tests? Thanks!", - "pr_file_module": null - }, - { - "comment_id": "2167354355", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156796, - "pr_file": "test/distributed/checkpoint/test_state_dict.py", - "discussion_id": "2166162800", - "commented_code": "@@ -62,6 +62,9 @@\n from torch.utils._pytree import tree_all, tree_all_only\n \n \n+device_type = torch.accelerator.current_accelerator().type", - "comment_created_at": "2025-06-25T18:31:47+00:00", - "comment_author": "harikodali", - "comment_body": "thanks for spotting, it's an oversight, changed it to use get_devtype from common_fsdp instead", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2170357623", - "pr_number": 156689, - "pr_file": "torch/_subclasses/fake_tensor.py", - "created_at": "2025-06-27T00:34:32+00:00", - "commented_code": "nonlocal flat_arg_fake_tensors\n if not self.is_our_fake(x):\n if torch.Tag.inplace_view in func.tags:\n if hasattr(func, \"tags\") and torch.Tag.inplace_view in func.tags:", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2170357623", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156689, - "pr_file": "torch/_subclasses/fake_tensor.py", - "discussion_id": "2170357623", - "commented_code": "@@ -2774,7 +2774,7 @@ def validate(x: T) -> Union[T, FakeTensor]:\n \n nonlocal flat_arg_fake_tensors\n if not self.is_our_fake(x):\n- if torch.Tag.inplace_view in func.tags:\n+ if hasattr(func, \"tags\") and torch.Tag.inplace_view in func.tags:", - "comment_created_at": "2025-06-27T00:34:32+00:00", - "comment_author": "leslie-fang-intel", - "comment_body": "The changes in this PR looks good to me. But\r\nwhen auditing the code, I noticed many instances where `func.tags` is used without checking whether `func` actually has a `tags` attribute. Do we need to review other parts of the code for similar issues or should we add the `tags` attribute to `HigherOrderOperator`? 
also cc @bdhirsh ", - "pr_file_module": null - }, - { - "comment_id": "2178804285", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156689, - "pr_file": "torch/_subclasses/fake_tensor.py", - "discussion_id": "2170357623", - "commented_code": "@@ -2774,7 +2774,7 @@ def validate(x: T) -> Union[T, FakeTensor]:\n \n nonlocal flat_arg_fake_tensors\n if not self.is_our_fake(x):\n- if torch.Tag.inplace_view in func.tags:\n+ if hasattr(func, \"tags\") and torch.Tag.inplace_view in func.tags:", - "comment_created_at": "2025-07-02T01:27:13+00:00", - "comment_author": "Valentine233", - "comment_body": "Thanks, good point! We can discuss more in this thread, and do the follow-up in the future.\r\nBut I suppose we can firstly land this PR, as urgently required in v2.8 release. ", - "pr_file_module": null - }, - { - "comment_id": "2179394831", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156689, - "pr_file": "torch/_subclasses/fake_tensor.py", - "discussion_id": "2170357623", - "commented_code": "@@ -2774,7 +2774,7 @@ def validate(x: T) -> Union[T, FakeTensor]:\n \n nonlocal flat_arg_fake_tensors\n if not self.is_our_fake(x):\n- if torch.Tag.inplace_view in func.tags:\n+ if hasattr(func, \"tags\") and torch.Tag.inplace_view in func.tags:", - "comment_created_at": "2025-07-02T08:01:11+00:00", - "comment_author": "leslie-fang-intel", - "comment_body": "> Thanks, good point! We can discuss more in this thread, and do the follow-up in the future. But I suppose we can firstly land this PR, as urgently required in v2.8 release.\r\n\r\nLooks good to me given the changes in this PR should be no side effect. Will you further work on for a formal solution?", - "pr_file_module": null - }, - { - "comment_id": "2179405142", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156689, - "pr_file": "torch/_subclasses/fake_tensor.py", - "discussion_id": "2170357623", - "commented_code": "@@ -2774,7 +2774,7 @@ def validate(x: T) -> Union[T, FakeTensor]:\n \n nonlocal flat_arg_fake_tensors\n if not self.is_our_fake(x):\n- if torch.Tag.inplace_view in func.tags:\n+ if hasattr(func, \"tags\") and torch.Tag.inplace_view in func.tags:", - "comment_created_at": "2025-07-02T08:06:24+00:00", - "comment_author": "Valentine233", - "comment_body": "Maybe needs some inputs from @bdhirsh about why `HigherOrderOperator` does not have `tags` attribute. Is this part of design or should we add `tags` for `HigherOrderOperator`?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2176304626", - "pr_number": 157317, - "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", - "created_at": "2025-07-01T02:44:09+00:00", - "commented_code": null, - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2176304626", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157317, - "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", - "discussion_id": "2176304626", - "commented_code": null, - "comment_created_at": "2025-07-01T02:44:09+00:00", - "comment_author": "githubsgi", - "comment_body": "This would fail as follows. 
\r\n\r\n```\r\n Error (MYPY) [union-attr]\r\n Item \"None\" of \"device | None\" has no attribute \"type\"\r\n\r\n 28 |\r\n 29 |def get_device_type() -> str:\r\n 30 | return (\r\n >>> 31 | torch.accelerator.current_accelerator().type\r\n 32 | if torch.accelerator.device_count() >= 4\r\n 33 | else \"cpu\"\r\n 34 | )\r\n\r\n+ echo ''\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2176308770", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157317, - "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", - "discussion_id": "2176304626", - "commented_code": null, - "comment_created_at": "2025-07-01T02:50:01+00:00", - "comment_author": "EikanWang", - "comment_body": "`current_accelerator` could return `None`.", - "pr_file_module": null - }, - { - "comment_id": "2176308958", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157317, - "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", - "discussion_id": "2176304626", - "commented_code": null, - "comment_created_at": "2025-07-01T02:50:21+00:00", - "comment_author": "EikanWang", - "comment_body": "The code needs to be polished a little bit.", - "pr_file_module": null - }, - { - "comment_id": "2176368205", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157317, - "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", - "discussion_id": "2176304626", - "commented_code": null, - "comment_created_at": "2025-07-01T04:13:22+00:00", - "comment_author": "guangyey", - "comment_body": "If `torch.accelerator.device_count() >= 4` is `True`, `torch.accelerator.current_accelerator()` must not return `None`\r\nI think it is OK to refine it or ignore the linter `# type: ignore[union-attr]`", - "pr_file_module": null - }, - { - "comment_id": "2176547066", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157317, - "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", - "discussion_id": "2176304626", - "commented_code": null, - "comment_created_at": "2025-07-01T06:35:54+00:00", - "comment_author": "githubsgi", - "comment_body": "@guangyey , I think it is better not to change the original logic. 
", - "pr_file_module": null - }, - { - "comment_id": "2176553104", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157317, - "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", - "discussion_id": "2176304626", - "commented_code": null, - "comment_created_at": "2025-07-01T06:39:11+00:00", - "comment_author": "guangyey", - "comment_body": "Sounds good.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2083884131", - "pr_number": 153213, - "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", - "created_at": "2025-05-12T06:08:05+00:00", - "commented_code": "def __init__(self, world_size: int, rank: int) -> None:\n self.world_size = world_size\n self.rank = rank\n self.device_type = get_device_type()\n self.device_type = torch.accelerator.current_accelerator().type", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2083884131", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153213, - "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", - "discussion_id": "2083884131", - "commented_code": "@@ -49,7 +41,7 @@ class CommDebugModeExample:\n def __init__(self, world_size: int, rank: int) -> None:\n self.world_size = world_size\n self.rank = rank\n- self.device_type = get_device_type()\n+ self.device_type = torch.accelerator.current_accelerator().type", - "comment_created_at": "2025-05-12T06:08:05+00:00", - "comment_author": "EikanWang", - "comment_body": "`torch.accelerator.current_accelerator()` may return `None`. Pls. handle the `None` case.", - "pr_file_module": null - }, - { - "comment_id": "2083885848", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153213, - "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", - "discussion_id": "2083884131", - "commented_code": "@@ -49,7 +41,7 @@ class CommDebugModeExample:\n def __init__(self, world_size: int, rank: int) -> None:\n self.world_size = world_size\n self.rank = rank\n- self.device_type = get_device_type()\n+ self.device_type = torch.accelerator.current_accelerator().type", - "comment_created_at": "2025-05-12T06:09:57+00:00", - "comment_author": "EikanWang", - "comment_body": "`get_device_type` requires the device count to be `>=4`. The current logic missed the check.", - "pr_file_module": null - }, - { - "comment_id": "2083888453", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153213, - "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", - "discussion_id": "2083884131", - "commented_code": "@@ -49,7 +41,7 @@ class CommDebugModeExample:\n def __init__(self, world_size: int, rank: int) -> None:\n self.world_size = world_size\n self.rank = rank\n- self.device_type = get_device_type()\n+ self.device_type = torch.accelerator.current_accelerator().type", - "comment_created_at": "2025-05-12T06:10:42+00:00", - "comment_author": "EikanWang", - "comment_body": "`torch.accelerator.current_accelerator()` does not contain `cpu` devices. 
Pls add the `cpu` support.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2023983308", - "pr_number": 137846, - "pr_file": "torch/utils/collect_env.py", - "created_at": "2025-04-02T02:46:51+00:00", - "commented_code": "return smi\n\n\ndef get_intel_gpu_onboard(run_lambda):\n lst = []\n platform = get_platform()\n if platform == \"linux\":\n txt = run_and_read_all(run_lambda, \"xpu-smi discovery -j\")\n if txt:\n try:\n obj = json.loads(txt)\n if type(obj[\"device_list\"]) is list:\n for o in obj[\"device_list\"]:\n lst.append(f'* {o[\"device_name\"]}')", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2023983308", - "repo_full_name": "pytorch/pytorch", - "pr_number": 137846, - "pr_file": "torch/utils/collect_env.py", - "discussion_id": "2023983308", - "commented_code": "@@ -243,6 +247,56 @@ def get_nvidia_smi():\n return smi\n \n \n+def get_intel_gpu_onboard(run_lambda):\n+ lst = []\n+ platform = get_platform()\n+ if platform == \"linux\":\n+ txt = run_and_read_all(run_lambda, \"xpu-smi discovery -j\")\n+ if txt:\n+ try:\n+ obj = json.loads(txt)\n+ if type(obj[\"device_list\"]) is list:\n+ for o in obj[\"device_list\"]:\n+ lst.append(f'* {o[\"device_name\"]}')", - "comment_created_at": "2025-04-02T02:46:51+00:00", - "comment_author": "guangyey", - "comment_body": "```suggestion\r\n device_list = obj.get(\"device_list\", [])\r\n if isinstance(device_list, list) and device_list:\r\n lst.extend(f'* {o[\"device_name\"]}' for device in device_list)\r\n else:\r\n lst.append(\"N/A\")\r\n```\r\nAvoid obj[\"device_list\"] is empty and check it before iteration.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2023996133", - "pr_number": 137846, - "pr_file": "torch/utils/collect_env.py", - "created_at": "2025-04-02T02:59:16+00:00", - "commented_code": "return smi\n\n\ndef get_intel_gpu_onboard(run_lambda):\n lst = []\n platform = get_platform()\n if platform == \"linux\":\n txt = run_and_read_all(run_lambda, \"xpu-smi discovery -j\")\n if txt:\n try:\n obj = json.loads(txt)\n if type(obj[\"device_list\"]) is list:\n for o in obj[\"device_list\"]:\n lst.append(f'* {o[\"device_name\"]}')\n else:\n lst.append(\"N/A\")\n except (ValueError, TypeError) as e:\n lst.append(str(e))\n else:\n lst.append(\"N/A\")\n if platform == \"win32\" or platform == \"cygwin\":\n txt = run_and_read_all(\n run_lambda,\n 'powershell.exe \"gwmi -Class Win32_PnpSignedDriver | where{$_.DeviceClass -eq \\\\\"DISPLAY\\\\\"\\\n -and $_.Manufacturer -match \\\\\"Intel\\\\\"} | Select-Object -Property DeviceName | ConvertTo-Json\"',\n )\n try:\n obj = json.loads(txt)\n if type(obj) is list:\n for o in obj:\n lst.append(f'* {o[\"DeviceName\"]}')\n else:\n lst.append(f'* {obj[\"DeviceName\"]}')", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2023996133", - "repo_full_name": "pytorch/pytorch", - "pr_number": 137846, - "pr_file": "torch/utils/collect_env.py", - "discussion_id": "2023996133", - "commented_code": "@@ -243,6 +247,56 @@ def get_nvidia_smi():\n return smi\n \n \n+def get_intel_gpu_onboard(run_lambda):\n+ lst = []\n+ platform = get_platform()\n+ if platform == \"linux\":\n+ txt = run_and_read_all(run_lambda, \"xpu-smi discovery -j\")\n+ if txt:\n+ try:\n+ obj = json.loads(txt)\n+ if type(obj[\"device_list\"]) is list:\n+ for o in obj[\"device_list\"]:\n+ lst.append(f'* {o[\"device_name\"]}')\n+ else:\n+ lst.append(\"N/A\")\n+ except (ValueError, TypeError) as e:\n+ lst.append(str(e))\n+ else:\n+ 
lst.append(\"N/A\")\n+ if platform == \"win32\" or platform == \"cygwin\":\n+ txt = run_and_read_all(\n+ run_lambda,\n+ 'powershell.exe \"gwmi -Class Win32_PnpSignedDriver | where{$_.DeviceClass -eq \\\\\"DISPLAY\\\\\"\\\n+ -and $_.Manufacturer -match \\\\\"Intel\\\\\"} | Select-Object -Property DeviceName | ConvertTo-Json\"',\n+ )\n+ try:\n+ obj = json.loads(txt)\n+ if type(obj) is list:\n+ for o in obj:\n+ lst.append(f'* {o[\"DeviceName\"]}')\n+ else:\n+ lst.append(f'* {obj[\"DeviceName\"]}')", - "comment_created_at": "2025-04-02T02:59:16+00:00", - "comment_author": "guangyey", - "comment_body": "```suggestion\r\n lst.append(f'* {obj.get(\"DeviceName\", \"N/A\"}')\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2159408147", - "pr_number": 156207, - "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", - "created_at": "2025-06-20T17:16:56+00:00", - "commented_code": "\"A CPU backend must be enabled for async save; try initializing process group with 'cpu:gloo,cuda:nccl'\"\n )\n\n use_default_staging = False\n if storage_writer is None:\n use_default_staging = True\n\n storage_writer = cast(\n StorageWriter, _storage_setup(storage_writer, checkpoint_id, reader=False)\n )\n\n state_dict = _stateful_to_state_dict(state_dict)\n\n @_dcp_method_logger(log_exceptions=True)\n def stage_state_dict():\n if isinstance(storage_writer, AsyncStager):\n staged_state_dict = storage_writer.stage(state_dict)\n else: # provides bwc for storage_writers not implementing AsyncStager\n staged_state_dict = _create_cpu_state_dict(state_dict)\n _copy_state_dict(state_dict, staged_state_dict, type_check=False)\n\n return staged_state_dict\n\n staged_state_dict = stage_state_dict()\n\n executor: _AsyncCheckpointExecutor = (\n def stage_state_dict() -> Future[STATE_DICT_TYPE]:\n staging_executor = ThreadPoolExecutor(max_workers=1)\n if isinstance(storage_writer, AsyncStager) and not use_default_staging:", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2159408147", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156207, - "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", - "discussion_id": "2159408147", - "commented_code": "@@ -249,49 +305,70 @@ def async_save(\n \"A CPU backend must be enabled for async save; try initializing process group with 'cpu:gloo,cuda:nccl'\"\n )\n \n+ use_default_staging = False\n+ if storage_writer is None:\n+ use_default_staging = True\n+\n storage_writer = cast(\n StorageWriter, _storage_setup(storage_writer, checkpoint_id, reader=False)\n )\n \n state_dict = _stateful_to_state_dict(state_dict)\n \n @_dcp_method_logger(log_exceptions=True)\n- def stage_state_dict():\n- if isinstance(storage_writer, AsyncStager):\n- staged_state_dict = storage_writer.stage(state_dict)\n- else: # provides bwc for storage_writers not implementing AsyncStager\n- staged_state_dict = _create_cpu_state_dict(state_dict)\n- _copy_state_dict(state_dict, staged_state_dict, type_check=False)\n-\n- return staged_state_dict\n-\n- staged_state_dict = stage_state_dict()\n-\n- executor: _AsyncCheckpointExecutor = (\n+ def stage_state_dict() -> Future[STATE_DICT_TYPE]:\n+ staging_executor = ThreadPoolExecutor(max_workers=1)\n+ if isinstance(storage_writer, AsyncStager) and not use_default_staging:", - "comment_created_at": "2025-06-20T17:16:56+00:00", - "comment_author": "teja-rao", - "comment_body": "```suggestion\r\n if storage_writer is not None and isinstance(storage_writer, AsyncStager):\r\n```", - "pr_file_module": null - } - ] 
- } -] diff --git a/_reviewers/pytorch-choose-optimal-algorithms.json b/_reviewers/pytorch-choose-optimal-algorithms.json new file mode 100644 index 0000000..9bd4d8f --- /dev/null +++ b/_reviewers/pytorch-choose-optimal-algorithms.json @@ -0,0 +1,282 @@ +[ + { + "discussion_id": "2145816443", + "pr_number": 151547, + "pr_file": "aten/src/ATen/native/CPUBlas.cpp", + "created_at": "2025-06-13T18:29:32+00:00", + "commented_code": "b, &ldb_,\n &beta_,\n float_v.data(), &ldc_);\n for (auto cv: float_v) {\n *(c++) = c10::convert(cv);\n\n for (const auto j : c10::irange(n)) {\n for (const auto i : c10::irange(m)) {\n auto offset = j * ldc + i;\n c[offset] = c10::convert(float_v[j * m + i]);", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2145816443", + "repo_full_name": "pytorch/pytorch", + "pr_number": 151547, + "pr_file": "aten/src/ATen/native/CPUBlas.cpp", + "discussion_id": "2145816443", + "commented_code": "@@ -368,8 +368,12 @@ void gemm(\n b, &ldb_,\n &beta_,\n float_v.data(), &ldc_);\n- for (auto cv: float_v) {\n- *(c++) = c10::convert(cv);\n+\n+ for (const auto j : c10::irange(n)) {\n+ for (const auto i : c10::irange(m)) {\n+ auto offset = j * ldc + i;\n+ c[offset] = c10::convert(float_v[j * m + i]);", + "comment_created_at": "2025-06-13T18:29:32+00:00", + "comment_author": "taoye9", + "comment_body": "should it be `float_v[j * ldc_ + i]` instead of `float_v[j * m + i]` given sbgemm leading dim is ldc?\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2178739355", + "pr_number": 157267, + "pr_file": "aten/src/ATen/cpu/vec/vec512/vec512_qint.h", + "created_at": "2025-07-01T23:53:23+00:00", + "commented_code": "return _mm512_permutexvar_epi32(permute_mask_v, xyzw_clamped_v);\n}\n\ntemplate <>\nat::vec::Vectorized inline convert_float_to_int8(\n at::vec::Vectorized src) {\n // The type of min_val should be int8_t to ensure correct clamping behavior.\n constexpr auto min_val = std::numeric_limits::min();\n constexpr auto max_val = std::numeric_limits::max();\n __m512 float32_min_val = _mm512_set1_ps(float(min_val));\n __m512 float32_max_val = _mm512_set1_ps(float(max_val));\n __m512 float32_src = _mm512_max_ps(src, float32_min_val);\n float32_src = _mm512_min_ps(float32_src, float32_max_val);\n __m512i int32_src = _mm512_cvttps_epi32(float32_src);\n uint8_t output_mem[Vectorized::size()];\n _mm512_mask_cvtepi32_storeu_epi8(output_mem, 0xffff, int32_src);", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2178739355", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157267, + "pr_file": "aten/src/ATen/cpu/vec/vec512/vec512_qint.h", + "discussion_id": "2178739355", + "commented_code": "@@ -159,6 +161,22 @@ typename std::enable_if_t<\n return _mm512_permutexvar_epi32(permute_mask_v, xyzw_clamped_v);\n }\n \n+template <>\n+at::vec::Vectorized inline convert_float_to_int8(\n+ at::vec::Vectorized src) {\n+ // The type of min_val should be int8_t to ensure correct clamping behavior.\n+ constexpr auto min_val = std::numeric_limits::min();\n+ constexpr auto max_val = std::numeric_limits::max();\n+ __m512 float32_min_val = _mm512_set1_ps(float(min_val));\n+ __m512 float32_max_val = _mm512_set1_ps(float(max_val));\n+ __m512 float32_src = _mm512_max_ps(src, float32_min_val);\n+ float32_src = _mm512_min_ps(float32_src, float32_max_val);\n+ __m512i int32_src = _mm512_cvttps_epi32(float32_src);\n+ uint8_t output_mem[Vectorized::size()];\n+ _mm512_mask_cvtepi32_storeu_epi8(output_mem, 0xffff, int32_src);", + 
"comment_created_at": "2025-07-01T23:53:23+00:00", + "comment_author": "leslie-fang-intel", + "comment_body": "Could we use\r\n```\r\n__m128i int8_src = _mm512_cvtepi32_epi8(int32_src);\r\nreturn _mm512_castsi128_si512(int8_src);\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2178869263", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157267, + "pr_file": "aten/src/ATen/cpu/vec/vec512/vec512_qint.h", + "discussion_id": "2178739355", + "commented_code": "@@ -159,6 +161,22 @@ typename std::enable_if_t<\n return _mm512_permutexvar_epi32(permute_mask_v, xyzw_clamped_v);\n }\n \n+template <>\n+at::vec::Vectorized inline convert_float_to_int8(\n+ at::vec::Vectorized src) {\n+ // The type of min_val should be int8_t to ensure correct clamping behavior.\n+ constexpr auto min_val = std::numeric_limits::min();\n+ constexpr auto max_val = std::numeric_limits::max();\n+ __m512 float32_min_val = _mm512_set1_ps(float(min_val));\n+ __m512 float32_max_val = _mm512_set1_ps(float(max_val));\n+ __m512 float32_src = _mm512_max_ps(src, float32_min_val);\n+ float32_src = _mm512_min_ps(float32_src, float32_max_val);\n+ __m512i int32_src = _mm512_cvttps_epi32(float32_src);\n+ uint8_t output_mem[Vectorized::size()];\n+ _mm512_mask_cvtepi32_storeu_epi8(output_mem, 0xffff, int32_src);", + "comment_created_at": "2025-07-02T02:39:04+00:00", + "comment_author": "thenumberouscode", + "comment_body": "I've made the changes as per your suggestions. Please let me know if you have any further comments or questions.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2109952742", + "pr_number": 153666, + "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", + "created_at": "2025-05-27T18:59:36+00:00", + "commented_code": "vec_t data = X_vec[i];\n #pragma unroll\n for (int ii=0; ii < vec_size; ii++){\n wd = cuWelfordOnlineSum(static_cast(data.val[ii]), wd);\n if(!rms_norm){\n wd = cuWelfordOnlineSum(static_cast(data.val[ii]), wd);\n } else{\n wd = cuRMSOnlineSum(static_cast(data.val[ii]), wd);\n }\n }\n }\n // intra-warp reduction\n for (int offset = (C10_WARP_SIZE >> 1); offset > 0; offset >>= 1) {\n WelfordDataLN wdB{WARP_SHFL_DOWN(wd.mean, offset),\n WARP_SHFL_DOWN(wd.sigma2, offset), WARP_SHFL_DOWN(wd.count, offset)};\n wd = cuWelfordCombine(wd, wdB);\n WelfordDataLN wdB{WARP_SHFL_DOWN(wd.mean, offset), WARP_SHFL_DOWN(wd.sigma2, offset), WARP_SHFL_DOWN(wd.count, offset)};\n if(!rms_norm){", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2109952742", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153666, + "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", + "discussion_id": "2109952742", + "commented_code": "@@ -171,14 +197,21 @@ __device__ WelfordDataLN compute_stats(\n vec_t data = X_vec[i];\n #pragma unroll\n for (int ii=0; ii < vec_size; ii++){\n- wd = cuWelfordOnlineSum(static_cast(data.val[ii]), wd);\n+ if(!rms_norm){\n+ wd = cuWelfordOnlineSum(static_cast(data.val[ii]), wd);\n+ } else{\n+ wd = cuRMSOnlineSum(static_cast(data.val[ii]), wd);\n+ }\n }\n }\n // intra-warp reduction\n for (int offset = (C10_WARP_SIZE >> 1); offset > 0; offset >>= 1) {\n- WelfordDataLN wdB{WARP_SHFL_DOWN(wd.mean, offset),\n- WARP_SHFL_DOWN(wd.sigma2, offset), WARP_SHFL_DOWN(wd.count, offset)};\n- wd = cuWelfordCombine(wd, wdB);\n+ WelfordDataLN wdB{WARP_SHFL_DOWN(wd.mean, offset), WARP_SHFL_DOWN(wd.sigma2, offset), WARP_SHFL_DOWN(wd.count, offset)};\n+ if(!rms_norm){", + "comment_created_at": "2025-05-27T18:59:36+00:00", + "comment_author": 
"jeffdaily", + "comment_body": "`if constexpr`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2178606011", + "pr_number": 157290, + "pr_file": "torch/nativert/executor/memory/LayoutManager.cpp", + "created_at": "2025-07-01T21:56:40+00:00", + "commented_code": "}\n}\n\n#ifndef NDEBUG\nvoid LayoutManager::assert_no_overlapping_storages(const Node& node, size_t t)\n const {\n if (state_ != LayoutManagerState::Running) {\n return;\n }\n\n /*\n for each value\n (either an input or output)\n ensure that the associated storage\n slice lies within the allocated slice\n if it is managed (or if it is an alias,\n we can use the slice allocated to its source)\n ---\n also ensure that the current index lies\n within the lifetime of this value\n */\n\n const auto& alias_analyzer = planner_.get_alias_analyzer();\n const auto& alive_values = alias_analyzer.alive_values_at_time(t);\n\n // make sure active memory intervals are non-overlapping\n // by sorting them by start, and ensuring", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2178606011", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157290, + "pr_file": "torch/nativert/executor/memory/LayoutManager.cpp", + "discussion_id": "2178606011", + "commented_code": "@@ -157,6 +160,123 @@ void LayoutManager::populate_tensor_values() {\n }\n }\n \n+#ifndef NDEBUG\n+void LayoutManager::assert_no_overlapping_storages(const Node& node, size_t t)\n+ const {\n+ if (state_ != LayoutManagerState::Running) {\n+ return;\n+ }\n+\n+ /*\n+ for each value\n+ (either an input or output)\n+ ensure that the associated storage\n+ slice lies within the allocated slice\n+ if it is managed (or if it is an alias,\n+ we can use the slice allocated to its source)\n+ ---\n+ also ensure that the current index lies\n+ within the lifetime of this value\n+ */\n+\n+ const auto& alias_analyzer = planner_.get_alias_analyzer();\n+ const auto& alive_values = alias_analyzer.alive_values_at_time(t);\n+\n+ // make sure active memory intervals are non-overlapping\n+ // by sorting them by start, and ensuring", + "comment_created_at": "2025-07-01T21:56:40+00:00", + "comment_author": "SherlockNoMad", + "comment_body": "How is the sorting achieved? Do you need a custom comparator? \r\nIf you are relying on the default std::set\\ behavior, make a remark mentioning so, coz I don't how c++ compare pairs by default. 
\r\n", + "pr_file_module": null + }, + { + "comment_id": "2178737089", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157290, + "pr_file": "torch/nativert/executor/memory/LayoutManager.cpp", + "discussion_id": "2178606011", + "commented_code": "@@ -157,6 +160,123 @@ void LayoutManager::populate_tensor_values() {\n }\n }\n \n+#ifndef NDEBUG\n+void LayoutManager::assert_no_overlapping_storages(const Node& node, size_t t)\n+ const {\n+ if (state_ != LayoutManagerState::Running) {\n+ return;\n+ }\n+\n+ /*\n+ for each value\n+ (either an input or output)\n+ ensure that the associated storage\n+ slice lies within the allocated slice\n+ if it is managed (or if it is an alias,\n+ we can use the slice allocated to its source)\n+ ---\n+ also ensure that the current index lies\n+ within the lifetime of this value\n+ */\n+\n+ const auto& alias_analyzer = planner_.get_alias_analyzer();\n+ const auto& alive_values = alias_analyzer.alive_values_at_time(t);\n+\n+ // make sure active memory intervals are non-overlapping\n+ // by sorting them by start, and ensuring", + "comment_created_at": "2025-07-01T23:50:34+00:00", + "comment_author": "dolpm", + "comment_body": "on comparison of pairs: https://cplusplus.com/reference/utility/pair/operators/\r\n\r\ni will add this to the comment.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2172431501", + "pr_number": 156203, + "pr_file": "aten/src/ATen/native/cuda/GroupMM.cu", + "created_at": "2025-06-27T16:40:54+00:00", + "commented_code": "#include \n#include \n\n#include \n\nnamespace {\nusing Strides = at::cuda::detail::Strides; // std::array;\n\ntemplate \ntemplate \nstruct Schedule {\n // SM90\n using CooperativeSchedule =\n cutlass::gemm::KernelPtrArrayTmaWarpSpecializedCooperative;\n using PongSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecializedPingpong;\n using CooperativeEpilogueSchedule =\n cutlass::epilogue::PtrArrayTmaWarpSpecializedCooperative;\n using PongEpilogueSchedule =\n cutlass::epilogue::PtrArrayTmaWarpSpecializedPingpong;\n using KernelSchedule =\n cute::conditional_t;\n using EpilogueSchedule = cute::\n conditional_t;\n // SM100\n using MMA1SMKernelSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecialized1SmSm100;\n using MMA1SMEpilogueSchedule = cutlass::epilogue::PtrArrayTmaWarpSpecialized1Sm;\n using MMA2SMKernelSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecialized2SmSm100;\n using MMA2SMEpilogueSchedule = cutlass::epilogue::PtrArrayTmaWarpSpecialized2Sm;", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2172431501", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156203, + "pr_file": "aten/src/ATen/native/cuda/GroupMM.cu", + "discussion_id": "2172431501", + "commented_code": "@@ -43,22 +44,34 @@ C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(\"-Wunused-but-set-parameter\")\n #include \n #include \n \n+#include \n+\n namespace {\n using Strides = at::cuda::detail::Strides; // std::array;\n \n-template \n+template \n struct Schedule {\n+ // SM90\n using CooperativeSchedule =\n cutlass::gemm::KernelPtrArrayTmaWarpSpecializedCooperative;\n using PongSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecializedPingpong;\n using CooperativeEpilogueSchedule =\n cutlass::epilogue::PtrArrayTmaWarpSpecializedCooperative;\n using PongEpilogueSchedule =\n cutlass::epilogue::PtrArrayTmaWarpSpecializedPingpong;\n- using KernelSchedule =\n- cute::conditional_t;\n- using EpilogueSchedule = cute::\n- conditional_t;\n+ // SM100\n+ using MMA1SMKernelSchedule = 
cutlass::gemm::KernelPtrArrayTmaWarpSpecialized1SmSm100;\n+ using MMA1SMEpilogueSchedule = cutlass::epilogue::PtrArrayTmaWarpSpecialized1Sm;\n+ using MMA2SMKernelSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecialized2SmSm100;\n+ using MMA2SMEpilogueSchedule = cutlass::epilogue::PtrArrayTmaWarpSpecialized2Sm;\n+", + "comment_created_at": "2025-06-27T16:40:54+00:00", + "comment_author": "drisspg", + "comment_body": "Is there no pingpong schedule for sm100?", + "pr_file_module": null + }, + { + "comment_id": "2172484226", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156203, + "pr_file": "aten/src/ATen/native/cuda/GroupMM.cu", + "discussion_id": "2172431501", + "commented_code": "@@ -43,22 +44,34 @@ C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(\"-Wunused-but-set-parameter\")\n #include \n #include \n \n+#include \n+\n namespace {\n using Strides = at::cuda::detail::Strides; // std::array;\n \n-template \n+template \n struct Schedule {\n+ // SM90\n using CooperativeSchedule =\n cutlass::gemm::KernelPtrArrayTmaWarpSpecializedCooperative;\n using PongSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecializedPingpong;\n using CooperativeEpilogueSchedule =\n cutlass::epilogue::PtrArrayTmaWarpSpecializedCooperative;\n using PongEpilogueSchedule =\n cutlass::epilogue::PtrArrayTmaWarpSpecializedPingpong;\n- using KernelSchedule =\n- cute::conditional_t;\n- using EpilogueSchedule = cute::\n- conditional_t;\n+ // SM100\n+ using MMA1SMKernelSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecialized1SmSm100;\n+ using MMA1SMEpilogueSchedule = cutlass::epilogue::PtrArrayTmaWarpSpecialized1Sm;\n+ using MMA2SMKernelSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecialized2SmSm100;\n+ using MMA2SMEpilogueSchedule = cutlass::epilogue::PtrArrayTmaWarpSpecialized2Sm;\n+", + "comment_created_at": "2025-06-27T17:04:10+00:00", + "comment_author": "AaronWang04", + "comment_body": "Nope, this is what the CUTLASS team said when I asked: \"with SM100, there are no more cooperative & pingpong scheduling policies (in the name, everything is pingpong)\"", + "pr_file_module": null + }, + { + "comment_id": "2172549319", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156203, + "pr_file": "aten/src/ATen/native/cuda/GroupMM.cu", + "discussion_id": "2172431501", + "commented_code": "@@ -43,22 +44,34 @@ C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(\"-Wunused-but-set-parameter\")\n #include \n #include \n \n+#include \n+\n namespace {\n using Strides = at::cuda::detail::Strides; // std::array;\n \n-template \n+template \n struct Schedule {\n+ // SM90\n using CooperativeSchedule =\n cutlass::gemm::KernelPtrArrayTmaWarpSpecializedCooperative;\n using PongSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecializedPingpong;\n using CooperativeEpilogueSchedule =\n cutlass::epilogue::PtrArrayTmaWarpSpecializedCooperative;\n using PongEpilogueSchedule =\n cutlass::epilogue::PtrArrayTmaWarpSpecializedPingpong;\n- using KernelSchedule =\n- cute::conditional_t;\n- using EpilogueSchedule = cute::\n- conditional_t;\n+ // SM100\n+ using MMA1SMKernelSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecialized1SmSm100;\n+ using MMA1SMEpilogueSchedule = cutlass::epilogue::PtrArrayTmaWarpSpecialized1Sm;\n+ using MMA2SMKernelSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecialized2SmSm100;\n+ using MMA2SMEpilogueSchedule = cutlass::epilogue::PtrArrayTmaWarpSpecialized2Sm;\n+", + "comment_created_at": "2025-06-27T17:46:06+00:00", + "comment_author": "drisspg", + "comment_body": "Sounds good", + "pr_file_module": null + } + ] + }, + { 
+ "discussion_id": "2084689345", + "pr_number": 153373, + "pr_file": "aten/src/ATen/cuda/cub.cu", + "created_at": "2025-05-12T13:33:03+00:00", + "commented_code": "template \nvoid inclusive_sum_truncating(const input_t *input, output_t *output, int64_t num_items) {\n#if CUB_V3_PLUS()\n inclusive_scan(input, output, ::cuda::std::plus<>{}, num_items);\n#else\n using NO_ROCM(at_cuda_detail)::cub::Sum;\n inclusive_scan(input, output, Sum{}, num_items);\n#endif", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2084689345", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153373, + "pr_file": "aten/src/ATen/cuda/cub.cu", + "discussion_id": "2084689345", + "commented_code": "@@ -15,8 +20,12 @@ struct SumOp {\n \n template \n void inclusive_sum_truncating(const input_t *input, output_t *output, int64_t num_items) {\n+#if CUB_V3_PLUS()\n+ inclusive_scan(input, output, ::cuda::std::plus<>{}, num_items);\n+#else\n using NO_ROCM(at_cuda_detail)::cub::Sum;\n inclusive_scan(input, output, Sum{}, num_items);\n+#endif", + "comment_created_at": "2025-05-12T13:33:03+00:00", + "comment_author": "miscco", + "comment_body": "This change is valid unconditionally\r\n```suggestion\r\n inclusive_scan(input, output, ::cuda::std::plus<>{}, num_items);\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2084690364", + "pr_number": 153373, + "pr_file": "aten/src/ATen/cuda/cub.cu", + "created_at": "2025-05-12T13:33:36+00:00", + "commented_code": "void mask_exclusive_sum(const uint8_t *mask, int64_t *output_idx, int64_t n) {\n CountMaskOp op{};\n#if CUB_V3_PLUS()\n auto iter = thrust::transform_iterator(mask, op);\n#else\n auto iter = NO_ROCM(at_cuda_detail)::cub::TransformInputIterator<\n bool, decltype(op), decltype(mask)>(mask, op);\n#endif", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2084690364", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153373, + "pr_file": "aten/src/ATen/cuda/cub.cu", + "discussion_id": "2084690364", + "commented_code": "@@ -42,8 +51,12 @@ struct CountMaskOp {\n \n void mask_exclusive_sum(const uint8_t *mask, int64_t *output_idx, int64_t n) {\n CountMaskOp op{};\n+#if CUB_V3_PLUS()\n+ auto iter = thrust::transform_iterator(mask, op);\n+#else\n auto iter = NO_ROCM(at_cuda_detail)::cub::TransformInputIterator<\n bool, decltype(op), decltype(mask)>(mask, op);\n+#endif", + "comment_created_at": "2025-05-12T13:33:36+00:00", + "comment_author": "miscco", + "comment_body": "Ditto, I believe this can be applied unconditionally. 
Also we can leverage `thrust::make_transform_iterator`\r\n```suggestion\r\n auto iter = thrust::make_transform_iterator(mask, op);\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2085100379", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153373, + "pr_file": "aten/src/ATen/cuda/cub.cu", + "discussion_id": "2084690364", + "commented_code": "@@ -42,8 +51,12 @@ struct CountMaskOp {\n \n void mask_exclusive_sum(const uint8_t *mask, int64_t *output_idx, int64_t n) {\n CountMaskOp op{};\n+#if CUB_V3_PLUS()\n+ auto iter = thrust::transform_iterator(mask, op);\n+#else\n auto iter = NO_ROCM(at_cuda_detail)::cub::TransformInputIterator<\n bool, decltype(op), decltype(mask)>(mask, op);\n+#endif", + "comment_created_at": "2025-05-12T17:03:15+00:00", + "comment_author": "ngimel", + "comment_body": "NO_ROCM wrappers are there for a reason, to enable rocm builds, that said it's true that it would be better to enable this with a macro similar to NO_ROCM instead of ifdefs", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2084693962", + "pr_number": 153373, + "pr_file": "aten/src/ATen/cuda/cub.cuh", + "created_at": "2025-05-12T13:35:33+00:00", + "commented_code": "aggT data[ITEMS_PER_THREAD];\n aggT agg_val = 0;\n TransformFunctor transform_functor;\n#if CUB_V3_PLUS()\n auto iter_in = thrust::transform_iterator, const T*>(d_in, transform_functor);\n#else\n auto iter_in = ROCM_HIPCUB(at_cuda_detail::cub)::TransformInputIterator, const T*>(d_in, transform_functor);\n#endif", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2084693962", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153373, + "pr_file": "aten/src/ATen/cuda/cub.cuh", + "discussion_id": "2084693962", + "commented_code": "@@ -425,7 +435,11 @@ __global__ void calc_block_sums(const T * d_in, aggT * agg, int64_t nelem, int i\n aggT data[ITEMS_PER_THREAD];\n aggT agg_val = 0;\n TransformFunctor transform_functor;\n+#if CUB_V3_PLUS()\n+ auto iter_in = thrust::transform_iterator, const T*>(d_in, transform_functor);\n+#else\n auto iter_in = ROCM_HIPCUB(at_cuda_detail::cub)::TransformInputIterator, const T*>(d_in, transform_functor);\n+#endif", + "comment_created_at": "2025-05-12T13:35:33+00:00", + "comment_author": "miscco", + "comment_body": "```suggestion\r\n auto iter_in = thrust::make_transform_iterator(d_in, transform_functor);\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2084695693", + "pr_number": 153373, + "pr_file": "aten/src/ATen/cuda/cub.cuh", + "created_at": "2025-05-12T13:36:13+00:00", + "commented_code": "TORCH_CHECK(num_items <= std::numeric_limits::max(),\n \"cub InclusiveSumByKey does not support more than INT_MAX elements\");\n#if !defined(USE_ROCM)\n#if CUB_V3_PLUS()\n CUB_WRAPPER(at_cuda_detail::cub::DeviceScan::InclusiveSumByKey,\n keys, input, output, num_items, ::cuda::std::equal_to<>(), at::cuda::getCurrentCUDAStream());\n#else\n CUB_WRAPPER(at_cuda_detail::cub::DeviceScan::InclusiveSumByKey,\n keys, input, output, num_items, at_cuda_detail::cub::Equality(), at::cuda::getCurrentCUDAStream());\n#endif // CUB_V3_PLUS()", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2084695693", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153373, + "pr_file": "aten/src/ATen/cuda/cub.cuh", + "discussion_id": "2084695693", + "commented_code": "@@ -567,8 +581,13 @@ inline void inclusive_sum_by_key(KeysInputIteratorT keys, ValuesInputIteratorT i\n TORCH_CHECK(num_items <= 
std::numeric_limits::max(),\n \"cub InclusiveSumByKey does not support more than INT_MAX elements\");\n #if !defined(USE_ROCM)\n+#if CUB_V3_PLUS()\n+ CUB_WRAPPER(at_cuda_detail::cub::DeviceScan::InclusiveSumByKey,\n+ keys, input, output, num_items, ::cuda::std::equal_to<>(), at::cuda::getCurrentCUDAStream());\n+#else\n CUB_WRAPPER(at_cuda_detail::cub::DeviceScan::InclusiveSumByKey,\n keys, input, output, num_items, at_cuda_detail::cub::Equality(), at::cuda::getCurrentCUDAStream());\n+#endif // CUB_V3_PLUS()", + "comment_created_at": "2025-05-12T13:36:13+00:00", + "comment_author": "miscco", + "comment_body": "`cuda::std::equal_to` is available unconditionally\r\n```suggestion\r\n CUB_WRAPPER(at_cuda_detail::cub::DeviceScan::InclusiveSumByKey,\r\n keys, input, output, num_items, ::cuda::std::equal_to<>(), at::cuda::getCurrentCUDAStream());\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2084703977", + "pr_number": 153373, + "pr_file": "aten/src/ATen/native/cuda/Nonzero.cu", + "created_at": "2025-05-12T13:40:06+00:00", + "commented_code": "for (int64_t idx = 0; idx < num_chunks; idx++) {\n int remaining = std::min(chunk_size, self.numel() - idx * chunk_size);\n\n#if CUB_V3_PLUS()\n thrust::counting_iterator counting_itr(idx * chunk_size);\n thrust::transform_iterator, const scalar_t*>\n itr(self_.const_data_ptr() + idx * chunk_size,\n NonZeroOp());\n#else\n cub::CountingInputIterator counting_itr(idx * chunk_size);\n cub::TransformInputIterator, const scalar_t*>\n itr(self_.const_data_ptr() + idx * chunk_size,\n NonZeroOp());\n#endif", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2084703977", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153373, + "pr_file": "aten/src/ATen/native/cuda/Nonzero.cu", + "discussion_id": "2084703977", + "commented_code": "@@ -243,10 +258,17 @@ void nonzero_cuda_out_impl(const Tensor& self, Tensor& out) {\n for (int64_t idx = 0; idx < num_chunks; idx++) {\n int remaining = std::min(chunk_size, self.numel() - idx * chunk_size);\n \n+#if CUB_V3_PLUS()\n+ thrust::counting_iterator counting_itr(idx * chunk_size);\n+ thrust::transform_iterator, const scalar_t*>\n+ itr(self_.const_data_ptr() + idx * chunk_size,\n+ NonZeroOp());\n+#else\n cub::CountingInputIterator counting_itr(idx * chunk_size);\n cub::TransformInputIterator, const scalar_t*>\n itr(self_.const_data_ptr() + idx * chunk_size,\n NonZeroOp());\n+#endif", + "comment_created_at": "2025-05-12T13:40:06+00:00", + "comment_author": "miscco", + "comment_body": "```suggestion\r\n thrust::counting_iterator counting_itr(idx * chunk_size);\r\n thrust::transform_iterator, const scalar_t*>\r\n itr(self_.const_data_ptr() + idx * chunk_size,\r\n NonZeroOp());\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-choose-optimal-algorithms.md b/_reviewers/pytorch-choose-optimal-algorithms.md index 30ead18..e3d8eb3 100644 --- a/_reviewers/pytorch-choose-optimal-algorithms.md +++ b/_reviewers/pytorch-choose-optimal-algorithms.md @@ -45,287 +45,3 @@ auto iter = thrust::make_transform_iterator( ``` When implementing sorting or comparison operations, be aware of how the standard library compares objects like `std::pair` (lexicographically by default) and document any custom comparators when non-standard behavior is needed. 
- - -[ - { - "discussion_id": "2145816443", - "pr_number": 151547, - "pr_file": "aten/src/ATen/native/CPUBlas.cpp", - "created_at": "2025-06-13T18:29:32+00:00", - "commented_code": "b, &ldb_,\n &beta_,\n float_v.data(), &ldc_);\n for (auto cv: float_v) {\n *(c++) = c10::convert(cv);\n\n for (const auto j : c10::irange(n)) {\n for (const auto i : c10::irange(m)) {\n auto offset = j * ldc + i;\n c[offset] = c10::convert(float_v[j * m + i]);", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2145816443", - "repo_full_name": "pytorch/pytorch", - "pr_number": 151547, - "pr_file": "aten/src/ATen/native/CPUBlas.cpp", - "discussion_id": "2145816443", - "commented_code": "@@ -368,8 +368,12 @@ void gemm(\n b, &ldb_,\n &beta_,\n float_v.data(), &ldc_);\n- for (auto cv: float_v) {\n- *(c++) = c10::convert(cv);\n+\n+ for (const auto j : c10::irange(n)) {\n+ for (const auto i : c10::irange(m)) {\n+ auto offset = j * ldc + i;\n+ c[offset] = c10::convert(float_v[j * m + i]);", - "comment_created_at": "2025-06-13T18:29:32+00:00", - "comment_author": "taoye9", - "comment_body": "should it be `float_v[j * ldc_ + i]` instead of `float_v[j * m + i]` given sbgemm leading dim is ldc?\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2178739355", - "pr_number": 157267, - "pr_file": "aten/src/ATen/cpu/vec/vec512/vec512_qint.h", - "created_at": "2025-07-01T23:53:23+00:00", - "commented_code": "return _mm512_permutexvar_epi32(permute_mask_v, xyzw_clamped_v);\n}\n\ntemplate <>\nat::vec::Vectorized inline convert_float_to_int8(\n at::vec::Vectorized src) {\n // The type of min_val should be int8_t to ensure correct clamping behavior.\n constexpr auto min_val = std::numeric_limits::min();\n constexpr auto max_val = std::numeric_limits::max();\n __m512 float32_min_val = _mm512_set1_ps(float(min_val));\n __m512 float32_max_val = _mm512_set1_ps(float(max_val));\n __m512 float32_src = _mm512_max_ps(src, float32_min_val);\n float32_src = _mm512_min_ps(float32_src, float32_max_val);\n __m512i int32_src = _mm512_cvttps_epi32(float32_src);\n uint8_t output_mem[Vectorized::size()];\n _mm512_mask_cvtepi32_storeu_epi8(output_mem, 0xffff, int32_src);", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2178739355", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157267, - "pr_file": "aten/src/ATen/cpu/vec/vec512/vec512_qint.h", - "discussion_id": "2178739355", - "commented_code": "@@ -159,6 +161,22 @@ typename std::enable_if_t<\n return _mm512_permutexvar_epi32(permute_mask_v, xyzw_clamped_v);\n }\n \n+template <>\n+at::vec::Vectorized inline convert_float_to_int8(\n+ at::vec::Vectorized src) {\n+ // The type of min_val should be int8_t to ensure correct clamping behavior.\n+ constexpr auto min_val = std::numeric_limits::min();\n+ constexpr auto max_val = std::numeric_limits::max();\n+ __m512 float32_min_val = _mm512_set1_ps(float(min_val));\n+ __m512 float32_max_val = _mm512_set1_ps(float(max_val));\n+ __m512 float32_src = _mm512_max_ps(src, float32_min_val);\n+ float32_src = _mm512_min_ps(float32_src, float32_max_val);\n+ __m512i int32_src = _mm512_cvttps_epi32(float32_src);\n+ uint8_t output_mem[Vectorized::size()];\n+ _mm512_mask_cvtepi32_storeu_epi8(output_mem, 0xffff, int32_src);", - "comment_created_at": "2025-07-01T23:53:23+00:00", - "comment_author": "leslie-fang-intel", - "comment_body": "Could we use\r\n```\r\n__m128i int8_src = _mm512_cvtepi32_epi8(int32_src);\r\nreturn _mm512_castsi128_si512(int8_src);\r\n```", - 
"pr_file_module": null - }, - { - "comment_id": "2178869263", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157267, - "pr_file": "aten/src/ATen/cpu/vec/vec512/vec512_qint.h", - "discussion_id": "2178739355", - "commented_code": "@@ -159,6 +161,22 @@ typename std::enable_if_t<\n return _mm512_permutexvar_epi32(permute_mask_v, xyzw_clamped_v);\n }\n \n+template <>\n+at::vec::Vectorized inline convert_float_to_int8(\n+ at::vec::Vectorized src) {\n+ // The type of min_val should be int8_t to ensure correct clamping behavior.\n+ constexpr auto min_val = std::numeric_limits::min();\n+ constexpr auto max_val = std::numeric_limits::max();\n+ __m512 float32_min_val = _mm512_set1_ps(float(min_val));\n+ __m512 float32_max_val = _mm512_set1_ps(float(max_val));\n+ __m512 float32_src = _mm512_max_ps(src, float32_min_val);\n+ float32_src = _mm512_min_ps(float32_src, float32_max_val);\n+ __m512i int32_src = _mm512_cvttps_epi32(float32_src);\n+ uint8_t output_mem[Vectorized::size()];\n+ _mm512_mask_cvtepi32_storeu_epi8(output_mem, 0xffff, int32_src);", - "comment_created_at": "2025-07-02T02:39:04+00:00", - "comment_author": "thenumberouscode", - "comment_body": "I've made the changes as per your suggestions. Please let me know if you have any further comments or questions.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2109952742", - "pr_number": 153666, - "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", - "created_at": "2025-05-27T18:59:36+00:00", - "commented_code": "vec_t data = X_vec[i];\n #pragma unroll\n for (int ii=0; ii < vec_size; ii++){\n wd = cuWelfordOnlineSum(static_cast(data.val[ii]), wd);\n if(!rms_norm){\n wd = cuWelfordOnlineSum(static_cast(data.val[ii]), wd);\n } else{\n wd = cuRMSOnlineSum(static_cast(data.val[ii]), wd);\n }\n }\n }\n // intra-warp reduction\n for (int offset = (C10_WARP_SIZE >> 1); offset > 0; offset >>= 1) {\n WelfordDataLN wdB{WARP_SHFL_DOWN(wd.mean, offset),\n WARP_SHFL_DOWN(wd.sigma2, offset), WARP_SHFL_DOWN(wd.count, offset)};\n wd = cuWelfordCombine(wd, wdB);\n WelfordDataLN wdB{WARP_SHFL_DOWN(wd.mean, offset), WARP_SHFL_DOWN(wd.sigma2, offset), WARP_SHFL_DOWN(wd.count, offset)};\n if(!rms_norm){", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2109952742", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153666, - "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", - "discussion_id": "2109952742", - "commented_code": "@@ -171,14 +197,21 @@ __device__ WelfordDataLN compute_stats(\n vec_t data = X_vec[i];\n #pragma unroll\n for (int ii=0; ii < vec_size; ii++){\n- wd = cuWelfordOnlineSum(static_cast(data.val[ii]), wd);\n+ if(!rms_norm){\n+ wd = cuWelfordOnlineSum(static_cast(data.val[ii]), wd);\n+ } else{\n+ wd = cuRMSOnlineSum(static_cast(data.val[ii]), wd);\n+ }\n }\n }\n // intra-warp reduction\n for (int offset = (C10_WARP_SIZE >> 1); offset > 0; offset >>= 1) {\n- WelfordDataLN wdB{WARP_SHFL_DOWN(wd.mean, offset),\n- WARP_SHFL_DOWN(wd.sigma2, offset), WARP_SHFL_DOWN(wd.count, offset)};\n- wd = cuWelfordCombine(wd, wdB);\n+ WelfordDataLN wdB{WARP_SHFL_DOWN(wd.mean, offset), WARP_SHFL_DOWN(wd.sigma2, offset), WARP_SHFL_DOWN(wd.count, offset)};\n+ if(!rms_norm){", - "comment_created_at": "2025-05-27T18:59:36+00:00", - "comment_author": "jeffdaily", - "comment_body": "`if constexpr`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2178606011", - "pr_number": 157290, - "pr_file": "torch/nativert/executor/memory/LayoutManager.cpp", - "created_at": 
"2025-07-01T21:56:40+00:00", - "commented_code": "}\n}\n\n#ifndef NDEBUG\nvoid LayoutManager::assert_no_overlapping_storages(const Node& node, size_t t)\n const {\n if (state_ != LayoutManagerState::Running) {\n return;\n }\n\n /*\n for each value\n (either an input or output)\n ensure that the associated storage\n slice lies within the allocated slice\n if it is managed (or if it is an alias,\n we can use the slice allocated to its source)\n ---\n also ensure that the current index lies\n within the lifetime of this value\n */\n\n const auto& alias_analyzer = planner_.get_alias_analyzer();\n const auto& alive_values = alias_analyzer.alive_values_at_time(t);\n\n // make sure active memory intervals are non-overlapping\n // by sorting them by start, and ensuring", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2178606011", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157290, - "pr_file": "torch/nativert/executor/memory/LayoutManager.cpp", - "discussion_id": "2178606011", - "commented_code": "@@ -157,6 +160,123 @@ void LayoutManager::populate_tensor_values() {\n }\n }\n \n+#ifndef NDEBUG\n+void LayoutManager::assert_no_overlapping_storages(const Node& node, size_t t)\n+ const {\n+ if (state_ != LayoutManagerState::Running) {\n+ return;\n+ }\n+\n+ /*\n+ for each value\n+ (either an input or output)\n+ ensure that the associated storage\n+ slice lies within the allocated slice\n+ if it is managed (or if it is an alias,\n+ we can use the slice allocated to its source)\n+ ---\n+ also ensure that the current index lies\n+ within the lifetime of this value\n+ */\n+\n+ const auto& alias_analyzer = planner_.get_alias_analyzer();\n+ const auto& alive_values = alias_analyzer.alive_values_at_time(t);\n+\n+ // make sure active memory intervals are non-overlapping\n+ // by sorting them by start, and ensuring", - "comment_created_at": "2025-07-01T21:56:40+00:00", - "comment_author": "SherlockNoMad", - "comment_body": "How is the sorting achieved? Do you need a custom comparator? \r\nIf you are relying on the default std::set\\ behavior, make a remark mentioning so, coz I don't how c++ compare pairs by default. 
\r\n", - "pr_file_module": null - }, - { - "comment_id": "2178737089", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157290, - "pr_file": "torch/nativert/executor/memory/LayoutManager.cpp", - "discussion_id": "2178606011", - "commented_code": "@@ -157,6 +160,123 @@ void LayoutManager::populate_tensor_values() {\n }\n }\n \n+#ifndef NDEBUG\n+void LayoutManager::assert_no_overlapping_storages(const Node& node, size_t t)\n+ const {\n+ if (state_ != LayoutManagerState::Running) {\n+ return;\n+ }\n+\n+ /*\n+ for each value\n+ (either an input or output)\n+ ensure that the associated storage\n+ slice lies within the allocated slice\n+ if it is managed (or if it is an alias,\n+ we can use the slice allocated to its source)\n+ ---\n+ also ensure that the current index lies\n+ within the lifetime of this value\n+ */\n+\n+ const auto& alias_analyzer = planner_.get_alias_analyzer();\n+ const auto& alive_values = alias_analyzer.alive_values_at_time(t);\n+\n+ // make sure active memory intervals are non-overlapping\n+ // by sorting them by start, and ensuring", - "comment_created_at": "2025-07-01T23:50:34+00:00", - "comment_author": "dolpm", - "comment_body": "on comparison of pairs: https://cplusplus.com/reference/utility/pair/operators/\r\n\r\ni will add this to the comment.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2172431501", - "pr_number": 156203, - "pr_file": "aten/src/ATen/native/cuda/GroupMM.cu", - "created_at": "2025-06-27T16:40:54+00:00", - "commented_code": "#include \n#include \n\n#include \n\nnamespace {\nusing Strides = at::cuda::detail::Strides; // std::array;\n\ntemplate \ntemplate \nstruct Schedule {\n // SM90\n using CooperativeSchedule =\n cutlass::gemm::KernelPtrArrayTmaWarpSpecializedCooperative;\n using PongSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecializedPingpong;\n using CooperativeEpilogueSchedule =\n cutlass::epilogue::PtrArrayTmaWarpSpecializedCooperative;\n using PongEpilogueSchedule =\n cutlass::epilogue::PtrArrayTmaWarpSpecializedPingpong;\n using KernelSchedule =\n cute::conditional_t;\n using EpilogueSchedule = cute::\n conditional_t;\n // SM100\n using MMA1SMKernelSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecialized1SmSm100;\n using MMA1SMEpilogueSchedule = cutlass::epilogue::PtrArrayTmaWarpSpecialized1Sm;\n using MMA2SMKernelSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecialized2SmSm100;\n using MMA2SMEpilogueSchedule = cutlass::epilogue::PtrArrayTmaWarpSpecialized2Sm;", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2172431501", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156203, - "pr_file": "aten/src/ATen/native/cuda/GroupMM.cu", - "discussion_id": "2172431501", - "commented_code": "@@ -43,22 +44,34 @@ C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(\"-Wunused-but-set-parameter\")\n #include \n #include \n \n+#include \n+\n namespace {\n using Strides = at::cuda::detail::Strides; // std::array;\n \n-template \n+template \n struct Schedule {\n+ // SM90\n using CooperativeSchedule =\n cutlass::gemm::KernelPtrArrayTmaWarpSpecializedCooperative;\n using PongSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecializedPingpong;\n using CooperativeEpilogueSchedule =\n cutlass::epilogue::PtrArrayTmaWarpSpecializedCooperative;\n using PongEpilogueSchedule =\n cutlass::epilogue::PtrArrayTmaWarpSpecializedPingpong;\n- using KernelSchedule =\n- cute::conditional_t;\n- using EpilogueSchedule = cute::\n- conditional_t;\n+ // SM100\n+ using MMA1SMKernelSchedule = 
cutlass::gemm::KernelPtrArrayTmaWarpSpecialized1SmSm100;\n+ using MMA1SMEpilogueSchedule = cutlass::epilogue::PtrArrayTmaWarpSpecialized1Sm;\n+ using MMA2SMKernelSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecialized2SmSm100;\n+ using MMA2SMEpilogueSchedule = cutlass::epilogue::PtrArrayTmaWarpSpecialized2Sm;\n+", - "comment_created_at": "2025-06-27T16:40:54+00:00", - "comment_author": "drisspg", - "comment_body": "Is there no pingpong schedule for sm100?", - "pr_file_module": null - }, - { - "comment_id": "2172484226", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156203, - "pr_file": "aten/src/ATen/native/cuda/GroupMM.cu", - "discussion_id": "2172431501", - "commented_code": "@@ -43,22 +44,34 @@ C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(\"-Wunused-but-set-parameter\")\n #include \n #include \n \n+#include \n+\n namespace {\n using Strides = at::cuda::detail::Strides; // std::array;\n \n-template \n+template \n struct Schedule {\n+ // SM90\n using CooperativeSchedule =\n cutlass::gemm::KernelPtrArrayTmaWarpSpecializedCooperative;\n using PongSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecializedPingpong;\n using CooperativeEpilogueSchedule =\n cutlass::epilogue::PtrArrayTmaWarpSpecializedCooperative;\n using PongEpilogueSchedule =\n cutlass::epilogue::PtrArrayTmaWarpSpecializedPingpong;\n- using KernelSchedule =\n- cute::conditional_t;\n- using EpilogueSchedule = cute::\n- conditional_t;\n+ // SM100\n+ using MMA1SMKernelSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecialized1SmSm100;\n+ using MMA1SMEpilogueSchedule = cutlass::epilogue::PtrArrayTmaWarpSpecialized1Sm;\n+ using MMA2SMKernelSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecialized2SmSm100;\n+ using MMA2SMEpilogueSchedule = cutlass::epilogue::PtrArrayTmaWarpSpecialized2Sm;\n+", - "comment_created_at": "2025-06-27T17:04:10+00:00", - "comment_author": "AaronWang04", - "comment_body": "Nope, this is what the CUTLASS team said when I asked: \"with SM100, there are no more cooperative & pingpong scheduling policies (in the name, everything is pingpong)\"", - "pr_file_module": null - }, - { - "comment_id": "2172549319", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156203, - "pr_file": "aten/src/ATen/native/cuda/GroupMM.cu", - "discussion_id": "2172431501", - "commented_code": "@@ -43,22 +44,34 @@ C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(\"-Wunused-but-set-parameter\")\n #include \n #include \n \n+#include \n+\n namespace {\n using Strides = at::cuda::detail::Strides; // std::array;\n \n-template \n+template \n struct Schedule {\n+ // SM90\n using CooperativeSchedule =\n cutlass::gemm::KernelPtrArrayTmaWarpSpecializedCooperative;\n using PongSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecializedPingpong;\n using CooperativeEpilogueSchedule =\n cutlass::epilogue::PtrArrayTmaWarpSpecializedCooperative;\n using PongEpilogueSchedule =\n cutlass::epilogue::PtrArrayTmaWarpSpecializedPingpong;\n- using KernelSchedule =\n- cute::conditional_t;\n- using EpilogueSchedule = cute::\n- conditional_t;\n+ // SM100\n+ using MMA1SMKernelSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecialized1SmSm100;\n+ using MMA1SMEpilogueSchedule = cutlass::epilogue::PtrArrayTmaWarpSpecialized1Sm;\n+ using MMA2SMKernelSchedule = cutlass::gemm::KernelPtrArrayTmaWarpSpecialized2SmSm100;\n+ using MMA2SMEpilogueSchedule = cutlass::epilogue::PtrArrayTmaWarpSpecialized2Sm;\n+", - "comment_created_at": "2025-06-27T17:46:06+00:00", - "comment_author": "drisspg", - "comment_body": "Sounds good", - "pr_file_module": null - } - ] - }, - { 
- "discussion_id": "2084689345", - "pr_number": 153373, - "pr_file": "aten/src/ATen/cuda/cub.cu", - "created_at": "2025-05-12T13:33:03+00:00", - "commented_code": "template \nvoid inclusive_sum_truncating(const input_t *input, output_t *output, int64_t num_items) {\n#if CUB_V3_PLUS()\n inclusive_scan(input, output, ::cuda::std::plus<>{}, num_items);\n#else\n using NO_ROCM(at_cuda_detail)::cub::Sum;\n inclusive_scan(input, output, Sum{}, num_items);\n#endif", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2084689345", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153373, - "pr_file": "aten/src/ATen/cuda/cub.cu", - "discussion_id": "2084689345", - "commented_code": "@@ -15,8 +20,12 @@ struct SumOp {\n \n template \n void inclusive_sum_truncating(const input_t *input, output_t *output, int64_t num_items) {\n+#if CUB_V3_PLUS()\n+ inclusive_scan(input, output, ::cuda::std::plus<>{}, num_items);\n+#else\n using NO_ROCM(at_cuda_detail)::cub::Sum;\n inclusive_scan(input, output, Sum{}, num_items);\n+#endif", - "comment_created_at": "2025-05-12T13:33:03+00:00", - "comment_author": "miscco", - "comment_body": "This change is valid unconditionally\r\n```suggestion\r\n inclusive_scan(input, output, ::cuda::std::plus<>{}, num_items);\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2084690364", - "pr_number": 153373, - "pr_file": "aten/src/ATen/cuda/cub.cu", - "created_at": "2025-05-12T13:33:36+00:00", - "commented_code": "void mask_exclusive_sum(const uint8_t *mask, int64_t *output_idx, int64_t n) {\n CountMaskOp op{};\n#if CUB_V3_PLUS()\n auto iter = thrust::transform_iterator(mask, op);\n#else\n auto iter = NO_ROCM(at_cuda_detail)::cub::TransformInputIterator<\n bool, decltype(op), decltype(mask)>(mask, op);\n#endif", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2084690364", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153373, - "pr_file": "aten/src/ATen/cuda/cub.cu", - "discussion_id": "2084690364", - "commented_code": "@@ -42,8 +51,12 @@ struct CountMaskOp {\n \n void mask_exclusive_sum(const uint8_t *mask, int64_t *output_idx, int64_t n) {\n CountMaskOp op{};\n+#if CUB_V3_PLUS()\n+ auto iter = thrust::transform_iterator(mask, op);\n+#else\n auto iter = NO_ROCM(at_cuda_detail)::cub::TransformInputIterator<\n bool, decltype(op), decltype(mask)>(mask, op);\n+#endif", - "comment_created_at": "2025-05-12T13:33:36+00:00", - "comment_author": "miscco", - "comment_body": "Ditto, I believe this can be applied unconditionally. 
Also we can leverage `thrust::make_transform_iterator`\r\n```suggestion\r\n auto iter = thrust::make_transform_iterator(mask, op);\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2085100379", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153373, - "pr_file": "aten/src/ATen/cuda/cub.cu", - "discussion_id": "2084690364", - "commented_code": "@@ -42,8 +51,12 @@ struct CountMaskOp {\n \n void mask_exclusive_sum(const uint8_t *mask, int64_t *output_idx, int64_t n) {\n CountMaskOp op{};\n+#if CUB_V3_PLUS()\n+ auto iter = thrust::transform_iterator(mask, op);\n+#else\n auto iter = NO_ROCM(at_cuda_detail)::cub::TransformInputIterator<\n bool, decltype(op), decltype(mask)>(mask, op);\n+#endif", - "comment_created_at": "2025-05-12T17:03:15+00:00", - "comment_author": "ngimel", - "comment_body": "NO_ROCM wrappers are there for a reason, to enable rocm builds, that said it's true that it would be better to enable this with a macro similar to NO_ROCM instead of ifdefs", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2084693962", - "pr_number": 153373, - "pr_file": "aten/src/ATen/cuda/cub.cuh", - "created_at": "2025-05-12T13:35:33+00:00", - "commented_code": "aggT data[ITEMS_PER_THREAD];\n aggT agg_val = 0;\n TransformFunctor transform_functor;\n#if CUB_V3_PLUS()\n auto iter_in = thrust::transform_iterator, const T*>(d_in, transform_functor);\n#else\n auto iter_in = ROCM_HIPCUB(at_cuda_detail::cub)::TransformInputIterator, const T*>(d_in, transform_functor);\n#endif", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2084693962", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153373, - "pr_file": "aten/src/ATen/cuda/cub.cuh", - "discussion_id": "2084693962", - "commented_code": "@@ -425,7 +435,11 @@ __global__ void calc_block_sums(const T * d_in, aggT * agg, int64_t nelem, int i\n aggT data[ITEMS_PER_THREAD];\n aggT agg_val = 0;\n TransformFunctor transform_functor;\n+#if CUB_V3_PLUS()\n+ auto iter_in = thrust::transform_iterator, const T*>(d_in, transform_functor);\n+#else\n auto iter_in = ROCM_HIPCUB(at_cuda_detail::cub)::TransformInputIterator, const T*>(d_in, transform_functor);\n+#endif", - "comment_created_at": "2025-05-12T13:35:33+00:00", - "comment_author": "miscco", - "comment_body": "```suggestion\r\n auto iter_in = thrust::make_transform_iterator(d_in, transform_functor);\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2084695693", - "pr_number": 153373, - "pr_file": "aten/src/ATen/cuda/cub.cuh", - "created_at": "2025-05-12T13:36:13+00:00", - "commented_code": "TORCH_CHECK(num_items <= std::numeric_limits::max(),\n \"cub InclusiveSumByKey does not support more than INT_MAX elements\");\n#if !defined(USE_ROCM)\n#if CUB_V3_PLUS()\n CUB_WRAPPER(at_cuda_detail::cub::DeviceScan::InclusiveSumByKey,\n keys, input, output, num_items, ::cuda::std::equal_to<>(), at::cuda::getCurrentCUDAStream());\n#else\n CUB_WRAPPER(at_cuda_detail::cub::DeviceScan::InclusiveSumByKey,\n keys, input, output, num_items, at_cuda_detail::cub::Equality(), at::cuda::getCurrentCUDAStream());\n#endif // CUB_V3_PLUS()", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2084695693", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153373, - "pr_file": "aten/src/ATen/cuda/cub.cuh", - "discussion_id": "2084695693", - "commented_code": "@@ -567,8 +581,13 @@ inline void inclusive_sum_by_key(KeysInputIteratorT keys, ValuesInputIteratorT i\n TORCH_CHECK(num_items <= 
std::numeric_limits::max(),\n \"cub InclusiveSumByKey does not support more than INT_MAX elements\");\n #if !defined(USE_ROCM)\n+#if CUB_V3_PLUS()\n+ CUB_WRAPPER(at_cuda_detail::cub::DeviceScan::InclusiveSumByKey,\n+ keys, input, output, num_items, ::cuda::std::equal_to<>(), at::cuda::getCurrentCUDAStream());\n+#else\n CUB_WRAPPER(at_cuda_detail::cub::DeviceScan::InclusiveSumByKey,\n keys, input, output, num_items, at_cuda_detail::cub::Equality(), at::cuda::getCurrentCUDAStream());\n+#endif // CUB_V3_PLUS()", - "comment_created_at": "2025-05-12T13:36:13+00:00", - "comment_author": "miscco", - "comment_body": "`cuda::std::equal_to` is available unconditionally\r\n```suggestion\r\n CUB_WRAPPER(at_cuda_detail::cub::DeviceScan::InclusiveSumByKey,\r\n keys, input, output, num_items, ::cuda::std::equal_to<>(), at::cuda::getCurrentCUDAStream());\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2084703977", - "pr_number": 153373, - "pr_file": "aten/src/ATen/native/cuda/Nonzero.cu", - "created_at": "2025-05-12T13:40:06+00:00", - "commented_code": "for (int64_t idx = 0; idx < num_chunks; idx++) {\n int remaining = std::min(chunk_size, self.numel() - idx * chunk_size);\n\n#if CUB_V3_PLUS()\n thrust::counting_iterator counting_itr(idx * chunk_size);\n thrust::transform_iterator, const scalar_t*>\n itr(self_.const_data_ptr() + idx * chunk_size,\n NonZeroOp());\n#else\n cub::CountingInputIterator counting_itr(idx * chunk_size);\n cub::TransformInputIterator, const scalar_t*>\n itr(self_.const_data_ptr() + idx * chunk_size,\n NonZeroOp());\n#endif", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2084703977", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153373, - "pr_file": "aten/src/ATen/native/cuda/Nonzero.cu", - "discussion_id": "2084703977", - "commented_code": "@@ -243,10 +258,17 @@ void nonzero_cuda_out_impl(const Tensor& self, Tensor& out) {\n for (int64_t idx = 0; idx < num_chunks; idx++) {\n int remaining = std::min(chunk_size, self.numel() - idx * chunk_size);\n \n+#if CUB_V3_PLUS()\n+ thrust::counting_iterator counting_itr(idx * chunk_size);\n+ thrust::transform_iterator, const scalar_t*>\n+ itr(self_.const_data_ptr() + idx * chunk_size,\n+ NonZeroOp());\n+#else\n cub::CountingInputIterator counting_itr(idx * chunk_size);\n cub::TransformInputIterator, const scalar_t*>\n itr(self_.const_data_ptr() + idx * chunk_size,\n NonZeroOp());\n+#endif", - "comment_created_at": "2025-05-12T13:40:06+00:00", - "comment_author": "miscco", - "comment_body": "```suggestion\r\n thrust::counting_iterator counting_itr(idx * chunk_size);\r\n thrust::transform_iterator, const scalar_t*>\r\n itr(self_.const_data_ptr() + idx * chunk_size,\r\n NonZeroOp());\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-clean-configuration-files.json b/_reviewers/pytorch-clean-configuration-files.json new file mode 100644 index 0000000..69da8d6 --- /dev/null +++ b/_reviewers/pytorch-clean-configuration-files.json @@ -0,0 +1,80 @@ +[ + { + "discussion_id": "2162720932", + "pr_number": 154595, + "pr_file": "caffe2/CMakeLists.txt", + "created_at": "2025-06-24T00:11:28+00:00", + "commented_code": "torch_cuda\n )\n if($ENV{ATEN_STATIC_CUDA})\n if(CUDA_VERSION_MAJOR LESS_EQUAL 11)\n target_link_libraries(torch_cuda_linalg PRIVATE\n CUDA::cusolver_static\n ${CUDAToolkit_LIBRARY_DIR}/liblapack_static.a # needed for libcusolver_static\n )", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2162720932", + 
"repo_full_name": "pytorch/pytorch", + "pr_number": 154595, + "pr_file": "caffe2/CMakeLists.txt", + "discussion_id": "2162720932", + "commented_code": "@@ -1076,26 +1076,9 @@ elseif(USE_CUDA)\n torch_cuda\n )\n if($ENV{ATEN_STATIC_CUDA})\n- if(CUDA_VERSION_MAJOR LESS_EQUAL 11)\n- target_link_libraries(torch_cuda_linalg PRIVATE\n- CUDA::cusolver_static\n- ${CUDAToolkit_LIBRARY_DIR}/liblapack_static.a # needed for libcusolver_static\n- )", + "comment_created_at": "2025-06-24T00:11:28+00:00", + "comment_author": "malfet", + "comment_body": "Side note: We don't really test static_CUDA anymore, nor do we support CUDA-11 anymore, perhaps time to delete this logic completely...", + "pr_file_module": null + }, + { + "comment_id": "2162730930", + "repo_full_name": "pytorch/pytorch", + "pr_number": 154595, + "pr_file": "caffe2/CMakeLists.txt", + "discussion_id": "2162720932", + "commented_code": "@@ -1076,26 +1076,9 @@ elseif(USE_CUDA)\n torch_cuda\n )\n if($ENV{ATEN_STATIC_CUDA})\n- if(CUDA_VERSION_MAJOR LESS_EQUAL 11)\n- target_link_libraries(torch_cuda_linalg PRIVATE\n- CUDA::cusolver_static\n- ${CUDAToolkit_LIBRARY_DIR}/liblapack_static.a # needed for libcusolver_static\n- )", + "comment_created_at": "2025-06-24T00:22:19+00:00", + "comment_author": "cyyever", + "comment_body": "I'm not sure static CUDA is in use somewhere in META.. Could you propose to remove them?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2071067382", + "pr_number": 152630, + "pr_file": "aten/src/ATen/native/transformers/hip/flash_attn/ck/CMakeLists.txt", + "created_at": "2025-05-02T04:06:02+00:00", + "commented_code": "execute_process(\n COMMAND python3 ${CMAKE_CURRENT_LIST_DIR}/../../../../../../../../third_party/composable_kernel/example/ck_tile/01_fmha/generate.py", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2071067382", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152630, + "pr_file": "aten/src/ATen/native/transformers/hip/flash_attn/ck/CMakeLists.txt", + "discussion_id": "2071067382", + "commented_code": "@@ -11,7 +11,7 @@ endif()\n \n execute_process(\n COMMAND python3 ${CMAKE_CURRENT_LIST_DIR}/../../../../../../../../third_party/composable_kernel/example/ck_tile/01_fmha/generate.py", + "comment_created_at": "2025-05-02T04:06:02+00:00", + "comment_author": "drisspg", + "comment_body": "Nit can we update this relative path to use the root project dir, this relative path is wayyyyyy to long", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2078348575", + "pr_number": 152951, + "pr_file": "CMakeLists.txt", + "created_at": "2025-05-07T19:31:34+00:00", + "commented_code": "BUILD_LAZY_CUDA_LINALG \"Build cuda linalg ops as separate library\" ON\n \"USE_CUDA AND LINUX AND BUILD_PYTHON\" OFF)\ncmake_dependent_option(USE_ROCM \"Use ROCm\" ON \"LINUX\" OFF)\ncmake_dependent_option(USE_ROCM_CK_GEMM \"Use ROCm Composable Kernel\" ON \"USE_ROCM\" OFF)", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2078348575", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152951, + "pr_file": "CMakeLists.txt", + "discussion_id": "2078348575", + "commented_code": "@@ -237,6 +237,7 @@ cmake_dependent_option(\n BUILD_LAZY_CUDA_LINALG \"Build cuda linalg ops as separate library\" ON\n \"USE_CUDA AND LINUX AND BUILD_PYTHON\" OFF)\n cmake_dependent_option(USE_ROCM \"Use ROCm\" ON \"LINUX\" OFF)\n+cmake_dependent_option(USE_ROCM_CK_GEMM \"Use ROCm Composable Kernel\" ON \"USE_ROCM\" OFF)", + "comment_created_at": 
"2025-05-07T19:31:34+00:00", + "comment_author": "jithunnair-amd", + "comment_body": "```suggestion\r\ncmake_dependent_option(USE_ROCM_CK_GEMM \"Use ROCm Composable Kernel for GEMMs\" ON \"USE_ROCM\" OFF)\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-clean-configuration-files.md b/_reviewers/pytorch-clean-configuration-files.md index cb9673b..1671ca6 100644 --- a/_reviewers/pytorch-clean-configuration-files.md +++ b/_reviewers/pytorch-clean-configuration-files.md @@ -39,85 +39,3 @@ execute_process( # Specific option description cmake_dependent_option(USE_ROCM_CK_GEMM "Use ROCm Composable Kernel for GEMMs" ON "USE_ROCM" OFF) ``` - - -[ - { - "discussion_id": "2162720932", - "pr_number": 154595, - "pr_file": "caffe2/CMakeLists.txt", - "created_at": "2025-06-24T00:11:28+00:00", - "commented_code": "torch_cuda\n )\n if($ENV{ATEN_STATIC_CUDA})\n if(CUDA_VERSION_MAJOR LESS_EQUAL 11)\n target_link_libraries(torch_cuda_linalg PRIVATE\n CUDA::cusolver_static\n ${CUDAToolkit_LIBRARY_DIR}/liblapack_static.a # needed for libcusolver_static\n )", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2162720932", - "repo_full_name": "pytorch/pytorch", - "pr_number": 154595, - "pr_file": "caffe2/CMakeLists.txt", - "discussion_id": "2162720932", - "commented_code": "@@ -1076,26 +1076,9 @@ elseif(USE_CUDA)\n torch_cuda\n )\n if($ENV{ATEN_STATIC_CUDA})\n- if(CUDA_VERSION_MAJOR LESS_EQUAL 11)\n- target_link_libraries(torch_cuda_linalg PRIVATE\n- CUDA::cusolver_static\n- ${CUDAToolkit_LIBRARY_DIR}/liblapack_static.a # needed for libcusolver_static\n- )", - "comment_created_at": "2025-06-24T00:11:28+00:00", - "comment_author": "malfet", - "comment_body": "Side note: We don't really test static_CUDA anymore, nor do we support CUDA-11 anymore, perhaps time to delete this logic completely...", - "pr_file_module": null - }, - { - "comment_id": "2162730930", - "repo_full_name": "pytorch/pytorch", - "pr_number": 154595, - "pr_file": "caffe2/CMakeLists.txt", - "discussion_id": "2162720932", - "commented_code": "@@ -1076,26 +1076,9 @@ elseif(USE_CUDA)\n torch_cuda\n )\n if($ENV{ATEN_STATIC_CUDA})\n- if(CUDA_VERSION_MAJOR LESS_EQUAL 11)\n- target_link_libraries(torch_cuda_linalg PRIVATE\n- CUDA::cusolver_static\n- ${CUDAToolkit_LIBRARY_DIR}/liblapack_static.a # needed for libcusolver_static\n- )", - "comment_created_at": "2025-06-24T00:22:19+00:00", - "comment_author": "cyyever", - "comment_body": "I'm not sure static CUDA is in use somewhere in META.. 
Could you propose to remove them?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2071067382", - "pr_number": 152630, - "pr_file": "aten/src/ATen/native/transformers/hip/flash_attn/ck/CMakeLists.txt", - "created_at": "2025-05-02T04:06:02+00:00", - "commented_code": "execute_process(\n COMMAND python3 ${CMAKE_CURRENT_LIST_DIR}/../../../../../../../../third_party/composable_kernel/example/ck_tile/01_fmha/generate.py", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2071067382", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152630, - "pr_file": "aten/src/ATen/native/transformers/hip/flash_attn/ck/CMakeLists.txt", - "discussion_id": "2071067382", - "commented_code": "@@ -11,7 +11,7 @@ endif()\n \n execute_process(\n COMMAND python3 ${CMAKE_CURRENT_LIST_DIR}/../../../../../../../../third_party/composable_kernel/example/ck_tile/01_fmha/generate.py", - "comment_created_at": "2025-05-02T04:06:02+00:00", - "comment_author": "drisspg", - "comment_body": "Nit can we update this relative path to use the root project dir, this relative path is wayyyyyy to long", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2078348575", - "pr_number": 152951, - "pr_file": "CMakeLists.txt", - "created_at": "2025-05-07T19:31:34+00:00", - "commented_code": "BUILD_LAZY_CUDA_LINALG \"Build cuda linalg ops as separate library\" ON\n \"USE_CUDA AND LINUX AND BUILD_PYTHON\" OFF)\ncmake_dependent_option(USE_ROCM \"Use ROCm\" ON \"LINUX\" OFF)\ncmake_dependent_option(USE_ROCM_CK_GEMM \"Use ROCm Composable Kernel\" ON \"USE_ROCM\" OFF)", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2078348575", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152951, - "pr_file": "CMakeLists.txt", - "discussion_id": "2078348575", - "commented_code": "@@ -237,6 +237,7 @@ cmake_dependent_option(\n BUILD_LAZY_CUDA_LINALG \"Build cuda linalg ops as separate library\" ON\n \"USE_CUDA AND LINUX AND BUILD_PYTHON\" OFF)\n cmake_dependent_option(USE_ROCM \"Use ROCm\" ON \"LINUX\" OFF)\n+cmake_dependent_option(USE_ROCM_CK_GEMM \"Use ROCm Composable Kernel\" ON \"USE_ROCM\" OFF)", - "comment_created_at": "2025-05-07T19:31:34+00:00", - "comment_author": "jithunnair-amd", - "comment_body": "```suggestion\r\ncmake_dependent_option(USE_ROCM_CK_GEMM \"Use ROCm Composable Kernel for GEMMs\" ON \"USE_ROCM\" OFF)\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-complete-documented-references.json b/_reviewers/pytorch-complete-documented-references.json new file mode 100644 index 0000000..ecdad35 --- /dev/null +++ b/_reviewers/pytorch-complete-documented-references.json @@ -0,0 +1,46 @@ +[ + { + "discussion_id": "2124660049", + "pr_number": 149697, + "pr_file": "torch/_inductor/analysis/README.md", + "created_at": "2025-06-03T18:49:30+00:00", + "commented_code": "# `torch._inductor.analysis`\nContains scripts for inductor performance analysis.", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2124660049", + "repo_full_name": "pytorch/pytorch", + "pr_number": 149697, + "pr_file": "torch/_inductor/analysis/README.md", + "discussion_id": "2124660049", + "commented_code": "@@ -0,0 +1,2 @@\n+# `torch._inductor.analysis`\n+Contains scripts for inductor performance analysis.", + "comment_created_at": "2025-06-03T18:49:30+00:00", + "comment_author": "shunting314", + "comment_body": "maybe a following: can we add steps for profile capture, profile augmenting, profile comparison 
here to make it easier to find a reference", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2154769604", + "pr_number": 156094, + "pr_file": "docs/source/jit.md", + "created_at": "2025-06-18T14:31:31+00:00", + "commented_code": "# TorchScript\n\n```{toctree}\n:maxdepth: 1\n:caption: Builtin Functions\n:hidden:\n\ntorch.jit.supported_ops \n```\n\n```{toctree}\n:maxdepth: 1\n:caption: Language Reference\n:hidden:\n\njit_language_reference\n```\n\n```{toctree}\n:maxdepth: 1\n\njit_language_reference_v2\n```\n\n```{contents}\n:local:\n:depth: 2\n```\n\n```{eval-rst}\n.. automodule:: torch.jit\n.. currentmodule:: torch.jit\n```\n\nTorchScript is a way to create serializable and optimizable models from PyTorch code.\nAny TorchScript program can be saved from a Python\nprocess and loaded in a process where there is no Python dependency.\n\nWe provide tools to incrementally transition a model from a pure Python program\nto a TorchScript program that can be run independently from Python, such as in a standalone C++ program.\nThis makes it possible to train models in PyTorch using familiar tools in Python and then export\nthe model via TorchScript to a production environment where Python programs may be disadvantageous\nfor performance and multi-threading reasons.\n\nFor a gentle introduction to TorchScript, see the [Introduction to TorchScript](https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html) tutorial.\n\nFor an end-to-end example of converting a PyTorch model to TorchScript and running it in C++, see the\n[Loading a PyTorch Model in C++](https://pytorch.org/tutorials/advanced/cpp_export.html) tutorial.\n\n## Creating TorchScript Code\n\n```{eval-rst}\n.. autosummary::\n :toctree: generated\n :nosignatures:\n\n script\n trace\n script_if_tracing\n trace_module\n fork\n wait\n ScriptModule\n ScriptFunction\n freeze\n optimize_for_inference\n enable_onednn_fusion\n onednn_fusion_enabled\n set_fusion_strategy\n strict_fusion\n save\n load\n ignore\n unused\n interface\n isinstance\n Attribute\n annotate\n```\n\n## Mixing Tracing and Scripting\n\nIn many cases either tracing or scripting is an easier approach for converting a model to TorchScript.\nTracing and scripting can be composed to suit the particular requirements\nof a part of a model.\n\nScripted functions can call traced functions. This is particularly useful when you need\nto use control-flow around a simple feed-forward model. For instance the beam search\nof a sequence to sequence model will typically be written in script but can call an\nencoder module generated using tracing.\n\n```{testsetup}\n# These are hidden from the docs, but these are necessary for `doctest`\n# since the `inspect` module doesn't play nicely with the execution\n# environment for `doctest`\nimport torch\n\noriginal_script = torch.jit.script\ndef script_wrapper(obj, *args, **kwargs):\n obj.__module__ = 'FakeMod'\n return original_script(obj, *args, **kwargs)\n\ntorch.jit.script = script_wrapper\n\noriginal_trace = torch.jit.trace\ndef trace_wrapper(obj, *args, **kwargs):\n obj.__module__ = 'FakeMod'\n return original_trace(obj, *args, **kwargs)\n\ntorch.jit.trace = trace_wrapper\n```\n\nExample (calling a traced function in script):\n\n```{testcode}\nimport torch\n\ndef foo(x, y):\n return 2 * x + y\n\ntraced_foo = torch.jit.trace(foo, (torch.rand(3), torch.rand(3)))\n\n@torch.jit.script\ndef bar(x):\n return traced_foo(x, x)\n```\n\nTraced functions can call script functions. 
This is useful when a small part of\na model requires some control-flow even though most of the model is just a feed-forward\nnetwork. Control-flow inside of a script function called by a traced function is\npreserved correctly.\n\nExample (calling a script function in a traced function):\n\n```{testcode}\nimport torch\n\n@torch.jit.script\ndef foo(x, y):\n if x.max() > y.max():\n r = x\n else:\n r = y\n return r\n\n\ndef bar(x, y, z):\n return foo(x, y) + z\n\ntraced_bar = torch.jit.trace(bar, (torch.rand(3), torch.rand(3), torch.rand(3)))\n```\n\nThis composition also works for `nn.Module`s as well, where it can be used to generate\na submodule using tracing that can be called from the methods of a script module.\n\nExample (using a traced module):\n\n```{testcode}\nimport torch\nimport torchvision\n:skipif: torchvision is None\n\nclass MyScriptModule(torch.nn.Module):\n def __init__(self):\n super().__init__()\n self.means = torch.nn.Parameter(torch.tensor([103.939, 116.779, 123.68])\n .resize_(1, 3, 1, 1))\n self.resnet = torch.jit.trace(torchvision.models.resnet18(),\n torch.rand(1, 3, 224, 224))\n\n def forward(self, input):\n return self.resnet(input - self.means)\n\nmy_script_module = torch.jit.script(MyScriptModule())\n```\n\n## TorchScript Language\n\nTorchScript is a statically typed subset of Python, so many Python features apply\ndirectly to TorchScript. See the full {ref}`language-reference-v2` for details.\n\n(builtin functions)=\n\n## Built-in Functions and Modules\n\nTorchScript supports the use of most PyTorch functions and many Python built-ins.\nSee {ref}`builtin-functions` for a full reference of supported functions.\n\n### PyTorch Functions and Modules\n\nTorchScript supports a subset of the tensor and neural network\nfunctions that PyTorch provides. Most methods on Tensor as well as functions in\nthe `torch` namespace, all functions in `torch.nn.functional` and\nmost modules from `torch.nn` are supported in TorchScript.\n\nSee {ref}`jit_unsupported` for a list of unsupported PyTorch functions and modules.\n\n### Python Functions and Modules\n\nMany of Python's [built-in functions](https://docs.python.org/3/library/functions.html) are supported in TorchScript.\nThe {any}`math` module is also supported (see {ref}`math-module` for details), but no other Python modules\n(built-in or third party) are supported.\n\n### Python Language Reference Comparison\n\nFor a full listing of supported Python features, see {ref}`python-language-reference`.\n\n## Debugging\n\n(disable TorchScript)=\n\n### Disable JIT for Debugging\n\n```{eval-rst}\n.. envvar:: PYTORCH_JIT\n```\n\nSetting the environment variable `PYTORCH_JIT=0` will disable all script\nand tracing annotations. If there is hard-to-debug error in one of your\nTorchScript models, you can use this flag to force everything to run using native\nPython. Since TorchScript (scripting and tracing) is disabled with this flag,\nyou can use tools like `pdb` to debug the model code. For example:\n\n```python\n@torch.jit.script\ndef scripted_fn(x : torch.Tensor):\n for i in range(12):\n x = x + x\n return x\n\ndef fn(x):\n x = torch.neg(x)\n import pdb; pdb.set_trace()\n return scripted_fn(x)\n\ntraced_fn = torch.jit.trace(fn, (torch.rand(4, 5),))\ntraced_fn(torch.rand(3, 4))\n```\n\nDebugging this script with `pdb` works except for when we invoke the\n{func}`@torch.jit.script ` function. We can globally disable\nJIT, so that we can call the {func}`@torch.jit.script `\nfunction as a normal Python function and not compile it. 
If the above script\nis called `disable_jit_example.py`, we can invoke it like so:\n\n```bash\n$ PYTORCH_JIT=0 python disable_jit_example.py\n```\n\nand we will be able to step into the {func}`@torch.jit.script `\nfunction as a normal Python function. To disable the\nTorchScript compiler for a specific function, see\n{func}`@torch.jit.ignore `.\n\n(inspecting-code)=\n\n### Inspecting Code\n\nTorchScript provides a code pretty-printer for all {class}`ScriptModule` instances. This\npretty-printer gives an interpretation of the script method's code as valid\nPython syntax. For example:\n\n```{testcode}\n@torch.jit.script\ndef foo(len):\n # type: (int) -> torch.Tensor\n rv = torch.zeros(3, 4)\n for i in range(len):\n if i < 10:\n rv = rv - 1.0\n else:\n rv = rv + 1.0\n return rv\n\nprint(foo.code)\n```\n\n```{testoutput}\n:hide:\n\n...\n```\n\nA {class}`ScriptModule` with a single `forward` method will have an attribute\n`code`, which you can use to inspect the {class}`ScriptModule`'s code.\nIf the {class}`ScriptModule` has more than one method, you will need to access\n`.code` on the method itself and not the module. We can inspect the\ncode of a method named `foo` on a {class}`ScriptModule` by accessing `.foo.code`.\nThe example above produces this output:\n\n```python\ndef foo(len: int) -> Tensor:\n rv = torch.zeros([3, 4], dtype=None, layout=None, device=None, pin_memory=None)\n rv0 = rv\n for i in range(len):\n if torch.lt(i, 10):\n rv1 = torch.sub(rv0, 1., 1)\n else:\n rv1 = torch.add(rv0, 1., 1)\n rv0 = rv1\n return rv0\n```\n\nThis is TorchScript's compilation of the code for the `forward` method.\nYou can use this to ensure TorchScript (tracing or scripting) has captured\nyour model code correctly.\n\n(interpreting-graphs)=\n\n### Interpreting Graphs\n\nTorchScript also has a representation at a lower level than the code pretty-printer, in the form of IR graphs.\n\nTorchScript uses a static single assignment (SSA) intermediate representation\n(IR) to represent computation. The instructions in this format consist of\nATen (the C++ backend of PyTorch) operators and other primitive operators,\nincluding control flow operators for loops and conditionals. As an example:\n\n```{testcode}\n@torch.jit.script\ndef foo(len):\n # type: (int) -> torch.Tensor\n rv = torch.zeros(3, 4)\n for i in range(len):\n if i < 10:\n rv = rv - 1.0\n else:\n rv = rv + 1.0\n return rv\n\nprint(foo.graph)\n```\n\n```{testoutput}\n:hide:\n\n...\n```\n\n`graph` follows the same rules described in the {ref}`inspecting-code` section\nwith regard to `forward` method lookup.\n\nThe example script above produces the graph:\n\n```\ngraph(%len.1 : int):\n %24 : int = prim::Constant[value=1]()\n %17 : bool = prim::Constant[value=1]() # test.py:10:5\n %12 : bool? = prim::Constant()\n %10 : Device? = prim::Constant()\n %6 : int? 
= prim::Constant()\n %1 : int = prim::Constant[value=3]() # test.py:9:22\n %2 : int = prim::Constant[value=4]() # test.py:9:25\n %20 : int = prim::Constant[value=10]() # test.py:11:16\n %23 : float = prim::Constant[value=1]() # test.py:12:23\n %4 : int[] = prim::ListConstruct(%1, %2)\n %rv.1 : Tensor = aten::zeros(%4, %6, %6, %10, %12) # test.py:9:10\n %rv : Tensor = prim::Loop(%len.1, %17, %rv.1) # test.py:10:5\n block0(%i.1 : int, %rv.14 : Tensor):\n %21 : bool = aten::lt(%i.1, %20) # test.py:11:12\n %rv.13 : Tensor = prim::If(%21) # test.py:11:9\n block0():\n %rv.3 : Tensor = aten::sub(%rv.14, %23, %24) # test.py:12:18\n -> (%rv.3)\n block1():\n %rv.6 : Tensor = aten::add(%rv.14, %23, %24) # test.py:14:18\n -> (%rv.6)\n -> (%17, %rv.13)\n return (%rv)\n```\n\nTake the instruction `%rv.1 : Tensor = aten::zeros(%4, %6, %6, %10, %12) # test.py:9:10` for\nexample.\n\n- `%rv.1 : Tensor` means we assign the output to a (unique) value named `rv.1`, that value is of `Tensor` type and that we do not know its concrete shape.\n- `aten::zeros` is the operator (equivalent to `torch.zeros`) and the input list `(%4, %6, %6, %10, %12)` specifies which values in scope should be passed as inputs. The schema for built-in functions like `aten::zeros` can be found at Builtin Functions.", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2154769604", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156094, + "pr_file": "docs/source/jit.md", + "discussion_id": "2154769604", + "commented_code": "@@ -0,0 +1,892 @@\n+# TorchScript\n+\n+```{toctree}\n+:maxdepth: 1\n+:caption: Builtin Functions\n+:hidden:\n+\n+torch.jit.supported_ops \n+```\n+\n+```{toctree}\n+:maxdepth: 1\n+:caption: Language Reference\n+:hidden:\n+\n+jit_language_reference\n+```\n+\n+```{toctree}\n+:maxdepth: 1\n+\n+jit_language_reference_v2\n+```\n+\n+```{contents}\n+:local:\n+:depth: 2\n+```\n+\n+```{eval-rst}\n+.. automodule:: torch.jit\n+.. currentmodule:: torch.jit\n+```\n+\n+TorchScript is a way to create serializable and optimizable models from PyTorch code.\n+Any TorchScript program can be saved from a Python\n+process and loaded in a process where there is no Python dependency.\n+\n+We provide tools to incrementally transition a model from a pure Python program\n+to a TorchScript program that can be run independently from Python, such as in a standalone C++ program.\n+This makes it possible to train models in PyTorch using familiar tools in Python and then export\n+the model via TorchScript to a production environment where Python programs may be disadvantageous\n+for performance and multi-threading reasons.\n+\n+For a gentle introduction to TorchScript, see the [Introduction to TorchScript](https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html) tutorial.\n+\n+For an end-to-end example of converting a PyTorch model to TorchScript and running it in C++, see the\n+[Loading a PyTorch Model in C++](https://pytorch.org/tutorials/advanced/cpp_export.html) tutorial.\n+\n+## Creating TorchScript Code\n+\n+```{eval-rst}\n+.. 
autosummary::\n+ :toctree: generated\n+ :nosignatures:\n+\n+ script\n+ trace\n+ script_if_tracing\n+ trace_module\n+ fork\n+ wait\n+ ScriptModule\n+ ScriptFunction\n+ freeze\n+ optimize_for_inference\n+ enable_onednn_fusion\n+ onednn_fusion_enabled\n+ set_fusion_strategy\n+ strict_fusion\n+ save\n+ load\n+ ignore\n+ unused\n+ interface\n+ isinstance\n+ Attribute\n+ annotate\n+```\n+\n+## Mixing Tracing and Scripting\n+\n+In many cases either tracing or scripting is an easier approach for converting a model to TorchScript.\n+Tracing and scripting can be composed to suit the particular requirements\n+of a part of a model.\n+\n+Scripted functions can call traced functions. This is particularly useful when you need\n+to use control-flow around a simple feed-forward model. For instance the beam search\n+of a sequence to sequence model will typically be written in script but can call an\n+encoder module generated using tracing.\n+\n+```{testsetup}\n+# These are hidden from the docs, but these are necessary for `doctest`\n+# since the `inspect` module doesn't play nicely with the execution\n+# environment for `doctest`\n+import torch\n+\n+original_script = torch.jit.script\n+def script_wrapper(obj, *args, **kwargs):\n+ obj.__module__ = 'FakeMod'\n+ return original_script(obj, *args, **kwargs)\n+\n+torch.jit.script = script_wrapper\n+\n+original_trace = torch.jit.trace\n+def trace_wrapper(obj, *args, **kwargs):\n+ obj.__module__ = 'FakeMod'\n+ return original_trace(obj, *args, **kwargs)\n+\n+torch.jit.trace = trace_wrapper\n+```\n+\n+Example (calling a traced function in script):\n+\n+```{testcode}\n+import torch\n+\n+def foo(x, y):\n+ return 2 * x + y\n+\n+traced_foo = torch.jit.trace(foo, (torch.rand(3), torch.rand(3)))\n+\n+@torch.jit.script\n+def bar(x):\n+ return traced_foo(x, x)\n+```\n+\n+Traced functions can call script functions. This is useful when a small part of\n+a model requires some control-flow even though most of the model is just a feed-forward\n+network. Control-flow inside of a script function called by a traced function is\n+preserved correctly.\n+\n+Example (calling a script function in a traced function):\n+\n+```{testcode}\n+import torch\n+\n+@torch.jit.script\n+def foo(x, y):\n+ if x.max() > y.max():\n+ r = x\n+ else:\n+ r = y\n+ return r\n+\n+\n+def bar(x, y, z):\n+ return foo(x, y) + z\n+\n+traced_bar = torch.jit.trace(bar, (torch.rand(3), torch.rand(3), torch.rand(3)))\n+```\n+\n+This composition also works for `nn.Module`s as well, where it can be used to generate\n+a submodule using tracing that can be called from the methods of a script module.\n+\n+Example (using a traced module):\n+\n+```{testcode}\n+import torch\n+import torchvision\n+:skipif: torchvision is None\n+\n+class MyScriptModule(torch.nn.Module):\n+ def __init__(self):\n+ super().__init__()\n+ self.means = torch.nn.Parameter(torch.tensor([103.939, 116.779, 123.68])\n+ .resize_(1, 3, 1, 1))\n+ self.resnet = torch.jit.trace(torchvision.models.resnet18(),\n+ torch.rand(1, 3, 224, 224))\n+\n+ def forward(self, input):\n+ return self.resnet(input - self.means)\n+\n+my_script_module = torch.jit.script(MyScriptModule())\n+```\n+\n+## TorchScript Language\n+\n+TorchScript is a statically typed subset of Python, so many Python features apply\n+directly to TorchScript. 
See the full {ref}`language-reference-v2` for details.\n+\n+(builtin functions)=\n+\n+## Built-in Functions and Modules\n+\n+TorchScript supports the use of most PyTorch functions and many Python built-ins.\n+See {ref}`builtin-functions` for a full reference of supported functions.\n+\n+### PyTorch Functions and Modules\n+\n+TorchScript supports a subset of the tensor and neural network\n+functions that PyTorch provides. Most methods on Tensor as well as functions in\n+the `torch` namespace, all functions in `torch.nn.functional` and\n+most modules from `torch.nn` are supported in TorchScript.\n+\n+See {ref}`jit_unsupported` for a list of unsupported PyTorch functions and modules.\n+\n+### Python Functions and Modules\n+\n+Many of Python's [built-in functions](https://docs.python.org/3/library/functions.html) are supported in TorchScript.\n+The {any}`math` module is also supported (see {ref}`math-module` for details), but no other Python modules\n+(built-in or third party) are supported.\n+\n+### Python Language Reference Comparison\n+\n+For a full listing of supported Python features, see {ref}`python-language-reference`.\n+\n+## Debugging\n+\n+(disable TorchScript)=\n+\n+### Disable JIT for Debugging\n+\n+```{eval-rst}\n+.. envvar:: PYTORCH_JIT\n+```\n+\n+Setting the environment variable `PYTORCH_JIT=0` will disable all script\n+and tracing annotations. If there is hard-to-debug error in one of your\n+TorchScript models, you can use this flag to force everything to run using native\n+Python. Since TorchScript (scripting and tracing) is disabled with this flag,\n+you can use tools like `pdb` to debug the model code. For example:\n+\n+```python\n+@torch.jit.script\n+def scripted_fn(x : torch.Tensor):\n+ for i in range(12):\n+ x = x + x\n+ return x\n+\n+def fn(x):\n+ x = torch.neg(x)\n+ import pdb; pdb.set_trace()\n+ return scripted_fn(x)\n+\n+traced_fn = torch.jit.trace(fn, (torch.rand(4, 5),))\n+traced_fn(torch.rand(3, 4))\n+```\n+\n+Debugging this script with `pdb` works except for when we invoke the\n+{func}`@torch.jit.script ` function. We can globally disable\n+JIT, so that we can call the {func}`@torch.jit.script `\n+function as a normal Python function and not compile it. If the above script\n+is called `disable_jit_example.py`, we can invoke it like so:\n+\n+```bash\n+$ PYTORCH_JIT=0 python disable_jit_example.py\n+```\n+\n+and we will be able to step into the {func}`@torch.jit.script `\n+function as a normal Python function. To disable the\n+TorchScript compiler for a specific function, see\n+{func}`@torch.jit.ignore `.\n+\n+(inspecting-code)=\n+\n+### Inspecting Code\n+\n+TorchScript provides a code pretty-printer for all {class}`ScriptModule` instances. This\n+pretty-printer gives an interpretation of the script method's code as valid\n+Python syntax. For example:\n+\n+```{testcode}\n+@torch.jit.script\n+def foo(len):\n+ # type: (int) -> torch.Tensor\n+ rv = torch.zeros(3, 4)\n+ for i in range(len):\n+ if i < 10:\n+ rv = rv - 1.0\n+ else:\n+ rv = rv + 1.0\n+ return rv\n+\n+print(foo.code)\n+```\n+\n+```{testoutput}\n+:hide:\n+\n+...\n+```\n+\n+A {class}`ScriptModule` with a single `forward` method will have an attribute\n+`code`, which you can use to inspect the {class}`ScriptModule`'s code.\n+If the {class}`ScriptModule` has more than one method, you will need to access\n+`.code` on the method itself and not the module. 
We can inspect the\n+code of a method named `foo` on a {class}`ScriptModule` by accessing `.foo.code`.\n+The example above produces this output:\n+\n+```python\n+def foo(len: int) -> Tensor:\n+ rv = torch.zeros([3, 4], dtype=None, layout=None, device=None, pin_memory=None)\n+ rv0 = rv\n+ for i in range(len):\n+ if torch.lt(i, 10):\n+ rv1 = torch.sub(rv0, 1., 1)\n+ else:\n+ rv1 = torch.add(rv0, 1., 1)\n+ rv0 = rv1\n+ return rv0\n+```\n+\n+This is TorchScript's compilation of the code for the `forward` method.\n+You can use this to ensure TorchScript (tracing or scripting) has captured\n+your model code correctly.\n+\n+(interpreting-graphs)=\n+\n+### Interpreting Graphs\n+\n+TorchScript also has a representation at a lower level than the code pretty-printer, in the form of IR graphs.\n+\n+TorchScript uses a static single assignment (SSA) intermediate representation\n+(IR) to represent computation. The instructions in this format consist of\n+ATen (the C++ backend of PyTorch) operators and other primitive operators,\n+including control flow operators for loops and conditionals. As an example:\n+\n+```{testcode}\n+@torch.jit.script\n+def foo(len):\n+ # type: (int) -> torch.Tensor\n+ rv = torch.zeros(3, 4)\n+ for i in range(len):\n+ if i < 10:\n+ rv = rv - 1.0\n+ else:\n+ rv = rv + 1.0\n+ return rv\n+\n+print(foo.graph)\n+```\n+\n+```{testoutput}\n+:hide:\n+\n+...\n+```\n+\n+`graph` follows the same rules described in the {ref}`inspecting-code` section\n+with regard to `forward` method lookup.\n+\n+The example script above produces the graph:\n+\n+```\n+graph(%len.1 : int):\n+ %24 : int = prim::Constant[value=1]()\n+ %17 : bool = prim::Constant[value=1]() # test.py:10:5\n+ %12 : bool? = prim::Constant()\n+ %10 : Device? = prim::Constant()\n+ %6 : int? = prim::Constant()\n+ %1 : int = prim::Constant[value=3]() # test.py:9:22\n+ %2 : int = prim::Constant[value=4]() # test.py:9:25\n+ %20 : int = prim::Constant[value=10]() # test.py:11:16\n+ %23 : float = prim::Constant[value=1]() # test.py:12:23\n+ %4 : int[] = prim::ListConstruct(%1, %2)\n+ %rv.1 : Tensor = aten::zeros(%4, %6, %6, %10, %12) # test.py:9:10\n+ %rv : Tensor = prim::Loop(%len.1, %17, %rv.1) # test.py:10:5\n+ block0(%i.1 : int, %rv.14 : Tensor):\n+ %21 : bool = aten::lt(%i.1, %20) # test.py:11:12\n+ %rv.13 : Tensor = prim::If(%21) # test.py:11:9\n+ block0():\n+ %rv.3 : Tensor = aten::sub(%rv.14, %23, %24) # test.py:12:18\n+ -> (%rv.3)\n+ block1():\n+ %rv.6 : Tensor = aten::add(%rv.14, %23, %24) # test.py:14:18\n+ -> (%rv.6)\n+ -> (%17, %rv.13)\n+ return (%rv)\n+```\n+\n+Take the instruction `%rv.1 : Tensor = aten::zeros(%4, %6, %6, %10, %12) # test.py:9:10` for\n+example.\n+\n+- `%rv.1 : Tensor` means we assign the output to a (unique) value named `rv.1`, that value is of `Tensor` type and that we do not know its concrete shape.\n+- `aten::zeros` is the operator (equivalent to `torch.zeros`) and the input list `(%4, %6, %6, %10, %12)` specifies which values in scope should be passed as inputs. The schema for built-in functions like `aten::zeros` can be found at Builtin Functions.", + "comment_created_at": "2025-06-18T14:31:31+00:00", + "comment_author": "svekars", + "comment_body": "```suggestion\r\n- `aten::zeros` is the operator (equivalent to `torch.zeros`) and the input list `(%4, %6, %6, %10, %12)` specifies which values in scope should be passed as inputs. 
The schema for built-in functions like `aten::zeros` can be found at {ref}``.\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-complete-documented-references.md b/_reviewers/pytorch-complete-documented-references.md index f449b16..eff799d 100644 --- a/_reviewers/pytorch-complete-documented-references.md +++ b/_reviewers/pytorch-complete-documented-references.md @@ -35,51 +35,3 @@ The schema for built-in functions like `aten::zeros` can be found at {ref}`built ``` This helps users navigate through PyTorch's extensive documentation and find relevant information quickly, which is essential for effectively using the framework's complex features and tools. - - -[ - { - "discussion_id": "2124660049", - "pr_number": 149697, - "pr_file": "torch/_inductor/analysis/README.md", - "created_at": "2025-06-03T18:49:30+00:00", - "commented_code": "# `torch._inductor.analysis`\nContains scripts for inductor performance analysis.", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2124660049", - "repo_full_name": "pytorch/pytorch", - "pr_number": 149697, - "pr_file": "torch/_inductor/analysis/README.md", - "discussion_id": "2124660049", - "commented_code": "@@ -0,0 +1,2 @@\n+# `torch._inductor.analysis`\n+Contains scripts for inductor performance analysis.", - "comment_created_at": "2025-06-03T18:49:30+00:00", - "comment_author": "shunting314", - "comment_body": "maybe a following: can we add steps for profile capture, profile augmenting, profile comparison here to make it easier to find a reference", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2154769604", - "pr_number": 156094, - "pr_file": "docs/source/jit.md", - "created_at": "2025-06-18T14:31:31+00:00", - "commented_code": "# TorchScript\n\n```{toctree}\n:maxdepth: 1\n:caption: Builtin Functions\n:hidden:\n\ntorch.jit.supported_ops \n```\n\n```{toctree}\n:maxdepth: 1\n:caption: Language Reference\n:hidden:\n\njit_language_reference\n```\n\n```{toctree}\n:maxdepth: 1\n\njit_language_reference_v2\n```\n\n```{contents}\n:local:\n:depth: 2\n```\n\n```{eval-rst}\n.. automodule:: torch.jit\n.. currentmodule:: torch.jit\n```\n\nTorchScript is a way to create serializable and optimizable models from PyTorch code.\nAny TorchScript program can be saved from a Python\nprocess and loaded in a process where there is no Python dependency.\n\nWe provide tools to incrementally transition a model from a pure Python program\nto a TorchScript program that can be run independently from Python, such as in a standalone C++ program.\nThis makes it possible to train models in PyTorch using familiar tools in Python and then export\nthe model via TorchScript to a production environment where Python programs may be disadvantageous\nfor performance and multi-threading reasons.\n\nFor a gentle introduction to TorchScript, see the [Introduction to TorchScript](https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html) tutorial.\n\nFor an end-to-end example of converting a PyTorch model to TorchScript and running it in C++, see the\n[Loading a PyTorch Model in C++](https://pytorch.org/tutorials/advanced/cpp_export.html) tutorial.\n\n## Creating TorchScript Code\n\n```{eval-rst}\n.. 
autosummary::\n :toctree: generated\n :nosignatures:\n\n script\n trace\n script_if_tracing\n trace_module\n fork\n wait\n ScriptModule\n ScriptFunction\n freeze\n optimize_for_inference\n enable_onednn_fusion\n onednn_fusion_enabled\n set_fusion_strategy\n strict_fusion\n save\n load\n ignore\n unused\n interface\n isinstance\n Attribute\n annotate\n```\n\n## Mixing Tracing and Scripting\n\nIn many cases either tracing or scripting is an easier approach for converting a model to TorchScript.\nTracing and scripting can be composed to suit the particular requirements\nof a part of a model.\n\nScripted functions can call traced functions. This is particularly useful when you need\nto use control-flow around a simple feed-forward model. For instance the beam search\nof a sequence to sequence model will typically be written in script but can call an\nencoder module generated using tracing.\n\n```{testsetup}\n# These are hidden from the docs, but these are necessary for `doctest`\n# since the `inspect` module doesn't play nicely with the execution\n# environment for `doctest`\nimport torch\n\noriginal_script = torch.jit.script\ndef script_wrapper(obj, *args, **kwargs):\n obj.__module__ = 'FakeMod'\n return original_script(obj, *args, **kwargs)\n\ntorch.jit.script = script_wrapper\n\noriginal_trace = torch.jit.trace\ndef trace_wrapper(obj, *args, **kwargs):\n obj.__module__ = 'FakeMod'\n return original_trace(obj, *args, **kwargs)\n\ntorch.jit.trace = trace_wrapper\n```\n\nExample (calling a traced function in script):\n\n```{testcode}\nimport torch\n\ndef foo(x, y):\n return 2 * x + y\n\ntraced_foo = torch.jit.trace(foo, (torch.rand(3), torch.rand(3)))\n\n@torch.jit.script\ndef bar(x):\n return traced_foo(x, x)\n```\n\nTraced functions can call script functions. This is useful when a small part of\na model requires some control-flow even though most of the model is just a feed-forward\nnetwork. Control-flow inside of a script function called by a traced function is\npreserved correctly.\n\nExample (calling a script function in a traced function):\n\n```{testcode}\nimport torch\n\n@torch.jit.script\ndef foo(x, y):\n if x.max() > y.max():\n r = x\n else:\n r = y\n return r\n\n\ndef bar(x, y, z):\n return foo(x, y) + z\n\ntraced_bar = torch.jit.trace(bar, (torch.rand(3), torch.rand(3), torch.rand(3)))\n```\n\nThis composition also works for `nn.Module`s as well, where it can be used to generate\na submodule using tracing that can be called from the methods of a script module.\n\nExample (using a traced module):\n\n```{testcode}\nimport torch\nimport torchvision\n:skipif: torchvision is None\n\nclass MyScriptModule(torch.nn.Module):\n def __init__(self):\n super().__init__()\n self.means = torch.nn.Parameter(torch.tensor([103.939, 116.779, 123.68])\n .resize_(1, 3, 1, 1))\n self.resnet = torch.jit.trace(torchvision.models.resnet18(),\n torch.rand(1, 3, 224, 224))\n\n def forward(self, input):\n return self.resnet(input - self.means)\n\nmy_script_module = torch.jit.script(MyScriptModule())\n```\n\n## TorchScript Language\n\nTorchScript is a statically typed subset of Python, so many Python features apply\ndirectly to TorchScript. 
See the full {ref}`language-reference-v2` for details.\n\n(builtin functions)=\n\n## Built-in Functions and Modules\n\nTorchScript supports the use of most PyTorch functions and many Python built-ins.\nSee {ref}`builtin-functions` for a full reference of supported functions.\n\n### PyTorch Functions and Modules\n\nTorchScript supports a subset of the tensor and neural network\nfunctions that PyTorch provides. Most methods on Tensor as well as functions in\nthe `torch` namespace, all functions in `torch.nn.functional` and\nmost modules from `torch.nn` are supported in TorchScript.\n\nSee {ref}`jit_unsupported` for a list of unsupported PyTorch functions and modules.\n\n### Python Functions and Modules\n\nMany of Python's [built-in functions](https://docs.python.org/3/library/functions.html) are supported in TorchScript.\nThe {any}`math` module is also supported (see {ref}`math-module` for details), but no other Python modules\n(built-in or third party) are supported.\n\n### Python Language Reference Comparison\n\nFor a full listing of supported Python features, see {ref}`python-language-reference`.\n\n## Debugging\n\n(disable TorchScript)=\n\n### Disable JIT for Debugging\n\n```{eval-rst}\n.. envvar:: PYTORCH_JIT\n```\n\nSetting the environment variable `PYTORCH_JIT=0` will disable all script\nand tracing annotations. If there is hard-to-debug error in one of your\nTorchScript models, you can use this flag to force everything to run using native\nPython. Since TorchScript (scripting and tracing) is disabled with this flag,\nyou can use tools like `pdb` to debug the model code. For example:\n\n```python\n@torch.jit.script\ndef scripted_fn(x : torch.Tensor):\n for i in range(12):\n x = x + x\n return x\n\ndef fn(x):\n x = torch.neg(x)\n import pdb; pdb.set_trace()\n return scripted_fn(x)\n\ntraced_fn = torch.jit.trace(fn, (torch.rand(4, 5),))\ntraced_fn(torch.rand(3, 4))\n```\n\nDebugging this script with `pdb` works except for when we invoke the\n{func}`@torch.jit.script ` function. We can globally disable\nJIT, so that we can call the {func}`@torch.jit.script `\nfunction as a normal Python function and not compile it. If the above script\nis called `disable_jit_example.py`, we can invoke it like so:\n\n```bash\n$ PYTORCH_JIT=0 python disable_jit_example.py\n```\n\nand we will be able to step into the {func}`@torch.jit.script `\nfunction as a normal Python function. To disable the\nTorchScript compiler for a specific function, see\n{func}`@torch.jit.ignore `.\n\n(inspecting-code)=\n\n### Inspecting Code\n\nTorchScript provides a code pretty-printer for all {class}`ScriptModule` instances. This\npretty-printer gives an interpretation of the script method's code as valid\nPython syntax. For example:\n\n```{testcode}\n@torch.jit.script\ndef foo(len):\n # type: (int) -> torch.Tensor\n rv = torch.zeros(3, 4)\n for i in range(len):\n if i < 10:\n rv = rv - 1.0\n else:\n rv = rv + 1.0\n return rv\n\nprint(foo.code)\n```\n\n```{testoutput}\n:hide:\n\n...\n```\n\nA {class}`ScriptModule` with a single `forward` method will have an attribute\n`code`, which you can use to inspect the {class}`ScriptModule`'s code.\nIf the {class}`ScriptModule` has more than one method, you will need to access\n`.code` on the method itself and not the module. 
We can inspect the\ncode of a method named `foo` on a {class}`ScriptModule` by accessing `.foo.code`.\nThe example above produces this output:\n\n```python\ndef foo(len: int) -> Tensor:\n rv = torch.zeros([3, 4], dtype=None, layout=None, device=None, pin_memory=None)\n rv0 = rv\n for i in range(len):\n if torch.lt(i, 10):\n rv1 = torch.sub(rv0, 1., 1)\n else:\n rv1 = torch.add(rv0, 1., 1)\n rv0 = rv1\n return rv0\n```\n\nThis is TorchScript's compilation of the code for the `forward` method.\nYou can use this to ensure TorchScript (tracing or scripting) has captured\nyour model code correctly.\n\n(interpreting-graphs)=\n\n### Interpreting Graphs\n\nTorchScript also has a representation at a lower level than the code pretty-printer, in the form of IR graphs.\n\nTorchScript uses a static single assignment (SSA) intermediate representation\n(IR) to represent computation. The instructions in this format consist of\nATen (the C++ backend of PyTorch) operators and other primitive operators,\nincluding control flow operators for loops and conditionals. As an example:\n\n```{testcode}\n@torch.jit.script\ndef foo(len):\n # type: (int) -> torch.Tensor\n rv = torch.zeros(3, 4)\n for i in range(len):\n if i < 10:\n rv = rv - 1.0\n else:\n rv = rv + 1.0\n return rv\n\nprint(foo.graph)\n```\n\n```{testoutput}\n:hide:\n\n...\n```\n\n`graph` follows the same rules described in the {ref}`inspecting-code` section\nwith regard to `forward` method lookup.\n\nThe example script above produces the graph:\n\n```\ngraph(%len.1 : int):\n %24 : int = prim::Constant[value=1]()\n %17 : bool = prim::Constant[value=1]() # test.py:10:5\n %12 : bool? = prim::Constant()\n %10 : Device? = prim::Constant()\n %6 : int? = prim::Constant()\n %1 : int = prim::Constant[value=3]() # test.py:9:22\n %2 : int = prim::Constant[value=4]() # test.py:9:25\n %20 : int = prim::Constant[value=10]() # test.py:11:16\n %23 : float = prim::Constant[value=1]() # test.py:12:23\n %4 : int[] = prim::ListConstruct(%1, %2)\n %rv.1 : Tensor = aten::zeros(%4, %6, %6, %10, %12) # test.py:9:10\n %rv : Tensor = prim::Loop(%len.1, %17, %rv.1) # test.py:10:5\n block0(%i.1 : int, %rv.14 : Tensor):\n %21 : bool = aten::lt(%i.1, %20) # test.py:11:12\n %rv.13 : Tensor = prim::If(%21) # test.py:11:9\n block0():\n %rv.3 : Tensor = aten::sub(%rv.14, %23, %24) # test.py:12:18\n -> (%rv.3)\n block1():\n %rv.6 : Tensor = aten::add(%rv.14, %23, %24) # test.py:14:18\n -> (%rv.6)\n -> (%17, %rv.13)\n return (%rv)\n```\n\nTake the instruction `%rv.1 : Tensor = aten::zeros(%4, %6, %6, %10, %12) # test.py:9:10` for\nexample.\n\n- `%rv.1 : Tensor` means we assign the output to a (unique) value named `rv.1`, that value is of `Tensor` type and that we do not know its concrete shape.\n- `aten::zeros` is the operator (equivalent to `torch.zeros`) and the input list `(%4, %6, %6, %10, %12)` specifies which values in scope should be passed as inputs. 
The schema for built-in functions like `aten::zeros` can be found at Builtin Functions.", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2154769604", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156094, - "pr_file": "docs/source/jit.md", - "discussion_id": "2154769604", - "commented_code": "@@ -0,0 +1,892 @@\n+# TorchScript\n+\n+```{toctree}\n+:maxdepth: 1\n+:caption: Builtin Functions\n+:hidden:\n+\n+torch.jit.supported_ops \n+```\n+\n+```{toctree}\n+:maxdepth: 1\n+:caption: Language Reference\n+:hidden:\n+\n+jit_language_reference\n+```\n+\n+```{toctree}\n+:maxdepth: 1\n+\n+jit_language_reference_v2\n+```\n+\n+```{contents}\n+:local:\n+:depth: 2\n+```\n+\n+```{eval-rst}\n+.. automodule:: torch.jit\n+.. currentmodule:: torch.jit\n+```\n+\n+TorchScript is a way to create serializable and optimizable models from PyTorch code.\n+Any TorchScript program can be saved from a Python\n+process and loaded in a process where there is no Python dependency.\n+\n+We provide tools to incrementally transition a model from a pure Python program\n+to a TorchScript program that can be run independently from Python, such as in a standalone C++ program.\n+This makes it possible to train models in PyTorch using familiar tools in Python and then export\n+the model via TorchScript to a production environment where Python programs may be disadvantageous\n+for performance and multi-threading reasons.\n+\n+For a gentle introduction to TorchScript, see the [Introduction to TorchScript](https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html) tutorial.\n+\n+For an end-to-end example of converting a PyTorch model to TorchScript and running it in C++, see the\n+[Loading a PyTorch Model in C++](https://pytorch.org/tutorials/advanced/cpp_export.html) tutorial.\n+\n+## Creating TorchScript Code\n+\n+```{eval-rst}\n+.. autosummary::\n+ :toctree: generated\n+ :nosignatures:\n+\n+ script\n+ trace\n+ script_if_tracing\n+ trace_module\n+ fork\n+ wait\n+ ScriptModule\n+ ScriptFunction\n+ freeze\n+ optimize_for_inference\n+ enable_onednn_fusion\n+ onednn_fusion_enabled\n+ set_fusion_strategy\n+ strict_fusion\n+ save\n+ load\n+ ignore\n+ unused\n+ interface\n+ isinstance\n+ Attribute\n+ annotate\n+```\n+\n+## Mixing Tracing and Scripting\n+\n+In many cases either tracing or scripting is an easier approach for converting a model to TorchScript.\n+Tracing and scripting can be composed to suit the particular requirements\n+of a part of a model.\n+\n+Scripted functions can call traced functions. This is particularly useful when you need\n+to use control-flow around a simple feed-forward model. 
For instance the beam search\n+of a sequence to sequence model will typically be written in script but can call an\n+encoder module generated using tracing.\n+\n+```{testsetup}\n+# These are hidden from the docs, but these are necessary for `doctest`\n+# since the `inspect` module doesn't play nicely with the execution\n+# environment for `doctest`\n+import torch\n+\n+original_script = torch.jit.script\n+def script_wrapper(obj, *args, **kwargs):\n+ obj.__module__ = 'FakeMod'\n+ return original_script(obj, *args, **kwargs)\n+\n+torch.jit.script = script_wrapper\n+\n+original_trace = torch.jit.trace\n+def trace_wrapper(obj, *args, **kwargs):\n+ obj.__module__ = 'FakeMod'\n+ return original_trace(obj, *args, **kwargs)\n+\n+torch.jit.trace = trace_wrapper\n+```\n+\n+Example (calling a traced function in script):\n+\n+```{testcode}\n+import torch\n+\n+def foo(x, y):\n+ return 2 * x + y\n+\n+traced_foo = torch.jit.trace(foo, (torch.rand(3), torch.rand(3)))\n+\n+@torch.jit.script\n+def bar(x):\n+ return traced_foo(x, x)\n+```\n+\n+Traced functions can call script functions. This is useful when a small part of\n+a model requires some control-flow even though most of the model is just a feed-forward\n+network. Control-flow inside of a script function called by a traced function is\n+preserved correctly.\n+\n+Example (calling a script function in a traced function):\n+\n+```{testcode}\n+import torch\n+\n+@torch.jit.script\n+def foo(x, y):\n+ if x.max() > y.max():\n+ r = x\n+ else:\n+ r = y\n+ return r\n+\n+\n+def bar(x, y, z):\n+ return foo(x, y) + z\n+\n+traced_bar = torch.jit.trace(bar, (torch.rand(3), torch.rand(3), torch.rand(3)))\n+```\n+\n+This composition also works for `nn.Module`s as well, where it can be used to generate\n+a submodule using tracing that can be called from the methods of a script module.\n+\n+Example (using a traced module):\n+\n+```{testcode}\n+import torch\n+import torchvision\n+:skipif: torchvision is None\n+\n+class MyScriptModule(torch.nn.Module):\n+ def __init__(self):\n+ super().__init__()\n+ self.means = torch.nn.Parameter(torch.tensor([103.939, 116.779, 123.68])\n+ .resize_(1, 3, 1, 1))\n+ self.resnet = torch.jit.trace(torchvision.models.resnet18(),\n+ torch.rand(1, 3, 224, 224))\n+\n+ def forward(self, input):\n+ return self.resnet(input - self.means)\n+\n+my_script_module = torch.jit.script(MyScriptModule())\n+```\n+\n+## TorchScript Language\n+\n+TorchScript is a statically typed subset of Python, so many Python features apply\n+directly to TorchScript. See the full {ref}`language-reference-v2` for details.\n+\n+(builtin functions)=\n+\n+## Built-in Functions and Modules\n+\n+TorchScript supports the use of most PyTorch functions and many Python built-ins.\n+See {ref}`builtin-functions` for a full reference of supported functions.\n+\n+### PyTorch Functions and Modules\n+\n+TorchScript supports a subset of the tensor and neural network\n+functions that PyTorch provides. 
Most methods on Tensor as well as functions in\n+the `torch` namespace, all functions in `torch.nn.functional` and\n+most modules from `torch.nn` are supported in TorchScript.\n+\n+See {ref}`jit_unsupported` for a list of unsupported PyTorch functions and modules.\n+\n+### Python Functions and Modules\n+\n+Many of Python's [built-in functions](https://docs.python.org/3/library/functions.html) are supported in TorchScript.\n+The {any}`math` module is also supported (see {ref}`math-module` for details), but no other Python modules\n+(built-in or third party) are supported.\n+\n+### Python Language Reference Comparison\n+\n+For a full listing of supported Python features, see {ref}`python-language-reference`.\n+\n+## Debugging\n+\n+(disable TorchScript)=\n+\n+### Disable JIT for Debugging\n+\n+```{eval-rst}\n+.. envvar:: PYTORCH_JIT\n+```\n+\n+Setting the environment variable `PYTORCH_JIT=0` will disable all script\n+and tracing annotations. If there is hard-to-debug error in one of your\n+TorchScript models, you can use this flag to force everything to run using native\n+Python. Since TorchScript (scripting and tracing) is disabled with this flag,\n+you can use tools like `pdb` to debug the model code. For example:\n+\n+```python\n+@torch.jit.script\n+def scripted_fn(x : torch.Tensor):\n+ for i in range(12):\n+ x = x + x\n+ return x\n+\n+def fn(x):\n+ x = torch.neg(x)\n+ import pdb; pdb.set_trace()\n+ return scripted_fn(x)\n+\n+traced_fn = torch.jit.trace(fn, (torch.rand(4, 5),))\n+traced_fn(torch.rand(3, 4))\n+```\n+\n+Debugging this script with `pdb` works except for when we invoke the\n+{func}`@torch.jit.script ` function. We can globally disable\n+JIT, so that we can call the {func}`@torch.jit.script `\n+function as a normal Python function and not compile it. If the above script\n+is called `disable_jit_example.py`, we can invoke it like so:\n+\n+```bash\n+$ PYTORCH_JIT=0 python disable_jit_example.py\n+```\n+\n+and we will be able to step into the {func}`@torch.jit.script `\n+function as a normal Python function. To disable the\n+TorchScript compiler for a specific function, see\n+{func}`@torch.jit.ignore `.\n+\n+(inspecting-code)=\n+\n+### Inspecting Code\n+\n+TorchScript provides a code pretty-printer for all {class}`ScriptModule` instances. This\n+pretty-printer gives an interpretation of the script method's code as valid\n+Python syntax. For example:\n+\n+```{testcode}\n+@torch.jit.script\n+def foo(len):\n+ # type: (int) -> torch.Tensor\n+ rv = torch.zeros(3, 4)\n+ for i in range(len):\n+ if i < 10:\n+ rv = rv - 1.0\n+ else:\n+ rv = rv + 1.0\n+ return rv\n+\n+print(foo.code)\n+```\n+\n+```{testoutput}\n+:hide:\n+\n+...\n+```\n+\n+A {class}`ScriptModule` with a single `forward` method will have an attribute\n+`code`, which you can use to inspect the {class}`ScriptModule`'s code.\n+If the {class}`ScriptModule` has more than one method, you will need to access\n+`.code` on the method itself and not the module. 
We can inspect the\n+code of a method named `foo` on a {class}`ScriptModule` by accessing `.foo.code`.\n+The example above produces this output:\n+\n+```python\n+def foo(len: int) -> Tensor:\n+ rv = torch.zeros([3, 4], dtype=None, layout=None, device=None, pin_memory=None)\n+ rv0 = rv\n+ for i in range(len):\n+ if torch.lt(i, 10):\n+ rv1 = torch.sub(rv0, 1., 1)\n+ else:\n+ rv1 = torch.add(rv0, 1., 1)\n+ rv0 = rv1\n+ return rv0\n+```\n+\n+This is TorchScript's compilation of the code for the `forward` method.\n+You can use this to ensure TorchScript (tracing or scripting) has captured\n+your model code correctly.\n+\n+(interpreting-graphs)=\n+\n+### Interpreting Graphs\n+\n+TorchScript also has a representation at a lower level than the code pretty-printer, in the form of IR graphs.\n+\n+TorchScript uses a static single assignment (SSA) intermediate representation\n+(IR) to represent computation. The instructions in this format consist of\n+ATen (the C++ backend of PyTorch) operators and other primitive operators,\n+including control flow operators for loops and conditionals. As an example:\n+\n+```{testcode}\n+@torch.jit.script\n+def foo(len):\n+ # type: (int) -> torch.Tensor\n+ rv = torch.zeros(3, 4)\n+ for i in range(len):\n+ if i < 10:\n+ rv = rv - 1.0\n+ else:\n+ rv = rv + 1.0\n+ return rv\n+\n+print(foo.graph)\n+```\n+\n+```{testoutput}\n+:hide:\n+\n+...\n+```\n+\n+`graph` follows the same rules described in the {ref}`inspecting-code` section\n+with regard to `forward` method lookup.\n+\n+The example script above produces the graph:\n+\n+```\n+graph(%len.1 : int):\n+ %24 : int = prim::Constant[value=1]()\n+ %17 : bool = prim::Constant[value=1]() # test.py:10:5\n+ %12 : bool? = prim::Constant()\n+ %10 : Device? = prim::Constant()\n+ %6 : int? = prim::Constant()\n+ %1 : int = prim::Constant[value=3]() # test.py:9:22\n+ %2 : int = prim::Constant[value=4]() # test.py:9:25\n+ %20 : int = prim::Constant[value=10]() # test.py:11:16\n+ %23 : float = prim::Constant[value=1]() # test.py:12:23\n+ %4 : int[] = prim::ListConstruct(%1, %2)\n+ %rv.1 : Tensor = aten::zeros(%4, %6, %6, %10, %12) # test.py:9:10\n+ %rv : Tensor = prim::Loop(%len.1, %17, %rv.1) # test.py:10:5\n+ block0(%i.1 : int, %rv.14 : Tensor):\n+ %21 : bool = aten::lt(%i.1, %20) # test.py:11:12\n+ %rv.13 : Tensor = prim::If(%21) # test.py:11:9\n+ block0():\n+ %rv.3 : Tensor = aten::sub(%rv.14, %23, %24) # test.py:12:18\n+ -> (%rv.3)\n+ block1():\n+ %rv.6 : Tensor = aten::add(%rv.14, %23, %24) # test.py:14:18\n+ -> (%rv.6)\n+ -> (%17, %rv.13)\n+ return (%rv)\n+```\n+\n+Take the instruction `%rv.1 : Tensor = aten::zeros(%4, %6, %6, %10, %12) # test.py:9:10` for\n+example.\n+\n+- `%rv.1 : Tensor` means we assign the output to a (unique) value named `rv.1`, that value is of `Tensor` type and that we do not know its concrete shape.\n+- `aten::zeros` is the operator (equivalent to `torch.zeros`) and the input list `(%4, %6, %6, %10, %12)` specifies which values in scope should be passed as inputs. The schema for built-in functions like `aten::zeros` can be found at Builtin Functions.", - "comment_created_at": "2025-06-18T14:31:31+00:00", - "comment_author": "svekars", - "comment_body": "```suggestion\r\n- `aten::zeros` is the operator (equivalent to `torch.zeros`) and the input list `(%4, %6, %6, %10, %12)` specifies which values in scope should be passed as inputs. 
The schema for built-in functions like `aten::zeros` can be found at {ref}``.\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-descriptive-meaningful-names.json b/_reviewers/pytorch-descriptive-meaningful-names.json new file mode 100644 index 0000000..a6f9afc --- /dev/null +++ b/_reviewers/pytorch-descriptive-meaningful-names.json @@ -0,0 +1,148 @@ +[ + { + "discussion_id": "2178202103", + "pr_number": 157290, + "pr_file": "torch/nativert/executor/SerialGraphExecutor.cpp", + "created_at": "2025-07-01T17:45:50+00:00", + "commented_code": "std::vector SerialGraphExecutor::executeWithPrefilledFrame(\n ExecutionFrame& executionFrame) {\n executionFrame.withMemoryPlanner([&]() {\n executionFrame.withManagedMemory([&](const LayoutManager* m) {\n // Execute kernels for all nodes except prim.Input and prim.Output\n for (NodeIndex nodeIdx = 1; nodeIdx < nodeKernels_.size() - 1; ++nodeIdx) {", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2178202103", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157290, + "pr_file": "torch/nativert/executor/SerialGraphExecutor.cpp", + "discussion_id": "2178202103", + "commented_code": "@@ -14,11 +14,18 @@ std::vector SerialGraphExecutor::execute(\n \n std::vector SerialGraphExecutor::executeWithPrefilledFrame(\n ExecutionFrame& executionFrame) {\n- executionFrame.withMemoryPlanner([&]() {\n+ executionFrame.withManagedMemory([&](const LayoutManager* m) {\n // Execute kernels for all nodes except prim.Input and prim.Output\n for (NodeIndex nodeIdx = 1; nodeIdx < nodeKernels_.size() - 1; ++nodeIdx) {", + "comment_created_at": "2025-07-01T17:45:50+00:00", + "comment_author": "SherlockNoMad", + "comment_body": "cc @zhxchen17 \r\n\r\nWe should follow pytorch's variable naming convention when porting code to OSS. \r\nShould do one pass for cleaning up camel case? 
\r\n", + "pr_file_module": null + }, + { + "comment_id": "2178737967", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157290, + "pr_file": "torch/nativert/executor/SerialGraphExecutor.cpp", + "discussion_id": "2178202103", + "commented_code": "@@ -14,11 +14,18 @@ std::vector SerialGraphExecutor::execute(\n \n std::vector SerialGraphExecutor::executeWithPrefilledFrame(\n ExecutionFrame& executionFrame) {\n- executionFrame.withMemoryPlanner([&]() {\n+ executionFrame.withManagedMemory([&](const LayoutManager* m) {\n // Execute kernels for all nodes except prim.Input and prim.Output\n for (NodeIndex nodeIdx = 1; nodeIdx < nodeKernels_.size() - 1; ++nodeIdx) {", + "comment_created_at": "2025-07-01T23:51:25+00:00", + "comment_author": "dolpm", + "comment_body": "100%", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2178599052", + "pr_number": 157290, + "pr_file": "torch/nativert/executor/memory/LayoutManager.cpp", + "created_at": "2025-07-01T21:50:28+00:00", + "commented_code": "}\n}\n\n#ifndef NDEBUG\nvoid LayoutManager::assert_no_overlapping_storages(const Node& node, size_t t)", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2178599052", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157290, + "pr_file": "torch/nativert/executor/memory/LayoutManager.cpp", + "discussion_id": "2178599052", + "commented_code": "@@ -157,6 +160,121 @@ void LayoutManager::populate_tensor_values() {\n }\n }\n \n+#ifndef NDEBUG\n+void LayoutManager::assert_no_overlapping_storages(const Node& node, size_t t)", + "comment_created_at": "2025-07-01T21:50:28+00:00", + "comment_author": "SherlockNoMad", + "comment_body": "man... what's the meaning of size_t t?\r\n\r\nI have to check the call site, it says nodeIdx? \r\n\r\nPlease give meaningful names to variable. \r\n\r\nreadability really matters, especially when this code is now is OSS", + "pr_file_module": null + }, + { + "comment_id": "2178747280", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157290, + "pr_file": "torch/nativert/executor/memory/LayoutManager.cpp", + "discussion_id": "2178599052", + "commented_code": "@@ -157,6 +160,121 @@ void LayoutManager::populate_tensor_values() {\n }\n }\n \n+#ifndef NDEBUG\n+void LayoutManager::assert_no_overlapping_storages(const Node& node, size_t t)", + "comment_created_at": "2025-07-02T00:05:19+00:00", + "comment_author": "dolpm", + "comment_body": "t for time. 
this is bad naming, i agree.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2024110858", + "pr_number": 150506, + "pr_file": "aten/src/ATen/native/TensorAdvancedIndexing.cpp", + "created_at": "2025-04-02T05:48:38+00:00", + "commented_code": "AdvancedIndex::AdvancedIndex(const Tensor& src, TensorList indices_list) {", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2024110858", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150506, + "pr_file": "aten/src/ATen/native/TensorAdvancedIndexing.cpp", + "discussion_id": "2024110858", + "commented_code": "@@ -647,14 +647,14 @@ static Tensor reshape_indexer(\n \n AdvancedIndex::AdvancedIndex(const Tensor& src, TensorList indices_list) {", + "comment_created_at": "2025-04-02T05:48:38+00:00", + "comment_author": "malfet", + "comment_body": "Why not change `src` name to `src_` to avoid shadowing?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2024113352", + "pr_number": 150506, + "pr_file": "aten/src/ATen/native/TensorAdvancedIndexing.cpp", + "created_at": "2025-04-02T05:51:11+00:00", + "commented_code": "IntArrayRef a, IntArrayRef b, int64_t dim) -> bool {", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2024113352", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150506, + "pr_file": "aten/src/ATen/native/TensorAdvancedIndexing.cpp", + "discussion_id": "2024113352", + "commented_code": "@@ -1224,7 +1231,7 @@ TORCH_IMPL_FUNC(index_add_cpu_out)\n IntArrayRef a, IntArrayRef b, int64_t dim) -> bool {", + "comment_created_at": "2025-04-02T05:51:11+00:00", + "comment_author": "malfet", + "comment_body": "Why not make dim a `size_t`? Also, its name shadows method argument, isn't it?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2132818445", + "pr_number": 155134, + "pr_file": "c10/cuda/CUDACachingAllocator.h", + "created_at": "2025-06-06T19:42:54+00:00", + "commented_code": "MemPool(\n CUDACachingAllocator::CUDAAllocator* allocator = nullptr,\n bool is_user_created = true,\n bool use_on_oom = false);\n bool use_on_oom = false,\n bool symm_mem = false);\n MemPool(const MemPool&) = delete;\n MemPool(MemPool&&) = default;\n MemPool& operator=(const MemPool&) = delete;\n MemPool& operator=(MemPool&&) = default;\n ~MemPool();\n\n MempoolId_t id();\n bool symm_mem();", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2132818445", + "repo_full_name": "pytorch/pytorch", + "pr_number": 155134, + "pr_file": "c10/cuda/CUDACachingAllocator.h", + "discussion_id": "2132818445", + "commented_code": "@@ -535,14 +535,16 @@ struct C10_CUDA_API MemPool {\n MemPool(\n CUDACachingAllocator::CUDAAllocator* allocator = nullptr,\n bool is_user_created = true,\n- bool use_on_oom = false);\n+ bool use_on_oom = false,\n+ bool symm_mem = false);\n MemPool(const MemPool&) = delete;\n MemPool(MemPool&&) = default;\n MemPool& operator=(const MemPool&) = delete;\n MemPool& operator=(MemPool&&) = default;\n ~MemPool();\n \n MempoolId_t id();\n+ bool symm_mem();", + "comment_created_at": "2025-06-06T19:42:54+00:00", + "comment_author": "kwen2501", + "comment_body": "nit: `is_symmetric()`?", + "pr_file_module": null + }, + { + "comment_id": "2159858849", + "repo_full_name": "pytorch/pytorch", + "pr_number": 155134, + "pr_file": "c10/cuda/CUDACachingAllocator.h", + "discussion_id": "2132818445", + "commented_code": "@@ -535,14 +535,16 @@ struct C10_CUDA_API MemPool {\n MemPool(\n 
CUDACachingAllocator::CUDAAllocator* allocator = nullptr,\n bool is_user_created = true,\n- bool use_on_oom = false);\n+ bool use_on_oom = false,\n+ bool symm_mem = false);\n MemPool(const MemPool&) = delete;\n MemPool(MemPool&&) = default;\n MemPool& operator=(const MemPool&) = delete;\n MemPool& operator=(MemPool&&) = default;\n ~MemPool();\n \n MempoolId_t id();\n+ bool symm_mem();", + "comment_created_at": "2025-06-21T04:25:54+00:00", + "comment_author": "syed-ahmed", + "comment_body": "Sounds good!", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-descriptive-meaningful-names.md b/_reviewers/pytorch-descriptive-meaningful-names.md index bb17d2e..290a697 100644 --- a/_reviewers/pytorch-descriptive-meaningful-names.md +++ b/_reviewers/pytorch-descriptive-meaningful-names.md @@ -18,153 +18,3 @@ Examples: - ❌ `src` (shadowing) → ✅ `src_` Always follow PyTorch's established naming conventions, avoiding camel case for variables. Properly named identifiers significantly improve code readability and maintainability, especially in open source contexts where many developers interact with the code. - - -[ - { - "discussion_id": "2178202103", - "pr_number": 157290, - "pr_file": "torch/nativert/executor/SerialGraphExecutor.cpp", - "created_at": "2025-07-01T17:45:50+00:00", - "commented_code": "std::vector SerialGraphExecutor::executeWithPrefilledFrame(\n ExecutionFrame& executionFrame) {\n executionFrame.withMemoryPlanner([&]() {\n executionFrame.withManagedMemory([&](const LayoutManager* m) {\n // Execute kernels for all nodes except prim.Input and prim.Output\n for (NodeIndex nodeIdx = 1; nodeIdx < nodeKernels_.size() - 1; ++nodeIdx) {", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2178202103", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157290, - "pr_file": "torch/nativert/executor/SerialGraphExecutor.cpp", - "discussion_id": "2178202103", - "commented_code": "@@ -14,11 +14,18 @@ std::vector SerialGraphExecutor::execute(\n \n std::vector SerialGraphExecutor::executeWithPrefilledFrame(\n ExecutionFrame& executionFrame) {\n- executionFrame.withMemoryPlanner([&]() {\n+ executionFrame.withManagedMemory([&](const LayoutManager* m) {\n // Execute kernels for all nodes except prim.Input and prim.Output\n for (NodeIndex nodeIdx = 1; nodeIdx < nodeKernels_.size() - 1; ++nodeIdx) {", - "comment_created_at": "2025-07-01T17:45:50+00:00", - "comment_author": "SherlockNoMad", - "comment_body": "cc @zhxchen17 \r\n\r\nWe should follow pytorch's variable naming convention when porting code to OSS. \r\nShould do one pass for cleaning up camel case? 
\r\n", - "pr_file_module": null - }, - { - "comment_id": "2178737967", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157290, - "pr_file": "torch/nativert/executor/SerialGraphExecutor.cpp", - "discussion_id": "2178202103", - "commented_code": "@@ -14,11 +14,18 @@ std::vector SerialGraphExecutor::execute(\n \n std::vector SerialGraphExecutor::executeWithPrefilledFrame(\n ExecutionFrame& executionFrame) {\n- executionFrame.withMemoryPlanner([&]() {\n+ executionFrame.withManagedMemory([&](const LayoutManager* m) {\n // Execute kernels for all nodes except prim.Input and prim.Output\n for (NodeIndex nodeIdx = 1; nodeIdx < nodeKernels_.size() - 1; ++nodeIdx) {", - "comment_created_at": "2025-07-01T23:51:25+00:00", - "comment_author": "dolpm", - "comment_body": "100%", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2178599052", - "pr_number": 157290, - "pr_file": "torch/nativert/executor/memory/LayoutManager.cpp", - "created_at": "2025-07-01T21:50:28+00:00", - "commented_code": "}\n}\n\n#ifndef NDEBUG\nvoid LayoutManager::assert_no_overlapping_storages(const Node& node, size_t t)", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2178599052", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157290, - "pr_file": "torch/nativert/executor/memory/LayoutManager.cpp", - "discussion_id": "2178599052", - "commented_code": "@@ -157,6 +160,121 @@ void LayoutManager::populate_tensor_values() {\n }\n }\n \n+#ifndef NDEBUG\n+void LayoutManager::assert_no_overlapping_storages(const Node& node, size_t t)", - "comment_created_at": "2025-07-01T21:50:28+00:00", - "comment_author": "SherlockNoMad", - "comment_body": "man... what's the meaning of size_t t?\r\n\r\nI have to check the call site, it says nodeIdx? \r\n\r\nPlease give meaningful names to variable. \r\n\r\nreadability really matters, especially when this code is now is OSS", - "pr_file_module": null - }, - { - "comment_id": "2178747280", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157290, - "pr_file": "torch/nativert/executor/memory/LayoutManager.cpp", - "discussion_id": "2178599052", - "commented_code": "@@ -157,6 +160,121 @@ void LayoutManager::populate_tensor_values() {\n }\n }\n \n+#ifndef NDEBUG\n+void LayoutManager::assert_no_overlapping_storages(const Node& node, size_t t)", - "comment_created_at": "2025-07-02T00:05:19+00:00", - "comment_author": "dolpm", - "comment_body": "t for time. 
this is bad naming, i agree.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2024110858", - "pr_number": 150506, - "pr_file": "aten/src/ATen/native/TensorAdvancedIndexing.cpp", - "created_at": "2025-04-02T05:48:38+00:00", - "commented_code": "AdvancedIndex::AdvancedIndex(const Tensor& src, TensorList indices_list) {", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2024110858", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150506, - "pr_file": "aten/src/ATen/native/TensorAdvancedIndexing.cpp", - "discussion_id": "2024110858", - "commented_code": "@@ -647,14 +647,14 @@ static Tensor reshape_indexer(\n \n AdvancedIndex::AdvancedIndex(const Tensor& src, TensorList indices_list) {", - "comment_created_at": "2025-04-02T05:48:38+00:00", - "comment_author": "malfet", - "comment_body": "Why not change `src` name to `src_` to avoid shadowing?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2024113352", - "pr_number": 150506, - "pr_file": "aten/src/ATen/native/TensorAdvancedIndexing.cpp", - "created_at": "2025-04-02T05:51:11+00:00", - "commented_code": "IntArrayRef a, IntArrayRef b, int64_t dim) -> bool {", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2024113352", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150506, - "pr_file": "aten/src/ATen/native/TensorAdvancedIndexing.cpp", - "discussion_id": "2024113352", - "commented_code": "@@ -1224,7 +1231,7 @@ TORCH_IMPL_FUNC(index_add_cpu_out)\n IntArrayRef a, IntArrayRef b, int64_t dim) -> bool {", - "comment_created_at": "2025-04-02T05:51:11+00:00", - "comment_author": "malfet", - "comment_body": "Why not make dim a `size_t`? Also, its name shadows method argument, isn't it?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2132818445", - "pr_number": 155134, - "pr_file": "c10/cuda/CUDACachingAllocator.h", - "created_at": "2025-06-06T19:42:54+00:00", - "commented_code": "MemPool(\n CUDACachingAllocator::CUDAAllocator* allocator = nullptr,\n bool is_user_created = true,\n bool use_on_oom = false);\n bool use_on_oom = false,\n bool symm_mem = false);\n MemPool(const MemPool&) = delete;\n MemPool(MemPool&&) = default;\n MemPool& operator=(const MemPool&) = delete;\n MemPool& operator=(MemPool&&) = default;\n ~MemPool();\n\n MempoolId_t id();\n bool symm_mem();", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2132818445", - "repo_full_name": "pytorch/pytorch", - "pr_number": 155134, - "pr_file": "c10/cuda/CUDACachingAllocator.h", - "discussion_id": "2132818445", - "commented_code": "@@ -535,14 +535,16 @@ struct C10_CUDA_API MemPool {\n MemPool(\n CUDACachingAllocator::CUDAAllocator* allocator = nullptr,\n bool is_user_created = true,\n- bool use_on_oom = false);\n+ bool use_on_oom = false,\n+ bool symm_mem = false);\n MemPool(const MemPool&) = delete;\n MemPool(MemPool&&) = default;\n MemPool& operator=(const MemPool&) = delete;\n MemPool& operator=(MemPool&&) = default;\n ~MemPool();\n \n MempoolId_t id();\n+ bool symm_mem();", - "comment_created_at": "2025-06-06T19:42:54+00:00", - "comment_author": "kwen2501", - "comment_body": "nit: `is_symmetric()`?", - "pr_file_module": null - }, - { - "comment_id": "2159858849", - "repo_full_name": "pytorch/pytorch", - "pr_number": 155134, - "pr_file": "c10/cuda/CUDACachingAllocator.h", - "discussion_id": "2132818445", - "commented_code": "@@ -535,14 +535,16 @@ struct C10_CUDA_API MemPool {\n MemPool(\n 
CUDACachingAllocator::CUDAAllocator* allocator = nullptr,\n bool is_user_created = true,\n- bool use_on_oom = false);\n+ bool use_on_oom = false,\n+ bool symm_mem = false);\n MemPool(const MemPool&) = delete;\n MemPool(MemPool&&) = default;\n MemPool& operator=(const MemPool&) = delete;\n MemPool& operator=(MemPool&&) = default;\n ~MemPool();\n \n MempoolId_t id();\n+ bool symm_mem();", - "comment_created_at": "2025-06-21T04:25:54+00:00", - "comment_author": "syed-ahmed", - "comment_body": "Sounds good!", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-document-for-future.json b/_reviewers/pytorch-document-for-future.json new file mode 100644 index 0000000..996348e --- /dev/null +++ b/_reviewers/pytorch-document-for-future.json @@ -0,0 +1,196 @@ +[ + { + "discussion_id": "2172807638", + "pr_number": 157130, + "pr_file": "torch/onnx/_internal/exporter/_torchlib/ops/nn.py", + "created_at": "2025-06-27T20:11:29+00:00", + "commented_code": "\"SDPA (MHA) requires q_num_heads = kv_num_heads\"\n )\n\n # NOTE: There was extended discussion on whether the num_heads attributes (q_num_heads/kv_num_heads)\n # should be set as ONNX attributes or inferred from the tensor shape. In ONNX, num_heads is needed\n # for 3D attention inputs (shape: [B, S, N*H]), but not for 4D ([B, N, S, H]), which is the only\n # input accepted by this exporter. Thus, the attribute is not strictly necessary here, but adding it\n # may ease future optimization or conversion to 3D formats (e.g., GQA ops)\n # NOTE: num_heads attributes (q_num_heads/kv_num_heads) are not mandatory for 4D.", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2172807638", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157130, + "pr_file": "torch/onnx/_internal/exporter/_torchlib/ops/nn.py", + "discussion_id": "2172807638", + "commented_code": "@@ -119,19 +119,19 @@ def aten_scaled_dot_product_attention_23(\n \"SDPA (MHA) requires q_num_heads = kv_num_heads\"\n )\n \n- # NOTE: There was extended discussion on whether the num_heads attributes (q_num_heads/kv_num_heads)\n- # should be set as ONNX attributes or inferred from the tensor shape. In ONNX, num_heads is needed\n- # for 3D attention inputs (shape: [B, S, N*H]), but not for 4D ([B, N, S, H]), which is the only\n- # input accepted by this exporter. Thus, the attribute is not strictly necessary here, but adding it\n- # may ease future optimization or conversion to 3D formats (e.g., GQA ops)\n+ # NOTE: num_heads attributes (q_num_heads/kv_num_heads) are not mandatory for 4D.", + "comment_created_at": "2025-06-27T20:11:29+00:00", + "comment_author": "gramalingam", + "comment_body": "nit: may be clearer if we change \"are not mandatory for 4D\" to \"should NOT be specified for 4D (even though the ONNX op documentation does not make this clear)\"", + "pr_file_module": null + }, + { + "comment_id": "2175067772", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157130, + "pr_file": "torch/onnx/_internal/exporter/_torchlib/ops/nn.py", + "discussion_id": "2172807638", + "commented_code": "@@ -119,19 +119,19 @@ def aten_scaled_dot_product_attention_23(\n \"SDPA (MHA) requires q_num_heads = kv_num_heads\"\n )\n \n- # NOTE: There was extended discussion on whether the num_heads attributes (q_num_heads/kv_num_heads)\n- # should be set as ONNX attributes or inferred from the tensor shape. 
In ONNX, num_heads is needed\n- # for 3D attention inputs (shape: [B, S, N*H]), but not for 4D ([B, N, S, H]), which is the only\n- # input accepted by this exporter. Thus, the attribute is not strictly necessary here, but adding it\n- # may ease future optimization or conversion to 3D formats (e.g., GQA ops)\n+ # NOTE: num_heads attributes (q_num_heads/kv_num_heads) are not mandatory for 4D.", + "comment_created_at": "2025-06-30T13:21:34+00:00", + "comment_author": "xadupre", + "comment_body": "changed, I removed the part in (...) in case the documentation of onnx is changed.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2175940579", + "pr_number": 156705, + "pr_file": "torch/distributed/checkpoint/hf_storage.py", + "created_at": "2025-06-30T21:01:09+00:00", + "commented_code": "return super()._write_data(planner, file_queue)\n\n def finish(self, metadata: Metadata, results: list[list[WriteResult]]) -> None:\n if self._save_sharded:\n if self.save_distributed and not self.consolidated_output_path:\n logger.info(\"Not consolidating sharded checkpoint in finish step.\")", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2175940579", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156705, + "pr_file": "torch/distributed/checkpoint/hf_storage.py", + "discussion_id": "2175940579", + "commented_code": "@@ -136,8 +154,16 @@ def write_data(\n return super()._write_data(planner, file_queue)\n \n def finish(self, metadata: Metadata, results: list[list[WriteResult]]) -> None:\n- if self._save_sharded:\n+ if self.save_distributed and not self.consolidated_output_path:\n+ logger.info(\"Not consolidating sharded checkpoint in finish step.\")", + "comment_created_at": "2025-06-30T21:01:09+00:00", + "comment_author": "saumishr", + "comment_body": "It would be worth adding a documentation here explaining different scenarios and why the metadata write is also being skipped. ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2153270155", + "pr_number": 156207, + "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", + "created_at": "2025-06-17T22:25:11+00:00", + "commented_code": "planner=planner,\n )\n\ndef default_stage(state_dict: STATE_DICT_TYPE, cpu_state_dict: STATE_DICT_TYPE) -> STATE_DICT_TYPE:\n \"\"\"Default stage function. This function will stage the state_dict on to the\n staging storage (defaults to CPU memory).\n\n Args:\n state_dict STATE_DICT_TYPE: The state_dict to stage.\n\n Returns:\n STATE_DICT_TYPE: staged state_dict\n \"\"\"\n staging_stream = torch.cuda.Stream()\n with staging_stream:\n staged_state_dict = _copy_state_dict(state_dict, cpu_state_dict, True, type_check=False)\n staging_stream.synchronize()\n return staged_state_dict\n\n@dataclass\nclass AsyncSaveResponse:", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2153270155", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156207, + "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", + "discussion_id": "2153270155", + "commented_code": "@@ -181,6 +189,36 @@ def save(\n planner=planner,\n )\n \n+def default_stage(state_dict: STATE_DICT_TYPE, cpu_state_dict: STATE_DICT_TYPE) -> STATE_DICT_TYPE:\n+ \"\"\"Default stage function. 
This function will stage the state_dict on to the\n+ staging storage (defaults to CPU memory).\n+\n+ Args:\n+ state_dict STATE_DICT_TYPE: The state_dict to stage.\n+\n+ Returns:\n+ STATE_DICT_TYPE: staged state_dict\n+ \"\"\"\n+ staging_stream = torch.cuda.Stream()\n+ with staging_stream:\n+ staged_state_dict = _copy_state_dict(state_dict, cpu_state_dict, True, type_check=False)\n+ staging_stream.synchronize()\n+ return staged_state_dict\n+\n+@dataclass\n+class AsyncSaveResponse:", + "comment_created_at": "2025-06-17T22:25:11+00:00", + "comment_author": "fegin", + "comment_body": "Need a good docstring since this is a public dataclass.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2094381930", + "pr_number": 153198, + "pr_file": "torch/_refs/__init__.py", + "created_at": "2025-05-18T05:51:33+00:00", + "commented_code": "return permuted_result.reshape(target_shape)\n\n\ndef _reshape_view_helper(a: TensorLikeType, *shape, allow_copy: bool) -> TensorLikeType:\n from torch._dynamo.exc import UserError, UserErrorType\n# this function is python match of computeStride_impl in TensorUtils.cpp\ndef _compute_stride(old_shape, old_stride, new_shape):\n from torch.fx.experimental.symbolic_shapes import (\n guard_or_false,\n guard_or_true,\n GuardOnDataDependentSymNode,\n sym_eq,\n )\n\n if len(old_shape) == 0:\n return [1] * len(new_shape)\n\n numel = reduce(operator.mul, old_shape, 1)\n zero_numel = guard_or_false(numel == 0)\n if zero_numel and guard_or_false(sym_eq(old_shape, new_shape)):\n return old_stride\n\n new_stride = [0] * len(new_shape)\n\n if zero_numel:\n for view_d in range(len(new_shape) - 1, -1, -1):\n if view_d == len(new_shape) - 1:\n new_stride[view_d] = 1\n else:\n new_stride[view_d] = (\n max(new_shape[view_d + 1], 1) * new_stride[view_d + 1]\n )\n return new_stride\n\n view_d = len(new_shape) - 1\n chunk_base_stride = old_stride[-1]\n tensor_numel = 1\n view_numel = 1\n\n for tensor_d in range(len(old_shape) - 1, -1, -1):\n tensor_numel *= old_shape[tensor_d]\n\n if tensor_d == 0 or (\n guard_or_true(old_shape[tensor_d - 1] != 1)\n and guard_or_true(\n old_stride[tensor_d - 1] != tensor_numel * chunk_base_stride\n )\n ):\n while view_d >= 0 and (\n guard_or_true(view_numel < tensor_numel)\n or guard_or_false(new_shape[view_d] == 1)\n ):\n new_stride[view_d] = view_numel * chunk_base_stride\n view_numel *= new_shape[view_d]\n view_d -= 1\n\n if guard_or_true(view_numel != tensor_numel):\n return None\n\n if tensor_d > 0:\n chunk_base_stride = old_stride[tensor_d - 1]\n tensor_numel = 1\n view_numel = 1\n if view_d != -1:\n return None\n return new_stride\n\n\n# if a is contiguous, we can always reshape with as_strided and contiguous_strides.\n# if a is not contiguous and _compute_stride succeed we also use as_strided without clone.", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2094381930", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153198, + "pr_file": "torch/_refs/__init__.py", + "discussion_id": "2094381930", + "commented_code": "@@ -3729,62 +3729,111 @@ def repeat(a: Tensor, *repeat_shape) -> Tensor:\n return permuted_result.reshape(target_shape)\n \n \n-def _reshape_view_helper(a: TensorLikeType, *shape, allow_copy: bool) -> TensorLikeType:\n- from torch._dynamo.exc import UserError, UserErrorType\n+# this function is python match of computeStride_impl in TensorUtils.cpp\n+def _compute_stride(old_shape, old_stride, new_shape):\n from torch.fx.experimental.symbolic_shapes import (\n guard_or_false,\n 
guard_or_true,\n- GuardOnDataDependentSymNode,\n+ sym_eq,\n )\n \n+ if len(old_shape) == 0:\n+ return [1] * len(new_shape)\n+\n+ numel = reduce(operator.mul, old_shape, 1)\n+ zero_numel = guard_or_false(numel == 0)\n+ if zero_numel and guard_or_false(sym_eq(old_shape, new_shape)):\n+ return old_stride\n+\n+ new_stride = [0] * len(new_shape)\n+\n+ if zero_numel:\n+ for view_d in range(len(new_shape) - 1, -1, -1):\n+ if view_d == len(new_shape) - 1:\n+ new_stride[view_d] = 1\n+ else:\n+ new_stride[view_d] = (\n+ max(new_shape[view_d + 1], 1) * new_stride[view_d + 1]\n+ )\n+ return new_stride\n+\n+ view_d = len(new_shape) - 1\n+ chunk_base_stride = old_stride[-1]\n+ tensor_numel = 1\n+ view_numel = 1\n+\n+ for tensor_d in range(len(old_shape) - 1, -1, -1):\n+ tensor_numel *= old_shape[tensor_d]\n+\n+ if tensor_d == 0 or (\n+ guard_or_true(old_shape[tensor_d - 1] != 1)\n+ and guard_or_true(\n+ old_stride[tensor_d - 1] != tensor_numel * chunk_base_stride\n+ )\n+ ):\n+ while view_d >= 0 and (\n+ guard_or_true(view_numel < tensor_numel)\n+ or guard_or_false(new_shape[view_d] == 1)\n+ ):\n+ new_stride[view_d] = view_numel * chunk_base_stride\n+ view_numel *= new_shape[view_d]\n+ view_d -= 1\n+\n+ if guard_or_true(view_numel != tensor_numel):\n+ return None\n+\n+ if tensor_d > 0:\n+ chunk_base_stride = old_stride[tensor_d - 1]\n+ tensor_numel = 1\n+ view_numel = 1\n+ if view_d != -1:\n+ return None\n+ return new_stride\n+\n+\n+# if a is contiguous, we can always reshape with as_strided and contiguous_strides.\n+# if a is not contiguous and _compute_stride succeed we also use as_strided without clone.", + "comment_created_at": "2025-05-18T05:51:33+00:00", + "comment_author": "bobrenjc93", + "comment_body": "Can you please elaborate on these cases? For these correctness comments, you don't need to be frugal with your words. Most of the comments sound like notes for yourself, but for these comments you should really imagine what another core pytorch dev reading this comment 6 months from now will think.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2098995525", + "pr_number": 153991, + "pr_file": "torch/vulkan/__init__.py", + "created_at": "2025-05-20T22:58:06+00:00", + "commented_code": "# mypy: allow-untyped-defs\nr\"\"\"\nThis package enables an interface for accessing the Vulkan backend in Python.\n\"\"\"\nfrom typing import Union\n\nimport torch\nfrom torch import Tensor\n\ndef device_count() -> int:\n r\"\"\"Returns the number of available Vulkan devices.\"\"\"\n # TODO: actually get the number!\n return int(torch.is_vulkan_available())\n\n\ndef compile_shader(name: str, source: str):\n r\"\"\"Compiles compute shader from source and allows one to invoke kernels\n defined there from the comfort of Python runtime\n Example::\n\n >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_MPS)\n >>> lib = torch.mps.compile_shader(\n ... \"kernel void full(device float* out, constant float& val, uint idx [[thread_position_in_grid]]) { out[idx] = val; }\"\n ... 
)\n >>> x = torch.zeros(16, device=\"mps\")\n >>> lib.full(x, 3.14)\n \"\"\"\n from pathlib import Path\n\n from torch.utils._cpp_embed_headers import _embed_headers\n\n if not hasattr(torch._C, \"_vulkan_compileShader\"):\n raise RuntimeError(\"Vulkan is not available\")\n source = _embed_headers(\n [l + \"\\n\" for l in source.split(\"\\n\")],\n [Path(__file__).parent.parent / \"include\"],\n set(),\n )\n return torch._C._vulkan_compileShader(name, source)\n\n\ndef is_available() -> bool:\n return device_count() > 0\n\n\n__all__ = [", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2098995525", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153991, + "pr_file": "torch/vulkan/__init__.py", + "discussion_id": "2098995525", + "commented_code": "@@ -0,0 +1,51 @@\n+# mypy: allow-untyped-defs\n+r\"\"\"\n+This package enables an interface for accessing the Vulkan backend in Python.\n+\"\"\"\n+from typing import Union\n+\n+import torch\n+from torch import Tensor\n+\n+def device_count() -> int:\n+ r\"\"\"Returns the number of available Vulkan devices.\"\"\"\n+ # TODO: actually get the number!\n+ return int(torch.is_vulkan_available())\n+\n+\n+def compile_shader(name: str, source: str):\n+ r\"\"\"Compiles compute shader from source and allows one to invoke kernels\n+ defined there from the comfort of Python runtime\n+ Example::\n+\n+ >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_MPS)\n+ >>> lib = torch.mps.compile_shader(\n+ ... \"kernel void full(device float* out, constant float& val, uint idx [[thread_position_in_grid]]) { out[idx] = val; }\"\n+ ... )\n+ >>> x = torch.zeros(16, device=\"mps\")\n+ >>> lib.full(x, 3.14)\n+ \"\"\"\n+ from pathlib import Path\n+\n+ from torch.utils._cpp_embed_headers import _embed_headers\n+\n+ if not hasattr(torch._C, \"_vulkan_compileShader\"):\n+ raise RuntimeError(\"Vulkan is not available\")\n+ source = _embed_headers(\n+ [l + \"\\n\" for l in source.split(\"\\n\")],\n+ [Path(__file__).parent.parent / \"include\"],\n+ set(),\n+ )\n+ return torch._C._vulkan_compileShader(name, source)\n+\n+\n+def is_available() -> bool:\n+ return device_count() > 0\n+\n+\n+__all__ = [", + "comment_created_at": "2025-05-20T22:58:06+00:00", + "comment_author": "albanD", + "comment_body": "We're really making these public APIs that we will maintain for the foreseeable future?\r\nIf so they need doc and tests!", + "pr_file_module": null + }, + { + "comment_id": "2099003823", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153991, + "pr_file": "torch/vulkan/__init__.py", + "discussion_id": "2098995525", + "commented_code": "@@ -0,0 +1,51 @@\n+# mypy: allow-untyped-defs\n+r\"\"\"\n+This package enables an interface for accessing the Vulkan backend in Python.\n+\"\"\"\n+from typing import Union\n+\n+import torch\n+from torch import Tensor\n+\n+def device_count() -> int:\n+ r\"\"\"Returns the number of available Vulkan devices.\"\"\"\n+ # TODO: actually get the number!\n+ return int(torch.is_vulkan_available())\n+\n+\n+def compile_shader(name: str, source: str):\n+ r\"\"\"Compiles compute shader from source and allows one to invoke kernels\n+ defined there from the comfort of Python runtime\n+ Example::\n+\n+ >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_MPS)\n+ >>> lib = torch.mps.compile_shader(\n+ ... \"kernel void full(device float* out, constant float& val, uint idx [[thread_position_in_grid]]) { out[idx] = val; }\"\n+ ... 
)\n+ >>> x = torch.zeros(16, device=\"mps\")\n+ >>> lib.full(x, 3.14)\n+ \"\"\"\n+ from pathlib import Path\n+\n+ from torch.utils._cpp_embed_headers import _embed_headers\n+\n+ if not hasattr(torch._C, \"_vulkan_compileShader\"):\n+ raise RuntimeError(\"Vulkan is not available\")\n+ source = _embed_headers(\n+ [l + \"\\n\" for l in source.split(\"\\n\")],\n+ [Path(__file__).parent.parent / \"include\"],\n+ set(),\n+ )\n+ return torch._C._vulkan_compileShader(name, source)\n+\n+\n+def is_available() -> bool:\n+ return device_count() > 0\n+\n+\n+__all__ = [", + "comment_created_at": "2025-05-20T23:07:58+00:00", + "comment_author": "albanD", + "comment_body": "Actually is_available, device_count and synchronize should be ok if they do have a semantic that match other backends.\r\nThe shader compiler is most likely more private.", + "pr_file_module": null + }, + { + "comment_id": "2099011455", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153991, + "pr_file": "torch/vulkan/__init__.py", + "discussion_id": "2098995525", + "commented_code": "@@ -0,0 +1,51 @@\n+# mypy: allow-untyped-defs\n+r\"\"\"\n+This package enables an interface for accessing the Vulkan backend in Python.\n+\"\"\"\n+from typing import Union\n+\n+import torch\n+from torch import Tensor\n+\n+def device_count() -> int:\n+ r\"\"\"Returns the number of available Vulkan devices.\"\"\"\n+ # TODO: actually get the number!\n+ return int(torch.is_vulkan_available())\n+\n+\n+def compile_shader(name: str, source: str):\n+ r\"\"\"Compiles compute shader from source and allows one to invoke kernels\n+ defined there from the comfort of Python runtime\n+ Example::\n+\n+ >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_MPS)\n+ >>> lib = torch.mps.compile_shader(\n+ ... \"kernel void full(device float* out, constant float& val, uint idx [[thread_position_in_grid]]) { out[idx] = val; }\"\n+ ... )\n+ >>> x = torch.zeros(16, device=\"mps\")\n+ >>> lib.full(x, 3.14)\n+ \"\"\"\n+ from pathlib import Path\n+\n+ from torch.utils._cpp_embed_headers import _embed_headers\n+\n+ if not hasattr(torch._C, \"_vulkan_compileShader\"):\n+ raise RuntimeError(\"Vulkan is not available\")\n+ source = _embed_headers(\n+ [l + \"\\n\" for l in source.split(\"\\n\")],\n+ [Path(__file__).parent.parent / \"include\"],\n+ set(),\n+ )\n+ return torch._C._vulkan_compileShader(name, source)\n+\n+\n+def is_available() -> bool:\n+ return device_count() > 0\n+\n+\n+__all__ = [", + "comment_created_at": "2025-05-20T23:14:54+00:00", + "comment_author": "swolchok", + "comment_body": "> The shader compiler is most likely more private.\r\n\r\nThis is way smaller than `torch/mps/__init__.py`! 
torch.mps.compile_shader is public https://github.com/pytorch/pytorch/blob/main/torch/mps/__init__.py#L143\r\n", + "pr_file_module": null + }, + { + "comment_id": "2099016867", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153991, + "pr_file": "torch/vulkan/__init__.py", + "discussion_id": "2098995525", + "commented_code": "@@ -0,0 +1,51 @@\n+# mypy: allow-untyped-defs\n+r\"\"\"\n+This package enables an interface for accessing the Vulkan backend in Python.\n+\"\"\"\n+from typing import Union\n+\n+import torch\n+from torch import Tensor\n+\n+def device_count() -> int:\n+ r\"\"\"Returns the number of available Vulkan devices.\"\"\"\n+ # TODO: actually get the number!\n+ return int(torch.is_vulkan_available())\n+\n+\n+def compile_shader(name: str, source: str):\n+ r\"\"\"Compiles compute shader from source and allows one to invoke kernels\n+ defined there from the comfort of Python runtime\n+ Example::\n+\n+ >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_MPS)\n+ >>> lib = torch.mps.compile_shader(\n+ ... \"kernel void full(device float* out, constant float& val, uint idx [[thread_position_in_grid]]) { out[idx] = val; }\"\n+ ... )\n+ >>> x = torch.zeros(16, device=\"mps\")\n+ >>> lib.full(x, 3.14)\n+ \"\"\"\n+ from pathlib import Path\n+\n+ from torch.utils._cpp_embed_headers import _embed_headers\n+\n+ if not hasattr(torch._C, \"_vulkan_compileShader\"):\n+ raise RuntimeError(\"Vulkan is not available\")\n+ source = _embed_headers(\n+ [l + \"\\n\" for l in source.split(\"\\n\")],\n+ [Path(__file__).parent.parent / \"include\"],\n+ set(),\n+ )\n+ return torch._C._vulkan_compileShader(name, source)\n+\n+\n+def is_available() -> bool:\n+ return device_count() > 0\n+\n+\n+__all__ = [", + "comment_created_at": "2025-05-20T23:18:23+00:00", + "comment_author": "swolchok", + "comment_body": "btw, @albanD, do we have a process or mechanism that would, if we decided this was even promising enough to commit, allow committing this stuff *without* promising to maintain or never change it? Is the \"prototype\" status from https://pytorch.org/blog/pytorch-feature-classification-changes/ still available?", + "pr_file_module": null + }, + { + "comment_id": "2099068635", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153991, + "pr_file": "torch/vulkan/__init__.py", + "discussion_id": "2098995525", + "commented_code": "@@ -0,0 +1,51 @@\n+# mypy: allow-untyped-defs\n+r\"\"\"\n+This package enables an interface for accessing the Vulkan backend in Python.\n+\"\"\"\n+from typing import Union\n+\n+import torch\n+from torch import Tensor\n+\n+def device_count() -> int:\n+ r\"\"\"Returns the number of available Vulkan devices.\"\"\"\n+ # TODO: actually get the number!\n+ return int(torch.is_vulkan_available())\n+\n+\n+def compile_shader(name: str, source: str):\n+ r\"\"\"Compiles compute shader from source and allows one to invoke kernels\n+ defined there from the comfort of Python runtime\n+ Example::\n+\n+ >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_MPS)\n+ >>> lib = torch.mps.compile_shader(\n+ ... \"kernel void full(device float* out, constant float& val, uint idx [[thread_position_in_grid]]) { out[idx] = val; }\"\n+ ... 
)\n+ >>> x = torch.zeros(16, device=\"mps\")\n+ >>> lib.full(x, 3.14)\n+ \"\"\"\n+ from pathlib import Path\n+\n+ from torch.utils._cpp_embed_headers import _embed_headers\n+\n+ if not hasattr(torch._C, \"_vulkan_compileShader\"):\n+ raise RuntimeError(\"Vulkan is not available\")\n+ source = _embed_headers(\n+ [l + \"\\n\" for l in source.split(\"\\n\")],\n+ [Path(__file__).parent.parent / \"include\"],\n+ set(),\n+ )\n+ return torch._C._vulkan_compileShader(name, source)\n+\n+\n+def is_available() -> bool:\n+ return device_count() > 0\n+\n+\n+__all__ = [", + "comment_created_at": "2025-05-21T00:16:16+00:00", + "comment_author": "albanD", + "comment_body": "You should make it a private API (via prepending _*) to mark it as \"should not be used\".", + "pr_file_module": null + }, + { + "comment_id": "2099069584", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153991, + "pr_file": "torch/vulkan/__init__.py", + "discussion_id": "2098995525", + "commented_code": "@@ -0,0 +1,51 @@\n+# mypy: allow-untyped-defs\n+r\"\"\"\n+This package enables an interface for accessing the Vulkan backend in Python.\n+\"\"\"\n+from typing import Union\n+\n+import torch\n+from torch import Tensor\n+\n+def device_count() -> int:\n+ r\"\"\"Returns the number of available Vulkan devices.\"\"\"\n+ # TODO: actually get the number!\n+ return int(torch.is_vulkan_available())\n+\n+\n+def compile_shader(name: str, source: str):\n+ r\"\"\"Compiles compute shader from source and allows one to invoke kernels\n+ defined there from the comfort of Python runtime\n+ Example::\n+\n+ >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_MPS)\n+ >>> lib = torch.mps.compile_shader(\n+ ... \"kernel void full(device float* out, constant float& val, uint idx [[thread_position_in_grid]]) { out[idx] = val; }\"\n+ ... )\n+ >>> x = torch.zeros(16, device=\"mps\")\n+ >>> lib.full(x, 3.14)\n+ \"\"\"\n+ from pathlib import Path\n+\n+ from torch.utils._cpp_embed_headers import _embed_headers\n+\n+ if not hasattr(torch._C, \"_vulkan_compileShader\"):\n+ raise RuntimeError(\"Vulkan is not available\")\n+ source = _embed_headers(\n+ [l + \"\\n\" for l in source.split(\"\\n\")],\n+ [Path(__file__).parent.parent / \"include\"],\n+ set(),\n+ )\n+ return torch._C._vulkan_compileShader(name, source)\n+\n+\n+def is_available() -> bool:\n+ return device_count() > 0\n+\n+\n+__all__ = [", + "comment_created_at": "2025-05-21T00:17:53+00:00", + "comment_author": "albanD", + "comment_body": "> This is way smaller than torch/mps/__init__.py! torch.mps.compile_shader is public\r\n\r\nI'm not saying I don't think we should make it public. I'm asking if we're confident that this API is stable and we will maintain it for the foreseeable future? 
If the answer is yes, let's document all of this and go for it !\r\nIf not, let's just make it private for now and see later once it's more mature.", + "pr_file_module": null + }, + { + "comment_id": "2099088683", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153991, + "pr_file": "torch/vulkan/__init__.py", + "discussion_id": "2098995525", + "commented_code": "@@ -0,0 +1,51 @@\n+# mypy: allow-untyped-defs\n+r\"\"\"\n+This package enables an interface for accessing the Vulkan backend in Python.\n+\"\"\"\n+from typing import Union\n+\n+import torch\n+from torch import Tensor\n+\n+def device_count() -> int:\n+ r\"\"\"Returns the number of available Vulkan devices.\"\"\"\n+ # TODO: actually get the number!\n+ return int(torch.is_vulkan_available())\n+\n+\n+def compile_shader(name: str, source: str):\n+ r\"\"\"Compiles compute shader from source and allows one to invoke kernels\n+ defined there from the comfort of Python runtime\n+ Example::\n+\n+ >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_MPS)\n+ >>> lib = torch.mps.compile_shader(\n+ ... \"kernel void full(device float* out, constant float& val, uint idx [[thread_position_in_grid]]) { out[idx] = val; }\"\n+ ... )\n+ >>> x = torch.zeros(16, device=\"mps\")\n+ >>> lib.full(x, 3.14)\n+ \"\"\"\n+ from pathlib import Path\n+\n+ from torch.utils._cpp_embed_headers import _embed_headers\n+\n+ if not hasattr(torch._C, \"_vulkan_compileShader\"):\n+ raise RuntimeError(\"Vulkan is not available\")\n+ source = _embed_headers(\n+ [l + \"\\n\" for l in source.split(\"\\n\")],\n+ [Path(__file__).parent.parent / \"include\"],\n+ set(),\n+ )\n+ return torch._C._vulkan_compileShader(name, source)\n+\n+\n+def is_available() -> bool:\n+ return device_count() > 0\n+\n+\n+__all__ = [", + "comment_created_at": "2025-05-21T00:43:15+00:00", + "comment_author": "swolchok", + "comment_body": "> we're confident that this API is stable and we will maintain it for the foreseeable future\r\n\r\ndefinitely not. this is currently experimental status, not clearly motivated, timeboxed (and I extended the timebox because I was close to getting this out). Hence the \"experimental\" label and draft PR.\r\n\r\nI'll go ahead and underscore-prefix everything. ", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-document-for-future.md b/_reviewers/pytorch-document-for-future.md index 6782f4f..ea2b4f6 100644 --- a/_reviewers/pytorch-document-for-future.md +++ b/_reviewers/pytorch-document-for-future.md @@ -33,201 +33,3 @@ Improved version: ``` Remember that comments which seem clear when you write them may be cryptic to someone revisiting the code later. Write documentation that will stand the test of time. - - -[ - { - "discussion_id": "2172807638", - "pr_number": 157130, - "pr_file": "torch/onnx/_internal/exporter/_torchlib/ops/nn.py", - "created_at": "2025-06-27T20:11:29+00:00", - "commented_code": "\"SDPA (MHA) requires q_num_heads = kv_num_heads\"\n )\n\n # NOTE: There was extended discussion on whether the num_heads attributes (q_num_heads/kv_num_heads)\n # should be set as ONNX attributes or inferred from the tensor shape. In ONNX, num_heads is needed\n # for 3D attention inputs (shape: [B, S, N*H]), but not for 4D ([B, N, S, H]), which is the only\n # input accepted by this exporter. 
Thus, the attribute is not strictly necessary here, but adding it\n # may ease future optimization or conversion to 3D formats (e.g., GQA ops)\n # NOTE: num_heads attributes (q_num_heads/kv_num_heads) are not mandatory for 4D.", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2172807638", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157130, - "pr_file": "torch/onnx/_internal/exporter/_torchlib/ops/nn.py", - "discussion_id": "2172807638", - "commented_code": "@@ -119,19 +119,19 @@ def aten_scaled_dot_product_attention_23(\n \"SDPA (MHA) requires q_num_heads = kv_num_heads\"\n )\n \n- # NOTE: There was extended discussion on whether the num_heads attributes (q_num_heads/kv_num_heads)\n- # should be set as ONNX attributes or inferred from the tensor shape. In ONNX, num_heads is needed\n- # for 3D attention inputs (shape: [B, S, N*H]), but not for 4D ([B, N, S, H]), which is the only\n- # input accepted by this exporter. Thus, the attribute is not strictly necessary here, but adding it\n- # may ease future optimization or conversion to 3D formats (e.g., GQA ops)\n+ # NOTE: num_heads attributes (q_num_heads/kv_num_heads) are not mandatory for 4D.", - "comment_created_at": "2025-06-27T20:11:29+00:00", - "comment_author": "gramalingam", - "comment_body": "nit: may be clearer if we change \"are not mandatory for 4D\" to \"should NOT be specified for 4D (even though the ONNX op documentation does not make this clear)\"", - "pr_file_module": null - }, - { - "comment_id": "2175067772", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157130, - "pr_file": "torch/onnx/_internal/exporter/_torchlib/ops/nn.py", - "discussion_id": "2172807638", - "commented_code": "@@ -119,19 +119,19 @@ def aten_scaled_dot_product_attention_23(\n \"SDPA (MHA) requires q_num_heads = kv_num_heads\"\n )\n \n- # NOTE: There was extended discussion on whether the num_heads attributes (q_num_heads/kv_num_heads)\n- # should be set as ONNX attributes or inferred from the tensor shape. In ONNX, num_heads is needed\n- # for 3D attention inputs (shape: [B, S, N*H]), but not for 4D ([B, N, S, H]), which is the only\n- # input accepted by this exporter. Thus, the attribute is not strictly necessary here, but adding it\n- # may ease future optimization or conversion to 3D formats (e.g., GQA ops)\n+ # NOTE: num_heads attributes (q_num_heads/kv_num_heads) are not mandatory for 4D.", - "comment_created_at": "2025-06-30T13:21:34+00:00", - "comment_author": "xadupre", - "comment_body": "changed, I removed the part in (...) 
in case the documentation of onnx is changed.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2175940579", - "pr_number": 156705, - "pr_file": "torch/distributed/checkpoint/hf_storage.py", - "created_at": "2025-06-30T21:01:09+00:00", - "commented_code": "return super()._write_data(planner, file_queue)\n\n def finish(self, metadata: Metadata, results: list[list[WriteResult]]) -> None:\n if self._save_sharded:\n if self.save_distributed and not self.consolidated_output_path:\n logger.info(\"Not consolidating sharded checkpoint in finish step.\")", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2175940579", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156705, - "pr_file": "torch/distributed/checkpoint/hf_storage.py", - "discussion_id": "2175940579", - "commented_code": "@@ -136,8 +154,16 @@ def write_data(\n return super()._write_data(planner, file_queue)\n \n def finish(self, metadata: Metadata, results: list[list[WriteResult]]) -> None:\n- if self._save_sharded:\n+ if self.save_distributed and not self.consolidated_output_path:\n+ logger.info(\"Not consolidating sharded checkpoint in finish step.\")", - "comment_created_at": "2025-06-30T21:01:09+00:00", - "comment_author": "saumishr", - "comment_body": "It would be worth adding a documentation here explaining different scenarios and why the metadata write is also being skipped. ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2153270155", - "pr_number": 156207, - "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", - "created_at": "2025-06-17T22:25:11+00:00", - "commented_code": "planner=planner,\n )\n\ndef default_stage(state_dict: STATE_DICT_TYPE, cpu_state_dict: STATE_DICT_TYPE) -> STATE_DICT_TYPE:\n \"\"\"Default stage function. This function will stage the state_dict on to the\n staging storage (defaults to CPU memory).\n\n Args:\n state_dict STATE_DICT_TYPE: The state_dict to stage.\n\n Returns:\n STATE_DICT_TYPE: staged state_dict\n \"\"\"\n staging_stream = torch.cuda.Stream()\n with staging_stream:\n staged_state_dict = _copy_state_dict(state_dict, cpu_state_dict, True, type_check=False)\n staging_stream.synchronize()\n return staged_state_dict\n\n@dataclass\nclass AsyncSaveResponse:", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2153270155", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156207, - "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", - "discussion_id": "2153270155", - "commented_code": "@@ -181,6 +189,36 @@ def save(\n planner=planner,\n )\n \n+def default_stage(state_dict: STATE_DICT_TYPE, cpu_state_dict: STATE_DICT_TYPE) -> STATE_DICT_TYPE:\n+ \"\"\"Default stage function. 
This function will stage the state_dict on to the\n+ staging storage (defaults to CPU memory).\n+\n+ Args:\n+ state_dict STATE_DICT_TYPE: The state_dict to stage.\n+\n+ Returns:\n+ STATE_DICT_TYPE: staged state_dict\n+ \"\"\"\n+ staging_stream = torch.cuda.Stream()\n+ with staging_stream:\n+ staged_state_dict = _copy_state_dict(state_dict, cpu_state_dict, True, type_check=False)\n+ staging_stream.synchronize()\n+ return staged_state_dict\n+\n+@dataclass\n+class AsyncSaveResponse:", - "comment_created_at": "2025-06-17T22:25:11+00:00", - "comment_author": "fegin", - "comment_body": "Need a good docstring since this is a public dataclass.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2094381930", - "pr_number": 153198, - "pr_file": "torch/_refs/__init__.py", - "created_at": "2025-05-18T05:51:33+00:00", - "commented_code": "return permuted_result.reshape(target_shape)\n\n\ndef _reshape_view_helper(a: TensorLikeType, *shape, allow_copy: bool) -> TensorLikeType:\n from torch._dynamo.exc import UserError, UserErrorType\n# this function is python match of computeStride_impl in TensorUtils.cpp\ndef _compute_stride(old_shape, old_stride, new_shape):\n from torch.fx.experimental.symbolic_shapes import (\n guard_or_false,\n guard_or_true,\n GuardOnDataDependentSymNode,\n sym_eq,\n )\n\n if len(old_shape) == 0:\n return [1] * len(new_shape)\n\n numel = reduce(operator.mul, old_shape, 1)\n zero_numel = guard_or_false(numel == 0)\n if zero_numel and guard_or_false(sym_eq(old_shape, new_shape)):\n return old_stride\n\n new_stride = [0] * len(new_shape)\n\n if zero_numel:\n for view_d in range(len(new_shape) - 1, -1, -1):\n if view_d == len(new_shape) - 1:\n new_stride[view_d] = 1\n else:\n new_stride[view_d] = (\n max(new_shape[view_d + 1], 1) * new_stride[view_d + 1]\n )\n return new_stride\n\n view_d = len(new_shape) - 1\n chunk_base_stride = old_stride[-1]\n tensor_numel = 1\n view_numel = 1\n\n for tensor_d in range(len(old_shape) - 1, -1, -1):\n tensor_numel *= old_shape[tensor_d]\n\n if tensor_d == 0 or (\n guard_or_true(old_shape[tensor_d - 1] != 1)\n and guard_or_true(\n old_stride[tensor_d - 1] != tensor_numel * chunk_base_stride\n )\n ):\n while view_d >= 0 and (\n guard_or_true(view_numel < tensor_numel)\n or guard_or_false(new_shape[view_d] == 1)\n ):\n new_stride[view_d] = view_numel * chunk_base_stride\n view_numel *= new_shape[view_d]\n view_d -= 1\n\n if guard_or_true(view_numel != tensor_numel):\n return None\n\n if tensor_d > 0:\n chunk_base_stride = old_stride[tensor_d - 1]\n tensor_numel = 1\n view_numel = 1\n if view_d != -1:\n return None\n return new_stride\n\n\n# if a is contiguous, we can always reshape with as_strided and contiguous_strides.\n# if a is not contiguous and _compute_stride succeed we also use as_strided without clone.", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2094381930", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153198, - "pr_file": "torch/_refs/__init__.py", - "discussion_id": "2094381930", - "commented_code": "@@ -3729,62 +3729,111 @@ def repeat(a: Tensor, *repeat_shape) -> Tensor:\n return permuted_result.reshape(target_shape)\n \n \n-def _reshape_view_helper(a: TensorLikeType, *shape, allow_copy: bool) -> TensorLikeType:\n- from torch._dynamo.exc import UserError, UserErrorType\n+# this function is python match of computeStride_impl in TensorUtils.cpp\n+def _compute_stride(old_shape, old_stride, new_shape):\n from torch.fx.experimental.symbolic_shapes import (\n guard_or_false,\n 
guard_or_true,\n- GuardOnDataDependentSymNode,\n+ sym_eq,\n )\n \n+ if len(old_shape) == 0:\n+ return [1] * len(new_shape)\n+\n+ numel = reduce(operator.mul, old_shape, 1)\n+ zero_numel = guard_or_false(numel == 0)\n+ if zero_numel and guard_or_false(sym_eq(old_shape, new_shape)):\n+ return old_stride\n+\n+ new_stride = [0] * len(new_shape)\n+\n+ if zero_numel:\n+ for view_d in range(len(new_shape) - 1, -1, -1):\n+ if view_d == len(new_shape) - 1:\n+ new_stride[view_d] = 1\n+ else:\n+ new_stride[view_d] = (\n+ max(new_shape[view_d + 1], 1) * new_stride[view_d + 1]\n+ )\n+ return new_stride\n+\n+ view_d = len(new_shape) - 1\n+ chunk_base_stride = old_stride[-1]\n+ tensor_numel = 1\n+ view_numel = 1\n+\n+ for tensor_d in range(len(old_shape) - 1, -1, -1):\n+ tensor_numel *= old_shape[tensor_d]\n+\n+ if tensor_d == 0 or (\n+ guard_or_true(old_shape[tensor_d - 1] != 1)\n+ and guard_or_true(\n+ old_stride[tensor_d - 1] != tensor_numel * chunk_base_stride\n+ )\n+ ):\n+ while view_d >= 0 and (\n+ guard_or_true(view_numel < tensor_numel)\n+ or guard_or_false(new_shape[view_d] == 1)\n+ ):\n+ new_stride[view_d] = view_numel * chunk_base_stride\n+ view_numel *= new_shape[view_d]\n+ view_d -= 1\n+\n+ if guard_or_true(view_numel != tensor_numel):\n+ return None\n+\n+ if tensor_d > 0:\n+ chunk_base_stride = old_stride[tensor_d - 1]\n+ tensor_numel = 1\n+ view_numel = 1\n+ if view_d != -1:\n+ return None\n+ return new_stride\n+\n+\n+# if a is contiguous, we can always reshape with as_strided and contiguous_strides.\n+# if a is not contiguous and _compute_stride succeed we also use as_strided without clone.", - "comment_created_at": "2025-05-18T05:51:33+00:00", - "comment_author": "bobrenjc93", - "comment_body": "Can you please elaborate on these cases? For these correctness comments, you don't need to be frugal with your words. Most of the comments sound like notes for yourself, but for these comments you should really imagine what another core pytorch dev reading this comment 6 months from now will think.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2098995525", - "pr_number": 153991, - "pr_file": "torch/vulkan/__init__.py", - "created_at": "2025-05-20T22:58:06+00:00", - "commented_code": "# mypy: allow-untyped-defs\nr\"\"\"\nThis package enables an interface for accessing the Vulkan backend in Python.\n\"\"\"\nfrom typing import Union\n\nimport torch\nfrom torch import Tensor\n\ndef device_count() -> int:\n r\"\"\"Returns the number of available Vulkan devices.\"\"\"\n # TODO: actually get the number!\n return int(torch.is_vulkan_available())\n\n\ndef compile_shader(name: str, source: str):\n r\"\"\"Compiles compute shader from source and allows one to invoke kernels\n defined there from the comfort of Python runtime\n Example::\n\n >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_MPS)\n >>> lib = torch.mps.compile_shader(\n ... \"kernel void full(device float* out, constant float& val, uint idx [[thread_position_in_grid]]) { out[idx] = val; }\"\n ... 
)\n >>> x = torch.zeros(16, device=\"mps\")\n >>> lib.full(x, 3.14)\n \"\"\"\n from pathlib import Path\n\n from torch.utils._cpp_embed_headers import _embed_headers\n\n if not hasattr(torch._C, \"_vulkan_compileShader\"):\n raise RuntimeError(\"Vulkan is not available\")\n source = _embed_headers(\n [l + \"\\n\" for l in source.split(\"\\n\")],\n [Path(__file__).parent.parent / \"include\"],\n set(),\n )\n return torch._C._vulkan_compileShader(name, source)\n\n\ndef is_available() -> bool:\n return device_count() > 0\n\n\n__all__ = [", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2098995525", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153991, - "pr_file": "torch/vulkan/__init__.py", - "discussion_id": "2098995525", - "commented_code": "@@ -0,0 +1,51 @@\n+# mypy: allow-untyped-defs\n+r\"\"\"\n+This package enables an interface for accessing the Vulkan backend in Python.\n+\"\"\"\n+from typing import Union\n+\n+import torch\n+from torch import Tensor\n+\n+def device_count() -> int:\n+ r\"\"\"Returns the number of available Vulkan devices.\"\"\"\n+ # TODO: actually get the number!\n+ return int(torch.is_vulkan_available())\n+\n+\n+def compile_shader(name: str, source: str):\n+ r\"\"\"Compiles compute shader from source and allows one to invoke kernels\n+ defined there from the comfort of Python runtime\n+ Example::\n+\n+ >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_MPS)\n+ >>> lib = torch.mps.compile_shader(\n+ ... \"kernel void full(device float* out, constant float& val, uint idx [[thread_position_in_grid]]) { out[idx] = val; }\"\n+ ... )\n+ >>> x = torch.zeros(16, device=\"mps\")\n+ >>> lib.full(x, 3.14)\n+ \"\"\"\n+ from pathlib import Path\n+\n+ from torch.utils._cpp_embed_headers import _embed_headers\n+\n+ if not hasattr(torch._C, \"_vulkan_compileShader\"):\n+ raise RuntimeError(\"Vulkan is not available\")\n+ source = _embed_headers(\n+ [l + \"\\n\" for l in source.split(\"\\n\")],\n+ [Path(__file__).parent.parent / \"include\"],\n+ set(),\n+ )\n+ return torch._C._vulkan_compileShader(name, source)\n+\n+\n+def is_available() -> bool:\n+ return device_count() > 0\n+\n+\n+__all__ = [", - "comment_created_at": "2025-05-20T22:58:06+00:00", - "comment_author": "albanD", - "comment_body": "We're really making these public APIs that we will maintain for the foreseeable future?\r\nIf so they need doc and tests!", - "pr_file_module": null - }, - { - "comment_id": "2099003823", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153991, - "pr_file": "torch/vulkan/__init__.py", - "discussion_id": "2098995525", - "commented_code": "@@ -0,0 +1,51 @@\n+# mypy: allow-untyped-defs\n+r\"\"\"\n+This package enables an interface for accessing the Vulkan backend in Python.\n+\"\"\"\n+from typing import Union\n+\n+import torch\n+from torch import Tensor\n+\n+def device_count() -> int:\n+ r\"\"\"Returns the number of available Vulkan devices.\"\"\"\n+ # TODO: actually get the number!\n+ return int(torch.is_vulkan_available())\n+\n+\n+def compile_shader(name: str, source: str):\n+ r\"\"\"Compiles compute shader from source and allows one to invoke kernels\n+ defined there from the comfort of Python runtime\n+ Example::\n+\n+ >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_MPS)\n+ >>> lib = torch.mps.compile_shader(\n+ ... \"kernel void full(device float* out, constant float& val, uint idx [[thread_position_in_grid]]) { out[idx] = val; }\"\n+ ... 
)\n+ >>> x = torch.zeros(16, device=\"mps\")\n+ >>> lib.full(x, 3.14)\n+ \"\"\"\n+ from pathlib import Path\n+\n+ from torch.utils._cpp_embed_headers import _embed_headers\n+\n+ if not hasattr(torch._C, \"_vulkan_compileShader\"):\n+ raise RuntimeError(\"Vulkan is not available\")\n+ source = _embed_headers(\n+ [l + \"\\n\" for l in source.split(\"\\n\")],\n+ [Path(__file__).parent.parent / \"include\"],\n+ set(),\n+ )\n+ return torch._C._vulkan_compileShader(name, source)\n+\n+\n+def is_available() -> bool:\n+ return device_count() > 0\n+\n+\n+__all__ = [", - "comment_created_at": "2025-05-20T23:07:58+00:00", - "comment_author": "albanD", - "comment_body": "Actually is_available, device_count and synchronize should be ok if they do have a semantic that match other backends.\r\nThe shader compiler is most likely more private.", - "pr_file_module": null - }, - { - "comment_id": "2099011455", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153991, - "pr_file": "torch/vulkan/__init__.py", - "discussion_id": "2098995525", - "commented_code": "@@ -0,0 +1,51 @@\n+# mypy: allow-untyped-defs\n+r\"\"\"\n+This package enables an interface for accessing the Vulkan backend in Python.\n+\"\"\"\n+from typing import Union\n+\n+import torch\n+from torch import Tensor\n+\n+def device_count() -> int:\n+ r\"\"\"Returns the number of available Vulkan devices.\"\"\"\n+ # TODO: actually get the number!\n+ return int(torch.is_vulkan_available())\n+\n+\n+def compile_shader(name: str, source: str):\n+ r\"\"\"Compiles compute shader from source and allows one to invoke kernels\n+ defined there from the comfort of Python runtime\n+ Example::\n+\n+ >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_MPS)\n+ >>> lib = torch.mps.compile_shader(\n+ ... \"kernel void full(device float* out, constant float& val, uint idx [[thread_position_in_grid]]) { out[idx] = val; }\"\n+ ... )\n+ >>> x = torch.zeros(16, device=\"mps\")\n+ >>> lib.full(x, 3.14)\n+ \"\"\"\n+ from pathlib import Path\n+\n+ from torch.utils._cpp_embed_headers import _embed_headers\n+\n+ if not hasattr(torch._C, \"_vulkan_compileShader\"):\n+ raise RuntimeError(\"Vulkan is not available\")\n+ source = _embed_headers(\n+ [l + \"\\n\" for l in source.split(\"\\n\")],\n+ [Path(__file__).parent.parent / \"include\"],\n+ set(),\n+ )\n+ return torch._C._vulkan_compileShader(name, source)\n+\n+\n+def is_available() -> bool:\n+ return device_count() > 0\n+\n+\n+__all__ = [", - "comment_created_at": "2025-05-20T23:14:54+00:00", - "comment_author": "swolchok", - "comment_body": "> The shader compiler is most likely more private.\r\n\r\nThis is way smaller than `torch/mps/__init__.py`! 
torch.mps.compile_shader is public https://github.com/pytorch/pytorch/blob/main/torch/mps/__init__.py#L143\r\n", - "pr_file_module": null - }, - { - "comment_id": "2099016867", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153991, - "pr_file": "torch/vulkan/__init__.py", - "discussion_id": "2098995525", - "commented_code": "@@ -0,0 +1,51 @@\n+# mypy: allow-untyped-defs\n+r\"\"\"\n+This package enables an interface for accessing the Vulkan backend in Python.\n+\"\"\"\n+from typing import Union\n+\n+import torch\n+from torch import Tensor\n+\n+def device_count() -> int:\n+ r\"\"\"Returns the number of available Vulkan devices.\"\"\"\n+ # TODO: actually get the number!\n+ return int(torch.is_vulkan_available())\n+\n+\n+def compile_shader(name: str, source: str):\n+ r\"\"\"Compiles compute shader from source and allows one to invoke kernels\n+ defined there from the comfort of Python runtime\n+ Example::\n+\n+ >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_MPS)\n+ >>> lib = torch.mps.compile_shader(\n+ ... \"kernel void full(device float* out, constant float& val, uint idx [[thread_position_in_grid]]) { out[idx] = val; }\"\n+ ... )\n+ >>> x = torch.zeros(16, device=\"mps\")\n+ >>> lib.full(x, 3.14)\n+ \"\"\"\n+ from pathlib import Path\n+\n+ from torch.utils._cpp_embed_headers import _embed_headers\n+\n+ if not hasattr(torch._C, \"_vulkan_compileShader\"):\n+ raise RuntimeError(\"Vulkan is not available\")\n+ source = _embed_headers(\n+ [l + \"\\n\" for l in source.split(\"\\n\")],\n+ [Path(__file__).parent.parent / \"include\"],\n+ set(),\n+ )\n+ return torch._C._vulkan_compileShader(name, source)\n+\n+\n+def is_available() -> bool:\n+ return device_count() > 0\n+\n+\n+__all__ = [", - "comment_created_at": "2025-05-20T23:18:23+00:00", - "comment_author": "swolchok", - "comment_body": "btw, @albanD, do we have a process or mechanism that would, if we decided this was even promising enough to commit, allow committing this stuff *without* promising to maintain or never change it? Is the \"prototype\" status from https://pytorch.org/blog/pytorch-feature-classification-changes/ still available?", - "pr_file_module": null - }, - { - "comment_id": "2099068635", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153991, - "pr_file": "torch/vulkan/__init__.py", - "discussion_id": "2098995525", - "commented_code": "@@ -0,0 +1,51 @@\n+# mypy: allow-untyped-defs\n+r\"\"\"\n+This package enables an interface for accessing the Vulkan backend in Python.\n+\"\"\"\n+from typing import Union\n+\n+import torch\n+from torch import Tensor\n+\n+def device_count() -> int:\n+ r\"\"\"Returns the number of available Vulkan devices.\"\"\"\n+ # TODO: actually get the number!\n+ return int(torch.is_vulkan_available())\n+\n+\n+def compile_shader(name: str, source: str):\n+ r\"\"\"Compiles compute shader from source and allows one to invoke kernels\n+ defined there from the comfort of Python runtime\n+ Example::\n+\n+ >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_MPS)\n+ >>> lib = torch.mps.compile_shader(\n+ ... \"kernel void full(device float* out, constant float& val, uint idx [[thread_position_in_grid]]) { out[idx] = val; }\"\n+ ... 
)\n+ >>> x = torch.zeros(16, device=\"mps\")\n+ >>> lib.full(x, 3.14)\n+ \"\"\"\n+ from pathlib import Path\n+\n+ from torch.utils._cpp_embed_headers import _embed_headers\n+\n+ if not hasattr(torch._C, \"_vulkan_compileShader\"):\n+ raise RuntimeError(\"Vulkan is not available\")\n+ source = _embed_headers(\n+ [l + \"\\n\" for l in source.split(\"\\n\")],\n+ [Path(__file__).parent.parent / \"include\"],\n+ set(),\n+ )\n+ return torch._C._vulkan_compileShader(name, source)\n+\n+\n+def is_available() -> bool:\n+ return device_count() > 0\n+\n+\n+__all__ = [", - "comment_created_at": "2025-05-21T00:16:16+00:00", - "comment_author": "albanD", - "comment_body": "You should make it a private API (via prepending _*) to mark it as \"should not be used\".", - "pr_file_module": null - }, - { - "comment_id": "2099069584", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153991, - "pr_file": "torch/vulkan/__init__.py", - "discussion_id": "2098995525", - "commented_code": "@@ -0,0 +1,51 @@\n+# mypy: allow-untyped-defs\n+r\"\"\"\n+This package enables an interface for accessing the Vulkan backend in Python.\n+\"\"\"\n+from typing import Union\n+\n+import torch\n+from torch import Tensor\n+\n+def device_count() -> int:\n+ r\"\"\"Returns the number of available Vulkan devices.\"\"\"\n+ # TODO: actually get the number!\n+ return int(torch.is_vulkan_available())\n+\n+\n+def compile_shader(name: str, source: str):\n+ r\"\"\"Compiles compute shader from source and allows one to invoke kernels\n+ defined there from the comfort of Python runtime\n+ Example::\n+\n+ >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_MPS)\n+ >>> lib = torch.mps.compile_shader(\n+ ... \"kernel void full(device float* out, constant float& val, uint idx [[thread_position_in_grid]]) { out[idx] = val; }\"\n+ ... )\n+ >>> x = torch.zeros(16, device=\"mps\")\n+ >>> lib.full(x, 3.14)\n+ \"\"\"\n+ from pathlib import Path\n+\n+ from torch.utils._cpp_embed_headers import _embed_headers\n+\n+ if not hasattr(torch._C, \"_vulkan_compileShader\"):\n+ raise RuntimeError(\"Vulkan is not available\")\n+ source = _embed_headers(\n+ [l + \"\\n\" for l in source.split(\"\\n\")],\n+ [Path(__file__).parent.parent / \"include\"],\n+ set(),\n+ )\n+ return torch._C._vulkan_compileShader(name, source)\n+\n+\n+def is_available() -> bool:\n+ return device_count() > 0\n+\n+\n+__all__ = [", - "comment_created_at": "2025-05-21T00:17:53+00:00", - "comment_author": "albanD", - "comment_body": "> This is way smaller than torch/mps/__init__.py! torch.mps.compile_shader is public\r\n\r\nI'm not saying I don't think we should make it public. I'm asking if we're confident that this API is stable and we will maintain it for the foreseeable future? 
If the answer is yes, let's document all of this and go for it !\r\nIf not, let's just make it private for now and see later once it's more mature.", - "pr_file_module": null - }, - { - "comment_id": "2099088683", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153991, - "pr_file": "torch/vulkan/__init__.py", - "discussion_id": "2098995525", - "commented_code": "@@ -0,0 +1,51 @@\n+# mypy: allow-untyped-defs\n+r\"\"\"\n+This package enables an interface for accessing the Vulkan backend in Python.\n+\"\"\"\n+from typing import Union\n+\n+import torch\n+from torch import Tensor\n+\n+def device_count() -> int:\n+ r\"\"\"Returns the number of available Vulkan devices.\"\"\"\n+ # TODO: actually get the number!\n+ return int(torch.is_vulkan_available())\n+\n+\n+def compile_shader(name: str, source: str):\n+ r\"\"\"Compiles compute shader from source and allows one to invoke kernels\n+ defined there from the comfort of Python runtime\n+ Example::\n+\n+ >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_MPS)\n+ >>> lib = torch.mps.compile_shader(\n+ ... \"kernel void full(device float* out, constant float& val, uint idx [[thread_position_in_grid]]) { out[idx] = val; }\"\n+ ... )\n+ >>> x = torch.zeros(16, device=\"mps\")\n+ >>> lib.full(x, 3.14)\n+ \"\"\"\n+ from pathlib import Path\n+\n+ from torch.utils._cpp_embed_headers import _embed_headers\n+\n+ if not hasattr(torch._C, \"_vulkan_compileShader\"):\n+ raise RuntimeError(\"Vulkan is not available\")\n+ source = _embed_headers(\n+ [l + \"\\n\" for l in source.split(\"\\n\")],\n+ [Path(__file__).parent.parent / \"include\"],\n+ set(),\n+ )\n+ return torch._C._vulkan_compileShader(name, source)\n+\n+\n+def is_available() -> bool:\n+ return device_count() > 0\n+\n+\n+__all__ = [", - "comment_created_at": "2025-05-21T00:43:15+00:00", - "comment_author": "swolchok", - "comment_body": "> we're confident that this API is stable and we will maintain it for the foreseeable future\r\n\r\ndefinitely not. this is currently experimental status, not clearly motivated, timeboxed (and I extended the timebox because I was close to getting this out). Hence the \"experimental\" label and draft PR.\r\n\r\nI'll go ahead and underscore-prefix everything. 
", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-handle-non-deterministic-collections.json b/_reviewers/pytorch-handle-non-deterministic-collections.json new file mode 100644 index 0000000..654db28 --- /dev/null +++ b/_reviewers/pytorch-handle-non-deterministic-collections.json @@ -0,0 +1,244 @@ +[ + { + "discussion_id": "2106228684", + "pr_number": 153150, + "pr_file": "torch/_dynamo/utils.py", + "created_at": "2025-05-25T15:08:17+00:00", + "commented_code": "return next(itertools.islice(dict_class.keys(d), n, n + 1))\n\n\ndef set_getitem(s, n):\n # Mimic set.__getitem__ by converting the set to a dict to have a partial\n # ordering.\n return list(dict.fromkeys(s))[n]", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2106228684", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153150, + "pr_file": "torch/_dynamo/utils.py", + "discussion_id": "2106228684", + "commented_code": "@@ -2525,6 +2525,12 @@ def dict_keys_getitem(d, n):\n return next(itertools.islice(dict_class.keys(d), n, n + 1))\n \n \n+def set_getitem(s, n):\n+ # Mimic set.__getitem__ by converting the set to a dict to have a partial\n+ # ordering.\n+ return list(dict.fromkeys(s))[n]", + "comment_created_at": "2025-05-25T15:08:17+00:00", + "comment_author": "Skylion007", + "comment_body": "I wonder if itertools islice is better here?", + "pr_file_module": null + }, + { + "comment_id": "2106293442", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153150, + "pr_file": "torch/_dynamo/utils.py", + "discussion_id": "2106228684", + "commented_code": "@@ -2525,6 +2525,12 @@ def dict_keys_getitem(d, n):\n return next(itertools.islice(dict_class.keys(d), n, n + 1))\n \n \n+def set_getitem(s, n):\n+ # Mimic set.__getitem__ by converting the set to a dict to have a partial\n+ # ordering.\n+ return list(dict.fromkeys(s))[n]", + "comment_created_at": "2025-05-25T19:58:51+00:00", + "comment_author": "guilhermeleobas", + "comment_body": "I guess this would only work if we sort the set first:\r\n\r\n```bash\r\n$ PYTHONHASHSEED=1 python -c 'import itertools; s = set(\"abc\"); print(list(itertools.islice(s, 0, None)))'\r\n['b', 'a', 'c']\r\n\r\n$ PYTHONHASHSEED=2 python -c 'import itertools; s = set(\"abc\"); print(list(itertools.islice(s, 0, None)))'\r\n['c', 'b', 'a']\r\n```\r\n\r\n```bash\r\n$ PYTHONHASHSEED=1 python -c 'import itertools; s = set(\"abc\"); print(list(itertools.islice(sorted(s), 0, None)))'\r\n['a', 'b', 'c']\r\n\r\n$ PYTHONHASHSEED=2 python -c 'import itertools; s = set(\"abc\"); print(list(itertools.islice(sorted(s), 0, None)))'\r\n['a', 'b', 'c']\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2109842371", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153150, + "pr_file": "torch/_dynamo/utils.py", + "discussion_id": "2106228684", + "commented_code": "@@ -2525,6 +2525,12 @@ def dict_keys_getitem(d, n):\n return next(itertools.islice(dict_class.keys(d), n, n + 1))\n \n \n+def set_getitem(s, n):\n+ # Mimic set.__getitem__ by converting the set to a dict to have a partial\n+ # ordering.\n+ return list(dict.fromkeys(s))[n]", + "comment_created_at": "2025-05-27T18:07:15+00:00", + "comment_author": "williamwen42", + "comment_body": "Assuming here that `dict.fromkeys` will return the same order every time?", + "pr_file_module": null + }, + { + "comment_id": "2110495363", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153150, + "pr_file": "torch/_dynamo/utils.py", + "discussion_id": "2106228684", + "commented_code": "@@ -2525,6 
+2525,12 @@ def dict_keys_getitem(d, n):\n return next(itertools.islice(dict_class.keys(d), n, n + 1))\n \n \n+def set_getitem(s, n):\n+ # Mimic set.__getitem__ by converting the set to a dict to have a partial\n+ # ordering.\n+ return list(dict.fromkeys(s))[n]", + "comment_created_at": "2025-05-27T23:20:13+00:00", + "comment_author": "guilhermeleobas", + "comment_body": "> Assuming here that dict.fromkeys will return the same order every time?\r\n\r\nYes! And I just realized that this is not guaranteed as well. I need to think if there's any alternative that we can use that gives a predictable ordering and does not depend on hash values.\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2116629641", + "pr_number": 153150, + "pr_file": "torch/_dynamo/utils.py", + "created_at": "2025-05-30T21:00:17+00:00", + "commented_code": "return next(itertools.islice(dict_class.keys(d), n, n + 1))\n\n\ndef set_getitem(s, n):\n # Mimic set.__getitem__ by sorting the set using the hash\n return sorted(s, key=hash)[n]", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2116629641", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153150, + "pr_file": "torch/_dynamo/utils.py", + "discussion_id": "2116629641", + "commented_code": "@@ -2530,6 +2530,11 @@ def dict_keys_getitem(d, n):\n return next(itertools.islice(dict_class.keys(d), n, n + 1))\n \n \n+def set_getitem(s, n):\n+ # Mimic set.__getitem__ by sorting the set using the hash\n+ return sorted(s, key=hash)[n]", + "comment_created_at": "2025-05-30T21:00:17+00:00", + "comment_author": "williamwen42", + "comment_body": "How will we deal with hash collisions? (What if the object has a custom hash that always returns 0?)", + "pr_file_module": null + }, + { + "comment_id": "2122042749", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153150, + "pr_file": "torch/_dynamo/utils.py", + "discussion_id": "2116629641", + "commented_code": "@@ -2530,6 +2530,11 @@ def dict_keys_getitem(d, n):\n return next(itertools.islice(dict_class.keys(d), n, n + 1))\n \n \n+def set_getitem(s, n):\n+ # Mimic set.__getitem__ by sorting the set using the hash\n+ return sorted(s, key=hash)[n]", + "comment_created_at": "2025-06-02T20:00:34+00:00", + "comment_author": "zou3519", + "comment_body": "Does this lead to anything worse than a recompile in the current PR?", + "pr_file_module": null + }, + { + "comment_id": "2122108696", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153150, + "pr_file": "torch/_dynamo/utils.py", + "discussion_id": "2116629641", + "commented_code": "@@ -2530,6 +2530,11 @@ def dict_keys_getitem(d, n):\n return next(itertools.islice(dict_class.keys(d), n, n + 1))\n \n \n+def set_getitem(s, n):\n+ # Mimic set.__getitem__ by sorting the set using the hash\n+ return sorted(s, key=hash)[n]", + "comment_created_at": "2025-06-02T20:37:57+00:00", + "comment_author": "guilhermeleobas", + "comment_body": "I don't think so. 
I couldn't actually create a test case for this because we don't support set of objects with custom hash functions:\r\nhttps://github.com/pytorch/pytorch/blob/48807d568ec5d4ad654fff27aaa1cec3b715c473/torch/_dynamo/variables/dicts.py#L101-L106", + "pr_file_module": null + }, + { + "comment_id": "2122437629", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153150, + "pr_file": "torch/_dynamo/utils.py", + "discussion_id": "2116629641", + "commented_code": "@@ -2530,6 +2530,11 @@ def dict_keys_getitem(d, n):\n return next(itertools.islice(dict_class.keys(d), n, n + 1))\n \n \n+def set_getitem(s, n):\n+ # Mimic set.__getitem__ by sorting the set using the hash\n+ return sorted(s, key=hash)[n]", + "comment_created_at": "2025-06-03T01:41:11+00:00", + "comment_author": "guilhermeleobas", + "comment_body": "If recompilation is the worst thing we can have, then one can just do `list(set_obj)[index]` instead. It will be faster than sorting the elements by the hash or calling `list(dict.fromkeys(set_object))[index]`", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2140304805", + "pr_number": 153150, + "pr_file": "torch/_dynamo/variables/builder.py", + "created_at": "2025-06-11T14:11:47+00:00", + "commented_code": "var = TorchFunctionModeVariable(value, source=self.source)\n self.tx.output.side_effects.track_object_existing(value, var)\n return var\n elif istype(value, set):\n self.install_guards(GuardBuilder.TYPE_MATCH)\n self.install_guards(GuardBuilder.SEQUENCE_LENGTH)\n\n # The dictionary gives a ordering for the set items\n L = list(value)", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2140304805", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153150, + "pr_file": "torch/_dynamo/variables/builder.py", + "discussion_id": "2140304805", + "commented_code": "@@ -755,6 +756,18 @@ def build_key_value(i, k, v):\n var = TorchFunctionModeVariable(value, source=self.source)\n self.tx.output.side_effects.track_object_existing(value, var)\n return var\n+ elif istype(value, set):\n+ self.install_guards(GuardBuilder.TYPE_MATCH)\n+ self.install_guards(GuardBuilder.SEQUENCE_LENGTH)\n+\n+ # The dictionary gives a ordering for the set items\n+ L = list(value)", + "comment_created_at": "2025-06-11T14:11:47+00:00", + "comment_author": "zou3519", + "comment_body": "Can you leave a comment here spifying:\r\n1) set ordering in cpython is based on hash value order\r\n2) the order being incorrect would just lead to a recompilation (no silent incorrectness)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2140380541", + "pr_number": 153150, + "pr_file": "torch/_dynamo/guards.py", + "created_at": "2025-06-11T14:39:31+00:00", + "commented_code": "example_value=example_value,\n guard_manager_enum=guard_manager_enum,\n )\n elif istype(source, SetGetItemSource):", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2140380541", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153150, + "pr_file": "torch/_dynamo/guards.py", + "discussion_id": "2140380541", + "commented_code": "@@ -1284,6 +1290,14 @@ def get_guard_manager_from_source(self, source):\n example_value=example_value,\n guard_manager_enum=guard_manager_enum,\n )\n+ elif istype(source, SetGetItemSource):", + "comment_created_at": "2025-06-11T14:39:31+00:00", + "comment_author": "zou3519", + "comment_body": "One interesting thing I learned was that the hash of a set is not deterministic across interpreter runs. For example. 
try hash(\"a\") in different pythons. This means that this source isn't serializable (and precompile should not try to serialize it! cc @zhxchen17)\r\n\r\nWe should add a comment about this somewhere.", + "pr_file_module": null + }, + { + "comment_id": "2140403574", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153150, + "pr_file": "torch/_dynamo/guards.py", + "discussion_id": "2140380541", + "commented_code": "@@ -1284,6 +1290,14 @@ def get_guard_manager_from_source(self, source):\n example_value=example_value,\n guard_manager_enum=guard_manager_enum,\n )\n+ elif istype(source, SetGetItemSource):", + "comment_created_at": "2025-06-11T14:49:13+00:00", + "comment_author": "zou3519", + "comment_body": "Maybe rename it NonSerializableSetGetItemSource", + "pr_file_module": null + }, + { + "comment_id": "2141162550", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153150, + "pr_file": "torch/_dynamo/guards.py", + "discussion_id": "2140380541", + "commented_code": "@@ -1284,6 +1290,14 @@ def get_guard_manager_from_source(self, source):\n example_value=example_value,\n guard_manager_enum=guard_manager_enum,\n )\n+ elif istype(source, SetGetItemSource):", + "comment_created_at": "2025-06-11T21:41:44+00:00", + "comment_author": "guilhermeleobas", + "comment_body": "> One interesting thing I learned was that the hash of a set is not deterministic across interpreter runs. \r\n\r\nYou mean the set getitem, right? Because sets are non-hashable.", + "pr_file_module": null + }, + { + "comment_id": "2141168033", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153150, + "pr_file": "torch/_dynamo/guards.py", + "discussion_id": "2140380541", + "commented_code": "@@ -1284,6 +1290,14 @@ def get_guard_manager_from_source(self, source):\n example_value=example_value,\n guard_manager_enum=guard_manager_enum,\n )\n+ elif istype(source, SetGetItemSource):", + "comment_created_at": "2025-06-11T21:46:49+00:00", + "comment_author": "zou3519", + "comment_body": "Sorry, what I meant was \"was that the hash of a string is not deterministic\"", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1386767400", + "pr_number": 113258, + "pr_file": "torch/utils/_pytree/api/python.py", + "created_at": "2023-11-08T15:03:27+00:00", + "commented_code": "_deque_unflatten,\n serialized_type_name=\"collections.deque\",\n)\n_private_register_pytree_node(\n structseq,\n _structseq_flatten,\n _structseq_unflatten,\n serialized_type_name=\"structseq\",\n to_dumpable_context=_structseq_serialize,\n from_dumpable_context=_structseq_deserialize,\n)\n\n\ndef _get_node_type(tree: Any) -> Any:\n node_type = type(tree)\n if node_type not in SUPPORTED_NODES and is_namedtuple_class(node_type):\n return namedtuple\n node_type = tree.__class__ # Dynamo complains about using `type(tree)`", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "1386767400", + "repo_full_name": "pytorch/pytorch", + "pr_number": 113258, + "pr_file": "torch/utils/_pytree/api/python.py", + "discussion_id": "1386767400", + "commented_code": "@@ -465,12 +491,23 @@ def _deque_unflatten(values: Iterable[Any], context: Context) -> Deque[Any]:\n _deque_unflatten,\n serialized_type_name=\"collections.deque\",\n )\n+_private_register_pytree_node(\n+ structseq,\n+ _structseq_flatten,\n+ _structseq_unflatten,\n+ serialized_type_name=\"structseq\",\n+ to_dumpable_context=_structseq_serialize,\n+ from_dumpable_context=_structseq_deserialize,\n+)\n \n \n def _get_node_type(tree: Any) -> Any:\n- node_type = type(tree)\n- if 
node_type not in SUPPORTED_NODES and is_namedtuple_class(node_type):\n- return namedtuple\n+ node_type = tree.__class__ # Dynamo complains about using `type(tree)`", + "comment_created_at": "2023-11-08T15:03:27+00:00", + "comment_author": "XuehaiPan", + "comment_body": "I'm a bit confused about this. The previous commit uses `node_type = type(tree)`. It works fine and passes all CI tests. But Dynamo asks for changing it to `node_type = tree.__class__` in the CI test in this PR.\r\n\r\n```\r\n[2023-11-08 23:04:51,484] [0/0] torch._dynamo.variables.higher_order_ops: [WARNING] speculate_subgraph: while introspecting wrap, we were unable to trace function `NestedUserFunctionVariable` into a single graph. This means that Dynamo was unable to prove safety for this API and will fall back to eager-mode PyTorch, which could lead to a slowdown.\r\n[2023-11-08 23:04:51,484] [0/0] torch._dynamo.variables.higher_order_ops: [ERROR] Can't access members of type(obj) for a generated custom object. Please use __class__ instead\r\n[2023-11-08 23:04:51,484] [0/0] torch._dynamo.variables.higher_order_ops: [ERROR] For more information about this error, see: https://pytorch.org/docs/main/generated/exportdb/index.html#type-reflection-method\r\nEinline_call [(\"Can't access members of type(obj) for a generated custom object. Please use __class__ instead\\nFor more information about this error, see: https://pytorch.org/docs/main/generated/exportdb/index.html#type-reflection-method\", 1)]\r\nunimplemented [(\"speculate_subgraph: while introspecting wrap, we were unable to trace function `NestedUserFunctionVariable` into a single graph. This means that Dynamo was unable to prove safety for this API and will fall back to eager-mode PyTorch, which could lead to a slowdown. Scroll up for the stack trace of the initial exception. The reason was: Can't access members of type(obj) for a generated custom object. Please use __class__ instead\\nFor more information about this error, see: https://pytorch.org/docs/main/generated/exportdb/index.html#type-reflection-method\", 1)]\r\n```\r\n\r\nShould we change all `type(obj)` to `obj.__class__` in the pytree utilities?", + "pr_file_module": null + }, + { + "comment_id": "1386815594", + "repo_full_name": "pytorch/pytorch", + "pr_number": 113258, + "pr_file": "torch/utils/_pytree/api/python.py", + "discussion_id": "1386767400", + "commented_code": "@@ -465,12 +491,23 @@ def _deque_unflatten(values: Iterable[Any], context: Context) -> Deque[Any]:\n _deque_unflatten,\n serialized_type_name=\"collections.deque\",\n )\n+_private_register_pytree_node(\n+ structseq,\n+ _structseq_flatten,\n+ _structseq_unflatten,\n+ serialized_type_name=\"structseq\",\n+ to_dumpable_context=_structseq_serialize,\n+ from_dumpable_context=_structseq_deserialize,\n+)\n \n \n def _get_node_type(tree: Any) -> Any:\n- node_type = type(tree)\n- if node_type not in SUPPORTED_NODES and is_namedtuple_class(node_type):\n- return namedtuple\n+ node_type = tree.__class__ # Dynamo complains about using `type(tree)`", + "comment_created_at": "2023-11-08T15:35:09+00:00", + "comment_author": "XuehaiPan", + "comment_body": "Now the new CI test errors ask to change back to `node_type = type(tree)`.\r\n\r\nUse `node_type = type(tree)`, will get error:\r\n\r\n
\r\nError outputs\r\n\r\n```console\r\n$ python test/dynamo/test_higher_order_ops.py -k test_access_module_attr\r\n[2023-11-08 23:32:18,089] [0/0] torch._dynamo.variables.higher_order_ops: [WARNING] speculate_subgraph: while introspecting wrap, we were unable to trace function `NestedUserFunctionVariable` into a single graph. This means that Dynamo was unable to prove safety for this API and will fall back to eager-mode PyTorch, which could lead to a slowdown.\r\n[2023-11-08 23:32:18,089] [0/0] torch._dynamo.variables.higher_order_ops: [ERROR] Can't access members of type(obj) for a generated custom object. Please use __class__ instead\r\n[2023-11-08 23:32:18,089] [0/0] torch._dynamo.variables.higher_order_ops: [ERROR] For more information about this error, see: https://pytorch.org/docs/main/generated/exportdb/index.html#type-reflection-method\r\nEinline_call [(\"Can't access members of type(obj) for a generated custom object. Please use __class__ instead\\nFor more information about this error, see: https://pytorch.org/docs/main/generated/exportdb/index.html#type-reflection-method\", 1)]\r\nunimplemented [(\"speculate_subgraph: while introspecting wrap, we were unable to trace function `NestedUserFunctionVariable` into a single graph. This means that Dynamo was unable to prove safety for this API and will fall back to eager-mode PyTorch, which could lead to a slowdown. Scroll up for the stack trace of the initial exception. The reason was: Can't access members of type(obj) for a generated custom object. Please use __class__ instead\\nFor more information about this error, see: https://pytorch.org/docs/main/generated/exportdb/index.html#type-reflection-method\", 1)]\r\n\r\n======================================================================\r\nERROR: test_access_module_attr (__main__.HigherOrderOpTests.test_access_module_attr)\r\n----------------------------------------------------------------------\r\nTraceback (most recent call last):\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/higher_order_ops.py\", line 229, in speculate_subgraph\r\n tree_flatten_output = tree_flatten.call_function(tx, [output], {})\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File 
\"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, 
inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n 
getattr(self, inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1298, in LOAD_ATTR\r\n result = BuiltinVariable(getattr).call_function(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/builtin.py\", line 614, in call_function\r\n result = handler(tx, *args, **kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/builtin.py\", line 1151, in call_getattr\r\n return obj.var_getattr(tx, name).clone(source=source)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/base.py\", line 262, in var_getattr\r\n value = self.const_getattr(tx, name)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/constant.py\", line 122, in const_getattr\r\n raise UserError(\r\ntorch._dynamo.exc.UserError: Can't access members of type(obj) for a generated custom object. Please use __class__ instead\r\nFor more information about this error, see: https://pytorch.org/docs/main/generated/exportdb/index.html#type-reflection-method\r\n\r\nThe above exception was the direct cause of the following exception:\r\n\r\nTraceback (most recent call last):\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/testing/_internal/common_utils.py\", line 2536, in wrapper\r\n method(*args, **kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/test/dynamo/test_higher_order_ops.py\", line 1830, in test_access_module_attr\r\n result = f(x)\r\n ^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/eval_frame.py\", line 409, in _fn\r\n return fn(*args, **kwargs)\r\n ^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/eval_frame.py\", line 570, in catch_errors\r\n return callback(frame, cache_entry, hooks, frame_state)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/convert_frame.py\", line 377, in _convert_frame_assert\r\n return _compile(\r\n ^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/convert_frame.py\", line 594, in _compile\r\n guarded_code = compile_inner(code, one_graph, hooks, transform)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/utils.py\", line 222, in time_wrapper\r\n r = func(*args, **kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/convert_frame.py\", line 511, in compile_inner\r\n out_code = transform_code_object(code, transform)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/bytecode_transformation.py\", line 1033, in transform_code_object\r\n transformations(instructions, code_options)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/convert_frame.py\", line 150, in _fn\r\n return fn(*args, **kwargs)\r\n ^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/convert_frame.py\", line 476, in transform\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2120, in run\r\n super().run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File 
\"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/higher_order_ops.py\", line 1227, in call_function\r\n p_args, p_kwargs, example_value, treespec = self.create_wrapped_node(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/higher_order_ops.py\", line 1186, in create_wrapped_node\r\n ) = speculate_subgraph(\r\n ^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/higher_order_ops.py\", line 288, in speculate_subgraph\r\n raise Unsupported(\r\ntorch._dynamo.exc.Unsupported: speculate_subgraph: while introspecting wrap, we were unable to trace function `NestedUserFunctionVariable` into a single graph. This means that Dynamo was unable to prove safety for this API and will fall back to eager-mode PyTorch, which could lead to a slowdown. Scroll up for the stack trace of the initial exception. The reason was: Can't access members of type(obj) for a generated custom object. Please use __class__ instead\r\nFor more information about this error, see: https://pytorch.org/docs/main/generated/exportdb/index.html#type-reflection-method\r\n\r\nfrom user code:\r\n File \"/Users/PanXuehai/Projects/pytorch/test/dynamo/test_higher_order_ops.py\", line 1828, in f\r\n return wrap(lambda y: y - mod.bias, y)\r\n\r\nSet TORCH_LOGS=\"+dynamo\" and TORCHDYNAMO_VERBOSE=1 for more information\r\n\r\n\r\nYou can suppress this exception and fall back to eager by setting:\r\n import torch._dynamo\r\n torch._dynamo.config.suppress_errors = True\r\n\r\n\r\nTo execute this test, run the following from the base repo dir:\r\n python test/dynamo/test_higher_order_ops.py -k test_access_module_attr\r\n\r\nThis message can be suppressed by setting PYTORCH_PRINT_REPRO_ON_FAILURE=0\r\n\r\n----------------------------------------------------------------------\r\nRan 1 test in 0.089s\r\n\r\nFAILED (errors=1)\r\n```\r\n\r\n
\r\n\r\n------\r\n\r\nUse `node_type = tree.__class__`, will get error:\r\n\r\n
\r\nError outputs\r\n\r\n```console\r\n$ python test/dynamo/test_misc.py -k test_tracing_tree_map_only\r\nEinline_call [('call_method ConstDictVariable() __contains__ [GetAttrVariable(ConstDictVariable(), __class__)] {}', 1)]\r\n\r\n======================================================================\r\nERROR: test_tracing_tree_map_only (__main__.MiscTests.test_tracing_tree_map_only)\r\n----------------------------------------------------------------------\r\nTraceback (most recent call last):\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/testing/_internal/common_utils.py\", line 2536, in wrapper\r\n method(*args, **kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/test/dynamo/test_misc.py\", line 7174, in test_tracing_tree_map_only\r\n comp_out = torch._dynamo.optimize(counter, nopython=True)(fn)(xsb)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/eval_frame.py\", line 409, in _fn\r\n return fn(*args, **kwargs)\r\n ^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/eval_frame.py\", line 570, in catch_errors\r\n return callback(frame, cache_entry, hooks, frame_state)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/convert_frame.py\", line 377, in _convert_frame_assert\r\n return _compile(\r\n ^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/convert_frame.py\", line 594, in _compile\r\n guarded_code = compile_inner(code, one_graph, hooks, transform)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/utils.py\", line 222, in time_wrapper\r\n r = func(*args, **kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/convert_frame.py\", line 511, in compile_inner\r\n out_code = transform_code_object(code, transform)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/bytecode_transformation.py\", line 1033, in transform_code_object\r\n transformations(instructions, code_options)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/convert_frame.py\", line 150, in _fn\r\n return fn(*args, **kwargs)\r\n ^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/convert_frame.py\", line 476, in transform\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2120, in run\r\n super().run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File 
\"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, kwargs)\r\n 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, 
kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1639, in CONTAINS_OP\r\n self.push(right.call_method(self, \"__contains__\", [left], {}))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/dicts.py\", line 183, in call_method\r\n return super().call_method(tx, name, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/base.py\", line 321, in call_method\r\n raise unimplemented(f\"call_method {self} {name} {args} {kwargs}\")\r\n 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/exc.py\", line 192, in unimplemented\r\n raise Unsupported(msg)\r\ntorch._dynamo.exc.Unsupported: call_method ConstDictVariable() __contains__ [GetAttrVariable(ConstDictVariable(), __class__)] {}\r\n\r\nfrom user code:\r\n File \"/Users/PanXuehai/Projects/pytorch/test/dynamo/test_misc.py\", line 7166, in fn\r\n y = pytree.tree_map_only(torch.Tensor, mapper, xs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/utils/_pytree/api/python.py\", line 761, in tree_map_only\r\n return tree_map(map_only(__type_or_types)(func), tree)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/utils/_pytree/api/python.py\", line 652, in tree_map\r\n flat_args, spec = tree_flatten(tree)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/utils/_pytree/api/python.py\", line 587, in tree_flatten\r\n spec = _tree_flatten_helper(tree, leaves)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/utils/_pytree/api/python.py\", line 568, in _tree_flatten_helper\r\n if _is_leaf(tree):\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/utils/_pytree/api/python.py\", line 516, in _is_leaf\r\n return _get_node_type(tree) not in SUPPORTED_NODES\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/utils/_pytree/api/python.py\", line 506, in _get_node_type\r\n if node_type not in SUPPORTED_NODES:\r\n\r\nSet TORCH_LOGS=\"+dynamo\" and TORCHDYNAMO_VERBOSE=1 for more information\r\n\r\n\r\nYou can suppress this exception and fall back to eager by setting:\r\n import torch._dynamo\r\n torch._dynamo.config.suppress_errors = True\r\n\r\n\r\nTo execute this test, run the following from the base repo dir:\r\n python test/dynamo/test_misc.py -k test_tracing_tree_map_only\r\n\r\nThis message can be suppressed by setting PYTORCH_PRINT_REPRO_ON_FAILURE=0\r\n\r\n----------------------------------------------------------------------\r\nRan 1 test in 0.447s\r\n\r\nFAILED (errors=1)\r\n```\r\n\r\n
", + "pr_file_module": null + }, + { + "comment_id": "1386923960", + "repo_full_name": "pytorch/pytorch", + "pr_number": 113258, + "pr_file": "torch/utils/_pytree/api/python.py", + "discussion_id": "1386767400", + "commented_code": "@@ -465,12 +491,23 @@ def _deque_unflatten(values: Iterable[Any], context: Context) -> Deque[Any]:\n _deque_unflatten,\n serialized_type_name=\"collections.deque\",\n )\n+_private_register_pytree_node(\n+ structseq,\n+ _structseq_flatten,\n+ _structseq_unflatten,\n+ serialized_type_name=\"structseq\",\n+ to_dumpable_context=_structseq_serialize,\n+ from_dumpable_context=_structseq_deserialize,\n+)\n \n \n def _get_node_type(tree: Any) -> Any:\n- node_type = type(tree)\n- if node_type not in SUPPORTED_NODES and is_namedtuple_class(node_type):\n- return namedtuple\n+ node_type = tree.__class__ # Dynamo complains about using `type(tree)`", + "comment_created_at": "2023-11-08T16:47:26+00:00", + "comment_author": "XuehaiPan", + "comment_body": "I found that this error was caused by Dynamo not yet implementing `cls.__base__`.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-handle-non-deterministic-collections.md b/_reviewers/pytorch-handle-non-deterministic-collections.md index d6d1889..1ca6804 100644 --- a/_reviewers/pytorch-handle-non-deterministic-collections.md +++ b/_reviewers/pytorch-handle-non-deterministic-collections.md @@ -45,249 +45,3 @@ first_element = next(iter(my_set)) # Order-dependent! ``` Remember that hash values for strings and other objects vary across interpreter runs due to Python's hash randomization security feature. Always ensure algorithms that interact with hash-based collections handle this non-determinism appropriately. - - -[ - { - "discussion_id": "2106228684", - "pr_number": 153150, - "pr_file": "torch/_dynamo/utils.py", - "created_at": "2025-05-25T15:08:17+00:00", - "commented_code": "return next(itertools.islice(dict_class.keys(d), n, n + 1))\n\n\ndef set_getitem(s, n):\n # Mimic set.__getitem__ by converting the set to a dict to have a partial\n # ordering.\n return list(dict.fromkeys(s))[n]", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2106228684", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153150, - "pr_file": "torch/_dynamo/utils.py", - "discussion_id": "2106228684", - "commented_code": "@@ -2525,6 +2525,12 @@ def dict_keys_getitem(d, n):\n return next(itertools.islice(dict_class.keys(d), n, n + 1))\n \n \n+def set_getitem(s, n):\n+ # Mimic set.__getitem__ by converting the set to a dict to have a partial\n+ # ordering.\n+ return list(dict.fromkeys(s))[n]", - "comment_created_at": "2025-05-25T15:08:17+00:00", - "comment_author": "Skylion007", - "comment_body": "I wonder if itertools islice is better here?", - "pr_file_module": null - }, - { - "comment_id": "2106293442", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153150, - "pr_file": "torch/_dynamo/utils.py", - "discussion_id": "2106228684", - "commented_code": "@@ -2525,6 +2525,12 @@ def dict_keys_getitem(d, n):\n return next(itertools.islice(dict_class.keys(d), n, n + 1))\n \n \n+def set_getitem(s, n):\n+ # Mimic set.__getitem__ by converting the set to a dict to have a partial\n+ # ordering.\n+ return list(dict.fromkeys(s))[n]", - "comment_created_at": "2025-05-25T19:58:51+00:00", - "comment_author": "guilhermeleobas", - "comment_body": "I guess this would only work if we sort the set first:\r\n\r\n```bash\r\n$ PYTHONHASHSEED=1 python -c 'import itertools; s = 
set(\"abc\"); print(list(itertools.islice(s, 0, None)))'\r\n['b', 'a', 'c']\r\n\r\n$ PYTHONHASHSEED=2 python -c 'import itertools; s = set(\"abc\"); print(list(itertools.islice(s, 0, None)))'\r\n['c', 'b', 'a']\r\n```\r\n\r\n```bash\r\n$ PYTHONHASHSEED=1 python -c 'import itertools; s = set(\"abc\"); print(list(itertools.islice(sorted(s), 0, None)))'\r\n['a', 'b', 'c']\r\n\r\n$ PYTHONHASHSEED=2 python -c 'import itertools; s = set(\"abc\"); print(list(itertools.islice(sorted(s), 0, None)))'\r\n['a', 'b', 'c']\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2109842371", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153150, - "pr_file": "torch/_dynamo/utils.py", - "discussion_id": "2106228684", - "commented_code": "@@ -2525,6 +2525,12 @@ def dict_keys_getitem(d, n):\n return next(itertools.islice(dict_class.keys(d), n, n + 1))\n \n \n+def set_getitem(s, n):\n+ # Mimic set.__getitem__ by converting the set to a dict to have a partial\n+ # ordering.\n+ return list(dict.fromkeys(s))[n]", - "comment_created_at": "2025-05-27T18:07:15+00:00", - "comment_author": "williamwen42", - "comment_body": "Assuming here that `dict.fromkeys` will return the same order every time?", - "pr_file_module": null - }, - { - "comment_id": "2110495363", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153150, - "pr_file": "torch/_dynamo/utils.py", - "discussion_id": "2106228684", - "commented_code": "@@ -2525,6 +2525,12 @@ def dict_keys_getitem(d, n):\n return next(itertools.islice(dict_class.keys(d), n, n + 1))\n \n \n+def set_getitem(s, n):\n+ # Mimic set.__getitem__ by converting the set to a dict to have a partial\n+ # ordering.\n+ return list(dict.fromkeys(s))[n]", - "comment_created_at": "2025-05-27T23:20:13+00:00", - "comment_author": "guilhermeleobas", - "comment_body": "> Assuming here that dict.fromkeys will return the same order every time?\r\n\r\nYes! And I just realized that this is not guaranteed as well. I need to think if there's any alternative that we can use that gives a predictable ordering and does not depend on hash values.\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2116629641", - "pr_number": 153150, - "pr_file": "torch/_dynamo/utils.py", - "created_at": "2025-05-30T21:00:17+00:00", - "commented_code": "return next(itertools.islice(dict_class.keys(d), n, n + 1))\n\n\ndef set_getitem(s, n):\n # Mimic set.__getitem__ by sorting the set using the hash\n return sorted(s, key=hash)[n]", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2116629641", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153150, - "pr_file": "torch/_dynamo/utils.py", - "discussion_id": "2116629641", - "commented_code": "@@ -2530,6 +2530,11 @@ def dict_keys_getitem(d, n):\n return next(itertools.islice(dict_class.keys(d), n, n + 1))\n \n \n+def set_getitem(s, n):\n+ # Mimic set.__getitem__ by sorting the set using the hash\n+ return sorted(s, key=hash)[n]", - "comment_created_at": "2025-05-30T21:00:17+00:00", - "comment_author": "williamwen42", - "comment_body": "How will we deal with hash collisions? 
(What if the object has a custom hash that always returns 0?)", - "pr_file_module": null - }, - { - "comment_id": "2122042749", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153150, - "pr_file": "torch/_dynamo/utils.py", - "discussion_id": "2116629641", - "commented_code": "@@ -2530,6 +2530,11 @@ def dict_keys_getitem(d, n):\n return next(itertools.islice(dict_class.keys(d), n, n + 1))\n \n \n+def set_getitem(s, n):\n+ # Mimic set.__getitem__ by sorting the set using the hash\n+ return sorted(s, key=hash)[n]", - "comment_created_at": "2025-06-02T20:00:34+00:00", - "comment_author": "zou3519", - "comment_body": "Does this lead to anything worse than a recompile in the current PR?", - "pr_file_module": null - }, - { - "comment_id": "2122108696", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153150, - "pr_file": "torch/_dynamo/utils.py", - "discussion_id": "2116629641", - "commented_code": "@@ -2530,6 +2530,11 @@ def dict_keys_getitem(d, n):\n return next(itertools.islice(dict_class.keys(d), n, n + 1))\n \n \n+def set_getitem(s, n):\n+ # Mimic set.__getitem__ by sorting the set using the hash\n+ return sorted(s, key=hash)[n]", - "comment_created_at": "2025-06-02T20:37:57+00:00", - "comment_author": "guilhermeleobas", - "comment_body": "I don't think so. I couldn't actually create a test case for this because we don't support set of objects with custom hash functions:\r\nhttps://github.com/pytorch/pytorch/blob/48807d568ec5d4ad654fff27aaa1cec3b715c473/torch/_dynamo/variables/dicts.py#L101-L106", - "pr_file_module": null - }, - { - "comment_id": "2122437629", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153150, - "pr_file": "torch/_dynamo/utils.py", - "discussion_id": "2116629641", - "commented_code": "@@ -2530,6 +2530,11 @@ def dict_keys_getitem(d, n):\n return next(itertools.islice(dict_class.keys(d), n, n + 1))\n \n \n+def set_getitem(s, n):\n+ # Mimic set.__getitem__ by sorting the set using the hash\n+ return sorted(s, key=hash)[n]", - "comment_created_at": "2025-06-03T01:41:11+00:00", - "comment_author": "guilhermeleobas", - "comment_body": "If recompilation is the worst thing we can have, then one can just do `list(set_obj)[index]` instead. 
It will be faster than sorting the elements by the hash or calling `list(dict.fromkeys(set_object))[index]`", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2140304805", - "pr_number": 153150, - "pr_file": "torch/_dynamo/variables/builder.py", - "created_at": "2025-06-11T14:11:47+00:00", - "commented_code": "var = TorchFunctionModeVariable(value, source=self.source)\n self.tx.output.side_effects.track_object_existing(value, var)\n return var\n elif istype(value, set):\n self.install_guards(GuardBuilder.TYPE_MATCH)\n self.install_guards(GuardBuilder.SEQUENCE_LENGTH)\n\n # The dictionary gives a ordering for the set items\n L = list(value)", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2140304805", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153150, - "pr_file": "torch/_dynamo/variables/builder.py", - "discussion_id": "2140304805", - "commented_code": "@@ -755,6 +756,18 @@ def build_key_value(i, k, v):\n var = TorchFunctionModeVariable(value, source=self.source)\n self.tx.output.side_effects.track_object_existing(value, var)\n return var\n+ elif istype(value, set):\n+ self.install_guards(GuardBuilder.TYPE_MATCH)\n+ self.install_guards(GuardBuilder.SEQUENCE_LENGTH)\n+\n+ # The dictionary gives a ordering for the set items\n+ L = list(value)", - "comment_created_at": "2025-06-11T14:11:47+00:00", - "comment_author": "zou3519", - "comment_body": "Can you leave a comment here spifying:\r\n1) set ordering in cpython is based on hash value order\r\n2) the order being incorrect would just lead to a recompilation (no silent incorrectness)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2140380541", - "pr_number": 153150, - "pr_file": "torch/_dynamo/guards.py", - "created_at": "2025-06-11T14:39:31+00:00", - "commented_code": "example_value=example_value,\n guard_manager_enum=guard_manager_enum,\n )\n elif istype(source, SetGetItemSource):", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2140380541", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153150, - "pr_file": "torch/_dynamo/guards.py", - "discussion_id": "2140380541", - "commented_code": "@@ -1284,6 +1290,14 @@ def get_guard_manager_from_source(self, source):\n example_value=example_value,\n guard_manager_enum=guard_manager_enum,\n )\n+ elif istype(source, SetGetItemSource):", - "comment_created_at": "2025-06-11T14:39:31+00:00", - "comment_author": "zou3519", - "comment_body": "One interesting thing I learned was that the hash of a set is not deterministic across interpreter runs. For example. try hash(\"a\") in different pythons. This means that this source isn't serializable (and precompile should not try to serialize it! 
cc @zhxchen17)\r\n\r\nWe should add a comment about this somewhere.", - "pr_file_module": null - }, - { - "comment_id": "2140403574", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153150, - "pr_file": "torch/_dynamo/guards.py", - "discussion_id": "2140380541", - "commented_code": "@@ -1284,6 +1290,14 @@ def get_guard_manager_from_source(self, source):\n example_value=example_value,\n guard_manager_enum=guard_manager_enum,\n )\n+ elif istype(source, SetGetItemSource):", - "comment_created_at": "2025-06-11T14:49:13+00:00", - "comment_author": "zou3519", - "comment_body": "Maybe rename it NonSerializableSetGetItemSource", - "pr_file_module": null - }, - { - "comment_id": "2141162550", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153150, - "pr_file": "torch/_dynamo/guards.py", - "discussion_id": "2140380541", - "commented_code": "@@ -1284,6 +1290,14 @@ def get_guard_manager_from_source(self, source):\n example_value=example_value,\n guard_manager_enum=guard_manager_enum,\n )\n+ elif istype(source, SetGetItemSource):", - "comment_created_at": "2025-06-11T21:41:44+00:00", - "comment_author": "guilhermeleobas", - "comment_body": "> One interesting thing I learned was that the hash of a set is not deterministic across interpreter runs. \r\n\r\nYou mean the set getitem, right? Because sets are non-hashable.", - "pr_file_module": null - }, - { - "comment_id": "2141168033", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153150, - "pr_file": "torch/_dynamo/guards.py", - "discussion_id": "2140380541", - "commented_code": "@@ -1284,6 +1290,14 @@ def get_guard_manager_from_source(self, source):\n example_value=example_value,\n guard_manager_enum=guard_manager_enum,\n )\n+ elif istype(source, SetGetItemSource):", - "comment_created_at": "2025-06-11T21:46:49+00:00", - "comment_author": "zou3519", - "comment_body": "Sorry, what I meant was \"was that the hash of a string is not deterministic\"", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1386767400", - "pr_number": 113258, - "pr_file": "torch/utils/_pytree/api/python.py", - "created_at": "2023-11-08T15:03:27+00:00", - "commented_code": "_deque_unflatten,\n serialized_type_name=\"collections.deque\",\n)\n_private_register_pytree_node(\n structseq,\n _structseq_flatten,\n _structseq_unflatten,\n serialized_type_name=\"structseq\",\n to_dumpable_context=_structseq_serialize,\n from_dumpable_context=_structseq_deserialize,\n)\n\n\ndef _get_node_type(tree: Any) -> Any:\n node_type = type(tree)\n if node_type not in SUPPORTED_NODES and is_namedtuple_class(node_type):\n return namedtuple\n node_type = tree.__class__ # Dynamo complains about using `type(tree)`", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "1386767400", - "repo_full_name": "pytorch/pytorch", - "pr_number": 113258, - "pr_file": "torch/utils/_pytree/api/python.py", - "discussion_id": "1386767400", - "commented_code": "@@ -465,12 +491,23 @@ def _deque_unflatten(values: Iterable[Any], context: Context) -> Deque[Any]:\n _deque_unflatten,\n serialized_type_name=\"collections.deque\",\n )\n+_private_register_pytree_node(\n+ structseq,\n+ _structseq_flatten,\n+ _structseq_unflatten,\n+ serialized_type_name=\"structseq\",\n+ to_dumpable_context=_structseq_serialize,\n+ from_dumpable_context=_structseq_deserialize,\n+)\n \n \n def _get_node_type(tree: Any) -> Any:\n- node_type = type(tree)\n- if node_type not in SUPPORTED_NODES and is_namedtuple_class(node_type):\n- return namedtuple\n+ node_type = tree.__class__ # Dynamo 
complains about using `type(tree)`", - "comment_created_at": "2023-11-08T15:03:27+00:00", - "comment_author": "XuehaiPan", - "comment_body": "I'm a bit confused about this. The previous commit uses `node_type = type(tree)`. It works fine and passes all CI tests. But Dynamo asks for changing it to `node_type = tree.__class__` in the CI test in this PR.\r\n\r\n```\r\n[2023-11-08 23:04:51,484] [0/0] torch._dynamo.variables.higher_order_ops: [WARNING] speculate_subgraph: while introspecting wrap, we were unable to trace function `NestedUserFunctionVariable` into a single graph. This means that Dynamo was unable to prove safety for this API and will fall back to eager-mode PyTorch, which could lead to a slowdown.\r\n[2023-11-08 23:04:51,484] [0/0] torch._dynamo.variables.higher_order_ops: [ERROR] Can't access members of type(obj) for a generated custom object. Please use __class__ instead\r\n[2023-11-08 23:04:51,484] [0/0] torch._dynamo.variables.higher_order_ops: [ERROR] For more information about this error, see: https://pytorch.org/docs/main/generated/exportdb/index.html#type-reflection-method\r\nEinline_call [(\"Can't access members of type(obj) for a generated custom object. Please use __class__ instead\\nFor more information about this error, see: https://pytorch.org/docs/main/generated/exportdb/index.html#type-reflection-method\", 1)]\r\nunimplemented [(\"speculate_subgraph: while introspecting wrap, we were unable to trace function `NestedUserFunctionVariable` into a single graph. This means that Dynamo was unable to prove safety for this API and will fall back to eager-mode PyTorch, which could lead to a slowdown. Scroll up for the stack trace of the initial exception. The reason was: Can't access members of type(obj) for a generated custom object. Please use __class__ instead\\nFor more information about this error, see: https://pytorch.org/docs/main/generated/exportdb/index.html#type-reflection-method\", 1)]\r\n```\r\n\r\nShould we change all `type(obj)` to `obj.__class__` in the pytree utilities?", - "pr_file_module": null - }, - { - "comment_id": "1386815594", - "repo_full_name": "pytorch/pytorch", - "pr_number": 113258, - "pr_file": "torch/utils/_pytree/api/python.py", - "discussion_id": "1386767400", - "commented_code": "@@ -465,12 +491,23 @@ def _deque_unflatten(values: Iterable[Any], context: Context) -> Deque[Any]:\n _deque_unflatten,\n serialized_type_name=\"collections.deque\",\n )\n+_private_register_pytree_node(\n+ structseq,\n+ _structseq_flatten,\n+ _structseq_unflatten,\n+ serialized_type_name=\"structseq\",\n+ to_dumpable_context=_structseq_serialize,\n+ from_dumpable_context=_structseq_deserialize,\n+)\n \n \n def _get_node_type(tree: Any) -> Any:\n- node_type = type(tree)\n- if node_type not in SUPPORTED_NODES and is_namedtuple_class(node_type):\n- return namedtuple\n+ node_type = tree.__class__ # Dynamo complains about using `type(tree)`", - "comment_created_at": "2023-11-08T15:35:09+00:00", - "comment_author": "XuehaiPan", - "comment_body": "Now the new CI test errors ask to change back to `node_type = type(tree)`.\r\n\r\nUse `node_type = type(tree)`, will get error:\r\n\r\n
\r\nError outputs\r\n\r\n```console\r\n$ python test/dynamo/test_higher_order_ops.py -k test_access_module_attr\r\n[2023-11-08 23:32:18,089] [0/0] torch._dynamo.variables.higher_order_ops: [WARNING] speculate_subgraph: while introspecting wrap, we were unable to trace function `NestedUserFunctionVariable` into a single graph. This means that Dynamo was unable to prove safety for this API and will fall back to eager-mode PyTorch, which could lead to a slowdown.\r\n[2023-11-08 23:32:18,089] [0/0] torch._dynamo.variables.higher_order_ops: [ERROR] Can't access members of type(obj) for a generated custom object. Please use __class__ instead\r\n[2023-11-08 23:32:18,089] [0/0] torch._dynamo.variables.higher_order_ops: [ERROR] For more information about this error, see: https://pytorch.org/docs/main/generated/exportdb/index.html#type-reflection-method\r\nEinline_call [(\"Can't access members of type(obj) for a generated custom object. Please use __class__ instead\\nFor more information about this error, see: https://pytorch.org/docs/main/generated/exportdb/index.html#type-reflection-method\", 1)]\r\nunimplemented [(\"speculate_subgraph: while introspecting wrap, we were unable to trace function `NestedUserFunctionVariable` into a single graph. This means that Dynamo was unable to prove safety for this API and will fall back to eager-mode PyTorch, which could lead to a slowdown. Scroll up for the stack trace of the initial exception. The reason was: Can't access members of type(obj) for a generated custom object. Please use __class__ instead\\nFor more information about this error, see: https://pytorch.org/docs/main/generated/exportdb/index.html#type-reflection-method\", 1)]\r\n\r\n======================================================================\r\nERROR: test_access_module_attr (__main__.HigherOrderOpTests.test_access_module_attr)\r\n----------------------------------------------------------------------\r\nTraceback (most recent call last):\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/higher_order_ops.py\", line 229, in speculate_subgraph\r\n tree_flatten_output = tree_flatten.call_function(tx, [output], {})\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File 
\"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, 
inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n 
getattr(self, inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1298, in LOAD_ATTR\r\n result = BuiltinVariable(getattr).call_function(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/builtin.py\", line 614, in call_function\r\n result = handler(tx, *args, **kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/builtin.py\", line 1151, in call_getattr\r\n return obj.var_getattr(tx, name).clone(source=source)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/base.py\", line 262, in var_getattr\r\n value = self.const_getattr(tx, name)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/constant.py\", line 122, in const_getattr\r\n raise UserError(\r\ntorch._dynamo.exc.UserError: Can't access members of type(obj) for a generated custom object. Please use __class__ instead\r\nFor more information about this error, see: https://pytorch.org/docs/main/generated/exportdb/index.html#type-reflection-method\r\n\r\nThe above exception was the direct cause of the following exception:\r\n\r\nTraceback (most recent call last):\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/testing/_internal/common_utils.py\", line 2536, in wrapper\r\n method(*args, **kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/test/dynamo/test_higher_order_ops.py\", line 1830, in test_access_module_attr\r\n result = f(x)\r\n ^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/eval_frame.py\", line 409, in _fn\r\n return fn(*args, **kwargs)\r\n ^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/eval_frame.py\", line 570, in catch_errors\r\n return callback(frame, cache_entry, hooks, frame_state)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/convert_frame.py\", line 377, in _convert_frame_assert\r\n return _compile(\r\n ^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/convert_frame.py\", line 594, in _compile\r\n guarded_code = compile_inner(code, one_graph, hooks, transform)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/utils.py\", line 222, in time_wrapper\r\n r = func(*args, **kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/convert_frame.py\", line 511, in compile_inner\r\n out_code = transform_code_object(code, transform)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/bytecode_transformation.py\", line 1033, in transform_code_object\r\n transformations(instructions, code_options)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/convert_frame.py\", line 150, in _fn\r\n return fn(*args, **kwargs)\r\n ^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/convert_frame.py\", line 476, in transform\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2120, in run\r\n super().run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File 
\"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/higher_order_ops.py\", line 1227, in call_function\r\n p_args, p_kwargs, example_value, treespec = self.create_wrapped_node(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/higher_order_ops.py\", line 1186, in create_wrapped_node\r\n ) = speculate_subgraph(\r\n ^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/higher_order_ops.py\", line 288, in speculate_subgraph\r\n raise Unsupported(\r\ntorch._dynamo.exc.Unsupported: speculate_subgraph: while introspecting wrap, we were unable to trace function `NestedUserFunctionVariable` into a single graph. This means that Dynamo was unable to prove safety for this API and will fall back to eager-mode PyTorch, which could lead to a slowdown. Scroll up for the stack trace of the initial exception. The reason was: Can't access members of type(obj) for a generated custom object. Please use __class__ instead\r\nFor more information about this error, see: https://pytorch.org/docs/main/generated/exportdb/index.html#type-reflection-method\r\n\r\nfrom user code:\r\n File \"/Users/PanXuehai/Projects/pytorch/test/dynamo/test_higher_order_ops.py\", line 1828, in f\r\n return wrap(lambda y: y - mod.bias, y)\r\n\r\nSet TORCH_LOGS=\"+dynamo\" and TORCHDYNAMO_VERBOSE=1 for more information\r\n\r\n\r\nYou can suppress this exception and fall back to eager by setting:\r\n import torch._dynamo\r\n torch._dynamo.config.suppress_errors = True\r\n\r\n\r\nTo execute this test, run the following from the base repo dir:\r\n python test/dynamo/test_higher_order_ops.py -k test_access_module_attr\r\n\r\nThis message can be suppressed by setting PYTORCH_PRINT_REPRO_ON_FAILURE=0\r\n\r\n----------------------------------------------------------------------\r\nRan 1 test in 0.089s\r\n\r\nFAILED (errors=1)\r\n```\r\n\r\n
\r\n\r\n------\r\n\r\nUse `node_type = tree.__class__`, will get error:\r\n\r\n
\r\nError outputs\r\n\r\n```console\r\n$ python test/dynamo/test_misc.py -k test_tracing_tree_map_only\r\nEinline_call [('call_method ConstDictVariable() __contains__ [GetAttrVariable(ConstDictVariable(), __class__)] {}', 1)]\r\n\r\n======================================================================\r\nERROR: test_tracing_tree_map_only (__main__.MiscTests.test_tracing_tree_map_only)\r\n----------------------------------------------------------------------\r\nTraceback (most recent call last):\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/testing/_internal/common_utils.py\", line 2536, in wrapper\r\n method(*args, **kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/test/dynamo/test_misc.py\", line 7174, in test_tracing_tree_map_only\r\n comp_out = torch._dynamo.optimize(counter, nopython=True)(fn)(xsb)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/eval_frame.py\", line 409, in _fn\r\n return fn(*args, **kwargs)\r\n ^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/eval_frame.py\", line 570, in catch_errors\r\n return callback(frame, cache_entry, hooks, frame_state)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/convert_frame.py\", line 377, in _convert_frame_assert\r\n return _compile(\r\n ^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/convert_frame.py\", line 594, in _compile\r\n guarded_code = compile_inner(code, one_graph, hooks, transform)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/utils.py\", line 222, in time_wrapper\r\n r = func(*args, **kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/convert_frame.py\", line 511, in compile_inner\r\n out_code = transform_code_object(code, transform)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/bytecode_transformation.py\", line 1033, in transform_code_object\r\n transformations(instructions, code_options)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/convert_frame.py\", line 150, in _fn\r\n return fn(*args, **kwargs)\r\n ^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/convert_frame.py\", line 476, in transform\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2120, in run\r\n super().run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File 
\"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, kwargs)\r\n 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, 
kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 469, in wrapper\r\n return inner_fn(self, inst)\r\n ^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1799, in CALL\r\n self.call_function(fn, args, kwargs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 650, in call_function\r\n self.push(fn.call_function(self, args, kwargs))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 248, in call_function\r\n return super().call_function(tx, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/functions.py\", line 81, in call_function\r\n return tx.inline_user_function_return(\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 685, in inline_user_function_return\r\n return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2252, in inline_call\r\n return cls.inline_call_(parent, func, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 2384, in inline_call_\r\n tracer.run()\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 815, in run\r\n and self.step()\r\n ^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 778, in step\r\n getattr(self, inst.opname)(inst)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/symbolic_convert.py\", line 1639, in CONTAINS_OP\r\n self.push(right.call_method(self, \"__contains__\", [left], {}))\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/dicts.py\", line 183, in call_method\r\n return super().call_method(tx, name, args, kwargs)\r\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/variables/base.py\", line 321, in call_method\r\n raise unimplemented(f\"call_method {self} {name} {args} {kwargs}\")\r\n 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/_dynamo/exc.py\", line 192, in unimplemented\r\n raise Unsupported(msg)\r\ntorch._dynamo.exc.Unsupported: call_method ConstDictVariable() __contains__ [GetAttrVariable(ConstDictVariable(), __class__)] {}\r\n\r\nfrom user code:\r\n File \"/Users/PanXuehai/Projects/pytorch/test/dynamo/test_misc.py\", line 7166, in fn\r\n y = pytree.tree_map_only(torch.Tensor, mapper, xs)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/utils/_pytree/api/python.py\", line 761, in tree_map_only\r\n return tree_map(map_only(__type_or_types)(func), tree)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/utils/_pytree/api/python.py\", line 652, in tree_map\r\n flat_args, spec = tree_flatten(tree)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/utils/_pytree/api/python.py\", line 587, in tree_flatten\r\n spec = _tree_flatten_helper(tree, leaves)\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/utils/_pytree/api/python.py\", line 568, in _tree_flatten_helper\r\n if _is_leaf(tree):\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/utils/_pytree/api/python.py\", line 516, in _is_leaf\r\n return _get_node_type(tree) not in SUPPORTED_NODES\r\n File \"/Users/PanXuehai/Projects/pytorch/torch/utils/_pytree/api/python.py\", line 506, in _get_node_type\r\n if node_type not in SUPPORTED_NODES:\r\n\r\nSet TORCH_LOGS=\"+dynamo\" and TORCHDYNAMO_VERBOSE=1 for more information\r\n\r\n\r\nYou can suppress this exception and fall back to eager by setting:\r\n import torch._dynamo\r\n torch._dynamo.config.suppress_errors = True\r\n\r\n\r\nTo execute this test, run the following from the base repo dir:\r\n python test/dynamo/test_misc.py -k test_tracing_tree_map_only\r\n\r\nThis message can be suppressed by setting PYTORCH_PRINT_REPRO_ON_FAILURE=0\r\n\r\n----------------------------------------------------------------------\r\nRan 1 test in 0.447s\r\n\r\nFAILED (errors=1)\r\n```\r\n\r\n
", - "pr_file_module": null - }, - { - "comment_id": "1386923960", - "repo_full_name": "pytorch/pytorch", - "pr_number": 113258, - "pr_file": "torch/utils/_pytree/api/python.py", - "discussion_id": "1386767400", - "commented_code": "@@ -465,12 +491,23 @@ def _deque_unflatten(values: Iterable[Any], context: Context) -> Deque[Any]:\n _deque_unflatten,\n serialized_type_name=\"collections.deque\",\n )\n+_private_register_pytree_node(\n+ structseq,\n+ _structseq_flatten,\n+ _structseq_unflatten,\n+ serialized_type_name=\"structseq\",\n+ to_dumpable_context=_structseq_serialize,\n+ from_dumpable_context=_structseq_deserialize,\n+)\n \n \n def _get_node_type(tree: Any) -> Any:\n- node_type = type(tree)\n- if node_type not in SUPPORTED_NODES and is_namedtuple_class(node_type):\n- return namedtuple\n+ node_type = tree.__class__ # Dynamo complains about using `type(tree)`", - "comment_created_at": "2023-11-08T16:47:26+00:00", - "comment_author": "XuehaiPan", - "comment_body": "I found that this error was caused by Dynamo not yet implementing `cls.__base__`.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-keep-apis-user-centric-simple.json b/_reviewers/pytorch-keep-apis-user-centric-simple.json new file mode 100644 index 0000000..7344f88 --- /dev/null +++ b/_reviewers/pytorch-keep-apis-user-centric-simple.json @@ -0,0 +1,162 @@ +[ + { + "discussion_id": "2153268320", + "pr_number": 156207, + "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", + "created_at": "2025-06-17T22:23:21+00:00", + "commented_code": "planner=planner,\n )\n\ndef default_stage(state_dict: STATE_DICT_TYPE, cpu_state_dict: STATE_DICT_TYPE) -> STATE_DICT_TYPE:\n \"\"\"Default stage function. This function will stage the state_dict on to the\n staging storage (defaults to CPU memory).\n\n Args:\n state_dict STATE_DICT_TYPE: The state_dict to stage.\n\n Returns:\n STATE_DICT_TYPE: staged state_dict\n \"\"\"\n staging_stream = torch.cuda.Stream()\n with staging_stream:\n staged_state_dict = _copy_state_dict(state_dict, cpu_state_dict, True, type_check=False)\n staging_stream.synchronize()\n return staged_state_dict\n\n@dataclass\nclass AsyncSaveResponse:\n \"\"\"This class contains futures for staging and upload completion\"\"\"\n staging_completion: Future[None]\n upload_completion: Future[None]\n\ndef close(): \n \"\"\"\n MUST CALL THIS FUNCTION AFTER TRAINING IS COMPLETE IF USING ASYNC_SAVE\n \"\"\"", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2153268320", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156207, + "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", + "discussion_id": "2153268320", + "commented_code": "@@ -181,6 +189,36 @@ def save(\n planner=planner,\n )\n \n+def default_stage(state_dict: STATE_DICT_TYPE, cpu_state_dict: STATE_DICT_TYPE) -> STATE_DICT_TYPE:\n+ \"\"\"Default stage function. 
This function will stage the state_dict on to the\n+ staging storage (defaults to CPU memory).\n+\n+ Args:\n+ state_dict STATE_DICT_TYPE: The state_dict to stage.\n+\n+ Returns:\n+ STATE_DICT_TYPE: staged state_dict\n+ \"\"\"\n+ staging_stream = torch.cuda.Stream()\n+ with staging_stream:\n+ staged_state_dict = _copy_state_dict(state_dict, cpu_state_dict, True, type_check=False)\n+ staging_stream.synchronize()\n+ return staged_state_dict\n+\n+@dataclass\n+class AsyncSaveResponse:\n+ \"\"\"This class contains futures for staging and upload completion\"\"\"\n+ staging_completion: Future[None]\n+ upload_completion: Future[None]\n+\n+def close(): \n+ \"\"\"\n+ MUST CALL THIS FUNCTION AFTER TRAINING IS COMPLETE IF USING ASYNC_SAVE\n+ \"\"\"", + "comment_created_at": "2025-06-17T22:23:21+00:00", + "comment_author": "fegin", + "comment_body": "Probably not a good idea to ask users to selectively call `close()`. I would suggest that users should always call `close()`. Also if this is a public API, the docstring should follow the template. You can check other docstring.", + "pr_file_module": null + }, + { + "comment_id": "2153271922", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156207, + "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", + "discussion_id": "2153268320", + "commented_code": "@@ -181,6 +189,36 @@ def save(\n planner=planner,\n )\n \n+def default_stage(state_dict: STATE_DICT_TYPE, cpu_state_dict: STATE_DICT_TYPE) -> STATE_DICT_TYPE:\n+ \"\"\"Default stage function. This function will stage the state_dict on to the\n+ staging storage (defaults to CPU memory).\n+\n+ Args:\n+ state_dict STATE_DICT_TYPE: The state_dict to stage.\n+\n+ Returns:\n+ STATE_DICT_TYPE: staged state_dict\n+ \"\"\"\n+ staging_stream = torch.cuda.Stream()\n+ with staging_stream:\n+ staged_state_dict = _copy_state_dict(state_dict, cpu_state_dict, True, type_check=False)\n+ staging_stream.synchronize()\n+ return staged_state_dict\n+\n+@dataclass\n+class AsyncSaveResponse:\n+ \"\"\"This class contains futures for staging and upload completion\"\"\"\n+ staging_completion: Future[None]\n+ upload_completion: Future[None]\n+\n+def close(): \n+ \"\"\"\n+ MUST CALL THIS FUNCTION AFTER TRAINING IS COMPLETE IF USING ASYNC_SAVE\n+ \"\"\"", + "comment_created_at": "2025-06-17T22:27:10+00:00", + "comment_author": "fegin", + "comment_body": "Should we also wait for the last `async_save` inside this API as well?", + "pr_file_module": null + }, + { + "comment_id": "2153341662", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156207, + "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", + "discussion_id": "2153268320", + "commented_code": "@@ -181,6 +189,36 @@ def save(\n planner=planner,\n )\n \n+def default_stage(state_dict: STATE_DICT_TYPE, cpu_state_dict: STATE_DICT_TYPE) -> STATE_DICT_TYPE:\n+ \"\"\"Default stage function. 
This function will stage the state_dict on to the\n+ staging storage (defaults to CPU memory).\n+\n+ Args:\n+ state_dict STATE_DICT_TYPE: The state_dict to stage.\n+\n+ Returns:\n+ STATE_DICT_TYPE: staged state_dict\n+ \"\"\"\n+ staging_stream = torch.cuda.Stream()\n+ with staging_stream:\n+ staged_state_dict = _copy_state_dict(state_dict, cpu_state_dict, True, type_check=False)\n+ staging_stream.synchronize()\n+ return staged_state_dict\n+\n+@dataclass\n+class AsyncSaveResponse:\n+ \"\"\"This class contains futures for staging and upload completion\"\"\"\n+ staging_completion: Future[None]\n+ upload_completion: Future[None]\n+\n+def close(): \n+ \"\"\"\n+ MUST CALL THIS FUNCTION AFTER TRAINING IS COMPLETE IF USING ASYNC_SAVE\n+ \"\"\"", + "comment_created_at": "2025-06-17T23:44:42+00:00", + "comment_author": "Saiteja64", + "comment_body": "I think we should just leave that to the users. In general, I want to limit global training state within DCP as much as possible. ", + "pr_file_module": null + }, + { + "comment_id": "2159417737", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156207, + "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", + "discussion_id": "2153268320", + "commented_code": "@@ -181,6 +189,36 @@ def save(\n planner=planner,\n )\n \n+def default_stage(state_dict: STATE_DICT_TYPE, cpu_state_dict: STATE_DICT_TYPE) -> STATE_DICT_TYPE:\n+ \"\"\"Default stage function. This function will stage the state_dict on to the\n+ staging storage (defaults to CPU memory).\n+\n+ Args:\n+ state_dict STATE_DICT_TYPE: The state_dict to stage.\n+\n+ Returns:\n+ STATE_DICT_TYPE: staged state_dict\n+ \"\"\"\n+ staging_stream = torch.cuda.Stream()\n+ with staging_stream:\n+ staged_state_dict = _copy_state_dict(state_dict, cpu_state_dict, True, type_check=False)\n+ staging_stream.synchronize()\n+ return staged_state_dict\n+\n+@dataclass\n+class AsyncSaveResponse:\n+ \"\"\"This class contains futures for staging and upload completion\"\"\"\n+ staging_completion: Future[None]\n+ upload_completion: Future[None]\n+\n+def close(): \n+ \"\"\"\n+ MUST CALL THIS FUNCTION AFTER TRAINING IS COMPLETE IF USING ASYNC_SAVE\n+ \"\"\"", + "comment_created_at": "2025-06-20T17:24:22+00:00", + "comment_author": "teja-rao", + "comment_body": "Can we cache the state in AsyncStager and let user manage the lifetime of async stager? this will allow user to init async stager and destroy as needed. this can be done on every checkpoint or at the end of the job as the user sees fit.\r\n\r\nNow, close() method is out of context. It is not on any resource/obj. It is hard for users to understand what close means and why they have to call it. 
\r\n\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2159421534", + "pr_number": 156207, + "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", + "created_at": "2025-06-20T17:27:28+00:00", + "commented_code": "\"A CPU backend must be enabled for async save; try initializing process group with 'cpu:gloo,cuda:nccl'\"\n )\n\n use_default_staging = False\n if storage_writer is None:\n use_default_staging = True\n\n storage_writer = cast(\n StorageWriter, _storage_setup(storage_writer, checkpoint_id, reader=False)\n )\n\n state_dict = _stateful_to_state_dict(state_dict)\n\n @_dcp_method_logger(log_exceptions=True)\n def stage_state_dict():\n if isinstance(storage_writer, AsyncStager):\n staged_state_dict = storage_writer.stage(state_dict)\n else: # provides bwc for storage_writers not implementing AsyncStager\n staged_state_dict = _create_cpu_state_dict(state_dict)\n _copy_state_dict(state_dict, staged_state_dict, type_check=False)\n\n return staged_state_dict\n\n staged_state_dict = stage_state_dict()\n\n executor: _AsyncCheckpointExecutor = (\n def stage_state_dict() -> Future[STATE_DICT_TYPE]:\n staging_executor = ThreadPoolExecutor(max_workers=1)\n if isinstance(storage_writer, AsyncStager) and not use_default_staging:\n staging_future = staging_executor.submit(storage_writer.stage, state_dict)\n else:\n # provides bwc for storage_writers not implementing AsyncStager", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2159421534", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156207, + "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", + "discussion_id": "2159421534", + "commented_code": "@@ -249,49 +305,70 @@ def async_save(\n \"A CPU backend must be enabled for async save; try initializing process group with 'cpu:gloo,cuda:nccl'\"\n )\n \n+ use_default_staging = False\n+ if storage_writer is None:\n+ use_default_staging = True\n+\n storage_writer = cast(\n StorageWriter, _storage_setup(storage_writer, checkpoint_id, reader=False)\n )\n \n state_dict = _stateful_to_state_dict(state_dict)\n \n @_dcp_method_logger(log_exceptions=True)\n- def stage_state_dict():\n- if isinstance(storage_writer, AsyncStager):\n- staged_state_dict = storage_writer.stage(state_dict)\n- else: # provides bwc for storage_writers not implementing AsyncStager\n- staged_state_dict = _create_cpu_state_dict(state_dict)\n- _copy_state_dict(state_dict, staged_state_dict, type_check=False)\n-\n- return staged_state_dict\n-\n- staged_state_dict = stage_state_dict()\n-\n- executor: _AsyncCheckpointExecutor = (\n+ def stage_state_dict() -> Future[STATE_DICT_TYPE]:\n+ staging_executor = ThreadPoolExecutor(max_workers=1)\n+ if isinstance(storage_writer, AsyncStager) and not use_default_staging:\n+ staging_future = staging_executor.submit(storage_writer.stage, state_dict)\n+ else:\n+ # provides bwc for storage_writers not implementing AsyncStager", + "comment_created_at": "2025-06-20T17:27:28+00:00", + "comment_author": "teja-rao", + "comment_body": "do we need to handle this case? can we ask user to implement async stager if they need to use zero-copy? I think it is simpler to support and cleaner from API point of view. 
", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2167615520", + "pr_number": 156207, + "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", + "created_at": "2025-06-25T20:54:34+00:00", + "commented_code": "planner: Optional[SavePlanner] = None,\n process_group: Optional[dist.ProcessGroup] = None,\n async_checkpointer_type: AsyncCheckpointerType = AsyncCheckpointerType.THREAD,\n) -> Future:\n async_stager: Optional[AsyncStager] = None,\n block_on_staging: bool = True,", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2167615520", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156207, + "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", + "discussion_id": "2167615520", + "commented_code": "@@ -191,12 +210,15 @@ def async_save(\n planner: Optional[SavePlanner] = None,\n process_group: Optional[dist.ProcessGroup] = None,\n async_checkpointer_type: AsyncCheckpointerType = AsyncCheckpointerType.THREAD,\n-) -> Future:\n+ async_stager: Optional[AsyncStager] = None,\n+ block_on_staging: bool = True,", + "comment_created_at": "2025-06-25T20:54:34+00:00", + "comment_author": "meetv18", + "comment_body": "One suggestion: \r\n\r\nCan we have these as a property in AsyncStager protocol, instead of directly bloating the API. These are mainly for power users, a simple/first time user will probably be perplexed by additional params that aren't self explanatory. ", + "pr_file_module": null + }, + { + "comment_id": "2167979087", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156207, + "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", + "discussion_id": "2167615520", + "commented_code": "@@ -191,12 +210,15 @@ def async_save(\n planner: Optional[SavePlanner] = None,\n process_group: Optional[dist.ProcessGroup] = None,\n async_checkpointer_type: AsyncCheckpointerType = AsyncCheckpointerType.THREAD,\n-) -> Future:\n+ async_stager: Optional[AsyncStager] = None,\n+ block_on_staging: bool = True,", + "comment_created_at": "2025-06-26T02:34:55+00:00", + "comment_author": "Saiteja64", + "comment_body": "For async_stager argument, this is a directional change. We are separating out the stager from storage_writer. Conceptually, you should be able to use a stager with any storage_writer. Combining the two just forces us and users to change StorageWriter every time we want to make a change to stager (new properties, etc)\r\n\r\nblock_on_staging: Think we can remove this one. Works for all cases and simplifies API:\r\n1. If user configures AsyncStager to return a future instead of a state_dict, driver code can just return that future. \r\n2. 
If AsyncStager returns state_dict we can just keep existing logic of checking synchronize_after_execute and call synchronize_staging if needed", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1939922583", + "pr_number": 145000, + "pr_file": "torch/__init__.py", + "created_at": "2025-02-03T19:34:21+00:00", + "commented_code": "matrix_rank,\n solve,\n)\nfrom torch.utils.dlpack import from_dlpack, to_dlpack\nfrom torch.utils.dlpack import from_dlpack, to_dlpack, to_dlpack_unversioned", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "1939922583", + "repo_full_name": "pytorch/pytorch", + "pr_number": 145000, + "pr_file": "torch/__init__.py", + "discussion_id": "1939922583", + "commented_code": "@@ -2270,7 +2270,7 @@ def compiled_with_cxx11_abi() -> builtins.bool:\n matrix_rank,\n solve,\n )\n-from torch.utils.dlpack import from_dlpack, to_dlpack\n+from torch.utils.dlpack import from_dlpack, to_dlpack, to_dlpack_unversioned", + "comment_created_at": "2025-02-03T19:34:21+00:00", + "comment_author": "albanD", + "comment_body": "While I understand this matches current code, I don't think we want to have this as a new top level API? Being part of torch.utils.dlpack is enough?", + "pr_file_module": null + }, + { + "comment_id": "1941436061", + "repo_full_name": "pytorch/pytorch", + "pr_number": 145000, + "pr_file": "torch/__init__.py", + "discussion_id": "1939922583", + "commented_code": "@@ -2270,7 +2270,7 @@ def compiled_with_cxx11_abi() -> builtins.bool:\n matrix_rank,\n solve,\n )\n-from torch.utils.dlpack import from_dlpack, to_dlpack\n+from torch.utils.dlpack import from_dlpack, to_dlpack, to_dlpack_unversioned", + "comment_created_at": "2025-02-04T15:50:51+00:00", + "comment_author": "rgommers", + "comment_body": "That comment sounds right - I don't think it should be needed. For the introduction strategy to a versioned protocol, see the explanation and prototype code (`if max_version is None: ....`) at https://data-apis.org/array-api/latest/API_specification/generated/array_api.array.__dlpack__.html", + "pr_file_module": null + }, + { + "comment_id": "1941444094", + "repo_full_name": "pytorch/pytorch", + "pr_number": 145000, + "pr_file": "torch/__init__.py", + "discussion_id": "1939922583", + "commented_code": "@@ -2270,7 +2270,7 @@ def compiled_with_cxx11_abi() -> builtins.bool:\n matrix_rank,\n solve,\n )\n-from torch.utils.dlpack import from_dlpack, to_dlpack\n+from torch.utils.dlpack import from_dlpack, to_dlpack, to_dlpack_unversioned", + "comment_created_at": "2025-02-04T15:55:19+00:00", + "comment_author": "rgommers", + "comment_body": "If it's useful/needed to have a Python function to use here so that can be called from within `Tensor.__dlpack__`, then making it a private function by prepending an underscore should be fine I think.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-keep-apis-user-centric-simple.md b/_reviewers/pytorch-keep-apis-user-centric-simple.md index 0d68b0d..2b1d32e 100644 --- a/_reviewers/pytorch-keep-apis-user-centric-simple.md +++ b/_reviewers/pytorch-keep-apis-user-centric-simple.md @@ -43,167 +43,3 @@ class AsyncStager: ``` This makes the basic API simple while allowing advanced users to configure behavior through well-defined configuration objects. 
- - -[ - { - "discussion_id": "2153268320", - "pr_number": 156207, - "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", - "created_at": "2025-06-17T22:23:21+00:00", - "commented_code": "planner=planner,\n )\n\ndef default_stage(state_dict: STATE_DICT_TYPE, cpu_state_dict: STATE_DICT_TYPE) -> STATE_DICT_TYPE:\n \"\"\"Default stage function. This function will stage the state_dict on to the\n staging storage (defaults to CPU memory).\n\n Args:\n state_dict STATE_DICT_TYPE: The state_dict to stage.\n\n Returns:\n STATE_DICT_TYPE: staged state_dict\n \"\"\"\n staging_stream = torch.cuda.Stream()\n with staging_stream:\n staged_state_dict = _copy_state_dict(state_dict, cpu_state_dict, True, type_check=False)\n staging_stream.synchronize()\n return staged_state_dict\n\n@dataclass\nclass AsyncSaveResponse:\n \"\"\"This class contains futures for staging and upload completion\"\"\"\n staging_completion: Future[None]\n upload_completion: Future[None]\n\ndef close(): \n \"\"\"\n MUST CALL THIS FUNCTION AFTER TRAINING IS COMPLETE IF USING ASYNC_SAVE\n \"\"\"", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2153268320", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156207, - "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", - "discussion_id": "2153268320", - "commented_code": "@@ -181,6 +189,36 @@ def save(\n planner=planner,\n )\n \n+def default_stage(state_dict: STATE_DICT_TYPE, cpu_state_dict: STATE_DICT_TYPE) -> STATE_DICT_TYPE:\n+ \"\"\"Default stage function. This function will stage the state_dict on to the\n+ staging storage (defaults to CPU memory).\n+\n+ Args:\n+ state_dict STATE_DICT_TYPE: The state_dict to stage.\n+\n+ Returns:\n+ STATE_DICT_TYPE: staged state_dict\n+ \"\"\"\n+ staging_stream = torch.cuda.Stream()\n+ with staging_stream:\n+ staged_state_dict = _copy_state_dict(state_dict, cpu_state_dict, True, type_check=False)\n+ staging_stream.synchronize()\n+ return staged_state_dict\n+\n+@dataclass\n+class AsyncSaveResponse:\n+ \"\"\"This class contains futures for staging and upload completion\"\"\"\n+ staging_completion: Future[None]\n+ upload_completion: Future[None]\n+\n+def close(): \n+ \"\"\"\n+ MUST CALL THIS FUNCTION AFTER TRAINING IS COMPLETE IF USING ASYNC_SAVE\n+ \"\"\"", - "comment_created_at": "2025-06-17T22:23:21+00:00", - "comment_author": "fegin", - "comment_body": "Probably not a good idea to ask users to selectively call `close()`. I would suggest that users should always call `close()`. Also if this is a public API, the docstring should follow the template. You can check other docstring.", - "pr_file_module": null - }, - { - "comment_id": "2153271922", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156207, - "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", - "discussion_id": "2153268320", - "commented_code": "@@ -181,6 +189,36 @@ def save(\n planner=planner,\n )\n \n+def default_stage(state_dict: STATE_DICT_TYPE, cpu_state_dict: STATE_DICT_TYPE) -> STATE_DICT_TYPE:\n+ \"\"\"Default stage function. 
This function will stage the state_dict on to the\n+ staging storage (defaults to CPU memory).\n+\n+ Args:\n+ state_dict STATE_DICT_TYPE: The state_dict to stage.\n+\n+ Returns:\n+ STATE_DICT_TYPE: staged state_dict\n+ \"\"\"\n+ staging_stream = torch.cuda.Stream()\n+ with staging_stream:\n+ staged_state_dict = _copy_state_dict(state_dict, cpu_state_dict, True, type_check=False)\n+ staging_stream.synchronize()\n+ return staged_state_dict\n+\n+@dataclass\n+class AsyncSaveResponse:\n+ \"\"\"This class contains futures for staging and upload completion\"\"\"\n+ staging_completion: Future[None]\n+ upload_completion: Future[None]\n+\n+def close(): \n+ \"\"\"\n+ MUST CALL THIS FUNCTION AFTER TRAINING IS COMPLETE IF USING ASYNC_SAVE\n+ \"\"\"", - "comment_created_at": "2025-06-17T22:27:10+00:00", - "comment_author": "fegin", - "comment_body": "Should we also wait for the last `async_save` inside this API as well?", - "pr_file_module": null - }, - { - "comment_id": "2153341662", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156207, - "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", - "discussion_id": "2153268320", - "commented_code": "@@ -181,6 +189,36 @@ def save(\n planner=planner,\n )\n \n+def default_stage(state_dict: STATE_DICT_TYPE, cpu_state_dict: STATE_DICT_TYPE) -> STATE_DICT_TYPE:\n+ \"\"\"Default stage function. This function will stage the state_dict on to the\n+ staging storage (defaults to CPU memory).\n+\n+ Args:\n+ state_dict STATE_DICT_TYPE: The state_dict to stage.\n+\n+ Returns:\n+ STATE_DICT_TYPE: staged state_dict\n+ \"\"\"\n+ staging_stream = torch.cuda.Stream()\n+ with staging_stream:\n+ staged_state_dict = _copy_state_dict(state_dict, cpu_state_dict, True, type_check=False)\n+ staging_stream.synchronize()\n+ return staged_state_dict\n+\n+@dataclass\n+class AsyncSaveResponse:\n+ \"\"\"This class contains futures for staging and upload completion\"\"\"\n+ staging_completion: Future[None]\n+ upload_completion: Future[None]\n+\n+def close(): \n+ \"\"\"\n+ MUST CALL THIS FUNCTION AFTER TRAINING IS COMPLETE IF USING ASYNC_SAVE\n+ \"\"\"", - "comment_created_at": "2025-06-17T23:44:42+00:00", - "comment_author": "Saiteja64", - "comment_body": "I think we should just leave that to the users. In general, I want to limit global training state within DCP as much as possible. ", - "pr_file_module": null - }, - { - "comment_id": "2159417737", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156207, - "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", - "discussion_id": "2153268320", - "commented_code": "@@ -181,6 +189,36 @@ def save(\n planner=planner,\n )\n \n+def default_stage(state_dict: STATE_DICT_TYPE, cpu_state_dict: STATE_DICT_TYPE) -> STATE_DICT_TYPE:\n+ \"\"\"Default stage function. 
This function will stage the state_dict on to the\n+ staging storage (defaults to CPU memory).\n+\n+ Args:\n+ state_dict STATE_DICT_TYPE: The state_dict to stage.\n+\n+ Returns:\n+ STATE_DICT_TYPE: staged state_dict\n+ \"\"\"\n+ staging_stream = torch.cuda.Stream()\n+ with staging_stream:\n+ staged_state_dict = _copy_state_dict(state_dict, cpu_state_dict, True, type_check=False)\n+ staging_stream.synchronize()\n+ return staged_state_dict\n+\n+@dataclass\n+class AsyncSaveResponse:\n+ \"\"\"This class contains futures for staging and upload completion\"\"\"\n+ staging_completion: Future[None]\n+ upload_completion: Future[None]\n+\n+def close(): \n+ \"\"\"\n+ MUST CALL THIS FUNCTION AFTER TRAINING IS COMPLETE IF USING ASYNC_SAVE\n+ \"\"\"", - "comment_created_at": "2025-06-20T17:24:22+00:00", - "comment_author": "teja-rao", - "comment_body": "Can we cache the state in AsyncStager and let user manage the lifetime of async stager? this will allow user to init async stager and destroy as needed. this can be done on every checkpoint or at the end of the job as the user sees fit.\r\n\r\nNow, close() method is out of context. It is not on any resource/obj. It is hard for users to understand what close means and why they have to call it. \r\n\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2159421534", - "pr_number": 156207, - "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", - "created_at": "2025-06-20T17:27:28+00:00", - "commented_code": "\"A CPU backend must be enabled for async save; try initializing process group with 'cpu:gloo,cuda:nccl'\"\n )\n\n use_default_staging = False\n if storage_writer is None:\n use_default_staging = True\n\n storage_writer = cast(\n StorageWriter, _storage_setup(storage_writer, checkpoint_id, reader=False)\n )\n\n state_dict = _stateful_to_state_dict(state_dict)\n\n @_dcp_method_logger(log_exceptions=True)\n def stage_state_dict():\n if isinstance(storage_writer, AsyncStager):\n staged_state_dict = storage_writer.stage(state_dict)\n else: # provides bwc for storage_writers not implementing AsyncStager\n staged_state_dict = _create_cpu_state_dict(state_dict)\n _copy_state_dict(state_dict, staged_state_dict, type_check=False)\n\n return staged_state_dict\n\n staged_state_dict = stage_state_dict()\n\n executor: _AsyncCheckpointExecutor = (\n def stage_state_dict() -> Future[STATE_DICT_TYPE]:\n staging_executor = ThreadPoolExecutor(max_workers=1)\n if isinstance(storage_writer, AsyncStager) and not use_default_staging:\n staging_future = staging_executor.submit(storage_writer.stage, state_dict)\n else:\n # provides bwc for storage_writers not implementing AsyncStager", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2159421534", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156207, - "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", - "discussion_id": "2159421534", - "commented_code": "@@ -249,49 +305,70 @@ def async_save(\n \"A CPU backend must be enabled for async save; try initializing process group with 'cpu:gloo,cuda:nccl'\"\n )\n \n+ use_default_staging = False\n+ if storage_writer is None:\n+ use_default_staging = True\n+\n storage_writer = cast(\n StorageWriter, _storage_setup(storage_writer, checkpoint_id, reader=False)\n )\n \n state_dict = _stateful_to_state_dict(state_dict)\n \n @_dcp_method_logger(log_exceptions=True)\n- def stage_state_dict():\n- if isinstance(storage_writer, AsyncStager):\n- staged_state_dict = storage_writer.stage(state_dict)\n- else: # provides 
bwc for storage_writers not implementing AsyncStager\n- staged_state_dict = _create_cpu_state_dict(state_dict)\n- _copy_state_dict(state_dict, staged_state_dict, type_check=False)\n-\n- return staged_state_dict\n-\n- staged_state_dict = stage_state_dict()\n-\n- executor: _AsyncCheckpointExecutor = (\n+ def stage_state_dict() -> Future[STATE_DICT_TYPE]:\n+ staging_executor = ThreadPoolExecutor(max_workers=1)\n+ if isinstance(storage_writer, AsyncStager) and not use_default_staging:\n+ staging_future = staging_executor.submit(storage_writer.stage, state_dict)\n+ else:\n+ # provides bwc for storage_writers not implementing AsyncStager", - "comment_created_at": "2025-06-20T17:27:28+00:00", - "comment_author": "teja-rao", - "comment_body": "do we need to handle this case? can we ask user to implement async stager if they need to use zero-copy? I think it is simpler to support and cleaner from API point of view. ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2167615520", - "pr_number": 156207, - "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", - "created_at": "2025-06-25T20:54:34+00:00", - "commented_code": "planner: Optional[SavePlanner] = None,\n process_group: Optional[dist.ProcessGroup] = None,\n async_checkpointer_type: AsyncCheckpointerType = AsyncCheckpointerType.THREAD,\n) -> Future:\n async_stager: Optional[AsyncStager] = None,\n block_on_staging: bool = True,", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2167615520", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156207, - "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", - "discussion_id": "2167615520", - "commented_code": "@@ -191,12 +210,15 @@ def async_save(\n planner: Optional[SavePlanner] = None,\n process_group: Optional[dist.ProcessGroup] = None,\n async_checkpointer_type: AsyncCheckpointerType = AsyncCheckpointerType.THREAD,\n-) -> Future:\n+ async_stager: Optional[AsyncStager] = None,\n+ block_on_staging: bool = True,", - "comment_created_at": "2025-06-25T20:54:34+00:00", - "comment_author": "meetv18", - "comment_body": "One suggestion: \r\n\r\nCan we have these as a property in AsyncStager protocol, instead of directly bloating the API. These are mainly for power users, a simple/first time user will probably be perplexed by additional params that aren't self explanatory. ", - "pr_file_module": null - }, - { - "comment_id": "2167979087", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156207, - "pr_file": "torch/distributed/checkpoint/state_dict_saver.py", - "discussion_id": "2167615520", - "commented_code": "@@ -191,12 +210,15 @@ def async_save(\n planner: Optional[SavePlanner] = None,\n process_group: Optional[dist.ProcessGroup] = None,\n async_checkpointer_type: AsyncCheckpointerType = AsyncCheckpointerType.THREAD,\n-) -> Future:\n+ async_stager: Optional[AsyncStager] = None,\n+ block_on_staging: bool = True,", - "comment_created_at": "2025-06-26T02:34:55+00:00", - "comment_author": "Saiteja64", - "comment_body": "For async_stager argument, this is a directional change. We are separating out the stager from storage_writer. Conceptually, you should be able to use a stager with any storage_writer. Combining the two just forces us and users to change StorageWriter every time we want to make a change to stager (new properties, etc)\r\n\r\nblock_on_staging: Think we can remove this one. Works for all cases and simplifies API:\r\n1. 
If user configures AsyncStager to return a future instead of a state_dict, driver code can just return that future. \r\n2. If AsyncStager returns state_dict we can just keep existing logic of checking synchronize_after_execute and call synchronize_staging if needed", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1939922583", - "pr_number": 145000, - "pr_file": "torch/__init__.py", - "created_at": "2025-02-03T19:34:21+00:00", - "commented_code": "matrix_rank,\n solve,\n)\nfrom torch.utils.dlpack import from_dlpack, to_dlpack\nfrom torch.utils.dlpack import from_dlpack, to_dlpack, to_dlpack_unversioned", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "1939922583", - "repo_full_name": "pytorch/pytorch", - "pr_number": 145000, - "pr_file": "torch/__init__.py", - "discussion_id": "1939922583", - "commented_code": "@@ -2270,7 +2270,7 @@ def compiled_with_cxx11_abi() -> builtins.bool:\n matrix_rank,\n solve,\n )\n-from torch.utils.dlpack import from_dlpack, to_dlpack\n+from torch.utils.dlpack import from_dlpack, to_dlpack, to_dlpack_unversioned", - "comment_created_at": "2025-02-03T19:34:21+00:00", - "comment_author": "albanD", - "comment_body": "While I understand this matches current code, I don't think we want to have this as a new top level API? Being part of torch.utils.dlpack is enough?", - "pr_file_module": null - }, - { - "comment_id": "1941436061", - "repo_full_name": "pytorch/pytorch", - "pr_number": 145000, - "pr_file": "torch/__init__.py", - "discussion_id": "1939922583", - "commented_code": "@@ -2270,7 +2270,7 @@ def compiled_with_cxx11_abi() -> builtins.bool:\n matrix_rank,\n solve,\n )\n-from torch.utils.dlpack import from_dlpack, to_dlpack\n+from torch.utils.dlpack import from_dlpack, to_dlpack, to_dlpack_unversioned", - "comment_created_at": "2025-02-04T15:50:51+00:00", - "comment_author": "rgommers", - "comment_body": "That comment sounds right - I don't think it should be needed. 
For the introduction strategy to a versioned protocol, see the explanation and prototype code (`if max_version is None: ....`) at https://data-apis.org/array-api/latest/API_specification/generated/array_api.array.__dlpack__.html", - "pr_file_module": null - }, - { - "comment_id": "1941444094", - "repo_full_name": "pytorch/pytorch", - "pr_number": 145000, - "pr_file": "torch/__init__.py", - "discussion_id": "1939922583", - "commented_code": "@@ -2270,7 +2270,7 @@ def compiled_with_cxx11_abi() -> builtins.bool:\n matrix_rank,\n solve,\n )\n-from torch.utils.dlpack import from_dlpack, to_dlpack\n+from torch.utils.dlpack import from_dlpack, to_dlpack, to_dlpack_unversioned", - "comment_created_at": "2025-02-04T15:55:19+00:00", - "comment_author": "rgommers", - "comment_body": "If it's useful/needed to have a Python function to use here so that can be called from within `Tensor.__dlpack__`, then making it a private function by prepending an underscore should be fine I think.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-meaningful-descriptive-identifiers.json b/_reviewers/pytorch-meaningful-descriptive-identifiers.json new file mode 100644 index 0000000..c2c010a --- /dev/null +++ b/_reviewers/pytorch-meaningful-descriptive-identifiers.json @@ -0,0 +1,202 @@ +[ + { + "discussion_id": "2116854356", + "pr_number": 154770, + "pr_file": ".github/actions/reuse-old-whl/reuse_old_whl.py", + "created_at": "2025-05-30T23:19:38+00:00", + "commented_code": "subprocess.check_output(\n [\"unzip\", \"-o\", new_path, \"-d\", f\"artifacts/dist/{new_path.stem}\"],\n )\n\n # Remove the old wheel (which is now a zip file)\n os.remove(new_path)", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2116854356", + "repo_full_name": "pytorch/pytorch", + "pr_number": 154770, + "pr_file": ".github/actions/reuse-old-whl/reuse_old_whl.py", + "discussion_id": "2116854356", + "commented_code": "@@ -190,6 +190,10 @@ def unzip_artifact_and_replace_files() -> None:\n subprocess.check_output(\n [\"unzip\", \"-o\", new_path, \"-d\", f\"artifacts/dist/{new_path.stem}\"],\n )\n+\n+ # Remove the old wheel (which is now a zip file)\n+ os.remove(new_path)", + "comment_created_at": "2025-05-30T23:19:38+00:00", + "comment_author": "ZainRizvi", + "comment_body": "minor nit: consider renaming `new_path` to `zip_path` for clarity", + "pr_file_module": null + }, + { + "comment_id": "2116888961", + "repo_full_name": "pytorch/pytorch", + "pr_number": 154770, + "pr_file": ".github/actions/reuse-old-whl/reuse_old_whl.py", + "discussion_id": "2116854356", + "commented_code": "@@ -190,6 +190,10 @@ def unzip_artifact_and_replace_files() -> None:\n subprocess.check_output(\n [\"unzip\", \"-o\", new_path, \"-d\", f\"artifacts/dist/{new_path.stem}\"],\n )\n+\n+ # Remove the old wheel (which is now a zip file)\n+ os.remove(new_path)", + "comment_created_at": "2025-05-31T00:11:20+00:00", + "comment_author": "clee2000", + "comment_body": "probably a later pr, new_path is used in a bunch of places that are unrelated to this specific change", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1976180681", + "pr_number": 147881, + "pr_file": "torch/export/dynamic_shapes.py", + "created_at": "2025-03-01T00:45:12+00:00", + "commented_code": "- AUTO means automatic inference of shape (static or dynamic).\n - STATIC means static shape (always specialized).\n - DYNAMIC means dynamic, will error out if specialized.\n - _OBLIVIOUS allocates an backed symbol with size-oblivious semantics.", 
+ "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "1976180681", + "repo_full_name": "pytorch/pytorch", + "pr_number": 147881, + "pr_file": "torch/export/dynamic_shapes.py", + "discussion_id": "1976180681", + "commented_code": "@@ -46,11 +46,13 @@ class _DimHint(Enum):\n - AUTO means automatic inference of shape (static or dynamic).\n - STATIC means static shape (always specialized).\n - DYNAMIC means dynamic, will error out if specialized.\n+ - _OBLIVIOUS allocates an backed symbol with size-oblivious semantics.", + "comment_created_at": "2025-03-01T00:45:12+00:00", + "comment_author": "avikchaudhuri", + "comment_body": "It's probably good to pick a name that is more meaningful here. I guess the `_` means you're not committing to the name lol.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2071453448", + "pr_number": 151780, + "pr_file": "torch/_inductor/config.py", + "created_at": "2025-05-02T11:03:55+00:00", + "commented_code": "# decomposed into 7x4x2 thread blocks along MxNxK of a GEMM.\n gemm_thread_factors = os.environ.get(\"TORCHINDUCTOR_CPP_GEMM_THREAD_FACTORS\", None)\n\n # Set GEMM Transverse strategy: support VERTICAL, HORIZONTAL\n # If both are enabled, the strategy will be selected based on the heuristic.\n cpp_gemm_transverse_strategy = os.environ.get(", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2071453448", + "repo_full_name": "pytorch/pytorch", + "pr_number": 151780, + "pr_file": "torch/_inductor/config.py", + "discussion_id": "2071453448", + "commented_code": "@@ -997,6 +997,12 @@ class cpp:\n # decomposed into 7x4x2 thread blocks along MxNxK of a GEMM.\n gemm_thread_factors = os.environ.get(\"TORCHINDUCTOR_CPP_GEMM_THREAD_FACTORS\", None)\n \n+ # Set GEMM Transverse strategy: support VERTICAL, HORIZONTAL\n+ # If both are enabled, the strategy will be selected based on the heuristic.\n+ cpp_gemm_transverse_strategy = os.environ.get(", + "comment_created_at": "2025-05-02T11:03:55+00:00", + "comment_author": "jgong5", + "comment_body": " nit: you don't have to add \"cpp_\" prefix to the configuration name since it is already inside the `cpp` class.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2159742806", + "pr_number": 156312, + "pr_file": "torch/_functorch/eager_transforms.py", + "created_at": "2025-06-20T22:52:41+00:00", + "commented_code": "... c.add_(1)\n ... return b\n ...\n >>> inpt = torch.randn(2)\n >>> input = torch.randn(2)", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2159742806", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156312, + "pr_file": "torch/_functorch/eager_transforms.py", + "discussion_id": "2159742806", + "commented_code": "@@ -1516,18 +1516,18 @@ def functionalize(func: Callable, *, remove: str = \"mutations\") -> Callable:\n ... c.add_(1)\n ... return b\n ...\n- >>> inpt = torch.randn(2)\n+ >>> input = torch.randn(2)", + "comment_created_at": "2025-06-20T22:52:41+00:00", + "comment_author": "albanD", + "comment_body": "This is done on purpose as \"input\" is a reserved keyword in python", + "pr_file_module": null + }, + { + "comment_id": "2161678229", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156312, + "pr_file": "torch/_functorch/eager_transforms.py", + "discussion_id": "2159742806", + "commented_code": "@@ -1516,18 +1516,18 @@ def functionalize(func: Callable, *, remove: str = \"mutations\") -> Callable:\n ... c.add_(1)\n ... 
return b\n ...\n- >>> inpt = torch.randn(2)\n+ >>> input = torch.randn(2)", + "comment_created_at": "2025-06-23T13:45:01+00:00", + "comment_author": "XuehaiPan", + "comment_body": "Changed to `input_` as a common practice.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2161539521", + "pr_number": 156312, + "pr_file": "torch/_functorch/_aot_autograd/runtime_wrappers.py", + "created_at": "2025-06-23T12:46:40+00:00", + "commented_code": "storage_ref_to_idx: dict[StorageWeakRef, list[int]] = collections.defaultdict(list)\n base_args = []\n other_args = []\n for i, inpt in enumerate(fwd_inputs):\n if isinstance(inpt, Tensor):\n storage_ref = StorageWeakRef(inpt.untyped_storage())\n for i, input in enumerate(fwd_inputs):", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2161539521", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156312, + "pr_file": "torch/_functorch/_aot_autograd/runtime_wrappers.py", + "discussion_id": "2161539521", + "commented_code": "@@ -1344,12 +1344,12 @@ def _same_dtype_views(view1, view2):\n storage_ref_to_idx: dict[StorageWeakRef, list[int]] = collections.defaultdict(list)\n base_args = []\n other_args = []\n- for i, inpt in enumerate(fwd_inputs):\n- if isinstance(inpt, Tensor):\n- storage_ref = StorageWeakRef(inpt.untyped_storage())\n+ for i, input in enumerate(fwd_inputs):", + "comment_created_at": "2025-06-23T12:46:40+00:00", + "comment_author": "albanD", + "comment_body": "These are not really ok either. We shouldn't override the builtin input wherever possible", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2131477194", + "pr_number": 137846, + "pr_file": "torch/utils/collect_env.py", + "created_at": "2025-06-06T04:32:11+00:00", + "commented_code": "return smi\n\n\ndef detect_linux_pkg_manager():", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2131477194", + "repo_full_name": "pytorch/pytorch", + "pr_number": 137846, + "pr_file": "torch/utils/collect_env.py", + "discussion_id": "2131477194", + "commented_code": "@@ -243,6 +269,149 @@ def get_nvidia_smi():\n return smi\n \n \n+def detect_linux_pkg_manager():", + "comment_created_at": "2025-06-06T04:32:11+00:00", + "comment_author": "malfet", + "comment_body": "```suggestion\r\ndef _detect_linux_pkg_manager():\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2165261508", + "pr_number": 156683, + "pr_file": "torch/_inductor/config.py", + "created_at": "2025-06-25T01:34:57+00:00", + "commented_code": ").upper()\n\n\n# Specify the size of the benchmarking space for GEMM autotuning with the neural network model.\n# SAME - There should be no functional difference between this and max_autotune_gemm_search_space\n# DEFAULT - Benchmark the same number of configs as max_autotune, but search over a larger space using the model\n# - Use the top configs as predicted by the model.\ndef parse_matmul_gemm_autotune_benchmark_space() -> Union[\n int, Literal[\"SAME\", \"DEFAULT\"]\n]:\n value = os.environ.get(\"TORCHINDUCTOR_MATMUL_GEMM_AUTOTUNE_BENCHMARK_SPACE\")\n if value is not None:\n # Try to parse as an integer first\n try:\n return int(value)\n except ValueError:\n if os.environ.get(\"TORCHINDUCTOR_FAST_AUTOTUNE\") == \"1\":\n return 1\n return \"DEFAULT\"\n # just keep the search space the same as\n return \"SAME\"\n\n\nmatmul_gemm_autotune_benchmark_space: Union[int, Literal[\"SAME\", \"DEFAULT\"]] = (\n parse_matmul_gemm_autotune_benchmark_space()\n)\n\n\ndef 
parse_matmul_gemm_autotune_search_space() -> Literal[\"DEFAULT\", \"EXHAUSTIVE\"]:\n benchmarking_space = parse_matmul_gemm_autotune_benchmark_space()\n if benchmarking_space == \"SAME\":\n val = os.environ.get(\n \"TORCHINDUCTOR_MATMUL_GEMM_AUTOTUNE_BENCHMARK_SPACE\", \"DEFAULT\"\n ).upper()\n if val in [\"DEFAULT\", \"EXHAUSTIVE\"]:\n return val # type: ignore[return-value]\n return \"DEFAULT\"\n # If we are using the model, the configs we're considering should be exhaustive\n return \"EXHAUSTIVE\"\n\n\n# Specify the size of the search space for GEMM autotuning.\n# DEFAULT - balance between compile time overhead and performance\n# EXHAUSTIVE - maximize performance\nmax_autotune_gemm_search_space: Literal[\"DEFAULT\", \"EXHAUSTIVE\"] = os.environ.get(\n \"TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACE\", \"DEFAULT\"\n).upper() # type: ignore[assignment]\nmax_autotune_gemm_search_space: Literal[\"DEFAULT\", \"EXHAUSTIVE\"] = (", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2165261508", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156683, + "pr_file": "torch/_inductor/config.py", + "discussion_id": "2165261508", + "commented_code": "@@ -443,12 +443,61 @@ def prologue_fusion_enabled() -> bool:\n ).upper()\n \n \n+# Specify the size of the benchmarking space for GEMM autotuning with the neural network model.\n+# SAME - There should be no functional difference between this and max_autotune_gemm_search_space\n+# DEFAULT - Benchmark the same number of configs as max_autotune, but search over a larger space using the model\n+# - Use the top configs as predicted by the model.\n+def parse_matmul_gemm_autotune_benchmark_space() -> Union[\n+ int, Literal[\"SAME\", \"DEFAULT\"]\n+]:\n+ value = os.environ.get(\"TORCHINDUCTOR_MATMUL_GEMM_AUTOTUNE_BENCHMARK_SPACE\")\n+ if value is not None:\n+ # Try to parse as an integer first\n+ try:\n+ return int(value)\n+ except ValueError:\n+ if os.environ.get(\"TORCHINDUCTOR_FAST_AUTOTUNE\") == \"1\":\n+ return 1\n+ return \"DEFAULT\"\n+ # just keep the search space the same as\n+ return \"SAME\"\n+\n+\n+matmul_gemm_autotune_benchmark_space: Union[int, Literal[\"SAME\", \"DEFAULT\"]] = (\n+ parse_matmul_gemm_autotune_benchmark_space()\n+)\n+\n+\n+def parse_matmul_gemm_autotune_search_space() -> Literal[\"DEFAULT\", \"EXHAUSTIVE\"]:\n+ benchmarking_space = parse_matmul_gemm_autotune_benchmark_space()\n+ if benchmarking_space == \"SAME\":\n+ val = os.environ.get(\n+ \"TORCHINDUCTOR_MATMUL_GEMM_AUTOTUNE_BENCHMARK_SPACE\", \"DEFAULT\"\n+ ).upper()\n+ if val in [\"DEFAULT\", \"EXHAUSTIVE\"]:\n+ return val # type: ignore[return-value]\n+ return \"DEFAULT\"\n+ # If we are using the model, the configs we're considering should be exhaustive\n+ return \"EXHAUSTIVE\"\n+\n+\n # Specify the size of the search space for GEMM autotuning.\n # DEFAULT - balance between compile time overhead and performance\n # EXHAUSTIVE - maximize performance\n-max_autotune_gemm_search_space: Literal[\"DEFAULT\", \"EXHAUSTIVE\"] = os.environ.get(\n- \"TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACE\", \"DEFAULT\"\n-).upper() # type: ignore[assignment]\n+max_autotune_gemm_search_space: Literal[\"DEFAULT\", \"EXHAUSTIVE\"] = (", + "comment_created_at": "2025-06-25T01:34:57+00:00", + "comment_author": "coconutruben", + "comment_body": "I would why away from subjective names like fast for now and rather do more sterile things until we have a better idea about usability, topk, etc, but that's more of a nit, not a blocker", + "pr_file_module": null + } + ] + 
}, + { + "discussion_id": "2170703508", + "pr_number": 155886, + "pr_file": "torch/_dynamo/side_effects.py", + "created_at": "2025-06-27T03:52:41+00:00", + "commented_code": "# Only applicable if this graph is created from Dynamo tracing in Compiled Autograd.\n self.ca_final_callbacks_var = None\n\n # Tracks VariableTracker objects whose mutations can be skipped.\n # For normal mutated variables, Dynamo generates code to replay/reconstruct\n # the mutations after graph execution. However, variables in this set have\n # their mutations ignored - the mutations happen during\n # execution but don't need to be replayed in the generated code.\n # Used for temporary mutations in contexts like torch.func.functional_call,\n # where module parameters/buffers are modified but later restored.\n self.skip_mutation_variables = set()\n\n def skip_mutation(self, var):\n \"\"\"Mutations to this variable will be executed but not not tracked,\n typically used for temporary mutations that are later restored.\"\"\"\n self.skip_mutation_variables.add(var)\n\n def remove_skip_mutation(self, var):\n \"\"\"Remove a variable from the skip mutation set, restoring normal mutation tracking.\"\"\"\n if var in self.skip_mutation_variables:\n self.skip_mutation_variables.remove(var)", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2170703508", + "repo_full_name": "pytorch/pytorch", + "pr_number": 155886, + "pr_file": "torch/_dynamo/side_effects.py", + "discussion_id": "2170703508", + "commented_code": "@@ -124,6 +124,25 @@ def __init__(\n # Only applicable if this graph is created from Dynamo tracing in Compiled Autograd.\n self.ca_final_callbacks_var = None\n \n+ # Tracks VariableTracker objects whose mutations can be skipped.\n+ # For normal mutated variables, Dynamo generates code to replay/reconstruct\n+ # the mutations after graph execution. However, variables in this set have\n+ # their mutations ignored - the mutations happen during\n+ # execution but don't need to be replayed in the generated code.\n+ # Used for temporary mutations in contexts like torch.func.functional_call,\n+ # where module parameters/buffers are modified but later restored.\n+ self.skip_mutation_variables = set()\n+\n+ def skip_mutation(self, var):\n+ \"\"\"Mutations to this variable will be executed but not not tracked,\n+ typically used for temporary mutations that are later restored.\"\"\"\n+ self.skip_mutation_variables.add(var)\n+\n+ def remove_skip_mutation(self, var):\n+ \"\"\"Remove a variable from the skip mutation set, restoring normal mutation tracking.\"\"\"\n+ if var in self.skip_mutation_variables:\n+ self.skip_mutation_variables.remove(var)", + "comment_created_at": "2025-06-27T03:52:41+00:00", + "comment_author": "zou3519", + "comment_body": "Nit: some more descriptive names please. 
More words is fine.\r\n\r\nself.skip_mutation(var) -> self.ignore_mutations_on(var)\r\nself.remove_skip_mutation(var) -> self.stop_ignoring_mutations_on(var)\r\nself.skip_mutation_variables -> self.ignore_mutation_on_these_variables = set()", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-meaningful-descriptive-identifiers.md b/_reviewers/pytorch-meaningful-descriptive-identifiers.md index b5bca03..445a623 100644 --- a/_reviewers/pytorch-meaningful-descriptive-identifiers.md +++ b/_reviewers/pytorch-meaningful-descriptive-identifiers.md @@ -30,207 +30,3 @@ Key guidelines: - Prefer objective, technical terms over subjective descriptors (e.g., `topk` rather than `fast`) When variable usage changes during implementation, rename accordingly to maintain semantic clarity. - - -[ - { - "discussion_id": "2116854356", - "pr_number": 154770, - "pr_file": ".github/actions/reuse-old-whl/reuse_old_whl.py", - "created_at": "2025-05-30T23:19:38+00:00", - "commented_code": "subprocess.check_output(\n [\"unzip\", \"-o\", new_path, \"-d\", f\"artifacts/dist/{new_path.stem}\"],\n )\n\n # Remove the old wheel (which is now a zip file)\n os.remove(new_path)", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2116854356", - "repo_full_name": "pytorch/pytorch", - "pr_number": 154770, - "pr_file": ".github/actions/reuse-old-whl/reuse_old_whl.py", - "discussion_id": "2116854356", - "commented_code": "@@ -190,6 +190,10 @@ def unzip_artifact_and_replace_files() -> None:\n subprocess.check_output(\n [\"unzip\", \"-o\", new_path, \"-d\", f\"artifacts/dist/{new_path.stem}\"],\n )\n+\n+ # Remove the old wheel (which is now a zip file)\n+ os.remove(new_path)", - "comment_created_at": "2025-05-30T23:19:38+00:00", - "comment_author": "ZainRizvi", - "comment_body": "minor nit: consider renaming `new_path` to `zip_path` for clarity", - "pr_file_module": null - }, - { - "comment_id": "2116888961", - "repo_full_name": "pytorch/pytorch", - "pr_number": 154770, - "pr_file": ".github/actions/reuse-old-whl/reuse_old_whl.py", - "discussion_id": "2116854356", - "commented_code": "@@ -190,6 +190,10 @@ def unzip_artifact_and_replace_files() -> None:\n subprocess.check_output(\n [\"unzip\", \"-o\", new_path, \"-d\", f\"artifacts/dist/{new_path.stem}\"],\n )\n+\n+ # Remove the old wheel (which is now a zip file)\n+ os.remove(new_path)", - "comment_created_at": "2025-05-31T00:11:20+00:00", - "comment_author": "clee2000", - "comment_body": "probably a later pr, new_path is used in a bunch of places that are unrelated to this specific change", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1976180681", - "pr_number": 147881, - "pr_file": "torch/export/dynamic_shapes.py", - "created_at": "2025-03-01T00:45:12+00:00", - "commented_code": "- AUTO means automatic inference of shape (static or dynamic).\n - STATIC means static shape (always specialized).\n - DYNAMIC means dynamic, will error out if specialized.\n - _OBLIVIOUS allocates an backed symbol with size-oblivious semantics.", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "1976180681", - "repo_full_name": "pytorch/pytorch", - "pr_number": 147881, - "pr_file": "torch/export/dynamic_shapes.py", - "discussion_id": "1976180681", - "commented_code": "@@ -46,11 +46,13 @@ class _DimHint(Enum):\n - AUTO means automatic inference of shape (static or dynamic).\n - STATIC means static shape (always specialized).\n - DYNAMIC means dynamic, will error out if 
specialized.\n+ - _OBLIVIOUS allocates an backed symbol with size-oblivious semantics.", - "comment_created_at": "2025-03-01T00:45:12+00:00", - "comment_author": "avikchaudhuri", - "comment_body": "It's probably good to pick a name that is more meaningful here. I guess the `_` means you're not committing to the name lol.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2071453448", - "pr_number": 151780, - "pr_file": "torch/_inductor/config.py", - "created_at": "2025-05-02T11:03:55+00:00", - "commented_code": "# decomposed into 7x4x2 thread blocks along MxNxK of a GEMM.\n gemm_thread_factors = os.environ.get(\"TORCHINDUCTOR_CPP_GEMM_THREAD_FACTORS\", None)\n\n # Set GEMM Transverse strategy: support VERTICAL, HORIZONTAL\n # If both are enabled, the strategy will be selected based on the heuristic.\n cpp_gemm_transverse_strategy = os.environ.get(", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2071453448", - "repo_full_name": "pytorch/pytorch", - "pr_number": 151780, - "pr_file": "torch/_inductor/config.py", - "discussion_id": "2071453448", - "commented_code": "@@ -997,6 +997,12 @@ class cpp:\n # decomposed into 7x4x2 thread blocks along MxNxK of a GEMM.\n gemm_thread_factors = os.environ.get(\"TORCHINDUCTOR_CPP_GEMM_THREAD_FACTORS\", None)\n \n+ # Set GEMM Transverse strategy: support VERTICAL, HORIZONTAL\n+ # If both are enabled, the strategy will be selected based on the heuristic.\n+ cpp_gemm_transverse_strategy = os.environ.get(", - "comment_created_at": "2025-05-02T11:03:55+00:00", - "comment_author": "jgong5", - "comment_body": " nit: you don't have to add \"cpp_\" prefix to the configuration name since it is already inside the `cpp` class.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2159742806", - "pr_number": 156312, - "pr_file": "torch/_functorch/eager_transforms.py", - "created_at": "2025-06-20T22:52:41+00:00", - "commented_code": "... c.add_(1)\n ... return b\n ...\n >>> inpt = torch.randn(2)\n >>> input = torch.randn(2)", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2159742806", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156312, - "pr_file": "torch/_functorch/eager_transforms.py", - "discussion_id": "2159742806", - "commented_code": "@@ -1516,18 +1516,18 @@ def functionalize(func: Callable, *, remove: str = \"mutations\") -> Callable:\n ... c.add_(1)\n ... return b\n ...\n- >>> inpt = torch.randn(2)\n+ >>> input = torch.randn(2)", - "comment_created_at": "2025-06-20T22:52:41+00:00", - "comment_author": "albanD", - "comment_body": "This is done on purpose as \"input\" is a reserved keyword in python", - "pr_file_module": null - }, - { - "comment_id": "2161678229", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156312, - "pr_file": "torch/_functorch/eager_transforms.py", - "discussion_id": "2159742806", - "commented_code": "@@ -1516,18 +1516,18 @@ def functionalize(func: Callable, *, remove: str = \"mutations\") -> Callable:\n ... c.add_(1)\n ... 
return b\n ...\n- >>> inpt = torch.randn(2)\n+ >>> input = torch.randn(2)", - "comment_created_at": "2025-06-23T13:45:01+00:00", - "comment_author": "XuehaiPan", - "comment_body": "Changed to `input_` as a common practice.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2161539521", - "pr_number": 156312, - "pr_file": "torch/_functorch/_aot_autograd/runtime_wrappers.py", - "created_at": "2025-06-23T12:46:40+00:00", - "commented_code": "storage_ref_to_idx: dict[StorageWeakRef, list[int]] = collections.defaultdict(list)\n base_args = []\n other_args = []\n for i, inpt in enumerate(fwd_inputs):\n if isinstance(inpt, Tensor):\n storage_ref = StorageWeakRef(inpt.untyped_storage())\n for i, input in enumerate(fwd_inputs):", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2161539521", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156312, - "pr_file": "torch/_functorch/_aot_autograd/runtime_wrappers.py", - "discussion_id": "2161539521", - "commented_code": "@@ -1344,12 +1344,12 @@ def _same_dtype_views(view1, view2):\n storage_ref_to_idx: dict[StorageWeakRef, list[int]] = collections.defaultdict(list)\n base_args = []\n other_args = []\n- for i, inpt in enumerate(fwd_inputs):\n- if isinstance(inpt, Tensor):\n- storage_ref = StorageWeakRef(inpt.untyped_storage())\n+ for i, input in enumerate(fwd_inputs):", - "comment_created_at": "2025-06-23T12:46:40+00:00", - "comment_author": "albanD", - "comment_body": "These are not really ok either. We shouldn't override the builtin input wherever possible", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2131477194", - "pr_number": 137846, - "pr_file": "torch/utils/collect_env.py", - "created_at": "2025-06-06T04:32:11+00:00", - "commented_code": "return smi\n\n\ndef detect_linux_pkg_manager():", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2131477194", - "repo_full_name": "pytorch/pytorch", - "pr_number": 137846, - "pr_file": "torch/utils/collect_env.py", - "discussion_id": "2131477194", - "commented_code": "@@ -243,6 +269,149 @@ def get_nvidia_smi():\n return smi\n \n \n+def detect_linux_pkg_manager():", - "comment_created_at": "2025-06-06T04:32:11+00:00", - "comment_author": "malfet", - "comment_body": "```suggestion\r\ndef _detect_linux_pkg_manager():\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2165261508", - "pr_number": 156683, - "pr_file": "torch/_inductor/config.py", - "created_at": "2025-06-25T01:34:57+00:00", - "commented_code": ").upper()\n\n\n# Specify the size of the benchmarking space for GEMM autotuning with the neural network model.\n# SAME - There should be no functional difference between this and max_autotune_gemm_search_space\n# DEFAULT - Benchmark the same number of configs as max_autotune, but search over a larger space using the model\n# - Use the top configs as predicted by the model.\ndef parse_matmul_gemm_autotune_benchmark_space() -> Union[\n int, Literal[\"SAME\", \"DEFAULT\"]\n]:\n value = os.environ.get(\"TORCHINDUCTOR_MATMUL_GEMM_AUTOTUNE_BENCHMARK_SPACE\")\n if value is not None:\n # Try to parse as an integer first\n try:\n return int(value)\n except ValueError:\n if os.environ.get(\"TORCHINDUCTOR_FAST_AUTOTUNE\") == \"1\":\n return 1\n return \"DEFAULT\"\n # just keep the search space the same as\n return \"SAME\"\n\n\nmatmul_gemm_autotune_benchmark_space: Union[int, Literal[\"SAME\", \"DEFAULT\"]] = (\n parse_matmul_gemm_autotune_benchmark_space()\n)\n\n\ndef 
parse_matmul_gemm_autotune_search_space() -> Literal[\"DEFAULT\", \"EXHAUSTIVE\"]:\n benchmarking_space = parse_matmul_gemm_autotune_benchmark_space()\n if benchmarking_space == \"SAME\":\n val = os.environ.get(\n \"TORCHINDUCTOR_MATMUL_GEMM_AUTOTUNE_BENCHMARK_SPACE\", \"DEFAULT\"\n ).upper()\n if val in [\"DEFAULT\", \"EXHAUSTIVE\"]:\n return val # type: ignore[return-value]\n return \"DEFAULT\"\n # If we are using the model, the configs we're considering should be exhaustive\n return \"EXHAUSTIVE\"\n\n\n# Specify the size of the search space for GEMM autotuning.\n# DEFAULT - balance between compile time overhead and performance\n# EXHAUSTIVE - maximize performance\nmax_autotune_gemm_search_space: Literal[\"DEFAULT\", \"EXHAUSTIVE\"] = os.environ.get(\n \"TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACE\", \"DEFAULT\"\n).upper() # type: ignore[assignment]\nmax_autotune_gemm_search_space: Literal[\"DEFAULT\", \"EXHAUSTIVE\"] = (", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2165261508", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156683, - "pr_file": "torch/_inductor/config.py", - "discussion_id": "2165261508", - "commented_code": "@@ -443,12 +443,61 @@ def prologue_fusion_enabled() -> bool:\n ).upper()\n \n \n+# Specify the size of the benchmarking space for GEMM autotuning with the neural network model.\n+# SAME - There should be no functional difference between this and max_autotune_gemm_search_space\n+# DEFAULT - Benchmark the same number of configs as max_autotune, but search over a larger space using the model\n+# - Use the top configs as predicted by the model.\n+def parse_matmul_gemm_autotune_benchmark_space() -> Union[\n+ int, Literal[\"SAME\", \"DEFAULT\"]\n+]:\n+ value = os.environ.get(\"TORCHINDUCTOR_MATMUL_GEMM_AUTOTUNE_BENCHMARK_SPACE\")\n+ if value is not None:\n+ # Try to parse as an integer first\n+ try:\n+ return int(value)\n+ except ValueError:\n+ if os.environ.get(\"TORCHINDUCTOR_FAST_AUTOTUNE\") == \"1\":\n+ return 1\n+ return \"DEFAULT\"\n+ # just keep the search space the same as\n+ return \"SAME\"\n+\n+\n+matmul_gemm_autotune_benchmark_space: Union[int, Literal[\"SAME\", \"DEFAULT\"]] = (\n+ parse_matmul_gemm_autotune_benchmark_space()\n+)\n+\n+\n+def parse_matmul_gemm_autotune_search_space() -> Literal[\"DEFAULT\", \"EXHAUSTIVE\"]:\n+ benchmarking_space = parse_matmul_gemm_autotune_benchmark_space()\n+ if benchmarking_space == \"SAME\":\n+ val = os.environ.get(\n+ \"TORCHINDUCTOR_MATMUL_GEMM_AUTOTUNE_BENCHMARK_SPACE\", \"DEFAULT\"\n+ ).upper()\n+ if val in [\"DEFAULT\", \"EXHAUSTIVE\"]:\n+ return val # type: ignore[return-value]\n+ return \"DEFAULT\"\n+ # If we are using the model, the configs we're considering should be exhaustive\n+ return \"EXHAUSTIVE\"\n+\n+\n # Specify the size of the search space for GEMM autotuning.\n # DEFAULT - balance between compile time overhead and performance\n # EXHAUSTIVE - maximize performance\n-max_autotune_gemm_search_space: Literal[\"DEFAULT\", \"EXHAUSTIVE\"] = os.environ.get(\n- \"TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACE\", \"DEFAULT\"\n-).upper() # type: ignore[assignment]\n+max_autotune_gemm_search_space: Literal[\"DEFAULT\", \"EXHAUSTIVE\"] = (", - "comment_created_at": "2025-06-25T01:34:57+00:00", - "comment_author": "coconutruben", - "comment_body": "I would why away from subjective names like fast for now and rather do more sterile things until we have a better idea about usability, topk, etc, but that's more of a nit, not a blocker", - "pr_file_module": null - } - ] - 
}, - { - "discussion_id": "2170703508", - "pr_number": 155886, - "pr_file": "torch/_dynamo/side_effects.py", - "created_at": "2025-06-27T03:52:41+00:00", - "commented_code": "# Only applicable if this graph is created from Dynamo tracing in Compiled Autograd.\n self.ca_final_callbacks_var = None\n\n # Tracks VariableTracker objects whose mutations can be skipped.\n # For normal mutated variables, Dynamo generates code to replay/reconstruct\n # the mutations after graph execution. However, variables in this set have\n # their mutations ignored - the mutations happen during\n # execution but don't need to be replayed in the generated code.\n # Used for temporary mutations in contexts like torch.func.functional_call,\n # where module parameters/buffers are modified but later restored.\n self.skip_mutation_variables = set()\n\n def skip_mutation(self, var):\n \"\"\"Mutations to this variable will be executed but not not tracked,\n typically used for temporary mutations that are later restored.\"\"\"\n self.skip_mutation_variables.add(var)\n\n def remove_skip_mutation(self, var):\n \"\"\"Remove a variable from the skip mutation set, restoring normal mutation tracking.\"\"\"\n if var in self.skip_mutation_variables:\n self.skip_mutation_variables.remove(var)", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2170703508", - "repo_full_name": "pytorch/pytorch", - "pr_number": 155886, - "pr_file": "torch/_dynamo/side_effects.py", - "discussion_id": "2170703508", - "commented_code": "@@ -124,6 +124,25 @@ def __init__(\n # Only applicable if this graph is created from Dynamo tracing in Compiled Autograd.\n self.ca_final_callbacks_var = None\n \n+ # Tracks VariableTracker objects whose mutations can be skipped.\n+ # For normal mutated variables, Dynamo generates code to replay/reconstruct\n+ # the mutations after graph execution. However, variables in this set have\n+ # their mutations ignored - the mutations happen during\n+ # execution but don't need to be replayed in the generated code.\n+ # Used for temporary mutations in contexts like torch.func.functional_call,\n+ # where module parameters/buffers are modified but later restored.\n+ self.skip_mutation_variables = set()\n+\n+ def skip_mutation(self, var):\n+ \"\"\"Mutations to this variable will be executed but not not tracked,\n+ typically used for temporary mutations that are later restored.\"\"\"\n+ self.skip_mutation_variables.add(var)\n+\n+ def remove_skip_mutation(self, var):\n+ \"\"\"Remove a variable from the skip mutation set, restoring normal mutation tracking.\"\"\"\n+ if var in self.skip_mutation_variables:\n+ self.skip_mutation_variables.remove(var)", - "comment_created_at": "2025-06-27T03:52:41+00:00", - "comment_author": "zou3519", - "comment_body": "Nit: some more descriptive names please. 
More words is fine.\r\n\r\nself.skip_mutation(var) -> self.ignore_mutations_on(var)\r\nself.remove_skip_mutation(var) -> self.stop_ignoring_mutations_on(var)\r\nself.skip_mutation_variables -> self.ignore_mutation_on_these_variables = set()", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-optimize-device-transfers.json b/_reviewers/pytorch-optimize-device-transfers.json new file mode 100644 index 0000000..e7a7bde --- /dev/null +++ b/_reviewers/pytorch-optimize-device-transfers.json @@ -0,0 +1,172 @@ +[ + { + "discussion_id": "2106417227", + "pr_number": 154202, + "pr_file": "aten/src/ATen/native/mkldnn/xpu/Conv.cpp", + "created_at": "2025-05-26T02:31:41+00:00", + "commented_code": "return std::tuple{grad_input, grad_weight, grad_bias};\n}\n\nTensor convolution_pointwise(\n const Tensor& input_t,\n const Tensor& weight_t,\n const std::optional& bias_opt,", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2106417227", + "repo_full_name": "pytorch/pytorch", + "pr_number": 154202, + "pr_file": "aten/src/ATen/native/mkldnn/xpu/Conv.cpp", + "discussion_id": "2106417227", + "commented_code": "@@ -751,11 +681,134 @@ std::tuple convolution_backward_overrideable(\n return std::tuple{grad_input, grad_weight, grad_bias};\n }\n \n+Tensor convolution_pointwise(\n+ const Tensor& input_t,\n+ const Tensor& weight_t,\n+ const std::optional& bias_opt,", + "comment_created_at": "2025-05-26T02:31:41+00:00", + "comment_author": "EikanWang", + "comment_body": "Pls. add device guard for input tensors and take bias_opt into account.", + "pr_file_module": null + }, + { + "comment_id": "2106656163", + "repo_full_name": "pytorch/pytorch", + "pr_number": 154202, + "pr_file": "aten/src/ATen/native/mkldnn/xpu/Conv.cpp", + "discussion_id": "2106417227", + "commented_code": "@@ -751,11 +681,134 @@ std::tuple convolution_backward_overrideable(\n return std::tuple{grad_input, grad_weight, grad_bias};\n }\n \n+Tensor convolution_pointwise(\n+ const Tensor& input_t,\n+ const Tensor& weight_t,\n+ const std::optional& bias_opt,", + "comment_created_at": "2025-05-26T06:38:47+00:00", + "comment_author": "ZhiweiYan-96", + "comment_body": "all functions added device guard. \r\n\r\n>>> take bias_opt into account.\r\n\r\nThe device guard should only needs operates on 1 single input, which I refer to the generated files. 
The reason might be that we only need to change runtime context once.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2155857288", + "pr_number": 156384, + "pr_file": "aten/src/ATen/native/IndexingUtils.h", + "created_at": "2025-06-19T01:50:25+00:00", + "commented_code": "}\n // Replace with nonzeros\n auto nonzero = index.nonzero();\n if (ensure_same_device && nonzero.device() != self.device()) {\n bool non_blocking = nonzero.is_cpu() && self.device().is_cuda();", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2155857288", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156384, + "pr_file": "aten/src/ATen/native/IndexingUtils.h", + "discussion_id": "2155857288", + "commented_code": "@@ -39,9 +40,15 @@ static void invalid_mask(const Tensor & self, int64_t idx, const Tensor & mask,\n }\n // Replace with nonzeros\n auto nonzero = index.nonzero();\n+ if (ensure_same_device && nonzero.device() != self.device()) {\n+ bool non_blocking = nonzero.is_cpu() && self.device().is_cuda();", + "comment_created_at": "2025-06-19T01:50:25+00:00", + "comment_author": "ngimel", + "comment_body": "this is not enough to make a copy non-blocking, you also need to copy nonzero to pinned memory", + "pr_file_module": null + }, + { + "comment_id": "2156606565", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156384, + "pr_file": "aten/src/ATen/native/IndexingUtils.h", + "discussion_id": "2155857288", + "commented_code": "@@ -39,9 +40,15 @@ static void invalid_mask(const Tensor & self, int64_t idx, const Tensor & mask,\n }\n // Replace with nonzeros\n auto nonzero = index.nonzero();\n+ if (ensure_same_device && nonzero.device() != self.device()) {\n+ bool non_blocking = nonzero.is_cpu() && self.device().is_cuda();", + "comment_created_at": "2025-06-19T09:50:53+00:00", + "comment_author": "lgeiger", + "comment_body": "Thanks for the fast review. I verified in the pytorch profile that `cudaStreamSynchronize` is not called with this code and `test_sync_warning` also checks that.\r\n\r\nHow would I create a `nonzero` in pinned memory? 
Can I just create an zero sized empty tensor in pinned memory and use that as the output parameter or is that problematic since we don't know the tensorsize beforehand?\r\n\r\nI'm just asking because I believe simply calling [`nonzero.pin_memory()` is not recommended ](https://docs.pytorch.org/tutorials/intermediate/pinmem_nonblock.html#what-you-will-learn) or is this different here?", + "pr_file_module": null + }, + { + "comment_id": "2160697950", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156384, + "pr_file": "aten/src/ATen/native/IndexingUtils.h", + "discussion_id": "2155857288", + "commented_code": "@@ -39,9 +40,15 @@ static void invalid_mask(const Tensor & self, int64_t idx, const Tensor & mask,\n }\n // Replace with nonzeros\n auto nonzero = index.nonzero();\n+ if (ensure_same_device && nonzero.device() != self.device()) {\n+ bool non_blocking = nonzero.is_cpu() && self.device().is_cuda();", + "comment_created_at": "2025-06-23T04:35:27+00:00", + "comment_author": "ngimel", + "comment_body": "You could allocate zero-element pinned tensor and send it as `out` arg to `nonzero`", + "pr_file_module": null + }, + { + "comment_id": "2161316931", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156384, + "pr_file": "aten/src/ATen/native/IndexingUtils.h", + "discussion_id": "2155857288", + "commented_code": "@@ -39,9 +40,15 @@ static void invalid_mask(const Tensor & self, int64_t idx, const Tensor & mask,\n }\n // Replace with nonzeros\n auto nonzero = index.nonzero();\n+ if (ensure_same_device && nonzero.device() != self.device()) {\n+ bool non_blocking = nonzero.is_cpu() && self.device().is_cuda();", + "comment_created_at": "2025-06-23T10:53:24+00:00", + "comment_author": "lgeiger", + "comment_body": "@ngimel Done in 0b7acc263f79a11c7f36216b0a620f24969220e8", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2088107795", + "pr_number": 150218, + "pr_file": "aten/src/ATen/DLConvertor.cpp", + "created_at": "2025-05-14T06:03:19+00:00", + "commented_code": "return fromDLPackImpl(src, std::move(deleter));\n}\n\nTensor maybeCopyTensor(\n const Tensor& data,\n std::optional optional_dl_device,\n std::optional copy) {\n bool force_copy = copy.has_value() && *copy;\n bool force_move = copy.has_value() && !*copy;\n\n if (optional_dl_device.has_value()) {\n auto device = at::getATenDevice(\n optional_dl_device->device_type,\n static_cast(optional_dl_device->device_id));\n\n if (device != data.device()) {\n TORCH_CHECK(\n !force_move,\n \"cannot move tensor from \",\n data.device(),\n \" to \",\n device,\n \" without copying. Set copy=True is needed.\");", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2088107795", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150218, + "pr_file": "aten/src/ATen/DLConvertor.cpp", + "discussion_id": "2088107795", + "commented_code": "@@ -388,4 +389,35 @@ Tensor fromDLPackVersioned(DLManagedTensorVersioned* src, std::function(src, std::move(deleter));\n }\n \n+Tensor maybeCopyTensor(\n+ const Tensor& data,\n+ std::optional optional_dl_device,\n+ std::optional copy) {\n+ bool force_copy = copy.has_value() && *copy;\n+ bool force_move = copy.has_value() && !*copy;\n+\n+ if (optional_dl_device.has_value()) {\n+ auto device = at::getATenDevice(\n+ optional_dl_device->device_type,\n+ static_cast(optional_dl_device->device_id));\n+\n+ if (device != data.device()) {\n+ TORCH_CHECK(\n+ !force_move,\n+ \"cannot move tensor from \",\n+ data.device(),\n+ \" to \",\n+ device,\n+ \" without copying. 
Set copy=True is needed.\");", + "comment_created_at": "2025-05-14T06:03:19+00:00", + "comment_author": "msaroufim", + "comment_body": "just double checking this check would make it clear that users would need to set the copy flag in\r\n\r\n`torch.from_dlpack(..., copy=copy)`", + "pr_file_module": null + }, + { + "comment_id": "2105836795", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150218, + "pr_file": "aten/src/ATen/DLConvertor.cpp", + "discussion_id": "2088107795", + "commented_code": "@@ -388,4 +389,35 @@ Tensor fromDLPackVersioned(DLManagedTensorVersioned* src, std::function(src, std::move(deleter));\n }\n \n+Tensor maybeCopyTensor(\n+ const Tensor& data,\n+ std::optional optional_dl_device,\n+ std::optional copy) {\n+ bool force_copy = copy.has_value() && *copy;\n+ bool force_move = copy.has_value() && !*copy;\n+\n+ if (optional_dl_device.has_value()) {\n+ auto device = at::getATenDevice(\n+ optional_dl_device->device_type,\n+ static_cast(optional_dl_device->device_id));\n+\n+ if (device != data.device()) {\n+ TORCH_CHECK(\n+ !force_move,\n+ \"cannot move tensor from \",\n+ data.device(),\n+ \" to \",\n+ device,\n+ \" without copying. Set copy=True is needed.\");", + "comment_created_at": "2025-05-24T14:18:41+00:00", + "comment_author": "ysiraichi", + "comment_body": "Ah, no. Not specifying `copy` should also work, here. Will fix.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2089796566", + "pr_number": 150218, + "pr_file": "aten/src/ATen/DLConvertor.cpp", + "created_at": "2025-05-14T21:42:05+00:00", + "commented_code": "return fromDLPackImpl(src, std::move(deleter));\n}\n\nTensor maybeCopyTensor(\n const Tensor& data,\n std::optional optional_dl_device,\n std::optional copy) {\n bool force_copy = copy.has_value() && *copy;\n bool force_move = copy.has_value() && !*copy;\n\n if (optional_dl_device.has_value()) {\n auto device = at::getATenDevice(\n optional_dl_device->device_type,\n static_cast(optional_dl_device->device_id));\n\n if (device != data.device()) {\n TORCH_CHECK(\n !force_move,\n \"cannot move tensor from \",\n data.device(),\n \" to \",\n device,\n \" without copying. Set copy=True is needed.\");", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2089796566", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150218, + "pr_file": "aten/src/ATen/DLConvertor.cpp", + "discussion_id": "2089796566", + "commented_code": "@@ -388,4 +389,35 @@ Tensor fromDLPackVersioned(DLManagedTensorVersioned* src, std::function(src, std::move(deleter));\n }\n \n+Tensor maybeCopyTensor(\n+ const Tensor& data,\n+ std::optional optional_dl_device,\n+ std::optional copy) {\n+ bool force_copy = copy.has_value() && *copy;\n+ bool force_move = copy.has_value() && !*copy;\n+\n+ if (optional_dl_device.has_value()) {\n+ auto device = at::getATenDevice(\n+ optional_dl_device->device_type,\n+ static_cast(optional_dl_device->device_id));\n+\n+ if (device != data.device()) {\n+ TORCH_CHECK(\n+ !force_move,\n+ \"cannot move tensor from \",\n+ data.device(),\n+ \" to \",\n+ device,\n+ \" without copying. Set copy=True is needed.\");", + "comment_created_at": "2025-05-14T21:42:05+00:00", + "comment_author": "albanD", + "comment_body": "copy=None would also work. 
It's more that you cannot set copy=False in this case", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2089797023", + "pr_number": 150218, + "pr_file": "aten/src/ATen/DLConvertor.cpp", + "created_at": "2025-05-14T21:42:36+00:00", + "commented_code": "return fromDLPackImpl(src, std::move(deleter));\n}\n\nTensor maybeCopyTensor(\n const Tensor& data,\n std::optional optional_dl_device,\n std::optional copy) {\n bool force_copy = copy.has_value() && *copy;\n bool force_move = copy.has_value() && !*copy;\n\n if (optional_dl_device.has_value()) {\n auto device = at::getATenDevice(\n optional_dl_device->device_type,\n static_cast(optional_dl_device->device_id));\n\n if (device != data.device()) {\n TORCH_CHECK(\n !force_move,\n \"cannot move tensor from \",\n data.device(),\n \" to \",\n device,\n \" without copying. Set copy=True is needed.\");\n return data.to(device);\n }\n }\n\n if (force_copy) {\n return data.detach().clone();", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2089797023", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150218, + "pr_file": "aten/src/ATen/DLConvertor.cpp", + "discussion_id": "2089797023", + "commented_code": "@@ -388,4 +389,35 @@ Tensor fromDLPackVersioned(DLManagedTensorVersioned* src, std::function(src, std::move(deleter));\n }\n \n+Tensor maybeCopyTensor(\n+ const Tensor& data,\n+ std::optional optional_dl_device,\n+ std::optional copy) {\n+ bool force_copy = copy.has_value() && *copy;\n+ bool force_move = copy.has_value() && !*copy;\n+\n+ if (optional_dl_device.has_value()) {\n+ auto device = at::getATenDevice(\n+ optional_dl_device->device_type,\n+ static_cast(optional_dl_device->device_id));\n+\n+ if (device != data.device()) {\n+ TORCH_CHECK(\n+ !force_move,\n+ \"cannot move tensor from \",\n+ data.device(),\n+ \" to \",\n+ device,\n+ \" without copying. Set copy=True is needed.\");\n+ return data.to(device);\n+ }\n+ }\n+\n+ if (force_copy) {\n+ return data.detach().clone();", + "comment_created_at": "2025-05-14T21:42:36+00:00", + "comment_author": "albanD", + "comment_body": "There should be no autograd here already right? So detach() is not needed", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-optimize-device-transfers.md b/_reviewers/pytorch-optimize-device-transfers.md index a987eeb..20640d4 100644 --- a/_reviewers/pytorch-optimize-device-transfers.md +++ b/_reviewers/pytorch-optimize-device-transfers.md @@ -41,177 +41,3 @@ TORCH_CHECK( ``` These optimizations significantly improve performance by reducing synchronization overhead between CPU and GPU operations. 
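A minimal sketch of the pinned-memory, non-blocking transfer pattern that the `IndexingUtils.h` review thread above converges on; the tensor names (`mask`, `pinned`, `idx_gpu`) are illustrative only and not taken from the PR:

```python
import torch

# Illustrative sketch only -- not code from the PR. It mirrors the reviewer
# suggestion in the IndexingUtils.h thread: write nonzero() into a zero-element
# pinned (page-locked) CPU tensor so the host-to-device copy can be issued
# with non_blocking=True, avoiding a cudaStreamSynchronize.
if torch.cuda.is_available():
    mask = torch.rand(1024) > 0.5
    pinned = torch.zeros(0, dtype=torch.long, pin_memory=True)
    torch.nonzero(mask, out=pinned)                 # out tensor is resized in place
    idx_gpu = pinned.to("cuda", non_blocking=True)  # async copy, no host sync
```

Pinned memory is what makes the copy genuinely asynchronous; with ordinary pageable host memory the same `non_blocking=True` call generally degrades to a synchronizing copy, which is the behavior the linked PyTorch tutorial warns about.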
- - -[ - { - "discussion_id": "2106417227", - "pr_number": 154202, - "pr_file": "aten/src/ATen/native/mkldnn/xpu/Conv.cpp", - "created_at": "2025-05-26T02:31:41+00:00", - "commented_code": "return std::tuple{grad_input, grad_weight, grad_bias};\n}\n\nTensor convolution_pointwise(\n const Tensor& input_t,\n const Tensor& weight_t,\n const std::optional& bias_opt,", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2106417227", - "repo_full_name": "pytorch/pytorch", - "pr_number": 154202, - "pr_file": "aten/src/ATen/native/mkldnn/xpu/Conv.cpp", - "discussion_id": "2106417227", - "commented_code": "@@ -751,11 +681,134 @@ std::tuple convolution_backward_overrideable(\n return std::tuple{grad_input, grad_weight, grad_bias};\n }\n \n+Tensor convolution_pointwise(\n+ const Tensor& input_t,\n+ const Tensor& weight_t,\n+ const std::optional& bias_opt,", - "comment_created_at": "2025-05-26T02:31:41+00:00", - "comment_author": "EikanWang", - "comment_body": "Pls. add device guard for input tensors and take bias_opt into account.", - "pr_file_module": null - }, - { - "comment_id": "2106656163", - "repo_full_name": "pytorch/pytorch", - "pr_number": 154202, - "pr_file": "aten/src/ATen/native/mkldnn/xpu/Conv.cpp", - "discussion_id": "2106417227", - "commented_code": "@@ -751,11 +681,134 @@ std::tuple convolution_backward_overrideable(\n return std::tuple{grad_input, grad_weight, grad_bias};\n }\n \n+Tensor convolution_pointwise(\n+ const Tensor& input_t,\n+ const Tensor& weight_t,\n+ const std::optional& bias_opt,", - "comment_created_at": "2025-05-26T06:38:47+00:00", - "comment_author": "ZhiweiYan-96", - "comment_body": "all functions added device guard. \r\n\r\n>>> take bias_opt into account.\r\n\r\nThe device guard should only needs operates on 1 single input, which I refer to the generated files. 
The reason might be that we only need to change runtime context once.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2155857288", - "pr_number": 156384, - "pr_file": "aten/src/ATen/native/IndexingUtils.h", - "created_at": "2025-06-19T01:50:25+00:00", - "commented_code": "}\n // Replace with nonzeros\n auto nonzero = index.nonzero();\n if (ensure_same_device && nonzero.device() != self.device()) {\n bool non_blocking = nonzero.is_cpu() && self.device().is_cuda();", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2155857288", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156384, - "pr_file": "aten/src/ATen/native/IndexingUtils.h", - "discussion_id": "2155857288", - "commented_code": "@@ -39,9 +40,15 @@ static void invalid_mask(const Tensor & self, int64_t idx, const Tensor & mask,\n }\n // Replace with nonzeros\n auto nonzero = index.nonzero();\n+ if (ensure_same_device && nonzero.device() != self.device()) {\n+ bool non_blocking = nonzero.is_cpu() && self.device().is_cuda();", - "comment_created_at": "2025-06-19T01:50:25+00:00", - "comment_author": "ngimel", - "comment_body": "this is not enough to make a copy non-blocking, you also need to copy nonzero to pinned memory", - "pr_file_module": null - }, - { - "comment_id": "2156606565", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156384, - "pr_file": "aten/src/ATen/native/IndexingUtils.h", - "discussion_id": "2155857288", - "commented_code": "@@ -39,9 +40,15 @@ static void invalid_mask(const Tensor & self, int64_t idx, const Tensor & mask,\n }\n // Replace with nonzeros\n auto nonzero = index.nonzero();\n+ if (ensure_same_device && nonzero.device() != self.device()) {\n+ bool non_blocking = nonzero.is_cpu() && self.device().is_cuda();", - "comment_created_at": "2025-06-19T09:50:53+00:00", - "comment_author": "lgeiger", - "comment_body": "Thanks for the fast review. I verified in the pytorch profile that `cudaStreamSynchronize` is not called with this code and `test_sync_warning` also checks that.\r\n\r\nHow would I create a `nonzero` in pinned memory? 
Can I just create an zero sized empty tensor in pinned memory and use that as the output parameter or is that problematic since we don't know the tensorsize beforehand?\r\n\r\nI'm just asking because I believe simply calling [`nonzero.pin_memory()` is not recommended ](https://docs.pytorch.org/tutorials/intermediate/pinmem_nonblock.html#what-you-will-learn) or is this different here?", - "pr_file_module": null - }, - { - "comment_id": "2160697950", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156384, - "pr_file": "aten/src/ATen/native/IndexingUtils.h", - "discussion_id": "2155857288", - "commented_code": "@@ -39,9 +40,15 @@ static void invalid_mask(const Tensor & self, int64_t idx, const Tensor & mask,\n }\n // Replace with nonzeros\n auto nonzero = index.nonzero();\n+ if (ensure_same_device && nonzero.device() != self.device()) {\n+ bool non_blocking = nonzero.is_cpu() && self.device().is_cuda();", - "comment_created_at": "2025-06-23T04:35:27+00:00", - "comment_author": "ngimel", - "comment_body": "You could allocate zero-element pinned tensor and send it as `out` arg to `nonzero`", - "pr_file_module": null - }, - { - "comment_id": "2161316931", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156384, - "pr_file": "aten/src/ATen/native/IndexingUtils.h", - "discussion_id": "2155857288", - "commented_code": "@@ -39,9 +40,15 @@ static void invalid_mask(const Tensor & self, int64_t idx, const Tensor & mask,\n }\n // Replace with nonzeros\n auto nonzero = index.nonzero();\n+ if (ensure_same_device && nonzero.device() != self.device()) {\n+ bool non_blocking = nonzero.is_cpu() && self.device().is_cuda();", - "comment_created_at": "2025-06-23T10:53:24+00:00", - "comment_author": "lgeiger", - "comment_body": "@ngimel Done in 0b7acc263f79a11c7f36216b0a620f24969220e8", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2088107795", - "pr_number": 150218, - "pr_file": "aten/src/ATen/DLConvertor.cpp", - "created_at": "2025-05-14T06:03:19+00:00", - "commented_code": "return fromDLPackImpl(src, std::move(deleter));\n}\n\nTensor maybeCopyTensor(\n const Tensor& data,\n std::optional optional_dl_device,\n std::optional copy) {\n bool force_copy = copy.has_value() && *copy;\n bool force_move = copy.has_value() && !*copy;\n\n if (optional_dl_device.has_value()) {\n auto device = at::getATenDevice(\n optional_dl_device->device_type,\n static_cast(optional_dl_device->device_id));\n\n if (device != data.device()) {\n TORCH_CHECK(\n !force_move,\n \"cannot move tensor from \",\n data.device(),\n \" to \",\n device,\n \" without copying. Set copy=True is needed.\");", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2088107795", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150218, - "pr_file": "aten/src/ATen/DLConvertor.cpp", - "discussion_id": "2088107795", - "commented_code": "@@ -388,4 +389,35 @@ Tensor fromDLPackVersioned(DLManagedTensorVersioned* src, std::function(src, std::move(deleter));\n }\n \n+Tensor maybeCopyTensor(\n+ const Tensor& data,\n+ std::optional optional_dl_device,\n+ std::optional copy) {\n+ bool force_copy = copy.has_value() && *copy;\n+ bool force_move = copy.has_value() && !*copy;\n+\n+ if (optional_dl_device.has_value()) {\n+ auto device = at::getATenDevice(\n+ optional_dl_device->device_type,\n+ static_cast(optional_dl_device->device_id));\n+\n+ if (device != data.device()) {\n+ TORCH_CHECK(\n+ !force_move,\n+ \"cannot move tensor from \",\n+ data.device(),\n+ \" to \",\n+ device,\n+ \" without copying. 
Set copy=True is needed.\");", - "comment_created_at": "2025-05-14T06:03:19+00:00", - "comment_author": "msaroufim", - "comment_body": "just double checking this check would make it clear that users would need to set the copy flag in\r\n\r\n`torch.from_dlpack(..., copy=copy)`", - "pr_file_module": null - }, - { - "comment_id": "2105836795", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150218, - "pr_file": "aten/src/ATen/DLConvertor.cpp", - "discussion_id": "2088107795", - "commented_code": "@@ -388,4 +389,35 @@ Tensor fromDLPackVersioned(DLManagedTensorVersioned* src, std::function(src, std::move(deleter));\n }\n \n+Tensor maybeCopyTensor(\n+ const Tensor& data,\n+ std::optional optional_dl_device,\n+ std::optional copy) {\n+ bool force_copy = copy.has_value() && *copy;\n+ bool force_move = copy.has_value() && !*copy;\n+\n+ if (optional_dl_device.has_value()) {\n+ auto device = at::getATenDevice(\n+ optional_dl_device->device_type,\n+ static_cast(optional_dl_device->device_id));\n+\n+ if (device != data.device()) {\n+ TORCH_CHECK(\n+ !force_move,\n+ \"cannot move tensor from \",\n+ data.device(),\n+ \" to \",\n+ device,\n+ \" without copying. Set copy=True is needed.\");", - "comment_created_at": "2025-05-24T14:18:41+00:00", - "comment_author": "ysiraichi", - "comment_body": "Ah, no. Not specifying `copy` should also work, here. Will fix.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2089796566", - "pr_number": 150218, - "pr_file": "aten/src/ATen/DLConvertor.cpp", - "created_at": "2025-05-14T21:42:05+00:00", - "commented_code": "return fromDLPackImpl(src, std::move(deleter));\n}\n\nTensor maybeCopyTensor(\n const Tensor& data,\n std::optional optional_dl_device,\n std::optional copy) {\n bool force_copy = copy.has_value() && *copy;\n bool force_move = copy.has_value() && !*copy;\n\n if (optional_dl_device.has_value()) {\n auto device = at::getATenDevice(\n optional_dl_device->device_type,\n static_cast(optional_dl_device->device_id));\n\n if (device != data.device()) {\n TORCH_CHECK(\n !force_move,\n \"cannot move tensor from \",\n data.device(),\n \" to \",\n device,\n \" without copying. Set copy=True is needed.\");", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2089796566", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150218, - "pr_file": "aten/src/ATen/DLConvertor.cpp", - "discussion_id": "2089796566", - "commented_code": "@@ -388,4 +389,35 @@ Tensor fromDLPackVersioned(DLManagedTensorVersioned* src, std::function(src, std::move(deleter));\n }\n \n+Tensor maybeCopyTensor(\n+ const Tensor& data,\n+ std::optional optional_dl_device,\n+ std::optional copy) {\n+ bool force_copy = copy.has_value() && *copy;\n+ bool force_move = copy.has_value() && !*copy;\n+\n+ if (optional_dl_device.has_value()) {\n+ auto device = at::getATenDevice(\n+ optional_dl_device->device_type,\n+ static_cast(optional_dl_device->device_id));\n+\n+ if (device != data.device()) {\n+ TORCH_CHECK(\n+ !force_move,\n+ \"cannot move tensor from \",\n+ data.device(),\n+ \" to \",\n+ device,\n+ \" without copying. Set copy=True is needed.\");", - "comment_created_at": "2025-05-14T21:42:05+00:00", - "comment_author": "albanD", - "comment_body": "copy=None would also work. 
It's more that you cannot set copy=False in this case", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2089797023", - "pr_number": 150218, - "pr_file": "aten/src/ATen/DLConvertor.cpp", - "created_at": "2025-05-14T21:42:36+00:00", - "commented_code": "return fromDLPackImpl(src, std::move(deleter));\n}\n\nTensor maybeCopyTensor(\n const Tensor& data,\n std::optional optional_dl_device,\n std::optional copy) {\n bool force_copy = copy.has_value() && *copy;\n bool force_move = copy.has_value() && !*copy;\n\n if (optional_dl_device.has_value()) {\n auto device = at::getATenDevice(\n optional_dl_device->device_type,\n static_cast(optional_dl_device->device_id));\n\n if (device != data.device()) {\n TORCH_CHECK(\n !force_move,\n \"cannot move tensor from \",\n data.device(),\n \" to \",\n device,\n \" without copying. Set copy=True is needed.\");\n return data.to(device);\n }\n }\n\n if (force_copy) {\n return data.detach().clone();", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2089797023", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150218, - "pr_file": "aten/src/ATen/DLConvertor.cpp", - "discussion_id": "2089797023", - "commented_code": "@@ -388,4 +389,35 @@ Tensor fromDLPackVersioned(DLManagedTensorVersioned* src, std::function(src, std::move(deleter));\n }\n \n+Tensor maybeCopyTensor(\n+ const Tensor& data,\n+ std::optional optional_dl_device,\n+ std::optional copy) {\n+ bool force_copy = copy.has_value() && *copy;\n+ bool force_move = copy.has_value() && !*copy;\n+\n+ if (optional_dl_device.has_value()) {\n+ auto device = at::getATenDevice(\n+ optional_dl_device->device_type,\n+ static_cast(optional_dl_device->device_id));\n+\n+ if (device != data.device()) {\n+ TORCH_CHECK(\n+ !force_move,\n+ \"cannot move tensor from \",\n+ data.device(),\n+ \" to \",\n+ device,\n+ \" without copying. Set copy=True is needed.\");\n+ return data.to(device);\n+ }\n+ }\n+\n+ if (force_copy) {\n+ return data.detach().clone();", - "comment_created_at": "2025-05-14T21:42:36+00:00", - "comment_author": "albanD", - "comment_body": "There should be no autograd here already right? 
So detach() is not needed", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-optimize-memory-operations.json b/_reviewers/pytorch-optimize-memory-operations.json new file mode 100644 index 0000000..84097f3 --- /dev/null +++ b/_reviewers/pytorch-optimize-memory-operations.json @@ -0,0 +1,126 @@ +[ + { + "discussion_id": "2145822363", + "pr_number": 151547, + "pr_file": "aten/src/ATen/native/CPUBlas.cpp", + "created_at": "2025-06-13T18:30:57+00:00", + "commented_code": "b, &ldb_,\n &beta_,\n float_v.data(), &ldc_);\n for (auto cv: float_v) {\n *(c++) = c10::convert(cv);\n\n for (const auto j : c10::irange(n)) {\n for (const auto i : c10::irange(m)) {\n auto offset = j * ldc + i;", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2145822363", + "repo_full_name": "pytorch/pytorch", + "pr_number": 151547, + "pr_file": "aten/src/ATen/native/CPUBlas.cpp", + "discussion_id": "2145822363", + "commented_code": "@@ -368,8 +368,12 @@ void gemm(\n b, &ldb_,\n &beta_,\n float_v.data(), &ldc_);\n- for (auto cv: float_v) {\n- *(c++) = c10::convert(cv);\n+\n+ for (const auto j : c10::irange(n)) {\n+ for (const auto i : c10::irange(m)) {\n+ auto offset = j * ldc + i;", + "comment_created_at": "2025-06-13T18:30:57+00:00", + "comment_author": "taoye9", + "comment_body": "it's better to move j * ldc out of inner loop to reduce unnecessary multiplication.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1996412678", + "pr_number": 148605, + "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", + "created_at": "2025-03-14T23:11:11+00:00", + "commented_code": "}\n}\n\n\ntemplate \n__global__ void GammaBetaBackwardSimpleCUDAKernel(\n// When the data size is a multiple of the tile size we can use fewer\n// instructions and registers. Example, this is the case when M=256 N=256.\ntemplate \n__device__\n__forceinline__\nvoid\nblockReduceGammaBetaBackwardsAligned(\n int64_t M,\n int64_t N,\n const T* dY,\n const T* X,\n const T_ACC* mean,\n const T_ACC* rstd,\n T* dg,\n T* db) {\n const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n if (j < N) {\n T_ACC sum1 = 0;\n T_ACC sum2 = 0;\n for (int64_t i = 0; i < M; ++i) {\n const int64_t index = i * N + j;\n sum1 += dg == nullptr ? T_ACC(0)\n : static_cast(dY[index]) *\n (static_cast(X[index]) - static_cast(mean[i])) *\n static_cast(rstd[i]);\n sum2 += db == nullptr ? 
T_ACC(0) : static_cast(dY[index]);\n const T* __restrict__ dY,\n const T* __restrict__ X,\n const T_ACC* __restrict__ mean,\n const T_ACC* __restrict__ rstd,\n T* __restrict__ dg,\n T* __restrict__ db,\n T_ACC &dg_sum,\n T_ACC &db_sum\n) {\n constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n for (int64_t M_start = blockIdx.y * rows_per_block_y;\n M_start < M;\n M_start += rows_per_block_y * gridDim.y) {\n int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n T_ACC warp_mean = 0, warp_rstd = 0;\n if (lane_id < rows_per_thread_y) {\n warp_mean = mean[mean_index + lane_id];\n warp_rstd = rstd[mean_index + lane_id];\n }\n if (dg != nullptr) {\n dg[j] = sum1;\n WARP_SYNC();\n T_ACC dY_regs[rows_per_thread_y] = {0};\n T_ACC X_regs[rows_per_thread_y] = {0};\n #pragma unroll\n for (int i = 0; i < rows_per_thread_y; ++i) {\n int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n if (aligned_grid || (current_y < M && thread_x < N)) {\n dY_regs[i] = dY[current_y * N + thread_x];\n X_regs[i] = X[current_y * N + thread_x];\n }\n }\n if (db != nullptr) {\n db[j] = sum2;\n\n #pragma unroll\n for (int i = 0; i < rows_per_thread_y; ++i) {\n T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n db_sum += dY_regs[i];\n }\n }\n}\n\n// This implementation gets called if M and N divide with 32. This case should\n// be the most common. We can then make better use of warp level intrinsics\n// to improve performance.\n\ntemplate \n__global__ void GammaBetaBackwardCUDAKernel_32x32(\ntemplate \n__device__\n__forceinline__\nvoid\nblockReduceGammaBetaBackwardsHelper(\n int64_t M_start,\n int64_t M,\n int64_t N,\n const T* dY,\n const T* X,\n const T_ACC* mean,\n const T_ACC* rstd,\n T* dg,\n T* db) {\n alignas(sizeof(double)) extern __shared__ char s_data1[];\n T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n T_ACC* s_dg;\n T_ACC* s_db;\n\n T_ACC dg_sum = 0;\n T_ACC db_sum = 0;\n\n const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n\n if (j < N) {\n constexpr int unroll_factor = 8;\n int laneId = threadIdx.x & (C10_WARP_SIZE - 1);\n const T* __restrict__ dY,\n const T* __restrict__ X,\n const T_ACC* __restrict__ mean,\n const T_ACC* __restrict__ rstd,\n T* __restrict__ dg,\n T* __restrict__ db,\n T_ACC &dg_sum,\n T_ACC &db_sum\n) {\n constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n\n int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n T_ACC warp_mean = 0, warp_rstd = 0;\n if (lane_id < rows_per_thread_y && mean_index + lane_id < M) {\n warp_mean = mean[mean_index + lane_id];\n warp_rstd = rstd[mean_index + lane_id];\n }\n WARP_SYNC();\n\n T_ACC mean_reg, mean_reg_tmp;\n T_ACC rstd_reg, rstd_reg_tmp;\n T dY_reg;\n T X_reg;\n\n // Main loop\n int bcounter;\n for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor);\n bcounter++) {\n int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n\n if (laneId < unroll_factor) {\n mean_reg_tmp = mean[offset + laneId];\n rstd_reg_tmp = rstd[offset + laneId];\n T_ACC dY_regs[rows_per_thread_y] = {0};\n T_ACC X_regs[rows_per_thread_y] = {0};\n #pragma unroll\n for (int i = 0; i < 
rows_per_thread_y; ++i) {\n int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n bool active = true;\n if (check_x && thread_x >= N) {\n active = false;\n }\n WARP_SYNC();\n\n #pragma unroll\n for (int ii = 0; ii < unroll_factor; ++ii) {\n dY_reg = dY[(offset + ii) * N + j];\n X_reg = X[(offset + ii) * N + j];\n mean_reg = WARP_SHFL(mean_reg_tmp, ii, kWarpSize);\n rstd_reg = WARP_SHFL(rstd_reg_tmp, ii, kWarpSize);\n dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n db_sum += dY_reg;\n if (check_y && current_y >= M) {\n active = false;\n }\n }\n\n // Remainder loop\n int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n for (int ii = 0; ii < unroll_factor; ii++) {\n if ((offset + ii) < M) {\n mean_reg = mean[offset + ii];\n rstd_reg = rstd[offset + ii];\n dY_reg = dY[(offset + ii) * N + j];\n X_reg = X[(offset + ii) * N + j];\n dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n db_sum += dY_reg;\n if (active) {\n dY_regs[i] = dY[current_y * N + thread_x];\n X_regs[i] = X[current_y * N + thread_x];\n }\n }\n\n // This kernel uses a block of (C10_WARP_SIZE x C10_WARP_SIZE) and\n // gets called when M; N divide by 32. We can use warp shuffles\n // for the final reduction step. This removes 4 shmem loads and\n // stores with their corresponding __syncthreads()\n\n // This greatly reduces bank conflicts at the expense of a little\n // extra shared memory. It does not impact occupancy\n int padded_bx = (1 + blockDim.x);\n\n s_dg = s_data_typed;\n s_db = s_data_typed + (padded_bx * blockDim.y);\n s_dg[threadIdx.y * padded_bx + threadIdx.x] = dg_sum;\n s_db[threadIdx.y * padded_bx + threadIdx.x] = db_sum;\n __syncthreads();\n\n // Load transposed so that a warp holds an entire column\n T_ACC reg_dg = s_dg[threadIdx.x * padded_bx + threadIdx.y];\n T_ACC reg_db = s_db[threadIdx.x * padded_bx + threadIdx.y];\n for (unsigned delta = C10_WARP_SIZE >> 1; delta >= 1; delta >>= 1) {\n reg_dg += WARP_SHFL_XOR(reg_dg, delta, kWarpSize);\n reg_db += WARP_SHFL_XOR(reg_db, delta, kWarpSize);\n #pragma unroll\n for (int i = 0; i < rows_per_thread_y; ++i) {\n T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n db_sum += dY_regs[i];\n }\n}\n\n if (threadIdx.x == 0) {\n const int64_t j = blockIdx.x * blockDim.x + threadIdx.y;\n if (dg) {\n dg[j] = reg_dg;\n }\n if (db) {\n db[j] = reg_db;\n }\ntemplate \n__device__\n__forceinline__\nvoid\nblockReduceGammaBetaBackwardsWithChecks(\n int64_t M,\n int64_t N,\n const T* __restrict__ dY,\n const T* __restrict__ X,\n const T_ACC* __restrict__ mean,\n const T_ACC* __restrict__ rstd,\n T* __restrict__ dg,\n T* __restrict__ db,\n T_ACC &dg_sum,\n T_ACC &db_sum\n) {\n for (int64_t M_start = blockIdx.y * rows_per_block_y;\n M_start < M;\n M_start += rows_per_block_y * gridDim.y) {\n int64_t M_end = M_start + rows_per_block_y - 1;\n if (M_end < M) {\n blockReduceGammaBetaBackwardsHelper\n (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n } else {\n blockReduceGammaBetaBackwardsHelper\n (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n }\n }\n}\n\ntemplate \n__global__ void GammaBetaBackwardCUDAKernel(\n// block_dim_x is the number of threads in the x dimension per block.\n// block_dim_y is the number of threads in the y dimension per block.\n// rows_per_block_y is the size of the tile (number of data elements)\n// in the y dimension per block.\n// partial_reduction indicates whether we need to reduce 
across threads\n// or not. If set to true, we will not reduce across threads. This can\n// be faster in the M >> N case but requires another kernel to do a full\n// final reduction.\n// aligned_grid means the data size is a multiple of tile size. In that\n// case we don't need to check for boundary conditions which can provide\n// a further speedup by not needing instructions to check for edge cases\n// and not needing predicate registers.\ntemplate \n__global__\nvoid\n GammaBetaBackwardCUDAKernelTemplate(\n int64_t M,\n int64_t N,\n const T* dY,\n const T* X,\n const T_ACC* mean,\n const T_ACC* rstd,\n T* dg,\n T* db) {\n alignas(sizeof(double)) extern __shared__ char s_data1[];\n T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n T_ACC* s_dg;\n T_ACC* s_db;\n\n const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n const T* __restrict__ dY,\n const T* __restrict__ X,\n const T_ACC* __restrict__ mean,\n const T_ACC* __restrict__ rstd,\n T* __restrict__ dg,\n T* __restrict__ db) {\n // This assert is a compile-time check only.\n constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n static_assert(rows_per_thread_y <= kWarpSize);\n\n T_ACC dg_sum = 0;\n T_ACC db_sum = 0;\n\n if (j < N) {\n constexpr int unroll_factor = 8;\n\n T_ACC mean_reg;\n T_ACC rstd_reg;\n T dY_reg;\n T X_reg;\n if (aligned_grid) {\n // When N and M align perfectly with block_dim_x and block_dim_y, we\n // can skip boundary condition checks that waste instruction issue slots.\n blockReduceGammaBetaBackwardsAligned\n \n (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n } else {\n // In the general case we need to check boundary conditions in the M\n // dimension. However, we can still avoid boundary checks in the N dimension\n // for the inner blocks. 
So try to avoid those checks when possible.\n if (blockIdx.x * block_dim_x + block_dim_x - 1 < N) {\n blockReduceGammaBetaBackwardsWithChecks\n \n (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n } else {\n blockReduceGammaBetaBackwardsWithChecks\n \n (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n }\n }\n\n // Main Loop\n int bcounter;\n for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor); bcounter++){\n int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n\n #pragma unroll\n for (int ii = 0; ii < unroll_factor; ++ii) {\n dY_reg = dY[(offset + ii) * N + j];\n X_reg = X[(offset + ii) * N + j];\n mean_reg = mean[offset + ii];\n rstd_reg = rstd[offset + ii];\n dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n db_sum += dY_reg;\n // When partial_reduction is requested, we don't reduce within a block.\n // We also don't reduce if we are only a single block in the y dimension.\n if (partial_reduction || (blockDim.y == 1 && gridDim.y == 1)) {\n if (aligned_grid || thread_x < N) {\n int64_t thread_y = blockIdx.y * blockDim.y + threadIdx.y;\n if (dg) {\n dg[thread_y * N + thread_x] = dg_sum;\n }\n }\n\n // Remainder loop\n int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n for (int ii = 0; ii < unroll_factor; ii++ ){\n if ((offset + ii) < M) {\n dY_reg = dY[(offset + ii) * N + j ];\n X_reg = X[(offset + ii) * N + j];\n mean_reg = mean[offset + ii];\n rstd_reg = rstd[offset + ii];\n dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n db_sum += dY_reg;\n if (db) {\n db[thread_y * N + thread_x] = db_sum;\n }\n }\n\n // Do the final reduction in shared memory\n } else {\n // The caller requested a full reduction so we must reduce across\n // warps using shared memory and warp shuffles.\n static_assert(rows_per_thread_y <= C10_WARP_SIZE);\n alignas(sizeof(double)) extern __shared__ char s_data1[];\n T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n T_ACC* s_dg;\n T_ACC* s_db;\n int padded_bx = (block_dim_x + 1);\n // Transpose dg and db.\n s_dg = s_data_typed;\n s_db = s_data_typed + blockDim.x * blockDim.y;\n s_dg[threadIdx.y * blockDim.x + threadIdx.x] = dg_sum;\n s_db[threadIdx.y * blockDim.x + threadIdx.x] = db_sum;\n s_db = s_data_typed + (padded_bx * block_dim_y);\n s_dg[threadIdx.y * padded_bx + threadIdx.x] = dg_sum;\n s_db[threadIdx.y * padded_bx + threadIdx.x] = db_sum;\n __syncthreads();\n\n for (int offset = blockDim.y / 2; offset >= 1; offset /= 2) {\n if (threadIdx.y < offset) {\n s_dg[threadIdx.y * blockDim.x + threadIdx.x] +=\n s_dg[(threadIdx.y + offset) * blockDim.x + threadIdx.x];\n s_db[threadIdx.y * blockDim.x + threadIdx.x] +=\n s_db[(threadIdx.y + offset) * blockDim.x + threadIdx.x];\n // Load transposed so that a warp holds an entire column\n // Because block_dim_x != block_dim_y in the general case, we need\n // some code to handle the general case.\n static_assert(block_dim_x * block_dim_y % C10_WARP_SIZE == 0);\n constexpr int warps_available_to_reduce = block_dim_x * block_dim_y / C10_WARP_SIZE;\n int thread_id = threadIdx.y * block_dim_x + threadIdx.x;\n int warp_id = thread_id / C10_WARP_SIZE;\n int lane_id = thread_id & (C10_WARP_SIZE - 1);\n #pragma unroll\n for (int i = warp_id; i < block_dim_x; i += warps_available_to_reduce) {\n T_ACC reg_db, reg_dg;\n if (lane_id < block_dim_y) {\n reg_dg = s_dg[lane_id * padded_bx + i];\n reg_db = s_db[lane_id * padded_bx + i];\n }\n #pragma unroll\n for (unsigned delta = block_dim_y >> 1; delta >= 1; delta >>= 1) 
{\n reg_dg += WARP_SHFL_XOR(reg_dg, delta, kWarpSize);\n reg_db += WARP_SHFL_XOR(reg_db, delta, kWarpSize);\n }\n // Reduce is done. Now write it out to global memory.\n int64_t out_index = blockIdx.x * block_dim_x + i;\n if (threadIdx.x == 0 && (aligned_grid || out_index < N)) {\n if (dg) {\n dg[out_index] = reg_dg;\n }\n __syncthreads();\n if (db) {\n db[out_index] = reg_db;\n }\n }\n }\n }\n}\n\n if (threadIdx.y == 0) {\n if (dg) {\n dg[j] = s_dg[threadIdx.x];\n }\n if (db) {\n db[j] = s_db[threadIdx.x];\n }\ntemplate\nvoid LaunchAndCheckGammaBetaBackwardKernel(\n bool aligned_grid,\n dim3 blocks,\n dim3 threads,\n size_t shmem_sz,\n cudaStream_t cuda_stream,\n const T* dY_data,\n const T* X_data,\n const T_ACC* mean_data,\n const T_ACC* rstd_data,\n int64_t M,\n int64_t N,\n T* dgamma_data,\n T* dbeta_data) {\nif (aligned_grid) {\n GammaBetaBackwardCUDAKernelTemplate\n <<>>(\n M,\n N,\n dY_data,\n X_data,\n mean_data,\n rstd_data,\n dgamma_data,\n dbeta_data);\n } else {\n GammaBetaBackwardCUDAKernelTemplate\n <<>>(\n M,\n N,\n dY_data,\n X_data,\n mean_data,\n rstd_data,\n dgamma_data,\n dbeta_data);\n }\n C10_CUDA_KERNEL_LAUNCH_CHECK();\n}\n\ntemplate\nvoid ConfigureAndLaunchGammaBetaBackwardKernel(\n const T* dY_data,\n const T* X_data,\n const T_ACC* mean_data,\n const T_ACC* rstd_data,\n int64_t M,\n int64_t N,\n Tensor* dgamma,\n Tensor* dbeta,\n cudaStream_t cuda_stream) {\n T* dgamma_data =\n dgamma->defined() ? dgamma->template data_ptr() : nullptr;\n T* dbeta_data = dbeta->defined() ? dbeta->template data_ptr() : nullptr;\n bool aligned_grid = (M % rows_per_block_y == 0) && (N % block_dim_x == 0);\n dim3 threads{block_dim_x, block_dim_y};\n dim3 blocks;\n blocks.x = (N + block_dim_x - 1) / block_dim_x;\n blocks.y = 1;\n size_t shmem_sz = (block_dim_x + 1) * block_dim_y * sizeof(T_ACC) * 2;\n if (blocks.y == 1 && threads.y == 1) {\n // Optimization: since there is just one thread doing all the summation, we don't need a reduction\n // across threads. So we set partial_reduction to true.\n LaunchAndCheckGammaBetaBackwardKernel(\n aligned_grid, blocks, threads, shmem_sz, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_data, dbeta_data);\n } else {\n LaunchAndCheckGammaBetaBackwardKernel(\n aligned_grid, blocks, threads, shmem_sz, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_data, dbeta_data);\n }\n\n}\n\ntemplate\nvoid LaunchGammaBetaBackwardCUDAKernel(\n const T* dY_data,\n const T* X_data,\n const T_ACC* mean_data,\n const T_ACC* rstd_data,\n int64_t M,\n int64_t N,\n Tensor* dgamma,\n Tensor* dbeta,\n cudaStream_t cuda_stream) {\n if (M > 64 * 1024 && N / kWarpSize < 64) {\n // We have a situation where M >> N and N is small.\n // In this case we can speed up the computation by parallelizing in the M dimension.\n // We launch multiple blocks in the y-dimension, and compute partial sums for the\n // gradient in the first pass. 
Then we do a .sum(0) to do a final reduction.\n // Although we launch 2 kernels, we can get up to a 10x speedup for large M.\n constexpr int block_dim_x = 32;\n constexpr int block_dim_y = 1;\n constexpr int rows_per_block_y = 32;\n bool aligned_grid = (M % rows_per_block_y == 0) && (N % block_dim_x == 0);\n dim3 threads{block_dim_x, block_dim_y};\n dim3 blocks;\n blocks.x = (N + block_dim_x - 1) / block_dim_x;\n // int rows_per_block = my_gamma_beta_unroll_factor *\n blocks.y = (M + rows_per_block_y - 1) / rows_per_block_y;\n constexpr int max_grid_size = 64 * 1024 / 2;\n blocks.y = std::min(max_grid_size / blocks.x, blocks.y);\n auto options = dgamma->options();\n Tensor dgamma_blocks;\n Tensor dbeta_blocks;\n T * dgamma_blocks_ptr = nullptr;\n T * dbeta_blocks_ptr = nullptr;\n if (dgamma->defined()) {\n dgamma_blocks = at::zeros({blocks.y * threads.y, dgamma->size(-1)}, options);\n dgamma_blocks_ptr = dgamma_blocks.data_ptr();\n }\n if (dbeta->defined()) {\n dbeta_blocks = at::zeros({blocks.y * threads.y, dgamma->size(-1)}, options);\n dbeta_blocks_ptr = dbeta_blocks.data_ptr();\n }\n LaunchAndCheckGammaBetaBackwardKernel(\n aligned_grid, blocks, threads, 0, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_blocks_ptr, dbeta_blocks_ptr);\n\n *dgamma = dgamma_blocks.sum(0);\n *dbeta = dbeta_blocks.sum(0);\n } else {\n // We are in the normal case where M is not that large.\n // We can change the tile shape (which is the last template parameter) in accordance with M.\n // For small M it is faster to have a smaller tile, otherwise we could have idle threads.\n // For larger M we use a bigger tile size.\n if (M < 64) {", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "1996412678", + "repo_full_name": "pytorch/pytorch", + "pr_number": 148605, + "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", + "discussion_id": "1996412678", + "commented_code": "@@ -508,223 +509,418 @@ __global__ void layer_norm_grad_input_kernel_vectorized(\n }\n }\n \n-\n-template \n-__global__ void GammaBetaBackwardSimpleCUDAKernel(\n+// When the data size is a multiple of the tile size we can use fewer\n+// instructions and registers. Example, this is the case when M=256 N=256.\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsAligned(\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n- if (j < N) {\n- T_ACC sum1 = 0;\n- T_ACC sum2 = 0;\n- for (int64_t i = 0; i < M; ++i) {\n- const int64_t index = i * N + j;\n- sum1 += dg == nullptr ? T_ACC(0)\n- : static_cast(dY[index]) *\n- (static_cast(X[index]) - static_cast(mean[i])) *\n- static_cast(rstd[i]);\n- sum2 += db == nullptr ? 
T_ACC(0) : static_cast(dY[index]);\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n+ for (int64_t M_start = blockIdx.y * rows_per_block_y;\n+ M_start < M;\n+ M_start += rows_per_block_y * gridDim.y) {\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n }\n- if (dg != nullptr) {\n- dg[j] = sum1;\n+ WARP_SYNC();\n+ T_ACC dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ if (aligned_grid || (current_y < M && thread_x < N)) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n+ }\n }\n- if (db != nullptr) {\n- db[j] = sum2;\n+\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n }\n }\n }\n \n-// This implementation gets called if M and N divide with 32. This case should\n-// be the most common. We can then make better use of warp level intrinsics\n-// to improve performance.\n-\n-template \n-__global__ void GammaBetaBackwardCUDAKernel_32x32(\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsHelper(\n+ int64_t M_start,\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- alignas(sizeof(double)) extern __shared__ char s_data1[];\n- T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n- T_ACC* s_dg;\n- T_ACC* s_db;\n-\n- T_ACC dg_sum = 0;\n- T_ACC db_sum = 0;\n-\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n-\n- if (j < N) {\n- constexpr int unroll_factor = 8;\n- int laneId = threadIdx.x & (C10_WARP_SIZE - 1);\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n+\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y && mean_index + lane_id < M) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n+ }\n+ WARP_SYNC();\n \n- T_ACC mean_reg, mean_reg_tmp;\n- T_ACC rstd_reg, rstd_reg_tmp;\n- T dY_reg;\n- T X_reg;\n \n- // Main loop\n- int bcounter;\n- for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor);\n- bcounter++) {\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n \n- if (laneId < unroll_factor) {\n- mean_reg_tmp = mean[offset + laneId];\n- rstd_reg_tmp = rstd[offset + laneId];\n+ T_ACC 
dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ bool active = true;\n+ if (check_x && thread_x >= N) {\n+ active = false;\n }\n- WARP_SYNC();\n-\n- #pragma unroll\n- for (int ii = 0; ii < unroll_factor; ++ii) {\n- dY_reg = dY[(offset + ii) * N + j];\n- X_reg = X[(offset + ii) * N + j];\n- mean_reg = WARP_SHFL(mean_reg_tmp, ii, kWarpSize);\n- rstd_reg = WARP_SHFL(rstd_reg_tmp, ii, kWarpSize);\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ if (check_y && current_y >= M) {\n+ active = false;\n }\n- }\n-\n- // Remainder loop\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n- for (int ii = 0; ii < unroll_factor; ii++) {\n- if ((offset + ii) < M) {\n- mean_reg = mean[offset + ii];\n- rstd_reg = rstd[offset + ii];\n- dY_reg = dY[(offset + ii) * N + j];\n- X_reg = X[(offset + ii) * N + j];\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ if (active) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n }\n }\n \n- // This kernel uses a block of (C10_WARP_SIZE x C10_WARP_SIZE) and\n- // gets called when M; N divide by 32. We can use warp shuffles\n- // for the final reduction step. This removes 4 shmem loads and\n- // stores with their corresponding __syncthreads()\n-\n- // This greatly reduces bank conflicts at the expense of a little\n- // extra shared memory. It does not impact occupancy\n- int padded_bx = (1 + blockDim.x);\n-\n- s_dg = s_data_typed;\n- s_db = s_data_typed + (padded_bx * blockDim.y);\n- s_dg[threadIdx.y * padded_bx + threadIdx.x] = dg_sum;\n- s_db[threadIdx.y * padded_bx + threadIdx.x] = db_sum;\n- __syncthreads();\n-\n- // Load transposed so that a warp holds an entire column\n- T_ACC reg_dg = s_dg[threadIdx.x * padded_bx + threadIdx.y];\n- T_ACC reg_db = s_db[threadIdx.x * padded_bx + threadIdx.y];\n- for (unsigned delta = C10_WARP_SIZE >> 1; delta >= 1; delta >>= 1) {\n- reg_dg += WARP_SHFL_XOR(reg_dg, delta, kWarpSize);\n- reg_db += WARP_SHFL_XOR(reg_db, delta, kWarpSize);\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n }\n+}\n \n- if (threadIdx.x == 0) {\n- const int64_t j = blockIdx.x * blockDim.x + threadIdx.y;\n- if (dg) {\n- dg[j] = reg_dg;\n- }\n- if (db) {\n- db[j] = reg_db;\n- }\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsWithChecks(\n+ int64_t M,\n+ int64_t N,\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ for (int64_t M_start = blockIdx.y * rows_per_block_y;\n+ M_start < M;\n+ M_start += rows_per_block_y * gridDim.y) {\n+ int64_t M_end = M_start + rows_per_block_y - 1;\n+ if (M_end < M) {\n+ blockReduceGammaBetaBackwardsHelper\n+ (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ blockReduceGammaBetaBackwardsHelper\n+ (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n }\n }\n }\n \n-template \n-__global__ void GammaBetaBackwardCUDAKernel(\n+// block_dim_x is the number of threads in the x dimension per block.\n+// block_dim_y is 
the number of threads in the y dimension per block.\n+// rows_per_block_y is the size of the tile (number of data elements)\n+// in the y dimension per block.\n+// partial_reduction indicates whether we need to reduce across threads\n+// or not. If set to true, we will not reduce across threads. This can\n+// be faster in the M >> N case but requires another kernel to do a full\n+// final reduction.\n+// aligned_grid means the data size is a multiple of tile size. In that\n+// case we don't need to check for boundary conditions which can provide\n+// a further speedup by not needing instructions to check for edge cases\n+// and not needing predicate registers.\n+template \n+__global__\n+void\n+ GammaBetaBackwardCUDAKernelTemplate(\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- alignas(sizeof(double)) extern __shared__ char s_data1[];\n- T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n- T_ACC* s_dg;\n- T_ACC* s_db;\n-\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db) {\n+ // This assert is a compile-time check only.\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ static_assert(rows_per_thread_y <= kWarpSize);\n \n T_ACC dg_sum = 0;\n T_ACC db_sum = 0;\n \n- if (j < N) {\n- constexpr int unroll_factor = 8;\n-\n- T_ACC mean_reg;\n- T_ACC rstd_reg;\n- T dY_reg;\n- T X_reg;\n+ if (aligned_grid) {\n+ // When N and M align perfectly with block_dim_x and block_dim_y, we\n+ // can skip boundary condition checks that waste instruction issue slots.\n+ blockReduceGammaBetaBackwardsAligned\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ // In the general case we need to check boundary conditions in the M\n+ // dimension. However, we can still avoid boundary checks in the N dimension\n+ // for the inner blocks. 
So try to avoid those checks when possible.\n+ if (blockIdx.x * block_dim_x + block_dim_x - 1 < N) {\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ }\n+ }\n \n- // Main Loop\n- int bcounter;\n- for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor); bcounter++){\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n \n- #pragma unroll\n- for (int ii = 0; ii < unroll_factor; ++ii) {\n- dY_reg = dY[(offset + ii) * N + j];\n- X_reg = X[(offset + ii) * N + j];\n- mean_reg = mean[offset + ii];\n- rstd_reg = rstd[offset + ii];\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ // When partial_reduction is requested, we don't reduce within a block.\n+ // We also don't reduce if we are only a single block in the y dimension.\n+ if (partial_reduction || (blockDim.y == 1 && gridDim.y == 1)) {\n+ if (aligned_grid || thread_x < N) {\n+ int64_t thread_y = blockIdx.y * blockDim.y + threadIdx.y;\n+ if (dg) {\n+ dg[thread_y * N + thread_x] = dg_sum;\n }\n- }\n-\n- // Remainder loop\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n- for (int ii = 0; ii < unroll_factor; ii++ ){\n- if ((offset + ii) < M) {\n- dY_reg = dY[(offset + ii) * N + j ];\n- X_reg = X[(offset + ii) * N + j];\n- mean_reg = mean[offset + ii];\n- rstd_reg = rstd[offset + ii];\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ if (db) {\n+ db[thread_y * N + thread_x] = db_sum;\n }\n }\n-\n- // Do the final reduction in shared memory\n+ } else {\n+ // The caller requested a full reduction so we must reduce across\n+ // warps using shared memory and warp shuffles.\n+ static_assert(rows_per_thread_y <= C10_WARP_SIZE);\n+ alignas(sizeof(double)) extern __shared__ char s_data1[];\n+ T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n+ T_ACC* s_dg;\n+ T_ACC* s_db;\n+ int padded_bx = (block_dim_x + 1);\n+ // Transpose dg and db.\n s_dg = s_data_typed;\n- s_db = s_data_typed + blockDim.x * blockDim.y;\n- s_dg[threadIdx.y * blockDim.x + threadIdx.x] = dg_sum;\n- s_db[threadIdx.y * blockDim.x + threadIdx.x] = db_sum;\n+ s_db = s_data_typed + (padded_bx * block_dim_y);\n+ s_dg[threadIdx.y * padded_bx + threadIdx.x] = dg_sum;\n+ s_db[threadIdx.y * padded_bx + threadIdx.x] = db_sum;\n __syncthreads();\n \n- for (int offset = blockDim.y / 2; offset >= 1; offset /= 2) {\n- if (threadIdx.y < offset) {\n- s_dg[threadIdx.y * blockDim.x + threadIdx.x] +=\n- s_dg[(threadIdx.y + offset) * blockDim.x + threadIdx.x];\n- s_db[threadIdx.y * blockDim.x + threadIdx.x] +=\n- s_db[(threadIdx.y + offset) * blockDim.x + threadIdx.x];\n+ // Load transposed so that a warp holds an entire column\n+ // Because block_dim_x != block_dim_y in the general case, we need\n+ // some code to handle the general case.\n+ static_assert(block_dim_x * block_dim_y % C10_WARP_SIZE == 0);\n+ constexpr int warps_available_to_reduce = block_dim_x * block_dim_y / C10_WARP_SIZE;\n+ int thread_id = threadIdx.y * block_dim_x + threadIdx.x;\n+ int warp_id = thread_id / C10_WARP_SIZE;\n+ int lane_id = thread_id & (C10_WARP_SIZE - 1);\n+ #pragma unroll\n+ for (int i = warp_id; i < block_dim_x; i += warps_available_to_reduce) {\n+ T_ACC reg_db, reg_dg;\n+ if (lane_id < block_dim_y) {\n+ reg_dg = s_dg[lane_id * padded_bx + i];\n+ reg_db = s_db[lane_id * padded_bx + i];\n+ 
}\n+ #pragma unroll\n+ for (unsigned delta = block_dim_y >> 1; delta >= 1; delta >>= 1) {\n+ reg_dg += WARP_SHFL_XOR(reg_dg, delta, kWarpSize);\n+ reg_db += WARP_SHFL_XOR(reg_db, delta, kWarpSize);\n+ }\n+ // Reduce is done. Now write it out to global memory.\n+ int64_t out_index = blockIdx.x * block_dim_x + i;\n+ if (threadIdx.x == 0 && (aligned_grid || out_index < N)) {\n+ if (dg) {\n+ dg[out_index] = reg_dg;\n }\n- __syncthreads();\n+ if (db) {\n+ db[out_index] = reg_db;\n+ }\n+ }\n }\n+ }\n+}\n \n- if (threadIdx.y == 0) {\n- if (dg) {\n- dg[j] = s_dg[threadIdx.x];\n- }\n- if (db) {\n- db[j] = s_db[threadIdx.x];\n- }\n+template\n+void LaunchAndCheckGammaBetaBackwardKernel(\n+ bool aligned_grid,\n+ dim3 blocks,\n+ dim3 threads,\n+ size_t shmem_sz,\n+ cudaStream_t cuda_stream,\n+ const T* dY_data,\n+ const T* X_data,\n+ const T_ACC* mean_data,\n+ const T_ACC* rstd_data,\n+ int64_t M,\n+ int64_t N,\n+ T* dgamma_data,\n+ T* dbeta_data) {\n+if (aligned_grid) {\n+ GammaBetaBackwardCUDAKernelTemplate\n+ <<>>(\n+ M,\n+ N,\n+ dY_data,\n+ X_data,\n+ mean_data,\n+ rstd_data,\n+ dgamma_data,\n+ dbeta_data);\n+ } else {\n+ GammaBetaBackwardCUDAKernelTemplate\n+ <<>>(\n+ M,\n+ N,\n+ dY_data,\n+ X_data,\n+ mean_data,\n+ rstd_data,\n+ dgamma_data,\n+ dbeta_data);\n+ }\n+ C10_CUDA_KERNEL_LAUNCH_CHECK();\n+}\n+\n+template\n+void ConfigureAndLaunchGammaBetaBackwardKernel(\n+ const T* dY_data,\n+ const T* X_data,\n+ const T_ACC* mean_data,\n+ const T_ACC* rstd_data,\n+ int64_t M,\n+ int64_t N,\n+ Tensor* dgamma,\n+ Tensor* dbeta,\n+ cudaStream_t cuda_stream) {\n+ T* dgamma_data =\n+ dgamma->defined() ? dgamma->template data_ptr() : nullptr;\n+ T* dbeta_data = dbeta->defined() ? dbeta->template data_ptr() : nullptr;\n+ bool aligned_grid = (M % rows_per_block_y == 0) && (N % block_dim_x == 0);\n+ dim3 threads{block_dim_x, block_dim_y};\n+ dim3 blocks;\n+ blocks.x = (N + block_dim_x - 1) / block_dim_x;\n+ blocks.y = 1;\n+ size_t shmem_sz = (block_dim_x + 1) * block_dim_y * sizeof(T_ACC) * 2;\n+ if (blocks.y == 1 && threads.y == 1) {\n+ // Optimization: since there is just one thread doing all the summation, we don't need a reduction\n+ // across threads. So we set partial_reduction to true.\n+ LaunchAndCheckGammaBetaBackwardKernel(\n+ aligned_grid, blocks, threads, shmem_sz, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_data, dbeta_data);\n+ } else {\n+ LaunchAndCheckGammaBetaBackwardKernel(\n+ aligned_grid, blocks, threads, shmem_sz, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_data, dbeta_data);\n+ }\n+\n+}\n+\n+template\n+void LaunchGammaBetaBackwardCUDAKernel(\n+ const T* dY_data,\n+ const T* X_data,\n+ const T_ACC* mean_data,\n+ const T_ACC* rstd_data,\n+ int64_t M,\n+ int64_t N,\n+ Tensor* dgamma,\n+ Tensor* dbeta,\n+ cudaStream_t cuda_stream) {\n+ if (M > 64 * 1024 && N / kWarpSize < 64) {\n+ // We have a situation where M >> N and N is small.\n+ // In this case we can speed up the computation by parallelizing in the M dimension.\n+ // We launch multiple blocks in the y-dimension, and compute partial sums for the\n+ // gradient in the first pass. 
Then we do a .sum(0) to do a final reduction.\n+ // Although we launch 2 kernels, we can get up to a 10x speedup for large M.\n+ constexpr int block_dim_x = 32;\n+ constexpr int block_dim_y = 1;\n+ constexpr int rows_per_block_y = 32;\n+ bool aligned_grid = (M % rows_per_block_y == 0) && (N % block_dim_x == 0);\n+ dim3 threads{block_dim_x, block_dim_y};\n+ dim3 blocks;\n+ blocks.x = (N + block_dim_x - 1) / block_dim_x;\n+ // int rows_per_block = my_gamma_beta_unroll_factor *\n+ blocks.y = (M + rows_per_block_y - 1) / rows_per_block_y;\n+ constexpr int max_grid_size = 64 * 1024 / 2;\n+ blocks.y = std::min(max_grid_size / blocks.x, blocks.y);\n+ auto options = dgamma->options();\n+ Tensor dgamma_blocks;\n+ Tensor dbeta_blocks;\n+ T * dgamma_blocks_ptr = nullptr;\n+ T * dbeta_blocks_ptr = nullptr;\n+ if (dgamma->defined()) {\n+ dgamma_blocks = at::zeros({blocks.y * threads.y, dgamma->size(-1)}, options);\n+ dgamma_blocks_ptr = dgamma_blocks.data_ptr();\n+ }\n+ if (dbeta->defined()) {\n+ dbeta_blocks = at::zeros({blocks.y * threads.y, dgamma->size(-1)}, options);\n+ dbeta_blocks_ptr = dbeta_blocks.data_ptr();\n+ }\n+ LaunchAndCheckGammaBetaBackwardKernel(\n+ aligned_grid, blocks, threads, 0, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_blocks_ptr, dbeta_blocks_ptr);\n+\n+ *dgamma = dgamma_blocks.sum(0);\n+ *dbeta = dbeta_blocks.sum(0);\n+ } else {\n+ // We are in the normal case where M is not that large.\n+ // We can change the tile shape (which is the last template parameter) in accordance with M.\n+ // For small M it is faster to have a smaller tile, otherwise we could have idle threads.\n+ // For larger M we use a bigger tile size.\n+ if (M < 64) {", + "comment_created_at": "2025-03-14T23:11:11+00:00", + "comment_author": "ngimel", + "comment_body": "do we need all 4 instantiations here? How much is the perf impact?", + "pr_file_module": null + }, + { + "comment_id": "2000093553", + "repo_full_name": "pytorch/pytorch", + "pr_number": 148605, + "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", + "discussion_id": "1996412678", + "commented_code": "@@ -508,223 +509,418 @@ __global__ void layer_norm_grad_input_kernel_vectorized(\n }\n }\n \n-\n-template \n-__global__ void GammaBetaBackwardSimpleCUDAKernel(\n+// When the data size is a multiple of the tile size we can use fewer\n+// instructions and registers. Example, this is the case when M=256 N=256.\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsAligned(\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n- if (j < N) {\n- T_ACC sum1 = 0;\n- T_ACC sum2 = 0;\n- for (int64_t i = 0; i < M; ++i) {\n- const int64_t index = i * N + j;\n- sum1 += dg == nullptr ? T_ACC(0)\n- : static_cast(dY[index]) *\n- (static_cast(X[index]) - static_cast(mean[i])) *\n- static_cast(rstd[i]);\n- sum2 += db == nullptr ? 
T_ACC(0) : static_cast(dY[index]);\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n+ for (int64_t M_start = blockIdx.y * rows_per_block_y;\n+ M_start < M;\n+ M_start += rows_per_block_y * gridDim.y) {\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n }\n- if (dg != nullptr) {\n- dg[j] = sum1;\n+ WARP_SYNC();\n+ T_ACC dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ if (aligned_grid || (current_y < M && thread_x < N)) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n+ }\n }\n- if (db != nullptr) {\n- db[j] = sum2;\n+\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n }\n }\n }\n \n-// This implementation gets called if M and N divide with 32. This case should\n-// be the most common. We can then make better use of warp level intrinsics\n-// to improve performance.\n-\n-template \n-__global__ void GammaBetaBackwardCUDAKernel_32x32(\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsHelper(\n+ int64_t M_start,\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- alignas(sizeof(double)) extern __shared__ char s_data1[];\n- T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n- T_ACC* s_dg;\n- T_ACC* s_db;\n-\n- T_ACC dg_sum = 0;\n- T_ACC db_sum = 0;\n-\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n-\n- if (j < N) {\n- constexpr int unroll_factor = 8;\n- int laneId = threadIdx.x & (C10_WARP_SIZE - 1);\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n+\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y && mean_index + lane_id < M) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n+ }\n+ WARP_SYNC();\n \n- T_ACC mean_reg, mean_reg_tmp;\n- T_ACC rstd_reg, rstd_reg_tmp;\n- T dY_reg;\n- T X_reg;\n \n- // Main loop\n- int bcounter;\n- for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor);\n- bcounter++) {\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n \n- if (laneId < unroll_factor) {\n- mean_reg_tmp = mean[offset + laneId];\n- rstd_reg_tmp = rstd[offset + laneId];\n+ T_ACC 
dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ bool active = true;\n+ if (check_x && thread_x >= N) {\n+ active = false;\n }\n- WARP_SYNC();\n-\n- #pragma unroll\n- for (int ii = 0; ii < unroll_factor; ++ii) {\n- dY_reg = dY[(offset + ii) * N + j];\n- X_reg = X[(offset + ii) * N + j];\n- mean_reg = WARP_SHFL(mean_reg_tmp, ii, kWarpSize);\n- rstd_reg = WARP_SHFL(rstd_reg_tmp, ii, kWarpSize);\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ if (check_y && current_y >= M) {\n+ active = false;\n }\n- }\n-\n- // Remainder loop\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n- for (int ii = 0; ii < unroll_factor; ii++) {\n- if ((offset + ii) < M) {\n- mean_reg = mean[offset + ii];\n- rstd_reg = rstd[offset + ii];\n- dY_reg = dY[(offset + ii) * N + j];\n- X_reg = X[(offset + ii) * N + j];\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ if (active) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n }\n }\n \n- // This kernel uses a block of (C10_WARP_SIZE x C10_WARP_SIZE) and\n- // gets called when M; N divide by 32. We can use warp shuffles\n- // for the final reduction step. This removes 4 shmem loads and\n- // stores with their corresponding __syncthreads()\n-\n- // This greatly reduces bank conflicts at the expense of a little\n- // extra shared memory. It does not impact occupancy\n- int padded_bx = (1 + blockDim.x);\n-\n- s_dg = s_data_typed;\n- s_db = s_data_typed + (padded_bx * blockDim.y);\n- s_dg[threadIdx.y * padded_bx + threadIdx.x] = dg_sum;\n- s_db[threadIdx.y * padded_bx + threadIdx.x] = db_sum;\n- __syncthreads();\n-\n- // Load transposed so that a warp holds an entire column\n- T_ACC reg_dg = s_dg[threadIdx.x * padded_bx + threadIdx.y];\n- T_ACC reg_db = s_db[threadIdx.x * padded_bx + threadIdx.y];\n- for (unsigned delta = C10_WARP_SIZE >> 1; delta >= 1; delta >>= 1) {\n- reg_dg += WARP_SHFL_XOR(reg_dg, delta, kWarpSize);\n- reg_db += WARP_SHFL_XOR(reg_db, delta, kWarpSize);\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n }\n+}\n \n- if (threadIdx.x == 0) {\n- const int64_t j = blockIdx.x * blockDim.x + threadIdx.y;\n- if (dg) {\n- dg[j] = reg_dg;\n- }\n- if (db) {\n- db[j] = reg_db;\n- }\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsWithChecks(\n+ int64_t M,\n+ int64_t N,\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ for (int64_t M_start = blockIdx.y * rows_per_block_y;\n+ M_start < M;\n+ M_start += rows_per_block_y * gridDim.y) {\n+ int64_t M_end = M_start + rows_per_block_y - 1;\n+ if (M_end < M) {\n+ blockReduceGammaBetaBackwardsHelper\n+ (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ blockReduceGammaBetaBackwardsHelper\n+ (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n }\n }\n }\n \n-template \n-__global__ void GammaBetaBackwardCUDAKernel(\n+// block_dim_x is the number of threads in the x dimension per block.\n+// block_dim_y is 
the number of threads in the y dimension per block.\n+// rows_per_block_y is the size of the tile (number of data elements)\n+// in the y dimension per block.\n+// partial_reduction indicates whether we need to reduce across threads\n+// or not. If set to true, we will not reduce across threads. This can\n+// be faster in the M >> N case but requires another kernel to do a full\n+// final reduction.\n+// aligned_grid means the data size is a multiple of tile size. In that\n+// case we don't need to check for boundary conditions which can provide\n+// a further speedup by not needing instructions to check for edge cases\n+// and not needing predicate registers.\n+template \n+__global__\n+void\n+ GammaBetaBackwardCUDAKernelTemplate(\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- alignas(sizeof(double)) extern __shared__ char s_data1[];\n- T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n- T_ACC* s_dg;\n- T_ACC* s_db;\n-\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db) {\n+ // This assert is a compile-time check only.\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ static_assert(rows_per_thread_y <= kWarpSize);\n \n T_ACC dg_sum = 0;\n T_ACC db_sum = 0;\n \n- if (j < N) {\n- constexpr int unroll_factor = 8;\n-\n- T_ACC mean_reg;\n- T_ACC rstd_reg;\n- T dY_reg;\n- T X_reg;\n+ if (aligned_grid) {\n+ // When N and M align perfectly with block_dim_x and block_dim_y, we\n+ // can skip boundary condition checks that waste instruction issue slots.\n+ blockReduceGammaBetaBackwardsAligned\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ // In the general case we need to check boundary conditions in the M\n+ // dimension. However, we can still avoid boundary checks in the N dimension\n+ // for the inner blocks. 
So try to avoid those checks when possible.\n+ if (blockIdx.x * block_dim_x + block_dim_x - 1 < N) {\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ }\n+ }\n \n- // Main Loop\n- int bcounter;\n- for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor); bcounter++){\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n \n- #pragma unroll\n- for (int ii = 0; ii < unroll_factor; ++ii) {\n- dY_reg = dY[(offset + ii) * N + j];\n- X_reg = X[(offset + ii) * N + j];\n- mean_reg = mean[offset + ii];\n- rstd_reg = rstd[offset + ii];\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ // When partial_reduction is requested, we don't reduce within a block.\n+ // We also don't reduce if we are only a single block in the y dimension.\n+ if (partial_reduction || (blockDim.y == 1 && gridDim.y == 1)) {\n+ if (aligned_grid || thread_x < N) {\n+ int64_t thread_y = blockIdx.y * blockDim.y + threadIdx.y;\n+ if (dg) {\n+ dg[thread_y * N + thread_x] = dg_sum;\n }\n- }\n-\n- // Remainder loop\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n- for (int ii = 0; ii < unroll_factor; ii++ ){\n- if ((offset + ii) < M) {\n- dY_reg = dY[(offset + ii) * N + j ];\n- X_reg = X[(offset + ii) * N + j];\n- mean_reg = mean[offset + ii];\n- rstd_reg = rstd[offset + ii];\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ if (db) {\n+ db[thread_y * N + thread_x] = db_sum;\n }\n }\n-\n- // Do the final reduction in shared memory\n+ } else {\n+ // The caller requested a full reduction so we must reduce across\n+ // warps using shared memory and warp shuffles.\n+ static_assert(rows_per_thread_y <= C10_WARP_SIZE);\n+ alignas(sizeof(double)) extern __shared__ char s_data1[];\n+ T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n+ T_ACC* s_dg;\n+ T_ACC* s_db;\n+ int padded_bx = (block_dim_x + 1);\n+ // Transpose dg and db.\n s_dg = s_data_typed;\n- s_db = s_data_typed + blockDim.x * blockDim.y;\n- s_dg[threadIdx.y * blockDim.x + threadIdx.x] = dg_sum;\n- s_db[threadIdx.y * blockDim.x + threadIdx.x] = db_sum;\n+ s_db = s_data_typed + (padded_bx * block_dim_y);\n+ s_dg[threadIdx.y * padded_bx + threadIdx.x] = dg_sum;\n+ s_db[threadIdx.y * padded_bx + threadIdx.x] = db_sum;\n __syncthreads();\n \n- for (int offset = blockDim.y / 2; offset >= 1; offset /= 2) {\n- if (threadIdx.y < offset) {\n- s_dg[threadIdx.y * blockDim.x + threadIdx.x] +=\n- s_dg[(threadIdx.y + offset) * blockDim.x + threadIdx.x];\n- s_db[threadIdx.y * blockDim.x + threadIdx.x] +=\n- s_db[(threadIdx.y + offset) * blockDim.x + threadIdx.x];\n+ // Load transposed so that a warp holds an entire column\n+ // Because block_dim_x != block_dim_y in the general case, we need\n+ // some code to handle the general case.\n+ static_assert(block_dim_x * block_dim_y % C10_WARP_SIZE == 0);\n+ constexpr int warps_available_to_reduce = block_dim_x * block_dim_y / C10_WARP_SIZE;\n+ int thread_id = threadIdx.y * block_dim_x + threadIdx.x;\n+ int warp_id = thread_id / C10_WARP_SIZE;\n+ int lane_id = thread_id & (C10_WARP_SIZE - 1);\n+ #pragma unroll\n+ for (int i = warp_id; i < block_dim_x; i += warps_available_to_reduce) {\n+ T_ACC reg_db, reg_dg;\n+ if (lane_id < block_dim_y) {\n+ reg_dg = s_dg[lane_id * padded_bx + i];\n+ reg_db = s_db[lane_id * padded_bx + i];\n+ 
}\n+ #pragma unroll\n+ for (unsigned delta = block_dim_y >> 1; delta >= 1; delta >>= 1) {\n+ reg_dg += WARP_SHFL_XOR(reg_dg, delta, kWarpSize);\n+ reg_db += WARP_SHFL_XOR(reg_db, delta, kWarpSize);\n+ }\n+ // Reduce is done. Now write it out to global memory.\n+ int64_t out_index = blockIdx.x * block_dim_x + i;\n+ if (threadIdx.x == 0 && (aligned_grid || out_index < N)) {\n+ if (dg) {\n+ dg[out_index] = reg_dg;\n }\n- __syncthreads();\n+ if (db) {\n+ db[out_index] = reg_db;\n+ }\n+ }\n }\n+ }\n+}\n \n- if (threadIdx.y == 0) {\n- if (dg) {\n- dg[j] = s_dg[threadIdx.x];\n- }\n- if (db) {\n- db[j] = s_db[threadIdx.x];\n- }\n+template\n+void LaunchAndCheckGammaBetaBackwardKernel(\n+ bool aligned_grid,\n+ dim3 blocks,\n+ dim3 threads,\n+ size_t shmem_sz,\n+ cudaStream_t cuda_stream,\n+ const T* dY_data,\n+ const T* X_data,\n+ const T_ACC* mean_data,\n+ const T_ACC* rstd_data,\n+ int64_t M,\n+ int64_t N,\n+ T* dgamma_data,\n+ T* dbeta_data) {\n+if (aligned_grid) {\n+ GammaBetaBackwardCUDAKernelTemplate\n+ <<>>(\n+ M,\n+ N,\n+ dY_data,\n+ X_data,\n+ mean_data,\n+ rstd_data,\n+ dgamma_data,\n+ dbeta_data);\n+ } else {\n+ GammaBetaBackwardCUDAKernelTemplate\n+ <<>>(\n+ M,\n+ N,\n+ dY_data,\n+ X_data,\n+ mean_data,\n+ rstd_data,\n+ dgamma_data,\n+ dbeta_data);\n+ }\n+ C10_CUDA_KERNEL_LAUNCH_CHECK();\n+}\n+\n+template\n+void ConfigureAndLaunchGammaBetaBackwardKernel(\n+ const T* dY_data,\n+ const T* X_data,\n+ const T_ACC* mean_data,\n+ const T_ACC* rstd_data,\n+ int64_t M,\n+ int64_t N,\n+ Tensor* dgamma,\n+ Tensor* dbeta,\n+ cudaStream_t cuda_stream) {\n+ T* dgamma_data =\n+ dgamma->defined() ? dgamma->template data_ptr() : nullptr;\n+ T* dbeta_data = dbeta->defined() ? dbeta->template data_ptr() : nullptr;\n+ bool aligned_grid = (M % rows_per_block_y == 0) && (N % block_dim_x == 0);\n+ dim3 threads{block_dim_x, block_dim_y};\n+ dim3 blocks;\n+ blocks.x = (N + block_dim_x - 1) / block_dim_x;\n+ blocks.y = 1;\n+ size_t shmem_sz = (block_dim_x + 1) * block_dim_y * sizeof(T_ACC) * 2;\n+ if (blocks.y == 1 && threads.y == 1) {\n+ // Optimization: since there is just one thread doing all the summation, we don't need a reduction\n+ // across threads. So we set partial_reduction to true.\n+ LaunchAndCheckGammaBetaBackwardKernel(\n+ aligned_grid, blocks, threads, shmem_sz, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_data, dbeta_data);\n+ } else {\n+ LaunchAndCheckGammaBetaBackwardKernel(\n+ aligned_grid, blocks, threads, shmem_sz, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_data, dbeta_data);\n+ }\n+\n+}\n+\n+template\n+void LaunchGammaBetaBackwardCUDAKernel(\n+ const T* dY_data,\n+ const T* X_data,\n+ const T_ACC* mean_data,\n+ const T_ACC* rstd_data,\n+ int64_t M,\n+ int64_t N,\n+ Tensor* dgamma,\n+ Tensor* dbeta,\n+ cudaStream_t cuda_stream) {\n+ if (M > 64 * 1024 && N / kWarpSize < 64) {\n+ // We have a situation where M >> N and N is small.\n+ // In this case we can speed up the computation by parallelizing in the M dimension.\n+ // We launch multiple blocks in the y-dimension, and compute partial sums for the\n+ // gradient in the first pass. 
Then we do a .sum(0) to do a final reduction.\n+ // Although we launch 2 kernels, we can get up to a 10x speedup for large M.\n+ constexpr int block_dim_x = 32;\n+ constexpr int block_dim_y = 1;\n+ constexpr int rows_per_block_y = 32;\n+ bool aligned_grid = (M % rows_per_block_y == 0) && (N % block_dim_x == 0);\n+ dim3 threads{block_dim_x, block_dim_y};\n+ dim3 blocks;\n+ blocks.x = (N + block_dim_x - 1) / block_dim_x;\n+ // int rows_per_block = my_gamma_beta_unroll_factor *\n+ blocks.y = (M + rows_per_block_y - 1) / rows_per_block_y;\n+ constexpr int max_grid_size = 64 * 1024 / 2;\n+ blocks.y = std::min(max_grid_size / blocks.x, blocks.y);\n+ auto options = dgamma->options();\n+ Tensor dgamma_blocks;\n+ Tensor dbeta_blocks;\n+ T * dgamma_blocks_ptr = nullptr;\n+ T * dbeta_blocks_ptr = nullptr;\n+ if (dgamma->defined()) {\n+ dgamma_blocks = at::zeros({blocks.y * threads.y, dgamma->size(-1)}, options);\n+ dgamma_blocks_ptr = dgamma_blocks.data_ptr();\n+ }\n+ if (dbeta->defined()) {\n+ dbeta_blocks = at::zeros({blocks.y * threads.y, dgamma->size(-1)}, options);\n+ dbeta_blocks_ptr = dbeta_blocks.data_ptr();\n+ }\n+ LaunchAndCheckGammaBetaBackwardKernel(\n+ aligned_grid, blocks, threads, 0, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_blocks_ptr, dbeta_blocks_ptr);\n+\n+ *dgamma = dgamma_blocks.sum(0);\n+ *dbeta = dbeta_blocks.sum(0);\n+ } else {\n+ // We are in the normal case where M is not that large.\n+ // We can change the tile shape (which is the last template parameter) in accordance with M.\n+ // For small M it is faster to have a smaller tile, otherwise we could have idle threads.\n+ // For larger M we use a bigger tile size.\n+ if (M < 64) {", + "comment_created_at": "2025-03-18T03:03:26+00:00", + "comment_author": "ahmadsharif1", + "comment_body": "\"image\"\r\n\r\nThe perf impact is substantial (20% regression) even for powers of 2 without this specialization. Note that even before my change we were specializing for M < 128. I hyper-specialize a little bit (keeping the same code -- just different template parameters) which results in a speed-up over the baseline:\r\n\r\n\"image\"\r\n\r\nMy code has each thread handling 8 rows so it's pointless launching threads that don't handle any rows at all for \"short and wide\" cases (i.e. for low values of M). They just cause wasted occupancy.\r\n", + "pr_file_module": null + }, + { + "comment_id": "2006461566", + "repo_full_name": "pytorch/pytorch", + "pr_number": 148605, + "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", + "discussion_id": "1996412678", + "commented_code": "@@ -508,223 +509,418 @@ __global__ void layer_norm_grad_input_kernel_vectorized(\n }\n }\n \n-\n-template \n-__global__ void GammaBetaBackwardSimpleCUDAKernel(\n+// When the data size is a multiple of the tile size we can use fewer\n+// instructions and registers. Example, this is the case when M=256 N=256.\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsAligned(\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n- if (j < N) {\n- T_ACC sum1 = 0;\n- T_ACC sum2 = 0;\n- for (int64_t i = 0; i < M; ++i) {\n- const int64_t index = i * N + j;\n- sum1 += dg == nullptr ? T_ACC(0)\n- : static_cast(dY[index]) *\n- (static_cast(X[index]) - static_cast(mean[i])) *\n- static_cast(rstd[i]);\n- sum2 += db == nullptr ? 
T_ACC(0) : static_cast(dY[index]);\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n+ for (int64_t M_start = blockIdx.y * rows_per_block_y;\n+ M_start < M;\n+ M_start += rows_per_block_y * gridDim.y) {\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n }\n- if (dg != nullptr) {\n- dg[j] = sum1;\n+ WARP_SYNC();\n+ T_ACC dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ if (aligned_grid || (current_y < M && thread_x < N)) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n+ }\n }\n- if (db != nullptr) {\n- db[j] = sum2;\n+\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n }\n }\n }\n \n-// This implementation gets called if M and N divide with 32. This case should\n-// be the most common. We can then make better use of warp level intrinsics\n-// to improve performance.\n-\n-template \n-__global__ void GammaBetaBackwardCUDAKernel_32x32(\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsHelper(\n+ int64_t M_start,\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- alignas(sizeof(double)) extern __shared__ char s_data1[];\n- T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n- T_ACC* s_dg;\n- T_ACC* s_db;\n-\n- T_ACC dg_sum = 0;\n- T_ACC db_sum = 0;\n-\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n-\n- if (j < N) {\n- constexpr int unroll_factor = 8;\n- int laneId = threadIdx.x & (C10_WARP_SIZE - 1);\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n+\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y && mean_index + lane_id < M) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n+ }\n+ WARP_SYNC();\n \n- T_ACC mean_reg, mean_reg_tmp;\n- T_ACC rstd_reg, rstd_reg_tmp;\n- T dY_reg;\n- T X_reg;\n \n- // Main loop\n- int bcounter;\n- for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor);\n- bcounter++) {\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n \n- if (laneId < unroll_factor) {\n- mean_reg_tmp = mean[offset + laneId];\n- rstd_reg_tmp = rstd[offset + laneId];\n+ T_ACC 
dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ bool active = true;\n+ if (check_x && thread_x >= N) {\n+ active = false;\n }\n- WARP_SYNC();\n-\n- #pragma unroll\n- for (int ii = 0; ii < unroll_factor; ++ii) {\n- dY_reg = dY[(offset + ii) * N + j];\n- X_reg = X[(offset + ii) * N + j];\n- mean_reg = WARP_SHFL(mean_reg_tmp, ii, kWarpSize);\n- rstd_reg = WARP_SHFL(rstd_reg_tmp, ii, kWarpSize);\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ if (check_y && current_y >= M) {\n+ active = false;\n }\n- }\n-\n- // Remainder loop\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n- for (int ii = 0; ii < unroll_factor; ii++) {\n- if ((offset + ii) < M) {\n- mean_reg = mean[offset + ii];\n- rstd_reg = rstd[offset + ii];\n- dY_reg = dY[(offset + ii) * N + j];\n- X_reg = X[(offset + ii) * N + j];\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ if (active) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n }\n }\n \n- // This kernel uses a block of (C10_WARP_SIZE x C10_WARP_SIZE) and\n- // gets called when M; N divide by 32. We can use warp shuffles\n- // for the final reduction step. This removes 4 shmem loads and\n- // stores with their corresponding __syncthreads()\n-\n- // This greatly reduces bank conflicts at the expense of a little\n- // extra shared memory. It does not impact occupancy\n- int padded_bx = (1 + blockDim.x);\n-\n- s_dg = s_data_typed;\n- s_db = s_data_typed + (padded_bx * blockDim.y);\n- s_dg[threadIdx.y * padded_bx + threadIdx.x] = dg_sum;\n- s_db[threadIdx.y * padded_bx + threadIdx.x] = db_sum;\n- __syncthreads();\n-\n- // Load transposed so that a warp holds an entire column\n- T_ACC reg_dg = s_dg[threadIdx.x * padded_bx + threadIdx.y];\n- T_ACC reg_db = s_db[threadIdx.x * padded_bx + threadIdx.y];\n- for (unsigned delta = C10_WARP_SIZE >> 1; delta >= 1; delta >>= 1) {\n- reg_dg += WARP_SHFL_XOR(reg_dg, delta, kWarpSize);\n- reg_db += WARP_SHFL_XOR(reg_db, delta, kWarpSize);\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n }\n+}\n \n- if (threadIdx.x == 0) {\n- const int64_t j = blockIdx.x * blockDim.x + threadIdx.y;\n- if (dg) {\n- dg[j] = reg_dg;\n- }\n- if (db) {\n- db[j] = reg_db;\n- }\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsWithChecks(\n+ int64_t M,\n+ int64_t N,\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ for (int64_t M_start = blockIdx.y * rows_per_block_y;\n+ M_start < M;\n+ M_start += rows_per_block_y * gridDim.y) {\n+ int64_t M_end = M_start + rows_per_block_y - 1;\n+ if (M_end < M) {\n+ blockReduceGammaBetaBackwardsHelper\n+ (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ blockReduceGammaBetaBackwardsHelper\n+ (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n }\n }\n }\n \n-template \n-__global__ void GammaBetaBackwardCUDAKernel(\n+// block_dim_x is the number of threads in the x dimension per block.\n+// block_dim_y is 
the number of threads in the y dimension per block.\n+// rows_per_block_y is the size of the tile (number of data elements)\n+// in the y dimension per block.\n+// partial_reduction indicates whether we need to reduce across threads\n+// or not. If set to true, we will not reduce across threads. This can\n+// be faster in the M >> N case but requires another kernel to do a full\n+// final reduction.\n+// aligned_grid means the data size is a multiple of tile size. In that\n+// case we don't need to check for boundary conditions which can provide\n+// a further speedup by not needing instructions to check for edge cases\n+// and not needing predicate registers.\n+template \n+__global__\n+void\n+ GammaBetaBackwardCUDAKernelTemplate(\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- alignas(sizeof(double)) extern __shared__ char s_data1[];\n- T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n- T_ACC* s_dg;\n- T_ACC* s_db;\n-\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db) {\n+ // This assert is a compile-time check only.\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ static_assert(rows_per_thread_y <= kWarpSize);\n \n T_ACC dg_sum = 0;\n T_ACC db_sum = 0;\n \n- if (j < N) {\n- constexpr int unroll_factor = 8;\n-\n- T_ACC mean_reg;\n- T_ACC rstd_reg;\n- T dY_reg;\n- T X_reg;\n+ if (aligned_grid) {\n+ // When N and M align perfectly with block_dim_x and block_dim_y, we\n+ // can skip boundary condition checks that waste instruction issue slots.\n+ blockReduceGammaBetaBackwardsAligned\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ // In the general case we need to check boundary conditions in the M\n+ // dimension. However, we can still avoid boundary checks in the N dimension\n+ // for the inner blocks. 
So try to avoid those checks when possible.\n+ if (blockIdx.x * block_dim_x + block_dim_x - 1 < N) {\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ }\n+ }\n \n- // Main Loop\n- int bcounter;\n- for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor); bcounter++){\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n \n- #pragma unroll\n- for (int ii = 0; ii < unroll_factor; ++ii) {\n- dY_reg = dY[(offset + ii) * N + j];\n- X_reg = X[(offset + ii) * N + j];\n- mean_reg = mean[offset + ii];\n- rstd_reg = rstd[offset + ii];\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ // When partial_reduction is requested, we don't reduce within a block.\n+ // We also don't reduce if we are only a single block in the y dimension.\n+ if (partial_reduction || (blockDim.y == 1 && gridDim.y == 1)) {\n+ if (aligned_grid || thread_x < N) {\n+ int64_t thread_y = blockIdx.y * blockDim.y + threadIdx.y;\n+ if (dg) {\n+ dg[thread_y * N + thread_x] = dg_sum;\n }\n- }\n-\n- // Remainder loop\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n- for (int ii = 0; ii < unroll_factor; ii++ ){\n- if ((offset + ii) < M) {\n- dY_reg = dY[(offset + ii) * N + j ];\n- X_reg = X[(offset + ii) * N + j];\n- mean_reg = mean[offset + ii];\n- rstd_reg = rstd[offset + ii];\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ if (db) {\n+ db[thread_y * N + thread_x] = db_sum;\n }\n }\n-\n- // Do the final reduction in shared memory\n+ } else {\n+ // The caller requested a full reduction so we must reduce across\n+ // warps using shared memory and warp shuffles.\n+ static_assert(rows_per_thread_y <= C10_WARP_SIZE);\n+ alignas(sizeof(double)) extern __shared__ char s_data1[];\n+ T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n+ T_ACC* s_dg;\n+ T_ACC* s_db;\n+ int padded_bx = (block_dim_x + 1);\n+ // Transpose dg and db.\n s_dg = s_data_typed;\n- s_db = s_data_typed + blockDim.x * blockDim.y;\n- s_dg[threadIdx.y * blockDim.x + threadIdx.x] = dg_sum;\n- s_db[threadIdx.y * blockDim.x + threadIdx.x] = db_sum;\n+ s_db = s_data_typed + (padded_bx * block_dim_y);\n+ s_dg[threadIdx.y * padded_bx + threadIdx.x] = dg_sum;\n+ s_db[threadIdx.y * padded_bx + threadIdx.x] = db_sum;\n __syncthreads();\n \n- for (int offset = blockDim.y / 2; offset >= 1; offset /= 2) {\n- if (threadIdx.y < offset) {\n- s_dg[threadIdx.y * blockDim.x + threadIdx.x] +=\n- s_dg[(threadIdx.y + offset) * blockDim.x + threadIdx.x];\n- s_db[threadIdx.y * blockDim.x + threadIdx.x] +=\n- s_db[(threadIdx.y + offset) * blockDim.x + threadIdx.x];\n+ // Load transposed so that a warp holds an entire column\n+ // Because block_dim_x != block_dim_y in the general case, we need\n+ // some code to handle the general case.\n+ static_assert(block_dim_x * block_dim_y % C10_WARP_SIZE == 0);\n+ constexpr int warps_available_to_reduce = block_dim_x * block_dim_y / C10_WARP_SIZE;\n+ int thread_id = threadIdx.y * block_dim_x + threadIdx.x;\n+ int warp_id = thread_id / C10_WARP_SIZE;\n+ int lane_id = thread_id & (C10_WARP_SIZE - 1);\n+ #pragma unroll\n+ for (int i = warp_id; i < block_dim_x; i += warps_available_to_reduce) {\n+ T_ACC reg_db, reg_dg;\n+ if (lane_id < block_dim_y) {\n+ reg_dg = s_dg[lane_id * padded_bx + i];\n+ reg_db = s_db[lane_id * padded_bx + i];\n+ 
}\n+ #pragma unroll\n+ for (unsigned delta = block_dim_y >> 1; delta >= 1; delta >>= 1) {\n+ reg_dg += WARP_SHFL_XOR(reg_dg, delta, kWarpSize);\n+ reg_db += WARP_SHFL_XOR(reg_db, delta, kWarpSize);\n+ }\n+ // Reduce is done. Now write it out to global memory.\n+ int64_t out_index = blockIdx.x * block_dim_x + i;\n+ if (threadIdx.x == 0 && (aligned_grid || out_index < N)) {\n+ if (dg) {\n+ dg[out_index] = reg_dg;\n }\n- __syncthreads();\n+ if (db) {\n+ db[out_index] = reg_db;\n+ }\n+ }\n }\n+ }\n+}\n \n- if (threadIdx.y == 0) {\n- if (dg) {\n- dg[j] = s_dg[threadIdx.x];\n- }\n- if (db) {\n- db[j] = s_db[threadIdx.x];\n- }\n+template\n+void LaunchAndCheckGammaBetaBackwardKernel(\n+ bool aligned_grid,\n+ dim3 blocks,\n+ dim3 threads,\n+ size_t shmem_sz,\n+ cudaStream_t cuda_stream,\n+ const T* dY_data,\n+ const T* X_data,\n+ const T_ACC* mean_data,\n+ const T_ACC* rstd_data,\n+ int64_t M,\n+ int64_t N,\n+ T* dgamma_data,\n+ T* dbeta_data) {\n+if (aligned_grid) {\n+ GammaBetaBackwardCUDAKernelTemplate\n+ <<>>(\n+ M,\n+ N,\n+ dY_data,\n+ X_data,\n+ mean_data,\n+ rstd_data,\n+ dgamma_data,\n+ dbeta_data);\n+ } else {\n+ GammaBetaBackwardCUDAKernelTemplate\n+ <<>>(\n+ M,\n+ N,\n+ dY_data,\n+ X_data,\n+ mean_data,\n+ rstd_data,\n+ dgamma_data,\n+ dbeta_data);\n+ }\n+ C10_CUDA_KERNEL_LAUNCH_CHECK();\n+}\n+\n+template\n+void ConfigureAndLaunchGammaBetaBackwardKernel(\n+ const T* dY_data,\n+ const T* X_data,\n+ const T_ACC* mean_data,\n+ const T_ACC* rstd_data,\n+ int64_t M,\n+ int64_t N,\n+ Tensor* dgamma,\n+ Tensor* dbeta,\n+ cudaStream_t cuda_stream) {\n+ T* dgamma_data =\n+ dgamma->defined() ? dgamma->template data_ptr() : nullptr;\n+ T* dbeta_data = dbeta->defined() ? dbeta->template data_ptr() : nullptr;\n+ bool aligned_grid = (M % rows_per_block_y == 0) && (N % block_dim_x == 0);\n+ dim3 threads{block_dim_x, block_dim_y};\n+ dim3 blocks;\n+ blocks.x = (N + block_dim_x - 1) / block_dim_x;\n+ blocks.y = 1;\n+ size_t shmem_sz = (block_dim_x + 1) * block_dim_y * sizeof(T_ACC) * 2;\n+ if (blocks.y == 1 && threads.y == 1) {\n+ // Optimization: since there is just one thread doing all the summation, we don't need a reduction\n+ // across threads. So we set partial_reduction to true.\n+ LaunchAndCheckGammaBetaBackwardKernel(\n+ aligned_grid, blocks, threads, shmem_sz, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_data, dbeta_data);\n+ } else {\n+ LaunchAndCheckGammaBetaBackwardKernel(\n+ aligned_grid, blocks, threads, shmem_sz, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_data, dbeta_data);\n+ }\n+\n+}\n+\n+template\n+void LaunchGammaBetaBackwardCUDAKernel(\n+ const T* dY_data,\n+ const T* X_data,\n+ const T_ACC* mean_data,\n+ const T_ACC* rstd_data,\n+ int64_t M,\n+ int64_t N,\n+ Tensor* dgamma,\n+ Tensor* dbeta,\n+ cudaStream_t cuda_stream) {\n+ if (M > 64 * 1024 && N / kWarpSize < 64) {\n+ // We have a situation where M >> N and N is small.\n+ // In this case we can speed up the computation by parallelizing in the M dimension.\n+ // We launch multiple blocks in the y-dimension, and compute partial sums for the\n+ // gradient in the first pass. 
Then we do a .sum(0) to do a final reduction.\n+ // Although we launch 2 kernels, we can get up to a 10x speedup for large M.\n+ constexpr int block_dim_x = 32;\n+ constexpr int block_dim_y = 1;\n+ constexpr int rows_per_block_y = 32;\n+ bool aligned_grid = (M % rows_per_block_y == 0) && (N % block_dim_x == 0);\n+ dim3 threads{block_dim_x, block_dim_y};\n+ dim3 blocks;\n+ blocks.x = (N + block_dim_x - 1) / block_dim_x;\n+ // int rows_per_block = my_gamma_beta_unroll_factor *\n+ blocks.y = (M + rows_per_block_y - 1) / rows_per_block_y;\n+ constexpr int max_grid_size = 64 * 1024 / 2;\n+ blocks.y = std::min(max_grid_size / blocks.x, blocks.y);\n+ auto options = dgamma->options();\n+ Tensor dgamma_blocks;\n+ Tensor dbeta_blocks;\n+ T * dgamma_blocks_ptr = nullptr;\n+ T * dbeta_blocks_ptr = nullptr;\n+ if (dgamma->defined()) {\n+ dgamma_blocks = at::zeros({blocks.y * threads.y, dgamma->size(-1)}, options);\n+ dgamma_blocks_ptr = dgamma_blocks.data_ptr();\n+ }\n+ if (dbeta->defined()) {\n+ dbeta_blocks = at::zeros({blocks.y * threads.y, dgamma->size(-1)}, options);\n+ dbeta_blocks_ptr = dbeta_blocks.data_ptr();\n+ }\n+ LaunchAndCheckGammaBetaBackwardKernel(\n+ aligned_grid, blocks, threads, 0, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_blocks_ptr, dbeta_blocks_ptr);\n+\n+ *dgamma = dgamma_blocks.sum(0);\n+ *dbeta = dbeta_blocks.sum(0);\n+ } else {\n+ // We are in the normal case where M is not that large.\n+ // We can change the tile shape (which is the last template parameter) in accordance with M.\n+ // For small M it is faster to have a smaller tile, otherwise we could have idle threads.\n+ // For larger M we use a bigger tile size.\n+ if (M < 64) {", + "comment_created_at": "2025-03-20T21:12:03+00:00", + "comment_author": "ahmadsharif1", + "comment_body": "For completeness, I did more experiments with smaller values of M and N and a lot more datapoints.\r\n\r\nWithout specialization:\r\n![image](https://github.com/user-attachments/assets/1fc62865-3f7b-4f12-97ed-01e2eaba4cc1)\r\n\r\n\r\nWith specialization:\r\n![image](https://github.com/user-attachments/assets/5325ed92-cc36-4f2a-a137-0edbeeb0ccbd)\r\n\r\n\r\nYou can observe that the \"short and wide\" region regressions go away with specialization (basically large N and small M which is at the bottom right of the chart).\r\n\r\nThere are still some regressions left, but those are with very small values of N (like 4 or 8 -- not sure if those shapes are used much).", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2033537678", + "pr_number": 150218, + "pr_file": "aten/src/ATen/DLConvertor.cpp", + "created_at": "2025-04-08T16:03:52+00:00", + "commented_code": "return fromDLPackImpl(src, std::move(deleter));\n}\n\nTensor maybeCopyTensor(\n const Tensor& data,\n std::optional optional_dl_device,\n std::optional copy) {\n bool force_copy = copy.has_value() && *copy;\n bool force_move = copy.has_value() && !*copy;\n\n if (optional_dl_device.has_value()) {\n auto device = at::getATenDevice(optional_dl_device->device_type, static_cast(optional_dl_device->device_id));\n\n if (device != data.device()) {\n TORCH_CHECK(\n !force_move,\n \"cannot move tensor from \",\n data.device(),\n \" to \",\n device,\n \" without copying. 
Set copy=True is needed.\");\n return data.to(device);\n }\n }\n\n if (force_copy) {\n return data.clone().detach();", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2033537678", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150218, + "pr_file": "aten/src/ATen/DLConvertor.cpp", + "discussion_id": "2033537678", + "commented_code": "@@ -388,4 +389,33 @@ Tensor fromDLPackVersioned(DLManagedTensorVersioned* src, std::function(src, std::move(deleter));\n }\n \n+Tensor maybeCopyTensor(\n+ const Tensor& data,\n+ std::optional optional_dl_device,\n+ std::optional copy) {\n+ bool force_copy = copy.has_value() && *copy;\n+ bool force_move = copy.has_value() && !*copy;\n+\n+ if (optional_dl_device.has_value()) {\n+ auto device = at::getATenDevice(optional_dl_device->device_type, static_cast(optional_dl_device->device_id));\n+\n+ if (device != data.device()) {\n+ TORCH_CHECK(\n+ !force_move,\n+ \"cannot move tensor from \",\n+ data.device(),\n+ \" to \",\n+ device,\n+ \" without copying. Set copy=True is needed.\");\n+ return data.to(device);\n+ }\n+ }\n+\n+ if (force_copy) {\n+ return data.clone().detach();", + "comment_created_at": "2025-04-08T16:03:52+00:00", + "comment_author": "Skylion007", + "comment_body": "```suggestion\r\n return data.detach().clone();\r\n```\r\nThis will copy less data / be more efficient.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2063591267", + "pr_number": 152089, + "pr_file": "aten/src/ATen/core/dispatch/OperatorEntry.cpp", + "created_at": "2025-04-28T12:51:29+00:00", + "commented_code": "#endif\n}\n\nvoid OperatorEntry::unsafeSetTags(const std::vector& tags) {\n #ifndef C10_MOBILE\n tags_ = std::move(tags);\n #endif", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2063591267", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152089, + "pr_file": "aten/src/ATen/core/dispatch/OperatorEntry.cpp", + "discussion_id": "2063591267", + "commented_code": "@@ -157,6 +157,12 @@ void OperatorEntry::registerSchema(FunctionSchema&& schema, std::string&& debug,\n #endif\n }\n \n+void OperatorEntry::unsafeSetTags(const std::vector& tags) {\n+ #ifndef C10_MOBILE\n+ tags_ = std::move(tags);\n+ #endif", + "comment_created_at": "2025-04-28T12:51:29+00:00", + "comment_author": "zou3519", + "comment_body": "I'm not that good with C++, why is this a std::move?", + "pr_file_module": null + }, + { + "comment_id": "2065568732", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152089, + "pr_file": "aten/src/ATen/core/dispatch/OperatorEntry.cpp", + "discussion_id": "2063591267", + "commented_code": "@@ -157,6 +157,12 @@ void OperatorEntry::registerSchema(FunctionSchema&& schema, std::string&& debug,\n #endif\n }\n \n+void OperatorEntry::unsafeSetTags(const std::vector& tags) {\n+ #ifndef C10_MOBILE\n+ tags_ = std::move(tags);\n+ #endif", + "comment_created_at": "2025-04-29T06:17:35+00:00", + "comment_author": "jijiew", + "comment_body": "it avoids unnecessary copy", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-optimize-memory-operations.md b/_reviewers/pytorch-optimize-memory-operations.md index b26775e..eaa97db 100644 --- a/_reviewers/pytorch-optimize-memory-operations.md +++ b/_reviewers/pytorch-optimize-memory-operations.md @@ -63,131 +63,3 @@ Minimize unnecessary memory operations to improve performance. 
This includes opt ``` Always question if memory is being allocated, initialized, copied, or accessed unnecessarily, as these operations are often performance bottlenecks in data-intensive applications. - - -[ - { - "discussion_id": "2145822363", - "pr_number": 151547, - "pr_file": "aten/src/ATen/native/CPUBlas.cpp", - "created_at": "2025-06-13T18:30:57+00:00", - "commented_code": "b, &ldb_,\n &beta_,\n float_v.data(), &ldc_);\n for (auto cv: float_v) {\n *(c++) = c10::convert(cv);\n\n for (const auto j : c10::irange(n)) {\n for (const auto i : c10::irange(m)) {\n auto offset = j * ldc + i;", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2145822363", - "repo_full_name": "pytorch/pytorch", - "pr_number": 151547, - "pr_file": "aten/src/ATen/native/CPUBlas.cpp", - "discussion_id": "2145822363", - "commented_code": "@@ -368,8 +368,12 @@ void gemm(\n b, &ldb_,\n &beta_,\n float_v.data(), &ldc_);\n- for (auto cv: float_v) {\n- *(c++) = c10::convert(cv);\n+\n+ for (const auto j : c10::irange(n)) {\n+ for (const auto i : c10::irange(m)) {\n+ auto offset = j * ldc + i;", - "comment_created_at": "2025-06-13T18:30:57+00:00", - "comment_author": "taoye9", - "comment_body": "it's better to move j * ldc out of inner loop to reduce unnecessary multiplication.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1996412678", - "pr_number": 148605, - "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", - "created_at": "2025-03-14T23:11:11+00:00", - "commented_code": "}\n}\n\n\ntemplate \n__global__ void GammaBetaBackwardSimpleCUDAKernel(\n// When the data size is a multiple of the tile size we can use fewer\n// instructions and registers. Example, this is the case when M=256 N=256.\ntemplate \n__device__\n__forceinline__\nvoid\nblockReduceGammaBetaBackwardsAligned(\n int64_t M,\n int64_t N,\n const T* dY,\n const T* X,\n const T_ACC* mean,\n const T_ACC* rstd,\n T* dg,\n T* db) {\n const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n if (j < N) {\n T_ACC sum1 = 0;\n T_ACC sum2 = 0;\n for (int64_t i = 0; i < M; ++i) {\n const int64_t index = i * N + j;\n sum1 += dg == nullptr ? T_ACC(0)\n : static_cast(dY[index]) *\n (static_cast(X[index]) - static_cast(mean[i])) *\n static_cast(rstd[i]);\n sum2 += db == nullptr ? 
T_ACC(0) : static_cast(dY[index]);\n const T* __restrict__ dY,\n const T* __restrict__ X,\n const T_ACC* __restrict__ mean,\n const T_ACC* __restrict__ rstd,\n T* __restrict__ dg,\n T* __restrict__ db,\n T_ACC &dg_sum,\n T_ACC &db_sum\n) {\n constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n for (int64_t M_start = blockIdx.y * rows_per_block_y;\n M_start < M;\n M_start += rows_per_block_y * gridDim.y) {\n int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n T_ACC warp_mean = 0, warp_rstd = 0;\n if (lane_id < rows_per_thread_y) {\n warp_mean = mean[mean_index + lane_id];\n warp_rstd = rstd[mean_index + lane_id];\n }\n if (dg != nullptr) {\n dg[j] = sum1;\n WARP_SYNC();\n T_ACC dY_regs[rows_per_thread_y] = {0};\n T_ACC X_regs[rows_per_thread_y] = {0};\n #pragma unroll\n for (int i = 0; i < rows_per_thread_y; ++i) {\n int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n if (aligned_grid || (current_y < M && thread_x < N)) {\n dY_regs[i] = dY[current_y * N + thread_x];\n X_regs[i] = X[current_y * N + thread_x];\n }\n }\n if (db != nullptr) {\n db[j] = sum2;\n\n #pragma unroll\n for (int i = 0; i < rows_per_thread_y; ++i) {\n T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n db_sum += dY_regs[i];\n }\n }\n}\n\n// This implementation gets called if M and N divide with 32. This case should\n// be the most common. We can then make better use of warp level intrinsics\n// to improve performance.\n\ntemplate \n__global__ void GammaBetaBackwardCUDAKernel_32x32(\ntemplate \n__device__\n__forceinline__\nvoid\nblockReduceGammaBetaBackwardsHelper(\n int64_t M_start,\n int64_t M,\n int64_t N,\n const T* dY,\n const T* X,\n const T_ACC* mean,\n const T_ACC* rstd,\n T* dg,\n T* db) {\n alignas(sizeof(double)) extern __shared__ char s_data1[];\n T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n T_ACC* s_dg;\n T_ACC* s_db;\n\n T_ACC dg_sum = 0;\n T_ACC db_sum = 0;\n\n const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n\n if (j < N) {\n constexpr int unroll_factor = 8;\n int laneId = threadIdx.x & (C10_WARP_SIZE - 1);\n const T* __restrict__ dY,\n const T* __restrict__ X,\n const T_ACC* __restrict__ mean,\n const T_ACC* __restrict__ rstd,\n T* __restrict__ dg,\n T* __restrict__ db,\n T_ACC &dg_sum,\n T_ACC &db_sum\n) {\n constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n\n int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n T_ACC warp_mean = 0, warp_rstd = 0;\n if (lane_id < rows_per_thread_y && mean_index + lane_id < M) {\n warp_mean = mean[mean_index + lane_id];\n warp_rstd = rstd[mean_index + lane_id];\n }\n WARP_SYNC();\n\n T_ACC mean_reg, mean_reg_tmp;\n T_ACC rstd_reg, rstd_reg_tmp;\n T dY_reg;\n T X_reg;\n\n // Main loop\n int bcounter;\n for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor);\n bcounter++) {\n int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n\n if (laneId < unroll_factor) {\n mean_reg_tmp = mean[offset + laneId];\n rstd_reg_tmp = rstd[offset + laneId];\n T_ACC dY_regs[rows_per_thread_y] = {0};\n T_ACC X_regs[rows_per_thread_y] = {0};\n #pragma unroll\n for (int i = 0; i < 
rows_per_thread_y; ++i) {\n int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n bool active = true;\n if (check_x && thread_x >= N) {\n active = false;\n }\n WARP_SYNC();\n\n #pragma unroll\n for (int ii = 0; ii < unroll_factor; ++ii) {\n dY_reg = dY[(offset + ii) * N + j];\n X_reg = X[(offset + ii) * N + j];\n mean_reg = WARP_SHFL(mean_reg_tmp, ii, kWarpSize);\n rstd_reg = WARP_SHFL(rstd_reg_tmp, ii, kWarpSize);\n dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n db_sum += dY_reg;\n if (check_y && current_y >= M) {\n active = false;\n }\n }\n\n // Remainder loop\n int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n for (int ii = 0; ii < unroll_factor; ii++) {\n if ((offset + ii) < M) {\n mean_reg = mean[offset + ii];\n rstd_reg = rstd[offset + ii];\n dY_reg = dY[(offset + ii) * N + j];\n X_reg = X[(offset + ii) * N + j];\n dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n db_sum += dY_reg;\n if (active) {\n dY_regs[i] = dY[current_y * N + thread_x];\n X_regs[i] = X[current_y * N + thread_x];\n }\n }\n\n // This kernel uses a block of (C10_WARP_SIZE x C10_WARP_SIZE) and\n // gets called when M; N divide by 32. We can use warp shuffles\n // for the final reduction step. This removes 4 shmem loads and\n // stores with their corresponding __syncthreads()\n\n // This greatly reduces bank conflicts at the expense of a little\n // extra shared memory. It does not impact occupancy\n int padded_bx = (1 + blockDim.x);\n\n s_dg = s_data_typed;\n s_db = s_data_typed + (padded_bx * blockDim.y);\n s_dg[threadIdx.y * padded_bx + threadIdx.x] = dg_sum;\n s_db[threadIdx.y * padded_bx + threadIdx.x] = db_sum;\n __syncthreads();\n\n // Load transposed so that a warp holds an entire column\n T_ACC reg_dg = s_dg[threadIdx.x * padded_bx + threadIdx.y];\n T_ACC reg_db = s_db[threadIdx.x * padded_bx + threadIdx.y];\n for (unsigned delta = C10_WARP_SIZE >> 1; delta >= 1; delta >>= 1) {\n reg_dg += WARP_SHFL_XOR(reg_dg, delta, kWarpSize);\n reg_db += WARP_SHFL_XOR(reg_db, delta, kWarpSize);\n #pragma unroll\n for (int i = 0; i < rows_per_thread_y; ++i) {\n T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n db_sum += dY_regs[i];\n }\n}\n\n if (threadIdx.x == 0) {\n const int64_t j = blockIdx.x * blockDim.x + threadIdx.y;\n if (dg) {\n dg[j] = reg_dg;\n }\n if (db) {\n db[j] = reg_db;\n }\ntemplate \n__device__\n__forceinline__\nvoid\nblockReduceGammaBetaBackwardsWithChecks(\n int64_t M,\n int64_t N,\n const T* __restrict__ dY,\n const T* __restrict__ X,\n const T_ACC* __restrict__ mean,\n const T_ACC* __restrict__ rstd,\n T* __restrict__ dg,\n T* __restrict__ db,\n T_ACC &dg_sum,\n T_ACC &db_sum\n) {\n for (int64_t M_start = blockIdx.y * rows_per_block_y;\n M_start < M;\n M_start += rows_per_block_y * gridDim.y) {\n int64_t M_end = M_start + rows_per_block_y - 1;\n if (M_end < M) {\n blockReduceGammaBetaBackwardsHelper\n (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n } else {\n blockReduceGammaBetaBackwardsHelper\n (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n }\n }\n}\n\ntemplate \n__global__ void GammaBetaBackwardCUDAKernel(\n// block_dim_x is the number of threads in the x dimension per block.\n// block_dim_y is the number of threads in the y dimension per block.\n// rows_per_block_y is the size of the tile (number of data elements)\n// in the y dimension per block.\n// partial_reduction indicates whether we need to reduce 
across threads\n// or not. If set to true, we will not reduce across threads. This can\n// be faster in the M >> N case but requires another kernel to do a full\n// final reduction.\n// aligned_grid means the data size is a multiple of tile size. In that\n// case we don't need to check for boundary conditions which can provide\n// a further speedup by not needing instructions to check for edge cases\n// and not needing predicate registers.\ntemplate \n__global__\nvoid\n GammaBetaBackwardCUDAKernelTemplate(\n int64_t M,\n int64_t N,\n const T* dY,\n const T* X,\n const T_ACC* mean,\n const T_ACC* rstd,\n T* dg,\n T* db) {\n alignas(sizeof(double)) extern __shared__ char s_data1[];\n T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n T_ACC* s_dg;\n T_ACC* s_db;\n\n const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n const T* __restrict__ dY,\n const T* __restrict__ X,\n const T_ACC* __restrict__ mean,\n const T_ACC* __restrict__ rstd,\n T* __restrict__ dg,\n T* __restrict__ db) {\n // This assert is a compile-time check only.\n constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n static_assert(rows_per_thread_y <= kWarpSize);\n\n T_ACC dg_sum = 0;\n T_ACC db_sum = 0;\n\n if (j < N) {\n constexpr int unroll_factor = 8;\n\n T_ACC mean_reg;\n T_ACC rstd_reg;\n T dY_reg;\n T X_reg;\n if (aligned_grid) {\n // When N and M align perfectly with block_dim_x and block_dim_y, we\n // can skip boundary condition checks that waste instruction issue slots.\n blockReduceGammaBetaBackwardsAligned\n \n (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n } else {\n // In the general case we need to check boundary conditions in the M\n // dimension. However, we can still avoid boundary checks in the N dimension\n // for the inner blocks. 
So try to avoid those checks when possible.\n if (blockIdx.x * block_dim_x + block_dim_x - 1 < N) {\n blockReduceGammaBetaBackwardsWithChecks\n \n (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n } else {\n blockReduceGammaBetaBackwardsWithChecks\n \n (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n }\n }\n\n // Main Loop\n int bcounter;\n for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor); bcounter++){\n int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n\n #pragma unroll\n for (int ii = 0; ii < unroll_factor; ++ii) {\n dY_reg = dY[(offset + ii) * N + j];\n X_reg = X[(offset + ii) * N + j];\n mean_reg = mean[offset + ii];\n rstd_reg = rstd[offset + ii];\n dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n db_sum += dY_reg;\n // When partial_reduction is requested, we don't reduce within a block.\n // We also don't reduce if we are only a single block in the y dimension.\n if (partial_reduction || (blockDim.y == 1 && gridDim.y == 1)) {\n if (aligned_grid || thread_x < N) {\n int64_t thread_y = blockIdx.y * blockDim.y + threadIdx.y;\n if (dg) {\n dg[thread_y * N + thread_x] = dg_sum;\n }\n }\n\n // Remainder loop\n int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n for (int ii = 0; ii < unroll_factor; ii++ ){\n if ((offset + ii) < M) {\n dY_reg = dY[(offset + ii) * N + j ];\n X_reg = X[(offset + ii) * N + j];\n mean_reg = mean[offset + ii];\n rstd_reg = rstd[offset + ii];\n dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n db_sum += dY_reg;\n if (db) {\n db[thread_y * N + thread_x] = db_sum;\n }\n }\n\n // Do the final reduction in shared memory\n } else {\n // The caller requested a full reduction so we must reduce across\n // warps using shared memory and warp shuffles.\n static_assert(rows_per_thread_y <= C10_WARP_SIZE);\n alignas(sizeof(double)) extern __shared__ char s_data1[];\n T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n T_ACC* s_dg;\n T_ACC* s_db;\n int padded_bx = (block_dim_x + 1);\n // Transpose dg and db.\n s_dg = s_data_typed;\n s_db = s_data_typed + blockDim.x * blockDim.y;\n s_dg[threadIdx.y * blockDim.x + threadIdx.x] = dg_sum;\n s_db[threadIdx.y * blockDim.x + threadIdx.x] = db_sum;\n s_db = s_data_typed + (padded_bx * block_dim_y);\n s_dg[threadIdx.y * padded_bx + threadIdx.x] = dg_sum;\n s_db[threadIdx.y * padded_bx + threadIdx.x] = db_sum;\n __syncthreads();\n\n for (int offset = blockDim.y / 2; offset >= 1; offset /= 2) {\n if (threadIdx.y < offset) {\n s_dg[threadIdx.y * blockDim.x + threadIdx.x] +=\n s_dg[(threadIdx.y + offset) * blockDim.x + threadIdx.x];\n s_db[threadIdx.y * blockDim.x + threadIdx.x] +=\n s_db[(threadIdx.y + offset) * blockDim.x + threadIdx.x];\n // Load transposed so that a warp holds an entire column\n // Because block_dim_x != block_dim_y in the general case, we need\n // some code to handle the general case.\n static_assert(block_dim_x * block_dim_y % C10_WARP_SIZE == 0);\n constexpr int warps_available_to_reduce = block_dim_x * block_dim_y / C10_WARP_SIZE;\n int thread_id = threadIdx.y * block_dim_x + threadIdx.x;\n int warp_id = thread_id / C10_WARP_SIZE;\n int lane_id = thread_id & (C10_WARP_SIZE - 1);\n #pragma unroll\n for (int i = warp_id; i < block_dim_x; i += warps_available_to_reduce) {\n T_ACC reg_db, reg_dg;\n if (lane_id < block_dim_y) {\n reg_dg = s_dg[lane_id * padded_bx + i];\n reg_db = s_db[lane_id * padded_bx + i];\n }\n #pragma unroll\n for (unsigned delta = block_dim_y >> 1; delta >= 1; delta >>= 1) 
{\n reg_dg += WARP_SHFL_XOR(reg_dg, delta, kWarpSize);\n reg_db += WARP_SHFL_XOR(reg_db, delta, kWarpSize);\n }\n // Reduce is done. Now write it out to global memory.\n int64_t out_index = blockIdx.x * block_dim_x + i;\n if (threadIdx.x == 0 && (aligned_grid || out_index < N)) {\n if (dg) {\n dg[out_index] = reg_dg;\n }\n __syncthreads();\n if (db) {\n db[out_index] = reg_db;\n }\n }\n }\n }\n}\n\n if (threadIdx.y == 0) {\n if (dg) {\n dg[j] = s_dg[threadIdx.x];\n }\n if (db) {\n db[j] = s_db[threadIdx.x];\n }\ntemplate\nvoid LaunchAndCheckGammaBetaBackwardKernel(\n bool aligned_grid,\n dim3 blocks,\n dim3 threads,\n size_t shmem_sz,\n cudaStream_t cuda_stream,\n const T* dY_data,\n const T* X_data,\n const T_ACC* mean_data,\n const T_ACC* rstd_data,\n int64_t M,\n int64_t N,\n T* dgamma_data,\n T* dbeta_data) {\nif (aligned_grid) {\n GammaBetaBackwardCUDAKernelTemplate\n <<>>(\n M,\n N,\n dY_data,\n X_data,\n mean_data,\n rstd_data,\n dgamma_data,\n dbeta_data);\n } else {\n GammaBetaBackwardCUDAKernelTemplate\n <<>>(\n M,\n N,\n dY_data,\n X_data,\n mean_data,\n rstd_data,\n dgamma_data,\n dbeta_data);\n }\n C10_CUDA_KERNEL_LAUNCH_CHECK();\n}\n\ntemplate\nvoid ConfigureAndLaunchGammaBetaBackwardKernel(\n const T* dY_data,\n const T* X_data,\n const T_ACC* mean_data,\n const T_ACC* rstd_data,\n int64_t M,\n int64_t N,\n Tensor* dgamma,\n Tensor* dbeta,\n cudaStream_t cuda_stream) {\n T* dgamma_data =\n dgamma->defined() ? dgamma->template data_ptr() : nullptr;\n T* dbeta_data = dbeta->defined() ? dbeta->template data_ptr() : nullptr;\n bool aligned_grid = (M % rows_per_block_y == 0) && (N % block_dim_x == 0);\n dim3 threads{block_dim_x, block_dim_y};\n dim3 blocks;\n blocks.x = (N + block_dim_x - 1) / block_dim_x;\n blocks.y = 1;\n size_t shmem_sz = (block_dim_x + 1) * block_dim_y * sizeof(T_ACC) * 2;\n if (blocks.y == 1 && threads.y == 1) {\n // Optimization: since there is just one thread doing all the summation, we don't need a reduction\n // across threads. So we set partial_reduction to true.\n LaunchAndCheckGammaBetaBackwardKernel(\n aligned_grid, blocks, threads, shmem_sz, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_data, dbeta_data);\n } else {\n LaunchAndCheckGammaBetaBackwardKernel(\n aligned_grid, blocks, threads, shmem_sz, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_data, dbeta_data);\n }\n\n}\n\ntemplate\nvoid LaunchGammaBetaBackwardCUDAKernel(\n const T* dY_data,\n const T* X_data,\n const T_ACC* mean_data,\n const T_ACC* rstd_data,\n int64_t M,\n int64_t N,\n Tensor* dgamma,\n Tensor* dbeta,\n cudaStream_t cuda_stream) {\n if (M > 64 * 1024 && N / kWarpSize < 64) {\n // We have a situation where M >> N and N is small.\n // In this case we can speed up the computation by parallelizing in the M dimension.\n // We launch multiple blocks in the y-dimension, and compute partial sums for the\n // gradient in the first pass. 
Then we do a .sum(0) to do a final reduction.\n // Although we launch 2 kernels, we can get up to a 10x speedup for large M.\n constexpr int block_dim_x = 32;\n constexpr int block_dim_y = 1;\n constexpr int rows_per_block_y = 32;\n bool aligned_grid = (M % rows_per_block_y == 0) && (N % block_dim_x == 0);\n dim3 threads{block_dim_x, block_dim_y};\n dim3 blocks;\n blocks.x = (N + block_dim_x - 1) / block_dim_x;\n // int rows_per_block = my_gamma_beta_unroll_factor *\n blocks.y = (M + rows_per_block_y - 1) / rows_per_block_y;\n constexpr int max_grid_size = 64 * 1024 / 2;\n blocks.y = std::min(max_grid_size / blocks.x, blocks.y);\n auto options = dgamma->options();\n Tensor dgamma_blocks;\n Tensor dbeta_blocks;\n T * dgamma_blocks_ptr = nullptr;\n T * dbeta_blocks_ptr = nullptr;\n if (dgamma->defined()) {\n dgamma_blocks = at::zeros({blocks.y * threads.y, dgamma->size(-1)}, options);\n dgamma_blocks_ptr = dgamma_blocks.data_ptr();\n }\n if (dbeta->defined()) {\n dbeta_blocks = at::zeros({blocks.y * threads.y, dgamma->size(-1)}, options);\n dbeta_blocks_ptr = dbeta_blocks.data_ptr();\n }\n LaunchAndCheckGammaBetaBackwardKernel(\n aligned_grid, blocks, threads, 0, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_blocks_ptr, dbeta_blocks_ptr);\n\n *dgamma = dgamma_blocks.sum(0);\n *dbeta = dbeta_blocks.sum(0);\n } else {\n // We are in the normal case where M is not that large.\n // We can change the tile shape (which is the last template parameter) in accordance with M.\n // For small M it is faster to have a smaller tile, otherwise we could have idle threads.\n // For larger M we use a bigger tile size.\n if (M < 64) {", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "1996412678", - "repo_full_name": "pytorch/pytorch", - "pr_number": 148605, - "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", - "discussion_id": "1996412678", - "commented_code": "@@ -508,223 +509,418 @@ __global__ void layer_norm_grad_input_kernel_vectorized(\n }\n }\n \n-\n-template \n-__global__ void GammaBetaBackwardSimpleCUDAKernel(\n+// When the data size is a multiple of the tile size we can use fewer\n+// instructions and registers. Example, this is the case when M=256 N=256.\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsAligned(\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n- if (j < N) {\n- T_ACC sum1 = 0;\n- T_ACC sum2 = 0;\n- for (int64_t i = 0; i < M; ++i) {\n- const int64_t index = i * N + j;\n- sum1 += dg == nullptr ? T_ACC(0)\n- : static_cast(dY[index]) *\n- (static_cast(X[index]) - static_cast(mean[i])) *\n- static_cast(rstd[i]);\n- sum2 += db == nullptr ? 
T_ACC(0) : static_cast(dY[index]);\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n+ for (int64_t M_start = blockIdx.y * rows_per_block_y;\n+ M_start < M;\n+ M_start += rows_per_block_y * gridDim.y) {\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n }\n- if (dg != nullptr) {\n- dg[j] = sum1;\n+ WARP_SYNC();\n+ T_ACC dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ if (aligned_grid || (current_y < M && thread_x < N)) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n+ }\n }\n- if (db != nullptr) {\n- db[j] = sum2;\n+\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n }\n }\n }\n \n-// This implementation gets called if M and N divide with 32. This case should\n-// be the most common. We can then make better use of warp level intrinsics\n-// to improve performance.\n-\n-template \n-__global__ void GammaBetaBackwardCUDAKernel_32x32(\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsHelper(\n+ int64_t M_start,\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- alignas(sizeof(double)) extern __shared__ char s_data1[];\n- T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n- T_ACC* s_dg;\n- T_ACC* s_db;\n-\n- T_ACC dg_sum = 0;\n- T_ACC db_sum = 0;\n-\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n-\n- if (j < N) {\n- constexpr int unroll_factor = 8;\n- int laneId = threadIdx.x & (C10_WARP_SIZE - 1);\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n+\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y && mean_index + lane_id < M) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n+ }\n+ WARP_SYNC();\n \n- T_ACC mean_reg, mean_reg_tmp;\n- T_ACC rstd_reg, rstd_reg_tmp;\n- T dY_reg;\n- T X_reg;\n \n- // Main loop\n- int bcounter;\n- for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor);\n- bcounter++) {\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n \n- if (laneId < unroll_factor) {\n- mean_reg_tmp = mean[offset + laneId];\n- rstd_reg_tmp = rstd[offset + laneId];\n+ T_ACC 
dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ bool active = true;\n+ if (check_x && thread_x >= N) {\n+ active = false;\n }\n- WARP_SYNC();\n-\n- #pragma unroll\n- for (int ii = 0; ii < unroll_factor; ++ii) {\n- dY_reg = dY[(offset + ii) * N + j];\n- X_reg = X[(offset + ii) * N + j];\n- mean_reg = WARP_SHFL(mean_reg_tmp, ii, kWarpSize);\n- rstd_reg = WARP_SHFL(rstd_reg_tmp, ii, kWarpSize);\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ if (check_y && current_y >= M) {\n+ active = false;\n }\n- }\n-\n- // Remainder loop\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n- for (int ii = 0; ii < unroll_factor; ii++) {\n- if ((offset + ii) < M) {\n- mean_reg = mean[offset + ii];\n- rstd_reg = rstd[offset + ii];\n- dY_reg = dY[(offset + ii) * N + j];\n- X_reg = X[(offset + ii) * N + j];\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ if (active) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n }\n }\n \n- // This kernel uses a block of (C10_WARP_SIZE x C10_WARP_SIZE) and\n- // gets called when M; N divide by 32. We can use warp shuffles\n- // for the final reduction step. This removes 4 shmem loads and\n- // stores with their corresponding __syncthreads()\n-\n- // This greatly reduces bank conflicts at the expense of a little\n- // extra shared memory. It does not impact occupancy\n- int padded_bx = (1 + blockDim.x);\n-\n- s_dg = s_data_typed;\n- s_db = s_data_typed + (padded_bx * blockDim.y);\n- s_dg[threadIdx.y * padded_bx + threadIdx.x] = dg_sum;\n- s_db[threadIdx.y * padded_bx + threadIdx.x] = db_sum;\n- __syncthreads();\n-\n- // Load transposed so that a warp holds an entire column\n- T_ACC reg_dg = s_dg[threadIdx.x * padded_bx + threadIdx.y];\n- T_ACC reg_db = s_db[threadIdx.x * padded_bx + threadIdx.y];\n- for (unsigned delta = C10_WARP_SIZE >> 1; delta >= 1; delta >>= 1) {\n- reg_dg += WARP_SHFL_XOR(reg_dg, delta, kWarpSize);\n- reg_db += WARP_SHFL_XOR(reg_db, delta, kWarpSize);\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n }\n+}\n \n- if (threadIdx.x == 0) {\n- const int64_t j = blockIdx.x * blockDim.x + threadIdx.y;\n- if (dg) {\n- dg[j] = reg_dg;\n- }\n- if (db) {\n- db[j] = reg_db;\n- }\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsWithChecks(\n+ int64_t M,\n+ int64_t N,\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ for (int64_t M_start = blockIdx.y * rows_per_block_y;\n+ M_start < M;\n+ M_start += rows_per_block_y * gridDim.y) {\n+ int64_t M_end = M_start + rows_per_block_y - 1;\n+ if (M_end < M) {\n+ blockReduceGammaBetaBackwardsHelper\n+ (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ blockReduceGammaBetaBackwardsHelper\n+ (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n }\n }\n }\n \n-template \n-__global__ void GammaBetaBackwardCUDAKernel(\n+// block_dim_x is the number of threads in the x dimension per block.\n+// block_dim_y is 
the number of threads in the y dimension per block.\n+// rows_per_block_y is the size of the tile (number of data elements)\n+// in the y dimension per block.\n+// partial_reduction indicates whether we need to reduce across threads\n+// or not. If set to true, we will not reduce across threads. This can\n+// be faster in the M >> N case but requires another kernel to do a full\n+// final reduction.\n+// aligned_grid means the data size is a multiple of tile size. In that\n+// case we don't need to check for boundary conditions which can provide\n+// a further speedup by not needing instructions to check for edge cases\n+// and not needing predicate registers.\n+template \n+__global__\n+void\n+ GammaBetaBackwardCUDAKernelTemplate(\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- alignas(sizeof(double)) extern __shared__ char s_data1[];\n- T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n- T_ACC* s_dg;\n- T_ACC* s_db;\n-\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db) {\n+ // This assert is a compile-time check only.\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ static_assert(rows_per_thread_y <= kWarpSize);\n \n T_ACC dg_sum = 0;\n T_ACC db_sum = 0;\n \n- if (j < N) {\n- constexpr int unroll_factor = 8;\n-\n- T_ACC mean_reg;\n- T_ACC rstd_reg;\n- T dY_reg;\n- T X_reg;\n+ if (aligned_grid) {\n+ // When N and M align perfectly with block_dim_x and block_dim_y, we\n+ // can skip boundary condition checks that waste instruction issue slots.\n+ blockReduceGammaBetaBackwardsAligned\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ // In the general case we need to check boundary conditions in the M\n+ // dimension. However, we can still avoid boundary checks in the N dimension\n+ // for the inner blocks. 
So try to avoid those checks when possible.\n+ if (blockIdx.x * block_dim_x + block_dim_x - 1 < N) {\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ }\n+ }\n \n- // Main Loop\n- int bcounter;\n- for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor); bcounter++){\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n \n- #pragma unroll\n- for (int ii = 0; ii < unroll_factor; ++ii) {\n- dY_reg = dY[(offset + ii) * N + j];\n- X_reg = X[(offset + ii) * N + j];\n- mean_reg = mean[offset + ii];\n- rstd_reg = rstd[offset + ii];\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ // When partial_reduction is requested, we don't reduce within a block.\n+ // We also don't reduce if we are only a single block in the y dimension.\n+ if (partial_reduction || (blockDim.y == 1 && gridDim.y == 1)) {\n+ if (aligned_grid || thread_x < N) {\n+ int64_t thread_y = blockIdx.y * blockDim.y + threadIdx.y;\n+ if (dg) {\n+ dg[thread_y * N + thread_x] = dg_sum;\n }\n- }\n-\n- // Remainder loop\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n- for (int ii = 0; ii < unroll_factor; ii++ ){\n- if ((offset + ii) < M) {\n- dY_reg = dY[(offset + ii) * N + j ];\n- X_reg = X[(offset + ii) * N + j];\n- mean_reg = mean[offset + ii];\n- rstd_reg = rstd[offset + ii];\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ if (db) {\n+ db[thread_y * N + thread_x] = db_sum;\n }\n }\n-\n- // Do the final reduction in shared memory\n+ } else {\n+ // The caller requested a full reduction so we must reduce across\n+ // warps using shared memory and warp shuffles.\n+ static_assert(rows_per_thread_y <= C10_WARP_SIZE);\n+ alignas(sizeof(double)) extern __shared__ char s_data1[];\n+ T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n+ T_ACC* s_dg;\n+ T_ACC* s_db;\n+ int padded_bx = (block_dim_x + 1);\n+ // Transpose dg and db.\n s_dg = s_data_typed;\n- s_db = s_data_typed + blockDim.x * blockDim.y;\n- s_dg[threadIdx.y * blockDim.x + threadIdx.x] = dg_sum;\n- s_db[threadIdx.y * blockDim.x + threadIdx.x] = db_sum;\n+ s_db = s_data_typed + (padded_bx * block_dim_y);\n+ s_dg[threadIdx.y * padded_bx + threadIdx.x] = dg_sum;\n+ s_db[threadIdx.y * padded_bx + threadIdx.x] = db_sum;\n __syncthreads();\n \n- for (int offset = blockDim.y / 2; offset >= 1; offset /= 2) {\n- if (threadIdx.y < offset) {\n- s_dg[threadIdx.y * blockDim.x + threadIdx.x] +=\n- s_dg[(threadIdx.y + offset) * blockDim.x + threadIdx.x];\n- s_db[threadIdx.y * blockDim.x + threadIdx.x] +=\n- s_db[(threadIdx.y + offset) * blockDim.x + threadIdx.x];\n+ // Load transposed so that a warp holds an entire column\n+ // Because block_dim_x != block_dim_y in the general case, we need\n+ // some code to handle the general case.\n+ static_assert(block_dim_x * block_dim_y % C10_WARP_SIZE == 0);\n+ constexpr int warps_available_to_reduce = block_dim_x * block_dim_y / C10_WARP_SIZE;\n+ int thread_id = threadIdx.y * block_dim_x + threadIdx.x;\n+ int warp_id = thread_id / C10_WARP_SIZE;\n+ int lane_id = thread_id & (C10_WARP_SIZE - 1);\n+ #pragma unroll\n+ for (int i = warp_id; i < block_dim_x; i += warps_available_to_reduce) {\n+ T_ACC reg_db, reg_dg;\n+ if (lane_id < block_dim_y) {\n+ reg_dg = s_dg[lane_id * padded_bx + i];\n+ reg_db = s_db[lane_id * padded_bx + i];\n+ 
}\n+ #pragma unroll\n+ for (unsigned delta = block_dim_y >> 1; delta >= 1; delta >>= 1) {\n+ reg_dg += WARP_SHFL_XOR(reg_dg, delta, kWarpSize);\n+ reg_db += WARP_SHFL_XOR(reg_db, delta, kWarpSize);\n+ }\n+ // Reduce is done. Now write it out to global memory.\n+ int64_t out_index = blockIdx.x * block_dim_x + i;\n+ if (threadIdx.x == 0 && (aligned_grid || out_index < N)) {\n+ if (dg) {\n+ dg[out_index] = reg_dg;\n }\n- __syncthreads();\n+ if (db) {\n+ db[out_index] = reg_db;\n+ }\n+ }\n }\n+ }\n+}\n \n- if (threadIdx.y == 0) {\n- if (dg) {\n- dg[j] = s_dg[threadIdx.x];\n- }\n- if (db) {\n- db[j] = s_db[threadIdx.x];\n- }\n+template\n+void LaunchAndCheckGammaBetaBackwardKernel(\n+ bool aligned_grid,\n+ dim3 blocks,\n+ dim3 threads,\n+ size_t shmem_sz,\n+ cudaStream_t cuda_stream,\n+ const T* dY_data,\n+ const T* X_data,\n+ const T_ACC* mean_data,\n+ const T_ACC* rstd_data,\n+ int64_t M,\n+ int64_t N,\n+ T* dgamma_data,\n+ T* dbeta_data) {\n+if (aligned_grid) {\n+ GammaBetaBackwardCUDAKernelTemplate\n+ <<>>(\n+ M,\n+ N,\n+ dY_data,\n+ X_data,\n+ mean_data,\n+ rstd_data,\n+ dgamma_data,\n+ dbeta_data);\n+ } else {\n+ GammaBetaBackwardCUDAKernelTemplate\n+ <<>>(\n+ M,\n+ N,\n+ dY_data,\n+ X_data,\n+ mean_data,\n+ rstd_data,\n+ dgamma_data,\n+ dbeta_data);\n+ }\n+ C10_CUDA_KERNEL_LAUNCH_CHECK();\n+}\n+\n+template\n+void ConfigureAndLaunchGammaBetaBackwardKernel(\n+ const T* dY_data,\n+ const T* X_data,\n+ const T_ACC* mean_data,\n+ const T_ACC* rstd_data,\n+ int64_t M,\n+ int64_t N,\n+ Tensor* dgamma,\n+ Tensor* dbeta,\n+ cudaStream_t cuda_stream) {\n+ T* dgamma_data =\n+ dgamma->defined() ? dgamma->template data_ptr() : nullptr;\n+ T* dbeta_data = dbeta->defined() ? dbeta->template data_ptr() : nullptr;\n+ bool aligned_grid = (M % rows_per_block_y == 0) && (N % block_dim_x == 0);\n+ dim3 threads{block_dim_x, block_dim_y};\n+ dim3 blocks;\n+ blocks.x = (N + block_dim_x - 1) / block_dim_x;\n+ blocks.y = 1;\n+ size_t shmem_sz = (block_dim_x + 1) * block_dim_y * sizeof(T_ACC) * 2;\n+ if (blocks.y == 1 && threads.y == 1) {\n+ // Optimization: since there is just one thread doing all the summation, we don't need a reduction\n+ // across threads. So we set partial_reduction to true.\n+ LaunchAndCheckGammaBetaBackwardKernel(\n+ aligned_grid, blocks, threads, shmem_sz, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_data, dbeta_data);\n+ } else {\n+ LaunchAndCheckGammaBetaBackwardKernel(\n+ aligned_grid, blocks, threads, shmem_sz, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_data, dbeta_data);\n+ }\n+\n+}\n+\n+template\n+void LaunchGammaBetaBackwardCUDAKernel(\n+ const T* dY_data,\n+ const T* X_data,\n+ const T_ACC* mean_data,\n+ const T_ACC* rstd_data,\n+ int64_t M,\n+ int64_t N,\n+ Tensor* dgamma,\n+ Tensor* dbeta,\n+ cudaStream_t cuda_stream) {\n+ if (M > 64 * 1024 && N / kWarpSize < 64) {\n+ // We have a situation where M >> N and N is small.\n+ // In this case we can speed up the computation by parallelizing in the M dimension.\n+ // We launch multiple blocks in the y-dimension, and compute partial sums for the\n+ // gradient in the first pass. 
Then we do a .sum(0) to do a final reduction.\n+ // Although we launch 2 kernels, we can get up to a 10x speedup for large M.\n+ constexpr int block_dim_x = 32;\n+ constexpr int block_dim_y = 1;\n+ constexpr int rows_per_block_y = 32;\n+ bool aligned_grid = (M % rows_per_block_y == 0) && (N % block_dim_x == 0);\n+ dim3 threads{block_dim_x, block_dim_y};\n+ dim3 blocks;\n+ blocks.x = (N + block_dim_x - 1) / block_dim_x;\n+ // int rows_per_block = my_gamma_beta_unroll_factor *\n+ blocks.y = (M + rows_per_block_y - 1) / rows_per_block_y;\n+ constexpr int max_grid_size = 64 * 1024 / 2;\n+ blocks.y = std::min(max_grid_size / blocks.x, blocks.y);\n+ auto options = dgamma->options();\n+ Tensor dgamma_blocks;\n+ Tensor dbeta_blocks;\n+ T * dgamma_blocks_ptr = nullptr;\n+ T * dbeta_blocks_ptr = nullptr;\n+ if (dgamma->defined()) {\n+ dgamma_blocks = at::zeros({blocks.y * threads.y, dgamma->size(-1)}, options);\n+ dgamma_blocks_ptr = dgamma_blocks.data_ptr();\n+ }\n+ if (dbeta->defined()) {\n+ dbeta_blocks = at::zeros({blocks.y * threads.y, dgamma->size(-1)}, options);\n+ dbeta_blocks_ptr = dbeta_blocks.data_ptr();\n+ }\n+ LaunchAndCheckGammaBetaBackwardKernel(\n+ aligned_grid, blocks, threads, 0, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_blocks_ptr, dbeta_blocks_ptr);\n+\n+ *dgamma = dgamma_blocks.sum(0);\n+ *dbeta = dbeta_blocks.sum(0);\n+ } else {\n+ // We are in the normal case where M is not that large.\n+ // We can change the tile shape (which is the last template parameter) in accordance with M.\n+ // For small M it is faster to have a smaller tile, otherwise we could have idle threads.\n+ // For larger M we use a bigger tile size.\n+ if (M < 64) {", - "comment_created_at": "2025-03-14T23:11:11+00:00", - "comment_author": "ngimel", - "comment_body": "do we need all 4 instantiations here? How much is the perf impact?", - "pr_file_module": null - }, - { - "comment_id": "2000093553", - "repo_full_name": "pytorch/pytorch", - "pr_number": 148605, - "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", - "discussion_id": "1996412678", - "commented_code": "@@ -508,223 +509,418 @@ __global__ void layer_norm_grad_input_kernel_vectorized(\n }\n }\n \n-\n-template \n-__global__ void GammaBetaBackwardSimpleCUDAKernel(\n+// When the data size is a multiple of the tile size we can use fewer\n+// instructions and registers. Example, this is the case when M=256 N=256.\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsAligned(\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n- if (j < N) {\n- T_ACC sum1 = 0;\n- T_ACC sum2 = 0;\n- for (int64_t i = 0; i < M; ++i) {\n- const int64_t index = i * N + j;\n- sum1 += dg == nullptr ? T_ACC(0)\n- : static_cast(dY[index]) *\n- (static_cast(X[index]) - static_cast(mean[i])) *\n- static_cast(rstd[i]);\n- sum2 += db == nullptr ? 
T_ACC(0) : static_cast(dY[index]);\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n+ for (int64_t M_start = blockIdx.y * rows_per_block_y;\n+ M_start < M;\n+ M_start += rows_per_block_y * gridDim.y) {\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n }\n- if (dg != nullptr) {\n- dg[j] = sum1;\n+ WARP_SYNC();\n+ T_ACC dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ if (aligned_grid || (current_y < M && thread_x < N)) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n+ }\n }\n- if (db != nullptr) {\n- db[j] = sum2;\n+\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n }\n }\n }\n \n-// This implementation gets called if M and N divide with 32. This case should\n-// be the most common. We can then make better use of warp level intrinsics\n-// to improve performance.\n-\n-template \n-__global__ void GammaBetaBackwardCUDAKernel_32x32(\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsHelper(\n+ int64_t M_start,\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- alignas(sizeof(double)) extern __shared__ char s_data1[];\n- T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n- T_ACC* s_dg;\n- T_ACC* s_db;\n-\n- T_ACC dg_sum = 0;\n- T_ACC db_sum = 0;\n-\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n-\n- if (j < N) {\n- constexpr int unroll_factor = 8;\n- int laneId = threadIdx.x & (C10_WARP_SIZE - 1);\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n+\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y && mean_index + lane_id < M) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n+ }\n+ WARP_SYNC();\n \n- T_ACC mean_reg, mean_reg_tmp;\n- T_ACC rstd_reg, rstd_reg_tmp;\n- T dY_reg;\n- T X_reg;\n \n- // Main loop\n- int bcounter;\n- for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor);\n- bcounter++) {\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n \n- if (laneId < unroll_factor) {\n- mean_reg_tmp = mean[offset + laneId];\n- rstd_reg_tmp = rstd[offset + laneId];\n+ T_ACC 
dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ bool active = true;\n+ if (check_x && thread_x >= N) {\n+ active = false;\n }\n- WARP_SYNC();\n-\n- #pragma unroll\n- for (int ii = 0; ii < unroll_factor; ++ii) {\n- dY_reg = dY[(offset + ii) * N + j];\n- X_reg = X[(offset + ii) * N + j];\n- mean_reg = WARP_SHFL(mean_reg_tmp, ii, kWarpSize);\n- rstd_reg = WARP_SHFL(rstd_reg_tmp, ii, kWarpSize);\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ if (check_y && current_y >= M) {\n+ active = false;\n }\n- }\n-\n- // Remainder loop\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n- for (int ii = 0; ii < unroll_factor; ii++) {\n- if ((offset + ii) < M) {\n- mean_reg = mean[offset + ii];\n- rstd_reg = rstd[offset + ii];\n- dY_reg = dY[(offset + ii) * N + j];\n- X_reg = X[(offset + ii) * N + j];\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ if (active) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n }\n }\n \n- // This kernel uses a block of (C10_WARP_SIZE x C10_WARP_SIZE) and\n- // gets called when M; N divide by 32. We can use warp shuffles\n- // for the final reduction step. This removes 4 shmem loads and\n- // stores with their corresponding __syncthreads()\n-\n- // This greatly reduces bank conflicts at the expense of a little\n- // extra shared memory. It does not impact occupancy\n- int padded_bx = (1 + blockDim.x);\n-\n- s_dg = s_data_typed;\n- s_db = s_data_typed + (padded_bx * blockDim.y);\n- s_dg[threadIdx.y * padded_bx + threadIdx.x] = dg_sum;\n- s_db[threadIdx.y * padded_bx + threadIdx.x] = db_sum;\n- __syncthreads();\n-\n- // Load transposed so that a warp holds an entire column\n- T_ACC reg_dg = s_dg[threadIdx.x * padded_bx + threadIdx.y];\n- T_ACC reg_db = s_db[threadIdx.x * padded_bx + threadIdx.y];\n- for (unsigned delta = C10_WARP_SIZE >> 1; delta >= 1; delta >>= 1) {\n- reg_dg += WARP_SHFL_XOR(reg_dg, delta, kWarpSize);\n- reg_db += WARP_SHFL_XOR(reg_db, delta, kWarpSize);\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n }\n+}\n \n- if (threadIdx.x == 0) {\n- const int64_t j = blockIdx.x * blockDim.x + threadIdx.y;\n- if (dg) {\n- dg[j] = reg_dg;\n- }\n- if (db) {\n- db[j] = reg_db;\n- }\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsWithChecks(\n+ int64_t M,\n+ int64_t N,\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ for (int64_t M_start = blockIdx.y * rows_per_block_y;\n+ M_start < M;\n+ M_start += rows_per_block_y * gridDim.y) {\n+ int64_t M_end = M_start + rows_per_block_y - 1;\n+ if (M_end < M) {\n+ blockReduceGammaBetaBackwardsHelper\n+ (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ blockReduceGammaBetaBackwardsHelper\n+ (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n }\n }\n }\n \n-template \n-__global__ void GammaBetaBackwardCUDAKernel(\n+// block_dim_x is the number of threads in the x dimension per block.\n+// block_dim_y is 
the number of threads in the y dimension per block.\n+// rows_per_block_y is the size of the tile (number of data elements)\n+// in the y dimension per block.\n+// partial_reduction indicates whether we need to reduce across threads\n+// or not. If set to true, we will not reduce across threads. This can\n+// be faster in the M >> N case but requires another kernel to do a full\n+// final reduction.\n+// aligned_grid means the data size is a multiple of tile size. In that\n+// case we don't need to check for boundary conditions which can provide\n+// a further speedup by not needing instructions to check for edge cases\n+// and not needing predicate registers.\n+template \n+__global__\n+void\n+ GammaBetaBackwardCUDAKernelTemplate(\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- alignas(sizeof(double)) extern __shared__ char s_data1[];\n- T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n- T_ACC* s_dg;\n- T_ACC* s_db;\n-\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db) {\n+ // This assert is a compile-time check only.\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ static_assert(rows_per_thread_y <= kWarpSize);\n \n T_ACC dg_sum = 0;\n T_ACC db_sum = 0;\n \n- if (j < N) {\n- constexpr int unroll_factor = 8;\n-\n- T_ACC mean_reg;\n- T_ACC rstd_reg;\n- T dY_reg;\n- T X_reg;\n+ if (aligned_grid) {\n+ // When N and M align perfectly with block_dim_x and block_dim_y, we\n+ // can skip boundary condition checks that waste instruction issue slots.\n+ blockReduceGammaBetaBackwardsAligned\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ // In the general case we need to check boundary conditions in the M\n+ // dimension. However, we can still avoid boundary checks in the N dimension\n+ // for the inner blocks. 
So try to avoid those checks when possible.\n+ if (blockIdx.x * block_dim_x + block_dim_x - 1 < N) {\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ }\n+ }\n \n- // Main Loop\n- int bcounter;\n- for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor); bcounter++){\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n \n- #pragma unroll\n- for (int ii = 0; ii < unroll_factor; ++ii) {\n- dY_reg = dY[(offset + ii) * N + j];\n- X_reg = X[(offset + ii) * N + j];\n- mean_reg = mean[offset + ii];\n- rstd_reg = rstd[offset + ii];\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ // When partial_reduction is requested, we don't reduce within a block.\n+ // We also don't reduce if we are only a single block in the y dimension.\n+ if (partial_reduction || (blockDim.y == 1 && gridDim.y == 1)) {\n+ if (aligned_grid || thread_x < N) {\n+ int64_t thread_y = blockIdx.y * blockDim.y + threadIdx.y;\n+ if (dg) {\n+ dg[thread_y * N + thread_x] = dg_sum;\n }\n- }\n-\n- // Remainder loop\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n- for (int ii = 0; ii < unroll_factor; ii++ ){\n- if ((offset + ii) < M) {\n- dY_reg = dY[(offset + ii) * N + j ];\n- X_reg = X[(offset + ii) * N + j];\n- mean_reg = mean[offset + ii];\n- rstd_reg = rstd[offset + ii];\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ if (db) {\n+ db[thread_y * N + thread_x] = db_sum;\n }\n }\n-\n- // Do the final reduction in shared memory\n+ } else {\n+ // The caller requested a full reduction so we must reduce across\n+ // warps using shared memory and warp shuffles.\n+ static_assert(rows_per_thread_y <= C10_WARP_SIZE);\n+ alignas(sizeof(double)) extern __shared__ char s_data1[];\n+ T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n+ T_ACC* s_dg;\n+ T_ACC* s_db;\n+ int padded_bx = (block_dim_x + 1);\n+ // Transpose dg and db.\n s_dg = s_data_typed;\n- s_db = s_data_typed + blockDim.x * blockDim.y;\n- s_dg[threadIdx.y * blockDim.x + threadIdx.x] = dg_sum;\n- s_db[threadIdx.y * blockDim.x + threadIdx.x] = db_sum;\n+ s_db = s_data_typed + (padded_bx * block_dim_y);\n+ s_dg[threadIdx.y * padded_bx + threadIdx.x] = dg_sum;\n+ s_db[threadIdx.y * padded_bx + threadIdx.x] = db_sum;\n __syncthreads();\n \n- for (int offset = blockDim.y / 2; offset >= 1; offset /= 2) {\n- if (threadIdx.y < offset) {\n- s_dg[threadIdx.y * blockDim.x + threadIdx.x] +=\n- s_dg[(threadIdx.y + offset) * blockDim.x + threadIdx.x];\n- s_db[threadIdx.y * blockDim.x + threadIdx.x] +=\n- s_db[(threadIdx.y + offset) * blockDim.x + threadIdx.x];\n+ // Load transposed so that a warp holds an entire column\n+ // Because block_dim_x != block_dim_y in the general case, we need\n+ // some code to handle the general case.\n+ static_assert(block_dim_x * block_dim_y % C10_WARP_SIZE == 0);\n+ constexpr int warps_available_to_reduce = block_dim_x * block_dim_y / C10_WARP_SIZE;\n+ int thread_id = threadIdx.y * block_dim_x + threadIdx.x;\n+ int warp_id = thread_id / C10_WARP_SIZE;\n+ int lane_id = thread_id & (C10_WARP_SIZE - 1);\n+ #pragma unroll\n+ for (int i = warp_id; i < block_dim_x; i += warps_available_to_reduce) {\n+ T_ACC reg_db, reg_dg;\n+ if (lane_id < block_dim_y) {\n+ reg_dg = s_dg[lane_id * padded_bx + i];\n+ reg_db = s_db[lane_id * padded_bx + i];\n+ 
}\n+ #pragma unroll\n+ for (unsigned delta = block_dim_y >> 1; delta >= 1; delta >>= 1) {\n+ reg_dg += WARP_SHFL_XOR(reg_dg, delta, kWarpSize);\n+ reg_db += WARP_SHFL_XOR(reg_db, delta, kWarpSize);\n+ }\n+ // Reduce is done. Now write it out to global memory.\n+ int64_t out_index = blockIdx.x * block_dim_x + i;\n+ if (threadIdx.x == 0 && (aligned_grid || out_index < N)) {\n+ if (dg) {\n+ dg[out_index] = reg_dg;\n }\n- __syncthreads();\n+ if (db) {\n+ db[out_index] = reg_db;\n+ }\n+ }\n }\n+ }\n+}\n \n- if (threadIdx.y == 0) {\n- if (dg) {\n- dg[j] = s_dg[threadIdx.x];\n- }\n- if (db) {\n- db[j] = s_db[threadIdx.x];\n- }\n+template\n+void LaunchAndCheckGammaBetaBackwardKernel(\n+ bool aligned_grid,\n+ dim3 blocks,\n+ dim3 threads,\n+ size_t shmem_sz,\n+ cudaStream_t cuda_stream,\n+ const T* dY_data,\n+ const T* X_data,\n+ const T_ACC* mean_data,\n+ const T_ACC* rstd_data,\n+ int64_t M,\n+ int64_t N,\n+ T* dgamma_data,\n+ T* dbeta_data) {\n+if (aligned_grid) {\n+ GammaBetaBackwardCUDAKernelTemplate\n+ <<>>(\n+ M,\n+ N,\n+ dY_data,\n+ X_data,\n+ mean_data,\n+ rstd_data,\n+ dgamma_data,\n+ dbeta_data);\n+ } else {\n+ GammaBetaBackwardCUDAKernelTemplate\n+ <<>>(\n+ M,\n+ N,\n+ dY_data,\n+ X_data,\n+ mean_data,\n+ rstd_data,\n+ dgamma_data,\n+ dbeta_data);\n+ }\n+ C10_CUDA_KERNEL_LAUNCH_CHECK();\n+}\n+\n+template\n+void ConfigureAndLaunchGammaBetaBackwardKernel(\n+ const T* dY_data,\n+ const T* X_data,\n+ const T_ACC* mean_data,\n+ const T_ACC* rstd_data,\n+ int64_t M,\n+ int64_t N,\n+ Tensor* dgamma,\n+ Tensor* dbeta,\n+ cudaStream_t cuda_stream) {\n+ T* dgamma_data =\n+ dgamma->defined() ? dgamma->template data_ptr() : nullptr;\n+ T* dbeta_data = dbeta->defined() ? dbeta->template data_ptr() : nullptr;\n+ bool aligned_grid = (M % rows_per_block_y == 0) && (N % block_dim_x == 0);\n+ dim3 threads{block_dim_x, block_dim_y};\n+ dim3 blocks;\n+ blocks.x = (N + block_dim_x - 1) / block_dim_x;\n+ blocks.y = 1;\n+ size_t shmem_sz = (block_dim_x + 1) * block_dim_y * sizeof(T_ACC) * 2;\n+ if (blocks.y == 1 && threads.y == 1) {\n+ // Optimization: since there is just one thread doing all the summation, we don't need a reduction\n+ // across threads. So we set partial_reduction to true.\n+ LaunchAndCheckGammaBetaBackwardKernel(\n+ aligned_grid, blocks, threads, shmem_sz, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_data, dbeta_data);\n+ } else {\n+ LaunchAndCheckGammaBetaBackwardKernel(\n+ aligned_grid, blocks, threads, shmem_sz, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_data, dbeta_data);\n+ }\n+\n+}\n+\n+template\n+void LaunchGammaBetaBackwardCUDAKernel(\n+ const T* dY_data,\n+ const T* X_data,\n+ const T_ACC* mean_data,\n+ const T_ACC* rstd_data,\n+ int64_t M,\n+ int64_t N,\n+ Tensor* dgamma,\n+ Tensor* dbeta,\n+ cudaStream_t cuda_stream) {\n+ if (M > 64 * 1024 && N / kWarpSize < 64) {\n+ // We have a situation where M >> N and N is small.\n+ // In this case we can speed up the computation by parallelizing in the M dimension.\n+ // We launch multiple blocks in the y-dimension, and compute partial sums for the\n+ // gradient in the first pass. 
Then we do a .sum(0) to do a final reduction.\n+ // Although we launch 2 kernels, we can get up to a 10x speedup for large M.\n+ constexpr int block_dim_x = 32;\n+ constexpr int block_dim_y = 1;\n+ constexpr int rows_per_block_y = 32;\n+ bool aligned_grid = (M % rows_per_block_y == 0) && (N % block_dim_x == 0);\n+ dim3 threads{block_dim_x, block_dim_y};\n+ dim3 blocks;\n+ blocks.x = (N + block_dim_x - 1) / block_dim_x;\n+ // int rows_per_block = my_gamma_beta_unroll_factor *\n+ blocks.y = (M + rows_per_block_y - 1) / rows_per_block_y;\n+ constexpr int max_grid_size = 64 * 1024 / 2;\n+ blocks.y = std::min(max_grid_size / blocks.x, blocks.y);\n+ auto options = dgamma->options();\n+ Tensor dgamma_blocks;\n+ Tensor dbeta_blocks;\n+ T * dgamma_blocks_ptr = nullptr;\n+ T * dbeta_blocks_ptr = nullptr;\n+ if (dgamma->defined()) {\n+ dgamma_blocks = at::zeros({blocks.y * threads.y, dgamma->size(-1)}, options);\n+ dgamma_blocks_ptr = dgamma_blocks.data_ptr();\n+ }\n+ if (dbeta->defined()) {\n+ dbeta_blocks = at::zeros({blocks.y * threads.y, dgamma->size(-1)}, options);\n+ dbeta_blocks_ptr = dbeta_blocks.data_ptr();\n+ }\n+ LaunchAndCheckGammaBetaBackwardKernel(\n+ aligned_grid, blocks, threads, 0, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_blocks_ptr, dbeta_blocks_ptr);\n+\n+ *dgamma = dgamma_blocks.sum(0);\n+ *dbeta = dbeta_blocks.sum(0);\n+ } else {\n+ // We are in the normal case where M is not that large.\n+ // We can change the tile shape (which is the last template parameter) in accordance with M.\n+ // For small M it is faster to have a smaller tile, otherwise we could have idle threads.\n+ // For larger M we use a bigger tile size.\n+ if (M < 64) {", - "comment_created_at": "2025-03-18T03:03:26+00:00", - "comment_author": "ahmadsharif1", - "comment_body": "\"image\"\r\n\r\nThe perf impact is substantial (20% regression) even for powers of 2 without this specialization. Note that even before my change we were specializing for M < 128. I hyper-specialize a little bit (keeping the same code -- just different template parameters) which results in a speed-up over the baseline:\r\n\r\n\"image\"\r\n\r\nMy code has each thread handling 8 rows so it's pointless launching threads that don't handle any rows at all for \"short and wide\" cases (i.e. for low values of M). They just cause wasted occupancy.\r\n", - "pr_file_module": null - }, - { - "comment_id": "2006461566", - "repo_full_name": "pytorch/pytorch", - "pr_number": 148605, - "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", - "discussion_id": "1996412678", - "commented_code": "@@ -508,223 +509,418 @@ __global__ void layer_norm_grad_input_kernel_vectorized(\n }\n }\n \n-\n-template \n-__global__ void GammaBetaBackwardSimpleCUDAKernel(\n+// When the data size is a multiple of the tile size we can use fewer\n+// instructions and registers. Example, this is the case when M=256 N=256.\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsAligned(\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n- if (j < N) {\n- T_ACC sum1 = 0;\n- T_ACC sum2 = 0;\n- for (int64_t i = 0; i < M; ++i) {\n- const int64_t index = i * N + j;\n- sum1 += dg == nullptr ? T_ACC(0)\n- : static_cast(dY[index]) *\n- (static_cast(X[index]) - static_cast(mean[i])) *\n- static_cast(rstd[i]);\n- sum2 += db == nullptr ? 
T_ACC(0) : static_cast(dY[index]);\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n+ for (int64_t M_start = blockIdx.y * rows_per_block_y;\n+ M_start < M;\n+ M_start += rows_per_block_y * gridDim.y) {\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n }\n- if (dg != nullptr) {\n- dg[j] = sum1;\n+ WARP_SYNC();\n+ T_ACC dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ if (aligned_grid || (current_y < M && thread_x < N)) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n+ }\n }\n- if (db != nullptr) {\n- db[j] = sum2;\n+\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n }\n }\n }\n \n-// This implementation gets called if M and N divide with 32. This case should\n-// be the most common. We can then make better use of warp level intrinsics\n-// to improve performance.\n-\n-template \n-__global__ void GammaBetaBackwardCUDAKernel_32x32(\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsHelper(\n+ int64_t M_start,\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- alignas(sizeof(double)) extern __shared__ char s_data1[];\n- T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n- T_ACC* s_dg;\n- T_ACC* s_db;\n-\n- T_ACC dg_sum = 0;\n- T_ACC db_sum = 0;\n-\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n-\n- if (j < N) {\n- constexpr int unroll_factor = 8;\n- int laneId = threadIdx.x & (C10_WARP_SIZE - 1);\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n+\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y && mean_index + lane_id < M) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n+ }\n+ WARP_SYNC();\n \n- T_ACC mean_reg, mean_reg_tmp;\n- T_ACC rstd_reg, rstd_reg_tmp;\n- T dY_reg;\n- T X_reg;\n \n- // Main loop\n- int bcounter;\n- for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor);\n- bcounter++) {\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n \n- if (laneId < unroll_factor) {\n- mean_reg_tmp = mean[offset + laneId];\n- rstd_reg_tmp = rstd[offset + laneId];\n+ T_ACC 
dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ bool active = true;\n+ if (check_x && thread_x >= N) {\n+ active = false;\n }\n- WARP_SYNC();\n-\n- #pragma unroll\n- for (int ii = 0; ii < unroll_factor; ++ii) {\n- dY_reg = dY[(offset + ii) * N + j];\n- X_reg = X[(offset + ii) * N + j];\n- mean_reg = WARP_SHFL(mean_reg_tmp, ii, kWarpSize);\n- rstd_reg = WARP_SHFL(rstd_reg_tmp, ii, kWarpSize);\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ if (check_y && current_y >= M) {\n+ active = false;\n }\n- }\n-\n- // Remainder loop\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n- for (int ii = 0; ii < unroll_factor; ii++) {\n- if ((offset + ii) < M) {\n- mean_reg = mean[offset + ii];\n- rstd_reg = rstd[offset + ii];\n- dY_reg = dY[(offset + ii) * N + j];\n- X_reg = X[(offset + ii) * N + j];\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ if (active) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n }\n }\n \n- // This kernel uses a block of (C10_WARP_SIZE x C10_WARP_SIZE) and\n- // gets called when M; N divide by 32. We can use warp shuffles\n- // for the final reduction step. This removes 4 shmem loads and\n- // stores with their corresponding __syncthreads()\n-\n- // This greatly reduces bank conflicts at the expense of a little\n- // extra shared memory. It does not impact occupancy\n- int padded_bx = (1 + blockDim.x);\n-\n- s_dg = s_data_typed;\n- s_db = s_data_typed + (padded_bx * blockDim.y);\n- s_dg[threadIdx.y * padded_bx + threadIdx.x] = dg_sum;\n- s_db[threadIdx.y * padded_bx + threadIdx.x] = db_sum;\n- __syncthreads();\n-\n- // Load transposed so that a warp holds an entire column\n- T_ACC reg_dg = s_dg[threadIdx.x * padded_bx + threadIdx.y];\n- T_ACC reg_db = s_db[threadIdx.x * padded_bx + threadIdx.y];\n- for (unsigned delta = C10_WARP_SIZE >> 1; delta >= 1; delta >>= 1) {\n- reg_dg += WARP_SHFL_XOR(reg_dg, delta, kWarpSize);\n- reg_db += WARP_SHFL_XOR(reg_db, delta, kWarpSize);\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n }\n+}\n \n- if (threadIdx.x == 0) {\n- const int64_t j = blockIdx.x * blockDim.x + threadIdx.y;\n- if (dg) {\n- dg[j] = reg_dg;\n- }\n- if (db) {\n- db[j] = reg_db;\n- }\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsWithChecks(\n+ int64_t M,\n+ int64_t N,\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ for (int64_t M_start = blockIdx.y * rows_per_block_y;\n+ M_start < M;\n+ M_start += rows_per_block_y * gridDim.y) {\n+ int64_t M_end = M_start + rows_per_block_y - 1;\n+ if (M_end < M) {\n+ blockReduceGammaBetaBackwardsHelper\n+ (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ blockReduceGammaBetaBackwardsHelper\n+ (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n }\n }\n }\n \n-template \n-__global__ void GammaBetaBackwardCUDAKernel(\n+// block_dim_x is the number of threads in the x dimension per block.\n+// block_dim_y is 
the number of threads in the y dimension per block.\n+// rows_per_block_y is the size of the tile (number of data elements)\n+// in the y dimension per block.\n+// partial_reduction indicates whether we need to reduce across threads\n+// or not. If set to true, we will not reduce across threads. This can\n+// be faster in the M >> N case but requires another kernel to do a full\n+// final reduction.\n+// aligned_grid means the data size is a multiple of tile size. In that\n+// case we don't need to check for boundary conditions which can provide\n+// a further speedup by not needing instructions to check for edge cases\n+// and not needing predicate registers.\n+template \n+__global__\n+void\n+ GammaBetaBackwardCUDAKernelTemplate(\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- alignas(sizeof(double)) extern __shared__ char s_data1[];\n- T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n- T_ACC* s_dg;\n- T_ACC* s_db;\n-\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db) {\n+ // This assert is a compile-time check only.\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ static_assert(rows_per_thread_y <= kWarpSize);\n \n T_ACC dg_sum = 0;\n T_ACC db_sum = 0;\n \n- if (j < N) {\n- constexpr int unroll_factor = 8;\n-\n- T_ACC mean_reg;\n- T_ACC rstd_reg;\n- T dY_reg;\n- T X_reg;\n+ if (aligned_grid) {\n+ // When N and M align perfectly with block_dim_x and block_dim_y, we\n+ // can skip boundary condition checks that waste instruction issue slots.\n+ blockReduceGammaBetaBackwardsAligned\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ // In the general case we need to check boundary conditions in the M\n+ // dimension. However, we can still avoid boundary checks in the N dimension\n+ // for the inner blocks. 
So try to avoid those checks when possible.\n+ if (blockIdx.x * block_dim_x + block_dim_x - 1 < N) {\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ }\n+ }\n \n- // Main Loop\n- int bcounter;\n- for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor); bcounter++){\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n \n- #pragma unroll\n- for (int ii = 0; ii < unroll_factor; ++ii) {\n- dY_reg = dY[(offset + ii) * N + j];\n- X_reg = X[(offset + ii) * N + j];\n- mean_reg = mean[offset + ii];\n- rstd_reg = rstd[offset + ii];\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ // When partial_reduction is requested, we don't reduce within a block.\n+ // We also don't reduce if we are only a single block in the y dimension.\n+ if (partial_reduction || (blockDim.y == 1 && gridDim.y == 1)) {\n+ if (aligned_grid || thread_x < N) {\n+ int64_t thread_y = blockIdx.y * blockDim.y + threadIdx.y;\n+ if (dg) {\n+ dg[thread_y * N + thread_x] = dg_sum;\n }\n- }\n-\n- // Remainder loop\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n- for (int ii = 0; ii < unroll_factor; ii++ ){\n- if ((offset + ii) < M) {\n- dY_reg = dY[(offset + ii) * N + j ];\n- X_reg = X[(offset + ii) * N + j];\n- mean_reg = mean[offset + ii];\n- rstd_reg = rstd[offset + ii];\n- dg_sum += dY_reg * (X_reg - mean_reg) * rstd_reg;\n- db_sum += dY_reg;\n+ if (db) {\n+ db[thread_y * N + thread_x] = db_sum;\n }\n }\n-\n- // Do the final reduction in shared memory\n+ } else {\n+ // The caller requested a full reduction so we must reduce across\n+ // warps using shared memory and warp shuffles.\n+ static_assert(rows_per_thread_y <= C10_WARP_SIZE);\n+ alignas(sizeof(double)) extern __shared__ char s_data1[];\n+ T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n+ T_ACC* s_dg;\n+ T_ACC* s_db;\n+ int padded_bx = (block_dim_x + 1);\n+ // Transpose dg and db.\n s_dg = s_data_typed;\n- s_db = s_data_typed + blockDim.x * blockDim.y;\n- s_dg[threadIdx.y * blockDim.x + threadIdx.x] = dg_sum;\n- s_db[threadIdx.y * blockDim.x + threadIdx.x] = db_sum;\n+ s_db = s_data_typed + (padded_bx * block_dim_y);\n+ s_dg[threadIdx.y * padded_bx + threadIdx.x] = dg_sum;\n+ s_db[threadIdx.y * padded_bx + threadIdx.x] = db_sum;\n __syncthreads();\n \n- for (int offset = blockDim.y / 2; offset >= 1; offset /= 2) {\n- if (threadIdx.y < offset) {\n- s_dg[threadIdx.y * blockDim.x + threadIdx.x] +=\n- s_dg[(threadIdx.y + offset) * blockDim.x + threadIdx.x];\n- s_db[threadIdx.y * blockDim.x + threadIdx.x] +=\n- s_db[(threadIdx.y + offset) * blockDim.x + threadIdx.x];\n+ // Load transposed so that a warp holds an entire column\n+ // Because block_dim_x != block_dim_y in the general case, we need\n+ // some code to handle the general case.\n+ static_assert(block_dim_x * block_dim_y % C10_WARP_SIZE == 0);\n+ constexpr int warps_available_to_reduce = block_dim_x * block_dim_y / C10_WARP_SIZE;\n+ int thread_id = threadIdx.y * block_dim_x + threadIdx.x;\n+ int warp_id = thread_id / C10_WARP_SIZE;\n+ int lane_id = thread_id & (C10_WARP_SIZE - 1);\n+ #pragma unroll\n+ for (int i = warp_id; i < block_dim_x; i += warps_available_to_reduce) {\n+ T_ACC reg_db, reg_dg;\n+ if (lane_id < block_dim_y) {\n+ reg_dg = s_dg[lane_id * padded_bx + i];\n+ reg_db = s_db[lane_id * padded_bx + i];\n+ 
}\n+ #pragma unroll\n+ for (unsigned delta = block_dim_y >> 1; delta >= 1; delta >>= 1) {\n+ reg_dg += WARP_SHFL_XOR(reg_dg, delta, kWarpSize);\n+ reg_db += WARP_SHFL_XOR(reg_db, delta, kWarpSize);\n+ }\n+ // Reduce is done. Now write it out to global memory.\n+ int64_t out_index = blockIdx.x * block_dim_x + i;\n+ if (threadIdx.x == 0 && (aligned_grid || out_index < N)) {\n+ if (dg) {\n+ dg[out_index] = reg_dg;\n }\n- __syncthreads();\n+ if (db) {\n+ db[out_index] = reg_db;\n+ }\n+ }\n }\n+ }\n+}\n \n- if (threadIdx.y == 0) {\n- if (dg) {\n- dg[j] = s_dg[threadIdx.x];\n- }\n- if (db) {\n- db[j] = s_db[threadIdx.x];\n- }\n+template\n+void LaunchAndCheckGammaBetaBackwardKernel(\n+ bool aligned_grid,\n+ dim3 blocks,\n+ dim3 threads,\n+ size_t shmem_sz,\n+ cudaStream_t cuda_stream,\n+ const T* dY_data,\n+ const T* X_data,\n+ const T_ACC* mean_data,\n+ const T_ACC* rstd_data,\n+ int64_t M,\n+ int64_t N,\n+ T* dgamma_data,\n+ T* dbeta_data) {\n+if (aligned_grid) {\n+ GammaBetaBackwardCUDAKernelTemplate\n+ <<>>(\n+ M,\n+ N,\n+ dY_data,\n+ X_data,\n+ mean_data,\n+ rstd_data,\n+ dgamma_data,\n+ dbeta_data);\n+ } else {\n+ GammaBetaBackwardCUDAKernelTemplate\n+ <<>>(\n+ M,\n+ N,\n+ dY_data,\n+ X_data,\n+ mean_data,\n+ rstd_data,\n+ dgamma_data,\n+ dbeta_data);\n+ }\n+ C10_CUDA_KERNEL_LAUNCH_CHECK();\n+}\n+\n+template\n+void ConfigureAndLaunchGammaBetaBackwardKernel(\n+ const T* dY_data,\n+ const T* X_data,\n+ const T_ACC* mean_data,\n+ const T_ACC* rstd_data,\n+ int64_t M,\n+ int64_t N,\n+ Tensor* dgamma,\n+ Tensor* dbeta,\n+ cudaStream_t cuda_stream) {\n+ T* dgamma_data =\n+ dgamma->defined() ? dgamma->template data_ptr() : nullptr;\n+ T* dbeta_data = dbeta->defined() ? dbeta->template data_ptr() : nullptr;\n+ bool aligned_grid = (M % rows_per_block_y == 0) && (N % block_dim_x == 0);\n+ dim3 threads{block_dim_x, block_dim_y};\n+ dim3 blocks;\n+ blocks.x = (N + block_dim_x - 1) / block_dim_x;\n+ blocks.y = 1;\n+ size_t shmem_sz = (block_dim_x + 1) * block_dim_y * sizeof(T_ACC) * 2;\n+ if (blocks.y == 1 && threads.y == 1) {\n+ // Optimization: since there is just one thread doing all the summation, we don't need a reduction\n+ // across threads. So we set partial_reduction to true.\n+ LaunchAndCheckGammaBetaBackwardKernel(\n+ aligned_grid, blocks, threads, shmem_sz, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_data, dbeta_data);\n+ } else {\n+ LaunchAndCheckGammaBetaBackwardKernel(\n+ aligned_grid, blocks, threads, shmem_sz, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_data, dbeta_data);\n+ }\n+\n+}\n+\n+template\n+void LaunchGammaBetaBackwardCUDAKernel(\n+ const T* dY_data,\n+ const T* X_data,\n+ const T_ACC* mean_data,\n+ const T_ACC* rstd_data,\n+ int64_t M,\n+ int64_t N,\n+ Tensor* dgamma,\n+ Tensor* dbeta,\n+ cudaStream_t cuda_stream) {\n+ if (M > 64 * 1024 && N / kWarpSize < 64) {\n+ // We have a situation where M >> N and N is small.\n+ // In this case we can speed up the computation by parallelizing in the M dimension.\n+ // We launch multiple blocks in the y-dimension, and compute partial sums for the\n+ // gradient in the first pass. 
Then we do a .sum(0) to do a final reduction.\n+ // Although we launch 2 kernels, we can get up to a 10x speedup for large M.\n+ constexpr int block_dim_x = 32;\n+ constexpr int block_dim_y = 1;\n+ constexpr int rows_per_block_y = 32;\n+ bool aligned_grid = (M % rows_per_block_y == 0) && (N % block_dim_x == 0);\n+ dim3 threads{block_dim_x, block_dim_y};\n+ dim3 blocks;\n+ blocks.x = (N + block_dim_x - 1) / block_dim_x;\n+ // int rows_per_block = my_gamma_beta_unroll_factor *\n+ blocks.y = (M + rows_per_block_y - 1) / rows_per_block_y;\n+ constexpr int max_grid_size = 64 * 1024 / 2;\n+ blocks.y = std::min(max_grid_size / blocks.x, blocks.y);\n+ auto options = dgamma->options();\n+ Tensor dgamma_blocks;\n+ Tensor dbeta_blocks;\n+ T * dgamma_blocks_ptr = nullptr;\n+ T * dbeta_blocks_ptr = nullptr;\n+ if (dgamma->defined()) {\n+ dgamma_blocks = at::zeros({blocks.y * threads.y, dgamma->size(-1)}, options);\n+ dgamma_blocks_ptr = dgamma_blocks.data_ptr();\n+ }\n+ if (dbeta->defined()) {\n+ dbeta_blocks = at::zeros({blocks.y * threads.y, dgamma->size(-1)}, options);\n+ dbeta_blocks_ptr = dbeta_blocks.data_ptr();\n+ }\n+ LaunchAndCheckGammaBetaBackwardKernel(\n+ aligned_grid, blocks, threads, 0, cuda_stream, dY_data, X_data, mean_data, rstd_data, M, N, dgamma_blocks_ptr, dbeta_blocks_ptr);\n+\n+ *dgamma = dgamma_blocks.sum(0);\n+ *dbeta = dbeta_blocks.sum(0);\n+ } else {\n+ // We are in the normal case where M is not that large.\n+ // We can change the tile shape (which is the last template parameter) in accordance with M.\n+ // For small M it is faster to have a smaller tile, otherwise we could have idle threads.\n+ // For larger M we use a bigger tile size.\n+ if (M < 64) {", - "comment_created_at": "2025-03-20T21:12:03+00:00", - "comment_author": "ahmadsharif1", - "comment_body": "For completeness, I did more experiments with smaller values of M and N and a lot more datapoints.\r\n\r\nWithout specialization:\r\n![image](https://github.com/user-attachments/assets/1fc62865-3f7b-4f12-97ed-01e2eaba4cc1)\r\n\r\n\r\nWith specialization:\r\n![image](https://github.com/user-attachments/assets/5325ed92-cc36-4f2a-a137-0edbeeb0ccbd)\r\n\r\n\r\nYou can observe that the \"short and wide\" region regressions go away with specialization (basically large N and small M which is at the bottom right of the chart).\r\n\r\nThere are still some regressions left, but those are with very small values of N (like 4 or 8 -- not sure if those shapes are used much).", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2033537678", - "pr_number": 150218, - "pr_file": "aten/src/ATen/DLConvertor.cpp", - "created_at": "2025-04-08T16:03:52+00:00", - "commented_code": "return fromDLPackImpl(src, std::move(deleter));\n}\n\nTensor maybeCopyTensor(\n const Tensor& data,\n std::optional optional_dl_device,\n std::optional copy) {\n bool force_copy = copy.has_value() && *copy;\n bool force_move = copy.has_value() && !*copy;\n\n if (optional_dl_device.has_value()) {\n auto device = at::getATenDevice(optional_dl_device->device_type, static_cast(optional_dl_device->device_id));\n\n if (device != data.device()) {\n TORCH_CHECK(\n !force_move,\n \"cannot move tensor from \",\n data.device(),\n \" to \",\n device,\n \" without copying. 
Set copy=True is needed.\");\n return data.to(device);\n }\n }\n\n if (force_copy) {\n return data.clone().detach();", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2033537678", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150218, - "pr_file": "aten/src/ATen/DLConvertor.cpp", - "discussion_id": "2033537678", - "commented_code": "@@ -388,4 +389,33 @@ Tensor fromDLPackVersioned(DLManagedTensorVersioned* src, std::function(src, std::move(deleter));\n }\n \n+Tensor maybeCopyTensor(\n+ const Tensor& data,\n+ std::optional optional_dl_device,\n+ std::optional copy) {\n+ bool force_copy = copy.has_value() && *copy;\n+ bool force_move = copy.has_value() && !*copy;\n+\n+ if (optional_dl_device.has_value()) {\n+ auto device = at::getATenDevice(optional_dl_device->device_type, static_cast(optional_dl_device->device_id));\n+\n+ if (device != data.device()) {\n+ TORCH_CHECK(\n+ !force_move,\n+ \"cannot move tensor from \",\n+ data.device(),\n+ \" to \",\n+ device,\n+ \" without copying. Set copy=True is needed.\");\n+ return data.to(device);\n+ }\n+ }\n+\n+ if (force_copy) {\n+ return data.clone().detach();", - "comment_created_at": "2025-04-08T16:03:52+00:00", - "comment_author": "Skylion007", - "comment_body": "```suggestion\r\n return data.detach().clone();\r\n```\r\nThis will copy less data / be more efficient.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2063591267", - "pr_number": 152089, - "pr_file": "aten/src/ATen/core/dispatch/OperatorEntry.cpp", - "created_at": "2025-04-28T12:51:29+00:00", - "commented_code": "#endif\n}\n\nvoid OperatorEntry::unsafeSetTags(const std::vector& tags) {\n #ifndef C10_MOBILE\n tags_ = std::move(tags);\n #endif", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2063591267", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152089, - "pr_file": "aten/src/ATen/core/dispatch/OperatorEntry.cpp", - "discussion_id": "2063591267", - "commented_code": "@@ -157,6 +157,12 @@ void OperatorEntry::registerSchema(FunctionSchema&& schema, std::string&& debug,\n #endif\n }\n \n+void OperatorEntry::unsafeSetTags(const std::vector& tags) {\n+ #ifndef C10_MOBILE\n+ tags_ = std::move(tags);\n+ #endif", - "comment_created_at": "2025-04-28T12:51:29+00:00", - "comment_author": "zou3519", - "comment_body": "I'm not that good with C++, why is this a std::move?", - "pr_file_module": null - }, - { - "comment_id": "2065568732", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152089, - "pr_file": "aten/src/ATen/core/dispatch/OperatorEntry.cpp", - "discussion_id": "2063591267", - "commented_code": "@@ -157,6 +157,12 @@ void OperatorEntry::registerSchema(FunctionSchema&& schema, std::string&& debug,\n #endif\n }\n \n+void OperatorEntry::unsafeSetTags(const std::vector& tags) {\n+ #ifndef C10_MOBILE\n+ tags_ = std::move(tags);\n+ #endif", - "comment_created_at": "2025-04-29T06:17:35+00:00", - "comment_author": "jijiew", - "comment_body": "it avoids unnecessary copy", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-optimize-workflow-resources.json b/_reviewers/pytorch-optimize-workflow-resources.json new file mode 100644 index 0000000..bc90cc1 --- /dev/null +++ b/_reviewers/pytorch-optimize-workflow-resources.json @@ -0,0 +1,164 @@ +[ + { + "discussion_id": "2102587511", + "pr_number": 152298, + "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", + "created_at": "2025-05-22T13:37:39+00:00", + "commented_code": "selected-test-configs: ${{ 
inputs.benchmark_configs }}\n secrets: inherit\n\n\n linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly:\n name: linux-jammy-cpu-py3.9-gcc11-inductor\n uses: ./.github/workflows/_linux-test.yml\n needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n if: github.event.schedule == '0 7 * * *'\n if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'\n with:\n build-environment: linux-jammy-py3.9-gcc11-build\n dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true\n dashboard-tag: training-false-inference-true-default-true-default_freezing-true-dynamic-true-dynamic_freezing-true-cppwrapper-true-cppwrapper_freezing-true-aotinductor-true-aotinductor_freezing-true", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2102587511", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152298, + "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", + "discussion_id": "2102587511", + "commented_code": "@@ -90,15 +98,14 @@ jobs:\n selected-test-configs: ${{ inputs.benchmark_configs }}\n secrets: inherit\n \n-\n linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly:\n name: linux-jammy-cpu-py3.9-gcc11-inductor\n uses: ./.github/workflows/_linux-test.yml\n needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n- if: github.event.schedule == '0 7 * * *'\n+ if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'\n with:\n build-environment: linux-jammy-py3.9-gcc11-build\n- dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true\n+ dashboard-tag: training-false-inference-true-default-true-default_freezing-true-dynamic-true-dynamic_freezing-true-cppwrapper-true-cppwrapper_freezing-true-aotinductor-true-aotinductor_freezing-true", + "comment_created_at": "2025-05-22T13:37:39+00:00", + "comment_author": "desertfire", + "comment_body": "Again, all you need to is to add a single `freeing-true` here.", + "pr_file_module": null + }, + { + "comment_id": "2103755948", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152298, + "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", + "discussion_id": "2102587511", + "commented_code": "@@ -90,15 +98,14 @@ jobs:\n selected-test-configs: ${{ inputs.benchmark_configs }}\n secrets: inherit\n \n-\n linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly:\n name: linux-jammy-cpu-py3.9-gcc11-inductor\n uses: ./.github/workflows/_linux-test.yml\n needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n- if: github.event.schedule == '0 7 * * *'\n+ if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'\n with:\n build-environment: linux-jammy-py3.9-gcc11-build\n- dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true\n+ dashboard-tag: training-false-inference-true-default-true-default_freezing-true-dynamic-true-dynamic_freezing-true-cppwrapper-true-cppwrapper_freezing-true-aotinductor-true-aotinductor_freezing-true", + "comment_created_at": "2025-05-23T04:11:10+00:00", + "comment_author": "LifengWang", + "comment_body": "Sure. Now I have added a new CI workflow for the tests of the freezing models. 
So that we can run both the default and the freezing models in the CPU nightly test.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2113851339", + "pr_number": 152298, + "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", + "created_at": "2025-05-29T12:33:16+00:00", + "commented_code": "monitor-data-collect-interval: 4\n secrets: inherit\n\n linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly-freezing:\n name: linux-jammy-cpu-py3.9-gcc11-inductor\n uses: ./.github/workflows/_linux-test.yml\n needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2113851339", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152298, + "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", + "discussion_id": "2113851339", + "commented_code": "@@ -108,6 +115,22 @@ jobs:\n monitor-data-collect-interval: 4\n secrets: inherit\n \n+ linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly-freezing:\n+ name: linux-jammy-cpu-py3.9-gcc11-inductor\n+ uses: ./.github/workflows/_linux-test.yml\n+ needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n+ if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'", + "comment_created_at": "2025-05-29T12:33:16+00:00", + "comment_author": "desertfire", + "comment_body": "This means we will test both freezing ON and OFF every night. Is this really we want here?", + "pr_file_module": null + }, + { + "comment_id": "2113899127", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152298, + "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", + "discussion_id": "2113851339", + "commented_code": "@@ -108,6 +115,22 @@ jobs:\n monitor-data-collect-interval: 4\n secrets: inherit\n \n+ linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly-freezing:\n+ name: linux-jammy-cpu-py3.9-gcc11-inductor\n+ uses: ./.github/workflows/_linux-test.yml\n+ needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n+ if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'", + "comment_created_at": "2025-05-29T13:02:31+00:00", + "comment_author": "LifengWang", + "comment_body": "Yes, I remember that you mentioned that we need to keep the default test. Maybe we can just run a few tests with freezing off?", + "pr_file_module": null + }, + { + "comment_id": "2114052120", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152298, + "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", + "discussion_id": "2113851339", + "commented_code": "@@ -108,6 +115,22 @@ jobs:\n monitor-data-collect-interval: 4\n secrets: inherit\n \n+ linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly-freezing:\n+ name: linux-jammy-cpu-py3.9-gcc11-inductor\n+ uses: ./.github/workflows/_linux-test.yml\n+ needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n+ if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'", + "comment_created_at": "2025-05-29T14:05:01+00:00", + "comment_author": "desertfire", + "comment_body": "Did you trigger a dashboard run? 
Do you have a dashboard link shows how the data is going to be displayed?", + "pr_file_module": null + }, + { + "comment_id": "2115081221", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152298, + "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", + "discussion_id": "2113851339", + "commented_code": "@@ -108,6 +115,22 @@ jobs:\n monitor-data-collect-interval: 4\n secrets: inherit\n \n+ linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly-freezing:\n+ name: linux-jammy-cpu-py3.9-gcc11-inductor\n+ uses: ./.github/workflows/_linux-test.yml\n+ needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n+ if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'", + "comment_created_at": "2025-05-30T03:32:34+00:00", + "comment_author": "LifengWang", + "comment_body": "Hi @desertfire. I checked the dashboard page for the performance data related to freezing using the following [link](https://hud.pytorch.org/benchmark/compilers) . Set the time range to the last 14 days, precision to AMP, and device to CPU (x86).\r\nIt appears that the freezing test configuration is already included in the dashboard, but no performance data is being displayed.\r\n![image](https://github.com/user-attachments/assets/56df2d1a-1234-4b24-886a-b95e46da0cda)\r\n\r\n\r\n", + "pr_file_module": null + }, + { + "comment_id": "2116346509", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152298, + "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", + "discussion_id": "2113851339", + "commented_code": "@@ -108,6 +115,22 @@ jobs:\n monitor-data-collect-interval: 4\n secrets: inherit\n \n+ linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly-freezing:\n+ name: linux-jammy-cpu-py3.9-gcc11-inductor\n+ uses: ./.github/workflows/_linux-test.yml\n+ needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n+ if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'", + "comment_created_at": "2025-05-30T17:55:09+00:00", + "comment_author": "desertfire", + "comment_body": "OK, I see two problems here:\r\n\r\n1. The landing page of the OSS dashboard defaults to bf16 for inference. If a user selects x86 as the inference backend, they will see an empty page thinking the system is down or something.\r\n2. If you think freezing should be default, we should avoid testing the non-freezing one to cut the hardware expense. Also the names with freezing on is too long which should be fixed.\r\n\r\nUI fixes need to be done in https://github.com/pytorch/test-infra.\r\n\r\n", + "pr_file_module": null + }, + { + "comment_id": "2158256290", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152298, + "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", + "discussion_id": "2113851339", + "commented_code": "@@ -108,6 +115,22 @@ jobs:\n monitor-data-collect-interval: 4\n secrets: inherit\n \n+ linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly-freezing:\n+ name: linux-jammy-cpu-py3.9-gcc11-inductor\n+ uses: ./.github/workflows/_linux-test.yml\n+ needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n+ if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'", + "comment_created_at": "2025-06-20T07:37:01+00:00", + "comment_author": "LifengWang", + "comment_body": "Hi, @desertfire. Now I have removed the non-freezing test, and the test results in the HUD are as follows. 
Would it make sense to update the CI tests in this PR first, and address the dashboard changes in a follow-up?\r\n\r\n![image](https://github.com/user-attachments/assets/41762078-2102-4003-b5ee-5fec5c944e65)\r\n", + "pr_file_module": null + }, + { + "comment_id": "2167903726", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152298, + "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", + "discussion_id": "2113851339", + "commented_code": "@@ -108,6 +115,22 @@ jobs:\n monitor-data-collect-interval: 4\n secrets: inherit\n \n+ linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly-freezing:\n+ name: linux-jammy-cpu-py3.9-gcc11-inductor\n+ uses: ./.github/workflows/_linux-test.yml\n+ needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n+ if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'", + "comment_created_at": "2025-06-26T01:19:13+00:00", + "comment_author": "LifengWang", + "comment_body": "Hi, @huydhn, do you have any insights for the dashboard update? Since we want to update the CPU nightly test using the freezing model.", + "pr_file_module": null + }, + { + "comment_id": "2172574300", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152298, + "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", + "discussion_id": "2113851339", + "commented_code": "@@ -108,6 +115,22 @@ jobs:\n monitor-data-collect-interval: 4\n secrets: inherit\n \n+ linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly-freezing:\n+ name: linux-jammy-cpu-py3.9-gcc11-inductor\n+ uses: ./.github/workflows/_linux-test.yml\n+ needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n+ if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'", + "comment_created_at": "2025-06-27T18:02:08+00:00", + "comment_author": "huydhn", + "comment_body": "Sorry for not seeing your message earlier! 
It's ok to land this first and update the dashboard later\r\n\r\ncc @yangw-dev ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2172999953", + "pr_number": 157181, + "pr_file": ".github/workflows/h100-symm-mem.yml", + "created_at": "2025-06-28T00:15:23+00:00", + "commented_code": "name: Limited CI for symmetric memory tests on H100\n\non:\n pull_request:\n paths:\n - .github/workflows/h100-symm-mem.yml\n workflow_dispatch:\n push:\n tags:\n - ciflow/h100-symm-mem/*\n schedule:\n - cron: 22 8 * * * # about 1:22am PDT\n\nconcurrency:\n group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}\n cancel-in-progress: true\n\njobs:\n\n get-label-type:\n if: github.repository_owner == 'pytorch'\n name: get-label-type\n uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main\n with:\n triggering_actor: ${{ github.triggering_actor }}\n issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}\n curr_branch: ${{ github.head_ref || github.ref_name }}\n curr_ref_type: ${{ github.ref_type }}\n\n linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-symm:\n name: linux-jammy-cuda12.8-py3.10-gcc11-sm90-symm\n uses: ./.github/workflows/_linux-build.yml\n needs: get-label-type\n with:\n runner_prefix: \"${{ needs.get-label-type.outputs.label-type }}\"\n runner: \"linux.12xlarge\"", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2172999953", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157181, + "pr_file": ".github/workflows/h100-symm-mem.yml", + "discussion_id": "2172999953", + "commented_code": "@@ -0,0 +1,55 @@\n+name: Limited CI for symmetric memory tests on H100\n+\n+on:\n+ pull_request:\n+ paths:\n+ - .github/workflows/h100-symm-mem.yml\n+ workflow_dispatch:\n+ push:\n+ tags:\n+ - ciflow/h100-symm-mem/*\n+ schedule:\n+ - cron: 22 8 * * * # about 1:22am PDT\n+\n+concurrency:\n+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}\n+ cancel-in-progress: true\n+\n+jobs:\n+\n+ get-label-type:\n+ if: github.repository_owner == 'pytorch'\n+ name: get-label-type\n+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main\n+ with:\n+ triggering_actor: ${{ github.triggering_actor }}\n+ issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}\n+ curr_branch: ${{ github.head_ref || github.ref_name }}\n+ curr_ref_type: ${{ github.ref_type }}\n+\n+ linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-symm:\n+ name: linux-jammy-cuda12.8-py3.10-gcc11-sm90-symm\n+ uses: ./.github/workflows/_linux-build.yml\n+ needs: get-label-type\n+ with:\n+ runner_prefix: \"${{ needs.get-label-type.outputs.label-type }}\"\n+ runner: \"linux.12xlarge\"", + "comment_created_at": "2025-06-28T00:15:23+00:00", + "comment_author": "huydhn", + "comment_body": "Nit: I'm not sure why we need `12xlarge` here besides the fact that https://github.com/pytorch/pytorch/blame/main/.github/workflows/h100-distributed.yml also has it. 
I don't think you need to set the runner here at all ", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-optimize-workflow-resources.md b/_reviewers/pytorch-optimize-workflow-resources.md index ee16d73..5554ed3 100644 --- a/_reviewers/pytorch-optimize-workflow-resources.md +++ b/_reviewers/pytorch-optimize-workflow-resources.md @@ -47,169 +47,3 @@ When designing CI/CD workflows, optimize resource usage while maintaining approp - Make smaller, focused PRs that address one concern at a time This practice ensures cost-effective use of CI/CD resources, faster build times, and more maintainable workflow definitions. - - -[ - { - "discussion_id": "2102587511", - "pr_number": 152298, - "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", - "created_at": "2025-05-22T13:37:39+00:00", - "commented_code": "selected-test-configs: ${{ inputs.benchmark_configs }}\n secrets: inherit\n\n\n linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly:\n name: linux-jammy-cpu-py3.9-gcc11-inductor\n uses: ./.github/workflows/_linux-test.yml\n needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n if: github.event.schedule == '0 7 * * *'\n if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'\n with:\n build-environment: linux-jammy-py3.9-gcc11-build\n dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true\n dashboard-tag: training-false-inference-true-default-true-default_freezing-true-dynamic-true-dynamic_freezing-true-cppwrapper-true-cppwrapper_freezing-true-aotinductor-true-aotinductor_freezing-true", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2102587511", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152298, - "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", - "discussion_id": "2102587511", - "commented_code": "@@ -90,15 +98,14 @@ jobs:\n selected-test-configs: ${{ inputs.benchmark_configs }}\n secrets: inherit\n \n-\n linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly:\n name: linux-jammy-cpu-py3.9-gcc11-inductor\n uses: ./.github/workflows/_linux-test.yml\n needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n- if: github.event.schedule == '0 7 * * *'\n+ if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'\n with:\n build-environment: linux-jammy-py3.9-gcc11-build\n- dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true\n+ dashboard-tag: training-false-inference-true-default-true-default_freezing-true-dynamic-true-dynamic_freezing-true-cppwrapper-true-cppwrapper_freezing-true-aotinductor-true-aotinductor_freezing-true", - "comment_created_at": "2025-05-22T13:37:39+00:00", - "comment_author": "desertfire", - "comment_body": "Again, all you need to is to add a single `freeing-true` here.", - "pr_file_module": null - }, - { - "comment_id": "2103755948", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152298, - "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", - "discussion_id": "2102587511", - "commented_code": "@@ -90,15 +98,14 @@ jobs:\n selected-test-configs: ${{ inputs.benchmark_configs }}\n secrets: inherit\n \n-\n linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly:\n name: linux-jammy-cpu-py3.9-gcc11-inductor\n uses: ./.github/workflows/_linux-test.yml\n needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n- if: github.event.schedule == '0 7 * * *'\n+ if: github.event.schedule == '0 7 * * *' || github.event_name == 
'pull_request'\n with:\n build-environment: linux-jammy-py3.9-gcc11-build\n- dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true\n+ dashboard-tag: training-false-inference-true-default-true-default_freezing-true-dynamic-true-dynamic_freezing-true-cppwrapper-true-cppwrapper_freezing-true-aotinductor-true-aotinductor_freezing-true", - "comment_created_at": "2025-05-23T04:11:10+00:00", - "comment_author": "LifengWang", - "comment_body": "Sure. Now I have added a new CI workflow for the tests of the freezing models. So that we can run both the default and the freezing models in the CPU nightly test.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2113851339", - "pr_number": 152298, - "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", - "created_at": "2025-05-29T12:33:16+00:00", - "commented_code": "monitor-data-collect-interval: 4\n secrets: inherit\n\n linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly-freezing:\n name: linux-jammy-cpu-py3.9-gcc11-inductor\n uses: ./.github/workflows/_linux-test.yml\n needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2113851339", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152298, - "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", - "discussion_id": "2113851339", - "commented_code": "@@ -108,6 +115,22 @@ jobs:\n monitor-data-collect-interval: 4\n secrets: inherit\n \n+ linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly-freezing:\n+ name: linux-jammy-cpu-py3.9-gcc11-inductor\n+ uses: ./.github/workflows/_linux-test.yml\n+ needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n+ if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'", - "comment_created_at": "2025-05-29T12:33:16+00:00", - "comment_author": "desertfire", - "comment_body": "This means we will test both freezing ON and OFF every night. Is this really we want here?", - "pr_file_module": null - }, - { - "comment_id": "2113899127", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152298, - "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", - "discussion_id": "2113851339", - "commented_code": "@@ -108,6 +115,22 @@ jobs:\n monitor-data-collect-interval: 4\n secrets: inherit\n \n+ linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly-freezing:\n+ name: linux-jammy-cpu-py3.9-gcc11-inductor\n+ uses: ./.github/workflows/_linux-test.yml\n+ needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n+ if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'", - "comment_created_at": "2025-05-29T13:02:31+00:00", - "comment_author": "LifengWang", - "comment_body": "Yes, I remember that you mentioned that we need to keep the default test. 
Maybe we can just run a few tests with freezing off?", - "pr_file_module": null - }, - { - "comment_id": "2114052120", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152298, - "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", - "discussion_id": "2113851339", - "commented_code": "@@ -108,6 +115,22 @@ jobs:\n monitor-data-collect-interval: 4\n secrets: inherit\n \n+ linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly-freezing:\n+ name: linux-jammy-cpu-py3.9-gcc11-inductor\n+ uses: ./.github/workflows/_linux-test.yml\n+ needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n+ if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'", - "comment_created_at": "2025-05-29T14:05:01+00:00", - "comment_author": "desertfire", - "comment_body": "Did you trigger a dashboard run? Do you have a dashboard link shows how the data is going to be displayed?", - "pr_file_module": null - }, - { - "comment_id": "2115081221", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152298, - "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", - "discussion_id": "2113851339", - "commented_code": "@@ -108,6 +115,22 @@ jobs:\n monitor-data-collect-interval: 4\n secrets: inherit\n \n+ linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly-freezing:\n+ name: linux-jammy-cpu-py3.9-gcc11-inductor\n+ uses: ./.github/workflows/_linux-test.yml\n+ needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n+ if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'", - "comment_created_at": "2025-05-30T03:32:34+00:00", - "comment_author": "LifengWang", - "comment_body": "Hi @desertfire. I checked the dashboard page for the performance data related to freezing using the following [link](https://hud.pytorch.org/benchmark/compilers) . Set the time range to the last 14 days, precision to AMP, and device to CPU (x86).\r\nIt appears that the freezing test configuration is already included in the dashboard, but no performance data is being displayed.\r\n![image](https://github.com/user-attachments/assets/56df2d1a-1234-4b24-886a-b95e46da0cda)\r\n\r\n\r\n", - "pr_file_module": null - }, - { - "comment_id": "2116346509", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152298, - "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", - "discussion_id": "2113851339", - "commented_code": "@@ -108,6 +115,22 @@ jobs:\n monitor-data-collect-interval: 4\n secrets: inherit\n \n+ linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly-freezing:\n+ name: linux-jammy-cpu-py3.9-gcc11-inductor\n+ uses: ./.github/workflows/_linux-test.yml\n+ needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n+ if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'", - "comment_created_at": "2025-05-30T17:55:09+00:00", - "comment_author": "desertfire", - "comment_body": "OK, I see two problems here:\r\n\r\n1. The landing page of the OSS dashboard defaults to bf16 for inference. If a user selects x86 as the inference backend, they will see an empty page thinking the system is down or something.\r\n2. If you think freezing should be default, we should avoid testing the non-freezing one to cut the hardware expense. 
Also the names with freezing on is too long which should be fixed.\r\n\r\nUI fixes need to be done in https://github.com/pytorch/test-infra.\r\n\r\n", - "pr_file_module": null - }, - { - "comment_id": "2158256290", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152298, - "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", - "discussion_id": "2113851339", - "commented_code": "@@ -108,6 +115,22 @@ jobs:\n monitor-data-collect-interval: 4\n secrets: inherit\n \n+ linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly-freezing:\n+ name: linux-jammy-cpu-py3.9-gcc11-inductor\n+ uses: ./.github/workflows/_linux-test.yml\n+ needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n+ if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'", - "comment_created_at": "2025-06-20T07:37:01+00:00", - "comment_author": "LifengWang", - "comment_body": "Hi, @desertfire. Now I have removed the non-freezing test, and the test results in the HUD are as follows. Would it make sense to update the CI tests in this PR first, and address the dashboard changes in a follow-up\uff1f\r\n\r\n![image](https://github.com/user-attachments/assets/41762078-2102-4003-b5ee-5fec5c944e65)\r\n", - "pr_file_module": null - }, - { - "comment_id": "2167903726", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152298, - "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", - "discussion_id": "2113851339", - "commented_code": "@@ -108,6 +115,22 @@ jobs:\n monitor-data-collect-interval: 4\n secrets: inherit\n \n+ linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly-freezing:\n+ name: linux-jammy-cpu-py3.9-gcc11-inductor\n+ uses: ./.github/workflows/_linux-test.yml\n+ needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n+ if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'", - "comment_created_at": "2025-06-26T01:19:13+00:00", - "comment_author": "LifengWang", - "comment_body": "Hi, @huydhn, do you have any insights for the dashboard update? Since we want to update the CPU nightly test using the freezing model.", - "pr_file_module": null - }, - { - "comment_id": "2172574300", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152298, - "pr_file": ".github/workflows/inductor-perf-test-nightly-x86.yml", - "discussion_id": "2113851339", - "commented_code": "@@ -108,6 +115,22 @@ jobs:\n monitor-data-collect-interval: 4\n secrets: inherit\n \n+ linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly-freezing:\n+ name: linux-jammy-cpu-py3.9-gcc11-inductor\n+ uses: ./.github/workflows/_linux-test.yml\n+ needs: linux-jammy-cpu-py3_9-gcc11-inductor-build\n+ if: github.event.schedule == '0 7 * * *' || github.event_name == 'pull_request'", - "comment_created_at": "2025-06-27T18:02:08+00:00", - "comment_author": "huydhn", - "comment_body": "Sorry for not seeing your message earlier! 
It's ok to land this first and update the dashboard later\r\n\r\ncc @yangw-dev ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2172999953", - "pr_number": 157181, - "pr_file": ".github/workflows/h100-symm-mem.yml", - "created_at": "2025-06-28T00:15:23+00:00", - "commented_code": "name: Limited CI for symmetric memory tests on H100\n\non:\n pull_request:\n paths:\n - .github/workflows/h100-symm-mem.yml\n workflow_dispatch:\n push:\n tags:\n - ciflow/h100-symm-mem/*\n schedule:\n - cron: 22 8 * * * # about 1:22am PDT\n\nconcurrency:\n group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}\n cancel-in-progress: true\n\njobs:\n\n get-label-type:\n if: github.repository_owner == 'pytorch'\n name: get-label-type\n uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main\n with:\n triggering_actor: ${{ github.triggering_actor }}\n issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}\n curr_branch: ${{ github.head_ref || github.ref_name }}\n curr_ref_type: ${{ github.ref_type }}\n\n linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-symm:\n name: linux-jammy-cuda12.8-py3.10-gcc11-sm90-symm\n uses: ./.github/workflows/_linux-build.yml\n needs: get-label-type\n with:\n runner_prefix: \"${{ needs.get-label-type.outputs.label-type }}\"\n runner: \"linux.12xlarge\"", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2172999953", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157181, - "pr_file": ".github/workflows/h100-symm-mem.yml", - "discussion_id": "2172999953", - "commented_code": "@@ -0,0 +1,55 @@\n+name: Limited CI for symmetric memory tests on H100\n+\n+on:\n+ pull_request:\n+ paths:\n+ - .github/workflows/h100-symm-mem.yml\n+ workflow_dispatch:\n+ push:\n+ tags:\n+ - ciflow/h100-symm-mem/*\n+ schedule:\n+ - cron: 22 8 * * * # about 1:22am PDT\n+\n+concurrency:\n+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}\n+ cancel-in-progress: true\n+\n+jobs:\n+\n+ get-label-type:\n+ if: github.repository_owner == 'pytorch'\n+ name: get-label-type\n+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main\n+ with:\n+ triggering_actor: ${{ github.triggering_actor }}\n+ issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}\n+ curr_branch: ${{ github.head_ref || github.ref_name }}\n+ curr_ref_type: ${{ github.ref_type }}\n+\n+ linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-symm:\n+ name: linux-jammy-cuda12.8-py3.10-gcc11-sm90-symm\n+ uses: ./.github/workflows/_linux-build.yml\n+ needs: get-label-type\n+ with:\n+ runner_prefix: \"${{ needs.get-label-type.outputs.label-type }}\"\n+ runner: \"linux.12xlarge\"", - "comment_created_at": "2025-06-28T00:15:23+00:00", - "comment_author": "huydhn", - "comment_body": "Nit: I'm not sure why we need `12xlarge` here besides the fact that https://github.com/pytorch/pytorch/blame/main/.github/workflows/h100-distributed.yml also has it. 
I don't think you need to set the runner here at all ", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-platform-specific-configuration-guards.json b/_reviewers/pytorch-platform-specific-configuration-guards.json new file mode 100644 index 0000000..ea99b89 --- /dev/null +++ b/_reviewers/pytorch-platform-specific-configuration-guards.json @@ -0,0 +1,220 @@ +[ + { + "discussion_id": "2168294829", + "pr_number": 156097, + "pr_file": "c10/cuda/driver_api.h", + "created_at": "2025-06-26T06:47:56+00:00", + "commented_code": "} \\\n } while (0)\n\n#define C10_LIBCUDA_DRIVER_API(_) \\\n _(cuDeviceGetAttribute) \\\n _(cuMemAddressReserve) \\\n _(cuMemRelease) \\\n _(cuMemMap) \\\n _(cuMemAddressFree) \\\n _(cuMemSetAccess) \\\n _(cuMemUnmap) \\\n _(cuMemCreate) \\\n _(cuMemGetAllocationGranularity) \\\n _(cuMemExportToShareableHandle) \\\n _(cuMemImportFromShareableHandle) \\\n _(cuMemsetD32Async) \\\n _(cuStreamWriteValue32) \\\n _(cuGetErrorString)\n\n#if defined(CUDA_VERSION) && (CUDA_VERSION >= 12030)\n#define C10_LIBCUDA_DRIVER_API_12030(_) \\\n _(cuMulticastAddDevice) \\\n _(cuMulticastBindMem) \\\n _(cuMulticastCreate)\n#if defined(CUDA_VERSION) && CUDA_VERSION >= 12030\n#define IF_CUDA_VERSION_GE_12030(x) x\n#else\n#define C10_LIBCUDA_DRIVER_API_12030(_)\n#define IF_CUDA_VERSION_GE_12030(x)\n#endif\n\n#define C10_LIBCUDA_DRIVER_API(_) \\\n _(cuDeviceGetAttribute, 12000) \\\n _(cuMemAddressReserve, 12000) \\\n _(cuMemRelease, 12000) \\\n _(cuMemMap, 12000) \\\n _(cuMemAddressFree, 12000) \\\n _(cuMemSetAccess, 12000) \\\n _(cuMemUnmap, 12000) \\\n _(cuMemCreate, 12000) \\\n _(cuMemGetAllocationGranularity, 12000) \\\n _(cuMemExportToShareableHandle, 12000) \\\n _(cuMemImportFromShareableHandle, 12000) \\\n _(cuMemsetD32Async, 12000) \\\n _(cuStreamWriteValue32, 12000) \\\n _(cuGetErrorString, 12000) \\\n IF_CUDA_VERSION_GE_12030(_(cuMulticastAddDevice, 12030)) \\", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2168294829", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156097, + "pr_file": "c10/cuda/driver_api.h", + "discussion_id": "2168294829", + "commented_code": "@@ -20,31 +20,31 @@\n } \\\n } while (0)\n \n-#define C10_LIBCUDA_DRIVER_API(_) \\\n- _(cuDeviceGetAttribute) \\\n- _(cuMemAddressReserve) \\\n- _(cuMemRelease) \\\n- _(cuMemMap) \\\n- _(cuMemAddressFree) \\\n- _(cuMemSetAccess) \\\n- _(cuMemUnmap) \\\n- _(cuMemCreate) \\\n- _(cuMemGetAllocationGranularity) \\\n- _(cuMemExportToShareableHandle) \\\n- _(cuMemImportFromShareableHandle) \\\n- _(cuMemsetD32Async) \\\n- _(cuStreamWriteValue32) \\\n- _(cuGetErrorString)\n-\n-#if defined(CUDA_VERSION) && (CUDA_VERSION >= 12030)\n-#define C10_LIBCUDA_DRIVER_API_12030(_) \\\n- _(cuMulticastAddDevice) \\\n- _(cuMulticastBindMem) \\\n- _(cuMulticastCreate)\n+#if defined(CUDA_VERSION) && CUDA_VERSION >= 12030\n+#define IF_CUDA_VERSION_GE_12030(x) x\n #else\n-#define C10_LIBCUDA_DRIVER_API_12030(_)\n+#define IF_CUDA_VERSION_GE_12030(x)\n #endif\n \n+#define C10_LIBCUDA_DRIVER_API(_) \\\n+ _(cuDeviceGetAttribute, 12000) \\\n+ _(cuMemAddressReserve, 12000) \\\n+ _(cuMemRelease, 12000) \\\n+ _(cuMemMap, 12000) \\\n+ _(cuMemAddressFree, 12000) \\\n+ _(cuMemSetAccess, 12000) \\\n+ _(cuMemUnmap, 12000) \\\n+ _(cuMemCreate, 12000) \\\n+ _(cuMemGetAllocationGranularity, 12000) \\\n+ _(cuMemExportToShareableHandle, 12000) \\\n+ _(cuMemImportFromShareableHandle, 12000) \\\n+ _(cuMemsetD32Async, 12000) \\\n+ _(cuStreamWriteValue32, 12000) \\\n+ _(cuGetErrorString, 12000) \\\n+ 
IF_CUDA_VERSION_GE_12030(_(cuMulticastAddDevice, 12030)) \\", + "comment_created_at": "2025-06-26T06:47:56+00:00", + "comment_author": "wujingyue", + "comment_body": "```suggestion\r\n _(cuMulticastAddDevice, 12030) \\\r\n```\r\nWhat you wrote is totally fine. \r\n\r\nI'd make it less verbose. If the CUDA driver is older than 12.3 (or 12.1 as you said), cuMulticastAddDevice will fail [this assertion](https://github.com/pytorch/pytorch/blob/50b2069b61942e923528c94ccbbc8ab5e92c381e/c10/cuda/driver_api.cpp#L19). An assertion error is harder to discover than a compile-time error, but I tend to find it OK in practice. ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2151477389", + "pr_number": 156161, + "pr_file": "aten/src/ATen/native/cpu/ScatterGatherKernel.cpp", + "created_at": "2025-06-17T07:00:14+00:00", + "commented_code": "int64_t* sorted_col_index_keys = nullptr;\n int64_t* sorted_col_index_values = nullptr;\n \n#if defined(USE_FBGEMM) && (defined(__x86_64__) || defined(_M_X64))", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2151477389", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156161, + "pr_file": "aten/src/ATen/native/cpu/ScatterGatherKernel.cpp", + "discussion_id": "2151477389", + "commented_code": "@@ -719,13 +939,24 @@ void cpu_scatter_reduce_expanded_index(const Tensor& self, const Tensor& index,\n \n int64_t* sorted_col_index_keys = nullptr;\n int64_t* sorted_col_index_values = nullptr;\n+ \n+#if defined(USE_FBGEMM) && (defined(__x86_64__) || defined(_M_X64))", + "comment_created_at": "2025-06-17T07:00:14+00:00", + "comment_author": "fadara01", + "comment_body": "why is `#if defined(USE_FBGEMM)` not enough?", + "pr_file_module": null + }, + { + "comment_id": "2163038637", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156161, + "pr_file": "aten/src/ATen/native/cpu/ScatterGatherKernel.cpp", + "discussion_id": "2151477389", + "commented_code": "@@ -719,13 +939,24 @@ void cpu_scatter_reduce_expanded_index(const Tensor& self, const Tensor& index,\n \n int64_t* sorted_col_index_keys = nullptr;\n int64_t* sorted_col_index_values = nullptr;\n+ \n+#if defined(USE_FBGEMM) && (defined(__x86_64__) || defined(_M_X64))", + "comment_created_at": "2025-06-24T06:07:56+00:00", + "comment_author": "maajidkhann", + "comment_body": "Right now, USE_FBGEMM is gated to x86 platforms. 
So checking just #ifdef USE_FBGEMM is sufficient and simpler.\r\nI was defensively guarding against a potential misconfiguration like - USE_FBGEMM being defined on ARM accidentally.\r\n\r\nAnyways, build system is well-behaved in pytorch, so updated the logic to just #ifdef USE_FBGEMM", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2164495759", + "pr_number": 156097, + "pr_file": "c10/cuda/driver_api.cpp", + "created_at": "2025-06-24T16:57:18+00:00", + "commented_code": "return &singleton;\n}\n\nvoid* get_symbol(const char* name) {\n int runtime_ver = 0, driver_ver = 0;\n C10_CUDA_CHECK(cudaRuntimeGetVersion(&runtime_ver));\n C10_CUDA_CHECK(cudaDriverGetVersion(&driver_ver));\n\n void* out = nullptr;\n cudaDriverEntryPointQueryResult qres{};\n if (auto st = cudaGetDriverEntryPoint(name, &out, cudaEnableDefault, &qres);\n st == cudaSuccess && qres == cudaDriverEntryPointSuccess && out) {\n return out;\n }\n\n unsigned int req_ver = std::min(runtime_ver, driver_ver);\n if (auto st = cudaGetDriverEntryPointByVersion(", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2164495759", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156097, + "pr_file": "c10/cuda/driver_api.cpp", + "discussion_id": "2164495759", + "commented_code": "@@ -47,6 +43,32 @@ C10_EXPORT DriverAPI* DriverAPI::get() {\n return &singleton;\n }\n \n+void* get_symbol(const char* name) {\n+ int runtime_ver = 0, driver_ver = 0;\n+ C10_CUDA_CHECK(cudaRuntimeGetVersion(&runtime_ver));\n+ C10_CUDA_CHECK(cudaDriverGetVersion(&driver_ver));\n+\n+ void* out = nullptr;\n+ cudaDriverEntryPointQueryResult qres{};\n+ if (auto st = cudaGetDriverEntryPoint(name, &out, cudaEnableDefault, &qres);\n+ st == cudaSuccess && qres == cudaDriverEntryPointSuccess && out) {\n+ return out;\n+ }\n+\n+ unsigned int req_ver = std::min(runtime_ver, driver_ver);\n+ if (auto st = cudaGetDriverEntryPointByVersion(", + "comment_created_at": "2025-06-24T16:57:18+00:00", + "comment_author": "ngimel", + "comment_body": "in which runtime version was cudaGetDriverEntryPointByVersion added?", + "pr_file_module": null + }, + { + "comment_id": "2165192255", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156097, + "pr_file": "c10/cuda/driver_api.cpp", + "discussion_id": "2164495759", + "commented_code": "@@ -47,6 +43,32 @@ C10_EXPORT DriverAPI* DriverAPI::get() {\n return &singleton;\n }\n \n+void* get_symbol(const char* name) {\n+ int runtime_ver = 0, driver_ver = 0;\n+ C10_CUDA_CHECK(cudaRuntimeGetVersion(&runtime_ver));\n+ C10_CUDA_CHECK(cudaDriverGetVersion(&driver_ver));\n+\n+ void* out = nullptr;\n+ cudaDriverEntryPointQueryResult qres{};\n+ if (auto st = cudaGetDriverEntryPoint(name, &out, cudaEnableDefault, &qres);\n+ st == cudaSuccess && qres == cudaDriverEntryPointSuccess && out) {\n+ return out;\n+ }\n+\n+ unsigned int req_ver = std::min(runtime_ver, driver_ver);\n+ if (auto st = cudaGetDriverEntryPointByVersion(", + "comment_created_at": "2025-06-25T00:23:01+00:00", + "comment_author": "syed-ahmed", + "comment_body": "Per https://github.com/NVIDIA/cutlass/pull/2086, it was added in 12.5", + "pr_file_module": null + }, + { + "comment_id": "2165209310", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156097, + "pr_file": "c10/cuda/driver_api.cpp", + "discussion_id": "2164495759", + "commented_code": "@@ -47,6 +43,32 @@ C10_EXPORT DriverAPI* DriverAPI::get() {\n return &singleton;\n }\n \n+void* get_symbol(const char* name) {\n+ int runtime_ver = 0, driver_ver = 0;\n+ 
C10_CUDA_CHECK(cudaRuntimeGetVersion(&runtime_ver));\n+ C10_CUDA_CHECK(cudaDriverGetVersion(&driver_ver));\n+\n+ void* out = nullptr;\n+ cudaDriverEntryPointQueryResult qres{};\n+ if (auto st = cudaGetDriverEntryPoint(name, &out, cudaEnableDefault, &qres);\n+ st == cudaSuccess && qres == cudaDriverEntryPointSuccess && out) {\n+ return out;\n+ }\n+\n+ unsigned int req_ver = std::min(runtime_ver, driver_ver);\n+ if (auto st = cudaGetDriverEntryPointByVersion(", + "comment_created_at": "2025-06-25T00:39:14+00:00", + "comment_author": "syed-ahmed", + "comment_body": "Yes indeed it was added in 12.5.\r\n12.5: https://docs.nvidia.com/cuda/archive/12.5.0/cuda-runtime-api/group__CUDART__DRIVER__ENTRY__POINT.html#group__CUDART__DRIVER__ENTRY__POINT_1gcf55d143722ccfa9252758181701c876\r\n12.4: https://docs.nvidia.com/cuda/archive/12.4.1/cuda-runtime-api/group__CUDART__DRIVER__ENTRY__POINT.html#group__CUDART__DRIVER__ENTRY__POINT_1gcf55d143722ccfa9252758181701c876", + "pr_file_module": null + }, + { + "comment_id": "2165268430", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156097, + "pr_file": "c10/cuda/driver_api.cpp", + "discussion_id": "2164495759", + "commented_code": "@@ -47,6 +43,32 @@ C10_EXPORT DriverAPI* DriverAPI::get() {\n return &singleton;\n }\n \n+void* get_symbol(const char* name) {\n+ int runtime_ver = 0, driver_ver = 0;\n+ C10_CUDA_CHECK(cudaRuntimeGetVersion(&runtime_ver));\n+ C10_CUDA_CHECK(cudaDriverGetVersion(&driver_ver));\n+\n+ void* out = nullptr;\n+ cudaDriverEntryPointQueryResult qres{};\n+ if (auto st = cudaGetDriverEntryPoint(name, &out, cudaEnableDefault, &qres);\n+ st == cudaSuccess && qres == cudaDriverEntryPointSuccess && out) {\n+ return out;\n+ }\n+\n+ unsigned int req_ver = std::min(runtime_ver, driver_ver);\n+ if (auto st = cudaGetDriverEntryPointByVersion(", + "comment_created_at": "2025-06-25T01:42:51+00:00", + "comment_author": "ngimel", + "comment_body": "So this needs to be guarded on cuda version? We don't have 12.4 builds in CI (they will be reintroduced soon), but internally we do, and this will be a build error. ", + "pr_file_module": null + }, + { + "comment_id": "2165277453", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156097, + "pr_file": "c10/cuda/driver_api.cpp", + "discussion_id": "2164495759", + "commented_code": "@@ -47,6 +43,32 @@ C10_EXPORT DriverAPI* DriverAPI::get() {\n return &singleton;\n }\n \n+void* get_symbol(const char* name) {\n+ int runtime_ver = 0, driver_ver = 0;\n+ C10_CUDA_CHECK(cudaRuntimeGetVersion(&runtime_ver));\n+ C10_CUDA_CHECK(cudaDriverGetVersion(&driver_ver));\n+\n+ void* out = nullptr;\n+ cudaDriverEntryPointQueryResult qres{};\n+ if (auto st = cudaGetDriverEntryPoint(name, &out, cudaEnableDefault, &qres);\n+ st == cudaSuccess && qres == cudaDriverEntryPointSuccess && out) {\n+ return out;\n+ }\n+\n+ unsigned int req_ver = std::min(runtime_ver, driver_ver);\n+ if (auto st = cudaGetDriverEntryPointByVersion(", + "comment_created_at": "2025-06-25T01:54:24+00:00", + "comment_author": "eee4017", + "comment_body": "OK, I think cudaGetDriverEntryPoint is available starting from 11.3. Do we need to worry about supporting versions earlier than 11.3 ? 
https://docs.nvidia.com/cuda/archive/11.3.0/cuda-toolkit-release-notes/index.html#cuda-general-new-features", + "pr_file_module": null + }, + { + "comment_id": "2165592482", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156097, + "pr_file": "c10/cuda/driver_api.cpp", + "discussion_id": "2164495759", + "commented_code": "@@ -47,6 +43,32 @@ C10_EXPORT DriverAPI* DriverAPI::get() {\n return &singleton;\n }\n \n+void* get_symbol(const char* name) {\n+ int runtime_ver = 0, driver_ver = 0;\n+ C10_CUDA_CHECK(cudaRuntimeGetVersion(&runtime_ver));\n+ C10_CUDA_CHECK(cudaDriverGetVersion(&driver_ver));\n+\n+ void* out = nullptr;\n+ cudaDriverEntryPointQueryResult qres{};\n+ if (auto st = cudaGetDriverEntryPoint(name, &out, cudaEnableDefault, &qres);\n+ st == cudaSuccess && qres == cudaDriverEntryPointSuccess && out) {\n+ return out;\n+ }\n+\n+ unsigned int req_ver = std::min(runtime_ver, driver_ver);\n+ if (auto st = cudaGetDriverEntryPointByVersion(", + "comment_created_at": "2025-06-25T03:42:54+00:00", + "comment_author": "ngimel", + "comment_body": "No, 12.0 is the earliest version we suppot", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2098997916", + "pr_number": 153991, + "pr_file": "torch/csrc/Module.cpp", + "created_at": "2025-05-20T23:01:15+00:00", + "commented_code": "#ifdef USE_MPS\n torch::mps::initModule(module);\n#endif\n#ifdef USE_VULKAN", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2098997916", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153991, + "pr_file": "torch/csrc/Module.cpp", + "discussion_id": "2098997916", + "commented_code": "@@ -1891,6 +1892,9 @@ PyObject* initModule() {\n #ifdef USE_MPS\n torch::mps::initModule(module);\n #endif\n+#ifdef USE_VULKAN", + "comment_created_at": "2025-05-20T23:01:15+00:00", + "comment_author": "albanD", + "comment_body": "Can we do this ifdef insite the initModule?\r\nThis is a footgun where it becomes very easy to define APIs that exist or not on the python module depending on build flags.\r\nThis is a big issue for distributed that does that today as anyone that \"import\" your function in python will see their script crash in a weird way with another install of PyTorch. And catching these import errors is hard.\r\n\r\nIt is best if we could have always the same API but it throws an error if you try to use it.", + "pr_file_module": null + }, + { + "comment_id": "2099013258", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153991, + "pr_file": "torch/csrc/Module.cpp", + "discussion_id": "2098997916", + "commented_code": "@@ -1891,6 +1892,9 @@ PyObject* initModule() {\n #ifdef USE_MPS\n torch::mps::initModule(module);\n #endif\n+#ifdef USE_VULKAN", + "comment_created_at": "2025-05-20T23:15:42+00:00", + "comment_author": "swolchok", + "comment_body": "I tried to make this match MPS, which is directly above. I take it this is a case where it's too late to fix MPS but not Vulkan?", + "pr_file_module": null + }, + { + "comment_id": "2099067993", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153991, + "pr_file": "torch/csrc/Module.cpp", + "discussion_id": "2098997916", + "commented_code": "@@ -1891,6 +1892,9 @@ PyObject* initModule() {\n #ifdef USE_MPS\n torch::mps::initModule(module);\n #endif\n+#ifdef USE_VULKAN", + "comment_created_at": "2025-05-21T00:15:19+00:00", + "comment_author": "albanD", + "comment_body": "Correct! 
:) ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2029450813", + "pr_number": 150705, + "pr_file": "aten/src/ATen/native/cuda/MemoryAccess.cuh", + "created_at": "2025-04-04T21:21:23+00:00", + "commented_code": "uint64_t address = reinterpret_cast(pointer);\n constexpr int vec2_alignment = std::alignment_of_v>;\n constexpr int vec4_alignment = std::alignment_of_v>;\n#if defined(USE_ROCM) || (defined(CUDA_VERSION) && CUDA_VERSION >= 12080)", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2029450813", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150705, + "pr_file": "aten/src/ATen/native/cuda/MemoryAccess.cuh", + "discussion_id": "2029450813", + "commented_code": "@@ -486,7 +486,9 @@ inline C10_HOST_DEVICE int can_vectorize_up_to(const char *pointer) {\n uint64_t address = reinterpret_cast(pointer);\n constexpr int vec2_alignment = std::alignment_of_v>;\n constexpr int vec4_alignment = std::alignment_of_v>;\n+#if defined(USE_ROCM) || (defined(CUDA_VERSION) && CUDA_VERSION >= 12080)", + "comment_created_at": "2025-04-04T21:21:23+00:00", + "comment_author": "eqy", + "comment_body": "should `USE_ROCM` here also be inverted if the `CUDA_VERSION` condition is `>= 12080`", + "pr_file_module": null + }, + { + "comment_id": "2029462805", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150705, + "pr_file": "aten/src/ATen/native/cuda/MemoryAccess.cuh", + "discussion_id": "2029450813", + "commented_code": "@@ -486,7 +486,9 @@ inline C10_HOST_DEVICE int can_vectorize_up_to(const char *pointer) {\n uint64_t address = reinterpret_cast(pointer);\n constexpr int vec2_alignment = std::alignment_of_v>;\n constexpr int vec4_alignment = std::alignment_of_v>;\n+#if defined(USE_ROCM) || (defined(CUDA_VERSION) && CUDA_VERSION >= 12080)", + "comment_created_at": "2025-04-04T21:34:33+00:00", + "comment_author": "malfet", + "comment_body": "No, I don't think so. Before https://github.com/pytorch/pytorch/pull/145746 vec8_alignment were only available to `USE_ROCM`, after it was enabled unconditionally and I want it to be enabled for either ROCM or CUDA newer than 12.6", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-platform-specific-configuration-guards.md b/_reviewers/pytorch-platform-specific-configuration-guards.md index 487365b..84e0f89 100644 --- a/_reviewers/pytorch-platform-specific-configuration-guards.md +++ b/_reviewers/pytorch-platform-specific-configuration-guards.md @@ -53,225 +53,3 @@ When implementing platform-specific or version-dependent code, use the simplest ``` Following these practices ensures your code is more maintainable, provides consistent APIs across different builds, and avoids unnecessary complexity in platform-specific sections. 
- - -[ - { - "discussion_id": "2168294829", - "pr_number": 156097, - "pr_file": "c10/cuda/driver_api.h", - "created_at": "2025-06-26T06:47:56+00:00", - "commented_code": "} \\\n } while (0)\n\n#define C10_LIBCUDA_DRIVER_API(_) \\\n _(cuDeviceGetAttribute) \\\n _(cuMemAddressReserve) \\\n _(cuMemRelease) \\\n _(cuMemMap) \\\n _(cuMemAddressFree) \\\n _(cuMemSetAccess) \\\n _(cuMemUnmap) \\\n _(cuMemCreate) \\\n _(cuMemGetAllocationGranularity) \\\n _(cuMemExportToShareableHandle) \\\n _(cuMemImportFromShareableHandle) \\\n _(cuMemsetD32Async) \\\n _(cuStreamWriteValue32) \\\n _(cuGetErrorString)\n\n#if defined(CUDA_VERSION) && (CUDA_VERSION >= 12030)\n#define C10_LIBCUDA_DRIVER_API_12030(_) \\\n _(cuMulticastAddDevice) \\\n _(cuMulticastBindMem) \\\n _(cuMulticastCreate)\n#if defined(CUDA_VERSION) && CUDA_VERSION >= 12030\n#define IF_CUDA_VERSION_GE_12030(x) x\n#else\n#define C10_LIBCUDA_DRIVER_API_12030(_)\n#define IF_CUDA_VERSION_GE_12030(x)\n#endif\n\n#define C10_LIBCUDA_DRIVER_API(_) \\\n _(cuDeviceGetAttribute, 12000) \\\n _(cuMemAddressReserve, 12000) \\\n _(cuMemRelease, 12000) \\\n _(cuMemMap, 12000) \\\n _(cuMemAddressFree, 12000) \\\n _(cuMemSetAccess, 12000) \\\n _(cuMemUnmap, 12000) \\\n _(cuMemCreate, 12000) \\\n _(cuMemGetAllocationGranularity, 12000) \\\n _(cuMemExportToShareableHandle, 12000) \\\n _(cuMemImportFromShareableHandle, 12000) \\\n _(cuMemsetD32Async, 12000) \\\n _(cuStreamWriteValue32, 12000) \\\n _(cuGetErrorString, 12000) \\\n IF_CUDA_VERSION_GE_12030(_(cuMulticastAddDevice, 12030)) \\", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2168294829", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156097, - "pr_file": "c10/cuda/driver_api.h", - "discussion_id": "2168294829", - "commented_code": "@@ -20,31 +20,31 @@\n } \\\n } while (0)\n \n-#define C10_LIBCUDA_DRIVER_API(_) \\\n- _(cuDeviceGetAttribute) \\\n- _(cuMemAddressReserve) \\\n- _(cuMemRelease) \\\n- _(cuMemMap) \\\n- _(cuMemAddressFree) \\\n- _(cuMemSetAccess) \\\n- _(cuMemUnmap) \\\n- _(cuMemCreate) \\\n- _(cuMemGetAllocationGranularity) \\\n- _(cuMemExportToShareableHandle) \\\n- _(cuMemImportFromShareableHandle) \\\n- _(cuMemsetD32Async) \\\n- _(cuStreamWriteValue32) \\\n- _(cuGetErrorString)\n-\n-#if defined(CUDA_VERSION) && (CUDA_VERSION >= 12030)\n-#define C10_LIBCUDA_DRIVER_API_12030(_) \\\n- _(cuMulticastAddDevice) \\\n- _(cuMulticastBindMem) \\\n- _(cuMulticastCreate)\n+#if defined(CUDA_VERSION) && CUDA_VERSION >= 12030\n+#define IF_CUDA_VERSION_GE_12030(x) x\n #else\n-#define C10_LIBCUDA_DRIVER_API_12030(_)\n+#define IF_CUDA_VERSION_GE_12030(x)\n #endif\n \n+#define C10_LIBCUDA_DRIVER_API(_) \\\n+ _(cuDeviceGetAttribute, 12000) \\\n+ _(cuMemAddressReserve, 12000) \\\n+ _(cuMemRelease, 12000) \\\n+ _(cuMemMap, 12000) \\\n+ _(cuMemAddressFree, 12000) \\\n+ _(cuMemSetAccess, 12000) \\\n+ _(cuMemUnmap, 12000) \\\n+ _(cuMemCreate, 12000) \\\n+ _(cuMemGetAllocationGranularity, 12000) \\\n+ _(cuMemExportToShareableHandle, 12000) \\\n+ _(cuMemImportFromShareableHandle, 12000) \\\n+ _(cuMemsetD32Async, 12000) \\\n+ _(cuStreamWriteValue32, 12000) \\\n+ _(cuGetErrorString, 12000) \\\n+ IF_CUDA_VERSION_GE_12030(_(cuMulticastAddDevice, 12030)) \\", - "comment_created_at": "2025-06-26T06:47:56+00:00", - "comment_author": "wujingyue", - "comment_body": "```suggestion\r\n _(cuMulticastAddDevice, 12030) \\\r\n```\r\nWhat you wrote is totally fine. \r\n\r\nI'd make it less verbose. 
If the CUDA driver is older than 12.3 (or 12.1 as you said), cuMulticastAddDevice will fail [this assertion](https://github.com/pytorch/pytorch/blob/50b2069b61942e923528c94ccbbc8ab5e92c381e/c10/cuda/driver_api.cpp#L19). An assertion error is harder to discover than a compile-time error, but I tend to find it OK in practice. ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2151477389", - "pr_number": 156161, - "pr_file": "aten/src/ATen/native/cpu/ScatterGatherKernel.cpp", - "created_at": "2025-06-17T07:00:14+00:00", - "commented_code": "int64_t* sorted_col_index_keys = nullptr;\n int64_t* sorted_col_index_values = nullptr;\n \n#if defined(USE_FBGEMM) && (defined(__x86_64__) || defined(_M_X64))", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2151477389", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156161, - "pr_file": "aten/src/ATen/native/cpu/ScatterGatherKernel.cpp", - "discussion_id": "2151477389", - "commented_code": "@@ -719,13 +939,24 @@ void cpu_scatter_reduce_expanded_index(const Tensor& self, const Tensor& index,\n \n int64_t* sorted_col_index_keys = nullptr;\n int64_t* sorted_col_index_values = nullptr;\n+ \n+#if defined(USE_FBGEMM) && (defined(__x86_64__) || defined(_M_X64))", - "comment_created_at": "2025-06-17T07:00:14+00:00", - "comment_author": "fadara01", - "comment_body": "why is `#if defined(USE_FBGEMM)` not enough?", - "pr_file_module": null - }, - { - "comment_id": "2163038637", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156161, - "pr_file": "aten/src/ATen/native/cpu/ScatterGatherKernel.cpp", - "discussion_id": "2151477389", - "commented_code": "@@ -719,13 +939,24 @@ void cpu_scatter_reduce_expanded_index(const Tensor& self, const Tensor& index,\n \n int64_t* sorted_col_index_keys = nullptr;\n int64_t* sorted_col_index_values = nullptr;\n+ \n+#if defined(USE_FBGEMM) && (defined(__x86_64__) || defined(_M_X64))", - "comment_created_at": "2025-06-24T06:07:56+00:00", - "comment_author": "maajidkhann", - "comment_body": "Right now, USE_FBGEMM is gated to x86 platforms. 
So checking just #ifdef USE_FBGEMM is sufficient and simpler.\r\nI was defensively guarding against a potential misconfiguration like - USE_FBGEMM being defined on ARM accidentally.\r\n\r\nAnyways, build system is well-behaved in pytorch, so updated the logic to just #ifdef USE_FBGEMM", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2164495759", - "pr_number": 156097, - "pr_file": "c10/cuda/driver_api.cpp", - "created_at": "2025-06-24T16:57:18+00:00", - "commented_code": "return &singleton;\n}\n\nvoid* get_symbol(const char* name) {\n int runtime_ver = 0, driver_ver = 0;\n C10_CUDA_CHECK(cudaRuntimeGetVersion(&runtime_ver));\n C10_CUDA_CHECK(cudaDriverGetVersion(&driver_ver));\n\n void* out = nullptr;\n cudaDriverEntryPointQueryResult qres{};\n if (auto st = cudaGetDriverEntryPoint(name, &out, cudaEnableDefault, &qres);\n st == cudaSuccess && qres == cudaDriverEntryPointSuccess && out) {\n return out;\n }\n\n unsigned int req_ver = std::min(runtime_ver, driver_ver);\n if (auto st = cudaGetDriverEntryPointByVersion(", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2164495759", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156097, - "pr_file": "c10/cuda/driver_api.cpp", - "discussion_id": "2164495759", - "commented_code": "@@ -47,6 +43,32 @@ C10_EXPORT DriverAPI* DriverAPI::get() {\n return &singleton;\n }\n \n+void* get_symbol(const char* name) {\n+ int runtime_ver = 0, driver_ver = 0;\n+ C10_CUDA_CHECK(cudaRuntimeGetVersion(&runtime_ver));\n+ C10_CUDA_CHECK(cudaDriverGetVersion(&driver_ver));\n+\n+ void* out = nullptr;\n+ cudaDriverEntryPointQueryResult qres{};\n+ if (auto st = cudaGetDriverEntryPoint(name, &out, cudaEnableDefault, &qres);\n+ st == cudaSuccess && qres == cudaDriverEntryPointSuccess && out) {\n+ return out;\n+ }\n+\n+ unsigned int req_ver = std::min(runtime_ver, driver_ver);\n+ if (auto st = cudaGetDriverEntryPointByVersion(", - "comment_created_at": "2025-06-24T16:57:18+00:00", - "comment_author": "ngimel", - "comment_body": "in which runtime version was cudaGetDriverEntryPointByVersion added?", - "pr_file_module": null - }, - { - "comment_id": "2165192255", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156097, - "pr_file": "c10/cuda/driver_api.cpp", - "discussion_id": "2164495759", - "commented_code": "@@ -47,6 +43,32 @@ C10_EXPORT DriverAPI* DriverAPI::get() {\n return &singleton;\n }\n \n+void* get_symbol(const char* name) {\n+ int runtime_ver = 0, driver_ver = 0;\n+ C10_CUDA_CHECK(cudaRuntimeGetVersion(&runtime_ver));\n+ C10_CUDA_CHECK(cudaDriverGetVersion(&driver_ver));\n+\n+ void* out = nullptr;\n+ cudaDriverEntryPointQueryResult qres{};\n+ if (auto st = cudaGetDriverEntryPoint(name, &out, cudaEnableDefault, &qres);\n+ st == cudaSuccess && qres == cudaDriverEntryPointSuccess && out) {\n+ return out;\n+ }\n+\n+ unsigned int req_ver = std::min(runtime_ver, driver_ver);\n+ if (auto st = cudaGetDriverEntryPointByVersion(", - "comment_created_at": "2025-06-25T00:23:01+00:00", - "comment_author": "syed-ahmed", - "comment_body": "Per https://github.com/NVIDIA/cutlass/pull/2086, it was added in 12.5", - "pr_file_module": null - }, - { - "comment_id": "2165209310", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156097, - "pr_file": "c10/cuda/driver_api.cpp", - "discussion_id": "2164495759", - "commented_code": "@@ -47,6 +43,32 @@ C10_EXPORT DriverAPI* DriverAPI::get() {\n return &singleton;\n }\n \n+void* get_symbol(const char* name) {\n+ int runtime_ver = 0, driver_ver = 0;\n+ 
C10_CUDA_CHECK(cudaRuntimeGetVersion(&runtime_ver));\n+ C10_CUDA_CHECK(cudaDriverGetVersion(&driver_ver));\n+\n+ void* out = nullptr;\n+ cudaDriverEntryPointQueryResult qres{};\n+ if (auto st = cudaGetDriverEntryPoint(name, &out, cudaEnableDefault, &qres);\n+ st == cudaSuccess && qres == cudaDriverEntryPointSuccess && out) {\n+ return out;\n+ }\n+\n+ unsigned int req_ver = std::min(runtime_ver, driver_ver);\n+ if (auto st = cudaGetDriverEntryPointByVersion(", - "comment_created_at": "2025-06-25T00:39:14+00:00", - "comment_author": "syed-ahmed", - "comment_body": "Yes indeed it was added in 12.5.\r\n12.5: https://docs.nvidia.com/cuda/archive/12.5.0/cuda-runtime-api/group__CUDART__DRIVER__ENTRY__POINT.html#group__CUDART__DRIVER__ENTRY__POINT_1gcf55d143722ccfa9252758181701c876\r\n12.4: https://docs.nvidia.com/cuda/archive/12.4.1/cuda-runtime-api/group__CUDART__DRIVER__ENTRY__POINT.html#group__CUDART__DRIVER__ENTRY__POINT_1gcf55d143722ccfa9252758181701c876", - "pr_file_module": null - }, - { - "comment_id": "2165268430", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156097, - "pr_file": "c10/cuda/driver_api.cpp", - "discussion_id": "2164495759", - "commented_code": "@@ -47,6 +43,32 @@ C10_EXPORT DriverAPI* DriverAPI::get() {\n return &singleton;\n }\n \n+void* get_symbol(const char* name) {\n+ int runtime_ver = 0, driver_ver = 0;\n+ C10_CUDA_CHECK(cudaRuntimeGetVersion(&runtime_ver));\n+ C10_CUDA_CHECK(cudaDriverGetVersion(&driver_ver));\n+\n+ void* out = nullptr;\n+ cudaDriverEntryPointQueryResult qres{};\n+ if (auto st = cudaGetDriverEntryPoint(name, &out, cudaEnableDefault, &qres);\n+ st == cudaSuccess && qres == cudaDriverEntryPointSuccess && out) {\n+ return out;\n+ }\n+\n+ unsigned int req_ver = std::min(runtime_ver, driver_ver);\n+ if (auto st = cudaGetDriverEntryPointByVersion(", - "comment_created_at": "2025-06-25T01:42:51+00:00", - "comment_author": "ngimel", - "comment_body": "So this needs to be guarded on cuda version? We don't have 12.4 builds in CI (they will be reintroduced soon), but internally we do, and this will be a build error. ", - "pr_file_module": null - }, - { - "comment_id": "2165277453", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156097, - "pr_file": "c10/cuda/driver_api.cpp", - "discussion_id": "2164495759", - "commented_code": "@@ -47,6 +43,32 @@ C10_EXPORT DriverAPI* DriverAPI::get() {\n return &singleton;\n }\n \n+void* get_symbol(const char* name) {\n+ int runtime_ver = 0, driver_ver = 0;\n+ C10_CUDA_CHECK(cudaRuntimeGetVersion(&runtime_ver));\n+ C10_CUDA_CHECK(cudaDriverGetVersion(&driver_ver));\n+\n+ void* out = nullptr;\n+ cudaDriverEntryPointQueryResult qres{};\n+ if (auto st = cudaGetDriverEntryPoint(name, &out, cudaEnableDefault, &qres);\n+ st == cudaSuccess && qres == cudaDriverEntryPointSuccess && out) {\n+ return out;\n+ }\n+\n+ unsigned int req_ver = std::min(runtime_ver, driver_ver);\n+ if (auto st = cudaGetDriverEntryPointByVersion(", - "comment_created_at": "2025-06-25T01:54:24+00:00", - "comment_author": "eee4017", - "comment_body": "OK, I think cudaGetDriverEntryPoint is available starting from 11.3. Do we need to worry about supporting versions earlier than 11.3 ? 
https://docs.nvidia.com/cuda/archive/11.3.0/cuda-toolkit-release-notes/index.html#cuda-general-new-features", - "pr_file_module": null - }, - { - "comment_id": "2165592482", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156097, - "pr_file": "c10/cuda/driver_api.cpp", - "discussion_id": "2164495759", - "commented_code": "@@ -47,6 +43,32 @@ C10_EXPORT DriverAPI* DriverAPI::get() {\n return &singleton;\n }\n \n+void* get_symbol(const char* name) {\n+ int runtime_ver = 0, driver_ver = 0;\n+ C10_CUDA_CHECK(cudaRuntimeGetVersion(&runtime_ver));\n+ C10_CUDA_CHECK(cudaDriverGetVersion(&driver_ver));\n+\n+ void* out = nullptr;\n+ cudaDriverEntryPointQueryResult qres{};\n+ if (auto st = cudaGetDriverEntryPoint(name, &out, cudaEnableDefault, &qres);\n+ st == cudaSuccess && qres == cudaDriverEntryPointSuccess && out) {\n+ return out;\n+ }\n+\n+ unsigned int req_ver = std::min(runtime_ver, driver_ver);\n+ if (auto st = cudaGetDriverEntryPointByVersion(", - "comment_created_at": "2025-06-25T03:42:54+00:00", - "comment_author": "ngimel", - "comment_body": "No, 12.0 is the earliest version we suppot", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2098997916", - "pr_number": 153991, - "pr_file": "torch/csrc/Module.cpp", - "created_at": "2025-05-20T23:01:15+00:00", - "commented_code": "#ifdef USE_MPS\n torch::mps::initModule(module);\n#endif\n#ifdef USE_VULKAN", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2098997916", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153991, - "pr_file": "torch/csrc/Module.cpp", - "discussion_id": "2098997916", - "commented_code": "@@ -1891,6 +1892,9 @@ PyObject* initModule() {\n #ifdef USE_MPS\n torch::mps::initModule(module);\n #endif\n+#ifdef USE_VULKAN", - "comment_created_at": "2025-05-20T23:01:15+00:00", - "comment_author": "albanD", - "comment_body": "Can we do this ifdef insite the initModule?\r\nThis is a footgun where it becomes very easy to define APIs that exist or not on the python module depending on build flags.\r\nThis is a big issue for distributed that does that today as anyone that \"import\" your function in python will see their script crash in a weird way with another install of PyTorch. And catching these import errors is hard.\r\n\r\nIt is best if we could have always the same API but it throws an error if you try to use it.", - "pr_file_module": null - }, - { - "comment_id": "2099013258", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153991, - "pr_file": "torch/csrc/Module.cpp", - "discussion_id": "2098997916", - "commented_code": "@@ -1891,6 +1892,9 @@ PyObject* initModule() {\n #ifdef USE_MPS\n torch::mps::initModule(module);\n #endif\n+#ifdef USE_VULKAN", - "comment_created_at": "2025-05-20T23:15:42+00:00", - "comment_author": "swolchok", - "comment_body": "I tried to make this match MPS, which is directly above. I take it this is a case where it's too late to fix MPS but not Vulkan?", - "pr_file_module": null - }, - { - "comment_id": "2099067993", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153991, - "pr_file": "torch/csrc/Module.cpp", - "discussion_id": "2098997916", - "commented_code": "@@ -1891,6 +1892,9 @@ PyObject* initModule() {\n #ifdef USE_MPS\n torch::mps::initModule(module);\n #endif\n+#ifdef USE_VULKAN", - "comment_created_at": "2025-05-21T00:15:19+00:00", - "comment_author": "albanD", - "comment_body": "Correct! 
:) ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2029450813", - "pr_number": 150705, - "pr_file": "aten/src/ATen/native/cuda/MemoryAccess.cuh", - "created_at": "2025-04-04T21:21:23+00:00", - "commented_code": "uint64_t address = reinterpret_cast(pointer);\n constexpr int vec2_alignment = std::alignment_of_v>;\n constexpr int vec4_alignment = std::alignment_of_v>;\n#if defined(USE_ROCM) || (defined(CUDA_VERSION) && CUDA_VERSION >= 12080)", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2029450813", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150705, - "pr_file": "aten/src/ATen/native/cuda/MemoryAccess.cuh", - "discussion_id": "2029450813", - "commented_code": "@@ -486,7 +486,9 @@ inline C10_HOST_DEVICE int can_vectorize_up_to(const char *pointer) {\n uint64_t address = reinterpret_cast(pointer);\n constexpr int vec2_alignment = std::alignment_of_v>;\n constexpr int vec4_alignment = std::alignment_of_v>;\n+#if defined(USE_ROCM) || (defined(CUDA_VERSION) && CUDA_VERSION >= 12080)", - "comment_created_at": "2025-04-04T21:21:23+00:00", - "comment_author": "eqy", - "comment_body": "should `USE_ROCM` here also be inverted if the `CUDA_VERSION` condition is `>= 12080`", - "pr_file_module": null - }, - { - "comment_id": "2029462805", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150705, - "pr_file": "aten/src/ATen/native/cuda/MemoryAccess.cuh", - "discussion_id": "2029450813", - "commented_code": "@@ -486,7 +486,9 @@ inline C10_HOST_DEVICE int can_vectorize_up_to(const char *pointer) {\n uint64_t address = reinterpret_cast(pointer);\n constexpr int vec2_alignment = std::alignment_of_v>;\n constexpr int vec4_alignment = std::alignment_of_v>;\n+#if defined(USE_ROCM) || (defined(CUDA_VERSION) && CUDA_VERSION >= 12080)", - "comment_created_at": "2025-04-04T21:34:33+00:00", - "comment_author": "malfet", - "comment_body": "No, I don't think so. 
Before https://github.com/pytorch/pytorch/pull/145746 vec8_alignment were only available to `USE_ROCM`, after it was enabled unconditionally and I want it to be enabled for either ROCM or CUDA newer than 12.6", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-preserve-api-compatibility.json b/_reviewers/pytorch-preserve-api-compatibility.json new file mode 100644 index 0000000..4cf0d47 --- /dev/null +++ b/_reviewers/pytorch-preserve-api-compatibility.json @@ -0,0 +1,150 @@ +[ + { + "discussion_id": "2101258835", + "pr_number": 154042, + "pr_file": "torch/csrc/cuda/CUDAPluggableAllocator.h", + "created_at": "2025-05-21T22:11:21+00:00", + "commented_code": "bool initialized() override;\n double getMemoryFraction(c10::DeviceIndex device) override;\n void setMemoryFraction(double fraction, c10::DeviceIndex device) override;\n void emptyCache() override;\n void emptyCache(c10::cuda::MempoolId_t) override;", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2101258835", + "repo_full_name": "pytorch/pytorch", + "pr_number": 154042, + "pr_file": "torch/csrc/cuda/CUDAPluggableAllocator.h", + "discussion_id": "2101258835", + "commented_code": "@@ -114,7 +114,7 @@ struct TORCH_CUDA_CPP_API CUDAPluggableAllocator\n bool initialized() override;\n double getMemoryFraction(c10::DeviceIndex device) override;\n void setMemoryFraction(double fraction, c10::DeviceIndex device) override;\n- void emptyCache() override;\n+ void emptyCache(c10::cuda::MempoolId_t) override;", + "comment_created_at": "2025-05-21T22:11:21+00:00", + "comment_author": "albanD", + "comment_body": "Are these public APIs? Should we add default value to not BC-break?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1939920168", + "pr_number": 145000, + "pr_file": "aten/src/ATen/DLConvertor.h", + "created_at": "2025-02-03T19:32:11+00:00", + "commented_code": "namespace at {\n\nTORCH_API ScalarType toScalarType(const DLDataType& dtype);\nTORCH_API DLManagedTensor* toDLPack(const Tensor& src);\nTORCH_API Tensor fromDLPack(DLManagedTensor* src);\nTORCH_API Tensor\nfromDLPack(DLManagedTensor* src, std::function deleter);\nTORCH_API DLManagedTensorVersioned* toDLPack(const Tensor& src);", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "1939920168", + "repo_full_name": "pytorch/pytorch", + "pr_number": 145000, + "pr_file": "aten/src/ATen/DLConvertor.h", + "discussion_id": "1939920168", + "commented_code": "@@ -11,11 +11,50 @@\n namespace at {\n \n TORCH_API ScalarType toScalarType(const DLDataType& dtype);\n-TORCH_API DLManagedTensor* toDLPack(const Tensor& src);\n-TORCH_API Tensor fromDLPack(DLManagedTensor* src);\n-TORCH_API Tensor\n-fromDLPack(DLManagedTensor* src, std::function deleter);\n+TORCH_API DLManagedTensorVersioned* toDLPack(const Tensor& src);", + "comment_created_at": "2025-02-03T19:32:11+00:00", + "comment_author": "albanD", + "comment_body": "Is this API used by C++ libraries? 
This would be a BC-breaking change for these users right?", + "pr_file_module": null + }, + { + "comment_id": "1946617614", + "repo_full_name": "pytorch/pytorch", + "pr_number": 145000, + "pr_file": "aten/src/ATen/DLConvertor.h", + "discussion_id": "1939920168", + "commented_code": "@@ -11,11 +11,50 @@\n namespace at {\n \n TORCH_API ScalarType toScalarType(const DLDataType& dtype);\n-TORCH_API DLManagedTensor* toDLPack(const Tensor& src);\n-TORCH_API Tensor fromDLPack(DLManagedTensor* src);\n-TORCH_API Tensor\n-fromDLPack(DLManagedTensor* src, std::function deleter);\n+TORCH_API DLManagedTensorVersioned* toDLPack(const Tensor& src);", + "comment_created_at": "2025-02-07T14:29:00+00:00", + "comment_author": "ysiraichi", + "comment_body": "I think it's being used by PyTorch/XLA. Yes, it's definitely BC-breaking. I was thinking that, since DLPack 1.0 should be the new default, the old version should have the name with a suffix. However, now that you brought this up, not being BC-breaking sounds more important.\r\n\r\nIn summary, I will change the names so that we are not BC-breaking.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1939924463", + "pr_number": 145000, + "pr_file": "torch/csrc/Module.cpp", + "created_at": "2025-02-03T19:35:59+00:00", + "commented_code": "METH_NOARGS,\n nullptr},\n {\"_to_dlpack\", THPModule_toDLPack, METH_O, nullptr},\n {\"_to_dlpack_unversioned\", THPModule_toDLPackUnversioned, METH_O, nullptr},", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "1939924463", + "repo_full_name": "pytorch/pytorch", + "pr_number": 145000, + "pr_file": "torch/csrc/Module.cpp", + "discussion_id": "1939924463", + "commented_code": "@@ -1599,6 +1616,7 @@ static std::initializer_list TorchMethods = {\n METH_NOARGS,\n nullptr},\n {\"_to_dlpack\", THPModule_toDLPack, METH_O, nullptr},\n+ {\"_to_dlpack_unversioned\", THPModule_toDLPackUnversioned, METH_O, nullptr},", + "comment_created_at": "2025-02-03T19:35:59+00:00", + "comment_author": "albanD", + "comment_body": "Why do we still need this one?", + "pr_file_module": null + }, + { + "comment_id": "1941463598", + "repo_full_name": "pytorch/pytorch", + "pr_number": 145000, + "pr_file": "torch/csrc/Module.cpp", + "discussion_id": "1939924463", + "commented_code": "@@ -1599,6 +1616,7 @@ static std::initializer_list TorchMethods = {\n METH_NOARGS,\n nullptr},\n {\"_to_dlpack\", THPModule_toDLPack, METH_O, nullptr},\n+ {\"_to_dlpack_unversioned\", THPModule_toDLPackUnversioned, METH_O, nullptr},", + "comment_created_at": "2025-02-04T16:05:23+00:00", + "comment_author": "rgommers", + "comment_body": "This does look necessary to me. 
For DLPack to change the ABI once, there's a dance that needs doing to be not-super-disruptive: continue returning the old (0.8) version, unless the consumer indicates it can handle the new (1.X) version by passing in `max_version=(1, 0)` (or `(1, x)` in the future).", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1978540718", + "pr_number": 147095, + "pr_file": "torch/csrc/jit/serialization/pickler.h", + "created_at": "2025-03-04T03:17:50+00:00", + "commented_code": "void(const at::Tensor&, std::unordered_map&)>;\n\n// A allowlist of device type, currently available is PrivateUse1\ninline std::unordered_set& GetBackendMetaAllowlist() {\n static std::unordered_set DeviceTypeAllowlist{\n c10::DeviceType::PrivateUse1};\n return DeviceTypeAllowlist;\n}\nTORCH_API std::unordered_set& GetBackendMetaAllowlist();\n\n// Dynamically obtain serialization function pairs\n// that require the corresponding backend.\ninline std::array<\nTORCH_API std::array<", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "1978540718", + "repo_full_name": "pytorch/pytorch", + "pr_number": 147095, + "pr_file": "torch/csrc/jit/serialization/pickler.h", + "discussion_id": "1978540718", + "commented_code": "@@ -299,27 +299,14 @@ using BackendMetaPtr = std::function<\n void(const at::Tensor&, std::unordered_map&)>;\n \n // A allowlist of device type, currently available is PrivateUse1\n-inline std::unordered_set& GetBackendMetaAllowlist() {\n- static std::unordered_set DeviceTypeAllowlist{\n- c10::DeviceType::PrivateUse1};\n- return DeviceTypeAllowlist;\n-}\n+TORCH_API std::unordered_set& GetBackendMetaAllowlist();\n \n // Dynamically obtain serialization function pairs\n // that require the corresponding backend.\n-inline std::array<\n+TORCH_API std::array<", + "comment_created_at": "2025-03-04T03:17:50+00:00", + "comment_author": "cyyever", + "comment_body": "Why added TORCH_API?", + "pr_file_module": null + }, + { + "comment_id": "1997886240", + "repo_full_name": "pytorch/pytorch", + "pr_number": 147095, + "pr_file": "torch/csrc/jit/serialization/pickler.h", + "discussion_id": "1978540718", + "commented_code": "@@ -299,27 +299,14 @@ using BackendMetaPtr = std::function<\n void(const at::Tensor&, std::unordered_map&)>;\n \n // A allowlist of device type, currently available is PrivateUse1\n-inline std::unordered_set& GetBackendMetaAllowlist() {\n- static std::unordered_set DeviceTypeAllowlist{\n- c10::DeviceType::PrivateUse1};\n- return DeviceTypeAllowlist;\n-}\n+TORCH_API std::unordered_set& GetBackendMetaAllowlist();\n \n // Dynamically obtain serialization function pairs\n // that require the corresponding backend.\n-inline std::array<\n+TORCH_API std::array<", + "comment_created_at": "2025-03-17T03:39:20+00:00", + "comment_author": "FFFrog", + "comment_body": "Sorry for the late reply.\r\n\r\nGetBackendMetaSerialization can be called by some inline function like [setTensorMetadata](https://github.com/pytorch/pytorch/blob/916e8979d3e0d651a9091732ce3e59da32e72b0e/torch/csrc/jit/serialization/pickler.h#L386), which can be called in other [.cpp](https://github.com/pytorch/pytorch/blob/916e8979d3e0d651a9091732ce3e59da32e72b0e/torch/csrc/Module.cpp#L2438) and the cpp can be built into another libtorch_python.so, therefore we need to add TORCH_API for GetBackendMetaSerialization to make it visible for other .so.", + "pr_file_module": null + }, + { + "comment_id": "1997956969", + "repo_full_name": "pytorch/pytorch", + "pr_number": 147095, + "pr_file": 
"torch/csrc/jit/serialization/pickler.h", + "discussion_id": "1978540718", + "commented_code": "@@ -299,27 +299,14 @@ using BackendMetaPtr = std::function<\n void(const at::Tensor&, std::unordered_map&)>;\n \n // A allowlist of device type, currently available is PrivateUse1\n-inline std::unordered_set& GetBackendMetaAllowlist() {\n- static std::unordered_set DeviceTypeAllowlist{\n- c10::DeviceType::PrivateUse1};\n- return DeviceTypeAllowlist;\n-}\n+TORCH_API std::unordered_set& GetBackendMetaAllowlist();\n \n // Dynamically obtain serialization function pairs\n // that require the corresponding backend.\n-inline std::array<\n+TORCH_API std::array<", + "comment_created_at": "2025-03-17T05:29:58+00:00", + "comment_author": "cyyever", + "comment_body": "It's not need in your description, it's just an inner function. What error did you encounter without TORCH_API? Post it? ", + "pr_file_module": null + }, + { + "comment_id": "1998032914", + "repo_full_name": "pytorch/pytorch", + "pr_number": 147095, + "pr_file": "torch/csrc/jit/serialization/pickler.h", + "discussion_id": "1978540718", + "commented_code": "@@ -299,27 +299,14 @@ using BackendMetaPtr = std::function<\n void(const at::Tensor&, std::unordered_map&)>;\n \n // A allowlist of device type, currently available is PrivateUse1\n-inline std::unordered_set& GetBackendMetaAllowlist() {\n- static std::unordered_set DeviceTypeAllowlist{\n- c10::DeviceType::PrivateUse1};\n- return DeviceTypeAllowlist;\n-}\n+TORCH_API std::unordered_set& GetBackendMetaAllowlist();\n \n // Dynamically obtain serialization function pairs\n // that require the corresponding backend.\n-inline std::array<\n+TORCH_API std::array<", + "comment_created_at": "2025-03-17T06:37:30+00:00", + "comment_author": "FFFrog", + "comment_body": "![image](https://github.com/user-attachments/assets/6c8814d2-20b3-4029-9db7-9108a3c71378)\r\n\r\nThe **pickler.h** file is included by **Module.cpp**, so the function `getTensorMetadata` will exist in Module.cpp (although it is marked as inline, the compiler will probably treat it as a normal function, but to avoid symbol conflicts, it is additionally marked as a weak symbol). Then `getTensorMetadata` calls `GetBackendMetaSerialization` internally, and the default hidden behavior makes the symbol invisible.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-preserve-api-compatibility.md b/_reviewers/pytorch-preserve-api-compatibility.md index eb3f1be..592a254 100644 --- a/_reviewers/pytorch-preserve-api-compatibility.md +++ b/_reviewers/pytorch-preserve-api-compatibility.md @@ -47,155 +47,3 @@ To maintain compatibility: ``` When compatibility cannot be maintained, clearly document the breaking change and increment the major version number appropriately. 
- - -[ - { - "discussion_id": "2101258835", - "pr_number": 154042, - "pr_file": "torch/csrc/cuda/CUDAPluggableAllocator.h", - "created_at": "2025-05-21T22:11:21+00:00", - "commented_code": "bool initialized() override;\n double getMemoryFraction(c10::DeviceIndex device) override;\n void setMemoryFraction(double fraction, c10::DeviceIndex device) override;\n void emptyCache() override;\n void emptyCache(c10::cuda::MempoolId_t) override;", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2101258835", - "repo_full_name": "pytorch/pytorch", - "pr_number": 154042, - "pr_file": "torch/csrc/cuda/CUDAPluggableAllocator.h", - "discussion_id": "2101258835", - "commented_code": "@@ -114,7 +114,7 @@ struct TORCH_CUDA_CPP_API CUDAPluggableAllocator\n bool initialized() override;\n double getMemoryFraction(c10::DeviceIndex device) override;\n void setMemoryFraction(double fraction, c10::DeviceIndex device) override;\n- void emptyCache() override;\n+ void emptyCache(c10::cuda::MempoolId_t) override;", - "comment_created_at": "2025-05-21T22:11:21+00:00", - "comment_author": "albanD", - "comment_body": "Are these public APIs? Should we add default value to not BC-break?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1939920168", - "pr_number": 145000, - "pr_file": "aten/src/ATen/DLConvertor.h", - "created_at": "2025-02-03T19:32:11+00:00", - "commented_code": "namespace at {\n\nTORCH_API ScalarType toScalarType(const DLDataType& dtype);\nTORCH_API DLManagedTensor* toDLPack(const Tensor& src);\nTORCH_API Tensor fromDLPack(DLManagedTensor* src);\nTORCH_API Tensor\nfromDLPack(DLManagedTensor* src, std::function deleter);\nTORCH_API DLManagedTensorVersioned* toDLPack(const Tensor& src);", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "1939920168", - "repo_full_name": "pytorch/pytorch", - "pr_number": 145000, - "pr_file": "aten/src/ATen/DLConvertor.h", - "discussion_id": "1939920168", - "commented_code": "@@ -11,11 +11,50 @@\n namespace at {\n \n TORCH_API ScalarType toScalarType(const DLDataType& dtype);\n-TORCH_API DLManagedTensor* toDLPack(const Tensor& src);\n-TORCH_API Tensor fromDLPack(DLManagedTensor* src);\n-TORCH_API Tensor\n-fromDLPack(DLManagedTensor* src, std::function deleter);\n+TORCH_API DLManagedTensorVersioned* toDLPack(const Tensor& src);", - "comment_created_at": "2025-02-03T19:32:11+00:00", - "comment_author": "albanD", - "comment_body": "Is this API used by C++ libraries? This would be a BC-breaking change for these users right?", - "pr_file_module": null - }, - { - "comment_id": "1946617614", - "repo_full_name": "pytorch/pytorch", - "pr_number": 145000, - "pr_file": "aten/src/ATen/DLConvertor.h", - "discussion_id": "1939920168", - "commented_code": "@@ -11,11 +11,50 @@\n namespace at {\n \n TORCH_API ScalarType toScalarType(const DLDataType& dtype);\n-TORCH_API DLManagedTensor* toDLPack(const Tensor& src);\n-TORCH_API Tensor fromDLPack(DLManagedTensor* src);\n-TORCH_API Tensor\n-fromDLPack(DLManagedTensor* src, std::function deleter);\n+TORCH_API DLManagedTensorVersioned* toDLPack(const Tensor& src);", - "comment_created_at": "2025-02-07T14:29:00+00:00", - "comment_author": "ysiraichi", - "comment_body": "I think it's being used by PyTorch/XLA. Yes, it's definitely BC-breaking. I was thinking that, since DLPack 1.0 should be the new default, the old version should have the name with a suffix. 
However, now that you brought this up, not being BC-breaking sounds more important.\r\n\r\nIn summary, I will change the names so that we are not BC-breaking.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1939924463", - "pr_number": 145000, - "pr_file": "torch/csrc/Module.cpp", - "created_at": "2025-02-03T19:35:59+00:00", - "commented_code": "METH_NOARGS,\n nullptr},\n {\"_to_dlpack\", THPModule_toDLPack, METH_O, nullptr},\n {\"_to_dlpack_unversioned\", THPModule_toDLPackUnversioned, METH_O, nullptr},", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "1939924463", - "repo_full_name": "pytorch/pytorch", - "pr_number": 145000, - "pr_file": "torch/csrc/Module.cpp", - "discussion_id": "1939924463", - "commented_code": "@@ -1599,6 +1616,7 @@ static std::initializer_list TorchMethods = {\n METH_NOARGS,\n nullptr},\n {\"_to_dlpack\", THPModule_toDLPack, METH_O, nullptr},\n+ {\"_to_dlpack_unversioned\", THPModule_toDLPackUnversioned, METH_O, nullptr},", - "comment_created_at": "2025-02-03T19:35:59+00:00", - "comment_author": "albanD", - "comment_body": "Why do we still need this one?", - "pr_file_module": null - }, - { - "comment_id": "1941463598", - "repo_full_name": "pytorch/pytorch", - "pr_number": 145000, - "pr_file": "torch/csrc/Module.cpp", - "discussion_id": "1939924463", - "commented_code": "@@ -1599,6 +1616,7 @@ static std::initializer_list TorchMethods = {\n METH_NOARGS,\n nullptr},\n {\"_to_dlpack\", THPModule_toDLPack, METH_O, nullptr},\n+ {\"_to_dlpack_unversioned\", THPModule_toDLPackUnversioned, METH_O, nullptr},", - "comment_created_at": "2025-02-04T16:05:23+00:00", - "comment_author": "rgommers", - "comment_body": "This does look necessary to me. For DLPack to change the ABI once, there's a dance that needs doing to be not-super-disruptive: continue returning the old (0.8) version, unless the consumer indicates it can handle the new (1.X) version by passing in `max_version=(1, 0)` (or `(1, x)` in the future).", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1978540718", - "pr_number": 147095, - "pr_file": "torch/csrc/jit/serialization/pickler.h", - "created_at": "2025-03-04T03:17:50+00:00", - "commented_code": "void(const at::Tensor&, std::unordered_map&)>;\n\n// A allowlist of device type, currently available is PrivateUse1\ninline std::unordered_set& GetBackendMetaAllowlist() {\n static std::unordered_set DeviceTypeAllowlist{\n c10::DeviceType::PrivateUse1};\n return DeviceTypeAllowlist;\n}\nTORCH_API std::unordered_set& GetBackendMetaAllowlist();\n\n// Dynamically obtain serialization function pairs\n// that require the corresponding backend.\ninline std::array<\nTORCH_API std::array<", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "1978540718", - "repo_full_name": "pytorch/pytorch", - "pr_number": 147095, - "pr_file": "torch/csrc/jit/serialization/pickler.h", - "discussion_id": "1978540718", - "commented_code": "@@ -299,27 +299,14 @@ using BackendMetaPtr = std::function<\n void(const at::Tensor&, std::unordered_map&)>;\n \n // A allowlist of device type, currently available is PrivateUse1\n-inline std::unordered_set& GetBackendMetaAllowlist() {\n- static std::unordered_set DeviceTypeAllowlist{\n- c10::DeviceType::PrivateUse1};\n- return DeviceTypeAllowlist;\n-}\n+TORCH_API std::unordered_set& GetBackendMetaAllowlist();\n \n // Dynamically obtain serialization function pairs\n // that require the corresponding backend.\n-inline std::array<\n+TORCH_API std::array<", - 
"comment_created_at": "2025-03-04T03:17:50+00:00", - "comment_author": "cyyever", - "comment_body": "Why added TORCH_API?", - "pr_file_module": null - }, - { - "comment_id": "1997886240", - "repo_full_name": "pytorch/pytorch", - "pr_number": 147095, - "pr_file": "torch/csrc/jit/serialization/pickler.h", - "discussion_id": "1978540718", - "commented_code": "@@ -299,27 +299,14 @@ using BackendMetaPtr = std::function<\n void(const at::Tensor&, std::unordered_map&)>;\n \n // A allowlist of device type, currently available is PrivateUse1\n-inline std::unordered_set& GetBackendMetaAllowlist() {\n- static std::unordered_set DeviceTypeAllowlist{\n- c10::DeviceType::PrivateUse1};\n- return DeviceTypeAllowlist;\n-}\n+TORCH_API std::unordered_set& GetBackendMetaAllowlist();\n \n // Dynamically obtain serialization function pairs\n // that require the corresponding backend.\n-inline std::array<\n+TORCH_API std::array<", - "comment_created_at": "2025-03-17T03:39:20+00:00", - "comment_author": "FFFrog", - "comment_body": "Sorry for the late reply.\r\n\ufeff\r\nGetBackendMetaSerialization can be called by some inline function like [setTensorMetadata](https://github.com/pytorch/pytorch/blob/916e8979d3e0d651a9091732ce3e59da32e72b0e/torch/csrc/jit/serialization/pickler.h#L386), which can be called in other [.cpp](https://github.com/pytorch/pytorch/blob/916e8979d3e0d651a9091732ce3e59da32e72b0e/torch/csrc/Module.cpp#L2438) and the cpp can be built into another libtorch_python.so, therefore we need to add TORCH_API for GetBackendMetaSerialization to make it visible for other .so.", - "pr_file_module": null - }, - { - "comment_id": "1997956969", - "repo_full_name": "pytorch/pytorch", - "pr_number": 147095, - "pr_file": "torch/csrc/jit/serialization/pickler.h", - "discussion_id": "1978540718", - "commented_code": "@@ -299,27 +299,14 @@ using BackendMetaPtr = std::function<\n void(const at::Tensor&, std::unordered_map&)>;\n \n // A allowlist of device type, currently available is PrivateUse1\n-inline std::unordered_set& GetBackendMetaAllowlist() {\n- static std::unordered_set DeviceTypeAllowlist{\n- c10::DeviceType::PrivateUse1};\n- return DeviceTypeAllowlist;\n-}\n+TORCH_API std::unordered_set& GetBackendMetaAllowlist();\n \n // Dynamically obtain serialization function pairs\n // that require the corresponding backend.\n-inline std::array<\n+TORCH_API std::array<", - "comment_created_at": "2025-03-17T05:29:58+00:00", - "comment_author": "cyyever", - "comment_body": "It's not need in your description, it's just an inner function. What error did you encounter without TORCH_API? Post it? 
", - "pr_file_module": null - }, - { - "comment_id": "1998032914", - "repo_full_name": "pytorch/pytorch", - "pr_number": 147095, - "pr_file": "torch/csrc/jit/serialization/pickler.h", - "discussion_id": "1978540718", - "commented_code": "@@ -299,27 +299,14 @@ using BackendMetaPtr = std::function<\n void(const at::Tensor&, std::unordered_map&)>;\n \n // A allowlist of device type, currently available is PrivateUse1\n-inline std::unordered_set& GetBackendMetaAllowlist() {\n- static std::unordered_set DeviceTypeAllowlist{\n- c10::DeviceType::PrivateUse1};\n- return DeviceTypeAllowlist;\n-}\n+TORCH_API std::unordered_set& GetBackendMetaAllowlist();\n \n // Dynamically obtain serialization function pairs\n // that require the corresponding backend.\n-inline std::array<\n+TORCH_API std::array<", - "comment_created_at": "2025-03-17T06:37:30+00:00", - "comment_author": "FFFrog", - "comment_body": "![image](https://github.com/user-attachments/assets/6c8814d2-20b3-4029-9db7-9108a3c71378)\r\n\r\nThe **pickler.h** file is included by **Module.cpp**, so the function `getTensorMetadata` will exist in Module.cpp (although it is marked as inline, the compiler will probably treat it as a normal function, but to avoid symbol conflicts, it is additionally marked as a weak symbol). Then `getTensorMetadata` calls `GetBackendMetaSerialization` internally, and the default hidden behavior makes the symbol invisible.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-preserve-error-context.json b/_reviewers/pytorch-preserve-error-context.json new file mode 100644 index 0000000..34d4d97 --- /dev/null +++ b/_reviewers/pytorch-preserve-error-context.json @@ -0,0 +1,188 @@ +[ + { + "discussion_id": "2110667766", + "pr_number": 152023, + "pr_file": "c10/cuda/CUDAException.cpp", + "created_at": "2025-05-28T01:27:54+00:00", + "commented_code": "\"Device-side assertions were explicitly omitted for this error check; the error probably arose while initializing the DSA handlers.\");\n }\n#endif\n\n TORCH_CHECK(false, check_message);\n throw c10::DeviceError(err, check_message);", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2110667766", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152023, + "pr_file": "c10/cuda/CUDAException.cpp", + "discussion_id": "2110667766", + "commented_code": "@@ -38,8 +38,7 @@ void c10_cuda_check_implementation(\n \"Device-side assertions were explicitly omitted for this error check; the error probably arose while initializing the DSA handlers.\");\n }\n #endif\n-\n- TORCH_CHECK(false, check_message);\n+ throw c10::DeviceError(err, check_message);", + "comment_created_at": "2025-05-28T01:27:54+00:00", + "comment_author": "ngimel", + "comment_body": "TORCH_CHECK also provides information about the callsite that this will lose? 
https://github.com/pytorch/pytorch/blob/e9e1aacef8913bc3baabe502e2858b776e6aea3f/c10/util/Exception.cpp#L109", + "pr_file_module": null + }, + { + "comment_id": "2110685549", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152023, + "pr_file": "c10/cuda/CUDAException.cpp", + "discussion_id": "2110667766", + "commented_code": "@@ -38,8 +38,7 @@ void c10_cuda_check_implementation(\n \"Device-side assertions were explicitly omitted for this error check; the error probably arose while initializing the DSA handlers.\");\n }\n #endif\n-\n- TORCH_CHECK(false, check_message);\n+ throw c10::DeviceError(err, check_message);", + "comment_created_at": "2025-05-28T01:41:55+00:00", + "comment_author": "ngimel", + "comment_body": "TORCH_CHECK_WITH(DeviceError, false, message)", + "pr_file_module": null + }, + { + "comment_id": "2110945768", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152023, + "pr_file": "c10/cuda/CUDAException.cpp", + "discussion_id": "2110667766", + "commented_code": "@@ -38,8 +38,7 @@ void c10_cuda_check_implementation(\n \"Device-side assertions were explicitly omitted for this error check; the error probably arose while initializing the DSA handlers.\");\n }\n #endif\n-\n- TORCH_CHECK(false, check_message);\n+ throw c10::DeviceError(err, check_message);", + "comment_created_at": "2025-05-28T05:24:01+00:00", + "comment_author": "mradmila", + "comment_body": "Actually, wouldn't that leave \"err\" behind? We want all of it, no?", + "pr_file_module": null + }, + { + "comment_id": "2110961413", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152023, + "pr_file": "c10/cuda/CUDAException.cpp", + "discussion_id": "2110667766", + "commented_code": "@@ -38,8 +38,7 @@ void c10_cuda_check_implementation(\n \"Device-side assertions were explicitly omitted for this error check; the error probably arose while initializing the DSA handlers.\");\n }\n #endif\n-\n- TORCH_CHECK(false, check_message);\n+ throw c10::DeviceError(err, check_message);", + "comment_created_at": "2025-05-28T05:36:26+00:00", + "comment_author": "mradmila", + "comment_body": "Also, I hope we don't strip error messages on the use side?", + "pr_file_module": null + }, + { + "comment_id": "2111059460", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152023, + "pr_file": "c10/cuda/CUDAException.cpp", + "discussion_id": "2110667766", + "commented_code": "@@ -38,8 +38,7 @@ void c10_cuda_check_implementation(\n \"Device-side assertions were explicitly omitted for this error check; the error probably arose while initializing the DSA handlers.\");\n }\n #endif\n-\n- TORCH_CHECK(false, check_message);\n+ throw c10::DeviceError(err, check_message);", + "comment_created_at": "2025-05-28T06:45:02+00:00", + "comment_author": "malfet", + "comment_body": "@ngimel TORCH_CHECK will not preserve the exist code. 
And callsite was already stripped in the existing code (it was passed as arguments to that function, but ignored) I can add it back as prerequisite/followup PR", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2089785335", + "pr_number": 150218, + "pr_file": "torch/csrc/Module.cpp", + "created_at": "2025-05-14T21:31:27+00:00", + "commented_code": "END_HANDLE_TH_ERRORS\n}\n\nstatic PyObject* THPModule_torchDeviceToDLDevice(\n PyObject* _unused,\n PyObject* data) {\n HANDLE_TH_ERRORS\n TORCH_CHECK(\n THPDevice_Check(data),\n \"torchDeviceToDLDevice: expected torch.device argument.\");\n auto device = reinterpret_cast(data)->device;\n auto dl_device = at::torchDeviceToDLDevice(device);\n auto tuple = PyTuple_New(2);", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2089785335", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150218, + "pr_file": "torch/csrc/Module.cpp", + "discussion_id": "2089785335", + "commented_code": "@@ -636,6 +668,22 @@ static PyObject* THPModule_fromDLPack(PyObject* _unused, PyObject* data) {\n END_HANDLE_TH_ERRORS\n }\n \n+static PyObject* THPModule_torchDeviceToDLDevice(\n+ PyObject* _unused,\n+ PyObject* data) {\n+ HANDLE_TH_ERRORS\n+ TORCH_CHECK(\n+ THPDevice_Check(data),\n+ \"torchDeviceToDLDevice: expected torch.device argument.\");\n+ auto device = reinterpret_cast(data)->device;\n+ auto dl_device = at::torchDeviceToDLDevice(device);\n+ auto tuple = PyTuple_New(2);", + "comment_created_at": "2025-05-14T21:31:27+00:00", + "comment_author": "albanD", + "comment_body": "Add error checking in case this failed", + "pr_file_module": null + }, + { + "comment_id": "2105840117", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150218, + "pr_file": "torch/csrc/Module.cpp", + "discussion_id": "2089785335", + "commented_code": "@@ -636,6 +668,22 @@ static PyObject* THPModule_fromDLPack(PyObject* _unused, PyObject* data) {\n END_HANDLE_TH_ERRORS\n }\n \n+static PyObject* THPModule_torchDeviceToDLDevice(\n+ PyObject* _unused,\n+ PyObject* data) {\n+ HANDLE_TH_ERRORS\n+ TORCH_CHECK(\n+ THPDevice_Check(data),\n+ \"torchDeviceToDLDevice: expected torch.device argument.\");\n+ auto device = reinterpret_cast(data)->device;\n+ auto dl_device = at::torchDeviceToDLDevice(device);\n+ auto tuple = PyTuple_New(2);", + "comment_created_at": "2025-05-24T14:28:55+00:00", + "comment_author": "ysiraichi", + "comment_body": "Not sure I get what you mean. If `at::torchDeviceToDLDevice` errors, the error will bubble up to Python, won't it?", + "pr_file_module": null + }, + { + "comment_id": "2150360414", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150218, + "pr_file": "torch/csrc/Module.cpp", + "discussion_id": "2089785335", + "commented_code": "@@ -636,6 +668,22 @@ static PyObject* THPModule_fromDLPack(PyObject* _unused, PyObject* data) {\n END_HANDLE_TH_ERRORS\n }\n \n+static PyObject* THPModule_torchDeviceToDLDevice(\n+ PyObject* _unused,\n+ PyObject* data) {\n+ HANDLE_TH_ERRORS\n+ TORCH_CHECK(\n+ THPDevice_Check(data),\n+ \"torchDeviceToDLDevice: expected torch.device argument.\");\n+ auto device = reinterpret_cast(data)->device;\n+ auto dl_device = at::torchDeviceToDLDevice(device);\n+ auto tuple = PyTuple_New(2);", + "comment_created_at": "2025-06-16T16:00:19+00:00", + "comment_author": "albanD", + "comment_body": "CPython is C API, not C++. 
There is no error throwing here, you need to check the tuple is not null and if so, make this a c++ error\r\n\r\n```cpp\r\nif (!tuple) {\r\n throw python_error();\r\n}\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2089799016", + "pr_number": 150218, + "pr_file": "torch/csrc/Module.cpp", + "created_at": "2025-05-14T21:44:38+00:00", + "commented_code": "}\n\ntemplate \nPyObject* THPModule_toDLPackImpl(PyObject* _unused, PyObject* data) {\nPyObject* THPModule_toDLPackImpl(\n PyObject* self,\n PyObject* args,\n PyObject* kwargs) {\n HANDLE_TH_ERRORS\n TORCH_CHECK(THPVariable_Check(data), \"data must be a Tensor\");\n auto tensor = at::DLPackTraits::toDLPack(THPVariable_Unpack(data));\n return PyCapsule_New(\n tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n static torch::PythonArgParser parser(\n {\"_to_dlpack(Tensor data, *, IntArrayRef? dl_device=None, bool? copy=None)\"});\n torch::ParsedArgs<3> parsed_args{};\n auto r = parser.parse(args, kwargs, parsed_args);\n\n if (r.idx == 0) {\n auto data = r.tensor(0);\n auto dl_device = r.intlist(1);\n auto copy = r.toBoolOptional(2);\n\n // Parse the int list into a tuple.\n std::optional optional_dl_device;\n\n if (!dl_device.empty()) {\n TORCH_CHECK(\n dl_device.size() == 2,\n \"dl_device must be either None or a tuple of ints\");\n optional_dl_device = DLDevice{\n static_cast(dl_device[0]),\n static_cast(dl_device[1])};\n }\n\n auto tensor = at::DLPackTraits::toDLPack(\n at::maybeCopyTensor(data, optional_dl_device, copy));\n return PyCapsule_New(\n tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n }\n\n return nullptr;", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2089799016", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150218, + "pr_file": "torch/csrc/Module.cpp", + "discussion_id": "2089799016", + "commented_code": "@@ -607,25 +607,57 @@ void DLPack_Capsule_Destructor(PyObject* data) {\n }\n \n template \n-PyObject* THPModule_toDLPackImpl(PyObject* _unused, PyObject* data) {\n+PyObject* THPModule_toDLPackImpl(\n+ PyObject* self,\n+ PyObject* args,\n+ PyObject* kwargs) {\n HANDLE_TH_ERRORS\n- TORCH_CHECK(THPVariable_Check(data), \"data must be a Tensor\");\n- auto tensor = at::DLPackTraits::toDLPack(THPVariable_Unpack(data));\n- return PyCapsule_New(\n- tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n+ static torch::PythonArgParser parser(\n+ {\"_to_dlpack(Tensor data, *, IntArrayRef? dl_device=None, bool? 
copy=None)\"});\n+ torch::ParsedArgs<3> parsed_args{};\n+ auto r = parser.parse(args, kwargs, parsed_args);\n+\n+ if (r.idx == 0) {\n+ auto data = r.tensor(0);\n+ auto dl_device = r.intlist(1);\n+ auto copy = r.toBoolOptional(2);\n+\n+ // Parse the int list into a tuple.\n+ std::optional optional_dl_device;\n+\n+ if (!dl_device.empty()) {\n+ TORCH_CHECK(\n+ dl_device.size() == 2,\n+ \"dl_device must be either None or a tuple of ints\");\n+ optional_dl_device = DLDevice{\n+ static_cast(dl_device[0]),\n+ static_cast(dl_device[1])};\n+ }\n+\n+ auto tensor = at::DLPackTraits::toDLPack(\n+ at::maybeCopyTensor(data, optional_dl_device, copy));\n+ return PyCapsule_New(\n+ tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n+ }\n+\n+ return nullptr;", + "comment_created_at": "2025-05-14T21:44:38+00:00", + "comment_author": "albanD", + "comment_body": "I'm don't recall, will this have the right python err set?", + "pr_file_module": null + }, + { + "comment_id": "2105839664", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150218, + "pr_file": "torch/csrc/Module.cpp", + "discussion_id": "2089799016", + "commented_code": "@@ -607,25 +607,57 @@ void DLPack_Capsule_Destructor(PyObject* data) {\n }\n \n template \n-PyObject* THPModule_toDLPackImpl(PyObject* _unused, PyObject* data) {\n+PyObject* THPModule_toDLPackImpl(\n+ PyObject* self,\n+ PyObject* args,\n+ PyObject* kwargs) {\n HANDLE_TH_ERRORS\n- TORCH_CHECK(THPVariable_Check(data), \"data must be a Tensor\");\n- auto tensor = at::DLPackTraits::toDLPack(THPVariable_Unpack(data));\n- return PyCapsule_New(\n- tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n+ static torch::PythonArgParser parser(\n+ {\"_to_dlpack(Tensor data, *, IntArrayRef? dl_device=None, bool? copy=None)\"});\n+ torch::ParsedArgs<3> parsed_args{};\n+ auto r = parser.parse(args, kwargs, parsed_args);\n+\n+ if (r.idx == 0) {\n+ auto data = r.tensor(0);\n+ auto dl_device = r.intlist(1);\n+ auto copy = r.toBoolOptional(2);\n+\n+ // Parse the int list into a tuple.\n+ std::optional optional_dl_device;\n+\n+ if (!dl_device.empty()) {\n+ TORCH_CHECK(\n+ dl_device.size() == 2,\n+ \"dl_device must be either None or a tuple of ints\");\n+ optional_dl_device = DLDevice{\n+ static_cast(dl_device[0]),\n+ static_cast(dl_device[1])};\n+ }\n+\n+ auto tensor = at::DLPackTraits::toDLPack(\n+ at::maybeCopyTensor(data, optional_dl_device, copy));\n+ return PyCapsule_New(\n+ tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n+ }\n+\n+ return nullptr;", + "comment_created_at": "2025-05-24T14:26:49+00:00", + "comment_author": "ysiraichi", + "comment_body": "I think so. At least, it will throw an error inside the parser. 
I will replace it with `Py_RETURN_NONE`, just for consistency.", + "pr_file_module": null + }, + { + "comment_id": "2147035848", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150218, + "pr_file": "torch/csrc/Module.cpp", + "discussion_id": "2089799016", + "commented_code": "@@ -607,25 +607,57 @@ void DLPack_Capsule_Destructor(PyObject* data) {\n }\n \n template \n-PyObject* THPModule_toDLPackImpl(PyObject* _unused, PyObject* data) {\n+PyObject* THPModule_toDLPackImpl(\n+ PyObject* self,\n+ PyObject* args,\n+ PyObject* kwargs) {\n HANDLE_TH_ERRORS\n- TORCH_CHECK(THPVariable_Check(data), \"data must be a Tensor\");\n- auto tensor = at::DLPackTraits::toDLPack(THPVariable_Unpack(data));\n- return PyCapsule_New(\n- tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n+ static torch::PythonArgParser parser(\n+ {\"_to_dlpack(Tensor data, *, IntArrayRef? dl_device=None, bool? copy=None)\"});\n+ torch::ParsedArgs<3> parsed_args{};\n+ auto r = parser.parse(args, kwargs, parsed_args);\n+\n+ if (r.idx == 0) {\n+ auto data = r.tensor(0);\n+ auto dl_device = r.intlist(1);\n+ auto copy = r.toBoolOptional(2);\n+\n+ // Parse the int list into a tuple.\n+ std::optional optional_dl_device;\n+\n+ if (!dl_device.empty()) {\n+ TORCH_CHECK(\n+ dl_device.size() == 2,\n+ \"dl_device must be either None or a tuple of ints\");\n+ optional_dl_device = DLDevice{\n+ static_cast(dl_device[0]),\n+ static_cast(dl_device[1])};\n+ }\n+\n+ auto tensor = at::DLPackTraits::toDLPack(\n+ at::maybeCopyTensor(data, optional_dl_device, copy));\n+ return PyCapsule_New(\n+ tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n+ }\n+\n+ return nullptr;", + "comment_created_at": "2025-06-14T17:36:14+00:00", + "comment_author": "ysiraichi", + "comment_body": "I think I hadn't understood you question earlier. If you are asking whether a C++ exception will be mapped the correct Python error set, the answer is: yes! `END_HANDLE_TH_ERRORS` will take care of that. Specifically, `torch::translate_exception_to_python(std::current_exception())` call will do the job.", + "pr_file_module": null + }, + { + "comment_id": "2150371678", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150218, + "pr_file": "torch/csrc/Module.cpp", + "discussion_id": "2089799016", + "commented_code": "@@ -607,25 +607,57 @@ void DLPack_Capsule_Destructor(PyObject* data) {\n }\n \n template \n-PyObject* THPModule_toDLPackImpl(PyObject* _unused, PyObject* data) {\n+PyObject* THPModule_toDLPackImpl(\n+ PyObject* self,\n+ PyObject* args,\n+ PyObject* kwargs) {\n HANDLE_TH_ERRORS\n- TORCH_CHECK(THPVariable_Check(data), \"data must be a Tensor\");\n- auto tensor = at::DLPackTraits::toDLPack(THPVariable_Unpack(data));\n- return PyCapsule_New(\n- tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n+ static torch::PythonArgParser parser(\n+ {\"_to_dlpack(Tensor data, *, IntArrayRef? dl_device=None, bool? 
copy=None)\"});\n+ torch::ParsedArgs<3> parsed_args{};\n+ auto r = parser.parse(args, kwargs, parsed_args);\n+\n+ if (r.idx == 0) {\n+ auto data = r.tensor(0);\n+ auto dl_device = r.intlist(1);\n+ auto copy = r.toBoolOptional(2);\n+\n+ // Parse the int list into a tuple.\n+ std::optional optional_dl_device;\n+\n+ if (!dl_device.empty()) {\n+ TORCH_CHECK(\n+ dl_device.size() == 2,\n+ \"dl_device must be either None or a tuple of ints\");\n+ optional_dl_device = DLDevice{\n+ static_cast(dl_device[0]),\n+ static_cast(dl_device[1])};\n+ }\n+\n+ auto tensor = at::DLPackTraits::toDLPack(\n+ at::maybeCopyTensor(data, optional_dl_device, copy));\n+ return PyCapsule_New(\n+ tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n+ }\n+\n+ return nullptr;", + "comment_created_at": "2025-06-16T16:06:38+00:00", + "comment_author": "albanD", + "comment_body": "Returning None here is completely different. These are in no way interchangeable.\r\nReturning `nullptr` from these APIs mean that something went wrong and the caller should check the globally set error for more info. Returning None means that all went well and the result is \"None\".\r\nYou either want one or the other :D \r\nOr maybe you're saying this is dead code?", + "pr_file_module": null + }, + { + "comment_id": "2158931025", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150218, + "pr_file": "torch/csrc/Module.cpp", + "discussion_id": "2089799016", + "commented_code": "@@ -607,25 +607,57 @@ void DLPack_Capsule_Destructor(PyObject* data) {\n }\n \n template \n-PyObject* THPModule_toDLPackImpl(PyObject* _unused, PyObject* data) {\n+PyObject* THPModule_toDLPackImpl(\n+ PyObject* self,\n+ PyObject* args,\n+ PyObject* kwargs) {\n HANDLE_TH_ERRORS\n- TORCH_CHECK(THPVariable_Check(data), \"data must be a Tensor\");\n- auto tensor = at::DLPackTraits::toDLPack(THPVariable_Unpack(data));\n- return PyCapsule_New(\n- tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n+ static torch::PythonArgParser parser(\n+ {\"_to_dlpack(Tensor data, *, IntArrayRef? dl_device=None, bool? copy=None)\"});\n+ torch::ParsedArgs<3> parsed_args{};\n+ auto r = parser.parse(args, kwargs, parsed_args);\n+\n+ if (r.idx == 0) {\n+ auto data = r.tensor(0);\n+ auto dl_device = r.intlist(1);\n+ auto copy = r.toBoolOptional(2);\n+\n+ // Parse the int list into a tuple.\n+ std::optional optional_dl_device;\n+\n+ if (!dl_device.empty()) {\n+ TORCH_CHECK(\n+ dl_device.size() == 2,\n+ \"dl_device must be either None or a tuple of ints\");\n+ optional_dl_device = DLDevice{\n+ static_cast(dl_device[0]),\n+ static_cast(dl_device[1])};\n+ }\n+\n+ auto tensor = at::DLPackTraits::toDLPack(\n+ at::maybeCopyTensor(data, optional_dl_device, copy));\n+ return PyCapsule_New(\n+ tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n+ }\n+\n+ return nullptr;", + "comment_created_at": "2025-06-20T13:08:36+00:00", + "comment_author": "ysiraichi", + "comment_body": "This is essentially deadcode.\r\nOn second thoughts, it would be better to just `TORCH_INTERNAL_ASSERT(r.idx == 0);`. 
Then, there would be no need to return None, since the bug would be in the arg parser.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-preserve-error-context.md b/_reviewers/pytorch-preserve-error-context.md index 3921867..d638070 100644 --- a/_reviewers/pytorch-preserve-error-context.md +++ b/_reviewers/pytorch-preserve-error-context.md @@ -36,193 +36,3 @@ TORCH_CHECK( ``` This approach ensures that when errors occur, developers have the information they need to understand and fix the issue quickly. - - -[ - { - "discussion_id": "2110667766", - "pr_number": 152023, - "pr_file": "c10/cuda/CUDAException.cpp", - "created_at": "2025-05-28T01:27:54+00:00", - "commented_code": "\"Device-side assertions were explicitly omitted for this error check; the error probably arose while initializing the DSA handlers.\");\n }\n#endif\n\n TORCH_CHECK(false, check_message);\n throw c10::DeviceError(err, check_message);", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2110667766", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152023, - "pr_file": "c10/cuda/CUDAException.cpp", - "discussion_id": "2110667766", - "commented_code": "@@ -38,8 +38,7 @@ void c10_cuda_check_implementation(\n \"Device-side assertions were explicitly omitted for this error check; the error probably arose while initializing the DSA handlers.\");\n }\n #endif\n-\n- TORCH_CHECK(false, check_message);\n+ throw c10::DeviceError(err, check_message);", - "comment_created_at": "2025-05-28T01:27:54+00:00", - "comment_author": "ngimel", - "comment_body": "TORCH_CHECK also provides information about the callsite that this will lose? https://github.com/pytorch/pytorch/blob/e9e1aacef8913bc3baabe502e2858b776e6aea3f/c10/util/Exception.cpp#L109", - "pr_file_module": null - }, - { - "comment_id": "2110685549", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152023, - "pr_file": "c10/cuda/CUDAException.cpp", - "discussion_id": "2110667766", - "commented_code": "@@ -38,8 +38,7 @@ void c10_cuda_check_implementation(\n \"Device-side assertions were explicitly omitted for this error check; the error probably arose while initializing the DSA handlers.\");\n }\n #endif\n-\n- TORCH_CHECK(false, check_message);\n+ throw c10::DeviceError(err, check_message);", - "comment_created_at": "2025-05-28T01:41:55+00:00", - "comment_author": "ngimel", - "comment_body": "TORCH_CHECK_WITH(DeviceError, false, message)", - "pr_file_module": null - }, - { - "comment_id": "2110945768", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152023, - "pr_file": "c10/cuda/CUDAException.cpp", - "discussion_id": "2110667766", - "commented_code": "@@ -38,8 +38,7 @@ void c10_cuda_check_implementation(\n \"Device-side assertions were explicitly omitted for this error check; the error probably arose while initializing the DSA handlers.\");\n }\n #endif\n-\n- TORCH_CHECK(false, check_message);\n+ throw c10::DeviceError(err, check_message);", - "comment_created_at": "2025-05-28T05:24:01+00:00", - "comment_author": "mradmila", - "comment_body": "Actually, wouldn't that leave \"err\" behind? 
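The thread above settles on a consistent pattern for these CPython bindings: let the argument parser and TORCH_CHECK throw, let the HANDLE_TH_ERRORS / END_HANDLE_TH_ERRORS pair translate the C++ exception into a Python error and return nullptr, and reserve Py_RETURN_NONE for genuine success. The sketch below is an editor-added illustration only and is not part of the recorded review; `example_binding` and its parser signature are hypothetical, and it assumes the usual torch/csrc headers (python_arg_parser.h, Exceptions.h).

```cpp
static PyObject* example_binding(PyObject* /*self*/, PyObject* args, PyObject* kwargs) {
  HANDLE_TH_ERRORS
  static torch::PythonArgParser parser(
      {"example(Tensor data, *, bool? copy=None)"});
  torch::ParsedArgs<2> parsed_args{};
  auto r = parser.parse(args, kwargs, parsed_args);
  // A signature mismatch is reported by the parser itself as a C++ error,
  // which END_HANDLE_TH_ERRORS converts into the matching Python exception,
  // so any other index here would be a parser bug rather than a user error.
  TORCH_INTERNAL_ASSERT(r.idx == 0);
  auto data = r.tensor(0);
  TORCH_CHECK(data.defined(), "data must be a defined Tensor");  // surfaces as a Python error
  Py_RETURN_NONE;  // success returns a real object; nullptr is reserved for error paths
  END_HANDLE_TH_ERRORS
}
```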
We want all of it, no?", - "pr_file_module": null - }, - { - "comment_id": "2110961413", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152023, - "pr_file": "c10/cuda/CUDAException.cpp", - "discussion_id": "2110667766", - "commented_code": "@@ -38,8 +38,7 @@ void c10_cuda_check_implementation(\n \"Device-side assertions were explicitly omitted for this error check; the error probably arose while initializing the DSA handlers.\");\n }\n #endif\n-\n- TORCH_CHECK(false, check_message);\n+ throw c10::DeviceError(err, check_message);", - "comment_created_at": "2025-05-28T05:36:26+00:00", - "comment_author": "mradmila", - "comment_body": "Also, I hope we don't strip error messages on the use side?", - "pr_file_module": null - }, - { - "comment_id": "2111059460", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152023, - "pr_file": "c10/cuda/CUDAException.cpp", - "discussion_id": "2110667766", - "commented_code": "@@ -38,8 +38,7 @@ void c10_cuda_check_implementation(\n \"Device-side assertions were explicitly omitted for this error check; the error probably arose while initializing the DSA handlers.\");\n }\n #endif\n-\n- TORCH_CHECK(false, check_message);\n+ throw c10::DeviceError(err, check_message);", - "comment_created_at": "2025-05-28T06:45:02+00:00", - "comment_author": "malfet", - "comment_body": "@ngimel TORCH_CHECK will not preserve the exist code. And callsite was already stripped in the existing code (it was passed as arguments to that function, but ignored) I can add it back as prerequisite/followup PR", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2089785335", - "pr_number": 150218, - "pr_file": "torch/csrc/Module.cpp", - "created_at": "2025-05-14T21:31:27+00:00", - "commented_code": "END_HANDLE_TH_ERRORS\n}\n\nstatic PyObject* THPModule_torchDeviceToDLDevice(\n PyObject* _unused,\n PyObject* data) {\n HANDLE_TH_ERRORS\n TORCH_CHECK(\n THPDevice_Check(data),\n \"torchDeviceToDLDevice: expected torch.device argument.\");\n auto device = reinterpret_cast(data)->device;\n auto dl_device = at::torchDeviceToDLDevice(device);\n auto tuple = PyTuple_New(2);", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2089785335", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150218, - "pr_file": "torch/csrc/Module.cpp", - "discussion_id": "2089785335", - "commented_code": "@@ -636,6 +668,22 @@ static PyObject* THPModule_fromDLPack(PyObject* _unused, PyObject* data) {\n END_HANDLE_TH_ERRORS\n }\n \n+static PyObject* THPModule_torchDeviceToDLDevice(\n+ PyObject* _unused,\n+ PyObject* data) {\n+ HANDLE_TH_ERRORS\n+ TORCH_CHECK(\n+ THPDevice_Check(data),\n+ \"torchDeviceToDLDevice: expected torch.device argument.\");\n+ auto device = reinterpret_cast(data)->device;\n+ auto dl_device = at::torchDeviceToDLDevice(device);\n+ auto tuple = PyTuple_New(2);", - "comment_created_at": "2025-05-14T21:31:27+00:00", - "comment_author": "albanD", - "comment_body": "Add error checking in case this failed", - "pr_file_module": null - }, - { - "comment_id": "2105840117", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150218, - "pr_file": "torch/csrc/Module.cpp", - "discussion_id": "2089785335", - "commented_code": "@@ -636,6 +668,22 @@ static PyObject* THPModule_fromDLPack(PyObject* _unused, PyObject* data) {\n END_HANDLE_TH_ERRORS\n }\n \n+static PyObject* THPModule_torchDeviceToDLDevice(\n+ PyObject* _unused,\n+ PyObject* data) {\n+ HANDLE_TH_ERRORS\n+ TORCH_CHECK(\n+ THPDevice_Check(data),\n+ \"torchDeviceToDLDevice: expected 
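The exchange above is about not losing either half of the context: the TORCH_CHECK family records the callsite, while the raw cudaError_t has to be carried along explicitly. Below is a minimal editor-added sketch, not part of the recorded review, of one way to keep both; `report_cuda_error` is a hypothetical name, and the reviewer's TORCH_CHECK_WITH(DeviceError, false, ...) variant would additionally change the thrown type, provided DeviceError is constructible through that macro, which is exactly the open question in the thread.

```cpp
#include <string>
#include <cuda_runtime.h>        // cudaError_t, cudaGetErrorString
#include <c10/util/Exception.h>  // TORCH_CHECK / TORCH_CHECK_WITH

void report_cuda_error(cudaError_t err, const std::string& check_message) {
  // TORCH_CHECK captures __FILE__/__LINE__ for the callsite; folding the raw
  // error code into the message keeps `err` from being silently dropped.
  TORCH_CHECK(
      false,
      "CUDA error ", static_cast<int>(err), " (",
      cudaGetErrorString(err), "): ",
      check_message);
}
```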
torch.device argument.\");\n+ auto device = reinterpret_cast(data)->device;\n+ auto dl_device = at::torchDeviceToDLDevice(device);\n+ auto tuple = PyTuple_New(2);", - "comment_created_at": "2025-05-24T14:28:55+00:00", - "comment_author": "ysiraichi", - "comment_body": "Not sure I get what you mean. If `at::torchDeviceToDLDevice` errors, the error will bubble up to Python, won't it?", - "pr_file_module": null - }, - { - "comment_id": "2150360414", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150218, - "pr_file": "torch/csrc/Module.cpp", - "discussion_id": "2089785335", - "commented_code": "@@ -636,6 +668,22 @@ static PyObject* THPModule_fromDLPack(PyObject* _unused, PyObject* data) {\n END_HANDLE_TH_ERRORS\n }\n \n+static PyObject* THPModule_torchDeviceToDLDevice(\n+ PyObject* _unused,\n+ PyObject* data) {\n+ HANDLE_TH_ERRORS\n+ TORCH_CHECK(\n+ THPDevice_Check(data),\n+ \"torchDeviceToDLDevice: expected torch.device argument.\");\n+ auto device = reinterpret_cast(data)->device;\n+ auto dl_device = at::torchDeviceToDLDevice(device);\n+ auto tuple = PyTuple_New(2);", - "comment_created_at": "2025-06-16T16:00:19+00:00", - "comment_author": "albanD", - "comment_body": "CPython is C API, not C++. There is no error throwing here, you need to check the tuple is not null and if so, make this a c++ error\r\n\r\n```cpp\r\nif (!tuple) {\r\n throw python_error();\r\n}\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2089799016", - "pr_number": 150218, - "pr_file": "torch/csrc/Module.cpp", - "created_at": "2025-05-14T21:44:38+00:00", - "commented_code": "}\n\ntemplate \nPyObject* THPModule_toDLPackImpl(PyObject* _unused, PyObject* data) {\nPyObject* THPModule_toDLPackImpl(\n PyObject* self,\n PyObject* args,\n PyObject* kwargs) {\n HANDLE_TH_ERRORS\n TORCH_CHECK(THPVariable_Check(data), \"data must be a Tensor\");\n auto tensor = at::DLPackTraits::toDLPack(THPVariable_Unpack(data));\n return PyCapsule_New(\n tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n static torch::PythonArgParser parser(\n {\"_to_dlpack(Tensor data, *, IntArrayRef? dl_device=None, bool? 
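The point above is that the CPython C API reports failure through a null return plus a set Python error rather than by throwing, so each allocating call needs an explicit check. Here is an editor-added sketch of the requested pattern, assuming the torch/csrc environment (python_error from Exceptions.h, THPUtils_packInt64, and the DLDevice struct with device_type/device_id fields); the function name is hypothetical.

```cpp
static PyObject* device_to_dlpack_tuple(const at::Device& device) {
  auto dl_device = at::torchDeviceToDLDevice(device);  // C++ errors propagate normally
  PyObject* tuple = PyTuple_New(2);                    // C API: nullptr on failure
  if (!tuple) {
    // Turn the CPython failure into a C++ exception; the surrounding
    // HANDLE_TH_ERRORS block translates it back into the already-set Python error.
    throw python_error();
  }
  PyTuple_SET_ITEM(tuple, 0, THPUtils_packInt64(static_cast<int64_t>(dl_device.device_type)));
  PyTuple_SET_ITEM(tuple, 1, THPUtils_packInt64(static_cast<int64_t>(dl_device.device_id)));
  return tuple;
}
```

A fuller version would also check the results of the int-packing calls before storing them.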
copy=None)\"});\n torch::ParsedArgs<3> parsed_args{};\n auto r = parser.parse(args, kwargs, parsed_args);\n\n if (r.idx == 0) {\n auto data = r.tensor(0);\n auto dl_device = r.intlist(1);\n auto copy = r.toBoolOptional(2);\n\n // Parse the int list into a tuple.\n std::optional optional_dl_device;\n\n if (!dl_device.empty()) {\n TORCH_CHECK(\n dl_device.size() == 2,\n \"dl_device must be either None or a tuple of ints\");\n optional_dl_device = DLDevice{\n static_cast(dl_device[0]),\n static_cast(dl_device[1])};\n }\n\n auto tensor = at::DLPackTraits::toDLPack(\n at::maybeCopyTensor(data, optional_dl_device, copy));\n return PyCapsule_New(\n tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n }\n\n return nullptr;", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2089799016", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150218, - "pr_file": "torch/csrc/Module.cpp", - "discussion_id": "2089799016", - "commented_code": "@@ -607,25 +607,57 @@ void DLPack_Capsule_Destructor(PyObject* data) {\n }\n \n template \n-PyObject* THPModule_toDLPackImpl(PyObject* _unused, PyObject* data) {\n+PyObject* THPModule_toDLPackImpl(\n+ PyObject* self,\n+ PyObject* args,\n+ PyObject* kwargs) {\n HANDLE_TH_ERRORS\n- TORCH_CHECK(THPVariable_Check(data), \"data must be a Tensor\");\n- auto tensor = at::DLPackTraits::toDLPack(THPVariable_Unpack(data));\n- return PyCapsule_New(\n- tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n+ static torch::PythonArgParser parser(\n+ {\"_to_dlpack(Tensor data, *, IntArrayRef? dl_device=None, bool? copy=None)\"});\n+ torch::ParsedArgs<3> parsed_args{};\n+ auto r = parser.parse(args, kwargs, parsed_args);\n+\n+ if (r.idx == 0) {\n+ auto data = r.tensor(0);\n+ auto dl_device = r.intlist(1);\n+ auto copy = r.toBoolOptional(2);\n+\n+ // Parse the int list into a tuple.\n+ std::optional optional_dl_device;\n+\n+ if (!dl_device.empty()) {\n+ TORCH_CHECK(\n+ dl_device.size() == 2,\n+ \"dl_device must be either None or a tuple of ints\");\n+ optional_dl_device = DLDevice{\n+ static_cast(dl_device[0]),\n+ static_cast(dl_device[1])};\n+ }\n+\n+ auto tensor = at::DLPackTraits::toDLPack(\n+ at::maybeCopyTensor(data, optional_dl_device, copy));\n+ return PyCapsule_New(\n+ tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n+ }\n+\n+ return nullptr;", - "comment_created_at": "2025-05-14T21:44:38+00:00", - "comment_author": "albanD", - "comment_body": "I'm don't recall, will this have the right python err set?", - "pr_file_module": null - }, - { - "comment_id": "2105839664", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150218, - "pr_file": "torch/csrc/Module.cpp", - "discussion_id": "2089799016", - "commented_code": "@@ -607,25 +607,57 @@ void DLPack_Capsule_Destructor(PyObject* data) {\n }\n \n template \n-PyObject* THPModule_toDLPackImpl(PyObject* _unused, PyObject* data) {\n+PyObject* THPModule_toDLPackImpl(\n+ PyObject* self,\n+ PyObject* args,\n+ PyObject* kwargs) {\n HANDLE_TH_ERRORS\n- TORCH_CHECK(THPVariable_Check(data), \"data must be a Tensor\");\n- auto tensor = at::DLPackTraits::toDLPack(THPVariable_Unpack(data));\n- return PyCapsule_New(\n- tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n+ static torch::PythonArgParser parser(\n+ {\"_to_dlpack(Tensor data, *, IntArrayRef? dl_device=None, bool? 
copy=None)\"});\n+ torch::ParsedArgs<3> parsed_args{};\n+ auto r = parser.parse(args, kwargs, parsed_args);\n+\n+ if (r.idx == 0) {\n+ auto data = r.tensor(0);\n+ auto dl_device = r.intlist(1);\n+ auto copy = r.toBoolOptional(2);\n+\n+ // Parse the int list into a tuple.\n+ std::optional optional_dl_device;\n+\n+ if (!dl_device.empty()) {\n+ TORCH_CHECK(\n+ dl_device.size() == 2,\n+ \"dl_device must be either None or a tuple of ints\");\n+ optional_dl_device = DLDevice{\n+ static_cast(dl_device[0]),\n+ static_cast(dl_device[1])};\n+ }\n+\n+ auto tensor = at::DLPackTraits::toDLPack(\n+ at::maybeCopyTensor(data, optional_dl_device, copy));\n+ return PyCapsule_New(\n+ tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n+ }\n+\n+ return nullptr;", - "comment_created_at": "2025-05-24T14:26:49+00:00", - "comment_author": "ysiraichi", - "comment_body": "I think so. At least, it will throw an error inside the parser. I will replace it with `Py_RETURN_NONE`, just for consistency.", - "pr_file_module": null - }, - { - "comment_id": "2147035848", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150218, - "pr_file": "torch/csrc/Module.cpp", - "discussion_id": "2089799016", - "commented_code": "@@ -607,25 +607,57 @@ void DLPack_Capsule_Destructor(PyObject* data) {\n }\n \n template \n-PyObject* THPModule_toDLPackImpl(PyObject* _unused, PyObject* data) {\n+PyObject* THPModule_toDLPackImpl(\n+ PyObject* self,\n+ PyObject* args,\n+ PyObject* kwargs) {\n HANDLE_TH_ERRORS\n- TORCH_CHECK(THPVariable_Check(data), \"data must be a Tensor\");\n- auto tensor = at::DLPackTraits::toDLPack(THPVariable_Unpack(data));\n- return PyCapsule_New(\n- tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n+ static torch::PythonArgParser parser(\n+ {\"_to_dlpack(Tensor data, *, IntArrayRef? dl_device=None, bool? copy=None)\"});\n+ torch::ParsedArgs<3> parsed_args{};\n+ auto r = parser.parse(args, kwargs, parsed_args);\n+\n+ if (r.idx == 0) {\n+ auto data = r.tensor(0);\n+ auto dl_device = r.intlist(1);\n+ auto copy = r.toBoolOptional(2);\n+\n+ // Parse the int list into a tuple.\n+ std::optional optional_dl_device;\n+\n+ if (!dl_device.empty()) {\n+ TORCH_CHECK(\n+ dl_device.size() == 2,\n+ \"dl_device must be either None or a tuple of ints\");\n+ optional_dl_device = DLDevice{\n+ static_cast(dl_device[0]),\n+ static_cast(dl_device[1])};\n+ }\n+\n+ auto tensor = at::DLPackTraits::toDLPack(\n+ at::maybeCopyTensor(data, optional_dl_device, copy));\n+ return PyCapsule_New(\n+ tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n+ }\n+\n+ return nullptr;", - "comment_created_at": "2025-06-14T17:36:14+00:00", - "comment_author": "ysiraichi", - "comment_body": "I think I hadn't understood you question earlier. If you are asking whether a C++ exception will be mapped the correct Python error set, the answer is: yes! `END_HANDLE_TH_ERRORS` will take care of that. 
Specifically, `torch::translate_exception_to_python(std::current_exception())` call will do the job.", - "pr_file_module": null - }, - { - "comment_id": "2150371678", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150218, - "pr_file": "torch/csrc/Module.cpp", - "discussion_id": "2089799016", - "commented_code": "@@ -607,25 +607,57 @@ void DLPack_Capsule_Destructor(PyObject* data) {\n }\n \n template \n-PyObject* THPModule_toDLPackImpl(PyObject* _unused, PyObject* data) {\n+PyObject* THPModule_toDLPackImpl(\n+ PyObject* self,\n+ PyObject* args,\n+ PyObject* kwargs) {\n HANDLE_TH_ERRORS\n- TORCH_CHECK(THPVariable_Check(data), \"data must be a Tensor\");\n- auto tensor = at::DLPackTraits::toDLPack(THPVariable_Unpack(data));\n- return PyCapsule_New(\n- tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n+ static torch::PythonArgParser parser(\n+ {\"_to_dlpack(Tensor data, *, IntArrayRef? dl_device=None, bool? copy=None)\"});\n+ torch::ParsedArgs<3> parsed_args{};\n+ auto r = parser.parse(args, kwargs, parsed_args);\n+\n+ if (r.idx == 0) {\n+ auto data = r.tensor(0);\n+ auto dl_device = r.intlist(1);\n+ auto copy = r.toBoolOptional(2);\n+\n+ // Parse the int list into a tuple.\n+ std::optional optional_dl_device;\n+\n+ if (!dl_device.empty()) {\n+ TORCH_CHECK(\n+ dl_device.size() == 2,\n+ \"dl_device must be either None or a tuple of ints\");\n+ optional_dl_device = DLDevice{\n+ static_cast(dl_device[0]),\n+ static_cast(dl_device[1])};\n+ }\n+\n+ auto tensor = at::DLPackTraits::toDLPack(\n+ at::maybeCopyTensor(data, optional_dl_device, copy));\n+ return PyCapsule_New(\n+ tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n+ }\n+\n+ return nullptr;", - "comment_created_at": "2025-06-16T16:06:38+00:00", - "comment_author": "albanD", - "comment_body": "Returning None here is completely different. These are in no way interchangeable.\r\nReturning `nullptr` from these APIs mean that something went wrong and the caller should check the globally set error for more info. Returning None means that all went well and the result is \"None\".\r\nYou either want one or the other :D \r\nOr maybe you're saying this is dead code?", - "pr_file_module": null - }, - { - "comment_id": "2158931025", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150218, - "pr_file": "torch/csrc/Module.cpp", - "discussion_id": "2089799016", - "commented_code": "@@ -607,25 +607,57 @@ void DLPack_Capsule_Destructor(PyObject* data) {\n }\n \n template \n-PyObject* THPModule_toDLPackImpl(PyObject* _unused, PyObject* data) {\n+PyObject* THPModule_toDLPackImpl(\n+ PyObject* self,\n+ PyObject* args,\n+ PyObject* kwargs) {\n HANDLE_TH_ERRORS\n- TORCH_CHECK(THPVariable_Check(data), \"data must be a Tensor\");\n- auto tensor = at::DLPackTraits::toDLPack(THPVariable_Unpack(data));\n- return PyCapsule_New(\n- tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n+ static torch::PythonArgParser parser(\n+ {\"_to_dlpack(Tensor data, *, IntArrayRef? dl_device=None, bool? 
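For completeness, the translation step referred to above looks roughly like this editor-added sketch (illustrative only, not part of the review record): END_HANDLE_TH_ERRORS boils down to a catch-all that hands the in-flight exception to torch::translate_exception_to_python, which sets the Python error, and then returns nullptr to the interpreter; the real macro in torch/csrc/Exceptions.h expands to a more detailed version of this.

```cpp
static PyObject* example_wrapper(PyObject* /*self*/, PyObject* /*args*/) {
  try {
    TORCH_CHECK(false, "some failing check");  // any C++ error thrown in the body
    Py_RETURN_NONE;
  } catch (...) {
    torch::translate_exception_to_python(std::current_exception());
    return nullptr;  // the Python error is now set for the caller
  }
}
```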
copy=None)\"});\n+ torch::ParsedArgs<3> parsed_args{};\n+ auto r = parser.parse(args, kwargs, parsed_args);\n+\n+ if (r.idx == 0) {\n+ auto data = r.tensor(0);\n+ auto dl_device = r.intlist(1);\n+ auto copy = r.toBoolOptional(2);\n+\n+ // Parse the int list into a tuple.\n+ std::optional optional_dl_device;\n+\n+ if (!dl_device.empty()) {\n+ TORCH_CHECK(\n+ dl_device.size() == 2,\n+ \"dl_device must be either None or a tuple of ints\");\n+ optional_dl_device = DLDevice{\n+ static_cast(dl_device[0]),\n+ static_cast(dl_device[1])};\n+ }\n+\n+ auto tensor = at::DLPackTraits::toDLPack(\n+ at::maybeCopyTensor(data, optional_dl_device, copy));\n+ return PyCapsule_New(\n+ tensor, at::DLPackTraits::capsule, DLPack_Capsule_Destructor);\n+ }\n+\n+ return nullptr;", - "comment_created_at": "2025-06-20T13:08:36+00:00", - "comment_author": "ysiraichi", - "comment_body": "This is essentially deadcode.\r\nOn second thoughts, it would be better to just `TORCH_INTERNAL_ASSERT(r.idx == 0);`. Then, there would be no need to return None, since the bug would be in the arg parser.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-prevent-memory-vulnerabilities.json b/_reviewers/pytorch-prevent-memory-vulnerabilities.json new file mode 100644 index 0000000..5d7aff3 --- /dev/null +++ b/_reviewers/pytorch-prevent-memory-vulnerabilities.json @@ -0,0 +1,58 @@ +[ + { + "discussion_id": "2012679126", + "pr_number": 148605, + "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", + "created_at": "2025-03-25T18:10:26+00:00", + "commented_code": "}\n}\n\n\ntemplate \n__global__ void GammaBetaBackwardSimpleCUDAKernel(\ntemplate \n__device__\n__forceinline__\nvoid\nblockReduceGammaBetaBackwardsHelper(\n int64_t M_start,\n int64_t M,\n int64_t N,\n const T* dY,\n const T* X,\n const T_ACC* mean,\n const T_ACC* rstd,\n T* dg,\n T* db) {\n const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n if (j < N) {\n T_ACC sum1 = 0;\n T_ACC sum2 = 0;\n for (int64_t i = 0; i < M; ++i) {\n const int64_t index = i * N + j;\n sum1 += dg == nullptr ? T_ACC(0)\n : static_cast(dY[index]) *\n (static_cast(X[index]) - static_cast(mean[i])) *\n static_cast(rstd[i]);\n sum2 += db == nullptr ? 
T_ACC(0) : static_cast(dY[index]);\n const T* __restrict__ dY,\n const T* __restrict__ X,\n const T_ACC* __restrict__ mean,\n const T_ACC* __restrict__ rstd,\n T* __restrict__ dg,\n T* __restrict__ db,\n T_ACC &dg_sum,\n T_ACC &db_sum\n) {\n constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n\n int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n T_ACC warp_mean = 0, warp_rstd = 0;\n if (lane_id < rows_per_thread_y && mean_index + lane_id < M) {\n warp_mean = mean[mean_index + lane_id];\n warp_rstd = rstd[mean_index + lane_id];\n }\n if (dg != nullptr) {\n dg[j] = sum1;\n // We do a WARP_SYNC() here because we use WARP_SHFL below to access\n // warp_mean and warp_rstd.\n WARP_SYNC();\n\n T_ACC dY_regs[rows_per_thread_y] = {0};\n T_ACC X_regs[rows_per_thread_y] = {0};\n #pragma unroll\n for (int i = 0; i < rows_per_thread_y; ++i) {\n int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n bool active = true;\n if (check_x && thread_x >= N) {\n active = false;\n }\n if (check_y && current_y >= M) {\n active = false;\n }\n if (active) {\n dY_regs[i] = dY[current_y * N + thread_x];\n X_regs[i] = X[current_y * N + thread_x];\n }\n }\n if (db != nullptr) {\n db[j] = sum2;\n\n #pragma unroll\n for (int i = 0; i < rows_per_thread_y; ++i) {\n T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n db_sum += dY_regs[i];\n }\n }\n}\n\n// This implementation gets called if M and N divide with 32. This case should\n// be the most common. We can then make better use of warp level intrinsics\n// to improve performance.\ntemplate \n__device__\n__forceinline__\nvoid\nblockReduceGammaBetaBackwardsWithChecks(\n int64_t M,\n int64_t N,\n const T* __restrict__ dY,\n const T* __restrict__ X,\n const T_ACC* __restrict__ mean,\n const T_ACC* __restrict__ rstd,\n T* __restrict__ dg,\n T* __restrict__ db,\n T_ACC &dg_sum,\n T_ACC &db_sum\n) {\n for (int64_t M_start = blockIdx.y * rows_per_block_y;\n M_start < M;\n M_start += rows_per_block_y * gridDim.y) {\n int64_t M_end = M_start + rows_per_block_y - 1;\n if (!check_y || M_end < M) {\n blockReduceGammaBetaBackwardsHelper\n (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n } else {\n blockReduceGammaBetaBackwardsHelper\n (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n }\n }\n}\n\ntemplate \n__global__ void GammaBetaBackwardCUDAKernel_32x32(\n// block_dim_x is the number of threads in the x dimension per block.\n// block_dim_y is the number of threads in the y dimension per block.\n// rows_per_block_y is the size of the tile (number of data elements)\n// in the y dimension per block.\n// partial_reduction indicates whether we need to reduce across threads\n// or not. If set to true, we will not reduce across threads. This can\n// be faster in the M >> N case but requires another kernel to do a full\n// final reduction.\n// aligned_grid means the data size is a multiple of tile size. 
In that\n// case we don't need to check for boundary conditions which can provide\n// a further speedup by not needing instructions to check for edge cases\n// and not needing predicate registers.\ntemplate \n__global__\nvoid\n GammaBetaBackwardCUDAKernelTemplate(\n int64_t M,\n int64_t N,\n const T* dY,\n const T* X,\n const T_ACC* mean,\n const T_ACC* rstd,\n T* dg,\n T* db) {\n alignas(sizeof(double)) extern __shared__ char s_data1[];\n T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n T_ACC* s_dg;\n T_ACC* s_db;\n const T* __restrict__ dY,\n const T* __restrict__ X,\n const T_ACC* __restrict__ mean,\n const T_ACC* __restrict__ rstd,\n T* __restrict__ dg,\n T* __restrict__ db) {\n // This assert is a compile-time check only.\n constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n static_assert(rows_per_thread_y <= kWarpSize);\n\n T_ACC dg_sum = 0;\n T_ACC db_sum = 0;\n\n const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n\n if (j < N) {\n constexpr int unroll_factor = 8;\n int laneId = threadIdx.x & (C10_WARP_SIZE - 1);\n\n T_ACC mean_reg, mean_reg_tmp;\n T_ACC rstd_reg, rstd_reg_tmp;\n T dY_reg;\n T X_reg;\n\n // Main loop\n int bcounter;\n for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor);\n bcounter++) {\n int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n if (aligned_grid) {\n // When N and M align perfectly with block_dim_x and block_dim_y, we\n // can skip boundary condition checks that waste instruction issue slots.\n blockReduceGammaBetaBackwardsWithChecks\n \n (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n } else {\n // In the general case we need to check boundary conditions in the M\n // dimension. However, we can still avoid boundary checks in the N dimension\n // for the inner blocks. So try to avoid those checks when possible.\n if (blockIdx.x * block_dim_x + block_dim_x - 1 < N) {\n blockReduceGammaBetaBackwardsWithChecks\n \n (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n } else {\n blockReduceGammaBetaBackwardsWithChecks\n \n (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n }\n }\n\n if (laneId < unroll_factor) {\n mean_reg_tmp = mean[offset + laneId];\n rstd_reg_tmp = rstd[offset + laneId];\n }\n WARP_SYNC();\n int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2012679126", + "repo_full_name": "pytorch/pytorch", + "pr_number": 148605, + "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", + "discussion_id": "2012679126", + "commented_code": "@@ -508,223 +509,362 @@ __global__ void layer_norm_grad_input_kernel_vectorized(\n }\n }\n \n-\n-template \n-__global__ void GammaBetaBackwardSimpleCUDAKernel(\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsHelper(\n+ int64_t M_start,\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n- if (j < N) {\n- T_ACC sum1 = 0;\n- T_ACC sum2 = 0;\n- for (int64_t i = 0; i < M; ++i) {\n- const int64_t index = i * N + j;\n- sum1 += dg == nullptr ? T_ACC(0)\n- : static_cast(dY[index]) *\n- (static_cast(X[index]) - static_cast(mean[i])) *\n- static_cast(rstd[i]);\n- sum2 += db == nullptr ? 
T_ACC(0) : static_cast(dY[index]);\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n+\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y && mean_index + lane_id < M) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n }\n- if (dg != nullptr) {\n- dg[j] = sum1;\n+ // We do a WARP_SYNC() here because we use WARP_SHFL below to access\n+ // warp_mean and warp_rstd.\n+ WARP_SYNC();\n+\n+ T_ACC dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ bool active = true;\n+ if (check_x && thread_x >= N) {\n+ active = false;\n+ }\n+ if (check_y && current_y >= M) {\n+ active = false;\n+ }\n+ if (active) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n+ }\n }\n- if (db != nullptr) {\n- db[j] = sum2;\n+\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n }\n- }\n }\n \n-// This implementation gets called if M and N divide with 32. This case should\n-// be the most common. We can then make better use of warp level intrinsics\n-// to improve performance.\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsWithChecks(\n+ int64_t M,\n+ int64_t N,\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ for (int64_t M_start = blockIdx.y * rows_per_block_y;\n+ M_start < M;\n+ M_start += rows_per_block_y * gridDim.y) {\n+ int64_t M_end = M_start + rows_per_block_y - 1;\n+ if (!check_y || M_end < M) {\n+ blockReduceGammaBetaBackwardsHelper\n+ (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ blockReduceGammaBetaBackwardsHelper\n+ (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ }\n+ }\n+}\n \n-template \n-__global__ void GammaBetaBackwardCUDAKernel_32x32(\n+// block_dim_x is the number of threads in the x dimension per block.\n+// block_dim_y is the number of threads in the y dimension per block.\n+// rows_per_block_y is the size of the tile (number of data elements)\n+// in the y dimension per block.\n+// partial_reduction indicates whether we need to reduce across threads\n+// or not. If set to true, we will not reduce across threads. This can\n+// be faster in the M >> N case but requires another kernel to do a full\n+// final reduction.\n+// aligned_grid means the data size is a multiple of tile size. 
In that\n+// case we don't need to check for boundary conditions which can provide\n+// a further speedup by not needing instructions to check for edge cases\n+// and not needing predicate registers.\n+template \n+__global__\n+void\n+ GammaBetaBackwardCUDAKernelTemplate(\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- alignas(sizeof(double)) extern __shared__ char s_data1[];\n- T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n- T_ACC* s_dg;\n- T_ACC* s_db;\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db) {\n+ // This assert is a compile-time check only.\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ static_assert(rows_per_thread_y <= kWarpSize);\n \n T_ACC dg_sum = 0;\n T_ACC db_sum = 0;\n \n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n-\n- if (j < N) {\n- constexpr int unroll_factor = 8;\n- int laneId = threadIdx.x & (C10_WARP_SIZE - 1);\n-\n- T_ACC mean_reg, mean_reg_tmp;\n- T_ACC rstd_reg, rstd_reg_tmp;\n- T dY_reg;\n- T X_reg;\n-\n- // Main loop\n- int bcounter;\n- for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor);\n- bcounter++) {\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n+ if (aligned_grid) {\n+ // When N and M align perfectly with block_dim_x and block_dim_y, we\n+ // can skip boundary condition checks that waste instruction issue slots.\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ // In the general case we need to check boundary conditions in the M\n+ // dimension. However, we can still avoid boundary checks in the N dimension\n+ // for the inner blocks. So try to avoid those checks when possible.\n+ if (blockIdx.x * block_dim_x + block_dim_x - 1 < N) {\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ }\n+ }\n \n- if (laneId < unroll_factor) {\n- mean_reg_tmp = mean[offset + laneId];\n- rstd_reg_tmp = rstd[offset + laneId];\n- }\n- WARP_SYNC();\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;", + "comment_created_at": "2025-03-25T18:10:26+00:00", + "comment_author": "eqy", + "comment_body": "nit: I think this is potentially unsafe if you intend this to do 64-bit indexing. 
Overflow will still happen as blockIDx.y and threadIdx.y are 32-bit wide, so one of the scalars here should be cast to the wider type first.", + "pr_file_module": null + }, + { + "comment_id": "2012809564", + "repo_full_name": "pytorch/pytorch", + "pr_number": 148605, + "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", + "discussion_id": "2012679126", + "commented_code": "@@ -508,223 +509,362 @@ __global__ void layer_norm_grad_input_kernel_vectorized(\n }\n }\n \n-\n-template \n-__global__ void GammaBetaBackwardSimpleCUDAKernel(\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsHelper(\n+ int64_t M_start,\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n- if (j < N) {\n- T_ACC sum1 = 0;\n- T_ACC sum2 = 0;\n- for (int64_t i = 0; i < M; ++i) {\n- const int64_t index = i * N + j;\n- sum1 += dg == nullptr ? T_ACC(0)\n- : static_cast(dY[index]) *\n- (static_cast(X[index]) - static_cast(mean[i])) *\n- static_cast(rstd[i]);\n- sum2 += db == nullptr ? T_ACC(0) : static_cast(dY[index]);\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n+\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y && mean_index + lane_id < M) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n }\n- if (dg != nullptr) {\n- dg[j] = sum1;\n+ // We do a WARP_SYNC() here because we use WARP_SHFL below to access\n+ // warp_mean and warp_rstd.\n+ WARP_SYNC();\n+\n+ T_ACC dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ bool active = true;\n+ if (check_x && thread_x >= N) {\n+ active = false;\n+ }\n+ if (check_y && current_y >= M) {\n+ active = false;\n+ }\n+ if (active) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n+ }\n }\n- if (db != nullptr) {\n- db[j] = sum2;\n+\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n }\n- }\n }\n \n-// This implementation gets called if M and N divide with 32. This case should\n-// be the most common. 
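The nit above is the classic 64-bit indexing trap: blockIdx.y and threadIdx.y are only 32 bits wide, so the multiply has to be widened before it happens, not after. An editor-added CUDA C++ sketch of the fix (the helper name is hypothetical):

```cpp
__device__ __forceinline__ int64_t global_row_index(int rows_per_block) {
  // Casting one operand first makes the whole expression evaluate in 64-bit;
  // without the cast, blockIdx.y * rows_per_block would wrap in 32-bit for
  // large grids and only then be widened.
  return static_cast<int64_t>(blockIdx.y) * rows_per_block + threadIdx.y;
}
```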
We can then make better use of warp level intrinsics\n-// to improve performance.\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsWithChecks(\n+ int64_t M,\n+ int64_t N,\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ for (int64_t M_start = blockIdx.y * rows_per_block_y;\n+ M_start < M;\n+ M_start += rows_per_block_y * gridDim.y) {\n+ int64_t M_end = M_start + rows_per_block_y - 1;\n+ if (!check_y || M_end < M) {\n+ blockReduceGammaBetaBackwardsHelper\n+ (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ blockReduceGammaBetaBackwardsHelper\n+ (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ }\n+ }\n+}\n \n-template \n-__global__ void GammaBetaBackwardCUDAKernel_32x32(\n+// block_dim_x is the number of threads in the x dimension per block.\n+// block_dim_y is the number of threads in the y dimension per block.\n+// rows_per_block_y is the size of the tile (number of data elements)\n+// in the y dimension per block.\n+// partial_reduction indicates whether we need to reduce across threads\n+// or not. If set to true, we will not reduce across threads. This can\n+// be faster in the M >> N case but requires another kernel to do a full\n+// final reduction.\n+// aligned_grid means the data size is a multiple of tile size. In that\n+// case we don't need to check for boundary conditions which can provide\n+// a further speedup by not needing instructions to check for edge cases\n+// and not needing predicate registers.\n+template \n+__global__\n+void\n+ GammaBetaBackwardCUDAKernelTemplate(\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- alignas(sizeof(double)) extern __shared__ char s_data1[];\n- T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n- T_ACC* s_dg;\n- T_ACC* s_db;\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db) {\n+ // This assert is a compile-time check only.\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ static_assert(rows_per_thread_y <= kWarpSize);\n \n T_ACC dg_sum = 0;\n T_ACC db_sum = 0;\n \n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n-\n- if (j < N) {\n- constexpr int unroll_factor = 8;\n- int laneId = threadIdx.x & (C10_WARP_SIZE - 1);\n-\n- T_ACC mean_reg, mean_reg_tmp;\n- T_ACC rstd_reg, rstd_reg_tmp;\n- T dY_reg;\n- T X_reg;\n-\n- // Main loop\n- int bcounter;\n- for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor);\n- bcounter++) {\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n+ if (aligned_grid) {\n+ // When N and M align perfectly with block_dim_x and block_dim_y, we\n+ // can skip boundary condition checks that waste instruction issue slots.\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ // In the general case we need to check boundary conditions in the M\n+ // dimension. However, we can still avoid boundary checks in the N dimension\n+ // for the inner blocks. 
So try to avoid those checks when possible.\n+ if (blockIdx.x * block_dim_x + block_dim_x - 1 < N) {\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ }\n+ }\n \n- if (laneId < unroll_factor) {\n- mean_reg_tmp = mean[offset + laneId];\n- rstd_reg_tmp = rstd[offset + laneId];\n- }\n- WARP_SYNC();\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;", + "comment_created_at": "2025-03-25T19:31:32+00:00", + "comment_author": "ahmadsharif1", + "comment_body": "Great point. Done!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2109841015", + "pr_number": 153150, + "pr_file": "torch/csrc/dynamo/guards.cpp", + "created_at": "2025-05-27T18:06:14+00:00", + "commented_code": "Py_ssize_t _index{-1};\n};\n\n/**\n * Represents set[index] accessor by converting the set into a dictionary.\n */\nclass SetGetItemGuardAccessor : public GuardAccessor {\n public:\n SetGetItemGuardAccessor(\n RootGuardManager* root,\n const py::object& index,\n std::string source,\n py::handle example_value,\n py::handle guard_manager_enum)\n : GuardAccessor(\n root,\n index,\n std::move(source),\n example_value,\n guard_manager_enum),\n _index(py::cast(index)) {}\n\n // NB: Intentional duplication between check_nopybind and\n // check_verbose_nopybind.\n bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)\n override { // borrowed ref\n PyObject* PyDict = (PyObject*)&PyDict_Type;\n PyObject* dict =", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2109841015", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153150, + "pr_file": "torch/csrc/dynamo/guards.cpp", + "discussion_id": "2109841015", + "commented_code": "@@ -4117,6 +4141,85 @@ class ListGetItemGuardAccessor : public GuardAccessor {\n Py_ssize_t _index{-1};\n };\n \n+/**\n+ * Represents set[index] accessor by converting the set into a dictionary.\n+ */\n+class SetGetItemGuardAccessor : public GuardAccessor {\n+ public:\n+ SetGetItemGuardAccessor(\n+ RootGuardManager* root,\n+ const py::object& index,\n+ std::string source,\n+ py::handle example_value,\n+ py::handle guard_manager_enum)\n+ : GuardAccessor(\n+ root,\n+ index,\n+ std::move(source),\n+ example_value,\n+ guard_manager_enum),\n+ _index(py::cast(index)) {}\n+\n+ // NB: Intentional duplication between check_nopybind and\n+ // check_verbose_nopybind.\n+ bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)\n+ override { // borrowed ref\n+ PyObject* PyDict = (PyObject*)&PyDict_Type;\n+ PyObject* dict =", + "comment_created_at": "2025-05-27T18:06:14+00:00", + "comment_author": "williamwen42", + "comment_body": "Borrowed/new references don't seem to be handled correctly here. 
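As an aside on the kernel reviewed above, the per-row mean/rstd values are broadcast across the warp with a shuffle rather than a shared-memory round trip; the WARP_SHFL macro in the PyTorch source wraps the intrinsic shown in this editor-added CUDA C++ sketch (illustrative only).

```cpp
__device__ __forceinline__ float broadcast_from_lane(float v, int src_lane) {
  // Every active lane receives the value held by src_lane within the warp.
  return __shfl_sync(0xffffffffu, v, src_lane);
}
```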
Either use `py::object` to automatically delete or manually call `Py_[X]DECREF`.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-prevent-memory-vulnerabilities.md b/_reviewers/pytorch-prevent-memory-vulnerabilities.md index 644dc34..58c7ca9 100644 --- a/_reviewers/pytorch-prevent-memory-vulnerabilities.md +++ b/_reviewers/pytorch-prevent-memory-vulnerabilities.md @@ -36,63 +36,3 @@ PyObject* dict = ...; // Use dict Py_XDECREF(dict); ``` - - -[ - { - "discussion_id": "2012679126", - "pr_number": 148605, - "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", - "created_at": "2025-03-25T18:10:26+00:00", - "commented_code": "}\n}\n\n\ntemplate \n__global__ void GammaBetaBackwardSimpleCUDAKernel(\ntemplate \n__device__\n__forceinline__\nvoid\nblockReduceGammaBetaBackwardsHelper(\n int64_t M_start,\n int64_t M,\n int64_t N,\n const T* dY,\n const T* X,\n const T_ACC* mean,\n const T_ACC* rstd,\n T* dg,\n T* db) {\n const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n if (j < N) {\n T_ACC sum1 = 0;\n T_ACC sum2 = 0;\n for (int64_t i = 0; i < M; ++i) {\n const int64_t index = i * N + j;\n sum1 += dg == nullptr ? T_ACC(0)\n : static_cast(dY[index]) *\n (static_cast(X[index]) - static_cast(mean[i])) *\n static_cast(rstd[i]);\n sum2 += db == nullptr ? T_ACC(0) : static_cast(dY[index]);\n const T* __restrict__ dY,\n const T* __restrict__ X,\n const T_ACC* __restrict__ mean,\n const T_ACC* __restrict__ rstd,\n T* __restrict__ dg,\n T* __restrict__ db,\n T_ACC &dg_sum,\n T_ACC &db_sum\n) {\n constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n\n int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n T_ACC warp_mean = 0, warp_rstd = 0;\n if (lane_id < rows_per_thread_y && mean_index + lane_id < M) {\n warp_mean = mean[mean_index + lane_id];\n warp_rstd = rstd[mean_index + lane_id];\n }\n if (dg != nullptr) {\n dg[j] = sum1;\n // We do a WARP_SYNC() here because we use WARP_SHFL below to access\n // warp_mean and warp_rstd.\n WARP_SYNC();\n\n T_ACC dY_regs[rows_per_thread_y] = {0};\n T_ACC X_regs[rows_per_thread_y] = {0};\n #pragma unroll\n for (int i = 0; i < rows_per_thread_y; ++i) {\n int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n bool active = true;\n if (check_x && thread_x >= N) {\n active = false;\n }\n if (check_y && current_y >= M) {\n active = false;\n }\n if (active) {\n dY_regs[i] = dY[current_y * N + thread_x];\n X_regs[i] = X[current_y * N + thread_x];\n }\n }\n if (db != nullptr) {\n db[j] = sum2;\n\n #pragma unroll\n for (int i = 0; i < rows_per_thread_y; ++i) {\n T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n db_sum += dY_regs[i];\n }\n }\n}\n\n// This implementation gets called if M and N divide with 32. This case should\n// be the most common. 
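The review comment above allows either ownership style for the new reference created inside the accessor. Below is an editor-added sketch of both, using PyDict_New() purely as a stand-in for whatever call hands back the new reference; the function names are hypothetical.

```cpp
#include <pybind11/pybind11.h>
namespace py = pybind11;

// Option 1: let a py::object own the reference; it is released automatically.
bool check_with_pybind() {
  py::object dict = py::reinterpret_steal<py::object>(PyDict_New());
  if (!dict) {
    throw py::error_already_set();
  }
  return PyDict_Size(dict.ptr()) == 0;  // stand-in for the real guard logic
}

// Option 2: manage the reference manually and release it on every path.
bool check_with_manual_decref() {
  PyObject* dict = PyDict_New();  // new reference, or nullptr on failure
  if (dict == nullptr) {
    return false;                 // the Python error is already set
  }
  const bool result = PyDict_Size(dict) == 0;
  Py_XDECREF(dict);               // always drop the reference we own
  return result;
}
```

Either way, the reference is released exactly once on every control path, which is the property the guard accessors need.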
We can then make better use of warp level intrinsics\n// to improve performance.\ntemplate \n__device__\n__forceinline__\nvoid\nblockReduceGammaBetaBackwardsWithChecks(\n int64_t M,\n int64_t N,\n const T* __restrict__ dY,\n const T* __restrict__ X,\n const T_ACC* __restrict__ mean,\n const T_ACC* __restrict__ rstd,\n T* __restrict__ dg,\n T* __restrict__ db,\n T_ACC &dg_sum,\n T_ACC &db_sum\n) {\n for (int64_t M_start = blockIdx.y * rows_per_block_y;\n M_start < M;\n M_start += rows_per_block_y * gridDim.y) {\n int64_t M_end = M_start + rows_per_block_y - 1;\n if (!check_y || M_end < M) {\n blockReduceGammaBetaBackwardsHelper\n (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n } else {\n blockReduceGammaBetaBackwardsHelper\n (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n }\n }\n}\n\ntemplate \n__global__ void GammaBetaBackwardCUDAKernel_32x32(\n// block_dim_x is the number of threads in the x dimension per block.\n// block_dim_y is the number of threads in the y dimension per block.\n// rows_per_block_y is the size of the tile (number of data elements)\n// in the y dimension per block.\n// partial_reduction indicates whether we need to reduce across threads\n// or not. If set to true, we will not reduce across threads. This can\n// be faster in the M >> N case but requires another kernel to do a full\n// final reduction.\n// aligned_grid means the data size is a multiple of tile size. In that\n// case we don't need to check for boundary conditions which can provide\n// a further speedup by not needing instructions to check for edge cases\n// and not needing predicate registers.\ntemplate \n__global__\nvoid\n GammaBetaBackwardCUDAKernelTemplate(\n int64_t M,\n int64_t N,\n const T* dY,\n const T* X,\n const T_ACC* mean,\n const T_ACC* rstd,\n T* dg,\n T* db) {\n alignas(sizeof(double)) extern __shared__ char s_data1[];\n T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n T_ACC* s_dg;\n T_ACC* s_db;\n const T* __restrict__ dY,\n const T* __restrict__ X,\n const T_ACC* __restrict__ mean,\n const T_ACC* __restrict__ rstd,\n T* __restrict__ dg,\n T* __restrict__ db) {\n // This assert is a compile-time check only.\n constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n static_assert(rows_per_thread_y <= kWarpSize);\n\n T_ACC dg_sum = 0;\n T_ACC db_sum = 0;\n\n const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n\n if (j < N) {\n constexpr int unroll_factor = 8;\n int laneId = threadIdx.x & (C10_WARP_SIZE - 1);\n\n T_ACC mean_reg, mean_reg_tmp;\n T_ACC rstd_reg, rstd_reg_tmp;\n T dY_reg;\n T X_reg;\n\n // Main loop\n int bcounter;\n for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor);\n bcounter++) {\n int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n if (aligned_grid) {\n // When N and M align perfectly with block_dim_x and block_dim_y, we\n // can skip boundary condition checks that waste instruction issue slots.\n blockReduceGammaBetaBackwardsWithChecks\n \n (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n } else {\n // In the general case we need to check boundary conditions in the M\n // dimension. However, we can still avoid boundary checks in the N dimension\n // for the inner blocks. 
So try to avoid those checks when possible.\n if (blockIdx.x * block_dim_x + block_dim_x - 1 < N) {\n blockReduceGammaBetaBackwardsWithChecks\n \n (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n } else {\n blockReduceGammaBetaBackwardsWithChecks\n \n (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n }\n }\n\n if (laneId < unroll_factor) {\n mean_reg_tmp = mean[offset + laneId];\n rstd_reg_tmp = rstd[offset + laneId];\n }\n WARP_SYNC();\n int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2012679126", - "repo_full_name": "pytorch/pytorch", - "pr_number": 148605, - "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", - "discussion_id": "2012679126", - "commented_code": "@@ -508,223 +509,362 @@ __global__ void layer_norm_grad_input_kernel_vectorized(\n }\n }\n \n-\n-template \n-__global__ void GammaBetaBackwardSimpleCUDAKernel(\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsHelper(\n+ int64_t M_start,\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n- if (j < N) {\n- T_ACC sum1 = 0;\n- T_ACC sum2 = 0;\n- for (int64_t i = 0; i < M; ++i) {\n- const int64_t index = i * N + j;\n- sum1 += dg == nullptr ? T_ACC(0)\n- : static_cast(dY[index]) *\n- (static_cast(X[index]) - static_cast(mean[i])) *\n- static_cast(rstd[i]);\n- sum2 += db == nullptr ? T_ACC(0) : static_cast(dY[index]);\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n+\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y && mean_index + lane_id < M) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n }\n- if (dg != nullptr) {\n- dg[j] = sum1;\n+ // We do a WARP_SYNC() here because we use WARP_SHFL below to access\n+ // warp_mean and warp_rstd.\n+ WARP_SYNC();\n+\n+ T_ACC dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ bool active = true;\n+ if (check_x && thread_x >= N) {\n+ active = false;\n+ }\n+ if (check_y && current_y >= M) {\n+ active = false;\n+ }\n+ if (active) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n+ }\n }\n- if (db != nullptr) {\n- db[j] = sum2;\n+\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n }\n- }\n }\n \n-// This implementation gets called if M and N divide with 32. This case should\n-// be the most common. 
We can then make better use of warp level intrinsics\n-// to improve performance.\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsWithChecks(\n+ int64_t M,\n+ int64_t N,\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ for (int64_t M_start = blockIdx.y * rows_per_block_y;\n+ M_start < M;\n+ M_start += rows_per_block_y * gridDim.y) {\n+ int64_t M_end = M_start + rows_per_block_y - 1;\n+ if (!check_y || M_end < M) {\n+ blockReduceGammaBetaBackwardsHelper\n+ (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ blockReduceGammaBetaBackwardsHelper\n+ (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ }\n+ }\n+}\n \n-template \n-__global__ void GammaBetaBackwardCUDAKernel_32x32(\n+// block_dim_x is the number of threads in the x dimension per block.\n+// block_dim_y is the number of threads in the y dimension per block.\n+// rows_per_block_y is the size of the tile (number of data elements)\n+// in the y dimension per block.\n+// partial_reduction indicates whether we need to reduce across threads\n+// or not. If set to true, we will not reduce across threads. This can\n+// be faster in the M >> N case but requires another kernel to do a full\n+// final reduction.\n+// aligned_grid means the data size is a multiple of tile size. In that\n+// case we don't need to check for boundary conditions which can provide\n+// a further speedup by not needing instructions to check for edge cases\n+// and not needing predicate registers.\n+template \n+__global__\n+void\n+ GammaBetaBackwardCUDAKernelTemplate(\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- alignas(sizeof(double)) extern __shared__ char s_data1[];\n- T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n- T_ACC* s_dg;\n- T_ACC* s_db;\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db) {\n+ // This assert is a compile-time check only.\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ static_assert(rows_per_thread_y <= kWarpSize);\n \n T_ACC dg_sum = 0;\n T_ACC db_sum = 0;\n \n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n-\n- if (j < N) {\n- constexpr int unroll_factor = 8;\n- int laneId = threadIdx.x & (C10_WARP_SIZE - 1);\n-\n- T_ACC mean_reg, mean_reg_tmp;\n- T_ACC rstd_reg, rstd_reg_tmp;\n- T dY_reg;\n- T X_reg;\n-\n- // Main loop\n- int bcounter;\n- for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor);\n- bcounter++) {\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n+ if (aligned_grid) {\n+ // When N and M align perfectly with block_dim_x and block_dim_y, we\n+ // can skip boundary condition checks that waste instruction issue slots.\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ // In the general case we need to check boundary conditions in the M\n+ // dimension. However, we can still avoid boundary checks in the N dimension\n+ // for the inner blocks. 
So try to avoid those checks when possible.\n+ if (blockIdx.x * block_dim_x + block_dim_x - 1 < N) {\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ }\n+ }\n \n- if (laneId < unroll_factor) {\n- mean_reg_tmp = mean[offset + laneId];\n- rstd_reg_tmp = rstd[offset + laneId];\n- }\n- WARP_SYNC();\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;", - "comment_created_at": "2025-03-25T18:10:26+00:00", - "comment_author": "eqy", - "comment_body": "nit: I think this is potentially unsafe if you intend this to do 64-bit indexing. Overflow will still happen as blockIDx.y and threadIdx.y are 32-bit wide, so one of the scalars here should be cast to the wider type first.", - "pr_file_module": null - }, - { - "comment_id": "2012809564", - "repo_full_name": "pytorch/pytorch", - "pr_number": 148605, - "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", - "discussion_id": "2012679126", - "commented_code": "@@ -508,223 +509,362 @@ __global__ void layer_norm_grad_input_kernel_vectorized(\n }\n }\n \n-\n-template \n-__global__ void GammaBetaBackwardSimpleCUDAKernel(\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsHelper(\n+ int64_t M_start,\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n- if (j < N) {\n- T_ACC sum1 = 0;\n- T_ACC sum2 = 0;\n- for (int64_t i = 0; i < M; ++i) {\n- const int64_t index = i * N + j;\n- sum1 += dg == nullptr ? T_ACC(0)\n- : static_cast(dY[index]) *\n- (static_cast(X[index]) - static_cast(mean[i])) *\n- static_cast(rstd[i]);\n- sum2 += db == nullptr ? 
T_ACC(0) : static_cast(dY[index]);\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n+\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y && mean_index + lane_id < M) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n }\n- if (dg != nullptr) {\n- dg[j] = sum1;\n+ // We do a WARP_SYNC() here because we use WARP_SHFL below to access\n+ // warp_mean and warp_rstd.\n+ WARP_SYNC();\n+\n+ T_ACC dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ bool active = true;\n+ if (check_x && thread_x >= N) {\n+ active = false;\n+ }\n+ if (check_y && current_y >= M) {\n+ active = false;\n+ }\n+ if (active) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n+ }\n }\n- if (db != nullptr) {\n- db[j] = sum2;\n+\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n }\n- }\n }\n \n-// This implementation gets called if M and N divide with 32. This case should\n-// be the most common. We can then make better use of warp level intrinsics\n-// to improve performance.\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsWithChecks(\n+ int64_t M,\n+ int64_t N,\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ for (int64_t M_start = blockIdx.y * rows_per_block_y;\n+ M_start < M;\n+ M_start += rows_per_block_y * gridDim.y) {\n+ int64_t M_end = M_start + rows_per_block_y - 1;\n+ if (!check_y || M_end < M) {\n+ blockReduceGammaBetaBackwardsHelper\n+ (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ blockReduceGammaBetaBackwardsHelper\n+ (M_start, M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ }\n+ }\n+}\n \n-template \n-__global__ void GammaBetaBackwardCUDAKernel_32x32(\n+// block_dim_x is the number of threads in the x dimension per block.\n+// block_dim_y is the number of threads in the y dimension per block.\n+// rows_per_block_y is the size of the tile (number of data elements)\n+// in the y dimension per block.\n+// partial_reduction indicates whether we need to reduce across threads\n+// or not. If set to true, we will not reduce across threads. This can\n+// be faster in the M >> N case but requires another kernel to do a full\n+// final reduction.\n+// aligned_grid means the data size is a multiple of tile size. 
In that\n+// case we don't need to check for boundary conditions which can provide\n+// a further speedup by not needing instructions to check for edge cases\n+// and not needing predicate registers.\n+template \n+__global__\n+void\n+ GammaBetaBackwardCUDAKernelTemplate(\n int64_t M,\n int64_t N,\n- const T* dY,\n- const T* X,\n- const T_ACC* mean,\n- const T_ACC* rstd,\n- T* dg,\n- T* db) {\n- alignas(sizeof(double)) extern __shared__ char s_data1[];\n- T_ACC* s_data_typed = reinterpret_cast(&s_data1);\n- T_ACC* s_dg;\n- T_ACC* s_db;\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db) {\n+ // This assert is a compile-time check only.\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ static_assert(rows_per_thread_y <= kWarpSize);\n \n T_ACC dg_sum = 0;\n T_ACC db_sum = 0;\n \n- const int64_t j = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x;\n-\n- if (j < N) {\n- constexpr int unroll_factor = 8;\n- int laneId = threadIdx.x & (C10_WARP_SIZE - 1);\n-\n- T_ACC mean_reg, mean_reg_tmp;\n- T_ACC rstd_reg, rstd_reg_tmp;\n- T dY_reg;\n- T X_reg;\n-\n- // Main loop\n- int bcounter;\n- for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor);\n- bcounter++) {\n- int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor;\n+ if (aligned_grid) {\n+ // When N and M align perfectly with block_dim_x and block_dim_y, we\n+ // can skip boundary condition checks that waste instruction issue slots.\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ // In the general case we need to check boundary conditions in the M\n+ // dimension. However, we can still avoid boundary checks in the N dimension\n+ // for the inner blocks. So try to avoid those checks when possible.\n+ if (blockIdx.x * block_dim_x + block_dim_x - 1 < N) {\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ } else {\n+ blockReduceGammaBetaBackwardsWithChecks\n+ \n+ (M, N, dY, X, mean, rstd, dg, db, dg_sum, db_sum);\n+ }\n+ }\n \n- if (laneId < unroll_factor) {\n- mean_reg_tmp = mean[offset + laneId];\n- rstd_reg_tmp = rstd[offset + laneId];\n- }\n- WARP_SYNC();\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;", - "comment_created_at": "2025-03-25T19:31:32+00:00", - "comment_author": "ahmadsharif1", - "comment_body": "Great point. 
Done!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2109841015", - "pr_number": 153150, - "pr_file": "torch/csrc/dynamo/guards.cpp", - "created_at": "2025-05-27T18:06:14+00:00", - "commented_code": "Py_ssize_t _index{-1};\n};\n\n/**\n * Represents set[index] accessor by converting the set into a dictionary.\n */\nclass SetGetItemGuardAccessor : public GuardAccessor {\n public:\n SetGetItemGuardAccessor(\n RootGuardManager* root,\n const py::object& index,\n std::string source,\n py::handle example_value,\n py::handle guard_manager_enum)\n : GuardAccessor(\n root,\n index,\n std::move(source),\n example_value,\n guard_manager_enum),\n _index(py::cast(index)) {}\n\n // NB: Intentional duplication between check_nopybind and\n // check_verbose_nopybind.\n bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)\n override { // borrowed ref\n PyObject* PyDict = (PyObject*)&PyDict_Type;\n PyObject* dict =", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2109841015", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153150, - "pr_file": "torch/csrc/dynamo/guards.cpp", - "discussion_id": "2109841015", - "commented_code": "@@ -4117,6 +4141,85 @@ class ListGetItemGuardAccessor : public GuardAccessor {\n Py_ssize_t _index{-1};\n };\n \n+/**\n+ * Represents set[index] accessor by converting the set into a dictionary.\n+ */\n+class SetGetItemGuardAccessor : public GuardAccessor {\n+ public:\n+ SetGetItemGuardAccessor(\n+ RootGuardManager* root,\n+ const py::object& index,\n+ std::string source,\n+ py::handle example_value,\n+ py::handle guard_manager_enum)\n+ : GuardAccessor(\n+ root,\n+ index,\n+ std::move(source),\n+ example_value,\n+ guard_manager_enum),\n+ _index(py::cast(index)) {}\n+\n+ // NB: Intentional duplication between check_nopybind and\n+ // check_verbose_nopybind.\n+ bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)\n+ override { // borrowed ref\n+ PyObject* PyDict = (PyObject*)&PyDict_Type;\n+ PyObject* dict =", - "comment_created_at": "2025-05-27T18:06:14+00:00", - "comment_author": "williamwen42", - "comment_body": "Borrowed/new references don't seem to be handled correctly here. Either use `py::object` to automatically delete or manually call `Py_[X]DECREF`.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-remove-code-clutter.json b/_reviewers/pytorch-remove-code-clutter.json new file mode 100644 index 0000000..e381fef --- /dev/null +++ b/_reviewers/pytorch-remove-code-clutter.json @@ -0,0 +1,250 @@ +[ + { + "discussion_id": "1996369008", + "pr_number": 148605, + "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", + "created_at": "2025-03-14T22:23:15+00:00", + "commented_code": "}\n}\n\n\ntemplate \nvoid launch_vectorized_layer_norm_kernel(\n int N,\n int64_t M,\n T_ACC eps,\n const T* X_data,\n const T* gamma_data,\n const T* beta_data,\n T* Y_data,\n T_ACC* mean_data,\n T_ACC* rstd_data\n// When the data size is a multiple of the tile size we can use fewer\n// instructions and registers. 
Example, this is the case when M=256 N=256.\ntemplate \n__device__\n__forceinline__\nvoid\nblockReduceGammaBetaBackwardsAligned(\n int64_t M,\n int64_t N,\n const T* __restrict__ dY,\n const T* __restrict__ X,\n const T_ACC* __restrict__ mean,\n const T_ACC* __restrict__ rstd,\n T* __restrict__ dg,\n T* __restrict__ db,\n T_ACC &dg_sum,\n T_ACC &db_sum\n) {\n //constexpr int alignment = 16; //currently unused to make sure float and half results are bw accurate\n auto stream = at::cuda::getCurrentCUDAStream().stream();\n const int warp_size = at::cuda::warp_size();\n const dim3 threads(warp_size, num_threads() / warp_size, 1);\n dim3 blocks(M);\n\n#ifdef USE_ROCM\n uint64_t workgroupSize = static_cast(blocks.x) * static_cast(threads.x);\n // this caused invalid configuration problem\n if (workgroupSize > std::numeric_limits::max()) {\n // Fix invalid configuration https://github.com/pytorch/pytorch/issues/136291\n blocks.x = std::numeric_limits::max() / threads.x;\n constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n for (int64_t M_start = blockIdx.y * rows_per_block_y;\n M_start < M;\n M_start += rows_per_block_y * gridDim.y) {\n int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n T_ACC warp_mean = 0, warp_rstd = 0;\n if (lane_id < rows_per_thread_y) {\n warp_mean = mean[mean_index + lane_id];\n warp_rstd = rstd[mean_index + lane_id];\n }\n WARP_SYNC();\n T_ACC dY_regs[rows_per_thread_y] = {0};\n T_ACC X_regs[rows_per_thread_y] = {0};\n #pragma unroll\n for (int i = 0; i < rows_per_thread_y; ++i) {\n int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n if (aligned_grid || (current_y < M && thread_x < N)) {\n dY_regs[i] = dY[current_y * N + thread_x];\n X_regs[i] = X[current_y * N + thread_x];\n }\n }\n#endif\n\n TORCH_INTERNAL_ASSERT_DEBUG_ONLY(threads.y % 2 == 0 || threads.y == 1);\n int nshared = threads.y > 1 ? threads.y * 3/2 *sizeof(T_ACC) : 0;\n vectorized_layer_norm_kernel<<>>(N, eps, X_data,\n gamma_data, beta_data, mean_data, rstd_data, Y_data);\n C10_CUDA_KERNEL_LAUNCH_CHECK();\n #pragma unroll\n for (int i = 0; i < rows_per_thread_y; ++i) {\n T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n db_sum += dY_regs[i];\n }\n }\n}\n\n#ifdef USE_ROCM\n // the blocks.x contains the max grid x dimention without invalid configuration error\n // Fix invalid configuration https://github.com/pytorch/pytorch/issues/136291\n // Ensure all elements are processed. 
Prepare for next round\n int64_t remaining = M - blocks.x;\n const T* X_data2 = X_data;\n T_ACC* mean_data2 = mean_data;\n T_ACC* rstd_data2 = rstd_data;\n T* Y_data2 = Y_data;\ntemplate \n__device__\n__forceinline__\nvoid\nblockReduceGammaBetaBackwardsHelper(\n int64_t M_start,\n int64_t M,\n int64_t N,\n const T* __restrict__ dY,\n const T* __restrict__ X,\n const T_ACC* __restrict__ mean,\n const T_ACC* __restrict__ rstd,\n T* __restrict__ dg,\n T* __restrict__ db,\n T_ACC &dg_sum,\n T_ACC &db_sum\n) {\n constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n\n while (remaining > 0) {\n X_data2 += N * blocks.x;\n mean_data2 += blocks.x;\n rstd_data2 += blocks.x;\n Y_data2 += N * blocks.x;\n int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n T_ACC warp_mean = 0, warp_rstd = 0;\n if (lane_id < rows_per_thread_y && mean_index + lane_id < M) {\n warp_mean = mean[mean_index + lane_id];\n warp_rstd = rstd[mean_index + lane_id];\n }\n WARP_SYNC();\n\n blocks.x = (remaining > blocks.x) ? blocks.x : remaining;\n\n vectorized_layer_norm_kernel<<>>(N, eps, X_data2,\n gamma_data, beta_data, mean_data2, rstd_data2, Y_data2);\n C10_CUDA_KERNEL_LAUNCH_CHECK();\n\n remaining -= blocks.x;\n T_ACC dY_regs[rows_per_thread_y] = {0};\n T_ACC X_regs[rows_per_thread_y] = {0};\n #pragma unroll\n for (int i = 0; i < rows_per_thread_y; ++i) {\n int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n bool active = true;\n if (check_x && thread_x >= N) {\n active = false;\n }\n if (check_y && current_y >= M) {\n active = false;\n }\n if (active) {\n dY_regs[i] = dY[current_y * N + thread_x];\n X_regs[i] = X[current_y * N + thread_x];\n }\n }\n#endif\n\n #pragma unroll\n for (int i = 0; i < rows_per_thread_y; ++i) {\n T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n db_sum += dY_regs[i];\n }\n}\n\ntemplate \nvoid LayerNormKernelImplInternal(\n const Tensor& X,\n const Tensor& gamma,\n const Tensor& beta,\ntemplate ", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "1996369008", + "repo_full_name": "pytorch/pytorch", + "pr_number": 148605, + "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", + "discussion_id": "1996369008", + "commented_code": "@@ -509,862 +509,859 @@ __global__ void layer_norm_grad_input_kernel_vectorized(\n }\n }\n \n-\n-template \n-void launch_vectorized_layer_norm_kernel(\n- int N,\n- int64_t M,\n- T_ACC eps,\n- const T* X_data,\n- const T* gamma_data,\n- const T* beta_data,\n- T* Y_data,\n- T_ACC* mean_data,\n- T_ACC* rstd_data\n+// When the data size is a multiple of the tile size we can use fewer\n+// instructions and registers. 
Example, this is the case when M=256 N=256.\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsAligned(\n+ int64_t M,\n+ int64_t N,\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n ) {\n- //constexpr int alignment = 16; //currently unused to make sure float and half results are bw accurate\n- auto stream = at::cuda::getCurrentCUDAStream().stream();\n- const int warp_size = at::cuda::warp_size();\n- const dim3 threads(warp_size, num_threads() / warp_size, 1);\n- dim3 blocks(M);\n-\n-#ifdef USE_ROCM\n- uint64_t workgroupSize = static_cast(blocks.x) * static_cast(threads.x);\n- // this caused invalid configuration problem\n- if (workgroupSize > std::numeric_limits::max()) {\n- // Fix invalid configuration https://github.com/pytorch/pytorch/issues/136291\n- blocks.x = std::numeric_limits::max() / threads.x;\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n+ for (int64_t M_start = blockIdx.y * rows_per_block_y;\n+ M_start < M;\n+ M_start += rows_per_block_y * gridDim.y) {\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n+ }\n+ WARP_SYNC();\n+ T_ACC dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ if (aligned_grid || (current_y < M && thread_x < N)) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n+ }\n }\n-#endif\n \n- TORCH_INTERNAL_ASSERT_DEBUG_ONLY(threads.y % 2 == 0 || threads.y == 1);\n- int nshared = threads.y > 1 ? threads.y * 3/2 *sizeof(T_ACC) : 0;\n- vectorized_layer_norm_kernel<<>>(N, eps, X_data,\n- gamma_data, beta_data, mean_data, rstd_data, Y_data);\n- C10_CUDA_KERNEL_LAUNCH_CHECK();\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n+ }\n+ }\n+}\n \n-#ifdef USE_ROCM\n- // the blocks.x contains the max grid x dimention without invalid configuration error\n- // Fix invalid configuration https://github.com/pytorch/pytorch/issues/136291\n- // Ensure all elements are processed. 
Prepare for next round\n- int64_t remaining = M - blocks.x;\n- const T* X_data2 = X_data;\n- T_ACC* mean_data2 = mean_data;\n- T_ACC* rstd_data2 = rstd_data;\n- T* Y_data2 = Y_data;\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsHelper(\n+ int64_t M_start,\n+ int64_t M,\n+ int64_t N,\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n \n- while (remaining > 0) {\n- X_data2 += N * blocks.x;\n- mean_data2 += blocks.x;\n- rstd_data2 += blocks.x;\n- Y_data2 += N * blocks.x;\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y && mean_index + lane_id < M) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n+ }\n+ WARP_SYNC();\n \n- blocks.x = (remaining > blocks.x) ? blocks.x : remaining;\n \n- vectorized_layer_norm_kernel<<>>(N, eps, X_data2,\n- gamma_data, beta_data, mean_data2, rstd_data2, Y_data2);\n- C10_CUDA_KERNEL_LAUNCH_CHECK();\n \n- remaining -= blocks.x;\n+ T_ACC dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ bool active = true;\n+ if (check_x && thread_x >= N) {\n+ active = false;\n+ }\n+ if (check_y && current_y >= M) {\n+ active = false;\n+ }\n+ if (active) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n+ }\n }\n-#endif\n \n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n+ }\n }\n \n-template \n-void LayerNormKernelImplInternal(\n- const Tensor& X,\n- const Tensor& gamma,\n- const Tensor& beta,\n+template ", + "comment_created_at": "2025-03-14T22:23:15+00:00", + "comment_author": "ngimel", + "comment_body": "check_y id never used in this function", + "pr_file_module": null + }, + { + "comment_id": "2000093583", + "repo_full_name": "pytorch/pytorch", + "pr_number": 148605, + "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", + "discussion_id": "1996369008", + "commented_code": "@@ -509,862 +509,859 @@ __global__ void layer_norm_grad_input_kernel_vectorized(\n }\n }\n \n-\n-template \n-void launch_vectorized_layer_norm_kernel(\n- int N,\n- int64_t M,\n- T_ACC eps,\n- const T* X_data,\n- const T* gamma_data,\n- const T* beta_data,\n- T* Y_data,\n- T_ACC* mean_data,\n- T_ACC* rstd_data\n+// When the data size is a multiple of the tile size we can use fewer\n+// instructions and registers. 
Example, this is the case when M=256 N=256.\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsAligned(\n+ int64_t M,\n+ int64_t N,\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n ) {\n- //constexpr int alignment = 16; //currently unused to make sure float and half results are bw accurate\n- auto stream = at::cuda::getCurrentCUDAStream().stream();\n- const int warp_size = at::cuda::warp_size();\n- const dim3 threads(warp_size, num_threads() / warp_size, 1);\n- dim3 blocks(M);\n-\n-#ifdef USE_ROCM\n- uint64_t workgroupSize = static_cast(blocks.x) * static_cast(threads.x);\n- // this caused invalid configuration problem\n- if (workgroupSize > std::numeric_limits::max()) {\n- // Fix invalid configuration https://github.com/pytorch/pytorch/issues/136291\n- blocks.x = std::numeric_limits::max() / threads.x;\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n+ for (int64_t M_start = blockIdx.y * rows_per_block_y;\n+ M_start < M;\n+ M_start += rows_per_block_y * gridDim.y) {\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n+ }\n+ WARP_SYNC();\n+ T_ACC dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ if (aligned_grid || (current_y < M && thread_x < N)) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n+ }\n }\n-#endif\n \n- TORCH_INTERNAL_ASSERT_DEBUG_ONLY(threads.y % 2 == 0 || threads.y == 1);\n- int nshared = threads.y > 1 ? threads.y * 3/2 *sizeof(T_ACC) : 0;\n- vectorized_layer_norm_kernel<<>>(N, eps, X_data,\n- gamma_data, beta_data, mean_data, rstd_data, Y_data);\n- C10_CUDA_KERNEL_LAUNCH_CHECK();\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n+ }\n+ }\n+}\n \n-#ifdef USE_ROCM\n- // the blocks.x contains the max grid x dimention without invalid configuration error\n- // Fix invalid configuration https://github.com/pytorch/pytorch/issues/136291\n- // Ensure all elements are processed. 
Prepare for next round\n- int64_t remaining = M - blocks.x;\n- const T* X_data2 = X_data;\n- T_ACC* mean_data2 = mean_data;\n- T_ACC* rstd_data2 = rstd_data;\n- T* Y_data2 = Y_data;\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsHelper(\n+ int64_t M_start,\n+ int64_t M,\n+ int64_t N,\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n \n- while (remaining > 0) {\n- X_data2 += N * blocks.x;\n- mean_data2 += blocks.x;\n- rstd_data2 += blocks.x;\n- Y_data2 += N * blocks.x;\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y && mean_index + lane_id < M) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n+ }\n+ WARP_SYNC();\n \n- blocks.x = (remaining > blocks.x) ? blocks.x : remaining;\n \n- vectorized_layer_norm_kernel<<>>(N, eps, X_data2,\n- gamma_data, beta_data, mean_data2, rstd_data2, Y_data2);\n- C10_CUDA_KERNEL_LAUNCH_CHECK();\n \n- remaining -= blocks.x;\n+ T_ACC dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ bool active = true;\n+ if (check_x && thread_x >= N) {\n+ active = false;\n+ }\n+ if (check_y && current_y >= M) {\n+ active = false;\n+ }\n+ if (active) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n+ }\n }\n-#endif\n \n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n+ }\n }\n \n-template \n-void LayerNormKernelImplInternal(\n- const Tensor& X,\n- const Tensor& gamma,\n- const Tensor& beta,\n+template ", + "comment_created_at": "2025-03-18T03:03:30+00:00", + "comment_author": "ahmadsharif1", + "comment_body": "Great point. Done.\r\n\r\n(This was also done for historical reasons while I was writing the kernel. 
)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2138662133", + "pr_number": 155590, + "pr_file": "c10/core/SymbolicShapeMeta.cpp", + "created_at": "2025-06-10T19:52:08+00:00", + "commented_code": "return _compute_contiguous(sizes, strides, numel());\n}\n\nSymBool SymbolicShapeMeta::compute_def_contiguous() const {\n if (!strides_valid_) {\n return false;\n }\n c10::SymIntArrayRef sizes(sizes_);\n c10::SymIntArrayRef strides(strides_);\n // definitely_contiguous from Contiguity.h\n return _compute_def_contiguous(sizes, strides, numel());\n}\n\n// The rest of them\n#define DEFINE_EAGER_SYMBOOL_COMPUTE(name, nodeimpl, fallback) \\\n SymBool SymbolicShapeMeta::name() const { \\\n if (!strides_valid_) { \\\n return false; \\\n } \\\n c10::SymIntArrayRef sizes(sizes_); \\\n c10::SymIntArrayRef strides(strides_); \\\n return fallback(sizes, strides); \\\n#define DEFINE_EAGER_SYMBOOL_COMPUTE(name, fallback) \\", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2138662133", + "repo_full_name": "pytorch/pytorch", + "pr_number": 155590, + "pr_file": "c10/core/SymbolicShapeMeta.cpp", + "discussion_id": "2138662133", + "commented_code": "@@ -82,15 +86,25 @@ SymBool SymbolicShapeMeta::compute_contiguous() const {\n return _compute_contiguous(sizes, strides, numel());\n }\n \n+SymBool SymbolicShapeMeta::compute_def_contiguous() const {\n+ if (!strides_valid_) {\n+ return false;\n+ }\n+ c10::SymIntArrayRef sizes(sizes_);\n+ c10::SymIntArrayRef strides(strides_);\n+ // definitely_contiguous from Contiguity.h\n+ return _compute_def_contiguous(sizes, strides, numel());\n+}\n+\n // The rest of them\n-#define DEFINE_EAGER_SYMBOOL_COMPUTE(name, nodeimpl, fallback) \\\n- SymBool SymbolicShapeMeta::name() const { \\\n- if (!strides_valid_) { \\\n- return false; \\\n- } \\\n- c10::SymIntArrayRef sizes(sizes_); \\\n- c10::SymIntArrayRef strides(strides_); \\\n- return fallback(sizes, strides); \\\n+#define DEFINE_EAGER_SYMBOOL_COMPUTE(name, fallback) \\", + "comment_created_at": "2025-06-10T19:52:08+00:00", + "comment_author": "laithsakka", + "comment_body": "nodeimpl arg is redundant?", + "pr_file_module": null + }, + { + "comment_id": "2160604579", + "repo_full_name": "pytorch/pytorch", + "pr_number": 155590, + "pr_file": "c10/core/SymbolicShapeMeta.cpp", + "discussion_id": "2138662133", + "commented_code": "@@ -82,15 +86,25 @@ SymBool SymbolicShapeMeta::compute_contiguous() const {\n return _compute_contiguous(sizes, strides, numel());\n }\n \n+SymBool SymbolicShapeMeta::compute_def_contiguous() const {\n+ if (!strides_valid_) {\n+ return false;\n+ }\n+ c10::SymIntArrayRef sizes(sizes_);\n+ c10::SymIntArrayRef strides(strides_);\n+ // definitely_contiguous from Contiguity.h\n+ return _compute_def_contiguous(sizes, strides, numel());\n+}\n+\n // The rest of them\n-#define DEFINE_EAGER_SYMBOOL_COMPUTE(name, nodeimpl, fallback) \\\n- SymBool SymbolicShapeMeta::name() const { \\\n- if (!strides_valid_) { \\\n- return false; \\\n- } \\\n- c10::SymIntArrayRef sizes(sizes_); \\\n- c10::SymIntArrayRef strides(strides_); \\\n- return fallback(sizes, strides); \\\n+#define DEFINE_EAGER_SYMBOOL_COMPUTE(name, fallback) \\", + "comment_created_at": "2025-06-23T02:18:20+00:00", + "comment_author": "ezyang", + "comment_body": "Yeah, probably from when we needed to figure out how to dispatch", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2024110522", + "pr_number": 150506, + "pr_file": "aten/src/ATen/native/TensorAdvancedIndexing.cpp", + "created_at": 
"2025-04-02T05:48:14+00:00", + "commented_code": "false, \"index is out of bounds for dimension with size 0\");\n }\n\n this->dims_before = dims_before;\n this->dims_after = dims_after;\n this->src = restride_src(src, dims_before, dims_indexed, replacement_shape);\n this->dims_before = new_dims_before;\n this->dims_after = new_dims_after;", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2024110522", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150506, + "pr_file": "aten/src/ATen/native/TensorAdvancedIndexing.cpp", + "discussion_id": "2024110522", + "commented_code": "@@ -677,13 +677,13 @@ AdvancedIndex::AdvancedIndex(const Tensor& src, TensorList indices_list) {\n false, \"index is out of bounds for dimension with size 0\");\n }\n \n- this->dims_before = dims_before;\n- this->dims_after = dims_after;\n- this->src = restride_src(src, dims_before, dims_indexed, replacement_shape);\n+ this->dims_before = new_dims_before;\n+ this->dims_after = new_dims_after;", + "comment_created_at": "2025-04-02T05:48:14+00:00", + "comment_author": "malfet", + "comment_body": "You can remove `this->` as class members no longer shadowed by local variables", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2024855751", + "pr_number": 150503, + "pr_file": "aten/src/ATen/native/cpu/int4mm_kernel.cpp", + "created_at": "2025-04-02T13:40:45+00:00", + "commented_code": "}\n };\n\n // NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)\n auto lhs_qa8dx_buffer = std::make_unique(\n m * (k + sizeof(float) + sizeof(int32_t))); // Allocate for LHS\n int8_t* lhs_qa8dx = lhs_qa8dx_buffer.get();", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2024855751", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150503, + "pr_file": "aten/src/ATen/native/cpu/int4mm_kernel.cpp", + "discussion_id": "2024855751", + "commented_code": "@@ -1068,6 +1070,7 @@ static void ref_dyn_quant_matmul_4bit_groupwise_kernel(\n }\n };\n \n+ // NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)\n auto lhs_qa8dx_buffer = std::make_unique(\n m * (k + sizeof(float) + sizeof(int32_t))); // Allocate for LHS\n int8_t* lhs_qa8dx = lhs_qa8dx_buffer.get();", + "comment_created_at": "2025-04-02T13:40:45+00:00", + "comment_author": "Skylion007", + "comment_body": "```suggestion\r\n std::vector lhs_qa8dx_buffer(m * (k + sizeof(float) + sizeof(int32_t)));\r\n int8_t* lhs_qa8dx = lhs_qa8dx_buffer.data();\r\n```\r\nWouldn't this be cleaner?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2177707270", + "pr_number": 156161, + "pr_file": "aten/src/ATen/native/cpu/ScatterGatherKernel.cpp", + "created_at": "2025-07-01T14:09:38+00:00", + "commented_code": "}\n#endif\n\n// implementation taken from FBGEMM/src/Utils.cc\nnamespace {\n// histogram size per thread\nconstexpr int RDX_HIST_SIZE = 256;\n\nvoid update_prefsum_and_offset_in_range(\n int64_t& offset,\n const int bins_beg,\n const int bins_end,\n const int nthreads,\n const int64_t* const histogram,\n int64_t* const histogram_ps) {\n for (int bins = bins_beg; bins < bins_end; ++bins) {\n for (int t = 0; t < nthreads; ++t) {\n histogram_ps[t * RDX_HIST_SIZE + bins] = offset;\n offset += histogram[t * RDX_HIST_SIZE + bins];\n }\n }\n}\n\nstatic inline void combine_prefix_sum(\n const int nthreads,\n const int64_t elements_count,\n const int64_t* const histogram,\n int64_t* const histogram_ps) {\n int64_t offset = 0;\n 
update_prefsum_and_offset_in_range(\n offset, 0, RDX_HIST_SIZE, nthreads, histogram, histogram_ps);\n // TODO(DamianSzwichtenberg): Is assert sufficient? In most cases, it will\n // work only in debug build.\n assert(offset == elements_count);\n // Suppress unused variable warning\n (void)elements_count;\n}\n\nstatic inline void combine_prefix_sum_for_msb(\n const int nthreads,\n const int64_t elements_count,\n const int64_t* const histogram,\n int64_t* const histogram_ps) {\n int64_t offset = 0;\n update_prefsum_and_offset_in_range(\n offset, 128, RDX_HIST_SIZE, nthreads, histogram, histogram_ps);\n update_prefsum_and_offset_in_range(\n offset, 0, 128, nthreads, histogram, histogram_ps);\n // TODO(DamianSzwichtenberg): Is assert sufficient? In most cases, it will\n // work only in debug build.\n assert(offset == elements_count);\n // Suppress unused variable warning\n (void)elements_count;", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2177707270", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156161, + "pr_file": "aten/src/ATen/native/cpu/ScatterGatherKernel.cpp", + "discussion_id": "2177707270", + "commented_code": "@@ -665,6 +675,208 @@ std::pair radix_sort_parallel(\n }\n #endif\n \n+// implementation taken from FBGEMM/src/Utils.cc\n+namespace {\n+// histogram size per thread\n+constexpr int RDX_HIST_SIZE = 256;\n+\n+void update_prefsum_and_offset_in_range(\n+ int64_t& offset,\n+ const int bins_beg,\n+ const int bins_end,\n+ const int nthreads,\n+ const int64_t* const histogram,\n+ int64_t* const histogram_ps) {\n+ for (int bins = bins_beg; bins < bins_end; ++bins) {\n+ for (int t = 0; t < nthreads; ++t) {\n+ histogram_ps[t * RDX_HIST_SIZE + bins] = offset;\n+ offset += histogram[t * RDX_HIST_SIZE + bins];\n+ }\n+ }\n+}\n+\n+static inline void combine_prefix_sum(\n+ const int nthreads,\n+ const int64_t elements_count,\n+ const int64_t* const histogram,\n+ int64_t* const histogram_ps) {\n+ int64_t offset = 0;\n+ update_prefsum_and_offset_in_range(\n+ offset, 0, RDX_HIST_SIZE, nthreads, histogram, histogram_ps);\n+ // TODO(DamianSzwichtenberg): Is assert sufficient? In most cases, it will\n+ // work only in debug build.\n+ assert(offset == elements_count);\n+ // Suppress unused variable warning\n+ (void)elements_count;\n+}\n+\n+static inline void combine_prefix_sum_for_msb(\n+ const int nthreads,\n+ const int64_t elements_count,\n+ const int64_t* const histogram,\n+ int64_t* const histogram_ps) {\n+ int64_t offset = 0;\n+ update_prefsum_and_offset_in_range(\n+ offset, 128, RDX_HIST_SIZE, nthreads, histogram, histogram_ps);\n+ update_prefsum_and_offset_in_range(\n+ offset, 0, 128, nthreads, histogram, histogram_ps);\n+ // TODO(DamianSzwichtenberg): Is assert sufficient? 
In most cases, it will\n+ // work only in debug build.\n+ assert(offset == elements_count);\n+ // Suppress unused variable warning\n+ (void)elements_count;", + "comment_created_at": "2025-07-01T14:09:38+00:00", + "comment_author": "Skylion007", + "comment_body": "We are on C++17, why not just use [maybe unused] now?", + "pr_file_module": null + }, + { + "comment_id": "2177885551", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156161, + "pr_file": "aten/src/ATen/native/cpu/ScatterGatherKernel.cpp", + "discussion_id": "2177707270", + "commented_code": "@@ -665,6 +675,208 @@ std::pair radix_sort_parallel(\n }\n #endif\n \n+// implementation taken from FBGEMM/src/Utils.cc\n+namespace {\n+// histogram size per thread\n+constexpr int RDX_HIST_SIZE = 256;\n+\n+void update_prefsum_and_offset_in_range(\n+ int64_t& offset,\n+ const int bins_beg,\n+ const int bins_end,\n+ const int nthreads,\n+ const int64_t* const histogram,\n+ int64_t* const histogram_ps) {\n+ for (int bins = bins_beg; bins < bins_end; ++bins) {\n+ for (int t = 0; t < nthreads; ++t) {\n+ histogram_ps[t * RDX_HIST_SIZE + bins] = offset;\n+ offset += histogram[t * RDX_HIST_SIZE + bins];\n+ }\n+ }\n+}\n+\n+static inline void combine_prefix_sum(\n+ const int nthreads,\n+ const int64_t elements_count,\n+ const int64_t* const histogram,\n+ int64_t* const histogram_ps) {\n+ int64_t offset = 0;\n+ update_prefsum_and_offset_in_range(\n+ offset, 0, RDX_HIST_SIZE, nthreads, histogram, histogram_ps);\n+ // TODO(DamianSzwichtenberg): Is assert sufficient? In most cases, it will\n+ // work only in debug build.\n+ assert(offset == elements_count);\n+ // Suppress unused variable warning\n+ (void)elements_count;\n+}\n+\n+static inline void combine_prefix_sum_for_msb(\n+ const int nthreads,\n+ const int64_t elements_count,\n+ const int64_t* const histogram,\n+ int64_t* const histogram_ps) {\n+ int64_t offset = 0;\n+ update_prefsum_and_offset_in_range(\n+ offset, 128, RDX_HIST_SIZE, nthreads, histogram, histogram_ps);\n+ update_prefsum_and_offset_in_range(\n+ offset, 0, 128, nthreads, histogram, histogram_ps);\n+ // TODO(DamianSzwichtenberg): Is assert sufficient? In most cases, it will\n+ // work only in debug build.\n+ assert(offset == elements_count);\n+ // Suppress unused variable warning\n+ (void)elements_count;", + "comment_created_at": "2025-07-01T15:19:16+00:00", + "comment_author": "maajidkhann", + "comment_body": "Thanks for pointing out. 
Added the suggestion in the code.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2167715137", + "pr_number": 149601, + "pr_file": "c10/core/AllocatorConfig.cpp", + "created_at": "2025-06-25T22:01:33+00:00", + "commented_code": "#include \n#include \n#include \n#include \n\n#include \n#include \n\nnamespace c10::CachingAllocator {\n\nnamespace {\nconstexpr size_t kRoundUpPowerOfTwoIntervals = 16;\nconstexpr size_t kMB = 1024 * 1024ul;\nconstexpr size_t kRoundUpPowerOfTwoStart = 1 * kMB; // 1MB\nconstexpr size_t kRoundUpPowerOfTwoEnd = 64 * 1024ul * kMB; // 64GB\nconstexpr size_t kPinnedMaxRegisterThreads = 128;\n} // anonymous namespace\n\nAllocatorConfig& AllocatorConfig::instance() {\n static AllocatorConfig instance;\n static c10::once_flag init_once;\n#define C10_ALLOCATOR_CONFIG_PARSE_ENV(env, deprecated) \\", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2167715137", + "repo_full_name": "pytorch/pytorch", + "pr_number": 149601, + "pr_file": "c10/core/AllocatorConfig.cpp", + "discussion_id": "2167715137", + "commented_code": "@@ -0,0 +1,438 @@\n+#include \n+#include \n+#include \n+#include \n+\n+#include \n+#include \n+\n+namespace c10::CachingAllocator {\n+\n+namespace {\n+constexpr size_t kRoundUpPowerOfTwoIntervals = 16;\n+constexpr size_t kMB = 1024 * 1024ul;\n+constexpr size_t kRoundUpPowerOfTwoStart = 1 * kMB; // 1MB\n+constexpr size_t kRoundUpPowerOfTwoEnd = 64 * 1024ul * kMB; // 64GB\n+constexpr size_t kPinnedMaxRegisterThreads = 128;\n+} // anonymous namespace\n+\n+AllocatorConfig& AllocatorConfig::instance() {\n+ static AllocatorConfig instance;\n+ static c10::once_flag init_once;\n+#define C10_ALLOCATOR_CONFIG_PARSE_ENV(env, deprecated) \\", + "comment_created_at": "2025-06-25T22:01:33+00:00", + "comment_author": "albanD", + "comment_body": "You should either define this outside the function if it used elsewhere/you want it available in the whole file or undef it here.\r\n", + "pr_file_module": null + }, + { + "comment_id": "2168045769", + "repo_full_name": "pytorch/pytorch", + "pr_number": 149601, + "pr_file": "c10/core/AllocatorConfig.cpp", + "discussion_id": "2167715137", + "commented_code": "@@ -0,0 +1,438 @@\n+#include \n+#include \n+#include \n+#include \n+\n+#include \n+#include \n+\n+namespace c10::CachingAllocator {\n+\n+namespace {\n+constexpr size_t kRoundUpPowerOfTwoIntervals = 16;\n+constexpr size_t kMB = 1024 * 1024ul;\n+constexpr size_t kRoundUpPowerOfTwoStart = 1 * kMB; // 1MB\n+constexpr size_t kRoundUpPowerOfTwoEnd = 64 * 1024ul * kMB; // 64GB\n+constexpr size_t kPinnedMaxRegisterThreads = 128;\n+} // anonymous namespace\n+\n+AllocatorConfig& AllocatorConfig::instance() {\n+ static AllocatorConfig instance;\n+ static c10::once_flag init_once;\n+#define C10_ALLOCATOR_CONFIG_PARSE_ENV(env, deprecated) \\", + "comment_created_at": "2025-06-26T03:53:44+00:00", + "comment_author": "guangyey", + "comment_body": "`#undef` added.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2104407669", + "pr_number": 154202, + "pr_file": "aten/src/ATen/native/mkldnn/xpu/Conv.cpp", + "created_at": "2025-05-23T11:37:58+00:00", + "commented_code": "return std::tuple{grad_input, grad_weight, grad_bias};\n}\n\nTensor convolution_pointwise(\n const Tensor& input_t,\n const Tensor& weight_t,\n const c10::optional& bias_opt,\n IntArrayRef padding,\n IntArrayRef stride,\n IntArrayRef dilation,\n int64_t groups,\n c10::string_view attr,\n torch::List> scalars,\n c10::optional algorithm) {\n Attr att;\n att = 
construct_unary_attr(att, attr, scalars, algorithm);\n const Tensor bias = bias_opt.has_value() ? bias_opt.value() : at::Tensor();\n std::vector output_padding = {0};\n\n return _convolution(\n input_t,\n weight_t,\n bias,\n stride,\n padding,\n dilation,\n /*transposed*/ false,\n output_padding,\n groups,\n att);\n}\n\nTensor convolution_pointwise_binary(\n const Tensor& input_t,\n const Tensor& other_t,\n const Tensor& weight_t,\n const c10::optional& bias_opt,\n IntArrayRef padding,\n IntArrayRef stride,\n IntArrayRef dilation,\n int64_t groups,\n c10::string_view binary_attr,\n c10::optional alpha,\n c10::optional unary_attr,\n torch::List> unary_scalars,\n c10::optional unary_algorithm) {\n Tensor output;\n Tensor bias = bias_opt.has_value() ? bias_opt.value() : at::Tensor();\n // Step1: Construct binary attr\n Attr attr;\n attr = construct_binary_attr(attr, binary_attr, other_t);\n // Step2: Append unary attr\n if (unary_attr.has_value())\n attr = construct_unary_attr(\n attr, unary_attr.value(), unary_scalars, unary_algorithm);\n\n Tensor res = _convolution_out(\n output,\n input_t,\n weight_t,\n bias,\n stride,\n padding,\n dilation,\n /*transpoced*/ false,\n {{0, 0}},\n groups,\n attr);\n\n // Step3: Run conv\n return res;\n}\n\n\nTensor& convolution_pointwise_binary_(\n Tensor& other_t,\n const Tensor& input_t,\n const Tensor& weight_t,\n const c10::optional& bias_opt,\n IntArrayRef padding,\n IntArrayRef stride,\n IntArrayRef dilation,\n int64_t groups,\n c10::string_view binary_attr,\n c10::optional alpha,\n c10::optional unary_attr,\n torch::List> unary_scalars,\n c10::optional unary_algorithm) {\n Tensor bias = bias_opt.has_value() ? bias_opt.value() : at::Tensor();\n // Step1: Construct binary attr\n Attr attr;\n attr = construct_binary_attr(attr, binary_attr, other_t);\n \n std::cout<<\"binary_attr\"< convolution_backward_overrideable(\n return std::tuple{grad_input, grad_weight, grad_bias};\n }\n \n+Tensor convolution_pointwise(\n+ const Tensor& input_t,\n+ const Tensor& weight_t,\n+ const c10::optional& bias_opt,\n+ IntArrayRef padding,\n+ IntArrayRef stride,\n+ IntArrayRef dilation,\n+ int64_t groups,\n+ c10::string_view attr,\n+ torch::List> scalars,\n+ c10::optional algorithm) {\n+ Attr att;\n+ att = construct_unary_attr(att, attr, scalars, algorithm);\n+ const Tensor bias = bias_opt.has_value() ? bias_opt.value() : at::Tensor();\n+ std::vector output_padding = {0};\n+\n+ return _convolution(\n+ input_t,\n+ weight_t,\n+ bias,\n+ stride,\n+ padding,\n+ dilation,\n+ /*transposed*/ false,\n+ output_padding,\n+ groups,\n+ att);\n+}\n+\n+Tensor convolution_pointwise_binary(\n+ const Tensor& input_t,\n+ const Tensor& other_t,\n+ const Tensor& weight_t,\n+ const c10::optional& bias_opt,\n+ IntArrayRef padding,\n+ IntArrayRef stride,\n+ IntArrayRef dilation,\n+ int64_t groups,\n+ c10::string_view binary_attr,\n+ c10::optional alpha,\n+ c10::optional unary_attr,\n+ torch::List> unary_scalars,\n+ c10::optional unary_algorithm) {\n+ Tensor output;\n+ Tensor bias = bias_opt.has_value() ? 
bias_opt.value() : at::Tensor();\n+ // Step1: Construct binary attr\n+ Attr attr;\n+ attr = construct_binary_attr(attr, binary_attr, other_t);\n+ // Step2: Append unary attr\n+ if (unary_attr.has_value())\n+ attr = construct_unary_attr(\n+ attr, unary_attr.value(), unary_scalars, unary_algorithm);\n+\n+ Tensor res = _convolution_out(\n+ output,\n+ input_t,\n+ weight_t,\n+ bias,\n+ stride,\n+ padding,\n+ dilation,\n+ /*transpoced*/ false,\n+ {{0, 0}},\n+ groups,\n+ attr);\n+\n+ // Step3: Run conv\n+ return res;\n+}\n+\n+\n+Tensor& convolution_pointwise_binary_(\n+ Tensor& other_t,\n+ const Tensor& input_t,\n+ const Tensor& weight_t,\n+ const c10::optional& bias_opt,\n+ IntArrayRef padding,\n+ IntArrayRef stride,\n+ IntArrayRef dilation,\n+ int64_t groups,\n+ c10::string_view binary_attr,\n+ c10::optional alpha,\n+ c10::optional unary_attr,\n+ torch::List> unary_scalars,\n+ c10::optional unary_algorithm) {\n+ Tensor bias = bias_opt.has_value() ? bias_opt.value() : at::Tensor();\n+ // Step1: Construct binary attr\n+ Attr attr;\n+ attr = construct_binary_attr(attr, binary_attr, other_t);\n+ \n+ std::cout<<\"binary_attr\"< convolution_backward_overrideable(\n return std::tuple{grad_input, grad_weight, grad_bias};\n }\n \n+Tensor convolution_pointwise(\n+ const Tensor& input_t,\n+ const Tensor& weight_t,\n+ const c10::optional& bias_opt,\n+ IntArrayRef padding,\n+ IntArrayRef stride,\n+ IntArrayRef dilation,\n+ int64_t groups,\n+ c10::string_view attr,\n+ torch::List> scalars,\n+ c10::optional algorithm) {\n+ Attr att;\n+ att = construct_unary_attr(att, attr, scalars, algorithm);\n+ const Tensor bias = bias_opt.has_value() ? bias_opt.value() : at::Tensor();\n+ std::vector output_padding = {0};\n+\n+ return _convolution(\n+ input_t,\n+ weight_t,\n+ bias,\n+ stride,\n+ padding,\n+ dilation,\n+ /*transposed*/ false,\n+ output_padding,\n+ groups,\n+ att);\n+}\n+\n+Tensor convolution_pointwise_binary(\n+ const Tensor& input_t,\n+ const Tensor& other_t,\n+ const Tensor& weight_t,\n+ const c10::optional& bias_opt,\n+ IntArrayRef padding,\n+ IntArrayRef stride,\n+ IntArrayRef dilation,\n+ int64_t groups,\n+ c10::string_view binary_attr,\n+ c10::optional alpha,\n+ c10::optional unary_attr,\n+ torch::List> unary_scalars,\n+ c10::optional unary_algorithm) {\n+ Tensor output;\n+ Tensor bias = bias_opt.has_value() ? bias_opt.value() : at::Tensor();\n+ // Step1: Construct binary attr\n+ Attr attr;\n+ attr = construct_binary_attr(attr, binary_attr, other_t);\n+ // Step2: Append unary attr\n+ if (unary_attr.has_value())\n+ attr = construct_unary_attr(\n+ attr, unary_attr.value(), unary_scalars, unary_algorithm);\n+\n+ Tensor res = _convolution_out(\n+ output,\n+ input_t,\n+ weight_t,\n+ bias,\n+ stride,\n+ padding,\n+ dilation,\n+ /*transpoced*/ false,\n+ {{0, 0}},\n+ groups,\n+ attr);\n+\n+ // Step3: Run conv\n+ return res;\n+}\n+\n+\n+Tensor& convolution_pointwise_binary_(\n+ Tensor& other_t,\n+ const Tensor& input_t,\n+ const Tensor& weight_t,\n+ const c10::optional& bias_opt,\n+ IntArrayRef padding,\n+ IntArrayRef stride,\n+ IntArrayRef dilation,\n+ int64_t groups,\n+ c10::string_view binary_attr,\n+ c10::optional alpha,\n+ c10::optional unary_attr,\n+ torch::List> unary_scalars,\n+ c10::optional unary_algorithm) {\n+ Tensor bias = bias_opt.has_value() ? 
bias_opt.value() : at::Tensor();\n+ // Step1: Construct binary attr\n+ Attr attr;\n+ attr = construct_binary_attr(attr, binary_attr, other_t);\n+ \n+ std::cout<<\"binary_attr\"<(args)[i_arg - 1]);\n check_type(schema_arg, args[i_arg - 1]);", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "1957124391", + "repo_full_name": "pytorch/pytorch", + "pr_number": 147253, + "pr_file": "torch/csrc/jit/runtime/static/impl.cpp", + "discussion_id": "1957124391", + "commented_code": "@@ -1042,7 +1042,7 @@ void BlockRunner::set_inputs(IValueList&& args, const KeywordArgs& kwargs) {\n const auto& schema_arg = schema_args[i_arg];\n \n if (i_arg - 1 < args.size()) {\n- check_type(schema_arg, std::forward(args)[i_arg - 1]);\n+ check_type(schema_arg, args[i_arg - 1]);", + "comment_created_at": "2025-02-15T14:08:24+00:00", + "comment_author": "Skylion007", + "comment_body": "What error does this resolve?", + "pr_file_module": null + }, + { + "comment_id": "1957154993", + "repo_full_name": "pytorch/pytorch", + "pr_number": 147253, + "pr_file": "torch/csrc/jit/runtime/static/impl.cpp", + "discussion_id": "1957124391", + "commented_code": "@@ -1042,7 +1042,7 @@ void BlockRunner::set_inputs(IValueList&& args, const KeywordArgs& kwargs) {\n const auto& schema_arg = schema_args[i_arg];\n \n if (i_arg - 1 < args.size()) {\n- check_type(schema_arg, std::forward(args)[i_arg - 1]);\n+ check_type(schema_arg, args[i_arg - 1]);", + "comment_created_at": "2025-02-15T16:48:46+00:00", + "comment_author": "cyyever", + "comment_body": "Tidy warns use-after-move. Although that isn't real, removing std::forward can make the code cleaner. ", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-remove-code-clutter.md b/_reviewers/pytorch-remove-code-clutter.md index fd0ed46..7ec4283 100644 --- a/_reviewers/pytorch-remove-code-clutter.md +++ b/_reviewers/pytorch-remove-code-clutter.md @@ -87,255 +87,3 @@ Eliminate unnecessary code elements that add cognitive load without providing va ``` Removing clutter improves readability, reduces maintenance burden, and helps avoid subtle bugs. - - -[ - { - "discussion_id": "1996369008", - "pr_number": 148605, - "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", - "created_at": "2025-03-14T22:23:15+00:00", - "commented_code": "}\n}\n\n\ntemplate \nvoid launch_vectorized_layer_norm_kernel(\n int N,\n int64_t M,\n T_ACC eps,\n const T* X_data,\n const T* gamma_data,\n const T* beta_data,\n T* Y_data,\n T_ACC* mean_data,\n T_ACC* rstd_data\n// When the data size is a multiple of the tile size we can use fewer\n// instructions and registers. 
Example, this is the case when M=256 N=256.\ntemplate \n__device__\n__forceinline__\nvoid\nblockReduceGammaBetaBackwardsAligned(\n int64_t M,\n int64_t N,\n const T* __restrict__ dY,\n const T* __restrict__ X,\n const T_ACC* __restrict__ mean,\n const T_ACC* __restrict__ rstd,\n T* __restrict__ dg,\n T* __restrict__ db,\n T_ACC &dg_sum,\n T_ACC &db_sum\n) {\n //constexpr int alignment = 16; //currently unused to make sure float and half results are bw accurate\n auto stream = at::cuda::getCurrentCUDAStream().stream();\n const int warp_size = at::cuda::warp_size();\n const dim3 threads(warp_size, num_threads() / warp_size, 1);\n dim3 blocks(M);\n\n#ifdef USE_ROCM\n uint64_t workgroupSize = static_cast(blocks.x) * static_cast(threads.x);\n // this caused invalid configuration problem\n if (workgroupSize > std::numeric_limits::max()) {\n // Fix invalid configuration https://github.com/pytorch/pytorch/issues/136291\n blocks.x = std::numeric_limits::max() / threads.x;\n constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n for (int64_t M_start = blockIdx.y * rows_per_block_y;\n M_start < M;\n M_start += rows_per_block_y * gridDim.y) {\n int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n T_ACC warp_mean = 0, warp_rstd = 0;\n if (lane_id < rows_per_thread_y) {\n warp_mean = mean[mean_index + lane_id];\n warp_rstd = rstd[mean_index + lane_id];\n }\n WARP_SYNC();\n T_ACC dY_regs[rows_per_thread_y] = {0};\n T_ACC X_regs[rows_per_thread_y] = {0};\n #pragma unroll\n for (int i = 0; i < rows_per_thread_y; ++i) {\n int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n if (aligned_grid || (current_y < M && thread_x < N)) {\n dY_regs[i] = dY[current_y * N + thread_x];\n X_regs[i] = X[current_y * N + thread_x];\n }\n }\n#endif\n\n TORCH_INTERNAL_ASSERT_DEBUG_ONLY(threads.y % 2 == 0 || threads.y == 1);\n int nshared = threads.y > 1 ? threads.y * 3/2 *sizeof(T_ACC) : 0;\n vectorized_layer_norm_kernel<<>>(N, eps, X_data,\n gamma_data, beta_data, mean_data, rstd_data, Y_data);\n C10_CUDA_KERNEL_LAUNCH_CHECK();\n #pragma unroll\n for (int i = 0; i < rows_per_thread_y; ++i) {\n T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n db_sum += dY_regs[i];\n }\n }\n}\n\n#ifdef USE_ROCM\n // the blocks.x contains the max grid x dimention without invalid configuration error\n // Fix invalid configuration https://github.com/pytorch/pytorch/issues/136291\n // Ensure all elements are processed. 
Prepare for next round\n int64_t remaining = M - blocks.x;\n const T* X_data2 = X_data;\n T_ACC* mean_data2 = mean_data;\n T_ACC* rstd_data2 = rstd_data;\n T* Y_data2 = Y_data;\ntemplate \n__device__\n__forceinline__\nvoid\nblockReduceGammaBetaBackwardsHelper(\n int64_t M_start,\n int64_t M,\n int64_t N,\n const T* __restrict__ dY,\n const T* __restrict__ X,\n const T_ACC* __restrict__ mean,\n const T_ACC* __restrict__ rstd,\n T* __restrict__ dg,\n T* __restrict__ db,\n T_ACC &dg_sum,\n T_ACC &db_sum\n) {\n constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n\n while (remaining > 0) {\n X_data2 += N * blocks.x;\n mean_data2 += blocks.x;\n rstd_data2 += blocks.x;\n Y_data2 += N * blocks.x;\n int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n T_ACC warp_mean = 0, warp_rstd = 0;\n if (lane_id < rows_per_thread_y && mean_index + lane_id < M) {\n warp_mean = mean[mean_index + lane_id];\n warp_rstd = rstd[mean_index + lane_id];\n }\n WARP_SYNC();\n\n blocks.x = (remaining > blocks.x) ? blocks.x : remaining;\n\n vectorized_layer_norm_kernel<<>>(N, eps, X_data2,\n gamma_data, beta_data, mean_data2, rstd_data2, Y_data2);\n C10_CUDA_KERNEL_LAUNCH_CHECK();\n\n remaining -= blocks.x;\n T_ACC dY_regs[rows_per_thread_y] = {0};\n T_ACC X_regs[rows_per_thread_y] = {0};\n #pragma unroll\n for (int i = 0; i < rows_per_thread_y; ++i) {\n int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n bool active = true;\n if (check_x && thread_x >= N) {\n active = false;\n }\n if (check_y && current_y >= M) {\n active = false;\n }\n if (active) {\n dY_regs[i] = dY[current_y * N + thread_x];\n X_regs[i] = X[current_y * N + thread_x];\n }\n }\n#endif\n\n #pragma unroll\n for (int i = 0; i < rows_per_thread_y; ++i) {\n T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n db_sum += dY_regs[i];\n }\n}\n\ntemplate \nvoid LayerNormKernelImplInternal(\n const Tensor& X,\n const Tensor& gamma,\n const Tensor& beta,\ntemplate ", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "1996369008", - "repo_full_name": "pytorch/pytorch", - "pr_number": 148605, - "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", - "discussion_id": "1996369008", - "commented_code": "@@ -509,862 +509,859 @@ __global__ void layer_norm_grad_input_kernel_vectorized(\n }\n }\n \n-\n-template \n-void launch_vectorized_layer_norm_kernel(\n- int N,\n- int64_t M,\n- T_ACC eps,\n- const T* X_data,\n- const T* gamma_data,\n- const T* beta_data,\n- T* Y_data,\n- T_ACC* mean_data,\n- T_ACC* rstd_data\n+// When the data size is a multiple of the tile size we can use fewer\n+// instructions and registers. 
Example, this is the case when M=256 N=256.\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsAligned(\n+ int64_t M,\n+ int64_t N,\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n ) {\n- //constexpr int alignment = 16; //currently unused to make sure float and half results are bw accurate\n- auto stream = at::cuda::getCurrentCUDAStream().stream();\n- const int warp_size = at::cuda::warp_size();\n- const dim3 threads(warp_size, num_threads() / warp_size, 1);\n- dim3 blocks(M);\n-\n-#ifdef USE_ROCM\n- uint64_t workgroupSize = static_cast(blocks.x) * static_cast(threads.x);\n- // this caused invalid configuration problem\n- if (workgroupSize > std::numeric_limits::max()) {\n- // Fix invalid configuration https://github.com/pytorch/pytorch/issues/136291\n- blocks.x = std::numeric_limits::max() / threads.x;\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n+ for (int64_t M_start = blockIdx.y * rows_per_block_y;\n+ M_start < M;\n+ M_start += rows_per_block_y * gridDim.y) {\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n+ }\n+ WARP_SYNC();\n+ T_ACC dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ if (aligned_grid || (current_y < M && thread_x < N)) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n+ }\n }\n-#endif\n \n- TORCH_INTERNAL_ASSERT_DEBUG_ONLY(threads.y % 2 == 0 || threads.y == 1);\n- int nshared = threads.y > 1 ? threads.y * 3/2 *sizeof(T_ACC) : 0;\n- vectorized_layer_norm_kernel<<>>(N, eps, X_data,\n- gamma_data, beta_data, mean_data, rstd_data, Y_data);\n- C10_CUDA_KERNEL_LAUNCH_CHECK();\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n+ }\n+ }\n+}\n \n-#ifdef USE_ROCM\n- // the blocks.x contains the max grid x dimention without invalid configuration error\n- // Fix invalid configuration https://github.com/pytorch/pytorch/issues/136291\n- // Ensure all elements are processed. 
Prepare for next round\n- int64_t remaining = M - blocks.x;\n- const T* X_data2 = X_data;\n- T_ACC* mean_data2 = mean_data;\n- T_ACC* rstd_data2 = rstd_data;\n- T* Y_data2 = Y_data;\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsHelper(\n+ int64_t M_start,\n+ int64_t M,\n+ int64_t N,\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n \n- while (remaining > 0) {\n- X_data2 += N * blocks.x;\n- mean_data2 += blocks.x;\n- rstd_data2 += blocks.x;\n- Y_data2 += N * blocks.x;\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y && mean_index + lane_id < M) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n+ }\n+ WARP_SYNC();\n \n- blocks.x = (remaining > blocks.x) ? blocks.x : remaining;\n \n- vectorized_layer_norm_kernel<<>>(N, eps, X_data2,\n- gamma_data, beta_data, mean_data2, rstd_data2, Y_data2);\n- C10_CUDA_KERNEL_LAUNCH_CHECK();\n \n- remaining -= blocks.x;\n+ T_ACC dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ bool active = true;\n+ if (check_x && thread_x >= N) {\n+ active = false;\n+ }\n+ if (check_y && current_y >= M) {\n+ active = false;\n+ }\n+ if (active) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n+ }\n }\n-#endif\n \n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n+ }\n }\n \n-template \n-void LayerNormKernelImplInternal(\n- const Tensor& X,\n- const Tensor& gamma,\n- const Tensor& beta,\n+template ", - "comment_created_at": "2025-03-14T22:23:15+00:00", - "comment_author": "ngimel", - "comment_body": "check_y id never used in this function", - "pr_file_module": null - }, - { - "comment_id": "2000093583", - "repo_full_name": "pytorch/pytorch", - "pr_number": 148605, - "pr_file": "aten/src/ATen/native/cuda/layer_norm_kernel.cu", - "discussion_id": "1996369008", - "commented_code": "@@ -509,862 +509,859 @@ __global__ void layer_norm_grad_input_kernel_vectorized(\n }\n }\n \n-\n-template \n-void launch_vectorized_layer_norm_kernel(\n- int N,\n- int64_t M,\n- T_ACC eps,\n- const T* X_data,\n- const T* gamma_data,\n- const T* beta_data,\n- T* Y_data,\n- T_ACC* mean_data,\n- T_ACC* rstd_data\n+// When the data size is a multiple of the tile size we can use fewer\n+// instructions and registers. 
Example, this is the case when M=256 N=256.\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsAligned(\n+ int64_t M,\n+ int64_t N,\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n ) {\n- //constexpr int alignment = 16; //currently unused to make sure float and half results are bw accurate\n- auto stream = at::cuda::getCurrentCUDAStream().stream();\n- const int warp_size = at::cuda::warp_size();\n- const dim3 threads(warp_size, num_threads() / warp_size, 1);\n- dim3 blocks(M);\n-\n-#ifdef USE_ROCM\n- uint64_t workgroupSize = static_cast(blocks.x) * static_cast(threads.x);\n- // this caused invalid configuration problem\n- if (workgroupSize > std::numeric_limits::max()) {\n- // Fix invalid configuration https://github.com/pytorch/pytorch/issues/136291\n- blocks.x = std::numeric_limits::max() / threads.x;\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n+ for (int64_t M_start = blockIdx.y * rows_per_block_y;\n+ M_start < M;\n+ M_start += rows_per_block_y * gridDim.y) {\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n+ }\n+ WARP_SYNC();\n+ T_ACC dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ if (aligned_grid || (current_y < M && thread_x < N)) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n+ }\n }\n-#endif\n \n- TORCH_INTERNAL_ASSERT_DEBUG_ONLY(threads.y % 2 == 0 || threads.y == 1);\n- int nshared = threads.y > 1 ? threads.y * 3/2 *sizeof(T_ACC) : 0;\n- vectorized_layer_norm_kernel<<>>(N, eps, X_data,\n- gamma_data, beta_data, mean_data, rstd_data, Y_data);\n- C10_CUDA_KERNEL_LAUNCH_CHECK();\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n+ }\n+ }\n+}\n \n-#ifdef USE_ROCM\n- // the blocks.x contains the max grid x dimention without invalid configuration error\n- // Fix invalid configuration https://github.com/pytorch/pytorch/issues/136291\n- // Ensure all elements are processed. 
Prepare for next round\n- int64_t remaining = M - blocks.x;\n- const T* X_data2 = X_data;\n- T_ACC* mean_data2 = mean_data;\n- T_ACC* rstd_data2 = rstd_data;\n- T* Y_data2 = Y_data;\n+template \n+__device__\n+__forceinline__\n+void\n+blockReduceGammaBetaBackwardsHelper(\n+ int64_t M_start,\n+ int64_t M,\n+ int64_t N,\n+ const T* __restrict__ dY,\n+ const T* __restrict__ X,\n+ const T_ACC* __restrict__ mean,\n+ const T_ACC* __restrict__ rstd,\n+ T* __restrict__ dg,\n+ T* __restrict__ db,\n+ T_ACC &dg_sum,\n+ T_ACC &db_sum\n+) {\n+ constexpr int rows_per_thread_y = rows_per_block_y / block_dim_y;\n+ int64_t thread_x = blockIdx.x * block_dim_x + threadIdx.x;\n \n- while (remaining > 0) {\n- X_data2 += N * blocks.x;\n- mean_data2 += blocks.x;\n- rstd_data2 += blocks.x;\n- Y_data2 += N * blocks.x;\n+ int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) & (kWarpSize - 1);\n+ int64_t mean_index = M_start + threadIdx.y * rows_per_thread_y;\n+ T_ACC warp_mean = 0, warp_rstd = 0;\n+ if (lane_id < rows_per_thread_y && mean_index + lane_id < M) {\n+ warp_mean = mean[mean_index + lane_id];\n+ warp_rstd = rstd[mean_index + lane_id];\n+ }\n+ WARP_SYNC();\n \n- blocks.x = (remaining > blocks.x) ? blocks.x : remaining;\n \n- vectorized_layer_norm_kernel<<>>(N, eps, X_data2,\n- gamma_data, beta_data, mean_data2, rstd_data2, Y_data2);\n- C10_CUDA_KERNEL_LAUNCH_CHECK();\n \n- remaining -= blocks.x;\n+ T_ACC dY_regs[rows_per_thread_y] = {0};\n+ T_ACC X_regs[rows_per_thread_y] = {0};\n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ int64_t current_y = M_start + threadIdx.y * rows_per_thread_y + i;\n+ bool active = true;\n+ if (check_x && thread_x >= N) {\n+ active = false;\n+ }\n+ if (check_y && current_y >= M) {\n+ active = false;\n+ }\n+ if (active) {\n+ dY_regs[i] = dY[current_y * N + thread_x];\n+ X_regs[i] = X[current_y * N + thread_x];\n+ }\n }\n-#endif\n \n+ #pragma unroll\n+ for (int i = 0; i < rows_per_thread_y; ++i) {\n+ T_ACC mean_reg = WARP_SHFL(warp_mean, i, kWarpSize);\n+ T_ACC rstd_reg = WARP_SHFL(warp_rstd, i, kWarpSize);\n+ dg_sum += dY_regs[i] * (X_regs[i] - mean_reg) * rstd_reg;\n+ db_sum += dY_regs[i];\n+ }\n }\n \n-template \n-void LayerNormKernelImplInternal(\n- const Tensor& X,\n- const Tensor& gamma,\n- const Tensor& beta,\n+template ", - "comment_created_at": "2025-03-18T03:03:30+00:00", - "comment_author": "ahmadsharif1", - "comment_body": "Great point. Done.\r\n\r\n(This was also done for historical reasons while I was writing the kernel. 
)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2138662133", - "pr_number": 155590, - "pr_file": "c10/core/SymbolicShapeMeta.cpp", - "created_at": "2025-06-10T19:52:08+00:00", - "commented_code": "return _compute_contiguous(sizes, strides, numel());\n}\n\nSymBool SymbolicShapeMeta::compute_def_contiguous() const {\n if (!strides_valid_) {\n return false;\n }\n c10::SymIntArrayRef sizes(sizes_);\n c10::SymIntArrayRef strides(strides_);\n // definitely_contiguous from Contiguity.h\n return _compute_def_contiguous(sizes, strides, numel());\n}\n\n// The rest of them\n#define DEFINE_EAGER_SYMBOOL_COMPUTE(name, nodeimpl, fallback) \\\n SymBool SymbolicShapeMeta::name() const { \\\n if (!strides_valid_) { \\\n return false; \\\n } \\\n c10::SymIntArrayRef sizes(sizes_); \\\n c10::SymIntArrayRef strides(strides_); \\\n return fallback(sizes, strides); \\\n#define DEFINE_EAGER_SYMBOOL_COMPUTE(name, fallback) \\", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2138662133", - "repo_full_name": "pytorch/pytorch", - "pr_number": 155590, - "pr_file": "c10/core/SymbolicShapeMeta.cpp", - "discussion_id": "2138662133", - "commented_code": "@@ -82,15 +86,25 @@ SymBool SymbolicShapeMeta::compute_contiguous() const {\n return _compute_contiguous(sizes, strides, numel());\n }\n \n+SymBool SymbolicShapeMeta::compute_def_contiguous() const {\n+ if (!strides_valid_) {\n+ return false;\n+ }\n+ c10::SymIntArrayRef sizes(sizes_);\n+ c10::SymIntArrayRef strides(strides_);\n+ // definitely_contiguous from Contiguity.h\n+ return _compute_def_contiguous(sizes, strides, numel());\n+}\n+\n // The rest of them\n-#define DEFINE_EAGER_SYMBOOL_COMPUTE(name, nodeimpl, fallback) \\\n- SymBool SymbolicShapeMeta::name() const { \\\n- if (!strides_valid_) { \\\n- return false; \\\n- } \\\n- c10::SymIntArrayRef sizes(sizes_); \\\n- c10::SymIntArrayRef strides(strides_); \\\n- return fallback(sizes, strides); \\\n+#define DEFINE_EAGER_SYMBOOL_COMPUTE(name, fallback) \\", - "comment_created_at": "2025-06-10T19:52:08+00:00", - "comment_author": "laithsakka", - "comment_body": "nodeimpl arg is redundant?", - "pr_file_module": null - }, - { - "comment_id": "2160604579", - "repo_full_name": "pytorch/pytorch", - "pr_number": 155590, - "pr_file": "c10/core/SymbolicShapeMeta.cpp", - "discussion_id": "2138662133", - "commented_code": "@@ -82,15 +86,25 @@ SymBool SymbolicShapeMeta::compute_contiguous() const {\n return _compute_contiguous(sizes, strides, numel());\n }\n \n+SymBool SymbolicShapeMeta::compute_def_contiguous() const {\n+ if (!strides_valid_) {\n+ return false;\n+ }\n+ c10::SymIntArrayRef sizes(sizes_);\n+ c10::SymIntArrayRef strides(strides_);\n+ // definitely_contiguous from Contiguity.h\n+ return _compute_def_contiguous(sizes, strides, numel());\n+}\n+\n // The rest of them\n-#define DEFINE_EAGER_SYMBOOL_COMPUTE(name, nodeimpl, fallback) \\\n- SymBool SymbolicShapeMeta::name() const { \\\n- if (!strides_valid_) { \\\n- return false; \\\n- } \\\n- c10::SymIntArrayRef sizes(sizes_); \\\n- c10::SymIntArrayRef strides(strides_); \\\n- return fallback(sizes, strides); \\\n+#define DEFINE_EAGER_SYMBOOL_COMPUTE(name, fallback) \\", - "comment_created_at": "2025-06-23T02:18:20+00:00", - "comment_author": "ezyang", - "comment_body": "Yeah, probably from when we needed to figure out how to dispatch", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2024110522", - "pr_number": 150506, - "pr_file": "aten/src/ATen/native/TensorAdvancedIndexing.cpp", - "created_at": 
"2025-04-02T05:48:14+00:00", - "commented_code": "false, \"index is out of bounds for dimension with size 0\");\n }\n\n this->dims_before = dims_before;\n this->dims_after = dims_after;\n this->src = restride_src(src, dims_before, dims_indexed, replacement_shape);\n this->dims_before = new_dims_before;\n this->dims_after = new_dims_after;", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2024110522", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150506, - "pr_file": "aten/src/ATen/native/TensorAdvancedIndexing.cpp", - "discussion_id": "2024110522", - "commented_code": "@@ -677,13 +677,13 @@ AdvancedIndex::AdvancedIndex(const Tensor& src, TensorList indices_list) {\n false, \"index is out of bounds for dimension with size 0\");\n }\n \n- this->dims_before = dims_before;\n- this->dims_after = dims_after;\n- this->src = restride_src(src, dims_before, dims_indexed, replacement_shape);\n+ this->dims_before = new_dims_before;\n+ this->dims_after = new_dims_after;", - "comment_created_at": "2025-04-02T05:48:14+00:00", - "comment_author": "malfet", - "comment_body": "You can remove `this->` as class members no longer shadowed by local variables", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2024855751", - "pr_number": 150503, - "pr_file": "aten/src/ATen/native/cpu/int4mm_kernel.cpp", - "created_at": "2025-04-02T13:40:45+00:00", - "commented_code": "}\n };\n\n // NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)\n auto lhs_qa8dx_buffer = std::make_unique(\n m * (k + sizeof(float) + sizeof(int32_t))); // Allocate for LHS\n int8_t* lhs_qa8dx = lhs_qa8dx_buffer.get();", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2024855751", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150503, - "pr_file": "aten/src/ATen/native/cpu/int4mm_kernel.cpp", - "discussion_id": "2024855751", - "commented_code": "@@ -1068,6 +1070,7 @@ static void ref_dyn_quant_matmul_4bit_groupwise_kernel(\n }\n };\n \n+ // NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)\n auto lhs_qa8dx_buffer = std::make_unique(\n m * (k + sizeof(float) + sizeof(int32_t))); // Allocate for LHS\n int8_t* lhs_qa8dx = lhs_qa8dx_buffer.get();", - "comment_created_at": "2025-04-02T13:40:45+00:00", - "comment_author": "Skylion007", - "comment_body": "```suggestion\r\n std::vector lhs_qa8dx_buffer(m * (k + sizeof(float) + sizeof(int32_t)));\r\n int8_t* lhs_qa8dx = lhs_qa8dx_buffer.data();\r\n```\r\nWouldn't this be cleaner?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2177707270", - "pr_number": 156161, - "pr_file": "aten/src/ATen/native/cpu/ScatterGatherKernel.cpp", - "created_at": "2025-07-01T14:09:38+00:00", - "commented_code": "}\n#endif\n\n// implementation taken from FBGEMM/src/Utils.cc\nnamespace {\n// histogram size per thread\nconstexpr int RDX_HIST_SIZE = 256;\n\nvoid update_prefsum_and_offset_in_range(\n int64_t& offset,\n const int bins_beg,\n const int bins_end,\n const int nthreads,\n const int64_t* const histogram,\n int64_t* const histogram_ps) {\n for (int bins = bins_beg; bins < bins_end; ++bins) {\n for (int t = 0; t < nthreads; ++t) {\n histogram_ps[t * RDX_HIST_SIZE + bins] = offset;\n offset += histogram[t * RDX_HIST_SIZE + bins];\n }\n }\n}\n\nstatic inline void combine_prefix_sum(\n const int nthreads,\n const int64_t elements_count,\n const int64_t* const histogram,\n int64_t* const histogram_ps) {\n int64_t offset = 0;\n 
update_prefsum_and_offset_in_range(\n offset, 0, RDX_HIST_SIZE, nthreads, histogram, histogram_ps);\n // TODO(DamianSzwichtenberg): Is assert sufficient? In most cases, it will\n // work only in debug build.\n assert(offset == elements_count);\n // Suppress unused variable warning\n (void)elements_count;\n}\n\nstatic inline void combine_prefix_sum_for_msb(\n const int nthreads,\n const int64_t elements_count,\n const int64_t* const histogram,\n int64_t* const histogram_ps) {\n int64_t offset = 0;\n update_prefsum_and_offset_in_range(\n offset, 128, RDX_HIST_SIZE, nthreads, histogram, histogram_ps);\n update_prefsum_and_offset_in_range(\n offset, 0, 128, nthreads, histogram, histogram_ps);\n // TODO(DamianSzwichtenberg): Is assert sufficient? In most cases, it will\n // work only in debug build.\n assert(offset == elements_count);\n // Suppress unused variable warning\n (void)elements_count;", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2177707270", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156161, - "pr_file": "aten/src/ATen/native/cpu/ScatterGatherKernel.cpp", - "discussion_id": "2177707270", - "commented_code": "@@ -665,6 +675,208 @@ std::pair radix_sort_parallel(\n }\n #endif\n \n+// implementation taken from FBGEMM/src/Utils.cc\n+namespace {\n+// histogram size per thread\n+constexpr int RDX_HIST_SIZE = 256;\n+\n+void update_prefsum_and_offset_in_range(\n+ int64_t& offset,\n+ const int bins_beg,\n+ const int bins_end,\n+ const int nthreads,\n+ const int64_t* const histogram,\n+ int64_t* const histogram_ps) {\n+ for (int bins = bins_beg; bins < bins_end; ++bins) {\n+ for (int t = 0; t < nthreads; ++t) {\n+ histogram_ps[t * RDX_HIST_SIZE + bins] = offset;\n+ offset += histogram[t * RDX_HIST_SIZE + bins];\n+ }\n+ }\n+}\n+\n+static inline void combine_prefix_sum(\n+ const int nthreads,\n+ const int64_t elements_count,\n+ const int64_t* const histogram,\n+ int64_t* const histogram_ps) {\n+ int64_t offset = 0;\n+ update_prefsum_and_offset_in_range(\n+ offset, 0, RDX_HIST_SIZE, nthreads, histogram, histogram_ps);\n+ // TODO(DamianSzwichtenberg): Is assert sufficient? In most cases, it will\n+ // work only in debug build.\n+ assert(offset == elements_count);\n+ // Suppress unused variable warning\n+ (void)elements_count;\n+}\n+\n+static inline void combine_prefix_sum_for_msb(\n+ const int nthreads,\n+ const int64_t elements_count,\n+ const int64_t* const histogram,\n+ int64_t* const histogram_ps) {\n+ int64_t offset = 0;\n+ update_prefsum_and_offset_in_range(\n+ offset, 128, RDX_HIST_SIZE, nthreads, histogram, histogram_ps);\n+ update_prefsum_and_offset_in_range(\n+ offset, 0, 128, nthreads, histogram, histogram_ps);\n+ // TODO(DamianSzwichtenberg): Is assert sufficient? 
In most cases, it will\n+ // work only in debug build.\n+ assert(offset == elements_count);\n+ // Suppress unused variable warning\n+ (void)elements_count;", - "comment_created_at": "2025-07-01T14:09:38+00:00", - "comment_author": "Skylion007", - "comment_body": "We are on C++17, why not just use [maybe unused] now?", - "pr_file_module": null - }, - { - "comment_id": "2177885551", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156161, - "pr_file": "aten/src/ATen/native/cpu/ScatterGatherKernel.cpp", - "discussion_id": "2177707270", - "commented_code": "@@ -665,6 +675,208 @@ std::pair radix_sort_parallel(\n }\n #endif\n \n+// implementation taken from FBGEMM/src/Utils.cc\n+namespace {\n+// histogram size per thread\n+constexpr int RDX_HIST_SIZE = 256;\n+\n+void update_prefsum_and_offset_in_range(\n+ int64_t& offset,\n+ const int bins_beg,\n+ const int bins_end,\n+ const int nthreads,\n+ const int64_t* const histogram,\n+ int64_t* const histogram_ps) {\n+ for (int bins = bins_beg; bins < bins_end; ++bins) {\n+ for (int t = 0; t < nthreads; ++t) {\n+ histogram_ps[t * RDX_HIST_SIZE + bins] = offset;\n+ offset += histogram[t * RDX_HIST_SIZE + bins];\n+ }\n+ }\n+}\n+\n+static inline void combine_prefix_sum(\n+ const int nthreads,\n+ const int64_t elements_count,\n+ const int64_t* const histogram,\n+ int64_t* const histogram_ps) {\n+ int64_t offset = 0;\n+ update_prefsum_and_offset_in_range(\n+ offset, 0, RDX_HIST_SIZE, nthreads, histogram, histogram_ps);\n+ // TODO(DamianSzwichtenberg): Is assert sufficient? In most cases, it will\n+ // work only in debug build.\n+ assert(offset == elements_count);\n+ // Suppress unused variable warning\n+ (void)elements_count;\n+}\n+\n+static inline void combine_prefix_sum_for_msb(\n+ const int nthreads,\n+ const int64_t elements_count,\n+ const int64_t* const histogram,\n+ int64_t* const histogram_ps) {\n+ int64_t offset = 0;\n+ update_prefsum_and_offset_in_range(\n+ offset, 128, RDX_HIST_SIZE, nthreads, histogram, histogram_ps);\n+ update_prefsum_and_offset_in_range(\n+ offset, 0, 128, nthreads, histogram, histogram_ps);\n+ // TODO(DamianSzwichtenberg): Is assert sufficient? In most cases, it will\n+ // work only in debug build.\n+ assert(offset == elements_count);\n+ // Suppress unused variable warning\n+ (void)elements_count;", - "comment_created_at": "2025-07-01T15:19:16+00:00", - "comment_author": "maajidkhann", - "comment_body": "Thanks for pointing out. 
Added the suggestion in the code.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2167715137", - "pr_number": 149601, - "pr_file": "c10/core/AllocatorConfig.cpp", - "created_at": "2025-06-25T22:01:33+00:00", - "commented_code": "#include \n#include \n#include \n#include \n\n#include \n#include \n\nnamespace c10::CachingAllocator {\n\nnamespace {\nconstexpr size_t kRoundUpPowerOfTwoIntervals = 16;\nconstexpr size_t kMB = 1024 * 1024ul;\nconstexpr size_t kRoundUpPowerOfTwoStart = 1 * kMB; // 1MB\nconstexpr size_t kRoundUpPowerOfTwoEnd = 64 * 1024ul * kMB; // 64GB\nconstexpr size_t kPinnedMaxRegisterThreads = 128;\n} // anonymous namespace\n\nAllocatorConfig& AllocatorConfig::instance() {\n static AllocatorConfig instance;\n static c10::once_flag init_once;\n#define C10_ALLOCATOR_CONFIG_PARSE_ENV(env, deprecated) \\", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2167715137", - "repo_full_name": "pytorch/pytorch", - "pr_number": 149601, - "pr_file": "c10/core/AllocatorConfig.cpp", - "discussion_id": "2167715137", - "commented_code": "@@ -0,0 +1,438 @@\n+#include \n+#include \n+#include \n+#include \n+\n+#include \n+#include \n+\n+namespace c10::CachingAllocator {\n+\n+namespace {\n+constexpr size_t kRoundUpPowerOfTwoIntervals = 16;\n+constexpr size_t kMB = 1024 * 1024ul;\n+constexpr size_t kRoundUpPowerOfTwoStart = 1 * kMB; // 1MB\n+constexpr size_t kRoundUpPowerOfTwoEnd = 64 * 1024ul * kMB; // 64GB\n+constexpr size_t kPinnedMaxRegisterThreads = 128;\n+} // anonymous namespace\n+\n+AllocatorConfig& AllocatorConfig::instance() {\n+ static AllocatorConfig instance;\n+ static c10::once_flag init_once;\n+#define C10_ALLOCATOR_CONFIG_PARSE_ENV(env, deprecated) \\", - "comment_created_at": "2025-06-25T22:01:33+00:00", - "comment_author": "albanD", - "comment_body": "You should either define this outside the function if it used elsewhere/you want it available in the whole file or undef it here.\r\n", - "pr_file_module": null - }, - { - "comment_id": "2168045769", - "repo_full_name": "pytorch/pytorch", - "pr_number": 149601, - "pr_file": "c10/core/AllocatorConfig.cpp", - "discussion_id": "2167715137", - "commented_code": "@@ -0,0 +1,438 @@\n+#include \n+#include \n+#include \n+#include \n+\n+#include \n+#include \n+\n+namespace c10::CachingAllocator {\n+\n+namespace {\n+constexpr size_t kRoundUpPowerOfTwoIntervals = 16;\n+constexpr size_t kMB = 1024 * 1024ul;\n+constexpr size_t kRoundUpPowerOfTwoStart = 1 * kMB; // 1MB\n+constexpr size_t kRoundUpPowerOfTwoEnd = 64 * 1024ul * kMB; // 64GB\n+constexpr size_t kPinnedMaxRegisterThreads = 128;\n+} // anonymous namespace\n+\n+AllocatorConfig& AllocatorConfig::instance() {\n+ static AllocatorConfig instance;\n+ static c10::once_flag init_once;\n+#define C10_ALLOCATOR_CONFIG_PARSE_ENV(env, deprecated) \\", - "comment_created_at": "2025-06-26T03:53:44+00:00", - "comment_author": "guangyey", - "comment_body": "`#undef` added.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2104407669", - "pr_number": 154202, - "pr_file": "aten/src/ATen/native/mkldnn/xpu/Conv.cpp", - "created_at": "2025-05-23T11:37:58+00:00", - "commented_code": "return std::tuple{grad_input, grad_weight, grad_bias};\n}\n\nTensor convolution_pointwise(\n const Tensor& input_t,\n const Tensor& weight_t,\n const c10::optional& bias_opt,\n IntArrayRef padding,\n IntArrayRef stride,\n IntArrayRef dilation,\n int64_t groups,\n c10::string_view attr,\n torch::List> scalars,\n c10::optional algorithm) {\n Attr att;\n att = 
construct_unary_attr(att, attr, scalars, algorithm);\n const Tensor bias = bias_opt.has_value() ? bias_opt.value() : at::Tensor();\n std::vector output_padding = {0};\n\n return _convolution(\n input_t,\n weight_t,\n bias,\n stride,\n padding,\n dilation,\n /*transposed*/ false,\n output_padding,\n groups,\n att);\n}\n\nTensor convolution_pointwise_binary(\n const Tensor& input_t,\n const Tensor& other_t,\n const Tensor& weight_t,\n const c10::optional& bias_opt,\n IntArrayRef padding,\n IntArrayRef stride,\n IntArrayRef dilation,\n int64_t groups,\n c10::string_view binary_attr,\n c10::optional alpha,\n c10::optional unary_attr,\n torch::List> unary_scalars,\n c10::optional unary_algorithm) {\n Tensor output;\n Tensor bias = bias_opt.has_value() ? bias_opt.value() : at::Tensor();\n // Step1: Construct binary attr\n Attr attr;\n attr = construct_binary_attr(attr, binary_attr, other_t);\n // Step2: Append unary attr\n if (unary_attr.has_value())\n attr = construct_unary_attr(\n attr, unary_attr.value(), unary_scalars, unary_algorithm);\n\n Tensor res = _convolution_out(\n output,\n input_t,\n weight_t,\n bias,\n stride,\n padding,\n dilation,\n /*transpoced*/ false,\n {{0, 0}},\n groups,\n attr);\n\n // Step3: Run conv\n return res;\n}\n\n\nTensor& convolution_pointwise_binary_(\n Tensor& other_t,\n const Tensor& input_t,\n const Tensor& weight_t,\n const c10::optional& bias_opt,\n IntArrayRef padding,\n IntArrayRef stride,\n IntArrayRef dilation,\n int64_t groups,\n c10::string_view binary_attr,\n c10::optional alpha,\n c10::optional unary_attr,\n torch::List> unary_scalars,\n c10::optional unary_algorithm) {\n Tensor bias = bias_opt.has_value() ? bias_opt.value() : at::Tensor();\n // Step1: Construct binary attr\n Attr attr;\n attr = construct_binary_attr(attr, binary_attr, other_t);\n \n std::cout<<\"binary_attr\"< convolution_backward_overrideable(\n return std::tuple{grad_input, grad_weight, grad_bias};\n }\n \n+Tensor convolution_pointwise(\n+ const Tensor& input_t,\n+ const Tensor& weight_t,\n+ const c10::optional& bias_opt,\n+ IntArrayRef padding,\n+ IntArrayRef stride,\n+ IntArrayRef dilation,\n+ int64_t groups,\n+ c10::string_view attr,\n+ torch::List> scalars,\n+ c10::optional algorithm) {\n+ Attr att;\n+ att = construct_unary_attr(att, attr, scalars, algorithm);\n+ const Tensor bias = bias_opt.has_value() ? bias_opt.value() : at::Tensor();\n+ std::vector output_padding = {0};\n+\n+ return _convolution(\n+ input_t,\n+ weight_t,\n+ bias,\n+ stride,\n+ padding,\n+ dilation,\n+ /*transposed*/ false,\n+ output_padding,\n+ groups,\n+ att);\n+}\n+\n+Tensor convolution_pointwise_binary(\n+ const Tensor& input_t,\n+ const Tensor& other_t,\n+ const Tensor& weight_t,\n+ const c10::optional& bias_opt,\n+ IntArrayRef padding,\n+ IntArrayRef stride,\n+ IntArrayRef dilation,\n+ int64_t groups,\n+ c10::string_view binary_attr,\n+ c10::optional alpha,\n+ c10::optional unary_attr,\n+ torch::List> unary_scalars,\n+ c10::optional unary_algorithm) {\n+ Tensor output;\n+ Tensor bias = bias_opt.has_value() ? 
bias_opt.value() : at::Tensor();\n+ // Step1: Construct binary attr\n+ Attr attr;\n+ attr = construct_binary_attr(attr, binary_attr, other_t);\n+ // Step2: Append unary attr\n+ if (unary_attr.has_value())\n+ attr = construct_unary_attr(\n+ attr, unary_attr.value(), unary_scalars, unary_algorithm);\n+\n+ Tensor res = _convolution_out(\n+ output,\n+ input_t,\n+ weight_t,\n+ bias,\n+ stride,\n+ padding,\n+ dilation,\n+ /*transpoced*/ false,\n+ {{0, 0}},\n+ groups,\n+ attr);\n+\n+ // Step3: Run conv\n+ return res;\n+}\n+\n+\n+Tensor& convolution_pointwise_binary_(\n+ Tensor& other_t,\n+ const Tensor& input_t,\n+ const Tensor& weight_t,\n+ const c10::optional& bias_opt,\n+ IntArrayRef padding,\n+ IntArrayRef stride,\n+ IntArrayRef dilation,\n+ int64_t groups,\n+ c10::string_view binary_attr,\n+ c10::optional alpha,\n+ c10::optional unary_attr,\n+ torch::List> unary_scalars,\n+ c10::optional unary_algorithm) {\n+ Tensor bias = bias_opt.has_value() ? bias_opt.value() : at::Tensor();\n+ // Step1: Construct binary attr\n+ Attr attr;\n+ attr = construct_binary_attr(attr, binary_attr, other_t);\n+ \n+ std::cout<<\"binary_attr\"< convolution_backward_overrideable(\n return std::tuple{grad_input, grad_weight, grad_bias};\n }\n \n+Tensor convolution_pointwise(\n+ const Tensor& input_t,\n+ const Tensor& weight_t,\n+ const c10::optional& bias_opt,\n+ IntArrayRef padding,\n+ IntArrayRef stride,\n+ IntArrayRef dilation,\n+ int64_t groups,\n+ c10::string_view attr,\n+ torch::List> scalars,\n+ c10::optional algorithm) {\n+ Attr att;\n+ att = construct_unary_attr(att, attr, scalars, algorithm);\n+ const Tensor bias = bias_opt.has_value() ? bias_opt.value() : at::Tensor();\n+ std::vector output_padding = {0};\n+\n+ return _convolution(\n+ input_t,\n+ weight_t,\n+ bias,\n+ stride,\n+ padding,\n+ dilation,\n+ /*transposed*/ false,\n+ output_padding,\n+ groups,\n+ att);\n+}\n+\n+Tensor convolution_pointwise_binary(\n+ const Tensor& input_t,\n+ const Tensor& other_t,\n+ const Tensor& weight_t,\n+ const c10::optional& bias_opt,\n+ IntArrayRef padding,\n+ IntArrayRef stride,\n+ IntArrayRef dilation,\n+ int64_t groups,\n+ c10::string_view binary_attr,\n+ c10::optional alpha,\n+ c10::optional unary_attr,\n+ torch::List> unary_scalars,\n+ c10::optional unary_algorithm) {\n+ Tensor output;\n+ Tensor bias = bias_opt.has_value() ? bias_opt.value() : at::Tensor();\n+ // Step1: Construct binary attr\n+ Attr attr;\n+ attr = construct_binary_attr(attr, binary_attr, other_t);\n+ // Step2: Append unary attr\n+ if (unary_attr.has_value())\n+ attr = construct_unary_attr(\n+ attr, unary_attr.value(), unary_scalars, unary_algorithm);\n+\n+ Tensor res = _convolution_out(\n+ output,\n+ input_t,\n+ weight_t,\n+ bias,\n+ stride,\n+ padding,\n+ dilation,\n+ /*transpoced*/ false,\n+ {{0, 0}},\n+ groups,\n+ attr);\n+\n+ // Step3: Run conv\n+ return res;\n+}\n+\n+\n+Tensor& convolution_pointwise_binary_(\n+ Tensor& other_t,\n+ const Tensor& input_t,\n+ const Tensor& weight_t,\n+ const c10::optional& bias_opt,\n+ IntArrayRef padding,\n+ IntArrayRef stride,\n+ IntArrayRef dilation,\n+ int64_t groups,\n+ c10::string_view binary_attr,\n+ c10::optional alpha,\n+ c10::optional unary_attr,\n+ torch::List> unary_scalars,\n+ c10::optional unary_algorithm) {\n+ Tensor bias = bias_opt.has_value() ? 
bias_opt.value() : at::Tensor();\n+ // Step1: Construct binary attr\n+ Attr attr;\n+ attr = construct_binary_attr(attr, binary_attr, other_t);\n+ \n+ std::cout<<\"binary_attr\"<(args)[i_arg - 1]);\n check_type(schema_arg, args[i_arg - 1]);", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "1957124391", - "repo_full_name": "pytorch/pytorch", - "pr_number": 147253, - "pr_file": "torch/csrc/jit/runtime/static/impl.cpp", - "discussion_id": "1957124391", - "commented_code": "@@ -1042,7 +1042,7 @@ void BlockRunner::set_inputs(IValueList&& args, const KeywordArgs& kwargs) {\n const auto& schema_arg = schema_args[i_arg];\n \n if (i_arg - 1 < args.size()) {\n- check_type(schema_arg, std::forward(args)[i_arg - 1]);\n+ check_type(schema_arg, args[i_arg - 1]);", - "comment_created_at": "2025-02-15T14:08:24+00:00", - "comment_author": "Skylion007", - "comment_body": "What error does this resolve?", - "pr_file_module": null - }, - { - "comment_id": "1957154993", - "repo_full_name": "pytorch/pytorch", - "pr_number": 147253, - "pr_file": "torch/csrc/jit/runtime/static/impl.cpp", - "discussion_id": "1957124391", - "commented_code": "@@ -1042,7 +1042,7 @@ void BlockRunner::set_inputs(IValueList&& args, const KeywordArgs& kwargs) {\n const auto& schema_arg = schema_args[i_arg];\n \n if (i_arg - 1 < args.size()) {\n- check_type(schema_arg, std::forward(args)[i_arg - 1]);\n+ check_type(schema_arg, args[i_arg - 1]);", - "comment_created_at": "2025-02-15T16:48:46+00:00", - "comment_author": "cyyever", - "comment_body": "Tidy warns use-after-move. Although that isn't real, removing std::forward can make the code cleaner. ", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-specific-exception-handling.json b/_reviewers/pytorch-specific-exception-handling.json new file mode 100644 index 0000000..70ed808 --- /dev/null +++ b/_reviewers/pytorch-specific-exception-handling.json @@ -0,0 +1,102 @@ +[ + { + "discussion_id": "2169092276", + "pr_number": 155978, + "pr_file": "torch/_dynamo/variables/constant.py", + "created_at": "2025-06-26T13:34:37+00:00", + "commented_code": "raise_observed_exception(type(e), tx)\n elif isinstance(self.value, (float, int)):\n if not (args or kwargs):\n return ConstantVariable.create(getattr(self.value, name)())\n try:\n return ConstantVariable.create(getattr(self.value, name)())\n except Exception as exc:", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2169092276", + "repo_full_name": "pytorch/pytorch", + "pr_number": 155978, + "pr_file": "torch/_dynamo/variables/constant.py", + "discussion_id": "2169092276", + "commented_code": "@@ -173,7 +173,14 @@ def call_method(\n raise_observed_exception(type(e), tx)\n elif isinstance(self.value, (float, int)):\n if not (args or kwargs):\n- return ConstantVariable.create(getattr(self.value, name)())\n+ try:\n+ return ConstantVariable.create(getattr(self.value, name)())\n+ except Exception as exc:", + "comment_created_at": "2025-06-26T13:34:37+00:00", + "comment_author": "zou3519", + "comment_body": "Is there a specific exception we're looking for here?", + "pr_file_module": null + }, + { + "comment_id": "2169149235", + "repo_full_name": "pytorch/pytorch", + "pr_number": 155978, + "pr_file": "torch/_dynamo/variables/constant.py", + "discussion_id": "2169092276", + "commented_code": "@@ -173,7 +173,14 @@ def call_method(\n raise_observed_exception(type(e), tx)\n elif isinstance(self.value, (float, int)):\n if not (args or kwargs):\n- return 
ConstantVariable.create(getattr(self.value, name)())\n+ try:\n+ return ConstantVariable.create(getattr(self.value, name)())\n+ except Exception as exc:", + "comment_created_at": "2025-06-26T14:00:02+00:00", + "comment_author": "guilhermeleobas", + "comment_body": "OverflowError and ValueError. The last one appears when one tries to convert a NaN to an int.\r\n\r\n```python\r\nIn [5]: int(math.nan)\r\n---------------------------------------------------------------------------\r\nValueError Traceback (most recent call last)\r\nCell In[5], line 1\r\n----> 1 int(math.nan)\r\n\r\nValueError: cannot convert float NaN to integer\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2049250850", + "pr_number": 149697, + "pr_file": "torch/_inductor/utils.py", + "created_at": "2025-04-17T15:57:52+00:00", + "commented_code": "@functools.lru_cache(None)\ndef get_device_tflops(dtype: torch.dtype) -> int:\ndef get_device_tflops(dtype: torch.dtype) -> float:\n \"\"\"\n We don't want to throw errors in this function. First check to see if the device is in device_info.py,\n then fall back to the inaccurate triton estimation.\n \"\"\"\n try:", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2049250850", + "repo_full_name": "pytorch/pytorch", + "pr_number": 149697, + "pr_file": "torch/_inductor/utils.py", + "discussion_id": "2049250850", + "commented_code": "@@ -1887,16 +1895,26 @@ def get_backend_num_stages() -> int:\n \n \n @functools.lru_cache(None)\n-def get_device_tflops(dtype: torch.dtype) -> int:\n+def get_device_tflops(dtype: torch.dtype) -> float:\n+ \"\"\"\n+ We don't want to throw errors in this function. First check to see if the device is in device_info.py,\n+ then fall back to the inaccurate triton estimation.\n+ \"\"\"\n+ try:", + "comment_created_at": "2025-04-17T15:57:52+00:00", + "comment_author": "eellison", + "comment_body": "Can we have datasheet_tops just return None if unsuccessful ? I prefer to avoid pattern where we're catching arbitrary exceptions. 
", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2159235161", + "pr_number": 156148, + "pr_file": "torch/_dynamo/variables/builtin.py", + "created_at": "2025-06-20T15:24:28+00:00", + "commented_code": "def expand_list_like(tx: \"InstructionTranslator\", lst, const):\n if isinstance(lst, ConstantVariable):\n lst, const = const, lst\n return lst.__class__(\n items=lst.items * const.as_python_constant(),\n mutation_type=ValueMutationNew(),\n )\n try:\n return lst.__class__(\n items=lst.items * const.as_python_constant(),\n mutation_type=ValueMutationNew(),\n )\n except Exception as exc:\n # MemoryError\n raise_observed_exception(\n type(exc),\n tx,\n args=list(map(ConstantVariable.create, exc.args)),\n )", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2159235161", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156148, + "pr_file": "torch/_dynamo/variables/builtin.py", + "discussion_id": "2159235161", + "commented_code": "@@ -542,10 +542,18 @@ def list_iadd_handler(tx: \"InstructionTranslator\", a, b):\n def expand_list_like(tx: \"InstructionTranslator\", lst, const):\n if isinstance(lst, ConstantVariable):\n lst, const = const, lst\n- return lst.__class__(\n- items=lst.items * const.as_python_constant(),\n- mutation_type=ValueMutationNew(),\n- )\n+ try:\n+ return lst.__class__(\n+ items=lst.items * const.as_python_constant(),\n+ mutation_type=ValueMutationNew(),\n+ )\n+ except Exception as exc:\n+ # MemoryError\n+ raise_observed_exception(\n+ type(exc),\n+ tx,\n+ args=list(map(ConstantVariable.create, exc.args)),\n+ )", + "comment_created_at": "2025-06-20T15:24:28+00:00", + "comment_author": "zou3519", + "comment_body": "What exceptions are we trying to catch here? If it's just MemoryError then we should use that in the except block.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2173506878", + "pr_number": 157203, + "pr_file": "torch/_inductor/codecache.py", + "created_at": "2025-06-28T19:48:33+00:00", + "commented_code": "# Include SASS for the current specific arch\n f\"-gencode arch=compute_{current_arch},code=sm_{current_arch} \"\n )\n subprocess.run(\n cmd.split(), capture_output=True, text=True, check=True\n )\n try:\n subprocess.run(\n cmd.split(),\n capture_output=True,\n text=True,\n check=True,\n )\n except subprocess.CalledProcessError as e:\n print(f\"{cmd} failed with: {e.stderr}\")", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2173506878", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157203, + "pr_file": "torch/_inductor/codecache.py", + "discussion_id": "2173506878", + "commented_code": "@@ -2143,9 +2143,15 @@ def _pad_to_alignment(raw_bytes: bytes) -> bytes:\n # Include SASS for the current specific arch\n f\"-gencode arch=compute_{current_arch},code=sm_{current_arch} \"\n )\n- subprocess.run(\n- cmd.split(), capture_output=True, text=True, check=True\n- )\n+ try:\n+ subprocess.run(\n+ cmd.split(),\n+ capture_output=True,\n+ text=True,\n+ check=True,\n+ )\n+ except subprocess.CalledProcessError as e:\n+ print(f\"{cmd} failed with: {e.stderr}\")", + "comment_created_at": "2025-06-28T19:48:33+00:00", + "comment_author": "jansel", + "comment_body": "```suggestion\r\n print(f\"{cmd} failed with:\\nstdout:\\n{e.stdout}\\nstderr:\\n{e.stderr}\", file=sys.stderr)\r\n raise\r\n```\r\n\r\nI assume you want to re-raise here?", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-specific-exception-handling.md 
b/_reviewers/pytorch-specific-exception-handling.md index 391cade..f893493 100644 --- a/_reviewers/pytorch-specific-exception-handling.md +++ b/_reviewers/pytorch-specific-exception-handling.md @@ -44,107 +44,3 @@ except subprocess.CalledProcessError as e: ``` Only catch generic exceptions when you have a compelling reason, such as protecting critical cleanup operations, and even then consider whether specific exception types would be more appropriate. - - -[ - { - "discussion_id": "2169092276", - "pr_number": 155978, - "pr_file": "torch/_dynamo/variables/constant.py", - "created_at": "2025-06-26T13:34:37+00:00", - "commented_code": "raise_observed_exception(type(e), tx)\n elif isinstance(self.value, (float, int)):\n if not (args or kwargs):\n return ConstantVariable.create(getattr(self.value, name)())\n try:\n return ConstantVariable.create(getattr(self.value, name)())\n except Exception as exc:", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2169092276", - "repo_full_name": "pytorch/pytorch", - "pr_number": 155978, - "pr_file": "torch/_dynamo/variables/constant.py", - "discussion_id": "2169092276", - "commented_code": "@@ -173,7 +173,14 @@ def call_method(\n raise_observed_exception(type(e), tx)\n elif isinstance(self.value, (float, int)):\n if not (args or kwargs):\n- return ConstantVariable.create(getattr(self.value, name)())\n+ try:\n+ return ConstantVariable.create(getattr(self.value, name)())\n+ except Exception as exc:", - "comment_created_at": "2025-06-26T13:34:37+00:00", - "comment_author": "zou3519", - "comment_body": "Is there a specific exception we're looking for here?", - "pr_file_module": null - }, - { - "comment_id": "2169149235", - "repo_full_name": "pytorch/pytorch", - "pr_number": 155978, - "pr_file": "torch/_dynamo/variables/constant.py", - "discussion_id": "2169092276", - "commented_code": "@@ -173,7 +173,14 @@ def call_method(\n raise_observed_exception(type(e), tx)\n elif isinstance(self.value, (float, int)):\n if not (args or kwargs):\n- return ConstantVariable.create(getattr(self.value, name)())\n+ try:\n+ return ConstantVariable.create(getattr(self.value, name)())\n+ except Exception as exc:", - "comment_created_at": "2025-06-26T14:00:02+00:00", - "comment_author": "guilhermeleobas", - "comment_body": "OverflowError and ValueError. The last one appears when one tries to convert a NaN to an int.\r\n\r\n```python\r\nIn [5]: int(math.nan)\r\n---------------------------------------------------------------------------\r\nValueError Traceback (most recent call last)\r\nCell In[5], line 1\r\n----> 1 int(math.nan)\r\n\r\nValueError: cannot convert float NaN to integer\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2049250850", - "pr_number": 149697, - "pr_file": "torch/_inductor/utils.py", - "created_at": "2025-04-17T15:57:52+00:00", - "commented_code": "@functools.lru_cache(None)\ndef get_device_tflops(dtype: torch.dtype) -> int:\ndef get_device_tflops(dtype: torch.dtype) -> float:\n \"\"\"\n We don't want to throw errors in this function. 
First check to see if the device is in device_info.py,\n then fall back to the inaccurate triton estimation.\n \"\"\"\n try:", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2049250850", - "repo_full_name": "pytorch/pytorch", - "pr_number": 149697, - "pr_file": "torch/_inductor/utils.py", - "discussion_id": "2049250850", - "commented_code": "@@ -1887,16 +1895,26 @@ def get_backend_num_stages() -> int:\n \n \n @functools.lru_cache(None)\n-def get_device_tflops(dtype: torch.dtype) -> int:\n+def get_device_tflops(dtype: torch.dtype) -> float:\n+ \"\"\"\n+ We don't want to throw errors in this function. First check to see if the device is in device_info.py,\n+ then fall back to the inaccurate triton estimation.\n+ \"\"\"\n+ try:", - "comment_created_at": "2025-04-17T15:57:52+00:00", - "comment_author": "eellison", - "comment_body": "Can we have datasheet_tops just return None if unsuccessful ? I prefer to avoid pattern where we're catching arbitrary exceptions. ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2159235161", - "pr_number": 156148, - "pr_file": "torch/_dynamo/variables/builtin.py", - "created_at": "2025-06-20T15:24:28+00:00", - "commented_code": "def expand_list_like(tx: \"InstructionTranslator\", lst, const):\n if isinstance(lst, ConstantVariable):\n lst, const = const, lst\n return lst.__class__(\n items=lst.items * const.as_python_constant(),\n mutation_type=ValueMutationNew(),\n )\n try:\n return lst.__class__(\n items=lst.items * const.as_python_constant(),\n mutation_type=ValueMutationNew(),\n )\n except Exception as exc:\n # MemoryError\n raise_observed_exception(\n type(exc),\n tx,\n args=list(map(ConstantVariable.create, exc.args)),\n )", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2159235161", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156148, - "pr_file": "torch/_dynamo/variables/builtin.py", - "discussion_id": "2159235161", - "commented_code": "@@ -542,10 +542,18 @@ def list_iadd_handler(tx: \"InstructionTranslator\", a, b):\n def expand_list_like(tx: \"InstructionTranslator\", lst, const):\n if isinstance(lst, ConstantVariable):\n lst, const = const, lst\n- return lst.__class__(\n- items=lst.items * const.as_python_constant(),\n- mutation_type=ValueMutationNew(),\n- )\n+ try:\n+ return lst.__class__(\n+ items=lst.items * const.as_python_constant(),\n+ mutation_type=ValueMutationNew(),\n+ )\n+ except Exception as exc:\n+ # MemoryError\n+ raise_observed_exception(\n+ type(exc),\n+ tx,\n+ args=list(map(ConstantVariable.create, exc.args)),\n+ )", - "comment_created_at": "2025-06-20T15:24:28+00:00", - "comment_author": "zou3519", - "comment_body": "What exceptions are we trying to catch here? 
If it's just MemoryError then we should use that in the except block.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2173506878", - "pr_number": 157203, - "pr_file": "torch/_inductor/codecache.py", - "created_at": "2025-06-28T19:48:33+00:00", - "commented_code": "# Include SASS for the current specific arch\n f\"-gencode arch=compute_{current_arch},code=sm_{current_arch} \"\n )\n subprocess.run(\n cmd.split(), capture_output=True, text=True, check=True\n )\n try:\n subprocess.run(\n cmd.split(),\n capture_output=True,\n text=True,\n check=True,\n )\n except subprocess.CalledProcessError as e:\n print(f\"{cmd} failed with: {e.stderr}\")", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2173506878", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157203, - "pr_file": "torch/_inductor/codecache.py", - "discussion_id": "2173506878", - "commented_code": "@@ -2143,9 +2143,15 @@ def _pad_to_alignment(raw_bytes: bytes) -> bytes:\n # Include SASS for the current specific arch\n f\"-gencode arch=compute_{current_arch},code=sm_{current_arch} \"\n )\n- subprocess.run(\n- cmd.split(), capture_output=True, text=True, check=True\n- )\n+ try:\n+ subprocess.run(\n+ cmd.split(),\n+ capture_output=True,\n+ text=True,\n+ check=True,\n+ )\n+ except subprocess.CalledProcessError as e:\n+ print(f\"{cmd} failed with: {e.stderr}\")", - "comment_created_at": "2025-06-28T19:48:33+00:00", - "comment_author": "jansel", - "comment_body": "```suggestion\r\n print(f\"{cmd} failed with:\\nstdout:\\n{e.stdout}\\nstderr:\\n{e.stderr}\", file=sys.stderr)\r\n raise\r\n```\r\n\r\nI assume you want to re-raise here?", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-test-comprehensively-and-systematically.json b/_reviewers/pytorch-test-comprehensively-and-systematically.json new file mode 100644 index 0000000..2eb6d6b --- /dev/null +++ b/_reviewers/pytorch-test-comprehensively-and-systematically.json @@ -0,0 +1,180 @@ +[ + { + "discussion_id": "2164098880", + "pr_number": 156689, + "pr_file": "torch/_subclasses/fake_tensor.py", + "created_at": "2025-06-24T13:59:52+00:00", + "commented_code": "nonlocal flat_arg_fake_tensors\n if not self.is_our_fake(x):", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2164098880", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156689, + "pr_file": "torch/_subclasses/fake_tensor.py", + "discussion_id": "2164098880", + "commented_code": "@@ -2774,7 +2774,7 @@ def validate(x: T) -> Union[T, FakeTensor]:\n \n nonlocal flat_arg_fake_tensors\n if not self.is_our_fake(x):", + "comment_created_at": "2025-06-24T13:59:52+00:00", + "comment_author": "atalman", + "comment_body": "Hi @Valentine233 could you please also add unit test for this change ?", + "pr_file_module": null + }, + { + "comment_id": "2165804339", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156689, + "pr_file": "torch/_subclasses/fake_tensor.py", + "discussion_id": "2164098880", + "commented_code": "@@ -2774,7 +2774,7 @@ def validate(x: T) -> Union[T, FakeTensor]:\n \n nonlocal flat_arg_fake_tensors\n if not self.is_our_fake(x):", + "comment_created_at": "2025-06-25T05:22:03+00:00", + "comment_author": "Valentine233", + "comment_body": "Thanks @atalman , the UT has been added.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2109527687", + "pr_number": 154333, + "pr_file": "test/test_export_serde.py", + "created_at": "2025-05-27T15:34:17+00:00", + "commented_code": "import io\nimport 
torch\nfrom torch._export.serde.serialize import deserialize_torch_artifact\n\ndef test_deserialize_torch_artifact_dict():", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2109527687", + "repo_full_name": "pytorch/pytorch", + "pr_number": 154333, + "pr_file": "test/test_export_serde.py", + "discussion_id": "2109527687", + "commented_code": "@@ -0,0 +1,13 @@\n+import io\n+import torch\n+from torch._export.serde.serialize import deserialize_torch_artifact\n+\n+def test_deserialize_torch_artifact_dict():", + "comment_created_at": "2025-05-27T15:34:17+00:00", + "comment_author": "angelayi", + "comment_body": "Can you move your test case into [test_serialize.py](https://github.com/pytorch/pytorch/blob/7ae204c3b67e58405127b9758f7a7da3c3b51b83/test/export/test_serialize.py#L959)?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2169264519", + "pr_number": 156339, + "pr_file": "torch/_dynamo/variables/lists.py", + "created_at": "2025-06-26T14:51:49+00:00", + "commented_code": "else:\n self.items[key.as_python_constant()] = value\n return ConstantVariable.create(None)\n elif name == \"__delitem__\" and self.is_mutable():\n if kwargs or len(args) != 1:\n raise_args_mismatch(tx, name)", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2169264519", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156339, + "pr_file": "torch/_dynamo/variables/lists.py", + "discussion_id": "2169264519", + "commented_code": "@@ -531,6 +531,29 @@ def call_method(\n else:\n self.items[key.as_python_constant()] = value\n return ConstantVariable.create(None)\n+ elif name == \"__delitem__\" and self.is_mutable():\n+ if kwargs or len(args) != 1:\n+ raise_args_mismatch(tx, name)", + "comment_created_at": "2025-06-26T14:51:49+00:00", + "comment_author": "zou3519", + "comment_body": "Can you add a test for what happens if someone does `__delitem__` on a global list? 
I'm hoping dynamo generates the right bytecode for that.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2114123690", + "pr_number": 153150, + "pr_file": "test/dynamo/test_sets.py", + "created_at": "2025-05-29T14:43:53+00:00", + "commented_code": "# Owner(s): [\"module: dynamo\"]\n\n# TODO: move set tests from test_functions.py/test_misc.py to this file\n\n\nimport torch\nimport torch._dynamo.test_case\nfrom torch._dynamo.testing import CompileCounter\n\n\nclass SetGuardsSet(torch._dynamo.test_case.TestCase):\n def test_set_recompile_on_key_pop(self):\n s = {\n torch._C._set_grad_enabled,\n torch.amp._enter_autocast,\n torch.amp._exit_autocast,\n }\n\n cnts = CompileCounter()\n\n def fn(x, s):\n if torch.amp._exit_autocast in s:\n return x.sin()\n return x.cos()\n\n x = torch.randn(4)\n opt_fn = torch.compile(fn, backend=cnts, fullgraph=True)\n res = opt_fn(x, s)\n opt_fn(x, s)\n self.assertEqual(res, fn(x, s))\n # No recompilation\n self.assertEqual(cnts.frame_count, 1)\n\n # Pop a value\n s.remove(torch.amp._exit_autocast)\n\n res = opt_fn(x, s)\n # Check recompilation\n self.assertEqual(cnts.frame_count, 2)\n self.assertEqual(res, fn(x, s))\n\n def test_set_recompile_on_key_change(self):\n s = {\n torch._C._set_grad_enabled,\n torch.amp._enter_autocast,\n torch.amp._exit_autocast,\n }\n\n cnts = CompileCounter()\n\n def fn(x, s):\n if torch.amp._exit_autocast in s:\n return x.sin()\n return x.cos()\n\n x = torch.randn(4)\n opt_fn = torch.compile(fn, backend=cnts, fullgraph=True)\n res = opt_fn(x, s)\n opt_fn(x, s)\n self.assertEqual(res, fn(x, s))\n # No recompilation\n self.assertEqual(cnts.frame_count, 1)\n\n # Pop a value\n s.remove(torch.amp._exit_autocast)\n # Add a different value\n s.add(torch._C._set_autograd_fallback_mode)\n\n res = opt_fn(x, s)\n # Check recompilation\n self.assertEqual(cnts.frame_count, 2)\n self.assertEqual(res, fn(x, s))\n\n def test_set_guard_on_keys_change(self):\n # This test guarantee that we're not triggering any of the dict guards\n # on sets\n s = {\n torch._C._set_grad_enabled,\n torch.amp._enter_autocast,\n torch.amp._exit_autocast,\n }\n\n cnts = CompileCounter()\n\n def fn(x, s):\n for e in s:\n x = x * len(str(e))\n return x\n\n opt_fn = torch.compile(fn, backend=cnts, fullgraph=True)\n opt_fn(torch.randn(4), s)\n opt_fn(torch.randn(4), s)\n # No recompilation\n self.assertEqual(cnts.frame_count, 1)\n\n # pop and add the same item\n s.remove(torch.amp._exit_autocast)\n s.add(torch.amp._exit_autocast)\n\n x = torch.randn(4)\n res = opt_fn(x, s)\n # Check Dynamo don't recompile\n self.assertEqual(cnts.frame_count, 1)\n self.assertEqual(res, fn(x, s))", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2114123690", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153150, + "pr_file": "test/dynamo/test_sets.py", + "discussion_id": "2114123690", + "commented_code": "@@ -0,0 +1,110 @@\n+# Owner(s): [\"module: dynamo\"]\n+\n+# TODO: move set tests from test_functions.py/test_misc.py to this file\n+\n+\n+import torch\n+import torch._dynamo.test_case\n+from torch._dynamo.testing import CompileCounter\n+\n+\n+class SetGuardsSet(torch._dynamo.test_case.TestCase):\n+ def test_set_recompile_on_key_pop(self):\n+ s = {\n+ torch._C._set_grad_enabled,\n+ torch.amp._enter_autocast,\n+ torch.amp._exit_autocast,\n+ }\n+\n+ cnts = CompileCounter()\n+\n+ def fn(x, s):\n+ if torch.amp._exit_autocast in s:\n+ return x.sin()\n+ return x.cos()\n+\n+ x = torch.randn(4)\n+ opt_fn = torch.compile(fn, backend=cnts, 
fullgraph=True)\n+ res = opt_fn(x, s)\n+ opt_fn(x, s)\n+ self.assertEqual(res, fn(x, s))\n+ # No recompilation\n+ self.assertEqual(cnts.frame_count, 1)\n+\n+ # Pop a value\n+ s.remove(torch.amp._exit_autocast)\n+\n+ res = opt_fn(x, s)\n+ # Check recompilation\n+ self.assertEqual(cnts.frame_count, 2)\n+ self.assertEqual(res, fn(x, s))\n+\n+ def test_set_recompile_on_key_change(self):\n+ s = {\n+ torch._C._set_grad_enabled,\n+ torch.amp._enter_autocast,\n+ torch.amp._exit_autocast,\n+ }\n+\n+ cnts = CompileCounter()\n+\n+ def fn(x, s):\n+ if torch.amp._exit_autocast in s:\n+ return x.sin()\n+ return x.cos()\n+\n+ x = torch.randn(4)\n+ opt_fn = torch.compile(fn, backend=cnts, fullgraph=True)\n+ res = opt_fn(x, s)\n+ opt_fn(x, s)\n+ self.assertEqual(res, fn(x, s))\n+ # No recompilation\n+ self.assertEqual(cnts.frame_count, 1)\n+\n+ # Pop a value\n+ s.remove(torch.amp._exit_autocast)\n+ # Add a different value\n+ s.add(torch._C._set_autograd_fallback_mode)\n+\n+ res = opt_fn(x, s)\n+ # Check recompilation\n+ self.assertEqual(cnts.frame_count, 2)\n+ self.assertEqual(res, fn(x, s))\n+\n+ def test_set_guard_on_keys_change(self):\n+ # This test guarantee that we're not triggering any of the dict guards\n+ # on sets\n+ s = {\n+ torch._C._set_grad_enabled,\n+ torch.amp._enter_autocast,\n+ torch.amp._exit_autocast,\n+ }\n+\n+ cnts = CompileCounter()\n+\n+ def fn(x, s):\n+ for e in s:\n+ x = x * len(str(e))\n+ return x\n+\n+ opt_fn = torch.compile(fn, backend=cnts, fullgraph=True)\n+ opt_fn(torch.randn(4), s)\n+ opt_fn(torch.randn(4), s)\n+ # No recompilation\n+ self.assertEqual(cnts.frame_count, 1)\n+\n+ # pop and add the same item\n+ s.remove(torch.amp._exit_autocast)\n+ s.add(torch.amp._exit_autocast)\n+\n+ x = torch.randn(4)\n+ res = opt_fn(x, s)\n+ # Check Dynamo don't recompile\n+ self.assertEqual(cnts.frame_count, 1)\n+ self.assertEqual(res, fn(x, s))\n+\n+", + "comment_created_at": "2025-05-29T14:43:53+00:00", + "comment_author": "zou3519", + "comment_body": "Do we support sets of Tensors? If so, can we add some tests for those? 
I expect some of the guards might be too conservative but that's OK", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2064433335", + "pr_number": 152161, + "pr_file": "test/test_indexing.py", + "created_at": "2025-04-28T19:52:50+00:00", + "commented_code": "self.assertEqual(v[0].tolist(), [0, 3, 0, 4])\n self.assertEqual(v[1:].sum(), 0)\n\n def test_take_along_dim_negative_indices(self) -> None:", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2064433335", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152161, + "pr_file": "test/test_indexing.py", + "discussion_id": "2064433335", + "commented_code": "@@ -843,6 +843,13 @@ def test_step_assignment(self, device):\n self.assertEqual(v[0].tolist(), [0, 3, 0, 4])\n self.assertEqual(v[1:].sum(), 0)\n \n+ def test_take_along_dim_negative_indices(self) -> None:", + "comment_created_at": "2025-04-28T19:52:50+00:00", + "comment_author": "albanD", + "comment_body": "You can update the generic test for take_along_dim to test these values in https://github.com/pytorch/pytorch/blob/5f4c8e4c896f9a1d89e1bf9b3bd92b3e7db5c3b9/torch/testing/_internal/common_methods_invocations.py#L2988\r\nThis is better than a one-off test.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2175646168", + "pr_number": 156572, + "pr_file": "test/inductor/test_gpu_cpp_wrapper.py", + "created_at": "2025-06-30T18:22:03+00:00", + "commented_code": "device=None,\n tests=test_select_algorithm.TestSelectAlgorithm(),\n ),\n BaseTest(", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2175646168", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156572, + "pr_file": "test/inductor/test_gpu_cpp_wrapper.py", + "discussion_id": "2175646168", + "commented_code": "@@ -291,6 +294,36 @@ class BaseTest(NamedTuple):\n device=None,\n tests=test_select_algorithm.TestSelectAlgorithm(),\n ),\n+ BaseTest(", + "comment_created_at": "2025-06-30T18:22:03+00:00", + "comment_author": "desertfire", + "comment_body": "We actually test way more cpp-wrapper mode than tests in this file, see https://github.com/pytorch/pytorch/blob/c7b6c98d1097bec9dc92bde2fe324aa126a5daa2/.ci/pytorch/test.sh#L462-L464. 
Can we do the same for XPU and stop adding tests to this file?", + "pr_file_module": null + }, + { + "comment_id": "2178781741", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156572, + "pr_file": "test/inductor/test_gpu_cpp_wrapper.py", + "discussion_id": "2175646168", + "commented_code": "@@ -291,6 +294,36 @@ class BaseTest(NamedTuple):\n device=None,\n tests=test_select_algorithm.TestSelectAlgorithm(),\n ),\n+ BaseTest(", + "comment_created_at": "2025-07-02T00:56:07+00:00", + "comment_author": "etaf", + "comment_body": "@desertfire: Sure, thanks for your advice.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2172786579", + "pr_number": 157136, + "pr_file": "test/dynamo/test_fx_graph_runnable.py", + "created_at": "2025-06-27T19:59:14+00:00", + "commented_code": "torch.compile(f)(torch.randn(5))\n self._exec_and_verify_payload()\n\n # testing dynamic shapes\n def test_dynamic_shapes_run(self):\n torch._dynamo.reset()\n torch._dynamo.config.dynamic_shapes = True\n\n def f(x):\n return (x @ x.transpose(0, 1)).relu()\n\n a = torch.randn(10, 12)\n torch._dynamo.mark_dynamic(a, 0)\n torch._dynamo.mark_dynamic(a, 1)\n\n torch.compile(f)(a)\n self._exec_and_verify_payload()\n\n def test_broadcast_add_dynamic(self):\n torch._dynamo.reset()\n torch._dynamo.config.dynamic_shapes = True\n\n def f(x, y):\n return x + y * 2\n\n x = torch.randn(5, 1)\n y = torch.randn(1, 8)\n torch._dynamo.mark_dynamic(x, 0)\n torch._dynamo.mark_dynamic(y, 1)\n\n torch.compile(f)(x, y)\n self._exec_and_verify_payload()\n\n def test_toy_model_basic(self):\n model = ToyModel(input_size=8, hidden_size=16, output_size=4)\n model.eval() # Set to eval mode to avoid dropout randomness\n\n x = torch.randn(3, 8)\n torch.compile(model)(x)\n self._exec_and_verify_payload()\n\n def test_toy_model_batch_processing(self):\n model = ToyModel(input_size=12, hidden_size=24, output_size=6)\n model.eval()\n\n x = torch.randn(16, 12)\n torch.compile(model)(x)\n self._exec_and_verify_payload()\n\n def test_toy_model_dynamic_batch(self):\n torch._dynamo.reset()", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2172786579", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157136, + "pr_file": "test/dynamo/test_fx_graph_runnable.py", + "discussion_id": "2172786579", + "commented_code": "@@ -90,6 +106,65 @@ def f(x):\n torch.compile(f)(torch.randn(5))\n self._exec_and_verify_payload()\n \n+ # testing dynamic shapes\n+ def test_dynamic_shapes_run(self):\n+ torch._dynamo.reset()\n+ torch._dynamo.config.dynamic_shapes = True\n+\n+ def f(x):\n+ return (x @ x.transpose(0, 1)).relu()\n+\n+ a = torch.randn(10, 12)\n+ torch._dynamo.mark_dynamic(a, 0)\n+ torch._dynamo.mark_dynamic(a, 1)\n+\n+ torch.compile(f)(a)\n+ self._exec_and_verify_payload()\n+\n+ def test_broadcast_add_dynamic(self):\n+ torch._dynamo.reset()\n+ torch._dynamo.config.dynamic_shapes = True\n+\n+ def f(x, y):\n+ return x + y * 2\n+\n+ x = torch.randn(5, 1)\n+ y = torch.randn(1, 8)\n+ torch._dynamo.mark_dynamic(x, 0)\n+ torch._dynamo.mark_dynamic(y, 1)\n+\n+ torch.compile(f)(x, y)\n+ self._exec_and_verify_payload()\n+\n+ def test_toy_model_basic(self):\n+ model = ToyModel(input_size=8, hidden_size=16, output_size=4)\n+ model.eval() # Set to eval mode to avoid dropout randomness\n+\n+ x = torch.randn(3, 8)\n+ torch.compile(model)(x)\n+ self._exec_and_verify_payload()\n+\n+ def test_toy_model_batch_processing(self):\n+ model = ToyModel(input_size=12, hidden_size=24, output_size=6)\n+ model.eval()\n+\n+ x = 
torch.randn(16, 12)\n+ torch.compile(model)(x)\n+ self._exec_and_verify_payload()\n+\n+ def test_toy_model_dynamic_batch(self):\n+ torch._dynamo.reset()", + "comment_created_at": "2025-06-27T19:59:14+00:00", + "comment_author": "xmfan", + "comment_body": "let's move this to the test `setUp`", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-test-comprehensively-and-systematically.md b/_reviewers/pytorch-test-comprehensively-and-systematically.md index 6be9d0e..9d99211 100644 --- a/_reviewers/pytorch-test-comprehensively-and-systematically.md +++ b/_reviewers/pytorch-test-comprehensively-and-systematically.md @@ -63,185 +63,3 @@ Key practices: ``` By following these practices, you'll create tests that serve as both validation and documentation, ensuring your code remains robust across changes and is easier for others to understand and maintain. - - -[ - { - "discussion_id": "2164098880", - "pr_number": 156689, - "pr_file": "torch/_subclasses/fake_tensor.py", - "created_at": "2025-06-24T13:59:52+00:00", - "commented_code": "nonlocal flat_arg_fake_tensors\n if not self.is_our_fake(x):", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2164098880", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156689, - "pr_file": "torch/_subclasses/fake_tensor.py", - "discussion_id": "2164098880", - "commented_code": "@@ -2774,7 +2774,7 @@ def validate(x: T) -> Union[T, FakeTensor]:\n \n nonlocal flat_arg_fake_tensors\n if not self.is_our_fake(x):", - "comment_created_at": "2025-06-24T13:59:52+00:00", - "comment_author": "atalman", - "comment_body": "Hi @Valentine233 could you please also add unit test for this change ?", - "pr_file_module": null - }, - { - "comment_id": "2165804339", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156689, - "pr_file": "torch/_subclasses/fake_tensor.py", - "discussion_id": "2164098880", - "commented_code": "@@ -2774,7 +2774,7 @@ def validate(x: T) -> Union[T, FakeTensor]:\n \n nonlocal flat_arg_fake_tensors\n if not self.is_our_fake(x):", - "comment_created_at": "2025-06-25T05:22:03+00:00", - "comment_author": "Valentine233", - "comment_body": "Thanks @atalman , the UT has been added.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2109527687", - "pr_number": 154333, - "pr_file": "test/test_export_serde.py", - "created_at": "2025-05-27T15:34:17+00:00", - "commented_code": "import io\nimport torch\nfrom torch._export.serde.serialize import deserialize_torch_artifact\n\ndef test_deserialize_torch_artifact_dict():", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2109527687", - "repo_full_name": "pytorch/pytorch", - "pr_number": 154333, - "pr_file": "test/test_export_serde.py", - "discussion_id": "2109527687", - "commented_code": "@@ -0,0 +1,13 @@\n+import io\n+import torch\n+from torch._export.serde.serialize import deserialize_torch_artifact\n+\n+def test_deserialize_torch_artifact_dict():", - "comment_created_at": "2025-05-27T15:34:17+00:00", - "comment_author": "angelayi", - "comment_body": "Can you move your test case into [test_serialize.py](https://github.com/pytorch/pytorch/blob/7ae204c3b67e58405127b9758f7a7da3c3b51b83/test/export/test_serialize.py#L959)?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2169264519", - "pr_number": 156339, - "pr_file": "torch/_dynamo/variables/lists.py", - "created_at": "2025-06-26T14:51:49+00:00", - "commented_code": "else:\n self.items[key.as_python_constant()] = 
value\n return ConstantVariable.create(None)\n elif name == \"__delitem__\" and self.is_mutable():\n if kwargs or len(args) != 1:\n raise_args_mismatch(tx, name)", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2169264519", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156339, - "pr_file": "torch/_dynamo/variables/lists.py", - "discussion_id": "2169264519", - "commented_code": "@@ -531,6 +531,29 @@ def call_method(\n else:\n self.items[key.as_python_constant()] = value\n return ConstantVariable.create(None)\n+ elif name == \"__delitem__\" and self.is_mutable():\n+ if kwargs or len(args) != 1:\n+ raise_args_mismatch(tx, name)", - "comment_created_at": "2025-06-26T14:51:49+00:00", - "comment_author": "zou3519", - "comment_body": "Can you add a test for what happens if someone does `__delitem__` on a global list? I'm hoping dynamo generates the right bytecode for that.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2114123690", - "pr_number": 153150, - "pr_file": "test/dynamo/test_sets.py", - "created_at": "2025-05-29T14:43:53+00:00", - "commented_code": "# Owner(s): [\"module: dynamo\"]\n\n# TODO: move set tests from test_functions.py/test_misc.py to this file\n\n\nimport torch\nimport torch._dynamo.test_case\nfrom torch._dynamo.testing import CompileCounter\n\n\nclass SetGuardsSet(torch._dynamo.test_case.TestCase):\n def test_set_recompile_on_key_pop(self):\n s = {\n torch._C._set_grad_enabled,\n torch.amp._enter_autocast,\n torch.amp._exit_autocast,\n }\n\n cnts = CompileCounter()\n\n def fn(x, s):\n if torch.amp._exit_autocast in s:\n return x.sin()\n return x.cos()\n\n x = torch.randn(4)\n opt_fn = torch.compile(fn, backend=cnts, fullgraph=True)\n res = opt_fn(x, s)\n opt_fn(x, s)\n self.assertEqual(res, fn(x, s))\n # No recompilation\n self.assertEqual(cnts.frame_count, 1)\n\n # Pop a value\n s.remove(torch.amp._exit_autocast)\n\n res = opt_fn(x, s)\n # Check recompilation\n self.assertEqual(cnts.frame_count, 2)\n self.assertEqual(res, fn(x, s))\n\n def test_set_recompile_on_key_change(self):\n s = {\n torch._C._set_grad_enabled,\n torch.amp._enter_autocast,\n torch.amp._exit_autocast,\n }\n\n cnts = CompileCounter()\n\n def fn(x, s):\n if torch.amp._exit_autocast in s:\n return x.sin()\n return x.cos()\n\n x = torch.randn(4)\n opt_fn = torch.compile(fn, backend=cnts, fullgraph=True)\n res = opt_fn(x, s)\n opt_fn(x, s)\n self.assertEqual(res, fn(x, s))\n # No recompilation\n self.assertEqual(cnts.frame_count, 1)\n\n # Pop a value\n s.remove(torch.amp._exit_autocast)\n # Add a different value\n s.add(torch._C._set_autograd_fallback_mode)\n\n res = opt_fn(x, s)\n # Check recompilation\n self.assertEqual(cnts.frame_count, 2)\n self.assertEqual(res, fn(x, s))\n\n def test_set_guard_on_keys_change(self):\n # This test guarantee that we're not triggering any of the dict guards\n # on sets\n s = {\n torch._C._set_grad_enabled,\n torch.amp._enter_autocast,\n torch.amp._exit_autocast,\n }\n\n cnts = CompileCounter()\n\n def fn(x, s):\n for e in s:\n x = x * len(str(e))\n return x\n\n opt_fn = torch.compile(fn, backend=cnts, fullgraph=True)\n opt_fn(torch.randn(4), s)\n opt_fn(torch.randn(4), s)\n # No recompilation\n self.assertEqual(cnts.frame_count, 1)\n\n # pop and add the same item\n s.remove(torch.amp._exit_autocast)\n s.add(torch.amp._exit_autocast)\n\n x = torch.randn(4)\n res = opt_fn(x, s)\n # Check Dynamo don't recompile\n self.assertEqual(cnts.frame_count, 1)\n self.assertEqual(res, fn(x, s))", - "repo_full_name": 
"pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2114123690", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153150, - "pr_file": "test/dynamo/test_sets.py", - "discussion_id": "2114123690", - "commented_code": "@@ -0,0 +1,110 @@\n+# Owner(s): [\"module: dynamo\"]\n+\n+# TODO: move set tests from test_functions.py/test_misc.py to this file\n+\n+\n+import torch\n+import torch._dynamo.test_case\n+from torch._dynamo.testing import CompileCounter\n+\n+\n+class SetGuardsSet(torch._dynamo.test_case.TestCase):\n+ def test_set_recompile_on_key_pop(self):\n+ s = {\n+ torch._C._set_grad_enabled,\n+ torch.amp._enter_autocast,\n+ torch.amp._exit_autocast,\n+ }\n+\n+ cnts = CompileCounter()\n+\n+ def fn(x, s):\n+ if torch.amp._exit_autocast in s:\n+ return x.sin()\n+ return x.cos()\n+\n+ x = torch.randn(4)\n+ opt_fn = torch.compile(fn, backend=cnts, fullgraph=True)\n+ res = opt_fn(x, s)\n+ opt_fn(x, s)\n+ self.assertEqual(res, fn(x, s))\n+ # No recompilation\n+ self.assertEqual(cnts.frame_count, 1)\n+\n+ # Pop a value\n+ s.remove(torch.amp._exit_autocast)\n+\n+ res = opt_fn(x, s)\n+ # Check recompilation\n+ self.assertEqual(cnts.frame_count, 2)\n+ self.assertEqual(res, fn(x, s))\n+\n+ def test_set_recompile_on_key_change(self):\n+ s = {\n+ torch._C._set_grad_enabled,\n+ torch.amp._enter_autocast,\n+ torch.amp._exit_autocast,\n+ }\n+\n+ cnts = CompileCounter()\n+\n+ def fn(x, s):\n+ if torch.amp._exit_autocast in s:\n+ return x.sin()\n+ return x.cos()\n+\n+ x = torch.randn(4)\n+ opt_fn = torch.compile(fn, backend=cnts, fullgraph=True)\n+ res = opt_fn(x, s)\n+ opt_fn(x, s)\n+ self.assertEqual(res, fn(x, s))\n+ # No recompilation\n+ self.assertEqual(cnts.frame_count, 1)\n+\n+ # Pop a value\n+ s.remove(torch.amp._exit_autocast)\n+ # Add a different value\n+ s.add(torch._C._set_autograd_fallback_mode)\n+\n+ res = opt_fn(x, s)\n+ # Check recompilation\n+ self.assertEqual(cnts.frame_count, 2)\n+ self.assertEqual(res, fn(x, s))\n+\n+ def test_set_guard_on_keys_change(self):\n+ # This test guarantee that we're not triggering any of the dict guards\n+ # on sets\n+ s = {\n+ torch._C._set_grad_enabled,\n+ torch.amp._enter_autocast,\n+ torch.amp._exit_autocast,\n+ }\n+\n+ cnts = CompileCounter()\n+\n+ def fn(x, s):\n+ for e in s:\n+ x = x * len(str(e))\n+ return x\n+\n+ opt_fn = torch.compile(fn, backend=cnts, fullgraph=True)\n+ opt_fn(torch.randn(4), s)\n+ opt_fn(torch.randn(4), s)\n+ # No recompilation\n+ self.assertEqual(cnts.frame_count, 1)\n+\n+ # pop and add the same item\n+ s.remove(torch.amp._exit_autocast)\n+ s.add(torch.amp._exit_autocast)\n+\n+ x = torch.randn(4)\n+ res = opt_fn(x, s)\n+ # Check Dynamo don't recompile\n+ self.assertEqual(cnts.frame_count, 1)\n+ self.assertEqual(res, fn(x, s))\n+\n+", - "comment_created_at": "2025-05-29T14:43:53+00:00", - "comment_author": "zou3519", - "comment_body": "Do we support sets of Tensors? If so, can we add some tests for those? 
I expect some of the guards might be too conservative but that's OK", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2064433335", - "pr_number": 152161, - "pr_file": "test/test_indexing.py", - "created_at": "2025-04-28T19:52:50+00:00", - "commented_code": "self.assertEqual(v[0].tolist(), [0, 3, 0, 4])\n self.assertEqual(v[1:].sum(), 0)\n\n def test_take_along_dim_negative_indices(self) -> None:", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2064433335", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152161, - "pr_file": "test/test_indexing.py", - "discussion_id": "2064433335", - "commented_code": "@@ -843,6 +843,13 @@ def test_step_assignment(self, device):\n self.assertEqual(v[0].tolist(), [0, 3, 0, 4])\n self.assertEqual(v[1:].sum(), 0)\n \n+ def test_take_along_dim_negative_indices(self) -> None:", - "comment_created_at": "2025-04-28T19:52:50+00:00", - "comment_author": "albanD", - "comment_body": "You can update the generic test for take_along_dim to test these values in https://github.com/pytorch/pytorch/blob/5f4c8e4c896f9a1d89e1bf9b3bd92b3e7db5c3b9/torch/testing/_internal/common_methods_invocations.py#L2988\r\nThis is better than a one-off test.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2175646168", - "pr_number": 156572, - "pr_file": "test/inductor/test_gpu_cpp_wrapper.py", - "created_at": "2025-06-30T18:22:03+00:00", - "commented_code": "device=None,\n tests=test_select_algorithm.TestSelectAlgorithm(),\n ),\n BaseTest(", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2175646168", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156572, - "pr_file": "test/inductor/test_gpu_cpp_wrapper.py", - "discussion_id": "2175646168", - "commented_code": "@@ -291,6 +294,36 @@ class BaseTest(NamedTuple):\n device=None,\n tests=test_select_algorithm.TestSelectAlgorithm(),\n ),\n+ BaseTest(", - "comment_created_at": "2025-06-30T18:22:03+00:00", - "comment_author": "desertfire", - "comment_body": "We actually test way more cpp-wrapper mode than tests in this file, see https://github.com/pytorch/pytorch/blob/c7b6c98d1097bec9dc92bde2fe324aa126a5daa2/.ci/pytorch/test.sh#L462-L464. 
Can we do the same for XPU and stop adding tests to this file?", - "pr_file_module": null - }, - { - "comment_id": "2178781741", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156572, - "pr_file": "test/inductor/test_gpu_cpp_wrapper.py", - "discussion_id": "2175646168", - "commented_code": "@@ -291,6 +294,36 @@ class BaseTest(NamedTuple):\n device=None,\n tests=test_select_algorithm.TestSelectAlgorithm(),\n ),\n+ BaseTest(", - "comment_created_at": "2025-07-02T00:56:07+00:00", - "comment_author": "etaf", - "comment_body": "@desertfire: Sure, thanks for your advice.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2172786579", - "pr_number": 157136, - "pr_file": "test/dynamo/test_fx_graph_runnable.py", - "created_at": "2025-06-27T19:59:14+00:00", - "commented_code": "torch.compile(f)(torch.randn(5))\n self._exec_and_verify_payload()\n\n # testing dynamic shapes\n def test_dynamic_shapes_run(self):\n torch._dynamo.reset()\n torch._dynamo.config.dynamic_shapes = True\n\n def f(x):\n return (x @ x.transpose(0, 1)).relu()\n\n a = torch.randn(10, 12)\n torch._dynamo.mark_dynamic(a, 0)\n torch._dynamo.mark_dynamic(a, 1)\n\n torch.compile(f)(a)\n self._exec_and_verify_payload()\n\n def test_broadcast_add_dynamic(self):\n torch._dynamo.reset()\n torch._dynamo.config.dynamic_shapes = True\n\n def f(x, y):\n return x + y * 2\n\n x = torch.randn(5, 1)\n y = torch.randn(1, 8)\n torch._dynamo.mark_dynamic(x, 0)\n torch._dynamo.mark_dynamic(y, 1)\n\n torch.compile(f)(x, y)\n self._exec_and_verify_payload()\n\n def test_toy_model_basic(self):\n model = ToyModel(input_size=8, hidden_size=16, output_size=4)\n model.eval() # Set to eval mode to avoid dropout randomness\n\n x = torch.randn(3, 8)\n torch.compile(model)(x)\n self._exec_and_verify_payload()\n\n def test_toy_model_batch_processing(self):\n model = ToyModel(input_size=12, hidden_size=24, output_size=6)\n model.eval()\n\n x = torch.randn(16, 12)\n torch.compile(model)(x)\n self._exec_and_verify_payload()\n\n def test_toy_model_dynamic_batch(self):\n torch._dynamo.reset()", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2172786579", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157136, - "pr_file": "test/dynamo/test_fx_graph_runnable.py", - "discussion_id": "2172786579", - "commented_code": "@@ -90,6 +106,65 @@ def f(x):\n torch.compile(f)(torch.randn(5))\n self._exec_and_verify_payload()\n \n+ # testing dynamic shapes\n+ def test_dynamic_shapes_run(self):\n+ torch._dynamo.reset()\n+ torch._dynamo.config.dynamic_shapes = True\n+\n+ def f(x):\n+ return (x @ x.transpose(0, 1)).relu()\n+\n+ a = torch.randn(10, 12)\n+ torch._dynamo.mark_dynamic(a, 0)\n+ torch._dynamo.mark_dynamic(a, 1)\n+\n+ torch.compile(f)(a)\n+ self._exec_and_verify_payload()\n+\n+ def test_broadcast_add_dynamic(self):\n+ torch._dynamo.reset()\n+ torch._dynamo.config.dynamic_shapes = True\n+\n+ def f(x, y):\n+ return x + y * 2\n+\n+ x = torch.randn(5, 1)\n+ y = torch.randn(1, 8)\n+ torch._dynamo.mark_dynamic(x, 0)\n+ torch._dynamo.mark_dynamic(y, 1)\n+\n+ torch.compile(f)(x, y)\n+ self._exec_and_verify_payload()\n+\n+ def test_toy_model_basic(self):\n+ model = ToyModel(input_size=8, hidden_size=16, output_size=4)\n+ model.eval() # Set to eval mode to avoid dropout randomness\n+\n+ x = torch.randn(3, 8)\n+ torch.compile(model)(x)\n+ self._exec_and_verify_payload()\n+\n+ def test_toy_model_batch_processing(self):\n+ model = ToyModel(input_size=12, hidden_size=24, output_size=6)\n+ model.eval()\n+\n+ x = 
torch.randn(16, 12)\n+ torch.compile(model)(x)\n+ self._exec_and_verify_payload()\n+\n+ def test_toy_model_dynamic_batch(self):\n+ torch._dynamo.reset()", - "comment_created_at": "2025-06-27T19:59:14+00:00", - "comment_author": "xmfan", - "comment_body": "let's move this to the test `setUp`", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-use-accelerator-agnostic-code.json b/_reviewers/pytorch-use-accelerator-agnostic-code.json new file mode 100644 index 0000000..4d225fb --- /dev/null +++ b/_reviewers/pytorch-use-accelerator-agnostic-code.json @@ -0,0 +1,374 @@ +[ + { + "discussion_id": "2177758844", + "pr_number": 156287, + "pr_file": "torch/_functorch/partitioners.py", + "created_at": "2025-07-01T14:28:29+00:00", + "commented_code": "):\n with no_dispatch(), unset_fake_temporarily():\n objects = [[x.name for x in saved_values]]\n # TODO: maybe use a different process group for this\n torch.distributed.broadcast_object_list(objects, src=0)\n saved_values_names = objects[0]\n saved_ops_names_all_ranks: list[list[str]] = [\n [] for _ in range(torch.distributed.get_world_size())\n ]\n\n torch.distributed.all_gather_object(saved_ops_names_all_ranks, objects[0])\n name_to_node = get_name_to_node(joint_graph)\n saved_values = [name_to_node[n] for n in saved_values_names]\n saved_sizes: list[int] = []\n for saved_ops_names in saved_ops_names_all_ranks:\n saved_nodes = [name_to_node[op_name] for op_name in saved_ops_names]\n saved_size = 0\n for node in saved_nodes:\n saved_size += _size_of(node)\n saved_sizes.append(saved_size)\n\n saved_sizes_tensor = torch.tensor(\n saved_sizes, device=torch.cuda.current_device()", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2177758844", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156287, + "pr_file": "torch/_functorch/partitioners.py", + "discussion_id": "2177758844", + "commented_code": "@@ -2515,11 +2515,38 @@ def has_same_nodes(joint_graph):\n ):\n with no_dispatch(), unset_fake_temporarily():\n objects = [[x.name for x in saved_values]]\n- # TODO: maybe use a different process group for this\n- torch.distributed.broadcast_object_list(objects, src=0)\n- saved_values_names = objects[0]\n+ saved_ops_names_all_ranks: list[list[str]] = [\n+ [] for _ in range(torch.distributed.get_world_size())\n+ ]\n+\n+ torch.distributed.all_gather_object(saved_ops_names_all_ranks, objects[0])\n name_to_node = get_name_to_node(joint_graph)\n- saved_values = [name_to_node[n] for n in saved_values_names]\n+ saved_sizes: list[int] = []\n+ for saved_ops_names in saved_ops_names_all_ranks:\n+ saved_nodes = [name_to_node[op_name] for op_name in saved_ops_names]\n+ saved_size = 0\n+ for node in saved_nodes:\n+ saved_size += _size_of(node)\n+ saved_sizes.append(saved_size)\n+\n+ saved_sizes_tensor = torch.tensor(\n+ saved_sizes, device=torch.cuda.current_device()", + "comment_created_at": "2025-07-01T14:28:29+00:00", + "comment_author": "bdhirsh", + "comment_body": "Instead of hardcoding the cuda device here, can we try to use the same device that our collectives are going to use?\r\n\r\nMaybe @wconstab knows of a cleaner way, but it looks like `all_gather_object` figures out what device to use with [this](https://github.com/pytorch/pytorch/blob/main/torch/distributed/distributed_c10d.py#L3153C5-L3153C52)\r\n```\r\ncurrent_device = _get_object_coll_device(_group_or_default_group())\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2179128797", + "pr_number": 155108, + "pr_file": 
"test/distributed/pipelining/test_backward.py", + "created_at": "2025-07-02T05:14:19+00:00", + "commented_code": "stage_backward_input,\n stage_backward_weight,\n)\nfrom torch.testing._internal.common_device_type import instantiate_device_type_tests\nfrom torch.testing._internal.common_utils import run_tests, TestCase\n\n\nd_hid = 512\nbatch_size = 256\n\ndevice = torch.accelerator.current_accelerator().type", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2179128797", + "repo_full_name": "pytorch/pytorch", + "pr_number": 155108, + "pr_file": "test/distributed/pipelining/test_backward.py", + "discussion_id": "2179128797", + "commented_code": "@@ -10,16 +10,17 @@\n stage_backward_input,\n stage_backward_weight,\n )\n-from torch.testing._internal.common_device_type import instantiate_device_type_tests\n from torch.testing._internal.common_utils import run_tests, TestCase\n \n \n d_hid = 512\n batch_size = 256\n \n+device = torch.accelerator.current_accelerator().type", + "comment_created_at": "2025-07-02T05:14:19+00:00", + "comment_author": "guangyey", + "comment_body": "This will fail on the CPU-only machine.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2179130007", + "pr_number": 155108, + "pr_file": "test/distributed/pipelining/test_backward.py", + "created_at": "2025-07-02T05:15:18+00:00", + "commented_code": "stage_backward_input,\n stage_backward_weight,\n)\nfrom torch.testing._internal.common_device_type import instantiate_device_type_tests\nfrom torch.testing._internal.common_utils import run_tests, TestCase\n\n\nd_hid = 512\nbatch_size = 256\n\ndevice = torch.accelerator.current_accelerator().type", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2179130007", + "repo_full_name": "pytorch/pytorch", + "pr_number": 155108, + "pr_file": "test/distributed/pipelining/test_backward.py", + "discussion_id": "2179130007", + "commented_code": "@@ -10,16 +10,17 @@\n stage_backward_input,\n stage_backward_weight,\n )\n-from torch.testing._internal.common_device_type import instantiate_device_type_tests\n from torch.testing._internal.common_utils import run_tests, TestCase\n \n \n d_hid = 512\n batch_size = 256\n \n+device = torch.accelerator.current_accelerator().type", + "comment_created_at": "2025-07-02T05:15:18+00:00", + "comment_author": "guangyey", + "comment_body": "```suggestion\r\ndevice = acc.type if (acc := torch.accelerator.current_accelerator()) else \"cpu\"\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2179135225", + "repo_full_name": "pytorch/pytorch", + "pr_number": 155108, + "pr_file": "test/distributed/pipelining/test_backward.py", + "discussion_id": "2179130007", + "commented_code": "@@ -10,16 +10,17 @@\n stage_backward_input,\n stage_backward_weight,\n )\n-from torch.testing._internal.common_device_type import instantiate_device_type_tests\n from torch.testing._internal.common_utils import run_tests, TestCase\n \n \n d_hid = 512\n batch_size = 256\n \n+device = torch.accelerator.current_accelerator().type", + "comment_created_at": "2025-07-02T05:19:14+00:00", + "comment_author": "AnantGulati", + "comment_body": "As per my understanding, since pipeline parallelism is dependent on communication collectives supported only for accelerator backends, not by GLOO hence we would not want to test on CPU\r\n\r\n@H-Huang Could you please confirm\r\n\r\nThanks ", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2176247028", + "pr_number": 157317, + "pr_file": 
"torch/distributed/tensor/examples/visualize_sharding_example.py", + "created_at": "2025-07-01T01:56:31+00:00", + "commented_code": "assert int(os.getenv(\"WORLD_SIZE\", \"1\")) >= 4, \"We need at least 4 devices\"\nrank = int(os.environ[\"RANK\"])\n\ndef get_device_type() -> str:\n return (\n torch.accelerator.current_accelerator().type\n if torch.accelerator.current_accelerator() and torch.accelerator.device_count()\n else \"cpu\"\n )", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2176247028", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157317, + "pr_file": "torch/distributed/tensor/examples/visualize_sharding_example.py", + "discussion_id": "2176247028", + "commented_code": "@@ -17,6 +17,15 @@\n assert int(os.getenv(\"WORLD_SIZE\", \"1\")) >= 4, \"We need at least 4 devices\"\n rank = int(os.environ[\"RANK\"])\n \n+def get_device_type() -> str:\n+ return (\n+ torch.accelerator.current_accelerator().type\n+ if torch.accelerator.current_accelerator() and torch.accelerator.device_count()\n+ else \"cpu\"\n+ )", + "comment_created_at": "2025-07-01T01:56:31+00:00", + "comment_author": "guangyey", + "comment_body": "```suggestion\r\n return acc.type if (acc := torch.accelerator.current_accelerator(True)) else \"cpu\"\r\n\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2176248163", + "pr_number": 157317, + "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", + "created_at": "2025-07-01T01:58:07+00:00", + "commented_code": "def get_device_type() -> str:\n return (\n \"cuda\"\n if torch.cuda.is_available() and torch.cuda.device_count() >= 4\n torch.accelerator.current_accelerator().type\n if torch.accelerator.current_accelerator() and torch.accelerator.device_count() >= 4", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2176248163", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157317, + "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", + "discussion_id": "2176248163", + "commented_code": "@@ -28,12 +28,13 @@\n \n def get_device_type() -> str:\n return (\n- \"cuda\"\n- if torch.cuda.is_available() and torch.cuda.device_count() >= 4\n+ torch.accelerator.current_accelerator().type\n+ if torch.accelerator.current_accelerator() and torch.accelerator.device_count() >= 4", + "comment_created_at": "2025-07-01T01:58:07+00:00", + "comment_author": "guangyey", + "comment_body": "```suggestion\r\n if torch.accelerator.device_count() >= 4\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2019529880", + "pr_number": 149697, + "pr_file": "torch/_inductor/scheduler.py", + "created_at": "2025-03-28T22:54:56+00:00", + "commented_code": "return buf_byte_accesses\n\n @cache_on_self\n def estimate_flops(self) -> int | None:\n from torch._subclasses.fake_tensor import FakeTensorMode\n from torch.utils.flop_counter import FlopCounterMode\n\n op = kernel_name_to_op.get(getattr(self.node, \"python_kernel_name\", \"\"), None)", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2019529880", + "repo_full_name": "pytorch/pytorch", + "pr_number": 149697, + "pr_file": "torch/_inductor/scheduler.py", + "discussion_id": "2019529880", + "commented_code": "@@ -736,6 +737,48 @@ def get_buf_bytes(\n \n return buf_byte_accesses\n \n+ @cache_on_self\n+ def estimate_flops(self) -> int | None:\n+ from torch._subclasses.fake_tensor import FakeTensorMode\n+ from torch.utils.flop_counter import 
FlopCounterMode\n+\n+ op = kernel_name_to_op.get(getattr(self.node, \"python_kernel_name\", \"\"), None)", + "comment_created_at": "2025-03-28T22:54:56+00:00", + "comment_author": "eellison", + "comment_body": "this should handle triton templates as well.", + "pr_file_module": null + }, + { + "comment_id": "2019538427", + "repo_full_name": "pytorch/pytorch", + "pr_number": 149697, + "pr_file": "torch/_inductor/scheduler.py", + "discussion_id": "2019529880", + "commented_code": "@@ -736,6 +737,48 @@ def get_buf_bytes(\n \n return buf_byte_accesses\n \n+ @cache_on_self\n+ def estimate_flops(self) -> int | None:\n+ from torch._subclasses.fake_tensor import FakeTensorMode\n+ from torch.utils.flop_counter import FlopCounterMode\n+\n+ op = kernel_name_to_op.get(getattr(self.node, \"python_kernel_name\", \"\"), None)", + "comment_created_at": "2025-03-28T23:05:13+00:00", + "comment_author": "eellison", + "comment_body": "can you check that we preserve flops with fusion, say mm + relu?", + "pr_file_module": null + }, + { + "comment_id": "2025682278", + "repo_full_name": "pytorch/pytorch", + "pr_number": 149697, + "pr_file": "torch/_inductor/scheduler.py", + "discussion_id": "2019529880", + "commented_code": "@@ -736,6 +737,48 @@ def get_buf_bytes(\n \n return buf_byte_accesses\n \n+ @cache_on_self\n+ def estimate_flops(self) -> int | None:\n+ from torch._subclasses.fake_tensor import FakeTensorMode\n+ from torch.utils.flop_counter import FlopCounterMode\n+\n+ op = kernel_name_to_op.get(getattr(self.node, \"python_kernel_name\", \"\"), None)", + "comment_created_at": "2025-04-02T22:22:55+00:00", + "comment_author": "exclamaforte", + "comment_body": "added", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2096875232", + "pr_number": 153213, + "pr_file": "torch/distributed/tensor/examples/visualize_sharding_example.py", + "created_at": "2025-05-20T04:17:10+00:00", + "commented_code": "assert int(os.getenv(\"WORLD_SIZE\", \"1\")) >= 4, \"We need at least 4 devices\"\nrank = int(os.environ[\"RANK\"])\n\ndevice_type = 'cpu' if not torch.accelerator.current_accelerator() else torch.accelerator.current_accelerator().type", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2096875232", + "repo_full_name": "pytorch/pytorch", + "pr_number": 153213, + "pr_file": "torch/distributed/tensor/examples/visualize_sharding_example.py", + "discussion_id": "2096875232", + "commented_code": "@@ -17,6 +17,8 @@\n assert int(os.getenv(\"WORLD_SIZE\", \"1\")) >= 4, \"We need at least 4 devices\"\n rank = int(os.environ[\"RANK\"])\n \n+device_type = 'cpu' if not torch.accelerator.current_accelerator() else torch.accelerator.current_accelerator().type", + "comment_created_at": "2025-05-20T04:17:10+00:00", + "comment_author": "guangyey", + "comment_body": "```suggestion\r\ndevice_type = 'cpu' if not torch.accelerator.is_available() else torch.accelerator.current_accelerator().type\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1836158932", + "pr_number": 137846, + "pr_file": "torch/utils/collect_env.py", + "created_at": "2024-11-11T08:55:41+00:00", + "commented_code": "debug_mode_str = str(torch.version.debug)\n cuda_available_str = str(torch.cuda.is_available())\n cuda_version_str = torch.version.cuda\n xpu_available_str = str(torch.xpu.is_available())", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "1836158932", + "repo_full_name": "pytorch/pytorch", + "pr_number": 137846, + "pr_file": 
"torch/utils/collect_env.py", + "discussion_id": "1836158932", + "commented_code": "@@ -458,6 +592,7 @@ def get_env_info():\n debug_mode_str = str(torch.version.debug)\n cuda_available_str = str(torch.cuda.is_available())\n cuda_version_str = torch.version.cuda\n+ xpu_available_str = str(torch.xpu.is_available())", + "comment_created_at": "2024-11-11T08:55:41+00:00", + "comment_author": "guangyey", + "comment_body": "Now, we can use `torch.version.xpu` here.", + "pr_file_module": null + }, + { + "comment_id": "1837584799", + "repo_full_name": "pytorch/pytorch", + "pr_number": 137846, + "pr_file": "torch/utils/collect_env.py", + "discussion_id": "1836158932", + "commented_code": "@@ -458,6 +592,7 @@ def get_env_info():\n debug_mode_str = str(torch.version.debug)\n cuda_available_str = str(torch.cuda.is_available())\n cuda_version_str = torch.version.cuda\n+ xpu_available_str = str(torch.xpu.is_available())", + "comment_created_at": "2024-11-12T07:14:02+00:00", + "comment_author": "jingxu10", + "comment_body": "added.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1837866602", + "pr_number": 137846, + "pr_file": "torch/utils/collect_env.py", + "created_at": "2024-11-12T10:33:50+00:00", + "commented_code": "hip_runtime_version = get_version_or_na(cfg, 'HIP Runtime')\n miopen_runtime_version = get_version_or_na(cfg, 'MIOpen')\n cuda_version_str = 'N/A'\n xpu_version_str = 'N/A'\n hip_compiled_version = torch.version.hip\n else:\n version_str = debug_mode_str = cuda_available_str = cuda_version_str = 'N/A'\n version_str = debug_mode_str = cuda_available_str = cuda_version_str = xpu_available_str = xpu_version_str = 'N/A'", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "1837866602", + "repo_full_name": "pytorch/pytorch", + "pr_number": 137846, + "pr_file": "torch/utils/collect_env.py", + "discussion_id": "1837866602", + "commented_code": "@@ -511,9 +645,10 @@ def get_version_or_na(cfg, prefix):\n hip_runtime_version = get_version_or_na(cfg, 'HIP Runtime')\n miopen_runtime_version = get_version_or_na(cfg, 'MIOpen')\n cuda_version_str = 'N/A'\n+ xpu_version_str = 'N/A'\n hip_compiled_version = torch.version.hip\n else:\n- version_str = debug_mode_str = cuda_available_str = cuda_version_str = 'N/A'\n+ version_str = debug_mode_str = cuda_available_str = cuda_version_str = xpu_available_str = xpu_version_str = 'N/A'", + "comment_created_at": "2024-11-12T10:33:50+00:00", + "comment_author": "guangyey", + "comment_body": "```suggestion\r\n version_str = debug_mode_str = cuda_available_str = cuda_version_str = xpu_version_str = 'N/A'\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2023970199", + "pr_number": 137846, + "pr_file": "torch/utils/collect_env.py", + "created_at": "2025-04-02T02:39:13+00:00", + "commented_code": "return smi\n\n\ndef get_intel_gpu_onboard(run_lambda):\n lst = []\n platform = get_platform()\n if platform == \"linux\":\n txt = run_and_read_all(run_lambda, \"xpu-smi discovery -j\")\n if txt:\n try:\n obj = json.loads(txt)\n if type(obj[\"device_list\"]) is list:\n for o in obj[\"device_list\"]:\n lst.append(f'* {o[\"device_name\"]}')\n else:\n lst.append(\"N/A\")\n except (ValueError, TypeError) as e:\n lst.append(str(e))\n else:\n lst.append(\"N/A\")\n if platform == \"win32\" or platform == \"cygwin\":\n txt = run_and_read_all(\n run_lambda,\n 'powershell.exe \"gwmi -Class Win32_PnpSignedDriver | where{$_.DeviceClass -eq \\\\\"DISPLAY\\\\\"\\\n -and $_.Manufacturer -match \\\\\"Intel\\\\\"} | 
Select-Object -Property DeviceName | ConvertTo-Json\"',\n )\n try:\n obj = json.loads(txt)\n if type(obj) is list:\n for o in obj:\n lst.append(f'* {o[\"DeviceName\"]}')\n else:\n lst.append(f'* {obj[\"DeviceName\"]}')\n except ValueError as e:\n lst.append(txt)\n lst.append(str(e))\n return \"\\n\".join(lst)\n\n\ndef get_intel_gpu_detected(run_lambda):\n if TORCH_AVAILABLE:\n devices = [\n f\"* [{i}] {torch.xpu.get_device_properties(i)}\"\n for i in range(torch.xpu.device_count())\n ]\n if len(devices) > 0:\n return \"\\n\".join(devices)\n else:\n return \"N/A\"\n else:\n return \"N/A\"", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2023970199", + "repo_full_name": "pytorch/pytorch", + "pr_number": 137846, + "pr_file": "torch/utils/collect_env.py", + "discussion_id": "2023970199", + "commented_code": "@@ -243,6 +247,56 @@ def get_nvidia_smi():\n return smi\n \n \n+def get_intel_gpu_onboard(run_lambda):\n+ lst = []\n+ platform = get_platform()\n+ if platform == \"linux\":\n+ txt = run_and_read_all(run_lambda, \"xpu-smi discovery -j\")\n+ if txt:\n+ try:\n+ obj = json.loads(txt)\n+ if type(obj[\"device_list\"]) is list:\n+ for o in obj[\"device_list\"]:\n+ lst.append(f'* {o[\"device_name\"]}')\n+ else:\n+ lst.append(\"N/A\")\n+ except (ValueError, TypeError) as e:\n+ lst.append(str(e))\n+ else:\n+ lst.append(\"N/A\")\n+ if platform == \"win32\" or platform == \"cygwin\":\n+ txt = run_and_read_all(\n+ run_lambda,\n+ 'powershell.exe \"gwmi -Class Win32_PnpSignedDriver | where{$_.DeviceClass -eq \\\\\"DISPLAY\\\\\"\\\n+ -and $_.Manufacturer -match \\\\\"Intel\\\\\"} | Select-Object -Property DeviceName | ConvertTo-Json\"',\n+ )\n+ try:\n+ obj = json.loads(txt)\n+ if type(obj) is list:\n+ for o in obj:\n+ lst.append(f'* {o[\"DeviceName\"]}')\n+ else:\n+ lst.append(f'* {obj[\"DeviceName\"]}')\n+ except ValueError as e:\n+ lst.append(txt)\n+ lst.append(str(e))\n+ return \"\\n\".join(lst)\n+\n+\n+def get_intel_gpu_detected(run_lambda):\n+ if TORCH_AVAILABLE:\n+ devices = [\n+ f\"* [{i}] {torch.xpu.get_device_properties(i)}\"\n+ for i in range(torch.xpu.device_count())\n+ ]\n+ if len(devices) > 0:\n+ return \"\\n\".join(devices)\n+ else:\n+ return \"N/A\"\n+ else:\n+ return \"N/A\"", + "comment_created_at": "2025-04-02T02:39:13+00:00", + "comment_author": "guangyey", + "comment_body": "```suggestion\r\n if not TORCH_AVAILABLE:\r\n return \"N/A\"\r\n\r\n device_count = torch.xpu.device_count()\r\n if device_count == 0:\r\n return \"N/A\"\r\n\r\n devices = [f\"* [{i}] {torch.xpu.get_device_properties(i)}\" for i in range(device_count)]\r\n return \"\\n\".join(devices)\r\n```", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1942979749", + "pr_number": 145136, + "pr_file": "test/inductor/test_torchinductor.py", + "created_at": "2025-02-05T13:49:25+00:00", + "commented_code": "):\n c_op(x, kernel_size=2, stride=2)\n\n @torch._dynamo.config.patch(recompile_limit=12)\n def test_conv_errors_with_uint(self):\n for dim in (1, 2, 3):\n for dtype in (torch.uint8, torch.uint16, torch.uint32, torch.uint64):\n input_shape = [1, 8] + [64] * dim\n x = torch.randn(input_shape).to(dtype).cuda()\n conv_weight = (torch.ones(8, 1, *([4] * dim)) / (4 ** dim)).cuda()", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "1942979749", + "repo_full_name": "pytorch/pytorch", + "pr_number": 145136, + "pr_file": "test/inductor/test_torchinductor.py", + "discussion_id": "1942979749", + "commented_code": "@@ -6316,6 +6316,21 @@ def 
test_avg_pool_errors_with_uint(self):\n ):\n c_op(x, kernel_size=2, stride=2)\n \n+ @torch._dynamo.config.patch(recompile_limit=12)\n+ def test_conv_errors_with_uint(self):\n+ for dim in (1, 2, 3):\n+ for dtype in (torch.uint8, torch.uint16, torch.uint32, torch.uint64):\n+ input_shape = [1, 8] + [64] * dim\n+ x = torch.randn(input_shape).to(dtype).cuda()\n+ conv_weight = (torch.ones(8, 1, *([4] * dim)) / (4 ** dim)).cuda()", + "comment_created_at": "2025-02-05T13:49:25+00:00", + "comment_author": "etaf", + "comment_body": "Hi, the test_torchinductor.py is shared by GPUs like `cuda`, `xpu`. But you use hard code `cuda` here so this case will failed on other GPUs. I suggest you decorate this case with `requires_cuda` or use `to(GPU_TYPE)` instead of `.cuda`.", + "pr_file_module": null + }, + { + "comment_id": "1944038313", + "repo_full_name": "pytorch/pytorch", + "pr_number": 145136, + "pr_file": "test/inductor/test_torchinductor.py", + "discussion_id": "1942979749", + "commented_code": "@@ -6316,6 +6316,21 @@ def test_avg_pool_errors_with_uint(self):\n ):\n c_op(x, kernel_size=2, stride=2)\n \n+ @torch._dynamo.config.patch(recompile_limit=12)\n+ def test_conv_errors_with_uint(self):\n+ for dim in (1, 2, 3):\n+ for dtype in (torch.uint8, torch.uint16, torch.uint32, torch.uint64):\n+ input_shape = [1, 8] + [64] * dim\n+ x = torch.randn(input_shape).to(dtype).cuda()\n+ conv_weight = (torch.ones(8, 1, *([4] * dim)) / (4 ** dim)).cuda()", + "comment_created_at": "2025-02-06T03:21:34+00:00", + "comment_author": "shaoyuyoung", + "comment_body": "really thanks, I'll do this :)", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2089800822", + "pr_number": 150218, + "pr_file": "torch/utils/dlpack.py", + "created_at": "2025-05-14T21:46:12+00:00", + "commented_code": "\"\"\"\n if hasattr(ext_tensor, '__dlpack__'):\n kwargs: dict[str, Any] = {}\n kwargs[\"max_version\"] = (1, 0)\n\n device = ext_tensor.__dlpack_device__()\n # device is either CUDA or ROCm, we need to pass the current\n kwargs[\"max_version\"] = (1, 0)\n kwargs[\"copy\"] = copy\n\n # Parse the device parameter.\n # At this moment, it can either be a torch.device or a str representing\n # a torch.device, e.g. \"cpu\", \"cuda\", etc.\n if device is not None:\n if isinstance(device, str):\n device = torch.device(device)\n assert isinstance(device, torch.device), (\n f\"from_dlpack: unsupported device type: {type(device)}\"\n )\n device = torch._C._torchDeviceToDLDevice(device)\n\n kwargs[\"dl_device\"] = device\n\n ext_device = ext_tensor.__dlpack_device__()\n # ext_device is either CUDA or ROCm, we need to pass the current", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2089800822", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150218, + "pr_file": "torch/utils/dlpack.py", + "discussion_id": "2089800822", + "commented_code": "@@ -107,19 +107,34 @@ def from_dlpack(ext_tensor: Any) -> 'torch.Tensor':\n \"\"\"\n if hasattr(ext_tensor, '__dlpack__'):\n kwargs: dict[str, Any] = {}\n- kwargs[\"max_version\"] = (1, 0)\n \n- device = ext_tensor.__dlpack_device__()\n- # device is either CUDA or ROCm, we need to pass the current\n+ kwargs[\"max_version\"] = (1, 0)\n+ kwargs[\"copy\"] = copy\n+\n+ # Parse the device parameter.\n+ # At this moment, it can either be a torch.device or a str representing\n+ # a torch.device, e.g. 
\"cpu\", \"cuda\", etc.\n+ if device is not None:\n+ if isinstance(device, str):\n+ device = torch.device(device)\n+ assert isinstance(device, torch.device), (\n+ f\"from_dlpack: unsupported device type: {type(device)}\"\n+ )\n+ device = torch._C._torchDeviceToDLDevice(device)\n+\n+ kwargs[\"dl_device\"] = device\n+\n+ ext_device = ext_tensor.__dlpack_device__()\n+ # ext_device is either CUDA or ROCm, we need to pass the current", + "comment_created_at": "2025-05-14T21:46:12+00:00", + "comment_author": "albanD", + "comment_body": "Note that a lot of this string handling should get generalized to any device which is an accelerator in PT.\r\nOk to do later, but as we're going to see issues with hip or xpu, we should just refactor it together", + "pr_file_module": null + }, + { + "comment_id": "2105841400", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150218, + "pr_file": "torch/utils/dlpack.py", + "discussion_id": "2089800822", + "commented_code": "@@ -107,19 +107,34 @@ def from_dlpack(ext_tensor: Any) -> 'torch.Tensor':\n \"\"\"\n if hasattr(ext_tensor, '__dlpack__'):\n kwargs: dict[str, Any] = {}\n- kwargs[\"max_version\"] = (1, 0)\n \n- device = ext_tensor.__dlpack_device__()\n- # device is either CUDA or ROCm, we need to pass the current\n+ kwargs[\"max_version\"] = (1, 0)\n+ kwargs[\"copy\"] = copy\n+\n+ # Parse the device parameter.\n+ # At this moment, it can either be a torch.device or a str representing\n+ # a torch.device, e.g. \"cpu\", \"cuda\", etc.\n+ if device is not None:\n+ if isinstance(device, str):\n+ device = torch.device(device)\n+ assert isinstance(device, torch.device), (\n+ f\"from_dlpack: unsupported device type: {type(device)}\"\n+ )\n+ device = torch._C._torchDeviceToDLDevice(device)\n+\n+ kwargs[\"dl_device\"] = device\n+\n+ ext_device = ext_tensor.__dlpack_device__()\n+ # ext_device is either CUDA or ROCm, we need to pass the current", + "comment_created_at": "2025-05-24T14:35:39+00:00", + "comment_author": "ysiraichi", + "comment_body": "Could you help me understand what string handling you are referring to? Doesn't `torch.device(device)` work with any accelerator in PT?", + "pr_file_module": null + }, + { + "comment_id": "2150373765", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150218, + "pr_file": "torch/utils/dlpack.py", + "discussion_id": "2089800822", + "commented_code": "@@ -107,19 +107,34 @@ def from_dlpack(ext_tensor: Any) -> 'torch.Tensor':\n \"\"\"\n if hasattr(ext_tensor, '__dlpack__'):\n kwargs: dict[str, Any] = {}\n- kwargs[\"max_version\"] = (1, 0)\n \n- device = ext_tensor.__dlpack_device__()\n- # device is either CUDA or ROCm, we need to pass the current\n+ kwargs[\"max_version\"] = (1, 0)\n+ kwargs[\"copy\"] = copy\n+\n+ # Parse the device parameter.\n+ # At this moment, it can either be a torch.device or a str representing\n+ # a torch.device, e.g. 
\"cpu\", \"cuda\", etc.\n+ if device is not None:\n+ if isinstance(device, str):\n+ device = torch.device(device)\n+ assert isinstance(device, torch.device), (\n+ f\"from_dlpack: unsupported device type: {type(device)}\"\n+ )\n+ device = torch._C._torchDeviceToDLDevice(device)\n+\n+ kwargs[\"dl_device\"] = device\n+\n+ ext_device = ext_tensor.__dlpack_device__()\n+ # ext_device is either CUDA or ROCm, we need to pass the current", + "comment_created_at": "2025-06-16T16:07:55+00:00", + "comment_author": "albanD", + "comment_body": "Only the fact that you do custom processing only for CUDA and ROCm but we have quite a few other accelerators that would require the same treatment in the future.", + "pr_file_module": null + }, + { + "comment_id": "2158864989", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150218, + "pr_file": "torch/utils/dlpack.py", + "discussion_id": "2089800822", + "commented_code": "@@ -107,19 +107,34 @@ def from_dlpack(ext_tensor: Any) -> 'torch.Tensor':\n \"\"\"\n if hasattr(ext_tensor, '__dlpack__'):\n kwargs: dict[str, Any] = {}\n- kwargs[\"max_version\"] = (1, 0)\n \n- device = ext_tensor.__dlpack_device__()\n- # device is either CUDA or ROCm, we need to pass the current\n+ kwargs[\"max_version\"] = (1, 0)\n+ kwargs[\"copy\"] = copy\n+\n+ # Parse the device parameter.\n+ # At this moment, it can either be a torch.device or a str representing\n+ # a torch.device, e.g. \"cpu\", \"cuda\", etc.\n+ if device is not None:\n+ if isinstance(device, str):\n+ device = torch.device(device)\n+ assert isinstance(device, torch.device), (\n+ f\"from_dlpack: unsupported device type: {type(device)}\"\n+ )\n+ device = torch._C._torchDeviceToDLDevice(device)\n+\n+ kwargs[\"dl_device\"] = device\n+\n+ ext_device = ext_tensor.__dlpack_device__()\n+ # ext_device is either CUDA or ROCm, we need to pass the current", + "comment_created_at": "2025-06-20T12:37:17+00:00", + "comment_author": "ysiraichi", + "comment_body": "Since I'm not familiar with these other accelerators, I think I will leave it for a future PR. In this one, I'm simply adding support for the extra keywords (in this case, passing them through to `__dlpack__()` method). What do you think?", + "pr_file_module": null + }, + { + "comment_id": "2159671574", + "repo_full_name": "pytorch/pytorch", + "pr_number": 150218, + "pr_file": "torch/utils/dlpack.py", + "discussion_id": "2089800822", + "commented_code": "@@ -107,19 +107,34 @@ def from_dlpack(ext_tensor: Any) -> 'torch.Tensor':\n \"\"\"\n if hasattr(ext_tensor, '__dlpack__'):\n kwargs: dict[str, Any] = {}\n- kwargs[\"max_version\"] = (1, 0)\n \n- device = ext_tensor.__dlpack_device__()\n- # device is either CUDA or ROCm, we need to pass the current\n+ kwargs[\"max_version\"] = (1, 0)\n+ kwargs[\"copy\"] = copy\n+\n+ # Parse the device parameter.\n+ # At this moment, it can either be a torch.device or a str representing\n+ # a torch.device, e.g. \"cpu\", \"cuda\", etc.\n+ if device is not None:\n+ if isinstance(device, str):\n+ device = torch.device(device)\n+ assert isinstance(device, torch.device), (\n+ f\"from_dlpack: unsupported device type: {type(device)}\"\n+ )\n+ device = torch._C._torchDeviceToDLDevice(device)\n+\n+ kwargs[\"dl_device\"] = device\n+\n+ ext_device = ext_tensor.__dlpack_device__()\n+ # ext_device is either CUDA or ROCm, we need to pass the current", + "comment_created_at": "2025-06-20T21:13:52+00:00", + "comment_author": "albanD", + "comment_body": "Yes let's just open an issue for it and add the xpu and privateuse1 labels on it. 
", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-use-accelerator-agnostic-code.md b/_reviewers/pytorch-use-accelerator-agnostic-code.md index ec7ce63..fe66449 100644 --- a/_reviewers/pytorch-use-accelerator-agnostic-code.md +++ b/_reviewers/pytorch-use-accelerator-agnostic-code.md @@ -52,379 +52,3 @@ def test_feature(): ``` By writing accelerator-agnostic code, you ensure your PyTorch code works consistently across different hardware platforms without modification. - - -[ - { - "discussion_id": "2177758844", - "pr_number": 156287, - "pr_file": "torch/_functorch/partitioners.py", - "created_at": "2025-07-01T14:28:29+00:00", - "commented_code": "):\n with no_dispatch(), unset_fake_temporarily():\n objects = [[x.name for x in saved_values]]\n # TODO: maybe use a different process group for this\n torch.distributed.broadcast_object_list(objects, src=0)\n saved_values_names = objects[0]\n saved_ops_names_all_ranks: list[list[str]] = [\n [] for _ in range(torch.distributed.get_world_size())\n ]\n\n torch.distributed.all_gather_object(saved_ops_names_all_ranks, objects[0])\n name_to_node = get_name_to_node(joint_graph)\n saved_values = [name_to_node[n] for n in saved_values_names]\n saved_sizes: list[int] = []\n for saved_ops_names in saved_ops_names_all_ranks:\n saved_nodes = [name_to_node[op_name] for op_name in saved_ops_names]\n saved_size = 0\n for node in saved_nodes:\n saved_size += _size_of(node)\n saved_sizes.append(saved_size)\n\n saved_sizes_tensor = torch.tensor(\n saved_sizes, device=torch.cuda.current_device()", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2177758844", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156287, - "pr_file": "torch/_functorch/partitioners.py", - "discussion_id": "2177758844", - "commented_code": "@@ -2515,11 +2515,38 @@ def has_same_nodes(joint_graph):\n ):\n with no_dispatch(), unset_fake_temporarily():\n objects = [[x.name for x in saved_values]]\n- # TODO: maybe use a different process group for this\n- torch.distributed.broadcast_object_list(objects, src=0)\n- saved_values_names = objects[0]\n+ saved_ops_names_all_ranks: list[list[str]] = [\n+ [] for _ in range(torch.distributed.get_world_size())\n+ ]\n+\n+ torch.distributed.all_gather_object(saved_ops_names_all_ranks, objects[0])\n name_to_node = get_name_to_node(joint_graph)\n- saved_values = [name_to_node[n] for n in saved_values_names]\n+ saved_sizes: list[int] = []\n+ for saved_ops_names in saved_ops_names_all_ranks:\n+ saved_nodes = [name_to_node[op_name] for op_name in saved_ops_names]\n+ saved_size = 0\n+ for node in saved_nodes:\n+ saved_size += _size_of(node)\n+ saved_sizes.append(saved_size)\n+\n+ saved_sizes_tensor = torch.tensor(\n+ saved_sizes, device=torch.cuda.current_device()", - "comment_created_at": "2025-07-01T14:28:29+00:00", - "comment_author": "bdhirsh", - "comment_body": "Instead of hardcoding the cuda device here, can we try to use the same device that our collectives are going to use?\r\n\r\nMaybe @wconstab knows of a cleaner way, but it looks like `all_gather_object` figures out what device to use with [this](https://github.com/pytorch/pytorch/blob/main/torch/distributed/distributed_c10d.py#L3153C5-L3153C52)\r\n```\r\ncurrent_device = _get_object_coll_device(_group_or_default_group())\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2179128797", - "pr_number": 155108, - "pr_file": "test/distributed/pipelining/test_backward.py", - "created_at": 
"2025-07-02T05:14:19+00:00", - "commented_code": "stage_backward_input,\n stage_backward_weight,\n)\nfrom torch.testing._internal.common_device_type import instantiate_device_type_tests\nfrom torch.testing._internal.common_utils import run_tests, TestCase\n\n\nd_hid = 512\nbatch_size = 256\n\ndevice = torch.accelerator.current_accelerator().type", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2179128797", - "repo_full_name": "pytorch/pytorch", - "pr_number": 155108, - "pr_file": "test/distributed/pipelining/test_backward.py", - "discussion_id": "2179128797", - "commented_code": "@@ -10,16 +10,17 @@\n stage_backward_input,\n stage_backward_weight,\n )\n-from torch.testing._internal.common_device_type import instantiate_device_type_tests\n from torch.testing._internal.common_utils import run_tests, TestCase\n \n \n d_hid = 512\n batch_size = 256\n \n+device = torch.accelerator.current_accelerator().type", - "comment_created_at": "2025-07-02T05:14:19+00:00", - "comment_author": "guangyey", - "comment_body": "This will fail on the CPU-only machine.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2179130007", - "pr_number": 155108, - "pr_file": "test/distributed/pipelining/test_backward.py", - "created_at": "2025-07-02T05:15:18+00:00", - "commented_code": "stage_backward_input,\n stage_backward_weight,\n)\nfrom torch.testing._internal.common_device_type import instantiate_device_type_tests\nfrom torch.testing._internal.common_utils import run_tests, TestCase\n\n\nd_hid = 512\nbatch_size = 256\n\ndevice = torch.accelerator.current_accelerator().type", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2179130007", - "repo_full_name": "pytorch/pytorch", - "pr_number": 155108, - "pr_file": "test/distributed/pipelining/test_backward.py", - "discussion_id": "2179130007", - "commented_code": "@@ -10,16 +10,17 @@\n stage_backward_input,\n stage_backward_weight,\n )\n-from torch.testing._internal.common_device_type import instantiate_device_type_tests\n from torch.testing._internal.common_utils import run_tests, TestCase\n \n \n d_hid = 512\n batch_size = 256\n \n+device = torch.accelerator.current_accelerator().type", - "comment_created_at": "2025-07-02T05:15:18+00:00", - "comment_author": "guangyey", - "comment_body": "```suggestion\r\ndevice = acc.type if (acc := torch.accelerator.current_accelerator()) else \"cpu\"\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2179135225", - "repo_full_name": "pytorch/pytorch", - "pr_number": 155108, - "pr_file": "test/distributed/pipelining/test_backward.py", - "discussion_id": "2179130007", - "commented_code": "@@ -10,16 +10,17 @@\n stage_backward_input,\n stage_backward_weight,\n )\n-from torch.testing._internal.common_device_type import instantiate_device_type_tests\n from torch.testing._internal.common_utils import run_tests, TestCase\n \n \n d_hid = 512\n batch_size = 256\n \n+device = torch.accelerator.current_accelerator().type", - "comment_created_at": "2025-07-02T05:19:14+00:00", - "comment_author": "AnantGulati", - "comment_body": "As per my understanding, since pipeline parallelism is dependent on communication collectives supported only for accelerator backends, not by GLOO hence we would not want to test on CPU\r\n\r\n@H-Huang Could you please confirm\r\n\r\nThanks ", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2176247028", - "pr_number": 157317, - "pr_file": "torch/distributed/tensor/examples/visualize_sharding_example.py", - 
"created_at": "2025-07-01T01:56:31+00:00", - "commented_code": "assert int(os.getenv(\"WORLD_SIZE\", \"1\")) >= 4, \"We need at least 4 devices\"\nrank = int(os.environ[\"RANK\"])\n\ndef get_device_type() -> str:\n return (\n torch.accelerator.current_accelerator().type\n if torch.accelerator.current_accelerator() and torch.accelerator.device_count()\n else \"cpu\"\n )", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2176247028", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157317, - "pr_file": "torch/distributed/tensor/examples/visualize_sharding_example.py", - "discussion_id": "2176247028", - "commented_code": "@@ -17,6 +17,15 @@\n assert int(os.getenv(\"WORLD_SIZE\", \"1\")) >= 4, \"We need at least 4 devices\"\n rank = int(os.environ[\"RANK\"])\n \n+def get_device_type() -> str:\n+ return (\n+ torch.accelerator.current_accelerator().type\n+ if torch.accelerator.current_accelerator() and torch.accelerator.device_count()\n+ else \"cpu\"\n+ )", - "comment_created_at": "2025-07-01T01:56:31+00:00", - "comment_author": "guangyey", - "comment_body": "```suggestion\r\n return acc.type if (acc := torch.accelerator.current_accelerator(True)) else \"cpu\"\r\n\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2176248163", - "pr_number": 157317, - "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", - "created_at": "2025-07-01T01:58:07+00:00", - "commented_code": "def get_device_type() -> str:\n return (\n \"cuda\"\n if torch.cuda.is_available() and torch.cuda.device_count() >= 4\n torch.accelerator.current_accelerator().type\n if torch.accelerator.current_accelerator() and torch.accelerator.device_count() >= 4", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2176248163", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157317, - "pr_file": "torch/distributed/tensor/examples/comm_mode_features_example.py", - "discussion_id": "2176248163", - "commented_code": "@@ -28,12 +28,13 @@\n \n def get_device_type() -> str:\n return (\n- \"cuda\"\n- if torch.cuda.is_available() and torch.cuda.device_count() >= 4\n+ torch.accelerator.current_accelerator().type\n+ if torch.accelerator.current_accelerator() and torch.accelerator.device_count() >= 4", - "comment_created_at": "2025-07-01T01:58:07+00:00", - "comment_author": "guangyey", - "comment_body": "```suggestion\r\n if torch.accelerator.device_count() >= 4\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2019529880", - "pr_number": 149697, - "pr_file": "torch/_inductor/scheduler.py", - "created_at": "2025-03-28T22:54:56+00:00", - "commented_code": "return buf_byte_accesses\n\n @cache_on_self\n def estimate_flops(self) -> int | None:\n from torch._subclasses.fake_tensor import FakeTensorMode\n from torch.utils.flop_counter import FlopCounterMode\n\n op = kernel_name_to_op.get(getattr(self.node, \"python_kernel_name\", \"\"), None)", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2019529880", - "repo_full_name": "pytorch/pytorch", - "pr_number": 149697, - "pr_file": "torch/_inductor/scheduler.py", - "discussion_id": "2019529880", - "commented_code": "@@ -736,6 +737,48 @@ def get_buf_bytes(\n \n return buf_byte_accesses\n \n+ @cache_on_self\n+ def estimate_flops(self) -> int | None:\n+ from torch._subclasses.fake_tensor import FakeTensorMode\n+ from torch.utils.flop_counter import FlopCounterMode\n+\n+ op = kernel_name_to_op.get(getattr(self.node, 
\"python_kernel_name\", \"\"), None)", - "comment_created_at": "2025-03-28T22:54:56+00:00", - "comment_author": "eellison", - "comment_body": "this should handle triton templates as well.", - "pr_file_module": null - }, - { - "comment_id": "2019538427", - "repo_full_name": "pytorch/pytorch", - "pr_number": 149697, - "pr_file": "torch/_inductor/scheduler.py", - "discussion_id": "2019529880", - "commented_code": "@@ -736,6 +737,48 @@ def get_buf_bytes(\n \n return buf_byte_accesses\n \n+ @cache_on_self\n+ def estimate_flops(self) -> int | None:\n+ from torch._subclasses.fake_tensor import FakeTensorMode\n+ from torch.utils.flop_counter import FlopCounterMode\n+\n+ op = kernel_name_to_op.get(getattr(self.node, \"python_kernel_name\", \"\"), None)", - "comment_created_at": "2025-03-28T23:05:13+00:00", - "comment_author": "eellison", - "comment_body": "can you check that we preserve flops with fusion, say mm + relu?", - "pr_file_module": null - }, - { - "comment_id": "2025682278", - "repo_full_name": "pytorch/pytorch", - "pr_number": 149697, - "pr_file": "torch/_inductor/scheduler.py", - "discussion_id": "2019529880", - "commented_code": "@@ -736,6 +737,48 @@ def get_buf_bytes(\n \n return buf_byte_accesses\n \n+ @cache_on_self\n+ def estimate_flops(self) -> int | None:\n+ from torch._subclasses.fake_tensor import FakeTensorMode\n+ from torch.utils.flop_counter import FlopCounterMode\n+\n+ op = kernel_name_to_op.get(getattr(self.node, \"python_kernel_name\", \"\"), None)", - "comment_created_at": "2025-04-02T22:22:55+00:00", - "comment_author": "exclamaforte", - "comment_body": "added", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2096875232", - "pr_number": 153213, - "pr_file": "torch/distributed/tensor/examples/visualize_sharding_example.py", - "created_at": "2025-05-20T04:17:10+00:00", - "commented_code": "assert int(os.getenv(\"WORLD_SIZE\", \"1\")) >= 4, \"We need at least 4 devices\"\nrank = int(os.environ[\"RANK\"])\n\ndevice_type = 'cpu' if not torch.accelerator.current_accelerator() else torch.accelerator.current_accelerator().type", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2096875232", - "repo_full_name": "pytorch/pytorch", - "pr_number": 153213, - "pr_file": "torch/distributed/tensor/examples/visualize_sharding_example.py", - "discussion_id": "2096875232", - "commented_code": "@@ -17,6 +17,8 @@\n assert int(os.getenv(\"WORLD_SIZE\", \"1\")) >= 4, \"We need at least 4 devices\"\n rank = int(os.environ[\"RANK\"])\n \n+device_type = 'cpu' if not torch.accelerator.current_accelerator() else torch.accelerator.current_accelerator().type", - "comment_created_at": "2025-05-20T04:17:10+00:00", - "comment_author": "guangyey", - "comment_body": "```suggestion\r\ndevice_type = 'cpu' if not torch.accelerator.is_available() else torch.accelerator.current_accelerator().type\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1836158932", - "pr_number": 137846, - "pr_file": "torch/utils/collect_env.py", - "created_at": "2024-11-11T08:55:41+00:00", - "commented_code": "debug_mode_str = str(torch.version.debug)\n cuda_available_str = str(torch.cuda.is_available())\n cuda_version_str = torch.version.cuda\n xpu_available_str = str(torch.xpu.is_available())", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "1836158932", - "repo_full_name": "pytorch/pytorch", - "pr_number": 137846, - "pr_file": "torch/utils/collect_env.py", - "discussion_id": "1836158932", - "commented_code": "@@ -458,6 
+592,7 @@ def get_env_info():\n debug_mode_str = str(torch.version.debug)\n cuda_available_str = str(torch.cuda.is_available())\n cuda_version_str = torch.version.cuda\n+ xpu_available_str = str(torch.xpu.is_available())", - "comment_created_at": "2024-11-11T08:55:41+00:00", - "comment_author": "guangyey", - "comment_body": "Now, we can use `torch.version.xpu` here.", - "pr_file_module": null - }, - { - "comment_id": "1837584799", - "repo_full_name": "pytorch/pytorch", - "pr_number": 137846, - "pr_file": "torch/utils/collect_env.py", - "discussion_id": "1836158932", - "commented_code": "@@ -458,6 +592,7 @@ def get_env_info():\n debug_mode_str = str(torch.version.debug)\n cuda_available_str = str(torch.cuda.is_available())\n cuda_version_str = torch.version.cuda\n+ xpu_available_str = str(torch.xpu.is_available())", - "comment_created_at": "2024-11-12T07:14:02+00:00", - "comment_author": "jingxu10", - "comment_body": "added.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1837866602", - "pr_number": 137846, - "pr_file": "torch/utils/collect_env.py", - "created_at": "2024-11-12T10:33:50+00:00", - "commented_code": "hip_runtime_version = get_version_or_na(cfg, 'HIP Runtime')\n miopen_runtime_version = get_version_or_na(cfg, 'MIOpen')\n cuda_version_str = 'N/A'\n xpu_version_str = 'N/A'\n hip_compiled_version = torch.version.hip\n else:\n version_str = debug_mode_str = cuda_available_str = cuda_version_str = 'N/A'\n version_str = debug_mode_str = cuda_available_str = cuda_version_str = xpu_available_str = xpu_version_str = 'N/A'", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "1837866602", - "repo_full_name": "pytorch/pytorch", - "pr_number": 137846, - "pr_file": "torch/utils/collect_env.py", - "discussion_id": "1837866602", - "commented_code": "@@ -511,9 +645,10 @@ def get_version_or_na(cfg, prefix):\n hip_runtime_version = get_version_or_na(cfg, 'HIP Runtime')\n miopen_runtime_version = get_version_or_na(cfg, 'MIOpen')\n cuda_version_str = 'N/A'\n+ xpu_version_str = 'N/A'\n hip_compiled_version = torch.version.hip\n else:\n- version_str = debug_mode_str = cuda_available_str = cuda_version_str = 'N/A'\n+ version_str = debug_mode_str = cuda_available_str = cuda_version_str = xpu_available_str = xpu_version_str = 'N/A'", - "comment_created_at": "2024-11-12T10:33:50+00:00", - "comment_author": "guangyey", - "comment_body": "```suggestion\r\n version_str = debug_mode_str = cuda_available_str = cuda_version_str = xpu_version_str = 'N/A'\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2023970199", - "pr_number": 137846, - "pr_file": "torch/utils/collect_env.py", - "created_at": "2025-04-02T02:39:13+00:00", - "commented_code": "return smi\n\n\ndef get_intel_gpu_onboard(run_lambda):\n lst = []\n platform = get_platform()\n if platform == \"linux\":\n txt = run_and_read_all(run_lambda, \"xpu-smi discovery -j\")\n if txt:\n try:\n obj = json.loads(txt)\n if type(obj[\"device_list\"]) is list:\n for o in obj[\"device_list\"]:\n lst.append(f'* {o[\"device_name\"]}')\n else:\n lst.append(\"N/A\")\n except (ValueError, TypeError) as e:\n lst.append(str(e))\n else:\n lst.append(\"N/A\")\n if platform == \"win32\" or platform == \"cygwin\":\n txt = run_and_read_all(\n run_lambda,\n 'powershell.exe \"gwmi -Class Win32_PnpSignedDriver | where{$_.DeviceClass -eq \\\\\"DISPLAY\\\\\"\\\n -and $_.Manufacturer -match \\\\\"Intel\\\\\"} | Select-Object -Property DeviceName | ConvertTo-Json\"',\n )\n try:\n obj = json.loads(txt)\n if 
type(obj) is list:\n for o in obj:\n lst.append(f'* {o[\"DeviceName\"]}')\n else:\n lst.append(f'* {obj[\"DeviceName\"]}')\n except ValueError as e:\n lst.append(txt)\n lst.append(str(e))\n return \"\\n\".join(lst)\n\n\ndef get_intel_gpu_detected(run_lambda):\n if TORCH_AVAILABLE:\n devices = [\n f\"* [{i}] {torch.xpu.get_device_properties(i)}\"\n for i in range(torch.xpu.device_count())\n ]\n if len(devices) > 0:\n return \"\\n\".join(devices)\n else:\n return \"N/A\"\n else:\n return \"N/A\"", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2023970199", - "repo_full_name": "pytorch/pytorch", - "pr_number": 137846, - "pr_file": "torch/utils/collect_env.py", - "discussion_id": "2023970199", - "commented_code": "@@ -243,6 +247,56 @@ def get_nvidia_smi():\n return smi\n \n \n+def get_intel_gpu_onboard(run_lambda):\n+ lst = []\n+ platform = get_platform()\n+ if platform == \"linux\":\n+ txt = run_and_read_all(run_lambda, \"xpu-smi discovery -j\")\n+ if txt:\n+ try:\n+ obj = json.loads(txt)\n+ if type(obj[\"device_list\"]) is list:\n+ for o in obj[\"device_list\"]:\n+ lst.append(f'* {o[\"device_name\"]}')\n+ else:\n+ lst.append(\"N/A\")\n+ except (ValueError, TypeError) as e:\n+ lst.append(str(e))\n+ else:\n+ lst.append(\"N/A\")\n+ if platform == \"win32\" or platform == \"cygwin\":\n+ txt = run_and_read_all(\n+ run_lambda,\n+ 'powershell.exe \"gwmi -Class Win32_PnpSignedDriver | where{$_.DeviceClass -eq \\\\\"DISPLAY\\\\\"\\\n+ -and $_.Manufacturer -match \\\\\"Intel\\\\\"} | Select-Object -Property DeviceName | ConvertTo-Json\"',\n+ )\n+ try:\n+ obj = json.loads(txt)\n+ if type(obj) is list:\n+ for o in obj:\n+ lst.append(f'* {o[\"DeviceName\"]}')\n+ else:\n+ lst.append(f'* {obj[\"DeviceName\"]}')\n+ except ValueError as e:\n+ lst.append(txt)\n+ lst.append(str(e))\n+ return \"\\n\".join(lst)\n+\n+\n+def get_intel_gpu_detected(run_lambda):\n+ if TORCH_AVAILABLE:\n+ devices = [\n+ f\"* [{i}] {torch.xpu.get_device_properties(i)}\"\n+ for i in range(torch.xpu.device_count())\n+ ]\n+ if len(devices) > 0:\n+ return \"\\n\".join(devices)\n+ else:\n+ return \"N/A\"\n+ else:\n+ return \"N/A\"", - "comment_created_at": "2025-04-02T02:39:13+00:00", - "comment_author": "guangyey", - "comment_body": "```suggestion\r\n if not TORCH_AVAILABLE:\r\n return \"N/A\"\r\n\r\n device_count = torch.xpu.device_count()\r\n if device_count == 0:\r\n return \"N/A\"\r\n\r\n devices = [f\"* [{i}] {torch.xpu.get_device_properties(i)}\" for i in range(device_count)]\r\n return \"\\n\".join(devices)\r\n```", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1942979749", - "pr_number": 145136, - "pr_file": "test/inductor/test_torchinductor.py", - "created_at": "2025-02-05T13:49:25+00:00", - "commented_code": "):\n c_op(x, kernel_size=2, stride=2)\n\n @torch._dynamo.config.patch(recompile_limit=12)\n def test_conv_errors_with_uint(self):\n for dim in (1, 2, 3):\n for dtype in (torch.uint8, torch.uint16, torch.uint32, torch.uint64):\n input_shape = [1, 8] + [64] * dim\n x = torch.randn(input_shape).to(dtype).cuda()\n conv_weight = (torch.ones(8, 1, *([4] * dim)) / (4 ** dim)).cuda()", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "1942979749", - "repo_full_name": "pytorch/pytorch", - "pr_number": 145136, - "pr_file": "test/inductor/test_torchinductor.py", - "discussion_id": "1942979749", - "commented_code": "@@ -6316,6 +6316,21 @@ def test_avg_pool_errors_with_uint(self):\n ):\n c_op(x, kernel_size=2, stride=2)\n \n+ 
@torch._dynamo.config.patch(recompile_limit=12)\n+ def test_conv_errors_with_uint(self):\n+ for dim in (1, 2, 3):\n+ for dtype in (torch.uint8, torch.uint16, torch.uint32, torch.uint64):\n+ input_shape = [1, 8] + [64] * dim\n+ x = torch.randn(input_shape).to(dtype).cuda()\n+ conv_weight = (torch.ones(8, 1, *([4] * dim)) / (4 ** dim)).cuda()", - "comment_created_at": "2025-02-05T13:49:25+00:00", - "comment_author": "etaf", - "comment_body": "Hi, the test_torchinductor.py is shared by GPUs like `cuda`, `xpu`. But you use hard code `cuda` here so this case will failed on other GPUs. I suggest you decorate this case with `requires_cuda` or use `to(GPU_TYPE)` instead of `.cuda`.", - "pr_file_module": null - }, - { - "comment_id": "1944038313", - "repo_full_name": "pytorch/pytorch", - "pr_number": 145136, - "pr_file": "test/inductor/test_torchinductor.py", - "discussion_id": "1942979749", - "commented_code": "@@ -6316,6 +6316,21 @@ def test_avg_pool_errors_with_uint(self):\n ):\n c_op(x, kernel_size=2, stride=2)\n \n+ @torch._dynamo.config.patch(recompile_limit=12)\n+ def test_conv_errors_with_uint(self):\n+ for dim in (1, 2, 3):\n+ for dtype in (torch.uint8, torch.uint16, torch.uint32, torch.uint64):\n+ input_shape = [1, 8] + [64] * dim\n+ x = torch.randn(input_shape).to(dtype).cuda()\n+ conv_weight = (torch.ones(8, 1, *([4] * dim)) / (4 ** dim)).cuda()", - "comment_created_at": "2025-02-06T03:21:34+00:00", - "comment_author": "shaoyuyoung", - "comment_body": "really thanks, I'll do this :)", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2089800822", - "pr_number": 150218, - "pr_file": "torch/utils/dlpack.py", - "created_at": "2025-05-14T21:46:12+00:00", - "commented_code": "\"\"\"\n if hasattr(ext_tensor, '__dlpack__'):\n kwargs: dict[str, Any] = {}\n kwargs[\"max_version\"] = (1, 0)\n\n device = ext_tensor.__dlpack_device__()\n # device is either CUDA or ROCm, we need to pass the current\n kwargs[\"max_version\"] = (1, 0)\n kwargs[\"copy\"] = copy\n\n # Parse the device parameter.\n # At this moment, it can either be a torch.device or a str representing\n # a torch.device, e.g. \"cpu\", \"cuda\", etc.\n if device is not None:\n if isinstance(device, str):\n device = torch.device(device)\n assert isinstance(device, torch.device), (\n f\"from_dlpack: unsupported device type: {type(device)}\"\n )\n device = torch._C._torchDeviceToDLDevice(device)\n\n kwargs[\"dl_device\"] = device\n\n ext_device = ext_tensor.__dlpack_device__()\n # ext_device is either CUDA or ROCm, we need to pass the current", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2089800822", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150218, - "pr_file": "torch/utils/dlpack.py", - "discussion_id": "2089800822", - "commented_code": "@@ -107,19 +107,34 @@ def from_dlpack(ext_tensor: Any) -> 'torch.Tensor':\n \"\"\"\n if hasattr(ext_tensor, '__dlpack__'):\n kwargs: dict[str, Any] = {}\n- kwargs[\"max_version\"] = (1, 0)\n \n- device = ext_tensor.__dlpack_device__()\n- # device is either CUDA or ROCm, we need to pass the current\n+ kwargs[\"max_version\"] = (1, 0)\n+ kwargs[\"copy\"] = copy\n+\n+ # Parse the device parameter.\n+ # At this moment, it can either be a torch.device or a str representing\n+ # a torch.device, e.g. 
\"cpu\", \"cuda\", etc.\n+ if device is not None:\n+ if isinstance(device, str):\n+ device = torch.device(device)\n+ assert isinstance(device, torch.device), (\n+ f\"from_dlpack: unsupported device type: {type(device)}\"\n+ )\n+ device = torch._C._torchDeviceToDLDevice(device)\n+\n+ kwargs[\"dl_device\"] = device\n+\n+ ext_device = ext_tensor.__dlpack_device__()\n+ # ext_device is either CUDA or ROCm, we need to pass the current", - "comment_created_at": "2025-05-14T21:46:12+00:00", - "comment_author": "albanD", - "comment_body": "Note that a lot of this string handling should get generalized to any device which is an accelerator in PT.\r\nOk to do later, but as we're going to see issues with hip or xpu, we should just refactor it together", - "pr_file_module": null - }, - { - "comment_id": "2105841400", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150218, - "pr_file": "torch/utils/dlpack.py", - "discussion_id": "2089800822", - "commented_code": "@@ -107,19 +107,34 @@ def from_dlpack(ext_tensor: Any) -> 'torch.Tensor':\n \"\"\"\n if hasattr(ext_tensor, '__dlpack__'):\n kwargs: dict[str, Any] = {}\n- kwargs[\"max_version\"] = (1, 0)\n \n- device = ext_tensor.__dlpack_device__()\n- # device is either CUDA or ROCm, we need to pass the current\n+ kwargs[\"max_version\"] = (1, 0)\n+ kwargs[\"copy\"] = copy\n+\n+ # Parse the device parameter.\n+ # At this moment, it can either be a torch.device or a str representing\n+ # a torch.device, e.g. \"cpu\", \"cuda\", etc.\n+ if device is not None:\n+ if isinstance(device, str):\n+ device = torch.device(device)\n+ assert isinstance(device, torch.device), (\n+ f\"from_dlpack: unsupported device type: {type(device)}\"\n+ )\n+ device = torch._C._torchDeviceToDLDevice(device)\n+\n+ kwargs[\"dl_device\"] = device\n+\n+ ext_device = ext_tensor.__dlpack_device__()\n+ # ext_device is either CUDA or ROCm, we need to pass the current", - "comment_created_at": "2025-05-24T14:35:39+00:00", - "comment_author": "ysiraichi", - "comment_body": "Could you help me understand what string handling you are referring to? Doesn't `torch.device(device)` work with any accelerator in PT?", - "pr_file_module": null - }, - { - "comment_id": "2150373765", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150218, - "pr_file": "torch/utils/dlpack.py", - "discussion_id": "2089800822", - "commented_code": "@@ -107,19 +107,34 @@ def from_dlpack(ext_tensor: Any) -> 'torch.Tensor':\n \"\"\"\n if hasattr(ext_tensor, '__dlpack__'):\n kwargs: dict[str, Any] = {}\n- kwargs[\"max_version\"] = (1, 0)\n \n- device = ext_tensor.__dlpack_device__()\n- # device is either CUDA or ROCm, we need to pass the current\n+ kwargs[\"max_version\"] = (1, 0)\n+ kwargs[\"copy\"] = copy\n+\n+ # Parse the device parameter.\n+ # At this moment, it can either be a torch.device or a str representing\n+ # a torch.device, e.g. 
\"cpu\", \"cuda\", etc.\n+ if device is not None:\n+ if isinstance(device, str):\n+ device = torch.device(device)\n+ assert isinstance(device, torch.device), (\n+ f\"from_dlpack: unsupported device type: {type(device)}\"\n+ )\n+ device = torch._C._torchDeviceToDLDevice(device)\n+\n+ kwargs[\"dl_device\"] = device\n+\n+ ext_device = ext_tensor.__dlpack_device__()\n+ # ext_device is either CUDA or ROCm, we need to pass the current", - "comment_created_at": "2025-06-16T16:07:55+00:00", - "comment_author": "albanD", - "comment_body": "Only the fact that you do custom processing only for CUDA and ROCm but we have quite a few other accelerators that would require the same treatment in the future.", - "pr_file_module": null - }, - { - "comment_id": "2158864989", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150218, - "pr_file": "torch/utils/dlpack.py", - "discussion_id": "2089800822", - "commented_code": "@@ -107,19 +107,34 @@ def from_dlpack(ext_tensor: Any) -> 'torch.Tensor':\n \"\"\"\n if hasattr(ext_tensor, '__dlpack__'):\n kwargs: dict[str, Any] = {}\n- kwargs[\"max_version\"] = (1, 0)\n \n- device = ext_tensor.__dlpack_device__()\n- # device is either CUDA or ROCm, we need to pass the current\n+ kwargs[\"max_version\"] = (1, 0)\n+ kwargs[\"copy\"] = copy\n+\n+ # Parse the device parameter.\n+ # At this moment, it can either be a torch.device or a str representing\n+ # a torch.device, e.g. \"cpu\", \"cuda\", etc.\n+ if device is not None:\n+ if isinstance(device, str):\n+ device = torch.device(device)\n+ assert isinstance(device, torch.device), (\n+ f\"from_dlpack: unsupported device type: {type(device)}\"\n+ )\n+ device = torch._C._torchDeviceToDLDevice(device)\n+\n+ kwargs[\"dl_device\"] = device\n+\n+ ext_device = ext_tensor.__dlpack_device__()\n+ # ext_device is either CUDA or ROCm, we need to pass the current", - "comment_created_at": "2025-06-20T12:37:17+00:00", - "comment_author": "ysiraichi", - "comment_body": "Since I'm not familiar with these other accelerators, I think I will leave it for a future PR. In this one, I'm simply adding support for the extra keywords (in this case, passing them through to `__dlpack__()` method). What do you think?", - "pr_file_module": null - }, - { - "comment_id": "2159671574", - "repo_full_name": "pytorch/pytorch", - "pr_number": 150218, - "pr_file": "torch/utils/dlpack.py", - "discussion_id": "2089800822", - "commented_code": "@@ -107,19 +107,34 @@ def from_dlpack(ext_tensor: Any) -> 'torch.Tensor':\n \"\"\"\n if hasattr(ext_tensor, '__dlpack__'):\n kwargs: dict[str, Any] = {}\n- kwargs[\"max_version\"] = (1, 0)\n \n- device = ext_tensor.__dlpack_device__()\n- # device is either CUDA or ROCm, we need to pass the current\n+ kwargs[\"max_version\"] = (1, 0)\n+ kwargs[\"copy\"] = copy\n+\n+ # Parse the device parameter.\n+ # At this moment, it can either be a torch.device or a str representing\n+ # a torch.device, e.g. \"cpu\", \"cuda\", etc.\n+ if device is not None:\n+ if isinstance(device, str):\n+ device = torch.device(device)\n+ assert isinstance(device, torch.device), (\n+ f\"from_dlpack: unsupported device type: {type(device)}\"\n+ )\n+ device = torch._C._torchDeviceToDLDevice(device)\n+\n+ kwargs[\"dl_device\"] = device\n+\n+ ext_device = ext_tensor.__dlpack_device__()\n+ # ext_device is either CUDA or ROCm, we need to pass the current", - "comment_created_at": "2025-06-20T21:13:52+00:00", - "comment_author": "albanD", - "comment_body": "Yes let's just open an issue for it and add the xpu and privateuse1 labels on it. 
", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-validate-environment-variables.json b/_reviewers/pytorch-validate-environment-variables.json new file mode 100644 index 0000000..bd32857 --- /dev/null +++ b/_reviewers/pytorch-validate-environment-variables.json @@ -0,0 +1,160 @@ +[ + { + "discussion_id": "2148479631", + "pr_number": 155998, + "pr_file": "setup.py", + "created_at": "2025-06-15T16:41:57+00:00", + "commented_code": "os.path.isdir(folder) and len(os.listdir(folder)) == 0\n )\n\n if bool(os.getenv(\"USE_SYSTEM_LIBS\", False)):\n if os.getenv(\"USE_SYSTEM_LIBS\"):", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2148479631", + "repo_full_name": "pytorch/pytorch", + "pr_number": 155998, + "pr_file": "setup.py", + "discussion_id": "2148479631", + "commented_code": "@@ -413,7 +426,7 @@ def not_exists_or_empty(folder):\n os.path.isdir(folder) and len(os.listdir(folder)) == 0\n )\n \n- if bool(os.getenv(\"USE_SYSTEM_LIBS\", False)):\n+ if os.getenv(\"USE_SYSTEM_LIBS\"):", + "comment_created_at": "2025-06-15T16:41:57+00:00", + "comment_author": "malfet", + "comment_body": "Again, this is unrelated to the PR in question and also kind of semantically broken (it should use `str2bool` or something helper) that would convert 0 and \"false\" strings to false", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2178277997", + "pr_number": 157374, + "pr_file": "torch/cuda/_memory_viz.py", + "created_at": "2025-07-01T18:18:51+00:00", + "commented_code": "def format_flamegraph(flamegraph_lines, flamegraph_script=None):\n if flamegraph_script is None:\n flamegraph_script = f\"/tmp/{os.getuid()}_flamegraph.pl\"\n cache_dir = os.path.expanduser(\"~/.cache/\")", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2178277997", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157374, + "pr_file": "torch/cuda/_memory_viz.py", + "discussion_id": "2178277997", + "commented_code": "@@ -89,7 +89,9 @@ def _block_extra(b):\n \n def format_flamegraph(flamegraph_lines, flamegraph_script=None):\n if flamegraph_script is None:\n- flamegraph_script = f\"/tmp/{os.getuid()}_flamegraph.pl\"\n+ cache_dir = os.path.expanduser(\"~/.cache/\")", + "comment_created_at": "2025-07-01T18:18:51+00:00", + "comment_author": "XuehaiPan", + "comment_body": "```suggestion\r\n cache_dir = os.getenv(\"XDG_CACHE_HOME\", os.path.expanduser(\"~/.cache\"))\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2178493565", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157374, + "pr_file": "torch/cuda/_memory_viz.py", + "discussion_id": "2178277997", + "commented_code": "@@ -89,7 +89,9 @@ def _block_extra(b):\n \n def format_flamegraph(flamegraph_lines, flamegraph_script=None):\n if flamegraph_script is None:\n- flamegraph_script = f\"/tmp/{os.getuid()}_flamegraph.pl\"\n+ cache_dir = os.path.expanduser(\"~/.cache/\")", + "comment_created_at": "2025-07-01T20:53:10+00:00", + "comment_author": "malfet", + "comment_body": "I though about it. 
Do you know when home would not be sufficient?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2112886671", + "pr_number": 137846, + "pr_file": "torch/utils/collect_env.py", + "created_at": "2025-05-28T22:47:29+00:00", + "commented_code": "return smi\n\n\ndef get_pkg_version(run_lambda, pkg):\n ret = \"\"\n index = -1\n if get_platform() == \"linux\":", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2112886671", + "repo_full_name": "pytorch/pytorch", + "pr_number": 137846, + "pr_file": "torch/utils/collect_env.py", + "discussion_id": "2112886671", + "commented_code": "@@ -243,6 +248,128 @@ def get_nvidia_smi():\n return smi\n \n \n+def get_pkg_version(run_lambda, pkg):\n+ ret = \"\"\n+ index = -1\n+ if get_platform() == \"linux\":", + "comment_created_at": "2025-05-28T22:47:29+00:00", + "comment_author": "malfet", + "comment_body": "Why not replace it with something like:\r\n```\r\nif get_platform() != \"linux\": return \"N/A\"\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2113890020", + "repo_full_name": "pytorch/pytorch", + "pr_number": 137846, + "pr_file": "torch/utils/collect_env.py", + "discussion_id": "2112886671", + "commented_code": "@@ -243,6 +248,128 @@ def get_nvidia_smi():\n return smi\n \n \n+def get_pkg_version(run_lambda, pkg):\n+ ret = \"\"\n+ index = -1\n+ if get_platform() == \"linux\":", + "comment_created_at": "2025-05-29T12:57:14+00:00", + "comment_author": "jingxu10", + "comment_body": "done", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2153261028", + "pr_number": 156207, + "pr_file": "torch/distributed/_pin_memory_utils.py", + "created_at": "2025-06-17T22:15:49+00:00", + "commented_code": "# mypy: allow-untyped-defs\nimport logging\nimport os\nfrom logging import getLogger\n\nimport torch\n\nlogger = getLogger()\nlogger.setLevel(logging.INFO)\n\n\n# Allows retrying cudaHostRegister if it fails\nCKPT_PIN_ALLOW_RETRY = os.environ.get(\"CKPT_PIN_ALLOW_RETRY\", \"1\") == \"1\"\n# Peeks last cudaError before pinning shared memory\nCKPT_PIN_PEEK_CUDA_ERROR = os.environ.get(\"CKPT_PIN_PEEK_CUDA_ERROR\", \"0\") == \"1\"\n# Pops last cudaError before pinning shared memory\nCKPT_PIN_POP_CUDA_ERROR = os.environ.get(\"CKPT_PIN_POP_CUDA_ERROR\", \"0\") == \"1\"", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2153261028", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156207, + "pr_file": "torch/distributed/_pin_memory_utils.py", + "discussion_id": "2153261028", + "commented_code": "@@ -0,0 +1,66 @@\n+# mypy: allow-untyped-defs\n+import logging\n+import os\n+from logging import getLogger\n+\n+import torch\n+\n+logger = getLogger()\n+logger.setLevel(logging.INFO)\n+\n+\n+# Allows retrying cudaHostRegister if it fails\n+CKPT_PIN_ALLOW_RETRY = os.environ.get(\"CKPT_PIN_ALLOW_RETRY\", \"1\") == \"1\"\n+# Peeks last cudaError before pinning shared memory\n+CKPT_PIN_PEEK_CUDA_ERROR = os.environ.get(\"CKPT_PIN_PEEK_CUDA_ERROR\", \"0\") == \"1\"\n+# Pops last cudaError before pinning shared memory\n+CKPT_PIN_POP_CUDA_ERROR = os.environ.get(\"CKPT_PIN_POP_CUDA_ERROR\", \"0\") == \"1\"", + "comment_created_at": "2025-06-17T22:15:49+00:00", + "comment_author": "fegin", + "comment_body": "Do we want to export these variables? I guess not. We can just move before where they are used. 
For example, `CKPT_PIN_ALLOW_RETRY` is not used at all.", + "pr_file_module": null + }, + { + "comment_id": "2153276222", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156207, + "pr_file": "torch/distributed/_pin_memory_utils.py", + "discussion_id": "2153261028", + "commented_code": "@@ -0,0 +1,66 @@\n+# mypy: allow-untyped-defs\n+import logging\n+import os\n+from logging import getLogger\n+\n+import torch\n+\n+logger = getLogger()\n+logger.setLevel(logging.INFO)\n+\n+\n+# Allows retrying cudaHostRegister if it fails\n+CKPT_PIN_ALLOW_RETRY = os.environ.get(\"CKPT_PIN_ALLOW_RETRY\", \"1\") == \"1\"\n+# Peeks last cudaError before pinning shared memory\n+CKPT_PIN_PEEK_CUDA_ERROR = os.environ.get(\"CKPT_PIN_PEEK_CUDA_ERROR\", \"0\") == \"1\"\n+# Pops last cudaError before pinning shared memory\n+CKPT_PIN_POP_CUDA_ERROR = os.environ.get(\"CKPT_PIN_POP_CUDA_ERROR\", \"0\") == \"1\"", + "comment_created_at": "2025-06-17T22:31:58+00:00", + "comment_author": "vadimkantorov", + "comment_body": "Also, should these env vars be `TORCH_`-prefixed?", + "pr_file_module": null + }, + { + "comment_id": "2153356898", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156207, + "pr_file": "torch/distributed/_pin_memory_utils.py", + "discussion_id": "2153261028", + "commented_code": "@@ -0,0 +1,66 @@\n+# mypy: allow-untyped-defs\n+import logging\n+import os\n+from logging import getLogger\n+\n+import torch\n+\n+logger = getLogger()\n+logger.setLevel(logging.INFO)\n+\n+\n+# Allows retrying cudaHostRegister if it fails\n+CKPT_PIN_ALLOW_RETRY = os.environ.get(\"CKPT_PIN_ALLOW_RETRY\", \"1\") == \"1\"\n+# Peeks last cudaError before pinning shared memory\n+CKPT_PIN_PEEK_CUDA_ERROR = os.environ.get(\"CKPT_PIN_PEEK_CUDA_ERROR\", \"0\") == \"1\"\n+# Pops last cudaError before pinning shared memory\n+CKPT_PIN_POP_CUDA_ERROR = os.environ.get(\"CKPT_PIN_POP_CUDA_ERROR\", \"0\") == \"1\"", + "comment_created_at": "2025-06-17T23:58:19+00:00", + "comment_author": "Saiteja64", + "comment_body": "Removed and simplified implementation. Can add these separately. 
Not necessary for ZOC", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2168080662", + "pr_number": 156923, + "pr_file": "torch/utils/cpp_extension.py", + "created_at": "2025-06-26T04:30:00+00:00", + "commented_code": "# See cmake/Modules_CUDA_fix/upstream/FindCUDA/select_compute_arch.cmake\n _arch_list = os.environ.get('TORCH_CUDA_ARCH_LIST', None)", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2168080662", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156923, + "pr_file": "torch/utils/cpp_extension.py", + "discussion_id": "2168080662", + "commented_code": "@@ -2420,11 +2420,12 @@ def _get_cuda_arch_flags(cflags: Optional[list[str]] = None) -> list[str]:\n # See cmake/Modules_CUDA_fix/upstream/FindCUDA/select_compute_arch.cmake\n _arch_list = os.environ.get('TORCH_CUDA_ARCH_LIST', None)", + "comment_created_at": "2025-06-26T04:30:00+00:00", + "comment_author": "Copilot", + "comment_body": "Normalize the environment variable with `_arch_list = _arch_list.strip().lower()` before comparison to ensure case-insensitive matching and avoid unintended whitespace issues.\n```suggestion\n _arch_list = os.environ.get('TORCH_CUDA_ARCH_LIST', None)\n if _arch_list:\n _arch_list = _arch_list.strip().lower()\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-validate-environment-variables.md b/_reviewers/pytorch-validate-environment-variables.md index 7e79183..b4fbd2b 100644 --- a/_reviewers/pytorch-validate-environment-variables.md +++ b/_reviewers/pytorch-validate-environment-variables.md @@ -72,165 +72,3 @@ return result ``` Properly handling environment variables improves code reliability, maintainability, and makes behavior more predictable across different environments. 
- - -[ - { - "discussion_id": "2148479631", - "pr_number": 155998, - "pr_file": "setup.py", - "created_at": "2025-06-15T16:41:57+00:00", - "commented_code": "os.path.isdir(folder) and len(os.listdir(folder)) == 0\n )\n\n if bool(os.getenv(\"USE_SYSTEM_LIBS\", False)):\n if os.getenv(\"USE_SYSTEM_LIBS\"):", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2148479631", - "repo_full_name": "pytorch/pytorch", - "pr_number": 155998, - "pr_file": "setup.py", - "discussion_id": "2148479631", - "commented_code": "@@ -413,7 +426,7 @@ def not_exists_or_empty(folder):\n os.path.isdir(folder) and len(os.listdir(folder)) == 0\n )\n \n- if bool(os.getenv(\"USE_SYSTEM_LIBS\", False)):\n+ if os.getenv(\"USE_SYSTEM_LIBS\"):", - "comment_created_at": "2025-06-15T16:41:57+00:00", - "comment_author": "malfet", - "comment_body": "Again, this is unrelated to the PR in question and also kind of semantically broken (it should use `str2bool` or something helper) that would convert 0 and \"false\" strings to false", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2178277997", - "pr_number": 157374, - "pr_file": "torch/cuda/_memory_viz.py", - "created_at": "2025-07-01T18:18:51+00:00", - "commented_code": "def format_flamegraph(flamegraph_lines, flamegraph_script=None):\n if flamegraph_script is None:\n flamegraph_script = f\"/tmp/{os.getuid()}_flamegraph.pl\"\n cache_dir = os.path.expanduser(\"~/.cache/\")", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2178277997", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157374, - "pr_file": "torch/cuda/_memory_viz.py", - "discussion_id": "2178277997", - "commented_code": "@@ -89,7 +89,9 @@ def _block_extra(b):\n \n def format_flamegraph(flamegraph_lines, flamegraph_script=None):\n if flamegraph_script is None:\n- flamegraph_script = f\"/tmp/{os.getuid()}_flamegraph.pl\"\n+ cache_dir = os.path.expanduser(\"~/.cache/\")", - "comment_created_at": "2025-07-01T18:18:51+00:00", - "comment_author": "XuehaiPan", - "comment_body": "```suggestion\r\n cache_dir = os.getenv(\"XDG_CACHE_HOME\", os.path.expanduser(\"~/.cache\"))\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2178493565", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157374, - "pr_file": "torch/cuda/_memory_viz.py", - "discussion_id": "2178277997", - "commented_code": "@@ -89,7 +89,9 @@ def _block_extra(b):\n \n def format_flamegraph(flamegraph_lines, flamegraph_script=None):\n if flamegraph_script is None:\n- flamegraph_script = f\"/tmp/{os.getuid()}_flamegraph.pl\"\n+ cache_dir = os.path.expanduser(\"~/.cache/\")", - "comment_created_at": "2025-07-01T20:53:10+00:00", - "comment_author": "malfet", - "comment_body": "I though about it. 
Do you know when home would not be sufficient?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2112886671", - "pr_number": 137846, - "pr_file": "torch/utils/collect_env.py", - "created_at": "2025-05-28T22:47:29+00:00", - "commented_code": "return smi\n\n\ndef get_pkg_version(run_lambda, pkg):\n ret = \"\"\n index = -1\n if get_platform() == \"linux\":", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2112886671", - "repo_full_name": "pytorch/pytorch", - "pr_number": 137846, - "pr_file": "torch/utils/collect_env.py", - "discussion_id": "2112886671", - "commented_code": "@@ -243,6 +248,128 @@ def get_nvidia_smi():\n return smi\n \n \n+def get_pkg_version(run_lambda, pkg):\n+ ret = \"\"\n+ index = -1\n+ if get_platform() == \"linux\":", - "comment_created_at": "2025-05-28T22:47:29+00:00", - "comment_author": "malfet", - "comment_body": "Why not replace it with something like:\r\n```\r\nif get_platform() != \"linux\": return \"N/A\"\r\n```", - "pr_file_module": null - }, - { - "comment_id": "2113890020", - "repo_full_name": "pytorch/pytorch", - "pr_number": 137846, - "pr_file": "torch/utils/collect_env.py", - "discussion_id": "2112886671", - "commented_code": "@@ -243,6 +248,128 @@ def get_nvidia_smi():\n return smi\n \n \n+def get_pkg_version(run_lambda, pkg):\n+ ret = \"\"\n+ index = -1\n+ if get_platform() == \"linux\":", - "comment_created_at": "2025-05-29T12:57:14+00:00", - "comment_author": "jingxu10", - "comment_body": "done", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2153261028", - "pr_number": 156207, - "pr_file": "torch/distributed/_pin_memory_utils.py", - "created_at": "2025-06-17T22:15:49+00:00", - "commented_code": "# mypy: allow-untyped-defs\nimport logging\nimport os\nfrom logging import getLogger\n\nimport torch\n\nlogger = getLogger()\nlogger.setLevel(logging.INFO)\n\n\n# Allows retrying cudaHostRegister if it fails\nCKPT_PIN_ALLOW_RETRY = os.environ.get(\"CKPT_PIN_ALLOW_RETRY\", \"1\") == \"1\"\n# Peeks last cudaError before pinning shared memory\nCKPT_PIN_PEEK_CUDA_ERROR = os.environ.get(\"CKPT_PIN_PEEK_CUDA_ERROR\", \"0\") == \"1\"\n# Pops last cudaError before pinning shared memory\nCKPT_PIN_POP_CUDA_ERROR = os.environ.get(\"CKPT_PIN_POP_CUDA_ERROR\", \"0\") == \"1\"", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2153261028", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156207, - "pr_file": "torch/distributed/_pin_memory_utils.py", - "discussion_id": "2153261028", - "commented_code": "@@ -0,0 +1,66 @@\n+# mypy: allow-untyped-defs\n+import logging\n+import os\n+from logging import getLogger\n+\n+import torch\n+\n+logger = getLogger()\n+logger.setLevel(logging.INFO)\n+\n+\n+# Allows retrying cudaHostRegister if it fails\n+CKPT_PIN_ALLOW_RETRY = os.environ.get(\"CKPT_PIN_ALLOW_RETRY\", \"1\") == \"1\"\n+# Peeks last cudaError before pinning shared memory\n+CKPT_PIN_PEEK_CUDA_ERROR = os.environ.get(\"CKPT_PIN_PEEK_CUDA_ERROR\", \"0\") == \"1\"\n+# Pops last cudaError before pinning shared memory\n+CKPT_PIN_POP_CUDA_ERROR = os.environ.get(\"CKPT_PIN_POP_CUDA_ERROR\", \"0\") == \"1\"", - "comment_created_at": "2025-06-17T22:15:49+00:00", - "comment_author": "fegin", - "comment_body": "Do we want to export these variables? I guess not. We can just move before where they are used. 
For example, `CKPT_PIN_ALLOW_RETRY` is not used at all.", - "pr_file_module": null - }, - { - "comment_id": "2153276222", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156207, - "pr_file": "torch/distributed/_pin_memory_utils.py", - "discussion_id": "2153261028", - "commented_code": "@@ -0,0 +1,66 @@\n+# mypy: allow-untyped-defs\n+import logging\n+import os\n+from logging import getLogger\n+\n+import torch\n+\n+logger = getLogger()\n+logger.setLevel(logging.INFO)\n+\n+\n+# Allows retrying cudaHostRegister if it fails\n+CKPT_PIN_ALLOW_RETRY = os.environ.get(\"CKPT_PIN_ALLOW_RETRY\", \"1\") == \"1\"\n+# Peeks last cudaError before pinning shared memory\n+CKPT_PIN_PEEK_CUDA_ERROR = os.environ.get(\"CKPT_PIN_PEEK_CUDA_ERROR\", \"0\") == \"1\"\n+# Pops last cudaError before pinning shared memory\n+CKPT_PIN_POP_CUDA_ERROR = os.environ.get(\"CKPT_PIN_POP_CUDA_ERROR\", \"0\") == \"1\"", - "comment_created_at": "2025-06-17T22:31:58+00:00", - "comment_author": "vadimkantorov", - "comment_body": "Also, should these env vars be `TORCH_`-prefixed?", - "pr_file_module": null - }, - { - "comment_id": "2153356898", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156207, - "pr_file": "torch/distributed/_pin_memory_utils.py", - "discussion_id": "2153261028", - "commented_code": "@@ -0,0 +1,66 @@\n+# mypy: allow-untyped-defs\n+import logging\n+import os\n+from logging import getLogger\n+\n+import torch\n+\n+logger = getLogger()\n+logger.setLevel(logging.INFO)\n+\n+\n+# Allows retrying cudaHostRegister if it fails\n+CKPT_PIN_ALLOW_RETRY = os.environ.get(\"CKPT_PIN_ALLOW_RETRY\", \"1\") == \"1\"\n+# Peeks last cudaError before pinning shared memory\n+CKPT_PIN_PEEK_CUDA_ERROR = os.environ.get(\"CKPT_PIN_PEEK_CUDA_ERROR\", \"0\") == \"1\"\n+# Pops last cudaError before pinning shared memory\n+CKPT_PIN_POP_CUDA_ERROR = os.environ.get(\"CKPT_PIN_POP_CUDA_ERROR\", \"0\") == \"1\"", - "comment_created_at": "2025-06-17T23:58:19+00:00", - "comment_author": "Saiteja64", - "comment_body": "Removed and simplified implementation. Can add these separately. 
Not necessary for ZOC", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2168080662", - "pr_number": 156923, - "pr_file": "torch/utils/cpp_extension.py", - "created_at": "2025-06-26T04:30:00+00:00", - "commented_code": "# See cmake/Modules_CUDA_fix/upstream/FindCUDA/select_compute_arch.cmake\n _arch_list = os.environ.get('TORCH_CUDA_ARCH_LIST', None)", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2168080662", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156923, - "pr_file": "torch/utils/cpp_extension.py", - "discussion_id": "2168080662", - "commented_code": "@@ -2420,11 +2420,12 @@ def _get_cuda_arch_flags(cflags: Optional[list[str]] = None) -> list[str]:\n # See cmake/Modules_CUDA_fix/upstream/FindCUDA/select_compute_arch.cmake\n _arch_list = os.environ.get('TORCH_CUDA_ARCH_LIST', None)", - "comment_created_at": "2025-06-26T04:30:00+00:00", - "comment_author": "Copilot", - "comment_body": "Normalize the environment variable with `_arch_list = _arch_list.strip().lower()` before comparison to ensure case-insensitive matching and avoid unintended whitespace issues.\n```suggestion\n _arch_list = os.environ.get('TORCH_CUDA_ARCH_LIST', None)\n if _arch_list:\n _arch_list = _arch_list.strip().lower()\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-validate-performance-metrics.json b/_reviewers/pytorch-validate-performance-metrics.json new file mode 100644 index 0000000..d5cc56b --- /dev/null +++ b/_reviewers/pytorch-validate-performance-metrics.json @@ -0,0 +1,274 @@ +[ + { + "discussion_id": "2049183436", + "pr_number": 149697, + "pr_file": "torch/_inductor/analysis/device_info.py", + "created_at": "2025-04-17T15:20:04+00:00", + "commented_code": "from dataclasses import dataclass\n\nimport torch\n\n\n@dataclass(frozen=True)\nclass DeviceInfo:\n \"\"\"\n Theoretical Numbers from data sheet. If two numbers are given, Tensor/Matrix Core vs not,\n then the higher number is reported. Sparsity is not considered.\n\n\n Bandwidth numbers are tricky, because there are platform differences that may not show up in the profiler trace.\n For example,\n \"\"\"\n\n tops: dict[torch.dtype, float]\n dram_bw_gbs: float\n dram_gb: float\n\n\n# TODO investigate profiler support for tf32 and allow device to report correct number when it's turned on.\n_device_mapping: dict[str, DeviceInfo] = {\n # Source: https://resources.nvidia.com/en-us-tensor-core/nvidia-tensor-core-gpu-datasheet\n \"NVIDIA H100\": DeviceInfo(\n tops={\n torch.float64: 9.7,\n torch.float32: 19.5,\n torch.bfloat16: 1979.0,\n torch.float16: 1979.0,\n torch.float8_e8m0fnu: 3958.0,\n torch.float8_e8m0fnu: 3958.0,\n torch.float8_e4m3fnuz: 3958.0,", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2049183436", + "repo_full_name": "pytorch/pytorch", + "pr_number": 149697, + "pr_file": "torch/_inductor/analysis/device_info.py", + "discussion_id": "2049183436", + "commented_code": "@@ -0,0 +1,136 @@\n+from dataclasses import dataclass\n+\n+import torch\n+\n+\n+@dataclass(frozen=True)\n+class DeviceInfo:\n+ \"\"\"\n+ Theoretical Numbers from data sheet. If two numbers are given, Tensor/Matrix Core vs not,\n+ then the higher number is reported. 
Sparsity is not considered.\n+\n+\n+ Bandwidth numbers are tricky, because there are platform differences that may not show up in the profiler trace.\n+ For example,\n+ \"\"\"\n+\n+ tops: dict[torch.dtype, float]\n+ dram_bw_gbs: float\n+ dram_gb: float\n+\n+\n+# TODO investigate profiler support for tf32 and allow device to report correct number when it's turned on.\n+_device_mapping: dict[str, DeviceInfo] = {\n+ # Source: https://resources.nvidia.com/en-us-tensor-core/nvidia-tensor-core-gpu-datasheet\n+ \"NVIDIA H100\": DeviceInfo(\n+ tops={\n+ torch.float64: 9.7,\n+ torch.float32: 19.5,\n+ torch.bfloat16: 1979.0,\n+ torch.float16: 1979.0,\n+ torch.float8_e8m0fnu: 3958.0,\n+ torch.float8_e8m0fnu: 3958.0,\n+ torch.float8_e4m3fnuz: 3958.0,", + "comment_created_at": "2025-04-17T15:20:04+00:00", + "comment_author": "eellison", + "comment_body": "I know the fbcode servers are clock rate limited. So, the numbers will be off for those.. I think this is actually somewhat important for getting accurate numbers. \r\n\r\ncc @bertmaher - who did similar analysis here - https://fb.workplace.com/groups/420659799592399/posts/761265522198490/\r\n\r\nHow would you adjust for clock rate ? Is something simple like current_clock_rate/default sufficient ? I dont have a good sense of this.", + "pr_file_module": null + }, + { + "comment_id": "2049374582", + "repo_full_name": "pytorch/pytorch", + "pr_number": 149697, + "pr_file": "torch/_inductor/analysis/device_info.py", + "discussion_id": "2049183436", + "commented_code": "@@ -0,0 +1,136 @@\n+from dataclasses import dataclass\n+\n+import torch\n+\n+\n+@dataclass(frozen=True)\n+class DeviceInfo:\n+ \"\"\"\n+ Theoretical Numbers from data sheet. If two numbers are given, Tensor/Matrix Core vs not,\n+ then the higher number is reported. 
Sparsity is not considered.\n+\n+\n+ Bandwidth numbers are tricky, because there are platform differences that may not show up in the profiler trace.\n+ For example,\n+ \"\"\"\n+\n+ tops: dict[torch.dtype, float]\n+ dram_bw_gbs: float\n+ dram_gb: float\n+\n+\n+# TODO investigate profiler support for tf32 and allow device to report correct number when it's turned on.\n+_device_mapping: dict[str, DeviceInfo] = {\n+ # Source: https://resources.nvidia.com/en-us-tensor-core/nvidia-tensor-core-gpu-datasheet\n+ \"NVIDIA H100\": DeviceInfo(\n+ tops={\n+ torch.float64: 9.7,\n+ torch.float32: 19.5,\n+ torch.bfloat16: 1979.0,\n+ torch.float16: 1979.0,\n+ torch.float8_e8m0fnu: 3958.0,\n+ torch.float8_e8m0fnu: 3958.0,\n+ torch.float8_e4m3fnuz: 3958.0,", + "comment_created_at": "2025-04-17T17:19:17+00:00", + "comment_author": "exclamaforte", + "comment_body": "interested in Bert's opinion too, I would think that `current_clock_rate/default sufficient` would be fine considering that most of the flops calculation are just clockrate * core count * flops per core.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2049248412", + "pr_number": 149697, + "pr_file": "torch/_inductor/scheduler.py", + "created_at": "2025-04-17T15:56:09+00:00", + "commented_code": "if isinstance(self, ExternKernelSchedulerNode):", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2049248412", + "repo_full_name": "pytorch/pytorch", + "pr_number": 149697, + "pr_file": "torch/_inductor/scheduler.py", + "discussion_id": "2049248412", + "commented_code": "@@ -819,15 +854,13 @@ def get_estimated_runtime(self) -> float:\n \n if isinstance(self, ExternKernelSchedulerNode):", + "comment_created_at": "2025-04-17T15:56:09+00:00", + "comment_author": "eellison", + "comment_body": "This is going to skip any triton template we codegen.\r\n\r\nWhat about instead:\r\n\r\n- check the nodes origins and see if any of them are in flop counter\r\n- use the fx node to get flops\r\n\r\nIn some cases, we populate origins for things like layout conversions for inputs to matmul, or also, unfusing bias. to handle that lets just skip nodes which are not either `is_template()` or ExternKernels.\r\n", + "pr_file_module": null + }, + { + "comment_id": "2049274487", + "repo_full_name": "pytorch/pytorch", + "pr_number": 149697, + "pr_file": "torch/_inductor/scheduler.py", + "discussion_id": "2049248412", + "commented_code": "@@ -819,15 +854,13 @@ def get_estimated_runtime(self) -> float:\n \n if isinstance(self, ExternKernelSchedulerNode):", + "comment_created_at": "2025-04-17T16:14:57+00:00", + "comment_author": "eellison", + "comment_body": "Looks like that will also fix the failures with this pr. 
\r\n\r\nYou can reuse/factor out this:\r\n\r\nhttps://github.com/pytorch/pytorch/blob/4843ce7611b5b9e85b16f1c05cd3b4959c166fce/torch/_inductor/graph.py#L632-L655", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2092021006", + "pr_number": 149697, + "pr_file": "torch/_inductor/graph.py", + "created_at": "2025-05-15T22:03:53+00:00", + "commented_code": "# only grouped convolutions benchmarked as slower in conv samples for inference only\n if is_inference:\n from torch.utils.flop_counter import FlopCounterMode\n\n flop_counts: dict[str, float] = defaultdict(float)\n for node in conv_nodes:\n success, args, kwargs = torch._inductor.fx_utils.get_fake_args_kwargs(\n node\n )\n\n if success:\n with FlopCounterMode(display=False) as flop_counter_mode:\n with V.fake_mode:\n node.target(*args, **kwargs)\n\n counted_flops = flop_counter_mode.get_total_flops()\n if is_grouped(node):\n node_type = \"grouped\"\n elif is_small_channel(node):\n node_type = \"small\"\n elif is_in_out_channel(node):\n node_type = \"in_out\"\n else:\n node_type = \"default\"\n counted_flops = count_flops_fx(node)", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2092021006", + "repo_full_name": "pytorch/pytorch", + "pr_number": 149697, + "pr_file": "torch/_inductor/graph.py", + "discussion_id": "2092021006", + "commented_code": "@@ -652,32 +653,24 @@ def is_small_channel(n: torch.fx.Node) -> bool:\n \n # only grouped convolutions benchmarked as slower in conv samples for inference only\n if is_inference:\n- from torch.utils.flop_counter import FlopCounterMode\n-\n flop_counts: dict[str, float] = defaultdict(float)\n for node in conv_nodes:\n- success, args, kwargs = torch._inductor.fx_utils.get_fake_args_kwargs(\n- node\n- )\n-\n- if success:\n- with FlopCounterMode(display=False) as flop_counter_mode:\n- with V.fake_mode:\n- node.target(*args, **kwargs)\n-\n- counted_flops = flop_counter_mode.get_total_flops()\n- if is_grouped(node):\n- node_type = \"grouped\"\n- elif is_small_channel(node):\n- node_type = \"small\"\n- elif is_in_out_channel(node):\n- node_type = \"in_out\"\n- else:\n- node_type = \"default\"\n+ counted_flops = count_flops_fx(node)", + "comment_created_at": "2025-05-15T22:03:53+00:00", + "comment_author": "eellison", + "comment_body": "Can we make `count_flops_fx` a little bit smarter where it does not try to run this unless the node has a registration in flop_counter ? 
currently we always instantiate the tensors, and faketensormode, and run ?", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2105535986", + "pr_number": 149697, + "pr_file": "test/inductor/test_analysis.py", + "created_at": "2025-05-24T00:01:29+00:00", + "commented_code": "# Owner(s): [\"module: inductor\"]\n\nimport json\nimport re\nimport tempfile\nimport unittest\nimport uuid\nfrom io import StringIO\nfrom unittest.mock import patch\n\nimport torch\nimport torch.nn.functional as F\nfrom torch._inductor.analysis.profile_analysis import (\n _augment_trace_helper,\n _create_extern_mapping,\n JsonProfile,\n main,\n)\nfrom torch._inductor.utils import (\n fresh_inductor_cache,\n run_and_get_code,\n tabulate_2d,\n zip_dicts,\n)\nfrom torch.testing._internal.common_cuda import SM70OrLater\nfrom torch.testing._internal.common_device_type import (\n dtypes,\n instantiate_device_type_tests,\n skipIf,\n)\nfrom torch.testing._internal.common_utils import parametrize, run_tests, TestCase\nfrom torch.testing._internal.inductor_utils import IS_BIG_GPU\n\n\nexample_profile = \"\"\"\n{\n \"schemaVersion\": 1,\n \"deviceProperties\": [\n {\n \"id\": 0, \"name\": \"NVIDIA H100\", \"totalGlobalMem\": 101997215744,\n \"computeMajor\": 9, \"computeMinor\": 0,\n \"maxThreadsPerBlock\": 1024, \"maxThreadsPerMultiprocessor\": 2048,\n \"regsPerBlock\": 65536, \"warpSize\": 32,\n \"sharedMemPerBlock\": 49152, \"numSms\": 132\n , \"regsPerMultiprocessor\": 65536, \"sharedMemPerBlockOptin\": 232448, \"sharedMemPerMultiprocessor\": 233472\n }\n ],\n \"cupti_version\": 24,\n \"cuda_runtime_version\": 12060,\n \"with_flops\": 1,\n \"record_shapes\": 1,\n \"cuda_driver_version\": 12040,\n \"profile_memory\": 1,\n \"trace_id\": \"301995E163ED42048FBD783860E6E7DC\",\n \"displayTimeUnit\": \"ms\",\n \"baseTimeNanoseconds\": 1743521598000000000,\n \"traceEvents\": [\n {\n \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::convolution\", \"pid\": 1147039, \"tid\": 1147039,\n \"ts\": 198093488368.463, \"dur\": 425.453,\n \"args\": {\n \"External id\": 1340,\"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Concrete Inputs\": \\\n[\"\", \"\", \"\", \"[2, 2]\", \"[3, 3]\", \"[1, 1]\", \"False\", \"[0, 0]\", \"1\"], \"Input type\": [\"float\", \"float\", \"\", \\\n\"ScalarList\", \"ScalarList\", \"ScalarList\", \"Scalar\", \"ScalarList\", \"Scalar\"], \"Input Strides\": [[150528, 1, 672, 3],\\\n[147, 1, 21, 3], [], [], [], [], [], [], []], \"Input Dims\": [[1, 3, 224, 224], [64, 3, 7, 7], [], [], [], [], [], \\\n[], []], \"Ev Idx\": 1339\n }\n },\n {\n \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::_convolution\", \"pid\": 1147039, \"tid\": 1147039,\n \"ts\": 198093488444.498, \"dur\": 341.867,\n \"args\": {\n \"External id\": 1341,\"Record function id\": 0, \"Concrete Inputs\": [\"\", \"\", \"\", \"[2, 2]\", \"[3, 3]\", \"[1, 1]\",\\\n \"False\", \"[0, 0]\", \"1\", \"False\", \"False\", \"True\", \"True\"], \"Input type\": [\"float\", \"float\", \"\", \"ScalarList\",\\\n \"ScalarList\", \"ScalarList\", \"Scalar\", \"ScalarList\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\"], \"Input Strides\": \\\n[[150528, 1, 672, 3], [147, 1, 21, 3], [], [], [], [], [], [], [], [], [], [], []], \"Input Dims\": [[1, 3, 224, 224], \\\n[64, 3, 7, 7], [], [], [], [], [], [], [], [], [], [], []], \"Ev Idx\": 1340\n }\n },\n {\n \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::addmm\", \"pid\": 1147039, \"tid\": 1147039,\n \"ts\": 198093513655.849, \"dur\": 251.130,\n \"args\": 
{\n \"External id\": 1619,\"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Concrete Inputs\": \\\n[\"\", \"\", \"\", \"1\", \"1\", \"\"], \"Input type\": [\"float\", \"float\", \"float\", \"Scalar\", \"Scalar\", \"float\"], \"Input Strides\":\\\n [[1], [0, 1], [1, 2048], [], [], [1000, 1]], \"Input Dims\": [[1000], [1, 2048], [2048, 1000], [], [], [1, 1000]], \\\n\"Ev Idx\": 1618\n }\n },\n {\n \"ph\": \"X\", \"cat\": \"kernel\", \"name\": \"void cutlass_addmm\", \"pid\": 1147039, \"tid\": 1147039,\n \"ts\": 198093513655.849, \"dur\": 251.130,\n \"args\": {\n \"External id\": 1619,\"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Ev Idx\": 1618\n }\n },\n {\n \"ph\": \"X\", \"cat\": \"kernel\", \"name\": \"void convolution_kernel\", \"pid\": 1147039, \"tid\": 1147039,\n \"ts\": 198093513655.849, \"dur\": 200.130,\n \"args\": {\n \"External id\": 1342, \"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Ev Idx\": 1618\n }\n },\n {\n \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::convolution\", \"pid\": 1147039, \"tid\": 1147039,\n \"ts\": 198093488444.498, \"dur\": 341.867,\n \"args\": {\n \"External id\": 1342,\"Record function id\": 0, \"Concrete Inputs\": [\"\", \"\", \"\", \"[2, 2]\", \"[3, 3]\", \"[1, 1]\", \\\n\"False\", \"[0, 0]\", \"1\", \"False\", \"False\", \"True\", \"True\"], \"Input type\": [\"float\", \"float\", \"\", \"ScalarList\", \\\n\"ScalarList\", \"ScalarList\", \"Scalar\", \"ScalarList\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\"], \"Input \\\nStrides\": [[150528, 1, 672, 3], [147, 1, 21, 3], [], [], [], [], [], [], [], [], [], [], []], \"Input Dims\": \\\n[[1, 3, 224, 224], [64, 3, 7, 7], [], [], [], [], [], [], [], [], [], [], []], \"Ev Idx\": 1340\n }\n }\n],\n \"traceName\": \"/tmp/compiled_module_profile.json\"\n}\n\"\"\"\n\n\ndef verify_flops(self, expected_flops, out_profile):\n j = 0\n for i in range(len(out_profile[\"traceEvents\"])):\n if \"kernel_flop\" in out_profile[\"traceEvents\"][i][\"args\"]:\n self.assertEqual(\n out_profile[\"traceEvents\"][i][\"args\"][\"kernel_flop\"],\n expected_flops[j],\n )\n j += 1\n\n\ndef random_tensor(size, dtype, **kwargs):\n if dtype in [torch.half, torch.bfloat16, torch.float, torch.double]:\n return torch.randn(size, dtype=dtype, **kwargs)\n elif dtype in [torch.uint8, torch.int8, torch.short, torch.int, torch.long]:\n return torch.randint(0, 100, size, dtype=dtype, **kwargs)\n else:\n raise ValueError(\"Unsupported data type\")\n\n\ndef cT(device, dtype):\n def T(*shape, requires_grad=False):\n return random_tensor(\n shape, requires_grad=requires_grad, device=device, dtype=dtype\n )\n\n return T\n\n\ndef FlopCounterMode(*args, **kwargs):\n return torch.utils.flop_counter.FlopCounterMode(*args, **kwargs, display=False)\n\n\nTMP_DIR = tempfile.mkdtemp()\n\n\ndef trace_files():\n TRACE1 = f\"{TMP_DIR}/trace1-{uuid.uuid4()}.json\"\n TRACE2 = f\"{TMP_DIR}/trace2-{uuid.uuid4()}.json\"\n return TRACE1, TRACE2\n\n\ndef omni_model(device, dtype, compile=True, addmm=True, bmm=True):\n T = cT(device, dtype)\n\n def model():\n input_conv = T(1, 3, 56, 56)\n conv_weight = T(12, 3, 5, 5)\n\n # Increased matrix sizes\n B = 8\n M = 256\n N = 512\n K = 768\n mat1 = T(M, N)\n mat2 = T(N, K)\n\n batch_mat1 = T(B, M, N)\n batch_mat2 = T(B, N, K)\n\n conv_output = F.conv2d(input_conv, conv_weight)\n\n conv_output = conv_output * 10\n mm_output = torch.mm(mat1, mat2)\n ret = [\n conv_output.flatten(),\n mm_output.flatten(),\n ]\n\n if addmm:\n 
addmm_output = torch.addmm(\n torch.zeros(mm_output.shape, device=mat1.device, dtype=mat1.dtype),\n mat1,\n mat2,\n )\n ret.append(addmm_output.flatten())\n\n if bmm:\n bmm_output = torch.bmm(batch_mat1, batch_mat2)\n ret.append(bmm_output.flatten())\n\n if bmm and addmm:\n baddbmm_output = torch.baddbmm(\n torch.zeros(\n 1,\n *mm_output.shape,\n device=batch_mat1.device,\n dtype=batch_mat1.dtype,\n ),\n batch_mat1,\n batch_mat2,\n )\n ret.append(baddbmm_output.flatten())\n\n return torch.cat(ret)\n\n if compile:\n return torch.compile(\n model, options={\"benchmark_kernel\": True, \"profile_bandwidth\": True}\n )\n return model\n\n\nprefix = [\"profile.py\"]\n\n\nclass TestUtils(TestCase):\n def test_tabulate2d(self):\n headers = [\"Kernel\", \"Self H100 TIME (ms)\", \"Count\", \"Percent\"]\n rows = [\n [\"aten::mm\", 0.500, 7, 0.0],\n [\"aten::bmm\", 0.400, 6, 0.0],\n [\"aten::baddbmm\", 0.300, 5, 0.0],\n [\"aten::convolution\", 0.200, 4, 0.0],\n [\"aten::cudnn_convolution\", 0.100, 3, 0.0],\n ]\n table = [\n \" Kernel | Self H100 TIME (ms) | Count | Percent \",\n \"-----------------------------------------------------------------\",\n \" aten::mm | 0.5 | 7 | 0.0 \",\n \" aten::bmm | 0.4 | 6 | 0.0 \",\n \" aten::baddbmm | 0.3 | 5 | 0.0 \",\n \" aten::convolution | 0.2 | 4 | 0.0 \",\n \" aten::cudnn_convolution | 0.1 | 3 | 0.0 \",\n ]\n res = tabulate_2d(rows, headers)\n for r, t in zip(res.split(\"\\n\"), table):\n self.assertEqual(r, t)\n\n def test_zip_dicts(self):\n d1 = {\"a\": 1, \"b\": 2}\n d2 = {\"a\": 3, \"c\": 4}\n res = zip_dicts(d1, d2, d1_default=\"foo\", d2_default=\"bar\")\n self.assertEqual(set(res), {(\"a\", 1, 3), (\"b\", 2, \"bar\"), (\"c\", \"foo\", 4)})\n res = zip_dicts(d1, d2)\n self.assertEqual(set(res), {(\"a\", 1, 3), (\"b\", 2, None), (\"c\", None, 4)})\n\n\nclass TestAnalysis(TestCase):\n @skipIf(not SM70OrLater, \"Requires sm70\")\n def test_noop(self):\n with (\n patch(\"sys.stdout\", new_callable=StringIO) as mock_stdout,\n patch(\"sys.argv\", [*prefix]),\n ):\n main()\n self.assertEqual(mock_stdout.getvalue(), \"\")\n\n @skipIf(not SM70OrLater, \"Requires sm70\")\n @dtypes(torch.float, torch.double, torch.float16)\n def test_diff(self, device, dtype):\n \"\"\"\n diff, testing out the nruns feature too.\n \"\"\"\n if device == \"cpu\":\n # TODO cpu support\n return\n om = omni_model(device, dtype)\n REPEAT = 5\n trace1, trace2 = trace_files()\n print(\"first trace\")\n torch._dynamo.reset() # reset the cache\n with fresh_inductor_cache():\n with torch.profiler.profile(record_shapes=True) as p:\n om()\n p.export_chrome_trace(trace1)\n\n print(\"second trace\")\n torch._dynamo.reset() # reset the cache\n with fresh_inductor_cache():\n with torch.profiler.profile(record_shapes=True) as p:\n for _ in range(REPEAT):\n om()\n p.export_chrome_trace(trace2)\n\n print(\"diffing...\")\n with patch(", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2105535986", + "repo_full_name": "pytorch/pytorch", + "pr_number": 149697, + "pr_file": "test/inductor/test_analysis.py", + "discussion_id": "2105535986", + "commented_code": "@@ -0,0 +1,601 @@\n+# Owner(s): [\"module: inductor\"]\n+\n+import json\n+import re\n+import tempfile\n+import unittest\n+import uuid\n+from io import StringIO\n+from unittest.mock import patch\n+\n+import torch\n+import torch.nn.functional as F\n+from torch._inductor.analysis.profile_analysis import (\n+ _augment_trace_helper,\n+ _create_extern_mapping,\n+ JsonProfile,\n+ main,\n+)\n+from torch._inductor.utils 
import (\n+ fresh_inductor_cache,\n+ run_and_get_code,\n+ tabulate_2d,\n+ zip_dicts,\n+)\n+from torch.testing._internal.common_cuda import SM70OrLater\n+from torch.testing._internal.common_device_type import (\n+ dtypes,\n+ instantiate_device_type_tests,\n+ skipIf,\n+)\n+from torch.testing._internal.common_utils import parametrize, run_tests, TestCase\n+from torch.testing._internal.inductor_utils import IS_BIG_GPU\n+\n+\n+example_profile = \"\"\"\n+{\n+ \"schemaVersion\": 1,\n+ \"deviceProperties\": [\n+ {\n+ \"id\": 0, \"name\": \"NVIDIA H100\", \"totalGlobalMem\": 101997215744,\n+ \"computeMajor\": 9, \"computeMinor\": 0,\n+ \"maxThreadsPerBlock\": 1024, \"maxThreadsPerMultiprocessor\": 2048,\n+ \"regsPerBlock\": 65536, \"warpSize\": 32,\n+ \"sharedMemPerBlock\": 49152, \"numSms\": 132\n+ , \"regsPerMultiprocessor\": 65536, \"sharedMemPerBlockOptin\": 232448, \"sharedMemPerMultiprocessor\": 233472\n+ }\n+ ],\n+ \"cupti_version\": 24,\n+ \"cuda_runtime_version\": 12060,\n+ \"with_flops\": 1,\n+ \"record_shapes\": 1,\n+ \"cuda_driver_version\": 12040,\n+ \"profile_memory\": 1,\n+ \"trace_id\": \"301995E163ED42048FBD783860E6E7DC\",\n+ \"displayTimeUnit\": \"ms\",\n+ \"baseTimeNanoseconds\": 1743521598000000000,\n+ \"traceEvents\": [\n+ {\n+ \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::convolution\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093488368.463, \"dur\": 425.453,\n+ \"args\": {\n+ \"External id\": 1340,\"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Concrete Inputs\": \\\n+[\"\", \"\", \"\", \"[2, 2]\", \"[3, 3]\", \"[1, 1]\", \"False\", \"[0, 0]\", \"1\"], \"Input type\": [\"float\", \"float\", \"\", \\\n+\"ScalarList\", \"ScalarList\", \"ScalarList\", \"Scalar\", \"ScalarList\", \"Scalar\"], \"Input Strides\": [[150528, 1, 672, 3],\\\n+[147, 1, 21, 3], [], [], [], [], [], [], []], \"Input Dims\": [[1, 3, 224, 224], [64, 3, 7, 7], [], [], [], [], [], \\\n+[], []], \"Ev Idx\": 1339\n+ }\n+ },\n+ {\n+ \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::_convolution\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093488444.498, \"dur\": 341.867,\n+ \"args\": {\n+ \"External id\": 1341,\"Record function id\": 0, \"Concrete Inputs\": [\"\", \"\", \"\", \"[2, 2]\", \"[3, 3]\", \"[1, 1]\",\\\n+ \"False\", \"[0, 0]\", \"1\", \"False\", \"False\", \"True\", \"True\"], \"Input type\": [\"float\", \"float\", \"\", \"ScalarList\",\\\n+ \"ScalarList\", \"ScalarList\", \"Scalar\", \"ScalarList\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\"], \"Input Strides\": \\\n+[[150528, 1, 672, 3], [147, 1, 21, 3], [], [], [], [], [], [], [], [], [], [], []], \"Input Dims\": [[1, 3, 224, 224], \\\n+[64, 3, 7, 7], [], [], [], [], [], [], [], [], [], [], []], \"Ev Idx\": 1340\n+ }\n+ },\n+ {\n+ \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::addmm\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093513655.849, \"dur\": 251.130,\n+ \"args\": {\n+ \"External id\": 1619,\"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Concrete Inputs\": \\\n+[\"\", \"\", \"\", \"1\", \"1\", \"\"], \"Input type\": [\"float\", \"float\", \"float\", \"Scalar\", \"Scalar\", \"float\"], \"Input Strides\":\\\n+ [[1], [0, 1], [1, 2048], [], [], [1000, 1]], \"Input Dims\": [[1000], [1, 2048], [2048, 1000], [], [], [1, 1000]], \\\n+\"Ev Idx\": 1618\n+ }\n+ },\n+ {\n+ \"ph\": \"X\", \"cat\": \"kernel\", \"name\": \"void cutlass_addmm\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093513655.849, \"dur\": 251.130,\n+ \"args\": {\n+ 
\"External id\": 1619,\"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Ev Idx\": 1618\n+ }\n+ },\n+ {\n+ \"ph\": \"X\", \"cat\": \"kernel\", \"name\": \"void convolution_kernel\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093513655.849, \"dur\": 200.130,\n+ \"args\": {\n+ \"External id\": 1342, \"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Ev Idx\": 1618\n+ }\n+ },\n+ {\n+ \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::convolution\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093488444.498, \"dur\": 341.867,\n+ \"args\": {\n+ \"External id\": 1342,\"Record function id\": 0, \"Concrete Inputs\": [\"\", \"\", \"\", \"[2, 2]\", \"[3, 3]\", \"[1, 1]\", \\\n+\"False\", \"[0, 0]\", \"1\", \"False\", \"False\", \"True\", \"True\"], \"Input type\": [\"float\", \"float\", \"\", \"ScalarList\", \\\n+\"ScalarList\", \"ScalarList\", \"Scalar\", \"ScalarList\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\"], \"Input \\\n+Strides\": [[150528, 1, 672, 3], [147, 1, 21, 3], [], [], [], [], [], [], [], [], [], [], []], \"Input Dims\": \\\n+[[1, 3, 224, 224], [64, 3, 7, 7], [], [], [], [], [], [], [], [], [], [], []], \"Ev Idx\": 1340\n+ }\n+ }\n+],\n+ \"traceName\": \"/tmp/compiled_module_profile.json\"\n+}\n+\"\"\"\n+\n+\n+def verify_flops(self, expected_flops, out_profile):\n+ j = 0\n+ for i in range(len(out_profile[\"traceEvents\"])):\n+ if \"kernel_flop\" in out_profile[\"traceEvents\"][i][\"args\"]:\n+ self.assertEqual(\n+ out_profile[\"traceEvents\"][i][\"args\"][\"kernel_flop\"],\n+ expected_flops[j],\n+ )\n+ j += 1\n+\n+\n+def random_tensor(size, dtype, **kwargs):\n+ if dtype in [torch.half, torch.bfloat16, torch.float, torch.double]:\n+ return torch.randn(size, dtype=dtype, **kwargs)\n+ elif dtype in [torch.uint8, torch.int8, torch.short, torch.int, torch.long]:\n+ return torch.randint(0, 100, size, dtype=dtype, **kwargs)\n+ else:\n+ raise ValueError(\"Unsupported data type\")\n+\n+\n+def cT(device, dtype):\n+ def T(*shape, requires_grad=False):\n+ return random_tensor(\n+ shape, requires_grad=requires_grad, device=device, dtype=dtype\n+ )\n+\n+ return T\n+\n+\n+def FlopCounterMode(*args, **kwargs):\n+ return torch.utils.flop_counter.FlopCounterMode(*args, **kwargs, display=False)\n+\n+\n+TMP_DIR = tempfile.mkdtemp()\n+\n+\n+def trace_files():\n+ TRACE1 = f\"{TMP_DIR}/trace1-{uuid.uuid4()}.json\"\n+ TRACE2 = f\"{TMP_DIR}/trace2-{uuid.uuid4()}.json\"\n+ return TRACE1, TRACE2\n+\n+\n+def omni_model(device, dtype, compile=True, addmm=True, bmm=True):\n+ T = cT(device, dtype)\n+\n+ def model():\n+ input_conv = T(1, 3, 56, 56)\n+ conv_weight = T(12, 3, 5, 5)\n+\n+ # Increased matrix sizes\n+ B = 8\n+ M = 256\n+ N = 512\n+ K = 768\n+ mat1 = T(M, N)\n+ mat2 = T(N, K)\n+\n+ batch_mat1 = T(B, M, N)\n+ batch_mat2 = T(B, N, K)\n+\n+ conv_output = F.conv2d(input_conv, conv_weight)\n+\n+ conv_output = conv_output * 10\n+ mm_output = torch.mm(mat1, mat2)\n+ ret = [\n+ conv_output.flatten(),\n+ mm_output.flatten(),\n+ ]\n+\n+ if addmm:\n+ addmm_output = torch.addmm(\n+ torch.zeros(mm_output.shape, device=mat1.device, dtype=mat1.dtype),\n+ mat1,\n+ mat2,\n+ )\n+ ret.append(addmm_output.flatten())\n+\n+ if bmm:\n+ bmm_output = torch.bmm(batch_mat1, batch_mat2)\n+ ret.append(bmm_output.flatten())\n+\n+ if bmm and addmm:\n+ baddbmm_output = torch.baddbmm(\n+ torch.zeros(\n+ 1,\n+ *mm_output.shape,\n+ device=batch_mat1.device,\n+ dtype=batch_mat1.dtype,\n+ ),\n+ batch_mat1,\n+ batch_mat2,\n+ )\n+ 
ret.append(baddbmm_output.flatten())\n+\n+ return torch.cat(ret)\n+\n+ if compile:\n+ return torch.compile(\n+ model, options={\"benchmark_kernel\": True, \"profile_bandwidth\": True}\n+ )\n+ return model\n+\n+\n+prefix = [\"profile.py\"]\n+\n+\n+class TestUtils(TestCase):\n+ def test_tabulate2d(self):\n+ headers = [\"Kernel\", \"Self H100 TIME (ms)\", \"Count\", \"Percent\"]\n+ rows = [\n+ [\"aten::mm\", 0.500, 7, 0.0],\n+ [\"aten::bmm\", 0.400, 6, 0.0],\n+ [\"aten::baddbmm\", 0.300, 5, 0.0],\n+ [\"aten::convolution\", 0.200, 4, 0.0],\n+ [\"aten::cudnn_convolution\", 0.100, 3, 0.0],\n+ ]\n+ table = [\n+ \" Kernel | Self H100 TIME (ms) | Count | Percent \",\n+ \"-----------------------------------------------------------------\",\n+ \" aten::mm | 0.5 | 7 | 0.0 \",\n+ \" aten::bmm | 0.4 | 6 | 0.0 \",\n+ \" aten::baddbmm | 0.3 | 5 | 0.0 \",\n+ \" aten::convolution | 0.2 | 4 | 0.0 \",\n+ \" aten::cudnn_convolution | 0.1 | 3 | 0.0 \",\n+ ]\n+ res = tabulate_2d(rows, headers)\n+ for r, t in zip(res.split(\"\\n\"), table):\n+ self.assertEqual(r, t)\n+\n+ def test_zip_dicts(self):\n+ d1 = {\"a\": 1, \"b\": 2}\n+ d2 = {\"a\": 3, \"c\": 4}\n+ res = zip_dicts(d1, d2, d1_default=\"foo\", d2_default=\"bar\")\n+ self.assertEqual(set(res), {(\"a\", 1, 3), (\"b\", 2, \"bar\"), (\"c\", \"foo\", 4)})\n+ res = zip_dicts(d1, d2)\n+ self.assertEqual(set(res), {(\"a\", 1, 3), (\"b\", 2, None), (\"c\", None, 4)})\n+\n+\n+class TestAnalysis(TestCase):\n+ @skipIf(not SM70OrLater, \"Requires sm70\")\n+ def test_noop(self):\n+ with (\n+ patch(\"sys.stdout\", new_callable=StringIO) as mock_stdout,\n+ patch(\"sys.argv\", [*prefix]),\n+ ):\n+ main()\n+ self.assertEqual(mock_stdout.getvalue(), \"\")\n+\n+ @skipIf(not SM70OrLater, \"Requires sm70\")\n+ @dtypes(torch.float, torch.double, torch.float16)\n+ def test_diff(self, device, dtype):\n+ \"\"\"\n+ diff, testing out the nruns feature too.\n+ \"\"\"\n+ if device == \"cpu\":\n+ # TODO cpu support\n+ return\n+ om = omni_model(device, dtype)\n+ REPEAT = 5\n+ trace1, trace2 = trace_files()\n+ print(\"first trace\")\n+ torch._dynamo.reset() # reset the cache\n+ with fresh_inductor_cache():\n+ with torch.profiler.profile(record_shapes=True) as p:\n+ om()\n+ p.export_chrome_trace(trace1)\n+\n+ print(\"second trace\")\n+ torch._dynamo.reset() # reset the cache\n+ with fresh_inductor_cache():\n+ with torch.profiler.profile(record_shapes=True) as p:\n+ for _ in range(REPEAT):\n+ om()\n+ p.export_chrome_trace(trace2)\n+\n+ print(\"diffing...\")\n+ with patch(", + "comment_created_at": "2025-05-24T00:01:29+00:00", + "comment_author": "eellison", + "comment_body": "I ran this test and I see all the achieved bandwidth being < .10. this seems wrong - can we double check the numbers. 
I think i mentioned this in our 1-1 but anything <.01 or > 1 is probably off.", + "pr_file_module": null + }, + { + "comment_id": "2117123016", + "repo_full_name": "pytorch/pytorch", + "pr_number": 149697, + "pr_file": "test/inductor/test_analysis.py", + "discussion_id": "2105535986", + "commented_code": "@@ -0,0 +1,601 @@\n+# Owner(s): [\"module: inductor\"]\n+\n+import json\n+import re\n+import tempfile\n+import unittest\n+import uuid\n+from io import StringIO\n+from unittest.mock import patch\n+\n+import torch\n+import torch.nn.functional as F\n+from torch._inductor.analysis.profile_analysis import (\n+ _augment_trace_helper,\n+ _create_extern_mapping,\n+ JsonProfile,\n+ main,\n+)\n+from torch._inductor.utils import (\n+ fresh_inductor_cache,\n+ run_and_get_code,\n+ tabulate_2d,\n+ zip_dicts,\n+)\n+from torch.testing._internal.common_cuda import SM70OrLater\n+from torch.testing._internal.common_device_type import (\n+ dtypes,\n+ instantiate_device_type_tests,\n+ skipIf,\n+)\n+from torch.testing._internal.common_utils import parametrize, run_tests, TestCase\n+from torch.testing._internal.inductor_utils import IS_BIG_GPU\n+\n+\n+example_profile = \"\"\"\n+{\n+ \"schemaVersion\": 1,\n+ \"deviceProperties\": [\n+ {\n+ \"id\": 0, \"name\": \"NVIDIA H100\", \"totalGlobalMem\": 101997215744,\n+ \"computeMajor\": 9, \"computeMinor\": 0,\n+ \"maxThreadsPerBlock\": 1024, \"maxThreadsPerMultiprocessor\": 2048,\n+ \"regsPerBlock\": 65536, \"warpSize\": 32,\n+ \"sharedMemPerBlock\": 49152, \"numSms\": 132\n+ , \"regsPerMultiprocessor\": 65536, \"sharedMemPerBlockOptin\": 232448, \"sharedMemPerMultiprocessor\": 233472\n+ }\n+ ],\n+ \"cupti_version\": 24,\n+ \"cuda_runtime_version\": 12060,\n+ \"with_flops\": 1,\n+ \"record_shapes\": 1,\n+ \"cuda_driver_version\": 12040,\n+ \"profile_memory\": 1,\n+ \"trace_id\": \"301995E163ED42048FBD783860E6E7DC\",\n+ \"displayTimeUnit\": \"ms\",\n+ \"baseTimeNanoseconds\": 1743521598000000000,\n+ \"traceEvents\": [\n+ {\n+ \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::convolution\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093488368.463, \"dur\": 425.453,\n+ \"args\": {\n+ \"External id\": 1340,\"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Concrete Inputs\": \\\n+[\"\", \"\", \"\", \"[2, 2]\", \"[3, 3]\", \"[1, 1]\", \"False\", \"[0, 0]\", \"1\"], \"Input type\": [\"float\", \"float\", \"\", \\\n+\"ScalarList\", \"ScalarList\", \"ScalarList\", \"Scalar\", \"ScalarList\", \"Scalar\"], \"Input Strides\": [[150528, 1, 672, 3],\\\n+[147, 1, 21, 3], [], [], [], [], [], [], []], \"Input Dims\": [[1, 3, 224, 224], [64, 3, 7, 7], [], [], [], [], [], \\\n+[], []], \"Ev Idx\": 1339\n+ }\n+ },\n+ {\n+ \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::_convolution\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093488444.498, \"dur\": 341.867,\n+ \"args\": {\n+ \"External id\": 1341,\"Record function id\": 0, \"Concrete Inputs\": [\"\", \"\", \"\", \"[2, 2]\", \"[3, 3]\", \"[1, 1]\",\\\n+ \"False\", \"[0, 0]\", \"1\", \"False\", \"False\", \"True\", \"True\"], \"Input type\": [\"float\", \"float\", \"\", \"ScalarList\",\\\n+ \"ScalarList\", \"ScalarList\", \"Scalar\", \"ScalarList\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\"], \"Input Strides\": \\\n+[[150528, 1, 672, 3], [147, 1, 21, 3], [], [], [], [], [], [], [], [], [], [], []], \"Input Dims\": [[1, 3, 224, 224], \\\n+[64, 3, 7, 7], [], [], [], [], [], [], [], [], [], [], []], \"Ev Idx\": 1340\n+ }\n+ },\n+ {\n+ \"ph\": \"X\", \"cat\": \"cpu_op\", 
\"name\": \"aten::addmm\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093513655.849, \"dur\": 251.130,\n+ \"args\": {\n+ \"External id\": 1619,\"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Concrete Inputs\": \\\n+[\"\", \"\", \"\", \"1\", \"1\", \"\"], \"Input type\": [\"float\", \"float\", \"float\", \"Scalar\", \"Scalar\", \"float\"], \"Input Strides\":\\\n+ [[1], [0, 1], [1, 2048], [], [], [1000, 1]], \"Input Dims\": [[1000], [1, 2048], [2048, 1000], [], [], [1, 1000]], \\\n+\"Ev Idx\": 1618\n+ }\n+ },\n+ {\n+ \"ph\": \"X\", \"cat\": \"kernel\", \"name\": \"void cutlass_addmm\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093513655.849, \"dur\": 251.130,\n+ \"args\": {\n+ \"External id\": 1619,\"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Ev Idx\": 1618\n+ }\n+ },\n+ {\n+ \"ph\": \"X\", \"cat\": \"kernel\", \"name\": \"void convolution_kernel\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093513655.849, \"dur\": 200.130,\n+ \"args\": {\n+ \"External id\": 1342, \"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Ev Idx\": 1618\n+ }\n+ },\n+ {\n+ \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::convolution\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093488444.498, \"dur\": 341.867,\n+ \"args\": {\n+ \"External id\": 1342,\"Record function id\": 0, \"Concrete Inputs\": [\"\", \"\", \"\", \"[2, 2]\", \"[3, 3]\", \"[1, 1]\", \\\n+\"False\", \"[0, 0]\", \"1\", \"False\", \"False\", \"True\", \"True\"], \"Input type\": [\"float\", \"float\", \"\", \"ScalarList\", \\\n+\"ScalarList\", \"ScalarList\", \"Scalar\", \"ScalarList\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\"], \"Input \\\n+Strides\": [[150528, 1, 672, 3], [147, 1, 21, 3], [], [], [], [], [], [], [], [], [], [], []], \"Input Dims\": \\\n+[[1, 3, 224, 224], [64, 3, 7, 7], [], [], [], [], [], [], [], [], [], [], []], \"Ev Idx\": 1340\n+ }\n+ }\n+],\n+ \"traceName\": \"/tmp/compiled_module_profile.json\"\n+}\n+\"\"\"\n+\n+\n+def verify_flops(self, expected_flops, out_profile):\n+ j = 0\n+ for i in range(len(out_profile[\"traceEvents\"])):\n+ if \"kernel_flop\" in out_profile[\"traceEvents\"][i][\"args\"]:\n+ self.assertEqual(\n+ out_profile[\"traceEvents\"][i][\"args\"][\"kernel_flop\"],\n+ expected_flops[j],\n+ )\n+ j += 1\n+\n+\n+def random_tensor(size, dtype, **kwargs):\n+ if dtype in [torch.half, torch.bfloat16, torch.float, torch.double]:\n+ return torch.randn(size, dtype=dtype, **kwargs)\n+ elif dtype in [torch.uint8, torch.int8, torch.short, torch.int, torch.long]:\n+ return torch.randint(0, 100, size, dtype=dtype, **kwargs)\n+ else:\n+ raise ValueError(\"Unsupported data type\")\n+\n+\n+def cT(device, dtype):\n+ def T(*shape, requires_grad=False):\n+ return random_tensor(\n+ shape, requires_grad=requires_grad, device=device, dtype=dtype\n+ )\n+\n+ return T\n+\n+\n+def FlopCounterMode(*args, **kwargs):\n+ return torch.utils.flop_counter.FlopCounterMode(*args, **kwargs, display=False)\n+\n+\n+TMP_DIR = tempfile.mkdtemp()\n+\n+\n+def trace_files():\n+ TRACE1 = f\"{TMP_DIR}/trace1-{uuid.uuid4()}.json\"\n+ TRACE2 = f\"{TMP_DIR}/trace2-{uuid.uuid4()}.json\"\n+ return TRACE1, TRACE2\n+\n+\n+def omni_model(device, dtype, compile=True, addmm=True, bmm=True):\n+ T = cT(device, dtype)\n+\n+ def model():\n+ input_conv = T(1, 3, 56, 56)\n+ conv_weight = T(12, 3, 5, 5)\n+\n+ # Increased matrix sizes\n+ B = 8\n+ M = 256\n+ N = 512\n+ K = 768\n+ mat1 = T(M, N)\n+ mat2 = T(N, K)\n+\n+ batch_mat1 = T(B, M, N)\n+ 
batch_mat2 = T(B, N, K)\n+\n+ conv_output = F.conv2d(input_conv, conv_weight)\n+\n+ conv_output = conv_output * 10\n+ mm_output = torch.mm(mat1, mat2)\n+ ret = [\n+ conv_output.flatten(),\n+ mm_output.flatten(),\n+ ]\n+\n+ if addmm:\n+ addmm_output = torch.addmm(\n+ torch.zeros(mm_output.shape, device=mat1.device, dtype=mat1.dtype),\n+ mat1,\n+ mat2,\n+ )\n+ ret.append(addmm_output.flatten())\n+\n+ if bmm:\n+ bmm_output = torch.bmm(batch_mat1, batch_mat2)\n+ ret.append(bmm_output.flatten())\n+\n+ if bmm and addmm:\n+ baddbmm_output = torch.baddbmm(\n+ torch.zeros(\n+ 1,\n+ *mm_output.shape,\n+ device=batch_mat1.device,\n+ dtype=batch_mat1.dtype,\n+ ),\n+ batch_mat1,\n+ batch_mat2,\n+ )\n+ ret.append(baddbmm_output.flatten())\n+\n+ return torch.cat(ret)\n+\n+ if compile:\n+ return torch.compile(\n+ model, options={\"benchmark_kernel\": True, \"profile_bandwidth\": True}\n+ )\n+ return model\n+\n+\n+prefix = [\"profile.py\"]\n+\n+\n+class TestUtils(TestCase):\n+ def test_tabulate2d(self):\n+ headers = [\"Kernel\", \"Self H100 TIME (ms)\", \"Count\", \"Percent\"]\n+ rows = [\n+ [\"aten::mm\", 0.500, 7, 0.0],\n+ [\"aten::bmm\", 0.400, 6, 0.0],\n+ [\"aten::baddbmm\", 0.300, 5, 0.0],\n+ [\"aten::convolution\", 0.200, 4, 0.0],\n+ [\"aten::cudnn_convolution\", 0.100, 3, 0.0],\n+ ]\n+ table = [\n+ \" Kernel | Self H100 TIME (ms) | Count | Percent \",\n+ \"-----------------------------------------------------------------\",\n+ \" aten::mm | 0.5 | 7 | 0.0 \",\n+ \" aten::bmm | 0.4 | 6 | 0.0 \",\n+ \" aten::baddbmm | 0.3 | 5 | 0.0 \",\n+ \" aten::convolution | 0.2 | 4 | 0.0 \",\n+ \" aten::cudnn_convolution | 0.1 | 3 | 0.0 \",\n+ ]\n+ res = tabulate_2d(rows, headers)\n+ for r, t in zip(res.split(\"\\n\"), table):\n+ self.assertEqual(r, t)\n+\n+ def test_zip_dicts(self):\n+ d1 = {\"a\": 1, \"b\": 2}\n+ d2 = {\"a\": 3, \"c\": 4}\n+ res = zip_dicts(d1, d2, d1_default=\"foo\", d2_default=\"bar\")\n+ self.assertEqual(set(res), {(\"a\", 1, 3), (\"b\", 2, \"bar\"), (\"c\", \"foo\", 4)})\n+ res = zip_dicts(d1, d2)\n+ self.assertEqual(set(res), {(\"a\", 1, 3), (\"b\", 2, None), (\"c\", None, 4)})\n+\n+\n+class TestAnalysis(TestCase):\n+ @skipIf(not SM70OrLater, \"Requires sm70\")\n+ def test_noop(self):\n+ with (\n+ patch(\"sys.stdout\", new_callable=StringIO) as mock_stdout,\n+ patch(\"sys.argv\", [*prefix]),\n+ ):\n+ main()\n+ self.assertEqual(mock_stdout.getvalue(), \"\")\n+\n+ @skipIf(not SM70OrLater, \"Requires sm70\")\n+ @dtypes(torch.float, torch.double, torch.float16)\n+ def test_diff(self, device, dtype):\n+ \"\"\"\n+ diff, testing out the nruns feature too.\n+ \"\"\"\n+ if device == \"cpu\":\n+ # TODO cpu support\n+ return\n+ om = omni_model(device, dtype)\n+ REPEAT = 5\n+ trace1, trace2 = trace_files()\n+ print(\"first trace\")\n+ torch._dynamo.reset() # reset the cache\n+ with fresh_inductor_cache():\n+ with torch.profiler.profile(record_shapes=True) as p:\n+ om()\n+ p.export_chrome_trace(trace1)\n+\n+ print(\"second trace\")\n+ torch._dynamo.reset() # reset the cache\n+ with fresh_inductor_cache():\n+ with torch.profiler.profile(record_shapes=True) as p:\n+ for _ in range(REPEAT):\n+ om()\n+ p.export_chrome_trace(trace2)\n+\n+ print(\"diffing...\")\n+ with patch(", + "comment_created_at": "2025-05-31T03:08:54+00:00", + "comment_author": "exclamaforte", + "comment_body": "I think it looks good now!\r\n```\r\n Kernel Name | foo Kernel Count | foo FLOPS | foo Kernel Reads (GB) | foo Dur (us) | foo Achieved FLOPS % | foo Achieved Bandwidth % | bar Kernel Count | bar FLOPS | bar Kernel Reads (GB) | bar 
Dur (us) | bar Achieved FLOPS % | bar Achieved Bandwidth % \r\n--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\r\n void at::native::vectorized_el | 141 | 0 | 2291.3034663102667 | 27.48305673758865 | 0 | 68.39711839732142 | 140 | 0 | 2296.773433304663 | 27.408307142857144 | 0 | 68.56040099416907 \r\n triton_poi_fused_cat_8 | 78 | 0 | 1574.7606227700633 | 9.219589743589747 | 0 | 47.007779784181 | 78 | 0 | 1562.5378211858626 | 9.225320512820513 | 0 | 46.64292003539889 \r\n triton_poi_fused_zeros_4 | 20 | 0 | 252.8315785679245 | 1.5600000000000003 | 0 | 7.547211300535059 | 22 | 0 | 247.92602043168762 | 1.591227272727273 | 0 | 7.4007767293041065 \r\n nvjet_hsh_64x32_64x16_2x4_v_ba | 1 | 44306028169014.09 | 360.5633802816902 | 4.544 | 2.238808901920874 | 10.763085978557916 | 4 | 50766875540146.125 | 413.14189078894964 | 3.992 | 2.5652792086986422 | 12.332593754894019 \r\n triton_poi_fused_randn_5 | 22 | 0 | 272.44196070060616 | 7.698772727272726 | 0 | 8.13259584180914 | 24 | 0 | 272.03694126279845 | 7.7105 | 0 | 8.120505709337268 \r\n triton_poi_fused_randn_6 | 30 | 0 | 328.2370610588902 | 19.173099999999998 | 0 | 9.798121225638516 | 35 | 0 | 327.5356462783756 | 19.21434285714286 | 0 | 9.777183470996286 \r\n nvjet_hsh_96x128_64x6_2x1_v_bz | 1 | 227745013574660.62 | 1779.2579185520362 | 7.072 | 11.508085577294626 | 53.112176673195115 | 5 | 188095526438492.2 | 1469.4963003007203 | 8.633600000000001 | 9.504574352627197 | 43.86556120300658 \r\n triton_poi_fused_zeros_7 | 21 | 0 | 253.7686410608481 | 1.5543333333333336 | 0 | 7.575183315249197 | 22 | 0 | 249.15748848030734 | 1.5841363636363637 | 0 | 7.437536969561413 \r\n void at::native::elementwise_k | 3 | 0 | 194.78879307640364 | 3.936 | 0 | 5.814590838101601 | 13 | 0 | 181.2755167170482 | 3.7068461538461532 | 0 | 5.411209454240244 \r\n nvjet_hsh_96x128_64x6_2x1_v_ba | 1 | 167772160000000.0 | 1365.3333333333333 | 9.6 | 8.477623041940374 | 40.75621890547263 | 5 | 253727683090235.0 | 2064.84117098173 | 6.5405999999999995 | 12.82100470390273 | 61.63704988005164 \r\n triton_poi_fused_randn_1 | 86 | 0 | 0.6861092067063755 | 2.8322325581395353 | 0 | 0.020480871841981354 | 91 | 0 | 0.7055919536190854 | 2.7624175824175823 | 0 | 0.02106244637668912 \r\n void at::native::vectorized_el | 2 | 0 | 9.567026991614258 | 1.568 | 0 | 0.28558289527206737 | 7 | 0 | 15.017761845823944 | 1.5178571428571428 | 0 | 0.44829139838280435 \r\n void at::native::im2col_kernel | 1 | 551086956521.739 | 275.64538043478257 | 8.832 | 0.02784673858118944 | 8.228220311486046 | 3 | 540686595521.0607 | 270.4432767907672 | 9.002666666666668 | 0.027321202401266332 | 8.07293363554529 \r\n void cutlass::Kernel2 list[BaseConfig]:\n import torch\n\n pruned_configs = []\n for gemm_config in configs:\n device = torch.cuda.current_device()\n props = torch.cuda.get_device_properties(device)\n sm_available = props.shared_memory_per_block_optin # type: ignore[attr-defined]\n NUM_REG = 255\n\n acc_regs = math.ceil(\n gemm_config.block_m * gemm_config.block_n / (gemm_config.num_warps * 32)\n )\n\n shared_mem_accum = dtype_size * (\n gemm_config.block_m * gemm_config.block_k\n + gemm_config.block_n * gemm_config.block_k\n )\n\n # Will use more shared memory than available\n if shared_mem_accum * gemm_config.num_stages > sm_available:\n continue\n # 
Lower bound for register spillage, if exceeds the kernel will certainly spill\n elif acc_regs > NUM_REG:\n continue", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2176455141", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156610, + "pr_file": "torch/_inductor/template_heuristics.py", + "discussion_id": "2176455141", + "commented_code": "@@ -519,6 +520,40 @@ def _scale_mm_configs(\n \n return scaled_configs\n \n+ def _prune_exhaustive_configs(\n+ self,\n+ configs: list[BaseConfig],\n+ dtype_size: int,\n+ ) -> list[BaseConfig]:\n+ import torch\n+\n+ pruned_configs = []\n+ for gemm_config in configs:\n+ device = torch.cuda.current_device()\n+ props = torch.cuda.get_device_properties(device)\n+ sm_available = props.shared_memory_per_block_optin # type: ignore[attr-defined]\n+ NUM_REG = 255\n+\n+ acc_regs = math.ceil(\n+ gemm_config.block_m * gemm_config.block_n / (gemm_config.num_warps * 32)\n+ )\n+\n+ shared_mem_accum = dtype_size * (\n+ gemm_config.block_m * gemm_config.block_k\n+ + gemm_config.block_n * gemm_config.block_k\n+ )\n+\n+ # Will use more shared memory than available\n+ if shared_mem_accum * gemm_config.num_stages > sm_available:\n+ continue\n+ # Lower bound for register spillage, if exceeds the kernel will certainly spill\n+ elif acc_regs > NUM_REG:\n+ continue", + "comment_created_at": "2025-07-01T05:23:57+00:00", + "comment_author": "nmacchioni", + "comment_body": "If I understand correctly this prunes all spilling config, are we sure we want to do this? Spilled registers does not necessarily mean that performance is bad, although we could potentially prune when # spilled >> some metric.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2165669923", + "pr_number": 156785, + "pr_file": "torch/_inductor/kernel/mm.py", + "created_at": "2025-06-25T04:13:25+00:00", + "commented_code": "mm_configs = V.choices.get_base_mm_configs(device_type)\n persistent_mm_configs = V.choices.get_persistent_mm_configs(device_type)\n extra_mm_configs = V.choices.get_extra_mm_configs(device_type)\n lookup_dict = get_lookup_table([mat1, mat2], name)", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2165669923", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156785, + "pr_file": "torch/_inductor/kernel/mm.py", + "discussion_id": "2165669923", + "commented_code": "@@ -696,14 +699,18 @@ def tuned_mm(mat1, mat2, *, layout=None):\n mm_configs = V.choices.get_base_mm_configs(device_type)\n persistent_mm_configs = V.choices.get_persistent_mm_configs(device_type)\n extra_mm_configs = V.choices.get_extra_mm_configs(device_type)\n+ lookup_dict = get_lookup_table([mat1, mat2], name)", + "comment_created_at": "2025-06-25T04:13:25+00:00", + "comment_author": "jansel", + "comment_body": "Do we need to take into account the epilogue in the lookup table?", + "pr_file_module": null + }, + { + "comment_id": "2165867537", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156785, + "pr_file": "torch/_inductor/kernel/mm.py", + "discussion_id": "2165669923", + "commented_code": "@@ -696,14 +699,18 @@ def tuned_mm(mat1, mat2, *, layout=None):\n mm_configs = V.choices.get_base_mm_configs(device_type)\n persistent_mm_configs = V.choices.get_persistent_mm_configs(device_type)\n extra_mm_configs = V.choices.get_extra_mm_configs(device_type)\n+ lookup_dict = get_lookup_table([mat1, mat2], name)", + "comment_created_at": "2025-06-25T06:11:01+00:00", + "comment_author": "coconutruben", + "comment_body": "IIUC no. 
This is if you mean the template epilogue (e.g. addmm) because the table is function specific right now, so config e.g. for addmm will be picked specifically for addmm, and is found in the lookup table under the addmm key.", + "pr_file_module": null + }, + { + "comment_id": "2166929857", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156785, + "pr_file": "torch/_inductor/kernel/mm.py", + "discussion_id": "2165669923", + "commented_code": "@@ -696,14 +699,18 @@ def tuned_mm(mat1, mat2, *, layout=None):\n mm_configs = V.choices.get_base_mm_configs(device_type)\n persistent_mm_configs = V.choices.get_persistent_mm_configs(device_type)\n extra_mm_configs = V.choices.get_extra_mm_configs(device_type)\n+ lookup_dict = get_lookup_table([mat1, mat2], name)", + "comment_created_at": "2025-06-25T14:50:42+00:00", + "comment_author": "jansel", + "comment_body": "But doesn't the autotuning measure using the epilogue? I recall @eellison added that. This can affect the performance (sometimes a lot of the epilogue causes register spills).", + "pr_file_module": null + }, + { + "comment_id": "2167362851", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156785, + "pr_file": "torch/_inductor/kernel/mm.py", + "discussion_id": "2165669923", + "commented_code": "@@ -696,14 +699,18 @@ def tuned_mm(mat1, mat2, *, layout=None):\n mm_configs = V.choices.get_base_mm_configs(device_type)\n persistent_mm_configs = V.choices.get_persistent_mm_configs(device_type)\n extra_mm_configs = V.choices.get_extra_mm_configs(device_type)\n+ lookup_dict = get_lookup_table([mat1, mat2], name)", + "comment_created_at": "2025-06-25T18:36:37+00:00", + "comment_author": "coconutruben", + "comment_body": "IIUC what @eellison explained to me this should be safe because it happens in two stages. We first find the best config (here) and then we benchmark using that best config + epilogue vs not fusing the epilogue. So in this stage we're not interfering with that decision. We don't do a NxN comparison of configs + epilogue vs configs", + "pr_file_module": null + }, + { + "comment_id": "2167450658", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156785, + "pr_file": "torch/_inductor/kernel/mm.py", + "discussion_id": "2165669923", + "commented_code": "@@ -696,14 +699,18 @@ def tuned_mm(mat1, mat2, *, layout=None):\n mm_configs = V.choices.get_base_mm_configs(device_type)\n persistent_mm_configs = V.choices.get_persistent_mm_configs(device_type)\n extra_mm_configs = V.choices.get_extra_mm_configs(device_type)\n+ lookup_dict = get_lookup_table([mat1, mat2], name)", + "comment_created_at": "2025-06-25T19:21:28+00:00", + "comment_author": "PaulZhang12", + "comment_body": "@jansel It depends on how we construct the lookup table. If we take the autotune logs for example, then it is not fusion aware. However, if we look at whether the scheduler does a fusion with a certain config in a previous max-autotune run, we can use that config in the lookup table.\r\n\r\nThe lookup table just uses whichever config(s) we give it. 
There is no benchmarking of epilogues by default as that can lead to significant compile time overhead.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2175414212", + "pr_number": 156851, + "pr_file": "torch/_inductor/models/mm_kernel_prediction_model.py", + "created_at": "2025-06-30T15:55:10+00:00", + "commented_code": "\"\"\"\nNeural network model for predicting triton kernel performance.\n\nThis module provides functionality to load and use a pre-trained neural network\nfor predicting the performance of triton kernels.\n\"\"\"\n\nimport os\nimport time\nfrom collections.abc import Sequence\nfrom typing import Any\n\nimport numpy as np\nimport pandas as pd # type: ignore[import-untyped]\n\nimport torch\nimport torch.nn as nn\nfrom torch._inductor.kernel_lut import TritonGEMMConfig\n\n# Default model path - can be overridden by environment variable\nscript_dir = os.path.dirname(__file__)\nDEFAULT_MODEL_PATH = os.path.join(os.path.dirname(__file__), \"aoti_mm_model.pt2\")\nMODEL_PATH = os.environ.get(\"TRITON_KERNEL_SELECTION_MODEL_PATH\", DEFAULT_MODEL_PATH)\nimport logging\n\n\nlog = logging.getLogger(__name__)\n# turn on info logging\nlogging.basicConfig(level=logging.INFO)\n\n\nclass NeuralNetwork(nn.Module):\n \"\"\"\n Multilayer perceptron with a single output.\n\n It is designed for modeling runtime when there is a constant overhead of\n `kernel_overhead` and the non-overhead runtime tends to be easier to model\n on a log scale (e.g. doubling a dimension involved in a matrix\n multiplication results in runtime roughly doubling.)\n \"\"\"\n\n def __init__(\n self,\n n_inputs: int,\n hidden_layer_widths: Sequence[int],\n kernel_overhead: float = 0.00541,\n ) -> None:\n \"\"\"\n Args:\n n_inputs: Number of inputs\n hidden_layer_widths: Hidden layer widths\n kernel_overhead: Overhead of the kernel, assumed to be constant. The\n default of 0.00541 is the lowest runtime seen in Triton H100 data.\n \"\"\"\n super().__init__()\n self.n_inputs = n_inputs\n self.kernel_overhead = kernel_overhead\n self.log_kernel_overhead: float = torch.log(\n torch.tensor(kernel_overhead, device=\"cuda\")\n ).item()\n all_layer_widths = list(hidden_layer_widths) + [1]\n all_input_widths = [n_inputs] + list(hidden_layer_widths)\n layers: list[nn.Module] = []\n for n_in, n_out in zip(all_input_widths, all_layer_widths, strict=True):\n layers.append(nn.Linear(n_in, n_out))\n layers.append(nn.BatchNorm1d(n_out))\n layers.append(nn.ReLU())\n\n self.linear_relu_stack = nn.Sequential(*layers[:-2])\n\n def forward(self, x: torch.Tensor) -> torch.Tensor:\n \"\"\"\n Predict as log(exp(inputs) + self.kernel_overhead).\n\n Works well for predicting log(runtime) when runtime contains a constant\n overhead of `kernel_overhead`. 
(The log specification means that this\n wouldn't be trivially modeled with a bias term.)\n\n Probably could have fit the overhead rather than hard-coding it by\n having `self.kernel_overhead` be a tunable parameter or by having exp\n and log layers.\n \"\"\"\n log_base_pred = self.linear_relu_stack(x)\n log_overhead_tsr = torch.full_like(\n input=log_base_pred, fill_value=self.log_kernel_overhead, device=\"cuda\"\n )\n return torch.logsumexp(\n torch.stack([log_base_pred, log_overhead_tsr], dim=-1), dim=-1\n )\n\n\ndef get_nn_x(\n df: pd.DataFrame, mean: torch.Tensor | None = None, std: torch.Tensor | None = None\n) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:\n \"\"\"Standardize the data and convert it to a tensor.\"\"\"\n x_df = df[\n [\n \"dtype_size\",\n \"dim_m\",\n \"dim_n\",\n \"dim_k\",\n \"total_gb\",\n \"total_gflop\",\n \"flops_per_byte\",\n \"config_block_k\",\n \"config_block_m\",\n \"config_block_n\",\n \"config_num_stages\",\n \"config_num_warps\",\n ]\n ].copy()\n for col in x_df.columns:\n x_df[col] = np.log(x_df[col])\n\n x_tens = torch.from_numpy(x_df.astype(float).to_numpy()).to(device=\"cuda\")\n if mean is None:\n mean = torch.from_numpy(x_df.mean().to_numpy()).to(device=\"cuda\")\n if std is None:\n std = torch.from_numpy(x_df.std().to_numpy()).to(device=\"cuda\")\n x_tens -= mean\n x_tens /= std\n return x_tens.to(torch.float32), mean, std\n\n\ndef get_total_gb_feature(df: pd.DataFrame) -> pd.Series:\n \"\"\"\n Calculate the total gigabytes feature from the dataframe.\n\n Args:\n df: DataFrame containing the necessary columns for calculation\n\n Returns:\n Series containing the calculated total gigabytes\n \"\"\"\n # Calculate memory access in bytes\n m, n, k = df[\"dim_m\"], df[\"dim_n\"], df[\"dim_k\"]\n dtype_size = df[\"dtype_size\"] / 8 # Convert bits to bytes\n\n # A: m×k, B: k×n, C: m×n\n return ((m * k + k * n + m * n) * dtype_size) / 1e9 # Convert to GB\n\n\ndef get_total_gflop_feature(df: pd.DataFrame) -> pd.Series:\n \"\"\"\n Calculate the total gigaflops feature from the dataframe.\n\n Args:\n df: DataFrame containing the necessary columns for calculation\n\n Returns:\n Series containing the calculated total gigaflops\n \"\"\"\n # For matrix multiplication, flops = 2 * m * n * k\n m, n, k = df[\"dim_m\"], df[\"dim_n\"], df[\"dim_k\"]\n return (2 * m * n * k) / 1e9 # Convert to GFLOP\n\n\nclass ModelWrapper:\n \"\"\"\n Wrapper for the neural network model that handles encoding inputs and decoding outputs.\n\n This class provides methods to prepare inputs for the model and interpret its outputs,\n handling the necessary standardization and feature engineering.\n \"\"\"\n\n def __init__(self) -> None:\n \"\"\"Initialize the model wrapper with the pre-trained model and standardization parameters.\"\"\"\n start_time = time.time()\n self.model = NeuralNetwork(\n n_inputs=12, hidden_layer_widths=[2**8 for _ in range(6)]\n )\n self.model: NeuralNetwork = torch._inductor.aoti_load_package(MODEL_PATH)\n end_time = time.time()\n\n log.info(\"NN Kernel Prediction Model loaded.\")\n log.info(\"Took: %s seconds\", end_time - start_time)\n\n # Mean values for standardizing input features\n self.mean_for_standardization = torch.tensor(\n [\n 2.78275084,\n 8.23996746,\n 7.27791873,\n 7.92035942,\n -2.39558163,\n 3.40679233,\n 5.80237395,\n 3.95781827,\n 4.19478321,\n 4.19098234,\n 0.9045909,\n 1.28331208,\n ],\n device=\"cuda\",\n )\n\n # Standard deviation values for standardizing input features\n self.std_for_standardization = torch.tensor(\n [\n 0.08322756,\n 
2.31893439,\n 1.65605574,\n 2.15447078,\n 2.19682881,\n 2.99600806,\n 1.24328795,\n 0.92352521,\n 0.93849802,\n 0.93872011,\n 0.57455891,\n 0.5837217,\n ],\n device=\"cuda\",\n )\n\n def vec(\n self, m: int, n: int, k: int, dsize: int, config: Any\n ) -> tuple[int, int, int, int, int, int, int, int, int]:\n \"\"\"\n Convert matrix multiplication parameters and config to a feature vector.\n\n Args:\n m: First dimension of matrix multiplication\n n: Second dimension of matrix multiplication\n k: Third dimension of matrix multiplication\n dsize: Data size in bits (e.g., 16 for float16, 32 for float32)\n config: Configuration object containing kernel parameters\n\n Returns:\n Tuple containing the extracted features\n \"\"\"\n kwargs = config.all_kwargs()\n\n return (\n int(m),\n int(n),\n int(k),\n int(dsize),\n int(kwargs[\"BLOCK_M\"]),\n int(kwargs[\"BLOCK_N\"]),\n int(kwargs[\"BLOCK_K\"]),\n int(kwargs[\"num_stages\"]),\n int(kwargs[\"num_warps\"]),\n )\n\n @staticmethod\n def vec_params(\n m: int, n: int, k: int, dsize: int, params: TritonGEMMConfig\n ) -> tuple[int, int, int, int, int, int, int, int, int]:\n \"\"\"\n Convert matrix multiplication parameters and config to a feature vector.\n\n Args:\n m: First dimension of matrix multiplication\n n: Second dimension of matrix multiplication\n k: Third dimension of matrix multiplication\n dsize: Data size in bits (e.g., 16 for float16, 32 for float32)\n config: Configuration object containing kernel parameters\n\n Returns:\n Tuple containing the extracted features\n \"\"\"\n\n return (\n int(m),\n int(n),\n int(k),\n int(dsize),\n int(params.block_m),\n int(params.block_n),\n int(params.block_k),\n int(params.num_stages),\n int(params.num_warps),\n )\n\n def encode(\n self, m: int, n: int, k: int, dtype: torch.dtype, configs: list[Any]\n ) -> torch.Tensor:\n \"\"\"\n Encode the matrix multiplication parameters and configs as input tensors for the model.\n\n Args:\n m: First dimension of matrix multiplication\n n: Second dimension of matrix multiplication\n k: Third dimension of matrix multiplication\n dtype: Data type of the matrices\n configs: List of configuration objects\n\n Returns:\n Tensor containing the encoded inputs ready for the model\n\n Raises:\n ValueError: If the dtype is not supported\n \"\"\"\n # Determine data size based on dtype\n if dtype == torch.bfloat16 or dtype == torch.float16:\n dsize = 16\n elif dtype == torch.float32:\n dsize = 32\n else:\n raise ValueError(f\"Unsupported dtype: {dtype}. 
Add support for this dtype.\")\n\n # Create feature dataframe\n df = pd.DataFrame(", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2175414212", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156851, + "pr_file": "torch/_inductor/models/mm_kernel_prediction_model.py", + "discussion_id": "2175414212", + "commented_code": "@@ -0,0 +1,364 @@\n+\"\"\"\n+Neural network model for predicting triton kernel performance.\n+\n+This module provides functionality to load and use a pre-trained neural network\n+for predicting the performance of triton kernels.\n+\"\"\"\n+\n+import os\n+import time\n+from collections.abc import Sequence\n+from typing import Any\n+\n+import numpy as np\n+import pandas as pd # type: ignore[import-untyped]\n+\n+import torch\n+import torch.nn as nn\n+from torch._inductor.kernel_lut import TritonGEMMConfig\n+\n+# Default model path - can be overridden by environment variable\n+script_dir = os.path.dirname(__file__)\n+DEFAULT_MODEL_PATH = os.path.join(os.path.dirname(__file__), \"aoti_mm_model.pt2\")\n+MODEL_PATH = os.environ.get(\"TRITON_KERNEL_SELECTION_MODEL_PATH\", DEFAULT_MODEL_PATH)\n+import logging\n+\n+\n+log = logging.getLogger(__name__)\n+# turn on info logging\n+logging.basicConfig(level=logging.INFO)\n+\n+\n+class NeuralNetwork(nn.Module):\n+ \"\"\"\n+ Multilayer perceptron with a single output.\n+\n+ It is designed for modeling runtime when there is a constant overhead of\n+ `kernel_overhead` and the non-overhead runtime tends to be easier to model\n+ on a log scale (e.g. doubling a dimension involved in a matrix\n+ multiplication results in runtime roughly doubling.)\n+ \"\"\"\n+\n+ def __init__(\n+ self,\n+ n_inputs: int,\n+ hidden_layer_widths: Sequence[int],\n+ kernel_overhead: float = 0.00541,\n+ ) -> None:\n+ \"\"\"\n+ Args:\n+ n_inputs: Number of inputs\n+ hidden_layer_widths: Hidden layer widths\n+ kernel_overhead: Overhead of the kernel, assumed to be constant. The\n+ default of 0.00541 is the lowest runtime seen in Triton H100 data.\n+ \"\"\"\n+ super().__init__()\n+ self.n_inputs = n_inputs\n+ self.kernel_overhead = kernel_overhead\n+ self.log_kernel_overhead: float = torch.log(\n+ torch.tensor(kernel_overhead, device=\"cuda\")\n+ ).item()\n+ all_layer_widths = list(hidden_layer_widths) + [1]\n+ all_input_widths = [n_inputs] + list(hidden_layer_widths)\n+ layers: list[nn.Module] = []\n+ for n_in, n_out in zip(all_input_widths, all_layer_widths, strict=True):\n+ layers.append(nn.Linear(n_in, n_out))\n+ layers.append(nn.BatchNorm1d(n_out))\n+ layers.append(nn.ReLU())\n+\n+ self.linear_relu_stack = nn.Sequential(*layers[:-2])\n+\n+ def forward(self, x: torch.Tensor) -> torch.Tensor:\n+ \"\"\"\n+ Predict as log(exp(inputs) + self.kernel_overhead).\n+\n+ Works well for predicting log(runtime) when runtime contains a constant\n+ overhead of `kernel_overhead`. 
(The log specification means that this\n+ wouldn't be trivially modeled with a bias term.)\n+\n+ Probably could have fit the overhead rather than hard-coding it by\n+ having `self.kernel_overhead` be a tunable parameter or by having exp\n+ and log layers.\n+ \"\"\"\n+ log_base_pred = self.linear_relu_stack(x)\n+ log_overhead_tsr = torch.full_like(\n+ input=log_base_pred, fill_value=self.log_kernel_overhead, device=\"cuda\"\n+ )\n+ return torch.logsumexp(\n+ torch.stack([log_base_pred, log_overhead_tsr], dim=-1), dim=-1\n+ )\n+\n+\n+def get_nn_x(\n+ df: pd.DataFrame, mean: torch.Tensor | None = None, std: torch.Tensor | None = None\n+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:\n+ \"\"\"Standardize the data and convert it to a tensor.\"\"\"\n+ x_df = df[\n+ [\n+ \"dtype_size\",\n+ \"dim_m\",\n+ \"dim_n\",\n+ \"dim_k\",\n+ \"total_gb\",\n+ \"total_gflop\",\n+ \"flops_per_byte\",\n+ \"config_block_k\",\n+ \"config_block_m\",\n+ \"config_block_n\",\n+ \"config_num_stages\",\n+ \"config_num_warps\",\n+ ]\n+ ].copy()\n+ for col in x_df.columns:\n+ x_df[col] = np.log(x_df[col])\n+\n+ x_tens = torch.from_numpy(x_df.astype(float).to_numpy()).to(device=\"cuda\")\n+ if mean is None:\n+ mean = torch.from_numpy(x_df.mean().to_numpy()).to(device=\"cuda\")\n+ if std is None:\n+ std = torch.from_numpy(x_df.std().to_numpy()).to(device=\"cuda\")\n+ x_tens -= mean\n+ x_tens /= std\n+ return x_tens.to(torch.float32), mean, std\n+\n+\n+def get_total_gb_feature(df: pd.DataFrame) -> pd.Series:\n+ \"\"\"\n+ Calculate the total gigabytes feature from the dataframe.\n+\n+ Args:\n+ df: DataFrame containing the necessary columns for calculation\n+\n+ Returns:\n+ Series containing the calculated total gigabytes\n+ \"\"\"\n+ # Calculate memory access in bytes\n+ m, n, k = df[\"dim_m\"], df[\"dim_n\"], df[\"dim_k\"]\n+ dtype_size = df[\"dtype_size\"] / 8 # Convert bits to bytes\n+\n+ # A: m×k, B: k×n, C: m×n\n+ return ((m * k + k * n + m * n) * dtype_size) / 1e9 # Convert to GB\n+\n+\n+def get_total_gflop_feature(df: pd.DataFrame) -> pd.Series:\n+ \"\"\"\n+ Calculate the total gigaflops feature from the dataframe.\n+\n+ Args:\n+ df: DataFrame containing the necessary columns for calculation\n+\n+ Returns:\n+ Series containing the calculated total gigaflops\n+ \"\"\"\n+ # For matrix multiplication, flops = 2 * m * n * k\n+ m, n, k = df[\"dim_m\"], df[\"dim_n\"], df[\"dim_k\"]\n+ return (2 * m * n * k) / 1e9 # Convert to GFLOP\n+\n+\n+class ModelWrapper:\n+ \"\"\"\n+ Wrapper for the neural network model that handles encoding inputs and decoding outputs.\n+\n+ This class provides methods to prepare inputs for the model and interpret its outputs,\n+ handling the necessary standardization and feature engineering.\n+ \"\"\"\n+\n+ def __init__(self) -> None:\n+ \"\"\"Initialize the model wrapper with the pre-trained model and standardization parameters.\"\"\"\n+ start_time = time.time()\n+ self.model = NeuralNetwork(\n+ n_inputs=12, hidden_layer_widths=[2**8 for _ in range(6)]\n+ )\n+ self.model: NeuralNetwork = torch._inductor.aoti_load_package(MODEL_PATH)\n+ end_time = time.time()\n+\n+ log.info(\"NN Kernel Prediction Model loaded.\")\n+ log.info(\"Took: %s seconds\", end_time - start_time)\n+\n+ # Mean values for standardizing input features\n+ self.mean_for_standardization = torch.tensor(\n+ [\n+ 2.78275084,\n+ 8.23996746,\n+ 7.27791873,\n+ 7.92035942,\n+ -2.39558163,\n+ 3.40679233,\n+ 5.80237395,\n+ 3.95781827,\n+ 4.19478321,\n+ 4.19098234,\n+ 0.9045909,\n+ 1.28331208,\n+ ],\n+ device=\"cuda\",\n+ )\n+\n+ # 
Standard deviation values for standardizing input features\n+ self.std_for_standardization = torch.tensor(\n+ [\n+ 0.08322756,\n+ 2.31893439,\n+ 1.65605574,\n+ 2.15447078,\n+ 2.19682881,\n+ 2.99600806,\n+ 1.24328795,\n+ 0.92352521,\n+ 0.93849802,\n+ 0.93872011,\n+ 0.57455891,\n+ 0.5837217,\n+ ],\n+ device=\"cuda\",\n+ )\n+\n+ def vec(\n+ self, m: int, n: int, k: int, dsize: int, config: Any\n+ ) -> tuple[int, int, int, int, int, int, int, int, int]:\n+ \"\"\"\n+ Convert matrix multiplication parameters and config to a feature vector.\n+\n+ Args:\n+ m: First dimension of matrix multiplication\n+ n: Second dimension of matrix multiplication\n+ k: Third dimension of matrix multiplication\n+ dsize: Data size in bits (e.g., 16 for float16, 32 for float32)\n+ config: Configuration object containing kernel parameters\n+\n+ Returns:\n+ Tuple containing the extracted features\n+ \"\"\"\n+ kwargs = config.all_kwargs()\n+\n+ return (\n+ int(m),\n+ int(n),\n+ int(k),\n+ int(dsize),\n+ int(kwargs[\"BLOCK_M\"]),\n+ int(kwargs[\"BLOCK_N\"]),\n+ int(kwargs[\"BLOCK_K\"]),\n+ int(kwargs[\"num_stages\"]),\n+ int(kwargs[\"num_warps\"]),\n+ )\n+\n+ @staticmethod\n+ def vec_params(\n+ m: int, n: int, k: int, dsize: int, params: TritonGEMMConfig\n+ ) -> tuple[int, int, int, int, int, int, int, int, int]:\n+ \"\"\"\n+ Convert matrix multiplication parameters and config to a feature vector.\n+\n+ Args:\n+ m: First dimension of matrix multiplication\n+ n: Second dimension of matrix multiplication\n+ k: Third dimension of matrix multiplication\n+ dsize: Data size in bits (e.g., 16 for float16, 32 for float32)\n+ config: Configuration object containing kernel parameters\n+\n+ Returns:\n+ Tuple containing the extracted features\n+ \"\"\"\n+\n+ return (\n+ int(m),\n+ int(n),\n+ int(k),\n+ int(dsize),\n+ int(params.block_m),\n+ int(params.block_n),\n+ int(params.block_k),\n+ int(params.num_stages),\n+ int(params.num_warps),\n+ )\n+\n+ def encode(\n+ self, m: int, n: int, k: int, dtype: torch.dtype, configs: list[Any]\n+ ) -> torch.Tensor:\n+ \"\"\"\n+ Encode the matrix multiplication parameters and configs as input tensors for the model.\n+\n+ Args:\n+ m: First dimension of matrix multiplication\n+ n: Second dimension of matrix multiplication\n+ k: Third dimension of matrix multiplication\n+ dtype: Data type of the matrices\n+ configs: List of configuration objects\n+\n+ Returns:\n+ Tensor containing the encoded inputs ready for the model\n+\n+ Raises:\n+ ValueError: If the dtype is not supported\n+ \"\"\"\n+ # Determine data size based on dtype\n+ if dtype == torch.bfloat16 or dtype == torch.float16:\n+ dsize = 16\n+ elif dtype == torch.float32:\n+ dsize = 32\n+ else:\n+ raise ValueError(f\"Unsupported dtype: {dtype}. Add support for this dtype.\")\n+\n+ # Create feature dataframe\n+ df = pd.DataFrame(", + "comment_created_at": "2025-06-30T15:55:10+00:00", + "comment_author": "esantorella", + "comment_body": "Not something that needs to be addressed right now, but I wonder if a lot of the overhead we're seeing is from using Pandas, which can be super slow and may introduce a new dependency. 
We can easily rewrite this to use solely PyTorch.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2064234144", + "pr_number": 152197, + "pr_file": "torch/_inductor/runtime/triton_heuristics.py", + "created_at": "2025-04-28T18:12:34+00:00", + "commented_code": "if autotune_cache:\n if best_config := autotune_cache.read_best(inductor_meta, configs):\n configs = [best_config]\n\n autotune_cache_info[\"best_config\"] = triton_config_to_hashable(\n best_config\n )\n autotune_cache_info[\"autotune_cache_state\"] = \"hit\"\n else:\n autotune_cache_info[\"autotune_cache_state\"] = \"miss\"\n autotune_cache_info[\"num_configs\"] = len(configs)\n if inductor_meta.get(\"coordinate_descent_tuning\"):\n autotune_cache_info[\"coordesc_tuning\"] = True\n if len(configs) == 1:", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2064234144", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152197, + "pr_file": "torch/_inductor/runtime/triton_heuristics.py", + "discussion_id": "2064234144", + "commented_code": "@@ -1698,9 +1701,28 @@ def cached_autotune(\n if autotune_cache:\n if best_config := autotune_cache.read_best(inductor_meta, configs):\n configs = [best_config]\n-\n+ autotune_cache_info[\"best_config\"] = triton_config_to_hashable(\n+ best_config\n+ )\n+ autotune_cache_info[\"autotune_cache_state\"] = \"hit\"\n+ else:\n+ autotune_cache_info[\"autotune_cache_state\"] = \"miss\"\n+ autotune_cache_info[\"num_configs\"] = len(configs)\n+ if inductor_meta.get(\"coordinate_descent_tuning\"):\n+ autotune_cache_info[\"coordesc_tuning\"] = True\n+ if len(configs) == 1:", + "comment_created_at": "2025-04-28T18:12:34+00:00", + "comment_author": "bdhirsh", + "comment_body": "basic question: given that we are logging the results of autotuning, what does it actually mean for there to be more than one config here? (shouldn't autotuning always end in a single config we can log?) ", + "pr_file_module": null + }, + { + "comment_id": "2066787922", + "repo_full_name": "pytorch/pytorch", + "pr_number": 152197, + "pr_file": "torch/_inductor/runtime/triton_heuristics.py", + "discussion_id": "2064234144", + "commented_code": "@@ -1698,9 +1701,28 @@ def cached_autotune(\n if autotune_cache:\n if best_config := autotune_cache.read_best(inductor_meta, configs):\n configs = [best_config]\n-\n+ autotune_cache_info[\"best_config\"] = triton_config_to_hashable(\n+ best_config\n+ )\n+ autotune_cache_info[\"autotune_cache_state\"] = \"hit\"\n+ else:\n+ autotune_cache_info[\"autotune_cache_state\"] = \"miss\"\n+ autotune_cache_info[\"num_configs\"] = len(configs)\n+ if inductor_meta.get(\"coordinate_descent_tuning\"):\n+ autotune_cache_info[\"coordesc_tuning\"] = True\n+ if len(configs) == 1:", + "comment_created_at": "2025-04-29T15:11:20+00:00", + "comment_author": "jamesjwu", + "comment_body": "We're logging the compile time \"results\", in that we're logging all the possible configs we need to actually autotune when the function is actually called. But we haven't run autotuning yet, so there can be more than one config. \r\n\r\nWe run autotuning later after dynamo returns, in CachingAutotuner.benchmark_all_configs. There, it should be possible to log just the best config. 
", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-validate-performance-metrics.md b/_reviewers/pytorch-validate-performance-metrics.md index e54c5a1..fd95611 100644 --- a/_reviewers/pytorch-validate-performance-metrics.md +++ b/_reviewers/pytorch-validate-performance-metrics.md @@ -47,279 +47,3 @@ def count_flops_fx(node): return flop_counter.get_total_flops() return 0 ``` - - -[ - { - "discussion_id": "2049183436", - "pr_number": 149697, - "pr_file": "torch/_inductor/analysis/device_info.py", - "created_at": "2025-04-17T15:20:04+00:00", - "commented_code": "from dataclasses import dataclass\n\nimport torch\n\n\n@dataclass(frozen=True)\nclass DeviceInfo:\n \"\"\"\n Theoretical Numbers from data sheet. If two numbers are given, Tensor/Matrix Core vs not,\n then the higher number is reported. Sparsity is not considered.\n\n\n Bandwidth numbers are tricky, because there are platform differences that may not show up in the profiler trace.\n For example,\n \"\"\"\n\n tops: dict[torch.dtype, float]\n dram_bw_gbs: float\n dram_gb: float\n\n\n# TODO investigate profiler support for tf32 and allow device to report correct number when it's turned on.\n_device_mapping: dict[str, DeviceInfo] = {\n # Source: https://resources.nvidia.com/en-us-tensor-core/nvidia-tensor-core-gpu-datasheet\n \"NVIDIA H100\": DeviceInfo(\n tops={\n torch.float64: 9.7,\n torch.float32: 19.5,\n torch.bfloat16: 1979.0,\n torch.float16: 1979.0,\n torch.float8_e8m0fnu: 3958.0,\n torch.float8_e8m0fnu: 3958.0,\n torch.float8_e4m3fnuz: 3958.0,", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2049183436", - "repo_full_name": "pytorch/pytorch", - "pr_number": 149697, - "pr_file": "torch/_inductor/analysis/device_info.py", - "discussion_id": "2049183436", - "commented_code": "@@ -0,0 +1,136 @@\n+from dataclasses import dataclass\n+\n+import torch\n+\n+\n+@dataclass(frozen=True)\n+class DeviceInfo:\n+ \"\"\"\n+ Theoretical Numbers from data sheet. If two numbers are given, Tensor/Matrix Core vs not,\n+ then the higher number is reported. Sparsity is not considered.\n+\n+\n+ Bandwidth numbers are tricky, because there are platform differences that may not show up in the profiler trace.\n+ For example,\n+ \"\"\"\n+\n+ tops: dict[torch.dtype, float]\n+ dram_bw_gbs: float\n+ dram_gb: float\n+\n+\n+# TODO investigate profiler support for tf32 and allow device to report correct number when it's turned on.\n+_device_mapping: dict[str, DeviceInfo] = {\n+ # Source: https://resources.nvidia.com/en-us-tensor-core/nvidia-tensor-core-gpu-datasheet\n+ \"NVIDIA H100\": DeviceInfo(\n+ tops={\n+ torch.float64: 9.7,\n+ torch.float32: 19.5,\n+ torch.bfloat16: 1979.0,\n+ torch.float16: 1979.0,\n+ torch.float8_e8m0fnu: 3958.0,\n+ torch.float8_e8m0fnu: 3958.0,\n+ torch.float8_e4m3fnuz: 3958.0,", - "comment_created_at": "2025-04-17T15:20:04+00:00", - "comment_author": "eellison", - "comment_body": "I know the fbcode servers are clock rate limited. So, the numbers will be off for those.. I think this is actually somewhat important for getting accurate numbers. \r\n\r\ncc @bertmaher - who did similar analysis here - https://fb.workplace.com/groups/420659799592399/posts/761265522198490/\r\n\r\nHow would you adjust for clock rate ? Is something simple like current_clock_rate/default sufficient ? 
I dont have a good sense of this.", - "pr_file_module": null - }, - { - "comment_id": "2049374582", - "repo_full_name": "pytorch/pytorch", - "pr_number": 149697, - "pr_file": "torch/_inductor/analysis/device_info.py", - "discussion_id": "2049183436", - "commented_code": "@@ -0,0 +1,136 @@\n+from dataclasses import dataclass\n+\n+import torch\n+\n+\n+@dataclass(frozen=True)\n+class DeviceInfo:\n+ \"\"\"\n+ Theoretical Numbers from data sheet. If two numbers are given, Tensor/Matrix Core vs not,\n+ then the higher number is reported. Sparsity is not considered.\n+\n+\n+ Bandwidth numbers are tricky, because there are platform differences that may not show up in the profiler trace.\n+ For example,\n+ \"\"\"\n+\n+ tops: dict[torch.dtype, float]\n+ dram_bw_gbs: float\n+ dram_gb: float\n+\n+\n+# TODO investigate profiler support for tf32 and allow device to report correct number when it's turned on.\n+_device_mapping: dict[str, DeviceInfo] = {\n+ # Source: https://resources.nvidia.com/en-us-tensor-core/nvidia-tensor-core-gpu-datasheet\n+ \"NVIDIA H100\": DeviceInfo(\n+ tops={\n+ torch.float64: 9.7,\n+ torch.float32: 19.5,\n+ torch.bfloat16: 1979.0,\n+ torch.float16: 1979.0,\n+ torch.float8_e8m0fnu: 3958.0,\n+ torch.float8_e8m0fnu: 3958.0,\n+ torch.float8_e4m3fnuz: 3958.0,", - "comment_created_at": "2025-04-17T17:19:17+00:00", - "comment_author": "exclamaforte", - "comment_body": "interested in Bert's opinion too, I would think that `current_clock_rate/default sufficient` would be fine considering that most of the flops calculation are just clockrate * core count * flops per core.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2049248412", - "pr_number": 149697, - "pr_file": "torch/_inductor/scheduler.py", - "created_at": "2025-04-17T15:56:09+00:00", - "commented_code": "if isinstance(self, ExternKernelSchedulerNode):", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2049248412", - "repo_full_name": "pytorch/pytorch", - "pr_number": 149697, - "pr_file": "torch/_inductor/scheduler.py", - "discussion_id": "2049248412", - "commented_code": "@@ -819,15 +854,13 @@ def get_estimated_runtime(self) -> float:\n \n if isinstance(self, ExternKernelSchedulerNode):", - "comment_created_at": "2025-04-17T15:56:09+00:00", - "comment_author": "eellison", - "comment_body": "This is going to skip any triton template we codegen.\r\n\r\nWhat about instead:\r\n\r\n- check the nodes origins and see if any of them are in flop counter\r\n- use the fx node to get flops\r\n\r\nIn some cases, we populate origins for things like layout conversions for inputs to matmul, or also, unfusing bias. to handle that lets just skip nodes which are not either `is_template()` or ExternKernels.\r\n", - "pr_file_module": null - }, - { - "comment_id": "2049274487", - "repo_full_name": "pytorch/pytorch", - "pr_number": 149697, - "pr_file": "torch/_inductor/scheduler.py", - "discussion_id": "2049248412", - "commented_code": "@@ -819,15 +854,13 @@ def get_estimated_runtime(self) -> float:\n \n if isinstance(self, ExternKernelSchedulerNode):", - "comment_created_at": "2025-04-17T16:14:57+00:00", - "comment_author": "eellison", - "comment_body": "Looks like that will also fix the failures with this pr. 
\r\n\r\nYou can reuse/factor out this:\r\n\r\nhttps://github.com/pytorch/pytorch/blob/4843ce7611b5b9e85b16f1c05cd3b4959c166fce/torch/_inductor/graph.py#L632-L655", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2092021006", - "pr_number": 149697, - "pr_file": "torch/_inductor/graph.py", - "created_at": "2025-05-15T22:03:53+00:00", - "commented_code": "# only grouped convolutions benchmarked as slower in conv samples for inference only\n if is_inference:\n from torch.utils.flop_counter import FlopCounterMode\n\n flop_counts: dict[str, float] = defaultdict(float)\n for node in conv_nodes:\n success, args, kwargs = torch._inductor.fx_utils.get_fake_args_kwargs(\n node\n )\n\n if success:\n with FlopCounterMode(display=False) as flop_counter_mode:\n with V.fake_mode:\n node.target(*args, **kwargs)\n\n counted_flops = flop_counter_mode.get_total_flops()\n if is_grouped(node):\n node_type = \"grouped\"\n elif is_small_channel(node):\n node_type = \"small\"\n elif is_in_out_channel(node):\n node_type = \"in_out\"\n else:\n node_type = \"default\"\n counted_flops = count_flops_fx(node)", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2092021006", - "repo_full_name": "pytorch/pytorch", - "pr_number": 149697, - "pr_file": "torch/_inductor/graph.py", - "discussion_id": "2092021006", - "commented_code": "@@ -652,32 +653,24 @@ def is_small_channel(n: torch.fx.Node) -> bool:\n \n # only grouped convolutions benchmarked as slower in conv samples for inference only\n if is_inference:\n- from torch.utils.flop_counter import FlopCounterMode\n-\n flop_counts: dict[str, float] = defaultdict(float)\n for node in conv_nodes:\n- success, args, kwargs = torch._inductor.fx_utils.get_fake_args_kwargs(\n- node\n- )\n-\n- if success:\n- with FlopCounterMode(display=False) as flop_counter_mode:\n- with V.fake_mode:\n- node.target(*args, **kwargs)\n-\n- counted_flops = flop_counter_mode.get_total_flops()\n- if is_grouped(node):\n- node_type = \"grouped\"\n- elif is_small_channel(node):\n- node_type = \"small\"\n- elif is_in_out_channel(node):\n- node_type = \"in_out\"\n- else:\n- node_type = \"default\"\n+ counted_flops = count_flops_fx(node)", - "comment_created_at": "2025-05-15T22:03:53+00:00", - "comment_author": "eellison", - "comment_body": "Can we make `count_flops_fx` a little bit smarter where it does not try to run this unless the node has a registration in flop_counter ? 
currently we always instantiate the tensors, and faketensormode, and run ?", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2105535986", - "pr_number": 149697, - "pr_file": "test/inductor/test_analysis.py", - "created_at": "2025-05-24T00:01:29+00:00", - "commented_code": "# Owner(s): [\"module: inductor\"]\n\nimport json\nimport re\nimport tempfile\nimport unittest\nimport uuid\nfrom io import StringIO\nfrom unittest.mock import patch\n\nimport torch\nimport torch.nn.functional as F\nfrom torch._inductor.analysis.profile_analysis import (\n _augment_trace_helper,\n _create_extern_mapping,\n JsonProfile,\n main,\n)\nfrom torch._inductor.utils import (\n fresh_inductor_cache,\n run_and_get_code,\n tabulate_2d,\n zip_dicts,\n)\nfrom torch.testing._internal.common_cuda import SM70OrLater\nfrom torch.testing._internal.common_device_type import (\n dtypes,\n instantiate_device_type_tests,\n skipIf,\n)\nfrom torch.testing._internal.common_utils import parametrize, run_tests, TestCase\nfrom torch.testing._internal.inductor_utils import IS_BIG_GPU\n\n\nexample_profile = \"\"\"\n{\n \"schemaVersion\": 1,\n \"deviceProperties\": [\n {\n \"id\": 0, \"name\": \"NVIDIA H100\", \"totalGlobalMem\": 101997215744,\n \"computeMajor\": 9, \"computeMinor\": 0,\n \"maxThreadsPerBlock\": 1024, \"maxThreadsPerMultiprocessor\": 2048,\n \"regsPerBlock\": 65536, \"warpSize\": 32,\n \"sharedMemPerBlock\": 49152, \"numSms\": 132\n , \"regsPerMultiprocessor\": 65536, \"sharedMemPerBlockOptin\": 232448, \"sharedMemPerMultiprocessor\": 233472\n }\n ],\n \"cupti_version\": 24,\n \"cuda_runtime_version\": 12060,\n \"with_flops\": 1,\n \"record_shapes\": 1,\n \"cuda_driver_version\": 12040,\n \"profile_memory\": 1,\n \"trace_id\": \"301995E163ED42048FBD783860E6E7DC\",\n \"displayTimeUnit\": \"ms\",\n \"baseTimeNanoseconds\": 1743521598000000000,\n \"traceEvents\": [\n {\n \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::convolution\", \"pid\": 1147039, \"tid\": 1147039,\n \"ts\": 198093488368.463, \"dur\": 425.453,\n \"args\": {\n \"External id\": 1340,\"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Concrete Inputs\": \\\n[\"\", \"\", \"\", \"[2, 2]\", \"[3, 3]\", \"[1, 1]\", \"False\", \"[0, 0]\", \"1\"], \"Input type\": [\"float\", \"float\", \"\", \\\n\"ScalarList\", \"ScalarList\", \"ScalarList\", \"Scalar\", \"ScalarList\", \"Scalar\"], \"Input Strides\": [[150528, 1, 672, 3],\\\n[147, 1, 21, 3], [], [], [], [], [], [], []], \"Input Dims\": [[1, 3, 224, 224], [64, 3, 7, 7], [], [], [], [], [], \\\n[], []], \"Ev Idx\": 1339\n }\n },\n {\n \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::_convolution\", \"pid\": 1147039, \"tid\": 1147039,\n \"ts\": 198093488444.498, \"dur\": 341.867,\n \"args\": {\n \"External id\": 1341,\"Record function id\": 0, \"Concrete Inputs\": [\"\", \"\", \"\", \"[2, 2]\", \"[3, 3]\", \"[1, 1]\",\\\n \"False\", \"[0, 0]\", \"1\", \"False\", \"False\", \"True\", \"True\"], \"Input type\": [\"float\", \"float\", \"\", \"ScalarList\",\\\n \"ScalarList\", \"ScalarList\", \"Scalar\", \"ScalarList\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\"], \"Input Strides\": \\\n[[150528, 1, 672, 3], [147, 1, 21, 3], [], [], [], [], [], [], [], [], [], [], []], \"Input Dims\": [[1, 3, 224, 224], \\\n[64, 3, 7, 7], [], [], [], [], [], [], [], [], [], [], []], \"Ev Idx\": 1340\n }\n },\n {\n \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::addmm\", \"pid\": 1147039, \"tid\": 1147039,\n \"ts\": 198093513655.849, \"dur\": 251.130,\n \"args\": 
{\n \"External id\": 1619,\"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Concrete Inputs\": \\\n[\"\", \"\", \"\", \"1\", \"1\", \"\"], \"Input type\": [\"float\", \"float\", \"float\", \"Scalar\", \"Scalar\", \"float\"], \"Input Strides\":\\\n [[1], [0, 1], [1, 2048], [], [], [1000, 1]], \"Input Dims\": [[1000], [1, 2048], [2048, 1000], [], [], [1, 1000]], \\\n\"Ev Idx\": 1618\n }\n },\n {\n \"ph\": \"X\", \"cat\": \"kernel\", \"name\": \"void cutlass_addmm\", \"pid\": 1147039, \"tid\": 1147039,\n \"ts\": 198093513655.849, \"dur\": 251.130,\n \"args\": {\n \"External id\": 1619,\"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Ev Idx\": 1618\n }\n },\n {\n \"ph\": \"X\", \"cat\": \"kernel\", \"name\": \"void convolution_kernel\", \"pid\": 1147039, \"tid\": 1147039,\n \"ts\": 198093513655.849, \"dur\": 200.130,\n \"args\": {\n \"External id\": 1342, \"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Ev Idx\": 1618\n }\n },\n {\n \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::convolution\", \"pid\": 1147039, \"tid\": 1147039,\n \"ts\": 198093488444.498, \"dur\": 341.867,\n \"args\": {\n \"External id\": 1342,\"Record function id\": 0, \"Concrete Inputs\": [\"\", \"\", \"\", \"[2, 2]\", \"[3, 3]\", \"[1, 1]\", \\\n\"False\", \"[0, 0]\", \"1\", \"False\", \"False\", \"True\", \"True\"], \"Input type\": [\"float\", \"float\", \"\", \"ScalarList\", \\\n\"ScalarList\", \"ScalarList\", \"Scalar\", \"ScalarList\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\"], \"Input \\\nStrides\": [[150528, 1, 672, 3], [147, 1, 21, 3], [], [], [], [], [], [], [], [], [], [], []], \"Input Dims\": \\\n[[1, 3, 224, 224], [64, 3, 7, 7], [], [], [], [], [], [], [], [], [], [], []], \"Ev Idx\": 1340\n }\n }\n],\n \"traceName\": \"/tmp/compiled_module_profile.json\"\n}\n\"\"\"\n\n\ndef verify_flops(self, expected_flops, out_profile):\n j = 0\n for i in range(len(out_profile[\"traceEvents\"])):\n if \"kernel_flop\" in out_profile[\"traceEvents\"][i][\"args\"]:\n self.assertEqual(\n out_profile[\"traceEvents\"][i][\"args\"][\"kernel_flop\"],\n expected_flops[j],\n )\n j += 1\n\n\ndef random_tensor(size, dtype, **kwargs):\n if dtype in [torch.half, torch.bfloat16, torch.float, torch.double]:\n return torch.randn(size, dtype=dtype, **kwargs)\n elif dtype in [torch.uint8, torch.int8, torch.short, torch.int, torch.long]:\n return torch.randint(0, 100, size, dtype=dtype, **kwargs)\n else:\n raise ValueError(\"Unsupported data type\")\n\n\ndef cT(device, dtype):\n def T(*shape, requires_grad=False):\n return random_tensor(\n shape, requires_grad=requires_grad, device=device, dtype=dtype\n )\n\n return T\n\n\ndef FlopCounterMode(*args, **kwargs):\n return torch.utils.flop_counter.FlopCounterMode(*args, **kwargs, display=False)\n\n\nTMP_DIR = tempfile.mkdtemp()\n\n\ndef trace_files():\n TRACE1 = f\"{TMP_DIR}/trace1-{uuid.uuid4()}.json\"\n TRACE2 = f\"{TMP_DIR}/trace2-{uuid.uuid4()}.json\"\n return TRACE1, TRACE2\n\n\ndef omni_model(device, dtype, compile=True, addmm=True, bmm=True):\n T = cT(device, dtype)\n\n def model():\n input_conv = T(1, 3, 56, 56)\n conv_weight = T(12, 3, 5, 5)\n\n # Increased matrix sizes\n B = 8\n M = 256\n N = 512\n K = 768\n mat1 = T(M, N)\n mat2 = T(N, K)\n\n batch_mat1 = T(B, M, N)\n batch_mat2 = T(B, N, K)\n\n conv_output = F.conv2d(input_conv, conv_weight)\n\n conv_output = conv_output * 10\n mm_output = torch.mm(mat1, mat2)\n ret = [\n conv_output.flatten(),\n mm_output.flatten(),\n ]\n\n if addmm:\n 
addmm_output = torch.addmm(\n torch.zeros(mm_output.shape, device=mat1.device, dtype=mat1.dtype),\n mat1,\n mat2,\n )\n ret.append(addmm_output.flatten())\n\n if bmm:\n bmm_output = torch.bmm(batch_mat1, batch_mat2)\n ret.append(bmm_output.flatten())\n\n if bmm and addmm:\n baddbmm_output = torch.baddbmm(\n torch.zeros(\n 1,\n *mm_output.shape,\n device=batch_mat1.device,\n dtype=batch_mat1.dtype,\n ),\n batch_mat1,\n batch_mat2,\n )\n ret.append(baddbmm_output.flatten())\n\n return torch.cat(ret)\n\n if compile:\n return torch.compile(\n model, options={\"benchmark_kernel\": True, \"profile_bandwidth\": True}\n )\n return model\n\n\nprefix = [\"profile.py\"]\n\n\nclass TestUtils(TestCase):\n def test_tabulate2d(self):\n headers = [\"Kernel\", \"Self H100 TIME (ms)\", \"Count\", \"Percent\"]\n rows = [\n [\"aten::mm\", 0.500, 7, 0.0],\n [\"aten::bmm\", 0.400, 6, 0.0],\n [\"aten::baddbmm\", 0.300, 5, 0.0],\n [\"aten::convolution\", 0.200, 4, 0.0],\n [\"aten::cudnn_convolution\", 0.100, 3, 0.0],\n ]\n table = [\n \" Kernel | Self H100 TIME (ms) | Count | Percent \",\n \"-----------------------------------------------------------------\",\n \" aten::mm | 0.5 | 7 | 0.0 \",\n \" aten::bmm | 0.4 | 6 | 0.0 \",\n \" aten::baddbmm | 0.3 | 5 | 0.0 \",\n \" aten::convolution | 0.2 | 4 | 0.0 \",\n \" aten::cudnn_convolution | 0.1 | 3 | 0.0 \",\n ]\n res = tabulate_2d(rows, headers)\n for r, t in zip(res.split(\"\\n\"), table):\n self.assertEqual(r, t)\n\n def test_zip_dicts(self):\n d1 = {\"a\": 1, \"b\": 2}\n d2 = {\"a\": 3, \"c\": 4}\n res = zip_dicts(d1, d2, d1_default=\"foo\", d2_default=\"bar\")\n self.assertEqual(set(res), {(\"a\", 1, 3), (\"b\", 2, \"bar\"), (\"c\", \"foo\", 4)})\n res = zip_dicts(d1, d2)\n self.assertEqual(set(res), {(\"a\", 1, 3), (\"b\", 2, None), (\"c\", None, 4)})\n\n\nclass TestAnalysis(TestCase):\n @skipIf(not SM70OrLater, \"Requires sm70\")\n def test_noop(self):\n with (\n patch(\"sys.stdout\", new_callable=StringIO) as mock_stdout,\n patch(\"sys.argv\", [*prefix]),\n ):\n main()\n self.assertEqual(mock_stdout.getvalue(), \"\")\n\n @skipIf(not SM70OrLater, \"Requires sm70\")\n @dtypes(torch.float, torch.double, torch.float16)\n def test_diff(self, device, dtype):\n \"\"\"\n diff, testing out the nruns feature too.\n \"\"\"\n if device == \"cpu\":\n # TODO cpu support\n return\n om = omni_model(device, dtype)\n REPEAT = 5\n trace1, trace2 = trace_files()\n print(\"first trace\")\n torch._dynamo.reset() # reset the cache\n with fresh_inductor_cache():\n with torch.profiler.profile(record_shapes=True) as p:\n om()\n p.export_chrome_trace(trace1)\n\n print(\"second trace\")\n torch._dynamo.reset() # reset the cache\n with fresh_inductor_cache():\n with torch.profiler.profile(record_shapes=True) as p:\n for _ in range(REPEAT):\n om()\n p.export_chrome_trace(trace2)\n\n print(\"diffing...\")\n with patch(", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2105535986", - "repo_full_name": "pytorch/pytorch", - "pr_number": 149697, - "pr_file": "test/inductor/test_analysis.py", - "discussion_id": "2105535986", - "commented_code": "@@ -0,0 +1,601 @@\n+# Owner(s): [\"module: inductor\"]\n+\n+import json\n+import re\n+import tempfile\n+import unittest\n+import uuid\n+from io import StringIO\n+from unittest.mock import patch\n+\n+import torch\n+import torch.nn.functional as F\n+from torch._inductor.analysis.profile_analysis import (\n+ _augment_trace_helper,\n+ _create_extern_mapping,\n+ JsonProfile,\n+ main,\n+)\n+from torch._inductor.utils 
import (\n+ fresh_inductor_cache,\n+ run_and_get_code,\n+ tabulate_2d,\n+ zip_dicts,\n+)\n+from torch.testing._internal.common_cuda import SM70OrLater\n+from torch.testing._internal.common_device_type import (\n+ dtypes,\n+ instantiate_device_type_tests,\n+ skipIf,\n+)\n+from torch.testing._internal.common_utils import parametrize, run_tests, TestCase\n+from torch.testing._internal.inductor_utils import IS_BIG_GPU\n+\n+\n+example_profile = \"\"\"\n+{\n+ \"schemaVersion\": 1,\n+ \"deviceProperties\": [\n+ {\n+ \"id\": 0, \"name\": \"NVIDIA H100\", \"totalGlobalMem\": 101997215744,\n+ \"computeMajor\": 9, \"computeMinor\": 0,\n+ \"maxThreadsPerBlock\": 1024, \"maxThreadsPerMultiprocessor\": 2048,\n+ \"regsPerBlock\": 65536, \"warpSize\": 32,\n+ \"sharedMemPerBlock\": 49152, \"numSms\": 132\n+ , \"regsPerMultiprocessor\": 65536, \"sharedMemPerBlockOptin\": 232448, \"sharedMemPerMultiprocessor\": 233472\n+ }\n+ ],\n+ \"cupti_version\": 24,\n+ \"cuda_runtime_version\": 12060,\n+ \"with_flops\": 1,\n+ \"record_shapes\": 1,\n+ \"cuda_driver_version\": 12040,\n+ \"profile_memory\": 1,\n+ \"trace_id\": \"301995E163ED42048FBD783860E6E7DC\",\n+ \"displayTimeUnit\": \"ms\",\n+ \"baseTimeNanoseconds\": 1743521598000000000,\n+ \"traceEvents\": [\n+ {\n+ \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::convolution\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093488368.463, \"dur\": 425.453,\n+ \"args\": {\n+ \"External id\": 1340,\"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Concrete Inputs\": \\\n+[\"\", \"\", \"\", \"[2, 2]\", \"[3, 3]\", \"[1, 1]\", \"False\", \"[0, 0]\", \"1\"], \"Input type\": [\"float\", \"float\", \"\", \\\n+\"ScalarList\", \"ScalarList\", \"ScalarList\", \"Scalar\", \"ScalarList\", \"Scalar\"], \"Input Strides\": [[150528, 1, 672, 3],\\\n+[147, 1, 21, 3], [], [], [], [], [], [], []], \"Input Dims\": [[1, 3, 224, 224], [64, 3, 7, 7], [], [], [], [], [], \\\n+[], []], \"Ev Idx\": 1339\n+ }\n+ },\n+ {\n+ \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::_convolution\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093488444.498, \"dur\": 341.867,\n+ \"args\": {\n+ \"External id\": 1341,\"Record function id\": 0, \"Concrete Inputs\": [\"\", \"\", \"\", \"[2, 2]\", \"[3, 3]\", \"[1, 1]\",\\\n+ \"False\", \"[0, 0]\", \"1\", \"False\", \"False\", \"True\", \"True\"], \"Input type\": [\"float\", \"float\", \"\", \"ScalarList\",\\\n+ \"ScalarList\", \"ScalarList\", \"Scalar\", \"ScalarList\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\"], \"Input Strides\": \\\n+[[150528, 1, 672, 3], [147, 1, 21, 3], [], [], [], [], [], [], [], [], [], [], []], \"Input Dims\": [[1, 3, 224, 224], \\\n+[64, 3, 7, 7], [], [], [], [], [], [], [], [], [], [], []], \"Ev Idx\": 1340\n+ }\n+ },\n+ {\n+ \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::addmm\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093513655.849, \"dur\": 251.130,\n+ \"args\": {\n+ \"External id\": 1619,\"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Concrete Inputs\": \\\n+[\"\", \"\", \"\", \"1\", \"1\", \"\"], \"Input type\": [\"float\", \"float\", \"float\", \"Scalar\", \"Scalar\", \"float\"], \"Input Strides\":\\\n+ [[1], [0, 1], [1, 2048], [], [], [1000, 1]], \"Input Dims\": [[1000], [1, 2048], [2048, 1000], [], [], [1, 1000]], \\\n+\"Ev Idx\": 1618\n+ }\n+ },\n+ {\n+ \"ph\": \"X\", \"cat\": \"kernel\", \"name\": \"void cutlass_addmm\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093513655.849, \"dur\": 251.130,\n+ \"args\": {\n+ 
\"External id\": 1619,\"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Ev Idx\": 1618\n+ }\n+ },\n+ {\n+ \"ph\": \"X\", \"cat\": \"kernel\", \"name\": \"void convolution_kernel\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093513655.849, \"dur\": 200.130,\n+ \"args\": {\n+ \"External id\": 1342, \"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Ev Idx\": 1618\n+ }\n+ },\n+ {\n+ \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::convolution\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093488444.498, \"dur\": 341.867,\n+ \"args\": {\n+ \"External id\": 1342,\"Record function id\": 0, \"Concrete Inputs\": [\"\", \"\", \"\", \"[2, 2]\", \"[3, 3]\", \"[1, 1]\", \\\n+\"False\", \"[0, 0]\", \"1\", \"False\", \"False\", \"True\", \"True\"], \"Input type\": [\"float\", \"float\", \"\", \"ScalarList\", \\\n+\"ScalarList\", \"ScalarList\", \"Scalar\", \"ScalarList\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\"], \"Input \\\n+Strides\": [[150528, 1, 672, 3], [147, 1, 21, 3], [], [], [], [], [], [], [], [], [], [], []], \"Input Dims\": \\\n+[[1, 3, 224, 224], [64, 3, 7, 7], [], [], [], [], [], [], [], [], [], [], []], \"Ev Idx\": 1340\n+ }\n+ }\n+],\n+ \"traceName\": \"/tmp/compiled_module_profile.json\"\n+}\n+\"\"\"\n+\n+\n+def verify_flops(self, expected_flops, out_profile):\n+ j = 0\n+ for i in range(len(out_profile[\"traceEvents\"])):\n+ if \"kernel_flop\" in out_profile[\"traceEvents\"][i][\"args\"]:\n+ self.assertEqual(\n+ out_profile[\"traceEvents\"][i][\"args\"][\"kernel_flop\"],\n+ expected_flops[j],\n+ )\n+ j += 1\n+\n+\n+def random_tensor(size, dtype, **kwargs):\n+ if dtype in [torch.half, torch.bfloat16, torch.float, torch.double]:\n+ return torch.randn(size, dtype=dtype, **kwargs)\n+ elif dtype in [torch.uint8, torch.int8, torch.short, torch.int, torch.long]:\n+ return torch.randint(0, 100, size, dtype=dtype, **kwargs)\n+ else:\n+ raise ValueError(\"Unsupported data type\")\n+\n+\n+def cT(device, dtype):\n+ def T(*shape, requires_grad=False):\n+ return random_tensor(\n+ shape, requires_grad=requires_grad, device=device, dtype=dtype\n+ )\n+\n+ return T\n+\n+\n+def FlopCounterMode(*args, **kwargs):\n+ return torch.utils.flop_counter.FlopCounterMode(*args, **kwargs, display=False)\n+\n+\n+TMP_DIR = tempfile.mkdtemp()\n+\n+\n+def trace_files():\n+ TRACE1 = f\"{TMP_DIR}/trace1-{uuid.uuid4()}.json\"\n+ TRACE2 = f\"{TMP_DIR}/trace2-{uuid.uuid4()}.json\"\n+ return TRACE1, TRACE2\n+\n+\n+def omni_model(device, dtype, compile=True, addmm=True, bmm=True):\n+ T = cT(device, dtype)\n+\n+ def model():\n+ input_conv = T(1, 3, 56, 56)\n+ conv_weight = T(12, 3, 5, 5)\n+\n+ # Increased matrix sizes\n+ B = 8\n+ M = 256\n+ N = 512\n+ K = 768\n+ mat1 = T(M, N)\n+ mat2 = T(N, K)\n+\n+ batch_mat1 = T(B, M, N)\n+ batch_mat2 = T(B, N, K)\n+\n+ conv_output = F.conv2d(input_conv, conv_weight)\n+\n+ conv_output = conv_output * 10\n+ mm_output = torch.mm(mat1, mat2)\n+ ret = [\n+ conv_output.flatten(),\n+ mm_output.flatten(),\n+ ]\n+\n+ if addmm:\n+ addmm_output = torch.addmm(\n+ torch.zeros(mm_output.shape, device=mat1.device, dtype=mat1.dtype),\n+ mat1,\n+ mat2,\n+ )\n+ ret.append(addmm_output.flatten())\n+\n+ if bmm:\n+ bmm_output = torch.bmm(batch_mat1, batch_mat2)\n+ ret.append(bmm_output.flatten())\n+\n+ if bmm and addmm:\n+ baddbmm_output = torch.baddbmm(\n+ torch.zeros(\n+ 1,\n+ *mm_output.shape,\n+ device=batch_mat1.device,\n+ dtype=batch_mat1.dtype,\n+ ),\n+ batch_mat1,\n+ batch_mat2,\n+ )\n+ 
ret.append(baddbmm_output.flatten())\n+\n+ return torch.cat(ret)\n+\n+ if compile:\n+ return torch.compile(\n+ model, options={\"benchmark_kernel\": True, \"profile_bandwidth\": True}\n+ )\n+ return model\n+\n+\n+prefix = [\"profile.py\"]\n+\n+\n+class TestUtils(TestCase):\n+ def test_tabulate2d(self):\n+ headers = [\"Kernel\", \"Self H100 TIME (ms)\", \"Count\", \"Percent\"]\n+ rows = [\n+ [\"aten::mm\", 0.500, 7, 0.0],\n+ [\"aten::bmm\", 0.400, 6, 0.0],\n+ [\"aten::baddbmm\", 0.300, 5, 0.0],\n+ [\"aten::convolution\", 0.200, 4, 0.0],\n+ [\"aten::cudnn_convolution\", 0.100, 3, 0.0],\n+ ]\n+ table = [\n+ \" Kernel | Self H100 TIME (ms) | Count | Percent \",\n+ \"-----------------------------------------------------------------\",\n+ \" aten::mm | 0.5 | 7 | 0.0 \",\n+ \" aten::bmm | 0.4 | 6 | 0.0 \",\n+ \" aten::baddbmm | 0.3 | 5 | 0.0 \",\n+ \" aten::convolution | 0.2 | 4 | 0.0 \",\n+ \" aten::cudnn_convolution | 0.1 | 3 | 0.0 \",\n+ ]\n+ res = tabulate_2d(rows, headers)\n+ for r, t in zip(res.split(\"\\n\"), table):\n+ self.assertEqual(r, t)\n+\n+ def test_zip_dicts(self):\n+ d1 = {\"a\": 1, \"b\": 2}\n+ d2 = {\"a\": 3, \"c\": 4}\n+ res = zip_dicts(d1, d2, d1_default=\"foo\", d2_default=\"bar\")\n+ self.assertEqual(set(res), {(\"a\", 1, 3), (\"b\", 2, \"bar\"), (\"c\", \"foo\", 4)})\n+ res = zip_dicts(d1, d2)\n+ self.assertEqual(set(res), {(\"a\", 1, 3), (\"b\", 2, None), (\"c\", None, 4)})\n+\n+\n+class TestAnalysis(TestCase):\n+ @skipIf(not SM70OrLater, \"Requires sm70\")\n+ def test_noop(self):\n+ with (\n+ patch(\"sys.stdout\", new_callable=StringIO) as mock_stdout,\n+ patch(\"sys.argv\", [*prefix]),\n+ ):\n+ main()\n+ self.assertEqual(mock_stdout.getvalue(), \"\")\n+\n+ @skipIf(not SM70OrLater, \"Requires sm70\")\n+ @dtypes(torch.float, torch.double, torch.float16)\n+ def test_diff(self, device, dtype):\n+ \"\"\"\n+ diff, testing out the nruns feature too.\n+ \"\"\"\n+ if device == \"cpu\":\n+ # TODO cpu support\n+ return\n+ om = omni_model(device, dtype)\n+ REPEAT = 5\n+ trace1, trace2 = trace_files()\n+ print(\"first trace\")\n+ torch._dynamo.reset() # reset the cache\n+ with fresh_inductor_cache():\n+ with torch.profiler.profile(record_shapes=True) as p:\n+ om()\n+ p.export_chrome_trace(trace1)\n+\n+ print(\"second trace\")\n+ torch._dynamo.reset() # reset the cache\n+ with fresh_inductor_cache():\n+ with torch.profiler.profile(record_shapes=True) as p:\n+ for _ in range(REPEAT):\n+ om()\n+ p.export_chrome_trace(trace2)\n+\n+ print(\"diffing...\")\n+ with patch(", - "comment_created_at": "2025-05-24T00:01:29+00:00", - "comment_author": "eellison", - "comment_body": "I ran this test and I see all the achieved bandwidth being < .10. this seems wrong - can we double check the numbers. 
I think i mentioned this in our 1-1 but anything <.01 or > 1 is probably off.", - "pr_file_module": null - }, - { - "comment_id": "2117123016", - "repo_full_name": "pytorch/pytorch", - "pr_number": 149697, - "pr_file": "test/inductor/test_analysis.py", - "discussion_id": "2105535986", - "commented_code": "@@ -0,0 +1,601 @@\n+# Owner(s): [\"module: inductor\"]\n+\n+import json\n+import re\n+import tempfile\n+import unittest\n+import uuid\n+from io import StringIO\n+from unittest.mock import patch\n+\n+import torch\n+import torch.nn.functional as F\n+from torch._inductor.analysis.profile_analysis import (\n+ _augment_trace_helper,\n+ _create_extern_mapping,\n+ JsonProfile,\n+ main,\n+)\n+from torch._inductor.utils import (\n+ fresh_inductor_cache,\n+ run_and_get_code,\n+ tabulate_2d,\n+ zip_dicts,\n+)\n+from torch.testing._internal.common_cuda import SM70OrLater\n+from torch.testing._internal.common_device_type import (\n+ dtypes,\n+ instantiate_device_type_tests,\n+ skipIf,\n+)\n+from torch.testing._internal.common_utils import parametrize, run_tests, TestCase\n+from torch.testing._internal.inductor_utils import IS_BIG_GPU\n+\n+\n+example_profile = \"\"\"\n+{\n+ \"schemaVersion\": 1,\n+ \"deviceProperties\": [\n+ {\n+ \"id\": 0, \"name\": \"NVIDIA H100\", \"totalGlobalMem\": 101997215744,\n+ \"computeMajor\": 9, \"computeMinor\": 0,\n+ \"maxThreadsPerBlock\": 1024, \"maxThreadsPerMultiprocessor\": 2048,\n+ \"regsPerBlock\": 65536, \"warpSize\": 32,\n+ \"sharedMemPerBlock\": 49152, \"numSms\": 132\n+ , \"regsPerMultiprocessor\": 65536, \"sharedMemPerBlockOptin\": 232448, \"sharedMemPerMultiprocessor\": 233472\n+ }\n+ ],\n+ \"cupti_version\": 24,\n+ \"cuda_runtime_version\": 12060,\n+ \"with_flops\": 1,\n+ \"record_shapes\": 1,\n+ \"cuda_driver_version\": 12040,\n+ \"profile_memory\": 1,\n+ \"trace_id\": \"301995E163ED42048FBD783860E6E7DC\",\n+ \"displayTimeUnit\": \"ms\",\n+ \"baseTimeNanoseconds\": 1743521598000000000,\n+ \"traceEvents\": [\n+ {\n+ \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::convolution\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093488368.463, \"dur\": 425.453,\n+ \"args\": {\n+ \"External id\": 1340,\"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Concrete Inputs\": \\\n+[\"\", \"\", \"\", \"[2, 2]\", \"[3, 3]\", \"[1, 1]\", \"False\", \"[0, 0]\", \"1\"], \"Input type\": [\"float\", \"float\", \"\", \\\n+\"ScalarList\", \"ScalarList\", \"ScalarList\", \"Scalar\", \"ScalarList\", \"Scalar\"], \"Input Strides\": [[150528, 1, 672, 3],\\\n+[147, 1, 21, 3], [], [], [], [], [], [], []], \"Input Dims\": [[1, 3, 224, 224], [64, 3, 7, 7], [], [], [], [], [], \\\n+[], []], \"Ev Idx\": 1339\n+ }\n+ },\n+ {\n+ \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::_convolution\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093488444.498, \"dur\": 341.867,\n+ \"args\": {\n+ \"External id\": 1341,\"Record function id\": 0, \"Concrete Inputs\": [\"\", \"\", \"\", \"[2, 2]\", \"[3, 3]\", \"[1, 1]\",\\\n+ \"False\", \"[0, 0]\", \"1\", \"False\", \"False\", \"True\", \"True\"], \"Input type\": [\"float\", \"float\", \"\", \"ScalarList\",\\\n+ \"ScalarList\", \"ScalarList\", \"Scalar\", \"ScalarList\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\"], \"Input Strides\": \\\n+[[150528, 1, 672, 3], [147, 1, 21, 3], [], [], [], [], [], [], [], [], [], [], []], \"Input Dims\": [[1, 3, 224, 224], \\\n+[64, 3, 7, 7], [], [], [], [], [], [], [], [], [], [], []], \"Ev Idx\": 1340\n+ }\n+ },\n+ {\n+ \"ph\": \"X\", \"cat\": \"cpu_op\", 
\"name\": \"aten::addmm\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093513655.849, \"dur\": 251.130,\n+ \"args\": {\n+ \"External id\": 1619,\"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Concrete Inputs\": \\\n+[\"\", \"\", \"\", \"1\", \"1\", \"\"], \"Input type\": [\"float\", \"float\", \"float\", \"Scalar\", \"Scalar\", \"float\"], \"Input Strides\":\\\n+ [[1], [0, 1], [1, 2048], [], [], [1000, 1]], \"Input Dims\": [[1000], [1, 2048], [2048, 1000], [], [], [1, 1000]], \\\n+\"Ev Idx\": 1618\n+ }\n+ },\n+ {\n+ \"ph\": \"X\", \"cat\": \"kernel\", \"name\": \"void cutlass_addmm\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093513655.849, \"dur\": 251.130,\n+ \"args\": {\n+ \"External id\": 1619,\"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Ev Idx\": 1618\n+ }\n+ },\n+ {\n+ \"ph\": \"X\", \"cat\": \"kernel\", \"name\": \"void convolution_kernel\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093513655.849, \"dur\": 200.130,\n+ \"args\": {\n+ \"External id\": 1342, \"Sequence number\": 0, \"Fwd thread id\": 0, \"Record function id\": 0, \"Ev Idx\": 1618\n+ }\n+ },\n+ {\n+ \"ph\": \"X\", \"cat\": \"cpu_op\", \"name\": \"aten::convolution\", \"pid\": 1147039, \"tid\": 1147039,\n+ \"ts\": 198093488444.498, \"dur\": 341.867,\n+ \"args\": {\n+ \"External id\": 1342,\"Record function id\": 0, \"Concrete Inputs\": [\"\", \"\", \"\", \"[2, 2]\", \"[3, 3]\", \"[1, 1]\", \\\n+\"False\", \"[0, 0]\", \"1\", \"False\", \"False\", \"True\", \"True\"], \"Input type\": [\"float\", \"float\", \"\", \"ScalarList\", \\\n+\"ScalarList\", \"ScalarList\", \"Scalar\", \"ScalarList\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\", \"Scalar\"], \"Input \\\n+Strides\": [[150528, 1, 672, 3], [147, 1, 21, 3], [], [], [], [], [], [], [], [], [], [], []], \"Input Dims\": \\\n+[[1, 3, 224, 224], [64, 3, 7, 7], [], [], [], [], [], [], [], [], [], [], []], \"Ev Idx\": 1340\n+ }\n+ }\n+],\n+ \"traceName\": \"/tmp/compiled_module_profile.json\"\n+}\n+\"\"\"\n+\n+\n+def verify_flops(self, expected_flops, out_profile):\n+ j = 0\n+ for i in range(len(out_profile[\"traceEvents\"])):\n+ if \"kernel_flop\" in out_profile[\"traceEvents\"][i][\"args\"]:\n+ self.assertEqual(\n+ out_profile[\"traceEvents\"][i][\"args\"][\"kernel_flop\"],\n+ expected_flops[j],\n+ )\n+ j += 1\n+\n+\n+def random_tensor(size, dtype, **kwargs):\n+ if dtype in [torch.half, torch.bfloat16, torch.float, torch.double]:\n+ return torch.randn(size, dtype=dtype, **kwargs)\n+ elif dtype in [torch.uint8, torch.int8, torch.short, torch.int, torch.long]:\n+ return torch.randint(0, 100, size, dtype=dtype, **kwargs)\n+ else:\n+ raise ValueError(\"Unsupported data type\")\n+\n+\n+def cT(device, dtype):\n+ def T(*shape, requires_grad=False):\n+ return random_tensor(\n+ shape, requires_grad=requires_grad, device=device, dtype=dtype\n+ )\n+\n+ return T\n+\n+\n+def FlopCounterMode(*args, **kwargs):\n+ return torch.utils.flop_counter.FlopCounterMode(*args, **kwargs, display=False)\n+\n+\n+TMP_DIR = tempfile.mkdtemp()\n+\n+\n+def trace_files():\n+ TRACE1 = f\"{TMP_DIR}/trace1-{uuid.uuid4()}.json\"\n+ TRACE2 = f\"{TMP_DIR}/trace2-{uuid.uuid4()}.json\"\n+ return TRACE1, TRACE2\n+\n+\n+def omni_model(device, dtype, compile=True, addmm=True, bmm=True):\n+ T = cT(device, dtype)\n+\n+ def model():\n+ input_conv = T(1, 3, 56, 56)\n+ conv_weight = T(12, 3, 5, 5)\n+\n+ # Increased matrix sizes\n+ B = 8\n+ M = 256\n+ N = 512\n+ K = 768\n+ mat1 = T(M, N)\n+ mat2 = T(N, K)\n+\n+ batch_mat1 = T(B, M, N)\n+ 
batch_mat2 = T(B, N, K)\n+\n+ conv_output = F.conv2d(input_conv, conv_weight)\n+\n+ conv_output = conv_output * 10\n+ mm_output = torch.mm(mat1, mat2)\n+ ret = [\n+ conv_output.flatten(),\n+ mm_output.flatten(),\n+ ]\n+\n+ if addmm:\n+ addmm_output = torch.addmm(\n+ torch.zeros(mm_output.shape, device=mat1.device, dtype=mat1.dtype),\n+ mat1,\n+ mat2,\n+ )\n+ ret.append(addmm_output.flatten())\n+\n+ if bmm:\n+ bmm_output = torch.bmm(batch_mat1, batch_mat2)\n+ ret.append(bmm_output.flatten())\n+\n+ if bmm and addmm:\n+ baddbmm_output = torch.baddbmm(\n+ torch.zeros(\n+ 1,\n+ *mm_output.shape,\n+ device=batch_mat1.device,\n+ dtype=batch_mat1.dtype,\n+ ),\n+ batch_mat1,\n+ batch_mat2,\n+ )\n+ ret.append(baddbmm_output.flatten())\n+\n+ return torch.cat(ret)\n+\n+ if compile:\n+ return torch.compile(\n+ model, options={\"benchmark_kernel\": True, \"profile_bandwidth\": True}\n+ )\n+ return model\n+\n+\n+prefix = [\"profile.py\"]\n+\n+\n+class TestUtils(TestCase):\n+ def test_tabulate2d(self):\n+ headers = [\"Kernel\", \"Self H100 TIME (ms)\", \"Count\", \"Percent\"]\n+ rows = [\n+ [\"aten::mm\", 0.500, 7, 0.0],\n+ [\"aten::bmm\", 0.400, 6, 0.0],\n+ [\"aten::baddbmm\", 0.300, 5, 0.0],\n+ [\"aten::convolution\", 0.200, 4, 0.0],\n+ [\"aten::cudnn_convolution\", 0.100, 3, 0.0],\n+ ]\n+ table = [\n+ \" Kernel | Self H100 TIME (ms) | Count | Percent \",\n+ \"-----------------------------------------------------------------\",\n+ \" aten::mm | 0.5 | 7 | 0.0 \",\n+ \" aten::bmm | 0.4 | 6 | 0.0 \",\n+ \" aten::baddbmm | 0.3 | 5 | 0.0 \",\n+ \" aten::convolution | 0.2 | 4 | 0.0 \",\n+ \" aten::cudnn_convolution | 0.1 | 3 | 0.0 \",\n+ ]\n+ res = tabulate_2d(rows, headers)\n+ for r, t in zip(res.split(\"\\n\"), table):\n+ self.assertEqual(r, t)\n+\n+ def test_zip_dicts(self):\n+ d1 = {\"a\": 1, \"b\": 2}\n+ d2 = {\"a\": 3, \"c\": 4}\n+ res = zip_dicts(d1, d2, d1_default=\"foo\", d2_default=\"bar\")\n+ self.assertEqual(set(res), {(\"a\", 1, 3), (\"b\", 2, \"bar\"), (\"c\", \"foo\", 4)})\n+ res = zip_dicts(d1, d2)\n+ self.assertEqual(set(res), {(\"a\", 1, 3), (\"b\", 2, None), (\"c\", None, 4)})\n+\n+\n+class TestAnalysis(TestCase):\n+ @skipIf(not SM70OrLater, \"Requires sm70\")\n+ def test_noop(self):\n+ with (\n+ patch(\"sys.stdout\", new_callable=StringIO) as mock_stdout,\n+ patch(\"sys.argv\", [*prefix]),\n+ ):\n+ main()\n+ self.assertEqual(mock_stdout.getvalue(), \"\")\n+\n+ @skipIf(not SM70OrLater, \"Requires sm70\")\n+ @dtypes(torch.float, torch.double, torch.float16)\n+ def test_diff(self, device, dtype):\n+ \"\"\"\n+ diff, testing out the nruns feature too.\n+ \"\"\"\n+ if device == \"cpu\":\n+ # TODO cpu support\n+ return\n+ om = omni_model(device, dtype)\n+ REPEAT = 5\n+ trace1, trace2 = trace_files()\n+ print(\"first trace\")\n+ torch._dynamo.reset() # reset the cache\n+ with fresh_inductor_cache():\n+ with torch.profiler.profile(record_shapes=True) as p:\n+ om()\n+ p.export_chrome_trace(trace1)\n+\n+ print(\"second trace\")\n+ torch._dynamo.reset() # reset the cache\n+ with fresh_inductor_cache():\n+ with torch.profiler.profile(record_shapes=True) as p:\n+ for _ in range(REPEAT):\n+ om()\n+ p.export_chrome_trace(trace2)\n+\n+ print(\"diffing...\")\n+ with patch(", - "comment_created_at": "2025-05-31T03:08:54+00:00", - "comment_author": "exclamaforte", - "comment_body": "I think it looks good now!\r\n```\r\n Kernel Name | foo Kernel Count | foo FLOPS | foo Kernel Reads (GB) | foo Dur (us) | foo Achieved FLOPS % | foo Achieved Bandwidth % | bar Kernel Count | bar FLOPS | bar Kernel Reads (GB) | bar 
Dur (us) | bar Achieved FLOPS % | bar Achieved Bandwidth % \r\n--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\r\n void at::native::vectorized_el | 141 | 0 | 2291.3034663102667 | 27.48305673758865 | 0 | 68.39711839732142 | 140 | 0 | 2296.773433304663 | 27.408307142857144 | 0 | 68.56040099416907 \r\n triton_poi_fused_cat_8 | 78 | 0 | 1574.7606227700633 | 9.219589743589747 | 0 | 47.007779784181 | 78 | 0 | 1562.5378211858626 | 9.225320512820513 | 0 | 46.64292003539889 \r\n triton_poi_fused_zeros_4 | 20 | 0 | 252.8315785679245 | 1.5600000000000003 | 0 | 7.547211300535059 | 22 | 0 | 247.92602043168762 | 1.591227272727273 | 0 | 7.4007767293041065 \r\n nvjet_hsh_64x32_64x16_2x4_v_ba | 1 | 44306028169014.09 | 360.5633802816902 | 4.544 | 2.238808901920874 | 10.763085978557916 | 4 | 50766875540146.125 | 413.14189078894964 | 3.992 | 2.5652792086986422 | 12.332593754894019 \r\n triton_poi_fused_randn_5 | 22 | 0 | 272.44196070060616 | 7.698772727272726 | 0 | 8.13259584180914 | 24 | 0 | 272.03694126279845 | 7.7105 | 0 | 8.120505709337268 \r\n triton_poi_fused_randn_6 | 30 | 0 | 328.2370610588902 | 19.173099999999998 | 0 | 9.798121225638516 | 35 | 0 | 327.5356462783756 | 19.21434285714286 | 0 | 9.777183470996286 \r\n nvjet_hsh_96x128_64x6_2x1_v_bz | 1 | 227745013574660.62 | 1779.2579185520362 | 7.072 | 11.508085577294626 | 53.112176673195115 | 5 | 188095526438492.2 | 1469.4963003007203 | 8.633600000000001 | 9.504574352627197 | 43.86556120300658 \r\n triton_poi_fused_zeros_7 | 21 | 0 | 253.7686410608481 | 1.5543333333333336 | 0 | 7.575183315249197 | 22 | 0 | 249.15748848030734 | 1.5841363636363637 | 0 | 7.437536969561413 \r\n void at::native::elementwise_k | 3 | 0 | 194.78879307640364 | 3.936 | 0 | 5.814590838101601 | 13 | 0 | 181.2755167170482 | 3.7068461538461532 | 0 | 5.411209454240244 \r\n nvjet_hsh_96x128_64x6_2x1_v_ba | 1 | 167772160000000.0 | 1365.3333333333333 | 9.6 | 8.477623041940374 | 40.75621890547263 | 5 | 253727683090235.0 | 2064.84117098173 | 6.5405999999999995 | 12.82100470390273 | 61.63704988005164 \r\n triton_poi_fused_randn_1 | 86 | 0 | 0.6861092067063755 | 2.8322325581395353 | 0 | 0.020480871841981354 | 91 | 0 | 0.7055919536190854 | 2.7624175824175823 | 0 | 0.02106244637668912 \r\n void at::native::vectorized_el | 2 | 0 | 9.567026991614258 | 1.568 | 0 | 0.28558289527206737 | 7 | 0 | 15.017761845823944 | 1.5178571428571428 | 0 | 0.44829139838280435 \r\n void at::native::im2col_kernel | 1 | 551086956521.739 | 275.64538043478257 | 8.832 | 0.02784673858118944 | 8.228220311486046 | 3 | 540686595521.0607 | 270.4432767907672 | 9.002666666666668 | 0.027321202401266332 | 8.07293363554529 \r\n void cutlass::Kernel2 list[BaseConfig]:\n import torch\n\n pruned_configs = []\n for gemm_config in configs:\n device = torch.cuda.current_device()\n props = torch.cuda.get_device_properties(device)\n sm_available = props.shared_memory_per_block_optin # type: ignore[attr-defined]\n NUM_REG = 255\n\n acc_regs = math.ceil(\n gemm_config.block_m * gemm_config.block_n / (gemm_config.num_warps * 32)\n )\n\n shared_mem_accum = dtype_size * (\n gemm_config.block_m * gemm_config.block_k\n + gemm_config.block_n * gemm_config.block_k\n )\n\n # Will use more shared memory than available\n if shared_mem_accum * gemm_config.num_stages > sm_available:\n continue\n # 
Lower bound for register spillage, if exceeds the kernel will certainly spill\n elif acc_regs > NUM_REG:\n continue", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2176455141", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156610, - "pr_file": "torch/_inductor/template_heuristics.py", - "discussion_id": "2176455141", - "commented_code": "@@ -519,6 +520,40 @@ def _scale_mm_configs(\n \n return scaled_configs\n \n+ def _prune_exhaustive_configs(\n+ self,\n+ configs: list[BaseConfig],\n+ dtype_size: int,\n+ ) -> list[BaseConfig]:\n+ import torch\n+\n+ pruned_configs = []\n+ for gemm_config in configs:\n+ device = torch.cuda.current_device()\n+ props = torch.cuda.get_device_properties(device)\n+ sm_available = props.shared_memory_per_block_optin # type: ignore[attr-defined]\n+ NUM_REG = 255\n+\n+ acc_regs = math.ceil(\n+ gemm_config.block_m * gemm_config.block_n / (gemm_config.num_warps * 32)\n+ )\n+\n+ shared_mem_accum = dtype_size * (\n+ gemm_config.block_m * gemm_config.block_k\n+ + gemm_config.block_n * gemm_config.block_k\n+ )\n+\n+ # Will use more shared memory than available\n+ if shared_mem_accum * gemm_config.num_stages > sm_available:\n+ continue\n+ # Lower bound for register spillage, if exceeds the kernel will certainly spill\n+ elif acc_regs > NUM_REG:\n+ continue", - "comment_created_at": "2025-07-01T05:23:57+00:00", - "comment_author": "nmacchioni", - "comment_body": "If I understand correctly this prunes all spilling config, are we sure we want to do this? Spilled registers does not necessarily mean that performance is bad, although we could potentially prune when # spilled >> some metric.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2165669923", - "pr_number": 156785, - "pr_file": "torch/_inductor/kernel/mm.py", - "created_at": "2025-06-25T04:13:25+00:00", - "commented_code": "mm_configs = V.choices.get_base_mm_configs(device_type)\n persistent_mm_configs = V.choices.get_persistent_mm_configs(device_type)\n extra_mm_configs = V.choices.get_extra_mm_configs(device_type)\n lookup_dict = get_lookup_table([mat1, mat2], name)", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2165669923", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156785, - "pr_file": "torch/_inductor/kernel/mm.py", - "discussion_id": "2165669923", - "commented_code": "@@ -696,14 +699,18 @@ def tuned_mm(mat1, mat2, *, layout=None):\n mm_configs = V.choices.get_base_mm_configs(device_type)\n persistent_mm_configs = V.choices.get_persistent_mm_configs(device_type)\n extra_mm_configs = V.choices.get_extra_mm_configs(device_type)\n+ lookup_dict = get_lookup_table([mat1, mat2], name)", - "comment_created_at": "2025-06-25T04:13:25+00:00", - "comment_author": "jansel", - "comment_body": "Do we need to take into account the epilogue in the lookup table?", - "pr_file_module": null - }, - { - "comment_id": "2165867537", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156785, - "pr_file": "torch/_inductor/kernel/mm.py", - "discussion_id": "2165669923", - "commented_code": "@@ -696,14 +699,18 @@ def tuned_mm(mat1, mat2, *, layout=None):\n mm_configs = V.choices.get_base_mm_configs(device_type)\n persistent_mm_configs = V.choices.get_persistent_mm_configs(device_type)\n extra_mm_configs = V.choices.get_extra_mm_configs(device_type)\n+ lookup_dict = get_lookup_table([mat1, mat2], name)", - "comment_created_at": "2025-06-25T06:11:01+00:00", - "comment_author": "coconutruben", - "comment_body": "IIUC no. 
This is if you mean the template epilogue (e.g. addmm) because the table is function specific right now, so config e.g. for addmm will be picked specifically for addmm, and is found in the lookup table under the addmm key.", - "pr_file_module": null - }, - { - "comment_id": "2166929857", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156785, - "pr_file": "torch/_inductor/kernel/mm.py", - "discussion_id": "2165669923", - "commented_code": "@@ -696,14 +699,18 @@ def tuned_mm(mat1, mat2, *, layout=None):\n mm_configs = V.choices.get_base_mm_configs(device_type)\n persistent_mm_configs = V.choices.get_persistent_mm_configs(device_type)\n extra_mm_configs = V.choices.get_extra_mm_configs(device_type)\n+ lookup_dict = get_lookup_table([mat1, mat2], name)", - "comment_created_at": "2025-06-25T14:50:42+00:00", - "comment_author": "jansel", - "comment_body": "But doesn't the autotuning measure using the epilogue? I recall @eellison added that. This can affect the performance (sometimes a lot of the epilogue causes register spills).", - "pr_file_module": null - }, - { - "comment_id": "2167362851", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156785, - "pr_file": "torch/_inductor/kernel/mm.py", - "discussion_id": "2165669923", - "commented_code": "@@ -696,14 +699,18 @@ def tuned_mm(mat1, mat2, *, layout=None):\n mm_configs = V.choices.get_base_mm_configs(device_type)\n persistent_mm_configs = V.choices.get_persistent_mm_configs(device_type)\n extra_mm_configs = V.choices.get_extra_mm_configs(device_type)\n+ lookup_dict = get_lookup_table([mat1, mat2], name)", - "comment_created_at": "2025-06-25T18:36:37+00:00", - "comment_author": "coconutruben", - "comment_body": "IIUC what @eellison explained to me this should be safe because it happens in two stages. We first find the best config (here) and then we benchmark using that best config + epilogue vs not fusing the epilogue. So in this stage we're not interfering with that decision. We don't do a NxN comparison of configs + epilogue vs configs", - "pr_file_module": null - }, - { - "comment_id": "2167450658", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156785, - "pr_file": "torch/_inductor/kernel/mm.py", - "discussion_id": "2165669923", - "commented_code": "@@ -696,14 +699,18 @@ def tuned_mm(mat1, mat2, *, layout=None):\n mm_configs = V.choices.get_base_mm_configs(device_type)\n persistent_mm_configs = V.choices.get_persistent_mm_configs(device_type)\n extra_mm_configs = V.choices.get_extra_mm_configs(device_type)\n+ lookup_dict = get_lookup_table([mat1, mat2], name)", - "comment_created_at": "2025-06-25T19:21:28+00:00", - "comment_author": "PaulZhang12", - "comment_body": "@jansel It depends on how we construct the lookup table. If we take the autotune logs for example, then it is not fusion aware. However, if we look at whether the scheduler does a fusion with a certain config in a previous max-autotune run, we can use that config in the lookup table.\r\n\r\nThe lookup table just uses whichever config(s) we give it. 
There is no benchmarking of epilogues by default as that can lead to significant compile time overhead.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2175414212", - "pr_number": 156851, - "pr_file": "torch/_inductor/models/mm_kernel_prediction_model.py", - "created_at": "2025-06-30T15:55:10+00:00", - "commented_code": "\"\"\"\nNeural network model for predicting triton kernel performance.\n\nThis module provides functionality to load and use a pre-trained neural network\nfor predicting the performance of triton kernels.\n\"\"\"\n\nimport os\nimport time\nfrom collections.abc import Sequence\nfrom typing import Any\n\nimport numpy as np\nimport pandas as pd # type: ignore[import-untyped]\n\nimport torch\nimport torch.nn as nn\nfrom torch._inductor.kernel_lut import TritonGEMMConfig\n\n# Default model path - can be overridden by environment variable\nscript_dir = os.path.dirname(__file__)\nDEFAULT_MODEL_PATH = os.path.join(os.path.dirname(__file__), \"aoti_mm_model.pt2\")\nMODEL_PATH = os.environ.get(\"TRITON_KERNEL_SELECTION_MODEL_PATH\", DEFAULT_MODEL_PATH)\nimport logging\n\n\nlog = logging.getLogger(__name__)\n# turn on info logging\nlogging.basicConfig(level=logging.INFO)\n\n\nclass NeuralNetwork(nn.Module):\n \"\"\"\n Multilayer perceptron with a single output.\n\n It is designed for modeling runtime when there is a constant overhead of\n `kernel_overhead` and the non-overhead runtime tends to be easier to model\n on a log scale (e.g. doubling a dimension involved in a matrix\n multiplication results in runtime roughly doubling.)\n \"\"\"\n\n def __init__(\n self,\n n_inputs: int,\n hidden_layer_widths: Sequence[int],\n kernel_overhead: float = 0.00541,\n ) -> None:\n \"\"\"\n Args:\n n_inputs: Number of inputs\n hidden_layer_widths: Hidden layer widths\n kernel_overhead: Overhead of the kernel, assumed to be constant. The\n default of 0.00541 is the lowest runtime seen in Triton H100 data.\n \"\"\"\n super().__init__()\n self.n_inputs = n_inputs\n self.kernel_overhead = kernel_overhead\n self.log_kernel_overhead: float = torch.log(\n torch.tensor(kernel_overhead, device=\"cuda\")\n ).item()\n all_layer_widths = list(hidden_layer_widths) + [1]\n all_input_widths = [n_inputs] + list(hidden_layer_widths)\n layers: list[nn.Module] = []\n for n_in, n_out in zip(all_input_widths, all_layer_widths, strict=True):\n layers.append(nn.Linear(n_in, n_out))\n layers.append(nn.BatchNorm1d(n_out))\n layers.append(nn.ReLU())\n\n self.linear_relu_stack = nn.Sequential(*layers[:-2])\n\n def forward(self, x: torch.Tensor) -> torch.Tensor:\n \"\"\"\n Predict as log(exp(inputs) + self.kernel_overhead).\n\n Works well for predicting log(runtime) when runtime contains a constant\n overhead of `kernel_overhead`. 
(The log specification means that this\n wouldn't be trivially modeled with a bias term.)\n\n Probably could have fit the overhead rather than hard-coding it by\n having `self.kernel_overhead` be a tunable parameter or by having exp\n and log layers.\n \"\"\"\n log_base_pred = self.linear_relu_stack(x)\n log_overhead_tsr = torch.full_like(\n input=log_base_pred, fill_value=self.log_kernel_overhead, device=\"cuda\"\n )\n return torch.logsumexp(\n torch.stack([log_base_pred, log_overhead_tsr], dim=-1), dim=-1\n )\n\n\ndef get_nn_x(\n df: pd.DataFrame, mean: torch.Tensor | None = None, std: torch.Tensor | None = None\n) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:\n \"\"\"Standardize the data and convert it to a tensor.\"\"\"\n x_df = df[\n [\n \"dtype_size\",\n \"dim_m\",\n \"dim_n\",\n \"dim_k\",\n \"total_gb\",\n \"total_gflop\",\n \"flops_per_byte\",\n \"config_block_k\",\n \"config_block_m\",\n \"config_block_n\",\n \"config_num_stages\",\n \"config_num_warps\",\n ]\n ].copy()\n for col in x_df.columns:\n x_df[col] = np.log(x_df[col])\n\n x_tens = torch.from_numpy(x_df.astype(float).to_numpy()).to(device=\"cuda\")\n if mean is None:\n mean = torch.from_numpy(x_df.mean().to_numpy()).to(device=\"cuda\")\n if std is None:\n std = torch.from_numpy(x_df.std().to_numpy()).to(device=\"cuda\")\n x_tens -= mean\n x_tens /= std\n return x_tens.to(torch.float32), mean, std\n\n\ndef get_total_gb_feature(df: pd.DataFrame) -> pd.Series:\n \"\"\"\n Calculate the total gigabytes feature from the dataframe.\n\n Args:\n df: DataFrame containing the necessary columns for calculation\n\n Returns:\n Series containing the calculated total gigabytes\n \"\"\"\n # Calculate memory access in bytes\n m, n, k = df[\"dim_m\"], df[\"dim_n\"], df[\"dim_k\"]\n dtype_size = df[\"dtype_size\"] / 8 # Convert bits to bytes\n\n # A: m\u00d7k, B: k\u00d7n, C: m\u00d7n\n return ((m * k + k * n + m * n) * dtype_size) / 1e9 # Convert to GB\n\n\ndef get_total_gflop_feature(df: pd.DataFrame) -> pd.Series:\n \"\"\"\n Calculate the total gigaflops feature from the dataframe.\n\n Args:\n df: DataFrame containing the necessary columns for calculation\n\n Returns:\n Series containing the calculated total gigaflops\n \"\"\"\n # For matrix multiplication, flops = 2 * m * n * k\n m, n, k = df[\"dim_m\"], df[\"dim_n\"], df[\"dim_k\"]\n return (2 * m * n * k) / 1e9 # Convert to GFLOP\n\n\nclass ModelWrapper:\n \"\"\"\n Wrapper for the neural network model that handles encoding inputs and decoding outputs.\n\n This class provides methods to prepare inputs for the model and interpret its outputs,\n handling the necessary standardization and feature engineering.\n \"\"\"\n\n def __init__(self) -> None:\n \"\"\"Initialize the model wrapper with the pre-trained model and standardization parameters.\"\"\"\n start_time = time.time()\n self.model = NeuralNetwork(\n n_inputs=12, hidden_layer_widths=[2**8 for _ in range(6)]\n )\n self.model: NeuralNetwork = torch._inductor.aoti_load_package(MODEL_PATH)\n end_time = time.time()\n\n log.info(\"NN Kernel Prediction Model loaded.\")\n log.info(\"Took: %s seconds\", end_time - start_time)\n\n # Mean values for standardizing input features\n self.mean_for_standardization = torch.tensor(\n [\n 2.78275084,\n 8.23996746,\n 7.27791873,\n 7.92035942,\n -2.39558163,\n 3.40679233,\n 5.80237395,\n 3.95781827,\n 4.19478321,\n 4.19098234,\n 0.9045909,\n 1.28331208,\n ],\n device=\"cuda\",\n )\n\n # Standard deviation values for standardizing input features\n self.std_for_standardization = torch.tensor(\n [\n 
0.08322756,\n 2.31893439,\n 1.65605574,\n 2.15447078,\n 2.19682881,\n 2.99600806,\n 1.24328795,\n 0.92352521,\n 0.93849802,\n 0.93872011,\n 0.57455891,\n 0.5837217,\n ],\n device=\"cuda\",\n )\n\n def vec(\n self, m: int, n: int, k: int, dsize: int, config: Any\n ) -> tuple[int, int, int, int, int, int, int, int, int]:\n \"\"\"\n Convert matrix multiplication parameters and config to a feature vector.\n\n Args:\n m: First dimension of matrix multiplication\n n: Second dimension of matrix multiplication\n k: Third dimension of matrix multiplication\n dsize: Data size in bits (e.g., 16 for float16, 32 for float32)\n config: Configuration object containing kernel parameters\n\n Returns:\n Tuple containing the extracted features\n \"\"\"\n kwargs = config.all_kwargs()\n\n return (\n int(m),\n int(n),\n int(k),\n int(dsize),\n int(kwargs[\"BLOCK_M\"]),\n int(kwargs[\"BLOCK_N\"]),\n int(kwargs[\"BLOCK_K\"]),\n int(kwargs[\"num_stages\"]),\n int(kwargs[\"num_warps\"]),\n )\n\n @staticmethod\n def vec_params(\n m: int, n: int, k: int, dsize: int, params: TritonGEMMConfig\n ) -> tuple[int, int, int, int, int, int, int, int, int]:\n \"\"\"\n Convert matrix multiplication parameters and config to a feature vector.\n\n Args:\n m: First dimension of matrix multiplication\n n: Second dimension of matrix multiplication\n k: Third dimension of matrix multiplication\n dsize: Data size in bits (e.g., 16 for float16, 32 for float32)\n config: Configuration object containing kernel parameters\n\n Returns:\n Tuple containing the extracted features\n \"\"\"\n\n return (\n int(m),\n int(n),\n int(k),\n int(dsize),\n int(params.block_m),\n int(params.block_n),\n int(params.block_k),\n int(params.num_stages),\n int(params.num_warps),\n )\n\n def encode(\n self, m: int, n: int, k: int, dtype: torch.dtype, configs: list[Any]\n ) -> torch.Tensor:\n \"\"\"\n Encode the matrix multiplication parameters and configs as input tensors for the model.\n\n Args:\n m: First dimension of matrix multiplication\n n: Second dimension of matrix multiplication\n k: Third dimension of matrix multiplication\n dtype: Data type of the matrices\n configs: List of configuration objects\n\n Returns:\n Tensor containing the encoded inputs ready for the model\n\n Raises:\n ValueError: If the dtype is not supported\n \"\"\"\n # Determine data size based on dtype\n if dtype == torch.bfloat16 or dtype == torch.float16:\n dsize = 16\n elif dtype == torch.float32:\n dsize = 32\n else:\n raise ValueError(f\"Unsupported dtype: {dtype}. 
Add support for this dtype.\")\n\n # Create feature dataframe\n df = pd.DataFrame(", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2175414212", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156851, - "pr_file": "torch/_inductor/models/mm_kernel_prediction_model.py", - "discussion_id": "2175414212", - "commented_code": "@@ -0,0 +1,364 @@\n+\"\"\"\n+Neural network model for predicting triton kernel performance.\n+\n+This module provides functionality to load and use a pre-trained neural network\n+for predicting the performance of triton kernels.\n+\"\"\"\n+\n+import os\n+import time\n+from collections.abc import Sequence\n+from typing import Any\n+\n+import numpy as np\n+import pandas as pd # type: ignore[import-untyped]\n+\n+import torch\n+import torch.nn as nn\n+from torch._inductor.kernel_lut import TritonGEMMConfig\n+\n+# Default model path - can be overridden by environment variable\n+script_dir = os.path.dirname(__file__)\n+DEFAULT_MODEL_PATH = os.path.join(os.path.dirname(__file__), \"aoti_mm_model.pt2\")\n+MODEL_PATH = os.environ.get(\"TRITON_KERNEL_SELECTION_MODEL_PATH\", DEFAULT_MODEL_PATH)\n+import logging\n+\n+\n+log = logging.getLogger(__name__)\n+# turn on info logging\n+logging.basicConfig(level=logging.INFO)\n+\n+\n+class NeuralNetwork(nn.Module):\n+ \"\"\"\n+ Multilayer perceptron with a single output.\n+\n+ It is designed for modeling runtime when there is a constant overhead of\n+ `kernel_overhead` and the non-overhead runtime tends to be easier to model\n+ on a log scale (e.g. doubling a dimension involved in a matrix\n+ multiplication results in runtime roughly doubling.)\n+ \"\"\"\n+\n+ def __init__(\n+ self,\n+ n_inputs: int,\n+ hidden_layer_widths: Sequence[int],\n+ kernel_overhead: float = 0.00541,\n+ ) -> None:\n+ \"\"\"\n+ Args:\n+ n_inputs: Number of inputs\n+ hidden_layer_widths: Hidden layer widths\n+ kernel_overhead: Overhead of the kernel, assumed to be constant. The\n+ default of 0.00541 is the lowest runtime seen in Triton H100 data.\n+ \"\"\"\n+ super().__init__()\n+ self.n_inputs = n_inputs\n+ self.kernel_overhead = kernel_overhead\n+ self.log_kernel_overhead: float = torch.log(\n+ torch.tensor(kernel_overhead, device=\"cuda\")\n+ ).item()\n+ all_layer_widths = list(hidden_layer_widths) + [1]\n+ all_input_widths = [n_inputs] + list(hidden_layer_widths)\n+ layers: list[nn.Module] = []\n+ for n_in, n_out in zip(all_input_widths, all_layer_widths, strict=True):\n+ layers.append(nn.Linear(n_in, n_out))\n+ layers.append(nn.BatchNorm1d(n_out))\n+ layers.append(nn.ReLU())\n+\n+ self.linear_relu_stack = nn.Sequential(*layers[:-2])\n+\n+ def forward(self, x: torch.Tensor) -> torch.Tensor:\n+ \"\"\"\n+ Predict as log(exp(inputs) + self.kernel_overhead).\n+\n+ Works well for predicting log(runtime) when runtime contains a constant\n+ overhead of `kernel_overhead`. 
(The log specification means that this\n+ wouldn't be trivially modeled with a bias term.)\n+\n+ Probably could have fit the overhead rather than hard-coding it by\n+ having `self.kernel_overhead` be a tunable parameter or by having exp\n+ and log layers.\n+ \"\"\"\n+ log_base_pred = self.linear_relu_stack(x)\n+ log_overhead_tsr = torch.full_like(\n+ input=log_base_pred, fill_value=self.log_kernel_overhead, device=\"cuda\"\n+ )\n+ return torch.logsumexp(\n+ torch.stack([log_base_pred, log_overhead_tsr], dim=-1), dim=-1\n+ )\n+\n+\n+def get_nn_x(\n+ df: pd.DataFrame, mean: torch.Tensor | None = None, std: torch.Tensor | None = None\n+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:\n+ \"\"\"Standardize the data and convert it to a tensor.\"\"\"\n+ x_df = df[\n+ [\n+ \"dtype_size\",\n+ \"dim_m\",\n+ \"dim_n\",\n+ \"dim_k\",\n+ \"total_gb\",\n+ \"total_gflop\",\n+ \"flops_per_byte\",\n+ \"config_block_k\",\n+ \"config_block_m\",\n+ \"config_block_n\",\n+ \"config_num_stages\",\n+ \"config_num_warps\",\n+ ]\n+ ].copy()\n+ for col in x_df.columns:\n+ x_df[col] = np.log(x_df[col])\n+\n+ x_tens = torch.from_numpy(x_df.astype(float).to_numpy()).to(device=\"cuda\")\n+ if mean is None:\n+ mean = torch.from_numpy(x_df.mean().to_numpy()).to(device=\"cuda\")\n+ if std is None:\n+ std = torch.from_numpy(x_df.std().to_numpy()).to(device=\"cuda\")\n+ x_tens -= mean\n+ x_tens /= std\n+ return x_tens.to(torch.float32), mean, std\n+\n+\n+def get_total_gb_feature(df: pd.DataFrame) -> pd.Series:\n+ \"\"\"\n+ Calculate the total gigabytes feature from the dataframe.\n+\n+ Args:\n+ df: DataFrame containing the necessary columns for calculation\n+\n+ Returns:\n+ Series containing the calculated total gigabytes\n+ \"\"\"\n+ # Calculate memory access in bytes\n+ m, n, k = df[\"dim_m\"], df[\"dim_n\"], df[\"dim_k\"]\n+ dtype_size = df[\"dtype_size\"] / 8 # Convert bits to bytes\n+\n+ # A: m\u00d7k, B: k\u00d7n, C: m\u00d7n\n+ return ((m * k + k * n + m * n) * dtype_size) / 1e9 # Convert to GB\n+\n+\n+def get_total_gflop_feature(df: pd.DataFrame) -> pd.Series:\n+ \"\"\"\n+ Calculate the total gigaflops feature from the dataframe.\n+\n+ Args:\n+ df: DataFrame containing the necessary columns for calculation\n+\n+ Returns:\n+ Series containing the calculated total gigaflops\n+ \"\"\"\n+ # For matrix multiplication, flops = 2 * m * n * k\n+ m, n, k = df[\"dim_m\"], df[\"dim_n\"], df[\"dim_k\"]\n+ return (2 * m * n * k) / 1e9 # Convert to GFLOP\n+\n+\n+class ModelWrapper:\n+ \"\"\"\n+ Wrapper for the neural network model that handles encoding inputs and decoding outputs.\n+\n+ This class provides methods to prepare inputs for the model and interpret its outputs,\n+ handling the necessary standardization and feature engineering.\n+ \"\"\"\n+\n+ def __init__(self) -> None:\n+ \"\"\"Initialize the model wrapper with the pre-trained model and standardization parameters.\"\"\"\n+ start_time = time.time()\n+ self.model = NeuralNetwork(\n+ n_inputs=12, hidden_layer_widths=[2**8 for _ in range(6)]\n+ )\n+ self.model: NeuralNetwork = torch._inductor.aoti_load_package(MODEL_PATH)\n+ end_time = time.time()\n+\n+ log.info(\"NN Kernel Prediction Model loaded.\")\n+ log.info(\"Took: %s seconds\", end_time - start_time)\n+\n+ # Mean values for standardizing input features\n+ self.mean_for_standardization = torch.tensor(\n+ [\n+ 2.78275084,\n+ 8.23996746,\n+ 7.27791873,\n+ 7.92035942,\n+ -2.39558163,\n+ 3.40679233,\n+ 5.80237395,\n+ 3.95781827,\n+ 4.19478321,\n+ 4.19098234,\n+ 0.9045909,\n+ 1.28331208,\n+ ],\n+ device=\"cuda\",\n+ 
)\n+\n+ # Standard deviation values for standardizing input features\n+ self.std_for_standardization = torch.tensor(\n+ [\n+ 0.08322756,\n+ 2.31893439,\n+ 1.65605574,\n+ 2.15447078,\n+ 2.19682881,\n+ 2.99600806,\n+ 1.24328795,\n+ 0.92352521,\n+ 0.93849802,\n+ 0.93872011,\n+ 0.57455891,\n+ 0.5837217,\n+ ],\n+ device=\"cuda\",\n+ )\n+\n+ def vec(\n+ self, m: int, n: int, k: int, dsize: int, config: Any\n+ ) -> tuple[int, int, int, int, int, int, int, int, int]:\n+ \"\"\"\n+ Convert matrix multiplication parameters and config to a feature vector.\n+\n+ Args:\n+ m: First dimension of matrix multiplication\n+ n: Second dimension of matrix multiplication\n+ k: Third dimension of matrix multiplication\n+ dsize: Data size in bits (e.g., 16 for float16, 32 for float32)\n+ config: Configuration object containing kernel parameters\n+\n+ Returns:\n+ Tuple containing the extracted features\n+ \"\"\"\n+ kwargs = config.all_kwargs()\n+\n+ return (\n+ int(m),\n+ int(n),\n+ int(k),\n+ int(dsize),\n+ int(kwargs[\"BLOCK_M\"]),\n+ int(kwargs[\"BLOCK_N\"]),\n+ int(kwargs[\"BLOCK_K\"]),\n+ int(kwargs[\"num_stages\"]),\n+ int(kwargs[\"num_warps\"]),\n+ )\n+\n+ @staticmethod\n+ def vec_params(\n+ m: int, n: int, k: int, dsize: int, params: TritonGEMMConfig\n+ ) -> tuple[int, int, int, int, int, int, int, int, int]:\n+ \"\"\"\n+ Convert matrix multiplication parameters and config to a feature vector.\n+\n+ Args:\n+ m: First dimension of matrix multiplication\n+ n: Second dimension of matrix multiplication\n+ k: Third dimension of matrix multiplication\n+ dsize: Data size in bits (e.g., 16 for float16, 32 for float32)\n+ config: Configuration object containing kernel parameters\n+\n+ Returns:\n+ Tuple containing the extracted features\n+ \"\"\"\n+\n+ return (\n+ int(m),\n+ int(n),\n+ int(k),\n+ int(dsize),\n+ int(params.block_m),\n+ int(params.block_n),\n+ int(params.block_k),\n+ int(params.num_stages),\n+ int(params.num_warps),\n+ )\n+\n+ def encode(\n+ self, m: int, n: int, k: int, dtype: torch.dtype, configs: list[Any]\n+ ) -> torch.Tensor:\n+ \"\"\"\n+ Encode the matrix multiplication parameters and configs as input tensors for the model.\n+\n+ Args:\n+ m: First dimension of matrix multiplication\n+ n: Second dimension of matrix multiplication\n+ k: Third dimension of matrix multiplication\n+ dtype: Data type of the matrices\n+ configs: List of configuration objects\n+\n+ Returns:\n+ Tensor containing the encoded inputs ready for the model\n+\n+ Raises:\n+ ValueError: If the dtype is not supported\n+ \"\"\"\n+ # Determine data size based on dtype\n+ if dtype == torch.bfloat16 or dtype == torch.float16:\n+ dsize = 16\n+ elif dtype == torch.float32:\n+ dsize = 32\n+ else:\n+ raise ValueError(f\"Unsupported dtype: {dtype}. Add support for this dtype.\")\n+\n+ # Create feature dataframe\n+ df = pd.DataFrame(", - "comment_created_at": "2025-06-30T15:55:10+00:00", - "comment_author": "esantorella", - "comment_body": "Not something that needs to be addressed right now, but I wonder if a lot of the overhead we're seeing is from using Pandas, which can be super slow and may introduce a new dependency. 
We can easily rewrite this to use solely PyTorch.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2064234144", - "pr_number": 152197, - "pr_file": "torch/_inductor/runtime/triton_heuristics.py", - "created_at": "2025-04-28T18:12:34+00:00", - "commented_code": "if autotune_cache:\n if best_config := autotune_cache.read_best(inductor_meta, configs):\n configs = [best_config]\n\n autotune_cache_info[\"best_config\"] = triton_config_to_hashable(\n best_config\n )\n autotune_cache_info[\"autotune_cache_state\"] = \"hit\"\n else:\n autotune_cache_info[\"autotune_cache_state\"] = \"miss\"\n autotune_cache_info[\"num_configs\"] = len(configs)\n if inductor_meta.get(\"coordinate_descent_tuning\"):\n autotune_cache_info[\"coordesc_tuning\"] = True\n if len(configs) == 1:", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2064234144", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152197, - "pr_file": "torch/_inductor/runtime/triton_heuristics.py", - "discussion_id": "2064234144", - "commented_code": "@@ -1698,9 +1701,28 @@ def cached_autotune(\n if autotune_cache:\n if best_config := autotune_cache.read_best(inductor_meta, configs):\n configs = [best_config]\n-\n+ autotune_cache_info[\"best_config\"] = triton_config_to_hashable(\n+ best_config\n+ )\n+ autotune_cache_info[\"autotune_cache_state\"] = \"hit\"\n+ else:\n+ autotune_cache_info[\"autotune_cache_state\"] = \"miss\"\n+ autotune_cache_info[\"num_configs\"] = len(configs)\n+ if inductor_meta.get(\"coordinate_descent_tuning\"):\n+ autotune_cache_info[\"coordesc_tuning\"] = True\n+ if len(configs) == 1:", - "comment_created_at": "2025-04-28T18:12:34+00:00", - "comment_author": "bdhirsh", - "comment_body": "basic question: given that we are logging the results of autotuning, what does it actually mean for there to be more than one config here? (shouldn't autotuning always end in a single config we can log?) ", - "pr_file_module": null - }, - { - "comment_id": "2066787922", - "repo_full_name": "pytorch/pytorch", - "pr_number": 152197, - "pr_file": "torch/_inductor/runtime/triton_heuristics.py", - "discussion_id": "2064234144", - "commented_code": "@@ -1698,9 +1701,28 @@ def cached_autotune(\n if autotune_cache:\n if best_config := autotune_cache.read_best(inductor_meta, configs):\n configs = [best_config]\n-\n+ autotune_cache_info[\"best_config\"] = triton_config_to_hashable(\n+ best_config\n+ )\n+ autotune_cache_info[\"autotune_cache_state\"] = \"hit\"\n+ else:\n+ autotune_cache_info[\"autotune_cache_state\"] = \"miss\"\n+ autotune_cache_info[\"num_configs\"] = len(configs)\n+ if inductor_meta.get(\"coordinate_descent_tuning\"):\n+ autotune_cache_info[\"coordesc_tuning\"] = True\n+ if len(configs) == 1:", - "comment_created_at": "2025-04-29T15:11:20+00:00", - "comment_author": "jamesjwu", - "comment_body": "We're logging the compile time \"results\", in that we're logging all the possible configs we need to actually autotune when the function is actually called. But we haven't run autotuning yet, so there can be more than one config. \r\n\r\nWe run autotuning later after dynamo returns, in CachingAutotuner.benchmark_all_configs. There, it should be possible to log just the best config. 
", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/pytorch-write-clean-maintainable-code.json b/_reviewers/pytorch-write-clean-maintainable-code.json new file mode 100644 index 0000000..ddfd040 --- /dev/null +++ b/_reviewers/pytorch-write-clean-maintainable-code.json @@ -0,0 +1,146 @@ +[ + { + "discussion_id": "2176926064", + "pr_number": 156742, + "pr_file": "setup.py", + "created_at": "2025-07-01T09:04:42+00:00", + "commented_code": "\"lib/*.lib\",\n ]\n )\n aotriton_image_path = os.path.join(lib_path, \"aotriton.images\")\n aks2_files = []\n for root, dirs, files in os.walk(aotriton_image_path):\n subpath = os.path.relpath(root, start=aotriton_image_path)\n for fn in files:\n aks2_files.append(os.path.join(\"lib/aotriton.images\", subpath, fn))\n # XXX: Why not use wildcards [\"lib/aotriton.images/*\", \"lib/aotriton.images/**/*\"] here?\n aotriton_image_path = TORCH_DIR / \"lib\" / \"aotriton.images\"\n aks2_files = [\n file.relative_to(TORCH_DIR).as_posix()\n for file in aotriton_image_path.rglob(\"*\")\n if file.is_file()\n ]", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2176926064", + "repo_full_name": "pytorch/pytorch", + "pr_number": 156742, + "pr_file": "setup.py", + "discussion_id": "2176926064", + "commented_code": "@@ -1313,12 +1333,13 @@ def main() -> None:\n \"lib/*.lib\",\n ]\n )\n- aotriton_image_path = os.path.join(lib_path, \"aotriton.images\")\n- aks2_files = []\n- for root, dirs, files in os.walk(aotriton_image_path):\n- subpath = os.path.relpath(root, start=aotriton_image_path)\n- for fn in files:\n- aks2_files.append(os.path.join(\"lib/aotriton.images\", subpath, fn))\n+ # XXX: Why not use wildcards [\"lib/aotriton.images/*\", \"lib/aotriton.images/**/*\"] here?\n+ aotriton_image_path = TORCH_DIR / \"lib\" / \"aotriton.images\"\n+ aks2_files = [\n+ file.relative_to(TORCH_DIR).as_posix()\n+ for file in aotriton_image_path.rglob(\"*\")\n+ if file.is_file()\n+ ]", + "comment_created_at": "2025-07-01T09:04:42+00:00", + "comment_author": "XuehaiPan", + "comment_body": "The previous code gets `aks2_files = []` and breaks the ROCm build on trunk. I'm not sure why.\r\n\r\nI think the following code seems equivalent.\r\n\r\n```diff\r\n aotriton_image_path = TORCH_DIR / \"lib\" / \"aotriton.images\"\r\n- aks2_files: list[str] = []\r\n- for file in filter(lambda p: p.is_file(), aotriton_image_path.glob(\"**\")):\r\n- subpath = file.relative_to(aotriton_image_path)\r\n- aks2_files.append(os.path.join(\"lib/aotriton.images\", subpath))\r\n+ aks2_files = [\r\n+ file.relative_to(TORCH_DIR).as_posix()\r\n+ for file in aotriton_image_path.rglob(\"*\")\r\n+ if file.is_file()\r\n+ ]\r\n```\r\n\r\nPS: `path.rglob(\"*\")` is equivalent to `path.glob(\"**/*\")`. `path.glob(\"**\")` will yield `path` while `path.glob(\"**/*\")` will not. But we are looking for files only, so they are the same.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2148467367", + "pr_number": 155998, + "pr_file": "setup.py", + "created_at": "2025-06-15T16:37:04+00:00", + "commented_code": "import platform\n\n\nBUILD_LIBTORCH_WHL = os.getenv(\"BUILD_LIBTORCH_WHL\", \"0\") == \"1\"\nBUILD_PYTHON_ONLY = os.getenv(\"BUILD_PYTHON_ONLY\", \"0\") == \"1\"\n\n# Also update `project.requires-python` in pyproject.toml when changing this\npython_min_version = (3, 9, 0)\npython_min_version_str = \".\".join(map(str, python_min_version))\nif sys.version_info < python_min_version:\n print(\n f\"You are using Python {platform.python_version()}. 
Python >={python_min_version_str} is required.\"\n f\"You are using Python {platform.python_version()}. \"\n f\"Python >={python_min_version_str} is required.\"", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2148467367", + "repo_full_name": "pytorch/pytorch", + "pr_number": 155998, + "pr_file": "setup.py", + "discussion_id": "2148467367", + "commented_code": "@@ -234,14 +234,13 @@\n import platform\n \n \n-BUILD_LIBTORCH_WHL = os.getenv(\"BUILD_LIBTORCH_WHL\", \"0\") == \"1\"\n-BUILD_PYTHON_ONLY = os.getenv(\"BUILD_PYTHON_ONLY\", \"0\") == \"1\"\n-\n+# Also update `project.requires-python` in pyproject.toml when changing this\n python_min_version = (3, 9, 0)\n python_min_version_str = \".\".join(map(str, python_min_version))\n if sys.version_info < python_min_version:\n print(\n- f\"You are using Python {platform.python_version()}. Python >={python_min_version_str} is required.\"\n+ f\"You are using Python {platform.python_version()}. \"\n+ f\"Python >={python_min_version_str} is required.\"", + "comment_created_at": "2025-06-15T16:37:04+00:00", + "comment_author": "malfet", + "comment_body": "Please don't mix unrelated formatting changes with some functional changes", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2019529371", + "pr_number": 149697, + "pr_file": "torch/_inductor/scheduler.py", + "created_at": "2025-03-28T22:54:18+00:00", + "commented_code": "return buf_byte_accesses\n\n @cache_on_self\n def estimate_flops(self) -> int | None:\n from torch._subclasses.fake_tensor import FakeTensorMode\n from torch.utils.flop_counter import FlopCounterMode\n\n op = kernel_name_to_op.get(getattr(self.node, \"python_kernel_name\", \"\"), None)\n\n if isinstance(self, ExternKernel):\n if op is not None:", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2019529371", + "repo_full_name": "pytorch/pytorch", + "pr_number": 149697, + "pr_file": "torch/_inductor/scheduler.py", + "discussion_id": "2019529371", + "commented_code": "@@ -736,6 +737,48 @@ def get_buf_bytes(\n \n return buf_byte_accesses\n \n+ @cache_on_self\n+ def estimate_flops(self) -> int | None:\n+ from torch._subclasses.fake_tensor import FakeTensorMode\n+ from torch.utils.flop_counter import FlopCounterMode\n+\n+ op = kernel_name_to_op.get(getattr(self.node, \"python_kernel_name\", \"\"), None)\n+\n+ if isinstance(self, ExternKernel):\n+ if op is not None:", + "comment_created_at": "2025-03-28T22:54:18+00:00", + "comment_author": "eellison", + "comment_body": " nit? can we reduce nesting. 
\r\n\r\n```\r\nif not isinstance(self, ExternKernel) or op is None:\r\n return None\r\n```\r\n", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2019531149", + "pr_number": 149697, + "pr_file": "torch/_inductor/scheduler.py", + "created_at": "2025-03-28T22:57:08+00:00", + "commented_code": "return buf_byte_accesses\n\n @cache_on_self\n def estimate_flops(self) -> int | None:\n from torch._subclasses.fake_tensor import FakeTensorMode\n from torch.utils.flop_counter import FlopCounterMode\n\n op = kernel_name_to_op.get(getattr(self.node, \"python_kernel_name\", \"\"), None)\n\n if isinstance(self, ExternKernel):\n if op is not None:\n # make mypy happy\n # mypy isn't smart enough to infer from InputsKernel that self.node.inputs\n # and self.node.fx_node exists\n kern: ExternKernel = self\n if kern.node is None:\n return None\n if any(\n len(free_unbacked_symbols(n.get_numel())) > 0\n for n in kern.node.inputs\n ):\n # Tensor has unbacked symints, we don't know how to estimate\n # runtime for that today\n return None\n\n with (\n FakeTensorMode() as fake_mode,\n FlopCounterMode(display=False) as flop_counter_mode,\n V.set_current_node(kern.node.fx_node), # type ignore[attr-defined]\n V.set_fake_mode(fake_mode),\n ):\n from .ir import ir_node_to_tensor\n\n fake_inputs = [\n ir_node_to_tensor(input, guard_shape=False)\n for input in kern.node.inputs # type: ignore[attr-defined]\n ]\n cls = kern.node.__class__\n cls.process_kernel(op, *fake_inputs, **kern.node.kwargs)\n\n ret = flop_counter_mode.get_total_flops()", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2019531149", + "repo_full_name": "pytorch/pytorch", + "pr_number": 149697, + "pr_file": "torch/_inductor/scheduler.py", + "discussion_id": "2019531149", + "commented_code": "@@ -736,6 +737,48 @@ def get_buf_bytes(\n \n return buf_byte_accesses\n \n+ @cache_on_self\n+ def estimate_flops(self) -> int | None:\n+ from torch._subclasses.fake_tensor import FakeTensorMode\n+ from torch.utils.flop_counter import FlopCounterMode\n+\n+ op = kernel_name_to_op.get(getattr(self.node, \"python_kernel_name\", \"\"), None)\n+\n+ if isinstance(self, ExternKernel):\n+ if op is not None:\n+ # make mypy happy\n+ # mypy isn't smart enough to infer from InputsKernel that self.node.inputs\n+ # and self.node.fx_node exists\n+ kern: ExternKernel = self\n+ if kern.node is None:\n+ return None\n+ if any(\n+ len(free_unbacked_symbols(n.get_numel())) > 0\n+ for n in kern.node.inputs\n+ ):\n+ # Tensor has unbacked symints, we don't know how to estimate\n+ # runtime for that today\n+ return None\n+\n+ with (\n+ FakeTensorMode() as fake_mode,\n+ FlopCounterMode(display=False) as flop_counter_mode,\n+ V.set_current_node(kern.node.fx_node), # type ignore[attr-defined]\n+ V.set_fake_mode(fake_mode),\n+ ):\n+ from .ir import ir_node_to_tensor\n+\n+ fake_inputs = [\n+ ir_node_to_tensor(input, guard_shape=False)\n+ for input in kern.node.inputs # type: ignore[attr-defined]\n+ ]\n+ cls = kern.node.__class__\n+ cls.process_kernel(op, *fake_inputs, **kern.node.kwargs)\n+\n+ ret = flop_counter_mode.get_total_flops()", + "comment_created_at": "2025-03-28T22:57:08+00:00", + "comment_author": "eellison", + "comment_body": "nit: no need for `ret` var..", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "2173356037", + "pr_number": 157178, + "pr_file": "torch/distributed/tensor/_sharding_prop.py", + "created_at": "2025-06-28T15:31:34+00:00", + "commented_code": "f\"Operator {op_schema.op} does not have a sharding 
strategy registered.\"\n )\n\n def _select_strategy(self, strategy: OpStrategy) -> OpSpec:\n def _select_strategy(self, op_name, strategy: OpStrategy) -> OpSpec:\n print(f\"{op_name}, {strategy.strategies}\")", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "2173356037", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157178, + "pr_file": "torch/distributed/tensor/_sharding_prop.py", + "discussion_id": "2173356037", + "commented_code": "@@ -487,15 +487,23 @@ def propagate_op_sharding_non_cached(self, op_schema: OpSchema) -> OutputShardin\n f\"Operator {op_schema.op} does not have a sharding strategy registered.\"\n )\n \n- def _select_strategy(self, strategy: OpStrategy) -> OpSpec:\n+ def _select_strategy(self, op_name, strategy: OpStrategy) -> OpSpec:\n+ print(f\"{op_name}, {strategy.strategies}\")", + "comment_created_at": "2025-06-28T15:31:34+00:00", + "comment_author": "Skylion007", + "comment_body": "Print statement left in", + "pr_file_module": null + }, + { + "comment_id": "2173563701", + "repo_full_name": "pytorch/pytorch", + "pr_number": 157178, + "pr_file": "torch/distributed/tensor/_sharding_prop.py", + "discussion_id": "2173356037", + "commented_code": "@@ -487,15 +487,23 @@ def propagate_op_sharding_non_cached(self, op_schema: OpSchema) -> OutputShardin\n f\"Operator {op_schema.op} does not have a sharding strategy registered.\"\n )\n \n- def _select_strategy(self, strategy: OpStrategy) -> OpSpec:\n+ def _select_strategy(self, op_name, strategy: OpStrategy) -> OpSpec:\n+ print(f\"{op_name}, {strategy.strategies}\")", + "comment_created_at": "2025-06-29T00:53:52+00:00", + "comment_author": "zpcore", + "comment_body": "Oops, removed!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1805708823", + "pr_number": 137846, + "pr_file": "torch/utils/collect_env.py", + "created_at": "2024-10-18T01:47:18+00:00", + "commented_code": "return smi\n\n\ndef get_pkg_version(run_lambda, pkg):\n ret = \"\"\n index = -1\n if get_platform() == \"linux\":\n mgr_name = \"\"\n if mgr_name == \"\":\n rc, _, _ = run(\"which dpkg\")\n if rc == 0:\n mgr_name = \"dpkg\"\n if mgr_name == \"\":\n rc, _, _ = run(\"which dnf\")\n if rc == 0:\n mgr_name = \"dnf\"\n if mgr_name == \"\":\n rc, _, _ = run(\"which yum\")\n if rc == 0:\n mgr_name = \"yum\"\n if mgr_name == \"\":\n rc, _, _ = run(\"which zypper\")\n if rc == 0:\n mgr_name = \"zypper\"", + "repo_full_name": "pytorch/pytorch", + "discussion_comments": [ + { + "comment_id": "1805708823", + "repo_full_name": "pytorch/pytorch", + "pr_number": 137846, + "pr_file": "torch/utils/collect_env.py", + "discussion_id": "1805708823", + "commented_code": "@@ -225,6 +230,139 @@ def get_nvidia_smi():\n return smi\n \n \n+def get_pkg_version(run_lambda, pkg):\n+ ret = \"\"\n+ index = -1\n+ if get_platform() == \"linux\":\n+ mgr_name = \"\"\n+ if mgr_name == \"\":\n+ rc, _, _ = run(\"which dpkg\")\n+ if rc == 0:\n+ mgr_name = \"dpkg\"\n+ if mgr_name == \"\":\n+ rc, _, _ = run(\"which dnf\")\n+ if rc == 0:\n+ mgr_name = \"dnf\"\n+ if mgr_name == \"\":\n+ rc, _, _ = run(\"which yum\")\n+ if rc == 0:\n+ mgr_name = \"yum\"\n+ if mgr_name == \"\":\n+ rc, _, _ = run(\"which zypper\")\n+ if rc == 0:\n+ mgr_name = \"zypper\"", + "comment_created_at": "2024-10-18T01:47:18+00:00", + "comment_author": "malfet", + "comment_body": "Please Avoid code duplication, use loops\r\n```suggestion\r\n for mgr_name in [\"dpkg\", \"dnf\", \"yum\", \"zypper\", \"\"]:\r\n if mgr_name == \"\":\r\n continue\r\n rc, _, _ = run(f\"which 
{mgr_name}\")\r\n if rc == 0:\r\n break\r\n```", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/pytorch-write-clean-maintainable-code.md b/_reviewers/pytorch-write-clean-maintainable-code.md index ceb8f27..c5d2d19 100644 --- a/_reviewers/pytorch-write-clean-maintainable-code.md +++ b/_reviewers/pytorch-write-clean-maintainable-code.md @@ -92,151 +92,3 @@ for mgr_name in ["dpkg", "dnf", "yum", "zypper", ""]: ``` These practices significantly improve code readability, reduce bugs, and make maintenance easier for everyone working with the codebase. - - -[ - { - "discussion_id": "2176926064", - "pr_number": 156742, - "pr_file": "setup.py", - "created_at": "2025-07-01T09:04:42+00:00", - "commented_code": "\"lib/*.lib\",\n ]\n )\n aotriton_image_path = os.path.join(lib_path, \"aotriton.images\")\n aks2_files = []\n for root, dirs, files in os.walk(aotriton_image_path):\n subpath = os.path.relpath(root, start=aotriton_image_path)\n for fn in files:\n aks2_files.append(os.path.join(\"lib/aotriton.images\", subpath, fn))\n # XXX: Why not use wildcards [\"lib/aotriton.images/*\", \"lib/aotriton.images/**/*\"] here?\n aotriton_image_path = TORCH_DIR / \"lib\" / \"aotriton.images\"\n aks2_files = [\n file.relative_to(TORCH_DIR).as_posix()\n for file in aotriton_image_path.rglob(\"*\")\n if file.is_file()\n ]", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2176926064", - "repo_full_name": "pytorch/pytorch", - "pr_number": 156742, - "pr_file": "setup.py", - "discussion_id": "2176926064", - "commented_code": "@@ -1313,12 +1333,13 @@ def main() -> None:\n \"lib/*.lib\",\n ]\n )\n- aotriton_image_path = os.path.join(lib_path, \"aotriton.images\")\n- aks2_files = []\n- for root, dirs, files in os.walk(aotriton_image_path):\n- subpath = os.path.relpath(root, start=aotriton_image_path)\n- for fn in files:\n- aks2_files.append(os.path.join(\"lib/aotriton.images\", subpath, fn))\n+ # XXX: Why not use wildcards [\"lib/aotriton.images/*\", \"lib/aotriton.images/**/*\"] here?\n+ aotriton_image_path = TORCH_DIR / \"lib\" / \"aotriton.images\"\n+ aks2_files = [\n+ file.relative_to(TORCH_DIR).as_posix()\n+ for file in aotriton_image_path.rglob(\"*\")\n+ if file.is_file()\n+ ]", - "comment_created_at": "2025-07-01T09:04:42+00:00", - "comment_author": "XuehaiPan", - "comment_body": "The previous code gets `aks2_files = []` and breaks the ROCm build on trunk. I'm not sure why.\r\n\r\nI think the following code seems equivalent.\r\n\r\n```diff\r\n aotriton_image_path = TORCH_DIR / \"lib\" / \"aotriton.images\"\r\n- aks2_files: list[str] = []\r\n- for file in filter(lambda p: p.is_file(), aotriton_image_path.glob(\"**\")):\r\n- subpath = file.relative_to(aotriton_image_path)\r\n- aks2_files.append(os.path.join(\"lib/aotriton.images\", subpath))\r\n+ aks2_files = [\r\n+ file.relative_to(TORCH_DIR).as_posix()\r\n+ for file in aotriton_image_path.rglob(\"*\")\r\n+ if file.is_file()\r\n+ ]\r\n```\r\n\r\nPS: `path.rglob(\"*\")` is equivalent to `path.glob(\"**/*\")`. `path.glob(\"**\")` will yield `path` while `path.glob(\"**/*\")` will not. 
But we are looking for files only, so they are the same.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2148467367", - "pr_number": 155998, - "pr_file": "setup.py", - "created_at": "2025-06-15T16:37:04+00:00", - "commented_code": "import platform\n\n\nBUILD_LIBTORCH_WHL = os.getenv(\"BUILD_LIBTORCH_WHL\", \"0\") == \"1\"\nBUILD_PYTHON_ONLY = os.getenv(\"BUILD_PYTHON_ONLY\", \"0\") == \"1\"\n\n# Also update `project.requires-python` in pyproject.toml when changing this\npython_min_version = (3, 9, 0)\npython_min_version_str = \".\".join(map(str, python_min_version))\nif sys.version_info < python_min_version:\n print(\n f\"You are using Python {platform.python_version()}. Python >={python_min_version_str} is required.\"\n f\"You are using Python {platform.python_version()}. \"\n f\"Python >={python_min_version_str} is required.\"", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2148467367", - "repo_full_name": "pytorch/pytorch", - "pr_number": 155998, - "pr_file": "setup.py", - "discussion_id": "2148467367", - "commented_code": "@@ -234,14 +234,13 @@\n import platform\n \n \n-BUILD_LIBTORCH_WHL = os.getenv(\"BUILD_LIBTORCH_WHL\", \"0\") == \"1\"\n-BUILD_PYTHON_ONLY = os.getenv(\"BUILD_PYTHON_ONLY\", \"0\") == \"1\"\n-\n+# Also update `project.requires-python` in pyproject.toml when changing this\n python_min_version = (3, 9, 0)\n python_min_version_str = \".\".join(map(str, python_min_version))\n if sys.version_info < python_min_version:\n print(\n- f\"You are using Python {platform.python_version()}. Python >={python_min_version_str} is required.\"\n+ f\"You are using Python {platform.python_version()}. \"\n+ f\"Python >={python_min_version_str} is required.\"", - "comment_created_at": "2025-06-15T16:37:04+00:00", - "comment_author": "malfet", - "comment_body": "Please don't mix unrelated formatting changes with some functional changes", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2019529371", - "pr_number": 149697, - "pr_file": "torch/_inductor/scheduler.py", - "created_at": "2025-03-28T22:54:18+00:00", - "commented_code": "return buf_byte_accesses\n\n @cache_on_self\n def estimate_flops(self) -> int | None:\n from torch._subclasses.fake_tensor import FakeTensorMode\n from torch.utils.flop_counter import FlopCounterMode\n\n op = kernel_name_to_op.get(getattr(self.node, \"python_kernel_name\", \"\"), None)\n\n if isinstance(self, ExternKernel):\n if op is not None:", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2019529371", - "repo_full_name": "pytorch/pytorch", - "pr_number": 149697, - "pr_file": "torch/_inductor/scheduler.py", - "discussion_id": "2019529371", - "commented_code": "@@ -736,6 +737,48 @@ def get_buf_bytes(\n \n return buf_byte_accesses\n \n+ @cache_on_self\n+ def estimate_flops(self) -> int | None:\n+ from torch._subclasses.fake_tensor import FakeTensorMode\n+ from torch.utils.flop_counter import FlopCounterMode\n+\n+ op = kernel_name_to_op.get(getattr(self.node, \"python_kernel_name\", \"\"), None)\n+\n+ if isinstance(self, ExternKernel):\n+ if op is not None:", - "comment_created_at": "2025-03-28T22:54:18+00:00", - "comment_author": "eellison", - "comment_body": " nit? can we reduce nesting. 
\r\n\r\n```\r\nif not isinstance(self, ExternKernel) or op is None:\r\n return None\r\n```\r\n", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2019531149", - "pr_number": 149697, - "pr_file": "torch/_inductor/scheduler.py", - "created_at": "2025-03-28T22:57:08+00:00", - "commented_code": "return buf_byte_accesses\n\n @cache_on_self\n def estimate_flops(self) -> int | None:\n from torch._subclasses.fake_tensor import FakeTensorMode\n from torch.utils.flop_counter import FlopCounterMode\n\n op = kernel_name_to_op.get(getattr(self.node, \"python_kernel_name\", \"\"), None)\n\n if isinstance(self, ExternKernel):\n if op is not None:\n # make mypy happy\n # mypy isn't smart enough to infer from InputsKernel that self.node.inputs\n # and self.node.fx_node exists\n kern: ExternKernel = self\n if kern.node is None:\n return None\n if any(\n len(free_unbacked_symbols(n.get_numel())) > 0\n for n in kern.node.inputs\n ):\n # Tensor has unbacked symints, we don't know how to estimate\n # runtime for that today\n return None\n\n with (\n FakeTensorMode() as fake_mode,\n FlopCounterMode(display=False) as flop_counter_mode,\n V.set_current_node(kern.node.fx_node), # type ignore[attr-defined]\n V.set_fake_mode(fake_mode),\n ):\n from .ir import ir_node_to_tensor\n\n fake_inputs = [\n ir_node_to_tensor(input, guard_shape=False)\n for input in kern.node.inputs # type: ignore[attr-defined]\n ]\n cls = kern.node.__class__\n cls.process_kernel(op, *fake_inputs, **kern.node.kwargs)\n\n ret = flop_counter_mode.get_total_flops()", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2019531149", - "repo_full_name": "pytorch/pytorch", - "pr_number": 149697, - "pr_file": "torch/_inductor/scheduler.py", - "discussion_id": "2019531149", - "commented_code": "@@ -736,6 +737,48 @@ def get_buf_bytes(\n \n return buf_byte_accesses\n \n+ @cache_on_self\n+ def estimate_flops(self) -> int | None:\n+ from torch._subclasses.fake_tensor import FakeTensorMode\n+ from torch.utils.flop_counter import FlopCounterMode\n+\n+ op = kernel_name_to_op.get(getattr(self.node, \"python_kernel_name\", \"\"), None)\n+\n+ if isinstance(self, ExternKernel):\n+ if op is not None:\n+ # make mypy happy\n+ # mypy isn't smart enough to infer from InputsKernel that self.node.inputs\n+ # and self.node.fx_node exists\n+ kern: ExternKernel = self\n+ if kern.node is None:\n+ return None\n+ if any(\n+ len(free_unbacked_symbols(n.get_numel())) > 0\n+ for n in kern.node.inputs\n+ ):\n+ # Tensor has unbacked symints, we don't know how to estimate\n+ # runtime for that today\n+ return None\n+\n+ with (\n+ FakeTensorMode() as fake_mode,\n+ FlopCounterMode(display=False) as flop_counter_mode,\n+ V.set_current_node(kern.node.fx_node), # type ignore[attr-defined]\n+ V.set_fake_mode(fake_mode),\n+ ):\n+ from .ir import ir_node_to_tensor\n+\n+ fake_inputs = [\n+ ir_node_to_tensor(input, guard_shape=False)\n+ for input in kern.node.inputs # type: ignore[attr-defined]\n+ ]\n+ cls = kern.node.__class__\n+ cls.process_kernel(op, *fake_inputs, **kern.node.kwargs)\n+\n+ ret = flop_counter_mode.get_total_flops()", - "comment_created_at": "2025-03-28T22:57:08+00:00", - "comment_author": "eellison", - "comment_body": "nit: no need for `ret` var..", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "2173356037", - "pr_number": 157178, - "pr_file": "torch/distributed/tensor/_sharding_prop.py", - "created_at": "2025-06-28T15:31:34+00:00", - "commented_code": "f\"Operator {op_schema.op} does not have a sharding 
strategy registered.\"\n )\n\n def _select_strategy(self, strategy: OpStrategy) -> OpSpec:\n def _select_strategy(self, op_name, strategy: OpStrategy) -> OpSpec:\n print(f\"{op_name}, {strategy.strategies}\")", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "2173356037", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157178, - "pr_file": "torch/distributed/tensor/_sharding_prop.py", - "discussion_id": "2173356037", - "commented_code": "@@ -487,15 +487,23 @@ def propagate_op_sharding_non_cached(self, op_schema: OpSchema) -> OutputShardin\n f\"Operator {op_schema.op} does not have a sharding strategy registered.\"\n )\n \n- def _select_strategy(self, strategy: OpStrategy) -> OpSpec:\n+ def _select_strategy(self, op_name, strategy: OpStrategy) -> OpSpec:\n+ print(f\"{op_name}, {strategy.strategies}\")", - "comment_created_at": "2025-06-28T15:31:34+00:00", - "comment_author": "Skylion007", - "comment_body": "Print statement left in", - "pr_file_module": null - }, - { - "comment_id": "2173563701", - "repo_full_name": "pytorch/pytorch", - "pr_number": 157178, - "pr_file": "torch/distributed/tensor/_sharding_prop.py", - "discussion_id": "2173356037", - "commented_code": "@@ -487,15 +487,23 @@ def propagate_op_sharding_non_cached(self, op_schema: OpSchema) -> OutputShardin\n f\"Operator {op_schema.op} does not have a sharding strategy registered.\"\n )\n \n- def _select_strategy(self, strategy: OpStrategy) -> OpSpec:\n+ def _select_strategy(self, op_name, strategy: OpStrategy) -> OpSpec:\n+ print(f\"{op_name}, {strategy.strategies}\")", - "comment_created_at": "2025-06-29T00:53:52+00:00", - "comment_author": "zpcore", - "comment_body": "Oops, removed!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1805708823", - "pr_number": 137846, - "pr_file": "torch/utils/collect_env.py", - "created_at": "2024-10-18T01:47:18+00:00", - "commented_code": "return smi\n\n\ndef get_pkg_version(run_lambda, pkg):\n ret = \"\"\n index = -1\n if get_platform() == \"linux\":\n mgr_name = \"\"\n if mgr_name == \"\":\n rc, _, _ = run(\"which dpkg\")\n if rc == 0:\n mgr_name = \"dpkg\"\n if mgr_name == \"\":\n rc, _, _ = run(\"which dnf\")\n if rc == 0:\n mgr_name = \"dnf\"\n if mgr_name == \"\":\n rc, _, _ = run(\"which yum\")\n if rc == 0:\n mgr_name = \"yum\"\n if mgr_name == \"\":\n rc, _, _ = run(\"which zypper\")\n if rc == 0:\n mgr_name = \"zypper\"", - "repo_full_name": "pytorch/pytorch", - "discussion_comments": [ - { - "comment_id": "1805708823", - "repo_full_name": "pytorch/pytorch", - "pr_number": 137846, - "pr_file": "torch/utils/collect_env.py", - "discussion_id": "1805708823", - "commented_code": "@@ -225,6 +230,139 @@ def get_nvidia_smi():\n return smi\n \n \n+def get_pkg_version(run_lambda, pkg):\n+ ret = \"\"\n+ index = -1\n+ if get_platform() == \"linux\":\n+ mgr_name = \"\"\n+ if mgr_name == \"\":\n+ rc, _, _ = run(\"which dpkg\")\n+ if rc == 0:\n+ mgr_name = \"dpkg\"\n+ if mgr_name == \"\":\n+ rc, _, _ = run(\"which dnf\")\n+ if rc == 0:\n+ mgr_name = \"dnf\"\n+ if mgr_name == \"\":\n+ rc, _, _ = run(\"which yum\")\n+ if rc == 0:\n+ mgr_name = \"yum\"\n+ if mgr_name == \"\":\n+ rc, _, _ = run(\"which zypper\")\n+ if rc == 0:\n+ mgr_name = \"zypper\"", - "comment_created_at": "2024-10-18T01:47:18+00:00", - "comment_author": "malfet", - "comment_body": "Please Avoid code duplication, use loops\r\n```suggestion\r\n for mgr_name in [\"dpkg\", \"dnf\", \"yum\", \"zypper\", \"\"]:\r\n if mgr_name == \"\":\r\n continue\r\n rc, _, _ = run(f\"which 
{mgr_name}\")\r\n if rc == 0:\r\n break\r\n```", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/rails-configure-at-proper-scope.json b/_reviewers/rails-configure-at-proper-scope.json new file mode 100644 index 0000000..a5f96b6 --- /dev/null +++ b/_reviewers/rails-configure-at-proper-scope.json @@ -0,0 +1,252 @@ +[ + { + "discussion_id": "1904669769", + "pr_number": 54129, + "pr_file": "actionview/lib/action_view/cache_expiry.rb", + "created_at": "2025-01-06T21:45:56+00:00", + "commented_code": "if @watched_dirs != dirs_to_watch\n @watched_dirs = dirs_to_watch\n new_watcher = @watcher_class.new([], @watched_dirs) do\n new_watcher = @watcher_class.new([], @watched_dirs, filter_path: Rails.root.to_s) do", + "repo_full_name": "rails/rails", + "discussion_comments": [ + { + "comment_id": "1904669769", + "repo_full_name": "rails/rails", + "pr_number": 54129, + "pr_file": "actionview/lib/action_view/cache_expiry.rb", + "discussion_id": "1904669769", + "commented_code": "@@ -40,7 +40,7 @@ def build_watcher\n \n if @watched_dirs != dirs_to_watch\n @watched_dirs = dirs_to_watch\n- new_watcher = @watcher_class.new([], @watched_dirs) do\n+ new_watcher = @watcher_class.new([], @watched_dirs, filter_path: Rails.root.to_s) do", + "comment_created_at": "2025-01-06T21:45:56+00:00", + "comment_author": "rafaelfranca", + "comment_body": "This is coupling Action View with Railties in the wrong place.\r\n\r\nWe should pass a configuration to ActionView in its Railtie and use it here.", + "pr_file_module": null + }, + { + "comment_id": "1905515638", + "repo_full_name": "rails/rails", + "pr_number": 54129, + "pr_file": "actionview/lib/action_view/cache_expiry.rb", + "discussion_id": "1904669769", + "commented_code": "@@ -40,7 +40,7 @@ def build_watcher\n \n if @watched_dirs != dirs_to_watch\n @watched_dirs = dirs_to_watch\n- new_watcher = @watcher_class.new([], @watched_dirs) do\n+ new_watcher = @watcher_class.new([], @watched_dirs, filter_path: Rails.root.to_s) do", + "comment_created_at": "2025-01-07T14:12:50+00:00", + "comment_author": "ermolaev", + "comment_body": "agree, rewrote the code, also added `filter_path` to `EventedFileUpdateChecker`, and added tests", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1374948225", + "pr_number": 49765, + "pr_file": "activerecord/lib/active_record/railtie.rb", + "created_at": "2023-10-27T18:59:39+00:00", + "commented_code": "end\n end\n end\n\n initializer \"active_record.attributes_for_inspect\" do |app|\n ActiveSupport.on_load(:active_record) do\n if app.config.consider_all_requests_local\n ActiveRecord::Base.attributes_for_inspect = :all", + "repo_full_name": "rails/rails", + "discussion_comments": [ + { + "comment_id": "1374948225", + "repo_full_name": "rails/rails", + "pr_number": 49765, + "pr_file": "activerecord/lib/active_record/railtie.rb", + "discussion_id": "1374948225", + "commented_code": "@@ -458,5 +458,13 @@ class Railtie < Rails::Railtie # :nodoc:\n end\n end\n end\n+\n+ initializer \"active_record.attributes_for_inspect\" do |app|\n+ ActiveSupport.on_load(:active_record) do\n+ if app.config.consider_all_requests_local\n+ ActiveRecord::Base.attributes_for_inspect = :all", + "comment_created_at": "2023-10-27T18:59:39+00:00", + "comment_author": "rafaelfranca", + "comment_body": "This will override any config the user set, no?", + "pr_file_module": null + }, + { + "comment_id": "1387061473", + "repo_full_name": "rails/rails", + "pr_number": 49765, + "pr_file": "activerecord/lib/active_record/railtie.rb", + "discussion_id": 
"1374948225", + "commented_code": "@@ -458,5 +458,13 @@ class Railtie < Rails::Railtie # :nodoc:\n end\n end\n end\n+\n+ initializer \"active_record.attributes_for_inspect\" do |app|\n+ ActiveSupport.on_load(:active_record) do\n+ if app.config.consider_all_requests_local\n+ ActiveRecord::Base.attributes_for_inspect = :all", + "comment_created_at": "2023-11-08T18:48:00+00:00", + "comment_author": "andrewn617", + "comment_body": "I don't think so, since it is run in the `:active_record` load hook, it will be set before `Base` is inherited by anything. I tested in my app and it worked as expected. But I will add a test like you suggested in your other comment.", + "pr_file_module": null + }, + { + "comment_id": "1387073788", + "repo_full_name": "rails/rails", + "pr_number": 49765, + "pr_file": "activerecord/lib/active_record/railtie.rb", + "discussion_id": "1374948225", + "commented_code": "@@ -458,5 +458,13 @@ class Railtie < Rails::Railtie # :nodoc:\n end\n end\n end\n+\n+ initializer \"active_record.attributes_for_inspect\" do |app|\n+ ActiveSupport.on_load(:active_record) do\n+ if app.config.consider_all_requests_local\n+ ActiveRecord::Base.attributes_for_inspect = :all", + "comment_created_at": "2023-11-08T18:59:43+00:00", + "comment_author": "rafaelfranca", + "comment_body": "What I mean is setting `config.active_record.attributes_for_inspect = [:uuid]`", + "pr_file_module": null + }, + { + "comment_id": "1387092660", + "repo_full_name": "rails/rails", + "pr_number": 49765, + "pr_file": "activerecord/lib/active_record/railtie.rb", + "discussion_id": "1374948225", + "commented_code": "@@ -458,5 +458,13 @@ class Railtie < Rails::Railtie # :nodoc:\n end\n end\n end\n+\n+ initializer \"active_record.attributes_for_inspect\" do |app|\n+ ActiveSupport.on_load(:active_record) do\n+ if app.config.consider_all_requests_local\n+ ActiveRecord::Base.attributes_for_inspect = :all", + "comment_created_at": "2023-11-08T19:16:04+00:00", + "comment_author": "andrewn617", + "comment_body": "Ah sorry, wasn't thinking of that angle. Fixed it and added a test. Thanks!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1761869122", + "pr_number": 52297, + "pr_file": "actionpack/lib/action_dispatch/routing/mapper.rb", + "created_at": "2024-09-16T20:21:05+00:00", + "commented_code": "end\n else\n def route_source_location\n if Mapper.route_source_locations\n if Mapper.route_source_locations || ActionDispatch.verbose_redirect_logs", + "repo_full_name": "rails/rails", + "discussion_comments": [ + { + "comment_id": "1761869122", + "repo_full_name": "rails/rails", + "pr_number": 52297, + "pr_file": "actionpack/lib/action_dispatch/routing/mapper.rb", + "discussion_id": "1761869122", + "commented_code": "@@ -392,7 +392,7 @@ def route_source_location\n end\n else\n def route_source_location\n- if Mapper.route_source_locations\n+ if Mapper.route_source_locations || ActionDispatch.verbose_redirect_logs", + "comment_created_at": "2024-09-16T20:21:05+00:00", + "comment_author": "p8", + "comment_body": "If we are going to return \"route source locations\", we might as well change how `Mapper.route_source_locations` is set and avoid `Mapper` having to know about `verbose_redirect_logs`.\r\n\r\n```suggestion\r\n if Mapper.route_source_locations\r\n```\r\n\r\nWe could set it here instead:\r\nhttps://github.com/rails/rails/blob/d0f830033b2ea27bc1e66683ce5b16bd9ca55df1/actionpack/lib/action_dispatch/railtie.rb#L71\r\n\r\n```ruby\r\n ActionDispatch::Routing::Mapper.route_source_locations = Rails.env.development? 
|| ActionDispatch.verbose_redirect_logs\r\n```\r\n\r\nOr could we just leave it out?\r\nNot sure what the result would be.\r\n", + "pr_file_module": null + }, + { + "comment_id": "1781095922", + "repo_full_name": "rails/rails", + "pr_number": 52297, + "pr_file": "actionpack/lib/action_dispatch/routing/mapper.rb", + "discussion_id": "1761869122", + "commented_code": "@@ -392,7 +392,7 @@ def route_source_location\n end\n else\n def route_source_location\n- if Mapper.route_source_locations\n+ if Mapper.route_source_locations || ActionDispatch.verbose_redirect_logs", + "comment_created_at": "2024-09-30T13:05:27+00:00", + "comment_author": "dennispaagman", + "comment_body": "Yeah that's a better place! \r\n\r\nI think leaving it out will make it that if you enable `verbose_redirect_logs` in a different environment than development the source locations won't be there for redirects from routes.\r\n\r\nI'm not sure if anyone wants to, or should, run this setting in different environments though, but it's possible.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "508997606", + "pr_number": 39398, + "pr_file": "actionpack/lib/action_dispatch/http/content_security_policy.rb", + "created_at": "2020-10-21T05:25:18+00:00", + "commented_code": "POLICY = \"Content-Security-Policy\"\n POLICY_REPORT_ONLY = \"Content-Security-Policy-Report-Only\"\n\n def initialize(app)\n def initialize(app, warn_on_no_content_security_policy = false)\n @app = app\n @warn_on_no_content_security_policy = warn_on_no_content_security_policy\n end\n\n def call(env)\n request = ActionDispatch::Request.new env\n _, headers, _ = response = @app.call(env)\n\n return response unless html_response?(headers)\n return response if policy_present?(headers)\n\n if policy = request.content_security_policy\n nonce = request.content_security_policy_nonce\n nonce_directives = request.content_security_policy_nonce_directives\n context = request.controller_instance || request\n headers[header_name(request)] = policy.build(context, nonce, nonce_directives)\n headers[header_name(request)] = policy.build(context, nonce, nonce_directives, headers[CONTENT_TYPE])\n else\n if (Rails.env.development? || Rails.env.test?) && logger(request) && @warn_on_no_content_security_policy", + "repo_full_name": "rails/rails", + "discussion_comments": [ + { + "comment_id": "508997606", + "repo_full_name": "rails/rails", + "pr_number": 39398, + "pr_file": "actionpack/lib/action_dispatch/http/content_security_policy.rb", + "discussion_id": "508997606", + "commented_code": "@@ -9,34 +9,32 @@ class Middleware\n POLICY = \"Content-Security-Policy\"\n POLICY_REPORT_ONLY = \"Content-Security-Policy-Report-Only\"\n \n- def initialize(app)\n+ def initialize(app, warn_on_no_content_security_policy = false)\n @app = app\n+ @warn_on_no_content_security_policy = warn_on_no_content_security_policy\n end\n \n def call(env)\n request = ActionDispatch::Request.new env\n _, headers, _ = response = @app.call(env)\n \n- return response unless html_response?(headers)\n return response if policy_present?(headers)\n \n if policy = request.content_security_policy\n nonce = request.content_security_policy_nonce\n nonce_directives = request.content_security_policy_nonce_directives\n context = request.controller_instance || request\n- headers[header_name(request)] = policy.build(context, nonce, nonce_directives)\n+ headers[header_name(request)] = policy.build(context, nonce, nonce_directives, headers[CONTENT_TYPE])\n+ else\n+ if (Rails.env.development? || Rails.env.test?) 
&& logger(request) && @warn_on_no_content_security_policy", + "comment_created_at": "2020-10-21T05:25:18+00:00", + "comment_author": "rafaelfranca", + "comment_body": "Checks for environment can't enter the action pack code. If we want different behavior per environment this should be a config on this class.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1030612902", + "pr_number": 46555, + "pr_file": "activejob/lib/active_job/log_subscriber.rb", + "created_at": "2022-11-23T15:54:29+00:00", + "commented_code": "job = event.payload[:job]\n ex = event.payload[:exception_object]\n if ex\n cleaned_backtrace = Rails.backtrace_cleaner.clean(ex.backtrace)", + "repo_full_name": "rails/rails", + "discussion_comments": [ + { + "comment_id": "1030612902", + "repo_full_name": "rails/rails", + "pr_number": 46555, + "pr_file": "activejob/lib/active_job/log_subscriber.rb", + "discussion_id": "1030612902", + "commented_code": "@@ -57,8 +57,9 @@ def perform(event)\n job = event.payload[:job]\n ex = event.payload[:exception_object]\n if ex\n+ cleaned_backtrace = Rails.backtrace_cleaner.clean(ex.backtrace)", + "comment_created_at": "2022-11-23T15:54:29+00:00", + "comment_author": "rafaelfranca", + "comment_body": "No Rails framework should have access to the `Rails` constant inside it since they can be used outside of a Rails application. We should be doing the same as `ActiveRecord::LogSubscriber`:\r\n\r\nhttps://github.com/rails/rails/blob/40563a3ab457f5b1d8a5defaecb7375c55bc3fd5/activerecord/lib/active_record/railtie.rb#L97-L99\r\nhttps://github.com/rails/rails/blob/40563a3ab457f5b1d8a5defaecb7375c55bc3fd5/activerecord/lib/active_record/log_subscriber.rb#L7", + "pr_file_module": null + }, + { + "comment_id": "1030725259", + "repo_full_name": "rails/rails", + "pr_number": 46555, + "pr_file": "activejob/lib/active_job/log_subscriber.rb", + "discussion_id": "1030612902", + "commented_code": "@@ -57,8 +57,9 @@ def perform(event)\n job = event.payload[:job]\n ex = event.payload[:exception_object]\n if ex\n+ cleaned_backtrace = Rails.backtrace_cleaner.clean(ex.backtrace)", + "comment_created_at": "2022-11-23T17:36:19+00:00", + "comment_author": "warrenbhw", + "comment_body": "Hey @rafaelfranca, thanks for taking a look.\r\nI'll address this and fix the tests soon\r\nCheers!", + "pr_file_module": null + }, + { + "comment_id": "1034174603", + "repo_full_name": "rails/rails", + "pr_number": 46555, + "pr_file": "activejob/lib/active_job/log_subscriber.rb", + "discussion_id": "1030612902", + "commented_code": "@@ -57,8 +57,9 @@ def perform(event)\n job = event.payload[:job]\n ex = event.payload[:exception_object]\n if ex\n+ cleaned_backtrace = Rails.backtrace_cleaner.clean(ex.backtrace)", + "comment_created_at": "2022-11-29T00:10:17+00:00", + "comment_author": "warrenbhw", + "comment_body": "@rafaelfranca the changes that you suggested are made. CI seems to be failing on a flaky test. 
Is there a way for me to queue another build another than pushing some whitespace changes?", + "pr_file_module": null + }, + { + "comment_id": "1034245869", + "repo_full_name": "rails/rails", + "pr_number": 46555, + "pr_file": "activejob/lib/active_job/log_subscriber.rb", + "discussion_id": "1030612902", + "commented_code": "@@ -57,8 +57,9 @@ def perform(event)\n job = event.payload[:job]\n ex = event.payload[:exception_object]\n if ex\n+ cleaned_backtrace = Rails.backtrace_cleaner.clean(ex.backtrace)", + "comment_created_at": "2022-11-29T02:41:52+00:00", + "comment_author": "warrenbhw", + "comment_body": "@rafaelfranca CI tests are passing. Lmk if there's anything else that I need to address here to get this merged!", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1706409109", + "pr_number": 52353, + "pr_file": "actionpack/lib/action_dispatch/routing/route_set.rb", + "created_at": "2024-08-07T05:39:52+00:00", + "commented_code": "end\n\n def self.new_with_config(config)\n route_set_config = DEFAULT_CONFIG\n route_set_config = DEFAULT_CONFIG.dup", + "repo_full_name": "rails/rails", + "discussion_comments": [ + { + "comment_id": "1706409109", + "repo_full_name": "rails/rails", + "pr_number": 52353, + "pr_file": "actionpack/lib/action_dispatch/routing/route_set.rb", + "discussion_id": "1706409109", + "commented_code": "@@ -363,7 +363,7 @@ def self.default_resources_path_names\n end\n \n def self.new_with_config(config)\n- route_set_config = DEFAULT_CONFIG\n+ route_set_config = DEFAULT_CONFIG.dup", + "comment_created_at": "2024-08-07T05:39:52+00:00", + "comment_author": "gmcgibbon", + "comment_body": "This has to be a bug. We mutate a constant in the next few lines which changes the config of future route sets. Duping the constant prevents this.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1478393815", + "pr_number": 50969, + "pr_file": "activerecord/lib/active_record/query_logs.rb", + "created_at": "2024-02-05T15:02:15+00:00", + "commented_code": "#\n # config.active_record.cache_query_log_tags = true\n module QueryLogs\n mattr_accessor :taggings, instance_accessor: false, default: {}\n mattr_accessor :taggings, instance_accessor: false, default: {\n line: -> { query_source_location }", + "repo_full_name": "rails/rails", + "discussion_comments": [ + { + "comment_id": "1478393815", + "repo_full_name": "rails/rails", + "pr_number": 50969, + "pr_file": "activerecord/lib/active_record/query_logs.rb", + "discussion_id": "1478393815", + "commented_code": "@@ -71,7 +72,10 @@ module ActiveRecord\n #\n # config.active_record.cache_query_log_tags = true\n module QueryLogs\n- mattr_accessor :taggings, instance_accessor: false, default: {}\n+ mattr_accessor :taggings, instance_accessor: false, default: {\n+ line: -> { query_source_location }", + "comment_created_at": "2024-02-05T15:02:15+00:00", + "comment_author": "byroot", + "comment_body": "This should be set in `activerecord/railtie.rb` like the others, otherwise it risks being overidden by users.", + "pr_file_module": null + } + ] + } +] \ No newline at end of file diff --git a/_reviewers/rails-configure-at-proper-scope.md b/_reviewers/rails-configure-at-proper-scope.md index 6f28c51..f3ad623 100644 --- a/_reviewers/rails-configure-at-proper-scope.md +++ b/_reviewers/rails-configure-at-proper-scope.md @@ -99,257 +99,3 @@ When working with configurations: ``` Following these practices ensures components remain properly isolated and configurations behave predictably across different environments and application 
setups. - - -[ - { - "discussion_id": "1904669769", - "pr_number": 54129, - "pr_file": "actionview/lib/action_view/cache_expiry.rb", - "created_at": "2025-01-06T21:45:56+00:00", - "commented_code": "if @watched_dirs != dirs_to_watch\n @watched_dirs = dirs_to_watch\n new_watcher = @watcher_class.new([], @watched_dirs) do\n new_watcher = @watcher_class.new([], @watched_dirs, filter_path: Rails.root.to_s) do", - "repo_full_name": "rails/rails", - "discussion_comments": [ - { - "comment_id": "1904669769", - "repo_full_name": "rails/rails", - "pr_number": 54129, - "pr_file": "actionview/lib/action_view/cache_expiry.rb", - "discussion_id": "1904669769", - "commented_code": "@@ -40,7 +40,7 @@ def build_watcher\n \n if @watched_dirs != dirs_to_watch\n @watched_dirs = dirs_to_watch\n- new_watcher = @watcher_class.new([], @watched_dirs) do\n+ new_watcher = @watcher_class.new([], @watched_dirs, filter_path: Rails.root.to_s) do", - "comment_created_at": "2025-01-06T21:45:56+00:00", - "comment_author": "rafaelfranca", - "comment_body": "This is coupling Action View with Railties in the wrong place.\r\n\r\nWe should pass a configuration to ActionView in its Railtie and use it here.", - "pr_file_module": null - }, - { - "comment_id": "1905515638", - "repo_full_name": "rails/rails", - "pr_number": 54129, - "pr_file": "actionview/lib/action_view/cache_expiry.rb", - "discussion_id": "1904669769", - "commented_code": "@@ -40,7 +40,7 @@ def build_watcher\n \n if @watched_dirs != dirs_to_watch\n @watched_dirs = dirs_to_watch\n- new_watcher = @watcher_class.new([], @watched_dirs) do\n+ new_watcher = @watcher_class.new([], @watched_dirs, filter_path: Rails.root.to_s) do", - "comment_created_at": "2025-01-07T14:12:50+00:00", - "comment_author": "ermolaev", - "comment_body": "agree, rewrote the code, also added `filter_path` to `EventedFileUpdateChecker`, and added tests", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1374948225", - "pr_number": 49765, - "pr_file": "activerecord/lib/active_record/railtie.rb", - "created_at": "2023-10-27T18:59:39+00:00", - "commented_code": "end\n end\n end\n\n initializer \"active_record.attributes_for_inspect\" do |app|\n ActiveSupport.on_load(:active_record) do\n if app.config.consider_all_requests_local\n ActiveRecord::Base.attributes_for_inspect = :all", - "repo_full_name": "rails/rails", - "discussion_comments": [ - { - "comment_id": "1374948225", - "repo_full_name": "rails/rails", - "pr_number": 49765, - "pr_file": "activerecord/lib/active_record/railtie.rb", - "discussion_id": "1374948225", - "commented_code": "@@ -458,5 +458,13 @@ class Railtie < Rails::Railtie # :nodoc:\n end\n end\n end\n+\n+ initializer \"active_record.attributes_for_inspect\" do |app|\n+ ActiveSupport.on_load(:active_record) do\n+ if app.config.consider_all_requests_local\n+ ActiveRecord::Base.attributes_for_inspect = :all", - "comment_created_at": "2023-10-27T18:59:39+00:00", - "comment_author": "rafaelfranca", - "comment_body": "This will override any config the user set, no?", - "pr_file_module": null - }, - { - "comment_id": "1387061473", - "repo_full_name": "rails/rails", - "pr_number": 49765, - "pr_file": "activerecord/lib/active_record/railtie.rb", - "discussion_id": "1374948225", - "commented_code": "@@ -458,5 +458,13 @@ class Railtie < Rails::Railtie # :nodoc:\n end\n end\n end\n+\n+ initializer \"active_record.attributes_for_inspect\" do |app|\n+ ActiveSupport.on_load(:active_record) do\n+ if app.config.consider_all_requests_local\n+ ActiveRecord::Base.attributes_for_inspect = 
:all", - "comment_created_at": "2023-11-08T18:48:00+00:00", - "comment_author": "andrewn617", - "comment_body": "I don't think so, since it is run in the `:active_record` load hook, it will be set before `Base` is inherited by anything. I tested in my app and it worked as expected. But I will add a test like you suggested in your other comment.", - "pr_file_module": null - }, - { - "comment_id": "1387073788", - "repo_full_name": "rails/rails", - "pr_number": 49765, - "pr_file": "activerecord/lib/active_record/railtie.rb", - "discussion_id": "1374948225", - "commented_code": "@@ -458,5 +458,13 @@ class Railtie < Rails::Railtie # :nodoc:\n end\n end\n end\n+\n+ initializer \"active_record.attributes_for_inspect\" do |app|\n+ ActiveSupport.on_load(:active_record) do\n+ if app.config.consider_all_requests_local\n+ ActiveRecord::Base.attributes_for_inspect = :all", - "comment_created_at": "2023-11-08T18:59:43+00:00", - "comment_author": "rafaelfranca", - "comment_body": "What I mean is setting `config.active_record.attributes_for_inspect = [:uuid]`", - "pr_file_module": null - }, - { - "comment_id": "1387092660", - "repo_full_name": "rails/rails", - "pr_number": 49765, - "pr_file": "activerecord/lib/active_record/railtie.rb", - "discussion_id": "1374948225", - "commented_code": "@@ -458,5 +458,13 @@ class Railtie < Rails::Railtie # :nodoc:\n end\n end\n end\n+\n+ initializer \"active_record.attributes_for_inspect\" do |app|\n+ ActiveSupport.on_load(:active_record) do\n+ if app.config.consider_all_requests_local\n+ ActiveRecord::Base.attributes_for_inspect = :all", - "comment_created_at": "2023-11-08T19:16:04+00:00", - "comment_author": "andrewn617", - "comment_body": "Ah sorry, wasn't thinking of that angle. Fixed it and added a test. Thanks!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1761869122", - "pr_number": 52297, - "pr_file": "actionpack/lib/action_dispatch/routing/mapper.rb", - "created_at": "2024-09-16T20:21:05+00:00", - "commented_code": "end\n else\n def route_source_location\n if Mapper.route_source_locations\n if Mapper.route_source_locations || ActionDispatch.verbose_redirect_logs", - "repo_full_name": "rails/rails", - "discussion_comments": [ - { - "comment_id": "1761869122", - "repo_full_name": "rails/rails", - "pr_number": 52297, - "pr_file": "actionpack/lib/action_dispatch/routing/mapper.rb", - "discussion_id": "1761869122", - "commented_code": "@@ -392,7 +392,7 @@ def route_source_location\n end\n else\n def route_source_location\n- if Mapper.route_source_locations\n+ if Mapper.route_source_locations || ActionDispatch.verbose_redirect_logs", - "comment_created_at": "2024-09-16T20:21:05+00:00", - "comment_author": "p8", - "comment_body": "If we are going to return \"route source locations\", we might as well change how `Mapper.route_source_locations` is set and avoid `Mapper` having to know about `verbose_redirect_logs`.\r\n\r\n```suggestion\r\n if Mapper.route_source_locations\r\n```\r\n\r\nWe could set it here instead:\r\nhttps://github.com/rails/rails/blob/d0f830033b2ea27bc1e66683ce5b16bd9ca55df1/actionpack/lib/action_dispatch/railtie.rb#L71\r\n\r\n```ruby\r\n ActionDispatch::Routing::Mapper.route_source_locations = Rails.env.development? 
|| ActionDispatch.verbose_redirect_logs\r\n```\r\n\r\nOr could we just leave it out?\r\nNot sure what the result would be.\r\n", - "pr_file_module": null - }, - { - "comment_id": "1781095922", - "repo_full_name": "rails/rails", - "pr_number": 52297, - "pr_file": "actionpack/lib/action_dispatch/routing/mapper.rb", - "discussion_id": "1761869122", - "commented_code": "@@ -392,7 +392,7 @@ def route_source_location\n end\n else\n def route_source_location\n- if Mapper.route_source_locations\n+ if Mapper.route_source_locations || ActionDispatch.verbose_redirect_logs", - "comment_created_at": "2024-09-30T13:05:27+00:00", - "comment_author": "dennispaagman", - "comment_body": "Yeah that's a better place! \r\n\r\nI think leaving it out will make it that if you enable `verbose_redirect_logs` in a different environment than development the source locations won't be there for redirects from routes.\r\n\r\nI'm not sure if anyone wants to, or should, run this setting in different environments though, but it's possible.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "508997606", - "pr_number": 39398, - "pr_file": "actionpack/lib/action_dispatch/http/content_security_policy.rb", - "created_at": "2020-10-21T05:25:18+00:00", - "commented_code": "POLICY = \"Content-Security-Policy\"\n POLICY_REPORT_ONLY = \"Content-Security-Policy-Report-Only\"\n\n def initialize(app)\n def initialize(app, warn_on_no_content_security_policy = false)\n @app = app\n @warn_on_no_content_security_policy = warn_on_no_content_security_policy\n end\n\n def call(env)\n request = ActionDispatch::Request.new env\n _, headers, _ = response = @app.call(env)\n\n return response unless html_response?(headers)\n return response if policy_present?(headers)\n\n if policy = request.content_security_policy\n nonce = request.content_security_policy_nonce\n nonce_directives = request.content_security_policy_nonce_directives\n context = request.controller_instance || request\n headers[header_name(request)] = policy.build(context, nonce, nonce_directives)\n headers[header_name(request)] = policy.build(context, nonce, nonce_directives, headers[CONTENT_TYPE])\n else\n if (Rails.env.development? || Rails.env.test?) && logger(request) && @warn_on_no_content_security_policy", - "repo_full_name": "rails/rails", - "discussion_comments": [ - { - "comment_id": "508997606", - "repo_full_name": "rails/rails", - "pr_number": 39398, - "pr_file": "actionpack/lib/action_dispatch/http/content_security_policy.rb", - "discussion_id": "508997606", - "commented_code": "@@ -9,34 +9,32 @@ class Middleware\n POLICY = \"Content-Security-Policy\"\n POLICY_REPORT_ONLY = \"Content-Security-Policy-Report-Only\"\n \n- def initialize(app)\n+ def initialize(app, warn_on_no_content_security_policy = false)\n @app = app\n+ @warn_on_no_content_security_policy = warn_on_no_content_security_policy\n end\n \n def call(env)\n request = ActionDispatch::Request.new env\n _, headers, _ = response = @app.call(env)\n \n- return response unless html_response?(headers)\n return response if policy_present?(headers)\n \n if policy = request.content_security_policy\n nonce = request.content_security_policy_nonce\n nonce_directives = request.content_security_policy_nonce_directives\n context = request.controller_instance || request\n- headers[header_name(request)] = policy.build(context, nonce, nonce_directives)\n+ headers[header_name(request)] = policy.build(context, nonce, nonce_directives, headers[CONTENT_TYPE])\n+ else\n+ if (Rails.env.development? || Rails.env.test?) 
&& logger(request) && @warn_on_no_content_security_policy", - "comment_created_at": "2020-10-21T05:25:18+00:00", - "comment_author": "rafaelfranca", - "comment_body": "Checks for environment can't enter the action pack code. If we want different behavior per environment this should be a config on this class.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1030612902", - "pr_number": 46555, - "pr_file": "activejob/lib/active_job/log_subscriber.rb", - "created_at": "2022-11-23T15:54:29+00:00", - "commented_code": "job = event.payload[:job]\n ex = event.payload[:exception_object]\n if ex\n cleaned_backtrace = Rails.backtrace_cleaner.clean(ex.backtrace)", - "repo_full_name": "rails/rails", - "discussion_comments": [ - { - "comment_id": "1030612902", - "repo_full_name": "rails/rails", - "pr_number": 46555, - "pr_file": "activejob/lib/active_job/log_subscriber.rb", - "discussion_id": "1030612902", - "commented_code": "@@ -57,8 +57,9 @@ def perform(event)\n job = event.payload[:job]\n ex = event.payload[:exception_object]\n if ex\n+ cleaned_backtrace = Rails.backtrace_cleaner.clean(ex.backtrace)", - "comment_created_at": "2022-11-23T15:54:29+00:00", - "comment_author": "rafaelfranca", - "comment_body": "No Rails framework should have access to the `Rails` constant inside it since they can be used outside of a Rails application. We should be doing the same as `ActiveRecord::LogSubscriber`:\r\n\r\nhttps://github.com/rails/rails/blob/40563a3ab457f5b1d8a5defaecb7375c55bc3fd5/activerecord/lib/active_record/railtie.rb#L97-L99\r\nhttps://github.com/rails/rails/blob/40563a3ab457f5b1d8a5defaecb7375c55bc3fd5/activerecord/lib/active_record/log_subscriber.rb#L7", - "pr_file_module": null - }, - { - "comment_id": "1030725259", - "repo_full_name": "rails/rails", - "pr_number": 46555, - "pr_file": "activejob/lib/active_job/log_subscriber.rb", - "discussion_id": "1030612902", - "commented_code": "@@ -57,8 +57,9 @@ def perform(event)\n job = event.payload[:job]\n ex = event.payload[:exception_object]\n if ex\n+ cleaned_backtrace = Rails.backtrace_cleaner.clean(ex.backtrace)", - "comment_created_at": "2022-11-23T17:36:19+00:00", - "comment_author": "warrenbhw", - "comment_body": "Hey @rafaelfranca, thanks for taking a look.\r\nI'll address this and fix the tests soon\r\nCheers!", - "pr_file_module": null - }, - { - "comment_id": "1034174603", - "repo_full_name": "rails/rails", - "pr_number": 46555, - "pr_file": "activejob/lib/active_job/log_subscriber.rb", - "discussion_id": "1030612902", - "commented_code": "@@ -57,8 +57,9 @@ def perform(event)\n job = event.payload[:job]\n ex = event.payload[:exception_object]\n if ex\n+ cleaned_backtrace = Rails.backtrace_cleaner.clean(ex.backtrace)", - "comment_created_at": "2022-11-29T00:10:17+00:00", - "comment_author": "warrenbhw", - "comment_body": "@rafaelfranca the changes that you suggested are made. CI seems to be failing on a flaky test. 
Is there a way for me to queue another build another than pushing some whitespace changes?", - "pr_file_module": null - }, - { - "comment_id": "1034245869", - "repo_full_name": "rails/rails", - "pr_number": 46555, - "pr_file": "activejob/lib/active_job/log_subscriber.rb", - "discussion_id": "1030612902", - "commented_code": "@@ -57,8 +57,9 @@ def perform(event)\n job = event.payload[:job]\n ex = event.payload[:exception_object]\n if ex\n+ cleaned_backtrace = Rails.backtrace_cleaner.clean(ex.backtrace)", - "comment_created_at": "2022-11-29T02:41:52+00:00", - "comment_author": "warrenbhw", - "comment_body": "@rafaelfranca CI tests are passing. Lmk if there's anything else that I need to address here to get this merged!", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1706409109", - "pr_number": 52353, - "pr_file": "actionpack/lib/action_dispatch/routing/route_set.rb", - "created_at": "2024-08-07T05:39:52+00:00", - "commented_code": "end\n\n def self.new_with_config(config)\n route_set_config = DEFAULT_CONFIG\n route_set_config = DEFAULT_CONFIG.dup", - "repo_full_name": "rails/rails", - "discussion_comments": [ - { - "comment_id": "1706409109", - "repo_full_name": "rails/rails", - "pr_number": 52353, - "pr_file": "actionpack/lib/action_dispatch/routing/route_set.rb", - "discussion_id": "1706409109", - "commented_code": "@@ -363,7 +363,7 @@ def self.default_resources_path_names\n end\n \n def self.new_with_config(config)\n- route_set_config = DEFAULT_CONFIG\n+ route_set_config = DEFAULT_CONFIG.dup", - "comment_created_at": "2024-08-07T05:39:52+00:00", - "comment_author": "gmcgibbon", - "comment_body": "This has to be a bug. We mutate a constant in the next few lines which changes the config of future route sets. Duping the constant prevents this.", - "pr_file_module": null - } - ] - }, - { - "discussion_id": "1478393815", - "pr_number": 50969, - "pr_file": "activerecord/lib/active_record/query_logs.rb", - "created_at": "2024-02-05T15:02:15+00:00", - "commented_code": "#\n # config.active_record.cache_query_log_tags = true\n module QueryLogs\n mattr_accessor :taggings, instance_accessor: false, default: {}\n mattr_accessor :taggings, instance_accessor: false, default: {\n line: -> { query_source_location }", - "repo_full_name": "rails/rails", - "discussion_comments": [ - { - "comment_id": "1478393815", - "repo_full_name": "rails/rails", - "pr_number": 50969, - "pr_file": "activerecord/lib/active_record/query_logs.rb", - "discussion_id": "1478393815", - "commented_code": "@@ -71,7 +72,10 @@ module ActiveRecord\n #\n # config.active_record.cache_query_log_tags = true\n module QueryLogs\n- mattr_accessor :taggings, instance_accessor: false, default: {}\n+ mattr_accessor :taggings, instance_accessor: false, default: {\n+ line: -> { query_source_location }", - "comment_created_at": "2024-02-05T15:02:15+00:00", - "comment_author": "byroot", - "comment_body": "This should be set in `activerecord/railtie.rb` like the others, otherwise it risks being overidden by users.", - "pr_file_module": null - } - ] - } -] diff --git a/_reviewers/rails-consistent-terminology-usage.json b/_reviewers/rails-consistent-terminology-usage.json new file mode 100644 index 0000000..96e0a32 --- /dev/null +++ b/_reviewers/rails-consistent-terminology-usage.json @@ -0,0 +1,200 @@ +[ + { + "discussion_id": "2001071505", + "pr_number": 54765, + "pr_file": "railties/lib/rails/generators/rails/app/templates/config/databases/mysql.yml.tt", + "created_at": "2025-03-18T13:37:28+00:00", + "commented_code": "# 
production:\n# url: <%%= ENV[\"MY_APP_DATABASE_URL\"] %>\n#\n<%- unless options.skip_solid? -%>\n# The connection URL for non-primary databases can also be configured using a\n# similar environment variable prefixed with the configuration key in uppercase.", + "repo_full_name": "rails/rails", + "discussion_comments": [ + { + "comment_id": "2001071505", + "repo_full_name": "rails/rails", + "pr_number": 54765, + "pr_file": "railties/lib/rails/generators/rails/app/templates/config/databases/mysql.yml.tt", + "discussion_id": "2001071505", + "commented_code": "@@ -49,6 +49,14 @@ test:\n # production:\n # url: <%%= ENV[\"MY_APP_DATABASE_URL\"] %>\n #\n+<%- unless options.skip_solid? -%>\n+# The connection URL for non-primary databases can also be configured using a\n+# similar environment variable prefixed with the configuration key in uppercase.", + "comment_created_at": "2025-03-18T13:37:28+00:00", + "comment_author": "eileencodes", + "comment_body": "It's more correct to refer to that key as the connection name, because it's 3-tier and the key is `development` and `animals`. I'm not trying to be pedantic but I'm particular about this because I don't want multiple names for the same thing in our docs.\r\n\r\n```suggestion\r\n# similar environment variable prefixed with the connection name appended to `DATABASE_URL`.\r\n```", + "pr_file_module": null + }, + { + "comment_id": "2006439629", + "repo_full_name": "rails/rails", + "pr_number": 54765, + "pr_file": "railties/lib/rails/generators/rails/app/templates/config/databases/mysql.yml.tt", + "discussion_id": "2001071505", + "commented_code": "@@ -49,6 +49,14 @@ test:\n # production:\n # url: <%%= ENV[\"MY_APP_DATABASE_URL\"] %>\n #\n+<%- unless options.skip_solid? -%>\n+# The connection URL for non-primary databases can also be configured using a\n+# similar environment variable prefixed with the configuration key in uppercase.", + "comment_created_at": "2025-03-20T20:53:58+00:00", + "comment_author": "floehopper", + "comment_body": "> It's more correct to refer to that key as the connection name, because it's 3-tier and the key is development and animals. I'm not trying to be pedantic but I'm particular about this because I don't want multiple names for the same thing in our docs.\r\n\r\nThat makes complete sense. However, I also find the revised version slightly confusing because it talks about both prefixing and appending. How about changing the original to this:\r\n\r\n```\r\n# Connection URLs for non-primary databases can also be configured using\r\n# environment variables. The variable name is formed by concatenating the\r\n# connection name with `_DATABASE_URL`. For example:\r\n#\r\n# CACHE_DATABASE_URL=\"mysql2://cacheuser:cachepass@localhost/cachedatabase\"\r\n#\r\n```\r\n", + "pr_file_module": null + }, + { + "comment_id": "2007327178", + "repo_full_name": "rails/rails", + "pr_number": 54765, + "pr_file": "railties/lib/rails/generators/rails/app/templates/config/databases/mysql.yml.tt", + "discussion_id": "2001071505", + "commented_code": "@@ -49,6 +49,14 @@ test:\n # production:\n # url: <%%= ENV[\"MY_APP_DATABASE_URL\"] %>\n #\n+<%- unless options.skip_solid? 
-%>\n+# The connection URL for non-primary databases can also be configured using a\n+# similar environment variable prefixed with the configuration key in uppercase.", + "comment_created_at": "2025-03-21T10:50:18+00:00", + "comment_author": "floehopper", + "comment_body": "@eileencodes I've attempted to address your concerns in https://github.com/rails/rails/pull/54765/commits/689188a5a9d9e3f7d6b60a3958b84be865384335. Let me know what you think.", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1554102769", + "pr_number": 51499, + "pr_file": "guides/source/layout.html.erb", + "created_at": "2024-04-05T18:28:18+00:00", + "commented_code": "\n \n \n\n
\n
\n \n+\n
\n
\n \n\n
\n
\n \n+\n
\n
\n
\n\n
\n\n
\n
\n <%= yield :header_section %>\n
\n
\n
\n
\n <%= yield :header_section %>\n\n <%= yield :index_section %>\n <%= yield :index_section %>\n
\n
\n\n
\n
\n
\n
\n
\n
\n <%= yield %>\n
\n\n
\n
\n
\n <%= yield %>\n
\n

Feedback

\n

\n You're encouraged to help improve the quality of this guide.\n

\n

\n Please contribute if you see any typos or factual errors.\n To get started, you can read our <%= link_to 'documentation contributions', 'https://edgeguides.rubyonrails.org/contributing_to_ruby_on_rails.html#contributing-to-the-rails-documentation' %> section.\n

\n

\n You may also find incomplete content or stuff that is not up to date.\n Please do add any missing documentation for main. Make sure to check\n <%= link_to 'Edge Guides', 'https://edgeguides.rubyonrails.org' %> first to verify\n if the issues are already fixed or not on the main branch.\n Check the <%= link_to 'Ruby on Rails Guides Guidelines', 'ruby_on_rails_guides_guidelines.html' %>\n for style and conventions.\n

\n

\n If for whatever reason you spot something to fix but cannot patch it yourself, please\n <%= link_to 'open an issue', 'https://github.com/rails/rails/issues' %>.\n

\n

And last but not least, any kind of discussion regarding Ruby on Rails\n documentation is very welcome on the <%= link_to 'official Ruby on Rails Forum', 'https://discuss.rubyonrails.org/c/rubyonrails-docs' %>.\n

\n
\n\n
", + "repo_full_name": "rails/rails", + "discussion_comments": [ + { + "comment_id": "1554119475", + "repo_full_name": "rails/rails", + "pr_number": 51499, + "pr_file": "guides/source/layout.html.erb", + "discussion_id": "1554119475", + "commented_code": "@@ -94,48 +106,54 @@\n \n
\n
\n+\n
\n \n-
\n-
\n- <%= yield :header_section %>\n+
\n+
\n+
\n+
\n+ <%= yield :header_section %>\n \n- <%= yield :index_section %>\n+ <%= yield :index_section %>\n+
\n+
\n \n-
\n-
\n-
\n+
\n+
\n+
\n+ <%= yield %>\n+
\n \n-
\n-
\n-
\n- <%= yield %>\n-
\n-

Feedback

\n-

\n- You're encouraged to help improve the quality of this guide.\n-

\n-

\n- Please contribute if you see any typos or factual errors.\n- To get started, you can read our <%= link_to 'documentation contributions', 'https://edgeguides.rubyonrails.org/contributing_to_ruby_on_rails.html#contributing-to-the-rails-documentation' %> section.\n-

\n-

\n- You may also find incomplete content or stuff that is not up to date.\n- Please do add any missing documentation for main. Make sure to check\n- <%= link_to 'Edge Guides', 'https://edgeguides.rubyonrails.org' %> first to verify\n- if the issues are already fixed or not on the main branch.\n- Check the <%= link_to 'Ruby on Rails Guides Guidelines', 'ruby_on_rails_guides_guidelines.html' %>\n- for style and conventions.\n-

\n-

\n- If for whatever reason you spot something to fix but cannot patch it yourself, please\n- <%= link_to 'open an issue', 'https://github.com/rails/rails/issues' %>.\n-

\n-

And last but not least, any kind of discussion regarding Ruby on Rails\n- documentation is very welcome on the <%= link_to 'official Ruby on Rails Forum', 'https://discuss.rubyonrails.org/c/rubyonrails-docs' %>.\n-

\n+
\n+\n+
", + "comment_created_at": "2024-04-05T18:34:38+00:00", + "comment_author": "brunoprietog", + "comment_body": "This should not be a footer. The footer should only be the one below the main element. In fact, when a footer is included inside the main element NVDA doesn't recognize it as such either, and even the h3 is already more than enough to indicate that it is a feedback section, which makes it somewhat repetitive.", + "pr_file_module": null + }, + { + "comment_id": "1557051680", + "repo_full_name": "rails/rails", + "pr_number": 51499, + "pr_file": "guides/source/layout.html.erb", + "discussion_id": "1554119475", + "commented_code": "@@ -94,48 +106,54 @@\n \n
\n
\n+\n
\n \n-
\n-
\n- <%= yield :header_section %>\n+
\n+
\n+
\n+
\n+ <%= yield :header_section %>\n \n- <%= yield :index_section %>\n+ <%= yield :index_section %>\n+
\n+
\n \n-
\n-
\n-
\n+
\n+
\n+
\n+ <%= yield %>\n+
\n \n-
\n-
\n-
\n- <%= yield %>\n-
\n-

Feedback

\n-

\n- You're encouraged to help improve the quality of this guide.\n-

\n-

\n- Please contribute if you see any typos or factual errors.\n- To get started, you can read our <%= link_to 'documentation contributions', 'https://edgeguides.rubyonrails.org/contributing_to_ruby_on_rails.html#contributing-to-the-rails-documentation' %> section.\n-

\n-

\n- You may also find incomplete content or stuff that is not up to date.\n- Please do add any missing documentation for main. Make sure to check\n- <%= link_to 'Edge Guides', 'https://edgeguides.rubyonrails.org' %> first to verify\n- if the issues are already fixed or not on the main branch.\n- Check the <%= link_to 'Ruby on Rails Guides Guidelines', 'ruby_on_rails_guides_guidelines.html' %>\n- for style and conventions.\n-

\n-

\n- If for whatever reason you spot something to fix but cannot patch it yourself, please\n- <%= link_to 'open an issue', 'https://github.com/rails/rails/issues' %>.\n-

\n-

And last but not least, any kind of discussion regarding Ruby on Rails\n- documentation is very welcome on the <%= link_to 'official Ruby on Rails Forum', 'https://discuss.rubyonrails.org/c/rubyonrails-docs' %>.\n-

\n+
\n+\n+
", + "comment_created_at": "2024-04-09T07:05:24+00:00", + "comment_author": "SleeplessByte", + "comment_body": "In html 5, footer may also be an element inside an article or section. I experience the same behaviour as you (because I also use NVDA when I use a screen reader) but I believe jaws does denote it differently.", + "pr_file_module": null + }, + { + "comment_id": "1557543733", + "repo_full_name": "rails/rails", + "pr_number": 51499, + "pr_file": "guides/source/layout.html.erb", + "discussion_id": "1554119475", + "commented_code": "@@ -94,48 +106,54 @@\n \n
\n
\n+\n
\n \n-
\n-
\n- <%= yield :header_section %>\n+
\n+
\n+
\n+
\n+ <%= yield :header_section %>\n \n- <%= yield :index_section %>\n+ <%= yield :index_section %>\n+
\n+
\n \n-
\n-
\n-
\n+
\n+
\n+
\n+ <%= yield %>\n+
\n \n-
\n-
\n-
\n- <%= yield %>\n-
\n-

Feedback

\n-

\n- You're encouraged to help improve the quality of this guide.\n-

\n-

\n- Please contribute if you see any typos or factual errors.\n- To get started, you can read our <%= link_to 'documentation contributions', 'https://edgeguides.rubyonrails.org/contributing_to_ruby_on_rails.html#contributing-to-the-rails-documentation' %> section.\n-

\n-

\n- You may also find incomplete content or stuff that is not up to date.\n- Please do add any missing documentation for main. Make sure to check\n- <%= link_to 'Edge Guides', 'https://edgeguides.rubyonrails.org' %> first to verify\n- if the issues are already fixed or not on the main branch.\n- Check the <%= link_to 'Ruby on Rails Guides Guidelines', 'ruby_on_rails_guides_guidelines.html' %>\n- for style and conventions.\n-

\n-

\n- If for whatever reason you spot something to fix but cannot patch it yourself, please\n- <%= link_to 'open an issue', 'https://github.com/rails/rails/issues' %>.\n-

\n-

And last but not least, any kind of discussion regarding Ruby on Rails\n- documentation is very welcome on the <%= link_to 'official Ruby on Rails Forum', 'https://discuss.rubyonrails.org/c/rubyonrails-docs' %>.\n-

\n+
\n+\n+
", + "comment_created_at": "2024-04-09T12:22:17+00:00", + "comment_author": "brunoprietog", + "comment_body": "Oh, I don't know. I'm not too convinced about adding so many additional regions beyond the navigation, main region and footer. It adds a lot of noise that is already covered by the heading levels and becomes a bit repetitive information.\r\n\r\nBy the way, I personally don't tend to use many links to skip to specific sections and hope this can be done natively. And really, there's no way to skip to the body of the article with a command to go to a region. You can easily use 2, but that makes me think we're not being very consistent with using regions, or if we're even using them properly.\r\n\r\nWhat do you think?", + "pr_file_module": null + }, + { + "comment_id": "1557576138", + "repo_full_name": "rails/rails", + "pr_number": 51499, + "pr_file": "guides/source/layout.html.erb", + "discussion_id": "1554119475", + "commented_code": "@@ -94,48 +106,54 @@\n \n
\n \n+\n
\n \n-
\n-
\n- <%= yield :header_section %>\n+
\n+
\n+
\n+
\n+ <%= yield :header_section %>\n \n- <%= yield :index_section %>\n+ <%= yield :index_section %>\n+
\n+
\n \n-
\n-
\n-
\n+
\n+
\n+
\n+ <%= yield %>\n+
\n \n-
\n-
\n-
\n- <%= yield %>\n-
\n-

Feedback

\n-

\n- You're encouraged to help improve the quality of this guide.\n-

\n-

\n- Please contribute if you see any typos or factual errors.\n- To get started, you can read our <%= link_to 'documentation contributions', 'https://edgeguides.rubyonrails.org/contributing_to_ruby_on_rails.html#contributing-to-the-rails-documentation' %> section.\n-

\n-

\n- You may also find incomplete content or stuff that is not up to date.\n- Please do add any missing documentation for main. Make sure to check\n- <%= link_to 'Edge Guides', 'https://edgeguides.rubyonrails.org' %> first to verify\n- if the issues are already fixed or not on the main branch.\n- Check the <%= link_to 'Ruby on Rails Guides Guidelines', 'ruby_on_rails_guides_guidelines.html' %>\n- for style and conventions.\n-

\n-

\n- If for whatever reason you spot something to fix but cannot patch it yourself, please\n- <%= link_to 'open an issue', 'https://github.com/rails/rails/issues' %>.\n-

\n-

And last but not least, any kind of discussion regarding Ruby on Rails\n- documentation is very welcome on the <%= link_to 'official Ruby on Rails Forum', 'https://discuss.rubyonrails.org/c/rubyonrails-docs' %>.\n-

\n+
\n+\n+
", + "comment_created_at": "2024-04-09T12:48:08+00:00", + "comment_author": "SleeplessByte", + "comment_body": "My personal thoughts here were:\r\n\r\n- I am browsing the guides extensively\r\n- I am in the middle of the article and find something I want to give feedback on\r\n- I know there is a _region_ (labelled section) that explains how to give feedback\r\n- I want to go there straight away\r\n\r\nWith NVDA I can get there right away when browsing the article pressing D, which should navigate to the next landmark or region. In this case the footer is nested inside article, and thus not _complementary_, but it is labelled by the heading, and thus it becomes a section with role=\"region\", iirc. Using the TOC navigation here would be sub-optimal as it doesn't allow be to go to \"last heading of the current level\", by pressing 2. I would need to press it repeatedly, right? Since there are so little regions, that's not the case.\r\n\r\nDoes this make sense?", + "pr_file_module": null + }, + { + "comment_id": "1557588925", + "repo_full_name": "rails/rails", + "pr_number": 51499, + "pr_file": "guides/source/layout.html.erb", + "discussion_id": "1554119475", + "commented_code": "@@ -94,48 +106,54 @@\n \n
\n \n+\n
\n \n-
\n-
\n- <%= yield :header_section %>\n+
\n+
\n+
\n+
\n+ <%= yield :header_section %>\n \n- <%= yield :index_section %>\n+ <%= yield :index_section %>\n+
\n+
\n \n-
\n-
\n-
\n+
\n+
\n+
\n+ <%= yield %>\n+
\n \n-
\n-
\n-
\n- <%= yield %>\n-
\n-

Feedback

\n-

\n- You're encouraged to help improve the quality of this guide.\n-

\n-

\n- Please contribute if you see any typos or factual errors.\n- To get started, you can read our <%= link_to 'documentation contributions', 'https://edgeguides.rubyonrails.org/contributing_to_ruby_on_rails.html#contributing-to-the-rails-documentation' %> section.\n-

\n-

\n- You may also find incomplete content or stuff that is not up to date.\n- Please do add any missing documentation for main. Make sure to check\n- <%= link_to 'Edge Guides', 'https://edgeguides.rubyonrails.org' %> first to verify\n- if the issues are already fixed or not on the main branch.\n- Check the <%= link_to 'Ruby on Rails Guides Guidelines', 'ruby_on_rails_guides_guidelines.html' %>\n- for style and conventions.\n-

\n-

\n- If for whatever reason you spot something to fix but cannot patch it yourself, please\n- <%= link_to 'open an issue', 'https://github.com/rails/rails/issues' %>.\n-

\n-

And last but not least, any kind of discussion regarding Ruby on Rails\n- documentation is very welcome on the <%= link_to 'official Ruby on Rails Forum', 'https://discuss.rubyonrails.org/c/rubyonrails-docs' %>.\n-

\n+
\n+\n+
", + "comment_created_at": "2024-04-09T12:57:42+00:00", + "comment_author": "brunoprietog", + "comment_body": "That might make sense, but then, we would also have to have a region for the article, right? To get back quickly, at least to the beginning of the article.\r\n\r\nActually, this shouldn't be a problem with the article element, but unfortunately it hasn't worked in Chromium and NVDA for a while now.\r\n\r\nAnd in the case of the feedback region, I guess we would have to give it the role=\"region\" explicitly, since NVDA doesn't recognize it either because it's a footer inside a main element.", + "pr_file_module": null + }, + { + "comment_id": "1557594136", + "repo_full_name": "rails/rails", + "pr_number": 51499, + "pr_file": "guides/source/layout.html.erb", + "discussion_id": "1554119475", + "commented_code": "@@ -94,48 +106,54 @@\n \n
\n \n+\n
\n \n-
\n-
\n- <%= yield :header_section %>\n+
\n+
\n+
\n+
\n+ <%= yield :header_section %>\n \n- <%= yield :index_section %>\n+ <%= yield :index_section %>\n+
\n+
\n \n-
\n-
\n-
\n+
\n+
\n+
\n+ <%= yield %>\n+
\n \n-
\n-
\n-
\n- <%= yield %>\n-
\n-

Feedback

\n-

\n- You're encouraged to help improve the quality of this guide.\n-

\n-

\n- Please contribute if you see any typos or factual errors.\n- To get started, you can read our <%= link_to 'documentation contributions', 'https://edgeguides.rubyonrails.org/contributing_to_ruby_on_rails.html#contributing-to-the-rails-documentation' %> section.\n-

\n-

\n- You may also find incomplete content or stuff that is not up to date.\n- Please do add any missing documentation for main. Make sure to check\n- <%= link_to 'Edge Guides', 'https://edgeguides.rubyonrails.org' %> first to verify\n- if the issues are already fixed or not on the main branch.\n- Check the <%= link_to 'Ruby on Rails Guides Guidelines', 'ruby_on_rails_guides_guidelines.html' %>\n- for style and conventions.\n-

\n-

\n- If for whatever reason you spot something to fix but cannot patch it yourself, please\n- <%= link_to 'open an issue', 'https://github.com/rails/rails/issues' %>.\n-

\n-

And last but not least, any kind of discussion regarding Ruby on Rails\n- documentation is very welcome on the <%= link_to 'official Ruby on Rails Forum', 'https://discuss.rubyonrails.org/c/rubyonrails-docs' %>.\n-

\n+
\n+\n+
", + "comment_created_at": "2024-04-09T13:01:21+00:00", + "comment_author": "SleeplessByte", + "comment_body": "We have a region for the article: https://github.com/rails/rails/pull/51499/files/93f9819c62890301f398e752b6083c9d2c9f6c40#diff-dc279d925a05373ac5f34307441799f98ac4accbe0b0a8f12f99f703585afa7dR113 Is that not working for you? I haven't tested mine but I can.\r\n\r\nIt _should_ be:\r\n\r\n```html\r\n
\r\n
\r\n\r\n
\r\n article introduction\r\n\r\n chapter index / toc\r\n
\r\n\r\n article contents with lots of headings \r\n\r\n
\r\n

Feedback

\r\n\r\n feedback explanation\r\n
\r\n
\r\n
\r\n```\r\n\r\n", + "pr_file_module": null + }, + { + "comment_id": "1557595145", + "repo_full_name": "rails/rails", + "pr_number": 51499, + "pr_file": "guides/source/layout.html.erb", + "discussion_id": "1554119475", + "commented_code": "@@ -94,48 +106,54 @@\n \n
\n \n+\n
\n \n-
\n-
\n- <%= yield :header_section %>\n+
\n+
\n+
\n+
\n+ <%= yield :header_section %>\n \n- <%= yield :index_section %>\n+ <%= yield :index_section %>\n+
\n+
\n \n-
\n-
\n-
\n+
\n+
\n+
\n+ <%= yield %>\n+
\n \n-
\n-
\n-
\n- <%= yield %>\n-
\n-

Feedback

\n-

\n- You're encouraged to help improve the quality of this guide.\n-

\n-

\n- Please contribute if you see any typos or factual errors.\n- To get started, you can read our <%= link_to 'documentation contributions', 'https://edgeguides.rubyonrails.org/contributing_to_ruby_on_rails.html#contributing-to-the-rails-documentation' %> section.\n-

\n-

\n- You may also find incomplete content or stuff that is not up to date.\n- Please do add any missing documentation for main. Make sure to check\n- <%= link_to 'Edge Guides', 'https://edgeguides.rubyonrails.org' %> first to verify\n- if the issues are already fixed or not on the main branch.\n- Check the <%= link_to 'Ruby on Rails Guides Guidelines', 'ruby_on_rails_guides_guidelines.html' %>\n- for style and conventions.\n-

\n-

\n- If for whatever reason you spot something to fix but cannot patch it yourself, please\n- <%= link_to 'open an issue', 'https://github.com/rails/rails/issues' %>.\n-

\n-

And last but not least, any kind of discussion regarding Ruby on Rails\n- documentation is very welcome on the <%= link_to 'official Ruby on Rails Forum', 'https://discuss.rubyonrails.org/c/rubyonrails-docs' %>.\n-

\n+
\n+\n+
", + "comment_created_at": "2024-04-09T13:02:09+00:00", + "comment_author": "SleeplessByte", + "comment_body": "(sidenote: I am fine with explicit role-ing things if SRs don't recognise it, so let's do that if we must)", + "pr_file_module": null + }, + { + "comment_id": "1557603577", + "repo_full_name": "rails/rails", + "pr_number": 51499, + "pr_file": "guides/source/layout.html.erb", + "discussion_id": "1554119475", + "commented_code": "@@ -94,48 +106,54 @@\n \n
\n \n+\n
\n \n-
\n-
\n- <%= yield :header_section %>\n+
\n+
\n+
\n+
\n+ <%= yield :header_section %>\n \n- <%= yield :index_section %>\n+ <%= yield :index_section %>\n+
\n+
\n \n-
\n-
\n-
\n+
\n+
\n+
\n+ <%= yield %>\n+
\n \n-
\n-
\n-
\n- <%= yield %>\n-
\n-

Feedback

\n-

\n- You're encouraged to help improve the quality of this guide.\n-

\n-

\n- Please contribute if you see any typos or factual errors.\n- To get started, you can read our <%= link_to 'documentation contributions', 'https://edgeguides.rubyonrails.org/contributing_to_ruby_on_rails.html#contributing-to-the-rails-documentation' %> section.\n-

\n-

\n- You may also find incomplete content or stuff that is not up to date.\n- Please do add any missing documentation for main. Make sure to check\n- <%= link_to 'Edge Guides', 'https://edgeguides.rubyonrails.org' %> first to verify\n- if the issues are already fixed or not on the main branch.\n- Check the <%= link_to 'Ruby on Rails Guides Guidelines', 'ruby_on_rails_guides_guidelines.html' %>\n- for style and conventions.\n-

\n-

\n- If for whatever reason you spot something to fix but cannot patch it yourself, please\n- <%= link_to 'open an issue', 'https://github.com/rails/rails/issues' %>.\n-

\n-

And last but not least, any kind of discussion regarding Ruby on Rails\n- documentation is very welcome on the <%= link_to 'official Ruby on Rails Forum', 'https://discuss.rubyonrails.org/c/rubyonrails-docs' %>.\n-

\n+
\n+\n+
", + "comment_created_at": "2024-04-09T13:08:34+00:00", + "comment_author": "brunoprietog", + "comment_body": "Right! So I think assigning the explicit roles would be enough to solve this.\r\n\r\nIn any case, an article is not exposed as a region, it's exposed as another type of element. In fact, at least in NVDA, there's no default shortcut to jump to it, I configured the A to do so but it's not the common case.", + "pr_file_module": null + }, + { + "comment_id": "1557622932", + "repo_full_name": "rails/rails", + "pr_number": 51499, + "pr_file": "guides/source/layout.html.erb", + "discussion_id": "1554119475", + "commented_code": "@@ -94,48 +106,54 @@\n \n
\n \n+\n
\n \n-
\n-
\n- <%= yield :header_section %>\n+
\n+
\n+
\n+
\n+ <%= yield :header_section %>\n \n- <%= yield :index_section %>\n+ <%= yield :index_section %>\n+
\n+
\n \n-
\n-
\n-
\n+
\n+
\n+
\n+ <%= yield %>\n+
\n \n-
\n-
\n-
\n- <%= yield %>\n-
\n-

Feedback

\n-

\n- You're encouraged to help improve the quality of this guide.\n-

\n-

\n- Please contribute if you see any typos or factual errors.\n- To get started, you can read our <%= link_to 'documentation contributions', 'https://edgeguides.rubyonrails.org/contributing_to_ruby_on_rails.html#contributing-to-the-rails-documentation' %> section.\n-

\n-

\n- You may also find incomplete content or stuff that is not up to date.\n- Please do add any missing documentation for main. Make sure to check\n- <%= link_to 'Edge Guides', 'https://edgeguides.rubyonrails.org' %> first to verify\n- if the issues are already fixed or not on the main branch.\n- Check the <%= link_to 'Ruby on Rails Guides Guidelines', 'ruby_on_rails_guides_guidelines.html' %>\n- for style and conventions.\n-

\n-

\n- If for whatever reason you spot something to fix but cannot patch it yourself, please\n- <%= link_to 'open an issue', 'https://github.com/rails/rails/issues' %>.\n-

\n-

And last but not least, any kind of discussion regarding Ruby on Rails\n- documentation is very welcome on the <%= link_to 'official Ruby on Rails Forum', 'https://discuss.rubyonrails.org/c/rubyonrails-docs' %>.\n-

\n+
\n+\n+
", + "comment_created_at": "2024-04-09T13:22:14+00:00", + "comment_author": "SleeplessByte", + "comment_body": "Cool! Yeah, I _love_ different support for different SRs and even different versions of the same SR 🗡️ 😁. \r\n\r\nI'll explicitly role both the article header and article footer (so your NVDA can skip back/forth to them)\r\n", + "pr_file_module": null + }, + { + "comment_id": "1557636426", + "repo_full_name": "rails/rails", + "pr_number": 51499, + "pr_file": "guides/source/layout.html.erb", + "discussion_id": "1554119475", + "commented_code": "@@ -94,48 +106,54 @@\n \n
\n \n+\n
\n \n-
\n-
\n- <%= yield :header_section %>\n+
\n+
\n+
\n+
\n+ <%= yield :header_section %>\n \n- <%= yield :index_section %>\n+ <%= yield :index_section %>\n+
\n+
\n \n-
\n-
\n-
\n+
\n+
\n+
\n+ <%= yield %>\n+
\n \n-
\n-
\n-
\n- <%= yield %>\n-
\n-

Feedback

\n-

\n- You're encouraged to help improve the quality of this guide.\n-

\n-

\n- Please contribute if you see any typos or factual errors.\n- To get started, you can read our <%= link_to 'documentation contributions', 'https://edgeguides.rubyonrails.org/contributing_to_ruby_on_rails.html#contributing-to-the-rails-documentation' %> section.\n-

\n-

\n- You may also find incomplete content or stuff that is not up to date.\n- Please do add any missing documentation for main. Make sure to check\n- <%= link_to 'Edge Guides', 'https://edgeguides.rubyonrails.org' %> first to verify\n- if the issues are already fixed or not on the main branch.\n- Check the <%= link_to 'Ruby on Rails Guides Guidelines', 'ruby_on_rails_guides_guidelines.html' %>\n- for style and conventions.\n-

\n-

\n- If for whatever reason you spot something to fix but cannot patch it yourself, please\n- <%= link_to 'open an issue', 'https://github.com/rails/rails/issues' %>.\n-

\n-

And last but not least, any kind of discussion regarding Ruby on Rails\n- documentation is very welcome on the <%= link_to 'official Ruby on Rails Forum', 'https://discuss.rubyonrails.org/c/rubyonrails-docs' %>.\n-

\n+
\n+\n+
", + "comment_created_at": "2024-04-09T13:30:53+00:00", + "comment_author": "brunoprietog", + "comment_body": "Yes, sadly. Just like the history of browser feature compatibility 😰", + "pr_file_module": null + } + ] + }, + { + "discussion_id": "1556511957", + "pr_number": 51499, + "pr_file": "guides/source/index.html.erb", + "created_at": "2024-04-08T23:00:31+00:00", + "commented_code": "<% content_for :index_section do %>\n
\n \n+\n
\n \n-
\n-
\n- <%= yield :header_section %>\n+
\n+
\n+
\n+
\n+ <%= yield :header_section %>\n \n- <%= yield :index_section %>\n+ <%= yield :index_section %>\n+
\n+
\n \n-
\n-
\n-
\n+
\n+
\n+
\n+ <%= yield %>\n+
\n \n-
\n-
\n-
\n- <%= yield %>\n-
\n-

Feedback

\n-

\n- You're encouraged to help improve the quality of this guide.\n-

\n-

\n- Please contribute if you see any typos or factual errors.\n- To get started, you can read our <%= link_to 'documentation contributions', 'https://edgeguides.rubyonrails.org/contributing_to_ruby_on_rails.html#contributing-to-the-rails-documentation' %> section.\n-

\n-

\n- You may also find incomplete content or stuff that is not up to date.\n- Please do add any missing documentation for main. Make sure to check\n- <%= link_to 'Edge Guides', 'https://edgeguides.rubyonrails.org' %> first to verify\n- if the issues are already fixed or not on the main branch.\n- Check the <%= link_to 'Ruby on Rails Guides Guidelines', 'ruby_on_rails_guides_guidelines.html' %>\n- for style and conventions.\n-

\n-

\n- If for whatever reason you spot something to fix but cannot patch it yourself, please\n- <%= link_to 'open an issue', 'https://github.com/rails/rails/issues' %>.\n-

\n-

And last but not least, any kind of discussion regarding Ruby on Rails\n- documentation is very welcome on the <%= link_to 'official Ruby on Rails Forum', 'https://discuss.rubyonrails.org/c/rubyonrails-docs' %>.\n-

\n+
\n+\n+
", - "comment_created_at": "2024-04-05T18:34:38+00:00", - "comment_author": "brunoprietog", - "comment_body": "This should not be a footer. The footer should only be the one below the main element. In fact, when a footer is included inside the main element NVDA doesn't recognize it as such either, and even the h3 is already more than enough to indicate that it is a feedback section, which makes it somewhat repetitive.", - "pr_file_module": null - }, - { - "comment_id": "1557051680", - "repo_full_name": "rails/rails", - "pr_number": 51499, - "pr_file": "guides/source/layout.html.erb", - "discussion_id": "1554119475", - "commented_code": "@@ -94,48 +106,54 @@\n \n
\n \n+\n
\n \n-
\n-
\n- <%= yield :header_section %>\n+
\n+
\n+
\n+
\n+ <%= yield :header_section %>\n \n- <%= yield :index_section %>\n+ <%= yield :index_section %>\n+
\n+
\n \n-
\n-
\n-
\n+
\n+
\n+
\n+ <%= yield %>\n+
\n \n-
\n-
\n-
\n- <%= yield %>\n-
\n-

Feedback

\n-

\n- You're encouraged to help improve the quality of this guide.\n-

\n-

\n- Please contribute if you see any typos or factual errors.\n- To get started, you can read our <%= link_to 'documentation contributions', 'https://edgeguides.rubyonrails.org/contributing_to_ruby_on_rails.html#contributing-to-the-rails-documentation' %> section.\n-

\n-

\n- You may also find incomplete content or stuff that is not up to date.\n- Please do add any missing documentation for main. Make sure to check\n- <%= link_to 'Edge Guides', 'https://edgeguides.rubyonrails.org' %> first to verify\n- if the issues are already fixed or not on the main branch.\n- Check the <%= link_to 'Ruby on Rails Guides Guidelines', 'ruby_on_rails_guides_guidelines.html' %>\n- for style and conventions.\n-

\n-

\n- If for whatever reason you spot something to fix but cannot patch it yourself, please\n- <%= link_to 'open an issue', 'https://github.com/rails/rails/issues' %>.\n-

\n-

And last but not least, any kind of discussion regarding Ruby on Rails\n- documentation is very welcome on the <%= link_to 'official Ruby on Rails Forum', 'https://discuss.rubyonrails.org/c/rubyonrails-docs' %>.\n-

\n+
\n+\n+
", - "comment_created_at": "2024-04-09T07:05:24+00:00", - "comment_author": "SleeplessByte", - "comment_body": "In html 5, footer may also be an element inside an article or section. I experience the same behaviour as you (because I also use NVDA when I use a screen reader) but I believe jaws does denote it differently.", - "pr_file_module": null - }, - { - "comment_id": "1557543733", - "repo_full_name": "rails/rails", - "pr_number": 51499, - "pr_file": "guides/source/layout.html.erb", - "discussion_id": "1554119475", - "commented_code": "@@ -94,48 +106,54 @@\n \n
\n \n+\n
\n \n-
\n-
\n- <%= yield :header_section %>\n+
\n+
\n+
\n+
\n+ <%= yield :header_section %>\n \n- <%= yield :index_section %>\n+ <%= yield :index_section %>\n+
\n+
\n \n-
\n-
\n-
\n+
\n+
\n+
\n+ <%= yield %>\n+
\n \n-
\n-
\n-
\n- <%= yield %>\n-
\n-

Feedback

\n-

\n- You're encouraged to help improve the quality of this guide.\n-

\n-

\n- Please contribute if you see any typos or factual errors.\n- To get started, you can read our <%= link_to 'documentation contributions', 'https://edgeguides.rubyonrails.org/contributing_to_ruby_on_rails.html#contributing-to-the-rails-documentation' %> section.\n-

\n-

\n- You may also find incomplete content or stuff that is not up to date.\n- Please do add any missing documentation for main. Make sure to check\n- <%= link_to 'Edge Guides', 'https://edgeguides.rubyonrails.org' %> first to verify\n- if the issues are already fixed or not on the main branch.\n- Check the <%= link_to 'Ruby on Rails Guides Guidelines', 'ruby_on_rails_guides_guidelines.html' %>\n- for style and conventions.\n-

\n-

\n- If for whatever reason you spot something to fix but cannot patch it yourself, please\n- <%= link_to 'open an issue', 'https://github.com/rails/rails/issues' %>.\n-

\n-

And last but not least, any kind of discussion regarding Ruby on Rails\n- documentation is very welcome on the <%= link_to 'official Ruby on Rails Forum', 'https://discuss.rubyonrails.org/c/rubyonrails-docs' %>.\n-

\n+
\n+\n+
", - "comment_created_at": "2024-04-09T12:22:17+00:00", - "comment_author": "brunoprietog", - "comment_body": "Oh, I don't know. I'm not too convinced about adding so many additional regions beyond the navigation, main region and footer. It adds a lot of noise that is already covered by the heading levels and becomes a bit repetitive information.\r\n\r\nBy the way, I personally don't tend to use many links to skip to specific sections and hope this can be done natively. And really, there's no way to skip to the body of the article with a command to go to a region. You can easily use 2, but that makes me think we're not being very consistent with using regions, or if we're even using them properly.\r\n\r\nWhat do you think?", - "pr_file_module": null - }, - { - "comment_id": "1557576138", - "repo_full_name": "rails/rails", - "pr_number": 51499, - "pr_file": "guides/source/layout.html.erb", - "discussion_id": "1554119475", - "commented_code": "@@ -94,48 +106,54 @@\n \n
\n \n+\n
\n \n-
\n-
\n- <%= yield :header_section %>\n+
\n+
\n+
\n+
\n+ <%= yield :header_section %>\n \n- <%= yield :index_section %>\n+ <%= yield :index_section %>\n+
\n+
\n \n-
\n-
\n-
\n+
\n+
\n+
\n+ <%= yield %>\n+
\n \n-
\n-
\n-
\n- <%= yield %>\n-
\n-

Feedback

\n-

\n- You're encouraged to help improve the quality of this guide.\n-

\n-

\n- Please contribute if you see any typos or factual errors.\n- To get started, you can read our <%= link_to 'documentation contributions', 'https://edgeguides.rubyonrails.org/contributing_to_ruby_on_rails.html#contributing-to-the-rails-documentation' %> section.\n-

\n-

\n- You may also find incomplete content or stuff that is not up to date.\n- Please do add any missing documentation for main. Make sure to check\n- <%= link_to 'Edge Guides', 'https://edgeguides.rubyonrails.org' %> first to verify\n- if the issues are already fixed or not on the main branch.\n- Check the <%= link_to 'Ruby on Rails Guides Guidelines', 'ruby_on_rails_guides_guidelines.html' %>\n- for style and conventions.\n-

\n-

\n- If for whatever reason you spot something to fix but cannot patch it yourself, please\n- <%= link_to 'open an issue', 'https://github.com/rails/rails/issues' %>.\n-

\n-

And last but not least, any kind of discussion regarding Ruby on Rails\n- documentation is very welcome on the <%= link_to 'official Ruby on Rails Forum', 'https://discuss.rubyonrails.org/c/rubyonrails-docs' %>.\n-

\n+
\n+\n+
", - "comment_created_at": "2024-04-09T12:48:08+00:00", - "comment_author": "SleeplessByte", - "comment_body": "My personal thoughts here were:\r\n\r\n- I am browsing the guides extensively\r\n- I am in the middle of the article and find something I want to give feedback on\r\n- I know there is a _region_ (labelled section) that explains how to give feedback\r\n- I want to go there straight away\r\n\r\nWith NVDA I can get there right away when browsing the article pressing D, which should navigate to the next landmark or region. In this case the footer is nested inside article, and thus not _complementary_, but it is labelled by the heading, and thus it becomes a section with role=\"region\", iirc. Using the TOC navigation here would be sub-optimal as it doesn't allow be to go to \"last heading of the current level\", by pressing 2. I would need to press it repeatedly, right? Since there are so little regions, that's not the case.\r\n\r\nDoes this make sense?", - "pr_file_module": null - }, - { - "comment_id": "1557588925", - "repo_full_name": "rails/rails", - "pr_number": 51499, - "pr_file": "guides/source/layout.html.erb", - "discussion_id": "1554119475", - "commented_code": "@@ -94,48 +106,54 @@\n \n
\n \n+\n
\n \n-
\n-
\n- <%= yield :header_section %>\n+
\n+
\n+
\n+
\n+ <%= yield :header_section %>\n \n- <%= yield :index_section %>\n+ <%= yield :index_section %>\n+
\n+
\n \n-
\n-
\n-
\n+
\n+
\n+
\n+ <%= yield %>\n+
\n \n-
\n-
\n-
\n- <%= yield %>\n-
\n-

Feedback

\n-

\n- You're encouraged to help improve the quality of this guide.\n-

\n-

\n- Please contribute if you see any typos or factual errors.\n- To get started, you can read our <%= link_to 'documentation contributions', 'https://edgeguides.rubyonrails.org/contributing_to_ruby_on_rails.html#contributing-to-the-rails-documentation' %> section.\n-

\n-

\n- You may also find incomplete content or stuff that is not up to date.\n- Please do add any missing documentation for main. Make sure to check\n- <%= link_to 'Edge Guides', 'https://edgeguides.rubyonrails.org' %> first to verify\n- if the issues are already fixed or not on the main branch.\n- Check the <%= link_to 'Ruby on Rails Guides Guidelines', 'ruby_on_rails_guides_guidelines.html' %>\n- for style and conventions.\n-

\n-

\n- If for whatever reason you spot something to fix but cannot patch it yourself, please\n- <%= link_to 'open an issue', 'https://github.com/rails/rails/issues' %>.\n-

\n-

And last but not least, any kind of discussion regarding Ruby on Rails\n- documentation is very welcome on the <%= link_to 'official Ruby on Rails Forum', 'https://discuss.rubyonrails.org/c/rubyonrails-docs' %>.\n-

\n+
\n+\n+
", - "comment_created_at": "2024-04-09T12:57:42+00:00", - "comment_author": "brunoprietog", - "comment_body": "That might make sense, but then, we would also have to have a region for the article, right? To get back quickly, at least to the beginning of the article.\r\n\r\nActually, this shouldn't be a problem with the article element, but unfortunately it hasn't worked in Chromium and NVDA for a while now.\r\n\r\nAnd in the case of the feedback region, I guess we would have to give it the role=\"region\" explicitly, since NVDA doesn't recognize it either because it's a footer inside a main element.", - "pr_file_module": null - }, - { - "comment_id": "1557594136", - "repo_full_name": "rails/rails", - "pr_number": 51499, - "pr_file": "guides/source/layout.html.erb", - "discussion_id": "1554119475", - "commented_code": "@@ -94,48 +106,54 @@\n \n
\n \n+\n
\n \n-
\n-
\n- <%= yield :header_section %>\n+
\n+
\n+
\n+
\n+ <%= yield :header_section %>\n \n- <%= yield :index_section %>\n+ <%= yield :index_section %>\n+
\n+
\n \n-
\n-
\n-
\n+
\n+
\n+
\n+ <%= yield %>\n+
\n \n-
\n-
\n-
\n- <%= yield %>\n-
\n-

Feedback

\n-

\n- You're encouraged to help improve the quality of this guide.\n-

\n-

\n- Please contribute if you see any typos or factual errors.\n- To get started, you can read our <%= link_to 'documentation contributions', 'https://edgeguides.rubyonrails.org/contributing_to_ruby_on_rails.html#contributing-to-the-rails-documentation' %> section.\n-

\n-

\n- You may also find incomplete content or stuff that is not up to date.\n- Please do add any missing documentation for main. Make sure to check\n- <%= link_to 'Edge Guides', 'https://edgeguides.rubyonrails.org' %> first to verify\n- if the issues are already fixed or not on the main branch.\n- Check the <%= link_to 'Ruby on Rails Guides Guidelines', 'ruby_on_rails_guides_guidelines.html' %>\n- for style and conventions.\n-

\n-

\n- If for whatever reason you spot something to fix but cannot patch it yourself, please\n- <%= link_to 'open an issue', 'https://github.com/rails/rails/issues' %>.\n-

\n-

And last but not least, any kind of discussion regarding Ruby on Rails\n- documentation is very welcome on the <%= link_to 'official Ruby on Rails Forum', 'https://discuss.rubyonrails.org/c/rubyonrails-docs' %>.\n-

\n+
\n+\n+
", - "comment_created_at": "2024-04-09T13:01:21+00:00", - "comment_author": "SleeplessByte", - "comment_body": "We have a region for the article: https://github.com/rails/rails/pull/51499/files/93f9819c62890301f398e752b6083c9d2c9f6c40#diff-dc279d925a05373ac5f34307441799f98ac4accbe0b0a8f12f99f703585afa7dR113 Is that not working for you? I haven't tested mine but I can.\r\n\r\nIt _should_ be:\r\n\r\n```html\r\n
\r\n
\r\n\r\n
\r\n article introduction\r\n\r\n chapter index / toc\r\n
\r\n\r\n article contents with lots of headings \r\n\r\n
\r\n

Feedback

\r\n\r\n feedback explanation\r\n
\r\n
\r\n
\r\n```\r\n\r\n", - "pr_file_module": null - }, - { - "comment_id": "1557595145", - "repo_full_name": "rails/rails", - "pr_number": 51499, - "pr_file": "guides/source/layout.html.erb", - "discussion_id": "1554119475", - "commented_code": "@@ -94,48 +106,54 @@\n \n
\n \n+\n
\n \n-
\n-
\n- <%= yield :header_section %>\n+
\n+
\n+
\n+
\n+ <%= yield :header_section %>\n \n- <%= yield :index_section %>\n+ <%= yield :index_section %>\n+
\n+
\n \n-
\n-
\n-
\n+
\n+
\n+
\n+ <%= yield %>\n+
\n \n-
\n-
\n-
\n- <%= yield %>\n-
\n-

Feedback

\n-

\n- You're encouraged to help improve the quality of this guide.\n-

\n-

\n- Please contribute if you see any typos or factual errors.\n- To get started, you can read our <%= link_to 'documentation contributions', 'https://edgeguides.rubyonrails.org/contributing_to_ruby_on_rails.html#contributing-to-the-rails-documentation' %> section.\n-

\n-

\n- You may also find incomplete content or stuff that is not up to date.\n- Please do add any missing documentation for main. Make sure to check\n- <%= link_to 'Edge Guides', 'https://edgeguides.rubyonrails.org' %> first to verify\n- if the issues are already fixed or not on the main branch.\n- Check the <%= link_to 'Ruby on Rails Guides Guidelines', 'ruby_on_rails_guides_guidelines.html' %>\n- for style and conventions.\n-

\n-

\n- If for whatever reason you spot something to fix but cannot patch it yourself, please\n- <%= link_to 'open an issue', 'https://github.com/rails/rails/issues' %>.\n-

\n-

And last but not least, any kind of discussion regarding Ruby on Rails\n- documentation is very welcome on the <%= link_to 'official Ruby on Rails Forum', 'https://discuss.rubyonrails.org/c/rubyonrails-docs' %>.\n-

\n+
\n+\n+
", - "comment_created_at": "2024-04-09T13:02:09+00:00", - "comment_author": "SleeplessByte", - "comment_body": "(sidenote: I am fine with explicit role-ing things if SRs don't recognise it, so let's do that if we must)", - "pr_file_module": null - }, - { - "comment_id": "1557603577", - "repo_full_name": "rails/rails", - "pr_number": 51499, - "pr_file": "guides/source/layout.html.erb", - "discussion_id": "1554119475", - "commented_code": "@@ -94,48 +106,54 @@\n \n
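The "explicit role-ing" mentioned in the comment above amounts to giving a landmark an explicit `role` plus an accessible name, so screen readers that do not expose the implicit mapping can still list it and jump to it. A minimal sketch, assuming a feedback footer like the one in this layout — the `id` value and heading level are invented for illustration and are not the markup the PR merged:

```html
<!-- Sketch only: expose the feedback footer as a named landmark region.
     The id value and heading level are assumptions for this illustration. -->
<footer role="region" aria-labelledby="feedback-title">
  <h3 id="feedback-title">Feedback</h3>
  <p>feedback explanation</p>
</footer>
```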
", - "comment_created_at": "2024-04-09T13:08:34+00:00", - "comment_author": "brunoprietog", - "comment_body": "Right! So I think assigning the explicit roles would be enough to solve this.\r\n\r\nIn any case, an article is not exposed as a region, it's exposed as another type of element. In fact, at least in NVDA, there's no default shortcut to jump to it, I configured the A to do so but it's not the common case.", - "pr_file_module": null - }, - { - "comment_id": "1557622932", - "repo_full_name": "rails/rails", - "pr_number": 51499, - "pr_file": "guides/source/layout.html.erb", - "discussion_id": "1554119475", - "commented_code": "@@ -94,48 +106,54 @@\n \n
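To illustrate the point above: `<article>` is exposed with the ARIA role `article`, which is not a landmark region, so most screen readers offer no default quick key for it; an element with an explicit `role="region"` and an accessible name, by contrast, appears in landmark lists and landmark navigation. A hedged sketch — the label text is invented, not taken from the PR:

```html
<!-- Sketch only: comparing the two exposures. -->

<!-- Role "article": not a landmark region, no default shortcut in most screen readers. -->
<article>
  guide contents
</article>

<!-- Named region: listed in landmark navigation. -->
<div role="region" aria-label="Guide contents">
  guide contents
</div>
```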
", - "comment_created_at": "2024-04-09T13:22:14+00:00", - "comment_author": "SleeplessByte", - "comment_body": "Cool! Yeah, I _love_ different support for different SRs and even different versions of the same SR \ud83d\udde1\ufe0f \ud83d\ude01. \r\n\r\nI'll explicitly role both the article header and article footer (so your NVDA can skip back/forth to them)\r\n", - "pr_file_module": null - }, - { - "comment_id": "1557636426", - "repo_full_name": "rails/rails", - "pr_number": 51499, - "pr_file": "guides/source/layout.html.erb", - "discussion_id": "1554119475", - "commented_code": "@@ -94,48 +106,54 @@\n \n
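The follow-up above proposes explicitly role-ing the article header and footer. Because `<header>` and `<footer>` nested inside `<article>` are not exposed as `banner`/`contentinfo` landmarks, naming them explicitly is what makes them reachable with NVDA's landmark navigation. A rough sketch of what that could look like — the attribute values are assumptions, not the markup the PR merged:

```html
<!-- Sketch only: explicitly named header and footer inside the article.
     Inside <article>, <header>/<footer> do not map to banner/contentinfo,
     so the explicit role + accessible name is what makes them navigable landmarks. -->
<article>
  <header role="region" aria-label="Guide introduction">
    article introduction
  </header>

  article contents

  <footer role="region" aria-labelledby="feedback-title">
    <h3 id="feedback-title">Feedback</h3>
    feedback explanation
  </footer>
</article>
```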